////////////////////////////////////////////////////////////////////////////////
/// @brief Aql, execution engine
///
/// @file
///
/// DISCLAIMER
///
/// Copyright 2014 ArangoDB GmbH, Cologne, Germany
/// Copyright 2004-2014 triAGENS GmbH, Cologne, Germany
///
/// Licensed under the Apache License, Version 2.0 (the "License");
/// you may not use this file except in compliance with the License.
/// You may obtain a copy of the License at
///
///     http://www.apache.org/licenses/LICENSE-2.0
///
/// Unless required by applicable law or agreed to in writing, software
/// distributed under the License is distributed on an "AS IS" BASIS,
/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
/// See the License for the specific language governing permissions and
/// limitations under the License.
///
/// Copyright holder is ArangoDB GmbH, Cologne, Germany
///
/// @author Jan Steemann
/// @author Copyright 2014, ArangoDB GmbH, Cologne, Germany
/// @author Copyright 2012-2013, triAGENS GmbH, Cologne, Germany
////////////////////////////////////////////////////////////////////////////////
#include "Aql/ExecutionEngine.h"
|
|
#include "Aql/ExecutionBlock.h"
|
|
#include "Aql/ExecutionNode.h"
|
|
#include "Aql/ExecutionPlan.h"
|
|
#include "Aql/QueryRegistry.h"
|
|
#include "Aql/WalkerWorker.h"
|
|
#include "Cluster/ClusterComm.h"
|
|
#include "Basics/Exceptions.h"
|
|
#include "Basics/logging.h"
|
|
|
|

using namespace triagens::aql;
using namespace triagens::arango;

using Json = triagens::basics::Json;

////////////////////////////////////////////////////////////////////////////////
/// @brief helper function to create a block
////////////////////////////////////////////////////////////////////////////////

static ExecutionBlock* createBlock (ExecutionEngine* engine,
                                    ExecutionNode const* en,
                                    std::unordered_map<ExecutionNode*, ExecutionBlock*> const& cache) {
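  // "cache" maps each plan node that has already been instantiated to its
  // block; it is only consulted here for SUBQUERY nodes, whose subquery
  // block must already have been created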
|
|
switch (en->getType()) {
|
|
case ExecutionNode::SINGLETON: {
|
|
return new SingletonBlock(engine, static_cast<SingletonNode const*>(en));
|
|
}
|
|
case ExecutionNode::INDEX_RANGE: {
|
|
return new IndexRangeBlock(engine, static_cast<IndexRangeNode const*>(en));
|
|
}
|
|
case ExecutionNode::ENUMERATE_COLLECTION: {
|
|
return new EnumerateCollectionBlock(engine,
|
|
static_cast<EnumerateCollectionNode const*>(en));
|
|
}
|
|
case ExecutionNode::ENUMERATE_LIST: {
|
|
return new EnumerateListBlock(engine,
|
|
static_cast<EnumerateListNode const*>(en));
|
|
}
|
|
case ExecutionNode::CALCULATION: {
|
|
return new CalculationBlock(engine,
|
|
static_cast<CalculationNode const*>(en));
|
|
}
|
|
case ExecutionNode::FILTER: {
|
|
return new FilterBlock(engine, static_cast<FilterNode const*>(en));
|
|
}
|
|
case ExecutionNode::LIMIT: {
|
|
return new LimitBlock(engine, static_cast<LimitNode const*>(en));
|
|
}
|
|
case ExecutionNode::SORT: {
|
|
return new SortBlock(engine, static_cast<SortNode const*>(en));
|
|
}
|
|
case ExecutionNode::AGGREGATE: {
|
|
return new AggregateBlock(engine, static_cast<AggregateNode const*>(en));
|
|
}
|
|
case ExecutionNode::SUBQUERY: {
|
|
auto es = static_cast<SubqueryNode const*>(en);
|
|
auto it = cache.find(es->getSubquery());
|
|
|
|
TRI_ASSERT(it != cache.end());
|
|
|
|
return new SubqueryBlock(engine,
|
|
static_cast<SubqueryNode const*>(en),
|
|
it->second);
|
|
}
|
|
case ExecutionNode::RETURN: {
|
|
return new ReturnBlock(engine,
|
|
static_cast<ReturnNode const*>(en));
|
|
}
|
|
case ExecutionNode::REMOVE: {
|
|
return new RemoveBlock(engine,
|
|
static_cast<RemoveNode const*>(en));
|
|
}
|
|
case ExecutionNode::INSERT: {
|
|
return new InsertBlock(engine,
|
|
static_cast<InsertNode const*>(en));
|
|
}
|
|
case ExecutionNode::UPDATE: {
|
|
return new UpdateBlock(engine,
|
|
static_cast<UpdateNode const*>(en));
|
|
}
|
|
case ExecutionNode::REPLACE: {
|
|
return new ReplaceBlock(engine,
|
|
static_cast<ReplaceNode const*>(en));
|
|
}
|
|
case ExecutionNode::NORESULTS: {
|
|
return new NoResultsBlock(engine,
|
|
static_cast<NoResultsNode const*>(en));
|
|
}
|
|
case ExecutionNode::SCATTER: {
|
|
auto&& shardIds = static_cast<ScatterNode const*>(en)->collection()->shardIds();
|
|
return new ScatterBlock(engine,
|
|
static_cast<ScatterNode const*>(en),
|
|
shardIds);
|
|
}
|
|
case ExecutionNode::DISTRIBUTE: {
|
|
auto&& shardIds = static_cast<DistributeNode const*>(en)->collection()->shardIds();
|
|
return new DistributeBlock(engine,
|
|
static_cast<DistributeNode const*>(en),
|
|
shardIds,
|
|
static_cast<DistributeNode const*>
|
|
(en)->collection());
|
|
}
|
|
case ExecutionNode::GATHER: {
|
|
return new GatherBlock(engine,
|
|
static_cast<GatherNode const*>(en));
|
|
}
|
|
case ExecutionNode::REMOTE: {
|
|
auto remote = static_cast<RemoteNode const*>(en);
|
|
return new RemoteBlock(engine,
|
|
remote,
|
|
remote->server(),
|
|
remote->ownName(),
|
|
remote->queryId());
|
|
}
|
|
case ExecutionNode::ILLEGAL: {
|
|
THROW_ARANGO_EXCEPTION_MESSAGE(TRI_ERROR_INTERNAL, "illegal node type");
|
|
}
|
|
}
|
|
|
|
return nullptr;
|
|
}
|
|
|
|

// -----------------------------------------------------------------------------
// --SECTION--                                             class ExecutionEngine
// -----------------------------------------------------------------------------

// -----------------------------------------------------------------------------
// --SECTION--                                      constructors / destructors
// -----------------------------------------------------------------------------

////////////////////////////////////////////////////////////////////////////////
/// @brief create the engine
////////////////////////////////////////////////////////////////////////////////

ExecutionEngine::ExecutionEngine (Query* query)
  : _stats(),
    _blocks(),
    _root(nullptr),
    _query(query),
    _wasShutdown(false),
    _previouslyLockedShards(nullptr),
    _lockedShards(nullptr) {

  _blocks.reserve(8);
}

////////////////////////////////////////////////////////////////////////////////
/// @brief destroy the engine, frees all assigned blocks
////////////////////////////////////////////////////////////////////////////////

ExecutionEngine::~ExecutionEngine () {
  try {
    shutdown(TRI_ERROR_INTERNAL);
  }
  catch (...) {
    // shutdown can throw - ignore it in the destructor
  }

  for (auto it = _blocks.begin(); it != _blocks.end(); ++it) {
    delete (*it);
  }
}

// -----------------------------------------------------------------------------
// --SECTION--                    walker class for ExecutionNode to instantiate
// -----------------------------------------------------------------------------

struct Instanciator : public WalkerWorker<ExecutionNode> {
  ExecutionEngine* engine;
  ExecutionBlock* root;
  std::unordered_map<ExecutionNode*, ExecutionBlock*> cache;

  Instanciator (ExecutionEngine* engine)
    : engine(engine),
      root(nullptr) {
  }

  ~Instanciator () {
  }

  virtual void after (ExecutionNode* en) override final {
    ExecutionBlock* eb = createBlock(engine, en, cache);

    if (eb == nullptr) {
      THROW_ARANGO_EXCEPTION_MESSAGE(TRI_ERROR_INTERNAL, "illegal node type");
    }

    // do we need to adjust the root node?
    auto const nodeType = en->getType();

    if (nodeType == ExecutionNode::RETURN ||
        nodeType == ExecutionNode::REMOVE ||
        nodeType == ExecutionNode::INSERT ||
        nodeType == ExecutionNode::UPDATE ||
        nodeType == ExecutionNode::REPLACE) {
      root = eb;
    }
    else if (nodeType == ExecutionNode::DISTRIBUTE ||
             nodeType == ExecutionNode::SCATTER ||
             nodeType == ExecutionNode::GATHER) {
      THROW_ARANGO_EXCEPTION_MESSAGE(TRI_ERROR_INTERNAL, "logic error, got cluster node in local query");
    }

    try {
      engine->addBlock(eb);
    }
    catch (...) {
      delete eb;
      throw;
    }

    // Now add dependencies:
    std::vector<ExecutionNode*> deps = en->getDependencies();
    for (auto it = deps.begin(); it != deps.end(); ++it) {
      auto it2 = cache.find(*it);
      TRI_ASSERT(it2 != cache.end());
      eb->addDependency(it2->second);
    }

    if (root == nullptr &&
        en->getParents().empty()) {
      // adjust the root node if none was set already
      root = eb;
    }
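
    // remember the block for this node, so that dependent nodes (and
    // enclosing subquery nodes) can look it up later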
    cache.emplace(en, eb);
  }
};

// -----------------------------------------------------------------------------
// --SECTION--                    walker class for ExecutionNode to instantiate
// -----------------------------------------------------------------------------

// Here is a description of how the instantiation of an execution plan
// works in the cluster. See below for a complete example.
//
// The instantiation works as follows:
// (0) Variable usage and register planning is done in the global plan.
// (1) A walk with subqueries is done on the whole plan.
//     The purpose is to plan how many ExecutionEngines we need, where they
//     have to be instantiated and which plan nodes belong to each of them.
//     Such a walk is depth first and visits subqueries after it has visited
//     the dependencies of the subquery node recursively. Whenever the
//     walk passes by a RemoteNode it switches location between coordinator
//     and DBserver and starts a new engine. The nodes of an engine are
//     collected in the after method.
//     This walk results in a list of engines and a list of nodes for
//     each engine. It follows that the order in these lists is as follows:
//     The first engine is the main one on the coordinator, it has id 0.
//     The order of the engines is exactly as they are discovered in the
//     walk. That is, engines closer to the root are earlier and engines
//     in subqueries are later. The nodes in each engine are always
//     ordered in a way such that a dependency D of a node N is earlier in
//     the list than N, and a subquery node is later in the list than the
//     nodes of the subquery.
// (2) buildEngines is called with that data. It proceeds engine by engine,
//     starting from the back of the list. This means that an engine that
//     is referred to in a RemoteNode (because its nodes are dependencies
//     of that node) is always already instantiated before the RemoteNode
//     is instantiated. The corresponding query ids are collected in a
//     global hash table, for which the key consists of the id of the
//     RemoteNode using the query and the actual query id. For each engine,
//     the nodes are instantiated along the list of nodes for that engine.
//     This means that all dependencies of a node N are already instantiated
//     when N is instantiated. We distinguish the coordinator and the
//     DBserver case. In the former we have to clone a part of the
//     plan and in the latter we have to send a part to a DBserver via HTTP.
//
// Here is a fully worked out example:
//
// FOR i IN [1,2]
//   FOR d IN coll
//     FILTER d.pass == i
//     LET s = (FOR e IN coll2 FILTER e.name == d.name RETURN e)
//     RETURN {d:d, s:s}
//
// this is optimized to, variable and register planning is done in this plan:
//
//        Singleton
//            ^
//     EnumList [1,2]              Singleton
//            ^                        ^
//       Scatter (2)               Enum coll2
//            ^                        ^
//         Remote            Calc e.name==d.name
//            ^                        ^
//        Enum coll                Filter (3)
//            ^                        ^
//     Calc d.pass==i               Remote
//            ^                        ^
//        Filter (1)                Gather
//            ^                        ^
//         Remote                   Return
//            ^                        ^
//         Gather                      |
//            ^                        |
//        Subquery  ------------------/
//            ^
//     Calc {d:d, s:s}
//            ^
//        Return (0)
//
// There are 4 engines here, their corresponding root nodes are labelled
// in the above picture in round brackets with the ids of the engines.
// Engines 1 and 3 have to be replicated for each shard of coll or coll2
// respectively, and sent to the right DBserver via HTTP. Engine 0 is the
// main one on the coordinator and engine 2 is a non-main part on the
// coordinator. Recall that the walk goes first to the dependencies before
// it visits the nodes of the subquery. Thus, the walk builds up the lists
// in this order:
//   engine 0: [Remote, Gather, Remote, Gather, Return, Subquery, Calc, Return]
//   engine 1: [Remote, Enum coll, Calc d.pass==i, Filter]
//   engine 2: [Singleton, EnumList [1,2], Scatter]
//   engine 3: [Singleton, Enum coll2, Calc e.name==d.name, Filter]
// buildEngines will then do engines in the order 3, 2, 1, 0 and for each
// of them the nodes from left to right in these lists. In the end, we have
// a proper instantiation of the whole thing.

struct CoordinatorInstanciator : public WalkerWorker<ExecutionNode> {
  enum EngineLocation {
    COORDINATOR,
    DBSERVER
  };

  struct EngineInfo {
    EngineInfo (EngineLocation location,
                size_t id,
                triagens::aql::QueryPart p,
                size_t idOfRemoteNode)
      : location(location),
        id(id),
        nodes(),
        part(p),
        idOfRemoteNode(idOfRemoteNode) {
    }

    Collection* getCollection () const {
      Collection* collection = nullptr;

      for (auto en = nodes.rbegin(); en != nodes.rend(); ++en) {
        // find the collection to be used
        if ((*en)->getType() == ExecutionNode::ENUMERATE_COLLECTION) {
          collection = const_cast<Collection*>(static_cast<EnumerateCollectionNode*>((*en))->collection());
        }
        else if ((*en)->getType() == ExecutionNode::INDEX_RANGE) {
          collection = const_cast<Collection*>(static_cast<IndexRangeNode*>((*en))->collection());
        }
        else if ((*en)->getType() == ExecutionNode::INSERT ||
                 (*en)->getType() == ExecutionNode::UPDATE ||
                 (*en)->getType() == ExecutionNode::REPLACE ||
                 (*en)->getType() == ExecutionNode::REMOVE) {
          collection = const_cast<Collection*>(static_cast<ModificationNode*>((*en))->collection());
        }
      }

      TRI_ASSERT(collection != nullptr);
      return collection;
    }

    EngineLocation const location;
    size_t const id;
    std::vector<ExecutionNode*> nodes;
    triagens::aql::QueryPart part;   // only relevant for DBserver parts
    size_t idOfRemoteNode;           // id of the remote node in the
                                     // original plan that needs this engine
  };

  Query* query;
  QueryRegistry* queryRegistry;
  ExecutionBlock* root;
  EngineLocation currentLocation;
  size_t currentEngineId;
  std::vector<EngineInfo> engines;
  std::vector<size_t> engineStack;   // stack of engine ids, used for
                                     // RemoteNodes
  std::unordered_set<std::string> collNamesSeenOnDBServer;
  // names of sharded collections that we have already seen on a DBserver;
  // this is relevant to decide whether or not the engine there is a main
  // query or a dependent one.

  std::unordered_map<std::string, std::string> queryIds;
  // this map allows to find the queries which are the parts of the big
  // query. There are two cases: the first is for the remote queries on
  // the DBservers, for these the key is
  //   itoa(ID of RemoteNode in original plan) + ":" + shardId
  // and the value is the
  //   queryId on the DBserver,
  // with a "*" appended if it is a PART_MAIN query.
  // The second case is a query which lives on the coordinator but is not
  // the main query. For these we store
  //   itoa(ID of RemoteNode in original plan) + "/" + <name of vocbase>
  // and the value is the
  //   queryId used in the QueryRegistry.
  // This map is built up when we instantiate the various engines on the
  // DBservers and used when we instantiate the ones on the
  // coordinator. Note that the main query and engine is not put into
  // this map at all.
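  // For illustration (hypothetical ids): an entry
  //   "42:s100023" -> "1234567*"
  // means that RemoteNode 42 talks to query 1234567 on the DBserver
  // responsible for shard s100023, and that this query is a PART_MAIN part.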

  ////////////////////////////////////////////////////////////////////////////////
  /// @brief constructor
  ////////////////////////////////////////////////////////////////////////////////

  CoordinatorInstanciator (Query* query,
                           QueryRegistry* queryRegistry)
    : query(query),
      queryRegistry(queryRegistry),
      root(nullptr),
      currentLocation(COORDINATOR),
      currentEngineId(0),
      engines() {

    TRI_ASSERT(query != nullptr);
    TRI_ASSERT(queryRegistry != nullptr);

    engines.emplace_back(COORDINATOR, 0, PART_MAIN, 0);
  }

  ////////////////////////////////////////////////////////////////////////////////
  /// @brief destructor
  ////////////////////////////////////////////////////////////////////////////////

  ~CoordinatorInstanciator () {
  }

  ////////////////////////////////////////////////////////////////////////////////
  /// @brief generatePlanForOneShard
  ////////////////////////////////////////////////////////////////////////////////

  triagens::basics::Json generatePlanForOneShard (EngineInfo const& info,
                                                  QueryId& connectedId,
                                                  std::string const& shardId,
                                                  bool verbose) {
    // copy the relevant fragment of the plan for each shard
    // Note that in these parts of the query there are no SubqueryNodes,
    // since they are all on the coordinator!
    ExecutionPlan plan(query->ast());

    ExecutionNode* previous = nullptr;
    for (ExecutionNode const* current : info.nodes) {
      auto clone = current->clone(&plan, false, false);
      // UNNECESSARY, because clone does it: plan.registerNode(clone);

      if (current->getType() == ExecutionNode::REMOTE) {
        // update the remote node with the information about the query
        static_cast<RemoteNode*>(clone)->server("server:" + triagens::arango::ServerState::instance()->getId());
        static_cast<RemoteNode*>(clone)->ownName(shardId);
        static_cast<RemoteNode*>(clone)->queryId(connectedId);
      }

      if (previous != nullptr) {
        clone->addDependency(previous);
      }

      previous = clone;
    }
    plan.root(previous);
    plan.setVarUsageComputed();
    return plan.root()->toJson(TRI_UNKNOWN_MEM_ZONE, verbose);
  }

  ////////////////////////////////////////////////////////////////////////////////
  /// @brief distributePlanToShard, send a single plan to one shard
  ////////////////////////////////////////////////////////////////////////////////

  void distributePlanToShard (triagens::arango::CoordTransactionID& coordTransactionID,
                              EngineInfo const& info,
                              Collection* collection,
                              QueryId& connectedId,
                              std::string const& shardId,
                              TRI_json_t* jsonPlan) {
    // create a JSON representation of the plan
    Json result(Json::Object);

    // inject the current shard id into the collection
    collection->setCurrentShard(shardId);

    Json jsonNodesList(TRI_UNKNOWN_MEM_ZONE, jsonPlan, Json::NOFREE);

    // add the collection
    Json jsonCollectionsList(Json::Array);
    Json json(Json::Object);
    jsonCollectionsList(json("name", Json(collection->getName()))
                            ("type", Json(TRI_TransactionTypeGetStr(collection->accessType))));

    jsonNodesList.set("collections", jsonCollectionsList);
    jsonNodesList.set("variables", query->ast()->variables()->toJson(TRI_UNKNOWN_MEM_ZONE));

    result.set("plan", jsonNodesList);
    if (info.part == triagens::aql::PART_MAIN) {
      result.set("part", Json("main"));
    }
    else {
      result.set("part", Json("dependent"));
    }

    Json optimizerOptionsRules(Json::Array);
    Json optimizerOptions(Json::Object);

    Json options(Json::Object);
    optimizerOptionsRules.add(Json("-all"));
    optimizerOptions.set("rules", optimizerOptionsRules);
    options.set("optimizer", optimizerOptions);
    result.set("options", options);
    std::unique_ptr<std::string> body(new std::string(triagens::basics::JsonHelper::toString(result.json())));

    // std::cout << "GENERATED A PLAN FOR THE REMOTE SERVERS: " << *(body.get()) << "\n";

    auto cc = triagens::arango::ClusterComm::instance();

    std::string const url("/_db/" + triagens::basics::StringUtils::urlEncode(collection->vocbase->_name) +
                          "/_api/aql/instanciate");

    auto headers = new std::map<std::string, std::string>;
    (*headers)["X-Arango-Nolock"] = shardId;   // prevent locking
    auto res = cc->asyncRequest("",
                                coordTransactionID,
                                "shard:" + shardId,
                                triagens::rest::HttpRequest::HTTP_REQUEST_POST,
                                url,
                                body.release(),
                                true,
                                headers,
                                nullptr,
                                30.0);

    if (res != nullptr) {
      delete res;
    }
  }

  ////////////////////////////////////////////////////////////////////////////////
  /// @brief aggregateQueryIds, get answers for all shards in a Scatter/Gather
  ////////////////////////////////////////////////////////////////////////////////

  void aggregateQueryIds (EngineInfo const& info,
                          triagens::arango::ClusterComm*& cc,
                          triagens::arango::CoordTransactionID& coordTransactionID,
                          Collection* collection) {

    // pick up the remote query ids
    std::vector<std::string> shardIds = collection->shardIds();

    std::string error;
    int count = 0;
    int nrok = 0;
    for (count = (int) shardIds.size(); count > 0; count--) {
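      // each wait() call delivers one response for this coordTransactionID,
      // so one iteration per shard collects all answers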
      auto res = cc->wait("", coordTransactionID, 0, "", 30.0);

      if (res->status == triagens::arango::CL_COMM_RECEIVED) {
        if (res->answer_code == triagens::rest::HttpResponse::OK ||
            res->answer_code == triagens::rest::HttpResponse::CREATED ||
            res->answer_code == triagens::rest::HttpResponse::ACCEPTED) {
          // query instantiated without problems
          nrok++;

          // pick up query id from response
          triagens::basics::Json response(TRI_UNKNOWN_MEM_ZONE, triagens::basics::JsonHelper::fromString(res->answer->body()));
          std::string queryId = triagens::basics::JsonHelper::getStringValue(response.json(), "queryId", "");

          // std::cout << "DB SERVER ANSWERED WITHOUT ERROR: " << res->answer->body() << ", REMOTENODEID: " << info.idOfRemoteNode << " SHARDID:" << res->shardID << ", QUERYID: " << queryId << "\n";
          std::string theID
              = triagens::basics::StringUtils::itoa(info.idOfRemoteNode)
              + ":" + res->shardID;
          if (info.part == triagens::aql::PART_MAIN) {
            queryIds.emplace(theID, queryId + "*");
          }
          else {
            queryIds.emplace(theID, queryId);
          }
        }
        else {
          error += "DB SERVER ANSWERED WITH ERROR: ";
          error += res->answer->body();
          error += "\n";
        }
      }
      else {
        error += std::string("Communication with shard '") +
                 std::string(res->shardID) +
                 std::string("' on cluster node '") +
                 std::string(res->serverID) +
                 std::string("' failed: ") +
                 res->errorMessage;
      }
      delete res;
    }

    // std::cout << "GOT ALL RESPONSES FROM DB SERVERS: " << nrok << "\n";

    if (nrok != (int) shardIds.size()) {
      THROW_ARANGO_EXCEPTION_MESSAGE(TRI_ERROR_INTERNAL, error);
    }
  }

  ////////////////////////////////////////////////////////////////////////////////
  /// @brief distributePlansToShards, for a single Scatter/Gather block
  ////////////////////////////////////////////////////////////////////////////////

  void distributePlansToShards (EngineInfo const& info,
                                QueryId connectedId) {

    // std::cout << "distributePlansToShards: " << info.id << std::endl;
    Collection* collection = info.getCollection();
    // now send the plan to the remote servers
    triagens::arango::CoordTransactionID coordTransactionID = TRI_NewTickServer();
    auto cc = triagens::arango::ClusterComm::instance();
    TRI_ASSERT(cc != nullptr);

    // iterate over all shards of the collection
    for (auto& shardId : collection->shardIds()) {
      // inject the current shard id into the collection
      collection->setCurrentShard(shardId);
      auto jsonPlan = generatePlanForOneShard(info, connectedId, shardId, true);

      distributePlanToShard(coordTransactionID, info, collection, connectedId, shardId, jsonPlan.steal());
    }

    // fix collection
    collection->resetCurrentShard();
    aggregateQueryIds(info, cc, coordTransactionID, collection);
  }

  ////////////////////////////////////////////////////////////////////////////////
  /// @brief buildEngineCoordinator, for a single piece
  ////////////////////////////////////////////////////////////////////////////////

  ExecutionEngine* buildEngineCoordinator (EngineInfo& info) {
    Query* localQuery = query;
    bool needToClone = info.id > 0;   // use the original for the main part
    if (needToClone) {
      // need a new query instance on the coordinator
      localQuery = query->clone(PART_DEPENDENT, false);
      if (localQuery == nullptr) {
        THROW_ARANGO_EXCEPTION_MESSAGE(TRI_ERROR_INTERNAL, "cannot clone query");
      }
    }

    try {
      std::unique_ptr<ExecutionEngine> engine(new ExecutionEngine(localQuery));
      localQuery->engine(engine.get());

      std::unordered_map<ExecutionNode*, ExecutionBlock*> cache;
      RemoteNode* remoteNode = nullptr;

      for (auto en = info.nodes.begin(); en != info.nodes.end(); ++en) {
        auto const nodeType = (*en)->getType();

        if (nodeType == ExecutionNode::REMOTE) {
          remoteNode = static_cast<RemoteNode*>((*en));
          continue;
        }

        // for all node types but REMOTEs, we create blocks
        ExecutionBlock* eb = createBlock(engine.get(), (*en), cache);

        if (eb == nullptr) {
          THROW_ARANGO_EXCEPTION_MESSAGE(TRI_ERROR_INTERNAL, "illegal node type");
        }

        try {
          engine.get()->addBlock(eb);
        }
        catch (...) {
          delete eb;
          throw;
        }

        std::vector<ExecutionNode*> deps = (*en)->getDependencies();

        for (auto dep = deps.begin(); dep != deps.end(); ++dep) {
          auto d = cache.find(*dep);

          if (d != cache.end()) {
            // add regular dependencies
            eb->addDependency((*d).second);
          }
        }

        if (nodeType == ExecutionNode::GATHER) {
          // we found a gather node
          TRI_ASSERT(remoteNode != nullptr);

          // now we'll create a remote node for each shard and add it to the gather node
          Collection const* collection = nullptr;
          if (nodeType == ExecutionNode::GATHER) {
            collection = static_cast<GatherNode const*>((*en))->collection();
          }
          else {
            THROW_ARANGO_EXCEPTION(TRI_ERROR_INTERNAL);
          }

          auto&& shardIds = collection->shardIds();

          for (auto const& shardId : shardIds) {
            std::string theId
                = triagens::basics::StringUtils::itoa(remoteNode->id())
                + ":" + shardId;
            auto it = queryIds.find(theId);
            if (it == queryIds.end()) {
              THROW_ARANGO_EXCEPTION_MESSAGE(TRI_ERROR_INTERNAL, "could not find query id in list");
            }
            std::string idThere = it->second;
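            // a trailing '*' marks a PART_MAIN query on the DBserver
            // (see the queryIds documentation above); strip it before use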
            if (idThere.back() == '*') {
              idThere.pop_back();
            }
            ExecutionBlock* r = new RemoteBlock(engine.get(),
                                                remoteNode,
                                                "shard:" + shardId,   // server
                                                "",                   // ownName
                                                idThere);             // queryId

            try {
              engine.get()->addBlock(r);
            }
            catch (...) {
              delete r;
              throw;
            }

            eb->addDependency(r);
          }
        }

        // the last block is always the root
        engine->root(eb);

        // put it into our cache:
        cache.emplace(*en, eb);
      }

      TRI_ASSERT(engine->root() != nullptr);

      // localQuery is stored in the engine
      return engine.release();
    }
    catch (...) {
      localQuery->engine(nullptr);   // engine is already destroyed by unique_ptr
      if (needToClone) {
        delete localQuery;
      }
      throw;
    }
  }

  ////////////////////////////////////////////////////////////////////////////////
  /// @brief buildEngines, build engines on DBservers and coordinator
  ////////////////////////////////////////////////////////////////////////////////

  ExecutionEngine* buildEngines () {
    ExecutionEngine* engine = nullptr;
    QueryId id = 0;

    for (auto it = engines.rbegin(); it != engines.rend(); ++it) {
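      // engines are handled in reverse discovery order, so that every
      // engine referenced by a RemoteNode is already instantiated before
      // the engine containing that RemoteNode is built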
      // std::cout << "Doing engine: " << it->id << " location:"
      //           << it->location << std::endl;
      if ((*it).location == COORDINATOR) {
        // create a coordinator-based engine
        engine = buildEngineCoordinator(*it);
        TRI_ASSERT(engine != nullptr);

        if ((*it).id > 0) {
          // create a remote id for the engine that we can pass to
          // the plans to be created for the DBServers
          id = TRI_NewTickServer();

          try {
            queryRegistry->insert(id, engine->getQuery(), 3600.0);
          }
          catch (...) {
            delete engine->getQuery();
            // This deletes the new query as well as the engine
            throw;
          }
          try {
            std::string queryId = triagens::basics::StringUtils::itoa(id);
            std::string theID
                = triagens::basics::StringUtils::itoa(it->idOfRemoteNode)
                + "/" + engine->getQuery()->vocbase()->_name;
            queryIds.emplace(theID, queryId);
          }
          catch (...) {
            queryRegistry->destroy(engine->getQuery()->vocbase(),
                                   id, TRI_ERROR_INTERNAL);
            // This deletes query, engine and entry in QueryRegistry
            throw;
          }
        }
      }
      else {
        // create an engine on a remote DB server
        // hand in the previous engine's id
        distributePlansToShards((*it), id);
      }
    }

    TRI_ASSERT(engine != nullptr);

    // return the last created coordinator-based engine
    // this is the local engine that we'll use to run the query
    return engine;
  }

  ////////////////////////////////////////////////////////////////////////////////
  /// @brief before method for collection of pieces phase
  ////////////////////////////////////////////////////////////////////////////////

  virtual bool before (ExecutionNode* en) override final {
    auto const nodeType = en->getType();

    if (nodeType == ExecutionNode::REMOTE) {
      // got a remote node
      // this indicates the end of an execution section

      engineStack.push_back(currentEngineId);

      // begin a new engine
      // flip current location
      currentLocation = (currentLocation == COORDINATOR ? DBSERVER : COORDINATOR);
      currentEngineId = engines.size();
      QueryPart part = PART_DEPENDENT;
      if (currentLocation == DBSERVER) {
        auto rn = static_cast<RemoteNode*>(en);
        Collection const* coll = rn->collection();
        if (collNamesSeenOnDBServer.find(coll->name) ==
            collNamesSeenOnDBServer.end()) {
          part = PART_MAIN;
          collNamesSeenOnDBServer.insert(coll->name);
        }
      }
      // For the coordinator we do not care about main or part:
      engines.emplace_back(currentLocation, currentEngineId, part, en->id());
    }

    return false;
  }

  ////////////////////////////////////////////////////////////////////////////////
  /// @brief after method for collection of pieces phase
  ////////////////////////////////////////////////////////////////////////////////

  virtual void after (ExecutionNode* en) override final {
    auto const nodeType = en->getType();

    if (nodeType == ExecutionNode::REMOTE) {
      currentEngineId = engineStack.back();
      engineStack.pop_back();
      currentLocation = engines[currentEngineId].location;
    }

    // assign the current node to the current engine
    engines[currentEngineId].nodes.push_back(en);
  }
};

// -----------------------------------------------------------------------------
// --SECTION--                                                  public functions
// -----------------------------------------------------------------------------

////////////////////////////////////////////////////////////////////////////////
/// @brief create an execution engine from a plan
////////////////////////////////////////////////////////////////////////////////

ExecutionEngine* ExecutionEngine::instanciateFromPlan (QueryRegistry* queryRegistry,
                                                       Query* query,
                                                       ExecutionPlan* plan,
                                                       bool planRegisters) {
  ExecutionEngine* engine = nullptr;

  try {
    if (! plan->varUsageComputed()) {
      plan->findVarUsage();
    }
    if (planRegisters) {
      plan->planRegisters();
    }

    ExecutionBlock* root = nullptr;

    if (triagens::arango::ServerState::instance()->isCoordinator()) {
      // instantiate the engine on the coordinator
      std::unique_ptr<CoordinatorInstanciator> inst(new CoordinatorInstanciator(query, queryRegistry));
      plan->root()->walk(inst.get());

      // std::cout << "ORIGINAL PLAN:\n" << plan->toJson(query->ast(), TRI_UNKNOWN_MEM_ZONE, true).toString() << "\n\n";

#if 0
      // Just for debugging
      for (auto& ei : inst->engines) {
        std::cout << "EngineInfo: id=" << ei.id
                  << " Location=" << ei.location << std::endl;
        for (auto& n : ei.nodes) {
          std::cout << "Node: type=" << n->getTypeString() << std::endl;
        }
      }
#endif

      try {
        engine = inst.get()->buildEngines();
        root = engine->root();
        // Now find all shards that take part:
        if (Transaction::_makeNolockHeaders != nullptr) {
          engine->_lockedShards = new std::unordered_set<std::string>(*Transaction::_makeNolockHeaders);
          engine->_previouslyLockedShards = Transaction::_makeNolockHeaders;
        }
        else {
          engine->_lockedShards = new std::unordered_set<std::string>();
          engine->_previouslyLockedShards = nullptr;
        }
        std::map<std::string, std::string> forLocking;
        for (auto& q : inst.get()->queryIds) {
          std::string theId = q.first;
          std::string queryId = q.second;
          // std::cout << "queryIds: " << theId << " : " << queryId << std::endl;
          auto pos = theId.find(':');
          if (pos != std::string::npos) {
            // So this is a remote one on a DBserver:
            if (queryId.back() == '*') {   // only the PART_MAIN one!
              queryId.pop_back();
              std::string shardId = theId.substr(pos + 1);
              engine->_lockedShards->insert(shardId);
              forLocking.emplace(shardId, queryId);
            }
          }
        }
        // Second round, this time we deal with the coordinator pieces
        // and tell them the lockedShards as well, we need to copy, since
        // they want to delete independently:
        for (auto& q : inst.get()->queryIds) {
          std::string theId = q.first;
          std::string queryId = q.second;
          // std::cout << "queryIds: " << theId << " : " << queryId << std::endl;
          auto pos = theId.find('/');
          if (pos != std::string::npos) {
            // std::cout << "Setting lockedShards for query ID "
            //           << queryId << std::endl;
            QueryId qId = triagens::basics::StringUtils::uint64(queryId);
            TRI_vocbase_t* vocbase = query->vocbase();
            Query* q = queryRegistry->open(vocbase, qId);
            q->engine()->setLockedShards(new std::unordered_set<std::string>(*engine->_lockedShards));
            queryRegistry->close(vocbase, qId);
            // std::cout << "Setting lockedShards done." << std::endl;
          }
        }
        // Now lock them all in the right order:
        for (auto& p : forLocking) {
          std::string const& shardId = p.first;
          std::string const& queryId = p.second;
          // Lock shard on DBserver:
          triagens::arango::CoordTransactionID coordTransactionID
              = TRI_NewTickServer();
          auto cc = triagens::arango::ClusterComm::instance();
          TRI_vocbase_t* vocbase = query->vocbase();
          std::string const url("/_db/"
                                + triagens::basics::StringUtils::urlEncode(vocbase->_name)
                                + "/_api/aql/lock/" + queryId);
          std::map<std::string, std::string> headers;
          auto res = cc->syncRequest("", coordTransactionID,
                                     "shard:" + shardId,
                                     triagens::rest::HttpRequest::HTTP_REQUEST_PUT, url, "{}",
                                     headers, 30.0);
          if (res->status != CL_COMM_SENT) {
            std::string message("could not lock all shards");
            if (res->errorMessage.length() > 0) {
              message += std::string(" : ") + res->errorMessage;
            }
            delete res;
            THROW_ARANGO_EXCEPTION_MESSAGE(
                TRI_ERROR_QUERY_COLLECTION_LOCK_FAILED,
                message);
          }
          delete res;
        }
        Transaction::_makeNolockHeaders = engine->_lockedShards;
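        // from here on, requests issued by this thread carry the
        // X-Arango-Nolock header for these shards, which presumably keeps
        // them from being locked a second time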
      }
      catch (...) {
        // We need to destroy all queries that we have built and stuffed
        // into the QueryRegistry as well as those that we have pushed to
        // the DBservers via HTTP:
        TRI_vocbase_t* vocbase = query->vocbase();
        for (auto& q : inst.get()->queryIds) {
          std::string theId = q.first;
          std::string queryId = q.second;
          auto pos = theId.find(':');
          if (pos != std::string::npos) {
            // So this is a remote one on a DBserver:
            std::string shardId = theId.substr(pos + 1);
            // Remove query from DBserver:
            triagens::arango::CoordTransactionID coordTransactionID
                = TRI_NewTickServer();
            auto cc = triagens::arango::ClusterComm::instance();
            if (queryId.back() == '*') {
              queryId.pop_back();
            }
            std::string const url("/_db/"
                                  + triagens::basics::StringUtils::urlEncode(vocbase->_name)
                                  + "/_api/aql/shutdown/" + queryId);
            std::map<std::string, std::string> headers;
            auto res = cc->syncRequest("", coordTransactionID,
                                       "shard:" + shardId,
                                       triagens::rest::HttpRequest::HTTP_REQUEST_PUT, url,
                                       "{\"code\": 0}", headers, 30.0);
            // Ignore the result, we need to try to remove all.
            // However, log the incident if we have an error message.
            if (res->errorMessage.length() > 0) {
              std::string msg("while trying to unregister query ");
              msg += queryId +
                     std::string(" from shard: ") +
                     shardId +
                     std::string(", communication failed: ") +
                     res->errorMessage;
              LOG_WARNING("%s", msg.c_str());
            }
            delete res;
          }
          else {
            // Remove query from registry:
            try {
              queryRegistry->destroy(vocbase,
                                     triagens::basics::StringUtils::uint64(queryId),
                                     TRI_ERROR_INTERNAL);
            }
            catch (...) {
              // Ignore problems
            }
          }
        }
        throw;
      }
    }
    else {
      // instantiate the engine on a local server
      engine = new ExecutionEngine(query);
      std::unique_ptr<Instanciator> inst(new Instanciator(engine));
      plan->root()->walk(inst.get());
      root = inst.get()->root;
    }

    TRI_ASSERT(root != nullptr);
    engine->_root = root;
    root->initialize();
    root->initializeCursor(nullptr, 0);

    return engine;
  }
  catch (...) {
    if (! triagens::arango::ServerState::instance()->isCoordinator()) {
      delete engine;
    }
    throw;
  }
}

////////////////////////////////////////////////////////////////////////////////
/// @brief add a block to the engine
////////////////////////////////////////////////////////////////////////////////

void ExecutionEngine::addBlock (ExecutionBlock* block) {
  TRI_ASSERT(block != nullptr);

  _blocks.push_back(block);
}

// -----------------------------------------------------------------------------
// --SECTION--                                                       END-OF-FILE
// -----------------------------------------------------------------------------

// Local Variables:
// mode: outline-minor
// outline-regexp: "/// @brief\\|/// {@inheritDoc}\\|/// @page\\|// --SECTION--\\|/// @\\}"
// End:
|