1
0
Fork 0
arangodb/arangod/Aql/ClusterBlocks.h

432 lines
14 KiB
C++

////////////////////////////////////////////////////////////////////////////////
/// DISCLAIMER
///
/// Copyright 2014-2016 ArangoDB GmbH, Cologne, Germany
/// Copyright 2004-2014 triAGENS GmbH, Cologne, Germany
///
/// Licensed under the Apache License, Version 2.0 (the "License");
/// you may not use this file except in compliance with the License.
/// You may obtain a copy of the License at
///
/// http://www.apache.org/licenses/LICENSE-2.0
///
/// Unless required by applicable law or agreed to in writing, software
/// distributed under the License is distributed on an "AS IS" BASIS,
/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
/// See the License for the specific language governing permissions and
/// limitations under the License.
///
/// Copyright holder is ArangoDB GmbH, Cologne, Germany
///
/// @author Max Neunhoeffer
////////////////////////////////////////////////////////////////////////////////
#ifndef ARANGOD_AQL_CLUSTER_BLOCKS_H
#define ARANGOD_AQL_CLUSTER_BLOCKS_H 1
#include "Basics/Common.h"
#include "Aql/ClusterNodes.h"
#include "Aql/ExecutionBlock.h"
#include "Aql/ExecutionNode.h"
#include "Aql/SortRegister.h"
#include "Rest/GeneralRequest.h"
#include <velocypack/Builder.h>
namespace arangodb {
namespace httpclient {
class SimpleHttpResult;
}
namespace transaction {
class Methods;
}
struct ClusterCommResult;
namespace aql {
class AqlItemBlock;
struct Collection;
class ExecutionEngine;
class BlockWithClients : public ExecutionBlock {
public:
BlockWithClients(ExecutionEngine* engine, ExecutionNode const* ep,
std::vector<std::string> const& shardIds);
virtual ~BlockWithClients() {}
public:
/// @brief initializeCursor
std::pair<ExecutionState, Result> initializeCursor(AqlItemBlock* items, size_t pos) override;
/// @brief shutdown
std::pair<ExecutionState, Result> shutdown(int) override;
/// @brief getSome: shouldn't be used, use skipSomeForShard
std::pair<ExecutionState, std::unique_ptr<AqlItemBlock>> getSome(
size_t atMost) override final {
TRI_ASSERT(false);
THROW_ARANGO_EXCEPTION(TRI_ERROR_NOT_IMPLEMENTED);
}
/// @brief skipSome: shouldn't be used, use skipSomeForShard
std::pair<ExecutionState, size_t> skipSome(size_t atMost) override final {
TRI_ASSERT(false);
THROW_ARANGO_EXCEPTION(TRI_ERROR_NOT_IMPLEMENTED);
}
/// @brief getSomeForShard
std::pair<ExecutionState, std::unique_ptr<AqlItemBlock>> getSomeForShard(
size_t atMost, std::string const& shardId);
/// @brief skipSomeForShard
std::pair<ExecutionState, size_t> skipSomeForShard(
size_t atMost, std::string const& shardId);
/// @brief hasMoreForShard: any more for shard <shardId>?
virtual bool hasMoreForShard(std::string const& shardId) = 0;
virtual ExecutionState getHasMoreStateForShard(std::string const& shardId) = 0;
protected:
/// @brief getOrSkipSomeForShard
virtual std::pair<ExecutionState, arangodb::Result> getOrSkipSomeForShard(
size_t atMost, bool skipping, std::unique_ptr<AqlItemBlock>& result,
size_t& skipped, std::string const& shardId) = 0;
/// @brief getClientId: get the number <clientId> (used internally)
/// corresponding to <shardId>
size_t getClientId(std::string const& shardId);
/// @brief hasMoreForClientId: any more for client <cliendId>?
virtual bool hasMoreForClientId(size_t clientId) = 0;
protected:
/// @brief _shardIdMap: map from shardIds to clientNrs
std::unordered_map<std::string, size_t> _shardIdMap;
/// @brief _nrClients: total number of clients
size_t _nrClients;
private:
bool _wasShutdown;
};
class ScatterBlock final : public BlockWithClients {
public:
ScatterBlock(ExecutionEngine* engine, ScatterNode const* ep,
std::vector<std::string> const& shardIds)
: BlockWithClients(engine, ep, shardIds) {}
/// @brief initializeCursor
std::pair<ExecutionState, Result> initializeCursor(AqlItemBlock* items, size_t pos) override;
/// @brief hasMoreForShard: any more for shard <shardId>?
bool hasMoreForShard(std::string const& shardId) override;
private:
/// @brief getHasMoreStateForClientId: State for client <cliendId>?
ExecutionState getHasMoreStateForClientId(size_t clientId);
/// @brief hasMoreForClientId: any more for client <cliendId>?
bool hasMoreForClientId(size_t clientId) override;
ExecutionState getHasMoreStateForShard(const std::string &shardId) override;
/// @brief getOrSkipSomeForShard
std::pair<ExecutionState, arangodb::Result> getOrSkipSomeForShard(
size_t atMost, bool skipping, std::unique_ptr<AqlItemBlock>& result,
size_t& skipped, std::string const& shardId) override final;
/// @brief _posForClient:
std::vector<std::pair<size_t, size_t>> _posForClient;
};
class DistributeBlock final : public BlockWithClients {
public:
DistributeBlock(ExecutionEngine* engine, DistributeNode const* ep,
std::vector<std::string> const& shardIds,
Collection const* collection);
/// @brief initializeCursor
std::pair<ExecutionState, Result> initializeCursor(AqlItemBlock* items, size_t pos) override;
/// @brief hasMoreForShard: any more for shard <shardId>?
bool hasMoreForShard(std::string const& shardId) override;
private:
/// @brief hasMoreForClientId: any more for client <cliendId>?
bool hasMoreForClientId(size_t clientId) override;
/// @brief getHasMoreStateForClientId: State for client <cliendId>?
ExecutionState getHasMoreStateForClientId(size_t clientId);
/// @brief getOrSkipSomeForShard
std::pair<ExecutionState, arangodb::Result> getOrSkipSomeForShard(
size_t atMost, bool skipping, std::unique_ptr<AqlItemBlock>& result,
size_t& skipped, std::string const& shardId) override final;
/// @brief getBlockForClient: try to get at atMost pairs into
/// _distBuffer.at(clientId).
std::pair<ExecutionState, bool> getBlockForClient(size_t atMost, size_t clientId);
ExecutionState getHasMoreStateForShard(const std::string &shardId) override;
/// @brief sendToClient: for each row of the incoming AqlItemBlock use the
/// attributes <shardKeys> of the register <id> to determine to which shard
/// the
/// row should be sent.
size_t sendToClient(AqlItemBlock*);
/// @brief create a new document key
std::string createKey(arangodb::velocypack::Slice) const;
// a reusable Builder object for building _key values
arangodb::velocypack::Builder _keyBuilder;
// a reusable Builder object for building document objects
arangodb::velocypack::Builder _objectBuilder;
/// @brief _distBuffer.at(i) is a deque containing pairs (j,k) such that
// _buffer.at(j) row k should be sent to the client with id = i.
std::vector<std::deque<std::pair<size_t, size_t>>> _distBuffer;
/// @brief _colectionName: the name of the sharded collection
Collection const* _collection;
/// @brief _index: the block in _buffer we are currently considering
size_t _index;
/// @brief _regId: the register to inspect
RegisterId _regId;
/// @brief a second register to inspect (used only for UPSERT nodes at the
/// moment to distinguish between search and insert)
RegisterId _alternativeRegId;
/// @brief whether or not the collection uses the default sharding
bool _usesDefaultSharding;
/// @brief allow specified keys even in non-default sharding case
bool _allowSpecifiedKeys;
};
class RemoteBlock final : public ExecutionBlock {
/// @brief constructors/destructors
public:
RemoteBlock(ExecutionEngine* engine, RemoteNode const* en,
std::string const& server, std::string const& ownName,
std::string const& queryId);
/// @brief timeout
static double const defaultTimeOut;
/// @brief initializeCursor, could be called multiple times
std::pair<ExecutionState, Result> initializeCursor(AqlItemBlock* items, size_t pos) override;
/// @brief shutdown, will be called exactly once for the whole query
std::pair<ExecutionState, Result> shutdown(int) override final;
/// @brief getSome
std::pair<ExecutionState, std::unique_ptr<AqlItemBlock>> getSome(size_t atMost) override final;
/// @brief skipSome
std::pair<ExecutionState, size_t> skipSome(size_t atMost) override final;
/// @brief handleAsyncResult
bool handleAsyncResult(ClusterCommResult* result) override;
private:
/**
* @brief Handle communication errors in Async case.
*
* @param result The network response we got from cluster comm.
*
* @return A wrapped Result Object, that is either ok() or contains
* the error information to be thrown in get/skip some.
*/
arangodb::Result handleCommErrors(ClusterCommResult* result) const;
/// @brief internal method to send a request. Will register a callback to be reactivated
arangodb::Result sendAsyncRequest(
rest::RequestType type, std::string const& urlPart,
std::shared_ptr<std::string const> body);
std::shared_ptr<velocypack::Builder> stealResultBody();
/// @brief our server, can be like "shard:S1000" or like "server:Claus"
std::string const _server;
/// @brief our own identity, in case of the coordinator this is empty,
/// in case of the DBservers, this is the shard ID as a string
std::string const _ownName;
/// @brief the ID of the query on the server as a string
std::string const _queryId;
/// @brief whether or not this block will forward initialize,
/// initializeCursor or shutDown requests
bool const _isResponsibleForInitializeCursor;
/// @brief the last unprocessed result. Make sure to reset it
/// after it is processed.
std::shared_ptr<httpclient::SimpleHttpResult> _lastResponse;
/// @brief the last remote response Result object, may contain an error.
arangodb::Result _lastError;
};
////////////////////////////////////////////////////////////////////////////////
/// @class UnsortingGatherBlock
/// @brief Execution block for gathers without order
////////////////////////////////////////////////////////////////////////////////
class UnsortingGatherBlock final : public ExecutionBlock {
public:
UnsortingGatherBlock(ExecutionEngine& engine, GatherNode const& en)
: ExecutionBlock(&engine, &en) {
TRI_ASSERT(en.elements().empty());
}
/// @brief initializeCursor
std::pair<ExecutionState, arangodb::Result> initializeCursor(AqlItemBlock* items, size_t pos) override final;
/// @brief getSome
std::pair<ExecutionState, std::unique_ptr<AqlItemBlock>> getSome(
size_t atMost) override final;
/// @brief skipSome
std::pair<ExecutionState, size_t> skipSome(size_t atMost) override final;
private:
/// @brief _atDep: currently pulling blocks from _dependencies.at(_atDep),
size_t _atDep{};
}; // UnsortingGatherBlock
////////////////////////////////////////////////////////////////////////////////
/// @struct SortingStrategy
////////////////////////////////////////////////////////////////////////////////
struct SortingStrategy {
typedef std::pair<
size_t, // dependency index
size_t // position within a dependecy
> ValueType;
virtual ~SortingStrategy() = default;
/// @brief returns next value
virtual ValueType nextValue() = 0;
/// @brief prepare strategy fetching values
virtual void prepare(std::vector<ValueType>& /*blockPos*/) { }
/// @brief resets strategy state
virtual void reset() = 0;
}; // SortingStrategy
////////////////////////////////////////////////////////////////////////////////
/// @class SortingGatherBlock
/// @brief Execution block for gathers with order
////////////////////////////////////////////////////////////////////////////////
class SortingGatherBlock final : public ExecutionBlock {
public:
SortingGatherBlock(
ExecutionEngine& engine,
GatherNode const& en
);
~SortingGatherBlock();
/// @brief initializeCursor
std::pair<ExecutionState, arangodb::Result> initializeCursor(AqlItemBlock* items, size_t pos) override final;
/// @brief getSome
std::pair<ExecutionState, std::unique_ptr<AqlItemBlock>> getSome(
size_t atMost) override final;
/// @brief skipSome
std::pair<ExecutionState, size_t> skipSome(size_t atMost) override final;
private:
void clearBuffers() noexcept;
/**
* @brief Fills all _gatherBlockBuffer entries. Is repeatable during WAITING.
*
*
* @param atMost The amount of data requested per block.
*
* @return Will return {WAITING, 0} if it had to request new data from upstream.
* If everything is in place: all buffers are either filled, or the upstream
* block is DONE. Will return {DONE, SUM(_gatherBlockBuffer)} on success.
*/
std::pair<ExecutionState, size_t> fillBuffers(size_t atMost);
/// @brief getBlock: from dependency i into _gatherBlockBuffer.at(i),
/// non-simple case only
std::pair<ExecutionState, bool> getBlocks(size_t i, size_t atMost);
/// @brief Updates _gatherBlockBuffer and _gatherBlockPos so they point to the
/// next row.
void nextRow(size_t i);
/// @brief Calculates and returns the number of available rows in buffer i.
size_t availableRows(size_t i) const;
private:
/// @brief _gatherBlockBuffer: buffer the incoming block from each dependency
/// separately
std::vector<std::deque<AqlItemBlock*>> _gatherBlockBuffer;
/// @brief _gatherBlockPos: pairs (i, _pos in _buffer.at(i)), i.e. the same as
/// the usual _pos but one pair per dependency
std::vector<std::pair<size_t, size_t>> _gatherBlockPos;
/// @brief sort elements for this block
std::vector<SortRegister> _sortRegisters;
/// @brief sorting strategy
std::unique_ptr<SortingStrategy> _strategy;
}; // SortingGatherBlock
class SingleRemoteOperationBlock final : public ExecutionBlock {
/// @brief constructors/destructors
private:
bool getOne(arangodb::aql::AqlItemBlock* aqlres,
size_t outputCounter);
public:
SingleRemoteOperationBlock(ExecutionEngine* engine,
SingleRemoteOperationNode const* en
);
/// @brief timeout
static double const defaultTimeOut;
/// @brief getSome
std::pair<ExecutionState, std::unique_ptr<AqlItemBlock>> getSome(
size_t atMost) override final;
/// @brief skipSome
std::pair<ExecutionState, size_t> skipSome(size_t atMost) override final;
private:
/// @brief _colectionName: the name of the sharded collection
Collection const* _collection;
/// @brief the key of the document to fetch
std::string const _key;
};
} // namespace arangodb::aql
} // namespace arangodb
#endif