mirror of https://gitee.com/bigwinds/arangodb
Merge branch 'devel' of ssh://github.com/ArangoDB/ArangoDB into devel
This commit is contained in:
commit
ee3216b021
|
@ -129,17 +129,21 @@ bool CleanOutServer::start() const {
|
|||
if (res.accepted && res.indices.size()==1 && res.indices[0]) {
|
||||
|
||||
LOG_TOPIC(INFO, Logger::AGENCY) << "Pending: Clean out server " + _server;
|
||||
LOG(WARN) << __FILE__<<__LINE__ ;
|
||||
|
||||
// Check if we can get things done in the first place
|
||||
if (!checkFeasibility()) {
|
||||
finish("DBServers/" + _server);
|
||||
return false;
|
||||
finish("DBServers/" + _server, false);
|
||||
LOG(WARN) << __FILE__<<__LINE__ ;
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
LOG(WARN) << __FILE__<<__LINE__ ;
|
||||
|
||||
|
||||
// Schedule shard relocations
|
||||
scheduleMoveShards();
|
||||
LOG(WARN) << __FILE__<<__LINE__ ;
|
||||
|
||||
return true;
|
||||
|
||||
|
@ -244,9 +248,9 @@ bool CleanOutServer::checkFeasibility () const {
|
|||
}
|
||||
|
||||
LOG_TOPIC(ERR, Logger::AGENCY)
|
||||
<< "Cannot accomodate all shards " << collections.str()
|
||||
<< " with replication factors " << factors.str()
|
||||
<< " after cleaning out server " << _server;
|
||||
<< "Cannot accomodate shards " << collections.str()
|
||||
<< "with replication factors " << factors.str()
|
||||
<< "after cleaning out server " << _server;
|
||||
return false;
|
||||
}
|
||||
|
||||
|
|
|
@ -206,22 +206,23 @@ std::vector<check_t> Supervision::checkCoordinators() {
|
|||
return ret;
|
||||
}
|
||||
|
||||
|
||||
bool Supervision::doChecks(bool timedout) {
|
||||
bool Supervision::updateSnapshot() {
|
||||
|
||||
if (_agent == nullptr || this->isStopping()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
_snapshot = _agent->readDB().get(_agencyPrefix);
|
||||
|
||||
LOG_TOPIC(DEBUG, Logger::AGENCY) << "Sanity checks";
|
||||
/*std::vector<check_t> ret = */checkDBServers();
|
||||
checkCoordinators();
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool Supervision::doChecks(bool timedout) {
|
||||
|
||||
checkDBServers();
|
||||
checkCoordinators();
|
||||
return true;
|
||||
|
||||
}
|
||||
|
||||
void Supervision::run() {
|
||||
|
||||
CONDITION_LOCKER(guard, _cv);
|
||||
|
@ -256,6 +257,7 @@ void Supervision::run() {
|
|||
}
|
||||
|
||||
// Do supervision
|
||||
updateSnapshot();
|
||||
doChecks(timedout);
|
||||
workJobs();
|
||||
|
||||
|
@ -267,36 +269,45 @@ void Supervision::workJobs() {
|
|||
|
||||
Node::Children const& todos = _snapshot(toDoPrefix).children();
|
||||
Node::Children const& pends = _snapshot(pendingPrefix).children();
|
||||
|
||||
if (!todos.empty()) {
|
||||
for (auto const& todoEnt : todos) {
|
||||
Node const& job = *todoEnt.second;
|
||||
LOG(WARN) << __FILE__<<__LINE__ << job.toJson();
|
||||
|
||||
std::string jobType = job("type").getString(),
|
||||
jobId = job("jobId").getString(),
|
||||
creator = job("creator").getString();
|
||||
if (jobType == "failedServer") {
|
||||
FailedServer fs(_snapshot, _agent, jobId, creator, _agencyPrefix);
|
||||
} else if (jobType == "cleanOutServer") {
|
||||
CleanOutServer cos(_snapshot, _agent, jobId, creator, _agencyPrefix);
|
||||
try {
|
||||
std::string jobType = job("type").getString(),
|
||||
jobId = job("jobId").getString(),
|
||||
creator = job("creator").getString();
|
||||
if (jobType == "failedServer") {
|
||||
FailedServer fs(_snapshot, _agent, jobId, creator, _agencyPrefix);
|
||||
} else if (jobType == "cleanOutServer") {
|
||||
CleanOutServer cos(_snapshot, _agent, jobId, creator, _agencyPrefix);
|
||||
}
|
||||
} catch (std::exception const& e) {
|
||||
LOG_TOPIC(ERR, Logger::AGENCY) << e.what() << " " << __FILE__ << __LINE__;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (!pends.empty()) {
|
||||
for (auto const& pendEnt : pends) {
|
||||
Node const& job = *pendEnt.second;
|
||||
LOG(WARN) << __FILE__<<__LINE__ << job.toJson();
|
||||
|
||||
std::string jobType = job("type").getString(),
|
||||
jobId = job("jobId").getString(),
|
||||
creator = job("creator").getString();
|
||||
if (jobType == "failedServer") {
|
||||
FailedServer fs(_snapshot, _agent, jobId, creator, _agencyPrefix);
|
||||
} else if (jobType == "cleanOutServer") {
|
||||
CleanOutServer cos(_snapshot, _agent, jobId, creator, _agencyPrefix);
|
||||
try {
|
||||
std::string jobType = job("type").getString(),
|
||||
jobId = job("jobId").getString(),
|
||||
creator = job("creator").getString();
|
||||
if (jobType == "failedServer") {
|
||||
FailedServer fs(_snapshot, _agent, jobId, creator, _agencyPrefix);
|
||||
} else if (jobType == "cleanOutServer") {
|
||||
CleanOutServer cos(_snapshot, _agent, jobId, creator, _agencyPrefix);
|
||||
}
|
||||
} catch (std::exception const& e) {
|
||||
LOG_TOPIC(ERR, Logger::AGENCY) << e.what() << " " << __FILE__ << __LINE__;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// Start thread
|
||||
|
|
|
@ -141,6 +141,9 @@ class Supervision : public arangodb::Thread {
|
|||
/// @brief Perform sanity checking
|
||||
bool doChecks(bool);
|
||||
|
||||
/// @brief update my local agency snapshot
|
||||
bool updateSnapshot();
|
||||
|
||||
Agent* _agent; /**< @brief My agent */
|
||||
Node _snapshot;
|
||||
|
||||
|
|
|
@ -196,7 +196,7 @@ actions.defineHttp({
|
|||
var DBserver = req.parameters.DBserver;
|
||||
var coord = { coordTransactionID: ArangoClusterInfo.uniqid() };
|
||||
var options = { coordTransactionID: coord.coordTransactionID, timeout:10 };
|
||||
var op = ArangoClusterComm.asyncRequest("GET","server:"+local,"_system",
|
||||
var op = ArangoClusterComm.asyncRequest("GET","server:"+DBserver,"_system",
|
||||
"/_admin/statistics","",{},options);
|
||||
var r = ArangoClusterComm.wait(op);
|
||||
res.contentType = "application/json; charset=utf-8";
|
||||
|
|
|
@ -228,6 +228,10 @@ void ApplicationServer::beginShutdown() {
|
|||
// to run method
|
||||
}
|
||||
|
||||
// report that we are going down by fatal error: publishes the ABORT server
// state through reportServerProgress()
void ApplicationServer::shutdownFatalError() {
  reportServerProgress(ServerState::ABORT);
}
|
||||
|
||||
VPackBuilder ApplicationServer::options(
|
||||
std::unordered_set<std::string> const& excludes) const {
|
||||
return _options->toVPack(false, excludes);
|
||||
|
|
|
@ -44,7 +44,8 @@ enum class ServerState {
|
|||
IN_START,
|
||||
IN_WAIT,
|
||||
IN_STOP,
|
||||
STOPPED
|
||||
STOPPED,
|
||||
ABORT
|
||||
};
|
||||
|
||||
class ProgressHandler {
|
||||
|
@ -191,6 +192,9 @@ class ApplicationServer {
|
|||
// signal the server to shut down
|
||||
void beginShutdown();
|
||||
|
||||
// report that we are going down by fatal error
|
||||
void shutdownFatalError();
|
||||
|
||||
// return VPack options
|
||||
VPackBuilder options(std::unordered_set<std::string> const& excludes) const;
|
||||
|
||||
|
|
|
@ -67,6 +67,12 @@ static std::string FriendlyServiceName = "ArangoDB - the multi-model database";
|
|||
|
||||
static SERVICE_STATUS_HANDLE ServiceStatus;
|
||||
|
||||
void reportServiceAborted(void) {
|
||||
if (ArangoInstance != nullptr && ArangoInstance->_server != nullptr) {
|
||||
ArangoInstance->_server->beginShutdown();
|
||||
}
|
||||
}
|
||||
|
||||
// So we have a valid minidump area during startup:
|
||||
void WindowsServiceFeature::StartArangoService (bool WaitForRunning) {
|
||||
TRI_ERRORBUF;
|
||||
|
@ -290,6 +296,71 @@ void WindowsServiceFeature::installService() {
|
|||
CloseServiceHandle(schService);
|
||||
}
|
||||
|
||||
void WindowsServiceFeature::DeleteService (bool force) {
|
||||
CHAR path[MAX_PATH] = "";
|
||||
|
||||
if (! GetModuleFileNameA(nullptr, path, MAX_PATH)) {
|
||||
std::cerr << "FATAL: GetModuleFileNameA failed" << std::endl;
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
std::cout << "INFO: removing service '" << ServiceName << "'" << std::endl;
|
||||
|
||||
SC_HANDLE schSCManager = OpenSCManager(nullptr, SERVICES_ACTIVE_DATABASE, SC_MANAGER_ALL_ACCESS);
|
||||
|
||||
if (schSCManager == 0) {
|
||||
std::cerr << "FATAL: OpenSCManager failed with " << GetLastError() << std::endl;
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
SC_HANDLE schService = OpenServiceA(
|
||||
schSCManager, // SCManager database
|
||||
ServiceName.c_str(), // name of service
|
||||
DELETE|SERVICE_QUERY_CONFIG); // first validate whether its us, then delete.
|
||||
|
||||
char serviceConfigMemory[8192]; // msdn says: 8k is enough.
|
||||
DWORD bytesNeeded = 0;
|
||||
if (QueryServiceConfig(schService,
|
||||
(LPQUERY_SERVICE_CONFIGA)&serviceConfigMemory,
|
||||
sizeof(serviceConfigMemory),
|
||||
&bytesNeeded)) {
|
||||
QUERY_SERVICE_CONFIG *cfg = (QUERY_SERVICE_CONFIG*) &serviceConfigMemory;
|
||||
|
||||
std::string command = std::string("\"") + std::string(path) + std::string("\" --start-service");
|
||||
if (strcmp(cfg->lpBinaryPathName, command.c_str())) {
|
||||
if (! force) {
|
||||
std::cerr << "NOT removing service of other installation: " <<
|
||||
cfg->lpBinaryPathName <<
|
||||
" Our path is: " <<
|
||||
path << std::endl;
|
||||
|
||||
CloseServiceHandle(schSCManager);
|
||||
return;
|
||||
}
|
||||
else {
|
||||
std::cerr << "Removing service of other installation because of FORCE: " <<
|
||||
cfg->lpBinaryPathName <<
|
||||
"Our path is: " <<
|
||||
path << std::endl;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
CloseServiceHandle(schSCManager);
|
||||
|
||||
if (schService == 0) {
|
||||
std::cerr << "FATAL: OpenServiceA failed with " << GetLastError() << std::endl;
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
if (! DeleteService(schService)) {
|
||||
std::cerr << "FATAL: DeleteService failed with " << GetLastError() << std::endl;
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
CloseServiceHandle(schService);
|
||||
}
|
||||
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// @brief flips the status for a service
|
||||
|
@ -357,6 +428,24 @@ void WindowsServiceFeature::shutDownBegins () {
|
|||
}
|
||||
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
/// @brief wrap ArangoDB server so we can properly emmit a status on shutdown
|
||||
/// starting
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
void WindowsServiceFeature::shutDownComplete () {
|
||||
// startup finished - signalize we're running.
|
||||
SetServiceStatus(SERVICE_STOPPED, NO_ERROR, 0, 0);
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
/// @brief wrap ArangoDB server so we can properly emmit a status on shutdown
|
||||
/// starting
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
void WindowsServiceFeature::shutDownFailure () {
|
||||
// startup finished - signalize we're running.
|
||||
SetServiceStatus(SERVICE_STOP, ERROR_FAIL_RESTART, 0, 0);
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// @brief service control handler
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
@ -467,15 +556,15 @@ void WindowsServiceFeature::collectOptions(std::shared_ptr<ProgramOptions> optio
|
|||
}
|
||||
|
||||
void WindowsServiceFeature::validateOptions(std::shared_ptr<ProgramOptions> options) {
|
||||
if (_startAsService) {
|
||||
}
|
||||
else if (_installService) {
|
||||
|
||||
if (_installService) {
|
||||
installService();
|
||||
exit(EXIT_SUCCESS);
|
||||
}
|
||||
else if (_unInstallService) {
|
||||
}
|
||||
|
||||
else if (_forceUninstall) {
|
||||
}
|
||||
else if (_startAsService) {
|
||||
ProgressHandler reporter{
|
||||
[this](ServerState state) {
|
||||
|
|
|
@ -36,6 +36,7 @@ class WindowsServiceFeature final : public application_features::ApplicationFeat
|
|||
|
||||
private:
|
||||
void installService();
|
||||
void DeleteService (bool force);
|
||||
void StartArangoService (bool WaitForRunning);
|
||||
void StopArangoService (bool WaitForShutdown);
|
||||
void startupProgress ();
|
||||
|
@ -43,6 +44,8 @@ class WindowsServiceFeature final : public application_features::ApplicationFeat
|
|||
void startupFinished ();
|
||||
|
||||
void shutDownBegins ();
|
||||
void shutDownComplete ();
|
||||
void shutDownFailure ();
|
||||
|
||||
public:
|
||||
bool _installService = false;
|
||||
|
|
|
@ -619,7 +619,16 @@ void ADB_WindowsEntryFunction() {
|
|||
TRI_Application_Exit_SetExit(ADB_WindowsExitFunction);
|
||||
}
|
||||
|
||||
TRI_serviceAboert_t serviceAbort = nullptr;
|
||||
|
||||
void TRI_SetWindowsServiceAbortFunction(TRI_serviceAboert_t f) {
|
||||
serviceAbort = f;
|
||||
}
|
||||
|
||||
void ADB_WindowsExitFunction(int exitCode, void* data) {
|
||||
if (serviceAbort != nullptr) {
|
||||
serviceAbort();
|
||||
}
|
||||
int res = finalizeWindows(TRI_WIN_FINAL_WSASTARTUP_FUNCTION_CALL, 0);
|
||||
|
||||
if (res != 0) {
|
||||
|
|
|
@ -108,6 +108,10 @@ int TRI_MapSystemError(DWORD);
|
|||
bool TRI_InitWindowsEventLog(void);
|
||||
void TRI_CloseWindowsEventlog(void);
|
||||
|
||||
typedef void (*TRI_serviceAboert_t)(void);
|
||||
|
||||
void TRI_SetWindowsServiceAbortFunction(TRI_serviceAboert_t);
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// @brief logs a message to the windows event log.
|
||||
/// we rather are keen on logging something at all then on being able to work
|
||||
|
|
Loading…
Reference in New Issue