1
0
Fork 0

Merge branch 'devel' of ssh://github.com/ArangoDB/ArangoDB into devel

This commit is contained in:
Max Neunhoeffer 2016-06-03 06:33:06 -06:00
commit ee3216b021
10 changed files with 167 additions and 36 deletions

View File

@ -129,17 +129,21 @@ bool CleanOutServer::start() const {
if (res.accepted && res.indices.size()==1 && res.indices[0]) { if (res.accepted && res.indices.size()==1 && res.indices[0]) {
LOG_TOPIC(INFO, Logger::AGENCY) << "Pending: Clean out server " + _server; LOG_TOPIC(INFO, Logger::AGENCY) << "Pending: Clean out server " + _server;
LOG(WARN) << __FILE__<<__LINE__ ;
// Check if we can get things done in the first place // Check if we can get things done in the first place
if (!checkFeasibility()) { if (!checkFeasibility()) {
finish("DBServers/" + _server); finish("DBServers/" + _server, false);
LOG(WARN) << __FILE__<<__LINE__ ;
return false; return false;
} }
LOG(WARN) << __FILE__<<__LINE__ ;
// Schedule shard relocations // Schedule shard relocations
scheduleMoveShards(); scheduleMoveShards();
LOG(WARN) << __FILE__<<__LINE__ ;
return true; return true;
@ -244,9 +248,9 @@ bool CleanOutServer::checkFeasibility () const {
} }
LOG_TOPIC(ERR, Logger::AGENCY) LOG_TOPIC(ERR, Logger::AGENCY)
<< "Cannot accomodate all shards " << collections.str() << "Cannot accomodate shards " << collections.str()
<< " with replication factors " << factors.str() << "with replication factors " << factors.str()
<< " after cleaning out server " << _server; << "after cleaning out server " << _server;
return false; return false;
} }

View File

@ -206,22 +206,23 @@ std::vector<check_t> Supervision::checkCoordinators() {
return ret; return ret;
} }
bool Supervision::updateSnapshot() {
bool Supervision::doChecks(bool timedout) {
if (_agent == nullptr || this->isStopping()) { if (_agent == nullptr || this->isStopping()) {
return false; return false;
} }
_snapshot = _agent->readDB().get(_agencyPrefix); _snapshot = _agent->readDB().get(_agencyPrefix);
LOG_TOPIC(DEBUG, Logger::AGENCY) << "Sanity checks";
/*std::vector<check_t> ret = */checkDBServers();
checkCoordinators();
return true; return true;
} }
/// @brief Perform sanity checking: run the DB server check first, then the
/// coordinator check. Whatever the two check functions return is discarded
/// here. The `timedout` argument is not used inside this body. Always
/// returns true.
bool Supervision::doChecks(bool timedout) {
checkDBServers();
checkCoordinators();
return true;
}
void Supervision::run() { void Supervision::run() {
CONDITION_LOCKER(guard, _cv); CONDITION_LOCKER(guard, _cv);
@ -256,6 +257,7 @@ void Supervision::run() {
} }
// Do supervision // Do supervision
updateSnapshot();
doChecks(timedout); doChecks(timedout);
workJobs(); workJobs();
@ -267,11 +269,12 @@ void Supervision::workJobs() {
Node::Children const& todos = _snapshot(toDoPrefix).children(); Node::Children const& todos = _snapshot(toDoPrefix).children();
Node::Children const& pends = _snapshot(pendingPrefix).children(); Node::Children const& pends = _snapshot(pendingPrefix).children();
if (!todos.empty()) { if (!todos.empty()) {
for (auto const& todoEnt : todos) { for (auto const& todoEnt : todos) {
Node const& job = *todoEnt.second; Node const& job = *todoEnt.second;
LOG(WARN) << __FILE__<<__LINE__ << job.toJson();
try {
std::string jobType = job("type").getString(), std::string jobType = job("type").getString(),
jobId = job("jobId").getString(), jobId = job("jobId").getString(),
creator = job("creator").getString(); creator = job("creator").getString();
@ -280,13 +283,17 @@ void Supervision::workJobs() {
} else if (jobType == "cleanOutServer") { } else if (jobType == "cleanOutServer") {
CleanOutServer cos(_snapshot, _agent, jobId, creator, _agencyPrefix); CleanOutServer cos(_snapshot, _agent, jobId, creator, _agencyPrefix);
} }
} catch (std::exception const& e) {
LOG_TOPIC(ERR, Logger::AGENCY) << e.what() << " " << __FILE__ << __LINE__;
} }
} }
}
if (!pends.empty()) { if (!pends.empty()) {
for (auto const& pendEnt : pends) { for (auto const& pendEnt : pends) {
Node const& job = *pendEnt.second; Node const& job = *pendEnt.second;
LOG(WARN) << __FILE__<<__LINE__ << job.toJson();
try {
std::string jobType = job("type").getString(), std::string jobType = job("type").getString(),
jobId = job("jobId").getString(), jobId = job("jobId").getString(),
creator = job("creator").getString(); creator = job("creator").getString();
@ -295,8 +302,12 @@ void Supervision::workJobs() {
} else if (jobType == "cleanOutServer") { } else if (jobType == "cleanOutServer") {
CleanOutServer cos(_snapshot, _agent, jobId, creator, _agencyPrefix); CleanOutServer cos(_snapshot, _agent, jobId, creator, _agencyPrefix);
} }
} catch (std::exception const& e) {
LOG_TOPIC(ERR, Logger::AGENCY) << e.what() << " " << __FILE__ << __LINE__;
} }
} }
}
} }
// Start thread // Start thread

View File

@ -141,6 +141,9 @@ class Supervision : public arangodb::Thread {
/// @brief Perform sanity checking /// @brief Perform sanity checking
bool doChecks(bool); bool doChecks(bool);
/// @brief update my local agency snapshot
bool updateSnapshot();
Agent* _agent; /**< @brief My agent */ Agent* _agent; /**< @brief My agent */
Node _snapshot; Node _snapshot;

View File

@ -196,7 +196,7 @@ actions.defineHttp({
var DBserver = req.parameters.DBserver; var DBserver = req.parameters.DBserver;
var coord = { coordTransactionID: ArangoClusterInfo.uniqid() }; var coord = { coordTransactionID: ArangoClusterInfo.uniqid() };
var options = { coordTransactionID: coord.coordTransactionID, timeout:10 }; var options = { coordTransactionID: coord.coordTransactionID, timeout:10 };
var op = ArangoClusterComm.asyncRequest("GET","server:"+local,"_system", var op = ArangoClusterComm.asyncRequest("GET","server:"+DBserver,"_system",
"/_admin/statistics","",{},options); "/_admin/statistics","",{},options);
var r = ArangoClusterComm.wait(op); var r = ArangoClusterComm.wait(op);
res.contentType = "application/json; charset=utf-8"; res.contentType = "application/json; charset=utf-8";

View File

@ -228,6 +228,10 @@ void ApplicationServer::beginShutdown() {
// to run method // to run method
} }
void ApplicationServer::shutdownFatalError() {
reportServerProgress(ServerState::ABORT);
}
VPackBuilder ApplicationServer::options( VPackBuilder ApplicationServer::options(
std::unordered_set<std::string> const& excludes) const { std::unordered_set<std::string> const& excludes) const {
return _options->toVPack(false, excludes); return _options->toVPack(false, excludes);

View File

@ -44,7 +44,8 @@ enum class ServerState {
IN_START, IN_START,
IN_WAIT, IN_WAIT,
IN_STOP, IN_STOP,
STOPPED STOPPED,
ABORT
}; };
class ProgressHandler { class ProgressHandler {
@ -191,6 +192,9 @@ class ApplicationServer {
// signal the server to shut down // signal the server to shut down
void beginShutdown(); void beginShutdown();
// report that we are going down by fatal error
void shutdownFatalError();
// return VPack options // return VPack options
VPackBuilder options(std::unordered_set<std::string> const& excludes) const; VPackBuilder options(std::unordered_set<std::string> const& excludes) const;

View File

@ -67,6 +67,12 @@ static std::string FriendlyServiceName = "ArangoDB - the multi-model database";
static SERVICE_STATUS_HANDLE ServiceStatus; static SERVICE_STATUS_HANDLE ServiceStatus;
// Calls beginShutdown() on the server owned by ArangoInstance. Does nothing
// when ArangoInstance or its _server pointer is null.
// NOTE(review): the function name speaks of an abort, while the call made is
// beginShutdown() - confirm that this is the intended notification.
void reportServiceAborted(void) {
  if (ArangoInstance == nullptr || ArangoInstance->_server == nullptr) {
    // no running server to notify
    return;
  }
  ArangoInstance->_server->beginShutdown();
}
// So we have a valid minidump area during startup: // So we have a valid minidump area during startup:
void WindowsServiceFeature::StartArangoService (bool WaitForRunning) { void WindowsServiceFeature::StartArangoService (bool WaitForRunning) {
TRI_ERRORBUF; TRI_ERRORBUF;
@ -290,6 +296,71 @@ void WindowsServiceFeature::installService() {
CloseServiceHandle(schService); CloseServiceHandle(schService);
} }
void WindowsServiceFeature::DeleteService (bool force) {
CHAR path[MAX_PATH] = "";
if (! GetModuleFileNameA(nullptr, path, MAX_PATH)) {
std::cerr << "FATAL: GetModuleFileNameA failed" << std::endl;
exit(EXIT_FAILURE);
}
std::cout << "INFO: removing service '" << ServiceName << "'" << std::endl;
SC_HANDLE schSCManager = OpenSCManager(nullptr, SERVICES_ACTIVE_DATABASE, SC_MANAGER_ALL_ACCESS);
if (schSCManager == 0) {
std::cerr << "FATAL: OpenSCManager failed with " << GetLastError() << std::endl;
exit(EXIT_FAILURE);
}
SC_HANDLE schService = OpenServiceA(
schSCManager, // SCManager database
ServiceName.c_str(), // name of service
DELETE|SERVICE_QUERY_CONFIG); // first validate whether its us, then delete.
char serviceConfigMemory[8192]; // msdn says: 8k is enough.
DWORD bytesNeeded = 0;
if (QueryServiceConfig(schService,
(LPQUERY_SERVICE_CONFIGA)&serviceConfigMemory,
sizeof(serviceConfigMemory),
&bytesNeeded)) {
QUERY_SERVICE_CONFIG *cfg = (QUERY_SERVICE_CONFIG*) &serviceConfigMemory;
std::string command = std::string("\"") + std::string(path) + std::string("\" --start-service");
if (strcmp(cfg->lpBinaryPathName, command.c_str())) {
if (! force) {
std::cerr << "NOT removing service of other installation: " <<
cfg->lpBinaryPathName <<
" Our path is: " <<
path << std::endl;
CloseServiceHandle(schSCManager);
return;
}
else {
std::cerr << "Removing service of other installation because of FORCE: " <<
cfg->lpBinaryPathName <<
"Our path is: " <<
path << std::endl;
}
}
}
CloseServiceHandle(schSCManager);
if (schService == 0) {
std::cerr << "FATAL: OpenServiceA failed with " << GetLastError() << std::endl;
exit(EXIT_FAILURE);
}
if (! DeleteService(schService)) {
std::cerr << "FATAL: DeleteService failed with " << GetLastError() << std::endl;
exit(EXIT_FAILURE);
}
CloseServiceHandle(schService);
}
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
/// @brief flips the status for a service /// @brief flips the status for a service
@ -357,6 +428,24 @@ void WindowsServiceFeature::shutDownBegins () {
} }
//////////////////////////////////////////////////////////////////////////////
/// @brief wrap ArangoDB server so we can properly emit a status once the
/// shutdown has completed
//////////////////////////////////////////////////////////////////////////////
void WindowsServiceFeature::shutDownComplete () {
// shutdown finished - emit the SERVICE_STOPPED status with NO_ERROR.
SetServiceStatus(SERVICE_STOPPED, NO_ERROR, 0, 0);
}
//////////////////////////////////////////////////////////////////////////////
/// @brief wrap ArangoDB server so we can properly emmit a status on shutdown
/// starting
//////////////////////////////////////////////////////////////////////////////
void WindowsServiceFeature::shutDownFailure () {
// startup finished - signalize we're running.
SetServiceStatus(SERVICE_STOP, ERROR_FAIL_RESTART, 0, 0);
}
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
/// @brief service control handler /// @brief service control handler
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
@ -467,15 +556,15 @@ void WindowsServiceFeature::collectOptions(std::shared_ptr<ProgramOptions> optio
} }
void WindowsServiceFeature::validateOptions(std::shared_ptr<ProgramOptions> options) { void WindowsServiceFeature::validateOptions(std::shared_ptr<ProgramOptions> options) {
if (_startAsService) {
} if (_installService) {
else if (_installService) {
installService(); installService();
exit(EXIT_SUCCESS); exit(EXIT_SUCCESS);
} }
else if (_unInstallService) { else if (_unInstallService) {
} }
else if (_forceUninstall) {
}
else if (_startAsService) { else if (_startAsService) {
ProgressHandler reporter{ ProgressHandler reporter{
[this](ServerState state) { [this](ServerState state) {

View File

@ -36,6 +36,7 @@ class WindowsServiceFeature final : public application_features::ApplicationFeat
private: private:
void installService(); void installService();
void DeleteService (bool force);
void StartArangoService (bool WaitForRunning); void StartArangoService (bool WaitForRunning);
void StopArangoService (bool WaitForShutdown); void StopArangoService (bool WaitForShutdown);
void startupProgress (); void startupProgress ();
@ -43,6 +44,8 @@ class WindowsServiceFeature final : public application_features::ApplicationFeat
void startupFinished (); void startupFinished ();
void shutDownBegins (); void shutDownBegins ();
void shutDownComplete ();
void shutDownFailure ();
public: public:
bool _installService = false; bool _installService = false;

View File

@ -619,7 +619,16 @@ void ADB_WindowsEntryFunction() {
TRI_Application_Exit_SetExit(ADB_WindowsExitFunction); TRI_Application_Exit_SetExit(ADB_WindowsExitFunction);
} }
/// @brief callback that ADB_WindowsExitFunction invokes first, when it is not
/// nullptr; starts out as nullptr (no callback registered)
TRI_serviceAboert_t serviceAbort = nullptr;
/// @brief stores `f` as the service abort callback, replacing any previously
/// stored value; passing nullptr clears the callback
void TRI_SetWindowsServiceAbortFunction(TRI_serviceAboert_t f) {
serviceAbort = f;
}
void ADB_WindowsExitFunction(int exitCode, void* data) { void ADB_WindowsExitFunction(int exitCode, void* data) {
if (serviceAbort != nullptr) {
serviceAbort();
}
int res = finalizeWindows(TRI_WIN_FINAL_WSASTARTUP_FUNCTION_CALL, 0); int res = finalizeWindows(TRI_WIN_FINAL_WSASTARTUP_FUNCTION_CALL, 0);
if (res != 0) { if (res != 0) {

View File

@ -108,6 +108,10 @@ int TRI_MapSystemError(DWORD);
bool TRI_InitWindowsEventLog(void); bool TRI_InitWindowsEventLog(void);
void TRI_CloseWindowsEventlog(void); void TRI_CloseWindowsEventlog(void);
typedef void (*TRI_serviceAboert_t)(void);
void TRI_SetWindowsServiceAbortFunction(TRI_serviceAboert_t);
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
/// @brief logs a message to the windows event log. /// @brief logs a message to the windows event log.
/// we rather are keen on logging something at all then on being able to work /// we rather are keen on logging something at all then on being able to work