1
0
Fork 0

releveling logging in maintenance module (#7925)

This commit is contained in:
Kaveh Vahedipour 2019-01-10 12:15:48 +01:00 committed by Max Neunhöffer
parent 4ca7a3bc09
commit 7b37922f92
3 changed files with 33 additions and 28 deletions

View File

@ -82,6 +82,8 @@ v3.4.2 (XXXX-XX-XX)
* Improve single threaded performance by scheduler optimization. * Improve single threaded performance by scheduler optimization.
* Releveling logging in maintenance
v3.4.1 (2018-12-19) v3.4.1 (2018-12-19)
------------------- -------------------

View File

@ -111,7 +111,7 @@ SynchronizeShard::SynchronizeShard(MaintenanceFeature& feature, ActionDescriptio
TRI_ASSERT(desc.has(SHARD_VERSION)); TRI_ASSERT(desc.has(SHARD_VERSION));
if (!error.str().empty()) { if (!error.str().empty()) {
LOG_TOPIC(ERR, Logger::MAINTENANCE) << "SynchronizeShard: " << error.str(); LOG_TOPIC(DEBUG, Logger::MAINTENANCE) << "SynchronizeShard: " << error.str();
_result.reset(TRI_ERROR_INTERNAL, error.str()); _result.reset(TRI_ERROR_INTERNAL, error.str());
setState(FAILED); setState(FAILED);
} }
@ -187,7 +187,7 @@ static arangodb::Result collectionCount(std::shared_ptr<arangodb::LogicalCollect
Result res = trx.begin(); Result res = trx.begin();
if (!res.ok()) { if (!res.ok()) {
LOG_TOPIC(ERR, Logger::MAINTENANCE) << "Failed to start count transaction: " << res; LOG_TOPIC(DEBUG, Logger::MAINTENANCE) << "Failed to start count transaction: " << res;
return res; return res;
} }
@ -196,7 +196,7 @@ static arangodb::Result collectionCount(std::shared_ptr<arangodb::LogicalCollect
res = trx.finish(opResult.result); res = trx.finish(opResult.result);
if (res.fail()) { if (res.fail()) {
LOG_TOPIC(ERR, Logger::MAINTENANCE) LOG_TOPIC(DEBUG, Logger::MAINTENANCE)
<< "Failed to finish count transaction: " << res; << "Failed to finish count transaction: " << res;
return res; return res;
} }
@ -232,7 +232,7 @@ static arangodb::Result addShardFollower(std::string const& endpoint,
std::string errorMsg( std::string errorMsg(
"SynchronizeShard::addShardFollower: Failed to lookup collection "); "SynchronizeShard::addShardFollower: Failed to lookup collection ");
errorMsg += shard; errorMsg += shard;
LOG_TOPIC(ERR, Logger::MAINTENANCE) << errorMsg; LOG_TOPIC(DEBUG, Logger::MAINTENANCE) << errorMsg;
return arangodb::Result(TRI_ERROR_ARANGO_DATA_SOURCE_NOT_FOUND, errorMsg); return arangodb::Result(TRI_ERROR_ARANGO_DATA_SOURCE_NOT_FOUND, errorMsg);
} }
@ -276,7 +276,7 @@ static arangodb::Result addShardFollower(std::string const& endpoint,
if (result == nullptr || result->getHttpReturnCode() != 200) { if (result == nullptr || result->getHttpReturnCode() != 200) {
if (lockJobId != 0) { if (lockJobId != 0) {
errorMessage += comres->stringifyErrorMessage(); errorMessage += comres->stringifyErrorMessage();
LOG_TOPIC(ERR, Logger::MAINTENANCE) << errorMessage; LOG_TOPIC(DEBUG, Logger::MAINTENANCE) << errorMessage;
} else { } else {
errorMessage += "With shortcut (can happen, no problem)."; errorMessage += "With shortcut (can happen, no problem).";
LOG_TOPIC(INFO, Logger::MAINTENANCE) << errorMessage; LOG_TOPIC(INFO, Logger::MAINTENANCE) << errorMessage;
@ -292,7 +292,7 @@ static arangodb::Result addShardFollower(std::string const& endpoint,
errorMsg += database; errorMsg += database;
errorMsg += " exception: "; errorMsg += " exception: ";
errorMsg += e.what(); errorMsg += e.what();
LOG_TOPIC(ERR, Logger::MAINTENANCE) << errorMsg; LOG_TOPIC(DEBUG, Logger::MAINTENANCE) << errorMsg;
return arangodb::Result(TRI_ERROR_ARANGO_DATABASE_NOT_FOUND, errorMsg); return arangodb::Result(TRI_ERROR_ARANGO_DATABASE_NOT_FOUND, errorMsg);
} }
} }
@ -322,7 +322,7 @@ static arangodb::Result cancelReadLockOnLeader(std::string const& endpoint,
if (result == nullptr || result->getHttpReturnCode() != 200) { if (result == nullptr || result->getHttpReturnCode() != 200) {
auto errorMessage = comres->stringifyErrorMessage(); auto errorMessage = comres->stringifyErrorMessage();
LOG_TOPIC(ERR, Logger::MAINTENANCE) LOG_TOPIC(DEBUG, Logger::MAINTENANCE)
<< "cancelReadLockOnLeader: exception caught for " << body.toJson() << "cancelReadLockOnLeader: exception caught for " << body.toJson()
<< ": " << errorMessage; << ": " << errorMessage;
return arangodb::Result(TRI_ERROR_INTERNAL, errorMessage); return arangodb::Result(TRI_ERROR_INTERNAL, errorMessage);
@ -355,7 +355,7 @@ static arangodb::Result cancelBarrier(std::string const& endpoint,
// and is preferable over a segfault. // and is preferable over a segfault.
TRI_ASSERT(comres != nullptr); TRI_ASSERT(comres != nullptr);
if (comres == nullptr) { if (comres == nullptr) {
LOG_TOPIC(ERR, Logger::MAINTENANCE) LOG_TOPIC(DEBUG, Logger::MAINTENANCE)
<< "CancelBarrier: error: syncRequest returned null"; << "CancelBarrier: error: syncRequest returned null";
return arangodb::Result{TRI_ERROR_INTERNAL}; return arangodb::Result{TRI_ERROR_INTERNAL};
} }
@ -365,14 +365,14 @@ static arangodb::Result cancelBarrier(std::string const& endpoint,
if (result == nullptr || if (result == nullptr ||
(result->getHttpReturnCode() != 200 && result->getHttpReturnCode() != 204)) { (result->getHttpReturnCode() != 200 && result->getHttpReturnCode() != 204)) {
std::string errorMessage = comres->stringifyErrorMessage(); std::string errorMessage = comres->stringifyErrorMessage();
LOG_TOPIC(ERR, Logger::MAINTENANCE) << "CancelBarrier: error" << errorMessage; LOG_TOPIC(DEBUG, Logger::MAINTENANCE) << "CancelBarrier: error" << errorMessage;
return arangodb::Result(TRI_ERROR_INTERNAL, errorMessage); return arangodb::Result(TRI_ERROR_INTERNAL, errorMessage);
} }
} else { } else {
std::string error( std::string error(
"CancelBarrier: failed to send message to leader : status "); "CancelBarrier: failed to send message to leader : status ");
error += comres->status; error += comres->status;
LOG_TOPIC(ERR, Logger::MAINTENANCE) << error; LOG_TOPIC(DEBUG, Logger::MAINTENANCE) << error;
return arangodb::Result(TRI_ERROR_INTERNAL, error); return arangodb::Result(TRI_ERROR_INTERNAL, error);
} }
@ -478,7 +478,7 @@ arangodb::Result SynchronizeShard::getReadLock(
} }
LOG_TOPIC(ERR, Logger::MAINTENANCE) LOG_TOPIC(DEBUG, Logger::MAINTENANCE)
<< "startReadLockOnLeader: couldn't obtain lock on shard leader giving up"; << "startReadLockOnLeader: couldn't obtain lock on shard leader giving up";
double timeLeft = double timeLeft =
@ -491,7 +491,7 @@ arangodb::Result SynchronizeShard::getReadLock(
clientId, 1, endpoint, rest::RequestType::DELETE_REQ, url, body.toJson(), clientId, 1, endpoint, rest::RequestType::DELETE_REQ, url, body.toJson(),
std::unordered_map<std::string, std::string>(), timeLeft); std::unordered_map<std::string, std::string>(), timeLeft);
if (r->result == nullptr || r->result->getHttpReturnCode() != 200) { if (r->result == nullptr || r->result->getHttpReturnCode() != 200) {
LOG_TOPIC(ERR, Logger::MAINTENANCE) LOG_TOPIC(DEBUG, Logger::MAINTENANCE)
<< "startReadLockOnLeader: cancelation error for shard - " << collection << "startReadLockOnLeader: cancelation error for shard - " << collection
<< " " << r->getErrorCode() << ": " << r->stringifyErrorMessage(); << " " << r->getErrorCode() << ": " << r->stringifyErrorMessage();
} }
@ -507,7 +507,7 @@ arangodb::Result SynchronizeShard::startReadLockOnLeader(
rlid = 0; rlid = 0;
arangodb::Result result = getReadLockId(endpoint, database, clientId, timeout, rlid); arangodb::Result result = getReadLockId(endpoint, database, clientId, timeout, rlid);
if (!result.ok()) { if (!result.ok()) {
LOG_TOPIC(ERR, Logger::MAINTENANCE) << result.errorMessage(); LOG_TOPIC(DEBUG, Logger::MAINTENANCE) << result.errorMessage();
return result; return result;
} else { } else {
LOG_TOPIC(DEBUG, Logger::MAINTENANCE) << "Got read lock id: " << rlid; LOG_TOPIC(DEBUG, Logger::MAINTENANCE) << "Got read lock id: " << rlid;
@ -559,7 +559,7 @@ static arangodb::Result replicationSynchronize(
Result r = syncer->run(configuration._incremental); Result r = syncer->run(configuration._incremental);
if (r.fail()) { if (r.fail()) {
LOG_TOPIC(ERR, Logger::REPLICATION) LOG_TOPIC(DEBUG, Logger::REPLICATION)
<< "initial sync failed for database '" << database << "initial sync failed for database '" << database
<< "': " << r.errorMessage(); << "': " << r.errorMessage();
THROW_ARANGO_EXCEPTION_MESSAGE(r.errorNumber(), THROW_ARANGO_EXCEPTION_MESSAGE(r.errorNumber(),
@ -640,7 +640,7 @@ static arangodb::Result replicationSynchronizeCatchup(VPackSlice const& conf, do
} }
if (r.fail()) { if (r.fail()) {
LOG_TOPIC(ERR, Logger::REPLICATION) LOG_TOPIC(DEBUG, Logger::REPLICATION)
<< "syncCollectionFinalize failed: " << r.errorMessage(); << "syncCollectionFinalize failed: " << r.errorMessage();
} }
@ -677,7 +677,7 @@ static arangodb::Result replicationSynchronizeFinalize(VPackSlice const& conf) {
} }
if (r.fail()) { if (r.fail()) {
LOG_TOPIC(ERR, Logger::REPLICATION) LOG_TOPIC(DEBUG, Logger::REPLICATION)
<< "syncCollectionFinalize failed: " << r.errorMessage(); << "syncCollectionFinalize failed: " << r.errorMessage();
} }
@ -690,7 +690,7 @@ bool SynchronizeShard::first() {
std::string shard = _description.get(SHARD); std::string shard = _description.get(SHARD);
std::string leader = _description.get(THE_LEADER); std::string leader = _description.get(THE_LEADER);
LOG_TOPIC(DEBUG, Logger::MAINTENANCE) LOG_TOPIC(INFO, Logger::MAINTENANCE)
<< "SynchronizeShard: synchronizing shard '" << database << "/" << shard << "SynchronizeShard: synchronizing shard '" << database << "/" << shard
<< "' for central '" << database << "/" << planId << "'"; << "' for central '" << database << "/" << planId << "'";
@ -772,7 +772,7 @@ bool SynchronizeShard::first() {
if (collection == nullptr) { if (collection == nullptr) {
std::stringstream error; std::stringstream error;
error << "failed to lookup local shard " << shard; error << "failed to lookup local shard " << shard;
LOG_TOPIC(ERR, Logger::MAINTENANCE) << "SynchronizeOneShard: " << error.str(); LOG_TOPIC(DEBUG, Logger::MAINTENANCE) << "SynchronizeOneShard: " << error.str();
_result.reset(TRI_ERROR_ARANGO_DATA_SOURCE_NOT_FOUND, error.str()); _result.reset(TRI_ERROR_ARANGO_DATA_SOURCE_NOT_FOUND, error.str());
return false; return false;
} }
@ -782,7 +782,7 @@ bool SynchronizeShard::first() {
if (!collectionCount(collection, docCount).ok()) { if (!collectionCount(collection, docCount).ok()) {
std::stringstream error; std::stringstream error;
error << "failed to get a count on leader " << shard; error << "failed to get a count on leader " << shard;
LOG_TOPIC(ERR, Logger::MAINTENANCE) << "SynchronizeShard " << error.str(); LOG_TOPIC(DEBUG, Logger::MAINTENANCE) << "SynchronizeShard " << error.str();
_result.reset(TRI_ERROR_INTERNAL, error.str()); _result.reset(TRI_ERROR_INTERNAL, error.str());
return false; return false;
} }
@ -879,7 +879,7 @@ bool SynchronizeShard::first() {
std::stringstream error; std::stringstream error;
error << "could not initially synchronize shard " << shard << ": " error << "could not initially synchronize shard " << shard << ": "
<< res.errorMessage(); << res.errorMessage();
LOG_TOPIC(ERR, Logger::MAINTENANCE) << "SynchronizeOneShard: " << error.str(); LOG_TOPIC(DEBUG, Logger::MAINTENANCE) << "SynchronizeOneShard: " << error.str();
_result.reset(TRI_ERROR_INTERNAL, error.str()); _result.reset(TRI_ERROR_INTERNAL, error.str());
return false; return false;
} }
@ -897,7 +897,7 @@ bool SynchronizeShard::first() {
<< "shard " << shard << "shard " << shard
<< " seems to be gone from leader, this " << " seems to be gone from leader, this "
"can happen if a collection was dropped during synchronization!"; "can happen if a collection was dropped during synchronization!";
LOG_TOPIC(WARN, Logger::MAINTENANCE) << "SynchronizeOneShard: " << error.str(); LOG_TOPIC(DEBUG, Logger::MAINTENANCE) << "SynchronizeOneShard: " << error.str();
_result.reset(TRI_ERROR_INTERNAL, error.str()); _result.reset(TRI_ERROR_INTERNAL, error.str());
return false; return false;
} }
@ -931,7 +931,7 @@ bool SynchronizeShard::first() {
error << "synchronization of"; error << "synchronization of";
AppendShardInformationToMessage(database, shard, planId, startTime, error); AppendShardInformationToMessage(database, shard, planId, startTime, error);
error << " failed: " << e.what(); error << " failed: " << e.what();
LOG_TOPIC(ERR, Logger::MAINTENANCE) << error.str(); LOG_TOPIC(DEBUG, Logger::MAINTENANCE) << error.str();
_result.reset(TRI_ERROR_INTERNAL, e.what()); _result.reset(TRI_ERROR_INTERNAL, e.what());
return false; return false;
} }
@ -939,7 +939,7 @@ bool SynchronizeShard::first() {
} catch (std::exception const& e) { } catch (std::exception const& e) {
// This catches the case that we could not even find the collection // This catches the case that we could not even find the collection
// locally, because the DatabaseGuard constructor threw. // locally, because the DatabaseGuard constructor threw.
LOG_TOPIC(WARN, Logger::MAINTENANCE) LOG_TOPIC(DEBUG, Logger::MAINTENANCE)
<< "action " << _description << " failed with exception " << e.what(); << "action " << _description << " failed with exception " << e.what();
_result.reset(TRI_ERROR_INTERNAL, e.what()); _result.reset(TRI_ERROR_INTERNAL, e.what());
return false; return false;
@ -1046,7 +1046,7 @@ ResultT<TRI_voc_tick_t> SynchronizeShard::catchupWithReadLock(
} }
} }
if (didTimeout) { if (didTimeout) {
LOG_TOPIC(WARN, Logger::MAINTENANCE) LOG_TOPIC(DEBUG, Logger::MAINTENANCE)
<< "Could not catchup under softLock for " << shard << " on leader: " << leader << "Could not catchup under softLock for " << shard << " on leader: " << leader
<< " now activating hardLock. This is expected under high load."; << " now activating hardLock. This is expected under high load.";
} }
@ -1127,9 +1127,12 @@ Result SynchronizeShard::catchupWithExclusiveLock(
void SynchronizeShard::setState(ActionState state) { void SynchronizeShard::setState(ActionState state) {
if ((COMPLETE == state || FAILED == state) && _state != state) { if ((COMPLETE == state || FAILED == state) && _state != state) {
TRI_ASSERT(_description.has("shard")); auto const& shard = _description.get("shard");
_feature.incShardVersion(_description.get("shard")); if (COMPLETE == state) {
LOG_TOPIC(INFO, Logger::MAINTENANCE)
<< "SynchronizeShard: synchronization completed for shard " << shard;
}
_feature.incShardVersion(shard);
} }
ActionBase::setState(state); ActionBase::setState(state);
} }

View File

@ -122,7 +122,7 @@ LogTopic Logger::FLUSH("flush", LogLevel::INFO);
LogTopic Logger::GRAPHS("graphs", LogLevel::INFO); LogTopic Logger::GRAPHS("graphs", LogLevel::INFO);
LogTopic Logger::HEARTBEAT("heartbeat", LogLevel::INFO); LogTopic Logger::HEARTBEAT("heartbeat", LogLevel::INFO);
LogTopic Logger::HTTPCLIENT("httpclient", LogLevel::WARN); LogTopic Logger::HTTPCLIENT("httpclient", LogLevel::WARN);
LogTopic Logger::MAINTENANCE("maintenance", LogLevel::WARN); LogTopic Logger::MAINTENANCE("maintenance", LogLevel::INFO);
LogTopic Logger::MEMORY("memory", LogLevel::WARN); LogTopic Logger::MEMORY("memory", LogLevel::WARN);
LogTopic Logger::MMAP("mmap"); LogTopic Logger::MMAP("mmap");
LogTopic Logger::PERFORMANCE("performance", LogLevel::WARN); LogTopic Logger::PERFORMANCE("performance", LogLevel::WARN);