mirror of https://gitee.com/bigwinds/arangodb
releveling logging in maintenance module (#7925)
This commit is contained in:
parent
4ca7a3bc09
commit
7b37922f92
|
@ -82,6 +82,8 @@ v3.4.2 (XXXX-XX-XX)
|
||||||
|
|
||||||
* Improve single threaded performance by scheduler optimization.
|
* Improve single threaded performance by scheduler optimization.
|
||||||
|
|
||||||
|
* Releveling logging in maintenance
|
||||||
|
|
||||||
v3.4.1 (2018-12-19)
|
v3.4.1 (2018-12-19)
|
||||||
-------------------
|
-------------------
|
||||||
|
|
||||||
|
|
|
@ -111,7 +111,7 @@ SynchronizeShard::SynchronizeShard(MaintenanceFeature& feature, ActionDescriptio
|
||||||
TRI_ASSERT(desc.has(SHARD_VERSION));
|
TRI_ASSERT(desc.has(SHARD_VERSION));
|
||||||
|
|
||||||
if (!error.str().empty()) {
|
if (!error.str().empty()) {
|
||||||
LOG_TOPIC(ERR, Logger::MAINTENANCE) << "SynchronizeShard: " << error.str();
|
LOG_TOPIC(DEBUG, Logger::MAINTENANCE) << "SynchronizeShard: " << error.str();
|
||||||
_result.reset(TRI_ERROR_INTERNAL, error.str());
|
_result.reset(TRI_ERROR_INTERNAL, error.str());
|
||||||
setState(FAILED);
|
setState(FAILED);
|
||||||
}
|
}
|
||||||
|
@ -187,7 +187,7 @@ static arangodb::Result collectionCount(std::shared_ptr<arangodb::LogicalCollect
|
||||||
|
|
||||||
Result res = trx.begin();
|
Result res = trx.begin();
|
||||||
if (!res.ok()) {
|
if (!res.ok()) {
|
||||||
LOG_TOPIC(ERR, Logger::MAINTENANCE) << "Failed to start count transaction: " << res;
|
LOG_TOPIC(DEBUG, Logger::MAINTENANCE) << "Failed to start count transaction: " << res;
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -196,7 +196,7 @@ static arangodb::Result collectionCount(std::shared_ptr<arangodb::LogicalCollect
|
||||||
res = trx.finish(opResult.result);
|
res = trx.finish(opResult.result);
|
||||||
|
|
||||||
if (res.fail()) {
|
if (res.fail()) {
|
||||||
LOG_TOPIC(ERR, Logger::MAINTENANCE)
|
LOG_TOPIC(DEBUG, Logger::MAINTENANCE)
|
||||||
<< "Failed to finish count transaction: " << res;
|
<< "Failed to finish count transaction: " << res;
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
@ -232,7 +232,7 @@ static arangodb::Result addShardFollower(std::string const& endpoint,
|
||||||
std::string errorMsg(
|
std::string errorMsg(
|
||||||
"SynchronizeShard::addShardFollower: Failed to lookup collection ");
|
"SynchronizeShard::addShardFollower: Failed to lookup collection ");
|
||||||
errorMsg += shard;
|
errorMsg += shard;
|
||||||
LOG_TOPIC(ERR, Logger::MAINTENANCE) << errorMsg;
|
LOG_TOPIC(DEBUG, Logger::MAINTENANCE) << errorMsg;
|
||||||
return arangodb::Result(TRI_ERROR_ARANGO_DATA_SOURCE_NOT_FOUND, errorMsg);
|
return arangodb::Result(TRI_ERROR_ARANGO_DATA_SOURCE_NOT_FOUND, errorMsg);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -276,7 +276,7 @@ static arangodb::Result addShardFollower(std::string const& endpoint,
|
||||||
if (result == nullptr || result->getHttpReturnCode() != 200) {
|
if (result == nullptr || result->getHttpReturnCode() != 200) {
|
||||||
if (lockJobId != 0) {
|
if (lockJobId != 0) {
|
||||||
errorMessage += comres->stringifyErrorMessage();
|
errorMessage += comres->stringifyErrorMessage();
|
||||||
LOG_TOPIC(ERR, Logger::MAINTENANCE) << errorMessage;
|
LOG_TOPIC(DEBUG, Logger::MAINTENANCE) << errorMessage;
|
||||||
} else {
|
} else {
|
||||||
errorMessage += "With shortcut (can happen, no problem).";
|
errorMessage += "With shortcut (can happen, no problem).";
|
||||||
LOG_TOPIC(INFO, Logger::MAINTENANCE) << errorMessage;
|
LOG_TOPIC(INFO, Logger::MAINTENANCE) << errorMessage;
|
||||||
|
@ -292,7 +292,7 @@ static arangodb::Result addShardFollower(std::string const& endpoint,
|
||||||
errorMsg += database;
|
errorMsg += database;
|
||||||
errorMsg += " exception: ";
|
errorMsg += " exception: ";
|
||||||
errorMsg += e.what();
|
errorMsg += e.what();
|
||||||
LOG_TOPIC(ERR, Logger::MAINTENANCE) << errorMsg;
|
LOG_TOPIC(DEBUG, Logger::MAINTENANCE) << errorMsg;
|
||||||
return arangodb::Result(TRI_ERROR_ARANGO_DATABASE_NOT_FOUND, errorMsg);
|
return arangodb::Result(TRI_ERROR_ARANGO_DATABASE_NOT_FOUND, errorMsg);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -322,7 +322,7 @@ static arangodb::Result cancelReadLockOnLeader(std::string const& endpoint,
|
||||||
|
|
||||||
if (result == nullptr || result->getHttpReturnCode() != 200) {
|
if (result == nullptr || result->getHttpReturnCode() != 200) {
|
||||||
auto errorMessage = comres->stringifyErrorMessage();
|
auto errorMessage = comres->stringifyErrorMessage();
|
||||||
LOG_TOPIC(ERR, Logger::MAINTENANCE)
|
LOG_TOPIC(DEBUG, Logger::MAINTENANCE)
|
||||||
<< "cancelReadLockOnLeader: exception caught for " << body.toJson()
|
<< "cancelReadLockOnLeader: exception caught for " << body.toJson()
|
||||||
<< ": " << errorMessage;
|
<< ": " << errorMessage;
|
||||||
return arangodb::Result(TRI_ERROR_INTERNAL, errorMessage);
|
return arangodb::Result(TRI_ERROR_INTERNAL, errorMessage);
|
||||||
|
@ -355,7 +355,7 @@ static arangodb::Result cancelBarrier(std::string const& endpoint,
|
||||||
// and is preferable over a segfault.
|
// and is preferable over a segfault.
|
||||||
TRI_ASSERT(comres != nullptr);
|
TRI_ASSERT(comres != nullptr);
|
||||||
if (comres == nullptr) {
|
if (comres == nullptr) {
|
||||||
LOG_TOPIC(ERR, Logger::MAINTENANCE)
|
LOG_TOPIC(DEBUG, Logger::MAINTENANCE)
|
||||||
<< "CancelBarrier: error: syncRequest returned null";
|
<< "CancelBarrier: error: syncRequest returned null";
|
||||||
return arangodb::Result{TRI_ERROR_INTERNAL};
|
return arangodb::Result{TRI_ERROR_INTERNAL};
|
||||||
}
|
}
|
||||||
|
@ -365,14 +365,14 @@ static arangodb::Result cancelBarrier(std::string const& endpoint,
|
||||||
if (result == nullptr ||
|
if (result == nullptr ||
|
||||||
(result->getHttpReturnCode() != 200 && result->getHttpReturnCode() != 204)) {
|
(result->getHttpReturnCode() != 200 && result->getHttpReturnCode() != 204)) {
|
||||||
std::string errorMessage = comres->stringifyErrorMessage();
|
std::string errorMessage = comres->stringifyErrorMessage();
|
||||||
LOG_TOPIC(ERR, Logger::MAINTENANCE) << "CancelBarrier: error" << errorMessage;
|
LOG_TOPIC(DEBUG, Logger::MAINTENANCE) << "CancelBarrier: error" << errorMessage;
|
||||||
return arangodb::Result(TRI_ERROR_INTERNAL, errorMessage);
|
return arangodb::Result(TRI_ERROR_INTERNAL, errorMessage);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
std::string error(
|
std::string error(
|
||||||
"CancelBarrier: failed to send message to leader : status ");
|
"CancelBarrier: failed to send message to leader : status ");
|
||||||
error += comres->status;
|
error += comres->status;
|
||||||
LOG_TOPIC(ERR, Logger::MAINTENANCE) << error;
|
LOG_TOPIC(DEBUG, Logger::MAINTENANCE) << error;
|
||||||
return arangodb::Result(TRI_ERROR_INTERNAL, error);
|
return arangodb::Result(TRI_ERROR_INTERNAL, error);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -478,7 +478,7 @@ arangodb::Result SynchronizeShard::getReadLock(
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
LOG_TOPIC(ERR, Logger::MAINTENANCE)
|
LOG_TOPIC(DEBUG, Logger::MAINTENANCE)
|
||||||
<< "startReadLockOnLeader: couldn't obtain lock on shard leader giving up";
|
<< "startReadLockOnLeader: couldn't obtain lock on shard leader giving up";
|
||||||
|
|
||||||
double timeLeft =
|
double timeLeft =
|
||||||
|
@ -491,7 +491,7 @@ arangodb::Result SynchronizeShard::getReadLock(
|
||||||
clientId, 1, endpoint, rest::RequestType::DELETE_REQ, url, body.toJson(),
|
clientId, 1, endpoint, rest::RequestType::DELETE_REQ, url, body.toJson(),
|
||||||
std::unordered_map<std::string, std::string>(), timeLeft);
|
std::unordered_map<std::string, std::string>(), timeLeft);
|
||||||
if (r->result == nullptr || r->result->getHttpReturnCode() != 200) {
|
if (r->result == nullptr || r->result->getHttpReturnCode() != 200) {
|
||||||
LOG_TOPIC(ERR, Logger::MAINTENANCE)
|
LOG_TOPIC(DEBUG, Logger::MAINTENANCE)
|
||||||
<< "startReadLockOnLeader: cancelation error for shard - " << collection
|
<< "startReadLockOnLeader: cancelation error for shard - " << collection
|
||||||
<< " " << r->getErrorCode() << ": " << r->stringifyErrorMessage();
|
<< " " << r->getErrorCode() << ": " << r->stringifyErrorMessage();
|
||||||
}
|
}
|
||||||
|
@ -507,7 +507,7 @@ arangodb::Result SynchronizeShard::startReadLockOnLeader(
|
||||||
rlid = 0;
|
rlid = 0;
|
||||||
arangodb::Result result = getReadLockId(endpoint, database, clientId, timeout, rlid);
|
arangodb::Result result = getReadLockId(endpoint, database, clientId, timeout, rlid);
|
||||||
if (!result.ok()) {
|
if (!result.ok()) {
|
||||||
LOG_TOPIC(ERR, Logger::MAINTENANCE) << result.errorMessage();
|
LOG_TOPIC(DEBUG, Logger::MAINTENANCE) << result.errorMessage();
|
||||||
return result;
|
return result;
|
||||||
} else {
|
} else {
|
||||||
LOG_TOPIC(DEBUG, Logger::MAINTENANCE) << "Got read lock id: " << rlid;
|
LOG_TOPIC(DEBUG, Logger::MAINTENANCE) << "Got read lock id: " << rlid;
|
||||||
|
@ -559,7 +559,7 @@ static arangodb::Result replicationSynchronize(
|
||||||
Result r = syncer->run(configuration._incremental);
|
Result r = syncer->run(configuration._incremental);
|
||||||
|
|
||||||
if (r.fail()) {
|
if (r.fail()) {
|
||||||
LOG_TOPIC(ERR, Logger::REPLICATION)
|
LOG_TOPIC(DEBUG, Logger::REPLICATION)
|
||||||
<< "initial sync failed for database '" << database
|
<< "initial sync failed for database '" << database
|
||||||
<< "': " << r.errorMessage();
|
<< "': " << r.errorMessage();
|
||||||
THROW_ARANGO_EXCEPTION_MESSAGE(r.errorNumber(),
|
THROW_ARANGO_EXCEPTION_MESSAGE(r.errorNumber(),
|
||||||
|
@ -640,7 +640,7 @@ static arangodb::Result replicationSynchronizeCatchup(VPackSlice const& conf, do
|
||||||
}
|
}
|
||||||
|
|
||||||
if (r.fail()) {
|
if (r.fail()) {
|
||||||
LOG_TOPIC(ERR, Logger::REPLICATION)
|
LOG_TOPIC(DEBUG, Logger::REPLICATION)
|
||||||
<< "syncCollectionFinalize failed: " << r.errorMessage();
|
<< "syncCollectionFinalize failed: " << r.errorMessage();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -677,7 +677,7 @@ static arangodb::Result replicationSynchronizeFinalize(VPackSlice const& conf) {
|
||||||
}
|
}
|
||||||
|
|
||||||
if (r.fail()) {
|
if (r.fail()) {
|
||||||
LOG_TOPIC(ERR, Logger::REPLICATION)
|
LOG_TOPIC(DEBUG, Logger::REPLICATION)
|
||||||
<< "syncCollectionFinalize failed: " << r.errorMessage();
|
<< "syncCollectionFinalize failed: " << r.errorMessage();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -690,7 +690,7 @@ bool SynchronizeShard::first() {
|
||||||
std::string shard = _description.get(SHARD);
|
std::string shard = _description.get(SHARD);
|
||||||
std::string leader = _description.get(THE_LEADER);
|
std::string leader = _description.get(THE_LEADER);
|
||||||
|
|
||||||
LOG_TOPIC(DEBUG, Logger::MAINTENANCE)
|
LOG_TOPIC(INFO, Logger::MAINTENANCE)
|
||||||
<< "SynchronizeShard: synchronizing shard '" << database << "/" << shard
|
<< "SynchronizeShard: synchronizing shard '" << database << "/" << shard
|
||||||
<< "' for central '" << database << "/" << planId << "'";
|
<< "' for central '" << database << "/" << planId << "'";
|
||||||
|
|
||||||
|
@ -772,7 +772,7 @@ bool SynchronizeShard::first() {
|
||||||
if (collection == nullptr) {
|
if (collection == nullptr) {
|
||||||
std::stringstream error;
|
std::stringstream error;
|
||||||
error << "failed to lookup local shard " << shard;
|
error << "failed to lookup local shard " << shard;
|
||||||
LOG_TOPIC(ERR, Logger::MAINTENANCE) << "SynchronizeOneShard: " << error.str();
|
LOG_TOPIC(DEBUG, Logger::MAINTENANCE) << "SynchronizeOneShard: " << error.str();
|
||||||
_result.reset(TRI_ERROR_ARANGO_DATA_SOURCE_NOT_FOUND, error.str());
|
_result.reset(TRI_ERROR_ARANGO_DATA_SOURCE_NOT_FOUND, error.str());
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
@ -782,7 +782,7 @@ bool SynchronizeShard::first() {
|
||||||
if (!collectionCount(collection, docCount).ok()) {
|
if (!collectionCount(collection, docCount).ok()) {
|
||||||
std::stringstream error;
|
std::stringstream error;
|
||||||
error << "failed to get a count on leader " << shard;
|
error << "failed to get a count on leader " << shard;
|
||||||
LOG_TOPIC(ERR, Logger::MAINTENANCE) << "SynchronizeShard " << error.str();
|
LOG_TOPIC(DEBUG, Logger::MAINTENANCE) << "SynchronizeShard " << error.str();
|
||||||
_result.reset(TRI_ERROR_INTERNAL, error.str());
|
_result.reset(TRI_ERROR_INTERNAL, error.str());
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
@ -879,7 +879,7 @@ bool SynchronizeShard::first() {
|
||||||
std::stringstream error;
|
std::stringstream error;
|
||||||
error << "could not initially synchronize shard " << shard << ": "
|
error << "could not initially synchronize shard " << shard << ": "
|
||||||
<< res.errorMessage();
|
<< res.errorMessage();
|
||||||
LOG_TOPIC(ERR, Logger::MAINTENANCE) << "SynchronizeOneShard: " << error.str();
|
LOG_TOPIC(DEBUG, Logger::MAINTENANCE) << "SynchronizeOneShard: " << error.str();
|
||||||
_result.reset(TRI_ERROR_INTERNAL, error.str());
|
_result.reset(TRI_ERROR_INTERNAL, error.str());
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
@ -897,7 +897,7 @@ bool SynchronizeShard::first() {
|
||||||
<< "shard " << shard
|
<< "shard " << shard
|
||||||
<< " seems to be gone from leader, this "
|
<< " seems to be gone from leader, this "
|
||||||
"can happen if a collection was dropped during synchronization!";
|
"can happen if a collection was dropped during synchronization!";
|
||||||
LOG_TOPIC(WARN, Logger::MAINTENANCE) << "SynchronizeOneShard: " << error.str();
|
LOG_TOPIC(DEBUG, Logger::MAINTENANCE) << "SynchronizeOneShard: " << error.str();
|
||||||
_result.reset(TRI_ERROR_INTERNAL, error.str());
|
_result.reset(TRI_ERROR_INTERNAL, error.str());
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
@ -931,7 +931,7 @@ bool SynchronizeShard::first() {
|
||||||
error << "synchronization of";
|
error << "synchronization of";
|
||||||
AppendShardInformationToMessage(database, shard, planId, startTime, error);
|
AppendShardInformationToMessage(database, shard, planId, startTime, error);
|
||||||
error << " failed: " << e.what();
|
error << " failed: " << e.what();
|
||||||
LOG_TOPIC(ERR, Logger::MAINTENANCE) << error.str();
|
LOG_TOPIC(DEBUG, Logger::MAINTENANCE) << error.str();
|
||||||
_result.reset(TRI_ERROR_INTERNAL, e.what());
|
_result.reset(TRI_ERROR_INTERNAL, e.what());
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
@ -939,7 +939,7 @@ bool SynchronizeShard::first() {
|
||||||
} catch (std::exception const& e) {
|
} catch (std::exception const& e) {
|
||||||
// This catches the case that we could not even find the collection
|
// This catches the case that we could not even find the collection
|
||||||
// locally, because the DatabaseGuard constructor threw.
|
// locally, because the DatabaseGuard constructor threw.
|
||||||
LOG_TOPIC(WARN, Logger::MAINTENANCE)
|
LOG_TOPIC(DEBUG, Logger::MAINTENANCE)
|
||||||
<< "action " << _description << " failed with exception " << e.what();
|
<< "action " << _description << " failed with exception " << e.what();
|
||||||
_result.reset(TRI_ERROR_INTERNAL, e.what());
|
_result.reset(TRI_ERROR_INTERNAL, e.what());
|
||||||
return false;
|
return false;
|
||||||
|
@ -1046,7 +1046,7 @@ ResultT<TRI_voc_tick_t> SynchronizeShard::catchupWithReadLock(
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (didTimeout) {
|
if (didTimeout) {
|
||||||
LOG_TOPIC(WARN, Logger::MAINTENANCE)
|
LOG_TOPIC(DEBUG, Logger::MAINTENANCE)
|
||||||
<< "Could not catchup under softLock for " << shard << " on leader: " << leader
|
<< "Could not catchup under softLock for " << shard << " on leader: " << leader
|
||||||
<< " now activating hardLock. This is expected under high load.";
|
<< " now activating hardLock. This is expected under high load.";
|
||||||
}
|
}
|
||||||
|
@ -1127,9 +1127,12 @@ Result SynchronizeShard::catchupWithExclusiveLock(
|
||||||
|
|
||||||
void SynchronizeShard::setState(ActionState state) {
|
void SynchronizeShard::setState(ActionState state) {
|
||||||
if ((COMPLETE == state || FAILED == state) && _state != state) {
|
if ((COMPLETE == state || FAILED == state) && _state != state) {
|
||||||
TRI_ASSERT(_description.has("shard"));
|
auto const& shard = _description.get("shard");
|
||||||
_feature.incShardVersion(_description.get("shard"));
|
if (COMPLETE == state) {
|
||||||
|
LOG_TOPIC(INFO, Logger::MAINTENANCE)
|
||||||
|
<< "SynchronizeShard: synchronization completed for shard " << shard;
|
||||||
|
}
|
||||||
|
_feature.incShardVersion(shard);
|
||||||
}
|
}
|
||||||
|
|
||||||
ActionBase::setState(state);
|
ActionBase::setState(state);
|
||||||
}
|
}
|
||||||
|
|
|
@ -122,7 +122,7 @@ LogTopic Logger::FLUSH("flush", LogLevel::INFO);
|
||||||
LogTopic Logger::GRAPHS("graphs", LogLevel::INFO);
|
LogTopic Logger::GRAPHS("graphs", LogLevel::INFO);
|
||||||
LogTopic Logger::HEARTBEAT("heartbeat", LogLevel::INFO);
|
LogTopic Logger::HEARTBEAT("heartbeat", LogLevel::INFO);
|
||||||
LogTopic Logger::HTTPCLIENT("httpclient", LogLevel::WARN);
|
LogTopic Logger::HTTPCLIENT("httpclient", LogLevel::WARN);
|
||||||
LogTopic Logger::MAINTENANCE("maintenance", LogLevel::WARN);
|
LogTopic Logger::MAINTENANCE("maintenance", LogLevel::INFO);
|
||||||
LogTopic Logger::MEMORY("memory", LogLevel::WARN);
|
LogTopic Logger::MEMORY("memory", LogLevel::WARN);
|
||||||
LogTopic Logger::MMAP("mmap");
|
LogTopic Logger::MMAP("mmap");
|
||||||
LogTopic Logger::PERFORMANCE("performance", LogLevel::WARN);
|
LogTopic Logger::PERFORMANCE("performance", LogLevel::WARN);
|
||||||
|
|
Loading…
Reference in New Issue