mirror of https://gitee.com/bigwinds/arangodb
port of replication improvements from 3.4 (#8308)
This commit is contained in:
parent
def7b613db
commit
12e11a5197
@@ -34,16 +34,30 @@ The result might look like this:
 {
   "state" : {
     "running" : true,
-    "lastLogTick" : "133322013",
-    "totalEvents" : 16,
-    "time" : "2014-07-06T12:58:11Z"
+    "lastLogTick" : "2064735086",
+    "lastUncommittedLogTick" : "2064735086",
+    "totalEvents" : 2064735086,
+    "time" : "2019-03-01T11:38:39Z"
   },
   "server" : {
-    "version" : "2.2.0-devel",
-    "serverId" : "40897075811372"
+    "version" : "3.4.4",
+    "serverId" : "135694526467864",
+    "engine" : "rocksdb"
   },
-  "clients" : {
-  }
+  "clients" : [
+    {
+      "serverId" : "46402312160836",
+      "time" : "2019-03-01T11:38:39Z",
+      "expires" : "2019-03-01T13:38:39Z",
+      "lastServedTick" : "2064459411"
+    },
+    {
+      "serverId" : "260321896124903",
+      "time" : "2019-03-01T11:29:45Z",
+      "expires" : "2019-03-01T13:29:45Z",
+      "lastServedTick" : "2002717896"
+    }
+  ]
 }
 ```
 
@@ -51,10 +65,16 @@ The *running* attribute will always be true. In earlier versions of ArangoDB the
 replication was optional and this could have been *false*.
 
 The *totalEvents* attribute indicates how many log events have been logged since
-the start of the ArangoDB server. Finally, the *lastLogTick* value indicates the
-_id_ of the last operation that was written to the server's _write-ahead log_.
+the start of the ArangoDB server. The *lastLogTick* value indicates the _id_ of the
+last committed operation that was written to the server's _write-ahead log_.
 It can be used to determine whether new operations were logged, and is also used
-by the _replication applier_ for incremental fetching of data.
+by the _replication applier_ for incremental fetching of data. The *lastUncommittedLogTick*
+value contains the _id_ of the last uncommitted operation that was written to the
+server's WAL. For the RocksDB storage engine, *lastLogTick* and *lastUncommittedLogTick*
+are identical, as the WAL only contains committed operations.
 
 The *clients* attribute reveals which clients (slaves) have connected to the
 master recently, and up to which tick value they caught up with the replication.
 
 **Note**: The replication logger state can also be queried via the
 [HTTP API](../../../../HTTP/Replications/index.html).
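For reference, the logger state shown above can also be fetched from an arangosh session connected to the master via the `@arangodb/replication` module — a minimal sketch, with the attribute names as in the example above:

```js
// arangosh, connected to the master
var replication = require("@arangodb/replication");
var s = replication.logger.state();
print(s.state.lastLogTick);            // last committed WAL tick
print(s.state.lastUncommittedLogTick); // same value on RocksDB
print(s.clients);                      // connected slaves and their served ticks
```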
@@ -108,41 +128,67 @@ The result might look like this:
 ```js
 {
   "state" : {
+    "started" : "2019-03-01T11:36:33Z",
     "running" : true,
-    "lastAppliedContinuousTick" : "152786205",
-    "lastProcessedContinuousTick" : "152786205",
-    "lastAvailableContinuousTick" : "152786205",
+    "phase" : "running",
+    "lastAppliedContinuousTick" : "2050724544",
+    "lastProcessedContinuousTick" : "2050724544",
+    "lastAvailableContinuousTick" : "2050724546",
+    "safeResumeTick" : "2050694546",
+    "ticksBehind" : 2,
     "progress" : {
-      "time" : "2014-07-06T13:04:57Z",
-      "message" : "fetching master log from offset 152786205",
+      "time" : "2019-03-01T11:36:33Z",
+      "message" : "fetching master log from tick 2050694546, last scanned tick 2050664547, first regular tick 2050544543, barrier: 0, open transactions: 1, chunk size 6291456",
       "failedConnects" : 0
     },
-    "totalRequests" : 38,
+    "totalRequests" : 2,
     "totalFailedConnects" : 0,
-    "totalEvents" : 1,
+    "totalEvents" : 50010,
+    "totalDocuments" : 50000,
+    "totalRemovals" : 0,
+    "totalResyncs" : 0,
+    "totalOperationsExcluded" : 0,
+    "totalApplyTime" : 1.1071290969848633,
+    "averageApplyTime" : 1.1071290969848633,
+    "totalFetchTime" : 0.2129514217376709,
+    "averageFetchTime" : 0.10647571086883545,
    "lastError" : {
      "errorNum" : 0
    },
-    "time" : "2014-07-06T13:04:57Z"
+    "time" : "2019-03-01T11:36:34Z"
   },
   "server" : {
-    "version" : "2.2.0-devel",
-    "serverId" : "210189384542896"
+    "version" : "3.4.4",
+    "serverId" : "46402312160836"
   },
-  "endpoint" : "tcp://master.example.org:8529",
-  "database" : "_system"
+  "endpoint" : "tcp://master.example.org",
+  "database" : "test"
 }
 ```
 
 The *running* attribute indicates whether the _replication applier_ of the current
-database is currently running and polling the server at *endpoint* for new events.
+database is currently running and polling the master at *endpoint* for new events.
+
+The *started* attribute shows at what date and time the applier was started (if at all).
 
 The *progress.failedConnects* attribute shows how many failed connection attempts
 the _replication applier_ currently has encountered in a row. In contrast, the
 *totalFailedConnects* attribute indicates how many failed connection attempts the
 _applier_ has made in total. The *totalRequests* attribute shows how many requests
-the _applier_ has sent to the master database in total. The *totalEvents* attribute
-shows how many log events the _applier_ has read from the master.
+the _applier_ has sent to the master database in total.
+
+The *totalEvents* attribute shows how many log events the _applier_ has read from the
+master. The *totalDocuments* and *totalRemovals* attributes indicate how many document
+operations the slave has applied locally.
+
+The attributes *totalApplyTime* and *totalFetchTime* show the total time the applier
+spent for applying data batches locally, and the total time the applier waited on
+data-fetching requests to the master, respectively.
+The *averageApplyTime* and *averageFetchTime* attributes show the average times clocked
+for these operations. Note that the average times will greatly be influenced by the
+chunk size used in the applier configuration (bigger chunk sizes mean fewer requests to
+the master, but the batches will include more data and take more time to create
+and apply).
 
 The *progress.message* sub-attribute provides a brief hint of what the _applier_
 currently does (if it is running). The *lastError* attribute also has an optional
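For reference, the applier state shown above can be queried per database from arangosh — a minimal sketch; `ticksBehind` is a rough measure of how far the slave lags behind the master:

```js
// arangosh, connected to the slave database
var replication = require("@arangodb/replication");
var s = replication.applier.state();
print(s.state.running);
print(s.state.ticksBehind);      // distance to the master's last available tick
print(s.state.averageApplyTime); // totalApplyTime divided by the number of apply batches
```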
@@ -157,22 +203,42 @@ due to (repeated) connection problems:
 ```js
 {
   "state" : {
+    "started" : "2019-03-01T11:51:18Z",
     "running" : false,
+    "phase" : "inactive",
+    "lastAppliedContinuousTick" : "2101606350",
+    "lastProcessedContinuousTick" : "2101606370",
+    "lastAvailableContinuousTick" : "2101606370",
+    "safeResumeTick" : "2101606350",
     "progress" : {
-      "time" : "2014-07-06T13:14:37Z",
-      "message" : "applier stopped",
+      "time" : "2019-03-01T11:52:45Z",
+      "message" : "applier shut down",
       "failedConnects" : 6
     },
-    "totalRequests" : 79,
-    "totalFailedConnects" : 11,
+    "totalRequests" : 19,
+    "totalFailedConnects" : 6,
     "totalEvents" : 0,
+    "totalDocuments" : 0,
+    "totalRemovals" : 0,
+    "totalResyncs" : 0,
+    "totalOperationsExcluded" : 0,
+    "totalApplyTime" : 0,
+    "averageApplyTime" : 0,
+    "totalFetchTime" : 0.03386974334716797,
+    "averageFetchTime" : 0.0028224786122639975,
    "lastError" : {
-      "time" : "2014-07-06T13:09:41Z",
-      "errorMessage" : "could not connect to master at tcp://master.example.org:8529: Could not connect to 'tcp:/...",
-      "errorNum" : 1400
-    },
-    ...
-  }
+      "errorNum" : 1400,
+      "time" : "2019-03-01T11:52:45Z",
+      "errorMessage" : "could not connect to master at tcp://127.0.0.1:8529 for URL /_api/wal/tail?chunkSize=6291456&barrier=0&from=2101606369&lastScanned=2101606370&serverId=46402312160836&includeSystem=true&includeFoxxQueues=false: Could not connect to 'http+tcp://127.0.0.1:852..."
+    },
+    "time" : "2019-03-01T11:52:56Z"
+  },
+  "server" : {
+    "version" : "3.4.4",
+    "serverId" : "46402312160836"
+  },
+  "endpoint" : "tcp://master.example.org",
+  "database" : "test"
 }
 ```
 
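In a state like the one above, the applier has stopped after repeated connection failures. Once the master is reachable again, it can be restarted from arangosh — a sketch, assuming the applier was configured before and resumes from its persisted tick:

```js
// arangosh, on the slave: restart a stopped replication applier
var replication = require("@arangodb/replication");
replication.applier.start();
print(replication.applier.state().state.running); // should be true again
```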
@@ -224,6 +224,39 @@ By decreasing this option's value, the server will start the removal of obsolete
 WAL files earlier after server start. This is useful in testing environments that
 are space-restricted and do not require keeping much WAL file data at all.
 
+`--rocksdb.wal-archive-size-limit`
+
+Maximum total size (in bytes) of archived WAL files to keep on a leader.
+A value of `0` will not restrict the size of the archive, so the leader will
+remove archived WAL files when there are no replication clients needing them.
+Any non-zero value will restrict the size of the WAL files archive to about the
+specified value and trigger WAL archive file deletion once the threshold is reached.
+Please note that the value is only a threshold, so the archive may get bigger than
+the configured value until the background thread actually deletes files from
+the archive. Also note that deletion from the archive will only kick in after
+`--rocksdb.wal-file-timeout-initial` seconds have elapsed after server start.
+
+The default value is `0` (i.e. unlimited).
+
+When setting the value to a size bigger than 0, the RocksDB storage engine
+will force a removal of archived WAL files if the total size of the archive
+exceeds the configured size. The option can be used to get rid of archived
+WAL files in a disk size-constrained environment.
+
+Note that archived WAL files are normally deleted automatically after a
+short while when there is no follower attached that may read from the archive.
+However, in case there are followers attached that may read from the
+archive, WAL files normally remain in the archive until their contents have
+been streamed to the followers. In case there are slow followers that cannot
+catch up, this will cause a growth of the WAL files archive over time.
+
+The option `--rocksdb.wal-archive-size-limit` can now be used to force a
+deletion of WAL files from the archive even if there are followers attached
+that may want to read the archive. In case the option is set and a leader
+deletes files from the archive that followers want to read, this will abort
+the replication on the followers. Followers can however restart the replication
+by doing a resync.
 
 `--rocksdb.max-transaction-size`
 
 Transaction size limit (in bytes). Transactions store all keys and values in
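For example, starting the server with `arangod --rocksdb.wal-archive-size-limit 1073741824` caps the WAL archive at roughly 1 GiB; as described above, this may abort the replication on slow followers, which then need to run a resync.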
@@ -52,12 +52,7 @@ class OurLessThan {
     AqlValue const& lhs = left.getValue(reg.reg);
     AqlValue const& rhs = right.getValue(reg.reg);
 
-#if 0 // #ifdef USE_IRESEARCH
-    TRI_ASSERT(reg.comparator);
-    int const cmp = (*reg.comparator)(reg.scorer.get(), _trx, lhs, rhs);
-#else
     int const cmp = AqlValue::Compare(_trx, lhs, rhs, true);
-#endif
 
     if (cmp < 0) {
       return reg.asc;
@@ -33,35 +33,14 @@ namespace aql {
 /// @brief sort element for block, consisting of register, sort direction,
 /// and a possible attribute path to dig into the document
 struct SortRegister {
+  SortRegister(SortRegister&) = delete; //we can not copy the ireseach scorer
+  SortRegister(SortRegister&&) = default;
-#if 0 // #ifdef USE_IRESEARCH
-  typedef int(*CompareFunc)(
-    irs::sort::prepared const* scorer,
-    transaction::Methods* trx,
-    AqlValue const& lhs,
-    AqlValue const& rhs
-  );
-
-  irs::sort::prepared::ptr scorer;
-  CompareFunc comparator;
-#endif
-  SortRegister(SortRegister&) = delete; //we can not copy the ireseach scorer
-  SortRegister(SortRegister&&) = default;
 
   std::vector<std::string> const& attributePath;
   RegisterId reg;
   bool asc;
 
-#if 0 // #ifdef USE_IRESEARCH
-  SortRegister(RegisterId reg, SortElement const& element,
-               CompareFunc comparator) noexcept
-    : comparator(comparator),
-      attributePath(element.attributePath),
-      reg(reg),
-      asc(element.ascending) {}
-#else
   SortRegister(RegisterId reg, SortElement const& element) noexcept;
-#endif
 
   static void fill(ExecutionPlan const& /*execPlan*/,
                    ExecutionNode::RegisterPlan const& regPlan,
@@ -658,6 +658,8 @@ void HeartbeatThread::runSingleServer() {
       if (applier->isActive()) {
         applier->stopAndJoin();
       }
+      // we are leader now. make sure the applier drops its previous state
+      applier->forget();
       lastTick = EngineSelectorFeature::ENGINE->currentTick();
 
       // put the leader in optional read-only mode
@@ -446,10 +446,6 @@ void MaintenanceFeature::toVelocyPack(VPackBuilder& vb) const {
   }
 
 } // MaintenanceFeature::toVelocyPack
-#if 0
-std::string MaintenanceFeature::toJson(VPackBuilder & builder) {
-} // MaintenanceFeature::toJson
-#endif
 
 std::string const SLASH("/");
 
@@ -335,7 +335,7 @@ void ClusterCollection::prepareIndexes(arangodb::velocypack::Slice indexesSlice)
 
 #ifdef ARANGODB_ENABLE_MAINTAINER_MODE
     for (auto it : _indexes) {
-      LOG_TOPIC(ERR, arangodb::Logger::FIXME) << "- " << it.get();
+      LOG_TOPIC(ERR, arangodb::Logger::FIXME) << "- " << it->context();
     }
 #endif
     THROW_ARANGO_EXCEPTION_MESSAGE(TRI_ERROR_INTERNAL, msg);
@@ -385,6 +385,7 @@ void IResearchLink::batchInsert( // insert documents
       if (!dynamic_cast<arangodb::MMFilesCollection*>(collection().getPhysical())) {
         break; // skip for non-MMFiles (fallthough for MMFiles)
       }
+      // intentionally falls through
     case RecoveryState::DURING_CHECKPOINT:
       for (auto const& doc: batch) {
         ctx->remove(doc.first);

@@ -1746,4 +1747,4 @@ arangodb::Result IResearchLink::unload() {
 
 // -----------------------------------------------------------------------------
 // --SECTION--                                                       END-OF-FILE
-// -----------------------------------------------------------------------------
+// -----------------------------------------------------------------------------
@@ -307,6 +307,10 @@ void MMFilesRestReplicationHandler::handleCommandLoggerFollow() {
     return;
   }
 
+  // don't read over the last committed tick value, which we will return
+  // as part of our response as well
+  tickEnd = std::max(tickEnd, state.lastCommittedTick);
+
   // check if a barrier id was specified in request
   TRI_voc_tid_t barrierId = 0;
   std::string const& value3 = _request->value("barrier", found);
@@ -466,6 +466,13 @@ struct MMFilesWalAccessContext : WalAccessContext {
     // get the marker's tick and check whether we should include it
     TRI_voc_tick_t foundTick = marker->getTick();
 
+    if (foundTick > state.lastCommittedTick) {
+      // don't read more than the last committed tick value, which we
+      // will return as part of the result as well
+      hasMore = false;
+      break;
+    }
+
     if (foundTick <= _filter.tickEnd) {
       lastScannedTick = foundTick;
     }
@@ -53,7 +53,8 @@ class MMFilesWalAccess final : public WalAccess {
 
   /// Tails the wall, this will already sanitize the
   WalAccessResult tail(WalAccess::Filter const& filter, size_t chunkSize,
-                       TRI_voc_tid_t barrierId, MarkerCallback const&) const override;
+                       TRI_voc_tid_t barrierId,
+                       MarkerCallback const&) const override;
 };
 } // namespace arangodb
 
@@ -536,10 +536,11 @@ void DatabaseInitialSyncer::fetchDumpChunk(std::shared_ptr<Syncer::JobSynchroniz
       response.reset(client->retryRequest(rest::RequestType::GET, url, nullptr, 0, headers));
     });
 
+    t = TRI_microtime() - t;
     if (replutils::hasFailed(response.get())) {
-      stats.waitedForDump += TRI_microtime() - t;
+      stats.waitedForDump += t;
       sharedStatus->gotResponse(
-          replutils::buildHttpError(response.get(), url, _config.connection));
+          replutils::buildHttpError(response.get(), url, _config.connection), t);
       return;
     }
 

@@ -609,15 +610,15 @@ void DatabaseInitialSyncer::fetchDumpChunk(std::shared_ptr<Syncer::JobSynchroniz
       // fallthrough here in case everything went well
     }
 
-    stats.waitedForDump += TRI_microtime() - t;
+    stats.waitedForDump += t;
 
     if (replutils::hasFailed(response.get())) {
       // failure
       sharedStatus->gotResponse(
-          replutils::buildHttpError(response.get(), url, _config.connection));
+          replutils::buildHttpError(response.get(), url, _config.connection), t);
     } else {
       // success!
-      sharedStatus->gotResponse(std::move(response));
+      sharedStatus->gotResponse(std::move(response), t);
     }
   } catch (basics::Exception const& ex) {
     sharedStatus->gotResponse(Result(ex.code(), ex.what()));
@@ -57,7 +57,7 @@ DatabaseReplicationApplier::DatabaseReplicationApplier(ReplicationApplierConfigu
 
 DatabaseReplicationApplier::~DatabaseReplicationApplier() {
   try {
-    stop();
+    stopAndJoin();
   } catch (...) {
   }
 }
@@ -79,7 +79,7 @@ DatabaseTailingSyncer::DatabaseTailingSyncer(TRI_vocbase_t& vocbase,
 /// @brief save the current applier state
 Result DatabaseTailingSyncer::saveApplierState() {
   auto rv = _applier->persistStateResult(false);
-  if (rv.fail()){
+  if (rv.fail()) {
     THROW_ARANGO_EXCEPTION(rv);
   }
   return rv;

@@ -180,6 +180,7 @@ Result DatabaseTailingSyncer::syncCollectionCatchupInternal(std::string const& c
   }
   if (!fromIncluded && fromTick > 0) {
     until = fromTick;
+    abortOngoingTransactions();
     return Result(
         TRI_ERROR_REPLICATION_START_TICK_NOT_PRESENT,
         std::string("required follow tick value '") + StringUtils::itoa(lastIncludedTick) +

@@ -189,9 +190,9 @@ Result DatabaseTailingSyncer::syncCollectionCatchupInternal(std::string const& c
         "number of historic logfiles on the master.");
   }
 
-  uint64_t processedMarkers = 0;
+  ApplyStats applyStats;
   uint64_t ignoreCount = 0;
-  Result r = applyLog(response.get(), fromTick, processedMarkers, ignoreCount);
+  Result r = applyLog(response.get(), fromTick, applyStats, ignoreCount);
   if (r.fail()) {
     until = fromTick;
     return r;
@@ -461,10 +461,16 @@ void ReplicationApplier::reconfigure(ReplicationApplierConfiguration const& conf
   storeConfiguration(true);
 }
 
+/// @brief load the applier state from persistent storage
+bool ReplicationApplier::loadState() {
+  WRITE_LOCKER_EVENTUAL(readLocker, _statusLock);
+  return loadStateNoLock();
+}
+
 /// @brief load the applier state from persistent storage
+/// must currently be called while holding the write-lock
 /// returns whether a previous state was found
-bool ReplicationApplier::loadState() {
+bool ReplicationApplier::loadStateNoLock() {
   if (!applies()) {
     // unsupported
     return false;

@@ -553,7 +559,6 @@ void ReplicationApplier::persistState(bool doSync) {
 }
 
 Result ReplicationApplier::persistStateResult(bool doSync) {
-
   LOG_TOPIC(TRACE, Logger::REPLICATION)
       << "saving replication applier state. last applied continuous tick: "
      << this->_state._lastAppliedContinuousTick

@@ -637,7 +642,7 @@ void ReplicationApplier::setError(arangodb::Result const& r) {
 }
 
 Result ReplicationApplier::lastError() const {
-  WRITE_LOCKER_EVENTUAL(writeLocker, _statusLock);
+  READ_LOCKER_EVENTUAL(writeLocker, _statusLock);
   return Result(_state._lastError.code, _state._lastError.message);
 }
 

@@ -103,10 +103,13 @@ class ReplicationApplier {
   /// @brief configure the replication applier
   virtual void reconfigure(ReplicationApplierConfiguration const& configuration);
 
+  /// @brief load the applier state from persistent storage
+  bool loadState();
+
   /// @brief load the applier state from persistent storage
+  /// must currently be called while holding the write-lock
   /// returns whether a previous state was found
-  bool loadState();
+  bool loadStateNoLock();
 
   /// @brief store the configuration for the applier
   virtual void storeConfiguration(bool doSync) = 0;
@@ -43,9 +43,16 @@ ReplicationApplierState::ReplicationApplierState()
       _totalRequests(0),
       _totalFailedConnects(0),
       _totalEvents(0),
+      _totalDocuments(0),
+      _totalRemovals(0),
       _totalResyncs(0),
-      _skippedOperations(0) {
+      _totalSkippedOperations(0),
+      _totalApplyTime(0.0),
+      _totalApplyInstances(0),
+      _totalFetchTime(0.0),
+      _totalFetchInstances(0) {
   _progressTime[0] = '\0';
+  _startTime[0] = '\0';
 }
 
 ReplicationApplierState::~ReplicationApplierState() {}

@@ -61,6 +68,7 @@ ReplicationApplierState& ReplicationApplierState::operator=(ReplicationApplierSt
   _serverId = other._serverId;
   _progressMsg = other._progressMsg;
   memcpy(&_progressTime[0], &other._progressTime[0], sizeof(_progressTime));
+  memcpy(&_startTime[0], &other._startTime[0], sizeof(_startTime));
 
   _lastError.code = other._lastError.code;
   _lastError.message = other._lastError.message;

@@ -70,8 +78,14 @@ ReplicationApplierState& ReplicationApplierState::operator=(ReplicationApplierSt
   _totalRequests = other._totalRequests;
   _totalFailedConnects = other._totalFailedConnects;
   _totalEvents = other._totalEvents;
+  _totalDocuments = other._totalDocuments;
+  _totalRemovals = other._totalRemovals;
   _totalResyncs = other._totalResyncs;
-  _skippedOperations = other._skippedOperations;
+  _totalSkippedOperations = other._totalSkippedOperations;
+  _totalApplyTime = other._totalApplyTime;
+  _totalApplyInstances = other._totalApplyInstances;
+  _totalFetchTime = other._totalFetchTime;
+  _totalFetchInstances = other._totalFetchInstances;
 
   return *this;
 }

@@ -81,11 +95,23 @@ void ReplicationApplierState::reset(bool resetPhase, bool reducedSet) {
   _lastAppliedContinuousTick = 0;
   _safeResumeTick = 0;
   _failedConnects = 0;
-  _totalRequests = 0;
-  _totalFailedConnects = 0;
-  _totalResyncs = 0;
+  // don't need to reset the following
+  // _totalFailedConnects = 0;
+  // _totalResyncs = 0;
+  // _totalRequests = 0;
+  // _totalEvents = 0;
+  // _totalDocuments = 0;
+  // _totalRemovals = 0;
+  // _totalSkippedOperations = 0;
+  // _totalApplyTime = 0.0;
+  // _totalApplyInstances = 0;
+  // _totalFetchTime = 0.0;
+  // _totalFetchInstances = 0;
+  // _startTime[0] = '\0';
 
-  if(reducedSet) { return; }
+  if (reducedSet) {
+    return;
+  }
 
   _lastAvailableContinuousTick = 0;
   _preventStart = false;

@@ -95,9 +121,6 @@ void ReplicationApplierState::reset(bool resetPhase, bool reducedSet) {
   _serverId = 0;
   _lastError.reset();
 
-  _totalEvents = 0;
-  _skippedOperations = 0;
-
   if (resetPhase) {
     _phase = ActivityPhase::INACTIVE;
   }

@@ -121,6 +144,7 @@ void ReplicationApplierState::toVelocyPack(VPackBuilder& result, bool full) cons
   result.openObject();
 
   if (full) {
+    result.add("started", VPackValue(_startTime));
     result.add("running", VPackValue(isTailing())); // isRunning
     result.add("phase", VPackValue(ActivityToString(_phase)));
 

@@ -156,9 +180,12 @@ void ReplicationApplierState::toVelocyPack(VPackBuilder& result, bool full) cons
   }
 
   if (isTailing()) {
-    TRI_voc_tick_t ticksBehind =
+    int64_t ticksBehind =
         _lastAvailableContinuousTick -
         std::max(_lastAppliedContinuousTick, _lastProcessedContinuousTick);
+    if (ticksBehind < 0) {
+      ticksBehind = 0;
+    }
     result.add("ticksBehind", VPackValue(ticksBehind));
   }
 

@@ -174,8 +201,22 @@ void ReplicationApplierState::toVelocyPack(VPackBuilder& result, bool full) cons
   result.add("totalRequests", VPackValue(_totalRequests));
   result.add("totalFailedConnects", VPackValue(_totalFailedConnects));
   result.add("totalEvents", VPackValue(_totalEvents));
+  result.add("totalDocuments", VPackValue(_totalDocuments));
+  result.add("totalRemovals", VPackValue(_totalRemovals));
   result.add("totalResyncs", VPackValue(_totalResyncs));
-  result.add("totalOperationsExcluded", VPackValue(_skippedOperations));
+  result.add("totalOperationsExcluded", VPackValue(_totalSkippedOperations));
+  result.add("totalApplyTime", VPackValue(_totalApplyTime));
+  if (_totalApplyInstances == 0) {
+    result.add("averageApplyTime", VPackValue(0));
+  } else {
+    result.add("averageApplyTime", VPackValue(_totalApplyTime / _totalApplyInstances));
+  }
+  result.add("totalFetchTime", VPackValue(_totalFetchTime));
+  if (_totalFetchInstances == 0) {
+    result.add("averageFetchTime", VPackValue(0));
+  } else {
+    result.add("averageFetchTime", VPackValue(_totalFetchTime / _totalFetchInstances));
+  }
 
   // lastError
   result.add(VPackValue("lastError"));

@@ -195,3 +236,9 @@ void ReplicationApplierState::toVelocyPack(VPackBuilder& result, bool full) cons
 
   result.close();
 }
+
+void ReplicationApplierState::setStartTime() {
+  if (_startTime[0] == '\0') {
+    TRI_GetTimeStampReplication(_startTime, sizeof(_startTime) - 1);
+  }
+}

@@ -68,6 +68,7 @@ struct ReplicationApplierState {
   std::string _progressMsg;
   char _progressTime[24];
   TRI_server_id_t _serverId;
+  char _startTime[24];
 
   /// performs initial sync or running tailing syncer
   bool isActive() const {

@@ -84,7 +85,7 @@ struct ReplicationApplierState {
 
   void setError(int code, std::string const& msg) { _lastError.set(code, msg); }
 
-  void clearError() { _lastError.reset(); }
+  void setStartTime();
 
   // last error that occurred during replication
   struct LastError {

@@ -127,8 +128,22 @@ struct ReplicationApplierState {
   uint64_t _totalRequests;
   uint64_t _totalFailedConnects;
   uint64_t _totalEvents;
+  uint64_t _totalDocuments;
+  uint64_t _totalRemovals;
   uint64_t _totalResyncs;
-  uint64_t _skippedOperations;
+  uint64_t _totalSkippedOperations;
+
+  /// @brief total time spend in applyLog()
+  double _totalApplyTime;
+
+  /// @brief number of times we called applyLog()
+  uint64_t _totalApplyInstances;
+
+  /// @brief total time spend for fetching data from leader
+  double _totalFetchTime;
+
+  /// @brief number of times data was fetched from leader
+  uint64_t _totalFetchInstances;
 };
 
 } // namespace arangodb
@@ -136,7 +136,7 @@ void ReplicationFeature::stop() {
 
 void ReplicationFeature::unprepare() {
   if (_globalReplicationApplier != nullptr) {
-    _globalReplicationApplier->stop();
+    _globalReplicationApplier->stopAndJoin();
   }
   _globalReplicationApplier.reset();
 }
@@ -142,10 +142,10 @@ arangodb::Result applyCollectionDumpMarkerInternal(
     bool useReplace = false;
     VPackSlice keySlice = arangodb::transaction::helpers::extractKeyFromDocument(slice);
 
-    // if we are about to process a single document marker we first check if the target
-    // document exists. if yes, we don't try an insert (which would fail anyway) but carry
-    // on with a replace.
-    if (keySlice.isString()) {
+    // if we are about to process a single document marker (outside of a multi-document
+    // transaction), we first check if the target document exists. if yes, we don't
+    // try an insert (which would fail anyway) but carry on with a replace.
+    if (trx.isSingleOperationTransaction() && keySlice.isString()) {
       arangodb::LocalDocumentId const oldDocumentId =
           coll->getPhysical()->lookupKey(&trx, keySlice);
       if (oldDocumentId.isSet()) {

@@ -250,7 +250,7 @@ arangodb::Result applyCollectionDumpMarkerInternal(
 namespace arangodb {
 
 Syncer::JobSynchronizer::JobSynchronizer(std::shared_ptr<Syncer const> const& syncer)
-    : _syncer(syncer), _gotResponse(false), _jobsInFlight(0) {}
+    : _syncer(syncer), _gotResponse(false), _time(0.0), _jobsInFlight(0) {}
 
 Syncer::JobSynchronizer::~JobSynchronizer() {
   // signal that we have got something

@@ -269,24 +269,26 @@ Syncer::JobSynchronizer::~JobSynchronizer() {
 
 /// @brief will be called whenever a response for the job comes in
 void Syncer::JobSynchronizer::gotResponse(
-    std::unique_ptr<arangodb::httpclient::SimpleHttpResult> response) noexcept {
+    std::unique_ptr<arangodb::httpclient::SimpleHttpResult> response, double time) noexcept {
   CONDITION_LOCKER(guard, _condition);
   _res.reset(); // no error!
   _response = std::move(response);
   _gotResponse = true;
+  _time = time;
 
   guard.signal();
 }
 
 /// @brief will be called whenever an error occurred
 /// expects "res" to be an error!
-void Syncer::JobSynchronizer::gotResponse(arangodb::Result&& res) noexcept {
+void Syncer::JobSynchronizer::gotResponse(arangodb::Result&& res, double time) noexcept {
   TRI_ASSERT(res.fail());
 
   CONDITION_LOCKER(guard, _condition);
   _res = std::move(res);
   _response.reset();
   _gotResponse = true;
+  _time = time;
 
   guard.signal();
 }

@@ -415,7 +417,7 @@ Syncer::Syncer(ReplicationApplierConfiguration const& configuration)
   } else if (_state.applier._chunkSize < 16 * 1024) {
     _state.applier._chunkSize = 16 * 1024;
   }
 
   // get our own server-id
   _state.localServerId = ServerIdFeature::getId();
   _state.localServerIdString = basics::StringUtils::itoa(_state.localServerId);

@@ -66,11 +66,11 @@ class Syncer : public std::enable_shared_from_this<Syncer> {
     ~JobSynchronizer();
 
     /// @brief will be called whenever a response for the job comes in
-    void gotResponse(std::unique_ptr<arangodb::httpclient::SimpleHttpResult> response) noexcept;
+    void gotResponse(std::unique_ptr<arangodb::httpclient::SimpleHttpResult> response, double time) noexcept;
 
     /// @brief will be called whenever an error occurred
     /// expects "res" to be an error!
-    void gotResponse(arangodb::Result&& res) noexcept;
+    void gotResponse(arangodb::Result&& res, double time = 0.0) noexcept;
 
     /// @brief the calling Syncer will call and block inside this function until
     /// there is a response or the syncer/server is shut down

@@ -92,6 +92,9 @@ class Syncer : public std::enable_shared_from_this<Syncer> {
     /// @brief checks if there are jobs in flight (can be 0 or 1 job only)
     bool hasJobInFlight() const noexcept;
 
+    /// @brief return the stored elapsed time for the job
+    double time() const noexcept { return _time; }
+
    private:
     /// @brief the shared syncer we use to check if sychronization was
     /// externally aborted

@@ -103,6 +106,9 @@ class Syncer : public std::enable_shared_from_this<Syncer> {
     /// @brief true if a response was received
     bool _gotResponse;
 
+    /// @brief elapsed time for performing the job
+    double _time;
+
     /// @brief the processing response of the job (indicates failure if no
     /// response was received or if something went wrong)
     arangodb::Result _res;
@ -68,6 +68,7 @@ namespace {
|
|||
static arangodb::velocypack::StringRef const cnameRef("cname");
|
||||
static arangodb::velocypack::StringRef const dataRef("data");
|
||||
static arangodb::velocypack::StringRef const tickRef("tick");
|
||||
static arangodb::velocypack::StringRef const dbRef("db");
|
||||
|
||||
bool hasHeader(std::unique_ptr<httpclient::SimpleHttpResult> const& response,
|
||||
std::string const& name) {
|
||||
|
@ -163,6 +164,56 @@ void TailingSyncer::abortOngoingTransactions() noexcept {
|
|||
}
|
||||
}
|
||||
|
||||
/// @brief abort all ongoing transactions for a specific database
|
||||
void TailingSyncer::abortOngoingTransactions(std::string const& dbName) {
|
||||
for (auto it = _ongoingTransactions.begin(); it != _ongoingTransactions.end(); /* no hoisting */) {
|
||||
auto& trx = (*it).second;
|
||||
if (trx != nullptr && trx->vocbase().name() == dbName) {
|
||||
LOG_TOPIC(TRACE, Logger::REPLICATION) << "aborting open transaction for db " << dbName;
|
||||
it = _ongoingTransactions.erase(it);
|
||||
} else {
|
||||
++it;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// @brief count all ongoing transactions for a specific database
|
||||
#ifdef ARANGODB_ENABLE_MAINTAINER_MODE
|
||||
size_t TailingSyncer::countOngoingTransactions(VPackSlice slice) const {
|
||||
size_t result = 0;
|
||||
|
||||
TRI_ASSERT(slice.isObject());
|
||||
VPackSlice nameSlice = slice.get(::dbRef);
|
||||
|
||||
if (nameSlice.isString()) {
|
||||
for (auto const& it : _ongoingTransactions) {
|
||||
auto const& trx = it.second;
|
||||
if (trx != nullptr && arangodb::velocypack::StringRef(nameSlice) == trx->vocbase().name()) {
|
||||
++result;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
#endif
|
||||
|
||||
/// @brief whether or not the are multiple ongoing transactions for one
|
||||
/// database
|
||||
#ifdef ARANGODB_ENABLE_MAINTAINER_MODE
|
||||
bool TailingSyncer::hasMultipleOngoingTransactions() const {
|
||||
std::unordered_set<TRI_voc_tick_t> found;
|
||||
for (auto const& it : _ongoingTransactions) {
|
||||
auto const& trx = it.second;
|
||||
if (trx != nullptr && !found.emplace(trx->vocbase().id()).second) {
|
||||
// found a duplicate
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
#endif
|
||||
|
||||
/// @brief whether or not a marker should be skipped
|
||||
bool TailingSyncer::skipMarker(TRI_voc_tick_t firstRegularTick, VPackSlice const& slice,
|
||||
TRI_voc_tick_t actualMarkerTick, TRI_replication_operation_e type) {
|
||||
|
@ -192,6 +243,10 @@ bool TailingSyncer::skipMarker(TRI_voc_tick_t firstRegularTick, VPackSlice const
|
|||
}
|
||||
}
|
||||
|
||||
if (tooOld) {
|
||||
return true;
|
||||
}
|
||||
|
||||
// the transient applier state is just used for one shard / collection
|
||||
if (_state.applier._restrictCollections.empty()) {
|
||||
return false;
|
||||
|
@ -246,7 +301,7 @@ Result TailingSyncer::processDBMarker(TRI_replication_operation_e type,
|
|||
TRI_ASSERT(!_ignoreDatabaseMarkers);
|
||||
|
||||
// the new wal access protocol contains database names
|
||||
VPackSlice const nameSlice = slice.get("db");
|
||||
VPackSlice const nameSlice = slice.get(::dbRef);
|
||||
if (!nameSlice.isString()) {
|
||||
THROW_ARANGO_EXCEPTION_MESSAGE(
|
||||
TRI_ERROR_REPLICATION_INVALID_RESPONSE,
|
||||
|
@ -300,6 +355,9 @@ Result TailingSyncer::processDBMarker(TRI_replication_operation_e type,
|
|||
TRI_vocbase_t* vocbase = DatabaseFeature::DATABASE->lookupDatabase(name);
|
||||
|
||||
if (vocbase != nullptr && name != TRI_VOC_SYSTEM_DATABASE) {
|
||||
// abort all ongoing transactions for the database to be dropped
|
||||
abortOngoingTransactions(name);
|
||||
|
||||
auto system = sysDbFeature->use();
|
||||
TRI_ASSERT(system.get());
|
||||
// delete from cache by id and name
|
||||
|
@ -529,7 +587,9 @@ Result TailingSyncer::startTransaction(VPackSlice const& slice) {
|
|||
// "collections":[{"cid":"230920700700391","operations":10}]}
|
||||
|
||||
TRI_vocbase_t* vocbase = resolveVocbase(slice);
|
||||
|
||||
if (vocbase == nullptr) {
|
||||
// for any other case, return "database not found" and abort the replication
|
||||
return Result(TRI_ERROR_ARANGO_DATABASE_NOT_FOUND);
|
||||
}
|
||||
|
||||
|
@ -555,7 +615,9 @@ Result TailingSyncer::startTransaction(VPackSlice const& slice) {
|
|||
|
||||
LOG_TOPIC(TRACE, Logger::REPLICATION) << "starting replication transaction " << tid;
|
||||
|
||||
TRI_ASSERT(_ongoingTransactions.empty() || _supportsMultipleOpenTransactions);
|
||||
#ifdef ARANGODB_ENABLE_MAINTAINER_MODE
|
||||
TRI_ASSERT(_supportsMultipleOpenTransactions || countOngoingTransactions(slice) == 0);
|
||||
#endif
|
||||
|
||||
auto trx = std::make_unique<ReplicationTransaction>(*vocbase);
|
||||
Result res = trx->begin();
|
||||
|
@ -615,7 +677,6 @@ Result TailingSyncer::commitTransaction(VPackSlice const& slice) {
|
|||
auto it = _ongoingTransactions.find(tid);
|
||||
|
||||
if (it == _ongoingTransactions.end() || (*it).second == nullptr) {
|
||||
// invalid state, no transaction was started.
|
||||
return Result(TRI_ERROR_REPLICATION_UNEXPECTED_TRANSACTION);
|
||||
}
|
||||
|
||||
|
@ -628,7 +689,9 @@ Result TailingSyncer::commitTransaction(VPackSlice const& slice) {
|
|||
|
||||
_ongoingTransactions.erase(it);
|
||||
|
||||
TRI_ASSERT(_ongoingTransactions.empty() || _supportsMultipleOpenTransactions);
|
||||
#ifdef ARANGODB_ENABLE_MAINTAINER_MODE
|
||||
TRI_ASSERT(_supportsMultipleOpenTransactions || countOngoingTransactions(slice) == 0);
|
||||
#endif
|
||||
return res;
|
||||
}
|
||||
|
||||
|
@ -774,6 +837,7 @@ Result TailingSyncer::truncateCollection(arangodb::velocypack::Slice const& slic
|
|||
}
|
||||
|
||||
OperationOptions opts;
|
||||
opts.isRestore = true;
|
||||
OperationResult opRes = trx.truncate(col->name(), opts);
|
||||
|
||||
if (opRes.fail()) {
|
||||
|
@ -851,10 +915,18 @@ Result TailingSyncer::changeView(VPackSlice const& slice) {
|
|||
}
|
||||
|
||||
/// @brief apply a single marker from the continuous log
|
||||
Result TailingSyncer::applyLogMarker(VPackSlice const& slice, TRI_voc_tick_t firstRegularTick,
|
||||
TRI_voc_tick_t markerTick, TRI_replication_operation_e type) {
|
||||
Result TailingSyncer::applyLogMarker(VPackSlice const& slice,
|
||||
ApplyStats& applyStats,
|
||||
TRI_voc_tick_t firstRegularTick,
|
||||
TRI_voc_tick_t markerTick,
|
||||
TRI_replication_operation_e type) {
|
||||
// handle marker type
|
||||
if (type == REPLICATION_MARKER_DOCUMENT || type == REPLICATION_MARKER_REMOVE) {
|
||||
if (type == REPLICATION_MARKER_DOCUMENT) {
|
||||
++applyStats.processedDocuments;
|
||||
} else {
|
||||
++applyStats.processedRemovals;
|
||||
}
|
||||
try {
|
||||
return processDocument(type, slice);
|
||||
} catch (basics::Exception const& ex) {
|
||||
|
@ -977,7 +1049,7 @@ Result TailingSyncer::applyLogMarker(VPackSlice const& slice, TRI_voc_tick_t fir
|
|||
|
||||
/// @brief apply the data from the continuous log
|
||||
Result TailingSyncer::applyLog(SimpleHttpResult* response, TRI_voc_tick_t firstRegularTick,
|
||||
uint64_t& processedMarkers, uint64_t& ignoreCount) {
|
||||
ApplyStats& applyStats, uint64_t& ignoreCount) {
|
||||
// reload users if they were modified
|
||||
_usersModified = false;
|
||||
auto reloader = [this]() {
|
||||
|
@ -1016,7 +1088,7 @@ Result TailingSyncer::applyLog(SimpleHttpResult* response, TRI_voc_tick_t firstR
|
|||
|
||||
TRI_ASSERT(q <= end);
|
||||
|
||||
processedMarkers++;
|
||||
applyStats.processedMarkers++;
|
||||
|
||||
builder->clear();
|
||||
try {
|
||||
|
@ -1053,7 +1125,7 @@ Result TailingSyncer::applyLog(SimpleHttpResult* response, TRI_voc_tick_t firstR
|
|||
bool skipped = skipMarker(firstRegularTick, slice, markerTick, markerType);
|
||||
|
||||
if (!skipped) {
|
||||
Result res = applyLogMarker(slice, firstRegularTick, markerTick, markerType);
|
||||
Result res = applyLogMarker(slice, applyStats, firstRegularTick, markerTick, markerType);
|
||||
|
||||
if (res.fail()) {
|
||||
// apply error
|
||||
|
@ -1092,7 +1164,7 @@ Result TailingSyncer::applyLog(SimpleHttpResult* response, TRI_voc_tick_t firstR
|
|||
}
|
||||
|
||||
if (skipped) {
|
||||
++_applier->_state._skippedOperations;
|
||||
++_applier->_state._totalSkippedOperations;
|
||||
} else if (_ongoingTransactions.empty()) {
|
||||
_applier->_state._safeResumeTick = _applier->_state._lastProcessedContinuousTick;
|
||||
}
|
||||
|
@ -1106,6 +1178,9 @@ Result TailingSyncer::applyLog(SimpleHttpResult* response, TRI_voc_tick_t firstR
|
|||
/// catches exceptions
|
||||
Result TailingSyncer::run() {
|
||||
try {
|
||||
auto guard = scopeGuard([this]() {
|
||||
abortOngoingTransactions();
|
||||
});
|
||||
return runInternal();
|
||||
} catch (arangodb::basics::Exception const& ex) {
|
||||
return Result(ex.code(),
|
||||
|
@ -1136,6 +1211,9 @@ Result TailingSyncer::runInternal() {
|
|||
uint64_t shortTermFailsInRow = 0;
|
||||
|
||||
retry:
|
||||
// just to be sure we are starting/restarting from a clean state
|
||||
abortOngoingTransactions();
|
||||
|
||||
double const start = TRI_microtime();
|
||||
|
||||
Result res;
|
||||
|
@ -1145,6 +1223,7 @@ retry:
|
|||
{
|
||||
WRITE_LOCKER_EVENTUAL(writeLocker, _applier->_statusLock);
|
||||
_applier->_state._failedConnects = 0;
|
||||
_applier->_state.setStartTime();
|
||||
}
|
||||
|
||||
while (true) {
|
||||
|
@ -1237,17 +1316,18 @@ retry:
|
|||
}
|
||||
|
||||
// remove previous applier state
|
||||
abortOngoingTransactions(); //ties to clear map - no further side effects
|
||||
abortOngoingTransactions(); //tries to clear map - no further side effects
|
||||
|
||||
LOG_TOPIC(DEBUG, Logger::REPLICATION)
|
||||
<< "stopped replication applier for database '" << _state.databaseName;
|
||||
auto rv = _applier->resetState(true /*reducedSet*/);
|
||||
if(rv.fail()){
|
||||
|
||||
setAborted(false);
|
||||
|
||||
if (rv.fail()) {
|
||||
return rv;
|
||||
}
|
||||
|
||||
setAborted(false);
|
||||
|
||||
if (!_state.applier._autoResync) {
|
||||
LOG_TOPIC(INFO, Logger::REPLICATION)
|
||||
<< "Auto resync disabled, applier will stop";
|
||||
|
@ -1269,7 +1349,7 @@ retry:
|
|||
// message only makes sense if there's at least one retry
|
||||
LOG_TOPIC(WARN, Logger::REPLICATION)
|
||||
<< "aborting automatic resynchronization for database '" << _state.databaseName
|
||||
<< "' after " << _state.applier._autoResyncRetries << " retries";
|
||||
<< "' after " << _state.applier._autoResyncRetries << " short-term retries";
|
||||
} else {
|
||||
LOG_TOPIC(WARN, Logger::REPLICATION)
|
||||
<< "aborting automatic resynchronization for database '"
|
||||
|
@ -1280,6 +1360,12 @@ retry:
|
|||
_applier->stop(res);
|
||||
return res;
|
||||
}
|
||||
|
||||
// do an automatic full resync
|
||||
LOG_TOPIC(WARN, Logger::REPLICATION)
|
||||
<< "restarting initial synchronization for database '" << _state.databaseName
|
||||
<< "' because autoResync option is set. retry #" << shortTermFailsInRow
|
||||
<< " of " << _state.applier._autoResyncRetries;
|
||||
|
||||
{
|
||||
// increase number of syncs counter
|
||||
|
@ -1288,14 +1374,9 @@ retry:
|
|||
|
||||
// necessary to reset the state here, because otherwise running the
|
||||
// InitialSyncer may fail with "applier is running" errors
|
||||
_applier->_state._phase = ReplicationApplierState::ActivityPhase::INACTIVE;
|
||||
_applier->_state._phase = ReplicationApplierState::ActivityPhase::INITIAL;
|
||||
}
|
||||
|
||||
// do an automatic full resync
|
||||
LOG_TOPIC(WARN, Logger::REPLICATION)
|
||||
<< "restarting initial synchronization for database '" << _state.databaseName
|
||||
<< "' because autoResync option is set. retry #" << shortTermFailsInRow;
|
||||
|
||||
// start initial synchronization
|
||||
try {
|
||||
std::shared_ptr<InitialSyncer> syncer = _applier->buildInitialSyncer();
|
||||
|
@ -1309,6 +1390,12 @@ retry:
|
|||
<< lastLogTick;
|
||||
_initialTick = lastLogTick;
|
||||
_useTick = true;
|
||||
|
||||
{
|
||||
WRITE_LOCKER_EVENTUAL(writeLocker, _applier->_statusLock);
|
||||
_applier->_state.setStartTime();
|
||||
}
|
||||
_applier->markThreadTailing();
|
||||
goto retry;
|
||||
}
|
||||
res.reset(r.errorNumber(), r.errorMessage());
|
||||
|
@ -1319,6 +1406,9 @@ retry:
|
|||
res.reset(TRI_ERROR_INTERNAL,
|
||||
"caught unknown exception during initial replication");
|
||||
}
|
||||
|
||||
WRITE_LOCKER_EVENTUAL(writeLocker, _applier->_statusLock);
|
||||
_applier->_state._phase = ReplicationApplierState::ActivityPhase::INACTIVE;
|
||||
}
|
||||
|
||||
abortOngoingTransactions();
|
||||
|
@ -1335,7 +1425,7 @@ void TailingSyncer::getLocalState() {
|
|||
uint64_t oldTotalRequests = _applier->_state._totalRequests;
|
||||
uint64_t oldTotalFailedConnects = _applier->_state._totalFailedConnects;
|
||||
|
||||
bool const foundState = _applier->loadState();
|
||||
bool const foundState = _applier->loadStateNoLock();
|
||||
_applier->_state._totalRequests = oldTotalRequests;
|
||||
_applier->_state._totalFailedConnects = oldTotalFailedConnects;
|
||||
|
||||
|
@ -1601,16 +1691,12 @@ Result TailingSyncer::fetchOpenTransactions(TRI_voc_tick_t fromTick, TRI_voc_tic
|
|||
|
||||
if (!fromIncluded && fromTick > 0 &&
|
||||
(!_state.master.simulate32Client() || fromTick != readTick)) {
|
||||
const std::string msg =
|
||||
std::string("required init tick value '") + StringUtils::itoa(fromTick) +
|
||||
"' is not present (anymore?) on master at " + _state.master.endpoint +
|
||||
". Last tick available on master is '" + StringUtils::itoa(readTick) +
|
||||
"'. It may be required to do a full resync and increase the number "
|
||||
"of historic logfiles/WAL file timeout on the master.";
|
||||
if (_requireFromPresent) { // hard fail
|
||||
return Result(TRI_ERROR_REPLICATION_START_TICK_NOT_PRESENT, msg);
|
||||
Result r = handleRequiredFromPresentFailure(fromTick, readTick);
|
||||
TRI_ASSERT(_ongoingTransactions.empty());
|
||||
|
||||
if (r.fail()) {
|
||||
return r;
|
||||
}
|
||||
LOG_TOPIC(WARN, Logger::REPLICATION) << msg;
|
||||
}
|
||||
|
||||
startTick = readTick;
|
||||
|
@ -1647,7 +1733,9 @@ Result TailingSyncer::fetchOpenTransactions(TRI_voc_tick_t fromTick, TRI_voc_tic
|
|||
_ongoingTransactions.emplace(StringUtils::uint64(it.copyString()), nullptr);
|
||||
}
|
||||
|
||||
TRI_ASSERT(_ongoingTransactions.size() <= 1 || _supportsMultipleOpenTransactions);
|
||||
#ifdef ARANGODB_ENABLE_MAINTAINER_MODE
|
||||
TRI_ASSERT(_supportsMultipleOpenTransactions || !hasMultipleOngoingTransactions());
|
||||
#endif
|
||||
|
||||
{
|
||||
std::string const progress =
|
||||
|
@ -1712,18 +1800,22 @@ void TailingSyncer::fetchMasterLog(std::shared_ptr<Syncer::JobSynchronizer> shar
|
|||
std::string body = builder.slice().toJson();
|
||||
|
||||
std::unique_ptr<httpclient::SimpleHttpResult> response;
|
||||
double time = TRI_microtime();
|
||||
|
||||
_state.connection.lease([&](httpclient::SimpleHttpClient* client) {
|
||||
response.reset(client->request(rest::RequestType::PUT, url, body.c_str(),
|
||||
body.size()));
|
||||
});
|
||||
|
||||
time = TRI_microtime() - time;
|
||||
|
||||
if (replutils::hasFailed(response.get())) {
|
||||
// failure
|
||||
sharedStatus->gotResponse(
|
||||
replutils::buildHttpError(response.get(), url, _state.connection));
|
||||
replutils::buildHttpError(response.get(), url, _state.connection), time);
|
||||
} else {
|
||||
// success!
|
||||
sharedStatus->gotResponse(std::move(response));
|
||||
sharedStatus->gotResponse(std::move(response), time);
|
||||
}
|
||||
} catch (basics::Exception const& ex) {
|
||||
sharedStatus->gotResponse(Result(ex.code(), ex.what()));
|
||||
|
@ -1794,9 +1886,17 @@ Result TailingSyncer::processMasterLog(std::shared_ptr<Syncer::JobSynchronizer>
|
|||
|
||||
TRI_voc_tick_t lastIncludedTick =
|
||||
getUIntHeader(response, StaticStrings::ReplicationHeaderLastIncluded);
|
||||
TRI_voc_tick_t tick = getUIntHeader(response, StaticStrings::ReplicationHeaderLastTick);
|
||||
TRI_voc_tick_t const tick = getUIntHeader(response, StaticStrings::ReplicationHeaderLastTick);
|
||||
|
||||
LOG_TOPIC(DEBUG, Logger::REPLICATION) << "applyLog. fetchTick: " << fetchTick << ", checkMore: " << checkMore << ", fromIncluded: " << fromIncluded << ", lastScannedTick: " << lastScannedTick << ", lastIncludedTick: " << lastIncludedTick << ", tick: " << tick;
|
||||
|
||||
#ifdef ARANGODB_ENABLE_MAINTAINER_MODE
|
||||
// some temporary debug output here. TODO: remove this later when the assert does not trigger anymore
|
||||
if (tick < lastIncludedTick) {
|
||||
LOG_TOPIC(ERR, Logger::FIXME) << "oops. tick: " << tick << ", lastIncludedTick: " << lastIncludedTick << ", response: " << response->getBody();
|
||||
}
|
||||
#endif
|
||||
TRI_ASSERT(tick >= lastIncludedTick);
|
||||
|
||||
if (lastIncludedTick == 0 && lastScannedTick > 0 && lastScannedTick > fetchTick) {
|
||||
// master did not have any news for us
|
||||
|
@ -1838,21 +1938,21 @@ Result TailingSyncer::processMasterLog(std::shared_ptr<Syncer::JobSynchronizer>
|
|||
_applier->_state._lastAvailableContinuousTick = tick;
|
||||
|
||||
lastAppliedTick = _applier->_state._lastAppliedContinuousTick;
|
||||
|
||||
TRI_ASSERT(_applier->_state._lastAvailableContinuousTick >= _applier->_state._lastAppliedContinuousTick);
|
||||
|
||||
_applier->_state._totalFetchTime += sharedStatus->time();
|
||||
_applier->_state._totalFetchInstances++;
|
||||
}
|
||||
|
||||
if (!fromIncluded && fetchTick > 0 &&
|
||||
(!_state.master.simulate32Client() || originalFetchTick != tick)) {
|
||||
const std::string msg =
|
||||
std::string("required follow tick value '") + StringUtils::itoa(fetchTick) +
|
||||
"' is not present (anymore?) on master at " + _state.master.endpoint +
|
||||
". Last tick available on master is '" + StringUtils::itoa(tick) +
|
||||
"'. It may be required to do a full resync and increase "
|
||||
"the number " +
|
||||
"of historic logfiles/WAL file timeout on the master";
|
||||
if (_requireFromPresent) { // hard fail
|
||||
return Result(TRI_ERROR_REPLICATION_START_TICK_NOT_PRESENT, msg);
|
||||
Result r = handleRequiredFromPresentFailure(fetchTick, tick);
|
||||
TRI_ASSERT(_ongoingTransactions.empty());
|
||||
|
||||
if (r.fail()) {
|
||||
return r;
|
||||
}
|
||||
LOG_TOPIC(WARN, Logger::REPLICATION) << msg;
|
||||
}
|
||||
|
||||
// already fetch next batch of data in the background...
|
||||
|
@ -1869,27 +1969,35 @@ Result TailingSyncer::processMasterLog(std::shared_ptr<Syncer::JobSynchronizer>
|
|||
});
|
||||
}
|
||||
|
||||
uint64_t processedMarkers = 0;
|
||||
Result r = applyLog(response.get(), firstRegularTick, processedMarkers, ignoreCount);
|
||||
ApplyStats applyStats;
|
||||
double time = TRI_microtime();
|
||||
Result r = applyLog(response.get(), firstRegularTick, applyStats, ignoreCount);
|
||||
time = TRI_microtime() - time;
|
||||
|
||||
if (r.fail()) {
|
||||
LOG_TOPIC(DEBUG, Logger::REPLICATION) << "applyLog failed with error: " << r.errorMessage();
|
||||
} else {
|
||||
LOG_TOPIC(DEBUG, Logger::REPLICATION) << "applyLog successful, lastAppliedTick: " << lastAppliedTick << ", firstRegularTick: " << firstRegularTick << ", processedMarkers: " << processedMarkers;
|
||||
LOG_TOPIC(DEBUG, Logger::REPLICATION) << "applyLog successful, lastAppliedTick: " << lastAppliedTick << ", firstRegularTick: " << firstRegularTick << ", processedMarkers: " << applyStats.processedMarkers;
|
||||
}
|
||||
|
||||
// cppcheck-suppress *
|
||||
if (processedMarkers > 0) {
|
||||
if (applyStats.processedMarkers > 0) {
|
||||
worked = true;
|
||||
|
||||
WRITE_LOCKER_EVENTUAL(writeLocker, _applier->_statusLock);
|
||||
|
||||
_applier->_state._totalEvents += processedMarkers;
|
||||
_applier->_state._totalApplyTime += time;
|
||||
_applier->_state._totalApplyInstances++;
|
||||
_applier->_state._totalEvents += applyStats.processedMarkers;
|
||||
_applier->_state._totalDocuments += applyStats.processedDocuments;
|
||||
_applier->_state._totalRemovals += applyStats.processedRemovals;
|
||||
|
||||
if (_applier->_state._lastAppliedContinuousTick != lastAppliedTick) {
|
||||
_hasWrittenState = true;
|
||||
saveApplierState();
|
||||
}
|
||||
|
||||
TRI_ASSERT(_applier->_state._lastAvailableContinuousTick >= _applier->_state._lastAppliedContinuousTick);
|
||||
} else if (bumpTick) {
|
||||
WRITE_LOCKER_EVENTUAL(writeLocker, _applier->_statusLock);
|
||||
|
||||
|
@ -1909,6 +2017,8 @@ Result TailingSyncer::processMasterLog(std::shared_ptr<Syncer::JobSynchronizer>
|
|||
_hasWrittenState = true;
|
||||
saveApplierState();
|
||||
}
|
||||
|
||||
TRI_ASSERT(_applier->_state._lastAvailableContinuousTick >= _applier->_state._lastAppliedContinuousTick);
|
||||
}
|
||||
|
||||
if (!_hasWrittenState && _useTick) {
|
||||
|
@ -1925,6 +2035,8 @@ Result TailingSyncer::processMasterLog(std::shared_ptr<Syncer::JobSynchronizer>
|
|||
}
|
||||
|
||||
saveApplierState();
|
||||
|
||||
TRI_ASSERT(_applier->_state._lastAvailableContinuousTick >= _applier->_state._lastAppliedContinuousTick);
|
||||
}
|
||||
|
||||
if (r.fail()) {
|
||||
|
@ -1962,3 +2074,31 @@ void TailingSyncer::checkParallel() {
|
|||
_workInParallel = true;
|
||||
}
|
||||
}
|
||||
|
||||
Result TailingSyncer::handleRequiredFromPresentFailure(TRI_voc_tick_t fromTick,
|
||||
TRI_voc_tick_t readTick) {
|
||||
std::string const msg =
|
||||
std::string("required init tick value '") + StringUtils::itoa(fromTick) +
|
||||
"' is not present (anymore?) on master at " + _state.master.endpoint +
|
||||
". Last tick available on master is '" + StringUtils::itoa(readTick) +
|
||||
"'. It may be required to do a full resync and increase the number "
|
||||
"of historic logfiles/WAL file timeout or archive size on the master.";
|
||||
if (_requireFromPresent) { // hard fail
|
||||
abortOngoingTransactions();
|
||||
return Result(TRI_ERROR_REPLICATION_START_TICK_NOT_PRESENT, msg);
|
||||
}
|
||||
|
||||
// only print a warning about the failure, abort ongoing transactions and go on...
|
||||
// we may have data loss and follow-up failures here, but at least all these
|
||||
// will be either logged or make the replication fail later on
|
||||
LOG_TOPIC(WARN, Logger::REPLICATION) << msg;
|
||||
|
||||
// we have to abort any running ongoing transactions, as they will be
|
||||
// holding exclusive locks on the underlying collection(s)
|
||||
if (!_ongoingTransactions.empty()) {
|
||||
LOG_TOPIC(WARN, Logger::REPLICATION) << "aborting ongoing open transactions (" << _ongoingTransactions.size() << ")";
|
||||
abortOngoingTransactions();
|
||||
}
|
||||
|
||||
return Result();
|
||||
}
|
||||
|
|
|
@ -46,6 +46,12 @@ namespace velocypack {
|
|||
class Slice;
|
||||
}
|
||||
|
||||
struct ApplyStats {
|
||||
uint64_t processedMarkers = 0;
|
||||
uint64_t processedDocuments = 0;
|
||||
uint64_t processedRemovals = 0;
|
||||
};
|
||||
|
||||
class TailingSyncer : public Syncer {
 public:
  TailingSyncer(ReplicationApplier* applier, ReplicationApplierConfiguration const&,

@@ -57,7 +63,7 @@ class TailingSyncer : public Syncer {
  /// @brief run method, performs continuous synchronization
  /// catches exceptions
  Result run();

 protected:
  /// @brief decide based on _masterInfo which api to use
  virtual std::string tailingBaseUrl(std::string const& command);

@@ -68,6 +74,22 @@ class TailingSyncer : public Syncer {
  /// @brief abort all ongoing transactions
  void abortOngoingTransactions() noexcept;

  /// @brief abort all ongoing transactions for a specific database
  void abortOngoingTransactions(std::string const& dbName);

  /// @brief count all ongoing transactions for a specific database
  /// used only from within assertions
#ifdef ARANGODB_ENABLE_MAINTAINER_MODE
  size_t countOngoingTransactions(arangodb::velocypack::Slice slice) const;
#endif

  /// @brief whether or not there are multiple ongoing transactions for one
  /// database
  /// used only from within assertions
#ifdef ARANGODB_ENABLE_MAINTAINER_MODE
  bool hasMultipleOngoingTransactions() const;
#endif

  /// @brief whether or not a collection should be excluded
  bool skipMarker(TRI_voc_tick_t firstRegularTick, arangodb::velocypack::Slice const& slice,
                  TRI_voc_tick_t actualMarkerTick, TRI_replication_operation_e type);
@@ -106,15 +128,13 @@ class TailingSyncer : public Syncer {

  /// @brief apply a single marker from the continuous log
  Result applyLogMarker(arangodb::velocypack::Slice const& slice,
                        ApplyStats& applyStats,
                        TRI_voc_tick_t firstRegularTick, TRI_voc_tick_t markerTick,
                        TRI_replication_operation_e type);

  /// @brief apply the data from the continuous log
  Result applyLog(httpclient::SimpleHttpResult*, TRI_voc_tick_t firstRegularTick,
                  uint64_t& processedMarkers, uint64_t& ignoreCount);
  /// @brief get local replication applier state
  void getLocalState();
                  ApplyStats& applyStats, uint64_t& ignoreCount);

  /// @brief perform a continuous sync with the master
  Result runContinuousSync();

@@ -127,6 +147,9 @@ class TailingSyncer : public Syncer {
  virtual Result saveApplierState() = 0;

 private:
  /// @brief get local replication applier state
  void getLocalState();

  /// @brief run method, performs continuous synchronization
  /// internal method, may throw exceptions
  arangodb::Result runInternal();

@@ -152,6 +175,9 @@ class TailingSyncer : public Syncer {

  arangodb::Result removeSingleDocument(arangodb::LogicalCollection* coll, std::string const& key);

  arangodb::Result handleRequiredFromPresentFailure(TRI_voc_tick_t fromTick,
                                                    TRI_voc_tick_t readTick);

 protected:
  virtual bool skipMarker(arangodb::velocypack::Slice const& slice) = 0;
@@ -74,4 +74,29 @@ bool TRI_ExcludeCollectionReplication(std::string const& name, bool includeSyste
  return false;
}

#ifdef ARANGODB_ENABLE_MAINTAINER_MODE
char const* TRI_TranslateMarkerTypeReplication(TRI_replication_operation_e type) {
  switch (type) {
    case REPLICATION_DATABASE_CREATE: return "REPLICATION_DATABASE_CREATE";
    case REPLICATION_DATABASE_DROP: return "REPLICATION_DATABASE_DROP";
    case REPLICATION_COLLECTION_CREATE: return "REPLICATION_COLLECTION_CREATE";
    case REPLICATION_COLLECTION_DROP: return "REPLICATION_COLLECTION_DROP";
    case REPLICATION_COLLECTION_RENAME: return "REPLICATION_COLLECTION_RENAME";
    case REPLICATION_COLLECTION_CHANGE: return "REPLICATION_COLLECTION_CHANGE";
    case REPLICATION_COLLECTION_TRUNCATE: return "REPLICATION_COLLECTION_TRUNCATE";
    case REPLICATION_INDEX_CREATE: return "REPLICATION_INDEX_CREATE";
    case REPLICATION_INDEX_DROP: return "REPLICATION_INDEX_DROP";
    case REPLICATION_VIEW_CREATE: return "REPLICATION_VIEW_CREATE";
    case REPLICATION_VIEW_DROP: return "REPLICATION_VIEW_DROP";
    case REPLICATION_VIEW_CHANGE: return "REPLICATION_VIEW_CHANGE";
    case REPLICATION_TRANSACTION_START: return "REPLICATION_TRANSACTION_START";
    case REPLICATION_TRANSACTION_COMMIT: return "REPLICATION_TRANSACTION_COMMIT";
    case REPLICATION_TRANSACTION_ABORT: return "REPLICATION_TRANSACTION_ABORT";
    case REPLICATION_MARKER_DOCUMENT: return "REPLICATION_MARKER_DOCUMENT";
    case REPLICATION_MARKER_REMOVE: return "REPLICATION_MARKER_REMOVE";
    default: return "INVALID";
  }
}
#endif

}  // namespace arangodb
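The translation helper is compiled only in maintainer mode; a plausible use is debug logging while applying markers (illustrative sketch, not taken from the patch):

```cpp
#ifdef ARANGODB_ENABLE_MAINTAINER_MODE
  // hedged sketch: human-readable marker type in trace output
  LOG_TOPIC(TRACE, Logger::REPLICATION)
      << "applying marker of type " << TRI_TranslateMarkerTypeReplication(type);
#endif
```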
@@ -29,7 +29,7 @@
namespace arangodb {

/// @brief replication operations
typedef enum {
enum TRI_replication_operation_e {
  REPLICATION_INVALID = 0,

  // REPLICATION_STOP = 1000, // not used in ArangoDB 2.2 and higher

@@ -60,7 +60,7 @@ typedef enum {
  REPLICATION_MARKER_REMOVE = 2302,

  REPLICATION_MAX
} TRI_replication_operation_e;
};

/// @brief generate a timestamp string in a target buffer
void TRI_GetTimeStampReplication(char*, size_t);

@@ -73,6 +73,10 @@ bool TRI_ExcludeCollectionReplication(std::string const& name,
                                      bool includeSystem,
                                      bool includeFoxxQueues);

#ifdef ARANGODB_ENABLE_MAINTAINER_MODE
char const* TRI_TranslateMarkerTypeReplication(TRI_replication_operation_e type);
#endif

}  // namespace arangodb

#endif
@@ -228,10 +228,7 @@ void RestWalAccessHandler::handleCommandLastTick(WalAccess const* wal) {
}

void RestWalAccessHandler::handleCommandTail(WalAccess const* wal) {
  bool useVst = false;
  if (_request->transportType() == Endpoint::TransportType::VST) {
    useVst = true;
  }
  bool const useVst = (_request->transportType() == Endpoint::TransportType::VST);

  WalAccess::Filter filter;
  if (!parseFilter(filter)) {

@@ -273,7 +270,7 @@ void RestWalAccessHandler::handleCommandTail(WalAccess const* wal) {
  size_t length = 0;

  if (useVst) {
    result = wal->tail(filter, chunkSize, barrierId,
    result = wal->tail(filter, chunkSize, barrierId,
                       [&](TRI_vocbase_t* vocbase, VPackSlice const& marker) {
                         length++;

@@ -294,7 +291,7 @@ void RestWalAccessHandler::handleCommandTail(WalAccess const* wal) {
    basics::VPackStringBufferAdapter adapter(buffer.stringBuffer());
    // note: we need the CustomTypeHandler here
    VPackDumper dumper(&adapter, &opts);
    result = wal->tail(filter, chunkSize, barrierId,
    result = wal->tail(filter, chunkSize, barrierId,
                       [&](TRI_vocbase_t* vocbase, VPackSlice const& marker) {
                         length++;

@@ -317,9 +314,12 @@ void RestWalAccessHandler::handleCommandTail(WalAccess const* wal) {
  // transfer ownership of the buffer contents
  _response->setContentType(rest::ContentType::DUMP);

  TRI_ASSERT(result.latestTick() >= result.lastIncludedTick());
  TRI_ASSERT(result.latestTick() >= result.lastScannedTick());

  // set headers
  bool checkMore = result.lastIncludedTick() > 0 &&
                   result.lastIncludedTick() < result.latestTick();
  bool const checkMore = result.lastIncludedTick() > 0 &&
                         result.lastIncludedTick() < result.latestTick();
  _response->setHeaderNC(StaticStrings::ReplicationHeaderCheckMore,
                         checkMore ? "true" : "false");
  _response->setHeaderNC(StaticStrings::ReplicationHeaderLastIncluded,
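The `checkMore` header drives incremental tailing: it is true exactly when the response included at least one marker (`lastIncludedTick > 0`) and the server's WAL already contains newer ones (`lastIncludedTick < latestTick`), so a client keeps polling with its `from` parameter set to the last included tick until the flag turns false. The header names come from `StaticStrings`; the exact wire names are not shown in this patch.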
@@ -295,7 +295,7 @@ void RocksDBCollection::prepareIndexes(arangodb::velocypack::Slice indexesSlice)
    LOG_TOPIC(ERR, arangodb::Logger::ENGINES) << msg;
#ifdef ARANGODB_ENABLE_MAINTAINER_MODE
    for (auto it : _indexes) {
      LOG_TOPIC(ERR, arangodb::Logger::ENGINES) << "- " << it.get();
      LOG_TOPIC(ERR, arangodb::Logger::ENGINES) << "- " << it->context();
    }
#endif
    THROW_ARANGO_EXCEPTION_MESSAGE(TRI_ERROR_INTERNAL, msg);

@@ -562,6 +562,8 @@ Result RocksDBCollection::truncate(transaction::Methods& trx, OperationOptions&
  TRI_ASSERT(_objectId != 0);
  auto state = RocksDBTransactionState::toState(&trx);
  RocksDBMethods* mthds = state->rocksdbMethods();
  // don't compact on restore
  bool const doCompact = !options.isRestore;

  if (state->isOnlyExclusiveTransaction() &&
      state->hasHint(transaction::Hints::Hint::ALLOW_RANGE_DELETE) &&

@@ -645,7 +647,7 @@ Result RocksDBCollection::truncate(transaction::Methods& trx, OperationOptions&

  guard.fire();  // remove blocker

  if (numDocs > 64 * 1024) {
  if (numDocs > 64 * 1024 && doCompact) {
    // also compact the ranges in order to speed up all further accesses
    compact();
  }

@@ -716,7 +718,7 @@ Result RocksDBCollection::truncate(transaction::Methods& trx, OperationOptions&
  // reset to previous value after truncate is finished
  state->options().intermediateCommitCount = prvICC;

  if (found > 64 * 1024) {
  if (found > 64 * 1024 && doCompact) {
    // also compact the ranges in order to speed up all further accesses
    compact();
  }
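The effect of `doCompact`: a truncate issued during a restore (`options.isRestore == true`) now skips the post-truncate compaction even above the 64Ki-document threshold, while an ordinary user-issued truncate keeps the old behavior, presumably to avoid paying compaction cost in the middle of a restore.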
@@ -123,6 +123,46 @@ static constexpr uint64_t databaseIdForGlobalApplier = 0;
// handles for recovery helpers
std::vector<std::shared_ptr<RocksDBRecoveryHelper>> RocksDBEngine::_recoveryHelpers;

RocksDBFilePurgePreventer::RocksDBFilePurgePreventer(RocksDBEngine* engine)
    : _engine(engine) {
  TRI_ASSERT(_engine != nullptr);
  _engine->_purgeLock.readLock();
}

RocksDBFilePurgePreventer::~RocksDBFilePurgePreventer() {
  if (_engine != nullptr) {
    _engine->_purgeLock.unlockRead();
  }
}

RocksDBFilePurgePreventer::RocksDBFilePurgePreventer(RocksDBFilePurgePreventer&& other)
    : _engine(other._engine) {
  // steal engine from other
  other._engine = nullptr;
}

RocksDBFilePurgeEnabler::RocksDBFilePurgeEnabler(RocksDBEngine* engine)
    : _engine(nullptr) {
  TRI_ASSERT(engine != nullptr);

  if (engine->_purgeLock.tryWriteLock()) {
    // we got the lock
    _engine = engine;
  }
}

RocksDBFilePurgeEnabler::~RocksDBFilePurgeEnabler() {
  if (_engine != nullptr) {
    _engine->_purgeLock.unlockWrite();
  }
}

RocksDBFilePurgeEnabler::RocksDBFilePurgeEnabler(RocksDBFilePurgeEnabler&& other)
    : _engine(other._engine) {
  // steal engine from other
  other._engine = nullptr;
}

// create the storage engine
RocksDBEngine::RocksDBEngine(application_features::ApplicationServer& server)
    : StorageEngine(server, EngineName, FeatureName,

@@ -135,6 +175,7 @@ RocksDBEngine::RocksDBEngine(application_features::ApplicationServer& server)
      _intermediateCommitCount(transaction::Options::defaultIntermediateCommitCount),
      _pruneWaitTime(10.0),
      _pruneWaitTimeInitial(180.0),
      _maxWalArchiveSizeLimit(0),
      _releasedTick(0),
#ifdef _WIN32
      // background syncing is not supported on Windows

@@ -251,6 +292,11 @@ void RocksDBEngine::collectOptions(std::shared_ptr<options::ProgramOptions> opti
                     "true to enable rocksdb debug logging",
                     new BooleanParameter(&_debugLogging),
                     arangodb::options::makeFlags(arangodb::options::Flags::Hidden));

  options->addOption("--rocksdb.wal-archive-size-limit",
                     "maximum total size (in bytes) of archived WAL files (0 = unlimited)",
                     new UInt64Parameter(&_maxWalArchiveSizeLimit),
                     arangodb::options::makeFlags(arangodb::options::Flags::Hidden));
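As an illustration, starting the server with `--rocksdb.wal-archive-size-limit 1073741824` would cap the archived WAL files at 1 GiB; the default of `0` keeps the previous unbounded behavior.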
#ifdef USE_ENTERPRISE
  collectEnterpriseOptions(options);

@@ -736,7 +782,7 @@ void RocksDBEngine::unprepare() {

  shutdownRocksDBInstance();
}

std::unique_ptr<TransactionManager> RocksDBEngine::createTransactionManager() {
  return std::unique_ptr<TransactionManager>(new RocksDBTransactionManager());
}

@@ -1272,7 +1318,7 @@ arangodb::Result RocksDBEngine::dropCollection(TRI_vocbase_t& vocbase,

  // if we get here all documents / indexes are gone.
  // We have no data garbage left.
  return TRI_ERROR_NO_ERROR;
  return Result();
}

void RocksDBEngine::destroyCollection(TRI_vocbase_t& /*vocbase*/, LogicalCollection& /*collection*/

@@ -1595,52 +1641,138 @@ std::vector<std::shared_ptr<RocksDBRecoveryHelper>> const& RocksDBEngine::recove
}

void RocksDBEngine::determinePrunableWalFiles(TRI_voc_tick_t minTickExternal) {
  WRITE_LOCKER(lock, _walFileLock);
  rocksdb::VectorLogPtr files;

  WRITE_LOCKER(lock, _walFileLock);
  TRI_voc_tick_t minTickToKeep = std::min(_releasedTick, minTickExternal);

  // Retrieve the sorted list of all wal files with earliest file first
  auto status = _db->GetSortedWalFiles(files);
  if (!status.ok()) {
    LOG_TOPIC(INFO, Logger::ENGINES) << "could not get WAL files " << status.ToString();
    return;
  }

  size_t lastLess = files.size();
  uint64_t totalArchiveSize = 0;
  for (size_t current = 0; current < files.size(); current++) {
    auto f = files[current].get();
    if (f->StartSequence() < minTickToKeep) {
      lastLess = current;
    } else {
      break;
    auto const& f = files[current].get();

    if (f->Type() != rocksdb::WalFileType::kArchivedLogFile) {
      // we are only interested in files of the archive
      continue;
    }

    // determine the size of the archive only if there is a cap on the archive size
    // otherwise we can save the underlying file access
    if (_maxWalArchiveSizeLimit > 0) {
      totalArchiveSize += f->SizeFileBytes();
    }

    if (f->StartSequence() < minTickToKeep) {
      // this file will be removed because it does not contain any data we
      // still need
      if (_prunableWalFiles.find(f->PathName()) == _prunableWalFiles.end()) {
        LOG_TOPIC(DEBUG, Logger::ENGINES)
            << "RocksDB WAL file '" << f->PathName() << "' with start sequence "
            << f->StartSequence() << " added to prunable list because it is not needed anymore";
        _prunableWalFiles.emplace(f->PathName(), TRI_microtime() + _pruneWaitTime);
      }
    }
  }

  // insert all candidate files into the map of deletable files
  if (lastLess > 0 && lastLess < files.size()) {
    for (size_t current = 0; current < lastLess; current++) {
      auto const& f = files[current].get();
      if (f->Type() == rocksdb::WalFileType::kArchivedLogFile) {
        if (_prunableWalFiles.find(f->PathName()) == _prunableWalFiles.end()) {
          LOG_TOPIC(DEBUG, Logger::ENGINES)
              << "RocksDB WAL file '" << f->PathName() << "' with start sequence "
              << f->StartSequence() << " added to prunable list";
          _prunableWalFiles.emplace(f->PathName(), TRI_microtime() + _pruneWaitTime);
        }
  if (_maxWalArchiveSizeLimit == 0) {
    // size of the archive is not restricted. done!
    return;
  }

  // print current archive size
  LOG_TOPIC(TRACE, Logger::ENGINES) << "total size of the RocksDB WAL file archive: " << totalArchiveSize;

  if (totalArchiveSize <= _maxWalArchiveSizeLimit) {
    // archive is smaller than allowed. all good
    return;
  }

  // we got more archived files than configured. time for purging some files!
  for (size_t current = 0; current < files.size(); current++) {
    auto const& f = files[current].get();

    if (f->Type() != rocksdb::WalFileType::kArchivedLogFile) {
      continue;
    }

    // force pruning
    bool doPrint = false;
    auto it = _prunableWalFiles.find(f->PathName());

    if (it == _prunableWalFiles.end()) {
      doPrint = true;
      // using an expiration time of -1.0 indicates the file is subject to deletion
      // because the archive outgrew the maximum allowed size
      _prunableWalFiles.emplace(f->PathName(), -1.0);
    } else {
      // file already in list. now set its expiration time to the past
      // so we are sure it will get deleted

      // using an expiration time of -1.0 indicates the file is subject to deletion
      // because the archive outgrew the maximum allowed size
      if ((*it).second > 0.0) {
        doPrint = true;
      }
      (*it).second = -1.0;
    }

    if (doPrint) {
      LOG_TOPIC(WARN, Logger::ENGINES)
          << "forcing removal of RocksDB WAL file '" << f->PathName() << "' with start sequence "
          << f->StartSequence() << " because of overflowing archive. configured maximum archive size is " << _maxWalArchiveSizeLimit << ", actual archive size is: " << totalArchiveSize;
    }

    TRI_ASSERT(totalArchiveSize >= f->SizeFileBytes());
    totalArchiveSize -= f->SizeFileBytes();

    if (totalArchiveSize <= _maxWalArchiveSizeLimit) {
      // got enough files to remove
      break;
    }
  }
}
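To make the force-pruning loop concrete: with `--rocksdb.wal-archive-size-limit` set to 256 MB and four archived files of 128 MB each (512 MB total), the loop marks the two oldest files with expiration time `-1.0`; after subtracting their sizes the running total is 256 MB, the `totalArchiveSize <= _maxWalArchiveSizeLimit` check passes, and the loop stops. The files are then physically removed by `pruneWalFiles()` below, provided no tailing thread currently holds the purge lock.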
void RocksDBEngine::pruneWalFiles() {
  WRITE_LOCKER(lock, _walFileLock);
RocksDBFilePurgePreventer RocksDBEngine::disallowPurging() noexcept {
  return RocksDBFilePurgePreventer(this);
}

RocksDBFilePurgeEnabler RocksDBEngine::startPurging() noexcept {
  return RocksDBFilePurgeEnabler(this);
}

void RocksDBEngine::pruneWalFiles() {
  // this struct makes sure that no other threads enter WAL tailing while we
  // are in here. If there are already other threads in WAL tailing while we
  // get here, we go on and only remove the WAL files that are really safe
  // to remove
  RocksDBFilePurgeEnabler purgeEnabler(rocksutils::globalRocksEngine()->startPurging());

  WRITE_LOCKER(lock, _walFileLock);

  // go through the map of WAL files that we have already and check if they are
  // "expired"
  for (auto it = _prunableWalFiles.begin(); it != _prunableWalFiles.end();
       /* no hoisting */) {
    // check if WAL file is expired
    if ((*it).second < TRI_microtime()) {
    bool deleteFile = false;

    if ((*it).second <= 0.0) {
      // file can be deleted because we outgrew the configured max archive size,
      // but only if there are no other threads currently inside the WAL tailing
      // section
      deleteFile = purgeEnabler.canPurge();
    } else if ((*it).second < TRI_microtime()) {
      // file has expired, and it is always safe to delete it
      deleteFile = true;
    }

    if (deleteFile) {
      LOG_TOPIC(DEBUG, Logger::ENGINES)
          << "deleting RocksDB WAL file '" << (*it).first << "'";
      auto s = _db->DeleteFile((*it).first);

@@ -1652,6 +1784,7 @@ void RocksDBEngine::pruneWalFiles() {
        continue;
      }
    }

    // cannot delete this file yet... must forward iterator to prevent an
    // endless loop
    ++it;
@@ -27,6 +27,7 @@

#include "Basics/Common.h"
#include "Basics/Mutex.h"
#include "Basics/ReadWriteLock.h"
#include "RocksDBEngine/RocksDBTypes.h"
#include "StorageEngine/StorageEngine.h"
#include "VocBase/AccessMode.h"

@@ -63,18 +64,64 @@ class TransactionCollection;
class TransactionState;

namespace rest {

class RestHandlerFactory;
}

namespace transaction {

class ContextData;
struct Options;

}  // namespace transaction

class RocksDBEngine;  // forward

/// @brief helper class to make file-purging thread-safe
/// while there is an object of this type around, it will prevent
/// purging of maybe-needed WAL files via holding a lock in the
/// RocksDB engine. if there is no object of this type around,
/// purging is allowed to happen
class RocksDBFilePurgePreventer {
 public:
  RocksDBFilePurgePreventer(RocksDBFilePurgePreventer const&) = delete;
  RocksDBFilePurgePreventer& operator=(RocksDBFilePurgePreventer const&) = delete;
  RocksDBFilePurgePreventer& operator=(RocksDBFilePurgePreventer&&) = delete;

  explicit RocksDBFilePurgePreventer(RocksDBEngine*);
  RocksDBFilePurgePreventer(RocksDBFilePurgePreventer&&);
  ~RocksDBFilePurgePreventer();

 private:
  RocksDBEngine* _engine;
};

/// @brief helper class to make file-purging thread-safe
/// creating an object of this type will try to acquire a lock that rules
/// out all WAL iteration/WAL tailing while the lock is held. While this
/// is the case, we are allowed to purge any WAL file, because no other
/// thread is able to access it. Note that it is still safe to delete
/// unneeded WAL files, as they will not be accessed by any other thread.
/// however, without this object it would be unsafe to delete WAL files
/// that may still be accessed by WAL tailing etc.
class RocksDBFilePurgeEnabler {
 public:
  RocksDBFilePurgeEnabler(RocksDBFilePurgePreventer const&) = delete;
  RocksDBFilePurgeEnabler& operator=(RocksDBFilePurgeEnabler const&) = delete;
  RocksDBFilePurgeEnabler& operator=(RocksDBFilePurgeEnabler&&) = delete;

  explicit RocksDBFilePurgeEnabler(RocksDBEngine*);
  RocksDBFilePurgeEnabler(RocksDBFilePurgeEnabler&&);
  ~RocksDBFilePurgeEnabler();

  /// @brief returns true if purging any type of WAL file is currently allowed
  bool canPurge() const { return _engine != nullptr; }

 private:
  RocksDBEngine* _engine;
};
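Taken together, the two classes put a read/write lock into RAII form: every WAL reader holds a `RocksDBFilePurgePreventer` (read side), and the pruner opportunistically takes a `RocksDBFilePurgeEnabler` (try-write side). A minimal sketch of the intended pattern, assuming an `engine` pointer (illustrative, not from the patch):

```cpp
// Hedged sketch: WAL tailing side - block forced purging while reading.
{
  RocksDBFilePurgePreventer preventer(engine->disallowPurging());
  // ... safely iterate over archived WAL files ...
}

// Pruning side - forced deletion only if no reader is active.
{
  RocksDBFilePurgeEnabler enabler(engine->startPurging());
  if (enabler.canPurge()) {
    // tryWriteLock() succeeded: no tailing thread is active, so even
    // "maybe still needed" archive files may be deleted
  }
}
```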
class RocksDBEngine final : public StorageEngine {
  friend class RocksDBFilePurgePreventer;
  friend class RocksDBFilePurgeEnabler;

 public:
  // create the storage engine
  explicit RocksDBEngine(application_features::ApplicationServer& server);

@@ -188,6 +235,14 @@ class RocksDBEngine final : public StorageEngine {
  void recoveryDone(TRI_vocbase_t& vocbase) override;

 public:
  /// @brief disallow purging of WAL files even if the archive gets too big
  /// removing WAL files does not seem to be thread-safe, so we have to track
  /// usage of WAL files ourselves
  RocksDBFilePurgePreventer disallowPurging() noexcept;

  /// @brief whether or not purging of WAL files is currently allowed
  RocksDBFilePurgeEnabler startPurging() noexcept;

  std::string createCollection(TRI_vocbase_t& vocbase,
                               LogicalCollection const& collection) override;

@@ -365,8 +420,13 @@ class RocksDBEngine final : public StorageEngine {
  std::unordered_map<uint64_t, CollectionPair> _collectionMap;
  std::unordered_map<uint64_t, IndexTriple> _indexMap;

  /// @brief protects _prunableWalFiles
  mutable basics::ReadWriteLock _walFileLock;
  // which WAL files can be pruned when

  /// @brief which WAL files can be pruned when
  /// an expiration time of <= 0.0 means the file has not expired, but
  /// still should be purged because the WAL files archive outgrew its max
  /// configured size
  std::unordered_map<std::string, double> _prunableWalFiles;

  // number of seconds to wait before an obsolete WAL file is actually pruned

@@ -376,6 +436,9 @@ class RocksDBEngine final : public StorageEngine {
  // deletion kicks in
  double _pruneWaitTimeInitial;

  /// @brief maximum total size (in bytes) of archived WAL files
  uint64_t _maxWalArchiveSizeLimit;

  // do not release walfiles containing writes later than this
  TRI_voc_tick_t _releasedTick;

@@ -397,8 +460,10 @@ class RocksDBEngine final : public StorageEngine {
  // too far behind and blocking incoming writes
  // (will only be set if _useThrottle is true)
  std::shared_ptr<RocksDBThrottle> _listener;
};

  arangodb::basics::ReadWriteLock _purgeLock;
};

}  // namespace arangodb

#endif
@@ -607,6 +607,9 @@ Result RocksDBRecoveryManager::parseRocksWAL() {
  WBReader handler(startSeqs);
  rocksdb::SequenceNumber earliest = engine->settingsManager()->earliestSeqNeeded();
  auto minTick = std::min(earliest, engine->releasedTick());

  // prevent purging of WAL files while we are in here
  RocksDBFilePurgePreventer purgePreventer(rocksutils::globalRocksEngine()->disallowPurging());

  std::unique_ptr<rocksdb::TransactionLogIterator> iterator;  // reader();
  rocksdb::Status s =
@@ -194,7 +194,7 @@ class WALParser final : public rocksdb::WriteBatch::Handler {
    LogicalCollection* coll = loadCollection(cid);
    TRI_ASSERT(coll != nullptr);
    {
      uint64_t tick = _currentSequence + (_startOfBatch ? 0 : 1);
      uint64_t tick = _currentSequence;
      VPackObjectBuilder marker(&_builder, true);
      marker->add("tick", VPackValue(std::to_string(tick)));
      marker->add("type", VPackValue(REPLICATION_COLLECTION_TRUNCATE));

@@ -337,6 +337,7 @@ class WALParser final : public rocksdb::WriteBatch::Handler {
    case RocksDBLogType::DocumentRemoveAsPartOfUpdate:
    case RocksDBLogType::TrackedDocumentInsert:
    case RocksDBLogType::TrackedDocumentRemove:
    case RocksDBLogType::FlushSync:
      break;  // ignore deprecated && unused markers

    default:

@@ -639,6 +640,9 @@ RocksDBReplicationResult rocksutils::tailWal(TRI_vocbase_t* vocbase, uint64_t ti
  uint64_t lastTick = tickStart;         // generally contains begin of last wb
  uint64_t lastWrittenTick = tickStart;  // contains end tick of last wb
  uint64_t lastScannedTick = tickStart;

  // prevent purging of WAL files while we are in here
  RocksDBFilePurgePreventer purgePreventer(rocksutils::globalRocksEngine()->disallowPurging());

  // LOG_TOPIC(WARN, Logger::FIXME) << "1. Starting tailing: tickStart " <<
  // tickStart << " tickEnd " << tickEnd << " chunkSize " << chunkSize;//*/
@@ -264,6 +264,8 @@ void RocksDBRestReplicationHandler::handleCommandLoggerFollow() {
                  result.errorNumber(), result.errorMessage());
    return;
  }

  TRI_ASSERT(latest >= result.maxTick());

  bool const checkMore = (result.maxTick() > 0 && result.maxTick() < latest);
@@ -22,6 +22,7 @@
////////////////////////////////////////////////////////////////////////////////

#include "Basics/StaticStrings.h"
#include "Replication/TailingSyncer.h"
#include "RestServer/DatabaseFeature.h"
#include "RocksDBEngine/RocksDBColumnFamily.h"
#include "RocksDBEngine/RocksDBCommon.h"

@@ -99,7 +100,8 @@ class MyWALDumper final : public rocksdb::WriteBatch::Handler, public WalAccessC

 public:
  MyWALDumper(WalAccess::Filter const& filter,
              WalAccess::MarkerCallback const& f, size_t maxResponseSize)
              WalAccess::MarkerCallback const& f,
              size_t maxResponseSize)
      : WalAccessContext(filter, f),
        _definitionsCF(RocksDBColumnFamily::definitions()->GetID()),
        _documentsCF(RocksDBColumnFamily::documents()->GetID()),

@@ -126,11 +128,13 @@ class MyWALDumper final : public rocksdb::WriteBatch::Handler, public WalAccessC
  }

  void LogData(rocksdb::Slice const& blob) override {
#ifdef ARANGODB_ENABLE_MAINTAINER_MODE
    _checkTick = true;
#endif
    // rocksdb does not count LogData towards sequence-number
    RocksDBLogType type = RocksDBLogValue::type(blob);

    // LOG_TOPIC(WARN, Logger::REPLICATION) << "[LOG] " << _currentSequence
    // << " " << rocksDBLogTypeName(type);

    // LOG_TOPIC(WARN, Logger::REPLICATION) << "[LOG] " << _currentSequence << " " << rocksDBLogTypeName(type);
    switch (type) {
      case RocksDBLogType::DatabaseCreate:
        resetTransientState();  // finish ongoing trx

@@ -199,17 +203,19 @@ class MyWALDumper final : public rocksdb::WriteBatch::Handler, public WalAccessC
      TRI_voc_cid_t cid = RocksDBLogValue::collectionId(blob);
      if (shouldHandleCollection(dbid, cid)) {  // will check vocbase
        TRI_vocbase_t* vocbase = loadVocbase(dbid);
        LogicalCollection* coll = loadCollection(dbid, cid);
        TRI_ASSERT(vocbase != nullptr && coll != nullptr);
        {
          uint64_t tick = _currentSequence + (_startOfBatch ? 0 : 1);
          VPackObjectBuilder marker(&_builder, true);
          marker->add("tick", VPackValue(std::to_string(tick)));
          marker->add("type", VPackValue(REPLICATION_COLLECTION_TRUNCATE));
          marker->add("db", VPackValue(vocbase->name()));
          marker->add("cuid", VPackValue(coll->guid()));
        if (vocbase != nullptr) {
          LogicalCollection* coll = loadCollection(dbid, cid);
          TRI_ASSERT(vocbase != nullptr && coll != nullptr);
          {
            uint64_t tick = _currentSequence;
            VPackObjectBuilder marker(&_builder, true);
            marker->add("tick", VPackValue(std::to_string(tick)));
            marker->add("type", VPackValue(REPLICATION_COLLECTION_TRUNCATE));
            marker->add("db", VPackValue(vocbase->name()));
            marker->add("cuid", VPackValue(coll->guid()));
          }
          printMarker(vocbase);
        }
        printMarker(vocbase);
      }
      break;
    }
@@ -221,22 +227,24 @@ class MyWALDumper final : public rocksdb::WriteBatch::Handler, public WalAccessC
      // only print markers from this collection if it is set
      if (shouldHandleCollection(dbid, cid)) {  // will check vocbase
        TRI_vocbase_t* vocbase = loadVocbase(dbid);
        LogicalCollection* coll = loadCollection(dbid, cid);
        TRI_ASSERT(vocbase != nullptr && coll != nullptr);
        VPackSlice indexDef = RocksDBLogValue::indexSlice(blob);
        auto stripped = rocksutils::stripObjectIds(indexDef);
        if (vocbase != nullptr) {
          LogicalCollection* coll = loadCollection(dbid, cid);
          TRI_ASSERT(vocbase != nullptr && coll != nullptr);
          VPackSlice indexDef = RocksDBLogValue::indexSlice(blob);
          auto stripped = rocksutils::stripObjectIds(indexDef);

        {
          uint64_t tick = _currentSequence + (_startOfBatch ? 0 : 1);
          VPackObjectBuilder marker(&_builder, true);
          marker->add("tick", VPackValue(std::to_string(tick)));
          marker->add("type", VPackValue(rocksutils::convertLogType(type)));
          marker->add("db", VPackValue(vocbase->name()));
          marker->add("cuid", VPackValue(coll->guid()));
          marker->add("data", stripped.first);
          {
            uint64_t tick = _currentSequence + (_startOfBatch ? 0 : 1);
            VPackObjectBuilder marker(&_builder, true);
            marker->add("tick", VPackValue(std::to_string(tick)));
            marker->add("type", VPackValue(rocksutils::convertLogType(type)));
            marker->add("db", VPackValue(vocbase->name()));
            marker->add("cuid", VPackValue(coll->guid()));
            marker->add("data", stripped.first);
          }

          printMarker(vocbase);
        }

        printMarker(vocbase);
      }

      break;

@@ -251,23 +259,25 @@ class MyWALDumper final : public rocksdb::WriteBatch::Handler, public WalAccessC
      // only print markers from this collection if it is set
      if (shouldHandleCollection(dbid, cid)) {
        TRI_vocbase_t* vocbase = loadVocbase(dbid);
        LogicalCollection* col = loadCollection(dbid, cid);
        TRI_ASSERT(vocbase != nullptr && col != nullptr);
        if (vocbase != nullptr) {
          LogicalCollection* col = loadCollection(dbid, cid);
          TRI_ASSERT(vocbase != nullptr && col != nullptr);

        {
          uint64_t tick = _currentSequence + (_startOfBatch ? 0 : 1);
          VPackObjectBuilder marker(&_builder, true);
          marker->add("tick", VPackValue(std::to_string(tick)));
          marker->add("type", VPackValue(rocksutils::convertLogType(type)));
          marker->add("db", VPackValue(vocbase->name()));
          marker->add("cuid", VPackValue(col->guid()));
          {
            uint64_t tick = _currentSequence + (_startOfBatch ? 0 : 1);
            VPackObjectBuilder marker(&_builder, true);
            marker->add("tick", VPackValue(std::to_string(tick)));
            marker->add("type", VPackValue(rocksutils::convertLogType(type)));
            marker->add("db", VPackValue(vocbase->name()));
            marker->add("cuid", VPackValue(col->guid()));

          VPackObjectBuilder data(&_builder, "data", true);
            VPackObjectBuilder data(&_builder, "data", true);

          data->add("id", VPackValue(std::to_string(iid)));
            data->add("id", VPackValue(std::to_string(iid)));
          }

          printMarker(vocbase);
        }

        printMarker(vocbase);
      }
      break;
    }

@@ -321,17 +331,17 @@ class MyWALDumper final : public rocksdb::WriteBatch::Handler, public WalAccessC
      TRI_voc_tick_t dbid = RocksDBLogValue::databaseId(blob);
      if (shouldHandleDB(dbid)) {
        TRI_vocbase_t* vocbase = loadVocbase(dbid);
        // note: vocbase may be a nullptr here, if the database was already deleted!
        if (vocbase != nullptr) {
          _state = TRANSACTION;
          _currentTrxId = tid;
          _trxDbId = dbid;
          {
            VPackObjectBuilder marker(&_builder, true);
            marker->add("tick", VPackValue(std::to_string(_currentSequence)));
            marker->add("type", VPackValue(rocksutils::convertLogType(type)));
            marker->add("db", VPackValue(vocbase->name()));
            marker->add("tid", VPackValue(std::to_string(_currentTrxId)));
          }
          _builder.openObject(true);
          _builder.add("tick", VPackValue(std::to_string(_currentSequence)));
          _builder.add("type", VPackValue(rocksutils::convertLogType(type)));
          _builder.add("tid", VPackValue(std::to_string(_currentTrxId)));
          _builder.add("db", VPackValue(vocbase->name()));
          _builder.close();
          printMarker(vocbase);
        }
      }

@@ -392,6 +402,10 @@ class MyWALDumper final : public rocksdb::WriteBatch::Handler, public WalAccessC
    case RocksDBLogType::DocumentRemoveAsPartOfUpdate:
    case RocksDBLogType::TrackedDocumentInsert:
    case RocksDBLogType::TrackedDocumentRemove:
    case RocksDBLogType::FlushSync:
#ifdef ARANGODB_ENABLE_MAINTAINER_MODE
      _checkTick = false;
#endif
      break;  // ignore deprecated / unused markers

    default:
@@ -403,9 +417,11 @@ class MyWALDumper final : public rocksdb::WriteBatch::Handler, public WalAccessC

  rocksdb::Status PutCF(uint32_t column_family_id, rocksdb::Slice const& key,
                        rocksdb::Slice const& value) override {
#ifdef ARANGODB_ENABLE_MAINTAINER_MODE
    _checkTick = true;
#endif
    incTick();
    // LOG_TOPIC(WARN, Logger::ENGINES) << "[PUT] cf: " << column_family_id
    // << ", key:" << key.ToString() << " value: " << value.ToString();
    // LOG_TOPIC(WARN, Logger::ENGINES) << "[PUT] cf: " << column_family_id << ", key:" << key.ToString() << " value: " << value.ToString();

    if (column_family_id == _definitionsCF) {
      // LogData should have triggered a commit on ongoing transactions

@@ -448,31 +464,33 @@ class MyWALDumper final : public rocksdb::WriteBatch::Handler, public WalAccessC
          (_state == COLLECTION_CREATE || _state == COLLECTION_RENAME ||
           _state == COLLECTION_CHANGE)) {
        TRI_vocbase_t* vocbase = loadVocbase(dbid);
        LogicalCollection* col = loadCollection(dbid, cid);
        TRI_ASSERT(vocbase != nullptr && col != nullptr);
        if (vocbase != nullptr) {
          LogicalCollection* col = loadCollection(dbid, cid);
          TRI_ASSERT(vocbase != nullptr && col != nullptr);

        {
          VPackSlice collectionDef = RocksDBValue::data(value);
          VPackObjectBuilder marker(&_builder, true);
          marker->add("tick", VPackValue(std::to_string(_currentSequence)));
          marker->add("db", VPackValue(vocbase->name()));
          marker->add("cuid", VPackValue(col->guid()));
          {
            VPackSlice collectionDef = RocksDBValue::data(value);
            VPackObjectBuilder marker(&_builder, true);
            marker->add("tick", VPackValue(std::to_string(_currentSequence)));
            marker->add("db", VPackValue(vocbase->name()));
            marker->add("cuid", VPackValue(col->guid()));

          if (_state == COLLECTION_CREATE) {
            auto stripped = rocksutils::stripObjectIds(collectionDef);
            marker->add("type", VPackValue(REPLICATION_COLLECTION_CREATE));
            marker->add("data", stripped.first);
          } else if (_state == COLLECTION_RENAME) {
            marker->add("type", VPackValue(REPLICATION_COLLECTION_RENAME));
            VPackObjectBuilder data(&_builder, "data", true);
            data->add("name", VPackValue(col->name()));
          } else if (_state == COLLECTION_CHANGE) {
            auto stripped = rocksutils::stripObjectIds(collectionDef);
            marker->add("type", VPackValue(REPLICATION_COLLECTION_CHANGE));
            marker->add("data", stripped.first);
            if (_state == COLLECTION_CREATE) {
              auto stripped = rocksutils::stripObjectIds(collectionDef);
              marker->add("type", VPackValue(REPLICATION_COLLECTION_CREATE));
              marker->add("data", stripped.first);
            } else if (_state == COLLECTION_RENAME) {
              marker->add("type", VPackValue(REPLICATION_COLLECTION_RENAME));
              VPackObjectBuilder data(&_builder, "data", true);
              data->add("name", VPackValue(col->name()));
            } else if (_state == COLLECTION_CHANGE) {
              auto stripped = rocksutils::stripObjectIds(collectionDef);
              marker->add("type", VPackValue(REPLICATION_COLLECTION_CHANGE));
              marker->add("data", stripped.first);
            }
          }
          printMarker(vocbase);
        }
        printMarker(vocbase);
      }
    } else if (RocksDBKey::type(key) == RocksDBEntryType::View) {
      TRI_voc_tick_t dbid = RocksDBKey::databaseId(key);

@@ -480,24 +498,25 @@ class MyWALDumper final : public rocksdb::WriteBatch::Handler, public WalAccessC

      if (shouldHandleView(dbid, vid) && (_state == VIEW_CREATE || _state == VIEW_CHANGE)) {
        TRI_vocbase_t* vocbase = loadVocbase(dbid);
        TRI_ASSERT(vocbase != nullptr);
        auto view = vocbase->lookupView(vid);
        if (vocbase != nullptr) {
          auto view = vocbase->lookupView(vid);

        if (view != nullptr) {  // ignore nonexisting views
          VPackSlice viewDef = RocksDBValue::data(value);
          {
            VPackObjectBuilder marker(&_builder, true);
            marker->add("tick", VPackValue(std::to_string(_currentSequence)));
            marker->add("db", VPackValue(vocbase->name()));
            marker->add("cuid", VPackValue(view->guid()));
            marker->add("data", viewDef);
            if (_state == VIEW_CREATE) {
              marker->add("type", VPackValue(REPLICATION_VIEW_CREATE));
            } else /*if (_state == VIEW_CHANGE)*/ {
              marker->add("type", VPackValue(REPLICATION_VIEW_CHANGE));
          if (view != nullptr) {  // ignore nonexisting views
            VPackSlice viewDef = RocksDBValue::data(value);
            {
              VPackObjectBuilder marker(&_builder, true);
              marker->add("tick", VPackValue(std::to_string(_currentSequence)));
              marker->add("db", VPackValue(vocbase->name()));
              marker->add("cuid", VPackValue(view->guid()));
              marker->add("data", viewDef);
              if (_state == VIEW_CREATE) {
                marker->add("type", VPackValue(REPLICATION_VIEW_CREATE));
              } else /*if (_state == VIEW_CHANGE)*/ {
                marker->add("type", VPackValue(REPLICATION_VIEW_CHANGE));
              }
            }
            printMarker(vocbase);
          }
          printMarker(vocbase);
        }
      }
    }
@@ -551,7 +570,11 @@ class MyWALDumper final : public rocksdb::WriteBatch::Handler, public WalAccessC

  // for Delete / SingleDelete
  void handleDeleteCF(uint32_t cfId, rocksdb::Slice const& key) {
#ifdef ARANGODB_ENABLE_MAINTAINER_MODE
    _checkTick = true;
#endif
    incTick();
    // LOG_TOPIC(WARN, Logger::ENGINES) << "[DELETE] cf: " << cfId << ", key:" << key.ToString();

    if (cfId != _primaryCF) {
      return;  // ignore all document operations

@@ -616,7 +639,11 @@ class MyWALDumper final : public rocksdb::WriteBatch::Handler, public WalAccessC
  rocksdb::Status DeleteRangeCF(uint32_t /*column_family_id*/,
                                const rocksdb::Slice& /*begin_key*/,
                                const rocksdb::Slice& /*end_key*/) override {
#ifdef ARANGODB_ENABLE_MAINTAINER_MODE
    _checkTick = true;
#endif
    incTick();
    // LOG_TOPIC(WARN, Logger::ENGINES) << "[DELETE-RANGE] cf: " << column_family_id;
    // drop and truncate may use this, but we do not print anything
    return rocksdb::Status();  // make WAL iterator happy
  }

@@ -633,7 +660,15 @@ class MyWALDumper final : public rocksdb::WriteBatch::Handler, public WalAccessC
  }

  void startNewBatch(rocksdb::SequenceNumber startSequence) {
    // LOG_TOPIC(TRACE, Logger::ENGINES) << "starting new batch with sequence: " << startSequence;
    TRI_ASSERT(!_stopOnNext);
#ifdef ARANGODB_ENABLE_MAINTAINER_MODE
    if (_checkTick) {
      TRI_ASSERT(_startSequence < startSequence);
      TRI_ASSERT(_currentSequence < startSequence);
    }
#endif

    // starting new write batch
    _startSequence = startSequence;
    _currentSequence = startSequence;

@@ -642,6 +677,9 @@ class MyWALDumper final : public rocksdb::WriteBatch::Handler, public WalAccessC
    _currentTrxId = 0;
    _trxDbId = 0;
    _removedDocRid = 0;
#ifdef ARANGODB_ENABLE_MAINTAINER_MODE
    _checkTick = true;
#endif
  }

  uint64_t endBatch() {

@@ -654,17 +692,24 @@ class MyWALDumper final : public rocksdb::WriteBatch::Handler, public WalAccessC

  uint64_t lastWrittenSequence() const { return _lastWrittenSequence; }

#ifdef ARANGODB_ENABLE_MAINTAINER_MODE
  void disableTickCheck() { _checkTick = false; }
#endif

 private:

  void writeCommitMarker(TRI_voc_tick_t dbid) {
    TRI_ASSERT(_state == TRANSACTION);
    TRI_vocbase_t* vocbase = loadVocbase(dbid);
    if (vocbase != nullptr) {  // we may be in shutdown
    // note: vocbase may be a nullptr here, if the database was already deleted!
    if (vocbase != nullptr) {
      _builder.openObject(true);
      _builder.add("tick", VPackValue(std::to_string(_currentSequence)));
      _builder.add("type", VPackValue(static_cast<uint64_t>(REPLICATION_TRANSACTION_COMMIT)));
      _builder.add("db", VPackValue(vocbase->name()));
      _builder.add("tid", VPackValue(std::to_string(_currentTrxId)));
      _builder.add("db", VPackValue(vocbase->name()));
      _builder.close();

      printMarker(vocbase);
    }
    _state = INVALID;

@@ -672,6 +717,7 @@ class MyWALDumper final : public rocksdb::WriteBatch::Handler, public WalAccessC

  /// print marker in builder and clear it
  void printMarker(TRI_vocbase_t* vocbase) {
    // note: vocbase may be a nullptr here!
    TRI_ASSERT(!_builder.isEmpty());
    if (_currentSequence > _filter.tickStart) {
      _callback(vocbase, _builder.slice());
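For orientation, a commit marker built here serializes to something like `{"tick":"2050724544","type":2201,"db":"mydb","tid":"12345"}` (values illustrative; `2201` corresponds to `REPLICATION_TRANSACTION_COMMIT`). `printMarker()` then forwards the marker to the callback only if its tick lies past `filter.tickStart`, which is how already-served markers are suppressed on re-tailing.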
@@ -720,12 +766,16 @@ class MyWALDumper final : public rocksdb::WriteBatch::Handler, public WalAccessC
  TRI_voc_tick_t _trxDbId = 0;  // remove eventually
  TRI_voc_rid_t _removedDocRid = 0;
  bool _stopOnNext = false;
#ifdef ARANGODB_ENABLE_MAINTAINER_MODE
  bool _checkTick = true;
#endif
};

// iterates over WAL starting at 'from' and returns up to 'chunkSize' documents
// from the corresponding database
WalAccessResult RocksDBWalAccess::tail(Filter const& filter, size_t chunkSize,
                                       TRI_voc_tick_t, MarkerCallback const& func) const {
                                       TRI_voc_tick_t,
                                       MarkerCallback const& func) const {
  TRI_ASSERT(filter.transactionIds.empty());  // not supported in any way

  rocksdb::TransactionDB* db = rocksutils::globalRocksDB();

@@ -747,6 +797,9 @@ WalAccessResult RocksDBWalAccess::tail(Filter const& filter, size_t chunkSize,
  uint64_t lastWrittenTick = 0;  // lastTick at the end of a write batch
  uint64_t latestTick = db->GetLatestSequenceNumber();

  // prevent purging of WAL files while we are in here
  RocksDBFilePurgePreventer purgePreventer(rocksutils::globalRocksEngine()->disallowPurging());

  std::unique_ptr<rocksdb::TransactionLogIterator> iterator;  // reader();
  // no need verifying the WAL contents
  rocksdb::TransactionLogIterator::ReadOptions ro(false);

@@ -773,19 +826,24 @@ WalAccessResult RocksDBWalAccess::tail(Filter const& filter, size_t chunkSize,
      break;  // cancel out
    }

    // LOG_TOPIC(INFO, Logger::ENGINES) << "found batch-seq: " <<
    // batch.sequence;
    // LOG_TOPIC(TRACE, Logger::REPLICATION) << "found batch-seq: " << batch.sequence;
    lastScannedTick = batch.sequence;  // start of the batch

    if (batch.sequence < since) {
      // LOG_DEVEL << "skipping batch from " << batch.sequence << " to "
      //<< (batch.sequence + batch.writeBatchPtr->Count());
      // LOG_TOPIC(TRACE, Logger::REPLICATION) << "skipping batch from " << batch.sequence << " to " << (batch.sequence + batch.writeBatchPtr->Count());
      iterator->Next();  // skip
      continue;
    }

    dumper.startNewBatch(batch.sequence);
    s = batch.writeBatchPtr->Iterate(&dumper);
#ifdef ARANGODB_ENABLE_MAINTAINER_MODE
    if (batch.writeBatchPtr->Count() == 0) {
      // there can be completely empty write batches. in case we encounter
      // some, we cannot assume the tick gets increased next time
      dumper.disableTickCheck();
    }
#endif
    if (!s.ok()) {
      LOG_TOPIC(ERR, Logger::REPLICATION) << "error during WAL scan: " << s.ToString();
      break;  // s is considered in the end

@@ -804,6 +862,10 @@ WalAccessResult RocksDBWalAccess::tail(Filter const& filter, size_t chunkSize,
    iterator->Next();
  }

  // update our latest sequence number again, because it may have been raised
  // while scanning the WAL
  latestTick = db->GetLatestSequenceNumber();

  WalAccessResult result(TRI_ERROR_NO_ERROR, firstTick <= filter.tickStart,
                         lastWrittenTick, lastScannedTick, latestTick);
  if (!s.ok()) {
@@ -54,7 +54,8 @@ class RocksDBWalAccess final : public WalAccess {

  /// Tails the wal, this will already sanitize the
  WalAccessResult tail(WalAccess::Filter const& filter, size_t chunkSize,
                       TRI_voc_tick_t barrierId, MarkerCallback const&) const override;
                       TRI_voc_tick_t barrierId,
                       MarkerCallback const&) const override;
};
}  // namespace arangodb
@@ -148,7 +148,8 @@ class WalAccess {
                                       TransactionCallback const&) const = 0;

  virtual WalAccessResult tail(Filter const& filter, size_t chunkSize,
                               TRI_voc_tid_t barrierId, MarkerCallback const&) const = 0;
                               TRI_voc_tid_t barrierId,
                               MarkerCallback const&) const = 0;
};

/// @brief helper class used to resolve vocbases
@@ -71,34 +71,24 @@ function ReplicationLoggerSuite () {

      var entries = REPLICATION_LOGGER_LAST(tick, "9999999999999999999");

      if (type === undefined) {
        return entries;
      }

      if (Array.isArray(type)) {
        var found = false;
        entries.forEach(function(e) {
          if (! found) {
            if (e.type === 2300 && e.cname && exclude(e.cname)) {
              // exclude statistics markers here
              return;
            }
            if (e.type === type[0]) {
              found = true;
            }
          if ((e.type === 2300 || e.type === 2302) && e.cname && exclude(e.cname)) {
            // exclude statistics markers here
            return;
          }
          if (found && type.indexOf(e.type) !== -1) {
          if (type.indexOf(e.type) !== -1) {
            result.push(e);
          }
        });
      }
      else {
        entries.forEach(function(e) {
          if (e.type === 2300 && e.cname && exclude(e.cname)) {
          if ((e.type === 2300 || e.type === 2302) && e.cname && exclude(e.cname)) {
            // exclude statistics markers here
            return;
          }
          if (e.type === type) {
          if (type === undefined || e.type === type) {
            result.push(e);
          }
        });

@@ -526,6 +516,61 @@ function ReplicationLoggerSuite () {
      assertEqual(2201, entry[101].type);
      assertEqual(tid, entry[101].tid);
    },

    testLoggerTruncateCollectionRocksDB : function () {
      if (db._engine().name !== 'rocksdb') {
        return;
      }

      let c = db._create(cn);
      let docs = [];
      for (let i = 0; i < 32769; ++i) {
        docs.push({ "_key": "test" + i, value: i });
        if (docs.length === 5000) {
          c.insert(docs);
          docs = [];
        }
      }
      c.insert(docs);

      let tick = getLastLogTick();
      c.truncate({ compact: false });
      getLastLogTick();
      let entry = getLogEntries(tick);

      assertEqual(1, entry.length);
      // truncate marker
      assertEqual(2004, entry[0].type);
    },

    testLoggerTruncateCollectionAndThenSomeRocksDB : function () {
      if (db._engine().name !== 'rocksdb') {
        return;
      }

      let c = db._create(cn);
      let docs = [];
      for (let i = 0; i < 32769; ++i) {
        docs.push({ "_key": "test" + i, value: i });
        if (docs.length === 5000) {
          c.insert(docs);
          docs = [];
        }
      }
      c.insert(docs);

      let tick = getLastLogTick();
      c.truncate({ compact: false });
      c.insert({ _key: "aha" });
      getLastLogTick();
      let entry = getLogEntries(tick);

      assertEqual(2, entry.length);
      // truncate marker
      assertEqual(2004, entry[0].type);
      assertEqual(2300, entry[1].type);
      assertEqual("aha", entry[1].data._key);
    },

    ////////////////////////////////////////////////////////////////////////////////
    /// @brief test actions
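The asserted type codes line up with the enum in `replication-common.h`: `2004` appears to be the new `REPLICATION_COLLECTION_TRUNCATE` marker and `2300` is `REPLICATION_MARKER_DOCUMENT`, so the second test verifies that a truncate followed by a single insert is tailed as exactly one truncate marker plus one document marker, rather than 32769 individual removals.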
@@ -184,6 +184,11 @@ function ReplicationSuite() {
    collection.insert({ value: Date.now() });
  };

  let insertOverwrite = function() {
    let collection = pickCollection();
    collection.insert({ _key: "test", value: Date.now() }, { overwrite: true });
  };

  let remove = function() {
    let collection = pickCollection();
    if (collection.count() === 0) {

@@ -315,6 +320,20 @@ function ReplicationSuite() {
    });
  };

  let insertBatch = function() {
    let collection = pickCollection();
    db._executeTransaction({
      collections: { write: [collection.name()] },
      action: function(params) {
        let collection = params.cn, db = require("internal").db;
        for (let i = 0; i < 1000; ++i) {
          db[collection].insert({ value1: Date.now() });
        }
      },
      params: { cn: collection.name() }
    });
  };

  let createCollection = function() {
    let name = "test" + internal.genRandomAlphaNumbers(16) + Date.now();
    return db._create(name);

@@ -340,6 +359,11 @@ function ReplicationSuite() {
    let collection = pickCollection();
    collection.properties({ waitForSync: false });
  };

  let truncateCollection = function() {
    let collection = pickCollection();
    collection.truncate();
  };

  let createIndex = function () {
    let name = internal.genRandomAlphaNumbers(16) + Date.now();

@@ -375,6 +399,7 @@ function ReplicationSuite() {

  let ops = [
    { name: "insert", func: insert },
    { name: "insertOverwrite", func: insertOverwrite },
    { name: "remove", func: remove },
    { name: "replace", func: replace },
    { name: "update", func: update },

@@ -385,10 +410,12 @@ function ReplicationSuite() {
    { name: "removeMulti", func: removeMulti },
    { name: "removeInsert", func: removeInsert },
    { name: "insertRemove", func: insertRemove },
    { name: "insertBatch", func: insertBatch },
    { name: "createCollection", func: createCollection },
    { name: "dropCollection", func: dropCollection },
    { name: "renameCollection", func: renameCollection },
    { name: "changeCollection", func: changeCollection },
    { name: "truncateCollection", func: truncateCollection },
    { name: "createIndex", func: createIndex },
    { name: "dropIndex", func: dropIndex },
    { name: "createDatabase", func: createDatabase },

@@ -412,6 +439,10 @@ function ReplicationSuite() {

    db._collections().filter(function(c) { return c.name()[0] !== '_'; }).forEach(function(c) {
      total += c.name() + "-" + c.count() + "-" + collectionChecksum(c.name());
      c.indexes().forEach(function(index) {
        delete index.selectivityEstimate;
        total += index.type + "-" + JSON.stringify(index.fields);
      });
    });
  });

@@ -428,10 +459,13 @@ function ReplicationSuite() {

    db._collections().filter(function(c) { return c.name()[0] !== '_'; }).forEach(function(c) {
      total += c.name() + "-" + c.count() + "-" + collectionChecksum(c.name());
      c.indexes().forEach(function(index) {
        delete index.selectivityEstimate;
        total += index.type + "-" + JSON.stringify(index.fields);
      });
    });
  });

  assertTrue(total.length > 0);
  assertEqual(total, state.state);
}
);
@@ -225,6 +225,11 @@ function ReplicationSuite() {
    collection.insert({ value: Date.now() });
  };

  let insertOverwrite = function() {
    let collection = pickCollection();
    collection.insert({ _key: "test", value: Date.now() }, { overwrite: true });
  };

  let remove = function() {
    let collection = pickCollection();
    if (collection.count() === 0) {

@@ -356,6 +361,21 @@ function ReplicationSuite() {
    });
  };

  let insertBatch = function() {
    let collection = pickCollection();
    db._executeTransaction({
      collections: { write: [collection.name()] },
      action: function(params) {
        let collection = params.cn, db = require("internal").db;
        for (let i = 0; i < 1000; ++i) {
          db[collection].insert({ value1: Date.now() });
        }
      },
      params: { cn: collection.name() }
    });
  };


  let createCollection = function() {
    let name = "test" + internal.genRandomAlphaNumbers(16) + Date.now();
    return db._create(name);

@@ -385,6 +405,11 @@ function ReplicationSuite() {
    let collection = pickCollection();
    collection.properties({ waitForSync: false });
  };

  let truncateCollection = function() {
    let collection = pickCollection();
    collection.truncate();
  };

  let createIndex = function () {
    let name = internal.genRandomAlphaNumbers(16) + Date.now();

@@ -424,6 +449,7 @@ function ReplicationSuite() {

  let ops = [
    { name: "insert", func: insert },
    { name: "insertOverwrite", func: insertOverwrite },
    { name: "remove", func: remove },
    { name: "replace", func: replace },
    { name: "update", func: update },

@@ -434,10 +460,12 @@ function ReplicationSuite() {
    { name: "removeMulti", func: removeMulti },
    { name: "removeInsert", func: removeInsert },
    { name: "insertRemove", func: insertRemove },
    { name: "insertBatch", func: insertBatch },
    { name: "createCollection", func: createCollection },
    { name: "dropCollection", func: dropCollection },
    { name: "renameCollection", func: renameCollection },
    { name: "changeCollection", func: changeCollection },
    { name: "truncateCollection", func: truncateCollection },
    { name: "createIndex", func: createIndex },
    { name: "dropIndex", func: dropIndex },
    { name: "createDatabase", func: createDatabase },

@@ -454,6 +482,10 @@ function ReplicationSuite() {
  let total = "";
  db._collections().filter(function(c) { return c.name()[0] !== '_'; }).forEach(function(c) {
    total += c.name() + "-" + c.count() + "-" + collectionChecksum(c.name());
    c.indexes().forEach(function(index) {
      delete index.selectivityEstimate;
      total += index.type + "-" + JSON.stringify(index.fields);
    });
  });
  state.state = total;
},

@@ -463,8 +495,11 @@ function ReplicationSuite() {
  let total = "";
  db._collections().filter(function(c) { return c.name()[0] !== '_'; }).forEach(function(c) {
    total += c.name() + "-" + c.count() + "-" + collectionChecksum(c.name());
    c.indexes().forEach(function(index) {
      delete index.selectivityEstimate;
      total += index.type + "-" + JSON.stringify(index.fields);
    });
  });
  assertTrue(total.length > 0);
  assertEqual(total, state.state);
}
);
@@ -53,7 +53,7 @@ const config = {
   username: username,
   password: password,
   verbose: true,
-  requireFromPresent: false
+  requireFromPresent: true
 };

 // We allow the replication a delay of this many seconds at most
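Both test setups now run with `requireFromPresent: true`: the applier aborts with an error if the master can no longer provide all WAL data from the applier's start tick (for instance because the log was already pruned), instead of silently continuing across the gap. A minimal sketch of setting the option on a slave, assuming a reachable master (the endpoint and credentials below are placeholders):

```js
const replication = require("@arangodb/replication");

replication.applier.properties({
  endpoint: "tcp://127.0.0.1:8529", // master endpoint (placeholder)
  username: "root",
  password: "",
  verbose: true,
  // abort instead of skipping data when the master cannot serve
  // the WAL range starting at the applier's last applied tick
  requireFromPresent: true
});

replication.applier.start();
```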
@@ -83,7 +83,7 @@ const compare = function (masterFunc, masterFunc2, slaveFuncOngoing, slaveFuncFi
   applierConfiguration.password = '';
   applierConfiguration.includeSystem = false;
   applierConfiguration.force32mode = false;
-  applierConfiguration.requireFromPresent = false;
+  applierConfiguration.requireFromPresent = true;

   var syncResult = replication.syncGlobal({
     endpoint: masterEndpoint,
@@ -452,7 +452,7 @@ function BaseTestConfig () {
   function (state) {
     db._collection(cn).truncate();
     assertEqual(db._collection(cn).count(), 0);
     assertEqual(db._collection(cn).all().toArray().length, 0);
     assertEqual(db._collection(cn).toArray().length, 0);
   },

   function (state) {
@@ -461,7 +461,46 @@ function BaseTestConfig () {
   function (state) {
     assertEqual(db._collection(cn).count(), 0);
     assertEqual(db._collection(cn).all().toArray().length, 0);
     assertEqual(db._collection(cn).toArray().length, 0);
   }
   );
 },

+testTruncateCollectionBiggerAndThenSome: function () {
+  connectToMaster();
+
+  compare(
+    function (state) {
+      let c = db._create(cn);
+      let docs = [];
+      for (let i = 0; i < (32 * 1024 + 1); i++) {
+        docs.push({
+          value: i
+        });
+        if (docs.length >= 1000) {
+          c.insert(docs);
+          docs = [];
+        }
+      }
+      c.insert(docs);
+    },
+
+    function (state) {
+      db._collection(cn).truncate(); // should hit range-delete in rocksdb
+      assertEqual(db._collection(cn).count(), 0);
+      assertEqual(db._collection(cn).toArray().length, 0);
+      db._collection(cn).insert({_key: "a"});
+      db._collection(cn).insert({_key: "b"});
+    },
+
+    function (state) {
+      return true;
+    },
+
+    function (state) {
+      const c = db._collection(cn);
+      assertEqual(c.count(), 2);
+      assertEqual(c.toArray().length, 2);
+    }
+  );
+},
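The `32 * 1024 + 1` document count in `testTruncateCollectionBiggerAndThenSome` is deliberate: it appears intended to push the collection past the threshold at which the RocksDB engine executes `truncate()` as a range delete rather than as per-document removals, as the in-test comment states. The trailing inserts of `"a"` and `"b"` then verify that replication to the slave still works after such a range delete.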
@@ -492,7 +531,7 @@ function BaseTestConfig () {
   function (state) {
     db._collection(cn).truncate(); // should hit range-delete in rocksdb
     assertEqual(db._collection(cn).count(), 0);
     assertEqual(db._collection(cn).all().toArray().length, 0);
     assertEqual(db._collection(cn).toArray().length, 0);
   },

   function (state) {
@@ -502,7 +541,7 @@ function BaseTestConfig () {
   function (state) {
     const c = db._collection(cn);
     assertEqual(c.count(), 0);
     assertEqual(c.all().toArray().length, 0);
     assertEqual(c.toArray().length, 0);
   }
   );
 },

@@ -98,7 +98,7 @@ const compare = function (masterFunc, masterFunc2, slaveFuncOngoing, slaveFuncFi
   applierConfiguration.username = 'root';
   applierConfiguration.password = '';
   applierConfiguration.force32mode = false;
-  applierConfiguration.requireFromPresent = false;
+  applierConfiguration.requireFromPresent = true;

   if (!applierConfiguration.hasOwnProperty('chunkSize')) {
     applierConfiguration.chunkSize = 16384;
@@ -165,6 +165,101 @@ function BaseTestConfig () {
   'use strict';

   return {

+    testStatsInsert: function () {
+      connectToMaster();
+
+      compare(
+        function (state) {
+        },
+
+        function (state) {
+          db._create(cn);
+          for (let i = 0; i < 1000; ++i) {
+            db._collection(cn).insert({ _key: "test" + i });
+          }
+          internal.wal.flush(true, true);
+        },
+
+        function (state) {
+          return true;
+        },
+
+        function (state) {
+          let s = replication.applier.state().state;
+          assertTrue(s.totalDocuments >= 1000);
+          assertTrue(s.totalFetchTime > 0);
+          assertTrue(s.totalApplyTime > 0);
+          assertTrue(s.averageApplyTime > 0);
+        }
+      );
+    },
+
+    testStatsInsertUpdate: function () {
+      connectToMaster();
+
+      compare(
+        function (state) {
+          db._create(cn);
+          for (let i = 0; i < 1000; ++i) {
+            db._collection(cn).insert({ _key: "test" + i });
+          }
+          internal.wal.flush(true, true);
+        },
+
+        function (state) {
+          for (let i = 0; i < 1000; ++i) {
+            db._collection(cn).update("test" + i, { value: i });
+          }
+          internal.wal.flush(true, true);
+        },
+
+        function (state) {
+          return true;
+        },
+
+        function (state) {
+          let s = replication.applier.state().state;
+          assertTrue(s.totalDocuments >= 1000);
+          assertTrue(s.totalFetchTime > 0);
+          assertTrue(s.totalApplyTime > 0);
+          assertTrue(s.averageApplyTime > 0);
+        }
+      );
+    },
+
+    testStatsRemove: function () {
+      connectToMaster();
+
+      compare(
+        function (state) {
+        },
+
+        function (state) {
+          db._create(cn);
+          for (let i = 0; i < 1000; ++i) {
+            db._collection(cn).insert({ _key: "test" + i });
+          }
+          for (let i = 0; i < 1000; ++i) {
+            db._collection(cn).remove("test" + i);
+          }
+          internal.wal.flush(true, true);
+        },
+
+        function (state) {
+          return true;
+        },
+
+        function (state) {
+          let s = replication.applier.state().state;
+          assertTrue(s.totalDocuments >= 1000);
+          assertTrue(s.totalRemovals >= 1000);
+          assertTrue(s.totalFetchTime > 0);
+          assertTrue(s.totalApplyTime > 0);
+          assertTrue(s.averageApplyTime > 0);
+        }
+      );
+    },
+
     testIncludeCollection: function () {
       connectToMaster();
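The three new `testStats*` cases pin down the applier statistics ported from 3.4: `totalDocuments`, `totalRemovals`, `totalFetchTime`, `totalApplyTime` and `averageApplyTime`. A minimal sketch of reading them on the slave (field names as asserted above):

```js
const replication = require("@arangodb/replication");

// applier.state() returns an object whose inner "state" member
// carries the counters asserted in the tests above
const s = replication.applier.state().state;

print("documents applied:  " + s.totalDocuments);
print("removals applied:   " + s.totalRemovals);
print("total fetch time:   " + s.totalFetchTime);
print("total apply time:   " + s.totalApplyTime);
print("average apply time: " + s.averageApplyTime);
```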
@@ -282,6 +377,44 @@ function BaseTestConfig () {
       );
     },

+    testPrimaryKeyConflictInTransaction: function() {
+      connectToMaster();
+
+      compare(
+        function(state) {
+          db._drop(cn);
+          db._create(cn);
+        },
+
+        function(state) {
+          // insert same record on slave that we will insert on the master
+          connectToSlave();
+          db[cn].insert({ _key: "boom", who: "slave" });
+          connectToMaster();
+          db._executeTransaction({
+            collections: { write: cn },
+            action: function(params) {
+              let db = require("internal").db;
+              db[params.cn].insert({ _key: "meow", foo: "bar" });
+              db[params.cn].insert({ _key: "boom", who: "master" });
+            },
+            params: { cn }
+          });
+        },
+
+        function(state) {
+          return true;
+        },
+
+        function(state) {
+          assertEqual(2, db[cn].count());
+          assertEqual("bar", db[cn].document("meow").foo);
+          // master document version must have won
+          assertEqual("master", db[cn].document("boom").who);
+        }
+      );
+    },
+
     testSecondaryKeyConflict: function () {
       connectToMaster();

@@ -327,6 +460,55 @@ function BaseTestConfig () {
       );
     },

+    testSecondaryKeyConflictInTransaction: function() {
+      connectToMaster();
+
+      compare(
+        function(state) {
+          db._drop(cn);
+          db._create(cn);
+          db[cn].ensureIndex({ type: "hash", fields: ["value"], unique: true });
+        },
+
+        function(state) {
+          // insert same record on slave that we will insert on the master
+          connectToSlave();
+          db[cn].insert({ _key: "slave", value: "one" });
+          connectToMaster();
+          db._executeTransaction({
+            collections: { write: cn },
+            action: function(params) {
+              let db = require("internal").db;
+              db[params.cn].insert({ _key: "meow", value: "abc" });
+              db[params.cn].insert({ _key: "master", value: "one" });
+            },
+            params: { cn }
+          });
+        },
+
+        function(state) {
+          return true;
+        },
+
+        function(state) {
+          assertEqual(2, db[cn].count());
+          assertEqual("abc", db[cn].document("meow").value);
+
+          assertNull(db[cn].firstExample({ _key: "slave" }));
+          assertNotNull(db[cn].firstExample({ _key: "master" }));
+          let docs = db[cn].toArray();
+          docs.sort(function(l, r) {
+            if (l._key !== r._key) {
+              return l._key < r._key ? -1 : 1;
+            }
+            return 0;
+          });
+          assertEqual("master", docs[0]._key);
+          assertEqual("one", docs[0].value);
+        }
+      );
+    },
+
     // //////////////////////////////////////////////////////////////////////////////
     // / @brief test collection creation
     // //////////////////////////////////////////////////////////////////////////////

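Together with the non-transactional variants, these tests document the applier's conflict handling: when a replicated insert collides with a document that exists only on the slave, whether via the primary key (`_key: "boom"`) or via a unique secondary index (`value: "one"`), the conflicting local document is removed so that the master's version wins, and the same holds when the conflicting insert arrives as part of a transaction.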
@@ -335,24 +335,32 @@ describe ArangoDB do
     end

     it "fetches a create collection action from the follow log" do
-      ArangoDB.drop_collection("UnitTestsReplication")
+      cid = 0
+      cuid = 0
+      doc = {}
+
+      while 1
+        ArangoDB.drop_collection("UnitTestsReplication")

-      sleep 5
-
-      cmd = api + "/lastTick"
-      doc = ArangoDB.log_get("#{prefix}-lastTick", cmd, :body => "")
-      doc.code.should eq(200)
-      fromTick = doc.parsed_response["tick"]
+        cmd = api + "/lastTick"
+        doc = ArangoDB.log_get("#{prefix}-lastTick", cmd, :body => "")
+        doc.code.should eq(200)
+        fromTick = doc.parsed_response["tick"]
+        sleep 5

-      cid = ArangoDB.create_collection("UnitTestsReplication")
-      cuid = ArangoDB.properties_collection(cid)["globallyUniqueId"]
-
-      sleep 5
+        cid = ArangoDB.create_collection("UnitTestsReplication")
+        cuid = ArangoDB.properties_collection(cid)["globallyUniqueId"]
+        sleep 1

-      cmd = api + "/tail?global=true&from=" + fromTick
-      doc = ArangoDB.log_get("#{prefix}-follow-create-collection", cmd, :body => "", :format => :plain)
-      doc.code.should eq(200)
+        cmd = api + "/tail?global=true&from=" + fromTick
+        doc = ArangoDB.log_get("#{prefix}-follow-create-collection", cmd, :body => "", :format => :plain)
+        [200, 204].should include(doc.code)
+
+        break if doc.headers["x-arango-replication-frompresent"] == "true"
+      end

       doc.headers["x-arango-replication-lastincluded"].should match(/^\d+$/)
       doc.headers["x-arango-replication-lastincluded"].should_not eq("0")
       doc.headers["content-type"].should eq("application/x-arango-dump; charset=utf-8")

@@ -395,44 +403,48 @@ describe ArangoDB do
     end

     it "fetches some collection operations from the follow log" do
-      ArangoDB.drop_collection("UnitTestsReplication")
+      while 1
+        ArangoDB.drop_collection("UnitTestsReplication")

-      sleep 5
+        sleep 5

-      cmd = api + "/lastTick"
-      doc = ArangoDB.log_get("#{prefix}-follow-collection", cmd, :body => "")
-      doc.code.should eq(200)
-      fromTick = doc.parsed_response["tick"]
+        cmd = api + "/lastTick"
+        doc = ArangoDB.log_get("#{prefix}-follow-collection", cmd, :body => "")
+        doc.code.should eq(200)
+        fromTick = doc.parsed_response["tick"]

-      # create collection
-      cid = ArangoDB.create_collection("UnitTestsReplication")
-      cuid = ArangoDB.properties_collection(cid)["globallyUniqueId"]
+        # create collection
+        cid = ArangoDB.create_collection("UnitTestsReplication")
+        cuid = ArangoDB.properties_collection(cid)["globallyUniqueId"]

-      # create document
-      cmd = "/_api/document?collection=UnitTestsReplication"
-      body = "{ \"_key\" : \"test\", \"test\" : false }"
-      doc = ArangoDB.log_post("#{prefix}-follow-collection", cmd, :body => body)
-      doc.code.should eq(201)
-      rev = doc.parsed_response["_rev"]
+        # create document
+        cmd = "/_api/document?collection=UnitTestsReplication"
+        body = "{ \"_key\" : \"test\", \"test\" : false }"
+        doc = ArangoDB.log_post("#{prefix}-follow-collection", cmd, :body => body)
+        doc.code.should eq(201)
+        rev = doc.parsed_response["_rev"]

-      # update document
-      cmd = "/_api/document/UnitTestsReplication/test"
-      body = "{ \"updated\" : true }"
-      doc = ArangoDB.log_patch("#{prefix}-follow-collection", cmd, :body => body)
-      doc.code.should eq(201)
-      rev2 = doc.parsed_response["_rev"]
+        # update document
+        cmd = "/_api/document/UnitTestsReplication/test"
+        body = "{ \"updated\" : true }"
+        doc = ArangoDB.log_patch("#{prefix}-follow-collection", cmd, :body => body)
+        doc.code.should eq(201)
+        rev2 = doc.parsed_response["_rev"]

-      # delete document
-      cmd = "/_api/document/UnitTestsReplication/test"
-      doc = ArangoDB.log_delete("#{prefix}-follow-collection", cmd)
-      doc.code.should eq(200)
+        # delete document
+        cmd = "/_api/document/UnitTestsReplication/test"
+        doc = ArangoDB.log_delete("#{prefix}-follow-collection", cmd)
+        doc.code.should eq(200)

-      sleep 5
+        sleep 1

-      cmd = api + "/tail?global=true&from=" + fromTick
-      doc = ArangoDB.log_get("#{prefix}-follow-create-collection", cmd, :body => "", :format => :plain)
-      doc.code.should eq(200)
-      body = doc.response.body
+        cmd = api + "/tail?global=true&from=" + fromTick
+        doc = ArangoDB.log_get("#{prefix}-follow-create-collection", cmd, :body => "", :format => :plain)
+        [200, 204].should include(doc.code)
+        body = doc.response.body
+
+        break if doc.headers["x-arango-replication-frompresent"] == "true"
+      end

       doc.headers["x-arango-replication-lastincluded"].should match(/^\d+$/)
       doc.headers["x-arango-replication-lastincluded"].should_not eq("0")

@@ -537,12 +549,16 @@ describe ArangoDB do
       doc = ArangoDB.log_delete("#{prefix}-follow-collection", cmd)
       doc.code.should eq(200)

-      sleep 3
+      while 1
+        sleep 1

-      cmd = api + "/tail?global=true&from=" + fromTick
-      doc = ArangoDB.log_get("#{prefix}-follow-create-collection", cmd, :body => "", :format => :plain)
-      doc.code.should eq(200)
-      body = doc.response.body
+        cmd = api + "/tail?global=true&from=" + fromTick
+        doc = ArangoDB.log_get("#{prefix}-follow-create-collection", cmd, :body => "", :format => :plain)
+        [200, 204].should include(doc.code)
+        body = doc.response.body
+
+        break if doc.headers["x-arango-replication-frompresent"] == "true"
+      end

       while 1
         position = body.index("\n")
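The rewritten specs all converge on the same polling pattern: fetch the current tick via `lastTick`, perform the writes, then tail from that tick until the server confirms via the `x-arango-replication-frompresent` header that the requested range was fully served. An equivalent arangosh sketch, assuming `api` is the `/_api/wal` prefix (an assumption; the actual prefix is defined outside this excerpt) and that `GET_RAW` exposes the response headers:

```js
const api = "/_api/wal"; // assumed prefix, see note above

// current WAL tick before making changes
const fromTick = arango.GET(api + "/lastTick").tick;

// ... perform some writes against the server here ...

let res;
do {
  // 200 means data was returned, 204 means no data in the range yet
  res = arango.GET_RAW(api + "/tail?global=true&from=" + fromTick);
} while (res.headers["x-arango-replication-frompresent"] !== "true");

print(res.headers["x-arango-replication-lastincluded"]);
```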