From 00f5cab4071296e4cee87092dfdf55ae64a74b72 Mon Sep 17 00:00:00 2001
From: Jan Steemann
Date: Fri, 3 May 2013 16:37:32 +0200
Subject: [PATCH] changed compaction to only clean datafiles with at least 10
 % of dead objects

---
 CHANGELOG                   |  3 ++
 arangod/VocBase/compactor.c | 64 +++++++++++++++++++++++++++++--------
 2 files changed, 53 insertions(+), 14 deletions(-)

diff --git a/CHANGELOG b/CHANGELOG
index 40f48f88b8..1fb938f7c8 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -1,6 +1,9 @@
 v1.3.0-alpha2 (XXXX-XX-XX)
 --------------------------
 
+* changed compaction to only compact datafiles with at least 10% of dead
+  documents (byte size-wise)
+
 * issue #498: fixed reload of authentication info when using
   `require("org/arangodb/users").reload()`
 
diff --git a/arangod/VocBase/compactor.c b/arangod/VocBase/compactor.c
index 2817828d1e..353c29e649 100644
--- a/arangod/VocBase/compactor.c
+++ b/arangod/VocBase/compactor.c
@@ -48,6 +48,27 @@
 /// @{
 ////////////////////////////////////////////////////////////////////////////////
 
+////////////////////////////////////////////////////////////////////////////////
+/// @brief minimum size of dead data (in bytes) in a datafile that will make
+/// the datafile eligible for compaction at all.
+///
+/// Any datafile with less dead data than the threshold will not become a
+/// candidate for compaction.
+////////////////////////////////////////////////////////////////////////////////
+
+#define COMPACTOR_DEAD_SIZE_THRESHOLD (1024 * 128)
+
+////////////////////////////////////////////////////////////////////////////////
+/// @brief percentage of dead documents in a datafile that will trigger the
+/// compaction
+///
+/// for example, if the collection contains 800 bytes of alive and 400 bytes of
+/// dead documents, the share of the dead documents is 400 / (400 + 800) = 33 %.
+/// if this value is higher than the threshold, the datafile will be compacted
+////////////////////////////////////////////////////////////////////////////////
+
+#define COMPACTOR_DEAD_SIZE_SHARE (0.1)
+
 ////////////////////////////////////////////////////////////////////////////////
 /// @brief compactify interval in microseconds
 ////////////////////////////////////////////////////////////////////////////////
@@ -431,8 +452,7 @@ static bool Compactifier (TRI_df_marker_t const* marker,
 
     if (found != NULL) {
       found2 = CONST_CAST(found);
-      // the fid won't change
-      TRI_ASSERT_MAINTAINER(found2->_fid == context->_dfi._fid);
+      found2->_fid = context->_dfi._fid;
       found2->_data = result;
 
       // let _key point to the new key position
@@ -662,22 +682,38 @@ static bool CompactifyDocumentCollection (TRI_document_collection_t* document) {
   for (i = 0;  i < n;  ++i) {
     TRI_datafile_t* df;
     TRI_doc_datafile_info_t* dfi;
+    double share;
 
     df = primary->base._datafiles._buffer[i];
     dfi = TRI_FindDatafileInfoPrimaryCollection(primary, df->_fid);
-
-    if (dfi->_numberDead > 0) {
-      // only use those datafiles that contain dead objects
-      TRI_PushBackVector(&vector, dfi);
-
-      // we stop at the first datafile.
-      // this is better than going over all datafiles in a collection in one go
-      // because the compactor is single-threaded, and collecting all datafiles
-      // might take a long time (it might even be that there is a request to
-      // delete the collection in the middle of compaction, but the compactor
-      // will not pick this up as it is read-locking the collection status)
-      break;
+
+    if (dfi->_numberDead == 0 || dfi->_sizeDead < COMPACTOR_DEAD_SIZE_THRESHOLD) {
+      continue;
     }
+
+    share = (double) dfi->_sizeDead / ((double) dfi->_sizeDead + (double) dfi->_sizeAlive);
+
+    if (share < COMPACTOR_DEAD_SIZE_SHARE) {
+      continue;
+    }
+
+    LOG_TRACE("found datafile eligible for compaction. fid: %llu, numberDead: %llu, numberAlive: %llu, sizeDead: %llu, sizeAlive: %llu",
+              (unsigned long long) df->_fid,
+              (unsigned long long) dfi->_numberDead,
+              (unsigned long long) dfi->_numberAlive,
+              (unsigned long long) dfi->_sizeDead,
+              (unsigned long long) dfi->_sizeAlive);
+
+    // only use those datafiles that contain dead objects
+    TRI_PushBackVector(&vector, dfi);
+
+    // we stop at the first datafile.
+    // this is better than going over all datafiles in a collection in one go
+    // because the compactor is single-threaded, and collecting all datafiles
+    // might take a long time (it might even be that there is a request to
+    // delete the collection in the middle of compaction, but the compactor
+    // will not pick this up as it is read-locking the collection status)
+    break;
   }
 
   // can now continue without the lock
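
Background note on the selection rule added by this patch: a datafile becomes a
compaction candidate only if it contains any dead documents, holds at least
COMPACTOR_DEAD_SIZE_THRESHOLD (128 KB) of dead data, and its dead share
(sizeDead / (sizeDead + sizeAlive)) is at least COMPACTOR_DEAD_SIZE_SHARE (10 %).
The standalone C sketch below just replays that check with the constants from
the patch; the struct, function, and example values are made up for
illustration and are not the real TRI_doc_datafile_info_t type or ArangoDB API.

    /* minimal sketch of the candidate check; types and names are hypothetical */
    #include <stdint.h>
    #include <stdio.h>

    #define COMPACTOR_DEAD_SIZE_THRESHOLD (1024 * 128)
    #define COMPACTOR_DEAD_SIZE_SHARE     (0.1)

    typedef struct {
      uint64_t numberDead;   /* number of dead documents in the datafile */
      uint64_t sizeDead;     /* byte size of dead documents */
      uint64_t sizeAlive;    /* byte size of alive documents */
    } datafile_info_sketch_t;

    /* returns 1 if the datafile would be picked as a compaction candidate */
    static int is_compaction_candidate (const datafile_info_sketch_t* dfi) {
      double share;

      if (dfi->numberDead == 0 || dfi->sizeDead < COMPACTOR_DEAD_SIZE_THRESHOLD) {
        return 0;  /* not enough dead data in absolute terms */
      }

      share = (double) dfi->sizeDead / ((double) dfi->sizeDead + (double) dfi->sizeAlive);

      return share >= COMPACTOR_DEAD_SIZE_SHARE;  /* at least 10 % dead, byte size-wise */
    }

    int main (void) {
      /* same 33 % dead share as the doc comment example, at two absolute sizes */
      datafile_info_sketch_t small = { 10, 400, 800 };                  /* below 128 KB of dead data */
      datafile_info_sketch_t large = { 1000, 400 * 1024, 800 * 1024 };  /* enough dead data */

      printf("small: %d\n", is_compaction_candidate(&small));  /* prints 0 */
      printf("large: %d\n", is_compaction_candidate(&large));  /* prints 1 */
      return 0;
    }

Run as written, the sketch rejects the first datafile because its dead bytes
stay under the absolute threshold, and accepts the second because the same
33 % dead share now comes with enough dead data to be worth compacting.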