diff --git a/CHANGELOG b/CHANGELOG index 96acd466b5..b17869ef5d 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,6 +1,19 @@ v3.4.5 (XXXX-XX-XX) ------------------- +* added startup option `--rocksdb.allow-fallocate` + + When set to true, allows RocksDB to use the fallocate call. If false, fallocate + calls are bypassed and no preallocation is done. Preallocation is turned on by + default, but can be turned off for operating system versions that are known to + have issues with it. + This option only has an effect on operating systems that support fallocate. + +* added startup option `--rocksdb.limit-open-files-at-startup` + + If set to true, this will limit the number of .sst files RocksDB will inspect at + startup, which can reduce the number of IO operations performed at startup. + * don't run compact() on a collection after a truncate() was done in the same transaction running compact() in the same transaction will only increase the data size on disk due to diff --git a/Documentation/Books/Manual/Programs/Arangod/Rocksdb.md b/Documentation/Books/Manual/Programs/Arangod/Rocksdb.md index 2c95e1aa31..577a7e9652 100644 --- a/Documentation/Books/Manual/Programs/Arangod/Rocksdb.md +++ b/Documentation/Books/Manual/Programs/Arangod/Rocksdb.md @@ -131,6 +131,18 @@ Only meaningful on Linux. If set, use `O_DIRECT` for writing files. Default: fal If set, issue an `fsync` call when writing to disk (set to false to issue `fdatasync` only. Default: false. + +`--rocksdb.allow-fallocate` + +Allow RocksDB to use the fallocate call. If false, fallocate calls are bypassed +and no preallocation is done. Preallocation is turned on by default, but can be +turned off for operating system versions that are known to have issues with it. +This option only has an effect on operating systems that support fallocate. 
+ +`--rocksdb.limit-open-files-at-startup` + +If set to true, this will limit the number of .sst files RocksDB will inspect at +startup, which can reduce the number of IO operations performed at startup. `--rocksdb.block-align-data-blocks` diff --git a/arangod/RocksDBEngine/RocksDBEngine.cpp b/arangod/RocksDBEngine/RocksDBEngine.cpp index d1cef5aa10..c19e3a37c5 100644 --- a/arangod/RocksDBEngine/RocksDBEngine.cpp +++ b/arangod/RocksDBEngine/RocksDBEngine.cpp @@ -395,6 +395,7 @@ void RocksDBEngine::start() { transactionOptions.num_stripes = TRI_numberProcessors(); transactionOptions.transaction_lock_timeout = opts->_transactionLockTimeout; + _options.allow_fallocate = opts->_allowFAllocate; _options.enable_pipelined_write = opts->_enablePipelinedWrite; _options.write_buffer_size = static_cast(opts->_writeBufferSize); _options.max_write_buffer_number = static_cast(opts->_maxWriteBufferNumber); @@ -520,7 +521,14 @@ void RocksDBEngine::start() { _options.create_if_missing = true; _options.create_missing_column_families = true; - _options.max_open_files = -1; + + if (opts->_limitOpenFilesAtStartup) { + _options.max_open_files = 16; + _options.skip_stats_update_on_db_open = true; + _options.avoid_flush_during_recovery = true; + } else { + _options.max_open_files = -1; + } // WAL_ttl_seconds needs to be bigger than the sync interval of the count // manager. 
Should be several times bigger counter_sync_seconds @@ -700,6 +708,10 @@ void RocksDBEngine::start() { if (logger != nullptr) { logger->enable(); } + + if (opts->_limitOpenFilesAtStartup) { + _db->SetDBOptions({{"max_open_files", "-1"}}); + } if (_syncInterval > 0) { _syncThread.reset(new RocksDBSyncThread(this, std::chrono::milliseconds(_syncInterval))); diff --git a/lib/ApplicationFeatures/RocksDBOptionFeature.cpp b/lib/ApplicationFeatures/RocksDBOptionFeature.cpp index e9a7a1efb2..158a814e9a 100644 --- a/lib/ApplicationFeatures/RocksDBOptionFeature.cpp +++ b/lib/ApplicationFeatures/RocksDBOptionFeature.cpp @@ -81,7 +81,9 @@ RocksDBOptionFeature::RocksDBOptionFeature(application_features::ApplicationServ _skipCorrupted(false), _dynamicLevelBytes(true), _enableStatistics(false), - _useFileLogging(false) { + _useFileLogging(false), + _limitOpenFilesAtStartup(false), + _allowFAllocate(true) { // setting the number of background jobs to _maxBackgroundJobs = static_cast( std::max((size_t)2, std::min(TRI_numberProcessors(), (size_t)8))); @@ -306,6 +308,18 @@ void RocksDBOptionFeature::collectOptions(std::shared_ptr option "skip corrupted records in WAL recovery", new BooleanParameter(&_skipCorrupted), arangodb::options::makeFlags(arangodb::options::Flags::Hidden)); + + options->addOption("--rocksdb.limit-open-files-at-startup", + "limit the amount of .sst files RocksDB will inspect at startup, in order to startup reduce IO", + new BooleanParameter(&_limitOpenFilesAtStartup), + arangodb::options::makeFlags(arangodb::options::Flags::Hidden)) + .setIntroducedIn(30405).setIntroducedIn(30500); + + options->addOption("--rocksdb.allow-fallocate", + "if true, allow RocksDB to use fallocate calls. 
if false, fallocate calls are bypassed", + new BooleanParameter(&_allowFAllocate), + arangodb::options::makeFlags(arangodb::options::Flags::Hidden)) + .setIntroducedIn(30405).setIntroducedIn(30500); } void RocksDBOptionFeature::validateOptions(std::shared_ptr options) { @@ -392,9 +406,11 @@ void RocksDBOptionFeature::start() { << ", level0_compaction_trigger: " << _level0CompactionTrigger << ", level0_slowdown_trigger: " << _level0SlowdownTrigger << ", enable_pipelined_write: " << _enablePipelinedWrite - << ", optimize_filters_for_hits: " << _optimizeFiltersForHits - << ", use_direct_reads: " << _useDirectReads - << ", use_direct_io_for_flush_and_compaction: " << _useDirectIoForFlushAndCompaction - << ", use_fsync: " << _useFSync + << ", optimize_filters_for_hits: " << std::boolalpha << _optimizeFiltersForHits + << ", use_direct_reads: " << std::boolalpha << _useDirectReads + << ", use_direct_io_for_flush_and_compaction: " << std::boolalpha << _useDirectIoForFlushAndCompaction + << ", use_fsync: " << std::boolalpha << _useFSync + << ", allow_fallocate: " << std::boolalpha << _allowFAllocate + << ", max_open_files limit: " << std::boolalpha << _limitOpenFilesAtStartup << ", dynamic_level_bytes: " << std::boolalpha << _dynamicLevelBytes; } diff --git a/lib/ApplicationFeatures/RocksDBOptionFeature.h b/lib/ApplicationFeatures/RocksDBOptionFeature.h index ba4f1ea5c7..9615f50d08 100644 --- a/lib/ApplicationFeatures/RocksDBOptionFeature.h +++ b/lib/ApplicationFeatures/RocksDBOptionFeature.h @@ -80,6 +80,8 @@ class RocksDBOptionFeature final : public application_features::ApplicationFeatu bool _dynamicLevelBytes; bool _enableStatistics; bool _useFileLogging; + bool _limitOpenFilesAtStartup; + bool _allowFAllocate; }; } // namespace arangodb