mirror of https://gitee.com/bigwinds/arangodb
added option `--rocksdb.enforce-block-cache-size-limit` (#7505)
This commit is contained in:
parent
d2d68ee34d
commit
be0f8b59fa
81
CHANGELOG
81
CHANGELOG
|
@ -1,8 +1,6 @@
|
|||
devel
|
||||
-----
|
||||
|
||||
* Export Version and Storage Engine in `_admin/cluster/health` for Coordinators and DBServers.
|
||||
|
||||
* fixed Foxx complaining about valid `$schema` value in manifest.json
|
||||
|
||||
* Foxx `req.makeAbsolute` now will return meaningful values when ArangoDB is using
|
||||
|
@ -92,12 +90,89 @@ devel
|
|||
|
||||
* use `-std=c++14` for ArangoDB compilation
|
||||
|
||||
|
||||
v3.4.0-rc.5 (XXXX-XX-XX)
|
||||
------------------------
|
||||
|
||||
* export version and storage engine in `_admin/cluster/health` for Coordinators
|
||||
and DBServers.
|
||||
|
||||
* restrict the total amount of data to build up in all in-memory RocksDB write buffers
|
||||
by default to a certain fraction of the available physical RAM. This helps restrict
|
||||
memory usage for the arangod process, but may have an effect on the RocksDB storage
|
||||
engine's write performance.
|
||||
|
||||
In ArangoDB 3.3 the governing configuration option `--rocksdb.total-write-buffer-size`
|
||||
had a default value of `0`, which meant that the memory usage was not limited. ArangoDB
|
||||
3.4 now changes the default value to about 50% of available physical RAM, and 512MiB
|
||||
for setups with less than 4GiB of RAM.
|
||||
|
||||
* lower default value for `--cache.size` startup option from about 30% of physical RAM to
|
||||
about 25% of physical RAM.
|
||||
|
||||
* fix internal issue #2786: improved confirmation dialog when clicking the truncate
|
||||
button in the web UI
|
||||
|
||||
* Updated joi library (web UI), improved Foxx mount path validation
|
||||
|
||||
* disable startup warning for Linux kernel variable `vm.overcommit_memory` settings
|
||||
values of 0 or 1.
|
||||
Effectively, an `overcommit_memory` setting value of 0 or 1 fixes two memory-allocation
|
||||
related issues with the default memory allocator used in ArangoDB release builds on
|
||||
64bit Linux.
|
||||
The issues will remain when running with an `overcommit_memory` settings value of 2,
|
||||
so this is now discouraged.
|
||||
Setting `overcommit_memory` to 0 or 1 (0 is the Linux kernel's default) fixes issues
|
||||
with increasing numbers of memory mappings for the arangod process (which may lead
|
||||
to an out-of-memory situation if the kernel's maximum number of mappings threshold
|
||||
is hit) and an increasing amount of memory that the kernel counts as "committed".
|
||||
With an `overcommit_memory` setting of 0 or 1, an arangod process may either be
|
||||
killed by the kernel's OOM killer or will die with a segfault when accessing memory
|
||||
it has allocated before but the kernel could not provide later on. This is still
|
||||
more acceptable than the kernel not providing any more memory to the process when
|
||||
there is still physical memory left, which may have occurred with an `overcommit_memory`
|
||||
setting of 2 after the arangod process had done lots of allocations.
|
||||
|
||||
In summary, the recommendation for the `overcommit_memory` setting is now to set it
|
||||
to 0 or 1 (0 is kernel default) and not use 2.
|
||||
|
||||
* fixed Foxx complaining about valid `$schema` value in manifest.json
|
||||
|
||||
* fix for supervision, which started failing servers using old transient store
|
||||
|
||||
* fixed a bug where indexes are used in the cluster while still being
|
||||
built on the db servers
|
||||
|
||||
* fix move leader shard: wait until all but the old leader are in sync.
|
||||
This fixes some unstable tests.
|
||||
|
||||
v3.4.0-rc.4 (XXXX-XX-XX)
|
||||
|
||||
v3.4.0-rc.4 (2018-11-04)
|
||||
------------------------
|
||||
|
||||
* fixed Foxx queues not retrying jobs with infinite `maxFailures`
|
||||
|
||||
* increase AQL query string parsing performance for queries with many (100K+) string
|
||||
values contained in the query string
|
||||
|
||||
* increase timeouts for inter-node communication in the cluster
|
||||
|
||||
* fixed undefined behavior in `/_api/import` when importing a single document went
|
||||
wrong
|
||||
|
||||
* replication bugfixes
|
||||
|
||||
* stop printing `connection class corrupted` in arangosh
|
||||
|
||||
when just starting the arangosh without a connection to a server and running
|
||||
code such as `require("internal")`, the shell always printed "connection class
|
||||
corrupted", which was somewhat misleading.
|
||||
|
||||
* add separate option `--query.slow-streaming-threshold` for tracking slow
|
||||
streaming queries with a different timeout value
|
||||
|
||||
* increase maximum number of collections/shards in an AQL query from 256 to 2048
|
||||
|
||||
* do not rely on `_modules` collection being present at arangod startup
|
||||
|
||||
* fixes a routing issue within the web ui after the use of views
|
||||
|
|
|
@ -20,4 +20,4 @@ Global size limit for all hash caches: `--cache.size`
|
|||
The global caching system, all caches, and all the data contained therein will
|
||||
fit inside this limit. The size is specified in bytes. If there is less than
|
||||
4GiB of RAM on the system, the default value is 256MiB. If there is more,
|
||||
the default is `(system RAM size - 2GiB) * 0.3`.
|
||||
the default is `(system RAM size - 2GiB) * 0.25`.
|
||||
|
|
|
@ -38,7 +38,11 @@ Default: 2.
|
|||
The total amount of data to build up in all in-memory buffers (backed by log
|
||||
files). This option, together with the block cache size configuration option,
|
||||
can be used to limit memory usage. If set to 0, the memory usage is not limited.
|
||||
Default: 0 (disabled).
|
||||
|
||||
If set to a value larger than 0, this will cap memory usage for write buffers
|
||||
but may have an effect on performance. If there is less than 4GiB of RAM on the
|
||||
system, the default value is 512MiB. If there is more, the default is
|
||||
`(system RAM size - 2GiB) * 0.5`.
|
||||
|
||||
`--rocksdb.min-write-buffer-number-to-merge`
|
||||
|
||||
|
@ -153,10 +157,19 @@ Number of threads for low priority operations (e.g. compaction). Default: number
|
|||
|
||||
`--rocksdb.block-cache-size`
|
||||
|
||||
This is the size of the block cache in bytes. Increasing this may improve
|
||||
This is the maximum size of the block cache in bytes. Increasing this may improve
|
||||
performance. If there is less than 4GiB of RAM on the system, the default value
|
||||
is 256MiB. If there is more, the default is `(system RAM size - 2GiB) * 0.3`.
|
||||
|
||||
`--rocksdb.enforce-block-cache-size-limit`
|
||||
|
||||
Whether or not the maximum size of the RocksDB block cache is strictly enforced.
|
||||
This option can be set to limit the memory usage of the block cache to at most the
|
||||
specified size. If inserting a data block into the cache would then exceed the
|
||||
cache's capacity, the data block will not be inserted. If the flag is not set,
|
||||
a data block may still get inserted into the cache. It is evicted later, but the
|
||||
cache may temporarily grow beyond its capacity limit.
|
||||
|
||||
`--rocksdb.block-cache-shard-bits`
|
||||
|
||||
The number of bits used to shard the block cache to allow concurrent operations.
|
||||
|
|
|
@ -190,6 +190,20 @@ Note: this option is not supported on Windows platforms. Setting the sync interv
|
|||
to a value greater than 0 will produce a startup warning on Windows.
|
||||
|
||||
|
||||
RocksDB write buffer size
|
||||
-------------------------
|
||||
|
||||
The total amount of data to build up in all in-memory write buffers (backed by log
|
||||
files) is now by default restricted to a certain fraction of the available physical
|
||||
RAM. This helps restrict memory usage for the arangod process, but may have an
|
||||
effect on the RocksDB storage engine's write performance.
|
||||
|
||||
In ArangoDB 3.3 the governing configuration option `--rocksdb.total-write-buffer-size`
|
||||
had a default value of `0`, which meant that the memory usage was not limited. ArangoDB
|
||||
3.4 now changes the default value to about 50% of available physical RAM, and 512MiB
|
||||
for setups with less than 4GiB of RAM.
|
||||
|
||||
|
||||
Threading and request handling
|
||||
------------------------------
|
||||
|
||||
|
|
|
@ -58,7 +58,7 @@ CacheManagerFeature::CacheManagerFeature(
|
|||
_manager(nullptr),
|
||||
_rebalancer(nullptr),
|
||||
_cacheSize((TRI_PhysicalMemory >= (static_cast<uint64_t>(4) << 30))
|
||||
? static_cast<uint64_t>((TRI_PhysicalMemory - (static_cast<uint64_t>(2) << 30)) * 0.3)
|
||||
? static_cast<uint64_t>((TRI_PhysicalMemory - (static_cast<uint64_t>(2) << 30)) * 0.25)
|
||||
: (256 << 20)),
|
||||
_rebalancingInterval(static_cast<uint64_t>(2 * 1000 * 1000)) {
|
||||
setOptional(true);
|
||||
|
|
|
@ -467,7 +467,10 @@ void RocksDBEngine::start() {
|
|||
rocksdb::BlockBasedTableOptions tableOptions;
|
||||
if (opts->_blockCacheSize > 0) {
|
||||
tableOptions.block_cache = rocksdb::NewLRUCache(
|
||||
opts->_blockCacheSize, static_cast<int>(opts->_blockCacheShardBits));
|
||||
opts->_blockCacheSize,
|
||||
static_cast<int>(opts->_blockCacheShardBits),
|
||||
/*strict_capacity_limit*/ opts->_enforceBlockCacheSizeLimit
|
||||
);
|
||||
// tableOptions.cache_index_and_filter_blocks =
|
||||
// opts->_compactionReadaheadSize > 0;
|
||||
} else {
|
||||
|
|
|
@ -72,6 +72,7 @@ RocksDBOptionFeature::RocksDBOptionFeature(
|
|||
_level0CompactionTrigger(2),
|
||||
_level0SlowdownTrigger(rocksDBDefaults.level0_slowdown_writes_trigger),
|
||||
_level0StopTrigger(rocksDBDefaults.level0_stop_writes_trigger),
|
||||
_enforceBlockCacheSizeLimit(false),
|
||||
_blockAlignDataBlocks(rocksDBTableOptionsDefaults.block_align),
|
||||
_enablePipelinedWrite(rocksDBDefaults.enable_pipelined_write),
|
||||
_optimizeFiltersForHits(rocksDBDefaults.optimize_filters_for_hits),
|
||||
|
@ -94,6 +95,15 @@ RocksDBOptionFeature::RocksDBOptionFeature(
|
|||
--_maxBackgroundJobs;
|
||||
} // if
|
||||
#endif
|
||||
|
||||
if (_totalWriteBufferSize == 0) {
|
||||
// unlimited write buffer size... now set to some fraction of physical RAM
|
||||
if (TRI_PhysicalMemory >= (static_cast<uint64_t>(4) << 30)) {
|
||||
_totalWriteBufferSize = static_cast<uint64_t>((TRI_PhysicalMemory - (static_cast<uint64_t>(2) << 30)) * 0.5);
|
||||
} else {
|
||||
_totalWriteBufferSize = (512 << 20);
|
||||
}
|
||||
}
|
||||
|
||||
setOptional(true);
|
||||
startsAfter("BasicsPhase");
|
||||
|
@ -251,6 +261,10 @@ void RocksDBOptionFeature::collectOptions(
|
|||
options->addOption("--rocksdb.block-cache-shard-bits",
|
||||
"number of shard bits to use for block cache (use -1 for default value)",
|
||||
new Int64Parameter(&_blockCacheShardBits));
|
||||
|
||||
options->addOption("--rocksdb.enforce-block-cache-size-limit",
|
||||
"if true, strictly enforces the block cache size limit",
|
||||
new BooleanParameter(&_enforceBlockCacheSizeLimit));
|
||||
|
||||
options->addOption("--rocksdb.table-block-size",
|
||||
"approximate size (in bytes) of user data packed per block",
|
||||
|
@ -357,6 +371,7 @@ void RocksDBOptionFeature::start() {
|
|||
<< ", num_threads_low: " << _numThreadsLow
|
||||
<< ", block_cache_size: " << _blockCacheSize
|
||||
<< ", block_cache_shard_bits: " << _blockCacheShardBits
|
||||
<< ", block_cache_strict_capacity_limit: " << _enforceBlockCacheSizeLimit
|
||||
<< ", table_block_size: " << _tableBlockSize
|
||||
<< ", recycle_log_file_num: " << _recycleLogFileNum
|
||||
<< ", compaction_read_ahead_size: " << _compactionReadaheadSize
|
||||
|
|
|
@ -72,6 +72,7 @@ class RocksDBOptionFeature final
|
|||
int64_t _level0CompactionTrigger;
|
||||
int64_t _level0SlowdownTrigger;
|
||||
int64_t _level0StopTrigger;
|
||||
bool _enforceBlockCacheSizeLimit;
|
||||
bool _blockAlignDataBlocks;
|
||||
bool _enablePipelinedWrite;
|
||||
bool _optimizeFiltersForHits;
|
||||
|
|
Loading…
Reference in New Issue