mirror of https://gitee.com/bigwinds/arangodb
Feature/hugepages (#2497)
* enabled checks for memory management * added MADV_NOHUGEPAGE * added documentation about NUMA and file systems
This commit is contained in:
parent
6bdc91c1b1
commit
4800a8b7f6
|
@ -143,6 +143,12 @@ void* adb_mmap(void* addr, size_t length, int prot, int flags) {
|
|||
__atomic_add_fetch(&adb_total_size, (uint64_t)length, __ATOMIC_SEQ_CST);
|
||||
}
|
||||
|
||||
#ifdef MADV_NOHUGEPAGE
|
||||
if (ret != MAP_FAILED) {
|
||||
madvise(ret, length, MADV_NOHUGEPAGE);
|
||||
}
|
||||
#endif
|
||||
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,6 +1,8 @@
|
|||
devel
|
||||
-----
|
||||
|
||||
* added checks for memory management and warn (i.e. if hugepages are enabled)
|
||||
|
||||
* set default SSL cipher suite string to "HIGH:!EXPORT:!aNULL@STRENGTH"
|
||||
|
||||
* fixed issue #2469: Authentication = true does not protect foxx-routes
|
||||
|
|
|
@ -1,9 +1,23 @@
|
|||
Operating System Configuration
|
||||
==============================
|
||||
|
||||
File Systems
|
||||
------------
|
||||
|
||||
(LINUX)
|
||||
|
||||
We recommend **not** using BTRFS on Linux; it is known to not work
|
||||
well in conjunction with ArangoDB. We have seen ArangoDB
|
||||
facing latency issues when accessing its database files on BTRFS
|
||||
partitions. In conjunction with BTRFS and AUFS we also saw data loss
|
||||
on restart.
|
||||
|
||||
|
||||
Virtual Memory Page Sizes
|
||||
--------------------------
|
||||
|
||||
(LINUX)
|
||||
|
||||
By default, ArangoDB uses Jemalloc as the memory allocator. Jemalloc does a good
|
||||
job of reducing virtual memory fragmentation, especially for long-running
|
||||
processes. Unfortunately, some OS configurations can interfere with Jemalloc's
|
||||
|
@ -14,18 +28,64 @@ high memory use. Therefore, we recommend disabling these features when using
|
|||
Jemalloc with ArangoDB. Please consult your operating system's documentation for
|
||||
how to do this.
|
||||
|
||||
Execute
|
||||
|
||||
sudo bash -c "echo madvise >/sys/kernel/mm/transparent_hugepage/enabled"
|
||||
sudo bash -c "echo madvise >/sys/kernel/mm/transparent_hugepage/defrag"
|
||||
|
||||
before executing `arangod`.
|
||||
|
||||
Swap Space
|
||||
----------
|
||||
|
||||
(LINUX)
|
||||
|
||||
It is recommended to assign swap space for a server that is running arangod.
|
||||
Configuring swap space can prevent the operating system's OOM killer from
|
||||
killing ArangoDB too eagerly on Linux.
|
||||
|
||||
### Over-Commit Memory
|
||||
|
||||
Execute
|
||||
|
||||
sudo bash -c "echo 0 >/proc/sys/vm/overcommit_memory"
|
||||
|
||||
before executing `arangod`.
|
||||
|
||||
From [www.kernel.org](https://www.kernel.org/doc/Documentation/sysctl/vm.txt):
|
||||
|
||||
- When this flag is 0, the kernel attempts to estimate the amount
|
||||
of free memory left when userspace requests more memory.
|
||||
|
||||
- When this flag is 1, the kernel pretends there is always enough
|
||||
memory until it actually runs out.
|
||||
|
||||
- When this flag is 2, the kernel uses a "never overcommit"
|
||||
policy that attempts to prevent any overcommit of memory.
|
||||
|
||||
### Zone Reclaim
|
||||
|
||||
Execute
|
||||
|
||||
sudo bash -c "echo 0 >/proc/sys/vm/zone_reclaim_mode"
|
||||
|
||||
before executing `arangod`.
|
||||
|
||||
From [www.kernel.org](https://www.kernel.org/doc/Documentation/sysctl/vm.txt):
|
||||
|
||||
This is value ORed together of
|
||||
|
||||
- 1 = Zone reclaim on
|
||||
- 2 = Zone reclaim writes dirty pages out
|
||||
- 4 = Zone reclaim swaps pages
|
||||
|
||||
NUMA
|
||||
----
|
||||
|
||||
Multi-processor systems often have non-uniform memory access (NUMA). ArangoDB
|
||||
should be started with interleave on such systems. This can be achieved using
|
||||
|
||||
numactl --interleave=all arangod ...
|
||||
|
||||
Environment Variables
|
||||
---------------------
|
||||
|
@ -34,3 +94,18 @@ It is recommended to set the environment variable `GLIBCXX_FORCE_NEW` to 1 on
|
|||
systems that use glibc++ in order to disable the memory pooling built into
|
||||
glibc++. That memory pooling is unnecessary because Jemalloc will already do
|
||||
memory pooling.
|
||||
|
||||
Execute
|
||||
|
||||
export GLIBCXX_FORCE_NEW=1
|
||||
|
||||
|
||||
before starting `arangod`.
|
||||
|
||||
32bit
|
||||
-----
|
||||
|
||||
While it is possible to compile ArangoDB on 32bit systems, this is not a
|
||||
recommended environment. 64bit systems can address a significantly bigger
|
||||
memory region.
|
||||
|
||||
|
|
|
@ -37,6 +37,8 @@ start () {
|
|||
( cd /var/run/arangodb && chown -R arangodb:arangodb . ) || exit 1
|
||||
( cd $PIDDIR && chown arangodb:arangodb . ) || exit 1
|
||||
|
||||
export GLIBCXX_FORCE_NEW=1
|
||||
|
||||
if [ "$1" = "--database.auto-upgrade" ]; then
|
||||
$DAEMON -c $CONF --uid arangodb --gid arangodb --server.rest-server false $@
|
||||
RETVAL=$?
|
||||
|
|
|
@ -37,6 +37,8 @@ start () {
|
|||
ulimit -H -n 131072 || true
|
||||
ulimit -S -n 131072 || true
|
||||
|
||||
export GLIBCXX_FORCE_NEW=1
|
||||
|
||||
case "$1" in
|
||||
"--upgrade")
|
||||
shift
|
||||
|
|
|
@ -35,6 +35,8 @@ start() {
|
|||
ulimit -H -n 131072 || true
|
||||
ulimit -S -n 131072 || true
|
||||
|
||||
export GLIBCXX_FORCE_NEW=1
|
||||
|
||||
case "$1" in
|
||||
"--upgrade")
|
||||
shift
|
||||
|
|
|
@ -55,6 +55,8 @@ start () {
|
|||
ulimit -H -n 131072 || true
|
||||
ulimit -S -n 131072 || true
|
||||
|
||||
export GLIBCXX_FORCE_NEW=1
|
||||
|
||||
case "$1" in
|
||||
"--upgrade")
|
||||
shift
|
||||
|
|
|
@ -55,6 +55,8 @@ start () {
|
|||
ulimit -H -n 131072 || true
|
||||
ulimit -S -n 131072 || true
|
||||
|
||||
export GLIBCXX_FORCE_NEW=1
|
||||
|
||||
case "$1" in
|
||||
"--upgrade")
|
||||
shift
|
||||
|
|
|
@ -38,29 +38,35 @@ EnvironmentFeature::EnvironmentFeature(
|
|||
}
|
||||
|
||||
void EnvironmentFeature::prepare() {
|
||||
#if 0
|
||||
if (sizeof(void*) == 4) {
|
||||
// 32 bit build
|
||||
LOG_TOPIC(WARN, arangodb::Logger::FIXME) << "this is a 32 bit build of ArangoDB. "
|
||||
<< "it is recommended to run a 64 bit build instead because it can "
|
||||
<< "address significantly bigger regions of memory";
|
||||
LOG_TOPIC(WARN, arangodb::Logger::MEMORY)
|
||||
<< "this is a 32 bit build of ArangoDB. "
|
||||
<< "it is recommended to run a 64 bit build instead because it can "
|
||||
<< "address significantly bigger regions of memory";
|
||||
}
|
||||
|
||||
#ifdef __linux__
|
||||
|
||||
#ifdef __GLIBC__
|
||||
char const* v = getenv("GLIBCXX_FORCE_NEW");
|
||||
|
||||
if (v == nullptr) {
|
||||
// environment variable not set
|
||||
LOG_TOPIC(DEBUG, arangodb::Logger::FIXME) << "environment variable GLIBCXX_FORCE_NEW' is not set. "
|
||||
<< "it is recommended to set it to some value to avoid memory pooling in glibc++";
|
||||
LOG_TOPIC(WARN, arangodb::Logger::MEMORY)
|
||||
<< "environment variable GLIBCXX_FORCE_NEW' is not set. "
|
||||
<< "it is recommended to set it to some value to avoid memory pooling "
|
||||
"in glibc++";
|
||||
LOG_TOPIC(WARN, arangodb::Logger::MEMORY)
|
||||
<< "execute 'export GLIBCXX_FORCE_NEW=1'";
|
||||
}
|
||||
#endif
|
||||
|
||||
try {
|
||||
std::string value = basics::FileUtils::slurp("/proc/sys/vm/overcommit_memory");
|
||||
std::string value =
|
||||
basics::FileUtils::slurp("/proc/sys/vm/overcommit_memory");
|
||||
uint64_t v = basics::StringUtils::uint64(value);
|
||||
if (v == 2) {
|
||||
if (v != 0 && v != 1) {
|
||||
// from https://www.kernel.org/doc/Documentation/sysctl/vm.txt:
|
||||
//
|
||||
// When this flag is 0, the kernel attempts to estimate the amount
|
||||
|
@ -69,15 +75,19 @@ void EnvironmentFeature::prepare() {
|
|||
// memory until it actually runs out.
|
||||
// When this flag is 2, the kernel uses a "never overcommit"
|
||||
// policy that attempts to prevent any overcommit of memory.
|
||||
LOG_TOPIC(WARN, Logger::FIXME) << "/proc/sys/vm/overcommit_memory is set to '"
|
||||
<< v << "'. it is recommended to set it to a value of 0 or 1";
|
||||
LOG_TOPIC(WARN, Logger::MEMORY)
|
||||
<< "/proc/sys/vm/overcommit_memory is set to '" << v
|
||||
<< "'. It is recommended to set it to a value of 0 or 1";
|
||||
LOG_TOPIC(WARN, Logger::MEMORY) << "execute 'sudo bash -c \"echo 0 > "
|
||||
"/proc/sys/vm/overcommit_memory\"'";
|
||||
}
|
||||
} catch (...) {
|
||||
// file not found or value not convertible into integer
|
||||
}
|
||||
|
||||
|
||||
try {
|
||||
std::string value = basics::FileUtils::slurp("/proc/sys/vm/zone_reclaim_mode");
|
||||
std::string value =
|
||||
basics::FileUtils::slurp("/proc/sys/vm/zone_reclaim_mode");
|
||||
uint64_t v = basics::StringUtils::uint64(value);
|
||||
if (v != 0) {
|
||||
// from https://www.kernel.org/doc/Documentation/sysctl/vm.txt:
|
||||
|
@ -85,31 +95,52 @@ void EnvironmentFeature::prepare() {
|
|||
// This is value ORed together of
|
||||
// 1 = Zone reclaim on
|
||||
// 2 = Zone reclaim writes dirty pages out
|
||||
// 4 = Zone reclaim swaps pages
|
||||
//
|
||||
// 4 = Zone reclaim swaps pages
|
||||
//
|
||||
// https://www.poempelfox.de/blog/2010/03/19/
|
||||
LOG_TOPIC(WARN, Logger::FIXME) << "/proc/sys/vm/zone_reclaim_mode is set to '"
|
||||
<< v << "'. it is recommended to set it to a value of 0";
|
||||
LOG_TOPIC(WARN, Logger::PERFORMANCE)
|
||||
<< "/proc/sys/vm/zone_reclaim_mode is set to '" << v
|
||||
<< "'. It is recommended to set it to a value of 0";
|
||||
LOG_TOPIC(WARN, Logger::PERFORMANCE)
|
||||
<< "execute 'sudo bash -c \"echo 0 > "
|
||||
"/proc/sys/vm/zone_reclaim_mode\"'";
|
||||
}
|
||||
} catch (...) {
|
||||
// file not found or value not convertible into integer
|
||||
}
|
||||
|
||||
try {
|
||||
std::string value = basics::FileUtils::slurp("/sys/kernel/mm/transparent_hugepage/enabled");
|
||||
size_t start = value.find('[');
|
||||
size_t end = value.find(']');
|
||||
if (start != std::string::npos && end != std::string::npos && start < end && end - start >= 4) {
|
||||
value = value.substr(start + 1, end - start - 1);
|
||||
if (value == "always") {
|
||||
LOG_TOPIC(WARN, Logger::FIXME) << "/sys/kernel/mm/transparent_hugepage/enabled is set to '"
|
||||
<< value << "'. it is recommended to set it to a value of 'never' or 'madvise'";
|
||||
bool showHuge = false;
|
||||
std::vector<std::string> paths = {
|
||||
"/sys/kernel/mm/transparent_hugepage/enabled",
|
||||
"/sys/kernel/mm/transparent_hugepage/defrag"};
|
||||
|
||||
for (auto file : paths) {
|
||||
try {
|
||||
std::string value = basics::FileUtils::slurp(file);
|
||||
size_t start = value.find('[');
|
||||
size_t end = value.find(']');
|
||||
|
||||
if (start != std::string::npos && end != std::string::npos &&
|
||||
start < end && end - start >= 4) {
|
||||
value = value.substr(start + 1, end - start - 1);
|
||||
if (value == "always") {
|
||||
LOG_TOPIC(WARN, Logger::MEMORY)
|
||||
<< file << " is set to '" << value
|
||||
<< "'. It is recommended to set it to a value of 'never' "
|
||||
"or 'madvise'";
|
||||
showHuge = true;
|
||||
}
|
||||
}
|
||||
} catch (...) {
|
||||
// file not found
|
||||
}
|
||||
}
|
||||
|
||||
if (showHuge) {
|
||||
for (auto file : paths) {
|
||||
LOG_TOPIC(WARN, Logger::MEMORY)
|
||||
<< "execute 'sudo bash -c \"echo madvise > " << file << "\"'";
|
||||
}
|
||||
} catch (...) {
|
||||
// file not found
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
}
|
||||
|
|
|
@ -99,7 +99,7 @@ class MutexLocker {
|
|||
|
||||
#ifdef TRI_SHOW_LOCK_TIME
|
||||
if (_time > TRI_SHOW_LOCK_THRESHOLD) {
|
||||
LOG_TOPIC(WARN, arangodb::Logger::PERFORMANCE) << "MutexLocker " << _file << ":" << _line << " took " << _time << " s";
|
||||
LOG_TOPIC(INFO, arangodb::Logger::PERFORMANCE) << "MutexLocker " << _file << ":" << _line << " took " << _time << " s";
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
|
|
@ -101,7 +101,7 @@ class ReadLocker {
|
|||
|
||||
#ifdef TRI_SHOW_LOCK_TIME
|
||||
if (_time > TRI_SHOW_LOCK_THRESHOLD) {
|
||||
LOG_TOPIC(WARN, arangodb::Logger::PERFORMANCE) << "ReadLocker " << _file << ":" << _line << " took " << _time << " s";
|
||||
LOG_TOPIC(INFO, arangodb::Logger::PERFORMANCE) << "ReadLocker " << _file << ":" << _line << " took " << _time << " s";
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
|
|
@ -102,7 +102,7 @@ class WriteLocker {
|
|||
|
||||
#ifdef TRI_SHOW_LOCK_TIME
|
||||
if (_time > TRI_SHOW_LOCK_THRESHOLD) {
|
||||
LOG_TOPIC(WARN, arangodb::Logger::PERFORMANCE) << "WriteLocker " << _file << ":" << _line << " took " << _time << " s";
|
||||
LOG_TOPIC(INFO, arangodb::Logger::PERFORMANCE) << "WriteLocker " << _file << ":" << _line << " took " << _time << " s";
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
|
|
@ -52,9 +52,9 @@ LogTopic Logger::ENGINES("engines", LogLevel::INFO);
|
|||
LogTopic Logger::FIXME("general", LogLevel::INFO);
|
||||
LogTopic Logger::GRAPHS("graphs", LogLevel::INFO);
|
||||
LogTopic Logger::HEARTBEAT("heartbeat", LogLevel::INFO);
|
||||
LogTopic Logger::MEMORY("memory", LogLevel::FATAL); // suppress
|
||||
LogTopic Logger::MEMORY("memory", LogLevel::WARN);
|
||||
LogTopic Logger::MMAP("mmap");
|
||||
LogTopic Logger::PERFORMANCE("performance", LogLevel::FATAL); // suppress
|
||||
LogTopic Logger::PERFORMANCE("performance", LogLevel::WARN);
|
||||
LogTopic Logger::PREGEL("pregel", LogLevel::INFO);
|
||||
LogTopic Logger::QUERIES("queries", LogLevel::INFO);
|
||||
LogTopic Logger::REPLICATION("replication", LogLevel::INFO);
|
||||
|
|
|
@ -347,10 +347,7 @@ std::string Version::getVerboseVersionString() {
|
|||
<< " with ASAN"
|
||||
#endif
|
||||
<< ", using "
|
||||
#ifdef TRI_HAVE_TCMALLOC
|
||||
<< "tcmalloc, "
|
||||
#endif
|
||||
#ifdef TRI_HAVE_JEMALLOC
|
||||
#ifdef ARANGODB_HAVE_JEMALLOC
|
||||
<< "jemalloc, "
|
||||
#endif
|
||||
<< "VPack " << getVPackVersion() << ", "
|
||||
|
|
Loading…
Reference in New Issue