mirror of https://gitee.com/bigwinds/arangodb
Feature/hugepages (#2497)
* enabled checks for memory management * added MADV_NOHUGEPAGE * added documentation about NUMA and file systems
This commit is contained in:
parent
6bdc91c1b1
commit
4800a8b7f6
|
@ -143,6 +143,12 @@ void* adb_mmap(void* addr, size_t length, int prot, int flags) {
|
||||||
__atomic_add_fetch(&adb_total_size, (uint64_t)length, __ATOMIC_SEQ_CST);
|
__atomic_add_fetch(&adb_total_size, (uint64_t)length, __ATOMIC_SEQ_CST);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef MADV_NOHUGEPAGE
|
||||||
|
if (ret != MAP_FAILED) {
|
||||||
|
madvise(ret, length, MADV_NOHUGEPAGE);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,6 +1,8 @@
|
||||||
devel
|
devel
|
||||||
-----
|
-----
|
||||||
|
|
||||||
|
* added checks for memory management and warn (e.g. if hugepages are enabled)
|
||||||
|
|
||||||
* set default SSL cipher suite string to "HIGH:!EXPORT:!aNULL@STRENGTH"
|
* set default SSL cipher suite string to "HIGH:!EXPORT:!aNULL@STRENGTH"
|
||||||
|
|
||||||
* fixed issue #2469: Authentication = true does not protect foxx-routes
|
* fixed issue #2469: Authentication = true does not protect foxx-routes
|
||||||
|
|
|
@ -1,9 +1,23 @@
|
||||||
Operating System Configuration
|
Operating System Configuration
|
||||||
==============================
|
==============================
|
||||||
|
|
||||||
|
File Systems
|
||||||
|
------------
|
||||||
|
|
||||||
|
(LINUX)
|
||||||
|
|
||||||
|
We recommend **not** using BTRFS on Linux; it is known to not work
|
||||||
|
well in conjunction with ArangoDB. We have observed ArangoDB
|
||||||
|
facing latency issues when accessing its database files on BTRFS
|
||||||
|
partitions. In conjunction with BTRFS and AUFS we also saw data loss
|
||||||
|
on restart.
|
||||||
|
|
||||||
|
|
||||||
Virtual Memory Page Sizes
|
Virtual Memory Page Sizes
|
||||||
--------------------------
|
--------------------------
|
||||||
|
|
||||||
|
(LINUX)
|
||||||
|
|
||||||
By default, ArangoDB uses Jemalloc as the memory allocator. Jemalloc does a good
|
By default, ArangoDB uses Jemalloc as the memory allocator. Jemalloc does a good
|
||||||
job of reducing virtual memory fragmentation, especially for long-running
|
job of reducing virtual memory fragmentation, especially for long-running
|
||||||
processes. Unfortunately, some OS configurations can interfere with Jemalloc's
|
processes. Unfortunately, some OS configurations can interfere with Jemalloc's
|
||||||
|
@ -14,18 +28,64 @@ high memory use. Therefore, we recommend disabling these features when using
|
||||||
Jemalloc with ArangoDB. Please consult your operating system's documentation for
|
Jemalloc with ArangoDB. Please consult your operating system's documentation for
|
||||||
how to do this.
|
how to do this.
|
||||||
|
|
||||||
|
Execute
|
||||||
|
|
||||||
|
sudo bash -c "echo madvise >/sys/kernel/mm/transparent_hugepage/enabled"
|
||||||
|
sudo bash -c "echo madvise >/sys/kernel/mm/transparent_hugepage/defrag"
|
||||||
|
|
||||||
|
before executing `arangod`.
|
||||||
|
|
||||||
Swap Space
|
Swap Space
|
||||||
----------
|
----------
|
||||||
|
|
||||||
|
(LINUX)
|
||||||
|
|
||||||
It is recommended to assign swap space for a server that is running arangod.
|
It is recommended to assign swap space for a server that is running arangod.
|
||||||
Configuring swap space can prevent the operating system's OOM killer from
|
Configuring swap space can prevent the operating system's OOM killer from
|
||||||
killing ArangoDB too eagerly on Linux.
|
killing ArangoDB too eagerly on Linux.
|
||||||
|
|
||||||
|
### Over-Commit Memory
|
||||||
|
|
||||||
|
Execute
|
||||||
|
|
||||||
|
sudo bash -c "echo 0 >/proc/sys/vm/overcommit_memory"
|
||||||
|
|
||||||
|
before executing `arangod`.
|
||||||
|
|
||||||
|
From [www.kernel.org](https://www.kernel.org/doc/Documentation/sysctl/vm.txt):
|
||||||
|
|
||||||
|
- When this flag is 0, the kernel attempts to estimate the amount
|
||||||
|
of free memory left when userspace requests more memory.
|
||||||
|
|
||||||
|
- When this flag is 1, the kernel pretends there is always enough
|
||||||
|
memory until it actually runs out.
|
||||||
|
|
||||||
|
- When this flag is 2, the kernel uses a "never overcommit"
|
||||||
|
policy that attempts to prevent any overcommit of memory.
|
||||||
|
|
||||||
|
### Zone Reclaim
|
||||||
|
|
||||||
|
Execute
|
||||||
|
|
||||||
|
sudo bash -c "echo 0 >/proc/sys/vm/zone_reclaim_mode"
|
||||||
|
|
||||||
|
before executing `arangod`.
|
||||||
|
|
||||||
|
From [www.kernel.org](https://www.kernel.org/doc/Documentation/sysctl/vm.txt):
|
||||||
|
|
||||||
|
This is value ORed together of
|
||||||
|
|
||||||
|
- 1 = Zone reclaim on
|
||||||
|
- 2 = Zone reclaim writes dirty pages out
|
||||||
|
- 4 = Zone reclaim swaps pages
|
||||||
|
|
||||||
NUMA
|
NUMA
|
||||||
----
|
----
|
||||||
|
|
||||||
|
Multi-processor systems often have non-uniform memory access (NUMA). ArangoDB
|
||||||
|
should be started with interleave on such systems. This can be achieved using
|
||||||
|
|
||||||
|
numactl --interleave=all arangod ...
|
||||||
|
|
||||||
Environment Variables
|
Environment Variables
|
||||||
---------------------
|
---------------------
|
||||||
|
@ -34,3 +94,18 @@ It is recommended to set the environment variable `GLIBCXX_FORCE_NEW` to 1 on
|
||||||
systems that use glibc++ in order to disable the memory pooling built into
|
systems that use glibc++ in order to disable the memory pooling built into
|
||||||
glibc++. That memory pooling is unnecessary because Jemalloc will already do
|
glibc++. That memory pooling is unnecessary because Jemalloc will already do
|
||||||
memory pooling.
|
memory pooling.
|
||||||
|
|
||||||
|
Execute
|
||||||
|
|
||||||
|
export GLIBCXX_FORCE_NEW=1
|
||||||
|
|
||||||
|
|
||||||
|
before starting `arangod`.
|
||||||
|
|
||||||
|
32bit
|
||||||
|
-----
|
||||||
|
|
||||||
|
While it is possible to compile ArangoDB on a 32bit system, this is not a
|
||||||
|
recommended environment. 64bit systems can address a significantly bigger
|
||||||
|
memory region.
|
||||||
|
|
||||||
|
|
|
@ -37,6 +37,8 @@ start () {
|
||||||
( cd /var/run/arangodb && chown -R arangodb:arangodb . ) || exit 1
|
( cd /var/run/arangodb && chown -R arangodb:arangodb . ) || exit 1
|
||||||
( cd $PIDDIR && chown arangodb:arangodb . ) || exit 1
|
( cd $PIDDIR && chown arangodb:arangodb . ) || exit 1
|
||||||
|
|
||||||
|
export GLIBCXX_FORCE_NEW=1
|
||||||
|
|
||||||
if [ "$1" = "--database.auto-upgrade" ]; then
|
if [ "$1" = "--database.auto-upgrade" ]; then
|
||||||
$DAEMON -c $CONF --uid arangodb --gid arangodb --server.rest-server false $@
|
$DAEMON -c $CONF --uid arangodb --gid arangodb --server.rest-server false $@
|
||||||
RETVAL=$?
|
RETVAL=$?
|
||||||
|
|
|
@ -37,6 +37,8 @@ start () {
|
||||||
ulimit -H -n 131072 || true
|
ulimit -H -n 131072 || true
|
||||||
ulimit -S -n 131072 || true
|
ulimit -S -n 131072 || true
|
||||||
|
|
||||||
|
export GLIBCXX_FORCE_NEW=1
|
||||||
|
|
||||||
case "$1" in
|
case "$1" in
|
||||||
"--upgrade")
|
"--upgrade")
|
||||||
shift
|
shift
|
||||||
|
|
|
@ -35,6 +35,8 @@ start() {
|
||||||
ulimit -H -n 131072 || true
|
ulimit -H -n 131072 || true
|
||||||
ulimit -S -n 131072 || true
|
ulimit -S -n 131072 || true
|
||||||
|
|
||||||
|
export GLIBCXX_FORCE_NEW=1
|
||||||
|
|
||||||
case "$1" in
|
case "$1" in
|
||||||
"--upgrade")
|
"--upgrade")
|
||||||
shift
|
shift
|
||||||
|
|
|
@ -55,6 +55,8 @@ start () {
|
||||||
ulimit -H -n 131072 || true
|
ulimit -H -n 131072 || true
|
||||||
ulimit -S -n 131072 || true
|
ulimit -S -n 131072 || true
|
||||||
|
|
||||||
|
export GLIBCXX_FORCE_NEW=1
|
||||||
|
|
||||||
case "$1" in
|
case "$1" in
|
||||||
"--upgrade")
|
"--upgrade")
|
||||||
shift
|
shift
|
||||||
|
|
|
@ -55,6 +55,8 @@ start () {
|
||||||
ulimit -H -n 131072 || true
|
ulimit -H -n 131072 || true
|
||||||
ulimit -S -n 131072 || true
|
ulimit -S -n 131072 || true
|
||||||
|
|
||||||
|
export GLIBCXX_FORCE_NEW=1
|
||||||
|
|
||||||
case "$1" in
|
case "$1" in
|
||||||
"--upgrade")
|
"--upgrade")
|
||||||
shift
|
shift
|
||||||
|
|
|
@ -38,29 +38,35 @@ EnvironmentFeature::EnvironmentFeature(
|
||||||
}
|
}
|
||||||
|
|
||||||
void EnvironmentFeature::prepare() {
|
void EnvironmentFeature::prepare() {
|
||||||
#if 0
|
|
||||||
if (sizeof(void*) == 4) {
|
if (sizeof(void*) == 4) {
|
||||||
// 32 bit build
|
// 32 bit build
|
||||||
LOG_TOPIC(WARN, arangodb::Logger::FIXME) << "this is a 32 bit build of ArangoDB. "
|
LOG_TOPIC(WARN, arangodb::Logger::MEMORY)
|
||||||
<< "it is recommended to run a 64 bit build instead because it can "
|
<< "this is a 32 bit build of ArangoDB. "
|
||||||
<< "address significantly bigger regions of memory";
|
<< "it is recommended to run a 64 bit build instead because it can "
|
||||||
|
<< "address significantly bigger regions of memory";
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef __linux__
|
#ifdef __linux__
|
||||||
|
|
||||||
#ifdef __GLIBC__
|
#ifdef __GLIBC__
|
||||||
char const* v = getenv("GLIBCXX_FORCE_NEW");
|
char const* v = getenv("GLIBCXX_FORCE_NEW");
|
||||||
|
|
||||||
if (v == nullptr) {
|
if (v == nullptr) {
|
||||||
// environment variable not set
|
// environment variable not set
|
||||||
LOG_TOPIC(DEBUG, arangodb::Logger::FIXME) << "environment variable GLIBCXX_FORCE_NEW' is not set. "
|
LOG_TOPIC(WARN, arangodb::Logger::MEMORY)
|
||||||
<< "it is recommended to set it to some value to avoid memory pooling in glibc++";
|
<< "environment variable GLIBCXX_FORCE_NEW' is not set. "
|
||||||
|
<< "it is recommended to set it to some value to avoid memory pooling "
|
||||||
|
"in glibc++";
|
||||||
|
LOG_TOPIC(WARN, arangodb::Logger::MEMORY)
|
||||||
|
<< "execute 'export GLIBCXX_FORCE_NEW=1'";
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
try {
|
try {
|
||||||
std::string value = basics::FileUtils::slurp("/proc/sys/vm/overcommit_memory");
|
std::string value =
|
||||||
|
basics::FileUtils::slurp("/proc/sys/vm/overcommit_memory");
|
||||||
uint64_t v = basics::StringUtils::uint64(value);
|
uint64_t v = basics::StringUtils::uint64(value);
|
||||||
if (v == 2) {
|
if (v != 0 && v != 1) {
|
||||||
// from https://www.kernel.org/doc/Documentation/sysctl/vm.txt:
|
// from https://www.kernel.org/doc/Documentation/sysctl/vm.txt:
|
||||||
//
|
//
|
||||||
// When this flag is 0, the kernel attempts to estimate the amount
|
// When this flag is 0, the kernel attempts to estimate the amount
|
||||||
|
@ -69,15 +75,19 @@ void EnvironmentFeature::prepare() {
|
||||||
// memory until it actually runs out.
|
// memory until it actually runs out.
|
||||||
// When this flag is 2, the kernel uses a "never overcommit"
|
// When this flag is 2, the kernel uses a "never overcommit"
|
||||||
// policy that attempts to prevent any overcommit of memory.
|
// policy that attempts to prevent any overcommit of memory.
|
||||||
LOG_TOPIC(WARN, Logger::FIXME) << "/proc/sys/vm/overcommit_memory is set to '"
|
LOG_TOPIC(WARN, Logger::MEMORY)
|
||||||
<< v << "'. it is recommended to set it to a value of 0 or 1";
|
<< "/proc/sys/vm/overcommit_memory is set to '" << v
|
||||||
|
<< "'. It is recommended to set it to a value of 0 or 1";
|
||||||
|
LOG_TOPIC(WARN, Logger::MEMORY) << "execute 'sudo bash -c \"echo 0 > "
|
||||||
|
"/proc/sys/vm/overcommit_memory\"'";
|
||||||
}
|
}
|
||||||
} catch (...) {
|
} catch (...) {
|
||||||
// file not found or value not convertible into integer
|
// file not found or value not convertible into integer
|
||||||
}
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
std::string value = basics::FileUtils::slurp("/proc/sys/vm/zone_reclaim_mode");
|
std::string value =
|
||||||
|
basics::FileUtils::slurp("/proc/sys/vm/zone_reclaim_mode");
|
||||||
uint64_t v = basics::StringUtils::uint64(value);
|
uint64_t v = basics::StringUtils::uint64(value);
|
||||||
if (v != 0) {
|
if (v != 0) {
|
||||||
// from https://www.kernel.org/doc/Documentation/sysctl/vm.txt:
|
// from https://www.kernel.org/doc/Documentation/sysctl/vm.txt:
|
||||||
|
@ -85,31 +95,52 @@ void EnvironmentFeature::prepare() {
|
||||||
// This is value ORed together of
|
// This is value ORed together of
|
||||||
// 1 = Zone reclaim on
|
// 1 = Zone reclaim on
|
||||||
// 2 = Zone reclaim writes dirty pages out
|
// 2 = Zone reclaim writes dirty pages out
|
||||||
// 4 = Zone reclaim swaps pages
|
// 4 = Zone reclaim swaps pages
|
||||||
//
|
//
|
||||||
// https://www.poempelfox.de/blog/2010/03/19/
|
// https://www.poempelfox.de/blog/2010/03/19/
|
||||||
LOG_TOPIC(WARN, Logger::FIXME) << "/proc/sys/vm/zone_reclaim_mode is set to '"
|
LOG_TOPIC(WARN, Logger::PERFORMANCE)
|
||||||
<< v << "'. it is recommended to set it to a value of 0";
|
<< "/proc/sys/vm/zone_reclaim_mode is set to '" << v
|
||||||
|
<< "'. It is recommended to set it to a value of 0";
|
||||||
|
LOG_TOPIC(WARN, Logger::PERFORMANCE)
|
||||||
|
<< "execute 'sudo bash -c \"echo 0 > "
|
||||||
|
"/proc/sys/vm/zone_reclaim_mode\"'";
|
||||||
}
|
}
|
||||||
} catch (...) {
|
} catch (...) {
|
||||||
// file not found or value not convertible into integer
|
// file not found or value not convertible into integer
|
||||||
}
|
}
|
||||||
|
|
||||||
try {
|
bool showHuge = false;
|
||||||
std::string value = basics::FileUtils::slurp("/sys/kernel/mm/transparent_hugepage/enabled");
|
std::vector<std::string> paths = {
|
||||||
size_t start = value.find('[');
|
"/sys/kernel/mm/transparent_hugepage/enabled",
|
||||||
size_t end = value.find(']');
|
"/sys/kernel/mm/transparent_hugepage/defrag"};
|
||||||
if (start != std::string::npos && end != std::string::npos && start < end && end - start >= 4) {
|
|
||||||
value = value.substr(start + 1, end - start - 1);
|
for (auto file : paths) {
|
||||||
if (value == "always") {
|
try {
|
||||||
LOG_TOPIC(WARN, Logger::FIXME) << "/sys/kernel/mm/transparent_hugepage/enabled is set to '"
|
std::string value = basics::FileUtils::slurp(file);
|
||||||
<< value << "'. it is recommended to set it to a value of 'never' or 'madvise'";
|
size_t start = value.find('[');
|
||||||
|
size_t end = value.find(']');
|
||||||
|
|
||||||
|
if (start != std::string::npos && end != std::string::npos &&
|
||||||
|
start < end && end - start >= 4) {
|
||||||
|
value = value.substr(start + 1, end - start - 1);
|
||||||
|
if (value == "always") {
|
||||||
|
LOG_TOPIC(WARN, Logger::MEMORY)
|
||||||
|
<< file << " is set to '" << value
|
||||||
|
<< "'. It is recommended to set it to a value of 'never' "
|
||||||
|
"or 'madvise'";
|
||||||
|
showHuge = true;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
} catch (...) {
|
||||||
|
// file not found
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (showHuge) {
|
||||||
|
for (auto file : paths) {
|
||||||
|
LOG_TOPIC(WARN, Logger::MEMORY)
|
||||||
|
<< "execute 'sudo bash -c \"echo madvise > " << file << "\"'";
|
||||||
}
|
}
|
||||||
} catch (...) {
|
|
||||||
// file not found
|
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -99,7 +99,7 @@ class MutexLocker {
|
||||||
|
|
||||||
#ifdef TRI_SHOW_LOCK_TIME
|
#ifdef TRI_SHOW_LOCK_TIME
|
||||||
if (_time > TRI_SHOW_LOCK_THRESHOLD) {
|
if (_time > TRI_SHOW_LOCK_THRESHOLD) {
|
||||||
LOG_TOPIC(WARN, arangodb::Logger::PERFORMANCE) << "MutexLocker " << _file << ":" << _line << " took " << _time << " s";
|
LOG_TOPIC(INFO, arangodb::Logger::PERFORMANCE) << "MutexLocker " << _file << ":" << _line << " took " << _time << " s";
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
|
@ -101,7 +101,7 @@ class ReadLocker {
|
||||||
|
|
||||||
#ifdef TRI_SHOW_LOCK_TIME
|
#ifdef TRI_SHOW_LOCK_TIME
|
||||||
if (_time > TRI_SHOW_LOCK_THRESHOLD) {
|
if (_time > TRI_SHOW_LOCK_THRESHOLD) {
|
||||||
LOG_TOPIC(WARN, arangodb::Logger::PERFORMANCE) << "ReadLocker " << _file << ":" << _line << " took " << _time << " s";
|
LOG_TOPIC(INFO, arangodb::Logger::PERFORMANCE) << "ReadLocker " << _file << ":" << _line << " took " << _time << " s";
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
|
@ -102,7 +102,7 @@ class WriteLocker {
|
||||||
|
|
||||||
#ifdef TRI_SHOW_LOCK_TIME
|
#ifdef TRI_SHOW_LOCK_TIME
|
||||||
if (_time > TRI_SHOW_LOCK_THRESHOLD) {
|
if (_time > TRI_SHOW_LOCK_THRESHOLD) {
|
||||||
LOG_TOPIC(WARN, arangodb::Logger::PERFORMANCE) << "WriteLocker " << _file << ":" << _line << " took " << _time << " s";
|
LOG_TOPIC(INFO, arangodb::Logger::PERFORMANCE) << "WriteLocker " << _file << ":" << _line << " took " << _time << " s";
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
|
@ -52,9 +52,9 @@ LogTopic Logger::ENGINES("engines", LogLevel::INFO);
|
||||||
LogTopic Logger::FIXME("general", LogLevel::INFO);
|
LogTopic Logger::FIXME("general", LogLevel::INFO);
|
||||||
LogTopic Logger::GRAPHS("graphs", LogLevel::INFO);
|
LogTopic Logger::GRAPHS("graphs", LogLevel::INFO);
|
||||||
LogTopic Logger::HEARTBEAT("heartbeat", LogLevel::INFO);
|
LogTopic Logger::HEARTBEAT("heartbeat", LogLevel::INFO);
|
||||||
LogTopic Logger::MEMORY("memory", LogLevel::FATAL); // suppress
|
LogTopic Logger::MEMORY("memory", LogLevel::WARN);
|
||||||
LogTopic Logger::MMAP("mmap");
|
LogTopic Logger::MMAP("mmap");
|
||||||
LogTopic Logger::PERFORMANCE("performance", LogLevel::FATAL); // suppress
|
LogTopic Logger::PERFORMANCE("performance", LogLevel::WARN);
|
||||||
LogTopic Logger::PREGEL("pregel", LogLevel::INFO);
|
LogTopic Logger::PREGEL("pregel", LogLevel::INFO);
|
||||||
LogTopic Logger::QUERIES("queries", LogLevel::INFO);
|
LogTopic Logger::QUERIES("queries", LogLevel::INFO);
|
||||||
LogTopic Logger::REPLICATION("replication", LogLevel::INFO);
|
LogTopic Logger::REPLICATION("replication", LogLevel::INFO);
|
||||||
|
|
|
@ -347,10 +347,7 @@ std::string Version::getVerboseVersionString() {
|
||||||
<< " with ASAN"
|
<< " with ASAN"
|
||||||
#endif
|
#endif
|
||||||
<< ", using "
|
<< ", using "
|
||||||
#ifdef TRI_HAVE_TCMALLOC
|
#ifdef ARANGODB_HAVE_JEMALLOC
|
||||||
<< "tcmalloc, "
|
|
||||||
#endif
|
|
||||||
#ifdef TRI_HAVE_JEMALLOC
|
|
||||||
<< "jemalloc, "
|
<< "jemalloc, "
|
||||||
#endif
|
#endif
|
||||||
<< "VPack " << getVPackVersion() << ", "
|
<< "VPack " << getVPackVersion() << ", "
|
||||||
|
|
Loading…
Reference in New Issue