1
0
Fork 0

Feature/hugepages (#2497)

* enabled checks for memory management

* added MADV_NOHUGEPAGE

* added documentation about NUMA and file systems
This commit is contained in:
Frank Celler 2017-05-25 16:04:23 +02:00 committed by GitHub
parent 6bdc91c1b1
commit 4800a8b7f6
14 changed files with 159 additions and 38 deletions

View File

@ -143,6 +143,12 @@ void* adb_mmap(void* addr, size_t length, int prot, int flags) {
__atomic_add_fetch(&adb_total_size, (uint64_t)length, __ATOMIC_SEQ_CST);
}
#ifdef MADV_NOHUGEPAGE
if (ret != MAP_FAILED) {
madvise(ret, length, MADV_NOHUGEPAGE);
}
#endif
return ret;
}
}

View File

@ -1,6 +1,8 @@
devel
-----
* added checks for memory management and warn (e.g. if hugepages are enabled)
* set default SSL cipher suite string to "HIGH:!EXPORT:!aNULL@STRENGTH"
* fixed issue #2469: Authentication = true does not protect foxx-routes

View File

@ -1,9 +1,23 @@
Operating System Configuration
==============================
File Systems
------------
(LINUX)
We recommend **not** using BTRFS on Linux; it is known to not work
well in conjunction with ArangoDB. We have seen ArangoDB
facing latency issues when accessing its database files on BTRFS
partitions. In conjunction with BTRFS and AUFS we also saw data loss
on restart.
Virtual Memory Page Sizes
--------------------------
(LINUX)
By default, ArangoDB uses Jemalloc as the memory allocator. Jemalloc does a good
job of reducing virtual memory fragmentation, especially for long-running
processes. Unfortunately, some OS configurations can interfere with Jemalloc's
@ -14,18 +28,64 @@ high memory use. Therefore, we recommend disabling these features when using
Jemalloc with ArangoDB. Please consult your operating system's documentation for
how to do this.
Execute
sudo bash -c "echo madvise >/sys/kernel/mm/transparent_hugepage/enabled"
sudo bash -c "echo madvise >/sys/kernel/mm/transparent_hugepage/defrag"
before executing `arangod`.
Swap Space
----------
(LINUX)
It is recommended to assign swap space for a server that is running arangod.
Configuring swap space can prevent the operating system's OOM killer from
killing ArangoDB too eagerly on Linux.
### Over-Commit Memory
Execute
sudo bash -c "echo 0 >/proc/sys/vm/overcommit_memory"
before executing `arangod`.
From [www.kernel.org](https://www.kernel.org/doc/Documentation/sysctl/vm.txt):
- When this flag is 0, the kernel attempts to estimate the amount
of free memory left when userspace requests more memory.
- When this flag is 1, the kernel pretends there is always enough
memory until it actually runs out.
- When this flag is 2, the kernel uses a "never overcommit"
policy that attempts to prevent any overcommit of memory.
### Zone Reclaim
Execute
sudo bash -c "echo 0 >/proc/sys/vm/zone_reclaim_mode"
before executing `arangod`.
From [www.kernel.org](https://www.kernel.org/doc/Documentation/sysctl/vm.txt):
This is value ORed together of
- 1 = Zone reclaim on
- 2 = Zone reclaim writes dirty pages out
- 4 = Zone reclaim swaps pages
NUMA
----
Multi-processor systems often have non-uniform memory access (NUMA). ArangoDB
should be started with interleave on such systems. This can be achieved using
numactl --interleave=all arangod ...
Environment Variables
---------------------
@ -34,3 +94,18 @@ It is recommended to set the environment variable `GLIBCXX_FORCE_NEW` to 1 on
systems that use glibc++ in order to disable the memory pooling built into
glibc++. That memory pooling is unnecessary because Jemalloc will already do
memory pooling.
Execute
export GLIBCXX_FORCE_NEW=1
before starting `arangod`.
32bit
-----
While it is possible to compile ArangoDB on 32bit systems, this is not a
recommended environment. 64bit systems can address a significantly bigger
memory region.

View File

@ -37,6 +37,8 @@ start () {
( cd /var/run/arangodb && chown -R arangodb:arangodb . ) || exit 1
( cd $PIDDIR && chown arangodb:arangodb . ) || exit 1
export GLIBCXX_FORCE_NEW=1
if [ "$1" = "--database.auto-upgrade" ]; then
$DAEMON -c $CONF --uid arangodb --gid arangodb --server.rest-server false $@
RETVAL=$?

View File

@ -37,6 +37,8 @@ start () {
ulimit -H -n 131072 || true
ulimit -S -n 131072 || true
export GLIBCXX_FORCE_NEW=1
case "$1" in
"--upgrade")
shift

View File

@ -35,6 +35,8 @@ start() {
ulimit -H -n 131072 || true
ulimit -S -n 131072 || true
export GLIBCXX_FORCE_NEW=1
case "$1" in
"--upgrade")
shift

View File

@ -55,6 +55,8 @@ start () {
ulimit -H -n 131072 || true
ulimit -S -n 131072 || true
export GLIBCXX_FORCE_NEW=1
case "$1" in
"--upgrade")
shift

View File

@ -55,6 +55,8 @@ start () {
ulimit -H -n 131072 || true
ulimit -S -n 131072 || true
export GLIBCXX_FORCE_NEW=1
case "$1" in
"--upgrade")
shift

View File

@ -38,29 +38,35 @@ EnvironmentFeature::EnvironmentFeature(
}
void EnvironmentFeature::prepare() {
#if 0
if (sizeof(void*) == 4) {
// 32 bit build
LOG_TOPIC(WARN, arangodb::Logger::FIXME) << "this is a 32 bit build of ArangoDB. "
<< "it is recommended to run a 64 bit build instead because it can "
<< "address significantly bigger regions of memory";
LOG_TOPIC(WARN, arangodb::Logger::MEMORY)
<< "this is a 32 bit build of ArangoDB. "
<< "it is recommended to run a 64 bit build instead because it can "
<< "address significantly bigger regions of memory";
}
#ifdef __linux__
#ifdef __GLIBC__
char const* v = getenv("GLIBCXX_FORCE_NEW");
if (v == nullptr) {
// environment variable not set
LOG_TOPIC(DEBUG, arangodb::Logger::FIXME) << "environment variable GLIBCXX_FORCE_NEW' is not set. "
<< "it is recommended to set it to some value to avoid memory pooling in glibc++";
LOG_TOPIC(WARN, arangodb::Logger::MEMORY)
<< "environment variable GLIBCXX_FORCE_NEW' is not set. "
<< "it is recommended to set it to some value to avoid memory pooling "
"in glibc++";
LOG_TOPIC(WARN, arangodb::Logger::MEMORY)
<< "execute 'export GLIBCXX_FORCE_NEW=1'";
}
#endif
try {
std::string value = basics::FileUtils::slurp("/proc/sys/vm/overcommit_memory");
std::string value =
basics::FileUtils::slurp("/proc/sys/vm/overcommit_memory");
uint64_t v = basics::StringUtils::uint64(value);
if (v == 2) {
if (v != 0 && v != 1) {
// from https://www.kernel.org/doc/Documentation/sysctl/vm.txt:
//
// When this flag is 0, the kernel attempts to estimate the amount
@ -69,15 +75,19 @@ void EnvironmentFeature::prepare() {
// memory until it actually runs out.
// When this flag is 2, the kernel uses a "never overcommit"
// policy that attempts to prevent any overcommit of memory.
LOG_TOPIC(WARN, Logger::FIXME) << "/proc/sys/vm/overcommit_memory is set to '"
<< v << "'. it is recommended to set it to a value of 0 or 1";
LOG_TOPIC(WARN, Logger::MEMORY)
<< "/proc/sys/vm/overcommit_memory is set to '" << v
<< "'. It is recommended to set it to a value of 0 or 1";
LOG_TOPIC(WARN, Logger::MEMORY) << "execute 'sudo bash -c \"echo 0 > "
"/proc/sys/vm/overcommit_memory\"'";
}
} catch (...) {
// file not found or value not convertible into integer
}
try {
std::string value = basics::FileUtils::slurp("/proc/sys/vm/zone_reclaim_mode");
std::string value =
basics::FileUtils::slurp("/proc/sys/vm/zone_reclaim_mode");
uint64_t v = basics::StringUtils::uint64(value);
if (v != 0) {
// from https://www.kernel.org/doc/Documentation/sysctl/vm.txt:
@ -85,31 +95,52 @@ void EnvironmentFeature::prepare() {
// This is value ORed together of
// 1 = Zone reclaim on
// 2 = Zone reclaim writes dirty pages out
// 4 = Zone reclaim swaps pages
//
// 4 = Zone reclaim swaps pages
//
// https://www.poempelfox.de/blog/2010/03/19/
LOG_TOPIC(WARN, Logger::FIXME) << "/proc/sys/vm/zone_reclaim_mode is set to '"
<< v << "'. it is recommended to set it to a value of 0";
LOG_TOPIC(WARN, Logger::PERFORMANCE)
<< "/proc/sys/vm/zone_reclaim_mode is set to '" << v
<< "'. It is recommended to set it to a value of 0";
LOG_TOPIC(WARN, Logger::PERFORMANCE)
<< "execute 'sudo bash -c \"echo 0 > "
"/proc/sys/vm/zone_reclaim_mode\"'";
}
} catch (...) {
// file not found or value not convertible into integer
}
try {
std::string value = basics::FileUtils::slurp("/sys/kernel/mm/transparent_hugepage/enabled");
size_t start = value.find('[');
size_t end = value.find(']');
if (start != std::string::npos && end != std::string::npos && start < end && end - start >= 4) {
value = value.substr(start + 1, end - start - 1);
if (value == "always") {
LOG_TOPIC(WARN, Logger::FIXME) << "/sys/kernel/mm/transparent_hugepage/enabled is set to '"
<< value << "'. it is recommended to set it to a value of 'never' or 'madvise'";
bool showHuge = false;
std::vector<std::string> paths = {
"/sys/kernel/mm/transparent_hugepage/enabled",
"/sys/kernel/mm/transparent_hugepage/defrag"};
for (auto file : paths) {
try {
std::string value = basics::FileUtils::slurp(file);
size_t start = value.find('[');
size_t end = value.find(']');
if (start != std::string::npos && end != std::string::npos &&
start < end && end - start >= 4) {
value = value.substr(start + 1, end - start - 1);
if (value == "always") {
LOG_TOPIC(WARN, Logger::MEMORY)
<< file << " is set to '" << value
<< "'. It is recommended to set it to a value of 'never' "
"or 'madvise'";
showHuge = true;
}
}
} catch (...) {
// file not found
}
}
if (showHuge) {
for (auto file : paths) {
LOG_TOPIC(WARN, Logger::MEMORY)
<< "execute 'sudo bash -c \"echo madvise > " << file << "\"'";
}
} catch (...) {
// file not found
}
#endif
#endif
}

View File

@ -99,7 +99,7 @@ class MutexLocker {
#ifdef TRI_SHOW_LOCK_TIME
if (_time > TRI_SHOW_LOCK_THRESHOLD) {
LOG_TOPIC(WARN, arangodb::Logger::PERFORMANCE) << "MutexLocker " << _file << ":" << _line << " took " << _time << " s";
LOG_TOPIC(INFO, arangodb::Logger::PERFORMANCE) << "MutexLocker " << _file << ":" << _line << " took " << _time << " s";
}
#endif
}

View File

@ -101,7 +101,7 @@ class ReadLocker {
#ifdef TRI_SHOW_LOCK_TIME
if (_time > TRI_SHOW_LOCK_THRESHOLD) {
LOG_TOPIC(WARN, arangodb::Logger::PERFORMANCE) << "ReadLocker " << _file << ":" << _line << " took " << _time << " s";
LOG_TOPIC(INFO, arangodb::Logger::PERFORMANCE) << "ReadLocker " << _file << ":" << _line << " took " << _time << " s";
}
#endif
}

View File

@ -102,7 +102,7 @@ class WriteLocker {
#ifdef TRI_SHOW_LOCK_TIME
if (_time > TRI_SHOW_LOCK_THRESHOLD) {
LOG_TOPIC(WARN, arangodb::Logger::PERFORMANCE) << "WriteLocker " << _file << ":" << _line << " took " << _time << " s";
LOG_TOPIC(INFO, arangodb::Logger::PERFORMANCE) << "WriteLocker " << _file << ":" << _line << " took " << _time << " s";
}
#endif
}

View File

@ -52,9 +52,9 @@ LogTopic Logger::ENGINES("engines", LogLevel::INFO);
LogTopic Logger::FIXME("general", LogLevel::INFO);
LogTopic Logger::GRAPHS("graphs", LogLevel::INFO);
LogTopic Logger::HEARTBEAT("heartbeat", LogLevel::INFO);
LogTopic Logger::MEMORY("memory", LogLevel::FATAL); // suppress
LogTopic Logger::MEMORY("memory", LogLevel::WARN);
LogTopic Logger::MMAP("mmap");
LogTopic Logger::PERFORMANCE("performance", LogLevel::FATAL); // suppress
LogTopic Logger::PERFORMANCE("performance", LogLevel::WARN);
LogTopic Logger::PREGEL("pregel", LogLevel::INFO);
LogTopic Logger::QUERIES("queries", LogLevel::INFO);
LogTopic Logger::REPLICATION("replication", LogLevel::INFO);

View File

@ -347,10 +347,7 @@ std::string Version::getVerboseVersionString() {
<< " with ASAN"
#endif
<< ", using "
#ifdef TRI_HAVE_TCMALLOC
<< "tcmalloc, "
#endif
#ifdef TRI_HAVE_JEMALLOC
#ifdef ARANGODB_HAVE_JEMALLOC
<< "jemalloc, "
#endif
<< "VPack " << getVPackVersion() << ", "