From 852b62507b22c0a986032a2c9fa9cc464a5b7bd2 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Thu, 10 Mar 2022 13:22:57 +0100 Subject: [PATCH] pid1,nspawn: raise default RLIMIT_MEMLOCK to 8M This mirrors a similar change in Linux kernel 5.16 (9dcc38e2813e0cd3b195940c98b181ce6ede8f20) that raised the RLIMIT_MEMLOCK to 8M. This change does two things: raise the default limit for nspawn containers (where we try to mimic closely what the kernel does), and bump it when running on old kernels which still have the lower setting. Fixes: #16300 See: https://lwn.net/Articles/876288/ --- man/systemd-system.conf.xml | 10 ++++++---- src/basic/def.h | 5 +++++ src/core/main.c | 7 +++++++ src/core/system.conf.in | 2 +- src/nspawn/nspawn.c | 30 +++++++++++++++--------------- 5 files changed, 34 insertions(+), 20 deletions(-) diff --git a/man/systemd-system.conf.xml b/man/systemd-system.conf.xml index 351662b7570..400e35c4574 100644 --- a/man/systemd-system.conf.xml +++ b/man/systemd-system.conf.xml @@ -485,17 +485,19 @@ Most of these settings are unset, which means the resource limits are inherited from the kernel or, if invoked in a container, from the container manager. However, the following have defaults: - DefaultLimitNOFILE= defaults to 1024:&HIGH_RLIMIT_NOFILE;. + DefaultLimitNOFILE= defaults to 1024:&HIGH_RLIMIT_NOFILE;. + DefaultLimitMEMLOCK= defaults to 8M. + DefaultLimitCORE= does not have a default but it is worth mentioning that RLIMIT_CORE is set to infinity by PID 1 which is inherited by its children. - - Note that the service manager internally increases RLIMIT_MEMLOCK for - itself, however the limit is reverted to the original value for child processes forked off. + Note that the service manager internally in PID 1 bumps RLIMIT_NOFILE and + RLIMIT_MEMLOCK to higher values, however the limits are reverted to the mentioned + defaults for all child processes forked off. 
diff --git a/src/basic/def.h b/src/basic/def.h index eccee3d3fac..ffd462c456f 100644 --- a/src/basic/def.h +++ b/src/basic/def.h @@ -57,8 +57,13 @@ #define CONF_PATHS_STRV(n) \ STRV_MAKE(CONF_PATHS(n)) +/* The limit for PID 1 itself (which is not inherited to children) */ #define HIGH_RLIMIT_MEMLOCK (1024ULL*1024ULL*64ULL) +/* Since kernel 5.16 the kernel default limit was raised to 8M. Let's adjust things on old kernels too, and + * in containers so that our children inherit that. */ +#define DEFAULT_RLIMIT_MEMLOCK (1024ULL*1024ULL*8ULL) + #define PLYMOUTH_SOCKET { \ .un.sun_family = AF_UNIX, \ .un.sun_path = "\0/org/freedesktop/plymouthd", \ diff --git a/src/core/main.c b/src/core/main.c index cffe37de2a2..a05c24dd03b 100644 --- a/src/core/main.c +++ b/src/core/main.c @@ -2310,6 +2310,13 @@ static void fallback_rlimit_memlock(const struct rlimit *saved_rlimit_memlock) { return; } + if (arg_system) { + /* Raise the default limit to 8M also on old kernels and in containers (8M is the kernel + * default for this since kernel 5.16) */ + rl->rlim_max = MAX(rl->rlim_max, (rlim_t) DEFAULT_RLIMIT_MEMLOCK); + rl->rlim_cur = MAX(rl->rlim_cur, (rlim_t) DEFAULT_RLIMIT_MEMLOCK); + } + arg_default_rlimit[RLIMIT_MEMLOCK] = rl; } diff --git a/src/core/system.conf.in b/src/core/system.conf.in index 67e55f10a29..e132b086a6e 100644 --- a/src/core/system.conf.in +++ b/src/core/system.conf.in @@ -66,7 +66,7 @@ #DefaultLimitNOFILE=1024:{{HIGH_RLIMIT_NOFILE}} #DefaultLimitAS= #DefaultLimitNPROC= -#DefaultLimitMEMLOCK= +#DefaultLimitMEMLOCK=8M #DefaultLimitLOCKS= #DefaultLimitSIGPENDING= #DefaultLimitMSGQUEUE= diff --git a/src/nspawn/nspawn.c b/src/nspawn/nspawn.c index 144e58ae897..5102c16438f 100644 --- a/src/nspawn/nspawn.c +++ b/src/nspawn/nspawn.c @@ -5294,25 +5294,25 @@ static int run_container( } static int initialize_rlimits(void) { - /* The default resource limits the kernel passes to PID 1, as per kernel 4.16. 
Let's pass our container payload + /* The default resource limits the kernel passes to PID 1, as per kernel 5.16. Let's pass our container payload * the same values as the kernel originally passed to PID 1, in order to minimize differences between host and * container execution environments. */ static const struct rlimit kernel_defaults[_RLIMIT_MAX] = { - [RLIMIT_AS] = { RLIM_INFINITY, RLIM_INFINITY }, - [RLIMIT_CORE] = { 0, RLIM_INFINITY }, - [RLIMIT_CPU] = { RLIM_INFINITY, RLIM_INFINITY }, - [RLIMIT_DATA] = { RLIM_INFINITY, RLIM_INFINITY }, - [RLIMIT_FSIZE] = { RLIM_INFINITY, RLIM_INFINITY }, - [RLIMIT_LOCKS] = { RLIM_INFINITY, RLIM_INFINITY }, - [RLIMIT_MEMLOCK] = { 65536, 65536 }, - [RLIMIT_MSGQUEUE] = { 819200, 819200 }, - [RLIMIT_NICE] = { 0, 0 }, - [RLIMIT_NOFILE] = { 1024, 4096 }, - [RLIMIT_RSS] = { RLIM_INFINITY, RLIM_INFINITY }, - [RLIMIT_RTPRIO] = { 0, 0 }, - [RLIMIT_RTTIME] = { RLIM_INFINITY, RLIM_INFINITY }, - [RLIMIT_STACK] = { 8388608, RLIM_INFINITY }, + [RLIMIT_AS] = { RLIM_INFINITY, RLIM_INFINITY }, + [RLIMIT_CORE] = { 0, RLIM_INFINITY }, + [RLIMIT_CPU] = { RLIM_INFINITY, RLIM_INFINITY }, + [RLIMIT_DATA] = { RLIM_INFINITY, RLIM_INFINITY }, + [RLIMIT_FSIZE] = { RLIM_INFINITY, RLIM_INFINITY }, + [RLIMIT_LOCKS] = { RLIM_INFINITY, RLIM_INFINITY }, + [RLIMIT_MEMLOCK] = { DEFAULT_RLIMIT_MEMLOCK, DEFAULT_RLIMIT_MEMLOCK }, + [RLIMIT_MSGQUEUE] = { 819200, 819200 }, + [RLIMIT_NICE] = { 0, 0 }, + [RLIMIT_NOFILE] = { 1024, 4096 }, + [RLIMIT_RSS] = { RLIM_INFINITY, RLIM_INFINITY }, + [RLIMIT_RTPRIO] = { 0, 0 }, + [RLIMIT_RTTIME] = { RLIM_INFINITY, RLIM_INFINITY }, + [RLIMIT_STACK] = { 8388608, RLIM_INFINITY }, /* The kernel scales the default for RLIMIT_NPROC and RLIMIT_SIGPENDING based on the system's amount of * RAM. To provide best compatibility we'll read these limits off PID 1 instead of hardcoding them -- 2.47.3