From: Zbigniew Jędrzejewski-Szmek Date: Thu, 30 Jul 2020 11:36:10 +0000 (+0200) Subject: pid1: stop limiting size of /dev/shm X-Git-Tag: v246~4 X-Git-Url: http://git.ipfire.org/?p=thirdparty%2Fsystemd.git;a=commitdiff_plain;h=b67ec8e5b2e1a74d7e9a3a2b3ac60b7b2e39d4ea pid1: stop limiting size of /dev/shm The explicit limit is dropped, which means that we return to the kernel default of 50% of RAM. See 362a55fc14 for a discussion why that is not as much as it seems. It turns out various applications need more space in /dev/shm and we would break them by imposing a low limit. While at it, rename the define and use a single macro for various tmpfs mounts. We don't really care what the purpose of the given tmpfs is, so it seems reasonable to use a single macro. This effectively reverts part of 7d85383edbab7. Fixes #16617. --- diff --git a/src/core/mount-setup.c b/src/core/mount-setup.c index 028c4de2802..feb88f3e6e5 100644 --- a/src/core/mount-setup.c +++ b/src/core/mount-setup.c @@ -61,51 +61,51 @@ typedef struct MountPoint { #endif static const MountPoint mount_table[] = { - { "sysfs", "/sys", "sysfs", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV, + { "sysfs", "/sys", "sysfs", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV, NULL, MNT_FATAL|MNT_IN_CONTAINER }, - { "proc", "/proc", "proc", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV, + { "proc", "/proc", "proc", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV, NULL, MNT_FATAL|MNT_IN_CONTAINER }, - { "devtmpfs", "/dev", "devtmpfs", "mode=755" TMPFS_LIMITS_DEV, MS_NOSUID|MS_NOEXEC|MS_STRICTATIME, + { "devtmpfs", "/dev", "devtmpfs", "mode=755" TMPFS_LIMITS_DEV, MS_NOSUID|MS_NOEXEC|MS_STRICTATIME, NULL, MNT_FATAL|MNT_IN_CONTAINER }, - { "securityfs", "/sys/kernel/security", "securityfs", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV, + { "securityfs", "/sys/kernel/security", "securityfs", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV, NULL, MNT_NONE }, #if ENABLE_SMACK - { "smackfs", "/sys/fs/smackfs", "smackfs", "smackfsdef=*", MS_NOSUID|MS_NOEXEC|MS_NODEV, + { "smackfs", "/sys/fs/smackfs", "smackfs", "smackfsdef=*", MS_NOSUID|MS_NOEXEC|MS_NODEV, mac_smack_use, MNT_FATAL }, - { "tmpfs", "/dev/shm", "tmpfs", "mode=1777,smackfsroot=*" TMPFS_LIMITS_DEV_SHM, MS_NOSUID|MS_NODEV|MS_STRICTATIME, + { "tmpfs", "/dev/shm", "tmpfs", "mode=1777,smackfsroot=*", MS_NOSUID|MS_NODEV|MS_STRICTATIME, mac_smack_use, MNT_FATAL }, #endif - { "tmpfs", "/dev/shm", "tmpfs", "mode=1777" TMPFS_LIMITS_DEV_SHM, MS_NOSUID|MS_NODEV|MS_STRICTATIME, + { "tmpfs", "/dev/shm", "tmpfs", "mode=1777", MS_NOSUID|MS_NODEV|MS_STRICTATIME, NULL, MNT_FATAL|MNT_IN_CONTAINER }, - { "devpts", "/dev/pts", "devpts", "mode=620,gid=" STRINGIFY(TTY_GID), MS_NOSUID|MS_NOEXEC, + { "devpts", "/dev/pts", "devpts", "mode=620,gid=" STRINGIFY(TTY_GID), MS_NOSUID|MS_NOEXEC, NULL, MNT_IN_CONTAINER }, #if ENABLE_SMACK - { "tmpfs", "/run", "tmpfs", "mode=755,smackfsroot=*" TMPFS_LIMITS_RUN, MS_NOSUID|MS_NODEV|MS_STRICTATIME, + { "tmpfs", "/run", "tmpfs", "mode=755,smackfsroot=*" TMPFS_LIMITS_RUN, MS_NOSUID|MS_NODEV|MS_STRICTATIME, mac_smack_use, MNT_FATAL }, #endif - { "tmpfs", "/run", "tmpfs", "mode=755" TMPFS_LIMITS_RUN, MS_NOSUID|MS_NODEV|MS_STRICTATIME, + { "tmpfs", "/run", "tmpfs", "mode=755" TMPFS_LIMITS_RUN, MS_NOSUID|MS_NODEV|MS_STRICTATIME, NULL, MNT_FATAL|MNT_IN_CONTAINER }, - { "cgroup2", "/sys/fs/cgroup", "cgroup2", "nsdelegate", MS_NOSUID|MS_NOEXEC|MS_NODEV, + { "cgroup2", "/sys/fs/cgroup", "cgroup2", "nsdelegate", MS_NOSUID|MS_NOEXEC|MS_NODEV, cg_is_unified_wanted, MNT_IN_CONTAINER|MNT_CHECK_WRITABLE }, - { "cgroup2", "/sys/fs/cgroup", "cgroup2", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV, + { "cgroup2", "/sys/fs/cgroup", "cgroup2", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV, cg_is_unified_wanted, MNT_IN_CONTAINER|MNT_CHECK_WRITABLE }, - { "tmpfs", "/sys/fs/cgroup", "tmpfs", "mode=755" TMPFS_LIMITS_SYS_FS_CGROUP, MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_STRICTATIME, + { "tmpfs", "/sys/fs/cgroup", "tmpfs", "mode=755" TMPFS_LIMITS_SYS_FS_CGROUP, MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_STRICTATIME, cg_is_legacy_wanted, MNT_FATAL|MNT_IN_CONTAINER }, - { "cgroup2", "/sys/fs/cgroup/unified", "cgroup2", "nsdelegate", MS_NOSUID|MS_NOEXEC|MS_NODEV, + { "cgroup2", "/sys/fs/cgroup/unified", "cgroup2", "nsdelegate", MS_NOSUID|MS_NOEXEC|MS_NODEV, cg_is_hybrid_wanted, MNT_IN_CONTAINER|MNT_CHECK_WRITABLE }, - { "cgroup2", "/sys/fs/cgroup/unified", "cgroup2", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV, + { "cgroup2", "/sys/fs/cgroup/unified", "cgroup2", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV, cg_is_hybrid_wanted, MNT_IN_CONTAINER|MNT_CHECK_WRITABLE }, - { "cgroup", "/sys/fs/cgroup/systemd", "cgroup", "none,name=systemd,xattr", MS_NOSUID|MS_NOEXEC|MS_NODEV, + { "cgroup", "/sys/fs/cgroup/systemd", "cgroup", "none,name=systemd,xattr", MS_NOSUID|MS_NOEXEC|MS_NODEV, cg_is_legacy_wanted, MNT_IN_CONTAINER }, - { "cgroup", "/sys/fs/cgroup/systemd", "cgroup", "none,name=systemd", MS_NOSUID|MS_NOEXEC|MS_NODEV, + { "cgroup", "/sys/fs/cgroup/systemd", "cgroup", "none,name=systemd", MS_NOSUID|MS_NOEXEC|MS_NODEV, cg_is_legacy_wanted, MNT_FATAL|MNT_IN_CONTAINER }, - { "pstore", "/sys/fs/pstore", "pstore", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV, + { "pstore", "/sys/fs/pstore", "pstore", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV, NULL, MNT_NONE }, #if ENABLE_EFI - { "efivarfs", "/sys/firmware/efi/efivars", "efivarfs", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV, + { "efivarfs", "/sys/firmware/efi/efivars", "efivarfs", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV, is_efi_boot, MNT_NONE }, #endif - { "bpf", "/sys/fs/bpf", "bpf", "mode=700", MS_NOSUID|MS_NOEXEC|MS_NODEV, + { "bpf", "/sys/fs/bpf", "bpf", "mode=700", MS_NOSUID|MS_NOEXEC|MS_NODEV, NULL, MNT_NONE, }, }; diff --git a/src/core/namespace.c b/src/core/namespace.c index 5865dfe93ea..36d5ff67aed 100644 --- a/src/core/namespace.c +++ b/src/core/namespace.c @@ -326,23 +326,21 @@ static int append_bind_mounts(MountEntry **p, const BindMount *binds, size_t n) } static int append_tmpfs_mounts(MountEntry **p, const TemporaryFileSystem *tmpfs, size_t n) { - size_t i; - int r; - assert(p); - for (i = 0; i < n; i++) { + for (size_t i = 0; i < n; i++) { const TemporaryFileSystem *t = tmpfs + i; _cleanup_free_ char *o = NULL, *str = NULL; unsigned long flags; bool ro = false; + int r; if (!path_is_absolute(t->path)) return log_debug_errno(SYNTHETIC_ERRNO(EINVAL), "Path is not absolute: %s", t->path); - str = strjoin("mode=0755" TMPFS_LIMITS_TEMPORARY_FS ",", t->options); + str = strjoin("mode=0755" NESTED_TMPFS_LIMITS ",", t->options); if (!str) return -ENOMEM; diff --git a/src/nspawn/nspawn-mount.c b/src/nspawn/nspawn-mount.c index a48c7f62da2..5599c6a1b3b 100644 --- a/src/nspawn/nspawn-mount.c +++ b/src/nspawn/nspawn-mount.c @@ -549,7 +549,7 @@ int mount_all(const char *dest, MOUNT_IN_USERNS|MOUNT_MKDIR }, /* Then we list outer child mounts (i.e. mounts applied *before* entering user namespacing) */ - { "tmpfs", "/tmp", "tmpfs", "mode=1777" TMPFS_LIMITS_TMP, MS_NOSUID|MS_NODEV|MS_STRICTATIME, + { "tmpfs", "/tmp", "tmpfs", "mode=1777" NESTED_TMPFS_LIMITS, MS_NOSUID|MS_NODEV|MS_STRICTATIME, MOUNT_FATAL|MOUNT_APPLY_TMPFS_TMP|MOUNT_MKDIR }, { "tmpfs", "/sys", "tmpfs", "mode=555" TMPFS_LIMITS_SYS, MS_NOSUID|MS_NOEXEC|MS_NODEV, MOUNT_FATAL|MOUNT_APPLY_APIVFS_NETNS|MOUNT_MKDIR }, @@ -559,7 +559,7 @@ int mount_all(const char *dest, MOUNT_FATAL|MOUNT_MKDIR }, /* skipped if above was mounted */ { "tmpfs", "/dev", "tmpfs", "mode=755" TMPFS_LIMITS_DEV, MS_NOSUID|MS_STRICTATIME, MOUNT_FATAL|MOUNT_MKDIR }, - { "tmpfs", "/dev/shm", "tmpfs", "mode=1777" TMPFS_LIMITS_DEV_SHM, MS_NOSUID|MS_NODEV|MS_STRICTATIME, + { "tmpfs", "/dev/shm", "tmpfs", "mode=1777" NESTED_TMPFS_LIMITS, MS_NOSUID|MS_NODEV|MS_STRICTATIME, MOUNT_FATAL|MOUNT_MKDIR }, { "tmpfs", "/run", "tmpfs", "mode=755" TMPFS_LIMITS_RUN, MS_NOSUID|MS_NODEV|MS_STRICTATIME, MOUNT_FATAL|MOUNT_MKDIR }, diff --git a/src/shared/mount-util.h b/src/shared/mount-util.h index 7bad3743e0d..8fb597e7c03 100644 --- a/src/shared/mount-util.h +++ b/src/shared/mount-util.h @@ -21,13 +21,11 @@ * PID1 because 16MB of free space is required. */ #define TMPFS_LIMITS_RUN ",size=20%,nr_inodes=800k" -/* The limit used for various tmpfs mounts, but not /tmp itself. +/* The limit used for various nested tmpfs mounts, in paricular for guests started by systemd-nspawn. * 10% of RAM (using 16GB of RAM as a baseline) translates to 400k inodes (assuming 4k each) and 25% * translates to 1M inodes. - * /tmp is configured through a .mount unit file. */ -#define TMPFS_LIMITS_TMP ",size=10%,nr_inodes=400k" -#define TMPFS_LIMITS_DEV_SHM TMPFS_LIMITS_TMP -#define TMPFS_LIMITS_TEMPORARY_FS TMPFS_LIMITS_TMP + * (On the host, /tmp is configured through a .mount unit file.) */ +#define NESTED_TMPFS_LIMITS ",size=10%,nr_inodes=400k" /* More space for volatile root and /var */ #define TMPFS_LIMITS_VAR ",size=25%,nr_inodes=1m"