From: Frantisek Sumsal Date: Wed, 25 Feb 2026 18:13:37 +0000 (+0100) Subject: nspawn: actually mask certain files under /proc/ X-Git-Tag: v260-rc2~34 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=83b8daa032cd0adb538cfd9467e6acf2c44aa661;p=thirdparty%2Fsystemd.git nspawn: actually mask certain files under /proc/ /run/systemd/inaccessible/ exists only on host - in the container we have /run/host/inaccessible/, and since all the inaccessible mounts have MOUNT_IN_USERNS we need to use the latter one, otherwise the masking gets silently skipped: ~# SYSTEMD_LOG_LEVEL=debug systemd-nspawn -q --directory=foo ls -la /proc/kallsyms ... Bind-mounting /run/systemd/inaccessible/reg on /proc/kallsyms (MS_BIND "")... Failed to mount /run/systemd/inaccessible/reg (type n/a) on /proc/kallsyms (MS_BIND ""): No such file or directory Changing mount flags /proc/kallsyms (MS_RDONLY|MS_NOSUID|MS_NODEV|MS_NOEXEC|MS_REMOUNT|MS_BIND "")... Failed to mount n/a (type n/a) on /proc/kallsyms (MS_RDONLY|MS_NOSUID|MS_NODEV|MS_NOEXEC|MS_REMOUNT|MS_BIND ""): Invalid argument Bind-mounting /run/systemd/inaccessible/reg on /proc/kcore (MS_BIND "")... Failed to mount /run/systemd/inaccessible/reg (type n/a) on /proc/kcore (MS_BIND ""): No such file or directory Changing mount flags /proc/kcore (MS_RDONLY|MS_NOSUID|MS_NODEV|MS_NOEXEC|MS_REMOUNT|MS_BIND "")... Failed to mount n/a (type n/a) on /proc/kcore (MS_RDONLY|MS_NOSUID|MS_NODEV|MS_NOEXEC|MS_REMOUNT|MS_BIND ""): Invalid argument ... Inner child finished, invoking payload. -r--r--r--. 1 root root 0 Feb 25 13:19 /proc/kallsyms --- diff --git a/src/nspawn/nspawn-mount.c b/src/nspawn/nspawn-mount.c index cfb4aac6ff3..282a29c359f 100644 --- a/src/nspawn/nspawn-mount.c +++ b/src/nspawn/nspawn-mount.c @@ -534,7 +534,7 @@ int mount_all(const char *dest, const char *selinux_apifs_context) { #define PROC_INACCESSIBLE_REG(path) \ - { "/run/systemd/inaccessible/reg", (path), NULL, NULL, MS_BIND, \ + { "/run/host/inaccessible/reg", (path), NULL, NULL, MS_BIND, \ MOUNT_IN_USERNS|MOUNT_APPLY_APIVFS_RO }, /* Bind mount first ... */ \ { NULL, (path), NULL, NULL, MS_BIND|MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_REMOUNT, \ MOUNT_IN_USERNS|MOUNT_APPLY_APIVFS_RO } /* Then, make it r/o */ diff --git a/test/units/TEST-13-NSPAWN.nspawn.sh b/test/units/TEST-13-NSPAWN.nspawn.sh index c2fa9eaaf89..c753734c331 100755 --- a/test/units/TEST-13-NSPAWN.nspawn.sh +++ b/test/units/TEST-13-NSPAWN.nspawn.sh @@ -396,6 +396,46 @@ EOF (! systemd-nspawn --rlimit==) } +testcase_check_default_inaccessible_paths() { + local root container inaccessible_paths path exp + + # Taken from src/nspawn/nspawn-mount.c:mount_all() + inaccessible_paths=( + "/proc/kallsyms" + "/proc/kcore" + "/proc/keys" + "/proc/sysrq-trigger" + "/proc/timer_list" + ) + + root="$(mktemp -d /var/lib/machines/TEST-13-NSPAWN.default_inaccessible_paths.XXX)" + container="$(basename "$root")" + create_dummy_container "$root" + + # Each inaccessible path should have zeroed permissions, which stat's %a reports as a single 0 + for path in "${inaccessible_paths[@]}"; do + systemd-nspawn --directory="$root" \ + bash -xec "ls -l $path; [[ \$(stat --format=%a $path) -eq 0 ]]" + done + + # SYSTEMD_NSPAWN_API_VFS_WRITABLE=yes mounts certain API directories under /sys/ and /proc/sys/ + # as writable, and it also skips the path masking (by dropping the MOUNT_APPLY_APIVFS_RO flag) + for path in "${inaccessible_paths[@]}"; do + exp="$(stat --format=%a "$path")" + SYSTEMD_NSPAWN_API_VFS_WRITABLE=yes systemd-nspawn --directory="$root" \ + bash -xec "ls -l $path; [[ \$(stat --format=%a $path) -eq $exp ]]" + done + + # SYSTEMD_NSPAWN_API_VFS_WRITABLE=network mounts only /proc/sys/net/ as writable but doesn't + # drop the MOUNT_APPLY_APIVFS_RO flag, so the masking should still apply + for path in "${inaccessible_paths[@]}"; do + SYSTEMD_NSPAWN_API_VFS_WRITABLE=network systemd-nspawn --directory="$root" \ + bash -xec "ls -l $path; [[ \$(stat --format=%a $path) -eq 0 ]]" + done + + rm -fr "$root" +} + nspawn_settings_cleanup() { for dev in sd-host-only sd-shared{1,2,3} sd-macvlan{1,2} sd-ipvlan{1,2}; do ip link del "$dev" || :