From: Christian Brauner
Date: Fri, 8 May 2026 08:48:12 +0000 (+0200)
Subject: core: preserve RestrictFileSystemAccess= BPF state across daemon-reexec
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=6c0f509587eb90f9d586d097500a66c2f33078d3;p=thirdparty%2Fsystemd.git

core: preserve RestrictFileSystemAccess= BPF state across daemon-reexec

The BPF link and .bss map FDs must survive PID1 re-execution
(daemon-reexec, switch_root, soft-reboot). Without serialization,
manager_free() closes them before execv, the programs detach, and the
verity_devices map is freed. After exec, a fresh skeleton would have an
empty map — but existing dm-verity devices have already called
bdev_setintegrity and won't call it again. The result would be a
deny-default policy with an empty map, i.e., all execution denied and the
system bricked.

Add serialize/deserialize support using systemd's existing
serialize_fd / fdset_cloexec / deserialize_fd infrastructure:

Before exec (in manager_serialize via bpf_restrict_fsaccess_serialize):
- Dup each link FD and the .bss map FD into the FDSet
- fdset_cloexec(fds, false) + execv() preserves them across exec

After exec (in manager_deserialize + bpf_restrict_fsaccess_setup):
- Deserialize the link FDs and .bss map FD into the Manager struct
- bpf_restrict_fsaccess_setup() detects the deserialized FDs and skips
  skeleton re-creation entirely — the programs are already attached
- If no longer in initrd, clear initramfs_s_dev in the kernel map

No bpffs pinning is needed. This avoids a bpffs mount dependency and
eliminates the external attack surface that pinned objects would create
(discoverable/manipulable via unlink or BPF_OBJ_GET). The FDs remain
private to PID1.
Signed-off-by: Christian Brauner --- diff --git a/src/core/bpf-restrict-fsaccess.c b/src/core/bpf-restrict-fsaccess.c index 35bb2b86b11..dc8a7d63a75 100644 --- a/src/core/bpf-restrict-fsaccess.c +++ b/src/core/bpf-restrict-fsaccess.c @@ -12,6 +12,7 @@ #include "lsm-util.h" #include "manager.h" #include "memory-util.h" +#include "serialize.h" #include "string-table.h" /* DMVERITY_DEVICES_MAX lives in bpf-restrict-fsaccess.h for sharing with tests. */ @@ -141,6 +142,27 @@ bool bpf_restrict_fsaccess_supported(void) { return (supported = true); } +/* Partial deserialization (some FDs but not all) is fatal: continuing + * would leave enforcement incomplete. */ +static int restrict_fsaccess_have_deserialized_fds(Manager *m) { + size_t count = 0; + + assert(m); + + FOREACH_ELEMENT(fd, m->restrict_fsaccess_link_fds) + if (*fd >= 0) + count++; + + if (count == 0) + return 0; + if (count == ELEMENTSOF(m->restrict_fsaccess_link_fds)) + return 1; + + return log_error_errno(SYNTHETIC_ERRNO(EBADFD), + "bpf-restrict-fsaccess: Only %zu of %zu link FDs deserialized, refusing to continue with partial enforcement.", + count, ELEMENTSOF(m->restrict_fsaccess_link_fds)); +} + /* Close the initramfs trust window after switch_root by clearing initramfs_s_dev * in the BPF .bss map. The .bss is a BPF_F_MMAPABLE array map — mmap it and do * a single aligned 4-byte store instead of a full-value read-modify-write via @@ -169,6 +191,68 @@ static int restrict_fsaccess_clear_initramfs_trust(int bss_map_fd) { return 0; } +static int bpf_get_map_id(int fd, uint32_t *ret_id) { + struct bpf_map_info info = {}; + uint32_t len = sizeof(info); + int r; + + if (fd < 0) + return -EBADF; + + assert(ret_id); + + r = sym_bpf_obj_get_info_by_fd(fd, &info, &len); + if (r < 0) + return r; + + *ret_id = info.id; + return 0; +} + +/* Validate that deserialized FDs actually reference our LSM BPF links. 
A + * corrupted serialization file could leave FDs pointing at arbitrary kernel + * objects; a stale FD could point at a BPF link of an entirely different type + * (e.g. kprobe-multi). Verify both link type and attach type so a substituted + * FD that happens to be a BPF link still fails the check. */ +static int restrict_fsaccess_validate_deserialized_fds(Manager *m) { + int r; + + assert(m); + + r = dlopen_bpf(LOG_WARNING); + if (r < 0) + return log_error_errno(SYNTHETIC_ERRNO(ENOTRECOVERABLE), + "bpf-restrict-fsaccess: Failed to load libbpf for FD validation, aborting."); + + FOREACH_ELEMENT(fd, m->restrict_fsaccess_link_fds) { + struct bpf_link_info info = {}; + uint32_t len = sizeof(info); + const char *name = restrict_fsaccess_link_names[fd - m->restrict_fsaccess_link_fds]; + + r = sym_bpf_obj_get_info_by_fd(*fd, &info, &len); + if (r < 0) + return log_error_errno(SYNTHETIC_ERRNO(ENOTRECOVERABLE), + "bpf-restrict-fsaccess: Deserialized FD for %s is not a valid BPF object, aborting.", + name); + + if (info.type != BPF_LINK_TYPE_TRACING || info.tracing.attach_type != BPF_LSM_MAC) + return log_error_errno(SYNTHETIC_ERRNO(ENOTRECOVERABLE), + "bpf-restrict-fsaccess: Deserialized FD for %s is not an LSM tracing link (type=%u attach=%u), aborting.", + name, info.type, info.tracing.attach_type); + } + + if (m->restrict_fsaccess_bss_map_fd >= 0) { + uint32_t id; + + r = bpf_get_map_id(m->restrict_fsaccess_bss_map_fd, &id); + if (r < 0) + return log_error_errno(SYNTHETIC_ERRNO(ENOTRECOVERABLE), + "bpf-restrict-fsaccess: Deserialized FD for .bss map is not a valid BPF map, aborting."); + } + + return 0; +} + int bpf_restrict_fsaccess_setup(Manager *m) { _cleanup_(restrict_fsaccess_bpf_freep) struct restrict_fsaccess_bpf *obj = NULL; int r; @@ -178,6 +262,27 @@ int bpf_restrict_fsaccess_setup(Manager *m) { if (!MANAGER_IS_SYSTEM(m) || m->restrict_filesystem_access <= RESTRICT_FILESYSTEM_ACCESS_NO) return 0; + r = restrict_fsaccess_have_deserialized_fds(m); + if (r < 0) + 
return r; + if (r > 0) { + log_info("bpf-restrict-fsaccess: Recovered link FDs from previous exec, programs still attached."); + + r = restrict_fsaccess_validate_deserialized_fds(m); + if (r < 0) + return r; + if (m->switching_root) { + if (m->restrict_fsaccess_bss_map_fd < 0) + return log_error_errno(SYNTHETIC_ERRNO(EBADF), + "bpf-restrict-fsaccess: Cannot clear initramfs trust after switch_root."); + r = restrict_fsaccess_clear_initramfs_trust(m->restrict_fsaccess_bss_map_fd); + if (r < 0) + return r; + } + + return 0; + } + /* Fresh setup: verify BPF LSM is available */ if (!bpf_restrict_fsaccess_supported()) return log_warning_errno(SYNTHETIC_ERRNO(EOPNOTSUPP), @@ -265,6 +370,29 @@ int bpf_restrict_fsaccess_close_initramfs_trust(Manager *m) { return restrict_fsaccess_clear_initramfs_trust(m->restrict_fsaccess_bss_map_fd); } +int bpf_restrict_fsaccess_serialize(Manager *m, FILE *f, FDSet *fds) { + int r; + + assert(m); + assert(f); + assert(fds); + + if (!MANAGER_IS_SYSTEM(m) || m->restrict_filesystem_access <= RESTRICT_FILESYSTEM_ACCESS_NO) + return 0; + + FOREACH_ELEMENT(fd, m->restrict_fsaccess_link_fds) { + r = serialize_fd(f, fds, restrict_fsaccess_link_names[fd - m->restrict_fsaccess_link_fds], *fd); + if (r < 0) + return r; + } + + r = serialize_fd(f, fds, "restrict-fsaccess-bss-map", m->restrict_fsaccess_bss_map_fd); + if (r < 0) + return r; + + return 0; +} + #else /* ! BPF_FRAMEWORK || ! 
HAVE_LSM_INTEGRITY_TYPE */ bool bpf_restrict_fsaccess_supported(void) { @@ -283,4 +411,8 @@ int bpf_restrict_fsaccess_close_initramfs_trust(Manager *m) { return 0; } +int bpf_restrict_fsaccess_serialize(Manager *m, FILE *f, FDSet *fds) { + return 0; +} + #endif diff --git a/src/core/bpf-restrict-fsaccess.h b/src/core/bpf-restrict-fsaccess.h index 7abbb7d3c61..8a0a9cf2677 100644 --- a/src/core/bpf-restrict-fsaccess.h +++ b/src/core/bpf-restrict-fsaccess.h @@ -47,3 +47,4 @@ bool bpf_restrict_fsaccess_supported(void); int bpf_restrict_fsaccess_setup(Manager *m); int bpf_restrict_fsaccess_close_initramfs_trust(Manager *m); +int bpf_restrict_fsaccess_serialize(Manager *m, FILE *f, FDSet *fds); diff --git a/src/core/manager-serialize.c b/src/core/manager-serialize.c index c7948881648..528540e57e2 100644 --- a/src/core/manager-serialize.c +++ b/src/core/manager-serialize.c @@ -1,6 +1,7 @@ /* SPDX-License-Identifier: LGPL-2.1-or-later */ #include "alloc-util.h" +#include "bpf-restrict-fsaccess.h" #include "dbus.h" #include "dynamic-user.h" #include "fd-util.h" @@ -180,6 +181,10 @@ int manager_serialize( if (r < 0) return r; + r = bpf_restrict_fsaccess_serialize(m, f, fds); + if (r < 0) + return r; + (void) fputc('\n', f); HASHMAP_FOREACH_KEY(u, t, m->units) { @@ -386,6 +391,38 @@ static void manager_deserialize_gid_refs_one(Manager *m, const char *value) { manager_deserialize_uid_refs_one_internal(&m->gid_refs, value); } +static void deserialize_restrict_fsaccess(Manager *m, const char *l, FDSet *fds) { + const char *val; + int fd; + + FOREACH_ELEMENT(name, restrict_fsaccess_link_names) { + val = startswith(l, *name); + if (!val) + continue; + val = startswith(val, "="); + if (!val) + continue; + fd = deserialize_fd(fds, val); + if (fd < 0) { + log_warning_errno(fd, "bpf-restrict-fsaccess: Failed to deserialize FD for %s: %m", *name); + return; + } + close_and_replace(m->restrict_fsaccess_link_fds[name - restrict_fsaccess_link_names], fd); + return; + } + + val = 
startswith(l, "restrict-fsaccess-bss-map="); + if (!val) + return; + + fd = deserialize_fd(fds, val); + if (fd < 0) { + log_warning_errno(fd, "bpf-restrict-fsaccess: Failed to deserialize FD for .bss map: %m"); + return; + } + close_and_replace(m->restrict_fsaccess_bss_map_fd, fd); +} + int manager_deserialize(Manager *m, FILE *f, FDSet *fds) { int r; @@ -616,7 +653,9 @@ int manager_deserialize(Manager *m, FILE *f, FDSet *fds) { else (void) varlink_server_deserialize_one(m->varlink_server, val, fds); - } else if ((val = startswith(l, "dump-ratelimit="))) + } else if (startswith(l, "restrict-fsaccess-")) + deserialize_restrict_fsaccess(m, l, fds); + else if ((val = startswith(l, "dump-ratelimit="))) deserialize_ratelimit(&m->dump_ratelimit, "dump-ratelimit", val); else if ((val = startswith(l, "reload-reexec-ratelimit="))) deserialize_ratelimit(&m->reload_reexec_ratelimit, "reload-reexec-ratelimit", val); diff --git a/src/shared/bpf-dlopen.c b/src/shared/bpf-dlopen.c index 1d2fdef781e..c7d9dbdd5bf 100644 --- a/src/shared/bpf-dlopen.c +++ b/src/shared/bpf-dlopen.c @@ -38,6 +38,7 @@ DLSYM_PROTOTYPE(bpf_map_delete_elem) = NULL; DLSYM_PROTOTYPE(bpf_map_get_fd_by_id) = NULL; DLSYM_PROTOTYPE(bpf_map_lookup_elem) = NULL; DLSYM_PROTOTYPE(bpf_map_update_elem) = NULL; +DLSYM_PROTOTYPE(bpf_obj_get_info_by_fd) = NULL; DLSYM_PROTOTYPE(bpf_object__attach_skeleton) = NULL; DLSYM_PROTOTYPE(bpf_object__destroy_skeleton) = NULL; DLSYM_PROTOTYPE(bpf_object__detach_skeleton) = NULL; @@ -154,6 +155,7 @@ int dlopen_bpf(int log_level) { DLSYM_ARG(bpf_map_get_fd_by_id), DLSYM_ARG(bpf_map_lookup_elem), DLSYM_ARG(bpf_map_update_elem), + DLSYM_ARG(bpf_obj_get_info_by_fd), DLSYM_ARG(bpf_object__attach_skeleton), DLSYM_ARG(bpf_object__destroy_skeleton), DLSYM_ARG(bpf_object__detach_skeleton), diff --git a/src/shared/bpf-dlopen.h b/src/shared/bpf-dlopen.h index b3d14f9b5f4..71e6ca5d1d6 100644 --- a/src/shared/bpf-dlopen.h +++ b/src/shared/bpf-dlopen.h @@ -25,6 +25,7 @@ extern 
DLSYM_PROTOTYPE(bpf_map_delete_elem); extern DLSYM_PROTOTYPE(bpf_map_get_fd_by_id); extern DLSYM_PROTOTYPE(bpf_map_lookup_elem); extern DLSYM_PROTOTYPE(bpf_map_update_elem); +extern DLSYM_PROTOTYPE(bpf_obj_get_info_by_fd); /* The *_skeleton APIs are autogenerated by bpftool, the targets can be found * in ./build/src/core/bpf/socket-bind/socket-bind.skel.h */ extern DLSYM_PROTOTYPE(bpf_object__attach_skeleton);