]> git.ipfire.org Git - thirdparty/systemd.git/commitdiff
core: preserve RestrictFileSystemAccess= BPF state across daemon-reexec
authorChristian Brauner <brauner@kernel.org>
Fri, 8 May 2026 08:48:12 +0000 (10:48 +0200)
committerChristian Brauner <brauner@kernel.org>
Wed, 13 May 2026 08:36:12 +0000 (10:36 +0200)
The BPF link and .bss map FDs must survive PID1 re-execution
(daemon-reexec, switch_root, soft-reboot). Without serialization,
manager_free() closes them before execv, programs detach, and the
verity_devices map is freed. After exec a fresh skeleton would have
an empty map — but existing dm-verity devices have already called
bdev_setintegrity and won't call it again. The result would be a
deny-default policy with an empty map, i.e., all execution denied
and the system bricked.

Add serialize/deserialize support using systemd's existing
serialize_fd / fdset_cloexec / deserialize_fd infrastructure:

Before exec (in manager_serialize via bpf_restrict_fsaccess_serialize):
  - Dup each link FD and the .bss map FD into the FDSet
  - fdset_cloexec(fds, false) + execv() preserves them across exec

After exec (in manager_deserialize + bpf_restrict_fsaccess_setup):
  - Deserialize the link FDs and .bss map FD into the Manager struct
  - bpf_restrict_fsaccess_setup() detects the deserialized FDs and skips
    skeleton re-creation entirely — the programs are already attached
  - If no longer in initrd, clear initramfs_s_dev in the kernel map

No bpffs pinning is needed. This avoids a bpffs mount dependency and
eliminates the external attack surface that pinned objects would create
(discoverable/manipulable via unlink or BPF_OBJ_GET). The FDs remain
private to PID1.

Signed-off-by: Christian Brauner <brauner@kernel.org>
src/core/bpf-restrict-fsaccess.c
src/core/bpf-restrict-fsaccess.h
src/core/manager-serialize.c
src/shared/bpf-dlopen.c
src/shared/bpf-dlopen.h

index 35bb2b86b11f837b3f0f9a5967c41fa3360d6d74..dc8a7d63a755caabc91194184786494a11e2c9be 100644 (file)
@@ -12,6 +12,7 @@
 #include "lsm-util.h"
 #include "manager.h"
 #include "memory-util.h"
+#include "serialize.h"
 #include "string-table.h"
 
 /* DMVERITY_DEVICES_MAX lives in bpf-restrict-fsaccess.h for sharing with tests. */
@@ -141,6 +142,27 @@ bool bpf_restrict_fsaccess_supported(void) {
         return (supported = true);
 }
 
+/* Partial deserialization (some FDs but not all) is fatal: continuing
+ * would leave enforcement incomplete. */
+static int restrict_fsaccess_have_deserialized_fds(Manager *m) {
+        size_t count = 0;
+
+        assert(m);
+
+        FOREACH_ELEMENT(fd, m->restrict_fsaccess_link_fds)
+                if (*fd >= 0)
+                        count++;
+
+        if (count == 0)
+                return 0;
+        if (count == ELEMENTSOF(m->restrict_fsaccess_link_fds))
+                return 1;
+
+        return log_error_errno(SYNTHETIC_ERRNO(EBADFD),
+                               "bpf-restrict-fsaccess: Only %zu of %zu link FDs deserialized, refusing to continue with partial enforcement.",
+                               count, ELEMENTSOF(m->restrict_fsaccess_link_fds));
+}
+
 /* Close the initramfs trust window after switch_root by clearing initramfs_s_dev
  * in the BPF .bss map. The .bss is a BPF_F_MMAPABLE array map — mmap it and do
  * a single aligned 4-byte store instead of a full-value read-modify-write via
@@ -169,6 +191,68 @@ static int restrict_fsaccess_clear_initramfs_trust(int bss_map_fd) {
         return 0;
 }
 
+static int bpf_get_map_id(int fd, uint32_t *ret_id) {
+        struct bpf_map_info info = {};
+        uint32_t len = sizeof(info);
+        int r;
+
+        if (fd < 0)
+                return -EBADF;
+
+        assert(ret_id);
+
+        r = sym_bpf_obj_get_info_by_fd(fd, &info, &len);
+        if (r < 0)
+                return r;
+
+        *ret_id = info.id;
+        return 0;
+}
+
+/* Validate that deserialized FDs actually reference our LSM BPF links. A
+ * corrupted serialization file could leave FDs pointing at arbitrary kernel
+ * objects; a stale FD could point at a BPF link of an entirely different type
+ * (e.g. kprobe-multi). Verify both link type and attach type so a substituted
+ * FD that happens to be a BPF link still fails the check. */
+static int restrict_fsaccess_validate_deserialized_fds(Manager *m) {
+        int r;
+
+        assert(m);
+
+        r = dlopen_bpf(LOG_WARNING);
+        if (r < 0)
+                return log_error_errno(SYNTHETIC_ERRNO(ENOTRECOVERABLE),
+                                       "bpf-restrict-fsaccess: Failed to load libbpf for FD validation, aborting.");
+
+        FOREACH_ELEMENT(fd, m->restrict_fsaccess_link_fds) {
+                struct bpf_link_info info = {};
+                uint32_t len = sizeof(info);
+                const char *name = restrict_fsaccess_link_names[fd - m->restrict_fsaccess_link_fds];
+
+                r = sym_bpf_obj_get_info_by_fd(*fd, &info, &len);
+                if (r < 0)
+                        return log_error_errno(SYNTHETIC_ERRNO(ENOTRECOVERABLE),
+                                               "bpf-restrict-fsaccess: Deserialized FD for %s is not a valid BPF object, aborting.",
+                                               name);
+
+                if (info.type != BPF_LINK_TYPE_TRACING || info.tracing.attach_type != BPF_LSM_MAC)
+                        return log_error_errno(SYNTHETIC_ERRNO(ENOTRECOVERABLE),
+                                               "bpf-restrict-fsaccess: Deserialized FD for %s is not an LSM tracing link (type=%u attach=%u), aborting.",
+                                               name, info.type, info.tracing.attach_type);
+        }
+
+        if (m->restrict_fsaccess_bss_map_fd >= 0) {
+                uint32_t id;
+
+                r = bpf_get_map_id(m->restrict_fsaccess_bss_map_fd, &id);
+                if (r < 0)
+                        return log_error_errno(SYNTHETIC_ERRNO(ENOTRECOVERABLE),
+                                               "bpf-restrict-fsaccess: Deserialized FD for .bss map is not a valid BPF map, aborting.");
+        }
+
+        return 0;
+}
+
 int bpf_restrict_fsaccess_setup(Manager *m) {
         _cleanup_(restrict_fsaccess_bpf_freep) struct restrict_fsaccess_bpf *obj = NULL;
         int r;
@@ -178,6 +262,27 @@ int bpf_restrict_fsaccess_setup(Manager *m) {
         if (!MANAGER_IS_SYSTEM(m) || m->restrict_filesystem_access <= RESTRICT_FILESYSTEM_ACCESS_NO)
                 return 0;
 
+        r = restrict_fsaccess_have_deserialized_fds(m);
+        if (r < 0)
+                return r;
+        if (r > 0) {
+                log_info("bpf-restrict-fsaccess: Recovered link FDs from previous exec, programs still attached.");
+
+                r = restrict_fsaccess_validate_deserialized_fds(m);
+                if (r < 0)
+                        return r;
+                if (m->switching_root) {
+                        if (m->restrict_fsaccess_bss_map_fd < 0)
+                                return log_error_errno(SYNTHETIC_ERRNO(EBADF),
+                                                       "bpf-restrict-fsaccess: Cannot clear initramfs trust after switch_root.");
+                        r = restrict_fsaccess_clear_initramfs_trust(m->restrict_fsaccess_bss_map_fd);
+                        if (r < 0)
+                                return r;
+                }
+
+                return 0;
+        }
+
         /* Fresh setup: verify BPF LSM is available */
         if (!bpf_restrict_fsaccess_supported())
                 return log_warning_errno(SYNTHETIC_ERRNO(EOPNOTSUPP),
@@ -265,6 +370,29 @@ int bpf_restrict_fsaccess_close_initramfs_trust(Manager *m) {
         return restrict_fsaccess_clear_initramfs_trust(m->restrict_fsaccess_bss_map_fd);
 }
 
+int bpf_restrict_fsaccess_serialize(Manager *m, FILE *f, FDSet *fds) {
+        int r;
+
+        assert(m);
+        assert(f);
+        assert(fds);
+
+        if (!MANAGER_IS_SYSTEM(m) || m->restrict_filesystem_access <= RESTRICT_FILESYSTEM_ACCESS_NO)
+                return 0;
+
+        FOREACH_ELEMENT(fd, m->restrict_fsaccess_link_fds) {
+                r = serialize_fd(f, fds, restrict_fsaccess_link_names[fd - m->restrict_fsaccess_link_fds], *fd);
+                if (r < 0)
+                        return r;
+        }
+
+        r = serialize_fd(f, fds, "restrict-fsaccess-bss-map", m->restrict_fsaccess_bss_map_fd);
+        if (r < 0)
+                return r;
+
+        return 0;
+}
+
 #else /* ! BPF_FRAMEWORK || ! HAVE_LSM_INTEGRITY_TYPE */
 
 bool bpf_restrict_fsaccess_supported(void) {
@@ -283,4 +411,8 @@ int bpf_restrict_fsaccess_close_initramfs_trust(Manager *m) {
         return 0;
 }
 
+int bpf_restrict_fsaccess_serialize(Manager *m, FILE *f, FDSet *fds) {
+        return 0;
+}
+
 #endif
index 7abbb7d3c61575d8a8f355fca7727b2ef3ea84e4..8a0a9cf2677601076d28922d57d163251e715502 100644 (file)
@@ -47,3 +47,4 @@ bool bpf_restrict_fsaccess_supported(void);
 int bpf_restrict_fsaccess_setup(Manager *m);
 
 int bpf_restrict_fsaccess_close_initramfs_trust(Manager *m);
+int bpf_restrict_fsaccess_serialize(Manager *m, FILE *f, FDSet *fds);
index c7948881648745d77cbb3c7b1c2b66431ae3e798..528540e57e2e76275835c988d42e92bfdcd64837 100644 (file)
@@ -1,6 +1,7 @@
 /* SPDX-License-Identifier: LGPL-2.1-or-later */
 
 #include "alloc-util.h"
+#include "bpf-restrict-fsaccess.h"
 #include "dbus.h"
 #include "dynamic-user.h"
 #include "fd-util.h"
@@ -180,6 +181,10 @@ int manager_serialize(
         if (r < 0)
                 return r;
 
+        r = bpf_restrict_fsaccess_serialize(m, f, fds);
+        if (r < 0)
+                return r;
+
         (void) fputc('\n', f);
 
         HASHMAP_FOREACH_KEY(u, t, m->units) {
@@ -386,6 +391,38 @@ static void manager_deserialize_gid_refs_one(Manager *m, const char *value) {
         manager_deserialize_uid_refs_one_internal(&m->gid_refs, value);
 }
 
+static void deserialize_restrict_fsaccess(Manager *m, const char *l, FDSet *fds) {
+        const char *val;
+        int fd;
+
+        FOREACH_ELEMENT(name, restrict_fsaccess_link_names) {
+                val = startswith(l, *name);
+                if (!val)
+                        continue;
+                val = startswith(val, "=");
+                if (!val)
+                        continue;
+                fd = deserialize_fd(fds, val);
+                if (fd < 0) {
+                        log_warning_errno(fd, "bpf-restrict-fsaccess: Failed to deserialize FD for %s: %m", *name);
+                        return;
+                }
+                close_and_replace(m->restrict_fsaccess_link_fds[name - restrict_fsaccess_link_names], fd);
+                return;
+        }
+
+        val = startswith(l, "restrict-fsaccess-bss-map=");
+        if (!val)
+                return;
+
+        fd = deserialize_fd(fds, val);
+        if (fd < 0) {
+                log_warning_errno(fd, "bpf-restrict-fsaccess: Failed to deserialize FD for .bss map: %m");
+                return;
+        }
+        close_and_replace(m->restrict_fsaccess_bss_map_fd, fd);
+}
+
 int manager_deserialize(Manager *m, FILE *f, FDSet *fds) {
         int r;
 
@@ -616,7 +653,9 @@ int manager_deserialize(Manager *m, FILE *f, FDSet *fds) {
                         else
                                 (void) varlink_server_deserialize_one(m->varlink_server, val, fds);
 
-                } else if ((val = startswith(l, "dump-ratelimit=")))
+                } else if (startswith(l, "restrict-fsaccess-"))
+                        deserialize_restrict_fsaccess(m, l, fds);
+                else if ((val = startswith(l, "dump-ratelimit=")))
                         deserialize_ratelimit(&m->dump_ratelimit, "dump-ratelimit", val);
                 else if ((val = startswith(l, "reload-reexec-ratelimit=")))
                         deserialize_ratelimit(&m->reload_reexec_ratelimit, "reload-reexec-ratelimit", val);
index 1d2fdef781eea6dc2db076db9c1219db34d33576..c7d9dbdd5bfa6d8ef5bb49909814a037160f1562 100644 (file)
@@ -38,6 +38,7 @@ DLSYM_PROTOTYPE(bpf_map_delete_elem) = NULL;
 DLSYM_PROTOTYPE(bpf_map_get_fd_by_id) = NULL;
 DLSYM_PROTOTYPE(bpf_map_lookup_elem) = NULL;
 DLSYM_PROTOTYPE(bpf_map_update_elem) = NULL;
+DLSYM_PROTOTYPE(bpf_obj_get_info_by_fd) = NULL;
 DLSYM_PROTOTYPE(bpf_object__attach_skeleton) = NULL;
 DLSYM_PROTOTYPE(bpf_object__destroy_skeleton) = NULL;
 DLSYM_PROTOTYPE(bpf_object__detach_skeleton) = NULL;
@@ -154,6 +155,7 @@ int dlopen_bpf(int log_level) {
                         DLSYM_ARG(bpf_map_get_fd_by_id),
                         DLSYM_ARG(bpf_map_lookup_elem),
                         DLSYM_ARG(bpf_map_update_elem),
+                        DLSYM_ARG(bpf_obj_get_info_by_fd),
                         DLSYM_ARG(bpf_object__attach_skeleton),
                         DLSYM_ARG(bpf_object__destroy_skeleton),
                         DLSYM_ARG(bpf_object__detach_skeleton),
index b3d14f9b5f43710ff2e944eb26575f75bf895553..71e6ca5d1d65aa35297a9fbc675e61753782a978 100644 (file)
@@ -25,6 +25,7 @@ extern DLSYM_PROTOTYPE(bpf_map_delete_elem);
 extern DLSYM_PROTOTYPE(bpf_map_get_fd_by_id);
 extern DLSYM_PROTOTYPE(bpf_map_lookup_elem);
 extern DLSYM_PROTOTYPE(bpf_map_update_elem);
+extern DLSYM_PROTOTYPE(bpf_obj_get_info_by_fd);
 /* The *_skeleton APIs are autogenerated by bpftool, the targets can be found
  * in ./build/src/core/bpf/socket-bind/socket-bind.skel.h */
 extern DLSYM_PROTOTYPE(bpf_object__attach_skeleton);