core: add self-protection guard for RestrictFileSystemAccess= BPF LSM

author Christian Brauner <brauner@kernel.org>

Fri, 8 May 2026 08:49:10 +0000 (10:49 +0200)

committer Christian Brauner <brauner@kernel.org>

Wed, 13 May 2026 08:36:12 +0000 (10:36 +0200)
author Christian Brauner <brauner@kernel.org>
Fri, 8 May 2026 08:49:10 +0000 (10:49 +0200)
committer Christian Brauner <brauner@kernel.org>
Wed, 13 May 2026 08:36:12 +0000 (10:36 +0200)
diff --git a/src/bpf/restrict-fsaccess.bpf.c b/src/bpf/restrict-fsaccess.bpf.c

index a9f368ab399c0b4daa06662edb4273fd17c1ca8f..538ddf3ef17b66618bee965b8e56330ed73431e5 100644 (file)
--- a/src/bpf/restrict-fsaccess.bpf.c
+++ b/src/bpf/restrict-fsaccess.bpf.c
@@ -34,8 +34,9 @@
  #include <bpf/bpf_helpers.h>
  #include <bpf/bpf_tracing.h>
  
-#define PROT_EXEC 0x4
-#define VM_EXEC   0x00000004
+#define PROT_EXEC          0x4
+#define VM_EXEC            0x00000004
+#define PTRACE_MODE_ATTACH 0x02
  
  /* ---- Maps ---- */
  
@@ -53,6 +54,19 @@ struct {
   * — the window is closed." */
  volatile __u32 initramfs_s_dev;
  
+/* ---- Self-protection guard globals (set by PID1 after attach) ----
+ *
+ * While all IDs are 0 (the .bss default), the guard is inactive — no real BPF
+ * object has ID 0, so no comparisons match. PID1 populates these after
+ * attaching all programs. */
+volatile __u32 protected_map_id_verity; /* kernel ID of the verity_devices map */
+volatile __u32 protected_map_id_bss; /* kernel ID of the .bss map that holds these globals */
+
+/* Must equal _RESTRICT_FILESYSTEM_ACCESS_LINK_MAX in bpf-restrict-fsaccess.h — update when adding programs */
+#define NUM_PROTECTED_OBJS 9 /* 5 enforcement + 4 guard (bpf, bpf_map, bpf_prog, ptrace) */
+volatile __u32 protected_prog_ids[NUM_PROTECTED_OBJS]; /* program IDs compared by the lsm/bpf_prog guard */
+volatile __u32 protected_link_ids[NUM_PROTECTED_OBJS]; /* link IDs compared by the lsm/bpf guard */
+
  /* ---- Integrity tracking hooks ---- */
  
  SEC("lsm/bdev_setintegrity")
@@ -149,4 +163,113 @@ int BPF_PROG(restrict_fsaccess_file_mprotect, struct vm_area_struct *vma,
          return check_trusted_file(file);
  }
  
+/* ---- PID1 ptrace protection ----
+ *
+ * Blocks PTRACE_MODE_ATTACH access to PID1 from any other process. This
+ * prevents ptrace(PTRACE_ATTACH), /proc/1/mem, process_vm_readv(), and
+ * pidfd_getfd() from extracting sensitive state from PID1's address space.
+ *
+ * PTRACE_MODE_READ is allowed — monitoring tools and systemctl need
+ * /proc/1/status, /proc/1/fd/, /proc/1/ns/ *, etc.
+ *
+ * PID1 accessing itself is allowed. */
+
+SEC("lsm/ptrace_access_check")
+int BPF_PROG(restrict_fsaccess_ptrace_guard, struct task_struct *child,
+             unsigned int mode)
+{
+        __u32 caller_tgid;
+
+        /* Nothing to do unless this is an attach-level request aimed at PID 1's
+         * thread group; weaker access modes and other targets pass through. */
+        if (child->tgid != 1 || !(mode & PTRACE_MODE_ATTACH))
+                return 0;
+
+        /* The caller's tgid sits in the upper 32 bits of pid_tgid. PID 1 (any
+         * of its threads) may still access itself. */
+        caller_tgid = bpf_get_current_pid_tgid() >> 32;
+        if (caller_tgid == 1)
+                return 0;
+
+        return -EPERM;
+}
+
+/* ---- Self-protection guard ----
+ *
+ * Three hooks protect our BPF objects from non-PID1 processes:
+ *
+ *   lsm/bpf_map  — fires inside bpf_map_new_fd(), the chokepoint for ALL
+ *                   code paths that produce a map FD (BPF_MAP_GET_FD_BY_ID,
+ *                   BPF_OBJ_GET, BPF_MAP_CREATE). Blocks the primary attack:
+ *                   obtaining an FD to verity_devices to inject fake trusted
+ *                   devices via BPF_MAP_UPDATE_ELEM.
+ *
+ *   lsm/bpf_prog — fires inside bpf_prog_new_fd(), same chokepoint coverage
+ *                   for programs. Defense-in-depth.
+ *
+ *   lsm/bpf      — handles BPF_LINK_GET_FD_BY_ID only. There is no
+ *                   security_bpf_link() hook in the kernel, so link
+ *                   protection uses the command-level bpf() hook. This is
+ *                   sufficient: we don't pin links in production, so
+ *                   BPF_OBJ_GET is not an attack vector for links. */
+
+SEC("lsm/bpf_map")
+int BPF_PROG(restrict_fsaccess_bpf_map_guard, struct bpf_map *map,
+             unsigned int fmode)
+{
+        __u32 caller_tgid = bpf_get_current_pid_tgid() >> 32;
+        __u32 map_id = map->id;
+
+        /* PID 1 is exempt; ID 0 is the unpopulated default and never matches. */
+        if (caller_tgid == 1 || map_id == 0)
+                return 0;
+
+        if (map_id == protected_map_id_verity)
+                return -EPERM;
+
+        return map_id == protected_map_id_bss ? -EPERM : 0;
+}
+
+SEC("lsm/bpf_prog")
+int BPF_PROG(restrict_fsaccess_bpf_prog_guard, struct bpf_prog *prog)
+{
+        __u32 caller_tgid = bpf_get_current_pid_tgid() >> 32;
+        __u32 prog_id = BPF_CORE_READ(prog, aux, id);
+
+        /* PID 1 is exempt; ID 0 is the unpopulated slot default and never matches. */
+        if (caller_tgid == 1 || prog_id == 0)
+                return 0;
+
+        /* Refuse the FD if the program occupies any protected slot. */
+        for (int slot = 0; slot < NUM_PROTECTED_OBJS; slot++) {
+                if (protected_prog_ids[slot] != prog_id)
+                        continue;
+                return -EPERM;
+        }
+
+        return 0;
+}
+
+SEC("lsm/bpf")
+int BPF_PROG(restrict_fsaccess_bpf_guard, int cmd, union bpf_attr *attr,
+             unsigned int size)
+{
+        __u32 caller_tgid = bpf_get_current_pid_tgid() >> 32;
+
+        /* Only link-FD-by-ID lookups from processes other than PID 1 are screened. */
+        if (caller_tgid == 1 || cmd != BPF_LINK_GET_FD_BY_ID)
+                return 0;
+
+        /* link_id/map_id/prog_id share the same offset in the bpf_attr union */
+        __u32 wanted = attr->link_id;
+
+        /* ID 0 is the unpopulated slot default and never matches. */
+        if (!wanted)
+                return 0;
+
+        for (int slot = 0; slot < NUM_PROTECTED_OBJS; slot++)
+                if (protected_link_ids[slot] == wanted)
+                        return -EPERM;
+
+        return 0;
+}
+
  static const char _license[] SEC("license") = "GPL";
diff --git a/src/core/bpf-restrict-fsaccess.c b/src/core/bpf-restrict-fsaccess.c

index dc8a7d63a755caabc91194184786494a11e2c9be..af8a97c6627d048ac7d7a6e7881289a29be90569 100644 (file)
--- a/src/core/bpf-restrict-fsaccess.c
+++ b/src/core/bpf-restrict-fsaccess.c
@@ -30,6 +30,10 @@ const char* const restrict_fsaccess_link_names[_RESTRICT_FILESYSTEM_ACCESS_LINK_
          [RESTRICT_FILESYSTEM_ACCESS_LINK_BPRM_CHECK]        = "restrict-fsaccess-bprm-check-link",
          [RESTRICT_FILESYSTEM_ACCESS_LINK_MMAP_FILE]         = "restrict-fsaccess-mmap-file-link",
          [RESTRICT_FILESYSTEM_ACCESS_LINK_FILE_MPROTECT]     = "restrict-fsaccess-file-mprotect-link",
+        [RESTRICT_FILESYSTEM_ACCESS_LINK_PTRACE_GUARD]      = "restrict-fsaccess-ptrace-guard-link",
+        [RESTRICT_FILESYSTEM_ACCESS_LINK_BPF_MAP_GUARD]     = "restrict-fsaccess-bpf-map-guard-link",
+        [RESTRICT_FILESYSTEM_ACCESS_LINK_BPF_PROG_GUARD]    = "restrict-fsaccess-bpf-prog-guard-link",
+        [RESTRICT_FILESYSTEM_ACCESS_LINK_BPF_GUARD]         = "restrict-fsaccess-bpf-guard-link",
  };
  
  #if BPF_FRAMEWORK && HAVE_LSM_INTEGRITY_TYPE
@@ -44,8 +48,19 @@ static struct restrict_fsaccess_bpf *restrict_fsaccess_bpf_free(struct restrict_
  
  DEFINE_TRIVIAL_CLEANUP_FUNC(struct restrict_fsaccess_bpf *, restrict_fsaccess_bpf_free);
  
-/* Verify that restrict_fsaccess_bss matches the skeleton's .bss layout */
+/* Verify that restrict_fsaccess_bss matches the skeleton's .bss layout. The sizeof
+ * check catches field additions/removals; the offsetof checks catch field
+ * reordering. Every field is pinned individually: protected_prog_ids and
+ * protected_link_ids have the same size, so neither the sizeof check nor the
+ * scalar offsets would notice if the two arrays swapped places. Field order in
+ * restrict_fsaccess_bss must match the BPF global declaration order in
+ * restrict-fsaccess.bpf.c — this is what bpftool uses for the generated struct.
+ * The read-modify-write in restrict_fsaccess_clear_initramfs_trust() depends on
+ * this layout. */
  assert_cc(sizeof(struct restrict_fsaccess_bss) == sizeof_field(struct restrict_fsaccess_bpf, bss[0]));
+assert_cc(offsetof(struct restrict_fsaccess_bss, initramfs_s_dev) ==
+          offsetof(typeof_field(struct restrict_fsaccess_bpf, bss[0]), initramfs_s_dev));
+assert_cc(offsetof(struct restrict_fsaccess_bss, protected_map_id_verity) ==
+          offsetof(typeof_field(struct restrict_fsaccess_bpf, bss[0]), protected_map_id_verity));
+assert_cc(offsetof(struct restrict_fsaccess_bss, protected_map_id_bss) ==
+          offsetof(typeof_field(struct restrict_fsaccess_bpf, bss[0]), protected_map_id_bss));
+assert_cc(offsetof(struct restrict_fsaccess_bss, protected_prog_ids) ==
+          offsetof(typeof_field(struct restrict_fsaccess_bpf, bss[0]), protected_prog_ids));
+assert_cc(offsetof(struct restrict_fsaccess_bss, protected_link_ids) ==
+          offsetof(typeof_field(struct restrict_fsaccess_bpf, bss[0]), protected_link_ids));
  
  /* Build the skeleton links array indexed by the link enum. */
  #define RESTRICT_FSACCESS_LINKS(obj) {                                                                      \
@@ -54,6 +69,10 @@ assert_cc(sizeof(struct restrict_fsaccess_bss) == sizeof_field(struct restrict_f
          [RESTRICT_FILESYSTEM_ACCESS_LINK_BPRM_CHECK]        = (obj)->links.restrict_fsaccess_bprm_check,                 \
          [RESTRICT_FILESYSTEM_ACCESS_LINK_MMAP_FILE]         = (obj)->links.restrict_fsaccess_mmap_file,                  \
          [RESTRICT_FILESYSTEM_ACCESS_LINK_FILE_MPROTECT]     = (obj)->links.restrict_fsaccess_file_mprotect,              \
+        [RESTRICT_FILESYSTEM_ACCESS_LINK_PTRACE_GUARD]      = (obj)->links.restrict_fsaccess_ptrace_guard,               \
+        [RESTRICT_FILESYSTEM_ACCESS_LINK_BPF_MAP_GUARD]     = (obj)->links.restrict_fsaccess_bpf_map_guard,              \
+        [RESTRICT_FILESYSTEM_ACCESS_LINK_BPF_PROG_GUARD]    = (obj)->links.restrict_fsaccess_bpf_prog_guard,             \
+        [RESTRICT_FILESYSTEM_ACCESS_LINK_BPF_GUARD]         = (obj)->links.restrict_fsaccess_bpf_guard,                  \
  }
  
  static bool dm_verity_require_signatures(void) {
@@ -209,6 +228,63 @@ static int bpf_get_map_id(int fd, uint32_t *ret_id) {
          return 0;
  }
  
+static int bpf_get_link_ids(int fd, uint32_t *ret_link_id, uint32_t *ret_prog_id) {
+        struct bpf_link_info link_info = {};
+        uint32_t info_size = sizeof(link_info);
+
+        if (fd < 0)
+                return -EBADF;
+
+        /* Ask the kernel to describe the object behind fd. */
+        int r = sym_bpf_obj_get_info_by_fd(fd, &link_info, &info_size);
+        if (r < 0)
+                return r;
+
+        /* Both outputs are optional: a NULL pointer means the caller does not want that ID. */
+        if (ret_link_id)
+                *ret_link_id = link_info.id;
+        if (ret_prog_id)
+                *ret_prog_id = link_info.prog_id;
+
+        return 0;
+}
+
+/* Record the kernel-assigned IDs of our maps, links and programs in the .bss
+ * guard globals. The guard hooks compare against these IDs to refuse non-PID1
+ * access to our BPF objects via the bpf() syscall. Returns 0 on success or the
+ * (logged) negative error of the first lookup that failed. */
+int bpf_restrict_fsaccess_populate_guard(struct restrict_fsaccess_bpf *obj) {
+        int r;
+
+        assert(obj);
+
+        struct bpf_link *attached[] = RESTRICT_FSACCESS_LINKS(obj);
+        assert_cc(ELEMENTSOF(attached) == _RESTRICT_FILESYSTEM_ACCESS_LINK_MAX);
+
+        /* First the two maps. */
+        r = bpf_get_map_id(sym_bpf_map__fd(obj->maps.verity_devices), &obj->bss->protected_map_id_verity);
+        if (r < 0)
+                return log_error_errno(r, "bpf-restrict-fsaccess: Failed to get verity_devices map ID: %m");
+
+        r = bpf_get_map_id(sym_bpf_map__fd(obj->maps.bss), &obj->bss->protected_map_id_bss);
+        if (r < 0)
+                return log_error_errno(r, "bpf-restrict-fsaccess: Failed to get .bss map ID: %m");
+
+        /* Then every link together with its associated program. The slot index
+         * is the link enum value, so both ID arrays line up with the name table. */
+        for (size_t slot = 0; slot < ELEMENTSOF(attached); slot++) {
+                r = bpf_get_link_ids(sym_bpf_link__fd(attached[slot]),
+                                     &obj->bss->protected_link_ids[slot],
+                                     &obj->bss->protected_prog_ids[slot]);
+                if (r < 0)
+                        return log_error_errno(r, "bpf-restrict-fsaccess: Failed to get link/prog IDs for %s: %m",
+                                               restrict_fsaccess_link_names[slot]);
+        }
+
+        log_info("bpf-restrict-fsaccess: Guard globals populated (verity_map=%u, bss_map=%u)",
+                 (unsigned) obj->bss->protected_map_id_verity,
+                 (unsigned) obj->bss->protected_map_id_bss);
+        return 0;
+}
+
  /* Validate that deserialized FDs actually reference our LSM BPF links. A
   * corrupted serialization file could leave FDs pointing at arbitrary kernel
   * objects; a stale FD could point at a BPF link of an entirely different type
@@ -321,12 +397,18 @@ int bpf_restrict_fsaccess_setup(Manager *m) {
  
          log_info("bpf-restrict-fsaccess: LSM BPF programs attached");
  
+        /* Now that all programs are attached, populate the guard's globals with
+         * the kernel-assigned IDs of our maps, programs, and links. From this
+         * point on, non-PID1 processes cannot obtain FDs to our BPF objects. */
+        r = bpf_restrict_fsaccess_populate_guard(obj);
+        if (r < 0)
+                return r;
+
          /* Extract owned FDs from the skeleton. These keep the kernel BPF objects
           * alive after the skeleton is destroyed. Destroying the skeleton unmaps
-         * the .bss page from our address space so no BPF state is reachable via
-         * /proc/1/mem. */
+         * the .bss page from our address space so no BPF state (guard globals,
+         * map IDs, initramfs_s_dev) is reachable via /proc/1/mem. */
          struct bpf_link *links[] = RESTRICT_FSACCESS_LINKS(obj);
-
          FOREACH_ELEMENT(link, links) {
                  size_t idx = link - links;
  
@@ -407,6 +489,10 @@ int bpf_restrict_fsaccess_setup(Manager *m) {
                                   "bpf-restrict-fsaccess: RestrictFileSystemAccess= requested but BPF framework is not compiled in.");
  }
  
+int bpf_restrict_fsaccess_populate_guard(struct restrict_fsaccess_bpf *obj) {
+        return 0; /* stub: reports success without touching obj */
+}
+
  int bpf_restrict_fsaccess_close_initramfs_trust(Manager *m) {
          return 0;
  }
diff --git a/src/core/bpf-restrict-fsaccess.h b/src/core/bpf-restrict-fsaccess.h

index 8a0a9cf2677601076d28922d57d163251e715502..a23beab4ce59c40b94a2b7690deacc87f7e6dc56 100644 (file)
--- a/src/core/bpf-restrict-fsaccess.h
+++ b/src/core/bpf-restrict-fsaccess.h
@@ -23,6 +23,10 @@ enum {
          RESTRICT_FILESYSTEM_ACCESS_LINK_BPRM_CHECK,
          RESTRICT_FILESYSTEM_ACCESS_LINK_MMAP_FILE,
          RESTRICT_FILESYSTEM_ACCESS_LINK_FILE_MPROTECT,
+        RESTRICT_FILESYSTEM_ACCESS_LINK_PTRACE_GUARD,
+        RESTRICT_FILESYSTEM_ACCESS_LINK_BPF_MAP_GUARD,
+        RESTRICT_FILESYSTEM_ACCESS_LINK_BPF_PROG_GUARD,
+        RESTRICT_FILESYSTEM_ACCESS_LINK_BPF_GUARD,
          _RESTRICT_FILESYSTEM_ACCESS_LINK_MAX,
  };
  
@@ -39,12 +43,17 @@ enum {
   * bpf_map_lookup_elem/bpf_map_update_elem on the serialized .bss map FD. */
  struct restrict_fsaccess_bss {
          uint32_t initramfs_s_dev; /* kernel dev_t encoding: (major << 20) | minor */
+        uint32_t protected_map_id_verity; /* kernel ID of the verity_devices map */
+        uint32_t protected_map_id_bss; /* kernel ID of the .bss map itself */
+        uint32_t protected_prog_ids[_RESTRICT_FILESYSTEM_ACCESS_LINK_MAX]; /* program IDs, indexed by the link enum */
+        uint32_t protected_link_ids[_RESTRICT_FILESYSTEM_ACCESS_LINK_MAX]; /* link IDs, indexed by the link enum */
  };
  
  extern const char* const restrict_fsaccess_link_names[_RESTRICT_FILESYSTEM_ACCESS_LINK_MAX];
  
  bool bpf_restrict_fsaccess_supported(void);
  int bpf_restrict_fsaccess_setup(Manager *m);
+int bpf_restrict_fsaccess_populate_guard(struct restrict_fsaccess_bpf *obj);
  
  int bpf_restrict_fsaccess_close_initramfs_trust(Manager *m);
  int bpf_restrict_fsaccess_serialize(Manager *m, FILE *f, FDSet *fds);
author	Christian Brauner <brauner@kernel.org>
	Fri, 8 May 2026 08:49:10 +0000 (10:49 +0200)
committer	Christian Brauner <brauner@kernel.org>
	Wed, 13 May 2026 08:36:12 +0000 (10:36 +0200)
src/bpf/restrict-fsaccess.bpf.c		patch \| blob \| blame \| history
src/core/bpf-restrict-fsaccess.c		patch \| blob \| blame \| history
src/core/bpf-restrict-fsaccess.h		patch \| blob \| blame \| history