#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
-#define PROT_EXEC 0x4
-#define VM_EXEC 0x00000004
+#define PROT_EXEC 0x4
+#define VM_EXEC 0x00000004
+#define PTRACE_MODE_ATTACH 0x02
/* ---- Maps ---- */
* — the window is closed." */
volatile __u32 initramfs_s_dev;
+/* ---- Self-protection guard globals (set by PID1 after attach) ----
+ *
+ * While all IDs are 0 (the .bss default), the guard is inactive — no real BPF
+ * object has ID 0, so no comparisons match. PID1 populates these after
+ * attaching all programs.
+ *
+ * protected_map_id_verity and protected_map_id_bss hold the IDs of the
+ * verity_devices map and the .bss map; the bpf_map guard denies non-PID1
+ * access to a map whose ID equals either of them. */
+volatile __u32 protected_map_id_verity;
+volatile __u32 protected_map_id_bss;
+
+/* Number of slots in the program/link ID tables below. Must equal
+ * _RESTRICT_FILESYSTEM_ACCESS_LINK_MAX in bpf-restrict-fsaccess.h — update
+ * when adding programs. Slot i holds the IDs PID1 obtained for link i and
+ * for the program attached through that link. */
+#define NUM_PROTECTED_OBJS 9 /* 5 enforcement + 4 guard (bpf, bpf_map, bpf_prog, ptrace) */
+volatile __u32 protected_prog_ids[NUM_PROTECTED_OBJS];
+volatile __u32 protected_link_ids[NUM_PROTECTED_OBJS];
+
/* ---- Integrity tracking hooks ---- */
SEC("lsm/bdev_setintegrity")
return check_trusted_file(file);
}
+/* ---- PID1 ptrace protection ----
+ *
+ * Blocks PTRACE_MODE_ATTACH access to PID1 from any other process. This
+ * prevents ptrace(PTRACE_ATTACH), /proc/1/mem, process_vm_readv(), and
+ * pidfd_getfd() from extracting sensitive state from PID1's address space.
+ *
+ * PTRACE_MODE_READ is allowed — monitoring tools and systemctl need
+ * /proc/1/status, /proc/1/fd/, /proc/1/ns/ *, etc.
+ *
+ * PID1 accessing itself is allowed.
+ *
+ * Returns 0 for every case described as allowed above, and -EPERM when a
+ * process other than PID1 requests attach-level access to PID1. */
+
+SEC("lsm/ptrace_access_check")
+int BPF_PROG(restrict_fsaccess_ptrace_guard, struct task_struct *child,
+ unsigned int mode)
+{
+ /* We only care about PID 1 and its threads (there should be none, but match on the thread group ID anyway). */
+ if (child->tgid != 1)
+ return 0;
+
+ /* We only care about dangerous operations. */
+ if (!(mode & PTRACE_MODE_ATTACH))
+ return 0;
+
+ /* PID1 (any thread) accessing itself is allowed. */
+ if ((bpf_get_current_pid_tgid() >> 32) == 1)
+ return 0;
+
+ return -EPERM; /* attach-level access to PID1 from another process */
+}
+
+/* ---- Self-protection guard ----
+ *
+ * Three hooks protect our BPF objects from non-PID1 processes:
+ *
+ * lsm/bpf_map — fires inside bpf_map_new_fd(), the chokepoint for ALL
+ * code paths that produce a map FD (BPF_MAP_GET_FD_BY_ID,
+ * BPF_OBJ_GET, BPF_MAP_CREATE). Blocks the primary attack:
+ * obtaining an FD to verity_devices to inject fake trusted
+ * devices via BPF_MAP_UPDATE_ELEM.
+ *
+ * lsm/bpf_prog — fires inside bpf_prog_new_fd(), same chokepoint coverage
+ * for programs. Defense-in-depth.
+ *
+ * lsm/bpf — handles BPF_LINK_GET_FD_BY_ID only. There is no
+ * security_bpf_link() hook in the kernel, so link
+ * protection uses the command-level bpf() hook. This is
+ * sufficient: we don't pin links in production, so
+ * BPF_OBJ_GET is not an attack vector for links. */
+
+SEC("lsm/bpf_map")
+int BPF_PROG(restrict_fsaccess_bpf_map_guard, struct bpf_map *map,
+ unsigned int fmode) /* fmode is not inspected: any access mode to a protected map is denied */
+{
+ __u32 id;
+
+ if ((bpf_get_current_pid_tgid() >> 32) == 1) /* the caller is PID1: never restricted */
+ return 0;
+
+ id = map->id; /* ID 0 is excluded below so unpopulated (zero) globals never match */
+ if (id != 0 && (id == protected_map_id_verity ||
+ id == protected_map_id_bss))
+ return -EPERM; /* the map is verity_devices or .bss */
+
+ return 0; /* any other map is unaffected by the guard */
+}
+
+SEC("lsm/bpf_prog")
+int BPF_PROG(restrict_fsaccess_bpf_prog_guard, struct bpf_prog *prog)
+{
+ __u32 id;
+
+ if ((bpf_get_current_pid_tgid() >> 32) == 1) /* the caller is PID1: never restricted */
+ return 0;
+
+ id = BPF_CORE_READ(prog, aux, id); /* program ID, read through prog->aux */
+ if (id == 0) /* ID 0 would match unpopulated (zero) table slots, so let it through */
+ return 0;
+
+ for (int i = 0; i < NUM_PROTECTED_OBJS; i++) /* linear scan of the table PID1 filled in */
+ if (id == protected_prog_ids[i])
+ return -EPERM; /* the program is one of our protected programs */
+
+ return 0; /* any other program is unaffected by the guard */
+}
+
+SEC("lsm/bpf")
+int BPF_PROG(restrict_fsaccess_bpf_guard, int cmd, union bpf_attr *attr,
+ unsigned int size) /* size is not inspected */
+{
+ __u32 id;
+
+ if ((bpf_get_current_pid_tgid() >> 32) == 1) /* the caller is PID1: never restricted */
+ return 0;
+
+ if (cmd != BPF_LINK_GET_FD_BY_ID) /* this hook only polices link lookups by ID */
+ return 0;
+
+ /* Only BPF_LINK_GET_FD_BY_ID reaches this point, so the requested ID is read
+ * as link_id (link_id/map_id/prog_id share the same offset in the bpf_attr union). */
+ id = attr->link_id;
+ if (id == 0) /* ID 0 would match unpopulated (zero) table slots, so let it through */
+ return 0;
+
+ for (int i = 0; i < NUM_PROTECTED_OBJS; i++) /* linear scan of the table PID1 filled in */
+ if (id == protected_link_ids[i])
+ return -EPERM; /* the request names one of our protected links */
+
+ return 0; /* any other link is unaffected by the guard */
+}
+
static const char _license[] SEC("license") = "GPL";
[RESTRICT_FILESYSTEM_ACCESS_LINK_BPRM_CHECK] = "restrict-fsaccess-bprm-check-link",
[RESTRICT_FILESYSTEM_ACCESS_LINK_MMAP_FILE] = "restrict-fsaccess-mmap-file-link",
[RESTRICT_FILESYSTEM_ACCESS_LINK_FILE_MPROTECT] = "restrict-fsaccess-file-mprotect-link",
+ [RESTRICT_FILESYSTEM_ACCESS_LINK_PTRACE_GUARD] = "restrict-fsaccess-ptrace-guard-link",
+ [RESTRICT_FILESYSTEM_ACCESS_LINK_BPF_MAP_GUARD] = "restrict-fsaccess-bpf-map-guard-link",
+ [RESTRICT_FILESYSTEM_ACCESS_LINK_BPF_PROG_GUARD] = "restrict-fsaccess-bpf-prog-guard-link",
+ [RESTRICT_FILESYSTEM_ACCESS_LINK_BPF_GUARD] = "restrict-fsaccess-bpf-guard-link",
};
#if BPF_FRAMEWORK && HAVE_LSM_INTEGRITY_TYPE
DEFINE_TRIVIAL_CLEANUP_FUNC(struct restrict_fsaccess_bpf *, restrict_fsaccess_bpf_free);
-/* Verify that restrict_fsaccess_bss matches the skeleton's .bss layout */
+/* Verify that restrict_fsaccess_bss matches the skeleton's .bss layout. The sizeof
+ * check catches field additions/removals; the offsetof checks catch field
+ * reordering. Every field gets an offsetof check: the two ID arrays have the
+ * same size, so swapping them would change neither sizeof nor the offsets of
+ * the scalar fields. Field order in restrict_fsaccess_bss must match the BPF global
+ * declaration order in restrict-fsaccess.bpf.c — this is what bpftool uses for the
+ * generated struct. The read-modify-write in restrict_fsaccess_clear_initramfs_trust()
+ * depends on this layout. */
assert_cc(sizeof(struct restrict_fsaccess_bss) == sizeof_field(struct restrict_fsaccess_bpf, bss[0]));
+assert_cc(offsetof(struct restrict_fsaccess_bss, initramfs_s_dev) ==
+ offsetof(typeof_field(struct restrict_fsaccess_bpf, bss[0]), initramfs_s_dev));
+assert_cc(offsetof(struct restrict_fsaccess_bss, protected_map_id_verity) ==
+ offsetof(typeof_field(struct restrict_fsaccess_bpf, bss[0]), protected_map_id_verity));
+assert_cc(offsetof(struct restrict_fsaccess_bss, protected_map_id_bss) ==
+ offsetof(typeof_field(struct restrict_fsaccess_bpf, bss[0]), protected_map_id_bss));
+assert_cc(offsetof(struct restrict_fsaccess_bss, protected_prog_ids) ==
+ offsetof(typeof_field(struct restrict_fsaccess_bpf, bss[0]), protected_prog_ids));
+assert_cc(offsetof(struct restrict_fsaccess_bss, protected_link_ids) ==
+ offsetof(typeof_field(struct restrict_fsaccess_bpf, bss[0]), protected_link_ids));
/* Build the skeleton links array indexed by the link enum. */
#define RESTRICT_FSACCESS_LINKS(obj) { \
[RESTRICT_FILESYSTEM_ACCESS_LINK_BPRM_CHECK] = (obj)->links.restrict_fsaccess_bprm_check, \
[RESTRICT_FILESYSTEM_ACCESS_LINK_MMAP_FILE] = (obj)->links.restrict_fsaccess_mmap_file, \
[RESTRICT_FILESYSTEM_ACCESS_LINK_FILE_MPROTECT] = (obj)->links.restrict_fsaccess_file_mprotect, \
+ [RESTRICT_FILESYSTEM_ACCESS_LINK_PTRACE_GUARD] = (obj)->links.restrict_fsaccess_ptrace_guard, \
+ [RESTRICT_FILESYSTEM_ACCESS_LINK_BPF_MAP_GUARD] = (obj)->links.restrict_fsaccess_bpf_map_guard, \
+ [RESTRICT_FILESYSTEM_ACCESS_LINK_BPF_PROG_GUARD] = (obj)->links.restrict_fsaccess_bpf_prog_guard, \
+ [RESTRICT_FILESYSTEM_ACCESS_LINK_BPF_GUARD] = (obj)->links.restrict_fsaccess_bpf_guard, \
}
static bool dm_verity_require_signatures(void) {
return 0;
}
+static int bpf_get_link_ids(int fd, uint32_t *ret_link_id, uint32_t *ret_prog_id) { /* Query the BPF link
+ * behind fd and report its own ID and the prog_id recorded in its link info.
+ * Either output pointer may be NULL, in which case that value is not stored.
+ * Returns 0 on success, -EBADF for a negative fd, or the negative value
+ * returned by the info query. */
+ struct bpf_link_info info = {};
+ uint32_t len = sizeof(info);
+ int r;
+
+ if (fd < 0)
+ return -EBADF; /* reject an invalid FD before issuing the query */
+
+ r = sym_bpf_obj_get_info_by_fd(fd, &info, &len);
+ if (r < 0)
+ return r; /* pass the error from the query through unchanged */
+
+ if (ret_link_id)
+ *ret_link_id = info.id;
+ if (ret_prog_id)
+ *ret_prog_id = info.prog_id;
+
+ return 0;
+}
+
+/* Populate guard globals with kernel-assigned IDs so the guard hooks block
+ * non-PID1 access to our maps/progs/links via the bpf() syscall.
+ *
+ * Writes into obj->bss the IDs of the verity_devices and .bss maps and, for
+ * each entry of the links array, the link ID and the ID of its program.
+ * Returns 0 on success. On the first failing lookup the error is logged and
+ * the value of log_error_errno() is returned; IDs written before the failure
+ * are left in place. */
+int bpf_restrict_fsaccess_populate_guard(struct restrict_fsaccess_bpf *obj) {
+ int r;
+
+ assert(obj);
+
+ struct bpf_link *links[] = RESTRICT_FSACCESS_LINKS(obj);
+ assert_cc(ELEMENTSOF(links) == _RESTRICT_FILESYSTEM_ACCESS_LINK_MAX);
+
+ /* Map IDs */
+ r = bpf_get_map_id(sym_bpf_map__fd(obj->maps.verity_devices), &obj->bss->protected_map_id_verity);
+ if (r < 0)
+ return log_error_errno(r, "bpf-restrict-fsaccess: Failed to get verity_devices map ID: %m");
+
+ r = bpf_get_map_id(sym_bpf_map__fd(obj->maps.bss), &obj->bss->protected_map_id_bss);
+ if (r < 0)
+ return log_error_errno(r, "bpf-restrict-fsaccess: Failed to get .bss map ID: %m");
+
+ /* Link and program IDs (each link knows its associated program) */
+ FOREACH_ELEMENT(link, links) {
+ size_t idx = link - links; /* position in the links array; also used as the slot in both ID tables */
+
+ r = bpf_get_link_ids(sym_bpf_link__fd(*link),
+ &obj->bss->protected_link_ids[idx],
+ &obj->bss->protected_prog_ids[idx]);
+ if (r < 0)
+ return log_error_errno(r, "bpf-restrict-fsaccess: Failed to get link/prog IDs for %s: %m",
+ restrict_fsaccess_link_names[idx]);
+ }
+
+ log_info("bpf-restrict-fsaccess: Guard globals populated (verity_map=%u, bss_map=%u)",
+ (unsigned) obj->bss->protected_map_id_verity,
+ (unsigned) obj->bss->protected_map_id_bss);
+ return 0;
+}
+
/* Validate that deserialized FDs actually reference our LSM BPF links. A
* corrupted serialization file could leave FDs pointing at arbitrary kernel
* objects; a stale FD could point at a BPF link of an entirely different type
log_info("bpf-restrict-fsaccess: LSM BPF programs attached");
+ /* Now that all programs are attached, populate the guard's globals with
+ * the kernel-assigned IDs of our maps, programs, and links. From this
+ * point on, non-PID1 processes cannot obtain FDs to our BPF objects. */
+ r = bpf_restrict_fsaccess_populate_guard(obj);
+ if (r < 0)
+ return r;
+
/* Extract owned FDs from the skeleton. These keep the kernel BPF objects
* alive after the skeleton is destroyed. Destroying the skeleton unmaps
- * the .bss page from our address space so no BPF state is reachable via
- * /proc/1/mem. */
+ * the .bss page from our address space so no BPF state (guard globals,
+ * map IDs, initramfs_s_dev) is reachable via /proc/1/mem. */
struct bpf_link *links[] = RESTRICT_FSACCESS_LINKS(obj);
-
FOREACH_ELEMENT(link, links) {
size_t idx = link - links;
"bpf-restrict-fsaccess: RestrictFileSystemAccess= requested but BPF framework is not compiled in.");
}
+int bpf_restrict_fsaccess_populate_guard(struct restrict_fsaccess_bpf *obj) {
+ return 0; /* obj is ignored. NOTE(review): looks like the stub for builds without the BPF framework — confirm against the enclosing #if */
+}
+
int bpf_restrict_fsaccess_close_initramfs_trust(Manager *m) {
return 0;
}
RESTRICT_FILESYSTEM_ACCESS_LINK_BPRM_CHECK,
RESTRICT_FILESYSTEM_ACCESS_LINK_MMAP_FILE,
RESTRICT_FILESYSTEM_ACCESS_LINK_FILE_MPROTECT,
+ RESTRICT_FILESYSTEM_ACCESS_LINK_PTRACE_GUARD,
+ RESTRICT_FILESYSTEM_ACCESS_LINK_BPF_MAP_GUARD,
+ RESTRICT_FILESYSTEM_ACCESS_LINK_BPF_PROG_GUARD,
+ RESTRICT_FILESYSTEM_ACCESS_LINK_BPF_GUARD,
_RESTRICT_FILESYSTEM_ACCESS_LINK_MAX,
};
* bpf_map_lookup_elem/bpf_map_update_elem on the serialized .bss map FD. */
struct restrict_fsaccess_bss {
uint32_t initramfs_s_dev; /* kernel dev_t encoding: (major << 20) | minor */
+ uint32_t protected_map_id_verity; /* ID of the verity_devices map; 0 until the guard is populated */
+ uint32_t protected_map_id_bss; /* ID of the .bss map; 0 until the guard is populated */
+ uint32_t protected_prog_ids[_RESTRICT_FILESYSTEM_ACCESS_LINK_MAX]; /* program IDs, one slot per link enum value */
+ uint32_t protected_link_ids[_RESTRICT_FILESYSTEM_ACCESS_LINK_MAX]; /* link IDs, one slot per link enum value */
};
extern const char* const restrict_fsaccess_link_names[_RESTRICT_FILESYSTEM_ACCESS_LINK_MAX];
bool bpf_restrict_fsaccess_supported(void);
int bpf_restrict_fsaccess_setup(Manager *m);
+int bpf_restrict_fsaccess_populate_guard(struct restrict_fsaccess_bpf *obj);
int bpf_restrict_fsaccess_close_initramfs_trust(Manager *m);
int bpf_restrict_fsaccess_serialize(Manager *m, FILE *f, FDSet *fds);