[CGROUP_CONTROLLER_DEVICES] = "devices",
[CGROUP_CONTROLLER_PIDS] = "pids",
[CGROUP_CONTROLLER_BPF_FIREWALL] = "bpf-firewall",
+ [CGROUP_CONTROLLER_BPF_DEVICES] = "bpf-devices",
};
DEFINE_STRING_TABLE_LOOKUP(cgroup_controller, CGroupController);
/* BPF-based pseudo-controllers, v2 only */
CGROUP_CONTROLLER_BPF_FIREWALL,
+ CGROUP_CONTROLLER_BPF_DEVICES,
_CGROUP_CONTROLLER_MAX,
_CGROUP_CONTROLLER_INVALID = -1,
CGROUP_MASK_DEVICES = CGROUP_CONTROLLER_TO_MASK(CGROUP_CONTROLLER_DEVICES),
CGROUP_MASK_PIDS = CGROUP_CONTROLLER_TO_MASK(CGROUP_CONTROLLER_PIDS),
CGROUP_MASK_BPF_FIREWALL = CGROUP_CONTROLLER_TO_MASK(CGROUP_CONTROLLER_BPF_FIREWALL),
+ CGROUP_MASK_BPF_DEVICES = CGROUP_CONTROLLER_TO_MASK(CGROUP_CONTROLLER_BPF_DEVICES),
_CGROUP_MASK_ALL = CGROUP_CONTROLLER_TO_MASK(_CGROUP_CONTROLLER_MAX) - 1
} CGroupMask;
--- /dev/null
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#include <linux/libbpf.h>
+
+#include "bpf-devices.h"
+#include "bpf-program.h"
+
+#define PASS_JUMP_OFF 4096
+
+static int bpf_access_type(const char *acc) {
+        const char *p;
+        int mask = 0;
+
+        /* Translates an access string made of the letters 'r', 'w' and 'm' into the OR of the matching
+         * BPF_DEVCG_ACC_* bits. An empty string yields 0; any other character yields -EINVAL. */
+
+        assert(acc);
+
+        for (p = acc; *p != '\0'; p++) {
+                if (*p == 'r')
+                        mask |= BPF_DEVCG_ACC_READ;
+                else if (*p == 'w')
+                        mask |= BPF_DEVCG_ACC_WRITE;
+                else if (*p == 'm')
+                        mask |= BPF_DEVCG_ACC_MKNOD;
+                else
+                        return -EINVAL;
+        }
+
+        return mask;
+}
+
+int cgroup_bpf_whitelist_device(BPFProgram *prog, int type, int major, int minor, const char *acc) {
+ struct bpf_insn insn[] = {
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_2, type, 6), /* compare device type; on mismatch skip the rest of this 7-instruction rule */
+ BPF_MOV32_REG(BPF_REG_1, BPF_REG_3), /* calculate access type: r1 = requested access & allowed access */
+ BPF_ALU32_IMM(BPF_AND, BPF_REG_1, 0), /* the 0 immediate is a placeholder, patched via insn[2].imm below */
+ BPF_JMP_REG(BPF_JNE, BPF_REG_1, BPF_REG_3, 3), /* compare access type; skip the rule if masking removed any requested bit */
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_4, major, 2), /* compare major */
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_5, minor, 1), /* compare minor */
+ BPF_JMP_A(PASS_JUMP_OFF), /* jump to PASS; the offset is a placeholder that cgroup_apply_device_bpf() rewrites */
+ };
+ int r, access;
+ /* Appends one rule to prog that passes requests for exactly this device type, major and minor,
+  * provided the requested access is a subset of acc (a string made of 'r', 'w' and 'm'). The rule
+  * reads r2..r5, which the prologue in cgroup_init_device_bpf() loads. Returns -EINVAL for an
+  * unusable access string, otherwise the result of bpf_program_add_instructions(). */
+ assert(prog);
+ assert(acc);
+
+ access = bpf_access_type(acc);
+ if (access <= 0) /* negative: unknown letter in acc; zero: empty acc, i.e. nothing to allow */
+ return -EINVAL;
+
+ insn[2].imm = access; /* fill in the AND mask of the third instruction */
+
+ r = bpf_program_add_instructions(prog, insn, ELEMENTSOF(insn));
+ if (r < 0)
+ log_error_errno(r, "Extending device control BPF program failed: %m");
+
+ return r;
+}
+
+int cgroup_bpf_whitelist_major(BPFProgram *prog, int type, int major, const char *acc) {
+ struct bpf_insn insn[] = {
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_2, type, 5), /* compare device type; on mismatch skip the rest of this 6-instruction rule */
+ BPF_MOV32_REG(BPF_REG_1, BPF_REG_3), /* calculate access type: r1 = requested access & allowed access */
+ BPF_ALU32_IMM(BPF_AND, BPF_REG_1, 0), /* the 0 immediate is a placeholder, patched via insn[2].imm below */
+ BPF_JMP_REG(BPF_JNE, BPF_REG_1, BPF_REG_3, 2), /* compare access type; skip the rule if masking removed any requested bit */
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_4, major, 1), /* compare major; the minor number is deliberately not checked */
+ BPF_JMP_A(PASS_JUMP_OFF), /* jump to PASS; the offset is a placeholder that cgroup_apply_device_bpf() rewrites */
+ };
+ int r, access;
+ /* Appends one rule to prog that passes requests for any minor of the given device type and major,
+  * provided the requested access is a subset of acc (a string made of 'r', 'w' and 'm'). The rule
+  * reads r2..r4, which the prologue in cgroup_init_device_bpf() loads. Returns -EINVAL for an
+  * unusable access string, otherwise the result of bpf_program_add_instructions(). */
+ assert(prog);
+ assert(acc);
+
+ access = bpf_access_type(acc);
+ if (access <= 0) /* negative: unknown letter in acc; zero: empty acc, i.e. nothing to allow */
+ return -EINVAL;
+
+ insn[2].imm = access; /* fill in the AND mask of the third instruction */
+
+ r = bpf_program_add_instructions(prog, insn, ELEMENTSOF(insn));
+ if (r < 0)
+ log_error_errno(r, "Extending device control BPF program failed: %m");
+
+ return r;
+}
+
+int cgroup_init_device_bpf(BPFProgram **ret, CGroupDevicePolicy policy, bool whitelist) {
+ struct bpf_insn pre_insn[] = {
+ /* load device type to r2; NOTE(review): this is a half-word load at the access_type offset, so it
+  * presumably relies on the device type occupying the 16 bits found at that address — confirm,
+  * in particular for big-endian targets */
+ BPF_LDX_MEM(BPF_H, BPF_REG_2, BPF_REG_1,
+ offsetof(struct bpf_cgroup_dev_ctx, access_type)),
+
+ /* load access type to r3: read the whole access_type word and keep only its upper 16 bits */
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct bpf_cgroup_dev_ctx, access_type)),
+ BPF_ALU32_IMM(BPF_RSH, BPF_REG_3, 16),
+
+ /* load major number to r4 */
+ BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1,
+ offsetof(struct bpf_cgroup_dev_ctx, major)),
+
+ /* load minor number to r5 */
+ BPF_LDX_MEM(BPF_W, BPF_REG_5, BPF_REG_1,
+ offsetof(struct bpf_cgroup_dev_ctx, minor)),
+ };
+
+ _cleanup_(bpf_program_unrefp) BPFProgram *prog = NULL;
+ int r;
+ /* Creates a new BPF_PROG_TYPE_CGROUP_DEVICE program and hands it to the caller through *ret. When
+  * rules may follow (policy is CGROUP_CLOSED or a whitelist exists) the program starts with the
+  * prologue above, which loads the request fields into r2..r5. Returns 0 on success or a negative
+  * error from the bpf_program_*() helpers. */
+ assert(ret);
+
+ if (policy == CGROUP_AUTO && !whitelist) /* nothing to restrict: succeed without storing anything in *ret */
+ return 0;
+
+ r = bpf_program_new(BPF_PROG_TYPE_CGROUP_DEVICE, &prog);
+ if (r < 0)
+ return log_error_errno(r, "Loading device control BPF program failed: %m");
+
+ if (policy == CGROUP_CLOSED || whitelist) {
+ r = bpf_program_add_instructions(prog, pre_insn, ELEMENTSOF(pre_insn));
+ if (r < 0)
+ return log_error_errno(r, "Extending device control BPF program failed: %m");
+ }
+
+ *ret = TAKE_PTR(prog); /* hand the program over to the caller instead of letting the cleanup handler unref it */
+
+ return 0;
+}
+
+int cgroup_apply_device_bpf(Unit *u, BPFProgram *prog, CGroupDevicePolicy policy, bool whitelist) {
+ struct bpf_insn post_insn[] = {
+ /* return DENY: set r0 = 0, then hop over the first instruction of exit_insn (the r0 = 1 load),
+  * which is appended right after this array, so that the shared exit instruction is reached */
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_JMP_A(1),
+
+ };
+
+ struct bpf_insn exit_insn[] = {
+ /* else return ALLOW; the immediate is overwritten with 0 below when everything is to be denied */
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN()
+ };
+
+ _cleanup_free_ char *path = NULL;
+ uint32_t flags;
+ int r;
+ /* Completes prog with its final verdict instructions and attaches it to the cgroup of u. A NULL
+  * prog only drops the program previously recorded in u->bpf_device_control_installed. Returns 0
+  * on success or a negative error. NOTE(review): u is dereferenced without an assert(). */
+ if (!prog) {
+ /* Remove existing program. */
+ u->bpf_device_control_installed = bpf_program_unref(u->bpf_device_control_installed);
+ return 0;
+ }
+
+ if (policy != CGROUP_STRICT || whitelist) {
+ size_t off;
+
+ r = bpf_program_add_instructions(prog, post_insn, ELEMENTSOF(post_insn));
+ if (r < 0)
+ return log_error_errno(r, "Extending device control BPF program failed: %m");
+
+ /* Fixup PASS_JUMP_OFF jump offsets: every placeholder jump emitted by the whitelist helpers is
+  * retargeted so that, assuming the usual relative encoding (target = index + 1 + off), it lands
+  * on the instruction appended next, i.e. the first entry of exit_insn. */
+ for (off = 0; off < prog->n_instructions; off++) {
+ struct bpf_insn *ins = &prog->instructions[off];
+
+ if (ins->code == (BPF_JMP | BPF_JA) && ins->off == PASS_JUMP_OFF)
+ ins->off = prog->n_instructions - off - 1;
+ }
+ } else
+ /* Explicitly forbid everything. */
+ exit_insn[0].imm = 0;
+
+ r = bpf_program_add_instructions(prog, exit_insn, ELEMENTSOF(exit_insn));
+ if (r < 0)
+ return log_error_errno(r, "Extending device control BPF program failed: %m");
+
+ r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path, NULL, &path);
+ if (r < 0)
+ return log_error_errno(r, "Failed to determine cgroup path: %m");
+
+ flags = (u->type == UNIT_SLICE || unit_cgroup_delegate(u)) ? BPF_F_ALLOW_MULTI : 0; /* slices and delegated units attach with BPF_F_ALLOW_MULTI */
+
+ /* Unref the old BPF program (which will implicitly detach it) right before attaching the new program.
+  * Note that if the attach below fails, no program is recorded as installed afterwards. */
+ u->bpf_device_control_installed = bpf_program_unref(u->bpf_device_control_installed);
+
+ r = bpf_program_cgroup_attach(prog, BPF_CGROUP_DEVICE, path, flags);
+ if (r < 0)
+ return log_error_errno(r, "Attaching device control BPF program to cgroup %s failed: %m", path);
+
+ /* Remember that this BPF program is installed now. */
+ u->bpf_device_control_installed = bpf_program_ref(prog);
+
+ return 0;
+}
+
+int bpf_devices_supported(void) {
+        struct bpf_insn trivial[] = {
+                BPF_MOV64_IMM(BPF_REG_0, 1),
+                BPF_EXIT_INSN()
+        };
+
+        _cleanup_(bpf_program_unrefp) BPFProgram *program = NULL;
+        static int supported = -1;
+        int r;
+
+        /* Checks whether BPF device controller is supported. For this, we check three things:
+         *
+         * a) whether we are privileged
+         * b) whether the unified hierarchy is being used
+         * c) the BPF implementation in the kernel supports BPF_PROG_TYPE_CGROUP_DEVICE programs, which we require
+         *
+         * Returns > 0 if supported, 0 if not, and a negative error if the cgroup hierarchy could not be
+         * queried. A definitive answer (0 or 1) is cached in the static variable, so the probe is done
+         * at most once; the error case is not cached and is retried on the next call.
+         */
+
+        if (supported >= 0)
+                return supported;
+
+        if (geteuid() != 0) {
+                log_debug("Not enough privileges, BPF device control is not supported.");
+                return supported = 0;
+        }
+
+        r = cg_unified_controller(SYSTEMD_CGROUP_CONTROLLER);
+        if (r < 0)
+                return log_error_errno(r, "Can't determine whether the unified hierarchy is used: %m");
+        if (r == 0) {
+                log_debug("Not running with unified cgroups, BPF device control is not supported.");
+                return supported = 0;
+        }
+
+        r = bpf_program_new(BPF_PROG_TYPE_CGROUP_DEVICE, &program);
+        if (r < 0) {
+                log_debug_errno(r, "Can't allocate CGROUP DEVICE BPF program, BPF device control is not supported: %m");
+                return supported = 0;
+        }
+
+        r = bpf_program_add_instructions(program, trivial, ELEMENTSOF(trivial));
+        if (r < 0) {
+                log_debug_errno(r, "Can't add trivial instructions to CGROUP DEVICE BPF program, BPF device control is not supported: %m");
+                return supported = 0;
+        }
+
+        r = bpf_program_load_kernel(program, NULL, 0);
+        if (r < 0) {
+                log_debug_errno(r, "Can't load kernel CGROUP DEVICE BPF program, BPF device control is not supported: %m");
+                return supported = 0;
+        }
+
+        /* The kernel accepted the trivial program: record the positive result. Returning the still
+         * unset -1 here would make callers that test for "> 0" treat a working kernel as unsupported,
+         * and would repeat the probe on every call. */
+        return supported = 1;
+}
--- /dev/null
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <inttypes.h>
+
+#include "unit.h"
+
+struct BPFProgram;
+/* Probes whether BPF-based device access control can be used on this system. */
+int bpf_devices_supported(void);
+/* Append one allow rule to program p. The _device variant matches a single type/major/minor triple,
+ * the _major variant matches every minor of a type/major pair. acc is a string made of the letters
+ * 'r', 'w' and 'm'. */
+int cgroup_bpf_whitelist_device(BPFProgram *p, int type, int major, int minor, const char *acc);
+int cgroup_bpf_whitelist_major(BPFProgram *p, int type, int major, const char *acc);
+/* cgroup_init_device_bpf() may create a device control program and store it in *ret;
+ * cgroup_apply_device_bpf() appends the final verdict instructions to p and attaches it to the
+ * cgroup of u (a NULL p only drops the program currently recorded for u). */
+int cgroup_init_device_bpf(BPFProgram **ret, CGroupDevicePolicy policy, bool whitelist);
+int cgroup_apply_device_bpf(Unit *u, BPFProgram *p, CGroupDevicePolicy policy, bool whitelist);
#include "blockdev-util.h"
#include "bpf-firewall.h"
#include "btrfs-util.h"
+#include "bpf-devices.h"
#include "bus-error.h"
#include "cgroup-util.h"
#include "cgroup.h"
return 0;
}
-static int whitelist_device(const char *path, const char *node, const char *acc) {
- char buf[2+DECIMAL_STR_MAX(dev_t)*2+2+4];
+static int whitelist_device(BPFProgram *prog, const char *path, const char *node, const char *acc) {
struct stat st;
bool ignore_notfound;
int r;
return -ENODEV;
}
- sprintf(buf,
- "%c %u:%u %s",
- S_ISCHR(st.st_mode) ? 'c' : 'b',
- major(st.st_rdev), minor(st.st_rdev),
- acc);
+ if (cg_all_unified() > 0) {
+ if (!prog)
+ return 0;
- r = cg_set_attribute("devices", path, "devices.allow", buf);
- if (r < 0)
- log_full_errno(IN_SET(r, -ENOENT, -EROFS, -EINVAL, -EACCES) ? LOG_DEBUG : LOG_WARNING, r,
- "Failed to set devices.allow on %s: %m", path);
+ cgroup_bpf_whitelist_device(prog, S_ISCHR(st.st_mode) ? BPF_DEVCG_DEV_CHAR : BPF_DEVCG_DEV_BLOCK,
+ major(st.st_rdev), minor(st.st_rdev), acc);
+ } else {
+ char buf[2+DECIMAL_STR_MAX(dev_t)*2+2+4];
+
+ sprintf(buf,
+ "%c %u:%u %s",
+ S_ISCHR(st.st_mode) ? 'c' : 'b',
+ major(st.st_rdev), minor(st.st_rdev),
+ acc);
+
+ r = cg_set_attribute("devices", path, "devices.allow", buf);
+ if (r < 0)
+ log_full_errno(IN_SET(r, -ENOENT, -EROFS, -EINVAL, -EACCES) ? LOG_DEBUG : LOG_WARNING,
+ r, "Failed to set devices.allow on %s: %m", path);
+ }
return r;
}
-static int whitelist_major(const char *path, const char *name, char type, const char *acc) {
+static int whitelist_major(BPFProgram *prog, const char *path, const char *name, char type, const char *acc) {
_cleanup_fclose_ FILE *f = NULL;
char line[LINE_MAX];
+ char *p, *w;
bool good = false;
int r;
return log_warning_errno(errno, "Cannot open /proc/devices to resolve %s (%c): %m", name, type);
FOREACH_LINE(line, f, goto fail) {
- char buf[2+DECIMAL_STR_MAX(unsigned)+3+4], *p, *w;
unsigned maj;
truncate_nl(line);
if (fnmatch(name, w, 0) != 0)
continue;
- sprintf(buf,
- "%c %u:* %s",
- type,
- maj,
- acc);
+ if (cg_all_unified() > 0) {
+ if (!prog)
+ continue;
- r = cg_set_attribute("devices", path, "devices.allow", buf);
- if (r < 0)
- log_full_errno(IN_SET(r, -ENOENT, -EROFS, -EINVAL, -EACCES) ? LOG_DEBUG : LOG_WARNING, r,
- "Failed to set devices.allow on %s: %m", path);
+ cgroup_bpf_whitelist_major(prog,
+ type == 'c' ? BPF_DEVCG_DEV_CHAR : BPF_DEVCG_DEV_BLOCK,
+ maj, acc);
+ } else {
+ char buf[2+DECIMAL_STR_MAX(unsigned)+3+4];
+
+ sprintf(buf,
+ "%c %u:* %s",
+ type,
+ maj,
+ acc);
+
+ r = cg_set_attribute("devices", path, "devices.allow", buf);
+ if (r < 0)
+ log_full_errno(IN_SET(r, -ENOENT, -EROFS, -EINVAL, -EACCES) ? LOG_DEBUG : LOG_WARNING,
+ r, "Failed to set devices.allow on %s: %m", path);
+ }
}
return 0;
}
}
- if ((apply_mask & CGROUP_MASK_DEVICES) && !is_root) {
+ if ((apply_mask & (CGROUP_MASK_DEVICES | CGROUP_MASK_BPF_DEVICES)) && !is_root) {
+ _cleanup_(bpf_program_unrefp) BPFProgram *prog = NULL;
CGroupDeviceAllow *a;
- /* Changing the devices list of a populated cgroup
- * might result in EINVAL, hence ignore EINVAL
- * here. */
+ if (cg_all_unified() > 0) {
+ r = cgroup_init_device_bpf(&prog, c->device_policy, c->device_allow);
+ if (r < 0)
+ log_unit_warning_errno(u, r, "Failed to initialize device control bpf program: %m");
+ } else {
+ /* Changing the devices list of a populated cgroup
+ * might result in EINVAL, hence ignore EINVAL
+ * here. */
- if (c->device_allow || c->device_policy != CGROUP_AUTO)
- r = cg_set_attribute("devices", path, "devices.deny", "a");
- else
- r = cg_set_attribute("devices", path, "devices.allow", "a");
- if (r < 0)
- log_unit_full(u, IN_SET(r, -ENOENT, -EROFS, -EINVAL, -EACCES) ? LOG_DEBUG : LOG_WARNING, r,
- "Failed to reset devices.list: %m");
+ if (c->device_allow || c->device_policy != CGROUP_AUTO)
+ r = cg_set_attribute("devices", path, "devices.deny", "a");
+ else
+ r = cg_set_attribute("devices", path, "devices.allow", "a");
+ if (r < 0)
+ log_unit_full(u, IN_SET(r, -ENOENT, -EROFS, -EINVAL, -EACCES) ? LOG_DEBUG : LOG_WARNING, r,
+ "Failed to reset devices.list: %m");
+ }
if (c->device_policy == CGROUP_CLOSED ||
(c->device_policy == CGROUP_AUTO && c->device_allow)) {
const char *x, *y;
NULSTR_FOREACH_PAIR(x, y, auto_devices)
- whitelist_device(path, x, y);
+ whitelist_device(prog, path, x, y);
/* PTS (/dev/pts) devices may not be duplicated, but accessed */
- whitelist_major(path, "pts", 'c', "rw");
+ whitelist_major(prog, path, "pts", 'c', "rw");
}
LIST_FOREACH(device_allow, a, c->device_allow) {
acc[k++] = 0;
if (path_startswith(a->path, "/dev/"))
- whitelist_device(path, a->path, acc);
+ whitelist_device(prog, path, a->path, acc);
else if ((val = startswith(a->path, "block-")))
- whitelist_major(path, val, 'b', acc);
+ whitelist_major(prog, path, val, 'b', acc);
else if ((val = startswith(a->path, "char-")))
- whitelist_major(path, val, 'c', acc);
+ whitelist_major(prog, path, val, 'c', acc);
else
log_unit_debug(u, "Ignoring device %s while writing cgroup attribute.", a->path);
}
+
+ r = cgroup_apply_device_bpf(u, prog, c->device_policy, c->device_allow);
+ if (r < 0) {
+ static bool warned = false;
+
+ log_full_errno(warned ? LOG_DEBUG : LOG_WARNING, r,
+ "Unit %s configures device ACL, but the local system doesn't seem to support the BPF-based device controller.\n"
+ "Proceeding WITHOUT applying ACL (all devices will be accessible)!\n"
+ "(This warning is only shown for the first loaded unit using device ACL.)", u->id);
+
+ warned = true;
+ }
}
if (apply_mask & CGROUP_MASK_PIDS) {
if (c->device_allow ||
c->device_policy != CGROUP_AUTO)
- mask |= CGROUP_MASK_DEVICES;
+ mask |= CGROUP_MASK_DEVICES | CGROUP_MASK_BPF_DEVICES;
if (c->tasks_accounting ||
c->tasks_max != CGROUP_LIMIT_MAX)
u->cgroup_realized = false;
u->cgroup_realized_mask = 0;
u->cgroup_enabled_mask = 0;
+
+ u->bpf_device_control_installed = bpf_program_unref(u->bpf_device_control_installed);
}
int unit_search_main_pid(Unit *u, pid_t *ret) {
if (r > 0)
mask |= CGROUP_MASK_BPF_FIREWALL;
+ /* BPF-based device access control */
+ r = bpf_devices_supported();
+ if (r > 0)
+ mask |= CGROUP_MASK_BPF_DEVICES;
+
*ret = mask;
return 0;
}
audit-fd.h
automount.c
automount.h
+ bpf-devices.c
+ bpf-devices.h
bpf-firewall.c
bpf-firewall.h
cgroup.c
bpf_program_unref(u->ip_bpf_egress);
bpf_program_unref(u->ip_bpf_egress_installed);
+ bpf_program_unref(u->bpf_device_control_installed);
+
condition_free_list(u->conditions);
condition_free_list(u->asserts);
CGroupMask cgroup_members_mask;
int cgroup_inotify_wd;
+ /* Device Controller BPF program */
+ BPFProgram *bpf_device_control_installed;
+
/* IP BPF Firewalling/accounting */
int ip_accounting_ingress_map_fd;
int ip_accounting_egress_map_fd;
static void test_cg_mask_to_string(void) {
test_cg_mask_to_string_one(0, NULL);
- test_cg_mask_to_string_one(_CGROUP_MASK_ALL, "cpu cpuacct io blkio memory devices pids bpf-firewall");
+ test_cg_mask_to_string_one(_CGROUP_MASK_ALL, "cpu cpuacct io blkio memory devices pids bpf-firewall bpf-devices");
test_cg_mask_to_string_one(CGROUP_MASK_CPU, "cpu");
test_cg_mask_to_string_one(CGROUP_MASK_CPUACCT, "cpuacct");
test_cg_mask_to_string_one(CGROUP_MASK_IO, "io");