1 /* SPDX-License-Identifier: LGPL-2.1-or-later */
4 #include <linux/bpf_insn.h>
6 #include "bpf-devices.h"
7 #include "bpf-program.h"
8 #include "devnum-util.h"
11 #include "nulstr-util.h"
12 #include "parse-util.h"
13 #include "path-util.h"
14 #include "stdio-util.h"
15 #include "string-util.h"
/* Placeholder jump offset. The allow-list rules in this file emit an unconditional jump carrying this
 * value, and bpf_devices_apply_policy() later rewrites every such jump to its final target. */
17 #define PASS_JUMP_OFF 4096
/* Translates the access string acc into a bit mask built from BPF_DEVCG_ACC_READ, BPF_DEVCG_ACC_WRITE
 * and BPF_DEVCG_ACC_MKNOD, accumulated in r.
 *
 * NOTE(review): only the three OR statements are visible in this excerpt. The condition selecting each
 * flag is not shown; callers in this file pass strings such as "rwm" and "rw", so presumably each
 * flag is keyed on one of those characters. The return statement and any error path are not visible
 * either, so confirm both against the full source. */
19 static int bpf_access_type(const char *acc
) {
27 r
|= BPF_DEVCG_ACC_READ
;
30 r
|= BPF_DEVCG_ACC_WRITE
;
33 r
|= BPF_DEVCG_ACC_MKNOD
;
/* Appends to prog a rule that lets one specific device (type, major, minor) pass for the access modes
 * named in acc.
 *
 * The rule compares r2, r3, r4 and r5, which the prologue built in bpf_devices_cgroup_init() documents
 * as device type, access type, major and minor. Every failed comparison jumps forward past the rest of
 * this rule. A full match reaches an unconditional jump carrying the PASS_JUMP_OFF placeholder, which
 * bpf_devices_apply_policy() patches later.
 *
 * type must be 'b' or 'c' (asserted below).
 *
 * NOTE(review): the parameter list, local declarations, the checks on r and the return statement are
 * not visible in this excerpt. */
42 static int bpf_prog_allow_list_device(
54 log_trace("%s: %c %d:%d %s", __func__
, type
, major
, minor
, acc
);
56 access
= bpf_access_type(acc
);
60 assert(IN_SET(type
, 'b', 'c'));
61 const int bpf_type
= type
== 'c' ? BPF_DEVCG_DEV_CHAR
: BPF_DEVCG_DEV_BLOCK
;
63 const struct bpf_insn insn
[] = {
/* r1 = r3 & access. If r1 differs from r3 the request contains an access bit that this rule does
 * not grant, so the whole rule (the next four instructions) is skipped. */
64 BPF_MOV32_REG(BPF_REG_1
, BPF_REG_3
),
65 BPF_ALU32_IMM(BPF_AND
, BPF_REG_1
, access
),
66 BPF_JMP_REG(BPF_JNE
, BPF_REG_1
, BPF_REG_3
, 4), /* compare access type */
68 BPF_JMP_IMM(BPF_JNE
, BPF_REG_2
, bpf_type
, 3), /* compare device type */
69 BPF_JMP_IMM(BPF_JNE
, BPF_REG_4
, major
, 2), /* compare major */
70 BPF_JMP_IMM(BPF_JNE
, BPF_REG_5
, minor
, 1), /* compare minor */
71 BPF_JMP_A(PASS_JUMP_OFF
), /* jump to PASS */
/* When all three access bits are granted, the rule is added starting at insn + 3, i.e. without the
 * three-instruction access check at the start of the array. */
74 if (FLAGS_SET(access
, BPF_DEVCG_ACC_READ
| BPF_DEVCG_ACC_WRITE
| BPF_DEVCG_ACC_MKNOD
))
75 r
= bpf_program_add_instructions(prog
, insn
+ 3, ELEMENTSOF(insn
) - 3);
/* Presumably the else branch (the keyword is not visible here): add the full rule, access check
 * included. */
77 r
= bpf_program_add_instructions(prog
, insn
, ELEMENTSOF(insn
));
79 log_error_errno(r
, "Extending device control BPF program failed: %m");
/* Appends to prog a rule that lets every device with the given type and major number pass for the
 * access modes named in acc. The minor number is not compared (logged as "*").
 *
 * The rule has the same layout as the per-device rule in this file, minus the minor comparison: an
 * access check against r3, then comparisons of r2 (device type) and r4 (major), then a jump carrying
 * the PASS_JUMP_OFF placeholder that bpf_devices_apply_policy() patches later.
 *
 * type must be 'b' or 'c' (asserted below).
 *
 * NOTE(review): the parameter list, local declarations, the checks on r and the return statement are
 * not visible in this excerpt. */
84 static int bpf_prog_allow_list_major(
95 log_trace("%s: %c %d:* %s", __func__
, type
, major
, acc
);
97 access
= bpf_access_type(acc
);
101 assert(IN_SET(type
, 'b', 'c'));
102 const int bpf_type
= type
== 'c' ? BPF_DEVCG_DEV_CHAR
: BPF_DEVCG_DEV_BLOCK
;
104 const struct bpf_insn insn
[] = {
/* r1 = r3 & access. A mismatch with r3 skips the remaining three instructions of this rule. */
105 BPF_MOV32_REG(BPF_REG_1
, BPF_REG_3
),
106 BPF_ALU32_IMM(BPF_AND
, BPF_REG_1
, access
),
107 BPF_JMP_REG(BPF_JNE
, BPF_REG_1
, BPF_REG_3
, 3), /* compare access type */
109 BPF_JMP_IMM(BPF_JNE
, BPF_REG_2
, bpf_type
, 2), /* compare device type */
110 BPF_JMP_IMM(BPF_JNE
, BPF_REG_4
, major
, 1), /* compare major */
111 BPF_JMP_A(PASS_JUMP_OFF
), /* jump to PASS */
/* With all three access bits granted, the access check (the first three instructions) is left
 * out. */
114 if (FLAGS_SET(access
, BPF_DEVCG_ACC_READ
| BPF_DEVCG_ACC_WRITE
| BPF_DEVCG_ACC_MKNOD
))
115 r
= bpf_program_add_instructions(prog
, insn
+ 3, ELEMENTSOF(insn
) - 3);
/* Presumably the else branch (the keyword is not visible here): add the full rule. */
117 r
= bpf_program_add_instructions(prog
, insn
, ELEMENTSOF(insn
));
119 log_error_errno(r
, "Extending device control BPF program failed: %m");
/* Appends to prog a rule that lets every device of the given type pass for the access modes named in
 * acc. Neither major nor minor is compared (logged as "*:*").
 *
 * The rule consists of an access check against r3, a comparison of r2 (device type) and a jump carrying
 * the PASS_JUMP_OFF placeholder that bpf_devices_apply_policy() patches later.
 *
 * type must be 'b' or 'c' (asserted below).
 *
 * NOTE(review): the parameter list, local declarations, the checks on r and the return statement are
 * not visible in this excerpt. */
124 static int bpf_prog_allow_list_class(
134 log_trace("%s: %c *:* %s", __func__
, type
, acc
);
136 access
= bpf_access_type(acc
);
140 assert(IN_SET(type
, 'b', 'c'));
141 const int bpf_type
= type
== 'c' ? BPF_DEVCG_DEV_CHAR
: BPF_DEVCG_DEV_BLOCK
;
143 const struct bpf_insn insn
[] = {
/* r1 = r3 & access. A mismatch with r3 skips the remaining two instructions of this rule. */
144 BPF_MOV32_REG(BPF_REG_1
, BPF_REG_3
),
145 BPF_ALU32_IMM(BPF_AND
, BPF_REG_1
, access
),
146 BPF_JMP_REG(BPF_JNE
, BPF_REG_1
, BPF_REG_3
, 2), /* compare access type */
148 BPF_JMP_IMM(BPF_JNE
, BPF_REG_2
, bpf_type
, 1), /* compare device type */
149 BPF_JMP_A(PASS_JUMP_OFF
), /* jump to PASS */
/* With all three access bits granted, the access check (the first three instructions) is left
 * out. */
152 if (FLAGS_SET(access
, BPF_DEVCG_ACC_READ
| BPF_DEVCG_ACC_WRITE
| BPF_DEVCG_ACC_MKNOD
))
153 r
= bpf_program_add_instructions(prog
, insn
+ 3, ELEMENTSOF(insn
) - 3);
/* Presumably the else branch (the keyword is not visible here): add the full rule. */
155 r
= bpf_program_add_instructions(prog
, insn
, ELEMENTSOF(insn
));
157 log_error_errno(r
, "Extending device control BPF program failed: %m");
/* Creates the device-control BPF program for a cgroup and hands it to the caller through *ret.
 *
 * A program of type BPF_PROG_TYPE_CGROUP_DEVICE named "sd_devices" is allocated. When policy is
 * CGROUP_DEVICE_POLICY_CLOSED or allow_list is set, it is seeded with a prologue (pre_insn) that
 * unpacks struct bpf_cgroup_dev_ctx into registers for the rules added afterwards:
 *   r2 = access_type & 0xFFFF  (device type)
 *   r3 = access_type >> 16     (access type)
 *   r4 = major
 *   r5 = minor
 *
 * Failures of bpf_program_new() and bpf_program_add_instructions() are logged with log_error_errno()
 * and its result is returned.
 *
 * NOTE(review): the declarations of ret and allow_list, the conditions guarding the error returns and
 * the final return are not visible in this excerpt. */
162 int bpf_devices_cgroup_init(
164 CGroupDevicePolicy policy
,
167 const struct bpf_insn pre_insn
[] = {
168 /* load device type to r2 */
169 BPF_LDX_MEM(BPF_W
, BPF_REG_2
, BPF_REG_1
,
170 offsetof(struct bpf_cgroup_dev_ctx
, access_type
)),
171 BPF_ALU32_IMM(BPF_AND
, BPF_REG_2
, 0xFFFF),
173 /* load access type to r3 */
174 BPF_LDX_MEM(BPF_W
, BPF_REG_3
, BPF_REG_1
,
175 offsetof(struct bpf_cgroup_dev_ctx
, access_type
)),
176 BPF_ALU32_IMM(BPF_RSH
, BPF_REG_3
, 16),
178 /* load major number to r4 */
179 BPF_LDX_MEM(BPF_W
, BPF_REG_4
, BPF_REG_1
,
180 offsetof(struct bpf_cgroup_dev_ctx
, major
)),
182 /* load minor number to r5 */
183 BPF_LDX_MEM(BPF_W
, BPF_REG_5
, BPF_REG_1
,
184 offsetof(struct bpf_cgroup_dev_ctx
, minor
)),
/* Freed automatically on every exit path unless ownership is moved out with TAKE_PTR() below. */
187 _cleanup_(bpf_program_freep
) BPFProgram
*prog
= NULL
;
/* NOTE(review): the statement guarded by this condition is not visible here. Presumably an early
 * return for the case where no program is needed; confirm against the full source. */
192 if (policy
== CGROUP_DEVICE_POLICY_AUTO
&& !allow_list
)
195 r
= bpf_program_new(BPF_PROG_TYPE_CGROUP_DEVICE
, "sd_devices", &prog
);
197 return log_error_errno(r
, "Loading device control BPF program failed: %m");
/* The register-loading prologue is only emitted when rules that read those registers may follow. */
199 if (policy
== CGROUP_DEVICE_POLICY_CLOSED
|| allow_list
) {
200 r
= bpf_program_add_instructions(prog
, pre_insn
, ELEMENTSOF(pre_insn
));
202 return log_error_errno(r
, "Extending device control BPF program failed: %m");
205 *ret
= TAKE_PTR(prog
);
/* Finalizes the device-control program *prog and attaches it to the cgroup cgroup_path.
 *
 * Visible steps:
 *   1. deny_everything is true only for CGROUP_DEVICE_POLICY_STRICT without an allow list.
 *   2. Unless deny_everything, post_insn is appended and every placeholder jump (BPF_JMP | BPF_JA with
 *      offset PASS_JUMP_OFF) is rewritten to land just past the current end of the program.
 *   3. exit_insn is appended; it sets r0 to 0 (DENY) if deny_everything and to 1 (ALLOW) otherwise.
 *   4. The cgroup file system path is resolved with cg_get_path() and the program is attached with
 *      BPF_CGROUP_DEVICE and BPF_F_ALLOW_MULTI.
 *   5. If prog_installed is non-NULL, the program it pointed to is freed and replaced by *prog.
 *
 * Each failing step is logged with log_error_errno(), whose result is returned.
 *
 * NOTE(review): the declarations of prog and allow_list, the conditions guarding the error returns, the
 * tail of both instruction arrays and the final return are not visible in this excerpt. */
210 int bpf_devices_apply_policy(
212 CGroupDevicePolicy policy
,
214 const char *cgroup_path
,
215 BPFProgram
**prog_installed
) {
217 _cleanup_free_
char *controller_path
= NULL
;
220 /* This will assign *prog_installed if everything goes well. */
226 const bool deny_everything
= policy
== CGROUP_DEVICE_POLICY_STRICT
&& !allow_list
;
/* Appended after all allow-list rules. Only the move of 0 into r0 is visible here; given the exit_insn
 * comment below, 0 is the DENY verdict. */
228 const struct bpf_insn post_insn
[] = {
230 BPF_MOV64_IMM(BPF_REG_0
, 0),
234 const struct bpf_insn exit_insn
[] = {
235 /* finally return DENY if deny_everything else ALLOW */
236 BPF_MOV64_IMM(BPF_REG_0
, deny_everything
? 0 : 1),
240 if (!deny_everything
) {
241 r
= bpf_program_add_instructions(*prog
, post_insn
, ELEMENTSOF(post_insn
));
243 return log_error_errno(r
, "Extending device control BPF program failed: %m");
245 /* Fixup PASS_JUMP_OFF jump offsets. */
246 for (size_t off
= 0; off
< (*prog
)->n_instructions
; off
++) {
247 struct bpf_insn
*ins
= &((*prog
)->instructions
[off
]);
/* n_instructions - off - 1 is the distance from the instruction after this jump to the current
 * end of the program, so the patched jump lands on whatever is appended next. */
249 if (ins
->code
== (BPF_JMP
| BPF_JA
) && ins
->off
== PASS_JUMP_OFF
)
250 ins
->off
= (*prog
)->n_instructions
- off
- 1;
254 r
= bpf_program_add_instructions(*prog
, exit_insn
, ELEMENTSOF(exit_insn
));
256 return log_error_errno(r
, "Extending device control BPF program failed: %m");
258 r
= cg_get_path(SYSTEMD_CGROUP_CONTROLLER
, cgroup_path
, NULL
, &controller_path
);
260 return log_error_errno(r
, "Failed to determine cgroup path: %m");
262 r
= bpf_program_cgroup_attach(*prog
, BPF_CGROUP_DEVICE
, controller_path
, BPF_F_ALLOW_MULTI
);
264 return log_error_errno(r
, "Attaching device control BPF program to cgroup %s failed: %m",
265 empty_to_root(cgroup_path
));
/* NOTE(review): in the visible statement order the attach call above comes before this unref, so the
 * wording "right before attaching" in the next comment should be double-checked. */
268 /* Unref the old BPF program (which will implicitly detach it) right before attaching the new program. */
269 if (prog_installed
) {
270 bpf_program_free(*prog_installed
);
271 *prog_installed
= TAKE_PTR(*prog
);
/* Probes whether BPF-based device control can be used.
 *
 * The result is stored in the static variable supported (initialized to -1) at every return that
 * assigns 0 or 1. Presumably the stored value is consulted on entry, but that check is not visible.
 *
 * Visible probes, in order:
 *   - the effective UID must be 0;
 *   - cg_unified_controller() is queried for SYSTEMD_CGROUP_CONTROLLER, and a failure of that query is
 *     returned via log_error_errno() rather than stored as "unsupported";
 *   - a BPF_PROG_TYPE_CGROUP_DEVICE program named "sd_devices", consisting of the trivial instruction
 *     array below, must be creatable and loadable with bpf_program_load_kernel().
 *
 * Returns 1 if supported and 0 if not.
 *
 * NOTE(review): the conditions guarding each return are not visible in this excerpt, and the block
 * comment that starts inside this function has no terminator in the visible text. */
276 int bpf_devices_supported(void) {
277 const struct bpf_insn trivial
[] = {
278 BPF_MOV64_IMM(BPF_REG_0
, 1),
282 _cleanup_(bpf_program_freep
) BPFProgram
*program
= NULL
;
283 static int supported
= -1;
286 /* Checks whether BPF device controller is supported. For this, we check five things:
288 * a) whether we are privileged
289 * b) whether the unified hierarchy is being used
290 * c) the BPF implementation in the kernel supports BPF_PROG_TYPE_CGROUP_DEVICE programs, which we require
296 if (geteuid() != 0) {
297 log_debug("Not enough privileges, BPF device control is not supported.");
298 return supported
= 0;
301 r
= cg_unified_controller(SYSTEMD_CGROUP_CONTROLLER
);
303 return log_error_errno(r
, "Can't determine whether the unified hierarchy is used: %m");
305 log_debug("Not running with unified cgroups, BPF device control is not supported.");
306 return supported
= 0;
309 r
= bpf_program_new(BPF_PROG_TYPE_CGROUP_DEVICE
, "sd_devices", &program
);
311 log_debug_errno(r
, "Can't allocate CGROUP DEVICE BPF program, BPF device control is not supported: %m");
312 return supported
= 0;
315 r
= bpf_program_add_instructions(program
, trivial
, ELEMENTSOF(trivial
));
317 log_debug_errno(r
, "Can't add trivial instructions to CGROUP DEVICE BPF program, BPF device control is not supported: %m");
318 return supported
= 0;
321 r
= bpf_program_load_kernel(program
, NULL
, 0);
323 log_debug_errno(r
, "Can't load kernel CGROUP DEVICE BPF program, BPF device control is not supported: %m");
324 return supported
= 0;
327 return supported
= 1;
/* Allows one device pattern, using whichever mechanism matches the cgroup setup.
 *
 * If cg_all_unified() reports > 0, the pattern becomes a BPF rule in prog via
 * bpf_prog_allow_list_device() (type, major, minor), bpf_prog_allow_list_major() (type, major) or
 * bpf_prog_allow_list_class() (type only). Otherwise the pattern is formatted as text ("%c %u:%u %s",
 * "%c %u:* %s" or "%c *:* %s") and written to the "devices.allow" attribute of the "devices"
 * controller for path. A failed write is logged at LOG_DEBUG for ENOENT, EROFS, EINVAL, EACCES and
 * EPERM, and at LOG_WARNING otherwise.
 *
 * type must be 'b' or 'c' (asserted below). maj and min are pointers; callers in this file pass NULL
 * for the parts that are wildcards.
 *
 * NOTE(review): the parameter list, the conditions choosing between the three variants (presumably
 * whether maj and min are non-NULL) and the return value of the non-BPF path are not visible in this
 * excerpt. The second block comment in this function has no terminator in the visible text. */
330 static int allow_list_device_pattern(
338 assert(IN_SET(type
, 'b', 'c'));
340 if (cg_all_unified() > 0) {
345 return bpf_prog_allow_list_device(prog
, type
, *maj
, *min
, acc
);
347 return bpf_prog_allow_list_major(prog
, type
, *maj
, acc
);
349 return bpf_prog_allow_list_class(prog
, type
, acc
);
/* Sized for the longest textual form: type character, separators, two unsigned numbers and the
 * access string. */
352 char buf
[2+DECIMAL_STR_MAX(unsigned)*2+2+4];
356 xsprintf(buf
, "%c %u:%u %s", type
, *maj
, *min
, acc
);
358 xsprintf(buf
, "%c %u:* %s", type
, *maj
, acc
);
360 xsprintf(buf
, "%c *:* %s", type
, acc
);
362 /* Changing the devices list of a populated cgroup might result in EINVAL, hence ignore
365 r
= cg_set_attribute("devices", path
, "devices.allow", buf
);
367 log_full_errno(IN_SET(r
, -ENOENT
, -EROFS
, -EINVAL
, -EACCES
, -EPERM
) ? LOG_DEBUG
: LOG_WARNING
,
368 r
, "Failed to set devices.allow on %s: %m", path
);
/* Allows access to the single device referred to by the path node, with the access modes in acc (at
 * most three characters, asserted below).
 *
 * The device numbers are first parsed from the path itself with device_path_parse_major_minor(). The
 * visible fallback calls stat() on node and rejects anything that is neither a character nor a block
 * device with ENODEV. The resulting type, major and minor are passed to allow_list_device_pattern().
 *
 * Failures are logged with log_warning_errno(), whose result is returned.
 *
 * NOTE(review): the parameter list, the declarations of mode, rdev and st, and the conditions deciding
 * when the stat() fallback is used are not visible in this excerpt. In particular it is not visible
 * where mode is set on the stat() path, so confirm that against the full source. */
374 int bpf_devices_allow_list_device(
386 assert(strlen(acc
) <= 3);
388 log_trace("%s: %s %s", __func__
, node
, acc
);
390 /* Some special handling for /dev/block/%u:%u, /dev/char/%u:%u, /run/systemd/inaccessible/chr and
391 * /run/systemd/inaccessible/blk paths. Instead of stat()ing these we parse out the major/minor directly. This
392 * means clients can use these path without the device node actually around */
393 r
= device_path_parse_major_minor(node
, &mode
, &rdev
);
396 return log_warning_errno(r
, "Couldn't parse major/minor from device path '%s': %m", node
);
399 if (stat(node
, &st
) < 0)
400 return log_warning_errno(errno
, "Couldn't stat device %s: %m", node
);
402 if (!S_ISCHR(st
.st_mode
) && !S_ISBLK(st
.st_mode
))
403 return log_warning_errno(SYNTHETIC_ERRNO(ENODEV
), "%s is not a device.", node
);
406 rdev
= (dev_t
) st
.st_rdev
;
/* Split the device number into its parts. The rule type is 'c' for character devices and 'b'
 * otherwise. */
409 unsigned maj
= major(rdev
), min
= minor(rdev
);
410 return allow_list_device_pattern(prog
, path
, S_ISCHR(mode
) ? 'c' : 'b', &maj
, &min
, acc
);
/* Allows access to all devices of the given type ('b' or 'c', asserted below) whose major matches name.
 *
 * name is interpreted in this order:
 *   1. "*" allows every device of the type;
 *   2. a number that passes DEVICE_MAJOR_VALID() is used directly as the major;
 *   3. anything else is used as an fnmatch() pattern against the text that follows each major number
 *      in /proc/devices, within the "Character devices:" or "Block devices:" section matching type.
 *      Every match is allowed, and failures of individual matches are deliberately ignored.
 *
 * If nothing matched, a debug message is logged and the result of log_debug_errno() with a synthetic
 * ENOENT is returned. Failures to open or read /proc/devices are logged with log_warning_errno().
 *
 * NOTE(review): the parameter list, the line-parsing details, the loop structure and how the good and
 * any flags are updated are not visible in this excerpt. */
413 int bpf_devices_allow_list_major(
425 assert(IN_SET(type
, 'b', 'c'));
427 if (streq(name
, "*"))
428 /* If the name is a wildcard, then apply this list to all devices of this type */
429 return allow_list_device_pattern(prog
, path
, type
, NULL
, NULL
, acc
);
431 if (safe_atou(name
, &maj
) >= 0 && DEVICE_MAJOR_VALID(maj
))
432 /* The name is numeric and suitable as major. In that case, let's take its major, and create
433 * the entry directly. */
434 return allow_list_device_pattern(prog
, path
, type
, &maj
, NULL
, acc
);
436 _cleanup_fclose_
FILE *f
= NULL
;
437 bool good
= false, any
= false;
/* Otherwise resolve name against the entries listed in /proc/devices. */
439 f
= fopen("/proc/devices", "re");
441 return log_warning_errno(errno
, "Cannot open /proc/devices to resolve %s: %m", name
);
444 _cleanup_free_
char *line
= NULL
;
447 r
= read_line(f
, LONG_LINE_MAX
, &line
);
449 return log_warning_errno(r
, "Failed to read /proc/devices: %m");
/* Section headers select which part of the file corresponds to the requested type. */
453 if (type
== 'c' && streq(line
, "Character devices:")) {
458 if (type
== 'b' && streq(line
, "Block devices:")) {
/* p is parsed as the major number. w is located at the first whitespace after it and then advanced
 * past that whitespace, so it points at the text that name is matched against. */
473 w
= strpbrk(p
, WHITESPACE
);
478 r
= safe_atou(p
, &maj
);
485 w
+= strspn(w
, WHITESPACE
);
487 if (fnmatch(name
, w
, 0) != 0)
491 (void) allow_list_device_pattern(prog
, path
, type
, &maj
, NULL
, acc
);
495 return log_debug_errno(SYNTHETIC_ERRNO(ENOENT
),
496 "Device allow list pattern \"%s\" did not match anything.", name
);
/* Adds the built-in set of always-allowed devices to prog / path.
 *
 * auto_devices is a NUL-separated list of (device node, access string) pairs. Each pair is passed to
 * bpf_devices_allow_list_device(). Afterwards the character-device major named "pts" is allowed with
 * "rw" access through bpf_devices_allow_list_major().
 *
 * NOTE(review): the parameter list, how the per-call results in k are combined, and the end of this
 * function are not visible in this excerpt. Further entries or calls may follow. */
501 int bpf_devices_allow_list_static(
505 static const char auto_devices
[] =
506 "/dev/null\0" "rwm\0"
507 "/dev/zero\0" "rwm\0"
508 "/dev/full\0" "rwm\0"
509 "/dev/random\0" "rwm\0"
510 "/dev/urandom\0" "rwm\0"
512 "/dev/ptmx\0" "rwm\0"
513 /* Allow /run/systemd/inaccessible/{chr,blk} devices for mapping InaccessiblePaths */
514 "/run/systemd/inaccessible/chr\0" "rwm\0"
515 "/run/systemd/inaccessible/blk\0" "rwm\0";
518 const char *node
, *acc
;
519 NULSTR_FOREACH_PAIR(node
, acc
, auto_devices
) {
520 k
= bpf_devices_allow_list_device(prog
, path
, node
, acc
);
525 /* PTS (/dev/pts) devices may not be duplicated, but accessed */
526 k
= bpf_devices_allow_list_major(prog
, path
, "pts", 'c', "rw");