1 /* SPDX-License-Identifier: LGPL-2.1+ */
4 #include <linux/bpf_insn.h>
6 #include "bpf-devices.h"
7 #include "bpf-program.h"
10 #include "nulstr-util.h"
11 #include "parse-util.h"
12 #include "stat-util.h"
13 #include "stdio-util.h"
14 #include "string-util.h"
16 #define PASS_JUMP_OFF 4096
18 static int bpf_access_type(const char *acc
) {
26 r
|= BPF_DEVCG_ACC_READ
;
29 r
|= BPF_DEVCG_ACC_WRITE
;
32 r
|= BPF_DEVCG_ACC_MKNOD
;
41 static int bpf_prog_allow_list_device(
53 log_trace("%s: %c %d:%d %s", __func__
, type
, major
, minor
, acc
);
55 access
= bpf_access_type(acc
);
59 assert(IN_SET(type
, 'b', 'c'));
60 const int bpf_type
= type
== 'c' ? BPF_DEVCG_DEV_CHAR
: BPF_DEVCG_DEV_BLOCK
;
62 const struct bpf_insn insn
[] = {
63 BPF_MOV32_REG(BPF_REG_1
, BPF_REG_3
),
64 BPF_ALU32_IMM(BPF_AND
, BPF_REG_1
, access
),
65 BPF_JMP_REG(BPF_JNE
, BPF_REG_1
, BPF_REG_3
, 4), /* compare access type */
67 BPF_JMP_IMM(BPF_JNE
, BPF_REG_2
, bpf_type
, 3), /* compare device type */
68 BPF_JMP_IMM(BPF_JNE
, BPF_REG_4
, major
, 2), /* compare major */
69 BPF_JMP_IMM(BPF_JNE
, BPF_REG_5
, minor
, 1), /* compare minor */
70 BPF_JMP_A(PASS_JUMP_OFF
), /* jump to PASS */
73 if (FLAGS_SET(access
, BPF_DEVCG_ACC_READ
| BPF_DEVCG_ACC_WRITE
| BPF_DEVCG_ACC_MKNOD
))
74 r
= bpf_program_add_instructions(prog
, insn
+ 3, ELEMENTSOF(insn
) - 3);
76 r
= bpf_program_add_instructions(prog
, insn
, ELEMENTSOF(insn
));
78 log_error_errno(r
, "Extending device control BPF program failed: %m");
83 static int bpf_prog_allow_list_major(
94 log_trace("%s: %c %d:* %s", __func__
, type
, major
, acc
);
96 access
= bpf_access_type(acc
);
100 assert(IN_SET(type
, 'b', 'c'));
101 const int bpf_type
= type
== 'c' ? BPF_DEVCG_DEV_CHAR
: BPF_DEVCG_DEV_BLOCK
;
103 const struct bpf_insn insn
[] = {
104 BPF_MOV32_REG(BPF_REG_1
, BPF_REG_3
),
105 BPF_ALU32_IMM(BPF_AND
, BPF_REG_1
, access
),
106 BPF_JMP_REG(BPF_JNE
, BPF_REG_1
, BPF_REG_3
, 3), /* compare access type */
108 BPF_JMP_IMM(BPF_JNE
, BPF_REG_2
, bpf_type
, 2), /* compare device type */
109 BPF_JMP_IMM(BPF_JNE
, BPF_REG_4
, major
, 1), /* compare major */
110 BPF_JMP_A(PASS_JUMP_OFF
), /* jump to PASS */
113 if (FLAGS_SET(access
, BPF_DEVCG_ACC_READ
| BPF_DEVCG_ACC_WRITE
| BPF_DEVCG_ACC_MKNOD
))
114 r
= bpf_program_add_instructions(prog
, insn
+ 3, ELEMENTSOF(insn
) - 3);
116 r
= bpf_program_add_instructions(prog
, insn
, ELEMENTSOF(insn
));
118 log_error_errno(r
, "Extending device control BPF program failed: %m");
123 static int bpf_prog_allow_list_class(
133 log_trace("%s: %c *:* %s", __func__
, type
, acc
);
135 access
= bpf_access_type(acc
);
139 assert(IN_SET(type
, 'b', 'c'));
140 const int bpf_type
= type
== 'c' ? BPF_DEVCG_DEV_CHAR
: BPF_DEVCG_DEV_BLOCK
;
142 const struct bpf_insn insn
[] = {
143 BPF_MOV32_REG(BPF_REG_1
, BPF_REG_3
),
144 BPF_ALU32_IMM(BPF_AND
, BPF_REG_1
, access
),
145 BPF_JMP_REG(BPF_JNE
, BPF_REG_1
, BPF_REG_3
, 2), /* compare access type */
147 BPF_JMP_IMM(BPF_JNE
, BPF_REG_2
, bpf_type
, 1), /* compare device type */
148 BPF_JMP_A(PASS_JUMP_OFF
), /* jump to PASS */
151 if (FLAGS_SET(access
, BPF_DEVCG_ACC_READ
| BPF_DEVCG_ACC_WRITE
| BPF_DEVCG_ACC_MKNOD
))
152 r
= bpf_program_add_instructions(prog
, insn
+ 3, ELEMENTSOF(insn
) - 3);
154 r
= bpf_program_add_instructions(prog
, insn
, ELEMENTSOF(insn
));
156 log_error_errno(r
, "Extending device control BPF program failed: %m");
161 int bpf_devices_cgroup_init(
163 CGroupDevicePolicy policy
,
166 const struct bpf_insn pre_insn
[] = {
167 /* load device type to r2 */
168 BPF_LDX_MEM(BPF_W
, BPF_REG_2
, BPF_REG_1
,
169 offsetof(struct bpf_cgroup_dev_ctx
, access_type
)),
170 BPF_ALU32_IMM(BPF_AND
, BPF_REG_2
, 0xFFFF),
172 /* load access type to r3 */
173 BPF_LDX_MEM(BPF_W
, BPF_REG_3
, BPF_REG_1
,
174 offsetof(struct bpf_cgroup_dev_ctx
, access_type
)),
175 BPF_ALU32_IMM(BPF_RSH
, BPF_REG_3
, 16),
177 /* load major number to r4 */
178 BPF_LDX_MEM(BPF_W
, BPF_REG_4
, BPF_REG_1
,
179 offsetof(struct bpf_cgroup_dev_ctx
, major
)),
181 /* load minor number to r5 */
182 BPF_LDX_MEM(BPF_W
, BPF_REG_5
, BPF_REG_1
,
183 offsetof(struct bpf_cgroup_dev_ctx
, minor
)),
186 _cleanup_(bpf_program_unrefp
) BPFProgram
*prog
= NULL
;
191 if (policy
== CGROUP_DEVICE_POLICY_AUTO
&& !allow_list
)
194 r
= bpf_program_new(BPF_PROG_TYPE_CGROUP_DEVICE
, &prog
);
196 return log_error_errno(r
, "Loading device control BPF program failed: %m");
198 if (policy
== CGROUP_DEVICE_POLICY_CLOSED
|| allow_list
) {
199 r
= bpf_program_add_instructions(prog
, pre_insn
, ELEMENTSOF(pre_insn
));
201 return log_error_errno(r
, "Extending device control BPF program failed: %m");
204 *ret
= TAKE_PTR(prog
);
209 int bpf_devices_apply_policy(
211 CGroupDevicePolicy policy
,
213 const char *cgroup_path
,
214 BPFProgram
**prog_installed
) {
216 _cleanup_free_
char *controller_path
= NULL
;
219 /* This will assign *prog_installed if everything goes well. */
224 const bool deny_everything
= policy
== CGROUP_DEVICE_POLICY_STRICT
&& !allow_list
;
226 const struct bpf_insn post_insn
[] = {
228 BPF_MOV64_IMM(BPF_REG_0
, 0),
232 const struct bpf_insn exit_insn
[] = {
233 /* finally return DENY if deny_everything else ALLOW */
234 BPF_MOV64_IMM(BPF_REG_0
, deny_everything
? 0 : 1),
238 if (!deny_everything
) {
239 r
= bpf_program_add_instructions(prog
, post_insn
, ELEMENTSOF(post_insn
));
241 return log_error_errno(r
, "Extending device control BPF program failed: %m");
243 /* Fixup PASS_JUMP_OFF jump offsets. */
244 for (size_t off
= 0; off
< prog
->n_instructions
; off
++) {
245 struct bpf_insn
*ins
= &prog
->instructions
[off
];
247 if (ins
->code
== (BPF_JMP
| BPF_JA
) && ins
->off
== PASS_JUMP_OFF
)
248 ins
->off
= prog
->n_instructions
- off
- 1;
252 r
= bpf_program_add_instructions(prog
, exit_insn
, ELEMENTSOF(exit_insn
));
254 return log_error_errno(r
, "Extending device control BPF program failed: %m");
256 r
= cg_get_path(SYSTEMD_CGROUP_CONTROLLER
, cgroup_path
, NULL
, &controller_path
);
258 return log_error_errno(r
, "Failed to determine cgroup path: %m");
260 r
= bpf_program_cgroup_attach(prog
, BPF_CGROUP_DEVICE
, controller_path
, BPF_F_ALLOW_MULTI
);
262 return log_error_errno(r
, "Attaching device control BPF program to cgroup %s failed: %m",
266 /* Unref the old BPF program (which will implicitly detach it) right after attaching the new program. */
267 if (prog_installed
) {
268 bpf_program_unref(*prog_installed
);
269 *prog_installed
= bpf_program_ref(prog
);
274 int bpf_devices_supported(void) {
275 const struct bpf_insn trivial
[] = {
276 BPF_MOV64_IMM(BPF_REG_0
, 1),
280 _cleanup_(bpf_program_unrefp
) BPFProgram
*program
= NULL
;
281 static int supported
= -1;
284 /* Checks whether BPF device controller is supported. For this, we check three things:
286 * a) whether we are privileged
287 * b) whether the unified hierarchy is being used
288 * c) the BPF implementation in the kernel supports BPF_PROG_TYPE_CGROUP_DEVICE programs, which we require
294 if (geteuid() != 0) {
295 log_debug("Not enough privileges, BPF device control is not supported.");
296 return supported
= 0;
299 r
= cg_unified_controller(SYSTEMD_CGROUP_CONTROLLER
);
301 return log_error_errno(r
, "Can't determine whether the unified hierarchy is used: %m");
303 log_debug("Not running with unified cgroups, BPF device control is not supported.");
304 return supported
= 0;
307 r
= bpf_program_new(BPF_PROG_TYPE_CGROUP_DEVICE
, &program
);
309 log_debug_errno(r
, "Can't allocate CGROUP DEVICE BPF program, BPF device control is not supported: %m");
310 return supported
= 0;
313 r
= bpf_program_add_instructions(program
, trivial
, ELEMENTSOF(trivial
));
315 log_debug_errno(r
, "Can't add trivial instructions to CGROUP DEVICE BPF program, BPF device control is not supported: %m");
316 return supported
= 0;
319 r
= bpf_program_load_kernel(program
, NULL
, 0);
321 log_debug_errno(r
, "Can't load kernel CGROUP DEVICE BPF program, BPF device control is not supported: %m");
322 return supported
= 0;
325 return supported
= 1;
328 static int allow_list_device_pattern(
336 assert(IN_SET(type
, 'b', 'c'));
338 if (cg_all_unified() > 0) {
343 return bpf_prog_allow_list_device(prog
, type
, *maj
, *min
, acc
);
345 return bpf_prog_allow_list_major(prog
, type
, *maj
, acc
);
347 return bpf_prog_allow_list_class(prog
, type
, acc
);
350 char buf
[2+DECIMAL_STR_MAX(unsigned)*2+2+4];
354 xsprintf(buf
, "%c %u:%u %s", type
, *maj
, *min
, acc
);
356 xsprintf(buf
, "%c %u:* %s", type
, *maj
, acc
);
358 xsprintf(buf
, "%c *:* %s", type
, acc
);
360 /* Changing the devices list of a populated cgroup might result in EINVAL, hence ignore
363 r
= cg_set_attribute("devices", path
, "devices.allow", buf
);
365 log_full_errno(IN_SET(r
, -ENOENT
, -EROFS
, -EINVAL
, -EACCES
, -EPERM
) ? LOG_DEBUG
: LOG_WARNING
,
366 r
, "Failed to set devices.allow on %s: %m", path
);
372 int bpf_devices_allow_list_device(
384 assert(strlen(acc
) <= 3);
386 log_trace("%s: %s %s", __func__
, node
, acc
);
388 /* Some special handling for /dev/block/%u:%u, /dev/char/%u:%u, /run/systemd/inaccessible/chr and
389 * /run/systemd/inaccessible/blk paths. Instead of stat()ing these we parse out the major/minor directly. This
390 * means clients can use these paths without the device node actually being around. */
391 r
= device_path_parse_major_minor(node
, &mode
, &rdev
);
394 return log_warning_errno(r
, "Couldn't parse major/minor from device path '%s': %m", node
);
397 if (stat(node
, &st
) < 0)
398 return log_warning_errno(errno
, "Couldn't stat device %s: %m", node
);
400 if (!S_ISCHR(st
.st_mode
) && !S_ISBLK(st
.st_mode
))
401 return log_warning_errno(SYNTHETIC_ERRNO(ENODEV
), "%s is not a device.", node
);
404 rdev
= (dev_t
) st
.st_rdev
;
407 unsigned maj
= major(rdev
), min
= minor(rdev
);
408 return allow_list_device_pattern(prog
, path
, S_ISCHR(mode
) ? 'c' : 'b', &maj
, &min
, acc
);
411 int bpf_devices_allow_list_major(
423 assert(IN_SET(type
, 'b', 'c'));
425 if (streq(name
, "*"))
426 /* If the name is a wildcard, then apply this list to all devices of this type */
427 return allow_list_device_pattern(prog
, path
, type
, NULL
, NULL
, acc
);
429 if (safe_atou(name
, &maj
) >= 0 && DEVICE_MAJOR_VALID(maj
))
430 /* The name is numeric and suitable as major. In that case, let's take its major, and create
431 * the entry directly. */
432 return allow_list_device_pattern(prog
, path
, type
, &maj
, NULL
, acc
);
434 _cleanup_fclose_
FILE *f
= NULL
;
435 bool good
= false, any
= false;
437 f
= fopen("/proc/devices", "re");
439 return log_warning_errno(errno
, "Cannot open /proc/devices to resolve %s: %m", name
);
442 _cleanup_free_
char *line
= NULL
;
445 r
= read_line(f
, LONG_LINE_MAX
, &line
);
447 return log_warning_errno(r
, "Failed to read /proc/devices: %m");
451 if (type
== 'c' && streq(line
, "Character devices:")) {
456 if (type
== 'b' && streq(line
, "Block devices:")) {
471 w
= strpbrk(p
, WHITESPACE
);
476 r
= safe_atou(p
, &maj
);
483 w
+= strspn(w
, WHITESPACE
);
485 if (fnmatch(name
, w
, 0) != 0)
489 (void) allow_list_device_pattern(prog
, path
, type
, &maj
, NULL
, acc
);
493 return log_debug_errno(SYNTHETIC_ERRNO(ENOENT
),
494 "Device allow list pattern \"%s\" did not match anything.", name
);
499 int bpf_devices_allow_list_static(
503 static const char auto_devices
[] =
504 "/dev/null\0" "rwm\0"
505 "/dev/zero\0" "rwm\0"
506 "/dev/full\0" "rwm\0"
507 "/dev/random\0" "rwm\0"
508 "/dev/urandom\0" "rwm\0"
510 "/dev/ptmx\0" "rwm\0"
511 /* Allow /run/systemd/inaccessible/{chr,blk} devices for mapping InaccessiblePaths */
512 "/run/systemd/inaccessible/chr\0" "rwm\0"
513 "/run/systemd/inaccessible/blk\0" "rwm\0";
516 const char *node
, *acc
;
517 NULSTR_FOREACH_PAIR(node
, acc
, auto_devices
) {
518 k
= bpf_devices_allow_list_device(prog
, path
, node
, acc
);
523 /* PTS (/dev/pts) devices may not be duplicated, but accessed */
524 k
= bpf_devices_allow_list_major(prog
, path
, "pts", 'c', "rw");