]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/core/bpf-devices.c
3af9e78a1e32f5e9f2987d115da3002cb0b77d92
[thirdparty/systemd.git] / src / core / bpf-devices.c
1 /* SPDX-License-Identifier: LGPL-2.1-or-later */
2
3 #include <fnmatch.h>
4 #include <linux/bpf_insn.h>
5
6 #include "bpf-devices.h"
7 #include "bpf-program.h"
8 #include "devnum-util.h"
9 #include "fd-util.h"
10 #include "fileio.h"
11 #include "nulstr-util.h"
12 #include "parse-util.h"
13 #include "path-util.h"
14 #include "stdio-util.h"
15 #include "string-util.h"
16
17 #define PASS_JUMP_OFF 4096
18
/* Converts an access string composed of the letters 'r', 'w' and 'm' into the matching
 * BPF_DEVCG_ACC_* bit mask. Repeated letters are harmless. Returns -EINVAL as soon as any other
 * character is seen; an empty string yields 0. */
static int bpf_access_type(const char *acc) {
        int mask = 0;

        assert(acc);

        for (const char *c = acc; *c != '\0'; c++) {
                if (*c == 'r')
                        mask |= BPF_DEVCG_ACC_READ;
                else if (*c == 'w')
                        mask |= BPF_DEVCG_ACC_WRITE;
                else if (*c == 'm')
                        mask |= BPF_DEVCG_ACC_MKNOD;
                else
                        return -EINVAL;
        }

        return mask;
}
41
42 static int bpf_prog_allow_list_device(
43 BPFProgram *prog,
44 char type,
45 int major,
46 int minor,
47 const char *acc) {
48
49 int r, access;
50
51 assert(prog);
52 assert(acc);
53
54 log_trace("%s: %c %d:%d %s", __func__, type, major, minor, acc);
55
56 access = bpf_access_type(acc);
57 if (access <= 0)
58 return -EINVAL;
59
60 assert(IN_SET(type, 'b', 'c'));
61 const int bpf_type = type == 'c' ? BPF_DEVCG_DEV_CHAR : BPF_DEVCG_DEV_BLOCK;
62
63 const struct bpf_insn insn[] = {
64 BPF_MOV32_REG(BPF_REG_1, BPF_REG_3),
65 BPF_ALU32_IMM(BPF_AND, BPF_REG_1, access),
66 BPF_JMP_REG(BPF_JNE, BPF_REG_1, BPF_REG_3, 4), /* compare access type */
67
68 BPF_JMP_IMM(BPF_JNE, BPF_REG_2, bpf_type, 3), /* compare device type */
69 BPF_JMP_IMM(BPF_JNE, BPF_REG_4, major, 2), /* compare major */
70 BPF_JMP_IMM(BPF_JNE, BPF_REG_5, minor, 1), /* compare minor */
71 BPF_JMP_A(PASS_JUMP_OFF), /* jump to PASS */
72 };
73
74 if (FLAGS_SET(access, BPF_DEVCG_ACC_READ | BPF_DEVCG_ACC_WRITE | BPF_DEVCG_ACC_MKNOD))
75 r = bpf_program_add_instructions(prog, insn + 3, ELEMENTSOF(insn) - 3);
76 else
77 r = bpf_program_add_instructions(prog, insn, ELEMENTSOF(insn));
78 if (r < 0)
79 log_error_errno(r, "Extending device control BPF program failed: %m");
80
81 return r;
82 }
83
84 static int bpf_prog_allow_list_major(
85 BPFProgram *prog,
86 char type,
87 int major,
88 const char *acc) {
89
90 int r, access;
91
92 assert(prog);
93 assert(acc);
94
95 log_trace("%s: %c %d:* %s", __func__, type, major, acc);
96
97 access = bpf_access_type(acc);
98 if (access <= 0)
99 return -EINVAL;
100
101 assert(IN_SET(type, 'b', 'c'));
102 const int bpf_type = type == 'c' ? BPF_DEVCG_DEV_CHAR : BPF_DEVCG_DEV_BLOCK;
103
104 const struct bpf_insn insn[] = {
105 BPF_MOV32_REG(BPF_REG_1, BPF_REG_3),
106 BPF_ALU32_IMM(BPF_AND, BPF_REG_1, access),
107 BPF_JMP_REG(BPF_JNE, BPF_REG_1, BPF_REG_3, 3), /* compare access type */
108
109 BPF_JMP_IMM(BPF_JNE, BPF_REG_2, bpf_type, 2), /* compare device type */
110 BPF_JMP_IMM(BPF_JNE, BPF_REG_4, major, 1), /* compare major */
111 BPF_JMP_A(PASS_JUMP_OFF), /* jump to PASS */
112 };
113
114 if (FLAGS_SET(access, BPF_DEVCG_ACC_READ | BPF_DEVCG_ACC_WRITE | BPF_DEVCG_ACC_MKNOD))
115 r = bpf_program_add_instructions(prog, insn + 3, ELEMENTSOF(insn) - 3);
116 else
117 r = bpf_program_add_instructions(prog, insn, ELEMENTSOF(insn));
118 if (r < 0)
119 log_error_errno(r, "Extending device control BPF program failed: %m");
120
121 return r;
122 }
123
124 static int bpf_prog_allow_list_class(
125 BPFProgram *prog,
126 char type,
127 const char *acc) {
128
129 int r, access;
130
131 assert(prog);
132 assert(acc);
133
134 log_trace("%s: %c *:* %s", __func__, type, acc);
135
136 access = bpf_access_type(acc);
137 if (access <= 0)
138 return -EINVAL;
139
140 assert(IN_SET(type, 'b', 'c'));
141 const int bpf_type = type == 'c' ? BPF_DEVCG_DEV_CHAR : BPF_DEVCG_DEV_BLOCK;
142
143 const struct bpf_insn insn[] = {
144 BPF_MOV32_REG(BPF_REG_1, BPF_REG_3),
145 BPF_ALU32_IMM(BPF_AND, BPF_REG_1, access),
146 BPF_JMP_REG(BPF_JNE, BPF_REG_1, BPF_REG_3, 2), /* compare access type */
147
148 BPF_JMP_IMM(BPF_JNE, BPF_REG_2, bpf_type, 1), /* compare device type */
149 BPF_JMP_A(PASS_JUMP_OFF), /* jump to PASS */
150 };
151
152 if (FLAGS_SET(access, BPF_DEVCG_ACC_READ | BPF_DEVCG_ACC_WRITE | BPF_DEVCG_ACC_MKNOD))
153 r = bpf_program_add_instructions(prog, insn + 3, ELEMENTSOF(insn) - 3);
154 else
155 r = bpf_program_add_instructions(prog, insn, ELEMENTSOF(insn));
156 if (r < 0)
157 log_error_errno(r, "Extending device control BPF program failed: %m");
158
159 return r;
160 }
161
162 int bpf_devices_cgroup_init(
163 BPFProgram **ret,
164 CGroupDevicePolicy policy,
165 bool allow_list) {
166
167 const struct bpf_insn pre_insn[] = {
168 /* load device type to r2 */
169 BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
170 offsetof(struct bpf_cgroup_dev_ctx, access_type)),
171 BPF_ALU32_IMM(BPF_AND, BPF_REG_2, 0xFFFF),
172
173 /* load access type to r3 */
174 BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
175 offsetof(struct bpf_cgroup_dev_ctx, access_type)),
176 BPF_ALU32_IMM(BPF_RSH, BPF_REG_3, 16),
177
178 /* load major number to r4 */
179 BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1,
180 offsetof(struct bpf_cgroup_dev_ctx, major)),
181
182 /* load minor number to r5 */
183 BPF_LDX_MEM(BPF_W, BPF_REG_5, BPF_REG_1,
184 offsetof(struct bpf_cgroup_dev_ctx, minor)),
185 };
186
187 _cleanup_(bpf_program_freep) BPFProgram *prog = NULL;
188 int r;
189
190 assert(ret);
191
192 if (policy == CGROUP_DEVICE_POLICY_AUTO && !allow_list)
193 return 0;
194
195 r = bpf_program_new(BPF_PROG_TYPE_CGROUP_DEVICE, "sd_devices", &prog);
196 if (r < 0)
197 return log_error_errno(r, "Loading device control BPF program failed: %m");
198
199 if (policy == CGROUP_DEVICE_POLICY_CLOSED || allow_list) {
200 r = bpf_program_add_instructions(prog, pre_insn, ELEMENTSOF(pre_insn));
201 if (r < 0)
202 return log_error_errno(r, "Extending device control BPF program failed: %m");
203 }
204
205 *ret = TAKE_PTR(prog);
206
207 return 0;
208 }
209
210 int bpf_devices_apply_policy(
211 BPFProgram **prog,
212 CGroupDevicePolicy policy,
213 bool allow_list,
214 const char *cgroup_path,
215 BPFProgram **prog_installed) {
216
217 _cleanup_free_ char *controller_path = NULL;
218 int r;
219
220 /* This will assign *prog_installed if everything goes well. */
221
222 assert(prog);
223 if (!*prog)
224 goto finish;
225
226 const bool deny_everything = policy == CGROUP_DEVICE_POLICY_STRICT && !allow_list;
227
228 const struct bpf_insn post_insn[] = {
229 /* return DENY */
230 BPF_MOV64_IMM(BPF_REG_0, 0),
231 BPF_JMP_A(1),
232 };
233
234 const struct bpf_insn exit_insn[] = {
235 /* finally return DENY if deny_everything else ALLOW */
236 BPF_MOV64_IMM(BPF_REG_0, deny_everything ? 0 : 1),
237 BPF_EXIT_INSN()
238 };
239
240 if (!deny_everything) {
241 r = bpf_program_add_instructions(*prog, post_insn, ELEMENTSOF(post_insn));
242 if (r < 0)
243 return log_error_errno(r, "Extending device control BPF program failed: %m");
244
245 /* Fixup PASS_JUMP_OFF jump offsets. */
246 for (size_t off = 0; off < (*prog)->n_instructions; off++) {
247 struct bpf_insn *ins = &((*prog)->instructions[off]);
248
249 if (ins->code == (BPF_JMP | BPF_JA) && ins->off == PASS_JUMP_OFF)
250 ins->off = (*prog)->n_instructions - off - 1;
251 }
252 }
253
254 r = bpf_program_add_instructions(*prog, exit_insn, ELEMENTSOF(exit_insn));
255 if (r < 0)
256 return log_error_errno(r, "Extending device control BPF program failed: %m");
257
258 r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, cgroup_path, NULL, &controller_path);
259 if (r < 0)
260 return log_error_errno(r, "Failed to determine cgroup path: %m");
261
262 r = bpf_program_cgroup_attach(*prog, BPF_CGROUP_DEVICE, controller_path, BPF_F_ALLOW_MULTI);
263 if (r < 0)
264 return log_error_errno(r, "Attaching device control BPF program to cgroup %s failed: %m",
265 empty_to_root(cgroup_path));
266
267 finish:
268 /* Unref the old BPF program (which will implicitly detach it) right before attaching the new program. */
269 if (prog_installed) {
270 bpf_program_free(*prog_installed);
271 *prog_installed = TAKE_PTR(*prog);
272 }
273 return 0;
274 }
275
276 int bpf_devices_supported(void) {
277 const struct bpf_insn trivial[] = {
278 BPF_MOV64_IMM(BPF_REG_0, 1),
279 BPF_EXIT_INSN()
280 };
281
282 _cleanup_(bpf_program_freep) BPFProgram *program = NULL;
283 static int supported = -1;
284 int r;
285
286 /* Checks whether BPF device controller is supported. For this, we check five things:
287 *
288 * a) whether we are privileged
289 * b) whether the unified hierarchy is being used
290 * c) the BPF implementation in the kernel supports BPF_PROG_TYPE_CGROUP_DEVICE programs, which we require
291 */
292
293 if (supported >= 0)
294 return supported;
295
296 if (geteuid() != 0) {
297 log_debug("Not enough privileges, BPF device control is not supported.");
298 return supported = 0;
299 }
300
301 r = cg_unified_controller(SYSTEMD_CGROUP_CONTROLLER);
302 if (r < 0)
303 return log_error_errno(r, "Can't determine whether the unified hierarchy is used: %m");
304 if (r == 0) {
305 log_debug("Not running with unified cgroups, BPF device control is not supported.");
306 return supported = 0;
307 }
308
309 r = bpf_program_new(BPF_PROG_TYPE_CGROUP_DEVICE, "sd_devices", &program);
310 if (r < 0) {
311 log_debug_errno(r, "Can't allocate CGROUP DEVICE BPF program, BPF device control is not supported: %m");
312 return supported = 0;
313 }
314
315 r = bpf_program_add_instructions(program, trivial, ELEMENTSOF(trivial));
316 if (r < 0) {
317 log_debug_errno(r, "Can't add trivial instructions to CGROUP DEVICE BPF program, BPF device control is not supported: %m");
318 return supported = 0;
319 }
320
321 r = bpf_program_load_kernel(program, NULL, 0);
322 if (r < 0) {
323 log_debug_errno(r, "Can't load kernel CGROUP DEVICE BPF program, BPF device control is not supported: %m");
324 return supported = 0;
325 }
326
327 return supported = 1;
328 }
329
330 static int allow_list_device_pattern(
331 BPFProgram *prog,
332 const char *path,
333 char type,
334 const unsigned *maj,
335 const unsigned *min,
336 const char *acc) {
337
338 assert(IN_SET(type, 'b', 'c'));
339
340 if (cg_all_unified() > 0) {
341 if (!prog)
342 return 0;
343
344 if (maj && min)
345 return bpf_prog_allow_list_device(prog, type, *maj, *min, acc);
346 else if (maj)
347 return bpf_prog_allow_list_major(prog, type, *maj, acc);
348 else
349 return bpf_prog_allow_list_class(prog, type, acc);
350
351 } else {
352 char buf[2+DECIMAL_STR_MAX(unsigned)*2+2+4];
353 int r;
354
355 if (maj && min)
356 xsprintf(buf, "%c %u:%u %s", type, *maj, *min, acc);
357 else if (maj)
358 xsprintf(buf, "%c %u:* %s", type, *maj, acc);
359 else
360 xsprintf(buf, "%c *:* %s", type, acc);
361
362 /* Changing the devices list of a populated cgroup might result in EINVAL, hence ignore
363 * EINVAL here. */
364
365 r = cg_set_attribute("devices", path, "devices.allow", buf);
366 if (r < 0)
367 log_full_errno(IN_SET(r, -ENOENT, -EROFS, -EINVAL, -EACCES, -EPERM) ? LOG_DEBUG : LOG_WARNING,
368 r, "Failed to set devices.allow on %s: %m", path);
369
370 return r;
371 }
372 }
373
374 int bpf_devices_allow_list_device(
375 BPFProgram *prog,
376 const char *path,
377 const char *node,
378 const char *acc) {
379
380 mode_t mode;
381 dev_t rdev;
382 int r;
383
384 assert(path);
385 assert(acc);
386 assert(strlen(acc) <= 3);
387
388 log_trace("%s: %s %s", __func__, node, acc);
389
390 /* Some special handling for /dev/block/%u:%u, /dev/char/%u:%u, /run/systemd/inaccessible/chr and
391 * /run/systemd/inaccessible/blk paths. Instead of stat()ing these we parse out the major/minor directly. This
392 * means clients can use these path without the device node actually around */
393 r = device_path_parse_major_minor(node, &mode, &rdev);
394 if (r < 0) {
395 if (r != -ENODEV)
396 return log_warning_errno(r, "Couldn't parse major/minor from device path '%s': %m", node);
397
398 struct stat st;
399 if (stat(node, &st) < 0)
400 return log_warning_errno(errno, "Couldn't stat device %s: %m", node);
401
402 if (!S_ISCHR(st.st_mode) && !S_ISBLK(st.st_mode))
403 return log_warning_errno(SYNTHETIC_ERRNO(ENODEV), "%s is not a device.", node);
404
405 mode = st.st_mode;
406 rdev = (dev_t) st.st_rdev;
407 }
408
409 unsigned maj = major(rdev), min = minor(rdev);
410 return allow_list_device_pattern(prog, path, S_ISCHR(mode) ? 'c' : 'b', &maj, &min, acc);
411 }
412
413 int bpf_devices_allow_list_major(
414 BPFProgram *prog,
415 const char *path,
416 const char *name,
417 char type,
418 const char *acc) {
419
420 unsigned maj;
421 int r;
422
423 assert(path);
424 assert(acc);
425 assert(IN_SET(type, 'b', 'c'));
426
427 if (streq(name, "*"))
428 /* If the name is a wildcard, then apply this list to all devices of this type */
429 return allow_list_device_pattern(prog, path, type, NULL, NULL, acc);
430
431 if (safe_atou(name, &maj) >= 0 && DEVICE_MAJOR_VALID(maj))
432 /* The name is numeric and suitable as major. In that case, let's take its major, and create
433 * the entry directly. */
434 return allow_list_device_pattern(prog, path, type, &maj, NULL, acc);
435
436 _cleanup_fclose_ FILE *f = NULL;
437 bool good = false, any = false;
438
439 f = fopen("/proc/devices", "re");
440 if (!f)
441 return log_warning_errno(errno, "Cannot open /proc/devices to resolve %s: %m", name);
442
443 for (;;) {
444 _cleanup_free_ char *line = NULL;
445 char *w, *p;
446
447 r = read_line(f, LONG_LINE_MAX, &line);
448 if (r < 0)
449 return log_warning_errno(r, "Failed to read /proc/devices: %m");
450 if (r == 0)
451 break;
452
453 if (type == 'c' && streq(line, "Character devices:")) {
454 good = true;
455 continue;
456 }
457
458 if (type == 'b' && streq(line, "Block devices:")) {
459 good = true;
460 continue;
461 }
462
463 if (isempty(line)) {
464 good = false;
465 continue;
466 }
467
468 if (!good)
469 continue;
470
471 p = strstrip(line);
472
473 w = strpbrk(p, WHITESPACE);
474 if (!w)
475 continue;
476 *w = 0;
477
478 r = safe_atou(p, &maj);
479 if (r < 0)
480 continue;
481 if (maj <= 0)
482 continue;
483
484 w++;
485 w += strspn(w, WHITESPACE);
486
487 if (fnmatch(name, w, 0) != 0)
488 continue;
489
490 any = true;
491 (void) allow_list_device_pattern(prog, path, type, &maj, NULL, acc);
492 }
493
494 if (!any)
495 return log_debug_errno(SYNTHETIC_ERRNO(ENOENT),
496 "Device allow list pattern \"%s\" did not match anything.", name);
497
498 return 0;
499 }
500
501 int bpf_devices_allow_list_static(
502 BPFProgram *prog,
503 const char *path) {
504
505 static const char auto_devices[] =
506 "/dev/null\0" "rwm\0"
507 "/dev/zero\0" "rwm\0"
508 "/dev/full\0" "rwm\0"
509 "/dev/random\0" "rwm\0"
510 "/dev/urandom\0" "rwm\0"
511 "/dev/tty\0" "rwm\0"
512 "/dev/ptmx\0" "rwm\0"
513 /* Allow /run/systemd/inaccessible/{chr,blk} devices for mapping InaccessiblePaths */
514 "/run/systemd/inaccessible/chr\0" "rwm\0"
515 "/run/systemd/inaccessible/blk\0" "rwm\0";
516 int r = 0, k;
517
518 const char *node, *acc;
519 NULSTR_FOREACH_PAIR(node, acc, auto_devices) {
520 k = bpf_devices_allow_list_device(prog, path, node, acc);
521 if (r >= 0 && k < 0)
522 r = k;
523 }
524
525 /* PTS (/dev/pts) devices may not be duplicated, but accessed */
526 k = bpf_devices_allow_list_major(prog, path, "pts", 'c', "rw");
527 if (r >= 0 && k < 0)
528 r = k;
529
530 return r;
531 }