]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/core/bpf-devices.c
Merge pull request #22759 from msekletar/issue-18077-long-sysfs-paths-hashing
[thirdparty/systemd.git] / src / core / bpf-devices.c
CommitLineData
db9ecf05 1/* SPDX-License-Identifier: LGPL-2.1-or-later */
124e05b3
ZJS
2
3#include <fnmatch.h>
01234e1f 4#include <linux/bpf_insn.h>
084c7007
RG
5
6#include "bpf-devices.h"
7#include "bpf-program.h"
7176f06c 8#include "devnum-util.h"
124e05b3
ZJS
9#include "fd-util.h"
10#include "fileio.h"
a9aac7d8 11#include "nulstr-util.h"
124e05b3 12#include "parse-util.h"
6178e2f8 13#include "path-util.h"
124e05b3
ZJS
14#include "stdio-util.h"
15#include "string-util.h"
084c7007
RG
16
17#define PASS_JUMP_OFF 4096
18
/* Converts an access string made up of the letters 'r', 'w' and 'm' into the matching
 * BPF_DEVCG_ACC_* bit mask. Letters may repeat and appear in any order.
 *
 * Returns the mask (0 for an empty string), or -EINVAL as soon as any other character is seen. */
static int bpf_access_type(const char *acc) {
        int mask = 0;

        assert(acc);

        for (const char *p = acc; *p != '\0'; p++) {
                if (*p == 'r')
                        mask |= BPF_DEVCG_ACC_READ;
                else if (*p == 'w')
                        mask |= BPF_DEVCG_ACC_WRITE;
                else if (*p == 'm')
                        mask |= BPF_DEVCG_ACC_MKNOD;
                else
                        return -EINVAL;
        }

        return mask;
}
41
6b000af4 42static int bpf_prog_allow_list_device(
07141aa0
LP
43 BPFProgram *prog,
44 char type,
45 int major,
46 int minor,
47 const char *acc) {
48
084c7007
RG
49 int r, access;
50
51 assert(prog);
52 assert(acc);
53
786cce00
ZJS
54 log_trace("%s: %c %d:%d %s", __func__, type, major, minor, acc);
55
084c7007
RG
56 access = bpf_access_type(acc);
57 if (access <= 0)
58 return -EINVAL;
59
8ad08622
ZJS
60 assert(IN_SET(type, 'b', 'c'));
61 const int bpf_type = type == 'c' ? BPF_DEVCG_DEV_CHAR : BPF_DEVCG_DEV_BLOCK;
62
2899aac4 63 const struct bpf_insn insn[] = {
00486578 64 BPF_MOV32_REG(BPF_REG_1, BPF_REG_3),
2899aac4 65 BPF_ALU32_IMM(BPF_AND, BPF_REG_1, access),
00486578
ZJS
66 BPF_JMP_REG(BPF_JNE, BPF_REG_1, BPF_REG_3, 4), /* compare access type */
67
68 BPF_JMP_IMM(BPF_JNE, BPF_REG_2, bpf_type, 3), /* compare device type */
69 BPF_JMP_IMM(BPF_JNE, BPF_REG_4, major, 2), /* compare major */
70 BPF_JMP_IMM(BPF_JNE, BPF_REG_5, minor, 1), /* compare minor */
71 BPF_JMP_A(PASS_JUMP_OFF), /* jump to PASS */
2899aac4 72 };
084c7007 73
00486578
ZJS
74 if (FLAGS_SET(access, BPF_DEVCG_ACC_READ | BPF_DEVCG_ACC_WRITE | BPF_DEVCG_ACC_MKNOD))
75 r = bpf_program_add_instructions(prog, insn + 3, ELEMENTSOF(insn) - 3);
76 else
77 r = bpf_program_add_instructions(prog, insn, ELEMENTSOF(insn));
084c7007
RG
78 if (r < 0)
79 log_error_errno(r, "Extending device control BPF program failed: %m");
80
81 return r;
82}
83
6b000af4 84static int bpf_prog_allow_list_major(
07141aa0
LP
85 BPFProgram *prog,
86 char type,
87 int major,
88 const char *acc) {
89
084c7007
RG
90 int r, access;
91
92 assert(prog);
93 assert(acc);
94
786cce00
ZJS
95 log_trace("%s: %c %d:* %s", __func__, type, major, acc);
96
084c7007
RG
97 access = bpf_access_type(acc);
98 if (access <= 0)
99 return -EINVAL;
100
8ad08622
ZJS
101 assert(IN_SET(type, 'b', 'c'));
102 const int bpf_type = type == 'c' ? BPF_DEVCG_DEV_CHAR : BPF_DEVCG_DEV_BLOCK;
103
2899aac4 104 const struct bpf_insn insn[] = {
00486578 105 BPF_MOV32_REG(BPF_REG_1, BPF_REG_3),
2899aac4 106 BPF_ALU32_IMM(BPF_AND, BPF_REG_1, access),
00486578
ZJS
107 BPF_JMP_REG(BPF_JNE, BPF_REG_1, BPF_REG_3, 3), /* compare access type */
108
109 BPF_JMP_IMM(BPF_JNE, BPF_REG_2, bpf_type, 2), /* compare device type */
110 BPF_JMP_IMM(BPF_JNE, BPF_REG_4, major, 1), /* compare major */
111 BPF_JMP_A(PASS_JUMP_OFF), /* jump to PASS */
2899aac4 112 };
084c7007 113
00486578
ZJS
114 if (FLAGS_SET(access, BPF_DEVCG_ACC_READ | BPF_DEVCG_ACC_WRITE | BPF_DEVCG_ACC_MKNOD))
115 r = bpf_program_add_instructions(prog, insn + 3, ELEMENTSOF(insn) - 3);
116 else
117 r = bpf_program_add_instructions(prog, insn, ELEMENTSOF(insn));
084c7007
RG
118 if (r < 0)
119 log_error_errno(r, "Extending device control BPF program failed: %m");
8e8b5d2e
LP
120
121 return r;
122}
123
6b000af4 124static int bpf_prog_allow_list_class(
07141aa0
LP
125 BPFProgram *prog,
126 char type,
127 const char *acc) {
128
8e8b5d2e
LP
129 int r, access;
130
131 assert(prog);
132 assert(acc);
133
786cce00
ZJS
134 log_trace("%s: %c *:* %s", __func__, type, acc);
135
8e8b5d2e
LP
136 access = bpf_access_type(acc);
137 if (access <= 0)
138 return -EINVAL;
139
8ad08622
ZJS
140 assert(IN_SET(type, 'b', 'c'));
141 const int bpf_type = type == 'c' ? BPF_DEVCG_DEV_CHAR : BPF_DEVCG_DEV_BLOCK;
142
2899aac4 143 const struct bpf_insn insn[] = {
00486578 144 BPF_MOV32_REG(BPF_REG_1, BPF_REG_3),
2899aac4 145 BPF_ALU32_IMM(BPF_AND, BPF_REG_1, access),
00486578
ZJS
146 BPF_JMP_REG(BPF_JNE, BPF_REG_1, BPF_REG_3, 2), /* compare access type */
147
148 BPF_JMP_IMM(BPF_JNE, BPF_REG_2, bpf_type, 1), /* compare device type */
149 BPF_JMP_A(PASS_JUMP_OFF), /* jump to PASS */
2899aac4 150 };
8e8b5d2e 151
00486578
ZJS
152 if (FLAGS_SET(access, BPF_DEVCG_ACC_READ | BPF_DEVCG_ACC_WRITE | BPF_DEVCG_ACC_MKNOD))
153 r = bpf_program_add_instructions(prog, insn + 3, ELEMENTSOF(insn) - 3);
154 else
155 r = bpf_program_add_instructions(prog, insn, ELEMENTSOF(insn));
8e8b5d2e
LP
156 if (r < 0)
157 log_error_errno(r, "Extending device control BPF program failed: %m");
084c7007
RG
158
159 return r;
160}
161
07141aa0
LP
162int bpf_devices_cgroup_init(
163 BPFProgram **ret,
164 CGroupDevicePolicy policy,
6b000af4 165 bool allow_list) {
07141aa0 166
2899aac4 167 const struct bpf_insn pre_insn[] = {
084c7007 168 /* load device type to r2 */
415fe5ec 169 BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
084c7007 170 offsetof(struct bpf_cgroup_dev_ctx, access_type)),
415fe5ec 171 BPF_ALU32_IMM(BPF_AND, BPF_REG_2, 0xFFFF),
084c7007
RG
172
173 /* load access type to r3 */
174 BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
175 offsetof(struct bpf_cgroup_dev_ctx, access_type)),
176 BPF_ALU32_IMM(BPF_RSH, BPF_REG_3, 16),
177
178 /* load major number to r4 */
179 BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1,
180 offsetof(struct bpf_cgroup_dev_ctx, major)),
181
182 /* load minor number to r5 */
183 BPF_LDX_MEM(BPF_W, BPF_REG_5, BPF_REG_1,
184 offsetof(struct bpf_cgroup_dev_ctx, minor)),
185 };
186
76dc1725 187 _cleanup_(bpf_program_freep) BPFProgram *prog = NULL;
084c7007
RG
188 int r;
189
190 assert(ret);
191
6b000af4 192 if (policy == CGROUP_DEVICE_POLICY_AUTO && !allow_list)
084c7007
RG
193 return 0;
194
8fe9dbb9 195 r = bpf_program_new(BPF_PROG_TYPE_CGROUP_DEVICE, "sd_devices", &prog);
084c7007
RG
196 if (r < 0)
197 return log_error_errno(r, "Loading device control BPF program failed: %m");
198
6b000af4 199 if (policy == CGROUP_DEVICE_POLICY_CLOSED || allow_list) {
084c7007
RG
200 r = bpf_program_add_instructions(prog, pre_insn, ELEMENTSOF(pre_insn));
201 if (r < 0)
202 return log_error_errno(r, "Extending device control BPF program failed: %m");
203 }
204
205 *ret = TAKE_PTR(prog);
206
207 return 0;
208}
209
0848715c 210int bpf_devices_apply_policy(
76dc1725 211 BPFProgram **prog,
0848715c 212 CGroupDevicePolicy policy,
6b000af4 213 bool allow_list,
0848715c
ZJS
214 const char *cgroup_path,
215 BPFProgram **prog_installed) {
7973f564
ZJS
216
217 _cleanup_free_ char *controller_path = NULL;
084c7007
RG
218 int r;
219
5600a261 220 /* This will assign *prog_installed if everything goes well. */
7973f564 221
76dc1725 222 assert(prog);
223 if (!*prog)
7973f564 224 goto finish;
084c7007 225
6b000af4 226 const bool deny_everything = policy == CGROUP_DEVICE_POLICY_STRICT && !allow_list;
084c7007 227
2899aac4
ZJS
228 const struct bpf_insn post_insn[] = {
229 /* return DENY */
230 BPF_MOV64_IMM(BPF_REG_0, 0),
231 BPF_JMP_A(1),
232 };
233
234 const struct bpf_insn exit_insn[] = {
235 /* finally return DENY if deny_everything else ALLOW */
236 BPF_MOV64_IMM(BPF_REG_0, deny_everything ? 0 : 1),
237 BPF_EXIT_INSN()
238 };
239
240 if (!deny_everything) {
76dc1725 241 r = bpf_program_add_instructions(*prog, post_insn, ELEMENTSOF(post_insn));
084c7007
RG
242 if (r < 0)
243 return log_error_errno(r, "Extending device control BPF program failed: %m");
244
245 /* Fixup PASS_JUMP_OFF jump offsets. */
76dc1725 246 for (size_t off = 0; off < (*prog)->n_instructions; off++) {
247 struct bpf_insn *ins = &((*prog)->instructions[off]);
084c7007
RG
248
249 if (ins->code == (BPF_JMP | BPF_JA) && ins->off == PASS_JUMP_OFF)
76dc1725 250 ins->off = (*prog)->n_instructions - off - 1;
084c7007 251 }
2899aac4 252 }
084c7007 253
76dc1725 254 r = bpf_program_add_instructions(*prog, exit_insn, ELEMENTSOF(exit_insn));
084c7007
RG
255 if (r < 0)
256 return log_error_errno(r, "Extending device control BPF program failed: %m");
257
0848715c 258 r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, cgroup_path, NULL, &controller_path);
084c7007
RG
259 if (r < 0)
260 return log_error_errno(r, "Failed to determine cgroup path: %m");
261
76dc1725 262 r = bpf_program_cgroup_attach(*prog, BPF_CGROUP_DEVICE, controller_path, BPF_F_ALLOW_MULTI);
084c7007 263 if (r < 0)
0848715c 264 return log_error_errno(r, "Attaching device control BPF program to cgroup %s failed: %m",
6178e2f8 265 empty_to_root(cgroup_path));
084c7007 266
7973f564 267 finish:
2af3eed1 268 /* Unref the old BPF program (which will implicitly detach it) right before attaching the new program. */
7973f564 269 if (prog_installed) {
76dc1725 270 bpf_program_free(*prog_installed);
271 *prog_installed = TAKE_PTR(*prog);
7973f564 272 }
084c7007
RG
273 return 0;
274}
275
276int bpf_devices_supported(void) {
2899aac4 277 const struct bpf_insn trivial[] = {
084c7007
RG
278 BPF_MOV64_IMM(BPF_REG_0, 1),
279 BPF_EXIT_INSN()
280 };
281
76dc1725 282 _cleanup_(bpf_program_freep) BPFProgram *program = NULL;
084c7007
RG
283 static int supported = -1;
284 int r;
285
286 /* Checks whether BPF device controller is supported. For this, we check five things:
287 *
288 * a) whether we are privileged
289 * b) whether the unified hierarchy is being used
290 * c) the BPF implementation in the kernel supports BPF_PROG_TYPE_CGROUP_DEVICE programs, which we require
291 */
292
293 if (supported >= 0)
294 return supported;
295
296 if (geteuid() != 0) {
297 log_debug("Not enough privileges, BPF device control is not supported.");
298 return supported = 0;
299 }
300
301 r = cg_unified_controller(SYSTEMD_CGROUP_CONTROLLER);
302 if (r < 0)
303 return log_error_errno(r, "Can't determine whether the unified hierarchy is used: %m");
304 if (r == 0) {
305 log_debug("Not running with unified cgroups, BPF device control is not supported.");
306 return supported = 0;
307 }
308
e0c694c7 309 r = bpf_program_new(BPF_PROG_TYPE_CGROUP_DEVICE, "sd_devices", &program);
084c7007
RG
310 if (r < 0) {
311 log_debug_errno(r, "Can't allocate CGROUP DEVICE BPF program, BPF device control is not supported: %m");
312 return supported = 0;
313 }
314
315 r = bpf_program_add_instructions(program, trivial, ELEMENTSOF(trivial));
316 if (r < 0) {
317 log_debug_errno(r, "Can't add trivial instructions to CGROUP DEVICE BPF program, BPF device control is not supported: %m");
318 return supported = 0;
319 }
320
321 r = bpf_program_load_kernel(program, NULL, 0);
322 if (r < 0) {
323 log_debug_errno(r, "Can't load kernel CGROUP DEVICE BPF program, BPF device control is not supported: %m");
324 return supported = 0;
325 }
326
0b82cd25 327 return supported = 1;
084c7007 328}
124e05b3 329
6b000af4 330static int allow_list_device_pattern(
07141aa0
LP
331 BPFProgram *prog,
332 const char *path,
333 char type,
334 const unsigned *maj,
335 const unsigned *min,
336 const char *acc) {
337
124e05b3
ZJS
338 assert(IN_SET(type, 'b', 'c'));
339
340 if (cg_all_unified() > 0) {
341 if (!prog)
342 return 0;
343
124e05b3 344 if (maj && min)
6b000af4 345 return bpf_prog_allow_list_device(prog, type, *maj, *min, acc);
124e05b3 346 else if (maj)
6b000af4 347 return bpf_prog_allow_list_major(prog, type, *maj, acc);
124e05b3 348 else
6b000af4 349 return bpf_prog_allow_list_class(prog, type, acc);
124e05b3
ZJS
350
351 } else {
352 char buf[2+DECIMAL_STR_MAX(unsigned)*2+2+4];
353 int r;
354
355 if (maj && min)
356 xsprintf(buf, "%c %u:%u %s", type, *maj, *min, acc);
357 else if (maj)
358 xsprintf(buf, "%c %u:* %s", type, *maj, acc);
359 else
360 xsprintf(buf, "%c *:* %s", type, acc);
361
362 /* Changing the devices list of a populated cgroup might result in EINVAL, hence ignore
363 * EINVAL here. */
364
365 r = cg_set_attribute("devices", path, "devices.allow", buf);
366 if (r < 0)
367 log_full_errno(IN_SET(r, -ENOENT, -EROFS, -EINVAL, -EACCES, -EPERM) ? LOG_DEBUG : LOG_WARNING,
368 r, "Failed to set devices.allow on %s: %m", path);
369
370 return r;
371 }
372}
373
6b000af4 374int bpf_devices_allow_list_device(
07141aa0
LP
375 BPFProgram *prog,
376 const char *path,
377 const char *node,
378 const char *acc) {
379
124e05b3
ZJS
380 mode_t mode;
381 dev_t rdev;
382 int r;
383
384 assert(path);
385 assert(acc);
386 assert(strlen(acc) <= 3);
387
786cce00
ZJS
388 log_trace("%s: %s %s", __func__, node, acc);
389
124e05b3
ZJS
390 /* Some special handling for /dev/block/%u:%u, /dev/char/%u:%u, /run/systemd/inaccessible/chr and
391 * /run/systemd/inaccessible/blk paths. Instead of stat()ing these we parse out the major/minor directly. This
392 * means clients can use these path without the device node actually around */
393 r = device_path_parse_major_minor(node, &mode, &rdev);
394 if (r < 0) {
395 if (r != -ENODEV)
396 return log_warning_errno(r, "Couldn't parse major/minor from device path '%s': %m", node);
397
398 struct stat st;
399 if (stat(node, &st) < 0)
400 return log_warning_errno(errno, "Couldn't stat device %s: %m", node);
401
402 if (!S_ISCHR(st.st_mode) && !S_ISBLK(st.st_mode))
403 return log_warning_errno(SYNTHETIC_ERRNO(ENODEV), "%s is not a device.", node);
404
405 mode = st.st_mode;
406 rdev = (dev_t) st.st_rdev;
407 }
408
409 unsigned maj = major(rdev), min = minor(rdev);
6b000af4 410 return allow_list_device_pattern(prog, path, S_ISCHR(mode) ? 'c' : 'b', &maj, &min, acc);
124e05b3
ZJS
411}
412
6b000af4 413int bpf_devices_allow_list_major(
07141aa0
LP
414 BPFProgram *prog,
415 const char *path,
416 const char *name,
417 char type,
418 const char *acc) {
419
124e05b3
ZJS
420 unsigned maj;
421 int r;
422
423 assert(path);
424 assert(acc);
425 assert(IN_SET(type, 'b', 'c'));
426
427 if (streq(name, "*"))
428 /* If the name is a wildcard, then apply this list to all devices of this type */
6b000af4 429 return allow_list_device_pattern(prog, path, type, NULL, NULL, acc);
124e05b3
ZJS
430
431 if (safe_atou(name, &maj) >= 0 && DEVICE_MAJOR_VALID(maj))
432 /* The name is numeric and suitable as major. In that case, let's take its major, and create
433 * the entry directly. */
6b000af4 434 return allow_list_device_pattern(prog, path, type, &maj, NULL, acc);
124e05b3
ZJS
435
436 _cleanup_fclose_ FILE *f = NULL;
45669ae2 437 bool good = false, any = false;
124e05b3
ZJS
438
439 f = fopen("/proc/devices", "re");
440 if (!f)
441 return log_warning_errno(errno, "Cannot open /proc/devices to resolve %s: %m", name);
442
443 for (;;) {
444 _cleanup_free_ char *line = NULL;
445 char *w, *p;
446
447 r = read_line(f, LONG_LINE_MAX, &line);
448 if (r < 0)
449 return log_warning_errno(r, "Failed to read /proc/devices: %m");
450 if (r == 0)
451 break;
452
453 if (type == 'c' && streq(line, "Character devices:")) {
454 good = true;
455 continue;
456 }
457
458 if (type == 'b' && streq(line, "Block devices:")) {
459 good = true;
460 continue;
461 }
462
463 if (isempty(line)) {
464 good = false;
465 continue;
466 }
467
468 if (!good)
469 continue;
470
471 p = strstrip(line);
472
473 w = strpbrk(p, WHITESPACE);
474 if (!w)
475 continue;
476 *w = 0;
477
478 r = safe_atou(p, &maj);
479 if (r < 0)
480 continue;
481 if (maj <= 0)
482 continue;
483
484 w++;
485 w += strspn(w, WHITESPACE);
486
487 if (fnmatch(name, w, 0) != 0)
488 continue;
489
45669ae2 490 any = true;
6b000af4 491 (void) allow_list_device_pattern(prog, path, type, &maj, NULL, acc);
124e05b3
ZJS
492 }
493
45669ae2
ZJS
494 if (!any)
495 return log_debug_errno(SYNTHETIC_ERRNO(ENOENT),
6b000af4 496 "Device allow list pattern \"%s\" did not match anything.", name);
45669ae2 497
124e05b3
ZJS
498 return 0;
499}
a9aac7d8 500
6b000af4 501int bpf_devices_allow_list_static(
07141aa0
LP
502 BPFProgram *prog,
503 const char *path) {
504
a9aac7d8
ZJS
505 static const char auto_devices[] =
506 "/dev/null\0" "rwm\0"
507 "/dev/zero\0" "rwm\0"
508 "/dev/full\0" "rwm\0"
509 "/dev/random\0" "rwm\0"
510 "/dev/urandom\0" "rwm\0"
511 "/dev/tty\0" "rwm\0"
512 "/dev/ptmx\0" "rwm\0"
513 /* Allow /run/systemd/inaccessible/{chr,blk} devices for mapping InaccessiblePaths */
514 "/run/systemd/inaccessible/chr\0" "rwm\0"
515 "/run/systemd/inaccessible/blk\0" "rwm\0";
516 int r = 0, k;
517
518 const char *node, *acc;
519 NULSTR_FOREACH_PAIR(node, acc, auto_devices) {
6b000af4 520 k = bpf_devices_allow_list_device(prog, path, node, acc);
a9aac7d8
ZJS
521 if (r >= 0 && k < 0)
522 r = k;
523 }
524
525 /* PTS (/dev/pts) devices may not be duplicated, but accessed */
6b000af4 526 k = bpf_devices_allow_list_major(prog, path, "pts", 'c', "rw");
a9aac7d8
ZJS
527 if (r >= 0 && k < 0)
528 r = k;
529
530 return r;
531}