]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/core/bpf-devices.c
rpm: avoid hiding errors and output in *_create_package macros
[thirdparty/systemd.git] / src / core / bpf-devices.c
1 /* SPDX-License-Identifier: LGPL-2.1+ */
2
3 #include <fnmatch.h>
4 #include <linux/bpf_insn.h>
5
6 #include "bpf-devices.h"
7 #include "bpf-program.h"
8 #include "fd-util.h"
9 #include "fileio.h"
10 #include "nulstr-util.h"
11 #include "parse-util.h"
12 #include "stat-util.h"
13 #include "stdio-util.h"
14 #include "string-util.h"
15
/* Placeholder jump offset emitted by the allow-list snippets ("jump to PASS"). It is not a real
 * offset: bpf_devices_apply_policy() scans the finished program for unconditional jumps carrying
 * this value and rewrites them with the actual distance once the program length is known. */
#define PASS_JUMP_OFF 4096
17
/* Translates an access string made up of the letters 'r', 'w' and 'm' into the matching
 * BPF_DEVCG_ACC_* bit mask. Returns the mask (0 for an empty string) or -EINVAL if any other
 * character is encountered. */
static int bpf_access_type(const char *acc) {
        int mask = 0;

        assert(acc);

        for (const char *c = acc; *c != '\0'; c++) {
                if (*c == 'r')
                        mask |= BPF_DEVCG_ACC_READ;
                else if (*c == 'w')
                        mask |= BPF_DEVCG_ACC_WRITE;
                else if (*c == 'm')
                        mask |= BPF_DEVCG_ACC_MKNOD;
                else
                        return -EINVAL;
        }

        return mask;
}
40
41 static int bpf_prog_allow_list_device(
42 BPFProgram *prog,
43 char type,
44 int major,
45 int minor,
46 const char *acc) {
47
48 int r, access;
49
50 assert(prog);
51 assert(acc);
52
53 log_trace("%s: %c %d:%d %s", __func__, type, major, minor, acc);
54
55 access = bpf_access_type(acc);
56 if (access <= 0)
57 return -EINVAL;
58
59 assert(IN_SET(type, 'b', 'c'));
60 const int bpf_type = type == 'c' ? BPF_DEVCG_DEV_CHAR : BPF_DEVCG_DEV_BLOCK;
61
62 const struct bpf_insn insn[] = {
63 BPF_MOV32_REG(BPF_REG_1, BPF_REG_3),
64 BPF_ALU32_IMM(BPF_AND, BPF_REG_1, access),
65 BPF_JMP_REG(BPF_JNE, BPF_REG_1, BPF_REG_3, 4), /* compare access type */
66
67 BPF_JMP_IMM(BPF_JNE, BPF_REG_2, bpf_type, 3), /* compare device type */
68 BPF_JMP_IMM(BPF_JNE, BPF_REG_4, major, 2), /* compare major */
69 BPF_JMP_IMM(BPF_JNE, BPF_REG_5, minor, 1), /* compare minor */
70 BPF_JMP_A(PASS_JUMP_OFF), /* jump to PASS */
71 };
72
73 if (FLAGS_SET(access, BPF_DEVCG_ACC_READ | BPF_DEVCG_ACC_WRITE | BPF_DEVCG_ACC_MKNOD))
74 r = bpf_program_add_instructions(prog, insn + 3, ELEMENTSOF(insn) - 3);
75 else
76 r = bpf_program_add_instructions(prog, insn, ELEMENTSOF(insn));
77 if (r < 0)
78 log_error_errno(r, "Extending device control BPF program failed: %m");
79
80 return r;
81 }
82
83 static int bpf_prog_allow_list_major(
84 BPFProgram *prog,
85 char type,
86 int major,
87 const char *acc) {
88
89 int r, access;
90
91 assert(prog);
92 assert(acc);
93
94 log_trace("%s: %c %d:* %s", __func__, type, major, acc);
95
96 access = bpf_access_type(acc);
97 if (access <= 0)
98 return -EINVAL;
99
100 assert(IN_SET(type, 'b', 'c'));
101 const int bpf_type = type == 'c' ? BPF_DEVCG_DEV_CHAR : BPF_DEVCG_DEV_BLOCK;
102
103 const struct bpf_insn insn[] = {
104 BPF_MOV32_REG(BPF_REG_1, BPF_REG_3),
105 BPF_ALU32_IMM(BPF_AND, BPF_REG_1, access),
106 BPF_JMP_REG(BPF_JNE, BPF_REG_1, BPF_REG_3, 3), /* compare access type */
107
108 BPF_JMP_IMM(BPF_JNE, BPF_REG_2, bpf_type, 2), /* compare device type */
109 BPF_JMP_IMM(BPF_JNE, BPF_REG_4, major, 1), /* compare major */
110 BPF_JMP_A(PASS_JUMP_OFF), /* jump to PASS */
111 };
112
113 if (FLAGS_SET(access, BPF_DEVCG_ACC_READ | BPF_DEVCG_ACC_WRITE | BPF_DEVCG_ACC_MKNOD))
114 r = bpf_program_add_instructions(prog, insn + 3, ELEMENTSOF(insn) - 3);
115 else
116 r = bpf_program_add_instructions(prog, insn, ELEMENTSOF(insn));
117 if (r < 0)
118 log_error_errno(r, "Extending device control BPF program failed: %m");
119
120 return r;
121 }
122
123 static int bpf_prog_allow_list_class(
124 BPFProgram *prog,
125 char type,
126 const char *acc) {
127
128 int r, access;
129
130 assert(prog);
131 assert(acc);
132
133 log_trace("%s: %c *:* %s", __func__, type, acc);
134
135 access = bpf_access_type(acc);
136 if (access <= 0)
137 return -EINVAL;
138
139 assert(IN_SET(type, 'b', 'c'));
140 const int bpf_type = type == 'c' ? BPF_DEVCG_DEV_CHAR : BPF_DEVCG_DEV_BLOCK;
141
142 const struct bpf_insn insn[] = {
143 BPF_MOV32_REG(BPF_REG_1, BPF_REG_3),
144 BPF_ALU32_IMM(BPF_AND, BPF_REG_1, access),
145 BPF_JMP_REG(BPF_JNE, BPF_REG_1, BPF_REG_3, 2), /* compare access type */
146
147 BPF_JMP_IMM(BPF_JNE, BPF_REG_2, bpf_type, 1), /* compare device type */
148 BPF_JMP_A(PASS_JUMP_OFF), /* jump to PASS */
149 };
150
151 if (FLAGS_SET(access, BPF_DEVCG_ACC_READ | BPF_DEVCG_ACC_WRITE | BPF_DEVCG_ACC_MKNOD))
152 r = bpf_program_add_instructions(prog, insn + 3, ELEMENTSOF(insn) - 3);
153 else
154 r = bpf_program_add_instructions(prog, insn, ELEMENTSOF(insn));
155 if (r < 0)
156 log_error_errno(r, "Extending device control BPF program failed: %m");
157
158 return r;
159 }
160
161 int bpf_devices_cgroup_init(
162 BPFProgram **ret,
163 CGroupDevicePolicy policy,
164 bool allow_list) {
165
166 const struct bpf_insn pre_insn[] = {
167 /* load device type to r2 */
168 BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
169 offsetof(struct bpf_cgroup_dev_ctx, access_type)),
170 BPF_ALU32_IMM(BPF_AND, BPF_REG_2, 0xFFFF),
171
172 /* load access type to r3 */
173 BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
174 offsetof(struct bpf_cgroup_dev_ctx, access_type)),
175 BPF_ALU32_IMM(BPF_RSH, BPF_REG_3, 16),
176
177 /* load major number to r4 */
178 BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1,
179 offsetof(struct bpf_cgroup_dev_ctx, major)),
180
181 /* load minor number to r5 */
182 BPF_LDX_MEM(BPF_W, BPF_REG_5, BPF_REG_1,
183 offsetof(struct bpf_cgroup_dev_ctx, minor)),
184 };
185
186 _cleanup_(bpf_program_unrefp) BPFProgram *prog = NULL;
187 int r;
188
189 assert(ret);
190
191 if (policy == CGROUP_DEVICE_POLICY_AUTO && !allow_list)
192 return 0;
193
194 r = bpf_program_new(BPF_PROG_TYPE_CGROUP_DEVICE, &prog);
195 if (r < 0)
196 return log_error_errno(r, "Loading device control BPF program failed: %m");
197
198 if (policy == CGROUP_DEVICE_POLICY_CLOSED || allow_list) {
199 r = bpf_program_add_instructions(prog, pre_insn, ELEMENTSOF(pre_insn));
200 if (r < 0)
201 return log_error_errno(r, "Extending device control BPF program failed: %m");
202 }
203
204 *ret = TAKE_PTR(prog);
205
206 return 0;
207 }
208
209 int bpf_devices_apply_policy(
210 BPFProgram *prog,
211 CGroupDevicePolicy policy,
212 bool allow_list,
213 const char *cgroup_path,
214 BPFProgram **prog_installed) {
215
216 _cleanup_free_ char *controller_path = NULL;
217 int r;
218
219 /* This will assign *keep_program if everything goes well. */
220
221 if (!prog)
222 goto finish;
223
224 const bool deny_everything = policy == CGROUP_DEVICE_POLICY_STRICT && !allow_list;
225
226 const struct bpf_insn post_insn[] = {
227 /* return DENY */
228 BPF_MOV64_IMM(BPF_REG_0, 0),
229 BPF_JMP_A(1),
230 };
231
232 const struct bpf_insn exit_insn[] = {
233 /* finally return DENY if deny_everything else ALLOW */
234 BPF_MOV64_IMM(BPF_REG_0, deny_everything ? 0 : 1),
235 BPF_EXIT_INSN()
236 };
237
238 if (!deny_everything) {
239 r = bpf_program_add_instructions(prog, post_insn, ELEMENTSOF(post_insn));
240 if (r < 0)
241 return log_error_errno(r, "Extending device control BPF program failed: %m");
242
243 /* Fixup PASS_JUMP_OFF jump offsets. */
244 for (size_t off = 0; off < prog->n_instructions; off++) {
245 struct bpf_insn *ins = &prog->instructions[off];
246
247 if (ins->code == (BPF_JMP | BPF_JA) && ins->off == PASS_JUMP_OFF)
248 ins->off = prog->n_instructions - off - 1;
249 }
250 }
251
252 r = bpf_program_add_instructions(prog, exit_insn, ELEMENTSOF(exit_insn));
253 if (r < 0)
254 return log_error_errno(r, "Extending device control BPF program failed: %m");
255
256 r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, cgroup_path, NULL, &controller_path);
257 if (r < 0)
258 return log_error_errno(r, "Failed to determine cgroup path: %m");
259
260 r = bpf_program_cgroup_attach(prog, BPF_CGROUP_DEVICE, controller_path, BPF_F_ALLOW_MULTI);
261 if (r < 0)
262 return log_error_errno(r, "Attaching device control BPF program to cgroup %s failed: %m",
263 cgroup_path);
264
265 finish:
266 /* Unref the old BPF program (which will implicitly detach it) right before attaching the new program. */
267 if (prog_installed) {
268 bpf_program_unref(*prog_installed);
269 *prog_installed = bpf_program_ref(prog);
270 }
271 return 0;
272 }
273
274 int bpf_devices_supported(void) {
275 const struct bpf_insn trivial[] = {
276 BPF_MOV64_IMM(BPF_REG_0, 1),
277 BPF_EXIT_INSN()
278 };
279
280 _cleanup_(bpf_program_unrefp) BPFProgram *program = NULL;
281 static int supported = -1;
282 int r;
283
284 /* Checks whether BPF device controller is supported. For this, we check five things:
285 *
286 * a) whether we are privileged
287 * b) whether the unified hierarchy is being used
288 * c) the BPF implementation in the kernel supports BPF_PROG_TYPE_CGROUP_DEVICE programs, which we require
289 */
290
291 if (supported >= 0)
292 return supported;
293
294 if (geteuid() != 0) {
295 log_debug("Not enough privileges, BPF device control is not supported.");
296 return supported = 0;
297 }
298
299 r = cg_unified_controller(SYSTEMD_CGROUP_CONTROLLER);
300 if (r < 0)
301 return log_error_errno(r, "Can't determine whether the unified hierarchy is used: %m");
302 if (r == 0) {
303 log_debug("Not running with unified cgroups, BPF device control is not supported.");
304 return supported = 0;
305 }
306
307 r = bpf_program_new(BPF_PROG_TYPE_CGROUP_DEVICE, &program);
308 if (r < 0) {
309 log_debug_errno(r, "Can't allocate CGROUP DEVICE BPF program, BPF device control is not supported: %m");
310 return supported = 0;
311 }
312
313 r = bpf_program_add_instructions(program, trivial, ELEMENTSOF(trivial));
314 if (r < 0) {
315 log_debug_errno(r, "Can't add trivial instructions to CGROUP DEVICE BPF program, BPF device control is not supported: %m");
316 return supported = 0;
317 }
318
319 r = bpf_program_load_kernel(program, NULL, 0);
320 if (r < 0) {
321 log_debug_errno(r, "Can't load kernel CGROUP DEVICE BPF program, BPF device control is not supported: %m");
322 return supported = 0;
323 }
324
325 return supported = 1;
326 }
327
328 static int allow_list_device_pattern(
329 BPFProgram *prog,
330 const char *path,
331 char type,
332 const unsigned *maj,
333 const unsigned *min,
334 const char *acc) {
335
336 assert(IN_SET(type, 'b', 'c'));
337
338 if (cg_all_unified() > 0) {
339 if (!prog)
340 return 0;
341
342 if (maj && min)
343 return bpf_prog_allow_list_device(prog, type, *maj, *min, acc);
344 else if (maj)
345 return bpf_prog_allow_list_major(prog, type, *maj, acc);
346 else
347 return bpf_prog_allow_list_class(prog, type, acc);
348
349 } else {
350 char buf[2+DECIMAL_STR_MAX(unsigned)*2+2+4];
351 int r;
352
353 if (maj && min)
354 xsprintf(buf, "%c %u:%u %s", type, *maj, *min, acc);
355 else if (maj)
356 xsprintf(buf, "%c %u:* %s", type, *maj, acc);
357 else
358 xsprintf(buf, "%c *:* %s", type, acc);
359
360 /* Changing the devices list of a populated cgroup might result in EINVAL, hence ignore
361 * EINVAL here. */
362
363 r = cg_set_attribute("devices", path, "devices.allow", buf);
364 if (r < 0)
365 log_full_errno(IN_SET(r, -ENOENT, -EROFS, -EINVAL, -EACCES, -EPERM) ? LOG_DEBUG : LOG_WARNING,
366 r, "Failed to set devices.allow on %s: %m", path);
367
368 return r;
369 }
370 }
371
372 int bpf_devices_allow_list_device(
373 BPFProgram *prog,
374 const char *path,
375 const char *node,
376 const char *acc) {
377
378 mode_t mode;
379 dev_t rdev;
380 int r;
381
382 assert(path);
383 assert(acc);
384 assert(strlen(acc) <= 3);
385
386 log_trace("%s: %s %s", __func__, node, acc);
387
388 /* Some special handling for /dev/block/%u:%u, /dev/char/%u:%u, /run/systemd/inaccessible/chr and
389 * /run/systemd/inaccessible/blk paths. Instead of stat()ing these we parse out the major/minor directly. This
390 * means clients can use these path without the device node actually around */
391 r = device_path_parse_major_minor(node, &mode, &rdev);
392 if (r < 0) {
393 if (r != -ENODEV)
394 return log_warning_errno(r, "Couldn't parse major/minor from device path '%s': %m", node);
395
396 struct stat st;
397 if (stat(node, &st) < 0)
398 return log_warning_errno(errno, "Couldn't stat device %s: %m", node);
399
400 if (!S_ISCHR(st.st_mode) && !S_ISBLK(st.st_mode))
401 return log_warning_errno(SYNTHETIC_ERRNO(ENODEV), "%s is not a device.", node);
402
403 mode = st.st_mode;
404 rdev = (dev_t) st.st_rdev;
405 }
406
407 unsigned maj = major(rdev), min = minor(rdev);
408 return allow_list_device_pattern(prog, path, S_ISCHR(mode) ? 'c' : 'b', &maj, &min, acc);
409 }
410
411 int bpf_devices_allow_list_major(
412 BPFProgram *prog,
413 const char *path,
414 const char *name,
415 char type,
416 const char *acc) {
417
418 unsigned maj;
419 int r;
420
421 assert(path);
422 assert(acc);
423 assert(IN_SET(type, 'b', 'c'));
424
425 if (streq(name, "*"))
426 /* If the name is a wildcard, then apply this list to all devices of this type */
427 return allow_list_device_pattern(prog, path, type, NULL, NULL, acc);
428
429 if (safe_atou(name, &maj) >= 0 && DEVICE_MAJOR_VALID(maj))
430 /* The name is numeric and suitable as major. In that case, let's take its major, and create
431 * the entry directly. */
432 return allow_list_device_pattern(prog, path, type, &maj, NULL, acc);
433
434 _cleanup_fclose_ FILE *f = NULL;
435 bool good = false, any = false;
436
437 f = fopen("/proc/devices", "re");
438 if (!f)
439 return log_warning_errno(errno, "Cannot open /proc/devices to resolve %s: %m", name);
440
441 for (;;) {
442 _cleanup_free_ char *line = NULL;
443 char *w, *p;
444
445 r = read_line(f, LONG_LINE_MAX, &line);
446 if (r < 0)
447 return log_warning_errno(r, "Failed to read /proc/devices: %m");
448 if (r == 0)
449 break;
450
451 if (type == 'c' && streq(line, "Character devices:")) {
452 good = true;
453 continue;
454 }
455
456 if (type == 'b' && streq(line, "Block devices:")) {
457 good = true;
458 continue;
459 }
460
461 if (isempty(line)) {
462 good = false;
463 continue;
464 }
465
466 if (!good)
467 continue;
468
469 p = strstrip(line);
470
471 w = strpbrk(p, WHITESPACE);
472 if (!w)
473 continue;
474 *w = 0;
475
476 r = safe_atou(p, &maj);
477 if (r < 0)
478 continue;
479 if (maj <= 0)
480 continue;
481
482 w++;
483 w += strspn(w, WHITESPACE);
484
485 if (fnmatch(name, w, 0) != 0)
486 continue;
487
488 any = true;
489 (void) allow_list_device_pattern(prog, path, type, &maj, NULL, acc);
490 }
491
492 if (!any)
493 return log_debug_errno(SYNTHETIC_ERRNO(ENOENT),
494 "Device allow list pattern \"%s\" did not match anything.", name);
495
496 return 0;
497 }
498
499 int bpf_devices_allow_list_static(
500 BPFProgram *prog,
501 const char *path) {
502
503 static const char auto_devices[] =
504 "/dev/null\0" "rwm\0"
505 "/dev/zero\0" "rwm\0"
506 "/dev/full\0" "rwm\0"
507 "/dev/random\0" "rwm\0"
508 "/dev/urandom\0" "rwm\0"
509 "/dev/tty\0" "rwm\0"
510 "/dev/ptmx\0" "rwm\0"
511 /* Allow /run/systemd/inaccessible/{chr,blk} devices for mapping InaccessiblePaths */
512 "/run/systemd/inaccessible/chr\0" "rwm\0"
513 "/run/systemd/inaccessible/blk\0" "rwm\0";
514 int r = 0, k;
515
516 const char *node, *acc;
517 NULSTR_FOREACH_PAIR(node, acc, auto_devices) {
518 k = bpf_devices_allow_list_device(prog, path, node, acc);
519 if (r >= 0 && k < 0)
520 r = k;
521 }
522
523 /* PTS (/dev/pts) devices may not be duplicated, but accessed */
524 k = bpf_devices_allow_list_major(prog, path, "pts", 'c', "rw");
525 if (r >= 0 && k < 0)
526 r = k;
527
528 return r;
529 }