]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/shared/bpf-program.c
tree-wide: use -EBADF for fd initialization
[thirdparty/systemd.git] / src / shared / bpf-program.c
CommitLineData
db9ecf05 1/* SPDX-License-Identifier: LGPL-2.1-or-later */
71e5200f
DM
2
3#include <fcntl.h>
4#include <sys/stat.h>
5#include <sys/types.h>
6#include <unistd.h>
7
8#include "alloc-util.h"
9#include "bpf-program.h"
7c248223 10#include "errno-util.h"
b57d7523 11#include "escape.h"
71e5200f 12#include "fd-util.h"
0a970718 13#include "memory-util.h"
f5947a5e 14#include "missing_syscall.h"
aa2b6f1d 15#include "path-util.h"
b57d7523 16#include "serialize.h"
9984f493
JK
17#include "string-table.h"
18
19static const char *const bpf_cgroup_attach_type_table[__MAX_BPF_ATTACH_TYPE] = {
20 [BPF_CGROUP_INET_INGRESS] = "ingress",
21 [BPF_CGROUP_INET_EGRESS] = "egress",
22 [BPF_CGROUP_INET_SOCK_CREATE] = "sock_create",
23 [BPF_CGROUP_SOCK_OPS] = "sock_ops",
24 [BPF_CGROUP_DEVICE] = "device",
25 [BPF_CGROUP_INET4_BIND] = "bind4",
26 [BPF_CGROUP_INET6_BIND] = "bind6",
27 [BPF_CGROUP_INET4_CONNECT] = "connect4",
28 [BPF_CGROUP_INET6_CONNECT] = "connect6",
29 [BPF_CGROUP_INET4_POST_BIND] = "post_bind4",
30 [BPF_CGROUP_INET6_POST_BIND] = "post_bind6",
31 [BPF_CGROUP_UDP4_SENDMSG] = "sendmsg4",
32 [BPF_CGROUP_UDP6_SENDMSG] = "sendmsg6",
33 [BPF_CGROUP_SYSCTL] = "sysctl",
34 [BPF_CGROUP_UDP4_RECVMSG] = "recvmsg4",
35 [BPF_CGROUP_UDP6_RECVMSG] = "recvmsg6",
36 [BPF_CGROUP_GETSOCKOPT] = "getsockopt",
37 [BPF_CGROUP_SETSOCKOPT] = "setsockopt",
38};
39
40DEFINE_STRING_TABLE_LOOKUP(bpf_cgroup_attach_type, int);
71e5200f 41
76dc1725 42DEFINE_HASH_OPS_WITH_KEY_DESTRUCTOR(bpf_program_hash_ops, void, trivial_hash_func, trivial_compare_func, bpf_program_free);
43
44BPFProgram *bpf_program_free(BPFProgram *p) {
45 if (!p)
46 return NULL;
47 /* Unfortunately, the kernel currently doesn't implicitly detach BPF programs from their cgroups when the last
48 * fd to the BPF program is closed. This has nasty side-effects since this means that abnormally terminated
49 * programs that attached one of their BPF programs to a cgroup will leave this program pinned for good with
50 * zero chance of recovery, until the cgroup is removed. This is particularly problematic if the cgroup in
51 * question is the root cgroup (or any other cgroup belonging to a service that cannot be restarted during
52 * operation, such as dbus), as the memory for the BPF program can only be reclaimed through a reboot. To
53 * counter this, we track closely to which cgroup a program was attached to and will detach it on our own
54 * whenever we close the BPF fd. */
55 (void) bpf_program_cgroup_detach(p);
56
57 safe_close(p->kernel_fd);
8fe9dbb9 58 free(p->prog_name);
76dc1725 59 free(p->instructions);
60 free(p->attached_path);
61
62 return mfree(p);
63}
7a7cf83d 64
f23f0ead
JK
65 /* struct bpf_prog_info info must be initialized since its value is both input and output
66 * for BPF_OBJ_GET_INFO_BY_FD syscall. */
67static int bpf_program_get_info_by_fd(int prog_fd, struct bpf_prog_info *info, uint32_t info_len) {
68 union bpf_attr attr;
69
70 /* Explicitly memset to zero since some compilers may produce non-zero-initialized padding when
71 * structured initialization is used.
72 * Refer to https://github.com/systemd/systemd/issues/18164
73 */
74 zero(attr);
75 attr.info.bpf_fd = prog_fd;
76 attr.info.info_len = info_len;
77 attr.info.info = PTR_TO_UINT64(info);
78
7c248223 79 return RET_NERRNO(bpf(BPF_OBJ_GET_INFO_BY_FD, &attr, sizeof(attr)));
f23f0ead
JK
80}
81
8fe9dbb9 82int bpf_program_new(uint32_t prog_type, const char *prog_name, BPFProgram **ret) {
76dc1725 83 _cleanup_(bpf_program_freep) BPFProgram *p = NULL;
8fe9dbb9
JK
84 _cleanup_free_ char *name = NULL;
85
86 if (prog_name) {
87 if (strlen(prog_name) >= BPF_OBJ_NAME_LEN)
88 return -ENAMETOOLONG;
89
90 name = strdup(prog_name);
91 if (!name)
92 return -ENOMEM;
93 }
71e5200f 94
06ad9d0c 95 p = new(BPFProgram, 1);
71e5200f 96 if (!p)
ca39a3ce 97 return -ENOMEM;
71e5200f 98
06ad9d0c 99 *p = (BPFProgram) {
06ad9d0c 100 .prog_type = prog_type,
254d1313 101 .kernel_fd = -EBADF,
8fe9dbb9 102 .prog_name = TAKE_PTR(name),
06ad9d0c 103 };
71e5200f 104
1cc6c93a
YW
105 *ret = TAKE_PTR(p);
106
71e5200f
DM
107 return 0;
108}
109
f23f0ead 110int bpf_program_new_from_bpffs_path(const char *path, BPFProgram **ret) {
76dc1725 111 _cleanup_(bpf_program_freep) BPFProgram *p = NULL;
f23f0ead
JK
112 struct bpf_prog_info info = {};
113 int r;
114
115 assert(path);
116 assert(ret);
117
118 p = new(BPFProgram, 1);
119 if (!p)
120 return -ENOMEM;
121
122 *p = (BPFProgram) {
123 .prog_type = BPF_PROG_TYPE_UNSPEC,
254d1313 124 .kernel_fd = -EBADF,
f23f0ead
JK
125 };
126
127 r = bpf_program_load_from_bpf_fs(p, path);
128 if (r < 0)
129 return r;
130
131 r = bpf_program_get_info_by_fd(p->kernel_fd, &info, sizeof(info));
132 if (r < 0)
133 return r;
134
135 p->prog_type = info.type;
136 *ret = TAKE_PTR(p);
137
138 return 0;
139}
140
8301aa0b 141
71e5200f
DM
142int bpf_program_add_instructions(BPFProgram *p, const struct bpf_insn *instructions, size_t count) {
143
144 assert(p);
145
e0ad39fc
LP
146 if (p->kernel_fd >= 0) /* don't allow modification after we uploaded things to the kernel */
147 return -EBUSY;
148
319a4f4b 149 if (!GREEDY_REALLOC(p->instructions, p->n_instructions + count))
71e5200f
DM
150 return -ENOMEM;
151
152 memcpy(p->instructions + p->n_instructions, instructions, sizeof(struct bpf_insn) * count);
153 p->n_instructions += count;
154
155 return 0;
156}
157
158int bpf_program_load_kernel(BPFProgram *p, char *log_buf, size_t log_size) {
159 union bpf_attr attr;
160
161 assert(p);
162
e0ad39fc
LP
163 if (p->kernel_fd >= 0) { /* make this idempotent */
164 memzero(log_buf, log_size);
165 return 0;
166 }
71e5200f 167
28abf5ad
LB
168 // FIXME: Clang doesn't 0-pad with structured initialization, causing
169 // the kernel to reject the bpf_attr as invalid. See:
170 // https://github.com/torvalds/linux/blob/v5.9/kernel/bpf/syscall.c#L65
171 // Ideally it should behave like GCC, so that we can remove these workarounds.
172 zero(attr);
9ca600e2
LB
173 attr.prog_type = p->prog_type;
174 attr.insns = PTR_TO_UINT64(p->instructions);
175 attr.insn_cnt = p->n_instructions;
176 attr.license = PTR_TO_UINT64("GPL");
177 attr.log_buf = PTR_TO_UINT64(log_buf);
178 attr.log_level = !!log_buf;
179 attr.log_size = log_size;
8fe9dbb9
JK
180 if (p->prog_name)
181 strncpy(attr.prog_name, p->prog_name, BPF_OBJ_NAME_LEN - 1);
71e5200f
DM
182
183 p->kernel_fd = bpf(BPF_PROG_LOAD, &attr, sizeof(attr));
184 if (p->kernel_fd < 0)
185 return -errno;
186
187 return 0;
188}
189
fab34748
KL
190int bpf_program_load_from_bpf_fs(BPFProgram *p, const char *path) {
191 union bpf_attr attr;
192
193 assert(p);
194
195 if (p->kernel_fd >= 0) /* don't overwrite an assembled or loaded program */
196 return -EBUSY;
197
28abf5ad 198 zero(attr);
9ca600e2 199 attr.pathname = PTR_TO_UINT64(path);
fab34748
KL
200
201 p->kernel_fd = bpf(BPF_OBJ_GET, &attr, sizeof(attr));
202 if (p->kernel_fd < 0)
203 return -errno;
204
205 return 0;
206}
207
9f2e6892 208int bpf_program_cgroup_attach(BPFProgram *p, int type, const char *path, uint32_t flags) {
aa2b6f1d 209 _cleanup_free_ char *copy = NULL;
254d1313 210 _cleanup_close_ int fd = -EBADF;
71e5200f 211 union bpf_attr attr;
aa2b6f1d 212 int r;
71e5200f
DM
213
214 assert(p);
215 assert(type >= 0);
216 assert(path);
217
aa2b6f1d
LP
218 if (!IN_SET(flags, 0, BPF_F_ALLOW_OVERRIDE, BPF_F_ALLOW_MULTI))
219 return -EINVAL;
220
221 /* We need to track which cgroup the program is attached to, and we can only track one attachment, hence let's
222 * refuse this early. */
223 if (p->attached_path) {
224 if (!path_equal(p->attached_path, path))
225 return -EBUSY;
226 if (p->attached_type != type)
227 return -EBUSY;
228 if (p->attached_flags != flags)
229 return -EBUSY;
230
231 /* Here's a shortcut: if we previously attached this program already, then we don't have to do so
232 * again. Well, with one exception: if we are in BPF_F_ALLOW_OVERRIDE mode then someone else might have
233 * replaced our program since the last time, hence let's reattach it again, just to be safe. In flags
234 * == 0 mode this is not an issue since nobody else can replace our program in that case, and in flags
235 * == BPF_F_ALLOW_MULTI mode any other's program would be installed in addition to ours hence ours
236 * would remain in effect. */
237 if (flags != BPF_F_ALLOW_OVERRIDE)
238 return 0;
239 }
240
241 /* Ensure we have a kernel object for this. */
242 r = bpf_program_load_kernel(p, NULL, 0);
243 if (r < 0)
244 return r;
245
246 copy = strdup(path);
247 if (!copy)
248 return -ENOMEM;
249
71e5200f
DM
250 fd = open(path, O_DIRECTORY|O_RDONLY|O_CLOEXEC);
251 if (fd < 0)
252 return -errno;
253
28abf5ad 254 zero(attr);
9ca600e2
LB
255 attr.attach_type = type;
256 attr.target_fd = fd;
257 attr.attach_bpf_fd = p->kernel_fd;
258 attr.attach_flags = flags;
71e5200f
DM
259
260 if (bpf(BPF_PROG_ATTACH, &attr, sizeof(attr)) < 0)
261 return -errno;
262
aa2b6f1d
LP
263 free_and_replace(p->attached_path, copy);
264 p->attached_type = type;
265 p->attached_flags = flags;
266
71e5200f
DM
267 return 0;
268}
269
aa2b6f1d 270int bpf_program_cgroup_detach(BPFProgram *p) {
254d1313 271 _cleanup_close_ int fd = -EBADF;
71e5200f 272
aa2b6f1d 273 assert(p);
71e5200f 274
aa2b6f1d
LP
275 if (!p->attached_path)
276 return -EUNATCH;
9b3c1897 277
aa2b6f1d
LP
278 fd = open(p->attached_path, O_DIRECTORY|O_RDONLY|O_CLOEXEC);
279 if (fd < 0) {
280 if (errno != ENOENT)
281 return -errno;
71e5200f 282
aa2b6f1d
LP
283 /* If the cgroup does not exist anymore, then we don't have to explicitly detach, it got detached
284 * implicitly by the removal, hence don't complain */
71e5200f 285
aa2b6f1d
LP
286 } else {
287 union bpf_attr attr;
288
28abf5ad 289 zero(attr);
9ca600e2
LB
290 attr.attach_type = p->attached_type;
291 attr.target_fd = fd;
292 attr.attach_bpf_fd = p->kernel_fd;
aa2b6f1d
LP
293
294 if (bpf(BPF_PROG_DETACH, &attr, sizeof(attr)) < 0)
295 return -errno;
296 }
297
298 p->attached_path = mfree(p->attached_path);
71e5200f
DM
299
300 return 0;
301}
302
303int bpf_map_new(enum bpf_map_type type, size_t key_size, size_t value_size, size_t max_entries, uint32_t flags) {
9ca600e2 304 union bpf_attr attr;
71e5200f 305
9ca600e2
LB
306 zero(attr);
307 attr.map_type = type;
308 attr.key_size = key_size;
309 attr.value_size = value_size;
310 attr.max_entries = max_entries;
311 attr.map_flags = flags;
312
7c248223 313 return RET_NERRNO(bpf(BPF_MAP_CREATE, &attr, sizeof(attr)));
71e5200f
DM
314}
315
316int bpf_map_update_element(int fd, const void *key, void *value) {
9ca600e2 317 union bpf_attr attr;
71e5200f 318
9ca600e2
LB
319 zero(attr);
320 attr.map_fd = fd;
321 attr.key = PTR_TO_UINT64(key);
322 attr.value = PTR_TO_UINT64(value);
71e5200f 323
7c248223 324 return RET_NERRNO(bpf(BPF_MAP_UPDATE_ELEM, &attr, sizeof(attr)));
71e5200f
DM
325}
326
327int bpf_map_lookup_element(int fd, const void *key, void *value) {
9ca600e2 328 union bpf_attr attr;
71e5200f 329
9ca600e2
LB
330 zero(attr);
331 attr.map_fd = fd;
332 attr.key = PTR_TO_UINT64(key);
333 attr.value = PTR_TO_UINT64(value);
71e5200f 334
7c248223 335 return RET_NERRNO(bpf(BPF_MAP_LOOKUP_ELEM, &attr, sizeof(attr)));
71e5200f 336}
f23f0ead
JK
337
338int bpf_program_pin(int prog_fd, const char *bpffs_path) {
339 union bpf_attr attr;
340
341 zero(attr);
342 attr.pathname = PTR_TO_UINT64((void *) bpffs_path);
343 attr.bpf_fd = prog_fd;
344
7c248223 345 return RET_NERRNO(bpf(BPF_OBJ_PIN, &attr, sizeof(attr)));
f23f0ead
JK
346}
347
348int bpf_program_get_id_by_fd(int prog_fd, uint32_t *ret_id) {
349 struct bpf_prog_info info = {};
350 int r;
351
352 assert(ret_id);
353
354 r = bpf_program_get_info_by_fd(prog_fd, &info, sizeof(info));
355 if (r < 0)
356 return r;
357
358 *ret_id = info.id;
359
360 return 0;
361};
b57d7523
LP
362
363int bpf_program_serialize_attachment(
364 FILE *f,
365 FDSet *fds,
366 const char *key,
367 BPFProgram *p) {
368
369 _cleanup_free_ char *escaped = NULL;
370 int copy, r;
371
372 if (!p || !p->attached_path)
373 return 0;
374
375 assert(p->kernel_fd >= 0);
376
377 escaped = cescape(p->attached_path);
378 if (!escaped)
379 return -ENOMEM;
380
381 copy = fdset_put_dup(fds, p->kernel_fd);
382 if (copy < 0)
383 return log_error_errno(copy, "Failed to add BPF kernel fd to serialize: %m");
384
385 r = serialize_item_format(
386 f,
387 key,
388 "%i %s %s",
389 copy,
390 bpf_cgroup_attach_type_to_string(p->attached_type),
391 escaped);
392 if (r < 0)
393 return r;
394
395 /* After serialization, let's forget the fact that this program is attached. The attachment — if you
396 * so will — is now 'owned' by the serialization, and not us anymore. Why does that matter? Because
397 * of BPF's less-than-ideal lifecycle handling: to detach a program from a cgroup we have to
398 * explicitly do so, it's not done implicitly on close(). Now, since we are serializing here we don't
399 * want the program to be detached while freeing things, so that the attachment can be retained after
400 * deserializing again. bpf_program_free() implicitly detaches things, if attached_path is non-NULL,
401 * hence we set it to NULL here. */
402
403 p->attached_path = mfree(p->attached_path);
404 return 0;
405}
406
407int bpf_program_serialize_attachment_set(FILE *f, FDSet *fds, const char *key, Set *set) {
408 BPFProgram *p;
409 int r;
410
411 SET_FOREACH(p, set) {
412 r = bpf_program_serialize_attachment(f, fds, key, p);
413 if (r < 0)
414 return r;
415 }
416
417 return 0;
418}
419
420int bpf_program_deserialize_attachment(const char *v, FDSet *fds, BPFProgram **bpfp) {
421 _cleanup_free_ char *sfd = NULL, *sat = NULL, *unescaped = NULL;
76dc1725 422 _cleanup_(bpf_program_freep) BPFProgram *p = NULL;
254d1313 423 _cleanup_close_ int fd = -EBADF;
e437538f 424 ssize_t l;
b57d7523
LP
425 int ifd, at, r;
426
427 assert(v);
428 assert(bpfp);
429
430 /* Extract first word: the fd number */
431 r = extract_first_word(&v, &sfd, NULL, 0);
432 if (r < 0)
433 return r;
434 if (r == 0)
435 return -EINVAL;
436
437 r = safe_atoi(sfd, &ifd);
438 if (r < 0)
439 return r;
440 if (ifd < 0)
441 return -EBADF;
442
443 /* Extract second word: the attach type */
444 r = extract_first_word(&v, &sat, NULL, 0);
445 if (r < 0)
446 return r;
447 if (r == 0)
448 return -EINVAL;
449
450 at = bpf_cgroup_attach_type_from_string(sat);
451 if (at < 0)
452 return at;
453
454 /* The rest is the path */
e437538f
ZJS
455 l = cunescape(v, 0, &unescaped);
456 if (l < 0)
457 return l;
b57d7523
LP
458
459 fd = fdset_remove(fds, ifd);
460 if (fd < 0)
461 return fd;
462
463 p = new(BPFProgram, 1);
464 if (!p)
465 return -ENOMEM;
466
467 *p = (BPFProgram) {
b57d7523
LP
468 .kernel_fd = TAKE_FD(fd),
469 .prog_type = BPF_PROG_TYPE_UNSPEC,
470 .attached_path = TAKE_PTR(unescaped),
471 .attached_type = at,
472 };
473
474 if (*bpfp)
76dc1725 475 bpf_program_free(*bpfp);
b57d7523
LP
476
477 *bpfp = TAKE_PTR(p);
478 return 0;
479}
480
481int bpf_program_deserialize_attachment_set(const char *v, FDSet *fds, Set **bpfsetp) {
482 BPFProgram *p = NULL;
483 int r;
484
485 assert(v);
486 assert(bpfsetp);
487
488 r = bpf_program_deserialize_attachment(v, fds, &p);
489 if (r < 0)
490 return r;
491
492 r = set_ensure_consume(bpfsetp, &bpf_program_hash_ops, p);
493 if (r < 0)
494 return r;
495
496 return 0;
497}