]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/basic/bpf-program.c
ab7562c17bc2b46951ce1ae07360ee61c9acc7f6
[thirdparty/systemd.git] / src / basic / bpf-program.c
1 /* SPDX-License-Identifier: LGPL-2.1+ */
2 /***
3 Copyright 2016 Daniel Mack
4 ***/
5
6 #include <fcntl.h>
7 #include <sys/stat.h>
8 #include <sys/types.h>
9 #include <unistd.h>
10
11 #include "alloc-util.h"
12 #include "bpf-program.h"
13 #include "fd-util.h"
14 #include "log.h"
15 #include "missing.h"
16 #include "path-util.h"
17 #include "util.h"
18
19 int bpf_program_new(uint32_t prog_type, BPFProgram **ret) {
20 _cleanup_(bpf_program_unrefp) BPFProgram *p = NULL;
21
22 p = new0(BPFProgram, 1);
23 if (!p)
24 return log_oom();
25
26 p->n_ref = 1;
27 p->prog_type = prog_type;
28 p->kernel_fd = -1;
29
30 *ret = TAKE_PTR(p);
31
32 return 0;
33 }
34
35 BPFProgram *bpf_program_ref(BPFProgram *p) {
36 if (!p)
37 return NULL;
38
39 assert(p->n_ref > 0);
40 p->n_ref++;
41
42 return p;
43 }
44
45 BPFProgram *bpf_program_unref(BPFProgram *p) {
46 if (!p)
47 return NULL;
48
49 assert(p->n_ref > 0);
50 p->n_ref--;
51
52 if (p->n_ref > 0)
53 return NULL;
54
55 /* Unfortunately, the kernel currently doesn't implicitly detach BPF programs from their cgroups when the last
56 * fd to the BPF program is closed. This has nasty side-effects since this means that abnormally terminated
57 * programs that attached one of their BPF programs to a cgroup will leave this programs pinned for good with
58 * zero chance of recovery, until the cgroup is removed. This is particularly problematic if the cgroup in
59 * question is the root cgroup (or any other cgroup belonging to a service that cannot be restarted during
60 * operation, such as dbus), as the memory for the BPF program can only be reclaimed through a reboot. To
61 * counter this, we track closely to which cgroup a program was attached to and will detach it on our own
62 * whenever we close the BPF fd. */
63 (void) bpf_program_cgroup_detach(p);
64
65 safe_close(p->kernel_fd);
66 free(p->instructions);
67 free(p->attached_path);
68
69 return mfree(p);
70 }
71
72 int bpf_program_add_instructions(BPFProgram *p, const struct bpf_insn *instructions, size_t count) {
73
74 assert(p);
75
76 if (p->kernel_fd >= 0) /* don't allow modification after we uploaded things to the kernel */
77 return -EBUSY;
78
79 if (!GREEDY_REALLOC(p->instructions, p->allocated, p->n_instructions + count))
80 return -ENOMEM;
81
82 memcpy(p->instructions + p->n_instructions, instructions, sizeof(struct bpf_insn) * count);
83 p->n_instructions += count;
84
85 return 0;
86 }
87
88 int bpf_program_load_kernel(BPFProgram *p, char *log_buf, size_t log_size) {
89 union bpf_attr attr;
90
91 assert(p);
92
93 if (p->kernel_fd >= 0) { /* make this idempotent */
94 memzero(log_buf, log_size);
95 return 0;
96 }
97
98 attr = (union bpf_attr) {
99 .prog_type = p->prog_type,
100 .insns = PTR_TO_UINT64(p->instructions),
101 .insn_cnt = p->n_instructions,
102 .license = PTR_TO_UINT64("GPL"),
103 .log_buf = PTR_TO_UINT64(log_buf),
104 .log_level = !!log_buf,
105 .log_size = log_size,
106 };
107
108 p->kernel_fd = bpf(BPF_PROG_LOAD, &attr, sizeof(attr));
109 if (p->kernel_fd < 0)
110 return -errno;
111
112 return 0;
113 }
114
115 int bpf_program_cgroup_attach(BPFProgram *p, int type, const char *path, uint32_t flags) {
116 _cleanup_free_ char *copy = NULL;
117 _cleanup_close_ int fd = -1;
118 union bpf_attr attr;
119 int r;
120
121 assert(p);
122 assert(type >= 0);
123 assert(path);
124
125 if (!IN_SET(flags, 0, BPF_F_ALLOW_OVERRIDE, BPF_F_ALLOW_MULTI))
126 return -EINVAL;
127
128 /* We need to track which cgroup the program is attached to, and we can only track one attachment, hence let's
129 * refuse this early. */
130 if (p->attached_path) {
131 if (!path_equal(p->attached_path, path))
132 return -EBUSY;
133 if (p->attached_type != type)
134 return -EBUSY;
135 if (p->attached_flags != flags)
136 return -EBUSY;
137
138 /* Here's a shortcut: if we previously attached this program already, then we don't have to do so
139 * again. Well, with one exception: if we are in BPF_F_ALLOW_OVERRIDE mode then someone else might have
140 * replaced our program since the last time, hence let's reattach it again, just to be safe. In flags
141 * == 0 mode this is not an issue since nobody else can replace our program in that case, and in flags
142 * == BPF_F_ALLOW_MULTI mode any other's program would be installed in addition to ours hence ours
143 * would remain in effect. */
144 if (flags != BPF_F_ALLOW_OVERRIDE)
145 return 0;
146 }
147
148 /* Ensure we have a kernel object for this. */
149 r = bpf_program_load_kernel(p, NULL, 0);
150 if (r < 0)
151 return r;
152
153 copy = strdup(path);
154 if (!copy)
155 return -ENOMEM;
156
157 fd = open(path, O_DIRECTORY|O_RDONLY|O_CLOEXEC);
158 if (fd < 0)
159 return -errno;
160
161 attr = (union bpf_attr) {
162 .attach_type = type,
163 .target_fd = fd,
164 .attach_bpf_fd = p->kernel_fd,
165 .attach_flags = flags,
166 };
167
168 if (bpf(BPF_PROG_ATTACH, &attr, sizeof(attr)) < 0)
169 return -errno;
170
171 free_and_replace(p->attached_path, copy);
172 p->attached_type = type;
173 p->attached_flags = flags;
174
175 return 0;
176 }
177
178 int bpf_program_cgroup_detach(BPFProgram *p) {
179 _cleanup_close_ int fd = -1;
180
181 assert(p);
182
183 if (!p->attached_path)
184 return -EUNATCH;
185
186 fd = open(p->attached_path, O_DIRECTORY|O_RDONLY|O_CLOEXEC);
187 if (fd < 0) {
188 if (errno != ENOENT)
189 return -errno;
190
191 /* If the cgroup does not exist anymore, then we don't have to explicitly detach, it got detached
192 * implicitly by the removal, hence don't complain */
193
194 } else {
195 union bpf_attr attr;
196
197 attr = (union bpf_attr) {
198 .attach_type = p->attached_type,
199 .target_fd = fd,
200 .attach_bpf_fd = p->kernel_fd,
201 };
202
203 if (bpf(BPF_PROG_DETACH, &attr, sizeof(attr)) < 0)
204 return -errno;
205 }
206
207 p->attached_path = mfree(p->attached_path);
208
209 return 0;
210 }
211
212 int bpf_map_new(enum bpf_map_type type, size_t key_size, size_t value_size, size_t max_entries, uint32_t flags) {
213 union bpf_attr attr = {
214 .map_type = type,
215 .key_size = key_size,
216 .value_size = value_size,
217 .max_entries = max_entries,
218 .map_flags = flags,
219 };
220 int fd;
221
222 fd = bpf(BPF_MAP_CREATE, &attr, sizeof(attr));
223 if (fd < 0)
224 return -errno;
225
226 return fd;
227 }
228
229 int bpf_map_update_element(int fd, const void *key, void *value) {
230
231 union bpf_attr attr = {
232 .map_fd = fd,
233 .key = PTR_TO_UINT64(key),
234 .value = PTR_TO_UINT64(value),
235 };
236
237 if (bpf(BPF_MAP_UPDATE_ELEM, &attr, sizeof(attr)) < 0)
238 return -errno;
239
240 return 0;
241 }
242
243 int bpf_map_lookup_element(int fd, const void *key, void *value) {
244
245 union bpf_attr attr = {
246 .map_fd = fd,
247 .key = PTR_TO_UINT64(key),
248 .value = PTR_TO_UINT64(value),
249 };
250
251 if (bpf(BPF_MAP_LOOKUP_ELEM, &attr, sizeof(attr)) < 0)
252 return -errno;
253
254 return 0;
255 }