]>
Commit | Line | Data |
---|---|---|
db9ecf05 | 1 | /* SPDX-License-Identifier: LGPL-2.1-or-later */ |
71e5200f DM |
2 | |
3 | #include <fcntl.h> | |
4 | #include <sys/stat.h> | |
5 | #include <sys/types.h> | |
6 | #include <unistd.h> | |
7 | ||
8 | #include "alloc-util.h" | |
9 | #include "bpf-program.h" | |
10 | #include "fd-util.h" | |
0a970718 | 11 | #include "memory-util.h" |
f5947a5e | 12 | #include "missing_syscall.h" |
aa2b6f1d | 13 | #include "path-util.h" |
71e5200f DM |
14 | |
15 | int bpf_program_new(uint32_t prog_type, BPFProgram **ret) { | |
16 | _cleanup_(bpf_program_unrefp) BPFProgram *p = NULL; | |
17 | ||
18 | p = new0(BPFProgram, 1); | |
19 | if (!p) | |
ca39a3ce | 20 | return -ENOMEM; |
71e5200f | 21 | |
aa2b6f1d | 22 | p->n_ref = 1; |
71e5200f DM |
23 | p->prog_type = prog_type; |
24 | p->kernel_fd = -1; | |
25 | ||
1cc6c93a YW |
26 | *ret = TAKE_PTR(p); |
27 | ||
71e5200f DM |
28 | return 0; |
29 | } | |
30 | ||
8301aa0b YW |
/* Destructor invoked by the unref helper when the last reference is dropped.
 * Detaches the program from its cgroup (if attached), closes the kernel fd,
 * and releases all memory. Always returns NULL so callers can do
 * `p = bpf_program_free(p)`. */
static BPFProgram *bpf_program_free(BPFProgram *p) {
        assert(p);

        /* Unfortunately, the kernel currently doesn't implicitly detach BPF programs from their cgroups when the last
         * fd to the BPF program is closed. This has nasty side-effects since this means that abnormally terminated
         * programs that attached one of their BPF programs to a cgroup will leave these programs pinned for good with
         * zero chance of recovery, until the cgroup is removed. This is particularly problematic if the cgroup in
         * question is the root cgroup (or any other cgroup belonging to a service that cannot be restarted during
         * operation, such as dbus), as the memory for the BPF program can only be reclaimed through a reboot. To
         * counter this, we track closely to which cgroup a program was attached to and will detach it on our own
         * whenever we close the BPF fd. */
        (void) bpf_program_cgroup_detach(p); /* best-effort; errors deliberately ignored during teardown */

        safe_close(p->kernel_fd);
        free(p->instructions);
        free(p->attached_path);

        return mfree(p);
}
50 | ||
8301aa0b YW |
/* Generates bpf_program_ref()/bpf_program_unref() around the n_ref counter;
 * unref calls bpf_program_free() once the count reaches zero. */
DEFINE_TRIVIAL_REF_UNREF_FUNC(BPFProgram, bpf_program, bpf_program_free);
52 | ||
71e5200f DM |
53 | int bpf_program_add_instructions(BPFProgram *p, const struct bpf_insn *instructions, size_t count) { |
54 | ||
55 | assert(p); | |
56 | ||
e0ad39fc LP |
57 | if (p->kernel_fd >= 0) /* don't allow modification after we uploaded things to the kernel */ |
58 | return -EBUSY; | |
59 | ||
71e5200f DM |
60 | if (!GREEDY_REALLOC(p->instructions, p->allocated, p->n_instructions + count)) |
61 | return -ENOMEM; | |
62 | ||
63 | memcpy(p->instructions + p->n_instructions, instructions, sizeof(struct bpf_insn) * count); | |
64 | p->n_instructions += count; | |
65 | ||
66 | return 0; | |
67 | } | |
68 | ||
69 | int bpf_program_load_kernel(BPFProgram *p, char *log_buf, size_t log_size) { | |
70 | union bpf_attr attr; | |
71 | ||
72 | assert(p); | |
73 | ||
e0ad39fc LP |
74 | if (p->kernel_fd >= 0) { /* make this idempotent */ |
75 | memzero(log_buf, log_size); | |
76 | return 0; | |
77 | } | |
71e5200f | 78 | |
28abf5ad LB |
79 | // FIXME: Clang doesn't 0-pad with structured initialization, causing |
80 | // the kernel to reject the bpf_attr as invalid. See: | |
81 | // https://github.com/torvalds/linux/blob/v5.9/kernel/bpf/syscall.c#L65 | |
82 | // Ideally it should behave like GCC, so that we can remove these workarounds. | |
83 | zero(attr); | |
71e5200f DM |
84 | attr = (union bpf_attr) { |
85 | .prog_type = p->prog_type, | |
86 | .insns = PTR_TO_UINT64(p->instructions), | |
87 | .insn_cnt = p->n_instructions, | |
88 | .license = PTR_TO_UINT64("GPL"), | |
89 | .log_buf = PTR_TO_UINT64(log_buf), | |
90 | .log_level = !!log_buf, | |
91 | .log_size = log_size, | |
92 | }; | |
93 | ||
94 | p->kernel_fd = bpf(BPF_PROG_LOAD, &attr, sizeof(attr)); | |
95 | if (p->kernel_fd < 0) | |
96 | return -errno; | |
97 | ||
98 | return 0; | |
99 | } | |
100 | ||
fab34748 KL |
101 | int bpf_program_load_from_bpf_fs(BPFProgram *p, const char *path) { |
102 | union bpf_attr attr; | |
103 | ||
104 | assert(p); | |
105 | ||
106 | if (p->kernel_fd >= 0) /* don't overwrite an assembled or loaded program */ | |
107 | return -EBUSY; | |
108 | ||
28abf5ad | 109 | zero(attr); |
fab34748 KL |
110 | attr = (union bpf_attr) { |
111 | .pathname = PTR_TO_UINT64(path), | |
112 | }; | |
113 | ||
114 | p->kernel_fd = bpf(BPF_OBJ_GET, &attr, sizeof(attr)); | |
115 | if (p->kernel_fd < 0) | |
116 | return -errno; | |
117 | ||
118 | return 0; | |
119 | } | |
120 | ||
/* Attaches the program to the cgroup at `path` with the given attach type and
 * flags, uploading it to the kernel first if necessary. Records the
 * attachment (path/type/flags) on the program so that bpf_program_free() can
 * detach it again.
 *
 * flags must be 0, BPF_F_ALLOW_OVERRIDE or BPF_F_ALLOW_MULTI.
 * Returns 0 on success; -EINVAL for invalid flags, -EBUSY if already attached
 * elsewhere or differently, -ENOMEM, or a negative errno from open()/bpf(). */
int bpf_program_cgroup_attach(BPFProgram *p, int type, const char *path, uint32_t flags) {
        _cleanup_free_ char *copy = NULL;
        _cleanup_close_ int fd = -1;
        union bpf_attr attr;
        int r;

        assert(p);
        assert(type >= 0);
        assert(path);

        if (!IN_SET(flags, 0, BPF_F_ALLOW_OVERRIDE, BPF_F_ALLOW_MULTI))
                return -EINVAL;

        /* We need to track which cgroup the program is attached to, and we can only track one attachment, hence let's
         * refuse this early. */
        if (p->attached_path) {
                if (!path_equal(p->attached_path, path))
                        return -EBUSY;
                if (p->attached_type != type)
                        return -EBUSY;
                if (p->attached_flags != flags)
                        return -EBUSY;

                /* Here's a shortcut: if we previously attached this program already, then we don't have to do so
                 * again. Well, with one exception: if we are in BPF_F_ALLOW_OVERRIDE mode then someone else might have
                 * replaced our program since the last time, hence let's reattach it again, just to be safe. In flags
                 * == 0 mode this is not an issue since nobody else can replace our program in that case, and in flags
                 * == BPF_F_ALLOW_MULTI mode any other's program would be installed in addition to ours hence ours
                 * would remain in effect. */
                if (flags != BPF_F_ALLOW_OVERRIDE)
                        return 0;
        }

        /* Ensure we have a kernel object for this. */
        r = bpf_program_load_kernel(p, NULL, 0);
        if (r < 0)
                return r;

        /* Duplicate the path *before* the attach syscall, so that recording a
         * successful attachment below cannot fail on OOM. */
        copy = strdup(path);
        if (!copy)
                return -ENOMEM;

        fd = open(path, O_DIRECTORY|O_RDONLY|O_CLOEXEC);
        if (fd < 0)
                return -errno;

        /* Zero the whole union first; Clang may leave padding unzeroed in the
         * structured initialization below, which the kernel rejects. */
        zero(attr);
        attr = (union bpf_attr) {
                .attach_type = type,
                .target_fd = fd,
                .attach_bpf_fd = p->kernel_fd,
                .attach_flags = flags,
        };

        if (bpf(BPF_PROG_ATTACH, &attr, sizeof(attr)) < 0)
                return -errno;

        /* Attach succeeded: remember where/how we attached, for later detach. */
        free_and_replace(p->attached_path, copy);
        p->attached_type = type;
        p->attached_flags = flags;

        return 0;
}
184 | ||
/* Detaches the program from the cgroup it was previously attached to via
 * bpf_program_cgroup_attach(), and forgets the recorded attachment.
 *
 * Returns 0 on success (including when the cgroup is already gone),
 * -EUNATCH if the program is not attached anywhere, or a negative errno
 * from open()/bpf(). */
int bpf_program_cgroup_detach(BPFProgram *p) {
        _cleanup_close_ int fd = -1;

        assert(p);

        if (!p->attached_path)
                return -EUNATCH;

        fd = open(p->attached_path, O_DIRECTORY|O_RDONLY|O_CLOEXEC);
        if (fd < 0) {
                if (errno != ENOENT)
                        return -errno;

                /* If the cgroup does not exist anymore, then we don't have to explicitly detach, it got detached
                 * implicitly by the removal, hence don't complain */

        } else {
                union bpf_attr attr;

                /* Zero the whole union first; Clang may leave padding unzeroed
                 * in the structured initialization below, which the kernel
                 * rejects. */
                zero(attr);
                attr = (union bpf_attr) {
                        .attach_type = p->attached_type,
                        .target_fd = fd,
                        .attach_bpf_fd = p->kernel_fd,
                };

                if (bpf(BPF_PROG_DETACH, &attr, sizeof(attr)) < 0)
                        return -errno;
        }

        /* Only clear the recorded attachment once detaching actually succeeded
         * (or turned out to be unnecessary). */
        p->attached_path = mfree(p->attached_path);

        return 0;
}
219 | ||
220 | int bpf_map_new(enum bpf_map_type type, size_t key_size, size_t value_size, size_t max_entries, uint32_t flags) { | |
221 | union bpf_attr attr = { | |
222 | .map_type = type, | |
223 | .key_size = key_size, | |
224 | .value_size = value_size, | |
225 | .max_entries = max_entries, | |
226 | .map_flags = flags, | |
227 | }; | |
228 | int fd; | |
229 | ||
230 | fd = bpf(BPF_MAP_CREATE, &attr, sizeof(attr)); | |
231 | if (fd < 0) | |
232 | return -errno; | |
233 | ||
234 | return fd; | |
235 | } | |
236 | ||
237 | int bpf_map_update_element(int fd, const void *key, void *value) { | |
238 | ||
239 | union bpf_attr attr = { | |
240 | .map_fd = fd, | |
241 | .key = PTR_TO_UINT64(key), | |
242 | .value = PTR_TO_UINT64(value), | |
243 | }; | |
244 | ||
245 | if (bpf(BPF_MAP_UPDATE_ELEM, &attr, sizeof(attr)) < 0) | |
246 | return -errno; | |
247 | ||
248 | return 0; | |
249 | } | |
250 | ||
251 | int bpf_map_lookup_element(int fd, const void *key, void *value) { | |
252 | ||
253 | union bpf_attr attr = { | |
254 | .map_fd = fd, | |
255 | .key = PTR_TO_UINT64(key), | |
256 | .value = PTR_TO_UINT64(value), | |
257 | }; | |
258 | ||
259 | if (bpf(BPF_MAP_LOOKUP_ELEM, &attr, sizeof(attr)) < 0) | |
260 | return -errno; | |
261 | ||
262 | return 0; | |
263 | } |