/* SPDX-License-Identifier: LGPL-2.1+ */
/***
  This file is part of systemd.

  Copyright 2016 Daniel Mack

  systemd is free software; you can redistribute it and/or modify it
  under the terms of the GNU Lesser General Public License as published by
  the Free Software Foundation; either version 2.1 of the License, or
  (at your option) any later version.

  systemd is distributed in the hope that it will be useful, but
  WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  Lesser General Public License for more details.

  You should have received a copy of the GNU Lesser General Public License
  along with systemd; If not, see <http://www.gnu.org/licenses/>.
***/

#include <arpa/inet.h>
#include <assert.h>
#include <errno.h>
#include <fcntl.h>
#include <linux/libbpf.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#include "alloc-util.h"
#include "bpf-firewall.h"
#include "bpf-program.h"
#include "fd-util.h"
#include "ip-address-access.h"
#include "unit.h"

enum {
        MAP_KEY_PACKETS,
        MAP_KEY_BYTES,
};

enum {
        ACCESS_ALLOWED = 1,
        ACCESS_DENIED = 2,
};
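
/* ACCESS_ALLOWED and ACCESS_DENIED are distinct bits rather than alternatives: the generated program ORs them
 * into register R8 as the individual lookups match, and only decides the verdict at the very end. A packet that
 * matched both an allow and a deny entry hence ends up with both bits set and is allowed to pass. */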

/* Compile instructions for one list of addresses, one direction and one specific verdict on matches. */

static int add_lookup_instructions(
                BPFProgram *p,
                int map_fd,
                int protocol,
                bool is_ingress,
                int verdict) {

        int r, addr_offset, addr_size;

        assert(p);
        assert(map_fd >= 0);

        switch (protocol) {

        case ETH_P_IP:
                addr_size = sizeof(uint32_t);
                addr_offset = is_ingress ?
                        offsetof(struct iphdr, saddr) :
                        offsetof(struct iphdr, daddr);
                break;

        case ETH_P_IPV6:
                addr_size = 4 * sizeof(uint32_t);
                addr_offset = is_ingress ?
                        offsetof(struct ip6_hdr, ip6_src.s6_addr) :
                        offsetof(struct ip6_hdr, ip6_dst.s6_addr);
                break;

        default:
                return -EAFNOSUPPORT;
        }

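        /*
         * The lookup key for the LPM trie map is assembled on the BPF stack: a 32-bit prefixlen field
         * (set below to the full width of the address, so that the trie performs its longest-prefix match
         * against the stored entries), immediately followed by the address itself, which
         * BPF_FUNC_skb_load_bytes deposits at fp - addr_size.
         */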
        do {
                /* Look up the packet's source/destination address (32 bits for IPv4, 128 bits for IPv6) in the map */
                struct bpf_insn insn[] = {
                        /* If skb->protocol doesn't match the protocol this block was compiled for, skip the
                         * whole block. The jump offset will be set later. */
                        BPF_JMP_IMM(BPF_JNE, BPF_REG_7, htobe16(protocol), 0),

                        /*
                         * Call into BPF_FUNC_skb_load_bytes to load the dst/src IP address
                         *
                         * R1: Pointer to the skb
                         * R2: Data offset
                         * R3: Destination buffer on the stack (r10 - addr_size)
                         * R4: Number of bytes to read (addr_size)
                         */

                        BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
                        BPF_MOV32_IMM(BPF_REG_2, addr_offset),

                        BPF_MOV64_REG(BPF_REG_3, BPF_REG_10),
                        BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, -addr_size),

                        BPF_MOV32_IMM(BPF_REG_4, addr_size),
                        BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_skb_load_bytes),

                        /*
                         * Call into BPF_FUNC_map_lookup_elem to see if the address matches any entry in the
                         * LPM trie map. For this to work, the prefixlen field of 'struct bpf_lpm_trie_key'
                         * has to be set to the maximum possible value.
                         *
                         * On success, the looked up value is stored in R0. For this application, the actual
                         * value doesn't matter, however; we just set the bit in @verdict in R8 if we found any
                         * matching value.
                         */

                        BPF_LD_MAP_FD(BPF_REG_1, map_fd),
                        BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
                        BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -addr_size - sizeof(uint32_t)),
                        BPF_ST_MEM(BPF_W, BPF_REG_2, 0, addr_size * 8),

                        BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
                        BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
                        BPF_ALU32_IMM(BPF_OR, BPF_REG_8, verdict),
                };

                /* Jump label fixup */
                insn[0].off = ELEMENTSOF(insn) - 1;

                r = bpf_program_add_instructions(p, insn, ELEMENTSOF(insn));
                if (r < 0)
                        return r;

        } while (false);

        return 0;
}

static int bpf_firewall_compile_bpf(
                Unit *u,
                bool is_ingress,
                BPFProgram **ret) {

        struct bpf_insn pre_insn[] = {
                /*
                 * When the eBPF program is entered, R1 contains the address of the skb.
                 * However, R1-R5 are scratch registers that are not preserved when calling
                 * into kernel functions, so we need to save anything that's supposed to
                 * stay around to R6-R9. Save the skb to R6.
                 */
                BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),

                /*
                 * Although we cannot access the skb data directly from eBPF programs used in this
                 * scenario, the kernel has prepared some fields for us to access through struct __sk_buff.
                 * Load the protocol (IPv4, IPv6) used by the packet in flight once and cache it in R7
                 * for later use.
                 */
                BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6, offsetof(struct __sk_buff, protocol)),

                /*
                 * R8 is used to keep track of whether any address check has explicitly allowed or denied the packet
                 * through ACCESS_DENIED or ACCESS_ALLOWED bits. Reset them both to 0 in the beginning.
                 */
                BPF_MOV32_IMM(BPF_REG_8, 0),
        };

        /*
         * The access checkers compiled for the configured allow and deny lists
         * write to R8 at runtime. The following code prepares for an early exit that
         * skips the accounting if the packet is denied.
         *
         *   R0 = 1
         *   if (R8 == ACCESS_DENIED)
         *           R0 = 0
         *
         * This means that if both ACCESS_DENIED and ACCESS_ALLOWED are set, the packet
         * is allowed to pass.
         */
        struct bpf_insn post_insn[] = {
                BPF_MOV64_IMM(BPF_REG_0, 1),
                BPF_JMP_IMM(BPF_JNE, BPF_REG_8, ACCESS_DENIED, 1),
                BPF_MOV64_IMM(BPF_REG_0, 0),
        };

        _cleanup_(bpf_program_unrefp) BPFProgram *p = NULL;
        int accounting_map_fd, r;
        bool access_enabled;

        assert(u);
        assert(ret);

        accounting_map_fd = is_ingress ?
                u->ip_accounting_ingress_map_fd :
                u->ip_accounting_egress_map_fd;

        access_enabled =
                u->ipv4_allow_map_fd >= 0 ||
                u->ipv6_allow_map_fd >= 0 ||
                u->ipv4_deny_map_fd >= 0 ||
                u->ipv6_deny_map_fd >= 0;

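        /* If neither IP accounting nor any access list is configured for this unit, don't generate a program
         * at all; return NULL so that the caller knows there is nothing to attach for this direction. */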
        if (accounting_map_fd < 0 && !access_enabled) {
                *ret = NULL;
                return 0;
        }

        r = bpf_program_new(BPF_PROG_TYPE_CGROUP_SKB, &p);
        if (r < 0)
                return r;

        r = bpf_program_add_instructions(p, pre_insn, ELEMENTSOF(pre_insn));
        if (r < 0)
                return r;

        if (access_enabled) {
                /*
                 * The simple rule this function translates into eBPF instructions is:
                 *
                 * - Access will be granted when an address matches an entry in @list_allow
                 * - Otherwise, access will be denied when an address matches an entry in @list_deny
                 * - Otherwise, access will be granted
                 */
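
                /*
                 * For example, with IPAddressDeny=any plus IPAddressAllow=192.168.0.0/16, a packet from
                 * 192.168.1.1 matches both lists, hence sets both ACCESS_ALLOWED and ACCESS_DENIED in R8
                 * and is passed, while a packet from 10.0.0.1 only sets ACCESS_DENIED and is dropped.
                 */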

                if (u->ipv4_deny_map_fd >= 0) {
                        r = add_lookup_instructions(p, u->ipv4_deny_map_fd, ETH_P_IP, is_ingress, ACCESS_DENIED);
                        if (r < 0)
                                return r;
                }

                if (u->ipv6_deny_map_fd >= 0) {
                        r = add_lookup_instructions(p, u->ipv6_deny_map_fd, ETH_P_IPV6, is_ingress, ACCESS_DENIED);
                        if (r < 0)
                                return r;
                }

                if (u->ipv4_allow_map_fd >= 0) {
                        r = add_lookup_instructions(p, u->ipv4_allow_map_fd, ETH_P_IP, is_ingress, ACCESS_ALLOWED);
                        if (r < 0)
                                return r;
                }

                if (u->ipv6_allow_map_fd >= 0) {
                        r = add_lookup_instructions(p, u->ipv6_allow_map_fd, ETH_P_IPV6, is_ingress, ACCESS_ALLOWED);
                        if (r < 0)
                                return r;
                }
        }

        r = bpf_program_add_instructions(p, post_insn, ELEMENTSOF(post_insn));
        if (r < 0)
                return r;

        if (accounting_map_fd >= 0) {
                struct bpf_insn insn[] = {
                        /*
                         * If R0 == 0, the packet will be denied; skip the accounting instructions in this case.
                         * The jump label will be fixed up later.
                         */
                        BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 0),

                        /* Count packets */
                        BPF_MOV64_IMM(BPF_REG_0, MAP_KEY_PACKETS), /* r0 = 0 */
                        BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, -4), /* *(u32 *)(fp - 4) = r0 */
                        BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
                        BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), /* r2 = fp - 4 */
                        BPF_LD_MAP_FD(BPF_REG_1, accounting_map_fd), /* load map fd to r1 */
                        BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
                        BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
                        BPF_MOV64_IMM(BPF_REG_1, 1), /* r1 = 1 */
                        BPF_RAW_INSN(BPF_STX | BPF_XADD | BPF_DW, BPF_REG_0, BPF_REG_1, 0, 0), /* xadd r0 += r1 */

                        /* Count bytes */
                        BPF_MOV64_IMM(BPF_REG_0, MAP_KEY_BYTES), /* r0 = 1 */
                        BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, -4), /* *(u32 *)(fp - 4) = r0 */
                        BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
                        BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), /* r2 = fp - 4 */
                        BPF_LD_MAP_FD(BPF_REG_1, accounting_map_fd),
                        BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
                        BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
                        BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_6, offsetof(struct __sk_buff, len)), /* r1 = skb->len */
                        BPF_RAW_INSN(BPF_STX | BPF_XADD | BPF_DW, BPF_REG_0, BPF_REG_1, 0, 0), /* xadd r0 += r1 */

                        /* Allow the packet to pass */
                        BPF_MOV64_IMM(BPF_REG_0, 1),
                };

                /* Jump label fixup */
                insn[0].off = ELEMENTSOF(insn) - 1;

                r = bpf_program_add_instructions(p, insn, ELEMENTSOF(insn));
                if (r < 0)
                        return r;
        }

        do {
                /*
                 * Exit from the eBPF program, R0 contains the verdict.
                 * 0 means the packet is denied, 1 means the packet may pass.
                 */
                struct bpf_insn insn[] = {
                        BPF_EXIT_INSN()
                };

                r = bpf_program_add_instructions(p, insn, ELEMENTSOF(insn));
                if (r < 0)
                        return r;
        } while (false);

        *ret = p;
        p = NULL;

        return 0;
}

static int bpf_firewall_count_access_items(IPAddressAccessItem *list, size_t *n_ipv4, size_t *n_ipv6) {
        IPAddressAccessItem *a;

        assert(n_ipv4);
        assert(n_ipv6);

        LIST_FOREACH(items, a, list) {
                switch (a->family) {

                case AF_INET:
                        (*n_ipv4)++;
                        break;

                case AF_INET6:
                        (*n_ipv6)++;
                        break;

                default:
                        return -EAFNOSUPPORT;
                }
        }

        return 0;
}

static int bpf_firewall_add_access_items(
                IPAddressAccessItem *list,
                int ipv4_map_fd,
                int ipv6_map_fd,
                int verdict) {

        struct bpf_lpm_trie_key *key_ipv4, *key_ipv6;
        uint64_t value = verdict;
        IPAddressAccessItem *a;
        int r;

        key_ipv4 = alloca0(offsetof(struct bpf_lpm_trie_key, data) + sizeof(uint32_t));
        key_ipv6 = alloca0(offsetof(struct bpf_lpm_trie_key, data) + sizeof(uint32_t) * 4);

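        /* Both keys are allocated once on the stack and reused for every list entry; their layout (prefixlen
         * followed by the address) and size must match what the maps were created with in
         * bpf_firewall_prepare_access_maps(). */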
        LIST_FOREACH(items, a, list) {
                switch (a->family) {

                case AF_INET:
                        key_ipv4->prefixlen = a->prefixlen;
                        memcpy(key_ipv4->data, &a->address, sizeof(uint32_t));

                        r = bpf_map_update_element(ipv4_map_fd, key_ipv4, &value);
                        if (r < 0)
                                return r;

                        break;

                case AF_INET6:
                        key_ipv6->prefixlen = a->prefixlen;
                        memcpy(key_ipv6->data, &a->address, 4 * sizeof(uint32_t));

                        r = bpf_map_update_element(ipv6_map_fd, key_ipv6, &value);
                        if (r < 0)
                                return r;

                        break;

                default:
                        return -EAFNOSUPPORT;
                }
        }

        return 0;
}

static int bpf_firewall_prepare_access_maps(
                Unit *u,
                int verdict,
                int *ret_ipv4_map_fd,
                int *ret_ipv6_map_fd) {

        _cleanup_close_ int ipv4_map_fd = -1, ipv6_map_fd = -1;
        size_t n_ipv4 = 0, n_ipv6 = 0;
        Unit *p;
        int r;

        assert(ret_ipv4_map_fd);
        assert(ret_ipv6_map_fd);

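        /* The effective access lists of a unit are the union of its own lists and those of all the slice
         * units it is contained in. The first walk over that hierarchy only counts the entries, so that the
         * LPM trie maps can be sized correctly; the second walk below then fills them in. */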
        for (p = u; p; p = UNIT_DEREF(p->slice)) {
                CGroupContext *cc;

                cc = unit_get_cgroup_context(p);
                if (!cc)
                        continue;

                r = bpf_firewall_count_access_items(verdict == ACCESS_ALLOWED ? cc->ip_address_allow : cc->ip_address_deny, &n_ipv4, &n_ipv6);
                if (r < 0)
                        return r;
        }

        if (n_ipv4 > 0) {
                ipv4_map_fd = bpf_map_new(
                                BPF_MAP_TYPE_LPM_TRIE,
                                offsetof(struct bpf_lpm_trie_key, data) + sizeof(uint32_t),
                                sizeof(uint64_t),
                                n_ipv4,
                                BPF_F_NO_PREALLOC);
                if (ipv4_map_fd < 0)
                        return ipv4_map_fd;
        }

        if (n_ipv6 > 0) {
                ipv6_map_fd = bpf_map_new(
                                BPF_MAP_TYPE_LPM_TRIE,
                                offsetof(struct bpf_lpm_trie_key, data) + sizeof(uint32_t) * 4,
                                sizeof(uint64_t),
                                n_ipv6,
                                BPF_F_NO_PREALLOC);
                if (ipv6_map_fd < 0)
                        return ipv6_map_fd;
        }

        for (p = u; p; p = UNIT_DEREF(p->slice)) {
                CGroupContext *cc;

                cc = unit_get_cgroup_context(p);
                if (!cc)
                        continue;

                r = bpf_firewall_add_access_items(verdict == ACCESS_ALLOWED ? cc->ip_address_allow : cc->ip_address_deny,
                                                  ipv4_map_fd, ipv6_map_fd, verdict);
                if (r < 0)
                        return r;
        }

        *ret_ipv4_map_fd = ipv4_map_fd;
        *ret_ipv6_map_fd = ipv6_map_fd;

        ipv4_map_fd = ipv6_map_fd = -1;
        return 0;
}

static int bpf_firewall_prepare_accounting_maps(bool enabled, int *fd_ingress, int *fd_egress) {
        int r;

        assert(fd_ingress);
        assert(fd_egress);

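        /* The accounting maps are plain BPF arrays with two uint64_t slots, indexed by MAP_KEY_PACKETS and
         * MAP_KEY_BYTES. They are created lazily here and kept open across recompilations, so that the
         * counters survive configuration reloads. */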
        if (enabled) {
                if (*fd_ingress < 0) {
                        r = bpf_map_new(BPF_MAP_TYPE_ARRAY, sizeof(int), sizeof(uint64_t), 2, 0);
                        if (r < 0)
                                return r;

                        *fd_ingress = r;
                }

                if (*fd_egress < 0) {
                        r = bpf_map_new(BPF_MAP_TYPE_ARRAY, sizeof(int), sizeof(uint64_t), 2, 0);
                        if (r < 0)
                                return r;

                        *fd_egress = r;
                }
        } else {
                *fd_ingress = safe_close(*fd_ingress);
                *fd_egress = safe_close(*fd_egress);
        }

        return 0;
}

int bpf_firewall_compile(Unit *u) {
        CGroupContext *cc;
        int r;

        assert(u);

        r = bpf_firewall_supported();
        if (r < 0)
                return r;
        if (r == 0) {
                log_debug("BPF firewalling not supported on this system, proceeding without.");
                return -EOPNOTSUPP;
        }

        /* Note that when we compile a new firewall we first flush out the access maps and the BPF programs themselves,
         * but we reuse the accounting maps. That way the firewall in effect always corresponds to the actual
         * configuration, but we don't flush out the accounting unnecessarily. */

        u->ip_bpf_ingress = bpf_program_unref(u->ip_bpf_ingress);
        u->ip_bpf_egress = bpf_program_unref(u->ip_bpf_egress);

        u->ipv4_allow_map_fd = safe_close(u->ipv4_allow_map_fd);
        u->ipv4_deny_map_fd = safe_close(u->ipv4_deny_map_fd);

        u->ipv6_allow_map_fd = safe_close(u->ipv6_allow_map_fd);
        u->ipv6_deny_map_fd = safe_close(u->ipv6_deny_map_fd);

        cc = unit_get_cgroup_context(u);
        if (!cc)
                return -EINVAL;

        r = bpf_firewall_prepare_access_maps(u, ACCESS_ALLOWED, &u->ipv4_allow_map_fd, &u->ipv6_allow_map_fd);
        if (r < 0)
                return log_error_errno(r, "Preparation of eBPF allow maps failed: %m");

        r = bpf_firewall_prepare_access_maps(u, ACCESS_DENIED, &u->ipv4_deny_map_fd, &u->ipv6_deny_map_fd);
        if (r < 0)
                return log_error_errno(r, "Preparation of eBPF deny maps failed: %m");

        r = bpf_firewall_prepare_accounting_maps(cc->ip_accounting, &u->ip_accounting_ingress_map_fd, &u->ip_accounting_egress_map_fd);
        if (r < 0)
                return log_error_errno(r, "Preparation of eBPF accounting maps failed: %m");

        r = bpf_firewall_compile_bpf(u, true, &u->ip_bpf_ingress);
        if (r < 0)
                return log_error_errno(r, "Compilation for ingress BPF program failed: %m");

        r = bpf_firewall_compile_bpf(u, false, &u->ip_bpf_egress);
        if (r < 0)
                return log_error_errno(r, "Compilation for egress BPF program failed: %m");

        return 0;
}

int bpf_firewall_install(Unit *u) {
        _cleanup_free_ char *path = NULL;
        CGroupContext *cc;
        int r;

        assert(u);

        if (!u->cgroup_path)
                return -EINVAL;

        cc = unit_get_cgroup_context(u);
        if (!cc)
                return -EINVAL;

        r = bpf_firewall_supported();
        if (r < 0)
                return r;
        if (r == 0) {
                log_debug("BPF firewalling not supported on this system, proceeding without.");
                return -EOPNOTSUPP;
        }

        r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path, NULL, &path);
        if (r < 0)
                return log_error_errno(r, "Failed to determine cgroup path: %m");

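        /* If the unit delegates its cgroup subtree, attach with BPF_F_ALLOW_OVERRIDE so that whoever manages
         * the delegated subtree may install their own programs further down; otherwise attach exclusively. */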
        if (u->ip_bpf_egress) {
                r = bpf_program_load_kernel(u->ip_bpf_egress, NULL, 0);
                if (r < 0)
                        return log_error_errno(r, "Kernel upload of egress BPF program failed: %m");

                r = bpf_program_cgroup_attach(u->ip_bpf_egress, BPF_CGROUP_INET_EGRESS, path, cc->delegate ? BPF_F_ALLOW_OVERRIDE : 0);
                if (r < 0)
                        return log_error_errno(r, "Attaching egress BPF program to cgroup %s failed: %m", path);
        } else {
                r = bpf_program_cgroup_detach(BPF_CGROUP_INET_EGRESS, path);
                if (r < 0)
                        return log_full_errno(r == -ENOENT ? LOG_DEBUG : LOG_ERR, r,
                                              "Detaching egress BPF program from cgroup failed: %m");
        }

        if (u->ip_bpf_ingress) {
                r = bpf_program_load_kernel(u->ip_bpf_ingress, NULL, 0);
                if (r < 0)
                        return log_error_errno(r, "Kernel upload of ingress BPF program failed: %m");

                r = bpf_program_cgroup_attach(u->ip_bpf_ingress, BPF_CGROUP_INET_INGRESS, path, cc->delegate ? BPF_F_ALLOW_OVERRIDE : 0);
                if (r < 0)
                        return log_error_errno(r, "Attaching ingress BPF program to cgroup %s failed: %m", path);
        } else {
                r = bpf_program_cgroup_detach(BPF_CGROUP_INET_INGRESS, path);
                if (r < 0)
                        return log_full_errno(r == -ENOENT ? LOG_DEBUG : LOG_ERR, r,
                                              "Detaching ingress BPF program from cgroup failed: %m");
        }

        return 0;
}

int bpf_firewall_read_accounting(int map_fd, uint64_t *ret_bytes, uint64_t *ret_packets) {
        uint64_t key, packets;
        int r;

        if (map_fd < 0)
                return -EBADF;

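        /* Read the packet counter into a local variable first, so that *ret_packets is only written once
         * both lookups have succeeded. */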
        if (ret_packets) {
                key = MAP_KEY_PACKETS;
                r = bpf_map_lookup_element(map_fd, &key, &packets);
                if (r < 0)
                        return r;
        }

        if (ret_bytes) {
                key = MAP_KEY_BYTES;
                r = bpf_map_lookup_element(map_fd, &key, ret_bytes);
                if (r < 0)
                        return r;
        }

        if (ret_packets)
                *ret_packets = packets;

        return 0;
}

int bpf_firewall_reset_accounting(int map_fd) {
        uint64_t key, value = 0;
        int r;

        if (map_fd < 0)
                return -EBADF;

        key = MAP_KEY_PACKETS;
        r = bpf_map_update_element(map_fd, &key, &value);
        if (r < 0)
                return r;

        key = MAP_KEY_BYTES;
        return bpf_map_update_element(map_fd, &key, &value);
}

int bpf_firewall_supported(void) {
        struct bpf_insn trivial[] = {
                BPF_MOV64_IMM(BPF_REG_0, 1),
                BPF_EXIT_INSN()
        };

        _cleanup_(bpf_program_unrefp) BPFProgram *program = NULL;
        static int supported = -1;
        int fd, r;

        /* Checks whether BPF firewalling is supported. For this, we check three things:
         *
         * a) whether we are privileged
         * b) whether the unified cgroup hierarchy is being used
         * c) whether the kernel's BPF implementation supports LPM TRIE maps, which we require
         */

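        /* The probe below is not free, hence its result is cached in the static variable above and reused on
         * all subsequent calls. */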
        if (supported >= 0)
                return supported;

        if (geteuid() != 0) {
                log_debug("Not enough privileges, BPF firewalling is not supported.");
                return supported = false;
        }

        r = cg_unified_controller(SYSTEMD_CGROUP_CONTROLLER);
        if (r < 0)
                return log_error_errno(r, "Can't determine whether the unified hierarchy is used: %m");
        if (r == 0)
                return supported = false;

        fd = bpf_map_new(BPF_MAP_TYPE_LPM_TRIE,
                         offsetof(struct bpf_lpm_trie_key, data) + sizeof(uint64_t),
                         sizeof(uint64_t),
                         1,
                         BPF_F_NO_PREALLOC);
        if (fd < 0) {
                log_debug_errno(fd, "Can't allocate BPF LPM TRIE map, BPF firewalling is not supported: %m");
                return supported = false;
        }

        safe_close(fd);

        r = bpf_program_new(BPF_PROG_TYPE_CGROUP_SKB, &program);
        if (r < 0) {
                log_debug_errno(r, "Can't allocate CGROUP SKB BPF program, BPF firewalling is not supported: %m");
                return supported = false;
        }

        r = bpf_program_add_instructions(program, trivial, ELEMENTSOF(trivial));
        if (r < 0) {
                log_debug_errno(r, "Can't add trivial instructions to CGROUP SKB BPF program, BPF firewalling is not supported: %m");
                return supported = false;
        }

        r = bpf_program_load_kernel(program, NULL, 0);
        if (r < 0) {
                log_debug_errno(r, "Can't load kernel CGROUP SKB BPF program, BPF firewalling is not supported: %m");
                return supported = false;
        }

        return supported = true;
}