1 /* SPDX-License-Identifier: LGPL-2.1+ */
3 This file is part of systemd.
5 Copyright 2016 Daniel Mack
7 systemd is free software; you can redistribute it and/or modify it
8 under the terms of the GNU Lesser General Public License as published by
9 the Free Software Foundation; either version 2.1 of the License, or
10 (at your option) any later version.
12 systemd is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 Lesser General Public License for more details.
17 You should have received a copy of the GNU Lesser General Public License
18 along with systemd; If not, see <http://www.gnu.org/licenses/>.
21 #include <arpa/inet.h>
25 #include <linux/libbpf.h>
26 #include <net/ethernet.h>
28 #include <netinet/ip.h>
29 #include <netinet/ip6.h>
36 #include "alloc-util.h"
37 #include "bpf-firewall.h"
38 #include "bpf-program.h"
40 #include "ip-address-access.h"
53 /* Compile instructions for one list of addresses, one direction and one specific verdict on matches. */
55 static int add_lookup_instructions(
62 int r
, addr_offset
, addr_size
;
70 addr_size
= sizeof(uint32_t);
71 addr_offset
= is_ingress
?
72 offsetof(struct iphdr
, saddr
) :
73 offsetof(struct iphdr
, daddr
);
77 addr_size
= 4 * sizeof(uint32_t);
78 addr_offset
= is_ingress
?
79 offsetof(struct ip6_hdr
, ip6_src
.s6_addr
) :
80 offsetof(struct ip6_hdr
, ip6_dst
.s6_addr
);
88 /* Compare IPv4 with one word instruction (32bit) */
89 struct bpf_insn insn
[] = {
90 /* If skb->protocol does not match the requested protocol (ETH_P_IP or ETH_P_IPV6), skip this whole block. The offset will be set later. */
91 BPF_JMP_IMM(BPF_JNE
, BPF_REG_7
, htobe16(protocol
), 0),
94 * Call into BPF_FUNC_skb_load_bytes to load the dst/src IP address
96 * R1: Pointer to the skb
98 * R3: Destination buffer on the stack (r10 - addr_size)
99 * R4: Number of bytes to read (addr_size: 4 for IPv4, 16 for IPv6)
102 BPF_MOV64_REG(BPF_REG_1
, BPF_REG_6
),
103 BPF_MOV32_IMM(BPF_REG_2
, addr_offset
),
105 BPF_MOV64_REG(BPF_REG_3
, BPF_REG_10
),
106 BPF_ALU64_IMM(BPF_ADD
, BPF_REG_3
, -addr_size
),
108 BPF_MOV32_IMM(BPF_REG_4
, addr_size
),
109 BPF_RAW_INSN(BPF_JMP
| BPF_CALL
, 0, 0, 0, BPF_FUNC_skb_load_bytes
),
112 * Call into BPF_FUNC_map_lookup_elem to see if the address matches any entry in the
113 * LPM trie map. For this to work, the prefixlen field of 'struct bpf_lpm_trie_key'
114 * has to be set to the maximum possible value.
116 * On success, the looked up value is stored in R0. For this application, the actual
117 * value doesn't matter, however; we just set the bit in @verdict in R8 if we found any
121 BPF_LD_MAP_FD(BPF_REG_1
, map_fd
),
122 BPF_MOV64_REG(BPF_REG_2
, BPF_REG_10
),
123 BPF_ALU64_IMM(BPF_ADD
, BPF_REG_2
, -addr_size
- sizeof(uint32_t)),
124 BPF_ST_MEM(BPF_W
, BPF_REG_2
, 0, addr_size
* 8),
126 BPF_RAW_INSN(BPF_JMP
| BPF_CALL
, 0, 0, 0, BPF_FUNC_map_lookup_elem
),
127 BPF_JMP_IMM(BPF_JEQ
, BPF_REG_0
, 0, 1),
128 BPF_ALU32_IMM(BPF_OR
, BPF_REG_8
, verdict
),
131 /* Jump label fixup */
132 insn
[0].off
= ELEMENTSOF(insn
) - 1;
134 r
= bpf_program_add_instructions(p
, insn
, ELEMENTSOF(insn
));
143 static int bpf_firewall_compile_bpf(
148 struct bpf_insn pre_insn
[] = {
150 * When the eBPF program is entered, R1 contains the address of the skb.
151 * However, R1-R5 are scratch registers that are not preserved when calling
152 * into kernel functions, so we need to save anything that's supposed to
153 * stay around to R6-R9. Save the skb to R6.
155 BPF_MOV64_REG(BPF_REG_6
, BPF_REG_1
),
158 * Although we cannot access the skb data directly from eBPF programs used in this
159 * scenario, the kernel has prepared some fields for us to access through struct __sk_buff.
160 * Load the protocol (IPv4, IPv6) used by the packet in flight once and cache it in R7
163 BPF_LDX_MEM(BPF_W
, BPF_REG_7
, BPF_REG_6
, offsetof(struct __sk_buff
, protocol
)),
166 * R8 is used to keep track of whether any address check has explicitly allowed or denied the packet
167 * through ACCESS_DENIED or ACCESS_ALLOWED bits. Reset them both to 0 in the beginning.
169 BPF_MOV32_IMM(BPF_REG_8
, 0),
173 * The access checkers compiled for the configured allowance and denial lists
174 * write to R8 at runtime. The following code prepares for an early exit that
175 * skips the accounting if the packet is denied.
178 * if (R8 == ACCESS_DENIED)
181 * This means that if both ACCESS_DENIED and ACCESS_ALLOWED are set, the packet
182 * is allowed to pass.
184 struct bpf_insn post_insn
[] = {
185 BPF_MOV64_IMM(BPF_REG_0
, 1),
186 BPF_JMP_IMM(BPF_JNE
, BPF_REG_8
, ACCESS_DENIED
, 1),
187 BPF_MOV64_IMM(BPF_REG_0
, 0),
190 _cleanup_(bpf_program_unrefp
) BPFProgram
*p
= NULL
;
191 int accounting_map_fd
, r
;
197 accounting_map_fd
= is_ingress
?
198 u
->ip_accounting_ingress_map_fd
:
199 u
->ip_accounting_egress_map_fd
;
202 u
->ipv4_allow_map_fd
>= 0 ||
203 u
->ipv6_allow_map_fd
>= 0 ||
204 u
->ipv4_deny_map_fd
>= 0 ||
205 u
->ipv6_deny_map_fd
>= 0;
207 if (accounting_map_fd
< 0 && !access_enabled
) {
212 r
= bpf_program_new(BPF_PROG_TYPE_CGROUP_SKB
, &p
);
216 r
= bpf_program_add_instructions(p
, pre_insn
, ELEMENTSOF(pre_insn
));
220 if (access_enabled
) {
222 * The simple rule this function translates into eBPF instructions is:
224 * - Access will be granted when an address matches an entry in @list_allow
225 * - Otherwise, access will be denied when an address matches an entry in @list_deny
226 * - Otherwise, access will be granted
229 if (u
->ipv4_deny_map_fd
>= 0) {
230 r
= add_lookup_instructions(p
, u
->ipv4_deny_map_fd
, ETH_P_IP
, is_ingress
, ACCESS_DENIED
);
235 if (u
->ipv6_deny_map_fd
>= 0) {
236 r
= add_lookup_instructions(p
, u
->ipv6_deny_map_fd
, ETH_P_IPV6
, is_ingress
, ACCESS_DENIED
);
241 if (u
->ipv4_allow_map_fd
>= 0) {
242 r
= add_lookup_instructions(p
, u
->ipv4_allow_map_fd
, ETH_P_IP
, is_ingress
, ACCESS_ALLOWED
);
247 if (u
->ipv6_allow_map_fd
>= 0) {
248 r
= add_lookup_instructions(p
, u
->ipv6_allow_map_fd
, ETH_P_IPV6
, is_ingress
, ACCESS_ALLOWED
);
254 r
= bpf_program_add_instructions(p
, post_insn
, ELEMENTSOF(post_insn
));
258 if (accounting_map_fd
>= 0) {
259 struct bpf_insn insn
[] = {
261 * If R0 == 0, the packet will be denied; skip the accounting instructions in this case.
262 * The jump label will be fixed up later.
264 BPF_JMP_IMM(BPF_JEQ
, BPF_REG_0
, 0, 0),
267 BPF_MOV64_IMM(BPF_REG_0
, MAP_KEY_PACKETS
), /* r0 = 0 */
268 BPF_STX_MEM(BPF_W
, BPF_REG_10
, BPF_REG_0
, -4), /* *(u32 *)(fp - 4) = r0 */
269 BPF_MOV64_REG(BPF_REG_2
, BPF_REG_10
),
270 BPF_ALU64_IMM(BPF_ADD
, BPF_REG_2
, -4), /* r2 = fp - 4 */
271 BPF_LD_MAP_FD(BPF_REG_1
, accounting_map_fd
), /* load map fd to r1 */
272 BPF_RAW_INSN(BPF_JMP
| BPF_CALL
, 0, 0, 0, BPF_FUNC_map_lookup_elem
),
273 BPF_JMP_IMM(BPF_JEQ
, BPF_REG_0
, 0, 2),
274 BPF_MOV64_IMM(BPF_REG_1
, 1), /* r1 = 1 */
275 BPF_RAW_INSN(BPF_STX
| BPF_XADD
| BPF_DW
, BPF_REG_0
, BPF_REG_1
, 0, 0), /* xadd r0 += r1 */
278 BPF_MOV64_IMM(BPF_REG_0
, MAP_KEY_BYTES
), /* r0 = 1 */
279 BPF_STX_MEM(BPF_W
, BPF_REG_10
, BPF_REG_0
, -4), /* *(u32 *)(fp - 4) = r0 */
280 BPF_MOV64_REG(BPF_REG_2
, BPF_REG_10
),
281 BPF_ALU64_IMM(BPF_ADD
, BPF_REG_2
, -4), /* r2 = fp - 4 */
282 BPF_LD_MAP_FD(BPF_REG_1
, accounting_map_fd
),
283 BPF_RAW_INSN(BPF_JMP
| BPF_CALL
, 0, 0, 0, BPF_FUNC_map_lookup_elem
),
284 BPF_JMP_IMM(BPF_JEQ
, BPF_REG_0
, 0, 2),
285 BPF_LDX_MEM(BPF_W
, BPF_REG_1
, BPF_REG_6
, offsetof(struct __sk_buff
, len
)), /* r1 = skb->len */
286 BPF_RAW_INSN(BPF_STX
| BPF_XADD
| BPF_DW
, BPF_REG_0
, BPF_REG_1
, 0, 0), /* xadd r0 += r1 */
288 /* Allow the packet to pass */
289 BPF_MOV64_IMM(BPF_REG_0
, 1),
292 /* Jump label fixup */
293 insn
[0].off
= ELEMENTSOF(insn
) - 1;
295 r
= bpf_program_add_instructions(p
, insn
, ELEMENTSOF(insn
));
302 * Exit from the eBPF program, R0 contains the verdict.
303 * 0 means the packet is denied, 1 means the packet may pass.
305 struct bpf_insn insn
[] = {
309 r
= bpf_program_add_instructions(p
, insn
, ELEMENTSOF(insn
));
319 static int bpf_firewall_count_access_items(IPAddressAccessItem
*list
, size_t *n_ipv4
, size_t *n_ipv6
) {
320 IPAddressAccessItem
*a
;
325 LIST_FOREACH(items
, a
, list
) {
337 return -EAFNOSUPPORT
;
344 static int bpf_firewall_add_access_items(
345 IPAddressAccessItem
*list
,
350 struct bpf_lpm_trie_key
*key_ipv4
, *key_ipv6
;
351 uint64_t value
= verdict
;
352 IPAddressAccessItem
*a
;
355 key_ipv4
= alloca0(offsetof(struct bpf_lpm_trie_key
, data
) + sizeof(uint32_t));
356 key_ipv6
= alloca0(offsetof(struct bpf_lpm_trie_key
, data
) + sizeof(uint32_t) * 4);
358 LIST_FOREACH(items
, a
, list
) {
362 key_ipv4
->prefixlen
= a
->prefixlen
;
363 memcpy(key_ipv4
->data
, &a
->address
, sizeof(uint32_t));
365 r
= bpf_map_update_element(ipv4_map_fd
, key_ipv4
, &value
);
372 key_ipv6
->prefixlen
= a
->prefixlen
;
373 memcpy(key_ipv6
->data
, &a
->address
, 4 * sizeof(uint32_t));
375 r
= bpf_map_update_element(ipv6_map_fd
, key_ipv6
, &value
);
382 return -EAFNOSUPPORT
;
389 static int bpf_firewall_prepare_access_maps(
392 int *ret_ipv4_map_fd
,
393 int *ret_ipv6_map_fd
) {
395 _cleanup_close_
int ipv4_map_fd
= -1, ipv6_map_fd
= -1;
396 size_t n_ipv4
= 0, n_ipv6
= 0;
400 assert(ret_ipv4_map_fd
);
401 assert(ret_ipv6_map_fd
);
403 for (p
= u
; p
; p
= UNIT_DEREF(p
->slice
)) {
406 cc
= unit_get_cgroup_context(p
);
410 bpf_firewall_count_access_items(verdict
== ACCESS_ALLOWED
? cc
->ip_address_allow
: cc
->ip_address_deny
, &n_ipv4
, &n_ipv6
);
414 ipv4_map_fd
= bpf_map_new(
415 BPF_MAP_TYPE_LPM_TRIE
,
416 offsetof(struct bpf_lpm_trie_key
, data
) + sizeof(uint32_t),
425 ipv6_map_fd
= bpf_map_new(
426 BPF_MAP_TYPE_LPM_TRIE
,
427 offsetof(struct bpf_lpm_trie_key
, data
) + sizeof(uint32_t)*4,
435 for (p
= u
; p
; p
= UNIT_DEREF(p
->slice
)) {
438 cc
= unit_get_cgroup_context(p
);
442 r
= bpf_firewall_add_access_items(verdict
== ACCESS_ALLOWED
? cc
->ip_address_allow
: cc
->ip_address_deny
,
443 ipv4_map_fd
, ipv6_map_fd
, verdict
);
448 *ret_ipv4_map_fd
= ipv4_map_fd
;
449 *ret_ipv6_map_fd
= ipv6_map_fd
;
451 ipv4_map_fd
= ipv6_map_fd
= -1;
455 static int bpf_firewall_prepare_accounting_maps(Unit
*u
, bool enabled
, int *fd_ingress
, int *fd_egress
) {
463 if (*fd_ingress
< 0) {
464 r
= bpf_map_new(BPF_MAP_TYPE_ARRAY
, sizeof(int), sizeof(uint64_t), 2, 0);
471 if (*fd_egress
< 0) {
473 r
= bpf_map_new(BPF_MAP_TYPE_ARRAY
, sizeof(int), sizeof(uint64_t), 2, 0);
481 *fd_ingress
= safe_close(*fd_ingress
);
482 *fd_egress
= safe_close(*fd_egress
);
484 zero(u
->ip_accounting_extra
);
490 int bpf_firewall_compile(Unit
*u
) {
496 cc
= unit_get_cgroup_context(u
);
500 supported
= bpf_firewall_supported();
503 if (supported
== BPF_FIREWALL_UNSUPPORTED
) {
504 log_debug("BPF firewalling not supported on this manager, proceeding without.");
507 if (supported
!= BPF_FIREWALL_SUPPORTED_WITH_MULTI
&& u
->type
== UNIT_SLICE
) {
508 /* If BPF_F_ALLOW_MULTI is not supported we don't support any BPF magic on inner nodes (i.e. on slice
509 * units), since that would mean leaf nodes couldn't do any BPF anymore at all. Under the assumption
510 * that BPF is more interesting on leaf nodes we hence avoid it on inner nodes in that case. This is
511 * consistent with old systemd behaviour from before v238, where BPF wasn't supported in inner nodes at
513 log_debug("BPF_F_ALLOW_MULTI is not supported on this manager, not doing BPF firewall on slice units.");
517 /* Note that when we compile a new firewall we first flush out the access maps and the BPF programs themselves,
518 * but we reuse the accounting maps. That way the firewall in effect always maps to the actual
519 * configuration, but we don't flush out the accounting unnecessarily */
521 u
->ip_bpf_ingress
= bpf_program_unref(u
->ip_bpf_ingress
);
522 u
->ip_bpf_egress
= bpf_program_unref(u
->ip_bpf_egress
);
524 u
->ipv4_allow_map_fd
= safe_close(u
->ipv4_allow_map_fd
);
525 u
->ipv4_deny_map_fd
= safe_close(u
->ipv4_deny_map_fd
);
527 u
->ipv6_allow_map_fd
= safe_close(u
->ipv6_allow_map_fd
);
528 u
->ipv6_deny_map_fd
= safe_close(u
->ipv6_deny_map_fd
);
530 if (u
->type
!= UNIT_SLICE
) {
531 /* In inner nodes we only do accounting, we do not actually bother with access control. However, leaf
532 * nodes will incorporate all IP access rules set on all their parent nodes. This has the benefit that
533 * they can optionally cancel out system-wide rules. Since inner nodes can't contain processes this
534 * means that all configured IP access rules *will* take effect on processes, even though we never
535 * compile them for inner nodes. */
537 r
= bpf_firewall_prepare_access_maps(u
, ACCESS_ALLOWED
, &u
->ipv4_allow_map_fd
, &u
->ipv6_allow_map_fd
);
539 return log_error_errno(r
, "Preparation of eBPF allow maps failed: %m");
541 r
= bpf_firewall_prepare_access_maps(u
, ACCESS_DENIED
, &u
->ipv4_deny_map_fd
, &u
->ipv6_deny_map_fd
);
543 return log_error_errno(r
, "Preparation of eBPF deny maps failed: %m");
546 r
= bpf_firewall_prepare_accounting_maps(u
, cc
->ip_accounting
, &u
->ip_accounting_ingress_map_fd
, &u
->ip_accounting_egress_map_fd
);
548 return log_error_errno(r
, "Preparation of eBPF accounting maps failed: %m");
550 r
= bpf_firewall_compile_bpf(u
, true, &u
->ip_bpf_ingress
);
552 return log_error_errno(r
, "Compilation for ingress BPF program failed: %m");
554 r
= bpf_firewall_compile_bpf(u
, false, &u
->ip_bpf_egress
);
556 return log_error_errno(r
, "Compilation for egress BPF program failed: %m");
561 int bpf_firewall_install(Unit
*u
) {
562 _cleanup_free_
char *path
= NULL
;
569 cc
= unit_get_cgroup_context(u
);
574 if (!u
->cgroup_realized
)
577 supported
= bpf_firewall_supported();
580 if (supported
== BPF_FIREWALL_UNSUPPORTED
) {
581 log_debug("BPF firewalling not supported on this manager, proceeding without.");
584 if (supported
!= BPF_FIREWALL_SUPPORTED_WITH_MULTI
&& u
->type
== UNIT_SLICE
) {
585 log_debug("BPF_F_ALLOW_MULTI is not supported on this manager, not doing BPF firewall on slice units.");
589 r
= cg_get_path(SYSTEMD_CGROUP_CONTROLLER
, u
->cgroup_path
, NULL
, &path
);
591 return log_error_errno(r
, "Failed to determine cgroup path: %m");
593 flags
= (supported
== BPF_FIREWALL_SUPPORTED_WITH_MULTI
&&
594 (u
->type
== UNIT_SLICE
|| unit_cgroup_delegate(u
))) ? BPF_F_ALLOW_MULTI
: 0;
596 /* Unref the old BPF program (which will implicitly detach it) right before attaching the new program, to
597 * minimize the time window when we don't account for IP traffic. */
598 u
->ip_bpf_egress_installed
= bpf_program_unref(u
->ip_bpf_egress_installed
);
599 u
->ip_bpf_ingress_installed
= bpf_program_unref(u
->ip_bpf_ingress_installed
);
601 if (u
->ip_bpf_egress
) {
602 r
= bpf_program_cgroup_attach(u
->ip_bpf_egress
, BPF_CGROUP_INET_EGRESS
, path
, flags
);
604 return log_error_errno(r
, "Attaching egress BPF program to cgroup %s failed: %m", path
);
606 /* Remember that this BPF program is installed now. */
607 u
->ip_bpf_egress_installed
= bpf_program_ref(u
->ip_bpf_egress
);
610 if (u
->ip_bpf_ingress
) {
611 r
= bpf_program_cgroup_attach(u
->ip_bpf_ingress
, BPF_CGROUP_INET_INGRESS
, path
, flags
);
613 return log_error_errno(r
, "Attaching ingress BPF program to cgroup %s failed: %m", path
);
615 u
->ip_bpf_ingress_installed
= bpf_program_ref(u
->ip_bpf_ingress
);
621 int bpf_firewall_read_accounting(int map_fd
, uint64_t *ret_bytes
, uint64_t *ret_packets
) {
622 uint64_t key
, packets
;
629 key
= MAP_KEY_PACKETS
;
630 r
= bpf_map_lookup_element(map_fd
, &key
, &packets
);
637 r
= bpf_map_lookup_element(map_fd
, &key
, ret_bytes
);
643 *ret_packets
= packets
;
648 int bpf_firewall_reset_accounting(int map_fd
) {
649 uint64_t key
, value
= 0;
655 key
= MAP_KEY_PACKETS
;
656 r
= bpf_map_update_element(map_fd
, &key
, &value
);
661 return bpf_map_update_element(map_fd
, &key
, &value
);
664 int bpf_firewall_supported(void) {
665 struct bpf_insn trivial
[] = {
666 BPF_MOV64_IMM(BPF_REG_0
, 1),
670 _cleanup_(bpf_program_unrefp
) BPFProgram
*program
= NULL
;
671 static int supported
= -1;
675 /* Checks whether BPF firewalling is supported. For this, we check five things:
677 * a) whether we are privileged
678 * b) whether the unified hierarchy is being used
679 * c) the BPF implementation in the kernel supports BPF LPM TRIE maps, which we require
680 * d) the BPF implementation in the kernel supports BPF_PROG_TYPE_CGROUP_SKB programs, which we require
681 * e) the BPF implementation in the kernel supports the BPF_PROG_ATTACH call, which we require
688 if (geteuid() != 0) {
689 log_debug("Not enough privileges, BPF firewalling is not supported.");
690 return supported
= BPF_FIREWALL_UNSUPPORTED
;
693 r
= cg_unified_controller(SYSTEMD_CGROUP_CONTROLLER
);
695 return log_error_errno(r
, "Can't determine whether the unified hierarchy is used: %m");
697 log_debug("Not running with unified cgroups, BPF firewalling is not supported.");
698 return supported
= BPF_FIREWALL_UNSUPPORTED
;
701 fd
= bpf_map_new(BPF_MAP_TYPE_LPM_TRIE
,
702 offsetof(struct bpf_lpm_trie_key
, data
) + sizeof(uint64_t),
707 log_debug_errno(r
, "Can't allocate BPF LPM TRIE map, BPF firewalling is not supported: %m");
708 return supported
= BPF_FIREWALL_UNSUPPORTED
;
713 if (bpf_program_new(BPF_PROG_TYPE_CGROUP_SKB
, &program
) < 0) {
714 log_debug_errno(r
, "Can't allocate CGROUP SKB BPF program, BPF firewalling is not supported: %m");
715 return supported
= BPF_FIREWALL_UNSUPPORTED
;
718 r
= bpf_program_add_instructions(program
, trivial
, ELEMENTSOF(trivial
));
720 log_debug_errno(r
, "Can't add trivial instructions to CGROUP SKB BPF program, BPF firewalling is not supported: %m");
721 return supported
= BPF_FIREWALL_UNSUPPORTED
;
724 r
= bpf_program_load_kernel(program
, NULL
, 0);
726 log_debug_errno(r
, "Can't load kernel CGROUP SKB BPF program, BPF firewalling is not supported: %m");
727 return supported
= BPF_FIREWALL_UNSUPPORTED
;
730 /* Unfortunately the kernel allows us to create BPF_PROG_TYPE_CGROUP_SKB programs even when CONFIG_CGROUP_BPF
731 * is turned off at kernel compilation time. This sucks of course: why does it allow us to create a cgroup BPF
732 * program if we can't do a thing with it later?
734 * We detect this case by issuing the BPF_PROG_ATTACH bpf() call with invalid file descriptors: if
735 * CONFIG_CGROUP_BPF is turned off, then the call will fail early with EINVAL. If it is turned on the
736 * parameters are validated however, and that'll fail with EBADF then. */
738 attr
= (union bpf_attr
) {
739 .attach_type
= BPF_CGROUP_INET_EGRESS
,
744 r
= bpf(BPF_PROG_ATTACH
, &attr
, sizeof(attr
));
746 if (errno
!= EBADF
) {
747 log_debug_errno(errno
, "Didn't get EBADF from BPF_PROG_ATTACH, BPF firewalling is not supported: %m");
748 return supported
= BPF_FIREWALL_UNSUPPORTED
;
753 log_debug("Wut? Kernel accepted our invalid BPF_PROG_ATTACH call? Something is weird, assuming BPF firewalling is broken and hence not supported.");
754 return supported
= BPF_FIREWALL_UNSUPPORTED
;
757 /* So now we know that the BPF program is generally available, let's see if BPF_F_ALLOW_MULTI is also supported
758 * (which was added in kernel 4.15). We use a similar logic as before, but this time we use
759 * BPF_F_ALLOW_MULTI. Since the flags are checked early in the system call we'll get EINVAL if it's not
760 * supported, and EBADF as before if it is available. */
762 attr
= (union bpf_attr
) {
763 .attach_type
= BPF_CGROUP_INET_EGRESS
,
766 .attach_flags
= BPF_F_ALLOW_MULTI
,
769 r
= bpf(BPF_PROG_ATTACH
, &attr
, sizeof(attr
));
771 if (errno
== EBADF
) {
772 log_debug_errno(errno
, "Got EBADF when using BPF_F_ALLOW_MULTI, which indicates it is supported. Yay!");
773 return supported
= BPF_FIREWALL_SUPPORTED_WITH_MULTI
;
777 log_debug_errno(errno
, "Got EINVAL error when using BPF_F_ALLOW_MULTI, which indicates it's not supported.");
779 log_debug_errno(errno
, "Got unexpected error when using BPF_F_ALLOW_MULTI, assuming it's not supported: %m");
781 return supported
= BPF_FIREWALL_SUPPORTED
;
783 log_debug("Wut? Kernel accepted our invalid BPF_PROG_ATTACH+BPF_F_ALLOW_MULTI call? Something is weird, assuming BPF firewalling is broken and hence not supported.");
784 return supported
= BPF_FIREWALL_UNSUPPORTED
;