/* SPDX-License-Identifier: LGPL-2.1+ */
/***
  This file is part of systemd.

  Copyright 2016 Daniel Mack

  systemd is free software; you can redistribute it and/or modify it
  under the terms of the GNU Lesser General Public License as published by
  the Free Software Foundation; either version 2.1 of the License, or
  (at your option) any later version.

  systemd is distributed in the hope that it will be useful, but
  WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  Lesser General Public License for more details.

  You should have received a copy of the GNU Lesser General Public License
  along with systemd; If not, see <http://www.gnu.org/licenses/>.
***/

#include <arpa/inet.h>
#include <errno.h>
#include <linux/libbpf.h>
#include <net/ethernet.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <stddef.h>
#include <unistd.h>

#include "alloc-util.h"
#include "bpf-firewall.h"
#include "bpf-program.h"
#include "fd-util.h"
#include "ip-address-access.h"
#include "unit.h"
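
/* Keys into the per-direction accounting maps, and the verdict bits the compiled access checkers OR into R8.
 * (The numeric values below follow the inline comments further down: MAP_KEY_PACKETS is slot 0, MAP_KEY_BYTES
 * is slot 1, and the two verdict bits are distinct so both may end up set for the same packet.) */
enum {
        MAP_KEY_PACKETS,
        MAP_KEY_BYTES,
};

enum {
        ACCESS_ALLOWED = 1,
        ACCESS_DENIED  = 2,
};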

/* Compile instructions for one list of addresses, one direction and one specific verdict on matches. */

static int add_lookup_instructions(
                BPFProgram *p,
                int map_fd,
                int protocol,
                bool is_ingress,
                int verdict) {

        int r, addr_offset, addr_size;

        assert(p);
        assert(map_fd >= 0);

        switch (protocol) {

        case ETH_P_IP:
                addr_size = sizeof(uint32_t);
                addr_offset = is_ingress ?
                        offsetof(struct iphdr, saddr) :
                        offsetof(struct iphdr, daddr);
                break;

        case ETH_P_IPV6:
                addr_size = 4 * sizeof(uint32_t);
                addr_offset = is_ingress ?
                        offsetof(struct ip6_hdr, ip6_src.s6_addr) :
                        offsetof(struct ip6_hdr, ip6_dst.s6_addr);
                break;

        default:
                return -EAFNOSUPPORT;
        }

        do {
                /* Compare the packet's protocol and, on a match, look its address up in the given LPM trie map. */
                struct bpf_insn insn[] = {
                        /* If skb->protocol doesn't match the protocol this block was compiled for, skip the
                         * whole block. The jump offset will be set later. */
                        BPF_JMP_IMM(BPF_JNE, BPF_REG_7, htobe16(protocol), 0),

                        /*
                         * Call into BPF_FUNC_skb_load_bytes to load the dst/src IP address
                         *
                         * R1: Pointer to the skb
                         * R2: Data offset
                         * R3: Destination buffer on the stack (r10 - addr_size)
                         * R4: Number of bytes to read (addr_size)
                         */

                        BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
                        BPF_MOV32_IMM(BPF_REG_2, addr_offset),

                        BPF_MOV64_REG(BPF_REG_3, BPF_REG_10),
                        BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, -addr_size),

                        BPF_MOV32_IMM(BPF_REG_4, addr_size),
                        BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_skb_load_bytes),

                        /*
                         * Call into BPF_FUNC_map_lookup_elem to see if the address matches any entry in the
                         * LPM trie map. For this to work, the prefixlen field of 'struct bpf_lpm_trie_key'
                         * has to be set to the maximum possible value.
                         *
                         * On success, the looked up value is stored in R0. For this application, the actual
                         * value doesn't matter, however; we just OR the @verdict bits into R8 if we found any
                         * matching entry.
                         */

                        BPF_LD_MAP_FD(BPF_REG_1, map_fd),
                        BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
                        BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -addr_size - sizeof(uint32_t)),
                        BPF_ST_MEM(BPF_W, BPF_REG_2, 0, addr_size * 8),
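
                        /* At this point the stack holds a key laid out like 'struct bpf_lpm_trie_key':
                         * a 32-bit prefixlen (addr_size * 8, i.e. the full address width) at fp - addr_size - 4,
                         * immediately followed by the address bytes that skb_load_bytes wrote at fp - addr_size. */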

                        BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
                        BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
                        BPF_ALU32_IMM(BPF_OR, BPF_REG_8, verdict),
                };

                /* Jump label fixup */
                insn[0].off = ELEMENTSOF(insn) - 1;

                r = bpf_program_add_instructions(p, insn, ELEMENTSOF(insn));
                if (r < 0)
                        return r;

        } while (false);

        return 0;
}
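
/* For orientation only: each block emitted by add_lookup_instructions() corresponds roughly to the following
 * C logic, with 'key' standing in for the buffer assembled on the eBPF stack (a sketch, not code that runs
 * here):
 *
 *     if (skb->protocol == htobe16(protocol)) {
 *             struct bpf_lpm_trie_key *key = alloca(offsetof(struct bpf_lpm_trie_key, data) + addr_size);
 *
 *             key->prefixlen = addr_size * 8;
 *             memcpy(key->data, address_in_packet, addr_size);
 *
 *             if (bpf_map_lookup_elem(map, key))
 *                     verdict_bits |= verdict;          (i.e. R8 |= verdict)
 *     }
 */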

static int bpf_firewall_compile_bpf(
                Unit *u,
                bool is_ingress,
                BPFProgram **ret) {

        struct bpf_insn pre_insn[] = {
                /*
                 * When the eBPF program is entered, R1 contains the address of the skb.
                 * However, R1-R5 are scratch registers that are not preserved when calling
                 * into kernel functions, so we need to save anything that's supposed to
                 * stay around to R6-R9. Save the skb to R6.
                 */
                BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),

                /*
                 * Although we cannot access the skb data directly from eBPF programs used in this
                 * scenario, the kernel has prepared some fields for us to access through struct __sk_buff.
                 * Load the protocol (IPv4, IPv6) used by the packet in flight once and cache it in R7
                 * for later use.
                 */
                BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6, offsetof(struct __sk_buff, protocol)),

                /*
                 * R8 is used to keep track of whether any address check has explicitly allowed or denied the packet
                 * through ACCESS_DENIED or ACCESS_ALLOWED bits. Reset them both to 0 in the beginning.
                 */
                BPF_MOV32_IMM(BPF_REG_8, 0),
        };

        /*
         * The access checkers compiled for the configured allowance and denial lists
         * write to R8 at runtime. The following code prepares for an early exit that
         * skips the accounting if the packet is denied.
         *
         * R0 = 1
         * if (R8 == ACCESS_DENIED)
         *     R0 = 0
         *
         * This means that if both ACCESS_DENIED and ACCESS_ALLOWED are set, the packet
         * is allowed to pass.
         */
        struct bpf_insn post_insn[] = {
                BPF_MOV64_IMM(BPF_REG_0, 1),
                BPF_JMP_IMM(BPF_JNE, BPF_REG_8, ACCESS_DENIED, 1),
                BPF_MOV64_IMM(BPF_REG_0, 0),
        };

        _cleanup_(bpf_program_unrefp) BPFProgram *p = NULL;
        int accounting_map_fd, r;
        bool access_enabled;

        assert(u);
        assert(ret);

        accounting_map_fd = is_ingress ?
                u->ip_accounting_ingress_map_fd :
                u->ip_accounting_egress_map_fd;

        access_enabled =
                u->ipv4_allow_map_fd >= 0 ||
                u->ipv6_allow_map_fd >= 0 ||
                u->ipv4_deny_map_fd >= 0 ||
                u->ipv6_deny_map_fd >= 0;

        if (accounting_map_fd < 0 && !access_enabled) {
                *ret = NULL;
                return 0;
        }

        r = bpf_program_new(BPF_PROG_TYPE_CGROUP_SKB, &p);
        if (r < 0)
                return r;

        r = bpf_program_add_instructions(p, pre_insn, ELEMENTSOF(pre_insn));
        if (r < 0)
                return r;

        if (access_enabled) {
                /*
                 * The simple rule this function translates into eBPF instructions is:
                 *
                 * - Access will be granted when an address matches an entry in @list_allow
                 * - Otherwise, access will be denied when an address matches an entry in @list_deny
                 * - Otherwise, access will be granted
                 */
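
                /* For example, a unit configured with
                 *
                 *     [Service]
                 *     IPAddressDeny=any
                 *     IPAddressAllow=192.168.1.0/24
                 *
                 * gets deny lookups that match every address plus allow lookups for the /24; since an
                 * ACCESS_ALLOWED match takes precedence over ACCESS_DENIED, only that subnet may pass. */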

                if (u->ipv4_deny_map_fd >= 0) {
                        r = add_lookup_instructions(p, u->ipv4_deny_map_fd, ETH_P_IP, is_ingress, ACCESS_DENIED);
                        if (r < 0)
                                return r;
                }

                if (u->ipv6_deny_map_fd >= 0) {
                        r = add_lookup_instructions(p, u->ipv6_deny_map_fd, ETH_P_IPV6, is_ingress, ACCESS_DENIED);
                        if (r < 0)
                                return r;
                }

                if (u->ipv4_allow_map_fd >= 0) {
                        r = add_lookup_instructions(p, u->ipv4_allow_map_fd, ETH_P_IP, is_ingress, ACCESS_ALLOWED);
                        if (r < 0)
                                return r;
                }

                if (u->ipv6_allow_map_fd >= 0) {
                        r = add_lookup_instructions(p, u->ipv6_allow_map_fd, ETH_P_IPV6, is_ingress, ACCESS_ALLOWED);
                        if (r < 0)
                                return r;
                }
        }

        r = bpf_program_add_instructions(p, post_insn, ELEMENTSOF(post_insn));
        if (r < 0)
                return r;

        if (accounting_map_fd >= 0) {
                struct bpf_insn insn[] = {
                        /*
                         * If R0 == 0, the packet will be denied; skip the accounting instructions in this case.
                         * The jump label will be fixed up later.
                         */
                        BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 0),

                        BPF_MOV64_IMM(BPF_REG_0, MAP_KEY_PACKETS), /* r0 = 0 */
                        BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, -4), /* *(u32 *)(fp - 4) = r0 */
                        BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
                        BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), /* r2 = fp - 4 */
                        BPF_LD_MAP_FD(BPF_REG_1, accounting_map_fd), /* load map fd to r1 */
                        BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
                        BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
                        BPF_MOV64_IMM(BPF_REG_1, 1), /* r1 = 1 */
                        BPF_RAW_INSN(BPF_STX | BPF_XADD | BPF_DW, BPF_REG_0, BPF_REG_1, 0, 0), /* xadd r0 += r1 */

                        BPF_MOV64_IMM(BPF_REG_0, MAP_KEY_BYTES), /* r0 = 1 */
                        BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, -4), /* *(u32 *)(fp - 4) = r0 */
                        BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
                        BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), /* r2 = fp - 4 */
                        BPF_LD_MAP_FD(BPF_REG_1, accounting_map_fd),
                        BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
                        BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
                        BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_6, offsetof(struct __sk_buff, len)), /* r1 = skb->len */
                        BPF_RAW_INSN(BPF_STX | BPF_XADD | BPF_DW, BPF_REG_0, BPF_REG_1, 0, 0), /* xadd r0 += r1 */

                        /* Allow the packet to pass */
                        BPF_MOV64_IMM(BPF_REG_0, 1),
                };

                /* Jump label fixup */
                insn[0].off = ELEMENTSOF(insn) - 1;

                r = bpf_program_add_instructions(p, insn, ELEMENTSOF(insn));
                if (r < 0)
                        return r;
        }
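
        /* In C terms the accounting fragment above amounts to roughly this (again only a sketch; the real
         * lookups and atomic adds are performed by the eBPF instructions emitted above):
         *
         *     uint32_t key = MAP_KEY_PACKETS;
         *     uint64_t *counter = bpf_map_lookup_elem(accounting_map, &key);
         *     if (counter)
         *             __sync_fetch_and_add(counter, 1);
         *
         *     key = MAP_KEY_BYTES;
         *     counter = bpf_map_lookup_elem(accounting_map, &key);
         *     if (counter)
         *             __sync_fetch_and_add(counter, skb->len);
         */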

        do {
                /*
                 * Exit from the eBPF program, R0 contains the verdict.
                 * 0 means the packet is denied, 1 means the packet may pass.
                 */
                struct bpf_insn insn[] = {
                        BPF_EXIT_INSN()
                };

                r = bpf_program_add_instructions(p, insn, ELEMENTSOF(insn));
                if (r < 0)
                        return r;
        } while (false);

        *ret = p;
        p = NULL;

        return 0;
}

static int bpf_firewall_count_access_items(IPAddressAccessItem *list, size_t *n_ipv4, size_t *n_ipv6) {
        IPAddressAccessItem *a;

        assert(n_ipv4);
        assert(n_ipv6);

        LIST_FOREACH(items, a, list) {
                switch (a->family) {

                case AF_INET:
                        (*n_ipv4)++;
                        break;

                case AF_INET6:
                        (*n_ipv6)++;
                        break;

                default:
                        return -EAFNOSUPPORT;
                }
        }

        return 0;
}

static int bpf_firewall_add_access_items(
                IPAddressAccessItem *list,
                int ipv4_map_fd,
                int ipv6_map_fd,
                int verdict) {

        struct bpf_lpm_trie_key *key_ipv4, *key_ipv6;
        uint64_t value = verdict;
        IPAddressAccessItem *a;
        int r;

        key_ipv4 = alloca0(offsetof(struct bpf_lpm_trie_key, data) + sizeof(uint32_t));
        key_ipv6 = alloca0(offsetof(struct bpf_lpm_trie_key, data) + sizeof(uint32_t) * 4);
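
        /* Layout note: an LPM trie key is a 32-bit prefix length immediately followed by the raw address
         * bytes, hence 4 + 4 bytes for IPv4 and 4 + 16 bytes for IPv6. alloca0() zero-initializes the buffers,
         * so any bytes beyond the copied address stay deterministic. */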

        LIST_FOREACH(items, a, list) {
                switch (a->family) {

                case AF_INET:
                        key_ipv4->prefixlen = a->prefixlen;
                        memcpy(key_ipv4->data, &a->address, sizeof(uint32_t));

                        r = bpf_map_update_element(ipv4_map_fd, key_ipv4, &value);
                        if (r < 0)
                                return r;

                        break;

                case AF_INET6:
                        key_ipv6->prefixlen = a->prefixlen;
                        memcpy(key_ipv6->data, &a->address, 4 * sizeof(uint32_t));

                        r = bpf_map_update_element(ipv6_map_fd, key_ipv6, &value);
                        if (r < 0)
                                return r;

                        break;

                default:
                        return -EAFNOSUPPORT;
                }
        }

        return 0;
}

static int bpf_firewall_prepare_access_maps(
                Unit *u,
                int verdict,
                int *ret_ipv4_map_fd,
                int *ret_ipv6_map_fd) {

        _cleanup_close_ int ipv4_map_fd = -1, ipv6_map_fd = -1;
        size_t n_ipv4 = 0, n_ipv6 = 0;
        Unit *p;
        int r;

        assert(ret_ipv4_map_fd);
        assert(ret_ipv6_map_fd);

        for (p = u; p; p = UNIT_DEREF(p->slice)) {
                CGroupContext *cc;

                cc = unit_get_cgroup_context(p);
                if (!cc)
                        continue;

                bpf_firewall_count_access_items(verdict == ACCESS_ALLOWED ? cc->ip_address_allow : cc->ip_address_deny, &n_ipv4, &n_ipv6);
        }

        if (n_ipv4 > 0) {
                ipv4_map_fd = bpf_map_new(
                                BPF_MAP_TYPE_LPM_TRIE,
                                offsetof(struct bpf_lpm_trie_key, data) + sizeof(uint32_t),
                                sizeof(uint64_t),
                                n_ipv4,
                                BPF_F_NO_PREALLOC);
                if (ipv4_map_fd < 0)
                        return ipv4_map_fd;
        }

        if (n_ipv6 > 0) {
                ipv6_map_fd = bpf_map_new(
                                BPF_MAP_TYPE_LPM_TRIE,
                                offsetof(struct bpf_lpm_trie_key, data) + sizeof(uint32_t)*4,
                                sizeof(uint64_t),
                                n_ipv6,
                                BPF_F_NO_PREALLOC);
                if (ipv6_map_fd < 0)
                        return ipv6_map_fd;
        }

        for (p = u; p; p = UNIT_DEREF(p->slice)) {
                CGroupContext *cc;

                cc = unit_get_cgroup_context(p);
                if (!cc)
                        continue;

                r = bpf_firewall_add_access_items(verdict == ACCESS_ALLOWED ? cc->ip_address_allow : cc->ip_address_deny,
                                                  ipv4_map_fd, ipv6_map_fd, verdict);
                if (r < 0)
                        return r;
        }
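
        /* Hand both fds over to the caller, and reset the locals to -1 so that the _cleanup_close_ handlers
         * above don't close what was just handed out. */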
        *ret_ipv4_map_fd = ipv4_map_fd;
        *ret_ipv6_map_fd = ipv6_map_fd;

        ipv4_map_fd = ipv6_map_fd = -1;
        return 0;
}

static int bpf_firewall_prepare_accounting_maps(bool enabled, int *fd_ingress, int *fd_egress) {
        int r;

        assert(fd_ingress);
        assert(fd_egress);

        if (enabled) {
                if (*fd_ingress < 0) {
                        r = bpf_map_new(BPF_MAP_TYPE_ARRAY, sizeof(int), sizeof(uint64_t), 2, 0);
                        if (r < 0)
                                return r;

                        *fd_ingress = r;
                }

                if (*fd_egress < 0) {
                        r = bpf_map_new(BPF_MAP_TYPE_ARRAY, sizeof(int), sizeof(uint64_t), 2, 0);
                        if (r < 0)
                                return r;

                        *fd_egress = r;
                }
        } else {
                *fd_ingress = safe_close(*fd_ingress);
                *fd_egress = safe_close(*fd_egress);
        }

        return 0;
}

int bpf_firewall_compile(Unit *u) {
        CGroupContext *cc;
        int r;

        assert(u);

        r = bpf_firewall_supported();
        if (r < 0)
                return r;
        if (r == 0) {
                log_debug("BPF firewalling not supported on this systemd, proceeding without.");
                return -EOPNOTSUPP;
        }

        /* Note that when we compile a new firewall we first flush out the access maps and the BPF programs themselves,
         * but we reuse the accounting maps. That way the firewall in effect always maps to the actual
         * configuration, but we don't flush out the accounting unnecessarily. */

        u->ip_bpf_ingress = bpf_program_unref(u->ip_bpf_ingress);
        u->ip_bpf_egress = bpf_program_unref(u->ip_bpf_egress);

        u->ipv4_allow_map_fd = safe_close(u->ipv4_allow_map_fd);
        u->ipv4_deny_map_fd = safe_close(u->ipv4_deny_map_fd);

        u->ipv6_allow_map_fd = safe_close(u->ipv6_allow_map_fd);
        u->ipv6_deny_map_fd = safe_close(u->ipv6_deny_map_fd);

        cc = unit_get_cgroup_context(u);
        if (!cc)
                return -EINVAL;

        r = bpf_firewall_prepare_access_maps(u, ACCESS_ALLOWED, &u->ipv4_allow_map_fd, &u->ipv6_allow_map_fd);
        if (r < 0)
                return log_error_errno(r, "Preparation of eBPF allow maps failed: %m");

        r = bpf_firewall_prepare_access_maps(u, ACCESS_DENIED, &u->ipv4_deny_map_fd, &u->ipv6_deny_map_fd);
        if (r < 0)
                return log_error_errno(r, "Preparation of eBPF deny maps failed: %m");

        r = bpf_firewall_prepare_accounting_maps(cc->ip_accounting, &u->ip_accounting_ingress_map_fd, &u->ip_accounting_egress_map_fd);
        if (r < 0)
                return log_error_errno(r, "Preparation of eBPF accounting maps failed: %m");

        r = bpf_firewall_compile_bpf(u, true, &u->ip_bpf_ingress);
        if (r < 0)
                return log_error_errno(r, "Compilation for ingress BPF program failed: %m");

        r = bpf_firewall_compile_bpf(u, false, &u->ip_bpf_egress);
        if (r < 0)
                return log_error_errno(r, "Compilation for egress BPF program failed: %m");

        return 0;
}
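
/* The expected call sequence (a sketch only; the actual call sites live in the cgroup handling code, not in
 * this file) is compile first, then install once the unit's cgroup exists:
 *
 *     r = bpf_firewall_compile(u);          (build access/accounting maps and the two eBPF programs)
 *     if (r >= 0)
 *             r = bpf_firewall_install(u);  (load the programs and attach them to the unit's cgroup)
 */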

int bpf_firewall_install(Unit *u) {
        _cleanup_free_ char *path = NULL;
        CGroupContext *cc;
        int r;

        assert(u);

        if (!u->cgroup_path)
                return -EINVAL;

        cc = unit_get_cgroup_context(u);
        if (!cc)
                return -EINVAL;

        r = bpf_firewall_supported();
        if (r < 0)
                return r;
        if (r == 0) {
                log_debug("BPF firewalling not supported on this systemd, proceeding without.");
                return -EOPNOTSUPP;
        }

        r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path, NULL, &path);
        if (r < 0)
                return log_error_errno(r, "Failed to determine cgroup path: %m");
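
        /* When the unit has Delegate= enabled, the programs below are attached with BPF_F_ALLOW_OVERRIDE so
         * that payload code inside the delegated cgroup subtree may install its own filter; otherwise the
         * attachment is exclusive. */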

        if (u->ip_bpf_egress) {
                r = bpf_program_load_kernel(u->ip_bpf_egress, NULL, 0);
                if (r < 0)
                        return log_error_errno(r, "Kernel upload of egress BPF program failed: %m");

                r = bpf_program_cgroup_attach(u->ip_bpf_egress, BPF_CGROUP_INET_EGRESS, path, cc->delegate ? BPF_F_ALLOW_OVERRIDE : 0);
                if (r < 0)
                        return log_error_errno(r, "Attaching egress BPF program to cgroup %s failed: %m", path);
        } else {
                r = bpf_program_cgroup_detach(BPF_CGROUP_INET_EGRESS, path);
                if (r < 0)
                        return log_full_errno(r == -ENOENT ? LOG_DEBUG : LOG_ERR, r,
                                              "Detaching egress BPF program from cgroup failed: %m");
        }

        if (u->ip_bpf_ingress) {
                r = bpf_program_load_kernel(u->ip_bpf_ingress, NULL, 0);
                if (r < 0)
                        return log_error_errno(r, "Kernel upload of ingress BPF program failed: %m");

                r = bpf_program_cgroup_attach(u->ip_bpf_ingress, BPF_CGROUP_INET_INGRESS, path, cc->delegate ? BPF_F_ALLOW_OVERRIDE : 0);
                if (r < 0)
                        return log_error_errno(r, "Attaching ingress BPF program to cgroup %s failed: %m", path);
        } else {
                r = bpf_program_cgroup_detach(BPF_CGROUP_INET_INGRESS, path);
                if (r < 0)
                        return log_full_errno(r == -ENOENT ? LOG_DEBUG : LOG_ERR, r,
                                              "Detaching ingress BPF program from cgroup failed: %m");
        }

        return 0;
}

int bpf_firewall_read_accounting(int map_fd, uint64_t *ret_bytes, uint64_t *ret_packets) {
        uint64_t key, packets;
        int r;

        if (map_fd < 0)
                return -EBADF;

        if (ret_packets) {
                key = MAP_KEY_PACKETS;
                r = bpf_map_lookup_element(map_fd, &key, &packets);
                if (r < 0)
                        return r;
        }

        if (ret_bytes) {
                key = MAP_KEY_BYTES;
                r = bpf_map_lookup_element(map_fd, &key, ret_bytes);
                if (r < 0)
                        return r;
        }

        if (ret_packets)
                *ret_packets = packets;

        return 0;
}

int bpf_firewall_reset_accounting(int map_fd) {
        uint64_t key, value = 0;
        int r;

        if (map_fd < 0)
                return -EBADF;

        key = MAP_KEY_PACKETS;
        r = bpf_map_update_element(map_fd, &key, &value);
        if (r < 0)
                return r;

        key = MAP_KEY_BYTES;
        return bpf_map_update_element(map_fd, &key, &value);
}

int bpf_firewall_supported(void) {
        struct bpf_insn trivial[] = {
                BPF_MOV64_IMM(BPF_REG_0, 1),
                BPF_EXIT_INSN()
        };

        _cleanup_(bpf_program_unrefp) BPFProgram *program = NULL;
        static int supported = -1;
        _cleanup_close_ int fd = -1;
        int r;

        /* Checks whether BPF firewalling is supported. For this, we check three things:
         *
         * a) whether we are privileged
         * b) whether the unified hierarchy is being used
         * c) the BPF implementation in the kernel supports BPF LPM TRIE maps, which we require
         */

        if (supported >= 0)
                return supported;

        if (geteuid() != 0) {
                log_debug("Not enough privileges, BPF firewalling is not supported.");
                return supported = false;
        }

        r = cg_unified_controller(SYSTEMD_CGROUP_CONTROLLER);
        if (r < 0)
                return log_error_errno(r, "Can't determine whether the unified hierarchy is used: %m");
        if (r == 0)
                return supported = false;

        fd = bpf_map_new(BPF_MAP_TYPE_LPM_TRIE,
                         offsetof(struct bpf_lpm_trie_key, data) + sizeof(uint64_t),
                         sizeof(uint64_t),
                         1,
                         BPF_F_NO_PREALLOC);
        if (fd < 0) {
                log_debug_errno(fd, "Can't allocate BPF LPM TRIE map, BPF firewalling is not supported: %m");
                return supported = false;
        }

        r = bpf_program_new(BPF_PROG_TYPE_CGROUP_SKB, &program);
        if (r < 0) {
                log_debug_errno(r, "Can't allocate CGROUP SKB BPF program, BPF firewalling is not supported: %m");
                return supported = false;
        }

        r = bpf_program_add_instructions(program, trivial, ELEMENTSOF(trivial));
        if (r < 0) {
                log_debug_errno(r, "Can't add trivial instructions to CGROUP SKB BPF program, BPF firewalling is not supported: %m");
                return supported = false;
        }

        r = bpf_program_load_kernel(program, NULL, 0);
        if (r < 0) {
                log_debug_errno(r, "Can't load kernel CGROUP SKB BPF program, BPF firewalling is not supported: %m");
                return supported = false;
        }

        return supported = true;
}