/***
  This file is part of systemd.

  Copyright 2016 Daniel Mack

  systemd is free software; you can redistribute it and/or modify it
  under the terms of the GNU Lesser General Public License as published by
  the Free Software Foundation; either version 2.1 of the License, or
  (at your option) any later version.

  systemd is distributed in the hope that it will be useful, but
  WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  Lesser General Public License for more details.

  You should have received a copy of the GNU Lesser General Public License
  along with systemd; If not, see <http://www.gnu.org/licenses/>.
***/

#include <arpa/inet.h>
#include <linux/libbpf.h>
#include <net/ethernet.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>

#include "alloc-util.h"
#include "bpf-firewall.h"
#include "bpf-program.h"
#include "ip-address-access.h"

/* Compile instructions for one list of addresses, one direction and one specific verdict on matches. */

static int add_lookup_instructions(
                BPFProgram *p,
                int map_fd,
                int protocol,
                bool is_ingress,
                int verdict) {

        int r, addr_offset, addr_size;

        switch (protocol) {

        case ETH_P_IP:
                addr_size = sizeof(uint32_t);
                addr_offset = is_ingress ?
                        offsetof(struct iphdr, saddr) :
                        offsetof(struct iphdr, daddr);
                break;

        case ETH_P_IPV6:
                addr_size = 4 * sizeof(uint32_t);
                addr_offset = is_ingress ?
                        offsetof(struct ip6_hdr, ip6_src.s6_addr) :
                        offsetof(struct ip6_hdr, ip6_dst.s6_addr);
                break;

        default:
                return -EAFNOSUPPORT;
        }

        /* Compare the protocol with one word instruction (32 bit) */
        struct bpf_insn insn[] = {
                /* If skb->protocol != @protocol, skip this whole block. The offset will be set later. */
                BPF_JMP_IMM(BPF_JNE, BPF_REG_7, htobe16(protocol), 0),

                /*
                 * Call into BPF_FUNC_skb_load_bytes to load the dst/src IP address.
                 *
                 * R1: Pointer to the skb
                 * R2: Data offset
                 * R3: Destination buffer on the stack (r10 - addr_size)
                 * R4: Number of bytes to read (addr_size)
                 */
                BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
                BPF_MOV32_IMM(BPF_REG_2, addr_offset),

                BPF_MOV64_REG(BPF_REG_3, BPF_REG_10),
                BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, -addr_size),

                BPF_MOV32_IMM(BPF_REG_4, addr_size),
                BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_skb_load_bytes),

                /*
                 * Call into BPF_FUNC_map_lookup_elem to see if the address matches any entry in the
                 * LPM trie map. For this to work, the prefixlen field of 'struct bpf_lpm_trie_key'
                 * has to be set to the maximum possible value.
                 *
                 * On success, the looked up value is stored in R0. For this application, the actual
                 * value doesn't matter, however; we just set the bit in @verdict in R8 if we found any
                 * matching entry.
                 */
                BPF_LD_MAP_FD(BPF_REG_1, map_fd),

                BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
                BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -addr_size - sizeof(uint32_t)),
                BPF_ST_MEM(BPF_W, BPF_REG_2, 0, addr_size * 8),

                BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
                BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
                BPF_ALU32_IMM(BPF_OR, BPF_REG_8, verdict),
        };

        /* Jump label fixup */
        insn[0].off = ELEMENTSOF(insn) - 1;

        r = bpf_program_add_instructions(p, insn, ELEMENTSOF(insn));
        if (r < 0)
                return r;

        return 0;
}
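
/*
 * For reference (derived from the instructions above, not present in the original source): at
 * runtime the lookup builds a 'struct bpf_lpm_trie_key' on the BPF stack, with fp == R10:
 *
 *   fp - addr_size - 4 : u32 prefixlen = addr_size * 8   (maximum length, i.e. an exact match)
 *   fp - addr_size     : u8  data[addr_size]             (the address written by skb_load_bytes)
 *
 * R2 is pointed at the prefixlen field, so the two stores together form one contiguous LPM key.
 */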

static int bpf_firewall_compile_bpf(
                Unit *u,
                bool is_ingress,
                BPFProgram **ret) {

        struct bpf_insn pre_insn[] = {
                /*
                 * When the eBPF program is entered, R1 contains the address of the skb.
                 * However, R1-R5 are scratch registers that are not preserved when calling
                 * into kernel functions, so we need to save anything that's supposed to
                 * stay around to R6-R9. Save the skb to R6.
                 */
                BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),

                /*
                 * Although we cannot access the skb data directly from eBPF programs used in this
                 * scenario, the kernel has prepared some fields for us to access through struct __sk_buff.
                 * Load the protocol (IPv4, IPv6) used by the packet in flight once and cache it in R7
                 * for later use.
                 */
                BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6, offsetof(struct __sk_buff, protocol)),

                /*
                 * R8 is used to keep track of whether any address check has explicitly allowed or denied the packet
                 * through ACCESS_DENIED or ACCESS_ALLOWED bits. Reset them both to 0 in the beginning.
                 */
                BPF_MOV32_IMM(BPF_REG_8, 0),
        };

        /*
         * The access checkers compiled for the configured allowance and denial lists
         * write to R8 at runtime. The following code prepares for an early exit that
         * skips the accounting if the packet is denied.
         *
         * R0 = 1
         * if (R8 == ACCESS_DENIED)
         *         R0 = 0
         *
         * This means that if both ACCESS_DENIED and ACCESS_ALLOWED are set, the packet
         * is allowed to pass.
         */
        struct bpf_insn post_insn[] = {
                BPF_MOV64_IMM(BPF_REG_0, 1),
                BPF_JMP_IMM(BPF_JNE, BPF_REG_8, ACCESS_DENIED, 1),
                BPF_MOV64_IMM(BPF_REG_0, 0),
        };

        _cleanup_(bpf_program_unrefp) BPFProgram *p = NULL;
        int accounting_map_fd, r;
        bool access_enabled;

        accounting_map_fd = is_ingress ?
                u->ip_accounting_ingress_map_fd :
                u->ip_accounting_egress_map_fd;

        access_enabled =
                u->ipv4_allow_map_fd >= 0 ||
                u->ipv6_allow_map_fd >= 0 ||
                u->ipv4_deny_map_fd >= 0 ||
                u->ipv6_deny_map_fd >= 0;

        if (accounting_map_fd < 0 && !access_enabled) {
                *ret = NULL;
                return 0;
        }

        r = bpf_program_new(BPF_PROG_TYPE_CGROUP_SKB, &p);
        if (r < 0)
                return r;

        r = bpf_program_add_instructions(p, pre_insn, ELEMENTSOF(pre_insn));
        if (r < 0)
                return r;

        if (access_enabled) {
                /*
                 * The simple rule this function translates into eBPF instructions is:
                 *
                 * - Access will be granted when an address matches an entry in the allow maps
                 * - Otherwise, access will be denied when an address matches an entry in the deny maps
                 * - Otherwise, access will be granted
                 */

                if (u->ipv4_deny_map_fd >= 0) {
                        r = add_lookup_instructions(p, u->ipv4_deny_map_fd, ETH_P_IP, is_ingress, ACCESS_DENIED);
                        if (r < 0)
                                return r;
                }

                if (u->ipv6_deny_map_fd >= 0) {
                        r = add_lookup_instructions(p, u->ipv6_deny_map_fd, ETH_P_IPV6, is_ingress, ACCESS_DENIED);
                        if (r < 0)
                                return r;
                }

                if (u->ipv4_allow_map_fd >= 0) {
                        r = add_lookup_instructions(p, u->ipv4_allow_map_fd, ETH_P_IP, is_ingress, ACCESS_ALLOWED);
                        if (r < 0)
                                return r;
                }

                if (u->ipv6_allow_map_fd >= 0) {
                        r = add_lookup_instructions(p, u->ipv6_allow_map_fd, ETH_P_IPV6, is_ingress, ACCESS_ALLOWED);
                        if (r < 0)
                                return r;
                }
        }

        r = bpf_program_add_instructions(p, post_insn, ELEMENTSOF(post_insn));
        if (r < 0)
                return r;

        if (accounting_map_fd >= 0) {
                struct bpf_insn insn[] = {
                        /*
                         * If R0 == 0, the packet will be denied; skip the accounting instructions in this case.
                         * The jump label will be fixed up later.
                         */
                        BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 0),

                        /* Count packets */
                        BPF_MOV64_IMM(BPF_REG_0, MAP_KEY_PACKETS), /* r0 = 0 */
                        BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, -4), /* *(u32 *)(fp - 4) = r0 */
                        BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
                        BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), /* r2 = fp - 4 */
                        BPF_LD_MAP_FD(BPF_REG_1, accounting_map_fd), /* load map fd to r1 */
                        BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
                        BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
                        BPF_MOV64_IMM(BPF_REG_1, 1), /* r1 = 1 */
                        BPF_RAW_INSN(BPF_STX | BPF_XADD | BPF_DW, BPF_REG_0, BPF_REG_1, 0, 0), /* xadd r0 += r1 */

                        /* Count bytes */
                        BPF_MOV64_IMM(BPF_REG_0, MAP_KEY_BYTES), /* r0 = 1 */
                        BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, -4), /* *(u32 *)(fp - 4) = r0 */
                        BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
                        BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), /* r2 = fp - 4 */
                        BPF_LD_MAP_FD(BPF_REG_1, accounting_map_fd),
                        BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
                        BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
                        BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_6, offsetof(struct __sk_buff, len)), /* r1 = skb->len */
                        BPF_RAW_INSN(BPF_STX | BPF_XADD | BPF_DW, BPF_REG_0, BPF_REG_1, 0, 0), /* xadd r0 += r1 */

                        /* Allow the packet to pass */
                        BPF_MOV64_IMM(BPF_REG_0, 1),
                };

                /* Jump label fixup */
                insn[0].off = ELEMENTSOF(insn) - 1;

                r = bpf_program_add_instructions(p, insn, ELEMENTSOF(insn));
                if (r < 0)
                        return r;
        }
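
        /*
         * Note on the accounting instructions above: BPF_FUNC_map_lookup_elem returns a pointer to
         * the map value in R0 (or NULL), and the BPF_STX | BPF_XADD instruction then applies an
         * atomic add directly to that value, so the packet and byte counters stay consistent even
         * when several packets are accounted concurrently.
         */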

        /*
         * Exit from the eBPF program, R0 contains the verdict.
         * 0 means the packet is denied, 1 means the packet may pass.
         */
        struct bpf_insn insn[] = {
                BPF_EXIT_INSN()
        };

        r = bpf_program_add_instructions(p, insn, ELEMENTSOF(insn));
        if (r < 0)
                return r;

        *ret = p;
        p = NULL;

        return 0;
}

static int bpf_firewall_count_access_items(IPAddressAccessItem *list, size_t *n_ipv4, size_t *n_ipv6) {
        IPAddressAccessItem *a;

        LIST_FOREACH(items, a, list) {
                switch (a->family) {

                case AF_INET:
                        (*n_ipv4)++;
                        break;

                case AF_INET6:
                        (*n_ipv6)++;
                        break;

                default:
                        return -EAFNOSUPPORT;
                }
        }

        return 0;
}

static int bpf_firewall_add_access_items(
                IPAddressAccessItem *list,
                int ipv4_map_fd,
                int ipv6_map_fd,
                int verdict) {

        struct bpf_lpm_trie_key *key_ipv4, *key_ipv6;
        uint64_t value = verdict;
        IPAddressAccessItem *a;
        int r;

        key_ipv4 = alloca0(offsetof(struct bpf_lpm_trie_key, data) + sizeof(uint32_t));
        key_ipv6 = alloca0(offsetof(struct bpf_lpm_trie_key, data) + sizeof(uint32_t) * 4);

        LIST_FOREACH(items, a, list) {
                switch (a->family) {

                case AF_INET:
                        key_ipv4->prefixlen = a->prefixlen;
                        memcpy(key_ipv4->data, &a->address, sizeof(uint32_t));

                        r = bpf_map_update_element(ipv4_map_fd, key_ipv4, &value);
                        if (r < 0)
                                return r;
                        break;

                case AF_INET6:
                        key_ipv6->prefixlen = a->prefixlen;
                        memcpy(key_ipv6->data, &a->address, 4 * sizeof(uint32_t));

                        r = bpf_map_update_element(ipv6_map_fd, key_ipv6, &value);
                        if (r < 0)
                                return r;
                        break;

                default:
                        return -EAFNOSUPPORT;
                }
        }

        return 0;
}
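
/*
 * Illustration (hypothetical configuration, not part of this file): an IPAddressDeny=10.0.0.0/8
 * entry is stored in the IPv4 LPM trie as a key with prefixlen = 8 and data = { 10, 0, 0, 0 },
 * with the verdict (here ACCESS_DENIED) as the 64-bit map value. The runtime code emitted by
 * add_lookup_instructions() then matches every packet whose address falls within that prefix.
 */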

static int bpf_firewall_prepare_access_maps(
                Unit *u,
                int verdict,
                int *ret_ipv4_map_fd,
                int *ret_ipv6_map_fd) {

        _cleanup_close_ int ipv4_map_fd = -1, ipv6_map_fd = -1;
        size_t n_ipv4 = 0, n_ipv6 = 0;
        Unit *p;
        int r;

        assert(ret_ipv4_map_fd);
        assert(ret_ipv6_map_fd);

        for (p = u; p; p = UNIT_DEREF(p->slice)) {
                CGroupContext *cc;

                cc = unit_get_cgroup_context(p);
                if (!cc)
                        continue;

                bpf_firewall_count_access_items(verdict == ACCESS_ALLOWED ? cc->ip_address_allow : cc->ip_address_deny, &n_ipv4, &n_ipv6);
        }

        if (n_ipv4 > 0) {
                ipv4_map_fd = bpf_map_new(
                                BPF_MAP_TYPE_LPM_TRIE,
                                offsetof(struct bpf_lpm_trie_key, data) + sizeof(uint32_t),
                                sizeof(uint64_t),
                                n_ipv4,
                                BPF_F_NO_PREALLOC);
                if (ipv4_map_fd < 0)
                        return ipv4_map_fd;
        }

        if (n_ipv6 > 0) {
                ipv6_map_fd = bpf_map_new(
                                BPF_MAP_TYPE_LPM_TRIE,
                                offsetof(struct bpf_lpm_trie_key, data) + sizeof(uint32_t)*4,
                                sizeof(uint64_t),
                                n_ipv6,
                                BPF_F_NO_PREALLOC);
                if (ipv6_map_fd < 0)
                        return ipv6_map_fd;
        }

        for (p = u; p; p = UNIT_DEREF(p->slice)) {
                CGroupContext *cc;

                cc = unit_get_cgroup_context(p);
                if (!cc)
                        continue;

                r = bpf_firewall_add_access_items(verdict == ACCESS_ALLOWED ? cc->ip_address_allow : cc->ip_address_deny,
                                                  ipv4_map_fd, ipv6_map_fd, verdict);
                if (r < 0)
                        return r;
        }

        *ret_ipv4_map_fd = ipv4_map_fd;
        *ret_ipv6_map_fd = ipv6_map_fd;

        ipv4_map_fd = ipv6_map_fd = -1;
        return 0;
}

static int bpf_firewall_prepare_accounting_maps(bool enabled, int *fd_ingress, int *fd_egress) {
        int r;

        if (enabled) {
                if (*fd_ingress < 0) {
                        r = bpf_map_new(BPF_MAP_TYPE_ARRAY, sizeof(int), sizeof(uint64_t), 2, 0);
                        if (r < 0)
                                return r;

                        *fd_ingress = r;
                }

                if (*fd_egress < 0) {
                        r = bpf_map_new(BPF_MAP_TYPE_ARRAY, sizeof(int), sizeof(uint64_t), 2, 0);
                        if (r < 0)
                                return r;

                        *fd_egress = r;
                }
        } else {
                *fd_ingress = safe_close(*fd_ingress);
                *fd_egress = safe_close(*fd_egress);
        }

        return 0;
}

int bpf_firewall_compile(Unit *u) {
        CGroupContext *cc;
        int r;

        r = bpf_firewall_supported();
        if (r < 0)
                return r;
        if (r == 0) {
                log_debug("BPF firewalling not supported on this systemd, proceeding without.");
                return -EOPNOTSUPP;
        }

        /* Note that when we compile a new firewall we first flush out the access maps and the BPF programs
         * themselves, but we reuse the accounting maps. That way the firewall in effect always maps to the
         * actual configuration, but we don't flush out the accounting unnecessarily. */

        u->ip_bpf_ingress = bpf_program_unref(u->ip_bpf_ingress);
        u->ip_bpf_egress = bpf_program_unref(u->ip_bpf_egress);

        u->ipv4_allow_map_fd = safe_close(u->ipv4_allow_map_fd);
        u->ipv4_deny_map_fd = safe_close(u->ipv4_deny_map_fd);

        u->ipv6_allow_map_fd = safe_close(u->ipv6_allow_map_fd);
        u->ipv6_deny_map_fd = safe_close(u->ipv6_deny_map_fd);

        cc = unit_get_cgroup_context(u);
        if (!cc)
                return -EINVAL;

        r = bpf_firewall_prepare_access_maps(u, ACCESS_ALLOWED, &u->ipv4_allow_map_fd, &u->ipv6_allow_map_fd);
        if (r < 0)
                return log_error_errno(r, "Preparation of eBPF allow maps failed: %m");

        r = bpf_firewall_prepare_access_maps(u, ACCESS_DENIED, &u->ipv4_deny_map_fd, &u->ipv6_deny_map_fd);
        if (r < 0)
                return log_error_errno(r, "Preparation of eBPF deny maps failed: %m");

        r = bpf_firewall_prepare_accounting_maps(cc->ip_accounting, &u->ip_accounting_ingress_map_fd, &u->ip_accounting_egress_map_fd);
        if (r < 0)
                return log_error_errno(r, "Preparation of eBPF accounting maps failed: %m");

        r = bpf_firewall_compile_bpf(u, true, &u->ip_bpf_ingress);
        if (r < 0)
                return log_error_errno(r, "Compilation for ingress BPF program failed: %m");

        r = bpf_firewall_compile_bpf(u, false, &u->ip_bpf_egress);
        if (r < 0)
                return log_error_errno(r, "Compilation for egress BPF program failed: %m");

        return 0;
}

int bpf_firewall_install(Unit *u) {
        _cleanup_free_ char *path = NULL;
        CGroupContext *cc;
        int r;

        cc = unit_get_cgroup_context(u);
        if (!cc)
                return -EINVAL;

        r = bpf_firewall_supported();
        if (r < 0)
                return r;
        if (r == 0) {
                log_debug("BPF firewalling not supported on this systemd, proceeding without.");
                return -EOPNOTSUPP;
        }

        r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path, NULL, &path);
        if (r < 0)
                return log_error_errno(r, "Failed to determine cgroup path: %m");

        if (u->ip_bpf_egress) {
                r = bpf_program_load_kernel(u->ip_bpf_egress, NULL, 0);
                if (r < 0)
                        return log_error_errno(r, "Kernel upload of egress BPF program failed: %m");

                r = bpf_program_cgroup_attach(u->ip_bpf_egress, BPF_CGROUP_INET_EGRESS, path, cc->delegate ? BPF_F_ALLOW_OVERRIDE : 0);
                if (r < 0)
                        return log_error_errno(r, "Attaching egress BPF program to cgroup %s failed: %m", path);
        } else {
                r = bpf_program_cgroup_detach(BPF_CGROUP_INET_EGRESS, path);
                if (r < 0)
                        return log_full_errno(r == -ENOENT ? LOG_DEBUG : LOG_ERR, r,
                                              "Detaching egress BPF program from cgroup failed: %m");
        }

        if (u->ip_bpf_ingress) {
                r = bpf_program_load_kernel(u->ip_bpf_ingress, NULL, 0);
                if (r < 0)
                        return log_error_errno(r, "Kernel upload of ingress BPF program failed: %m");

                r = bpf_program_cgroup_attach(u->ip_bpf_ingress, BPF_CGROUP_INET_INGRESS, path, cc->delegate ? BPF_F_ALLOW_OVERRIDE : 0);
                if (r < 0)
                        return log_error_errno(r, "Attaching ingress BPF program to cgroup %s failed: %m", path);
        } else {
                r = bpf_program_cgroup_detach(BPF_CGROUP_INET_INGRESS, path);
                if (r < 0)
                        return log_full_errno(r == -ENOENT ? LOG_DEBUG : LOG_ERR, r,
                                              "Detaching ingress BPF program from cgroup failed: %m");
        }

        return 0;
}

int bpf_firewall_read_accounting(int map_fd, uint64_t *ret_bytes, uint64_t *ret_packets) {
        uint64_t key, packets;
        int r;

        if (map_fd < 0)
                return -EBADF;

        if (ret_packets) {
                key = MAP_KEY_PACKETS;
                r = bpf_map_lookup_element(map_fd, &key, &packets);
                if (r < 0)
                        return r;
        }

        if (ret_bytes) {
                key = MAP_KEY_BYTES;
                r = bpf_map_lookup_element(map_fd, &key, ret_bytes);
                if (r < 0)
                        return r;
        }

        if (ret_packets)
                *ret_packets = packets;

        return 0;
}
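
/*
 * Example caller (a sketch, not part of this file; 'u' is a Unit with IPAccounting= enabled):
 *
 *         uint64_t bytes = 0, packets = 0;
 *         r = bpf_firewall_read_accounting(u->ip_accounting_egress_map_fd, &bytes, &packets);
 *         if (r < 0)
 *                 log_warning_errno(r, "Failed to read IP accounting counters: %m");
 */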

int bpf_firewall_reset_accounting(int map_fd) {
        uint64_t key, value = 0;
        int r;

        if (map_fd < 0)
                return -EBADF;

        key = MAP_KEY_PACKETS;
        r = bpf_map_update_element(map_fd, &key, &value);
        if (r < 0)
                return r;

        key = MAP_KEY_BYTES;
        return bpf_map_update_element(map_fd, &key, &value);
}

int bpf_firewall_supported(void) {
        struct bpf_insn trivial[] = {
                BPF_MOV64_IMM(BPF_REG_0, 1),
                BPF_EXIT_INSN()
        };

        _cleanup_(bpf_program_unrefp) BPFProgram *program = NULL;
        static int supported = -1;
        _cleanup_close_ int fd = -1;
        int r;

        /* Checks whether BPF firewalling is supported. For this, we check three things:
         *
         * a) whether we are privileged
         * b) whether the unified hierarchy is being used
         * c) the BPF implementation in the kernel supports BPF LPM TRIE maps, which we require
         */

        if (supported >= 0)
                return supported;

        if (geteuid() != 0) {
                log_debug("Not enough privileges, BPF firewalling is not supported.");
                return supported = false;
        }

        r = cg_unified_controller(SYSTEMD_CGROUP_CONTROLLER);
        if (r < 0)
                return log_error_errno(r, "Can't determine whether the unified hierarchy is used: %m");
        if (r == 0)
                return supported = false;

        fd = bpf_map_new(BPF_MAP_TYPE_LPM_TRIE,
                         offsetof(struct bpf_lpm_trie_key, data) + sizeof(uint64_t),
                         sizeof(uint64_t),
                         1,
                         BPF_F_NO_PREALLOC);
        if (fd < 0) {
                log_debug_errno(fd, "Can't allocate BPF LPM TRIE map, BPF firewalling is not supported: %m");
                return supported = false;
        }

        r = bpf_program_new(BPF_PROG_TYPE_CGROUP_SKB, &program);
        if (r < 0) {
                log_debug_errno(r, "Can't allocate CGROUP SKB BPF program, BPF firewalling is not supported: %m");
                return supported = false;
        }

        r = bpf_program_add_instructions(program, trivial, ELEMENTSOF(trivial));
        if (r < 0) {
                log_debug_errno(r, "Can't add trivial instructions to CGROUP SKB BPF program, BPF firewalling is not supported: %m");
                return supported = false;
        }

        r = bpf_program_load_kernel(program, NULL, 0);
        if (r < 0) {
                log_debug_errno(r, "Can't load kernel CGROUP SKB BPF program, BPF firewalling is not supported: %m");
                return supported = false;
        }

        return supported = true;
}
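
/*
 * Typical call sequence, sketched from the functions above (the real call sites live in the
 * unit/cgroup code, not in this file):
 *
 *         if (bpf_firewall_supported() > 0) {
 *                 r = bpf_firewall_compile(u);         // build maps + ingress/egress programs
 *                 if (r >= 0)
 *                         r = bpf_firewall_install(u); // load programs and attach them to the cgroup
 *         }
 */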