src/shared/bpf-program.c

   1 /* SPDX-License-Identifier: LGPL-2.1+ */
   2
#include <fcntl.h>
#include <string.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>

#include "alloc-util.h"
#include "bpf-program.h"
#include "fd-util.h"
#include "log.h"
#include "missing.h"
#include "path-util.h"
#include "util.h"
  15
  16 int bpf_program_new(uint32_t prog_type, BPFProgram **ret) {
  17         _cleanup_(bpf_program_unrefp) BPFProgram *p = NULL;
  18
  19         p = new0(BPFProgram, 1);
  20         if (!p)
  21                 return log_oom();
  22
  23         p->n_ref = 1;
  24         p->prog_type = prog_type;
  25         p->kernel_fd = -1;
  26
  27         *ret = TAKE_PTR(p);
  28
  29         return 0;
  30 }
  31
  32 static BPFProgram *bpf_program_free(BPFProgram *p) {
  33         assert(p);
  34
  35         /* Unfortunately, the kernel currently doesn't implicitly detach BPF programs from their cgroups when the last
  36          * fd to the BPF program is closed. This has nasty side-effects since this means that abnormally terminated
  37          * programs that attached one of their BPF programs to a cgroup will leave this programs pinned for good with
  38          * zero chance of recovery, until the cgroup is removed. This is particularly problematic if the cgroup in
  39          * question is the root cgroup (or any other cgroup belonging to a service that cannot be restarted during
  40          * operation, such as dbus), as the memory for the BPF program can only be reclaimed through a reboot. To
  41          * counter this, we track closely to which cgroup a program was attached to and will detach it on our own
  42          * whenever we close the BPF fd. */
  43         (void) bpf_program_cgroup_detach(p);
  44
  45         safe_close(p->kernel_fd);
  46         free(p->instructions);
  47         free(p->attached_path);
  48
  49         return mfree(p);
  50 }
  51
  52 DEFINE_TRIVIAL_REF_UNREF_FUNC(BPFProgram, bpf_program, bpf_program_free);
  53
  54 int bpf_program_add_instructions(BPFProgram *p, const struct bpf_insn *instructions, size_t count) {
  55
  56         assert(p);
  57
  58         if (p->kernel_fd >= 0) /* don't allow modification after we uploaded things to the kernel */
  59                 return -EBUSY;
  60
  61         if (!GREEDY_REALLOC(p->instructions, p->allocated, p->n_instructions + count))
  62                 return -ENOMEM;
  63
  64         memcpy(p->instructions + p->n_instructions, instructions, sizeof(struct bpf_insn) * count);
  65         p->n_instructions += count;
  66
  67         return 0;
  68 }
  69
  70 int bpf_program_load_kernel(BPFProgram *p, char *log_buf, size_t log_size) {
  71         union bpf_attr attr;
  72
  73         assert(p);
  74
  75         if (p->kernel_fd >= 0) { /* make this idempotent */
  76                 memzero(log_buf, log_size);
  77                 return 0;
  78         }
  79
  80         attr = (union bpf_attr) {
  81                 .prog_type = p->prog_type,
  82                 .insns = PTR_TO_UINT64(p->instructions),
  83                 .insn_cnt = p->n_instructions,
  84                 .license = PTR_TO_UINT64("GPL"),
  85                 .log_buf = PTR_TO_UINT64(log_buf),
  86                 .log_level = !!log_buf,
  87                 .log_size = log_size,
  88         };
  89
  90         p->kernel_fd = bpf(BPF_PROG_LOAD, &attr, sizeof(attr));
  91         if (p->kernel_fd < 0)
  92                 return -errno;
  93
  94         return 0;
  95 }
  96
  97 int bpf_program_cgroup_attach(BPFProgram *p, int type, const char *path, uint32_t flags) {
  98         _cleanup_free_ char *copy = NULL;
  99         _cleanup_close_ int fd = -1;
 100         union bpf_attr attr;
 101         int r;
 102
 103         assert(p);
 104         assert(type >= 0);
 105         assert(path);
 106
 107         if (!IN_SET(flags, 0, BPF_F_ALLOW_OVERRIDE, BPF_F_ALLOW_MULTI))
 108                 return -EINVAL;
 109
 110         /* We need to track which cgroup the program is attached to, and we can only track one attachment, hence let's
 111         * refuse this early. */
 112         if (p->attached_path) {
 113                 if (!path_equal(p->attached_path, path))
 114                         return -EBUSY;
 115                 if (p->attached_type != type)
 116                         return -EBUSY;
 117                 if (p->attached_flags != flags)
 118                         return -EBUSY;
 119
 120                 /* Here's a shortcut: if we previously attached this program already, then we don't have to do so
 121                  * again. Well, with one exception: if we are in BPF_F_ALLOW_OVERRIDE mode then someone else might have
 122                  * replaced our program since the last time, hence let's reattach it again, just to be safe. In flags
 123                  * == 0 mode this is not an issue since nobody else can replace our program in that case, and in flags
 124                  * == BPF_F_ALLOW_MULTI mode any other's program would be installed in addition to ours hence ours
 125                  * would remain in effect. */
 126                 if (flags != BPF_F_ALLOW_OVERRIDE)
 127                         return 0;
 128         }
 129
 130         /* Ensure we have a kernel object for this. */
 131         r = bpf_program_load_kernel(p, NULL, 0);
 132         if (r < 0)
 133                 return r;
 134
 135         copy = strdup(path);
 136         if (!copy)
 137                 return -ENOMEM;
 138
 139         fd = open(path, O_DIRECTORY|O_RDONLY|O_CLOEXEC);
 140         if (fd < 0)
 141                 return -errno;
 142
 143         attr = (union bpf_attr) {
 144                 .attach_type = type,
 145                 .target_fd = fd,
 146                 .attach_bpf_fd = p->kernel_fd,
 147                 .attach_flags = flags,
 148         };
 149
 150         if (bpf(BPF_PROG_ATTACH, &attr, sizeof(attr)) < 0)
 151                 return -errno;
 152
 153         free_and_replace(p->attached_path, copy);
 154         p->attached_type = type;
 155         p->attached_flags = flags;
 156
 157         return 0;
 158 }
 159
 160 int bpf_program_cgroup_detach(BPFProgram *p) {
 161         _cleanup_close_ int fd = -1;
 162
 163         assert(p);
 164
 165         if (!p->attached_path)
 166                 return -EUNATCH;
 167
 168         fd = open(p->attached_path, O_DIRECTORY|O_RDONLY|O_CLOEXEC);
 169         if (fd < 0) {
 170                 if (errno != ENOENT)
 171                         return -errno;
 172
 173                 /* If the cgroup does not exist anymore, then we don't have to explicitly detach, it got detached
 174                  * implicitly by the removal, hence don't complain */
 175
 176         } else {
 177                 union bpf_attr attr;
 178
 179                 attr = (union bpf_attr) {
 180                         .attach_type = p->attached_type,
 181                         .target_fd = fd,
 182                         .attach_bpf_fd = p->kernel_fd,
 183                 };
 184
 185                 if (bpf(BPF_PROG_DETACH, &attr, sizeof(attr)) < 0)
 186                         return -errno;
 187         }
 188
 189         p->attached_path = mfree(p->attached_path);
 190
 191         return 0;
 192 }
 193
 194 int bpf_map_new(enum bpf_map_type type, size_t key_size, size_t value_size, size_t max_entries, uint32_t flags) {
 195         union bpf_attr attr = {
 196                 .map_type = type,
 197                 .key_size = key_size,
 198                 .value_size = value_size,
 199                 .max_entries = max_entries,
 200                 .map_flags = flags,
 201         };
 202         int fd;
 203
 204         fd = bpf(BPF_MAP_CREATE, &attr, sizeof(attr));
 205         if (fd < 0)
 206                 return -errno;
 207
 208         return fd;
 209 }
 210
 211 int bpf_map_update_element(int fd, const void *key, void *value) {
 212
 213         union bpf_attr attr = {
 214                 .map_fd = fd,
 215                 .key = PTR_TO_UINT64(key),
 216                 .value = PTR_TO_UINT64(value),
 217         };
 218
 219         if (bpf(BPF_MAP_UPDATE_ELEM, &attr, sizeof(attr)) < 0)
 220                 return -errno;
 221
 222         return 0;
 223 }
 224
 225 int bpf_map_lookup_element(int fd, const void *key, void *value) {
 226
 227         union bpf_attr attr = {
 228                 .map_fd = fd,
 229                 .key = PTR_TO_UINT64(key),
 230                 .value = PTR_TO_UINT64(value),
 231         };
 232
 233         if (bpf(BPF_MAP_LOOKUP_ELEM, &attr, sizeof(attr)) < 0)
 234                 return -errno;
 235
 236         return 0;
 237 }