[thirdparty/systemd.git] / src / basic / bpf-program.c

/* SPDX-License-Identifier: LGPL-2.1+ */
/***
  This file is part of systemd.

  Copyright 2016 Daniel Mack

  systemd is free software; you can redistribute it and/or modify it
  under the terms of the GNU Lesser General Public License as published by
  the Free Software Foundation; either version 2.1 of the License, or
  (at your option) any later version.

  systemd is distributed in the hope that it will be useful, but
  WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  Lesser General Public License for more details.

  You should have received a copy of the GNU Lesser General Public License
  along with systemd; If not, see <http://www.gnu.org/licenses/>.
***/

#include <fcntl.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>

#include "alloc-util.h"
#include "bpf-program.h"
#include "fd-util.h"
#include "log.h"
#include "missing.h"
#include "path-util.h"
#include "util.h"

/* Allocates a new BPFProgram object of the given program type and stores it in *ret.
 *
 * The object starts out with a reference count of one and with kernel_fd set to -1, i.e. nothing has been
 * uploaded to the kernel for it yet. Returns 0 on success. If the allocation fails the result of log_oom() is
 * returned and *ret is not written. */
int bpf_program_new(uint32_t prog_type, BPFProgram **ret) {
        _cleanup_(bpf_program_unrefp) BPFProgram *prog = NULL;

        prog = new0(BPFProgram, 1);
        if (!prog)
                return log_oom();

        prog->kernel_fd = -1; /* no kernel object exists for this program yet */
        prog->prog_type = prog_type;
        prog->n_ref = 1;

        /* Hand ownership over to the caller, so that the cleanup handler has nothing left to release. */
        *ret = TAKE_PTR(prog);
        return 0;
}

/* Takes an additional reference on p and returns p again. Passing NULL is allowed and simply yields NULL. */
BPFProgram *bpf_program_ref(BPFProgram *p) {
        if (p) {
                /* A live object must hold at least one reference already. */
                assert(p->n_ref > 0);
                p->n_ref++;
        }

        return p;
}

/* Drops one reference from p. Passing NULL is allowed and is a no-op.
 *
 * While other references remain, only the counter is decremented and NULL is returned. When the last reference
 * goes away the program is detached from its cgroup (best effort), its kernel fd is closed and all memory
 * owned by the object is released; the function then returns the result of mfree(p). */
BPFProgram *bpf_program_unref(BPFProgram *p) {
        if (!p)
                return NULL;

        assert(p->n_ref > 0);

        if (--p->n_ref > 0)
                return NULL;

        /* The kernel currently does not detach a BPF program from its cgroup on its own when the last fd
         * referring to the program is closed. As a consequence, a process that attached a program to a cgroup
         * and then terminated abnormally leaves that program pinned, with no way to get rid of it until the
         * cgroup itself is removed. This hurts most when the cgroup is the root cgroup, or belongs to a
         * service that cannot be restarted at runtime (such as dbus): the memory used by the program can then
         * only be reclaimed by rebooting. To avoid this we remember which cgroup a program was attached to and
         * explicitly detach it ourselves before closing the BPF fd. Failures are deliberately ignored. */
        (void) bpf_program_cgroup_detach(p);

        safe_close(p->kernel_fd);

        /* attached_path is still set here if the detach above did not succeed. */
        free(p->attached_path);
        free(p->instructions);

        return mfree(p);
}

/* Appends 'count' instructions from the 'instructions' array to the instruction buffer of program p.
 *
 * The instructions are copied, hence the caller keeps ownership of the passed array.
 *
 * Returns 0 on success, -EBUSY if the program has already been uploaded to the kernel (kernel_fd >= 0) and
 * hence may not be changed anymore, and -ENOMEM if the buffer cannot be grown, which includes the case where
 * the resulting number of instructions would not be representable. Appending zero instructions is a
 * successful no-op, and 'instructions' may be NULL in that case only. */
int bpf_program_add_instructions(BPFProgram *p, const struct bpf_insn *instructions, size_t count) {

        assert(p);

        if (p->kernel_fd >= 0) /* don't allow modification after we uploaded things to the kernel */
                return -EBUSY;

        /* Nothing to append. Return early, so that we neither grow the buffer for no reason nor hand a
         * possibly-NULL source pointer to memcpy(), which is undefined behaviour even for a length of zero. */
        if (count == 0)
                return 0;

        assert(instructions);

        /* Refuse requests whose total would wrap around: otherwise the buffer would be sized for the wrapped
         * (small) value while memcpy() below would still copy 'count' elements.
         * NOTE(review): assumes n_instructions is no wider than size_t — confirm against bpf-program.h. */
        if (count > SIZE_MAX - p->n_instructions)
                return -ENOMEM;

        if (!GREEDY_REALLOC(p->instructions, p->allocated, p->n_instructions + count))
                return -ENOMEM;

        memcpy(p->instructions + p->n_instructions, instructions, sizeof(struct bpf_insn) * count);
        p->n_instructions += count;

        return 0;
}

/* Uploads the instructions collected in p to the kernel via BPF_PROG_LOAD and stores the resulting fd in
 * p->kernel_fd. The program is always declared with the license string "GPL".
 *
 * log_buf/log_size optionally describe a buffer that is passed to the kernel as log buffer; logging is
 * requested (log_level = 1) exactly when log_buf is non-NULL. Callers that are not interested in the log pass
 * NULL and 0.
 *
 * The call is idempotent: if the program already has a kernel fd nothing is loaded again, the log buffer (if
 * any) is zeroed, and 0 is returned.
 *
 * Returns 0 on success, or -errno if the bpf() call fails. */
int bpf_program_load_kernel(BPFProgram *p, char *log_buf, size_t log_size) {
        union bpf_attr attr;

        assert(p);

        if (p->kernel_fd >= 0) { /* make this idempotent */

                /* Only clear the log buffer if there actually is one: bpf_program_cgroup_attach() calls us
                 * with (NULL, 0), and passing a NULL pointer on to a memset()-style call is undefined
                 * behaviour even when the length is zero. */
                if (log_buf && log_size > 0)
                        memzero(log_buf, log_size);

                return 0;
        }

        attr = (union bpf_attr) {
                .prog_type = p->prog_type,
                .insns = PTR_TO_UINT64(p->instructions),
                .insn_cnt = p->n_instructions,
                .license = PTR_TO_UINT64("GPL"),
                .log_buf = PTR_TO_UINT64(log_buf),
                .log_level = !!log_buf,
                .log_size = log_size,
        };

        p->kernel_fd = bpf(BPF_PROG_LOAD, &attr, sizeof(attr));
        if (p->kernel_fd < 0)
                return -errno;

        return 0;
}

/* Attaches program p to the cgroup directory 'path', using the attach type 'type' and the attach flags
 * 'flags'. The program is loaded into the kernel first if that has not happened yet.
 *
 * Only one attachment is tracked per program. On success the path, type and flags of the attachment are
 * remembered in p, so that the program can be detached again later.
 *
 * Returns:
 *   0        on success, including the case where the identical attachment already exists and does not need
 *            to be redone
 *   -EINVAL  if flags is not one of 0, BPF_F_ALLOW_OVERRIDE, BPF_F_ALLOW_MULTI
 *   -EBUSY   if the program is already attached with a different path, type or flags
 *   -ENOMEM  if the path cannot be duplicated
 *   other    any error of bpf_program_load_kernel(), or -errno of the failed open()/bpf() call */
int bpf_program_cgroup_attach(BPFProgram *p, int type, const char *path, uint32_t flags) {
        _cleanup_free_ char *path_copy = NULL;
        _cleanup_close_ int cgroup_fd = -1;
        int r;

        assert(p);
        assert(type >= 0);
        assert(path);

        if (!IN_SET(flags, 0, BPF_F_ALLOW_OVERRIDE, BPF_F_ALLOW_MULTI))
                return -EINVAL;

        if (p->attached_path) {
                /* We can keep track of a single attachment only, hence refuse right away anything that
                 * differs from the attachment we already have. */
                if (!path_equal(p->attached_path, path) ||
                    p->attached_type != type ||
                    p->attached_flags != flags)
                        return -EBUSY;

                /* Same attachment as before, so usually there is nothing left to do. The exception is
                 * BPF_F_ALLOW_OVERRIDE: in that mode somebody else might have replaced our program in the
                 * meantime, hence attach again to be on the safe side. With flags == 0 nobody else can
                 * replace our program, and with BPF_F_ALLOW_MULTI other programs are installed in addition
                 * to ours, so in both of those modes ours is still in effect. */
                if (flags != BPF_F_ALLOW_OVERRIDE)
                        return 0;
        }

        /* Make sure a kernel object exists for the program; we have no use for the verifier log here. */
        r = bpf_program_load_kernel(p, NULL, 0);
        if (r < 0)
                return r;

        /* Duplicate the path before attaching, so that no allocation can fail once the kernel state has
         * been changed. */
        path_copy = strdup(path);
        if (!path_copy)
                return -ENOMEM;

        cgroup_fd = open(path, O_DIRECTORY|O_RDONLY|O_CLOEXEC);
        if (cgroup_fd < 0)
                return -errno;

        union bpf_attr attr = {
                .attach_type = type,
                .target_fd = cgroup_fd,
                .attach_bpf_fd = p->kernel_fd,
                .attach_flags = flags,
        };

        if (bpf(BPF_PROG_ATTACH, &attr, sizeof(attr)) < 0)
                return -errno;

        /* Attached successfully, now record where and how. */
        p->attached_flags = flags;
        p->attached_type = type;
        free_and_replace(p->attached_path, path_copy);

        return 0;
}

/* Detaches program p from the cgroup it was attached to via bpf_program_cgroup_attach().
 *
 * Returns 0 on success, in which case the remembered cgroup path is released and reset to NULL. Returns
 * -EUNATCH if no attachment is recorded for the program, and -errno if opening the cgroup directory fails for
 * any reason other than ENOENT, or if the BPF_PROG_DETACH call fails. In the error cases the recorded
 * attachment is left in place. */
int bpf_program_cgroup_detach(BPFProgram *p) {
        _cleanup_close_ int cgroup_fd = -1;

        assert(p);

        if (!p->attached_path)
                return -EUNATCH;

        cgroup_fd = open(p->attached_path, O_DIRECTORY|O_RDONLY|O_CLOEXEC);
        if (cgroup_fd >= 0) {
                union bpf_attr attr = {
                        .attach_type = p->attached_type,
                        .target_fd = cgroup_fd,
                        .attach_bpf_fd = p->kernel_fd,
                };

                if (bpf(BPF_PROG_DETACH, &attr, sizeof(attr)) < 0)
                        return -errno;

        } else if (errno != ENOENT)
                return -errno;

        /* If we get here with ENOENT the cgroup is gone already. Its removal detached the program implicitly,
         * hence there is nothing to do explicitly and nothing to complain about. */

        p->attached_path = mfree(p->attached_path);

        return 0;
}

/* Creates a new BPF map in the kernel via BPF_MAP_CREATE, with the given map type, key size, value size,
 * maximum number of entries and map flags.
 *
 * Returns the fd of the new map (>= 0) on success, or -errno on failure. */
int bpf_map_new(enum bpf_map_type type, size_t key_size, size_t value_size, size_t max_entries, uint32_t flags) {
        union bpf_attr attr = {
                .map_type = type,
                .key_size = key_size,
                .value_size = value_size,
                .max_entries = max_entries,
                .map_flags = flags,
        };
        int map_fd;

        map_fd = bpf(BPF_MAP_CREATE, &attr, sizeof(attr));

        return map_fd < 0 ? -errno : map_fd;
}

/* Issues BPF_MAP_UPDATE_ELEM on the BPF map referenced by fd, passing 'key' and 'value' to the kernel by
 * address.
 *
 * Returns 0 on success, or -errno on failure. */
int bpf_map_update_element(int fd, const void *key, void *value) {
        union bpf_attr attr = {
                .map_fd = fd,
                .key = PTR_TO_UINT64(key),
                .value = PTR_TO_UINT64(value),
        };

        return bpf(BPF_MAP_UPDATE_ELEM, &attr, sizeof(attr)) < 0 ? -errno : 0;
}

/* Issues BPF_MAP_LOOKUP_ELEM on the BPF map referenced by fd, passing 'key' and 'value' to the kernel by
 * address.
 *
 * Returns 0 on success, or -errno on failure. */
int bpf_map_lookup_element(int fd, const void *key, void *value) {
        union bpf_attr attr = {
                .map_fd = fd,
                .key = PTR_TO_UINT64(key),
                .value = PTR_TO_UINT64(value),
        };

        return bpf(BPF_MAP_LOOKUP_ELEM, &attr, sizeof(attr)) < 0 ? -errno : 0;
}
Commit	Line	Data
53e1b683	1	/* SPDX-License-Identifier: LGPL-2.1+ */
71e5200f DM	2	/***
	3	This file is part of systemd.
	4
	5	Copyright 2016 Daniel Mack
	6
	7	systemd is free software; you can redistribute it and/or modify it
	8	under the terms of the GNU Lesser General Public License as published by
	9	the Free Software Foundation; either version 2.1 of the License, or
	10	(at your option) any later version.
	11
	12	systemd is distributed in the hope that it will be useful, but
	13	WITHOUT ANY WARRANTY; without even the implied warranty of
	14	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
	15	Lesser General Public License for more details.
	16
	17	You should have received a copy of the GNU Lesser General Public License
	18	along with systemd; If not, see <http://www.gnu.org/licenses/>.
	19	***/
	20
	21	#include <fcntl.h>
	22	#include <sys/stat.h>
	23	#include <sys/types.h>
	24	#include <unistd.h>
	25
	26	#include "alloc-util.h"
	27	#include "bpf-program.h"
	28	#include "fd-util.h"
	29	#include "log.h"
	30	#include "missing.h"
aa2b6f1d	31	#include "path-util.h"
e0ad39fc	32	#include "util.h"
71e5200f DM	33
	34	int bpf_program_new(uint32_t prog_type, BPFProgram **ret) {
	35	_cleanup_(bpf_program_unrefp) BPFProgram *p = NULL;
	36
	37	p = new0(BPFProgram, 1);
	38	if (!p)
	39	return log_oom();
	40
aa2b6f1d	41	p->n_ref = 1;
71e5200f DM	42	p->prog_type = prog_type;
	43	p->kernel_fd = -1;
	44
1cc6c93a YW	45	*ret = TAKE_PTR(p);
1cc6c93a YW	46
71e5200f DM	47	return 0;
	48	}
	49
aa2b6f1d LP	50	BPFProgram *bpf_program_ref(BPFProgram *p) {
	51	if (!p)
	52	return NULL;
	53
	54	assert(p->n_ref > 0);
	55	p->n_ref++;
	56
	57	return p;
	58	}
	59
71e5200f DM	60	BPFProgram *bpf_program_unref(BPFProgram *p) {
	61	if (!p)
	62	return NULL;
	63
aa2b6f1d LP	64	assert(p->n_ref > 0);
	65	p->n_ref--;
	66
	67	if (p->n_ref > 0)
	68	return NULL;
	69
	70	/* Unfortunately, the kernel currently doesn't implicitly detach BPF programs from their cgroups when the last
	71	* fd to the BPF program is closed. This has nasty side-effects since this means that abnormally terminated
	72	* programs that attached one of their BPF programs to a cgroup will leave this programs pinned for good with
	73	* zero chance of recovery, until the cgroup is removed. This is particularly problematic if the cgroup in
	74	* question is the root cgroup (or any other cgroup belonging to a service that cannot be restarted during
	75	* operation, such as dbus), as the memory for the BPF program can only be reclaimed through a reboot. To
	76	* counter this, we track closely to which cgroup a program was attached to and will detach it on our own
	77	* whenever we close the BPF fd. */
	78	(void) bpf_program_cgroup_detach(p);
	79
71e5200f DM	80	safe_close(p->kernel_fd);
71e5200f DM	81	free(p->instructions);
aa2b6f1d	82	free(p->attached_path);
71e5200f DM	83
	84	return mfree(p);
	85	}
	86
	87	int bpf_program_add_instructions(BPFProgram *p, const struct bpf_insn *instructions, size_t count) {
	88
	89	assert(p);
	90
e0ad39fc LP	91	if (p->kernel_fd >= 0) /* don't allow modification after we uploaded things to the kernel */
	92	return -EBUSY;
	93
71e5200f DM	94	if (!GREEDY_REALLOC(p->instructions, p->allocated, p->n_instructions + count))
	95	return -ENOMEM;
	96
	97	memcpy(p->instructions + p->n_instructions, instructions, sizeof(struct bpf_insn) * count);
	98	p->n_instructions += count;
	99
	100	return 0;
	101	}
	102
	103	int bpf_program_load_kernel(BPFProgram *p, char *log_buf, size_t log_size) {
	104	union bpf_attr attr;
	105
	106	assert(p);
	107
e0ad39fc LP	108	if (p->kernel_fd >= 0) { /* make this idempotent */
	109	memzero(log_buf, log_size);
	110	return 0;
	111	}
71e5200f DM	112
	113	attr = (union bpf_attr) {
	114	.prog_type = p->prog_type,
	115	.insns = PTR_TO_UINT64(p->instructions),
	116	.insn_cnt = p->n_instructions,
	117	.license = PTR_TO_UINT64("GPL"),
	118	.log_buf = PTR_TO_UINT64(log_buf),
	119	.log_level = !!log_buf,
	120	.log_size = log_size,
	121	};
	122
	123	p->kernel_fd = bpf(BPF_PROG_LOAD, &attr, sizeof(attr));
	124	if (p->kernel_fd < 0)
	125	return -errno;
	126
	127	return 0;
	128	}
	129
9f2e6892	130	int bpf_program_cgroup_attach(BPFProgram *p, int type, const char *path, uint32_t flags) {
aa2b6f1d	131	_cleanup_free_ char *copy = NULL;
71e5200f DM	132	_cleanup_close_ int fd = -1;
71e5200f DM	133	union bpf_attr attr;
aa2b6f1d	134	int r;
71e5200f DM	135
	136	assert(p);
	137	assert(type >= 0);
	138	assert(path);
	139
aa2b6f1d LP	140	if (!IN_SET(flags, 0, BPF_F_ALLOW_OVERRIDE, BPF_F_ALLOW_MULTI))
	141	return -EINVAL;
	142
	143	/* We need to track which cgroup the program is attached to, and we can only track one attachment, hence let's
	144	* refuse this early. */
	145	if (p->attached_path) {
	146	if (!path_equal(p->attached_path, path))
	147	return -EBUSY;
	148	if (p->attached_type != type)
	149	return -EBUSY;
	150	if (p->attached_flags != flags)
	151	return -EBUSY;
	152
	153	/* Here's a shortcut: if we previously attached this program already, then we don't have to do so
	154	* again. Well, with one exception: if we are in BPF_F_ALLOW_OVERRIDE mode then someone else might have
	155	* replaced our program since the last time, hence let's reattach it again, just to be safe. In flags
	156	* == 0 mode this is not an issue since nobody else can replace our program in that case, and in flags
	157	* == BPF_F_ALLOW_MULTI mode any other's program would be installed in addition to ours hence ours
	158	* would remain in effect. */
	159	if (flags != BPF_F_ALLOW_OVERRIDE)
	160	return 0;
	161	}
	162
	163	/* Ensure we have a kernel object for this. */
	164	r = bpf_program_load_kernel(p, NULL, 0);
	165	if (r < 0)
	166	return r;
	167
	168	copy = strdup(path);
	169	if (!copy)
	170	return -ENOMEM;
	171
71e5200f DM	172	fd = open(path, O_DIRECTORY|O_RDONLY|O_CLOEXEC);
	173	if (fd < 0)
	174	return -errno;
	175
	176	attr = (union bpf_attr) {
	177	.attach_type = type,
	178	.target_fd = fd,
	179	.attach_bpf_fd = p->kernel_fd,
9f2e6892	180	.attach_flags = flags,
71e5200f DM	181	};
	182
	183	if (bpf(BPF_PROG_ATTACH, &attr, sizeof(attr)) < 0)
	184	return -errno;
	185
aa2b6f1d LP	186	free_and_replace(p->attached_path, copy);
	187	p->attached_type = type;
	188	p->attached_flags = flags;
	189
71e5200f DM	190	return 0;
	191	}
	192
aa2b6f1d	193	int bpf_program_cgroup_detach(BPFProgram *p) {
71e5200f	194	_cleanup_close_ int fd = -1;
71e5200f	195
aa2b6f1d	196	assert(p);
71e5200f	197
aa2b6f1d LP	198	if (!p->attached_path)
aa2b6f1d LP	199	return -EUNATCH;
9b3c1897	200
aa2b6f1d LP	201	fd = open(p->attached_path, O_DIRECTORY|O_RDONLY|O_CLOEXEC);
	202	if (fd < 0) {
	203	if (errno != ENOENT)
	204	return -errno;
71e5200f	205
aa2b6f1d LP	206	/* If the cgroup does not exist anymore, then we don't have to explicitly detach, it got detached
aa2b6f1d LP	207	* implicitly by the removal, hence don't complain */
71e5200f	208
aa2b6f1d LP	209	} else {
	210	union bpf_attr attr;
	211
	212	attr = (union bpf_attr) {
	213	.attach_type = p->attached_type,
	214	.target_fd = fd,
	215	.attach_bpf_fd = p->kernel_fd,
	216	};
	217
	218	if (bpf(BPF_PROG_DETACH, &attr, sizeof(attr)) < 0)
	219	return -errno;
	220	}
	221
	222	p->attached_path = mfree(p->attached_path);
71e5200f DM	223
	224	return 0;
	225	}
	226
	227	int bpf_map_new(enum bpf_map_type type, size_t key_size, size_t value_size, size_t max_entries, uint32_t flags) {
	228	union bpf_attr attr = {
	229	.map_type = type,
	230	.key_size = key_size,
	231	.value_size = value_size,
	232	.max_entries = max_entries,
	233	.map_flags = flags,
	234	};
	235	int fd;
	236
	237	fd = bpf(BPF_MAP_CREATE, &attr, sizeof(attr));
	238	if (fd < 0)
	239	return -errno;
	240
	241	return fd;
	242	}
	243
	244	int bpf_map_update_element(int fd, const void *key, void *value) {
	245
	246	union bpf_attr attr = {
	247	.map_fd = fd,
	248	.key = PTR_TO_UINT64(key),
	249	.value = PTR_TO_UINT64(value),
	250	};
	251
	252	if (bpf(BPF_MAP_UPDATE_ELEM, &attr, sizeof(attr)) < 0)
	253	return -errno;
	254
	255	return 0;
	256	}
	257
	258	int bpf_map_lookup_element(int fd, const void *key, void *value) {
	259
	260	union bpf_attr attr = {
	261	.map_fd = fd,
	262	.key = PTR_TO_UINT64(key),
	263	.value = PTR_TO_UINT64(value),
	264	};
	265
	266	if (bpf(BPF_MAP_LOOKUP_ELEM, &attr, sizeof(attr)) < 0)
	267	return -errno;
	268
	269	return 0;
	270	}