[thirdparty/systemd.git] / src / shared / bpf-program.c

/* SPDX-License-Identifier: LGPL-2.1+ */

#include <fcntl.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>

#include "alloc-util.h"
#include "bpf-program.h"
#include "fd-util.h"
#include "log.h"
#include "memory-util.h"
#include "missing.h"
#include "path-util.h"

/* Allocates a new BPFProgram object for the given BPF program type.
 *
 * The object starts out with a reference count of one and without a kernel fd (kernel_fd == -1), i.e. nothing
 * has been uploaded to the kernel yet. On success the object is stored in *ret and 0 is returned. If the
 * allocation fails the return value of log_oom() is propagated and *ret is left untouched. */
int bpf_program_new(uint32_t prog_type, BPFProgram **ret) {
        _cleanup_(bpf_program_unrefp) BPFProgram *prog = new0(BPFProgram, 1);

        if (!prog)
                return log_oom();

        prog->kernel_fd = -1; /* not loaded into the kernel yet */
        prog->prog_type = prog_type;
        prog->n_ref = 1;

        /* Hand ownership to the caller; TAKE_PTR() resets our local so the cleanup handler does nothing. */
        *ret = TAKE_PTR(prog);
        return 0;
}

/* Destructor for BPFProgram: detaches the program from its cgroup (best effort), closes the kernel fd and
 * releases all memory owned by the object. Returns whatever mfree() returns, so that callers can reset
 * their pointer in one go. */
static BPFProgram *bpf_program_free(BPFProgram *p) {
        assert(p);

        /* The kernel currently does not detach a BPF program from its cgroup on its own when the last fd
         * referring to the program goes away. As a consequence, a process that attached a program to a cgroup
         * and then terminated abnormally leaves that program pinned, with no way to recover it until the
         * cgroup itself is removed. That is especially bad for the root cgroup (or for the cgroup of any
         * service that cannot be restarted at runtime, such as dbus), because then the program's memory can
         * only be reclaimed by rebooting. To avoid this we keep track of the cgroup a program was attached to
         * and detach it ourselves whenever we are about to close the BPF fd. Failures are deliberately
         * ignored here, there is nothing sensible we could do about them. */
        (void) bpf_program_cgroup_detach(p);

        /* Only release the fd and the path after the detach above, which still reads both of them. */
        safe_close(p->kernel_fd);
        free(p->attached_path);
        free(p->instructions);

        return mfree(p);
}

/* Instantiates the reference counting helpers for BPFProgram, with bpf_program_free() as the function that
 * destroys the object. NOTE(review): presumably this expands to bpf_program_ref()/bpf_program_unref()
 * operating on the n_ref field — confirm against the macro definition. */
DEFINE_TRIVIAL_REF_UNREF_FUNC(BPFProgram, bpf_program, bpf_program_free);

/* Appends 'count' instructions taken from the 'instructions' array to the end of the program's instruction
 * buffer, growing the buffer as needed.
 *
 * Returns 0 on success (including the trivial case count == 0), -EBUSY if the program was already uploaded
 * to the kernel (kernel_fd >= 0) and hence may not be modified anymore, and -ENOMEM if the buffer could not
 * be enlarged or the resulting number of instructions would not fit into a size_t. */
int bpf_program_add_instructions(BPFProgram *p, const struct bpf_insn *instructions, size_t count) {

        assert(p);

        if (p->kernel_fd >= 0) /* don't allow modification after we uploaded things to the kernel */
                return -EBUSY;

        /* Appending nothing is a NOP. Leave early so that we neither ask the allocator for zero growth nor
         * pass a possibly NULL source pointer to memcpy(), which is undefined behaviour even for a length of
         * zero. */
        if (count == 0)
                return 0;

        assert(instructions);

        /* Refuse if the sum below would wrap around, as that would make us allocate too little and then
         * write past the end of the buffer. */
        if (count > SIZE_MAX - p->n_instructions)
                return -ENOMEM;

        if (!GREEDY_REALLOC(p->instructions, p->allocated, p->n_instructions + count))
                return -ENOMEM;

        memcpy(p->instructions + p->n_instructions, instructions, sizeof(struct bpf_insn) * count);
        p->n_instructions += count;

        return 0;
}

/* Uploads the collected instructions to the kernel via BPF_PROG_LOAD and remembers the resulting fd in
 * p->kernel_fd.
 *
 * 'log_buf'/'log_size' describe an optional buffer that is passed to the kernel as log buffer; logging is
 * requested (log_level = 1) only if a buffer was supplied. Calling this on a program that already has a
 * kernel fd is a NOP that merely zeroes the log buffer.
 *
 * Returns 0 on success, or a negative errno-style error if the bpf() call fails. */
int bpf_program_load_kernel(BPFProgram *p, char *log_buf, size_t log_size) {
        union bpf_attr attr;
        int fd;

        assert(p);

        /* Already uploaded? Then there is nothing to do and nothing to log. (make this idempotent) */
        if (p->kernel_fd >= 0) {
                memzero(log_buf, log_size);
                return 0;
        }

        attr = (union bpf_attr) {
                .prog_type = p->prog_type,
                .insns = PTR_TO_UINT64(p->instructions),
                .insn_cnt = p->n_instructions,
                .license = PTR_TO_UINT64("GPL"),
                .log_buf = PTR_TO_UINT64(log_buf),
                .log_level = !!log_buf,
                .log_size = log_size,
        };

        /* Store the result unconditionally: on failure the fd field simply stays negative. */
        fd = bpf(BPF_PROG_LOAD, &attr, sizeof(attr));
        p->kernel_fd = fd;

        return fd < 0 ? -errno : 0;
}

/* Attaches the program to the cgroup directory 'path' with attach type 'type' and attach flags 'flags',
 * loading the program into the kernel first if that has not happened yet.
 *
 * 'flags' must be exactly one of 0, BPF_F_ALLOW_OVERRIDE or BPF_F_ALLOW_MULTI. Only a single attachment is
 * tracked per program object: attaching again with a different path, type or flags yields -EBUSY.
 *
 * Returns 0 on success, -EINVAL for unsupported flags, -EBUSY as described above, -ENOMEM if the path
 * cannot be copied, or a negative errno-style error from loading the program, open() or bpf(). */
int bpf_program_cgroup_attach(BPFProgram *p, int type, const char *path, uint32_t flags) {
        _cleanup_free_ char *path_copy = NULL;
        _cleanup_close_ int cgroup_fd = -1;
        union bpf_attr attr;
        int r;

        assert(p);
        assert(type >= 0);
        assert(path);

        if (!IN_SET(flags, 0, BPF_F_ALLOW_OVERRIDE, BPF_F_ALLOW_MULTI))
                return -EINVAL;

        if (p->attached_path) {
                /* We have to remember the cgroup we are attached to, and can remember only one attachment.
                 * Anything that does not match the existing attachment is hence refused early. */
                if (!path_equal(p->attached_path, path) ||
                    p->attached_type != type ||
                    p->attached_flags != flags)
                        return -EBUSY;

                /* Same attachment as before, so usually there is nothing left to do. The exception is
                 * BPF_F_ALLOW_OVERRIDE mode: somebody else might have replaced our program in the meantime,
                 * hence we attach once more to be on the safe side. With flags == 0 nobody else can replace
                 * our program, and with BPF_F_ALLOW_MULTI foreign programs are installed in addition to ours,
                 * so in both of these modes ours remains in effect. */
                if (flags != BPF_F_ALLOW_OVERRIDE)
                        return 0;
        }

        /* Make sure the program actually exists as kernel object. */
        r = bpf_program_load_kernel(p, NULL, 0);
        if (r < 0)
                return r;

        /* Copy the path up front, so that no allocation can fail anymore once the attach went through. */
        path_copy = strdup(path);
        if (!path_copy)
                return -ENOMEM;

        cgroup_fd = open(path, O_DIRECTORY|O_RDONLY|O_CLOEXEC);
        if (cgroup_fd < 0)
                return -errno;

        attr = (union bpf_attr) {
                .attach_type = type,
                .target_fd = cgroup_fd,
                .attach_bpf_fd = p->kernel_fd,
                .attach_flags = flags,
        };

        if (bpf(BPF_PROG_ATTACH, &attr, sizeof(attr)) < 0)
                return -errno;

        /* Record what we attached to, so that bpf_program_cgroup_detach() can undo it later. */
        p->attached_flags = flags;
        p->attached_type = type;
        free_and_replace(p->attached_path, path_copy);

        return 0;
}

/* Detaches the program from the cgroup recorded by a previous bpf_program_cgroup_attach() and forgets the
 * recorded path.
 *
 * Returns 0 on success (also if the cgroup directory is gone already), -EUNATCH if no attachment is
 * recorded, or a negative errno-style error if open() fails for another reason or the bpf() call fails. In
 * the error cases the recorded path is kept. */
int bpf_program_cgroup_detach(BPFProgram *p) {
        _cleanup_close_ int cgroup_fd = -1;
        union bpf_attr attr;

        assert(p);

        if (!p->attached_path)
                return -EUNATCH;

        cgroup_fd = open(p->attached_path, O_DIRECTORY|O_RDONLY|O_CLOEXEC);
        if (cgroup_fd >= 0) {
                attr = (union bpf_attr) {
                        .attach_type = p->attached_type,
                        .target_fd = cgroup_fd,
                        .attach_bpf_fd = p->kernel_fd,
                };

                if (bpf(BPF_PROG_DETACH, &attr, sizeof(attr)) < 0)
                        return -errno;

        } else if (errno != ENOENT)
                return -errno;

        /* On ENOENT we fall through to here: a cgroup that does not exist anymore needs no explicit detach,
         * the removal detached the program implicitly, hence we do not complain. */

        p->attached_path = mfree(p->attached_path);

        return 0;
}

/* Creates a new BPF map of the given type, key size, value size, maximum number of entries and map flags
 * via BPF_MAP_CREATE.
 *
 * Returns the file descriptor of the new map (>= 0) on success, or a negative errno-style error. */
int bpf_map_new(enum bpf_map_type type, size_t key_size, size_t value_size, size_t max_entries, uint32_t flags) {
        union bpf_attr attr = {
                .map_type = type,
                .key_size = key_size,
                .value_size = value_size,
                .max_entries = max_entries,
                .map_flags = flags,
        };
        int map_fd;

        map_fd = bpf(BPF_MAP_CREATE, &attr, sizeof(attr));

        return map_fd < 0 ? -errno : map_fd;
}

/* Stores 'value' under 'key' in the BPF map referenced by 'fd', using BPF_MAP_UPDATE_ELEM. No update flags
 * are set in the request.
 *
 * Returns 0 on success, or a negative errno-style error. */
int bpf_map_update_element(int fd, const void *key, void *value) {
        union bpf_attr attr = {
                .map_fd = fd,
                .key = PTR_TO_UINT64(key),
                .value = PTR_TO_UINT64(value),
        };
        int r;

        r = bpf(BPF_MAP_UPDATE_ELEM, &attr, sizeof(attr));

        return r < 0 ? -errno : 0;
}

/* Looks up 'key' in the BPF map referenced by 'fd' using BPF_MAP_LOOKUP_ELEM; the address of 'value' is
 * passed to the kernel as the value buffer of the request.
 *
 * Returns 0 on success, or a negative errno-style error. */
int bpf_map_lookup_element(int fd, const void *key, void *value) {
        union bpf_attr attr = {
                .map_fd = fd,
                .key = PTR_TO_UINT64(key),
                .value = PTR_TO_UINT64(value),
        };
        int r;

        r = bpf(BPF_MAP_LOOKUP_ELEM, &attr, sizeof(attr));

        return r < 0 ? -errno : 0;
}
Commit	Line	Data
53e1b683	1	/* SPDX-License-Identifier: LGPL-2.1+ */
71e5200f DM	2
	3	#include <fcntl.h>
	4	#include <sys/stat.h>
	5	#include <sys/types.h>
	6	#include <unistd.h>
	7
	8	#include "alloc-util.h"
	9	#include "bpf-program.h"
	10	#include "fd-util.h"
	11	#include "log.h"
0a970718	12	#include "memory-util.h"
71e5200f	13	#include "missing.h"
aa2b6f1d	14	#include "path-util.h"
71e5200f DM	15
	16	int bpf_program_new(uint32_t prog_type, BPFProgram **ret) {
	17	_cleanup_(bpf_program_unrefp) BPFProgram *p = NULL;
	18
	19	p = new0(BPFProgram, 1);
	20	if (!p)
	21	return log_oom();
	22
aa2b6f1d	23	p->n_ref = 1;
71e5200f DM	24	p->prog_type = prog_type;
	25	p->kernel_fd = -1;
	26
1cc6c93a YW	27	*ret = TAKE_PTR(p);
1cc6c93a YW	28
71e5200f DM	29	return 0;
	30	}
	31
8301aa0b YW	32	static BPFProgram *bpf_program_free(BPFProgram *p) {
8301aa0b YW	33	assert(p);
aa2b6f1d LP	34
	35	/* Unfortunately, the kernel currently doesn't implicitly detach BPF programs from their cgroups when the last
	36	* fd to the BPF program is closed. This has nasty side-effects since this means that abnormally terminated
	37	* programs that attached one of their BPF programs to a cgroup will leave this programs pinned for good with
	38	* zero chance of recovery, until the cgroup is removed. This is particularly problematic if the cgroup in
	39	* question is the root cgroup (or any other cgroup belonging to a service that cannot be restarted during
	40	* operation, such as dbus), as the memory for the BPF program can only be reclaimed through a reboot. To
	41	* counter this, we track closely to which cgroup a program was attached to and will detach it on our own
	42	* whenever we close the BPF fd. */
	43	(void) bpf_program_cgroup_detach(p);
	44
71e5200f DM	45	safe_close(p->kernel_fd);
71e5200f DM	46	free(p->instructions);
aa2b6f1d	47	free(p->attached_path);
71e5200f DM	48
	49	return mfree(p);
	50	}
	51
8301aa0b YW	52	DEFINE_TRIVIAL_REF_UNREF_FUNC(BPFProgram, bpf_program, bpf_program_free);
8301aa0b YW	53
71e5200f DM	54	int bpf_program_add_instructions(BPFProgram *p, const struct bpf_insn *instructions, size_t count) {
	55
	56	assert(p);
	57
e0ad39fc LP	58	if (p->kernel_fd >= 0) /* don't allow modification after we uploaded things to the kernel */
	59	return -EBUSY;
	60
71e5200f DM	61	if (!GREEDY_REALLOC(p->instructions, p->allocated, p->n_instructions + count))
	62	return -ENOMEM;
	63
	64	memcpy(p->instructions + p->n_instructions, instructions, sizeof(struct bpf_insn) * count);
	65	p->n_instructions += count;
	66
	67	return 0;
	68	}
	69
	70	int bpf_program_load_kernel(BPFProgram *p, char *log_buf, size_t log_size) {
	71	union bpf_attr attr;
	72
	73	assert(p);
	74
e0ad39fc LP	75	if (p->kernel_fd >= 0) { /* make this idempotent */
	76	memzero(log_buf, log_size);
	77	return 0;
	78	}
71e5200f DM	79
	80	attr = (union bpf_attr) {
	81	.prog_type = p->prog_type,
	82	.insns = PTR_TO_UINT64(p->instructions),
	83	.insn_cnt = p->n_instructions,
	84	.license = PTR_TO_UINT64("GPL"),
	85	.log_buf = PTR_TO_UINT64(log_buf),
	86	.log_level = !!log_buf,
	87	.log_size = log_size,
	88	};
	89
	90	p->kernel_fd = bpf(BPF_PROG_LOAD, &attr, sizeof(attr));
	91	if (p->kernel_fd < 0)
	92	return -errno;
	93
	94	return 0;
	95	}
	96
9f2e6892	97	int bpf_program_cgroup_attach(BPFProgram *p, int type, const char *path, uint32_t flags) {
aa2b6f1d	98	_cleanup_free_ char *copy = NULL;
71e5200f DM	99	_cleanup_close_ int fd = -1;
71e5200f DM	100	union bpf_attr attr;
aa2b6f1d	101	int r;
71e5200f DM	102
	103	assert(p);
	104	assert(type >= 0);
	105	assert(path);
	106
aa2b6f1d LP	107	if (!IN_SET(flags, 0, BPF_F_ALLOW_OVERRIDE, BPF_F_ALLOW_MULTI))
	108	return -EINVAL;
	109
	110	/* We need to track which cgroup the program is attached to, and we can only track one attachment, hence let's
	111	* refuse this early. */
	112	if (p->attached_path) {
	113	if (!path_equal(p->attached_path, path))
	114	return -EBUSY;
	115	if (p->attached_type != type)
	116	return -EBUSY;
	117	if (p->attached_flags != flags)
	118	return -EBUSY;
	119
	120	/* Here's a shortcut: if we previously attached this program already, then we don't have to do so
	121	* again. Well, with one exception: if we are in BPF_F_ALLOW_OVERRIDE mode then someone else might have
	122	* replaced our program since the last time, hence let's reattach it again, just to be safe. In flags
	123	* == 0 mode this is not an issue since nobody else can replace our program in that case, and in flags
	124	* == BPF_F_ALLOW_MULTI mode any other's program would be installed in addition to ours hence ours
	125	* would remain in effect. */
	126	if (flags != BPF_F_ALLOW_OVERRIDE)
	127	return 0;
	128	}
	129
	130	/* Ensure we have a kernel object for this. */
	131	r = bpf_program_load_kernel(p, NULL, 0);
	132	if (r < 0)
	133	return r;
	134
	135	copy = strdup(path);
	136	if (!copy)
	137	return -ENOMEM;
	138
71e5200f DM	139	fd = open(path, O_DIRECTORY|O_RDONLY|O_CLOEXEC);
	140	if (fd < 0)
	141	return -errno;
	142
	143	attr = (union bpf_attr) {
	144	.attach_type = type,
	145	.target_fd = fd,
	146	.attach_bpf_fd = p->kernel_fd,
9f2e6892	147	.attach_flags = flags,
71e5200f DM	148	};
	149
	150	if (bpf(BPF_PROG_ATTACH, &attr, sizeof(attr)) < 0)
	151	return -errno;
	152
aa2b6f1d LP	153	free_and_replace(p->attached_path, copy);
	154	p->attached_type = type;
	155	p->attached_flags = flags;
	156
71e5200f DM	157	return 0;
	158	}
	159
aa2b6f1d	160	int bpf_program_cgroup_detach(BPFProgram *p) {
71e5200f	161	_cleanup_close_ int fd = -1;
71e5200f	162
aa2b6f1d	163	assert(p);
71e5200f	164
aa2b6f1d LP	165	if (!p->attached_path)
aa2b6f1d LP	166	return -EUNATCH;
9b3c1897	167
aa2b6f1d LP	168	fd = open(p->attached_path, O_DIRECTORY|O_RDONLY|O_CLOEXEC);
	169	if (fd < 0) {
	170	if (errno != ENOENT)
	171	return -errno;
71e5200f	172
aa2b6f1d LP	173	/* If the cgroup does not exist anymore, then we don't have to explicitly detach, it got detached
aa2b6f1d LP	174	* implicitly by the removal, hence don't complain */
71e5200f	175
aa2b6f1d LP	176	} else {
	177	union bpf_attr attr;
	178
	179	attr = (union bpf_attr) {
	180	.attach_type = p->attached_type,
	181	.target_fd = fd,
	182	.attach_bpf_fd = p->kernel_fd,
	183	};
	184
	185	if (bpf(BPF_PROG_DETACH, &attr, sizeof(attr)) < 0)
	186	return -errno;
	187	}
	188
	189	p->attached_path = mfree(p->attached_path);
71e5200f DM	190
	191	return 0;
	192	}
	193
	194	int bpf_map_new(enum bpf_map_type type, size_t key_size, size_t value_size, size_t max_entries, uint32_t flags) {
	195	union bpf_attr attr = {
	196	.map_type = type,
	197	.key_size = key_size,
	198	.value_size = value_size,
	199	.max_entries = max_entries,
	200	.map_flags = flags,
	201	};
	202	int fd;
	203
	204	fd = bpf(BPF_MAP_CREATE, &attr, sizeof(attr));
	205	if (fd < 0)
	206	return -errno;
	207
	208	return fd;
	209	}
	210
	211	int bpf_map_update_element(int fd, const void *key, void *value) {
	212
	213	union bpf_attr attr = {
	214	.map_fd = fd,
	215	.key = PTR_TO_UINT64(key),
	216	.value = PTR_TO_UINT64(value),
	217	};
	218
	219	if (bpf(BPF_MAP_UPDATE_ELEM, &attr, sizeof(attr)) < 0)
	220	return -errno;
	221
	222	return 0;
	223	}
	224
	225	int bpf_map_lookup_element(int fd, const void *key, void *value) {
	226
	227	union bpf_attr attr = {
	228	.map_fd = fd,
	229	.key = PTR_TO_UINT64(key),
	230	.value = PTR_TO_UINT64(value),
	231	};
	232
	233	if (bpf(BPF_MAP_LOOKUP_ELEM, &attr, sizeof(attr)) < 0)
	234	return -errno;
	235
	236	return 0;
	237	}