]>
Commit | Line | Data |
---|---|---|
db9ecf05 | 1 | /* SPDX-License-Identifier: LGPL-2.1-or-later */ |
71e5200f DM |
2 | |
3 | #include <fcntl.h> | |
4 | #include <sys/stat.h> | |
5 | #include <sys/types.h> | |
6 | #include <unistd.h> | |
7 | ||
8 | #include "alloc-util.h" | |
9 | #include "bpf-program.h" | |
10 | #include "fd-util.h" | |
0a970718 | 11 | #include "memory-util.h" |
f5947a5e | 12 | #include "missing_syscall.h" |
aa2b6f1d | 13 | #include "path-util.h" |
71e5200f DM |
14 | |
15 | int bpf_program_new(uint32_t prog_type, BPFProgram **ret) { | |
16 | _cleanup_(bpf_program_unrefp) BPFProgram *p = NULL; | |
17 | ||
18 | p = new0(BPFProgram, 1); | |
19 | if (!p) | |
ca39a3ce | 20 | return -ENOMEM; |
71e5200f | 21 | |
aa2b6f1d | 22 | p->n_ref = 1; |
71e5200f DM |
23 | p->prog_type = prog_type; |
24 | p->kernel_fd = -1; | |
25 | ||
1cc6c93a YW |
26 | *ret = TAKE_PTR(p); |
27 | ||
71e5200f DM |
28 | return 0; |
29 | } | |
30 | ||
8301aa0b YW |
/* Destructor invoked by the unref helper when the last reference is dropped.
 * Detaches the program from its cgroup (if attached), closes the kernel fd,
 * and releases all memory. Always returns NULL so callers can do
 * `p = bpf_program_free(p)`. */
static BPFProgram *bpf_program_free(BPFProgram *p) {
        assert(p);

        /* Unfortunately, the kernel currently doesn't implicitly detach BPF programs from their cgroups when the last
         * fd to the BPF program is closed. This has nasty side-effects since this means that abnormally terminated
         * programs that attached one of their BPF programs to a cgroup will leave these programs pinned for good with
         * zero chance of recovery, until the cgroup is removed. This is particularly problematic if the cgroup in
         * question is the root cgroup (or any other cgroup belonging to a service that cannot be restarted during
         * operation, such as dbus), as the memory for the BPF program can only be reclaimed through a reboot. To
         * counter this, we track closely to which cgroup a program was attached to and will detach it on our own
         * whenever we close the BPF fd. */
        (void) bpf_program_cgroup_detach(p); /* best-effort; errors deliberately ignored during teardown */

        safe_close(p->kernel_fd);
        free(p->instructions);
        free(p->attached_path);

        return mfree(p);
}
50 | ||
8301aa0b YW |
/* Generates bpf_program_ref()/bpf_program_unref() around the n_ref counter;
 * unref calls bpf_program_free() once the count reaches zero. */
DEFINE_TRIVIAL_REF_UNREF_FUNC(BPFProgram, bpf_program, bpf_program_free);
52 | ||
71e5200f DM |
53 | int bpf_program_add_instructions(BPFProgram *p, const struct bpf_insn *instructions, size_t count) { |
54 | ||
55 | assert(p); | |
56 | ||
e0ad39fc LP |
57 | if (p->kernel_fd >= 0) /* don't allow modification after we uploaded things to the kernel */ |
58 | return -EBUSY; | |
59 | ||
71e5200f DM |
60 | if (!GREEDY_REALLOC(p->instructions, p->allocated, p->n_instructions + count)) |
61 | return -ENOMEM; | |
62 | ||
63 | memcpy(p->instructions + p->n_instructions, instructions, sizeof(struct bpf_insn) * count); | |
64 | p->n_instructions += count; | |
65 | ||
66 | return 0; | |
67 | } | |
68 | ||
69 | int bpf_program_load_kernel(BPFProgram *p, char *log_buf, size_t log_size) { | |
70 | union bpf_attr attr; | |
71 | ||
72 | assert(p); | |
73 | ||
e0ad39fc LP |
74 | if (p->kernel_fd >= 0) { /* make this idempotent */ |
75 | memzero(log_buf, log_size); | |
76 | return 0; | |
77 | } | |
71e5200f | 78 | |
28abf5ad LB |
79 | // FIXME: Clang doesn't 0-pad with structured initialization, causing |
80 | // the kernel to reject the bpf_attr as invalid. See: | |
81 | // https://github.com/torvalds/linux/blob/v5.9/kernel/bpf/syscall.c#L65 | |
82 | // Ideally it should behave like GCC, so that we can remove these workarounds. | |
83 | zero(attr); | |
71e5200f DM |
84 | attr = (union bpf_attr) { |
85 | .prog_type = p->prog_type, | |
86 | .insns = PTR_TO_UINT64(p->instructions), | |
87 | .insn_cnt = p->n_instructions, | |
88 | .license = PTR_TO_UINT64("GPL"), | |
89 | .log_buf = PTR_TO_UINT64(log_buf), | |
90 | .log_level = !!log_buf, | |
91 | .log_size = log_size, | |
92 | }; | |
93 | ||
94 | p->kernel_fd = bpf(BPF_PROG_LOAD, &attr, sizeof(attr)); | |
95 | if (p->kernel_fd < 0) | |
96 | return -errno; | |
97 | ||
98 | return 0; | |
99 | } | |
100 | ||
fab34748 KL |
101 | int bpf_program_load_from_bpf_fs(BPFProgram *p, const char *path) { |
102 | union bpf_attr attr; | |
103 | ||
104 | assert(p); | |
105 | ||
106 | if (p->kernel_fd >= 0) /* don't overwrite an assembled or loaded program */ | |
107 | return -EBUSY; | |
108 | ||
28abf5ad | 109 | zero(attr); |
fab34748 KL |
110 | attr = (union bpf_attr) { |
111 | .pathname = PTR_TO_UINT64(path), | |
112 | }; | |
113 | ||
114 | p->kernel_fd = bpf(BPF_OBJ_GET, &attr, sizeof(attr)); | |
115 | if (p->kernel_fd < 0) | |
116 | return -errno; | |
117 | ||
118 | return 0; | |
119 | } | |
120 | ||
/* Attaches the program to the cgroup at `path` with the given attach type and
 * flags, uploading it to the kernel first if necessary. Records the
 * attachment (path/type/flags) on the program so that bpf_program_free() can
 * detach it again.
 *
 * flags must be 0, BPF_F_ALLOW_OVERRIDE or BPF_F_ALLOW_MULTI.
 * Returns 0 on success; -EINVAL for invalid flags, -EBUSY if already attached
 * elsewhere or differently, -ENOMEM, or a negative errno from open()/bpf(). */
int bpf_program_cgroup_attach(BPFProgram *p, int type, const char *path, uint32_t flags) {
        _cleanup_free_ char *copy = NULL;
        _cleanup_close_ int fd = -1;
        union bpf_attr attr;
        int r;

        assert(p);
        assert(type >= 0);
        assert(path);

        if (!IN_SET(flags, 0, BPF_F_ALLOW_OVERRIDE, BPF_F_ALLOW_MULTI))
                return -EINVAL;

        /* We need to track which cgroup the program is attached to, and we can only track one attachment, hence let's
         * refuse this early. */
        if (p->attached_path) {
                if (!path_equal(p->attached_path, path))
                        return -EBUSY;
                if (p->attached_type != type)
                        return -EBUSY;
                if (p->attached_flags != flags)
                        return -EBUSY;

                /* Here's a shortcut: if we previously attached this program already, then we don't have to do so
                 * again. Well, with one exception: if we are in BPF_F_ALLOW_OVERRIDE mode then someone else might have
                 * replaced our program since the last time, hence let's reattach it again, just to be safe. In flags
                 * == 0 mode this is not an issue since nobody else can replace our program in that case, and in flags
                 * == BPF_F_ALLOW_MULTI mode any other's program would be installed in addition to ours hence ours
                 * would remain in effect. */
                if (flags != BPF_F_ALLOW_OVERRIDE)
                        return 0;
        }

        /* Ensure we have a kernel object for this. */
        r = bpf_program_load_kernel(p, NULL, 0);
        if (r < 0)
                return r;

        /* Duplicate the path *before* the attach syscall, so that recording a
         * successful attachment below cannot fail on OOM. */
        copy = strdup(path);
        if (!copy)
                return -ENOMEM;

        fd = open(path, O_DIRECTORY|O_RDONLY|O_CLOEXEC);
        if (fd < 0)
                return -errno;

        /* Zero the whole union first; Clang may leave padding unzeroed in the
         * structured initialization below, which the kernel rejects. */
        zero(attr);
        attr = (union bpf_attr) {
                .attach_type = type,
                .target_fd = fd,
                .attach_bpf_fd = p->kernel_fd,
                .attach_flags = flags,
        };

        if (bpf(BPF_PROG_ATTACH, &attr, sizeof(attr)) < 0)
                return -errno;

        /* Attach succeeded: remember where/how we attached, for later detach. */
        free_and_replace(p->attached_path, copy);
        p->attached_type = type;
        p->attached_flags = flags;

        return 0;
}
184 | ||
/* Detaches the program from the cgroup it was previously attached to via
 * bpf_program_cgroup_attach(), and forgets the recorded attachment.
 *
 * Returns 0 on success (including when the cgroup is already gone),
 * -EUNATCH if the program is not attached anywhere, or a negative errno
 * from open()/bpf(). */
int bpf_program_cgroup_detach(BPFProgram *p) {
        _cleanup_close_ int fd = -1;

        assert(p);

        if (!p->attached_path)
                return -EUNATCH;

        fd = open(p->attached_path, O_DIRECTORY|O_RDONLY|O_CLOEXEC);
        if (fd < 0) {
                if (errno != ENOENT)
                        return -errno;

                /* If the cgroup does not exist anymore, then we don't have to explicitly detach, it got detached
                 * implicitly by the removal, hence don't complain */

        } else {
                union bpf_attr attr;

                /* Zero the whole union first; Clang may leave padding unzeroed
                 * in the structured initialization below, which the kernel
                 * rejects. */
                zero(attr);
                attr = (union bpf_attr) {
                        .attach_type = p->attached_type,
                        .target_fd = fd,
                        .attach_bpf_fd = p->kernel_fd,
                };

                if (bpf(BPF_PROG_DETACH, &attr, sizeof(attr)) < 0)
                        return -errno;
        }

        /* Only clear the recorded attachment once detaching actually succeeded
         * (or turned out to be unnecessary). */
        p->attached_path = mfree(p->attached_path);

        return 0;
}
219 | ||
220 | int bpf_map_new(enum bpf_map_type type, size_t key_size, size_t value_size, size_t max_entries, uint32_t flags) { | |
221 | union bpf_attr attr = { | |
222 | .map_type = type, | |
223 | .key_size = key_size, | |
224 | .value_size = value_size, | |
225 | .max_entries = max_entries, | |
226 | .map_flags = flags, | |
227 | }; | |
228 | int fd; | |
229 | ||
230 | fd = bpf(BPF_MAP_CREATE, &attr, sizeof(attr)); | |
231 | if (fd < 0) | |
232 | return -errno; | |
233 | ||
234 | return fd; | |
235 | } | |
236 | ||
237 | int bpf_map_update_element(int fd, const void *key, void *value) { | |
238 | ||
239 | union bpf_attr attr = { | |
240 | .map_fd = fd, | |
241 | .key = PTR_TO_UINT64(key), | |
242 | .value = PTR_TO_UINT64(value), | |
243 | }; | |
244 | ||
245 | if (bpf(BPF_MAP_UPDATE_ELEM, &attr, sizeof(attr)) < 0) | |
246 | return -errno; | |
247 | ||
248 | return 0; | |
249 | } | |
250 | ||
251 | int bpf_map_lookup_element(int fd, const void *key, void *value) { | |
252 | ||
253 | union bpf_attr attr = { | |
254 | .map_fd = fd, | |
255 | .key = PTR_TO_UINT64(key), | |
256 | .value = PTR_TO_UINT64(value), | |
257 | }; | |
258 | ||
259 | if (bpf(BPF_MAP_LOOKUP_ELEM, &attr, sizeof(attr)) < 0) | |
260 | return -errno; | |
261 | ||
262 | return 0; | |
263 | } |