/* SPDX-License-Identifier: LGPL-2.1+ */
/***
  This file is part of systemd.

  Copyright 2016 Daniel Mack
***/
7 | ||
8 | #include <fcntl.h> | |
9 | #include <sys/stat.h> | |
10 | #include <sys/types.h> | |
11 | #include <unistd.h> | |
12 | ||
13 | #include "alloc-util.h" | |
14 | #include "bpf-program.h" | |
15 | #include "fd-util.h" | |
16 | #include "log.h" | |
17 | #include "missing.h" | |
aa2b6f1d | 18 | #include "path-util.h" |
e0ad39fc | 19 | #include "util.h" |
71e5200f DM |
20 | |
21 | int bpf_program_new(uint32_t prog_type, BPFProgram **ret) { | |
22 | _cleanup_(bpf_program_unrefp) BPFProgram *p = NULL; | |
23 | ||
24 | p = new0(BPFProgram, 1); | |
25 | if (!p) | |
26 | return log_oom(); | |
27 | ||
aa2b6f1d | 28 | p->n_ref = 1; |
71e5200f DM |
29 | p->prog_type = prog_type; |
30 | p->kernel_fd = -1; | |
31 | ||
1cc6c93a YW |
32 | *ret = TAKE_PTR(p); |
33 | ||
71e5200f DM |
34 | return 0; |
35 | } | |
36 | ||
aa2b6f1d LP |
37 | BPFProgram *bpf_program_ref(BPFProgram *p) { |
38 | if (!p) | |
39 | return NULL; | |
40 | ||
41 | assert(p->n_ref > 0); | |
42 | p->n_ref++; | |
43 | ||
44 | return p; | |
45 | } | |
46 | ||
71e5200f DM |
47 | BPFProgram *bpf_program_unref(BPFProgram *p) { |
48 | if (!p) | |
49 | return NULL; | |
50 | ||
aa2b6f1d LP |
51 | assert(p->n_ref > 0); |
52 | p->n_ref--; | |
53 | ||
54 | if (p->n_ref > 0) | |
55 | return NULL; | |
56 | ||
57 | /* Unfortunately, the kernel currently doesn't implicitly detach BPF programs from their cgroups when the last | |
58 | * fd to the BPF program is closed. This has nasty side-effects since this means that abnormally terminated | |
59 | * programs that attached one of their BPF programs to a cgroup will leave this programs pinned for good with | |
60 | * zero chance of recovery, until the cgroup is removed. This is particularly problematic if the cgroup in | |
61 | * question is the root cgroup (or any other cgroup belonging to a service that cannot be restarted during | |
62 | * operation, such as dbus), as the memory for the BPF program can only be reclaimed through a reboot. To | |
63 | * counter this, we track closely to which cgroup a program was attached to and will detach it on our own | |
64 | * whenever we close the BPF fd. */ | |
65 | (void) bpf_program_cgroup_detach(p); | |
66 | ||
71e5200f DM |
67 | safe_close(p->kernel_fd); |
68 | free(p->instructions); | |
aa2b6f1d | 69 | free(p->attached_path); |
71e5200f DM |
70 | |
71 | return mfree(p); | |
72 | } | |
73 | ||
74 | int bpf_program_add_instructions(BPFProgram *p, const struct bpf_insn *instructions, size_t count) { | |
75 | ||
76 | assert(p); | |
77 | ||
e0ad39fc LP |
78 | if (p->kernel_fd >= 0) /* don't allow modification after we uploaded things to the kernel */ |
79 | return -EBUSY; | |
80 | ||
71e5200f DM |
81 | if (!GREEDY_REALLOC(p->instructions, p->allocated, p->n_instructions + count)) |
82 | return -ENOMEM; | |
83 | ||
84 | memcpy(p->instructions + p->n_instructions, instructions, sizeof(struct bpf_insn) * count); | |
85 | p->n_instructions += count; | |
86 | ||
87 | return 0; | |
88 | } | |
89 | ||
90 | int bpf_program_load_kernel(BPFProgram *p, char *log_buf, size_t log_size) { | |
91 | union bpf_attr attr; | |
92 | ||
93 | assert(p); | |
94 | ||
e0ad39fc LP |
95 | if (p->kernel_fd >= 0) { /* make this idempotent */ |
96 | memzero(log_buf, log_size); | |
97 | return 0; | |
98 | } | |
71e5200f DM |
99 | |
100 | attr = (union bpf_attr) { | |
101 | .prog_type = p->prog_type, | |
102 | .insns = PTR_TO_UINT64(p->instructions), | |
103 | .insn_cnt = p->n_instructions, | |
104 | .license = PTR_TO_UINT64("GPL"), | |
105 | .log_buf = PTR_TO_UINT64(log_buf), | |
106 | .log_level = !!log_buf, | |
107 | .log_size = log_size, | |
108 | }; | |
109 | ||
110 | p->kernel_fd = bpf(BPF_PROG_LOAD, &attr, sizeof(attr)); | |
111 | if (p->kernel_fd < 0) | |
112 | return -errno; | |
113 | ||
114 | return 0; | |
115 | } | |
116 | ||
9f2e6892 | 117 | int bpf_program_cgroup_attach(BPFProgram *p, int type, const char *path, uint32_t flags) { |
aa2b6f1d | 118 | _cleanup_free_ char *copy = NULL; |
71e5200f DM |
119 | _cleanup_close_ int fd = -1; |
120 | union bpf_attr attr; | |
aa2b6f1d | 121 | int r; |
71e5200f DM |
122 | |
123 | assert(p); | |
124 | assert(type >= 0); | |
125 | assert(path); | |
126 | ||
aa2b6f1d LP |
127 | if (!IN_SET(flags, 0, BPF_F_ALLOW_OVERRIDE, BPF_F_ALLOW_MULTI)) |
128 | return -EINVAL; | |
129 | ||
130 | /* We need to track which cgroup the program is attached to, and we can only track one attachment, hence let's | |
131 | * refuse this early. */ | |
132 | if (p->attached_path) { | |
133 | if (!path_equal(p->attached_path, path)) | |
134 | return -EBUSY; | |
135 | if (p->attached_type != type) | |
136 | return -EBUSY; | |
137 | if (p->attached_flags != flags) | |
138 | return -EBUSY; | |
139 | ||
140 | /* Here's a shortcut: if we previously attached this program already, then we don't have to do so | |
141 | * again. Well, with one exception: if we are in BPF_F_ALLOW_OVERRIDE mode then someone else might have | |
142 | * replaced our program since the last time, hence let's reattach it again, just to be safe. In flags | |
143 | * == 0 mode this is not an issue since nobody else can replace our program in that case, and in flags | |
144 | * == BPF_F_ALLOW_MULTI mode any other's program would be installed in addition to ours hence ours | |
145 | * would remain in effect. */ | |
146 | if (flags != BPF_F_ALLOW_OVERRIDE) | |
147 | return 0; | |
148 | } | |
149 | ||
150 | /* Ensure we have a kernel object for this. */ | |
151 | r = bpf_program_load_kernel(p, NULL, 0); | |
152 | if (r < 0) | |
153 | return r; | |
154 | ||
155 | copy = strdup(path); | |
156 | if (!copy) | |
157 | return -ENOMEM; | |
158 | ||
71e5200f DM |
159 | fd = open(path, O_DIRECTORY|O_RDONLY|O_CLOEXEC); |
160 | if (fd < 0) | |
161 | return -errno; | |
162 | ||
163 | attr = (union bpf_attr) { | |
164 | .attach_type = type, | |
165 | .target_fd = fd, | |
166 | .attach_bpf_fd = p->kernel_fd, | |
9f2e6892 | 167 | .attach_flags = flags, |
71e5200f DM |
168 | }; |
169 | ||
170 | if (bpf(BPF_PROG_ATTACH, &attr, sizeof(attr)) < 0) | |
171 | return -errno; | |
172 | ||
aa2b6f1d LP |
173 | free_and_replace(p->attached_path, copy); |
174 | p->attached_type = type; | |
175 | p->attached_flags = flags; | |
176 | ||
71e5200f DM |
177 | return 0; |
178 | } | |
179 | ||
aa2b6f1d | 180 | int bpf_program_cgroup_detach(BPFProgram *p) { |
71e5200f | 181 | _cleanup_close_ int fd = -1; |
71e5200f | 182 | |
aa2b6f1d | 183 | assert(p); |
71e5200f | 184 | |
aa2b6f1d LP |
185 | if (!p->attached_path) |
186 | return -EUNATCH; | |
9b3c1897 | 187 | |
aa2b6f1d LP |
188 | fd = open(p->attached_path, O_DIRECTORY|O_RDONLY|O_CLOEXEC); |
189 | if (fd < 0) { | |
190 | if (errno != ENOENT) | |
191 | return -errno; | |
71e5200f | 192 | |
aa2b6f1d LP |
193 | /* If the cgroup does not exist anymore, then we don't have to explicitly detach, it got detached |
194 | * implicitly by the removal, hence don't complain */ | |
71e5200f | 195 | |
aa2b6f1d LP |
196 | } else { |
197 | union bpf_attr attr; | |
198 | ||
199 | attr = (union bpf_attr) { | |
200 | .attach_type = p->attached_type, | |
201 | .target_fd = fd, | |
202 | .attach_bpf_fd = p->kernel_fd, | |
203 | }; | |
204 | ||
205 | if (bpf(BPF_PROG_DETACH, &attr, sizeof(attr)) < 0) | |
206 | return -errno; | |
207 | } | |
208 | ||
209 | p->attached_path = mfree(p->attached_path); | |
71e5200f DM |
210 | |
211 | return 0; | |
212 | } | |
213 | ||
214 | int bpf_map_new(enum bpf_map_type type, size_t key_size, size_t value_size, size_t max_entries, uint32_t flags) { | |
215 | union bpf_attr attr = { | |
216 | .map_type = type, | |
217 | .key_size = key_size, | |
218 | .value_size = value_size, | |
219 | .max_entries = max_entries, | |
220 | .map_flags = flags, | |
221 | }; | |
222 | int fd; | |
223 | ||
224 | fd = bpf(BPF_MAP_CREATE, &attr, sizeof(attr)); | |
225 | if (fd < 0) | |
226 | return -errno; | |
227 | ||
228 | return fd; | |
229 | } | |
230 | ||
231 | int bpf_map_update_element(int fd, const void *key, void *value) { | |
232 | ||
233 | union bpf_attr attr = { | |
234 | .map_fd = fd, | |
235 | .key = PTR_TO_UINT64(key), | |
236 | .value = PTR_TO_UINT64(value), | |
237 | }; | |
238 | ||
239 | if (bpf(BPF_MAP_UPDATE_ELEM, &attr, sizeof(attr)) < 0) | |
240 | return -errno; | |
241 | ||
242 | return 0; | |
243 | } | |
244 | ||
245 | int bpf_map_lookup_element(int fd, const void *key, void *value) { | |
246 | ||
247 | union bpf_attr attr = { | |
248 | .map_fd = fd, | |
249 | .key = PTR_TO_UINT64(key), | |
250 | .value = PTR_TO_UINT64(value), | |
251 | }; | |
252 | ||
253 | if (bpf(BPF_MAP_LOOKUP_ELEM, &attr, sizeof(attr)) < 0) | |
254 | return -errno; | |
255 | ||
256 | return 0; | |
257 | } |