From ea9826eb946d57aaba7e6bfa2d6b120136c6b20f Mon Sep 17 00:00:00 2001 From: Matteo Croce Date: Thu, 15 May 2025 16:32:46 +0200 Subject: [PATCH] core: add options to delegate BPFFS token creation Add four new options BPFDelegate{Commands,Maps,Programs,Attachments}= in order to delegate to a BPFFS instance the permission to create tokens. The value is a list of options taken from: https://github.com/torvalds/linux/blob/v6.14/include/uapi/linux/bpf.h#L922-L1121 The special value "any" means to allow every possible value. More information about BPF tokens here: https://lwn.net/Articles/947173/ --- .github/workflows/linter.yml | 10 +- man/meson.build | 11 ++ man/org.freedesktop.systemd1.xml | 112 +++++++++++++++++++++ man/systemd.exec.xml | 50 +++++++++ src/basic/generate-bpf-delegate-configs.py | 76 ++++++++++++++ src/basic/meson.build | 13 ++- src/core/dbus-execute.c | 104 +++++++++++++++++++ src/core/exec-invoke.c | 36 ++++++- src/core/execute-serialize.c | 40 ++++++++ src/core/execute.c | 13 +++ src/core/execute.h | 1 + src/core/load-fragment-gperf.gperf.in | 4 + src/core/load-fragment.c | 8 ++ src/core/load-fragment.h | 4 + src/core/namespace.c | 66 ++++++++++++ src/core/namespace.h | 47 +++++++++ src/shared/bus-unit-util.c | 4 + src/test/meson.build | 6 ++ src/test/test-bpf-token.c | 28 ++++++ test/units/TEST-07-PID1.private-bpf.sh | 50 +++++++++ tools/xml_helper.py | 2 + 21 files changed, 677 insertions(+), 8 deletions(-) create mode 100755 src/basic/generate-bpf-delegate-configs.py create mode 100644 src/test/test-bpf-token.c diff --git a/.github/workflows/linter.yml b/.github/workflows/linter.yml index 85369074605..73609e89e91 100644 --- a/.github/workflows/linter.yml +++ b/.github/workflows/linter.yml @@ -41,7 +41,7 @@ jobs: - uses: systemd/mkosi@0d1143150835b21c1bfe64428df5f45b558280b1 - name: Check that tabs are not used in Python code - run: sh -c '! 
git grep -P "\\t" -- src/boot/generate-hwids-section.py src/ukify/ukify.py test/integration-tests/integration-test-wrapper.py' + run: sh -c '! git grep -P "\\t" -- src/basic/generate-bpf-delegate-configs.py src/boot/generate-hwids-section.py src/ukify/ukify.py test/integration-tests/integration-test-wrapper.py' - name: Build tools tree run: | @@ -56,20 +56,20 @@ jobs: - name: Run mypy run: | mkosi sandbox -- mypy --version - mkosi sandbox -- mypy src/boot/generate-hwids-section.py src/test/generate-sym-test.py src/ukify/ukify.py test/integration-tests/integration-test-wrapper.py + mkosi sandbox -- mypy src/basic/generate-bpf-delegate-configs.py src/boot/generate-hwids-section.py src/test/generate-sym-test.py src/ukify/ukify.py test/integration-tests/integration-test-wrapper.py - name: Run ruff check run: | mkosi sandbox -- ruff --version - mkosi sandbox -- ruff check src/boot/generate-hwids-section.py src/test/generate-sym-test.py src/ukify/ukify.py test/integration-tests/integration-test-wrapper.py + mkosi sandbox -- ruff check src/basic/generate-bpf-delegate-configs.py src/boot/generate-hwids-section.py src/test/generate-sym-test.py src/ukify/ukify.py test/integration-tests/integration-test-wrapper.py - name: Run ruff format run: | mkosi sandbox -- ruff --version - if ! mkosi sandbox -- ruff format --check src/boot/generate-hwids-section.py src/test/generate-sym-test.py src/ukify/ukify.py test/integration-tests/integration-test-wrapper.py + if ! 
mkosi sandbox -- ruff format --check src/basic/generate-bpf-delegate-configs.py src/boot/generate-hwids-section.py src/test/generate-sym-test.py src/ukify/ukify.py test/integration-tests/integration-test-wrapper.py then echo "Please run 'ruff format' on the above files or apply the diffs below manually" - mkosi sandbox -- ruff format --check --quiet --diff src/boot/generate-hwids-section.py src/test/generate-sym-test.py src/ukify/ukify.py test/integration-tests/integration-test-wrapper.py + mkosi sandbox -- ruff format --check --quiet --diff src/basic/generate-bpf-delegate-configs.py src/boot/generate-hwids-section.py src/test/generate-sym-test.py src/ukify/ukify.py test/integration-tests/integration-test-wrapper.py fi - name: Configure meson diff --git a/man/meson.build b/man/meson.build index cd52ed5298b..15f3fdb34b4 100644 --- a/man/meson.build +++ b/man/meson.build @@ -35,6 +35,17 @@ custom_entities_ent = custom_target( man_page_depends += custom_entities_ent +generate_bpf_delegate_configs = find_program('../src/basic/generate-bpf-delegate-configs.py') + +bpf_delegate_xml = custom_target( + input : files('../src/basic/include/linux/bpf.h'), + output : 'bpf-delegate.xml', + command : [generate_bpf_delegate_configs, + 'doc', + '@INPUT@'], + capture : true) +man_page_depends += bpf_delegate_xml + man_pages = [] html_pages = [] source_xml_files = [] diff --git a/man/org.freedesktop.systemd1.xml b/man/org.freedesktop.systemd1.xml index 95fb54d2f29..2f61e2f9464 100644 --- a/man/org.freedesktop.systemd1.xml +++ b/man/org.freedesktop.systemd1.xml @@ -3376,6 +3376,14 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2eservice { @org.freedesktop.DBus.Property.EmitsChangedSignal("const") readonly s PrivateBPF = '...'; @org.freedesktop.DBus.Property.EmitsChangedSignal("const") + readonly s BPFDelegateCommands = '...'; + @org.freedesktop.DBus.Property.EmitsChangedSignal("const") + readonly s BPFDelegateMaps = '...'; + 
@org.freedesktop.DBus.Property.EmitsChangedSignal("const") + readonly s BPFDelegatePrograms = '...'; + @org.freedesktop.DBus.Property.EmitsChangedSignal("const") + readonly s BPFDelegateAttachments = '...'; + @org.freedesktop.DBus.Property.EmitsChangedSignal("const") readonly b MemoryKSM = ...; @org.freedesktop.DBus.Property.EmitsChangedSignal("const") readonly s NetworkNamespacePath = '...'; @@ -3979,6 +3987,14 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2eservice { + + + + + + + + @@ -4707,6 +4723,14 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2eservice { + + + + + + + + @@ -5591,6 +5615,14 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2esocket { @org.freedesktop.DBus.Property.EmitsChangedSignal("const") readonly s PrivateBPF = '...'; @org.freedesktop.DBus.Property.EmitsChangedSignal("const") + readonly s BPFDelegateCommands = '...'; + @org.freedesktop.DBus.Property.EmitsChangedSignal("const") + readonly s BPFDelegateMaps = '...'; + @org.freedesktop.DBus.Property.EmitsChangedSignal("const") + readonly s BPFDelegatePrograms = '...'; + @org.freedesktop.DBus.Property.EmitsChangedSignal("const") + readonly s BPFDelegateAttachments = '...'; + @org.freedesktop.DBus.Property.EmitsChangedSignal("const") readonly b MemoryKSM = ...; @org.freedesktop.DBus.Property.EmitsChangedSignal("const") readonly s NetworkNamespacePath = '...'; @@ -6214,6 +6246,14 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2esocket { + + + + + + + + @@ -6922,6 +6962,14 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2esocket { + + + + + + + + @@ -7630,6 +7678,14 @@ node /org/freedesktop/systemd1/unit/home_2emount { @org.freedesktop.DBus.Property.EmitsChangedSignal("const") readonly s PrivateBPF = '...'; @org.freedesktop.DBus.Property.EmitsChangedSignal("const") + readonly s BPFDelegateCommands = '...'; + @org.freedesktop.DBus.Property.EmitsChangedSignal("const") + readonly s BPFDelegateMaps = '...'; + @org.freedesktop.DBus.Property.EmitsChangedSignal("const") 
+ readonly s BPFDelegatePrograms = '...'; + @org.freedesktop.DBus.Property.EmitsChangedSignal("const") + readonly s BPFDelegateAttachments = '...'; + @org.freedesktop.DBus.Property.EmitsChangedSignal("const") readonly b MemoryKSM = ...; @org.freedesktop.DBus.Property.EmitsChangedSignal("const") readonly s NetworkNamespacePath = '...'; @@ -8175,6 +8231,14 @@ node /org/freedesktop/systemd1/unit/home_2emount { + + + + + + + + @@ -8791,6 +8855,14 @@ node /org/freedesktop/systemd1/unit/home_2emount { + + + + + + + + @@ -9632,6 +9704,14 @@ node /org/freedesktop/systemd1/unit/dev_2dsda3_2eswap { @org.freedesktop.DBus.Property.EmitsChangedSignal("const") readonly s PrivateBPF = '...'; @org.freedesktop.DBus.Property.EmitsChangedSignal("const") + readonly s BPFDelegateCommands = '...'; + @org.freedesktop.DBus.Property.EmitsChangedSignal("const") + readonly s BPFDelegateMaps = '...'; + @org.freedesktop.DBus.Property.EmitsChangedSignal("const") + readonly s BPFDelegatePrograms = '...'; + @org.freedesktop.DBus.Property.EmitsChangedSignal("const") + readonly s BPFDelegateAttachments = '...'; + @org.freedesktop.DBus.Property.EmitsChangedSignal("const") readonly b MemoryKSM = ...; @org.freedesktop.DBus.Property.EmitsChangedSignal("const") readonly s NetworkNamespacePath = '...'; @@ -10159,6 +10239,14 @@ node /org/freedesktop/systemd1/unit/dev_2dsda3_2eswap { + + + + + + + + @@ -10757,6 +10845,14 @@ node /org/freedesktop/systemd1/unit/dev_2dsda3_2eswap { + + + + + + + + @@ -12341,6 +12437,10 @@ $ gdbus introspect --system --dest org.freedesktop.systemd1 \ ProtectHostnameEx, DelegateNamespaces, PrivateBPF, + BPFDelegateCommands, + BPFDelegateMaps, + BPFDelegatePrograms, + BPFDelegateAttachments, RemoveSubGroup(), StateDirectoryQuota, StateDirectoryQuotaUsage, @@ -12400,6 +12500,10 @@ $ gdbus introspect --system --dest org.freedesktop.systemd1 \ AcceptFileDescriptors, DelegateNamespaces, PrivateBPF, + BPFDelegateCommands, + BPFDelegateMaps, + BPFDelegatePrograms, + 
BPFDelegateAttachments, RemoveSubgroup(), DeferTrigger, DeferTriggerMaxUSec, @@ -12456,6 +12560,10 @@ $ gdbus introspect --system --dest org.freedesktop.systemd1 \ ProtectHostnameEx, DelegateNamespaces, PrivateBPF, + BPFDelegateCommands, + BPFDelegateMaps, + BPFDelegatePrograms, + BPFDelegateAttachments, RemoveSubgroup(), ReloadResult, CleanResult, @@ -12512,6 +12620,10 @@ $ gdbus introspect --system --dest org.freedesktop.systemd1 \ ProtectHostnameEx, DelegateNamespaces, PrivateBPF, + BPFDelegateCommands, + BPFDelegateMaps, + BPFDelegatePrograms, + BPFDelegateAttachments, RemoveSubgroup(), StateDirectoryQuota, StateDirectoryQuotaUsage, diff --git a/man/systemd.exec.xml b/man/systemd.exec.xml index 85db1de264e..f6a9e0cdab0 100644 --- a/man/systemd.exec.xml +++ b/man/systemd.exec.xml @@ -2565,6 +2565,56 @@ RestrictNamespaces=~cgroup net + + BPFDelegateCommands= + + Accepts a list of BPF commands to allow or any to allow everything. + Defaults to none. The accepted values are: + + Requires PrivateBPF= enabled to be effective. + This will set the delegate_cmds bpffs mount option. + A more detailed explanation of the feature can be found in this + LWN post. + + + + + + BPFDelegateMaps= + + Accepts a list of BPF maps to allow or any to allow everything. + Defaults to none. The accepted values are: + + This will set the delegate_maps bpffs mount option. + See BPFDelegateCommands= for dependencies and more details. + + + + + + BPFDelegatePrograms= + + Accepts a list of BPF programs to allow or any to allow everything. + Defaults to none. The accepted values are: + + This will set the delegate_progs bpffs mount option. + See BPFDelegateCommands= for dependencies and more details. + + + + + + BPFDelegateAttachments= + + Accepts a list of BPF attach points to allow or any to allow everything. + Defaults to none. The accepted values are: + + This will set the delegate_attachs bpffs mount option. + See BPFDelegateCommands= for dependencies and more details. 
+ + + + LockPersonality= diff --git a/src/basic/generate-bpf-delegate-configs.py b/src/basic/generate-bpf-delegate-configs.py new file mode 100755 index 00000000000..a358a69e76b --- /dev/null +++ b/src/basic/generate-bpf-delegate-configs.py @@ -0,0 +1,76 @@ +#!/usr/bin/env python3 + +# SPDX-License-Identifier: LGPL-2.1-or-later + +# Convert the bpf_{cmd,map_type,prog_type,attach_type} enums into a string +# array to be used as configuration options + +import re +import sys + + +def print_usage_and_exit() -> None: + print(f'Usage: {sys.argv[0]}
') + sys.exit(1) + + +if len(sys.argv) != 3: + print_usage_and_exit() + +output = sys.argv[1] +header = sys.argv[2] + +if output not in ['code', 'doc']: + print(f'Invalid format: {output}') + print_usage_and_exit() + +with open(header) as file: + inEnum = False + enumValues: list[str] = [] + enumName = '' + + if output == 'doc': + print("""\ + + + +""") + + for line in file: + line = line.strip() + + if inEnum: + # Inside an enum definition + if re.match(r'^\s*}', line): + # End of an enum definition + inEnum = False + # Print the enum values as a static const char* array + if output == 'code': + print(f'static const char* const {enumName}_table[] = {{') + else: + print(f'') + for enumValue in enumValues: + words = enumValue.split('_') + enumValue = words[0] + ''.join(word.capitalize() for word in words[1:]) + if output == 'code': + print(f'\t"{enumValue}",') + else: + print(f'{enumValue}') + if output == 'code': + print('};') + else: + print('') + enumValues = [] + else: + # Collect enum values + match = re.fullmatch(r'(\w+)\b,', line) + if match and len(match.groups()) > 0 and not match[1].startswith('__'): + enumValues.append(match[1]) + elif match := re.match(r'^\s*enum\s+bpf_(cmd|map_type|prog_type|attach_type)+\s*{', line): + # Start of a new enum + inEnum = True + enumName = 'bpf_delegate_' + match[1] + + if output == 'doc': + print('') diff --git a/src/basic/meson.build b/src/basic/meson.build index 7a67d112759..2627cdb1640 100644 --- a/src/basic/meson.build +++ b/src/basic/meson.build @@ -273,8 +273,17 @@ filesystem_switch_case_inc = custom_target( '@INPUT@'], capture : true) -generated_sources += [filesystem_list_inc, filesystem_switch_case_inc, filesystems_gperf_h] -basic_sources += [filesystem_list_inc, filesystem_switch_case_inc, filesystems_gperf_h] +generate_bpf_delegate_configs = find_program('generate-bpf-delegate-configs.py') +bpf_delegate_configs_inc = custom_target( + input : files('include/linux/bpf.h'), + output : 'bpf-delegate-configs.inc', + 
command : [generate_bpf_delegate_configs, + 'code', + '@INPUT@'], + capture : true) + +generated_sources += [filesystem_list_inc, filesystem_switch_case_inc, filesystems_gperf_h, bpf_delegate_configs_inc] +basic_sources += [filesystem_list_inc, filesystem_switch_case_inc, filesystems_gperf_h, bpf_delegate_configs_inc] libbasic_static = static_library( 'basic', diff --git a/src/core/dbus-execute.c b/src/core/dbus-execute.c index 7e4d6fa6dbf..e273470c7aa 100644 --- a/src/core/dbus-execute.c +++ b/src/core/dbus-execute.c @@ -1134,6 +1134,90 @@ static int property_get_unsigned_as_uint16( return sd_bus_message_append_basic(reply, 'q', &q); } +static int property_get_bpf_delegate_commands( + sd_bus *bus, + const char *path, + const char *interface, + const char *property, + sd_bus_message *reply, + void *userdata, + sd_bus_error *error) { + + uint64_t *u = ASSERT_PTR(userdata); + _cleanup_free_ char *s = NULL; + + assert(reply); + + s = bpf_delegate_commands_to_string(*u); + if (!s) + return -ENOMEM; + + return sd_bus_message_append(reply, "s", s); +} + +static int property_get_bpf_delegate_maps( + sd_bus *bus, + const char *path, + const char *interface, + const char *property, + sd_bus_message *reply, + void *userdata, + sd_bus_error *error) { + + uint64_t *u = ASSERT_PTR(userdata); + _cleanup_free_ char *s = NULL; + + assert(reply); + + s = bpf_delegate_maps_to_string(*u); + if (!s) + return -ENOMEM; + + return sd_bus_message_append(reply, "s", s); +} + +static int property_get_bpf_delegate_programs( + sd_bus *bus, + const char *path, + const char *interface, + const char *property, + sd_bus_message *reply, + void *userdata, + sd_bus_error *error) { + + uint64_t *u = ASSERT_PTR(userdata); + _cleanup_free_ char *s = NULL; + + assert(reply); + + s = bpf_delegate_programs_to_string(*u); + if (!s) + return -ENOMEM; + + return sd_bus_message_append(reply, "s", s); +} + +static int property_get_bpf_delegate_attachments( + sd_bus *bus, + const char *path, + const char 
*interface, + const char *property, + sd_bus_message *reply, + void *userdata, + sd_bus_error *error) { + + uint64_t *u = ASSERT_PTR(userdata); + _cleanup_free_ char *s = NULL; + + assert(reply); + + s = bpf_delegate_attachments_to_string(*u); + if (!s) + return -ENOMEM; + + return sd_bus_message_append(reply, "s", s); +} + const sd_bus_vtable bus_exec_vtable[] = { SD_BUS_VTABLE_START(0), SD_BUS_PROPERTY("Environment", "as", NULL, offsetof(ExecContext, environment), SD_BUS_VTABLE_PROPERTY_CONST), @@ -1318,6 +1402,10 @@ const sd_bus_vtable bus_exec_vtable[] = { SD_BUS_PROPERTY("ProtectHostname", "b", property_get_protect_hostname, offsetof(ExecContext, protect_hostname), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("ProtectHostnameEx", "(ss)", property_get_protect_hostname_ex, 0, SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("PrivateBPF", "s", property_get_private_bpf, offsetof(ExecContext, private_bpf), SD_BUS_VTABLE_PROPERTY_CONST), + SD_BUS_PROPERTY("BPFDelegateCommands", "s", property_get_bpf_delegate_commands, offsetof(ExecContext, bpf_delegate_commands), SD_BUS_VTABLE_PROPERTY_CONST), + SD_BUS_PROPERTY("BPFDelegateMaps", "s", property_get_bpf_delegate_maps, offsetof(ExecContext, bpf_delegate_maps), SD_BUS_VTABLE_PROPERTY_CONST), + SD_BUS_PROPERTY("BPFDelegatePrograms", "s", property_get_bpf_delegate_programs, offsetof(ExecContext, bpf_delegate_programs), SD_BUS_VTABLE_PROPERTY_CONST), + SD_BUS_PROPERTY("BPFDelegateAttachments", "s", property_get_bpf_delegate_attachments, offsetof(ExecContext, bpf_delegate_attachments), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("MemoryKSM", "b", bus_property_get_tristate, offsetof(ExecContext, memory_ksm), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("NetworkNamespacePath", "s", NULL, offsetof(ExecContext, network_namespace_path), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("IPCNamespacePath", "s", NULL, offsetof(ExecContext, ipc_namespace_path), SD_BUS_VTABLE_PROPERTY_CONST), @@ -1756,6 +1844,10 @@ static 
BUS_DEFINE_SET_TRANSIENT_PARSE(keyring_mode, ExecKeyringMode, exec_keyrin static BUS_DEFINE_SET_TRANSIENT_PARSE(protect_proc, ProtectProc, protect_proc_from_string); static BUS_DEFINE_SET_TRANSIENT_PARSE(proc_subset, ProcSubset, proc_subset_from_string); static BUS_DEFINE_SET_TRANSIENT_PARSE(private_bpf, PrivateBPF, private_bpf_from_string); +static BUS_DEFINE_SET_TRANSIENT_PARSE_PTR(bpf_delegate_commands, uint64_t, bpf_delegate_commands_from_string); +static BUS_DEFINE_SET_TRANSIENT_PARSE_PTR(bpf_delegate_maps, uint64_t, bpf_delegate_maps_from_string); +static BUS_DEFINE_SET_TRANSIENT_PARSE_PTR(bpf_delegate_programs, uint64_t, bpf_delegate_programs_from_string); +static BUS_DEFINE_SET_TRANSIENT_PARSE_PTR(bpf_delegate_attachments, uint64_t, bpf_delegate_attachments_from_string); BUS_DEFINE_SET_TRANSIENT_PARSE(exec_preserve_mode, ExecPreserveMode, exec_preserve_mode_from_string); static BUS_DEFINE_SET_TRANSIENT_PARSE_PTR(personality, unsigned long, parse_personality); static BUS_DEFINE_SET_TRANSIENT_TO_STRING_ALLOC(secure_bits, "i", int32_t, int, "%" PRIi32, secure_bits_to_string_alloc_with_check); @@ -2285,6 +2377,18 @@ int bus_exec_context_set_transient_property( if (streq(name, "PrivateBPF")) return bus_set_transient_private_bpf(u, name, &c->private_bpf, message, flags, error); + if (streq(name, "BPFDelegateCommands")) + return bus_set_transient_bpf_delegate_commands(u, name, &c->bpf_delegate_commands, message, flags, error); + + if (streq(name, "BPFDelegateMaps")) + return bus_set_transient_bpf_delegate_maps(u, name, &c->bpf_delegate_maps, message, flags, error); + + if (streq(name, "BPFDelegatePrograms")) + return bus_set_transient_bpf_delegate_programs(u, name, &c->bpf_delegate_programs, message, flags, error); + + if (streq(name, "BPFDelegateAttachments")) + return bus_set_transient_bpf_delegate_attachments(u, name, &c->bpf_delegate_attachments, message, flags, error); + if (streq(name, "RuntimeDirectoryPreserve")) return 
bus_set_transient_exec_preserve_mode(u, name, &c->runtime_directory_preserve_mode, message, flags, error); diff --git a/src/core/exec-invoke.c b/src/core/exec-invoke.c index e6fce99340b..3853aef4afc 100644 --- a/src/core/exec-invoke.c +++ b/src/core/exec-invoke.c @@ -2271,6 +2271,7 @@ static int setup_private_users_child(int unshare_ready_fd, const char *uid_map, } static int bpffs_prepare( + const ExecContext *c, PidRef *ret_pid, int *ret_sock_fd, int *ret_errno_pipe) { @@ -2295,6 +2296,7 @@ static int bpffs_prepare( return log_debug_errno(r, "Failed to fork bpffs privileged helper: %m"); if (r == 0) { _cleanup_close_ int fs_fd = -EBADF; + char number[STRLEN("0x") + sizeof(c->bpf_delegate_commands) * 2 + 1]; bpffs_errno_pipe[0] = safe_close(bpffs_errno_pipe[0]); socket_fds[0] = safe_close(socket_fds[0]); @@ -2305,6 +2307,38 @@ static int bpffs_prepare( report_errno_and_exit(bpffs_errno_pipe[1], fs_fd); } + xsprintf(number, "0x%"PRIx64, c->bpf_delegate_commands); + + r = fsconfig(fs_fd, FSCONFIG_SET_STRING, "delegate_cmds", number, /* aux = */ 0); + if (r < 0) { + log_debug_errno(errno, "Failed to FSCONFIG_SET_STRING: %m"); + report_errno_and_exit(bpffs_errno_pipe[1], errno); + } + + xsprintf(number, "0x%"PRIx64, c->bpf_delegate_maps); + + r = fsconfig(fs_fd, FSCONFIG_SET_STRING, "delegate_maps", number, /* aux = */ 0); + if (r < 0) { + log_debug_errno(errno, "Failed to FSCONFIG_SET_STRING: %m"); + report_errno_and_exit(bpffs_errno_pipe[1], errno); + } + + xsprintf(number, "0x%"PRIx64, c->bpf_delegate_programs); + + r = fsconfig(fs_fd, FSCONFIG_SET_STRING, "delegate_progs", number, /* aux = */ 0); + if (r < 0) { + log_debug_errno(errno, "Failed to FSCONFIG_SET_STRING: %m"); + report_errno_and_exit(bpffs_errno_pipe[1], errno); + } + + xsprintf(number, "0x%"PRIx64, c->bpf_delegate_attachments); + + r = fsconfig(fs_fd, FSCONFIG_SET_STRING, "delegate_attachs", number, /* aux = */ 0); + if (r < 0) { + log_debug_errno(errno, "Failed to FSCONFIG_SET_STRING: %m"); + 
report_errno_and_exit(bpffs_errno_pipe[1], errno); + } + r = fsconfig(fs_fd, FSCONFIG_CMD_CREATE, /* key = */ NULL, /* value = */ NULL, /* aux = */ 0); if (r < 0) { log_debug_errno(errno, "Failed to create bpffs superblock: %m"); @@ -5703,7 +5737,7 @@ int exec_invoke( * This is the kernel sample doing this: * https://github.com/torvalds/linux/blob/master/tools/testing/selftests/bpf/prog_tests/token.c */ - r = bpffs_prepare(&bpffs_pidref, &bpffs_socket_fd, &bpffs_errno_pipe); + r = bpffs_prepare(context, &bpffs_pidref, &bpffs_socket_fd, &bpffs_errno_pipe); if (r < 0) { *exit_status = EXIT_BPF; return log_error_errno(r, "Failed to mount bpffs in bpffs_prepare(): %m"); diff --git a/src/core/execute-serialize.c b/src/core/execute-serialize.c index 167e4dfd7fa..e5a97f59aa1 100644 --- a/src/core/execute-serialize.c +++ b/src/core/execute-serialize.c @@ -1807,6 +1807,30 @@ static int exec_context_serialize(const ExecContext *c, FILE *f) { if (r < 0) return r; + if (c->bpf_delegate_commands != 0) { + r = serialize_item_format(f, "exec-context-bpf-delegate-commands", "0x%"PRIx64, c->bpf_delegate_commands); + if (r < 0) + return r; + } + + if (c->bpf_delegate_maps != 0) { + r = serialize_item_format(f, "exec-context-bpf-delegate-maps", "0x%"PRIx64, c->bpf_delegate_maps); + if (r < 0) + return r; + } + + if (c->bpf_delegate_programs != 0) { + r = serialize_item_format(f, "exec-context-bpf-delegate-programs", "0x%"PRIx64, c->bpf_delegate_programs); + if (r < 0) + return r; + } + + if (c->bpf_delegate_attachments != 0) { + r = serialize_item_format(f, "exec-context-bpf-delegate-attachments", "0x%"PRIx64, c->bpf_delegate_attachments); + if (r < 0) + return r; + } + r = serialize_item(f, "exec-context-runtime-directory-preserve-mode", exec_preserve_mode_to_string(c->runtime_directory_preserve_mode)); if (r < 0) return r; @@ -2749,6 +2773,22 @@ static int exec_context_deserialize(ExecContext *c, FILE *f) { c->private_bpf = private_bpf_from_string(val); if (c->private_bpf < 0) 
return -EINVAL; + } else if ((val = startswith(l, "exec-context-bpf-delegate-commands="))) { + r = safe_atoux64(val, &c->bpf_delegate_commands); + if (r < 0) + return r; + } else if ((val = startswith(l, "exec-context-bpf-delegate-maps="))) { + r = safe_atoux64(val, &c->bpf_delegate_maps); + if (r < 0) + return r; + } else if ((val = startswith(l, "exec-context-bpf-delegate-programs="))) { + r = safe_atoux64(val, &c->bpf_delegate_programs); + if (r < 0) + return r; + } else if ((val = startswith(l, "exec-context-bpf-delegate-attachments="))) { + r = safe_atoux64(val, &c->bpf_delegate_attachments); + if (r < 0) + return r; } else if ((val = startswith(l, "exec-context-runtime-directory-preserve-mode="))) { c->runtime_directory_preserve_mode = exec_preserve_mode_from_string(val); if (c->runtime_directory_preserve_mode < 0) diff --git a/src/core/execute.c b/src/core/execute.c index 9fc9e549de3..c562a12b2a2 100644 --- a/src/core/execute.c +++ b/src/core/execute.c @@ -1156,6 +1156,19 @@ void exec_context_dump(const ExecContext *c, FILE* f, const char *prefix) { prefix, proc_subset_to_string(c->proc_subset), prefix, private_bpf_to_string(c->private_bpf)); + if (c->private_bpf == PRIVATE_BPF_YES) { + _cleanup_free_ char + *commands = bpf_delegate_commands_to_string(c->bpf_delegate_commands), + *maps = bpf_delegate_maps_to_string(c->bpf_delegate_maps), + *programs = bpf_delegate_programs_to_string(c->bpf_delegate_programs), + *attachments = bpf_delegate_attachments_to_string(c->bpf_delegate_attachments); + + fprintf(f, "%sBPFDelegateCommands: %s\n", prefix, strna(commands)); + fprintf(f, "%sBPFDelegateMaps: %s\n", prefix, strna(maps)); + fprintf(f, "%sBPFDelegatePrograms: %s\n", prefix, strna(programs)); + fprintf(f, "%sBPFDelegateAttachments: %s\n", prefix, strna(attachments)); + } + if (c->set_login_environment >= 0) fprintf(f, "%sSetLoginEnvironment: %s\n", prefix, yes_no(c->set_login_environment > 0)); diff --git a/src/core/execute.h b/src/core/execute.h index 
6f1df610a8a..64e88960624 100644 --- a/src/core/execute.h +++ b/src/core/execute.h @@ -301,6 +301,7 @@ typedef struct ExecContext { ProcSubset proc_subset; /* subset= */ PrivateBPF private_bpf; + uint64_t bpf_delegate_commands, bpf_delegate_maps, bpf_delegate_programs, bpf_delegate_attachments; int private_mounts; int mount_apivfs; diff --git a/src/core/load-fragment-gperf.gperf.in b/src/core/load-fragment-gperf.gperf.in index edb06395399..1977e882b51 100644 --- a/src/core/load-fragment-gperf.gperf.in +++ b/src/core/load-fragment-gperf.gperf.in @@ -68,6 +68,10 @@ {{type}}.ProtectProc, config_parse_protect_proc, 0, offsetof({{type}}, exec_context.protect_proc) {{type}}.ProcSubset, config_parse_proc_subset, 0, offsetof({{type}}, exec_context.proc_subset) {{type}}.PrivateBPF, config_parse_private_bpf, 0, offsetof({{type}}, exec_context.private_bpf) +{{type}}.BPFDelegateCommands, config_parse_bpf_delegate_commands, 0, offsetof({{type}}, exec_context.bpf_delegate_commands) +{{type}}.BPFDelegateMaps, config_parse_bpf_delegate_maps, 0, offsetof({{type}}, exec_context.bpf_delegate_maps) +{{type}}.BPFDelegatePrograms, config_parse_bpf_delegate_programs, 0, offsetof({{type}}, exec_context.bpf_delegate_programs) +{{type}}.BPFDelegateAttachments, config_parse_bpf_delegate_attachments, 0, offsetof({{type}}, exec_context.bpf_delegate_attachments) {% if HAVE_SECCOMP %} {{type}}.SystemCallFilter, config_parse_syscall_filter, 0, offsetof({{type}}, exec_context) {{type}}.SystemCallArchitectures, config_parse_syscall_archs, 0, offsetof({{type}}, exec_context.syscall_archs) diff --git a/src/core/load-fragment.c b/src/core/load-fragment.c index 9c544a35e05..8bc66804545 100644 --- a/src/core/load-fragment.c +++ b/src/core/load-fragment.c @@ -161,6 +161,10 @@ DEFINE_CONFIG_PARSE_ENUM_WITH_DEFAULT(config_parse_numa_policy, mpol, int, -1); DEFINE_CONFIG_PARSE_ENUM(config_parse_status_unit_format, status_unit_format, StatusUnitFormat); 
DEFINE_CONFIG_PARSE_ENUM_FULL(config_parse_socket_timestamping, socket_timestamping_from_string_harder, SocketTimestamping); DEFINE_CONFIG_PARSE_ENUM(config_parse_socket_defer_trigger, socket_defer_trigger, SocketDeferTrigger); +DEFINE_CONFIG_PARSE_PTR(config_parse_bpf_delegate_commands, bpf_delegate_commands_from_string, uint64_t); +DEFINE_CONFIG_PARSE_PTR(config_parse_bpf_delegate_maps, bpf_delegate_maps_from_string, uint64_t); +DEFINE_CONFIG_PARSE_PTR(config_parse_bpf_delegate_programs, bpf_delegate_programs_from_string, uint64_t); +DEFINE_CONFIG_PARSE_PTR(config_parse_bpf_delegate_attachments, bpf_delegate_attachments_from_string, uint64_t); bool contains_instance_specifier_superset(const char *s) { const char *p, *q; @@ -6271,6 +6275,10 @@ void unit_dump_config_items(FILE *f) { { config_parse_personality, "PERSONALITY" }, { config_parse_log_filter_patterns, "REGEX" }, { config_parse_mount_node, "NODE" }, + { config_parse_bpf_delegate_commands, "BPF_DELEGATE_COMMANDS" }, + { config_parse_bpf_delegate_maps, "BPF_DELEGATE_MAPS" }, + { config_parse_bpf_delegate_programs, "BPF_DELEGATE_PROGRAMS" }, + { config_parse_bpf_delegate_attachments, "BPF_DELEGATE_ATTACHMENTS" }, }; const char *prev = NULL; diff --git a/src/core/load-fragment.h b/src/core/load-fragment.h index ba226e2e5c3..ccbe7198ea2 100644 --- a/src/core/load-fragment.h +++ b/src/core/load-fragment.h @@ -130,6 +130,10 @@ CONFIG_PARSER_PROTOTYPE(config_parse_exec_keyring_mode); CONFIG_PARSER_PROTOTYPE(config_parse_protect_proc); CONFIG_PARSER_PROTOTYPE(config_parse_proc_subset); CONFIG_PARSER_PROTOTYPE(config_parse_private_bpf); +CONFIG_PARSER_PROTOTYPE(config_parse_bpf_delegate_commands); +CONFIG_PARSER_PROTOTYPE(config_parse_bpf_delegate_maps); +CONFIG_PARSER_PROTOTYPE(config_parse_bpf_delegate_programs); +CONFIG_PARSER_PROTOTYPE(config_parse_bpf_delegate_attachments); CONFIG_PARSER_PROTOTYPE(config_parse_job_timeout_sec); CONFIG_PARSER_PROTOTYPE(config_parse_job_running_timeout_sec); 
CONFIG_PARSER_PROTOTYPE(config_parse_log_extra_fields); diff --git a/src/core/namespace.c b/src/core/namespace.c index 0768eafac22..166fdf253ab 100644 --- a/src/core/namespace.c +++ b/src/core/namespace.c @@ -10,6 +10,7 @@ #include "alloc-util.h" #include "base-filesystem.h" +#include "bitfield.h" #include "chase.h" #include "dev-setup.h" #include "devnum-util.h" @@ -17,6 +18,7 @@ #include "errno-util.h" #include "escape.h" #include "extension-util.h" +#include "extract-word.h" #include "fd-util.h" #include "format-util.h" #include "fs-util.h" @@ -36,6 +38,7 @@ #include "nsflags.h" #include "nulstr-util.h" #include "os-util.h" +#include "parse-util.h" #include "path-util.h" #include "pidref.h" #include "process-util.h" @@ -3966,6 +3969,69 @@ static const char* const private_bpf_table[_PRIVATE_BPF_MAX] = { DEFINE_STRING_TABLE_LOOKUP_WITH_BOOLEAN(private_bpf, PrivateBPF, PRIVATE_BPF_YES); +#include "bpf-delegate-configs.inc" + +DEFINE_STRING_TABLE_LOOKUP(bpf_delegate_cmd, uint64_t); +DEFINE_STRING_TABLE_LOOKUP(bpf_delegate_map_type, uint64_t); +DEFINE_STRING_TABLE_LOOKUP(bpf_delegate_prog_type, uint64_t); +DEFINE_STRING_TABLE_LOOKUP(bpf_delegate_attach_type, uint64_t); + +char* bpf_delegate_to_string(uint64_t u, const char * (*parser)(uint64_t) _const_ ) { + assert(parser); + + if (u == UINT64_MAX) + return strdup("any"); + + _cleanup_free_ char *buf = NULL; + + BIT_FOREACH(i, u) { + const char *s = parser(i); + if (s) { + if (!strextend_with_separator(&buf, ",", s)) + return NULL; + } else { + if (strextendf_with_separator(&buf, ",", "%d", i) < 0) + return NULL; + } + } + + return TAKE_PTR(buf) ?: strdup(""); +} + +int bpf_delegate_from_string(const char *s, uint64_t *ret, uint64_t (*parser)(const char *)) { + int r; + + assert(s); + assert(ret); + assert(parser); + + if (streq(s, "any")) { + *ret = UINT64_MAX; + return 0; + } + + uint64_t mask = 0; + for (;;) { + _cleanup_free_ char *word = NULL; + + r = extract_first_word(&s, &word, ",", /* flags = */ 0); + if (r 
< 0) + return log_warning_errno(r, "Failed to parse delegate options \"%s\": %m", s); + if (r == 0) + break; + + r = parser(word); + if (r < 0) + log_warning_errno(r, "Unknown BPF delegate option, ignoring: %s", word); + else + mask |= UINT64_C(1) << r; + } + + *ret = mask; + + return 0; +} + static const char* const private_tmp_table[_PRIVATE_TMP_MAX] = { [PRIVATE_TMP_NO] = "no", [PRIVATE_TMP_CONNECTED] = "connected", diff --git a/src/core/namespace.h b/src/core/namespace.h index 178ed1e5480..42e146a7e8e 100644 --- a/src/core/namespace.h +++ b/src/core/namespace.h @@ -236,6 +236,53 @@ ProcSubset proc_subset_from_string(const char *s) _pure_; const char* private_bpf_to_string(PrivateBPF i) _const_; PrivateBPF private_bpf_from_string(const char *s) _pure_; +const char* bpf_delegate_cmd_to_string(uint64_t u) _const_; +uint64_t bpf_delegate_cmd_from_string(const char *s) _pure_; + +const char* bpf_delegate_map_type_to_string(uint64_t u) _const_; +uint64_t bpf_delegate_map_type_from_string(const char *s) _pure_; + +const char* bpf_delegate_prog_type_to_string(uint64_t u) _const_; +uint64_t bpf_delegate_prog_type_from_string(const char *s) _pure_; + +const char* bpf_delegate_attach_type_to_string(uint64_t u) _const_; +uint64_t bpf_delegate_attach_type_from_string(const char *s) _pure_; + +char* bpf_delegate_to_string(uint64_t u, const char * (*parser)(uint64_t) _const_); +int bpf_delegate_from_string(const char *s, uint64_t *ret, uint64_t (*parser)(const char *)); + +static inline int bpf_delegate_commands_from_string(const char *s, uint64_t *ret) { + return bpf_delegate_from_string(s, ret, bpf_delegate_cmd_from_string); +} + +static inline char * bpf_delegate_commands_to_string(uint64_t u) { + return bpf_delegate_to_string(u, bpf_delegate_cmd_to_string); +} + +static inline int bpf_delegate_maps_from_string(const char *s, uint64_t *ret) { + return bpf_delegate_from_string(s, ret, bpf_delegate_map_type_from_string); +} + +static inline char * 
bpf_delegate_maps_to_string(uint64_t u) { /* generic formatter bound to the bpf_delegate_map_type lookup */ + return bpf_delegate_to_string(u, bpf_delegate_map_type_to_string); +} + +static inline int bpf_delegate_programs_from_string(const char *s, uint64_t *ret) { /* generic parser bound to the bpf_delegate_prog_type lookup */ + return bpf_delegate_from_string(s, ret, bpf_delegate_prog_type_from_string); +} + +static inline char * bpf_delegate_programs_to_string(uint64_t u) { /* generic formatter bound to the bpf_delegate_prog_type lookup */ + return bpf_delegate_to_string(u, bpf_delegate_prog_type_to_string); +} + +static inline int bpf_delegate_attachments_from_string(const char *s, uint64_t *ret) { /* generic parser bound to the bpf_delegate_attach_type lookup */ + return bpf_delegate_from_string(s, ret, bpf_delegate_attach_type_from_string); +} + +static inline char * bpf_delegate_attachments_to_string(uint64_t u) { /* generic formatter bound to the bpf_delegate_attach_type lookup */ + return bpf_delegate_to_string(u, bpf_delegate_attach_type_to_string); +} + const char* private_tmp_to_string(PrivateTmp i) _const_; PrivateTmp private_tmp_from_string(const char *s) _pure_; diff --git a/src/shared/bus-unit-util.c b/src/shared/bus-unit-util.c index 8fc97db191b..1ba37bdab41 100644 --- a/src/shared/bus-unit-util.c +++ b/src/shared/bus-unit-util.c @@ -2426,6 +2426,10 @@ static const BusProperty execute_properties[] = { { "ExtensionImagePolicy", bus_append_string }, { "PrivatePIDs", bus_append_string }, { "PrivateBPF", bus_append_string }, + { "BPFDelegateCommands", bus_append_string }, /* the four BPFDelegate* entries use the same bus_append_string handler as PrivateBPF above */ + { "BPFDelegateMaps", bus_append_string }, + { "BPFDelegatePrograms", bus_append_string }, + { "BPFDelegateAttachments", bus_append_string }, { "IgnoreSIGPIPE", bus_append_parse_boolean }, { "TTYVHangup", bus_append_parse_boolean }, { "TTYReset", bus_append_parse_boolean }, diff --git a/src/test/meson.build b/src/test/meson.build index e99605af723..c0c2a4e9055 100644 --- a/src/test/meson.build +++ b/src/test/meson.build @@ -486,6 +486,12 @@ executables += [ 'sources' : files('test-bpf-restrict-fs.c'), 'dependencies' : common_test_dependencies, }, + core_test_template + { + 'sources' : files('test-bpf-token.c'), + 'dependencies' : common_test_dependencies + libbpf, + 'conditions' : ['BPF_FRAMEWORK'], + 'type' 
: 'manual', + }, core_test_template + { 'sources' : files('test-cgroup-cpu.c'), }, diff --git a/src/test/test-bpf-token.c b/src/test/test-bpf-token.c new file mode 100644 index 00000000000..23dd1430821 --- /dev/null +++ b/src/test/test-bpf-token.c @@ -0,0 +1,28 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include <bpf/bpf.h> +#include <fcntl.h> + +#include "fd-util.h" +#include "main-func.h" +#include "tests.h" + +static int run(int argc, char *argv[]) { /* Succeeds (returns 0) only if a BPF token can be created from the bpffs mounted at /sys/fs/bpf; otherwise returns -errno. */ +#if __LIBBPF_CURRENT_VERSION_GEQ(1, 5) + _cleanup_close_ int bpffs_fd = -EBADF, token_fd = -EBADF; + + bpffs_fd = open("/sys/fs/bpf", O_RDONLY); + if (bpffs_fd < 0) + return -errno; + + token_fd = bpf_token_create(bpffs_fd, /* opts = */ NULL); + if (token_fd < 0) + return -errno; + + return 0; +#else + exit(77); /* built against libbpf older than 1.5: report 77, which TEST-07-PID1.private-bpf.sh checks for before running the token tests */ +#endif +} + +DEFINE_MAIN_FUNCTION(run); diff --git a/test/units/TEST-07-PID1.private-bpf.sh b/test/units/TEST-07-PID1.private-bpf.sh index f0c1dcf73e4..f405b08d073 100755 --- a/test/units/TEST-07-PID1.private-bpf.sh +++ b/test/units/TEST-07-PID1.private-bpf.sh @@ -19,3 +19,53 @@ systemd-run --wait \ -p DelegateNamespaces=mnt \ -p PrivateBPF=yes \ grep -q '^none /sys/fs/bpf bpf rw' /proc/mounts + +# Check that when specifying the delegate arguments, the mount options are set properly +check_mount_opts() { + local delegate=$1 mnt_opts=$2 # $1: property assignment handed to systemd-run -p, $2: pattern that must appear in /proc/mounts + systemd-run --wait \ + -p PrivateUsers=yes \ + -p PrivateMounts=yes \ + -p DelegateNamespaces=mnt \ + -p PrivateBPF=yes \ + -p "$delegate" \ + grep -q "$mnt_opts" /proc/mounts +} + +check_mount_opts 'BPFDelegateCommands=BPFObjPin,BPFBtfLoad,BPFMapFreeze,BPFLinkDetach' 'delegate_cmds=obj_pin:btf_load:map_freeze:link_detach' +check_mount_opts 'BPFDelegateMaps=BPFMapTypeArray,BPFMapTypeCpumap,BPFMapTypeRingbuf' 'delegate_maps=array:cpumap:ringbuf' +check_mount_opts 'BPFDelegatePrograms=BPFProgTypeTracepoint,BPFProgTypeXdp,BPFProgTypeTracing' 'delegate_progs=tracepoint:xdp:tracing' +check_mount_opts 'BPFDelegateAttachments=BPFFlowDissector,BPFCgroupSysctl,BPFNetfilter' 
'delegate_attachs=flow_dissector:cgroup_sysctl:netfilter' + +# Building test-bpf-token requires BPF support; skip the remaining checks when systemctl reports -BPF_FRAMEWORK +if systemctl --version | grep -q -- -BPF_FRAMEWORK; then + exit 0 +fi + +# The helper always exits with 77 when it was compiled against a libbpf version +# older than 1.5.0. If that happens, end the test successfully instead of failing it +set +e # let the helper's exit status be inspected instead of aborting the script + +/usr/lib/systemd/tests/unit-tests/manual/test-bpf-token +if [ $? -eq 77 ]; then + exit 0 +fi + +set -e + +# Check that our helper is able to get a BPF token +systemd-run --wait \ + -p PrivateUsers=yes \ + -p PrivateMounts=yes \ + -p DelegateNamespaces=mnt \ + -p PrivateBPF=yes \ + -p BPFDelegateCommands=BPFProgLoad \ + /usr/lib/systemd/tests/unit-tests/manual/test-bpf-token + +# Check that without the delegates, the helper fails to get a token. NOTE(review): "!" exempts a command from errexit, so this check appears to take effect only because it is the last command of the script - consider (! ...) if anything is ever added after it +! systemd-run --wait \ + -p PrivateUsers=yes \ + -p PrivateMounts=yes \ + -p DelegateNamespaces=mnt \ + -p PrivateBPF=yes \ + /usr/lib/systemd/tests/unit-tests/manual/test-bpf-token diff --git a/tools/xml_helper.py b/tools/xml_helper.py index 6f71350f5b8..294630ded15 100755 --- a/tools/xml_helper.py +++ b/tools/xml_helper.py @@ -10,6 +10,8 @@ class CustomResolver(tree.Resolver): return self.resolve_filename('man/custom-entities.ent', context) if 'ethtool-link-mode' in url: return self.resolve_filename('src/shared/ethtool-link-mode.xml', context) + if 'bpf-delegate' in url: # URLs containing bpf-delegate are served from man/bpf-delegate.xml + return self.resolve_filename('man/bpf-delegate.xml', context) return None -- 2.47.3