From: Lennart Poettering Date: Mon, 10 Mar 2025 12:39:53 +0000 (+0100) Subject: nsresourced: check polkit before executing our operations X-Git-Tag: v258-rc1~1062^2~4 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=d2f3ddfc6552ea4edafa6dce8b11dd336ff26ae3;p=thirdparty%2Fsystemd.git nsresourced: check polkit before executing our operations Let's tighten rules on namespace operations: let's always ask PK for permission before doing anything. Note that if polkit is absent we'll still allow things, and the default PK policy will also still allow things, but there's now a clear way how people can not allow things if they want, by modifying the PK policy. --- diff --git a/src/nsresourced/io.systemd.namespace-resource.policy b/src/nsresourced/io.systemd.namespace-resource.policy new file mode 100644 index 00000000000..b71efb9fc2c --- /dev/null +++ b/src/nsresourced/io.systemd.namespace-resource.policy @@ -0,0 +1,78 @@ + + + + + + + + The systemd Project + https://systemd.io + + + + Allow user namespace allocation + Authentication is required for an application to allocate a user namespace '$(name)' with an automatically assigned transient UID range. + + yes + yes + yes + + io.systemd.namespace-resource.register-user-namespace + + + + + Allow user namespace registration + Authentication is required for an application to register a user namespace '$(name)'. + + yes + yes + yes + + io.systemd.namespace-resource.allocate-user-namespace + + + + + Allow adding a mount to a user namespace + Authentication is required for an application to add a mount to a user namespace. + + yes + yes + yes + + + + + + Allow adding a control group to a user namespace + Authentication is required for an application to add a control group to a user namespace. + + yes + yes + yes + + + + + + Allow adding a network interface to a user namespace + Authentication is required for an application to add a network interface of type $(type) to a user namespace. + + yes + yes + yes + + + + diff --git a/src/nsresourced/meson.build b/src/nsresourced/meson.build index d8524d52bf6..b80d6b2a342 100644 --- a/src/nsresourced/meson.build +++ b/src/nsresourced/meson.build @@ -48,3 +48,6 @@ executables += [ 'include_directories' : [ includes, userns_restrict_include ], }, ] + +install_data('io.systemd.namespace-resource.policy', + install_dir : polkitpolicydir) diff --git a/src/nsresourced/nsresourcework.c b/src/nsresourced/nsresourcework.c index 86d1428ccc8..671afc69977 100644 --- a/src/nsresourced/nsresourcework.c +++ b/src/nsresourced/nsresourcework.c @@ -17,6 +17,7 @@ #include "sd-netlink.h" #include "sd-varlink.h" +#include "bus-polkit.h" #include "env-util.h" #include "fd-util.h" #include "fileio.h" @@ -53,10 +54,14 @@ #define ITERATIONS_MAX 64U #define RUNTIME_MAX_USEC (5 * USEC_PER_MINUTE) #define PRESSURE_SLEEP_TIME_USEC (50 * USEC_PER_MSEC) -#define CONNECTION_IDLE_USEC (15 * USEC_PER_SEC) #define LISTEN_IDLE_USEC (90 * USEC_PER_SEC) #define USERNS_PER_UID 256 +typedef struct Context { + Hashmap *polkit_registry; + struct userns_restrict_bpf *bpf; +} Context; + typedef struct LookupParameters { const char *user_name; const char *group_name; @@ -824,9 +829,9 @@ static int vl_method_allocate_user_range(sd_varlink *link, sd_json_variant *para {} }; - struct userns_restrict_bpf **bpf = ASSERT_PTR(userdata); _cleanup_close_ int userns_fd = -EBADF, registry_dir_fd = -EBADF, lock_fd = -EBADF; _cleanup_free_ char *userns_name = NULL; + Context *c = ASSERT_PTR(userdata); uid_t peer_uid; struct stat userns_st; AllocateParameters p = { @@ -854,7 +859,7 @@ static int vl_method_allocate_user_range(sd_varlink *link, sd_json_variant *para if (r != 0) return r; - userns_fd = sd_varlink_take_fd(link, p.userns_fd_idx); + userns_fd = sd_varlink_peek_dup_fd(link, p.userns_fd_idx); if (userns_fd < 0) return log_debug_errno(userns_fd, "Failed to take user namespace fd from Varlink connection: %m"); @@ -873,8 +878,24 @@ static int vl_method_allocate_user_range(sd_varlink *link, sd_json_variant *para if (r < 0) return r; - if (!*bpf) { - r = userns_restrict_install(/* pin= */ true, bpf); + const char *polkit_details[] = { + "name", userns_name, + NULL, + }; + + r = varlink_verify_polkit_async_full( + link, + /* bus= */ NULL, + "io.systemd.namespace-resource.allocate-user-namespace", + polkit_details, + /* good_user= */ UID_INVALID, + POLKIT_DEFAULT_ALLOW, /* If no polkit is installed, allow unpriv userns namespace allocation */ + &c->polkit_registry); + if (r <= 0) + return r; + + if (!c->bpf) { + r = userns_restrict_install(/* pin= */ true, &c->bpf); if (r < 0) return r; } @@ -915,7 +936,7 @@ static int vl_method_allocate_user_range(sd_varlink *link, sd_json_variant *para /* Register the userns in the BPF map with an empty allowlist */ r = userns_restrict_put_by_fd( - *bpf, + c->bpf, userns_fd, /* replace= */ true, /* mount_fds= */ NULL, @@ -1026,9 +1047,9 @@ static int vl_method_register_user_namespace(sd_varlink *link, sd_json_variant * {} }; - struct userns_restrict_bpf **bpf = ASSERT_PTR(userdata); _cleanup_close_ int userns_fd = -EBADF, registry_dir_fd = -EBADF; _cleanup_free_ char *userns_name = NULL; + Context *c = ASSERT_PTR(userdata); uid_t peer_uid; struct stat userns_st; RegisterParameters p = { @@ -1051,7 +1072,7 @@ static int vl_method_register_user_namespace(sd_varlink *link, sd_json_variant * if (r != 0) return r; - userns_fd = sd_varlink_take_fd(link, p.userns_fd_idx); + userns_fd = sd_varlink_peek_dup_fd(link, p.userns_fd_idx); if (userns_fd < 0) return userns_fd; @@ -1070,8 +1091,24 @@ static int vl_method_register_user_namespace(sd_varlink *link, sd_json_variant * if (r < 0) return r; - if (!*bpf) { - r = userns_restrict_install(/* pin= */ true, bpf); + const char *polkit_details[] = { + "name", userns_name, + NULL, + }; + + r = varlink_verify_polkit_async_full( + link, + /* bus= */ NULL, + "io.systemd.namespace-resource.register-user-namespace", + polkit_details, + /* good_user= */ UID_INVALID, + POLKIT_DEFAULT_ALLOW, /* If no polkit is installed, allow unpriv userns namespace registration */ + &c->polkit_registry); + if (r <= 0) + return r; + + if (!c->bpf) { + r = userns_restrict_install(/* pin= */ true, &c->bpf); if (r < 0) return r; } @@ -1114,7 +1151,7 @@ static int vl_method_register_user_namespace(sd_varlink *link, sd_json_variant * /* Register the userns in the BPF map with an empty allowlist */ r = userns_restrict_put_by_fd( - *bpf, + c->bpf, userns_fd, /* replace= */ true, /* mount_fds= */ NULL, @@ -1153,7 +1190,7 @@ static int vl_method_add_mount_to_user_namespace(sd_varlink *link, sd_json_varia }; _cleanup_close_ int userns_fd = -EBADF, mount_fd = -EBADF, registry_dir_fd = -EBADF; - struct userns_restrict_bpf **bpf = ASSERT_PTR(userdata); + Context *c = ASSERT_PTR(userdata); AddMountParameters p = { .userns_fd_idx = UINT_MAX, .mount_fd_idx = UINT_MAX, @@ -1180,7 +1217,7 @@ static int vl_method_add_mount_to_user_namespace(sd_varlink *link, sd_json_varia if (r != 0) return r; - userns_fd = sd_varlink_take_fd(link, p.userns_fd_idx); + userns_fd = sd_varlink_peek_dup_fd(link, p.userns_fd_idx); if (userns_fd < 0) return userns_fd; @@ -1191,7 +1228,7 @@ static int vl_method_add_mount_to_user_namespace(sd_varlink *link, sd_json_varia if (fstat(userns_fd, &userns_st) < 0) return -errno; - mount_fd = sd_varlink_take_fd(link, p.mount_fd_idx); + mount_fd = sd_varlink_peek_dup_fd(link, p.mount_fd_idx); if (mount_fd < 0) return mount_fd; @@ -1207,6 +1244,17 @@ static int vl_method_add_mount_to_user_namespace(sd_varlink *link, sd_json_varia if (r < 0) return r; + r = varlink_verify_polkit_async_full( + link, + /* bus= */ NULL, + "io.systemd.namespace-resource.delegate-mount", + /* polkit_details= */ NULL, + /* good_user= */ UID_INVALID, + POLKIT_DEFAULT_ALLOW, /* If no polkit is installed, allow delegation of mounts to registered userns */ + &c->polkit_registry); + if (r <= 0) + return r; + registry_dir_fd = userns_registry_open_fd(); if (registry_dir_fd < 0) return registry_dir_fd; @@ -1226,8 +1274,8 @@ static int vl_method_add_mount_to_user_namespace(sd_varlink *link, sd_json_varia if (r < 0) return r; - if (!*bpf) { - r = userns_restrict_install(/* pin= */ true, bpf); + if (!c->bpf) { + r = userns_restrict_install(/* pin= */ true, &c->bpf); if (r < 0) return r; } @@ -1244,7 +1292,7 @@ static int vl_method_add_mount_to_user_namespace(sd_varlink *link, sd_json_varia /* Add this mount to the user namespace's BPF map allowlist entry. */ r = userns_restrict_put_by_fd( - *bpf, + c->bpf, userns_fd, /* replace= */ false, &mount_fd, @@ -1310,6 +1358,7 @@ static int vl_method_add_cgroup_to_user_namespace(sd_varlink *link, sd_json_vari .cgroup_fd_idx = UINT_MAX, }; _cleanup_(userns_info_freep) UserNamespaceInfo *userns_info = NULL; + Context *c = ASSERT_PTR(userdata); struct stat userns_st, cgroup_st; uid_t peer_uid; int r; @@ -1325,7 +1374,7 @@ static int vl_method_add_cgroup_to_user_namespace(sd_varlink *link, sd_json_vari if (r != 0) return r; - userns_fd = sd_varlink_take_fd(link, p.userns_fd_idx); + userns_fd = sd_varlink_peek_dup_fd(link, p.userns_fd_idx); if (userns_fd < 0) return log_debug_errno(userns_fd, "Failed to take user namespace fd from Varlink connection: %m"); @@ -1336,7 +1385,7 @@ static int vl_method_add_cgroup_to_user_namespace(sd_varlink *link, sd_json_vari if (fstat(userns_fd, &userns_st) < 0) return log_debug_errno(errno, "Failed to fstat() user namespace fd: %m"); - cgroup_fd = sd_varlink_take_fd(link, p.cgroup_fd_idx); + cgroup_fd = sd_varlink_peek_dup_fd(link, p.cgroup_fd_idx); if (cgroup_fd < 0) return log_debug_errno(cgroup_fd, "Failed to take cgroup fd from Varlink connection: %m"); @@ -1348,6 +1397,17 @@ static int vl_method_add_cgroup_to_user_namespace(sd_varlink *link, sd_json_vari if (fstat(cgroup_fd, &cgroup_st) < 0) return log_debug_errno(errno, "Failed to fstat() cgroup fd: %m"); + r = varlink_verify_polkit_async_full( + link, + /* bus= */ NULL, + "io.systemd.namespace-resource.delegate-cgroup", + /* polkit_details= */ NULL, + /* good_user= */ UID_INVALID, + POLKIT_DEFAULT_ALLOW, /* If no polkit is installed, allow delegation of cgroups to registered userns */ + &c->polkit_registry); + if (r <= 0) + return r; + registry_dir_fd = userns_registry_open_fd(); if (registry_dir_fd < 0) return registry_dir_fd; @@ -1665,6 +1725,7 @@ static int vl_method_add_netif_to_user_namespace(sd_varlink *link, sd_json_varia }; _cleanup_close_ int userns_fd = -EBADF, netns_fd = -EBADF, registry_dir_fd = -EBADF; + Context *c = ASSERT_PTR(userdata); AddNetworkParameters p = { .userns_fd_idx = UINT_MAX, .netns_fd_idx = UINT_MAX, @@ -1682,7 +1743,7 @@ static int vl_method_add_netif_to_user_namespace(sd_varlink *link, sd_json_varia if (r != 0) return r; - userns_fd = sd_varlink_take_fd(link, p.userns_fd_idx); + userns_fd = sd_varlink_peek_dup_fd(link, p.userns_fd_idx); if (userns_fd < 0) return userns_fd; @@ -1695,7 +1756,7 @@ static int vl_method_add_netif_to_user_namespace(sd_varlink *link, sd_json_varia return -errno; if (p.netns_fd_idx != UINT_MAX) { - netns_fd = sd_varlink_take_fd(link, p.netns_fd_idx); + netns_fd = sd_varlink_peek_dup_fd(link, p.netns_fd_idx); if (netns_fd < 0) return netns_fd; @@ -1724,6 +1785,22 @@ static int vl_method_add_netif_to_user_namespace(sd_varlink *link, sd_json_varia } else return sd_varlink_error_invalid_parameter_name(link, "mode"); + const char *polkit_details[] = { + "type", p.mode, + NULL, + }; + + r = varlink_verify_polkit_async_full( + link, + /* bus= */ NULL, + "io.systemd.namespace-resource.delegate-network-interface", + polkit_details, + /* good_user= */ UID_INVALID, + POLKIT_DEFAULT_ALLOW, /* If no polkit is installed, allow delegation of network interfaces to registered userns */ + &c->polkit_registry); + if (r <= 0) + return r; + registry_dir_fd = userns_registry_open_fd(); if (registry_dir_fd < 0) return registry_dir_fd; @@ -1835,11 +1912,20 @@ static int vl_method_add_netif_to_user_namespace(sd_varlink *link, sd_json_varia static int process_connection(sd_varlink_server *server, int _fd) { _cleanup_close_ int fd = TAKE_FD(_fd); /* always take possession */ _cleanup_(sd_varlink_close_unrefp) sd_varlink *vl = NULL; + _cleanup_(sd_event_unrefp) sd_event *event = NULL; int r; assert(server); assert(fd >= 0); + r = sd_event_new(&event); + if (r < 0) + return r; + + r = sd_varlink_server_attach_event(server, event, /* priority= */ 0); + if (r < 0) + return log_error_errno(r, "Failed to attach Varlink server to event loop: %m"); + r = sd_varlink_server_add_connection(server, fd, &vl); if (r < 0) return log_error_errno(r, "Failed to add connection: %m"); @@ -1847,31 +1933,28 @@ static int process_connection(sd_varlink_server *server, int _fd) { TAKE_FD(fd); vl = sd_varlink_ref(vl); - for (;;) { - r = sd_varlink_process(vl); - if (r == -ENOTCONN) { - log_debug("Connection terminated."); - break; - } - if (r < 0) - return log_error_errno(r, "Failed to process connection: %m"); - if (r > 0) - continue; + r = sd_event_loop(event); + if (r < 0) + return log_error_errno(r, "Failed to run event loop: %m"); - r = sd_varlink_wait(vl, CONNECTION_IDLE_USEC); - if (r < 0) - return log_error_errno(r, "Failed to wait for connection events: %m"); - if (r == 0) - break; - } + r = sd_varlink_server_detach_event(server); + if (r < 0) + return log_error_errno(r, "Failed to detach Varlink server from event loop: %m"); return 0; } +static void context_free(Context *c) { + assert(c); + + c->polkit_registry = hashmap_free(c->polkit_registry); + c->bpf = userns_restrict_bpf_free(c->bpf); +} + static int run(int argc, char *argv[]) { - _cleanup_(userns_restrict_bpf_freep) struct userns_restrict_bpf *bpf = NULL; usec_t start_time, listen_idle_usec, last_busy_usec = USEC_INFINITY; _cleanup_(sd_varlink_server_unrefp) sd_varlink_server *server = NULL; + _cleanup_(context_free) Context c = {}; _cleanup_(pidref_done) PidRef parent = PIDREF_NULL; unsigned n_iterations = 0; int m, listen_fd, r; @@ -1896,7 +1979,7 @@ static int run(int argc, char *argv[]) { &server, SD_VARLINK_SERVER_INHERIT_USERDATA| SD_VARLINK_SERVER_ALLOW_FD_PASSING_INPUT|SD_VARLINK_SERVER_ALLOW_FD_PASSING_OUTPUT, - &bpf); + &c); if (r < 0) return log_error_errno(r, "Failed to allocate varlink server: %m"); @@ -1920,6 +2003,10 @@ static int run(int argc, char *argv[]) { if (r < 0) return log_error_errno(r, "Failed to bind methods: %m"); + r = sd_varlink_server_set_exit_on_idle(server, true); + if (r < 0) + return log_error_errno(r, "Failed to enable exit-on-idle mode: %m"); + r = getenv_bool("NSRESOURCE_FIXED_WORKER"); if (r < 0) return log_error_errno(r, "Failed to parse NSRESOURCE_FIXED_WORKER: %m");