git.ipfire.org Git - thirdparty/systemd.git/commitdiff
nsresourced: check polkit before executing our operations
author Lennart Poettering <lennart@poettering.net>
Mon, 10 Mar 2025 12:39:53 +0000 (13:39 +0100)
committer Lennart Poettering <lennart@poettering.net>
Mon, 17 Mar 2025 15:03:18 +0000 (16:03 +0100)
Let's tighten the rules on namespace operations: let's always ask polkit
(PK) for permission before doing anything.

Note that if polkit is absent we'll still allow things, and the default
PK policy will also still allow things, but there's now a clear way for
people to disallow things if they want to: by modifying the PK policy.

src/nsresourced/io.systemd.namespace-resource.policy [new file with mode: 0644]
src/nsresourced/meson.build
src/nsresourced/nsresourcework.c

diff --git a/src/nsresourced/io.systemd.namespace-resource.policy b/src/nsresourced/io.systemd.namespace-resource.policy
new file mode 100644 (file)
index 0000000..b71efb9
--- /dev/null
@@ -0,0 +1,78 @@
+<?xml version="1.0" encoding="UTF-8"?> <!--*-nxml-*-->
+<!DOCTYPE policyconfig PUBLIC "-//freedesktop//DTD PolicyKit Policy Configuration 1.0//EN"
+        "https://www.freedesktop.org/standards/PolicyKit/1/policyconfig.dtd">
+
+<!--
+  SPDX-License-Identifier: LGPL-2.1-or-later
+
+  This file is part of systemd.
+
+  systemd is free software; you can redistribute it and/or modify it
+  under the terms of the GNU Lesser General Public License as published by
+  the Free Software Foundation; either version 2.1 of the License, or
+  (at your option) any later version.
+-->
+
+<policyconfig>
+
+        <vendor>The systemd Project</vendor>
+        <vendor_url>https://systemd.io</vendor_url>
+
+        <!-- Allow allocation of a user namespace with an automatically assigned UID range -->
+        <action id="io.systemd.namespace-resource.allocate-user-namespace">
+                <description gettext-domain="systemd">Allow user namespace allocation</description>
+                <message gettext-domain="systemd">Authentication is required for an application to allocate a user namespace '$(name)' with an automatically assigned transient UID range.</message>
+                <defaults>
+                        <allow_any>yes</allow_any>
+                        <allow_inactive>yes</allow_inactive>
+                        <allow_active>yes</allow_active>
+                </defaults>
+                <annotate key="org.freedesktop.policykit.imply">io.systemd.namespace-resource.register-user-namespace</annotate>
+        </action>
+
+        <!-- Allow registration of a user namespace with a range allocated elsewhere -->
+        <action id="io.systemd.namespace-resource.register-user-namespace">
+                <description gettext-domain="systemd">Allow user namespace registration</description>
+                <message gettext-domain="systemd">Authentication is required for an application to register a user namespace '$(name)'.</message>
+                <defaults>
+                        <allow_any>yes</allow_any>
+                        <allow_inactive>yes</allow_inactive>
+                        <allow_active>yes</allow_active>
+                </defaults>
+                <annotate key="org.freedesktop.policykit.imply">io.systemd.namespace-resource.allocate-user-namespace</annotate>
+        </action>
+
+        <!-- Allow adding a mount to a registered userns -->
+        <action id="io.systemd.namespace-resource.delegate-mount">
+                <description gettext-domain="systemd">Allow adding a mount to a user namespace</description>
+                <message gettext-domain="systemd">Authentication is required for an application to add a mount to a user namespace.</message>
+                <defaults>
+                        <allow_any>yes</allow_any>
+                        <allow_inactive>yes</allow_inactive>
+                        <allow_active>yes</allow_active>
+                </defaults>
+        </action>
+
+        <!-- Allow adding a cgroup to a registered userns -->
+        <action id="io.systemd.namespace-resource.delegate-cgroup">
+                <description gettext-domain="systemd">Allow adding a control group to a user namespace</description>
+                <message gettext-domain="systemd">Authentication is required for an application to add a control group to a user namespace.</message>
+                <defaults>
+                        <allow_any>yes</allow_any>
+                        <allow_inactive>yes</allow_inactive>
+                        <allow_active>yes</allow_active>
+                </defaults>
+        </action>
+
+        <!-- Allow adding a network interface to a registered userns -->
+        <action id="io.systemd.namespace-resource.delegate-network-interface">
+                <description gettext-domain="systemd">Allow adding a network interface to a user namespace</description>
+                <message gettext-domain="systemd">Authentication is required for an application to add a network interface of type $(type) to a user namespace.</message>
+                <defaults>
+                        <allow_any>yes</allow_any>
+                        <allow_inactive>yes</allow_inactive>
+                        <allow_active>yes</allow_active>
+                </defaults>
+        </action>
+
+</policyconfig>
index d8524d52bf62896b22c54e603cb78ef5e6e2f0f5..b80d6b2a3427399ff8de5aa73789ce502279a826 100644 (file)
@@ -48,3 +48,6 @@ executables += [
                 'include_directories' : [ includes, userns_restrict_include ],
         },
 ]
+
+install_data('io.systemd.namespace-resource.policy',
+             install_dir : polkitpolicydir)
index 86d1428ccc8820efd9825eaa7bede45627871f1d..671afc69977a8eb7907f6ea2030d4b2bdd4b48a8 100644 (file)
@@ -17,6 +17,7 @@
 #include "sd-netlink.h"
 #include "sd-varlink.h"
 
+#include "bus-polkit.h"
 #include "env-util.h"
 #include "fd-util.h"
 #include "fileio.h"
 #define ITERATIONS_MAX 64U
 #define RUNTIME_MAX_USEC (5 * USEC_PER_MINUTE)
 #define PRESSURE_SLEEP_TIME_USEC (50 * USEC_PER_MSEC)
-#define CONNECTION_IDLE_USEC (15 * USEC_PER_SEC)
 #define LISTEN_IDLE_USEC (90 * USEC_PER_SEC)
 #define USERNS_PER_UID 256
 
+typedef struct Context { /* State shared by the Varlink method handlers, which receive it as their userdata pointer */
+        Hashmap *polkit_registry; /* handed to varlink_verify_polkit_async_full() by every handler that asks polkit */
+        struct userns_restrict_bpf *bpf; /* starts out NULL; handlers call userns_restrict_install() on it when still unset */
+} Context;
+
 typedef struct LookupParameters {
         const char *user_name;
         const char *group_name;
@@ -824,9 +829,9 @@ static int vl_method_allocate_user_range(sd_varlink *link, sd_json_variant *para
                 {}
         };
 
-        struct userns_restrict_bpf **bpf = ASSERT_PTR(userdata);
         _cleanup_close_ int userns_fd = -EBADF, registry_dir_fd = -EBADF, lock_fd = -EBADF;
         _cleanup_free_ char *userns_name = NULL;
+        Context *c = ASSERT_PTR(userdata);
         uid_t peer_uid;
         struct stat userns_st;
         AllocateParameters p = {
@@ -854,7 +859,7 @@ static int vl_method_allocate_user_range(sd_varlink *link, sd_json_variant *para
         if (r != 0)
                 return r;
 
-        userns_fd = sd_varlink_take_fd(link, p.userns_fd_idx);
+        userns_fd = sd_varlink_peek_dup_fd(link, p.userns_fd_idx);
         if (userns_fd < 0)
                 return log_debug_errno(userns_fd, "Failed to take user namespace fd from Varlink connection: %m");
 
@@ -873,8 +878,24 @@ static int vl_method_allocate_user_range(sd_varlink *link, sd_json_variant *para
         if (r < 0)
                 return r;
 
-        if (!*bpf) {
-                r = userns_restrict_install(/* pin= */ true, bpf);
+        const char *polkit_details[] = {
+                "name", userns_name,
+                NULL,
+        };
+
+        r = varlink_verify_polkit_async_full(
+                        link,
+                        /* bus= */ NULL,
+                        "io.systemd.namespace-resource.allocate-user-namespace",
+                        polkit_details,
+                        /* good_user= */ UID_INVALID,
+                        POLKIT_DEFAULT_ALLOW, /* If no polkit is installed, allow unpriv userns namespace allocation */
+                        &c->polkit_registry);
+        if (r <= 0)
+                return r;
+
+        if (!c->bpf) {
+                r = userns_restrict_install(/* pin= */ true, &c->bpf);
                 if (r < 0)
                         return r;
         }
@@ -915,7 +936,7 @@ static int vl_method_allocate_user_range(sd_varlink *link, sd_json_variant *para
 
         /* Register the userns in the BPF map with an empty allowlist */
         r = userns_restrict_put_by_fd(
-                        *bpf,
+                        c->bpf,
                         userns_fd,
                         /* replace= */ true,
                         /* mount_fds= */ NULL,
@@ -1026,9 +1047,9 @@ static int vl_method_register_user_namespace(sd_varlink *link, sd_json_variant *
                 {}
         };
 
-        struct userns_restrict_bpf **bpf = ASSERT_PTR(userdata);
         _cleanup_close_ int userns_fd = -EBADF, registry_dir_fd = -EBADF;
         _cleanup_free_ char *userns_name = NULL;
+        Context *c = ASSERT_PTR(userdata);
         uid_t peer_uid;
         struct stat userns_st;
         RegisterParameters p = {
@@ -1051,7 +1072,7 @@ static int vl_method_register_user_namespace(sd_varlink *link, sd_json_variant *
         if (r != 0)
                 return r;
 
-        userns_fd = sd_varlink_take_fd(link, p.userns_fd_idx);
+        userns_fd = sd_varlink_peek_dup_fd(link, p.userns_fd_idx);
         if (userns_fd < 0)
                 return userns_fd;
 
@@ -1070,8 +1091,24 @@ static int vl_method_register_user_namespace(sd_varlink *link, sd_json_variant *
         if (r < 0)
                 return r;
 
-        if (!*bpf) {
-                r = userns_restrict_install(/* pin= */ true, bpf);
+        const char *polkit_details[] = {
+                "name", userns_name,
+                NULL,
+        };
+
+        r = varlink_verify_polkit_async_full(
+                        link,
+                        /* bus= */ NULL,
+                        "io.systemd.namespace-resource.register-user-namespace",
+                        polkit_details,
+                        /* good_user= */ UID_INVALID,
+                        POLKIT_DEFAULT_ALLOW, /* If no polkit is installed, allow unpriv userns namespace registration */
+                        &c->polkit_registry);
+        if (r <= 0)
+                return r;
+
+        if (!c->bpf) {
+                r = userns_restrict_install(/* pin= */ true, &c->bpf);
                 if (r < 0)
                         return r;
         }
@@ -1114,7 +1151,7 @@ static int vl_method_register_user_namespace(sd_varlink *link, sd_json_variant *
 
         /* Register the userns in the BPF map with an empty allowlist */
         r = userns_restrict_put_by_fd(
-                        *bpf,
+                        c->bpf,
                         userns_fd,
                         /* replace= */ true,
                         /* mount_fds= */ NULL,
@@ -1153,7 +1190,7 @@ static int vl_method_add_mount_to_user_namespace(sd_varlink *link, sd_json_varia
         };
 
         _cleanup_close_ int userns_fd = -EBADF, mount_fd = -EBADF, registry_dir_fd = -EBADF;
-        struct userns_restrict_bpf **bpf = ASSERT_PTR(userdata);
+        Context *c = ASSERT_PTR(userdata);
         AddMountParameters p = {
                 .userns_fd_idx = UINT_MAX,
                 .mount_fd_idx = UINT_MAX,
@@ -1180,7 +1217,7 @@ static int vl_method_add_mount_to_user_namespace(sd_varlink *link, sd_json_varia
         if (r != 0)
                 return r;
 
-        userns_fd = sd_varlink_take_fd(link, p.userns_fd_idx);
+        userns_fd = sd_varlink_peek_dup_fd(link, p.userns_fd_idx);
         if (userns_fd < 0)
                 return userns_fd;
 
@@ -1191,7 +1228,7 @@ static int vl_method_add_mount_to_user_namespace(sd_varlink *link, sd_json_varia
         if (fstat(userns_fd, &userns_st) < 0)
                 return -errno;
 
-        mount_fd = sd_varlink_take_fd(link, p.mount_fd_idx);
+        mount_fd = sd_varlink_peek_dup_fd(link, p.mount_fd_idx);
         if (mount_fd < 0)
                 return mount_fd;
 
@@ -1207,6 +1244,17 @@ static int vl_method_add_mount_to_user_namespace(sd_varlink *link, sd_json_varia
         if (r < 0)
                 return r;
 
+        r = varlink_verify_polkit_async_full(
+                        link,
+                        /* bus= */ NULL,
+                        "io.systemd.namespace-resource.delegate-mount",
+                        /* polkit_details= */ NULL,
+                        /* good_user= */ UID_INVALID,
+                        POLKIT_DEFAULT_ALLOW, /* If no polkit is installed, allow delegation of mounts to registered userns */
+                        &c->polkit_registry);
+        if (r <= 0)
+                return r;
+
         registry_dir_fd = userns_registry_open_fd();
         if (registry_dir_fd < 0)
                 return registry_dir_fd;
@@ -1226,8 +1274,8 @@ static int vl_method_add_mount_to_user_namespace(sd_varlink *link, sd_json_varia
         if (r < 0)
                 return r;
 
-        if (!*bpf) {
-                r = userns_restrict_install(/* pin= */ true, bpf);
+        if (!c->bpf) {
+                r = userns_restrict_install(/* pin= */ true, &c->bpf);
                 if (r < 0)
                         return r;
         }
@@ -1244,7 +1292,7 @@ static int vl_method_add_mount_to_user_namespace(sd_varlink *link, sd_json_varia
 
         /* Add this mount to the user namespace's BPF map allowlist entry. */
         r = userns_restrict_put_by_fd(
-                        *bpf,
+                        c->bpf,
                         userns_fd,
                         /* replace= */ false,
                         &mount_fd,
@@ -1310,6 +1358,7 @@ static int vl_method_add_cgroup_to_user_namespace(sd_varlink *link, sd_json_vari
                 .cgroup_fd_idx = UINT_MAX,
         };
         _cleanup_(userns_info_freep) UserNamespaceInfo *userns_info = NULL;
+        Context *c = ASSERT_PTR(userdata);
         struct stat userns_st, cgroup_st;
         uid_t peer_uid;
         int r;
@@ -1325,7 +1374,7 @@ static int vl_method_add_cgroup_to_user_namespace(sd_varlink *link, sd_json_vari
         if (r != 0)
                 return r;
 
-        userns_fd = sd_varlink_take_fd(link, p.userns_fd_idx);
+        userns_fd = sd_varlink_peek_dup_fd(link, p.userns_fd_idx);
         if (userns_fd < 0)
                 return log_debug_errno(userns_fd, "Failed to take user namespace fd from Varlink connection: %m");
 
@@ -1336,7 +1385,7 @@ static int vl_method_add_cgroup_to_user_namespace(sd_varlink *link, sd_json_vari
         if (fstat(userns_fd, &userns_st) < 0)
                 return log_debug_errno(errno, "Failed to fstat() user namespace fd: %m");
 
-        cgroup_fd = sd_varlink_take_fd(link, p.cgroup_fd_idx);
+        cgroup_fd = sd_varlink_peek_dup_fd(link, p.cgroup_fd_idx);
         if (cgroup_fd < 0)
                 return log_debug_errno(cgroup_fd, "Failed to take cgroup fd from Varlink connection: %m");
 
@@ -1348,6 +1397,17 @@ static int vl_method_add_cgroup_to_user_namespace(sd_varlink *link, sd_json_vari
         if (fstat(cgroup_fd, &cgroup_st) < 0)
                 return log_debug_errno(errno, "Failed to fstat() cgroup fd: %m");
 
+        r = varlink_verify_polkit_async_full(
+                        link,
+                        /* bus= */ NULL,
+                        "io.systemd.namespace-resource.delegate-cgroup",
+                        /* polkit_details= */ NULL,
+                        /* good_user= */ UID_INVALID,
+                        POLKIT_DEFAULT_ALLOW, /* If no polkit is installed, allow delegation of cgroups to registered userns */
+                        &c->polkit_registry);
+        if (r <= 0)
+                return r;
+
         registry_dir_fd = userns_registry_open_fd();
         if (registry_dir_fd < 0)
                 return registry_dir_fd;
@@ -1665,6 +1725,7 @@ static int vl_method_add_netif_to_user_namespace(sd_varlink *link, sd_json_varia
         };
 
         _cleanup_close_ int userns_fd = -EBADF, netns_fd = -EBADF, registry_dir_fd = -EBADF;
+        Context *c = ASSERT_PTR(userdata);
         AddNetworkParameters p = {
                 .userns_fd_idx = UINT_MAX,
                 .netns_fd_idx = UINT_MAX,
@@ -1682,7 +1743,7 @@ static int vl_method_add_netif_to_user_namespace(sd_varlink *link, sd_json_varia
         if (r != 0)
                 return r;
 
-        userns_fd = sd_varlink_take_fd(link, p.userns_fd_idx);
+        userns_fd = sd_varlink_peek_dup_fd(link, p.userns_fd_idx);
         if (userns_fd < 0)
                 return userns_fd;
 
@@ -1695,7 +1756,7 @@ static int vl_method_add_netif_to_user_namespace(sd_varlink *link, sd_json_varia
                 return -errno;
 
         if (p.netns_fd_idx != UINT_MAX) {
-                netns_fd = sd_varlink_take_fd(link, p.netns_fd_idx);
+                netns_fd = sd_varlink_peek_dup_fd(link, p.netns_fd_idx);
                 if (netns_fd < 0)
                         return netns_fd;
 
@@ -1724,6 +1785,22 @@ static int vl_method_add_netif_to_user_namespace(sd_varlink *link, sd_json_varia
         } else
                 return sd_varlink_error_invalid_parameter_name(link, "mode");
 
+        const char *polkit_details[] = {
+                "type", p.mode,
+                NULL,
+        };
+
+        r = varlink_verify_polkit_async_full(
+                        link,
+                        /* bus= */ NULL,
+                        "io.systemd.namespace-resource.delegate-network-interface",
+                        polkit_details,
+                        /* good_user= */ UID_INVALID,
+                        POLKIT_DEFAULT_ALLOW, /* If no polkit is installed, allow delegation of network interfaces to registered userns */
+                        &c->polkit_registry);
+        if (r <= 0)
+                return r;
+
         registry_dir_fd = userns_registry_open_fd();
         if (registry_dir_fd < 0)
                 return registry_dir_fd;
@@ -1835,11 +1912,20 @@ static int vl_method_add_netif_to_user_namespace(sd_varlink *link, sd_json_varia
 static int process_connection(sd_varlink_server *server, int _fd) {
         _cleanup_close_ int fd = TAKE_FD(_fd); /* always take possession */
         _cleanup_(sd_varlink_close_unrefp) sd_varlink *vl = NULL;
+        _cleanup_(sd_event_unrefp) sd_event *event = NULL;
         int r;
 
         assert(server);
         assert(fd >= 0);
 
+        r = sd_event_new(&event);
+        if (r < 0)
+                return r;
+
+        r = sd_varlink_server_attach_event(server, event, /* priority= */ 0);
+        if (r < 0)
+                return log_error_errno(r, "Failed to attach Varlink server to event loop: %m");
+
         r = sd_varlink_server_add_connection(server, fd, &vl);
         if (r < 0)
                 return log_error_errno(r, "Failed to add connection: %m");
@@ -1847,31 +1933,28 @@ static int process_connection(sd_varlink_server *server, int _fd) {
         TAKE_FD(fd);
         vl = sd_varlink_ref(vl);
 
-        for (;;) {
-                r = sd_varlink_process(vl);
-                if (r == -ENOTCONN) {
-                        log_debug("Connection terminated.");
-                        break;
-                }
-                if (r < 0)
-                        return log_error_errno(r, "Failed to process connection: %m");
-                if (r > 0)
-                        continue;
+        r = sd_event_loop(event);
+        if (r < 0)
+                return log_error_errno(r, "Failed to run event loop: %m");
 
-                r = sd_varlink_wait(vl, CONNECTION_IDLE_USEC);
-                if (r < 0)
-                        return log_error_errno(r, "Failed to wait for connection events: %m");
-                if (r == 0)
-                        break;
-        }
+        r = sd_varlink_server_detach_event(server);
+        if (r < 0)
+                return log_error_errno(r, "Failed to detach Varlink server from event loop: %m");
 
         return 0;
 }
 
+static void context_free(Context *c) { /* Releases what the Context holds; 'c' itself is not freed (run() keeps it on the stack and uses this as its _cleanup_ handler) */
+        assert(c);
+
+        c->polkit_registry = hashmap_free(c->polkit_registry); /* field is overwritten with hashmap_free()'s return value (presumably NULL, confirm) */
+        c->bpf = userns_restrict_bpf_free(c->bpf); /* same pattern for the BPF object */
+}
+
 static int run(int argc, char *argv[]) {
-        _cleanup_(userns_restrict_bpf_freep) struct userns_restrict_bpf *bpf = NULL;
         usec_t start_time, listen_idle_usec, last_busy_usec = USEC_INFINITY;
         _cleanup_(sd_varlink_server_unrefp) sd_varlink_server *server = NULL;
+        _cleanup_(context_free) Context c = {};
         _cleanup_(pidref_done) PidRef parent = PIDREF_NULL;
         unsigned n_iterations = 0;
         int m, listen_fd, r;
@@ -1896,7 +1979,7 @@ static int run(int argc, char *argv[]) {
                         &server,
                         SD_VARLINK_SERVER_INHERIT_USERDATA|
                         SD_VARLINK_SERVER_ALLOW_FD_PASSING_INPUT|SD_VARLINK_SERVER_ALLOW_FD_PASSING_OUTPUT,
-                        &bpf);
+                        &c);
         if (r < 0)
                 return log_error_errno(r, "Failed to allocate varlink server: %m");
 
@@ -1920,6 +2003,10 @@ static int run(int argc, char *argv[]) {
         if (r < 0)
                 return log_error_errno(r, "Failed to bind methods: %m");
 
+        r = sd_varlink_server_set_exit_on_idle(server, true);
+        if (r < 0)
+                return log_error_errno(r, "Failed to enable exit-on-idle mode: %m");
+
         r = getenv_bool("NSRESOURCE_FIXED_WORKER");
         if (r < 0)
                 return log_error_errno(r, "Failed to parse NSRESOURCE_FIXED_WORKER: %m");