]> git.ipfire.org Git - thirdparty/systemd.git/commitdiff
udev: run the main process, workers, and spawned commands in /udev subcgroup 22752/head
authorYu Watanabe <watanabe.yu+github@gmail.com>
Wed, 16 Mar 2022 11:46:49 +0000 (20:46 +0900)
committerYu Watanabe <watanabe.yu+github@gmail.com>
Thu, 17 Mar 2022 11:24:38 +0000 (20:24 +0900)
And enable cgroup delegation for udevd.
Then, processes invoked through ExecReload= are assigned .control
subcgroup, and they are not killed by cg_kill().

Fixes #16867 and #22686.

src/udev/udevd.c
units/systemd-udevd.service.in

index 8380d674c50d84afad7614a97c5d967ce9f1293f..c6f6d945c82ef2d00e599d97781f646f387f51c5 100644 (file)
@@ -28,6 +28,7 @@
 #include "sd-event.h"
 
 #include "alloc-util.h"
+#include "cgroup-setup.h"
 #include "cgroup-util.h"
 #include "cpu-set-util.h"
 #include "dev-setup.h"
@@ -48,6 +49,7 @@
 #include "mkdir.h"
 #include "netlink-util.h"
 #include "parse-util.h"
+#include "path-util.h"
 #include "pretty-print.h"
 #include "proc-cmdline.h"
 #include "process-util.h"
@@ -85,7 +87,7 @@ typedef struct Manager {
         sd_event *event;
         Hashmap *workers;
         LIST_HEAD(Event, events);
-        const char *cgroup;
+        char *cgroup;
         pid_t pid; /* the process that originally allocated the manager object */
         int log_level;
 
@@ -238,6 +240,7 @@ static Manager* manager_free(Manager *manager) {
         safe_close(manager->inotify_fd);
         safe_close_pair(manager->worker_watch);
 
+        free(manager->cgroup);
         return mfree(manager);
 }
 
@@ -1722,12 +1725,63 @@ static int parse_argv(int argc, char *argv[]) {
         return 1;
 }
 
-static int manager_new(Manager **ret, int fd_ctrl, int fd_uevent, const char *cgroup) {
+static int create_subcgroup(char **ret) {
+        _cleanup_free_ char *cgroup = NULL, *subcgroup = NULL;
+        int r;
+
+        if (getppid() != 1)
+                return log_debug_errno(SYNTHETIC_ERRNO(EOPNOTSUPP), "Not invoked by PID1.");
+
+        r = sd_booted();
+        if (r < 0)
+                return log_debug_errno(r, "Failed to check if systemd is running: %m");
+        if (r == 0)
+                return log_debug_errno(SYNTHETIC_ERRNO(EOPNOTSUPP), "systemd is not running.");
+
+        /* Get our own cgroup, we regularly kill everything udev has left behind.
+         * We only do this on systemd systems, and only if we are directly spawned
+         * by PID1. Otherwise we are not guaranteed to have a dedicated cgroup. */
+
+        r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, 0, &cgroup);
+        if (r < 0) {
+                if (IN_SET(r, -ENOENT, -ENOMEDIUM))
+                        return log_debug_errno(r, "Dedicated cgroup not found: %m");
+                return log_debug_errno(r, "Failed to get cgroup: %m");
+        }
+
+        r = cg_get_xattr_bool(SYSTEMD_CGROUP_CONTROLLER, cgroup, "trusted.delegate");
+        if (IN_SET(r, 0, -ENODATA))
+                return log_debug_errno(SYNTHETIC_ERRNO(EOPNOTSUPP), "The cgroup %s is not delegated to us.", cgroup);
+        if (r < 0)
+                return log_debug_errno(r, "Failed to read trusted.delegate attribute: %m");
+
+        /* We are invoked with our own delegated cgroup tree, let's move us one level down, so that we
+         * don't collide with the "no processes in inner nodes" rule of cgroups, when the service
+         * manager invokes the ExecReload= job in the .control/ subcgroup. */
+
+        subcgroup = path_join(cgroup, "/udev");
+        if (!subcgroup)
+                return log_oom_debug();
+
+        r = cg_create_and_attach(SYSTEMD_CGROUP_CONTROLLER, subcgroup, 0);
+        if (r < 0)
+                return log_debug_errno(r, "Failed to create %s subcgroup: %m", subcgroup);
+
+        log_debug("Created %s subcgroup.", subcgroup);
+        if (ret)
+                *ret = TAKE_PTR(subcgroup);
+        return 0;
+}
+
+static int manager_new(Manager **ret, int fd_ctrl, int fd_uevent) {
         _cleanup_(manager_freep) Manager *manager = NULL;
+        _cleanup_free_ char *cgroup = NULL;
         int r;
 
         assert(ret);
 
+        (void) create_subcgroup(&cgroup);
+
         manager = new(Manager, 1);
         if (!manager)
                 return log_oom();
@@ -1735,7 +1789,7 @@ static int manager_new(Manager **ret, int fd_ctrl, int fd_uevent, const char *cg
         *manager = (Manager) {
                 .inotify_fd = -1,
                 .worker_watch = { -1, -1 },
-                .cgroup = cgroup,
+                .cgroup = TAKE_PTR(cgroup),
         };
 
         r = udev_ctrl_new_from_fd(&manager->ctrl, fd_ctrl);
@@ -1880,7 +1934,6 @@ static int main_loop(Manager *manager) {
 }
 
 int run_udevd(int argc, char *argv[]) {
-        _cleanup_free_ char *cgroup = NULL;
         _cleanup_(manager_freep) Manager *manager = NULL;
         int fd_ctrl = -1, fd_uevent = -1;
         int r;
@@ -1937,24 +1990,11 @@ int run_udevd(int argc, char *argv[]) {
         if (r < 0 && r != -EEXIST)
                 return log_error_errno(r, "Failed to create /run/udev: %m");
 
-        if (getppid() == 1 && sd_booted() > 0) {
-                /* Get our own cgroup, we regularly kill everything udev has left behind.
-                 * We only do this on systemd systems, and only if we are directly spawned
-                 * by PID1. Otherwise we are not guaranteed to have a dedicated cgroup. */
-                r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, 0, &cgroup);
-                if (r < 0) {
-                        if (IN_SET(r, -ENOENT, -ENOMEDIUM))
-                                log_debug_errno(r, "Dedicated cgroup not found: %m");
-                        else
-                                log_warning_errno(r, "Failed to get cgroup: %m");
-                }
-        }
-
         r = listen_fds(&fd_ctrl, &fd_uevent);
         if (r < 0)
                 return log_error_errno(r, "Failed to listen on fds: %m");
 
-        r = manager_new(&manager, fd_ctrl, fd_uevent, cgroup);
+        r = manager_new(&manager, fd_ctrl, fd_uevent);
         if (r < 0)
                 return log_error_errno(r, "Failed to create manager: %m");
 
index d042bfb0d3b58e1805756f63461563693db0556d..99011982740c4d102e3d1d6dc7552eed19b4da47 100644 (file)
@@ -16,6 +16,7 @@ Before=sysinit.target
 ConditionPathIsReadWrite=/sys
 
 [Service]
+Delegate=pids
 DeviceAllow=block-* rwm
 DeviceAllow=char-* rwm
 Type=notify