git.ipfire.org Git - thirdparty/systemd.git/commitdiff
cgroup: handle ENODEV on cg_read_pid() gracefully
author: Lennart Poettering <lennart@poettering.net>
Wed, 9 Jul 2025 12:28:28 +0000 (14:28 +0200)
committer: Mike Yuan <me@yhndnzj.com>
Wed, 9 Jul 2025 18:45:59 +0000 (20:45 +0200)
The recently added test case TEST-07-PID1.subgroup-kill.sh surfaced a
race: if we enumerate the PIDs in a cgroup and the cgroup is unlinked at
the very same time, reading will result in ENODEV. We need to handle that
gracefully. Hence let's do so.

Noticed while looking at:

https://github.com/systemd/systemd/actions/runs/16143084441/job/45554929264?pr=38120

src/basic/cgroup-util.c
src/core/dbus-unit.c
src/shared/cgroup-setup.c
src/shared/cgroup-show.c

index f68b2103f8e858926536f3addae37b8b1674f557..e643c71714dc438b435cccab725733c64f875e3e 100644 (file)
@@ -142,6 +142,9 @@ int cg_read_pid(FILE *f, pid_t *ret, CGroupFlags flags) {
         assert(f);
         assert(ret);
 
+        /* NB: The kernel returns ENODEV if we tried to read from cgroup.procs of a cgroup that has been
+         * removed already. Callers should handle that! */
+
         for (;;) {
                 errno = 0;
                 if (fscanf(f, "%lu", &ul) != 1) {
@@ -300,6 +303,13 @@ int cg_kill(
                         _cleanup_(pidref_done) PidRef pidref = PIDREF_NULL;
 
                         r = cg_read_pidref(f, &pidref, flags);
+                        if (r == -ENODEV) {
+                                /* Reading from cgroup.procs will result in ENODEV if the cgroup is
+                                 * concurrently removed. Just leave in that case, because a removed cgroup
+                                 * contains no processes anymore. */
+                                done = true;
+                                break;
+                        }
                         if (r < 0)
                                 return RET_GATHER(ret, log_debug_errno(r, "Failed to read pidref from cgroup '%s': %m", path));
                         if (r == 0)
index 8f1f333943bdec3d5e05515ab27757018b977df4..cf015b5a7bcc5f00587e613f6d5ca4308a9706e5 100644 (file)
@@ -1351,10 +1351,13 @@ static int append_cgroup(sd_bus_message *reply, const char *p, Set *pids) {
                 /* libvirt / qemu uses threaded mode and cgroup.procs cannot be read at the lower levels.
                  * From https://docs.kernel.org/admin-guide/cgroup-v2.html#threads, “cgroup.procs” in a
                  * threaded domain cgroup contains the PIDs of all processes in the subtree and is not
-                 * readable in the subtree proper. */
+                 * readable in the subtree proper.
+                 *
+                 * We'll see ENODEV when trying to enumerate processes and the cgroup is removed at the same
+                 * time. Handle this gracefully. */
 
                 r = cg_read_pidref(f, &pidref, /* flags = */ 0);
-                if (IN_SET(r, 0, -EOPNOTSUPP))
+                if (IN_SET(r, 0, -EOPNOTSUPP, -ENODEV))
                         break;
                 if (r < 0)
                         return r;
index 8e6cb55427f7e74a95885c671c5095d475294524..f6f0fe2c19d79753c8dcf523bc5a76ace7dc3dc4 100644 (file)
@@ -367,6 +367,8 @@ int cg_migrate(
                         if (r < 0)
                                 return RET_GATHER(ret, r);
                 }
+                if (r == -ENODEV)
+                        continue;
                 if (r < 0)
                         return RET_GATHER(ret, r);
         } while (!done);
index b95cecb3e66d3ff961d241ca229ee007a8344fb6..b97edbdba77dcaaf6776babd0a37b835cee97fe5 100644 (file)
@@ -107,9 +107,12 @@ static int show_cgroup_one_by_path(
                 /* libvirt / qemu uses threaded mode and cgroup.procs cannot be read at the lower levels.
                  * From https://docs.kernel.org/admin-guide/cgroup-v2.html#threads,
                  * “cgroup.procs” in a threaded domain cgroup contains the PIDs of all processes in
-                 * the subtree and is not readable in the subtree proper. */
+                 * the subtree and is not readable in the subtree proper.
+                 *
+                 * ENODEV is generated when we enumerate processes from a cgroup and the cgroup is removed
+                 * concurrently. */
                 r = cg_read_pid(f, &pid, /* flags = */ 0);
-                if (IN_SET(r, 0, -EOPNOTSUPP))
+                if (IN_SET(r, 0, -EOPNOTSUPP, -ENODEV))
                         break;
                 if (r < 0)
                         return r;