From: Lennart Poettering Date: Wed, 9 Jul 2025 12:28:28 +0000 (+0200) Subject: cgroup: handle ENODEV on cg_read_pid() gracefully X-Git-Tag: v258-rc1~131 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=726183627bf5aae68d537bfaa3cf8307bbc4d1f0;p=thirdparty%2Fsystemd.git cgroup: handle ENODEV on cg_read_pid() gracefully The recently added test case TEST-07-PID1.subgroup-kill.sh surfaced a race: if we enumerate PIDs in a cgroup, and the cgroup is unlinked at the very same time, reading will result in ENODEV. We need to handle that gracefully. Hence let's do so. Noticed while looking at: https://github.com/systemd/systemd/actions/runs/16143084441/job/45554929264?pr=38120 --- diff --git a/src/basic/cgroup-util.c b/src/basic/cgroup-util.c index f68b2103f8e..e643c71714d 100644 --- a/src/basic/cgroup-util.c +++ b/src/basic/cgroup-util.c @@ -142,6 +142,9 @@ int cg_read_pid(FILE *f, pid_t *ret, CGroupFlags flags) { assert(f); assert(ret); + /* NB: The kernel returns ENODEV if we tried to read from cgroup.procs of a cgroup that has been + * removed already. Callers should handle that! */ + for (;;) { errno = 0; if (fscanf(f, "%lu", &ul) != 1) { @@ -300,6 +303,13 @@ int cg_kill( _cleanup_(pidref_done) PidRef pidref = PIDREF_NULL; r = cg_read_pidref(f, &pidref, flags); + if (r == -ENODEV) { + /* reading from cgroup.procs will result in ENODEV if the cgroup is + * concurrently removed. Just leave in that case, because a removed cgroup + * contains no processes anymore. */ + done = true; + break; + } if (r < 0) return RET_GATHER(ret, log_debug_errno(r, "Failed to read pidref from cgroup '%s': %m", path)); if (r == 0) diff --git a/src/core/dbus-unit.c b/src/core/dbus-unit.c index 8f1f333943b..cf015b5a7bc 100644 --- a/src/core/dbus-unit.c +++ b/src/core/dbus-unit.c @@ -1351,10 +1351,13 @@ static int append_cgroup(sd_bus_message *reply, const char *p, Set *pids) { /* libvirt / qemu uses threaded mode and cgroup.procs cannot be read at the lower levels. 
* From https://docs.kernel.org/admin-guide/cgroup-v2.html#threads, “cgroup.procs” in a * threaded domain cgroup contains the PIDs of all processes in the subtree and is not - * readable in the subtree proper. */ + * readable in the subtree proper. + * + * We'll see ENODEV when trying to enumerate processes and the cgroup is removed at the same + * time. Handle this gracefully. */ r = cg_read_pidref(f, &pidref, /* flags = */ 0); - if (IN_SET(r, 0, -EOPNOTSUPP)) + if (IN_SET(r, 0, -EOPNOTSUPP, -ENODEV)) break; if (r < 0) return r; diff --git a/src/shared/cgroup-setup.c b/src/shared/cgroup-setup.c index 8e6cb55427f..f6f0fe2c19d 100644 --- a/src/shared/cgroup-setup.c +++ b/src/shared/cgroup-setup.c @@ -367,6 +367,8 @@ int cg_migrate( if (r < 0) return RET_GATHER(ret, r); } + if (r == -ENODEV) + continue; if (r < 0) return RET_GATHER(ret, r); } while (!done); diff --git a/src/shared/cgroup-show.c b/src/shared/cgroup-show.c index b95cecb3e66..b97edbdba77 100644 --- a/src/shared/cgroup-show.c +++ b/src/shared/cgroup-show.c @@ -107,9 +107,12 @@ static int show_cgroup_one_by_path( /* libvirt / qemu uses threaded mode and cgroup.procs cannot be read at the lower levels. * From https://docs.kernel.org/admin-guide/cgroup-v2.html#threads, * “cgroup.procs” in a threaded domain cgroup contains the PIDs of all processes in - * the subtree and is not readable in the subtree proper. */ + * the subtree and is not readable in the subtree proper. + * + * ENODEV is generated when we enumerate processes from a cgroup and the cgroup is removed + * concurrently. */ r = cg_read_pid(f, &pid, /* flags = */ 0); - if (IN_SET(r, 0, -EOPNOTSUPP)) + if (IN_SET(r, 0, -EOPNOTSUPP, -ENODEV)) break; if (r < 0) return r;