]> git.ipfire.org Git - thirdparty/systemd.git/commitdiff
oomd: check if a cgroup can be killed before attempting to kill it
authorMatteo Croce <teknoraver@meta.com>
Mon, 17 Nov 2025 16:30:34 +0000 (17:30 +0100)
committerZbigniew Jędrzejewski-Szmek <zbyszek@in.waw.pl>
Fri, 21 Nov 2025 08:15:37 +0000 (09:15 +0100)
On an OOM event, oomd tries to kill a cgroup until it succeeds.
The kill can fail with EPERM if a PID cannot be killed, which leaves
the cgroup with only some of its processes killed.
This is unlikely but theoretically possible in a user namespace,
where systemd runs as root inside the container and tries to kill a
cgroup with some PID from the host namespace.

To address this, send signal 0 (the null signal) to all the processes to check
that we have privileges to kill them.

src/oom/oomd-util.c

index e463ce01f2c2d3c1aa36a064a9a8aab0883017d3..0231d62a08198b971d68c277e24f3b4fd3007868 100644 (file)
@@ -337,6 +337,12 @@ int oomd_kill_by_pgscan_rate(Hashmap *h, const char *prefix, bool dry_run, char
                 if (c->pgscan == 0 && c->current_memory_usage == 0)
                         continue;
 
+                /* First try killing recursively to ensure all child cgroups can be killed. */
+                r = cg_kill_recursive(c->path, /* sig= */ 0, CGROUP_IGNORE_SELF, /* killed_pids= */ NULL,
+                                      /* log_kill= */ NULL, /* userdata= */ NULL);
+                if (r < 0)
+                        continue;
+
                 r = oomd_cgroup_kill(c->path, /* recurse= */ true, /* dry_run= */ dry_run);
                 if (r == -ENOMEM)
                         return r; /* Treat oom as a hard error */
@@ -381,6 +387,12 @@ int oomd_kill_by_swap_usage(Hashmap *h, uint64_t threshold_usage, bool dry_run,
                 if (c->swap_usage <= threshold_usage)
                         continue;
 
+                /* First try killing recursively to ensure all child cgroups can be killed. */
+                r = cg_kill_recursive(c->path, /* sig= */ 0, CGROUP_IGNORE_SELF, /* killed_pids= */ NULL,
+                                      /* log_kill= */ NULL, /* userdata= */ NULL);
+                if (r < 0)
+                        continue;
+
                 r = oomd_cgroup_kill(c->path, /* recurse= */ true, /* dry_run= */ dry_run);
                 if (r == -ENOMEM)
                         return r; /* Treat oom as a hard error */