]> git.ipfire.org Git - thirdparty/systemd.git/commitdiff
oomd: handle situations when no cgroups are killed 22183/head
authorAnita Zhang <the.anitazha@gmail.com>
Wed, 19 Jan 2022 21:26:01 +0000 (13:26 -0800)
committerAnita Zhang <the.anitazha@gmail.com>
Thu, 20 Jan 2022 22:15:13 +0000 (14:15 -0800)
Currently, if systemd-oomd doesn't kill anything in a selected cgroup, it
selects a new candidate immediately. But if a selected cgroup wasn't killed,
it is most likely because it disappeared or was cleaned up between the time
it was selected as a candidate and the time it was sent SIGKILL(s). We should
handle this as though systemd-oomd did perform a kill, so that it checks
swap/pressure again before it tries to select a new candidate.

src/oom/oomd-manager.c
src/oom/oomd-util.c

index 9f4f083ab9ac9c0ef0821bf60e9e9668056b56a0..b0a81474ccf961311ae64e69e249aba36916da58 100644 (file)
@@ -410,7 +410,7 @@ static int monitor_swap_contexts_handler(sd_event_source *s, uint64_t usec, void
                 if (r < 0)
                         log_notice_errno(r, "Failed to kill any cgroup(s) based on swap: %m");
                 else {
-                        if (selected)
+                        if (selected && r > 0)
                                 log_notice("Killed %s due to memory used (%"PRIu64") / total (%"PRIu64") and "
                                            "swap used (%"PRIu64") / total (%"PRIu64") being more than "
                                            PERMYRIAD_AS_PERCENT_FORMAT_STR,
@@ -518,9 +518,13 @@ static int monitor_memory_pressure_contexts_handler(sd_event_source *s, uint64_t
                         if (r < 0)
                                 log_notice_errno(r, "Failed to kill any cgroup(s) under %s based on pressure: %m", t->path);
                         else {
-                                /* Don't act on all the high pressure cgroups at once; return as soon as we kill one */
+                                /* Don't act on all the high pressure cgroups at once; return as soon as we kill one.
+                                 * If r == 0 then it means there were no eligible candidates, the candidate cgroup
+                                 * disappeared, or the candidate cgroup had no processes by the time we tried to kill
+                                 * it. In any of these cases, go through the event loop again and select a new
+                                 * candidate if pressure is still high. */
                                 m->mem_pressure_post_action_delay_start = usec_now;
-                                if (selected)
+                                if (selected && r > 0)
                                         log_notice("Killed %s due to memory pressure for %s being %lu.%02lu%% > %lu.%02lu%%"
                                                    " for > %s with reclaim activity",
                                                    selected, t->path,
index b54bf483d603ea763a9435dc8cd5b850049ce67c..cef7519a74bee3ee3eed687f30922851940757d7 100644 (file)
@@ -206,6 +206,9 @@ int oomd_cgroup_kill(const char *path, bool recurse, bool dry_run) {
         else if (r < 0)
                 return r;
 
+        if (set_isempty(pids_killed))
+                log_debug("Nothing killed when attempting to kill %s", path);
+
         r = increment_oomd_xattr(path, "user.oomd_kill", set_size(pids_killed));
         if (r < 0)
                 log_debug_errno(r, "Failed to set user.oomd_kill on kill: %m");
@@ -231,8 +234,6 @@ int oomd_kill_by_pgscan_rate(Hashmap *h, const char *prefix, bool dry_run, char
                         continue;
 
                 r = oomd_cgroup_kill(sorted[i]->path, true, dry_run);
-                if (r == 0)
-                        continue; /* We didn't find anything to kill */
                 if (r == -ENOMEM)
                         return r; /* Treat oom as a hard error */
                 if (r < 0) {
@@ -245,7 +246,7 @@ int oomd_kill_by_pgscan_rate(Hashmap *h, const char *prefix, bool dry_run, char
                 if (!selected)
                         return -ENOMEM;
                 *ret_selected = selected;
-                return 1;
+                return r;
         }
 
         return ret;
@@ -271,8 +272,6 @@ int oomd_kill_by_swap_usage(Hashmap *h, uint64_t threshold_usage, bool dry_run,
                         continue;
 
                 r = oomd_cgroup_kill(sorted[i]->path, true, dry_run);
-                if (r == 0)
-                        continue; /* We didn't find anything to kill */
                 if (r == -ENOMEM)
                         return r; /* Treat oom as a hard error */
                 if (r < 0) {
@@ -285,7 +284,7 @@ int oomd_kill_by_swap_usage(Hashmap *h, uint64_t threshold_usage, bool dry_run,
                 if (!selected)
                         return -ENOMEM;
                 *ret_selected = selected;
-                return 1;
+                return r;
         }
 
         return ret;