]> git.ipfire.org Git - thirdparty/systemd.git/commitdiff
oomd: don't get pressure candidates on every interval
author Anita Zhang <the.anitazha@gmail.com>
Fri, 26 Mar 2021 08:53:15 +0000 (01:53 -0700)
committer Anita Zhang <the.anitazha@gmail.com>
Mon, 5 Apr 2021 09:01:32 +0000 (02:01 -0700)
Only start collecting candidates for a memory pressure kill once a
monitored cgroup is hitting its pressure limit (but before it has stayed
over that limit for the full configured duration). This brings CPU
utilization down from ~1% to 0.3%.

Addresses the CPU utilization issues reported in
https://bugzilla.redhat.com/show_bug.cgi?id=1941340
and
https://bugzilla.redhat.com/show_bug.cgi?id=1944646

src/oom/oomd-manager.c

index 73555d2594d5e67c11e13162c44f187c12ea43c6..07a5d52d1e31bc886af945bf8c0725fd96477773 100644 (file)
@@ -374,8 +374,17 @@ static int monitor_swap_contexts_handler(sd_event_source *s, uint64_t usec, void
         return 0;
 }
 
+static void clear_candidate_hashmapp(Manager **m) {
+        if (*m)
+                hashmap_clear((*m)->monitored_mem_pressure_cgroup_contexts_candidates);
+}
+
 static int monitor_memory_pressure_contexts_handler(sd_event_source *s, uint64_t usec, void *userdata) {
+        /* Don't want to use stale candidate data. Setting this will clear the candidate hashmap on return unless we
+         * update the candidate data (in which case clear_candidates will be NULL). */
+        _cleanup_(clear_candidate_hashmapp) Manager *clear_candidates = userdata;
         _cleanup_set_free_ Set *targets = NULL;
+        bool in_post_action_delay = false;
         Manager *m = userdata;
         usec_t usec_now;
         int r;
@@ -400,10 +409,8 @@ static int monitor_memory_pressure_contexts_handler(sd_event_source *s, uint64_t
         }
 
         /* Return early if nothing is requesting memory pressure monitoring */
-        if (hashmap_isempty(m->monitored_mem_pressure_cgroup_contexts)) {
-                hashmap_clear(m->monitored_mem_pressure_cgroup_contexts_candidates);
+        if (hashmap_isempty(m->monitored_mem_pressure_cgroup_contexts))
                 return 0;
-        }
 
         /* Update the cgroups used for detection/action */
         r = update_monitored_cgroup_contexts(&m->monitored_mem_pressure_cgroup_contexts);
@@ -423,7 +430,7 @@ static int monitor_memory_pressure_contexts_handler(sd_event_source *s, uint64_t
          * values and go on a kill storm. */
         if (m->mem_pressure_post_action_delay_start > 0) {
                 if (m->mem_pressure_post_action_delay_start + POST_ACTION_DELAY_USEC > usec_now)
-                        return 0;
+                        in_post_action_delay = true;
                 else
                         m->mem_pressure_post_action_delay_start = 0;
         }
@@ -433,7 +440,7 @@ static int monitor_memory_pressure_contexts_handler(sd_event_source *s, uint64_t
                 return log_oom();
         if (r < 0)
                 log_debug_errno(r, "Failed to check if memory pressure exceeded limits, ignoring: %m");
-        else if (r == 1) {
+        else if (r == 1 && !in_post_action_delay) {
                 OomdCGroupContext *t;
                 SET_FOREACH(t, targets) {
                         _cleanup_free_ char *selected = NULL;
@@ -455,6 +462,15 @@ static int monitor_memory_pressure_contexts_handler(sd_event_source *s, uint64_t
                                                   m->default_mem_pressure_duration_usec,
                                                   USEC_PER_SEC));
 
+                        r = update_monitored_cgroup_contexts_candidates(
+                                        m->monitored_mem_pressure_cgroup_contexts, &m->monitored_mem_pressure_cgroup_contexts_candidates);
+                        if (r == -ENOMEM)
+                                return log_oom();
+                        if (r < 0)
+                                log_debug_errno(r, "Failed to update monitored memory pressure candidate cgroup contexts, ignoring: %m");
+                        else
+                                clear_candidates = NULL;
+
                         r = oomd_kill_by_pgscan_rate(m->monitored_mem_pressure_cgroup_contexts_candidates, t->path, m->dry_run, &selected);
                         if (r == -ENOMEM)
                                 return log_oom();
@@ -475,6 +491,28 @@ static int monitor_memory_pressure_contexts_handler(sd_event_source *s, uint64_t
                                 return 0;
                         }
                 }
+        } else {
+                /* If any monitored cgroup is over their pressure limit, get all the kill candidates for every
+                 * monitored cgroup. This saves CPU cycles from doing it every interval by only doing it when a kill
+                 * might happen.
+                 * Candidate cgroup data will continue to get updated during the post-action delay period in case
+                 * pressure continues to be high after a kill. */
+                OomdCGroupContext *c;
+                HASHMAP_FOREACH(c, m->monitored_mem_pressure_cgroup_contexts) {
+                        if (c->mem_pressure_limit_hit_start == 0)
+                                continue;
+
+                        r = update_monitored_cgroup_contexts_candidates(
+                                        m->monitored_mem_pressure_cgroup_contexts, &m->monitored_mem_pressure_cgroup_contexts_candidates);
+                        if (r == -ENOMEM)
+                                return log_oom();
+                        if (r < 0)
+                                log_debug_errno(r, "Failed to update monitored memory pressure candidate cgroup contexts, ignoring: %m");
+                        else {
+                                clear_candidates = NULL;
+                                break;
+                        }
+                }
         }
 
         return 0;