]> git.ipfire.org Git - thirdparty/systemd.git/commitdiff
oomd: sort by pgscan rate not pgscan
authorAnita Zhang <the.anitazha@gmail.com>
Tue, 16 Mar 2021 00:21:45 +0000 (17:21 -0700)
committerAnita Zhang <the.anitazha@gmail.com>
Wed, 17 Mar 2021 17:17:03 +0000 (10:17 -0700)
For pressure based killing we want to target who has the highest
increase in pgscan from the previous interval (vs. the previous logic
which used raw pgscan). This will prevent biasing towards long running
cgroups as mentioned in #19007.

src/oom/oomd-manager.c
src/oom/oomd-util.c
src/oom/oomd-util.h
src/oom/test-oomd-util.c

index 6ce96062a1c0150fd2fe2b1676d75cdc7036ba7e..72781f3e88a82031547a6ec2b0864f154b2c3ce9 100644 (file)
@@ -357,9 +357,9 @@ static int monitor_cgroup_contexts_handler(sd_event_source *s, uint64_t usec, vo
                                 log_notice("Memory pressure for %s is greater than %lu for more than %"PRIu64" seconds and there was reclaim activity",
                                         t->path, LOAD_INT(t->mem_pressure_limit), m->default_mem_pressure_duration_usec / USEC_PER_SEC);
 
-                                r = oomd_kill_by_pgscan(m->monitored_mem_pressure_cgroup_contexts_candidates, t->path, m->dry_run);
+                                r = oomd_kill_by_pgscan_rate(m->monitored_mem_pressure_cgroup_contexts_candidates, t->path, m->dry_run);
                                 if (r == -ENOMEM)
-                                        return log_error_errno(r, "Failed to kill cgroup processes by pgscan");
+                                        return log_error_errno(r, "Failed to kill cgroup processes by pgscan rate: %m");
                                 if (r < 0)
                                         log_info("Failed to kill any cgroup(s) under %s based on pressure", t->path);
                                 else {
index 0c63b20dc6c00a00f730f2218c6eca5719b89f56..0ddb197f2b050559f655b8aa2cc6d2ff6d9cd383 100644 (file)
@@ -208,13 +208,13 @@ int oomd_cgroup_kill(const char *path, bool recurse, bool dry_run) {
         return set_size(pids_killed) != 0;
 }
 
-int oomd_kill_by_pgscan(Hashmap *h, const char *prefix, bool dry_run) {
+int oomd_kill_by_pgscan_rate(Hashmap *h, const char *prefix, bool dry_run) {
         _cleanup_free_ OomdCGroupContext **sorted = NULL;
         int r;
 
         assert(h);
 
-        r = oomd_sort_cgroup_contexts(h, compare_pgscan_and_memory_usage, prefix, &sorted);
+        r = oomd_sort_cgroup_contexts(h, compare_pgscan_rate_and_memory_usage, prefix, &sorted);
         if (r < 0)
                 return r;
 
index cf9f00dccc5daa28c5841adb607451a8b5a1e221..560697a4f46fc74ba560647b6e043f91d10e8acb 100644 (file)
@@ -66,7 +66,8 @@ bool oomd_swap_free_below(const OomdSystemContext *ctx, int threshold_permyriad)
 
 /* The compare functions will sort from largest to smallest, putting all the contexts with "avoid" at the end
  * (after the smallest values). */
-static inline int compare_pgscan_and_memory_usage(OomdCGroupContext * const *c1, OomdCGroupContext * const *c2) {
+static inline int compare_pgscan_rate_and_memory_usage(OomdCGroupContext * const *c1, OomdCGroupContext * const *c2) {
+        uint64_t last1, last2;
         int r;
 
         assert(c1);
@@ -76,7 +77,22 @@ static inline int compare_pgscan_and_memory_usage(OomdCGroupContext * const *c1,
         if (r != 0)
                 return r;
 
-        r = CMP((*c2)->pgscan, (*c1)->pgscan);
+        /* If last_pgscan > pgscan, assume the cgroup was recreated and reset last_pgscan to zero. */
+        last2 = (*c2)->last_pgscan;
+        if ((*c2)->last_pgscan > (*c2)->pgscan) {
+                log_info("Last pgscan %" PRIu64 "greater than current pgscan %" PRIu64 "for %s. Using last pgscan of zero.",
+                                (*c2)->last_pgscan, (*c2)->pgscan, (*c2)->path);
+                last2 = 0;
+        }
+
+        last1 = (*c1)->last_pgscan;
+        if ((*c1)->last_pgscan > (*c1)->pgscan) {
+                log_info("Last pgscan %" PRIu64 "greater than current pgscan %" PRIu64 "for %s. Using last pgscan of zero.",
+                                (*c1)->last_pgscan, (*c1)->pgscan, (*c1)->path);
+                last1 = 0;
+        }
+
+        r = CMP((*c2)->pgscan - last2, (*c1)->pgscan - last1);
         if (r != 0)
                 return r;
 
@@ -107,7 +123,7 @@ int oomd_cgroup_kill(const char *path, bool recurse, bool dry_run);
 /* The following oomd_kill_by_* functions return 1 if processes were killed, or negative otherwise. */
 /* If `prefix` is supplied, only cgroups whose paths start with `prefix` are eligible candidates. Otherwise,
  * everything in `h` is a candidate. */
-int oomd_kill_by_pgscan(Hashmap *h, const char *prefix, bool dry_run);
+int oomd_kill_by_pgscan_rate(Hashmap *h, const char *prefix, bool dry_run);
 int oomd_kill_by_swap_usage(Hashmap *h, bool dry_run);
 
 int oomd_cgroup_context_acquire(const char *path, OomdCGroupContext **ret);
index 748753188d0a42d2a9fd3bebfcaa0e16a995ad79..bd1c574ca7fa02cabb24d155edcf9f541d190625 100644 (file)
@@ -372,33 +372,45 @@ static void test_oomd_sort_cgroups(void) {
                                  "/herp.slice/derp.scope",
                                  "/herp.slice/derp.scope/sheep.service",
                                  "/zupa.slice",
+                                 "/boop.slice",
                                  "/omitted.slice",
                                  "/avoid.slice");
 
-        OomdCGroupContext ctx[6] = {
+        OomdCGroupContext ctx[7] = {
                 { .path = paths[0],
                   .swap_usage = 20,
-                  .pgscan = 60,
+                  .last_pgscan = 0,
+                  .pgscan = 33,
                   .current_memory_usage = 10 },
                 { .path = paths[1],
                   .swap_usage = 60,
-                  .pgscan = 40,
+                  .last_pgscan = 33,
+                  .pgscan = 1,
                   .current_memory_usage = 20 },
                 { .path = paths[2],
                   .swap_usage = 40,
-                  .pgscan = 40,
+                  .last_pgscan = 1,
+                  .pgscan = 33,
                   .current_memory_usage = 40 },
                 { .path = paths[3],
                   .swap_usage = 10,
-                  .pgscan = 80,
+                  .last_pgscan = 33,
+                  .pgscan = 2,
                   .current_memory_usage = 10 },
                 { .path = paths[4],
+                  .swap_usage = 11,
+                  .last_pgscan = 33,
+                  .pgscan = 33,
+                  .current_memory_usage = 10 },
+                { .path = paths[5],
                   .swap_usage = 90,
-                  .pgscan = 100,
+                  .last_pgscan = 0,
+                  .pgscan = UINT64_MAX,
                   .preference = MANAGED_OOM_PREFERENCE_OMIT },
-                { .path = paths[5],
+                { .path = paths[6],
                   .swap_usage = 99,
-                  .pgscan = 200,
+                  .last_pgscan = 0,
+                  .pgscan = UINT64_MAX,
                   .preference = MANAGED_OOM_PREFERENCE_AVOID },
         };
 
@@ -408,32 +420,36 @@ static void test_oomd_sort_cgroups(void) {
         assert_se(hashmap_put(h, "/herp.slice/derp.scope", &ctx[1]) >= 0);
         assert_se(hashmap_put(h, "/herp.slice/derp.scope/sheep.service", &ctx[2]) >= 0);
         assert_se(hashmap_put(h, "/zupa.slice", &ctx[3]) >= 0);
-        assert_se(hashmap_put(h, "/omitted.slice", &ctx[4]) >= 0);
-        assert_se(hashmap_put(h, "/avoid.slice", &ctx[5]) >= 0);
+        assert_se(hashmap_put(h, "/boop.slice", &ctx[4]) >= 0);
+        assert_se(hashmap_put(h, "/omitted.slice", &ctx[5]) >= 0);
+        assert_se(hashmap_put(h, "/avoid.slice", &ctx[6]) >= 0);
 
-        assert_se(oomd_sort_cgroup_contexts(h, compare_swap_usage, NULL, &sorted_cgroups) == 5);
+        assert_se(oomd_sort_cgroup_contexts(h, compare_swap_usage, NULL, &sorted_cgroups) == 6);
         assert_se(sorted_cgroups[0] == &ctx[1]);
         assert_se(sorted_cgroups[1] == &ctx[2]);
         assert_se(sorted_cgroups[2] == &ctx[0]);
-        assert_se(sorted_cgroups[3] == &ctx[3]);
-        assert_se(sorted_cgroups[4] == &ctx[5]);
+        assert_se(sorted_cgroups[3] == &ctx[4]);
+        assert_se(sorted_cgroups[4] == &ctx[3]);
+        assert_se(sorted_cgroups[5] == &ctx[6]);
         sorted_cgroups = mfree(sorted_cgroups);
 
-        assert_se(oomd_sort_cgroup_contexts(h, compare_pgscan_and_memory_usage, NULL, &sorted_cgroups) == 5);
-        assert_se(sorted_cgroups[0] == &ctx[3]);
-        assert_se(sorted_cgroups[1] == &ctx[0]);
-        assert_se(sorted_cgroups[2] == &ctx[2]);
+        assert_se(oomd_sort_cgroup_contexts(h, compare_pgscan_rate_and_memory_usage, NULL, &sorted_cgroups) == 6);
+        assert_se(sorted_cgroups[0] == &ctx[0]);
+        assert_se(sorted_cgroups[1] == &ctx[2]);
+        assert_se(sorted_cgroups[2] == &ctx[3]);
         assert_se(sorted_cgroups[3] == &ctx[1]);
-        assert_se(sorted_cgroups[4] == &ctx[5]);
+        assert_se(sorted_cgroups[4] == &ctx[4]);
+        assert_se(sorted_cgroups[5] == &ctx[6]);
         sorted_cgroups = mfree(sorted_cgroups);
 
-        assert_se(oomd_sort_cgroup_contexts(h, compare_pgscan_and_memory_usage, "/herp.slice/derp.scope", &sorted_cgroups) == 2);
+        assert_se(oomd_sort_cgroup_contexts(h, compare_pgscan_rate_and_memory_usage, "/herp.slice/derp.scope", &sorted_cgroups) == 2);
         assert_se(sorted_cgroups[0] == &ctx[2]);
         assert_se(sorted_cgroups[1] == &ctx[1]);
         assert_se(sorted_cgroups[2] == 0);
         assert_se(sorted_cgroups[3] == 0);
         assert_se(sorted_cgroups[4] == 0);
         assert_se(sorted_cgroups[5] == 0);
+        assert_se(sorted_cgroups[6] == 0);
         sorted_cgroups = mfree(sorted_cgroups);
 }