if (r < 0)
log_debug_errno(r, "Failed to update monitored memory pressure candidate cgroup contexts, ignoring: %m");
- if (oomd_memory_reclaim(m->monitored_mem_pressure_cgroup_contexts))
- m->last_reclaim_at = usec_now;
-
/* Since pressure counters are lagging, we need to wait a bit after a kill to ensure we don't read stale
* values and go on a kill storm. */
if (m->mem_pressure_post_action_delay_start > 0) {
if (r < 0)
log_debug_errno(r, "Failed to check if memory pressure exceeded limits, ignoring: %m");
else if (r == 1) {
- /* Check if there was reclaim activity in the given interval. The concern is the following case:
- * Pressure climbed, a lot of high-frequency pages were reclaimed, and we killed the offending
- * cgroup. Even after this, well-behaved processes will fault in recently resident pages and
- * this will cause pressure to remain high. Thus if there isn't any reclaim pressure, no need
- * to kill something (it won't help anyways). */
- if ((usec_now - m->last_reclaim_at) <= RECLAIM_DURATION_USEC) {
- OomdCGroupContext *t;
-
- SET_FOREACH(t, targets) {
- _cleanup_free_ char *selected = NULL;
- char ts[FORMAT_TIMESPAN_MAX];
-
- log_debug("Memory pressure for %s is %lu.%02lu%% > %lu.%02lu%% for > %s with reclaim activity",
- t->path,
- LOAD_INT(t->memory_pressure.avg10), LOAD_FRAC(t->memory_pressure.avg10),
- LOAD_INT(t->mem_pressure_limit), LOAD_FRAC(t->mem_pressure_limit),
- format_timespan(ts, sizeof ts,
- m->default_mem_pressure_duration_usec,
- USEC_PER_SEC));
-
- r = oomd_kill_by_pgscan_rate(m->monitored_mem_pressure_cgroup_contexts_candidates, t->path, m->dry_run, &selected);
- if (r == -ENOMEM)
- return log_oom();
- if (r < 0)
- log_notice_errno(r, "Failed to kill any cgroup(s) under %s based on pressure: %m", t->path);
- else {
- /* Don't act on all the high pressure cgroups at once; return as soon as we kill one */
- m->mem_pressure_post_action_delay_start = usec_now;
- if (selected)
- log_notice("Killed %s due to memory pressure for %s being %lu.%02lu%% > %lu.%02lu%%"
- " for > %s with reclaim activity",
- selected, t->path,
- LOAD_INT(t->memory_pressure.avg10), LOAD_FRAC(t->memory_pressure.avg10),
- LOAD_INT(t->mem_pressure_limit), LOAD_FRAC(t->mem_pressure_limit),
- format_timespan(ts, sizeof ts,
- m->default_mem_pressure_duration_usec,
- USEC_PER_SEC));
- return 0;
- }
+ OomdCGroupContext *t;
+ SET_FOREACH(t, targets) {
+ _cleanup_free_ char *selected = NULL;
+ char ts[FORMAT_TIMESPAN_MAX];
+
+ /* Check if there was reclaim activity in the given interval. The concern is the following case:
+ * Pressure climbed, a lot of high-frequency pages were reclaimed, and we killed the offending
+ * cgroup. Even after this, well-behaved processes will fault in recently resident pages and
+ * this will cause pressure to remain high. Thus if there isn't any reclaim pressure, no need
+ * to kill something (it won't help anyways). */
+ if ((now(CLOCK_MONOTONIC) - t->last_had_mem_reclaim) > RECLAIM_DURATION_USEC)
+ continue;
+
+ log_debug("Memory pressure for %s is %lu.%02lu%% > %lu.%02lu%% for > %s with reclaim activity",
+ t->path,
+ LOAD_INT(t->memory_pressure.avg10), LOAD_FRAC(t->memory_pressure.avg10),
+ LOAD_INT(t->mem_pressure_limit), LOAD_FRAC(t->mem_pressure_limit),
+ format_timespan(ts, sizeof ts,
+ m->default_mem_pressure_duration_usec,
+ USEC_PER_SEC));
+
+ r = oomd_kill_by_pgscan_rate(m->monitored_mem_pressure_cgroup_contexts_candidates, t->path, m->dry_run, &selected);
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0)
+ log_notice_errno(r, "Failed to kill any cgroup(s) under %s based on pressure: %m", t->path);
+ else {
+ /* Don't act on all the high pressure cgroups at once; return as soon as we kill one */
+ m->mem_pressure_post_action_delay_start = usec_now;
+ if (selected)
+ log_notice("Killed %s due to memory pressure for %s being %lu.%02lu%% > %lu.%02lu%%"
+ " for > %s with reclaim activity",
+ selected, t->path,
+ LOAD_INT(t->memory_pressure.avg10), LOAD_FRAC(t->memory_pressure.avg10),
+ LOAD_INT(t->mem_pressure_limit), LOAD_FRAC(t->mem_pressure_limit),
+ format_timespan(ts, sizeof ts,
+ m->default_mem_pressure_duration_usec,
+ USEC_PER_SEC));
+ return 0;
}
}
}
OomdSystemContext system_context;
- usec_t last_reclaim_at;
usec_t mem_pressure_post_action_delay_start;
sd_event_source *swap_context_event_source;
return 0;
}
-bool oomd_memory_reclaim(Hashmap *h) {
- uint64_t pgscan = 0, pgscan_of = 0, last_pgscan = 0, last_pgscan_of = 0;
- OomdCGroupContext *ctx;
-
- assert(h);
-
- /* If sum of all the current pgscan values are greater than the sum of all the last_pgscan values,
- * there was reclaim activity. Used along with pressure checks to decide whether to take action. */
-
- HASHMAP_FOREACH(ctx, h) {
- uint64_t sum;
-
- sum = pgscan + ctx->pgscan;
- if (sum < pgscan || sum < ctx->pgscan)
- pgscan_of++; /* count overflows */
- pgscan = sum;
-
- sum = last_pgscan + ctx->last_pgscan;
- if (sum < last_pgscan || sum < ctx->last_pgscan)
- last_pgscan_of++; /* count overflows */
- last_pgscan = sum;
- }
-
- /* overflow counts are the same, return sums comparison */
- if (last_pgscan_of == pgscan_of)
- return pgscan > last_pgscan;
-
- return pgscan_of > last_pgscan_of;
-}
-
uint64_t oomd_pgscan_rate(const OomdCGroupContext *c) {
uint64_t last_pgscan;
curr_ctx->last_pgscan = old_ctx->pgscan;
curr_ctx->mem_pressure_limit = old_ctx->mem_pressure_limit;
curr_ctx->last_hit_mem_pressure_limit = old_ctx->last_hit_mem_pressure_limit;
+ curr_ctx->last_had_mem_reclaim = old_ctx->last_had_mem_reclaim;
}
+ if (oomd_pgscan_rate(curr_ctx) > 0)
+ curr_ctx->last_had_mem_reclaim = now(CLOCK_MONOTONIC);
+
r = hashmap_put(new_h, curr_ctx->path, curr_ctx);
if (r < 0)
return r;
ctx->last_pgscan = old_ctx->pgscan;
ctx->mem_pressure_limit = old_ctx->mem_pressure_limit;
ctx->last_hit_mem_pressure_limit = old_ctx->last_hit_mem_pressure_limit;
+ ctx->last_had_mem_reclaim = old_ctx->last_had_mem_reclaim;
+
+ if (oomd_pgscan_rate(ctx) > 0)
+ ctx->last_had_mem_reclaim = now(CLOCK_MONOTONIC);
}
}
ManagedOOMPreference preference;
- /* These are only used by oomd_pressure_above for acting on high memory pressure. */
+ /* These are only used for acting on high memory pressure. */
loadavg_t mem_pressure_limit;
usec_t mem_pressure_duration_usec;
usec_t last_hit_mem_pressure_limit;
+ usec_t last_had_mem_reclaim;
};
struct OomdSystemContext {
* Returns -ENOMEM for allocation errors. */
int oomd_pressure_above(Hashmap *h, usec_t duration, Set **ret);
-/* Sum up current OomdCGroupContexts' pgscan values and last interval's pgscan values in `h`. Returns true if the
- * current sum is higher than the last interval's sum (there was some reclaim activity). */
-bool oomd_memory_reclaim(Hashmap *h);
-
/* Returns true if the amount of swap free is below the permyriad of swap specified by `threshold_permyriad`. */
bool oomd_swap_free_below(const OomdSystemContext *ctx, int threshold_permyriad);
assert_se(oomd_insert_cgroup_context(NULL, h1, cgroup) == -EEXIST);
/* make sure certain values from h1 get updated in h2 */
- c1->pgscan = 5555;
+ c1->pgscan = UINT64_MAX;
c1->mem_pressure_limit = 6789;
c1->last_hit_mem_pressure_limit = 42;
+ c1->last_had_mem_reclaim = 888;
assert_se(h2 = hashmap_new(&oomd_cgroup_ctx_hash_ops));
assert_se(oomd_insert_cgroup_context(h1, h2, cgroup) == 0);
c1 = hashmap_get(h1, cgroup);
assert_se(c1);
assert_se(c2);
assert_se(c1 != c2);
- assert_se(c2->last_pgscan == 5555);
+ assert_se(c2->last_pgscan == UINT64_MAX);
assert_se(c2->mem_pressure_limit == 6789);
assert_se(c2->last_hit_mem_pressure_limit == 42);
+ assert_se(c2->last_had_mem_reclaim == 888); /* assumes the live pgscan is less than UINT64_MAX */
/* Assert that avoid/omit are not set if the cgroup is not owned by root */
if (test_xattrs) {
char **paths = STRV_MAKE("/0.slice",
"/1.slice");
- OomdCGroupContext ctx_old[3] = {
+ OomdCGroupContext ctx_old[2] = {
{ .path = paths[0],
.mem_pressure_limit = 5,
.last_hit_mem_pressure_limit = 777,
+ .last_had_mem_reclaim = 888,
.pgscan = 57 },
{ .path = paths[1],
.mem_pressure_limit = 6,
.last_hit_mem_pressure_limit = 888,
+ .last_had_mem_reclaim = 888,
.pgscan = 42 },
};
- OomdCGroupContext ctx_new[3] = {
+ OomdCGroupContext ctx_new[2] = {
{ .path = paths[0],
- .pgscan = 100 },
+ .pgscan = 57 },
{ .path = paths[1],
.pgscan = 101 },
};
assert_se(c_old->pgscan == c_new->last_pgscan);
assert_se(c_old->mem_pressure_limit == c_new->mem_pressure_limit);
assert_se(c_old->last_hit_mem_pressure_limit == c_new->last_hit_mem_pressure_limit);
+ assert_se(c_old->last_had_mem_reclaim == c_new->last_had_mem_reclaim);
assert_se(c_old = hashmap_get(h_old, "/1.slice"));
assert_se(c_new = hashmap_get(h_new, "/1.slice"));
assert_se(c_old->pgscan == c_new->last_pgscan);
assert_se(c_old->mem_pressure_limit == c_new->mem_pressure_limit);
assert_se(c_old->last_hit_mem_pressure_limit == c_new->last_hit_mem_pressure_limit);
+ assert_se(c_new->last_had_mem_reclaim > c_old->last_had_mem_reclaim);
}
static void test_oomd_system_context_acquire(void) {
assert_se(c->last_hit_mem_pressure_limit == 0);
}
-static void test_oomd_memory_reclaim(void) {
- _cleanup_hashmap_free_ Hashmap *h1 = NULL;
- char **paths = STRV_MAKE("/0.slice",
- "/1.slice",
- "/2.slice",
- "/3.slice",
- "/4.slice");
-
- OomdCGroupContext ctx[5] = {
- { .path = paths[0],
- .last_pgscan = 100,
- .pgscan = 100 },
- { .path = paths[1],
- .last_pgscan = 100,
- .pgscan = 100 },
- { .path = paths[2],
- .last_pgscan = 77,
- .pgscan = 33 },
- { .path = paths[3],
- .last_pgscan = UINT64_MAX,
- .pgscan = 100 },
- { .path = paths[4],
- .last_pgscan = 100,
- .pgscan = UINT64_MAX },
- };
-
- assert_se(h1 = hashmap_new(&string_hash_ops));
- assert_se(hashmap_put(h1, paths[0], &ctx[0]) >= 0);
- assert_se(hashmap_put(h1, paths[1], &ctx[1]) >= 0);
- assert_se(oomd_memory_reclaim(h1) == false);
-
- assert_se(hashmap_put(h1, paths[2], &ctx[2]) >= 0);
- assert_se(oomd_memory_reclaim(h1) == false);
-
- assert_se(hashmap_put(h1, paths[4], &ctx[4]) >= 0);
- assert_se(oomd_memory_reclaim(h1) == true);
-
- assert_se(hashmap_put(h1, paths[3], &ctx[3]) >= 0);
- assert_se(oomd_memory_reclaim(h1) == false);
-}
-
static void test_oomd_swap_free_below(void) {
OomdSystemContext ctx = (OomdSystemContext) {
.swap_total = 20971512 * 1024U,
test_oomd_update_cgroup_contexts_between_hashmaps();
test_oomd_system_context_acquire();
test_oomd_pressure_above();
- test_oomd_memory_reclaim();
test_oomd_swap_free_below();
test_oomd_sort_cgroups();