]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/oom/oomd-util.c
Merge pull request #18481 from keszybz/rpm-restart-post-trans
[thirdparty/systemd.git] / src / oom / oomd-util.c
CommitLineData
db9ecf05 1/* SPDX-License-Identifier: LGPL-2.1-or-later */
61ff7397
AZ
2
3#include <sys/xattr.h>
4#include <unistd.h>
5
6#include "fd-util.h"
7#include "format-util.h"
8#include "oomd-util.h"
9#include "parse-util.h"
10#include "path-util.h"
11#include "procfs-util.h"
12#include "signal-util.h"
13#include "sort-util.h"
14#include "stat-util.h"
15#include "stdio-util.h"
16
/* Defines oomd_cgroup_ctx_hash_ops: hash operations for char keys (hashed with string_hash_func, compared
 * with string_compare_func) and OomdCGroupContext values. Presumably oomd_cgroup_context_free() is run on
 * a value whenever a map using these ops drops it — confirm against the macro definition. */
17DEFINE_HASH_OPS_WITH_VALUE_DESTRUCTOR(
18 oomd_cgroup_ctx_hash_ops,
19 char,
20 string_hash_func,
21 string_compare_func,
22 OomdCGroupContext,
23 oomd_cgroup_context_free);
24
24
25static int log_kill(pid_t pid, int sig, void *userdata) {
26 log_debug("oomd attempting to kill " PID_FMT " with %s", pid, signal_to_string(sig));
27 return 0;
28}
29
30static int increment_oomd_xattr(const char *path, const char *xattr, uint64_t num_procs_killed) {
31 _cleanup_free_ char *value = NULL;
32 char buf[DECIMAL_STR_MAX(uint64_t) + 1];
33 uint64_t curr_count = 0;
34 int r;
35
36 assert(path);
37 assert(xattr);
38
39 r = cg_get_xattr_malloc(SYSTEMD_CGROUP_CONTROLLER, path, xattr, &value);
40 if (r < 0 && r != -ENODATA)
41 return r;
42
43 if (!isempty(value)) {
44 r = safe_atou64(value, &curr_count);
45 if (r < 0)
46 return r;
47 }
48
49 if (curr_count > UINT64_MAX - num_procs_killed)
50 return -EOVERFLOW;
51
52 xsprintf(buf, "%"PRIu64, curr_count + num_procs_killed);
53 r = cg_set_xattr(SYSTEMD_CGROUP_CONTROLLER, path, xattr, buf, strlen(buf), 0);
54 if (r < 0)
55 return r;
56
57 return 0;
58}
59
60OomdCGroupContext *oomd_cgroup_context_free(OomdCGroupContext *ctx) {
61 if (!ctx)
62 return NULL;
63
64 free(ctx->path);
65 return mfree(ctx);
66}
67
68int oomd_pressure_above(Hashmap *h, usec_t duration, Set **ret) {
69 _cleanup_set_free_ Set *targets = NULL;
70 OomdCGroupContext *ctx;
71 char *key;
72 int r;
73
74 assert(h);
75 assert(ret);
76
77 targets = set_new(NULL);
78 if (!targets)
79 return -ENOMEM;
80
81 HASHMAP_FOREACH_KEY(ctx, key, h) {
82 if (ctx->memory_pressure.avg10 > ctx->mem_pressure_limit) {
83 usec_t diff;
84
85 if (ctx->last_hit_mem_pressure_limit == 0)
86 ctx->last_hit_mem_pressure_limit = now(CLOCK_MONOTONIC);
87
88 diff = now(CLOCK_MONOTONIC) - ctx->last_hit_mem_pressure_limit;
89 if (diff >= duration) {
90 r = set_put(targets, ctx);
91 if (r < 0)
92 return -ENOMEM;
93 }
94 } else
95 ctx->last_hit_mem_pressure_limit = 0;
96 }
97
98 if (!set_isempty(targets)) {
99 *ret = TAKE_PTR(targets);
100 return 1;
101 }
102
103 *ret = NULL;
104 return 0;
105}
106
107bool oomd_memory_reclaim(Hashmap *h) {
108 uint64_t pgscan = 0, pgscan_of = 0, last_pgscan = 0, last_pgscan_of = 0;
109 OomdCGroupContext *ctx;
110
111 assert(h);
112
113 /* If sum of all the current pgscan values are greater than the sum of all the last_pgscan values,
114 * there was reclaim activity. Used along with pressure checks to decide whether to take action. */
115
116 HASHMAP_FOREACH(ctx, h) {
117 uint64_t sum;
118
119 sum = pgscan + ctx->pgscan;
120 if (sum < pgscan || sum < ctx->pgscan)
121 pgscan_of++; /* count overflows */
122 pgscan = sum;
123
124 sum = last_pgscan + ctx->last_pgscan;
125 if (sum < last_pgscan || sum < ctx->last_pgscan)
126 last_pgscan_of++; /* count overflows */
127 last_pgscan = sum;
128 }
129
130 /* overflow counts are the same, return sums comparison */
131 if (last_pgscan_of == pgscan_of)
132 return pgscan > last_pgscan;
133
134 return pgscan_of > last_pgscan_of;
135}
136
137bool oomd_swap_free_below(const OomdSystemContext *ctx, uint64_t threshold_percent) {
138 uint64_t swap_threshold;
139
140 assert(ctx);
141 assert(threshold_percent <= 100);
142
143 swap_threshold = ctx->swap_total * threshold_percent / ((uint64_t) 100);
144 return (ctx->swap_total - ctx->swap_used) < swap_threshold;
145}
146
147int oomd_sort_cgroup_contexts(Hashmap *h, oomd_compare_t compare_func, const char *prefix, OomdCGroupContext ***ret) {
148 _cleanup_free_ OomdCGroupContext **sorted = NULL;
149 OomdCGroupContext *item;
150 size_t k = 0;
151
152 assert(h);
153 assert(compare_func);
154 assert(ret);
155
156 sorted = new0(OomdCGroupContext*, hashmap_size(h));
157 if (!sorted)
158 return -ENOMEM;
159
160 HASHMAP_FOREACH(item, h) {
59331b8e
AZ
161 /* Skip over cgroups that are not valid candidates or are explicitly marked for omission */
162 if ((item->path && prefix && !path_startswith(item->path, prefix)) || item->preference == MANAGED_OOM_PREFERENCE_OMIT)
61ff7397
AZ
163 continue;
164
165 sorted[k++] = item;
166 }
167
168 typesafe_qsort(sorted, k, compare_func);
169
170 *ret = TAKE_PTR(sorted);
171
172 assert(k <= INT_MAX);
173 return (int) k;
174}
175
176int oomd_cgroup_kill(const char *path, bool recurse, bool dry_run) {
177 _cleanup_set_free_ Set *pids_killed = NULL;
178 int r;
179
180 assert(path);
181
182 if (dry_run) {
183 _cleanup_free_ char *cg_path = NULL;
184
185 r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, path, NULL, &cg_path);
186 if (r < 0)
187 return r;
188
189 log_debug("oomd dry-run: Would have tried to kill %s with recurse=%s", cg_path, true_false(recurse));
190 return 0;
191 }
192
193 pids_killed = set_new(NULL);
194 if (!pids_killed)
195 return -ENOMEM;
196
197 if (recurse)
198 r = cg_kill_recursive(SYSTEMD_CGROUP_CONTROLLER, path, SIGKILL, CGROUP_IGNORE_SELF, pids_killed, log_kill, NULL);
199 else
200 r = cg_kill(SYSTEMD_CGROUP_CONTROLLER, path, SIGKILL, CGROUP_IGNORE_SELF, pids_killed, log_kill, NULL);
201 if (r < 0)
202 return r;
203
e3038333 204 r = increment_oomd_xattr(path, "user.oomd_kill", set_size(pids_killed));
61ff7397 205 if (r < 0)
e3038333 206 log_debug_errno(r, "Failed to set user.oomd_kill on kill: %m");
61ff7397
AZ
207
208 return set_size(pids_killed) != 0;
209}
210
211int oomd_kill_by_pgscan(Hashmap *h, const char *prefix, bool dry_run) {
212 _cleanup_free_ OomdCGroupContext **sorted = NULL;
213 int r;
214
215 assert(h);
216
1f76411b 217 r = oomd_sort_cgroup_contexts(h, compare_pgscan_and_memory_usage, prefix, &sorted);
61ff7397
AZ
218 if (r < 0)
219 return r;
220
221 for (int i = 0; i < r; i++) {
59331b8e
AZ
222 /* Skip cgroups with no reclaim and memory usage; it won't alleviate pressure. */
223 /* Don't break since there might be "avoid" cgroups at the end. */
74f834e9 224 if (sorted[i]->pgscan == 0 && sorted[i]->current_memory_usage == 0)
59331b8e 225 continue;
61ff7397
AZ
226
227 r = oomd_cgroup_kill(sorted[i]->path, true, dry_run);
228 if (r > 0 || r == -ENOMEM)
229 break;
230 }
231
232 return r;
233}
234
235int oomd_kill_by_swap_usage(Hashmap *h, bool dry_run) {
236 _cleanup_free_ OomdCGroupContext **sorted = NULL;
237 int r;
238
239 assert(h);
240
241 r = oomd_sort_cgroup_contexts(h, compare_swap_usage, NULL, &sorted);
242 if (r < 0)
243 return r;
244
245 /* Try to kill cgroups with non-zero swap usage until we either succeed in
246 * killing or we get to a cgroup with no swap usage. */
247 for (int i = 0; i < r; i++) {
59331b8e
AZ
248 /* Skip over cgroups with no resource usage. Don't break since there might be "avoid"
249 * cgroups at the end. */
61ff7397 250 if (sorted[i]->swap_usage == 0)
59331b8e 251 continue;
61ff7397
AZ
252
253 r = oomd_cgroup_kill(sorted[i]->path, true, dry_run);
254 if (r > 0 || r == -ENOMEM)
255 break;
256 }
257
258 return r;
259}
260
261int oomd_cgroup_context_acquire(const char *path, OomdCGroupContext **ret) {
262 _cleanup_(oomd_cgroup_context_freep) OomdCGroupContext *ctx = NULL;
263 _cleanup_free_ char *p = NULL, *val = NULL;
264 bool is_root;
59331b8e 265 uid_t uid;
61ff7397
AZ
266 int r;
267
268 assert(path);
269 assert(ret);
270
271 ctx = new0(OomdCGroupContext, 1);
272 if (!ctx)
273 return -ENOMEM;
274
275 is_root = empty_or_root(path);
59331b8e 276 ctx->preference = MANAGED_OOM_PREFERENCE_NONE;
61ff7397
AZ
277
278 r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, path, "memory.pressure", &p);
279 if (r < 0)
280 return log_debug_errno(r, "Error getting cgroup memory pressure path from %s: %m", path);
281
282 r = read_resource_pressure(p, PRESSURE_TYPE_FULL, &ctx->memory_pressure);
283 if (r < 0)
284 return log_debug_errno(r, "Error parsing memory pressure from %s: %m", p);
285
59331b8e
AZ
286 r = cg_get_owner(SYSTEMD_CGROUP_CONTROLLER, path, &uid);
287 if (r < 0)
288 log_debug_errno(r, "Failed to get owner/group from %s: %m", path);
289 else if (uid == 0) {
290 /* Ignore most errors when reading the xattr since it is usually unset and cgroup xattrs are only used
291 * as an optional feature of systemd-oomd (and the system might not even support them). */
292 r = cg_get_xattr_bool(SYSTEMD_CGROUP_CONTROLLER, path, "user.oomd_avoid");
293 if (r == -ENOMEM)
294 return r;
295 ctx->preference = r == 1 ? MANAGED_OOM_PREFERENCE_AVOID : ctx->preference;
296
297 r = cg_get_xattr_bool(SYSTEMD_CGROUP_CONTROLLER, path, "user.oomd_omit");
298 if (r == -ENOMEM)
299 return r;
300 ctx->preference = r == 1 ? MANAGED_OOM_PREFERENCE_OMIT : ctx->preference;
301 }
302
61ff7397
AZ
303 if (is_root) {
304 r = procfs_memory_get_used(&ctx->current_memory_usage);
305 if (r < 0)
306 return log_debug_errno(r, "Error getting memory used from procfs: %m");
307 } else {
308 r = cg_get_attribute_as_uint64(SYSTEMD_CGROUP_CONTROLLER, path, "memory.current", &ctx->current_memory_usage);
309 if (r < 0)
310 return log_debug_errno(r, "Error getting memory.current from %s: %m", path);
311
312 r = cg_get_attribute_as_uint64(SYSTEMD_CGROUP_CONTROLLER, path, "memory.min", &ctx->memory_min);
313 if (r < 0)
314 return log_debug_errno(r, "Error getting memory.min from %s: %m", path);
315
316 r = cg_get_attribute_as_uint64(SYSTEMD_CGROUP_CONTROLLER, path, "memory.low", &ctx->memory_low);
317 if (r < 0)
318 return log_debug_errno(r, "Error getting memory.low from %s: %m", path);
319
320 r = cg_get_attribute_as_uint64(SYSTEMD_CGROUP_CONTROLLER, path, "memory.swap.current", &ctx->swap_usage);
321 if (r < 0)
322 return log_debug_errno(r, "Error getting memory.swap.current from %s: %m", path);
323
324 r = cg_get_keyed_attribute(SYSTEMD_CGROUP_CONTROLLER, path, "memory.stat", STRV_MAKE("pgscan"), &val);
325 if (r < 0)
326 return log_debug_errno(r, "Error getting pgscan from memory.stat under %s: %m", path);
327
328 r = safe_atou64(val, &ctx->pgscan);
329 if (r < 0)
330 return log_debug_errno(r, "Error converting pgscan value to uint64_t: %m");
331 }
332
333 ctx->path = strdup(empty_to_root(path));
334 if (!ctx->path)
335 return -ENOMEM;
336
337 *ret = TAKE_PTR(ctx);
338 return 0;
339}
340
341int oomd_system_context_acquire(const char *proc_swaps_path, OomdSystemContext *ret) {
342 _cleanup_fclose_ FILE *f = NULL;
343 OomdSystemContext ctx = {};
344 int r;
345
346 assert(proc_swaps_path);
347 assert(ret);
348
349 f = fopen(proc_swaps_path, "re");
350 if (!f)
351 return -errno;
352
353 (void) fscanf(f, "%*s %*s %*s %*s %*s\n");
354
355 for (;;) {
356 uint64_t total, used;
357
358 r = fscanf(f,
359 "%*s " /* device/file */
360 "%*s " /* type of swap */
361 "%" PRIu64 " " /* swap size */
362 "%" PRIu64 " " /* used */
363 "%*s\n", /* priority */
364 &total, &used);
365
366 if (r == EOF && feof(f))
367 break;
368
369 if (r != 2) {
370 if (ferror(f))
371 return log_debug_errno(errno, "Error reading from %s: %m", proc_swaps_path);
372
373 return log_debug_errno(SYNTHETIC_ERRNO(EIO),
374 "Failed to parse values from %s: %m", proc_swaps_path);
375 }
376
377 ctx.swap_total += total * 1024U;
378 ctx.swap_used += used * 1024U;
379 }
380
381 *ret = ctx;
382 return 0;
383}
384
385int oomd_insert_cgroup_context(Hashmap *old_h, Hashmap *new_h, const char *path) {
386 _cleanup_(oomd_cgroup_context_freep) OomdCGroupContext *curr_ctx = NULL;
387 OomdCGroupContext *old_ctx, *ctx;
388 int r;
389
390 assert(new_h);
391 assert(path);
392
393 r = oomd_cgroup_context_acquire(path, &curr_ctx);
394 if (r < 0)
395 return log_debug_errno(r, "Failed to get OomdCGroupContext for %s: %m", path);
396
397 old_ctx = hashmap_get(old_h, path);
398 if (old_ctx) {
399 curr_ctx->last_pgscan = old_ctx->pgscan;
400 curr_ctx->mem_pressure_limit = old_ctx->mem_pressure_limit;
401 curr_ctx->last_hit_mem_pressure_limit = old_ctx->last_hit_mem_pressure_limit;
402 }
403
404 ctx = TAKE_PTR(curr_ctx);
405 r = hashmap_put(new_h, ctx->path, ctx);
406 if (r < 0)
407 return r;
408
409 return 0;
410}
5c616ecf
AZ
411
412void oomd_dump_swap_cgroup_context(const OomdCGroupContext *ctx, FILE *f, const char *prefix) {
413 char swap[FORMAT_BYTES_MAX];
414
415 assert(ctx);
416 assert(f);
417
418 if (!empty_or_root(ctx->path))
419 fprintf(f,
420 "%sPath: %s\n"
421 "%s\tSwap Usage: %s\n",
422 strempty(prefix), ctx->path,
423 strempty(prefix), format_bytes(swap, sizeof(swap), ctx->swap_usage));
424 else
425 fprintf(f,
426 "%sPath: %s\n"
427 "%s\tSwap Usage: (see System Context)\n",
428 strempty(prefix), ctx->path,
429 strempty(prefix));
430}
431
432void oomd_dump_memory_pressure_cgroup_context(const OomdCGroupContext *ctx, FILE *f, const char *prefix) {
433 char tbuf[FORMAT_TIMESPAN_MAX], mem_use[FORMAT_BYTES_MAX];
434 char mem_min[FORMAT_BYTES_MAX], mem_low[FORMAT_BYTES_MAX];
435
436 assert(ctx);
437 assert(f);
438
439 fprintf(f,
440 "%sPath: %s\n"
0a9f9344 441 "%s\tMemory Pressure Limit: %lu.%02lu%%\n"
5c616ecf
AZ
442 "%s\tPressure: Avg10: %lu.%02lu Avg60: %lu.%02lu Avg300: %lu.%02lu Total: %s\n"
443 "%s\tCurrent Memory Usage: %s\n",
444 strempty(prefix), ctx->path,
0a9f9344 445 strempty(prefix), LOAD_INT(ctx->mem_pressure_limit), LOAD_FRAC(ctx->mem_pressure_limit),
5c616ecf
AZ
446 strempty(prefix),
447 LOAD_INT(ctx->memory_pressure.avg10), LOAD_FRAC(ctx->memory_pressure.avg10),
448 LOAD_INT(ctx->memory_pressure.avg60), LOAD_FRAC(ctx->memory_pressure.avg60),
449 LOAD_INT(ctx->memory_pressure.avg300), LOAD_FRAC(ctx->memory_pressure.avg300),
450 format_timespan(tbuf, sizeof(tbuf), ctx->memory_pressure.total, USEC_PER_SEC),
451 strempty(prefix), format_bytes(mem_use, sizeof(mem_use), ctx->current_memory_usage));
452
453 if (!empty_or_root(ctx->path))
454 fprintf(f,
455 "%s\tMemory Min: %s\n"
456 "%s\tMemory Low: %s\n"
457 "%s\tPgscan: %" PRIu64 "\n",
458 strempty(prefix), format_bytes_cgroup_protection(mem_min, sizeof(mem_min), ctx->memory_min),
459 strempty(prefix), format_bytes_cgroup_protection(mem_low, sizeof(mem_low), ctx->memory_low),
460 strempty(prefix), ctx->pgscan);
461}
462
463void oomd_dump_system_context(const OomdSystemContext *ctx, FILE *f, const char *prefix) {
464 char used[FORMAT_BYTES_MAX], total[FORMAT_BYTES_MAX];
465
466 assert(ctx);
467 assert(f);
468
469 fprintf(f,
470 "%sSwap: Used: %s Total: %s\n",
471 strempty(prefix),
472 format_bytes(used, sizeof(used), ctx->swap_used),
473 format_bytes(total, sizeof(total), ctx->swap_total));
474}