]>
Commit | Line | Data |
---|---|---|
db9ecf05 | 1 | /* SPDX-License-Identifier: LGPL-2.1-or-later */ |
61ff7397 AZ |
2 | |
3 | #include <sys/xattr.h> | |
4 | #include <unistd.h> | |
5 | ||
00675c36 | 6 | #include "errno-util.h" |
61ff7397 | 7 | #include "fd-util.h" |
47136b9d | 8 | #include "fileio.h" |
61ff7397 | 9 | #include "format-util.h" |
2485b7e2 | 10 | #include "memstream-util.h" |
61ff7397 AZ |
11 | #include "oomd-util.h" |
12 | #include "parse-util.h" | |
13 | #include "path-util.h" | |
14 | #include "procfs-util.h" | |
15 | #include "signal-util.h" | |
16 | #include "sort-util.h" | |
17 | #include "stat-util.h" | |
18 | #include "stdio-util.h" | |
652a4efb | 19 | #include "user-util.h" |
61ff7397 AZ |
20 | |
21 | DEFINE_HASH_OPS_WITH_VALUE_DESTRUCTOR( | |
22 | oomd_cgroup_ctx_hash_ops, | |
23 | char, | |
24 | string_hash_func, | |
25 | string_compare_func, | |
26 | OomdCGroupContext, | |
27 | oomd_cgroup_context_free); | |
28 | ||
29 | static int log_kill(pid_t pid, int sig, void *userdata) { | |
30 | log_debug("oomd attempting to kill " PID_FMT " with %s", pid, signal_to_string(sig)); | |
31 | return 0; | |
32 | } | |
33 | ||
34 | static int increment_oomd_xattr(const char *path, const char *xattr, uint64_t num_procs_killed) { | |
35 | _cleanup_free_ char *value = NULL; | |
36 | char buf[DECIMAL_STR_MAX(uint64_t) + 1]; | |
37 | uint64_t curr_count = 0; | |
38 | int r; | |
39 | ||
40 | assert(path); | |
41 | assert(xattr); | |
42 | ||
bd1791b5 | 43 | r = cg_get_xattr_malloc(path, xattr, &value); |
00675c36 | 44 | if (r < 0 && !ERRNO_IS_XATTR_ABSENT(r)) |
61ff7397 AZ |
45 | return r; |
46 | ||
47 | if (!isempty(value)) { | |
48 | r = safe_atou64(value, &curr_count); | |
49 | if (r < 0) | |
50 | return r; | |
51 | } | |
52 | ||
53 | if (curr_count > UINT64_MAX - num_procs_killed) | |
54 | return -EOVERFLOW; | |
55 | ||
56 | xsprintf(buf, "%"PRIu64, curr_count + num_procs_killed); | |
bd1791b5 | 57 | r = cg_set_xattr(path, xattr, buf, strlen(buf), 0); |
61ff7397 AZ |
58 | if (r < 0) |
59 | return r; | |
60 | ||
61 | return 0; | |
62 | } | |
63 | ||
64 | OomdCGroupContext *oomd_cgroup_context_free(OomdCGroupContext *ctx) { | |
65 | if (!ctx) | |
66 | return NULL; | |
67 | ||
68 | free(ctx->path); | |
69 | return mfree(ctx); | |
70 | } | |
71 | ||
72 | int oomd_pressure_above(Hashmap *h, usec_t duration, Set **ret) { | |
73 | _cleanup_set_free_ Set *targets = NULL; | |
74 | OomdCGroupContext *ctx; | |
75 | char *key; | |
76 | int r; | |
77 | ||
78 | assert(h); | |
79 | assert(ret); | |
80 | ||
81 | targets = set_new(NULL); | |
82 | if (!targets) | |
83 | return -ENOMEM; | |
84 | ||
85 | HASHMAP_FOREACH_KEY(ctx, key, h) { | |
86 | if (ctx->memory_pressure.avg10 > ctx->mem_pressure_limit) { | |
87 | usec_t diff; | |
88 | ||
69c8f025 AZ |
89 | if (ctx->mem_pressure_limit_hit_start == 0) |
90 | ctx->mem_pressure_limit_hit_start = now(CLOCK_MONOTONIC); | |
61ff7397 | 91 | |
69c8f025 | 92 | diff = now(CLOCK_MONOTONIC) - ctx->mem_pressure_limit_hit_start; |
61ff7397 AZ |
93 | if (diff >= duration) { |
94 | r = set_put(targets, ctx); | |
95 | if (r < 0) | |
96 | return -ENOMEM; | |
97 | } | |
98 | } else | |
69c8f025 | 99 | ctx->mem_pressure_limit_hit_start = 0; |
61ff7397 AZ |
100 | } |
101 | ||
102 | if (!set_isempty(targets)) { | |
103 | *ret = TAKE_PTR(targets); | |
104 | return 1; | |
105 | } | |
106 | ||
107 | *ret = NULL; | |
108 | return 0; | |
109 | } | |
110 | ||
37d8020c AZ |
111 | uint64_t oomd_pgscan_rate(const OomdCGroupContext *c) { |
112 | uint64_t last_pgscan; | |
113 | ||
114 | assert(c); | |
115 | ||
116 | /* If last_pgscan > pgscan, assume the cgroup was recreated and reset last_pgscan to zero. | |
117 | * pgscan is monotonic and in practice should not decrease (except in the recreation case). */ | |
118 | last_pgscan = c->last_pgscan; | |
119 | if (c->last_pgscan > c->pgscan) { | |
120 | log_debug("Last pgscan %"PRIu64" greater than current pgscan %"PRIu64" for %s. Using last pgscan of zero.", | |
121 | c->last_pgscan, c->pgscan, c->path); | |
122 | last_pgscan = 0; | |
123 | } | |
124 | ||
125 | return c->pgscan - last_pgscan; | |
126 | } | |
127 | ||
030bc91c | 128 | bool oomd_mem_available_below(const OomdSystemContext *ctx, int threshold_permyriad) { |
eeeaa422 AZ |
129 | uint64_t mem_threshold; |
130 | ||
131 | assert(ctx); | |
132 | assert(threshold_permyriad <= 10000); | |
133 | ||
134 | mem_threshold = ctx->mem_total * threshold_permyriad / (uint64_t) 10000; | |
e82acab4 | 135 | return LESS_BY(ctx->mem_total, ctx->mem_used) < mem_threshold; |
eeeaa422 AZ |
136 | } |
137 | ||
d06e7fb5 | 138 | bool oomd_swap_free_below(const OomdSystemContext *ctx, int threshold_permyriad) { |
61ff7397 AZ |
139 | uint64_t swap_threshold; |
140 | ||
141 | assert(ctx); | |
d06e7fb5 | 142 | assert(threshold_permyriad <= 10000); |
61ff7397 | 143 | |
d06e7fb5 | 144 | swap_threshold = ctx->swap_total * threshold_permyriad / (uint64_t) 10000; |
61ff7397 AZ |
145 | return (ctx->swap_total - ctx->swap_used) < swap_threshold; |
146 | } | |
147 | ||
652a4efb | 148 | int oomd_fetch_cgroup_oom_preference(OomdCGroupContext *ctx, const char *prefix) { |
66bc4f6f | 149 | uid_t uid; |
652a4efb NR |
150 | int r; |
151 | ||
152 | assert(ctx); | |
153 | ||
154 | prefix = empty_to_root(prefix); | |
155 | ||
156 | if (!path_startswith(ctx->path, prefix)) | |
157 | return log_debug_errno(SYNTHETIC_ERRNO(EINVAL), | |
158 | "%s is not a descendant of %s", ctx->path, prefix); | |
159 | ||
bd1791b5 | 160 | r = cg_get_owner(ctx->path, &uid); |
652a4efb NR |
161 | if (r < 0) |
162 | return log_debug_errno(r, "Failed to get owner/group from %s: %m", ctx->path); | |
163 | ||
66bc4f6f NR |
164 | if (uid != 0) { |
165 | uid_t prefix_uid; | |
166 | ||
bd1791b5 | 167 | r = cg_get_owner(prefix, &prefix_uid); |
66bc4f6f NR |
168 | if (r < 0) |
169 | return log_debug_errno(r, "Failed to get owner/group from %s: %m", prefix); | |
170 | ||
171 | if (uid != prefix_uid) { | |
172 | ctx->preference = MANAGED_OOM_PREFERENCE_NONE; | |
173 | return 0; | |
174 | } | |
175 | } | |
176 | ||
177 | /* Ignore most errors when reading the xattr since it is usually unset and cgroup xattrs are only used | |
178 | * as an optional feature of systemd-oomd (and the system might not even support them). */ | |
bd1791b5 | 179 | r = cg_get_xattr_bool(ctx->path, "user.oomd_avoid"); |
66bc4f6f NR |
180 | if (r == -ENOMEM) |
181 | return log_oom_debug(); | |
182 | if (r < 0 && !ERRNO_IS_XATTR_ABSENT(r)) | |
183 | log_debug_errno(r, "Failed to get xattr user.oomd_avoid, ignoring: %m"); | |
184 | ctx->preference = r > 0 ? MANAGED_OOM_PREFERENCE_AVOID : ctx->preference; | |
652a4efb | 185 | |
bd1791b5 | 186 | r = cg_get_xattr_bool(ctx->path, "user.oomd_omit"); |
66bc4f6f NR |
187 | if (r == -ENOMEM) |
188 | return log_oom_debug(); | |
189 | if (r < 0 && !ERRNO_IS_XATTR_ABSENT(r)) | |
190 | log_debug_errno(r, "Failed to get xattr user.oomd_omit, ignoring: %m"); | |
191 | ctx->preference = r > 0 ? MANAGED_OOM_PREFERENCE_OMIT : ctx->preference; | |
652a4efb NR |
192 | |
193 | return 0; | |
194 | } | |
195 | ||
61ff7397 AZ |
196 | int oomd_sort_cgroup_contexts(Hashmap *h, oomd_compare_t compare_func, const char *prefix, OomdCGroupContext ***ret) { |
197 | _cleanup_free_ OomdCGroupContext **sorted = NULL; | |
198 | OomdCGroupContext *item; | |
199 | size_t k = 0; | |
652a4efb | 200 | int r; |
61ff7397 AZ |
201 | |
202 | assert(h); | |
203 | assert(compare_func); | |
204 | assert(ret); | |
205 | ||
206 | sorted = new0(OomdCGroupContext*, hashmap_size(h)); | |
207 | if (!sorted) | |
208 | return -ENOMEM; | |
209 | ||
210 | HASHMAP_FOREACH(item, h) { | |
59331b8e | 211 | /* Skip over cgroups that are not valid candidates or are explicitly marked for omission */ |
652a4efb NR |
212 | if (item->path && prefix && !path_startswith(item->path, prefix)) |
213 | continue; | |
214 | ||
215 | r = oomd_fetch_cgroup_oom_preference(item, prefix); | |
216 | if (r == -ENOMEM) | |
217 | return r; | |
218 | ||
219 | if (item->preference == MANAGED_OOM_PREFERENCE_OMIT) | |
61ff7397 AZ |
220 | continue; |
221 | ||
222 | sorted[k++] = item; | |
223 | } | |
224 | ||
225 | typesafe_qsort(sorted, k, compare_func); | |
226 | ||
227 | *ret = TAKE_PTR(sorted); | |
228 | ||
229 | assert(k <= INT_MAX); | |
230 | return (int) k; | |
231 | } | |
232 | ||
233 | int oomd_cgroup_kill(const char *path, bool recurse, bool dry_run) { | |
234 | _cleanup_set_free_ Set *pids_killed = NULL; | |
235 | int r; | |
236 | ||
237 | assert(path); | |
238 | ||
239 | if (dry_run) { | |
240 | _cleanup_free_ char *cg_path = NULL; | |
241 | ||
242 | r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, path, NULL, &cg_path); | |
243 | if (r < 0) | |
244 | return r; | |
245 | ||
3b703840 | 246 | log_info("oomd dry-run: Would have tried to kill %s with recurse=%s", cg_path, true_false(recurse)); |
61ff7397 AZ |
247 | return 0; |
248 | } | |
249 | ||
250 | pids_killed = set_new(NULL); | |
251 | if (!pids_killed) | |
252 | return -ENOMEM; | |
253 | ||
38c41427 NK |
254 | r = increment_oomd_xattr(path, "user.oomd_ooms", 1); |
255 | if (r < 0) | |
256 | log_debug_errno(r, "Failed to set user.oomd_ooms before kill: %m"); | |
257 | ||
61ff7397 | 258 | if (recurse) |
bd1791b5 | 259 | r = cg_kill_recursive(path, SIGKILL, CGROUP_IGNORE_SELF, pids_killed, log_kill, NULL); |
61ff7397 | 260 | else |
bd1791b5 | 261 | r = cg_kill(path, SIGKILL, CGROUP_IGNORE_SELF, pids_killed, log_kill, NULL); |
2ee20946 AZ |
262 | |
263 | /* The cgroup could have been cleaned up after we have sent SIGKILL to all of the processes, but before | |
264 | * we could do one last iteration of cgroup.procs to check. Or the service unit could have exited and | |
265 | * was removed between picking candidates and coming into this function. In either case, let's log | |
266 | * about it let the caller decide what to do once they know how many PIDs were killed. */ | |
267 | if (IN_SET(r, -ENOENT, -ENODEV)) | |
268 | log_debug_errno(r, "Error when sending SIGKILL to processes in cgroup path %s, ignoring: %m", path); | |
269 | else if (r < 0) | |
61ff7397 AZ |
270 | return r; |
271 | ||
914d4e99 AZ |
272 | if (set_isempty(pids_killed)) |
273 | log_debug("Nothing killed when attempting to kill %s", path); | |
274 | ||
e3038333 | 275 | r = increment_oomd_xattr(path, "user.oomd_kill", set_size(pids_killed)); |
61ff7397 | 276 | if (r < 0) |
e3038333 | 277 | log_debug_errno(r, "Failed to set user.oomd_kill on kill: %m"); |
61ff7397 AZ |
278 | |
279 | return set_size(pids_killed) != 0; | |
280 | } | |
281 | ||
29f4185a BB |
282 | typedef void (*dump_candidate_func)(const OomdCGroupContext *ctx, FILE *f, const char *prefix); |
283 | ||
284 | static int dump_kill_candidates(OomdCGroupContext **sorted, int n, int dump_until, dump_candidate_func dump_func) { | |
2485b7e2 YW |
285 | _cleanup_(memstream_done) MemStream m = {}; |
286 | FILE *f; | |
287 | ||
29f4185a | 288 | /* Try dumping top offendors, ignoring any errors that might happen. */ |
29f4185a | 289 | |
2485b7e2 | 290 | f = memstream_init(&m); |
29f4185a | 291 | if (!f) |
2485b7e2 | 292 | return -ENOMEM; |
29f4185a BB |
293 | |
294 | fprintf(f, "Considered %d cgroups for killing, top candidates were:\n", n); | |
295 | for (int i = 0; i < dump_until; i++) | |
296 | dump_func(sorted[i], f, "\t"); | |
297 | ||
2485b7e2 | 298 | return memstream_dump(LOG_INFO, &m); |
29f4185a BB |
299 | } |
300 | ||
37a7e159 | 301 | int oomd_kill_by_pgscan_rate(Hashmap *h, const char *prefix, bool dry_run, char **ret_selected) { |
61ff7397 | 302 | _cleanup_free_ OomdCGroupContext **sorted = NULL; |
f94a80ab | 303 | int n, r, ret = 0; |
29f4185a | 304 | int dump_until; |
61ff7397 AZ |
305 | |
306 | assert(h); | |
37a7e159 | 307 | assert(ret_selected); |
61ff7397 | 308 | |
f94a80ab ZJS |
309 | n = oomd_sort_cgroup_contexts(h, compare_pgscan_rate_and_memory_usage, prefix, &sorted); |
310 | if (n < 0) | |
311 | return n; | |
61ff7397 | 312 | |
29f4185a | 313 | dump_until = MIN(n, DUMP_ON_KILL_COUNT); |
f94a80ab | 314 | for (int i = 0; i < n; i++) { |
37a7e159 AZ |
315 | /* Skip cgroups with no reclaim and memory usage; it won't alleviate pressure. |
316 | * Continue since there might be "avoid" cgroups at the end. */ | |
74f834e9 | 317 | if (sorted[i]->pgscan == 0 && sorted[i]->current_memory_usage == 0) |
59331b8e | 318 | continue; |
61ff7397 | 319 | |
ebfb6019 | 320 | r = oomd_cgroup_kill(sorted[i]->path, /* recurse= */ true, /* dry_run= */ dry_run); |
37a7e159 AZ |
321 | if (r == -ENOMEM) |
322 | return r; /* Treat oom as a hard error */ | |
323 | if (r < 0) { | |
324 | if (ret == 0) | |
325 | ret = r; | |
326 | continue; /* Try to find something else to kill */ | |
327 | } | |
328 | ||
b814de30 | 329 | dump_until = MAX(dump_until, i + 1); |
37a7e159 AZ |
330 | char *selected = strdup(sorted[i]->path); |
331 | if (!selected) | |
332 | return -ENOMEM; | |
333 | *ret_selected = selected; | |
29f4185a BB |
334 | ret = r; |
335 | break; | |
61ff7397 AZ |
336 | } |
337 | ||
29f4185a BB |
338 | dump_kill_candidates(sorted, n, dump_until, oomd_dump_memory_pressure_cgroup_context); |
339 | ||
37a7e159 | 340 | return ret; |
61ff7397 AZ |
341 | } |
342 | ||
685b0985 | 343 | int oomd_kill_by_swap_usage(Hashmap *h, uint64_t threshold_usage, bool dry_run, char **ret_selected) { |
61ff7397 | 344 | _cleanup_free_ OomdCGroupContext **sorted = NULL; |
f94a80ab | 345 | int n, r, ret = 0; |
29f4185a | 346 | int dump_until; |
61ff7397 AZ |
347 | |
348 | assert(h); | |
37a7e159 | 349 | assert(ret_selected); |
61ff7397 | 350 | |
f94a80ab ZJS |
351 | n = oomd_sort_cgroup_contexts(h, compare_swap_usage, NULL, &sorted); |
352 | if (n < 0) | |
353 | return n; | |
61ff7397 | 354 | |
29f4185a | 355 | dump_until = MIN(n, DUMP_ON_KILL_COUNT); |
685b0985 AZ |
356 | /* Try to kill cgroups with non-zero swap usage until we either succeed in killing or we get to a cgroup with |
357 | * no swap usage. Threshold killing only cgroups with more than threshold swap usage. */ | |
f94a80ab | 358 | for (int i = 0; i < n; i++) { |
685b0985 AZ |
359 | /* Skip over cgroups with not enough swap usage. Don't break since there might be "avoid" |
360 | * cgroups at the end. */ | |
361 | if (sorted[i]->swap_usage <= threshold_usage) | |
59331b8e | 362 | continue; |
61ff7397 | 363 | |
ebfb6019 | 364 | r = oomd_cgroup_kill(sorted[i]->path, /* recurse= */ true, /* dry_run= */ dry_run); |
37a7e159 AZ |
365 | if (r == -ENOMEM) |
366 | return r; /* Treat oom as a hard error */ | |
367 | if (r < 0) { | |
368 | if (ret == 0) | |
369 | ret = r; | |
370 | continue; /* Try to find something else to kill */ | |
371 | } | |
372 | ||
b814de30 | 373 | dump_until = MAX(dump_until, i + 1); |
37a7e159 AZ |
374 | char *selected = strdup(sorted[i]->path); |
375 | if (!selected) | |
376 | return -ENOMEM; | |
377 | *ret_selected = selected; | |
29f4185a BB |
378 | ret = r; |
379 | break; | |
61ff7397 AZ |
380 | } |
381 | ||
29f4185a BB |
382 | dump_kill_candidates(sorted, n, dump_until, oomd_dump_swap_cgroup_context); |
383 | ||
37a7e159 | 384 | return ret; |
61ff7397 AZ |
385 | } |
386 | ||
387 | int oomd_cgroup_context_acquire(const char *path, OomdCGroupContext **ret) { | |
388 | _cleanup_(oomd_cgroup_context_freep) OomdCGroupContext *ctx = NULL; | |
389 | _cleanup_free_ char *p = NULL, *val = NULL; | |
390 | bool is_root; | |
391 | int r; | |
392 | ||
393 | assert(path); | |
394 | assert(ret); | |
395 | ||
396 | ctx = new0(OomdCGroupContext, 1); | |
397 | if (!ctx) | |
398 | return -ENOMEM; | |
399 | ||
400 | is_root = empty_or_root(path); | |
59331b8e | 401 | ctx->preference = MANAGED_OOM_PREFERENCE_NONE; |
61ff7397 AZ |
402 | |
403 | r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, path, "memory.pressure", &p); | |
404 | if (r < 0) | |
405 | return log_debug_errno(r, "Error getting cgroup memory pressure path from %s: %m", path); | |
406 | ||
407 | r = read_resource_pressure(p, PRESSURE_TYPE_FULL, &ctx->memory_pressure); | |
408 | if (r < 0) | |
409 | return log_debug_errno(r, "Error parsing memory pressure from %s: %m", p); | |
410 | ||
411 | if (is_root) { | |
412 | r = procfs_memory_get_used(&ctx->current_memory_usage); | |
413 | if (r < 0) | |
414 | return log_debug_errno(r, "Error getting memory used from procfs: %m"); | |
415 | } else { | |
416 | r = cg_get_attribute_as_uint64(SYSTEMD_CGROUP_CONTROLLER, path, "memory.current", &ctx->current_memory_usage); | |
417 | if (r < 0) | |
418 | return log_debug_errno(r, "Error getting memory.current from %s: %m", path); | |
419 | ||
420 | r = cg_get_attribute_as_uint64(SYSTEMD_CGROUP_CONTROLLER, path, "memory.min", &ctx->memory_min); | |
421 | if (r < 0) | |
422 | return log_debug_errno(r, "Error getting memory.min from %s: %m", path); | |
423 | ||
424 | r = cg_get_attribute_as_uint64(SYSTEMD_CGROUP_CONTROLLER, path, "memory.low", &ctx->memory_low); | |
425 | if (r < 0) | |
426 | return log_debug_errno(r, "Error getting memory.low from %s: %m", path); | |
427 | ||
428 | r = cg_get_attribute_as_uint64(SYSTEMD_CGROUP_CONTROLLER, path, "memory.swap.current", &ctx->swap_usage); | |
13540027 DS |
429 | if (r == -ENODATA) |
430 | /* The kernel can be compiled without support for memory.swap.* files, | |
431 | * or it can be disabled with boot param 'swapaccount=0' */ | |
432 | log_once(LOG_WARNING, "No kernel support for memory.swap.current from %s (try boot param swapaccount=1), ignoring.", path); | |
433 | else if (r < 0) | |
61ff7397 AZ |
434 | return log_debug_errno(r, "Error getting memory.swap.current from %s: %m", path); |
435 | ||
436 | r = cg_get_keyed_attribute(SYSTEMD_CGROUP_CONTROLLER, path, "memory.stat", STRV_MAKE("pgscan"), &val); | |
437 | if (r < 0) | |
438 | return log_debug_errno(r, "Error getting pgscan from memory.stat under %s: %m", path); | |
439 | ||
440 | r = safe_atou64(val, &ctx->pgscan); | |
441 | if (r < 0) | |
442 | return log_debug_errno(r, "Error converting pgscan value to uint64_t: %m"); | |
443 | } | |
444 | ||
445 | ctx->path = strdup(empty_to_root(path)); | |
446 | if (!ctx->path) | |
447 | return -ENOMEM; | |
448 | ||
449 | *ret = TAKE_PTR(ctx); | |
450 | return 0; | |
451 | } | |
452 | ||
47136b9d | 453 | int oomd_system_context_acquire(const char *proc_meminfo_path, OomdSystemContext *ret) { |
61ff7397 | 454 | _cleanup_fclose_ FILE *f = NULL; |
47136b9d | 455 | unsigned field_filled = 0; |
61ff7397 | 456 | OomdSystemContext ctx = {}; |
030bc91c | 457 | uint64_t mem_available, swap_free; |
61ff7397 AZ |
458 | int r; |
459 | ||
e82acab4 AZ |
460 | enum { |
461 | MEM_TOTAL = 1U << 0, | |
030bc91c | 462 | MEM_AVAILABLE = 1U << 1, |
e82acab4 AZ |
463 | SWAP_TOTAL = 1U << 2, |
464 | SWAP_FREE = 1U << 3, | |
030bc91c | 465 | ALL = MEM_TOTAL|MEM_AVAILABLE|SWAP_TOTAL|SWAP_FREE, |
e82acab4 AZ |
466 | }; |
467 | ||
47136b9d | 468 | assert(proc_meminfo_path); |
61ff7397 AZ |
469 | assert(ret); |
470 | ||
47136b9d | 471 | f = fopen(proc_meminfo_path, "re"); |
61ff7397 AZ |
472 | if (!f) |
473 | return -errno; | |
474 | ||
61ff7397 | 475 | for (;;) { |
47136b9d AZ |
476 | _cleanup_free_ char *line = NULL; |
477 | char *word; | |
61ff7397 | 478 | |
47136b9d AZ |
479 | r = read_line(f, LONG_LINE_MAX, &line); |
480 | if (r < 0) | |
481 | return r; | |
482 | if (r == 0) | |
483 | return -EINVAL; | |
484 | ||
eeeaa422 | 485 | if ((word = startswith(line, "MemTotal:"))) { |
e82acab4 | 486 | field_filled |= MEM_TOTAL; |
eeeaa422 | 487 | r = convert_meminfo_value_to_uint64_bytes(word, &ctx.mem_total); |
030bc91c NR |
488 | } else if ((word = startswith(line, "MemAvailable:"))) { |
489 | field_filled |= MEM_AVAILABLE; | |
490 | r = convert_meminfo_value_to_uint64_bytes(word, &mem_available); | |
eeeaa422 | 491 | } else if ((word = startswith(line, "SwapTotal:"))) { |
e82acab4 | 492 | field_filled |= SWAP_TOTAL; |
47136b9d AZ |
493 | r = convert_meminfo_value_to_uint64_bytes(word, &ctx.swap_total); |
494 | } else if ((word = startswith(line, "SwapFree:"))) { | |
e82acab4 | 495 | field_filled |= SWAP_FREE; |
47136b9d AZ |
496 | r = convert_meminfo_value_to_uint64_bytes(word, &swap_free); |
497 | } else | |
498 | continue; | |
61ff7397 | 499 | |
47136b9d AZ |
500 | if (r < 0) |
501 | return log_debug_errno(r, "Error converting '%s' from %s to uint64_t: %m", line, proc_meminfo_path); | |
61ff7397 | 502 | |
e82acab4 | 503 | if (field_filled == ALL) |
47136b9d AZ |
504 | break; |
505 | } | |
61ff7397 | 506 | |
e82acab4 | 507 | if (field_filled != ALL) |
47136b9d | 508 | return log_debug_errno(SYNTHETIC_ERRNO(EINVAL), "%s is missing expected fields", proc_meminfo_path); |
61ff7397 | 509 | |
030bc91c | 510 | if (mem_available > ctx.mem_total) |
eeeaa422 | 511 | return log_debug_errno(SYNTHETIC_ERRNO(EINVAL), |
030bc91c NR |
512 | "MemAvailable (%" PRIu64 ") cannot be greater than MemTotal (%" PRIu64 ") %m", |
513 | mem_available, | |
eeeaa422 AZ |
514 | ctx.mem_total); |
515 | ||
47136b9d AZ |
516 | if (swap_free > ctx.swap_total) |
517 | return log_debug_errno(SYNTHETIC_ERRNO(EINVAL), | |
518 | "SwapFree (%" PRIu64 ") cannot be greater than SwapTotal (%" PRIu64 ") %m", | |
519 | swap_free, | |
520 | ctx.swap_total); | |
521 | ||
030bc91c | 522 | ctx.mem_used = ctx.mem_total - mem_available; |
47136b9d | 523 | ctx.swap_used = ctx.swap_total - swap_free; |
61ff7397 AZ |
524 | |
525 | *ret = ctx; | |
526 | return 0; | |
527 | } | |
528 | ||
529 | int oomd_insert_cgroup_context(Hashmap *old_h, Hashmap *new_h, const char *path) { | |
530 | _cleanup_(oomd_cgroup_context_freep) OomdCGroupContext *curr_ctx = NULL; | |
45da27fa | 531 | OomdCGroupContext *old_ctx; |
61ff7397 AZ |
532 | int r; |
533 | ||
534 | assert(new_h); | |
535 | assert(path); | |
536 | ||
50c0578b AZ |
537 | path = empty_to_root(path); |
538 | ||
61ff7397 AZ |
539 | r = oomd_cgroup_context_acquire(path, &curr_ctx); |
540 | if (r < 0) | |
541 | return log_debug_errno(r, "Failed to get OomdCGroupContext for %s: %m", path); | |
542 | ||
50c0578b AZ |
543 | assert_se(streq(path, curr_ctx->path)); |
544 | ||
61ff7397 AZ |
545 | old_ctx = hashmap_get(old_h, path); |
546 | if (old_ctx) { | |
547 | curr_ctx->last_pgscan = old_ctx->pgscan; | |
548 | curr_ctx->mem_pressure_limit = old_ctx->mem_pressure_limit; | |
69c8f025 | 549 | curr_ctx->mem_pressure_limit_hit_start = old_ctx->mem_pressure_limit_hit_start; |
df637ede | 550 | curr_ctx->last_had_mem_reclaim = old_ctx->last_had_mem_reclaim; |
61ff7397 AZ |
551 | } |
552 | ||
df637ede AZ |
553 | if (oomd_pgscan_rate(curr_ctx) > 0) |
554 | curr_ctx->last_had_mem_reclaim = now(CLOCK_MONOTONIC); | |
555 | ||
45da27fa | 556 | r = hashmap_put(new_h, curr_ctx->path, curr_ctx); |
61ff7397 AZ |
557 | if (r < 0) |
558 | return r; | |
559 | ||
45da27fa | 560 | TAKE_PTR(curr_ctx); |
61ff7397 AZ |
561 | return 0; |
562 | } | |
5c616ecf | 563 | |
b037a6da AZ |
564 | void oomd_update_cgroup_contexts_between_hashmaps(Hashmap *old_h, Hashmap *curr_h) { |
565 | OomdCGroupContext *ctx; | |
566 | ||
567 | assert(old_h); | |
568 | assert(curr_h); | |
569 | ||
570 | HASHMAP_FOREACH(ctx, curr_h) { | |
571 | OomdCGroupContext *old_ctx; | |
572 | ||
573 | old_ctx = hashmap_get(old_h, ctx->path); | |
574 | if (!old_ctx) | |
575 | continue; | |
576 | ||
577 | ctx->last_pgscan = old_ctx->pgscan; | |
578 | ctx->mem_pressure_limit = old_ctx->mem_pressure_limit; | |
69c8f025 | 579 | ctx->mem_pressure_limit_hit_start = old_ctx->mem_pressure_limit_hit_start; |
df637ede AZ |
580 | ctx->last_had_mem_reclaim = old_ctx->last_had_mem_reclaim; |
581 | ||
582 | if (oomd_pgscan_rate(ctx) > 0) | |
583 | ctx->last_had_mem_reclaim = now(CLOCK_MONOTONIC); | |
b037a6da AZ |
584 | } |
585 | } | |
586 | ||
5c616ecf | 587 | void oomd_dump_swap_cgroup_context(const OomdCGroupContext *ctx, FILE *f, const char *prefix) { |
5c616ecf AZ |
588 | assert(ctx); |
589 | assert(f); | |
590 | ||
591 | if (!empty_or_root(ctx->path)) | |
592 | fprintf(f, | |
593 | "%sPath: %s\n" | |
594 | "%s\tSwap Usage: %s\n", | |
595 | strempty(prefix), ctx->path, | |
2b59bf51 | 596 | strempty(prefix), FORMAT_BYTES(ctx->swap_usage)); |
5c616ecf AZ |
597 | else |
598 | fprintf(f, | |
599 | "%sPath: %s\n" | |
600 | "%s\tSwap Usage: (see System Context)\n", | |
601 | strempty(prefix), ctx->path, | |
602 | strempty(prefix)); | |
603 | } | |
604 | ||
605 | void oomd_dump_memory_pressure_cgroup_context(const OomdCGroupContext *ctx, FILE *f, const char *prefix) { | |
5c616ecf AZ |
606 | assert(ctx); |
607 | assert(f); | |
608 | ||
609 | fprintf(f, | |
610 | "%sPath: %s\n" | |
0a9f9344 | 611 | "%s\tMemory Pressure Limit: %lu.%02lu%%\n" |
5c616ecf AZ |
612 | "%s\tPressure: Avg10: %lu.%02lu Avg60: %lu.%02lu Avg300: %lu.%02lu Total: %s\n" |
613 | "%s\tCurrent Memory Usage: %s\n", | |
614 | strempty(prefix), ctx->path, | |
3542da24 | 615 | strempty(prefix), LOADAVG_INT_SIDE(ctx->mem_pressure_limit), LOADAVG_DECIMAL_SIDE(ctx->mem_pressure_limit), |
5c616ecf | 616 | strempty(prefix), |
3542da24 LB |
617 | LOADAVG_INT_SIDE(ctx->memory_pressure.avg10), LOADAVG_DECIMAL_SIDE(ctx->memory_pressure.avg10), |
618 | LOADAVG_INT_SIDE(ctx->memory_pressure.avg60), LOADAVG_DECIMAL_SIDE(ctx->memory_pressure.avg60), | |
619 | LOADAVG_INT_SIDE(ctx->memory_pressure.avg300), LOADAVG_DECIMAL_SIDE(ctx->memory_pressure.avg300), | |
5291f26d | 620 | FORMAT_TIMESPAN(ctx->memory_pressure.total, USEC_PER_SEC), |
2b59bf51 | 621 | strempty(prefix), FORMAT_BYTES(ctx->current_memory_usage)); |
5c616ecf AZ |
622 | |
623 | if (!empty_or_root(ctx->path)) | |
624 | fprintf(f, | |
625 | "%s\tMemory Min: %s\n" | |
626 | "%s\tMemory Low: %s\n" | |
bb081240 AZ |
627 | "%s\tPgscan: %" PRIu64 "\n" |
628 | "%s\tLast Pgscan: %" PRIu64 "\n", | |
9ca7e3d0 ZJS |
629 | strempty(prefix), FORMAT_BYTES_CGROUP_PROTECTION(ctx->memory_min), |
630 | strempty(prefix), FORMAT_BYTES_CGROUP_PROTECTION(ctx->memory_low), | |
bb081240 AZ |
631 | strempty(prefix), ctx->pgscan, |
632 | strempty(prefix), ctx->last_pgscan); | |
5c616ecf AZ |
633 | } |
634 | ||
635 | void oomd_dump_system_context(const OomdSystemContext *ctx, FILE *f, const char *prefix) { | |
5c616ecf AZ |
636 | assert(ctx); |
637 | assert(f); | |
638 | ||
639 | fprintf(f, | |
eeeaa422 | 640 | "%sMemory: Used: %s Total: %s\n" |
5c616ecf AZ |
641 | "%sSwap: Used: %s Total: %s\n", |
642 | strempty(prefix), | |
2b59bf51 ZJS |
643 | FORMAT_BYTES(ctx->mem_used), |
644 | FORMAT_BYTES(ctx->mem_total), | |
eeeaa422 | 645 | strempty(prefix), |
2b59bf51 ZJS |
646 | FORMAT_BYTES(ctx->swap_used), |
647 | FORMAT_BYTES(ctx->swap_total)); | |
5c616ecf | 648 | } |