]>
Commit | Line | Data |
---|---|---|
db9ecf05 | 1 | /* SPDX-License-Identifier: LGPL-2.1-or-later */ |
61ff7397 AZ |
2 | |
3 | #include <sys/xattr.h> | |
4 | #include <unistd.h> | |
5 | ||
00675c36 | 6 | #include "errno-util.h" |
61ff7397 | 7 | #include "fd-util.h" |
47136b9d | 8 | #include "fileio.h" |
61ff7397 AZ |
9 | #include "format-util.h" |
10 | #include "oomd-util.h" | |
11 | #include "parse-util.h" | |
12 | #include "path-util.h" | |
13 | #include "procfs-util.h" | |
14 | #include "signal-util.h" | |
15 | #include "sort-util.h" | |
16 | #include "stat-util.h" | |
17 | #include "stdio-util.h" | |
652a4efb | 18 | #include "user-util.h" |
61ff7397 AZ |
19 | |
20 | DEFINE_HASH_OPS_WITH_VALUE_DESTRUCTOR( | |
21 | oomd_cgroup_ctx_hash_ops, | |
22 | char, | |
23 | string_hash_func, | |
24 | string_compare_func, | |
25 | OomdCGroupContext, | |
26 | oomd_cgroup_context_free); | |
27 | ||
28 | static int log_kill(pid_t pid, int sig, void *userdata) { | |
29 | log_debug("oomd attempting to kill " PID_FMT " with %s", pid, signal_to_string(sig)); | |
30 | return 0; | |
31 | } | |
32 | ||
33 | static int increment_oomd_xattr(const char *path, const char *xattr, uint64_t num_procs_killed) { | |
34 | _cleanup_free_ char *value = NULL; | |
35 | char buf[DECIMAL_STR_MAX(uint64_t) + 1]; | |
36 | uint64_t curr_count = 0; | |
37 | int r; | |
38 | ||
39 | assert(path); | |
40 | assert(xattr); | |
41 | ||
42 | r = cg_get_xattr_malloc(SYSTEMD_CGROUP_CONTROLLER, path, xattr, &value); | |
00675c36 | 43 | if (r < 0 && !ERRNO_IS_XATTR_ABSENT(r)) |
61ff7397 AZ |
44 | return r; |
45 | ||
46 | if (!isempty(value)) { | |
47 | r = safe_atou64(value, &curr_count); | |
48 | if (r < 0) | |
49 | return r; | |
50 | } | |
51 | ||
52 | if (curr_count > UINT64_MAX - num_procs_killed) | |
53 | return -EOVERFLOW; | |
54 | ||
55 | xsprintf(buf, "%"PRIu64, curr_count + num_procs_killed); | |
56 | r = cg_set_xattr(SYSTEMD_CGROUP_CONTROLLER, path, xattr, buf, strlen(buf), 0); | |
57 | if (r < 0) | |
58 | return r; | |
59 | ||
60 | return 0; | |
61 | } | |
62 | ||
63 | OomdCGroupContext *oomd_cgroup_context_free(OomdCGroupContext *ctx) { | |
64 | if (!ctx) | |
65 | return NULL; | |
66 | ||
67 | free(ctx->path); | |
68 | return mfree(ctx); | |
69 | } | |
70 | ||
71 | int oomd_pressure_above(Hashmap *h, usec_t duration, Set **ret) { | |
72 | _cleanup_set_free_ Set *targets = NULL; | |
73 | OomdCGroupContext *ctx; | |
74 | char *key; | |
75 | int r; | |
76 | ||
77 | assert(h); | |
78 | assert(ret); | |
79 | ||
80 | targets = set_new(NULL); | |
81 | if (!targets) | |
82 | return -ENOMEM; | |
83 | ||
84 | HASHMAP_FOREACH_KEY(ctx, key, h) { | |
85 | if (ctx->memory_pressure.avg10 > ctx->mem_pressure_limit) { | |
86 | usec_t diff; | |
87 | ||
69c8f025 AZ |
88 | if (ctx->mem_pressure_limit_hit_start == 0) |
89 | ctx->mem_pressure_limit_hit_start = now(CLOCK_MONOTONIC); | |
61ff7397 | 90 | |
69c8f025 | 91 | diff = now(CLOCK_MONOTONIC) - ctx->mem_pressure_limit_hit_start; |
61ff7397 AZ |
92 | if (diff >= duration) { |
93 | r = set_put(targets, ctx); | |
94 | if (r < 0) | |
95 | return -ENOMEM; | |
96 | } | |
97 | } else | |
69c8f025 | 98 | ctx->mem_pressure_limit_hit_start = 0; |
61ff7397 AZ |
99 | } |
100 | ||
101 | if (!set_isempty(targets)) { | |
102 | *ret = TAKE_PTR(targets); | |
103 | return 1; | |
104 | } | |
105 | ||
106 | *ret = NULL; | |
107 | return 0; | |
108 | } | |
109 | ||
37d8020c AZ |
110 | uint64_t oomd_pgscan_rate(const OomdCGroupContext *c) { |
111 | uint64_t last_pgscan; | |
112 | ||
113 | assert(c); | |
114 | ||
115 | /* If last_pgscan > pgscan, assume the cgroup was recreated and reset last_pgscan to zero. | |
116 | * pgscan is monotonic and in practice should not decrease (except in the recreation case). */ | |
117 | last_pgscan = c->last_pgscan; | |
118 | if (c->last_pgscan > c->pgscan) { | |
119 | log_debug("Last pgscan %"PRIu64" greater than current pgscan %"PRIu64" for %s. Using last pgscan of zero.", | |
120 | c->last_pgscan, c->pgscan, c->path); | |
121 | last_pgscan = 0; | |
122 | } | |
123 | ||
124 | return c->pgscan - last_pgscan; | |
125 | } | |
126 | ||
030bc91c | 127 | bool oomd_mem_available_below(const OomdSystemContext *ctx, int threshold_permyriad) { |
eeeaa422 AZ |
128 | uint64_t mem_threshold; |
129 | ||
130 | assert(ctx); | |
131 | assert(threshold_permyriad <= 10000); | |
132 | ||
133 | mem_threshold = ctx->mem_total * threshold_permyriad / (uint64_t) 10000; | |
e82acab4 | 134 | return LESS_BY(ctx->mem_total, ctx->mem_used) < mem_threshold; |
eeeaa422 AZ |
135 | } |
136 | ||
d06e7fb5 | 137 | bool oomd_swap_free_below(const OomdSystemContext *ctx, int threshold_permyriad) { |
61ff7397 AZ |
138 | uint64_t swap_threshold; |
139 | ||
140 | assert(ctx); | |
d06e7fb5 | 141 | assert(threshold_permyriad <= 10000); |
61ff7397 | 142 | |
d06e7fb5 | 143 | swap_threshold = ctx->swap_total * threshold_permyriad / (uint64_t) 10000; |
61ff7397 AZ |
144 | return (ctx->swap_total - ctx->swap_used) < swap_threshold; |
145 | } | |
146 | ||
652a4efb | 147 | int oomd_fetch_cgroup_oom_preference(OomdCGroupContext *ctx, const char *prefix) { |
66bc4f6f | 148 | uid_t uid; |
652a4efb NR |
149 | int r; |
150 | ||
151 | assert(ctx); | |
152 | ||
153 | prefix = empty_to_root(prefix); | |
154 | ||
155 | if (!path_startswith(ctx->path, prefix)) | |
156 | return log_debug_errno(SYNTHETIC_ERRNO(EINVAL), | |
157 | "%s is not a descendant of %s", ctx->path, prefix); | |
158 | ||
159 | r = cg_get_owner(SYSTEMD_CGROUP_CONTROLLER, ctx->path, &uid); | |
160 | if (r < 0) | |
161 | return log_debug_errno(r, "Failed to get owner/group from %s: %m", ctx->path); | |
162 | ||
66bc4f6f NR |
163 | if (uid != 0) { |
164 | uid_t prefix_uid; | |
165 | ||
166 | r = cg_get_owner(SYSTEMD_CGROUP_CONTROLLER, prefix, &prefix_uid); | |
167 | if (r < 0) | |
168 | return log_debug_errno(r, "Failed to get owner/group from %s: %m", prefix); | |
169 | ||
170 | if (uid != prefix_uid) { | |
171 | ctx->preference = MANAGED_OOM_PREFERENCE_NONE; | |
172 | return 0; | |
173 | } | |
174 | } | |
175 | ||
176 | /* Ignore most errors when reading the xattr since it is usually unset and cgroup xattrs are only used | |
177 | * as an optional feature of systemd-oomd (and the system might not even support them). */ | |
178 | r = cg_get_xattr_bool(SYSTEMD_CGROUP_CONTROLLER, ctx->path, "user.oomd_avoid"); | |
179 | if (r == -ENOMEM) | |
180 | return log_oom_debug(); | |
181 | if (r < 0 && !ERRNO_IS_XATTR_ABSENT(r)) | |
182 | log_debug_errno(r, "Failed to get xattr user.oomd_avoid, ignoring: %m"); | |
183 | ctx->preference = r > 0 ? MANAGED_OOM_PREFERENCE_AVOID : ctx->preference; | |
652a4efb | 184 | |
66bc4f6f NR |
185 | r = cg_get_xattr_bool(SYSTEMD_CGROUP_CONTROLLER, ctx->path, "user.oomd_omit"); |
186 | if (r == -ENOMEM) | |
187 | return log_oom_debug(); | |
188 | if (r < 0 && !ERRNO_IS_XATTR_ABSENT(r)) | |
189 | log_debug_errno(r, "Failed to get xattr user.oomd_omit, ignoring: %m"); | |
190 | ctx->preference = r > 0 ? MANAGED_OOM_PREFERENCE_OMIT : ctx->preference; | |
652a4efb NR |
191 | |
192 | return 0; | |
193 | } | |
194 | ||
61ff7397 AZ |
195 | int oomd_sort_cgroup_contexts(Hashmap *h, oomd_compare_t compare_func, const char *prefix, OomdCGroupContext ***ret) { |
196 | _cleanup_free_ OomdCGroupContext **sorted = NULL; | |
197 | OomdCGroupContext *item; | |
198 | size_t k = 0; | |
652a4efb | 199 | int r; |
61ff7397 AZ |
200 | |
201 | assert(h); | |
202 | assert(compare_func); | |
203 | assert(ret); | |
204 | ||
205 | sorted = new0(OomdCGroupContext*, hashmap_size(h)); | |
206 | if (!sorted) | |
207 | return -ENOMEM; | |
208 | ||
209 | HASHMAP_FOREACH(item, h) { | |
59331b8e | 210 | /* Skip over cgroups that are not valid candidates or are explicitly marked for omission */ |
652a4efb NR |
211 | if (item->path && prefix && !path_startswith(item->path, prefix)) |
212 | continue; | |
213 | ||
214 | r = oomd_fetch_cgroup_oom_preference(item, prefix); | |
215 | if (r == -ENOMEM) | |
216 | return r; | |
217 | ||
218 | if (item->preference == MANAGED_OOM_PREFERENCE_OMIT) | |
61ff7397 AZ |
219 | continue; |
220 | ||
221 | sorted[k++] = item; | |
222 | } | |
223 | ||
224 | typesafe_qsort(sorted, k, compare_func); | |
225 | ||
226 | *ret = TAKE_PTR(sorted); | |
227 | ||
228 | assert(k <= INT_MAX); | |
229 | return (int) k; | |
230 | } | |
231 | ||
232 | int oomd_cgroup_kill(const char *path, bool recurse, bool dry_run) { | |
233 | _cleanup_set_free_ Set *pids_killed = NULL; | |
234 | int r; | |
235 | ||
236 | assert(path); | |
237 | ||
238 | if (dry_run) { | |
239 | _cleanup_free_ char *cg_path = NULL; | |
240 | ||
241 | r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, path, NULL, &cg_path); | |
242 | if (r < 0) | |
243 | return r; | |
244 | ||
3b703840 | 245 | log_info("oomd dry-run: Would have tried to kill %s with recurse=%s", cg_path, true_false(recurse)); |
61ff7397 AZ |
246 | return 0; |
247 | } | |
248 | ||
249 | pids_killed = set_new(NULL); | |
250 | if (!pids_killed) | |
251 | return -ENOMEM; | |
252 | ||
38c41427 NK |
253 | r = increment_oomd_xattr(path, "user.oomd_ooms", 1); |
254 | if (r < 0) | |
255 | log_debug_errno(r, "Failed to set user.oomd_ooms before kill: %m"); | |
256 | ||
61ff7397 AZ |
257 | if (recurse) |
258 | r = cg_kill_recursive(SYSTEMD_CGROUP_CONTROLLER, path, SIGKILL, CGROUP_IGNORE_SELF, pids_killed, log_kill, NULL); | |
259 | else | |
260 | r = cg_kill(SYSTEMD_CGROUP_CONTROLLER, path, SIGKILL, CGROUP_IGNORE_SELF, pids_killed, log_kill, NULL); | |
2ee20946 AZ |
261 | |
262 | /* The cgroup could have been cleaned up after we have sent SIGKILL to all of the processes, but before | |
263 | * we could do one last iteration of cgroup.procs to check. Or the service unit could have exited and | |
264 | * was removed between picking candidates and coming into this function. In either case, let's log | |
265 | * about it let the caller decide what to do once they know how many PIDs were killed. */ | |
266 | if (IN_SET(r, -ENOENT, -ENODEV)) | |
267 | log_debug_errno(r, "Error when sending SIGKILL to processes in cgroup path %s, ignoring: %m", path); | |
268 | else if (r < 0) | |
61ff7397 AZ |
269 | return r; |
270 | ||
914d4e99 AZ |
271 | if (set_isempty(pids_killed)) |
272 | log_debug("Nothing killed when attempting to kill %s", path); | |
273 | ||
e3038333 | 274 | r = increment_oomd_xattr(path, "user.oomd_kill", set_size(pids_killed)); |
61ff7397 | 275 | if (r < 0) |
e3038333 | 276 | log_debug_errno(r, "Failed to set user.oomd_kill on kill: %m"); |
61ff7397 AZ |
277 | |
278 | return set_size(pids_killed) != 0; | |
279 | } | |
280 | ||
29f4185a BB |
281 | typedef void (*dump_candidate_func)(const OomdCGroupContext *ctx, FILE *f, const char *prefix); |
282 | ||
283 | static int dump_kill_candidates(OomdCGroupContext **sorted, int n, int dump_until, dump_candidate_func dump_func) { | |
284 | /* Try dumping top offendors, ignoring any errors that might happen. */ | |
285 | _cleanup_free_ char *dump = NULL; | |
286 | _cleanup_fclose_ FILE *f = NULL; | |
287 | int r; | |
288 | size_t size; | |
289 | ||
290 | f = open_memstream_unlocked(&dump, &size); | |
291 | if (!f) | |
100abbc6 | 292 | return -errno; |
29f4185a BB |
293 | |
294 | fprintf(f, "Considered %d cgroups for killing, top candidates were:\n", n); | |
295 | for (int i = 0; i < dump_until; i++) | |
296 | dump_func(sorted[i], f, "\t"); | |
297 | ||
298 | r = fflush_and_check(f); | |
299 | if (r < 0) | |
300 | return r; | |
301 | ||
29f4185a BB |
302 | return log_dump(LOG_INFO, dump); |
303 | } | |
304 | ||
37a7e159 | 305 | int oomd_kill_by_pgscan_rate(Hashmap *h, const char *prefix, bool dry_run, char **ret_selected) { |
61ff7397 | 306 | _cleanup_free_ OomdCGroupContext **sorted = NULL; |
f94a80ab | 307 | int n, r, ret = 0; |
29f4185a | 308 | int dump_until; |
61ff7397 AZ |
309 | |
310 | assert(h); | |
37a7e159 | 311 | assert(ret_selected); |
61ff7397 | 312 | |
f94a80ab ZJS |
313 | n = oomd_sort_cgroup_contexts(h, compare_pgscan_rate_and_memory_usage, prefix, &sorted); |
314 | if (n < 0) | |
315 | return n; | |
61ff7397 | 316 | |
29f4185a | 317 | dump_until = MIN(n, DUMP_ON_KILL_COUNT); |
f94a80ab | 318 | for (int i = 0; i < n; i++) { |
37a7e159 AZ |
319 | /* Skip cgroups with no reclaim and memory usage; it won't alleviate pressure. |
320 | * Continue since there might be "avoid" cgroups at the end. */ | |
74f834e9 | 321 | if (sorted[i]->pgscan == 0 && sorted[i]->current_memory_usage == 0) |
59331b8e | 322 | continue; |
61ff7397 AZ |
323 | |
324 | r = oomd_cgroup_kill(sorted[i]->path, true, dry_run); | |
37a7e159 AZ |
325 | if (r == -ENOMEM) |
326 | return r; /* Treat oom as a hard error */ | |
327 | if (r < 0) { | |
328 | if (ret == 0) | |
329 | ret = r; | |
330 | continue; /* Try to find something else to kill */ | |
331 | } | |
332 | ||
b814de30 | 333 | dump_until = MAX(dump_until, i + 1); |
37a7e159 AZ |
334 | char *selected = strdup(sorted[i]->path); |
335 | if (!selected) | |
336 | return -ENOMEM; | |
337 | *ret_selected = selected; | |
29f4185a BB |
338 | ret = r; |
339 | break; | |
61ff7397 AZ |
340 | } |
341 | ||
29f4185a BB |
342 | dump_kill_candidates(sorted, n, dump_until, oomd_dump_memory_pressure_cgroup_context); |
343 | ||
37a7e159 | 344 | return ret; |
61ff7397 AZ |
345 | } |
346 | ||
685b0985 | 347 | int oomd_kill_by_swap_usage(Hashmap *h, uint64_t threshold_usage, bool dry_run, char **ret_selected) { |
61ff7397 | 348 | _cleanup_free_ OomdCGroupContext **sorted = NULL; |
f94a80ab | 349 | int n, r, ret = 0; |
29f4185a | 350 | int dump_until; |
61ff7397 AZ |
351 | |
352 | assert(h); | |
37a7e159 | 353 | assert(ret_selected); |
61ff7397 | 354 | |
f94a80ab ZJS |
355 | n = oomd_sort_cgroup_contexts(h, compare_swap_usage, NULL, &sorted); |
356 | if (n < 0) | |
357 | return n; | |
61ff7397 | 358 | |
29f4185a | 359 | dump_until = MIN(n, DUMP_ON_KILL_COUNT); |
685b0985 AZ |
360 | /* Try to kill cgroups with non-zero swap usage until we either succeed in killing or we get to a cgroup with |
361 | * no swap usage. Threshold killing only cgroups with more than threshold swap usage. */ | |
f94a80ab | 362 | for (int i = 0; i < n; i++) { |
685b0985 AZ |
363 | /* Skip over cgroups with not enough swap usage. Don't break since there might be "avoid" |
364 | * cgroups at the end. */ | |
365 | if (sorted[i]->swap_usage <= threshold_usage) | |
59331b8e | 366 | continue; |
61ff7397 AZ |
367 | |
368 | r = oomd_cgroup_kill(sorted[i]->path, true, dry_run); | |
37a7e159 AZ |
369 | if (r == -ENOMEM) |
370 | return r; /* Treat oom as a hard error */ | |
371 | if (r < 0) { | |
372 | if (ret == 0) | |
373 | ret = r; | |
374 | continue; /* Try to find something else to kill */ | |
375 | } | |
376 | ||
b814de30 | 377 | dump_until = MAX(dump_until, i + 1); |
37a7e159 AZ |
378 | char *selected = strdup(sorted[i]->path); |
379 | if (!selected) | |
380 | return -ENOMEM; | |
381 | *ret_selected = selected; | |
29f4185a BB |
382 | ret = r; |
383 | break; | |
61ff7397 AZ |
384 | } |
385 | ||
29f4185a BB |
386 | dump_kill_candidates(sorted, n, dump_until, oomd_dump_swap_cgroup_context); |
387 | ||
37a7e159 | 388 | return ret; |
61ff7397 AZ |
389 | } |
390 | ||
391 | int oomd_cgroup_context_acquire(const char *path, OomdCGroupContext **ret) { | |
392 | _cleanup_(oomd_cgroup_context_freep) OomdCGroupContext *ctx = NULL; | |
393 | _cleanup_free_ char *p = NULL, *val = NULL; | |
394 | bool is_root; | |
395 | int r; | |
396 | ||
397 | assert(path); | |
398 | assert(ret); | |
399 | ||
400 | ctx = new0(OomdCGroupContext, 1); | |
401 | if (!ctx) | |
402 | return -ENOMEM; | |
403 | ||
404 | is_root = empty_or_root(path); | |
59331b8e | 405 | ctx->preference = MANAGED_OOM_PREFERENCE_NONE; |
61ff7397 AZ |
406 | |
407 | r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, path, "memory.pressure", &p); | |
408 | if (r < 0) | |
409 | return log_debug_errno(r, "Error getting cgroup memory pressure path from %s: %m", path); | |
410 | ||
411 | r = read_resource_pressure(p, PRESSURE_TYPE_FULL, &ctx->memory_pressure); | |
412 | if (r < 0) | |
413 | return log_debug_errno(r, "Error parsing memory pressure from %s: %m", p); | |
414 | ||
415 | if (is_root) { | |
416 | r = procfs_memory_get_used(&ctx->current_memory_usage); | |
417 | if (r < 0) | |
418 | return log_debug_errno(r, "Error getting memory used from procfs: %m"); | |
419 | } else { | |
420 | r = cg_get_attribute_as_uint64(SYSTEMD_CGROUP_CONTROLLER, path, "memory.current", &ctx->current_memory_usage); | |
421 | if (r < 0) | |
422 | return log_debug_errno(r, "Error getting memory.current from %s: %m", path); | |
423 | ||
424 | r = cg_get_attribute_as_uint64(SYSTEMD_CGROUP_CONTROLLER, path, "memory.min", &ctx->memory_min); | |
425 | if (r < 0) | |
426 | return log_debug_errno(r, "Error getting memory.min from %s: %m", path); | |
427 | ||
428 | r = cg_get_attribute_as_uint64(SYSTEMD_CGROUP_CONTROLLER, path, "memory.low", &ctx->memory_low); | |
429 | if (r < 0) | |
430 | return log_debug_errno(r, "Error getting memory.low from %s: %m", path); | |
431 | ||
432 | r = cg_get_attribute_as_uint64(SYSTEMD_CGROUP_CONTROLLER, path, "memory.swap.current", &ctx->swap_usage); | |
13540027 DS |
433 | if (r == -ENODATA) |
434 | /* The kernel can be compiled without support for memory.swap.* files, | |
435 | * or it can be disabled with boot param 'swapaccount=0' */ | |
436 | log_once(LOG_WARNING, "No kernel support for memory.swap.current from %s (try boot param swapaccount=1), ignoring.", path); | |
437 | else if (r < 0) | |
61ff7397 AZ |
438 | return log_debug_errno(r, "Error getting memory.swap.current from %s: %m", path); |
439 | ||
440 | r = cg_get_keyed_attribute(SYSTEMD_CGROUP_CONTROLLER, path, "memory.stat", STRV_MAKE("pgscan"), &val); | |
441 | if (r < 0) | |
442 | return log_debug_errno(r, "Error getting pgscan from memory.stat under %s: %m", path); | |
443 | ||
444 | r = safe_atou64(val, &ctx->pgscan); | |
445 | if (r < 0) | |
446 | return log_debug_errno(r, "Error converting pgscan value to uint64_t: %m"); | |
447 | } | |
448 | ||
449 | ctx->path = strdup(empty_to_root(path)); | |
450 | if (!ctx->path) | |
451 | return -ENOMEM; | |
452 | ||
453 | *ret = TAKE_PTR(ctx); | |
454 | return 0; | |
455 | } | |
456 | ||
47136b9d | 457 | int oomd_system_context_acquire(const char *proc_meminfo_path, OomdSystemContext *ret) { |
61ff7397 | 458 | _cleanup_fclose_ FILE *f = NULL; |
47136b9d | 459 | unsigned field_filled = 0; |
61ff7397 | 460 | OomdSystemContext ctx = {}; |
030bc91c | 461 | uint64_t mem_available, swap_free; |
61ff7397 AZ |
462 | int r; |
463 | ||
e82acab4 AZ |
464 | enum { |
465 | MEM_TOTAL = 1U << 0, | |
030bc91c | 466 | MEM_AVAILABLE = 1U << 1, |
e82acab4 AZ |
467 | SWAP_TOTAL = 1U << 2, |
468 | SWAP_FREE = 1U << 3, | |
030bc91c | 469 | ALL = MEM_TOTAL|MEM_AVAILABLE|SWAP_TOTAL|SWAP_FREE, |
e82acab4 AZ |
470 | }; |
471 | ||
47136b9d | 472 | assert(proc_meminfo_path); |
61ff7397 AZ |
473 | assert(ret); |
474 | ||
47136b9d | 475 | f = fopen(proc_meminfo_path, "re"); |
61ff7397 AZ |
476 | if (!f) |
477 | return -errno; | |
478 | ||
61ff7397 | 479 | for (;;) { |
47136b9d AZ |
480 | _cleanup_free_ char *line = NULL; |
481 | char *word; | |
61ff7397 | 482 | |
47136b9d AZ |
483 | r = read_line(f, LONG_LINE_MAX, &line); |
484 | if (r < 0) | |
485 | return r; | |
486 | if (r == 0) | |
487 | return -EINVAL; | |
488 | ||
eeeaa422 | 489 | if ((word = startswith(line, "MemTotal:"))) { |
e82acab4 | 490 | field_filled |= MEM_TOTAL; |
eeeaa422 | 491 | r = convert_meminfo_value_to_uint64_bytes(word, &ctx.mem_total); |
030bc91c NR |
492 | } else if ((word = startswith(line, "MemAvailable:"))) { |
493 | field_filled |= MEM_AVAILABLE; | |
494 | r = convert_meminfo_value_to_uint64_bytes(word, &mem_available); | |
eeeaa422 | 495 | } else if ((word = startswith(line, "SwapTotal:"))) { |
e82acab4 | 496 | field_filled |= SWAP_TOTAL; |
47136b9d AZ |
497 | r = convert_meminfo_value_to_uint64_bytes(word, &ctx.swap_total); |
498 | } else if ((word = startswith(line, "SwapFree:"))) { | |
e82acab4 | 499 | field_filled |= SWAP_FREE; |
47136b9d AZ |
500 | r = convert_meminfo_value_to_uint64_bytes(word, &swap_free); |
501 | } else | |
502 | continue; | |
61ff7397 | 503 | |
47136b9d AZ |
504 | if (r < 0) |
505 | return log_debug_errno(r, "Error converting '%s' from %s to uint64_t: %m", line, proc_meminfo_path); | |
61ff7397 | 506 | |
e82acab4 | 507 | if (field_filled == ALL) |
47136b9d AZ |
508 | break; |
509 | } | |
61ff7397 | 510 | |
e82acab4 | 511 | if (field_filled != ALL) |
47136b9d | 512 | return log_debug_errno(SYNTHETIC_ERRNO(EINVAL), "%s is missing expected fields", proc_meminfo_path); |
61ff7397 | 513 | |
030bc91c | 514 | if (mem_available > ctx.mem_total) |
eeeaa422 | 515 | return log_debug_errno(SYNTHETIC_ERRNO(EINVAL), |
030bc91c NR |
516 | "MemAvailable (%" PRIu64 ") cannot be greater than MemTotal (%" PRIu64 ") %m", |
517 | mem_available, | |
eeeaa422 AZ |
518 | ctx.mem_total); |
519 | ||
47136b9d AZ |
520 | if (swap_free > ctx.swap_total) |
521 | return log_debug_errno(SYNTHETIC_ERRNO(EINVAL), | |
522 | "SwapFree (%" PRIu64 ") cannot be greater than SwapTotal (%" PRIu64 ") %m", | |
523 | swap_free, | |
524 | ctx.swap_total); | |
525 | ||
030bc91c | 526 | ctx.mem_used = ctx.mem_total - mem_available; |
47136b9d | 527 | ctx.swap_used = ctx.swap_total - swap_free; |
61ff7397 AZ |
528 | |
529 | *ret = ctx; | |
530 | return 0; | |
531 | } | |
532 | ||
533 | int oomd_insert_cgroup_context(Hashmap *old_h, Hashmap *new_h, const char *path) { | |
534 | _cleanup_(oomd_cgroup_context_freep) OomdCGroupContext *curr_ctx = NULL; | |
45da27fa | 535 | OomdCGroupContext *old_ctx; |
61ff7397 AZ |
536 | int r; |
537 | ||
538 | assert(new_h); | |
539 | assert(path); | |
540 | ||
50c0578b AZ |
541 | path = empty_to_root(path); |
542 | ||
61ff7397 AZ |
543 | r = oomd_cgroup_context_acquire(path, &curr_ctx); |
544 | if (r < 0) | |
545 | return log_debug_errno(r, "Failed to get OomdCGroupContext for %s: %m", path); | |
546 | ||
50c0578b AZ |
547 | assert_se(streq(path, curr_ctx->path)); |
548 | ||
61ff7397 AZ |
549 | old_ctx = hashmap_get(old_h, path); |
550 | if (old_ctx) { | |
551 | curr_ctx->last_pgscan = old_ctx->pgscan; | |
552 | curr_ctx->mem_pressure_limit = old_ctx->mem_pressure_limit; | |
69c8f025 | 553 | curr_ctx->mem_pressure_limit_hit_start = old_ctx->mem_pressure_limit_hit_start; |
df637ede | 554 | curr_ctx->last_had_mem_reclaim = old_ctx->last_had_mem_reclaim; |
61ff7397 AZ |
555 | } |
556 | ||
df637ede AZ |
557 | if (oomd_pgscan_rate(curr_ctx) > 0) |
558 | curr_ctx->last_had_mem_reclaim = now(CLOCK_MONOTONIC); | |
559 | ||
45da27fa | 560 | r = hashmap_put(new_h, curr_ctx->path, curr_ctx); |
61ff7397 AZ |
561 | if (r < 0) |
562 | return r; | |
563 | ||
45da27fa | 564 | TAKE_PTR(curr_ctx); |
61ff7397 AZ |
565 | return 0; |
566 | } | |
5c616ecf | 567 | |
b037a6da AZ |
568 | void oomd_update_cgroup_contexts_between_hashmaps(Hashmap *old_h, Hashmap *curr_h) { |
569 | OomdCGroupContext *ctx; | |
570 | ||
571 | assert(old_h); | |
572 | assert(curr_h); | |
573 | ||
574 | HASHMAP_FOREACH(ctx, curr_h) { | |
575 | OomdCGroupContext *old_ctx; | |
576 | ||
577 | old_ctx = hashmap_get(old_h, ctx->path); | |
578 | if (!old_ctx) | |
579 | continue; | |
580 | ||
581 | ctx->last_pgscan = old_ctx->pgscan; | |
582 | ctx->mem_pressure_limit = old_ctx->mem_pressure_limit; | |
69c8f025 | 583 | ctx->mem_pressure_limit_hit_start = old_ctx->mem_pressure_limit_hit_start; |
df637ede AZ |
584 | ctx->last_had_mem_reclaim = old_ctx->last_had_mem_reclaim; |
585 | ||
586 | if (oomd_pgscan_rate(ctx) > 0) | |
587 | ctx->last_had_mem_reclaim = now(CLOCK_MONOTONIC); | |
b037a6da AZ |
588 | } |
589 | } | |
590 | ||
5c616ecf | 591 | void oomd_dump_swap_cgroup_context(const OomdCGroupContext *ctx, FILE *f, const char *prefix) { |
5c616ecf AZ |
592 | assert(ctx); |
593 | assert(f); | |
594 | ||
595 | if (!empty_or_root(ctx->path)) | |
596 | fprintf(f, | |
597 | "%sPath: %s\n" | |
598 | "%s\tSwap Usage: %s\n", | |
599 | strempty(prefix), ctx->path, | |
2b59bf51 | 600 | strempty(prefix), FORMAT_BYTES(ctx->swap_usage)); |
5c616ecf AZ |
601 | else |
602 | fprintf(f, | |
603 | "%sPath: %s\n" | |
604 | "%s\tSwap Usage: (see System Context)\n", | |
605 | strempty(prefix), ctx->path, | |
606 | strempty(prefix)); | |
607 | } | |
608 | ||
609 | void oomd_dump_memory_pressure_cgroup_context(const OomdCGroupContext *ctx, FILE *f, const char *prefix) { | |
5c616ecf AZ |
610 | assert(ctx); |
611 | assert(f); | |
612 | ||
613 | fprintf(f, | |
614 | "%sPath: %s\n" | |
0a9f9344 | 615 | "%s\tMemory Pressure Limit: %lu.%02lu%%\n" |
5c616ecf AZ |
616 | "%s\tPressure: Avg10: %lu.%02lu Avg60: %lu.%02lu Avg300: %lu.%02lu Total: %s\n" |
617 | "%s\tCurrent Memory Usage: %s\n", | |
618 | strempty(prefix), ctx->path, | |
3542da24 | 619 | strempty(prefix), LOADAVG_INT_SIDE(ctx->mem_pressure_limit), LOADAVG_DECIMAL_SIDE(ctx->mem_pressure_limit), |
5c616ecf | 620 | strempty(prefix), |
3542da24 LB |
621 | LOADAVG_INT_SIDE(ctx->memory_pressure.avg10), LOADAVG_DECIMAL_SIDE(ctx->memory_pressure.avg10), |
622 | LOADAVG_INT_SIDE(ctx->memory_pressure.avg60), LOADAVG_DECIMAL_SIDE(ctx->memory_pressure.avg60), | |
623 | LOADAVG_INT_SIDE(ctx->memory_pressure.avg300), LOADAVG_DECIMAL_SIDE(ctx->memory_pressure.avg300), | |
5291f26d | 624 | FORMAT_TIMESPAN(ctx->memory_pressure.total, USEC_PER_SEC), |
2b59bf51 | 625 | strempty(prefix), FORMAT_BYTES(ctx->current_memory_usage)); |
5c616ecf AZ |
626 | |
627 | if (!empty_or_root(ctx->path)) | |
628 | fprintf(f, | |
629 | "%s\tMemory Min: %s\n" | |
630 | "%s\tMemory Low: %s\n" | |
bb081240 AZ |
631 | "%s\tPgscan: %" PRIu64 "\n" |
632 | "%s\tLast Pgscan: %" PRIu64 "\n", | |
9ca7e3d0 ZJS |
633 | strempty(prefix), FORMAT_BYTES_CGROUP_PROTECTION(ctx->memory_min), |
634 | strempty(prefix), FORMAT_BYTES_CGROUP_PROTECTION(ctx->memory_low), | |
bb081240 AZ |
635 | strempty(prefix), ctx->pgscan, |
636 | strempty(prefix), ctx->last_pgscan); | |
5c616ecf AZ |
637 | } |
638 | ||
639 | void oomd_dump_system_context(const OomdSystemContext *ctx, FILE *f, const char *prefix) { | |
5c616ecf AZ |
640 | assert(ctx); |
641 | assert(f); | |
642 | ||
643 | fprintf(f, | |
eeeaa422 | 644 | "%sMemory: Used: %s Total: %s\n" |
5c616ecf AZ |
645 | "%sSwap: Used: %s Total: %s\n", |
646 | strempty(prefix), | |
2b59bf51 ZJS |
647 | FORMAT_BYTES(ctx->mem_used), |
648 | FORMAT_BYTES(ctx->mem_total), | |
eeeaa422 | 649 | strempty(prefix), |
2b59bf51 ZJS |
650 | FORMAT_BYTES(ctx->swap_used), |
651 | FORMAT_BYTES(ctx->swap_total)); | |
5c616ecf | 652 | } |