]>
Commit | Line | Data |
---|---|---|
db9ecf05 | 1 | /* SPDX-License-Identifier: LGPL-2.1-or-later */ |
9de5e321 | 2 | |
5c616ecf AZ |
3 | #include "bus-log-control-api.h" |
4 | #include "bus-util.h" | |
5 | #include "bus-polkit.h" | |
9de5e321 AZ |
6 | #include "cgroup-util.h" |
7 | #include "fd-util.h" | |
8 | #include "fileio.h" | |
408a3bbd | 9 | #include "memory-util.h" |
5c616ecf | 10 | #include "oomd-manager-bus.h" |
9de5e321 AZ |
11 | #include "oomd-manager.h" |
12 | #include "path-util.h" | |
d9d3f05d | 13 | #include "percent-util.h" |
9de5e321 AZ |
14 | |
15 | typedef struct ManagedOOMReply { | |
16 | ManagedOOMMode mode; | |
17 | char *path; | |
18 | char *property; | |
d06e7fb5 | 19 | uint32_t limit; |
9de5e321 AZ |
20 | } ManagedOOMReply; |
21 | ||
22 | static void managed_oom_reply_destroy(ManagedOOMReply *reply) { | |
23 | assert(reply); | |
24 | free(reply->path); | |
25 | free(reply->property); | |
26 | } | |
27 | ||
28 | static int managed_oom_mode(const char *name, JsonVariant *v, JsonDispatchFlags flags, void *userdata) { | |
29 | ManagedOOMMode *mode = userdata, m; | |
30 | const char *s; | |
31 | ||
32 | assert(mode); | |
33 | assert_se(s = json_variant_string(v)); | |
34 | ||
35 | m = managed_oom_mode_from_string(s); | |
36 | if (m < 0) | |
7211c853 | 37 | return json_log(v, flags, m, "%s is not a valid ManagedOOMMode", s); |
9de5e321 AZ |
38 | |
39 | *mode = m; | |
40 | return 0; | |
41 | } | |
42 | ||
43 | static int process_managed_oom_reply( | |
44 | Varlink *link, | |
45 | JsonVariant *parameters, | |
46 | const char *error_id, | |
47 | VarlinkReplyFlags flags, | |
48 | void *userdata) { | |
49 | JsonVariant *c, *cgroups; | |
50 | Manager *m = userdata; | |
51 | int r = 0; | |
52 | ||
53 | assert(m); | |
54 | ||
55 | static const JsonDispatch dispatch_table[] = { | |
d06e7fb5 LP |
56 | { "mode", JSON_VARIANT_STRING, managed_oom_mode, offsetof(ManagedOOMReply, mode), JSON_MANDATORY }, |
57 | { "path", JSON_VARIANT_STRING, json_dispatch_string, offsetof(ManagedOOMReply, path), JSON_MANDATORY }, | |
58 | { "property", JSON_VARIANT_STRING, json_dispatch_string, offsetof(ManagedOOMReply, property), JSON_MANDATORY }, | |
59 | { "limit", JSON_VARIANT_UNSIGNED, json_dispatch_uint32, offsetof(ManagedOOMReply, limit), 0 }, | |
9de5e321 AZ |
60 | {}, |
61 | }; | |
62 | ||
63 | if (error_id) { | |
64 | r = -EIO; | |
65 | log_debug("Error getting ManagedOOM cgroups: %s", error_id); | |
66 | goto finish; | |
67 | } | |
68 | ||
69 | cgroups = json_variant_by_key(parameters, "cgroups"); | |
70 | if (!cgroups) { | |
71 | r = -EINVAL; | |
72 | goto finish; | |
73 | } | |
74 | ||
75 | /* Skip malformed elements and keep processing in case the others are good */ | |
76 | JSON_VARIANT_ARRAY_FOREACH(c, cgroups) { | |
77 | _cleanup_(managed_oom_reply_destroy) ManagedOOMReply reply = {}; | |
78 | OomdCGroupContext *ctx; | |
79 | Hashmap *monitor_hm; | |
80 | loadavg_t limit; | |
81 | int ret; | |
82 | ||
83 | if (!json_variant_is_object(c)) | |
84 | continue; | |
85 | ||
86 | ret = json_dispatch(c, dispatch_table, NULL, 0, &reply); | |
87 | if (ret == -ENOMEM) { | |
88 | r = ret; | |
89 | goto finish; | |
d06e7fb5 LP |
90 | } |
91 | if (ret < 0) | |
9de5e321 AZ |
92 | continue; |
93 | ||
94 | monitor_hm = streq(reply.property, "ManagedOOMSwap") ? | |
95 | m->monitored_swap_cgroup_contexts : m->monitored_mem_pressure_cgroup_contexts; | |
96 | ||
97 | if (reply.mode == MANAGED_OOM_AUTO) { | |
df7f3eab | 98 | (void) oomd_cgroup_context_free(hashmap_remove(monitor_hm, empty_to_root(reply.path))); |
9de5e321 AZ |
99 | continue; |
100 | } | |
101 | ||
102 | limit = m->default_mem_pressure_limit; | |
103 | ||
d06e7fb5 LP |
104 | if (streq(reply.property, "ManagedOOMMemoryPressure") && reply.limit > 0) { |
105 | int permyriad = UINT32_SCALE_TO_PERMYRIAD(reply.limit); | |
106 | ||
107 | ret = store_loadavg_fixed_point( | |
108 | (unsigned long) permyriad / 100, | |
109 | (unsigned long) permyriad % 100, | |
110 | &limit); | |
111 | if (ret < 0) | |
9de5e321 | 112 | continue; |
9de5e321 AZ |
113 | } |
114 | ||
50c0578b | 115 | ret = oomd_insert_cgroup_context(NULL, monitor_hm, reply.path); |
9de5e321 AZ |
116 | if (ret == -ENOMEM) { |
117 | r = ret; | |
118 | goto finish; | |
119 | } | |
77b04c0a AZ |
120 | if (ret < 0 && ret != -EEXIST) |
121 | log_debug_errno(ret, "Failed to insert reply, ignoring: %m"); | |
9de5e321 AZ |
122 | |
123 | /* Always update the limit in case it was changed. For non-memory pressure detection the value is | |
124 | * ignored so always updating it here is not a problem. */ | |
df7f3eab | 125 | ctx = hashmap_get(monitor_hm, empty_to_root(reply.path)); |
9de5e321 AZ |
126 | if (ctx) |
127 | ctx->mem_pressure_limit = limit; | |
128 | } | |
129 | ||
130 | finish: | |
131 | if (!FLAGS_SET(flags, VARLINK_REPLY_CONTINUES)) | |
132 | m->varlink = varlink_close_unref(link); | |
133 | ||
134 | return r; | |
135 | } | |
136 | ||
137 | /* Fill `new_h` with `path`'s descendent OomdCGroupContexts. Only include descendent cgroups that are possible | |
138 | * candidates for action. That is, only leaf cgroups or cgroups with memory.oom.group set to "1". | |
139 | * | |
140 | * This function ignores most errors in order to handle cgroups that may have been cleaned up while populating | |
141 | * the hashmap. | |
142 | * | |
143 | * `new_h` is of the form { key: cgroup paths -> value: OomdCGroupContext } */ | |
144 | static int recursively_get_cgroup_context(Hashmap *new_h, const char *path) { | |
145 | _cleanup_free_ char *subpath = NULL; | |
146 | _cleanup_closedir_ DIR *d = NULL; | |
147 | int r; | |
148 | ||
149 | assert(new_h); | |
150 | assert(path); | |
151 | ||
152 | r = cg_enumerate_subgroups(SYSTEMD_CGROUP_CONTROLLER, path, &d); | |
153 | if (r < 0) | |
154 | return r; | |
155 | ||
156 | r = cg_read_subgroup(d, &subpath); | |
157 | if (r < 0) | |
158 | return r; | |
159 | else if (r == 0) { /* No subgroups? We're a leaf node */ | |
160 | r = oomd_insert_cgroup_context(NULL, new_h, path); | |
77b04c0a AZ |
161 | if (r == -ENOMEM) |
162 | return r; | |
163 | if (r < 0) | |
164 | log_debug_errno(r, "Failed to insert context for %s, ignoring: %m", path); | |
165 | return 0; | |
9de5e321 AZ |
166 | } |
167 | ||
168 | do { | |
169 | _cleanup_free_ char *cg_path = NULL; | |
170 | bool oom_group; | |
171 | ||
172 | cg_path = path_join(empty_to_root(path), subpath); | |
173 | if (!cg_path) | |
174 | return -ENOMEM; | |
175 | ||
176 | subpath = mfree(subpath); | |
177 | ||
178 | r = cg_get_attribute_as_bool("memory", cg_path, "memory.oom.group", &oom_group); | |
179 | /* The cgroup might be gone. Skip it as a candidate since we can't get information on it. */ | |
77b04c0a AZ |
180 | if (r == -ENOMEM) |
181 | return r; | |
182 | if (r < 0) { | |
183 | log_debug_errno(r, "Failed to read memory.oom.group from %s, ignoring: %m", cg_path); | |
184 | return 0; | |
185 | } | |
9de5e321 | 186 | |
349a2003 | 187 | if (oom_group) |
9de5e321 | 188 | r = oomd_insert_cgroup_context(NULL, new_h, cg_path); |
349a2003 | 189 | else |
9de5e321 | 190 | r = recursively_get_cgroup_context(new_h, cg_path); |
349a2003 AZ |
191 | if (r == -ENOMEM) |
192 | return r; | |
77b04c0a AZ |
193 | if (r < 0) |
194 | log_debug_errno(r, "Failed to insert or recursively get from %s, ignoring: %m", cg_path); | |
9de5e321 AZ |
195 | } while ((r = cg_read_subgroup(d, &subpath)) > 0); |
196 | ||
197 | return 0; | |
198 | } | |
199 | ||
200 | static int update_monitored_cgroup_contexts(Hashmap **monitored_cgroups) { | |
201 | _cleanup_hashmap_free_ Hashmap *new_base = NULL; | |
202 | OomdCGroupContext *ctx; | |
203 | int r; | |
204 | ||
205 | assert(monitored_cgroups); | |
206 | ||
207 | new_base = hashmap_new(&oomd_cgroup_ctx_hash_ops); | |
208 | if (!new_base) | |
209 | return -ENOMEM; | |
210 | ||
211 | HASHMAP_FOREACH(ctx, *monitored_cgroups) { | |
212 | /* Skip most errors since the cgroup we're trying to update might not exist anymore. */ | |
213 | r = oomd_insert_cgroup_context(*monitored_cgroups, new_base, ctx->path); | |
214 | if (r == -ENOMEM) | |
215 | return r; | |
77b04c0a AZ |
216 | if (r < 0 && !IN_SET(r, -EEXIST, -ENOENT)) |
217 | log_debug_errno(r, "Failed to insert context for %s, ignoring: %m", ctx->path); | |
9de5e321 AZ |
218 | } |
219 | ||
220 | hashmap_free(*monitored_cgroups); | |
221 | *monitored_cgroups = TAKE_PTR(new_base); | |
222 | ||
223 | return 0; | |
224 | } | |
225 | ||
226 | static int get_monitored_cgroup_contexts_candidates(Hashmap *monitored_cgroups, Hashmap **ret_candidates) { | |
227 | _cleanup_hashmap_free_ Hashmap *candidates = NULL; | |
228 | OomdCGroupContext *ctx; | |
229 | int r; | |
230 | ||
231 | assert(monitored_cgroups); | |
232 | assert(ret_candidates); | |
233 | ||
234 | candidates = hashmap_new(&oomd_cgroup_ctx_hash_ops); | |
235 | if (!candidates) | |
236 | return -ENOMEM; | |
237 | ||
238 | HASHMAP_FOREACH(ctx, monitored_cgroups) { | |
239 | r = recursively_get_cgroup_context(candidates, ctx->path); | |
240 | if (r == -ENOMEM) | |
241 | return r; | |
77b04c0a AZ |
242 | if (r < 0) |
243 | log_debug_errno(r, "Failed to recursively get contexts for %s, ignoring: %m", ctx->path); | |
9de5e321 AZ |
244 | } |
245 | ||
246 | *ret_candidates = TAKE_PTR(candidates); | |
247 | ||
248 | return 0; | |
249 | } | |
250 | ||
91cbb4bd AZ |
251 | static int update_monitored_cgroup_contexts_candidates(Hashmap *monitored_cgroups, Hashmap **candidates) { |
252 | _cleanup_hashmap_free_ Hashmap *new_candidates = NULL; | |
253 | int r; | |
254 | ||
255 | assert(monitored_cgroups); | |
256 | assert(candidates); | |
257 | assert(*candidates); | |
258 | ||
259 | r = get_monitored_cgroup_contexts_candidates(monitored_cgroups, &new_candidates); | |
260 | if (r < 0) | |
261 | return log_debug_errno(r, "Failed to get candidate contexts: %m"); | |
262 | ||
263 | oomd_update_cgroup_contexts_between_hashmaps(*candidates, new_candidates); | |
264 | ||
265 | hashmap_free(*candidates); | |
266 | *candidates = TAKE_PTR(new_candidates); | |
267 | ||
268 | return 0; | |
269 | } | |
270 | ||
9de5e321 AZ |
271 | static int acquire_managed_oom_connect(Manager *m) { |
272 | _cleanup_(varlink_close_unrefp) Varlink *link = NULL; | |
273 | int r; | |
274 | ||
275 | assert(m); | |
276 | assert(m->event); | |
277 | ||
278 | r = varlink_connect_address(&link, VARLINK_ADDR_PATH_MANAGED_OOM); | |
279 | if (r < 0) | |
280 | return log_error_errno(r, "Failed to connect to %s: %m", VARLINK_ADDR_PATH_MANAGED_OOM); | |
281 | ||
282 | (void) varlink_set_userdata(link, m); | |
283 | (void) varlink_set_description(link, "oomd"); | |
284 | (void) varlink_set_relative_timeout(link, USEC_INFINITY); | |
285 | ||
286 | r = varlink_attach_event(link, m->event, SD_EVENT_PRIORITY_NORMAL); | |
287 | if (r < 0) | |
288 | return log_error_errno(r, "Failed to attach varlink connection to event loop: %m"); | |
289 | ||
290 | r = varlink_bind_reply(link, process_managed_oom_reply); | |
291 | if (r < 0) | |
292 | return log_error_errno(r, "Failed to bind reply callback: %m"); | |
293 | ||
294 | r = varlink_observe(link, "io.systemd.ManagedOOM.SubscribeManagedOOMCGroups", NULL); | |
295 | if (r < 0) | |
296 | return log_error_errno(r, "Failed to observe varlink call: %m"); | |
297 | ||
298 | m->varlink = TAKE_PTR(link); | |
299 | return 0; | |
300 | } | |
301 | ||
302 | static int monitor_cgroup_contexts_handler(sd_event_source *s, uint64_t usec, void *userdata) { | |
303 | _cleanup_set_free_ Set *targets = NULL; | |
304 | Manager *m = userdata; | |
305 | usec_t usec_now; | |
306 | int r; | |
307 | ||
308 | assert(s); | |
309 | assert(userdata); | |
310 | ||
311 | /* Reset timer */ | |
312 | r = sd_event_now(sd_event_source_get_event(s), CLOCK_MONOTONIC, &usec_now); | |
313 | if (r < 0) | |
77b04c0a | 314 | return log_error_errno(r, "Failed to reset event timer: %m"); |
9de5e321 AZ |
315 | |
316 | r = sd_event_source_set_time_relative(s, INTERVAL_USEC); | |
317 | if (r < 0) | |
77b04c0a | 318 | return log_error_errno(r, "Failed to set relative time for timer: %m"); |
9de5e321 AZ |
319 | |
320 | /* Reconnect if our connection dropped */ | |
321 | if (!m->varlink) { | |
322 | r = acquire_managed_oom_connect(m); | |
323 | if (r < 0) | |
77b04c0a | 324 | return log_error_errno(r, "Failed to acquire varlink connection: %m"); |
9de5e321 AZ |
325 | } |
326 | ||
327 | /* Update the cgroups used for detection/action */ | |
328 | r = update_monitored_cgroup_contexts(&m->monitored_swap_cgroup_contexts); | |
329 | if (r == -ENOMEM) | |
77b04c0a AZ |
330 | return log_oom(); |
331 | if (r < 0) | |
332 | log_debug_errno(r, "Failed to update monitored swap cgroup contexts, ignoring: %m"); | |
9de5e321 AZ |
333 | |
334 | r = update_monitored_cgroup_contexts(&m->monitored_mem_pressure_cgroup_contexts); | |
335 | if (r == -ENOMEM) | |
77b04c0a AZ |
336 | return log_oom(); |
337 | if (r < 0) | |
338 | log_debug_errno(r, "Failed to update monitored memory pressure cgroup contexts, ignoring: %m"); | |
9de5e321 | 339 | |
91cbb4bd AZ |
340 | r = update_monitored_cgroup_contexts_candidates( |
341 | m->monitored_mem_pressure_cgroup_contexts, &m->monitored_mem_pressure_cgroup_contexts_candidates); | |
342 | if (r == -ENOMEM) | |
77b04c0a AZ |
343 | return log_oom(); |
344 | if (r < 0) | |
345 | log_debug_errno(r, "Failed to update monitored memory pressure candidate cgroup contexts, ignoring: %m"); | |
91cbb4bd | 346 | |
9de5e321 | 347 | r = oomd_system_context_acquire("/proc/swaps", &m->system_context); |
408a3bbd AZ |
348 | /* If there aren't units depending on swap actions, the only error we exit on is ENOMEM. |
349 | * Allow ENOENT in the event that swap is disabled on the system. */ | |
350 | if (r == -ENOMEM || (r < 0 && r != -ENOENT && !hashmap_isempty(m->monitored_swap_cgroup_contexts))) | |
77b04c0a | 351 | return log_error_errno(r, "Failed to acquire system context: %m"); |
408a3bbd AZ |
352 | else if (r == -ENOENT) |
353 | zero(m->system_context); | |
9de5e321 | 354 | |
924c89e9 AZ |
355 | if (oomd_memory_reclaim(m->monitored_mem_pressure_cgroup_contexts)) |
356 | m->last_reclaim_at = usec_now; | |
357 | ||
9de5e321 AZ |
358 | /* If we're still recovering from a kill, don't try to kill again yet */ |
359 | if (m->post_action_delay_start > 0) { | |
360 | if (m->post_action_delay_start + POST_ACTION_DELAY_USEC > usec_now) | |
361 | return 0; | |
362 | else | |
363 | m->post_action_delay_start = 0; | |
364 | } | |
365 | ||
c20aa7b1 | 366 | r = oomd_pressure_above(m->monitored_mem_pressure_cgroup_contexts, m->default_mem_pressure_duration_usec, &targets); |
9de5e321 | 367 | if (r == -ENOMEM) |
77b04c0a AZ |
368 | return log_oom(); |
369 | if (r < 0) | |
370 | log_debug_errno(r, "Failed to check if memory pressure exceeded limits, ignoring: %m"); | |
9de5e321 | 371 | else if (r == 1) { |
924c89e9 | 372 | /* Check if there was reclaim activity in the given interval. The concern is the following case: |
9de5e321 AZ |
373 | * Pressure climbed, a lot of high-frequency pages were reclaimed, and we killed the offending |
374 | * cgroup. Even after this, well-behaved processes will fault in recently resident pages and | |
375 | * this will cause pressure to remain high. Thus if there isn't any reclaim pressure, no need | |
376 | * to kill something (it won't help anyways). */ | |
924c89e9 | 377 | if ((usec_now - m->last_reclaim_at) <= RECLAIM_DURATION_USEC) { |
9de5e321 AZ |
378 | OomdCGroupContext *t; |
379 | ||
9de5e321 | 380 | SET_FOREACH(t, targets) { |
37a7e159 AZ |
381 | _cleanup_free_ char *selected = NULL; |
382 | char ts[FORMAT_TIMESPAN_MAX]; | |
383 | ||
384 | log_debug("Memory pressure for %s is %lu.%02lu%% > %lu.%02lu%% for > %s with reclaim activity", | |
385 | t->path, | |
386 | LOAD_INT(t->memory_pressure.avg10), LOAD_FRAC(t->memory_pressure.avg10), | |
387 | LOAD_INT(t->mem_pressure_limit), LOAD_FRAC(t->mem_pressure_limit), | |
388 | format_timespan(ts, sizeof ts, | |
389 | m->default_mem_pressure_duration_usec, | |
390 | USEC_PER_SEC)); | |
391 | ||
392 | r = oomd_kill_by_pgscan_rate(m->monitored_mem_pressure_cgroup_contexts_candidates, t->path, m->dry_run, &selected); | |
9de5e321 | 393 | if (r == -ENOMEM) |
77b04c0a | 394 | return log_oom(); |
9de5e321 | 395 | if (r < 0) |
77b04c0a | 396 | log_notice_errno(r, "Failed to kill any cgroup(s) under %s based on pressure: %m", t->path); |
9de5e321 AZ |
397 | else { |
398 | /* Don't act on all the high pressure cgroups at once; return as soon as we kill one */ | |
399 | m->post_action_delay_start = usec_now; | |
37a7e159 AZ |
400 | if (selected) |
401 | log_notice("Killed %s due to memory pressure for %s being %lu.%02lu%% > %lu.%02lu%%" | |
402 | " for > %s with reclaim activity", | |
403 | selected, t->path, | |
404 | LOAD_INT(t->memory_pressure.avg10), LOAD_FRAC(t->memory_pressure.avg10), | |
405 | LOAD_INT(t->mem_pressure_limit), LOAD_FRAC(t->mem_pressure_limit), | |
406 | format_timespan(ts, sizeof ts, | |
407 | m->default_mem_pressure_duration_usec, | |
408 | USEC_PER_SEC)); | |
9de5e321 AZ |
409 | return 0; |
410 | } | |
411 | } | |
412 | } | |
413 | } | |
414 | ||
d06e7fb5 | 415 | if (oomd_swap_free_below(&m->system_context, 10000 - m->swap_used_limit_permyriad)) { |
9de5e321 | 416 | _cleanup_hashmap_free_ Hashmap *candidates = NULL; |
37a7e159 | 417 | _cleanup_free_ char *selected = NULL; |
9de5e321 | 418 | |
37a7e159 AZ |
419 | log_debug("Swap used (%"PRIu64") / total (%"PRIu64") is more than " PERMYRIAD_AS_PERCENT_FORMAT_STR, |
420 | m->system_context.swap_used, m->system_context.swap_total, | |
421 | PERMYRIAD_AS_PERCENT_FORMAT_VAL(m->swap_used_limit_permyriad)); | |
9de5e321 AZ |
422 | |
423 | r = get_monitored_cgroup_contexts_candidates(m->monitored_swap_cgroup_contexts, &candidates); | |
424 | if (r == -ENOMEM) | |
77b04c0a AZ |
425 | return log_oom(); |
426 | if (r < 0) | |
427 | log_debug_errno(r, "Failed to get monitored swap cgroup candidates, ignoring: %m"); | |
9de5e321 | 428 | |
37a7e159 | 429 | r = oomd_kill_by_swap_usage(candidates, m->dry_run, &selected); |
9de5e321 | 430 | if (r == -ENOMEM) |
77b04c0a | 431 | return log_oom(); |
9de5e321 | 432 | if (r < 0) |
77b04c0a | 433 | log_notice_errno(r, "Failed to kill any cgroup(s) based on swap: %m"); |
9de5e321 AZ |
434 | else { |
435 | m->post_action_delay_start = usec_now; | |
37a7e159 AZ |
436 | if (selected) |
437 | log_notice("Killed %s due to swap used (%"PRIu64") / total (%"PRIu64") being more than " | |
438 | PERMYRIAD_AS_PERCENT_FORMAT_STR, | |
439 | selected, m->system_context.swap_used, m->system_context.swap_total, | |
440 | PERMYRIAD_AS_PERCENT_FORMAT_VAL(m->swap_used_limit_permyriad)); | |
9de5e321 AZ |
441 | return 0; |
442 | } | |
443 | } | |
444 | ||
445 | return 0; | |
446 | } | |
447 | ||
448 | static int monitor_cgroup_contexts(Manager *m) { | |
449 | _cleanup_(sd_event_source_unrefp) sd_event_source *s = NULL; | |
450 | int r; | |
451 | ||
452 | assert(m); | |
453 | assert(m->event); | |
454 | ||
455 | r = sd_event_add_time(m->event, &s, CLOCK_MONOTONIC, 0, 0, monitor_cgroup_contexts_handler, m); | |
456 | if (r < 0) | |
457 | return r; | |
458 | ||
459 | r = sd_event_source_set_exit_on_failure(s, true); | |
460 | if (r < 0) | |
461 | return r; | |
462 | ||
463 | r = sd_event_source_set_enabled(s, SD_EVENT_ON); | |
464 | if (r < 0) | |
465 | return r; | |
466 | ||
467 | (void) sd_event_source_set_description(s, "oomd-timer"); | |
468 | ||
469 | m->cgroup_context_event_source = TAKE_PTR(s); | |
470 | return 0; | |
471 | } | |
472 | ||
75db809a | 473 | Manager* manager_free(Manager *m) { |
9de5e321 AZ |
474 | assert(m); |
475 | ||
476 | varlink_close_unref(m->varlink); | |
477 | sd_event_source_unref(m->cgroup_context_event_source); | |
478 | sd_event_unref(m->event); | |
479 | ||
5c616ecf AZ |
480 | bus_verify_polkit_async_registry_free(m->polkit_registry); |
481 | sd_bus_flush_close_unref(m->bus); | |
482 | ||
9de5e321 AZ |
483 | hashmap_free(m->monitored_swap_cgroup_contexts); |
484 | hashmap_free(m->monitored_mem_pressure_cgroup_contexts); | |
91cbb4bd | 485 | hashmap_free(m->monitored_mem_pressure_cgroup_contexts_candidates); |
9de5e321 | 486 | |
75db809a | 487 | return mfree(m); |
9de5e321 AZ |
488 | } |
489 | ||
490 | int manager_new(Manager **ret) { | |
491 | _cleanup_(manager_freep) Manager *m = NULL; | |
492 | int r; | |
493 | ||
494 | assert(ret); | |
495 | ||
496 | m = new0(Manager, 1); | |
497 | if (!m) | |
498 | return -ENOMEM; | |
499 | ||
500 | r = sd_event_default(&m->event); | |
501 | if (r < 0) | |
502 | return r; | |
503 | ||
504 | (void) sd_event_set_watchdog(m->event, true); | |
505 | ||
506 | r = sd_event_add_signal(m->event, NULL, SIGINT, NULL, NULL); | |
507 | if (r < 0) | |
508 | return r; | |
509 | ||
510 | r = sd_event_add_signal(m->event, NULL, SIGTERM, NULL, NULL); | |
511 | if (r < 0) | |
512 | return r; | |
513 | ||
514 | m->monitored_swap_cgroup_contexts = hashmap_new(&oomd_cgroup_ctx_hash_ops); | |
515 | if (!m->monitored_swap_cgroup_contexts) | |
516 | return -ENOMEM; | |
517 | ||
518 | m->monitored_mem_pressure_cgroup_contexts = hashmap_new(&oomd_cgroup_ctx_hash_ops); | |
519 | if (!m->monitored_mem_pressure_cgroup_contexts) | |
520 | return -ENOMEM; | |
521 | ||
91cbb4bd AZ |
522 | m->monitored_mem_pressure_cgroup_contexts_candidates = hashmap_new(&oomd_cgroup_ctx_hash_ops); |
523 | if (!m->monitored_mem_pressure_cgroup_contexts_candidates) | |
524 | return -ENOMEM; | |
525 | ||
9de5e321 AZ |
526 | *ret = TAKE_PTR(m); |
527 | return 0; | |
528 | } | |
529 | ||
5c616ecf AZ |
530 | static int manager_connect_bus(Manager *m) { |
531 | int r; | |
532 | ||
533 | assert(m); | |
534 | assert(!m->bus); | |
535 | ||
536 | r = bus_open_system_watch_bind_with_description(&m->bus, "bus-api-oom"); | |
537 | if (r < 0) | |
538 | return log_error_errno(r, "Failed to connect to bus: %m"); | |
539 | ||
c9a00f5a | 540 | r = bus_add_implementation(m->bus, &manager_object, m); |
5c616ecf | 541 | if (r < 0) |
c9a00f5a | 542 | return r; |
5c616ecf AZ |
543 | |
544 | r = bus_log_control_api_register(m->bus); | |
545 | if (r < 0) | |
546 | return r; | |
547 | ||
548 | r = sd_bus_request_name_async(m->bus, NULL, "org.freedesktop.oom1", 0, NULL, NULL); | |
549 | if (r < 0) | |
550 | return log_error_errno(r, "Failed to request name: %m"); | |
551 | ||
552 | r = sd_bus_attach_event(m->bus, m->event, 0); | |
553 | if (r < 0) | |
554 | return log_error_errno(r, "Failed to attach bus to event loop: %m"); | |
555 | ||
556 | return 0; | |
557 | } | |
558 | ||
d06e7fb5 LP |
559 | int manager_start( |
560 | Manager *m, | |
561 | bool dry_run, | |
562 | int swap_used_limit_permyriad, | |
563 | int mem_pressure_limit_permyriad, | |
564 | usec_t mem_pressure_usec) { | |
565 | ||
0a9f9344 | 566 | unsigned long l, f; |
9de5e321 AZ |
567 | int r; |
568 | ||
569 | assert(m); | |
570 | ||
571 | m->dry_run = dry_run; | |
572 | ||
d06e7fb5 LP |
573 | m->swap_used_limit_permyriad = swap_used_limit_permyriad >= 0 ? swap_used_limit_permyriad : DEFAULT_SWAP_USED_LIMIT_PERCENT * 100; |
574 | assert(m->swap_used_limit_permyriad <= 10000); | |
9de5e321 | 575 | |
d06e7fb5 | 576 | if (mem_pressure_limit_permyriad >= 0) { |
0a9f9344 AZ |
577 | assert(mem_pressure_limit_permyriad <= 10000); |
578 | ||
579 | l = mem_pressure_limit_permyriad / 100; | |
580 | f = mem_pressure_limit_permyriad % 100; | |
581 | } else { | |
582 | l = DEFAULT_MEM_PRESSURE_LIMIT_PERCENT; | |
583 | f = 0; | |
584 | } | |
585 | r = store_loadavg_fixed_point(l, f, &m->default_mem_pressure_limit); | |
9de5e321 AZ |
586 | if (r < 0) |
587 | return r; | |
588 | ||
c20aa7b1 AZ |
589 | m->default_mem_pressure_duration_usec = mem_pressure_usec ?: DEFAULT_MEM_PRESSURE_DURATION_USEC; |
590 | ||
5c616ecf AZ |
591 | r = manager_connect_bus(m); |
592 | if (r < 0) | |
593 | return r; | |
594 | ||
9de5e321 AZ |
595 | r = acquire_managed_oom_connect(m); |
596 | if (r < 0) | |
597 | return r; | |
598 | ||
599 | r = monitor_cgroup_contexts(m); | |
600 | if (r < 0) | |
601 | return r; | |
602 | ||
603 | return 0; | |
604 | } | |
5c616ecf AZ |
605 | |
606 | int manager_get_dump_string(Manager *m, char **ret) { | |
607 | _cleanup_free_ char *dump = NULL; | |
608 | _cleanup_fclose_ FILE *f = NULL; | |
c20aa7b1 | 609 | char buf[FORMAT_TIMESPAN_MAX]; |
5c616ecf AZ |
610 | OomdCGroupContext *c; |
611 | size_t size; | |
612 | char *key; | |
613 | int r; | |
614 | ||
615 | assert(m); | |
616 | assert(ret); | |
617 | ||
618 | f = open_memstream_unlocked(&dump, &size); | |
619 | if (!f) | |
620 | return -errno; | |
621 | ||
622 | fprintf(f, | |
623 | "Dry Run: %s\n" | |
d06e7fb5 | 624 | "Swap Used Limit: " PERMYRIAD_AS_PERCENT_FORMAT_STR "\n" |
0a9f9344 | 625 | "Default Memory Pressure Limit: %lu.%02lu%%\n" |
c20aa7b1 | 626 | "Default Memory Pressure Duration: %s\n" |
5c616ecf AZ |
627 | "System Context:\n", |
628 | yes_no(m->dry_run), | |
d06e7fb5 | 629 | PERMYRIAD_AS_PERCENT_FORMAT_VAL(m->swap_used_limit_permyriad), |
0a9f9344 | 630 | LOAD_INT(m->default_mem_pressure_limit), LOAD_FRAC(m->default_mem_pressure_limit), |
c20aa7b1 | 631 | format_timespan(buf, sizeof(buf), m->default_mem_pressure_duration_usec, USEC_PER_SEC)); |
5c616ecf AZ |
632 | oomd_dump_system_context(&m->system_context, f, "\t"); |
633 | ||
634 | fprintf(f, "Swap Monitored CGroups:\n"); | |
635 | HASHMAP_FOREACH_KEY(c, key, m->monitored_swap_cgroup_contexts) | |
636 | oomd_dump_swap_cgroup_context(c, f, "\t"); | |
637 | ||
638 | fprintf(f, "Memory Pressure Monitored CGroups:\n"); | |
639 | HASHMAP_FOREACH_KEY(c, key, m->monitored_mem_pressure_cgroup_contexts) | |
640 | oomd_dump_memory_pressure_cgroup_context(c, f, "\t"); | |
641 | ||
642 | r = fflush_and_check(f); | |
643 | if (r < 0) | |
644 | return r; | |
645 | ||
646 | f = safe_fclose(f); | |
647 | ||
648 | *ret = TAKE_PTR(dump); | |
649 | return 0; | |
650 | } |