1 /* SPDX-License-Identifier: LGPL-2.1-or-later */
7 #include "cgroup-setup.h"
8 #include "dbus-scope.h"
10 #include "exit-status.h"
13 #include "parse-util.h"
15 #include "random-util.h"
17 #include "serialize.h"
20 #include "string-table.h"
21 #include "string-util.h"
24 #include "user-util.h"
/* Lookup table, indexed by ScopeState, giving the generic UnitActiveState that
 * corresponds to each scope state. SCOPE_RUNNING and SCOPE_ABANDONED both map to
 * UNIT_ACTIVE, and both stop states map to UNIT_DEACTIVATING. */
26 static const UnitActiveState state_translation_table
[_SCOPE_STATE_MAX
] = {
27 [SCOPE_DEAD
] = UNIT_INACTIVE
,
28 [SCOPE_START_CHOWN
] = UNIT_ACTIVATING
,
29 [SCOPE_RUNNING
] = UNIT_ACTIVE
,
30 [SCOPE_ABANDONED
] = UNIT_ACTIVE
,
31 [SCOPE_STOP_SIGTERM
] = UNIT_DEACTIVATING
,
32 [SCOPE_STOP_SIGKILL
] = UNIT_DEACTIVATING
,
33 [SCOPE_FAILED
] = UNIT_FAILED
,
/* Forward declaration of the timer callback: scope_arm_timer() passes it to
 * unit_arm_timer() before the definition appears further down in the file. */
36 static int scope_dispatch_timer(sd_event_source
*source
, usec_t usec
, void *userdata
);
/* Sets the scope-specific defaults on a unit that has not been loaded yet
 * (asserted via load_state == UNIT_STUB). */
38 static void scope_init(Unit
*u
) {
39 Scope
*s
= ASSERT_PTR(SCOPE(u
));
41 assert(u
->load_state
== UNIT_STUB
);
/* No runtime limit by default; the stop timeout is inherited from the manager defaults. */
43 s
->runtime_max_usec
= USEC_INFINITY
;
44 s
->timeout_stop_usec
= u
->manager
->defaults
.timeout_stop_usec
;
45 u
->ignore_on_isolate
= true;
46 s
->user
= s
->group
= NULL
;
/* Left invalid (negative) here; scope_add_extras() picks the effective policy when it sees a value < 0. */
47 s
->oom_policy
= _OOM_POLICY_INVALID
;
/* Releases the resources owned by the scope object: the controller name, the
 * controller bus tracker, the timer event source and the user/group strings.
 * Each field is overwritten with the return value of its release helper. */
50 static void scope_done(Unit
*u
) {
51 Scope
*s
= ASSERT_PTR(SCOPE(u
));
53 s
->controller
= mfree(s
->controller
);
54 s
->controller_track
= sd_bus_track_unref(s
->controller_track
);
56 s
->timer_event_source
= sd_event_source_disable_unref(s
->timer_event_source
);
58 s
->user
= mfree(s
->user
);
59 s
->group
= mfree(s
->group
);
/* Computes the timestamp used as the runtime timeout of a running scope.
 *
 * When runtime_rand_extra_usec is non-zero, a random delta is drawn via
 * random_u64_range() with that value as its argument, and logged. The result is
 * built with usec_add() starting from the unit's monotonic active-enter timestamp.
 *
 * NOTE(review): the declaration of 'delta' and the remaining usec_add() operands
 * are not visible in this excerpt (presumably runtime_max_usec and delta) — confirm
 * against the complete source. */
62 static usec_t
scope_running_timeout(Scope
*s
) {
67 if (s
->runtime_rand_extra_usec
!= 0) {
68 delta
= random_u64_range(s
->runtime_rand_extra_usec
);
69 log_unit_debug(UNIT(s
), "Adding delta of %s sec to timeout", FORMAT_TIMESPAN(delta
, USEC_PER_SEC
));
72 return usec_add(usec_add(UNIT(s
)->active_enter_timestamp
.monotonic
,
/* Thin wrapper around unit_arm_timer() that always uses the scope's own
 * timer_event_source slot and scope_dispatch_timer() as the callback.
 * 'relative' and 'usec' are forwarded unchanged; the return value is that of
 * unit_arm_timer(). */
77 static int scope_arm_timer(Scope
*s
, bool relative
, usec_t usec
) {
80 return unit_arm_timer(UNIT(s
), &s
->timer_event_source
, relative
, usec
, scope_dispatch_timer
);
/* Switches the scope to 'state' and performs the side effects tied to the
 * transition.
 *
 * NOTE(review): the lines that declare 'old_state' and store the new state are
 * not visible in this excerpt; the comments below cover the visible statements only. */
83 static void scope_set_state(Scope
*s
, ScopeState state
) {
/* Send the pending change signal for the unit only if the state actually changes. */
88 if (s
->state
!= state
)
89 bus_unit_send_pending_change_signal(UNIT(s
), false);
/* Only the two stop states, START_CHOWN and RUNNING keep the timer; any other state drops it. */
94 if (!IN_SET(state
, SCOPE_STOP_SIGTERM
, SCOPE_STOP_SIGKILL
, SCOPE_START_CHOWN
, SCOPE_RUNNING
))
95 s
->timer_event_source
= sd_event_source_disable_unref(s
->timer_event_source
);
/* When entering DEAD/FAILED from any other state, stop watching all PIDs of the unit. */
97 if (!IN_SET(old_state
, SCOPE_DEAD
, SCOPE_FAILED
) && IN_SET(state
, SCOPE_DEAD
, SCOPE_FAILED
))
98 unit_unwatch_all_pids(UNIT(s
));
100 if (state
!= old_state
)
101 log_unit_debug(UNIT(s
), "Changed %s -> %s",
102 scope_state_to_string(old_state
), scope_state_to_string(state
));
/* Report the transition to the generic unit layer, translated through state_translation_table. */
104 unit_notify(UNIT(s
), state_translation_table
[old_state
], state_translation_table
[state
], /* reload_success = */ true);
/* Adds the implicit dependencies of a scope: UNIT_BEFORE and UNIT_CONFLICTS on
 * SPECIAL_SHUTDOWN_TARGET, registered with the UNIT_DEPENDENCY_DEFAULT mask.
 * The default_dependencies flag of the unit is checked first.
 *
 * NOTE(review): the statement executed when default_dependencies is off, the first
 * argument of the dependency call and the final return are not visible here. */
107 static int scope_add_default_dependencies(Scope
*s
) {
112 if (!UNIT(s
)->default_dependencies
)
115 /* Make sure scopes are unloaded on shutdown */
116 r
= unit_add_two_dependencies_by_name(
118 UNIT_BEFORE
, UNIT_CONFLICTS
,
119 SPECIAL_SHUTDOWN_TARGET
, true,
120 UNIT_DEPENDENCY_DEFAULT
);
/* Sanity-checks a loaded scope (asserts load_state == UNIT_LOADED).
 *
 * A scope whose PID set is empty is refused with a logged error carrying
 * SYNTHETIC_ERRNO(ENOENT), unless the manager is currently reloading or the unit
 * is the special init scope.
 *
 * NOTE(review): the success return is not visible in this excerpt. */
127 static int scope_verify(Scope
*s
) {
129 assert(UNIT(s
)->load_state
== UNIT_LOADED
);
131 if (set_isempty(UNIT(s
)->pids
) &&
132 !MANAGER_IS_RELOADING(UNIT(s
)->manager
) &&
133 !unit_has_name(UNIT(s
), SPECIAL_INIT_SCOPE
))
134 return log_unit_error_errno(UNIT(s
), SYNTHETIC_ERRNO(ENOENT
), "Scope has no PIDs. Refusing.");
/* Special-case handling for the unit named SPECIAL_INIT_SCOPE: turns off default
 * dependencies and fills in a description and a documentation entry.
 *
 * NOTE(review): what happens for units with a different name, any guard around
 * the description assignment, and the return values are not visible in this
 * excerpt. The strdup() result is stored without a visible check. */
139 static int scope_load_init_scope(Unit
*u
) {
142 if (!unit_has_name(u
, SPECIAL_INIT_SCOPE
))
148 /* init.scope is a bit special, as it has to stick around forever. Because of its special semantics we
149 * synthesize it here, instead of relying on the unit file on disk. */
151 u
->default_dependencies
= false;
153 /* Prettify things, if we can. */
155 u
->description
= strdup("System and Service Manager");
/* Documentation is only added when none is set; the result of strv_extend() is deliberately ignored. */
156 if (!u
->documentation
)
157 (void) strv_extend(&u
->documentation
, "man:systemd(1)");
/* Completes a loaded scope: patches the unit contexts, assigns the default
 * slice, resolves the OOM policy and finally adds the default dependencies
 * (whose result is returned).
 *
 * NOTE(review): the error checks following the two 'r = ...' calls are not
 * visible in this excerpt. */
162 static int scope_add_extras(Scope
*s
) {
165 r
= unit_patch_contexts(UNIT(s
));
169 r
= unit_set_default_slice(UNIT(s
));
/* A negative policy means "not configured": delegated cgroups get OOM_CONTINUE,
 * everything else gets the manager's default policy. */
173 if (s
->oom_policy
< 0)
174 s
->oom_policy
= s
->cgroup_context
.delegate
? OOM_CONTINUE
: UNIT(s
)->manager
->defaults
.oom_policy
;
/* memory_oom_group is enabled exactly when the effective policy is OOM_KILL. */
176 s
->cgroup_context
.memory_oom_group
= s
->oom_policy
== OOM_KILL
;
178 return scope_add_default_dependencies(s
);
/* Load handler for scope units (asserts the unit is still UNIT_STUB).
 *
 * Visible sequence: check for non-transient units outside of a manager reload,
 * scope_load_init_scope(), unit_load_fragment_and_dropin(), a check whether the
 * unit ended up UNIT_LOADED, scope_add_extras(), and finally scope_verify(),
 * whose result is returned.
 *
 * NOTE(review): the bodies of the conditionals and the error checks after each
 * 'r = ...' are not visible in this excerpt. */
181 static int scope_load(Unit
*u
) {
182 Scope
*s
= ASSERT_PTR(SCOPE(u
));
185 assert(u
->load_state
== UNIT_STUB
);
187 if (!u
->transient
&& !MANAGER_IS_RELOADING(u
->manager
))
188 /* Refuse to load non-transient scope units, but allow them while reloading. */
191 r
= scope_load_init_scope(u
);
195 r
= unit_load_fragment_and_dropin(u
, false);
199 if (u
->load_state
!= UNIT_LOADED
)
202 r
= scope_add_extras(s
);
206 return scope_verify(s
);
/* Picks the absolute timeout to re-arm during coldplug, based on the
 * deserialized state:
 *  - one branch returns scope_running_timeout() (NOTE(review): its case label is
 *    not visible here — presumably SCOPE_RUNNING; confirm),
 *  - the two stop states return the state-change timestamp plus timeout_stop_usec,
 *  - a further branch returns USEC_INFINITY (label not visible; presumably the default). */
209 static usec_t
scope_coldplug_timeout(Scope
*s
) {
212 switch (s
->deserialized_state
) {
215 return scope_running_timeout(s
);
217 case SCOPE_STOP_SIGKILL
:
218 case SCOPE_STOP_SIGTERM
:
219 return usec_add(UNIT(s
)->state_change_timestamp
.monotonic
, s
->timeout_stop_usec
);
222 return USEC_INFINITY
;
/* Restores the runtime state of a scope from its deserialized state. The scope
 * is asserted to still be in SCOPE_DEAD on entry.
 *
 * Visible steps: compare deserialized_state with the current state, re-arm the
 * timer with the absolute value from scope_coldplug_timeout(), re-watch every
 * PID in u->pids unless the deserialized state is DEAD/FAILED, re-establish
 * controller tracking, and finally switch to the deserialized state.
 *
 * NOTE(review): the action taken when the states are equal, the error handling
 * after 'r = ...' and the return value are not visible in this excerpt. */
226 static int scope_coldplug(Unit
*u
) {
227 Scope
*s
= ASSERT_PTR(SCOPE(u
));
230 assert(s
->state
== SCOPE_DEAD
);
232 if (s
->deserialized_state
== s
->state
)
235 r
= scope_arm_timer(s
, /* relative= */ false, scope_coldplug_timeout(s
));
239 if (!IN_SET(s
->deserialized_state
, SCOPE_DEAD
, SCOPE_FAILED
) && u
->pids
) {
241 SET_FOREACH(pid
, u
->pids
) {
242 r
= unit_watch_pidref(u
, pid
, /* exclusive= */ false);
248 bus_scope_track_controller(s
);
250 scope_set_state(s
, s
->deserialized_state
);
/* Dumps the scope's settings in human-readable form. The visible arguments pair
 * 'prefix' with: the state string, the result string, the two runtime timespans
 * (formatted with USEC_PER_SEC accuracy) and the OOM policy string. Afterwards
 * the cgroup context and the kill context are dumped with the same f/prefix.
 *
 * NOTE(review): the output call itself and some format-string lines are not
 * visible in this excerpt. */
254 static void scope_dump(Unit
*u
, FILE *f
, const char *prefix
) {
255 Scope
*s
= ASSERT_PTR(SCOPE(u
));
261 "%sScope State: %s\n"
263 "%sRuntimeMaxSec: %s\n"
264 "%sRuntimeRandomizedExtraSec: %s\n"
266 prefix
, scope_state_to_string(s
->state
),
267 prefix
, scope_result_to_string(s
->result
),
268 prefix
, FORMAT_TIMESPAN(s
->runtime_max_usec
, USEC_PER_SEC
),
269 prefix
, FORMAT_TIMESPAN(s
->runtime_rand_extra_usec
, USEC_PER_SEC
),
270 prefix
, oom_policy_to_string(s
->oom_policy
));
272 cgroup_context_dump(u
, f
, prefix
);
273 kill_context_dump(&s
->kill_context
, f
, prefix
);
/* Final transition of a scope: logs the result and moves to SCOPE_FAILED when
 * the stored result is not SCOPE_SUCCESS, otherwise to SCOPE_DEAD.
 *
 * NOTE(review): the statement guarded by 'result == SCOPE_SUCCESS' is not visible
 * here — presumably it records 'f' as the result so that only the first failure
 * is kept; confirm against the complete source. */
276 static void scope_enter_dead(Scope
*s
, ScopeResult f
) {
279 if (s
->result
== SCOPE_SUCCESS
)
282 unit_log_result(UNIT(s
), s
->result
== SCOPE_SUCCESS
, scope_result_to_string(s
->result
));
283 scope_set_state(s
, s
->result
!= SCOPE_SUCCESS
? SCOPE_FAILED
: SCOPE_DEAD
);
/* Enters one of the stop states ('state') and tries to terminate the scope's
 * processes.
 *
 * Visible flow: for SCOPE_STOP_SIGTERM the controller is first asked to stop the
 * scope (skip_signal is set when bus_scope_send_request_stop() returns > 0);
 * unit_kill_context() is invoked; the stop timer is armed relative to now with
 * timeout_stop_usec and 'state' is entered. In the alternative branch SIGTERM
 * escalates to SCOPE_STOP_SIGKILL, and a further path ends in scope_enter_dead().
 * A failure path ends in scope_enter_dead() with SCOPE_FAILURE_RESOURCES.
 *
 * NOTE(review): the conditions selecting between these paths, the statement
 * guarded by 'result == SCOPE_SUCCESS', and the remaining unit_kill_context()
 * arguments are not visible in this excerpt. */
286 static void scope_enter_signal(Scope
*s
, ScopeState state
, ScopeResult f
) {
287 bool skip_signal
= false;
292 if (s
->result
== SCOPE_SUCCESS
)
295 /* If we have a controller set let's ask the controller nicely to terminate the scope, instead of us going
296 * directly into SIGTERM berserk mode */
297 if (state
== SCOPE_STOP_SIGTERM
)
298 skip_signal
= bus_scope_send_request_stop(s
) > 0;
/* Kill operation: KILL_KILL for anything but SIGTERM; within SIGTERM, abandoned scopes use the logging variant. */
303 r
= unit_kill_context(
305 state
!= SCOPE_STOP_SIGTERM
? KILL_KILL
:
306 s
->was_abandoned
? KILL_TERMINATE_AND_LOG
:
309 log_unit_warning_errno(UNIT(s
), r
, "Failed to kill processes: %m");
315 r
= scope_arm_timer(s
, /* relative= */ true, s
->timeout_stop_usec
);
317 log_unit_warning_errno(UNIT(s
), r
, "Failed to install timer: %m");
321 scope_set_state(s
, state
);
322 } else if (state
== SCOPE_STOP_SIGTERM
)
323 scope_enter_signal(s
, SCOPE_STOP_SIGKILL
, SCOPE_SUCCESS
);
325 scope_enter_dead(s
, SCOPE_SUCCESS
);
330 scope_enter_dead(s
, SCOPE_FAILURE_RESOURCES
);
/* Starts the SCOPE_START_CHOWN phase: a helper process named "(sd-chown-cgroup)"
 * is forked which resolves the configured user and group and applies them to the
 * scope's cgroup via cg_set_access(). The parent watches the helper exclusively
 * and enters SCOPE_START_CHOWN.
 *
 * A timer is armed relative to now with the manager's default start timeout
 * before forking; a later path disables the timer again.
 *
 * NOTE(review): the branch structure (child vs. parent after the fork, error
 * paths, exit codes, return values) is not visible in this excerpt. */
333 static int scope_enter_start_chown(Scope
*s
) {
334 Unit
*u
= UNIT(ASSERT_PTR(s
));
335 _cleanup_(pidref_done
) PidRef pidref
= PIDREF_NULL
;
340 if (!s
->cgroup_runtime
)
343 r
= scope_arm_timer(s
, /* relative= */ true, u
->manager
->defaults
.timeout_start_usec
);
347 r
= unit_fork_helper_process(u
, "(sd-chown-cgroup)", /* into_cgroup= */ true, &pidref
);
/* Start from "invalid" IDs; they are only replaced when a user and/or group is configured. */
352 uid_t uid
= UID_INVALID
;
353 gid_t gid
= GID_INVALID
;
355 if (!isempty(s
->user
)) {
356 const char *user
= s
->user
;
358 r
= get_user_creds(&user
, &uid
, &gid
, NULL
, NULL
, 0);
360 log_unit_error_errno(UNIT(s
), r
, "Failed to resolve user \"%s\": %m", user
);
/* A configured group is resolved after the user, so it overwrites the gid obtained from the user lookup. */
365 if (!isempty(s
->group
)) {
366 const char *group
= s
->group
;
368 r
= get_group_creds(&group
, &gid
, 0);
370 log_unit_error_errno(UNIT(s
), r
, "Failed to resolve group \"%s\": %m", group
);
375 r
= cg_set_access(s
->cgroup_runtime
->cgroup_path
, uid
, gid
);
377 log_unit_error_errno(UNIT(s
), r
, "Failed to adjust control group access: %m");
384 r
= unit_watch_pidref(UNIT(s
), &pidref
, /* exclusive= */ true);
388 scope_set_state(s
, SCOPE_START_CHOWN
);
392 s
->timer_event_source
= sd_event_source_disable_unref(s
->timer_event_source
);
/* Moves the scope into SCOPE_RUNNING.
 *
 * Visible sequence: start tracking the controller (result ignored), acquire an
 * invocation ID, export the unit's state files, attach the PIDs in u->pids to
 * the scope's cgroup, reset the result to SCOPE_SUCCESS, enter SCOPE_RUNNING,
 * arm the absolute runtime timer and stop watching the PIDs. A failure path
 * ends in scope_enter_dead() with SCOPE_FAILURE_RESOURCES.
 *
 * NOTE(review): the conditions guarding the warning/ECHILD branches, the jump
 * to the failure path and the return values are not visible in this excerpt. */
396 static int scope_enter_running(Scope
*s
) {
397 Unit
*u
= UNIT(ASSERT_PTR(s
));
400 (void) bus_scope_track_controller(s
);
402 r
= unit_acquire_invocation_id(u
);
406 unit_export_state_files(u
);
408 r
= unit_attach_pids_to_cgroup(u
, u
->pids
, NULL
);
410 log_unit_warning_errno(u
, r
, "Failed to add PIDs to scope's control group: %m");
414 r
= log_unit_warning_errno(u
, SYNTHETIC_ERRNO(ECHILD
), "No PIDs left to attach to the scope's control group, refusing.");
/* Here 'r' is used as the count of attached processes (it selects singular vs. plural). */
417 log_unit_debug(u
, "%i %s added to scope's control group.", r
, r
== 1 ? "process" : "processes");
419 s
->result
= SCOPE_SUCCESS
;
421 scope_set_state(s
, SCOPE_RUNNING
);
423 /* Set the maximum runtime timeout. */
/* NOTE(review): the return value of scope_arm_timer() is not checked here. */
424 scope_arm_timer(s
, /* relative= */ false, scope_running_timeout(s
));
426 /* Unwatch all pids we've just added to cgroup. We rely on empty notifications there. */
427 unit_unwatch_all_pids(u
);
432 scope_enter_dead(s
, SCOPE_FAILURE_RESOURCES
);
/* Start handler of the scope unit type.
 *
 * Visible checks, in order: the special init scope, the SCOPE_FAILED state, the
 * two stop states ("try again later"), an assertion that the scope is
 * SCOPE_DEAD, and non-transient units outside of a manager reload. Afterwards
 * the cgroup is realized and accounting reset (both results ignored), and the
 * scope continues with scope_enter_start_chown() when a user is set and the
 * cgroup is delegated, otherwise with scope_enter_running().
 *
 * NOTE(review): the values returned by the early checks are not visible in
 * this excerpt. */
436 static int scope_start(Unit
*u
) {
437 Scope
*s
= ASSERT_PTR(SCOPE(u
));
439 if (unit_has_name(u
, SPECIAL_INIT_SCOPE
))
442 if (s
->state
== SCOPE_FAILED
)
445 /* We can't fulfill this right now, please try again later */
446 if (IN_SET(s
->state
, SCOPE_STOP_SIGTERM
, SCOPE_STOP_SIGKILL
))
449 assert(s
->state
== SCOPE_DEAD
);
451 if (!u
->transient
&& !MANAGER_IS_RELOADING(u
->manager
))
454 (void) unit_realize_cgroup(u
);
455 (void) unit_reset_accounting(u
);
457 /* We check only for User= option to keep behavior consistent with logic for service units,
458 * i.e. having 'Delegate=true Group=foo' w/o specifying User= has no effect. */
459 if (s
->user
&& unit_cgroup_delegate(u
))
460 return scope_enter_start_chown(s
);
462 return scope_enter_running(s
);
/* Stop handler of the scope unit type. A scope already in one of the stop
 * states is handled by an early check; otherwise the scope is asserted to be
 * RUNNING or ABANDONED and the SIGTERM stop phase is entered with a success
 * result.
 *
 * NOTE(review): the statement taken for the early check and the return values
 * are not visible in this excerpt. */
465 static int scope_stop(Unit
*u
) {
466 Scope
*s
= ASSERT_PTR(SCOPE(u
));
468 if (IN_SET(s
->state
, SCOPE_STOP_SIGTERM
, SCOPE_STOP_SIGKILL
))
471 assert(IN_SET(s
->state
, SCOPE_RUNNING
, SCOPE_ABANDONED
));
473 scope_enter_signal(s
, SCOPE_STOP_SIGTERM
, SCOPE_SUCCESS
);
/* Clears a failure: a scope in SCOPE_FAILED is moved back to SCOPE_DEAD, and the
 * stored result is reset to SCOPE_SUCCESS regardless of the current state. */
477 static void scope_reset_failed(Unit
*u
) {
478 Scope
*s
= ASSERT_PTR(SCOPE(u
));
480 if (s
->state
== SCOPE_FAILED
)
481 scope_set_state(s
, SCOPE_DEAD
);
483 s
->result
= SCOPE_SUCCESS
;
/* Reports the scope's currently armed timeout through '*timeout'. The time is
 * read from timer_event_source via sd_event_source_get_time(); the cases "no
 * timer source" and "time is USEC_INFINITY" are checked explicitly.
 *
 * NOTE(review): the values returned for those cases, the error check after
 * sd_event_source_get_time() and the store into '*timeout' are not visible in
 * this excerpt. */
486 static int scope_get_timeout(Unit
*u
, usec_t
*timeout
) {
487 Scope
*s
= ASSERT_PTR(SCOPE(u
));
491 if (!s
->timer_event_source
)
494 r
= sd_event_source_get_time(s
->timer_event_source
, &t
);
497 if (t
== USEC_INFINITY
)
/* Serializes the scope's runtime state to 'f'. Keys written by the visible
 * code: "state", "was-abandoned", "controller", and one "pids" entry per PID in
 * u->pids (serialize_pidref() additionally receives 'fds'). The keys match those
 * parsed by scope_deserialize_item().
 *
 * NOTE(review): any guard around the "controller" item and the return value are
 * not visible in this excerpt. Unlike the other calls, the serialize_pidref()
 * result is not explicitly cast to void. */
504 static int scope_serialize(Unit
*u
, FILE *f
, FDSet
*fds
) {
505 Scope
*s
= ASSERT_PTR(SCOPE(u
));
511 (void) serialize_item(f
, "state", scope_state_to_string(s
->state
));
512 (void) serialize_bool(f
, "was-abandoned", s
->was_abandoned
);
515 (void) serialize_item(f
, "controller", s
->controller
);
517 SET_FOREACH(pid
, u
->pids
)
518 serialize_pidref(f
, fds
, "pids", pid
);
/* Parses one serialized key/value pair back into the scope object.
 *
 *   "state"         -> parsed with scope_state_from_string(), stored in deserialized_state
 *   "was-abandoned" -> parsed with parse_boolean(), stored in was_abandoned
 *   "controller"    -> copied into s->controller via free_and_strdup()
 *   "pids"          -> deserialized into a PidRef and watched (non-exclusively)
 *
 * Parse failures and unknown keys are logged at debug level.
 *
 * NOTE(review): the conditions deciding between the log call and the store in
 * each branch, the error handling for free_and_strdup() and the return value
 * are not visible in this excerpt. */
523 static int scope_deserialize_item(Unit
*u
, const char *key
, const char *value
, FDSet
*fds
) {
524 Scope
*s
= ASSERT_PTR(SCOPE(u
));
531 if (streq(key
, "state")) {
534 state
= scope_state_from_string(value
);
536 log_unit_debug(u
, "Failed to parse state value: %s", value
);
538 s
->deserialized_state
= state
;
540 } else if (streq(key
, "was-abandoned")) {
543 k
= parse_boolean(value
);
545 log_unit_debug(u
, "Failed to parse boolean value: %s", value
);
547 s
->was_abandoned
= k
;
548 } else if (streq(key
, "controller")) {
550 r
= free_and_strdup(&s
->controller
, value
);
554 } else if (streq(key
, "pids")) {
555 _cleanup_(pidref_done
) PidRef pidref
= PIDREF_NULL
;
557 /* We don't check if we already received the pid before here because unit_watch_pidref()
558 * does this check internally and discards the new pidref if we already received it before. */
559 if (deserialize_pidref(fds
, value
, &pidref
) >= 0) {
560 r
= unit_watch_pidref(u
, &pidref
, /* exclusive= */ false);
562 log_unit_debug(u
, "Failed to watch PID, ignoring: %s", value
);
565 log_unit_debug(u
, "Unknown serialization key: %s", key
);
/* Callback for the "cgroup is empty" notification. If the scope is running,
 * abandoned or in one of the stop states, it is finished via scope_enter_dead()
 * with SCOPE_SUCCESS; in any other state only the debug message is logged. */
570 static void scope_notify_cgroup_empty_event(Unit
*u
) {
571 Scope
*s
= ASSERT_PTR(SCOPE(u
));
573 log_unit_debug(u
, "cgroup is empty");
575 if (IN_SET(s
->state
, SCOPE_RUNNING
, SCOPE_ABANDONED
, SCOPE_STOP_SIGTERM
, SCOPE_STOP_SIGKILL
))
576 scope_enter_dead(s
, SCOPE_SUCCESS
);
/* Callback for an out-of-memory event in the scope's cgroup.
 *
 * Two debug messages exist, one for kills by systemd-oomd and one for the kernel
 * OOM killer (presumably selected by 'managed_oom' — the condition is not
 * visible here). The OOM_CONTINUE policy is checked before any state handling.
 * Visible state handling:
 *  - a group of cases including SCOPE_START_CHOWN enters the SIGTERM stop phase,
 *  - SCOPE_STOP_SIGTERM escalates to the SIGKILL stop phase,
 *  - SCOPE_STOP_SIGKILL only records SCOPE_FAILURE_OOM_KILL if no other failure
 *    has been stored yet.
 * The first two pass SCOPE_FAILURE_OOM_KILL as the result.
 *
 * NOTE(review): the switch header, the other case labels of the first group,
 * the break statements and the default branch are not visible in this excerpt. */
579 static void scope_notify_cgroup_oom_event(Unit
*u
, bool managed_oom
) {
580 Scope
*s
= ASSERT_PTR(SCOPE(u
));
583 log_unit_debug(u
, "Process(es) of control group were killed by systemd-oomd.");
585 log_unit_debug(u
, "Process of control group was killed by the OOM killer.");
587 if (s
->oom_policy
== OOM_CONTINUE
)
592 case SCOPE_START_CHOWN
:
594 scope_enter_signal(s
, SCOPE_STOP_SIGTERM
, SCOPE_FAILURE_OOM_KILL
);
597 case SCOPE_STOP_SIGTERM
:
598 scope_enter_signal(s
, SCOPE_STOP_SIGKILL
, SCOPE_FAILURE_OOM_KILL
);
601 case SCOPE_STOP_SIGKILL
:
602 if (s
->result
== SCOPE_SUCCESS
)
603 s
->result
= SCOPE_FAILURE_OOM_KILL
;
605 /* SCOPE_DEAD, SCOPE_ABANDONED, and SCOPE_FAILED end up in default */
/* SIGCHLD callback. Only acted upon while the scope is in SCOPE_START_CHOWN:
 * an unclean exit (per is_clean_exit() with EXIT_CLEAN_COMMAND) fails the scope
 * with SCOPE_FAILURE_RESOURCES, while the other visible path continues with
 * scope_enter_running().
 *
 * NOTE(review): the 'else' joining the two calls is not visible in this excerpt;
 * 'pid' is not used by the visible code. */
611 static void scope_sigchld_event(Unit
*u
, pid_t pid
, int code
, int status
) {
612 Scope
*s
= ASSERT_PTR(SCOPE(u
));
614 if (s
->state
== SCOPE_START_CHOWN
) {
615 if (!is_clean_exit(code
, status
, EXIT_CLEAN_COMMAND
, NULL
))
616 scope_enter_dead(s
, SCOPE_FAILURE_RESOURCES
);
618 scope_enter_running(s
);
/* Timer callback installed by scope_arm_timer(); 'userdata' is the scope and
 * 'source' must be its timer_event_source (asserted).
 *
 * Visible handling, all with SCOPE_FAILURE_TIMEOUT as the result:
 *  - runtime limit reached: warn and enter the SIGTERM stop phase
 *    (NOTE(review): the case label is not visible — presumably SCOPE_RUNNING),
 *  - SCOPE_STOP_SIGTERM: escalate to SIGKILL if kill_context.send_sigkill is set,
 *    otherwise warn and enter the dead state,
 *  - SCOPE_STOP_SIGKILL: warn that processes are still around and enter dead,
 *  - SCOPE_START_CHOWN: warn about the timed-out user lookup and enter dead,
 *  - another branch is treated as unreachable (label not visible; presumably
 *    the default).
 *
 * NOTE(review): the switch header, break statements and the return value are
 * not visible in this excerpt. */
623 static int scope_dispatch_timer(sd_event_source
*source
, usec_t usec
, void *userdata
) {
624 Scope
*s
= ASSERT_PTR(SCOPE(userdata
));
626 assert(s
->timer_event_source
== source
);
631 log_unit_warning(UNIT(s
), "Scope reached runtime time limit. Stopping.");
632 scope_enter_signal(s
, SCOPE_STOP_SIGTERM
, SCOPE_FAILURE_TIMEOUT
);
635 case SCOPE_STOP_SIGTERM
:
636 if (s
->kill_context
.send_sigkill
) {
637 log_unit_warning(UNIT(s
), "Stopping timed out. Killing.");
638 scope_enter_signal(s
, SCOPE_STOP_SIGKILL
, SCOPE_FAILURE_TIMEOUT
);
640 log_unit_warning(UNIT(s
), "Stopping timed out. Skipping SIGKILL.");
641 scope_enter_dead(s
, SCOPE_FAILURE_TIMEOUT
);
646 case SCOPE_STOP_SIGKILL
:
647 log_unit_warning(UNIT(s
), "Still around after SIGKILL. Ignoring.");
648 scope_enter_dead(s
, SCOPE_FAILURE_TIMEOUT
);
651 case SCOPE_START_CHOWN
:
652 log_unit_warning(UNIT(s
), "User lookup timed out. Entering failed state.");
653 scope_enter_dead(s
, SCOPE_FAILURE_TIMEOUT
);
657 assert_not_reached();
/* Abandons a scope, i.e. detaches it from its controller: marks it as
 * was_abandoned, drops the controller name and its bus tracker, and enters
 * SCOPE_ABANDONED. The special init scope and scopes that are neither RUNNING
 * nor ABANDONED are handled by early checks.
 *
 * This function is not static, so it is callable from other translation units.
 *
 * NOTE(review): the values returned by the early checks and on success are not
 * visible in this excerpt. */
663 int scope_abandon(Scope
*s
) {
666 if (unit_has_name(UNIT(s
), SPECIAL_INIT_SCOPE
))
669 if (!IN_SET(s
->state
, SCOPE_RUNNING
, SCOPE_ABANDONED
))
672 s
->was_abandoned
= true;
674 s
->controller
= mfree(s
->controller
);
675 s
->controller_track
= sd_bus_track_unref(s
->controller_track
);
677 scope_set_state(s
, SCOPE_ABANDONED
);
/* Returns the generic UnitActiveState for the scope's current state, looked up
 * in state_translation_table. */
682 static UnitActiveState
scope_active_state(Unit
*u
) {
683 Scope
*s
= ASSERT_PTR(SCOPE(u
));
685 return state_translation_table
[s
->state
];
/* Returns the scope-specific state of the unit as a string, as produced by
 * scope_state_to_string(). */
688 static const char *scope_sub_state_to_string(Unit
*u
) {
689 Scope
*s
= ASSERT_PTR(SCOPE(u
));
691 return scope_state_to_string(s
->state
);
/* Makes sure the special init scope unit exists: it is looked up in the manager
 * and allocated with unit_new_for_name() (an allocation failure is logged and
 * the function returns). Its deserialized state is set to SCOPE_RUNNING, and the
 * unit is queued for loading, for D-Bus announcement and for cgroup realization.
 *
 * NOTE(review): the condition under which the unit is allocated (presumably only
 * when the lookup found nothing) and any further field assignments are not
 * visible in this excerpt. */
694 static void scope_enumerate_perpetual(Manager
*m
) {
700 /* Let's unconditionally add the "init.scope" special unit
701 * that encapsulates PID 1. Note that PID 1 already is in the
702 * cgroup for this, we hence just need to allocate the object
703 * for it and that's it. */
705 u
= manager_get_unit(m
, SPECIAL_INIT_SCOPE
);
707 r
= unit_new_for_name(m
, sizeof(Scope
), SPECIAL_INIT_SCOPE
, &u
);
709 return (void) log_error_errno(r
, "Failed to allocate the special %s unit: %m",
715 SCOPE(u
)->deserialized_state
= SCOPE_RUNNING
;
717 unit_add_to_load_queue(u
);
718 unit_add_to_dbus_queue(u
);
719 /* Enqueue an explicit cgroup realization here. Unlike other cgroups this one already exists and is
720 * populated (by us, after all!) already, even when we are not in a reload cycle. Hence we cannot
721 * apply the settings at creation time anymore, but let's at least apply them asynchronously. */
722 unit_add_to_cgroup_realize_queue(u
);
/* String names of the ScopeResult values, indexed by the enum value. */
725 static const char* const scope_result_table
[_SCOPE_RESULT_MAX
] = {
726 [SCOPE_SUCCESS
] = "success",
727 [SCOPE_FAILURE_RESOURCES
] = "resources",
728 [SCOPE_FAILURE_TIMEOUT
] = "timeout",
729 [SCOPE_FAILURE_OOM_KILL
] = "oom-kill",
/* Instantiates the string lookup helpers for ScopeResult; scope_result_to_string()
 * is used elsewhere in this file. Presumably they are backed by scope_result_table
 * through the macro's naming convention — confirm in string-table.h. */
732 DEFINE_STRING_TABLE_LOOKUP(scope_result
, ScopeResult
);
/* Unit type descriptor for scopes: records the object size, the offsets of the
 * embedded cgroup/kill/cgroup-runtime members inside Scope, the capability flags
 * (transient, delegate, managed OOM) and the callbacks implemented in this file.
 *
 * NOTE(review): several initializer entries and the end of the initializer are
 * not visible in this excerpt. */
734 const UnitVTable scope_vtable
= {
735 .object_size
= sizeof(Scope
),
736 .cgroup_context_offset
= offsetof(Scope
, cgroup_context
),
737 .kill_context_offset
= offsetof(Scope
, kill_context
),
738 .cgroup_runtime_offset
= offsetof(Scope
, cgroup_runtime
),
744 .private_section
= "Scope",
746 .can_transient
= true,
747 .can_delegate
= true,
750 .can_set_managed_oom
= true,
756 .coldplug
= scope_coldplug
,
760 .start
= scope_start
,
763 .freezer_action
= unit_cgroup_freezer_action
,
765 .get_timeout
= scope_get_timeout
,
767 .serialize
= scope_serialize
,
768 .deserialize_item
= scope_deserialize_item
,
770 .active_state
= scope_active_state
,
771 .sub_state_to_string
= scope_sub_state_to_string
,
773 .sigchld_event
= scope_sigchld_event
,
775 .reset_failed
= scope_reset_failed
,
777 .notify_cgroup_empty
= scope_notify_cgroup_empty_event
,
778 .notify_cgroup_oom
= scope_notify_cgroup_oom_event
,
780 .bus_set_property
= bus_scope_set_property
,
781 .bus_commit_properties
= bus_scope_commit_properties
,
783 .enumerate_perpetual
= scope_enumerate_perpetual
,