#include <unistd.h>
#include "alloc-util.h"
+#include "cgroup-setup.h"
#include "dbus-scope.h"
#include "dbus-unit.h"
+#include "exit-status.h"
#include "load-dropin.h"
#include "log.h"
#include "process-util.h"
#include "strv.h"
#include "unit-name.h"
#include "unit.h"
+#include "user-util.h"
static const UnitActiveState state_translation_table[_SCOPE_STATE_MAX] = {
[SCOPE_DEAD] = UNIT_INACTIVE,
+ [SCOPE_START_CHOWN] = UNIT_ACTIVATING,
[SCOPE_RUNNING] = UNIT_ACTIVE,
[SCOPE_ABANDONED] = UNIT_ACTIVE,
[SCOPE_STOP_SIGTERM] = UNIT_DEACTIVATING,
s->runtime_max_usec = USEC_INFINITY;
s->timeout_stop_usec = u->manager->default_timeout_stop_usec;
u->ignore_on_isolate = true;
+ s->user = s->group = NULL;
+ s->oom_policy = _OOM_POLICY_INVALID;
}
static void scope_done(Unit *u) {
s->controller_track = sd_bus_track_unref(s->controller_track);
s->timer_event_source = sd_event_source_disable_unref(s->timer_event_source);
+
+ s->user = mfree(s->user);
+ s->group = mfree(s->group);
}
static usec_t scope_running_timeout(Scope *s) {
old_state = s->state;
s->state = state;
- if (!IN_SET(state, SCOPE_STOP_SIGTERM, SCOPE_STOP_SIGKILL))
+ if (!IN_SET(state, SCOPE_STOP_SIGTERM, SCOPE_STOP_SIGKILL, SCOPE_START_CHOWN, SCOPE_RUNNING))
s->timer_event_source = sd_event_source_disable_unref(s->timer_event_source);
if (IN_SET(state, SCOPE_DEAD, SCOPE_FAILED)) {
if (state != old_state)
log_debug("%s changed %s -> %s", UNIT(s)->id, scope_state_to_string(old_state), scope_state_to_string(state));
- unit_notify(UNIT(s), state_translation_table[old_state], state_translation_table[state], 0);
+ unit_notify(UNIT(s), state_translation_table[old_state], state_translation_table[state], /* reload_success = */ true);
}
static int scope_add_default_dependencies(Scope *s) {
if (r < 0)
return r;
+ if (s->oom_policy < 0)
+ s->oom_policy = s->cgroup_context.delegate ? OOM_CONTINUE : UNIT(s)->manager->default_oom_policy;
+
+ s->cgroup_context.memory_oom_group = s->oom_policy == OOM_KILL;
+
return scope_add_default_dependencies(s);
}
"%sScope State: %s\n"
"%sResult: %s\n"
"%sRuntimeMaxSec: %s\n"
- "%sRuntimeRandomizedExtraSec: %s\n",
+ "%sRuntimeRandomizedExtraSec: %s\n"
+ "%sOOMPolicy: %s\n",
prefix, scope_state_to_string(s->state),
prefix, scope_result_to_string(s->result),
prefix, FORMAT_TIMESPAN(s->runtime_max_usec, USEC_PER_SEC),
- prefix, FORMAT_TIMESPAN(s->runtime_rand_extra_usec, USEC_PER_SEC));
+ prefix, FORMAT_TIMESPAN(s->runtime_rand_extra_usec, USEC_PER_SEC),
+ prefix, oom_policy_to_string(s->oom_policy));
cgroup_context_dump(UNIT(s), f, prefix);
kill_context_dump(&s->kill_context, f, prefix);
scope_enter_dead(s, SCOPE_FAILURE_RESOURCES);
}
-static int scope_start(Unit *u) {
- Scope *s = SCOPE(u);
+static int scope_enter_start_chown(Scope *s) { /* Arms the start timeout, forks a helper that adjusts access
+ * on the unit's cgroup for s->user/s->group, and enters SCOPE_START_CHOWN. Returns 1 once the helper is
+ * being watched; returns a negative error if arming the timer, forking, or watching the helper fails. */
+ Unit *u = UNIT(s);
+ pid_t pid;
int r;
assert(s);
+ assert(s->user);
- if (unit_has_name(u, SPECIAL_INIT_SCOPE))
- return -EPERM;
+ r = scope_arm_timer(s, usec_add(now(CLOCK_MONOTONIC), u->manager->default_timeout_start_usec)); /* deadline: now + the manager's default start timeout */
+ if (r < 0)
+ return r;
- if (s->state == SCOPE_FAILED)
- return -EPERM;
+ r = unit_fork_helper_process(u, "(sd-chown-cgroup)", &pid);
+ if (r < 0)
+ goto fail;
- /* We can't fulfill this right now, please try again later */
- if (IN_SET(s->state, SCOPE_STOP_SIGTERM, SCOPE_STOP_SIGKILL))
- return -EAGAIN;
+ if (r == 0) { /* NOTE(review): r == 0 presumably means we are inside the forked helper — confirm against unit_fork_helper_process() */
+ uid_t uid = UID_INVALID;
+ gid_t gid = GID_INVALID;
- assert(s->state == SCOPE_DEAD);
+ if (!isempty(s->user)) {
+ const char *user = s->user;
- if (!u->transient && !MANAGER_IS_RELOADING(u->manager))
- return -ENOENT;
+ r = get_user_creds(&user, &uid, &gid, NULL, NULL, 0);
+ if (r < 0) {
+ log_unit_error_errno(UNIT(s), r, "Failed to resolve user \"%s\": %m", user);
+ _exit(EXIT_USER);
+ }
+ }
+ /* The group lookup below is handed the same &gid as the user lookup above, so a non-empty
+ * s->group takes precedence over whatever gid the user lookup stored. */
+ if (!isempty(s->group)) {
+ const char *group = s->group;
+
+ r = get_group_creds(&group, &gid, 0);
+ if (r < 0) {
+ log_unit_error_errno(UNIT(s), r, "Failed to resolve group \"%s\": %m", group);
+ _exit(EXIT_GROUP);
+ }
+ }
+ /* Apply the resolved IDs to the unit's cgroup. uid/gid keep their UID_INVALID/GID_INVALID
+ * initial values if the corresponding string was empty. */
+ r = cg_set_access(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path, uid, gid);
+ if (r < 0) {
+ log_unit_error_errno(UNIT(s), r, "Failed to adjust control group access: %m");
+ _exit(EXIT_CGROUP);
+ }
+
+ _exit(EXIT_SUCCESS);
+ }
+ /* Every path through the r == 0 branch ends in _exit(), so the code below runs only on the
+ * side of the fork that received the helper's PID. */
+ r = unit_watch_pid(UNIT(s), pid, true);
+ if (r < 0)
+ goto fail;
+
+ scope_set_state(s, SCOPE_START_CHOWN);
+
+ return 1;
+fail: /* drop the timer armed at the top so it does not stay attached after a failed start attempt */
+ s->timer_event_source = sd_event_source_disable_unref(s->timer_event_source);
+ return r;
+}
+
+static int scope_enter_running(Scope *s) {
+ Unit *u = UNIT(s);
+ int r;
+
+ assert(s);
(void) bus_scope_track_controller(s);
if (r < 0)
return r;
- (void) unit_realize_cgroup(u);
- (void) unit_reset_accounting(u);
-
unit_export_state_files(u);
r = unit_attach_pids_to_cgroup(u, u->pids, NULL);
return 1;
}
+static int scope_start(Unit *u) {
+ Scope *s = SCOPE(u);
+ /* Start entry point for scope units. Returns -EPERM for the init scope and for a scope in
+ * SCOPE_FAILED, -EAGAIN while a stop is still in progress, -ENOENT for a non-transient scope
+ * outside of a manager reload; otherwise returns whatever scope_enter_start_chown() or
+ * scope_enter_running() returns. */
+ assert(s);
+
+ if (unit_has_name(u, SPECIAL_INIT_SCOPE))
+ return -EPERM;
+
+ if (s->state == SCOPE_FAILED)
+ return -EPERM;
+
+ /* We can't fulfill this right now, please try again later */
+ if (IN_SET(s->state, SCOPE_STOP_SIGTERM, SCOPE_STOP_SIGKILL))
+ return -EAGAIN;
+
+ assert(s->state == SCOPE_DEAD); /* the refused states were handled above; any other state here trips the assertion */
+
+ if (!u->transient && !MANAGER_IS_RELOADING(u->manager))
+ return -ENOENT;
+ /* Both results are deliberately ignored (best effort). This runs before the chown path below,
+ * which operates on u->cgroup_path. NOTE(review): presumably the cgroup has to exist before
+ * the helper adjusts its ownership — confirm. */
+ (void) unit_realize_cgroup(u);
+ (void) unit_reset_accounting(u);
+
+ /* We check only for User= option to keep behavior consistent with logic for service units,
+ * i.e. having 'Delegate=true Group=foo' w/o specifying User= has no effect. */
+ if (s->user && unit_cgroup_delegate(u))
+ return scope_enter_start_chown(s);
+
+ return scope_enter_running(s);
+}
+
static int scope_stop(Unit *u) {
Scope *s = SCOPE(u);
s->result = SCOPE_SUCCESS;
}
-static int scope_kill(Unit *u, KillWho who, int signo, sd_bus_error *error) {
- return unit_kill_common(u, who, signo, -1, -1, error);
+static int scope_kill(Unit *u, KillWho who, int signo, int code, int value, sd_bus_error *error) { /* Thin wrapper: forwards the request, now including code and value, to unit_kill_common() and returns its result. */
+ return unit_kill_common(u, who, signo, code, value, -1, -1, error); /* NOTE(review): the two -1 arguments are presumably "no PID" placeholders — confirm against unit_kill_common() */
}
static int scope_get_timeout(Unit *u, usec_t *timeout) {
if (IN_SET(s->state, SCOPE_RUNNING, SCOPE_ABANDONED, SCOPE_STOP_SIGTERM, SCOPE_STOP_SIGKILL))
scope_enter_dead(s, SCOPE_SUCCESS);
+}
- /* If the cgroup empty notification comes when the unit is not active, we must have failed to clean
- * up the cgroup earlier and should do it now. */
- if (IN_SET(s->state, SCOPE_DEAD, SCOPE_FAILED))
- unit_prune_cgroup(u);
+static void scope_notify_cgroup_oom_event(Unit *u, bool managed_oom) {
+ Scope *s = SCOPE(u);
+ /* Handler for an OOM kill inside the scope's cgroup. managed_oom only selects which debug
+ * message is logged; the reaction depends on s->oom_policy and the current scope state. */
+ if (managed_oom)
+ log_unit_debug(u, "Process(es) of control group were killed by systemd-oomd.");
+ else
+ log_unit_debug(u, "Process of control group was killed by the OOM killer.");
+ /* With OOM_CONTINUE the event is only logged; the scope's state and result stay untouched. */
+ if (s->oom_policy == OOM_CONTINUE)
+ return;
+ /* Otherwise move one step further along the stop sequence from the current state, passing
+ * SCOPE_FAILURE_OOM_KILL to scope_enter_signal(). */
+ switch (s->state) {
+
+ case SCOPE_START_CHOWN:
+ case SCOPE_RUNNING:
+ scope_enter_signal(s, SCOPE_STOP_SIGTERM, SCOPE_FAILURE_OOM_KILL);
+ break;
+
+ case SCOPE_STOP_SIGTERM:
+ scope_enter_signal(s, SCOPE_STOP_SIGKILL, SCOPE_FAILURE_OOM_KILL);
+ break;
+ /* In SCOPE_STOP_SIGKILL no further state is entered; only the result is updated, and only
+ * if it is still SCOPE_SUCCESS, so a result recorded earlier is not overwritten. */
+ case SCOPE_STOP_SIGKILL:
+ if (s->result == SCOPE_SUCCESS)
+ s->result = SCOPE_FAILURE_OOM_KILL;
+ break;
+ /* SCOPE_DEAD, SCOPE_ABANDONED, and SCOPE_FAILED end up in default */
+ default:
+ ;
+ }
}
static void scope_sigchld_event(Unit *u, pid_t pid, int code, int status) {
- assert(u);
+ Scope *s = SCOPE(u);
+
+ assert(s);
+
+ if (s->state == SCOPE_START_CHOWN) {
+ if (!is_clean_exit(code, status, EXIT_CLEAN_COMMAND, NULL))
+ scope_enter_dead(s, SCOPE_FAILURE_RESOURCES);
+ else
+ scope_enter_running(s);
+ return;
+ }
/* If we get a SIGCHLD event for one of the processes we were interested in, then we look for others to
* watch, under the assumption that we'll sooner or later get a SIGCHLD for them, as the original
scope_enter_dead(s, SCOPE_FAILURE_TIMEOUT);
break;
+ case SCOPE_START_CHOWN:
+ log_unit_warning(UNIT(s), "User lookup timed out. Entering failed state.");
+ scope_enter_dead(s, SCOPE_FAILURE_TIMEOUT);
+ break;
+
default:
assert_not_reached();
}
return 0;
}
-_pure_ static UnitActiveState scope_active_state(Unit *u) {
+static UnitActiveState scope_active_state(Unit *u) { /* Returns the generic UnitActiveState that corresponds to the scope's current ScopeState. */
assert(u);
return state_translation_table[SCOPE(u)->state]; /* plain table lookup indexed by the scope state */
}
-_pure_ static const char *scope_sub_state_to_string(Unit *u) {
+static const char *scope_sub_state_to_string(Unit *u) {
assert(u);
return scope_state_to_string(SCOPE(u)->state);
unit_add_to_load_queue(u);
unit_add_to_dbus_queue(u);
+ /* Enqueue an explicit cgroup realization here. Unlike other cgroups this one already exists and is
+ * populated (by us, after all!) already, even when we are not in a reload cycle. Hence we cannot
+ * apply the settings at creation time anymore, but let's at least apply them asynchronously. */
+ unit_add_to_cgroup_realize_queue(u);
}
static const char* const scope_result_table[_SCOPE_RESULT_MAX] = { /* String name for each ScopeResult value, indexed by the enum. */
[SCOPE_SUCCESS] = "success",
[SCOPE_FAILURE_RESOURCES] = "resources",
[SCOPE_FAILURE_TIMEOUT] = "timeout",
+ [SCOPE_FAILURE_OOM_KILL] = "oom-kill",
};
DEFINE_STRING_TABLE_LOOKUP(scope_result, ScopeResult); /* NOTE(review): presumably generates scope_result_to_string() (used by the dump code) from the table above — confirm against the macro definition */
.reset_failed = scope_reset_failed,
.notify_cgroup_empty = scope_notify_cgroup_empty_event,
+ .notify_cgroup_oom = scope_notify_cgroup_oom_event,
.bus_set_property = bus_scope_set_property,
.bus_commit_properties = bus_scope_commit_properties,