From 0f23564ad4a191a92bc5544edf800bb2cfbb3513 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Mon, 19 May 2025 17:38:26 +0200 Subject: [PATCH] pid1: add ability to kill processes in a subgroup of a unit This is useful for things like machined, where the system machined wants to manage a machine owned by the user somewhere down the tree. --- man/org.freedesktop.systemd1.xml | 79 ++++++++++++++++++-------- man/systemctl.xml | 17 +++--- src/core/dbus-manager.c | 11 ++++ src/core/dbus-unit.c | 60 ++++++++++++++++++- src/core/dbus-unit.h | 1 + src/core/kill.c | 2 + src/core/kill.h | 2 + src/core/org.freedesktop.systemd1.conf | 8 +++ src/core/unit.c | 49 +++++++++++----- src/core/unit.h | 2 +- src/test/test-execute.c | 2 +- 11 files changed, 183 insertions(+), 50 deletions(-) diff --git a/man/org.freedesktop.systemd1.xml b/man/org.freedesktop.systemd1.xml index 27c3929a9cc..d5f270c6817 100644 --- a/man/org.freedesktop.systemd1.xml +++ b/man/org.freedesktop.systemd1.xml @@ -112,6 +112,10 @@ node /org/freedesktop/systemd1 { KillUnit(in s name, in s whom, in i signal); + KillUnitSubgroup(in s name, + in s whom, + in s subgroup, + in i signal); QueueSignalUnit(in s name, in s whom, in i signal, @@ -831,6 +835,8 @@ node /org/freedesktop/systemd1 { + + @@ -1315,12 +1321,24 @@ node /org/freedesktop/systemd1 { KillUnit() may be used to kill (i.e. send a signal to) all processes of a unit. It takes the unit name, an enum who and a UNIX signal number to send. The who enum is one of - main, control or all. If - main, only the main process of the unit is killed. If control, only - the control process of the unit is killed. If all, all processes are killed. A + main, control, cgroup or + all. If main, only the main process of the unit is killed. If + control, only the control process of the unit is killed. If + cgroup is specified only the processes in the control group of the unit are killed, + which might or might not include the main and control processes too. 
If all, all + processes are killed, i.e. the main process, the control process and those in the control group. A control process is for example a process that is configured via ExecStop= and is spawned in parallel to the main daemon process in order to shut it - down. + down. The value may be suffixed by -fail in which case the operation will fail if no + matching process was found (otherwise it will return successfully, executing no operation). + + KillUnitSubgroup() is just like KillUnit() but takes an + additional path argument that selects a sub-control-group of the unit's control group. Only processes + in that subgroup are killed. The path may be specified with or without a leading /, in + both cases it is taken relative to the unit's control group. If the subgroup path is specified as an + empty string or as / it has the same effect as KillUnit(). If + it is specified as anything else the who parameter must be set to either + cgroup or cgroup-fail. QueueSignalUnit() is similar to KillUnit() but may be used to enqueue a POSIX Realtime Signal (i.e. SIGRTMIN+… and @@ -1886,8 +1904,8 @@ node /org/freedesktop/systemd1 { Read access is generally granted to all clients. Additionally, for unprivileged clients, some operations are allowed through the polkit privilege system. Operations which modify unit state (StartUnit(), StopUnit(), KillUnit(), - QueueSignalUnit(), RestartUnit() and similar, - SetProperty()) require + KillUnitSubgroup(), QueueSignalUnit(), + RestartUnit() and similar, SetProperty()) require org.freedesktop.systemd1.manage-units. 
Operations which modify unit file enablement state (EnableUnitFiles(), DisableUnitFiles(), EnableUnitFilesWithFlags(), DisableUnitFilesWithFlags(), @@ -1936,6 +1954,8 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2eservice { out a(uosos) affected_jobs); Kill(in s whom, in i signal); + KillSubgroup(in s subgroup, + in i signal); QueueSignal(in s whom, in i signal, in i value); @@ -2259,6 +2279,8 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2eservice { + + @@ -2487,13 +2509,13 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2eservice { Start(), Stop(), Reload(), Restart(), TryRestart(), ReloadOrRestart(), ReloadOrTryRestart(), - Kill(), QueueSignal(), ResetFailed(), - and SetProperties() implement the same operation as the respective methods on the - Manager object (see above). However, these methods operate on the unit - object and hence do not take a unit name parameter. Invoking the methods directly on the Manager object - has the advantage of not requiring a GetUnit() call to get the unit object for a - specific unit name. Calling the methods on the Manager object is hence a round trip - optimization. + Kill(), KillSubgroup(), QueueSignal(), + ResetFailed(), and SetProperties() implement the same + operation as the respective methods on the Manager object (see + above). However, these methods operate on the unit object and hence do not take a unit name + parameter. Invoking the methods directly on the Manager object has the advantage of not requiring a + GetUnit() call to get the unit object for a specific unit name. Calling the + methods on the Manager object is hence a round trip optimization. @@ -12224,7 +12246,8 @@ $ gdbus introspect --system --dest org.freedesktop.systemd1 \ ShutdownStartTimestamp, ShutdownStartTimestampMonotonic, and SoftRebootsCount were added in version 256. - RemoveSubgroupFromUnit() was added in version 258. + RemoveSubgroupFromUnit(), and + KillUnitSubgroup() were added in version 258. 
Unit Objects @@ -12301,8 +12324,9 @@ $ gdbus introspect --system --dest org.freedesktop.systemd1 \ CacheDirectoryQuotaUsage, CacheDirectoryAccounting, LogsDirectoryQuota, - LogsDirectoryQuotaUsage, and - LogsDirectoryAccounting, were added in version 258. + LogsDirectoryQuotaUsage, + LogsDirectoryAccounting, and + KillSubgroup() were added in version 258. Socket Unit Objects @@ -12360,8 +12384,9 @@ $ gdbus introspect --system --dest org.freedesktop.systemd1 \ CacheDirectoryQuotaUsage, CacheDirectoryAccounting, LogsDirectoryQuota, - LogsDirectoryQuotaUsage, and - LogsDirectoryAccounting, were added in version 258. + LogsDirectoryQuotaUsage, + LogsDirectoryAccounting, and + KillSubgroup() were added in version 258. Mount Unit Objects @@ -12414,8 +12439,9 @@ $ gdbus introspect --system --dest org.freedesktop.systemd1 \ CacheDirectoryQuotaUsage, CacheDirectoryAccounting, LogsDirectoryQuota, - LogsDirectoryQuotaUsage, and - LogsDirectoryAccounting, were added in version 258. + LogsDirectoryQuotaUsage, + LogsDirectoryAccounting, and + KillSubgroup() were added in version 258. Swap Unit Objects @@ -12466,8 +12492,9 @@ $ gdbus introspect --system --dest org.freedesktop.systemd1 \ CacheDirectoryQuotaUsage, CacheDirectoryAccounting, LogsDirectoryQuota, - LogsDirectoryQuotaUsage, and - LogsDirectoryAccounting, were added in version 258. + LogsDirectoryQuotaUsage, + LogsDirectoryAccounting, and + KillSubgroup() were added in version 258. Slice Unit Objects @@ -12495,8 +12522,9 @@ $ gdbus introspect --system --dest org.freedesktop.systemd1 \ ManagedOOMMemoryPressureDurationUSec was added in version 257. ConcurrencyHardMax, ConcurrencySoftMax, - NCurrentlyActive and - RemoveSubgroup() were added in version 258. + NCurrentlyActive, + RemoveSubgroup(), and + KillSubgroup() were added in version 258. Scope Unit Objects @@ -12523,7 +12551,8 @@ $ gdbus introspect --system --dest org.freedesktop.systemd1 \ EffectiveTasksMax, and MemoryZSwapWriteback were added in version 256. 
ManagedOOMMemoryPressureDurationUSec was added in version 257. - RemoveSubgroup() was added in version 258. + RemoveSubgroup() and + KillSubgroup() were added in version 258. Job Objects diff --git a/man/systemctl.xml b/man/systemctl.xml index 08cfb5421b8..9678be018e8 100644 --- a/man/systemctl.xml +++ b/man/systemctl.xml @@ -2481,14 +2481,15 @@ Jan 12 10:46:45 example.com bluetoothd[8900]: gatt-time-server: Input/output err When used with kill, choose which processes to send a UNIX process signal - to. Must be one of , or to - select whether to kill only the main process, the control process or all processes of the unit. The - main process of the unit is the one that defines the life-time of it. A control process of a unit - is one that is invoked by the manager to induce state changes of it. For example, all processes - started due to the ExecStartPre=, ExecStop= or - ExecReload= settings of service units are control processes. Note that there is - only one control process per unit at a time, as only one state change is executed at a time. For - services of type Type=forking, the initial process started by the manager for + to. Must be one of , , or + to select whether to kill only the main process, the control process, all + processes in the unit's control group or all processes of the unit. The main process of the unit is + the one that defines the life-time of it. A control process of a unit is one that is invoked by the + manager to induce state changes of it. For example, all processes started due to the + ExecStartPre=, ExecStop= or ExecReload= + settings of service units are control processes. Note that there is only one control process per + unit at a time, as only one state change is executed at a time. For services of type + Type=forking, the initial process started by the manager for ExecStart= is a control process, while the process ultimately forked off by that one is then considered the main process of the unit (if it can be determined). 
This is different for service units of other types, where the process forked off by the manager for diff --git a/src/core/dbus-manager.c b/src/core/dbus-manager.c index 330d3cea2ea..1aca5ad21fe 100644 --- a/src/core/dbus-manager.c +++ b/src/core/dbus-manager.c @@ -831,6 +831,12 @@ static int method_kill_unit(sd_bus_message *message, void *userdata, sd_bus_erro return method_generic_unit_operation(message, userdata, error, bus_unit_method_kill, 0); } +static int method_kill_unit_subgroup(sd_bus_message *message, void *userdata, sd_bus_error *error) { + /* We don't bother with GENERIC_UNIT_LOAD nor GENERIC_UNIT_VALIDATE_LOADED here, as it shouldn't + * matter whether a unit is loaded for killing any processes possibly in the unit's cgroup. */ + return method_generic_unit_operation(message, userdata, error, bus_unit_method_kill_subgroup, 0); +} + static int method_clean_unit(sd_bus_message *message, void *userdata, sd_bus_error *error) { /* Load the unit if necessary, in order to load it, and insist on the unit being loaded to be * cleaned */ @@ -3025,6 +3031,11 @@ const sd_bus_vtable bus_manager_vtable[] = { SD_BUS_NO_RESULT, method_kill_unit, SD_BUS_VTABLE_UNPRIVILEGED), + SD_BUS_METHOD_WITH_ARGS("KillUnitSubgroup", + SD_BUS_ARGS("s", name, "s", whom, "s", subgroup, "i", signal), + SD_BUS_NO_RESULT, + method_kill_unit_subgroup, + SD_BUS_VTABLE_UNPRIVILEGED), SD_BUS_METHOD_WITH_ARGS("QueueSignalUnit", SD_BUS_ARGS("s", name, "s", whom, "i", signal, "i", value), SD_BUS_NO_RESULT, diff --git a/src/core/dbus-unit.c b/src/core/dbus-unit.c index dc5fbfbf2a8..f212324e56f 100644 --- a/src/core/dbus-unit.c +++ b/src/core/dbus-unit.c @@ -572,7 +572,60 @@ int bus_unit_method_kill(sd_bus_message *message, void *userdata, sd_bus_error * if (r == 0) return 1; /* No authorization for now, but the async polkit stuff will call us again when it has it */ - r = unit_kill(u, whom, signo, code, value, error); + r = unit_kill(u, whom, /* subgroup= */ NULL, signo, code, value, error); + if 
(r < 0) + return r; + + return sd_bus_reply_method_return(message, NULL); +} + +int bus_unit_method_kill_subgroup(sd_bus_message *message, void *userdata, sd_bus_error *error) { + Unit *u = ASSERT_PTR(userdata); + int r; + + assert(message); + + r = mac_selinux_unit_access_check(u, message, "stop", error); + if (r < 0) + return r; + + const char *swhom, *subgroup; + int32_t signo; + r = sd_bus_message_read(message, "ssi", &swhom, &subgroup, &signo); /* whom, subgroup, signal */ + if (r < 0) + return r; + + KillWhom whom; + if (isempty(swhom)) + whom = KILL_CGROUP; + else { + whom = kill_whom_from_string(swhom); + if (whom < 0) + return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid whom argument: %s", swhom); + } + + if (empty_or_root(subgroup)) /* "" and "/" both select the unit's own cgroup, i.e. behave like Kill() */ + subgroup = NULL; + else if (!path_is_normalized(subgroup)) + return sd_bus_error_set(error, SD_BUS_ERROR_INVALID_ARGS, "Specified cgroup sub-path is not valid."); + else if (!IN_SET(whom, KILL_CGROUP, KILL_CGROUP_FAIL)) + return sd_bus_error_set(error, SD_BUS_ERROR_INVALID_ARGS, "Subgroup can only be specified in combination with 'cgroup' or 'cgroup-fail'."); + + if (!SIGNAL_VALID(signo)) + return sd_bus_error_set(error, SD_BUS_ERROR_INVALID_ARGS, "Signal number out of range."); + + r = bus_verify_manage_units_async_full( + u, + "kill-subgroup", + N_("Authentication is required to send a UNIX signal to the processes of subgroup of '$(unit)'."), + message, + error); + if (r < 0) + return r; + if (r == 0) + return 1; /* No authorization for now, but the async polkit stuff will call us again when it has it */ + + r = unit_kill(u, whom, subgroup, signo, SI_USER, /* value= */ 0, error); if (r < 0) return r; @@ -986,6 +1039,11 @@ const sd_bus_vtable bus_unit_vtable[] = { SD_BUS_NO_RESULT, bus_unit_method_kill, SD_BUS_VTABLE_UNPRIVILEGED), + SD_BUS_METHOD_WITH_ARGS("KillSubgroup", + SD_BUS_ARGS("s", whom, "s", subgroup, "i", signal), + SD_BUS_NO_RESULT, + bus_unit_method_kill_subgroup, + SD_BUS_VTABLE_UNPRIVILEGED), SD_BUS_METHOD_WITH_ARGS("QueueSignal", 
SD_BUS_ARGS("s", whom, "i", signal, "i", value), SD_BUS_NO_RESULT, diff --git a/src/core/dbus-unit.h b/src/core/dbus-unit.h index 9b879b11beb..97278931e0c 100644 --- a/src/core/dbus-unit.h +++ b/src/core/dbus-unit.h @@ -16,6 +16,7 @@ void bus_unit_send_removed_signal(Unit *u); int bus_unit_method_start_generic(sd_bus_message *message, Unit *u, JobType job_type, bool reload_if_possible, sd_bus_error *error); int bus_unit_method_enqueue_job(sd_bus_message *message, void *userdata, sd_bus_error *error); int bus_unit_method_kill(sd_bus_message *message, void *userdata, sd_bus_error *error); +int bus_unit_method_kill_subgroup(sd_bus_message *message, void *userdata, sd_bus_error *error); int bus_unit_method_reset_failed(sd_bus_message *message, void *userdata, sd_bus_error *error); int bus_unit_set_properties(Unit *u, sd_bus_message *message, UnitWriteFlags flags, bool commit, sd_bus_error *error); diff --git a/src/core/kill.c b/src/core/kill.c index 07f6ecc97e9..88c0edfa271 100644 --- a/src/core/kill.c +++ b/src/core/kill.c @@ -54,6 +54,8 @@ static const char* const kill_whom_table[_KILL_WHOM_MAX] = { [KILL_MAIN_FAIL] = "main-fail", [KILL_CONTROL_FAIL] = "control-fail", [KILL_ALL_FAIL] = "all-fail", + [KILL_CGROUP] = "cgroup", + [KILL_CGROUP_FAIL] = "cgroup-fail", }; DEFINE_STRING_TABLE_LOOKUP(kill_whom, KillWhom); diff --git a/src/core/kill.h b/src/core/kill.h index fff6e2c2ebe..199945bc8b8 100644 --- a/src/core/kill.h +++ b/src/core/kill.h @@ -31,6 +31,8 @@ typedef enum KillWhom { KILL_MAIN_FAIL, KILL_CONTROL_FAIL, KILL_ALL_FAIL, + KILL_CGROUP, + KILL_CGROUP_FAIL, _KILL_WHOM_MAX, _KILL_WHOM_INVALID = -EINVAL, } KillWhom; diff --git a/src/core/org.freedesktop.systemd1.conf b/src/core/org.freedesktop.systemd1.conf index 2b978a1e770..4d80c051216 100644 --- a/src/core/org.freedesktop.systemd1.conf +++ b/src/core/org.freedesktop.systemd1.conf @@ -246,6 +246,10 @@ send_interface="org.freedesktop.systemd1.Manager" send_member="KillUnit"/> + + @@ -410,6 +414,10 @@ 
send_interface="org.freedesktop.systemd1.Unit" send_member="Kill"/> + + diff --git a/src/core/unit.c b/src/core/unit.c index 1feb5226554..14c8c39bbbc 100644 --- a/src/core/unit.c +++ b/src/core/unit.c @@ -4041,6 +4041,7 @@ static int unit_kill_one( int unit_kill( Unit *u, KillWhom whom, + const char *subgroup, int signo, int code, int value, @@ -4060,11 +4061,19 @@ int unit_kill( assert(SIGNAL_VALID(signo)); assert(IN_SET(code, SI_USER, SI_QUEUE)); + if (subgroup) { + if (!IN_SET(whom, KILL_CGROUP, KILL_CGROUP_FAIL)) + return sd_bus_error_set(ret_error, SD_BUS_ERROR_NOT_SUPPORTED, "Killing by subgroup is only supported for 'cgroup' or 'cgroup-fail' modes."); + + if (!unit_cgroup_delegate(u)) + return sd_bus_error_set(ret_error, SD_BUS_ERROR_NOT_SUPPORTED, "Killing by subgroup is only available for units with control group delegation enabled."); + } + main_pid = unit_main_pid(u); control_pid = unit_control_pid(u); if (!UNIT_HAS_CGROUP_CONTEXT(u) && !main_pid && !control_pid) - return sd_bus_error_setf(ret_error, SD_BUS_ERROR_NOT_SUPPORTED, "Unit type does not support process killing."); + return sd_bus_error_set(ret_error, SD_BUS_ERROR_NOT_SUPPORTED, "Unit type does not support process killing."); if (IN_SET(whom, KILL_MAIN, KILL_MAIN_FAIL)) { if (!main_pid) @@ -4095,46 +4104,58 @@ int unit_kill( /* Note: if we shall enqueue rather than kill we won't do this via the cgroup mechanism, since it * doesn't really make much sense (and given that enqueued values are a relatively expensive * resource, and we shouldn't allow us to be subjects for such allocation sprees) */ - if (IN_SET(whom, KILL_ALL, KILL_ALL_FAIL) && code == SI_USER) { + if (IN_SET(whom, KILL_ALL, KILL_ALL_FAIL, KILL_CGROUP, KILL_CGROUP_FAIL) && code == SI_USER) { CGroupRuntime *crt = unit_get_cgroup_runtime(u); if (crt && crt->cgroup_path) { _cleanup_set_free_ Set *pid_set = NULL; + _cleanup_free_ char *joined = NULL; + const char *p; + + if (empty_or_root(subgroup)) + p = crt->cgroup_path; + else { + 
joined = path_join(crt->cgroup_path, subgroup); + if (!joined) + return -ENOMEM; + + p = joined; + } if (signo == SIGKILL) { - r = cg_kill_kernel_sigkill(crt->cgroup_path); + r = cg_kill_kernel_sigkill(p); if (r >= 0) { killed = true; - log_unit_info(u, "Killed unit cgroup with SIGKILL on client request."); + log_unit_info(u, "Killed unit cgroup '%s' with SIGKILL on client request.", p); goto finish; } if (r != -EOPNOTSUPP) { if (ret >= 0) sd_bus_error_set_errnof(ret_error, r, "Failed to kill unit cgroup: %m"); - RET_GATHER(ret, log_unit_warning_errno(u, r, "Failed to kill unit cgroup: %m")); + RET_GATHER(ret, log_unit_warning_errno(u, r, "Failed to kill unit cgroup '%s': %m", p)); goto finish; } /* Fall back to manual enumeration */ - } else { - /* Exclude the main/control pids from being killed via the cgroup if - * not SIGKILL */ + } else if (IN_SET(whom, KILL_ALL, KILL_ALL_FAIL)) { + /* Exclude the main/control pids from being killed via the cgroup if not + * SIGKILL */ r = unit_pid_set(u, &pid_set); if (r < 0) return log_oom(); } - r = cg_kill_recursive(crt->cgroup_path, signo, 0, pid_set, kill_common_log, u); + r = cg_kill_recursive(p, signo, /* flags= */ 0, pid_set, kill_common_log, u); if (r < 0 && !IN_SET(r, -ESRCH, -ENOENT)) { if (ret >= 0) sd_bus_error_set_errnof( ret_error, r, - "Failed to send signal SIG%s to auxiliary processes: %m", - signal_to_string(signo)); + "Failed to send signal SIG%s to processes in unit cgroup '%s': %m", + signal_to_string(signo), p); RET_GATHER(ret, log_unit_warning_errno( u, r, - "Failed to send signal SIG%s to auxiliary processes on client request: %m", - signal_to_string(signo))); + "Failed to send signal SIG%s to processes in unit cgroup '%s' on client request: %m", + signal_to_string(signo), p)); } killed = killed || r > 0; } @@ -4142,7 +4163,7 @@ int unit_kill( finish: /* If the "fail" versions of the operation are requested, then complain if the set of processes we killed is empty */ - if (ret >= 0 && !killed && 
IN_SET(whom, KILL_ALL_FAIL, KILL_CONTROL_FAIL, KILL_MAIN_FAIL)) + if (ret >= 0 && !killed && IN_SET(whom, KILL_ALL_FAIL, KILL_CONTROL_FAIL, KILL_MAIN_FAIL, KILL_CGROUP_FAIL)) return sd_bus_error_set_const(ret_error, BUS_ERROR_NO_SUCH_PROCESS, "No matching processes to kill"); return ret; diff --git a/src/core/unit.h b/src/core/unit.h index 6e0b58e62e3..62652540bc0 100644 --- a/src/core/unit.h +++ b/src/core/unit.h @@ -886,7 +886,7 @@ int unit_start(Unit *u, ActivationDetails *details); int unit_stop(Unit *u); int unit_reload(Unit *u); -int unit_kill(Unit *u, KillWhom w, int signo, int code, int value, sd_bus_error *ret_error); +int unit_kill(Unit *u, KillWhom w, const char *subgroup, int signo, int code, int value, sd_bus_error *ret_error); void unit_notify_cgroup_oom(Unit *u, bool managed_oom); diff --git a/src/test/test-execute.c b/src/test/test-execute.c index 23c1fe2eedb..4dad93cfb4f 100644 --- a/src/test/test-execute.c +++ b/src/test/test-execute.c @@ -63,7 +63,7 @@ static int time_handler(sd_event_source *s, uint64_t usec, void *userdata) { int r; log_error("Test timeout when testing %s", unit->id); - r = unit_kill(unit, KILL_ALL, SIGKILL, SI_USER, 0, NULL); + r = unit_kill(unit, KILL_ALL, /* subgroup= */ NULL, SIGKILL, SI_USER, /* value= */ 0, /* ret_error= */ NULL); if (r < 0) log_error_errno(r, "Failed to kill %s, ignoring: %m", unit->id); -- 2.47.3