From: Lennart Poettering Date: Wed, 21 May 2025 15:23:47 +0000 (+0200) Subject: machined: optionally track machines in cgroup subgroups X-Git-Tag: v258-rc1~79^2~19 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=d5feeb373cc13d96fa66967a6bdb7461df32c920;p=thirdparty%2Fsystemd.git machined: optionally track machines in cgroup subgroups --- diff --git a/man/org.freedesktop.machine1.xml b/man/org.freedesktop.machine1.xml index 35b2d64cc85..ea3a7062159 100644 --- a/man/org.freedesktop.machine1.xml +++ b/man/org.freedesktop.machine1.xml @@ -506,6 +506,8 @@ node /org/freedesktop/machine1/machine/rawhide { @org.freedesktop.DBus.Property.EmitsChangedSignal("const") readonly s Unit = '...'; @org.freedesktop.DBus.Property.EmitsChangedSignal("const") + readonly s Subgroup = '...'; + @org.freedesktop.DBus.Property.EmitsChangedSignal("const") readonly u Leader = ...; @org.freedesktop.DBus.Property.EmitsChangedSignal("const") readonly t LeaderPIDFDId = ...; @@ -598,6 +600,8 @@ node /org/freedesktop/machine1/machine/rawhide { + + @@ -676,6 +680,9 @@ node /org/freedesktop/machine1/machine/rawhide { running, or closing. Note that the state machine is not considered part of the API and states might be removed or added without this being considered API breakage. + + Subgroup contains the sub-control-group path this machine's processes reside + in, relative to the specified unit's control group. @@ -717,9 +724,9 @@ $ gdbus introspect --system \ Machine Objects CopyFromWithFlags() and CopyToWithFlags() were added in version 252. - GetSSHInfo(), VSockCID, SSHAddress + GetSSHInfo(), VSockCID, SSHAddress, and SSHPrivateKeyPath were added in version 256. - LeaderPIDFDId was added in version 258. + LeaderPIDFDId and Subgroup were added in version 258. 
diff --git a/src/machine/machine-dbus.c b/src/machine/machine-dbus.c index f4676106ac2..8935b594656 100644 --- a/src/machine/machine-dbus.c +++ b/src/machine/machine-dbus.c @@ -717,6 +717,7 @@ static const sd_bus_vtable machine_vtable[] = { SD_BUS_PROPERTY("Service", "s", NULL, offsetof(Machine, service), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("Unit", "s", NULL, offsetof(Machine, unit), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("Scope", "s", NULL, offsetof(Machine, unit), SD_BUS_VTABLE_PROPERTY_CONST|SD_BUS_VTABLE_HIDDEN), + SD_BUS_PROPERTY("Subgroup", "s", NULL, offsetof(Machine, subgroup), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("Leader", "u", bus_property_get_pid, offsetof(Machine, leader.pid), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("LeaderPIDFDId", "t", bus_property_get_pidfdid, offsetof(Machine, leader), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("Class", "s", property_get_class, offsetof(Machine, class), SD_BUS_VTABLE_PROPERTY_CONST), diff --git a/src/machine/machine-varlink.c b/src/machine/machine-varlink.c index 1c2013de51b..b5754e2c49f 100644 --- a/src/machine/machine-varlink.c +++ b/src/machine/machine-varlink.c @@ -175,7 +175,7 @@ int vl_method_register(sd_varlink *link, sd_json_variant *parameters, sd_varlink return r; if (!machine->allocate_unit) { - r = cg_pidref_get_unit(&machine->leader, &machine->unit); + r = cg_pidref_get_unit_full(&machine->leader, &machine->unit, &machine->subgroup); if (r < 0) return r; } diff --git a/src/machine/machine.c b/src/machine/machine.c index 3975bc17547..7ce512dd62a 100644 --- a/src/machine/machine.c +++ b/src/machine/machine.c @@ -134,13 +134,19 @@ Machine* machine_free(Machine *m) { sd_bus_message_unref(m->create_message); free(m->name); - free(m->scope_job); + free(m->state_file); free(m->service); free(m->root_directory); + + free(m->unit); + free(m->subgroup); + free(m->scope_job); + free(m->netif); free(m->ssh_address); free(m->ssh_private_key_path); + return mfree(m); } @@ 
-156,7 +162,7 @@ int machine_save(Machine *m) { return 0; _cleanup_(unlink_and_freep) char *sl = NULL; /* auto-unlink! */ - if (m->unit) { + if (m->unit && !m->subgroup) { sl = strjoin("/run/systemd/machines/unit:", m->unit); if (!sl) return log_oom(); @@ -244,7 +250,7 @@ int machine_save(Machine *m) { static void machine_unlink(Machine *m) { assert(m); - if (m->unit) { + if (m->unit && !m->subgroup) { const char *sl = strjoina("/run/systemd/machines/unit:", m->unit); (void) unlink(sl); } @@ -266,6 +272,7 @@ int machine_load(Machine *m) { r = parse_env_file(NULL, m->state_file, "NAME", &name, "SCOPE", &m->unit, + "SUBGROUP", &m->subgroup, "SCOPE_JOB", &m->scope_job, "SERVICE", &m->service, "ROOT", &m->root_directory, @@ -380,6 +387,7 @@ static int machine_start_scope( assert(machine); assert(pidref_is_set(&machine->leader)); assert(!machine->unit); + assert(!machine->subgroup); escaped = unit_name_escape(machine->name); if (!escaped) @@ -476,9 +484,11 @@ static int machine_ensure_scope(Machine *m, sd_bus_message *properties, sd_bus_e assert(m->unit); - r = hashmap_ensure_put(&m->manager->machines_by_unit, &string_hash_ops, m->unit, m); - if (r < 0) - return r; + if (!m->subgroup) { + r = hashmap_ensure_put(&m->manager->machines_by_unit, &string_hash_ops, m->unit, m); + if (r < 0) + return r; + } return 0; } @@ -566,7 +576,7 @@ int machine_stop(Machine *m) { if (!IN_SET(m->class, MACHINE_CONTAINER, MACHINE_VM)) return -EOPNOTSUPP; - if (m->unit) { + if (m->unit && !m->subgroup) { _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL; char *job = NULL; @@ -637,7 +647,7 @@ bool machine_may_gc(Machine *m, bool drop_not_started) { return false; } - if (m->unit) { + if (m->unit && !m->subgroup) { _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL; r = manager_unit_is_active(m->manager, m->unit, &error); @@ -683,14 +693,14 @@ int machine_kill(Machine *m, KillWhom whom, int signo) { if (!IN_SET(m->class, MACHINE_VM, MACHINE_CONTAINER)) return 
-EOPNOTSUPP; - if (!m->unit) - return -ESRCH; - if (whom == KILL_LEADER) /* If we shall simply kill the leader, do so directly */ return pidref_kill(&m->leader, signo); + if (!m->unit) + return -ESRCH; + /* Otherwise, make PID 1 do it for us, for the entire cgroup */ - return manager_kill_unit(m->manager, m->unit, signo, NULL); + return manager_kill_unit(m->manager, m->unit, m->subgroup, signo, /* error= */ NULL); } int machine_openpt(Machine *m, int flags, char **ret_peer) { @@ -1124,8 +1134,13 @@ void machine_release_unit(Machine *m) { m->referenced = false; } - (void) hashmap_remove_value(m->manager->machines_by_unit, m->unit, m); + if (!m->subgroup) + (void) hashmap_remove_value(m->manager->machines_by_unit, m->unit, m); + m->unit = mfree(m->unit); + + /* Also free the subgroup, because it only makes sense in the context of the unit */ + m->subgroup = mfree(m->subgroup); } int machine_get_uid_shift(Machine *m, uid_t *ret) { diff --git a/src/machine/machine.h b/src/machine/machine.h index 870146fc225..8762263a8bf 100644 --- a/src/machine/machine.h +++ b/src/machine/machine.h @@ -45,6 +45,7 @@ typedef struct Machine { char *root_directory; char *unit; + char *subgroup; char *scope_job; PidRef leader; diff --git a/src/machine/machinectl.c b/src/machine/machinectl.c index 116d5ef74d9..7731fae7089 100644 --- a/src/machine/machinectl.c +++ b/src/machine/machinectl.c @@ -412,9 +412,15 @@ static int list_images(int argc, char *argv[], void *userdata) { return show_table(table, "images"); } -static int show_unit_cgroup(sd_bus *bus, const char *unit, pid_t leader) { +static int show_unit_cgroup( + sd_bus *bus, + const char *unit, + const char *subgroup, + pid_t leader) { + _cleanup_free_ char *cgroup = NULL; _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL; + OutputFlags extra_flags = 0; int r; assert(bus); @@ -427,8 +433,16 @@ static int show_unit_cgroup(sd_bus *bus, const char *unit, pid_t leader) { if (isempty(cgroup)) return 0; + if 
(!empty_or_root(subgroup)) { + if (!path_extend(&cgroup, subgroup)) + return log_oom(); + + /* If we have a subcgroup, then hide all processes outside of it */ + extra_flags |= OUTPUT_HIDE_EXTRA; + } + unsigned c = MAX(LESS_BY(columns(), 18U), 10U); - r = unit_show_processes(bus, unit, cgroup, "\t\t ", c, get_output_flags(), &error); + r = unit_show_processes(bus, unit, cgroup, "\t\t ", c, get_output_flags() | extra_flags, &error); if (r == -EBADR) { if (arg_transport == BUS_TRANSPORT_REMOTE) @@ -494,6 +508,7 @@ typedef struct MachineStatusInfo { const char *class; const char *service; const char *unit; + const char *subgroup; const char *root_directory; pid_t leader; struct dual_timestamp timestamp; @@ -589,7 +604,11 @@ static void print_machine_status_info(sd_bus *bus, MachineStatusInfo *i) { if (i->unit) { printf("\t Unit: %s\n", i->unit); - show_unit_cgroup(bus, i->unit, i->leader); + + if (!empty_or_root(i->subgroup)) + printf("\tSubgroup: %s\n", i->subgroup); + + show_unit_cgroup(bus, i->unit, i->subgroup, i->leader); if (arg_transport == BUS_TRANSPORT_LOCAL) @@ -636,6 +655,7 @@ static int show_machine_info(const char *verb, sd_bus *bus, const char *path, bo { "Class", "s", NULL, offsetof(MachineStatusInfo, class) }, { "Service", "s", NULL, offsetof(MachineStatusInfo, service) }, { "Unit", "s", NULL, offsetof(MachineStatusInfo, unit) }, + { "Subgroup", "s", NULL, offsetof(MachineStatusInfo, subgroup) }, { "RootDirectory", "s", NULL, offsetof(MachineStatusInfo, root_directory) }, { "Leader", "u", NULL, offsetof(MachineStatusInfo, leader) }, { "Timestamp", "t", NULL, offsetof(MachineStatusInfo, timestamp.realtime) }, diff --git a/src/machine/machined-core.c b/src/machine/machined-core.c index 93f1cc5ee80..075ea4a8031 100644 --- a/src/machine/machined-core.c +++ b/src/machine/machined-core.c @@ -22,28 +22,41 @@ #include "user-util.h" int manager_get_machine_by_pidref(Manager *m, const PidRef *pidref, Machine **ret) { - Machine *mm; - int r; + 
_cleanup_(pidref_done) PidRef current = PIDREF_NULL; + Machine *mm = NULL; assert(m); assert(pidref_is_set(pidref)); assert(ret); - mm = hashmap_get(m->machines_by_leader, pidref); - if (!mm) { - _cleanup_free_ char *unit = NULL; + for (;;) { + /* First, compare by leader */ + mm = hashmap_get(m->machines_by_leader, pidref); + if (mm) + break; - r = cg_pidref_get_unit(pidref, &unit); - if (r >= 0) + /* Then look for the unit */ + _cleanup_free_ char *unit = NULL; + if (cg_pidref_get_unit(pidref, &unit) >= 0) { mm = hashmap_get(m->machines_by_unit, unit); - } - if (!mm) { - *ret = NULL; - return 0; + if (mm) + break; + } + + /* Maybe this process is in per-user unit? If so, let's go up the process tree, and check + * that, we should eventually hit PID 1 of the container tree, which we should be able to + * recognize. */ + _cleanup_(pidref_done) PidRef parent = PIDREF_NULL; + if (pidref_get_ppid_as_pidref(pidref, &parent) < 0) + break; + + pidref_done(&current); + current = TAKE_PIDREF(parent); + pidref = &current; } *ret = mm; - return 1; + return !!mm; } int manager_add_machine(Manager *m, const char *name, Machine **ret) { diff --git a/src/machine/machined-dbus.c b/src/machine/machined-dbus.c index 6a31e6aa3be..8aa00ba3edd 100644 --- a/src/machine/machined-dbus.c +++ b/src/machine/machined-dbus.c @@ -411,7 +411,7 @@ static int method_register_machine_internal(sd_bus_message *message, bool read_n if (r == 0) return 1; /* Will call us back */ - r = cg_pidref_get_unit(&m->leader, &m->unit); + r = cg_pidref_get_unit_full(&m->leader, &m->unit, &m->subgroup); if (r < 0) { r = sd_bus_error_set_errnof(error, r, "Failed to determine unit of process "PID_FMT" : %m", @@ -1276,11 +1276,14 @@ int manager_stop_unit(Manager *manager, const char *unit, sd_bus_error *error, c return 1; } -int manager_kill_unit(Manager *manager, const char *unit, int signo, sd_bus_error *error) { +int manager_kill_unit(Manager *manager, const char *unit, const char *subgroup, int signo, sd_bus_error
*reterr_error) { assert(manager); assert(unit); - return bus_call_method(manager->bus, bus_systemd_mgr, "KillUnit", error, NULL, "ssi", unit, "all", signo); + if (empty_or_root(subgroup)) + return bus_call_method(manager->bus, bus_systemd_mgr, "KillUnit", reterr_error, NULL, "ssi", unit, "all", signo); + + return bus_call_method(manager->bus, bus_systemd_mgr, "KillUnitSubgroup", reterr_error, NULL, "sssi", unit, "cgroup", subgroup, signo); } int manager_unit_is_active(Manager *manager, const char *unit, sd_bus_error *reterr_error) { diff --git a/src/machine/machined-varlink.c b/src/machine/machined-varlink.c index f3351724b41..8551d703e7b 100644 --- a/src/machine/machined-varlink.c +++ b/src/machine/machined-varlink.c @@ -477,6 +477,7 @@ static int list_machine_one_and_maybe_read_metadata(sd_varlink *link, Machine *m JSON_BUILD_PAIR_STRING_NON_EMPTY("service", m->service), JSON_BUILD_PAIR_STRING_NON_EMPTY("rootDirectory", m->root_directory), JSON_BUILD_PAIR_STRING_NON_EMPTY("unit", m->unit), + JSON_BUILD_PAIR_STRING_NON_EMPTY("subgroup", m->subgroup), SD_JSON_BUILD_PAIR_CONDITION(pidref_is_set(&m->leader), "leader", JSON_BUILD_PIDREF(&m->leader)), SD_JSON_BUILD_PAIR_CONDITION(dual_timestamp_is_set(&m->timestamp), "timestamp", JSON_BUILD_DUAL_TIMESTAMP(&m->timestamp)), JSON_BUILD_PAIR_UNSIGNED_NOT_EQUAL("vSockCid", m->vsock_cid, VMADDR_CID_ANY), diff --git a/src/machine/machined.h b/src/machine/machined.h index c4753b8bfa0..d03577be3cb 100644 --- a/src/machine/machined.h +++ b/src/machine/machined.h @@ -10,7 +10,9 @@ typedef struct Manager { sd_bus *bus; Hashmap *machines; - Hashmap *machines_by_unit; + Hashmap *machines_by_unit; /* This hashmap only tracks machines where a system-level unit encapsulates + * the machine fully, and exclusively. It's not used if a machine is + * run in a cgroup further down the tree.
*/ Hashmap *machines_by_leader; sd_event_source *deferred_gc_event_source; @@ -44,7 +46,7 @@ int match_properties_changed(sd_bus_message *message, void *userdata, sd_bus_err int match_job_removed(sd_bus_message *message, void *userdata, sd_bus_error *error); int manager_stop_unit(Manager *manager, const char *unit, sd_bus_error *error, char **job); -int manager_kill_unit(Manager *manager, const char *unit, int signo, sd_bus_error *error); +int manager_kill_unit(Manager *manager, const char *unit, const char *subgroup, int signo, sd_bus_error *error); int manager_unref_unit(Manager *m, const char *unit, sd_bus_error *error); int manager_unit_is_active(Manager *manager, const char *unit, sd_bus_error *reterr_errno); int manager_job_is_active(Manager *manager, const char *path, sd_bus_error *reterr_errno); diff --git a/src/shared/bus-unit-procs.c b/src/shared/bus-unit-procs.c index 9aea7db7853..bd510efebd3 100644 --- a/src/shared/bus-unit-procs.c +++ b/src/shared/bus-unit-procs.c @@ -394,7 +394,13 @@ int unit_show_processes( if (r < 0) goto finish; - r = dump_extra_processes(cgroups, prefix, n_columns, flags); + if (!FLAGS_SET(flags, OUTPUT_HIDE_EXTRA)) { + r = dump_extra_processes(cgroups, prefix, n_columns, flags); + if (r < 0) + goto finish; + } + + r = 0; finish: while ((cg = hashmap_first(cgroups))) diff --git a/src/shared/output-mode.h b/src/shared/output-mode.h index ffd2e4fe809..7ee17df9389 100644 --- a/src/shared/output-mode.h +++ b/src/shared/output-mode.h @@ -48,6 +48,7 @@ typedef enum OutputFlags { OUTPUT_KERNEL_THREADS = 1 << 9, OUTPUT_CGROUP_XATTRS = 1 << 10, OUTPUT_CGROUP_ID = 1 << 11, + OUTPUT_HIDE_EXTRA = 1 << 12, } OutputFlags; sd_json_format_flags_t output_mode_to_json_format_flags(OutputMode m); diff --git a/src/shared/varlink-io.systemd.Machine.c b/src/shared/varlink-io.systemd.Machine.c index b80cbc65d02..1bb7c00fa18 100644 --- a/src/shared/varlink-io.systemd.Machine.c +++ b/src/shared/varlink-io.systemd.Machine.c @@ -96,7 +96,9 @@ static 
SD_VARLINK_DEFINE_METHOD_FULL( SD_VARLINK_FIELD_COMMENT("OS release information of the machine. It contains an array of key value pairs read from the os-release(5) file in the image."), SD_VARLINK_DEFINE_OUTPUT(OSRelease, SD_VARLINK_STRING, SD_VARLINK_NULLABLE|SD_VARLINK_ARRAY), SD_VARLINK_FIELD_COMMENT("Return the base UID/GID of the machine"), - SD_VARLINK_DEFINE_OUTPUT(UIDShift, SD_VARLINK_INT, SD_VARLINK_NULLABLE)); + SD_VARLINK_DEFINE_OUTPUT(UIDShift, SD_VARLINK_INT, SD_VARLINK_NULLABLE), + SD_VARLINK_FIELD_COMMENT("Subcgroup path of the machine, relative to the unit's cgroup path"), + SD_VARLINK_DEFINE_OUTPUT(Subgroup, SD_VARLINK_STRING, SD_VARLINK_NULLABLE)); static SD_VARLINK_DEFINE_ENUM_TYPE( MachineOpenMode,