git.ipfire.org Git - thirdparty/systemd.git/commitdiff
machined: optionally track machines in cgroup subgroups
author Lennart Poettering <lennart@poettering.net>
Wed, 21 May 2025 15:23:47 +0000 (17:23 +0200)
committer Lennart Poettering <lennart@poettering.net>
Fri, 11 Jul 2025 16:15:12 +0000 (18:15 +0200)
13 files changed:
man/org.freedesktop.machine1.xml
src/machine/machine-dbus.c
src/machine/machine-varlink.c
src/machine/machine.c
src/machine/machine.h
src/machine/machinectl.c
src/machine/machined-core.c
src/machine/machined-dbus.c
src/machine/machined-varlink.c
src/machine/machined.h
src/shared/bus-unit-procs.c
src/shared/output-mode.h
src/shared/varlink-io.systemd.Machine.c

index 35b2d64cc85401f31e9a973bdd37b672c43b95a0..ea3a70621597bab9a1384be9f998cefcca004003 100644 (file)
@@ -506,6 +506,8 @@ node /org/freedesktop/machine1/machine/rawhide {
       @org.freedesktop.DBus.Property.EmitsChangedSignal("const")
       readonly s Unit = '...';
       @org.freedesktop.DBus.Property.EmitsChangedSignal("const")
+      readonly s Subgroup = '...';
+      @org.freedesktop.DBus.Property.EmitsChangedSignal("const")
       readonly u Leader = ...;
       @org.freedesktop.DBus.Property.EmitsChangedSignal("const")
       readonly t LeaderPIDFDId = ...;
@@ -598,6 +600,8 @@ node /org/freedesktop/machine1/machine/rawhide {
 
     <variablelist class="dbus-property" generated="True" extra-ref="Unit"/>
 
+    <variablelist class="dbus-property" generated="True" extra-ref="Subgroup"/>
+
     <variablelist class="dbus-property" generated="True" extra-ref="Leader"/>
 
     <variablelist class="dbus-property" generated="True" extra-ref="LeaderPIDFDId"/>
@@ -676,6 +680,9 @@ node /org/freedesktop/machine1/machine/rawhide {
       <literal>running</literal>, or <literal>closing</literal>. Note that the state machine is not considered
       part of the API and states might be removed or added without this being considered API breakage.
       </para>
+
+      <para><varname>Subgroup</varname> contains the sub-control-group path this machine's processes reside
+      in, relative to the specified unit's control group.</para>
     </refsect2>
   </refsect1>
 
@@ -717,9 +724,9 @@ $ gdbus introspect --system \
       <title>Machine Objects</title>
       <para><function>CopyFromWithFlags()</function> and
       <function>CopyToWithFlags()</function> were added in version 252.</para>
-      <para><function>GetSSHInfo()</function>, <varname>VSockCID</varname>, <varname>SSHAddress</varname>
+      <para><function>GetSSHInfo()</function>, <varname>VSockCID</varname>, <varname>SSHAddress</varname>,
       and <varname>SSHPrivateKeyPath</varname> were added in version 256.</para>
-      <para><varname>LeaderPIDFDId</varname> was added in version 258.</para>
+      <para><varname>LeaderPIDFDId</varname> and <varname>Subgroup</varname> were added in version 258.</para>
     </refsect2>
   </refsect1>
 
index f4676106ac27368ea05a82070686e3b051c363a4..8935b594656008f17683d52a5d84ef3496ec6c73 100644 (file)
@@ -717,6 +717,7 @@ static const sd_bus_vtable machine_vtable[] = {
         SD_BUS_PROPERTY("Service", "s", NULL, offsetof(Machine, service), SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("Unit", "s", NULL, offsetof(Machine, unit), SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("Scope", "s", NULL, offsetof(Machine, unit), SD_BUS_VTABLE_PROPERTY_CONST|SD_BUS_VTABLE_HIDDEN),
+        SD_BUS_PROPERTY("Subgroup", "s", NULL, offsetof(Machine, subgroup), SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("Leader", "u", bus_property_get_pid, offsetof(Machine, leader.pid), SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("LeaderPIDFDId", "t", bus_property_get_pidfdid, offsetof(Machine, leader), SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("Class", "s", property_get_class, offsetof(Machine, class), SD_BUS_VTABLE_PROPERTY_CONST),
index 1c2013de51baea13383dfe82f38017b5b8948ddf..b5754e2c49fda063f1ed6b8f6ef84fd63773c288 100644 (file)
@@ -175,7 +175,7 @@ int vl_method_register(sd_varlink *link, sd_json_variant *parameters, sd_varlink
                 return r;
 
         if (!machine->allocate_unit) {
-                r = cg_pidref_get_unit(&machine->leader, &machine->unit);
+                r = cg_pidref_get_unit_full(&machine->leader, &machine->unit, &machine->subgroup);
                 if (r < 0)
                         return r;
         }
index 3975bc1754715fd24705543e9c0f9e467c38b76f..7ce512dd62adc3f5edc5f0569d0cd9b43fb5b3b1 100644 (file)
@@ -134,13 +134,19 @@ Machine* machine_free(Machine *m) {
         sd_bus_message_unref(m->create_message);
 
         free(m->name);
-        free(m->scope_job);
+
         free(m->state_file);
         free(m->service);
         free(m->root_directory);
+
+        free(m->unit);
+        free(m->subgroup);
+        free(m->scope_job);
+
         free(m->netif);
         free(m->ssh_address);
         free(m->ssh_private_key_path);
+
         return mfree(m);
 }
 
@@ -156,7 +162,7 @@ int machine_save(Machine *m) {
                 return 0;
 
         _cleanup_(unlink_and_freep) char *sl = NULL; /* auto-unlink! */
-        if (m->unit) {
+        if (m->unit && !m->subgroup) {
                 sl = strjoin("/run/systemd/machines/unit:", m->unit);
                 if (!sl)
                         return log_oom();
@@ -244,7 +250,7 @@ int machine_save(Machine *m) {
 static void machine_unlink(Machine *m) {
         assert(m);
 
-        if (m->unit) {
+        if (m->unit && !m->subgroup) {
                 const char *sl = strjoina("/run/systemd/machines/unit:", m->unit);
                 (void) unlink(sl);
         }
@@ -266,6 +272,7 @@ int machine_load(Machine *m) {
         r = parse_env_file(NULL, m->state_file,
                            "NAME",                 &name,
                            "SCOPE",                &m->unit,
+                           "SUBGROUP",             &m->subgroup,
                            "SCOPE_JOB",            &m->scope_job,
                            "SERVICE",              &m->service,
                            "ROOT",                 &m->root_directory,
@@ -380,6 +387,7 @@ static int machine_start_scope(
         assert(machine);
         assert(pidref_is_set(&machine->leader));
         assert(!machine->unit);
+        assert(!machine->subgroup);
 
         escaped = unit_name_escape(machine->name);
         if (!escaped)
@@ -476,9 +484,11 @@ static int machine_ensure_scope(Machine *m, sd_bus_message *properties, sd_bus_e
 
         assert(m->unit);
 
-        r = hashmap_ensure_put(&m->manager->machines_by_unit, &string_hash_ops, m->unit, m);
-        if (r < 0)
-                return r;
+        if (!m->subgroup) {
+                r = hashmap_ensure_put(&m->manager->machines_by_unit, &string_hash_ops, m->unit, m);
+                if (r < 0)
+                        return r;
+        }
 
         return 0;
 }
@@ -566,7 +576,7 @@ int machine_stop(Machine *m) {
         if (!IN_SET(m->class, MACHINE_CONTAINER, MACHINE_VM))
                 return -EOPNOTSUPP;
 
-        if (m->unit) {
+        if (m->unit && !m->subgroup) {
                 _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
                 char *job = NULL;
 
@@ -637,7 +647,7 @@ bool machine_may_gc(Machine *m, bool drop_not_started) {
                         return false;
         }
 
-        if (m->unit) {
+        if (m->unit && !m->subgroup) {
                 _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
 
                 r = manager_unit_is_active(m->manager, m->unit, &error);
@@ -683,14 +693,14 @@ int machine_kill(Machine *m, KillWhom whom, int signo) {
         if (!IN_SET(m->class, MACHINE_VM, MACHINE_CONTAINER))
                 return -EOPNOTSUPP;
 
-        if (!m->unit)
-                return -ESRCH;
-
         if (whom == KILL_LEADER) /* If we shall simply kill the leader, do so directly */
                 return pidref_kill(&m->leader, signo);
 
+        if (!m->unit)
+                return -ESRCH;
+
         /* Otherwise, make PID 1 do it for us, for the entire cgroup */
-        return manager_kill_unit(m->manager, m->unit, signo, NULL);
+        return manager_kill_unit(m->manager, m->unit, m->subgroup, signo, /* error= */ NULL);
 }
 
 int machine_openpt(Machine *m, int flags, char **ret_peer) {
@@ -1124,8 +1134,13 @@ void machine_release_unit(Machine *m) {
                 m->referenced = false;
         }
 
-        (void) hashmap_remove_value(m->manager->machines_by_unit, m->unit, m);
+        if (!m->subgroup)
+                (void) hashmap_remove_value(m->manager->machines_by_unit, m->unit, m);
+
         m->unit = mfree(m->unit);
+
+        /* Also free the subgroup, because it only makes sense in the context of the unit */
+        m->subgroup = mfree(m->subgroup);
 }
 
 int machine_get_uid_shift(Machine *m, uid_t *ret) {
index 870146fc2251e90cc718a728f09037a05369ea5e..8762263a8bf844dc9af4942309d38241a40e53a3 100644 (file)
@@ -45,6 +45,7 @@ typedef struct Machine {
         char *root_directory;
 
         char *unit;
+        char *subgroup;
         char *scope_job;
 
         PidRef leader;
index 116d5ef74d94bd71befd7ad6cca06c4d26978e19..7731fae70899c1c738e312c5c8b1de56ff0f1362 100644 (file)
@@ -412,9 +412,15 @@ static int list_images(int argc, char *argv[], void *userdata) {
         return show_table(table, "images");
 }
 
-static int show_unit_cgroup(sd_bus *bus, const char *unit, pid_t leader) {
+static int show_unit_cgroup(
+                sd_bus *bus,
+                const char *unit,
+                const char *subgroup,
+                pid_t leader) {
+
         _cleanup_free_ char *cgroup = NULL;
         _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+        OutputFlags extra_flags = 0;
         int r;
 
         assert(bus);
@@ -427,8 +433,16 @@ static int show_unit_cgroup(sd_bus *bus, const char *unit, pid_t leader) {
         if (isempty(cgroup))
                 return 0;
 
+        if (!empty_or_root(subgroup)) {
+                if (!path_extend(&cgroup, subgroup))
+                        return log_oom();
+
+                /* If we have a subcgroup, then hide all processes outside of it */
+                extra_flags |= OUTPUT_HIDE_EXTRA;
+        }
+
         unsigned c = MAX(LESS_BY(columns(), 18U), 10U);
-        r = unit_show_processes(bus, unit, cgroup, "\t\t  ", c, get_output_flags(), &error);
+        r = unit_show_processes(bus, unit, cgroup, "\t\t  ", c, get_output_flags() | extra_flags, &error);
         if (r == -EBADR) {
 
                 if (arg_transport == BUS_TRANSPORT_REMOTE)
@@ -494,6 +508,7 @@ typedef struct MachineStatusInfo {
         const char *class;
         const char *service;
         const char *unit;
+        const char *subgroup;
         const char *root_directory;
         pid_t leader;
         struct dual_timestamp timestamp;
@@ -589,7 +604,11 @@ static void print_machine_status_info(sd_bus *bus, MachineStatusInfo *i) {
 
         if (i->unit) {
                 printf("\t    Unit: %s\n", i->unit);
-                show_unit_cgroup(bus, i->unit, i->leader);
+
+                if (!empty_or_root(i->subgroup))
+                        printf("\tSubgroup: %s\n", i->subgroup);
+
+                show_unit_cgroup(bus, i->unit, i->subgroup, i->leader);
 
                 if (arg_transport == BUS_TRANSPORT_LOCAL)
 
@@ -636,6 +655,7 @@ static int show_machine_info(const char *verb, sd_bus *bus, const char *path, bo
                 { "Class",              "s",  NULL,          offsetof(MachineStatusInfo, class)               },
                 { "Service",            "s",  NULL,          offsetof(MachineStatusInfo, service)             },
                 { "Unit",               "s",  NULL,          offsetof(MachineStatusInfo, unit)                },
+                { "Subgroup",           "s",  NULL,          offsetof(MachineStatusInfo, subgroup)            },
                 { "RootDirectory",      "s",  NULL,          offsetof(MachineStatusInfo, root_directory)      },
                 { "Leader",             "u",  NULL,          offsetof(MachineStatusInfo, leader)              },
                 { "Timestamp",          "t",  NULL,          offsetof(MachineStatusInfo, timestamp.realtime)  },
index 93f1cc5ee80620f58adf3903adb7f08a8fc3fd42..075ea4a80317b20ebf54614e4a8452a6ff1384e3 100644 (file)
 #include "user-util.h"
 
 int manager_get_machine_by_pidref(Manager *m, const PidRef *pidref, Machine **ret) {
-        Machine *mm;
-        int r;
+        _cleanup_(pidref_done) PidRef current = PIDREF_NULL;
+        Machine *mm = NULL;
 
         assert(m);
         assert(pidref_is_set(pidref));
         assert(ret);
 
-        mm = hashmap_get(m->machines_by_leader, pidref);
-        if (!mm) {
-                _cleanup_free_ char *unit = NULL;
+        for (;;) {
+                /* First, compare by leader */
+                mm = hashmap_get(m->machines_by_leader, pidref);
+                if (mm)
+                        break;
 
-                r = cg_pidref_get_unit(pidref, &unit);
-                if (r >= 0)
+                /* Then look for the unit */
+                _cleanup_free_ char *unit = NULL;
+                if (cg_pidref_get_unit(pidref, &unit) >= 0) {
                         mm = hashmap_get(m->machines_by_unit, unit);
-        }
-        if (!mm) {
-                *ret = NULL;
-                return 0;
+                        if (mm)
+                                break;
+                }
+
+                /* Maybe this process is in a per-user unit? If so, let's go up the process tree and check
+                 * each ancestor: we should eventually hit PID 1 of the container tree, which we should be
+                 * able to recognize. */
+                _cleanup_(pidref_done) PidRef parent = PIDREF_NULL;
+                if (pidref_get_ppid_as_pidref(pidref, &parent) < 0)
+                        break;
+
+                pidref_done(&current);
+                current = TAKE_PIDREF(parent);
+                pidref = &current;
         }
 
         *ret = mm;
-        return 1;
+        return !!mm;
 }
 
 int manager_add_machine(Manager *m, const char *name, Machine **ret) {
index 6a31e6aa3be86bc5cdfd9155f34d473039b2fff3..8aa00ba3edd53e1e101f06c0541bb9ce0e0a323b 100644 (file)
@@ -411,7 +411,7 @@ static int method_register_machine_internal(sd_bus_message *message, bool read_n
         if (r == 0)
                 return 1; /* Will call us back */
 
-        r = cg_pidref_get_unit(&m->leader, &m->unit);
+        r = cg_pidref_get_unit_full(&m->leader, &m->unit, &m->subgroup);
         if (r < 0) {
                 r = sd_bus_error_set_errnof(error, r,
                                             "Failed to determine unit of process "PID_FMT" : %m",
@@ -1276,11 +1276,14 @@ int manager_stop_unit(Manager *manager, const char *unit, sd_bus_error *error, c
         return 1;
 }
 
-int manager_kill_unit(Manager *manager, const char *unit, int signo, sd_bus_error *error) {
+int manager_kill_unit(Manager *manager, const char *unit, const char *subgroup, int signo, sd_bus_error *reterr_error) {
         assert(manager);
         assert(unit);
 
-        return bus_call_method(manager->bus, bus_systemd_mgr, "KillUnit", error, NULL, "ssi", unit, "all", signo);
+        if (empty_or_root(subgroup))
+                return bus_call_method(manager->bus, bus_systemd_mgr, "KillUnit", reterr_error, NULL, "ssi", unit, "all", signo);
+
+        return bus_call_method(manager->bus, bus_systemd_mgr, "KillUnitSubgroup", reterr_error, NULL, "sssi", unit, "cgroup", subgroup, signo);
 }
 
 int manager_unit_is_active(Manager *manager, const char *unit, sd_bus_error *reterr_error) {
index f3351724b4164094c47fbf0053167a9dd5eb47f9..8551d703e7b9a36fc8343b32efd8e81623b82227 100644 (file)
@@ -477,6 +477,7 @@ static int list_machine_one_and_maybe_read_metadata(sd_varlink *link, Machine *m
                         JSON_BUILD_PAIR_STRING_NON_EMPTY("service", m->service),
                         JSON_BUILD_PAIR_STRING_NON_EMPTY("rootDirectory", m->root_directory),
                         JSON_BUILD_PAIR_STRING_NON_EMPTY("unit", m->unit),
+                        JSON_BUILD_PAIR_STRING_NON_EMPTY("subgroup", m->subgroup),
                         SD_JSON_BUILD_PAIR_CONDITION(pidref_is_set(&m->leader), "leader", JSON_BUILD_PIDREF(&m->leader)),
                         SD_JSON_BUILD_PAIR_CONDITION(dual_timestamp_is_set(&m->timestamp), "timestamp", JSON_BUILD_DUAL_TIMESTAMP(&m->timestamp)),
                         JSON_BUILD_PAIR_UNSIGNED_NOT_EQUAL("vSockCid", m->vsock_cid, VMADDR_CID_ANY),
index c4753b8bfa094a3755a9c65ddac14528a6c6277b..d03577be3cbbae6ae9fb584d82c45f993b2de73b 100644 (file)
@@ -10,7 +10,9 @@ typedef struct Manager {
         sd_bus *bus;
 
         Hashmap *machines;
-        Hashmap *machines_by_unit;
+        Hashmap *machines_by_unit;    /* This hashmap only tracks machines where a system-level unit
+                                       * encapsulates the machine fully, and exclusively. It's not used if a
+                                       * machine is run in a cgroup further down the tree. */
         Hashmap *machines_by_leader;
 
         sd_event_source *deferred_gc_event_source;
@@ -44,7 +46,7 @@ int match_properties_changed(sd_bus_message *message, void *userdata, sd_bus_err
 int match_job_removed(sd_bus_message *message, void *userdata, sd_bus_error *error);
 
 int manager_stop_unit(Manager *manager, const char *unit, sd_bus_error *error, char **job);
-int manager_kill_unit(Manager *manager, const char *unit, int signo, sd_bus_error *error);
+int manager_kill_unit(Manager *manager, const char *unit, const char *subgroup, int signo, sd_bus_error *error);
 int manager_unref_unit(Manager *m, const char *unit, sd_bus_error *error);
 int manager_unit_is_active(Manager *manager, const char *unit, sd_bus_error *reterr_errno);
 int manager_job_is_active(Manager *manager, const char *path, sd_bus_error *reterr_errno);
index 9aea7db7853282b7b6b19da56f49dfbdd5ed12d1..bd510efebd35659d0f274630e7334c6b24511d4e 100644 (file)
@@ -394,7 +394,13 @@ int unit_show_processes(
         if (r < 0)
                 goto finish;
 
-        r = dump_extra_processes(cgroups, prefix, n_columns, flags);
+        if (!FLAGS_SET(flags, OUTPUT_HIDE_EXTRA)) {
+                r = dump_extra_processes(cgroups, prefix, n_columns, flags);
+                if (r < 0)
+                        goto finish;
+        }
+
+        r = 0;
 
 finish:
         while ((cg = hashmap_first(cgroups)))
index ffd2e4fe8098128195d908e1d08c574b92b40bff..7ee17df9389b5f26944c004321aa02b239d73ecf 100644 (file)
@@ -48,6 +48,7 @@ typedef enum OutputFlags {
         OUTPUT_KERNEL_THREADS    = 1 << 9,
         OUTPUT_CGROUP_XATTRS     = 1 << 10,
         OUTPUT_CGROUP_ID         = 1 << 11,
+        OUTPUT_HIDE_EXTRA        = 1 << 12,
 } OutputFlags;
 
 sd_json_format_flags_t output_mode_to_json_format_flags(OutputMode m);
index b80cbc65d0257c2ad3d32a0ae0df9d4a35de1ced..1bb7c00fa18ce0940610170ea7e4a57aa2997264 100644 (file)
@@ -96,7 +96,9 @@ static SD_VARLINK_DEFINE_METHOD_FULL(
                 SD_VARLINK_FIELD_COMMENT("OS release information of the machine. It contains an array of key value pairs read from the os-release(5) file in the image."),
                 SD_VARLINK_DEFINE_OUTPUT(OSRelease, SD_VARLINK_STRING, SD_VARLINK_NULLABLE|SD_VARLINK_ARRAY),
                 SD_VARLINK_FIELD_COMMENT("Return the base UID/GID of the machine"),
-                SD_VARLINK_DEFINE_OUTPUT(UIDShift, SD_VARLINK_INT, SD_VARLINK_NULLABLE));
+                SD_VARLINK_DEFINE_OUTPUT(UIDShift, SD_VARLINK_INT, SD_VARLINK_NULLABLE),
+                SD_VARLINK_FIELD_COMMENT("Subcgroup path of the machine, relative to the unit's cgroup path"),
+                SD_VARLINK_DEFINE_OUTPUT(Subgroup, SD_VARLINK_STRING, SD_VARLINK_NULLABLE));
 
 static SD_VARLINK_DEFINE_ENUM_TYPE(
                 MachineOpenMode,