]> git.ipfire.org Git - thirdparty/systemd.git/commitdiff
machined: also track 'supervisor' process of a machine
authorLennart Poettering <lennart@poettering.net>
Wed, 2 Jul 2025 09:20:23 +0000 (11:20 +0200)
committerLennart Poettering <lennart@poettering.net>
Fri, 11 Jul 2025 16:15:12 +0000 (18:15 +0200)
So far, machined strictly tracked the "leader" process of a machine,
i.e. the topmost process that is actually the payload of the machine.
Its runtime also defines the runtime of the machine, and we can directly
interact with it if we need to, for example to join the namespaces of a
container, or to kill it.

Let's optionally also track the "supervisor" process of a machine, i.e.
the host process that manages the payload if there is one. This is
generally useful info, but in particular is useful because we might need
to communicate with it to shut down a machine without cooperation of the
payload. Traditionally we did this by simply stopping the unit of the
machine, but this is not doable now that the host machined can be used
to track per-user machines.

In the long run we probably want a more bespoke protocol between
machined and supervisors (so that we can execute other commands too,
such as request cooperative reboots/shutdowns), but that's for later.

Some environments call the concept "monitor" rather than "supervisor" or
use some other term. I stuck to "supervisor" because nspawn uses this,
and ultimately one name is as good as another.

And of course, in other implementations of VM or container managers
there might not be a single process tracking each VM/container. Because
of this, the concept of a supervisor is optional.

man/machinectl.xml
man/org.freedesktop.machine1.xml
src/machine/machine-dbus.c
src/machine/machine-varlink.c
src/machine/machine.c
src/machine/machine.h
src/machine/machined-dbus.c
src/machine/machined-varlink.c
src/shared/varlink-io.systemd.Machine.c

index 13fa0eae7113e6bdeb6c5c8e5cd168a6bcc4d0e3..e64a20bb1d045f4d4e42e9041426be9a12248419 100644 (file)
       <varlistentry>
         <term><option>--kill-whom=</option></term>
 
-        <listitem><para>When used with <command>kill</command>, choose
-        which processes to kill. Must be one of
-        <option>leader</option>, or <option>all</option> to select
-        whether to kill only the leader process of the machine or all
-        processes of the machine. If omitted, defaults to
-        <option>all</option>.</para>
+        <listitem><para>When used with <command>kill</command>, choose which processes to kill. Must be one
+        of <option>leader</option>, <option>supervisor</option>, or <option>all</option> to select whether to
+        kill only the leader process of the machine, the supervisor process of the machine, or all processes
+        of the machine. If omitted, defaults to <option>all</option>.</para>
 
         <xi:include href="version-info.xml" xpointer="v206"/></listitem>
       </varlistentry>
index c52aed0dbc0f767939f50b53d92faec7338cdf3a..ccd81be659cf35a97a9566acbad3f158690eda0c 100644 (file)
@@ -512,6 +512,10 @@ node /org/freedesktop/machine1/machine/rawhide {
       @org.freedesktop.DBus.Property.EmitsChangedSignal("const")
       readonly t LeaderPIDFDId = ...;
       @org.freedesktop.DBus.Property.EmitsChangedSignal("const")
+      readonly u Supervisor = ...;
+      @org.freedesktop.DBus.Property.EmitsChangedSignal("const")
+      readonly t SupervisorPIDFDId = ...;
+      @org.freedesktop.DBus.Property.EmitsChangedSignal("const")
       readonly s Class = '...';
       @org.freedesktop.DBus.Property.EmitsChangedSignal("const")
       readonly s RootDirectory = '...';
@@ -608,6 +612,10 @@ node /org/freedesktop/machine1/machine/rawhide {
 
     <variablelist class="dbus-property" generated="True" extra-ref="LeaderPIDFDId"/>
 
+    <variablelist class="dbus-property" generated="True" extra-ref="Supervisor"/>
+
+    <variablelist class="dbus-property" generated="True" extra-ref="SupervisorPIDFDId"/>
+
     <variablelist class="dbus-property" generated="True" extra-ref="Class"/>
 
     <variablelist class="dbus-property" generated="True" extra-ref="RootDirectory"/>
@@ -655,11 +663,20 @@ node /org/freedesktop/machine1/machine/rawhide {
 
       <para><varname>Unit</varname> is the systemd scope or service unit name for the machine.</para>
 
-      <para><varname>Leader</varname> is the PID of the leader process of the machine.</para>
+      <para><varname>Leader</varname> is the PID of the leader process of the machine. (The leader process is the
+      top-level process of the payload of the machine, i.e. for containers PID 1 inside the container, and
+      for VMs the process encapsulating the VM. Its lifetime defines the lifetime of the machine.)</para>
 
       <para><varname>LeaderPIDFDId</varname> encodes the Linux pidfd inode ID of the leader process of the
       machine.</para>
 
+      <para><varname>Supervisor</varname> is the PID of the supervisor process of the machine. (The
+      supervisor process is the process that manages the machine, if there's a separate one for it. A
+      supervisor process is not always defined, and does not define the machine's lifetime.)</para>
+
+      <para><varname>SupervisorPIDFDId</varname> encodes the Linux pidfd inode ID of the supervisor process of the
+      machine.</para>
+
       <para><varname>Class</varname> is the class of the machine and is either the string "vm" (for real VMs
       based on virtualized hardware) or "container" (for lightweight userspace virtualization sharing the
       same kernel as the host).</para>
@@ -732,8 +749,9 @@ $ gdbus introspect --system \
       <function>CopyToWithFlags()</function> were added in version 252.</para>
       <para><function>GetSSHInfo()</function>, <varname>VSockCID</varname>, <varname>SSHAddress</varname>,
       and <varname>SSHPrivateKeyPath</varname> were added in version 256.</para>
-      <para><varname>LeaderPIDFDId</varname>, <varname>Subgroup</varname>, and <varname>UID</varname> were
-      added in version 258.</para>
+      <para><varname>LeaderPIDFDId</varname>, <varname>Supervisor</varname>,
+      <varname>SupervisorPIDFDId</varname>, <varname>Subgroup</varname>, and <varname>UID</varname> were added
+      in version 258.</para>
     </refsect2>
   </refsect1>
 
index 321ddbfdccd505c5c59990701c3a6cbff839a9cb..f903a1371e0e9da66c1cc2f1ee572126d9b51ec2 100644 (file)
@@ -735,6 +735,8 @@ static const sd_bus_vtable machine_vtable[] = {
         SD_BUS_PROPERTY("Subgroup", "s", NULL, offsetof(Machine, subgroup), SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("Leader", "u", bus_property_get_pid, offsetof(Machine, leader.pid), SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("LeaderPIDFDId", "t", bus_property_get_pidfdid, offsetof(Machine, leader), SD_BUS_VTABLE_PROPERTY_CONST),
+        SD_BUS_PROPERTY("Supervisor", "u", bus_property_get_pid, offsetof(Machine, supervisor.pid), SD_BUS_VTABLE_PROPERTY_CONST),
+        SD_BUS_PROPERTY("SupervisorPIDFDId", "t", bus_property_get_pidfdid, offsetof(Machine, supervisor), SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("Class", "s", property_get_class, offsetof(Machine, class), SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("RootDirectory", "s", NULL, offsetof(Machine, root_directory), SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("NetworkInterfaces", "ai", property_get_netif, 0, SD_BUS_VTABLE_PROPERTY_CONST),
index a773094cfe7d4d181e3d9c16b80fbe7ccafd9525..776654b51d14332b3f7543abca35e44dec1f954c 100644 (file)
@@ -46,8 +46,8 @@ static int machine_name(const char *name, sd_json_variant *variant, sd_json_disp
         return 0;
 }
 
-static int machine_leader(const char *name, sd_json_variant *variant, sd_json_dispatch_flags_t flags, void *userdata) {
-        PidRef *leader = ASSERT_PTR(userdata);
+static int machine_pidref(const char *name, sd_json_variant *variant, sd_json_dispatch_flags_t flags, void *userdata) {
+        PidRef *pidref = ASSERT_PTR(userdata);
         _cleanup_(pidref_done) PidRef temp = PIDREF_NULL;
         int r;
 
@@ -56,14 +56,14 @@ static int machine_leader(const char *name, sd_json_variant *variant, sd_json_di
                 return r;
 
         if (temp.pid == 1) /* refuse PID 1 */
-                return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is not a valid leader PID.", strna(name));
+                return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is not a valid PID.", strna(name));
 
         /* When both leader and leaderProcessId are specified, they must be consistent with each other. */
-        if (pidref_is_set(leader) && !pidref_equal(leader, &temp))
-                return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' conflicts with already dispatched leader PID.", strna(name));
+        if (pidref_is_set(pidref) && !pidref_equal(pidref, &temp))
+                return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' conflicts with already dispatched PID.", strna(name));
 
-        pidref_done(leader);
-        *leader = TAKE_PIDREF(temp);
+        pidref_done(pidref);
+        *pidref = TAKE_PIDREF(temp);
 
         return 0;
 }
@@ -128,18 +128,20 @@ int vl_method_register(sd_varlink *link, sd_json_variant *parameters, sd_varlink
         int r;
 
         static const sd_json_dispatch_field dispatch_table[] = {
-                { "name",              SD_JSON_VARIANT_STRING,        machine_name,             offsetof(Machine, name),                 SD_JSON_MANDATORY },
-                { "id",                SD_JSON_VARIANT_STRING,        sd_json_dispatch_id128,   offsetof(Machine, id),                   0                 },
-                { "service",           SD_JSON_VARIANT_STRING,        sd_json_dispatch_string,  offsetof(Machine, service),              0                 },
-                { "class",             SD_JSON_VARIANT_STRING,        dispatch_machine_class,   offsetof(Machine, class),                SD_JSON_MANDATORY },
-                { "leader",            _SD_JSON_VARIANT_TYPE_INVALID, machine_leader,           offsetof(Machine, leader),               SD_JSON_STRICT    },
-                { "leaderProcessId",   SD_JSON_VARIANT_OBJECT,        machine_leader,           offsetof(Machine, leader),               SD_JSON_STRICT    },
-                { "rootDirectory",     SD_JSON_VARIANT_STRING,        json_dispatch_path,       offsetof(Machine, root_directory),       0                 },
-                { "ifIndices",         SD_JSON_VARIANT_ARRAY,         machine_ifindices,        0,                                       0                 },
-                { "vSockCid",          _SD_JSON_VARIANT_TYPE_INVALID, machine_cid,              offsetof(Machine, vsock_cid),            0                 },
-                { "sshAddress",        SD_JSON_VARIANT_STRING,        sd_json_dispatch_string,  offsetof(Machine, ssh_address),          SD_JSON_STRICT    },
-                { "sshPrivateKeyPath", SD_JSON_VARIANT_STRING,        json_dispatch_path,       offsetof(Machine, ssh_private_key_path), 0                 },
-                { "allocateUnit",      SD_JSON_VARIANT_BOOLEAN,       sd_json_dispatch_stdbool, offsetof(Machine, allocate_unit),        0                 },
+                { "name",                SD_JSON_VARIANT_STRING,        machine_name,             offsetof(Machine, name),                 SD_JSON_MANDATORY },
+                { "id",                  SD_JSON_VARIANT_STRING,        sd_json_dispatch_id128,   offsetof(Machine, id),                   0                 },
+                { "service",             SD_JSON_VARIANT_STRING,        sd_json_dispatch_string,  offsetof(Machine, service),              0                 },
+                { "class",               SD_JSON_VARIANT_STRING,        dispatch_machine_class,   offsetof(Machine, class),                SD_JSON_MANDATORY },
+                { "leader",              _SD_JSON_VARIANT_TYPE_INVALID, machine_pidref,           offsetof(Machine, leader),               SD_JSON_STRICT    },
+                { "leaderProcessId",     SD_JSON_VARIANT_OBJECT,        machine_pidref,           offsetof(Machine, leader),               SD_JSON_STRICT    },
+                { "supervisor",          _SD_JSON_VARIANT_TYPE_INVALID, machine_pidref,           offsetof(Machine, supervisor),           SD_JSON_STRICT    },
+                { "supervisorProcessId", SD_JSON_VARIANT_OBJECT,        machine_pidref,           offsetof(Machine, supervisor),           SD_JSON_STRICT    },
+                { "rootDirectory",       SD_JSON_VARIANT_STRING,        json_dispatch_path,       offsetof(Machine, root_directory),       0                 },
+                { "ifIndices",           SD_JSON_VARIANT_ARRAY,         machine_ifindices,        0,                                       0                 },
+                { "vSockCid",            _SD_JSON_VARIANT_TYPE_INVALID, machine_cid,              offsetof(Machine, vsock_cid),            0                 },
+                { "sshAddress",          SD_JSON_VARIANT_STRING,        sd_json_dispatch_string,  offsetof(Machine, ssh_address),          SD_JSON_STRICT    },
+                { "sshPrivateKeyPath",   SD_JSON_VARIANT_STRING,        json_dispatch_path,       offsetof(Machine, ssh_private_key_path), 0                 },
+                { "allocateUnit",        SD_JSON_VARIANT_BOOLEAN,       sd_json_dispatch_stdbool, offsetof(Machine, allocate_unit),        0                 },
                 VARLINK_DISPATCH_POLKIT_FIELD,
                 {}
         };
@@ -168,6 +170,18 @@ int vl_method_register(sd_varlink *link, sd_json_variant *parameters, sd_varlink
                         return r;
         }
 
+        if (!pidref_is_set(&machine->supervisor)) {
+                _cleanup_(pidref_done) PidRef client_pidref = PIDREF_NULL;
+
+                r = varlink_get_peer_pidref(link, &client_pidref);
+                if (r < 0)
+                        return r;
+
+                /* If the client process is not the leader, then make it the supervisor */
+                if (!pidref_equal(&client_pidref, &machine->leader))
+                        machine->supervisor = TAKE_PIDREF(client_pidref);
+        }
+
         r = sd_varlink_get_peer_uid(link, &machine->uid);
         if (r < 0)
                 return r;
index 91c14501842a25a9c6749ce440c12d648e87cdd7..0a8db2dfa82280e6215316caf92a88057b2327d9 100644 (file)
@@ -62,6 +62,7 @@ int machine_new(MachineClass class, const char *name, Machine **ret) {
         *m = (Machine) {
                 .class = class,
                 .leader = PIDREF_NULL,
+                .supervisor = PIDREF_NULL,
                 .vsock_cid = VMADDR_CID_ANY,
         };
 
@@ -131,6 +132,9 @@ Machine* machine_free(Machine *m) {
                 pidref_done(&m->leader);
         }
 
+        m->supervisor_pidfd_event_source = sd_event_source_disable_unref(m->supervisor_pidfd_event_source);
+        pidref_done(&m->supervisor);
+
         sd_bus_message_unref(m->create_message);
 
         free(m->name);
@@ -205,6 +209,13 @@ int machine_save(Machine *m) {
                         fprintf(f, "LEADER_PIDFDID=%" PRIu64 "\n", m->leader.fd_id);
         }
 
+        if (pidref_is_set(&m->supervisor)) {
+                fprintf(f, "SUPERVISOR=" PID_FMT "\n", m->supervisor.pid);
+                (void) pidref_acquire_pidfd_id(&m->supervisor);
+                if (m->supervisor.fd_id != 0)
+                        fprintf(f, "SUPERVISOR_PIDFDID=%" PRIu64 "\n", m->supervisor.fd_id);
+        }
+
         if (m->class != _MACHINE_CLASS_INVALID)
                 fprintf(f, "CLASS=%s\n", machine_class_to_string(m->class));
 
@@ -261,8 +272,41 @@ static void machine_unlink(Machine *m) {
                 (void) unlink(m->state_file);
 }
 
+static void parse_pid_and_pidfdid(
+                PidRef *pidref,
+                const char *pid,
+                const char *pidfdid,
+                const char *name) {
+
+        int r;
+
+        assert(pidref);
+        assert(name);
+
+        pidref_done(pidref);
+
+        if (!pid)
+                return;
+        r = pidref_set_pidstr(pidref, pid);
+        if (r < 0)
+                return (void) log_debug_errno(r, "Failed to set %s PID to '%s', ignoring: %m", name, pid);
+
+        if (!pidfdid)
+                return;
+        uint64_t fd_id;
+        r = safe_atou64(pidfdid, &fd_id);
+        if (r < 0)
+                return (void) log_warning_errno(r, "Failed to parse %s pidfd ID, ignoring: %s", name, pidfdid);
+        (void) pidref_acquire_pidfd_id(pidref);
+        if (fd_id != pidref->fd_id) {
+                log_debug("PID of %s got recycled, ignoring.", name);
+                pidref_done(pidref);
+        }
+}
+
 int machine_load(Machine *m) {
-        _cleanup_free_ char *name = NULL, *realtime = NULL, *monotonic = NULL, *id = NULL, *leader = NULL, *leader_pidfdid = NULL,
+        _cleanup_free_ char *name = NULL, *realtime = NULL, *monotonic = NULL, *id = NULL,
+                *leader = NULL, *leader_pidfdid = NULL, *supervisor = NULL, *supervisor_pidfdid = NULL,
                 *class = NULL, *netif = NULL, *vsock_cid = NULL, *uid = NULL;
         int r;
 
@@ -281,6 +325,8 @@ int machine_load(Machine *m) {
                            "ID",                   &id,
                            "LEADER",               &leader,
                            "LEADER_PIDFDID",       &leader_pidfdid,
+                           "SUPERVISOR",           &supervisor,
+                           "SUPERVISOR_PIDFDID",   &supervisor_pidfdid,
                            "CLASS",                &class,
                            "REALTIME",             &realtime,
                            "MONOTONIC",            &monotonic,
@@ -300,26 +346,8 @@ int machine_load(Machine *m) {
         if (id)
                 (void) sd_id128_from_string(id, &m->id);
 
-        pidref_done(&m->leader);
-        if (leader) {
-                r = pidref_set_pidstr(&m->leader, leader);
-                if (r < 0)
-                        log_debug_errno(r, "Failed to set leader PID to '%s', ignoring: %m", leader);
-                else if (leader_pidfdid) {
-                        uint64_t fd_id;
-                        r = safe_atou64(leader_pidfdid, &fd_id);
-                        if (r < 0)
-                                log_warning_errno(r, "Failed to parse leader pidfd ID, ignoring: %s", leader_pidfdid);
-                        else {
-                                (void) pidref_acquire_pidfd_id(&m->leader);
-
-                                if (fd_id != m->leader.fd_id) {
-                                        log_debug("Leader PID got recycled, ignoring.");
-                                        pidref_done(&m->leader);
-                                }
-                        }
-                }
-        }
+        parse_pid_and_pidfdid(&m->leader, leader, leader_pidfdid, "leader");
+        parse_pid_and_pidfdid(&m->supervisor, supervisor, supervisor_pidfdid, "supervisor");
 
         if (class) {
                 MachineClass c = machine_class_from_string(class);
@@ -523,25 +551,35 @@ static int machine_dispatch_leader_pidfd(sd_event_source *s, int fd, unsigned re
         return 0;
 }
 
-static int machine_watch_pidfd(Machine *m) {
+static int machine_dispatch_supervisor_pidfd(sd_event_source *s, int fd, unsigned revents, void *userdata) {
+        Machine *m = ASSERT_PTR(userdata);
+
+        m->supervisor_pidfd_event_source = sd_event_source_disable_unref(m->supervisor_pidfd_event_source);
+        machine_add_to_gc_queue(m);
+
+        return 0;
+}
+
+static int machine_watch_pidfd(Machine *m, PidRef *pidref, sd_event_source **source, sd_event_io_handler_t cb) {
         int r;
 
         assert(m);
         assert(m->manager);
-        assert(pidref_is_set(&m->leader));
-        assert(!m->leader_pidfd_event_source);
+        assert(source);
+        assert(!*source);
+        assert(cb);
 
-        if (m->leader.fd < 0)
+        if (!pidref_is_set(pidref) || pidref->fd < 0)
                 return 0;
 
-        /* If we have a pidfd for the leader, let's also track it for POLLIN, and GC the machine
+        /* If we have a pidfd for the leader or supervisor, let's also track it for POLLIN, and GC the machine
          * automatically if it dies */
 
-        r = sd_event_add_io(m->manager->event, &m->leader_pidfd_event_source, m->leader.fd, EPOLLIN, machine_dispatch_leader_pidfd, m);
+        r = sd_event_add_io(m->manager->event, source, pidref->fd, EPOLLIN, cb, m);
         if (r < 0)
                 return r;
 
-        (void) sd_event_source_set_description(m->leader_pidfd_event_source, "machine-pidfd");
+        (void) sd_event_source_set_description(*source, "machine-pidfd");
 
         return 0;
 }
@@ -561,7 +599,11 @@ int machine_start(Machine *m, sd_bus_message *properties, sd_bus_error *error) {
         if (r < 0)
                 return r;
 
-        r = machine_watch_pidfd(m);
+        r = machine_watch_pidfd(m, &m->leader, &m->leader_pidfd_event_source, machine_dispatch_leader_pidfd);
+        if (r < 0)
+                return r;
+
+        r = machine_watch_pidfd(m, &m->supervisor, &m->supervisor_pidfd_event_source, machine_dispatch_supervisor_pidfd);
         if (r < 0)
                 return r;
 
@@ -598,6 +640,7 @@ int machine_stop(Machine *m) {
                 return -EOPNOTSUPP;
 
         if (m->unit && !m->subgroup) {
+                /* If the machine runs as its own unit, then we'll terminate that */
                 _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
                 char *job = NULL;
 
@@ -606,7 +649,14 @@ int machine_stop(Machine *m) {
                         return log_error_errno(r, "Failed to stop machine unit: %s", bus_error_message(&error, r));
 
                 free_and_replace(m->scope_job, job);
-        }
+
+        } else if (pidref_is_set(&m->supervisor)) {
+                /* Otherwise, send a friendly SIGTERM to the supervisor */
+                r = pidref_kill(&m->supervisor, SIGTERM);
+                if (r < 0)
+                        return log_error_errno(r, "Failed to kill supervisor process " PID_FMT " of machine '%s': %m", m->supervisor.pid, m->name);
+        } else
+                return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP), "Don't know how to terminate machine '%s'.", m->name);
 
         m->stopping = true;
 
@@ -714,14 +764,23 @@ int machine_kill(Machine *m, KillWhom whom, int signo) {
         if (!IN_SET(m->class, MACHINE_VM, MACHINE_CONTAINER))
                 return -EOPNOTSUPP;
 
-        if (whom == KILL_LEADER) /* If we shall simply kill the leader, do so directly */
+        switch (whom) {
+
+        case KILL_LEADER:
                 return pidref_kill(&m->leader, signo);
 
-        if (!m->unit)
-                return -ESRCH;
+        case KILL_SUPERVISOR:
+                return pidref_kill(&m->supervisor, signo);
+
+        case KILL_ALL:
+                if (!m->unit)
+                        return -ESRCH;
+
+                return manager_kill_unit(m->manager, m->unit, m->subgroup, signo, /* error= */ NULL);
 
-        /* Otherwise, make PID 1 do it for us, for the entire cgroup */
-        return manager_kill_unit(m->manager, m->unit, m->subgroup, signo, /* error= */ NULL);
+        default:
+                assert_not_reached();
+        }
 }
 
 int machine_openpt(Machine *m, int flags, char **ret_peer) {
@@ -1504,8 +1563,9 @@ static const char* const machine_state_table[_MACHINE_STATE_MAX] = {
 DEFINE_STRING_TABLE_LOOKUP(machine_state, MachineState);
 
 static const char* const kill_whom_table[_KILL_WHOM_MAX] = {
-        [KILL_LEADER] = "leader",
-        [KILL_ALL] = "all"
+        [KILL_LEADER]     = "leader",
+        [KILL_SUPERVISOR] = "supervisor",
+        [KILL_ALL]        = "all",
 };
 
 DEFINE_STRING_TABLE_LOOKUP(kill_whom, KillWhom);
index dddc0c8000cb823af94233ce35c4e307721b91fd..dda1612916c7841dc0855daf9912f5148465c62e 100644 (file)
@@ -27,6 +27,7 @@ typedef enum MachineClass {
 
 typedef enum KillWhom {
         KILL_LEADER,
+        KILL_SUPERVISOR,
         KILL_ALL,
         _KILL_WHOM_MAX,
         _KILL_WHOM_INVALID = -EINVAL,
@@ -50,8 +51,16 @@ typedef struct Machine {
         char *subgroup;
         char *scope_job;
 
-        PidRef leader;
-        sd_event_source *leader_pidfd_event_source;
+        /* Leader: the top-level process that encapsulates the machine itself. For containers that's PID 1,
+         * for VMs that's qemu or whatever process wraps the actual VM code. This process defines the runtime
+         * lifecycle of the machine. In case of containers we can use this reference to enter namespaces,
+         * send signals and so on.
+         *
+         * Supervisor: the process that supervises the machine, if there is any and if that process is
+         * responsible for a single machine. Sending SIGTERM to this process should (non-cooperatively)
+         * terminate the machine. */
+        PidRef leader, supervisor;
+        sd_event_source *leader_pidfd_event_source, *supervisor_pidfd_event_source;
 
         dual_timestamp timestamp;
 
index 82c0addefbe7f9b575d4263d4d8e905076261224..72141b8116dbe9d88940988225417e12ee27f88a 100644 (file)
@@ -233,7 +233,7 @@ static int method_create_or_register_machine(
                 Machine **ret,
                 sd_bus_error *error) {
 
-        _cleanup_(pidref_done) PidRef pidref = PIDREF_NULL;
+        _cleanup_(pidref_done) PidRef leader_pidref = PIDREF_NULL, supervisor_pidref = PIDREF_NULL;
         const char *name, *service, *class, *root_directory;
         const int32_t *netif = NULL;
         MachineClass c;
@@ -289,13 +289,23 @@ static int method_create_or_register_machine(
                 return sd_bus_error_set(error, SD_BUS_ERROR_INVALID_ARGS, "Root directory must be empty or an absolute path");
 
         if (leader == 0) {
-                r = bus_query_sender_pidref(message, &pidref);
+                /* If no PID is specified, the client is the leader */
+                r = bus_query_sender_pidref(message, &leader_pidref);
                 if (r < 0)
                         return sd_bus_error_set_errnof(error, r, "Failed to pin client process: %m");
         } else {
-                r = pidref_set_pid(&pidref, leader);
+                /* If a PID is specified that's the leader, but if the client process is different from it, then that's the supervisor */
+                r = pidref_set_pid(&leader_pidref, leader);
                 if (r < 0)
                         return sd_bus_error_set_errnof(error, r, "Failed to pin process " PID_FMT ": %m", (pid_t) leader);
+
+                _cleanup_(pidref_done) PidRef client_pidref = PIDREF_NULL;
+                r = bus_query_sender_pidref(message, &client_pidref);
+                if (r < 0)
+                        return sd_bus_error_set_errnof(error, r, "Failed to pin client process: %m");
+
+                if (!pidref_equal(&client_pidref, &leader_pidref))
+                        supervisor_pidref = TAKE_PIDREF(client_pidref);
         }
 
         if (hashmap_get(manager->machines, name))
@@ -332,7 +342,8 @@ static int method_create_or_register_machine(
         if (r < 0)
                 return r;
 
-        m->leader = TAKE_PIDREF(pidref);
+        m->leader = TAKE_PIDREF(leader_pidref);
+        m->supervisor = TAKE_PIDREF(supervisor_pidref);
         m->class = c;
         m->id = id;
         m->uid = uid;
index 7dac3cb0d20bdc8f151c37020305ce64d0ec2b81..52b1fc12d290cdf7d32c85bf1ded906e6964e381 100644 (file)
@@ -479,6 +479,7 @@ static int list_machine_one_and_maybe_read_metadata(sd_varlink *link, Machine *m
                         JSON_BUILD_PAIR_STRING_NON_EMPTY("unit", m->unit),
                         JSON_BUILD_PAIR_STRING_NON_EMPTY("subgroup", m->subgroup),
                         SD_JSON_BUILD_PAIR_CONDITION(pidref_is_set(&m->leader), "leader", JSON_BUILD_PIDREF(&m->leader)),
+                        SD_JSON_BUILD_PAIR_CONDITION(pidref_is_set(&m->supervisor), "supervisor", JSON_BUILD_PIDREF(&m->supervisor)),
                         SD_JSON_BUILD_PAIR_CONDITION(dual_timestamp_is_set(&m->timestamp), "timestamp", JSON_BUILD_DUAL_TIMESTAMP(&m->timestamp)),
                         JSON_BUILD_PAIR_UNSIGNED_NOT_EQUAL("vSockCid", m->vsock_cid, VMADDR_CID_ANY),
                         JSON_BUILD_PAIR_STRING_NON_EMPTY("sshAddress", m->ssh_address),
index ad7dff228f16e1af0c967c33579aa5cace22d920..d37f672d051c2baae4bf032a2db88a8a51c1fe4a 100644 (file)
@@ -38,6 +38,10 @@ static SD_VARLINK_DEFINE_METHOD(
                 SD_VARLINK_DEFINE_INPUT(leader,                  SD_VARLINK_INT,    SD_VARLINK_NULLABLE),
                 SD_VARLINK_FIELD_COMMENT("The leader PID as ProcessId structure. If both the leader and leaderProcessId parameters are specified they must reference the same process. Typically one would only specify one or the other however. It's generally recommended to specify leaderProcessId as it references a process in a robust way without risk of identifier recycling."),
                 SD_VARLINK_DEFINE_INPUT_BY_TYPE(leaderProcessId, ProcessId,         SD_VARLINK_NULLABLE),
+                SD_VARLINK_FIELD_COMMENT("The supervisor PID as simple positive integer."),
+                SD_VARLINK_DEFINE_INPUT(supervisor,              SD_VARLINK_INT,    SD_VARLINK_NULLABLE),
+                SD_VARLINK_FIELD_COMMENT("The supervisor PID as ProcessId structure. If both the supervisor and supervisorProcessId parameters are specified they must reference the same process. Typically only one or the other would be specified. It's generally recommended to specify supervisorProcessId as it references a process in a robust way without risk of identifier recycling."),
+                SD_VARLINK_DEFINE_INPUT_BY_TYPE(supervisorProcessId, ProcessId,         SD_VARLINK_NULLABLE),
                 SD_VARLINK_DEFINE_INPUT(rootDirectory,           SD_VARLINK_STRING, SD_VARLINK_NULLABLE),
                 SD_VARLINK_DEFINE_INPUT(ifIndices,               SD_VARLINK_INT,    SD_VARLINK_ARRAY|SD_VARLINK_NULLABLE),
                 SD_VARLINK_DEFINE_INPUT(vSockCid,                SD_VARLINK_INT,    SD_VARLINK_NULLABLE),
@@ -58,7 +62,7 @@ static SD_VARLINK_DEFINE_METHOD(
 static SD_VARLINK_DEFINE_METHOD(
                 Kill,
                 VARLINK_DEFINE_MACHINE_LOOKUP_AND_POLKIT_INPUT_FIELDS,
-                SD_VARLINK_FIELD_COMMENT("Identifier that specifies what precisely to send the signal to (either 'leader' or 'all')."),
+                SD_VARLINK_FIELD_COMMENT("Identifier that specifies what precisely to send the signal to (either 'leader', 'supervisor', or 'all')."),
                 SD_VARLINK_DEFINE_INPUT(whom, SD_VARLINK_STRING, SD_VARLINK_NULLABLE),
                 SD_VARLINK_FIELD_COMMENT("Numeric UNIX signal integer."),
                 SD_VARLINK_DEFINE_INPUT(signal, SD_VARLINK_INT, 0));
@@ -79,6 +83,8 @@ static SD_VARLINK_DEFINE_METHOD_FULL(
                 SD_VARLINK_DEFINE_OUTPUT(class, SD_VARLINK_STRING, 0),
                 SD_VARLINK_FIELD_COMMENT("Leader process PID of this machine"),
                 SD_VARLINK_DEFINE_OUTPUT_BY_TYPE(leader, ProcessId, SD_VARLINK_NULLABLE),
+                SD_VARLINK_FIELD_COMMENT("Supervisor process PID of this machine"),
+                SD_VARLINK_DEFINE_OUTPUT_BY_TYPE(supervisor, ProcessId, SD_VARLINK_NULLABLE),
                 SD_VARLINK_FIELD_COMMENT("Root directory of this machine, if known, relative to host file system"),
                 SD_VARLINK_DEFINE_OUTPUT(rootDirectory, SD_VARLINK_STRING, SD_VARLINK_NULLABLE),
                 SD_VARLINK_FIELD_COMMENT("The service manager unit this machine resides in"),