]> git.ipfire.org Git - thirdparty/systemd.git/commitdiff
nspawn: do basic port to PidRef
author: Lennart Poettering <lennart@poettering.net>
Fri, 23 May 2025 15:22:40 +0000 (17:22 +0200)
committer: Lennart Poettering <lennart@poettering.net>
Fri, 6 Jun 2025 22:28:07 +0000 (00:28 +0200)
This is sometimes a bit superficial, but in many cases it allows us to use
pidfds for many of our operations.

src/nspawn/nspawn-cgroup.c
src/nspawn/nspawn-cgroup.h
src/nspawn/nspawn-network.c
src/nspawn/nspawn-network.h
src/nspawn/nspawn-register.c
src/nspawn/nspawn-register.h
src/nspawn/nspawn.c

index 98450b6f1dcfefb381e0e764a1a0a3643c752280..7cef863c93f0bd4e1af236256a2ff3925fa6e313 100644 (file)
@@ -15,6 +15,7 @@
 #include "nspawn-cgroup.h"
 #include "nsresource.h"
 #include "path-util.h"
+#include "pidref.h"
 #include "string-util.h"
 #include "strv.h"
 
@@ -45,7 +46,7 @@ static int chown_cgroup_path(const char *path, uid_t uid_shift) {
 }
 
 int create_subcgroup(
-                pid_t pid,
+                const PidRef *pid,
                 bool keep_unit,
                 uid_t uid_shift,
                 int userns_fd,
@@ -55,7 +56,8 @@ int create_subcgroup(
         CGroupMask supported;
         int r;
 
-        assert(pid > 1);
+        assert(pidref_is_set(pid));
+        assert(pid->pid > 1);
         assert((userns_fd >= 0) == (userns_mode == USER_NAMESPACE_MANAGED));
 
         /* In the unified hierarchy inner nodes may only contain subgroups, but not processes. Hence, if we running in
@@ -75,7 +77,7 @@ int create_subcgroup(
         if (keep_unit)
                 r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, 0, &cgroup);
         else
-                r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, pid, &cgroup);
+                r = cg_pidref_get_path(SYSTEMD_CGROUP_CONTROLLER, pid, &cgroup);
         if (r < 0)
                 return log_error_errno(r, "Failed to get our control group: %m");
 
@@ -89,7 +91,7 @@ int create_subcgroup(
                 return log_oom();
 
         if (userns_mode != USER_NAMESPACE_MANAGED)
-                r = cg_create_and_attach(payload, pid);
+                r = cg_create_and_attach(payload, pid->pid);
         else
                 r = cg_create(payload);
         if (r < 0)
@@ -102,9 +104,9 @@ int create_subcgroup(
                 if (cgroup_fd < 0)
                         return log_error_errno(cgroup_fd, "Failed to open cgroup %s: %m", payload);
 
-                r = cg_fd_attach(cgroup_fd, pid);
+                r = cg_fd_attach(cgroup_fd, pid->pid);
                 if (r < 0)
-                        return log_error_errno(r, "Failed to add process " PID_FMT " to cgroup %s: %m", pid, payload);
+                        return log_error_errno(r, "Failed to add process " PID_FMT " to cgroup %s: %m", pid->pid, payload);
 
                 r = nsresource_add_cgroup(userns_fd, cgroup_fd);
                 if (r < 0)
index 57c973772881a9223883ba1514601933d75503bb..8d61513ad52f1ec166dd71b8274277e294b16b4d 100644 (file)
@@ -5,7 +5,7 @@
 #include "nspawn-settings.h"
 
 int create_subcgroup(
-                pid_t pid,
+                const PidRef *pid,
                 bool keep_unit,
                 uid_t uid_shift,
                 int userns_fd,
index afd5294d04a12e1627420efaec679252c3136505..e828d0f14138a2da7a6cde2731c095a0faaecdc7 100644 (file)
@@ -24,6 +24,7 @@
 #include "netif-util.h"
 #include "netlink-util.h"
 #include "nspawn-network.h"
+#include "pidref.h"
 #include "process-util.h"
 #include "socket-util.h"
 #include "stat-util.h"
@@ -86,7 +87,7 @@ static int set_alternative_ifname(sd_netlink *rtnl, const char *ifname, const ch
 
 static int add_veth(
                 sd_netlink *rtnl,
-                pid_t pid,
+                const PidRef *pid,
                 const char *ifname_host,
                 const char *altifname_host,
                 const struct ether_addr *mac_host,
@@ -97,6 +98,7 @@ static int add_veth(
         int r;
 
         assert(rtnl);
+        assert(pidref_is_set(pid));
         assert(ifname_host);
         assert(mac_host);
         assert(ifname_container);
@@ -134,7 +136,7 @@ static int add_veth(
         if (r < 0)
                 return log_error_errno(r, "Failed to add netlink MAC address: %m");
 
-        r = sd_netlink_message_append_u32(m, IFLA_NET_NS_PID, pid);
+        r = sd_netlink_message_append_u32(m, IFLA_NET_NS_PID, pid->pid);
         if (r < 0)
                 return log_error_errno(r, "Failed to add netlink namespace field: %m");
 
@@ -160,7 +162,7 @@ static int add_veth(
 }
 
 int setup_veth(const char *machine_name,
-               pid_t pid,
+               const PidRef *pid,
                char iface_name[IFNAMSIZ],
                bool bridge,
                const struct ether_addr *provided_mac) {
@@ -172,7 +174,7 @@ int setup_veth(const char *machine_name,
         int r;
 
         assert(machine_name);
-        assert(pid > 0);
+        assert(pidref_is_set(pid));
         assert(iface_name);
 
         /* Use two different interface name prefixes depending whether
@@ -212,7 +214,7 @@ int setup_veth(const char *machine_name,
 
 int setup_veth_extra(
                 const char *machine_name,
-                pid_t pid,
+                const PidRef *pid,
                 char **pairs) {
 
         _cleanup_(sd_netlink_unrefp) sd_netlink *rtnl = NULL;
@@ -220,7 +222,7 @@ int setup_veth_extra(
         int r;
 
         assert(machine_name);
-        assert(pid > 0);
+        assert(pidref_is_set(pid));
 
         if (strv_isempty(pairs))
                 return 0;
@@ -727,11 +729,13 @@ int move_back_network_interfaces(int child_netns_fd, char **interface_pairs) {
         return 0;
 }
 
-int setup_macvlan(const char *machine_name, pid_t pid, char **iface_pairs) {
+int setup_macvlan(const char *machine_name, const PidRef *pid, char **iface_pairs) {
         _cleanup_(sd_netlink_unrefp) sd_netlink *rtnl = NULL;
         unsigned idx = 0;
         int r;
 
+        assert(pidref_is_set(pid));
+
         if (strv_isempty(iface_pairs))
                 return 0;
 
@@ -775,7 +779,7 @@ int setup_macvlan(const char *machine_name, pid_t pid, char **iface_pairs) {
                 if (r < 0)
                         return log_error_errno(r, "Failed to add netlink MAC address: %m");
 
-                r = sd_netlink_message_append_u32(m, IFLA_NET_NS_PID, pid);
+                r = sd_netlink_message_append_u32(m, IFLA_NET_NS_PID, pid->pid);
                 if (r < 0)
                         return log_error_errno(r, "Failed to add netlink namespace field: %m");
 
@@ -860,10 +864,12 @@ int remove_macvlan(int child_netns_fd, char **interface_pairs) {
         return 0;
 }
 
-int setup_ipvlan(const char *machine_name, pid_t pid, char **iface_pairs) {
+int setup_ipvlan(const char *machine_name, const PidRef *pid, char **iface_pairs) {
         _cleanup_(sd_netlink_unrefp) sd_netlink *rtnl = NULL;
         int r;
 
+        assert(pidref_is_set(pid));
+
         if (strv_isempty(iface_pairs))
                 return 0;
 
@@ -898,7 +904,7 @@ int setup_ipvlan(const char *machine_name, pid_t pid, char **iface_pairs) {
                 if (r < 0)
                         return log_error_errno(r, "Failed to add netlink interface name: %m");
 
-                r = sd_netlink_message_append_u32(m, IFLA_NET_NS_PID, pid);
+                r = sd_netlink_message_append_u32(m, IFLA_NET_NS_PID, pid->pid);
                 if (r < 0)
                         return log_error_errno(r, "Failed to add netlink namespace field: %m");
 
index 4bf3dab52b410003da0a443eab0f488792433b0d..692c40108cbb0c41ccb9f3741d77116f120650bb 100644 (file)
@@ -8,15 +8,15 @@
 int test_network_interfaces_initialized(char **iface_pairs);
 int resolve_network_interface_names(char **iface_pairs);
 
-int setup_veth(const char *machine_name, pid_t pid, char iface_name[IFNAMSIZ], bool bridge, const struct ether_addr *provided_mac);
-int setup_veth_extra(const char *machine_name, pid_t pid, char **pairs);
+int setup_veth(const char *machine_name, const PidRef *pid, char iface_name[IFNAMSIZ], bool bridge, const struct ether_addr *provided_mac);
+int setup_veth_extra(const char *machine_name, const PidRef *pid, char **pairs);
 
 int setup_bridge(const char *veth_name, const char *bridge_name, bool create);
 int remove_bridge(const char *bridge_name);
 
-int setup_macvlan(const char *machine_name, pid_t pid, char **iface_pairs);
+int setup_macvlan(const char *machine_name, const PidRef *pid, char **iface_pairs);
 int remove_macvlan(int child_netns_fd, char **interface_pairs);
-int setup_ipvlan(const char *machine_name, pid_t pid, char **iface_pairs);
+int setup_ipvlan(const char *machine_name, const PidRef *pid, char **iface_pairs);
 
 int move_network_interfaces(int netns_fd, char **iface_pairs);
 int move_back_network_interfaces(int child_netns_fd, char **interface_pairs);
index 750cb5e8748f83cf5ed06b7e2df6339475e91052..b746d095300f8918e0c39068007e5c9641d2d79e 100644 (file)
@@ -134,7 +134,7 @@ static int can_set_coredump_receive(sd_bus *bus) {
 int register_machine(
                 sd_bus *bus,
                 const char *machine_name,
-                pid_t pid,
+                const PidRef *pid,
                 const char *directory,
                 sd_id128_t uuid,
                 int local_ifindex,
@@ -165,7 +165,7 @@ int register_machine(
                                 SD_BUS_MESSAGE_APPEND_ID128(uuid),
                                 service,
                                 "container",
-                                (uint32_t) pid,
+                                pidref_is_set(pid) ? (uint32_t) pid->pid : 0,
                                 strempty(directory),
                                 local_ifindex > 0 ? 1 : 0, local_ifindex);
         } else {
@@ -182,7 +182,7 @@ int register_machine(
                                 SD_BUS_MESSAGE_APPEND_ID128(uuid),
                                 service,
                                 "container",
-                                (uint32_t) pid,
+                                pidref_is_set(pid) ? (uint32_t) pid->pid : 0,
                                 strempty(directory),
                                 local_ifindex > 0 ? 1 : 0, local_ifindex);
                 if (r < 0)
@@ -252,7 +252,7 @@ int unregister_machine(
 int allocate_scope(
                 sd_bus *bus,
                 const char *machine_name,
-                pid_t pid,
+                const PidRef* pid,
                 const char *slice,
                 CustomMount *mounts,
                 unsigned n_mounts,
@@ -294,12 +294,7 @@ int allocate_scope(
 
         description = strjoina("Container ", machine_name);
 
-        _cleanup_(pidref_done) PidRef pidref = PIDREF_NULL;
-        r = pidref_set_pid(&pidref, pid);
-        if (r < 0)
-                return log_error_errno(r, "Failed to allocate PID reference: %m");
-
-        r = bus_append_scope_pidref(m, &pidref, FLAGS_SET(flags, ALLOCATE_SCOPE_ALLOW_PIDFD));
+        r = bus_append_scope_pidref(m, pid, FLAGS_SET(flags, ALLOCATE_SCOPE_ALLOW_PIDFD));
         if (r < 0)
                 return bus_log_create_error(r);
 
index 83c134dc6e67b909dd7cab5fd36a6c5e5cb10e0f..bbb0d9124e5c807dd4c6bdbac2c4aaad56b2f8d7 100644 (file)
@@ -11,7 +11,7 @@ typedef enum RegisterMachineFlags {
 int register_machine(
                 sd_bus *bus,
                 const char *machine_name,
-                pid_t pid,
+                const PidRef *pid,
                 const char *directory,
                 sd_id128_t uuid,
                 int local_ifindex,
@@ -32,7 +32,7 @@ typedef enum AllocateScopeFlags {
 int allocate_scope(
                 sd_bus *bus,
                 const char *machine_name,
-                pid_t pid,
+                const PidRef *pid,
                 const char *slice,
                 CustomMount *mounts, unsigned n_mounts,
                 int kill_signal,
index 383fca6448ee91885244c51acac8a3e4e1746604..36b74954829d55f1f096c4b09c65582c81eddbea 100644 (file)
@@ -2890,14 +2890,18 @@ static int recursive_chown(const char *directory, uid_t shift, uid_t range) {
  * That is, success is indicated by a return value of zero, and an
  * error is indicated by a non-zero value.
  */
-static int wait_for_container(pid_t pid, ContainerStatus *container) {
+static int wait_for_container(PidRef *pid, ContainerStatus *container) {
         siginfo_t status;
         int r;
 
-        r = wait_for_terminate(pid, &status);
+        assert(pidref_is_set(pid));
+
+        r = pidref_wait_for_terminate(pid, &status);
         if (r < 0)
                 return log_warning_errno(r, "Failed to wait for container: %m");
 
+        pidref_done(pid);
+
         switch (status.si_code) {
 
         case CLD_EXITED:
@@ -2933,29 +2937,25 @@ static int wait_for_container(pid_t pid, ContainerStatus *container) {
 }
 
 static int on_orderly_shutdown(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) {
-        pid_t pid;
+        PidRef *pid = ASSERT_PTR(userdata);
 
-        pid = PTR_TO_PID(userdata);
-        if (pid > 0) {
-                if (kill(pid, arg_kill_signal) >= 0) {
+        if (pidref_is_set(pid))
+                if (pidref_kill(pid, arg_kill_signal) >= 0) {
                         log_info("Trying to halt container. Send SIGTERM again to trigger immediate termination.");
                         sd_event_source_set_userdata(s, NULL);
                         return 0;
                 }
-        }
 
         sd_event_exit(sd_event_source_get_event(s), 0);
         return 0;
 }
 
 static int on_sigchld(sd_event_source *s, const struct signalfd_siginfo *ssi, void *userdata) {
-        pid_t pid;
 
         assert(s);
         assert(ssi);
 
-        pid = PTR_TO_PID(userdata);
-
+        PidRef *pid = ASSERT_PTR(userdata);
         for (;;) {
                 siginfo_t si = {};
 
@@ -2963,7 +2963,7 @@ static int on_sigchld(sd_event_source *s, const struct signalfd_siginfo *ssi, vo
                         return log_error_errno(errno, "Failed to waitid(): %m");
                 if (si.si_pid == 0) /* No pending children. */
                         break;
-                if (si.si_pid == pid) {
+                if (si.si_pid == pid->pid) {
                         /* The main process we care for has exited. Return from
                          * signal handler but leave the zombie. */
                         sd_event_exit(sd_event_source_get_event(s), 0);
@@ -2978,15 +2978,13 @@ static int on_sigchld(sd_event_source *s, const struct signalfd_siginfo *ssi, vo
 }
 
 static int on_request_stop(sd_bus_message *m, void *userdata, sd_bus_error *error) {
-        pid_t pid;
+        PidRef *pid = ASSERT_PTR(userdata);
 
         assert(m);
 
-        pid = PTR_TO_PID(userdata);
-
         if (arg_kill_signal > 0) {
                 log_info("Container termination requested. Attempting to halt container.");
-                (void) kill(pid, arg_kill_signal);
+                (void) pidref_kill(pid, arg_kill_signal);
         } else {
                 log_info("Container termination requested. Exiting.");
                 sd_event_exit(sd_bus_get_event(sd_bus_message_get_bus(m)), 0);
@@ -3856,7 +3854,6 @@ static int outer_child(
         _cleanup_(bind_user_context_freep) BindUserContext *bind_user_context = NULL;
         _cleanup_strv_free_ char **os_release_pairs = NULL;
         bool idmap = false;
-        pid_t pid;
         ssize_t l;
         int r;
 
@@ -4322,7 +4319,7 @@ static int outer_child(
         if (notify_fd < 0)
                 return notify_fd;
 
-        pid = raw_clone(SIGCHLD|CLONE_NEWNS|
+        pid_t pid = raw_clone(SIGCHLD|CLONE_NEWNS|
                         arg_clone_ns_flags |
                         (IN_SET(arg_userns_mode, USER_NAMESPACE_FIXED, USER_NAMESPACE_PICK) ? CLONE_NEWUSER : 0) |
                         ((arg_private_network && arg_userns_mode == USER_NAMESPACE_MANAGED) ? CLONE_NEWNET : 0));
@@ -4530,7 +4527,7 @@ static int make_uid_map_string(
 }
 
 static int setup_uid_map(
-                pid_t pid,
+                const PidRef *pid,
                 const uid_t bind_user_uid[],
                 size_t n_bind_user_uid) {
 
@@ -4538,13 +4535,14 @@ static int setup_uid_map(
         _cleanup_free_ char *s = NULL;
         int r;
 
-        assert(pid > 1);
+        assert(pidref_is_set(pid));
+        assert(pid->pid > 1);
 
         /* Build the UID map string */
         if (make_uid_map_string(bind_user_uid, n_bind_user_uid, 0, &s) < 0) /* offset=0 contains the UID pair */
                 return log_oom();
 
-        xsprintf(uid_map, "/proc/" PID_FMT "/uid_map", pid);
+        xsprintf(uid_map, "/proc/" PID_FMT "/uid_map", pid->pid);
         r = write_string_file(uid_map, s, WRITE_STRING_FILE_DISABLE_BUFFER);
         if (r < 0)
                 return log_error_errno(r, "Failed to write UID map: %m");
@@ -4554,7 +4552,7 @@ static int setup_uid_map(
         if (make_uid_map_string(bind_user_uid, n_bind_user_uid, 2, &s) < 0) /* offset=2 contains the GID pair */
                 return log_oom();
 
-        xsprintf(uid_map, "/proc/" PID_FMT "/gid_map", pid);
+        xsprintf(uid_map, "/proc/" PID_FMT "/gid_map", pid->pid);
         r = write_string_file(uid_map, s, WRITE_STRING_FILE_DISABLE_BUFFER);
         if (r < 0)
                 return log_error_errno(r, "Failed to write GID map: %m");
@@ -4563,7 +4561,7 @@ static int setup_uid_map(
 }
 
 static int nspawn_dispatch_notify_fd(sd_event_source *source, int fd, uint32_t revents, void *userdata) {
-        pid_t inner_child_pid = PTR_TO_PID(userdata);
+        PidRef *inner_child_pid = ASSERT_PTR(userdata);
         int r;
 
         assert(userdata);
@@ -4576,7 +4574,7 @@ static int nspawn_dispatch_notify_fd(sd_event_source *source, int fd, uint32_t r
         if (r < 0)
                 return r;
 
-        if (sender_pid.pid != inner_child_pid) {
+        if (!pidref_equal(&sender_pid, inner_child_pid)) {
                 log_debug("Received notify message from process that is not the payload's PID 1. Ignoring.");
                 return 0;
         }
@@ -4605,7 +4603,7 @@ static int nspawn_dispatch_notify_fd(sd_event_source *source, int fd, uint32_t r
         return 0;
 }
 
-static int setup_notify_parent(sd_event *event, int fd, pid_t *inner_child_pid, sd_event_source **notify_event_source) {
+static int setup_notify_parent(sd_event *event, int fd, PidRef *inner_child_pid, sd_event_source **notify_event_source) {
         int r;
 
         if (fd < 0)
@@ -4621,9 +4619,9 @@ static int setup_notify_parent(sd_event *event, int fd, pid_t *inner_child_pid,
 }
 
 static int ptyfwd_hotkey(PTYForward *f, char c, void *userdata) {
-        pid_t pid = PTR_TO_PID(userdata);
+        PidRef *pid = ASSERT_PTR(userdata);
         const char *word;
-        int sig = 0;
+        int sig = 0, r;
 
         assert(f);
 
@@ -4643,8 +4641,9 @@ static int ptyfwd_hotkey(PTYForward *f, char c, void *userdata) {
                 return 0;
         }
 
-        if (kill(pid, sig) < 0)
-                log_error_errno(errno, "Failed to send %s (%s request) to PID 1 of container: %m", signal_to_string(sig), word);
+        r = pidref_kill(pid, sig);
+        if (r < 0)
+                log_error_errno(r, "Failed to send %s (%s request) to PID 1 of container: %m", signal_to_string(sig), word);
         else
                 log_info("Sent %s (%s request) to PID 1 of container.", signal_to_string(sig), word);
 
@@ -5090,7 +5089,7 @@ static int run_container(
                 bool *veth_created,
                 struct ExposeArgs *expose_args,
                 int *master,
-                pid_t *pid,
+                PidRef *pid,
                 int *ret) {
 
         _cleanup_(release_lock_file) LockFile uid_shift_lock = LOCK_FILE_INIT;
@@ -5169,25 +5168,40 @@ static int run_container(
                                                "Path %s doesn't refer to a network namespace, refusing.", arg_network_namespace_path);
         }
 
+        bool in_child;
         if (arg_userns_mode != USER_NAMESPACE_MANAGED) {
                 assert(userns_fd < 0);
                 /* If we have no user namespace then we'll clone and create a new mount namespace right-away. */
 
-                *pid = raw_clone(SIGCHLD|CLONE_NEWNS);
-                if (*pid < 0)
+                pid_t _pid = raw_clone(SIGCHLD|CLONE_NEWNS);
+                if (_pid < 0)
                         return log_error_errno(errno, "clone() failed%s: %m",
                                                errno == EINVAL ?
                                                ", do you have namespace support enabled in your kernel? (You need UTS, IPC, PID and NET namespacing built in)" : "");
+                if (_pid != 0) {
+                        r = pidref_set_pid(pid, _pid);
+                        if (r < 0)
+                                return log_error_errno(r, "Failed to allocate pidfd: %m");
+                }
+
+                in_child = _pid == 0;
         } else {
                 assert(userns_fd >= 0);
                 /* If we have a user namespace then we'll clone() first, and then join the user namespace,
                  * and then open the mount namespace, so that it is owned by the user namespace */
 
-                *pid = raw_clone(SIGCHLD);
-                if (*pid < 0)
+                pid_t _pid = raw_clone(SIGCHLD);
+                if (_pid < 0)
                         return log_error_errno(errno, "clone() failed: %m");
 
-                if (*pid == 0) {
+                if (_pid != 0) {
+                        r = pidref_set_pid(pid, _pid);
+                        if (r < 0)
+                                return log_error_errno(r, "Failed to allocate pidfd: %m");
+                }
+
+                in_child = _pid == 0;
+                if (in_child) {
                         if (setns(userns_fd, CLONE_NEWUSER) < 0) {
                                 log_error_errno(errno, "Failed to join allocate user namespace: %m");
                                 _exit(EXIT_FAILURE);
@@ -5206,7 +5220,7 @@ static int run_container(
                 }
         }
 
-        if (*pid == 0) {
+        if (in_child) {
                 /* The outer child only has a file system namespace. */
                 barrier_set_role(&barrier, BARRIER_CHILD);
 
@@ -5288,19 +5302,25 @@ static int run_container(
         }
 
         /* Wait for the outer child. */
-        r = wait_for_terminate_and_check("(sd-namespace)", *pid, WAIT_LOG_ABNORMAL);
+        r = pidref_wait_for_terminate_and_check("(sd-namespace)", pid, WAIT_LOG_ABNORMAL);
         if (r < 0)
                 return r;
+        pidref_done(pid);
         if (r != EXIT_SUCCESS)
                 return -EIO;
 
         /* And now retrieve the PID of the inner child. */
-        l = recv(fd_outer_socket_pair[0], pid, sizeof *pid, 0);
+        pid_t _pid;
+        l = recv(fd_outer_socket_pair[0], &_pid, sizeof _pid, 0);
         if (l < 0)
                 return log_error_errno(errno, "Failed to read inner child PID: %m");
-        if (l != sizeof *pid)
+        if (l != sizeof _pid)
                 return log_error_errno(SYNTHETIC_ERRNO(EIO), "Short read while reading inner child PID.");
 
+        r = pidref_set_pid(pid, _pid);
+        if (r < 0)
+                return log_error_errno(r, "Failed to allocate pidfd: %m");
+
         /* We also retrieve container UUID in case it was generated by outer child */
         l = recv(fd_outer_socket_pair[0], &arg_uuid, sizeof arg_uuid, 0);
         if (l < 0)
@@ -5314,14 +5334,14 @@ static int run_container(
                 return log_error_errno(notify_socket,
                                        "Failed to receive notification socket from the outer child: %m");
 
-        log_debug("Init process invoked as PID "PID_FMT, *pid);
+        log_debug("Init process invoked as PID "PID_FMT, pid->pid);
 
         if (arg_userns_mode != USER_NAMESPACE_NO) {
                 if (!barrier_place_and_sync(&barrier)) /* #1 */
                         return log_error_errno(SYNTHETIC_ERRNO(ESRCH), "Child died too early.");
 
                 if (arg_userns_mode != USER_NAMESPACE_MANAGED) {
-                        r = setup_uid_map(*pid, bind_user_uid, n_bind_user_uid);
+                        r = setup_uid_map(pid, bind_user_uid, n_bind_user_uid);
                         if (r < 0)
                                 return r;
                 }
@@ -5349,7 +5369,7 @@ static int run_container(
 
                 if (arg_network_veth) {
                         if (arg_userns_mode != USER_NAMESPACE_MANAGED) {
-                                r = setup_veth(arg_machine, *pid, veth_name,
+                                r = setup_veth(arg_machine, pid, veth_name,
                                                arg_network_bridge || arg_network_zone, &arg_network_provided_mac);
                                 if (r < 0)
                                         return r;
@@ -5389,7 +5409,7 @@ static int run_container(
                         }
                 }
 
-                r = setup_veth_extra(arg_machine, *pid, arg_network_veth_extra);
+                r = setup_veth_extra(arg_machine, pid, arg_network_veth_extra);
                 if (r < 0)
                         return r;
 
@@ -5399,11 +5419,11 @@ static int run_container(
                    remove them on its own, since they cannot be referenced by anything yet. */
                 *veth_created = true;
 
-                r = setup_macvlan(arg_machine, *pid, arg_network_macvlan);
+                r = setup_macvlan(arg_machine, pid, arg_network_macvlan);
                 if (r < 0)
                         return r;
 
-                r = setup_ipvlan(arg_machine, *pid, arg_network_ipvlan);
+                r = setup_ipvlan(arg_machine, pid, arg_network_ipvlan);
                 if (r < 0)
                         return r;
         }
@@ -5435,7 +5455,9 @@ static int run_container(
                                 NULL,
                                 "org.freedesktop.systemd1.Scope",
                                 "RequestStop",
-                                on_request_stop, NULL, PID_TO_PTR(*pid));
+                                on_request_stop,
+                                NULL,
+                                pid);
                 if (r < 0)
                         return log_error_errno(r, "Failed to request RequestStop match: %m");
         }
@@ -5446,7 +5468,7 @@ static int run_container(
                 r = register_machine(
                                 bus,
                                 arg_machine,
-                                *pid,
+                                pid,
                                 arg_directory,
                                 arg_uuid,
                                 ifi,
@@ -5466,7 +5488,7 @@ static int run_container(
                 r = allocate_scope(
                                 bus,
                                 arg_machine,
-                                *pid,
+                                pid,
                                 arg_slice,
                                 arg_custom_mounts, arg_n_custom_mounts,
                                 arg_kill_signal,
@@ -5481,7 +5503,7 @@ static int run_container(
                 log_notice("Machine and scope registration turned off, --slice= and --property= settings will have no effect.");
 
         r = create_subcgroup(
-                        *pid,
+                        pid,
                         arg_keep_unit,
                         arg_uid_shift,
                         userns_fd,
@@ -5514,7 +5536,7 @@ static int run_container(
                         return log_error_errno(r, "Failed to attach bus to event loop: %m");
         }
 
-        r = setup_notify_parent(event, notify_socket, PID_TO_PTR(*pid), &notify_event_source);
+        r = setup_notify_parent(event, notify_socket, pid, &notify_event_source);
         if (r < 0)
                 return r;
 
@@ -5541,7 +5563,7 @@ static int run_container(
 
         (void) sd_notifyf(false,
                           "STATUS=Container running.\n"
-                          "X_NSPAWN_LEADER_PID=" PID_FMT, *pid);
+                          "X_NSPAWN_LEADER_PID=" PID_FMT, pid->pid);
         if (!arg_notify_ready) {
                 r = sd_notify(false, "READY=1\n");
                 if (r < 0)
@@ -5559,8 +5581,8 @@ static int run_container(
 
         if (arg_kill_signal > 0) {
                 /* Try to kill the init system on SIGINT or SIGTERM */
-                (void) sd_event_add_signal(event, NULL, SIGINT, on_orderly_shutdown, PID_TO_PTR(*pid));
-                (void) sd_event_add_signal(event, NULL, SIGTERM, on_orderly_shutdown, PID_TO_PTR(*pid));
+                (void) sd_event_add_signal(event, NULL, SIGINT, on_orderly_shutdown, pid);
+                (void) sd_event_add_signal(event, NULL, SIGTERM, on_orderly_shutdown, pid);
         } else {
                 /* Immediately exit */
                 (void) sd_event_add_signal(event, NULL, SIGINT, NULL, NULL);
@@ -5574,7 +5596,7 @@ static int run_container(
                 log_debug_errno(r, "Failed allocate memory pressure event source, ignoring: %m");
 
         /* Exit when the child exits */
-        (void) sd_event_add_signal(event, NULL, SIGCHLD, on_sigchld, PID_TO_PTR(*pid));
+        (void) sd_event_add_signal(event, NULL, SIGCHLD, on_sigchld, pid);
 
         /* Retrieve the kmsg fifo allocated by inner child */
         fd_kmsg_fifo = receive_one_fd(fd_inner_socket_pair[0], 0);
@@ -5640,7 +5662,7 @@ static int run_container(
                         (void) pty_forward_set_window_title(forward, GLYPH_BLUE_CIRCLE, /* hostname = */ NULL,
                                                             STRV_MAKE("Container", arg_machine));
 
-                        pty_forward_set_hotkey_handler(forward, ptyfwd_hotkey, PID_TO_PTR(*pid));
+                        pty_forward_set_hotkey_handler(forward, ptyfwd_hotkey, pid);
                         break;
 
                 default:
@@ -5661,7 +5683,7 @@ static int run_container(
                 terminate_scope(bus, arg_machine);
 
         /* Normally redundant, but better safe than sorry */
-        (void) kill(*pid, SIGKILL);
+        (void) pidref_kill(pid, SIGKILL);
 
         fd_kmsg_fifo = safe_close(fd_kmsg_fifo);
 
@@ -5675,7 +5697,7 @@ static int run_container(
                         return r;
         }
 
-        r = wait_for_container(TAKE_PID(*pid), &container_status);
+        r = wait_for_container(pid, &container_status);
 
         /* Tell machined that we are gone. */
         if (arg_register && bus)
@@ -5880,7 +5902,7 @@ static int run(int argc, char *argv[]) {
         _cleanup_(loop_device_unrefp) LoopDevice *loop = NULL;
         _cleanup_(dissected_image_unrefp) DissectedImage *dissected_image = NULL;
         _cleanup_(fw_ctx_freep) FirewallContext *fw_ctx = NULL;
-        pid_t pid = 0;
+        _cleanup_(pidref_done) PidRef pid = PIDREF_NULL;
 
         log_setup();
 
@@ -6391,8 +6413,8 @@ finish:
                          r == 0 && ret == EXIT_FORCE_RESTART ? "STOPPING=1\nSTATUS=Restarting..." :
                                                                "STOPPING=1\nSTATUS=Terminating...");
 
-        if (pid > 0)
-                (void) kill(pid, SIGKILL);
+        if (pidref_is_set(&pid))
+                (void) pidref_kill(&pid, SIGKILL);
 
         /* Try to flush whatever is still queued in the pty */
         if (master >= 0) {
@@ -6400,8 +6422,10 @@ finish:
                 master = safe_close(master);
         }
 
-        if (pid > 0)
-                (void) wait_for_terminate(pid, NULL);
+        if (pidref_is_set(&pid)) {
+                (void) pidref_wait_for_terminate(&pid, NULL);
+                pidref_done(&pid);
+        }
 
         pager_close();