git.ipfire.org Git - thirdparty/systemd.git/blame - src/nspawn/nspawn-stub-pid1.c
Merge pull request #17082 from poettering/nspawn-ctty-tweaks
[thirdparty/systemd.git] / src / nspawn / nspawn-stub-pid1.c
Commit Line Data
53e1b683 1/* SPDX-License-Identifier: LGPL-2.1+ */
7732f92b 2
f5947a5e 3#include <sys/ioctl.h>
7732f92b 4#include <sys/reboot.h>
7732f92b 5#include <sys/wait.h>
75bf701f 6#include <sys/prctl.h>
fe993888 7#include <unistd.h>
7732f92b 8
f2fb2ec9
LP
9#include "def.h"
10#include "exit-status.h"
7732f92b
LP
11#include "fd-util.h"
12#include "log.h"
13#include "nspawn-stub-pid1.h"
14#include "process-util.h"
15#include "signal-util.h"
16#include "time-util.h"
7732f92b 17
75bf701f
LP
/* Re-points this process' recorded environment area at the buffer [new_environment, new_environment + length),
 * by updating first the start and then the end address through prctl(PR_SET_MM, …).
 *
 * Returns 0 when both updates succeed, otherwise the negated errno of the first prctl() call that failed. If
 * only the second call fails, the start address has already been changed. */
static int reset_environ(const char *new_environment, size_t length) {
        const unsigned long base = (unsigned long) new_environment;

        /* The updates are applied strictly in table order: start address first, end address second. */
        const struct {
                int field;
                unsigned long address;
        } updates[] = {
                { PR_SET_MM_ENV_START, base          },
                { PR_SET_MM_ENV_END,   base + length },
        };

        for (size_t i = 0; i < sizeof(updates) / sizeof(updates[0]); i++)
                if (prctl(PR_SET_MM, updates[i].field, updates[i].address, 0, 0) < 0)
                        return -errno;

        return 0;
}
32
33int stub_pid1(sd_id128_t uuid) {
7732f92b
LP
34 enum {
35 STATE_RUNNING,
36 STATE_REBOOT,
37 STATE_POWEROFF,
38 } state = STATE_RUNNING;
39
40 sigset_t fullmask, oldmask, waitmask;
41 usec_t quit_usec = USEC_INFINITY;
42 pid_t pid;
43 int r;
44
75bf701f
LP
45 /* The new environment we set up, on the stack. */
46 char new_environment[] =
47 "container=systemd-nspawn\0"
48 "container_uuid=XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX";
49
7732f92b
LP
50 /* Implements a stub PID 1, that reaps all processes and processes a couple of standard signals. This is useful
51 * for allowing arbitrary processes run in a container, and still have all zombies reaped. */
52
53 assert_se(sigfillset(&fullmask) >= 0);
54 assert_se(sigprocmask(SIG_BLOCK, &fullmask, &oldmask) >= 0);
55
56 pid = fork();
57 if (pid < 0)
58 return log_error_errno(errno, "Failed to fork child pid: %m");
59
60 if (pid == 0) {
61 /* Return in the child */
62 assert_se(sigprocmask(SIG_SETMASK, &oldmask, NULL) >= 0);
efe42662
LP
63
64 if (setsid() < 0)
65 return log_error_errno(errno, "Failed to become session leader in payload process: %m");
66
7732f92b
LP
67 return 0;
68 }
69
70 reset_all_signal_handlers();
71
72 log_close();
7acf581a 73 (void) close_all_fds(NULL, 0);
7732f92b
LP
74 log_open();
75
335d2ead
LP
76 if (ioctl(STDIN_FILENO, TIOCNOTTY) < 0) {
77 if (errno != ENOTTY)
78 log_warning_errno(errno, "Unexpected error from TIOCNOTTY ioctl in init stub process, ignoring: %m");
79 } else
80 log_warning("Expected TIOCNOTTY to fail, but it succeeded in init stub process, ignoring.");
81
75bf701f
LP
82 /* Flush out /proc/self/environ, so that we don't leak the environment from the host into the container. Also,
83 * set $container= and $container_uuid= so that clients in the container that query it from /proc/1/environ
1a012455 84 * find them set. */
75bf701f
LP
85 sd_id128_to_string(uuid, new_environment + sizeof(new_environment) - SD_ID128_STRING_MAX);
86 reset_environ(new_environment, sizeof(new_environment));
87
4c253ed1 88 (void) rename_process("(sd-stubinit)");
7732f92b
LP
89
90 assert_se(sigemptyset(&waitmask) >= 0);
91 assert_se(sigset_add_many(&waitmask,
92 SIGCHLD, /* posix: process died */
93 SIGINT, /* sysv: ctrl-alt-del */
94 SIGRTMIN+3, /* systemd: halt */
95 SIGRTMIN+4, /* systemd: poweroff */
96 SIGRTMIN+5, /* systemd: reboot */
97 SIGRTMIN+6, /* systemd: kexec */
98 SIGRTMIN+13, /* systemd: halt */
99 SIGRTMIN+14, /* systemd: poweroff */
100 SIGRTMIN+15, /* systemd: reboot */
101 SIGRTMIN+16, /* systemd: kexec */
102 -1) >= 0);
103
104 /* Note that we ignore SIGTERM (sysv's reexec), SIGHUP (reload), and all other signals here, since we don't
105 * support reexec/reloading in this stub process. */
106
107 for (;;) {
108 siginfo_t si;
109 usec_t current_usec;
110
111 si.si_pid = 0;
112 r = waitid(P_ALL, 0, &si, WEXITED|WNOHANG);
113 if (r < 0) {
114 r = log_error_errno(errno, "Failed to reap children: %m");
115 goto finish;
116 }
117
118 current_usec = now(CLOCK_MONOTONIC);
119
120 if (si.si_pid == pid || current_usec >= quit_usec) {
121
122 /* The child we started ourselves died or we reached a timeout. */
123
124 if (state == STATE_REBOOT) { /* dispatch a queued reboot */
125 (void) reboot(RB_AUTOBOOT);
126 r = log_error_errno(errno, "Failed to reboot: %m");
127 goto finish;
128
129 } else if (state == STATE_POWEROFF)
130 (void) reboot(RB_POWER_OFF); /* if this fails, fall back to normal exit. */
131
132 if (si.si_pid == pid && si.si_code == CLD_EXITED)
133 r = si.si_status; /* pass on exit code */
134 else
f2fb2ec9 135 r = EXIT_EXCEPTION; /* signal, coredump, timeout, … */
7732f92b
LP
136
137 goto finish;
138 }
139 if (si.si_pid != 0)
140 /* We reaped something. Retry until there's nothing more to reap. */
141 continue;
142
143 if (quit_usec == USEC_INFINITY)
144 r = sigwaitinfo(&waitmask, &si);
145 else {
146 struct timespec ts;
147 r = sigtimedwait(&waitmask, &si, timespec_store(&ts, quit_usec - current_usec));
148 }
149 if (r < 0) {
150 if (errno == EINTR) /* strace -p attach can result in EINTR, let's handle this nicely. */
151 continue;
152 if (errno == EAGAIN) /* timeout reached */
153 continue;
154
155 r = log_error_errno(errno, "Failed to wait for signal: %m");
156 goto finish;
157 }
158
159 if (si.si_signo == SIGCHLD)
160 continue; /* Let's reap this */
161
162 if (state != STATE_RUNNING)
163 continue;
164
165 /* Would love to use a switch() statement here, but SIGRTMIN is actually a function call, not a
166 * constant… */
167
168 if (si.si_signo == SIGRTMIN+3 ||
169 si.si_signo == SIGRTMIN+4 ||
170 si.si_signo == SIGRTMIN+13 ||
171 si.si_signo == SIGRTMIN+14)
172
173 state = STATE_POWEROFF;
174
175 else if (si.si_signo == SIGINT ||
176 si.si_signo == SIGRTMIN+5 ||
177 si.si_signo == SIGRTMIN+6 ||
178 si.si_signo == SIGRTMIN+15 ||
179 si.si_signo == SIGRTMIN+16)
180
181 state = STATE_REBOOT;
182 else
183 assert_not_reached("Got unexpected signal");
184
a4624785
LP
185 r = kill_and_sigcont(pid, SIGTERM);
186
187 /* Let's send a SIGHUP after the SIGTERM, as shells tend to ignore SIGTERM but do react to SIGHUP. We
188 * do it strictly in this order, so that the SIGTERM is dispatched first, and SIGHUP second for those
189 * processes which handle both. That's because services tend to bind configuration reload or something
190 * else to SIGHUP. */
191
192 if (r != -ESRCH)
193 (void) kill(pid, SIGHUP);
194
7732f92b
LP
195 quit_usec = now(CLOCK_MONOTONIC) + DEFAULT_TIMEOUT_USEC;
196 }
197
198finish:
199 _exit(r < 0 ? EXIT_FAILURE : r);
200}