]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/nspawn/nspawn-stub-pid1.c
Add SPDX license identifiers to source files under the LGPL
[thirdparty/systemd.git] / src / nspawn / nspawn-stub-pid1.c
CommitLineData
/* SPDX-License-Identifier: LGPL-2.1+ */
/***
  This file is part of systemd.

  Copyright 2016 Lennart Poettering

  systemd is free software; you can redistribute it and/or modify it
  under the terms of the GNU Lesser General Public License as published by
  the Free Software Foundation; either version 2.1 of the License, or
  (at your option) any later version.

  systemd is distributed in the hope that it will be useful, but
  WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  Lesser General Public License for more details.

  You should have received a copy of the GNU Lesser General Public License
  along with systemd; If not, see <http://www.gnu.org/licenses/>.
***/
20
21#include <sys/reboot.h>
7732f92b 22#include <sys/wait.h>
75bf701f 23#include <sys/prctl.h>
fe993888 24#include <unistd.h>
7732f92b
LP
25
26#include "fd-util.h"
27#include "log.h"
28#include "nspawn-stub-pid1.h"
29#include "process-util.h"
30#include "signal-util.h"
31#include "time-util.h"
32#include "def.h"
33
75bf701f
LP
34static int reset_environ(const char *new_environment, size_t length) {
35 unsigned long start, end;
36
37 start = (unsigned long) new_environment;
38 end = start + length;
39
40 if (prctl(PR_SET_MM, PR_SET_MM_ENV_START, start, 0, 0) < 0)
41 return -errno;
42
43 if (prctl(PR_SET_MM, PR_SET_MM_ENV_END, end, 0, 0) < 0)
44 return -errno;
45
46 return 0;
47}
48
49int stub_pid1(sd_id128_t uuid) {
7732f92b
LP
50 enum {
51 STATE_RUNNING,
52 STATE_REBOOT,
53 STATE_POWEROFF,
54 } state = STATE_RUNNING;
55
56 sigset_t fullmask, oldmask, waitmask;
57 usec_t quit_usec = USEC_INFINITY;
58 pid_t pid;
59 int r;
60
75bf701f
LP
61 /* The new environment we set up, on the stack. */
62 char new_environment[] =
63 "container=systemd-nspawn\0"
64 "container_uuid=XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX";
65
7732f92b
LP
66 /* Implements a stub PID 1, that reaps all processes and processes a couple of standard signals. This is useful
67 * for allowing arbitrary processes run in a container, and still have all zombies reaped. */
68
69 assert_se(sigfillset(&fullmask) >= 0);
70 assert_se(sigprocmask(SIG_BLOCK, &fullmask, &oldmask) >= 0);
71
72 pid = fork();
73 if (pid < 0)
74 return log_error_errno(errno, "Failed to fork child pid: %m");
75
76 if (pid == 0) {
77 /* Return in the child */
78 assert_se(sigprocmask(SIG_SETMASK, &oldmask, NULL) >= 0);
79 setsid();
80 return 0;
81 }
82
83 reset_all_signal_handlers();
84
85 log_close();
86 close_all_fds(NULL, 0);
87 log_open();
88
75bf701f
LP
89 /* Flush out /proc/self/environ, so that we don't leak the environment from the host into the container. Also,
90 * set $container= and $container_uuid= so that clients in the container that query it from /proc/1/environ
1a012455 91 * find them set. */
75bf701f
LP
92 sd_id128_to_string(uuid, new_environment + sizeof(new_environment) - SD_ID128_STRING_MAX);
93 reset_environ(new_environment, sizeof(new_environment));
94
7732f92b
LP
95 rename_process("STUBINIT");
96
97 assert_se(sigemptyset(&waitmask) >= 0);
98 assert_se(sigset_add_many(&waitmask,
99 SIGCHLD, /* posix: process died */
100 SIGINT, /* sysv: ctrl-alt-del */
101 SIGRTMIN+3, /* systemd: halt */
102 SIGRTMIN+4, /* systemd: poweroff */
103 SIGRTMIN+5, /* systemd: reboot */
104 SIGRTMIN+6, /* systemd: kexec */
105 SIGRTMIN+13, /* systemd: halt */
106 SIGRTMIN+14, /* systemd: poweroff */
107 SIGRTMIN+15, /* systemd: reboot */
108 SIGRTMIN+16, /* systemd: kexec */
109 -1) >= 0);
110
111 /* Note that we ignore SIGTERM (sysv's reexec), SIGHUP (reload), and all other signals here, since we don't
112 * support reexec/reloading in this stub process. */
113
114 for (;;) {
115 siginfo_t si;
116 usec_t current_usec;
117
118 si.si_pid = 0;
119 r = waitid(P_ALL, 0, &si, WEXITED|WNOHANG);
120 if (r < 0) {
121 r = log_error_errno(errno, "Failed to reap children: %m");
122 goto finish;
123 }
124
125 current_usec = now(CLOCK_MONOTONIC);
126
127 if (si.si_pid == pid || current_usec >= quit_usec) {
128
129 /* The child we started ourselves died or we reached a timeout. */
130
131 if (state == STATE_REBOOT) { /* dispatch a queued reboot */
132 (void) reboot(RB_AUTOBOOT);
133 r = log_error_errno(errno, "Failed to reboot: %m");
134 goto finish;
135
136 } else if (state == STATE_POWEROFF)
137 (void) reboot(RB_POWER_OFF); /* if this fails, fall back to normal exit. */
138
139 if (si.si_pid == pid && si.si_code == CLD_EXITED)
140 r = si.si_status; /* pass on exit code */
141 else
142 r = 255; /* signal, coredump, timeout, … */
143
144 goto finish;
145 }
146 if (si.si_pid != 0)
147 /* We reaped something. Retry until there's nothing more to reap. */
148 continue;
149
150 if (quit_usec == USEC_INFINITY)
151 r = sigwaitinfo(&waitmask, &si);
152 else {
153 struct timespec ts;
154 r = sigtimedwait(&waitmask, &si, timespec_store(&ts, quit_usec - current_usec));
155 }
156 if (r < 0) {
157 if (errno == EINTR) /* strace -p attach can result in EINTR, let's handle this nicely. */
158 continue;
159 if (errno == EAGAIN) /* timeout reached */
160 continue;
161
162 r = log_error_errno(errno, "Failed to wait for signal: %m");
163 goto finish;
164 }
165
166 if (si.si_signo == SIGCHLD)
167 continue; /* Let's reap this */
168
169 if (state != STATE_RUNNING)
170 continue;
171
172 /* Would love to use a switch() statement here, but SIGRTMIN is actually a function call, not a
173 * constant… */
174
175 if (si.si_signo == SIGRTMIN+3 ||
176 si.si_signo == SIGRTMIN+4 ||
177 si.si_signo == SIGRTMIN+13 ||
178 si.si_signo == SIGRTMIN+14)
179
180 state = STATE_POWEROFF;
181
182 else if (si.si_signo == SIGINT ||
183 si.si_signo == SIGRTMIN+5 ||
184 si.si_signo == SIGRTMIN+6 ||
185 si.si_signo == SIGRTMIN+15 ||
186 si.si_signo == SIGRTMIN+16)
187
188 state = STATE_REBOOT;
189 else
190 assert_not_reached("Got unexpected signal");
191
a4624785
LP
192 r = kill_and_sigcont(pid, SIGTERM);
193
194 /* Let's send a SIGHUP after the SIGTERM, as shells tend to ignore SIGTERM but do react to SIGHUP. We
195 * do it strictly in this order, so that the SIGTERM is dispatched first, and SIGHUP second for those
196 * processes which handle both. That's because services tend to bind configuration reload or something
197 * else to SIGHUP. */
198
199 if (r != -ESRCH)
200 (void) kill(pid, SIGHUP);
201
7732f92b
LP
202 quit_usec = now(CLOCK_MONOTONIC) + DEFAULT_TIMEOUT_USEC;
203 }
204
205finish:
206 _exit(r < 0 ? EXIT_FAILURE : r);
207}