]>
git.ipfire.org Git - thirdparty/systemd.git/blob - src/nspawn/nspawn-stub-pid1.c
1 /* SPDX-License-Identifier: LGPL-2.1+ */
3 This file is part of systemd.
5 Copyright 2016 Lennart Poettering
7 systemd is free software; you can redistribute it and/or modify it
8 under the terms of the GNU Lesser General Public License as published by
9 the Free Software Foundation; either version 2.1 of the License, or
10 (at your option) any later version.
12 systemd is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 Lesser General Public License for more details.
17 You should have received a copy of the GNU Lesser General Public License
18 along with systemd; If not, see <http://www.gnu.org/licenses/>.
21 #include <sys/reboot.h>
23 #include <sys/prctl.h>
28 #include "nspawn-stub-pid1.h"
29 #include "process-util.h"
30 #include "signal-util.h"
31 #include "time-util.h"
34 static int reset_environ(const char *new_environment
, size_t length
) {
35 unsigned long start
, end
;
37 start
= (unsigned long) new_environment
;
40 if (prctl(PR_SET_MM
, PR_SET_MM_ENV_START
, start
, 0, 0) < 0)
43 if (prctl(PR_SET_MM
, PR_SET_MM_ENV_END
, end
, 0, 0) < 0)
49 int stub_pid1(sd_id128_t uuid
) {
54 } state
= STATE_RUNNING
;
56 sigset_t fullmask
, oldmask
, waitmask
;
57 usec_t quit_usec
= USEC_INFINITY
;
61 /* The new environment we set up, on the stack. */
62 char new_environment
[] =
63 "container=systemd-nspawn\0"
64 "container_uuid=XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX";
66 /* Implements a stub PID 1 that reaps all processes and handles a couple of standard signals. This is useful
67 * for allowing arbitrary processes to run in a container while still having all zombies reaped. */
69 assert_se(sigfillset(&fullmask
) >= 0);
70 assert_se(sigprocmask(SIG_BLOCK
, &fullmask
, &oldmask
) >= 0);
74 return log_error_errno(errno
, "Failed to fork child pid: %m");
77 /* Return in the child */
78 assert_se(sigprocmask(SIG_SETMASK
, &oldmask
, NULL
) >= 0);
83 reset_all_signal_handlers();
86 close_all_fds(NULL
, 0);
89 /* Flush out /proc/self/environ, so that we don't leak the environment from the host into the container. Also,
90 * set $container= and $container_uuid= so that clients in the container that query /proc/1/environ find them set. */
92 sd_id128_to_string(uuid
, new_environment
+ sizeof(new_environment
) - SD_ID128_STRING_MAX
);
93 reset_environ(new_environment
, sizeof(new_environment
));
95 rename_process("STUBINIT");
97 assert_se(sigemptyset(&waitmask
) >= 0);
98 assert_se(sigset_add_many(&waitmask
,
99 SIGCHLD
, /* posix: process died */
100 SIGINT
, /* sysv: ctrl-alt-del */
101 SIGRTMIN
+3, /* systemd: halt */
102 SIGRTMIN
+4, /* systemd: poweroff */
103 SIGRTMIN
+5, /* systemd: reboot */
104 SIGRTMIN
+6, /* systemd: kexec */
105 SIGRTMIN
+13, /* systemd: halt */
106 SIGRTMIN
+14, /* systemd: poweroff */
107 SIGRTMIN
+15, /* systemd: reboot */
108 SIGRTMIN
+16, /* systemd: kexec */
111 /* Note that we ignore SIGTERM (sysv's reexec), SIGHUP (reload), and all other signals here, since we don't
112 * support reexec/reloading in this stub process. */
119 r
= waitid(P_ALL
, 0, &si
, WEXITED
|WNOHANG
);
121 r
= log_error_errno(errno
, "Failed to reap children: %m");
125 current_usec
= now(CLOCK_MONOTONIC
);
127 if (si
.si_pid
== pid
|| current_usec
>= quit_usec
) {
129 /* The child we started ourselves died or we reached a timeout. */
131 if (state
== STATE_REBOOT
) { /* dispatch a queued reboot */
132 (void) reboot(RB_AUTOBOOT
);
133 r
= log_error_errno(errno
, "Failed to reboot: %m");
136 } else if (state
== STATE_POWEROFF
)
137 (void) reboot(RB_POWER_OFF
); /* if this fails, fall back to normal exit. */
139 if (si
.si_pid
== pid
&& si
.si_code
== CLD_EXITED
)
140 r
= si
.si_status
; /* pass on exit code */
142 r
= 255; /* signal, coredump, timeout, … */
147 /* We reaped something. Retry until there's nothing more to reap. */
150 if (quit_usec
== USEC_INFINITY
)
151 r
= sigwaitinfo(&waitmask
, &si
);
154 r
= sigtimedwait(&waitmask
, &si
, timespec_store(&ts
, quit_usec
- current_usec
));
157 if (errno
== EINTR
) /* strace -p attach can result in EINTR, let's handle this nicely. */
159 if (errno
== EAGAIN
) /* timeout reached */
162 r
= log_error_errno(errno
, "Failed to wait for signal: %m");
166 if (si
.si_signo
== SIGCHLD
)
167 continue; /* Let's reap this */
169 if (state
!= STATE_RUNNING
)
172 /* Would love to use a switch() statement here, but SIGRTMIN is actually a function call, not a constant, so it cannot be used as a case label. */
175 if (si
.si_signo
== SIGRTMIN
+3 ||
176 si
.si_signo
== SIGRTMIN
+4 ||
177 si
.si_signo
== SIGRTMIN
+13 ||
178 si
.si_signo
== SIGRTMIN
+14)
180 state
= STATE_POWEROFF
;
182 else if (si
.si_signo
== SIGINT
||
183 si
.si_signo
== SIGRTMIN
+5 ||
184 si
.si_signo
== SIGRTMIN
+6 ||
185 si
.si_signo
== SIGRTMIN
+15 ||
186 si
.si_signo
== SIGRTMIN
+16)
188 state
= STATE_REBOOT
;
190 assert_not_reached("Got unexpected signal");
192 r
= kill_and_sigcont(pid
, SIGTERM
);
194 /* Let's send a SIGHUP after the SIGTERM, as shells tend to ignore SIGTERM but do react to SIGHUP. We
195 * do it strictly in this order, so that the SIGTERM is dispatched first, and SIGHUP second for those
196 * processes which handle both. That's because services tend to bind configuration reload or something else to SIGHUP. */
200 (void) kill(pid
, SIGHUP
);
202 quit_usec
= now(CLOCK_MONOTONIC
) + DEFAULT_TIMEOUT_USEC
;
206 _exit(r
< 0 ? EXIT_FAILURE
: r
);