]>
Commit | Line | Data |
---|---|---|
53e1b683 | 1 | /* SPDX-License-Identifier: LGPL-2.1+ */ |
7732f92b LP |
2 | /*** |
3 | This file is part of systemd. | |
4 | ||
5 | Copyright 2016 Lennart Poettering | |
6 | ||
7 | systemd is free software; you can redistribute it and/or modify it | |
8 | under the terms of the GNU Lesser General Public License as published by | |
9 | the Free Software Foundation; either version 2.1 of the License, or | |
10 | (at your option) any later version. | |
11 | ||
12 | systemd is distributed in the hope that it will be useful, but | |
13 | WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
15 | Lesser General Public License for more details. | |
16 | ||
17 | You should have received a copy of the GNU Lesser General Public License | |
18 | along with systemd; If not, see <http://www.gnu.org/licenses/>. | |
19 | ***/ | |
20 | ||
21 | #include <sys/reboot.h> | |
7732f92b | 22 | #include <sys/wait.h> |
75bf701f | 23 | #include <sys/prctl.h> |
fe993888 | 24 | #include <unistd.h> |
7732f92b LP |
25 | |
26 | #include "fd-util.h" | |
27 | #include "log.h" | |
28 | #include "nspawn-stub-pid1.h" | |
29 | #include "process-util.h" | |
30 | #include "signal-util.h" | |
31 | #include "time-util.h" | |
32 | #include "def.h" | |
33 | ||
75bf701f LP |
/* Tells the kernel that this process' environment block now lives in the buffer at new_environment, which
 * is length bytes long, by updating the recorded start and end addresses through prctl(PR_SET_MM).
 *
 * The start address is updated first, the end address second. If the second call fails the first update is
 * not rolled back.
 *
 * Returns 0 on success, or a negative errno-style value taken from the prctl() call that failed. */
static int reset_environ(const char *new_environment, size_t length) {
        unsigned long env_start = (unsigned long) new_environment;
        unsigned long env_end = env_start + length;
        int rc;

        rc = prctl(PR_SET_MM, PR_SET_MM_ENV_START, env_start, 0, 0);
        if (rc >= 0)
                rc = prctl(PR_SET_MM, PR_SET_MM_ENV_END, env_end, 0, 0);

        /* errno still belongs to whichever prctl() failed, nothing else ran in between. */
        return rc < 0 ? -errno : 0;
}
48 | ||
49 | int stub_pid1(sd_id128_t uuid) { | |
7732f92b LP |
50 | enum { |
51 | STATE_RUNNING, | |
52 | STATE_REBOOT, | |
53 | STATE_POWEROFF, | |
54 | } state = STATE_RUNNING; | |
55 | ||
56 | sigset_t fullmask, oldmask, waitmask; | |
57 | usec_t quit_usec = USEC_INFINITY; | |
58 | pid_t pid; | |
59 | int r; | |
60 | ||
75bf701f LP |
61 | /* The new environment we set up, on the stack. */ |
62 | char new_environment[] = | |
63 | "container=systemd-nspawn\0" | |
64 | "container_uuid=XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX"; | |
65 | ||
7732f92b LP |
66 | /* Implements a stub PID 1, that reaps all processes and processes a couple of standard signals. This is useful |
67 | * for allowing arbitrary processes run in a container, and still have all zombies reaped. */ | |
68 | ||
69 | assert_se(sigfillset(&fullmask) >= 0); | |
70 | assert_se(sigprocmask(SIG_BLOCK, &fullmask, &oldmask) >= 0); | |
71 | ||
72 | pid = fork(); | |
73 | if (pid < 0) | |
74 | return log_error_errno(errno, "Failed to fork child pid: %m"); | |
75 | ||
76 | if (pid == 0) { | |
77 | /* Return in the child */ | |
78 | assert_se(sigprocmask(SIG_SETMASK, &oldmask, NULL) >= 0); | |
79 | setsid(); | |
80 | return 0; | |
81 | } | |
82 | ||
83 | reset_all_signal_handlers(); | |
84 | ||
85 | log_close(); | |
86 | close_all_fds(NULL, 0); | |
87 | log_open(); | |
88 | ||
75bf701f LP |
89 | /* Flush out /proc/self/environ, so that we don't leak the environment from the host into the container. Also, |
90 | * set $container= and $container_uuid= so that clients in the container that query it from /proc/1/environ | |
1a012455 | 91 | * find them set. */ |
75bf701f LP |
92 | sd_id128_to_string(uuid, new_environment + sizeof(new_environment) - SD_ID128_STRING_MAX); |
93 | reset_environ(new_environment, sizeof(new_environment)); | |
94 | ||
7732f92b LP |
95 | rename_process("STUBINIT"); |
96 | ||
97 | assert_se(sigemptyset(&waitmask) >= 0); | |
98 | assert_se(sigset_add_many(&waitmask, | |
99 | SIGCHLD, /* posix: process died */ | |
100 | SIGINT, /* sysv: ctrl-alt-del */ | |
101 | SIGRTMIN+3, /* systemd: halt */ | |
102 | SIGRTMIN+4, /* systemd: poweroff */ | |
103 | SIGRTMIN+5, /* systemd: reboot */ | |
104 | SIGRTMIN+6, /* systemd: kexec */ | |
105 | SIGRTMIN+13, /* systemd: halt */ | |
106 | SIGRTMIN+14, /* systemd: poweroff */ | |
107 | SIGRTMIN+15, /* systemd: reboot */ | |
108 | SIGRTMIN+16, /* systemd: kexec */ | |
109 | -1) >= 0); | |
110 | ||
111 | /* Note that we ignore SIGTERM (sysv's reexec), SIGHUP (reload), and all other signals here, since we don't | |
112 | * support reexec/reloading in this stub process. */ | |
113 | ||
114 | for (;;) { | |
115 | siginfo_t si; | |
116 | usec_t current_usec; | |
117 | ||
118 | si.si_pid = 0; | |
119 | r = waitid(P_ALL, 0, &si, WEXITED|WNOHANG); | |
120 | if (r < 0) { | |
121 | r = log_error_errno(errno, "Failed to reap children: %m"); | |
122 | goto finish; | |
123 | } | |
124 | ||
125 | current_usec = now(CLOCK_MONOTONIC); | |
126 | ||
127 | if (si.si_pid == pid || current_usec >= quit_usec) { | |
128 | ||
129 | /* The child we started ourselves died or we reached a timeout. */ | |
130 | ||
131 | if (state == STATE_REBOOT) { /* dispatch a queued reboot */ | |
132 | (void) reboot(RB_AUTOBOOT); | |
133 | r = log_error_errno(errno, "Failed to reboot: %m"); | |
134 | goto finish; | |
135 | ||
136 | } else if (state == STATE_POWEROFF) | |
137 | (void) reboot(RB_POWER_OFF); /* if this fails, fall back to normal exit. */ | |
138 | ||
139 | if (si.si_pid == pid && si.si_code == CLD_EXITED) | |
140 | r = si.si_status; /* pass on exit code */ | |
141 | else | |
142 | r = 255; /* signal, coredump, timeout, … */ | |
143 | ||
144 | goto finish; | |
145 | } | |
146 | if (si.si_pid != 0) | |
147 | /* We reaped something. Retry until there's nothing more to reap. */ | |
148 | continue; | |
149 | ||
150 | if (quit_usec == USEC_INFINITY) | |
151 | r = sigwaitinfo(&waitmask, &si); | |
152 | else { | |
153 | struct timespec ts; | |
154 | r = sigtimedwait(&waitmask, &si, timespec_store(&ts, quit_usec - current_usec)); | |
155 | } | |
156 | if (r < 0) { | |
157 | if (errno == EINTR) /* strace -p attach can result in EINTR, let's handle this nicely. */ | |
158 | continue; | |
159 | if (errno == EAGAIN) /* timeout reached */ | |
160 | continue; | |
161 | ||
162 | r = log_error_errno(errno, "Failed to wait for signal: %m"); | |
163 | goto finish; | |
164 | } | |
165 | ||
166 | if (si.si_signo == SIGCHLD) | |
167 | continue; /* Let's reap this */ | |
168 | ||
169 | if (state != STATE_RUNNING) | |
170 | continue; | |
171 | ||
172 | /* Would love to use a switch() statement here, but SIGRTMIN is actually a function call, not a | |
173 | * constant… */ | |
174 | ||
175 | if (si.si_signo == SIGRTMIN+3 || | |
176 | si.si_signo == SIGRTMIN+4 || | |
177 | si.si_signo == SIGRTMIN+13 || | |
178 | si.si_signo == SIGRTMIN+14) | |
179 | ||
180 | state = STATE_POWEROFF; | |
181 | ||
182 | else if (si.si_signo == SIGINT || | |
183 | si.si_signo == SIGRTMIN+5 || | |
184 | si.si_signo == SIGRTMIN+6 || | |
185 | si.si_signo == SIGRTMIN+15 || | |
186 | si.si_signo == SIGRTMIN+16) | |
187 | ||
188 | state = STATE_REBOOT; | |
189 | else | |
190 | assert_not_reached("Got unexpected signal"); | |
191 | ||
a4624785 LP |
192 | r = kill_and_sigcont(pid, SIGTERM); |
193 | ||
194 | /* Let's send a SIGHUP after the SIGTERM, as shells tend to ignore SIGTERM but do react to SIGHUP. We | |
195 | * do it strictly in this order, so that the SIGTERM is dispatched first, and SIGHUP second for those | |
196 | * processes which handle both. That's because services tend to bind configuration reload or something | |
197 | * else to SIGHUP. */ | |
198 | ||
199 | if (r != -ESRCH) | |
200 | (void) kill(pid, SIGHUP); | |
201 | ||
7732f92b LP |
202 | quit_usec = now(CLOCK_MONOTONIC) + DEFAULT_TIMEOUT_USEC; |
203 | } | |
204 | ||
205 | finish: | |
206 | _exit(r < 0 ? EXIT_FAILURE : r); | |
207 | } |