]>
Commit | Line | Data |
---|---|---|
53e1b683 | 1 | /* SPDX-License-Identifier: LGPL-2.1+ */ |
7732f92b | 2 | |
f5947a5e | 3 | #include <sys/ioctl.h> |
7732f92b | 4 | #include <sys/reboot.h> |
7732f92b | 5 | #include <sys/wait.h> |
75bf701f | 6 | #include <sys/prctl.h> |
fe993888 | 7 | #include <unistd.h> |
7732f92b | 8 | |
f2fb2ec9 LP |
9 | #include "def.h" |
10 | #include "exit-status.h" | |
7732f92b LP |
11 | #include "fd-util.h" |
12 | #include "log.h" | |
13 | #include "nspawn-stub-pid1.h" | |
14 | #include "process-util.h" | |
15 | #include "signal-util.h" | |
16 | #include "time-util.h" | |
7732f92b | 17 | |
75bf701f LP |
/* Tell the kernel that this process' environment block is the memory range
 * [new_environment, new_environment + length).
 *
 * The start boundary is set first, then the end boundary, each through its
 * own prctl(PR_SET_MM, ...) call.
 *
 * Returns 0 when both calls succeed, or -errno from the first call that
 * fails (the second call is not attempted in that case). */
static int reset_environ(const char *new_environment, size_t length) {
        const unsigned long env_start = (unsigned long) new_environment;
        const struct {
                int option;
                unsigned long address;
        } boundaries[] = {
                { PR_SET_MM_ENV_START, env_start          },
                { PR_SET_MM_ENV_END,   env_start + length },
        };

        for (size_t i = 0; i < sizeof(boundaries) / sizeof(boundaries[0]); i++)
                if (prctl(PR_SET_MM, boundaries[i].option, boundaries[i].address, 0, 0) < 0)
                        return -errno;

        return 0;
}
32 | ||
33 | int stub_pid1(sd_id128_t uuid) { | |
7732f92b LP |
34 | enum { |
35 | STATE_RUNNING, | |
36 | STATE_REBOOT, | |
37 | STATE_POWEROFF, | |
38 | } state = STATE_RUNNING; | |
39 | ||
40 | sigset_t fullmask, oldmask, waitmask; | |
41 | usec_t quit_usec = USEC_INFINITY; | |
42 | pid_t pid; | |
43 | int r; | |
44 | ||
75bf701f LP |
45 | /* The new environment we set up, on the stack. */ |
46 | char new_environment[] = | |
47 | "container=systemd-nspawn\0" | |
48 | "container_uuid=XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX"; | |
49 | ||
7732f92b LP |
50 | /* Implements a stub PID 1, that reaps all processes and processes a couple of standard signals. This is useful |
51 | * for allowing arbitrary processes run in a container, and still have all zombies reaped. */ | |
52 | ||
53 | assert_se(sigfillset(&fullmask) >= 0); | |
54 | assert_se(sigprocmask(SIG_BLOCK, &fullmask, &oldmask) >= 0); | |
55 | ||
56 | pid = fork(); | |
57 | if (pid < 0) | |
58 | return log_error_errno(errno, "Failed to fork child pid: %m"); | |
59 | ||
60 | if (pid == 0) { | |
61 | /* Return in the child */ | |
62 | assert_se(sigprocmask(SIG_SETMASK, &oldmask, NULL) >= 0); | |
efe42662 LP |
63 | |
64 | if (setsid() < 0) | |
65 | return log_error_errno(errno, "Failed to become session leader in payload process: %m"); | |
66 | ||
7732f92b LP |
67 | return 0; |
68 | } | |
69 | ||
70 | reset_all_signal_handlers(); | |
71 | ||
72 | log_close(); | |
7acf581a | 73 | (void) close_all_fds(NULL, 0); |
7732f92b LP |
74 | log_open(); |
75 | ||
335d2ead LP |
76 | if (ioctl(STDIN_FILENO, TIOCNOTTY) < 0) { |
77 | if (errno != ENOTTY) | |
78 | log_warning_errno(errno, "Unexpected error from TIOCNOTTY ioctl in init stub process, ignoring: %m"); | |
79 | } else | |
80 | log_warning("Expected TIOCNOTTY to fail, but it succeeded in init stub process, ignoring."); | |
81 | ||
75bf701f LP |
82 | /* Flush out /proc/self/environ, so that we don't leak the environment from the host into the container. Also, |
83 | * set $container= and $container_uuid= so that clients in the container that query it from /proc/1/environ | |
1a012455 | 84 | * find them set. */ |
75bf701f LP |
85 | sd_id128_to_string(uuid, new_environment + sizeof(new_environment) - SD_ID128_STRING_MAX); |
86 | reset_environ(new_environment, sizeof(new_environment)); | |
87 | ||
4c253ed1 | 88 | (void) rename_process("(sd-stubinit)"); |
7732f92b LP |
89 | |
90 | assert_se(sigemptyset(&waitmask) >= 0); | |
91 | assert_se(sigset_add_many(&waitmask, | |
92 | SIGCHLD, /* posix: process died */ | |
93 | SIGINT, /* sysv: ctrl-alt-del */ | |
94 | SIGRTMIN+3, /* systemd: halt */ | |
95 | SIGRTMIN+4, /* systemd: poweroff */ | |
96 | SIGRTMIN+5, /* systemd: reboot */ | |
97 | SIGRTMIN+6, /* systemd: kexec */ | |
98 | SIGRTMIN+13, /* systemd: halt */ | |
99 | SIGRTMIN+14, /* systemd: poweroff */ | |
100 | SIGRTMIN+15, /* systemd: reboot */ | |
101 | SIGRTMIN+16, /* systemd: kexec */ | |
102 | -1) >= 0); | |
103 | ||
104 | /* Note that we ignore SIGTERM (sysv's reexec), SIGHUP (reload), and all other signals here, since we don't | |
105 | * support reexec/reloading in this stub process. */ | |
106 | ||
107 | for (;;) { | |
108 | siginfo_t si; | |
109 | usec_t current_usec; | |
110 | ||
111 | si.si_pid = 0; | |
112 | r = waitid(P_ALL, 0, &si, WEXITED|WNOHANG); | |
113 | if (r < 0) { | |
114 | r = log_error_errno(errno, "Failed to reap children: %m"); | |
115 | goto finish; | |
116 | } | |
117 | ||
118 | current_usec = now(CLOCK_MONOTONIC); | |
119 | ||
120 | if (si.si_pid == pid || current_usec >= quit_usec) { | |
121 | ||
122 | /* The child we started ourselves died or we reached a timeout. */ | |
123 | ||
124 | if (state == STATE_REBOOT) { /* dispatch a queued reboot */ | |
125 | (void) reboot(RB_AUTOBOOT); | |
126 | r = log_error_errno(errno, "Failed to reboot: %m"); | |
127 | goto finish; | |
128 | ||
129 | } else if (state == STATE_POWEROFF) | |
130 | (void) reboot(RB_POWER_OFF); /* if this fails, fall back to normal exit. */ | |
131 | ||
132 | if (si.si_pid == pid && si.si_code == CLD_EXITED) | |
133 | r = si.si_status; /* pass on exit code */ | |
134 | else | |
f2fb2ec9 | 135 | r = EXIT_EXCEPTION; /* signal, coredump, timeout, … */ |
7732f92b LP |
136 | |
137 | goto finish; | |
138 | } | |
139 | if (si.si_pid != 0) | |
140 | /* We reaped something. Retry until there's nothing more to reap. */ | |
141 | continue; | |
142 | ||
143 | if (quit_usec == USEC_INFINITY) | |
144 | r = sigwaitinfo(&waitmask, &si); | |
145 | else { | |
146 | struct timespec ts; | |
147 | r = sigtimedwait(&waitmask, &si, timespec_store(&ts, quit_usec - current_usec)); | |
148 | } | |
149 | if (r < 0) { | |
150 | if (errno == EINTR) /* strace -p attach can result in EINTR, let's handle this nicely. */ | |
151 | continue; | |
152 | if (errno == EAGAIN) /* timeout reached */ | |
153 | continue; | |
154 | ||
155 | r = log_error_errno(errno, "Failed to wait for signal: %m"); | |
156 | goto finish; | |
157 | } | |
158 | ||
159 | if (si.si_signo == SIGCHLD) | |
160 | continue; /* Let's reap this */ | |
161 | ||
162 | if (state != STATE_RUNNING) | |
163 | continue; | |
164 | ||
165 | /* Would love to use a switch() statement here, but SIGRTMIN is actually a function call, not a | |
166 | * constant… */ | |
167 | ||
168 | if (si.si_signo == SIGRTMIN+3 || | |
169 | si.si_signo == SIGRTMIN+4 || | |
170 | si.si_signo == SIGRTMIN+13 || | |
171 | si.si_signo == SIGRTMIN+14) | |
172 | ||
173 | state = STATE_POWEROFF; | |
174 | ||
175 | else if (si.si_signo == SIGINT || | |
176 | si.si_signo == SIGRTMIN+5 || | |
177 | si.si_signo == SIGRTMIN+6 || | |
178 | si.si_signo == SIGRTMIN+15 || | |
179 | si.si_signo == SIGRTMIN+16) | |
180 | ||
181 | state = STATE_REBOOT; | |
182 | else | |
183 | assert_not_reached("Got unexpected signal"); | |
184 | ||
a4624785 LP |
185 | r = kill_and_sigcont(pid, SIGTERM); |
186 | ||
187 | /* Let's send a SIGHUP after the SIGTERM, as shells tend to ignore SIGTERM but do react to SIGHUP. We | |
188 | * do it strictly in this order, so that the SIGTERM is dispatched first, and SIGHUP second for those | |
189 | * processes which handle both. That's because services tend to bind configuration reload or something | |
190 | * else to SIGHUP. */ | |
191 | ||
192 | if (r != -ESRCH) | |
193 | (void) kill(pid, SIGHUP); | |
194 | ||
7732f92b LP |
195 | quit_usec = now(CLOCK_MONOTONIC) + DEFAULT_TIMEOUT_USEC; |
196 | } | |
197 | ||
198 | finish: | |
199 | _exit(r < 0 ? EXIT_FAILURE : r); | |
200 | } |