]>
Commit | Line | Data |
---|---|---|
53e1b683 | 1 | /* SPDX-License-Identifier: LGPL-2.1+ */ |
7732f92b LP |
2 | |
3 | #include <sys/reboot.h> | |
7732f92b | 4 | #include <sys/wait.h> |
75bf701f | 5 | #include <sys/prctl.h> |
fe993888 | 6 | #include <unistd.h> |
7732f92b LP |
7 | |
8 | #include "fd-util.h" | |
9 | #include "log.h" | |
cacc0d7a | 10 | #include "missing.h" |
7732f92b LP |
11 | #include "nspawn-stub-pid1.h" |
12 | #include "process-util.h" | |
13 | #include "signal-util.h" | |
14 | #include "time-util.h" | |
15 | #include "def.h" | |
16 | ||
75bf701f LP |
/* Tells the kernel, via prctl(PR_SET_MM), that this process' environment area now starts at new_environment
 * and spans length bytes. The caller uses this to control what is reported through /proc/self/environ.
 *
 * Returns 0 on success. If either prctl() call fails, returns -errno of that call; the start address is set
 * first, and the end address is only touched if that succeeded. */
static int reset_environ(const char *new_environment, size_t length) {
        const unsigned long env_start = (unsigned long) new_environment;
        const unsigned long env_end = env_start + length;
        int rc;

        rc = prctl(PR_SET_MM, PR_SET_MM_ENV_START, env_start, 0, 0);
        if (rc < 0)
                return -errno;

        rc = prctl(PR_SET_MM, PR_SET_MM_ENV_END, env_end, 0, 0);
        if (rc < 0)
                return -errno;

        return 0;
}
31 | ||
32 | int stub_pid1(sd_id128_t uuid) { | |
7732f92b LP |
33 | enum { |
34 | STATE_RUNNING, | |
35 | STATE_REBOOT, | |
36 | STATE_POWEROFF, | |
37 | } state = STATE_RUNNING; | |
38 | ||
39 | sigset_t fullmask, oldmask, waitmask; | |
40 | usec_t quit_usec = USEC_INFINITY; | |
41 | pid_t pid; | |
42 | int r; | |
43 | ||
75bf701f LP |
44 | /* The new environment we set up, on the stack. */ |
45 | char new_environment[] = | |
46 | "container=systemd-nspawn\0" | |
47 | "container_uuid=XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX"; | |
48 | ||
7732f92b LP |
49 | /* Implements a stub PID 1, that reaps all processes and processes a couple of standard signals. This is useful |
50 | * for allowing arbitrary processes run in a container, and still have all zombies reaped. */ | |
51 | ||
52 | assert_se(sigfillset(&fullmask) >= 0); | |
53 | assert_se(sigprocmask(SIG_BLOCK, &fullmask, &oldmask) >= 0); | |
54 | ||
55 | pid = fork(); | |
56 | if (pid < 0) | |
57 | return log_error_errno(errno, "Failed to fork child pid: %m"); | |
58 | ||
59 | if (pid == 0) { | |
60 | /* Return in the child */ | |
61 | assert_se(sigprocmask(SIG_SETMASK, &oldmask, NULL) >= 0); | |
62 | setsid(); | |
63 | return 0; | |
64 | } | |
65 | ||
66 | reset_all_signal_handlers(); | |
67 | ||
68 | log_close(); | |
69 | close_all_fds(NULL, 0); | |
70 | log_open(); | |
71 | ||
75bf701f LP |
72 | /* Flush out /proc/self/environ, so that we don't leak the environment from the host into the container. Also, |
73 | * set $container= and $container_uuid= so that clients in the container that query it from /proc/1/environ | |
1a012455 | 74 | * find them set. */ |
75bf701f LP |
75 | sd_id128_to_string(uuid, new_environment + sizeof(new_environment) - SD_ID128_STRING_MAX); |
76 | reset_environ(new_environment, sizeof(new_environment)); | |
77 | ||
4c253ed1 | 78 | (void) rename_process("(sd-stubinit)"); |
7732f92b LP |
79 | |
80 | assert_se(sigemptyset(&waitmask) >= 0); | |
81 | assert_se(sigset_add_many(&waitmask, | |
82 | SIGCHLD, /* posix: process died */ | |
83 | SIGINT, /* sysv: ctrl-alt-del */ | |
84 | SIGRTMIN+3, /* systemd: halt */ | |
85 | SIGRTMIN+4, /* systemd: poweroff */ | |
86 | SIGRTMIN+5, /* systemd: reboot */ | |
87 | SIGRTMIN+6, /* systemd: kexec */ | |
88 | SIGRTMIN+13, /* systemd: halt */ | |
89 | SIGRTMIN+14, /* systemd: poweroff */ | |
90 | SIGRTMIN+15, /* systemd: reboot */ | |
91 | SIGRTMIN+16, /* systemd: kexec */ | |
92 | -1) >= 0); | |
93 | ||
94 | /* Note that we ignore SIGTERM (sysv's reexec), SIGHUP (reload), and all other signals here, since we don't | |
95 | * support reexec/reloading in this stub process. */ | |
96 | ||
97 | for (;;) { | |
98 | siginfo_t si; | |
99 | usec_t current_usec; | |
100 | ||
101 | si.si_pid = 0; | |
102 | r = waitid(P_ALL, 0, &si, WEXITED|WNOHANG); | |
103 | if (r < 0) { | |
104 | r = log_error_errno(errno, "Failed to reap children: %m"); | |
105 | goto finish; | |
106 | } | |
107 | ||
108 | current_usec = now(CLOCK_MONOTONIC); | |
109 | ||
110 | if (si.si_pid == pid || current_usec >= quit_usec) { | |
111 | ||
112 | /* The child we started ourselves died or we reached a timeout. */ | |
113 | ||
114 | if (state == STATE_REBOOT) { /* dispatch a queued reboot */ | |
115 | (void) reboot(RB_AUTOBOOT); | |
116 | r = log_error_errno(errno, "Failed to reboot: %m"); | |
117 | goto finish; | |
118 | ||
119 | } else if (state == STATE_POWEROFF) | |
120 | (void) reboot(RB_POWER_OFF); /* if this fails, fall back to normal exit. */ | |
121 | ||
122 | if (si.si_pid == pid && si.si_code == CLD_EXITED) | |
123 | r = si.si_status; /* pass on exit code */ | |
124 | else | |
125 | r = 255; /* signal, coredump, timeout, … */ | |
126 | ||
127 | goto finish; | |
128 | } | |
129 | if (si.si_pid != 0) | |
130 | /* We reaped something. Retry until there's nothing more to reap. */ | |
131 | continue; | |
132 | ||
133 | if (quit_usec == USEC_INFINITY) | |
134 | r = sigwaitinfo(&waitmask, &si); | |
135 | else { | |
136 | struct timespec ts; | |
137 | r = sigtimedwait(&waitmask, &si, timespec_store(&ts, quit_usec - current_usec)); | |
138 | } | |
139 | if (r < 0) { | |
140 | if (errno == EINTR) /* strace -p attach can result in EINTR, let's handle this nicely. */ | |
141 | continue; | |
142 | if (errno == EAGAIN) /* timeout reached */ | |
143 | continue; | |
144 | ||
145 | r = log_error_errno(errno, "Failed to wait for signal: %m"); | |
146 | goto finish; | |
147 | } | |
148 | ||
149 | if (si.si_signo == SIGCHLD) | |
150 | continue; /* Let's reap this */ | |
151 | ||
152 | if (state != STATE_RUNNING) | |
153 | continue; | |
154 | ||
155 | /* Would love to use a switch() statement here, but SIGRTMIN is actually a function call, not a | |
156 | * constant… */ | |
157 | ||
158 | if (si.si_signo == SIGRTMIN+3 || | |
159 | si.si_signo == SIGRTMIN+4 || | |
160 | si.si_signo == SIGRTMIN+13 || | |
161 | si.si_signo == SIGRTMIN+14) | |
162 | ||
163 | state = STATE_POWEROFF; | |
164 | ||
165 | else if (si.si_signo == SIGINT || | |
166 | si.si_signo == SIGRTMIN+5 || | |
167 | si.si_signo == SIGRTMIN+6 || | |
168 | si.si_signo == SIGRTMIN+15 || | |
169 | si.si_signo == SIGRTMIN+16) | |
170 | ||
171 | state = STATE_REBOOT; | |
172 | else | |
173 | assert_not_reached("Got unexpected signal"); | |
174 | ||
a4624785 LP |
175 | r = kill_and_sigcont(pid, SIGTERM); |
176 | ||
177 | /* Let's send a SIGHUP after the SIGTERM, as shells tend to ignore SIGTERM but do react to SIGHUP. We | |
178 | * do it strictly in this order, so that the SIGTERM is dispatched first, and SIGHUP second for those | |
179 | * processes which handle both. That's because services tend to bind configuration reload or something | |
180 | * else to SIGHUP. */ | |
181 | ||
182 | if (r != -ESRCH) | |
183 | (void) kill(pid, SIGHUP); | |
184 | ||
7732f92b LP |
185 | quit_usec = now(CLOCK_MONOTONIC) + DEFAULT_TIMEOUT_USEC; |
186 | } | |
187 | ||
188 | finish: | |
189 | _exit(r < 0 ? EXIT_FAILURE : r); | |
190 | } |