]>
Commit | Line | Data |
---|---|---|
1 | /* SPDX-License-Identifier: LGPL-2.1+ */ | |
2 | /*** | |
3 | This file is part of systemd. | |
4 | ||
5 | Copyright 2010 ProFUSION embedded systems | |
6 | ||
7 | systemd is free software; you can redistribute it and/or modify it | |
8 | under the terms of the GNU Lesser General Public License as published by | |
9 | the Free Software Foundation; either version 2.1 of the License, or | |
10 | (at your option) any later version. | |
11 | ||
12 | systemd is distributed in the hope that it will be useful, but | |
13 | WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
15 | Lesser General Public License for more details. | |
16 | ||
17 | You should have received a copy of the GNU Lesser General Public License | |
18 | along with systemd; If not, see <http://www.gnu.org/licenses/>. | |
19 | ***/ | |
20 | ||
21 | #include <errno.h> | |
22 | #include <getopt.h> | |
23 | #include <linux/reboot.h> | |
24 | #include <signal.h> | |
25 | #include <stdbool.h> | |
26 | #include <stdlib.h> | |
27 | #include <sys/mman.h> | |
28 | #include <sys/mount.h> | |
29 | #include <sys/reboot.h> | |
30 | #include <sys/stat.h> | |
31 | #include <unistd.h> | |
32 | ||
33 | #include "alloc-util.h" | |
34 | #include "cgroup-util.h" | |
35 | #include "def.h" | |
36 | #include "exec-util.h" | |
37 | #include "fileio.h" | |
38 | #include "killall.h" | |
39 | #include "log.h" | |
40 | #include "missing.h" | |
41 | #include "parse-util.h" | |
42 | #include "process-util.h" | |
43 | #include "string-util.h" | |
44 | #include "switch-root.h" | |
45 | #include "terminal-util.h" | |
46 | #include "umount.h" | |
47 | #include "util.h" | |
48 | #include "virt.h" | |
49 | #include "watchdog.h" | |
50 | ||
/* Upper bound on the number of passes of the unmount/swapoff/detach loop in main(). */
#define FINALIZE_ATTEMPTS 50

/* Command line state filled in by parse_argv(). */
static char *arg_verb;         /* first non-option argument: the requested action */
static uint8_t arg_exit_code;  /* value given via --exit-code=, zero if not given */
55 | ||
56 | static int parse_argv(int argc, char *argv[]) { | |
57 | enum { | |
58 | ARG_LOG_LEVEL = 0x100, | |
59 | ARG_LOG_TARGET, | |
60 | ARG_LOG_COLOR, | |
61 | ARG_LOG_LOCATION, | |
62 | ARG_EXIT_CODE, | |
63 | }; | |
64 | ||
65 | static const struct option options[] = { | |
66 | { "log-level", required_argument, NULL, ARG_LOG_LEVEL }, | |
67 | { "log-target", required_argument, NULL, ARG_LOG_TARGET }, | |
68 | { "log-color", optional_argument, NULL, ARG_LOG_COLOR }, | |
69 | { "log-location", optional_argument, NULL, ARG_LOG_LOCATION }, | |
70 | { "exit-code", required_argument, NULL, ARG_EXIT_CODE }, | |
71 | {} | |
72 | }; | |
73 | ||
74 | int c, r; | |
75 | ||
76 | assert(argc >= 1); | |
77 | assert(argv); | |
78 | ||
79 | /* "-" prevents getopt from permuting argv[] and moving the verb away | |
80 | * from argv[1]. Our interface to initrd promises it'll be there. */ | |
81 | while ((c = getopt_long(argc, argv, "-", options, NULL)) >= 0) | |
82 | switch (c) { | |
83 | ||
84 | case ARG_LOG_LEVEL: | |
85 | r = log_set_max_level_from_string(optarg); | |
86 | if (r < 0) | |
87 | log_error("Failed to parse log level %s, ignoring.", optarg); | |
88 | ||
89 | break; | |
90 | ||
91 | case ARG_LOG_TARGET: | |
92 | r = log_set_target_from_string(optarg); | |
93 | if (r < 0) | |
94 | log_error("Failed to parse log target %s, ignoring", optarg); | |
95 | ||
96 | break; | |
97 | ||
98 | case ARG_LOG_COLOR: | |
99 | ||
100 | if (optarg) { | |
101 | r = log_show_color_from_string(optarg); | |
102 | if (r < 0) | |
103 | log_error("Failed to parse log color setting %s, ignoring", optarg); | |
104 | } else | |
105 | log_show_color(true); | |
106 | ||
107 | break; | |
108 | ||
109 | case ARG_LOG_LOCATION: | |
110 | if (optarg) { | |
111 | r = log_show_location_from_string(optarg); | |
112 | if (r < 0) | |
113 | log_error("Failed to parse log location setting %s, ignoring", optarg); | |
114 | } else | |
115 | log_show_location(true); | |
116 | ||
117 | break; | |
118 | ||
119 | case ARG_EXIT_CODE: | |
120 | r = safe_atou8(optarg, &arg_exit_code); | |
121 | if (r < 0) | |
122 | log_error("Failed to parse exit code %s, ignoring", optarg); | |
123 | ||
124 | break; | |
125 | ||
126 | case '\001': | |
127 | if (!arg_verb) | |
128 | arg_verb = optarg; | |
129 | else | |
130 | log_error("Excess arguments, ignoring"); | |
131 | break; | |
132 | ||
133 | case '?': | |
134 | return -EINVAL; | |
135 | ||
136 | default: | |
137 | assert_not_reached("Unhandled option code."); | |
138 | } | |
139 | ||
140 | if (!arg_verb) { | |
141 | log_error("Verb argument missing."); | |
142 | return -EINVAL; | |
143 | } | |
144 | ||
145 | return 0; | |
146 | } | |
147 | ||
/*
 * Make /run/initramfs a private bind mount of itself and switch root into it,
 * keeping the previous root reachable at /oldroot.
 *
 * Returns the result of switch_root() on success of both mount() calls;
 * otherwise returns whatever log_error_errno() yields for the failed mount().
 */
static int switch_root_initramfs(void) {
        int r;

        /* Step 1: bind-mount the directory onto itself. */
        r = mount("/run/initramfs", "/run/initramfs", NULL, MS_BIND, NULL);
        if (r < 0)
                return log_error_errno(errno, "Failed to mount bind /run/initramfs on /run/initramfs: %m");

        /* Step 2: mark that mount as private. */
        r = mount(NULL, "/run/initramfs", NULL, MS_PRIVATE, NULL);
        if (r < 0)
                return log_error_errno(errno, "Failed to make /run/initramfs private mount: %m");

        /* Step 3: switch root using MS_BIND, since processes holding open file
         * descriptors might still be around; /run/initramfs/shutdown deals with
         * those. The old root is deliberately not detached (third argument is
         * false) because /run/initramfs/shutdown needs to access it. */
        return switch_root("/run/initramfs", "/oldroot", false, MS_BIND);
}
161 | ||
162 | int main(int argc, char *argv[]) { | |
163 | bool need_umount, need_swapoff, need_loop_detach, need_dm_detach; | |
164 | bool in_container, use_watchdog = false; | |
165 | _cleanup_free_ char *cgroup = NULL; | |
166 | char *arguments[3]; | |
167 | unsigned retries; | |
168 | int cmd, r; | |
169 | static const char* const dirs[] = {SYSTEM_SHUTDOWN_PATH, NULL}; | |
170 | ||
171 | log_parse_environment(); | |
172 | r = parse_argv(argc, argv); | |
173 | if (r < 0) | |
174 | goto error; | |
175 | ||
176 | /* journald will die if not gone yet. The log target defaults | |
177 | * to console, but may have been changed by command line options. */ | |
178 | ||
179 | log_close_console(); /* force reopen of /dev/console */ | |
180 | log_open(); | |
181 | ||
182 | umask(0022); | |
183 | ||
184 | if (getpid_cached() != 1) { | |
185 | log_error("Not executed by init (PID 1)."); | |
186 | r = -EPERM; | |
187 | goto error; | |
188 | } | |
189 | ||
190 | if (streq(arg_verb, "reboot")) | |
191 | cmd = RB_AUTOBOOT; | |
192 | else if (streq(arg_verb, "poweroff")) | |
193 | cmd = RB_POWER_OFF; | |
194 | else if (streq(arg_verb, "halt")) | |
195 | cmd = RB_HALT_SYSTEM; | |
196 | else if (streq(arg_verb, "kexec")) | |
197 | cmd = LINUX_REBOOT_CMD_KEXEC; | |
198 | else if (streq(arg_verb, "exit")) | |
199 | cmd = 0; /* ignored, just checking that arg_verb is valid */ | |
200 | else { | |
201 | r = -EINVAL; | |
202 | log_error("Unknown action '%s'.", arg_verb); | |
203 | goto error; | |
204 | } | |
205 | ||
206 | (void) cg_get_root_path(&cgroup); | |
207 | in_container = detect_container() > 0; | |
208 | ||
209 | use_watchdog = !!getenv("WATCHDOG_USEC"); | |
210 | ||
211 | /* Lock us into memory */ | |
212 | mlockall(MCL_CURRENT|MCL_FUTURE); | |
213 | ||
214 | /* Synchronize everything that is not written to disk yet at this point already. This is a good idea so that | |
215 | * slow IO is processed here already and the final process killing spree is not impacted by processes | |
216 | * desperately trying to sync IO to disk within their timeout. */ | |
217 | if (!in_container) | |
218 | sync(); | |
219 | ||
220 | log_info("Sending SIGTERM to remaining processes..."); | |
221 | broadcast_signal(SIGTERM, true, true); | |
222 | ||
223 | log_info("Sending SIGKILL to remaining processes..."); | |
224 | broadcast_signal(SIGKILL, true, false); | |
225 | ||
226 | need_umount = !in_container; | |
227 | need_swapoff = !in_container; | |
228 | need_loop_detach = !in_container; | |
229 | need_dm_detach = !in_container; | |
230 | ||
231 | /* Unmount all mountpoints, swaps, and loopback devices */ | |
232 | for (retries = 0; retries < FINALIZE_ATTEMPTS; retries++) { | |
233 | bool changed = false; | |
234 | ||
235 | if (use_watchdog) | |
236 | watchdog_ping(); | |
237 | ||
238 | /* Let's trim the cgroup tree on each iteration so | |
239 | that we leave an empty cgroup tree around, so that | |
240 | container managers get a nice notify event when we | |
241 | are down */ | |
242 | if (cgroup) | |
243 | cg_trim(SYSTEMD_CGROUP_CONTROLLER, cgroup, false); | |
244 | ||
245 | if (need_umount) { | |
246 | log_info("Unmounting file systems."); | |
247 | r = umount_all(&changed); | |
248 | if (r == 0) { | |
249 | need_umount = false; | |
250 | log_info("All filesystems unmounted."); | |
251 | } else if (r > 0) | |
252 | log_info("Not all file systems unmounted, %d left.", r); | |
253 | else | |
254 | log_error_errno(r, "Failed to unmount file systems: %m"); | |
255 | } | |
256 | ||
257 | if (need_swapoff) { | |
258 | log_info("Deactivating swaps."); | |
259 | r = swapoff_all(&changed); | |
260 | if (r == 0) { | |
261 | need_swapoff = false; | |
262 | log_info("All swaps deactivated."); | |
263 | } else if (r > 0) | |
264 | log_info("Not all swaps deactivated, %d left.", r); | |
265 | else | |
266 | log_error_errno(r, "Failed to deactivate swaps: %m"); | |
267 | } | |
268 | ||
269 | if (need_loop_detach) { | |
270 | log_info("Detaching loop devices."); | |
271 | r = loopback_detach_all(&changed); | |
272 | if (r == 0) { | |
273 | need_loop_detach = false; | |
274 | log_info("All loop devices detached."); | |
275 | } else if (r > 0) | |
276 | log_info("Not all loop devices detached, %d left.", r); | |
277 | else | |
278 | log_error_errno(r, "Failed to detach loop devices: %m"); | |
279 | } | |
280 | ||
281 | if (need_dm_detach) { | |
282 | log_info("Detaching DM devices."); | |
283 | r = dm_detach_all(&changed); | |
284 | if (r == 0) { | |
285 | need_dm_detach = false; | |
286 | log_info("All DM devices detached."); | |
287 | } else if (r > 0) | |
288 | log_info("Not all DM devices detached, %d left.", r); | |
289 | else | |
290 | log_error_errno(r, "Failed to detach DM devices: %m"); | |
291 | } | |
292 | ||
293 | if (!need_umount && !need_swapoff && !need_loop_detach && !need_dm_detach) { | |
294 | if (retries > 0) | |
295 | log_info("All filesystems, swaps, loop devices, DM devices detached."); | |
296 | /* Yay, done */ | |
297 | goto initrd_jump; | |
298 | } | |
299 | ||
300 | /* If in this iteration we didn't manage to | |
301 | * unmount/deactivate anything, we simply give up */ | |
302 | if (!changed) { | |
303 | log_info("Cannot finalize remaining%s%s%s%s continuing.", | |
304 | need_umount ? " file systems," : "", | |
305 | need_swapoff ? " swap devices," : "", | |
306 | need_loop_detach ? " loop devices," : "", | |
307 | need_dm_detach ? " DM devices," : ""); | |
308 | goto initrd_jump; | |
309 | } | |
310 | ||
311 | log_debug("After %u retries, couldn't finalize remaining %s%s%s%s trying again.", | |
312 | retries + 1, | |
313 | need_umount ? " file systems," : "", | |
314 | need_swapoff ? " swap devices," : "", | |
315 | need_loop_detach ? " loop devices," : "", | |
316 | need_dm_detach ? " DM devices," : ""); | |
317 | } | |
318 | ||
319 | log_error("Too many iterations, giving up."); | |
320 | ||
321 | initrd_jump: | |
322 | ||
323 | arguments[0] = NULL; | |
324 | arguments[1] = arg_verb; | |
325 | arguments[2] = NULL; | |
326 | execute_directories(dirs, DEFAULT_TIMEOUT_USEC, NULL, NULL, arguments); | |
327 | ||
328 | if (!in_container && !in_initrd() && | |
329 | access("/run/initramfs/shutdown", X_OK) == 0) { | |
330 | r = switch_root_initramfs(); | |
331 | if (r >= 0) { | |
332 | argv[0] = (char*) "/shutdown"; | |
333 | ||
334 | setsid(); | |
335 | make_console_stdio(); | |
336 | ||
337 | log_info("Successfully changed into root pivot.\n" | |
338 | "Returning to initrd..."); | |
339 | ||
340 | execv("/shutdown", argv); | |
341 | log_error_errno(errno, "Failed to execute shutdown binary: %m"); | |
342 | } else | |
343 | log_error_errno(r, "Failed to switch root to \"/run/initramfs\": %m"); | |
344 | ||
345 | } | |
346 | ||
347 | if (need_umount || need_swapoff || need_loop_detach || need_dm_detach) | |
348 | log_error("Failed to finalize %s%s%s%s ignoring", | |
349 | need_umount ? " file systems," : "", | |
350 | need_swapoff ? " swap devices," : "", | |
351 | need_loop_detach ? " loop devices," : "", | |
352 | need_dm_detach ? " DM devices," : ""); | |
353 | ||
354 | /* The kernel will automatically flush ATA disks and suchlike on reboot(), but the file systems need to be | |
355 | * sync'ed explicitly in advance. So let's do this here, but not needlessly slow down containers. Note that we | |
356 | * sync'ed things already once above, but we did some more work since then which might have caused IO, hence | |
357 | * let's doit once more. */ | |
358 | if (!in_container) | |
359 | sync(); | |
360 | ||
361 | if (streq(arg_verb, "exit")) { | |
362 | if (in_container) | |
363 | exit(arg_exit_code); | |
364 | else { | |
365 | /* We cannot exit() on the host, fallback on another | |
366 | * method. */ | |
367 | cmd = RB_POWER_OFF; | |
368 | } | |
369 | } | |
370 | ||
371 | switch (cmd) { | |
372 | ||
373 | case LINUX_REBOOT_CMD_KEXEC: | |
374 | ||
375 | if (!in_container) { | |
376 | /* We cheat and exec kexec to avoid doing all its work */ | |
377 | pid_t pid; | |
378 | ||
379 | log_info("Rebooting with kexec."); | |
380 | ||
381 | pid = fork(); | |
382 | if (pid < 0) | |
383 | log_error_errno(errno, "Failed to fork: %m"); | |
384 | else if (pid == 0) { | |
385 | ||
386 | const char * const args[] = { | |
387 | KEXEC, "-e", NULL | |
388 | }; | |
389 | ||
390 | /* Child */ | |
391 | ||
392 | execv(args[0], (char * const *) args); | |
393 | _exit(EXIT_FAILURE); | |
394 | } else | |
395 | wait_for_terminate_and_warn("kexec", pid, true); | |
396 | } | |
397 | ||
398 | cmd = RB_AUTOBOOT; | |
399 | _fallthrough_; | |
400 | case RB_AUTOBOOT: | |
401 | ||
402 | if (!in_container) { | |
403 | _cleanup_free_ char *param = NULL; | |
404 | ||
405 | r = read_one_line_file("/run/systemd/reboot-param", ¶m); | |
406 | if (r < 0 && r != -ENOENT) | |
407 | log_warning_errno(r, "Failed to read reboot parameter file: %m"); | |
408 | ||
409 | if (!isempty(param)) { | |
410 | log_info("Rebooting with argument '%s'.", param); | |
411 | syscall(SYS_reboot, LINUX_REBOOT_MAGIC1, LINUX_REBOOT_MAGIC2, LINUX_REBOOT_CMD_RESTART2, param); | |
412 | log_warning_errno(errno, "Failed to reboot with parameter, retrying without: %m"); | |
413 | } | |
414 | } | |
415 | ||
416 | log_info("Rebooting."); | |
417 | break; | |
418 | ||
419 | case RB_POWER_OFF: | |
420 | log_info("Powering off."); | |
421 | break; | |
422 | ||
423 | case RB_HALT_SYSTEM: | |
424 | log_info("Halting system."); | |
425 | break; | |
426 | ||
427 | default: | |
428 | assert_not_reached("Unknown magic"); | |
429 | } | |
430 | ||
431 | reboot(cmd); | |
432 | if (errno == EPERM && in_container) { | |
433 | /* If we are in a container, and we lacked | |
434 | * CAP_SYS_BOOT just exit, this will kill our | |
435 | * container for good. */ | |
436 | log_info("Exiting container."); | |
437 | exit(0); | |
438 | } | |
439 | ||
440 | r = log_error_errno(errno, "Failed to invoke reboot(): %m"); | |
441 | ||
442 | error: | |
443 | log_emergency_errno(r, "Critical error while doing system shutdown: %m"); | |
444 | freeze(); | |
445 | } |