]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/core/shutdown.c
tree-wide: introduce new safe_fork() helper and port everything over
[thirdparty/systemd.git] / src / core / shutdown.c
1 /* SPDX-License-Identifier: LGPL-2.1+ */
2 /***
3 This file is part of systemd.
4
5 Copyright 2010 ProFUSION embedded systems
6
7 systemd is free software; you can redistribute it and/or modify it
8 under the terms of the GNU Lesser General Public License as published by
9 the Free Software Foundation; either version 2.1 of the License, or
10 (at your option) any later version.
11
12 systemd is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 Lesser General Public License for more details.
16
17 You should have received a copy of the GNU Lesser General Public License
18 along with systemd; If not, see <http://www.gnu.org/licenses/>.
19 ***/
20
21 #include <errno.h>
22 #include <getopt.h>
23 #include <linux/reboot.h>
24 #include <signal.h>
25 #include <stdbool.h>
26 #include <stdlib.h>
27 #include <sys/mman.h>
28 #include <sys/mount.h>
29 #include <sys/reboot.h>
30 #include <sys/stat.h>
31 #include <unistd.h>
32
33 #include "alloc-util.h"
34 #include "cgroup-util.h"
35 #include "fd-util.h"
36 #include "def.h"
37 #include "exec-util.h"
38 #include "fileio.h"
39 #include "killall.h"
40 #include "log.h"
41 #include "missing.h"
42 #include "parse-util.h"
43 #include "process-util.h"
44 #include "signal-util.h"
45 #include "string-util.h"
46 #include "switch-root.h"
47 #include "terminal-util.h"
48 #include "umount.h"
49 #include "util.h"
50 #include "virt.h"
51 #include "watchdog.h"
52
/* Upper bound on the number of unmount/swapoff/detach rounds main() performs before giving up. */
#define FINALIZE_ATTEMPTS 50

/* Number of consecutive sync timeouts without progress that are tolerated, and the length of each timeout. */
#define SYNC_PROGRESS_ATTEMPTS 3
#define SYNC_TIMEOUT_USEC (10*USEC_PER_SEC)

/* Verb taken from the command line (first non-option argument); set by parse_argv(). */
static char *arg_verb;

/* Value of --exit-code=; used as process exit status for the "exit" verb inside a container. */
static uint8_t arg_exit_code;
60
61 static int parse_argv(int argc, char *argv[]) {
62 enum {
63 ARG_LOG_LEVEL = 0x100,
64 ARG_LOG_TARGET,
65 ARG_LOG_COLOR,
66 ARG_LOG_LOCATION,
67 ARG_EXIT_CODE,
68 };
69
70 static const struct option options[] = {
71 { "log-level", required_argument, NULL, ARG_LOG_LEVEL },
72 { "log-target", required_argument, NULL, ARG_LOG_TARGET },
73 { "log-color", optional_argument, NULL, ARG_LOG_COLOR },
74 { "log-location", optional_argument, NULL, ARG_LOG_LOCATION },
75 { "exit-code", required_argument, NULL, ARG_EXIT_CODE },
76 {}
77 };
78
79 int c, r;
80
81 assert(argc >= 1);
82 assert(argv);
83
84 /* "-" prevents getopt from permuting argv[] and moving the verb away
85 * from argv[1]. Our interface to initrd promises it'll be there. */
86 while ((c = getopt_long(argc, argv, "-", options, NULL)) >= 0)
87 switch (c) {
88
89 case ARG_LOG_LEVEL:
90 r = log_set_max_level_from_string(optarg);
91 if (r < 0)
92 log_error("Failed to parse log level %s, ignoring.", optarg);
93
94 break;
95
96 case ARG_LOG_TARGET:
97 r = log_set_target_from_string(optarg);
98 if (r < 0)
99 log_error("Failed to parse log target %s, ignoring", optarg);
100
101 break;
102
103 case ARG_LOG_COLOR:
104
105 if (optarg) {
106 r = log_show_color_from_string(optarg);
107 if (r < 0)
108 log_error("Failed to parse log color setting %s, ignoring", optarg);
109 } else
110 log_show_color(true);
111
112 break;
113
114 case ARG_LOG_LOCATION:
115 if (optarg) {
116 r = log_show_location_from_string(optarg);
117 if (r < 0)
118 log_error("Failed to parse log location setting %s, ignoring", optarg);
119 } else
120 log_show_location(true);
121
122 break;
123
124 case ARG_EXIT_CODE:
125 r = safe_atou8(optarg, &arg_exit_code);
126 if (r < 0)
127 log_error("Failed to parse exit code %s, ignoring", optarg);
128
129 break;
130
131 case '\001':
132 if (!arg_verb)
133 arg_verb = optarg;
134 else
135 log_error("Excess arguments, ignoring");
136 break;
137
138 case '?':
139 return -EINVAL;
140
141 default:
142 assert_not_reached("Unhandled option code.");
143 }
144
145 if (!arg_verb) {
146 log_error("Verb argument missing.");
147 return -EINVAL;
148 }
149
150 return 0;
151 }
152
/* Prepares /run/initramfs as a private bind mount and switches the root file system over to it.
 *
 * Returns the result of switch_root() on the final step, or the value returned by log_error_errno() if one of
 * the two mount() calls fails. */
static int switch_root_initramfs(void) {
        static const char initramfs[] = "/run/initramfs";
        int r;

        /* Bind-mount the directory onto itself so that it becomes a mount point of its own ... */
        r = mount(initramfs, initramfs, NULL, MS_BIND, NULL);
        if (r < 0)
                return log_error_errno(errno, "Failed to mount bind /run/initramfs on /run/initramfs: %m");

        /* ... and then mark that mount private. */
        r = mount(NULL, initramfs, NULL, MS_PRIVATE, NULL);
        if (r < 0)
                return log_error_errno(errno, "Failed to make /run/initramfs private mount: %m");

        /* We switch root with MS_BIND because processes holding open file descriptors may still be
         * lurking around; /run/initramfs/shutdown will deal with those. The old root is deliberately not
         * detached either, since /run/initramfs/shutdown needs to access it (it ends up at /oldroot). */
        return switch_root(initramfs, "/oldroot", false, MS_BIND);
}
166
167 /* Read the following fields from /proc/meminfo:
168 *
169 * NFS_Unstable
170 * Writeback
171 * Dirty
172 *
173 * Return true if the sum of these fields is greater than the previous
174 * value input. For all other issues, report the failure and indicate that
175 * the sync is not making progress.
176 */
177 static bool sync_making_progress(unsigned long long *prev_dirty) {
178 _cleanup_fclose_ FILE *f = NULL;
179 char line[LINE_MAX];
180 bool r = false;
181 unsigned long long val = 0;
182
183 f = fopen("/proc/meminfo", "re");
184 if (!f)
185 return log_warning_errno(errno, "Failed to open /proc/meminfo: %m");
186
187 FOREACH_LINE(line, f, log_warning_errno(errno, "Failed to parse /proc/meminfo: %m")) {
188 unsigned long long ull = 0;
189
190 if (!first_word(line, "NFS_Unstable:") && !first_word(line, "Writeback:") && !first_word(line, "Dirty:"))
191 continue;
192
193 errno = 0;
194 if (sscanf(line, "%*s %llu %*s", &ull) != 1) {
195 if (errno != 0)
196 log_warning_errno(errno, "Failed to parse /proc/meminfo: %m");
197 else
198 log_warning("Failed to parse /proc/meminfo");
199
200 return false;
201 }
202
203 val += ull;
204 }
205
206 r = *prev_dirty > val;
207
208 *prev_dirty = val;
209
210 return r;
211 }
212
213 static void sync_with_progress(void) {
214 unsigned checks;
215 pid_t pid;
216 int r;
217 unsigned long long dirty = ULONG_LONG_MAX;
218
219 BLOCK_SIGNALS(SIGCHLD);
220
221 /* Due to the possiblity of the sync operation hanging, we fork
222 * a child process and monitor the progress. If the timeout
223 * lapses, the assumption is that that particular sync stalled. */
224 r = safe_fork("(sd-sync)", FORK_RESET_SIGNALS|FORK_CLOSE_ALL_FDS, &pid);
225 if (r < 0) {
226 log_error_errno(r, "Failed to fork: %m");
227 return;
228 }
229 if (r == 0) {
230 /* Start the sync operation here in the child */
231 sync();
232 _exit(EXIT_SUCCESS);
233 }
234
235 log_info("Syncing filesystems and block devices.");
236
237 /* Start monitoring the sync operation. If more than
238 * SYNC_PROGRESS_ATTEMPTS lapse without progress being made,
239 * we assume that the sync is stalled */
240 for (checks = 0; checks < SYNC_PROGRESS_ATTEMPTS; checks++) {
241 r = wait_for_terminate_with_timeout(pid, SYNC_TIMEOUT_USEC);
242 if (r == 0)
243 /* Sync finished without error.
244 * (The sync itself does not return an error code) */
245 return;
246 else if (r == -ETIMEDOUT) {
247 /* Reset the check counter if the "Dirty" value is
248 * decreasing */
249 if (sync_making_progress(&dirty))
250 checks = 0;
251 } else {
252 log_error_errno(r, "Failed to sync filesystems and block devices: %m");
253 return;
254 }
255 }
256
257 /* Only reached in the event of a timeout. We should issue a kill
258 * to the stray process. */
259 log_error("Syncing filesystems and block devices - timed out, issuing SIGKILL to PID "PID_FMT".", pid);
260 (void) kill(pid, SIGKILL);
261 }
262
263 int main(int argc, char *argv[]) {
264 bool need_umount, need_swapoff, need_loop_detach, need_dm_detach;
265 bool in_container, use_watchdog = false;
266 _cleanup_free_ char *cgroup = NULL;
267 char *arguments[3];
268 unsigned retries;
269 int cmd, r;
270 static const char* const dirs[] = {SYSTEM_SHUTDOWN_PATH, NULL};
271 char *watchdog_device;
272
273 log_parse_environment();
274 r = parse_argv(argc, argv);
275 if (r < 0)
276 goto error;
277
278 /* journald will die if not gone yet. The log target defaults
279 * to console, but may have been changed by command line options. */
280
281 log_close_console(); /* force reopen of /dev/console */
282 log_open();
283
284 umask(0022);
285
286 if (getpid_cached() != 1) {
287 log_error("Not executed by init (PID 1).");
288 r = -EPERM;
289 goto error;
290 }
291
292 if (streq(arg_verb, "reboot"))
293 cmd = RB_AUTOBOOT;
294 else if (streq(arg_verb, "poweroff"))
295 cmd = RB_POWER_OFF;
296 else if (streq(arg_verb, "halt"))
297 cmd = RB_HALT_SYSTEM;
298 else if (streq(arg_verb, "kexec"))
299 cmd = LINUX_REBOOT_CMD_KEXEC;
300 else if (streq(arg_verb, "exit"))
301 cmd = 0; /* ignored, just checking that arg_verb is valid */
302 else {
303 r = -EINVAL;
304 log_error("Unknown action '%s'.", arg_verb);
305 goto error;
306 }
307
308 (void) cg_get_root_path(&cgroup);
309 in_container = detect_container() > 0;
310
311 use_watchdog = !!getenv("WATCHDOG_USEC");
312 watchdog_device = getenv("WATCHDOG_DEVICE");
313 if (watchdog_device) {
314 r = watchdog_set_device(watchdog_device);
315 if (r < 0)
316 log_warning_errno(r, "Failed to set watchdog device to %s, ignoring: %m",
317 watchdog_device);
318 }
319
320 /* Lock us into memory */
321 mlockall(MCL_CURRENT|MCL_FUTURE);
322
323 /* Synchronize everything that is not written to disk yet at this point already. This is a good idea so that
324 * slow IO is processed here already and the final process killing spree is not impacted by processes
325 * desperately trying to sync IO to disk within their timeout. Do not remove this sync, data corruption will
326 * result. */
327 if (!in_container)
328 sync_with_progress();
329
330 log_info("Sending SIGTERM to remaining processes...");
331 broadcast_signal(SIGTERM, true, true);
332
333 log_info("Sending SIGKILL to remaining processes...");
334 broadcast_signal(SIGKILL, true, false);
335
336 need_umount = !in_container;
337 need_swapoff = !in_container;
338 need_loop_detach = !in_container;
339 need_dm_detach = !in_container;
340
341 /* Unmount all mountpoints, swaps, and loopback devices */
342 for (retries = 0; retries < FINALIZE_ATTEMPTS; retries++) {
343 bool changed = false;
344
345 if (use_watchdog)
346 watchdog_ping();
347
348 /* Let's trim the cgroup tree on each iteration so
349 that we leave an empty cgroup tree around, so that
350 container managers get a nice notify event when we
351 are down */
352 if (cgroup)
353 cg_trim(SYSTEMD_CGROUP_CONTROLLER, cgroup, false);
354
355 if (need_umount) {
356 log_info("Unmounting file systems.");
357 r = umount_all(&changed);
358 if (r == 0) {
359 need_umount = false;
360 log_info("All filesystems unmounted.");
361 } else if (r > 0)
362 log_info("Not all file systems unmounted, %d left.", r);
363 else
364 log_error_errno(r, "Failed to unmount file systems: %m");
365 }
366
367 if (need_swapoff) {
368 log_info("Deactivating swaps.");
369 r = swapoff_all(&changed);
370 if (r == 0) {
371 need_swapoff = false;
372 log_info("All swaps deactivated.");
373 } else if (r > 0)
374 log_info("Not all swaps deactivated, %d left.", r);
375 else
376 log_error_errno(r, "Failed to deactivate swaps: %m");
377 }
378
379 if (need_loop_detach) {
380 log_info("Detaching loop devices.");
381 r = loopback_detach_all(&changed);
382 if (r == 0) {
383 need_loop_detach = false;
384 log_info("All loop devices detached.");
385 } else if (r > 0)
386 log_info("Not all loop devices detached, %d left.", r);
387 else
388 log_error_errno(r, "Failed to detach loop devices: %m");
389 }
390
391 if (need_dm_detach) {
392 log_info("Detaching DM devices.");
393 r = dm_detach_all(&changed);
394 if (r == 0) {
395 need_dm_detach = false;
396 log_info("All DM devices detached.");
397 } else if (r > 0)
398 log_info("Not all DM devices detached, %d left.", r);
399 else
400 log_error_errno(r, "Failed to detach DM devices: %m");
401 }
402
403 if (!need_umount && !need_swapoff && !need_loop_detach && !need_dm_detach) {
404 if (retries > 0)
405 log_info("All filesystems, swaps, loop devices, DM devices detached.");
406 /* Yay, done */
407 goto initrd_jump;
408 }
409
410 /* If in this iteration we didn't manage to
411 * unmount/deactivate anything, we simply give up */
412 if (!changed) {
413 log_info("Cannot finalize remaining%s%s%s%s continuing.",
414 need_umount ? " file systems," : "",
415 need_swapoff ? " swap devices," : "",
416 need_loop_detach ? " loop devices," : "",
417 need_dm_detach ? " DM devices," : "");
418 goto initrd_jump;
419 }
420
421 log_debug("After %u retries, couldn't finalize remaining %s%s%s%s trying again.",
422 retries + 1,
423 need_umount ? " file systems," : "",
424 need_swapoff ? " swap devices," : "",
425 need_loop_detach ? " loop devices," : "",
426 need_dm_detach ? " DM devices," : "");
427 }
428
429 log_error("Too many iterations, giving up.");
430
431 initrd_jump:
432
433 /* We're done with the watchdog. */
434 watchdog_free_device();
435
436 arguments[0] = NULL;
437 arguments[1] = arg_verb;
438 arguments[2] = NULL;
439 execute_directories(dirs, DEFAULT_TIMEOUT_USEC, NULL, NULL, arguments);
440
441 if (!in_container && !in_initrd() &&
442 access("/run/initramfs/shutdown", X_OK) == 0) {
443 r = switch_root_initramfs();
444 if (r >= 0) {
445 argv[0] = (char*) "/shutdown";
446
447 setsid();
448 make_console_stdio();
449
450 log_info("Successfully changed into root pivot.\n"
451 "Returning to initrd...");
452
453 execv("/shutdown", argv);
454 log_error_errno(errno, "Failed to execute shutdown binary: %m");
455 } else
456 log_error_errno(r, "Failed to switch root to \"/run/initramfs\": %m");
457
458 }
459
460 if (need_umount || need_swapoff || need_loop_detach || need_dm_detach)
461 log_error("Failed to finalize %s%s%s%s ignoring",
462 need_umount ? " file systems," : "",
463 need_swapoff ? " swap devices," : "",
464 need_loop_detach ? " loop devices," : "",
465 need_dm_detach ? " DM devices," : "");
466
467 /* The kernel will automatically flush ATA disks and suchlike on reboot(), but the file systems need to be
468 * sync'ed explicitly in advance. So let's do this here, but not needlessly slow down containers. Note that we
469 * sync'ed things already once above, but we did some more work since then which might have caused IO, hence
470 * let's do it once more. Do not remove this sync, data corruption will result. */
471 if (!in_container)
472 sync_with_progress();
473
474 if (streq(arg_verb, "exit")) {
475 if (in_container)
476 exit(arg_exit_code);
477 else {
478 /* We cannot exit() on the host, fallback on another
479 * method. */
480 cmd = RB_POWER_OFF;
481 }
482 }
483
484 switch (cmd) {
485
486 case LINUX_REBOOT_CMD_KEXEC:
487
488 if (!in_container) {
489 /* We cheat and exec kexec to avoid doing all its work */
490 pid_t pid;
491
492 log_info("Rebooting with kexec.");
493
494 r = safe_fork("(sd-kexec)", FORK_RESET_SIGNALS|FORK_CLOSE_ALL_FDS, &pid);
495 if (r < 0)
496 log_error_errno(r, "Failed to fork: %m");
497 if (r == 0) {
498
499 const char * const args[] = {
500 KEXEC, "-e", NULL
501 };
502
503 /* Child */
504
505 execv(args[0], (char * const *) args);
506 _exit(EXIT_FAILURE);
507 }
508
509 (void) wait_for_terminate_and_warn("kexec", pid, true);
510 }
511
512 cmd = RB_AUTOBOOT;
513 _fallthrough_;
514 case RB_AUTOBOOT:
515
516 if (!in_container) {
517 _cleanup_free_ char *param = NULL;
518
519 r = read_one_line_file("/run/systemd/reboot-param", &param);
520 if (r < 0 && r != -ENOENT)
521 log_warning_errno(r, "Failed to read reboot parameter file: %m");
522
523 if (!isempty(param)) {
524 log_info("Rebooting with argument '%s'.", param);
525 syscall(SYS_reboot, LINUX_REBOOT_MAGIC1, LINUX_REBOOT_MAGIC2, LINUX_REBOOT_CMD_RESTART2, param);
526 log_warning_errno(errno, "Failed to reboot with parameter, retrying without: %m");
527 }
528 }
529
530 log_info("Rebooting.");
531 break;
532
533 case RB_POWER_OFF:
534 log_info("Powering off.");
535 break;
536
537 case RB_HALT_SYSTEM:
538 log_info("Halting system.");
539 break;
540
541 default:
542 assert_not_reached("Unknown magic");
543 }
544
545 reboot(cmd);
546 if (errno == EPERM && in_container) {
547 /* If we are in a container, and we lacked
548 * CAP_SYS_BOOT just exit, this will kill our
549 * container for good. */
550 log_info("Exiting container.");
551 exit(0);
552 }
553
554 r = log_error_errno(errno, "Failed to invoke reboot(): %m");
555
556 error:
557 log_emergency_errno(r, "Critical error while doing system shutdown: %m");
558 freeze();
559 }