]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/core/shutdown.c
tree-wide: beautify remaining copyright statements
[thirdparty/systemd.git] / src / core / shutdown.c
CommitLineData
53e1b683 1/* SPDX-License-Identifier: LGPL-2.1+ */
b1b2a107 2/***
96b2fb93 3 Copyright © 2010 ProFUSION embedded systems
b1b2a107
FF
4***/
5
b1b2a107 6#include <errno.h>
07630cea 7#include <getopt.h>
c01dcddf 8#include <linux/reboot.h>
b1b2a107
FF
9#include <signal.h>
10#include <stdbool.h>
11#include <stdlib.h>
07630cea
LP
12#include <sys/mman.h>
13#include <sys/mount.h>
14#include <sys/reboot.h>
15#include <sys/stat.h>
16#include <unistd.h>
b1b2a107 17
b5efdb8a 18#include "alloc-util.h"
d00c2631 19#include "async.h"
07630cea
LP
20#include "cgroup-util.h"
21#include "def.h"
89711996 22#include "exec-util.h"
d00c2631 23#include "fd-util.h"
ec26be51 24#include "fileio.h"
07630cea
LP
25#include "killall.h"
26#include "log.h"
27#include "missing.h"
6bedfcbb 28#include "parse-util.h"
07630cea 29#include "process-util.h"
c01dcddf 30#include "reboot-util.h"
73ad712f 31#include "signal-util.h"
07630cea
LP
32#include "string-util.h"
33#include "switch-root.h"
34#include "terminal-util.h"
b1b2a107
FF
35#include "umount.h"
36#include "util.h"
b52aae1d 37#include "virt.h"
e96d6be7 38#include "watchdog.h"
b1b2a107 39
73ad712f
KW
40#define SYNC_PROGRESS_ATTEMPTS 3
41#define SYNC_TIMEOUT_USEC (10*USEC_PER_SEC)
42
b1e90ec5 43static char* arg_verb;
287419c1 44static uint8_t arg_exit_code;
e73c54b8 45static usec_t arg_timeout = DEFAULT_TIMEOUT_USEC;
b1e90ec5
ZJS
46
47static int parse_argv(int argc, char *argv[]) {
48 enum {
49 ARG_LOG_LEVEL = 0x100,
50 ARG_LOG_TARGET,
51 ARG_LOG_COLOR,
52 ARG_LOG_LOCATION,
287419c1 53 ARG_EXIT_CODE,
e73c54b8 54 ARG_TIMEOUT,
b1e90ec5
ZJS
55 };
56
57 static const struct option options[] = {
58 { "log-level", required_argument, NULL, ARG_LOG_LEVEL },
59 { "log-target", required_argument, NULL, ARG_LOG_TARGET },
60 { "log-color", optional_argument, NULL, ARG_LOG_COLOR },
61 { "log-location", optional_argument, NULL, ARG_LOG_LOCATION },
287419c1 62 { "exit-code", required_argument, NULL, ARG_EXIT_CODE },
e73c54b8 63 { "timeout", required_argument, NULL, ARG_TIMEOUT },
b1e90ec5
ZJS
64 {}
65 };
66
67 int c, r;
68
69 assert(argc >= 1);
70 assert(argv);
71
4b5d8d0f
MS
72 /* "-" prevents getopt from permuting argv[] and moving the verb away
73 * from argv[1]. Our interface to initrd promises it'll be there. */
74 while ((c = getopt_long(argc, argv, "-", options, NULL)) >= 0)
b1e90ec5
ZJS
75 switch (c) {
76
77 case ARG_LOG_LEVEL:
78 r = log_set_max_level_from_string(optarg);
79 if (r < 0)
d405394c 80 log_error_errno(r, "Failed to parse log level %s, ignoring.", optarg);
b1e90ec5
ZJS
81
82 break;
83
84 case ARG_LOG_TARGET:
85 r = log_set_target_from_string(optarg);
86 if (r < 0)
d405394c 87 log_error_errno(r, "Failed to parse log target %s, ignoring", optarg);
b1e90ec5
ZJS
88
89 break;
90
91 case ARG_LOG_COLOR:
92
93 if (optarg) {
94 r = log_show_color_from_string(optarg);
95 if (r < 0)
d405394c 96 log_error_errno(r, "Failed to parse log color setting %s, ignoring", optarg);
b1e90ec5
ZJS
97 } else
98 log_show_color(true);
99
100 break;
101
102 case ARG_LOG_LOCATION:
103 if (optarg) {
104 r = log_show_location_from_string(optarg);
105 if (r < 0)
d405394c 106 log_error_errno(r, "Failed to parse log location setting %s, ignoring", optarg);
b1e90ec5
ZJS
107 } else
108 log_show_location(true);
109
110 break;
111
287419c1
AC
112 case ARG_EXIT_CODE:
113 r = safe_atou8(optarg, &arg_exit_code);
114 if (r < 0)
d405394c 115 log_error_errno(r, "Failed to parse exit code %s, ignoring", optarg);
287419c1
AC
116
117 break;
118
e73c54b8
JK
119 case ARG_TIMEOUT:
120 r = parse_sec(optarg, &arg_timeout);
121 if (r < 0)
d405394c 122 log_error_errno(r, "Failed to parse shutdown timeout %s, ignoring", optarg);
e73c54b8
JK
123
124 break;
125
4b5d8d0f
MS
126 case '\001':
127 if (!arg_verb)
128 arg_verb = optarg;
129 else
130 log_error("Excess arguments, ignoring");
131 break;
132
b1e90ec5 133 case '?':
b1e90ec5
ZJS
134 return -EINVAL;
135
136 default:
137 assert_not_reached("Unhandled option code.");
138 }
139
4b5d8d0f 140 if (!arg_verb) {
b1e90ec5
ZJS
141 log_error("Verb argument missing.");
142 return -EINVAL;
143 }
144
b1e90ec5
ZJS
145 return 0;
146}
147
5a4bf02f 148static int switch_root_initramfs(void) {
4a62c710
MS
149 if (mount("/run/initramfs", "/run/initramfs", NULL, MS_BIND, NULL) < 0)
150 return log_error_errno(errno, "Failed to mount bind /run/initramfs on /run/initramfs: %m");
89d471d5 151
4a62c710
MS
152 if (mount(NULL, "/run/initramfs", NULL, MS_PRIVATE, NULL) < 0)
153 return log_error_errno(errno, "Failed to make /run/initramfs private mount: %m");
89d471d5 154
f131770b 155 /* switch_root with MS_BIND, because there might still be processes lurking around, which have open file descriptors.
5a4bf02f
HH
156 * /run/initramfs/shutdown will take care of these.
157 * Also do not detach the old root, because /run/initramfs/shutdown needs to access it.
158 */
159 return switch_root("/run/initramfs", "/oldroot", false, MS_BIND);
7cb1094a
HH
160}
161
73ad712f
KW
162/* Read the following fields from /proc/meminfo:
163 *
164 * NFS_Unstable
165 * Writeback
166 * Dirty
167 *
168 * Return true if the sum of these fields is greater than the previous
169 * value input. For all other issues, report the failure and indicate that
170 * the sync is not making progress.
171 */
172static bool sync_making_progress(unsigned long long *prev_dirty) {
173 _cleanup_fclose_ FILE *f = NULL;
174 char line[LINE_MAX];
175 bool r = false;
176 unsigned long long val = 0;
177
178 f = fopen("/proc/meminfo", "re");
179 if (!f)
180 return log_warning_errno(errno, "Failed to open /proc/meminfo: %m");
181
182 FOREACH_LINE(line, f, log_warning_errno(errno, "Failed to parse /proc/meminfo: %m")) {
183 unsigned long long ull = 0;
184
185 if (!first_word(line, "NFS_Unstable:") && !first_word(line, "Writeback:") && !first_word(line, "Dirty:"))
186 continue;
187
188 errno = 0;
189 if (sscanf(line, "%*s %llu %*s", &ull) != 1) {
190 if (errno != 0)
191 log_warning_errno(errno, "Failed to parse /proc/meminfo: %m");
192 else
193 log_warning("Failed to parse /proc/meminfo");
194
195 return false;
196 }
197
198 val += ull;
199 }
200
201 r = *prev_dirty > val;
202
203 *prev_dirty = val;
204
205 return r;
206}
207
208static void sync_with_progress(void) {
d00c2631 209 unsigned long long dirty = ULONG_LONG_MAX;
73ad712f
KW
210 unsigned checks;
211 pid_t pid;
212 int r;
73ad712f
KW
213
214 BLOCK_SIGNALS(SIGCHLD);
215
d00c2631
LP
216 /* Due to the possiblity of the sync operation hanging, we fork a child process and monitor the progress. If
217 * the timeout lapses, the assumption is that that particular sync stalled. */
218
219 r = asynchronous_sync(&pid);
4c253ed1 220 if (r < 0) {
d00c2631 221 log_error_errno(r, "Failed to fork sync(): %m");
73ad712f
KW
222 return;
223 }
73ad712f
KW
224
225 log_info("Syncing filesystems and block devices.");
226
227 /* Start monitoring the sync operation. If more than
228 * SYNC_PROGRESS_ATTEMPTS lapse without progress being made,
229 * we assume that the sync is stalled */
230 for (checks = 0; checks < SYNC_PROGRESS_ATTEMPTS; checks++) {
231 r = wait_for_terminate_with_timeout(pid, SYNC_TIMEOUT_USEC);
232 if (r == 0)
233 /* Sync finished without error.
234 * (The sync itself does not return an error code) */
235 return;
236 else if (r == -ETIMEDOUT) {
237 /* Reset the check counter if the "Dirty" value is
238 * decreasing */
239 if (sync_making_progress(&dirty))
240 checks = 0;
241 } else {
242 log_error_errno(r, "Failed to sync filesystems and block devices: %m");
243 return;
244 }
245 }
246
247 /* Only reached in the event of a timeout. We should issue a kill
248 * to the stray process. */
249 log_error("Syncing filesystems and block devices - timed out, issuing SIGKILL to PID "PID_FMT".", pid);
250 (void) kill(pid, SIGKILL);
251}
252
b1b2a107 253int main(int argc, char *argv[]) {
8c977838 254 bool need_umount, need_swapoff, need_loop_detach, need_dm_detach;
456b2199 255 bool in_container, use_watchdog = false, can_initrd;
06beed6d 256 _cleanup_free_ char *cgroup = NULL;
6edd7d0a 257 char *arguments[3];
456b2199 258 int cmd, r, umount_log_level = LOG_INFO;
e801700e 259 static const char* const dirs[] = {SYSTEM_SHUTDOWN_PATH, NULL};
8a2c1fbf 260 char *watchdog_device;
b1b2a107 261
e18805fb
LP
262 /* The log target defaults to console, but the original systemd process will pass its log target in through a
263 * command line argument, which will override this default. Also, ensure we'll never log to the journal or
264 * syslog, as these logging daemons are either already dead or will die very soon. */
265
266 log_set_target(LOG_TARGET_CONSOLE);
267 log_set_prohibit_ipc(true);
b1e90ec5 268 log_parse_environment();
e18805fb 269
b1e90ec5
ZJS
270 r = parse_argv(argc, argv);
271 if (r < 0)
272 goto error;
ec26be51 273
b1b2a107
FF
274 log_open();
275
4c12626c
LP
276 umask(0022);
277
df0ff127 278 if (getpid_cached() != 1) {
b1e90ec5 279 log_error("Not executed by init (PID 1).");
b1b2a107
FF
280 r = -EPERM;
281 goto error;
282 }
283
b1e90ec5 284 if (streq(arg_verb, "reboot"))
b1b2a107 285 cmd = RB_AUTOBOOT;
b1e90ec5 286 else if (streq(arg_verb, "poweroff"))
b1b2a107 287 cmd = RB_POWER_OFF;
b1e90ec5 288 else if (streq(arg_verb, "halt"))
b1b2a107 289 cmd = RB_HALT_SYSTEM;
b1e90ec5 290 else if (streq(arg_verb, "kexec"))
b1b2a107 291 cmd = LINUX_REBOOT_CMD_KEXEC;
287419c1
AC
292 else if (streq(arg_verb, "exit"))
293 cmd = 0; /* ignored, just checking that arg_verb is valid */
b1b2a107 294 else {
b1e90ec5 295 log_error("Unknown action '%s'.", arg_verb);
e18805fb 296 r = -EINVAL;
b1b2a107
FF
297 goto error;
298 }
299
0b9aa270 300 (void) cg_get_root_path(&cgroup);
2e79d182 301 in_container = detect_container() > 0;
41f85451 302
5d904a6a 303 use_watchdog = getenv("WATCHDOG_USEC");
8a2c1fbf
EJ
304 watchdog_device = getenv("WATCHDOG_DEVICE");
305 if (watchdog_device) {
306 r = watchdog_set_device(watchdog_device);
307 if (r < 0)
308 log_warning_errno(r, "Failed to set watchdog device to %s, ignoring: %m",
309 watchdog_device);
310 }
e96d6be7 311
2e79d182 312 /* Lock us into memory */
e18805fb 313 (void) mlockall(MCL_CURRENT|MCL_FUTURE);
b1b2a107 314
2e79d182
LP
315 /* Synchronize everything that is not written to disk yet at this point already. This is a good idea so that
316 * slow IO is processed here already and the final process killing spree is not impacted by processes
73ad712f
KW
317 * desperately trying to sync IO to disk within their timeout. Do not remove this sync, data corruption will
318 * result. */
2e79d182 319 if (!in_container)
73ad712f 320 sync_with_progress();
2e79d182 321
e557b1a6 322 disable_coredumps();
27b372c1 323
ab58e291 324 log_info("Sending SIGTERM to remaining processes...");
e73c54b8 325 broadcast_signal(SIGTERM, true, true, arg_timeout);
b1b2a107 326
ab58e291 327 log_info("Sending SIGKILL to remaining processes...");
e73c54b8 328 broadcast_signal(SIGKILL, true, false, arg_timeout);
40e85d00 329
d89b5fed 330 need_umount = !in_container;
8c977838
ZJS
331 need_swapoff = !in_container;
332 need_loop_detach = !in_container;
333 need_dm_detach = !in_container;
456b2199 334 can_initrd = !in_container && !in_initrd() && access("/run/initramfs/shutdown", X_OK) == 0;
b1b2a107 335
567ea02a 336 /* Unmount all mountpoints, swaps, and loopback devices */
ac9cea5b 337 for (;;) {
12aad1d0
LP
338 bool changed = false;
339
e96d6be7
LP
340 if (use_watchdog)
341 watchdog_ping();
342
41f85451
LP
343 /* Let's trim the cgroup tree on each iteration so
344 that we leave an empty cgroup tree around, so that
345 container managers get a nice notify event when we
346 are down */
347 if (cgroup)
348 cg_trim(SYSTEMD_CGROUP_CONTROLLER, cgroup, false);
349
b1b2a107 350 if (need_umount) {
ab58e291 351 log_info("Unmounting file systems.");
456b2199 352 r = umount_all(&changed, umount_log_level);
bce93b7a 353 if (r == 0) {
b1b2a107 354 need_umount = false;
bce93b7a
MS
355 log_info("All filesystems unmounted.");
356 } else if (r > 0)
ab58e291 357 log_info("Not all file systems unmounted, %d left.", r);
b1b2a107 358 else
da927ba9 359 log_error_errno(r, "Failed to unmount file systems: %m");
b1b2a107
FF
360 }
361
362 if (need_swapoff) {
735e0712 363 log_info("Deactivating swaps.");
12aad1d0 364 r = swapoff_all(&changed);
bce93b7a 365 if (r == 0) {
b1b2a107 366 need_swapoff = false;
735e0712 367 log_info("All swaps deactivated.");
bce93b7a 368 } else if (r > 0)
735e0712 369 log_info("Not all swaps deactivated, %d left.", r);
b1b2a107 370 else
da927ba9 371 log_error_errno(r, "Failed to deactivate swaps: %m");
b1b2a107
FF
372 }
373
374 if (need_loop_detach) {
375 log_info("Detaching loop devices.");
456b2199 376 r = loopback_detach_all(&changed, umount_log_level);
bce93b7a 377 if (r == 0) {
b1b2a107 378 need_loop_detach = false;
bce93b7a
MS
379 log_info("All loop devices detached.");
380 } else if (r > 0)
ab58e291 381 log_info("Not all loop devices detached, %d left.", r);
b1b2a107 382 else
da927ba9 383 log_error_errno(r, "Failed to detach loop devices: %m");
d48141ba 384 }
b1b2a107 385
d48141ba
LP
386 if (need_dm_detach) {
387 log_info("Detaching DM devices.");
456b2199 388 r = dm_detach_all(&changed, umount_log_level);
bce93b7a 389 if (r == 0) {
d48141ba 390 need_dm_detach = false;
bce93b7a
MS
391 log_info("All DM devices detached.");
392 } else if (r > 0)
2569a5ce 393 log_info("Not all DM devices detached, %d left.", r);
d48141ba 394 else
da927ba9 395 log_error_errno(r, "Failed to detach DM devices: %m");
b1b2a107
FF
396 }
397
a27d2184 398 if (!need_umount && !need_swapoff && !need_loop_detach && !need_dm_detach) {
ac9cea5b 399 log_info("All filesystems, swaps, loop devices and DM devices detached.");
12aad1d0 400 /* Yay, done */
ac9cea5b 401 break;
a27d2184 402 }
b1b2a107 403
456b2199
JJ
404 if (!changed && umount_log_level == LOG_INFO && !can_initrd) {
405 /* There are things we cannot get rid of. Loop one more time
406 * with LOG_ERR to inform the user. Note that we don't need
407 * to do this if there is a initrd to switch to, because that
408 * one is likely to get rid of the remounting mounts. If not,
409 * it will log about them. */
410 umount_log_level = LOG_ERR;
411 continue;
412 }
413
12aad1d0 414 /* If in this iteration we didn't manage to
bd3fa1d2 415 * unmount/deactivate anything, we simply give up */
12aad1d0 416 if (!changed) {
8c977838
ZJS
417 log_info("Cannot finalize remaining%s%s%s%s continuing.",
418 need_umount ? " file systems," : "",
419 need_swapoff ? " swap devices," : "",
420 need_loop_detach ? " loop devices," : "",
421 need_dm_detach ? " DM devices," : "");
ac9cea5b 422 break;
12aad1d0
LP
423 }
424
ac9cea5b 425 log_debug("Couldn't finalize remaining %s%s%s%s trying again.",
8c977838
ZJS
426 need_umount ? " file systems," : "",
427 need_swapoff ? " swap devices," : "",
428 need_loop_detach ? " loop devices," : "",
429 need_dm_detach ? " DM devices," : "");
b1b2a107
FF
430 }
431
8a2c1fbf
EJ
432 /* We're done with the watchdog. */
433 watchdog_free_device();
434
6edd7d0a 435 arguments[0] = NULL;
b1e90ec5 436 arguments[1] = arg_verb;
6edd7d0a 437 arguments[2] = NULL;
c6e47247 438 execute_directories(dirs, DEFAULT_TIMEOUT_USEC, NULL, NULL, arguments);
83cc030f 439
456b2199 440 if (can_initrd) {
5a4bf02f
HH
441 r = switch_root_initramfs();
442 if (r >= 0) {
a2726e5c 443 argv[0] = (char*) "/shutdown";
30d743f4 444
5a4bf02f
HH
445 setsid();
446 make_console_stdio();
447
448 log_info("Successfully changed into root pivot.\n"
449 "Returning to initrd...");
30d743f4 450
a2726e5c 451 execv("/shutdown", argv);
56f64d95 452 log_error_errno(errno, "Failed to execute shutdown binary: %m");
5a4bf02f 453 } else
da927ba9 454 log_error_errno(r, "Failed to switch root to \"/run/initramfs\": %m");
5a4bf02f 455
7cb1094a
HH
456 }
457
8c977838
ZJS
458 if (need_umount || need_swapoff || need_loop_detach || need_dm_detach)
459 log_error("Failed to finalize %s%s%s%s ignoring",
460 need_umount ? " file systems," : "",
461 need_swapoff ? " swap devices," : "",
462 need_loop_detach ? " loop devices," : "",
463 need_dm_detach ? " DM devices," : "");
464
2e79d182
LP
465 /* The kernel will automatically flush ATA disks and suchlike on reboot(), but the file systems need to be
466 * sync'ed explicitly in advance. So let's do this here, but not needlessly slow down containers. Note that we
467 * sync'ed things already once above, but we did some more work since then which might have caused IO, hence
73ad712f 468 * let's do it once more. Do not remove this sync, data corruption will result. */
0049f05a 469 if (!in_container)
73ad712f 470 sync_with_progress();
0049f05a 471
287419c1
AC
472 if (streq(arg_verb, "exit")) {
473 if (in_container)
1f409a0c
LP
474 return arg_exit_code;
475
476 cmd = RB_POWER_OFF; /* We cannot exit() on the host, fallback on another method. */
287419c1
AC
477 }
478
477def80
LP
479 switch (cmd) {
480
481 case LINUX_REBOOT_CMD_KEXEC:
cb7ec564
LP
482
483 if (!in_container) {
484 /* We cheat and exec kexec to avoid doing all its work */
477def80 485 log_info("Rebooting with kexec.");
cb7ec564 486
1f5d1e02 487 r = safe_fork("(sd-kexec)", FORK_RESET_SIGNALS|FORK_CLOSE_ALL_FDS|FORK_LOG|FORK_WAIT, NULL);
4c253ed1 488 if (r == 0) {
477def80
LP
489 const char * const args[] = {
490 KEXEC, "-e", NULL
491 };
492
cb7ec564 493 /* Child */
477def80 494
cb7ec564 495 execv(args[0], (char * const *) args);
477def80 496 _exit(EXIT_FAILURE);
4c253ed1
LP
497 }
498
1f5d1e02 499 /* If we are still running, then the kexec can't have worked, let's fall through */
b1b2a107 500 }
e61cd186
LP
501
502 cmd = RB_AUTOBOOT;
4831981d 503 _fallthrough_;
477def80 504
c01dcddf
LP
505 case RB_AUTOBOOT:
506 (void) reboot_with_parameter(REBOOT_LOG);
477def80
LP
507 log_info("Rebooting.");
508 break;
509
510 case RB_POWER_OFF:
511 log_info("Powering off.");
512 break;
513
514 case RB_HALT_SYSTEM:
515 log_info("Halting system.");
516 break;
517
518 default:
519 assert_not_reached("Unknown magic");
520 }
cb7ec564 521
118cf952 522 (void) reboot(cmd);
cb7ec564
LP
523 if (errno == EPERM && in_container) {
524 /* If we are in a container, and we lacked
525 * CAP_SYS_BOOT just exit, this will kill our
526 * container for good. */
477def80 527 log_info("Exiting container.");
1f409a0c 528 return EXIT_SUCCESS;
cb7ec564
LP
529 }
530
76ef789d 531 r = log_error_errno(errno, "Failed to invoke reboot(): %m");
b1b2a107
FF
532
533 error:
da927ba9 534 log_emergency_errno(r, "Critical error while doing system shutdown: %m");
b1b2a107 535 freeze();
b1b2a107 536}