]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/core/shutdown.c
tree-wide: drop license boilerplate
[thirdparty/systemd.git] / src / core / shutdown.c
CommitLineData
53e1b683 1/* SPDX-License-Identifier: LGPL-2.1+ */
b1b2a107
FF
2/***
3 This file is part of systemd.
4
5 Copyright 2010 ProFUSION embedded systems
b1b2a107
FF
6***/
7
b1b2a107 8#include <errno.h>
07630cea 9#include <getopt.h>
c01dcddf 10#include <linux/reboot.h>
b1b2a107
FF
11#include <signal.h>
12#include <stdbool.h>
13#include <stdlib.h>
07630cea
LP
14#include <sys/mman.h>
15#include <sys/mount.h>
16#include <sys/reboot.h>
17#include <sys/stat.h>
18#include <unistd.h>
b1b2a107 19
b5efdb8a 20#include "alloc-util.h"
d00c2631 21#include "async.h"
07630cea
LP
22#include "cgroup-util.h"
23#include "def.h"
89711996 24#include "exec-util.h"
d00c2631 25#include "fd-util.h"
ec26be51 26#include "fileio.h"
07630cea
LP
27#include "killall.h"
28#include "log.h"
29#include "missing.h"
6bedfcbb 30#include "parse-util.h"
07630cea 31#include "process-util.h"
c01dcddf 32#include "reboot-util.h"
73ad712f 33#include "signal-util.h"
07630cea
LP
34#include "string-util.h"
35#include "switch-root.h"
36#include "terminal-util.h"
b1b2a107
FF
37#include "umount.h"
38#include "util.h"
b52aae1d 39#include "virt.h"
e96d6be7 40#include "watchdog.h"
b1b2a107 41
73ad712f
KW
/* Upper bound for the wait counter in sync_with_progress(); once it is reached the sync is treated as stalled. */
#define SYNC_PROGRESS_ATTEMPTS 3
/* Timeout handed to wait_for_terminate_with_timeout() for each individual wait in sync_with_progress(). */
#define SYNC_TIMEOUT_USEC (10*USEC_PER_SEC)

/* Filled in by parse_argv(): the first non-option argument (the shutdown verb). */
static char* arg_verb;
/* Value of --exit-code=; main() returns it for the "exit" verb when running in a container. */
static uint8_t arg_exit_code;
/* Value of --timeout=; main() passes it to broadcast_signal(). */
static usec_t arg_timeout = DEFAULT_TIMEOUT_USEC;
b1e90ec5
ZJS
48
49static int parse_argv(int argc, char *argv[]) {
50 enum {
51 ARG_LOG_LEVEL = 0x100,
52 ARG_LOG_TARGET,
53 ARG_LOG_COLOR,
54 ARG_LOG_LOCATION,
287419c1 55 ARG_EXIT_CODE,
e73c54b8 56 ARG_TIMEOUT,
b1e90ec5
ZJS
57 };
58
59 static const struct option options[] = {
60 { "log-level", required_argument, NULL, ARG_LOG_LEVEL },
61 { "log-target", required_argument, NULL, ARG_LOG_TARGET },
62 { "log-color", optional_argument, NULL, ARG_LOG_COLOR },
63 { "log-location", optional_argument, NULL, ARG_LOG_LOCATION },
287419c1 64 { "exit-code", required_argument, NULL, ARG_EXIT_CODE },
e73c54b8 65 { "timeout", required_argument, NULL, ARG_TIMEOUT },
b1e90ec5
ZJS
66 {}
67 };
68
69 int c, r;
70
71 assert(argc >= 1);
72 assert(argv);
73
4b5d8d0f
MS
74 /* "-" prevents getopt from permuting argv[] and moving the verb away
75 * from argv[1]. Our interface to initrd promises it'll be there. */
76 while ((c = getopt_long(argc, argv, "-", options, NULL)) >= 0)
b1e90ec5
ZJS
77 switch (c) {
78
79 case ARG_LOG_LEVEL:
80 r = log_set_max_level_from_string(optarg);
81 if (r < 0)
d405394c 82 log_error_errno(r, "Failed to parse log level %s, ignoring.", optarg);
b1e90ec5
ZJS
83
84 break;
85
86 case ARG_LOG_TARGET:
87 r = log_set_target_from_string(optarg);
88 if (r < 0)
d405394c 89 log_error_errno(r, "Failed to parse log target %s, ignoring", optarg);
b1e90ec5
ZJS
90
91 break;
92
93 case ARG_LOG_COLOR:
94
95 if (optarg) {
96 r = log_show_color_from_string(optarg);
97 if (r < 0)
d405394c 98 log_error_errno(r, "Failed to parse log color setting %s, ignoring", optarg);
b1e90ec5
ZJS
99 } else
100 log_show_color(true);
101
102 break;
103
104 case ARG_LOG_LOCATION:
105 if (optarg) {
106 r = log_show_location_from_string(optarg);
107 if (r < 0)
d405394c 108 log_error_errno(r, "Failed to parse log location setting %s, ignoring", optarg);
b1e90ec5
ZJS
109 } else
110 log_show_location(true);
111
112 break;
113
287419c1
AC
114 case ARG_EXIT_CODE:
115 r = safe_atou8(optarg, &arg_exit_code);
116 if (r < 0)
d405394c 117 log_error_errno(r, "Failed to parse exit code %s, ignoring", optarg);
287419c1
AC
118
119 break;
120
e73c54b8
JK
121 case ARG_TIMEOUT:
122 r = parse_sec(optarg, &arg_timeout);
123 if (r < 0)
d405394c 124 log_error_errno(r, "Failed to parse shutdown timeout %s, ignoring", optarg);
e73c54b8
JK
125
126 break;
127
4b5d8d0f
MS
128 case '\001':
129 if (!arg_verb)
130 arg_verb = optarg;
131 else
132 log_error("Excess arguments, ignoring");
133 break;
134
b1e90ec5 135 case '?':
b1e90ec5
ZJS
136 return -EINVAL;
137
138 default:
139 assert_not_reached("Unhandled option code.");
140 }
141
4b5d8d0f 142 if (!arg_verb) {
b1e90ec5
ZJS
143 log_error("Verb argument missing.");
144 return -EINVAL;
145 }
146
b1e90ec5
ZJS
147 return 0;
148}
149
/* Turn /run/initramfs into a private bind mount of itself and switch the root into it, keeping the old root
 * reachable under /oldroot.
 *
 * Returns the value of log_error_errno() if one of the mount() calls fails, otherwise the result of
 * switch_root(). */
static int switch_root_initramfs(void) {
        const char *initramfs = "/run/initramfs";
        int r;

        r = mount(initramfs, initramfs, NULL, MS_BIND, NULL);
        if (r < 0)
                return log_error_errno(errno, "Failed to mount bind /run/initramfs on /run/initramfs: %m");

        r = mount(NULL, initramfs, NULL, MS_PRIVATE, NULL);
        if (r < 0)
                return log_error_errno(errno, "Failed to make /run/initramfs private mount: %m");

        /* Use MS_BIND for the switch: processes holding open file descriptors may still be lurking around,
         * and /run/initramfs/shutdown is the one that deals with them. The old root is deliberately not
         * detached, since /run/initramfs/shutdown still needs access to it. */
        return switch_root(initramfs, "/oldroot", false, MS_BIND);
}
163
73ad712f
KW
164/* Read the following fields from /proc/meminfo:
165 *
166 * NFS_Unstable
167 * Writeback
168 * Dirty
169 *
170 * Return true if the sum of these fields is greater than the previous
171 * value input. For all other issues, report the failure and indicate that
172 * the sync is not making progress.
173 */
174static bool sync_making_progress(unsigned long long *prev_dirty) {
175 _cleanup_fclose_ FILE *f = NULL;
176 char line[LINE_MAX];
177 bool r = false;
178 unsigned long long val = 0;
179
180 f = fopen("/proc/meminfo", "re");
181 if (!f)
182 return log_warning_errno(errno, "Failed to open /proc/meminfo: %m");
183
184 FOREACH_LINE(line, f, log_warning_errno(errno, "Failed to parse /proc/meminfo: %m")) {
185 unsigned long long ull = 0;
186
187 if (!first_word(line, "NFS_Unstable:") && !first_word(line, "Writeback:") && !first_word(line, "Dirty:"))
188 continue;
189
190 errno = 0;
191 if (sscanf(line, "%*s %llu %*s", &ull) != 1) {
192 if (errno != 0)
193 log_warning_errno(errno, "Failed to parse /proc/meminfo: %m");
194 else
195 log_warning("Failed to parse /proc/meminfo");
196
197 return false;
198 }
199
200 val += ull;
201 }
202
203 r = *prev_dirty > val;
204
205 *prev_dirty = val;
206
207 return r;
208}
209
210static void sync_with_progress(void) {
d00c2631 211 unsigned long long dirty = ULONG_LONG_MAX;
73ad712f
KW
212 unsigned checks;
213 pid_t pid;
214 int r;
73ad712f
KW
215
216 BLOCK_SIGNALS(SIGCHLD);
217
d00c2631
LP
218 /* Due to the possiblity of the sync operation hanging, we fork a child process and monitor the progress. If
219 * the timeout lapses, the assumption is that that particular sync stalled. */
220
221 r = asynchronous_sync(&pid);
4c253ed1 222 if (r < 0) {
d00c2631 223 log_error_errno(r, "Failed to fork sync(): %m");
73ad712f
KW
224 return;
225 }
73ad712f
KW
226
227 log_info("Syncing filesystems and block devices.");
228
229 /* Start monitoring the sync operation. If more than
230 * SYNC_PROGRESS_ATTEMPTS lapse without progress being made,
231 * we assume that the sync is stalled */
232 for (checks = 0; checks < SYNC_PROGRESS_ATTEMPTS; checks++) {
233 r = wait_for_terminate_with_timeout(pid, SYNC_TIMEOUT_USEC);
234 if (r == 0)
235 /* Sync finished without error.
236 * (The sync itself does not return an error code) */
237 return;
238 else if (r == -ETIMEDOUT) {
239 /* Reset the check counter if the "Dirty" value is
240 * decreasing */
241 if (sync_making_progress(&dirty))
242 checks = 0;
243 } else {
244 log_error_errno(r, "Failed to sync filesystems and block devices: %m");
245 return;
246 }
247 }
248
249 /* Only reached in the event of a timeout. We should issue a kill
250 * to the stray process. */
251 log_error("Syncing filesystems and block devices - timed out, issuing SIGKILL to PID "PID_FMT".", pid);
252 (void) kill(pid, SIGKILL);
253}
254
/* Entry point of the final shutdown stage. In order, it:
 *
 *   1. sets up console logging, parses the command line and refuses to continue unless it runs as PID 1;
 *   2. maps the verb (reboot/poweroff/halt/kexec/exit) to a reboot() command;
 *   3. optionally configures the watchdog from $WATCHDOG_USEC/$WATCHDOG_DEVICE, locks itself into memory,
 *      syncs (outside of containers), disables coredumps and broadcasts SIGTERM, then SIGKILL;
 *   4. repeatedly unmounts file systems, deactivates swaps and detaches loop and DM devices until nothing is
 *      left or an iteration changes nothing;
 *   5. runs the executables in SYSTEM_SHUTDOWN_PATH and, if /run/initramfs/shutdown is usable, switches root
 *      into /run/initramfs and executes /shutdown there;
 *   6. syncs once more and invokes reboot().
 *
 * Returns only for the "exit" verb inside a container (arg_exit_code) or when reboot() fails with EPERM
 * inside a container (EXIT_SUCCESS). Every other failure ends in the error path, which logs and calls
 * freeze(). */
int main(int argc, char *argv[]) {
        bool need_umount, need_swapoff, need_loop_detach, need_dm_detach;
        bool in_container, use_watchdog = false, can_initrd;
        _cleanup_free_ char *cgroup = NULL;
        char *arguments[3];
        int cmd, r, umount_log_level = LOG_INFO;
        static const char* const dirs[] = {SYSTEM_SHUTDOWN_PATH, NULL};
        char *watchdog_device;

        /* The log target defaults to console, but the original systemd process will pass its log target in through a
         * command line argument, which will override this default. Also, ensure we'll never log to the journal or
         * syslog, as these logging daemons are either already dead or will die very soon. */

        log_set_target(LOG_TARGET_CONSOLE);
        log_set_prohibit_ipc(true);
        log_parse_environment();

        r = parse_argv(argc, argv);
        if (r < 0)
                goto error;

        log_open();

        umask(0022);

        if (getpid_cached() != 1) {
                log_error("Not executed by init (PID 1).");
                r = -EPERM;
                goto error;
        }

        /* Translate the verb into the command later handed to reboot(). */
        if (streq(arg_verb, "reboot"))
                cmd = RB_AUTOBOOT;
        else if (streq(arg_verb, "poweroff"))
                cmd = RB_POWER_OFF;
        else if (streq(arg_verb, "halt"))
                cmd = RB_HALT_SYSTEM;
        else if (streq(arg_verb, "kexec"))
                cmd = LINUX_REBOOT_CMD_KEXEC;
        else if (streq(arg_verb, "exit"))
                cmd = 0; /* ignored, just checking that arg_verb is valid */
        else {
                log_error("Unknown action '%s'.", arg_verb);
                r = -EINVAL;
                goto error;
        }

        /* Best effort: if this fails, cgroup stays NULL and the cg_trim() call in the loop below is skipped. */
        (void) cg_get_root_path(&cgroup);
        in_container = detect_container() > 0;

        /* The watchdog is pinged in the loop below only if $WATCHDOG_USEC is set in our environment. */
        use_watchdog = !!getenv("WATCHDOG_USEC");
        watchdog_device = getenv("WATCHDOG_DEVICE");
        if (watchdog_device) {
                r = watchdog_set_device(watchdog_device);
                if (r < 0)
                        log_warning_errno(r, "Failed to set watchdog device to %s, ignoring: %m",
                                          watchdog_device);
        }

        /* Lock us into memory */
        (void) mlockall(MCL_CURRENT|MCL_FUTURE);

        /* Synchronize everything that is not written to disk yet at this point already. This is a good idea so that
         * slow IO is processed here already and the final process killing spree is not impacted by processes
         * desperately trying to sync IO to disk within their timeout. Do not remove this sync, data corruption will
         * result. */
        if (!in_container)
                sync_with_progress();

        disable_coredumps();

        log_info("Sending SIGTERM to remaining processes...");
        broadcast_signal(SIGTERM, true, true, arg_timeout);

        log_info("Sending SIGKILL to remaining processes...");
        broadcast_signal(SIGKILL, true, false, arg_timeout);

        /* Inside a container none of the teardown steps below are attempted, and the initrd is not used. */
        need_umount = !in_container;
        need_swapoff = !in_container;
        need_loop_detach = !in_container;
        need_dm_detach = !in_container;
        can_initrd = !in_container && !in_initrd() && access("/run/initramfs/shutdown", X_OK) == 0;

        /* Unmount all mountpoints, swaps, and loopback devices */
        for (;;) {
                /* Passed to each teardown helper below; if it is still false after all of them ran, this
                 * iteration is treated as having achieved nothing. */
                bool changed = false;

                if (use_watchdog)
                        watchdog_ping();

                /* Let's trim the cgroup tree on each iteration so
                   that we leave an empty cgroup tree around, so that
                   container managers get a nice notify event when we
                   are down */
                if (cgroup)
                        cg_trim(SYSTEMD_CGROUP_CONTROLLER, cgroup, false);

                /* For each of the four helpers below: 0 means everything is gone, a positive value is the
                 * number of items left, a negative value is an error. */
                if (need_umount) {
                        log_info("Unmounting file systems.");
                        r = umount_all(&changed, umount_log_level);
                        if (r == 0) {
                                need_umount = false;
                                log_info("All filesystems unmounted.");
                        } else if (r > 0)
                                log_info("Not all file systems unmounted, %d left.", r);
                        else
                                log_error_errno(r, "Failed to unmount file systems: %m");
                }

                if (need_swapoff) {
                        log_info("Deactivating swaps.");
                        r = swapoff_all(&changed);
                        if (r == 0) {
                                need_swapoff = false;
                                log_info("All swaps deactivated.");
                        } else if (r > 0)
                                log_info("Not all swaps deactivated, %d left.", r);
                        else
                                log_error_errno(r, "Failed to deactivate swaps: %m");
                }

                if (need_loop_detach) {
                        log_info("Detaching loop devices.");
                        r = loopback_detach_all(&changed, umount_log_level);
                        if (r == 0) {
                                need_loop_detach = false;
                                log_info("All loop devices detached.");
                        } else if (r > 0)
                                log_info("Not all loop devices detached, %d left.", r);
                        else
                                log_error_errno(r, "Failed to detach loop devices: %m");
                }

                if (need_dm_detach) {
                        log_info("Detaching DM devices.");
                        r = dm_detach_all(&changed, umount_log_level);
                        if (r == 0) {
                                need_dm_detach = false;
                                log_info("All DM devices detached.");
                        } else if (r > 0)
                                log_info("Not all DM devices detached, %d left.", r);
                        else
                                log_error_errno(r, "Failed to detach DM devices: %m");
                }

                if (!need_umount && !need_swapoff && !need_loop_detach && !need_dm_detach) {
                        log_info("All filesystems, swaps, loop devices and DM devices detached.");
                        /* Yay, done */
                        break;
                }

                if (!changed && umount_log_level == LOG_INFO && !can_initrd) {
                        /* There are things we cannot get rid of. Loop one more time
                         * with LOG_ERR to inform the user. Note that we don't need
                         * to do this if there is a initrd to switch to, because that
                         * one is likely to get rid of the remaining mounts. If not,
                         * it will log about them. */
                        umount_log_level = LOG_ERR;
                        continue;
                }

                /* If in this iteration we didn't manage to
                 * unmount/deactivate anything, we simply give up */
                if (!changed) {
                        log_info("Cannot finalize remaining%s%s%s%s continuing.",
                                 need_umount ? " file systems," : "",
                                 need_swapoff ? " swap devices," : "",
                                 need_loop_detach ? " loop devices," : "",
                                 need_dm_detach ? " DM devices," : "");
                        break;
                }

                log_debug("Couldn't finalize remaining %s%s%s%s trying again.",
                          need_umount ? " file systems," : "",
                          need_swapoff ? " swap devices," : "",
                          need_loop_detach ? " loop devices," : "",
                          need_dm_detach ? " DM devices," : "");
        }

        /* We're done with the watchdog. */
        watchdog_free_device();

        /* Run the executables found in SYSTEM_SHUTDOWN_PATH, passing the verb as their argument.
         * NOTE(review): slot 0 is left NULL, presumably for execute_directories() to fill in with the path of
         * each executable; confirm against its implementation. */
        arguments[0] = NULL;
        arguments[1] = arg_verb;
        arguments[2] = NULL;
        execute_directories(dirs, DEFAULT_TIMEOUT_USEC, NULL, NULL, arguments);

        if (can_initrd) {
                r = switch_root_initramfs();
                if (r >= 0) {
                        /* Re-use our own argument vector for the initrd's shutdown binary. */
                        argv[0] = (char*) "/shutdown";

                        setsid();
                        make_console_stdio();

                        log_info("Successfully changed into root pivot.\n"
                                 "Returning to initrd...");

                        /* Only returns on failure, in which case we carry on with our own shutdown logic. */
                        execv("/shutdown", argv);
                        log_error_errno(errno, "Failed to execute shutdown binary: %m");
                } else
                        log_error_errno(r, "Failed to switch root to \"/run/initramfs\": %m");

        }

        if (need_umount || need_swapoff || need_loop_detach || need_dm_detach)
                log_error("Failed to finalize %s%s%s%s ignoring",
                          need_umount ? " file systems," : "",
                          need_swapoff ? " swap devices," : "",
                          need_loop_detach ? " loop devices," : "",
                          need_dm_detach ? " DM devices," : "");

        /* The kernel will automatically flush ATA disks and suchlike on reboot(), but the file systems need to be
         * sync'ed explicitly in advance. So let's do this here, but not needlessly slow down containers. Note that we
         * sync'ed things already once above, but we did some more work since then which might have caused IO, hence
         * let's do it once more. Do not remove this sync, data corruption will result. */
        if (!in_container)
                sync_with_progress();

        if (streq(arg_verb, "exit")) {
                if (in_container)
                        return arg_exit_code;

                cmd = RB_POWER_OFF; /* We cannot exit() on the host, fallback on another method. */
        }

        switch (cmd) {

        case LINUX_REBOOT_CMD_KEXEC:

                if (!in_container) {
                        /* We cheat and exec kexec to avoid doing all its work */
                        log_info("Rebooting with kexec.");

                        r = safe_fork("(sd-kexec)", FORK_RESET_SIGNALS|FORK_CLOSE_ALL_FDS|FORK_LOG|FORK_WAIT, NULL);
                        if (r == 0) {
                                const char * const args[] = {
                                        KEXEC, "-e", NULL
                                };

                                /* Child */

                                execv(args[0], (char * const *) args);
                                _exit(EXIT_FAILURE);
                        }

                        /* If we are still running, then the kexec can't have worked, let's fall through */
                }

                /* kexec was not possible or did not work: degrade to a regular reboot. */
                cmd = RB_AUTOBOOT;
                _fallthrough_;

        case RB_AUTOBOOT:
                (void) reboot_with_parameter(REBOOT_LOG);
                log_info("Rebooting.");
                break;

        case RB_POWER_OFF:
                log_info("Powering off.");
                break;

        case RB_HALT_SYSTEM:
                log_info("Halting system.");
                break;

        default:
                assert_not_reached("Unknown magic");
        }

        /* NOTE(review): errno is inspected below without looking at reboot()'s return value; presumably
         * reboot() only ever returns here when it failed. Confirm. */
        (void) reboot(cmd);
        if (errno == EPERM && in_container) {
                /* If we are in a container, and we lacked
                 * CAP_SYS_BOOT just exit, this will kill our
                 * container for good. */
                log_info("Exiting container.");
                return EXIT_SUCCESS;
        }

        r = log_error_errno(errno, "Failed to invoke reboot(): %m");

  error:
        /* Common failure exit: report the error in r and call freeze() rather than returning. */
        log_emergency_errno(r, "Critical error while doing system shutdown: %m");
        freeze();
}