]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/core/shutdown.c
coccinelle: make use of SYNTHETIC_ERRNO
[thirdparty/systemd.git] / src / core / shutdown.c
CommitLineData
53e1b683 1/* SPDX-License-Identifier: LGPL-2.1+ */
b1b2a107 2/***
96b2fb93 3 Copyright © 2010 ProFUSION embedded systems
b1b2a107
FF
4***/
5
b1b2a107 6#include <errno.h>
07630cea 7#include <getopt.h>
c01dcddf 8#include <linux/reboot.h>
b1b2a107
FF
9#include <signal.h>
10#include <stdbool.h>
11#include <stdlib.h>
07630cea
LP
12#include <sys/mman.h>
13#include <sys/mount.h>
14#include <sys/reboot.h>
15#include <sys/stat.h>
16#include <unistd.h>
b1b2a107 17
b5efdb8a 18#include "alloc-util.h"
d00c2631 19#include "async.h"
07630cea
LP
20#include "cgroup-util.h"
21#include "def.h"
89711996 22#include "exec-util.h"
d00c2631 23#include "fd-util.h"
ec26be51 24#include "fileio.h"
07630cea
LP
25#include "killall.h"
26#include "log.h"
27#include "missing.h"
6bedfcbb 28#include "parse-util.h"
07630cea 29#include "process-util.h"
c01dcddf 30#include "reboot-util.h"
73ad712f 31#include "signal-util.h"
07630cea
LP
32#include "string-util.h"
33#include "switch-root.h"
34#include "terminal-util.h"
b1b2a107
FF
35#include "umount.h"
36#include "util.h"
b52aae1d 37#include "virt.h"
e96d6be7 38#include "watchdog.h"
b1b2a107 39
73ad712f
KW
/* Number of timed-out waits for the sync process that are tolerated before the sync is considered stalled.
 * The counter is restarted whenever progress is detected. */
#define SYNC_PROGRESS_ATTEMPTS 3
/* How long a single wait for the sync process may take before progress is checked. */
#define SYNC_TIMEOUT_USEC (10*USEC_PER_SEC)

/* The action to execute, taken from the first positional command line argument. */
static char* arg_verb;
/* Value of --exit-code=, returned from main() for the "exit" verb when running in a container. */
static uint8_t arg_exit_code;
/* Value of --timeout=, passed to broadcast_signal() when signalling the remaining processes. */
static usec_t arg_timeout = DEFAULT_TIMEOUT_USEC;
b1e90ec5
ZJS
46
47static int parse_argv(int argc, char *argv[]) {
48 enum {
49 ARG_LOG_LEVEL = 0x100,
50 ARG_LOG_TARGET,
51 ARG_LOG_COLOR,
52 ARG_LOG_LOCATION,
287419c1 53 ARG_EXIT_CODE,
e73c54b8 54 ARG_TIMEOUT,
b1e90ec5
ZJS
55 };
56
57 static const struct option options[] = {
58 { "log-level", required_argument, NULL, ARG_LOG_LEVEL },
59 { "log-target", required_argument, NULL, ARG_LOG_TARGET },
60 { "log-color", optional_argument, NULL, ARG_LOG_COLOR },
61 { "log-location", optional_argument, NULL, ARG_LOG_LOCATION },
287419c1 62 { "exit-code", required_argument, NULL, ARG_EXIT_CODE },
e73c54b8 63 { "timeout", required_argument, NULL, ARG_TIMEOUT },
b1e90ec5
ZJS
64 {}
65 };
66
67 int c, r;
68
69 assert(argc >= 1);
70 assert(argv);
71
4b5d8d0f
MS
72 /* "-" prevents getopt from permuting argv[] and moving the verb away
73 * from argv[1]. Our interface to initrd promises it'll be there. */
74 while ((c = getopt_long(argc, argv, "-", options, NULL)) >= 0)
b1e90ec5
ZJS
75 switch (c) {
76
77 case ARG_LOG_LEVEL:
78 r = log_set_max_level_from_string(optarg);
79 if (r < 0)
5e1ee764 80 log_error_errno(r, "Failed to parse log level %s, ignoring: %m", optarg);
b1e90ec5
ZJS
81
82 break;
83
84 case ARG_LOG_TARGET:
85 r = log_set_target_from_string(optarg);
86 if (r < 0)
5e1ee764 87 log_error_errno(r, "Failed to parse log target %s, ignoring: %m", optarg);
b1e90ec5
ZJS
88
89 break;
90
91 case ARG_LOG_COLOR:
92
93 if (optarg) {
94 r = log_show_color_from_string(optarg);
95 if (r < 0)
5e1ee764 96 log_error_errno(r, "Failed to parse log color setting %s, ignoring: %m", optarg);
b1e90ec5
ZJS
97 } else
98 log_show_color(true);
99
100 break;
101
102 case ARG_LOG_LOCATION:
103 if (optarg) {
104 r = log_show_location_from_string(optarg);
105 if (r < 0)
5e1ee764 106 log_error_errno(r, "Failed to parse log location setting %s, ignoring: %m", optarg);
b1e90ec5
ZJS
107 } else
108 log_show_location(true);
109
110 break;
111
287419c1
AC
112 case ARG_EXIT_CODE:
113 r = safe_atou8(optarg, &arg_exit_code);
114 if (r < 0)
5e1ee764 115 log_error_errno(r, "Failed to parse exit code %s, ignoring: %m", optarg);
287419c1
AC
116
117 break;
118
e73c54b8
JK
119 case ARG_TIMEOUT:
120 r = parse_sec(optarg, &arg_timeout);
121 if (r < 0)
5e1ee764 122 log_error_errno(r, "Failed to parse shutdown timeout %s, ignoring: %m", optarg);
e73c54b8
JK
123
124 break;
125
4b5d8d0f
MS
126 case '\001':
127 if (!arg_verb)
128 arg_verb = optarg;
129 else
130 log_error("Excess arguments, ignoring");
131 break;
132
b1e90ec5 133 case '?':
b1e90ec5
ZJS
134 return -EINVAL;
135
136 default:
137 assert_not_reached("Unhandled option code.");
138 }
139
baaa35ad
ZJS
140 if (!arg_verb)
141 return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
142 "Verb argument missing.");
b1e90ec5 143
b1e90ec5
ZJS
144 return 0;
145}
146
5a4bf02f 147static int switch_root_initramfs(void) {
4a62c710
MS
148 if (mount("/run/initramfs", "/run/initramfs", NULL, MS_BIND, NULL) < 0)
149 return log_error_errno(errno, "Failed to mount bind /run/initramfs on /run/initramfs: %m");
89d471d5 150
4a62c710
MS
151 if (mount(NULL, "/run/initramfs", NULL, MS_PRIVATE, NULL) < 0)
152 return log_error_errno(errno, "Failed to make /run/initramfs private mount: %m");
89d471d5 153
f131770b 154 /* switch_root with MS_BIND, because there might still be processes lurking around, which have open file descriptors.
5a4bf02f
HH
155 * /run/initramfs/shutdown will take care of these.
156 * Also do not detach the old root, because /run/initramfs/shutdown needs to access it.
157 */
158 return switch_root("/run/initramfs", "/oldroot", false, MS_BIND);
7cb1094a
HH
159}
160
73ad712f
KW
161/* Read the following fields from /proc/meminfo:
162 *
163 * NFS_Unstable
164 * Writeback
165 * Dirty
166 *
167 * Return true if the sum of these fields is greater than the previous
168 * value input. For all other issues, report the failure and indicate that
169 * the sync is not making progress.
170 */
171static bool sync_making_progress(unsigned long long *prev_dirty) {
172 _cleanup_fclose_ FILE *f = NULL;
73ad712f 173 unsigned long long val = 0;
a34f0dae 174 bool r = false;
73ad712f
KW
175
176 f = fopen("/proc/meminfo", "re");
177 if (!f)
178 return log_warning_errno(errno, "Failed to open /proc/meminfo: %m");
179
a34f0dae
LP
180 for (;;) {
181 _cleanup_free_ char *line = NULL;
73ad712f 182 unsigned long long ull = 0;
a34f0dae
LP
183 int q;
184
185 q = read_line(f, LONG_LINE_MAX, &line);
186 if (q < 0)
187 return log_warning_errno(q, "Failed to parse /proc/meminfo: %m");
188 if (q == 0)
189 break;
73ad712f
KW
190
191 if (!first_word(line, "NFS_Unstable:") && !first_word(line, "Writeback:") && !first_word(line, "Dirty:"))
192 continue;
193
194 errno = 0;
195 if (sscanf(line, "%*s %llu %*s", &ull) != 1) {
196 if (errno != 0)
197 log_warning_errno(errno, "Failed to parse /proc/meminfo: %m");
198 else
199 log_warning("Failed to parse /proc/meminfo");
200
201 return false;
202 }
203
204 val += ull;
205 }
206
207 r = *prev_dirty > val;
208
209 *prev_dirty = val;
210
211 return r;
212}
213
214static void sync_with_progress(void) {
d00c2631 215 unsigned long long dirty = ULONG_LONG_MAX;
73ad712f
KW
216 unsigned checks;
217 pid_t pid;
218 int r;
73ad712f
KW
219
220 BLOCK_SIGNALS(SIGCHLD);
221
d00c2631
LP
222 /* Due to the possiblity of the sync operation hanging, we fork a child process and monitor the progress. If
223 * the timeout lapses, the assumption is that that particular sync stalled. */
224
225 r = asynchronous_sync(&pid);
4c253ed1 226 if (r < 0) {
d00c2631 227 log_error_errno(r, "Failed to fork sync(): %m");
73ad712f
KW
228 return;
229 }
73ad712f
KW
230
231 log_info("Syncing filesystems and block devices.");
232
233 /* Start monitoring the sync operation. If more than
234 * SYNC_PROGRESS_ATTEMPTS lapse without progress being made,
235 * we assume that the sync is stalled */
236 for (checks = 0; checks < SYNC_PROGRESS_ATTEMPTS; checks++) {
237 r = wait_for_terminate_with_timeout(pid, SYNC_TIMEOUT_USEC);
238 if (r == 0)
239 /* Sync finished without error.
240 * (The sync itself does not return an error code) */
241 return;
242 else if (r == -ETIMEDOUT) {
243 /* Reset the check counter if the "Dirty" value is
244 * decreasing */
245 if (sync_making_progress(&dirty))
246 checks = 0;
247 } else {
248 log_error_errno(r, "Failed to sync filesystems and block devices: %m");
249 return;
250 }
251 }
252
253 /* Only reached in the event of a timeout. We should issue a kill
254 * to the stray process. */
255 log_error("Syncing filesystems and block devices - timed out, issuing SIGKILL to PID "PID_FMT".", pid);
256 (void) kill(pid, SIGKILL);
257}
258
b1b2a107 259int main(int argc, char *argv[]) {
8c977838 260 bool need_umount, need_swapoff, need_loop_detach, need_dm_detach;
456b2199 261 bool in_container, use_watchdog = false, can_initrd;
06beed6d 262 _cleanup_free_ char *cgroup = NULL;
6edd7d0a 263 char *arguments[3];
456b2199 264 int cmd, r, umount_log_level = LOG_INFO;
e801700e 265 static const char* const dirs[] = {SYSTEM_SHUTDOWN_PATH, NULL};
8a2c1fbf 266 char *watchdog_device;
b1b2a107 267
e18805fb
LP
268 /* The log target defaults to console, but the original systemd process will pass its log target in through a
269 * command line argument, which will override this default. Also, ensure we'll never log to the journal or
270 * syslog, as these logging daemons are either already dead or will die very soon. */
271
272 log_set_target(LOG_TARGET_CONSOLE);
273 log_set_prohibit_ipc(true);
b1e90ec5 274 log_parse_environment();
e18805fb 275
b1e90ec5
ZJS
276 r = parse_argv(argc, argv);
277 if (r < 0)
278 goto error;
ec26be51 279
b1b2a107
FF
280 log_open();
281
4c12626c
LP
282 umask(0022);
283
df0ff127 284 if (getpid_cached() != 1) {
b1e90ec5 285 log_error("Not executed by init (PID 1).");
b1b2a107
FF
286 r = -EPERM;
287 goto error;
288 }
289
b1e90ec5 290 if (streq(arg_verb, "reboot"))
b1b2a107 291 cmd = RB_AUTOBOOT;
b1e90ec5 292 else if (streq(arg_verb, "poweroff"))
b1b2a107 293 cmd = RB_POWER_OFF;
b1e90ec5 294 else if (streq(arg_verb, "halt"))
b1b2a107 295 cmd = RB_HALT_SYSTEM;
b1e90ec5 296 else if (streq(arg_verb, "kexec"))
b1b2a107 297 cmd = LINUX_REBOOT_CMD_KEXEC;
287419c1
AC
298 else if (streq(arg_verb, "exit"))
299 cmd = 0; /* ignored, just checking that arg_verb is valid */
b1b2a107 300 else {
b1e90ec5 301 log_error("Unknown action '%s'.", arg_verb);
e18805fb 302 r = -EINVAL;
b1b2a107
FF
303 goto error;
304 }
305
0b9aa270 306 (void) cg_get_root_path(&cgroup);
2e79d182 307 in_container = detect_container() > 0;
41f85451 308
5d904a6a 309 use_watchdog = getenv("WATCHDOG_USEC");
8a2c1fbf
EJ
310 watchdog_device = getenv("WATCHDOG_DEVICE");
311 if (watchdog_device) {
312 r = watchdog_set_device(watchdog_device);
313 if (r < 0)
314 log_warning_errno(r, "Failed to set watchdog device to %s, ignoring: %m",
315 watchdog_device);
316 }
e96d6be7 317
2e79d182 318 /* Lock us into memory */
e18805fb 319 (void) mlockall(MCL_CURRENT|MCL_FUTURE);
b1b2a107 320
2e79d182
LP
321 /* Synchronize everything that is not written to disk yet at this point already. This is a good idea so that
322 * slow IO is processed here already and the final process killing spree is not impacted by processes
73ad712f
KW
323 * desperately trying to sync IO to disk within their timeout. Do not remove this sync, data corruption will
324 * result. */
2e79d182 325 if (!in_container)
73ad712f 326 sync_with_progress();
2e79d182 327
e557b1a6 328 disable_coredumps();
27b372c1 329
ab58e291 330 log_info("Sending SIGTERM to remaining processes...");
e73c54b8 331 broadcast_signal(SIGTERM, true, true, arg_timeout);
b1b2a107 332
ab58e291 333 log_info("Sending SIGKILL to remaining processes...");
e73c54b8 334 broadcast_signal(SIGKILL, true, false, arg_timeout);
40e85d00 335
d89b5fed 336 need_umount = !in_container;
8c977838
ZJS
337 need_swapoff = !in_container;
338 need_loop_detach = !in_container;
339 need_dm_detach = !in_container;
456b2199 340 can_initrd = !in_container && !in_initrd() && access("/run/initramfs/shutdown", X_OK) == 0;
b1b2a107 341
567ea02a 342 /* Unmount all mountpoints, swaps, and loopback devices */
ac9cea5b 343 for (;;) {
12aad1d0
LP
344 bool changed = false;
345
e96d6be7
LP
346 if (use_watchdog)
347 watchdog_ping();
348
41f85451
LP
349 /* Let's trim the cgroup tree on each iteration so
350 that we leave an empty cgroup tree around, so that
351 container managers get a nice notify event when we
352 are down */
353 if (cgroup)
354 cg_trim(SYSTEMD_CGROUP_CONTROLLER, cgroup, false);
355
b1b2a107 356 if (need_umount) {
ab58e291 357 log_info("Unmounting file systems.");
456b2199 358 r = umount_all(&changed, umount_log_level);
bce93b7a 359 if (r == 0) {
b1b2a107 360 need_umount = false;
bce93b7a
MS
361 log_info("All filesystems unmounted.");
362 } else if (r > 0)
ab58e291 363 log_info("Not all file systems unmounted, %d left.", r);
b1b2a107 364 else
da927ba9 365 log_error_errno(r, "Failed to unmount file systems: %m");
b1b2a107
FF
366 }
367
368 if (need_swapoff) {
735e0712 369 log_info("Deactivating swaps.");
12aad1d0 370 r = swapoff_all(&changed);
bce93b7a 371 if (r == 0) {
b1b2a107 372 need_swapoff = false;
735e0712 373 log_info("All swaps deactivated.");
bce93b7a 374 } else if (r > 0)
735e0712 375 log_info("Not all swaps deactivated, %d left.", r);
b1b2a107 376 else
da927ba9 377 log_error_errno(r, "Failed to deactivate swaps: %m");
b1b2a107
FF
378 }
379
380 if (need_loop_detach) {
381 log_info("Detaching loop devices.");
456b2199 382 r = loopback_detach_all(&changed, umount_log_level);
bce93b7a 383 if (r == 0) {
b1b2a107 384 need_loop_detach = false;
bce93b7a
MS
385 log_info("All loop devices detached.");
386 } else if (r > 0)
ab58e291 387 log_info("Not all loop devices detached, %d left.", r);
b1b2a107 388 else
da927ba9 389 log_error_errno(r, "Failed to detach loop devices: %m");
d48141ba 390 }
b1b2a107 391
d48141ba
LP
392 if (need_dm_detach) {
393 log_info("Detaching DM devices.");
456b2199 394 r = dm_detach_all(&changed, umount_log_level);
bce93b7a 395 if (r == 0) {
d48141ba 396 need_dm_detach = false;
bce93b7a
MS
397 log_info("All DM devices detached.");
398 } else if (r > 0)
2569a5ce 399 log_info("Not all DM devices detached, %d left.", r);
d48141ba 400 else
da927ba9 401 log_error_errno(r, "Failed to detach DM devices: %m");
b1b2a107
FF
402 }
403
a27d2184 404 if (!need_umount && !need_swapoff && !need_loop_detach && !need_dm_detach) {
ac9cea5b 405 log_info("All filesystems, swaps, loop devices and DM devices detached.");
12aad1d0 406 /* Yay, done */
ac9cea5b 407 break;
a27d2184 408 }
b1b2a107 409
456b2199
JJ
410 if (!changed && umount_log_level == LOG_INFO && !can_initrd) {
411 /* There are things we cannot get rid of. Loop one more time
412 * with LOG_ERR to inform the user. Note that we don't need
413 * to do this if there is a initrd to switch to, because that
414 * one is likely to get rid of the remounting mounts. If not,
415 * it will log about them. */
416 umount_log_level = LOG_ERR;
417 continue;
418 }
419
12aad1d0 420 /* If in this iteration we didn't manage to
bd3fa1d2 421 * unmount/deactivate anything, we simply give up */
12aad1d0 422 if (!changed) {
8c977838
ZJS
423 log_info("Cannot finalize remaining%s%s%s%s continuing.",
424 need_umount ? " file systems," : "",
425 need_swapoff ? " swap devices," : "",
426 need_loop_detach ? " loop devices," : "",
427 need_dm_detach ? " DM devices," : "");
ac9cea5b 428 break;
12aad1d0
LP
429 }
430
ac9cea5b 431 log_debug("Couldn't finalize remaining %s%s%s%s trying again.",
8c977838
ZJS
432 need_umount ? " file systems," : "",
433 need_swapoff ? " swap devices," : "",
434 need_loop_detach ? " loop devices," : "",
435 need_dm_detach ? " DM devices," : "");
b1b2a107
FF
436 }
437
8a2c1fbf
EJ
438 /* We're done with the watchdog. */
439 watchdog_free_device();
440
6edd7d0a 441 arguments[0] = NULL;
b1e90ec5 442 arguments[1] = arg_verb;
6edd7d0a 443 arguments[2] = NULL;
78ec1bb4 444 execute_directories(dirs, DEFAULT_TIMEOUT_USEC, NULL, NULL, arguments, NULL);
83cc030f 445
456b2199 446 if (can_initrd) {
5a4bf02f
HH
447 r = switch_root_initramfs();
448 if (r >= 0) {
a2726e5c 449 argv[0] = (char*) "/shutdown";
30d743f4 450
5a4bf02f
HH
451 setsid();
452 make_console_stdio();
453
454 log_info("Successfully changed into root pivot.\n"
455 "Returning to initrd...");
30d743f4 456
a2726e5c 457 execv("/shutdown", argv);
56f64d95 458 log_error_errno(errno, "Failed to execute shutdown binary: %m");
5a4bf02f 459 } else
da927ba9 460 log_error_errno(r, "Failed to switch root to \"/run/initramfs\": %m");
5a4bf02f 461
7cb1094a
HH
462 }
463
8c977838
ZJS
464 if (need_umount || need_swapoff || need_loop_detach || need_dm_detach)
465 log_error("Failed to finalize %s%s%s%s ignoring",
466 need_umount ? " file systems," : "",
467 need_swapoff ? " swap devices," : "",
468 need_loop_detach ? " loop devices," : "",
469 need_dm_detach ? " DM devices," : "");
470
2e79d182
LP
471 /* The kernel will automatically flush ATA disks and suchlike on reboot(), but the file systems need to be
472 * sync'ed explicitly in advance. So let's do this here, but not needlessly slow down containers. Note that we
473 * sync'ed things already once above, but we did some more work since then which might have caused IO, hence
73ad712f 474 * let's do it once more. Do not remove this sync, data corruption will result. */
0049f05a 475 if (!in_container)
73ad712f 476 sync_with_progress();
0049f05a 477
287419c1
AC
478 if (streq(arg_verb, "exit")) {
479 if (in_container)
1f409a0c
LP
480 return arg_exit_code;
481
482 cmd = RB_POWER_OFF; /* We cannot exit() on the host, fallback on another method. */
287419c1
AC
483 }
484
477def80
LP
485 switch (cmd) {
486
487 case LINUX_REBOOT_CMD_KEXEC:
cb7ec564
LP
488
489 if (!in_container) {
490 /* We cheat and exec kexec to avoid doing all its work */
477def80 491 log_info("Rebooting with kexec.");
cb7ec564 492
1f5d1e02 493 r = safe_fork("(sd-kexec)", FORK_RESET_SIGNALS|FORK_CLOSE_ALL_FDS|FORK_LOG|FORK_WAIT, NULL);
4c253ed1 494 if (r == 0) {
477def80
LP
495 const char * const args[] = {
496 KEXEC, "-e", NULL
497 };
498
cb7ec564 499 /* Child */
477def80 500
cb7ec564 501 execv(args[0], (char * const *) args);
477def80 502 _exit(EXIT_FAILURE);
4c253ed1
LP
503 }
504
1f5d1e02 505 /* If we are still running, then the kexec can't have worked, let's fall through */
b1b2a107 506 }
e61cd186
LP
507
508 cmd = RB_AUTOBOOT;
4831981d 509 _fallthrough_;
477def80 510
c01dcddf
LP
511 case RB_AUTOBOOT:
512 (void) reboot_with_parameter(REBOOT_LOG);
477def80
LP
513 log_info("Rebooting.");
514 break;
515
516 case RB_POWER_OFF:
517 log_info("Powering off.");
518 break;
519
520 case RB_HALT_SYSTEM:
521 log_info("Halting system.");
522 break;
523
524 default:
525 assert_not_reached("Unknown magic");
526 }
cb7ec564 527
118cf952 528 (void) reboot(cmd);
cb7ec564
LP
529 if (errno == EPERM && in_container) {
530 /* If we are in a container, and we lacked
531 * CAP_SYS_BOOT just exit, this will kill our
532 * container for good. */
477def80 533 log_info("Exiting container.");
1f409a0c 534 return EXIT_SUCCESS;
cb7ec564
LP
535 }
536
76ef789d 537 r = log_error_errno(errno, "Failed to invoke reboot(): %m");
b1b2a107
FF
538
539 error:
da927ba9 540 log_emergency_errno(r, "Critical error while doing system shutdown: %m");
b1b2a107 541 freeze();
b1b2a107 542}