1 /* SPDX-License-Identifier: LGPL-2.1+ */
3 This file is part of systemd.
5 Copyright 2010 ProFUSION embedded systems
7 systemd is free software; you can redistribute it and/or modify it
8 under the terms of the GNU Lesser General Public License as published by
9 the Free Software Foundation; either version 2.1 of the License, or
10 (at your option) any later version.
12 systemd is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 Lesser General Public License for more details.
17 You should have received a copy of the GNU Lesser General Public License
18 along with systemd; If not, see <http://www.gnu.org/licenses/>.
23 #include <linux/reboot.h>
28 #include <sys/mount.h>
29 #include <sys/reboot.h>
33 #include "alloc-util.h"
35 #include "cgroup-util.h"
37 #include "exec-util.h"
43 #include "parse-util.h"
44 #include "process-util.h"
45 #include "signal-util.h"
46 #include "string-util.h"
47 #include "switch-root.h"
48 #include "terminal-util.h"
/* Upper bound of the retry loop in main() that unmounts file systems, deactivates swaps and detaches loop/DM devices. */
54 #define FINALIZE_ATTEMPTS 50
/* Upper bound of the progress-check loop in sync_with_progress(). */
56 #define SYNC_PROGRESS_ATTEMPTS 3
/* Timeout handed to each wait_for_terminate_with_timeout() call on the sync child in sync_with_progress(). */
57 #define SYNC_TIMEOUT_USEC (10*USEC_PER_SEC)
/* Shutdown verb. main() compares it against "reboot", "poweroff", "halt", "kexec" and "exit".
 * NOTE(review): presumably assigned in parse_argv(); the assignment is not visible in this view. */
59 static char* arg_verb
;
/* Destination of safe_atou8() when the exit code option value is parsed in parse_argv(). */
60 static uint8_t arg_exit_code
;
/* Destination of parse_sec() when the timeout option value is parsed; main() passes it to broadcast_signal(). */
61 static usec_t arg_timeout
= DEFAULT_TIMEOUT_USEC
;
/* Parses the command line with getopt_long() using the long options "log-level", "log-target", "log-color",
 * "log-location", "exit-code" and "timeout".
 *
 * Visible handlers:
 *   - log level / target / color / location values are forwarded to the matching log_*_from_string() helper;
 *     log_show_color(true) and log_show_location(true) are also called (presumably when the optional
 *     argument is absent; the guarding condition is not visible here).
 *   - the exit code is parsed with safe_atou8() into arg_exit_code.
 *   - the timeout is parsed with parse_sec() into arg_timeout.
 * A value that fails to parse is reported through log_error_errno() with an "ignoring" message.
 * "Excess arguments, ignoring" and "Verb argument missing." are logged for the corresponding situations.
 *
 * NOTE(review): the enum members after ARG_LOG_LEVEL, the switch/case skeleton and the return statements are
 * not present in this view, so the exact return values cannot be documented from here. */
63 static int parse_argv(int argc
, char *argv
[]) {
65 ARG_LOG_LEVEL
= 0x100,
73 static const struct option options
[] = {
74 { "log-level", required_argument
, NULL
, ARG_LOG_LEVEL
},
75 { "log-target", required_argument
, NULL
, ARG_LOG_TARGET
},
76 { "log-color", optional_argument
, NULL
, ARG_LOG_COLOR
},
77 { "log-location", optional_argument
, NULL
, ARG_LOG_LOCATION
},
78 { "exit-code", required_argument
, NULL
, ARG_EXIT_CODE
},
79 { "timeout", required_argument
, NULL
, ARG_TIMEOUT
},
88 /* "-" prevents getopt from permuting argv[] and moving the verb away
89 * from argv[1]. Our interface to initrd promises it'll be there. */
90 while ((c
= getopt_long(argc
, argv
, "-", options
, NULL
)) >= 0)
94 r
= log_set_max_level_from_string(optarg
);
96 log_error_errno(r
, "Failed to parse log level %s, ignoring.", optarg
);
101 r
= log_set_target_from_string(optarg
);
103 log_error_errno(r
, "Failed to parse log target %s, ignoring", optarg
);
110 r
= log_show_color_from_string(optarg
);
112 log_error_errno(r
, "Failed to parse log color setting %s, ignoring", optarg
);
114 log_show_color(true);
118 case ARG_LOG_LOCATION
:
120 r
= log_show_location_from_string(optarg
);
122 log_error_errno(r
, "Failed to parse log location setting %s, ignoring", optarg
);
124 log_show_location(true);
129 r
= safe_atou8(optarg
, &arg_exit_code
);
131 log_error_errno(r
, "Failed to parse exit code %s, ignoring", optarg
);
136 r
= parse_sec(optarg
, &arg_timeout
);
138 log_error_errno(r
, "Failed to parse shutdown timeout %s, ignoring", optarg
);
146 log_error("Excess arguments, ignoring");
153 assert_not_reached("Unhandled option code.");
157 log_error("Verb argument missing.");
/* Prepares /run/initramfs and switches the root file system to it.
 *
 * Steps, in order:
 *   1. bind-mount /run/initramfs onto itself (MS_BIND);
 *   2. remount it with MS_PRIVATE;
 *   3. call switch_root("/run/initramfs", "/oldroot", false, MS_BIND).
 *
 * Returns the value of log_error_errno() if either mount() call fails, otherwise whatever switch_root()
 * returns. NOTE(review): the meaning of the "/oldroot" and false arguments is defined by switch_root(), which
 * is not part of this view. */
164 static int switch_root_initramfs(void) {
165 if (mount("/run/initramfs", "/run/initramfs", NULL
, MS_BIND
, NULL
) < 0)
166 return log_error_errno(errno
, "Failed to mount bind /run/initramfs on /run/initramfs: %m");
168 if (mount(NULL
, "/run/initramfs", NULL
, MS_PRIVATE
, NULL
) < 0)
169 return log_error_errno(errno
, "Failed to make /run/initramfs private mount: %m");
171 /* switch_root with MS_BIND, because there might still be processes lurking around, which have open file descriptors.
172 * /run/initramfs/shutdown will take care of these.
173 * Also do not detach the old root, because /run/initramfs/shutdown needs to access it.
175 return switch_root("/run/initramfs", "/oldroot", false, MS_BIND
);
/* Reviewer summary of sync_making_progress(prev_dirty):
 *
 * Opens /proc/meminfo and walks it line by line. Lines whose first word is none of "NFS_Unstable:",
 * "Writeback:" or "Dirty:" are filtered by the first_word() test; for the remaining lines one unsigned long long
 * is extracted with sscanf("%*s %llu %*s"). The result is computed as (*prev_dirty > val).
 *
 * NOTE(review): the existing comment below says "greater than the previous value", while the visible
 * comparison reports progress when the previous value is greater than val; confirm which is intended.
 * NOTE(review): on fopen() failure the function returns the value of log_warning_errno() from a bool function;
 * if that helper yields a non-zero error code this converts to true, which would not match "indicate that the
 * sync is not making progress"; confirm.
 * NOTE(review): the statements that accumulate into val and that store the new value through prev_dirty are
 * not present in this view. */
178 /* Read the following fields from /proc/meminfo:
184 * Return true if the sum of these fields is greater than the previous
185 * value input. For all other issues, report the failure and indicate that
186 * the sync is not making progress.
188 static bool sync_making_progress(unsigned long long *prev_dirty
) {
189 _cleanup_fclose_
FILE *f
= NULL
;
192 unsigned long long val
= 0;
194 f
= fopen("/proc/meminfo", "re");
196 return log_warning_errno(errno
, "Failed to open /proc/meminfo: %m");
198 FOREACH_LINE(line
, f
, log_warning_errno(errno
, "Failed to parse /proc/meminfo: %m")) {
199 unsigned long long ull
= 0;
201 if (!first_word(line
, "NFS_Unstable:") && !first_word(line
, "Writeback:") && !first_word(line
, "Dirty:"))
205 if (sscanf(line
, "%*s %llu %*s", &ull
) != 1) {
207 log_warning_errno(errno
, "Failed to parse /proc/meminfo: %m");
209 log_warning("Failed to parse /proc/meminfo");
217 r
= *prev_dirty
> val
;
/* Runs a sync in a separate process and supervises it.
 *
 * Visible sequence: BLOCK_SIGNALS(SIGCHLD) is invoked, asynchronous_sync(&pid) starts the child, then a loop
 * bounded by SYNC_PROGRESS_ATTEMPTS waits for the child with wait_for_terminate_with_timeout(pid,
 * SYNC_TIMEOUT_USEC). When the wait returns -ETIMEDOUT, sync_making_progress(&dirty) is consulted; other
 * failures are logged. If the loop is left because of a timeout, an error is logged and SIGKILL is sent to the
 * child.
 *
 * dirty starts at ULONG_LONG_MAX and is passed by pointer to sync_making_progress().
 * NOTE(review): the early-return statements and the body of the "making progress" branch are not present in
 * this view. */
224 static void sync_with_progress(void) {
225 unsigned long long dirty
= ULONG_LONG_MAX
;
230 BLOCK_SIGNALS(SIGCHLD
);
232 /* Due to the possibility of the sync operation hanging, we fork a child process and monitor the progress. If
233 * the timeout lapses, the assumption is that that particular sync stalled. */
235 r
= asynchronous_sync(&pid
);
237 log_error_errno(r
, "Failed to fork sync(): %m");
241 log_info("Syncing filesystems and block devices.");
243 /* Start monitoring the sync operation. If more than
244 * SYNC_PROGRESS_ATTEMPTS lapse without progress being made,
245 * we assume that the sync is stalled */
246 for (checks
= 0; checks
< SYNC_PROGRESS_ATTEMPTS
; checks
++) {
247 r
= wait_for_terminate_with_timeout(pid
, SYNC_TIMEOUT_USEC
);
249 /* Sync finished without error.
250 * (The sync itself does not return an error code) */
252 else if (r
== -ETIMEDOUT
) {
253 /* Reset the check counter if the "Dirty" value is
255 if (sync_making_progress(&dirty
))
258 log_error_errno(r
, "Failed to sync filesystems and block devices: %m");
263 /* Only reached in the event of a timeout. We should issue a kill
264 * to the stray process. */
265 log_error("Syncing filesystems and block devices - timed out, issuing SIGKILL to PID "PID_FMT
".", pid
);
266 (void) kill(pid
, SIGKILL
);
/* Entry point of the shutdown helper. Summary of the calls visible in this view, in order:
 *
 *   1. Logging is pointed at the console, log_set_prohibit_ipc(true) is set, the environment and the command
 *      line (parse_argv()) are evaluated.
 *   2. "Not executed by init (PID 1)." is logged when getpid_cached() != 1.
 *   3. arg_verb is matched against "reboot", "poweroff", "halt", "kexec" and "exit" to select cmd; any other
 *      verb is reported as unknown.
 *   4. The cgroup root path is fetched, container detection runs, and the WATCHDOG_USEC / WATCHDOG_DEVICE
 *      environment variables are read (the latter is handed to watchdog_set_device()).
 *   5. mlockall(MCL_CURRENT|MCL_FUTURE), then sync_with_progress().
 *   6. SIGTERM and then SIGKILL are broadcast with arg_timeout.
 *   7. Up to FINALIZE_ATTEMPTS passes of cg_trim(), umount_all(), swapoff_all(), loopback_detach_all() and
 *      dm_detach_all(); each need_* flag starts as !in_container.
 *   8. watchdog_free_device(), then execute_directories() over SYSTEM_SHUTDOWN_PATH with arg_verb as
 *      arguments[1].
 *   9. Outside a container and outside the initrd, if /run/initramfs/shutdown is executable:
 *      switch_root_initramfs() followed by execv("/shutdown", argv).
 *  10. A second sync_with_progress(), then the verb-specific final action (kexec via a forked child, reboot
 *      with an optional parameter read from /run/systemd/reboot-param, power off, halt).
 *  11. On EPERM inside a container "Exiting container." is logged; otherwise the failure is logged and a final
 *      emergency message is emitted.
 *
 * NOTE(review): many control-flow lines (conditions, case labels, goto/return statements, some comment
 * terminators) are not present in this view; the list above names only what is visible and does not claim the
 * exact branching. */
269 int main(int argc
, char *argv
[]) {
270 bool need_umount
, need_swapoff
, need_loop_detach
, need_dm_detach
;
271 bool in_container
, use_watchdog
= false;
272 _cleanup_free_
char *cgroup
= NULL
;
276 static const char* const dirs
[] = {SYSTEM_SHUTDOWN_PATH
, NULL
};
277 char *watchdog_device
;
279 /* The log target defaults to console, but the original systemd process will pass its log target in through a
280 * command line argument, which will override this default. Also, ensure we'll never log to the journal or
281 * syslog, as these logging daemons are either already dead or will die very soon. */
283 log_set_target(LOG_TARGET_CONSOLE
);
284 log_set_prohibit_ipc(true);
285 log_parse_environment();
287 r
= parse_argv(argc
, argv
);
295 if (getpid_cached() != 1) {
296 log_error("Not executed by init (PID 1).");
301 if (streq(arg_verb
, "reboot"))
303 else if (streq(arg_verb
, "poweroff"))
305 else if (streq(arg_verb
, "halt"))
306 cmd
= RB_HALT_SYSTEM
;
307 else if (streq(arg_verb
, "kexec"))
308 cmd
= LINUX_REBOOT_CMD_KEXEC
;
309 else if (streq(arg_verb
, "exit"))
310 cmd
= 0; /* ignored, just checking that arg_verb is valid */
312 log_error("Unknown action '%s'.", arg_verb
);
317 (void) cg_get_root_path(&cgroup
);
318 in_container
= detect_container() > 0;
320 use_watchdog
= !!getenv("WATCHDOG_USEC");
321 watchdog_device
= getenv("WATCHDOG_DEVICE");
322 if (watchdog_device
) {
323 r
= watchdog_set_device(watchdog_device
);
325 log_warning_errno(r
, "Failed to set watchdog device to %s, ignoring: %m",
329 /* Lock us into memory */
330 (void) mlockall(MCL_CURRENT
|MCL_FUTURE
);
332 /* Synchronize everything that is not written to disk yet at this point already. This is a good idea so that
333 * slow IO is processed here already and the final process killing spree is not impacted by processes
334 * desperately trying to sync IO to disk within their timeout. Do not remove this sync, data corruption will
337 sync_with_progress();
341 log_info("Sending SIGTERM to remaining processes...");
342 broadcast_signal(SIGTERM
, true, true, arg_timeout
);
344 log_info("Sending SIGKILL to remaining processes...");
345 broadcast_signal(SIGKILL
, true, false, arg_timeout
);
347 need_umount
= !in_container
;
348 need_swapoff
= !in_container
;
349 need_loop_detach
= !in_container
;
350 need_dm_detach
= !in_container
;
352 /* Unmount all mountpoints, swaps, and loopback devices */
353 for (retries
= 0; retries
< FINALIZE_ATTEMPTS
; retries
++) {
354 bool changed
= false;
359 /* Let's trim the cgroup tree on each iteration so
360 that we leave an empty cgroup tree around, so that
361 container managers get a nice notify event when we
364 cg_trim(SYSTEMD_CGROUP_CONTROLLER
, cgroup
, false);
367 log_info("Unmounting file systems.");
368 r
= umount_all(&changed
);
371 log_info("All filesystems unmounted.");
373 log_info("Not all file systems unmounted, %d left.", r
);
375 log_error_errno(r
, "Failed to unmount file systems: %m");
379 log_info("Deactivating swaps.");
380 r
= swapoff_all(&changed
);
382 need_swapoff
= false;
383 log_info("All swaps deactivated.");
385 log_info("Not all swaps deactivated, %d left.", r
);
387 log_error_errno(r
, "Failed to deactivate swaps: %m");
390 if (need_loop_detach
) {
391 log_info("Detaching loop devices.");
392 r
= loopback_detach_all(&changed
);
394 need_loop_detach
= false;
395 log_info("All loop devices detached.");
397 log_info("Not all loop devices detached, %d left.", r
);
399 log_error_errno(r
, "Failed to detach loop devices: %m");
402 if (need_dm_detach
) {
403 log_info("Detaching DM devices.");
404 r
= dm_detach_all(&changed
);
406 need_dm_detach
= false;
407 log_info("All DM devices detached.");
409 log_info("Not all DM devices detached, %d left.", r
);
411 log_error_errno(r
, "Failed to detach DM devices: %m");
414 if (!need_umount
&& !need_swapoff
&& !need_loop_detach
&& !need_dm_detach
) {
416 log_info("All filesystems, swaps, loop devices, DM devices detached.");
421 /* If in this iteration we didn't manage to
422 * unmount/deactivate anything, we simply give up */
424 log_info("Cannot finalize remaining%s%s%s%s continuing.",
425 need_umount
? " file systems," : "",
426 need_swapoff
? " swap devices," : "",
427 need_loop_detach
? " loop devices," : "",
428 need_dm_detach
? " DM devices," : "");
432 log_debug("After %u retries, couldn't finalize remaining %s%s%s%s trying again.",
434 need_umount
? " file systems," : "",
435 need_swapoff
? " swap devices," : "",
436 need_loop_detach
? " loop devices," : "",
437 need_dm_detach
? " DM devices," : "");
440 log_error("Too many iterations, giving up.");
444 /* We're done with the watchdog. */
445 watchdog_free_device();
448 arguments
[1] = arg_verb
;
450 execute_directories(dirs
, DEFAULT_TIMEOUT_USEC
, NULL
, NULL
, arguments
);
452 if (!in_container
&& !in_initrd() &&
453 access("/run/initramfs/shutdown", X_OK
) == 0) {
454 r
= switch_root_initramfs();
456 argv
[0] = (char*) "/shutdown";
459 make_console_stdio();
461 log_info("Successfully changed into root pivot.\n"
462 "Returning to initrd...");
464 execv("/shutdown", argv
);
465 log_error_errno(errno
, "Failed to execute shutdown binary: %m");
467 log_error_errno(r
, "Failed to switch root to \"/run/initramfs\": %m");
471 if (need_umount
|| need_swapoff
|| need_loop_detach
|| need_dm_detach
)
472 log_error("Failed to finalize %s%s%s%s ignoring",
473 need_umount
? " file systems," : "",
474 need_swapoff
? " swap devices," : "",
475 need_loop_detach
? " loop devices," : "",
476 need_dm_detach
? " DM devices," : "");
478 /* The kernel will automatically flush ATA disks and suchlike on reboot(), but the file systems need to be
479 * sync'ed explicitly in advance. So let's do this here, but not needlessly slow down containers. Note that we
480 * sync'ed things already once above, but we did some more work since then which might have caused IO, hence
481 * let's do it once more. Do not remove this sync, data corruption will result. */
483 sync_with_progress();
485 if (streq(arg_verb
, "exit")) {
489 /* We cannot exit() on the host, fallback on another
497 case LINUX_REBOOT_CMD_KEXEC
:
500 /* We cheat and exec kexec to avoid doing all its work */
501 log_info("Rebooting with kexec.");
503 r
= safe_fork("(sd-kexec)", FORK_RESET_SIGNALS
|FORK_CLOSE_ALL_FDS
|FORK_LOG
|FORK_WAIT
, NULL
);
505 const char * const args
[] = {
511 execv(args
[0], (char * const *) args
);
515 /* If we are still running, then the kexec can't have worked, let's fall through */
523 _cleanup_free_
char *param
= NULL
;
/* NOTE(review): the second argument on the next statement appears as a pilcrow followed by "m"; this looks
 * like a mis-encoded address-of expression on the param variable declared just above. Confirm against the
 * pristine file before building. */
525 r
= read_one_line_file("/run/systemd/reboot-param", ¶m
);
526 if (r
< 0 && r
!= -ENOENT
)
527 log_warning_errno(r
, "Failed to read reboot parameter file: %m");
529 if (!isempty(param
)) {
530 log_info("Rebooting with argument '%s'.", param
);
531 syscall(SYS_reboot
, LINUX_REBOOT_MAGIC1
, LINUX_REBOOT_MAGIC2
, LINUX_REBOOT_CMD_RESTART2
, param
);
532 log_warning_errno(errno
, "Failed to reboot with parameter, retrying without: %m");
536 log_info("Rebooting.");
540 log_info("Powering off.");
544 log_info("Halting system.");
548 assert_not_reached("Unknown magic");
552 if (errno
== EPERM
&& in_container
) {
553 /* If we are in a container, and we lacked
554 * CAP_SYS_BOOT just exit, this will kill our
555 * container for good. */
556 log_info("Exiting container.");
560 r
= log_error_errno(errno
, "Failed to invoke reboot(): %m");
563 log_emergency_errno(r
, "Critical error while doing system shutdown: %m");