1 /* SPDX-License-Identifier: LGPL-2.1+ */
3 This file is part of systemd.
5 Copyright 2010 ProFUSION embedded systems
7 systemd is free software; you can redistribute it and/or modify it
8 under the terms of the GNU Lesser General Public License as published by
9 the Free Software Foundation; either version 2.1 of the License, or
10 (at your option) any later version.
12 systemd is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 Lesser General Public License for more details.
17 You should have received a copy of the GNU Lesser General Public License
18 along with systemd; If not, see <http://www.gnu.org/licenses/>.
23 #include <linux/reboot.h>
28 #include <sys/mount.h>
29 #include <sys/reboot.h>
33 #include "alloc-util.h"
35 #include "cgroup-util.h"
37 #include "exec-util.h"
43 #include "parse-util.h"
44 #include "process-util.h"
45 #include "reboot-util.h"
46 #include "signal-util.h"
47 #include "string-util.h"
48 #include "switch-root.h"
49 #include "terminal-util.h"
/* Upper bound on the iterations of the unmount/swapoff/loop-detach/DM-detach retry loop in main(). */
55 #define FINALIZE_ATTEMPTS 50
/* Upper bound on the iterations of the wait loop in sync_with_progress(). */
57 #define SYNC_PROGRESS_ATTEMPTS 3
/* Timeout passed to wait_for_terminate_with_timeout() on each iteration of that wait loop. */
58 #define SYNC_TIMEOUT_USEC (10*USEC_PER_SEC)
/* Action verb; main() compares it against "reboot", "poweroff", "halt", "kexec" and "exit". */
60 static char* arg_verb
;
/* Filled in by --exit-code (safe_atou8()); returned by main() in the "exit" code path. */
61 static uint8_t arg_exit_code
;
/* Filled in by --timeout (parse_sec()); handed to broadcast_signal() for both SIGTERM and SIGKILL. */
62 static usec_t arg_timeout
= DEFAULT_TIMEOUT_USEC
;
/* Parses the command line of the shutdown helper.
 *
 * Options visible in the table below: --log-level, --log-target, --log-color, --log-location (logging setup),
 * --exit-code (stored in arg_exit_code through safe_atou8()) and --timeout (stored in arg_timeout through
 * parse_sec()). When one of these values cannot be parsed the visible code only logs a message ending in
 * "ignoring"; no return statement accompanies those log calls.
 *
 * getopt_long() is called with "-" as option string so that argv[] is not permuted and the verb stays in place.
 *
 * NOTE(review): this listing is missing lines (the enum header, the case labels, the assignment of arg_verb and
 * every return statement), so the exact return values and the handling of the verb cannot be confirmed from
 * here. */
64 static int parse_argv(int argc
, char *argv
[]) {
66 ARG_LOG_LEVEL
= 0x100,
74 static const struct option options
[] = {
75 { "log-level", required_argument
, NULL
, ARG_LOG_LEVEL
},
76 { "log-target", required_argument
, NULL
, ARG_LOG_TARGET
},
77 { "log-color", optional_argument
, NULL
, ARG_LOG_COLOR
},
78 { "log-location", optional_argument
, NULL
, ARG_LOG_LOCATION
},
79 { "exit-code", required_argument
, NULL
, ARG_EXIT_CODE
},
80 { "timeout", required_argument
, NULL
, ARG_TIMEOUT
},
89 /* "-" prevents getopt from permuting argv[] and moving the verb away
90 * from argv[1]. Our interface to initrd promises it'll be there. */
91 while ((c
= getopt_long(argc
, argv
, "-", options
, NULL
)) >= 0)
95 r
= log_set_max_level_from_string(optarg
);
97 log_error_errno(r
, "Failed to parse log level %s, ignoring.", optarg
);
102 r
= log_set_target_from_string(optarg
);
104 log_error_errno(r
, "Failed to parse log target %s, ignoring", optarg
);
111 r
= log_show_color_from_string(optarg
);
113 log_error_errno(r
, "Failed to parse log color setting %s, ignoring", optarg
);
/* NOTE(review): presumably the branch taken when --log-color is given without a value (the option takes an
 * optional argument); the surrounding if/else lines are not visible here -- confirm. */
115 log_show_color(true);
119 case ARG_LOG_LOCATION
:
121 r
= log_show_location_from_string(optarg
);
123 log_error_errno(r
, "Failed to parse log location setting %s, ignoring", optarg
);
/* NOTE(review): presumably the no-value form of --log-location, mirroring --log-color -- confirm. */
125 log_show_location(true);
130 r
= safe_atou8(optarg
, &arg_exit_code
);
132 log_error_errno(r
, "Failed to parse exit code %s, ignoring", optarg
);
137 r
= parse_sec(optarg
, &arg_timeout
);
139 log_error_errno(r
, "Failed to parse shutdown timeout %s, ignoring", optarg
);
147 log_error("Excess arguments, ignoring");
154 assert_not_reached("Unhandled option code.");
158 log_error("Verb argument missing.");
/* Prepares /run/initramfs as the new root and switches into it.
 *
 * Steps visible below:
 *   1. bind-mount /run/initramfs onto itself (MS_BIND);
 *   2. mark that mount MS_PRIVATE;
 *   3. call switch_root("/run/initramfs", "/oldroot", false, MS_BIND).
 *
 * Returns the value of log_error_errno() if either mount() call fails, otherwise whatever switch_root()
 * returns. */
165 static int switch_root_initramfs(void) {
166 if (mount("/run/initramfs", "/run/initramfs", NULL
, MS_BIND
, NULL
) < 0)
167 return log_error_errno(errno
, "Failed to mount bind /run/initramfs on /run/initramfs: %m");
169 if (mount(NULL
, "/run/initramfs", NULL
, MS_PRIVATE
, NULL
) < 0)
170 return log_error_errno(errno
, "Failed to make /run/initramfs private mount: %m");
172 /* switch_root with MS_BIND, because there might still be processes lurking around, which have open file descriptors.
173 * /run/initramfs/shutdown will take care of these.
174 * Also do not detach the old root, because /run/initramfs/shutdown needs to access it.
176 return switch_root("/run/initramfs", "/oldroot", false, MS_BIND
);
179 /* Read the following fields from /proc/meminfo:
185 * Return true if the sum of these fields is greater than the previous
186 * value input. For all other issues, report the failure and indicate that
187 * the sync is not making progress.
/* Samples /proc/meminfo to judge whether an ongoing sync is still making progress.
 *
 * prev_dirty: in/out pointer to the value obtained by the previous call; it is dereferenced for the comparison
 *             below.
 *
 * The lines inspected are those whose first word is "NFS_Unstable:", "Writeback:" or "Dirty:"; the number on
 * each such line is read with sscanf() into a local.
 *
 * NOTE(review): the existing description above this function says "greater than the previous value", but the
 * comparison visible here is `*prev_dirty > val`, i.e. the result is true when the previous value exceeds val.
 * The lines that accumulate into val, store the new value into *prev_dirty and return are missing from this
 * listing -- confirm against the full file. */
189 static bool sync_making_progress(unsigned long long *prev_dirty
) {
190 _cleanup_fclose_
FILE *f
= NULL
;
193 unsigned long long val
= 0;
195 f
= fopen("/proc/meminfo", "re");
197 return log_warning_errno(errno
, "Failed to open /proc/meminfo: %m");
199 FOREACH_LINE(line
, f
, log_warning_errno(errno
, "Failed to parse /proc/meminfo: %m")) {
200 unsigned long long ull
= 0;
/* True only for lines that match none of the three fields; presumably such lines are skipped (the
 * statement following this condition is not visible here). */
202 if (!first_word(line
, "NFS_Unstable:") && !first_word(line
, "Writeback:") && !first_word(line
, "Dirty:"))
/* Expected line shape: a label, an unsigned number, and one more word; only the number is stored. */
206 if (sscanf(line
, "%*s %llu %*s", &ull
) != 1) {
208 log_warning_errno(errno
, "Failed to parse /proc/meminfo: %m");
210 log_warning("Failed to parse /proc/meminfo");
/* r becomes true when the previously recorded value is larger than val. */
218 r
= *prev_dirty
> val
;
/* Runs a sync in a separate process and supervises it.
 *
 * Visible flow: SIGCHLD is blocked, asynchronous_sync() provides the PID of the process doing the sync, and the
 * loop below waits for that PID with a SYNC_TIMEOUT_USEC timeout, at most SYNC_PROGRESS_ATTEMPTS times in a row.
 * On -ETIMEDOUT sync_making_progress() is consulted (starting from dirty == ULONG_LONG_MAX). If the loop runs
 * out, the process is sent SIGKILL and an error is logged.
 *
 * NOTE(review): the statements that reset the counter and leave the function early are missing from this
 * listing. */
225 static void sync_with_progress(void) {
226 unsigned long long dirty
= ULONG_LONG_MAX
;
231 BLOCK_SIGNALS(SIGCHLD
);
233 /* Due to the possibility of the sync operation hanging, we fork a child process and monitor the progress. If
234 * the timeout lapses, the assumption is that that particular sync stalled. */
236 r
= asynchronous_sync(&pid
);
238 log_error_errno(r
, "Failed to fork sync(): %m");
242 log_info("Syncing filesystems and block devices.");
244 /* Start monitoring the sync operation. If more than
245 * SYNC_PROGRESS_ATTEMPTS lapse without progress being made,
246 * we assume that the sync is stalled */
247 for (checks
= 0; checks
< SYNC_PROGRESS_ATTEMPTS
; checks
++) {
248 r
= wait_for_terminate_with_timeout(pid
, SYNC_TIMEOUT_USEC
);
250 /* Sync finished without error.
251 * (The sync itself does not return an error code) */
253 else if (r
== -ETIMEDOUT
) {
254 /* Reset the check counter if the "Dirty" value is
256 if (sync_making_progress(&dirty
))
259 log_error_errno(r
, "Failed to sync filesystems and block devices: %m");
264 /* Only reached in the event of a timeout. We should issue a kill
265 * to the stray process. */
266 log_error("Syncing filesystems and block devices - timed out, issuing SIGKILL to PID "PID_FMT
".", pid
);
267 (void) kill(pid
, SIGKILL
);
/* Entry point of the shutdown helper. Steps visible in this listing, in order:
 *
 *   - logging is pointed at the console, IPC logging is prohibited, then the environment and the command line
 *     (parse_argv()) are evaluated;
 *   - an error is logged if getpid_cached() is not 1;
 *   - arg_verb is matched against "reboot", "poweroff", "halt", "kexec" and "exit" to pick cmd; anything else
 *     logs "Unknown action";
 *   - the cgroup root path and the container state are looked up, and the watchdog is configured from the
 *     WATCHDOG_USEC and WATCHDOG_DEVICE environment variables;
 *   - the process is locked into memory with mlockall() and a first sync_with_progress() is performed;
 *   - SIGTERM and then SIGKILL are broadcast, each with arg_timeout;
 *   - up to FINALIZE_ATTEMPTS rounds of cg_trim(), umount_all(), swapoff_all(), loopback_detach_all() and
 *     dm_detach_all() are run; each need_* flag starts out as !in_container and is cleared once its step
 *     reports completion;
 *   - the watchdog device is released and the executables in SYSTEM_SHUTDOWN_PATH are run with arg_verb as
 *     arguments[1];
 *   - outside a container and outside the initrd, if /run/initramfs/shutdown is executable,
 *     switch_root_initramfs() is called and "/shutdown" is executed with argv[0] replaced;
 *   - anything still not finalized is logged, a second sync_with_progress() is performed, and the action is
 *     carried out: the "exit" verb returns arg_exit_code or falls back to RB_POWER_OFF, kexec is attempted in a
 *     forked child, and reboot_with_parameter(REBOOT_LOG) is invoked on the reboot path;
 *   - if errno is EPERM inside a container "Exiting container." is logged; otherwise the failure is logged and
 *     an emergency message is emitted.
 *
 * NOTE(review): many lines of this function (conditions, case labels, goto/return statements, the reboot()
 * call itself) are missing from this listing, so the exact branching and the return value cannot be confirmed
 * from here. */
270 int main(int argc
, char *argv
[]) {
271 bool need_umount
, need_swapoff
, need_loop_detach
, need_dm_detach
;
272 bool in_container
, use_watchdog
= false;
273 _cleanup_free_
char *cgroup
= NULL
;
277 static const char* const dirs
[] = {SYSTEM_SHUTDOWN_PATH
, NULL
};
278 char *watchdog_device
;
280 /* The log target defaults to console, but the original systemd process will pass its log target in through a
281 * command line argument, which will override this default. Also, ensure we'll never log to the journal or
282 * syslog, as these logging daemons are either already dead or will die very soon. */
284 log_set_target(LOG_TARGET_CONSOLE
);
285 log_set_prohibit_ipc(true);
286 log_parse_environment();
288 r
= parse_argv(argc
, argv
);
296 if (getpid_cached() != 1) {
297 log_error("Not executed by init (PID 1).");
302 if (streq(arg_verb
, "reboot"))
304 else if (streq(arg_verb
, "poweroff"))
306 else if (streq(arg_verb
, "halt"))
307 cmd
= RB_HALT_SYSTEM
;
308 else if (streq(arg_verb
, "kexec"))
309 cmd
= LINUX_REBOOT_CMD_KEXEC
;
310 else if (streq(arg_verb
, "exit"))
311 cmd
= 0; /* ignored, just checking that arg_verb is valid */
313 log_error("Unknown action '%s'.", arg_verb
);
318 (void) cg_get_root_path(&cgroup
);
319 in_container
= detect_container() > 0;
321 use_watchdog
= !!getenv("WATCHDOG_USEC");
322 watchdog_device
= getenv("WATCHDOG_DEVICE");
323 if (watchdog_device
) {
324 r
= watchdog_set_device(watchdog_device
);
326 log_warning_errno(r
, "Failed to set watchdog device to %s, ignoring: %m",
330 /* Lock us into memory */
331 (void) mlockall(MCL_CURRENT
|MCL_FUTURE
);
333 /* Synchronize everything that is not written to disk yet at this point already. This is a good idea so that
334 * slow IO is processed here already and the final process killing spree is not impacted by processes
335 * desperately trying to sync IO to disk within their timeout. Do not remove this sync, data corruption will
338 sync_with_progress();
342 log_info("Sending SIGTERM to remaining processes...");
343 broadcast_signal(SIGTERM
, true, true, arg_timeout
);
345 log_info("Sending SIGKILL to remaining processes...");
346 broadcast_signal(SIGKILL
, true, false, arg_timeout
);
348 need_umount
= !in_container
;
349 need_swapoff
= !in_container
;
350 need_loop_detach
= !in_container
;
351 need_dm_detach
= !in_container
;
353 /* Unmount all mountpoints, swaps, and loopback devices */
354 for (retries
= 0; retries
< FINALIZE_ATTEMPTS
; retries
++) {
355 bool changed
= false;
360 /* Let's trim the cgroup tree on each iteration so
361 that we leave an empty cgroup tree around, so that
362 container managers get a nice notify event when we
365 cg_trim(SYSTEMD_CGROUP_CONTROLLER
, cgroup
, false);
368 log_info("Unmounting file systems.");
369 r
= umount_all(&changed
);
372 log_info("All filesystems unmounted.");
374 log_info("Not all file systems unmounted, %d left.", r
);
376 log_error_errno(r
, "Failed to unmount file systems: %m");
380 log_info("Deactivating swaps.");
381 r
= swapoff_all(&changed
);
383 need_swapoff
= false;
384 log_info("All swaps deactivated.");
386 log_info("Not all swaps deactivated, %d left.", r
);
388 log_error_errno(r
, "Failed to deactivate swaps: %m");
391 if (need_loop_detach
) {
392 log_info("Detaching loop devices.");
393 r
= loopback_detach_all(&changed
);
395 need_loop_detach
= false;
396 log_info("All loop devices detached.");
398 log_info("Not all loop devices detached, %d left.", r
);
400 log_error_errno(r
, "Failed to detach loop devices: %m");
403 if (need_dm_detach
) {
404 log_info("Detaching DM devices.");
405 r
= dm_detach_all(&changed
);
407 need_dm_detach
= false;
408 log_info("All DM devices detached.");
410 log_info("Not all DM devices detached, %d left.", r
);
412 log_error_errno(r
, "Failed to detach DM devices: %m");
415 if (!need_umount
&& !need_swapoff
&& !need_loop_detach
&& !need_dm_detach
) {
417 log_info("All filesystems, swaps, loop devices, DM devices detached.");
422 /* If in this iteration we didn't manage to
423 * unmount/deactivate anything, we simply give up */
425 log_info("Cannot finalize remaining%s%s%s%s continuing.",
426 need_umount
? " file systems," : "",
427 need_swapoff
? " swap devices," : "",
428 need_loop_detach
? " loop devices," : "",
429 need_dm_detach
? " DM devices," : "");
433 log_debug("After %u retries, couldn't finalize remaining %s%s%s%s trying again.",
435 need_umount
? " file systems," : "",
436 need_swapoff
? " swap devices," : "",
437 need_loop_detach
? " loop devices," : "",
438 need_dm_detach
? " DM devices," : "");
441 log_error("Too many iterations, giving up.");
445 /* We're done with the watchdog. */
446 watchdog_free_device();
449 arguments
[1] = arg_verb
;
451 execute_directories(dirs
, DEFAULT_TIMEOUT_USEC
, NULL
, NULL
, arguments
);
453 if (!in_container
&& !in_initrd() &&
454 access("/run/initramfs/shutdown", X_OK
) == 0) {
455 r
= switch_root_initramfs();
457 argv
[0] = (char*) "/shutdown";
460 make_console_stdio();
462 log_info("Successfully changed into root pivot.\n"
463 "Returning to initrd...");
465 execv("/shutdown", argv
);
466 log_error_errno(errno
, "Failed to execute shutdown binary: %m");
468 log_error_errno(r
, "Failed to switch root to \"/run/initramfs\": %m");
472 if (need_umount
|| need_swapoff
|| need_loop_detach
|| need_dm_detach
)
473 log_error("Failed to finalize %s%s%s%s ignoring",
474 need_umount
? " file systems," : "",
475 need_swapoff
? " swap devices," : "",
476 need_loop_detach
? " loop devices," : "",
477 need_dm_detach
? " DM devices," : "");
479 /* The kernel will automatically flush ATA disks and suchlike on reboot(), but the file systems need to be
480 * sync'ed explicitly in advance. So let's do this here, but not needlessly slow down containers. Note that we
481 * sync'ed things already once above, but we did some more work since then which might have caused IO, hence
482 * let's do it once more. Do not remove this sync, data corruption will result. */
484 sync_with_progress();
486 if (streq(arg_verb
, "exit")) {
488 return arg_exit_code
;
490 cmd
= RB_POWER_OFF
; /* We cannot exit() on the host, fallback on another method. */
495 case LINUX_REBOOT_CMD_KEXEC
:
498 /* We cheat and exec kexec to avoid doing all its work */
499 log_info("Rebooting with kexec.");
501 r
= safe_fork("(sd-kexec)", FORK_RESET_SIGNALS
|FORK_CLOSE_ALL_FDS
|FORK_LOG
|FORK_WAIT
, NULL
);
503 const char * const args
[] = {
509 execv(args
[0], (char * const *) args
);
513 /* If we are still running, then the kexec can't have worked, let's fall through */
520 (void) reboot_with_parameter(REBOOT_LOG
);
521 log_info("Rebooting.");
525 log_info("Powering off.");
529 log_info("Halting system.");
533 assert_not_reached("Unknown magic");
537 if (errno
== EPERM
&& in_container
) {
538 /* If we are in a container, and we lacked
539 * CAP_SYS_BOOT just exit, this will kill our
540 * container for good. */
541 log_info("Exiting container.");
545 r
= log_error_errno(errno
, "Failed to invoke reboot(): %m");
548 log_emergency_errno(r
, "Critical error while doing system shutdown: %m");