1 /* SPDX-License-Identifier: LGPL-2.1+ */
3 This file is part of systemd.
5 Copyright 2010 ProFUSION embedded systems
7 systemd is free software; you can redistribute it and/or modify it
8 under the terms of the GNU Lesser General Public License as published by
9 the Free Software Foundation; either version 2.1 of the License, or
10 (at your option) any later version.
12 systemd is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 Lesser General Public License for more details.
17 You should have received a copy of the GNU Lesser General Public License
18 along with systemd; If not, see <http://www.gnu.org/licenses/>.
23 #include <linux/reboot.h>
28 #include <sys/mount.h>
29 #include <sys/reboot.h>
33 #include "alloc-util.h"
34 #include "cgroup-util.h"
37 #include "exec-util.h"
42 #include "parse-util.h"
43 #include "process-util.h"
44 #include "signal-util.h"
45 #include "string-util.h"
46 #include "switch-root.h"
47 #include "terminal-util.h"
/* Loop bound for the unmount/swapoff/detach retry loop in main(). */
53 #define FINALIZE_ATTEMPTS 50
/* Loop bound for the progress checks in sync_with_progress(). */
55 #define SYNC_PROGRESS_ATTEMPTS 3
/* Timeout passed to wait_for_terminate_with_timeout() on each progress check. */
56 #define SYNC_TIMEOUT_USEC (10*USEC_PER_SEC)
/* Shutdown verb; main() compares it against "reboot", "poweroff", "halt", "kexec" and "exit". */
58 static char* arg_verb
;
/* Receives the value of the --exit-code option (see the safe_atou8() call in parse_argv()). */
59 static uint8_t arg_exit_code
;
/* Parse the command line of the shutdown helper.
 *
 * Recognised long options (see the options[] table): --log-level, --log-target,
 * --log-color, --log-location and --exit-code. A value that fails to parse is
 * reported with log_error() and, according to the message text, ignored.
 * An error is also logged for excess arguments and for a missing verb.
 *
 * NOTE(review): several lines of this function (the remaining enum members, the
 * switch/case labels, the error checks and the return statements) are not
 * visible in this listing, so the exact return values and the place where
 * arg_verb is assigned cannot be confirmed from here. */
61 static int parse_argv(int argc
, char *argv
[]) {
/* Option identifiers start at 0x100. */
63 ARG_LOG_LEVEL
= 0x100,
70 static const struct option options
[] = {
71 { "log-level", required_argument
, NULL
, ARG_LOG_LEVEL
},
72 { "log-target", required_argument
, NULL
, ARG_LOG_TARGET
},
73 { "log-color", optional_argument
, NULL
, ARG_LOG_COLOR
},
74 { "log-location", optional_argument
, NULL
, ARG_LOG_LOCATION
},
75 { "exit-code", required_argument
, NULL
, ARG_EXIT_CODE
},
84 /* "-" prevents getopt from permuting argv[] and moving the verb away
85 * from argv[1]. Our interface to initrd promises it'll be there. */
86 while ((c
= getopt_long(argc
, argv
, "-", options
, NULL
)) >= 0)
90 r
= log_set_max_level_from_string(optarg
);
92 log_error("Failed to parse log level %s, ignoring.", optarg
);
97 r
= log_set_target_from_string(optarg
);
99 log_error("Failed to parse log target %s, ignoring", optarg
);
106 r
= log_show_color_from_string(optarg
);
108 log_error("Failed to parse log color setting %s, ignoring", optarg
);
110 log_show_color(true);
114 case ARG_LOG_LOCATION
:
116 r
= log_show_location_from_string(optarg
);
118 log_error("Failed to parse log location setting %s, ignoring", optarg
);
120 log_show_location(true);
/* The --exit-code value is handed to safe_atou8() with arg_exit_code as the output. */
125 r
= safe_atou8(optarg
, &arg_exit_code
);
127 log_error("Failed to parse exit code %s, ignoring", optarg
);
135 log_error("Excess arguments, ignoring");
142 assert_not_reached("Unhandled option code.");
146 log_error("Verb argument missing.");
/* Prepare /run/initramfs and switch the root file system to it.
 *
 * Steps, in order:
 *   1. bind-mount /run/initramfs onto itself (MS_BIND),
 *   2. mark that mount MS_PRIVATE,
 *   3. call switch_root("/run/initramfs", "/oldroot", false, MS_BIND).
 *
 * Returns the value of log_error_errno() if either mount() call fails,
 * otherwise whatever switch_root() returns. */
153 static int switch_root_initramfs(void) {
154 if (mount("/run/initramfs", "/run/initramfs", NULL
, MS_BIND
, NULL
) < 0)
155 return log_error_errno(errno
, "Failed to mount bind /run/initramfs on /run/initramfs: %m");
157 if (mount(NULL
, "/run/initramfs", NULL
, MS_PRIVATE
, NULL
) < 0)
158 return log_error_errno(errno
, "Failed to make /run/initramfs private mount: %m");
160 /* switch_root with MS_BIND, because there might still be processes lurking around, which have open file descriptors.
161 * /run/initramfs/shutdown will take care of these.
162 * Also do not detach the old root, because /run/initramfs/shutdown needs to access it.
164 return switch_root("/run/initramfs", "/oldroot", false, MS_BIND
);
167 /* Read the following fields from /proc/meminfo:
173 * Return true if the sum of these fields is less than the previous
174 * value input. For all other issues, report the failure and indicate that
175 * the sync is not making progress.
/* Compare the amount of not-yet-written data reported by /proc/meminfo with
 * the value stored in *prev_dirty.
 *
 * Only lines whose first word is "NFS_Unstable:", "Writeback:" or "Dirty:"
 * are passed to sscanf(); the result is computed as (*prev_dirty > val).
 *
 * NOTE(review): the lines that accumulate into val, update *prev_dirty and
 * return r are not visible in this listing -- confirm against the full file.
 * NOTE(review): the function is declared bool but returns the result of
 * log_warning_errno() when fopen() fails; if that helper returns a non-zero
 * (errno-style) value it converts to true, i.e. "making progress" -- confirm
 * whether that is intended. */
177 static bool sync_making_progress(unsigned long long *prev_dirty
) {
178 _cleanup_fclose_
FILE *f
= NULL
;
181 unsigned long long val
= 0;
183 f
= fopen("/proc/meminfo", "re");
185 return log_warning_errno(errno
, "Failed to open /proc/meminfo: %m");
187 FOREACH_LINE(line
, f
, log_warning_errno(errno
, "Failed to parse /proc/meminfo: %m")) {
188 unsigned long long ull
= 0;
/* Select only the three fields of interest. */
190 if (!first_word(line
, "NFS_Unstable:") && !first_word(line
, "Writeback:") && !first_word(line
, "Dirty:"))
/* Skip the field name, read the number, skip the unit. */
194 if (sscanf(line
, "%*s %llu %*s", &ull
) != 1) {
196 log_warning_errno(errno
, "Failed to parse /proc/meminfo: %m");
198 log_warning("Failed to parse /proc/meminfo");
/* True when the previously recorded value is larger than the current one. */
206 r
= *prev_dirty
> val
;
/* Run the sync in a forked child and watch it from the parent.
 *
 * The parent waits for the child with wait_for_terminate_with_timeout() using
 * SYNC_TIMEOUT_USEC, for up to SYNC_PROGRESS_ATTEMPTS checks. On -ETIMEDOUT it
 * asks sync_making_progress() whether the dirty counters changed; any other
 * error is logged. If the loop runs out, the child is sent SIGKILL.
 *
 * NOTE(review): the fork() call, the child body and the loop's early exits are
 * on lines not visible in this listing. */
213 static void sync_with_progress(void) {
/* Start from the maximum so the first comparison in sync_making_progress() has a baseline. */
217 unsigned long long dirty
= ULONG_LONG_MAX
;
219 BLOCK_SIGNALS(SIGCHLD
);
221 /* Due to the possibility of the sync operation hanging, we fork
222 * a child process and monitor the progress. If the timeout
223 * lapses, the assumption is that that particular sync stalled. */
226 log_error_errno(errno
, "Failed to fork: %m");
231 /* Start the sync operation here in the child */
236 log_info("Syncing filesystems and block devices.");
238 /* Start monitoring the sync operation. If more than
239 * SYNC_PROGRESS_ATTEMPTS lapse without progress being made,
240 * we assume that the sync is stalled */
241 for (checks
= 0; checks
< SYNC_PROGRESS_ATTEMPTS
; checks
++) {
242 r
= wait_for_terminate_with_timeout(pid
, SYNC_TIMEOUT_USEC
);
244 /* Sync finished without error.
245 * (The sync itself does not return an error code) */
247 else if (r
== -ETIMEDOUT
) {
248 /* Reset the check counter if the "Dirty" value is
250 if (sync_making_progress(&dirty
))
253 log_error_errno(r
, "Failed to sync filesystems and block devices: %m");
258 /* Only reached in the event of a timeout. We should issue a kill
259 * to the stray process. */
260 log_error("Syncing filesystems and block devices - timed out, issuing SIGKILL to PID "PID_FMT
".", pid
);
261 (void) kill(pid
, SIGKILL
);
/* Entry point of the shutdown helper. As far as the visible lines show, it:
 *   - sets up logging from the environment and calls parse_argv(),
 *   - logs an error when getpid_cached() != 1,
 *   - maps arg_verb ("reboot", "poweroff", "halt", "kexec", "exit") to a command
 *     value and logs "Unknown action" otherwise,
 *   - reads WATCHDOG_USEC / WATCHDOG_DEVICE from the environment,
 *   - locks memory, syncs, then broadcasts SIGTERM and SIGKILL,
 *   - loops up to FINALIZE_ATTEMPTS times over unmounting file systems,
 *     deactivating swaps and detaching loop and DM devices (the need_* flags
 *     start out false when in_container is set),
 *   - runs the executables in SYSTEM_SHUTDOWN_PATH via execute_directories(),
 *   - if /run/initramfs/shutdown is executable and we are neither in a container
 *     nor in the initrd, switches root and execs /shutdown,
 *   - syncs once more and carries out the requested action.
 *
 * NOTE(review): many lines of this function (local declarations, error checks,
 * case labels, the reboot() calls, returns and closing braces) are not visible
 * in this listing; the summary above covers only what can be seen. */
264 int main(int argc
, char *argv
[]) {
265 bool need_umount
, need_swapoff
, need_loop_detach
, need_dm_detach
;
266 bool in_container
, use_watchdog
= false;
267 _cleanup_free_
char *cgroup
= NULL
;
271 static const char* const dirs
[] = {SYSTEM_SHUTDOWN_PATH
, NULL
};
272 char *watchdog_device
;
274 log_parse_environment();
275 r
= parse_argv(argc
, argv
);
279 /* journald will die if not gone yet. The log target defaults
280 * to console, but may have been changed by command line options. */
282 log_close_console(); /* force reopen of /dev/console */
287 if (getpid_cached() != 1) {
288 log_error("Not executed by init (PID 1).");
/* Translate the verb into the command value stored in cmd. */
293 if (streq(arg_verb
, "reboot"))
295 else if (streq(arg_verb
, "poweroff"))
297 else if (streq(arg_verb
, "halt"))
298 cmd
= RB_HALT_SYSTEM
;
299 else if (streq(arg_verb
, "kexec"))
300 cmd
= LINUX_REBOOT_CMD_KEXEC
;
301 else if (streq(arg_verb
, "exit"))
302 cmd
= 0; /* ignored, just checking that arg_verb is valid */
305 log_error("Unknown action '%s'.", arg_verb
);
309 (void) cg_get_root_path(&cgroup
);
310 in_container
= detect_container() > 0;
/* The watchdog is considered in use when WATCHDOG_USEC is present in the environment. */
312 use_watchdog
= !!getenv("WATCHDOG_USEC");
313 watchdog_device
= getenv("WATCHDOG_DEVICE");
314 if (watchdog_device
) {
315 r
= watchdog_set_device(watchdog_device
);
317 log_warning_errno(r
, "Failed to set watchdog device to %s, ignoring: %m",
321 /* Lock us into memory */
322 mlockall(MCL_CURRENT
|MCL_FUTURE
);
324 /* Synchronize everything that is not written to disk yet at this point already. This is a good idea so that
325 * slow IO is processed here already and the final process killing spree is not impacted by processes
326 * desperately trying to sync IO to disk within their timeout. Do not remove this sync, data corruption will
329 sync_with_progress();
331 log_info("Sending SIGTERM to remaining processes...");
332 broadcast_signal(SIGTERM
, true, true);
334 log_info("Sending SIGKILL to remaining processes...");
335 broadcast_signal(SIGKILL
, true, false);
337 need_umount
= !in_container
;
338 need_swapoff
= !in_container
;
339 need_loop_detach
= !in_container
;
340 need_dm_detach
= !in_container
;
342 /* Unmount all mountpoints, swaps, and loopback devices */
343 for (retries
= 0; retries
< FINALIZE_ATTEMPTS
; retries
++) {
344 bool changed
= false;
349 /* Let's trim the cgroup tree on each iteration so
350 that we leave an empty cgroup tree around, so that
351 container managers get a nice notify event when we
354 cg_trim(SYSTEMD_CGROUP_CONTROLLER
, cgroup
, false);
357 log_info("Unmounting file systems.");
358 r
= umount_all(&changed
);
361 log_info("All filesystems unmounted.");
363 log_info("Not all file systems unmounted, %d left.", r
);
365 log_error_errno(r
, "Failed to unmount file systems: %m");
369 log_info("Deactivating swaps.");
370 r
= swapoff_all(&changed
);
372 need_swapoff
= false;
373 log_info("All swaps deactivated.");
375 log_info("Not all swaps deactivated, %d left.", r
);
377 log_error_errno(r
, "Failed to deactivate swaps: %m");
380 if (need_loop_detach
) {
381 log_info("Detaching loop devices.");
382 r
= loopback_detach_all(&changed
);
384 need_loop_detach
= false;
385 log_info("All loop devices detached.");
387 log_info("Not all loop devices detached, %d left.", r
);
389 log_error_errno(r
, "Failed to detach loop devices: %m");
392 if (need_dm_detach
) {
393 log_info("Detaching DM devices.");
394 r
= dm_detach_all(&changed
);
396 need_dm_detach
= false;
397 log_info("All DM devices detached.");
399 log_info("Not all DM devices detached, %d left.", r
);
401 log_error_errno(r
, "Failed to detach DM devices: %m");
404 if (!need_umount
&& !need_swapoff
&& !need_loop_detach
&& !need_dm_detach
) {
406 log_info("All filesystems, swaps, loop devices, DM devices detached.");
411 /* If in this iteration we didn't manage to
412 * unmount/deactivate anything, we simply give up */
414 log_info("Cannot finalize remaining%s%s%s%s continuing.",
415 need_umount
? " file systems," : "",
416 need_swapoff
? " swap devices," : "",
417 need_loop_detach
? " loop devices," : "",
418 need_dm_detach
? " DM devices," : "");
422 log_debug("After %u retries, couldn't finalize remaining %s%s%s%s trying again.",
424 need_umount
? " file systems," : "",
425 need_swapoff
? " swap devices," : "",
426 need_loop_detach
? " loop devices," : "",
427 need_dm_detach
? " DM devices," : "");
430 log_error("Too many iterations, giving up.");
434 /* We're done with the watchdog. */
435 watchdog_free_device();
/* The verb is passed on as an argument to the executables run from dirs[]. */
438 arguments
[1] = arg_verb
;
440 execute_directories(dirs
, DEFAULT_TIMEOUT_USEC
, NULL
, NULL
, arguments
);
442 if (!in_container
&& !in_initrd() &&
443 access("/run/initramfs/shutdown", X_OK
) == 0) {
444 r
= switch_root_initramfs();
446 argv
[0] = (char*) "/shutdown";
449 make_console_stdio();
451 log_info("Successfully changed into root pivot.\n"
452 "Returning to initrd...");
454 execv("/shutdown", argv
);
455 log_error_errno(errno
, "Failed to execute shutdown binary: %m");
457 log_error_errno(r
, "Failed to switch root to \"/run/initramfs\": %m");
461 if (need_umount
|| need_swapoff
|| need_loop_detach
|| need_dm_detach
)
462 log_error("Failed to finalize %s%s%s%s ignoring",
463 need_umount
? " file systems," : "",
464 need_swapoff
? " swap devices," : "",
465 need_loop_detach
? " loop devices," : "",
466 need_dm_detach
? " DM devices," : "");
468 /* The kernel will automatically flush ATA disks and suchlike on reboot(), but the file systems need to be
469 * sync'ed explicitly in advance. So let's do this here, but not needlessly slow down containers. Note that we
470 * sync'ed things already once above, but we did some more work since then which might have caused IO, hence
471 * let's do it once more. Do not remove this sync, data corruption will result. */
473 sync_with_progress();
475 if (streq(arg_verb
, "exit")) {
479 /* We cannot exit() on the host, fallback on another
487 case LINUX_REBOOT_CMD_KEXEC
:
490 /* We cheat and exec kexec to avoid doing all its work */
493 log_info("Rebooting with kexec.");
497 log_error_errno(errno
, "Failed to fork: %m");
500 const char * const args
[] = {
506 execv(args
[0], (char * const *) args
);
509 wait_for_terminate_and_warn("kexec", pid
, true);
517 _cleanup_free_
char *param
= NULL
;
/* NOTE(review): the second argument below reads as a mis-encoded "&param"
 * (address of the variable declared just above) -- confirm against the
 * original source and repair the encoding. */
519 r
= read_one_line_file("/run/systemd/reboot-param", ¶m
);
/* A missing parameter file (-ENOENT) is not worth a warning. */
520 if (r
< 0 && r
!= -ENOENT
)
521 log_warning_errno(r
, "Failed to read reboot parameter file: %m");
523 if (!isempty(param
)) {
524 log_info("Rebooting with argument '%s'.", param
);
525 syscall(SYS_reboot
, LINUX_REBOOT_MAGIC1
, LINUX_REBOOT_MAGIC2
, LINUX_REBOOT_CMD_RESTART2
, param
);
526 log_warning_errno(errno
, "Failed to reboot with parameter, retrying without: %m");
530 log_info("Rebooting.");
534 log_info("Powering off.");
538 log_info("Halting system.");
542 assert_not_reached("Unknown magic");
546 if (errno
== EPERM
&& in_container
) {
547 /* If we are in a container, and we lacked
548 * CAP_SYS_BOOT just exit, this will kill our
549 * container for good. */
550 log_info("Exiting container.");
554 r
= log_error_errno(errno
, "Failed to invoke reboot(): %m");
557 log_emergency_errno(r
, "Critical error while doing system shutdown: %m");