1 /* SPDX-License-Identifier: LGPL-2.1+ */
3 This file is part of systemd.
5 Copyright 2010 ProFUSION embedded systems
7 systemd is free software; you can redistribute it and/or modify it
8 under the terms of the GNU Lesser General Public License as published by
9 the Free Software Foundation; either version 2.1 of the License, or
10 (at your option) any later version.
12 systemd is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 Lesser General Public License for more details.
17 You should have received a copy of the GNU Lesser General Public License
18 along with systemd; If not, see <http://www.gnu.org/licenses/>.
23 #include <linux/reboot.h>
28 #include <sys/mount.h>
29 #include <sys/reboot.h>
33 #include "alloc-util.h"
35 #include "cgroup-util.h"
37 #include "exec-util.h"
43 #include "parse-util.h"
44 #include "process-util.h"
45 #include "signal-util.h"
46 #include "string-util.h"
47 #include "switch-root.h"
48 #include "terminal-util.h"
/* Upper bound on the number of unmount/swapoff/detach passes main() runs before giving up. */
#define FINALIZE_ATTEMPTS 50

/* Number of timed-out waits without progress after which a sync is considered stalled. */
#define SYNC_PROGRESS_ATTEMPTS 3
/* Duration of each individual wait for the sync child process. */
#define SYNC_TIMEOUT_USEC (10*USEC_PER_SEC)

/* Verb given on the command line; main() accepts "reboot", "poweroff", "halt", "kexec" and "exit". */
static char *arg_verb;
/* Value of --exit-code=, parsed as an unsigned 8-bit integer. */
static uint8_t arg_exit_code;
62 static int parse_argv(int argc
, char *argv
[]) {
64 ARG_LOG_LEVEL
= 0x100,
71 static const struct option options
[] = {
72 { "log-level", required_argument
, NULL
, ARG_LOG_LEVEL
},
73 { "log-target", required_argument
, NULL
, ARG_LOG_TARGET
},
74 { "log-color", optional_argument
, NULL
, ARG_LOG_COLOR
},
75 { "log-location", optional_argument
, NULL
, ARG_LOG_LOCATION
},
76 { "exit-code", required_argument
, NULL
, ARG_EXIT_CODE
},
85 /* "-" prevents getopt from permuting argv[] and moving the verb away
86 * from argv[1]. Our interface to initrd promises it'll be there. */
87 while ((c
= getopt_long(argc
, argv
, "-", options
, NULL
)) >= 0)
91 r
= log_set_max_level_from_string(optarg
);
93 log_error("Failed to parse log level %s, ignoring.", optarg
);
98 r
= log_set_target_from_string(optarg
);
100 log_error("Failed to parse log target %s, ignoring", optarg
);
107 r
= log_show_color_from_string(optarg
);
109 log_error("Failed to parse log color setting %s, ignoring", optarg
);
111 log_show_color(true);
115 case ARG_LOG_LOCATION
:
117 r
= log_show_location_from_string(optarg
);
119 log_error("Failed to parse log location setting %s, ignoring", optarg
);
121 log_show_location(true);
126 r
= safe_atou8(optarg
, &arg_exit_code
);
128 log_error("Failed to parse exit code %s, ignoring", optarg
);
136 log_error("Excess arguments, ignoring");
143 assert_not_reached("Unhandled option code.");
147 log_error("Verb argument missing.");
/* Turns /run/initramfs into the new root directory.
 *
 * /run/initramfs is bind-mounted onto itself, that mount is made private, and then switch_root() is
 * called with "/oldroot" as the place where the previous root stays reachable.
 *
 * Returns whatever switch_root() returns; if one of the mount() calls fails the failure is logged
 * and the value of log_error_errno() is returned instead. */
static int switch_root_initramfs(void) {
        int r;

        r = mount("/run/initramfs", "/run/initramfs", NULL, MS_BIND, NULL);
        if (r < 0)
                return log_error_errno(errno, "Failed to mount bind /run/initramfs on /run/initramfs: %m");

        r = mount(NULL, "/run/initramfs", NULL, MS_PRIVATE, NULL);
        if (r < 0)
                return log_error_errno(errno, "Failed to make /run/initramfs private mount: %m");

        /* The switch is done with MS_BIND since processes holding open file descriptors may still be
         * lurking around; /run/initramfs/shutdown takes care of them. The old root is deliberately
         * not detached, because /run/initramfs/shutdown needs to access it. */
        return switch_root("/run/initramfs", "/oldroot", false, MS_BIND);
}
168 /* Read the following fields from /proc/meminfo:
174 * Return true if the sum of these fields is greater than the previous
175 * value input. For all other issues, report the failure and indicate that
176 * the sync is not making progress.
178 static bool sync_making_progress(unsigned long long *prev_dirty
) {
179 _cleanup_fclose_
FILE *f
= NULL
;
182 unsigned long long val
= 0;
184 f
= fopen("/proc/meminfo", "re");
186 return log_warning_errno(errno
, "Failed to open /proc/meminfo: %m");
188 FOREACH_LINE(line
, f
, log_warning_errno(errno
, "Failed to parse /proc/meminfo: %m")) {
189 unsigned long long ull
= 0;
191 if (!first_word(line
, "NFS_Unstable:") && !first_word(line
, "Writeback:") && !first_word(line
, "Dirty:"))
195 if (sscanf(line
, "%*s %llu %*s", &ull
) != 1) {
197 log_warning_errno(errno
, "Failed to parse /proc/meminfo: %m");
199 log_warning("Failed to parse /proc/meminfo");
207 r
= *prev_dirty
> val
;
214 static void sync_with_progress(void) {
215 unsigned long long dirty
= ULONG_LONG_MAX
;
220 BLOCK_SIGNALS(SIGCHLD
);
222 /* Due to the possiblity of the sync operation hanging, we fork a child process and monitor the progress. If
223 * the timeout lapses, the assumption is that that particular sync stalled. */
225 r
= asynchronous_sync(&pid
);
227 log_error_errno(r
, "Failed to fork sync(): %m");
231 log_info("Syncing filesystems and block devices.");
233 /* Start monitoring the sync operation. If more than
234 * SYNC_PROGRESS_ATTEMPTS lapse without progress being made,
235 * we assume that the sync is stalled */
236 for (checks
= 0; checks
< SYNC_PROGRESS_ATTEMPTS
; checks
++) {
237 r
= wait_for_terminate_with_timeout(pid
, SYNC_TIMEOUT_USEC
);
239 /* Sync finished without error.
240 * (The sync itself does not return an error code) */
242 else if (r
== -ETIMEDOUT
) {
243 /* Reset the check counter if the "Dirty" value is
245 if (sync_making_progress(&dirty
))
248 log_error_errno(r
, "Failed to sync filesystems and block devices: %m");
253 /* Only reached in the event of a timeout. We should issue a kill
254 * to the stray process. */
255 log_error("Syncing filesystems and block devices - timed out, issuing SIGKILL to PID "PID_FMT
".", pid
);
256 (void) kill(pid
, SIGKILL
);
259 int main(int argc
, char *argv
[]) {
260 bool need_umount
, need_swapoff
, need_loop_detach
, need_dm_detach
;
261 bool in_container
, use_watchdog
= false;
262 _cleanup_free_
char *cgroup
= NULL
;
266 static const char* const dirs
[] = {SYSTEM_SHUTDOWN_PATH
, NULL
};
267 char *watchdog_device
;
269 log_parse_environment();
270 r
= parse_argv(argc
, argv
);
274 /* journald will die if not gone yet. The log target defaults
275 * to console, but may have been changed by command line options. */
277 log_close_console(); /* force reopen of /dev/console */
282 if (getpid_cached() != 1) {
283 log_error("Not executed by init (PID 1).");
288 if (streq(arg_verb
, "reboot"))
290 else if (streq(arg_verb
, "poweroff"))
292 else if (streq(arg_verb
, "halt"))
293 cmd
= RB_HALT_SYSTEM
;
294 else if (streq(arg_verb
, "kexec"))
295 cmd
= LINUX_REBOOT_CMD_KEXEC
;
296 else if (streq(arg_verb
, "exit"))
297 cmd
= 0; /* ignored, just checking that arg_verb is valid */
300 log_error("Unknown action '%s'.", arg_verb
);
304 (void) cg_get_root_path(&cgroup
);
305 in_container
= detect_container() > 0;
307 use_watchdog
= !!getenv("WATCHDOG_USEC");
308 watchdog_device
= getenv("WATCHDOG_DEVICE");
309 if (watchdog_device
) {
310 r
= watchdog_set_device(watchdog_device
);
312 log_warning_errno(r
, "Failed to set watchdog device to %s, ignoring: %m",
316 /* Lock us into memory */
317 mlockall(MCL_CURRENT
|MCL_FUTURE
);
319 /* Synchronize everything that is not written to disk yet at this point already. This is a good idea so that
320 * slow IO is processed here already and the final process killing spree is not impacted by processes
321 * desperately trying to sync IO to disk within their timeout. Do not remove this sync, data corruption will
324 sync_with_progress();
326 log_info("Sending SIGTERM to remaining processes...");
327 broadcast_signal(SIGTERM
, true, true);
329 log_info("Sending SIGKILL to remaining processes...");
330 broadcast_signal(SIGKILL
, true, false);
332 need_umount
= !in_container
;
333 need_swapoff
= !in_container
;
334 need_loop_detach
= !in_container
;
335 need_dm_detach
= !in_container
;
337 /* Unmount all mountpoints, swaps, and loopback devices */
338 for (retries
= 0; retries
< FINALIZE_ATTEMPTS
; retries
++) {
339 bool changed
= false;
344 /* Let's trim the cgroup tree on each iteration so
345 that we leave an empty cgroup tree around, so that
346 container managers get a nice notify event when we
349 cg_trim(SYSTEMD_CGROUP_CONTROLLER
, cgroup
, false);
352 log_info("Unmounting file systems.");
353 r
= umount_all(&changed
);
356 log_info("All filesystems unmounted.");
358 log_info("Not all file systems unmounted, %d left.", r
);
360 log_error_errno(r
, "Failed to unmount file systems: %m");
364 log_info("Deactivating swaps.");
365 r
= swapoff_all(&changed
);
367 need_swapoff
= false;
368 log_info("All swaps deactivated.");
370 log_info("Not all swaps deactivated, %d left.", r
);
372 log_error_errno(r
, "Failed to deactivate swaps: %m");
375 if (need_loop_detach
) {
376 log_info("Detaching loop devices.");
377 r
= loopback_detach_all(&changed
);
379 need_loop_detach
= false;
380 log_info("All loop devices detached.");
382 log_info("Not all loop devices detached, %d left.", r
);
384 log_error_errno(r
, "Failed to detach loop devices: %m");
387 if (need_dm_detach
) {
388 log_info("Detaching DM devices.");
389 r
= dm_detach_all(&changed
);
391 need_dm_detach
= false;
392 log_info("All DM devices detached.");
394 log_info("Not all DM devices detached, %d left.", r
);
396 log_error_errno(r
, "Failed to detach DM devices: %m");
399 if (!need_umount
&& !need_swapoff
&& !need_loop_detach
&& !need_dm_detach
) {
401 log_info("All filesystems, swaps, loop devices, DM devices detached.");
406 /* If in this iteration we didn't manage to
407 * unmount/deactivate anything, we simply give up */
409 log_info("Cannot finalize remaining%s%s%s%s continuing.",
410 need_umount
? " file systems," : "",
411 need_swapoff
? " swap devices," : "",
412 need_loop_detach
? " loop devices," : "",
413 need_dm_detach
? " DM devices," : "");
417 log_debug("After %u retries, couldn't finalize remaining %s%s%s%s trying again.",
419 need_umount
? " file systems," : "",
420 need_swapoff
? " swap devices," : "",
421 need_loop_detach
? " loop devices," : "",
422 need_dm_detach
? " DM devices," : "");
425 log_error("Too many iterations, giving up.");
429 /* We're done with the watchdog. */
430 watchdog_free_device();
433 arguments
[1] = arg_verb
;
435 execute_directories(dirs
, DEFAULT_TIMEOUT_USEC
, NULL
, NULL
, arguments
);
437 if (!in_container
&& !in_initrd() &&
438 access("/run/initramfs/shutdown", X_OK
) == 0) {
439 r
= switch_root_initramfs();
441 argv
[0] = (char*) "/shutdown";
444 make_console_stdio();
446 log_info("Successfully changed into root pivot.\n"
447 "Returning to initrd...");
449 execv("/shutdown", argv
);
450 log_error_errno(errno
, "Failed to execute shutdown binary: %m");
452 log_error_errno(r
, "Failed to switch root to \"/run/initramfs\": %m");
456 if (need_umount
|| need_swapoff
|| need_loop_detach
|| need_dm_detach
)
457 log_error("Failed to finalize %s%s%s%s ignoring",
458 need_umount
? " file systems," : "",
459 need_swapoff
? " swap devices," : "",
460 need_loop_detach
? " loop devices," : "",
461 need_dm_detach
? " DM devices," : "");
463 /* The kernel will automatically flush ATA disks and suchlike on reboot(), but the file systems need to be
464 * sync'ed explicitly in advance. So let's do this here, but not needlessly slow down containers. Note that we
465 * sync'ed things already once above, but we did some more work since then which might have caused IO, hence
466 * let's do it once more. Do not remove this sync, data corruption will result. */
468 sync_with_progress();
470 if (streq(arg_verb
, "exit")) {
474 /* We cannot exit() on the host, fallback on another
482 case LINUX_REBOOT_CMD_KEXEC
:
485 /* We cheat and exec kexec to avoid doing all its work */
486 log_info("Rebooting with kexec.");
488 r
= safe_fork("(sd-kexec)", FORK_RESET_SIGNALS
|FORK_CLOSE_ALL_FDS
|FORK_LOG
|FORK_WAIT
, NULL
);
490 const char * const args
[] = {
496 execv(args
[0], (char * const *) args
);
500 /* If we are still running, then the kexec can't have worked, let's fall through */
508 _cleanup_free_
char *param
= NULL
;
510 r
= read_one_line_file("/run/systemd/reboot-param", ¶m
);
511 if (r
< 0 && r
!= -ENOENT
)
512 log_warning_errno(r
, "Failed to read reboot parameter file: %m");
514 if (!isempty(param
)) {
515 log_info("Rebooting with argument '%s'.", param
);
516 syscall(SYS_reboot
, LINUX_REBOOT_MAGIC1
, LINUX_REBOOT_MAGIC2
, LINUX_REBOOT_CMD_RESTART2
, param
);
517 log_warning_errno(errno
, "Failed to reboot with parameter, retrying without: %m");
521 log_info("Rebooting.");
525 log_info("Powering off.");
529 log_info("Halting system.");
533 assert_not_reached("Unknown magic");
537 if (errno
== EPERM
&& in_container
) {
538 /* If we are in a container, and we lacked
539 * CAP_SYS_BOOT just exit, this will kill our
540 * container for good. */
541 log_info("Exiting container.");
545 r
= log_error_errno(errno
, "Failed to invoke reboot(): %m");
548 log_emergency_errno(r
, "Critical error while doing system shutdown: %m");