]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/core/shutdown.c
1da6e59bb808f8e3661c96035f3a77514b0316a8
[thirdparty/systemd.git] / src / core / shutdown.c
1 /* SPDX-License-Identifier: LGPL-2.1+ */
2 /***
3 This file is part of systemd.
4
5 Copyright 2010 ProFUSION embedded systems
6
7 systemd is free software; you can redistribute it and/or modify it
8 under the terms of the GNU Lesser General Public License as published by
9 the Free Software Foundation; either version 2.1 of the License, or
10 (at your option) any later version.
11
12 systemd is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 Lesser General Public License for more details.
16
17 You should have received a copy of the GNU Lesser General Public License
18 along with systemd; If not, see <http://www.gnu.org/licenses/>.
19 ***/
20
21 #include <errno.h>
22 #include <getopt.h>
23 #include <linux/reboot.h>
24 #include <signal.h>
25 #include <stdbool.h>
26 #include <stdlib.h>
27 #include <sys/mman.h>
28 #include <sys/mount.h>
29 #include <sys/reboot.h>
30 #include <sys/stat.h>
31 #include <unistd.h>
32
33 #include "alloc-util.h"
34 #include "async.h"
35 #include "cgroup-util.h"
36 #include "def.h"
37 #include "exec-util.h"
38 #include "fd-util.h"
39 #include "fileio.h"
40 #include "killall.h"
41 #include "log.h"
42 #include "missing.h"
43 #include "parse-util.h"
44 #include "process-util.h"
45 #include "reboot-util.h"
46 #include "signal-util.h"
47 #include "string-util.h"
48 #include "switch-root.h"
49 #include "terminal-util.h"
50 #include "umount.h"
51 #include "util.h"
52 #include "virt.h"
53 #include "watchdog.h"
54
55 #define SYNC_PROGRESS_ATTEMPTS 3
56 #define SYNC_TIMEOUT_USEC (10*USEC_PER_SEC)
57
58 static char* arg_verb;
59 static uint8_t arg_exit_code;
60 static usec_t arg_timeout = DEFAULT_TIMEOUT_USEC;
61
62 static int parse_argv(int argc, char *argv[]) {
63 enum {
64 ARG_LOG_LEVEL = 0x100,
65 ARG_LOG_TARGET,
66 ARG_LOG_COLOR,
67 ARG_LOG_LOCATION,
68 ARG_EXIT_CODE,
69 ARG_TIMEOUT,
70 };
71
72 static const struct option options[] = {
73 { "log-level", required_argument, NULL, ARG_LOG_LEVEL },
74 { "log-target", required_argument, NULL, ARG_LOG_TARGET },
75 { "log-color", optional_argument, NULL, ARG_LOG_COLOR },
76 { "log-location", optional_argument, NULL, ARG_LOG_LOCATION },
77 { "exit-code", required_argument, NULL, ARG_EXIT_CODE },
78 { "timeout", required_argument, NULL, ARG_TIMEOUT },
79 {}
80 };
81
82 int c, r;
83
84 assert(argc >= 1);
85 assert(argv);
86
87 /* "-" prevents getopt from permuting argv[] and moving the verb away
88 * from argv[1]. Our interface to initrd promises it'll be there. */
89 while ((c = getopt_long(argc, argv, "-", options, NULL)) >= 0)
90 switch (c) {
91
92 case ARG_LOG_LEVEL:
93 r = log_set_max_level_from_string(optarg);
94 if (r < 0)
95 log_error_errno(r, "Failed to parse log level %s, ignoring.", optarg);
96
97 break;
98
99 case ARG_LOG_TARGET:
100 r = log_set_target_from_string(optarg);
101 if (r < 0)
102 log_error_errno(r, "Failed to parse log target %s, ignoring", optarg);
103
104 break;
105
106 case ARG_LOG_COLOR:
107
108 if (optarg) {
109 r = log_show_color_from_string(optarg);
110 if (r < 0)
111 log_error_errno(r, "Failed to parse log color setting %s, ignoring", optarg);
112 } else
113 log_show_color(true);
114
115 break;
116
117 case ARG_LOG_LOCATION:
118 if (optarg) {
119 r = log_show_location_from_string(optarg);
120 if (r < 0)
121 log_error_errno(r, "Failed to parse log location setting %s, ignoring", optarg);
122 } else
123 log_show_location(true);
124
125 break;
126
127 case ARG_EXIT_CODE:
128 r = safe_atou8(optarg, &arg_exit_code);
129 if (r < 0)
130 log_error_errno(r, "Failed to parse exit code %s, ignoring", optarg);
131
132 break;
133
134 case ARG_TIMEOUT:
135 r = parse_sec(optarg, &arg_timeout);
136 if (r < 0)
137 log_error_errno(r, "Failed to parse shutdown timeout %s, ignoring", optarg);
138
139 break;
140
141 case '\001':
142 if (!arg_verb)
143 arg_verb = optarg;
144 else
145 log_error("Excess arguments, ignoring");
146 break;
147
148 case '?':
149 return -EINVAL;
150
151 default:
152 assert_not_reached("Unhandled option code.");
153 }
154
155 if (!arg_verb) {
156 log_error("Verb argument missing.");
157 return -EINVAL;
158 }
159
160 return 0;
161 }
162
163 static int switch_root_initramfs(void) {
164 if (mount("/run/initramfs", "/run/initramfs", NULL, MS_BIND, NULL) < 0)
165 return log_error_errno(errno, "Failed to mount bind /run/initramfs on /run/initramfs: %m");
166
167 if (mount(NULL, "/run/initramfs", NULL, MS_PRIVATE, NULL) < 0)
168 return log_error_errno(errno, "Failed to make /run/initramfs private mount: %m");
169
170 /* switch_root with MS_BIND, because there might still be processes lurking around, which have open file descriptors.
171 * /run/initramfs/shutdown will take care of these.
172 * Also do not detach the old root, because /run/initramfs/shutdown needs to access it.
173 */
174 return switch_root("/run/initramfs", "/oldroot", false, MS_BIND);
175 }
176
177 /* Read the following fields from /proc/meminfo:
178 *
179 * NFS_Unstable
180 * Writeback
181 * Dirty
182 *
183 * Return true if the sum of these fields is greater than the previous
184 * value input. For all other issues, report the failure and indicate that
185 * the sync is not making progress.
186 */
187 static bool sync_making_progress(unsigned long long *prev_dirty) {
188 _cleanup_fclose_ FILE *f = NULL;
189 char line[LINE_MAX];
190 bool r = false;
191 unsigned long long val = 0;
192
193 f = fopen("/proc/meminfo", "re");
194 if (!f)
195 return log_warning_errno(errno, "Failed to open /proc/meminfo: %m");
196
197 FOREACH_LINE(line, f, log_warning_errno(errno, "Failed to parse /proc/meminfo: %m")) {
198 unsigned long long ull = 0;
199
200 if (!first_word(line, "NFS_Unstable:") && !first_word(line, "Writeback:") && !first_word(line, "Dirty:"))
201 continue;
202
203 errno = 0;
204 if (sscanf(line, "%*s %llu %*s", &ull) != 1) {
205 if (errno != 0)
206 log_warning_errno(errno, "Failed to parse /proc/meminfo: %m");
207 else
208 log_warning("Failed to parse /proc/meminfo");
209
210 return false;
211 }
212
213 val += ull;
214 }
215
216 r = *prev_dirty > val;
217
218 *prev_dirty = val;
219
220 return r;
221 }
222
223 static void sync_with_progress(void) {
224 unsigned long long dirty = ULONG_LONG_MAX;
225 unsigned checks;
226 pid_t pid;
227 int r;
228
229 BLOCK_SIGNALS(SIGCHLD);
230
231 /* Due to the possiblity of the sync operation hanging, we fork a child process and monitor the progress. If
232 * the timeout lapses, the assumption is that that particular sync stalled. */
233
234 r = asynchronous_sync(&pid);
235 if (r < 0) {
236 log_error_errno(r, "Failed to fork sync(): %m");
237 return;
238 }
239
240 log_info("Syncing filesystems and block devices.");
241
242 /* Start monitoring the sync operation. If more than
243 * SYNC_PROGRESS_ATTEMPTS lapse without progress being made,
244 * we assume that the sync is stalled */
245 for (checks = 0; checks < SYNC_PROGRESS_ATTEMPTS; checks++) {
246 r = wait_for_terminate_with_timeout(pid, SYNC_TIMEOUT_USEC);
247 if (r == 0)
248 /* Sync finished without error.
249 * (The sync itself does not return an error code) */
250 return;
251 else if (r == -ETIMEDOUT) {
252 /* Reset the check counter if the "Dirty" value is
253 * decreasing */
254 if (sync_making_progress(&dirty))
255 checks = 0;
256 } else {
257 log_error_errno(r, "Failed to sync filesystems and block devices: %m");
258 return;
259 }
260 }
261
262 /* Only reached in the event of a timeout. We should issue a kill
263 * to the stray process. */
264 log_error("Syncing filesystems and block devices - timed out, issuing SIGKILL to PID "PID_FMT".", pid);
265 (void) kill(pid, SIGKILL);
266 }
267
268 int main(int argc, char *argv[]) {
269 bool need_umount, need_swapoff, need_loop_detach, need_dm_detach;
270 bool in_container, use_watchdog = false, can_initrd;
271 _cleanup_free_ char *cgroup = NULL;
272 char *arguments[3];
273 int cmd, r, umount_log_level = LOG_INFO;
274 static const char* const dirs[] = {SYSTEM_SHUTDOWN_PATH, NULL};
275 char *watchdog_device;
276
277 /* The log target defaults to console, but the original systemd process will pass its log target in through a
278 * command line argument, which will override this default. Also, ensure we'll never log to the journal or
279 * syslog, as these logging daemons are either already dead or will die very soon. */
280
281 log_set_target(LOG_TARGET_CONSOLE);
282 log_set_prohibit_ipc(true);
283 log_parse_environment();
284
285 r = parse_argv(argc, argv);
286 if (r < 0)
287 goto error;
288
289 log_open();
290
291 umask(0022);
292
293 if (getpid_cached() != 1) {
294 log_error("Not executed by init (PID 1).");
295 r = -EPERM;
296 goto error;
297 }
298
299 if (streq(arg_verb, "reboot"))
300 cmd = RB_AUTOBOOT;
301 else if (streq(arg_verb, "poweroff"))
302 cmd = RB_POWER_OFF;
303 else if (streq(arg_verb, "halt"))
304 cmd = RB_HALT_SYSTEM;
305 else if (streq(arg_verb, "kexec"))
306 cmd = LINUX_REBOOT_CMD_KEXEC;
307 else if (streq(arg_verb, "exit"))
308 cmd = 0; /* ignored, just checking that arg_verb is valid */
309 else {
310 log_error("Unknown action '%s'.", arg_verb);
311 r = -EINVAL;
312 goto error;
313 }
314
315 (void) cg_get_root_path(&cgroup);
316 in_container = detect_container() > 0;
317
318 use_watchdog = !!getenv("WATCHDOG_USEC");
319 watchdog_device = getenv("WATCHDOG_DEVICE");
320 if (watchdog_device) {
321 r = watchdog_set_device(watchdog_device);
322 if (r < 0)
323 log_warning_errno(r, "Failed to set watchdog device to %s, ignoring: %m",
324 watchdog_device);
325 }
326
327 /* Lock us into memory */
328 (void) mlockall(MCL_CURRENT|MCL_FUTURE);
329
330 /* Synchronize everything that is not written to disk yet at this point already. This is a good idea so that
331 * slow IO is processed here already and the final process killing spree is not impacted by processes
332 * desperately trying to sync IO to disk within their timeout. Do not remove this sync, data corruption will
333 * result. */
334 if (!in_container)
335 sync_with_progress();
336
337 disable_coredumps();
338
339 log_info("Sending SIGTERM to remaining processes...");
340 broadcast_signal(SIGTERM, true, true, arg_timeout);
341
342 log_info("Sending SIGKILL to remaining processes...");
343 broadcast_signal(SIGKILL, true, false, arg_timeout);
344
345 need_umount = !in_container;
346 need_swapoff = !in_container;
347 need_loop_detach = !in_container;
348 need_dm_detach = !in_container;
349 can_initrd = !in_container && !in_initrd() && access("/run/initramfs/shutdown", X_OK) == 0;
350
351 /* Unmount all mountpoints, swaps, and loopback devices */
352 for (;;) {
353 bool changed = false;
354
355 if (use_watchdog)
356 watchdog_ping();
357
358 /* Let's trim the cgroup tree on each iteration so
359 that we leave an empty cgroup tree around, so that
360 container managers get a nice notify event when we
361 are down */
362 if (cgroup)
363 cg_trim(SYSTEMD_CGROUP_CONTROLLER, cgroup, false);
364
365 if (need_umount) {
366 log_info("Unmounting file systems.");
367 r = umount_all(&changed, umount_log_level);
368 if (r == 0) {
369 need_umount = false;
370 log_info("All filesystems unmounted.");
371 } else if (r > 0)
372 log_info("Not all file systems unmounted, %d left.", r);
373 else
374 log_error_errno(r, "Failed to unmount file systems: %m");
375 }
376
377 if (need_swapoff) {
378 log_info("Deactivating swaps.");
379 r = swapoff_all(&changed);
380 if (r == 0) {
381 need_swapoff = false;
382 log_info("All swaps deactivated.");
383 } else if (r > 0)
384 log_info("Not all swaps deactivated, %d left.", r);
385 else
386 log_error_errno(r, "Failed to deactivate swaps: %m");
387 }
388
389 if (need_loop_detach) {
390 log_info("Detaching loop devices.");
391 r = loopback_detach_all(&changed, umount_log_level);
392 if (r == 0) {
393 need_loop_detach = false;
394 log_info("All loop devices detached.");
395 } else if (r > 0)
396 log_info("Not all loop devices detached, %d left.", r);
397 else
398 log_error_errno(r, "Failed to detach loop devices: %m");
399 }
400
401 if (need_dm_detach) {
402 log_info("Detaching DM devices.");
403 r = dm_detach_all(&changed, umount_log_level);
404 if (r == 0) {
405 need_dm_detach = false;
406 log_info("All DM devices detached.");
407 } else if (r > 0)
408 log_info("Not all DM devices detached, %d left.", r);
409 else
410 log_error_errno(r, "Failed to detach DM devices: %m");
411 }
412
413 if (!need_umount && !need_swapoff && !need_loop_detach && !need_dm_detach) {
414 log_info("All filesystems, swaps, loop devices and DM devices detached.");
415 /* Yay, done */
416 break;
417 }
418
419 if (!changed && umount_log_level == LOG_INFO && !can_initrd) {
420 /* There are things we cannot get rid of. Loop one more time
421 * with LOG_ERR to inform the user. Note that we don't need
422 * to do this if there is a initrd to switch to, because that
423 * one is likely to get rid of the remounting mounts. If not,
424 * it will log about them. */
425 umount_log_level = LOG_ERR;
426 continue;
427 }
428
429 /* If in this iteration we didn't manage to
430 * unmount/deactivate anything, we simply give up */
431 if (!changed) {
432 log_info("Cannot finalize remaining%s%s%s%s continuing.",
433 need_umount ? " file systems," : "",
434 need_swapoff ? " swap devices," : "",
435 need_loop_detach ? " loop devices," : "",
436 need_dm_detach ? " DM devices," : "");
437 break;
438 }
439
440 log_debug("Couldn't finalize remaining %s%s%s%s trying again.",
441 need_umount ? " file systems," : "",
442 need_swapoff ? " swap devices," : "",
443 need_loop_detach ? " loop devices," : "",
444 need_dm_detach ? " DM devices," : "");
445 }
446
447 /* We're done with the watchdog. */
448 watchdog_free_device();
449
450 arguments[0] = NULL;
451 arguments[1] = arg_verb;
452 arguments[2] = NULL;
453 execute_directories(dirs, DEFAULT_TIMEOUT_USEC, NULL, NULL, arguments);
454
455 if (can_initrd) {
456 r = switch_root_initramfs();
457 if (r >= 0) {
458 argv[0] = (char*) "/shutdown";
459
460 setsid();
461 make_console_stdio();
462
463 log_info("Successfully changed into root pivot.\n"
464 "Returning to initrd...");
465
466 execv("/shutdown", argv);
467 log_error_errno(errno, "Failed to execute shutdown binary: %m");
468 } else
469 log_error_errno(r, "Failed to switch root to \"/run/initramfs\": %m");
470
471 }
472
473 if (need_umount || need_swapoff || need_loop_detach || need_dm_detach)
474 log_error("Failed to finalize %s%s%s%s ignoring",
475 need_umount ? " file systems," : "",
476 need_swapoff ? " swap devices," : "",
477 need_loop_detach ? " loop devices," : "",
478 need_dm_detach ? " DM devices," : "");
479
480 /* The kernel will automatically flush ATA disks and suchlike on reboot(), but the file systems need to be
481 * sync'ed explicitly in advance. So let's do this here, but not needlessly slow down containers. Note that we
482 * sync'ed things already once above, but we did some more work since then which might have caused IO, hence
483 * let's do it once more. Do not remove this sync, data corruption will result. */
484 if (!in_container)
485 sync_with_progress();
486
487 if (streq(arg_verb, "exit")) {
488 if (in_container)
489 return arg_exit_code;
490
491 cmd = RB_POWER_OFF; /* We cannot exit() on the host, fallback on another method. */
492 }
493
494 switch (cmd) {
495
496 case LINUX_REBOOT_CMD_KEXEC:
497
498 if (!in_container) {
499 /* We cheat and exec kexec to avoid doing all its work */
500 log_info("Rebooting with kexec.");
501
502 r = safe_fork("(sd-kexec)", FORK_RESET_SIGNALS|FORK_CLOSE_ALL_FDS|FORK_LOG|FORK_WAIT, NULL);
503 if (r == 0) {
504 const char * const args[] = {
505 KEXEC, "-e", NULL
506 };
507
508 /* Child */
509
510 execv(args[0], (char * const *) args);
511 _exit(EXIT_FAILURE);
512 }
513
514 /* If we are still running, then the kexec can't have worked, let's fall through */
515 }
516
517 cmd = RB_AUTOBOOT;
518 _fallthrough_;
519
520 case RB_AUTOBOOT:
521 (void) reboot_with_parameter(REBOOT_LOG);
522 log_info("Rebooting.");
523 break;
524
525 case RB_POWER_OFF:
526 log_info("Powering off.");
527 break;
528
529 case RB_HALT_SYSTEM:
530 log_info("Halting system.");
531 break;
532
533 default:
534 assert_not_reached("Unknown magic");
535 }
536
537 (void) reboot(cmd);
538 if (errno == EPERM && in_container) {
539 /* If we are in a container, and we lacked
540 * CAP_SYS_BOOT just exit, this will kill our
541 * container for good. */
542 log_info("Exiting container.");
543 return EXIT_SUCCESS;
544 }
545
546 r = log_error_errno(errno, "Failed to invoke reboot(): %m");
547
548 error:
549 log_emergency_errno(r, "Critical error while doing system shutdown: %m");
550 freeze();
551 }