]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/core/shutdown.c
Merge pull request #8429 from medhefgo/sd-shutdown
[thirdparty/systemd.git] / src / core / shutdown.c
1 /* SPDX-License-Identifier: LGPL-2.1+ */
2 /***
3 This file is part of systemd.
4
5 Copyright 2010 ProFUSION embedded systems
6
7 systemd is free software; you can redistribute it and/or modify it
8 under the terms of the GNU Lesser General Public License as published by
9 the Free Software Foundation; either version 2.1 of the License, or
10 (at your option) any later version.
11
12 systemd is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 Lesser General Public License for more details.
16
17 You should have received a copy of the GNU Lesser General Public License
18 along with systemd; If not, see <http://www.gnu.org/licenses/>.
19 ***/
20
21 #include <errno.h>
22 #include <getopt.h>
23 #include <linux/reboot.h>
24 #include <signal.h>
25 #include <stdbool.h>
26 #include <stdlib.h>
27 #include <sys/mman.h>
28 #include <sys/mount.h>
29 #include <sys/reboot.h>
30 #include <sys/stat.h>
31 #include <unistd.h>
32
33 #include "alloc-util.h"
34 #include "async.h"
35 #include "cgroup-util.h"
36 #include "def.h"
37 #include "exec-util.h"
38 #include "fd-util.h"
39 #include "fileio.h"
40 #include "killall.h"
41 #include "log.h"
42 #include "missing.h"
43 #include "parse-util.h"
44 #include "process-util.h"
45 #include "reboot-util.h"
46 #include "signal-util.h"
47 #include "string-util.h"
48 #include "switch-root.h"
49 #include "terminal-util.h"
50 #include "umount.h"
51 #include "util.h"
52 #include "virt.h"
53 #include "watchdog.h"
54
55 #define FINALIZE_ATTEMPTS 50
56
57 #define SYNC_PROGRESS_ATTEMPTS 3
58 #define SYNC_TIMEOUT_USEC (10*USEC_PER_SEC)
59
60 static char* arg_verb;
61 static uint8_t arg_exit_code;
62 static usec_t arg_timeout = DEFAULT_TIMEOUT_USEC;
63
64 static int parse_argv(int argc, char *argv[]) {
65 enum {
66 ARG_LOG_LEVEL = 0x100,
67 ARG_LOG_TARGET,
68 ARG_LOG_COLOR,
69 ARG_LOG_LOCATION,
70 ARG_EXIT_CODE,
71 ARG_TIMEOUT,
72 };
73
74 static const struct option options[] = {
75 { "log-level", required_argument, NULL, ARG_LOG_LEVEL },
76 { "log-target", required_argument, NULL, ARG_LOG_TARGET },
77 { "log-color", optional_argument, NULL, ARG_LOG_COLOR },
78 { "log-location", optional_argument, NULL, ARG_LOG_LOCATION },
79 { "exit-code", required_argument, NULL, ARG_EXIT_CODE },
80 { "timeout", required_argument, NULL, ARG_TIMEOUT },
81 {}
82 };
83
84 int c, r;
85
86 assert(argc >= 1);
87 assert(argv);
88
89 /* "-" prevents getopt from permuting argv[] and moving the verb away
90 * from argv[1]. Our interface to initrd promises it'll be there. */
91 while ((c = getopt_long(argc, argv, "-", options, NULL)) >= 0)
92 switch (c) {
93
94 case ARG_LOG_LEVEL:
95 r = log_set_max_level_from_string(optarg);
96 if (r < 0)
97 log_error_errno(r, "Failed to parse log level %s, ignoring.", optarg);
98
99 break;
100
101 case ARG_LOG_TARGET:
102 r = log_set_target_from_string(optarg);
103 if (r < 0)
104 log_error_errno(r, "Failed to parse log target %s, ignoring", optarg);
105
106 break;
107
108 case ARG_LOG_COLOR:
109
110 if (optarg) {
111 r = log_show_color_from_string(optarg);
112 if (r < 0)
113 log_error_errno(r, "Failed to parse log color setting %s, ignoring", optarg);
114 } else
115 log_show_color(true);
116
117 break;
118
119 case ARG_LOG_LOCATION:
120 if (optarg) {
121 r = log_show_location_from_string(optarg);
122 if (r < 0)
123 log_error_errno(r, "Failed to parse log location setting %s, ignoring", optarg);
124 } else
125 log_show_location(true);
126
127 break;
128
129 case ARG_EXIT_CODE:
130 r = safe_atou8(optarg, &arg_exit_code);
131 if (r < 0)
132 log_error_errno(r, "Failed to parse exit code %s, ignoring", optarg);
133
134 break;
135
136 case ARG_TIMEOUT:
137 r = parse_sec(optarg, &arg_timeout);
138 if (r < 0)
139 log_error_errno(r, "Failed to parse shutdown timeout %s, ignoring", optarg);
140
141 break;
142
143 case '\001':
144 if (!arg_verb)
145 arg_verb = optarg;
146 else
147 log_error("Excess arguments, ignoring");
148 break;
149
150 case '?':
151 return -EINVAL;
152
153 default:
154 assert_not_reached("Unhandled option code.");
155 }
156
157 if (!arg_verb) {
158 log_error("Verb argument missing.");
159 return -EINVAL;
160 }
161
162 return 0;
163 }
164
/* Turn /run/initramfs into a private bind mount of itself and switch root
 * into it, keeping the old root reachable at /oldroot.
 *
 * Returns the result of switch_root() on success of the two mount() calls,
 * otherwise the value returned by log_error_errno() for the failing call.
 */
static int switch_root_initramfs(void) {
        int r;

        r = mount("/run/initramfs", "/run/initramfs", NULL, MS_BIND, NULL);
        if (r < 0)
                return log_error_errno(errno, "Failed to mount bind /run/initramfs on /run/initramfs: %m");

        r = mount(NULL, "/run/initramfs", NULL, MS_PRIVATE, NULL);
        if (r < 0)
                return log_error_errno(errno, "Failed to make /run/initramfs private mount: %m");

        /* Use MS_BIND for the switch: processes holding open file descriptors may
         * still be around, and /run/initramfs/shutdown will deal with them. The old
         * root is deliberately not detached (third argument is false), because
         * /run/initramfs/shutdown needs to access it. */
        return switch_root("/run/initramfs", "/oldroot", false, MS_BIND);
}
178
179 /* Read the following fields from /proc/meminfo:
180 *
181 * NFS_Unstable
182 * Writeback
183 * Dirty
184 *
185 * Return true if the sum of these fields is greater than the previous
186 * value input. For all other issues, report the failure and indicate that
187 * the sync is not making progress.
188 */
189 static bool sync_making_progress(unsigned long long *prev_dirty) {
190 _cleanup_fclose_ FILE *f = NULL;
191 char line[LINE_MAX];
192 bool r = false;
193 unsigned long long val = 0;
194
195 f = fopen("/proc/meminfo", "re");
196 if (!f)
197 return log_warning_errno(errno, "Failed to open /proc/meminfo: %m");
198
199 FOREACH_LINE(line, f, log_warning_errno(errno, "Failed to parse /proc/meminfo: %m")) {
200 unsigned long long ull = 0;
201
202 if (!first_word(line, "NFS_Unstable:") && !first_word(line, "Writeback:") && !first_word(line, "Dirty:"))
203 continue;
204
205 errno = 0;
206 if (sscanf(line, "%*s %llu %*s", &ull) != 1) {
207 if (errno != 0)
208 log_warning_errno(errno, "Failed to parse /proc/meminfo: %m");
209 else
210 log_warning("Failed to parse /proc/meminfo");
211
212 return false;
213 }
214
215 val += ull;
216 }
217
218 r = *prev_dirty > val;
219
220 *prev_dirty = val;
221
222 return r;
223 }
224
225 static void sync_with_progress(void) {
226 unsigned long long dirty = ULONG_LONG_MAX;
227 unsigned checks;
228 pid_t pid;
229 int r;
230
231 BLOCK_SIGNALS(SIGCHLD);
232
233 /* Due to the possiblity of the sync operation hanging, we fork a child process and monitor the progress. If
234 * the timeout lapses, the assumption is that that particular sync stalled. */
235
236 r = asynchronous_sync(&pid);
237 if (r < 0) {
238 log_error_errno(r, "Failed to fork sync(): %m");
239 return;
240 }
241
242 log_info("Syncing filesystems and block devices.");
243
244 /* Start monitoring the sync operation. If more than
245 * SYNC_PROGRESS_ATTEMPTS lapse without progress being made,
246 * we assume that the sync is stalled */
247 for (checks = 0; checks < SYNC_PROGRESS_ATTEMPTS; checks++) {
248 r = wait_for_terminate_with_timeout(pid, SYNC_TIMEOUT_USEC);
249 if (r == 0)
250 /* Sync finished without error.
251 * (The sync itself does not return an error code) */
252 return;
253 else if (r == -ETIMEDOUT) {
254 /* Reset the check counter if the "Dirty" value is
255 * decreasing */
256 if (sync_making_progress(&dirty))
257 checks = 0;
258 } else {
259 log_error_errno(r, "Failed to sync filesystems and block devices: %m");
260 return;
261 }
262 }
263
264 /* Only reached in the event of a timeout. We should issue a kill
265 * to the stray process. */
266 log_error("Syncing filesystems and block devices - timed out, issuing SIGKILL to PID "PID_FMT".", pid);
267 (void) kill(pid, SIGKILL);
268 }
269
270 int main(int argc, char *argv[]) {
271 bool need_umount, need_swapoff, need_loop_detach, need_dm_detach;
272 bool in_container, use_watchdog = false, can_initrd;
273 _cleanup_free_ char *cgroup = NULL;
274 char *arguments[3];
275 unsigned retries;
276 int cmd, r, umount_log_level = LOG_INFO;
277 static const char* const dirs[] = {SYSTEM_SHUTDOWN_PATH, NULL};
278 char *watchdog_device;
279
280 /* The log target defaults to console, but the original systemd process will pass its log target in through a
281 * command line argument, which will override this default. Also, ensure we'll never log to the journal or
282 * syslog, as these logging daemons are either already dead or will die very soon. */
283
284 log_set_target(LOG_TARGET_CONSOLE);
285 log_set_prohibit_ipc(true);
286 log_parse_environment();
287
288 r = parse_argv(argc, argv);
289 if (r < 0)
290 goto error;
291
292 log_open();
293
294 umask(0022);
295
296 if (getpid_cached() != 1) {
297 log_error("Not executed by init (PID 1).");
298 r = -EPERM;
299 goto error;
300 }
301
302 if (streq(arg_verb, "reboot"))
303 cmd = RB_AUTOBOOT;
304 else if (streq(arg_verb, "poweroff"))
305 cmd = RB_POWER_OFF;
306 else if (streq(arg_verb, "halt"))
307 cmd = RB_HALT_SYSTEM;
308 else if (streq(arg_verb, "kexec"))
309 cmd = LINUX_REBOOT_CMD_KEXEC;
310 else if (streq(arg_verb, "exit"))
311 cmd = 0; /* ignored, just checking that arg_verb is valid */
312 else {
313 log_error("Unknown action '%s'.", arg_verb);
314 r = -EINVAL;
315 goto error;
316 }
317
318 (void) cg_get_root_path(&cgroup);
319 in_container = detect_container() > 0;
320
321 use_watchdog = !!getenv("WATCHDOG_USEC");
322 watchdog_device = getenv("WATCHDOG_DEVICE");
323 if (watchdog_device) {
324 r = watchdog_set_device(watchdog_device);
325 if (r < 0)
326 log_warning_errno(r, "Failed to set watchdog device to %s, ignoring: %m",
327 watchdog_device);
328 }
329
330 /* Lock us into memory */
331 (void) mlockall(MCL_CURRENT|MCL_FUTURE);
332
333 /* Synchronize everything that is not written to disk yet at this point already. This is a good idea so that
334 * slow IO is processed here already and the final process killing spree is not impacted by processes
335 * desperately trying to sync IO to disk within their timeout. Do not remove this sync, data corruption will
336 * result. */
337 if (!in_container)
338 sync_with_progress();
339
340 disable_coredumps();
341
342 log_info("Sending SIGTERM to remaining processes...");
343 broadcast_signal(SIGTERM, true, true, arg_timeout);
344
345 log_info("Sending SIGKILL to remaining processes...");
346 broadcast_signal(SIGKILL, true, false, arg_timeout);
347
348 need_umount = !in_container;
349 need_swapoff = !in_container;
350 need_loop_detach = !in_container;
351 need_dm_detach = !in_container;
352 can_initrd = !in_container && !in_initrd() && access("/run/initramfs/shutdown", X_OK) == 0;
353
354 /* Unmount all mountpoints, swaps, and loopback devices */
355 for (retries = 0; retries < FINALIZE_ATTEMPTS; retries++) {
356 bool changed = false;
357
358 if (use_watchdog)
359 watchdog_ping();
360
361 /* Let's trim the cgroup tree on each iteration so
362 that we leave an empty cgroup tree around, so that
363 container managers get a nice notify event when we
364 are down */
365 if (cgroup)
366 cg_trim(SYSTEMD_CGROUP_CONTROLLER, cgroup, false);
367
368 if (need_umount) {
369 log_info("Unmounting file systems.");
370 r = umount_all(&changed, umount_log_level);
371 if (r == 0) {
372 need_umount = false;
373 log_info("All filesystems unmounted.");
374 } else if (r > 0)
375 log_info("Not all file systems unmounted, %d left.", r);
376 else
377 log_error_errno(r, "Failed to unmount file systems: %m");
378 }
379
380 if (need_swapoff) {
381 log_info("Deactivating swaps.");
382 r = swapoff_all(&changed);
383 if (r == 0) {
384 need_swapoff = false;
385 log_info("All swaps deactivated.");
386 } else if (r > 0)
387 log_info("Not all swaps deactivated, %d left.", r);
388 else
389 log_error_errno(r, "Failed to deactivate swaps: %m");
390 }
391
392 if (need_loop_detach) {
393 log_info("Detaching loop devices.");
394 r = loopback_detach_all(&changed, umount_log_level);
395 if (r == 0) {
396 need_loop_detach = false;
397 log_info("All loop devices detached.");
398 } else if (r > 0)
399 log_info("Not all loop devices detached, %d left.", r);
400 else
401 log_error_errno(r, "Failed to detach loop devices: %m");
402 }
403
404 if (need_dm_detach) {
405 log_info("Detaching DM devices.");
406 r = dm_detach_all(&changed, umount_log_level);
407 if (r == 0) {
408 need_dm_detach = false;
409 log_info("All DM devices detached.");
410 } else if (r > 0)
411 log_info("Not all DM devices detached, %d left.", r);
412 else
413 log_error_errno(r, "Failed to detach DM devices: %m");
414 }
415
416 if (!need_umount && !need_swapoff && !need_loop_detach && !need_dm_detach) {
417 if (retries > 0)
418 log_info("All filesystems, swaps, loop devices, DM devices detached.");
419 /* Yay, done */
420 goto initrd_jump;
421 }
422
423 if (!changed && umount_log_level == LOG_INFO && !can_initrd) {
424 /* There are things we cannot get rid of. Loop one more time
425 * with LOG_ERR to inform the user. Note that we don't need
426 * to do this if there is a initrd to switch to, because that
427 * one is likely to get rid of the remounting mounts. If not,
428 * it will log about them. */
429 umount_log_level = LOG_ERR;
430 continue;
431 }
432
433 /* If in this iteration we didn't manage to
434 * unmount/deactivate anything, we simply give up */
435 if (!changed) {
436 log_info("Cannot finalize remaining%s%s%s%s continuing.",
437 need_umount ? " file systems," : "",
438 need_swapoff ? " swap devices," : "",
439 need_loop_detach ? " loop devices," : "",
440 need_dm_detach ? " DM devices," : "");
441 goto initrd_jump;
442 }
443
444 log_debug("After %u retries, couldn't finalize remaining %s%s%s%s trying again.",
445 retries + 1,
446 need_umount ? " file systems," : "",
447 need_swapoff ? " swap devices," : "",
448 need_loop_detach ? " loop devices," : "",
449 need_dm_detach ? " DM devices," : "");
450 }
451
452 log_error("Too many iterations, giving up.");
453
454 initrd_jump:
455
456 /* We're done with the watchdog. */
457 watchdog_free_device();
458
459 arguments[0] = NULL;
460 arguments[1] = arg_verb;
461 arguments[2] = NULL;
462 execute_directories(dirs, DEFAULT_TIMEOUT_USEC, NULL, NULL, arguments);
463
464 if (can_initrd) {
465 r = switch_root_initramfs();
466 if (r >= 0) {
467 argv[0] = (char*) "/shutdown";
468
469 setsid();
470 make_console_stdio();
471
472 log_info("Successfully changed into root pivot.\n"
473 "Returning to initrd...");
474
475 execv("/shutdown", argv);
476 log_error_errno(errno, "Failed to execute shutdown binary: %m");
477 } else
478 log_error_errno(r, "Failed to switch root to \"/run/initramfs\": %m");
479
480 }
481
482 if (need_umount || need_swapoff || need_loop_detach || need_dm_detach)
483 log_error("Failed to finalize %s%s%s%s ignoring",
484 need_umount ? " file systems," : "",
485 need_swapoff ? " swap devices," : "",
486 need_loop_detach ? " loop devices," : "",
487 need_dm_detach ? " DM devices," : "");
488
489 /* The kernel will automatically flush ATA disks and suchlike on reboot(), but the file systems need to be
490 * sync'ed explicitly in advance. So let's do this here, but not needlessly slow down containers. Note that we
491 * sync'ed things already once above, but we did some more work since then which might have caused IO, hence
492 * let's do it once more. Do not remove this sync, data corruption will result. */
493 if (!in_container)
494 sync_with_progress();
495
496 if (streq(arg_verb, "exit")) {
497 if (in_container)
498 return arg_exit_code;
499
500 cmd = RB_POWER_OFF; /* We cannot exit() on the host, fallback on another method. */
501 }
502
503 switch (cmd) {
504
505 case LINUX_REBOOT_CMD_KEXEC:
506
507 if (!in_container) {
508 /* We cheat and exec kexec to avoid doing all its work */
509 log_info("Rebooting with kexec.");
510
511 r = safe_fork("(sd-kexec)", FORK_RESET_SIGNALS|FORK_CLOSE_ALL_FDS|FORK_LOG|FORK_WAIT, NULL);
512 if (r == 0) {
513 const char * const args[] = {
514 KEXEC, "-e", NULL
515 };
516
517 /* Child */
518
519 execv(args[0], (char * const *) args);
520 _exit(EXIT_FAILURE);
521 }
522
523 /* If we are still running, then the kexec can't have worked, let's fall through */
524 }
525
526 cmd = RB_AUTOBOOT;
527 _fallthrough_;
528
529 case RB_AUTOBOOT:
530 (void) reboot_with_parameter(REBOOT_LOG);
531 log_info("Rebooting.");
532 break;
533
534 case RB_POWER_OFF:
535 log_info("Powering off.");
536 break;
537
538 case RB_HALT_SYSTEM:
539 log_info("Halting system.");
540 break;
541
542 default:
543 assert_not_reached("Unknown magic");
544 }
545
546 (void) reboot(cmd);
547 if (errno == EPERM && in_container) {
548 /* If we are in a container, and we lacked
549 * CAP_SYS_BOOT just exit, this will kill our
550 * container for good. */
551 log_info("Exiting container.");
552 return EXIT_SUCCESS;
553 }
554
555 r = log_error_errno(errno, "Failed to invoke reboot(): %m");
556
557 error:
558 log_emergency_errno(r, "Critical error while doing system shutdown: %m");
559 freeze();
560 }