]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/core/shutdown.c
Merge pull request #7191 from Mic92/systemd
[thirdparty/systemd.git] / src / core / shutdown.c
1 /* SPDX-License-Identifier: LGPL-2.1+ */
2 /***
3 This file is part of systemd.
4
5 Copyright 2010 ProFUSION embedded systems
6
7 systemd is free software; you can redistribute it and/or modify it
8 under the terms of the GNU Lesser General Public License as published by
9 the Free Software Foundation; either version 2.1 of the License, or
10 (at your option) any later version.
11
12 systemd is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 Lesser General Public License for more details.
16
17 You should have received a copy of the GNU Lesser General Public License
18 along with systemd; If not, see <http://www.gnu.org/licenses/>.
19 ***/
20
21 #include <errno.h>
22 #include <getopt.h>
23 #include <linux/reboot.h>
24 #include <signal.h>
25 #include <stdbool.h>
26 #include <stdlib.h>
27 #include <sys/mman.h>
28 #include <sys/mount.h>
29 #include <sys/reboot.h>
30 #include <sys/stat.h>
31 #include <unistd.h>
32
33 #include "alloc-util.h"
34 #include "async.h"
35 #include "cgroup-util.h"
36 #include "def.h"
37 #include "exec-util.h"
38 #include "fd-util.h"
39 #include "fileio.h"
40 #include "killall.h"
41 #include "log.h"
42 #include "missing.h"
43 #include "parse-util.h"
44 #include "process-util.h"
45 #include "signal-util.h"
46 #include "string-util.h"
47 #include "switch-root.h"
48 #include "terminal-util.h"
49 #include "umount.h"
50 #include "util.h"
51 #include "virt.h"
52 #include "watchdog.h"
53
/* Upper bound on the number of unmount/swapoff/detach passes main() performs before giving up. */
#define FINALIZE_ATTEMPTS 50

/* Number of consecutive sync timeouts without progress that sync_with_progress() tolerates. */
#define SYNC_PROGRESS_ATTEMPTS 3
/* Timeout passed to each individual wait for the sync child process. */
#define SYNC_TIMEOUT_USEC (10*USEC_PER_SEC)

/* First positional command line argument, as stored by parse_argv(). */
static char *arg_verb = NULL;
/* Value of --exit-code=; main() exits with it for the "exit" verb when running in a container. */
static uint8_t arg_exit_code = 0;
61
62 static int parse_argv(int argc, char *argv[]) {
63 enum {
64 ARG_LOG_LEVEL = 0x100,
65 ARG_LOG_TARGET,
66 ARG_LOG_COLOR,
67 ARG_LOG_LOCATION,
68 ARG_EXIT_CODE,
69 };
70
71 static const struct option options[] = {
72 { "log-level", required_argument, NULL, ARG_LOG_LEVEL },
73 { "log-target", required_argument, NULL, ARG_LOG_TARGET },
74 { "log-color", optional_argument, NULL, ARG_LOG_COLOR },
75 { "log-location", optional_argument, NULL, ARG_LOG_LOCATION },
76 { "exit-code", required_argument, NULL, ARG_EXIT_CODE },
77 {}
78 };
79
80 int c, r;
81
82 assert(argc >= 1);
83 assert(argv);
84
85 /* "-" prevents getopt from permuting argv[] and moving the verb away
86 * from argv[1]. Our interface to initrd promises it'll be there. */
87 while ((c = getopt_long(argc, argv, "-", options, NULL)) >= 0)
88 switch (c) {
89
90 case ARG_LOG_LEVEL:
91 r = log_set_max_level_from_string(optarg);
92 if (r < 0)
93 log_error("Failed to parse log level %s, ignoring.", optarg);
94
95 break;
96
97 case ARG_LOG_TARGET:
98 r = log_set_target_from_string(optarg);
99 if (r < 0)
100 log_error("Failed to parse log target %s, ignoring", optarg);
101
102 break;
103
104 case ARG_LOG_COLOR:
105
106 if (optarg) {
107 r = log_show_color_from_string(optarg);
108 if (r < 0)
109 log_error("Failed to parse log color setting %s, ignoring", optarg);
110 } else
111 log_show_color(true);
112
113 break;
114
115 case ARG_LOG_LOCATION:
116 if (optarg) {
117 r = log_show_location_from_string(optarg);
118 if (r < 0)
119 log_error("Failed to parse log location setting %s, ignoring", optarg);
120 } else
121 log_show_location(true);
122
123 break;
124
125 case ARG_EXIT_CODE:
126 r = safe_atou8(optarg, &arg_exit_code);
127 if (r < 0)
128 log_error("Failed to parse exit code %s, ignoring", optarg);
129
130 break;
131
132 case '\001':
133 if (!arg_verb)
134 arg_verb = optarg;
135 else
136 log_error("Excess arguments, ignoring");
137 break;
138
139 case '?':
140 return -EINVAL;
141
142 default:
143 assert_not_reached("Unhandled option code.");
144 }
145
146 if (!arg_verb) {
147 log_error("Verb argument missing.");
148 return -EINVAL;
149 }
150
151 return 0;
152 }
153
154 static int switch_root_initramfs(void) {
155 if (mount("/run/initramfs", "/run/initramfs", NULL, MS_BIND, NULL) < 0)
156 return log_error_errno(errno, "Failed to mount bind /run/initramfs on /run/initramfs: %m");
157
158 if (mount(NULL, "/run/initramfs", NULL, MS_PRIVATE, NULL) < 0)
159 return log_error_errno(errno, "Failed to make /run/initramfs private mount: %m");
160
161 /* switch_root with MS_BIND, because there might still be processes lurking around, which have open file descriptors.
162 * /run/initramfs/shutdown will take care of these.
163 * Also do not detach the old root, because /run/initramfs/shutdown needs to access it.
164 */
165 return switch_root("/run/initramfs", "/oldroot", false, MS_BIND);
166 }
167
168 /* Read the following fields from /proc/meminfo:
169 *
170 * NFS_Unstable
171 * Writeback
172 * Dirty
173 *
174 * Return true if the sum of these fields is greater than the previous
175 * value input. For all other issues, report the failure and indicate that
176 * the sync is not making progress.
177 */
178 static bool sync_making_progress(unsigned long long *prev_dirty) {
179 _cleanup_fclose_ FILE *f = NULL;
180 char line[LINE_MAX];
181 bool r = false;
182 unsigned long long val = 0;
183
184 f = fopen("/proc/meminfo", "re");
185 if (!f)
186 return log_warning_errno(errno, "Failed to open /proc/meminfo: %m");
187
188 FOREACH_LINE(line, f, log_warning_errno(errno, "Failed to parse /proc/meminfo: %m")) {
189 unsigned long long ull = 0;
190
191 if (!first_word(line, "NFS_Unstable:") && !first_word(line, "Writeback:") && !first_word(line, "Dirty:"))
192 continue;
193
194 errno = 0;
195 if (sscanf(line, "%*s %llu %*s", &ull) != 1) {
196 if (errno != 0)
197 log_warning_errno(errno, "Failed to parse /proc/meminfo: %m");
198 else
199 log_warning("Failed to parse /proc/meminfo");
200
201 return false;
202 }
203
204 val += ull;
205 }
206
207 r = *prev_dirty > val;
208
209 *prev_dirty = val;
210
211 return r;
212 }
213
214 static void sync_with_progress(void) {
215 unsigned long long dirty = ULONG_LONG_MAX;
216 unsigned checks;
217 pid_t pid;
218 int r;
219
220 BLOCK_SIGNALS(SIGCHLD);
221
222 /* Due to the possiblity of the sync operation hanging, we fork a child process and monitor the progress. If
223 * the timeout lapses, the assumption is that that particular sync stalled. */
224
225 r = asynchronous_sync(&pid);
226 if (r < 0) {
227 log_error_errno(r, "Failed to fork sync(): %m");
228 return;
229 }
230
231 log_info("Syncing filesystems and block devices.");
232
233 /* Start monitoring the sync operation. If more than
234 * SYNC_PROGRESS_ATTEMPTS lapse without progress being made,
235 * we assume that the sync is stalled */
236 for (checks = 0; checks < SYNC_PROGRESS_ATTEMPTS; checks++) {
237 r = wait_for_terminate_with_timeout(pid, SYNC_TIMEOUT_USEC);
238 if (r == 0)
239 /* Sync finished without error.
240 * (The sync itself does not return an error code) */
241 return;
242 else if (r == -ETIMEDOUT) {
243 /* Reset the check counter if the "Dirty" value is
244 * decreasing */
245 if (sync_making_progress(&dirty))
246 checks = 0;
247 } else {
248 log_error_errno(r, "Failed to sync filesystems and block devices: %m");
249 return;
250 }
251 }
252
253 /* Only reached in the event of a timeout. We should issue a kill
254 * to the stray process. */
255 log_error("Syncing filesystems and block devices - timed out, issuing SIGKILL to PID "PID_FMT".", pid);
256 (void) kill(pid, SIGKILL);
257 }
258
259 int main(int argc, char *argv[]) {
260 bool need_umount, need_swapoff, need_loop_detach, need_dm_detach;
261 bool in_container, use_watchdog = false;
262 _cleanup_free_ char *cgroup = NULL;
263 char *arguments[3];
264 unsigned retries;
265 int cmd, r;
266 static const char* const dirs[] = {SYSTEM_SHUTDOWN_PATH, NULL};
267 char *watchdog_device;
268
269 log_parse_environment();
270 r = parse_argv(argc, argv);
271 if (r < 0)
272 goto error;
273
274 /* journald will die if not gone yet. The log target defaults
275 * to console, but may have been changed by command line options. */
276
277 log_close_console(); /* force reopen of /dev/console */
278 log_open();
279
280 umask(0022);
281
282 if (getpid_cached() != 1) {
283 log_error("Not executed by init (PID 1).");
284 r = -EPERM;
285 goto error;
286 }
287
288 if (streq(arg_verb, "reboot"))
289 cmd = RB_AUTOBOOT;
290 else if (streq(arg_verb, "poweroff"))
291 cmd = RB_POWER_OFF;
292 else if (streq(arg_verb, "halt"))
293 cmd = RB_HALT_SYSTEM;
294 else if (streq(arg_verb, "kexec"))
295 cmd = LINUX_REBOOT_CMD_KEXEC;
296 else if (streq(arg_verb, "exit"))
297 cmd = 0; /* ignored, just checking that arg_verb is valid */
298 else {
299 r = -EINVAL;
300 log_error("Unknown action '%s'.", arg_verb);
301 goto error;
302 }
303
304 (void) cg_get_root_path(&cgroup);
305 in_container = detect_container() > 0;
306
307 use_watchdog = !!getenv("WATCHDOG_USEC");
308 watchdog_device = getenv("WATCHDOG_DEVICE");
309 if (watchdog_device) {
310 r = watchdog_set_device(watchdog_device);
311 if (r < 0)
312 log_warning_errno(r, "Failed to set watchdog device to %s, ignoring: %m",
313 watchdog_device);
314 }
315
316 /* Lock us into memory */
317 mlockall(MCL_CURRENT|MCL_FUTURE);
318
319 /* Synchronize everything that is not written to disk yet at this point already. This is a good idea so that
320 * slow IO is processed here already and the final process killing spree is not impacted by processes
321 * desperately trying to sync IO to disk within their timeout. Do not remove this sync, data corruption will
322 * result. */
323 if (!in_container)
324 sync_with_progress();
325
326 log_info("Sending SIGTERM to remaining processes...");
327 broadcast_signal(SIGTERM, true, true);
328
329 log_info("Sending SIGKILL to remaining processes...");
330 broadcast_signal(SIGKILL, true, false);
331
332 need_umount = !in_container;
333 need_swapoff = !in_container;
334 need_loop_detach = !in_container;
335 need_dm_detach = !in_container;
336
337 /* Unmount all mountpoints, swaps, and loopback devices */
338 for (retries = 0; retries < FINALIZE_ATTEMPTS; retries++) {
339 bool changed = false;
340
341 if (use_watchdog)
342 watchdog_ping();
343
344 /* Let's trim the cgroup tree on each iteration so
345 that we leave an empty cgroup tree around, so that
346 container managers get a nice notify event when we
347 are down */
348 if (cgroup)
349 cg_trim(SYSTEMD_CGROUP_CONTROLLER, cgroup, false);
350
351 if (need_umount) {
352 log_info("Unmounting file systems.");
353 r = umount_all(&changed);
354 if (r == 0) {
355 need_umount = false;
356 log_info("All filesystems unmounted.");
357 } else if (r > 0)
358 log_info("Not all file systems unmounted, %d left.", r);
359 else
360 log_error_errno(r, "Failed to unmount file systems: %m");
361 }
362
363 if (need_swapoff) {
364 log_info("Deactivating swaps.");
365 r = swapoff_all(&changed);
366 if (r == 0) {
367 need_swapoff = false;
368 log_info("All swaps deactivated.");
369 } else if (r > 0)
370 log_info("Not all swaps deactivated, %d left.", r);
371 else
372 log_error_errno(r, "Failed to deactivate swaps: %m");
373 }
374
375 if (need_loop_detach) {
376 log_info("Detaching loop devices.");
377 r = loopback_detach_all(&changed);
378 if (r == 0) {
379 need_loop_detach = false;
380 log_info("All loop devices detached.");
381 } else if (r > 0)
382 log_info("Not all loop devices detached, %d left.", r);
383 else
384 log_error_errno(r, "Failed to detach loop devices: %m");
385 }
386
387 if (need_dm_detach) {
388 log_info("Detaching DM devices.");
389 r = dm_detach_all(&changed);
390 if (r == 0) {
391 need_dm_detach = false;
392 log_info("All DM devices detached.");
393 } else if (r > 0)
394 log_info("Not all DM devices detached, %d left.", r);
395 else
396 log_error_errno(r, "Failed to detach DM devices: %m");
397 }
398
399 if (!need_umount && !need_swapoff && !need_loop_detach && !need_dm_detach) {
400 if (retries > 0)
401 log_info("All filesystems, swaps, loop devices, DM devices detached.");
402 /* Yay, done */
403 goto initrd_jump;
404 }
405
406 /* If in this iteration we didn't manage to
407 * unmount/deactivate anything, we simply give up */
408 if (!changed) {
409 log_info("Cannot finalize remaining%s%s%s%s continuing.",
410 need_umount ? " file systems," : "",
411 need_swapoff ? " swap devices," : "",
412 need_loop_detach ? " loop devices," : "",
413 need_dm_detach ? " DM devices," : "");
414 goto initrd_jump;
415 }
416
417 log_debug("After %u retries, couldn't finalize remaining %s%s%s%s trying again.",
418 retries + 1,
419 need_umount ? " file systems," : "",
420 need_swapoff ? " swap devices," : "",
421 need_loop_detach ? " loop devices," : "",
422 need_dm_detach ? " DM devices," : "");
423 }
424
425 log_error("Too many iterations, giving up.");
426
427 initrd_jump:
428
429 /* We're done with the watchdog. */
430 watchdog_free_device();
431
432 arguments[0] = NULL;
433 arguments[1] = arg_verb;
434 arguments[2] = NULL;
435 execute_directories(dirs, DEFAULT_TIMEOUT_USEC, NULL, NULL, arguments);
436
437 if (!in_container && !in_initrd() &&
438 access("/run/initramfs/shutdown", X_OK) == 0) {
439 r = switch_root_initramfs();
440 if (r >= 0) {
441 argv[0] = (char*) "/shutdown";
442
443 setsid();
444 make_console_stdio();
445
446 log_info("Successfully changed into root pivot.\n"
447 "Returning to initrd...");
448
449 execv("/shutdown", argv);
450 log_error_errno(errno, "Failed to execute shutdown binary: %m");
451 } else
452 log_error_errno(r, "Failed to switch root to \"/run/initramfs\": %m");
453
454 }
455
456 if (need_umount || need_swapoff || need_loop_detach || need_dm_detach)
457 log_error("Failed to finalize %s%s%s%s ignoring",
458 need_umount ? " file systems," : "",
459 need_swapoff ? " swap devices," : "",
460 need_loop_detach ? " loop devices," : "",
461 need_dm_detach ? " DM devices," : "");
462
463 /* The kernel will automatically flush ATA disks and suchlike on reboot(), but the file systems need to be
464 * sync'ed explicitly in advance. So let's do this here, but not needlessly slow down containers. Note that we
465 * sync'ed things already once above, but we did some more work since then which might have caused IO, hence
466 * let's do it once more. Do not remove this sync, data corruption will result. */
467 if (!in_container)
468 sync_with_progress();
469
470 if (streq(arg_verb, "exit")) {
471 if (in_container)
472 exit(arg_exit_code);
473 else {
474 /* We cannot exit() on the host, fallback on another
475 * method. */
476 cmd = RB_POWER_OFF;
477 }
478 }
479
480 switch (cmd) {
481
482 case LINUX_REBOOT_CMD_KEXEC:
483
484 if (!in_container) {
485 /* We cheat and exec kexec to avoid doing all its work */
486 log_info("Rebooting with kexec.");
487
488 r = safe_fork("(sd-kexec)", FORK_RESET_SIGNALS|FORK_CLOSE_ALL_FDS|FORK_LOG|FORK_WAIT, NULL);
489 if (r == 0) {
490 const char * const args[] = {
491 KEXEC, "-e", NULL
492 };
493
494 /* Child */
495
496 execv(args[0], (char * const *) args);
497 _exit(EXIT_FAILURE);
498 }
499
500 /* If we are still running, then the kexec can't have worked, let's fall through */
501 }
502
503 cmd = RB_AUTOBOOT;
504 _fallthrough_;
505 case RB_AUTOBOOT:
506
507 if (!in_container) {
508 _cleanup_free_ char *param = NULL;
509
510 r = read_one_line_file("/run/systemd/reboot-param", &param);
511 if (r < 0 && r != -ENOENT)
512 log_warning_errno(r, "Failed to read reboot parameter file: %m");
513
514 if (!isempty(param)) {
515 log_info("Rebooting with argument '%s'.", param);
516 syscall(SYS_reboot, LINUX_REBOOT_MAGIC1, LINUX_REBOOT_MAGIC2, LINUX_REBOOT_CMD_RESTART2, param);
517 log_warning_errno(errno, "Failed to reboot with parameter, retrying without: %m");
518 }
519 }
520
521 log_info("Rebooting.");
522 break;
523
524 case RB_POWER_OFF:
525 log_info("Powering off.");
526 break;
527
528 case RB_HALT_SYSTEM:
529 log_info("Halting system.");
530 break;
531
532 default:
533 assert_not_reached("Unknown magic");
534 }
535
536 reboot(cmd);
537 if (errno == EPERM && in_container) {
538 /* If we are in a container, and we lacked
539 * CAP_SYS_BOOT just exit, this will kill our
540 * container for good. */
541 log_info("Exiting container.");
542 exit(EXIT_SUCCESS);
543 }
544
545 r = log_error_errno(errno, "Failed to invoke reboot(): %m");
546
547 error:
548 log_emergency_errno(r, "Critical error while doing system shutdown: %m");
549 freeze();
550 }