]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/core/shutdown.c
shutdown: always pass errno to logging functions
[thirdparty/systemd.git] / src / core / shutdown.c
1 /* SPDX-License-Identifier: LGPL-2.1+ */
2 /***
3 This file is part of systemd.
4
5 Copyright 2010 ProFUSION embedded systems
6
7 systemd is free software; you can redistribute it and/or modify it
8 under the terms of the GNU Lesser General Public License as published by
9 the Free Software Foundation; either version 2.1 of the License, or
10 (at your option) any later version.
11
12 systemd is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 Lesser General Public License for more details.
16
17 You should have received a copy of the GNU Lesser General Public License
18 along with systemd; If not, see <http://www.gnu.org/licenses/>.
19 ***/
20
21 #include <errno.h>
22 #include <getopt.h>
23 #include <linux/reboot.h>
24 #include <signal.h>
25 #include <stdbool.h>
26 #include <stdlib.h>
27 #include <sys/mman.h>
28 #include <sys/mount.h>
29 #include <sys/reboot.h>
30 #include <sys/stat.h>
31 #include <unistd.h>
32
33 #include "alloc-util.h"
34 #include "async.h"
35 #include "cgroup-util.h"
36 #include "def.h"
37 #include "exec-util.h"
38 #include "fd-util.h"
39 #include "fileio.h"
40 #include "killall.h"
41 #include "log.h"
42 #include "missing.h"
43 #include "parse-util.h"
44 #include "process-util.h"
45 #include "signal-util.h"
46 #include "string-util.h"
47 #include "switch-root.h"
48 #include "terminal-util.h"
49 #include "umount.h"
50 #include "util.h"
51 #include "virt.h"
52 #include "watchdog.h"
53
54 #define FINALIZE_ATTEMPTS 50
55
56 #define SYNC_PROGRESS_ATTEMPTS 3
57 #define SYNC_TIMEOUT_USEC (10*USEC_PER_SEC)
58
59 static char* arg_verb;
60 static uint8_t arg_exit_code;
61 static usec_t arg_timeout = DEFAULT_TIMEOUT_USEC;
62
63 static int parse_argv(int argc, char *argv[]) {
64 enum {
65 ARG_LOG_LEVEL = 0x100,
66 ARG_LOG_TARGET,
67 ARG_LOG_COLOR,
68 ARG_LOG_LOCATION,
69 ARG_EXIT_CODE,
70 ARG_TIMEOUT,
71 };
72
73 static const struct option options[] = {
74 { "log-level", required_argument, NULL, ARG_LOG_LEVEL },
75 { "log-target", required_argument, NULL, ARG_LOG_TARGET },
76 { "log-color", optional_argument, NULL, ARG_LOG_COLOR },
77 { "log-location", optional_argument, NULL, ARG_LOG_LOCATION },
78 { "exit-code", required_argument, NULL, ARG_EXIT_CODE },
79 { "timeout", required_argument, NULL, ARG_TIMEOUT },
80 {}
81 };
82
83 int c, r;
84
85 assert(argc >= 1);
86 assert(argv);
87
88 /* "-" prevents getopt from permuting argv[] and moving the verb away
89 * from argv[1]. Our interface to initrd promises it'll be there. */
90 while ((c = getopt_long(argc, argv, "-", options, NULL)) >= 0)
91 switch (c) {
92
93 case ARG_LOG_LEVEL:
94 r = log_set_max_level_from_string(optarg);
95 if (r < 0)
96 log_error_errno(r, "Failed to parse log level %s, ignoring.", optarg);
97
98 break;
99
100 case ARG_LOG_TARGET:
101 r = log_set_target_from_string(optarg);
102 if (r < 0)
103 log_error_errno(r, "Failed to parse log target %s, ignoring", optarg);
104
105 break;
106
107 case ARG_LOG_COLOR:
108
109 if (optarg) {
110 r = log_show_color_from_string(optarg);
111 if (r < 0)
112 log_error_errno(r, "Failed to parse log color setting %s, ignoring", optarg);
113 } else
114 log_show_color(true);
115
116 break;
117
118 case ARG_LOG_LOCATION:
119 if (optarg) {
120 r = log_show_location_from_string(optarg);
121 if (r < 0)
122 log_error_errno(r, "Failed to parse log location setting %s, ignoring", optarg);
123 } else
124 log_show_location(true);
125
126 break;
127
128 case ARG_EXIT_CODE:
129 r = safe_atou8(optarg, &arg_exit_code);
130 if (r < 0)
131 log_error_errno(r, "Failed to parse exit code %s, ignoring", optarg);
132
133 break;
134
135 case ARG_TIMEOUT:
136 r = parse_sec(optarg, &arg_timeout);
137 if (r < 0)
138 log_error_errno(r, "Failed to parse shutdown timeout %s, ignoring", optarg);
139
140 break;
141
142 case '\001':
143 if (!arg_verb)
144 arg_verb = optarg;
145 else
146 log_error("Excess arguments, ignoring");
147 break;
148
149 case '?':
150 return -EINVAL;
151
152 default:
153 assert_not_reached("Unhandled option code.");
154 }
155
156 if (!arg_verb) {
157 log_error("Verb argument missing.");
158 return -EINVAL;
159 }
160
161 return 0;
162 }
163
164 static int switch_root_initramfs(void) {
165 if (mount("/run/initramfs", "/run/initramfs", NULL, MS_BIND, NULL) < 0)
166 return log_error_errno(errno, "Failed to mount bind /run/initramfs on /run/initramfs: %m");
167
168 if (mount(NULL, "/run/initramfs", NULL, MS_PRIVATE, NULL) < 0)
169 return log_error_errno(errno, "Failed to make /run/initramfs private mount: %m");
170
171 /* switch_root with MS_BIND, because there might still be processes lurking around, which have open file descriptors.
172 * /run/initramfs/shutdown will take care of these.
173 * Also do not detach the old root, because /run/initramfs/shutdown needs to access it.
174 */
175 return switch_root("/run/initramfs", "/oldroot", false, MS_BIND);
176 }
177
178 /* Read the following fields from /proc/meminfo:
179 *
180 * NFS_Unstable
181 * Writeback
182 * Dirty
183 *
184 * Return true if the sum of these fields is greater than the previous
185 * value input. For all other issues, report the failure and indicate that
186 * the sync is not making progress.
187 */
188 static bool sync_making_progress(unsigned long long *prev_dirty) {
189 _cleanup_fclose_ FILE *f = NULL;
190 char line[LINE_MAX];
191 bool r = false;
192 unsigned long long val = 0;
193
194 f = fopen("/proc/meminfo", "re");
195 if (!f)
196 return log_warning_errno(errno, "Failed to open /proc/meminfo: %m");
197
198 FOREACH_LINE(line, f, log_warning_errno(errno, "Failed to parse /proc/meminfo: %m")) {
199 unsigned long long ull = 0;
200
201 if (!first_word(line, "NFS_Unstable:") && !first_word(line, "Writeback:") && !first_word(line, "Dirty:"))
202 continue;
203
204 errno = 0;
205 if (sscanf(line, "%*s %llu %*s", &ull) != 1) {
206 if (errno != 0)
207 log_warning_errno(errno, "Failed to parse /proc/meminfo: %m");
208 else
209 log_warning("Failed to parse /proc/meminfo");
210
211 return false;
212 }
213
214 val += ull;
215 }
216
217 r = *prev_dirty > val;
218
219 *prev_dirty = val;
220
221 return r;
222 }
223
224 static void sync_with_progress(void) {
225 unsigned long long dirty = ULONG_LONG_MAX;
226 unsigned checks;
227 pid_t pid;
228 int r;
229
230 BLOCK_SIGNALS(SIGCHLD);
231
232 /* Due to the possiblity of the sync operation hanging, we fork a child process and monitor the progress. If
233 * the timeout lapses, the assumption is that that particular sync stalled. */
234
235 r = asynchronous_sync(&pid);
236 if (r < 0) {
237 log_error_errno(r, "Failed to fork sync(): %m");
238 return;
239 }
240
241 log_info("Syncing filesystems and block devices.");
242
243 /* Start monitoring the sync operation. If more than
244 * SYNC_PROGRESS_ATTEMPTS lapse without progress being made,
245 * we assume that the sync is stalled */
246 for (checks = 0; checks < SYNC_PROGRESS_ATTEMPTS; checks++) {
247 r = wait_for_terminate_with_timeout(pid, SYNC_TIMEOUT_USEC);
248 if (r == 0)
249 /* Sync finished without error.
250 * (The sync itself does not return an error code) */
251 return;
252 else if (r == -ETIMEDOUT) {
253 /* Reset the check counter if the "Dirty" value is
254 * decreasing */
255 if (sync_making_progress(&dirty))
256 checks = 0;
257 } else {
258 log_error_errno(r, "Failed to sync filesystems and block devices: %m");
259 return;
260 }
261 }
262
263 /* Only reached in the event of a timeout. We should issue a kill
264 * to the stray process. */
265 log_error("Syncing filesystems and block devices - timed out, issuing SIGKILL to PID "PID_FMT".", pid);
266 (void) kill(pid, SIGKILL);
267 }
268
269 int main(int argc, char *argv[]) {
270 bool need_umount, need_swapoff, need_loop_detach, need_dm_detach;
271 bool in_container, use_watchdog = false;
272 _cleanup_free_ char *cgroup = NULL;
273 char *arguments[3];
274 unsigned retries;
275 int cmd, r;
276 static const char* const dirs[] = {SYSTEM_SHUTDOWN_PATH, NULL};
277 char *watchdog_device;
278
279 log_parse_environment();
280 r = parse_argv(argc, argv);
281 if (r < 0)
282 goto error;
283
284 /* journald will die if not gone yet. The log target defaults
285 * to console, but may have been changed by command line options. */
286
287 log_set_prohibit_ipc(true);
288 log_open();
289
290 umask(0022);
291
292 if (getpid_cached() != 1) {
293 log_error("Not executed by init (PID 1).");
294 r = -EPERM;
295 goto error;
296 }
297
298 if (streq(arg_verb, "reboot"))
299 cmd = RB_AUTOBOOT;
300 else if (streq(arg_verb, "poweroff"))
301 cmd = RB_POWER_OFF;
302 else if (streq(arg_verb, "halt"))
303 cmd = RB_HALT_SYSTEM;
304 else if (streq(arg_verb, "kexec"))
305 cmd = LINUX_REBOOT_CMD_KEXEC;
306 else if (streq(arg_verb, "exit"))
307 cmd = 0; /* ignored, just checking that arg_verb is valid */
308 else {
309 r = -EINVAL;
310 log_error("Unknown action '%s'.", arg_verb);
311 goto error;
312 }
313
314 (void) cg_get_root_path(&cgroup);
315 in_container = detect_container() > 0;
316
317 use_watchdog = !!getenv("WATCHDOG_USEC");
318 watchdog_device = getenv("WATCHDOG_DEVICE");
319 if (watchdog_device) {
320 r = watchdog_set_device(watchdog_device);
321 if (r < 0)
322 log_warning_errno(r, "Failed to set watchdog device to %s, ignoring: %m",
323 watchdog_device);
324 }
325
326 /* Lock us into memory */
327 mlockall(MCL_CURRENT|MCL_FUTURE);
328
329 /* Synchronize everything that is not written to disk yet at this point already. This is a good idea so that
330 * slow IO is processed here already and the final process killing spree is not impacted by processes
331 * desperately trying to sync IO to disk within their timeout. Do not remove this sync, data corruption will
332 * result. */
333 if (!in_container)
334 sync_with_progress();
335
336 disable_coredumps();
337
338 log_info("Sending SIGTERM to remaining processes...");
339 broadcast_signal(SIGTERM, true, true, arg_timeout);
340
341 log_info("Sending SIGKILL to remaining processes...");
342 broadcast_signal(SIGKILL, true, false, arg_timeout);
343
344 need_umount = !in_container;
345 need_swapoff = !in_container;
346 need_loop_detach = !in_container;
347 need_dm_detach = !in_container;
348
349 /* Unmount all mountpoints, swaps, and loopback devices */
350 for (retries = 0; retries < FINALIZE_ATTEMPTS; retries++) {
351 bool changed = false;
352
353 if (use_watchdog)
354 watchdog_ping();
355
356 /* Let's trim the cgroup tree on each iteration so
357 that we leave an empty cgroup tree around, so that
358 container managers get a nice notify event when we
359 are down */
360 if (cgroup)
361 cg_trim(SYSTEMD_CGROUP_CONTROLLER, cgroup, false);
362
363 if (need_umount) {
364 log_info("Unmounting file systems.");
365 r = umount_all(&changed);
366 if (r == 0) {
367 need_umount = false;
368 log_info("All filesystems unmounted.");
369 } else if (r > 0)
370 log_info("Not all file systems unmounted, %d left.", r);
371 else
372 log_error_errno(r, "Failed to unmount file systems: %m");
373 }
374
375 if (need_swapoff) {
376 log_info("Deactivating swaps.");
377 r = swapoff_all(&changed);
378 if (r == 0) {
379 need_swapoff = false;
380 log_info("All swaps deactivated.");
381 } else if (r > 0)
382 log_info("Not all swaps deactivated, %d left.", r);
383 else
384 log_error_errno(r, "Failed to deactivate swaps: %m");
385 }
386
387 if (need_loop_detach) {
388 log_info("Detaching loop devices.");
389 r = loopback_detach_all(&changed);
390 if (r == 0) {
391 need_loop_detach = false;
392 log_info("All loop devices detached.");
393 } else if (r > 0)
394 log_info("Not all loop devices detached, %d left.", r);
395 else
396 log_error_errno(r, "Failed to detach loop devices: %m");
397 }
398
399 if (need_dm_detach) {
400 log_info("Detaching DM devices.");
401 r = dm_detach_all(&changed);
402 if (r == 0) {
403 need_dm_detach = false;
404 log_info("All DM devices detached.");
405 } else if (r > 0)
406 log_info("Not all DM devices detached, %d left.", r);
407 else
408 log_error_errno(r, "Failed to detach DM devices: %m");
409 }
410
411 if (!need_umount && !need_swapoff && !need_loop_detach && !need_dm_detach) {
412 if (retries > 0)
413 log_info("All filesystems, swaps, loop devices, DM devices detached.");
414 /* Yay, done */
415 goto initrd_jump;
416 }
417
418 /* If in this iteration we didn't manage to
419 * unmount/deactivate anything, we simply give up */
420 if (!changed) {
421 log_info("Cannot finalize remaining%s%s%s%s continuing.",
422 need_umount ? " file systems," : "",
423 need_swapoff ? " swap devices," : "",
424 need_loop_detach ? " loop devices," : "",
425 need_dm_detach ? " DM devices," : "");
426 goto initrd_jump;
427 }
428
429 log_debug("After %u retries, couldn't finalize remaining %s%s%s%s trying again.",
430 retries + 1,
431 need_umount ? " file systems," : "",
432 need_swapoff ? " swap devices," : "",
433 need_loop_detach ? " loop devices," : "",
434 need_dm_detach ? " DM devices," : "");
435 }
436
437 log_error("Too many iterations, giving up.");
438
439 initrd_jump:
440
441 /* We're done with the watchdog. */
442 watchdog_free_device();
443
444 arguments[0] = NULL;
445 arguments[1] = arg_verb;
446 arguments[2] = NULL;
447 execute_directories(dirs, DEFAULT_TIMEOUT_USEC, NULL, NULL, arguments);
448
449 if (!in_container && !in_initrd() &&
450 access("/run/initramfs/shutdown", X_OK) == 0) {
451 r = switch_root_initramfs();
452 if (r >= 0) {
453 argv[0] = (char*) "/shutdown";
454
455 setsid();
456 make_console_stdio();
457
458 log_info("Successfully changed into root pivot.\n"
459 "Returning to initrd...");
460
461 execv("/shutdown", argv);
462 log_error_errno(errno, "Failed to execute shutdown binary: %m");
463 } else
464 log_error_errno(r, "Failed to switch root to \"/run/initramfs\": %m");
465
466 }
467
468 if (need_umount || need_swapoff || need_loop_detach || need_dm_detach)
469 log_error("Failed to finalize %s%s%s%s ignoring",
470 need_umount ? " file systems," : "",
471 need_swapoff ? " swap devices," : "",
472 need_loop_detach ? " loop devices," : "",
473 need_dm_detach ? " DM devices," : "");
474
475 /* The kernel will automatically flush ATA disks and suchlike on reboot(), but the file systems need to be
476 * sync'ed explicitly in advance. So let's do this here, but not needlessly slow down containers. Note that we
477 * sync'ed things already once above, but we did some more work since then which might have caused IO, hence
478 * let's do it once more. Do not remove this sync, data corruption will result. */
479 if (!in_container)
480 sync_with_progress();
481
482 if (streq(arg_verb, "exit")) {
483 if (in_container)
484 exit(arg_exit_code);
485 else {
486 /* We cannot exit() on the host, fallback on another
487 * method. */
488 cmd = RB_POWER_OFF;
489 }
490 }
491
492 switch (cmd) {
493
494 case LINUX_REBOOT_CMD_KEXEC:
495
496 if (!in_container) {
497 /* We cheat and exec kexec to avoid doing all its work */
498 log_info("Rebooting with kexec.");
499
500 r = safe_fork("(sd-kexec)", FORK_RESET_SIGNALS|FORK_CLOSE_ALL_FDS|FORK_LOG|FORK_WAIT, NULL);
501 if (r == 0) {
502 const char * const args[] = {
503 KEXEC, "-e", NULL
504 };
505
506 /* Child */
507
508 execv(args[0], (char * const *) args);
509 _exit(EXIT_FAILURE);
510 }
511
512 /* If we are still running, then the kexec can't have worked, let's fall through */
513 }
514
515 cmd = RB_AUTOBOOT;
516 _fallthrough_;
517 case RB_AUTOBOOT:
518
519 if (!in_container) {
520 _cleanup_free_ char *param = NULL;
521
522 r = read_one_line_file("/run/systemd/reboot-param", &param);
523 if (r < 0 && r != -ENOENT)
524 log_warning_errno(r, "Failed to read reboot parameter file: %m");
525
526 if (!isempty(param)) {
527 log_info("Rebooting with argument '%s'.", param);
528 syscall(SYS_reboot, LINUX_REBOOT_MAGIC1, LINUX_REBOOT_MAGIC2, LINUX_REBOOT_CMD_RESTART2, param);
529 log_warning_errno(errno, "Failed to reboot with parameter, retrying without: %m");
530 }
531 }
532
533 log_info("Rebooting.");
534 break;
535
536 case RB_POWER_OFF:
537 log_info("Powering off.");
538 break;
539
540 case RB_HALT_SYSTEM:
541 log_info("Halting system.");
542 break;
543
544 default:
545 assert_not_reached("Unknown magic");
546 }
547
548 reboot(cmd);
549 if (errno == EPERM && in_container) {
550 /* If we are in a container, and we lacked
551 * CAP_SYS_BOOT just exit, this will kill our
552 * container for good. */
553 log_info("Exiting container.");
554 exit(EXIT_SUCCESS);
555 }
556
557 r = log_error_errno(errno, "Failed to invoke reboot(): %m");
558
559 error:
560 log_emergency_errno(r, "Critical error while doing system shutdown: %m");
561 freeze();
562 }