1 /* SPDX-License-Identifier: LGPL-2.1-or-later */
9 #include <sys/utsname.h>
12 #if HAVE_VALGRIND_VALGRIND_H
13 # include <valgrind/valgrind.h>
17 #include "sd-daemon.h"
18 #include "sd-messages.h"
20 #include "alloc-util.h"
21 #include "apparmor-setup.h"
22 #include "architecture.h"
23 #include "argv-util.h"
25 #include "bus-error.h"
26 #include "capability-util.h"
27 #include "cgroup-setup.h"
29 #include "clock-util.h"
30 #include "clock-warp.h"
31 #include "conf-parser.h"
32 #include "confidential-virt.h"
33 #include "constants.h"
35 #include "coredump-util.h"
36 #include "cpu-set-util.h"
37 #include "crash-handler.h"
39 #include "dbus-manager.h"
40 #include "dev-setup.h"
41 #include "efi-random.h"
42 #include "emergency-action.h"
48 #include "format-util.h"
49 #include "getopt-defs.h"
50 #include "hexdecoct.h"
51 #include "hostname-setup.h"
52 #include "id128-util.h"
53 #include "ima-setup.h"
54 #include "import-creds.h"
55 #include "initrd-util.h"
57 #include "ipe-setup.h"
59 #include "kmod-setup.h"
60 #include "label-util.h"
61 #include "limits-util.h"
62 #include "load-fragment.h"
64 #include "loopback-setup.h"
65 #include "machine-id-setup.h"
68 #include "manager-dump.h"
69 #include "manager-serialize.h"
70 #include "mkdir-label.h"
71 #include "mount-setup.h"
72 #include "mount-util.h"
74 #include "osc-context.h"
76 #include "parse-argument.h"
77 #include "parse-util.h"
78 #include "path-util.h"
79 #include "pretty-print.h"
80 #include "proc-cmdline.h"
81 #include "process-util.h"
82 #include "random-util.h"
83 #include "rlimit-util.h"
85 #include "seccomp-util.h"
86 #include "selinux-setup.h"
87 #include "selinux-util.h"
88 #include "serialize.h"
90 #include "signal-util.h"
91 #include "smack-setup.h"
93 #include "stat-util.h"
94 #include "stdio-util.h"
96 #include "switch-root.h"
97 #include "sysctl-util.h"
98 #include "terminal-util.h"
99 #include "time-util.h"
100 #include "umask-util.h"
101 #include "unit-name.h"
102 #include "user-util.h"
105 #include "watchdog.h"
107 #if HAS_FEATURE_ADDRESS_SANITIZER
108 #include <sanitizer/lsan_interface.h>
116 ACTION_DUMP_CONFIGURATION_ITEMS
,
117 ACTION_DUMP_BUS_PROPERTIES
,
118 ACTION_BUS_INTROSPECT
,
119 } arg_action
= ACTION_RUN
;
/* Target for ACTION_BUS_INTROSPECT. parse_argv() assigns optarg to it directly, so the string is
 * borrowed, not allocated here. */
121 static const char *arg_bus_introspect
= NULL
;
123 /* Those variables are initialized to 0 automatically, so we avoid uninitialized memory access. Real
124 * defaults are assigned in reset_arguments() below. */
125 static char *arg_default_unit
;
126 static RuntimeScope arg_runtime_scope
;
/* NOTE(review): the next two are declared without 'static', unlike their neighbours — presumably
 * because they are shared with another translation unit (crash-handler.h is included); confirm. */
129 bool arg_crash_shell
;
130 CrashAction arg_crash_action
;
131 static char *arg_confirm_spawn
;
132 static ShowStatus arg_show_status
;
133 static StatusUnitFormat arg_status_unit_format
;
134 static bool arg_switched_root
;
135 static PagerFlags arg_pager_flags
;
136 static bool arg_service_watchdogs
;
/* Defaults for newly created units; handed to manager_set_unit_defaults() in set_manager_defaults(). */
137 static UnitDefaults arg_defaults
;
/* Watchdog timeouts; applied through manager_set_watchdog() in set_manager_settings(). */
138 static usec_t arg_runtime_watchdog
;
139 static usec_t arg_reboot_watchdog
;
140 static usec_t arg_kexec_watchdog
;
141 static usec_t arg_pretimeout_watchdog
;
142 static char *arg_early_core_pattern
;
143 static char *arg_watchdog_pretimeout_governor
;
144 static char *arg_watchdog_device
;
145 static char **arg_default_environment
;
146 static char **arg_manager_environment
;
147 static uint64_t arg_capability_bounding_set
;
148 static bool arg_no_new_privs
;
149 static int arg_protect_system
;
150 static nsec_t arg_timer_slack_nsec
;
151 static Set
* arg_syscall_archs
;
152 static FILE* arg_serialization
;
153 static sd_id128_t arg_machine_id
;
154 static bool arg_machine_id_from_firmware
= false;
155 static EmergencyAction arg_cad_burst_action
;
156 static CPUSet arg_cpu_affinity
;
157 static NUMAPolicy arg_numa_policy
;
158 static usec_t arg_clock_usec
;
/* Decoded systemd.random_seed= payload and its length; the pointer is NULL when the payload is empty. */
159 static void *arg_random_seed
;
160 static size_t arg_random_seed_size
;
161 static usec_t arg_reload_limit_interval_sec
;
162 static unsigned arg_reload_limit_burst
;
164 /* A copy of the original environment block */
165 static char **saved_env
= NULL
;
/* Forward declaration of a file-local function defined elsewhere in this file. Judging by the
 * parameter names it receives the saved RLIMIT_NOFILE and RLIMIT_MEMLOCK limits — confirm at the
 * definition. */
167 static int parse_configuration(const struct rlimit
*saved_rlimit_nofile
,
168 const struct rlimit
*saved_rlimit_memlock
);
/* Generates the file-local parser config_parse_crash_action() for CrashAction values, built on the
 * crash_action string table with CRASH_FREEZE as the default. It backs the "CrashAction" entry in
 * parse_config_file().
 * NOTE(review): when exactly the default is applied is decided by the macro — confirm there. */
170 static DEFINE_CONFIG_PARSE_ENUM_WITH_DEFAULT(config_parse_crash_action
, crash_action
, CrashAction
, CRASH_FREEZE
);
/* Assembles the configuration file list and the drop-in directory list for the user manager.
 *
 * files: "<base>/user.conf" followed by PKGSYSCONFDIR "/user.conf", where base is the string
 *        produced by xdg_user_config_dir("/systemd", ...).
 * dirs:  base itself (ownership moved into the list), followed by CONF_PATHS_STRV("systemd").
 *
 * Both lists are handed to the caller through ret_files/ret_dirs with TAKE_PTR(), so the caller is
 * responsible for releasing them.
 * NOTE(review): the checks on r after each call are not visible here — confirm each step is checked. */
172 static int manager_find_user_config_paths(char ***ret_files
, char ***ret_dirs
) {
173 _cleanup_free_
char *base
= NULL
;
174 _cleanup_strv_free_
char **files
= NULL
, **dirs
= NULL
;
177 r
= xdg_user_config_dir("/systemd", &base
);
181 r
= strv_extendf(&files
, "%s/user.conf", base
);
185 r
= strv_extend(&files
, PKGSYSCONFDIR
"/user.conf");
/* base is moved into dirs here and must not be referenced afterwards. */
189 r
= strv_consume(&dirs
, TAKE_PTR(base
));
193 r
= strv_extend_strv(&dirs
, CONF_PATHS_STRV("systemd"), false);
197 *ret_files
= TAKE_PTR(files
);
198 *ret_dirs
= TAKE_PTR(dirs
);
/* Queries the window size of the terminal behind tty_fd (TIOCGWINSZ) and exports it as $COLUMNS and
 * $LINES. Every failure is only logged at debug level and described as ignored. */
202 static int save_console_winsize_in_environment(int tty_fd
) {
207 struct winsize ws
= {};
208 if (ioctl(tty_fd
, TIOCGWINSZ
, &ws
) < 0) {
209 log_debug_errno(errno
, "Failed to acquire console window size, ignoring.");
/* Only a size with both dimensions unset is treated as "no size"; a single zero dimension still
 * falls through to the export below. */
213 if (ws
.ws_col
<= 0 && ws
.ws_row
<= 0) {
214 log_debug("No console window size set, ignoring.");
218 r
= setenvf("COLUMNS", /* overwrite= */ true, "%u", ws
.ws_col
);
220 log_debug_errno(r
, "Failed to set $COLUMNS, ignoring: %m");
224 r
= setenvf("LINES", /* overwrite= */ true, "%u", ws
.ws_row
);
226 log_debug_errno(r
, "Failed to set $LINES, ignoring: %m");
230 log_debug("Recorded console dimensions in environment: $COLUMNS=%u $LINES=%u.", ws
.ws_col
, ws
.ws_row
);
/* NOTE(review): presumably the fallback path for the failure cases above, clearing both variables so
 * no stale or half-set size is left behind — the jump targets are not visible here; confirm. */
234 (void) unsetenv("COLUMNS");
235 (void) unsetenv("LINES");
/* Prepares /dev/console: opens it, resets it without forcing text mode, and records its window size
 * in the environment. The descriptor is closed automatically on return (_cleanup_close_).
 * NOTE(review): the statement guarded by the PID check below is not visible — presumably an early
 * return when not running as PID 1; confirm. */
239 static int console_setup(void) {
241 if (getpid_cached() != 1)
244 _cleanup_close_
int tty_fd
= -EBADF
;
246 tty_fd
= open_terminal("/dev/console", O_RDWR
|O_NOCTTY
|O_CLOEXEC
);
248 return log_error_errno(tty_fd
, "Failed to open %s: %m", "/dev/console");
250 /* We don't want to force text mode. Plymouth may be showing pictures already from initrd. */
251 reset_dev_console_fd(tty_fd
, /* switch_to_text= */ false);
/* The return value is not used: recording the size is best-effort. */
253 save_console_winsize_in_environment(tty_fd
);
/* Handles a single kernel command line item (key plus optional value; data is not referenced in the
 * visible code). Recognized "systemd.*" switches, as well as the bare "quiet" and "debug" words, are
 * stored in the arg_* globals. Going by the log messages, values that fail to parse are warned about
 * and ignored rather than treated as fatal. Keys that match nothing are passed to
 * runlevel_to_target() as a last resort.
 * NOTE(review): the result checks between each parse call and its log call, and most return
 * statements, are not visible here. */
258 static int parse_proc_cmdline_item(const char *key
, const char *value
, void *data
) {
263 if (STR_IN_SET(key
, "systemd.unit", "rd.systemd.unit")) {
265 if (proc_cmdline_value_missing(key
, value
))
268 if (!unit_name_is_valid(value
, UNIT_NAME_PLAIN
|UNIT_NAME_INSTANCE
))
269 log_warning("Unit name specified on %s= is not valid, ignoring: %s", key
, value
);
/* The "rd."-prefixed key only applies inside the initrd, the unprefixed one only outside of it. */
270 else if (in_initrd() == !!startswith(key
, "rd."))
271 return free_and_strdup_warn(&arg_default_unit
, value
);
273 } else if (proc_cmdline_key_streq(key
, "systemd.dump_core")) {
/* A switch given without a value counts as "true". */
275 r
= value
? parse_boolean(value
) : true;
277 log_warning_errno(r
, "Failed to parse dump core switch %s, ignoring: %m", value
);
281 } else if (proc_cmdline_key_streq(key
, "systemd.early_core_pattern")) {
283 if (proc_cmdline_value_missing(key
, value
))
286 if (path_is_absolute(value
))
287 (void) parse_path_argument(value
, false, &arg_early_core_pattern
);
289 log_warning("Specified core pattern '%s' is not an absolute path, ignoring.", value
);
291 } else if (proc_cmdline_key_streq(key
, "systemd.crash_chvt")) {
294 arg_crash_chvt
= 0; /* turn on */
296 r
= parse_crash_chvt(value
, &arg_crash_chvt
);
298 log_warning_errno(r
, "Failed to parse crash chvt switch %s, ignoring: %m", value
);
301 } else if (proc_cmdline_key_streq(key
, "systemd.crash_shell")) {
303 r
= value
? parse_boolean(value
) : true;
305 log_warning_errno(r
, "Failed to parse crash shell switch %s, ignoring: %m", value
);
309 } else if (proc_cmdline_key_streq(key
, "systemd.crash_reboot")) {
311 r
= value
? parse_boolean(value
) : true;
313 log_warning_errno(r
, "Failed to parse crash reboot switch %s, ignoring: %m", value
);
/* The boolean is translated into a crash action: true selects reboot, false selects freeze. */
315 arg_crash_action
= r
? CRASH_REBOOT
: CRASH_FREEZE
;
317 } else if (proc_cmdline_key_streq(key
, "systemd.crash_action")) {
319 if (proc_cmdline_value_missing(key
, value
))
322 r
= crash_action_from_string(value
);
324 log_warning_errno(r
, "Failed to parse crash action switch %s, ignoring: %m", value
);
326 arg_crash_action
= r
;
328 } else if (proc_cmdline_key_streq(key
, "systemd.confirm_spawn")) {
331 r
= parse_confirm_spawn(value
, &s
);
333 log_warning_errno(r
, "Failed to parse confirm_spawn switch %s, ignoring: %m", value
);
335 free_and_replace(arg_confirm_spawn
, s
);
337 } else if (proc_cmdline_key_streq(key
, "systemd.service_watchdogs")) {
339 r
= value
? parse_boolean(value
) : true;
341 log_warning_errno(r
, "Failed to parse service watchdog switch %s, ignoring: %m", value
);
343 arg_service_watchdogs
= r
;
345 } else if (proc_cmdline_key_streq(key
, "systemd.show_status")) {
348 r
= parse_show_status(value
, &arg_show_status
);
350 log_warning_errno(r
, "Failed to parse show status switch %s, ignoring: %m", value
);
352 arg_show_status
= SHOW_STATUS_YES
;
354 } else if (proc_cmdline_key_streq(key
, "systemd.status_unit_format")) {
356 if (proc_cmdline_value_missing(key
, value
))
359 r
= status_unit_format_from_string(value
);
361 log_warning_errno(r
, "Failed to parse %s=%s, ignoring: %m", key
, value
);
363 arg_status_unit_format
= r
;
365 } else if (proc_cmdline_key_streq(key
, "systemd.default_standard_output")) {
367 if (proc_cmdline_value_missing(key
, value
))
370 r
= exec_output_from_string(value
);
372 log_warning_errno(r
, "Failed to parse default standard output switch %s, ignoring: %m", value
);
374 arg_defaults
.std_output
= r
;
376 } else if (proc_cmdline_key_streq(key
, "systemd.default_standard_error")) {
378 if (proc_cmdline_value_missing(key
, value
))
381 r
= exec_output_from_string(value
);
383 log_warning_errno(r
, "Failed to parse default standard error switch %s, ignoring: %m", value
);
385 arg_defaults
.std_error
= r
;
387 } else if (streq(key
, "systemd.setenv")) {
389 if (proc_cmdline_value_missing(key
, value
))
392 if (!env_assignment_is_valid(value
))
393 log_warning("Environment variable assignment '%s' is not valid. Ignoring.", value
);
395 r
= strv_env_replace_strdup(&arg_default_environment
, value
);
400 } else if (proc_cmdline_key_streq(key
, "systemd.machine_id")) {
402 if (proc_cmdline_value_missing(key
, value
))
/* The literal value "firmware" sets a flag instead of being parsed as an ID. */
405 if (streq(value
, "firmware"))
406 arg_machine_id_from_firmware
= true;
408 r
= id128_from_string_nonzero(value
, &arg_machine_id
);
410 log_warning_errno(r
, "MachineID '%s' is not valid, ignoring: %m", value
);
412 arg_machine_id_from_firmware
= false;
414 } else if (proc_cmdline_key_streq(key
, "systemd.default_timeout_start_sec")) {
416 if (proc_cmdline_value_missing(key
, value
))
419 r
= parse_sec(value
, &arg_defaults
.timeout_start_usec
);
421 log_warning_errno(r
, "Failed to parse default start timeout '%s', ignoring: %m", value
);
/* A zero timeout is stored as USEC_INFINITY. */
423 if (arg_defaults
.timeout_start_usec
<= 0)
424 arg_defaults
.timeout_start_usec
= USEC_INFINITY
;
426 } else if (proc_cmdline_key_streq(key
, "systemd.default_device_timeout_sec")) {
428 if (proc_cmdline_value_missing(key
, value
))
431 r
= parse_sec(value
, &arg_defaults
.device_timeout_usec
);
433 log_warning_errno(r
, "Failed to parse default device timeout '%s', ignoring: %m", value
);
435 if (arg_defaults
.device_timeout_usec
<= 0)
436 arg_defaults
.device_timeout_usec
= USEC_INFINITY
;
438 } else if (proc_cmdline_key_streq(key
, "systemd.cpu_affinity")) {
440 if (proc_cmdline_value_missing(key
, value
))
443 r
= parse_cpu_set(value
, &arg_cpu_affinity
);
445 log_warning_errno(r
, "Failed to parse CPU affinity mask '%s', ignoring: %m", value
);
447 } else if (proc_cmdline_key_streq(key
, "systemd.watchdog_device")) {
449 if (proc_cmdline_value_missing(key
, value
))
452 (void) parse_path_argument(value
, false, &arg_watchdog_device
);
454 } else if (proc_cmdline_key_streq(key
, "systemd.watchdog_sec")) {
456 if (proc_cmdline_value_missing(key
, value
))
/* "default" maps to USEC_INFINITY and "off" to 0; anything else is parsed as a time span. */
459 if (streq(value
, "default"))
460 arg_runtime_watchdog
= USEC_INFINITY
;
461 else if (streq(value
, "off"))
462 arg_runtime_watchdog
= 0;
464 r
= parse_sec(value
, &arg_runtime_watchdog
);
466 log_warning_errno(r
, "Failed to parse systemd.watchdog_sec= argument '%s', ignoring: %m", value
);
/* The reboot and kexec watchdogs are set to the same value as the runtime watchdog. */
471 arg_kexec_watchdog
= arg_reboot_watchdog
= arg_runtime_watchdog
;
473 } else if (proc_cmdline_key_streq(key
, "systemd.watchdog_pre_sec")) {
475 if (proc_cmdline_value_missing(key
, value
))
478 if (streq(value
, "default"))
479 arg_pretimeout_watchdog
= USEC_INFINITY
;
480 else if (streq(value
, "off"))
481 arg_pretimeout_watchdog
= 0;
483 r
= parse_sec(value
, &arg_pretimeout_watchdog
);
485 log_warning_errno(r
, "Failed to parse systemd.watchdog_pre_sec= argument '%s', ignoring: %m", value
);
490 } else if (proc_cmdline_key_streq(key
, "systemd.watchdog_pretimeout_governor")) {
/* A missing or empty value clears any previously configured governor. */
492 if (proc_cmdline_value_missing(key
, value
) || isempty(value
)) {
493 arg_watchdog_pretimeout_governor
= mfree(arg_watchdog_pretimeout_governor
);
497 if (!string_is_safe(value
)) {
498 log_warning("Watchdog pretimeout governor '%s' is not valid, ignoring.", value
);
502 return free_and_strdup_warn(&arg_watchdog_pretimeout_governor
, value
);
504 } else if (proc_cmdline_key_streq(key
, "systemd.clock_usec")) {
506 if (proc_cmdline_value_missing(key
, value
))
509 r
= safe_atou64(value
, &arg_clock_usec
);
511 log_warning_errno(r
, "Failed to parse systemd.clock_usec= argument, ignoring: %s", value
);
513 } else if (proc_cmdline_key_streq(key
, "systemd.random_seed")) {
517 if (proc_cmdline_value_missing(key
, value
))
/* The value is base64; p and sz receive the decoded bytes and their length. */
520 r
= unbase64mem(value
, &p
, &sz
);
522 log_warning_errno(r
, "Failed to parse systemd.random_seed= argument, ignoring: %s", value
);
/* Any earlier seed is dropped. An empty decoded seed is not kept: p is released instead. */
524 free(arg_random_seed
);
525 arg_random_seed
= sz
> 0 ? p
: mfree(p
);
526 arg_random_seed_size
= sz
;
528 } else if (proc_cmdline_key_streq(key
, "systemd.reload_limit_interval_sec")) {
530 if (proc_cmdline_value_missing(key
, value
))
533 r
= parse_sec(value
, &arg_reload_limit_interval_sec
);
535 log_warning_errno(r
, "Failed to parse systemd.reload_limit_interval_sec= argument '%s', ignoring: %m", value
);
539 } else if (proc_cmdline_key_streq(key
, "systemd.reload_limit_burst")) {
541 if (proc_cmdline_value_missing(key
, value
))
544 r
= safe_atou(value
, &arg_reload_limit_burst
);
546 log_warning_errno(r
, "Failed to parse systemd.reload_limit_burst= argument '%s', ignoring: %m", value
);
550 } else if (streq(key
, "quiet") && !value
) {
/* "quiet" only takes effect while no show-status setting has been chosen yet. */
552 if (arg_show_status
== _SHOW_STATUS_INVALID
)
553 arg_show_status
= SHOW_STATUS_ERROR
;
555 } else if (streq(key
, "debug") && !value
) {
557 /* Note that log_parse_environment() handles 'debug'
558 * too, and sets the log level to LOG_DEBUG. */
560 if (detect_container() > 0)
561 log_set_target(LOG_TARGET_CONSOLE
);
566 /* Compatible with SysV, but supported independently even if SysV compatibility is disabled. */
567 target
= runlevel_to_target(key
);
569 return free_and_strdup_warn(&arg_default_unit
, target
);
/* Generates a file-local config parser callback called 'name'. Judging by its use below, 'func' is
 * the function that applies the string value and 'descr' is a human-readable label that is pasted
 * into the error message logged through log_syntax() at LOG_ERR.
 * NOTE(review): there is no space between descr and the opening quote in the format string, so the
 * message reads e.g. "Invalid log level'...'" — confirm whether that is intended. */
575 #define DEFINE_SETTER(name, func, descr) \
576 static int name(const char *unit, \
577 const char *filename, \
579 const char *section, \
580 unsigned section_line, \
581 const char *lvalue, \
583 const char *rvalue, \
595 log_syntax(unit, LOG_ERR, filename, line, r, \
596 "Invalid " descr "'%s': %m", \
/* Config parser callbacks for the logging settings, one per log_*_from_string() helper. They are
 * referenced by the LogLevel, LogTarget, LogColor, LogLocation and LogTime entries in
 * parse_config_file(). */
602 DEFINE_SETTER(config_parse_level2
, log_set_max_level_from_string
, "log level");
603 DEFINE_SETTER(config_parse_target
, log_set_target_from_string
, "target");
604 DEFINE_SETTER(config_parse_color
, log_show_color_from_string
, "color");
605 DEFINE_SETTER(config_parse_location
, log_show_location_from_string
, "location");
606 DEFINE_SETTER(config_parse_time
, log_show_time_from_string
, "time");
/* Config parser callback for "DefaultTimeoutAbortSec" (see the table in parse_config_file()). It
 * delegates to config_parse_timeout_abort() with arg_defaults.timeout_abort_usec as the destination
 * and stores that call's result in arg_defaults.timeout_abort_set.
 * NOTE(review): only part of the parameter list and of the delegated call is visible here; whether
 * the result is checked before the flag is assigned cannot be told from this view. */
608 static int config_parse_default_timeout_abort(
610 const char *filename
,
613 unsigned section_line
,
621 r
= config_parse_timeout_abort(
630 &arg_defaults
.timeout_abort_usec
,
633 arg_defaults
.timeout_abort_set
= r
;
/* Config parser callback for "DefaultOOMScoreAdjust" (see the table in parse_config_file()). The
 * result lives in arg_defaults: oom_score_adjust holds the value, oom_score_adjust_set records
 * whether one was configured. */
637 static int config_parse_oom_score_adjust(
639 const char *filename
,
642 unsigned section_line
,
/* An empty assignment resets the setting to "not configured". */
651 if (isempty(rvalue
)) {
652 arg_defaults
.oom_score_adjust_set
= false;
656 r
= parse_oom_score_adjust(rvalue
, &oa
);
658 return log_syntax_parse_error(unit
, filename
, line
, r
, lvalue
, rvalue
);
660 arg_defaults
.oom_score_adjust
= oa
;
661 arg_defaults
.oom_score_adjust_set
= true;
/* Config parser callback for the manager-level "ProtectSystem" setting. data must be non-NULL
 * (ASSERT_PTR) and is used as an int destination; parse_config_file() passes &arg_protect_system.
 * NOTE(review): what is stored for the empty/"auto" case and for the parsed boolean is not visible
 * here. */
666 static int config_parse_protect_system_pid1(
668 const char *filename
,
671 unsigned section_line
,
678 int *v
= ASSERT_PTR(data
), r
;
680 /* This is modelled after the per-service ProtectSystem= setting, but a bit more restricted on one
681 * hand, and more automatic in another. i.e. we currently only support yes/no (not "strict" or
682 * "full"). And we will enable this automatically for the initrd unless configured otherwise.
684 * We might extend this later to match more closely what the per-service ProtectSystem= can do, but
685 * this is not trivial, due to ordering constraints: besides /usr/ we don't really have much mounted
686 * at the moment we enable this logic. */
/* An empty value and the literal "auto" are handled together, before boolean parsing. */
688 if (isempty(rvalue
) || streq(rvalue
, "auto")) {
693 r
= parse_boolean(rvalue
);
695 return log_syntax_parse_error(unit
, filename
, line
, r
, lvalue
, rvalue
);
/* Config parser callback for the boolean "CrashReboot" setting. It writes a CrashAction through data
 * (must be non-NULL, see ASSERT_PTR), the same destination the "CrashAction" entry uses in
 * parse_config_file().
 * NOTE(review): the handling of an empty value is not visible here. */
701 static int config_parse_crash_reboot(
703 const char *filename
,
706 unsigned section_line
,
713 CrashAction
*v
= ASSERT_PTR(data
);
716 if (isempty(rvalue
)) {
721 r
= parse_boolean(rvalue
);
723 return log_syntax_parse_error(unit
, filename
, line
, r
, lvalue
, rvalue
);
/* true selects CRASH_REBOOT, false selects CRASH_FREEZE. */
725 *v
= r
> 0 ? CRASH_REBOOT
: CRASH_FREEZE
;
/* Loads the manager configuration into the arg_* globals.
 *
 * The table maps every supported key of the "Manager" section to its parser callback, an extra
 * parser argument, and the destination variable. For the system scope, "systemd/system.conf" and
 * its drop-ins are read; otherwise the scope is asserted to be the user scope and the paths come
 * from manager_find_user_config_paths(). The results of both parse calls are deliberately discarded
 * with (void). Afterwards, default start/stop timeouts of 0 are converted to USEC_INFINITY. */
729 static int parse_config_file(void) {
730 const ConfigTableItem items
[] = {
731 { "Manager", "LogLevel", config_parse_level2
, 0, NULL
},
732 { "Manager", "LogTarget", config_parse_target
, 0, NULL
},
733 { "Manager", "LogColor", config_parse_color
, 0, NULL
},
734 { "Manager", "LogLocation", config_parse_location
, 0, NULL
},
735 { "Manager", "LogTime", config_parse_time
, 0, NULL
},
736 { "Manager", "DumpCore", config_parse_bool
, 0, &arg_dump_core
},
737 { "Manager", "CrashChVT", /* legacy */ config_parse_crash_chvt
, 0, &arg_crash_chvt
},
738 { "Manager", "CrashChangeVT", config_parse_crash_chvt
, 0, &arg_crash_chvt
},
739 { "Manager", "CrashShell", config_parse_bool
, 0, &arg_crash_shell
},
/* CrashReboot and CrashAction write to the same variable. */
740 { "Manager", "CrashReboot", config_parse_crash_reboot
, 0, &arg_crash_action
},
741 { "Manager", "CrashAction", config_parse_crash_action
, 0, &arg_crash_action
},
742 { "Manager", "ShowStatus", config_parse_show_status
, 0, &arg_show_status
},
743 { "Manager", "StatusUnitFormat", config_parse_status_unit_format
, 0, &arg_status_unit_format
},
744 { "Manager", "CPUAffinity", config_parse_cpu_set
, 0, &arg_cpu_affinity
},
745 { "Manager", "NUMAPolicy", config_parse_numa_policy
, 0, &arg_numa_policy
.type
},
746 { "Manager", "NUMAMask", config_parse_numa_mask
, 0, &arg_numa_policy
.nodes
},
747 { "Manager", "JoinControllers", config_parse_warn_compat
, DISABLED_LEGACY
, NULL
},
748 { "Manager", "RuntimeWatchdogSec", config_parse_watchdog_sec
, 0, &arg_runtime_watchdog
},
749 { "Manager", "RuntimeWatchdogPreSec", config_parse_watchdog_sec
, 0, &arg_pretimeout_watchdog
},
750 { "Manager", "RebootWatchdogSec", config_parse_watchdog_sec
, 0, &arg_reboot_watchdog
},
751 { "Manager", "ShutdownWatchdogSec", config_parse_watchdog_sec
, 0, &arg_reboot_watchdog
}, /* obsolete alias */
752 { "Manager", "KExecWatchdogSec", config_parse_watchdog_sec
, 0, &arg_kexec_watchdog
},
753 { "Manager", "WatchdogDevice", config_parse_path
, 0, &arg_watchdog_device
},
754 { "Manager", "RuntimeWatchdogPreGovernor", config_parse_string
, CONFIG_PARSE_STRING_SAFE
, &arg_watchdog_pretimeout_governor
},
755 { "Manager", "CapabilityBoundingSet", config_parse_capability_set
, 0, &arg_capability_bounding_set
},
756 { "Manager", "NoNewPrivileges", config_parse_bool
, 0, &arg_no_new_privs
},
757 { "Manager", "ProtectSystem", config_parse_protect_system_pid1
, 0, &arg_protect_system
},
/* NOTE(review): "SystemCallArchitectures" is listed twice, once with its real parser and once with
 * config_parse_warn_compat/DISABLED_CONFIGURATION — presumably the two arms of a build-time
 * conditional whose preprocessor lines are not visible here; confirm. */
759 { "Manager", "SystemCallArchitectures", config_parse_syscall_archs
, 0, &arg_syscall_archs
},
761 { "Manager", "SystemCallArchitectures", config_parse_warn_compat
, DISABLED_CONFIGURATION
, NULL
},
764 { "Manager", "TimerSlackNSec", config_parse_nsec
, 0, &arg_timer_slack_nsec
},
765 { "Manager", "DefaultTimerAccuracySec", config_parse_sec
, 0, &arg_defaults
.timer_accuracy_usec
},
766 { "Manager", "DefaultStandardOutput", config_parse_output_restricted
, 0, &arg_defaults
.std_output
},
767 { "Manager", "DefaultStandardError", config_parse_output_restricted
, 0, &arg_defaults
.std_error
},
768 { "Manager", "DefaultTimeoutStartSec", config_parse_sec
, 0, &arg_defaults
.timeout_start_usec
},
769 { "Manager", "DefaultTimeoutStopSec", config_parse_sec
, 0, &arg_defaults
.timeout_stop_usec
},
770 { "Manager", "DefaultTimeoutAbortSec", config_parse_default_timeout_abort
, 0, NULL
},
771 { "Manager", "DefaultDeviceTimeoutSec", config_parse_sec
, 0, &arg_defaults
.device_timeout_usec
},
772 { "Manager", "DefaultRestartSec", config_parse_sec
, 0, &arg_defaults
.restart_usec
},
773 { "Manager", "DefaultStartLimitInterval", config_parse_sec
, 0, &arg_defaults
.start_limit
.interval
}, /* obsolete alias */
774 { "Manager", "DefaultStartLimitIntervalSec", config_parse_sec
, 0, &arg_defaults
.start_limit
.interval
},
775 { "Manager", "DefaultStartLimitBurst", config_parse_unsigned
, 0, &arg_defaults
.start_limit
.burst
},
776 { "Manager", "DefaultEnvironment", config_parse_environ
, arg_runtime_scope
, &arg_default_environment
},
777 { "Manager", "ManagerEnvironment", config_parse_environ
, arg_runtime_scope
, &arg_manager_environment
},
/* All DefaultLimit* entries share the arg_defaults.rlimit array; the RLIMIT_* constant passed as
 * the parser argument identifies the resource. */
778 { "Manager", "DefaultLimitCPU", config_parse_rlimit
, RLIMIT_CPU
, arg_defaults
.rlimit
},
779 { "Manager", "DefaultLimitFSIZE", config_parse_rlimit
, RLIMIT_FSIZE
, arg_defaults
.rlimit
},
780 { "Manager", "DefaultLimitDATA", config_parse_rlimit
, RLIMIT_DATA
, arg_defaults
.rlimit
},
781 { "Manager", "DefaultLimitSTACK", config_parse_rlimit
, RLIMIT_STACK
, arg_defaults
.rlimit
},
782 { "Manager", "DefaultLimitCORE", config_parse_rlimit
, RLIMIT_CORE
, arg_defaults
.rlimit
},
783 { "Manager", "DefaultLimitRSS", config_parse_rlimit
, RLIMIT_RSS
, arg_defaults
.rlimit
},
784 { "Manager", "DefaultLimitNOFILE", config_parse_rlimit
, RLIMIT_NOFILE
, arg_defaults
.rlimit
},
785 { "Manager", "DefaultLimitAS", config_parse_rlimit
, RLIMIT_AS
, arg_defaults
.rlimit
},
786 { "Manager", "DefaultLimitNPROC", config_parse_rlimit
, RLIMIT_NPROC
, arg_defaults
.rlimit
},
787 { "Manager", "DefaultLimitMEMLOCK", config_parse_rlimit
, RLIMIT_MEMLOCK
, arg_defaults
.rlimit
},
788 { "Manager", "DefaultLimitLOCKS", config_parse_rlimit
, RLIMIT_LOCKS
, arg_defaults
.rlimit
},
789 { "Manager", "DefaultLimitSIGPENDING", config_parse_rlimit
, RLIMIT_SIGPENDING
, arg_defaults
.rlimit
},
790 { "Manager", "DefaultLimitMSGQUEUE", config_parse_rlimit
, RLIMIT_MSGQUEUE
, arg_defaults
.rlimit
},
791 { "Manager", "DefaultLimitNICE", config_parse_rlimit
, RLIMIT_NICE
, arg_defaults
.rlimit
},
792 { "Manager", "DefaultLimitRTPRIO", config_parse_rlimit
, RLIMIT_RTPRIO
, arg_defaults
.rlimit
},
793 { "Manager", "DefaultLimitRTTIME", config_parse_rlimit
, RLIMIT_RTTIME
, arg_defaults
.rlimit
},
794 { "Manager", "DefaultCPUAccounting", config_parse_warn_compat
, DISABLED_LEGACY
, NULL
},
795 { "Manager", "DefaultIOAccounting", config_parse_bool
, 0, &arg_defaults
.io_accounting
},
796 { "Manager", "DefaultIPAccounting", config_parse_bool
, 0, &arg_defaults
.ip_accounting
},
797 { "Manager", "DefaultBlockIOAccounting", config_parse_warn_compat
, DISABLED_LEGACY
, NULL
},
798 { "Manager", "DefaultMemoryAccounting", config_parse_bool
, 0, &arg_defaults
.memory_accounting
},
799 { "Manager", "DefaultTasksAccounting", config_parse_bool
, 0, &arg_defaults
.tasks_accounting
},
800 { "Manager", "DefaultTasksMax", config_parse_tasks_max
, 0, &arg_defaults
.tasks_max
},
801 { "Manager", "DefaultMemoryPressureThresholdSec", config_parse_sec
, 0, &arg_defaults
.memory_pressure_threshold_usec
},
802 { "Manager", "DefaultMemoryPressureWatch", config_parse_memory_pressure_watch
, 0, &arg_defaults
.memory_pressure_watch
},
803 { "Manager", "CtrlAltDelBurstAction", config_parse_emergency_action
, arg_runtime_scope
, &arg_cad_burst_action
},
804 { "Manager", "DefaultOOMPolicy", config_parse_oom_policy
, 0, &arg_defaults
.oom_policy
},
805 { "Manager", "DefaultOOMScoreAdjust", config_parse_oom_score_adjust
, 0, NULL
},
806 { "Manager", "ReloadLimitIntervalSec", config_parse_sec
, 0, &arg_reload_limit_interval_sec
},
807 { "Manager", "ReloadLimitBurst", config_parse_unsigned
, 0, &arg_reload_limit_burst
},
/* NOTE(review): "DefaultSmackProcessLabel" is also listed twice, following the same pattern as
 * "SystemCallArchitectures" above — presumably a build-time conditional; confirm. */
809 { "Manager", "DefaultSmackProcessLabel", config_parse_string
, 0, &arg_defaults
.smack_process_label
},
811 { "Manager", "DefaultSmackProcessLabel", config_parse_warn_compat
, DISABLED_CONFIGURATION
, NULL
},
816 if (arg_runtime_scope
== RUNTIME_SCOPE_SYSTEM
)
817 (void) config_parse_standard_file_with_dropins(
818 "systemd/system.conf",
820 config_item_table_lookup
, items
,
822 /* userdata= */ NULL
);
824 _cleanup_strv_free_
char **files
= NULL
, **dirs
= NULL
;
827 assert(arg_runtime_scope
== RUNTIME_SCOPE_USER
);
829 r
= manager_find_user_config_paths(&files
, &dirs
);
831 return log_error_errno(r
, "Failed to determine config file paths: %m");
833 (void) config_parse_many(
834 (const char* const*) files
,
835 (const char* const*) dirs
,
839 config_item_table_lookup
, items
,
844 /* Traditionally "0" was used to turn off the default unit timeouts. Fix this up so that we use
845 * USEC_INFINITY like everywhere else. */
846 if (arg_defaults
.timeout_start_usec
<= 0)
847 arg_defaults
.timeout_start_usec
= USEC_INFINITY
;
848 if (arg_defaults
.timeout_stop_usec
<= 0)
849 arg_defaults
.timeout_stop_usec
= USEC_INFINITY
;
/* Applies arg_defaults and arg_default_environment to the manager m in three steps: unit defaults,
 * the manager's default environment, then the transient environment. Per the log messages, a failing
 * step is warned about and ignored. */
854 static void set_manager_defaults(Manager
*m
) {
859 /* Propagates the various default unit property settings into the manager object, i.e. properties
860 * that do not affect the manager itself, but are just what newly allocated units will have set if
861 * they haven't set anything else. (Also see set_manager_settings() for the settings that affect the
862 * manager's own behaviour) */
864 r
= manager_set_unit_defaults(m
, &arg_defaults
);
866 log_warning_errno(r
, "Failed to set manager defaults, ignoring: %m");
868 r
= manager_default_environment(m
);
870 log_warning_errno(r
, "Failed to set manager default environment, ignoring: %m");
872 r
= manager_transient_environment_add(m
, arg_default_environment
);
874 log_warning_errno(r
, "Failed to add to transient environment, ignoring: %m");
/* Copies the manager-level arg_* settings into the manager m: confirm-spawn, service watchdogs,
 * Ctrl-Alt-Del burst action, reload rate limit, the four watchdog timeouts, the pretimeout governor,
 * show-status and the status unit format. */
877 static void set_manager_settings(Manager
*m
) {
882 /* Propagates the various manager settings into the manager object, i.e. properties that
883 * affect the manager itself (as opposed to just being inherited into newly allocated
884 * units, see set_manager_defaults() above). */
/* The pointer itself is assigned; no copy of the string is made here. */
886 m
->confirm_spawn
= arg_confirm_spawn
;
887 m
->service_watchdogs
= arg_service_watchdogs
;
888 m
->cad_burst_action
= arg_cad_burst_action
;
889 /* Note that we don't do structured initialization here, otherwise it will reset the rate limit
890 * counter on every daemon-reload. */
891 m
->reload_reexec_ratelimit
.interval
= arg_reload_limit_interval_sec
;
892 m
->reload_reexec_ratelimit
.burst
= arg_reload_limit_burst
;
894 manager_set_watchdog(m
, WATCHDOG_RUNTIME
, arg_runtime_watchdog
);
895 manager_set_watchdog(m
, WATCHDOG_REBOOT
, arg_reboot_watchdog
);
896 manager_set_watchdog(m
, WATCHDOG_KEXEC
, arg_kexec_watchdog
);
897 manager_set_watchdog(m
, WATCHDOG_PRETIMEOUT
, arg_pretimeout_watchdog
);
/* Per the message below, a failure to apply the governor is warned about and ignored. */
898 r
= manager_set_watchdog_pretimeout_governor(m
, arg_watchdog_pretimeout_governor
);
900 log_warning_errno(r
, "Failed to set watchdog pretimeout governor to '%s', ignoring: %m", arg_watchdog_pretimeout_governor
);
902 manager_set_show_status(m
, arg_show_status
, "command line");
903 m
->status_unit_format
= arg_status_unit_format
;
/* Parses the process command line with getopt_long() and stores the results in the arg_* globals.
 * Unlike the kernel command line handler, a value that fails to parse is returned as an error
 * through log_error_errno(). After option parsing, two consistency checks are enforced: excess
 * positional arguments are rejected when not running as PID 1, and running as user manager requires
 * an explicit --user.
 * NOTE(review): most 'case' labels, 'break' statements and result checks are not visible here. */
906 static int parse_argv(int argc
, char *argv
[]) {
912 static const struct option options
[] = {
913 COMMON_GETOPT_OPTIONS
,
914 SYSTEMD_GETOPT_OPTIONS
,
/* Remembers whether the user scope was requested explicitly on the command line; checked at the end. */
919 bool user_arg_seen
= false;
924 if (getpid_cached() == 1)
927 while ((c
= getopt_long(argc
, argv
, SYSTEMD_GETOPT_SHORT_OPTIONS
, options
, NULL
)) >= 0)
932 r
= log_set_max_level_from_string(optarg
);
934 return log_error_errno(r
, "Failed to parse log level \"%s\": %m", optarg
);
939 r
= log_set_target_from_string(optarg
);
941 return log_error_errno(r
, "Failed to parse log target \"%s\": %m", optarg
);
948 r
= log_show_color_from_string(optarg
);
950 return log_error_errno(r
, "Failed to parse log color setting \"%s\": %m",
953 log_show_color(true);
957 case ARG_LOG_LOCATION
:
959 r
= log_show_location_from_string(optarg
);
961 return log_error_errno(r
, "Failed to parse log location setting \"%s\": %m",
964 log_show_location(true);
971 r
= log_show_time_from_string(optarg
);
973 return log_error_errno(r
, "Failed to parse log time setting \"%s\": %m",
980 case ARG_DEFAULT_STD_OUTPUT
:
981 r
= exec_output_from_string(optarg
);
983 return log_error_errno(r
, "Failed to parse default standard output setting \"%s\": %m",
985 arg_defaults
.std_output
= r
;
988 case ARG_DEFAULT_STD_ERROR
:
989 r
= exec_output_from_string(optarg
);
991 return log_error_errno(r
, "Failed to parse default standard error output setting \"%s\": %m",
993 arg_defaults
.std_error
= r
;
997 r
= free_and_strdup(&arg_default_unit
, optarg
);
999 return log_error_errno(r
, "Failed to set default unit \"%s\": %m", optarg
);
1004 arg_runtime_scope
= RUNTIME_SCOPE_SYSTEM
;
1008 arg_runtime_scope
= RUNTIME_SCOPE_USER
;
1009 user_arg_seen
= true;
1013 arg_action
= ACTION_TEST
;
1017 arg_pager_flags
|= PAGER_DISABLE
;
1021 arg_action
= ACTION_VERSION
;
1024 case ARG_DUMP_CONFIGURATION_ITEMS
:
1025 arg_action
= ACTION_DUMP_CONFIGURATION_ITEMS
;
1028 case ARG_DUMP_BUS_PROPERTIES
:
1029 arg_action
= ACTION_DUMP_BUS_PROPERTIES
;
1032 case ARG_BUS_INTROSPECT
:
/* optarg is stored as-is, without copying. */
1033 arg_bus_introspect
= optarg
;
1034 arg_action
= ACTION_BUS_INTROSPECT
;
1038 r
= parse_boolean_argument("--dump-core", optarg
, &arg_dump_core
);
1043 case ARG_CRASH_CHVT
:
1044 r
= parse_crash_chvt(optarg
, &arg_crash_chvt
);
1046 return log_error_errno(r
, "Failed to parse crash virtual terminal index: \"%s\": %m",
1050 case ARG_CRASH_SHELL
:
1051 r
= parse_boolean_argument("--crash-shell", optarg
, &arg_crash_shell
);
1056 case ARG_CRASH_REBOOT
:
/* No destination is passed; the parsed boolean in r is mapped to a crash action below. */
1057 r
= parse_boolean_argument("--crash-reboot", optarg
, NULL
);
1060 arg_crash_action
= r
> 0 ? CRASH_REBOOT
: CRASH_FREEZE
;
1063 case ARG_CRASH_ACTION
:
1064 r
= crash_action_from_string(optarg
);
1066 return log_error_errno(r
, "Failed to parse crash action \"%s\": %m", optarg
);
1067 arg_crash_action
= r
;
1070 case ARG_CONFIRM_SPAWN
:
/* Any previous value is released before the new one is parsed into the same variable. */
1071 arg_confirm_spawn
= mfree(arg_confirm_spawn
);
1073 r
= parse_confirm_spawn(optarg
, &arg_confirm_spawn
);
1075 return log_error_errno(r
, "Failed to parse confirm spawn option: \"%s\": %m",
1079 case ARG_SERVICE_WATCHDOGS
:
1080 r
= parse_boolean_argument("--service-watchdogs=", optarg
, &arg_service_watchdogs
);
1085 case ARG_SHOW_STATUS
:
1087 r
= parse_show_status(optarg
, &arg_show_status
);
1089 return log_error_errno(r
, "Failed to parse show status boolean: \"%s\": %m",
1092 arg_show_status
= SHOW_STATUS_YES
;
1095 case ARG_DESERIALIZE
: {
1099 fd
= parse_fd(optarg
);
1101 return log_error_errno(fd
, "Failed to parse serialization fd \"%s\": %m", optarg
);
/* Best-effort: the result of setting close-on-exec is discarded. */
1103 (void) fd_cloexec(fd
, true);
1105 f
= fdopen(fd
, "r");
1107 return log_error_errno(errno
, "Failed to open serialization fd %d: %m", fd
);
/* A stream supplied earlier is closed and replaced. */
1109 safe_fclose(arg_serialization
);
1110 arg_serialization
= f
;
1115 case ARG_SWITCHED_ROOT
:
1116 arg_switched_root
= true;
1119 case ARG_MACHINE_ID
:
1120 r
= id128_from_string_nonzero(optarg
, &arg_machine_id
);
1122 return log_error_errno(r
, "MachineID '%s' is not valid: %m", optarg
);
1126 arg_action
= ACTION_HELP
;
1130 log_set_max_level(LOG_DEBUG
);
1136 /* Just to eat away the sysvinit kernel cmdline args that we'll parse in
1137 * parse_proc_cmdline_item() or ignore, without any getopt() error messages.
1140 if (getpid_cached() != 1)
1146 assert_not_reached();
1149 if (optind
< argc
&& getpid_cached() != 1)
1150 /* Hmm, when we aren't run as init system let's complain about excess arguments */
1151 return log_error_errno(SYNTHETIC_ERRNO(EINVAL
), "Excess arguments.");
1153 if (arg_action
== ACTION_RUN
&& arg_runtime_scope
== RUNTIME_SCOPE_USER
&& !user_arg_seen
)
1154 return log_error_errno(SYNTHETIC_ERRNO(EINVAL
),
1155 "Explicit --user argument required to run as user manager.");
/* Prints the command line usage text to standard output. link receives the man page reference
 * produced by terminal_urlify_man("systemd", "1", ...) and is freed automatically (_cleanup_free_);
 * the leading "%s" of the format string is filled with program_invocation_short_name.
 * NOTE(review): the remaining printf() arguments and the return statement are not visible here. */
1160 static int help(void) {
1161 _cleanup_free_
char *link
= NULL
;
1164 r
= terminal_urlify_man("systemd", "1", &link
);
1168 printf("%s [OPTIONS...]\n\n"
1169 "%sStarts and monitors system and user services.%s\n\n"
1170 "This program takes no positional arguments.\n\n"
1172 " -h --help Show this help\n"
1173 " --version Show version\n"
1174 " --test Determine initial transaction, dump it and exit\n"
1175 " --system Combined with --test: operate in system mode\n"
1176 " --user Combined with --test: operate in user mode\n"
1177 " --dump-configuration-items Dump understood unit configuration items\n"
1178 " --dump-bus-properties Dump exposed bus properties\n"
1179 " --bus-introspect=PATH Write XML introspection data\n"
1180 " --unit=UNIT Set default unit\n"
1181 " --dump-core[=BOOL] Dump core on crash\n"
1182 " --crash-vt=NR Change to specified VT on crash\n"
1183 " --crash-action=ACTION Specify what to do on crash\n"
1184 " --crash-shell[=BOOL] Run shell on crash\n"
1185 " --confirm-spawn[=BOOL] Ask for confirmation when spawning processes\n"
1186 " --show-status[=BOOL] Show status updates on the console during boot\n"
1187 " --log-target=TARGET Set log target (console, journal, kmsg,\n"
1188 " journal-or-kmsg, null)\n"
1189 " --log-level=LEVEL Set log level (debug, info, notice, warning,\n"
1190 " err, crit, alert, emerg)\n"
1191 " --log-color[=BOOL] Highlight important log messages\n"
1192 " --log-location[=BOOL] Include code location in log messages\n"
1193 " --log-time[=BOOL] Prefix log messages with current time\n"
1194 " --default-standard-output= Set default standard output for services\n"
1195 " --default-standard-error= Set default standard error output for services\n"
1196 " --no-pager Do not pipe output into a pager\n"
1197 "\nSee the %s for details.\n",
1198 program_invocation_short_name
,
/* Serializes the manager's state so that it can be handed to a re-executed instance.
 *
 * Visible steps, in order: announce the reload on the bus, open the serialization file,
 * serialize the manager into the file and the fd set, finish the file, then clear O_CLOEXEC
 * on both the file's descriptor and every fd in the set so they survive exec().
 *
 * On success ownership of the stream and the fd set is transferred to the caller through
 * *ret_f and *ret_fds (TAKE_PTR). On the visible failure paths the error is logged and
 * returned, and the _cleanup_ attributes release whatever was allocated so far.
 * NOTE(review): the leading parameters and the fd set allocation are not visible here. */
1208 static int prepare_reexecute(
1212 bool switching_root
) {
1214 _cleanup_fdset_free_ FDSet
*fds
= NULL
;
1215 _cleanup_fclose_
FILE *f
= NULL
;
1222 /* Make sure nothing is really destructed when we shut down */
1224 bus_manager_send_reloading(m
, true);
1226 r
= manager_open_serialization(m
, &f
);
1228 return log_error_errno(r
, "Failed to create serialization file: %m");
1234 r
= manager_serialize(m
, f
, fds
, switching_root
);
1238 r
= finish_serialization_file(f
);
1240 return log_error_errno(r
, "Failed to finish serialization file: %m");
/* The serialization fd has to stay open across exec(), hence drop O_CLOEXEC. */
1242 r
= fd_cloexec(fileno(f
), false);
1244 return log_error_errno(r
, "Failed to disable O_CLOEXEC for serialization: %m");
/* Same for every fd collected in the set during serialization. */
1246 r
= fdset_cloexec(fds
, false);
1248 return log_error_errno(r
, "Failed to disable O_CLOEXEC for serialization fds: %m");
/* Success: hand both objects over to the caller, disarming the _cleanup_ handlers. */
1250 *ret_f
= TAKE_PTR(f
);
1251 *ret_fds
= TAKE_PTR(fds
);
/* Best-effort bump of the fs.file-max and fs.nr_open sysctls, each compiled in only when the
 * matching BUMP_PROC_SYS_FS_* build option is set. Nothing is returned; failures are only
 * logged, at debug level for -EROFS/-EPERM/-EACCES and at warning level otherwise. */
1256 static void bump_file_max_and_nr_open(void) {
1258 /* Let's bump fs.file-max and fs.nr_open to their respective maximums. On current kernels large
1259 * numbers of file descriptors are no longer a performance problem and their memory is properly
1260 * tracked by memcg, thus counting them and limiting them in another two layers of limits is
1261 * unnecessary and just complicates things. This function hence turns off 2 of the 4 levels of limits
1262 * on file descriptors, and makes RLIMIT_NOFILE (soft + hard) the only ones that really matter. */
1264 #if BUMP_PROC_SYS_FS_FILE_MAX || BUMP_PROC_SYS_FS_NR_OPEN
1268 #if BUMP_PROC_SYS_FS_FILE_MAX
1269 /* The maximum the kernel allows for this since 5.2 is LONG_MAX, use that. (Previously things were
1270 * different, but the operation would fail silently.) */
1271 r
= sysctl_write("fs/file-max", LONG_MAX_STR
);
1273 log_full_errno(IN_SET(r
, -EROFS
, -EPERM
, -EACCES
) ? LOG_DEBUG
: LOG_WARNING
,
1274 r
, "Failed to bump fs.file-max, ignoring: %m");
1277 #if BUMP_PROC_SYS_FS_NR_OPEN
1280 /* Argh! The kernel enforces maximum and minimum values on the fs.nr_open, but we don't really know
1281 * what they are. The expression by which the maximum is determined is dependent on the architecture,
1282 * and is something we don't really want to copy to userspace, as it is dependent on implementation
1283 * details of the kernel. Since the kernel doesn't expose the maximum value to us, we can only try
1284 * and hope. Hence, let's start with INT_MAX, and then keep halving the value until we find one that
1285 * works. Ugly? Yes, absolutely, but kernel APIs are kernel APIs, so what can we do... 🤯 */
1290 v
&= ~(__SIZEOF_POINTER__
- 1); /* Round down to next multiple of the pointer size */
1292 log_warning("Can't bump fs.nr_open, value too small.");
1298 log_error_errno(k
, "Failed to read fs.nr_open: %m");
/* Never lower the limit: if the current value is already at least the candidate, stop. */
1301 if (k
>= v
) { /* Already larger */
1302 log_debug("Skipping bump, value is already larger.");
1306 r
= sysctl_writef("fs/nr_open", "%i", v
);
1308 log_debug("Couldn't write fs.nr_open as %i, halving it.", v
);
1313 log_full_errno(IN_SET(r
, -EROFS
, -EPERM
, -EACCES
) ? LOG_DEBUG
: LOG_WARNING
, r
, "Failed to bump fs.nr_open, ignoring: %m");
1317 log_debug("Successfully bumped fs.nr_open to %i", v
);
/* Raises our own RLIMIT_NOFILE (soft and hard) to the value reported by read_nr_open(), without ever
 * going below the limits we were started with (passed in as saved_rlimit).
 *
 * Returns 0 if nothing had to be done or the limit was applied, and the (already logged) negative
 * errno-style error of setrlimit_closest() otherwise. */
static int bump_rlimit_nofile(const struct rlimit *saved_rlimit) {
        /* The absolute limit the kernel enforces underneath the resource limit. */
        int nr_open = read_nr_open();

        /* Target limits: the larger of the kernel limit and what we inherited. */
        struct rlimit target = {
                .rlim_cur = MAX((rlim_t) nr_open, saved_rlimit->rlim_cur),
                .rlim_max = MAX((rlim_t) nr_open, saved_rlimit->rlim_max),
        };

        /* Only touch the limit if at least one of soft/hard would actually grow. */
        bool needs_bump =
                saved_rlimit->rlim_max < target.rlim_max ||
                saved_rlimit->rlim_cur < target.rlim_cur;

        if (!needs_bump) {
                log_debug("RLIMIT_NOFILE is already as high or higher than we need it, not bumping.");
                return 0;
        }

        /* Go all the way to the maximum the kernel permits, for both the hard and the soft limit. */
        int r = setrlimit_closest(RLIMIT_NOFILE, &target);
        if (r < 0)
                return log_warning_errno(r, "Setting RLIMIT_NOFILE failed, ignoring: %m");

        return 0;
}
1352 static int bump_rlimit_memlock(const struct rlimit
*saved_rlimit
) {
1353 struct rlimit new_rlimit
;
1357 /* BPF_MAP_TYPE_LPM_TRIE bpf maps are charged against RLIMIT_MEMLOCK, even if we have CAP_IPC_LOCK
1358 * which should normally disable such checks. We need them to implement IPAddressAllow= and
1359 * IPAddressDeny=, hence let's bump the value high enough for our user. */
1361 /* Using MAX() on resource limits only is safe if RLIM_INFINITY is > 0. POSIX declares that rlim_t
1362 * must be unsigned, hence this is a given, but let's make this clear here. */
1363 assert_cc(RLIM_INFINITY
> 0);
1365 mm
= physical_memory_scale(1, 8); /* Let's scale how much we allow to be locked by the amount of
1366 * physical RAM. We allow an eighth to be locked by us, just to
1369 new_rlimit
= (struct rlimit
) {
1370 .rlim_cur
= MAX3(HIGH_RLIMIT_MEMLOCK
, saved_rlimit
->rlim_cur
, mm
),
1371 .rlim_max
= MAX3(HIGH_RLIMIT_MEMLOCK
, saved_rlimit
->rlim_max
, mm
),
1374 if (saved_rlimit
->rlim_max
>= new_rlimit
.rlim_cur
&&
1375 saved_rlimit
->rlim_cur
>= new_rlimit
.rlim_max
) {
1376 log_debug("RLIMIT_MEMLOCK is already as high or higher than we need it, not bumping.");
1380 r
= setrlimit_closest(RLIMIT_MEMLOCK
, &new_rlimit
);
1382 return log_warning_errno(r
, "Setting RLIMIT_MEMLOCK failed, ignoring: %m");
/* Applies the configured system call architecture restriction via seccomp_restrict_archs(), but only
 * if is_seccomp_available() says seccomp can be used. A failure is logged at error level and returned.
 * NOTE(review): the `archs` parameter is not used by the visible code; the call passes the global
 * arg_syscall_archs instead. Confirm against the caller whether this is intentional. */
1387 static int enforce_syscall_archs(Set
*archs
) {
1391 if (!is_seccomp_available())
1394 r
= seccomp_restrict_archs(arg_syscall_archs
);
1396 return log_error_errno(r
, "Failed to enforce system call architecture restriction: %m");
/* Prints the "Welcome to …!" (or "Booting initrd of …") status line for the running OS, using fields
 * parsed from os-release, and prints an extra warning line if os_release_support_ended() reports that
 * the SUPPORT_END date has passed.
 *
 * If os-release cannot be parsed the error is logged (debug level for -ENOENT, warning otherwise)
 * and returned. All parsed strings are released automatically via _cleanup_free_.
 * NOTE(review): the condition selecting between the initrd and the regular greeting is not visible
 * in this view. */
1401 static int os_release_status(void) {
1402 _cleanup_free_
char *pretty_name
= NULL
, *name
= NULL
, *version
= NULL
,
1403 *ansi_color
= NULL
, *support_end
= NULL
;
1406 r
= parse_os_release(NULL
,
1407 "PRETTY_NAME", &pretty_name
,
1409 "VERSION", &version
,
1410 "ANSI_COLOR", &ansi_color
,
1411 "SUPPORT_END", &support_end
);
1413 return log_full_errno(r
== -ENOENT
? LOG_DEBUG
: LOG_WARNING
, r
,
1414 "Failed to read os-release file, ignoring: %m");
/* The label is derived from pretty_name/name; the color falls back to "1" when ANSI_COLOR is
 * unset or empty. */
1416 const char *label
= os_release_pretty_name(pretty_name
, name
);
1417 const char *color
= empty_to_null(ansi_color
) ?: "1";
1419 if (show_status_on(arg_show_status
)) {
1421 if (log_get_show_color())
1422 status_printf(NULL
, 0,
1423 ANSI_HIGHLIGHT
"Booting initrd of " ANSI_NORMAL
"\x1B[%sm%s" ANSI_NORMAL ANSI_HIGHLIGHT
"." ANSI_NORMAL
,
1426 status_printf(NULL
, 0,
1427 "Booting initrd of %s...", label
);
1429 if (log_get_show_color())
1430 status_printf(NULL
, 0,
1431 "\n" ANSI_HIGHLIGHT
"Welcome to " ANSI_NORMAL
"\x1B[%sm%s" ANSI_NORMAL ANSI_HIGHLIGHT
"!" ANSI_NORMAL
"\n",
1434 status_printf(NULL
, 0,
1435 "\nWelcome to %s!\n",
1440 if (support_end
&& os_release_support_ended(support_end
, /* quiet */ false, NULL
) > 0)
1441 /* pretty_name may include the version already, so we'll print the version only if we
1442 * have it and we're not using pretty_name. */
1443 status_printf(ANSI_HIGHLIGHT_RED
" !! " ANSI_NORMAL
, 0,
1444 "This OS version (%s%s%s) is past its end-of-support date (%s)",
1446 (pretty_name
|| !version
) ? "" : " version ",
1447 (pretty_name
|| !version
) ? "" : version
,
1453 static int setup_os_release(RuntimeScope scope
) {
1454 char os_release_dst
[STRLEN("/run/user//systemd/propagate/.os-release-stage/os-release") + DECIMAL_STR_MAX(uid_t
)] =
1455 "/run/systemd/propagate/.os-release-stage/os-release";
1456 const char *os_release_src
= "/etc/os-release";
1459 assert(IN_SET(scope
, RUNTIME_SCOPE_SYSTEM
, RUNTIME_SCOPE_USER
));
1461 if (access("/etc/os-release", F_OK
) < 0) {
1462 if (errno
!= ENOENT
)
1463 log_debug_errno(errno
, "Failed to check if /etc/os-release exists, ignoring: %m");
1465 os_release_src
= "/usr/lib/os-release";
1468 if (scope
== RUNTIME_SCOPE_USER
)
1469 xsprintf(os_release_dst
, "/run/user/" UID_FMT
"/systemd/propagate/.os-release-stage/os-release", geteuid());
1471 r
= mkdir_parents_label(os_release_dst
, 0755);
1473 return log_debug_errno(r
, "Failed to create parent directory of '%s', ignoring: %m", os_release_dst
);
1475 r
= copy_file_atomic(os_release_src
, os_release_dst
, 0644, COPY_MAC_CREATE
|COPY_REPLACE
);
1477 return log_debug_errno(r
, "Failed to copy '%s' to '%s', ignoring: %m",
1478 os_release_src
, os_release_dst
);
/* Writes the value of the $container environment variable to /run/systemd/container, creating the
 * file if necessary (WRITE_STRING_FILE_CREATE). A write failure is logged as a warning and returned.
 * NOTE(review): the handling of an unset/empty $container and the success return value are not
 * visible in this view. */
1483 static int write_container_id(void) {
1485 int r
= 0; /* avoid false maybe-uninitialized warning */
1487 c
= getenv("container");
1492 r
= write_string_file("/run/systemd/container", c
, WRITE_STRING_FILE_CREATE
);
1494 return log_warning_errno(r
, "Failed to write /run/systemd/container, ignoring: %m");
1499 static int write_boot_or_shutdown_osc(const char *type
) {
1502 assert(STRPTR_IN_SET(type
, "boot", "shutdown"));
1504 if (getenv_terminal_is_dumb())
1507 _cleanup_close_
int fd
= open_terminal("/dev/console", O_WRONLY
|O_NOCTTY
|O_CLOEXEC
);
1509 return log_debug_errno(fd
, "Failed to open /dev/console to print %s OSC, ignoring: %m", type
);
1511 _cleanup_free_
char *seq
= NULL
;
1512 if (streq(type
, "boot"))
1513 r
= osc_context_open_boot(&seq
);
1515 r
= osc_context_close(SD_ID128_ALLF
, &seq
);
1517 return log_debug_errno(r
, "Failed to acquire %s OSC sequence, ignoring: %m", type
);
1519 r
= loop_write(fd
, seq
, SIZE_MAX
);
1521 return log_debug_errno(r
, "Failed to write %s OSC sequence, ignoring: %m", type
);
1523 if (DEBUG_LOGGING
) {
1524 _cleanup_free_
char *h
= cescape(seq
);
1525 log_debug("OSC sequence for %s successfully written: %s", type
, strna(h
));
1531 static int bump_unix_max_dgram_qlen(void) {
1532 _cleanup_free_
char *qlen
= NULL
;
1536 /* Let's bump the net.unix.max_dgram_qlen sysctl. The kernel default of 16 is simply too low. We set
1537 * the value really really early during boot, so that it is actually applied to all our sockets,
1538 * including the $NOTIFY_SOCKET one. */
1540 r
= read_one_line_file("/proc/sys/net/unix/max_dgram_qlen", &qlen
);
1542 return log_full_errno(r
== -ENOENT
? LOG_DEBUG
: LOG_WARNING
, r
,
1543 "Failed to read AF_UNIX datagram queue length, ignoring: %m");
1545 r
= safe_atolu(qlen
, &v
);
1547 return log_warning_errno(r
, "Failed to parse AF_UNIX datagram queue length '%s', ignoring: %m", qlen
);
1549 if (v
>= DEFAULT_UNIX_MAX_DGRAM_QLEN
)
1552 r
= sysctl_write("net/unix/max_dgram_qlen", STRINGIFY(DEFAULT_UNIX_MAX_DGRAM_QLEN
));
1554 return log_full_errno(IN_SET(r
, -EROFS
, -EPERM
, -EACCES
) ? LOG_DEBUG
: LOG_WARNING
, r
,
1555 "Failed to bump AF_UNIX datagram queue length, ignoring: %m");
/* Adjusts the environment inherited from the kernel when running as PID 1 outside of a container:
 * picks a better $TERM (from the "TERM" or "systemd.tty.term.console" kernel command line keys, from
 * query_term_for_tty() on /dev/console, or FALLBACK_TERM), imports $COLORTERM/$NO_COLOR from the
 * kernel command line, and unsets $HOME if it is "/". */
1560 static int fixup_environment(void) {
1563 /* Only fix up the environment when we are started as PID 1 */
1564 if (getpid_cached() != 1)
1567 /* We expect the environment to be set correctly if run inside a container. */
1568 if (detect_container() > 0)
1571 /* When started as PID1, the kernel uses /dev/console for our stdios and uses TERM=linux whatever the
1572 * backend device used by the console. We try to make a better guess here since some consoles might
1573 * not have support for color mode for example.
1575 * However if TERM was configured through the kernel command line then leave it alone. */
1576 _cleanup_free_
char *term
= NULL
;
1577 r
= proc_cmdline_get_key("TERM", 0, &term
);
1581 /* If we pick up $TERM, then also pick up $COLORTERM, $NO_COLOR */
1582 FOREACH_STRING(v
, "COLORTERM", "NO_COLOR") {
1583 _cleanup_free_
char *vv
= NULL
;
1584 r
= proc_cmdline_get_key(v
, 0, &vv
);
1587 if (r
> 0 && setenv(v
, vv
, /* overwrite= */ true) < 0)
1591 /* If no $TERM is set then look for the per-tty variable instead */
1592 r
= proc_cmdline_get_key("systemd.tty.term.console", 0, &term
);
/* Best effort only: the result of the terminal query is deliberately ignored. */
1598 (void) query_term_for_tty("/dev/console", &term
);
1600 if (setenv("TERM", term
?: FALLBACK_TERM
, /* overwrite= */ true) < 0)
1603 /* The kernel sets HOME=/ for init. Let's undo this. */
1604 if (path_equal(getenv("HOME"), "/"))
1605 assert_se(unsetenv("HOME") == 0);
/* SysV compatibility shim: when this binary is invoked under the name "init" by something other than
 * PID 1, behave like telinit by replacing ourselves with systemctl. Only returns if the redirect does
 * not apply; if the exec fails the error is logged and the process exits with failure. Compiled to a
 * no-op unless HAVE_SYSV_COMPAT is set. */
static void redirect_telinit(int argc, char *argv[]) {

#if HAVE_SYSV_COMPAT
        /* The real init process, or any invocation under another name, is none of our business. */
        if (getpid_cached() == 1 || !invoked_as(argv, "init"))
                return;

        /* On SysV, calling init as a user is identical to calling telinit. */
        execv(SYSTEMCTL_BINARY_PATH, argv);
        log_error_errno(errno, "Failed to execute %s: %m", SYSTEMCTL_BINARY_PATH);
        exit(EXIT_FAILURE);
#endif
}
/* Replaces the current process with SYSTEMD_SHUTDOWN_BINARY_PATH via execve().
 *
 * The command line is assembled in command_line[] (a fixed array of 11 slots whose final used slot
 * must stay NULL) from the stop timeout, the current log level/target/color/location/time settings
 * and the exit code. The environment is a copy of environ extended with WATCHDOG_USEC, WATCHDOG_PID
 * and, if configured, WATCHDOG_DEVICE.
 *
 * `objective` must index a non-NULL entry of the verb table; `retval` is passed on as --exit-code=.
 * NOTE(review): the entries following the binary path in the command_line initializer are not
 * visible in this view. */
1627 static int become_shutdown(int objective
, int retval
) {
1628 static const char* const table
[_MANAGER_OBJECTIVE_MAX
] = {
1629 [MANAGER_EXIT
] = "exit",
1630 [MANAGER_REBOOT
] = "reboot",
1631 [MANAGER_POWEROFF
] = "poweroff",
1632 [MANAGER_HALT
] = "halt",
1633 [MANAGER_KEXEC
] = "kexec",
1636 char timeout
[STRLEN("--timeout=") + DECIMAL_STR_MAX(usec_t
) + STRLEN("us")],
1637 exit_code
[STRLEN("--exit-code=") + DECIMAL_STR_MAX(uint8_t)];
1639 _cleanup_strv_free_
char **env_block
= NULL
;
1640 _cleanup_free_
char *max_log_levels
= NULL
;
1641 usec_t watchdog_timer
= 0;
1644 assert(objective
>= 0 && objective
< _MANAGER_OBJECTIVE_MAX
);
1645 assert(table
[objective
]);
1647 xsprintf(timeout
, "--timeout=%" PRI_USEC
"us", arg_defaults
.timeout_stop_usec
);
1649 const char* command_line
[11] = {
1650 SYSTEMD_SHUTDOWN_BINARY_PATH
,
1653 /* Note that the last position is a terminator and must contain NULL. */
/* Sanity check: `pos` has to point just past the last initialized entry. */
1657 assert(command_line
[pos
-1]);
1658 assert(!command_line
[pos
]);
1660 (void) log_max_levels_to_string(log_get_max_level(), &max_log_levels
);
1662 if (max_log_levels
) {
1663 command_line
[pos
++] = "--log-level";
1664 command_line
[pos
++] = max_log_levels
;
/* Map our current log target onto one the shutdown binary is told to use. */
1667 switch (log_get_target()) {
1669 case LOG_TARGET_KMSG
:
1670 case LOG_TARGET_JOURNAL_OR_KMSG
:
1671 case LOG_TARGET_SYSLOG_OR_KMSG
:
1672 command_line
[pos
++] = "--log-target=kmsg";
1675 case LOG_TARGET_NULL
:
1676 command_line
[pos
++] = "--log-target=null";
1679 case LOG_TARGET_CONSOLE
:
1681 command_line
[pos
++] = "--log-target=console";
1684 if (log_get_show_color())
1685 command_line
[pos
++] = "--log-color";
1687 if (log_get_show_location())
1688 command_line
[pos
++] = "--log-location";
1690 if (log_get_show_time())
1691 command_line
[pos
++] = "--log-time";
1693 xsprintf(exit_code
, "--exit-code=%d", retval
);
1694 command_line
[pos
++] = exit_code
;
/* Strictly smaller: one slot must remain for the terminating NULL. */
1696 assert(pos
< ELEMENTSOF(command_line
));
/* Only reboot and kexec select a watchdog timeout; for every other objective it stays 0. */
1700 if (objective
== MANAGER_REBOOT
)
1701 watchdog_timer
= arg_reboot_watchdog
;
1702 else if (objective
== MANAGER_KEXEC
)
1703 watchdog_timer
= arg_kexec_watchdog
;
1705 /* If we reboot or kexec let's set the shutdown watchdog and tell the
1706 * shutdown binary to repeatedly ping it.
1707 * Disable the pretimeout watchdog, as we do not support it from the shutdown binary. */
1708 (void) watchdog_setup_pretimeout(0);
1709 (void) watchdog_setup_pretimeout_governor(NULL
);
1710 r
= watchdog_setup(watchdog_timer
);
1711 watchdog_close(/* disarm= */ r
< 0);
1713 /* The environment block: */
1715 env_block
= strv_copy(environ
);
1717 /* Tell the binary how often to ping, ignore failure */
1718 (void) strv_extendf(&env_block
, "WATCHDOG_USEC="USEC_FMT
, watchdog_timer
);
1720 /* Make sure that tools that look for $WATCHDOG_USEC (and might get started by the exitrd) don't get
1721 * confused by the variable, because the sd_watchdog_enabled() protocol uses the same variable for
1722 * the same purposes. */
1723 (void) strv_extendf(&env_block
, "WATCHDOG_PID=" PID_FMT
, getpid_cached());
1725 if (arg_watchdog_device
)
1726 (void) strv_extendf(&env_block
, "WATCHDOG_DEVICE=%s", arg_watchdog_device
);
1728 (void) write_boot_or_shutdown_osc("shutdown");
1730 execve(SYSTEMD_SHUTDOWN_BINARY_PATH
, (char **) command_line
, env_block
);
/* Early, one-time clock setup. If clock_is_localtime() reports that the RTC runs in local time, the
 * kernel timezone is set through clock_set_timezone() and the applied delta is logged. Otherwise,
 * unless running in the initrd, clock_reset_timewarp() is called. Failures are only logged. */
1734 static void initialize_clock_timewarp(void) {
1737 /* This is called very early on, before we parse the kernel command line or otherwise figure out why
1738 * we are running, but only once. */
1740 if (clock_is_localtime(NULL
) > 0) {
1743 /* The very first call of settimeofday() also does a time warp in the kernel.
1745 * In the rtc-in-local time mode, we set the kernel's timezone, and rely on external tools to
1746 * take care of maintaining the RTC and do all adjustments. This matches the behavior of
1747 * Windows, which leaves the RTC alone if the registry tells that the RTC runs in UTC.
1749 r
= clock_set_timezone(&min
);
1751 log_error_errno(r
, "Failed to apply local time delta, ignoring: %m");
1753 log_info("RTC configured in localtime, applying delta of %i minutes to system time.", min
);
1755 } else if (!in_initrd())
1757 * Do a dummy very first call to seal the kernel's time warp magic.
1759 * Do not call this from inside the initrd. The initrd might not carry /etc/adjtime with
1760 * LOCAL, but the real system could be set up that way. In such case, we need to delay the
1761 * time-warp or the sealing until we reach the real system.
1763 * Do not set the kernel's timezone. The concept of local time cannot be supported reliably,
1764 * the time will jump or be incorrect at every daylight saving time change. All kernel local
1765 * time concepts will be treated as UTC that way.
1767 (void) clock_reset_timewarp();
1770 static void apply_clock_update(void) {
1771 /* This is called later than clock_apply_epoch(), i.e. after we have parsed
1772 * configuration files/kernel command line and such. */
1774 if (arg_clock_usec
== 0)
1777 if (getpid_cached() != 1)
1780 if (clock_settime(CLOCK_REALTIME
, TIMESPEC_STORE(arg_clock_usec
)) < 0)
1781 log_error_errno(errno
, "Failed to set system clock to time specified on kernel command line: %m");
1783 log_info("Set system clock to %s, as specified on the kernel command line.",
1784 FORMAT_TIMESTAMP(arg_clock_usec
));
1787 static void cmdline_take_random_seed(void) {
1791 if (arg_random_seed_size
== 0)
1794 if (getpid_cached() != 1)
1797 assert(arg_random_seed
);
1798 suggested
= random_pool_size();
1800 if (arg_random_seed_size
< suggested
)
1801 log_warning("Random seed specified on kernel command line has size %zu, but %zu bytes required to fill entropy pool.",
1802 arg_random_seed_size
, suggested
);
1804 r
= random_write_entropy(-1, arg_random_seed
, arg_random_seed_size
, true);
1806 log_warning_errno(r
, "Failed to credit entropy specified on kernel command line, ignoring: %m");
1810 log_notice("Successfully credited entropy passed on kernel command line.\n"
1811 "Note that the seed provided this way is accessible to unprivileged programs. "
1812 "This functionality should not be used outside of testing environments.");
1815 static void initialize_coredump(bool skip_setup
) {
1816 if (getpid_cached() != 1)
1819 /* Don't limit the core dump size, so that coredump handlers such as systemd-coredump (which honour
1820 * the limit) will process core dumps for system services by default. */
1821 if (setrlimit(RLIMIT_CORE
, &RLIMIT_MAKE_CONST(RLIM_INFINITY
)) < 0)
1822 log_warning_errno(errno
, "Failed to set RLIMIT_CORE: %m");
1824 /* But at the same time, turn off the core_pattern logic by default, so that no coredumps are stored
1825 * until the systemd-coredump tool is enabled via sysctl. However it can be changed via the kernel
1826 * command line later so core dumps can still be generated during early startup and in initrd. */
1828 disable_coredumps();
1831 static void initialize_core_pattern(bool skip_setup
) {
1834 if (skip_setup
|| !arg_early_core_pattern
)
1837 if (getpid_cached() != 1)
1840 r
= write_string_file("/proc/sys/kernel/core_pattern", arg_early_core_pattern
, WRITE_STRING_FILE_DISABLE_BUFFER
);
1842 log_warning_errno(r
, "Failed to write '%s' to /proc/sys/kernel/core_pattern, ignoring: %m",
1843 arg_early_core_pattern
);
/* Makes /usr/ read-only for PID 1 according to arg_protect_system: nothing happens if setup is
 * skipped, we are not PID 1, or the setting is 0; a negative ("auto") setting only takes effect in
 * the initrd. /usr is first turned into a mount point and then remounted with
 * MS_BIND|MS_REMOUNT|MS_RDONLY. Failures are logged and otherwise ignored.
 * NOTE(review): most arguments of the mount_nofollow_verbose() call are not visible in this view. */
1846 static void apply_protect_system(bool skip_setup
) {
1849 if (skip_setup
|| getpid_cached() != 1 || arg_protect_system
== 0)
1852 if (arg_protect_system
< 0 && !in_initrd()) {
1853 log_debug("ProtectSystem=auto selected, but not running in an initrd, skipping.");
1857 r
= make_mount_point("/usr");
1859 log_warning_errno(r
, "Failed to make /usr/ a mount point, ignoring: %m");
1863 if (mount_nofollow_verbose(
1868 MS_BIND
|MS_REMOUNT
|MS_RDONLY
,
1869 /* options= */ NULL
) < 0)
1872 log_info("Successfully made /usr/ read-only.");
1875 static void update_cpu_affinity(bool skip_setup
) {
1876 _cleanup_free_
char *mask
= NULL
;
1878 if (skip_setup
|| !arg_cpu_affinity
.set
)
1881 assert(arg_cpu_affinity
.allocated
> 0);
1883 mask
= cpu_set_to_range_string(&arg_cpu_affinity
);
1884 log_debug("Setting CPU affinity to {%s}.", strnull(mask
));
1886 if (sched_setaffinity(0, arg_cpu_affinity
.allocated
, arg_cpu_affinity
.set
) < 0)
1887 log_warning_errno(errno
, "Failed to set CPU affinity, ignoring: %m");
1890 static void update_numa_policy(bool skip_setup
) {
1892 _cleanup_free_
char *nodes
= NULL
;
1893 const char * policy
= NULL
;
1895 if (skip_setup
|| !mpol_is_valid(numa_policy_get_type(&arg_numa_policy
)))
1898 if (DEBUG_LOGGING
) {
1899 policy
= mpol_to_string(numa_policy_get_type(&arg_numa_policy
));
1900 nodes
= cpu_set_to_range_string(&arg_numa_policy
.nodes
);
1901 log_debug("Setting NUMA policy to %s, with nodes {%s}.", strnull(policy
), strnull(nodes
));
1904 r
= apply_numa_policy(&arg_numa_policy
);
1905 if (r
== -EOPNOTSUPP
)
1906 log_debug_errno(r
, "NUMA support not available, ignoring.");
1908 log_warning_errno(r
, "Failed to set NUMA memory policy, ignoring: %m");
/* Copies the arguments src[1..argc-1] that should be passed on to a re-executed instance into dst[],
 * starting at index *dst_index and advancing it for every copied entry. Serialization arguments
 * ("--deserialize=…", or "--deserialize" plus its value) and unit designators ("--unit=…", or
 * "--unit" plus its value) are dropped. Only pointers are copied, the strings remain owned by src.
 * NOTE(review): the parameter list and the members of the first STR_IN_SET() filter are not visible
 * in this view; the caller must size dst[] large enough, nothing is checked here. */
1911 static void filter_args(
1920 /* Copy some filtered arguments into the dst array from src. */
1921 for (int i
= 1; i
< argc
; i
++) {
1922 if (STR_IN_SET(src
[i
],
1928 if (startswith(src
[i
], "--deserialize="))
1930 if (streq(src
[i
], "--deserialize")) {
1931 i
++; /* Skip the argument too */
1935 /* Skip target unit designators. We already acted upon this information and have queued
1936 * appropriate jobs. We don't want to redo all this after reexecution. */
1937 if (startswith(src
[i
], "--unit="))
1939 if (streq(src
[i
], "--unit")) {
1940 i
++; /* Skip the argument too */
1944 /* Seems we have a good old option. Let's pass it over to the new instance. */
1945 dst
[(*dst_index
)++] = src
[i
];
1949 static void finish_remaining_processes(ManagerObjective objective
) {
1950 assert(objective
>= 0 && objective
< _MANAGER_OBJECTIVE_MAX
);
1952 /* Kill all remaining processes from the initrd, but don't wait for them, so that we can handle the
1953 * SIGCHLD for them after deserializing. */
1954 if (IN_SET(objective
, MANAGER_SWITCH_ROOT
, MANAGER_SOFT_REBOOT
))
1955 broadcast_signal(SIGTERM
, /* wait_for_exit= */ false, /* send_sighup= */ true, arg_defaults
.timeout_stop_usec
);
1957 /* On soft reboot really make sure nothing is left. Note that this will skip cgroups
1958 * of units that were configured with SurviveFinalKillSignal=yes. */
1959 if (objective
== MANAGER_SOFT_REBOOT
)
1960 broadcast_signal(SIGKILL
, /* wait_for_exit= */ false, /* send_sighup= */ false, arg_defaults
.timeout_stop_usec
);
/* Replaces the running manager with a new init process. `objective` must be one of
 * MANAGER_REEXECUTE, MANAGER_SWITCH_ROOT or MANAGER_SOFT_REBOOT.
 *
 * Visible sequence: disarm the watchdog; for a soft reboot without an explicit root consider
 * /run/nextroot; verify the new root contains a usable init; restore the saved RLIMIT_NOFILE and
 * RLIMIT_MEMLOCK; signal remaining processes; switch root; re-apply the saved ambient capability
 * set; then try, in this order: our own binary with "--deserialize=<fd>" (only if no explicit init
 * was requested), the configured init, /sbin/init, and finally /bin/sh.
 *
 * This only returns on failure: *ret_error_message is pointed at a static string describing the
 * problem and a negative errno-style value is returned. */
1963 static int do_reexecute(
1964 ManagerObjective objective
,
1967 const struct rlimit
*saved_rlimit_nofile
,
1968 const struct rlimit
*saved_rlimit_memlock
,
1970 const char *switch_root_dir
,
1971 const char *switch_root_init
,
1972 uint64_t saved_capability_ambient_set
,
1973 const char **ret_error_message
) {
1975 size_t i
, args_size
;
1979 assert(IN_SET(objective
, MANAGER_REEXECUTE
, MANAGER_SWITCH_ROOT
, MANAGER_SOFT_REBOOT
));
1981 assert(saved_rlimit_nofile
);
1982 assert(saved_rlimit_memlock
);
1983 assert(ret_error_message
);
1985 /* Close and disarm the watchdog, so that the new instance can reinitialize it, but the machine
1986 * doesn't get rebooted while we do that. */
1987 watchdog_close(/* disarm= */ true);
1989 if (!switch_root_dir
&& objective
== MANAGER_SOFT_REBOOT
) {
1990 /* If no switch root dir is specified, then check if /run/nextroot/ qualifies and use that */
1991 r
= path_is_os_tree("/run/nextroot");
1992 if (r
< 0 && r
!= -ENOENT
)
1993 log_debug_errno(r
, "Failed to determine if /run/nextroot/ is a valid OS tree, ignoring: %m");
1995 switch_root_dir
= "/run/nextroot";
1998 if (switch_root_dir
) {
1999 /* If we're supposed to switch root, preemptively check the existence of a usable init.
2000 * Otherwise the system might end up in a completely undebuggable state afterwards. */
2001 if (switch_root_init
) {
2002 r
= chase_and_access(switch_root_init
, switch_root_dir
, CHASE_PREFIX_ROOT
, X_OK
, /* ret_path = */ NULL
);
2004 log_warning_errno(r
, "Failed to chase configured init %s/%s: %m",
2005 switch_root_dir
, switch_root_init
);
2007 r
= chase_and_access(SYSTEMD_BINARY_PATH
, switch_root_dir
, CHASE_PREFIX_ROOT
, X_OK
, /* ret_path = */ NULL
);
2009 log_debug_errno(r
, "Failed to chase our own binary %s/%s: %m",
2010 switch_root_dir
, SYSTEMD_BINARY_PATH
);
/* Last candidate inside the new root; if this is missing too we give up before switching. */
2014 r
= chase_and_access("/sbin/init", switch_root_dir
, CHASE_PREFIX_ROOT
, X_OK
, /* ret_path = */ NULL
);
2016 *ret_error_message
= "Switch root target contains no usable init";
2017 return log_error_errno(r
, "Failed to chase %s/sbin/init", switch_root_dir
);
2022 /* Reset RLIMIT_NOFILE + RLIMIT_MEMLOCK back to the kernel defaults, so that the new systemd can pass
2023 * the kernel default to its child processes */
2024 if (saved_rlimit_nofile
->rlim_cur
!= 0)
2025 (void) setrlimit(RLIMIT_NOFILE
, saved_rlimit_nofile
);
2026 if (saved_rlimit_memlock
->rlim_cur
!= RLIM_INFINITY
)
2027 (void) setrlimit(RLIMIT_MEMLOCK
, saved_rlimit_memlock
);
2029 finish_remaining_processes(objective
);
2031 if (switch_root_dir
) {
2032 r
= switch_root(/* new_root= */ switch_root_dir
,
2033 /* old_root_after= */ NULL
,
2034 /* flags= */ (objective
== MANAGER_SWITCH_ROOT
? SWITCH_ROOT_DESTROY_OLD_ROOT
: 0) |
2035 (objective
== MANAGER_SOFT_REBOOT
? 0 : SWITCH_ROOT_RECURSIVE_RUN
));
2037 log_error_errno(r
, "Failed to switch root, trying to continue: %m");
2040 r
= capability_ambient_set_apply(saved_capability_ambient_set
, /* also_inherit= */ false);
2042 log_warning_errno(r
, "Failed to apply the starting ambient set, ignoring: %m");
/* Room for the original arguments plus the extra options and terminator added below. The array
 * comes from newa(), so it must not be freed. */
2044 args_size
= argc
+ 5;
2045 args
= newa(const char*, args_size
);
2047 if (!switch_root_init
) {
2048 char sfd
[STRLEN("--deserialize=") + DECIMAL_STR_MAX(int)];
2050 /* First try to spawn ourselves with the right path, and with full serialization. We do this
2051 * only if the user didn't specify an explicit init to spawn. */
2053 assert(arg_serialization
);
2056 xsprintf(sfd
, "--deserialize=%i", fileno(arg_serialization
));
2058 i
= 1; /* Leave args[0] empty for now. */
2060 /* Put our stuff first to make sure it always gets parsed in case
2061 * we get weird stuff from the kernel cmdline (like --) */
2062 if (IN_SET(objective
, MANAGER_SWITCH_ROOT
, MANAGER_SOFT_REBOOT
))
2063 args
[i
++] = "--switched-root";
2064 args
[i
++] = runtime_scope_cmdline_option_to_string(arg_runtime_scope
);
2067 filter_args(args
, &i
, argv
, argc
);
2071 assert(i
<= args_size
);
2074 * We want valgrind to print its memory usage summary before reexecution. Valgrind won't do
2075 * this is on its own on exec(), but it will do it on exit(). Hence, to ensure we get a
2076 * summary here, fork() off a child, let it exit() cleanly, so that it prints the summary,
2077 * and wait() for it in the parent, before proceeding into the exec().
2079 valgrind_summary_hack();
2081 args
[0] = SYSTEMD_BINARY_PATH
;
2082 (void) execv(args
[0], (char* const*) args
);
/* Still here, so the exec failed. A plain re-execution has no fallback and is fatal. */
2084 if (objective
== MANAGER_REEXECUTE
) {
2085 *ret_error_message
= "Failed to execute our own binary";
2086 return log_error_errno(errno
, "Failed to execute our own binary %s: %m", args
[0]);
2089 log_debug_errno(errno
, "Failed to execute our own binary %s, trying fallback: %m", args
[0]);
2092 /* Try the fallback, if there is any, without any serialization. We pass the original argv[] and
2093 * envp[]. (Well, modulo the ordering changes due to getopt() in argv[], and some cleanups in envp[],
2094 * but let's hope that doesn't matter.) */
2096 arg_serialization
= safe_fclose(arg_serialization
);
2097 fds
= fdset_free(fds
);
2099 /* Drop /run/systemd directory. Some of its content can be used as a flag indicating that systemd is
2100 * the init system but we might be replacing it with something different. If systemd is used again it
2101 * will recreate the directory and its content anyway. */
2102 r
= rm_rf("/run/systemd.pre-switch-root", REMOVE_ROOT
|REMOVE_MISSING_OK
);
2104 log_warning_errno(r
, "Failed to prepare /run/systemd.pre-switch-root/, ignoring: %m");
2106 r
= RET_NERRNO(rename("/run/systemd", "/run/systemd.pre-switch-root"));
2108 log_warning_errno(r
, "Failed to move /run/systemd/ to /run/systemd.pre-switch-root/, ignoring: %m");
2110 /* Reopen the console */
2111 (void) make_console_stdio();
/* The loop bound is inclusive: index argc is copied as well.
 * NOTE(review): presumably this is how the argv[argc] NULL terminator gets carried over; confirm. */
2113 i
= 1; /* Leave args[0] empty for now. */
2114 for (int j
= 1; j
<= argc
; j
++)
2115 args
[i
++] = argv
[j
];
2116 assert(i
<= args_size
);
2118 /* Re-enable any blocked signals, especially important if we switch from initrd to init=... */
2119 (void) reset_all_signal_handlers();
2120 (void) reset_signal_mask();
2121 (void) rlimit_nofile_safe();
2123 if (switch_root_init
) {
2124 args
[0] = switch_root_init
;
2125 (void) execve(args
[0], (char* const*) args
, saved_env
);
2126 log_warning_errno(errno
, "Failed to execute configured init %s, trying fallback: %m", args
[0]);
2129 args
[0] = "/sbin/init";
2130 (void) execv(args
[0], (char* const*) args
);
2132 *ret_error_message
= "Failed to execute /sbin/init";
2135 manager_status_printf(NULL
, STATUS_TYPE_EMERGENCY
,
2136 ANSI_HIGHLIGHT_RED
" !! " ANSI_NORMAL
,
2137 "%s", *ret_error_message
);
2139 log_warning_errno(r
, "No /sbin/init, trying fallback shell");
2141 args
[0] = "/bin/sh";
2143 (void) execve(args
[0], (char* const*) args
, saved_env
);
2145 *ret_error_message
= "Failed to execute fallback shell";
2148 return log_error_errno(r
, "%s, giving up: %m", *ret_error_message
);
/* Runs manager_loop() and acts on the objective it returns. A negative objective is logged at
 * LOG_EMERG and returned, with *ret_error_message set. For MANAGER_RELOAD the configuration is
 * re-parsed and manager_reload() is invoked in place. For the other objectives handled in the
 * switch below, the return parameters are filled in for the caller: *ret_retval, *ret_fds (via
 * prepare_reexecute()), *ret_switch_root_dir and *ret_switch_root_init.
 *
 * NOTE(review): several lines of this function are not part of this excerpt (among them the
 * parameter that provides 'm', the declaration of 'r' and the statements ending each case), so
 * what the individual branches return could not be confirmed from here. */
2151 static int invoke_main_loop(
2153 const struct rlimit
*saved_rlimit_nofile
,
2154 const struct rlimit
*saved_rlimit_memlock
,
2155 int *ret_retval
, /* Return parameters relevant for shutting down */
2156 FDSet
**ret_fds
, /* Return parameters for reexecuting */
2157 char **ret_switch_root_dir
, /* … */
2158 char **ret_switch_root_init
, /* … */
2159 const char **ret_error_message
) {
2164 assert(saved_rlimit_nofile
);
2165 assert(saved_rlimit_memlock
);
2168 assert(ret_switch_root_dir
);
2169 assert(ret_switch_root_init
);
2170 assert(ret_error_message
);
2173 int objective
= manager_loop(m
);
2174 if (objective
< 0) {
2175 *ret_error_message
= "Failed to run main loop";
2176 return log_struct_errno(LOG_EMERG
, objective
,
2177 LOG_MESSAGE("Failed to run main loop: %m"),
2178 LOG_MESSAGE_ID(SD_MESSAGE_CORE_MAINLOOP_FAILED_STR
));
2181 /* Ensure shutdown timestamp is taken even when bypassing the job engine */
2182 if (IN_SET(objective
,
2183 MANAGER_SOFT_REBOOT
,
2187 MANAGER_POWEROFF
) &&
2188 !dual_timestamp_is_set(m
->timestamps
+ MANAGER_TIMESTAMP_SHUTDOWN_START
))
2189 dual_timestamp_now(m
->timestamps
+ MANAGER_TIMESTAMP_SHUTDOWN_START
);
2191 switch (objective
) {
2193 case MANAGER_RELOAD
: {
2194 LogTarget saved_log_target
;
2195 int saved_log_level
;
2197 manager_send_reloading(m
);
2199 log_info("Reloading...");
2201 /* First, save any overridden log level/target, then parse the configuration file,
2202 * which might change the log level to new settings. */
2204 saved_log_level
= m
->log_level_overridden
? log_get_max_level() : -1;
2205 saved_log_target
= m
->log_target_overridden
? log_get_target() : _LOG_TARGET_INVALID
;
2207 (void) parse_configuration(saved_rlimit_nofile
, saved_rlimit_memlock
);
2209 set_manager_defaults(m
);
2210 set_manager_settings(m
);
2212 update_cpu_affinity(false);
2213 update_numa_policy(false);
/* Re-apply the overrides saved above, now that the configuration has been parsed again. */
2215 if (saved_log_level
>= 0)
2216 manager_override_log_level(m
, saved_log_level
);
2217 if (saved_log_target
>= 0)
2218 manager_override_log_target(m
, saved_log_target
);
2220 if (manager_reload(m
) < 0)
2221 /* Reloading failed before the point of no return.
2222 * Let's continue running as if nothing happened. */
2223 m
->objective
= MANAGER_OK
;
2225 log_info("Reloading finished in " USEC_FMT
" ms.",
2226 usec_sub_unsigned(now(CLOCK_MONOTONIC
), m
->timestamps
[MANAGER_TIMESTAMP_UNITS_LOAD
].monotonic
) / USEC_PER_MSEC
);
2231 case MANAGER_REEXECUTE
:
2233 manager_send_reloading(m
); /* From the perspective of the manager calling us this is
2234 * pretty much the same as a reload */
2236 r
= prepare_reexecute(m
, &arg_serialization
, ret_fds
, false);
2238 *ret_error_message
= "Failed to prepare for reexecution";
2242 log_notice("Reexecuting.");
2244 *ret_retval
= EXIT_FAILURE
;
2245 *ret_switch_root_dir
= *ret_switch_root_init
= NULL
;
2249 case MANAGER_SWITCH_ROOT
:
2251 manager_send_reloading(m
); /* From the perspective of the manager calling us this is
2252 * pretty much the same as a reload */
2254 manager_set_switching_root(m
, true);
2256 if (!m
->switch_root_init
) {
2257 r
= prepare_reexecute(m
, &arg_serialization
, ret_fds
, true);
2259 *ret_error_message
= "Failed to prepare for reexecution";
2265 log_notice("Switching root.");
2267 *ret_retval
= EXIT_FAILURE
;
2269 /* Steal the switch root parameters */
2270 *ret_switch_root_dir
= TAKE_PTR(m
->switch_root
);
2271 *ret_switch_root_init
= TAKE_PTR(m
->switch_root_init
);
2275 case MANAGER_SOFT_REBOOT
:
2276 manager_send_reloading(m
);
2277 manager_set_switching_root(m
, true);
2279 r
= prepare_reexecute(m
, &arg_serialization
, ret_fds
, /* switching_root= */ true);
2281 *ret_error_message
= "Failed to prepare for reexecution";
2285 log_notice("Soft-rebooting.");
2287 *ret_retval
= EXIT_FAILURE
;
2288 *ret_switch_root_dir
= TAKE_PTR(m
->switch_root
);
2289 *ret_switch_root_init
= NULL
;
2294 if (MANAGER_IS_USER(m
)) {
2297 *ret_retval
= m
->return_value
;
2299 *ret_switch_root_dir
= *ret_switch_root_init
= NULL
;
2305 case MANAGER_REBOOT
:
2306 case MANAGER_POWEROFF
:
2308 case MANAGER_KEXEC
: {
2309 log_notice("Shutting down.");
2311 *ret_retval
= m
->return_value
;
2313 *ret_switch_root_dir
= *ret_switch_root_init
= NULL
;
2319 assert_not_reached();
2324 static void log_execution_mode(bool *ret_first_boot
) {
2325 bool first_boot
= false;
2328 assert(ret_first_boot
);
2330 switch (arg_runtime_scope
) {
2332 case RUNTIME_SCOPE_SYSTEM
: {
2336 log_info("systemd " GIT_VERSION
" running in %ssystem mode (%s)",
2337 arg_action
== ACTION_TEST
? "test " : "",
2340 v
= detect_virtualization();
2342 log_info("Detected virtualization %s.", virtualization_to_string(v
));
2344 v
= detect_confidential_virtualization();
2346 log_info("Detected confidential virtualization %s.", confidential_virtualization_to_string(v
));
2348 log_info("Detected architecture %s.", architecture_to_string(uname_architecture()));
2351 log_info("Running in initrd.");
2353 _cleanup_free_
char *id_text
= NULL
;
2355 /* Let's check whether we are in first boot. First, check if an override was
2356 * specified on the kernel command line. If yes, we honour that. */
2358 r
= proc_cmdline_get_bool("systemd.condition_first_boot", /* flags = */ 0, &first_boot
);
2360 log_debug_errno(r
, "Failed to parse systemd.condition_first_boot= kernel command line argument, ignoring: %m");
2363 log_full(first_boot
? LOG_INFO
: LOG_DEBUG
,
2364 "Kernel command line argument says we are %s first boot.",
2365 first_boot
? "in" : "not in");
2367 /* Second, perform autodetection. We use /etc/machine-id as flag file for
2368 * this: If it is missing or contains the value "uninitialized", this is the
2369 * first boot. In other cases, it is not. This allows container managers and
2370 * installers to provision a couple of files in /etc but still permit the
2371 * first-boot initialization to occur. If the container manager wants to
2372 * provision the machine ID it should pass $container_uuid to PID 1. */
2374 r
= read_one_line_file("/etc/machine-id", &id_text
);
2375 if (r
< 0 || streq(id_text
, "uninitialized")) {
2376 if (r
< 0 && r
!= -ENOENT
)
2377 log_warning_errno(r
, "Unexpected error while reading /etc/machine-id, assuming first boot: %m");
2380 log_info("Detected first boot.");
2382 log_debug("Detected initialized system, this is not the first boot.");
2386 assert_se(uname(&uts
) >= 0);
2388 if (strverscmp_improved(uts
.release
, KERNEL_BASELINE_VERSION
) < 0)
2389 log_warning("Warning! Reported kernel version %s is older than systemd's required baseline kernel version %s. "
2390 "Your mileage may vary.", uts
.release
, KERNEL_BASELINE_VERSION
);
2392 log_debug("Kernel version %s, our baseline is %s", uts
.release
, KERNEL_BASELINE_VERSION
);
2397 case RUNTIME_SCOPE_USER
:
2398 if (DEBUG_LOGGING
) {
2399 _cleanup_free_
char *t
= NULL
;
2401 t
= uid_to_name(getuid());
2402 log_debug("systemd " GIT_VERSION
" running in %suser mode for user " UID_FMT
"/%s. (%s)",
2403 arg_action
== ACTION_TEST
? " test" : "",
2404 getuid(), strna(t
), systemd_features
);
2410 assert_not_reached();
2413 *ret_first_boot
= first_boot
;
/* Performs the runtime setup steps visible below: CPU affinity and NUMA policy, then
 * scope-specific setup (system scope: crash handler, /usr/ sanity check, credentials, machine
 * ID, hostname, loopback, os-release propagation, watchdog device, capability bounding set and
 * no-new-privileges; user scope: runtime directory with inaccessible device nodes and
 * os-release propagation), followed by ambient capabilities, timer slack, syscall
 * architectures, reaper status and the rlimit bumps.
 *
 * saved_ambient_set receives the ambient capability set as read by capability_get_ambient()
 * before it is cleared. On the fatal paths shown here *ret_error_message is pointed at a static
 * string; many other failures are only logged and ignored.
 *
 * NOTE(review): the declarations of 'skip_setup', 'first_boot' and 'r' are not part of this
 * excerpt, nor are the error checks guarding most of the log calls below. */
2416 static int initialize_runtime(
2419 struct rlimit
*saved_rlimit_nofile
,
2420 struct rlimit
*saved_rlimit_memlock
,
2421 uint64_t *saved_ambient_set
,
2422 const char **ret_error_message
) {
2426 assert(saved_ambient_set
);
2427 assert(ret_error_message
);
2429 /* Sets up various runtime parameters. Many of these initializations are conditionalized:
2431 * - Some only apply to --system instances
2432 * - Some only apply to --user instances
2433 * - Some only apply when we first start up, but not when we reexecute
2436 if (arg_action
!= ACTION_RUN
)
2439 update_cpu_affinity(skip_setup
);
2440 update_numa_policy(skip_setup
);
2442 switch (arg_runtime_scope
) {
2444 case RUNTIME_SCOPE_SYSTEM
:
2445 /* Make sure we leave a core dump without panicking the kernel. */
2446 install_crash_handler();
2449 /* Check that /usr/ is either on the same file system as / or mounted already. */
2450 if (dir_is_empty("/usr", /* ignore_hidden_or_backup = */ true) > 0) {
2451 *ret_error_message
= "Refusing to run in unsupported environment where /usr/ is not populated";
2455 /* Pull credentials from various sources into a common credential directory (we do
2456 * this here, before setting up the machine ID, so that we can use credential info
2457 * for setting up the machine ID) */
2458 (void) import_credentials();
2460 (void) os_release_status();
2461 (void) machine_id_setup(/* root = */ NULL
, arg_machine_id
,
2462 (first_boot
? MACHINE_ID_SETUP_FORCE_TRANSIENT
: 0) |
2463 (arg_machine_id_from_firmware
? MACHINE_ID_SETUP_FORCE_FIRMWARE
: 0),
2465 (void) hostname_setup(/* really = */ true);
2466 (void) loopback_setup();
2468 bump_unix_max_dgram_qlen();
2469 bump_file_max_and_nr_open();
2471 write_container_id();
2473 (void) write_boot_or_shutdown_osc("boot");
2475 /* Copy os-release to the propagate directory, so that we update it for services running
2476 * under RootDirectory=/RootImage= when we do a soft reboot. */
2477 r
= setup_os_release(RUNTIME_SCOPE_SYSTEM
);
2479 log_warning_errno(r
, "Failed to copy os-release for propagation, ignoring: %m");
2482 r
= watchdog_set_device(arg_watchdog_device
);
2484 log_warning_errno(r
, "Failed to set watchdog device to %s, ignoring: %m", arg_watchdog_device
);
2486 if (!cap_test_all(arg_capability_bounding_set
)) {
2487 r
= capability_bounding_set_drop_usermode(arg_capability_bounding_set
);
2489 *ret_error_message
= "Failed to drop capability bounding set of usermode helpers";
2490 return log_struct_errno(LOG_EMERG
, r
,
2491 LOG_MESSAGE("Failed to drop capability bounding set of usermode helpers: %m"),
2492 LOG_MESSAGE_ID(SD_MESSAGE_CORE_CAPABILITY_BOUNDING_USER_STR
));
2495 r
= capability_bounding_set_drop(arg_capability_bounding_set
, true);
2497 *ret_error_message
= "Failed to drop capability bounding set";
2498 return log_struct_errno(LOG_EMERG
, r
,
2499 LOG_MESSAGE("Failed to drop capability bounding set: %m"),
2500 LOG_MESSAGE_ID(SD_MESSAGE_CORE_CAPABILITY_BOUNDING_STR
));
2504 if (arg_no_new_privs
) {
2505 if (prctl(PR_SET_NO_NEW_PRIVS
, 1, 0, 0, 0) < 0) {
2506 *ret_error_message
= "Failed to disable new privileges";
2507 return log_struct_errno(LOG_EMERG
, errno
,
2508 LOG_MESSAGE("Failed to disable new privileges: %m"),
2509 LOG_MESSAGE_ID(SD_MESSAGE_CORE_DISABLE_PRIVILEGES_STR
));
2515 case RUNTIME_SCOPE_USER
: {
2516 _cleanup_free_
char *p
= NULL
;
2518 /* Create the runtime directory and place the inaccessible device nodes there, if we run in
2519 * user mode. In system mode mount_setup() already did that. */
2521 r
= xdg_user_runtime_dir("/systemd", &p
);
2523 *ret_error_message
= "$XDG_RUNTIME_DIR is not set";
2524 return log_struct_errno(LOG_EMERG
, r
,
2525 LOG_MESSAGE("Failed to determine $XDG_RUNTIME_DIR path: %m"),
2526 LOG_MESSAGE_ID(SD_MESSAGE_CORE_NO_XDGDIR_PATH_STR
));
2530 (void) mkdir_p_label(p
, 0755);
2531 (void) make_inaccessible_nodes(p
, UID_INVALID
, GID_INVALID
);
2533 r
= setup_os_release(RUNTIME_SCOPE_USER
);
2535 log_warning_errno(r
, "Failed to copy os-release for propagation, ignoring: %m");
2542 assert_not_reached();
2545 /* The two operations on the ambient set are meant for a user session manager. They do not affect
2546 * system manager operation, because by default it starts with an empty ambient set.
2548 * Preserve the ambient set for later use with sd-executor processes. */
2549 r
= capability_get_ambient(saved_ambient_set
);
2551 log_warning_errno(r
, "Failed to save ambient capabilities, ignoring: %m");
2553 /* Clear ambient capabilities, so services do not inherit them implicitly. Dropping them does
2554 * not affect the permitted and effective sets which are important for the manager itself to
2556 r
= capability_ambient_set_apply(0, /* also_inherit= */ false);
2558 log_warning_errno(r
, "Failed to reset ambient capability set, ignoring: %m");
2560 if (arg_timer_slack_nsec
!= NSEC_INFINITY
)
2561 if (prctl(PR_SET_TIMERSLACK
, arg_timer_slack_nsec
) < 0)
2562 log_warning_errno(errno
, "Failed to adjust timer slack, ignoring: %m");
2564 if (arg_syscall_archs
) {
2565 r
= enforce_syscall_archs(arg_syscall_archs
);
2567 *ret_error_message
= "Failed to set syscall architectures";
2572 r
= make_reaper_process(true);
2574 log_warning_errno(r
, "Failed to make us a subreaper, ignoring: %m");
2576 /* Bump up RLIMIT_NOFILE for systemd itself */
2577 (void) bump_rlimit_nofile(saved_rlimit_nofile
);
2578 (void) bump_rlimit_memlock(saved_rlimit_memlock
);
/* Picks the unit to start at boot and queues a start job for it. The unit is arg_default_unit
 * if that is set, SPECIAL_INITRD_TARGET when in_initrd() is true, and SPECIAL_DEFAULT_TARGET
 * otherwise. As the log messages below indicate, loading falls back to SPECIAL_DEFAULT_TARGET
 * (only in the initrd and without explicit configuration) and then to SPECIAL_RESCUE_TARGET.
 *
 * The JOB_START job is requested with JOB_ISOLATE first, and with JOB_REPLACE if that could not
 * be done. The id of the queued job is stored in m->default_unit_job_id. On the fatal paths
 * shown, *ret_error_message is pointed at a static string.
 *
 * NOTE(review): the parameter that provides 'm', the declarations of 'r', 'unit', 'target' and
 * 'job', and the conditions guarding the fallbacks are not part of this excerpt. */
2583 static int do_queue_default_job(
2585 const char **ret_error_message
) {
2587 _cleanup_(sd_bus_error_free
) sd_bus_error error
= SD_BUS_ERROR_NULL
;
2593 if (arg_default_unit
)
2594 unit
= arg_default_unit
;
2595 else if (in_initrd())
2596 unit
= SPECIAL_INITRD_TARGET
;
2598 unit
= SPECIAL_DEFAULT_TARGET
;
2600 log_debug("Activating default unit: %s", unit
);
2602 r
= manager_load_startable_unit_or_warn(m
, unit
, NULL
, &target
);
2603 if (r
< 0 && in_initrd() && !arg_default_unit
) {
2604 /* Fall back to default.target, which we used to always use by default. Only do this if no
2605 * explicit configuration was given. */
2607 log_info("Falling back to %s.", SPECIAL_DEFAULT_TARGET
);
2609 r
= manager_load_startable_unit_or_warn(m
, SPECIAL_DEFAULT_TARGET
, NULL
, &target
);
2612 log_info("Falling back to %s.", SPECIAL_RESCUE_TARGET
);
2614 r
= manager_load_startable_unit_or_warn(m
, SPECIAL_RESCUE_TARGET
, NULL
, &target
);
2616 *ret_error_message
= r
== -ERFKILL
? SPECIAL_RESCUE_TARGET
" masked"
2617 : "Failed to load " SPECIAL_RESCUE_TARGET
;
2622 assert(target
->load_state
== UNIT_LOADED
);
2624 r
= manager_add_job(m
, JOB_START
, target
, JOB_ISOLATE
, &error
, &job
);
2626 log_debug_errno(r
, "Default target could not be isolated, starting instead: %s", bus_error_message(&error
, r
));
2628 sd_bus_error_free(&error
);
2630 r
= manager_add_job(m
, JOB_START
, target
, JOB_REPLACE
, &error
, &job
);
2632 *ret_error_message
= "Failed to start default target";
2633 return log_struct_errno(LOG_EMERG
, r
,
2634 LOG_MESSAGE("Failed to start default target: %s", bus_error_message(&error
, r
)),
2635 LOG_MESSAGE_ID(SD_MESSAGE_CORE_START_TARGET_FAILED_STR
));
2639 *ret_error_message
= "Failed to isolate default target";
2640 return log_struct_errno(LOG_EMERG
, r
,
2641 LOG_MESSAGE("Failed to isolate default target: %s", bus_error_message(&error
, r
)),
2642 LOG_MESSAGE_ID(SD_MESSAGE_CORE_ISOLATE_TARGET_FAILED_STR
));
2644 log_info("Queued %s job for default target %s.",
2645 job_type_to_string(job
->type
),
2646 unit_status_string(job
->unit
, NULL
));
2648 m
->default_unit_job_id
= job
->id
;
/* Queries the current RLIMIT_NOFILE and RLIMIT_MEMLOCK limits of this process and stores them in
 * the two caller-provided structures. A failing getrlimit() is logged as a warning and otherwise
 * ignored. */
static void save_rlimits(struct rlimit *saved_rlimit_nofile,
                         struct rlimit *saved_rlimit_memlock) {
        int rc;

        assert(saved_rlimit_nofile);
        assert(saved_rlimit_memlock);

        rc = getrlimit(RLIMIT_NOFILE, saved_rlimit_nofile);
        if (rc < 0)
                log_warning_errno(errno, "Reading RLIMIT_NOFILE failed, ignoring: %m");

        rc = getrlimit(RLIMIT_MEMLOCK, saved_rlimit_memlock);
        if (rc < 0)
                log_warning_errno(errno, "Reading RLIMIT_MEMLOCK failed, ignoring: %m");
}
/* Installs a default RLIMIT_NOFILE for services in arg_defaults.rlimit[RLIMIT_NOFILE], derived
 * from a copy of saved_rlimit_nofile. In system scope the hard limit is raised to at least
 * HIGH_RLIMIT_NOFILE, but not beyond the value returned by read_nr_open(). The soft limit is
 * capped at FD_SETSIZE.
 *
 * NOTE(review): the statement following the check whether a default is already configured, the
 * allocation failure handling and the declarations of 'rl' and 'nr' are not part of this
 * excerpt. */
2666 static void fallback_rlimit_nofile(const struct rlimit
*saved_rlimit_nofile
) {
2669 if (arg_defaults
.rlimit
[RLIMIT_NOFILE
])
2672 /* Make sure forked processes get limits based on the original kernel setting */
2674 rl
= newdup(struct rlimit
, saved_rlimit_nofile
, 1);
2680 /* Bump the hard limit for system services to a substantially higher value. The default
2681 * hard limit current kernels set is pretty low (4K), mostly for historical
2682 * reasons. According to kernel developers, the fd handling in recent kernels has been
2683 * optimized substantially enough, so that we can bump the limit now, without paying too
2684 * high a price in memory or performance. Note however that we only bump the hard limit,
2685 * not the soft limit. That's because select() works the way it works, and chokes on fds
2686 * >= 1024. If we'd bump the soft limit globally, it might accidentally happen to
2687 * unexpecting programs that they get fds higher than what they can process using
2688 * select(). By only bumping the hard limit but leaving the low limit as it is we avoid
2689 * this pitfall: programs that are written by folks with the select() problem in mind
2690 * (and thus use poll()/epoll instead of select(), the way everybody should) can
2691 * explicitly opt into high fds by bumping their soft limit beyond 1024, to the hard limit
2693 if (arg_runtime_scope
== RUNTIME_SCOPE_SYSTEM
) {
2696 /* Get the underlying absolute limit the kernel enforces */
2697 nr
= read_nr_open();
2699 rl
->rlim_max
= MIN((rlim_t
) nr
, MAX(rl
->rlim_max
, (rlim_t
) HIGH_RLIMIT_NOFILE
));
2702 /* If for some reason we were invoked with a soft limit above 1024 (which should never
2703 * happen!, but who knows what we get passed in from pam_limit when invoked as --user
2704 * instance), then lower what we pass on to not confuse our children */
2705 rl
->rlim_cur
= MIN(rl
->rlim_cur
, (rlim_t
) FD_SETSIZE
);
2707 arg_defaults
.rlimit
[RLIMIT_NOFILE
] = rl
;
/* Installs a default RLIMIT_MEMLOCK for services in arg_defaults.rlimit[RLIMIT_MEMLOCK], derived
 * from a copy of saved_rlimit_memlock. In system scope both the soft and the hard limit are
 * raised to at least DEFAULT_RLIMIT_MEMLOCK.
 *
 * NOTE(review): the statement following the check whether a default is already configured, the
 * allocation failure handling and the declaration of 'rl' are not part of this excerpt. */
2710 static void fallback_rlimit_memlock(const struct rlimit
*saved_rlimit_memlock
) {
2713 /* Pass the original value down to invoked processes */
2715 if (arg_defaults
.rlimit
[RLIMIT_MEMLOCK
])
2718 rl
= newdup(struct rlimit
, saved_rlimit_memlock
, 1);
2724 if (arg_runtime_scope
== RUNTIME_SCOPE_SYSTEM
) {
2725 /* Raise the default limit to 8M also on old kernels and in containers (8M is the kernel
2726 * default for this since kernel 5.16) */
2727 rl
->rlim_max
= MAX(rl
->rlim_max
, (rlim_t
) DEFAULT_RLIMIT_MEMLOCK
);
2728 rl
->rlim_cur
= MAX(rl
->rlim_cur
, (rlim_t
) DEFAULT_RLIMIT_MEMLOCK
);
2731 arg_defaults
.rlimit
[RLIMIT_MEMLOCK
] = rl
;
/* Walks arg_manager_environment and hands each entry to putenv_dup(), so that it becomes part of
 * our own environment. Each entry is announced at debug level; a failure is logged as a warning
 * and does not stop the loop.
 *
 * NOTE(review): the declaration of 'r' and the check guarding the warning are not part of this
 * excerpt. */
2734 static void setenv_manager_environment(void) {
2737 STRV_FOREACH(p
, arg_manager_environment
) {
2738 log_debug("Setting '%s' in our own environment.", *p
);
2740 r
= putenv_dup(*p
, true);
2742 log_warning_errno(r
, "Failed to setenv \"%s\", ignoring: %m", *p
);
/* Puts the arg_* globals back to fixed default values. Heap-allocated values are released with
 * mfree()/strv_free()/set_free() and the variable is set to the (NULL) result; arg_defaults is
 * torn down and re-initialized for the current arg_runtime_scope. arg_runtime_scope and
 * arg_serialization are deliberately left alone, as the inline comments note.
 *
 * NOTE(review): this excerpt may not show every assignment of the function. */
2746 static void reset_arguments(void) {
2747 /* Frees/resets arg_* variables, with a few exceptions commented below. */
2749 arg_default_unit
= mfree(arg_default_unit
);
2751 /* arg_runtime_scope — ignore */
2753 arg_dump_core
= true;
2754 arg_crash_chvt
= -1;
2755 arg_crash_shell
= false;
2756 arg_crash_action
= CRASH_FREEZE
;
2757 arg_confirm_spawn
= mfree(arg_confirm_spawn
);
2758 arg_show_status
= _SHOW_STATUS_INVALID
;
2759 arg_status_unit_format
= STATUS_UNIT_FORMAT_DEFAULT
;
2760 arg_switched_root
= false;
2761 arg_pager_flags
= 0;
2762 arg_service_watchdogs
= true;
2764 unit_defaults_done(&arg_defaults
);
2765 unit_defaults_init(&arg_defaults
, arg_runtime_scope
);
2767 arg_runtime_watchdog
= 0;
2768 arg_reboot_watchdog
= 10 * USEC_PER_MINUTE
;
2769 arg_kexec_watchdog
= 0;
2770 arg_pretimeout_watchdog
= 0;
2771 arg_early_core_pattern
= mfree(arg_early_core_pattern
);
2772 arg_watchdog_device
= mfree(arg_watchdog_device
);
2773 arg_watchdog_pretimeout_governor
= mfree(arg_watchdog_pretimeout_governor
);
2775 arg_default_environment
= strv_free(arg_default_environment
);
2776 arg_manager_environment
= strv_free(arg_manager_environment
);
2778 arg_capability_bounding_set
= CAP_MASK_UNSET
;
2779 arg_no_new_privs
= false;
2780 arg_protect_system
= -1;
2781 arg_timer_slack_nsec
= NSEC_INFINITY
;
2783 arg_syscall_archs
= set_free(arg_syscall_archs
);
2785 /* arg_serialization — ignore */
2787 arg_machine_id
= (sd_id128_t
) {};
2788 arg_cad_burst_action
= EMERGENCY_ACTION_REBOOT_FORCE
;
2790 cpu_set_done(&arg_cpu_affinity
);
2791 numa_policy_reset(&arg_numa_policy
);
2793 arg_random_seed
= mfree(arg_random_seed
);
2794 arg_random_seed_size
= 0;
2797 arg_reload_limit_interval_sec
= 0;
2798 arg_reload_limit_burst
= 0;
/* Computes the default OOM score adjustment for services: our own adjustment value, as read by
 * get_oom_score_adjust(), plus 100, capped at OOM_SCORE_ADJ_MAX. The result is stored in
 * arg_defaults.oom_score_adjust and arg_defaults.oom_score_adjust_set is set. Failure to read
 * our own value is logged as a warning and leaves the defaults untouched.
 *
 * NOTE(review): the statement following the check of arg_defaults.oom_score_adjust_set, the
 * non-root condition the comment below refers to, and the declarations of 'a', 'b' and 'r' are
 * not part of this excerpt. */
2801 static void determine_default_oom_score_adjust(void) {
2804 /* Run our services at slightly higher OOM score than ourselves. But let's be conservative here, and
2805 * do this only if we don't run as root (i.e. only if we are run in user mode, for an unprivileged
2808 if (arg_defaults
.oom_score_adjust_set
)
2814 r
= get_oom_score_adjust(&a
);
2816 return (void) log_warning_errno(r
, "Failed to determine current OOM score adjustment value, ignoring: %m");
2818 assert_cc(100 <= OOM_SCORE_ADJ_MAX
);
2819 b
= a
>= OOM_SCORE_ADJ_MAX
- 100 ? OOM_SCORE_ADJ_MAX
: a
+ 100;
2824 arg_defaults
.oom_score_adjust
= b
;
2825 arg_defaults
.oom_score_adjust_set
= true;
/* Loads the manager configuration: parses the config file and, in system scope, the kernel
 * command line; fills in fallback rlimits from the two saved limits; makes sure arg_show_status
 * has a value; determines the default OOM score adjustment; and pushes the manager environment
 * into our own environment. log_parse_environment() is called before and again after the
 * environment has been updated. Parse failures are logged as warnings and ignored.
 *
 * NOTE(review): the declaration of 'r', the statement following the "Assign configuration
 * defaults" comment and the return statement are not part of this excerpt. */
2828 static int parse_configuration(const struct rlimit
*saved_rlimit_nofile
,
2829 const struct rlimit
*saved_rlimit_memlock
) {
2832 assert(saved_rlimit_nofile
);
2833 assert(saved_rlimit_memlock
);
2835 /* Assign configuration defaults */
2838 r
= parse_config_file();
2840 log_warning_errno(r
, "Failed to parse config file, ignoring: %m");
2842 if (arg_runtime_scope
== RUNTIME_SCOPE_SYSTEM
) {
2843 r
= proc_cmdline_parse(parse_proc_cmdline_item
, NULL
, 0);
2845 log_warning_errno(r
, "Failed to parse kernel command line, ignoring: %m");
2848 /* Initialize some default rlimits for services if they haven't been configured */
2849 fallback_rlimit_nofile(saved_rlimit_nofile
);
2850 fallback_rlimit_memlock(saved_rlimit_memlock
);
2852 /* Note that this also parses bits from the kernel command line, including "debug". */
2853 log_parse_environment();
2855 /* Initialize the show status setting if it hasn't been set explicitly yet */
2856 if (arg_show_status
== _SHOW_STATUS_INVALID
)
2857 arg_show_status
= SHOW_STATUS_YES
;
2859 /* Slightly raise the OOM score for our services if we are running for unprivileged users. */
2860 determine_default_oom_score_adjust();
2862 /* Push variables into the manager environment block */
2863 setenv_manager_environment();
2865 /* Parse log environment variables again to take into account any new environment variables. */
2866 log_parse_environment();
/* Refuses combinations of PID, arg_action and arg_runtime_scope that are not supported. Each
 * violation is logged as an error and returned as a synthetic negative errno (-EPERM,
 * -EOPNOTSUPP or -EUNATCH, depending on the check).
 *
 * NOTE(review): the second half of some conditions (the test mode check and the first user
 * instance check) and the return statement for the success case are not part of this excerpt. */
2871 static int safety_checks(void) {
2873 if (getpid_cached() == 1 &&
2874 arg_action
!= ACTION_RUN
)
2875 return log_error_errno(SYNTHETIC_ERRNO(EPERM
),
2876 "Unsupported execution mode while PID 1.");
2878 if (getpid_cached() == 1 &&
2879 arg_runtime_scope
== RUNTIME_SCOPE_USER
)
2880 return log_error_errno(SYNTHETIC_ERRNO(EPERM
),
2881 "Can't run --user mode as PID 1.");
2883 if (arg_action
== ACTION_RUN
&&
2884 arg_runtime_scope
== RUNTIME_SCOPE_SYSTEM
&&
2885 getpid_cached() != 1)
2886 return log_error_errno(SYNTHETIC_ERRNO(EPERM
),
2887 "Can't run system mode unless PID 1.");
2889 if (arg_action
== ACTION_TEST
&&
2891 return log_error_errno(SYNTHETIC_ERRNO(EPERM
),
2892 "Don't run test mode as root.");
2894 switch (arg_runtime_scope
) {
2896 case RUNTIME_SCOPE_USER
:
2898 if (arg_action
== ACTION_RUN
&&
2900 return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP
),
2901 "Trying to run as user instance, but the system has not been booted with systemd.");
2903 if (arg_action
== ACTION_RUN
&&
2904 !getenv("XDG_RUNTIME_DIR"))
2905 return log_error_errno(SYNTHETIC_ERRNO(EUNATCH
),
2906 "Trying to run as user instance, but $XDG_RUNTIME_DIR is not set.");
2910 case RUNTIME_SCOPE_SYSTEM
:
2911 if (arg_action
== ACTION_RUN
&&
2912 running_in_chroot() > 0)
2913 return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP
),
2914 "Cannot be run in a chroot() environment.");
2918 assert_not_reached();
/* Sets up the security frameworks: calls mac_selinux_setup(), mac_smack_setup() and
 * mac_apparmor_setup(); judging by the error strings below, IMA and IPE policies are handled as
 * well. security_start_timestamp is taken before the first step and security_finish_timestamp
 * after the last one. loaded_policy is handed to the SELinux and SMACK setup calls. For each
 * step a dedicated static string is available for *ret_error_message.
 *
 * NOTE(review): the declaration of 'r', the error checks after each step, the IMA/IPE setup
 * calls and the return statement are not part of this excerpt. */
2924 static int initialize_security(
2925 bool *loaded_policy
,
2926 dual_timestamp
*security_start_timestamp
,
2927 dual_timestamp
*security_finish_timestamp
,
2928 const char **ret_error_message
) {
2932 assert(loaded_policy
);
2933 assert(security_start_timestamp
);
2934 assert(security_finish_timestamp
);
2935 assert(ret_error_message
);
2937 dual_timestamp_now(security_start_timestamp
);
2939 r
= mac_selinux_setup(loaded_policy
);
2941 *ret_error_message
= "Failed to load SELinux policy";
2945 r
= mac_smack_setup(loaded_policy
);
2947 *ret_error_message
= "Failed to load SMACK policy";
2951 r
= mac_apparmor_setup();
2953 *ret_error_message
= "Failed to load AppArmor policy";
2959 *ret_error_message
= "Failed to load IMA policy";
2965 *ret_error_message
= "Failed to load IPE policy";
2969 dual_timestamp_now(security_finish_timestamp
);
/* Builds the set of file descriptors that were passed to this process, using fdset_new_fill(),
 * and stores it in *ret_fds. On failure *ret_error_message is set and the error is logged at
 * LOG_EMERG and returned. Afterwards it is asserted that the fd behind arg_serialization (if
 * set) did not end up in the set.
 *
 * NOTE(review): the declaration of 'r', the check guarding the failure path and the return
 * statement for the success case are not part of this excerpt. */
2973 static int collect_fds(FDSet
**ret_fds
, const char **ret_error_message
) {
2977 assert(ret_error_message
);
2979 /* Pick up all fds passed to us. We apply a filter here: we only take the fds that have O_CLOEXEC
2980 * off. All fds passed via execve() to us must have O_CLOEXEC off, and our own code and dependencies
2981 * should be clean enough to set O_CLOEXEC universally. Thus checking the bit should be a safe
2982 * mechanism to distinguish passed in fds from our own.
2984 * Why bother? Some subsystems we initialize early, specifically selinux might keep fds open in our
2985 * process behind our back. We should not take possession of that (and then accidentally close
2986 * it). SELinux thankfully sets O_CLOEXEC on its fds, so this test should work. */
2987 r
= fdset_new_fill(/* filter_cloexec= */ 0, ret_fds
);
2989 *ret_error_message
= "Failed to allocate fd set";
2990 return log_struct_errno(LOG_EMERG
, r
,
2991 LOG_MESSAGE("Failed to allocate fd set: %m"),
2992 LOG_MESSAGE_ID(SD_MESSAGE_CORE_FD_SET_FAILED_STR
));
2995 /* The serialization fd should have O_CLOEXEC turned on already, let's verify that we didn't pick it up here */
2996 assert_se(!arg_serialization
|| !fdset_contains(*ret_fds
, fileno(arg_serialization
)));
/* Console handling, relevant only when arg_runtime_scope is RUNTIME_SCOPE_SYSTEM: detaches from
 * the terminal session via terminal_detach_session() and calls console_setup().
 *
 * NOTE(review): the statement following the scope check and the condition guarding
 * console_setup() (presumably involving skip_setup, going by the comment) are not part of this
 * excerpt. */
3001 static void setup_console_terminal(bool skip_setup
) {
3003 if (arg_runtime_scope
!= RUNTIME_SCOPE_SYSTEM
)
3006 /* If we are init, we connect stdin/stdout/stderr to /dev/null and make sure we don't have a
3007 * controlling tty. */
3008 terminal_detach_session();
3010 /* Reset the console, but only if this is really init and we are freshly booted */
3012 (void) console_setup();
/* Quick look at the command line, ahead of the real option parsing, to tell a reexecution from a
 * regular bootup. Returns true if the extensive setup may be skipped, i.e. if a --deserialize
 * option (with or without "=value") was passed. "--switched-root" always forces the setup and
 * makes us return false right away, even if deserialization was requested as well. */
static bool early_skip_setup_check(int argc, char *argv[]) {
        bool reexecuting = false;

        for (int n = 1; n < argc; n++) {
                const char *a = argv[n];

                /* If we switched root, don't skip the setup. */
                if (streq(a, "--switched-root"))
                        return false;

                if (streq(a, "--deserialize") || startswith(a, "--deserialize="))
                        reexecuting = true;
        }

        /* When we are deserializing, then we are reexecuting, hence avoid the extensive setup */
        return reexecuting;
}
/* Takes a copy of the process environment ('environ') with strv_copy() and installs it as
 * saved_env via strv_free_and_replace().
 *
 * NOTE(review): the declaration of 'l', the handling of a failed copy and the return statements
 * are not part of this excerpt. */
3031 static int save_env(void) {
3034 l
= strv_copy(environ
);
3038 strv_free_and_replace(saved_env
, l
);
3042 int main(int argc
, char *argv
[]) {
3044 initrd_timestamp
= DUAL_TIMESTAMP_NULL
,
3045 userspace_timestamp
= DUAL_TIMESTAMP_NULL
,
3046 kernel_timestamp
= DUAL_TIMESTAMP_NULL
,
3047 security_start_timestamp
= DUAL_TIMESTAMP_NULL
,
3048 security_finish_timestamp
= DUAL_TIMESTAMP_NULL
;
3049 struct rlimit saved_rlimit_nofile
= RLIMIT_MAKE_CONST(0),
3050 saved_rlimit_memlock
= RLIMIT_MAKE_CONST(RLIM_INFINITY
); /* The original rlimits we passed
3051 * in. Note we use different values
3052 * for the two that indicate whether
3053 * these fields are initialized! */
3054 bool skip_setup
, loaded_policy
= false, queue_default_job
= false, first_boot
= false;
3055 char *switch_root_dir
= NULL
, *switch_root_init
= NULL
;
3056 usec_t before_startup
, after_startup
;
3057 static char systemd
[] = "systemd";
3058 const char *error_message
= NULL
;
3059 uint64_t saved_ambient_set
= 0;
3060 int r
, retval
= EXIT_FAILURE
;
3064 assert_se(argc
> 0 && !isempty(argv
[0]));
3066 /* SysV compatibility: redirect init → telinit */
3067 redirect_telinit(argc
, argv
);
3069 /* Take timestamps early on */
3070 dual_timestamp_from_monotonic(&kernel_timestamp
, 0);
3071 dual_timestamp_now(&userspace_timestamp
);
3073 /* Figure out whether we need to initialize the system, or if we already did that because we are
3075 skip_setup
= early_skip_setup_check(argc
, argv
);
3077 /* If we get started via the /sbin/init symlink then we are called 'init'. After a subsequent
3078 * reexecution we are then called 'systemd'. That is confusing, hence let's call us systemd
3080 program_invocation_short_name
= systemd
;
3081 (void) prctl(PR_SET_NAME
, systemd
);
3083 /* Save the original command line */
3084 save_argc_argv(argc
, argv
);
3086 /* Save the original environment as we might need to restore it if we're requested to execute another
3087 * system manager later. */
3090 error_message
= "Failed to copy environment block";
3094 /* Make sure that if the user says "syslog" we actually log to the journal. */
3095 log_set_upgrade_syslog_to_journal(true);
3097 if (getpid_cached() == 1) {
3098 /* When we run as PID 1 force system mode */
3099 arg_runtime_scope
= RUNTIME_SCOPE_SYSTEM
;
3101 /* Disable the umask logic */
3104 /* Make sure that at least initially we do not ever log to journald/syslogd, because it might
3105 * not be activated yet (even though the log socket for it exists). */
3106 log_set_prohibit_ipc(true);
3108 /* Always reopen /dev/console when running as PID 1 or one of its pre-execve() children. This
3109 * is important so that we never end up logging to any foreign stderr, for example if we have
3110 * to log in a child process right before execve()'ing the actual binary, at a point in time
3111 * where socket activation stderr/stdout are already set up. */
3112 log_set_always_reopen_console(true);
3114 if (detect_container() <= 0) {
3116 /* Running outside of a container as PID 1 */
3117 log_set_target_and_open(LOG_TARGET_KMSG
);
3120 initrd_timestamp
= userspace_timestamp
;
3123 r
= mount_setup_early();
3125 error_message
= "Failed to mount early API filesystems";
3130 /* We might have just mounted /proc, so let's try to parse the kernel
3131 * command line log arguments immediately. */
3132 log_parse_environment();
3134 /* Let's open the log backend a second time, in case the first time didn't
3135 * work. Quite possibly we have mounted /dev just now, so /dev/kmsg became
3136 * available, and it previously wasn't. */
3140 disable_printk_ratelimit();
3142 r
= initialize_security(
3144 &security_start_timestamp
,
3145 &security_finish_timestamp
,
3153 error_message
= "Failed to initialize MAC support";
3158 initialize_clock_timewarp();
3160 clock_apply_epoch(/* allow_backwards= */ !skip_setup
);
3162 /* Set the default for later on, but don't actually open the logs like this for
3163 * now. Note that if we are transitioning from the initrd there might still be
3164 * journal fd open, and we shouldn't attempt opening that before we parsed
3165 * /proc/cmdline which might redirect output elsewhere. */
3166 log_set_target(LOG_TARGET_JOURNAL_OR_KMSG
);
3169 /* Running inside a container, as PID 1 */
3170 log_set_target_and_open(LOG_TARGET_CONSOLE
);
3172 /* For later on, see above... */
3173 log_set_target(LOG_TARGET_JOURNAL
);
3175 /* clear the kernel timestamp, because we are in a container */
3176 kernel_timestamp
= DUAL_TIMESTAMP_NULL
;
3179 initialize_coredump(skip_setup
);
3181 r
= fixup_environment();
3183 log_struct_errno(LOG_EMERG
, r
,
3184 LOG_MESSAGE("Failed to fix up PID 1 environment: %m"),
3185 LOG_MESSAGE_ID(SD_MESSAGE_CORE_PID1_ENVIRONMENT_STR
));
3186 error_message
= "Failed to fix up PID1 environment";
3190 /* Try to figure out if we can use colors with the console. No need to do that for user
3191 * instances since they never log into the console. */
3192 log_show_color(colors_enabled());
3194 r
= make_null_stdio();
3196 log_warning_errno(r
, "Failed to redirect standard streams to /dev/null, ignoring: %m");
3198 /* Load the kernel modules early. */
3200 (void) kmod_setup();
3202 /* Mount /proc, /sys and friends, so that /proc/cmdline and /proc/$PID/fd is available. */
3203 r
= mount_setup(loaded_policy
, skip_setup
);
3205 error_message
= "Failed to mount API filesystems";
3209 /* The efivarfs is now mounted, let's lock down the system token. */
3210 lock_down_efi_variables();
3212 /* Running as user instance */
3213 arg_runtime_scope
= RUNTIME_SCOPE_USER
;
3214 log_set_always_reopen_console(true);
3215 log_set_target_and_open(LOG_TARGET_AUTO
);
3217 /* clear the kernel timestamp, because we are not PID 1 */
3218 kernel_timestamp
= DUAL_TIMESTAMP_NULL
;
3222 error_message
= "Failed to initialize MAC support";
3227 /* Save the original RLIMIT_NOFILE/RLIMIT_MEMLOCK so that we can reset it later when
3228 * transitioning from the initrd to the main systemd or suchlike. */
3229 save_rlimits(&saved_rlimit_nofile
, &saved_rlimit_memlock
);
3231 /* Reset all signal handlers. */
3232 (void) reset_all_signal_handlers();
3233 (void) ignore_signals(SIGNALS_IGNORE
);
3235 (void) parse_configuration(&saved_rlimit_nofile
, &saved_rlimit_memlock
);
3237 r
= parse_argv(argc
, argv
);
3239 error_message
= "Failed to parse command line arguments";
3243 r
= safety_checks();
3247 if (IN_SET(arg_action
, ACTION_TEST
, ACTION_HELP
, ACTION_DUMP_CONFIGURATION_ITEMS
, ACTION_DUMP_BUS_PROPERTIES
, ACTION_BUS_INTROSPECT
))
3248 pager_open(arg_pager_flags
);
3250 if (arg_action
!= ACTION_RUN
)
3253 if (arg_action
== ACTION_HELP
) {
3254 retval
= help() < 0 ? EXIT_FAILURE
: EXIT_SUCCESS
;
3256 } else if (arg_action
== ACTION_VERSION
) {
3259 } else if (arg_action
== ACTION_DUMP_CONFIGURATION_ITEMS
) {
3260 unit_dump_config_items(stdout
);
3261 retval
= EXIT_SUCCESS
;
3263 } else if (arg_action
== ACTION_DUMP_BUS_PROPERTIES
) {
3264 dump_bus_properties(stdout
);
3265 retval
= EXIT_SUCCESS
;
3267 } else if (arg_action
== ACTION_BUS_INTROSPECT
) {
3268 r
= bus_manager_introspect_implementations(stdout
, arg_bus_introspect
);
3269 retval
= r
>= 0 ? EXIT_SUCCESS
: EXIT_FAILURE
;
3273 assert_se(IN_SET(arg_action
, ACTION_RUN
, ACTION_TEST
));
3275 /* Move out of the way, so that we won't block unmounts */
3276 assert_se(chdir("/") == 0);
3278 if (arg_action
== ACTION_RUN
) {
3280 /* Apply the systemd.clock_usec= kernel command line switch */
3281 apply_clock_update();
3283 /* Apply random seed from kernel command line */
3284 cmdline_take_random_seed();
3287 /* A core pattern might have been specified via the cmdline. */
3288 initialize_core_pattern(skip_setup
);
3290 /* Make /usr/ read-only */
3291 apply_protect_system(skip_setup
);
3293 /* Close logging fds, in order not to confuse collecting passed fds and terminal logic below */
3296 /* Remember open file descriptors for later deserialization */
3297 r
= collect_fds(&fds
, &error_message
);
3301 /* Give up any control of the console, but make sure it's initialized. */
3302 setup_console_terminal(skip_setup
);
3304 /* Open the logging devices, if possible and necessary */
3308 log_execution_mode(&first_boot
);
3310 r
= cg_has_legacy();
3312 error_message
= "Failed to check cgroup hierarchy";
3316 r
= log_full_errno(LOG_EMERG
, SYNTHETIC_ERRNO(EPROTO
),
3317 "Detected cgroup v1 hierarchy at /sys/fs/cgroup/, which is no longer supported by current version of systemd.\n"
3318 "Please instruct your initrd to mount cgroup v2 (unified) hierarchy,\n"
3319 "possibly by removing any stale kernel command line options, such as:\n"
3320 " systemd.legacy_systemd_cgroup_controller=1\n"
3321 " systemd.unified_cgroup_hierarchy=0");
3323 error_message
= "Detected unsupported legacy cgroup hierarchy, refusing execution";
3327 r
= initialize_runtime(skip_setup
,
3329 &saved_rlimit_nofile
,
3330 &saved_rlimit_memlock
,
3336 r
= manager_new(arg_runtime_scope
,
3337 arg_action
== ACTION_TEST
? MANAGER_TEST_FULL
: 0,
3340 log_struct_errno(LOG_EMERG
, r
,
3341 LOG_MESSAGE("Failed to allocate manager object: %m"),
3342 LOG_MESSAGE_ID(SD_MESSAGE_CORE_MANAGER_ALLOCATE_STR
));
3343 error_message
= "Failed to allocate manager object";
3347 m
->timestamps
[MANAGER_TIMESTAMP_KERNEL
] = kernel_timestamp
;
3348 m
->timestamps
[MANAGER_TIMESTAMP_INITRD
] = initrd_timestamp
;
3349 m
->timestamps
[MANAGER_TIMESTAMP_USERSPACE
] = userspace_timestamp
;
3350 m
->timestamps
[manager_timestamp_initrd_mangle(MANAGER_TIMESTAMP_SECURITY_START
)] = security_start_timestamp
;
3351 m
->timestamps
[manager_timestamp_initrd_mangle(MANAGER_TIMESTAMP_SECURITY_FINISH
)] = security_finish_timestamp
;
3353 m
->saved_ambient_set
= saved_ambient_set
;
3355 set_manager_defaults(m
);
3356 set_manager_settings(m
);
3357 manager_set_first_boot(m
, first_boot
);
3358 manager_set_switching_root(m
, arg_switched_root
);
3360 /* Remember whether we should queue the default job */
3361 queue_default_job
= !arg_serialization
|| arg_switched_root
;
3363 before_startup
= now(CLOCK_MONOTONIC
);
3365 r
= manager_startup(m
, arg_serialization
, fds
, /* root= */ NULL
);
3367 error_message
= "Failed to start up manager";
3371 /* This will close all file descriptors that were opened, but not claimed by any unit. */
3372 fds
= fdset_free(fds
);
3373 arg_serialization
= safe_fclose(arg_serialization
);
3375 if (queue_default_job
) {
3376 r
= do_queue_default_job(m
, &error_message
);
3381 after_startup
= now(CLOCK_MONOTONIC
);
3383 log_full(arg_action
== ACTION_TEST
? LOG_INFO
: LOG_DEBUG
,
3384 "Loaded units and determined initial transaction in %s.",
3385 FORMAT_TIMESPAN(after_startup
- before_startup
, 100 * USEC_PER_MSEC
));
3387 if (arg_action
== ACTION_TEST
) {
3388 manager_test_summary(m
);
3389 retval
= EXIT_SUCCESS
;
3393 r
= invoke_main_loop(m
,
3394 &saved_rlimit_nofile
,
3395 &saved_rlimit_memlock
,
3401 /* MANAGER_OK and MANAGER_RELOAD are not expected here. */
3402 assert(r
< 0 || IN_SET(r
, MANAGER_REEXECUTE
, MANAGER_EXIT
) ||
3403 (arg_runtime_scope
== RUNTIME_SCOPE_SYSTEM
&&
3404 IN_SET(r
, MANAGER_REBOOT
,
3405 MANAGER_SOFT_REBOOT
,
3409 MANAGER_SWITCH_ROOT
)));
3415 arg_reboot_watchdog
= manager_get_watchdog(m
, WATCHDOG_REBOOT
);
3416 arg_kexec_watchdog
= manager_get_watchdog(m
, WATCHDOG_KEXEC
);
3417 m
= manager_free(m
);
3420 mac_selinux_finish();
3422 if (IN_SET(r
, MANAGER_REEXECUTE
, MANAGER_SWITCH_ROOT
, MANAGER_SOFT_REBOOT
))
3425 &saved_rlimit_nofile
,
3426 &saved_rlimit_memlock
,
3431 &error_message
); /* This only returns if reexecution failed */
3433 arg_serialization
= safe_fclose(arg_serialization
);
3434 fds
= fdset_free(fds
);
3436 saved_env
= strv_free(saved_env
);
3438 #if HAVE_VALGRIND_VALGRIND_H
3439 /* If we are PID 1 and running under valgrind, then let's exit
3440 * here explicitly. valgrind will only generate nice output on
3441 * exit(), not on exec(), hence let's do the former not the
3443 if (getpid_cached() == 1 && RUNNING_ON_VALGRIND
) {
3444 /* Cleanup watchdog_device strings for valgrind. We need them
3445 * in become_shutdown() so normally we cannot free them yet. */
3446 watchdog_free_device();
3452 #if HAS_FEATURE_ADDRESS_SANITIZER
3453 /* At this stage we most likely don't have stdout/stderr open, so the following
3454 * LSan check would not print any actionable information and would just crash
3455 * PID 1. To make this a bit more helpful, let's try to open /dev/console,
3456 * and if we succeed redirect LSan's report there. */
3457 if (getpid_cached() == 1) {
3458 _cleanup_close_
int tty_fd
= -EBADF
;
3460 tty_fd
= open_terminal("/dev/console", O_WRONLY
|O_NOCTTY
|O_CLOEXEC
);
3462 __sanitizer_set_report_fd((void*) (intptr_t) tty_fd
);
3464 __lsan_do_leak_check();
3469 (void) sd_notifyf(/* unset_environment= */ false,
3472 /* Try to invoke the shutdown binary unless we already failed.
3473 * If we failed above, we want to freeze after finishing cleanup. */
3474 if (arg_runtime_scope
== RUNTIME_SCOPE_SYSTEM
&&
3475 IN_SET(r
, MANAGER_EXIT
, MANAGER_REBOOT
, MANAGER_POWEROFF
, MANAGER_HALT
, MANAGER_KEXEC
)) {
3476 r
= become_shutdown(r
, retval
);
3477 log_error_errno(r
, "Failed to execute shutdown binary, %s: %m", getpid_cached() == 1 ? "freezing" : "quitting");
3478 error_message
= "Failed to execute shutdown binary";
3481 /* This is primarily useful when running systemd in a VM, as it provides the user running the VM with
3482 * a mechanism to pick up systemd's exit status in the VM. */
3483 (void) sd_notifyf(/* unset_environment= */ false,
3484 "EXIT_STATUS=%i", retval
);
3486 watchdog_free_device();
3487 arg_watchdog_device
= mfree(arg_watchdog_device
);
3489 if (getpid_cached() == 1) {
3491 manager_status_printf(NULL
, STATUS_TYPE_EMERGENCY
,
3492 ANSI_HIGHLIGHT_RED
"!!!!!!" ANSI_NORMAL
,
3493 "%s.", error_message
);
3494 freeze_or_exit_or_reboot();