1 /* SPDX-License-Identifier: LGPL-2.1-or-later */
10 #include <sys/utsname.h>
13 #if HAVE_VALGRIND_VALGRIND_H
14 # include <valgrind/valgrind.h>
18 #include "sd-daemon.h"
19 #include "sd-messages.h"
21 #include "alloc-util.h"
22 #include "apparmor-setup.h"
23 #include "architecture.h"
24 #include "argv-util.h"
26 #include "bus-error.h"
27 #include "capability-util.h"
28 #include "cgroup-setup.h"
30 #include "clock-util.h"
31 #include "clock-warp.h"
32 #include "conf-parser.h"
33 #include "confidential-virt.h"
34 #include "constants.h"
36 #include "coredump-util.h"
37 #include "cpu-set-util.h"
38 #include "crash-handler.h"
40 #include "dbus-manager.h"
41 #include "dev-setup.h"
42 #include "efi-random.h"
43 #include "emergency-action.h"
49 #include "format-util.h"
50 #include "getopt-defs.h"
51 #include "hexdecoct.h"
52 #include "hostname-setup.h"
53 #include "id128-util.h"
54 #include "ima-setup.h"
55 #include "import-creds.h"
56 #include "initrd-util.h"
58 #include "ipe-setup.h"
60 #include "kmod-setup.h"
61 #include "label-util.h"
62 #include "libmount-util.h"
63 #include "limits-util.h"
64 #include "load-fragment.h"
66 #include "loopback-setup.h"
67 #include "machine-id-setup.h"
70 #include "manager-dump.h"
71 #include "manager-serialize.h"
72 #include "mkdir-label.h"
73 #include "mount-setup.h"
74 #include "mount-util.h"
76 #include "osc-context.h"
78 #include "parse-argument.h"
79 #include "parse-util.h"
80 #include "path-util.h"
81 #include "pretty-print.h"
82 #include "proc-cmdline.h"
83 #include "process-util.h"
84 #include "random-util.h"
85 #include "rlimit-util.h"
87 #include "seccomp-util.h"
88 #include "selinux-setup.h"
89 #include "selinux-util.h"
90 #include "serialize.h"
92 #include "signal-util.h"
93 #include "smack-setup.h"
95 #include "stat-util.h"
96 #include "stdio-util.h"
98 #include "switch-root.h"
99 #include "sysctl-util.h"
100 #include "terminal-util.h"
101 #include "time-util.h"
102 #include "umask-util.h"
103 #include "unit-name.h"
104 #include "user-util.h"
107 #include "watchdog.h"
109 #if HAS_FEATURE_ADDRESS_SANITIZER
110 #include <sanitizer/lsan_interface.h>
118 ACTION_DUMP_CONFIGURATION_ITEMS
,
119 ACTION_DUMP_BUS_PROPERTIES
,
120 ACTION_BUS_INTROSPECT
,
121 } arg_action
= ACTION_RUN
;
/* Process-wide settings of the manager. Within this file they are written by parse_argv(),
 * parse_proc_cmdline_item() and (through the pointers stored in its table) parse_config_file(), and
 * most of them are read back by set_manager_defaults() and set_manager_settings(). */
123 static const char *arg_bus_introspect
= NULL
;
125 /* Those variables are initialized to 0 automatically, so we avoid uninitialized memory access. Real
126 * defaults are assigned in reset_arguments() below. */
127 static char *arg_default_unit
;
128 static RuntimeScope arg_runtime_scope
;
/* NOTE(review): the next two variables are not 'static', unlike their neighbours; presumably they are
 * also consumed from another translation unit -- confirm before changing their linkage. */
131 bool arg_crash_shell
;
132 CrashAction arg_crash_action
;
133 static char *arg_confirm_spawn
;
134 static ShowStatus arg_show_status
;
135 static StatusUnitFormat arg_status_unit_format
;
136 static bool arg_switched_root
;
137 static PagerFlags arg_pager_flags
;
138 static bool arg_service_watchdogs
;
139 static UnitDefaults arg_defaults
;
140 static usec_t arg_runtime_watchdog
;
141 static usec_t arg_reboot_watchdog
;
142 static usec_t arg_kexec_watchdog
;
143 static usec_t arg_pretimeout_watchdog
;
144 static char *arg_early_core_pattern
;
145 static char *arg_watchdog_pretimeout_governor
;
146 static char *arg_watchdog_device
;
147 static char **arg_default_environment
;
148 static char **arg_manager_environment
;
149 static uint64_t arg_capability_bounding_set
;
150 static bool arg_no_new_privs
;
151 static int arg_protect_system
;
152 static nsec_t arg_timer_slack_nsec
;
153 static Set
* arg_syscall_archs
;
154 static FILE* arg_serialization
;
155 static sd_id128_t arg_machine_id
;
156 static bool arg_machine_id_from_firmware
= false;
157 static EmergencyAction arg_cad_burst_action
;
158 static CPUSet arg_cpu_affinity
;
159 static NUMAPolicy arg_numa_policy
;
160 static usec_t arg_clock_usec
;
161 static void *arg_random_seed
;
162 static size_t arg_random_seed_size
;
163 static usec_t arg_reload_limit_interval_sec
;
164 static unsigned arg_reload_limit_burst
;
166 /* A copy of the original environment block */
167 static char **saved_env
= NULL
;
/* Forward declaration, so that code located above the definition can call it. Both parameters are
 * read-only pointers to struct rlimit values supplied by the caller. */
169 static int parse_configuration(const struct rlimit
*saved_rlimit_nofile
,
170 const struct rlimit
*saved_rlimit_memlock
);
/* Instantiates config_parse_crash_action(), the callback that parse_config_file() registers for the
 * CrashAction= key with &arg_crash_action as its target.
 * NOTE(review): the last macro argument (CRASH_FREEZE) is presumably the value applied when the
 * setting is assigned an empty string -- confirm against the macro definition. */
172 static DEFINE_CONFIG_PARSE_ENUM_WITH_DEFAULT(config_parse_crash_action
, crash_action
, CrashAction
, CRASH_FREEZE
);
/* Collects the configuration locations used by the user manager.
 *
 * With <base> being the directory produced by xdg_user_config_dir("/systemd", ...):
 *   - *ret_files receives a string list with "<base>/user.conf" and PKGSYSCONFDIR "/user.conf";
 *   - *ret_dirs receives a string list with <base> itself plus the entries of
 *     CONF_PATHS_STRV("systemd").
 *
 * Both lists are handed over with TAKE_PTR(), i.e. the caller becomes their owner. 'base' is moved
 * into 'dirs' by strv_consume(), which is why it is only used for the file list before that call. */
174 static int manager_find_user_config_paths(char ***ret_files
, char ***ret_dirs
) {
175 _cleanup_free_
char *base
= NULL
;
176 _cleanup_strv_free_
char **files
= NULL
, **dirs
= NULL
;
179 r
= xdg_user_config_dir("/systemd", &base
);
183 r
= strv_extendf(&files
, "%s/user.conf", base
);
187 r
= strv_extend(&files
, PKGSYSCONFDIR
"/user.conf");
191 r
= strv_consume(&dirs
, TAKE_PTR(base
));
195 r
= strv_extend_strv(&dirs
, CONF_PATHS_STRV("systemd"), false);
199 *ret_files
= TAKE_PTR(files
);
200 *ret_dirs
= TAKE_PTR(dirs
);
/* Queries the window size of the terminal referenced by tty_fd (TIOCGWINSZ) and exports it through
 * the $COLUMNS and $LINES environment variables, overwriting existing values. All problems are only
 * logged at debug level and described as "ignoring".
 * NOTE(review): the two unsetenv() calls at the end presumably form the shared failure path that
 * removes both variables again -- confirm. */
204 static int save_console_winsize_in_environment(int tty_fd
) {
209 struct winsize ws
= {};
210 if (ioctl(tty_fd
, TIOCGWINSZ
, &ws
) < 0) {
211 log_debug_errno(errno
, "Failed to acquire console window size, ignoring.");
/* The size only counts as "not set" when columns and rows are both non-positive. */
215 if (ws
.ws_col
<= 0 && ws
.ws_row
<= 0) {
216 log_debug("No console window size set, ignoring.");
220 r
= setenvf("COLUMNS", /* overwrite= */ true, "%u", ws
.ws_col
);
222 log_debug_errno(r
, "Failed to set $COLUMNS, ignoring: %m");
226 r
= setenvf("LINES", /* overwrite= */ true, "%u", ws
.ws_row
);
228 log_debug_errno(r
, "Failed to set $LINES, ignoring: %m");
232 log_debug("Recorded console dimensions in environment: $COLUMNS=%u $LINES=%u.", ws
.ws_col
, ws
.ws_row
);
236 (void) unsetenv("COLUMNS");
237 (void) unsetenv("LINES");
/* Prepares /dev/console: opens it read/write without making it the controlling terminal, resets it
 * via reset_dev_console_fd() while leaving the display mode alone, and records its window size in the
 * environment. The work is guarded by a getpid_cached() != 1 check, i.e. it is specific to running
 * as PID 1. If the console cannot be opened the error is logged and the negative value returned by
 * open_terminal() is propagated. The descriptor is closed automatically (_cleanup_close_). */
241 static int console_setup(void) {
243 if (getpid_cached() != 1)
246 _cleanup_close_
int tty_fd
= -EBADF
;
248 tty_fd
= open_terminal("/dev/console", O_RDWR
|O_NOCTTY
|O_CLOEXEC
);
250 return log_error_errno(tty_fd
, "Failed to open %s: %m", "/dev/console");
252 /* We don't want to force text mode. Plymouth may be showing pictures already from initrd. */
253 reset_dev_console_fd(tty_fd
, /* switch_to_text= */ false);
255 save_console_winsize_in_environment(tty_fd
);
/* Parses a timeout string into *ret. Used by parse_proc_cmdline_item() for systemd.watchdog_sec= and
 * systemd.watchdog_pre_sec=.
 *   "default"     -> *ret = USEC_INFINITY
 *   "off"         -> handled by its own branch
 *   anything else -> passed to parse_sec() */
260 static int parse_timeout(const char *value
, usec_t
*ret
) {
266 if (streq(value
, "default"))
267 *ret
= USEC_INFINITY
;
268 else if (streq(value
, "off"))
271 r
= parse_sec(value
, ret
);
/* Handles one key[=value] item of the kernel command line.
 *
 * The if/else chain below recognizes "systemd.unit"/"rd.systemd.unit", the various "systemd.*"
 * switches, and the bare words "quiet" and "debug" (only when they carry no value). Any other key is
 * passed to runlevel_to_target() for SysV compatibility. Recognized values are stored in the arg_*
 * globals. Values that fail to parse are reported with a warning that says "ignoring". */
276 static int parse_proc_cmdline_item(const char *key
, const char *value
, void *data
) {
281 if (STR_IN_SET(key
, "systemd.unit", "rd.systemd.unit")) {
283 if (proc_cmdline_value_missing(key
, value
))
286 if (!unit_name_is_valid(value
, UNIT_NAME_PLAIN
|UNIT_NAME_INSTANCE
))
287 log_warning("Unit name specified on %s= is not valid, ignoring: %s", key
, value
);
/* The "rd."-prefixed key is honoured only inside the initrd, the unprefixed one only outside of it. */
288 else if (in_initrd() == !!startswith(key
, "rd."))
289 return free_and_strdup_warn(&arg_default_unit
, value
);
291 } else if (proc_cmdline_key_streq(key
, "systemd.dump_core")) {
293 r
= value
? parse_boolean(value
) : true;
295 log_warning_errno(r
, "Failed to parse dump core switch %s, ignoring: %m", value
);
299 } else if (proc_cmdline_key_streq(key
, "systemd.early_core_pattern")) {
301 if (proc_cmdline_value_missing(key
, value
))
304 if (path_is_absolute(value
))
305 (void) parse_path_argument(value
, false, &arg_early_core_pattern
);
307 log_warning("Specified core pattern '%s' is not an absolute path, ignoring.", value
);
309 } else if (proc_cmdline_key_streq(key
, "systemd.crash_chvt")) {
312 arg_crash_chvt
= 0; /* turn on */
314 r
= parse_crash_chvt(value
, &arg_crash_chvt
);
316 log_warning_errno(r
, "Failed to parse crash chvt switch %s, ignoring: %m", value
);
319 } else if (proc_cmdline_key_streq(key
, "systemd.crash_shell")) {
321 r
= value
? parse_boolean(value
) : true;
323 log_warning_errno(r
, "Failed to parse crash shell switch %s, ignoring: %m", value
);
/* Legacy boolean form of the crash action: true maps to CRASH_REBOOT, false to CRASH_FREEZE. */
327 } else if (proc_cmdline_key_streq(key
, "systemd.crash_reboot")) {
329 r
= value
? parse_boolean(value
) : true;
331 log_warning_errno(r
, "Failed to parse crash reboot switch %s, ignoring: %m", value
);
333 arg_crash_action
= r
? CRASH_REBOOT
: CRASH_FREEZE
;
335 } else if (proc_cmdline_key_streq(key
, "systemd.crash_action")) {
337 if (proc_cmdline_value_missing(key
, value
))
340 r
= crash_action_from_string(value
);
342 log_warning_errno(r
, "Failed to parse crash action switch %s, ignoring: %m", value
);
344 arg_crash_action
= r
;
346 } else if (proc_cmdline_key_streq(key
, "systemd.confirm_spawn")) {
349 r
= parse_confirm_spawn(value
, &s
);
351 log_warning_errno(r
, "Failed to parse confirm_spawn switch %s, ignoring: %m", value
);
353 free_and_replace(arg_confirm_spawn
, s
);
355 } else if (proc_cmdline_key_streq(key
, "systemd.service_watchdogs")) {
357 r
= value
? parse_boolean(value
) : true;
359 log_warning_errno(r
, "Failed to parse service watchdog switch %s, ignoring: %m", value
);
361 arg_service_watchdogs
= r
;
363 } else if (proc_cmdline_key_streq(key
, "systemd.show_status")) {
366 r
= parse_show_status(value
, &arg_show_status
);
368 log_warning_errno(r
, "Failed to parse show status switch %s, ignoring: %m", value
);
370 arg_show_status
= SHOW_STATUS_YES
;
372 } else if (proc_cmdline_key_streq(key
, "systemd.status_unit_format")) {
374 if (proc_cmdline_value_missing(key
, value
))
377 r
= status_unit_format_from_string(value
);
379 log_warning_errno(r
, "Failed to parse %s=%s, ignoring: %m", key
, value
);
381 arg_status_unit_format
= r
;
383 } else if (proc_cmdline_key_streq(key
, "systemd.default_standard_output")) {
385 if (proc_cmdline_value_missing(key
, value
))
388 r
= exec_output_from_string(value
);
390 log_warning_errno(r
, "Failed to parse default standard output switch %s, ignoring: %m", value
);
392 arg_defaults
.std_output
= r
;
394 } else if (proc_cmdline_key_streq(key
, "systemd.default_standard_error")) {
396 if (proc_cmdline_value_missing(key
, value
))
399 r
= exec_output_from_string(value
);
401 log_warning_errno(r
, "Failed to parse default standard error switch %s, ignoring: %m", value
);
403 arg_defaults
.std_error
= r
;
405 } else if (streq(key
, "systemd.setenv")) {
407 if (proc_cmdline_value_missing(key
, value
))
410 if (!env_assignment_is_valid(value
))
411 log_warning("Environment variable assignment '%s' is not valid. Ignoring.", value
);
413 r
= strv_env_replace_strdup(&arg_default_environment
, value
);
/* "firmware" is a special value; anything else must parse as a non-zero 128-bit ID, and a successfully
 * parsed explicit ID switches the firmware mode off again. */
418 } else if (proc_cmdline_key_streq(key
, "systemd.machine_id")) {
420 if (proc_cmdline_value_missing(key
, value
))
423 if (streq(value
, "firmware"))
424 arg_machine_id_from_firmware
= true;
426 r
= id128_from_string_nonzero(value
, &arg_machine_id
);
428 log_warning_errno(r
, "MachineID '%s' is not valid, ignoring: %m", value
);
430 arg_machine_id_from_firmware
= false;
432 } else if (proc_cmdline_key_streq(key
, "systemd.default_timeout_start_sec")) {
434 if (proc_cmdline_value_missing(key
, value
))
437 r
= parse_sec(value
, &arg_defaults
.timeout_start_usec
);
439 log_warning_errno(r
, "Failed to parse default start timeout '%s', ignoring: %m", value
);
/* A timeout of zero is turned into USEC_INFINITY. */
441 if (arg_defaults
.timeout_start_usec
<= 0)
442 arg_defaults
.timeout_start_usec
= USEC_INFINITY
;
444 } else if (proc_cmdline_key_streq(key
, "systemd.default_device_timeout_sec")) {
446 if (proc_cmdline_value_missing(key
, value
))
449 r
= parse_sec(value
, &arg_defaults
.device_timeout_usec
);
451 log_warning_errno(r
, "Failed to parse default device timeout '%s', ignoring: %m", value
);
453 if (arg_defaults
.device_timeout_usec
<= 0)
454 arg_defaults
.device_timeout_usec
= USEC_INFINITY
;
456 } else if (proc_cmdline_key_streq(key
, "systemd.cpu_affinity")) {
458 if (proc_cmdline_value_missing(key
, value
))
461 r
= parse_cpu_set(value
, &arg_cpu_affinity
);
463 log_warning_errno(r
, "Failed to parse CPU affinity mask '%s', ignoring: %m", value
);
465 } else if (proc_cmdline_key_streq(key
, "systemd.watchdog_device")) {
467 if (proc_cmdline_value_missing(key
, value
))
470 (void) parse_path_argument(value
, false, &arg_watchdog_device
);
472 } else if (proc_cmdline_key_streq(key
, "systemd.watchdog_sec")) {
474 if (proc_cmdline_value_missing(key
, value
))
477 r
= parse_timeout(value
, &arg_runtime_watchdog
);
479 log_warning_errno(r
, "Failed to parse systemd.watchdog_sec= argument '%s', ignoring: %m", value
);
/* The single command-line value is applied to the reboot and kexec watchdogs as well. */
483 arg_kexec_watchdog
= arg_reboot_watchdog
= arg_runtime_watchdog
;
485 } else if (proc_cmdline_key_streq(key
, "systemd.watchdog_pre_sec")) {
487 if (proc_cmdline_value_missing(key
, value
))
490 r
= parse_timeout(value
, &arg_pretimeout_watchdog
);
492 log_warning_errno(r
, "Failed to parse systemd.watchdog_pre_sec= argument '%s', ignoring: %m", value
);
496 } else if (proc_cmdline_key_streq(key
, "systemd.watchdog_pretimeout_governor")) {
498 if (proc_cmdline_value_missing(key
, value
) || isempty(value
)) {
499 arg_watchdog_pretimeout_governor
= mfree(arg_watchdog_pretimeout_governor
);
503 if (!string_is_safe(value
)) {
504 log_warning("Watchdog pretimeout governor '%s' is not valid, ignoring.", value
);
508 return free_and_strdup_warn(&arg_watchdog_pretimeout_governor
, value
);
510 } else if (proc_cmdline_key_streq(key
, "systemd.clock_usec")) {
512 if (proc_cmdline_value_missing(key
, value
))
515 r
= safe_atou64(value
, &arg_clock_usec
);
517 log_warning_errno(r
, "Failed to parse systemd.clock_usec= argument, ignoring: %s", value
);
519 } else if (proc_cmdline_key_streq(key
, "systemd.random_seed")) {
523 if (proc_cmdline_value_missing(key
, value
))
526 r
= unbase64mem(value
, &p
, &sz
);
528 log_warning_errno(r
, "Failed to parse systemd.random_seed= argument, ignoring: %s", value
);
/* Replace any previous seed; a zero-length decode result is freed and stored as NULL. */
530 free(arg_random_seed
);
531 arg_random_seed
= sz
> 0 ? p
: mfree(p
);
532 arg_random_seed_size
= sz
;
534 } else if (proc_cmdline_key_streq(key
, "systemd.reload_limit_interval_sec")) {
536 if (proc_cmdline_value_missing(key
, value
))
539 r
= parse_sec(value
, &arg_reload_limit_interval_sec
);
541 log_warning_errno(r
, "Failed to parse systemd.reload_limit_interval_sec= argument '%s', ignoring: %m", value
);
545 } else if (proc_cmdline_key_streq(key
, "systemd.reload_limit_burst")) {
547 if (proc_cmdline_value_missing(key
, value
))
550 r
= safe_atou(value
, &arg_reload_limit_burst
);
552 log_warning_errno(r
, "Failed to parse systemd.reload_limit_burst= argument '%s', ignoring: %m", value
);
/* "quiet" only applies while no show-status value has been chosen yet. */
556 } else if (streq(key
, "quiet") && !value
) {
558 if (arg_show_status
== _SHOW_STATUS_INVALID
)
559 arg_show_status
= SHOW_STATUS_ERROR
;
561 } else if (streq(key
, "debug") && !value
) {
563 /* Note that log_parse_environment() handles 'debug'
564 * too, and sets the log level to LOG_DEBUG. */
566 if (detect_container() > 0)
567 log_set_target(LOG_TARGET_CONSOLE
);
572 /* Compatible with SysV, but supported independently even if SysV compatibility is disabled. */
573 target
= runlevel_to_target(key
);
575 return free_and_strdup_warn(&arg_default_unit
, target
);
/* DEFINE_SETTER(name, func, descr) generates a static configuration-parser callback called 'name'.
 * Its error path reports the offending value through log_syntax() at LOG_ERR, with 'descr' spliced
 * into the message. The five instances below are the callbacks that parse_config_file() registers
 * for LogLevel=, LogTarget=, LogColor=, LogLocation= and LogTime=.
 *
 * NOTE(review): the message is built as "Invalid " descr "'%s': %m" with no space between descr and
 * the quoted value, so it renders as e.g. "Invalid log level'foo': ..." -- probably unintended. */
581 #define DEFINE_SETTER(name, func, descr) \
582 static int name(const char *unit, \
583 const char *filename, \
585 const char *section, \
586 unsigned section_line, \
587 const char *lvalue, \
589 const char *rvalue, \
601 log_syntax(unit, LOG_ERR, filename, line, r, \
602 "Invalid " descr "'%s': %m", \
608 DEFINE_SETTER(config_parse_level2
, log_set_max_level_from_string
, "log level");
609 DEFINE_SETTER(config_parse_target
, log_set_target_from_string
, "target");
610 DEFINE_SETTER(config_parse_color
, log_show_color_from_string
, "color");
611 DEFINE_SETTER(config_parse_location
, log_show_location_from_string
, "location");
612 DEFINE_SETTER(config_parse_time
, log_show_time_from_string
, "time");
/* Configuration callback registered for DefaultTimeoutAbortSec=. It forwards to
 * config_parse_timeout_abort() with &arg_defaults.timeout_abort_usec as the destination, and stores
 * that call's result in arg_defaults.timeout_abort_set. The table entry passes NULL as data because
 * the destination is hard-wired here. */
614 static int config_parse_default_timeout_abort(
616 const char *filename
,
619 unsigned section_line
,
627 r
= config_parse_timeout_abort(
636 &arg_defaults
.timeout_abort_usec
,
639 arg_defaults
.timeout_abort_set
= r
;
/* Configuration callback registered for DefaultOOMScoreAdjust=.
 *   - Empty value: arg_defaults.oom_score_adjust_set is cleared.
 *   - Otherwise: the value is parsed with parse_oom_score_adjust(); on success it is stored in
 *     arg_defaults.oom_score_adjust and the _set flag is raised.
 * Parse failures are reported through log_syntax_parse_error(). */
643 static int config_parse_oom_score_adjust(
645 const char *filename
,
648 unsigned section_line
,
657 if (isempty(rvalue
)) {
658 arg_defaults
.oom_score_adjust_set
= false;
662 r
= parse_oom_score_adjust(rvalue
, &oa
);
664 return log_syntax_parse_error(unit
, filename
, line
, r
, lvalue
, rvalue
);
666 arg_defaults
.oom_score_adjust
= oa
;
667 arg_defaults
.oom_score_adjust_set
= true;
/* Configuration callback registered for the manager-level ProtectSystem= key. 'data' must be a
 * non-NULL pointer to the int that receives the setting (enforced by ASSERT_PTR). An empty value and
 * the literal "auto" share one branch; every other value is parsed as a boolean, and a parse failure
 * is reported through log_syntax_parse_error(). */
672 static int config_parse_protect_system_pid1(
674 const char *filename
,
677 unsigned section_line
,
684 int *v
= ASSERT_PTR(data
), r
;
686 /* This is modelled after the per-service ProtectSystem= setting, but a bit more restricted on one
687 * hand, and more automatic in another. i.e. we currently only support yes/no (not "strict" or
688 * "full"). And we will enable this automatically for the initrd unless configured otherwise.
690 * We might extend this later to match more closely what the per-service ProtectSystem= can do, but
691 * this is not trivial, due to ordering constraints: besides /usr/ we don't really have much mounted
692 * at the moment we enable this logic. */
694 if (isempty(rvalue
) || streq(rvalue
, "auto")) {
699 r
= parse_boolean(rvalue
);
701 return log_syntax_parse_error(unit
, filename
, line
, r
, lvalue
, rvalue
);
/* Configuration callback registered for CrashReboot=, the boolean predecessor of CrashAction=; both
 * table entries target the same variable. 'data' must be a non-NULL pointer to a CrashAction. An
 * empty value is handled by its own branch; otherwise the value is parsed as a boolean, where true
 * selects CRASH_REBOOT and false selects CRASH_FREEZE. Parse failures are reported through
 * log_syntax_parse_error(). */
707 static int config_parse_crash_reboot(
709 const char *filename
,
712 unsigned section_line
,
719 CrashAction
*v
= ASSERT_PTR(data
);
722 if (isempty(rvalue
)) {
727 r
= parse_boolean(rvalue
);
729 return log_syntax_parse_error(unit
, filename
, line
, r
, lvalue
, rvalue
);
731 *v
= r
> 0 ? CRASH_REBOOT
: CRASH_FREEZE
;
/* Loads the manager configuration into the arg_* globals.
 *
 * The table maps every key of the [Manager] section to { parser callback, callback-specific integer
 * argument, destination pointer }. Entries with a NULL destination use callbacks that write to fixed
 * globals themselves, or only warn (config_parse_warn_compat).
 *
 * For RUNTIME_SCOPE_SYSTEM the standard "systemd/system.conf" file and its drop-ins are parsed. For
 * the user scope the file and directory lists come from manager_find_user_config_paths() and are
 * fed to config_parse_many(). The results of both parse calls are explicitly discarded ((void));
 * the only error returned from here is a failure to determine the user configuration paths.
 *
 * Finally, default start/stop timeouts of zero are converted to USEC_INFINITY. */
735 static int parse_config_file(void) {
736 const ConfigTableItem items
[] = {
737 { "Manager", "LogLevel", config_parse_level2
, 0, NULL
},
738 { "Manager", "LogTarget", config_parse_target
, 0, NULL
},
739 { "Manager", "LogColor", config_parse_color
, 0, NULL
},
740 { "Manager", "LogLocation", config_parse_location
, 0, NULL
},
741 { "Manager", "LogTime", config_parse_time
, 0, NULL
},
742 { "Manager", "DumpCore", config_parse_bool
, 0, &arg_dump_core
},
743 { "Manager", "CrashChVT", /* legacy */ config_parse_crash_chvt
, 0, &arg_crash_chvt
},
744 { "Manager", "CrashChangeVT", config_parse_crash_chvt
, 0, &arg_crash_chvt
},
745 { "Manager", "CrashShell", config_parse_bool
, 0, &arg_crash_shell
},
746 { "Manager", "CrashReboot", config_parse_crash_reboot
, 0, &arg_crash_action
},
747 { "Manager", "CrashAction", config_parse_crash_action
, 0, &arg_crash_action
},
748 { "Manager", "ShowStatus", config_parse_show_status
, 0, &arg_show_status
},
749 { "Manager", "StatusUnitFormat", config_parse_status_unit_format
, 0, &arg_status_unit_format
},
750 { "Manager", "CPUAffinity", config_parse_cpu_set
, 0, &arg_cpu_affinity
},
751 { "Manager", "NUMAPolicy", config_parse_numa_policy
, 0, &arg_numa_policy
.type
},
752 { "Manager", "NUMAMask", config_parse_numa_mask
, 0, &arg_numa_policy
.nodes
},
753 { "Manager", "JoinControllers", config_parse_warn_compat
, DISABLED_LEGACY
, NULL
},
754 { "Manager", "RuntimeWatchdogSec", config_parse_watchdog_sec
, 0, &arg_runtime_watchdog
},
755 { "Manager", "RuntimeWatchdogPreSec", config_parse_watchdog_sec
, 0, &arg_pretimeout_watchdog
},
756 { "Manager", "RebootWatchdogSec", config_parse_watchdog_sec
, 0, &arg_reboot_watchdog
},
757 { "Manager", "ShutdownWatchdogSec", config_parse_watchdog_sec
, 0, &arg_reboot_watchdog
}, /* obsolete alias */
758 { "Manager", "KExecWatchdogSec", config_parse_watchdog_sec
, 0, &arg_kexec_watchdog
},
759 { "Manager", "WatchdogDevice", config_parse_path
, 0, &arg_watchdog_device
},
760 { "Manager", "RuntimeWatchdogPreGovernor", config_parse_string
, CONFIG_PARSE_STRING_SAFE
, &arg_watchdog_pretimeout_governor
},
761 { "Manager", "CapabilityBoundingSet", config_parse_capability_set
, 0, &arg_capability_bounding_set
},
762 { "Manager", "NoNewPrivileges", config_parse_bool
, 0, &arg_no_new_privs
},
763 { "Manager", "ProtectSystem", config_parse_protect_system_pid1
, 0, &arg_protect_system
},
765 { "Manager", "SystemCallArchitectures", config_parse_syscall_archs
, 0, &arg_syscall_archs
},
767 { "Manager", "SystemCallArchitectures", config_parse_warn_compat
, DISABLED_CONFIGURATION
, NULL
},
770 { "Manager", "TimerSlackNSec", config_parse_nsec
, 0, &arg_timer_slack_nsec
},
771 { "Manager", "DefaultTimerAccuracySec", config_parse_sec
, 0, &arg_defaults
.timer_accuracy_usec
},
772 { "Manager", "DefaultStandardOutput", config_parse_output_restricted
, 0, &arg_defaults
.std_output
},
773 { "Manager", "DefaultStandardError", config_parse_output_restricted
, 0, &arg_defaults
.std_error
},
774 { "Manager", "DefaultTimeoutStartSec", config_parse_sec
, 0, &arg_defaults
.timeout_start_usec
},
775 { "Manager", "DefaultTimeoutStopSec", config_parse_sec
, 0, &arg_defaults
.timeout_stop_usec
},
776 { "Manager", "DefaultTimeoutAbortSec", config_parse_default_timeout_abort
, 0, NULL
},
777 { "Manager", "DefaultDeviceTimeoutSec", config_parse_sec
, 0, &arg_defaults
.device_timeout_usec
},
778 { "Manager", "DefaultRestartSec", config_parse_sec
, 0, &arg_defaults
.restart_usec
},
779 { "Manager", "DefaultStartLimitInterval", config_parse_sec
, 0, &arg_defaults
.start_limit
.interval
}, /* obsolete alias */
780 { "Manager", "DefaultStartLimitIntervalSec", config_parse_sec
, 0, &arg_defaults
.start_limit
.interval
},
781 { "Manager", "DefaultStartLimitBurst", config_parse_unsigned
, 0, &arg_defaults
.start_limit
.burst
},
782 { "Manager", "DefaultRestrictSUIDSGID", config_parse_bool
, 0, &arg_defaults
.restrict_suid_sgid
},
783 { "Manager", "DefaultEnvironment", config_parse_environ
, arg_runtime_scope
, &arg_default_environment
},
784 { "Manager", "ManagerEnvironment", config_parse_environ
, arg_runtime_scope
, &arg_manager_environment
},
785 { "Manager", "DefaultLimitCPU", config_parse_rlimit
, RLIMIT_CPU
, arg_defaults
.rlimit
},
786 { "Manager", "DefaultLimitFSIZE", config_parse_rlimit
, RLIMIT_FSIZE
, arg_defaults
.rlimit
},
787 { "Manager", "DefaultLimitDATA", config_parse_rlimit
, RLIMIT_DATA
, arg_defaults
.rlimit
},
788 { "Manager", "DefaultLimitSTACK", config_parse_rlimit
, RLIMIT_STACK
, arg_defaults
.rlimit
},
789 { "Manager", "DefaultLimitCORE", config_parse_rlimit
, RLIMIT_CORE
, arg_defaults
.rlimit
},
790 { "Manager", "DefaultLimitRSS", config_parse_rlimit
, RLIMIT_RSS
, arg_defaults
.rlimit
},
791 { "Manager", "DefaultLimitNOFILE", config_parse_rlimit
, RLIMIT_NOFILE
, arg_defaults
.rlimit
},
792 { "Manager", "DefaultLimitAS", config_parse_rlimit
, RLIMIT_AS
, arg_defaults
.rlimit
},
793 { "Manager", "DefaultLimitNPROC", config_parse_rlimit
, RLIMIT_NPROC
, arg_defaults
.rlimit
},
794 { "Manager", "DefaultLimitMEMLOCK", config_parse_rlimit
, RLIMIT_MEMLOCK
, arg_defaults
.rlimit
},
795 { "Manager", "DefaultLimitLOCKS", config_parse_rlimit
, RLIMIT_LOCKS
, arg_defaults
.rlimit
},
796 { "Manager", "DefaultLimitSIGPENDING", config_parse_rlimit
, RLIMIT_SIGPENDING
, arg_defaults
.rlimit
},
797 { "Manager", "DefaultLimitMSGQUEUE", config_parse_rlimit
, RLIMIT_MSGQUEUE
, arg_defaults
.rlimit
},
798 { "Manager", "DefaultLimitNICE", config_parse_rlimit
, RLIMIT_NICE
, arg_defaults
.rlimit
},
799 { "Manager", "DefaultLimitRTPRIO", config_parse_rlimit
, RLIMIT_RTPRIO
, arg_defaults
.rlimit
},
800 { "Manager", "DefaultLimitRTTIME", config_parse_rlimit
, RLIMIT_RTTIME
, arg_defaults
.rlimit
},
801 { "Manager", "DefaultCPUAccounting", config_parse_warn_compat
, DISABLED_LEGACY
, NULL
},
802 { "Manager", "DefaultIOAccounting", config_parse_bool
, 0, &arg_defaults
.io_accounting
},
803 { "Manager", "DefaultIPAccounting", config_parse_bool
, 0, &arg_defaults
.ip_accounting
},
804 { "Manager", "DefaultBlockIOAccounting", config_parse_warn_compat
, DISABLED_LEGACY
, NULL
},
805 { "Manager", "DefaultMemoryAccounting", config_parse_bool
, 0, &arg_defaults
.memory_accounting
},
806 { "Manager", "DefaultTasksAccounting", config_parse_bool
, 0, &arg_defaults
.tasks_accounting
},
807 { "Manager", "DefaultTasksMax", config_parse_tasks_max
, 0, &arg_defaults
.tasks_max
},
808 { "Manager", "DefaultMemoryPressureThresholdSec", config_parse_sec
, 0, &arg_defaults
.memory_pressure_threshold_usec
},
809 { "Manager", "DefaultMemoryPressureWatch", config_parse_memory_pressure_watch
, 0, &arg_defaults
.memory_pressure_watch
},
810 { "Manager", "CtrlAltDelBurstAction", config_parse_emergency_action
, arg_runtime_scope
, &arg_cad_burst_action
},
811 { "Manager", "DefaultOOMPolicy", config_parse_oom_policy
, 0, &arg_defaults
.oom_policy
},
812 { "Manager", "DefaultOOMScoreAdjust", config_parse_oom_score_adjust
, 0, NULL
},
813 { "Manager", "ReloadLimitIntervalSec", config_parse_sec
, 0, &arg_reload_limit_interval_sec
},
814 { "Manager", "ReloadLimitBurst", config_parse_unsigned
, 0, &arg_reload_limit_burst
},
816 { "Manager", "DefaultSmackProcessLabel", config_parse_string
, 0, &arg_defaults
.smack_process_label
},
818 { "Manager", "DefaultSmackProcessLabel", config_parse_warn_compat
, DISABLED_CONFIGURATION
, NULL
},
823 if (arg_runtime_scope
== RUNTIME_SCOPE_SYSTEM
)
824 (void) config_parse_standard_file_with_dropins(
825 "systemd/system.conf",
827 config_item_table_lookup
, items
,
829 /* userdata= */ NULL
);
831 _cleanup_strv_free_
char **files
= NULL
, **dirs
= NULL
;
834 assert(arg_runtime_scope
== RUNTIME_SCOPE_USER
);
836 r
= manager_find_user_config_paths(&files
, &dirs
);
838 return log_error_errno(r
, "Failed to determine config file paths: %m");
840 (void) config_parse_many(
841 (const char* const*) files
,
842 (const char* const*) dirs
,
846 config_item_table_lookup
, items
,
851 /* Traditionally "0" was used to turn off the default unit timeouts. Fix this up so that we use
852 * USEC_INFINITY like everywhere else. */
853 if (arg_defaults
.timeout_start_usec
<= 0)
854 arg_defaults
.timeout_start_usec
= USEC_INFINITY
;
855 if (arg_defaults
.timeout_stop_usec
<= 0)
856 arg_defaults
.timeout_stop_usec
= USEC_INFINITY
;
/* Applies arg_defaults and arg_default_environment to the manager object 'm' in three steps: unit
 * defaults, default environment, then the transient environment additions. The function returns
 * nothing; a failing step is logged as a warning marked "ignoring". */
861 static void set_manager_defaults(Manager
*m
) {
866 /* Propagates the various default unit property settings into the manager object, i.e. properties
867 * that do not affect the manager itself, but are just what newly allocated units will have set if
868 * they haven't set anything else. (Also see set_manager_settings() for the settings that affect the
869 * manager's own behaviour) */
871 r
= manager_set_unit_defaults(m
, &arg_defaults
);
873 log_warning_errno(r
, "Failed to set manager defaults, ignoring: %m");
875 r
= manager_default_environment(m
);
877 log_warning_errno(r
, "Failed to set manager default environment, ignoring: %m");
879 r
= manager_transient_environment_add(m
, arg_default_environment
);
881 log_warning_errno(r
, "Failed to add to transient environment, ignoring: %m");
/* Copies the manager-level settings from the arg_* globals into 'm': confirm-spawn, service
 * watchdogs, Ctrl-Alt-Del burst action, the reload/reexec rate limit, the four watchdog timeouts,
 * the watchdog pretimeout governor, show-status, and the status unit format.
 *
 * m->confirm_spawn is assigned the arg_confirm_spawn pointer directly; the string is not duplicated
 * here. A failure to apply the pretimeout governor is only logged as a warning. */
884 static void set_manager_settings(Manager
*m
) {
889 /* Propagates the various manager settings into the manager object, i.e. properties that
890 * affect the manager itself (as opposed to just being inherited into newly allocated
891 * units, see set_manager_defaults() above). */
893 m
->confirm_spawn
= arg_confirm_spawn
;
894 m
->service_watchdogs
= arg_service_watchdogs
;
895 m
->cad_burst_action
= arg_cad_burst_action
;
896 /* Note that we don't do structured initialization here, otherwise it will reset the rate limit
897 * counter on every daemon-reload. */
898 m
->reload_reexec_ratelimit
.interval
= arg_reload_limit_interval_sec
;
899 m
->reload_reexec_ratelimit
.burst
= arg_reload_limit_burst
;
901 manager_set_watchdog(m
, WATCHDOG_RUNTIME
, arg_runtime_watchdog
);
902 manager_set_watchdog(m
, WATCHDOG_REBOOT
, arg_reboot_watchdog
);
903 manager_set_watchdog(m
, WATCHDOG_KEXEC
, arg_kexec_watchdog
);
904 manager_set_watchdog(m
, WATCHDOG_PRETIMEOUT
, arg_pretimeout_watchdog
);
905 r
= manager_set_watchdog_pretimeout_governor(m
, arg_watchdog_pretimeout_governor
);
907 log_warning_errno(r
, "Failed to set watchdog pretimeout governor to '%s', ignoring: %m", arg_watchdog_pretimeout_governor
);
909 manager_set_show_status(m
, arg_show_status
, "command line");
910 m
->status_unit_format
= arg_status_unit_format
;
/* Parses the process command line with getopt_long(), storing the results in the arg_* globals and
 * in arg_action.
 *
 * Unlike parse_proc_cmdline_item(), which warns and ignores bad values, an option value that fails
 * to parse here is logged with log_error_errno() and returned as an error.
 *
 * Several steps depend on whether the process is PID 1 (getpid_cached() checks): getopt handling is
 * adjusted before the loop, and excess positional arguments are rejected with EINVAL only when NOT
 * running as PID 1.
 *
 * After the loop, running the manager (ACTION_RUN) in user scope is refused unless --user was given
 * explicitly on this command line. */
913 static int parse_argv(int argc
, char *argv
[]) {
919 static const struct option options
[] = {
920 COMMON_GETOPT_OPTIONS
,
921 SYSTEMD_GETOPT_OPTIONS
,
926 bool user_arg_seen
= false;
931 if (getpid_cached() == 1)
934 while ((c
= getopt_long(argc
, argv
, SYSTEMD_GETOPT_SHORT_OPTIONS
, options
, NULL
)) >= 0)
939 r
= log_set_max_level_from_string(optarg
);
941 return log_error_errno(r
, "Failed to parse log level \"%s\": %m", optarg
);
946 r
= log_set_target_from_string(optarg
);
948 return log_error_errno(r
, "Failed to parse log target \"%s\": %m", optarg
);
955 r
= log_show_color_from_string(optarg
);
957 return log_error_errno(r
, "Failed to parse log color setting \"%s\": %m",
960 log_show_color(true);
964 case ARG_LOG_LOCATION
:
966 r
= log_show_location_from_string(optarg
);
968 return log_error_errno(r
, "Failed to parse log location setting \"%s\": %m",
971 log_show_location(true);
978 r
= log_show_time_from_string(optarg
);
980 return log_error_errno(r
, "Failed to parse log time setting \"%s\": %m",
987 case ARG_DEFAULT_STD_OUTPUT
:
988 r
= exec_output_from_string(optarg
);
990 return log_error_errno(r
, "Failed to parse default standard output setting \"%s\": %m",
992 arg_defaults
.std_output
= r
;
995 case ARG_DEFAULT_STD_ERROR
:
996 r
= exec_output_from_string(optarg
);
998 return log_error_errno(r
, "Failed to parse default standard error output setting \"%s\": %m",
1000 arg_defaults
.std_error
= r
;
1004 r
= free_and_strdup(&arg_default_unit
, optarg
);
1006 return log_error_errno(r
, "Failed to set default unit \"%s\": %m", optarg
);
1011 arg_runtime_scope
= RUNTIME_SCOPE_SYSTEM
;
1015 arg_runtime_scope
= RUNTIME_SCOPE_USER
;
1016 user_arg_seen
= true;
1020 arg_action
= ACTION_TEST
;
1024 arg_pager_flags
|= PAGER_DISABLE
;
1028 arg_action
= ACTION_VERSION
;
1031 case ARG_DUMP_CONFIGURATION_ITEMS
:
1032 arg_action
= ACTION_DUMP_CONFIGURATION_ITEMS
;
1035 case ARG_DUMP_BUS_PROPERTIES
:
1036 arg_action
= ACTION_DUMP_BUS_PROPERTIES
;
/* arg_bus_introspect keeps pointing at optarg (no copy is made). */
1039 case ARG_BUS_INTROSPECT
:
1040 arg_bus_introspect
= optarg
;
1041 arg_action
= ACTION_BUS_INTROSPECT
;
1045 r
= parse_boolean_argument("--dump-core", optarg
, &arg_dump_core
);
1050 case ARG_CRASH_CHVT
:
1051 r
= parse_crash_chvt(optarg
, &arg_crash_chvt
);
1053 return log_error_errno(r
, "Failed to parse crash virtual terminal index: \"%s\": %m",
1057 case ARG_CRASH_SHELL
:
1058 r
= parse_boolean_argument("--crash-shell", optarg
, &arg_crash_shell
);
/* Legacy boolean switch: true maps to CRASH_REBOOT, false to CRASH_FREEZE. */
1063 case ARG_CRASH_REBOOT
:
1064 r
= parse_boolean_argument("--crash-reboot", optarg
, NULL
);
1067 arg_crash_action
= r
> 0 ? CRASH_REBOOT
: CRASH_FREEZE
;
1070 case ARG_CRASH_ACTION
:
1071 r
= crash_action_from_string(optarg
);
1073 return log_error_errno(r
, "Failed to parse crash action \"%s\": %m", optarg
);
1074 arg_crash_action
= r
;
/* Any previously stored confirm-spawn value is freed before the new one is parsed in. */
1077 case ARG_CONFIRM_SPAWN
:
1078 arg_confirm_spawn
= mfree(arg_confirm_spawn
);
1080 r
= parse_confirm_spawn(optarg
, &arg_confirm_spawn
);
1082 return log_error_errno(r
, "Failed to parse confirm spawn option: \"%s\": %m",
1086 case ARG_SERVICE_WATCHDOGS
:
1087 r
= parse_boolean_argument("--service-watchdogs=", optarg
, &arg_service_watchdogs
);
1092 case ARG_SHOW_STATUS
:
1094 r
= parse_show_status(optarg
, &arg_show_status
);
1096 return log_error_errno(r
, "Failed to parse show status boolean: \"%s\": %m",
1099 arg_show_status
= SHOW_STATUS_YES
;
/* The option argument is a file descriptor number. It is marked close-on-exec, wrapped in a read-only
 * FILE stream, and replaces any previously stored serialization stream (which gets closed). */
1102 case ARG_DESERIALIZE
: {
1106 fd
= parse_fd(optarg
);
1108 return log_error_errno(fd
, "Failed to parse serialization fd \"%s\": %m", optarg
);
1110 (void) fd_cloexec(fd
, true);
1112 f
= fdopen(fd
, "r");
1114 return log_error_errno(errno
, "Failed to open serialization fd %d: %m", fd
);
1116 safe_fclose(arg_serialization
);
1117 arg_serialization
= f
;
1122 case ARG_SWITCHED_ROOT
:
1123 arg_switched_root
= true;
1126 case ARG_MACHINE_ID
:
1127 r
= id128_from_string_nonzero(optarg
, &arg_machine_id
);
1129 return log_error_errno(r
, "MachineID '%s' is not valid: %m", optarg
);
1133 arg_action
= ACTION_HELP
;
1137 log_set_max_level(LOG_DEBUG
);
1143 /* Just to eat away the sysvinit kernel cmdline args that we'll parse in
1144 * parse_proc_cmdline_item() or ignore, without any getopt() error messages.
1147 if (getpid_cached() != 1)
1153 assert_not_reached();
1156 if (optind
< argc
&& getpid_cached() != 1)
1157 /* Hmm, when we aren't run as init system let's complain about excess arguments */
1158 return log_error_errno(SYNTHETIC_ERRNO(EINVAL
), "Excess arguments.");
1160 if (arg_action
== ACTION_RUN
&& arg_runtime_scope
== RUNTIME_SCOPE_USER
&& !user_arg_seen
)
1161 return log_error_errno(SYNTHETIC_ERRNO(EINVAL
),
1162 "Explicit --user argument required to run as user manager.");
/* Prints the usage text to standard output. The leading "%s" of the format string is filled with
 * program_invocation_short_name. 'link' receives the result of terminal_urlify_man("systemd", "1")
 * and is freed automatically (_cleanup_free_); presumably it is what fills the "See the %s for
 * details." placeholder -- confirm against the full argument list. */
1167 static int help(void) {
1168 _cleanup_free_
char *link
= NULL
;
1171 r
= terminal_urlify_man("systemd", "1", &link
);
1175 printf("%s [OPTIONS...]\n\n"
1176 "%sStarts and monitors system and user services.%s\n\n"
1177 "This program takes no positional arguments.\n\n"
1179 " -h --help Show this help\n"
1180 " --version Show version\n"
1181 " --test Determine initial transaction, dump it and exit\n"
1182 " --system Combined with --test: operate in system mode\n"
1183 " --user Combined with --test: operate in user mode\n"
1184 " --dump-configuration-items Dump understood unit configuration items\n"
1185 " --dump-bus-properties Dump exposed bus properties\n"
1186 " --bus-introspect=PATH Write XML introspection data\n"
1187 " --unit=UNIT Set default unit\n"
1188 " --dump-core[=BOOL] Dump core on crash\n"
1189 " --crash-vt=NR Change to specified VT on crash\n"
1190 " --crash-action=ACTION Specify what to do on crash\n"
1191 " --crash-shell[=BOOL] Run shell on crash\n"
1192 " --confirm-spawn[=BOOL] Ask for confirmation when spawning processes\n"
1193 " --show-status[=BOOL] Show status updates on the console during boot\n"
1194 " --log-target=TARGET Set log target (console, journal, kmsg,\n"
1195 " journal-or-kmsg, null)\n"
1196 " --log-level=LEVEL Set log level (debug, info, notice, warning,\n"
1197 " err, crit, alert, emerg)\n"
1198 " --log-color[=BOOL] Highlight important log messages\n"
1199 " --log-location[=BOOL] Include code location in log messages\n"
1200 " --log-time[=BOOL] Prefix log messages with current time\n"
1201 " --default-standard-output= Set default standard output for services\n"
1202 " --default-standard-error= Set default standard error output for services\n"
1203 " --no-pager Do not pipe output into a pager\n"
1204 "\nSee the %s for details.\n",
1205 program_invocation_short_name
,
/* Serializes the manager state in preparation for re-executing the manager binary.
 *
 * Visible steps: announce the reload on the bus, open a serialization file, write the manager state
 * (plus the fd set) into it, finish the file, and then turn off O_CLOEXEC on both the file and the
 * collected fds so that they stay open across exec(). On success ownership of the FILE and the FDSet is
 * handed to the caller through *ret_f and *ret_fds (TAKE_PTR() at the end). Each failing step is logged
 * via log_error_errno() and its result returned.
 *
 * 'switching_root' is forwarded unchanged to manager_serialize(). */
1215 static int prepare_reexecute(
1219 bool switching_root
) {
1221 _cleanup_fdset_free_ FDSet
*fds
= NULL
;
1222 _cleanup_fclose_
FILE *f
= NULL
;
1229 /* Make sure nothing is really destructed when we shut down */
1231 bus_manager_send_reloading(m
, true);
1233 r
= manager_open_serialization(m
, &f
);
1235 return log_error_errno(r
, "Failed to create serialization file: %m");
1241 r
= manager_serialize(m
, f
, fds
, switching_root
);
1245 r
= finish_serialization_file(f
);
1247 return log_error_errno(r
, "Failed to finish serialization file: %m");
/* The serialization file and all serialized fds must be inherited by the new process image. */
1249 r
= fd_cloexec(fileno(f
), false);
1251 return log_error_errno(r
, "Failed to disable O_CLOEXEC for serialization: %m");
1253 r
= fdset_cloexec(fds
, false);
1255 return log_error_errno(r
, "Failed to disable O_CLOEXEC for serialization fds: %m");
/* Success: disarm the cleanup handlers and pass both objects to the caller. */
1257 *ret_f
= TAKE_PTR(f
);
1258 *ret_fds
= TAKE_PTR(fds
);
/* Best-effort bump of the fs.file-max and fs.nr_open sysctls. Each half is only compiled in when the
 * matching BUMP_PROC_SYS_FS_FILE_MAX / BUMP_PROC_SYS_FS_NR_OPEN build switch is set. Nothing is returned;
 * failures are only logged, at debug level when ERRNO_IS_NEG_FS_WRITE_REFUSED() matches the error and at
 * warning level otherwise. */
1263 static void bump_file_max_and_nr_open(void) {
1265 /* Let's bump fs.file-max and fs.nr_open to their respective maximums. On current kernels large
1266 * numbers of file descriptors are no longer a performance problem and their memory is properly
1267 * tracked by memcg, thus counting them and limiting them in another two layers of limits is
1268 * unnecessary and just complicates things. This function hence turns off 2 of the 4 levels of limits
1269 * on file descriptors, and makes RLIMIT_NOFILE (soft + hard) the only ones that really matter. */
1271 #if BUMP_PROC_SYS_FS_FILE_MAX || BUMP_PROC_SYS_FS_NR_OPEN
1275 #if BUMP_PROC_SYS_FS_FILE_MAX
1276 /* The maximum the kernel allows for this since 5.2 is LONG_MAX, use that. (Previously things were
1277 * different, but the operation would fail silently.) */
1278 r
= sysctl_write("fs/file-max", LONG_MAX_STR
);
1280 log_full_errno(ERRNO_IS_NEG_FS_WRITE_REFUSED(r
) ? LOG_DEBUG
: LOG_WARNING
, r
,
1281 "Failed to bump fs.file-max, ignoring: %m");
1284 #if BUMP_PROC_SYS_FS_NR_OPEN
1285 /* The kernel enforces maximum and minimum values on the fs.nr_open, but they are not directly
1286 * exposed, but hardcoded in fs/file.c. Hopefully, these values will not be changed, but not sure.
1287 * Let's first try the hardcoded maximum value, and if it does not work, try the half of it. */
/* Candidate values run from NR_OPEN_MAXIMUM downwards, halving each round, and stop once the candidate
 * would drop below NR_OPEN_MINIMUM. */
1289 for (unsigned v
= NR_OPEN_MAXIMUM
; v
>= NR_OPEN_MINIMUM
; v
/= 2) {
/* Re-read the current value each round so that we never lower it. */
1290 unsigned k
= read_nr_open();
1291 if (k
>= v
) { /* Already larger */
1292 log_debug("Skipping bump, value is already larger.");
1296 r
= sysctl_writef("fs/nr_open", "%u", v
);
1298 log_debug("Couldn't write fs.nr_open as %u, halving it.", v
);
1302 log_full_errno(ERRNO_IS_NEG_FS_WRITE_REFUSED(r
) ? LOG_DEBUG
: LOG_WARNING
, r
,
1303 "Failed to bump fs.nr_open, ignoring: %m");
1307 log_debug("Successfully bumped fs.nr_open to %u", v
);
/* Raises RLIMIT_NOFILE (soft and hard) for ourselves up to the value reported by read_nr_open(), but
 * never below what we inherited in 'saved_rlimit'. If the inherited limits are already at least as high
 * as the computed ones, only a debug message is logged and setrlimit_closest() is not called. A failure
 * to set the limit is logged as a warning ("ignoring") and the result of log_warning_errno() is returned. */
1313 static int bump_rlimit_nofile(const struct rlimit
*saved_rlimit
) {
1314 struct rlimit new_rlimit
;
1317 /* Get the underlying absolute limit the kernel enforces */
1318 unsigned nr
= read_nr_open();
1320 /* Calculate the new limits to use for us. Never lower from what we inherited. */
1321 new_rlimit
= (struct rlimit
) {
1322 .rlim_cur
= MAX((rlim_t
) nr
, saved_rlimit
->rlim_cur
),
1323 .rlim_max
= MAX((rlim_t
) nr
, saved_rlimit
->rlim_max
),
1326 /* Shortcut if nothing changes. */
1327 if (saved_rlimit
->rlim_max
>= new_rlimit
.rlim_max
&&
1328 saved_rlimit
->rlim_cur
>= new_rlimit
.rlim_cur
) {
1329 log_debug("RLIMIT_NOFILE is already as high or higher than we need it, not bumping.");
1333 /* Bump up the resource limit for ourselves substantially, all the way to the maximum the kernel allows, for
1334 * both hard and soft. */
1335 r
= setrlimit_closest(RLIMIT_NOFILE
, &new_rlimit
);
1337 return log_warning_errno(r
, "Setting RLIMIT_NOFILE failed, ignoring: %m");
1342 static int bump_rlimit_memlock(const struct rlimit
*saved_rlimit
) {
1343 struct rlimit new_rlimit
;
1347 /* BPF_MAP_TYPE_LPM_TRIE bpf maps are charged against RLIMIT_MEMLOCK, even if we have CAP_IPC_LOCK
1348 * which should normally disable such checks. We need them to implement IPAddressAllow= and
1349 * IPAddressDeny=, hence let's bump the value high enough for our user. */
1351 /* Using MAX() on resource limits only is safe if RLIM_INFINITY is > 0. POSIX declares that rlim_t
1352 * must be unsigned, hence this is a given, but let's make this clear here. */
1353 assert_cc(RLIM_INFINITY
> 0);
1355 mm
= physical_memory_scale(1, 8); /* Let's scale how much we allow to be locked by the amount of
1356 * physical RAM. We allow an eighth to be locked by us, just to
1359 new_rlimit
= (struct rlimit
) {
1360 .rlim_cur
= MAX3(HIGH_RLIMIT_MEMLOCK
, saved_rlimit
->rlim_cur
, mm
),
1361 .rlim_max
= MAX3(HIGH_RLIMIT_MEMLOCK
, saved_rlimit
->rlim_max
, mm
),
1364 if (saved_rlimit
->rlim_max
>= new_rlimit
.rlim_cur
&&
1365 saved_rlimit
->rlim_cur
>= new_rlimit
.rlim_max
) {
1366 log_debug("RLIMIT_MEMLOCK is already as high or higher than we need it, not bumping.");
1370 r
= setrlimit_closest(RLIMIT_MEMLOCK
, &new_rlimit
);
1372 return log_warning_errno(r
, "Setting RLIMIT_MEMLOCK failed, ignoring: %m");
/* Applies the system call architecture restriction via seccomp_restrict_archs(), provided
 * is_seccomp_available() reports that seccomp can be used. A failure is logged as an error and the
 * result of log_error_errno() is returned.
 * NOTE(review): in the visible lines the 'archs' parameter is never referenced; the call passes the
 * global arg_syscall_archs instead. Confirm that every caller passes that same set, or switch the call
 * to use the parameter. */
1377 static int enforce_syscall_archs(Set
*archs
) {
1381 if (!is_seccomp_available())
1384 r
= seccomp_restrict_archs(arg_syscall_archs
);
1386 return log_error_errno(r
, "Failed to enforce system call architecture restriction: %m");
/* Reads a handful of os-release fields (PRETTY_NAME, VERSION, ANSI_COLOR, SUPPORT_END are visible in the
 * parse_os_release() call) and uses them for console status output:
 *
 *  - if status output is enabled (show_status_on(arg_show_status)), prints either a "Booting initrd of ..."
 *    or a "Welcome to ...!" banner, in a colored variant when log_get_show_color() is true;
 *  - if SUPPORT_END is set and os_release_support_ended() reports > 0, prints a highlighted notice that
 *    the OS version is past its end-of-support date.
 *
 * If os-release cannot be read this is logged (debug level for -ENOENT, warning otherwise) and the
 * result of log_full_errno() is returned. */
1391 static int os_release_status(void) {
1392 _cleanup_free_
char *pretty_name
= NULL
, *name
= NULL
, *version
= NULL
,
1393 *ansi_color
= NULL
, *support_end
= NULL
;
1396 r
= parse_os_release(NULL
,
1397 "PRETTY_NAME", &pretty_name
,
1399 "VERSION", &version
,
1400 "ANSI_COLOR", &ansi_color
,
1401 "SUPPORT_END", &support_end
);
1403 return log_full_errno(r
== -ENOENT
? LOG_DEBUG
: LOG_WARNING
, r
,
1404 "Failed to read os-release file, ignoring: %m");
1406 const char *label
= os_release_pretty_name(pretty_name
, name
);
1407 const char *color
= empty_to_null(ansi_color
) ?: "1";
1409 if (show_status_on(arg_show_status
)) {
1411 if (log_get_show_color())
1412 status_printf(NULL
, 0,
1413 ANSI_HIGHLIGHT
"Booting initrd of " ANSI_NORMAL
"\x1B[%sm%s" ANSI_NORMAL ANSI_HIGHLIGHT
"." ANSI_NORMAL
,
1416 status_printf(NULL
, 0,
1417 "Booting initrd of %s...", label
);
1419 if (log_get_show_color())
1420 status_printf(NULL
, 0,
1421 "\n" ANSI_HIGHLIGHT
"Welcome to " ANSI_NORMAL
"\x1B[%sm%s" ANSI_NORMAL ANSI_HIGHLIGHT
"!" ANSI_NORMAL
"\n",
1424 status_printf(NULL
, 0,
1425 "\nWelcome to %s!\n",
1430 if (support_end
&& os_release_support_ended(support_end
, /* quiet = */ false, /* ret_eol = */ NULL
) > 0)
1431 /* pretty_name may include the version already, so we'll print the version only if we
1432 * have it and we're not using pretty_name. */
1433 status_printf(ANSI_HIGHLIGHT_RED
" !! " ANSI_NORMAL
, 0,
1434 "This OS version (%s%s%s) is past its end-of-support date (%s)",
1436 (pretty_name
|| !version
) ? "" : " version ",
1437 (pretty_name
|| !version
) ? "" : version
,
/* Copies the os-release file to a staging path below the "propagate" runtime directory.
 *
 * Source: /etc/os-release; if the access() check on it fails, /usr/lib/os-release is used instead (an
 * error other than ENOENT is additionally logged at debug level).
 * Destination: /run/systemd/propagate/.os-release-stage/os-release by default, or
 * /run/user/<euid>/systemd/propagate/.os-release-stage/os-release when 'scope' is RUNTIME_SCOPE_USER.
 *
 * 'scope' must be RUNTIME_SCOPE_SYSTEM or RUNTIME_SCOPE_USER (asserted). Failures to create the parent
 * directory or to copy the file are logged at debug level and the log call's result is returned. */
1443 static int setup_os_release(RuntimeScope scope
) {
/* The buffer is sized for the longer per-user path plus the decimal UID and pre-filled with the system path. */
1444 char os_release_dst
[STRLEN("/run/user//systemd/propagate/.os-release-stage/os-release") + DECIMAL_STR_MAX(uid_t
)] =
1445 "/run/systemd/propagate/.os-release-stage/os-release";
1446 const char *os_release_src
= "/etc/os-release";
1449 assert(IN_SET(scope
, RUNTIME_SCOPE_SYSTEM
, RUNTIME_SCOPE_USER
));
1451 if (access("/etc/os-release", F_OK
) < 0) {
1452 if (errno
!= ENOENT
)
1453 log_debug_errno(errno
, "Failed to check if /etc/os-release exists, ignoring: %m");
1455 os_release_src
= "/usr/lib/os-release";
1458 if (scope
== RUNTIME_SCOPE_USER
)
1459 xsprintf(os_release_dst
, "/run/user/" UID_FMT
"/systemd/propagate/.os-release-stage/os-release", geteuid());
1461 r
= mkdir_parents_label(os_release_dst
, 0755);
1463 return log_debug_errno(r
, "Failed to create parent directory of '%s', ignoring: %m", os_release_dst
);
1465 r
= copy_file_atomic(os_release_src
, os_release_dst
, 0644, COPY_MAC_CREATE
|COPY_REPLACE
);
1467 return log_debug_errno(r
, "Failed to copy '%s' to '%s', ignoring: %m",
1468 os_release_src
, os_release_dst
);
/* Records the value of the $container environment variable in /run/systemd/container (the file is
 * created if necessary, see WRITE_STRING_FILE_CREATE). A write failure is logged as a warning
 * ("ignoring") and the result of log_warning_errno() is returned.
 * NOTE(review): the handling of an unset/empty $container is not visible in this listing. */
1473 static int write_container_id(void) {
1475 int r
= 0; /* avoid false maybe-uninitialized warning */
1477 c
= getenv("container");
1482 r
= write_string_file("/run/systemd/container", c
, WRITE_STRING_FILE_CREATE
);
1484 return log_warning_errno(r
, "Failed to write /run/systemd/container, ignoring: %m");
/* Writes an OSC context sequence for the given phase to /dev/console.
 *
 * 'type' must be "boot" or "shutdown" (asserted). For "boot" the sequence comes from
 * osc_context_open_boot(), otherwise from osc_context_close(SD_ID128_ALLF, ...). The console is opened
 * write-only without becoming the controlling terminal, and the sequence is written with loop_write().
 * All failures are logged at debug level ("ignoring") and the log call's result is returned. With debug
 * logging enabled the escaped sequence is logged after a successful write.
 * NOTE(review): the getenv_terminal_is_dumb() check presumably short-circuits the function; its body
 * is not visible here. */
1489 static int write_boot_or_shutdown_osc(const char *type
) {
1492 assert(STRPTR_IN_SET(type
, "boot", "shutdown"));
1494 if (getenv_terminal_is_dumb())
1497 _cleanup_close_
int fd
= open_terminal("/dev/console", O_WRONLY
|O_NOCTTY
|O_CLOEXEC
);
1499 return log_debug_errno(fd
, "Failed to open /dev/console to print %s OSC, ignoring: %m", type
);
1501 _cleanup_free_
char *seq
= NULL
;
1502 if (streq(type
, "boot"))
1503 r
= osc_context_open_boot(&seq
);
1505 r
= osc_context_close(SD_ID128_ALLF
, &seq
);
1507 return log_debug_errno(r
, "Failed to acquire %s OSC sequence, ignoring: %m", type
);
1509 r
= loop_write(fd
, seq
, SIZE_MAX
);
1511 return log_debug_errno(r
, "Failed to write %s OSC sequence, ignoring: %m", type
);
1513 if (DEBUG_LOGGING
) {
/* Escape the sequence first, so that logging it does not emit the raw control characters. */
1514 _cleanup_free_
char *h
= cescape(seq
);
1515 log_debug("OSC sequence for %s successfully written: %s", type
, strna(h
));
/* Raises the net.unix.max_dgram_qlen sysctl to DEFAULT_UNIX_MAX_DGRAM_QLEN.
 *
 * The current value is read from /proc/sys/net/unix/max_dgram_qlen and parsed with safe_atolu(); the
 * comparison against DEFAULT_UNIX_MAX_DGRAM_QLEN guards the write, so a value that is already large
 * enough is presumably left alone (the branch body is not visible here). Read, parse and write failures
 * are logged ("ignoring") and the log call's result is returned; the log level for a failed read is
 * debug on -ENOENT and warning otherwise, for a failed write debug when
 * ERRNO_IS_NEG_FS_WRITE_REFUSED() matches and warning otherwise. */
1521 static int bump_unix_max_dgram_qlen(void) {
1522 _cleanup_free_
char *qlen
= NULL
;
1526 /* Let's bump the net.unix.max_dgram_qlen sysctl. The kernel default of 16 is simply too low. We set
1527 * the value really really early during boot, so that it is actually applied to all our sockets,
1528 * including the $NOTIFY_SOCKET one. */
1530 r
= read_one_line_file("/proc/sys/net/unix/max_dgram_qlen", &qlen
);
1532 return log_full_errno(r
== -ENOENT
? LOG_DEBUG
: LOG_WARNING
, r
,
1533 "Failed to read AF_UNIX datagram queue length, ignoring: %m");
1535 r
= safe_atolu(qlen
, &v
);
1537 return log_warning_errno(r
, "Failed to parse AF_UNIX datagram queue length '%s', ignoring: %m", qlen
);
1539 if (v
>= DEFAULT_UNIX_MAX_DGRAM_QLEN
)
1542 r
= sysctl_write("net/unix/max_dgram_qlen", STRINGIFY(DEFAULT_UNIX_MAX_DGRAM_QLEN
));
1544 return log_full_errno(ERRNO_IS_NEG_FS_WRITE_REFUSED(r
) ? LOG_DEBUG
: LOG_WARNING
, r
,
1545 "Failed to bump AF_UNIX datagram queue length, ignoring: %m");
/* Adjusts the process environment when running as PID 1 outside of a container.
 *
 * $TERM is taken from the "TERM" kernel command line key if present; in that case "COLORTERM" and
 * "NO_COLOR" are also picked up from the kernel command line and exported. Otherwise the per-tty key
 * "systemd.tty.term.console" is consulted, then query_term_for_tty("/dev/console", ...), and finally
 * FALLBACK_TERM is used if no value was found. Additionally a $HOME of "/" is unset.
 * NOTE(review): the early-exit bodies and error returns are not visible in this listing. */
1550 static int fixup_environment(void) {
1553 /* Only fix up the environment when we are started as PID 1 */
1554 if (getpid_cached() != 1)
1557 /* We expect the environment to be set correctly if run inside a container. */
1558 if (detect_container() > 0)
1561 /* When started as PID1, the kernel uses /dev/console for our stdios and uses TERM=linux whatever the
1562 * backend device used by the console. We try to make a better guess here since some consoles might
1563 * not have support for color mode for example.
1565 * However if TERM was configured through the kernel command line then leave it alone. */
1566 _cleanup_free_
char *term
= NULL
;
1567 r
= proc_cmdline_get_key("TERM", 0, &term
);
1571 /* If we pick up $TERM, then also pick up $COLORTERM, $NO_COLOR */
1572 FOREACH_STRING(v
, "COLORTERM", "NO_COLOR") {
1573 _cleanup_free_
char *vv
= NULL
;
1574 r
= proc_cmdline_get_key(v
, 0, &vv
);
1577 if (r
> 0 && setenv(v
, vv
, /* overwrite= */ true) < 0)
1581 /* If no $TERM is set then look for the per-tty variable instead */
1582 r
= proc_cmdline_get_key("systemd.tty.term.console", 0, &term
);
1588 (void) query_term_for_tty("/dev/console", &term
);
1590 if (setenv("TERM", term
?: FALLBACK_TERM
, /* overwrite= */ true) < 0)
1593 /* The kernel sets HOME=/ for init. Let's undo this. */
1594 if (path_equal(getenv("HOME"), "/"))
1595 assert_se(unsetenv("HOME") == 0);
/* Replaces the current process with the shutdown binary (SYSTEMD_SHUTDOWN_BINARY_PATH) via execve().
 *
 * 'objective' must be a valid manager objective with an entry in the local name table (exit, reboot,
 * poweroff, halt, kexec); 'retval' is passed on as --exit-code=.
 *
 * The command line is assembled in a fixed array of 11 slots, the last of which must stay NULL. Besides
 * the initial entries it carries the current logging setup (--log-level, --log-target=, --log-color,
 * --log-location, --log-time) and the exit code. For reboot and kexec the matching watchdog timeout is
 * used; the pretimeout watchdog is disabled, the watchdog is set up and then closed (disarmed only if
 * setup failed). The environment passed on is a copy of 'environ' extended with WATCHDOG_USEC=,
 * WATCHDOG_PID= and, if configured, WATCHDOG_DEVICE=. Just before exec a "shutdown" OSC sequence is
 * written to the console.
 * NOTE(review): the initial command_line entries and the code following a failed execve() are not
 * visible in this listing. */
1600 static int become_shutdown(int objective
, int retval
) {
1601 static const char* const table
[_MANAGER_OBJECTIVE_MAX
] = {
1602 [MANAGER_EXIT
] = "exit",
1603 [MANAGER_REBOOT
] = "reboot",
1604 [MANAGER_POWEROFF
] = "poweroff",
1605 [MANAGER_HALT
] = "halt",
1606 [MANAGER_KEXEC
] = "kexec",
1609 char timeout
[STRLEN("--timeout=") + DECIMAL_STR_MAX(usec_t
) + STRLEN("us")],
1610 exit_code
[STRLEN("--exit-code=") + DECIMAL_STR_MAX(uint8_t)];
1612 _cleanup_strv_free_
char **env_block
= NULL
;
1613 _cleanup_free_
char *max_log_levels
= NULL
;
1614 usec_t watchdog_timer
= 0;
1617 assert(objective
>= 0 && objective
< _MANAGER_OBJECTIVE_MAX
);
1618 assert(table
[objective
]);
1620 xsprintf(timeout
, "--timeout=%" PRI_USEC
"us", arg_defaults
.timeout_stop_usec
);
1622 const char* command_line
[11] = {
1623 SYSTEMD_SHUTDOWN_BINARY_PATH
,
1626 /* Note that the last position is a terminator and must contain NULL. */
1630 assert(command_line
[pos
-1]);
1631 assert(!command_line
[pos
]);
1633 (void) log_max_levels_to_string(log_get_max_level(), &max_log_levels
);
1635 if (max_log_levels
) {
1636 command_line
[pos
++] = "--log-level";
1637 command_line
[pos
++] = max_log_levels
;
/* Several of our log targets are mapped onto the three targets passed to the shutdown binary. */
1640 switch (log_get_target()) {
1642 case LOG_TARGET_KMSG
:
1643 case LOG_TARGET_JOURNAL_OR_KMSG
:
1644 case LOG_TARGET_SYSLOG_OR_KMSG
:
1645 command_line
[pos
++] = "--log-target=kmsg";
1648 case LOG_TARGET_NULL
:
1649 command_line
[pos
++] = "--log-target=null";
1652 case LOG_TARGET_CONSOLE
:
1654 command_line
[pos
++] = "--log-target=console";
1657 if (log_get_show_color())
1658 command_line
[pos
++] = "--log-color";
1660 if (log_get_show_location())
1661 command_line
[pos
++] = "--log-location";
1663 if (log_get_show_time())
1664 command_line
[pos
++] = "--log-time";
1666 xsprintf(exit_code
, "--exit-code=%d", retval
);
1667 command_line
[pos
++] = exit_code
;
/* Verify that the NULL terminator slot is still untouched after all optional arguments. */
1669 assert(pos
< ELEMENTSOF(command_line
));
1673 if (objective
== MANAGER_REBOOT
)
1674 watchdog_timer
= arg_reboot_watchdog
;
1675 else if (objective
== MANAGER_KEXEC
)
1676 watchdog_timer
= arg_kexec_watchdog
;
1678 /* If we reboot or kexec let's set the shutdown watchdog and tell the
1679 * shutdown binary to repeatedly ping it.
1680 * Disable the pretimeout watchdog, as we do not support it from the shutdown binary. */
1681 (void) watchdog_setup_pretimeout(0);
1682 (void) watchdog_setup_pretimeout_governor(NULL
);
1683 r
= watchdog_setup(watchdog_timer
);
1684 watchdog_close(/* disarm= */ r
< 0);
1686 /* The environment block: */
1688 env_block
= strv_copy(environ
);
1690 /* Tell the binary how often to ping, ignore failure */
1691 (void) strv_extendf(&env_block
, "WATCHDOG_USEC="USEC_FMT
, watchdog_timer
);
1693 /* Make sure that tools that look for $WATCHDOG_USEC (and might get started by the exitrd) don't get
1694 * confused by the variable, because the sd_watchdog_enabled() protocol uses the same variable for
1695 * the same purposes. */
1696 (void) strv_extendf(&env_block
, "WATCHDOG_PID=" PID_FMT
, getpid_cached());
1698 if (arg_watchdog_device
)
1699 (void) strv_extendf(&env_block
, "WATCHDOG_DEVICE=%s", arg_watchdog_device
);
1701 (void) write_boot_or_shutdown_osc("shutdown");
1703 execve(SYSTEMD_SHUTDOWN_BINARY_PATH
, (char **) command_line
, env_block
);
/* Early, one-time clock initialization with two visible branches:
 *
 *  - if clock_is_localtime(NULL) reports > 0, clock_set_timezone() is called and its failure or the
 *    applied delta (in minutes) is logged;
 *  - otherwise, when not running in the initrd, clock_reset_timewarp() is called and its result ignored.
 *
 * Nothing is returned; errors are only logged. */
1707 static void initialize_clock_timewarp(void) {
1710 /* This is called very early on, before we parse the kernel command line or otherwise figure out why
1711 * we are running, but only once. */
1713 if (clock_is_localtime(NULL
) > 0) {
1716 /* The very first call of settimeofday() also does a time warp in the kernel.
1718 * In the rtc-in-local time mode, we set the kernel's timezone, and rely on external tools to
1719 * take care of maintaining the RTC and do all adjustments. This matches the behavior of
1720 * Windows, which leaves the RTC alone if the registry tells that the RTC runs in UTC.
1722 r
= clock_set_timezone(&min
);
1724 log_error_errno(r
, "Failed to apply local time delta, ignoring: %m");
1726 log_info("RTC configured in localtime, applying delta of %i minutes to system time.", min
);
1728 } else if (!in_initrd())
1730 * Do a dummy very first call to seal the kernel's time warp magic.
1732 * Do not call this from inside the initrd. The initrd might not carry /etc/adjtime with
1733 * LOCAL, but the real system could be set up that way. In such case, we need to delay the
1734 * time-warp or the sealing until we reach the real system.
1736 * Do no set the kernel's timezone. The concept of local time cannot be supported reliably,
1737 * the time will jump or be incorrect at every daylight saving time change. All kernel local
1738 * time concepts will be treated as UTC that way.
1740 (void) clock_reset_timewarp();
/* Sets CLOCK_REALTIME to arg_clock_usec, the time requested on the kernel command line (per the log
 * messages below). The checks for arg_clock_usec == 0 and for not being PID 1 guard the operation;
 * their bodies are not visible here but presumably return early. The outcome is only logged: an error
 * if clock_settime() fails, an informational message with the formatted timestamp otherwise. */
1743 static void apply_clock_update(void) {
1744 /* This is called later than clock_apply_epoch(), i.e. after we have parsed
1745 * configuration files/kernel command line and such. */
1747 if (arg_clock_usec
== 0)
1750 if (getpid_cached() != 1)
1753 if (clock_settime(CLOCK_REALTIME
, TIMESPEC_STORE(arg_clock_usec
)) < 0)
1754 log_error_errno(errno
, "Failed to set system clock to time specified on kernel command line: %m");
1756 log_info("Set system clock to %s, as specified on the kernel command line.",
1757 FORMAT_TIMESTAMP(arg_clock_usec
));
/* Credits the random seed passed on the kernel command line (arg_random_seed, arg_random_seed_size
 * bytes) to the kernel entropy pool via random_write_entropy().
 *
 * The checks for an empty seed and for not being PID 1 guard the operation (bodies not visible here,
 * presumably early returns). A seed smaller than random_pool_size() only triggers a warning. A failure
 * to credit the entropy is logged as a warning; on success a notice is logged that points out that
 * such a seed is readable by unprivileged programs. */
1760 static void cmdline_take_random_seed(void) {
1764 if (arg_random_seed_size
== 0)
1767 if (getpid_cached() != 1)
1770 assert(arg_random_seed
);
1771 suggested
= random_pool_size();
1773 if (arg_random_seed_size
< suggested
)
1774 log_warning("Random seed specified on kernel command line has size %zu, but %zu bytes required to fill entropy pool.",
1775 arg_random_seed_size
, suggested
);
1777 r
= random_write_entropy(-1, arg_random_seed
, arg_random_seed_size
, true);
1779 log_warning_errno(r
, "Failed to credit entropy specified on kernel command line, ignoring: %m");
1783 log_notice("Successfully credited entropy passed on kernel command line.\n"
1784 "Note that the seed provided this way is accessible to unprivileged programs. "
1785 "This functionality should not be used outside of testing environments.");
/* Core dump defaults for PID 1: lifts RLIMIT_CORE to RLIM_INFINITY (a failure is logged as a warning)
 * and calls disable_coredumps(). The getpid_cached() != 1 check guards this; its body is not visible
 * here but presumably returns early.
 * NOTE(review): 'skip_setup' is not referenced in the visible lines. */
1788 static void initialize_coredump(bool skip_setup
) {
1789 if (getpid_cached() != 1)
1792 /* Don't limit the core dump size, so that coredump handlers such as systemd-coredump (which honour
1793 * the limit) will process core dumps for system services by default. */
1794 if (setrlimit(RLIMIT_CORE
, &RLIMIT_MAKE_CONST(RLIM_INFINITY
)) < 0)
1795 log_warning_errno(errno
, "Failed to set RLIMIT_CORE: %m");
1797 /* But at the same time, turn off the core_pattern logic by default, so that no coredumps are stored
1798 * until the systemd-coredump tool is enabled via sysctl. However it can be changed via the kernel
1799 * command line later so core dumps can still be generated during early startup and in initrd. */
1801 disable_coredumps();
/* Writes arg_early_core_pattern to /proc/sys/kernel/core_pattern. The function is guarded by
 * 'skip_setup', by arg_early_core_pattern being unset and by not being PID 1 (bodies not visible here,
 * presumably early returns). A write failure is logged as a warning and otherwise ignored. */
1804 static void initialize_core_pattern(bool skip_setup
) {
1807 if (skip_setup
|| !arg_early_core_pattern
)
1810 if (getpid_cached() != 1)
1813 r
= write_string_file("/proc/sys/kernel/core_pattern", arg_early_core_pattern
, WRITE_STRING_FILE_DISABLE_BUFFER
);
1815 log_warning_errno(r
, "Failed to write '%s' to /proc/sys/kernel/core_pattern, ignoring: %m",
1816 arg_early_core_pattern
);
/* Makes /usr/ read-only according to arg_protect_system.
 *
 * Guarded by 'skip_setup', by not being PID 1 and by arg_protect_system == 0. A negative
 * arg_protect_system ("auto", per the debug message) is only acted upon inside the initrd. /usr is first
 * turned into a mount point and then remounted with MS_BIND|MS_REMOUNT|MS_RDONLY. Failures are logged
 * and ignored; success is logged at info level. */
1819 static void apply_protect_system(bool skip_setup
) {
1822 if (skip_setup
|| getpid_cached() != 1 || arg_protect_system
== 0)
1825 if (arg_protect_system
< 0 && !in_initrd()) {
1826 log_debug("ProtectSystem=auto selected, but not running in an initrd, skipping.");
1830 r
= make_mount_point("/usr");
1832 log_warning_errno(r
, "Failed to make /usr/ a mount point, ignoring: %m");
1836 if (mount_nofollow_verbose(
1841 MS_BIND
|MS_REMOUNT
|MS_RDONLY
,
1842 /* options= */ NULL
) < 0)
1845 log_info("Successfully made /usr/ read-only.");
/* Applies the configured CPU affinity (arg_cpu_affinity) to the calling process with
 * sched_setaffinity(0, ...). Guarded by 'skip_setup' and by no CPU set being configured. The mask is
 * logged at debug level in range notation first; a failure to apply it is logged as a warning and
 * ignored. */
1848 static void update_cpu_affinity(bool skip_setup
) {
1849 _cleanup_free_
char *mask
= NULL
;
1851 if (skip_setup
|| !arg_cpu_affinity
.set
)
1854 assert(arg_cpu_affinity
.allocated
> 0);
/* Only used for the debug message; strnull() covers a failed string conversion. */
1856 mask
= cpu_set_to_range_string(&arg_cpu_affinity
);
1857 log_debug("Setting CPU affinity to {%s}.", strnull(mask
));
1859 if (sched_setaffinity(0, arg_cpu_affinity
.allocated
, arg_cpu_affinity
.set
) < 0)
1860 log_warning_errno(errno
, "Failed to set CPU affinity, ignoring: %m");
/* Applies the configured NUMA memory policy (arg_numa_policy) via apply_numa_policy(). Guarded by
 * 'skip_setup' and by the configured policy type not being valid according to mpol_is_valid(). With
 * debug logging enabled the policy name and node set are logged first. -EOPNOTSUPP is reported at debug
 * level as "NUMA support not available"; the other visible failure path logs a warning. All failures
 * are ignored. */
1863 static void update_numa_policy(bool skip_setup
) {
1865 _cleanup_free_
char *nodes
= NULL
;
1866 const char * policy
= NULL
;
1868 if (skip_setup
|| !mpol_is_valid(numa_policy_get_type(&arg_numa_policy
)))
/* The string conversions are only needed for the message, hence done under DEBUG_LOGGING only. */
1871 if (DEBUG_LOGGING
) {
1872 policy
= mpol_to_string(numa_policy_get_type(&arg_numa_policy
));
1873 nodes
= cpu_set_to_range_string(&arg_numa_policy
.nodes
);
1874 log_debug("Setting NUMA policy to %s, with nodes {%s}.", strnull(policy
), strnull(nodes
));
1877 r
= apply_numa_policy(&arg_numa_policy
);
1878 if (r
== -EOPNOTSUPP
)
1879 log_debug_errno(r
, "NUMA support not available, ignoring.");
1881 log_warning_errno(r
, "Failed to set NUMA memory policy, ignoring: %m");
/* Copies command line arguments from 'src' (argc entries, starting at index 1) into 'dst', appending at
 * position *dst_index and advancing it, while dropping arguments that must not be passed to a
 * re-executed instance: a fixed set of options (STR_IN_SET list not visible here), "--deserialize=..."
 * and "--deserialize <arg>", as well as "--unit=..." and "--unit <arg>". For the two-word forms the
 * following argument is skipped, too. Only the pointers are copied, not the strings.
 * NOTE(review): the parameter list is not visible in this listing; names are taken from the body. */
1884 static void filter_args(
1893 /* Copy some filtered arguments into the dst array from src. */
1894 for (int i
= 1; i
< argc
; i
++) {
1895 if (STR_IN_SET(src
[i
],
1901 if (startswith(src
[i
], "--deserialize="))
1903 if (streq(src
[i
], "--deserialize")) {
1904 i
++; /* Skip the argument too */
1908 /* Skip target unit designators. We already acted upon this information and have queued
1909 * appropriate jobs. We don't want to redo all this after reexecution. */
1910 if (startswith(src
[i
], "--unit="))
1912 if (streq(src
[i
], "--unit")) {
1913 i
++; /* Skip the argument too */
1917 /* Seems we have a good old option. Let's pass it over to the new instance. */
1918 dst
[(*dst_index
)++] = src
[i
];
/* Signals the processes that are still around before a root switch or soft reboot.
 *
 * For MANAGER_SWITCH_ROOT and MANAGER_SOFT_REBOOT SIGTERM is broadcast (with SIGHUP); for
 * MANAGER_SOFT_REBOOT SIGKILL is broadcast afterwards as well. Neither broadcast waits for the
 * processes to exit. Other objectives are a no-op. 'objective' must be a valid ManagerObjective
 * (asserted). */
1922 static void finish_remaining_processes(ManagerObjective objective
) {
1923 assert(objective
>= 0 && objective
< _MANAGER_OBJECTIVE_MAX
);
1925 /* Kill all remaining processes from the initrd, but don't wait for them, so that we can handle the
1926 * SIGCHLD for them after deserializing. */
1927 if (IN_SET(objective
, MANAGER_SWITCH_ROOT
, MANAGER_SOFT_REBOOT
))
1928 broadcast_signal(SIGTERM
, /* wait_for_exit= */ false, /* send_sighup= */ true, arg_defaults
.timeout_stop_usec
);
1930 /* On soft reboot really make sure nothing is left. Note that this will skip cgroups
1931 * of units that were configured with SurviveFinalKillSignal=yes. */
1932 if (objective
== MANAGER_SOFT_REBOOT
)
1933 broadcast_signal(SIGKILL
, /* wait_for_exit= */ false, /* send_sighup= */ false, arg_defaults
.timeout_stop_usec
);
/* On soft reboot only: switches back to VT 1 and releases the other virtual terminals.
 *
 * /dev/tty0 is opened and VT_DISALLOCATE is issued for VT 2 up to and including VTNR_MAX; each result is
 * logged at debug level. All failures (switching the VT, opening /dev/tty0, releasing a single VT) are
 * logged at debug level and ignored. For objectives other than MANAGER_SOFT_REBOOT the check at the top
 * guards the function (body not visible here, presumably an early return). */
1936 static void reduce_vt(ManagerObjective objective
) {
1939 if (objective
!= MANAGER_SOFT_REBOOT
)
1942 /* Switches back to VT 1, and releases all other VTs, in an attempt to return to a situation similar
1943 * to how it was during the original kernel initialization. This is important because if some random
1944 * TTY is in foreground, /dev/console will end up pointing to it, where the future init system will
1945 * then write its status output to, but where it probably shouldn't be writing to. */
1949 log_debug_errno(r
, "Failed to switch to VT TTY 1, ignoring: %m");
1951 _cleanup_close_
int tty0_fd
= open_terminal("/dev/tty0", O_RDWR
|O_NOCTTY
|O_CLOEXEC
|O_NONBLOCK
);
1953 return (void) log_debug_errno(tty0_fd
, "Failed to open '/dev/tty0', ignoring: %m");
/* VT 1 is kept, hence start releasing at 2. */
1955 for (int ttynr
= 2; ttynr
<= VTNR_MAX
; ttynr
++)
1956 if (ioctl(tty0_fd
, VT_DISALLOCATE
, ttynr
) < 0)
1957 log_debug_errno(errno
, "Failed to disallocate VT TTY %i, ignoring: %m", ttynr
);
1959 log_debug("Successfully disallocated VT TTY %i.", ttynr
);
/* Re-executes the manager, or hands over to another init, for the objectives MANAGER_REEXECUTE,
 * MANAGER_SWITCH_ROOT and MANAGER_SOFT_REBOOT (asserted).
 *
 * Visible sequence:
 *  1. Close and disarm the watchdog.
 *  2. On soft reboot without an explicit 'switch_root_dir', probe /run/nextroot and use it if suitable.
 *  3. If a root switch is pending, check up front that a usable init exists below the new root (the
 *     configured 'switch_root_init', our own binary, or /sbin/init); otherwise fail with
 *     *ret_error_message set.
 *  4. Restore the saved RLIMIT_NOFILE/RLIMIT_MEMLOCK values, signal remaining processes, reduce VTs,
 *     perform switch_root() (failure is logged, execution continues) and re-apply the saved ambient
 *     capability set.
 *  5. Unless an explicit init was requested: build an argument vector that carries
 *     "--deserialize=<fd>", optionally "--switched-root", the runtime scope option and the filtered
 *     original arguments, and execv() SYSTEMD_BINARY_PATH. For MANAGER_REEXECUTE a failure here is
 *     final.
 *  6. Fallback without serialization: drop the serialization file and fd set, move /run/systemd aside
 *     to /run/systemd.pre-switch-root, reopen the console, reset signal handlers, signal mask and
 *     RLIMIT_NOFILE, then try 'switch_root_init' (if set), /sbin/init and finally /bin/sh.
 *
 * On failure *ret_error_message points to a constant string describing the problem and the result of
 * the error log call is returned. On success the function does not return, as the process image has
 * been replaced. */
1962 static int do_reexecute(
1963 ManagerObjective objective
,
1966 const struct rlimit
*saved_rlimit_nofile
,
1967 const struct rlimit
*saved_rlimit_memlock
,
1969 const char *switch_root_dir
,
1970 const char *switch_root_init
,
1971 uint64_t saved_capability_ambient_set
,
1972 const char **ret_error_message
) {
1974 size_t i
, args_size
;
1978 assert(IN_SET(objective
, MANAGER_REEXECUTE
, MANAGER_SWITCH_ROOT
, MANAGER_SOFT_REBOOT
));
1980 assert(saved_rlimit_nofile
);
1981 assert(saved_rlimit_memlock
);
1982 assert(ret_error_message
);
1984 /* Close and disarm the watchdog, so that the new instance can reinitialize it, but the machine
1985 * doesn't get rebooted while we do that. */
1986 watchdog_close(/* disarm= */ true);
1988 if (!switch_root_dir
&& objective
== MANAGER_SOFT_REBOOT
) {
1989 /* If no switch root dir is specified, then check if /run/nextroot/ qualifies and use that */
1990 r
= path_is_os_tree("/run/nextroot");
1991 if (r
< 0 && r
!= -ENOENT
)
1992 log_debug_errno(r
, "Failed to determine if /run/nextroot/ is a valid OS tree, ignoring: %m");
1994 switch_root_dir
= "/run/nextroot";
1997 if (switch_root_dir
) {
1998 /* If we're supposed to switch root, preemptively check the existence of a usable init.
1999 * Otherwise the system might end up in a completely undebuggable state afterwards. */
2000 if (switch_root_init
) {
2001 r
= chase_and_access(switch_root_init
, switch_root_dir
, CHASE_PREFIX_ROOT
, X_OK
, /* ret_path = */ NULL
);
2003 log_warning_errno(r
, "Failed to chase configured init %s/%s: %m",
2004 switch_root_dir
, switch_root_init
);
2006 r
= chase_and_access(SYSTEMD_BINARY_PATH
, switch_root_dir
, CHASE_PREFIX_ROOT
, X_OK
, /* ret_path = */ NULL
);
2008 log_debug_errno(r
, "Failed to chase our own binary %s/%s: %m",
2009 switch_root_dir
, SYSTEMD_BINARY_PATH
);
2013 r
= chase_and_access("/sbin/init", switch_root_dir
, CHASE_PREFIX_ROOT
, X_OK
, /* ret_path = */ NULL
);
2015 *ret_error_message
= "Switch root target contains no usable init";
2016 return log_error_errno(r
, "Failed to chase %s/sbin/init", switch_root_dir
);
2021 /* Reset RLIMIT_NOFILE + RLIMIT_MEMLOCK back to the kernel defaults, so that the new systemd can pass
2022 * the kernel default to its child processes */
2023 if (saved_rlimit_nofile
->rlim_cur
!= 0)
2024 (void) setrlimit(RLIMIT_NOFILE
, saved_rlimit_nofile
);
2025 if (saved_rlimit_memlock
->rlim_cur
!= RLIM_INFINITY
)
2026 (void) setrlimit(RLIMIT_MEMLOCK
, saved_rlimit_memlock
);
2028 finish_remaining_processes(objective
);
2029 reduce_vt(objective
);
2031 if (switch_root_dir
) {
2032 r
= switch_root(/* new_root= */ switch_root_dir
,
2033 /* old_root_after= */ NULL
,
2034 /* flags= */ (objective
== MANAGER_SWITCH_ROOT
? SWITCH_ROOT_DESTROY_OLD_ROOT
: 0) |
2035 (objective
== MANAGER_SOFT_REBOOT
? 0 : SWITCH_ROOT_RECURSIVE_RUN
));
2037 log_error_errno(r
, "Failed to switch root, trying to continue: %m");
2040 r
= capability_ambient_set_apply(saved_capability_ambient_set
, /* also_inherit= */ false);
2042 log_warning_errno(r
, "Failed to apply the starting ambient set, ignoring: %m");
/* Room for the original arguments plus the few entries we add ourselves and the NULL terminator. */
2044 args_size
= argc
+ 5;
2045 args
= newa(const char*, args_size
);
2047 if (!switch_root_init
) {
2048 char sfd
[STRLEN("--deserialize=") + DECIMAL_STR_MAX(int)];
2050 /* First try to spawn ourselves with the right path, and with full serialization. We do this
2051 * only if the user didn't specify an explicit init to spawn. */
2053 assert(arg_serialization
);
2056 xsprintf(sfd
, "--deserialize=%i", fileno(arg_serialization
));
2058 i
= 1; /* Leave args[0] empty for now. */
2060 /* Put our stuff first to make sure it always gets parsed in case
2061 * we get weird stuff from the kernel cmdline (like --) */
2062 if (IN_SET(objective
, MANAGER_SWITCH_ROOT
, MANAGER_SOFT_REBOOT
))
2063 args
[i
++] = "--switched-root";
2064 args
[i
++] = runtime_scope_cmdline_option_to_string(arg_runtime_scope
);
2067 filter_args(args
, &i
, argv
, argc
);
2071 assert(i
<= args_size
);
2074 * We want valgrind to print its memory usage summary before reexecution. Valgrind won't do
2075 * this is on its own on exec(), but it will do it on exit(). Hence, to ensure we get a
2076 * summary here, fork() off a child, let it exit() cleanly, so that it prints the summary,
2077 * and wait() for it in the parent, before proceeding into the exec().
2079 valgrind_summary_hack();
2081 args
[0] = SYSTEMD_BINARY_PATH
;
2082 (void) execv(args
[0], (char* const*) args
);
2084 if (objective
== MANAGER_REEXECUTE
) {
2085 *ret_error_message
= "Failed to execute our own binary";
2086 return log_error_errno(errno
, "Failed to execute our own binary %s: %m", args
[0]);
2089 log_debug_errno(errno
, "Failed to execute our own binary %s, trying fallback: %m", args
[0]);
2092 /* Try the fallback, if there is any, without any serialization. We pass the original argv[] and
2093 * envp[]. (Well, modulo the ordering changes due to getopt() in argv[], and some cleanups in envp[],
2094 * but let's hope that doesn't matter.) */
2096 arg_serialization
= safe_fclose(arg_serialization
);
2097 fds
= fdset_free(fds
);
2099 /* Drop /run/systemd directory. Some of its content can be used as a flag indicating that systemd is
2100 * the init system but we might be replacing it with something different. If systemd is used again it
2101 * will recreate the directory and its content anyway. */
2102 r
= rm_rf("/run/systemd.pre-switch-root", REMOVE_ROOT
|REMOVE_MISSING_OK
);
2104 log_warning_errno(r
, "Failed to prepare /run/systemd.pre-switch-root/, ignoring: %m");
2106 r
= RET_NERRNO(rename("/run/systemd", "/run/systemd.pre-switch-root"));
2108 log_warning_errno(r
, "Failed to move /run/systemd/ to /run/systemd.pre-switch-root/, ignoring: %m");
2110 /* Reopen the console */
2111 (void) make_console_stdio();
2113 i
= 1; /* Leave args[0] empty for now. */
/* The loop bound includes argc itself, so argv[argc] is copied along with the arguments. */
2114 for (int j
= 1; j
<= argc
; j
++)
2115 args
[i
++] = argv
[j
];
2116 assert(i
<= args_size
);
2118 /* Re-enable any blocked signals, especially important if we switch from initrd to init=... */
2119 (void) reset_all_signal_handlers();
2120 (void) reset_signal_mask();
2121 (void) rlimit_nofile_safe();
2123 if (switch_root_init
) {
2124 args
[0] = switch_root_init
;
2125 (void) execve(args
[0], (char* const*) args
, saved_env
);
2126 log_warning_errno(errno
, "Failed to execute configured init %s, trying fallback: %m", args
[0]);
2129 args
[0] = "/sbin/init";
2130 (void) execv(args
[0], (char* const*) args
);
2132 *ret_error_message
= "Failed to execute /sbin/init";
2135 manager_status_printf(NULL
, STATUS_TYPE_EMERGENCY
,
2136 ANSI_HIGHLIGHT_RED
" !! " ANSI_NORMAL
,
2137 "%s", *ret_error_message
);
2139 log_warning_errno(r
, "No /sbin/init, trying fallback shell");
2141 args
[0] = "/bin/sh";
2143 (void) execve(args
[0], (char* const*) args
, saved_env
);
2145 *ret_error_message
= "Failed to execute fallback shell";
2148 return log_error_errno(r
, "%s, giving up: %m", *ret_error_message
);
/* Runs manager_loop() on the manager 'm' and dispatches on the objective it reports.
 *
 * 'm' and 'r' are used below but declared on lines that are not part of this excerpt.
 *
 * What the visible code does per objective:
 *   - negative value:      sets *ret_error_message and logs the failure at LOG_EMERG.
 *   - MANAGER_RELOAD:      re-parses the configuration in place, re-applies manager defaults/settings,
 *                          CPU affinity and NUMA policy, restores log overrides, calls manager_reload().
 *   - MANAGER_REEXECUTE:   prepare_reexecute(); *ret_retval = EXIT_FAILURE; switch-root outputs cleared.
 *   - MANAGER_SWITCH_ROOT: marks the manager as switching root, serializes only when no explicit init
 *                          was requested, then moves m->switch_root/m->switch_root_init to the outputs.
 *   - MANAGER_SOFT_REBOOT: like switch-root, but always serializes and reports no init binary.
 *   - shutdown objectives: *ret_retval = m->return_value; switch-root outputs cleared.
 *
 * saved_rlimit_nofile/saved_rlimit_memlock are only forwarded to parse_configuration() on reload. */
2151 static int invoke_main_loop(
2153 const struct rlimit
*saved_rlimit_nofile
,
2154 const struct rlimit
*saved_rlimit_memlock
,
2155 int *ret_retval
, /* Return parameters relevant for shutting down */
2156 FDSet
**ret_fds
, /* Return parameters for reexecuting */
2157 char **ret_switch_root_dir
, /* … */
2158 char **ret_switch_root_init
, /* … */
2159 const char **ret_error_message
) {
2164 assert(saved_rlimit_nofile
);
2165 assert(saved_rlimit_memlock
);
2168 assert(ret_switch_root_dir
);
2169 assert(ret_switch_root_init
);
2170 assert(ret_error_message
);
2173 int objective
= manager_loop(m
);
2174 if (objective
< 0) {
2175 *ret_error_message
= "Failed to run main loop";
2176 return log_struct_errno(LOG_EMERG
, objective
,
2177 LOG_MESSAGE("Failed to run main loop: %m"),
2178 LOG_MESSAGE_ID(SD_MESSAGE_CORE_MAINLOOP_FAILED_STR
));
2181 /* Ensure shutdown timestamp is taken even when bypassing the job engine */
2182 if (IN_SET(objective
,
2183 MANAGER_SOFT_REBOOT
,
2187 MANAGER_POWEROFF
) &&
2188 !dual_timestamp_is_set(m
->timestamps
+ MANAGER_TIMESTAMP_SHUTDOWN_START
))
2189 dual_timestamp_now(m
->timestamps
+ MANAGER_TIMESTAMP_SHUTDOWN_START
);
2191 switch (objective
) {
2193 case MANAGER_RELOAD
: {
2194 LogTarget saved_log_target
;
2195 int saved_log_level
;
2197 manager_send_reloading(m
);
2199 log_info("Reloading...");
2201 /* First, save any overridden log level/target, then parse the configuration file,
2202 * which might change the log level to new settings. */
2204 saved_log_level
= m
->log_level_overridden
? log_get_max_level() : -1;
2205 saved_log_target
= m
->log_target_overridden
? log_get_target() : _LOG_TARGET_INVALID
;
2207 (void) parse_configuration(saved_rlimit_nofile
, saved_rlimit_memlock
);
2209 set_manager_defaults(m
);
2210 set_manager_settings(m
);
2212 update_cpu_affinity(false);
2213 update_numa_policy(false);
/* Re-apply the overrides saved above. The values stored for the "not overridden" case are
 * filtered out by the >= 0 checks (assumes _LOG_TARGET_INVALID is negative — TODO confirm). */
2215 if (saved_log_level
>= 0)
2216 manager_override_log_level(m
, saved_log_level
);
2217 if (saved_log_target
>= 0)
2218 manager_override_log_target(m
, saved_log_target
);
2220 if (manager_reload(m
) < 0)
2221 /* Reloading failed before the point of no return.
2222 * Let's continue running as if nothing happened. */
2223 m
->objective
= MANAGER_OK
;
2225 log_info("Reloading finished in " USEC_FMT
" ms.",
2226 usec_sub_unsigned(now(CLOCK_MONOTONIC
), m
->timestamps
[MANAGER_TIMESTAMP_UNITS_LOAD
].monotonic
) / USEC_PER_MSEC
);
2231 case MANAGER_REEXECUTE
:
2233 manager_send_reloading(m
); /* From the perspective of the manager calling us this is
2234 * pretty much the same as a reload */
2236 r
= prepare_reexecute(m
, &arg_serialization
, ret_fds
, false);
2238 *ret_error_message
= "Failed to prepare for reexecution";
2242 log_notice("Reexecuting.");
2244 *ret_retval
= EXIT_FAILURE
;
2245 *ret_switch_root_dir
= *ret_switch_root_init
= NULL
;
2249 case MANAGER_SWITCH_ROOT
:
2251 manager_send_reloading(m
); /* From the perspective of the manager calling us this is
2252 * pretty much the same as a reload */
2254 manager_set_switching_root(m
, true);
/* State is only serialized when no explicit init binary was requested for the new root. */
2256 if (!m
->switch_root_init
) {
2257 r
= prepare_reexecute(m
, &arg_serialization
, ret_fds
, true);
2259 *ret_error_message
= "Failed to prepare for reexecution";
2265 log_notice("Switching root.");
2267 *ret_retval
= EXIT_FAILURE
;
2269 /* Steal the switch root parameters */
2270 *ret_switch_root_dir
= TAKE_PTR(m
->switch_root
);
2271 *ret_switch_root_init
= TAKE_PTR(m
->switch_root_init
);
2275 case MANAGER_SOFT_REBOOT
:
2276 manager_send_reloading(m
);
2277 manager_set_switching_root(m
, true);
2279 r
= prepare_reexecute(m
, &arg_serialization
, ret_fds
, /* switching_root= */ true);
2281 *ret_error_message
= "Failed to prepare for reexecution";
2285 log_notice("Soft-rebooting.");
2287 *ret_retval
= EXIT_FAILURE
;
2288 *ret_switch_root_dir
= TAKE_PTR(m
->switch_root
);
2289 *ret_switch_root_init
= NULL
;
2294 if (MANAGER_IS_USER(m
)) {
2297 *ret_retval
= m
->return_value
;
2299 *ret_switch_root_dir
= *ret_switch_root_init
= NULL
;
2305 case MANAGER_REBOOT
:
2306 case MANAGER_POWEROFF
:
2308 case MANAGER_KEXEC
: {
2309 log_notice("Shutting down.");
2311 *ret_retval
= m
->return_value
;
2313 *ret_switch_root_dir
= *ret_switch_root_init
= NULL
;
2319 assert_not_reached();
/* Logs how this instance is running and reports whether this is the first boot.
 *
 * Dispatches on arg_runtime_scope:
 *   - RUNTIME_SCOPE_SYSTEM: logs the version banner, the detected virtualization, confidential
 *     virtualization and architecture, decides on first boot (kernel command line switch
 *     "systemd.condition_first_boot" first, then the contents of /etc/machine-id), and compares the
 *     running kernel release from uname() against KERNEL_BASELINE_VERSION.
 *   - RUNTIME_SCOPE_USER: only emits a debug banner, and only when DEBUG_LOGGING is on.
 *
 * ret_first_boot must be non-NULL; it receives the first-boot decision (false unless the
 * system-scope branch changed it). Variables such as 'r', 'v' and 'uts' are declared on lines that
 * are not part of this excerpt. */
2324 static void log_execution_mode(bool *ret_first_boot
) {
2325 bool first_boot
= false;
2328 assert(ret_first_boot
);
2330 switch (arg_runtime_scope
) {
2332 case RUNTIME_SCOPE_SYSTEM
: {
2336 log_info("systemd " GIT_VERSION
" running in %ssystem mode (%s)",
2337 arg_action
== ACTION_TEST
? "test " : "",
2340 v
= detect_virtualization();
2342 log_info("Detected virtualization %s.", virtualization_to_string(v
));
2344 v
= detect_confidential_virtualization();
2346 log_info("Detected confidential virtualization %s.", confidential_virtualization_to_string(v
));
2348 log_info("Detected architecture %s.", architecture_to_string(uname_architecture()));
2351 log_info("Running in initrd.");
2353 _cleanup_free_
char *id_text
= NULL
;
2355 /* Let's check whether we are in first boot. First, check if an override was
2356 * specified on the kernel command line. If yes, we honour that. */
2358 r
= proc_cmdline_get_bool("systemd.condition_first_boot", /* flags = */ 0, &first_boot
);
2360 log_debug_errno(r
, "Failed to parse systemd.condition_first_boot= kernel command line argument, ignoring: %m");
2363 log_full(first_boot
? LOG_INFO
: LOG_DEBUG
,
2364 "Kernel command line argument says we are %s first boot.",
2365 first_boot
? "in" : "not in");
2367 /* Second, perform autodetection. We use /etc/machine-id as flag file for
2368 * this: If it is missing or contains the value "uninitialized", this is the
2369 * first boot. In other cases, it is not. This allows container managers and
2370 * installers to provision a couple of files in /etc but still permit the
2371 * first-boot initialization to occur. If the container manager wants to
2372 * provision the machine ID it should pass $container_uuid to PID 1. */
2374 r
= read_one_line_file("/etc/machine-id", &id_text
);
/* Both a failed read and the literal "uninitialized" take this branch; the streq() is only
 * evaluated when the read succeeded. Only failures other than -ENOENT get a warning. */
2375 if (r
< 0 || streq(id_text
, "uninitialized")) {
2376 if (r
< 0 && r
!= -ENOENT
)
2377 log_warning_errno(r
, "Unexpected error while reading /etc/machine-id, assuming first boot: %m");
2380 log_info("Detected first boot.");
2382 log_debug("Detected initialized system, this is not the first boot.");
2386 assert_se(uname(&uts
) >= 0);
2388 if (strverscmp_improved(uts
.release
, KERNEL_BASELINE_VERSION
) < 0)
2389 log_warning("Warning! Reported kernel version %s is older than systemd's required baseline kernel version %s. "
2390 "Your mileage may vary.", uts
.release
, KERNEL_BASELINE_VERSION
);
2392 log_debug("Kernel version %s, our baseline is %s", uts
.release
, KERNEL_BASELINE_VERSION
);
2397 case RUNTIME_SCOPE_USER
:
2398 if (DEBUG_LOGGING
) {
2399 _cleanup_free_
char *t
= NULL
;
2401 t
= uid_to_name(getuid());
2402 log_debug("systemd " GIT_VERSION
" running in %suser mode for user " UID_FMT
"/%s. (%s)",
2403 arg_action
== ACTION_TEST
? " test" : "",
2404 getuid(), strna(t
), systemd_features
);
2410 assert_not_reached();
2413 *ret_first_boot
= first_boot
;
/* Applies process-wide runtime settings for the manager before it starts doing real work.
 *
 * Visible steps, in order:
 *   - update_cpu_affinity()/update_numa_policy(), both passed skip_setup.
 *   - RUNTIME_SCOPE_SYSTEM: crash handler, a check that /usr/ is populated, credential import,
 *     os-release status, machine ID / hostname / loopback setup, kernel limit bumps, container ID,
 *     boot OSC sequence, os-release propagation copy, watchdog device, capability bounding set,
 *     and PR_SET_NO_NEW_PRIVS when arg_no_new_privs is set.
 *   - RUNTIME_SCOPE_USER: creates the "systemd" directory below $XDG_RUNTIME_DIR, places the
 *     inaccessible nodes there and copies os-release for propagation.
 *   - both scopes: saves the ambient capability set into *saved_ambient_set and clears it, applies
 *     timer slack and syscall architecture restrictions, becomes a subreaper, bumps the manager's
 *     own RLIMIT_NOFILE and RLIMIT_MEMLOCK.
 *
 * Steps that set *ret_error_message are treated as fatal; the others only log a warning, per
 * their "ignoring" messages. skip_setup and first_boot are used below but declared on lines that are
 * not part of this excerpt, as are the conditions deciding which system-scope steps are skipped
 * on reexecution. */
2416 static int initialize_runtime(
2419 struct rlimit
*saved_rlimit_nofile
,
2420 struct rlimit
*saved_rlimit_memlock
,
2421 uint64_t *saved_ambient_set
,
2422 const char **ret_error_message
) {
2426 assert(saved_ambient_set
);
2427 assert(ret_error_message
);
2429 /* Sets up various runtime parameters. Many of these initializations are conditionalized:
2431 * - Some only apply to --system instances
2432 * - Some only apply to --user instances
2433 * - Some only apply when we first start up, but not when we reexecute
2436 if (arg_action
!= ACTION_RUN
)
2439 update_cpu_affinity(skip_setup
);
2440 update_numa_policy(skip_setup
);
2442 switch (arg_runtime_scope
) {
2444 case RUNTIME_SCOPE_SYSTEM
:
2445 /* Make sure we leave a core dump without panicking the kernel. */
2446 install_crash_handler();
2449 /* Check that /usr/ is either on the same file system as / or mounted already. */
2450 if (dir_is_empty("/usr", /* ignore_hidden_or_backup = */ true) > 0) {
2451 *ret_error_message
= "Refusing to run in unsupported environment where /usr/ is not populated";
2455 /* Pull credentials from various sources into a common credential directory (we do
2456 * this here, before setting up the machine ID, so that we can use credential info
2457 * for setting up the machine ID) */
2458 (void) import_credentials();
2460 (void) os_release_status();
2461 (void) machine_id_setup(/* root = */ NULL
, arg_machine_id
,
2462 (first_boot
? MACHINE_ID_SETUP_FORCE_TRANSIENT
: 0) |
2463 (arg_machine_id_from_firmware
? MACHINE_ID_SETUP_FORCE_FIRMWARE
: 0),
2465 (void) hostname_setup(/* really = */ true);
2466 (void) loopback_setup();
2468 bump_unix_max_dgram_qlen();
2469 bump_file_max_and_nr_open();
2471 write_container_id();
2473 (void) write_boot_or_shutdown_osc("boot");
2475 /* Copy os-release to the propagate directory, so that we update it for services running
2476 * under RootDirectory=/RootImage= when we do a soft reboot. */
2477 r
= setup_os_release(RUNTIME_SCOPE_SYSTEM
);
2479 log_warning_errno(r
, "Failed to copy os-release for propagation, ignoring: %m");
2482 r
= watchdog_set_device(arg_watchdog_device
);
2484 log_warning_errno(r
, "Failed to set watchdog device to %s, ignoring: %m", arg_watchdog_device
);
2486 if (!cap_test_all(arg_capability_bounding_set
)) {
2487 r
= capability_bounding_set_drop_usermode(arg_capability_bounding_set
);
2489 *ret_error_message
= "Failed to drop capability bounding set of usermode helpers";
2490 return log_struct_errno(LOG_EMERG
, r
,
2491 LOG_MESSAGE("Failed to drop capability bounding set of usermode helpers: %m"),
2492 LOG_MESSAGE_ID(SD_MESSAGE_CORE_CAPABILITY_BOUNDING_USER_STR
));
2495 r
= capability_bounding_set_drop(arg_capability_bounding_set
, true);
2497 *ret_error_message
= "Failed to drop capability bounding set";
2498 return log_struct_errno(LOG_EMERG
, r
,
2499 LOG_MESSAGE("Failed to drop capability bounding set: %m"),
2500 LOG_MESSAGE_ID(SD_MESSAGE_CORE_CAPABILITY_BOUNDING_STR
));
2504 if (arg_no_new_privs
) {
2505 if (prctl(PR_SET_NO_NEW_PRIVS
, 1, 0, 0, 0) < 0) {
2506 *ret_error_message
= "Failed to disable new privileges";
2507 return log_struct_errno(LOG_EMERG
, errno
,
2508 LOG_MESSAGE("Failed to disable new privileges: %m"),
2509 LOG_MESSAGE_ID(SD_MESSAGE_CORE_DISABLE_PRIVILEGES_STR
));
2515 case RUNTIME_SCOPE_USER
: {
2516 _cleanup_free_
char *p
= NULL
;
2518 /* Create the runtime directory and place the inaccessible device nodes there, if we run in
2519 * user mode. In system mode mount_setup() already did that. */
2521 r
= xdg_user_runtime_dir("/systemd", &p
);
2523 *ret_error_message
= "$XDG_RUNTIME_DIR is not set";
2524 return log_struct_errno(LOG_EMERG
, r
,
2525 LOG_MESSAGE("Failed to determine $XDG_RUNTIME_DIR path: %m"),
2526 LOG_MESSAGE_ID(SD_MESSAGE_CORE_NO_XDGDIR_PATH_STR
));
2530 (void) mkdir_p_label(p
, 0755);
2531 (void) make_inaccessible_nodes(p
, UID_INVALID
, GID_INVALID
);
2533 r
= setup_os_release(RUNTIME_SCOPE_USER
);
2535 log_warning_errno(r
, "Failed to copy os-release for propagation, ignoring: %m");
2542 assert_not_reached();
2545 /* The two operations on the ambient set are meant for a user session manager. They do not affect
2546 * system manager operation, because by default it starts with an empty ambient set.
2548 * Preserve the ambient set for later use with sd-executor processes. */
2549 r
= capability_get_ambient(saved_ambient_set
);
2551 log_warning_errno(r
, "Failed to save ambient capabilities, ignoring: %m");
2553 /* Clear ambient capabilities, so services do not inherit them implicitly. Dropping them does
2554 * not affect the permitted and effective sets which are important for the manager itself to
2556 r
= capability_ambient_set_apply(0, /* also_inherit= */ false);
2558 log_warning_errno(r
, "Failed to reset ambient capability set, ignoring: %m");
2560 if (arg_timer_slack_nsec
!= NSEC_INFINITY
)
2561 if (prctl(PR_SET_TIMERSLACK
, arg_timer_slack_nsec
) < 0)
2562 log_warning_errno(errno
, "Failed to adjust timer slack, ignoring: %m");
2564 if (arg_syscall_archs
) {
2565 r
= enforce_syscall_archs(arg_syscall_archs
);
2567 *ret_error_message
= "Failed to set syscall architectures";
2572 r
= make_reaper_process(true);
2574 log_warning_errno(r
, "Failed to make us a subreaper, ignoring: %m");
2576 /* Bump up RLIMIT_NOFILE for systemd itself */
2577 (void) bump_rlimit_nofile(saved_rlimit_nofile
);
2578 (void) bump_rlimit_memlock(saved_rlimit_memlock
);
/* Selects the unit to activate at startup, loads it and queues a start job for it.
 *
 * Unit selection: arg_default_unit if set, otherwise SPECIAL_INITRD_TARGET when running in the
 * initrd, otherwise SPECIAL_DEFAULT_TARGET. If loading fails in the initrd and no explicit unit
 * was configured, SPECIAL_DEFAULT_TARGET is tried; SPECIAL_RESCUE_TARGET is the last fallback.
 *
 * The job is requested with JOB_ISOLATE first and with JOB_REPLACE second; the log text between
 * the two calls shows the second one is used when the target cannot be isolated.
 *
 * On the fatal paths *ret_error_message is set and the failure is logged at LOG_EMERG. 'm',
 * 'unit', 'target', 'job' and 'r' are declared on lines that are not part of this excerpt. */
2583 static int do_queue_default_job(
2585 const char **ret_error_message
) {
2587 _cleanup_(sd_bus_error_free
) sd_bus_error error
= SD_BUS_ERROR_NULL
;
2593 if (arg_default_unit
)
2594 unit
= arg_default_unit
;
2595 else if (in_initrd())
2596 unit
= SPECIAL_INITRD_TARGET
;
2598 unit
= SPECIAL_DEFAULT_TARGET
;
2600 log_debug("Activating default unit: %s", unit
);
2602 r
= manager_load_startable_unit_or_warn(m
, unit
, NULL
, &target
);
2603 if (r
< 0 && in_initrd() && !arg_default_unit
) {
2604 /* Fall back to default.target, which we used to always use by default. Only do this if no
2605 * explicit configuration was given. */
2607 log_info("Falling back to %s.", SPECIAL_DEFAULT_TARGET
);
2609 r
= manager_load_startable_unit_or_warn(m
, SPECIAL_DEFAULT_TARGET
, NULL
, &target
);
2612 log_info("Falling back to %s.", SPECIAL_RESCUE_TARGET
);
2614 r
= manager_load_startable_unit_or_warn(m
, SPECIAL_RESCUE_TARGET
, NULL
, &target
);
/* -ERFKILL is reported as "masked"; every other failure as a generic load failure. */
2616 *ret_error_message
= r
== -ERFKILL
? SPECIAL_RESCUE_TARGET
" masked"
2617 : "Failed to load " SPECIAL_RESCUE_TARGET
;
2622 assert(target
->load_state
== UNIT_LOADED
);
2624 r
= manager_add_job(m
, JOB_START
, target
, JOB_ISOLATE
, &error
, &job
);
2626 log_debug_errno(r
, "Default target could not be isolated, starting instead: %s", bus_error_message(&error
, r
));
/* Clear the error left by the isolate attempt so the same struct can be passed to the
 * next manager_add_job() call. */
2628 sd_bus_error_free(&error
);
2630 r
= manager_add_job(m
, JOB_START
, target
, JOB_REPLACE
, &error
, &job
);
2632 *ret_error_message
= "Failed to start default target";
2633 return log_struct_errno(LOG_EMERG
, r
,
2634 LOG_MESSAGE("Failed to start default target: %s", bus_error_message(&error
, r
)),
2635 LOG_MESSAGE_ID(SD_MESSAGE_CORE_START_TARGET_FAILED_STR
));
2639 *ret_error_message
= "Failed to isolate default target";
2640 return log_struct_errno(LOG_EMERG
, r
,
2641 LOG_MESSAGE("Failed to isolate default target: %s", bus_error_message(&error
, r
)),
2642 LOG_MESSAGE_ID(SD_MESSAGE_CORE_ISOLATE_TARGET_FAILED_STR
));
2645 log_info("Queued %s job for default target %s.",
2646 job_type_to_string(job
->type
), unit_status_string(job
->unit
, NULL
));
/* Reads the current RLIMIT_NOFILE and RLIMIT_MEMLOCK of this process into the caller's structs.
 *
 * Both pointers must be non-NULL. A getrlimit() failure is only logged as a warning; nothing is
 * reported back to the caller. */
2651 static void save_rlimits(struct rlimit
*saved_rlimit_nofile
,
2652 struct rlimit
*saved_rlimit_memlock
) {
2654 assert(saved_rlimit_nofile
);
2655 assert(saved_rlimit_memlock
);
2657 if (getrlimit(RLIMIT_NOFILE
, saved_rlimit_nofile
) < 0)
2658 log_warning_errno(errno
, "Reading RLIMIT_NOFILE failed, ignoring: %m");
2660 if (getrlimit(RLIMIT_MEMLOCK
, saved_rlimit_memlock
) < 0)
2661 log_warning_errno(errno
, "Reading RLIMIT_MEMLOCK failed, ignoring: %m");
/* Installs a default RLIMIT_NOFILE in arg_defaults.rlimit[RLIMIT_NOFILE], derived from the limit this
 * process was started with.
 *
 * The value starts as a heap copy of *saved_rlimit_nofile. For RUNTIME_SCOPE_SYSTEM the hard limit
 * is raised to at least HIGH_RLIMIT_NOFILE but never above read_nr_open(); in every scope the soft
 * limit is capped at FD_SETSIZE.
 *
 * The function first tests whether arg_defaults.rlimit[RLIMIT_NOFILE] is already set; the statement
 * guarded by that test, and the handling of a failed newdup(), are on lines that are not part of
 * this excerpt (presumably early returns — confirm against the full file). */
2664 static void fallback_rlimit_nofile(const struct rlimit
*saved_rlimit_nofile
) {
2667 if (arg_defaults
.rlimit
[RLIMIT_NOFILE
])
2670 /* Make sure forked processes get limits based on the original kernel setting */
2672 rl
= newdup(struct rlimit
, saved_rlimit_nofile
, 1);
2678 /* Bump the hard limit for system services to a substantially higher value. The default
2679 * hard limit current kernels set is pretty low (4K), mostly for historical
2680 * reasons. According to kernel developers, the fd handling in recent kernels has been
2681 * optimized substantially enough, so that we can bump the limit now, without paying too
2682 * high a price in memory or performance. Note however that we only bump the hard limit,
2683 * not the soft limit. That's because select() works the way it works, and chokes on fds
2684 * >= 1024. If we'd bump the soft limit globally, it might accidentally happen to
2685 * unexpecting programs that they get fds higher than what they can process using
2686 * select(). By only bumping the hard limit but leaving the low limit as it is we avoid
2687 * this pitfall: programs that are written by folks aware of the select() problem in mind
2688 * (and thus use poll()/epoll instead of select(), the way everybody should) can
2689 * explicitly opt into high fds by bumping their soft limit beyond 1024, to the hard limit
2691 if (arg_runtime_scope
== RUNTIME_SCOPE_SYSTEM
)
2692 rl
->rlim_max
= MIN((rlim_t
) read_nr_open(), MAX(rl
->rlim_max
, (rlim_t
) HIGH_RLIMIT_NOFILE
));
2694 /* If for some reason we were invoked with a soft limit above 1024 (which should never
2695 * happen!, but who knows what we get passed in from pam_limit when invoked as --user
2696 * instance), then lower what we pass on to not confuse our children */
2697 rl
->rlim_cur
= MIN(rl
->rlim_cur
, (rlim_t
) FD_SETSIZE
);
2699 arg_defaults
.rlimit
[RLIMIT_NOFILE
] = rl
;
/* Installs a default RLIMIT_MEMLOCK in arg_defaults.rlimit[RLIMIT_MEMLOCK], derived from the limit
 * this process was started with.
 *
 * The value starts as a heap copy of *saved_rlimit_memlock. For RUNTIME_SCOPE_SYSTEM both the soft
 * and the hard limit are raised to at least DEFAULT_RLIMIT_MEMLOCK; in other scopes the copy is
 * stored unchanged.
 *
 * The statement guarded by the "already set" test and the handling of a failed newdup() are on
 * lines that are not part of this excerpt. */
2702 static void fallback_rlimit_memlock(const struct rlimit
*saved_rlimit_memlock
) {
2705 /* Pass the original value down to invoked processes */
2707 if (arg_defaults
.rlimit
[RLIMIT_MEMLOCK
])
2710 rl
= newdup(struct rlimit
, saved_rlimit_memlock
, 1);
2716 if (arg_runtime_scope
== RUNTIME_SCOPE_SYSTEM
) {
2717 /* Raise the default limit to 8M also on old kernels and in containers (8M is the kernel
2718 * default for this since kernel 5.16) */
2719 rl
->rlim_max
= MAX(rl
->rlim_max
, (rlim_t
) DEFAULT_RLIMIT_MEMLOCK
);
2720 rl
->rlim_cur
= MAX(rl
->rlim_cur
, (rlim_t
) DEFAULT_RLIMIT_MEMLOCK
);
2723 arg_defaults
.rlimit
[RLIMIT_MEMLOCK
] = rl
;
/* Applies every entry of arg_manager_environment to this process's own environment.
 *
 * Each entry is logged at debug level and handed to putenv_dup(). A failing entry is logged as a
 * warning and, per the message text, ignored. 'r' is declared on a line that is not part of this
 * excerpt. */
2726 static void setenv_manager_environment(void) {
2729 STRV_FOREACH(p
, arg_manager_environment
) {
2730 log_debug("Setting '%s' in our own environment.", *p
);
2732 r
= putenv_dup(*p
, true);
2734 log_warning_errno(r
, "Failed to setenv \"%s\", ignoring: %m", *p
);
/* Puts the arg_* globals back to their built-in defaults.
 *
 * Heap-owned values are released through the helper matching their type (mfree(), strv_free(),
 * set_free(), unit_defaults_done(), cpu_set_done(), numa_policy_reset()) and the variable is set
 * to the helper's return value or re-initialized, so no stale pointer is left behind.
 * arg_runtime_scope and arg_serialization are intentionally not touched, as the inline notes say. */
2738 static void reset_arguments(void) {
2739 /* Frees/resets arg_* variables, with a few exceptions commented below. */
2741 arg_default_unit
= mfree(arg_default_unit
);
2743 /* arg_runtime_scope — ignore */
2745 arg_dump_core
= true;
2746 arg_crash_chvt
= -1;
2747 arg_crash_shell
= false;
2748 arg_crash_action
= CRASH_FREEZE
;
2749 arg_confirm_spawn
= mfree(arg_confirm_spawn
);
2750 arg_show_status
= _SHOW_STATUS_INVALID
;
2751 arg_status_unit_format
= STATUS_UNIT_FORMAT_DEFAULT
;
2752 arg_switched_root
= false;
2753 arg_pager_flags
= 0;
2754 arg_service_watchdogs
= true;
2756 unit_defaults_done(&arg_defaults
);
2757 unit_defaults_init(&arg_defaults
, arg_runtime_scope
);
2759 arg_runtime_watchdog
= 0;
2760 arg_reboot_watchdog
= 10 * USEC_PER_MINUTE
;
2761 arg_kexec_watchdog
= 0;
2762 arg_pretimeout_watchdog
= 0;
2763 arg_early_core_pattern
= mfree(arg_early_core_pattern
);
2764 arg_watchdog_device
= mfree(arg_watchdog_device
);
2765 arg_watchdog_pretimeout_governor
= mfree(arg_watchdog_pretimeout_governor
);
2767 arg_default_environment
= strv_free(arg_default_environment
);
2768 arg_manager_environment
= strv_free(arg_manager_environment
);
2770 arg_capability_bounding_set
= CAP_MASK_ALL
;
2771 arg_no_new_privs
= false;
2772 arg_protect_system
= -1;
2773 arg_timer_slack_nsec
= NSEC_INFINITY
;
2775 arg_syscall_archs
= set_free(arg_syscall_archs
);
2777 /* arg_serialization — ignore */
2779 arg_machine_id
= (sd_id128_t
) {};
2780 arg_cad_burst_action
= EMERGENCY_ACTION_REBOOT_FORCE
;
2782 cpu_set_done(&arg_cpu_affinity
);
2783 numa_policy_reset(&arg_numa_policy
);
2785 arg_random_seed
= mfree(arg_random_seed
);
2786 arg_random_seed_size
= 0;
2789 arg_reload_limit_interval_sec
= 0;
2790 arg_reload_limit_burst
= 0;
/* Picks the default OOM score adjustment for services started by this manager.
 *
 * The current adjustment of this process is read with get_oom_score_adjust(); the default for
 * services becomes that value plus 100, clamped to OOM_SCORE_ADJ_MAX. The clamp is written as a
 * comparison against OOM_SCORE_ADJ_MAX - 100 so the addition itself can never exceed the maximum.
 * The result is stored in arg_defaults.oom_score_adjust and arg_defaults.oom_score_adjust_set is
 * set to true.
 *
 * A failed read is logged as a warning and the function returns without changing anything. The
 * statement guarded by the oom_score_adjust_set test, and the "not running as root" check that the
 * in-function comment describes, are on lines that are not part of this excerpt. */
2793 static void determine_default_oom_score_adjust(void) {
2796 /* Run our services at slightly higher OOM score than ourselves. But let's be conservative here, and
2797 * do this only if we don't run as root (i.e. only if we are run in user mode, for an unprivileged
2800 if (arg_defaults
.oom_score_adjust_set
)
2806 r
= get_oom_score_adjust(&a
);
2808 return (void) log_warning_errno(r
, "Failed to determine current OOM score adjustment value, ignoring: %m");
2810 assert_cc(100 <= OOM_SCORE_ADJ_MAX
);
2811 b
= a
>= OOM_SCORE_ADJ_MAX
- 100 ? OOM_SCORE_ADJ_MAX
: a
+ 100;
2816 arg_defaults
.oom_score_adjust
= b
;
2817 arg_defaults
.oom_score_adjust_set
= true;
/* Builds the effective manager configuration from the configuration sources.
 *
 * Visible steps, in order: parse the config file; for RUNTIME_SCOPE_SYSTEM only, parse the kernel
 * command line; default arg_show_status to SHOW_STATUS_YES if it is still _SHOW_STATUS_INVALID;
 * export arg_manager_environment into this process's environment; re-read the logging settings;
 * fill in fallback RLIMIT_NOFILE/RLIMIT_MEMLOCK defaults from the saved limits; determine the
 * default OOM score adjustment.
 *
 * Failures of the two parsers are logged as warnings and ignored. Both rlimit pointers must be
 * non-NULL. The statement that follows the "Assign configuration defaults" comment and the
 * function's return statement are on lines that are not part of this excerpt. */
2820 static int parse_configuration(const struct rlimit
*saved_rlimit_nofile
,
2821 const struct rlimit
*saved_rlimit_memlock
) {
2824 assert(saved_rlimit_nofile
);
2825 assert(saved_rlimit_memlock
);
2827 /* Assign configuration defaults */
2830 r
= parse_config_file();
2832 log_warning_errno(r
, "Failed to parse config file, ignoring: %m");
2834 if (arg_runtime_scope
== RUNTIME_SCOPE_SYSTEM
) {
2835 r
= proc_cmdline_parse(parse_proc_cmdline_item
, NULL
, 0);
2837 log_warning_errno(r
, "Failed to parse kernel command line, ignoring: %m");
2840 /* Initialize the show status setting if it hasn't been explicitly set yet */
2841 if (arg_show_status
== _SHOW_STATUS_INVALID
)
2842 arg_show_status
= SHOW_STATUS_YES
;
2844 /* Push variables into the manager environment block */
2845 setenv_manager_environment();
2847 /* Parse log environment variables to take into account any new environment variables.
2848 * Note that this also parses bits from the kernel command line, including "debug". */
2849 log_parse_environment();
2851 /* Initialize some default rlimits for services if they haven't been configured */
2852 fallback_rlimit_nofile(saved_rlimit_nofile
);
2853 fallback_rlimit_memlock(saved_rlimit_memlock
);
2855 /* Slightly raise the OOM score for our services if we are running for unprivileged users. */
2856 determine_default_oom_score_adjust();
/* Refuses combinations of PID, arg_action and arg_runtime_scope that are not supported.
 *
 * Every failing check logs an error and returns a synthetic negative errno:
 *   - EPERM:      PID 1 with an action other than ACTION_RUN; PID 1 in user scope; ACTION_RUN in
 *                 system scope without being PID 1; ACTION_TEST in the situation described by the
 *                 "Don't run test mode as root." message.
 *   - EOPNOTSUPP: user instance on a system not booted with systemd; system instance inside a
 *                 chroot() environment.
 *   - EUNATCH:    user instance with ACTION_RUN but without $XDG_RUNTIME_DIR in the environment.
 *
 * The second half of the test-mode condition and of the "booted with systemd" condition are on
 * lines that are not part of this excerpt, as is the success return. */
2861 static int safety_checks(void) {
2863 if (getpid_cached() == 1 &&
2864 arg_action
!= ACTION_RUN
)
2865 return log_error_errno(SYNTHETIC_ERRNO(EPERM
),
2866 "Unsupported execution mode while PID 1.");
2868 if (getpid_cached() == 1 &&
2869 arg_runtime_scope
== RUNTIME_SCOPE_USER
)
2870 return log_error_errno(SYNTHETIC_ERRNO(EPERM
),
2871 "Can't run --user mode as PID 1.");
2873 if (arg_action
== ACTION_RUN
&&
2874 arg_runtime_scope
== RUNTIME_SCOPE_SYSTEM
&&
2875 getpid_cached() != 1)
2876 return log_error_errno(SYNTHETIC_ERRNO(EPERM
),
2877 "Can't run system mode unless PID 1.");
2879 if (arg_action
== ACTION_TEST
&&
2881 return log_error_errno(SYNTHETIC_ERRNO(EPERM
),
2882 "Don't run test mode as root.");
2884 switch (arg_runtime_scope
) {
2886 case RUNTIME_SCOPE_USER
:
2888 if (arg_action
== ACTION_RUN
&&
2890 return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP
),
2891 "Trying to run as user instance, but the system has not been booted with systemd.");
2893 if (arg_action
== ACTION_RUN
&&
2894 !getenv("XDG_RUNTIME_DIR"))
2895 return log_error_errno(SYNTHETIC_ERRNO(EUNATCH
),
2896 "Trying to run as user instance, but $XDG_RUNTIME_DIR is not set.");
2900 case RUNTIME_SCOPE_SYSTEM
:
2901 if (arg_action
== ACTION_RUN
&&
2902 running_in_chroot() > 0)
2903 return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP
),
2904 "Cannot be run in a chroot() environment.");
2908 assert_not_reached();
/* Sets up the security subsystems and records when that work started and finished.
 *
 * *security_start_timestamp is taken first and *security_finish_timestamp last. In between,
 * mac_selinux_setup() and mac_smack_setup() are called with loaded_policy, followed by
 * mac_apparmor_setup(). IMA and IPE policy loading follow; only their error messages are visible
 * here, the calls themselves are on lines that are not part of this excerpt.
 *
 * Each step has its own *ret_error_message text for the failure case. All four pointer
 * arguments must be non-NULL. */
2914 static int initialize_security(
2915 bool *loaded_policy
,
2916 dual_timestamp
*security_start_timestamp
,
2917 dual_timestamp
*security_finish_timestamp
,
2918 const char **ret_error_message
) {
2922 assert(loaded_policy
);
2923 assert(security_start_timestamp
);
2924 assert(security_finish_timestamp
);
2925 assert(ret_error_message
);
2927 dual_timestamp_now(security_start_timestamp
);
2929 r
= mac_selinux_setup(loaded_policy
);
2931 *ret_error_message
= "Failed to load SELinux policy";
2935 r
= mac_smack_setup(loaded_policy
);
2937 *ret_error_message
= "Failed to load SMACK policy";
2941 r
= mac_apparmor_setup();
2943 *ret_error_message
= "Failed to load AppArmor policy";
2949 *ret_error_message
= "Failed to load IMA policy";
2955 *ret_error_message
= "Failed to load IPE policy";
2959 dual_timestamp_now(security_finish_timestamp
);
/* Collects the file descriptors that were passed to this process into a newly allocated FDSet,
 * returned through *ret_fds.
 *
 * On failure of fdset_new_fill(), *ret_error_message is set and the error is logged at
 * LOG_EMERG. Afterwards the function asserts that the serialization stream's fd, if
 * arg_serialization is set, did not end up in the collected set. */
2963 static int collect_fds(FDSet
**ret_fds
, const char **ret_error_message
) {
2967 assert(ret_error_message
);
2969 /* Pick up all fds passed to us. We apply a filter here: we only take the fds that have O_CLOEXEC
2970 * off. All fds passed via execve() to us must have O_CLOEXEC off, and our own code and dependencies
2971 * should be clean enough to set O_CLOEXEC universally. Thus checking the bit should be a safe
2972 * mechanism to distinguish passed in fds from our own.
2974 * Why bother? Some subsystems we initialize early, specifically selinux might keep fds open in our
2975 * process behind our back. We should not take possession of that (and then accidentally close
2976 * it). SELinux thankfully sets O_CLOEXEC on its fds, so this test should work. */
2977 r
= fdset_new_fill(/* filter_cloexec= */ 0, ret_fds
);
2979 *ret_error_message
= "Failed to allocate fd set";
2980 return log_struct_errno(LOG_EMERG
, r
,
2981 LOG_MESSAGE("Failed to allocate fd set: %m"),
2982 LOG_MESSAGE_ID(SD_MESSAGE_CORE_FD_SET_FAILED_STR
));
2985 /* The serialization fd should have O_CLOEXEC turned on already, let's verify that we didn't pick it up here */
2986 assert_se(!arg_serialization
|| !fdset_contains(*ret_fds
, fileno(arg_serialization
)));
/* Console handling for the system manager: detaches from the terminal session and sets up the
 * console.
 *
 * The statement guarded by the arg_runtime_scope test (presumably an early return for non-system
 * scopes — confirm against the full file) and the condition that guards console_setup(), which is
 * presumably where skip_setup is consulted, are on lines that are not part of this excerpt. */
2991 static void setup_console_terminal(bool skip_setup
) {
2993 if (arg_runtime_scope
!= RUNTIME_SCOPE_SYSTEM
)
2996 /* If we are init, we connect stdin/stdout/stderr to /dev/null and make sure we don't have a
2997 * controlling tty. */
2998 terminal_detach_session();
3000 /* Reset the console, but only if this is really init and we are freshly booted */
3002 (void) console_setup();
/* Quick scan of the command line, done before real option parsing, to decide whether the
 * extensive first-start setup can be skipped.
 *
 * Scans argv[1..argc-1]. Returns false as soon as "--switched-root" is seen, regardless of any
 * other argument. Otherwise returns true if and only if "--deserialize" or an argument starting
 * with "--deserialize=" is present. */
3005 static bool early_skip_setup_check(int argc
, char *argv
[]) {
3006 bool found_deserialize
= false;
3008 /* Determine if this is a reexecution or normal bootup. We do the full command line parsing much
3009 * later, so let's just have a quick peek here. Note that if we have switched root, do all the
3010 * special setup things anyway, even if in that case we also do deserialization. */
3012 for (int i
= 1; i
< argc
; i
++)
3013 if (streq(argv
[i
], "--switched-root"))
3014 return false; /* If we switched root, don't skip the setup. */
3015 else if (startswith(argv
[i
], "--deserialize=") || streq(argv
[i
], "--deserialize"))
3016 found_deserialize
= true;
3018 return found_deserialize
; /* When we are deserializing, then we are reexecuting, hence avoid the extensive setup */
/* Takes a copy of the current process environment ('environ') and stores it in saved_env,
 * replacing whatever was stored there before.
 *
 * The declaration of 'l', the handling of a failed strv_copy() and the return statement are on
 * lines that are not part of this excerpt. */
3021 static int save_env(void) {
3024 l
= strv_copy(environ
);
3028 strv_free_and_replace(saved_env
, l
);
3032 int main(int argc
, char *argv
[]) {
3034 initrd_timestamp
= DUAL_TIMESTAMP_NULL
,
3035 userspace_timestamp
= DUAL_TIMESTAMP_NULL
,
3036 kernel_timestamp
= DUAL_TIMESTAMP_NULL
,
3037 security_start_timestamp
= DUAL_TIMESTAMP_NULL
,
3038 security_finish_timestamp
= DUAL_TIMESTAMP_NULL
;
3039 struct rlimit saved_rlimit_nofile
= RLIMIT_MAKE_CONST(0),
3040 saved_rlimit_memlock
= RLIMIT_MAKE_CONST(RLIM_INFINITY
); /* The original rlimits we passed
3041 * in. Note we use different values
3042 * for the two that indicate whether
3043 * these fields are initialized! */
3044 bool skip_setup
, loaded_policy
= false, queue_default_job
= false, first_boot
= false;
3045 char *switch_root_dir
= NULL
, *switch_root_init
= NULL
;
3046 usec_t before_startup
, after_startup
;
3047 static char systemd
[] = "systemd";
3048 const char *error_message
= NULL
;
3049 uint64_t saved_ambient_set
= 0;
3050 int r
, retval
= EXIT_FAILURE
;
3054 assert_se(argc
> 0 && !isempty(argv
[0]));
3056 /* Take timestamps early on */
3057 dual_timestamp_from_monotonic(&kernel_timestamp
, 0);
3058 dual_timestamp_now(&userspace_timestamp
);
3060 /* Figure out whether we need to initialize the system, or if we already did that because we are
3062 skip_setup
= early_skip_setup_check(argc
, argv
);
3064 /* If we get started via the /sbin/init symlink then we are called 'init'. After a subsequent
3065 * reexecution we are then called 'systemd'. That is confusing, hence let's call us systemd
3067 program_invocation_short_name
= systemd
;
3068 (void) prctl(PR_SET_NAME
, systemd
);
3070 /* Save the original command line */
3071 save_argc_argv(argc
, argv
);
3073 /* Save the original environment as we might need to restore it if we're requested to execute another
3074 * system manager later. */
3077 error_message
= "Failed to copy environment block";
3081 /* Make sure that if the user says "syslog" we actually log to the journal. */
3082 log_set_upgrade_syslog_to_journal(true);
3084 if (getpid_cached() == 1) {
3085 /* When we run as PID 1 force system mode */
3086 arg_runtime_scope
= RUNTIME_SCOPE_SYSTEM
;
3088 /* Disable the umask logic */
3091 /* Make sure that at least initially we do not ever log to journald/syslogd, because it might
3092 * not be activated yet (even though the log socket for it exists). */
3093 log_set_prohibit_ipc(true);
3095 /* Always reopen /dev/console when running as PID 1 or one of its pre-execve() children. This
3096 * is important so that we never end up logging to any foreign stderr, for example if we have
3097 * to log in a child process right before execve()'ing the actual binary, at a point in time
3098 * where socket activation stderr/stdout are already set up. */
3099 log_set_always_reopen_console(true);
3101 if (detect_container() <= 0) {
3103 /* Running outside of a container as PID 1 */
3104 log_set_target_and_open(LOG_TARGET_KMSG
);
3107 initrd_timestamp
= userspace_timestamp
;
3110 r
= mount_setup_early();
3112 error_message
= "Failed to mount early API filesystems";
3117 /* We might have just mounted /proc, so let's try to parse the kernel
3118 * command line log arguments immediately. */
3119 log_parse_environment();
3121 /* Let's open the log backend a second time, in case the first time didn't
3122 * work. Quite possibly we have mounted /dev just now, so /dev/kmsg became
3123 * available, and it previously wasn't. */
3127 disable_printk_ratelimit();
3129 r
= initialize_security(
3131 &security_start_timestamp
,
3132 &security_finish_timestamp
,
3140 error_message
= "Failed to initialize MAC support";
3145 initialize_clock_timewarp();
3147 clock_apply_epoch(/* allow_backwards= */ !skip_setup
);
3149 /* Set the default for later on, but don't actually open the logs like this for
3150 * now. Note that if we are transitioning from the initrd there might still be
3151 * journal fd open, and we shouldn't attempt opening that before we parsed
3152 * /proc/cmdline which might redirect output elsewhere. */
3153 log_set_target(LOG_TARGET_JOURNAL_OR_KMSG
);
3156 /* Running inside a container, as PID 1 */
3157 log_set_target_and_open(LOG_TARGET_CONSOLE
);
3159 /* For later on, see above... */
3160 log_set_target(LOG_TARGET_JOURNAL
);
3162 /* clear the kernel timestamp, because we are in a container */
3163 kernel_timestamp
= DUAL_TIMESTAMP_NULL
;
3166 initialize_coredump(skip_setup
);
3168 r
= fixup_environment();
3170 log_struct_errno(LOG_EMERG
, r
,
3171 LOG_MESSAGE("Failed to fix up PID 1 environment: %m"),
3172 LOG_MESSAGE_ID(SD_MESSAGE_CORE_PID1_ENVIRONMENT_STR
));
3173 error_message
= "Failed to fix up PID1 environment";
3177 /* Try to figure out if we can use colors with the console. No need to do that for user
3178 * instances since they never log into the console. */
3179 log_show_color(colors_enabled());
3181 r
= make_null_stdio();
3183 log_warning_errno(r
, "Failed to redirect standard streams to /dev/null, ignoring: %m");
3185 /* Load the kernel modules early. */
3187 (void) kmod_setup();
3189 /* Mount /proc, /sys and friends, so that /proc/cmdline and /proc/$PID/fd is available. */
3190 r
= mount_setup(loaded_policy
, skip_setup
);
3192 error_message
= "Failed to mount API filesystems";
3196 /* The efivarfs is now mounted, let's lock down the system token. */
3197 lock_down_efi_variables();
3199 /* Running as user instance */
3200 arg_runtime_scope
= RUNTIME_SCOPE_USER
;
3201 log_set_always_reopen_console(true);
3202 log_set_target_and_open(LOG_TARGET_AUTO
);
3204 /* clear the kernel timestamp, because we are not PID 1 */
3205 kernel_timestamp
= DUAL_TIMESTAMP_NULL
;
3209 error_message
= "Failed to initialize MAC support";
3214 /* Save the original RLIMIT_NOFILE/RLIMIT_MEMLOCK so that we can reset it later when
3215 * transitioning from the initrd to the main systemd or suchlike. */
3216 save_rlimits(&saved_rlimit_nofile
, &saved_rlimit_memlock
);
3218 /* Reset all signal handlers. */
3219 (void) reset_all_signal_handlers();
3220 (void) ignore_signals(SIGNALS_IGNORE
);
3222 (void) parse_configuration(&saved_rlimit_nofile
, &saved_rlimit_memlock
);
3224 r
= parse_argv(argc
, argv
);
3226 error_message
= "Failed to parse command line arguments";
3230 r
= safety_checks();
3234 if (IN_SET(arg_action
, ACTION_TEST
, ACTION_HELP
, ACTION_DUMP_CONFIGURATION_ITEMS
, ACTION_DUMP_BUS_PROPERTIES
, ACTION_BUS_INTROSPECT
))
3235 pager_open(arg_pager_flags
);
3237 if (arg_action
!= ACTION_RUN
)
3240 if (arg_action
== ACTION_HELP
) {
3241 retval
= help() < 0 ? EXIT_FAILURE
: EXIT_SUCCESS
;
3243 } else if (arg_action
== ACTION_VERSION
) {
3246 } else if (arg_action
== ACTION_DUMP_CONFIGURATION_ITEMS
) {
3247 unit_dump_config_items(stdout
);
3248 retval
= EXIT_SUCCESS
;
3250 } else if (arg_action
== ACTION_DUMP_BUS_PROPERTIES
) {
3251 dump_bus_properties(stdout
);
3252 retval
= EXIT_SUCCESS
;
3254 } else if (arg_action
== ACTION_BUS_INTROSPECT
) {
3255 r
= bus_manager_introspect_implementations(stdout
, arg_bus_introspect
);
3256 retval
= r
>= 0 ? EXIT_SUCCESS
: EXIT_FAILURE
;
3260 assert_se(IN_SET(arg_action
, ACTION_RUN
, ACTION_TEST
));
3262 /* Move out of the way, so that we won't block unmounts */
3263 assert_se(chdir("/") == 0);
3265 if (arg_action
== ACTION_RUN
) {
3267 /* Apply the systemd.clock_usec= kernel command line switch */
3268 apply_clock_update();
3270 /* Apply random seed from kernel command line */
3271 cmdline_take_random_seed();
3274 /* A core pattern might have been specified via the cmdline. */
3275 initialize_core_pattern(skip_setup
);
3277 /* Make /usr/ read-only */
3278 apply_protect_system(skip_setup
);
3280 /* Close logging fds, in order not to confuse collecting passed fds and terminal logic below */
3283 /* Remember open file descriptors for later deserialization */
3284 r
= collect_fds(&fds
, &error_message
);
3288 /* Give up any control of the console, but make sure it's initialized. */
3289 setup_console_terminal(skip_setup
);
3291 /* Open the logging devices, if possible and necessary */
3295 log_execution_mode(&first_boot
);
3297 r
= cg_has_legacy();
3299 error_message
= "Failed to check cgroup hierarchy";
3303 r
= log_full_errno(LOG_EMERG
, SYNTHETIC_ERRNO(EPROTO
),
3304 "Detected cgroup v1 hierarchy at /sys/fs/cgroup/, which is no longer supported by current version of systemd.\n"
3305 "Please instruct your initrd to mount cgroup v2 (unified) hierarchy,\n"
3306 "possibly by removing any stale kernel command line options, such as:\n"
3307 " systemd.legacy_systemd_cgroup_controller=1\n"
3308 " systemd.unified_cgroup_hierarchy=0");
3310 error_message
= "Detected unsupported legacy cgroup hierarchy, refusing execution";
3314 /* Building without libmount is allowed, but if it is compiled in, then we must be able to load it */
3315 r
= dlopen_libmount();
3316 if (r
< 0 && !ERRNO_IS_NEG_NOT_SUPPORTED(r
)) {
3317 error_message
= "Failed to load libmount.so";
3321 r
= initialize_runtime(skip_setup
,
3323 &saved_rlimit_nofile
,
3324 &saved_rlimit_memlock
,
3330 r
= manager_new(arg_runtime_scope
,
3331 arg_action
== ACTION_TEST
? MANAGER_TEST_FULL
: 0,
3334 log_struct_errno(LOG_EMERG
, r
,
3335 LOG_MESSAGE("Failed to allocate manager object: %m"),
3336 LOG_MESSAGE_ID(SD_MESSAGE_CORE_MANAGER_ALLOCATE_STR
));
3337 error_message
= "Failed to allocate manager object";
3341 m
->timestamps
[MANAGER_TIMESTAMP_KERNEL
] = kernel_timestamp
;
3342 m
->timestamps
[MANAGER_TIMESTAMP_INITRD
] = initrd_timestamp
;
3343 m
->timestamps
[MANAGER_TIMESTAMP_USERSPACE
] = userspace_timestamp
;
3344 m
->timestamps
[manager_timestamp_initrd_mangle(MANAGER_TIMESTAMP_SECURITY_START
)] = security_start_timestamp
;
3345 m
->timestamps
[manager_timestamp_initrd_mangle(MANAGER_TIMESTAMP_SECURITY_FINISH
)] = security_finish_timestamp
;
3347 m
->saved_ambient_set
= saved_ambient_set
;
3349 set_manager_defaults(m
);
3350 set_manager_settings(m
);
3351 manager_set_first_boot(m
, first_boot
);
3352 manager_set_switching_root(m
, arg_switched_root
);
3354 /* Remember whether we should queue the default job */
3355 queue_default_job
= !arg_serialization
|| arg_switched_root
;
3357 before_startup
= now(CLOCK_MONOTONIC
);
3359 r
= manager_startup(m
, arg_serialization
, fds
, /* root= */ NULL
);
3361 error_message
= "Failed to start up manager";
3365 /* This will close all file descriptors that were opened, but not claimed by any unit. */
3366 fds
= fdset_free(fds
);
3367 arg_serialization
= safe_fclose(arg_serialization
);
3369 if (queue_default_job
) {
3370 r
= do_queue_default_job(m
, &error_message
);
3375 after_startup
= now(CLOCK_MONOTONIC
);
3377 log_full(arg_action
== ACTION_TEST
? LOG_INFO
: LOG_DEBUG
,
3378 "Loaded units and determined initial transaction in %s.",
3379 FORMAT_TIMESPAN(after_startup
- before_startup
, 100 * USEC_PER_MSEC
));
3381 if (arg_action
== ACTION_TEST
) {
3382 manager_test_summary(m
);
3383 retval
= EXIT_SUCCESS
;
3387 r
= invoke_main_loop(m
,
3388 &saved_rlimit_nofile
,
3389 &saved_rlimit_memlock
,
3395 /* MANAGER_OK and MANAGER_RELOAD are not expected here. */
3396 assert(r
< 0 || IN_SET(r
, MANAGER_REEXECUTE
, MANAGER_EXIT
) ||
3397 (arg_runtime_scope
== RUNTIME_SCOPE_SYSTEM
&&
3398 IN_SET(r
, MANAGER_REBOOT
,
3399 MANAGER_SOFT_REBOOT
,
3403 MANAGER_SWITCH_ROOT
)));
3409 arg_reboot_watchdog
= manager_get_watchdog(m
, WATCHDOG_REBOOT
);
3410 arg_kexec_watchdog
= manager_get_watchdog(m
, WATCHDOG_KEXEC
);
3411 m
= manager_free(m
);
3414 mac_selinux_finish();
3416 if (IN_SET(r
, MANAGER_REEXECUTE
, MANAGER_SWITCH_ROOT
, MANAGER_SOFT_REBOOT
))
3419 &saved_rlimit_nofile
,
3420 &saved_rlimit_memlock
,
3425 &error_message
); /* This only returns if reexecution failed */
3427 arg_serialization
= safe_fclose(arg_serialization
);
3428 fds
= fdset_free(fds
);
3430 saved_env
= strv_free(saved_env
);
3432 #if HAVE_VALGRIND_VALGRIND_H
3433 /* If we are PID 1 and running under valgrind, then let's exit
3434 * here explicitly. valgrind will only generate nice output on
3435 * exit(), not on exec(), hence let's do the former not the
3437 if (getpid_cached() == 1 && RUNNING_ON_VALGRIND
) {
3438 /* Cleanup watchdog_device strings for valgrind. We need them
3439 * in become_shutdown() so normally we cannot free them yet. */
3440 watchdog_free_device();
3446 #if HAS_FEATURE_ADDRESS_SANITIZER
3447 /* At this stage we most likely don't have stdio/stderr open, so the following
3448 * LSan check would not print any actionable information and would just crash
3449 * PID 1. To make this a bit more helpful, let's try to open /dev/console,
3450 * and if we succeed redirect LSan's report there. */
3451 if (getpid_cached() == 1) {
3452 _cleanup_close_
int tty_fd
= -EBADF
;
3454 tty_fd
= open_terminal("/dev/console", O_WRONLY
|O_NOCTTY
|O_CLOEXEC
);
3456 __sanitizer_set_report_fd((void*) (intptr_t) tty_fd
);
3458 __lsan_do_leak_check();
3463 (void) sd_notifyf(/* unset_environment= */ false,
3466 /* Try to invoke the shutdown binary unless we already failed.
3467 * If we failed above, we want to freeze after finishing cleanup. */
3468 if (arg_runtime_scope
== RUNTIME_SCOPE_SYSTEM
&&
3469 IN_SET(r
, MANAGER_EXIT
, MANAGER_REBOOT
, MANAGER_POWEROFF
, MANAGER_HALT
, MANAGER_KEXEC
)) {
3470 r
= become_shutdown(r
, retval
);
3471 log_error_errno(r
, "Failed to execute shutdown binary, %s: %m", getpid_cached() == 1 ? "freezing" : "quitting");
3472 error_message
= "Failed to execute shutdown binary";
3475 /* This is primarily useful when running systemd in a VM, as it provides the user running the VM with
3476 * a mechanism to pick up systemd's exit status in the VM. */
3477 (void) sd_notifyf(/* unset_environment= */ false,
3478 "EXIT_STATUS=%i", retval
);
3480 watchdog_free_device();
3481 arg_watchdog_device
= mfree(arg_watchdog_device
);
3483 if (getpid_cached() == 1) {
3485 manager_status_printf(NULL
, STATUS_TYPE_EMERGENCY
,
3486 ANSI_HIGHLIGHT_RED
"!!!!!!" ANSI_NORMAL
,
3487 "%s.", error_message
);
3488 freeze_or_exit_or_reboot();