1 /* SPDX-License-Identifier: LGPL-2.1-or-later */
8 #include <sys/reboot.h>
13 #if HAVE_VALGRIND_VALGRIND_H
14 #include <valgrind/valgrind.h>
18 #include "sd-daemon.h"
19 #include "sd-messages.h"
21 #include "alloc-util.h"
22 #include "apparmor-setup.h"
23 #include "architecture.h"
25 #include "bus-error.h"
27 #include "capability-util.h"
28 #include "cgroup-util.h"
29 #include "clock-util.h"
30 #include "conf-parser.h"
31 #include "cpu-set-util.h"
32 #include "dbus-manager.h"
35 #include "dev-setup.h"
36 #include "efi-random.h"
38 #include "emergency-action.h"
40 #include "exit-status.h"
44 #include "format-util.h"
46 #include "hexdecoct.h"
47 #include "hostname-setup.h"
48 #include "ima-setup.h"
50 #include "kmod-setup.h"
51 #include "limits-util.h"
52 #include "load-fragment.h"
54 #include "loopback-setup.h"
55 #include "machine-id-setup.h"
58 #include "mount-setup.h"
61 #include "parse-argument.h"
62 #include "parse-util.h"
63 #include "path-util.h"
64 #include "pretty-print.h"
65 #include "proc-cmdline.h"
66 #include "process-util.h"
67 #include "random-util.h"
68 #include "raw-clone.h"
69 #include "rlimit-util.h"
71 #include "seccomp-util.h"
73 #include "selinux-setup.h"
74 #include "selinux-util.h"
75 #include "signal-util.h"
76 #include "smack-setup.h"
78 #include "stat-util.h"
79 #include "stdio-util.h"
81 #include "switch-root.h"
82 #include "sysctl-util.h"
83 #include "terminal-util.h"
84 #include "umask-util.h"
85 #include "user-util.h"
90 #if HAS_FEATURE_ADDRESS_SANITIZER
91 #include <sanitizer/lsan_interface.h>
94 #define DEFAULT_TASKS_MAX ((TasksMax) { 15U, 100U }) /* 15% */
101 ACTION_DUMP_CONFIGURATION_ITEMS
,
102 ACTION_DUMP_BUS_PROPERTIES
,
103 ACTION_BUS_INTROSPECT
,
104 } arg_action
= ACTION_RUN
;
/* Argument of --bus-introspect=. parse_argv() points this at optarg (no copy is made) when it
 * selects ACTION_BUS_INTROSPECT, so the string is borrowed from argv and must not be freed. */
106 static const char *arg_bus_introspect
= NULL
;
108 /* These variables have static storage duration and are therefore zero-initialized automatically, so
109 * we never read uninitialized memory. The real defaults are assigned in reset_arguments() below. */
110 static char *arg_default_unit
;
111 static bool arg_system
;
112 static bool arg_dump_core
;
113 static int arg_crash_chvt
;
114 static bool arg_crash_shell
;
115 static bool arg_crash_reboot
;
116 static char *arg_confirm_spawn
;
117 static ShowStatus arg_show_status
;
118 static StatusUnitFormat arg_status_unit_format
;
119 static bool arg_switched_root
;
120 static PagerFlags arg_pager_flags
;
121 static bool arg_service_watchdogs
;
122 static ExecOutput arg_default_std_output
;
123 static ExecOutput arg_default_std_error
;
124 static usec_t arg_default_restart_usec
;
125 static usec_t arg_default_timeout_start_usec
;
126 static usec_t arg_default_timeout_stop_usec
;
127 static usec_t arg_default_timeout_abort_usec
;
128 static bool arg_default_timeout_abort_set
;
129 static usec_t arg_default_start_limit_interval
;
130 static unsigned arg_default_start_limit_burst
;
131 static usec_t arg_runtime_watchdog
;
132 static usec_t arg_reboot_watchdog
;
133 static usec_t arg_kexec_watchdog
;
134 static char *arg_early_core_pattern
;
135 static char *arg_watchdog_device
;
136 static char **arg_default_environment
;
137 static char **arg_manager_environment
;
138 static struct rlimit
*arg_default_rlimit
[_RLIMIT_MAX
];
139 static uint64_t arg_capability_bounding_set
;
140 static bool arg_no_new_privs
;
141 static nsec_t arg_timer_slack_nsec
;
142 static usec_t arg_default_timer_accuracy_usec
;
143 static Set
* arg_syscall_archs
;
144 static FILE* arg_serialization
;
145 static int arg_default_cpu_accounting
;
146 static bool arg_default_io_accounting
;
147 static bool arg_default_ip_accounting
;
148 static bool arg_default_blockio_accounting
;
149 static bool arg_default_memory_accounting
;
150 static bool arg_default_tasks_accounting
;
151 static TasksMax arg_default_tasks_max
;
152 static sd_id128_t arg_machine_id
;
153 static EmergencyAction arg_cad_burst_action
;
154 static OOMPolicy arg_default_oom_policy
;
155 static CPUSet arg_cpu_affinity
;
156 static NUMAPolicy arg_numa_policy
;
157 static usec_t arg_clock_usec
;
158 static void *arg_random_seed
;
159 static size_t arg_random_seed_size
;
161 /* A copy of the original environment block */
162 static char **saved_env
= NULL
;
/* Forward declaration only; the definition is not part of this section of the file. Both rlimit
 * arguments are passed as pointers to const, i.e. they are inputs that are not written through.
 * NOTE(review): presumably these are the RLIMIT_NOFILE/RLIMIT_MEMLOCK values saved at startup —
 * confirm against the definition. */
164 static int parse_configuration(const struct rlimit
*saved_rlimit_nofile
,
165 const struct rlimit
*saved_rlimit_memlock
);
167 static int manager_find_user_config_paths(char ***ret_files
, char ***ret_dirs
) {
168 _cleanup_free_
char *base
= NULL
;
169 _cleanup_strv_free_
char **files
= NULL
, **dirs
= NULL
;
172 r
= xdg_user_config_dir(&base
, "/systemd");
176 r
= strv_extendf(&files
, "%s/user.conf", base
);
180 r
= strv_extend(&files
, PKGSYSCONFDIR
"/user.conf");
184 r
= strv_consume(&dirs
, TAKE_PTR(base
));
188 r
= strv_extend_strv(&dirs
, CONF_PATHS_STRV("systemd"), false);
192 *ret_files
= TAKE_PTR(files
);
193 *ret_dirs
= TAKE_PTR(dirs
);
197 _noreturn_
static void freeze_or_exit_or_reboot(void) {
199 /* If we are running in a container, prefer exiting: that way we can propagate an exit code to
200 * the container manager and thus inform it that something went wrong. */
201 if (detect_container() > 0) {
202 log_emergency("Exiting PID 1...");
203 _exit(EXIT_EXCEPTION
);
206 if (arg_crash_reboot
) {
207 log_notice("Rebooting in 10s...");
210 log_notice("Rebooting now...");
211 (void) reboot(RB_AUTOBOOT
);
212 log_emergency_errno(errno
, "Failed to reboot: %m");
215 log_emergency("Freezing execution.");
219 _noreturn_
static void crash(int sig
) {
223 if (getpid_cached() != 1)
224 /* Pass this on immediately, if this is not PID 1 */
226 else if (!arg_dump_core
)
227 log_emergency("Caught <%s>, not dumping core.", signal_to_string(sig
));
229 sa
= (struct sigaction
) {
230 .sa_handler
= nop_signal_handler
,
231 .sa_flags
= SA_NOCLDSTOP
|SA_RESTART
,
234 /* We want to wait for the core process, hence let's enable SIGCHLD */
235 (void) sigaction(SIGCHLD
, &sa
, NULL
);
237 pid
= raw_clone(SIGCHLD
);
239 log_emergency_errno(errno
, "Caught <%s>, cannot fork for core dump: %m", signal_to_string(sig
));
241 /* Enable default signal handler for core dump */
243 sa
= (struct sigaction
) {
244 .sa_handler
= SIG_DFL
,
246 (void) sigaction(sig
, &sa
, NULL
);
248 /* Don't limit the coredump size */
249 (void) setrlimit(RLIMIT_CORE
, &RLIMIT_MAKE_CONST(RLIM_INFINITY
));
251 /* Just to be sure... */
254 /* Raise the signal again */
256 (void) kill(pid
, sig
); /* raise() would kill the parent */
258 assert_not_reached("We shouldn't be here...");
259 _exit(EXIT_EXCEPTION
);
264 /* Order things nicely. */
265 r
= wait_for_terminate(pid
, &status
);
267 log_emergency_errno(r
, "Caught <%s>, waitpid() failed: %m", signal_to_string(sig
));
268 else if (status
.si_code
!= CLD_DUMPED
) {
269 const char *s
= status
.si_code
== CLD_EXITED
270 ? exit_status_to_string(status
.si_status
, EXIT_STATUS_LIBC
)
271 : signal_to_string(status
.si_status
);
273 log_emergency("Caught <%s>, core dump failed (child "PID_FMT
", code=%s, status=%i/%s).",
274 signal_to_string(sig
),
276 sigchld_code_to_string(status
.si_code
),
277 status
.si_status
, strna(s
));
279 log_emergency("Caught <%s>, dumped core as pid "PID_FMT
".",
280 signal_to_string(sig
), pid
);
284 if (arg_crash_chvt
>= 0)
285 (void) chvt(arg_crash_chvt
);
287 sa
= (struct sigaction
) {
288 .sa_handler
= SIG_IGN
,
289 .sa_flags
= SA_NOCLDSTOP
|SA_NOCLDWAIT
|SA_RESTART
,
292 /* Let the kernel reap children for us */
293 (void) sigaction(SIGCHLD
, &sa
, NULL
);
295 if (arg_crash_shell
) {
296 log_notice("Executing crash shell in 10s...");
299 pid
= raw_clone(SIGCHLD
);
301 log_emergency_errno(errno
, "Failed to fork off crash shell: %m");
304 (void) make_console_stdio();
305 (void) rlimit_nofile_safe();
306 (void) execle("/bin/sh", "/bin/sh", NULL
, environ
);
308 log_emergency_errno(errno
, "execle() failed: %m");
309 _exit(EXIT_EXCEPTION
);
311 log_info("Spawned crash shell as PID "PID_FMT
".", pid
);
312 (void) wait_for_terminate(pid
, NULL
);
316 freeze_or_exit_or_reboot();
319 static void install_crash_handler(void) {
320 static const struct sigaction sa
= {
322 .sa_flags
= SA_NODEFER
, /* So that we can raise the signal again from the signal handler */
326 /* We ignore the return value here, since we don't mind if we cannot set up a crash handler */
327 r
= sigaction_many(&sa
, SIGNALS_CRASH_HANDLER
);
329 log_debug_errno(r
, "I had trouble setting up the crash handler, ignoring: %m");
332 static int console_setup(void) {
333 _cleanup_close_
int tty_fd
= -1;
336 tty_fd
= open_terminal("/dev/console", O_WRONLY
|O_NOCTTY
|O_CLOEXEC
);
338 return log_error_errno(tty_fd
, "Failed to open /dev/console: %m");
340 /* We don't want to force text mode: Plymouth may already be showing
341 * pictures from the initrd. */
342 r
= reset_terminal_fd(tty_fd
, false);
344 return log_error_errno(r
, "Failed to reset /dev/console: %m");
349 static int set_machine_id(const char *m
) {
353 if (sd_id128_from_string(m
, &t
) < 0)
356 if (sd_id128_is_null(t
))
363 static int parse_proc_cmdline_item(const char *key
, const char *value
, void *data
) {
368 if (STR_IN_SET(key
, "systemd.unit", "rd.systemd.unit")) {
370 if (proc_cmdline_value_missing(key
, value
))
373 if (!unit_name_is_valid(value
, UNIT_NAME_PLAIN
|UNIT_NAME_INSTANCE
))
374 log_warning("Unit name specified on %s= is not valid, ignoring: %s", key
, value
);
375 else if (in_initrd() == !!startswith(key
, "rd."))
376 return free_and_strdup_warn(&arg_default_unit
, value
);
378 } else if (proc_cmdline_key_streq(key
, "systemd.dump_core")) {
380 r
= value
? parse_boolean(value
) : true;
382 log_warning_errno(r
, "Failed to parse dump core switch %s, ignoring: %m", value
);
386 } else if (proc_cmdline_key_streq(key
, "systemd.early_core_pattern")) {
388 if (proc_cmdline_value_missing(key
, value
))
391 if (path_is_absolute(value
))
392 (void) parse_path_argument(value
, false, &arg_early_core_pattern
);
394 log_warning("Specified core pattern '%s' is not an absolute path, ignoring.", value
);
396 } else if (proc_cmdline_key_streq(key
, "systemd.crash_chvt")) {
399 arg_crash_chvt
= 0; /* turn on */
401 r
= parse_crash_chvt(value
, &arg_crash_chvt
);
403 log_warning_errno(r
, "Failed to parse crash chvt switch %s, ignoring: %m", value
);
406 } else if (proc_cmdline_key_streq(key
, "systemd.crash_shell")) {
408 r
= value
? parse_boolean(value
) : true;
410 log_warning_errno(r
, "Failed to parse crash shell switch %s, ignoring: %m", value
);
414 } else if (proc_cmdline_key_streq(key
, "systemd.crash_reboot")) {
416 r
= value
? parse_boolean(value
) : true;
418 log_warning_errno(r
, "Failed to parse crash reboot switch %s, ignoring: %m", value
);
420 arg_crash_reboot
= r
;
422 } else if (proc_cmdline_key_streq(key
, "systemd.confirm_spawn")) {
425 r
= parse_confirm_spawn(value
, &s
);
427 log_warning_errno(r
, "Failed to parse confirm_spawn switch %s, ignoring: %m", value
);
429 free_and_replace(arg_confirm_spawn
, s
);
431 } else if (proc_cmdline_key_streq(key
, "systemd.service_watchdogs")) {
433 r
= value
? parse_boolean(value
) : true;
435 log_warning_errno(r
, "Failed to parse service watchdog switch %s, ignoring: %m", value
);
437 arg_service_watchdogs
= r
;
439 } else if (proc_cmdline_key_streq(key
, "systemd.show_status")) {
442 r
= parse_show_status(value
, &arg_show_status
);
444 log_warning_errno(r
, "Failed to parse show status switch %s, ignoring: %m", value
);
446 arg_show_status
= SHOW_STATUS_YES
;
448 } else if (proc_cmdline_key_streq(key
, "systemd.status_unit_format")) {
450 if (proc_cmdline_value_missing(key
, value
))
453 r
= status_unit_format_from_string(value
);
455 log_warning_errno(r
, "Failed to parse %s=%s, ignoring: %m", key
, value
);
457 arg_status_unit_format
= r
;
459 } else if (proc_cmdline_key_streq(key
, "systemd.default_standard_output")) {
461 if (proc_cmdline_value_missing(key
, value
))
464 r
= exec_output_from_string(value
);
466 log_warning_errno(r
, "Failed to parse default standard output switch %s, ignoring: %m", value
);
468 arg_default_std_output
= r
;
470 } else if (proc_cmdline_key_streq(key
, "systemd.default_standard_error")) {
472 if (proc_cmdline_value_missing(key
, value
))
475 r
= exec_output_from_string(value
);
477 log_warning_errno(r
, "Failed to parse default standard error switch %s, ignoring: %m", value
);
479 arg_default_std_error
= r
;
481 } else if (streq(key
, "systemd.setenv")) {
483 if (proc_cmdline_value_missing(key
, value
))
486 if (!env_assignment_is_valid(value
))
487 log_warning("Environment variable assignment '%s' is not valid. Ignoring.", value
);
489 r
= strv_env_replace_strdup(&arg_default_environment
, value
);
494 } else if (proc_cmdline_key_streq(key
, "systemd.machine_id")) {
496 if (proc_cmdline_value_missing(key
, value
))
499 r
= set_machine_id(value
);
501 log_warning_errno(r
, "MachineID '%s' is not valid, ignoring: %m", value
);
503 } else if (proc_cmdline_key_streq(key
, "systemd.default_timeout_start_sec")) {
505 if (proc_cmdline_value_missing(key
, value
))
508 r
= parse_sec(value
, &arg_default_timeout_start_usec
);
510 log_warning_errno(r
, "Failed to parse default start timeout '%s', ignoring: %m", value
);
512 if (arg_default_timeout_start_usec
<= 0)
513 arg_default_timeout_start_usec
= USEC_INFINITY
;
515 } else if (proc_cmdline_key_streq(key
, "systemd.cpu_affinity")) {
517 if (proc_cmdline_value_missing(key
, value
))
520 r
= parse_cpu_set(value
, &arg_cpu_affinity
);
522 log_warning_errno(r
, "Failed to parse CPU affinity mask '%s', ignoring: %m", value
);
524 } else if (proc_cmdline_key_streq(key
, "systemd.watchdog_device")) {
526 if (proc_cmdline_value_missing(key
, value
))
529 (void) parse_path_argument(value
, false, &arg_watchdog_device
);
531 } else if (proc_cmdline_key_streq(key
, "systemd.clock_usec")) {
533 if (proc_cmdline_value_missing(key
, value
))
536 r
= safe_atou64(value
, &arg_clock_usec
);
538 log_warning_errno(r
, "Failed to parse systemd.clock_usec= argument, ignoring: %s", value
);
540 } else if (proc_cmdline_key_streq(key
, "systemd.random_seed")) {
544 if (proc_cmdline_value_missing(key
, value
))
547 r
= unbase64mem(value
, SIZE_MAX
, &p
, &sz
);
549 log_warning_errno(r
, "Failed to parse systemd.random_seed= argument, ignoring: %s", value
);
551 free(arg_random_seed
);
552 arg_random_seed
= sz
> 0 ? p
: mfree(p
);
553 arg_random_seed_size
= sz
;
555 } else if (streq(key
, "quiet") && !value
) {
557 if (arg_show_status
== _SHOW_STATUS_INVALID
)
558 arg_show_status
= SHOW_STATUS_ERROR
;
560 } else if (streq(key
, "debug") && !value
) {
562 /* Note that log_parse_environment() handles 'debug'
563 * too, and sets the log level to LOG_DEBUG. */
565 if (detect_container() > 0)
566 log_set_target(LOG_TARGET_CONSOLE
);
571 /* Compatible with SysV, but supported independently even if SysV compatibility is disabled. */
572 target
= runlevel_to_target(key
);
574 return free_and_strdup_warn(&arg_default_unit
, target
);
580 #define DEFINE_SETTER(name, func, descr) \
581 static int name(const char *unit, \
582 const char *filename, \
584 const char *section, \
585 unsigned section_line, \
586 const char *lvalue, \
588 const char *rvalue, \
600 log_syntax(unit, LOG_ERR, filename, line, r, \
601 "Invalid " descr "'%s': %m", \
/* Instantiate one static config-parse callback per logging setting via
 * DEFINE_SETTER(name, func, descr). The generated names are the callbacks registered for the
 * "Manager" keys LogLevel, LogTarget, LogColor, LogLocation and LogTime in the items table of
 * parse_config_file(). descr is the text spliced into the "Invalid ..." log_syntax() message.
 * NOTE(review): the macro body is only partly visible here — presumably func is invoked on rvalue;
 * confirm against the full macro definition.
 * NOTE(review): the macro concatenates descr directly with the opening quote, so the message has
 * no space between the description and the quoted value. */
607 DEFINE_SETTER(config_parse_level2
, log_set_max_level_from_string
, "log level");
608 DEFINE_SETTER(config_parse_target
, log_set_target_from_string
, "target");
609 DEFINE_SETTER(config_parse_color
, log_show_color_from_string
, "color");
610 DEFINE_SETTER(config_parse_location
, log_show_location_from_string
, "location");
611 DEFINE_SETTER(config_parse_time
, log_show_time_from_string
, "time");
613 static int config_parse_default_timeout_abort(
615 const char *filename
,
618 unsigned section_line
,
626 r
= config_parse_timeout_abort(unit
, filename
, line
, section
, section_line
, lvalue
, ltype
, rvalue
,
627 &arg_default_timeout_abort_usec
, userdata
);
629 arg_default_timeout_abort_set
= r
;
633 static int parse_config_file(void) {
634 const ConfigTableItem items
[] = {
635 { "Manager", "LogLevel", config_parse_level2
, 0, NULL
},
636 { "Manager", "LogTarget", config_parse_target
, 0, NULL
},
637 { "Manager", "LogColor", config_parse_color
, 0, NULL
},
638 { "Manager", "LogLocation", config_parse_location
, 0, NULL
},
639 { "Manager", "LogTime", config_parse_time
, 0, NULL
},
640 { "Manager", "DumpCore", config_parse_bool
, 0, &arg_dump_core
},
641 { "Manager", "CrashChVT", /* legacy */ config_parse_crash_chvt
, 0, &arg_crash_chvt
},
642 { "Manager", "CrashChangeVT", config_parse_crash_chvt
, 0, &arg_crash_chvt
},
643 { "Manager", "CrashShell", config_parse_bool
, 0, &arg_crash_shell
},
644 { "Manager", "CrashReboot", config_parse_bool
, 0, &arg_crash_reboot
},
645 { "Manager", "ShowStatus", config_parse_show_status
, 0, &arg_show_status
},
646 { "Manager", "StatusUnitFormat", config_parse_status_unit_format
, 0, &arg_status_unit_format
},
647 { "Manager", "CPUAffinity", config_parse_cpu_affinity2
, 0, &arg_cpu_affinity
},
648 { "Manager", "NUMAPolicy", config_parse_numa_policy
, 0, &arg_numa_policy
.type
},
649 { "Manager", "NUMAMask", config_parse_numa_mask
, 0, &arg_numa_policy
},
650 { "Manager", "JoinControllers", config_parse_warn_compat
, DISABLED_CONFIGURATION
, NULL
},
651 { "Manager", "RuntimeWatchdogSec", config_parse_sec
, 0, &arg_runtime_watchdog
},
652 { "Manager", "RebootWatchdogSec", config_parse_sec
, 0, &arg_reboot_watchdog
},
653 { "Manager", "ShutdownWatchdogSec", config_parse_sec
, 0, &arg_reboot_watchdog
}, /* obsolete alias */
654 { "Manager", "KExecWatchdogSec", config_parse_sec
, 0, &arg_kexec_watchdog
},
655 { "Manager", "WatchdogDevice", config_parse_path
, 0, &arg_watchdog_device
},
656 { "Manager", "CapabilityBoundingSet", config_parse_capability_set
, 0, &arg_capability_bounding_set
},
657 { "Manager", "NoNewPrivileges", config_parse_bool
, 0, &arg_no_new_privs
},
659 { "Manager", "SystemCallArchitectures", config_parse_syscall_archs
, 0, &arg_syscall_archs
},
661 { "Manager", "TimerSlackNSec", config_parse_nsec
, 0, &arg_timer_slack_nsec
},
662 { "Manager", "DefaultTimerAccuracySec", config_parse_sec
, 0, &arg_default_timer_accuracy_usec
},
663 { "Manager", "DefaultStandardOutput", config_parse_output_restricted
, 0, &arg_default_std_output
},
664 { "Manager", "DefaultStandardError", config_parse_output_restricted
, 0, &arg_default_std_error
},
665 { "Manager", "DefaultTimeoutStartSec", config_parse_sec
, 0, &arg_default_timeout_start_usec
},
666 { "Manager", "DefaultTimeoutStopSec", config_parse_sec
, 0, &arg_default_timeout_stop_usec
},
667 { "Manager", "DefaultTimeoutAbortSec", config_parse_default_timeout_abort
, 0, NULL
},
668 { "Manager", "DefaultRestartSec", config_parse_sec
, 0, &arg_default_restart_usec
},
669 { "Manager", "DefaultStartLimitInterval", config_parse_sec
, 0, &arg_default_start_limit_interval
}, /* obsolete alias */
670 { "Manager", "DefaultStartLimitIntervalSec", config_parse_sec
, 0, &arg_default_start_limit_interval
},
671 { "Manager", "DefaultStartLimitBurst", config_parse_unsigned
, 0, &arg_default_start_limit_burst
},
672 { "Manager", "DefaultEnvironment", config_parse_environ
, 0, &arg_default_environment
},
673 { "Manager", "ManagerEnvironment", config_parse_environ
, 0, &arg_manager_environment
},
674 { "Manager", "DefaultLimitCPU", config_parse_rlimit
, RLIMIT_CPU
, arg_default_rlimit
},
675 { "Manager", "DefaultLimitFSIZE", config_parse_rlimit
, RLIMIT_FSIZE
, arg_default_rlimit
},
676 { "Manager", "DefaultLimitDATA", config_parse_rlimit
, RLIMIT_DATA
, arg_default_rlimit
},
677 { "Manager", "DefaultLimitSTACK", config_parse_rlimit
, RLIMIT_STACK
, arg_default_rlimit
},
678 { "Manager", "DefaultLimitCORE", config_parse_rlimit
, RLIMIT_CORE
, arg_default_rlimit
},
679 { "Manager", "DefaultLimitRSS", config_parse_rlimit
, RLIMIT_RSS
, arg_default_rlimit
},
680 { "Manager", "DefaultLimitNOFILE", config_parse_rlimit
, RLIMIT_NOFILE
, arg_default_rlimit
},
681 { "Manager", "DefaultLimitAS", config_parse_rlimit
, RLIMIT_AS
, arg_default_rlimit
},
682 { "Manager", "DefaultLimitNPROC", config_parse_rlimit
, RLIMIT_NPROC
, arg_default_rlimit
},
683 { "Manager", "DefaultLimitMEMLOCK", config_parse_rlimit
, RLIMIT_MEMLOCK
, arg_default_rlimit
},
684 { "Manager", "DefaultLimitLOCKS", config_parse_rlimit
, RLIMIT_LOCKS
, arg_default_rlimit
},
685 { "Manager", "DefaultLimitSIGPENDING", config_parse_rlimit
, RLIMIT_SIGPENDING
, arg_default_rlimit
},
686 { "Manager", "DefaultLimitMSGQUEUE", config_parse_rlimit
, RLIMIT_MSGQUEUE
, arg_default_rlimit
},
687 { "Manager", "DefaultLimitNICE", config_parse_rlimit
, RLIMIT_NICE
, arg_default_rlimit
},
688 { "Manager", "DefaultLimitRTPRIO", config_parse_rlimit
, RLIMIT_RTPRIO
, arg_default_rlimit
},
689 { "Manager", "DefaultLimitRTTIME", config_parse_rlimit
, RLIMIT_RTTIME
, arg_default_rlimit
},
690 { "Manager", "DefaultCPUAccounting", config_parse_tristate
, 0, &arg_default_cpu_accounting
},
691 { "Manager", "DefaultIOAccounting", config_parse_bool
, 0, &arg_default_io_accounting
},
692 { "Manager", "DefaultIPAccounting", config_parse_bool
, 0, &arg_default_ip_accounting
},
693 { "Manager", "DefaultBlockIOAccounting", config_parse_bool
, 0, &arg_default_blockio_accounting
},
694 { "Manager", "DefaultMemoryAccounting", config_parse_bool
, 0, &arg_default_memory_accounting
},
695 { "Manager", "DefaultTasksAccounting", config_parse_bool
, 0, &arg_default_tasks_accounting
},
696 { "Manager", "DefaultTasksMax", config_parse_tasks_max
, 0, &arg_default_tasks_max
},
697 { "Manager", "CtrlAltDelBurstAction", config_parse_emergency_action
, 0, &arg_cad_burst_action
},
698 { "Manager", "DefaultOOMPolicy", config_parse_oom_policy
, 0, &arg_default_oom_policy
},
702 _cleanup_strv_free_
char **files
= NULL
, **dirs
= NULL
;
707 suffix
= "system.conf.d";
709 r
= manager_find_user_config_paths(&files
, &dirs
);
711 return log_error_errno(r
, "Failed to determine config file paths: %m");
713 suffix
= "user.conf.d";
716 (void) config_parse_many(
717 (const char* const*) (files
?: STRV_MAKE(PKGSYSCONFDIR
"/system.conf")),
718 (const char* const*) (dirs
?: CONF_PATHS_STRV("systemd")),
721 config_item_table_lookup
, items
,
726 /* Traditionally "0" was used to turn off the default unit timeouts. Fix this up so that we use
727 * USEC_INFINITY like everywhere else. */
728 if (arg_default_timeout_start_usec
<= 0)
729 arg_default_timeout_start_usec
= USEC_INFINITY
;
730 if (arg_default_timeout_stop_usec
<= 0)
731 arg_default_timeout_stop_usec
= USEC_INFINITY
;
736 static void set_manager_defaults(Manager
*m
) {
740 /* Propagates the various default unit property settings into the manager object, i.e. properties that do not
741 * affect the manager itself, but are just what newly allocated units will have set if they haven't set
742 * anything else. (Also see set_manager_settings() for the settings that affect the manager's own behaviour) */
744 m
->default_timer_accuracy_usec
= arg_default_timer_accuracy_usec
;
745 m
->default_std_output
= arg_default_std_output
;
746 m
->default_std_error
= arg_default_std_error
;
747 m
->default_timeout_start_usec
= arg_default_timeout_start_usec
;
748 m
->default_timeout_stop_usec
= arg_default_timeout_stop_usec
;
749 m
->default_timeout_abort_usec
= arg_default_timeout_abort_usec
;
750 m
->default_timeout_abort_set
= arg_default_timeout_abort_set
;
751 m
->default_restart_usec
= arg_default_restart_usec
;
752 m
->default_start_limit_interval
= arg_default_start_limit_interval
;
753 m
->default_start_limit_burst
= arg_default_start_limit_burst
;
755 /* On 4.15+ with unified hierarchy, CPU accounting is essentially free as it doesn't require the CPU
756 * controller to be enabled, so the default is to enable it unless we got told otherwise. */
757 if (arg_default_cpu_accounting
>= 0)
758 m
->default_cpu_accounting
= arg_default_cpu_accounting
;
760 m
->default_cpu_accounting
= cpu_accounting_is_cheap();
762 m
->default_io_accounting
= arg_default_io_accounting
;
763 m
->default_ip_accounting
= arg_default_ip_accounting
;
764 m
->default_blockio_accounting
= arg_default_blockio_accounting
;
765 m
->default_memory_accounting
= arg_default_memory_accounting
;
766 m
->default_tasks_accounting
= arg_default_tasks_accounting
;
767 m
->default_tasks_max
= arg_default_tasks_max
;
768 m
->default_oom_policy
= arg_default_oom_policy
;
770 (void) manager_set_default_rlimits(m
, arg_default_rlimit
);
772 (void) manager_default_environment(m
);
773 (void) manager_transient_environment_add(m
, arg_default_environment
);
776 static void set_manager_settings(Manager
*m
) {
780 /* Propagates the various manager settings into the manager object, i.e. properties that
781 * affect the manager itself (as opposed to just being inherited into newly allocated
782 * units, see set_manager_defaults() above). */
784 m
->confirm_spawn
= arg_confirm_spawn
;
785 m
->service_watchdogs
= arg_service_watchdogs
;
786 m
->cad_burst_action
= arg_cad_burst_action
;
788 manager_set_watchdog(m
, WATCHDOG_RUNTIME
, arg_runtime_watchdog
);
789 manager_set_watchdog(m
, WATCHDOG_REBOOT
, arg_reboot_watchdog
);
790 manager_set_watchdog(m
, WATCHDOG_KEXEC
, arg_kexec_watchdog
);
792 manager_set_show_status(m
, arg_show_status
, "commandline");
793 m
->status_unit_format
= arg_status_unit_format
;
796 static int parse_argv(int argc
, char *argv
[]) {
798 ARG_LOG_LEVEL
= 0x100,
809 ARG_DUMP_CONFIGURATION_ITEMS
,
810 ARG_DUMP_BUS_PROPERTIES
,
820 ARG_DEFAULT_STD_OUTPUT
,
821 ARG_DEFAULT_STD_ERROR
,
823 ARG_SERVICE_WATCHDOGS
,
826 static const struct option options
[] = {
827 { "log-level", required_argument
, NULL
, ARG_LOG_LEVEL
},
828 { "log-target", required_argument
, NULL
, ARG_LOG_TARGET
},
829 { "log-color", optional_argument
, NULL
, ARG_LOG_COLOR
},
830 { "log-location", optional_argument
, NULL
, ARG_LOG_LOCATION
},
831 { "log-time", optional_argument
, NULL
, ARG_LOG_TIME
},
832 { "unit", required_argument
, NULL
, ARG_UNIT
},
833 { "system", no_argument
, NULL
, ARG_SYSTEM
},
834 { "user", no_argument
, NULL
, ARG_USER
},
835 { "test", no_argument
, NULL
, ARG_TEST
},
836 { "no-pager", no_argument
, NULL
, ARG_NO_PAGER
},
837 { "help", no_argument
, NULL
, 'h' },
838 { "version", no_argument
, NULL
, ARG_VERSION
},
839 { "dump-configuration-items", no_argument
, NULL
, ARG_DUMP_CONFIGURATION_ITEMS
},
840 { "dump-bus-properties", no_argument
, NULL
, ARG_DUMP_BUS_PROPERTIES
},
841 { "bus-introspect", required_argument
, NULL
, ARG_BUS_INTROSPECT
},
842 { "dump-core", optional_argument
, NULL
, ARG_DUMP_CORE
},
843 { "crash-chvt", required_argument
, NULL
, ARG_CRASH_CHVT
},
844 { "crash-shell", optional_argument
, NULL
, ARG_CRASH_SHELL
},
845 { "crash-reboot", optional_argument
, NULL
, ARG_CRASH_REBOOT
},
846 { "confirm-spawn", optional_argument
, NULL
, ARG_CONFIRM_SPAWN
},
847 { "show-status", optional_argument
, NULL
, ARG_SHOW_STATUS
},
848 { "deserialize", required_argument
, NULL
, ARG_DESERIALIZE
},
849 { "switched-root", no_argument
, NULL
, ARG_SWITCHED_ROOT
},
850 { "default-standard-output", required_argument
, NULL
, ARG_DEFAULT_STD_OUTPUT
, },
851 { "default-standard-error", required_argument
, NULL
, ARG_DEFAULT_STD_ERROR
, },
852 { "machine-id", required_argument
, NULL
, ARG_MACHINE_ID
},
853 { "service-watchdogs", required_argument
, NULL
, ARG_SERVICE_WATCHDOGS
},
858 bool user_arg_seen
= false;
863 if (getpid_cached() == 1)
866 while ((c
= getopt_long(argc
, argv
, "hDbsz:", options
, NULL
)) >= 0)
871 r
= log_set_max_level_from_string(optarg
);
873 return log_error_errno(r
, "Failed to parse log level \"%s\": %m", optarg
);
878 r
= log_set_target_from_string(optarg
);
880 return log_error_errno(r
, "Failed to parse log target \"%s\": %m", optarg
);
887 r
= log_show_color_from_string(optarg
);
889 return log_error_errno(r
, "Failed to parse log color setting \"%s\": %m",
892 log_show_color(true);
896 case ARG_LOG_LOCATION
:
898 r
= log_show_location_from_string(optarg
);
900 return log_error_errno(r
, "Failed to parse log location setting \"%s\": %m",
903 log_show_location(true);
910 r
= log_show_time_from_string(optarg
);
912 return log_error_errno(r
, "Failed to parse log time setting \"%s\": %m",
919 case ARG_DEFAULT_STD_OUTPUT
:
920 r
= exec_output_from_string(optarg
);
922 return log_error_errno(r
, "Failed to parse default standard output setting \"%s\": %m",
924 arg_default_std_output
= r
;
927 case ARG_DEFAULT_STD_ERROR
:
928 r
= exec_output_from_string(optarg
);
930 return log_error_errno(r
, "Failed to parse default standard error output setting \"%s\": %m",
932 arg_default_std_error
= r
;
936 r
= free_and_strdup(&arg_default_unit
, optarg
);
938 return log_error_errno(r
, "Failed to set default unit \"%s\": %m", optarg
);
948 user_arg_seen
= true;
952 arg_action
= ACTION_TEST
;
956 arg_pager_flags
|= PAGER_DISABLE
;
960 arg_action
= ACTION_VERSION
;
963 case ARG_DUMP_CONFIGURATION_ITEMS
:
964 arg_action
= ACTION_DUMP_CONFIGURATION_ITEMS
;
967 case ARG_DUMP_BUS_PROPERTIES
:
968 arg_action
= ACTION_DUMP_BUS_PROPERTIES
;
971 case ARG_BUS_INTROSPECT
:
972 arg_bus_introspect
= optarg
;
973 arg_action
= ACTION_BUS_INTROSPECT
;
977 r
= parse_boolean_argument("--dump-core", optarg
, &arg_dump_core
);
983 r
= parse_crash_chvt(optarg
, &arg_crash_chvt
);
985 return log_error_errno(r
, "Failed to parse crash virtual terminal index: \"%s\": %m",
989 case ARG_CRASH_SHELL
:
990 r
= parse_boolean_argument("--crash-shell", optarg
, &arg_crash_shell
);
995 case ARG_CRASH_REBOOT
:
996 r
= parse_boolean_argument("--crash-reboot", optarg
, &arg_crash_reboot
);
1001 case ARG_CONFIRM_SPAWN
:
1002 arg_confirm_spawn
= mfree(arg_confirm_spawn
);
1004 r
= parse_confirm_spawn(optarg
, &arg_confirm_spawn
);
1006 return log_error_errno(r
, "Failed to parse confirm spawn option: \"%s\": %m",
1010 case ARG_SERVICE_WATCHDOGS
:
1011 r
= parse_boolean_argument("--service-watchdogs=", optarg
, &arg_service_watchdogs
);
1016 case ARG_SHOW_STATUS
:
1018 r
= parse_show_status(optarg
, &arg_show_status
);
1020 return log_error_errno(r
, "Failed to parse show status boolean: \"%s\": %m",
1023 arg_show_status
= SHOW_STATUS_YES
;
1026 case ARG_DESERIALIZE
: {
1030 r
= safe_atoi(optarg
, &fd
);
1032 log_error_errno(r
, "Failed to parse deserialize option \"%s\": %m", optarg
);
1034 return log_error_errno(SYNTHETIC_ERRNO(EINVAL
),
1035 "Invalid deserialize fd: %d",
1038 (void) fd_cloexec(fd
, true);
1040 f
= fdopen(fd
, "r");
1042 return log_error_errno(errno
, "Failed to open serialization fd %d: %m", fd
);
1044 safe_fclose(arg_serialization
);
1045 arg_serialization
= f
;
1050 case ARG_SWITCHED_ROOT
:
1051 arg_switched_root
= true;
1054 case ARG_MACHINE_ID
:
1055 r
= set_machine_id(optarg
);
1057 return log_error_errno(r
, "MachineID '%s' is not valid: %m", optarg
);
1061 arg_action
= ACTION_HELP
;
1065 log_set_max_level(LOG_DEBUG
);
1071 /* Just to eat away the sysvinit kernel cmdline args that we'll parse in
1072 * parse_proc_cmdline_item() or ignore, without any getopt() error messages.
1075 if (getpid_cached() != 1)
1081 assert_not_reached("Unhandled option code.");
1084 if (optind
< argc
&& getpid_cached() != 1)
1085 /* Hmm, when we aren't run as init system let's complain about excess arguments */
1086 return log_error_errno(SYNTHETIC_ERRNO(EINVAL
), "Excess arguments.");
1088 if (arg_action
== ACTION_RUN
&& !arg_system
&& !user_arg_seen
)
1089 return log_error_errno(SYNTHETIC_ERRNO(EINVAL
),
1090 "Explicit --user argument required to run as user manager.");
1095 static int help(void) {
1096 _cleanup_free_
char *link
= NULL
;
1099 r
= terminal_urlify_man("systemd", "1", &link
);
1103 printf("%s [OPTIONS...]\n\n"
1104 "%sStarts and monitors system and user services.%s\n\n"
1105 "This program takes no positional arguments.\n\n"
1107 " -h --help Show this help\n"
1108 " --version Show version\n"
1109 " --test Determine initial transaction, dump it and exit\n"
1110 " --system In combination with --test: operate as system service manager\n"
1111 " --user In combination with --test: operate as per-user service manager\n"
1112 " --no-pager Do not pipe output into a pager\n"
1113 " --dump-configuration-items Dump understood unit configuration items\n"
1114 " --dump-bus-properties Dump exposed bus properties\n"
1115 " --bus-introspect=PATH Write XML introspection data\n"
1116 " --unit=UNIT Set default unit\n"
1117 " --dump-core[=BOOL] Dump core on crash\n"
1118 " --crash-vt=NR Change to specified VT on crash\n"
1119 " --crash-reboot[=BOOL] Reboot on crash\n"
1120 " --crash-shell[=BOOL] Run shell on crash\n"
1121 " --confirm-spawn[=BOOL] Ask for confirmation when spawning processes\n"
1122 " --show-status[=BOOL] Show status updates on the console during bootup\n"
1123 " --log-target=TARGET Set log target (console, journal, kmsg, journal-or-kmsg, null)\n"
1124 " --log-level=LEVEL Set log level (debug, info, notice, warning, err, crit, alert, emerg)\n"
1125 " --log-color[=BOOL] Highlight important log messages\n"
1126 " --log-location[=BOOL] Include code location in log messages\n"
1127 " --log-time[=BOOL] Prefix log messages with current time\n"
1128 " --default-standard-output= Set default standard output for services\n"
1129 " --default-standard-error= Set default standard error output for services\n"
1130 "\nSee the %s for details.\n",
1131 program_invocation_short_name
,
1141 static int prepare_reexecute(
1145 bool switching_root
) {
1147 _cleanup_fdset_free_ FDSet
*fds
= NULL
;
1148 _cleanup_fclose_
FILE *f
= NULL
;
1155 r
= manager_open_serialization(m
, &f
);
1157 return log_error_errno(r
, "Failed to create serialization file: %m");
1159 /* Make sure nothing is really destructed when we shut down */
1161 bus_manager_send_reloading(m
, true);
1167 r
= manager_serialize(m
, f
, fds
, switching_root
);
1171 if (fseeko(f
, 0, SEEK_SET
) == (off_t
) -1)
1172 return log_error_errno(errno
, "Failed to rewind serialization fd: %m");
1174 r
= fd_cloexec(fileno(f
), false);
1176 return log_error_errno(r
, "Failed to disable O_CLOEXEC for serialization: %m");
1178 r
= fdset_cloexec(fds
, false);
1180 return log_error_errno(r
, "Failed to disable O_CLOEXEC for serialization fds: %m");
1182 *ret_f
= TAKE_PTR(f
);
1183 *ret_fds
= TAKE_PTR(fds
);
1188 static void bump_file_max_and_nr_open(void) {
1190 /* Let's bump fs.file-max and fs.nr_open to their respective maximums. On current kernels large numbers of file
1191 * descriptors are no longer a performance problem and their memory is properly tracked by memcg, thus counting
1192 * them and limiting them in another two layers of limits is unnecessary and just complicates things. This
1193 * function hence turns off 2 of the 4 levels of limits on file descriptors, and makes RLIMIT_NOFILE (soft +
1194 * hard) the only ones that really matter. */
1196 #if BUMP_PROC_SYS_FS_FILE_MAX || BUMP_PROC_SYS_FS_NR_OPEN
1200 #if BUMP_PROC_SYS_FS_FILE_MAX
1201 /* The maximum the kernel allows for this since 5.2 is LONG_MAX, use that. (Previously things were
1202 * different but the operation would fail silently.) */
1203 r
= sysctl_writef("fs/file-max", "%li\n", LONG_MAX
);
1205 log_full_errno(IN_SET(r
, -EROFS
, -EPERM
, -EACCES
) ? LOG_DEBUG
: LOG_WARNING
, r
, "Failed to bump fs.file-max, ignoring: %m");
1208 #if BUMP_PROC_SYS_FS_NR_OPEN
1211 /* Arg! The kernel enforces maximum and minimum values on the fs.nr_open, but we don't really know what they
1212 * are. The expression by which the maximum is determined is dependent on the architecture, and is something we
1213 * don't really want to copy to userspace, as it is dependent on implementation details of the kernel. Since
1214 * the kernel doesn't expose the maximum value to us, we can only try and hope. Hence, let's start with
1215 * INT_MAX, and then keep halving the value until we find one that works. Ugly? Yes, absolutely, but kernel
1216 * APIs are kernel APIs, so what can we do... 🤯 */
1221 v
&= ~(__SIZEOF_POINTER__
- 1); /* Round down to next multiple of the pointer size */
1223 log_warning("Can't bump fs.nr_open, value too small.");
1229 log_error_errno(k
, "Failed to read fs.nr_open: %m");
1232 if (k
>= v
) { /* Already larger */
1233 log_debug("Skipping bump, value is already larger.");
1237 r
= sysctl_writef("fs/nr_open", "%i\n", v
);
1239 log_debug("Couldn't write fs.nr_open as %i, halving it.", v
);
1244 log_full_errno(IN_SET(r
, -EROFS
, -EPERM
, -EACCES
) ? LOG_DEBUG
: LOG_WARNING
, r
, "Failed to bump fs.nr_open, ignoring: %m");
1248 log_debug("Successfully bumped fs.nr_open to %i", v
);
/* Raises RLIMIT_NOFILE (soft and hard) for this process up to the value returned by read_nr_open(),
 * never going below the limits passed in via saved_rlimit.
 *
 * Returns 0 if no change was needed or the new limit was applied; on failure returns whatever
 * log_warning_errno() yields for the error reported by setrlimit_closest(). */
static int bump_rlimit_nofile(struct rlimit *saved_rlimit) {
        struct rlimit wanted;
        rlim_t ceiling;
        int r;

        /* The absolute per-process limit the kernel enforces. */
        ceiling = (rlim_t) read_nr_open();

        /* Target values: the kernel ceiling, unless what we inherited is already higher. */
        wanted = (struct rlimit) {
                .rlim_cur = MAX(ceiling, saved_rlimit->rlim_cur),
                .rlim_max = MAX(ceiling, saved_rlimit->rlim_max),
        };

        /* Only touch the limit if at least one of the two values would actually go up. */
        if (!(saved_rlimit->rlim_max < wanted.rlim_max ||
              saved_rlimit->rlim_cur < wanted.rlim_cur)) {
                log_debug("RLIMIT_NOFILE is already as high or higher than we need it, not bumping.");
                return 0;
        }

        /* Bump both soft and hard limit as far as the kernel lets us. */
        r = setrlimit_closest(RLIMIT_NOFILE, &wanted);
        if (r < 0)
                return log_warning_errno(r, "Setting RLIMIT_NOFILE failed, ignoring: %m");

        return 0;
}
1283 static int bump_rlimit_memlock(struct rlimit
*saved_rlimit
) {
1284 struct rlimit new_rlimit
;
1288 /* BPF_MAP_TYPE_LPM_TRIE bpf maps are charged against RLIMIT_MEMLOCK, even if we have CAP_IPC_LOCK which should
1289 * normally disable such checks. We need them to implement IPAddressAllow= and IPAddressDeny=, hence let's bump
1290 * the value high enough for our user. */
1292 /* Using MAX() on resource limits only is safe if RLIM_INFINITY is > 0. POSIX declares that rlim_t
1293 * must be unsigned, hence this is a given, but let's make this clear here. */
1294 assert_cc(RLIM_INFINITY
> 0);
1296 mm
= physical_memory_scale(1, 8); /* Let's scale how much we allow to be locked by the amount of physical
1297 * RAM. We allow an eighth to be locked by us, just to pick a value. */
1299 new_rlimit
= (struct rlimit
) {
1300 .rlim_cur
= MAX3(HIGH_RLIMIT_MEMLOCK
, saved_rlimit
->rlim_cur
, mm
),
1301 .rlim_max
= MAX3(HIGH_RLIMIT_MEMLOCK
, saved_rlimit
->rlim_max
, mm
),
1304 if (saved_rlimit
->rlim_max
>= new_rlimit
.rlim_cur
&&
1305 saved_rlimit
->rlim_cur
>= new_rlimit
.rlim_max
) {
1306 log_debug("RLIMIT_MEMLOCK is already as high or higher than we need it, not bumping.");
1310 r
= setrlimit_closest(RLIMIT_MEMLOCK
, &new_rlimit
);
1312 return log_warning_errno(r
, "Setting RLIMIT_MEMLOCK failed, ignoring: %m");
/* Logs a warning when dir_is_empty("/usr") reports a positive result, i.e. /usr is neither on the
 * same file system as / nor mounted yet. Any other result (zero or an error) is silently accepted. */
static void test_usr(void) {
        if (dir_is_empty("/usr") > 0)
                log_warning("/usr appears to be on its own filesystem and is not already mounted. This is not a supported setup. "
                            "Some things will probably break (sometimes even silently) in mysterious ways. "
                            "Consult http://freedesktop.org/wiki/Software/systemd/separate-usr-is-broken for more information.");
}
1329 static int enforce_syscall_archs(Set
*archs
) {
1333 if (!is_seccomp_available())
1336 r
= seccomp_restrict_archs(arg_syscall_archs
);
1338 return log_error_errno(r
, "Failed to enforce system call architecture restrication: %m");
1343 static int status_welcome(void) {
1344 _cleanup_free_
char *pretty_name
= NULL
, *ansi_color
= NULL
;
1347 if (!show_status_on(arg_show_status
))
1350 r
= parse_os_release(NULL
,
1351 "PRETTY_NAME", &pretty_name
,
1352 "ANSI_COLOR", &ansi_color
);
1354 log_full_errno(r
== -ENOENT
? LOG_DEBUG
: LOG_WARNING
, r
,
1355 "Failed to read os-release file, ignoring: %m");
1357 if (log_get_show_color())
1358 return status_printf(NULL
, 0,
1359 "\nWelcome to \x1B[%sm%s\x1B[0m!\n",
1360 isempty(ansi_color
) ? "1" : ansi_color
,
1361 isempty(pretty_name
) ? "Linux" : pretty_name
);
1363 return status_printf(NULL
, 0,
1364 "\nWelcome to %s!\n",
1365 isempty(pretty_name
) ? "Linux" : pretty_name
);
1368 static int write_container_id(void) {
1372 c
= getenv("container");
1376 RUN_WITH_UMASK(0022)
1377 r
= write_string_file("/run/systemd/container", c
, WRITE_STRING_FILE_CREATE
);
1379 return log_warning_errno(r
, "Failed to write /run/systemd/container, ignoring: %m");
1384 static int bump_unix_max_dgram_qlen(void) {
1385 _cleanup_free_
char *qlen
= NULL
;
1389 /* Let's bump the net.unix.max_dgram_qlen sysctl. The kernel default of 16 is simply too low. We set the value
1390 * really really early during boot, so that it is actually applied to all our sockets, including the
1391 * $NOTIFY_SOCKET one. */
1393 r
= read_one_line_file("/proc/sys/net/unix/max_dgram_qlen", &qlen
);
1395 return log_full_errno(r
== -ENOENT
? LOG_DEBUG
: LOG_WARNING
, r
, "Failed to read AF_UNIX datagram queue length, ignoring: %m");
1397 r
= safe_atolu(qlen
, &v
);
1399 return log_warning_errno(r
, "Failed to parse AF_UNIX datagram queue length '%s', ignoring: %m", qlen
);
1401 if (v
>= DEFAULT_UNIX_MAX_DGRAM_QLEN
)
1404 r
= write_string_filef("/proc/sys/net/unix/max_dgram_qlen", WRITE_STRING_FILE_DISABLE_BUFFER
, "%lu", DEFAULT_UNIX_MAX_DGRAM_QLEN
);
1406 return log_full_errno(IN_SET(r
, -EROFS
, -EPERM
, -EACCES
) ? LOG_DEBUG
: LOG_WARNING
, r
,
1407 "Failed to bump AF_UNIX datagram queue length, ignoring: %m");
1412 static int fixup_environment(void) {
1413 _cleanup_free_
char *term
= NULL
;
1417 /* Only fix up the environment when we are started as PID 1 */
1418 if (getpid_cached() != 1)
1421 /* We expect the environment to be set correctly if run inside a container. */
1422 if (detect_container() > 0)
1425 /* When started as PID1, the kernel uses /dev/console for our stdios and uses TERM=linux whatever the backend
1426 * device used by the console. We try to make a better guess here since some consoles might not have support
1427 * for color mode for example.
1429 * However if TERM was configured through the kernel command line then leave it alone. */
1430 r
= proc_cmdline_get_key("TERM", 0, &term
);
1434 t
= term
?: default_term_for_tty("/dev/console");
1436 if (setenv("TERM", t
, 1) < 0)
1439 /* The kernel sets HOME=/ for init. Let's undo this. */
1440 if (path_equal_ptr(getenv("HOME"), "/"))
1441 assert_se(unsetenv("HOME") == 0);
1446 static void redirect_telinit(int argc
, char *argv
[]) {
1448 /* This is compatibility support for SysV, where calling init as a user is identical to telinit. */
1450 #if HAVE_SYSV_COMPAT
1451 if (getpid_cached() == 1)
1454 if (!invoked_as(argv
, "init"))
1457 execv(SYSTEMCTL_BINARY_PATH
, argv
);
1458 log_error_errno(errno
, "Failed to exec " SYSTEMCTL_BINARY_PATH
": %m");
1463 static int become_shutdown(
1464 const char *shutdown_verb
,
1467 char log_level
[DECIMAL_STR_MAX(int) + 1],
1468 exit_code
[DECIMAL_STR_MAX(uint8_t) + 1],
1469 timeout
[DECIMAL_STR_MAX(usec_t
) + 1];
1471 const char* command_line
[13] = {
1472 SYSTEMD_SHUTDOWN_BINARY_PATH
,
1474 "--timeout", timeout
,
1475 "--log-level", log_level
,
1479 _cleanup_strv_free_
char **env_block
= NULL
;
1482 usec_t watchdog_timer
= 0;
1484 assert(shutdown_verb
);
1485 assert(!command_line
[pos
]);
1486 env_block
= strv_copy(environ
);
1488 xsprintf(log_level
, "%d", log_get_max_level());
1489 xsprintf(timeout
, "%" PRI_USEC
"us", arg_default_timeout_stop_usec
);
1491 switch (log_get_target()) {
1493 case LOG_TARGET_KMSG
:
1494 case LOG_TARGET_JOURNAL_OR_KMSG
:
1495 case LOG_TARGET_SYSLOG_OR_KMSG
:
1496 command_line
[pos
++] = "kmsg";
1499 case LOG_TARGET_NULL
:
1500 command_line
[pos
++] = "null";
1503 case LOG_TARGET_CONSOLE
:
1505 command_line
[pos
++] = "console";
1509 if (log_get_show_color())
1510 command_line
[pos
++] = "--log-color";
1512 if (log_get_show_location())
1513 command_line
[pos
++] = "--log-location";
1515 if (log_get_show_time())
1516 command_line
[pos
++] = "--log-time";
1518 if (streq(shutdown_verb
, "exit")) {
1519 command_line
[pos
++] = "--exit-code";
1520 command_line
[pos
++] = exit_code
;
1521 xsprintf(exit_code
, "%d", retval
);
1524 assert(pos
< ELEMENTSOF(command_line
));
1526 if (streq(shutdown_verb
, "reboot"))
1527 watchdog_timer
= arg_reboot_watchdog
;
1528 else if (streq(shutdown_verb
, "kexec"))
1529 watchdog_timer
= arg_kexec_watchdog
;
1531 if (watchdog_timer
> 0 && watchdog_timer
!= USEC_INFINITY
) {
1535 /* If we reboot or kexec let's set the shutdown
1536 * watchdog and tell the shutdown binary to
1537 * repeatedly ping it */
1538 r
= watchdog_set_timeout(&watchdog_timer
);
1539 watchdog_close(r
< 0);
1541 /* Tell the binary how often to ping, ignore failure */
1542 if (asprintf(&e
, "WATCHDOG_USEC="USEC_FMT
, watchdog_timer
) > 0)
1543 (void) strv_consume(&env_block
, e
);
1545 if (arg_watchdog_device
&&
1546 asprintf(&e
, "WATCHDOG_DEVICE=%s", arg_watchdog_device
) > 0)
1547 (void) strv_consume(&env_block
, e
);
1549 watchdog_close(true);
1551 /* Avoid the creation of new processes forked by the
1552 * kernel; at this point, we will not listen to the
1554 if (detect_container() <= 0)
1555 (void) cg_uninstall_release_agent(SYSTEMD_CGROUP_CONTROLLER
);
1557 execve(SYSTEMD_SHUTDOWN_BINARY_PATH
, (char **) command_line
, env_block
);
1561 static void initialize_clock(void) {
1564 /* This is called very early on, before we parse the kernel command line or otherwise figure out why
1565 * we are running, but only once. */
1567 if (clock_is_localtime(NULL
) > 0) {
1571 * The very first call of settimeofday() also does a time warp in the kernel.
1573 * In the rtc-in-local time mode, we set the kernel's timezone, and rely on external tools to take care
1574 * of maintaining the RTC and do all adjustments. This matches the behavior of Windows, which leaves
1575 * the RTC alone if the registry tells that the RTC runs in UTC.
1577 r
= clock_set_timezone(&min
);
1579 log_error_errno(r
, "Failed to apply local time delta, ignoring: %m");
1581 log_info("RTC configured in localtime, applying delta of %i minutes to system time.", min
);
1583 } else if (!in_initrd())
1585 * Do a dummy very first call to seal the kernel's time warp magic.
1587 * Do not call this from inside the initrd. The initrd might not carry /etc/adjtime with LOCAL, but the
1588 * real system could be set up that way. In such case, we need to delay the time-warp or the sealing
1589 * until we reach the real system.
1591 * Do not set the kernel's timezone. The concept of local time cannot be supported reliably, the time
1592 * will jump or be incorrect at every daylight saving time change. All kernel local time concepts will
1593 * be treated as UTC that way.
1595 (void) clock_reset_timewarp();
1597 r
= clock_apply_epoch();
1599 log_error_errno(r
, "Current system time is before build time, but cannot correct: %m");
1601 log_info("System time before build time, advancing clock.");
1604 static void apply_clock_update(void) {
1607 /* This is called later than initialize_clock(), i.e. after we parsed configuration files/kernel
1608 * command line and such. */
1610 if (arg_clock_usec
== 0)
1613 if (getpid_cached() != 1)
1616 if (clock_settime(CLOCK_REALTIME
, timespec_store(&ts
, arg_clock_usec
)) < 0)
1617 log_error_errno(errno
, "Failed to set system clock to time specified on kernel command line: %m");
1619 char buf
[FORMAT_TIMESTAMP_MAX
];
1621 log_info("Set system clock to %s, as specified on the kernel command line.",
1622 format_timestamp(buf
, sizeof(buf
), arg_clock_usec
));
1626 static void cmdline_take_random_seed(void) {
1630 if (arg_random_seed_size
== 0)
1633 if (getpid_cached() != 1)
1636 assert(arg_random_seed
);
1637 suggested
= random_pool_size();
1639 if (arg_random_seed_size
< suggested
)
1640 log_warning("Random seed specified on kernel command line has size %zu, but %zu bytes required to fill entropy pool.",
1641 arg_random_seed_size
, suggested
);
1643 r
= random_write_entropy(-1, arg_random_seed
, arg_random_seed_size
, true);
1645 log_warning_errno(r
, "Failed to credit entropy specified on kernel command line, ignoring: %m");
1649 log_notice("Successfully credited entropy passed on kernel command line.\n"
1650 "Note that the seed provided this way is accessible to unprivileged programs. This functionality should not be used outside of testing environments.");
1653 static void initialize_coredump(bool skip_setup
) {
1655 if (getpid_cached() != 1)
1658 /* Don't limit the core dump size, so that coredump handlers such as systemd-coredump (which honour the limit)
1659 * will process core dumps for system services by default. */
1660 if (setrlimit(RLIMIT_CORE
, &RLIMIT_MAKE_CONST(RLIM_INFINITY
)) < 0)
1661 log_warning_errno(errno
, "Failed to set RLIMIT_CORE: %m");
1663 /* But at the same time, turn off the core_pattern logic by default, so that no
1664 * coredumps are stored until the systemd-coredump tool is enabled via
1665 * sysctl. However it can be changed via the kernel command line later so core
1666 * dumps can still be generated during early startup and in initramfs. */
1668 disable_coredumps();
1672 static void initialize_core_pattern(bool skip_setup
) {
1675 if (skip_setup
|| !arg_early_core_pattern
)
1678 if (getpid_cached() != 1)
1681 r
= write_string_file("/proc/sys/kernel/core_pattern", arg_early_core_pattern
, WRITE_STRING_FILE_DISABLE_BUFFER
);
1683 log_warning_errno(r
, "Failed to write '%s' to /proc/sys/kernel/core_pattern, ignoring: %m", arg_early_core_pattern
);
1686 static void update_cpu_affinity(bool skip_setup
) {
1687 _cleanup_free_
char *mask
= NULL
;
1689 if (skip_setup
|| !arg_cpu_affinity
.set
)
1692 assert(arg_cpu_affinity
.allocated
> 0);
1694 mask
= cpu_set_to_string(&arg_cpu_affinity
);
1695 log_debug("Setting CPU affinity to %s.", strnull(mask
));
1697 if (sched_setaffinity(0, arg_cpu_affinity
.allocated
, arg_cpu_affinity
.set
) < 0)
1698 log_warning_errno(errno
, "Failed to set CPU affinity: %m");
1701 static void update_numa_policy(bool skip_setup
) {
1703 _cleanup_free_
char *nodes
= NULL
;
1704 const char * policy
= NULL
;
1706 if (skip_setup
|| !mpol_is_valid(numa_policy_get_type(&arg_numa_policy
)))
1709 if (DEBUG_LOGGING
) {
1710 policy
= mpol_to_string(numa_policy_get_type(&arg_numa_policy
));
1711 nodes
= cpu_set_to_range_string(&arg_numa_policy
.nodes
);
1712 log_debug("Setting NUMA policy to %s, with nodes %s.", strnull(policy
), strnull(nodes
));
1715 r
= apply_numa_policy(&arg_numa_policy
);
1716 if (r
== -EOPNOTSUPP
)
1717 log_debug_errno(r
, "NUMA support not available, ignoring.");
1719 log_warning_errno(r
, "Failed to set NUMA memory policy: %m");
1722 static void do_reexecute(
1725 const struct rlimit
*saved_rlimit_nofile
,
1726 const struct rlimit
*saved_rlimit_memlock
,
1728 const char *switch_root_dir
,
1729 const char *switch_root_init
,
1730 const char **ret_error_message
) {
1732 unsigned i
, j
, args_size
;
1736 assert(saved_rlimit_nofile
);
1737 assert(saved_rlimit_memlock
);
1738 assert(ret_error_message
);
1740 /* Close and disarm the watchdog, so that the new instance can reinitialize it, but doesn't get rebooted while
1742 watchdog_close(true);
1744 /* Reset RLIMIT_NOFILE + RLIMIT_MEMLOCK back to the kernel defaults, so that the new systemd can pass
1745 * the kernel default to its child processes */
1746 if (saved_rlimit_nofile
->rlim_cur
!= 0)
1747 (void) setrlimit(RLIMIT_NOFILE
, saved_rlimit_nofile
);
1748 if (saved_rlimit_memlock
->rlim_cur
!= RLIM_INFINITY
)
1749 (void) setrlimit(RLIMIT_MEMLOCK
, saved_rlimit_memlock
);
1751 if (switch_root_dir
) {
1752 /* Kill all remaining processes from the initrd, but don't wait for them, so that we can handle the
1753 * SIGCHLD for them after deserializing. */
1754 broadcast_signal(SIGTERM
, false, true, arg_default_timeout_stop_usec
);
1756 /* And switch root with MS_MOVE, because we remove the old directory afterwards and detach it. */
1757 r
= switch_root(switch_root_dir
, "/mnt", true, MS_MOVE
);
1759 log_error_errno(r
, "Failed to switch root, trying to continue: %m");
1762 args_size
= MAX(6, argc
+1);
1763 args
= newa(const char*, args_size
);
1765 if (!switch_root_init
) {
1766 char sfd
[DECIMAL_STR_MAX(int) + 1];
1768 /* First try to spawn ourselves with the right path, and with full serialization. We do this only if
1769 * the user didn't specify an explicit init to spawn. */
1771 assert(arg_serialization
);
1774 xsprintf(sfd
, "%i", fileno(arg_serialization
));
1777 args
[i
++] = SYSTEMD_BINARY_PATH
;
1778 if (switch_root_dir
)
1779 args
[i
++] = "--switched-root";
1780 args
[i
++] = arg_system
? "--system" : "--user";
1781 args
[i
++] = "--deserialize";
1785 assert(i
<= args_size
);
1788 * We want valgrind to print its memory usage summary before reexecution. Valgrind won't do this on
1789 * its own on exec(), but it will do it on exit(). Hence, to ensure we get a summary here, fork() off
1790 * a child, let it exit() cleanly, so that it prints the summary, and wait() for it in the parent,
1791 * before proceeding into the exec().
1793 valgrind_summary_hack();
1795 (void) execv(args
[0], (char* const*) args
);
1796 log_debug_errno(errno
, "Failed to execute our own binary, trying fallback: %m");
1799 /* Try the fallback, if there is any, without any serialization. We pass the original argv[] and envp[]. (Well,
1800 * modulo the ordering changes due to getopt() in argv[], and some cleanups in envp[], but let's hope that
1801 * doesn't matter.) */
1803 arg_serialization
= safe_fclose(arg_serialization
);
1804 fds
= fdset_free(fds
);
1806 /* Reopen the console */
1807 (void) make_console_stdio();
1809 for (j
= 1, i
= 1; j
< (unsigned) argc
; j
++)
1810 args
[i
++] = argv
[j
];
1812 assert(i
<= args_size
);
1814 /* Re-enable any blocked signals, especially important if we switch from initial ramdisk to init=... */
1815 (void) reset_all_signal_handlers();
1816 (void) reset_signal_mask();
1817 (void) rlimit_nofile_safe();
1819 if (switch_root_init
) {
1820 args
[0] = switch_root_init
;
1821 (void) execve(args
[0], (char* const*) args
, saved_env
);
1822 log_warning_errno(errno
, "Failed to execute configured init, trying fallback: %m");
1825 args
[0] = "/sbin/init";
1826 (void) execv(args
[0], (char* const*) args
);
1829 manager_status_printf(NULL
, STATUS_TYPE_EMERGENCY
,
1830 ANSI_HIGHLIGHT_RED
" !! " ANSI_NORMAL
,
1831 "Failed to execute /sbin/init");
1834 log_warning("No /sbin/init, trying fallback");
1836 args
[0] = "/bin/sh";
1838 (void) execve(args
[0], (char* const*) args
, saved_env
);
1839 log_error_errno(errno
, "Failed to execute /bin/sh, giving up: %m");
1841 log_warning_errno(r
, "Failed to execute /sbin/init, giving up: %m");
1843 *ret_error_message
= "Failed to execute fallback shell";
1846 static int invoke_main_loop(
1848 const struct rlimit
*saved_rlimit_nofile
,
1849 const struct rlimit
*saved_rlimit_memlock
,
1850 bool *ret_reexecute
,
1851 int *ret_retval
, /* Return parameters relevant for shutting down */
1852 const char **ret_shutdown_verb
, /* … */
1853 FDSet
**ret_fds
, /* Return parameters for reexecuting */
1854 char **ret_switch_root_dir
, /* … */
1855 char **ret_switch_root_init
, /* … */
1856 const char **ret_error_message
) {
1861 assert(saved_rlimit_nofile
);
1862 assert(saved_rlimit_memlock
);
1863 assert(ret_reexecute
);
1865 assert(ret_shutdown_verb
);
1867 assert(ret_switch_root_dir
);
1868 assert(ret_switch_root_init
);
1869 assert(ret_error_message
);
1872 r
= manager_loop(m
);
1874 *ret_error_message
= "Failed to run main loop";
1875 return log_emergency_errno(r
, "Failed to run main loop: %m");
1878 switch ((ManagerObjective
) r
) {
1880 case MANAGER_RELOAD
: {
1881 LogTarget saved_log_target
;
1882 int saved_log_level
;
1884 log_info("Reloading.");
1886 /* First, save any overridden log level/target, then parse the configuration file, which might
1887 * change the log level to new settings. */
1889 saved_log_level
= m
->log_level_overridden
? log_get_max_level() : -1;
1890 saved_log_target
= m
->log_target_overridden
? log_get_target() : _LOG_TARGET_INVALID
;
1892 (void) parse_configuration(saved_rlimit_nofile
, saved_rlimit_memlock
);
1894 set_manager_defaults(m
);
1895 set_manager_settings(m
);
1897 update_cpu_affinity(false);
1898 update_numa_policy(false);
1900 if (saved_log_level
>= 0)
1901 manager_override_log_level(m
, saved_log_level
);
1902 if (saved_log_target
>= 0)
1903 manager_override_log_target(m
, saved_log_target
);
1905 r
= manager_reload(m
);
1907 /* Reloading failed before the point of no return. Let's continue running as if nothing happened. */
1908 m
->objective
= MANAGER_OK
;
1913 case MANAGER_REEXECUTE
:
1915 r
= prepare_reexecute(m
, &arg_serialization
, ret_fds
, false);
1917 *ret_error_message
= "Failed to prepare for reexecution";
1921 log_notice("Reexecuting.");
1923 *ret_reexecute
= true;
1924 *ret_retval
= EXIT_SUCCESS
;
1925 *ret_shutdown_verb
= NULL
;
1926 *ret_switch_root_dir
= *ret_switch_root_init
= NULL
;
1930 case MANAGER_SWITCH_ROOT
:
1931 if (!m
->switch_root_init
) {
1932 r
= prepare_reexecute(m
, &arg_serialization
, ret_fds
, true);
1934 *ret_error_message
= "Failed to prepare for reexecution";
1940 log_notice("Switching root.");
1942 *ret_reexecute
= true;
1943 *ret_retval
= EXIT_SUCCESS
;
1944 *ret_shutdown_verb
= NULL
;
1946 /* Steal the switch root parameters */
1947 *ret_switch_root_dir
= TAKE_PTR(m
->switch_root
);
1948 *ret_switch_root_init
= TAKE_PTR(m
->switch_root_init
);
1954 if (MANAGER_IS_USER(m
)) {
1957 *ret_reexecute
= false;
1958 *ret_retval
= m
->return_value
;
1959 *ret_shutdown_verb
= NULL
;
1961 *ret_switch_root_dir
= *ret_switch_root_init
= NULL
;
1967 case MANAGER_REBOOT
:
1968 case MANAGER_POWEROFF
:
1970 case MANAGER_KEXEC
: {
1971 static const char * const table
[_MANAGER_OBJECTIVE_MAX
] = {
1972 [MANAGER_EXIT
] = "exit",
1973 [MANAGER_REBOOT
] = "reboot",
1974 [MANAGER_POWEROFF
] = "poweroff",
1975 [MANAGER_HALT
] = "halt",
1976 [MANAGER_KEXEC
] = "kexec",
1979 log_notice("Shutting down.");
1981 *ret_reexecute
= false;
1982 *ret_retval
= m
->return_value
;
1983 assert_se(*ret_shutdown_verb
= table
[m
->objective
]);
1985 *ret_switch_root_dir
= *ret_switch_root_init
= NULL
;
1991 assert_not_reached("Unknown or unexpected manager objective.");
1996 static void log_execution_mode(bool *ret_first_boot
) {
1997 assert(ret_first_boot
);
2002 log_info("systemd " GIT_VERSION
" running in %ssystem mode. (%s)",
2003 arg_action
== ACTION_TEST
? "test " : "",
2006 v
= detect_virtualization();
2008 log_info("Detected virtualization %s.", virtualization_to_string(v
));
2010 log_info("Detected architecture %s.", architecture_to_string(uname_architecture()));
2013 *ret_first_boot
= false;
2014 log_info("Running in initial RAM disk.");
2017 _cleanup_free_
char *id_text
= NULL
;
2019 /* Let's check whether we are in first boot. We use /etc/machine-id as flag file
2020 * for this: If it is missing or contains the value "uninitialized", this is the
2021 * first boot. In any other case, it is not. This allows container managers and
2022 * installers to provision a couple of files already. If the container manager
2023 * wants to provision the machine ID itself it should pass $container_uuid to PID 1. */
2025 r
= read_one_line_file("/etc/machine-id", &id_text
);
2026 if (r
< 0 || streq(id_text
, "uninitialized")) {
2027 if (r
< 0 && r
!= -ENOENT
)
2028 log_warning_errno(r
, "Unexpected error while reading /etc/machine-id, ignoring: %m");
2030 *ret_first_boot
= true;
2031 log_info("Detected first boot.");
2033 *ret_first_boot
= false;
2034 log_debug("Detected initialized system, this is not the first boot.");
2038 if (DEBUG_LOGGING
) {
2039 _cleanup_free_
char *t
;
2041 t
= uid_to_name(getuid());
2042 log_debug("systemd " GIT_VERSION
" running in %suser mode for user " UID_FMT
"/%s. (%s)",
2043 arg_action
== ACTION_TEST
? " test" : "",
2044 getuid(), strna(t
), systemd_features
);
2047 *ret_first_boot
= false;
/* initialize_runtime(): applies the process-wide runtime settings the manager needs before it starts. */
/* Steps visible here, in order: CPU affinity and NUMA policy; crash handler; cgroup controller mounts; */
/* hostname, machine-id and loopback setup; bump_unix_max_dgram_qlen()/bump_file_max_and_nr_open(); */
/* container id; watchdog device; the user runtime directory with inaccessible device nodes; timer slack; */
/* capability bounding set; no_new_privs; syscall architecture restriction; child-subreaper status; */
/* and finally the RLIMIT_NOFILE/RLIMIT_MEMLOCK bumps, which receive saved_rlimit_nofile/saved_rlimit_memlock. */
/* On the fatal paths *ret_error_message is pointed at a string literal describing the failure. */
/* NOTE(review): the guards selecting system vs. user instance and honouring skip_setup, and the body of the */
/* arg_action != ACTION_RUN check, are not visible in this excerpt; confirm which steps each one covers. */
2051 static int initialize_runtime(
2054 struct rlimit
*saved_rlimit_nofile
,
2055 struct rlimit
*saved_rlimit_memlock
,
2056 const char **ret_error_message
) {
2059 assert(ret_error_message
);
2061 /* Sets up various runtime parameters. Many of these initializations are conditionalized:
2063 * - Some only apply to --system instances
2064 * - Some only apply to --user instances
2065 * - Some only apply when we first start up, but not when we reexecute
2068 if (arg_action
!= ACTION_RUN
)
2071 update_cpu_affinity(skip_setup
);
2072 update_numa_policy(skip_setup
);
2075 /* Make sure we leave a core dump without panicking the kernel. */
2076 install_crash_handler();
2079 r
= mount_cgroup_controllers();
2081 *ret_error_message
= "Failed to mount cgroup hierarchies";
2086 (void) hostname_setup(true);
2087 /* Force transient machine-id on first boot. */
2088 machine_id_setup(NULL
, first_boot
, arg_machine_id
, NULL
);
2089 (void) loopback_setup();
2090 bump_unix_max_dgram_qlen();
2091 bump_file_max_and_nr_open();
2093 write_container_id();
/* A watchdog device that cannot be set only produces a warning; startup continues. */
2096 if (arg_watchdog_device
) {
2097 r
= watchdog_set_device(arg_watchdog_device
);
2099 log_warning_errno(r
, "Failed to set watchdog device to %s, ignoring: %m", arg_watchdog_device
);
2102 _cleanup_free_
char *p
= NULL
;
2104 /* Create the runtime directory and place the inaccessible device nodes there, if we run in
2105 * user mode. In system mode mount_setup() already did that. */
2107 r
= xdg_user_runtime_dir(&p
, "/systemd");
2109 *ret_error_message
= "$XDG_RUNTIME_DIR is not set";
2110 return log_emergency_errno(r
, "Failed to determine $XDG_RUNTIME_DIR path: %m");
2113 (void) mkdir_p_label(p
, 0755);
2114 (void) make_inaccessible_nodes(p
, UID_INVALID
, GID_INVALID
);
/* Timer slack is only touched when a value other than NSEC_INFINITY is configured; failure is non-fatal. */
2117 if (arg_timer_slack_nsec
!= NSEC_INFINITY
)
2118 if (prctl(PR_SET_TIMERSLACK
, arg_timer_slack_nsec
) < 0)
2119 log_warning_errno(errno
, "Failed to adjust timer slack, ignoring: %m");
2121 if (arg_system
&& !cap_test_all(arg_capability_bounding_set
)) {
2122 r
= capability_bounding_set_drop_usermode(arg_capability_bounding_set
);
2124 *ret_error_message
= "Failed to drop capability bounding set of usermode helpers";
2125 return log_emergency_errno(r
, "Failed to drop capability bounding set of usermode helpers: %m");
2128 r
= capability_bounding_set_drop(arg_capability_bounding_set
, true);
2130 *ret_error_message
= "Failed to drop capability bounding set";
2131 return log_emergency_errno(r
, "Failed to drop capability bounding set: %m");
/* Unlike the timer slack and subreaper prctl() calls, a PR_SET_NO_NEW_PRIVS failure is treated as fatal. */
2135 if (arg_system
&& arg_no_new_privs
) {
2136 if (prctl(PR_SET_NO_NEW_PRIVS
, 1, 0, 0, 0) < 0) {
2137 *ret_error_message
= "Failed to disable new privileges";
2138 return log_emergency_errno(errno
, "Failed to disable new privileges: %m");
2142 if (arg_syscall_archs
) {
2143 r
= enforce_syscall_archs(arg_syscall_archs
);
2145 *ret_error_message
= "Failed to set syscall architectures";
2151 /* Become reaper of our children */
2152 if (prctl(PR_SET_CHILD_SUBREAPER
, 1) < 0)
2153 log_warning_errno(errno
, "Failed to make us a subreaper: %m");
2155 /* Bump up RLIMIT_NOFILE for systemd itself */
2156 (void) bump_rlimit_nofile(saved_rlimit_nofile
);
2157 (void) bump_rlimit_memlock(saved_rlimit_memlock
);
/* do_queue_default_job(): selects the unit to activate at startup, loads it and queues a start job for it. */
/* Unit selection: arg_default_unit if set, otherwise SPECIAL_INITRD_TARGET when in_initrd(); */
/* SPECIAL_DEFAULT_TARGET is used otherwise (presumably via an `else` that is not visible in this excerpt). */
/* Loading falls back to SPECIAL_DEFAULT_TARGET (initrd only, and only without an explicit arg_default_unit) */
/* and then to SPECIAL_RESCUE_TARGET. The job is first requested with JOB_ISOLATE and retried with JOB_REPLACE. */
/* On success the job id is stored in m->default_unit_job_id; on the failure paths *ret_error_message is set */
/* to a string literal. NOTE(review): the `m` parameter, the local declarations and several conditions are */
/* not visible in this excerpt. */
2162 static int do_queue_default_job(
2164 const char **ret_error_message
) {
2166 _cleanup_(sd_bus_error_free
) sd_bus_error error
= SD_BUS_ERROR_NULL
;
2172 if (arg_default_unit
)
2173 unit
= arg_default_unit
;
2174 else if (in_initrd())
2175 unit
= SPECIAL_INITRD_TARGET
;
2177 unit
= SPECIAL_DEFAULT_TARGET
;
2179 log_debug("Activating default unit: %s", unit
);
2181 r
= manager_load_startable_unit_or_warn(m
, unit
, NULL
, &target
);
2182 if (r
< 0 && in_initrd() && !arg_default_unit
) {
2183 /* Fall back to default.target, which we used to always use by default. Only do this if no
2184 * explicit configuration was given. */
2186 log_info("Falling back to " SPECIAL_DEFAULT_TARGET
".");
2188 r
= manager_load_startable_unit_or_warn(m
, SPECIAL_DEFAULT_TARGET
, NULL
, &target
);
2191 log_info("Falling back to " SPECIAL_RESCUE_TARGET
".");
2193 r
= manager_load_startable_unit_or_warn(m
, SPECIAL_RESCUE_TARGET
, NULL
, &target
);
/* -ERFKILL from the loader is reported as the rescue target being masked. */
2195 *ret_error_message
= r
== -ERFKILL
? SPECIAL_RESCUE_TARGET
" masked"
2196 : "Failed to load " SPECIAL_RESCUE_TARGET
;
2201 assert(target
->load_state
== UNIT_LOADED
);
2203 r
= manager_add_job(m
, JOB_START
, target
, JOB_ISOLATE
, NULL
, &error
, &job
);
/* When isolating is refused (the triggering condition is not visible here), clear the bus error and retry as a plain start. */
2205 log_debug_errno(r
, "Default target could not be isolated, starting instead: %s", bus_error_message(&error
, r
));
2207 sd_bus_error_free(&error
);
2209 r
= manager_add_job(m
, JOB_START
, target
, JOB_REPLACE
, NULL
, &error
, &job
);
2211 *ret_error_message
= "Failed to start default target";
2212 return log_emergency_errno(r
, "Failed to start default target: %s", bus_error_message(&error
, r
));
2216 *ret_error_message
= "Failed to isolate default target";
2217 return log_emergency_errno(r
, "Failed to isolate default target: %s", bus_error_message(&error
, r
));
2219 log_info("Queued %s job for default target %s.",
2220 job_type_to_string(job
->type
),
2221 unit_status_string(job
->unit
));
2223 m
->default_unit_job_id
= job
->id
;
/* Records the RLIMIT_NOFILE and RLIMIT_MEMLOCK values this process was started with into the two
 * caller-provided structures. A failing getrlimit() only produces a warning; the corresponding
 * structure is then left as the caller initialized it. */
static void save_rlimits(struct rlimit *saved_rlimit_nofile,
                         struct rlimit *saved_rlimit_memlock) {
        int rc;

        assert(saved_rlimit_nofile);
        assert(saved_rlimit_memlock);

        /* Number of open file descriptors */
        rc = getrlimit(RLIMIT_NOFILE, saved_rlimit_nofile);
        if (rc < 0)
                log_warning_errno(errno, "Reading RLIMIT_NOFILE failed, ignoring: %m");

        /* Amount of lockable memory */
        rc = getrlimit(RLIMIT_MEMLOCK, saved_rlimit_memlock);
        if (rc < 0)
                log_warning_errno(errno, "Reading RLIMIT_MEMLOCK failed, ignoring: %m");
}
/* fallback_rlimit_nofile(): provides a default RLIMIT_NOFILE entry in arg_default_rlimit[] when none is set yet. */
/* The entry starts as a newdup() copy of *saved_rlimit_nofile. Its hard limit is raised to at least */
/* HIGH_RLIMIT_NOFILE but never above read_nr_open(); its soft limit is capped at FD_SETSIZE. */
/* NOTE(review): the early-exit body, the allocation-failure handling and any guard around the hard-limit */
/* bump are not visible in this excerpt. */
2241 static void fallback_rlimit_nofile(const struct rlimit
*saved_rlimit_nofile
) {
2244 if (arg_default_rlimit
[RLIMIT_NOFILE
])
2247 /* Make sure forked processes get limits based on the original kernel setting */
2249 rl
= newdup(struct rlimit
, saved_rlimit_nofile
, 1);
2255 /* Bump the hard limit for system services to a substantially higher value. The default
2256 * hard limit current kernels set is pretty low (4K), mostly for historical
2257 * reasons. According to kernel developers, the fd handling in recent kernels has been
2258 * optimized substantially enough, so that we can bump the limit now, without paying too
2259 * high a price in memory or performance. Note however that we only bump the hard limit,
2260 * not the soft limit. That's because select() works the way it works, and chokes on fds
2261 * >= 1024. If we'd bump the soft limit globally, it might accidentally happen to
2262 * unexpecting programs that they get fds higher than what they can process using
2263 * select(). By only bumping the hard limit but leaving the low limit as it is we avoid
2264 * this pitfall: programs that are written by folks aware of the select() problem in mind
2265 * (and thus use poll()/epoll instead of select(), the way everybody should) can
2266 * explicitly opt into high fds by bumping their soft limit beyond 1024, to the hard limit
2271 /* Get the underlying absolute limit the kernel enforces */
2272 nr
= read_nr_open();
2274 rl
->rlim_max
= MIN((rlim_t
) nr
, MAX(rl
->rlim_max
, (rlim_t
) HIGH_RLIMIT_NOFILE
));
2277 /* If for some reason we were invoked with a soft limit above 1024 (which should never
2278 * happen!, but who knows what we get passed in from pam_limit when invoked as --user
2279 * instance), then lower what we pass on to not confuse our children */
2280 rl
->rlim_cur
= MIN(rl
->rlim_cur
, (rlim_t
) FD_SETSIZE
);
/* Ownership of the adjusted copy moves into the defaults table. */
2282 arg_default_rlimit
[RLIMIT_NOFILE
] = rl
;
/* fallback_rlimit_memlock(): provides a default RLIMIT_MEMLOCK entry in arg_default_rlimit[] when none is set yet. */
/* The entry is an unmodified newdup() copy of *saved_rlimit_memlock, i.e. the limit the manager was started with. */
/* NOTE(review): the early-exit body and the allocation-failure handling are not visible in this excerpt. */
2285 static void fallback_rlimit_memlock(const struct rlimit
*saved_rlimit_memlock
) {
2288 /* Pass the original value down to invoked processes */
2290 if (arg_default_rlimit
[RLIMIT_MEMLOCK
])
2293 rl
= newdup(struct rlimit
, saved_rlimit_memlock
, 1);
2299 arg_default_rlimit
[RLIMIT_MEMLOCK
] = rl
;
2302 static void setenv_manager_environment(void) {
2306 STRV_FOREACH(p
, arg_manager_environment
) {
2307 log_debug("Setting '%s' in our own environment.", *p
);
2309 r
= putenv_dup(*p
, true);
2311 log_warning_errno(errno
, "Failed to setenv \"%s\", ignoring: %m", *p
);
2315 static void reset_arguments(void) {
2316 /* Frees/resets arg_* variables, with a few exceptions commented below. */
2318 arg_default_unit
= mfree(arg_default_unit
);
2320 /* arg_system — ignore */
2322 arg_dump_core
= true;
2323 arg_crash_chvt
= -1;
2324 arg_crash_shell
= false;
2325 arg_crash_reboot
= false;
2326 arg_confirm_spawn
= mfree(arg_confirm_spawn
);
2327 arg_show_status
= _SHOW_STATUS_INVALID
;
2328 arg_status_unit_format
= STATUS_UNIT_FORMAT_DEFAULT
;
2329 arg_switched_root
= false;
2330 arg_pager_flags
= 0;
2331 arg_service_watchdogs
= true;
2332 arg_default_std_output
= EXEC_OUTPUT_JOURNAL
;
2333 arg_default_std_error
= EXEC_OUTPUT_INHERIT
;
2334 arg_default_restart_usec
= DEFAULT_RESTART_USEC
;
2335 arg_default_timeout_start_usec
= DEFAULT_TIMEOUT_USEC
;
2336 arg_default_timeout_stop_usec
= DEFAULT_TIMEOUT_USEC
;
2337 arg_default_timeout_abort_usec
= DEFAULT_TIMEOUT_USEC
;
2338 arg_default_timeout_abort_set
= false;
2339 arg_default_start_limit_interval
= DEFAULT_START_LIMIT_INTERVAL
;
2340 arg_default_start_limit_burst
= DEFAULT_START_LIMIT_BURST
;
2341 arg_runtime_watchdog
= 0;
2342 arg_reboot_watchdog
= 10 * USEC_PER_MINUTE
;
2343 arg_kexec_watchdog
= 0;
2344 arg_early_core_pattern
= NULL
;
2345 arg_watchdog_device
= NULL
;
2347 arg_default_environment
= strv_free(arg_default_environment
);
2348 arg_manager_environment
= strv_free(arg_manager_environment
);
2349 rlimit_free_all(arg_default_rlimit
);
2351 arg_capability_bounding_set
= CAP_ALL
;
2352 arg_no_new_privs
= false;
2353 arg_timer_slack_nsec
= NSEC_INFINITY
;
2354 arg_default_timer_accuracy_usec
= 1 * USEC_PER_MINUTE
;
2356 arg_syscall_archs
= set_free(arg_syscall_archs
);
2358 /* arg_serialization — ignore */
2360 arg_default_cpu_accounting
= -1;
2361 arg_default_io_accounting
= false;
2362 arg_default_ip_accounting
= false;
2363 arg_default_blockio_accounting
= false;
2364 arg_default_memory_accounting
= MEMORY_ACCOUNTING_DEFAULT
;
2365 arg_default_tasks_accounting
= true;
2366 arg_default_tasks_max
= DEFAULT_TASKS_MAX
;
2367 arg_machine_id
= (sd_id128_t
) {};
2368 arg_cad_burst_action
= EMERGENCY_ACTION_REBOOT_FORCE
;
2369 arg_default_oom_policy
= OOM_STOP
;
2371 cpu_set_reset(&arg_cpu_affinity
);
2372 numa_policy_reset(&arg_numa_policy
);
2374 arg_random_seed
= mfree(arg_random_seed
);
2375 arg_random_seed_size
= 0;
/* parse_configuration(): builds the effective manager configuration from the config file and kernel command line. */
/* Parse failures of either source are logged as warnings and ignored. Afterwards the function fills in */
/* fallback RLIMIT_NOFILE/RLIMIT_MEMLOCK defaults from the saved limits, re-reads the logging environment, */
/* defaults arg_show_status to SHOW_STATUS_YES if still unset, and exports the manager environment. */
/* NOTE(review): the statement following "Assign configuration defaults" and any guard around the kernel */
/* command line parsing are not visible in this excerpt. */
2379 static int parse_configuration(const struct rlimit
*saved_rlimit_nofile
,
2380 const struct rlimit
*saved_rlimit_memlock
) {
2383 assert(saved_rlimit_nofile
);
2384 assert(saved_rlimit_memlock
);
2386 /* Assign configuration defaults */
2389 r
= parse_config_file();
2391 log_warning_errno(r
, "Failed to parse config file, ignoring: %m");
2394 r
= proc_cmdline_parse(parse_proc_cmdline_item
, NULL
, 0);
2396 log_warning_errno(r
, "Failed to parse kernel command line, ignoring: %m");
2399 /* Initialize some default rlimits for services if they haven't been configured */
2400 fallback_rlimit_nofile(saved_rlimit_nofile
);
2401 fallback_rlimit_memlock(saved_rlimit_memlock
);
2403 /* Note that this also parses bits from the kernel command line, including "debug". */
2404 log_parse_environment();
2406 /* Initialize the show status setting if it hasn't been set explicitly yet */
2407 if (arg_show_status
== _SHOW_STATUS_INVALID
)
2408 arg_show_status
= SHOW_STATUS_YES
;
2410 /* Push variables into the manager environment block */
2411 setenv_manager_environment();
/* safety_checks(): refuses combinations of PID, requested action and environment that must not proceed. */
/* Each failing check logs an error and returns the value of log_error_errno() for a synthetic errno */
/* (EPERM, EOPNOTSUPP or EUNATCH, depending on the check). */
/* NOTE(review): part of most conditions is not visible in this excerpt; the messages below indicate what */
/* each check is about, but confirm the exact predicates against the full file. */
2416 static int safety_checks(void) {
2418 if (getpid_cached() == 1 &&
2419 arg_action
!= ACTION_RUN
)
2420 return log_error_errno(SYNTHETIC_ERRNO(EPERM
),
2421 "Unsupported execution mode while PID 1.");
2423 if (getpid_cached() == 1 &&
2425 return log_error_errno(SYNTHETIC_ERRNO(EPERM
),
2426 "Can't run --user mode as PID 1.");
2428 if (arg_action
== ACTION_RUN
&&
2430 getpid_cached() != 1)
2431 return log_error_errno(SYNTHETIC_ERRNO(EPERM
),
2432 "Can't run system mode unless PID 1.");
2434 if (arg_action
== ACTION_TEST
&&
2436 return log_error_errno(SYNTHETIC_ERRNO(EPERM
),
2437 "Don't run test mode as root.");
2440 arg_action
== ACTION_RUN
&&
2442 return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP
),
2443 "Trying to run as user instance, but the system has not been booted with systemd.");
2446 arg_action
== ACTION_RUN
&&
2447 !getenv("XDG_RUNTIME_DIR"))
2448 return log_error_errno(SYNTHETIC_ERRNO(EUNATCH
),
2449 "Trying to run as user instance, but $XDG_RUNTIME_DIR is not set.");
2452 arg_action
== ACTION_RUN
&&
2453 running_in_chroot() > 0)
2454 return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP
),
2455 "Cannot be run in a chroot() environment.");
/* initialize_security(): runs the security policy setup steps and timestamps the phase. */
/* *security_start_timestamp is taken before the first step and *security_finish_timestamp after the last. */
/* Visible steps: mac_selinux_setup() and mac_smack_setup(), which both receive loaded_policy, then */
/* mac_apparmor_setup(); an IMA step is implied by its error message but its call is not visible here. */
/* For each step *ret_error_message is assigned a string literal naming the policy that failed to load. */
/* NOTE(review): the result checks and return statements are not visible in this excerpt. */
2460 static int initialize_security(
2461 bool *loaded_policy
,
2462 dual_timestamp
*security_start_timestamp
,
2463 dual_timestamp
*security_finish_timestamp
,
2464 const char **ret_error_message
) {
2468 assert(loaded_policy
);
2469 assert(security_start_timestamp
);
2470 assert(security_finish_timestamp
);
2471 assert(ret_error_message
);
2473 dual_timestamp_get(security_start_timestamp
);
2475 r
= mac_selinux_setup(loaded_policy
);
2477 *ret_error_message
= "Failed to load SELinux policy";
2481 r
= mac_smack_setup(loaded_policy
);
2483 *ret_error_message
= "Failed to load SMACK policy";
2487 r
= mac_apparmor_setup();
2489 *ret_error_message
= "Failed to load AppArmor policy";
2495 *ret_error_message
= "Failed to load IMA policy";
2499 dual_timestamp_get(security_finish_timestamp
);
/* test_summary(): prints the manager's units and then its jobs to stdout, each under a "-> By ...:" heading. */
/* The dump helpers are given "\t" as their third argument (presumably a line prefix; confirm in the dump helpers). */
2503 static void test_summary(Manager
*m
) {
2506 printf("-> By units:\n");
2507 manager_dump_units(m
, stdout
, "\t");
2509 printf("-> By jobs:\n");
2510 manager_dump_jobs(m
, stdout
, "\t");
/* collect_fds(): fills *ret_fds via fdset_new_fill() so the collected descriptors can be handed on later. */
/* The set is passed to fdset_cloexec(..., true). If a serialization stream is open, its descriptor is */
/* removed from the set again; that removal is asserted to succeed. */
/* If filling the set fails, *ret_error_message is set and the log_emergency_errno() result is returned. */
2513 static int collect_fds(FDSet
**ret_fds
, const char **ret_error_message
) {
2517 assert(ret_error_message
);
2519 r
= fdset_new_fill(ret_fds
);
2521 *ret_error_message
= "Failed to allocate fd set";
2522 return log_emergency_errno(r
, "Failed to allocate fd set: %m");
2525 fdset_cloexec(*ret_fds
, true);
2527 if (arg_serialization
)
2528 assert_se(fdset_remove(*ret_fds
, fileno(arg_serialization
)) >= 0);
/* setup_console_terminal(): detaches the manager from any controlling terminal via release_terminal() and, */
/* when running as PID 1 without skip_setup, resets the console with console_setup(). */
/* Return values of both helpers are deliberately ignored. */
/* NOTE(review): several statements (including whatever follows the session-leader comment) are not visible here. */
2533 static void setup_console_terminal(bool skip_setup
) {
2538 /* Become a session leader if we aren't one yet. */
2541 /* If we are init, we connect stdin/stdout/stderr to /dev/null and make sure we don't have a controlling
2543 (void) release_terminal();
2545 /* Reset the console, but only if this is really init and we are freshly booted */
2546 if (getpid_cached() == 1 && !skip_setup
)
2547 (void) console_setup();
/* Takes a quick look at the command line, before real option parsing happens, to decide whether the
 * extensive one-time system setup can be skipped.
 *
 * Returns true when "--deserialize" is present (we are being re-executed), unless "--switched-root"
 * is also present, in which case the setup is always done and false is returned. argv[0] is not
 * inspected. */
static bool early_skip_setup_check(int argc, char *argv[]) {
        bool reexecuting = false;

        for (int idx = 1; idx < argc; idx++) {
                const char *opt = argv[idx];

                /* After a root switch we run the special setup again, even though we deserialize too. */
                if (streq(opt, "--switched-root"))
                        return false;

                if (streq(opt, "--deserialize"))
                        reexecuting = true;
        }

        /* Deserializing means we are re-executing, hence the extensive setup can be avoided. */
        return reexecuting;
}
/* save_env(): takes a copy of the current environment block (environ) with strv_copy() and installs it */
/* into saved_env via strv_free_and_replace(). */
/* NOTE(review): the handling of a failed copy and the return statements are not visible in this excerpt. */
2568 static int save_env(void) {
2571 l
= strv_copy(environ
);
2575 strv_free_and_replace(saved_env
, l
);
2579 int main(int argc
, char *argv
[]) {
2581 dual_timestamp initrd_timestamp
= DUAL_TIMESTAMP_NULL
, userspace_timestamp
= DUAL_TIMESTAMP_NULL
, kernel_timestamp
= DUAL_TIMESTAMP_NULL
,
2582 security_start_timestamp
= DUAL_TIMESTAMP_NULL
, security_finish_timestamp
= DUAL_TIMESTAMP_NULL
;
2583 struct rlimit saved_rlimit_nofile
= RLIMIT_MAKE_CONST(0),
2584 saved_rlimit_memlock
= RLIMIT_MAKE_CONST(RLIM_INFINITY
); /* The original rlimits we passed
2585 * in. Note we use different values
2586 * for the two that indicate whether
2587 * these fields are initialized! */
2588 bool skip_setup
, loaded_policy
= false, queue_default_job
= false, first_boot
= false, reexecute
= false;
2589 char *switch_root_dir
= NULL
, *switch_root_init
= NULL
;
2590 usec_t before_startup
, after_startup
;
2591 static char systemd
[] = "systemd";
2592 char timespan
[FORMAT_TIMESPAN_MAX
];
2593 const char *shutdown_verb
= NULL
, *error_message
= NULL
;
2594 int r
, retval
= EXIT_FAILURE
;
2598 /* SysV compatibility: redirect init → telinit */
2599 redirect_telinit(argc
, argv
);
2601 /* Take timestamps early on */
2602 dual_timestamp_from_monotonic(&kernel_timestamp
, 0);
2603 dual_timestamp_get(&userspace_timestamp
);
2605 /* Figure out whether we need to do initialize the system, or if we already did that because we are
2607 skip_setup
= early_skip_setup_check(argc
, argv
);
2609 /* If we get started via the /sbin/init symlink then we are called 'init'. After a subsequent reexecution we
2610 * are then called 'systemd'. That is confusing, hence let's call us systemd right-away. */
2611 program_invocation_short_name
= systemd
;
2612 (void) prctl(PR_SET_NAME
, systemd
);
2614 /* Save the original command line */
2615 save_argc_argv(argc
, argv
);
2617 /* Save the original environment as we might need to restore it if we're requested to execute another
2618 * system manager later. */
2621 error_message
= "Failed to copy environment block";
2625 /* Make sure that if the user says "syslog" we actually log to the journal. */
2626 log_set_upgrade_syslog_to_journal(true);
2628 if (getpid_cached() == 1) {
2629 /* When we run as PID 1 force system mode */
2632 /* Disable the umask logic */
2635 /* Make sure that at least initially we do not ever log to journald/syslogd, because it might not be
2636 * activated yet (even though the log socket for it exists). */
2637 log_set_prohibit_ipc(true);
2639 /* Always reopen /dev/console when running as PID 1 or one of its pre-execve() children. This is
2640 * important so that we never end up logging to any foreign stderr, for example if we have to log in a
2641 * child process right before execve()'ing the actual binary, at a point in time where socket
2642 * activation stderr/stdout area already set up. */
2643 log_set_always_reopen_console(true);
2645 if (detect_container() <= 0) {
2647 /* Running outside of a container as PID 1 */
2648 log_set_target(LOG_TARGET_KMSG
);
2652 initrd_timestamp
= userspace_timestamp
;
2655 r
= mount_setup_early();
2657 error_message
= "Failed to mount early API filesystems";
2661 /* Let's open the log backend a second time, in case the first time didn't
2662 * work. Quite possibly we have mounted /dev just now, so /dev/kmsg became
2663 * available, and it previously wasn't. */
2666 disable_printk_ratelimit();
2668 r
= initialize_security(
2670 &security_start_timestamp
,
2671 &security_finish_timestamp
,
2677 if (mac_selinux_init() < 0) {
2678 error_message
= "Failed to initialize SELinux support";
2685 /* Set the default for later on, but don't actually open the logs like this for now. Note that
2686 * if we are transitioning from the initrd there might still be journal fd open, and we
2687 * shouldn't attempt opening that before we parsed /proc/cmdline which might redirect output
2689 log_set_target(LOG_TARGET_JOURNAL_OR_KMSG
);
2692 /* Running inside a container, as PID 1 */
2693 log_set_target(LOG_TARGET_CONSOLE
);
2696 /* For later on, see above... */
2697 log_set_target(LOG_TARGET_JOURNAL
);
2699 /* clear the kernel timestamp, because we are in a container */
2700 kernel_timestamp
= DUAL_TIMESTAMP_NULL
;
2703 initialize_coredump(skip_setup
);
2705 r
= fixup_environment();
2707 log_emergency_errno(r
, "Failed to fix up PID 1 environment: %m");
2708 error_message
= "Failed to fix up PID1 environment";
2712 /* Try to figure out if we can use colors with the console. No need to do that for user instances since
2713 * they never log into the console. */
2714 log_show_color(colors_enabled());
2716 r
= make_null_stdio();
2718 log_warning_errno(r
, "Failed to redirect standard streams to /dev/null, ignoring: %m");
2720 /* Load the kernel modules early. */
2724 /* Mount /proc, /sys and friends, so that /proc/cmdline and /proc/$PID/fd is available. */
2725 r
= mount_setup(loaded_policy
, skip_setup
);
2727 error_message
= "Failed to mount API filesystems";
2731 /* The efivarfs is now mounted, let's read the random seed off it */
2732 (void) efi_take_random_seed();
2734 /* Cache command-line options passed from EFI variables */
2736 (void) cache_efi_options_variable();
2738 /* Running as user instance */
2740 log_set_target(LOG_TARGET_AUTO
);
2743 /* clear the kernel timestamp, because we are not PID 1 */
2744 kernel_timestamp
= DUAL_TIMESTAMP_NULL
;
2746 if (mac_selinux_init() < 0) {
2747 error_message
= "Failed to initialize SELinux support";
2752 /* Save the original RLIMIT_NOFILE/RLIMIT_MEMLOCK so that we can reset it later when
2753 * transitioning from the initrd to the main systemd or suchlike. */
2754 save_rlimits(&saved_rlimit_nofile
, &saved_rlimit_memlock
);
2756 /* Reset all signal handlers. */
2757 (void) reset_all_signal_handlers();
2758 (void) ignore_signals(SIGNALS_IGNORE
);
2760 (void) parse_configuration(&saved_rlimit_nofile
, &saved_rlimit_memlock
);
2762 r
= parse_argv(argc
, argv
);
2764 error_message
= "Failed to parse commandline arguments";
2768 r
= safety_checks();
2772 if (IN_SET(arg_action
, ACTION_TEST
, ACTION_HELP
, ACTION_DUMP_CONFIGURATION_ITEMS
, ACTION_DUMP_BUS_PROPERTIES
, ACTION_BUS_INTROSPECT
))
2773 (void) pager_open(arg_pager_flags
);
2775 if (arg_action
!= ACTION_RUN
)
2778 if (arg_action
== ACTION_HELP
) {
2779 retval
= help() < 0 ? EXIT_FAILURE
: EXIT_SUCCESS
;
2781 } else if (arg_action
== ACTION_VERSION
) {
2784 } else if (arg_action
== ACTION_DUMP_CONFIGURATION_ITEMS
) {
2785 unit_dump_config_items(stdout
);
2786 retval
= EXIT_SUCCESS
;
2788 } else if (arg_action
== ACTION_DUMP_BUS_PROPERTIES
) {
2789 dump_bus_properties(stdout
);
2790 retval
= EXIT_SUCCESS
;
2792 } else if (arg_action
== ACTION_BUS_INTROSPECT
) {
2793 r
= bus_manager_introspect_implementations(stdout
, arg_bus_introspect
);
2794 retval
= r
>= 0 ? EXIT_SUCCESS
: EXIT_FAILURE
;
2798 assert_se(IN_SET(arg_action
, ACTION_RUN
, ACTION_TEST
));
2800 /* Move out of the way, so that we won't block unmounts */
2801 assert_se(chdir("/") == 0);
2803 if (arg_action
== ACTION_RUN
) {
2805 /* Apply the systemd.clock_usec= kernel command line switch */
2806 apply_clock_update();
2808 /* Apply random seed from kernel command line */
2809 cmdline_take_random_seed();
2812 /* A core pattern might have been specified via the cmdline. */
2813 initialize_core_pattern(skip_setup
);
2815 /* Close logging fds, in order not to confuse collecting passed fds and terminal logic below */
2818 /* Remember open file descriptors for later deserialization */
2819 r
= collect_fds(&fds
, &error_message
);
2823 /* Give up any control of the console, but make sure its initialized. */
2824 setup_console_terminal(skip_setup
);
2826 /* Open the logging devices, if possible and necessary */
2830 log_execution_mode(&first_boot
);
2832 r
= initialize_runtime(skip_setup
,
2834 &saved_rlimit_nofile
,
2835 &saved_rlimit_memlock
,
2840 r
= manager_new(arg_system
? UNIT_FILE_SYSTEM
: UNIT_FILE_USER
,
2841 arg_action
== ACTION_TEST
? MANAGER_TEST_FULL
: 0,
2844 log_emergency_errno(r
, "Failed to allocate manager object: %m");
2845 error_message
= "Failed to allocate manager object";
2849 m
->timestamps
[MANAGER_TIMESTAMP_KERNEL
] = kernel_timestamp
;
2850 m
->timestamps
[MANAGER_TIMESTAMP_INITRD
] = initrd_timestamp
;
2851 m
->timestamps
[MANAGER_TIMESTAMP_USERSPACE
] = userspace_timestamp
;
2852 m
->timestamps
[manager_timestamp_initrd_mangle(MANAGER_TIMESTAMP_SECURITY_START
)] = security_start_timestamp
;
2853 m
->timestamps
[manager_timestamp_initrd_mangle(MANAGER_TIMESTAMP_SECURITY_FINISH
)] = security_finish_timestamp
;
2855 set_manager_defaults(m
);
2856 set_manager_settings(m
);
2857 manager_set_first_boot(m
, first_boot
);
2859 /* Remember whether we should queue the default job */
2860 queue_default_job
= !arg_serialization
|| arg_switched_root
;
2862 before_startup
= now(CLOCK_MONOTONIC
);
2864 r
= manager_startup(m
, arg_serialization
, fds
);
2866 error_message
= "Failed to start up manager";
2870 /* This will close all file descriptors that were opened, but not claimed by any unit. */
2871 fds
= fdset_free(fds
);
2872 arg_serialization
= safe_fclose(arg_serialization
);
2874 if (queue_default_job
) {
2875 r
= do_queue_default_job(m
, &error_message
);
2880 after_startup
= now(CLOCK_MONOTONIC
);
2882 log_full(arg_action
== ACTION_TEST
? LOG_INFO
: LOG_DEBUG
,
2883 "Loaded units and determined initial transaction in %s.",
2884 format_timespan(timespan
, sizeof(timespan
), after_startup
- before_startup
, 100 * USEC_PER_MSEC
));
2886 if (arg_action
== ACTION_TEST
) {
2888 retval
= EXIT_SUCCESS
;
2892 (void) invoke_main_loop(m
,
2893 &saved_rlimit_nofile
,
2894 &saved_rlimit_memlock
,
2907 arg_reboot_watchdog
= manager_get_watchdog(m
, WATCHDOG_REBOOT
);
2908 arg_kexec_watchdog
= manager_get_watchdog(m
, WATCHDOG_KEXEC
);
2909 m
= manager_free(m
);
2912 mac_selinux_finish();
2915 do_reexecute(argc
, argv
,
2916 &saved_rlimit_nofile
,
2917 &saved_rlimit_memlock
,
2921 &error_message
); /* This only returns if reexecution failed */
2923 arg_serialization
= safe_fclose(arg_serialization
);
2924 fds
= fdset_free(fds
);
2926 saved_env
= strv_free(saved_env
);
2928 #if HAVE_VALGRIND_VALGRIND_H
2929 /* If we are PID 1 and running under valgrind, then let's exit
2930 * here explicitly. valgrind will only generate nice output on
2931 * exit(), not on exec(), hence let's do the former not the
2933 if (getpid_cached() == 1 && RUNNING_ON_VALGRIND
) {
2934 /* Cleanup watchdog_device strings for valgrind. We need them
2935 * in become_shutdown() so normally we cannot free them yet. */
2936 watchdog_free_device();
2937 arg_watchdog_device
= mfree(arg_watchdog_device
);
2943 #if HAS_FEATURE_ADDRESS_SANITIZER
2944 __lsan_do_leak_check();
2947 if (shutdown_verb
) {
2948 r
= become_shutdown(shutdown_verb
, retval
);
2949 log_error_errno(r
, "Failed to execute shutdown binary, %s: %m", getpid_cached() == 1 ? "freezing" : "quitting");
2950 error_message
= "Failed to execute shutdown binary";
2953 watchdog_free_device();
2954 arg_watchdog_device
= mfree(arg_watchdog_device
);
2956 if (getpid_cached() == 1) {
2958 manager_status_printf(NULL
, STATUS_TYPE_EMERGENCY
,
2959 ANSI_HIGHLIGHT_RED
"!!!!!!" ANSI_NORMAL
,
2960 "%s.", error_message
);
2961 freeze_or_exit_or_reboot();