1 /* SPDX-License-Identifier: LGPL-2.1-or-later */
8 #include <sys/reboot.h>
13 #if HAVE_VALGRIND_VALGRIND_H
14 #include <valgrind/valgrind.h>
18 #include "sd-daemon.h"
19 #include "sd-messages.h"
21 #include "alloc-util.h"
22 #include "apparmor-setup.h"
23 #include "architecture.h"
25 #include "bus-error.h"
27 #include "capability-util.h"
28 #include "cgroup-util.h"
29 #include "clock-util.h"
30 #include "conf-parser.h"
31 #include "cpu-set-util.h"
32 #include "dbus-manager.h"
35 #include "dev-setup.h"
36 #include "efi-random.h"
38 #include "emergency-action.h"
40 #include "exit-status.h"
44 #include "format-util.h"
46 #include "hexdecoct.h"
47 #include "hostname-setup.h"
48 #include "ima-setup.h"
50 #include "kmod-setup.h"
51 #include "limits-util.h"
52 #include "load-fragment.h"
54 #include "loopback-setup.h"
55 #include "machine-id-setup.h"
58 #include "mount-setup.h"
61 #include "parse-argument.h"
62 #include "parse-util.h"
63 #include "path-util.h"
64 #include "pretty-print.h"
65 #include "proc-cmdline.h"
66 #include "process-util.h"
67 #include "random-util.h"
68 #include "raw-clone.h"
69 #include "rlimit-util.h"
71 #include "seccomp-util.h"
73 #include "selinux-setup.h"
74 #include "selinux-util.h"
75 #include "signal-util.h"
76 #include "smack-setup.h"
78 #include "stat-util.h"
79 #include "stdio-util.h"
81 #include "switch-root.h"
82 #include "sysctl-util.h"
83 #include "terminal-util.h"
84 #include "umask-util.h"
85 #include "user-util.h"
90 #if HAS_FEATURE_ADDRESS_SANITIZER
91 #include <sanitizer/lsan_interface.h>
94 #define DEFAULT_TASKS_MAX ((TasksMax) { 15U, 100U }) /* 15% */
101 ACTION_DUMP_CONFIGURATION_ITEMS
,
102 ACTION_DUMP_BUS_PROPERTIES
,
103 ACTION_BUS_INTROSPECT
,
104 } arg_action
= ACTION_RUN
;
106 static const char *arg_bus_introspect
= NULL
;
108 /* Those variables are initialized to 0 automatically, so we avoid uninitialized memory access. Real
109 * defaults are assigned in reset_arguments() below. */
110 static char *arg_default_unit
;
111 static bool arg_system
;
112 static bool arg_dump_core
;
113 static int arg_crash_chvt
;
114 static bool arg_crash_shell
;
115 static bool arg_crash_reboot
;
116 static char *arg_confirm_spawn
;
117 static ShowStatus arg_show_status
;
118 static StatusUnitFormat arg_status_unit_format
;
119 static bool arg_switched_root
;
120 static PagerFlags arg_pager_flags
;
121 static bool arg_service_watchdogs
;
122 static ExecOutput arg_default_std_output
;
123 static ExecOutput arg_default_std_error
;
124 static usec_t arg_default_restart_usec
;
125 static usec_t arg_default_timeout_start_usec
;
126 static usec_t arg_default_timeout_stop_usec
;
127 static usec_t arg_default_timeout_abort_usec
;
128 static bool arg_default_timeout_abort_set
;
129 static usec_t arg_default_start_limit_interval
;
130 static unsigned arg_default_start_limit_burst
;
131 static usec_t arg_runtime_watchdog
;
132 static usec_t arg_reboot_watchdog
;
133 static usec_t arg_kexec_watchdog
;
134 static char *arg_early_core_pattern
;
135 static char *arg_watchdog_device
;
136 static char **arg_default_environment
;
137 static struct rlimit
*arg_default_rlimit
[_RLIMIT_MAX
];
138 static uint64_t arg_capability_bounding_set
;
139 static bool arg_no_new_privs
;
140 static nsec_t arg_timer_slack_nsec
;
141 static usec_t arg_default_timer_accuracy_usec
;
142 static Set
* arg_syscall_archs
;
143 static FILE* arg_serialization
;
144 static int arg_default_cpu_accounting
;
145 static bool arg_default_io_accounting
;
146 static bool arg_default_ip_accounting
;
147 static bool arg_default_blockio_accounting
;
148 static bool arg_default_memory_accounting
;
149 static bool arg_default_tasks_accounting
;
150 static TasksMax arg_default_tasks_max
;
151 static sd_id128_t arg_machine_id
;
152 static EmergencyAction arg_cad_burst_action
;
153 static OOMPolicy arg_default_oom_policy
;
154 static CPUSet arg_cpu_affinity
;
155 static NUMAPolicy arg_numa_policy
;
156 static usec_t arg_clock_usec
;
157 static void *arg_random_seed
;
158 static size_t arg_random_seed_size
;
160 /* A copy of the original environment block */
161 static char **saved_env
= NULL
;
163 static int parse_configuration(const struct rlimit
*saved_rlimit_nofile
,
164 const struct rlimit
*saved_rlimit_memlock
);
166 _noreturn_
static void freeze_or_exit_or_reboot(void) {
168 /* If we are running in a container, let's prefer exiting, after all we can propagate an exit code to
169 * the container manager, and thus inform it that something went wrong. */
170 if (detect_container() > 0) {
171 log_emergency("Exiting PID 1...");
172 _exit(EXIT_EXCEPTION
);
175 if (arg_crash_reboot
) {
176 log_notice("Rebooting in 10s...");
179 log_notice("Rebooting now...");
180 (void) reboot(RB_AUTOBOOT
);
181 log_emergency_errno(errno
, "Failed to reboot: %m");
184 log_emergency("Freezing execution.");
188 _noreturn_
static void crash(int sig
) {
192 if (getpid_cached() != 1)
193 /* Pass this on immediately, if this is not PID 1 */
195 else if (!arg_dump_core
)
196 log_emergency("Caught <%s>, not dumping core.", signal_to_string(sig
));
198 sa
= (struct sigaction
) {
199 .sa_handler
= nop_signal_handler
,
200 .sa_flags
= SA_NOCLDSTOP
|SA_RESTART
,
203 /* We want to wait for the core process, hence let's enable SIGCHLD */
204 (void) sigaction(SIGCHLD
, &sa
, NULL
);
206 pid
= raw_clone(SIGCHLD
);
208 log_emergency_errno(errno
, "Caught <%s>, cannot fork for core dump: %m", signal_to_string(sig
));
210 /* Enable default signal handler for core dump */
212 sa
= (struct sigaction
) {
213 .sa_handler
= SIG_DFL
,
215 (void) sigaction(sig
, &sa
, NULL
);
217 /* Don't limit the coredump size */
218 (void) setrlimit(RLIMIT_CORE
, &RLIMIT_MAKE_CONST(RLIM_INFINITY
));
220 /* Just to be sure... */
223 /* Raise the signal again */
225 (void) kill(pid
, sig
); /* raise() would kill the parent */
227 assert_not_reached("We shouldn't be here...");
228 _exit(EXIT_EXCEPTION
);
233 /* Order things nicely. */
234 r
= wait_for_terminate(pid
, &status
);
236 log_emergency_errno(r
, "Caught <%s>, waitpid() failed: %m", signal_to_string(sig
));
237 else if (status
.si_code
!= CLD_DUMPED
) {
238 const char *s
= status
.si_code
== CLD_EXITED
239 ? exit_status_to_string(status
.si_status
, EXIT_STATUS_LIBC
)
240 : signal_to_string(status
.si_status
);
242 log_emergency("Caught <%s>, core dump failed (child "PID_FMT
", code=%s, status=%i/%s).",
243 signal_to_string(sig
),
245 sigchld_code_to_string(status
.si_code
),
246 status
.si_status
, strna(s
));
248 log_emergency("Caught <%s>, dumped core as pid "PID_FMT
".",
249 signal_to_string(sig
), pid
);
253 if (arg_crash_chvt
>= 0)
254 (void) chvt(arg_crash_chvt
);
256 sa
= (struct sigaction
) {
257 .sa_handler
= SIG_IGN
,
258 .sa_flags
= SA_NOCLDSTOP
|SA_NOCLDWAIT
|SA_RESTART
,
261 /* Let the kernel reap children for us */
262 (void) sigaction(SIGCHLD
, &sa
, NULL
);
264 if (arg_crash_shell
) {
265 log_notice("Executing crash shell in 10s...");
268 pid
= raw_clone(SIGCHLD
);
270 log_emergency_errno(errno
, "Failed to fork off crash shell: %m");
273 (void) make_console_stdio();
274 (void) rlimit_nofile_safe();
275 (void) execle("/bin/sh", "/bin/sh", NULL
, environ
);
277 log_emergency_errno(errno
, "execle() failed: %m");
278 _exit(EXIT_EXCEPTION
);
280 log_info("Spawned crash shell as PID "PID_FMT
".", pid
);
281 (void) wait_for_terminate(pid
, NULL
);
285 freeze_or_exit_or_reboot();
288 static void install_crash_handler(void) {
289 static const struct sigaction sa
= {
291 .sa_flags
= SA_NODEFER
, /* So that we can raise the signal again from the signal handler */
295 /* We ignore the return value here, since we don't mind if we
296 * cannot set up a crash handler */
297 r
= sigaction_many(&sa
, SIGNALS_CRASH_HANDLER
, -1);
299 log_debug_errno(r
, "I had trouble setting up the crash handler, ignoring: %m");
302 static int console_setup(void) {
303 _cleanup_close_
int tty_fd
= -1;
306 tty_fd
= open_terminal("/dev/console", O_WRONLY
|O_NOCTTY
|O_CLOEXEC
);
308 return log_error_errno(tty_fd
, "Failed to open /dev/console: %m");
310 /* We don't want to force text mode. plymouth may be showing
311 * pictures already from initrd. */
312 r
= reset_terminal_fd(tty_fd
, false);
314 return log_error_errno(r
, "Failed to reset /dev/console: %m");
319 static int set_machine_id(const char *m
) {
323 if (sd_id128_from_string(m
, &t
) < 0)
326 if (sd_id128_is_null(t
))
333 static int parse_proc_cmdline_item(const char *key
, const char *value
, void *data
) {
338 if (STR_IN_SET(key
, "systemd.unit", "rd.systemd.unit")) {
340 if (proc_cmdline_value_missing(key
, value
))
343 if (!unit_name_is_valid(value
, UNIT_NAME_PLAIN
|UNIT_NAME_INSTANCE
))
344 log_warning("Unit name specified on %s= is not valid, ignoring: %s", key
, value
);
345 else if (in_initrd() == !!startswith(key
, "rd."))
346 return free_and_strdup_warn(&arg_default_unit
, value
);
348 } else if (proc_cmdline_key_streq(key
, "systemd.dump_core")) {
350 r
= value
? parse_boolean(value
) : true;
352 log_warning_errno(r
, "Failed to parse dump core switch %s, ignoring: %m", value
);
356 } else if (proc_cmdline_key_streq(key
, "systemd.early_core_pattern")) {
358 if (proc_cmdline_value_missing(key
, value
))
361 if (path_is_absolute(value
))
362 (void) parse_path_argument(value
, false, &arg_early_core_pattern
);
364 log_warning("Specified core pattern '%s' is not an absolute path, ignoring.", value
);
366 } else if (proc_cmdline_key_streq(key
, "systemd.crash_chvt")) {
369 arg_crash_chvt
= 0; /* turn on */
371 r
= parse_crash_chvt(value
, &arg_crash_chvt
);
373 log_warning_errno(r
, "Failed to parse crash chvt switch %s, ignoring: %m", value
);
376 } else if (proc_cmdline_key_streq(key
, "systemd.crash_shell")) {
378 r
= value
? parse_boolean(value
) : true;
380 log_warning_errno(r
, "Failed to parse crash shell switch %s, ignoring: %m", value
);
384 } else if (proc_cmdline_key_streq(key
, "systemd.crash_reboot")) {
386 r
= value
? parse_boolean(value
) : true;
388 log_warning_errno(r
, "Failed to parse crash reboot switch %s, ignoring: %m", value
);
390 arg_crash_reboot
= r
;
392 } else if (proc_cmdline_key_streq(key
, "systemd.confirm_spawn")) {
395 r
= parse_confirm_spawn(value
, &s
);
397 log_warning_errno(r
, "Failed to parse confirm_spawn switch %s, ignoring: %m", value
);
399 free_and_replace(arg_confirm_spawn
, s
);
401 } else if (proc_cmdline_key_streq(key
, "systemd.service_watchdogs")) {
403 r
= value
? parse_boolean(value
) : true;
405 log_warning_errno(r
, "Failed to parse service watchdog switch %s, ignoring: %m", value
);
407 arg_service_watchdogs
= r
;
409 } else if (proc_cmdline_key_streq(key
, "systemd.show_status")) {
412 r
= parse_show_status(value
, &arg_show_status
);
414 log_warning_errno(r
, "Failed to parse show status switch %s, ignoring: %m", value
);
416 arg_show_status
= SHOW_STATUS_YES
;
418 } else if (proc_cmdline_key_streq(key
, "systemd.status_unit_format")) {
420 if (proc_cmdline_value_missing(key
, value
))
423 r
= status_unit_format_from_string(value
);
425 log_warning_errno(r
, "Failed to parse %s=%s, ignoring: %m", key
, value
);
427 arg_status_unit_format
= r
;
429 } else if (proc_cmdline_key_streq(key
, "systemd.default_standard_output")) {
431 if (proc_cmdline_value_missing(key
, value
))
434 r
= exec_output_from_string(value
);
436 log_warning_errno(r
, "Failed to parse default standard output switch %s, ignoring: %m", value
);
438 arg_default_std_output
= r
;
440 } else if (proc_cmdline_key_streq(key
, "systemd.default_standard_error")) {
442 if (proc_cmdline_value_missing(key
, value
))
445 r
= exec_output_from_string(value
);
447 log_warning_errno(r
, "Failed to parse default standard error switch %s, ignoring: %m", value
);
449 arg_default_std_error
= r
;
451 } else if (streq(key
, "systemd.setenv")) {
453 if (proc_cmdline_value_missing(key
, value
))
456 if (!env_assignment_is_valid(value
))
457 log_warning("Environment variable assignment '%s' is not valid. Ignoring.", value
);
459 r
= strv_env_replace_strdup(&arg_default_environment
, value
);
464 } else if (proc_cmdline_key_streq(key
, "systemd.machine_id")) {
466 if (proc_cmdline_value_missing(key
, value
))
469 r
= set_machine_id(value
);
471 log_warning_errno(r
, "MachineID '%s' is not valid, ignoring: %m", value
);
473 } else if (proc_cmdline_key_streq(key
, "systemd.default_timeout_start_sec")) {
475 if (proc_cmdline_value_missing(key
, value
))
478 r
= parse_sec(value
, &arg_default_timeout_start_usec
);
480 log_warning_errno(r
, "Failed to parse default start timeout '%s', ignoring: %m", value
);
482 if (arg_default_timeout_start_usec
<= 0)
483 arg_default_timeout_start_usec
= USEC_INFINITY
;
485 } else if (proc_cmdline_key_streq(key
, "systemd.cpu_affinity")) {
487 if (proc_cmdline_value_missing(key
, value
))
490 r
= parse_cpu_set(value
, &arg_cpu_affinity
);
492 log_warning_errno(r
, "Failed to parse CPU affinity mask '%s', ignoring: %m", value
);
494 } else if (proc_cmdline_key_streq(key
, "systemd.watchdog_device")) {
496 if (proc_cmdline_value_missing(key
, value
))
499 (void) parse_path_argument(value
, false, &arg_watchdog_device
);
501 } else if (proc_cmdline_key_streq(key
, "systemd.clock_usec")) {
503 if (proc_cmdline_value_missing(key
, value
))
506 r
= safe_atou64(value
, &arg_clock_usec
);
508 log_warning_errno(r
, "Failed to parse systemd.clock_usec= argument, ignoring: %s", value
);
510 } else if (proc_cmdline_key_streq(key
, "systemd.random_seed")) {
514 if (proc_cmdline_value_missing(key
, value
))
517 r
= unbase64mem(value
, (size_t) -1, &p
, &sz
);
519 log_warning_errno(r
, "Failed to parse systemd.random_seed= argument, ignoring: %s", value
);
521 free(arg_random_seed
);
522 arg_random_seed
= sz
> 0 ? p
: mfree(p
);
523 arg_random_seed_size
= sz
;
525 } else if (streq(key
, "quiet") && !value
) {
527 if (arg_show_status
== _SHOW_STATUS_INVALID
)
528 arg_show_status
= SHOW_STATUS_ERROR
;
530 } else if (streq(key
, "debug") && !value
) {
532 /* Note that log_parse_environment() handles 'debug'
533 * too, and sets the log level to LOG_DEBUG. */
535 if (detect_container() > 0)
536 log_set_target(LOG_TARGET_CONSOLE
);
541 /* Compatible with SysV, but supported independently even if SysV compatibility is disabled. */
542 target
= runlevel_to_target(key
);
544 return free_and_strdup_warn(&arg_default_unit
, target
);
550 #define DEFINE_SETTER(name, func, descr) \
551 static int name(const char *unit, \
552 const char *filename, \
554 const char *section, \
555 unsigned section_line, \
556 const char *lvalue, \
558 const char *rvalue, \
570 log_syntax(unit, LOG_ERR, filename, line, r, \
571 "Invalid " descr "'%s': %m", \
577 DEFINE_SETTER(config_parse_level2
, log_set_max_level_from_string
, "log level");
578 DEFINE_SETTER(config_parse_target
, log_set_target_from_string
, "target");
579 DEFINE_SETTER(config_parse_color
, log_show_color_from_string
, "color");
580 DEFINE_SETTER(config_parse_location
, log_show_location_from_string
, "location");
581 DEFINE_SETTER(config_parse_time
, log_show_time_from_string
, "time");
583 static int config_parse_default_timeout_abort(
585 const char *filename
,
588 unsigned section_line
,
596 r
= config_parse_timeout_abort(unit
, filename
, line
, section
, section_line
, lvalue
, ltype
, rvalue
,
597 &arg_default_timeout_abort_usec
, userdata
);
599 arg_default_timeout_abort_set
= r
;
603 static int parse_config_file(void) {
604 const ConfigTableItem items
[] = {
605 { "Manager", "LogLevel", config_parse_level2
, 0, NULL
},
606 { "Manager", "LogTarget", config_parse_target
, 0, NULL
},
607 { "Manager", "LogColor", config_parse_color
, 0, NULL
},
608 { "Manager", "LogLocation", config_parse_location
, 0, NULL
},
609 { "Manager", "LogTime", config_parse_time
, 0, NULL
},
610 { "Manager", "DumpCore", config_parse_bool
, 0, &arg_dump_core
},
611 { "Manager", "CrashChVT", /* legacy */ config_parse_crash_chvt
, 0, &arg_crash_chvt
},
612 { "Manager", "CrashChangeVT", config_parse_crash_chvt
, 0, &arg_crash_chvt
},
613 { "Manager", "CrashShell", config_parse_bool
, 0, &arg_crash_shell
},
614 { "Manager", "CrashReboot", config_parse_bool
, 0, &arg_crash_reboot
},
615 { "Manager", "ShowStatus", config_parse_show_status
, 0, &arg_show_status
},
616 { "Manager", "StatusUnitFormat", config_parse_status_unit_format
, 0, &arg_status_unit_format
},
617 { "Manager", "CPUAffinity", config_parse_cpu_affinity2
, 0, &arg_cpu_affinity
},
618 { "Manager", "NUMAPolicy", config_parse_numa_policy
, 0, &arg_numa_policy
.type
},
619 { "Manager", "NUMAMask", config_parse_numa_mask
, 0, &arg_numa_policy
},
620 { "Manager", "JoinControllers", config_parse_warn_compat
, DISABLED_CONFIGURATION
, NULL
},
621 { "Manager", "RuntimeWatchdogSec", config_parse_sec
, 0, &arg_runtime_watchdog
},
622 { "Manager", "RebootWatchdogSec", config_parse_sec
, 0, &arg_reboot_watchdog
},
623 { "Manager", "ShutdownWatchdogSec", config_parse_sec
, 0, &arg_reboot_watchdog
}, /* obsolete alias */
624 { "Manager", "KExecWatchdogSec", config_parse_sec
, 0, &arg_kexec_watchdog
},
625 { "Manager", "WatchdogDevice", config_parse_path
, 0, &arg_watchdog_device
},
626 { "Manager", "CapabilityBoundingSet", config_parse_capability_set
, 0, &arg_capability_bounding_set
},
627 { "Manager", "NoNewPrivileges", config_parse_bool
, 0, &arg_no_new_privs
},
629 { "Manager", "SystemCallArchitectures", config_parse_syscall_archs
, 0, &arg_syscall_archs
},
631 { "Manager", "TimerSlackNSec", config_parse_nsec
, 0, &arg_timer_slack_nsec
},
632 { "Manager", "DefaultTimerAccuracySec", config_parse_sec
, 0, &arg_default_timer_accuracy_usec
},
633 { "Manager", "DefaultStandardOutput", config_parse_output_restricted
, 0, &arg_default_std_output
},
634 { "Manager", "DefaultStandardError", config_parse_output_restricted
, 0, &arg_default_std_error
},
635 { "Manager", "DefaultTimeoutStartSec", config_parse_sec
, 0, &arg_default_timeout_start_usec
},
636 { "Manager", "DefaultTimeoutStopSec", config_parse_sec
, 0, &arg_default_timeout_stop_usec
},
637 { "Manager", "DefaultTimeoutAbortSec", config_parse_default_timeout_abort
, 0, NULL
},
638 { "Manager", "DefaultRestartSec", config_parse_sec
, 0, &arg_default_restart_usec
},
639 { "Manager", "DefaultStartLimitInterval", config_parse_sec
, 0, &arg_default_start_limit_interval
}, /* obsolete alias */
640 { "Manager", "DefaultStartLimitIntervalSec", config_parse_sec
, 0, &arg_default_start_limit_interval
},
641 { "Manager", "DefaultStartLimitBurst", config_parse_unsigned
, 0, &arg_default_start_limit_burst
},
642 { "Manager", "DefaultEnvironment", config_parse_environ
, 0, &arg_default_environment
},
643 { "Manager", "DefaultLimitCPU", config_parse_rlimit
, RLIMIT_CPU
, arg_default_rlimit
},
644 { "Manager", "DefaultLimitFSIZE", config_parse_rlimit
, RLIMIT_FSIZE
, arg_default_rlimit
},
645 { "Manager", "DefaultLimitDATA", config_parse_rlimit
, RLIMIT_DATA
, arg_default_rlimit
},
646 { "Manager", "DefaultLimitSTACK", config_parse_rlimit
, RLIMIT_STACK
, arg_default_rlimit
},
647 { "Manager", "DefaultLimitCORE", config_parse_rlimit
, RLIMIT_CORE
, arg_default_rlimit
},
648 { "Manager", "DefaultLimitRSS", config_parse_rlimit
, RLIMIT_RSS
, arg_default_rlimit
},
649 { "Manager", "DefaultLimitNOFILE", config_parse_rlimit
, RLIMIT_NOFILE
, arg_default_rlimit
},
650 { "Manager", "DefaultLimitAS", config_parse_rlimit
, RLIMIT_AS
, arg_default_rlimit
},
651 { "Manager", "DefaultLimitNPROC", config_parse_rlimit
, RLIMIT_NPROC
, arg_default_rlimit
},
652 { "Manager", "DefaultLimitMEMLOCK", config_parse_rlimit
, RLIMIT_MEMLOCK
, arg_default_rlimit
},
653 { "Manager", "DefaultLimitLOCKS", config_parse_rlimit
, RLIMIT_LOCKS
, arg_default_rlimit
},
654 { "Manager", "DefaultLimitSIGPENDING", config_parse_rlimit
, RLIMIT_SIGPENDING
, arg_default_rlimit
},
655 { "Manager", "DefaultLimitMSGQUEUE", config_parse_rlimit
, RLIMIT_MSGQUEUE
, arg_default_rlimit
},
656 { "Manager", "DefaultLimitNICE", config_parse_rlimit
, RLIMIT_NICE
, arg_default_rlimit
},
657 { "Manager", "DefaultLimitRTPRIO", config_parse_rlimit
, RLIMIT_RTPRIO
, arg_default_rlimit
},
658 { "Manager", "DefaultLimitRTTIME", config_parse_rlimit
, RLIMIT_RTTIME
, arg_default_rlimit
},
659 { "Manager", "DefaultCPUAccounting", config_parse_tristate
, 0, &arg_default_cpu_accounting
},
660 { "Manager", "DefaultIOAccounting", config_parse_bool
, 0, &arg_default_io_accounting
},
661 { "Manager", "DefaultIPAccounting", config_parse_bool
, 0, &arg_default_ip_accounting
},
662 { "Manager", "DefaultBlockIOAccounting", config_parse_bool
, 0, &arg_default_blockio_accounting
},
663 { "Manager", "DefaultMemoryAccounting", config_parse_bool
, 0, &arg_default_memory_accounting
},
664 { "Manager", "DefaultTasksAccounting", config_parse_bool
, 0, &arg_default_tasks_accounting
},
665 { "Manager", "DefaultTasksMax", config_parse_tasks_max
, 0, &arg_default_tasks_max
},
666 { "Manager", "CtrlAltDelBurstAction", config_parse_emergency_action
, 0, &arg_cad_burst_action
},
667 { "Manager", "DefaultOOMPolicy", config_parse_oom_policy
, 0, &arg_default_oom_policy
},
671 const char *fn
, *conf_dirs_nulstr
;
674 PKGSYSCONFDIR
"/system.conf" :
675 PKGSYSCONFDIR
"/user.conf";
677 conf_dirs_nulstr
= arg_system
?
678 CONF_PATHS_NULSTR("systemd/system.conf.d") :
679 CONF_PATHS_NULSTR("systemd/user.conf.d");
681 (void) config_parse_many_nulstr(
682 fn
, conf_dirs_nulstr
,
684 config_item_table_lookup
, items
,
689 /* Traditionally "0" was used to turn off the default unit timeouts. Fix this up so that we use USEC_INFINITY
690 * like everywhere else. */
691 if (arg_default_timeout_start_usec
<= 0)
692 arg_default_timeout_start_usec
= USEC_INFINITY
;
693 if (arg_default_timeout_stop_usec
<= 0)
694 arg_default_timeout_stop_usec
= USEC_INFINITY
;
699 static void set_manager_defaults(Manager
*m
) {
703 /* Propagates the various default unit property settings into the manager object, i.e. properties that do not
704 * affect the manager itself, but are just what newly allocated units will have set if they haven't set
705 * anything else. (Also see set_manager_settings() for the settings that affect the manager's own behaviour) */
707 m
->default_timer_accuracy_usec
= arg_default_timer_accuracy_usec
;
708 m
->default_std_output
= arg_default_std_output
;
709 m
->default_std_error
= arg_default_std_error
;
710 m
->default_timeout_start_usec
= arg_default_timeout_start_usec
;
711 m
->default_timeout_stop_usec
= arg_default_timeout_stop_usec
;
712 m
->default_timeout_abort_usec
= arg_default_timeout_abort_usec
;
713 m
->default_timeout_abort_set
= arg_default_timeout_abort_set
;
714 m
->default_restart_usec
= arg_default_restart_usec
;
715 m
->default_start_limit_interval
= arg_default_start_limit_interval
;
716 m
->default_start_limit_burst
= arg_default_start_limit_burst
;
718 /* On 4.15+ with unified hierarchy, CPU accounting is essentially free as it doesn't require the CPU
719 * controller to be enabled, so the default is to enable it unless we got told otherwise. */
720 if (arg_default_cpu_accounting
>= 0)
721 m
->default_cpu_accounting
= arg_default_cpu_accounting
;
723 m
->default_cpu_accounting
= cpu_accounting_is_cheap();
725 m
->default_io_accounting
= arg_default_io_accounting
;
726 m
->default_ip_accounting
= arg_default_ip_accounting
;
727 m
->default_blockio_accounting
= arg_default_blockio_accounting
;
728 m
->default_memory_accounting
= arg_default_memory_accounting
;
729 m
->default_tasks_accounting
= arg_default_tasks_accounting
;
730 m
->default_tasks_max
= arg_default_tasks_max
;
731 m
->default_oom_policy
= arg_default_oom_policy
;
733 (void) manager_set_default_rlimits(m
, arg_default_rlimit
);
735 (void) manager_default_environment(m
);
736 (void) manager_transient_environment_add(m
, arg_default_environment
);
739 static void set_manager_settings(Manager
*m
) {
743 /* Propagates the various manager settings into the manager object, i.e. properties that
744 * affect the manager itself (as opposed to just being inherited into newly allocated
745 * units, see set_manager_defaults() above). */
747 m
->confirm_spawn
= arg_confirm_spawn
;
748 m
->service_watchdogs
= arg_service_watchdogs
;
749 m
->cad_burst_action
= arg_cad_burst_action
;
751 manager_set_watchdog(m
, WATCHDOG_RUNTIME
, arg_runtime_watchdog
);
752 manager_set_watchdog(m
, WATCHDOG_REBOOT
, arg_reboot_watchdog
);
753 manager_set_watchdog(m
, WATCHDOG_KEXEC
, arg_kexec_watchdog
);
755 manager_set_show_status(m
, arg_show_status
, "commandline");
756 m
->status_unit_format
= arg_status_unit_format
;
759 static int parse_argv(int argc
, char *argv
[]) {
761 ARG_LOG_LEVEL
= 0x100,
772 ARG_DUMP_CONFIGURATION_ITEMS
,
773 ARG_DUMP_BUS_PROPERTIES
,
783 ARG_DEFAULT_STD_OUTPUT
,
784 ARG_DEFAULT_STD_ERROR
,
786 ARG_SERVICE_WATCHDOGS
,
789 static const struct option options
[] = {
790 { "log-level", required_argument
, NULL
, ARG_LOG_LEVEL
},
791 { "log-target", required_argument
, NULL
, ARG_LOG_TARGET
},
792 { "log-color", optional_argument
, NULL
, ARG_LOG_COLOR
},
793 { "log-location", optional_argument
, NULL
, ARG_LOG_LOCATION
},
794 { "log-time", optional_argument
, NULL
, ARG_LOG_TIME
},
795 { "unit", required_argument
, NULL
, ARG_UNIT
},
796 { "system", no_argument
, NULL
, ARG_SYSTEM
},
797 { "user", no_argument
, NULL
, ARG_USER
},
798 { "test", no_argument
, NULL
, ARG_TEST
},
799 { "no-pager", no_argument
, NULL
, ARG_NO_PAGER
},
800 { "help", no_argument
, NULL
, 'h' },
801 { "version", no_argument
, NULL
, ARG_VERSION
},
802 { "dump-configuration-items", no_argument
, NULL
, ARG_DUMP_CONFIGURATION_ITEMS
},
803 { "dump-bus-properties", no_argument
, NULL
, ARG_DUMP_BUS_PROPERTIES
},
804 { "bus-introspect", required_argument
, NULL
, ARG_BUS_INTROSPECT
},
805 { "dump-core", optional_argument
, NULL
, ARG_DUMP_CORE
},
806 { "crash-chvt", required_argument
, NULL
, ARG_CRASH_CHVT
},
807 { "crash-shell", optional_argument
, NULL
, ARG_CRASH_SHELL
},
808 { "crash-reboot", optional_argument
, NULL
, ARG_CRASH_REBOOT
},
809 { "confirm-spawn", optional_argument
, NULL
, ARG_CONFIRM_SPAWN
},
810 { "show-status", optional_argument
, NULL
, ARG_SHOW_STATUS
},
811 { "deserialize", required_argument
, NULL
, ARG_DESERIALIZE
},
812 { "switched-root", no_argument
, NULL
, ARG_SWITCHED_ROOT
},
813 { "default-standard-output", required_argument
, NULL
, ARG_DEFAULT_STD_OUTPUT
, },
814 { "default-standard-error", required_argument
, NULL
, ARG_DEFAULT_STD_ERROR
, },
815 { "machine-id", required_argument
, NULL
, ARG_MACHINE_ID
},
816 { "service-watchdogs", required_argument
, NULL
, ARG_SERVICE_WATCHDOGS
},
821 bool user_arg_seen
= false;
826 if (getpid_cached() == 1)
829 while ((c
= getopt_long(argc
, argv
, "hDbsz:", options
, NULL
)) >= 0)
834 r
= log_set_max_level_from_string(optarg
);
836 return log_error_errno(r
, "Failed to parse log level \"%s\": %m", optarg
);
841 r
= log_set_target_from_string(optarg
);
843 return log_error_errno(r
, "Failed to parse log target \"%s\": %m", optarg
);
850 r
= log_show_color_from_string(optarg
);
852 return log_error_errno(r
, "Failed to parse log color setting \"%s\": %m",
855 log_show_color(true);
859 case ARG_LOG_LOCATION
:
861 r
= log_show_location_from_string(optarg
);
863 return log_error_errno(r
, "Failed to parse log location setting \"%s\": %m",
866 log_show_location(true);
873 r
= log_show_time_from_string(optarg
);
875 return log_error_errno(r
, "Failed to parse log time setting \"%s\": %m",
882 case ARG_DEFAULT_STD_OUTPUT
:
883 r
= exec_output_from_string(optarg
);
885 return log_error_errno(r
, "Failed to parse default standard output setting \"%s\": %m",
887 arg_default_std_output
= r
;
890 case ARG_DEFAULT_STD_ERROR
:
891 r
= exec_output_from_string(optarg
);
893 return log_error_errno(r
, "Failed to parse default standard error output setting \"%s\": %m",
895 arg_default_std_error
= r
;
899 r
= free_and_strdup(&arg_default_unit
, optarg
);
901 return log_error_errno(r
, "Failed to set default unit \"%s\": %m", optarg
);
911 user_arg_seen
= true;
915 arg_action
= ACTION_TEST
;
919 arg_pager_flags
|= PAGER_DISABLE
;
923 arg_action
= ACTION_VERSION
;
926 case ARG_DUMP_CONFIGURATION_ITEMS
:
927 arg_action
= ACTION_DUMP_CONFIGURATION_ITEMS
;
930 case ARG_DUMP_BUS_PROPERTIES
:
931 arg_action
= ACTION_DUMP_BUS_PROPERTIES
;
934 case ARG_BUS_INTROSPECT
:
935 arg_bus_introspect
= optarg
;
936 arg_action
= ACTION_BUS_INTROSPECT
;
941 arg_dump_core
= true;
943 r
= parse_boolean(optarg
);
945 return log_error_errno(r
, "Failed to parse dump core boolean: \"%s\": %m",
952 r
= parse_crash_chvt(optarg
, &arg_crash_chvt
);
954 return log_error_errno(r
, "Failed to parse crash virtual terminal index: \"%s\": %m",
958 case ARG_CRASH_SHELL
:
960 arg_crash_shell
= true;
962 r
= parse_boolean(optarg
);
964 return log_error_errno(r
, "Failed to parse crash shell boolean: \"%s\": %m",
970 case ARG_CRASH_REBOOT
:
972 arg_crash_reboot
= true;
974 r
= parse_boolean(optarg
);
976 return log_error_errno(r
, "Failed to parse crash shell boolean: \"%s\": %m",
978 arg_crash_reboot
= r
;
982 case ARG_CONFIRM_SPAWN
:
983 arg_confirm_spawn
= mfree(arg_confirm_spawn
);
985 r
= parse_confirm_spawn(optarg
, &arg_confirm_spawn
);
987 return log_error_errno(r
, "Failed to parse confirm spawn option: \"%s\": %m",
991 case ARG_SERVICE_WATCHDOGS
:
992 r
= parse_boolean(optarg
);
994 return log_error_errno(r
, "Failed to parse service watchdogs boolean: \"%s\": %m",
996 arg_service_watchdogs
= r
;
999 case ARG_SHOW_STATUS
:
1001 r
= parse_show_status(optarg
, &arg_show_status
);
1003 return log_error_errno(r
, "Failed to parse show status boolean: \"%s\": %m",
1006 arg_show_status
= SHOW_STATUS_YES
;
1009 case ARG_DESERIALIZE
: {
1013 r
= safe_atoi(optarg
, &fd
);
1015 log_error_errno(r
, "Failed to parse deserialize option \"%s\": %m", optarg
);
1017 return log_error_errno(SYNTHETIC_ERRNO(EINVAL
),
1018 "Invalid deserialize fd: %d",
1021 (void) fd_cloexec(fd
, true);
1023 f
= fdopen(fd
, "r");
1025 return log_error_errno(errno
, "Failed to open serialization fd %d: %m", fd
);
1027 safe_fclose(arg_serialization
);
1028 arg_serialization
= f
;
1033 case ARG_SWITCHED_ROOT
:
1034 arg_switched_root
= true;
1037 case ARG_MACHINE_ID
:
1038 r
= set_machine_id(optarg
);
1040 return log_error_errno(r
, "MachineID '%s' is not valid: %m", optarg
);
1044 arg_action
= ACTION_HELP
;
1048 log_set_max_level(LOG_DEBUG
);
1054 /* Just to eat away the sysvinit kernel cmdline args that we'll parse in
1055 * parse_proc_cmdline_item() or ignore, without any getopt() error messages.
1058 if (getpid_cached() != 1)
1064 assert_not_reached("Unhandled option code.");
1067 if (optind
< argc
&& getpid_cached() != 1)
1068 /* Hmm, when we aren't run as init system let's complain about excess arguments */
1069 return log_error_errno(SYNTHETIC_ERRNO(EINVAL
), "Excess arguments.");
1071 if (arg_action
== ACTION_RUN
&& !arg_system
&& !user_arg_seen
)
1072 return log_error_errno(SYNTHETIC_ERRNO(EINVAL
),
1073 "Explicit --user argument required to run as user manager.");
/* Prints the usage summary on stdout: the invocation name, a one-line description of the program and
 * the table of supported command line options, followed by a pointer to the manual page. The manual
 * page reference is generated by terminal_urlify_man("systemd", "1", ...) into 'link', which is
 * released automatically through _cleanup_free_. */
1078 static int help(void) {
1079 _cleanup_free_
char *link
= NULL
;
1082 r
= terminal_urlify_man("systemd", "1", &link
);
1086 printf("%s [OPTIONS...]\n\n"
1087 "%sStarts and monitors system and user services.%s\n\n"
1088 "This program takes no positional arguments.\n\n"
1090 " -h --help Show this help\n"
1091 " --version Show version\n"
1092 " --test Determine initial transaction, dump it and exit\n"
1093 " --system In combination with --test: operate as system service manager\n"
1094 " --user In combination with --test: operate as per-user service manager\n"
1095 " --no-pager Do not pipe output into a pager\n"
1096 " --dump-configuration-items Dump understood unit configuration items\n"
1097 " --dump-bus-properties Dump exposed bus properties\n"
1098 " --bus-introspect=PATH Write XML introspection data\n"
1099 " --unit=UNIT Set default unit\n"
1100 " --dump-core[=BOOL] Dump core on crash\n"
1101 " --crash-vt=NR Change to specified VT on crash\n"
1102 " --crash-reboot[=BOOL] Reboot on crash\n"
1103 " --crash-shell[=BOOL] Run shell on crash\n"
1104 " --confirm-spawn[=BOOL] Ask for confirmation when spawning processes\n"
1105 " --show-status[=BOOL] Show status updates on the console during bootup\n"
1106 " --log-target=TARGET Set log target (console, journal, kmsg, journal-or-kmsg, null)\n"
1107 " --log-level=LEVEL Set log level (debug, info, notice, warning, err, crit, alert, emerg)\n"
1108 " --log-color[=BOOL] Highlight important log messages\n"
1109 " --log-location[=BOOL] Include code location in log messages\n"
1110 " --log-time[=BOOL] Prefix log messages with current time\n"
1111 " --default-standard-output= Set default standard output for services\n"
1112 " --default-standard-error= Set default standard error output for services\n"
1113 "\nSee the %s for details.\n",
1114 program_invocation_short_name
,
/* Prepares the manager state for being picked up again after an exec.
 *
 * Opens a serialization stream with manager_open_serialization(), announces the reload on the bus,
 * writes the manager state with manager_serialize(), rewinds the stream and finally clears O_CLOEXEC
 * on both the stream's file descriptor and the collected fd set. Each failing step is logged with
 * log_error_errno() and its result returned.
 *
 * On success ownership of the stream and of the fd set is transferred to the caller through *ret_f and
 * *ret_fds (TAKE_PTR); on failure the _cleanup_ attributes release them. */
1124 static int prepare_reexecute(
1128 bool switching_root
) {
1130 _cleanup_fdset_free_ FDSet
*fds
= NULL
;
1131 _cleanup_fclose_
FILE *f
= NULL
;
1138 r
= manager_open_serialization(m
, &f
);
1140 return log_error_errno(r
, "Failed to create serialization file: %m");
1142 /* Make sure nothing is really destructed when we shut down */
1144 bus_manager_send_reloading(m
, true);
1150 r
= manager_serialize(m
, f
, fds
, switching_root
);
1154 if (fseeko(f
, 0, SEEK_SET
) == (off_t
) -1)
1155 return log_error_errno(errno
, "Failed to rewind serialization fd: %m");
1157 r
= fd_cloexec(fileno(f
), false);
1159 return log_error_errno(r
, "Failed to disable O_CLOEXEC for serialization: %m");
1161 r
= fdset_cloexec(fds
, false);
1163 return log_error_errno(r
, "Failed to disable O_CLOEXEC for serialization fds: %m");
1165 *ret_f
= TAKE_PTR(f
);
1166 *ret_fds
= TAKE_PTR(fds
);
/* Best-effort bump of the fs.file-max and fs.nr_open sysctls. Each half is compiled in only when the
 * matching BUMP_PROC_SYS_FS_FILE_MAX / BUMP_PROC_SYS_FS_NR_OPEN build switch is set. Failures are only
 * logged (at debug level for -EROFS, -EPERM and -EACCES, as a warning otherwise); nothing is returned
 * to the caller. */
1171 static void bump_file_max_and_nr_open(void) {
1173 /* Let's bump fs.file-max and fs.nr_open to their respective maximums. On current kernels large numbers of file
1174 * descriptors are no longer a performance problem and their memory is properly tracked by memcg, thus counting
1175 * them and limiting them in another two layers of limits is unnecessary and just complicates things. This
1176 * function hence turns off 2 of the 4 levels of limits on file descriptors, and makes RLIMIT_NOFILE (soft +
1177 * hard) the only ones that really matter. */
1179 #if BUMP_PROC_SYS_FS_FILE_MAX || BUMP_PROC_SYS_FS_NR_OPEN
1183 #if BUMP_PROC_SYS_FS_FILE_MAX
1184 /* The maximum the kernel allows for this since 5.2 is LONG_MAX, use that. (Previously things were
1185 * different but the operation would fail silently.) */
1186 r
= sysctl_writef("fs/file-max", "%li\n", LONG_MAX
);
1188 log_full_errno(IN_SET(r
, -EROFS
, -EPERM
, -EACCES
) ? LOG_DEBUG
: LOG_WARNING
, r
, "Failed to bump fs.file-max, ignoring: %m");
1191 #if BUMP_PROC_SYS_FS_NR_OPEN
1194 /* Arg! The kernel enforces maximum and minimum values on the fs.nr_open, but we don't really know what they
1195 * are. The expression by which the maximum is determined is dependent on the architecture, and is something we
1196 * don't really want to copy to userspace, as it is dependent on implementation details of the kernel. Since
1197 * the kernel doesn't expose the maximum value to us, we can only try and hope. Hence, let's start with
1198 * INT_MAX, and then keep halving the value until we find one that works. Ugly? Yes, absolutely, but kernel
1199 * APIs are kernel APIs, so what can we do... 🤯 */
1204 v
&= ~(__SIZEOF_POINTER__
- 1); /* Round down to next multiple of the pointer size */
1206 log_warning("Can't bump fs.nr_open, value too small.");
1212 log_error_errno(k
, "Failed to read fs.nr_open: %m");
1215 if (k
>= v
) { /* Already larger */
1216 log_debug("Skipping bump, value is already larger.");
1220 r
= sysctl_writef("fs/nr_open", "%i\n", v
);
1222 log_debug("Couldn't write fs.nr_open as %i, halving it.", v
);
1227 log_full_errno(IN_SET(r
, -EROFS
, -EPERM
, -EACCES
) ? LOG_DEBUG
: LOG_WARNING
, r
, "Failed to bump fs.nr_open, ignoring: %m");
1231 log_debug("Successfully bumped fs.nr_open to %i", v
);
/* Raises RLIMIT_NOFILE (soft and hard) for the manager itself.
 *
 * The target for each of the two values is the larger of read_nr_open() and the corresponding
 * inherited value in *saved_rlimit, so the inherited limit is never lowered. If the inherited limits
 * already reach the target nothing is changed and only a debug message is emitted. Otherwise the new
 * limit is applied with setrlimit_closest(); a failure there is logged as a warning and the result of
 * log_warning_errno() is returned. */
1237 static int bump_rlimit_nofile(struct rlimit
*saved_rlimit
) {
1238 struct rlimit new_rlimit
;
1241 /* Get the underlying absolute limit the kernel enforces */
1242 nr
= read_nr_open();
1244 /* Calculate the new limits to use for us. Never lower from what we inherited. */
1245 new_rlimit
= (struct rlimit
) {
1246 .rlim_cur
= MAX((rlim_t
) nr
, saved_rlimit
->rlim_cur
),
1247 .rlim_max
= MAX((rlim_t
) nr
, saved_rlimit
->rlim_max
),
1250 /* Shortcut if nothing changes. */
1251 if (saved_rlimit
->rlim_max
>= new_rlimit
.rlim_max
&&
1252 saved_rlimit
->rlim_cur
>= new_rlimit
.rlim_cur
) {
1253 log_debug("RLIMIT_NOFILE is already as high or higher than we need it, not bumping.");
1257 /* Bump up the resource limit for ourselves substantially, all the way to the maximum the kernel allows, for
1258 * both hard and soft. */
1259 r
= setrlimit_closest(RLIMIT_NOFILE
, &new_rlimit
);
1261 return log_warning_errno(r
, "Setting RLIMIT_NOFILE failed, ignoring: %m");
1266 static int bump_rlimit_memlock(struct rlimit
*saved_rlimit
) {
1267 struct rlimit new_rlimit
;
1271 /* BPF_MAP_TYPE_LPM_TRIE bpf maps are charged against RLIMIT_MEMLOCK, even if we have CAP_IPC_LOCK which should
1272 * normally disable such checks. We need them to implement IPAddressAllow= and IPAddressDeny=, hence let's bump
1273 * the value high enough for our user. */
1275 /* Using MAX() on resource limits only is safe if RLIM_INFINITY is > 0. POSIX declares that rlim_t
1276 * must be unsigned, hence this is a given, but let's make this clear here. */
1277 assert_cc(RLIM_INFINITY
> 0);
1279 mm
= physical_memory() / 8; /* Let's scale how much we allow to be locked by the amount of physical
1280 * RAM. We allow an eighth to be locked by us, just to pick a value. */
1282 new_rlimit
= (struct rlimit
) {
1283 .rlim_cur
= MAX3(HIGH_RLIMIT_MEMLOCK
, saved_rlimit
->rlim_cur
, mm
),
1284 .rlim_max
= MAX3(HIGH_RLIMIT_MEMLOCK
, saved_rlimit
->rlim_max
, mm
),
1287 if (saved_rlimit
->rlim_max
>= new_rlimit
.rlim_cur
&&
1288 saved_rlimit
->rlim_cur
>= new_rlimit
.rlim_max
) {
1289 log_debug("RLIMIT_MEMLOCK is already as high or higher than we need it, not bumping.");
1293 r
= setrlimit_closest(RLIMIT_MEMLOCK
, &new_rlimit
);
1295 return log_warning_errno(r
, "Setting RLIMIT_MEMLOCK failed, ignoring: %m");
/* Sanity check for split-/usr setups: inspects /usr with dir_is_empty() and logs a warning pointing
 * at the "separate-usr-is-broken" page. NOTE(review): the statement guarded by the "<= 0" check is
 * not visible here; presumably it returns early so that the warning is only emitted when /usr is
 * reported as empty. */
1300 static void test_usr(void) {
1302 /* Check that /usr is either on the same file system as / or mounted already. */
1304 if (dir_is_empty("/usr") <= 0)
1307 log_warning("/usr appears to be on its own filesystem and is not already mounted. This is not a supported setup. "
1308 "Some things will probably break (sometimes even silently) in mysterious ways. "
1309 "Consult http://freedesktop.org/wiki/Software/systemd/separate-usr-is-broken for more information.");
1312 static int enforce_syscall_archs(Set
*archs
) {
1316 if (!is_seccomp_available())
1319 r
= seccomp_restrict_archs(arg_syscall_archs
);
1321 return log_error_errno(r
, "Failed to enforce system call architecture restrication: %m");
/* Prints the "Welcome to <OS>!" line through status_printf(), provided status output is enabled
 * according to show_status_on(arg_show_status).
 *
 * PRETTY_NAME and ANSI_COLOR are read from the os-release file; a failure to read it is only logged
 * (at debug level for -ENOENT, as a warning otherwise). When colored log output is enabled the name is
 * wrapped in the ANSI color sequence, with "1" used if ANSI_COLOR is empty. "Linux" is used whenever
 * PRETTY_NAME is empty. Both strings are freed automatically via _cleanup_free_. */
1326 static int status_welcome(void) {
1327 _cleanup_free_
char *pretty_name
= NULL
, *ansi_color
= NULL
;
1330 if (!show_status_on(arg_show_status
))
1333 r
= parse_os_release(NULL
,
1334 "PRETTY_NAME", &pretty_name
,
1335 "ANSI_COLOR", &ansi_color
,
1338 log_full_errno(r
== -ENOENT
? LOG_DEBUG
: LOG_WARNING
, r
,
1339 "Failed to read os-release file, ignoring: %m");
1341 if (log_get_show_color())
1342 return status_printf(NULL
, 0,
1343 "\nWelcome to \x1B[%sm%s\x1B[0m!\n",
1344 isempty(ansi_color
) ? "1" : ansi_color
,
1345 isempty(pretty_name
) ? "Linux" : pretty_name
);
1347 return status_printf(NULL
, 0,
1348 "\nWelcome to %s!\n",
1349 isempty(pretty_name
) ? "Linux" : pretty_name
);
/* Records the value of the $container environment variable in /run/systemd/container. The file is
 * created with the umask temporarily set to 0022. A write failure is logged as a warning and the
 * result of log_warning_errno() is returned. */
1352 static int write_container_id(void) {
1356 c
= getenv("container");
1360 RUN_WITH_UMASK(0022)
1361 r
= write_string_file("/run/systemd/container", c
, WRITE_STRING_FILE_CREATE
);
1363 return log_warning_errno(r
, "Failed to write /run/systemd/container, ignoring: %m");
/* Raises the net.unix.max_dgram_qlen sysctl to DEFAULT_UNIX_MAX_DGRAM_QLEN.
 *
 * The current value is read from /proc/sys/net/unix/max_dgram_qlen and parsed with safe_atolu(); it
 * is compared against DEFAULT_UNIX_MAX_DGRAM_QLEN before the new value is written. Read, parse and
 * write failures are logged and the result of the logging call is returned; read failures with
 * -ENOENT and write failures with -EROFS, -EPERM or -EACCES are only logged at debug level. */
1368 static int bump_unix_max_dgram_qlen(void) {
1369 _cleanup_free_
char *qlen
= NULL
;
1373 /* Let's bump the net.unix.max_dgram_qlen sysctl. The kernel default of 16 is simply too low. We set the value
1374 * really really early during boot, so that it is actually applied to all our sockets, including the
1375 * $NOTIFY_SOCKET one. */
1377 r
= read_one_line_file("/proc/sys/net/unix/max_dgram_qlen", &qlen
);
1379 return log_full_errno(r
== -ENOENT
? LOG_DEBUG
: LOG_WARNING
, r
, "Failed to read AF_UNIX datagram queue length, ignoring: %m");
1381 r
= safe_atolu(qlen
, &v
);
1383 return log_warning_errno(r
, "Failed to parse AF_UNIX datagram queue length '%s', ignoring: %m", qlen
);
1385 if (v
>= DEFAULT_UNIX_MAX_DGRAM_QLEN
)
1388 r
= write_string_filef("/proc/sys/net/unix/max_dgram_qlen", WRITE_STRING_FILE_DISABLE_BUFFER
, "%lu", DEFAULT_UNIX_MAX_DGRAM_QLEN
);
1390 return log_full_errno(IN_SET(r
, -EROFS
, -EPERM
, -EACCES
) ? LOG_DEBUG
: LOG_WARNING
, r
,
1391 "Failed to bump AF_UNIX datagram queue length, ignoring: %m");
/* Adjusts the environment inherited from the kernel. The checks at the top make this relevant only
 * when running as PID 1 and outside of a container.
 *
 * $TERM is set to the value of the "TERM" key on the kernel command line if there is one, otherwise
 * to default_term_for_tty("/dev/console"). $HOME is unset if it is "/". The string obtained from the
 * kernel command line is freed automatically via _cleanup_free_. */
1396 static int fixup_environment(void) {
1397 _cleanup_free_
char *term
= NULL
;
1401 /* Only fix up the environment when we are started as PID 1 */
1402 if (getpid_cached() != 1)
1405 /* We expect the environment to be set correctly if run inside a container. */
1406 if (detect_container() > 0)
1409 /* When started as PID1, the kernel uses /dev/console for our stdios and uses TERM=linux whatever the backend
1410 * device used by the console. We try to make a better guess here since some consoles might not have support
1411 * for color mode for example.
1413 * However if TERM was configured through the kernel command line then leave it alone. */
1414 r
= proc_cmdline_get_key("TERM", 0, &term
);
1418 t
= term
?: default_term_for_tty("/dev/console");
1420 if (setenv("TERM", t
, 1) < 0)
1423 /* The kernel sets HOME=/ for init. Let's undo this. */
1424 if (path_equal_ptr(getenv("HOME"), "/"))
1425 assert_se(unsetenv("HOME") == 0);
/* SysV compatibility shim, compiled in only with HAVE_SYSV_COMPAT: after checking the PID and whether
 * the invocation name contains "init", the process is replaced with SYSTEMCTL_BINARY_PATH, passing
 * argv[] through unchanged. If execv() returns, the failure is logged. */
1430 static void redirect_telinit(int argc
, char *argv
[]) {
1432 /* This is compatibility support for SysV, where calling init as a user is identical to telinit. */
1434 #if HAVE_SYSV_COMPAT
1435 if (getpid_cached() == 1)
1438 if (!strstr(program_invocation_short_name
, "init"))
1441 execv(SYSTEMCTL_BINARY_PATH
, argv
);
1442 log_error_errno(errno
, "Failed to exec " SYSTEMCTL_BINARY_PATH
": %m");
/* Replaces the current process with SYSTEMD_SHUTDOWN_BINARY_PATH to carry out 'shutdown_verb'.
 *
 * The command line is assembled in the fixed-size command_line[] array: the stop timeout and the
 * current maximum log level are always passed, followed by a log target keyword derived from
 * log_get_target(), the optional --log-color / --log-location / --log-time switches, and, for the
 * "exit" verb, --exit-code with the formatted return value. The assert after the last append checks
 * that a terminating NULL slot is still left in the array.
 *
 * The environment is a copy of environ. For the "reboot" and "kexec" verbs the matching watchdog
 * timeout (arg_reboot_watchdog / arg_kexec_watchdog) is looked up; if it is non-zero and finite the
 * watchdog is programmed and WATCHDOG_USEC (plus WATCHDOG_DEVICE, if a device is configured) is added
 * to the environment. The environment copy is released automatically via _cleanup_strv_free_ should
 * execve() return. */
1447 static int become_shutdown(
1448 const char *shutdown_verb
,
1451 char log_level
[DECIMAL_STR_MAX(int) + 1],
1452 exit_code
[DECIMAL_STR_MAX(uint8_t) + 1],
1453 timeout
[DECIMAL_STR_MAX(usec_t
) + 1];
1455 const char* command_line
[13] = {
1456 SYSTEMD_SHUTDOWN_BINARY_PATH
,
1458 "--timeout", timeout
,
1459 "--log-level", log_level
,
1463 _cleanup_strv_free_
char **env_block
= NULL
;
1466 usec_t watchdog_timer
= 0;
1468 assert(shutdown_verb
);
1469 assert(!command_line
[pos
]);
1470 env_block
= strv_copy(environ
);
1472 xsprintf(log_level
, "%d", log_get_max_level());
1473 xsprintf(timeout
, "%" PRI_USEC
"us", arg_default_timeout_stop_usec
);
1475 switch (log_get_target()) {
1477 case LOG_TARGET_KMSG
:
1478 case LOG_TARGET_JOURNAL_OR_KMSG
:
1479 case LOG_TARGET_SYSLOG_OR_KMSG
:
1480 command_line
[pos
++] = "kmsg";
1483 case LOG_TARGET_NULL
:
1484 command_line
[pos
++] = "null";
1487 case LOG_TARGET_CONSOLE
:
1489 command_line
[pos
++] = "console";
1493 if (log_get_show_color())
1494 command_line
[pos
++] = "--log-color";
1496 if (log_get_show_location())
1497 command_line
[pos
++] = "--log-location";
1499 if (log_get_show_time())
1500 command_line
[pos
++] = "--log-time";
1502 if (streq(shutdown_verb
, "exit")) {
1503 command_line
[pos
++] = "--exit-code";
1504 command_line
[pos
++] = exit_code
;
1505 xsprintf(exit_code
, "%d", retval
);
1508 assert(pos
< ELEMENTSOF(command_line
));
1510 if (streq(shutdown_verb
, "reboot"))
1511 watchdog_timer
= arg_reboot_watchdog
;
1512 else if (streq(shutdown_verb
, "kexec"))
1513 watchdog_timer
= arg_kexec_watchdog
;
1515 if (watchdog_timer
> 0 && watchdog_timer
!= USEC_INFINITY
) {
1519 /* If we reboot or kexec let's set the shutdown
1520 * watchdog and tell the shutdown binary to
1521 * repeatedly ping it */
1522 r
= watchdog_set_timeout(&watchdog_timer
);
1523 watchdog_close(r
< 0);
1525 /* Tell the binary how often to ping, ignore failure */
1526 if (asprintf(&e
, "WATCHDOG_USEC="USEC_FMT
, watchdog_timer
) > 0)
1527 (void) strv_consume(&env_block
, e
);
1529 if (arg_watchdog_device
&&
1530 asprintf(&e
, "WATCHDOG_DEVICE=%s", arg_watchdog_device
) > 0)
1531 (void) strv_consume(&env_block
, e
);
1533 watchdog_close(true);
1535 /* Avoid the creation of new processes forked by the
1536 * kernel; at this point, we will not listen to the
1538 if (detect_container() <= 0)
1539 (void) cg_uninstall_release_agent(SYSTEMD_CGROUP_CONTROLLER
);
1541 execve(SYSTEMD_SHUTDOWN_BINARY_PATH
, (char **) command_line
, env_block
);
/* Early, one-time clock setup.
 *
 * If clock_is_localtime() reports that the RTC runs in local time, the kernel time zone is set with
 * clock_set_timezone() and the applied delta (in minutes) is logged. Otherwise, when not running in
 * the initrd, clock_reset_timewarp() is called. Afterwards clock_apply_epoch() is invoked; the two
 * messages that follow report that the system time was before the build time and whether it could be
 * advanced. All failures are logged only, nothing is returned. */
1545 static void initialize_clock(void) {
1548 /* This is called very early on, before we parse the kernel command line or otherwise figure out why
1549 * we are running, but only once. */
1551 if (clock_is_localtime(NULL
) > 0) {
1555 * The very first call of settimeofday() also does a time warp in the kernel.
1557 * In the rtc-in-local time mode, we set the kernel's timezone, and rely on external tools to take care
1558 * of maintaining the RTC and do all adjustments. This matches the behavior of Windows, which leaves
1559 * the RTC alone if the registry tells that the RTC runs in UTC.
1561 r
= clock_set_timezone(&min
);
1563 log_error_errno(r
, "Failed to apply local time delta, ignoring: %m");
1565 log_info("RTC configured in localtime, applying delta of %i minutes to system time.", min
);
1567 } else if (!in_initrd())
1569 * Do a dummy very first call to seal the kernel's time warp magic.
1571 * Do not call this from inside the initrd. The initrd might not carry /etc/adjtime with LOCAL, but the
1572 * real system could be set up that way. In such case, we need to delay the time-warp or the sealing
1573 * until we reach the real system.
1575 * Do no set the kernel's timezone. The concept of local time cannot be supported reliably, the time
1576 * will jump or be incorrect at every daylight saving time change. All kernel local time concepts will
1577 * be treated as UTC that way.
1579 (void) clock_reset_timewarp();
1581 r
= clock_apply_epoch();
1583 log_error_errno(r
, "Current system time is before build time, but cannot correct: %m");
1585 log_info("System time before build time, advancing clock.");
/* Sets CLOCK_REALTIME to arg_clock_usec, the time requested on the kernel command line. The checks at
 * the top test for arg_clock_usec being 0 (nothing requested) and for not running as PID 1. The
 * outcome is only logged: an error if clock_settime() fails, otherwise an info message with the
 * formatted timestamp. */
1588 static void apply_clock_update(void) {
1591 /* This is called later than initialize_clock(), i.e. after we parsed configuration files/kernel
1592 * command line and such. */
1594 if (arg_clock_usec
== 0)
1597 if (getpid_cached() != 1)
1600 if (clock_settime(CLOCK_REALTIME
, timespec_store(&ts
, arg_clock_usec
)) < 0)
1601 log_error_errno(errno
, "Failed to set system clock to time specified on kernel command line: %m");
1603 char buf
[FORMAT_TIMESTAMP_MAX
];
1605 log_info("Set system clock to %s, as specified on the kernel command line.",
1606 format_timestamp(buf
, sizeof(buf
), arg_clock_usec
));
/* Credits the random seed passed on the kernel command line (arg_random_seed, arg_random_seed_size
 * bytes) to the kernel entropy pool via random_write_entropy(). The checks at the top test for an
 * empty seed and for not running as PID 1. A warning is logged if the seed is smaller than
 * random_pool_size(), and a notice on success reminds that a seed supplied this way is readable by
 * unprivileged programs. */
1610 static void cmdline_take_random_seed(void) {
1614 if (arg_random_seed_size
== 0)
1617 if (getpid_cached() != 1)
1620 assert(arg_random_seed
);
1621 suggested
= random_pool_size();
1623 if (arg_random_seed_size
< suggested
)
1624 log_warning("Random seed specified on kernel command line has size %zu, but %zu bytes required to fill entropy pool.",
1625 arg_random_seed_size
, suggested
);
1627 r
= random_write_entropy(-1, arg_random_seed
, arg_random_seed_size
, true);
1629 log_warning_errno(r
, "Failed to credit entropy specified on kernel command line, ignoring: %m");
1633 log_notice("Successfully credited entropy passed on kernel command line.\n"
1634 "Note that the seed provided this way is accessible to unprivileged programs. This functionality should not be used outside of testing environments.");
/* Core dump defaults for PID 1: lifts RLIMIT_CORE to RLIM_INFINITY (a failure is only logged) and
 * calls disable_coredumps() to switch off the core_pattern logic by default.
 * NOTE(review): how 'skip_setup' gates disable_coredumps() is not visible in this excerpt. */
1637 static void initialize_coredump(bool skip_setup
) {
1639 if (getpid_cached() != 1)
1642 /* Don't limit the core dump size, so that coredump handlers such as systemd-coredump (which honour the limit)
1643 * will process core dumps for system services by default. */
1644 if (setrlimit(RLIMIT_CORE
, &RLIMIT_MAKE_CONST(RLIM_INFINITY
)) < 0)
1645 log_warning_errno(errno
, "Failed to set RLIMIT_CORE: %m");
1647 /* But at the same time, turn off the core_pattern logic by default, so that no
1648 * coredumps are stored until the systemd-coredump tool is enabled via
1649 * sysctl. However it can be changed via the kernel command line later so core
1650 * dumps can still be generated during early startup and in initramfs. */
1652 disable_coredumps();
/* Writes arg_early_core_pattern to /proc/sys/kernel/core_pattern. The checks at the top test for
 * 'skip_setup', for no early core pattern being configured and for not running as PID 1. A write
 * failure is logged as a warning and otherwise ignored. */
1656 static void initialize_core_pattern(bool skip_setup
) {
1659 if (skip_setup
|| !arg_early_core_pattern
)
1662 if (getpid_cached() != 1)
1665 r
= write_string_file("/proc/sys/kernel/core_pattern", arg_early_core_pattern
, WRITE_STRING_FILE_DISABLE_BUFFER
);
1667 log_warning_errno(r
, "Failed to write '%s' to /proc/sys/kernel/core_pattern, ignoring: %m", arg_early_core_pattern
);
/* Applies the configured CPU affinity mask (arg_cpu_affinity) to the calling process with
 * sched_setaffinity(). The check at the top tests for 'skip_setup' and for no mask being configured.
 * The mask is logged at debug level in string form first; a failure to apply it is logged as a
 * warning. The string form is freed automatically via _cleanup_free_. */
1670 static void update_cpu_affinity(bool skip_setup
) {
1671 _cleanup_free_
char *mask
= NULL
;
1673 if (skip_setup
|| !arg_cpu_affinity
.set
)
1676 assert(arg_cpu_affinity
.allocated
> 0);
1678 mask
= cpu_set_to_string(&arg_cpu_affinity
);
1679 log_debug("Setting CPU affinity to %s.", strnull(mask
));
1681 if (sched_setaffinity(0, arg_cpu_affinity
.allocated
, arg_cpu_affinity
.set
) < 0)
1682 log_warning_errno(errno
, "Failed to set CPU affinity: %m");
/* Applies the configured NUMA memory policy (arg_numa_policy) with apply_numa_policy(). The check at
 * the top tests for 'skip_setup' and for the configured policy type not being valid. The policy name
 * and node list are formatted only when debug logging is on. -EOPNOTSUPP from apply_numa_policy() is
 * logged at debug level as missing NUMA support; the other failure message is a warning. The node
 * list string is freed automatically via _cleanup_free_. */
1685 static void update_numa_policy(bool skip_setup
) {
1687 _cleanup_free_
char *nodes
= NULL
;
1688 const char * policy
= NULL
;
1690 if (skip_setup
|| !mpol_is_valid(numa_policy_get_type(&arg_numa_policy
)))
1693 if (DEBUG_LOGGING
) {
1694 policy
= mpol_to_string(numa_policy_get_type(&arg_numa_policy
));
1695 nodes
= cpu_set_to_range_string(&arg_numa_policy
.nodes
);
1696 log_debug("Setting NUMA policy to %s, with nodes %s.", strnull(policy
), strnull(nodes
));
1699 r
= apply_numa_policy(&arg_numa_policy
);
1700 if (r
== -EOPNOTSUPP
)
1701 log_debug_errno(r
, "NUMA support not available, ignoring.");
1703 log_warning_errno(r
, "Failed to set NUMA memory policy: %m");
/* Replaces the running manager with a new program image, trying a sequence of candidates.
 *
 * Preparation: the watchdog is closed and disarmed, RLIMIT_NOFILE and RLIMIT_MEMLOCK are put back to
 * the saved values (skipped when the saved soft limit is 0 resp. RLIM_INFINITY), and, if
 * 'switch_root_dir' is set, remaining processes are sent SIGTERM and the root is switched with
 * MS_MOVE (a failure there is logged and execution continues).
 *
 * Candidates, in order:
 *   1. SYSTEMD_BINARY_PATH with --switched-root (if applicable), --system/--user and --deserialize
 *      plus the serialization fd; only when no explicit 'switch_root_init' was given.
 *   2. 'switch_root_init', if given, with the original arguments and the saved environment.
 *   3. /sbin/init with the original arguments.
 *   4. /bin/sh with the saved environment.
 * Before the fallbacks the serialization stream and fd set are released, the console is reopened and
 * signal handlers, the signal mask and RLIMIT_NOFILE are reset.
 *
 * The args[] array lives on the stack (newa) and holds MAX(6, argc+1) entries. This function only
 * returns if every exec attempt failed, in which case *ret_error_message is set. */
1706 static void do_reexecute(
1709 const struct rlimit
*saved_rlimit_nofile
,
1710 const struct rlimit
*saved_rlimit_memlock
,
1712 const char *switch_root_dir
,
1713 const char *switch_root_init
,
1714 const char **ret_error_message
) {
1716 unsigned i
, j
, args_size
;
1720 assert(saved_rlimit_nofile
);
1721 assert(saved_rlimit_memlock
);
1722 assert(ret_error_message
);
1724 /* Close and disarm the watchdog, so that the new instance can reinitialize it, but doesn't get rebooted while
1726 watchdog_close(true);
1728 /* Reset RLIMIT_NOFILE + RLIMIT_MEMLOCK back to the kernel defaults, so that the new systemd can pass
1729 * the kernel default to its child processes */
1730 if (saved_rlimit_nofile
->rlim_cur
!= 0)
1731 (void) setrlimit(RLIMIT_NOFILE
, saved_rlimit_nofile
);
1732 if (saved_rlimit_memlock
->rlim_cur
!= RLIM_INFINITY
)
1733 (void) setrlimit(RLIMIT_MEMLOCK
, saved_rlimit_memlock
);
1735 if (switch_root_dir
) {
1736 /* Kill all remaining processes from the initrd, but don't wait for them, so that we can handle the
1737 * SIGCHLD for them after deserializing. */
1738 broadcast_signal(SIGTERM
, false, true, arg_default_timeout_stop_usec
);
1740 /* And switch root with MS_MOVE, because we remove the old directory afterwards and detach it. */
1741 r
= switch_root(switch_root_dir
, "/mnt", true, MS_MOVE
);
1743 log_error_errno(r
, "Failed to switch root, trying to continue: %m");
1746 args_size
= MAX(6, argc
+1);
1747 args
= newa(const char*, args_size
);
1749 if (!switch_root_init
) {
1750 char sfd
[DECIMAL_STR_MAX(int) + 1];
1752 /* First try to spawn ourselves with the right path, and with full serialization. We do this only if
1753 * the user didn't specify an explicit init to spawn. */
1755 assert(arg_serialization
);
1758 xsprintf(sfd
, "%i", fileno(arg_serialization
));
1761 args
[i
++] = SYSTEMD_BINARY_PATH
;
1762 if (switch_root_dir
)
1763 args
[i
++] = "--switched-root";
1764 args
[i
++] = arg_system
? "--system" : "--user";
1765 args
[i
++] = "--deserialize";
1769 assert(i
<= args_size
);
1772 * We want valgrind to print its memory usage summary before reexecution. Valgrind won't do this is on
1773 * its own on exec(), but it will do it on exit(). Hence, to ensure we get a summary here, fork() off
1774 * a child, let it exit() cleanly, so that it prints the summary, and wait() for it in the parent,
1775 * before proceeding into the exec().
1777 valgrind_summary_hack();
1779 (void) execv(args
[0], (char* const*) args
);
1780 log_debug_errno(errno
, "Failed to execute our own binary, trying fallback: %m");
1783 /* Try the fallback, if there is any, without any serialization. We pass the original argv[] and envp[]. (Well,
1784 * modulo the ordering changes due to getopt() in argv[], and some cleanups in envp[], but let's hope that
1785 * doesn't matter.) */
1787 arg_serialization
= safe_fclose(arg_serialization
);
1788 fds
= fdset_free(fds
);
1790 /* Reopen the console */
1791 (void) make_console_stdio();
1793 for (j
= 1, i
= 1; j
< (unsigned) argc
; j
++)
1794 args
[i
++] = argv
[j
];
1796 assert(i
<= args_size
);
1798 /* Re-enable any blocked signals, especially important if we switch from initial ramdisk to init=... */
1799 (void) reset_all_signal_handlers();
1800 (void) reset_signal_mask();
1801 (void) rlimit_nofile_safe();
1803 if (switch_root_init
) {
1804 args
[0] = switch_root_init
;
1805 (void) execve(args
[0], (char* const*) args
, saved_env
);
1806 log_warning_errno(errno
, "Failed to execute configured init, trying fallback: %m");
1809 args
[0] = "/sbin/init";
1810 (void) execv(args
[0], (char* const*) args
);
1813 manager_status_printf(NULL
, STATUS_TYPE_EMERGENCY
,
1814 ANSI_HIGHLIGHT_RED
" !! " ANSI_NORMAL
,
1815 "Failed to execute /sbin/init");
1818 log_warning("No /sbin/init, trying fallback");
1820 args
[0] = "/bin/sh";
1822 (void) execve(args
[0], (char* const*) args
, saved_env
);
1823 log_error_errno(errno
, "Failed to execute /bin/sh, giving up: %m");
1825 log_warning_errno(r
, "Failed to execute /sbin/init, giving up: %m");
1827 *ret_error_message
= "Failed to execute fallback shell";
/* Runs manager_loop() and dispatches on the objective it returns.
 *
 *   MANAGER_RELOAD      - re-parses the configuration, re-applies manager defaults/settings, CPU
 *                         affinity and NUMA policy, restores an overridden log level/target, then
 *                         calls manager_reload(). The objective is reset to MANAGER_OK if reloading
 *                         failed before the point of no return.
 *   MANAGER_REEXECUTE   - serializes state via prepare_reexecute() and reports *ret_reexecute = true.
 *   MANAGER_SWITCH_ROOT - like reexecute (serializing only if no explicit init was requested) and
 *                         additionally hands m->switch_root / m->switch_root_init over to the caller
 *                         through *ret_switch_root_dir / *ret_switch_root_init (TAKE_PTR).
 *   MANAGER_EXIT, MANAGER_REBOOT, MANAGER_POWEROFF, MANAGER_HALT, MANAGER_KEXEC
 *                       - report *ret_reexecute = false, the manager's return value and the shutdown
 *                         verb looked up in the local table.
 *
 * On failure *ret_error_message is set to a static string describing the step that failed. A failing
 * manager_loop() is logged at emergency level. */
1830 static int invoke_main_loop(
1832 const struct rlimit
*saved_rlimit_nofile
,
1833 const struct rlimit
*saved_rlimit_memlock
,
1834 bool *ret_reexecute
,
1835 int *ret_retval
, /* Return parameters relevant for shutting down */
1836 const char **ret_shutdown_verb
, /* … */
1837 FDSet
**ret_fds
, /* Return parameters for reexecuting */
1838 char **ret_switch_root_dir
, /* … */
1839 char **ret_switch_root_init
, /* … */
1840 const char **ret_error_message
) {
1845 assert(saved_rlimit_nofile
);
1846 assert(saved_rlimit_memlock
);
1847 assert(ret_reexecute
);
1849 assert(ret_shutdown_verb
);
1851 assert(ret_switch_root_dir
);
1852 assert(ret_switch_root_init
);
1853 assert(ret_error_message
);
1856 r
= manager_loop(m
);
1858 *ret_error_message
= "Failed to run main loop";
1859 return log_emergency_errno(r
, "Failed to run main loop: %m");
1862 switch ((ManagerObjective
) r
) {
1864 case MANAGER_RELOAD
: {
1865 LogTarget saved_log_target
;
1866 int saved_log_level
;
1868 log_info("Reloading.");
1870 /* First, save any overridden log level/target, then parse the configuration file, which might
1871 * change the log level to new settings. */
1873 saved_log_level
= m
->log_level_overridden
? log_get_max_level() : -1;
1874 saved_log_target
= m
->log_target_overridden
? log_get_target() : _LOG_TARGET_INVALID
;
1876 (void) parse_configuration(saved_rlimit_nofile
, saved_rlimit_memlock
);
1878 set_manager_defaults(m
);
1879 set_manager_settings(m
);
1881 update_cpu_affinity(false);
1882 update_numa_policy(false);
1884 if (saved_log_level
>= 0)
1885 manager_override_log_level(m
, saved_log_level
);
1886 if (saved_log_target
>= 0)
1887 manager_override_log_target(m
, saved_log_target
);
1889 r
= manager_reload(m
);
1891 /* Reloading failed before the point of no return. Let's continue running as if nothing happened. */
1892 m
->objective
= MANAGER_OK
;
1897 case MANAGER_REEXECUTE
:
1899 r
= prepare_reexecute(m
, &arg_serialization
, ret_fds
, false);
1901 *ret_error_message
= "Failed to prepare for reexecution";
1905 log_notice("Reexecuting.");
1907 *ret_reexecute
= true;
1908 *ret_retval
= EXIT_SUCCESS
;
1909 *ret_shutdown_verb
= NULL
;
1910 *ret_switch_root_dir
= *ret_switch_root_init
= NULL
;
1914 case MANAGER_SWITCH_ROOT
:
1915 if (!m
->switch_root_init
) {
1916 r
= prepare_reexecute(m
, &arg_serialization
, ret_fds
, true);
1918 *ret_error_message
= "Failed to prepare for reexecution";
1924 log_notice("Switching root.");
1926 *ret_reexecute
= true;
1927 *ret_retval
= EXIT_SUCCESS
;
1928 *ret_shutdown_verb
= NULL
;
1930 /* Steal the switch root parameters */
1931 *ret_switch_root_dir
= TAKE_PTR(m
->switch_root
);
1932 *ret_switch_root_init
= TAKE_PTR(m
->switch_root_init
);
1938 if (MANAGER_IS_USER(m
)) {
1941 *ret_reexecute
= false;
1942 *ret_retval
= m
->return_value
;
1943 *ret_shutdown_verb
= NULL
;
1945 *ret_switch_root_dir
= *ret_switch_root_init
= NULL
;
1951 case MANAGER_REBOOT
:
1952 case MANAGER_POWEROFF
:
1954 case MANAGER_KEXEC
: {
1955 static const char * const table
[_MANAGER_OBJECTIVE_MAX
] = {
1956 [MANAGER_EXIT
] = "exit",
1957 [MANAGER_REBOOT
] = "reboot",
1958 [MANAGER_POWEROFF
] = "poweroff",
1959 [MANAGER_HALT
] = "halt",
1960 [MANAGER_KEXEC
] = "kexec",
1963 log_notice("Shutting down.");
1965 *ret_reexecute
= false;
1966 *ret_retval
= m
->return_value
;
1967 assert_se(*ret_shutdown_verb
= table
[m
->objective
]);
1969 *ret_switch_root_dir
= *ret_switch_root_init
= NULL
;
1975 assert_not_reached("Unknown or unexpected manager objective.");
1980 static void log_execution_mode(bool *ret_first_boot
) {
1981 assert(ret_first_boot
);
1986 log_info("systemd " GIT_VERSION
" running in %ssystem mode. (%s)",
1987 arg_action
== ACTION_TEST
? "test " : "",
1990 v
= detect_virtualization();
1992 log_info("Detected virtualization %s.", virtualization_to_string(v
));
1994 log_info("Detected architecture %s.", architecture_to_string(uname_architecture()));
1997 *ret_first_boot
= false;
1998 log_info("Running in initial RAM disk.");
2001 _cleanup_free_
char *id_text
= NULL
;
2003 /* Let's check whether we are in first boot. We use /etc/machine-id as flag file
2004 * for this: If it is missing or contains the value "uninitialized", this is the
2005 * first boot. In any other case, it is not. This allows container managers and
2006 * installers to provision a couple of files already. If the container manager
2007 * wants to provision the machine ID itself it should pass $container_uuid to PID 1. */
2009 r
= read_one_line_file("/etc/machine-id", &id_text
);
2010 if (r
< 0 || streq(id_text
, "uninitialized")) {
2011 if (r
< 0 && r
!= -ENOENT
)
2012 log_warning_errno(r
, "Unexpected error while reading /etc/machine-id, ignoring: %m");
2014 *ret_first_boot
= true;
2015 log_info("Detected first boot.");
2017 *ret_first_boot
= false;
2018 log_debug("Detected initialized system, this is not the first boot.");
2022 if (DEBUG_LOGGING
) {
2023 _cleanup_free_
char *t
;
2025 t
= uid_to_name(getuid());
2026 log_debug("systemd " GIT_VERSION
" running in %suser mode for user " UID_FMT
"/%s. (%s)",
2027 arg_action
== ACTION_TEST
? " test" : "",
2028 getuid(), strna(t
), systemd_features
);
2031 *ret_first_boot
= false;
/* Applies runtime settings once the configuration is known. Among the visible steps:
 *
 *   - CPU affinity and NUMA policy (both honour 'skip_setup')
 *   - crash handler installation and cgroup controller mounting
 *   - hostname, machine ID, loopback device, AF_UNIX datagram queue length, file descriptor sysctls
 *     and the container ID file
 *   - watchdog device selection
 *   - creation of the "systemd" directory below $XDG_RUNTIME_DIR together with the inaccessible device
 *     nodes (the path string is freed automatically via _cleanup_free_)
 *   - timer slack, capability bounding set, no-new-privileges and syscall architecture restrictions
 *   - becoming a child subreaper
 *   - bumping RLIMIT_NOFILE and RLIMIT_MEMLOCK, using the saved_rlimit_* values as baseline
 *
 * Steps whose failure is fatal set *ret_error_message to a static string and return the error; the
 * remaining ones only log a warning or ignore the result. NOTE(review): the conditions selecting
 * which steps run in system mode, user mode or on first start are largely not visible in this
 * excerpt. */
2035 static int initialize_runtime(
2038 struct rlimit
*saved_rlimit_nofile
,
2039 struct rlimit
*saved_rlimit_memlock
,
2040 const char **ret_error_message
) {
2043 assert(ret_error_message
);
2045 /* Sets up various runtime parameters. Many of these initializations are conditionalized:
2047 * - Some only apply to --system instances
2048 * - Some only apply to --user instances
2049 * - Some only apply when we first start up, but not when we reexecute
2052 if (arg_action
!= ACTION_RUN
)
2055 update_cpu_affinity(skip_setup
);
2056 update_numa_policy(skip_setup
);
2059 /* Make sure we leave a core dump without panicking the kernel. */
2060 install_crash_handler();
2063 r
= mount_cgroup_controllers();
2065 *ret_error_message
= "Failed to mount cgroup hierarchies";
2070 (void) hostname_setup(true);
2071 /* Force transient machine-id on first boot. */
2072 machine_id_setup(NULL
, first_boot
, arg_machine_id
, NULL
);
2073 (void) loopback_setup();
2074 bump_unix_max_dgram_qlen();
2075 bump_file_max_and_nr_open();
2077 write_container_id();
2080 if (arg_watchdog_device
) {
2081 r
= watchdog_set_device(arg_watchdog_device
);
2083 log_warning_errno(r
, "Failed to set watchdog device to %s, ignoring: %m", arg_watchdog_device
);
2086 _cleanup_free_
char *p
= NULL
;
2088 /* Create the runtime directory and place the inaccessible device nodes there, if we run in
2089 * user mode. In system mode mount_setup() already did that. */
2091 r
= xdg_user_runtime_dir(&p
, "/systemd");
2093 *ret_error_message
= "$XDG_RUNTIME_DIR is not set";
2094 return log_emergency_errno(r
, "Failed to determine $XDG_RUNTIME_DIR path: %m");
2097 (void) mkdir_p_label(p
, 0755);
2098 (void) make_inaccessible_nodes(p
, UID_INVALID
, GID_INVALID
);
2101 if (arg_timer_slack_nsec
!= NSEC_INFINITY
)
2102 if (prctl(PR_SET_TIMERSLACK
, arg_timer_slack_nsec
) < 0)
2103 log_warning_errno(errno
, "Failed to adjust timer slack, ignoring: %m");
2105 if (arg_system
&& !cap_test_all(arg_capability_bounding_set
)) {
2106 r
= capability_bounding_set_drop_usermode(arg_capability_bounding_set
);
2108 *ret_error_message
= "Failed to drop capability bounding set of usermode helpers";
2109 return log_emergency_errno(r
, "Failed to drop capability bounding set of usermode helpers: %m");
2112 r
= capability_bounding_set_drop(arg_capability_bounding_set
, true);
2114 *ret_error_message
= "Failed to drop capability bounding set";
2115 return log_emergency_errno(r
, "Failed to drop capability bounding set: %m");
2119 if (arg_system
&& arg_no_new_privs
) {
2120 if (prctl(PR_SET_NO_NEW_PRIVS
, 1, 0, 0, 0) < 0) {
2121 *ret_error_message
= "Failed to disable new privileges";
2122 return log_emergency_errno(errno
, "Failed to disable new privileges: %m");
2126 if (arg_syscall_archs
) {
2127 r
= enforce_syscall_archs(arg_syscall_archs
);
2129 *ret_error_message
= "Failed to set syscall architectures";
2135 /* Become reaper of our children */
2136 if (prctl(PR_SET_CHILD_SUBREAPER
, 1) < 0)
2137 log_warning_errno(errno
, "Failed to make us a subreaper: %m");
2139 /* Bump up RLIMIT_NOFILE for systemd itself */
2140 (void) bump_rlimit_nofile(saved_rlimit_nofile
);
2141 (void) bump_rlimit_memlock(saved_rlimit_memlock
);
/* Loads the unit that should be activated at startup and enqueues a start job for it.
 *
 * Unit selection: arg_default_unit when set, otherwise SPECIAL_INITRD_TARGET when in_initrd(),
 * otherwise SPECIAL_DEFAULT_TARGET. Two fallbacks are visible when loading fails:
 * SPECIAL_DEFAULT_TARGET (only in the initrd and only without an explicit arg_default_unit) and
 * then SPECIAL_RESCUE_TARGET.
 *
 * Job creation: a JOB_START job is requested in JOB_ISOLATE mode first and, per the debug message
 * ("could not be isolated, starting instead"), again in JOB_REPLACE mode. On success the id of the
 * queued job is stored in m->default_unit_job_id.
 *
 * ret_error_message: pointed at a static description before the visible fatal returns (rescue
 *         target masked or not loadable, start or isolate failure).
 *
 * NOTE(review): the declarations of m, unit, target, job and r, and the conditions guarding the
 * fallbacks and the isolate/replace retry, are not visible in this excerpt. Confirm the exact
 * control flow against the complete file. */
2146 static int do_queue_default_job(
2148 const char **ret_error_message
) {
2150 _cleanup_(sd_bus_error_free
) sd_bus_error error
= SD_BUS_ERROR_NULL
;
2156 if (arg_default_unit
)
2157 unit
= arg_default_unit
;
2158 else if (in_initrd())
2159 unit
= SPECIAL_INITRD_TARGET
;
2161 unit
= SPECIAL_DEFAULT_TARGET
;
2163 log_debug("Activating default unit: %s", unit
);
2165 r
= manager_load_startable_unit_or_warn(m
, unit
, NULL
, &target
);
2166 if (r
< 0 && in_initrd() && !arg_default_unit
) {
2167 /* Fall back to default.target, which we used to always use by default. Only do this if no
2168 * explicit configuration was given. */
2170 log_info("Falling back to " SPECIAL_DEFAULT_TARGET
".");
2172 r
= manager_load_startable_unit_or_warn(m
, SPECIAL_DEFAULT_TARGET
, NULL
, &target
);
2175 log_info("Falling back to " SPECIAL_RESCUE_TARGET
".");
2177 r
= manager_load_startable_unit_or_warn(m
, SPECIAL_RESCUE_TARGET
, NULL
, &target
);
/* -ERFKILL from the loader is reported to the caller as the rescue target being masked; any
 * other failure is reported as a generic load failure. */
2179 *ret_error_message
= r
== -ERFKILL
? SPECIAL_RESCUE_TARGET
" masked"
2180 : "Failed to load " SPECIAL_RESCUE_TARGET
;
2185 assert(target
->load_state
== UNIT_LOADED
);
2187 r
= manager_add_job(m
, JOB_START
, target
, JOB_ISOLATE
, NULL
, &error
, &job
);
2189 log_debug_errno(r
, "Default target could not be isolated, starting instead: %s", bus_error_message(&error
, r
));
/* Clear the error from the isolate attempt so the same sd_bus_error can be reused below. */
2191 sd_bus_error_free(&error
);
2193 r
= manager_add_job(m
, JOB_START
, target
, JOB_REPLACE
, NULL
, &error
, &job
);
2195 *ret_error_message
= "Failed to start default target";
2196 return log_emergency_errno(r
, "Failed to start default target: %s", bus_error_message(&error
, r
));
2200 *ret_error_message
= "Failed to isolate default target";
2201 return log_emergency_errno(r
, "Failed to isolate default target: %s", bus_error_message(&error
, r
));
2203 log_info("Queued %s job for default target %s.",
2204 job_type_to_string(job
->type
),
2205 unit_status_string(job
->unit
));
2207 m
->default_unit_job_id
= job
->id
;
/* Snapshots the RLIMIT_NOFILE and RLIMIT_MEMLOCK limits this process currently runs with into the
 * two caller-provided structures.
 *
 * Both pointers must be non-NULL (asserted). A failing getrlimit() call is only logged as a
 * warning; the corresponding output structure is then left as the caller initialized it. */
static void save_rlimits(struct rlimit *saved_rlimit_nofile,
                         struct rlimit *saved_rlimit_memlock) {
        int rc;

        assert(saved_rlimit_nofile);
        assert(saved_rlimit_memlock);

        /* File descriptor limit first ... */
        rc = getrlimit(RLIMIT_NOFILE, saved_rlimit_nofile);
        if (rc < 0)
                log_warning_errno(errno, "Reading RLIMIT_NOFILE failed, ignoring: %m");

        /* ... then the locked-memory limit. */
        rc = getrlimit(RLIMIT_MEMLOCK, saved_rlimit_memlock);
        if (rc < 0)
                log_warning_errno(errno, "Reading RLIMIT_MEMLOCK failed, ignoring: %m");
}
/* Installs a default RLIMIT_NOFILE for spawned processes in arg_default_rlimit[RLIMIT_NOFILE],
 * derived from the limit this process was originally started with.
 *
 * saved_rlimit_nofile: the original limit; it is duplicated with newdup() and the copy is adjusted:
 *   - hard limit: MAX(original hard limit, HIGH_RLIMIT_NOFILE), capped at the value returned by
 *     read_nr_open();
 *   - soft limit: capped at FD_SETSIZE.
 * The adjusted copy is then stored in arg_default_rlimit[RLIMIT_NOFILE].
 *
 * NOTE(review): the local declarations, what happens when arg_default_rlimit[RLIMIT_NOFILE] is
 * already set, and the handling of a failed newdup() are not visible in this excerpt. */
2225 static void fallback_rlimit_nofile(const struct rlimit
*saved_rlimit_nofile
) {
2228 if (arg_default_rlimit
[RLIMIT_NOFILE
])
2231 /* Make sure forked processes get limits based on the original kernel setting */
2233 rl
= newdup(struct rlimit
, saved_rlimit_nofile
, 1);
2239 /* Bump the hard limit for system services to a substantially higher value. The default
2240 * hard limit current kernels set is pretty low (4K), mostly for historical
2241 * reasons. According to kernel developers, the fd handling in recent kernels has been
2242 * optimized substantially enough, so that we can bump the limit now, without paying too
2243 * high a price in memory or performance. Note however that we only bump the hard limit,
2244 * not the soft limit. That's because select() works the way it works, and chokes on fds
2245 * >= 1024. If we'd bump the soft limit globally, it might accidentally happen to
2246 * unexpecting programs that they get fds higher than what they can process using
2247 * select(). By only bumping the hard limit but leaving the low limit as it is we avoid
2248 * this pitfall: programs that are written by folks aware of the select() problem in mind
2249 * (and thus use poll()/epoll instead of select(), the way everybody should) can
2250 * explicitly opt into high fds by bumping their soft limit beyond 1024, to the hard limit
2255 /* Get the underlying absolute limit the kernel enforces */
2256 nr
= read_nr_open();
2258 rl
->rlim_max
= MIN((rlim_t
) nr
, MAX(rl
->rlim_max
, (rlim_t
) HIGH_RLIMIT_NOFILE
));
2261 /* If for some reason we were invoked with a soft limit above 1024 (which should never
2262 * happen!, but who knows what we get passed in from pam_limit when invoked as --user
2263 * instance), then lower what we pass on to not confuse our children */
2264 rl
->rlim_cur
= MIN(rl
->rlim_cur
, (rlim_t
) FD_SETSIZE
);
2266 arg_default_rlimit
[RLIMIT_NOFILE
] = rl
;
/* Installs a default RLIMIT_MEMLOCK for spawned processes: a newdup() copy of the limit this
 * process was originally started with is stored, unmodified, in arg_default_rlimit[RLIMIT_MEMLOCK].
 *
 * saved_rlimit_memlock: the original limit to duplicate.
 *
 * NOTE(review): the local declaration, what happens when arg_default_rlimit[RLIMIT_MEMLOCK] is
 * already set, and the handling of a failed newdup() are not visible in this excerpt. */
2269 static void fallback_rlimit_memlock(const struct rlimit
*saved_rlimit_memlock
) {
2272 /* Pass the original value down to invoked processes */
2274 if (arg_default_rlimit
[RLIMIT_MEMLOCK
])
2277 rl
= newdup(struct rlimit
, saved_rlimit_memlock
, 1);
2283 arg_default_rlimit
[RLIMIT_MEMLOCK
] = rl
;
2286 static void reset_arguments(void) {
2287 /* Frees/resets arg_* variables, with a few exceptions commented below. */
2289 arg_default_unit
= mfree(arg_default_unit
);
2291 /* arg_system — ignore */
2293 arg_dump_core
= true;
2294 arg_crash_chvt
= -1;
2295 arg_crash_shell
= false;
2296 arg_crash_reboot
= false;
2297 arg_confirm_spawn
= mfree(arg_confirm_spawn
);
2298 arg_show_status
= _SHOW_STATUS_INVALID
;
2299 arg_status_unit_format
= STATUS_UNIT_FORMAT_DEFAULT
;
2300 arg_switched_root
= false;
2301 arg_pager_flags
= 0;
2302 arg_service_watchdogs
= true;
2303 arg_default_std_output
= EXEC_OUTPUT_JOURNAL
;
2304 arg_default_std_error
= EXEC_OUTPUT_INHERIT
;
2305 arg_default_restart_usec
= DEFAULT_RESTART_USEC
;
2306 arg_default_timeout_start_usec
= DEFAULT_TIMEOUT_USEC
;
2307 arg_default_timeout_stop_usec
= DEFAULT_TIMEOUT_USEC
;
2308 arg_default_timeout_abort_usec
= DEFAULT_TIMEOUT_USEC
;
2309 arg_default_timeout_abort_set
= false;
2310 arg_default_start_limit_interval
= DEFAULT_START_LIMIT_INTERVAL
;
2311 arg_default_start_limit_burst
= DEFAULT_START_LIMIT_BURST
;
2312 arg_runtime_watchdog
= 0;
2313 arg_reboot_watchdog
= 10 * USEC_PER_MINUTE
;
2314 arg_kexec_watchdog
= 0;
2315 arg_early_core_pattern
= NULL
;
2316 arg_watchdog_device
= NULL
;
2318 arg_default_environment
= strv_free(arg_default_environment
);
2319 rlimit_free_all(arg_default_rlimit
);
2321 arg_capability_bounding_set
= CAP_ALL
;
2322 arg_no_new_privs
= false;
2323 arg_timer_slack_nsec
= NSEC_INFINITY
;
2324 arg_default_timer_accuracy_usec
= 1 * USEC_PER_MINUTE
;
2326 arg_syscall_archs
= set_free(arg_syscall_archs
);
2328 /* arg_serialization — ignore */
2330 arg_default_cpu_accounting
= -1;
2331 arg_default_io_accounting
= false;
2332 arg_default_ip_accounting
= false;
2333 arg_default_blockio_accounting
= false;
2334 arg_default_memory_accounting
= MEMORY_ACCOUNTING_DEFAULT
;
2335 arg_default_tasks_accounting
= true;
2336 arg_default_tasks_max
= DEFAULT_TASKS_MAX
;
2337 arg_machine_id
= (sd_id128_t
) {};
2338 arg_cad_burst_action
= EMERGENCY_ACTION_REBOOT_FORCE
;
2339 arg_default_oom_policy
= OOM_STOP
;
2341 cpu_set_reset(&arg_cpu_affinity
);
2342 numa_policy_reset(&arg_numa_policy
);
2344 arg_random_seed
= mfree(arg_random_seed
);
2345 arg_random_seed_size
= 0;
/* Loads the manager configuration from its sources.
 *
 * Visible steps: parse the configuration file, then the kernel command line via
 * parse_proc_cmdline_item(); for both, the visible failure handling is a warning whose text says
 * the error is ignored. Afterwards the fallback default rlimits for services are installed from
 * the saved original limits, logging settings are re-read with log_parse_environment(), and
 * arg_show_status defaults to SHOW_STATUS_YES when nothing set it explicitly.
 *
 * saved_rlimit_nofile, saved_rlimit_memlock: original process limits; both must be non-NULL
 *         (asserted). They are passed on to fallback_rlimit_nofile()/fallback_rlimit_memlock().
 *
 * NOTE(review): the statement following the "Assign configuration defaults" comment, the
 * conditions around the two warnings and the return statement are not visible in this excerpt. */
2349 static int parse_configuration(const struct rlimit
*saved_rlimit_nofile
,
2350 const struct rlimit
*saved_rlimit_memlock
) {
2353 assert(saved_rlimit_nofile
);
2354 assert(saved_rlimit_memlock
);
2356 /* Assign configuration defaults */
2359 r
= parse_config_file();
2361 log_warning_errno(r
, "Failed to parse config file, ignoring: %m");
2364 r
= proc_cmdline_parse(parse_proc_cmdline_item
, NULL
, 0);
2366 log_warning_errno(r
, "Failed to parse kernel command line, ignoring: %m");
2369 /* Initialize some default rlimits for services if they haven't been configured */
2370 fallback_rlimit_nofile(saved_rlimit_nofile
);
2371 fallback_rlimit_memlock(saved_rlimit_memlock
);
2373 /* Note that this also parses bits from the kernel command line, including "debug". */
2374 log_parse_environment();
2376 /* Initialize the show status setting if it hasn't been set explicitly yet */
2377 if (arg_show_status
== _SHOW_STATUS_INVALID
)
2378 arg_show_status
= SHOW_STATUS_YES
;
/* Rejects invocation modes that are not supported, before any real work is done.
 *
 * Each visible check logs an error and returns the result of log_error_errno() with a synthetic
 * errno: EPERM for the PID 1 / system mode / test mode restrictions, EOPNOTSUPP when a user
 * instance is requested on a system not booted with systemd or when running inside a chroot(),
 * and EUNATCH when a user instance is requested without $XDG_RUNTIME_DIR.
 *
 * NOTE(review): part of most conditions, and the final return, are not visible in this excerpt.
 * The descriptions above are taken from the visible halves of the conditions and from the log
 * messages. */
2383 static int safety_checks(void) {
2385 if (getpid_cached() == 1 &&
2386 arg_action
!= ACTION_RUN
)
2387 return log_error_errno(SYNTHETIC_ERRNO(EPERM
),
2388 "Unsupported execution mode while PID 1.");
2390 if (getpid_cached() == 1 &&
2392 return log_error_errno(SYNTHETIC_ERRNO(EPERM
),
2393 "Can't run --user mode as PID 1.");
2395 if (arg_action
== ACTION_RUN
&&
2397 getpid_cached() != 1)
2398 return log_error_errno(SYNTHETIC_ERRNO(EPERM
),
2399 "Can't run system mode unless PID 1.");
2401 if (arg_action
== ACTION_TEST
&&
2403 return log_error_errno(SYNTHETIC_ERRNO(EPERM
),
2404 "Don't run test mode as root.");
2407 arg_action
== ACTION_RUN
&&
2409 return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP
),
2410 "Trying to run as user instance, but the system has not been booted with systemd.");
2413 arg_action
== ACTION_RUN
&&
2414 !getenv("XDG_RUNTIME_DIR"))
2415 return log_error_errno(SYNTHETIC_ERRNO(EUNATCH
),
2416 "Trying to run as user instance, but $XDG_RUNTIME_DIR is not set.");
2419 arg_action
== ACTION_RUN
&&
2420 running_in_chroot() > 0)
2421 return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP
),
2422 "Cannot be run in a chroot() environment.");
/* Runs the security-framework setup steps and timestamps the phase.
 *
 * Visible steps, in order: mac_selinux_setup(), mac_smack_setup(), mac_apparmor_setup(). An IMA
 * step is implied by the "Failed to load IMA policy" message, but its call is not visible here.
 *
 * loaded_policy: passed to the SELinux and SMACK setup calls; must be non-NULL (asserted).
 * security_start_timestamp / security_finish_timestamp: filled with dual_timestamp_get() before
 *         the first and after the last step; both must be non-NULL (asserted).
 * ret_error_message: must be non-NULL (asserted); pointed at a static string naming the policy
 *         that failed to load.
 *
 * NOTE(review): the error checks after each setup call and the return statements are not visible
 * in this excerpt. */
2427 static int initialize_security(
2428 bool *loaded_policy
,
2429 dual_timestamp
*security_start_timestamp
,
2430 dual_timestamp
*security_finish_timestamp
,
2431 const char **ret_error_message
) {
2435 assert(loaded_policy
);
2436 assert(security_start_timestamp
);
2437 assert(security_finish_timestamp
);
2438 assert(ret_error_message
);
2440 dual_timestamp_get(security_start_timestamp
);
2442 r
= mac_selinux_setup(loaded_policy
);
2444 *ret_error_message
= "Failed to load SELinux policy";
2448 r
= mac_smack_setup(loaded_policy
);
2450 *ret_error_message
= "Failed to load SMACK policy";
2454 r
= mac_apparmor_setup();
2456 *ret_error_message
= "Failed to load AppArmor policy";
2462 *ret_error_message
= "Failed to load IMA policy";
2466 dual_timestamp_get(security_finish_timestamp
);
/* Prints a two-section summary of the manager state to stdout: first all units
 * (manager_dump_units()), then all jobs (manager_dump_jobs()). Each dump is called with a tab as
 * its prefix string.
 *
 * m: the manager whose units and jobs are dumped.
 *
 * NOTE(review): one line at the top of the body is not visible in this excerpt. */
2470 static void test_summary(Manager
*m
) {
2473 printf("-> By units:\n");
2474 manager_dump_units(m
, stdout
, "\t");
2476 printf("-> By jobs:\n");
2477 manager_dump_jobs(m
, stdout
, "\t");
/* Collects the file descriptors this process has open into a newly created FDSet, so they can be
 * handed over to the manager later.
 *
 * ret_fds: receives the set created by fdset_new_fill(). The whole set is then marked
 *         close-on-exec, and the fd behind arg_serialization (if that stream is set) is removed
 *         from the set again; that removal is asserted to succeed.
 * ret_error_message: must be non-NULL (asserted); set to "Failed to allocate fd set" on the
 *         visible failure path, which returns via log_emergency_errno().
 *
 * NOTE(review): the condition guarding the failure path and the final return are not visible in
 * this excerpt. */
2480 static int collect_fds(FDSet
**ret_fds
, const char **ret_error_message
) {
2484 assert(ret_error_message
);
2486 r
= fdset_new_fill(ret_fds
);
2488 *ret_error_message
= "Failed to allocate fd set";
2489 return log_emergency_errno(r
, "Failed to allocate fd set: %m");
2492 fdset_cloexec(*ret_fds
, true);
2494 if (arg_serialization
)
2495 assert_se(fdset_remove(*ret_fds
, fileno(arg_serialization
)) >= 0);
/* Detaches this process from any controlling terminal and, when running as PID 1 on a fresh boot
 * (skip_setup is false), resets the console via console_setup().
 *
 * skip_setup: true when the extensive first-time setup is to be skipped.
 *
 * NOTE(review): several statements of this function are not visible in this excerpt, and the
 * second comment in the body is cut off before its end, so only the release_terminal() and
 * console_setup() calls can be described from here. */
2500 static void setup_console_terminal(bool skip_setup
) {
2505 /* Become a session leader if we aren't one yet. */
2508 /* If we are init, we connect stdin/stdout/stderr to /dev/null and make sure we don't have a controlling
2510 (void) release_terminal();
2512 /* Reset the console, but only if this is really init and we are freshly booted */
2513 if (getpid_cached() == 1 && !skip_setup
)
2514 (void) console_setup();
/* Takes a quick look at the raw command line, long before the real option parser runs, to decide
 * whether the extensive first-time setup can be skipped.
 *
 * Returns true when "--deserialize" is present (we are being re-executed), unless
 * "--switched-root" is present as well: after a root switch all the special setup is wanted
 * again even though state is being deserialized, hence false is returned as soon as that option
 * is seen. Returns false when neither option is present. */
static bool early_skip_setup_check(int argc, char *argv[]) {
        bool deserializing = false;
        int idx;

        for (idx = 1; idx < argc; idx++) {
                const char *arg = argv[idx];

                /* A root switch always wins, regardless of what else is on the command line. */
                if (streq(arg, "--switched-root"))
                        return false;

                if (streq(arg, "--deserialize"))
                        deserializing = true;
        }

        /* Deserializing means we are re-executing, hence the extensive setup can be avoided. */
        return deserializing;
}
/* Takes a private copy of the current process environment (environ) with strv_copy() and installs
 * it as saved_env via strv_free_and_replace().
 *
 * NOTE(review): the local declaration, the handling of a failed strv_copy() and the return
 * statements are not visible in this excerpt. */
2535 static int save_env(void) {
2538 l
= strv_copy(environ
);
2542 strv_free_and_replace(saved_env
, l
);
2546 int main(int argc
, char *argv
[]) {
2548 dual_timestamp initrd_timestamp
= DUAL_TIMESTAMP_NULL
, userspace_timestamp
= DUAL_TIMESTAMP_NULL
, kernel_timestamp
= DUAL_TIMESTAMP_NULL
,
2549 security_start_timestamp
= DUAL_TIMESTAMP_NULL
, security_finish_timestamp
= DUAL_TIMESTAMP_NULL
;
2550 struct rlimit saved_rlimit_nofile
= RLIMIT_MAKE_CONST(0),
2551 saved_rlimit_memlock
= RLIMIT_MAKE_CONST(RLIM_INFINITY
); /* The original rlimits we passed
2552 * in. Note we use different values
2553 * for the two that indicate whether
2554 * these fields are initialized! */
2555 bool skip_setup
, loaded_policy
= false, queue_default_job
= false, first_boot
= false, reexecute
= false;
2556 char *switch_root_dir
= NULL
, *switch_root_init
= NULL
;
2557 usec_t before_startup
, after_startup
;
2558 static char systemd
[] = "systemd";
2559 char timespan
[FORMAT_TIMESPAN_MAX
];
2560 const char *shutdown_verb
= NULL
, *error_message
= NULL
;
2561 int r
, retval
= EXIT_FAILURE
;
2565 /* SysV compatibility: redirect init → telinit */
2566 redirect_telinit(argc
, argv
);
2568 /* Take timestamps early on */
2569 dual_timestamp_from_monotonic(&kernel_timestamp
, 0);
2570 dual_timestamp_get(&userspace_timestamp
);
2572 /* Figure out whether we need to do initialize the system, or if we already did that because we are
2574 skip_setup
= early_skip_setup_check(argc
, argv
);
2576 /* If we get started via the /sbin/init symlink then we are called 'init'. After a subsequent reexecution we
2577 * are then called 'systemd'. That is confusing, hence let's call us systemd right-away. */
2578 program_invocation_short_name
= systemd
;
2579 (void) prctl(PR_SET_NAME
, systemd
);
2581 /* Save the original command line */
2582 save_argc_argv(argc
, argv
);
2584 /* Save the original environment as we might need to restore it if we're requested to execute another
2585 * system manager later. */
2588 error_message
= "Failed to copy environment block";
2592 /* Make sure that if the user says "syslog" we actually log to the journal. */
2593 log_set_upgrade_syslog_to_journal(true);
2595 if (getpid_cached() == 1) {
2596 /* When we run as PID 1 force system mode */
2599 /* Disable the umask logic */
2602 /* Make sure that at least initially we do not ever log to journald/syslogd, because it might not be
2603 * activated yet (even though the log socket for it exists). */
2604 log_set_prohibit_ipc(true);
2606 /* Always reopen /dev/console when running as PID 1 or one of its pre-execve() children. This is
2607 * important so that we never end up logging to any foreign stderr, for example if we have to log in a
2608 * child process right before execve()'ing the actual binary, at a point in time where socket
2609 * activation stderr/stdout area already set up. */
2610 log_set_always_reopen_console(true);
2612 if (detect_container() <= 0) {
2614 /* Running outside of a container as PID 1 */
2615 log_set_target(LOG_TARGET_KMSG
);
2619 initrd_timestamp
= userspace_timestamp
;
2622 r
= mount_setup_early();
2624 error_message
= "Failed to mount early API filesystems";
2628 /* Let's open the log backend a second time, in case the first time didn't
2629 * work. Quite possibly we have mounted /dev just now, so /dev/kmsg became
2630 * available, and it previously wasn't. */
2633 disable_printk_ratelimit();
2635 r
= initialize_security(
2637 &security_start_timestamp
,
2638 &security_finish_timestamp
,
2644 if (mac_selinux_init() < 0) {
2645 error_message
= "Failed to initialize SELinux support";
2652 /* Set the default for later on, but don't actually open the logs like this for now. Note that
2653 * if we are transitioning from the initrd there might still be journal fd open, and we
2654 * shouldn't attempt opening that before we parsed /proc/cmdline which might redirect output
2656 log_set_target(LOG_TARGET_JOURNAL_OR_KMSG
);
2659 /* Running inside a container, as PID 1 */
2660 log_set_target(LOG_TARGET_CONSOLE
);
2663 /* For later on, see above... */
2664 log_set_target(LOG_TARGET_JOURNAL
);
2666 /* clear the kernel timestamp, because we are in a container */
2667 kernel_timestamp
= DUAL_TIMESTAMP_NULL
;
2670 initialize_coredump(skip_setup
);
2672 r
= fixup_environment();
2674 log_emergency_errno(r
, "Failed to fix up PID 1 environment: %m");
2675 error_message
= "Failed to fix up PID1 environment";
2679 /* Try to figure out if we can use colors with the console. No need to do that for user instances since
2680 * they never log into the console. */
2681 log_show_color(colors_enabled());
2683 r
= make_null_stdio();
2685 log_warning_errno(r
, "Failed to redirect standard streams to /dev/null, ignoring: %m");
2687 /* Load the kernel modules early. */
2691 /* Mount /proc, /sys and friends, so that /proc/cmdline and /proc/$PID/fd is available. */
2692 r
= mount_setup(loaded_policy
, skip_setup
);
2694 error_message
= "Failed to mount API filesystems";
2698 /* The efivarfs is now mounted, let's read the random seed off it */
2699 (void) efi_take_random_seed();
2701 /* Cache command-line options passed from EFI variables */
2703 (void) cache_efi_options_variable();
2705 /* Running as user instance */
2707 log_set_target(LOG_TARGET_AUTO
);
2710 /* clear the kernel timestamp, because we are not PID 1 */
2711 kernel_timestamp
= DUAL_TIMESTAMP_NULL
;
2713 if (mac_selinux_init() < 0) {
2714 error_message
= "Failed to initialize SELinux support";
2719 /* Save the original RLIMIT_NOFILE/RLIMIT_MEMLOCK so that we can reset it later when
2720 * transitioning from the initrd to the main systemd or suchlike. */
2721 save_rlimits(&saved_rlimit_nofile
, &saved_rlimit_memlock
);
2723 /* Reset all signal handlers. */
2724 (void) reset_all_signal_handlers();
2725 (void) ignore_signals(SIGNALS_IGNORE
, -1);
2727 (void) parse_configuration(&saved_rlimit_nofile
, &saved_rlimit_memlock
);
2729 r
= parse_argv(argc
, argv
);
2731 error_message
= "Failed to parse commandline arguments";
2735 r
= safety_checks();
2739 if (IN_SET(arg_action
, ACTION_TEST
, ACTION_HELP
, ACTION_DUMP_CONFIGURATION_ITEMS
, ACTION_DUMP_BUS_PROPERTIES
, ACTION_BUS_INTROSPECT
))
2740 (void) pager_open(arg_pager_flags
);
2742 if (arg_action
!= ACTION_RUN
)
2745 if (arg_action
== ACTION_HELP
) {
2746 retval
= help() < 0 ? EXIT_FAILURE
: EXIT_SUCCESS
;
2748 } else if (arg_action
== ACTION_VERSION
) {
2751 } else if (arg_action
== ACTION_DUMP_CONFIGURATION_ITEMS
) {
2752 unit_dump_config_items(stdout
);
2753 retval
= EXIT_SUCCESS
;
2755 } else if (arg_action
== ACTION_DUMP_BUS_PROPERTIES
) {
2756 dump_bus_properties(stdout
);
2757 retval
= EXIT_SUCCESS
;
2759 } else if (arg_action
== ACTION_BUS_INTROSPECT
) {
2760 r
= bus_manager_introspect_implementations(stdout
, arg_bus_introspect
);
2761 retval
= r
>= 0 ? EXIT_SUCCESS
: EXIT_FAILURE
;
2765 assert_se(IN_SET(arg_action
, ACTION_RUN
, ACTION_TEST
));
2767 /* Move out of the way, so that we won't block unmounts */
2768 assert_se(chdir("/") == 0);
2770 if (arg_action
== ACTION_RUN
) {
2772 /* Apply the systemd.clock_usec= kernel command line switch */
2773 apply_clock_update();
2775 /* Apply random seed from kernel command line */
2776 cmdline_take_random_seed();
2779 /* A core pattern might have been specified via the cmdline. */
2780 initialize_core_pattern(skip_setup
);
2782 /* Close logging fds, in order not to confuse collecting passed fds and terminal logic below */
2785 /* Remember open file descriptors for later deserialization */
2786 r
= collect_fds(&fds
, &error_message
);
2790 /* Give up any control of the console, but make sure its initialized. */
2791 setup_console_terminal(skip_setup
);
2793 /* Open the logging devices, if possible and necessary */
2797 log_execution_mode(&first_boot
);
2799 r
= initialize_runtime(skip_setup
,
2801 &saved_rlimit_nofile
,
2802 &saved_rlimit_memlock
,
2807 r
= manager_new(arg_system
? UNIT_FILE_SYSTEM
: UNIT_FILE_USER
,
2808 arg_action
== ACTION_TEST
? MANAGER_TEST_FULL
: 0,
2811 log_emergency_errno(r
, "Failed to allocate manager object: %m");
2812 error_message
= "Failed to allocate manager object";
2816 m
->timestamps
[MANAGER_TIMESTAMP_KERNEL
] = kernel_timestamp
;
2817 m
->timestamps
[MANAGER_TIMESTAMP_INITRD
] = initrd_timestamp
;
2818 m
->timestamps
[MANAGER_TIMESTAMP_USERSPACE
] = userspace_timestamp
;
2819 m
->timestamps
[manager_timestamp_initrd_mangle(MANAGER_TIMESTAMP_SECURITY_START
)] = security_start_timestamp
;
2820 m
->timestamps
[manager_timestamp_initrd_mangle(MANAGER_TIMESTAMP_SECURITY_FINISH
)] = security_finish_timestamp
;
2822 set_manager_defaults(m
);
2823 set_manager_settings(m
);
2824 manager_set_first_boot(m
, first_boot
);
2826 /* Remember whether we should queue the default job */
2827 queue_default_job
= !arg_serialization
|| arg_switched_root
;
2829 before_startup
= now(CLOCK_MONOTONIC
);
2831 r
= manager_startup(m
, arg_serialization
, fds
);
2833 error_message
= "Failed to start up manager";
2837 /* This will close all file descriptors that were opened, but not claimed by any unit. */
2838 fds
= fdset_free(fds
);
2839 arg_serialization
= safe_fclose(arg_serialization
);
2841 if (queue_default_job
) {
2842 r
= do_queue_default_job(m
, &error_message
);
2847 after_startup
= now(CLOCK_MONOTONIC
);
2849 log_full(arg_action
== ACTION_TEST
? LOG_INFO
: LOG_DEBUG
,
2850 "Loaded units and determined initial transaction in %s.",
2851 format_timespan(timespan
, sizeof(timespan
), after_startup
- before_startup
, 100 * USEC_PER_MSEC
));
2853 if (arg_action
== ACTION_TEST
) {
2855 retval
= EXIT_SUCCESS
;
2859 (void) invoke_main_loop(m
,
2860 &saved_rlimit_nofile
,
2861 &saved_rlimit_memlock
,
2874 arg_reboot_watchdog
= manager_get_watchdog(m
, WATCHDOG_REBOOT
);
2875 arg_kexec_watchdog
= manager_get_watchdog(m
, WATCHDOG_KEXEC
);
2876 m
= manager_free(m
);
2879 mac_selinux_finish();
2882 do_reexecute(argc
, argv
,
2883 &saved_rlimit_nofile
,
2884 &saved_rlimit_memlock
,
2888 &error_message
); /* This only returns if reexecution failed */
2890 arg_serialization
= safe_fclose(arg_serialization
);
2891 fds
= fdset_free(fds
);
2893 saved_env
= strv_free(saved_env
);
2895 #if HAVE_VALGRIND_VALGRIND_H
2896 /* If we are PID 1 and running under valgrind, then let's exit
2897 * here explicitly. valgrind will only generate nice output on
2898 * exit(), not on exec(), hence let's do the former not the
2900 if (getpid_cached() == 1 && RUNNING_ON_VALGRIND
) {
2901 /* Cleanup watchdog_device strings for valgrind. We need them
2902 * in become_shutdown() so normally we cannot free them yet. */
2903 watchdog_free_device();
2904 arg_watchdog_device
= mfree(arg_watchdog_device
);
2910 #if HAS_FEATURE_ADDRESS_SANITIZER
2911 __lsan_do_leak_check();
2914 if (shutdown_verb
) {
2915 r
= become_shutdown(shutdown_verb
, retval
);
2916 log_error_errno(r
, "Failed to execute shutdown binary, %s: %m", getpid_cached() == 1 ? "freezing" : "quitting");
2917 error_message
= "Failed to execute shutdown binary";
2920 watchdog_free_device();
2921 arg_watchdog_device
= mfree(arg_watchdog_device
);
2923 if (getpid_cached() == 1) {
2925 manager_status_printf(NULL
, STATUS_TYPE_EMERGENCY
,
2926 ANSI_HIGHLIGHT_RED
"!!!!!!" ANSI_NORMAL
,
2927 "%s.", error_message
);
2928 freeze_or_exit_or_reboot();