]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/core/main.c
git-contrib: copypaste-friendly output
[thirdparty/systemd.git] / src / core / main.c
CommitLineData
db9ecf05 1/* SPDX-License-Identifier: LGPL-2.1-or-later */
a7334b09 2
60918275 3#include <errno.h>
3dfc9763 4#include <fcntl.h>
f170852a 5#include <getopt.h>
664f88a7 6#include <sys/mount.h>
3dfc9763 7#include <sys/prctl.h>
b9e74c39 8#include <sys/reboot.h>
3dfc9763 9#include <unistd.h>
349cc4a5 10#if HAVE_SECCOMP
b64a3d86
LP
11#include <seccomp.h>
12#endif
349cc4a5 13#if HAVE_VALGRIND_VALGRIND_H
3dfc9763
LP
14#include <valgrind/valgrind.h>
15#endif
54b434b1 16
718db961 17#include "sd-bus.h"
cf0fbc49 18#include "sd-daemon.h"
b2e7486c 19#include "sd-messages.h"
3dfc9763 20
b5efdb8a 21#include "alloc-util.h"
2ffadd3c 22#include "apparmor-setup.h"
d9d93745 23#include "architecture.h"
3dfc9763
LP
24#include "build.h"
25#include "bus-error.h"
26#include "bus-util.h"
430f0182 27#include "capability-util.h"
a88c5b8a 28#include "cgroup-util.h"
24efb112 29#include "clock-util.h"
3dfc9763 30#include "conf-parser.h"
618234a5 31#include "cpu-set-util.h"
3dfc9763 32#include "dbus-manager.h"
c18ecf03 33#include "dbus.h"
3dfc9763 34#include "def.h"
32429805 35#include "dev-setup.h"
c18ecf03 36#include "efi-random.h"
209b2592 37#include "efivars.h"
eee8b7ab 38#include "emergency-action.h"
3dfc9763 39#include "env-util.h"
57b7a260 40#include "exit-status.h"
3ffd4af2 41#include "fd-util.h"
3dfc9763 42#include "fdset.h"
718db961 43#include "fileio.h"
f97b34a6 44#include "format-util.h"
f4f15635 45#include "fs-util.h"
d247f232 46#include "hexdecoct.h"
3dfc9763
LP
47#include "hostname-setup.h"
48#include "ima-setup.h"
49#include "killall.h"
50#include "kmod-setup.h"
eefc66aa 51#include "limits-util.h"
d7b8eec7 52#include "load-fragment.h"
3dfc9763 53#include "log.h"
b6e66135 54#include "loopback-setup.h"
b6e66135 55#include "machine-id-setup.h"
3dfc9763 56#include "manager.h"
2a341bb9 57#include "manager-dump.h"
32429805 58#include "mkdir.h"
3dfc9763 59#include "mount-setup.h"
d58ad743 60#include "os-util.h"
3dfc9763 61#include "pager.h"
614b022c 62#include "parse-argument.h"
6bedfcbb 63#include "parse-util.h"
7d5ceb64 64#include "path-util.h"
294bf0c3 65#include "pretty-print.h"
4e731273 66#include "proc-cmdline.h"
3dfc9763 67#include "process-util.h"
d247f232 68#include "random-util.h"
8869a0b4 69#include "raw-clone.h"
78f22b97 70#include "rlimit-util.h"
349cc4a5 71#if HAVE_SECCOMP
83f12b27
FS
72#include "seccomp-util.h"
73#endif
b6e66135 74#include "selinux-setup.h"
3dfc9763
LP
75#include "selinux-util.h"
76#include "signal-util.h"
ffbd2c4d 77#include "smack-setup.h"
3dfc9763 78#include "special.h"
8fcde012 79#include "stat-util.h"
15a5e950 80#include "stdio-util.h"
3dfc9763
LP
81#include "strv.h"
82#include "switch-root.h"
a8b627aa 83#include "sysctl-util.h"
3dfc9763 84#include "terminal-util.h"
8612da97 85#include "umask-util.h"
b1d4f8e1 86#include "user-util.h"
9ce17593 87#include "util.h"
3dfc9763
LP
88#include "virt.h"
89#include "watchdog.h"
b6e66135 90
7e11a95e
EV
91#if HAS_FEATURE_ADDRESS_SANITIZER
92#include <sanitizer/lsan_interface.h>
93#endif
94
3a0f06c4
ZJS
#define DEFAULT_TASKS_MAX ((TasksMax) { 15U, 100U }) /* 15% */

/* The top-level operation this invocation shall perform. Statically initialized to ACTION_RUN. */
static enum {
        ACTION_RUN,
        ACTION_HELP,
        ACTION_VERSION,
        ACTION_TEST,
        ACTION_DUMP_CONFIGURATION_ITEMS,
        ACTION_DUMP_BUS_PROPERTIES,
        ACTION_BUS_INTROSPECT,
} arg_action = ACTION_RUN;

static const char *arg_bus_introspect = NULL;

/* Those variables are initialized to 0 automatically, so we avoid uninitialized memory access. Real
 * defaults are assigned in reset_arguments() below. */
static char *arg_default_unit;
static bool arg_system;
/* The following crash-related switches are consulted by crash() and freeze_or_exit_or_reboot(). */
static bool arg_dump_core;
static int arg_crash_chvt;             /* crash() only switches VT if this is >= 0 */
static bool arg_crash_shell;
static bool arg_crash_reboot;
static char *arg_confirm_spawn;
static ShowStatus arg_show_status;
static StatusUnitFormat arg_status_unit_format;
static bool arg_switched_root;
static PagerFlags arg_pager_flags;
static bool arg_service_watchdogs;
static ExecOutput arg_default_std_output;
static ExecOutput arg_default_std_error;
static usec_t arg_default_restart_usec;
static usec_t arg_default_timeout_start_usec;
static usec_t arg_default_timeout_stop_usec;
static usec_t arg_default_timeout_abort_usec;
/* Set from the return value of config_parse_timeout_abort(), see config_parse_default_timeout_abort() */
static bool arg_default_timeout_abort_set;
static usec_t arg_default_start_limit_interval;
static unsigned arg_default_start_limit_burst;
static usec_t arg_runtime_watchdog;
static usec_t arg_reboot_watchdog;     /* written by both RebootWatchdogSec= and ShutdownWatchdogSec= */
static usec_t arg_kexec_watchdog;
static char *arg_early_core_pattern;
static char *arg_watchdog_device;
static char **arg_default_environment;
static char **arg_manager_environment;
static struct rlimit *arg_default_rlimit[_RLIMIT_MAX];
static uint64_t arg_capability_bounding_set;
static bool arg_no_new_privs;
static nsec_t arg_timer_slack_nsec;
static usec_t arg_default_timer_accuracy_usec;
static Set* arg_syscall_archs;
static FILE* arg_serialization;
/* Parsed with config_parse_tristate; a negative value makes set_manager_defaults() fall back to
 * cpu_accounting_is_cheap(). */
static int arg_default_cpu_accounting;
static bool arg_default_io_accounting;
static bool arg_default_ip_accounting;
static bool arg_default_blockio_accounting;
static bool arg_default_memory_accounting;
static bool arg_default_tasks_accounting;
static TasksMax arg_default_tasks_max;
static sd_id128_t arg_machine_id;      /* only ever set to a valid, non-null ID by set_machine_id() */
static EmergencyAction arg_cad_burst_action;
static OOMPolicy arg_default_oom_policy;
static CPUSet arg_cpu_affinity;
static NUMAPolicy arg_numa_policy;
static usec_t arg_clock_usec;
/* Decoded payload of systemd.random_seed= on the kernel command line, plus its size in bytes */
static void *arg_random_seed;
static size_t arg_random_seed_size;

/* A copy of the original environment block */
static char **saved_env = NULL;

/* Forward declaration; the definition is further down in this file. */
static int parse_configuration(const struct rlimit *saved_rlimit_nofile,
                               const struct rlimit *saved_rlimit_memlock);
4fc935ca 167
f70e6fb4
ZJS
168static int manager_find_user_config_paths(char ***ret_files, char ***ret_dirs) {
169 _cleanup_free_ char *base = NULL;
170 _cleanup_strv_free_ char **files = NULL, **dirs = NULL;
171 int r;
172
173 r = xdg_user_config_dir(&base, "/systemd");
174 if (r < 0)
175 return r;
176
177 r = strv_extendf(&files, "%s/user.conf", base);
178 if (r < 0)
179 return r;
180
181 r = strv_extend(&files, PKGSYSCONFDIR "/user.conf");
182 if (r < 0)
183 return r;
184
185 r = strv_consume(&dirs, TAKE_PTR(base));
186 if (r < 0)
187 return r;
188
189 r = strv_extend_strv(&dirs, CONF_PATHS_STRV("systemd"), false);
190 if (r < 0)
191 return r;
192
193 *ret_files = TAKE_PTR(files);
194 *ret_dirs = TAKE_PTR(dirs);
195 return 0;
196}
197
bb259772
LP
198_noreturn_ static void freeze_or_exit_or_reboot(void) {
199
c3b6a348
LP
200 /* If we are running in a container, let's prefer exiting, after all we can propagate an exit code to
201 * the container manager, and thus inform it that something went wrong. */
bb259772
LP
202 if (detect_container() > 0) {
203 log_emergency("Exiting PID 1...");
c3b6a348 204 _exit(EXIT_EXCEPTION);
bb259772 205 }
b9e74c39
LP
206
207 if (arg_crash_reboot) {
208 log_notice("Rebooting in 10s...");
209 (void) sleep(10);
210
211 log_notice("Rebooting now...");
212 (void) reboot(RB_AUTOBOOT);
213 log_emergency_errno(errno, "Failed to reboot: %m");
214 }
215
216 log_emergency("Freezing execution.");
217 freeze();
218}
219
/* Signal handler for fatal signals; installed by install_crash_handler() for SIGNALS_CRASH_HANDLER.
 * Never returns.
 *
 * Steps, in order:
 *   1. If we are not PID 1, re-raise the signal. Otherwise, if arg_dump_core is set, fork off a child
 *      that re-raises the signal with the default disposition so that a core dump is written, and wait
 *      for it.
 *   2. Optionally switch to the VT configured in arg_crash_chvt.
 *   3. Optionally spawn /bin/sh on the console (arg_crash_shell) and wait for it.
 *   4. Hand over to freeze_or_exit_or_reboot(). */
_noreturn_ static void crash(int sig) {
        struct sigaction sa;
        pid_t pid;

        /* NOTE(review): after raise() in the first branch control continues with the common path below
         * if raise() returns — confirm that this is intended for the non-PID-1 case. */
        if (getpid_cached() != 1)
                /* Pass this on immediately, if this is not PID 1 */
                (void) raise(sig);
        else if (!arg_dump_core)
                log_emergency("Caught <%s>, not dumping core.", signal_to_string(sig));
        else {
                sa = (struct sigaction) {
                        .sa_handler = nop_signal_handler,
                        .sa_flags = SA_NOCLDSTOP|SA_RESTART,
                };

                /* We want to wait for the core process, hence let's enable SIGCHLD */
                (void) sigaction(SIGCHLD, &sa, NULL);

                pid = raw_clone(SIGCHLD);
                if (pid < 0)
                        log_emergency_errno(errno, "Caught <%s>, cannot fork for core dump: %m", signal_to_string(sig));
                else if (pid == 0) {
                        /* Child: this process exists only to die from the signal and leave a core dump */

                        /* Enable default signal handler for core dump */

                        sa = (struct sigaction) {
                                .sa_handler = SIG_DFL,
                        };
                        (void) sigaction(sig, &sa, NULL);

                        /* Don't limit the coredump size */
                        (void) setrlimit(RLIMIT_CORE, &RLIMIT_MAKE_CONST(RLIM_INFINITY));

                        /* Just to be sure... */
                        (void) chdir("/");

                        /* Raise the signal again */
                        pid = raw_getpid();
                        (void) kill(pid, sig); /* raise() would kill the parent */

                        assert_not_reached("We shouldn't be here...");
                        _exit(EXIT_EXCEPTION);
                } else {
                        /* Parent: wait for the child and report whether a core was actually dumped */
                        siginfo_t status;
                        int r;

                        /* Order things nicely. */
                        r = wait_for_terminate(pid, &status);
                        if (r < 0)
                                log_emergency_errno(r, "Caught <%s>, waitpid() failed: %m", signal_to_string(sig));
                        else if (status.si_code != CLD_DUMPED) {
                                /* si_status is an exit code for CLD_EXITED and a signal number otherwise,
                                 * hence pick the matching pretty-printer */
                                const char *s = status.si_code == CLD_EXITED
                                        ? exit_status_to_string(status.si_status, EXIT_STATUS_LIBC)
                                        : signal_to_string(status.si_status);

                                log_emergency("Caught <%s>, core dump failed (child "PID_FMT", code=%s, status=%i/%s).",
                                              signal_to_string(sig),
                                              pid,
                                              sigchld_code_to_string(status.si_code),
                                              status.si_status, strna(s));
                        } else
                                log_emergency("Caught <%s>, dumped core as pid "PID_FMT".",
                                              signal_to_string(sig), pid);
                }
        }

        /* A negative arg_crash_chvt means: do not switch the VT */
        if (arg_crash_chvt >= 0)
                (void) chvt(arg_crash_chvt);

        sa = (struct sigaction) {
                .sa_handler = SIG_IGN,
                .sa_flags = SA_NOCLDSTOP|SA_NOCLDWAIT|SA_RESTART,
        };

        /* Let the kernel reap children for us */
        (void) sigaction(SIGCHLD, &sa, NULL);

        if (arg_crash_shell) {
                log_notice("Executing crash shell in 10s...");
                (void) sleep(10);

                pid = raw_clone(SIGCHLD);
                if (pid < 0)
                        log_emergency_errno(errno, "Failed to fork off crash shell: %m");
                else if (pid == 0) {
                        /* Child: become session leader, attach to the console and exec the shell */
                        (void) setsid();
                        (void) make_console_stdio();
                        (void) rlimit_nofile_safe();
                        (void) execle("/bin/sh", "/bin/sh", NULL, environ);

                        /* Only reached if execle() failed */
                        log_emergency_errno(errno, "execle() failed: %m");
                        _exit(EXIT_EXCEPTION);
                } else {
                        log_info("Spawned crash shell as PID "PID_FMT".", pid);
                        (void) wait_for_terminate(pid, NULL);
                }
        }

        freeze_or_exit_or_reboot();
}
319
320static void install_crash_handler(void) {
297d563d 321 static const struct sigaction sa = {
b92bea5d 322 .sa_handler = crash,
297d563d 323 .sa_flags = SA_NODEFER, /* So that we can raise the signal again from the signal handler */
b92bea5d 324 };
297d563d 325 int r;
97c4f35c 326
9c274488
LP
327 /* We ignore the return value here, since, we don't mind if we cannot set up a crash handler */
328 r = sigaction_many(&sa, SIGNALS_CRASH_HANDLER);
297d563d
LP
329 if (r < 0)
330 log_debug_errno(r, "I had trouble setting up the crash handler, ignoring: %m");
97c4f35c 331}
f170852a 332
56d96fc0
LP
333static int console_setup(void) {
334 _cleanup_close_ int tty_fd = -1;
335 int r;
80876c20 336
512947d4 337 tty_fd = open_terminal("/dev/console", O_WRONLY|O_NOCTTY|O_CLOEXEC);
23bbb0de
MS
338 if (tty_fd < 0)
339 return log_error_errno(tty_fd, "Failed to open /dev/console: %m");
80876c20 340
56d96fc0
LP
341 /* We don't want to force text mode. plymouth may be showing
342 * pictures already from initrd. */
512947d4 343 r = reset_terminal_fd(tty_fd, false);
23bbb0de
MS
344 if (r < 0)
345 return log_error_errno(r, "Failed to reset /dev/console: %m");
843d2643 346
56d96fc0 347 return 0;
80876c20
LP
348}
349
ee48dbd5 350static int set_machine_id(const char *m) {
e042eab7 351 sd_id128_t t;
8b26cdbd 352 assert(m);
ee48dbd5 353
e042eab7 354 if (sd_id128_from_string(m, &t) < 0)
ee48dbd5
NC
355 return -EINVAL;
356
e042eab7 357 if (sd_id128_is_null(t))
ee48dbd5
NC
358 return -EINVAL;
359
e042eab7 360 arg_machine_id = t;
ee48dbd5
NC
361 return 0;
362}
363
96287a49 364static int parse_proc_cmdline_item(const char *key, const char *value, void *data) {
059cb385 365 int r;
f170852a 366
059cb385 367 assert(key);
5192bd19 368
1d84ad94 369 if (STR_IN_SET(key, "systemd.unit", "rd.systemd.unit")) {
bf4df7c3 370
1d84ad94
LP
371 if (proc_cmdline_value_missing(key, value))
372 return 0;
bf4df7c3 373
1d84ad94
LP
374 if (!unit_name_is_valid(value, UNIT_NAME_PLAIN|UNIT_NAME_INSTANCE))
375 log_warning("Unit name specified on %s= is not valid, ignoring: %s", key, value);
cd57038a
ZJS
376 else if (in_initrd() == !!startswith(key, "rd."))
377 return free_and_strdup_warn(&arg_default_unit, value);
f170852a 378
1d84ad94 379 } else if (proc_cmdline_key_streq(key, "systemd.dump_core")) {
4fc935ca 380
1d84ad94 381 r = value ? parse_boolean(value) : true;
fb472900 382 if (r < 0)
5e1ee764 383 log_warning_errno(r, "Failed to parse dump core switch %s, ignoring: %m", value);
4fc935ca 384 else
fa0f4d8a 385 arg_dump_core = r;
4fc935ca 386
c6885f5f
FB
387 } else if (proc_cmdline_key_streq(key, "systemd.early_core_pattern")) {
388
389 if (proc_cmdline_value_missing(key, value))
390 return 0;
391
392 if (path_is_absolute(value))
614b022c 393 (void) parse_path_argument(value, false, &arg_early_core_pattern);
c6885f5f
FB
394 else
395 log_warning("Specified core pattern '%s' is not an absolute path, ignoring.", value);
396
1d84ad94 397 } else if (proc_cmdline_key_streq(key, "systemd.crash_chvt")) {
b9e74c39 398
1d84ad94
LP
399 if (!value)
400 arg_crash_chvt = 0; /* turn on */
5e1ee764 401 else {
a07a7324 402 r = parse_crash_chvt(value, &arg_crash_chvt);
5e1ee764
YW
403 if (r < 0)
404 log_warning_errno(r, "Failed to parse crash chvt switch %s, ignoring: %m", value);
405 }
b9e74c39 406
1d84ad94 407 } else if (proc_cmdline_key_streq(key, "systemd.crash_shell")) {
4fc935ca 408
1d84ad94 409 r = value ? parse_boolean(value) : true;
fb472900 410 if (r < 0)
5e1ee764 411 log_warning_errno(r, "Failed to parse crash shell switch %s, ignoring: %m", value);
4fc935ca 412 else
fa0f4d8a 413 arg_crash_shell = r;
5e7ee61c 414
1d84ad94 415 } else if (proc_cmdline_key_streq(key, "systemd.crash_reboot")) {
5e7ee61c 416
1d84ad94 417 r = value ? parse_boolean(value) : true;
b9e74c39 418 if (r < 0)
5e1ee764 419 log_warning_errno(r, "Failed to parse crash reboot switch %s, ignoring: %m", value);
5e7ee61c 420 else
b9e74c39 421 arg_crash_reboot = r;
5e7ee61c 422
1d84ad94
LP
423 } else if (proc_cmdline_key_streq(key, "systemd.confirm_spawn")) {
424 char *s;
7d5ceb64 425
1d84ad94 426 r = parse_confirm_spawn(value, &s);
059cb385 427 if (r < 0)
5e1ee764
YW
428 log_warning_errno(r, "Failed to parse confirm_spawn switch %s, ignoring: %m", value);
429 else
430 free_and_replace(arg_confirm_spawn, s);
601f6a1e 431
2a12e32e
JK
432 } else if (proc_cmdline_key_streq(key, "systemd.service_watchdogs")) {
433
434 r = value ? parse_boolean(value) : true;
435 if (r < 0)
5e1ee764 436 log_warning_errno(r, "Failed to parse service watchdog switch %s, ignoring: %m", value);
2a12e32e
JK
437 else
438 arg_service_watchdogs = r;
439
1d84ad94 440 } else if (proc_cmdline_key_streq(key, "systemd.show_status")) {
9e58ff9c 441
1d84ad94
LP
442 if (value) {
443 r = parse_show_status(value, &arg_show_status);
444 if (r < 0)
5e1ee764 445 log_warning_errno(r, "Failed to parse show status switch %s, ignoring: %m", value);
1d84ad94
LP
446 } else
447 arg_show_status = SHOW_STATUS_YES;
059cb385 448
36cf4507
ZJS
449 } else if (proc_cmdline_key_streq(key, "systemd.status_unit_format")) {
450
451 if (proc_cmdline_value_missing(key, value))
452 return 0;
453
454 r = status_unit_format_from_string(value);
455 if (r < 0)
456 log_warning_errno(r, "Failed to parse %s=%s, ignoring: %m", key, value);
457 else
458 arg_status_unit_format = r;
459
1d84ad94
LP
460 } else if (proc_cmdline_key_streq(key, "systemd.default_standard_output")) {
461
462 if (proc_cmdline_value_missing(key, value))
463 return 0;
0a494f1f 464
059cb385 465 r = exec_output_from_string(value);
fb472900 466 if (r < 0)
5e1ee764 467 log_warning_errno(r, "Failed to parse default standard output switch %s, ignoring: %m", value);
0a494f1f
LP
468 else
469 arg_default_std_output = r;
0a494f1f 470
1d84ad94
LP
471 } else if (proc_cmdline_key_streq(key, "systemd.default_standard_error")) {
472
473 if (proc_cmdline_value_missing(key, value))
474 return 0;
059cb385
LP
475
476 r = exec_output_from_string(value);
fb472900 477 if (r < 0)
5e1ee764 478 log_warning_errno(r, "Failed to parse default standard error switch %s, ignoring: %m", value);
0a494f1f
LP
479 else
480 arg_default_std_error = r;
9e7c5357 481
1d84ad94
LP
482 } else if (streq(key, "systemd.setenv")) {
483
484 if (proc_cmdline_value_missing(key, value))
485 return 0;
059cb385 486
b70935ac
ZJS
487 if (!env_assignment_is_valid(value))
488 log_warning("Environment variable assignment '%s' is not valid. Ignoring.", value);
489 else {
490 r = strv_env_replace_strdup(&arg_default_environment, value);
491 if (r < 0)
1d84ad94 492 return log_oom();
b70935ac 493 }
9e58ff9c 494
1d84ad94
LP
495 } else if (proc_cmdline_key_streq(key, "systemd.machine_id")) {
496
497 if (proc_cmdline_value_missing(key, value))
498 return 0;
499
500 r = set_machine_id(value);
501 if (r < 0)
5e1ee764 502 log_warning_errno(r, "MachineID '%s' is not valid, ignoring: %m", value);
ee48dbd5 503
1d84ad94
LP
504 } else if (proc_cmdline_key_streq(key, "systemd.default_timeout_start_sec")) {
505
506 if (proc_cmdline_value_missing(key, value))
507 return 0;
508
509 r = parse_sec(value, &arg_default_timeout_start_usec);
510 if (r < 0)
5e1ee764 511 log_warning_errno(r, "Failed to parse default start timeout '%s', ignoring: %m", value);
1d84ad94
LP
512
513 if (arg_default_timeout_start_usec <= 0)
514 arg_default_timeout_start_usec = USEC_INFINITY;
ee48dbd5 515
68d58f38
LP
516 } else if (proc_cmdline_key_streq(key, "systemd.cpu_affinity")) {
517
518 if (proc_cmdline_value_missing(key, value))
519 return 0;
520
521 r = parse_cpu_set(value, &arg_cpu_affinity);
522 if (r < 0)
162392b7 523 log_warning_errno(r, "Failed to parse CPU affinity mask '%s', ignoring: %m", value);
68d58f38 524
8a2c1fbf
EJ
525 } else if (proc_cmdline_key_streq(key, "systemd.watchdog_device")) {
526
527 if (proc_cmdline_value_missing(key, value))
528 return 0;
529
614b022c 530 (void) parse_path_argument(value, false, &arg_watchdog_device);
8a2c1fbf 531
3753325b
LP
532 } else if (proc_cmdline_key_streq(key, "systemd.clock_usec")) {
533
534 if (proc_cmdline_value_missing(key, value))
535 return 0;
536
537 r = safe_atou64(value, &arg_clock_usec);
538 if (r < 0)
539 log_warning_errno(r, "Failed to parse systemd.clock_usec= argument, ignoring: %s", value);
540
d247f232
LP
541 } else if (proc_cmdline_key_streq(key, "systemd.random_seed")) {
542 void *p;
543 size_t sz;
544
545 if (proc_cmdline_value_missing(key, value))
546 return 0;
547
f5fbe71d 548 r = unbase64mem(value, SIZE_MAX, &p, &sz);
d247f232
LP
549 if (r < 0)
550 log_warning_errno(r, "Failed to parse systemd.random_seed= argument, ignoring: %s", value);
551
552 free(arg_random_seed);
553 arg_random_seed = sz > 0 ? p : mfree(p);
554 arg_random_seed_size = sz;
555
059cb385 556 } else if (streq(key, "quiet") && !value) {
d7b15e0a 557
7a293242 558 if (arg_show_status == _SHOW_STATUS_INVALID)
0d066dd1 559 arg_show_status = SHOW_STATUS_ERROR;
059cb385
LP
560
561 } else if (streq(key, "debug") && !value) {
d7b15e0a 562
1de1c9c3
LP
563 /* Note that log_parse_environment() handles 'debug'
564 * too, and sets the log level to LOG_DEBUG. */
d7b15e0a 565
75f86906 566 if (detect_container() > 0)
b2103dcc 567 log_set_target(LOG_TARGET_CONSOLE);
059cb385 568
dcd61450 569 } else if (!value) {
e2c9a131 570 const char *target;
f170852a 571
ceae6295 572 /* Compatible with SysV, but supported independently even if SysV compatibility is disabled. */
e2c9a131
EV
573 target = runlevel_to_target(key);
574 if (target)
cd57038a 575 return free_and_strdup_warn(&arg_default_unit, target);
f170852a
LP
576 }
577
578 return 0;
579}
580
e8e581bf
ZJS
/* Generates a config parser callback called 'name' that passes the right-hand side of the assignment
 * to func(), a function taking the string and returning a negative error on failure. Failures are
 * reported via log_syntax() using 'descr' to describe the setting, and are otherwise ignored: the
 * generated callback always returns 0. */
#define DEFINE_SETTER(name, func, descr)                              \
        static int name(const char *unit,                             \
                        const char *filename,                         \
                        unsigned line,                                \
                        const char *section,                          \
                        unsigned section_line,                        \
                        const char *lvalue,                           \
                        int ltype,                                    \
                        const char *rvalue,                           \
                        void *data,                                   \
                        void *userdata) {                             \
                                                                      \
                int r;                                                \
                                                                      \
                assert(filename);                                     \
                assert(lvalue);                                       \
                assert(rvalue);                                       \
                                                                      \
                r = func(rvalue);                                     \
                if (r < 0)                                            \
                        log_syntax(unit, LOG_ERR, filename, line, r,  \
                                   "Invalid " descr " '%s': %m",      \
                                   rvalue);                           \
                                                                      \
                return 0;                                             \
        }

DEFINE_SETTER(config_parse_level2, log_set_max_level_from_string, "log level");
DEFINE_SETTER(config_parse_target, log_set_target_from_string, "target");
DEFINE_SETTER(config_parse_color, log_show_color_from_string, "color");
DEFINE_SETTER(config_parse_location, log_show_location_from_string, "location");
DEFINE_SETTER(config_parse_time, log_show_time_from_string, "time");
487393e9 613
a61d6874
ZJS
614static int config_parse_default_timeout_abort(
615 const char *unit,
616 const char *filename,
617 unsigned line,
618 const char *section,
619 unsigned section_line,
620 const char *lvalue,
621 int ltype,
622 const char *rvalue,
623 void *data,
624 void *userdata) {
625 int r;
626
627 r = config_parse_timeout_abort(unit, filename, line, section, section_line, lvalue, ltype, rvalue,
628 &arg_default_timeout_abort_usec, userdata);
629 if (r >= 0)
630 arg_default_timeout_abort_set = r;
631 return 0;
632}
487393e9 633
a61d6874 634static int parse_config_file(void) {
f975e971 635 const ConfigTableItem items[] = {
a61d6874
ZJS
636 { "Manager", "LogLevel", config_parse_level2, 0, NULL },
637 { "Manager", "LogTarget", config_parse_target, 0, NULL },
638 { "Manager", "LogColor", config_parse_color, 0, NULL },
639 { "Manager", "LogLocation", config_parse_location, 0, NULL },
c5673ed0 640 { "Manager", "LogTime", config_parse_time, 0, NULL },
a61d6874
ZJS
641 { "Manager", "DumpCore", config_parse_bool, 0, &arg_dump_core },
642 { "Manager", "CrashChVT", /* legacy */ config_parse_crash_chvt, 0, &arg_crash_chvt },
643 { "Manager", "CrashChangeVT", config_parse_crash_chvt, 0, &arg_crash_chvt },
644 { "Manager", "CrashShell", config_parse_bool, 0, &arg_crash_shell },
645 { "Manager", "CrashReboot", config_parse_bool, 0, &arg_crash_reboot },
646 { "Manager", "ShowStatus", config_parse_show_status, 0, &arg_show_status },
647 { "Manager", "StatusUnitFormat", config_parse_status_unit_format, 0, &arg_status_unit_format },
648 { "Manager", "CPUAffinity", config_parse_cpu_affinity2, 0, &arg_cpu_affinity },
649 { "Manager", "NUMAPolicy", config_parse_numa_policy, 0, &arg_numa_policy.type },
650 { "Manager", "NUMAMask", config_parse_numa_mask, 0, &arg_numa_policy },
651 { "Manager", "JoinControllers", config_parse_warn_compat, DISABLED_CONFIGURATION, NULL },
652 { "Manager", "RuntimeWatchdogSec", config_parse_sec, 0, &arg_runtime_watchdog },
653 { "Manager", "RebootWatchdogSec", config_parse_sec, 0, &arg_reboot_watchdog },
654 { "Manager", "ShutdownWatchdogSec", config_parse_sec, 0, &arg_reboot_watchdog }, /* obsolete alias */
655 { "Manager", "KExecWatchdogSec", config_parse_sec, 0, &arg_kexec_watchdog },
656 { "Manager", "WatchdogDevice", config_parse_path, 0, &arg_watchdog_device },
657 { "Manager", "CapabilityBoundingSet", config_parse_capability_set, 0, &arg_capability_bounding_set },
658 { "Manager", "NoNewPrivileges", config_parse_bool, 0, &arg_no_new_privs },
349cc4a5 659#if HAVE_SECCOMP
a61d6874 660 { "Manager", "SystemCallArchitectures", config_parse_syscall_archs, 0, &arg_syscall_archs },
89fffa27 661#endif
a61d6874
ZJS
662 { "Manager", "TimerSlackNSec", config_parse_nsec, 0, &arg_timer_slack_nsec },
663 { "Manager", "DefaultTimerAccuracySec", config_parse_sec, 0, &arg_default_timer_accuracy_usec },
664 { "Manager", "DefaultStandardOutput", config_parse_output_restricted, 0, &arg_default_std_output },
665 { "Manager", "DefaultStandardError", config_parse_output_restricted, 0, &arg_default_std_error },
666 { "Manager", "DefaultTimeoutStartSec", config_parse_sec, 0, &arg_default_timeout_start_usec },
667 { "Manager", "DefaultTimeoutStopSec", config_parse_sec, 0, &arg_default_timeout_stop_usec },
668 { "Manager", "DefaultTimeoutAbortSec", config_parse_default_timeout_abort, 0, NULL },
669 { "Manager", "DefaultRestartSec", config_parse_sec, 0, &arg_default_restart_usec },
670 { "Manager", "DefaultStartLimitInterval", config_parse_sec, 0, &arg_default_start_limit_interval }, /* obsolete alias */
671 { "Manager", "DefaultStartLimitIntervalSec", config_parse_sec, 0, &arg_default_start_limit_interval },
672 { "Manager", "DefaultStartLimitBurst", config_parse_unsigned, 0, &arg_default_start_limit_burst },
673 { "Manager", "DefaultEnvironment", config_parse_environ, 0, &arg_default_environment },
d55ed7de 674 { "Manager", "ManagerEnvironment", config_parse_environ, 0, &arg_manager_environment },
a61d6874
ZJS
675 { "Manager", "DefaultLimitCPU", config_parse_rlimit, RLIMIT_CPU, arg_default_rlimit },
676 { "Manager", "DefaultLimitFSIZE", config_parse_rlimit, RLIMIT_FSIZE, arg_default_rlimit },
677 { "Manager", "DefaultLimitDATA", config_parse_rlimit, RLIMIT_DATA, arg_default_rlimit },
678 { "Manager", "DefaultLimitSTACK", config_parse_rlimit, RLIMIT_STACK, arg_default_rlimit },
679 { "Manager", "DefaultLimitCORE", config_parse_rlimit, RLIMIT_CORE, arg_default_rlimit },
680 { "Manager", "DefaultLimitRSS", config_parse_rlimit, RLIMIT_RSS, arg_default_rlimit },
681 { "Manager", "DefaultLimitNOFILE", config_parse_rlimit, RLIMIT_NOFILE, arg_default_rlimit },
682 { "Manager", "DefaultLimitAS", config_parse_rlimit, RLIMIT_AS, arg_default_rlimit },
683 { "Manager", "DefaultLimitNPROC", config_parse_rlimit, RLIMIT_NPROC, arg_default_rlimit },
684 { "Manager", "DefaultLimitMEMLOCK", config_parse_rlimit, RLIMIT_MEMLOCK, arg_default_rlimit },
685 { "Manager", "DefaultLimitLOCKS", config_parse_rlimit, RLIMIT_LOCKS, arg_default_rlimit },
686 { "Manager", "DefaultLimitSIGPENDING", config_parse_rlimit, RLIMIT_SIGPENDING, arg_default_rlimit },
687 { "Manager", "DefaultLimitMSGQUEUE", config_parse_rlimit, RLIMIT_MSGQUEUE, arg_default_rlimit },
688 { "Manager", "DefaultLimitNICE", config_parse_rlimit, RLIMIT_NICE, arg_default_rlimit },
689 { "Manager", "DefaultLimitRTPRIO", config_parse_rlimit, RLIMIT_RTPRIO, arg_default_rlimit },
690 { "Manager", "DefaultLimitRTTIME", config_parse_rlimit, RLIMIT_RTTIME, arg_default_rlimit },
691 { "Manager", "DefaultCPUAccounting", config_parse_tristate, 0, &arg_default_cpu_accounting },
692 { "Manager", "DefaultIOAccounting", config_parse_bool, 0, &arg_default_io_accounting },
693 { "Manager", "DefaultIPAccounting", config_parse_bool, 0, &arg_default_ip_accounting },
694 { "Manager", "DefaultBlockIOAccounting", config_parse_bool, 0, &arg_default_blockio_accounting },
695 { "Manager", "DefaultMemoryAccounting", config_parse_bool, 0, &arg_default_memory_accounting },
696 { "Manager", "DefaultTasksAccounting", config_parse_bool, 0, &arg_default_tasks_accounting },
697 { "Manager", "DefaultTasksMax", config_parse_tasks_max, 0, &arg_default_tasks_max },
698 { "Manager", "CtrlAltDelBurstAction", config_parse_emergency_action, 0, &arg_cad_burst_action },
699 { "Manager", "DefaultOOMPolicy", config_parse_oom_policy, 0, &arg_default_oom_policy },
d3b1c508 700 {}
487393e9
LP
701 };
702
e94a009c
YW
703 _cleanup_strv_free_ char **files = NULL, **dirs = NULL;
704 const char *suffix;
f70e6fb4 705 int r;
75eb6154 706
e94a009c 707 if (arg_system)
f70e6fb4 708 suffix = "system.conf.d";
e94a009c
YW
709 else {
710 r = manager_find_user_config_paths(&files, &dirs);
f70e6fb4
ZJS
711 if (r < 0)
712 return log_error_errno(r, "Failed to determine config file paths: %m");
e94a009c 713
f70e6fb4
ZJS
714 suffix = "user.conf.d";
715 }
75eb6154 716
f70e6fb4 717 (void) config_parse_many(
e94a009c
YW
718 (const char* const*) (files ?: STRV_MAKE(PKGSYSCONFDIR "/system.conf")),
719 (const char* const*) (dirs ?: CONF_PATHS_STRV("systemd")),
720 suffix,
4f9ff96a
LP
721 "Manager\0",
722 config_item_table_lookup, items,
723 CONFIG_PARSE_WARN,
724 NULL,
725 NULL);
36c16a7c 726
f70e6fb4
ZJS
727 /* Traditionally "0" was used to turn off the default unit timeouts. Fix this up so that we use
728 * USEC_INFINITY like everywhere else. */
36c16a7c
LP
729 if (arg_default_timeout_start_usec <= 0)
730 arg_default_timeout_start_usec = USEC_INFINITY;
731 if (arg_default_timeout_stop_usec <= 0)
732 arg_default_timeout_stop_usec = USEC_INFINITY;
487393e9 733
487393e9
LP
734 return 0;
735}
736
85cb4151 737static void set_manager_defaults(Manager *m) {
06af2a04
TB
738
739 assert(m);
740
5b65ae15
LP
741 /* Propagates the various default unit property settings into the manager object, i.e. properties that do not
742 * affect the manager itself, but are just what newly allocated units will have set if they haven't set
743 * anything else. (Also see set_manager_settings() for the settings that affect the manager's own behaviour) */
744
06af2a04
TB
745 m->default_timer_accuracy_usec = arg_default_timer_accuracy_usec;
746 m->default_std_output = arg_default_std_output;
747 m->default_std_error = arg_default_std_error;
748 m->default_timeout_start_usec = arg_default_timeout_start_usec;
749 m->default_timeout_stop_usec = arg_default_timeout_stop_usec;
dc653bf4
JK
750 m->default_timeout_abort_usec = arg_default_timeout_abort_usec;
751 m->default_timeout_abort_set = arg_default_timeout_abort_set;
06af2a04
TB
752 m->default_restart_usec = arg_default_restart_usec;
753 m->default_start_limit_interval = arg_default_start_limit_interval;
754 m->default_start_limit_burst = arg_default_start_limit_burst;
a88c5b8a
CD
755
756 /* On 4.15+ with unified hierarchy, CPU accounting is essentially free as it doesn't require the CPU
757 * controller to be enabled, so the default is to enable it unless we got told otherwise. */
758 if (arg_default_cpu_accounting >= 0)
759 m->default_cpu_accounting = arg_default_cpu_accounting;
760 else
761 m->default_cpu_accounting = cpu_accounting_is_cheap();
762
13c31542 763 m->default_io_accounting = arg_default_io_accounting;
377bfd2d 764 m->default_ip_accounting = arg_default_ip_accounting;
06af2a04
TB
765 m->default_blockio_accounting = arg_default_blockio_accounting;
766 m->default_memory_accounting = arg_default_memory_accounting;
03a7b521 767 m->default_tasks_accounting = arg_default_tasks_accounting;
0af20ea2 768 m->default_tasks_max = arg_default_tasks_max;
afcfaa69 769 m->default_oom_policy = arg_default_oom_policy;
06af2a04 770
79a224c4
LP
771 (void) manager_set_default_rlimits(m, arg_default_rlimit);
772
773 (void) manager_default_environment(m);
774 (void) manager_transient_environment_add(m, arg_default_environment);
06af2a04
TB
775}
776
7b46fc6a
LP
777static void set_manager_settings(Manager *m) {
778
779 assert(m);
780
986935cf
FB
781 /* Propagates the various manager settings into the manager object, i.e. properties that
782 * effect the manager itself (as opposed to just being inherited into newly allocated
783 * units, see set_manager_defaults() above). */
5b65ae15 784
7b46fc6a 785 m->confirm_spawn = arg_confirm_spawn;
2a12e32e 786 m->service_watchdogs = arg_service_watchdogs;
7b46fc6a
LP
787 m->cad_burst_action = arg_cad_burst_action;
788
986935cf
FB
789 manager_set_watchdog(m, WATCHDOG_RUNTIME, arg_runtime_watchdog);
790 manager_set_watchdog(m, WATCHDOG_REBOOT, arg_reboot_watchdog);
791 manager_set_watchdog(m, WATCHDOG_KEXEC, arg_kexec_watchdog);
792
7365a296 793 manager_set_show_status(m, arg_show_status, "commandline");
36cf4507 794 m->status_unit_format = arg_status_unit_format;
7b46fc6a
LP
795}
796
f170852a 797static int parse_argv(int argc, char *argv[]) {
f170852a
LP
798 enum {
799 ARG_LOG_LEVEL = 0x100,
800 ARG_LOG_TARGET,
bbe63281
LP
801 ARG_LOG_COLOR,
802 ARG_LOG_LOCATION,
c5673ed0 803 ARG_LOG_TIME,
2f198e2f 804 ARG_UNIT,
edb9aaa8 805 ARG_SYSTEM,
af2d49f7 806 ARG_USER,
e537352b 807 ARG_TEST,
b87c2aa6 808 ARG_NO_PAGER,
9ba0bc4e 809 ARG_VERSION,
80876c20 810 ARG_DUMP_CONFIGURATION_ITEMS,
bbc1acab 811 ARG_DUMP_BUS_PROPERTIES,
5c08257b 812 ARG_BUS_INTROSPECT,
9e58ff9c 813 ARG_DUMP_CORE,
b9e74c39 814 ARG_CRASH_CHVT,
9e58ff9c 815 ARG_CRASH_SHELL,
b9e74c39 816 ARG_CRASH_REBOOT,
a16e1123 817 ARG_CONFIRM_SPAWN,
9e58ff9c 818 ARG_SHOW_STATUS,
4288f619 819 ARG_DESERIALIZE,
2660882b 820 ARG_SWITCHED_ROOT,
0a494f1f 821 ARG_DEFAULT_STD_OUTPUT,
ee48dbd5 822 ARG_DEFAULT_STD_ERROR,
2a12e32e
JK
823 ARG_MACHINE_ID,
824 ARG_SERVICE_WATCHDOGS,
f170852a
LP
825 };
826
827 static const struct option options[] = {
a16e1123
LP
828 { "log-level", required_argument, NULL, ARG_LOG_LEVEL },
829 { "log-target", required_argument, NULL, ARG_LOG_TARGET },
bbe63281
LP
830 { "log-color", optional_argument, NULL, ARG_LOG_COLOR },
831 { "log-location", optional_argument, NULL, ARG_LOG_LOCATION },
c5673ed0 832 { "log-time", optional_argument, NULL, ARG_LOG_TIME },
2f198e2f 833 { "unit", required_argument, NULL, ARG_UNIT },
edb9aaa8 834 { "system", no_argument, NULL, ARG_SYSTEM },
af2d49f7 835 { "user", no_argument, NULL, ARG_USER },
a16e1123 836 { "test", no_argument, NULL, ARG_TEST },
b87c2aa6 837 { "no-pager", no_argument, NULL, ARG_NO_PAGER },
a16e1123 838 { "help", no_argument, NULL, 'h' },
9ba0bc4e 839 { "version", no_argument, NULL, ARG_VERSION },
a16e1123 840 { "dump-configuration-items", no_argument, NULL, ARG_DUMP_CONFIGURATION_ITEMS },
bbc1acab 841 { "dump-bus-properties", no_argument, NULL, ARG_DUMP_BUS_PROPERTIES },
5c08257b 842 { "bus-introspect", required_argument, NULL, ARG_BUS_INTROSPECT },
a5d87bf0 843 { "dump-core", optional_argument, NULL, ARG_DUMP_CORE },
b9e74c39 844 { "crash-chvt", required_argument, NULL, ARG_CRASH_CHVT },
a5d87bf0 845 { "crash-shell", optional_argument, NULL, ARG_CRASH_SHELL },
b9e74c39 846 { "crash-reboot", optional_argument, NULL, ARG_CRASH_REBOOT },
a5d87bf0 847 { "confirm-spawn", optional_argument, NULL, ARG_CONFIRM_SPAWN },
6e98720f 848 { "show-status", optional_argument, NULL, ARG_SHOW_STATUS },
a16e1123 849 { "deserialize", required_argument, NULL, ARG_DESERIALIZE },
2660882b 850 { "switched-root", no_argument, NULL, ARG_SWITCHED_ROOT },
0a494f1f
LP
851 { "default-standard-output", required_argument, NULL, ARG_DEFAULT_STD_OUTPUT, },
852 { "default-standard-error", required_argument, NULL, ARG_DEFAULT_STD_ERROR, },
ee48dbd5 853 { "machine-id", required_argument, NULL, ARG_MACHINE_ID },
2a12e32e 854 { "service-watchdogs", required_argument, NULL, ARG_SERVICE_WATCHDOGS },
fb472900 855 {}
f170852a
LP
856 };
857
858 int c, r;
9a9ca408 859 bool user_arg_seen = false;
f170852a
LP
860
861 assert(argc >= 1);
862 assert(argv);
863
df0ff127 864 if (getpid_cached() == 1)
b770165a
LP
865 opterr = 0;
866
099663ff 867 while ((c = getopt_long(argc, argv, "hDbsz:", options, NULL)) >= 0)
f170852a
LP
868
869 switch (c) {
870
871 case ARG_LOG_LEVEL:
fb472900 872 r = log_set_max_level_from_string(optarg);
2b5107e1
ZJS
873 if (r < 0)
874 return log_error_errno(r, "Failed to parse log level \"%s\": %m", optarg);
f170852a
LP
875
876 break;
877
878 case ARG_LOG_TARGET:
fb472900 879 r = log_set_target_from_string(optarg);
2b5107e1
ZJS
880 if (r < 0)
881 return log_error_errno(r, "Failed to parse log target \"%s\": %m", optarg);
f170852a
LP
882
883 break;
884
bbe63281
LP
885 case ARG_LOG_COLOR:
886
d0b170c8 887 if (optarg) {
fb472900 888 r = log_show_color_from_string(optarg);
2b5107e1
ZJS
889 if (r < 0)
890 return log_error_errno(r, "Failed to parse log color setting \"%s\": %m",
891 optarg);
d0b170c8
LP
892 } else
893 log_show_color(true);
bbe63281
LP
894
895 break;
896
897 case ARG_LOG_LOCATION:
d0b170c8 898 if (optarg) {
fb472900 899 r = log_show_location_from_string(optarg);
2b5107e1
ZJS
900 if (r < 0)
901 return log_error_errno(r, "Failed to parse log location setting \"%s\": %m",
902 optarg);
d0b170c8
LP
903 } else
904 log_show_location(true);
bbe63281
LP
905
906 break;
907
c5673ed0
DS
908 case ARG_LOG_TIME:
909
910 if (optarg) {
911 r = log_show_time_from_string(optarg);
912 if (r < 0)
913 return log_error_errno(r, "Failed to parse log time setting \"%s\": %m",
914 optarg);
915 } else
916 log_show_time(true);
917
918 break;
919
0a494f1f 920 case ARG_DEFAULT_STD_OUTPUT:
fb472900 921 r = exec_output_from_string(optarg);
2b5107e1
ZJS
922 if (r < 0)
923 return log_error_errno(r, "Failed to parse default standard output setting \"%s\": %m",
924 optarg);
925 arg_default_std_output = r;
0a494f1f
LP
926 break;
927
928 case ARG_DEFAULT_STD_ERROR:
fb472900 929 r = exec_output_from_string(optarg);
2b5107e1
ZJS
930 if (r < 0)
931 return log_error_errno(r, "Failed to parse default standard error output setting \"%s\": %m",
932 optarg);
933 arg_default_std_error = r;
0a494f1f
LP
934 break;
935
2f198e2f 936 case ARG_UNIT:
e6e242ad 937 r = free_and_strdup(&arg_default_unit, optarg);
23bbb0de 938 if (r < 0)
2b5107e1 939 return log_error_errno(r, "Failed to set default unit \"%s\": %m", optarg);
f170852a
LP
940
941 break;
942
edb9aaa8 943 case ARG_SYSTEM:
463d0d15 944 arg_system = true;
edb9aaa8 945 break;
a5dab5ce 946
af2d49f7 947 case ARG_USER:
463d0d15 948 arg_system = false;
9a9ca408 949 user_arg_seen = true;
a5dab5ce 950 break;
a5dab5ce 951
e965d56d 952 case ARG_TEST:
fa0f4d8a 953 arg_action = ACTION_TEST;
b87c2aa6
ZJS
954 break;
955
956 case ARG_NO_PAGER:
0221d68a 957 arg_pager_flags |= PAGER_DISABLE;
e965d56d
LP
958 break;
959
9ba0bc4e
ZJS
960 case ARG_VERSION:
961 arg_action = ACTION_VERSION;
962 break;
963
e537352b 964 case ARG_DUMP_CONFIGURATION_ITEMS:
fa0f4d8a 965 arg_action = ACTION_DUMP_CONFIGURATION_ITEMS;
e537352b
LP
966 break;
967
bbc1acab
YW
968 case ARG_DUMP_BUS_PROPERTIES:
969 arg_action = ACTION_DUMP_BUS_PROPERTIES;
970 break;
971
5c08257b
ZJS
972 case ARG_BUS_INTROSPECT:
973 arg_bus_introspect = optarg;
974 arg_action = ACTION_BUS_INTROSPECT;
975 break;
976
9e58ff9c 977 case ARG_DUMP_CORE:
599c7c54
ZJS
978 r = parse_boolean_argument("--dump-core", optarg, &arg_dump_core);
979 if (r < 0)
980 return r;
b9e74c39
LP
981 break;
982
983 case ARG_CRASH_CHVT:
a07a7324 984 r = parse_crash_chvt(optarg, &arg_crash_chvt);
b9e74c39 985 if (r < 0)
2b5107e1
ZJS
986 return log_error_errno(r, "Failed to parse crash virtual terminal index: \"%s\": %m",
987 optarg);
9e58ff9c
LP
988 break;
989
990 case ARG_CRASH_SHELL:
599c7c54
ZJS
991 r = parse_boolean_argument("--crash-shell", optarg, &arg_crash_shell);
992 if (r < 0)
993 return r;
b9e74c39
LP
994 break;
995
996 case ARG_CRASH_REBOOT:
599c7c54
ZJS
997 r = parse_boolean_argument("--crash-reboot", optarg, &arg_crash_reboot);
998 if (r < 0)
999 return r;
9e58ff9c
LP
1000 break;
1001
80876c20 1002 case ARG_CONFIRM_SPAWN:
7d5ceb64
FB
1003 arg_confirm_spawn = mfree(arg_confirm_spawn);
1004
1005 r = parse_confirm_spawn(optarg, &arg_confirm_spawn);
1006 if (r < 0)
2b5107e1
ZJS
1007 return log_error_errno(r, "Failed to parse confirm spawn option: \"%s\": %m",
1008 optarg);
80876c20
LP
1009 break;
1010
2a12e32e 1011 case ARG_SERVICE_WATCHDOGS:
599c7c54 1012 r = parse_boolean_argument("--service-watchdogs=", optarg, &arg_service_watchdogs);
2a12e32e 1013 if (r < 0)
599c7c54 1014 return r;
2a12e32e
JK
1015 break;
1016
9e58ff9c 1017 case ARG_SHOW_STATUS:
d450b6f2
ZJS
1018 if (optarg) {
1019 r = parse_show_status(optarg, &arg_show_status);
ac7ec288 1020 if (r < 0)
2b5107e1
ZJS
1021 return log_error_errno(r, "Failed to parse show status boolean: \"%s\": %m",
1022 optarg);
d450b6f2
ZJS
1023 } else
1024 arg_show_status = SHOW_STATUS_YES;
6e98720f 1025 break;
a5d87bf0 1026
a16e1123
LP
1027 case ARG_DESERIALIZE: {
1028 int fd;
1029 FILE *f;
1030
01e10de3 1031 r = safe_atoi(optarg, &fd);
2b5107e1
ZJS
1032 if (r < 0)
1033 log_error_errno(r, "Failed to parse deserialize option \"%s\": %m", optarg);
baaa35ad
ZJS
1034 if (fd < 0)
1035 return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
1036 "Invalid deserialize fd: %d",
1037 fd);
a16e1123 1038
b9e74c39 1039 (void) fd_cloexec(fd, true);
01e10de3
LP
1040
1041 f = fdopen(fd, "r");
4a62c710 1042 if (!f)
2b5107e1 1043 return log_error_errno(errno, "Failed to open serialization fd %d: %m", fd);
a16e1123 1044
74ca738f 1045 safe_fclose(arg_serialization);
d3b1c508 1046 arg_serialization = f;
a16e1123
LP
1047
1048 break;
1049 }
1050
2660882b 1051 case ARG_SWITCHED_ROOT:
bf4df7c3 1052 arg_switched_root = true;
d03bc1b8
HH
1053 break;
1054
ee48dbd5
NC
1055 case ARG_MACHINE_ID:
1056 r = set_machine_id(optarg);
54500613 1057 if (r < 0)
2b5107e1 1058 return log_error_errno(r, "MachineID '%s' is not valid: %m", optarg);
ee48dbd5
NC
1059 break;
1060
f170852a 1061 case 'h':
fa0f4d8a 1062 arg_action = ACTION_HELP;
f170852a
LP
1063 break;
1064
1d2e23ab
LP
1065 case 'D':
1066 log_set_max_level(LOG_DEBUG);
1067 break;
1068
099663ff
LP
1069 case 'b':
1070 case 's':
1071 case 'z':
cd57038a
ZJS
1072 /* Just to eat away the sysvinit kernel cmdline args that we'll parse in
1073 * parse_proc_cmdline_item() or ignore, without any getopt() error messages.
1074 */
099663ff 1075 case '?':
df0ff127 1076 if (getpid_cached() != 1)
099663ff 1077 return -EINVAL;
601185b4
ZJS
1078 else
1079 return 0;
099663ff 1080
601185b4
ZJS
1081 default:
1082 assert_not_reached("Unhandled option code.");
f170852a
LP
1083 }
1084
d7a0f1f4 1085 if (optind < argc && getpid_cached() != 1)
9a9ca408
ZJS
1086 /* Hmm, when we aren't run as init system let's complain about excess arguments */
1087 return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Excess arguments.");
1088
1089 if (arg_action == ACTION_RUN && !arg_system && !user_arg_seen)
baaa35ad 1090 return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
9a9ca408 1091 "Explicit --user argument required to run as user manager.");
d821e6d6 1092
f170852a
LP
1093 return 0;
1094}
1095
1096static int help(void) {
37ec0fdd
LP
1097 _cleanup_free_ char *link = NULL;
1098 int r;
1099
1100 r = terminal_urlify_man("systemd", "1", &link);
1101 if (r < 0)
1102 return log_oom();
f170852a 1103
2e33c433 1104 printf("%s [OPTIONS...]\n\n"
7ae47326
ZJS
1105 "%sStarts and monitors system and user services.%s\n\n"
1106 "This program takes no positional arguments.\n\n"
1107 "%sOptions%s:\n"
e537352b 1108 " -h --help Show this help\n"
cb4069d9 1109 " --version Show version\n"
cd69e88b
LP
1110 " --test Determine initial transaction, dump it and exit\n"
1111 " --system In combination with --test: operate as system service manager\n"
1112 " --user In combination with --test: operate as per-user service manager\n"
b87c2aa6 1113 " --no-pager Do not pipe output into a pager\n"
80876c20 1114 " --dump-configuration-items Dump understood unit configuration items\n"
bbc1acab 1115 " --dump-bus-properties Dump exposed bus properties\n"
5c08257b 1116 " --bus-introspect=PATH Write XML introspection data\n"
9e58ff9c 1117 " --unit=UNIT Set default unit\n"
b9e74c39
LP
1118 " --dump-core[=BOOL] Dump core on crash\n"
1119 " --crash-vt=NR Change to specified VT on crash\n"
1120 " --crash-reboot[=BOOL] Reboot on crash\n"
1121 " --crash-shell[=BOOL] Run shell on crash\n"
1122 " --confirm-spawn[=BOOL] Ask for confirmation when spawning processes\n"
1123 " --show-status[=BOOL] Show status updates on the console during bootup\n"
c1dc6153 1124 " --log-target=TARGET Set log target (console, journal, kmsg, journal-or-kmsg, null)\n"
9e58ff9c 1125 " --log-level=LEVEL Set log level (debug, info, notice, warning, err, crit, alert, emerg)\n"
b9e74c39
LP
1126 " --log-color[=BOOL] Highlight important log messages\n"
1127 " --log-location[=BOOL] Include code location in log messages\n"
c5673ed0 1128 " --log-time[=BOOL] Prefix log messages with current time\n"
0a494f1f 1129 " --default-standard-output= Set default standard output for services\n"
37ec0fdd 1130 " --default-standard-error= Set default standard error output for services\n"
bc556335
DDM
1131 "\nSee the %s for details.\n",
1132 program_invocation_short_name,
1133 ansi_highlight(),
1134 ansi_normal(),
1135 ansi_underline(),
1136 ansi_normal(),
1137 link);
f170852a
LP
1138
1139 return 0;
1140}
1141
2cc856ac
LP
1142static int prepare_reexecute(
1143 Manager *m,
1144 FILE **ret_f,
1145 FDSet **ret_fds,
1146 bool switching_root) {
1147
48b90859
LP
1148 _cleanup_fdset_free_ FDSet *fds = NULL;
1149 _cleanup_fclose_ FILE *f = NULL;
a16e1123
LP
1150 int r;
1151
1152 assert(m);
2cc856ac
LP
1153 assert(ret_f);
1154 assert(ret_fds);
a16e1123 1155
6b78f9b4 1156 r = manager_open_serialization(m, &f);
48b90859
LP
1157 if (r < 0)
1158 return log_error_errno(r, "Failed to create serialization file: %m");
a16e1123 1159
71445ae7 1160 /* Make sure nothing is really destructed when we shut down */
313cefa1 1161 m->n_reloading++;
718db961 1162 bus_manager_send_reloading(m, true);
71445ae7 1163
6b78f9b4 1164 fds = fdset_new();
48b90859
LP
1165 if (!fds)
1166 return log_oom();
a16e1123 1167
b3680f49 1168 r = manager_serialize(m, f, fds, switching_root);
48b90859 1169 if (r < 0)
d68c645b 1170 return r;
a16e1123 1171
48b90859
LP
1172 if (fseeko(f, 0, SEEK_SET) == (off_t) -1)
1173 return log_error_errno(errno, "Failed to rewind serialization fd: %m");
a16e1123 1174
6b78f9b4 1175 r = fd_cloexec(fileno(f), false);
48b90859
LP
1176 if (r < 0)
1177 return log_error_errno(r, "Failed to disable O_CLOEXEC for serialization: %m");
a16e1123 1178
6b78f9b4 1179 r = fdset_cloexec(fds, false);
48b90859
LP
1180 if (r < 0)
1181 return log_error_errno(r, "Failed to disable O_CLOEXEC for serialization fds: %m");
a16e1123 1182
2cc856ac
LP
1183 *ret_f = TAKE_PTR(f);
1184 *ret_fds = TAKE_PTR(fds);
a16e1123 1185
48b90859 1186 return 0;
a16e1123
LP
1187}
1188
a8b627aa
LP
static void bump_file_max_and_nr_open(void) {

        /* Raises fs.file-max and fs.nr_open as far as the kernel lets us. On current kernels large numbers
         * of file descriptors are no longer a performance problem and their memory is properly tracked by
         * memcg, so counting and limiting them in two further layers is unnecessary and only complicates
         * things. This turns off 2 of the 4 levels of limits on file descriptors and leaves RLIMIT_NOFILE
         * (soft + hard) as the only ones that really matter. All failures are logged and otherwise
         * ignored. */

#if BUMP_PROC_SYS_FS_FILE_MAX || BUMP_PROC_SYS_FS_NR_OPEN
        int r;
#endif

#if BUMP_PROC_SYS_FS_FILE_MAX
        /* Since kernel 5.2 the maximum accepted here is LONG_MAX, so write that. (Previously things were
         * different, but the operation would fail silently.) */
        r = sysctl_writef("fs/file-max", "%li\n", LONG_MAX);
        if (r < 0)
                log_full_errno(IN_SET(r, -EROFS, -EPERM, -EACCES) ? LOG_DEBUG : LOG_WARNING, r, "Failed to bump fs.file-max, ignoring: %m");
#endif

#if BUMP_PROC_SYS_FS_NR_OPEN
        /* The kernel enforces minimum and maximum values on fs.nr_open but does not expose them. The
         * expression for the maximum is architecture dependent and an implementation detail we do not
         * want to copy to userspace. Hence we can only try and hope: start with INT_MAX and keep halving
         * the candidate (the loop step below) until the kernel accepts it. */
        for (int candidate = INT_MAX;; candidate /= 2) {
                int current;

                candidate &= ~(__SIZEOF_POINTER__ - 1); /* Round down to next multiple of the pointer size */
                if (candidate < 1024) {
                        log_warning("Can't bump fs.nr_open, value too small.");
                        break;
                }

                current = read_nr_open();
                if (current < 0) {
                        log_error_errno(current, "Failed to read fs.nr_open: %m");
                        break;
                }
                if (current >= candidate) { /* Already larger */
                        log_debug("Skipping bump, value is already larger.");
                        break;
                }

                r = sysctl_writef("fs/nr_open", "%i\n", candidate);
                if (r == -EINVAL) {
                        /* Rejected as out of range: retry with half the value. */
                        log_debug("Couldn't write fs.nr_open as %i, halving it.", candidate);
                        continue;
                }

                if (r < 0)
                        log_full_errno(IN_SET(r, -EROFS, -EPERM, -EACCES) ? LOG_DEBUG : LOG_WARNING, r, "Failed to bump fs.nr_open, ignoring: %m");
                else
                        log_debug("Successfully bumped fs.nr_open to %i", candidate);

                break;
        }
#endif
}
1254
/* Raises our own RLIMIT_NOFILE (soft and hard) to the value reported by read_nr_open(), never lowering
 * either limit below what is recorded in *saved_rlimit. Returns 0, or the result of log_warning_errno()
 * if applying the new limit failed. */
static int bump_rlimit_nofile(struct rlimit *saved_rlimit) {
        struct rlimit wanted;
        int kernel_limit, r;

        /* Get the underlying absolute limit the kernel enforces */
        kernel_limit = read_nr_open();

        /* Work out the limits we want. Never go below what we inherited. */
        wanted = (struct rlimit) {
                .rlim_cur = MAX((rlim_t) kernel_limit, saved_rlimit->rlim_cur),
                .rlim_max = MAX((rlim_t) kernel_limit, saved_rlimit->rlim_max),
        };

        /* Nothing to do if neither limit would go up. */
        if (saved_rlimit->rlim_max >= wanted.rlim_max &&
            saved_rlimit->rlim_cur >= wanted.rlim_cur) {
                log_debug("RLIMIT_NOFILE is already as high or higher than we need it, not bumping.");
                return 0;
        }

        /* Raise both the soft and the hard limit for ourselves, all the way to the kernel maximum. */
        r = setrlimit_closest(RLIMIT_NOFILE, &wanted);
        if (r < 0)
                return log_warning_errno(r, "Setting RLIMIT_NOFILE failed, ignoring: %m");

        return 0;
}
1283
fb3ae275 1284static int bump_rlimit_memlock(struct rlimit *saved_rlimit) {
cda7faa9 1285 struct rlimit new_rlimit;
04d1ee0f 1286 uint64_t mm;
fb3ae275
LP
1287 int r;
1288
a17c1712 1289 /* BPF_MAP_TYPE_LPM_TRIE bpf maps are charged against RLIMIT_MEMLOCK, even if we have CAP_IPC_LOCK which should
6e3c443b 1290 * normally disable such checks. We need them to implement IPAddressAllow= and IPAddressDeny=, hence let's bump
a17c1712 1291 * the value high enough for our user. */
fb3ae275 1292
cda7faa9
LP
1293 /* Using MAX() on resource limits only is safe if RLIM_INFINITY is > 0. POSIX declares that rlim_t
1294 * must be unsigned, hence this is a given, but let's make this clear here. */
1295 assert_cc(RLIM_INFINITY > 0);
1296
60dcf3dc
LP
1297 mm = physical_memory_scale(1, 8); /* Let's scale how much we allow to be locked by the amount of physical
1298 * RAM. We allow an eighth to be locked by us, just to pick a value. */
04d1ee0f 1299
cda7faa9 1300 new_rlimit = (struct rlimit) {
04d1ee0f
LP
1301 .rlim_cur = MAX3(HIGH_RLIMIT_MEMLOCK, saved_rlimit->rlim_cur, mm),
1302 .rlim_max = MAX3(HIGH_RLIMIT_MEMLOCK, saved_rlimit->rlim_max, mm),
cda7faa9
LP
1303 };
1304
1305 if (saved_rlimit->rlim_max >= new_rlimit.rlim_cur &&
1306 saved_rlimit->rlim_cur >= new_rlimit.rlim_max) {
1307 log_debug("RLIMIT_MEMLOCK is already as high or higher than we need it, not bumping.");
1308 return 0;
1309 }
1310
1311 r = setrlimit_closest(RLIMIT_MEMLOCK, &new_rlimit);
fb3ae275
LP
1312 if (r < 0)
1313 return log_warning_errno(r, "Setting RLIMIT_MEMLOCK failed, ignoring: %m");
1314
1315 return 0;
1316}
1317
static void test_usr(void) {

        /* Warns if /usr looks like a separate file system that has not been mounted yet, which we detect
         * by /usr being an empty directory. Any other outcome of the check (not empty, or an error) is
         * silently accepted. */

        if (dir_is_empty("/usr") > 0)
                log_warning("/usr appears to be on its own filesystem and is not already mounted. This is not a supported setup. "
                            "Some things will probably break (sometimes even silently) in mysterious ways. "
                            "Consult http://freedesktop.org/wiki/Software/systemd/separate-usr-is-broken for more information.");
}
1329
d3b1c508 1330static int enforce_syscall_archs(Set *archs) {
349cc4a5 1331#if HAVE_SECCOMP
d3b1c508
LP
1332 int r;
1333
83f12b27
FS
1334 if (!is_seccomp_available())
1335 return 0;
1336
469830d1 1337 r = seccomp_restrict_archs(arg_syscall_archs);
d3b1c508 1338 if (r < 0)
469830d1 1339 return log_error_errno(r, "Failed to enforce system call architecture restrication: %m");
d3b1c508 1340#endif
469830d1 1341 return 0;
d3b1c508
LP
1342}
1343
b6e2f329
LP
1344static int status_welcome(void) {
1345 _cleanup_free_ char *pretty_name = NULL, *ansi_color = NULL;
1346 int r;
1347
5ca02bfc 1348 if (!show_status_on(arg_show_status))
fd8c85c6
LP
1349 return 0;
1350
d58ad743
LP
1351 r = parse_os_release(NULL,
1352 "PRETTY_NAME", &pretty_name,
209c1470 1353 "ANSI_COLOR", &ansi_color);
d58ad743
LP
1354 if (r < 0)
1355 log_full_errno(r == -ENOENT ? LOG_DEBUG : LOG_WARNING, r,
1356 "Failed to read os-release file, ignoring: %m");
b6e2f329 1357
dc9b5816 1358 if (log_get_show_color())
a885727a 1359 return status_printf(NULL, 0,
dc9b5816
ZJS
1360 "\nWelcome to \x1B[%sm%s\x1B[0m!\n",
1361 isempty(ansi_color) ? "1" : ansi_color,
1362 isempty(pretty_name) ? "Linux" : pretty_name);
1363 else
a885727a 1364 return status_printf(NULL, 0,
dc9b5816
ZJS
1365 "\nWelcome to %s!\n",
1366 isempty(pretty_name) ? "Linux" : pretty_name);
b6e2f329
LP
1367}
1368
fdd25311
LP
1369static int write_container_id(void) {
1370 const char *c;
7756528e 1371 int r = 0; /* avoid false maybe-uninitialized warning */
fdd25311
LP
1372
1373 c = getenv("container");
1374 if (isempty(c))
1375 return 0;
1376
8612da97
LP
1377 RUN_WITH_UMASK(0022)
1378 r = write_string_file("/run/systemd/container", c, WRITE_STRING_FILE_CREATE);
19854865 1379 if (r < 0)
f1f849b0 1380 return log_warning_errno(r, "Failed to write /run/systemd/container, ignoring: %m");
19854865
LP
1381
1382 return 1;
1383}
1384
1385static int bump_unix_max_dgram_qlen(void) {
1386 _cleanup_free_ char *qlen = NULL;
1387 unsigned long v;
1388 int r;
1389
3130fca5
LP
1390 /* Let's bump the net.unix.max_dgram_qlen sysctl. The kernel default of 16 is simply too low. We set the value
1391 * really really early during boot, so that it is actually applied to all our sockets, including the
1392 * $NOTIFY_SOCKET one. */
19854865
LP
1393
1394 r = read_one_line_file("/proc/sys/net/unix/max_dgram_qlen", &qlen);
1395 if (r < 0)
875622c3 1396 return log_full_errno(r == -ENOENT ? LOG_DEBUG : LOG_WARNING, r, "Failed to read AF_UNIX datagram queue length, ignoring: %m");
19854865
LP
1397
1398 r = safe_atolu(qlen, &v);
1399 if (r < 0)
3130fca5 1400 return log_warning_errno(r, "Failed to parse AF_UNIX datagram queue length '%s', ignoring: %m", qlen);
19854865
LP
1401
1402 if (v >= DEFAULT_UNIX_MAX_DGRAM_QLEN)
1403 return 0;
1404
57512c89 1405 r = write_string_filef("/proc/sys/net/unix/max_dgram_qlen", WRITE_STRING_FILE_DISABLE_BUFFER, "%lu", DEFAULT_UNIX_MAX_DGRAM_QLEN);
19854865
LP
1406 if (r < 0)
1407 return log_full_errno(IN_SET(r, -EROFS, -EPERM, -EACCES) ? LOG_DEBUG : LOG_WARNING, r,
1408 "Failed to bump AF_UNIX datagram queue length, ignoring: %m");
1409
1410 return 1;
fdd25311
LP
1411}
1412
32391275
FB
1413static int fixup_environment(void) {
1414 _cleanup_free_ char *term = NULL;
4dc63c4b 1415 const char *t;
32391275
FB
1416 int r;
1417
43db615b
LP
1418 /* Only fix up the environment when we are started as PID 1 */
1419 if (getpid_cached() != 1)
1420 return 0;
1421
1422 /* We expect the environment to be set correctly if run inside a container. */
84af7821
LP
1423 if (detect_container() > 0)
1424 return 0;
1425
43db615b
LP
1426 /* When started as PID1, the kernel uses /dev/console for our stdios and uses TERM=linux whatever the backend
1427 * device used by the console. We try to make a better guess here since some consoles might not have support
1428 * for color mode for example.
32391275 1429 *
43db615b 1430 * However if TERM was configured through the kernel command line then leave it alone. */
1d84ad94 1431 r = proc_cmdline_get_key("TERM", 0, &term);
32391275
FB
1432 if (r < 0)
1433 return r;
32391275 1434
4dc63c4b
LP
1435 t = term ?: default_term_for_tty("/dev/console");
1436
1437 if (setenv("TERM", t, 1) < 0)
32391275
FB
1438 return -errno;
1439
9d48671c 1440 /* The kernels sets HOME=/ for init. Let's undo this. */
44ee03d1
ZJS
1441 if (path_equal_ptr(getenv("HOME"), "/"))
1442 assert_se(unsetenv("HOME") == 0);
9d48671c 1443
32391275
FB
1444 return 0;
1445}
1446
6808a0bc
LP
static void redirect_telinit(int argc, char *argv[]) {

        /* SysV compatibility: there, calling init as a user is identical to calling telinit. So when we
         * are not PID 1 but were invoked as "init", replace ourselves with the systemctl binary, passing
         * the arguments through unchanged. Compiled out entirely without HAVE_SYSV_COMPAT. */

#if HAVE_SYSV_COMPAT
        if (getpid_cached() == 1 || !invoked_as(argv, "init"))
                return;

        execv(SYSTEMCTL_BINARY_PATH, argv);

        /* execv() only returns on failure. */
        log_error_errno(errno, "Failed to exec " SYSTEMCTL_BINARY_PATH ": %m");
        exit(EXIT_FAILURE);
#endif
}
1463
4a36297c
LP
1464static int become_shutdown(
1465 const char *shutdown_verb,
7eb35049 1466 int retval) {
4a36297c
LP
1467
1468 char log_level[DECIMAL_STR_MAX(int) + 1],
e73c54b8
JK
1469 exit_code[DECIMAL_STR_MAX(uint8_t) + 1],
1470 timeout[DECIMAL_STR_MAX(usec_t) + 1];
4a36297c 1471
e73c54b8 1472 const char* command_line[13] = {
4a36297c
LP
1473 SYSTEMD_SHUTDOWN_BINARY_PATH,
1474 shutdown_verb,
e73c54b8 1475 "--timeout", timeout,
4a36297c
LP
1476 "--log-level", log_level,
1477 "--log-target",
1478 };
1479
1480 _cleanup_strv_free_ char **env_block = NULL;
e73c54b8 1481 size_t pos = 7;
4a36297c 1482 int r;
acafd7d8 1483 usec_t watchdog_timer = 0;
4a36297c 1484
7eb35049 1485 assert(shutdown_verb);
234519ae 1486 assert(!command_line[pos]);
4a36297c
LP
1487 env_block = strv_copy(environ);
1488
1489 xsprintf(log_level, "%d", log_get_max_level());
e73c54b8 1490 xsprintf(timeout, "%" PRI_USEC "us", arg_default_timeout_stop_usec);
4a36297c
LP
1491
1492 switch (log_get_target()) {
1493
1494 case LOG_TARGET_KMSG:
1495 case LOG_TARGET_JOURNAL_OR_KMSG:
1496 case LOG_TARGET_SYSLOG_OR_KMSG:
1497 command_line[pos++] = "kmsg";
1498 break;
1499
1500 case LOG_TARGET_NULL:
1501 command_line[pos++] = "null";
1502 break;
1503
1504 case LOG_TARGET_CONSOLE:
1505 default:
1506 command_line[pos++] = "console";
1507 break;
1508 };
1509
1510 if (log_get_show_color())
1511 command_line[pos++] = "--log-color";
1512
1513 if (log_get_show_location())
1514 command_line[pos++] = "--log-location";
1515
c5673ed0
DS
1516 if (log_get_show_time())
1517 command_line[pos++] = "--log-time";
1518
4a36297c
LP
1519 if (streq(shutdown_verb, "exit")) {
1520 command_line[pos++] = "--exit-code";
1521 command_line[pos++] = exit_code;
1522 xsprintf(exit_code, "%d", retval);
1523 }
1524
1525 assert(pos < ELEMENTSOF(command_line));
1526
acafd7d8 1527 if (streq(shutdown_verb, "reboot"))
65224c1d 1528 watchdog_timer = arg_reboot_watchdog;
acafd7d8
LB
1529 else if (streq(shutdown_verb, "kexec"))
1530 watchdog_timer = arg_kexec_watchdog;
1531
1532 if (watchdog_timer > 0 && watchdog_timer != USEC_INFINITY) {
7eb35049 1533
4a36297c
LP
1534 char *e;
1535
acafd7d8 1536 /* If we reboot or kexec let's set the shutdown
4a36297c
LP
1537 * watchdog and tell the shutdown binary to
1538 * repeatedly ping it */
acafd7d8 1539 r = watchdog_set_timeout(&watchdog_timer);
4a36297c
LP
1540 watchdog_close(r < 0);
1541
1542 /* Tell the binary how often to ping, ignore failure */
acafd7d8 1543 if (asprintf(&e, "WATCHDOG_USEC="USEC_FMT, watchdog_timer) > 0)
8a2c1fbf
EJ
1544 (void) strv_consume(&env_block, e);
1545
1546 if (arg_watchdog_device &&
1547 asprintf(&e, "WATCHDOG_DEVICE=%s", arg_watchdog_device) > 0)
1548 (void) strv_consume(&env_block, e);
4a36297c
LP
1549 } else
1550 watchdog_close(true);
1551
1552 /* Avoid the creation of new processes forked by the
1553 * kernel; at this point, we will not listen to the
1554 * signals anyway */
1555 if (detect_container() <= 0)
1556 (void) cg_uninstall_release_agent(SYSTEMD_CGROUP_CONTROLLER);
1557
1558 execve(SYSTEMD_SHUTDOWN_BINARY_PATH, (char **) command_line, env_block);
1559 return -errno;
1560}
1561
e839bafd
LP
1562static void initialize_clock(void) {
1563 int r;
1564
3753325b
LP
1565 /* This is called very early on, before we parse the kernel command line or otherwise figure out why
1566 * we are running, but only once. */
1567
e839bafd
LP
1568 if (clock_is_localtime(NULL) > 0) {
1569 int min;
1570
1571 /*
1572 * The very first call of settimeofday() also does a time warp in the kernel.
1573 *
1574 * In the rtc-in-local time mode, we set the kernel's timezone, and rely on external tools to take care
1575 * of maintaining the RTC and do all adjustments. This matches the behavior of Windows, which leaves
1576 * the RTC alone if the registry tells that the RTC runs in UTC.
1577 */
1578 r = clock_set_timezone(&min);
1579 if (r < 0)
1580 log_error_errno(r, "Failed to apply local time delta, ignoring: %m");
1581 else
1582 log_info("RTC configured in localtime, applying delta of %i minutes to system time.", min);
1583
d46b79bb 1584 } else if (!in_initrd())
e839bafd
LP
1585 /*
1586 * Do a dummy very first call to seal the kernel's time warp magic.
1587 *
1588 * Do not call this from inside the initrd. The initrd might not carry /etc/adjtime with LOCAL, but the
1589 * real system could be set up that way. In such case, we need to delay the time-warp or the sealing
1590 * until we reach the real system.
1591 *
1592 * Do no set the kernel's timezone. The concept of local time cannot be supported reliably, the time
1593 * will jump or be incorrect at every daylight saving time change. All kernel local time concepts will
1594 * be treated as UTC that way.
1595 */
1596 (void) clock_reset_timewarp();
e839bafd
LP
1597
1598 r = clock_apply_epoch();
1599 if (r < 0)
1600 log_error_errno(r, "Current system time is before build time, but cannot correct: %m");
1601 else if (r > 0)
1602 log_info("System time before build time, advancing clock.");
1603}
1604
3753325b
LP
1605static void apply_clock_update(void) {
1606 struct timespec ts;
1607
1608 /* This is called later than initialize_clock(), i.e. after we parsed configuration files/kernel
1609 * command line and such. */
1610
1611 if (arg_clock_usec == 0)
1612 return;
1613
45250e66
LP
1614 if (getpid_cached() != 1)
1615 return;
1616
3753325b
LP
1617 if (clock_settime(CLOCK_REALTIME, timespec_store(&ts, arg_clock_usec)) < 0)
1618 log_error_errno(errno, "Failed to set system clock to time specified on kernel command line: %m");
04f5c018 1619 else
3753325b 1620 log_info("Set system clock to %s, as specified on the kernel command line.",
04f5c018 1621 FORMAT_TIMESTAMP(arg_clock_usec));
3753325b
LP
1622}
1623
d247f232 1624static void cmdline_take_random_seed(void) {
d247f232
LP
1625 size_t suggested;
1626 int r;
1627
1628 if (arg_random_seed_size == 0)
1629 return;
1630
1631 if (getpid_cached() != 1)
1632 return;
1633
1634 assert(arg_random_seed);
1635 suggested = random_pool_size();
1636
1637 if (arg_random_seed_size < suggested)
1638 log_warning("Random seed specified on kernel command line has size %zu, but %zu bytes required to fill entropy pool.",
1639 arg_random_seed_size, suggested);
1640
61bd7d1e 1641 r = random_write_entropy(-1, arg_random_seed, arg_random_seed_size, true);
d247f232
LP
1642 if (r < 0) {
1643 log_warning_errno(r, "Failed to credit entropy specified on kernel command line, ignoring: %m");
1644 return;
1645 }
1646
1647 log_notice("Successfully credited entropy passed on kernel command line.\n"
1648 "Note that the seed provided this way is accessible to unprivileged programs. This functionality should not be used outside of testing environments.");
1649}
1650
/* Prepares core dump handling when running as PID 1 (compiled in only if ENABLE_COREDUMP is set).
 *
 * skip_setup: when true, the core_pattern logic is left untouched. */
static void initialize_coredump(bool skip_setup) {
#if ENABLE_COREDUMP
        if (getpid_cached() != 1)
                return;

        /* Lift the core size limit, so that handlers honouring the limit (such as systemd-coredump)
         * process core dumps of system services by default. */
        if (setrlimit(RLIMIT_CORE, &RLIMIT_MAKE_CONST(RLIM_INFINITY)) < 0)
                log_warning_errno(errno, "Failed to set RLIMIT_CORE: %m");

        if (skip_setup)
                return;

        /* Turn off the core_pattern logic by default, so that nothing is stored until
         * systemd-coredump is enabled via sysctl. The kernel command line may still override this
         * later, so that core dumps can be collected during early startup and in the initramfs. */
        disable_coredumps();
#endif
}
1669
c6885f5f
FB
1670static void initialize_core_pattern(bool skip_setup) {
1671 int r;
1672
1673 if (skip_setup || !arg_early_core_pattern)
1674 return;
1675
1676 if (getpid_cached() != 1)
1677 return;
1678
57512c89 1679 r = write_string_file("/proc/sys/kernel/core_pattern", arg_early_core_pattern, WRITE_STRING_FILE_DISABLE_BUFFER);
c6885f5f
FB
1680 if (r < 0)
1681 log_warning_errno(r, "Failed to write '%s' to /proc/sys/kernel/core_pattern, ignoring: %m", arg_early_core_pattern);
1682}
1683
61fbbac1
ZJS
1684static void update_cpu_affinity(bool skip_setup) {
1685 _cleanup_free_ char *mask = NULL;
1686
1687 if (skip_setup || !arg_cpu_affinity.set)
1688 return;
1689
1690 assert(arg_cpu_affinity.allocated > 0);
1691
1692 mask = cpu_set_to_string(&arg_cpu_affinity);
1693 log_debug("Setting CPU affinity to %s.", strnull(mask));
1694
1695 if (sched_setaffinity(0, arg_cpu_affinity.allocated, arg_cpu_affinity.set) < 0)
1696 log_warning_errno(errno, "Failed to set CPU affinity: %m");
1697}
1698
b070c7c0
MS
1699static void update_numa_policy(bool skip_setup) {
1700 int r;
1701 _cleanup_free_ char *nodes = NULL;
1702 const char * policy = NULL;
1703
1704 if (skip_setup || !mpol_is_valid(numa_policy_get_type(&arg_numa_policy)))
1705 return;
1706
1707 if (DEBUG_LOGGING) {
1708 policy = mpol_to_string(numa_policy_get_type(&arg_numa_policy));
1709 nodes = cpu_set_to_range_string(&arg_numa_policy.nodes);
1710 log_debug("Setting NUMA policy to %s, with nodes %s.", strnull(policy), strnull(nodes));
1711 }
1712
1713 r = apply_numa_policy(&arg_numa_policy);
1714 if (r == -EOPNOTSUPP)
1715 log_debug_errno(r, "NUMA support not available, ignoring.");
1716 else if (r < 0)
1717 log_warning_errno(r, "Failed to set NUMA memory policy: %m");
1718}
1719
3c7878f9
LP
1720static void do_reexecute(
1721 int argc,
1722 char *argv[],
1723 const struct rlimit *saved_rlimit_nofile,
1724 const struct rlimit *saved_rlimit_memlock,
1725 FDSet *fds,
1726 const char *switch_root_dir,
1727 const char *switch_root_init,
1728 const char **ret_error_message) {
1729
1730 unsigned i, j, args_size;
1731 const char **args;
1732 int r;
1733
1734 assert(saved_rlimit_nofile);
1735 assert(saved_rlimit_memlock);
1736 assert(ret_error_message);
1737
1738 /* Close and disarm the watchdog, so that the new instance can reinitialize it, but doesn't get rebooted while
1739 * we do that */
1740 watchdog_close(true);
1741
ddfa8b0b
LP
1742 /* Reset RLIMIT_NOFILE + RLIMIT_MEMLOCK back to the kernel defaults, so that the new systemd can pass
1743 * the kernel default to its child processes */
1744 if (saved_rlimit_nofile->rlim_cur != 0)
3c7878f9 1745 (void) setrlimit(RLIMIT_NOFILE, saved_rlimit_nofile);
ddfa8b0b 1746 if (saved_rlimit_memlock->rlim_cur != RLIM_INFINITY)
3c7878f9
LP
1747 (void) setrlimit(RLIMIT_MEMLOCK, saved_rlimit_memlock);
1748
1749 if (switch_root_dir) {
1750 /* Kill all remaining processes from the initrd, but don't wait for them, so that we can handle the
1751 * SIGCHLD for them after deserializing. */
e73c54b8 1752 broadcast_signal(SIGTERM, false, true, arg_default_timeout_stop_usec);
3c7878f9
LP
1753
1754 /* And switch root with MS_MOVE, because we remove the old directory afterwards and detach it. */
1755 r = switch_root(switch_root_dir, "/mnt", true, MS_MOVE);
1756 if (r < 0)
1757 log_error_errno(r, "Failed to switch root, trying to continue: %m");
1758 }
1759
1760 args_size = MAX(6, argc+1);
1761 args = newa(const char*, args_size);
1762
1763 if (!switch_root_init) {
1764 char sfd[DECIMAL_STR_MAX(int) + 1];
1765
1766 /* First try to spawn ourselves with the right path, and with full serialization. We do this only if
1767 * the user didn't specify an explicit init to spawn. */
1768
1769 assert(arg_serialization);
1770 assert(fds);
1771
1772 xsprintf(sfd, "%i", fileno(arg_serialization));
1773
1774 i = 0;
1775 args[i++] = SYSTEMD_BINARY_PATH;
1776 if (switch_root_dir)
1777 args[i++] = "--switched-root";
1778 args[i++] = arg_system ? "--system" : "--user";
1779 args[i++] = "--deserialize";
1780 args[i++] = sfd;
1781 args[i++] = NULL;
1782
1783 assert(i <= args_size);
1784
1785 /*
1786 * We want valgrind to print its memory usage summary before reexecution. Valgrind won't do this is on
1787 * its own on exec(), but it will do it on exit(). Hence, to ensure we get a summary here, fork() off
1788 * a child, let it exit() cleanly, so that it prints the summary, and wait() for it in the parent,
1789 * before proceeding into the exec().
1790 */
1791 valgrind_summary_hack();
1792
1793 (void) execv(args[0], (char* const*) args);
1794 log_debug_errno(errno, "Failed to execute our own binary, trying fallback: %m");
1795 }
1796
1797 /* Try the fallback, if there is any, without any serialization. We pass the original argv[] and envp[]. (Well,
1798 * modulo the ordering changes due to getopt() in argv[], and some cleanups in envp[], but let's hope that
1799 * doesn't matter.) */
1800
1801 arg_serialization = safe_fclose(arg_serialization);
1802 fds = fdset_free(fds);
1803
1804 /* Reopen the console */
1805 (void) make_console_stdio();
1806
1807 for (j = 1, i = 1; j < (unsigned) argc; j++)
1808 args[i++] = argv[j];
1809 args[i++] = NULL;
1810 assert(i <= args_size);
1811
5238e957 1812 /* Re-enable any blocked signals, especially important if we switch from initial ramdisk to init=... */
3c7878f9
LP
1813 (void) reset_all_signal_handlers();
1814 (void) reset_signal_mask();
595225af 1815 (void) rlimit_nofile_safe();
3c7878f9
LP
1816
1817 if (switch_root_init) {
1818 args[0] = switch_root_init;
a5cede8c 1819 (void) execve(args[0], (char* const*) args, saved_env);
3c7878f9
LP
1820 log_warning_errno(errno, "Failed to execute configured init, trying fallback: %m");
1821 }
1822
1823 args[0] = "/sbin/init";
1824 (void) execv(args[0], (char* const*) args);
1825 r = -errno;
1826
1827 manager_status_printf(NULL, STATUS_TYPE_EMERGENCY,
1828 ANSI_HIGHLIGHT_RED " !! " ANSI_NORMAL,
1829 "Failed to execute /sbin/init");
1830
1831 if (r == -ENOENT) {
1832 log_warning("No /sbin/init, trying fallback");
1833
1834 args[0] = "/bin/sh";
1835 args[1] = NULL;
a5cede8c 1836 (void) execve(args[0], (char* const*) args, saved_env);
3c7878f9
LP
1837 log_error_errno(errno, "Failed to execute /bin/sh, giving up: %m");
1838 } else
1839 log_warning_errno(r, "Failed to execute /sbin/init, giving up: %m");
1840
1841 *ret_error_message = "Failed to execute fallback shell";
1842}
1843
7eb35049
LP
1844static int invoke_main_loop(
1845 Manager *m,
a9fd4cd1
FB
1846 const struct rlimit *saved_rlimit_nofile,
1847 const struct rlimit *saved_rlimit_memlock,
7eb35049
LP
1848 bool *ret_reexecute,
1849 int *ret_retval, /* Return parameters relevant for shutting down */
1850 const char **ret_shutdown_verb, /* … */
1851 FDSet **ret_fds, /* Return parameters for reexecuting */
1852 char **ret_switch_root_dir, /* … */
1853 char **ret_switch_root_init, /* … */
1854 const char **ret_error_message) {
1855
1856 int r;
1857
1858 assert(m);
a9fd4cd1
FB
1859 assert(saved_rlimit_nofile);
1860 assert(saved_rlimit_memlock);
7eb35049
LP
1861 assert(ret_reexecute);
1862 assert(ret_retval);
1863 assert(ret_shutdown_verb);
1864 assert(ret_fds);
1865 assert(ret_switch_root_dir);
1866 assert(ret_switch_root_init);
1867 assert(ret_error_message);
1868
1869 for (;;) {
1870 r = manager_loop(m);
1871 if (r < 0) {
1872 *ret_error_message = "Failed to run main loop";
1873 return log_emergency_errno(r, "Failed to run main loop: %m");
1874 }
1875
3ca4d0b3 1876 switch ((ManagerObjective) r) {
7eb35049 1877
a6ecbf83 1878 case MANAGER_RELOAD: {
bda7d78b 1879 LogTarget saved_log_target;
a6ecbf83
FB
1880 int saved_log_level;
1881
7eb35049
LP
1882 log_info("Reloading.");
1883
3fe91079 1884 /* First, save any overridden log level/target, then parse the configuration file, which might
bda7d78b
FB
1885 * change the log level to new settings. */
1886
a6ecbf83 1887 saved_log_level = m->log_level_overridden ? log_get_max_level() : -1;
bda7d78b 1888 saved_log_target = m->log_target_overridden ? log_get_target() : _LOG_TARGET_INVALID;
a6ecbf83 1889
a9fd4cd1 1890 (void) parse_configuration(saved_rlimit_nofile, saved_rlimit_memlock);
7eb35049
LP
1891
1892 set_manager_defaults(m);
986935cf 1893 set_manager_settings(m);
7eb35049 1894
61fbbac1 1895 update_cpu_affinity(false);
b070c7c0 1896 update_numa_policy(false);
61fbbac1 1897
a6ecbf83
FB
1898 if (saved_log_level >= 0)
1899 manager_override_log_level(m, saved_log_level);
bda7d78b
FB
1900 if (saved_log_target >= 0)
1901 manager_override_log_target(m, saved_log_target);
a6ecbf83 1902
7eb35049
LP
1903 r = manager_reload(m);
1904 if (r < 0)
7a35fa24
LP
1905 /* Reloading failed before the point of no return. Let's continue running as if nothing happened. */
1906 m->objective = MANAGER_OK;
7eb35049
LP
1907
1908 break;
a6ecbf83 1909 }
7eb35049
LP
1910
1911 case MANAGER_REEXECUTE:
1912
1913 r = prepare_reexecute(m, &arg_serialization, ret_fds, false);
1914 if (r < 0) {
1915 *ret_error_message = "Failed to prepare for reexecution";
1916 return r;
1917 }
1918
1919 log_notice("Reexecuting.");
1920
1921 *ret_reexecute = true;
1922 *ret_retval = EXIT_SUCCESS;
1923 *ret_shutdown_verb = NULL;
1924 *ret_switch_root_dir = *ret_switch_root_init = NULL;
1925
1926 return 0;
1927
1928 case MANAGER_SWITCH_ROOT:
1929 if (!m->switch_root_init) {
1930 r = prepare_reexecute(m, &arg_serialization, ret_fds, true);
1931 if (r < 0) {
1932 *ret_error_message = "Failed to prepare for reexecution";
1933 return r;
1934 }
1935 } else
1936 *ret_fds = NULL;
1937
1938 log_notice("Switching root.");
1939
1940 *ret_reexecute = true;
1941 *ret_retval = EXIT_SUCCESS;
1942 *ret_shutdown_verb = NULL;
1943
1944 /* Steal the switch root parameters */
49052946
YW
1945 *ret_switch_root_dir = TAKE_PTR(m->switch_root);
1946 *ret_switch_root_init = TAKE_PTR(m->switch_root_init);
7eb35049
LP
1947
1948 return 0;
1949
1950 case MANAGER_EXIT:
1951
1952 if (MANAGER_IS_USER(m)) {
1953 log_debug("Exit.");
1954
1955 *ret_reexecute = false;
1956 *ret_retval = m->return_value;
1957 *ret_shutdown_verb = NULL;
1958 *ret_fds = NULL;
1959 *ret_switch_root_dir = *ret_switch_root_init = NULL;
1960
1961 return 0;
1962 }
1963
1964 _fallthrough_;
1965 case MANAGER_REBOOT:
1966 case MANAGER_POWEROFF:
1967 case MANAGER_HALT:
1968 case MANAGER_KEXEC: {
af41e508
LP
1969 static const char * const table[_MANAGER_OBJECTIVE_MAX] = {
1970 [MANAGER_EXIT] = "exit",
1971 [MANAGER_REBOOT] = "reboot",
7eb35049 1972 [MANAGER_POWEROFF] = "poweroff",
af41e508
LP
1973 [MANAGER_HALT] = "halt",
1974 [MANAGER_KEXEC] = "kexec",
7eb35049
LP
1975 };
1976
1977 log_notice("Shutting down.");
1978
1979 *ret_reexecute = false;
1980 *ret_retval = m->return_value;
af41e508 1981 assert_se(*ret_shutdown_verb = table[m->objective]);
7eb35049
LP
1982 *ret_fds = NULL;
1983 *ret_switch_root_dir = *ret_switch_root_init = NULL;
1984
1985 return 0;
1986 }
1987
1988 default:
af41e508 1989 assert_not_reached("Unknown or unexpected manager objective.");
7eb35049
LP
1990 }
1991 }
1992}
1993
31aef7ff
LP
1994static void log_execution_mode(bool *ret_first_boot) {
1995 assert(ret_first_boot);
1996
1997 if (arg_system) {
1998 int v;
1999
e7b18106 2000 log_info("systemd " GIT_VERSION " running in %ssystem mode (%s)",
91b79ba8
ZJS
2001 arg_action == ACTION_TEST ? "test " : "",
2002 systemd_features);
31aef7ff
LP
2003
2004 v = detect_virtualization();
2005 if (v > 0)
2006 log_info("Detected virtualization %s.", virtualization_to_string(v));
2007
2008 log_info("Detected architecture %s.", architecture_to_string(uname_architecture()));
2009
2010 if (in_initrd()) {
2011 *ret_first_boot = false;
2012 log_info("Running in initial RAM disk.");
2013 } else {
583cef3b
HS
2014 int r;
2015 _cleanup_free_ char *id_text = NULL;
2016
2017 /* Let's check whether we are in first boot. We use /etc/machine-id as flag file
2018 * for this: If it is missing or contains the value "uninitialized", this is the
2019 * first boot. In any other case, it is not. This allows container managers and
2020 * installers to provision a couple of files already. If the container manager
2021 * wants to provision the machine ID itself it should pass $container_uuid to PID 1. */
2022
2023 r = read_one_line_file("/etc/machine-id", &id_text);
2024 if (r < 0 || streq(id_text, "uninitialized")) {
2025 if (r < 0 && r != -ENOENT)
2026 log_warning_errno(r, "Unexpected error while reading /etc/machine-id, ignoring: %m");
2027
2028 *ret_first_boot = true;
2029 log_info("Detected first boot.");
2030 } else {
2031 *ret_first_boot = false;
2032 log_debug("Detected initialized system, this is not the first boot.");
2033 }
31aef7ff
LP
2034 }
2035 } else {
b9e90f3a 2036 if (DEBUG_LOGGING) {
c2b2df60 2037 _cleanup_free_ char *t = NULL;
31aef7ff 2038
b9e90f3a 2039 t = uid_to_name(getuid());
91b79ba8
ZJS
2040 log_debug("systemd " GIT_VERSION " running in %suser mode for user " UID_FMT "/%s. (%s)",
2041 arg_action == ACTION_TEST ? " test" : "",
2042 getuid(), strna(t), systemd_features);
b9e90f3a 2043 }
31aef7ff
LP
2044
2045 *ret_first_boot = false;
2046 }
2047}
2048
5afbaa36
LP
2049static int initialize_runtime(
2050 bool skip_setup,
3023f2fe 2051 bool first_boot,
5afbaa36
LP
2052 struct rlimit *saved_rlimit_nofile,
2053 struct rlimit *saved_rlimit_memlock,
2054 const char **ret_error_message) {
5afbaa36
LP
2055 int r;
2056
2057 assert(ret_error_message);
2058
2059 /* Sets up various runtime parameters. Many of these initializations are conditionalized:
2060 *
2061 * - Some only apply to --system instances
2062 * - Some only apply to --user instances
2063 * - Some only apply when we first start up, but not when we reexecute
2064 */
2065
2d776038
LP
2066 if (arg_action != ACTION_RUN)
2067 return 0;
2068
61fbbac1 2069 update_cpu_affinity(skip_setup);
b070c7c0 2070 update_numa_policy(skip_setup);
61fbbac1 2071
3c3c6cb9 2072 if (arg_system) {
5238e957 2073 /* Make sure we leave a core dump without panicking the kernel. */
3c3c6cb9 2074 install_crash_handler();
5afbaa36 2075
3c3c6cb9 2076 if (!skip_setup) {
143fadf3 2077 r = mount_cgroup_controllers();
3c3c6cb9
LP
2078 if (r < 0) {
2079 *ret_error_message = "Failed to mount cgroup hierarchies";
2080 return r;
2081 }
2082
2083 status_welcome();
b6fad306 2084 (void) hostname_setup(true);
3023f2fe
HS
2085 /* Force transient machine-id on first boot. */
2086 machine_id_setup(NULL, first_boot, arg_machine_id, NULL);
df883de9 2087 (void) loopback_setup();
3c3c6cb9 2088 bump_unix_max_dgram_qlen();
a8b627aa 2089 bump_file_max_and_nr_open();
3c3c6cb9
LP
2090 test_usr();
2091 write_container_id();
2092 }
8a2c1fbf 2093
3c3c6cb9
LP
2094 if (arg_watchdog_device) {
2095 r = watchdog_set_device(arg_watchdog_device);
2096 if (r < 0)
2097 log_warning_errno(r, "Failed to set watchdog device to %s, ignoring: %m", arg_watchdog_device);
2098 }
32429805
LP
2099 } else {
2100 _cleanup_free_ char *p = NULL;
2101
2102 /* Create the runtime directory and place the inaccessible device nodes there, if we run in
2103 * user mode. In system mode mount_setup() already did that. */
2104
2105 r = xdg_user_runtime_dir(&p, "/systemd");
2106 if (r < 0) {
2107 *ret_error_message = "$XDG_RUNTIME_DIR is not set";
2108 return log_emergency_errno(r, "Failed to determine $XDG_RUNTIME_DIR path: %m");
2109 }
2110
e813a74a 2111 (void) mkdir_p_label(p, 0755);
32429805 2112 (void) make_inaccessible_nodes(p, UID_INVALID, GID_INVALID);
3c3c6cb9 2113 }
5afbaa36
LP
2114
2115 if (arg_timer_slack_nsec != NSEC_INFINITY)
2116 if (prctl(PR_SET_TIMERSLACK, arg_timer_slack_nsec) < 0)
3a671cd1 2117 log_warning_errno(errno, "Failed to adjust timer slack, ignoring: %m");
5afbaa36
LP
2118
2119 if (arg_system && !cap_test_all(arg_capability_bounding_set)) {
2120 r = capability_bounding_set_drop_usermode(arg_capability_bounding_set);
2121 if (r < 0) {
2122 *ret_error_message = "Failed to drop capability bounding set of usermode helpers";
2123 return log_emergency_errno(r, "Failed to drop capability bounding set of usermode helpers: %m");
2124 }
2125
2126 r = capability_bounding_set_drop(arg_capability_bounding_set, true);
2127 if (r < 0) {
2128 *ret_error_message = "Failed to drop capability bounding set";
2129 return log_emergency_errno(r, "Failed to drop capability bounding set: %m");
2130 }
2131 }
2132
39362f6f
JB
2133 if (arg_system && arg_no_new_privs) {
2134 if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) < 0) {
2135 *ret_error_message = "Failed to disable new privileges";
2136 return log_emergency_errno(errno, "Failed to disable new privileges: %m");
2137 }
2138 }
2139
5afbaa36
LP
2140 if (arg_syscall_archs) {
2141 r = enforce_syscall_archs(arg_syscall_archs);
2142 if (r < 0) {
2143 *ret_error_message = "Failed to set syscall architectures";
2144 return r;
2145 }
2146 }
2147
2148 if (!arg_system)
2149 /* Become reaper of our children */
2150 if (prctl(PR_SET_CHILD_SUBREAPER, 1) < 0)
2151 log_warning_errno(errno, "Failed to make us a subreaper: %m");
2152
a17c1712
LP
2153 /* Bump up RLIMIT_NOFILE for systemd itself */
2154 (void) bump_rlimit_nofile(saved_rlimit_nofile);
2155 (void) bump_rlimit_memlock(saved_rlimit_memlock);
5afbaa36
LP
2156
2157 return 0;
2158}
2159
6acca5fc
LP
2160static int do_queue_default_job(
2161 Manager *m,
2162 const char **ret_error_message) {
2163
2164 _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
f1d075dc
ZJS
2165 const char *unit;
2166 Job *job;
2167 Unit *target;
6acca5fc
LP
2168 int r;
2169
8755dbad 2170 if (arg_default_unit)
f1d075dc 2171 unit = arg_default_unit;
8755dbad 2172 else if (in_initrd())
f1d075dc 2173 unit = SPECIAL_INITRD_TARGET;
8755dbad 2174 else
f1d075dc 2175 unit = SPECIAL_DEFAULT_TARGET;
8755dbad 2176
f1d075dc 2177 log_debug("Activating default unit: %s", unit);
8755dbad 2178
f1d075dc 2179 r = manager_load_startable_unit_or_warn(m, unit, NULL, &target);
8755dbad
ZJS
2180 if (r < 0 && in_initrd() && !arg_default_unit) {
2181 /* Fall back to default.target, which we used to always use by default. Only do this if no
2182 * explicit configuration was given. */
2183
2184 log_info("Falling back to " SPECIAL_DEFAULT_TARGET ".");
6acca5fc 2185
8755dbad
ZJS
2186 r = manager_load_startable_unit_or_warn(m, SPECIAL_DEFAULT_TARGET, NULL, &target);
2187 }
4109ede7 2188 if (r < 0) {
8755dbad 2189 log_info("Falling back to " SPECIAL_RESCUE_TARGET ".");
6acca5fc 2190
4109ede7 2191 r = manager_load_startable_unit_or_warn(m, SPECIAL_RESCUE_TARGET, NULL, &target);
6acca5fc 2192 if (r < 0) {
8755dbad
ZJS
2193 *ret_error_message = r == -ERFKILL ? SPECIAL_RESCUE_TARGET " masked"
2194 : "Failed to load " SPECIAL_RESCUE_TARGET;
4109ede7 2195 return r;
6acca5fc
LP
2196 }
2197 }
2198
2199 assert(target->load_state == UNIT_LOADED);
2200
f1d075dc 2201 r = manager_add_job(m, JOB_START, target, JOB_ISOLATE, NULL, &error, &job);
6acca5fc
LP
2202 if (r == -EPERM) {
2203 log_debug_errno(r, "Default target could not be isolated, starting instead: %s", bus_error_message(&error, r));
2204
2205 sd_bus_error_free(&error);
2206
f1d075dc 2207 r = manager_add_job(m, JOB_START, target, JOB_REPLACE, NULL, &error, &job);
6acca5fc
LP
2208 if (r < 0) {
2209 *ret_error_message = "Failed to start default target";
2210 return log_emergency_errno(r, "Failed to start default target: %s", bus_error_message(&error, r));
2211 }
2212
2213 } else if (r < 0) {
2214 *ret_error_message = "Failed to isolate default target";
2215 return log_emergency_errno(r, "Failed to isolate default target: %s", bus_error_message(&error, r));
c86c31d9
ZJS
2216 } else
2217 log_info("Queued %s job for default target %s.",
2218 job_type_to_string(job->type),
04d232d8 2219 unit_status_string(job->unit, NULL));
6acca5fc 2220
f1d075dc 2221 m->default_unit_job_id = job->id;
6acca5fc
LP
2222
2223 return 0;
2224}
2225
a9fd4cd1
FB
/* Reads the current RLIMIT_NOFILE and RLIMIT_MEMLOCK settings into the two supplied structures. A
 * failing query is logged as a warning and the corresponding structure is left to getrlimit(). */
static void save_rlimits(struct rlimit *saved_rlimit_nofile,
                         struct rlimit *saved_rlimit_memlock) {

        int k;

        assert(saved_rlimit_nofile);
        assert(saved_rlimit_memlock);

        k = getrlimit(RLIMIT_NOFILE, saved_rlimit_nofile);
        if (k < 0)
                log_warning_errno(errno, "Reading RLIMIT_NOFILE failed, ignoring: %m");

        k = getrlimit(RLIMIT_MEMLOCK, saved_rlimit_memlock);
        if (k < 0)
                log_warning_errno(errno, "Reading RLIMIT_MEMLOCK failed, ignoring: %m");
}
2238
2239static void fallback_rlimit_nofile(const struct rlimit *saved_rlimit_nofile) {
2240 struct rlimit *rl;
2241
2242 if (arg_default_rlimit[RLIMIT_NOFILE])
2243 return;
2244
2245 /* Make sure forked processes get limits based on the original kernel setting */
2246
2247 rl = newdup(struct rlimit, saved_rlimit_nofile, 1);
2248 if (!rl) {
2249 log_oom();
2250 return;
2251 }
2252
2253 /* Bump the hard limit for system services to a substantially higher value. The default
2254 * hard limit current kernels set is pretty low (4K), mostly for historical
2255 * reasons. According to kernel developers, the fd handling in recent kernels has been
2256 * optimized substantially enough, so that we can bump the limit now, without paying too
2257 * high a price in memory or performance. Note however that we only bump the hard limit,
2258 * not the soft limit. That's because select() works the way it works, and chokes on fds
2259 * >= 1024. If we'd bump the soft limit globally, it might accidentally happen to
2260 * unexpecting programs that they get fds higher than what they can process using
2261 * select(). By only bumping the hard limit but leaving the low limit as it is we avoid
2262 * this pitfall: programs that are written by folks aware of the select() problem in mind
2263 * (and thus use poll()/epoll instead of select(), the way everybody should) can
2264 * explicitly opt into high fds by bumping their soft limit beyond 1024, to the hard limit
2265 * we pass. */
2266 if (arg_system) {
2267 int nr;
2268
2269 /* Get the underlying absolute limit the kernel enforces */
2270 nr = read_nr_open();
2271
2272 rl->rlim_max = MIN((rlim_t) nr, MAX(rl->rlim_max, (rlim_t) HIGH_RLIMIT_NOFILE));
2273 }
2274
2275 /* If for some reason we were invoked with a soft limit above 1024 (which should never
2276 * happen!, but who knows what we get passed in from pam_limit when invoked as --user
2277 * instance), then lower what we pass on to not confuse our children */
2278 rl->rlim_cur = MIN(rl->rlim_cur, (rlim_t) FD_SETSIZE);
2279
2280 arg_default_rlimit[RLIMIT_NOFILE] = rl;
2281}
2282
2283static void fallback_rlimit_memlock(const struct rlimit *saved_rlimit_memlock) {
2284 struct rlimit *rl;
2285
2286 /* Pass the original value down to invoked processes */
2287
2288 if (arg_default_rlimit[RLIMIT_MEMLOCK])
2289 return;
2290
2291 rl = newdup(struct rlimit, saved_rlimit_memlock, 1);
2292 if (!rl) {
2293 log_oom();
2294 return;
2295 }
2296
2297 arg_default_rlimit[RLIMIT_MEMLOCK] = rl;
2298}
2299
d55ed7de
ZJS
2300static void setenv_manager_environment(void) {
2301 char **p;
2302 int r;
2303
2304 STRV_FOREACH(p, arg_manager_environment) {
2305 log_debug("Setting '%s' in our own environment.", *p);
2306
2307 r = putenv_dup(*p, true);
2308 if (r < 0)
2309 log_warning_errno(errno, "Failed to setenv \"%s\", ignoring: %m", *p);
2310 }
2311}
2312
fb39af4c
ZJS
2313static void reset_arguments(void) {
2314 /* Frees/resets arg_* variables, with a few exceptions commented below. */
970777b5
LP
2315
2316 arg_default_unit = mfree(arg_default_unit);
fb39af4c
ZJS
2317
2318 /* arg_system — ignore */
2319
2320 arg_dump_core = true;
2321 arg_crash_chvt = -1;
2322 arg_crash_shell = false;
2323 arg_crash_reboot = false;
970777b5 2324 arg_confirm_spawn = mfree(arg_confirm_spawn);
fb39af4c 2325 arg_show_status = _SHOW_STATUS_INVALID;
36cf4507 2326 arg_status_unit_format = STATUS_UNIT_FORMAT_DEFAULT;
fb39af4c
ZJS
2327 arg_switched_root = false;
2328 arg_pager_flags = 0;
2329 arg_service_watchdogs = true;
2330 arg_default_std_output = EXEC_OUTPUT_JOURNAL;
2331 arg_default_std_error = EXEC_OUTPUT_INHERIT;
2332 arg_default_restart_usec = DEFAULT_RESTART_USEC;
2333 arg_default_timeout_start_usec = DEFAULT_TIMEOUT_USEC;
2334 arg_default_timeout_stop_usec = DEFAULT_TIMEOUT_USEC;
2335 arg_default_timeout_abort_usec = DEFAULT_TIMEOUT_USEC;
2336 arg_default_timeout_abort_set = false;
2337 arg_default_start_limit_interval = DEFAULT_START_LIMIT_INTERVAL;
2338 arg_default_start_limit_burst = DEFAULT_START_LIMIT_BURST;
2339 arg_runtime_watchdog = 0;
65224c1d 2340 arg_reboot_watchdog = 10 * USEC_PER_MINUTE;
acafd7d8 2341 arg_kexec_watchdog = 0;
fb39af4c
ZJS
2342 arg_early_core_pattern = NULL;
2343 arg_watchdog_device = NULL;
2344
970777b5 2345 arg_default_environment = strv_free(arg_default_environment);
d55ed7de 2346 arg_manager_environment = strv_free(arg_manager_environment);
fb39af4c
ZJS
2347 rlimit_free_all(arg_default_rlimit);
2348
2349 arg_capability_bounding_set = CAP_ALL;
2350 arg_no_new_privs = false;
2351 arg_timer_slack_nsec = NSEC_INFINITY;
2352 arg_default_timer_accuracy_usec = 1 * USEC_PER_MINUTE;
2353
970777b5 2354 arg_syscall_archs = set_free(arg_syscall_archs);
61fbbac1 2355
fb39af4c
ZJS
2356 /* arg_serialization — ignore */
2357
2358 arg_default_cpu_accounting = -1;
2359 arg_default_io_accounting = false;
2360 arg_default_ip_accounting = false;
2361 arg_default_blockio_accounting = false;
2362 arg_default_memory_accounting = MEMORY_ACCOUNTING_DEFAULT;
2363 arg_default_tasks_accounting = true;
3a0f06c4 2364 arg_default_tasks_max = DEFAULT_TASKS_MAX;
fb39af4c
ZJS
2365 arg_machine_id = (sd_id128_t) {};
2366 arg_cad_burst_action = EMERGENCY_ACTION_REBOOT_FORCE;
2367 arg_default_oom_policy = OOM_STOP;
2368
61fbbac1 2369 cpu_set_reset(&arg_cpu_affinity);
b070c7c0 2370 numa_policy_reset(&arg_numa_policy);
d247f232
LP
2371
2372 arg_random_seed = mfree(arg_random_seed);
2373 arg_random_seed_size = 0;
33d943d1 2374 arg_clock_usec = 0;
970777b5
LP
2375}
2376
a9fd4cd1
FB
2377static int parse_configuration(const struct rlimit *saved_rlimit_nofile,
2378 const struct rlimit *saved_rlimit_memlock) {
97d1fb94
LP
2379 int r;
2380
a9fd4cd1
FB
2381 assert(saved_rlimit_nofile);
2382 assert(saved_rlimit_memlock);
2383
fb39af4c
ZJS
2384 /* Assign configuration defaults */
2385 reset_arguments();
2386
97d1fb94 2387 r = parse_config_file();
470a5e6d
ZJS
2388 if (r < 0)
2389 log_warning_errno(r, "Failed to parse config file, ignoring: %m");
97d1fb94
LP
2390
2391 if (arg_system) {
2392 r = proc_cmdline_parse(parse_proc_cmdline_item, NULL, 0);
2393 if (r < 0)
2394 log_warning_errno(r, "Failed to parse kernel command line, ignoring: %m");
2395 }
2396
a9fd4cd1
FB
2397 /* Initialize some default rlimits for services if they haven't been configured */
2398 fallback_rlimit_nofile(saved_rlimit_nofile);
2399 fallback_rlimit_memlock(saved_rlimit_memlock);
2400
97d1fb94
LP
2401 /* Note that this also parses bits from the kernel command line, including "debug". */
2402 log_parse_environment();
2403
db33214b 2404 /* Initialize the show status setting if it hasn't been set explicitly yet */
7a293242 2405 if (arg_show_status == _SHOW_STATUS_INVALID)
db33214b
LP
2406 arg_show_status = SHOW_STATUS_YES;
2407
d55ed7de
ZJS
2408 /* Push variables into the manager environment block */
2409 setenv_manager_environment();
2410
97d1fb94
LP
2411 return 0;
2412}
2413
b0d7c989
LP
2414static int safety_checks(void) {
2415
febf46a4 2416 if (getpid_cached() == 1 &&
baaa35ad
ZJS
2417 arg_action != ACTION_RUN)
2418 return log_error_errno(SYNTHETIC_ERRNO(EPERM),
2419 "Unsupported execution mode while PID 1.");
febf46a4
LP
2420
2421 if (getpid_cached() == 1 &&
baaa35ad
ZJS
2422 !arg_system)
2423 return log_error_errno(SYNTHETIC_ERRNO(EPERM),
2424 "Can't run --user mode as PID 1.");
febf46a4
LP
2425
2426 if (arg_action == ACTION_RUN &&
2427 arg_system &&
baaa35ad
ZJS
2428 getpid_cached() != 1)
2429 return log_error_errno(SYNTHETIC_ERRNO(EPERM),
2430 "Can't run system mode unless PID 1.");
febf46a4 2431
b0d7c989 2432 if (arg_action == ACTION_TEST &&
baaa35ad
ZJS
2433 geteuid() == 0)
2434 return log_error_errno(SYNTHETIC_ERRNO(EPERM),
2435 "Don't run test mode as root.");
b0d7c989
LP
2436
2437 if (!arg_system &&
2438 arg_action == ACTION_RUN &&
baaa35ad
ZJS
2439 sd_booted() <= 0)
2440 return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP),
2441 "Trying to run as user instance, but the system has not been booted with systemd.");
b0d7c989
LP
2442
2443 if (!arg_system &&
2444 arg_action == ACTION_RUN &&
baaa35ad
ZJS
2445 !getenv("XDG_RUNTIME_DIR"))
2446 return log_error_errno(SYNTHETIC_ERRNO(EUNATCH),
2447 "Trying to run as user instance, but $XDG_RUNTIME_DIR is not set.");
b0d7c989
LP
2448
2449 if (arg_system &&
2450 arg_action == ACTION_RUN &&
baaa35ad
ZJS
2451 running_in_chroot() > 0)
2452 return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP),
2453 "Cannot be run in a chroot() environment.");
b0d7c989
LP
2454
2455 return 0;
2456}
2457
74da609f
LP
2458static int initialize_security(
2459 bool *loaded_policy,
2460 dual_timestamp *security_start_timestamp,
2461 dual_timestamp *security_finish_timestamp,
2462 const char **ret_error_message) {
2463
2464 int r;
2465
2466 assert(loaded_policy);
2467 assert(security_start_timestamp);
2468 assert(security_finish_timestamp);
2469 assert(ret_error_message);
2470
2471 dual_timestamp_get(security_start_timestamp);
2472
97149f40 2473 r = mac_selinux_setup(loaded_policy);
74da609f
LP
2474 if (r < 0) {
2475 *ret_error_message = "Failed to load SELinux policy";
2476 return r;
2477 }
2478
2479 r = mac_smack_setup(loaded_policy);
2480 if (r < 0) {
2481 *ret_error_message = "Failed to load SMACK policy";
2482 return r;
2483 }
2484
2ffadd3c
Y
2485 r = mac_apparmor_setup();
2486 if (r < 0) {
2487 *ret_error_message = "Failed to load AppArmor policy";
2488 return r;
2489 }
2490
74da609f
LP
2491 r = ima_setup();
2492 if (r < 0) {
2493 *ret_error_message = "Failed to load IMA policy";
2494 return r;
2495 }
2496
2497 dual_timestamp_get(security_finish_timestamp);
2498 return 0;
2499}
2500
efeb853f
LP
2501static int collect_fds(FDSet **ret_fds, const char **ret_error_message) {
2502 int r;
2503
2504 assert(ret_fds);
2505 assert(ret_error_message);
2506
2507 r = fdset_new_fill(ret_fds);
2508 if (r < 0) {
2509 *ret_error_message = "Failed to allocate fd set";
2510 return log_emergency_errno(r, "Failed to allocate fd set: %m");
2511 }
2512
2513 fdset_cloexec(*ret_fds, true);
2514
2515 if (arg_serialization)
2516 assert_se(fdset_remove(*ret_fds, fileno(arg_serialization)) >= 0);
2517
2518 return 0;
2519}
2520
2e51b31c
LP
2521static void setup_console_terminal(bool skip_setup) {
2522
2523 if (!arg_system)
2524 return;
2525
2526 /* Become a session leader if we aren't one yet. */
2527 (void) setsid();
2528
2529 /* If we are init, we connect stdin/stdout/stderr to /dev/null and make sure we don't have a controlling
2530 * tty. */
2531 (void) release_terminal();
2532
2533 /* Reset the console, but only if this is really init and we are freshly booted */
2534 if (getpid_cached() == 1 && !skip_setup)
2535 (void) console_setup();
2536}
2537
aa40ff07
LP
/* Takes a quick peek at the command line (full parsing happens much later) to tell a reexecution from
 * a normal bootup.
 *
 * Returns true if "--deserialize" is present and "--switched-root" is not: in that case we are
 * reexecuting and the extensive setup can be skipped. After a root switch the special setup is done
 * anyway, even though deserialization takes place then, too. */
static bool early_skip_setup_check(int argc, char *argv[]) {
        bool deserializing = false;

        for (int idx = 1; idx < argc; idx++) {
                const char *arg = argv[idx];

                if (streq(arg, "--switched-root"))
                        return false; /* Root was switched: do not skip the setup. */

                if (streq(arg, "--deserialize"))
                        deserializing = true;
        }

        return deserializing;
}
2553
0e06a031
LP
2554static int save_env(void) {
2555 char **l;
2556
2557 l = strv_copy(environ);
2558 if (!l)
2559 return -ENOMEM;
2560
2561 strv_free_and_replace(saved_env, l);
2562 return 0;
2563}
2564
/* Entry point of the service manager.
 *
 * Rough order of operations, as visible below:
 *   1. take early timestamps and peek at argv to learn whether this is a re-execution (skip_setup);
 *   2. save argv and the environment, then set up logging; when running as PID 1 additionally mount
 *      the early API file systems, load security policies, fix up the environment and mount the
 *      remaining API file systems (several of these steps are skipped when skip_setup is true);
 *   3. save the rlimits, reset signal handlers, parse configuration and command line, run safety checks;
 *   4. handle the purely informational actions (help, version, dumps, bus introspection) and leave;
 *   5. for ACTION_RUN/ACTION_TEST: create the Manager, start it up, optionally queue the default job
 *      and (for ACTION_RUN) enter the main loop;
 *   6. at the "finish" label: free the manager, re-execute or execute the shutdown binary if requested,
 *      and, when running as PID 1, print any pending error message and call freeze_or_exit_or_reboot().
 *
 * Every failure path stores a static message in error_message and jumps to "finish". The process exit
 * code is retval, which starts out as EXIT_FAILURE. */
60918275 2565int main(int argc, char *argv[]) {
625e8690
LP
2566
2567 dual_timestamp initrd_timestamp = DUAL_TIMESTAMP_NULL, userspace_timestamp = DUAL_TIMESTAMP_NULL, kernel_timestamp = DUAL_TIMESTAMP_NULL,
2568 security_start_timestamp = DUAL_TIMESTAMP_NULL, security_finish_timestamp = DUAL_TIMESTAMP_NULL;
ddfa8b0b
LP
2569 struct rlimit saved_rlimit_nofile = RLIMIT_MAKE_CONST(0),
2570 saved_rlimit_memlock = RLIMIT_MAKE_CONST(RLIM_INFINITY); /* The original rlimits we passed
2571 * in. Note we use different values
2572 * for the two that indicate whether
2573 * these fields are initialized! */
625e8690
LP
2574 bool skip_setup, loaded_policy = false, queue_default_job = false, first_boot = false, reexecute = false;
2575 char *switch_root_dir = NULL, *switch_root_init = NULL;
9d76d730 2576 usec_t before_startup, after_startup;
625e8690 2577 static char systemd[] = "systemd";
625e8690
LP
2578 const char *shutdown_verb = NULL, *error_message = NULL;
2579 int r, retval = EXIT_FAILURE;
2580 Manager *m = NULL;
a16e1123 2581 FDSet *fds = NULL;
27b14a22 2582
d72a8f10 2583 /* SysV compatibility: redirect init → telinit */
6808a0bc 2584 redirect_telinit(argc, argv);
2cb1a60d 2585
d72a8f10 2586 /* Take timestamps early on */
c3a170f3
HH
2587 dual_timestamp_from_monotonic(&kernel_timestamp, 0);
2588 dual_timestamp_get(&userspace_timestamp);
2589
d72a8f10
LP
2590 /* Figure out whether we need to initialize the system, or if we already did that because we are
2591 * reexecuting */
aa40ff07 2592 skip_setup = early_skip_setup_check(argc, argv);
d03bc1b8 2593
d72a8f10
LP
2594 /* If we get started via the /sbin/init symlink then we are called 'init'. After a subsequent reexecution we
2595 * are then called 'systemd'. That is confusing, hence let's call us systemd right-away. */
f3b6a3ed 2596 program_invocation_short_name = systemd;
eee8b7ab 2597 (void) prctl(PR_SET_NAME, systemd);
5d6b1584 2598
d72a8f10 2599 /* Save the original command line */
36fea155 2600 save_argc_argv(argc, argv);
f3b6a3ed 2601
0e06a031
LP
2602 /* Save the original environment as we might need to restore it if we're requested to execute another
2603 * system manager later. */
2604 r = save_env();
2605 if (r < 0) {
2606 error_message = "Failed to copy environment block";
2607 goto finish;
2608 }
a5cede8c 2609
6fdb8de4 2610 /* Make sure that if the user says "syslog" we actually log to the journal. */
c1dc6153 2611 log_set_upgrade_syslog_to_journal(true);
bbe63281 2612
df0ff127 2613 if (getpid_cached() == 1) {
b5752d23
LP
2614 /* When we run as PID 1 force system mode */
2615 arg_system = true;
2616
48a601fe 2617 /* Disable the umask logic */
90dc8c2e
MG
2618 umask(0);
2619
92890452
LP
2620 /* Make sure that at least initially we do not ever log to journald/syslogd, because it might not be
2621 * activated yet (even though the log socket for it exists). */
d075092f
LP
2622 log_set_prohibit_ipc(true);
2623
48a601fe
LP
2624 /* Always reopen /dev/console when running as PID 1 or one of its pre-execve() children. This is
2625 * important so that we never end up logging to any foreign stderr, for example if we have to log in a
2626 * child process right before execve()'ing the actual binary, at a point in time where socket
2627 * activation stderr/stdout are already set up. */
2628 log_set_always_reopen_console(true);
48a601fe 2629
92890452 2630 if (detect_container() <= 0) {
4f8d551f 2631
92890452 2632 /* Running outside of a container as PID 1 */
92890452
LP
2633 log_set_target(LOG_TARGET_KMSG);
2634 log_open();
a866073d 2635
92890452
LP
2636 if (in_initrd())
2637 initrd_timestamp = userspace_timestamp;
c3ba6250 2638
92890452
LP
2639 if (!skip_setup) {
2640 r = mount_setup_early();
2641 if (r < 0) {
2642 error_message = "Failed to mount early API filesystems";
2643 goto finish;
2644 }
2645
0a2eef1e
LP
2646 /* Let's open the log backend a second time, in case the first time didn't
2647 * work. Quite possibly we have mounted /dev just now, so /dev/kmsg became
2648 * available, and it previously wasn't. */
2649 log_open();
2650
6123dfaa
ZJS
2651 disable_printk_ratelimit();
2652
/* Loads the security policies; on failure error_message has already been set by the callee. */
92890452
LP
2653 r = initialize_security(
2654 &loaded_policy,
2655 &security_start_timestamp,
2656 &security_finish_timestamp,
2657 &error_message);
2658 if (r < 0)
2659 goto finish;
d723cd65 2660 }
eee8b7ab 2661
92890452 2662 if (mac_selinux_init() < 0) {
a9ba0e32 2663 error_message = "Failed to initialize SELinux support";
96694e99 2664 goto finish;
92890452 2665 }
0b3325e7 2666
92890452
LP
2667 if (!skip_setup)
2668 initialize_clock();
2669
2670 /* Set the default for later on, but don't actually open the logs like this for now. Note that
2671 * if we are transitioning from the initrd there might still be journal fd open, and we
2672 * shouldn't attempt opening that before we parsed /proc/cmdline which might redirect output
2673 * elsewhere. */
2674 log_set_target(LOG_TARGET_JOURNAL_OR_KMSG);
2675
2676 } else {
2677 /* Running inside a container, as PID 1 */
92890452
LP
2678 log_set_target(LOG_TARGET_CONSOLE);
2679 log_open();
2680
2681 /* For later on, see above... */
2682 log_set_target(LOG_TARGET_JOURNAL);
2683
45250e66 2684 /* clear the kernel timestamp, because we are in a container */
92890452 2685 kernel_timestamp = DUAL_TIMESTAMP_NULL;
cb6531be 2686 }
7948c4df 2687
92890452 2688 initialize_coredump(skip_setup);
a866073d 2689
92890452
LP
2690 r = fixup_environment();
2691 if (r < 0) {
2692 log_emergency_errno(r, "Failed to fix up PID 1 environment: %m");
2693 error_message = "Failed to fix up PID1 environment";
2694 goto finish;
2695 }
a866073d 2696
92890452
LP
2697 /* Try to figure out if we can use colors with the console. No need to do that for user instances since
2698 * they never log into the console. */
3a18b604 2699 log_show_color(colors_enabled());
92890452 2700
/* Failure to redirect the standard streams is not fatal: it is only logged as a warning. */
c76cf844
AK
2701 r = make_null_stdio();
2702 if (r < 0)
92890452 2703 log_warning_errno(r, "Failed to redirect standard streams to /dev/null, ignoring: %m");
f84f9974 2704
a132bef0 2705 /* Load the kernel modules early. */
2e75e2a8
DM
2706 if (!skip_setup)
2707 kmod_setup();
2e75e2a8 2708
3196e423 2709 /* Mount /proc, /sys and friends, so that /proc/cmdline and /proc/$PID/fd is available. */
f74349d8 2710 r = mount_setup(loaded_policy, skip_setup);
cb6531be
ZJS
2711 if (r < 0) {
2712 error_message = "Failed to mount API filesystems";
8efe3c01 2713 goto finish;
cb6531be 2714 }
c18ecf03
LP
2715
2716 /* The efivarfs is now mounted, let's read the random seed off it */
2717 (void) efi_take_random_seed();
209b2592
FB
2718
2719 /* Cache command-line options passed from EFI variables */
2720 if (!skip_setup)
2721 (void) cache_efi_options_variable();
3196e423
LP
2722 } else {
2723 /* Running as user instance */
2724 arg_system = false;
2725 log_set_target(LOG_TARGET_AUTO);
2726 log_open();
2727
2728 /* clear the kernel timestamp, because we are not PID 1 */
2729 kernel_timestamp = DUAL_TIMESTAMP_NULL;
2730
2731 if (mac_selinux_init() < 0) {
2732 error_message = "Failed to initialize SELinux support";
2733 goto finish;
2734 }
0c85a4f3 2735 }
4ade7963 2736
a9fd4cd1
FB
2737 /* Save the original RLIMIT_NOFILE/RLIMIT_MEMLOCK so that we can reset it later when
2738 * transitioning from the initrd to the main systemd or suchlike. */
2739 save_rlimits(&saved_rlimit_nofile, &saved_rlimit_memlock);
2740
4ade7963 2741 /* Reset all signal handlers. */
ce30c8dc 2742 (void) reset_all_signal_handlers();
9c274488 2743 (void) ignore_signals(SIGNALS_IGNORE);
078e4539 2744
/* The result of configuration parsing is deliberately ignored; only command line errors are fatal. */
ffe5c01e
FB
2745 (void) parse_configuration(&saved_rlimit_nofile, &saved_rlimit_memlock);
2746
2747 r = parse_argv(argc, argv);
2748 if (r < 0) {
2749 error_message = "Failed to parse commandline arguments";
f170852a 2750 goto finish;
ffe5c01e 2751 }
10c961b9 2752
b0d7c989
LP
2753 r = safety_checks();
2754 if (r < 0)
fe783b03 2755 goto finish;
fe783b03 2756
5c08257b 2757 if (IN_SET(arg_action, ACTION_TEST, ACTION_HELP, ACTION_DUMP_CONFIGURATION_ITEMS, ACTION_DUMP_BUS_PROPERTIES, ACTION_BUS_INTROSPECT))
0221d68a 2758 (void) pager_open(arg_pager_flags);
b0d7c989
LP
2759
/* From here on every action other than ACTION_RUN is treated as if the setup had already been done. */
2760 if (arg_action != ACTION_RUN)
74e7579c 2761 skip_setup = true;
b87c2aa6 2762
fa0f4d8a 2763 if (arg_action == ACTION_HELP) {
37ec0fdd 2764 retval = help() < 0 ? EXIT_FAILURE : EXIT_SUCCESS;
f170852a 2765 goto finish;
9ba0bc4e
ZJS
2766 } else if (arg_action == ACTION_VERSION) {
2767 retval = version();
2768 goto finish;
fa0f4d8a 2769 } else if (arg_action == ACTION_DUMP_CONFIGURATION_ITEMS) {
e537352b 2770 unit_dump_config_items(stdout);
22f4096c 2771 retval = EXIT_SUCCESS;
e537352b 2772 goto finish;
bbc1acab
YW
2773 } else if (arg_action == ACTION_DUMP_BUS_PROPERTIES) {
2774 dump_bus_properties(stdout);
2775 retval = EXIT_SUCCESS;
2776 goto finish;
5c08257b
ZJS
2777 } else if (arg_action == ACTION_BUS_INTROSPECT) {
2778 r = bus_manager_introspect_implementations(stdout, arg_bus_introspect);
2779 retval = r >= 0 ? EXIT_SUCCESS : EXIT_FAILURE;
2780 goto finish;
f170852a
LP
2781 }
2782
/* All informational actions have left via "finish" above; only run and test mode remain. */
4c701096 2783 assert_se(IN_SET(arg_action, ACTION_RUN, ACTION_TEST));
f170852a 2784
5a2e0c62
LP
2785 /* Move out of the way, so that we won't block unmounts */
2786 assert_se(chdir("/") == 0);
2787
dea374e8 2788 if (arg_action == ACTION_RUN) {
d247f232
LP
2789 if (!skip_setup) {
2790 /* Apply the systemd.clock_usec= kernel command line switch */
45250e66 2791 apply_clock_update();
a70c72a0 2792
d247f232
LP
2793 /* Apply random seed from kernel command line */
2794 cmdline_take_random_seed();
2795 }
2796
c6885f5f
FB
2797 /* A core pattern might have been specified via the cmdline. */
2798 initialize_core_pattern(skip_setup);
2799
efeb853f 2800 /* Close logging fds, in order not to confuse collecting passed fds and terminal logic below */
a70c72a0
LP
2801 log_close();
2802
2803 /* Remember open file descriptors for later deserialization */
efeb853f
LP
2804 r = collect_fds(&fds, &error_message);
2805 if (r < 0)
dea374e8 2806 goto finish;
a16e1123 2807
2e51b31c
LP
2808 /* Give up any control of the console, but make sure it's initialized. */
2809 setup_console_terminal(skip_setup);
56d96fc0 2810
a70c72a0
LP
2811 /* Open the logging devices, if possible and necessary */
2812 log_open();
56d96fc0 2813 }
4ade7963 2814
31aef7ff 2815 log_execution_mode(&first_boot);
a5dab5ce 2816
2d776038 2817 r = initialize_runtime(skip_setup,
3023f2fe 2818 first_boot,
2d776038
LP
2819 &saved_rlimit_nofile,
2820 &saved_rlimit_memlock,
2821 &error_message);
2822 if (r < 0)
2823 goto finish;
4096d6f5 2824
e0a3da1f
ZJS
2825 r = manager_new(arg_system ? UNIT_FILE_SYSTEM : UNIT_FILE_USER,
2826 arg_action == ACTION_TEST ? MANAGER_TEST_FULL : 0,
2827 &m);
e96d6be7 2828 if (r < 0) {
da927ba9 2829 log_emergency_errno(r, "Failed to allocate manager object: %m");
cb6531be 2830 error_message = "Failed to allocate manager object";
60918275
LP
2831 goto finish;
2832 }
2833
/* Hand the timestamps collected during early boot over to the manager. */
9f9f0342
LP
2834 m->timestamps[MANAGER_TIMESTAMP_KERNEL] = kernel_timestamp;
2835 m->timestamps[MANAGER_TIMESTAMP_INITRD] = initrd_timestamp;
2836 m->timestamps[MANAGER_TIMESTAMP_USERSPACE] = userspace_timestamp;
d4ee7bd8
YW
2837 m->timestamps[manager_timestamp_initrd_mangle(MANAGER_TIMESTAMP_SECURITY_START)] = security_start_timestamp;
2838 m->timestamps[manager_timestamp_initrd_mangle(MANAGER_TIMESTAMP_SECURITY_FINISH)] = security_finish_timestamp;
9e58ff9c 2839
85cb4151 2840 set_manager_defaults(m);
7b46fc6a 2841 set_manager_settings(m);
fd130612 2842 manager_set_first_boot(m, first_boot);
27d340c7 2843
bf4df7c3 2844 /* Remember whether we should queue the default job: only when there is no serialization to
 * restore from, or when we switched root. This has to be decided now, since arg_serialization is
 * closed right after manager_startup() below. */
d3b1c508 2845 queue_default_job = !arg_serialization || arg_switched_root;
bf4df7c3 2846
9d76d730
LP
2847 before_startup = now(CLOCK_MONOTONIC);
2848
d3b1c508 2849 r = manager_startup(m, arg_serialization, fds);
58f88d92 2850 if (r < 0) {
cefb3eda 2851 error_message = "Failed to start up manager";
58f88d92
ZJS
2852 goto finish;
2853 }
a16e1123 2854
6acca5fc 2855 /* This will close all file descriptors that were opened, but not claimed by any unit. */
2feceb5e 2856 fds = fdset_free(fds);
74ca738f 2857 arg_serialization = safe_fclose(arg_serialization);
bf4df7c3
LP
2858
2859 if (queue_default_job) {
6acca5fc 2860 r = do_queue_default_job(m, &error_message);
718db961 2861 if (r < 0)
37d88da7 2862 goto finish;
6acca5fc 2863 }
ab17a050 2864
6acca5fc 2865 after_startup = now(CLOCK_MONOTONIC);
60918275 2866
6acca5fc
LP
2867 log_full(arg_action == ACTION_TEST ? LOG_INFO : LOG_DEBUG,
2868 "Loaded units and determined initial transaction in %s.",
5291f26d 2869 FORMAT_TIMESPAN(after_startup - before_startup, 100 * USEC_PER_MSEC));
07672f49 2870
6acca5fc 2871 if (arg_action == ACTION_TEST) {
2a341bb9 2872 manager_test_summary(m);
6acca5fc
LP
2873 retval = EXIT_SUCCESS;
2874 goto finish;
e965d56d 2875 }
d46de8a1 2876
/* The return value is ignored; the outcome is reported through the output parameters instead. */
3046b6db 2877 (void) invoke_main_loop(m,
a9fd4cd1
FB
2878 &saved_rlimit_nofile,
2879 &saved_rlimit_memlock,
3046b6db
LP
2880 &reexecute,
2881 &retval,
2882 &shutdown_verb,
2883 &fds,
2884 &switch_root_dir,
2885 &switch_root_init,
2886 &error_message);
f170852a 2887
/* Common exit path: reached via goto on every error above, after the informational actions, and by
 * falling through once the main loop has returned. */
60918275 2888finish:
b87c2aa6
ZJS
2889 pager_close();
2890
92890452 2891 if (m) {
/* Fetch the watchdog settings from the manager before it is freed. */
986935cf
FB
2892 arg_reboot_watchdog = manager_get_watchdog(m, WATCHDOG_REBOOT);
2893 arg_kexec_watchdog = manager_get_watchdog(m, WATCHDOG_KEXEC);
92890452
LP
2894 m = manager_free(m);
2895 }
60918275 2896
cc56fafe 2897 mac_selinux_finish();
b2bb3dbe 2898
3c7878f9
LP
2899 if (reexecute)
2900 do_reexecute(argc, argv,
2901 &saved_rlimit_nofile,
2902 &saved_rlimit_memlock,
2903 fds,
2904 switch_root_dir,
2905 switch_root_init,
2906 &error_message); /* This only returns if reexecution failed */
a16e1123 2907
74ca738f 2908 arg_serialization = safe_fclose(arg_serialization);
2feceb5e 2909 fds = fdset_free(fds);
a16e1123 2910
0e06a031
LP
2911 saved_env = strv_free(saved_env);
2912
349cc4a5 2913#if HAVE_VALGRIND_VALGRIND_H
54b434b1
LP
2914 /* If we are PID 1 and running under valgrind, then let's exit
2915 * here explicitly. valgrind will only generate nice output on
2916 * exit(), not on exec(), hence let's do the former not the
2917 * latter here. */
8a2c1fbf
EJ
2918 if (getpid_cached() == 1 && RUNNING_ON_VALGRIND) {
2919 /* Cleanup watchdog_device strings for valgrind. We need them
2920 * in become_shutdown() so normally we cannot free them yet. */
2921 watchdog_free_device();
2922 arg_watchdog_device = mfree(arg_watchdog_device);
7d9eea2b 2923 reset_arguments();
27fe58b7 2924 return retval;
8a2c1fbf 2925 }
54b434b1
LP
2926#endif
2927
7e11a95e
EV
2928#if HAS_FEATURE_ADDRESS_SANITIZER
2929 __lsan_do_leak_check();
2930#endif
2931
b9080b03 2932 if (shutdown_verb) {
/* NOTE(review): the unconditional error log below suggests become_shutdown() only returns on
 * failure — confirm against its definition. */
7eb35049 2933 r = become_shutdown(shutdown_verb, retval);
4a36297c 2934 log_error_errno(r, "Failed to execute shutdown binary, %s: %m", getpid_cached() == 1 ? "freezing" : "quitting");
9b9881d7 2935 error_message = "Failed to execute shutdown binary";
b9080b03
FF
2936 }
2937
8a2c1fbf
EJ
2938 watchdog_free_device();
2939 arg_watchdog_device = mfree(arg_watchdog_device);
2940
df0ff127 2941 if (getpid_cached() == 1) {
cb6531be
ZJS
2942 if (error_message)
2943 manager_status_printf(NULL, STATUS_TYPE_EMERGENCY,
1fc464f6 2944 ANSI_HIGHLIGHT_RED "!!!!!!" ANSI_NORMAL,
bb259772
LP
2945 "%s.", error_message);
2946 freeze_or_exit_or_reboot();
cb6531be 2947 }
c3b3c274 2948
7d9eea2b 2949 reset_arguments();
60918275
LP
2950 return retval;
2951}