]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/core/main.c
Merge pull request #17836 from poettering/tpm2-condition
[thirdparty/systemd.git] / src / core / main.c
CommitLineData
db9ecf05 1/* SPDX-License-Identifier: LGPL-2.1-or-later */
a7334b09 2
60918275 3#include <errno.h>
3dfc9763 4#include <fcntl.h>
f170852a 5#include <getopt.h>
664f88a7 6#include <sys/mount.h>
3dfc9763 7#include <sys/prctl.h>
b9e74c39 8#include <sys/reboot.h>
3dfc9763 9#include <unistd.h>
349cc4a5 10#if HAVE_SECCOMP
b64a3d86
LP
11#include <seccomp.h>
12#endif
349cc4a5 13#if HAVE_VALGRIND_VALGRIND_H
3dfc9763
LP
14#include <valgrind/valgrind.h>
15#endif
54b434b1 16
718db961 17#include "sd-bus.h"
cf0fbc49 18#include "sd-daemon.h"
b2e7486c 19#include "sd-messages.h"
3dfc9763 20
b5efdb8a 21#include "alloc-util.h"
2ffadd3c 22#include "apparmor-setup.h"
d9d93745 23#include "architecture.h"
3dfc9763
LP
24#include "build.h"
25#include "bus-error.h"
26#include "bus-util.h"
430f0182 27#include "capability-util.h"
a88c5b8a 28#include "cgroup-util.h"
24efb112 29#include "clock-util.h"
3dfc9763 30#include "conf-parser.h"
618234a5 31#include "cpu-set-util.h"
3dfc9763 32#include "dbus-manager.h"
c18ecf03 33#include "dbus.h"
3dfc9763 34#include "def.h"
32429805 35#include "dev-setup.h"
c18ecf03 36#include "efi-random.h"
209b2592 37#include "efivars.h"
eee8b7ab 38#include "emergency-action.h"
3dfc9763 39#include "env-util.h"
57b7a260 40#include "exit-status.h"
3ffd4af2 41#include "fd-util.h"
3dfc9763 42#include "fdset.h"
718db961 43#include "fileio.h"
f97b34a6 44#include "format-util.h"
f4f15635 45#include "fs-util.h"
d247f232 46#include "hexdecoct.h"
3dfc9763
LP
47#include "hostname-setup.h"
48#include "ima-setup.h"
49#include "killall.h"
50#include "kmod-setup.h"
eefc66aa 51#include "limits-util.h"
d7b8eec7 52#include "load-fragment.h"
3dfc9763 53#include "log.h"
b6e66135 54#include "loopback-setup.h"
b6e66135 55#include "machine-id-setup.h"
3dfc9763 56#include "manager.h"
32429805 57#include "mkdir.h"
3dfc9763 58#include "mount-setup.h"
d58ad743 59#include "os-util.h"
3dfc9763 60#include "pager.h"
6bedfcbb 61#include "parse-util.h"
7d5ceb64 62#include "path-util.h"
294bf0c3 63#include "pretty-print.h"
4e731273 64#include "proc-cmdline.h"
3dfc9763 65#include "process-util.h"
d247f232 66#include "random-util.h"
8869a0b4 67#include "raw-clone.h"
78f22b97 68#include "rlimit-util.h"
349cc4a5 69#if HAVE_SECCOMP
83f12b27
FS
70#include "seccomp-util.h"
71#endif
b6e66135 72#include "selinux-setup.h"
3dfc9763
LP
73#include "selinux-util.h"
74#include "signal-util.h"
ffbd2c4d 75#include "smack-setup.h"
3dfc9763 76#include "special.h"
8fcde012 77#include "stat-util.h"
15a5e950 78#include "stdio-util.h"
3dfc9763
LP
79#include "strv.h"
80#include "switch-root.h"
a8b627aa 81#include "sysctl-util.h"
3dfc9763 82#include "terminal-util.h"
8612da97 83#include "umask-util.h"
b1d4f8e1 84#include "user-util.h"
9ce17593 85#include "util.h"
3dfc9763
LP
86#include "virt.h"
87#include "watchdog.h"
b6e66135 88
7e11a95e
EV
89#if HAS_FEATURE_ADDRESS_SANITIZER
90#include <sanitizer/lsan_interface.h>
91#endif
92
3a0f06c4
ZJS
93#define DEFAULT_TASKS_MAX ((TasksMax) { 15U, 100U }) /* 15% */
94
f170852a
LP
/* The operation selected for this invocation. ACTION_RUN is the initial value; the other values
 * correspond to the informational/diagnostic modes named by the enumerators. */
static enum {
        ACTION_RUN,
        ACTION_HELP,
        ACTION_VERSION,
        ACTION_TEST,
        ACTION_DUMP_CONFIGURATION_ITEMS,
        ACTION_DUMP_BUS_PROPERTIES,
        ACTION_BUS_INTROSPECT,
} arg_action = ACTION_RUN;
fb39af4c 104
5c08257b
ZJS
105static const char *arg_bus_introspect = NULL;
106
45250e66
LP
/* Those variables are initialized to 0 automatically, so we avoid uninitialized memory access. Real
 * defaults are assigned in reset_arguments() below. */
static char *arg_default_unit;                   /* set from systemd.unit=/rd.systemd.unit= or a runlevel word */
static bool arg_system;                          /* selects system.conf vs. user.conf in parse_config_file() */
static bool arg_dump_core;                       /* consulted by crash() before attempting a core dump */
static int arg_crash_chvt;                       /* crash() switches to this VT when it is >= 0 */
static bool arg_crash_shell;                     /* crash() spawns /bin/sh when set */
static bool arg_crash_reboot;                    /* freeze_or_exit_or_reboot() reboots when set */
static char *arg_confirm_spawn;
static ShowStatus arg_show_status;
static StatusUnitFormat arg_status_unit_format;
static bool arg_switched_root;
static PagerFlags arg_pager_flags;
static bool arg_service_watchdogs;
static ExecOutput arg_default_std_output;
static ExecOutput arg_default_std_error;
static usec_t arg_default_restart_usec;
static usec_t arg_default_timeout_start_usec;
static usec_t arg_default_timeout_stop_usec;
static usec_t arg_default_timeout_abort_usec;
static bool arg_default_timeout_abort_set;       /* written by config_parse_default_timeout_abort() */
static usec_t arg_default_start_limit_interval;
static unsigned arg_default_start_limit_burst;
static usec_t arg_runtime_watchdog;
static usec_t arg_reboot_watchdog;               /* also written by the obsolete ShutdownWatchdogSec= alias */
static usec_t arg_kexec_watchdog;
static char *arg_early_core_pattern;
static char *arg_watchdog_device;
static char **arg_default_environment;
static struct rlimit *arg_default_rlimit[_RLIMIT_MAX];
static uint64_t arg_capability_bounding_set;
static bool arg_no_new_privs;
static nsec_t arg_timer_slack_nsec;
static usec_t arg_default_timer_accuracy_usec;
static Set* arg_syscall_archs;
static FILE* arg_serialization;
static int arg_default_cpu_accounting;           /* tristate: negative lets set_manager_defaults() decide */
static bool arg_default_io_accounting;
static bool arg_default_ip_accounting;
static bool arg_default_blockio_accounting;
static bool arg_default_memory_accounting;
static bool arg_default_tasks_accounting;
static TasksMax arg_default_tasks_max;
static sd_id128_t arg_machine_id;                /* written by set_machine_id() after validation */
static EmergencyAction arg_cad_burst_action;
static OOMPolicy arg_default_oom_policy;
static CPUSet arg_cpu_affinity;
static NUMAPolicy arg_numa_policy;
static usec_t arg_clock_usec;                    /* parsed from systemd.clock_usec= */
static void *arg_random_seed;                    /* decoded payload of systemd.random_seed= (base64) */
static size_t arg_random_seed_size;              /* number of bytes behind arg_random_seed */
61fbbac1 158
0e06a031
LP
159/* A copy of the original environment block */
160static char **saved_env = NULL;
161
a9fd4cd1
FB
162static int parse_configuration(const struct rlimit *saved_rlimit_nofile,
163 const struct rlimit *saved_rlimit_memlock);
4fc935ca 164
bb259772
LP
165_noreturn_ static void freeze_or_exit_or_reboot(void) {
166
c3b6a348
LP
167 /* If we are running in a container, let's prefer exiting, after all we can propagate an exit code to
168 * the container manager, and thus inform it that something went wrong. */
bb259772
LP
169 if (detect_container() > 0) {
170 log_emergency("Exiting PID 1...");
c3b6a348 171 _exit(EXIT_EXCEPTION);
bb259772 172 }
b9e74c39
LP
173
174 if (arg_crash_reboot) {
175 log_notice("Rebooting in 10s...");
176 (void) sleep(10);
177
178 log_notice("Rebooting now...");
179 (void) reboot(RB_AUTOBOOT);
180 log_emergency_errno(errno, "Failed to reboot: %m");
181 }
182
183 log_emergency("Freezing execution.");
184 freeze();
185}
186
848e863a 187_noreturn_ static void crash(int sig) {
7d06dad9
MS
188 struct sigaction sa;
189 pid_t pid;
97c4f35c 190
df0ff127 191 if (getpid_cached() != 1)
abb26902 192 /* Pass this on immediately, if this is not PID 1 */
92ca4cac 193 (void) raise(sig);
abb26902 194 else if (!arg_dump_core)
4104970e 195 log_emergency("Caught <%s>, not dumping core.", signal_to_string(sig));
97c4f35c 196 else {
7d06dad9 197 sa = (struct sigaction) {
189d5bac 198 .sa_handler = nop_signal_handler,
b92bea5d
ZJS
199 .sa_flags = SA_NOCLDSTOP|SA_RESTART,
200 };
97c4f35c 201
6f5e3f35 202 /* We want to wait for the core process, hence let's enable SIGCHLD */
92ca4cac 203 (void) sigaction(SIGCHLD, &sa, NULL);
6f5e3f35 204
8869a0b4 205 pid = raw_clone(SIGCHLD);
e62d8c39 206 if (pid < 0)
56f64d95 207 log_emergency_errno(errno, "Caught <%s>, cannot fork for core dump: %m", signal_to_string(sig));
97c4f35c 208 else if (pid == 0) {
97c4f35c 209 /* Enable default signal handler for core dump */
15a90032 210
92ca4cac
LP
211 sa = (struct sigaction) {
212 .sa_handler = SIG_DFL,
213 };
214 (void) sigaction(sig, &sa, NULL);
97c4f35c 215
15a90032
LP
216 /* Don't limit the coredump size */
217 (void) setrlimit(RLIMIT_CORE, &RLIMIT_MAKE_CONST(RLIM_INFINITY));
97c4f35c
LP
218
219 /* Just to be sure... */
e62d9b81 220 (void) chdir("/");
97c4f35c
LP
221
222 /* Raise the signal again */
ee05e779 223 pid = raw_getpid();
92ca4cac 224 (void) kill(pid, sig); /* raise() would kill the parent */
97c4f35c
LP
225
226 assert_not_reached("We shouldn't be here...");
bb85a582 227 _exit(EXIT_EXCEPTION);
4fc935ca 228 } else {
8e12a6ae
LP
229 siginfo_t status;
230 int r;
4fc935ca
LP
231
232 /* Order things nicely. */
e62d8c39
ZJS
233 r = wait_for_terminate(pid, &status);
234 if (r < 0)
da927ba9 235 log_emergency_errno(r, "Caught <%s>, waitpid() failed: %m", signal_to_string(sig));
e1714f02
ZJS
236 else if (status.si_code != CLD_DUMPED) {
237 const char *s = status.si_code == CLD_EXITED
e04ed6db 238 ? exit_status_to_string(status.si_status, EXIT_STATUS_LIBC)
e1714f02
ZJS
239 : signal_to_string(status.si_status);
240
ee05e779
ZJS
241 log_emergency("Caught <%s>, core dump failed (child "PID_FMT", code=%s, status=%i/%s).",
242 signal_to_string(sig),
e1714f02
ZJS
243 pid,
244 sigchld_code_to_string(status.si_code),
245 status.si_status, strna(s));
246 } else
247 log_emergency("Caught <%s>, dumped core as pid "PID_FMT".",
248 signal_to_string(sig), pid);
97c4f35c
LP
249 }
250 }
251
b9e74c39 252 if (arg_crash_chvt >= 0)
92ca4cac 253 (void) chvt(arg_crash_chvt);
601f6a1e 254
7d06dad9
MS
255 sa = (struct sigaction) {
256 .sa_handler = SIG_IGN,
257 .sa_flags = SA_NOCLDSTOP|SA_NOCLDWAIT|SA_RESTART,
258 };
259
260 /* Let the kernel reap children for us */
261 (void) sigaction(SIGCHLD, &sa, NULL);
8c43883a 262
7d06dad9 263 if (arg_crash_shell) {
b9e74c39 264 log_notice("Executing crash shell in 10s...");
92ca4cac 265 (void) sleep(10);
4fc935ca 266
8869a0b4 267 pid = raw_clone(SIGCHLD);
cd3bd60a 268 if (pid < 0)
56f64d95 269 log_emergency_errno(errno, "Failed to fork off crash shell: %m");
6f5e3f35 270 else if (pid == 0) {
b9e74c39 271 (void) setsid();
92ca4cac 272 (void) make_console_stdio();
595225af 273 (void) rlimit_nofile_safe();
92ca4cac 274 (void) execle("/bin/sh", "/bin/sh", NULL, environ);
6f5e3f35 275
ee05e779 276 log_emergency_errno(errno, "execle() failed: %m");
bb85a582 277 _exit(EXIT_EXCEPTION);
b9e74c39
LP
278 } else {
279 log_info("Spawned crash shell as PID "PID_FMT".", pid);
4cf0b03b 280 (void) wait_for_terminate(pid, NULL);
b9e74c39 281 }
4fc935ca
LP
282 }
283
bb259772 284 freeze_or_exit_or_reboot();
97c4f35c
LP
285}
286
287static void install_crash_handler(void) {
297d563d 288 static const struct sigaction sa = {
b92bea5d 289 .sa_handler = crash,
297d563d 290 .sa_flags = SA_NODEFER, /* So that we can raise the signal again from the signal handler */
b92bea5d 291 };
297d563d 292 int r;
97c4f35c 293
297d563d
LP
294 /* We ignore the return value here, since, we don't mind if we
295 * cannot set up a crash handler */
296 r = sigaction_many(&sa, SIGNALS_CRASH_HANDLER, -1);
297 if (r < 0)
298 log_debug_errno(r, "I had trouble setting up the crash handler, ignoring: %m");
97c4f35c 299}
f170852a 300
56d96fc0
LP
301static int console_setup(void) {
302 _cleanup_close_ int tty_fd = -1;
303 int r;
80876c20 304
512947d4 305 tty_fd = open_terminal("/dev/console", O_WRONLY|O_NOCTTY|O_CLOEXEC);
23bbb0de
MS
306 if (tty_fd < 0)
307 return log_error_errno(tty_fd, "Failed to open /dev/console: %m");
80876c20 308
56d96fc0
LP
309 /* We don't want to force text mode. plymouth may be showing
310 * pictures already from initrd. */
512947d4 311 r = reset_terminal_fd(tty_fd, false);
23bbb0de
MS
312 if (r < 0)
313 return log_error_errno(r, "Failed to reset /dev/console: %m");
843d2643 314
56d96fc0 315 return 0;
80876c20
LP
316}
317
ee48dbd5 318static int set_machine_id(const char *m) {
e042eab7 319 sd_id128_t t;
8b26cdbd 320 assert(m);
ee48dbd5 321
e042eab7 322 if (sd_id128_from_string(m, &t) < 0)
ee48dbd5
NC
323 return -EINVAL;
324
e042eab7 325 if (sd_id128_is_null(t))
ee48dbd5
NC
326 return -EINVAL;
327
e042eab7 328 arg_machine_id = t;
ee48dbd5
NC
329 return 0;
330}
331
96287a49 332static int parse_proc_cmdline_item(const char *key, const char *value, void *data) {
059cb385 333 int r;
f170852a 334
059cb385 335 assert(key);
5192bd19 336
1d84ad94 337 if (STR_IN_SET(key, "systemd.unit", "rd.systemd.unit")) {
bf4df7c3 338
1d84ad94
LP
339 if (proc_cmdline_value_missing(key, value))
340 return 0;
bf4df7c3 341
1d84ad94
LP
342 if (!unit_name_is_valid(value, UNIT_NAME_PLAIN|UNIT_NAME_INSTANCE))
343 log_warning("Unit name specified on %s= is not valid, ignoring: %s", key, value);
cd57038a
ZJS
344 else if (in_initrd() == !!startswith(key, "rd."))
345 return free_and_strdup_warn(&arg_default_unit, value);
f170852a 346
1d84ad94 347 } else if (proc_cmdline_key_streq(key, "systemd.dump_core")) {
4fc935ca 348
1d84ad94 349 r = value ? parse_boolean(value) : true;
fb472900 350 if (r < 0)
5e1ee764 351 log_warning_errno(r, "Failed to parse dump core switch %s, ignoring: %m", value);
4fc935ca 352 else
fa0f4d8a 353 arg_dump_core = r;
4fc935ca 354
c6885f5f
FB
355 } else if (proc_cmdline_key_streq(key, "systemd.early_core_pattern")) {
356
357 if (proc_cmdline_value_missing(key, value))
358 return 0;
359
360 if (path_is_absolute(value))
361 (void) parse_path_argument_and_warn(value, false, &arg_early_core_pattern);
362 else
363 log_warning("Specified core pattern '%s' is not an absolute path, ignoring.", value);
364
1d84ad94 365 } else if (proc_cmdline_key_streq(key, "systemd.crash_chvt")) {
b9e74c39 366
1d84ad94
LP
367 if (!value)
368 arg_crash_chvt = 0; /* turn on */
5e1ee764 369 else {
a07a7324 370 r = parse_crash_chvt(value, &arg_crash_chvt);
5e1ee764
YW
371 if (r < 0)
372 log_warning_errno(r, "Failed to parse crash chvt switch %s, ignoring: %m", value);
373 }
b9e74c39 374
1d84ad94 375 } else if (proc_cmdline_key_streq(key, "systemd.crash_shell")) {
4fc935ca 376
1d84ad94 377 r = value ? parse_boolean(value) : true;
fb472900 378 if (r < 0)
5e1ee764 379 log_warning_errno(r, "Failed to parse crash shell switch %s, ignoring: %m", value);
4fc935ca 380 else
fa0f4d8a 381 arg_crash_shell = r;
5e7ee61c 382
1d84ad94 383 } else if (proc_cmdline_key_streq(key, "systemd.crash_reboot")) {
5e7ee61c 384
1d84ad94 385 r = value ? parse_boolean(value) : true;
b9e74c39 386 if (r < 0)
5e1ee764 387 log_warning_errno(r, "Failed to parse crash reboot switch %s, ignoring: %m", value);
5e7ee61c 388 else
b9e74c39 389 arg_crash_reboot = r;
5e7ee61c 390
1d84ad94
LP
391 } else if (proc_cmdline_key_streq(key, "systemd.confirm_spawn")) {
392 char *s;
7d5ceb64 393
1d84ad94 394 r = parse_confirm_spawn(value, &s);
059cb385 395 if (r < 0)
5e1ee764
YW
396 log_warning_errno(r, "Failed to parse confirm_spawn switch %s, ignoring: %m", value);
397 else
398 free_and_replace(arg_confirm_spawn, s);
601f6a1e 399
2a12e32e
JK
400 } else if (proc_cmdline_key_streq(key, "systemd.service_watchdogs")) {
401
402 r = value ? parse_boolean(value) : true;
403 if (r < 0)
5e1ee764 404 log_warning_errno(r, "Failed to parse service watchdog switch %s, ignoring: %m", value);
2a12e32e
JK
405 else
406 arg_service_watchdogs = r;
407
1d84ad94 408 } else if (proc_cmdline_key_streq(key, "systemd.show_status")) {
9e58ff9c 409
1d84ad94
LP
410 if (value) {
411 r = parse_show_status(value, &arg_show_status);
412 if (r < 0)
5e1ee764 413 log_warning_errno(r, "Failed to parse show status switch %s, ignoring: %m", value);
1d84ad94
LP
414 } else
415 arg_show_status = SHOW_STATUS_YES;
059cb385 416
36cf4507
ZJS
417 } else if (proc_cmdline_key_streq(key, "systemd.status_unit_format")) {
418
419 if (proc_cmdline_value_missing(key, value))
420 return 0;
421
422 r = status_unit_format_from_string(value);
423 if (r < 0)
424 log_warning_errno(r, "Failed to parse %s=%s, ignoring: %m", key, value);
425 else
426 arg_status_unit_format = r;
427
1d84ad94
LP
428 } else if (proc_cmdline_key_streq(key, "systemd.default_standard_output")) {
429
430 if (proc_cmdline_value_missing(key, value))
431 return 0;
0a494f1f 432
059cb385 433 r = exec_output_from_string(value);
fb472900 434 if (r < 0)
5e1ee764 435 log_warning_errno(r, "Failed to parse default standard output switch %s, ignoring: %m", value);
0a494f1f
LP
436 else
437 arg_default_std_output = r;
0a494f1f 438
1d84ad94
LP
439 } else if (proc_cmdline_key_streq(key, "systemd.default_standard_error")) {
440
441 if (proc_cmdline_value_missing(key, value))
442 return 0;
059cb385
LP
443
444 r = exec_output_from_string(value);
fb472900 445 if (r < 0)
5e1ee764 446 log_warning_errno(r, "Failed to parse default standard error switch %s, ignoring: %m", value);
0a494f1f
LP
447 else
448 arg_default_std_error = r;
9e7c5357 449
1d84ad94
LP
450 } else if (streq(key, "systemd.setenv")) {
451
452 if (proc_cmdline_value_missing(key, value))
453 return 0;
059cb385
LP
454
455 if (env_assignment_is_valid(value)) {
e21fea24
KS
456 char **env;
457
059cb385 458 env = strv_env_set(arg_default_environment, value);
1d84ad94
LP
459 if (!env)
460 return log_oom();
461
462 arg_default_environment = env;
e21fea24 463 } else
059cb385 464 log_warning("Environment variable name '%s' is not valid. Ignoring.", value);
9e58ff9c 465
1d84ad94
LP
466 } else if (proc_cmdline_key_streq(key, "systemd.machine_id")) {
467
468 if (proc_cmdline_value_missing(key, value))
469 return 0;
470
471 r = set_machine_id(value);
472 if (r < 0)
5e1ee764 473 log_warning_errno(r, "MachineID '%s' is not valid, ignoring: %m", value);
ee48dbd5 474
1d84ad94
LP
475 } else if (proc_cmdline_key_streq(key, "systemd.default_timeout_start_sec")) {
476
477 if (proc_cmdline_value_missing(key, value))
478 return 0;
479
480 r = parse_sec(value, &arg_default_timeout_start_usec);
481 if (r < 0)
5e1ee764 482 log_warning_errno(r, "Failed to parse default start timeout '%s', ignoring: %m", value);
1d84ad94
LP
483
484 if (arg_default_timeout_start_usec <= 0)
485 arg_default_timeout_start_usec = USEC_INFINITY;
ee48dbd5 486
68d58f38
LP
487 } else if (proc_cmdline_key_streq(key, "systemd.cpu_affinity")) {
488
489 if (proc_cmdline_value_missing(key, value))
490 return 0;
491
492 r = parse_cpu_set(value, &arg_cpu_affinity);
493 if (r < 0)
162392b7 494 log_warning_errno(r, "Failed to parse CPU affinity mask '%s', ignoring: %m", value);
68d58f38 495
8a2c1fbf
EJ
496 } else if (proc_cmdline_key_streq(key, "systemd.watchdog_device")) {
497
498 if (proc_cmdline_value_missing(key, value))
499 return 0;
500
c6885f5f 501 (void) parse_path_argument_and_warn(value, false, &arg_watchdog_device);
8a2c1fbf 502
3753325b
LP
503 } else if (proc_cmdline_key_streq(key, "systemd.clock_usec")) {
504
505 if (proc_cmdline_value_missing(key, value))
506 return 0;
507
508 r = safe_atou64(value, &arg_clock_usec);
509 if (r < 0)
510 log_warning_errno(r, "Failed to parse systemd.clock_usec= argument, ignoring: %s", value);
511
d247f232
LP
512 } else if (proc_cmdline_key_streq(key, "systemd.random_seed")) {
513 void *p;
514 size_t sz;
515
516 if (proc_cmdline_value_missing(key, value))
517 return 0;
518
519 r = unbase64mem(value, (size_t) -1, &p, &sz);
520 if (r < 0)
521 log_warning_errno(r, "Failed to parse systemd.random_seed= argument, ignoring: %s", value);
522
523 free(arg_random_seed);
524 arg_random_seed = sz > 0 ? p : mfree(p);
525 arg_random_seed_size = sz;
526
059cb385 527 } else if (streq(key, "quiet") && !value) {
d7b15e0a 528
7a293242 529 if (arg_show_status == _SHOW_STATUS_INVALID)
0d066dd1 530 arg_show_status = SHOW_STATUS_ERROR;
059cb385
LP
531
532 } else if (streq(key, "debug") && !value) {
d7b15e0a 533
1de1c9c3
LP
534 /* Note that log_parse_environment() handles 'debug'
535 * too, and sets the log level to LOG_DEBUG. */
d7b15e0a 536
75f86906 537 if (detect_container() > 0)
b2103dcc 538 log_set_target(LOG_TARGET_CONSOLE);
059cb385 539
dcd61450 540 } else if (!value) {
e2c9a131 541 const char *target;
f170852a 542
ceae6295 543 /* Compatible with SysV, but supported independently even if SysV compatibility is disabled. */
e2c9a131
EV
544 target = runlevel_to_target(key);
545 if (target)
cd57038a 546 return free_and_strdup_warn(&arg_default_unit, target);
f170852a
LP
547 }
548
549 return 0;
550}
551
e8e581bf
ZJS
552#define DEFINE_SETTER(name, func, descr) \
553 static int name(const char *unit, \
554 const char *filename, \
555 unsigned line, \
556 const char *section, \
71a61510 557 unsigned section_line, \
e8e581bf
ZJS
558 const char *lvalue, \
559 int ltype, \
560 const char *rvalue, \
561 void *data, \
562 void *userdata) { \
563 \
564 int r; \
565 \
566 assert(filename); \
567 assert(lvalue); \
568 assert(rvalue); \
569 \
570 r = func(rvalue); \
571 if (r < 0) \
d1cefe0a
LP
572 log_syntax(unit, LOG_ERR, filename, line, r, \
573 "Invalid " descr "'%s': %m", \
574 rvalue); \
e8e581bf
ZJS
575 \
576 return 0; \
577 }
487393e9 578
a6ecbf83
FB
579DEFINE_SETTER(config_parse_level2, log_set_max_level_from_string, "log level");
580DEFINE_SETTER(config_parse_target, log_set_target_from_string, "target");
c5673ed0 581DEFINE_SETTER(config_parse_color, log_show_color_from_string, "color");
a6ecbf83 582DEFINE_SETTER(config_parse_location, log_show_location_from_string, "location");
c5673ed0 583DEFINE_SETTER(config_parse_time, log_show_time_from_string, "time");
487393e9 584
a61d6874
ZJS
585static int config_parse_default_timeout_abort(
586 const char *unit,
587 const char *filename,
588 unsigned line,
589 const char *section,
590 unsigned section_line,
591 const char *lvalue,
592 int ltype,
593 const char *rvalue,
594 void *data,
595 void *userdata) {
596 int r;
597
598 r = config_parse_timeout_abort(unit, filename, line, section, section_line, lvalue, ltype, rvalue,
599 &arg_default_timeout_abort_usec, userdata);
600 if (r >= 0)
601 arg_default_timeout_abort_set = r;
602 return 0;
603}
487393e9 604
a61d6874 605static int parse_config_file(void) {
f975e971 606 const ConfigTableItem items[] = {
a61d6874
ZJS
607 { "Manager", "LogLevel", config_parse_level2, 0, NULL },
608 { "Manager", "LogTarget", config_parse_target, 0, NULL },
609 { "Manager", "LogColor", config_parse_color, 0, NULL },
610 { "Manager", "LogLocation", config_parse_location, 0, NULL },
c5673ed0 611 { "Manager", "LogTime", config_parse_time, 0, NULL },
a61d6874
ZJS
612 { "Manager", "DumpCore", config_parse_bool, 0, &arg_dump_core },
613 { "Manager", "CrashChVT", /* legacy */ config_parse_crash_chvt, 0, &arg_crash_chvt },
614 { "Manager", "CrashChangeVT", config_parse_crash_chvt, 0, &arg_crash_chvt },
615 { "Manager", "CrashShell", config_parse_bool, 0, &arg_crash_shell },
616 { "Manager", "CrashReboot", config_parse_bool, 0, &arg_crash_reboot },
617 { "Manager", "ShowStatus", config_parse_show_status, 0, &arg_show_status },
618 { "Manager", "StatusUnitFormat", config_parse_status_unit_format, 0, &arg_status_unit_format },
619 { "Manager", "CPUAffinity", config_parse_cpu_affinity2, 0, &arg_cpu_affinity },
620 { "Manager", "NUMAPolicy", config_parse_numa_policy, 0, &arg_numa_policy.type },
621 { "Manager", "NUMAMask", config_parse_numa_mask, 0, &arg_numa_policy },
622 { "Manager", "JoinControllers", config_parse_warn_compat, DISABLED_CONFIGURATION, NULL },
623 { "Manager", "RuntimeWatchdogSec", config_parse_sec, 0, &arg_runtime_watchdog },
624 { "Manager", "RebootWatchdogSec", config_parse_sec, 0, &arg_reboot_watchdog },
625 { "Manager", "ShutdownWatchdogSec", config_parse_sec, 0, &arg_reboot_watchdog }, /* obsolete alias */
626 { "Manager", "KExecWatchdogSec", config_parse_sec, 0, &arg_kexec_watchdog },
627 { "Manager", "WatchdogDevice", config_parse_path, 0, &arg_watchdog_device },
628 { "Manager", "CapabilityBoundingSet", config_parse_capability_set, 0, &arg_capability_bounding_set },
629 { "Manager", "NoNewPrivileges", config_parse_bool, 0, &arg_no_new_privs },
349cc4a5 630#if HAVE_SECCOMP
a61d6874 631 { "Manager", "SystemCallArchitectures", config_parse_syscall_archs, 0, &arg_syscall_archs },
89fffa27 632#endif
a61d6874
ZJS
633 { "Manager", "TimerSlackNSec", config_parse_nsec, 0, &arg_timer_slack_nsec },
634 { "Manager", "DefaultTimerAccuracySec", config_parse_sec, 0, &arg_default_timer_accuracy_usec },
635 { "Manager", "DefaultStandardOutput", config_parse_output_restricted, 0, &arg_default_std_output },
636 { "Manager", "DefaultStandardError", config_parse_output_restricted, 0, &arg_default_std_error },
637 { "Manager", "DefaultTimeoutStartSec", config_parse_sec, 0, &arg_default_timeout_start_usec },
638 { "Manager", "DefaultTimeoutStopSec", config_parse_sec, 0, &arg_default_timeout_stop_usec },
639 { "Manager", "DefaultTimeoutAbortSec", config_parse_default_timeout_abort, 0, NULL },
640 { "Manager", "DefaultRestartSec", config_parse_sec, 0, &arg_default_restart_usec },
641 { "Manager", "DefaultStartLimitInterval", config_parse_sec, 0, &arg_default_start_limit_interval }, /* obsolete alias */
642 { "Manager", "DefaultStartLimitIntervalSec", config_parse_sec, 0, &arg_default_start_limit_interval },
643 { "Manager", "DefaultStartLimitBurst", config_parse_unsigned, 0, &arg_default_start_limit_burst },
644 { "Manager", "DefaultEnvironment", config_parse_environ, 0, &arg_default_environment },
645 { "Manager", "DefaultLimitCPU", config_parse_rlimit, RLIMIT_CPU, arg_default_rlimit },
646 { "Manager", "DefaultLimitFSIZE", config_parse_rlimit, RLIMIT_FSIZE, arg_default_rlimit },
647 { "Manager", "DefaultLimitDATA", config_parse_rlimit, RLIMIT_DATA, arg_default_rlimit },
648 { "Manager", "DefaultLimitSTACK", config_parse_rlimit, RLIMIT_STACK, arg_default_rlimit },
649 { "Manager", "DefaultLimitCORE", config_parse_rlimit, RLIMIT_CORE, arg_default_rlimit },
650 { "Manager", "DefaultLimitRSS", config_parse_rlimit, RLIMIT_RSS, arg_default_rlimit },
651 { "Manager", "DefaultLimitNOFILE", config_parse_rlimit, RLIMIT_NOFILE, arg_default_rlimit },
652 { "Manager", "DefaultLimitAS", config_parse_rlimit, RLIMIT_AS, arg_default_rlimit },
653 { "Manager", "DefaultLimitNPROC", config_parse_rlimit, RLIMIT_NPROC, arg_default_rlimit },
654 { "Manager", "DefaultLimitMEMLOCK", config_parse_rlimit, RLIMIT_MEMLOCK, arg_default_rlimit },
655 { "Manager", "DefaultLimitLOCKS", config_parse_rlimit, RLIMIT_LOCKS, arg_default_rlimit },
656 { "Manager", "DefaultLimitSIGPENDING", config_parse_rlimit, RLIMIT_SIGPENDING, arg_default_rlimit },
657 { "Manager", "DefaultLimitMSGQUEUE", config_parse_rlimit, RLIMIT_MSGQUEUE, arg_default_rlimit },
658 { "Manager", "DefaultLimitNICE", config_parse_rlimit, RLIMIT_NICE, arg_default_rlimit },
659 { "Manager", "DefaultLimitRTPRIO", config_parse_rlimit, RLIMIT_RTPRIO, arg_default_rlimit },
660 { "Manager", "DefaultLimitRTTIME", config_parse_rlimit, RLIMIT_RTTIME, arg_default_rlimit },
661 { "Manager", "DefaultCPUAccounting", config_parse_tristate, 0, &arg_default_cpu_accounting },
662 { "Manager", "DefaultIOAccounting", config_parse_bool, 0, &arg_default_io_accounting },
663 { "Manager", "DefaultIPAccounting", config_parse_bool, 0, &arg_default_ip_accounting },
664 { "Manager", "DefaultBlockIOAccounting", config_parse_bool, 0, &arg_default_blockio_accounting },
665 { "Manager", "DefaultMemoryAccounting", config_parse_bool, 0, &arg_default_memory_accounting },
666 { "Manager", "DefaultTasksAccounting", config_parse_bool, 0, &arg_default_tasks_accounting },
667 { "Manager", "DefaultTasksMax", config_parse_tasks_max, 0, &arg_default_tasks_max },
668 { "Manager", "CtrlAltDelBurstAction", config_parse_emergency_action, 0, &arg_cad_burst_action },
669 { "Manager", "DefaultOOMPolicy", config_parse_oom_policy, 0, &arg_default_oom_policy },
d3b1c508 670 {}
487393e9
LP
671 };
672
1b907b5c 673 const char *fn, *conf_dirs_nulstr;
487393e9 674
463d0d15 675 fn = arg_system ?
75eb6154
LP
676 PKGSYSCONFDIR "/system.conf" :
677 PKGSYSCONFDIR "/user.conf";
678
463d0d15 679 conf_dirs_nulstr = arg_system ?
75eb6154
LP
680 CONF_PATHS_NULSTR("systemd/system.conf.d") :
681 CONF_PATHS_NULSTR("systemd/user.conf.d");
682
4f9ff96a
LP
683 (void) config_parse_many_nulstr(
684 fn, conf_dirs_nulstr,
685 "Manager\0",
686 config_item_table_lookup, items,
687 CONFIG_PARSE_WARN,
688 NULL,
689 NULL);
36c16a7c
LP
690
691 /* Traditionally "0" was used to turn off the default unit timeouts. Fix this up so that we used USEC_INFINITY
692 * like everywhere else. */
693 if (arg_default_timeout_start_usec <= 0)
694 arg_default_timeout_start_usec = USEC_INFINITY;
695 if (arg_default_timeout_stop_usec <= 0)
696 arg_default_timeout_stop_usec = USEC_INFINITY;
487393e9 697
487393e9
LP
698 return 0;
699}
700
85cb4151 701static void set_manager_defaults(Manager *m) {
06af2a04
TB
702
703 assert(m);
704
5b65ae15
LP
705 /* Propagates the various default unit property settings into the manager object, i.e. properties that do not
706 * affect the manager itself, but are just what newly allocated units will have set if they haven't set
707 * anything else. (Also see set_manager_settings() for the settings that affect the manager's own behaviour) */
708
06af2a04
TB
709 m->default_timer_accuracy_usec = arg_default_timer_accuracy_usec;
710 m->default_std_output = arg_default_std_output;
711 m->default_std_error = arg_default_std_error;
712 m->default_timeout_start_usec = arg_default_timeout_start_usec;
713 m->default_timeout_stop_usec = arg_default_timeout_stop_usec;
dc653bf4
JK
714 m->default_timeout_abort_usec = arg_default_timeout_abort_usec;
715 m->default_timeout_abort_set = arg_default_timeout_abort_set;
06af2a04
TB
716 m->default_restart_usec = arg_default_restart_usec;
717 m->default_start_limit_interval = arg_default_start_limit_interval;
718 m->default_start_limit_burst = arg_default_start_limit_burst;
a88c5b8a
CD
719
720 /* On 4.15+ with unified hierarchy, CPU accounting is essentially free as it doesn't require the CPU
721 * controller to be enabled, so the default is to enable it unless we got told otherwise. */
722 if (arg_default_cpu_accounting >= 0)
723 m->default_cpu_accounting = arg_default_cpu_accounting;
724 else
725 m->default_cpu_accounting = cpu_accounting_is_cheap();
726
13c31542 727 m->default_io_accounting = arg_default_io_accounting;
377bfd2d 728 m->default_ip_accounting = arg_default_ip_accounting;
06af2a04
TB
729 m->default_blockio_accounting = arg_default_blockio_accounting;
730 m->default_memory_accounting = arg_default_memory_accounting;
03a7b521 731 m->default_tasks_accounting = arg_default_tasks_accounting;
0af20ea2 732 m->default_tasks_max = arg_default_tasks_max;
afcfaa69 733 m->default_oom_policy = arg_default_oom_policy;
06af2a04 734
79a224c4
LP
735 (void) manager_set_default_rlimits(m, arg_default_rlimit);
736
737 (void) manager_default_environment(m);
738 (void) manager_transient_environment_add(m, arg_default_environment);
06af2a04
TB
739}
740
7b46fc6a
LP
741static void set_manager_settings(Manager *m) {
742
743 assert(m);
744
986935cf
FB
745 /* Propagates the various manager settings into the manager object, i.e. properties that
746 * effect the manager itself (as opposed to just being inherited into newly allocated
747 * units, see set_manager_defaults() above). */
5b65ae15 748
7b46fc6a 749 m->confirm_spawn = arg_confirm_spawn;
2a12e32e 750 m->service_watchdogs = arg_service_watchdogs;
7b46fc6a
LP
751 m->cad_burst_action = arg_cad_burst_action;
752
986935cf
FB
753 manager_set_watchdog(m, WATCHDOG_RUNTIME, arg_runtime_watchdog);
754 manager_set_watchdog(m, WATCHDOG_REBOOT, arg_reboot_watchdog);
755 manager_set_watchdog(m, WATCHDOG_KEXEC, arg_kexec_watchdog);
756
7365a296 757 manager_set_show_status(m, arg_show_status, "commandline");
36cf4507 758 m->status_unit_format = arg_status_unit_format;
7b46fc6a
LP
759}
760
f170852a 761static int parse_argv(int argc, char *argv[]) {
f170852a
LP
762 enum {
763 ARG_LOG_LEVEL = 0x100,
764 ARG_LOG_TARGET,
bbe63281
LP
765 ARG_LOG_COLOR,
766 ARG_LOG_LOCATION,
c5673ed0 767 ARG_LOG_TIME,
2f198e2f 768 ARG_UNIT,
edb9aaa8 769 ARG_SYSTEM,
af2d49f7 770 ARG_USER,
e537352b 771 ARG_TEST,
b87c2aa6 772 ARG_NO_PAGER,
9ba0bc4e 773 ARG_VERSION,
80876c20 774 ARG_DUMP_CONFIGURATION_ITEMS,
bbc1acab 775 ARG_DUMP_BUS_PROPERTIES,
5c08257b 776 ARG_BUS_INTROSPECT,
9e58ff9c 777 ARG_DUMP_CORE,
b9e74c39 778 ARG_CRASH_CHVT,
9e58ff9c 779 ARG_CRASH_SHELL,
b9e74c39 780 ARG_CRASH_REBOOT,
a16e1123 781 ARG_CONFIRM_SPAWN,
9e58ff9c 782 ARG_SHOW_STATUS,
4288f619 783 ARG_DESERIALIZE,
2660882b 784 ARG_SWITCHED_ROOT,
0a494f1f 785 ARG_DEFAULT_STD_OUTPUT,
ee48dbd5 786 ARG_DEFAULT_STD_ERROR,
2a12e32e
JK
787 ARG_MACHINE_ID,
788 ARG_SERVICE_WATCHDOGS,
f170852a
LP
789 };
790
791 static const struct option options[] = {
a16e1123
LP
792 { "log-level", required_argument, NULL, ARG_LOG_LEVEL },
793 { "log-target", required_argument, NULL, ARG_LOG_TARGET },
bbe63281
LP
794 { "log-color", optional_argument, NULL, ARG_LOG_COLOR },
795 { "log-location", optional_argument, NULL, ARG_LOG_LOCATION },
c5673ed0 796 { "log-time", optional_argument, NULL, ARG_LOG_TIME },
2f198e2f 797 { "unit", required_argument, NULL, ARG_UNIT },
edb9aaa8 798 { "system", no_argument, NULL, ARG_SYSTEM },
af2d49f7 799 { "user", no_argument, NULL, ARG_USER },
a16e1123 800 { "test", no_argument, NULL, ARG_TEST },
b87c2aa6 801 { "no-pager", no_argument, NULL, ARG_NO_PAGER },
a16e1123 802 { "help", no_argument, NULL, 'h' },
9ba0bc4e 803 { "version", no_argument, NULL, ARG_VERSION },
a16e1123 804 { "dump-configuration-items", no_argument, NULL, ARG_DUMP_CONFIGURATION_ITEMS },
bbc1acab 805 { "dump-bus-properties", no_argument, NULL, ARG_DUMP_BUS_PROPERTIES },
5c08257b 806 { "bus-introspect", required_argument, NULL, ARG_BUS_INTROSPECT },
a5d87bf0 807 { "dump-core", optional_argument, NULL, ARG_DUMP_CORE },
b9e74c39 808 { "crash-chvt", required_argument, NULL, ARG_CRASH_CHVT },
a5d87bf0 809 { "crash-shell", optional_argument, NULL, ARG_CRASH_SHELL },
b9e74c39 810 { "crash-reboot", optional_argument, NULL, ARG_CRASH_REBOOT },
a5d87bf0 811 { "confirm-spawn", optional_argument, NULL, ARG_CONFIRM_SPAWN },
6e98720f 812 { "show-status", optional_argument, NULL, ARG_SHOW_STATUS },
a16e1123 813 { "deserialize", required_argument, NULL, ARG_DESERIALIZE },
2660882b 814 { "switched-root", no_argument, NULL, ARG_SWITCHED_ROOT },
0a494f1f
LP
815 { "default-standard-output", required_argument, NULL, ARG_DEFAULT_STD_OUTPUT, },
816 { "default-standard-error", required_argument, NULL, ARG_DEFAULT_STD_ERROR, },
ee48dbd5 817 { "machine-id", required_argument, NULL, ARG_MACHINE_ID },
2a12e32e 818 { "service-watchdogs", required_argument, NULL, ARG_SERVICE_WATCHDOGS },
fb472900 819 {}
f170852a
LP
820 };
821
822 int c, r;
823
824 assert(argc >= 1);
825 assert(argv);
826
df0ff127 827 if (getpid_cached() == 1)
b770165a
LP
828 opterr = 0;
829
099663ff 830 while ((c = getopt_long(argc, argv, "hDbsz:", options, NULL)) >= 0)
f170852a
LP
831
832 switch (c) {
833
834 case ARG_LOG_LEVEL:
fb472900 835 r = log_set_max_level_from_string(optarg);
2b5107e1
ZJS
836 if (r < 0)
837 return log_error_errno(r, "Failed to parse log level \"%s\": %m", optarg);
f170852a
LP
838
839 break;
840
841 case ARG_LOG_TARGET:
fb472900 842 r = log_set_target_from_string(optarg);
2b5107e1
ZJS
843 if (r < 0)
844 return log_error_errno(r, "Failed to parse log target \"%s\": %m", optarg);
f170852a
LP
845
846 break;
847
bbe63281
LP
848 case ARG_LOG_COLOR:
849
d0b170c8 850 if (optarg) {
fb472900 851 r = log_show_color_from_string(optarg);
2b5107e1
ZJS
852 if (r < 0)
853 return log_error_errno(r, "Failed to parse log color setting \"%s\": %m",
854 optarg);
d0b170c8
LP
855 } else
856 log_show_color(true);
bbe63281
LP
857
858 break;
859
860 case ARG_LOG_LOCATION:
d0b170c8 861 if (optarg) {
fb472900 862 r = log_show_location_from_string(optarg);
2b5107e1
ZJS
863 if (r < 0)
864 return log_error_errno(r, "Failed to parse log location setting \"%s\": %m",
865 optarg);
d0b170c8
LP
866 } else
867 log_show_location(true);
bbe63281
LP
868
869 break;
870
c5673ed0
DS
871 case ARG_LOG_TIME:
872
873 if (optarg) {
874 r = log_show_time_from_string(optarg);
875 if (r < 0)
876 return log_error_errno(r, "Failed to parse log time setting \"%s\": %m",
877 optarg);
878 } else
879 log_show_time(true);
880
881 break;
882
0a494f1f 883 case ARG_DEFAULT_STD_OUTPUT:
fb472900 884 r = exec_output_from_string(optarg);
2b5107e1
ZJS
885 if (r < 0)
886 return log_error_errno(r, "Failed to parse default standard output setting \"%s\": %m",
887 optarg);
888 arg_default_std_output = r;
0a494f1f
LP
889 break;
890
891 case ARG_DEFAULT_STD_ERROR:
fb472900 892 r = exec_output_from_string(optarg);
2b5107e1
ZJS
893 if (r < 0)
894 return log_error_errno(r, "Failed to parse default standard error output setting \"%s\": %m",
895 optarg);
896 arg_default_std_error = r;
0a494f1f
LP
897 break;
898
2f198e2f 899 case ARG_UNIT:
e6e242ad 900 r = free_and_strdup(&arg_default_unit, optarg);
23bbb0de 901 if (r < 0)
2b5107e1 902 return log_error_errno(r, "Failed to set default unit \"%s\": %m", optarg);
f170852a
LP
903
904 break;
905
edb9aaa8 906 case ARG_SYSTEM:
463d0d15 907 arg_system = true;
edb9aaa8 908 break;
a5dab5ce 909
af2d49f7 910 case ARG_USER:
463d0d15 911 arg_system = false;
a5dab5ce 912 break;
a5dab5ce 913
e965d56d 914 case ARG_TEST:
fa0f4d8a 915 arg_action = ACTION_TEST;
b87c2aa6
ZJS
916 break;
917
918 case ARG_NO_PAGER:
0221d68a 919 arg_pager_flags |= PAGER_DISABLE;
e965d56d
LP
920 break;
921
9ba0bc4e
ZJS
922 case ARG_VERSION:
923 arg_action = ACTION_VERSION;
924 break;
925
e537352b 926 case ARG_DUMP_CONFIGURATION_ITEMS:
fa0f4d8a 927 arg_action = ACTION_DUMP_CONFIGURATION_ITEMS;
e537352b
LP
928 break;
929
bbc1acab
YW
930 case ARG_DUMP_BUS_PROPERTIES:
931 arg_action = ACTION_DUMP_BUS_PROPERTIES;
932 break;
933
5c08257b
ZJS
934 case ARG_BUS_INTROSPECT:
935 arg_bus_introspect = optarg;
936 arg_action = ACTION_BUS_INTROSPECT;
937 break;
938
9e58ff9c 939 case ARG_DUMP_CORE:
b9e74c39
LP
940 if (!optarg)
941 arg_dump_core = true;
942 else {
943 r = parse_boolean(optarg);
944 if (r < 0)
2b5107e1
ZJS
945 return log_error_errno(r, "Failed to parse dump core boolean: \"%s\": %m",
946 optarg);
b9e74c39 947 arg_dump_core = r;
a5d87bf0 948 }
b9e74c39
LP
949 break;
950
951 case ARG_CRASH_CHVT:
a07a7324 952 r = parse_crash_chvt(optarg, &arg_crash_chvt);
b9e74c39 953 if (r < 0)
2b5107e1
ZJS
954 return log_error_errno(r, "Failed to parse crash virtual terminal index: \"%s\": %m",
955 optarg);
9e58ff9c
LP
956 break;
957
958 case ARG_CRASH_SHELL:
b9e74c39
LP
959 if (!optarg)
960 arg_crash_shell = true;
961 else {
962 r = parse_boolean(optarg);
963 if (r < 0)
2b5107e1
ZJS
964 return log_error_errno(r, "Failed to parse crash shell boolean: \"%s\": %m",
965 optarg);
b9e74c39
LP
966 arg_crash_shell = r;
967 }
968 break;
969
970 case ARG_CRASH_REBOOT:
971 if (!optarg)
972 arg_crash_reboot = true;
973 else {
974 r = parse_boolean(optarg);
975 if (r < 0)
2b5107e1
ZJS
976 return log_error_errno(r, "Failed to parse crash shell boolean: \"%s\": %m",
977 optarg);
b9e74c39 978 arg_crash_reboot = r;
a5d87bf0 979 }
9e58ff9c
LP
980 break;
981
80876c20 982 case ARG_CONFIRM_SPAWN:
7d5ceb64
FB
983 arg_confirm_spawn = mfree(arg_confirm_spawn);
984
985 r = parse_confirm_spawn(optarg, &arg_confirm_spawn);
986 if (r < 0)
2b5107e1
ZJS
987 return log_error_errno(r, "Failed to parse confirm spawn option: \"%s\": %m",
988 optarg);
80876c20
LP
989 break;
990
2a12e32e
JK
991 case ARG_SERVICE_WATCHDOGS:
992 r = parse_boolean(optarg);
993 if (r < 0)
2b5107e1
ZJS
994 return log_error_errno(r, "Failed to parse service watchdogs boolean: \"%s\": %m",
995 optarg);
2a12e32e
JK
996 arg_service_watchdogs = r;
997 break;
998
9e58ff9c 999 case ARG_SHOW_STATUS:
d450b6f2
ZJS
1000 if (optarg) {
1001 r = parse_show_status(optarg, &arg_show_status);
ac7ec288 1002 if (r < 0)
2b5107e1
ZJS
1003 return log_error_errno(r, "Failed to parse show status boolean: \"%s\": %m",
1004 optarg);
d450b6f2
ZJS
1005 } else
1006 arg_show_status = SHOW_STATUS_YES;
6e98720f 1007 break;
a5d87bf0 1008
a16e1123
LP
1009 case ARG_DESERIALIZE: {
1010 int fd;
1011 FILE *f;
1012
01e10de3 1013 r = safe_atoi(optarg, &fd);
2b5107e1
ZJS
1014 if (r < 0)
1015 log_error_errno(r, "Failed to parse deserialize option \"%s\": %m", optarg);
baaa35ad
ZJS
1016 if (fd < 0)
1017 return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
1018 "Invalid deserialize fd: %d",
1019 fd);
a16e1123 1020
b9e74c39 1021 (void) fd_cloexec(fd, true);
01e10de3
LP
1022
1023 f = fdopen(fd, "r");
4a62c710 1024 if (!f)
2b5107e1 1025 return log_error_errno(errno, "Failed to open serialization fd %d: %m", fd);
a16e1123 1026
74ca738f 1027 safe_fclose(arg_serialization);
d3b1c508 1028 arg_serialization = f;
a16e1123
LP
1029
1030 break;
1031 }
1032
2660882b 1033 case ARG_SWITCHED_ROOT:
bf4df7c3 1034 arg_switched_root = true;
d03bc1b8
HH
1035 break;
1036
ee48dbd5
NC
1037 case ARG_MACHINE_ID:
1038 r = set_machine_id(optarg);
54500613 1039 if (r < 0)
2b5107e1 1040 return log_error_errno(r, "MachineID '%s' is not valid: %m", optarg);
ee48dbd5
NC
1041 break;
1042
f170852a 1043 case 'h':
fa0f4d8a 1044 arg_action = ACTION_HELP;
f170852a
LP
1045 break;
1046
1d2e23ab
LP
1047 case 'D':
1048 log_set_max_level(LOG_DEBUG);
1049 break;
1050
099663ff
LP
1051 case 'b':
1052 case 's':
1053 case 'z':
cd57038a
ZJS
1054 /* Just to eat away the sysvinit kernel cmdline args that we'll parse in
1055 * parse_proc_cmdline_item() or ignore, without any getopt() error messages.
1056 */
099663ff 1057 case '?':
df0ff127 1058 if (getpid_cached() != 1)
099663ff 1059 return -EINVAL;
601185b4
ZJS
1060 else
1061 return 0;
099663ff 1062
601185b4
ZJS
1063 default:
1064 assert_not_reached("Unhandled option code.");
f170852a
LP
1065 }
1066
d7a0f1f4 1067 if (optind < argc && getpid_cached() != 1)
d821e6d6
LP
1068 /* Hmm, when we aren't run as init system
1069 * let's complain about excess arguments */
baaa35ad
ZJS
1070 return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
1071 "Excess arguments.");
d821e6d6 1072
f170852a
LP
1073 return 0;
1074}
1075
1076static int help(void) {
37ec0fdd
LP
1077 _cleanup_free_ char *link = NULL;
1078 int r;
1079
1080 r = terminal_urlify_man("systemd", "1", &link);
1081 if (r < 0)
1082 return log_oom();
f170852a 1083
2e33c433 1084 printf("%s [OPTIONS...]\n\n"
7ae47326
ZJS
1085 "%sStarts and monitors system and user services.%s\n\n"
1086 "This program takes no positional arguments.\n\n"
1087 "%sOptions%s:\n"
e537352b 1088 " -h --help Show this help\n"
cb4069d9 1089 " --version Show version\n"
cd69e88b
LP
1090 " --test Determine initial transaction, dump it and exit\n"
1091 " --system In combination with --test: operate as system service manager\n"
1092 " --user In combination with --test: operate as per-user service manager\n"
b87c2aa6 1093 " --no-pager Do not pipe output into a pager\n"
80876c20 1094 " --dump-configuration-items Dump understood unit configuration items\n"
bbc1acab 1095 " --dump-bus-properties Dump exposed bus properties\n"
5c08257b 1096 " --bus-introspect=PATH Write XML introspection data\n"
9e58ff9c 1097 " --unit=UNIT Set default unit\n"
b9e74c39
LP
1098 " --dump-core[=BOOL] Dump core on crash\n"
1099 " --crash-vt=NR Change to specified VT on crash\n"
1100 " --crash-reboot[=BOOL] Reboot on crash\n"
1101 " --crash-shell[=BOOL] Run shell on crash\n"
1102 " --confirm-spawn[=BOOL] Ask for confirmation when spawning processes\n"
1103 " --show-status[=BOOL] Show status updates on the console during bootup\n"
c1dc6153 1104 " --log-target=TARGET Set log target (console, journal, kmsg, journal-or-kmsg, null)\n"
9e58ff9c 1105 " --log-level=LEVEL Set log level (debug, info, notice, warning, err, crit, alert, emerg)\n"
b9e74c39
LP
1106 " --log-color[=BOOL] Highlight important log messages\n"
1107 " --log-location[=BOOL] Include code location in log messages\n"
c5673ed0 1108 " --log-time[=BOOL] Prefix log messages with current time\n"
0a494f1f 1109 " --default-standard-output= Set default standard output for services\n"
37ec0fdd
LP
1110 " --default-standard-error= Set default standard error output for services\n"
1111 "\nSee the %s for details.\n"
1112 , program_invocation_short_name
7ae47326
ZJS
1113 , ansi_highlight(), ansi_normal()
1114 , ansi_underline(), ansi_normal()
37ec0fdd
LP
1115 , link
1116 );
f170852a
LP
1117
1118 return 0;
1119}
1120
2cc856ac
LP
1121static int prepare_reexecute(
1122 Manager *m,
1123 FILE **ret_f,
1124 FDSet **ret_fds,
1125 bool switching_root) {
1126
48b90859
LP
1127 _cleanup_fdset_free_ FDSet *fds = NULL;
1128 _cleanup_fclose_ FILE *f = NULL;
a16e1123
LP
1129 int r;
1130
1131 assert(m);
2cc856ac
LP
1132 assert(ret_f);
1133 assert(ret_fds);
a16e1123 1134
6b78f9b4 1135 r = manager_open_serialization(m, &f);
48b90859
LP
1136 if (r < 0)
1137 return log_error_errno(r, "Failed to create serialization file: %m");
a16e1123 1138
71445ae7 1139 /* Make sure nothing is really destructed when we shut down */
313cefa1 1140 m->n_reloading++;
718db961 1141 bus_manager_send_reloading(m, true);
71445ae7 1142
6b78f9b4 1143 fds = fdset_new();
48b90859
LP
1144 if (!fds)
1145 return log_oom();
a16e1123 1146
b3680f49 1147 r = manager_serialize(m, f, fds, switching_root);
48b90859 1148 if (r < 0)
d68c645b 1149 return r;
a16e1123 1150
48b90859
LP
1151 if (fseeko(f, 0, SEEK_SET) == (off_t) -1)
1152 return log_error_errno(errno, "Failed to rewind serialization fd: %m");
a16e1123 1153
6b78f9b4 1154 r = fd_cloexec(fileno(f), false);
48b90859
LP
1155 if (r < 0)
1156 return log_error_errno(r, "Failed to disable O_CLOEXEC for serialization: %m");
a16e1123 1157
6b78f9b4 1158 r = fdset_cloexec(fds, false);
48b90859
LP
1159 if (r < 0)
1160 return log_error_errno(r, "Failed to disable O_CLOEXEC for serialization fds: %m");
a16e1123 1161
2cc856ac
LP
1162 *ret_f = TAKE_PTR(f);
1163 *ret_fds = TAKE_PTR(fds);
a16e1123 1164
48b90859 1165 return 0;
a16e1123
LP
1166}
1167
a8b627aa
LP
static void bump_file_max_and_nr_open(void) {

        /* Raises the fs.file-max and fs.nr_open sysctls as far as they go. On current kernels large numbers
         * of file descriptors are no longer a performance problem and their memory is properly tracked by
         * memcg, so counting and limiting them in two more layers is unnecessary and only complicates
         * things. This effectively turns off 2 of the 4 levels of limits on file descriptors, leaving
         * RLIMIT_NOFILE (soft + hard) as the only ones that really matter. Each half is compiled in only
         * if the matching BUMP_PROC_SYS_FS_* build option is enabled. */

#if BUMP_PROC_SYS_FS_FILE_MAX || BUMP_PROC_SYS_FS_NR_OPEN
        int r;
#endif

#if BUMP_PROC_SYS_FS_FILE_MAX
        /* Since kernel 5.2 the maximum accepted here is LONG_MAX, hence use that. (Previously things were
         * different, but the operation would fail silently.) */
        r = sysctl_writef("fs/file-max", "%li\n", LONG_MAX);
        if (r < 0)
                log_full_errno(IN_SET(r, -EROFS, -EPERM, -EACCES) ? LOG_DEBUG : LOG_WARNING, r, "Failed to bump fs.file-max, ignoring: %m");
#endif

#if BUMP_PROC_SYS_FS_NR_OPEN
        /* The kernel enforces a minimum and a maximum on fs.nr_open, but doesn't tell us what they are. The
         * maximum depends on the architecture and on kernel implementation details we don't want to
         * duplicate in userspace. Hence all we can do is try: start with INT_MAX and halve the candidate
         * each time the kernel refuses it with EINVAL, until one is accepted. Ugly, but that's what the
         * kernel API leaves us with. */

        for (int candidate = INT_MAX;; candidate /= 2) {
                int current;

                /* Round down to the next multiple of the pointer size */
                candidate &= ~(__SIZEOF_POINTER__ - 1);
                if (candidate < 1024) {
                        log_warning("Can't bump fs.nr_open, value too small.");
                        break;
                }

                current = read_nr_open();
                if (current < 0) {
                        log_error_errno(current, "Failed to read fs.nr_open: %m");
                        break;
                }
                if (current >= candidate) { /* Nothing to gain */
                        log_debug("Skipping bump, value is already larger.");
                        break;
                }

                r = sysctl_writef("fs/nr_open", "%i\n", candidate);
                if (r == -EINVAL) {
                        /* Refused as out of range: retry with half the value */
                        log_debug("Couldn't write fs.nr_open as %i, halving it.", candidate);
                        continue;
                }

                if (r < 0)
                        log_full_errno(IN_SET(r, -EROFS, -EPERM, -EACCES) ? LOG_DEBUG : LOG_WARNING, r, "Failed to bump fs.nr_open, ignoring: %m");
                else
                        log_debug("Successfully bumped fs.nr_open to %i", candidate);

                break;
        }
#endif
}
1233
/* Raises our own RLIMIT_NOFILE (soft and hard) to the value read_nr_open() reports, never going below
 * the limits passed in via saved_rlimit.
 *
 * Returns 0 on success or if nothing had to be done, negative errno-style value if setting failed. */
static int bump_rlimit_nofile(struct rlimit *saved_rlimit) {
        struct rlimit wanted;
        rlim_t kernel_limit;
        int r;

        /* The underlying absolute limit the kernel enforces */
        kernel_limit = (rlim_t) read_nr_open();

        /* The limits we want for ourselves: whichever is higher, the kernel limit or what we inherited */
        wanted = (struct rlimit) {
                .rlim_cur = MAX(kernel_limit, saved_rlimit->rlim_cur),
                .rlim_max = MAX(kernel_limit, saved_rlimit->rlim_max),
        };

        /* If the inherited limits already cover that, there's nothing to do */
        if (wanted.rlim_max <= saved_rlimit->rlim_max &&
            wanted.rlim_cur <= saved_rlimit->rlim_cur) {
                log_debug("RLIMIT_NOFILE is already as high or higher than we need it, not bumping.");
                return 0;
        }

        /* Otherwise go all the way up to the maximum the kernel allows, for both hard and soft */
        r = setrlimit_closest(RLIMIT_NOFILE, &wanted);
        if (r < 0)
                return log_warning_errno(r, "Setting RLIMIT_NOFILE failed, ignoring: %m");

        return 0;
}
1262
fb3ae275 1263static int bump_rlimit_memlock(struct rlimit *saved_rlimit) {
cda7faa9 1264 struct rlimit new_rlimit;
04d1ee0f 1265 uint64_t mm;
fb3ae275
LP
1266 int r;
1267
a17c1712 1268 /* BPF_MAP_TYPE_LPM_TRIE bpf maps are charged against RLIMIT_MEMLOCK, even if we have CAP_IPC_LOCK which should
6e3c443b 1269 * normally disable such checks. We need them to implement IPAddressAllow= and IPAddressDeny=, hence let's bump
a17c1712 1270 * the value high enough for our user. */
fb3ae275 1271
cda7faa9
LP
1272 /* Using MAX() on resource limits only is safe if RLIM_INFINITY is > 0. POSIX declares that rlim_t
1273 * must be unsigned, hence this is a given, but let's make this clear here. */
1274 assert_cc(RLIM_INFINITY > 0);
1275
04d1ee0f 1276 mm = physical_memory() / 8; /* Let's scale how much we allow to be locked by the amount of physical
2d4f8cf4 1277 * RAM. We allow an eighth to be locked by us, just to pick a value. */
04d1ee0f 1278
cda7faa9 1279 new_rlimit = (struct rlimit) {
04d1ee0f
LP
1280 .rlim_cur = MAX3(HIGH_RLIMIT_MEMLOCK, saved_rlimit->rlim_cur, mm),
1281 .rlim_max = MAX3(HIGH_RLIMIT_MEMLOCK, saved_rlimit->rlim_max, mm),
cda7faa9
LP
1282 };
1283
1284 if (saved_rlimit->rlim_max >= new_rlimit.rlim_cur &&
1285 saved_rlimit->rlim_cur >= new_rlimit.rlim_max) {
1286 log_debug("RLIMIT_MEMLOCK is already as high or higher than we need it, not bumping.");
1287 return 0;
1288 }
1289
1290 r = setrlimit_closest(RLIMIT_MEMLOCK, &new_rlimit);
fb3ae275
LP
1291 if (r < 0)
1292 return log_warning_errno(r, "Setting RLIMIT_MEMLOCK failed, ignoring: %m");
1293
1294 return 0;
1295}
1296
static void test_usr(void) {

        /* Warns if /usr looks empty, i.e. is neither part of the root file system nor mounted yet. */

        if (dir_is_empty("/usr") > 0)
                log_warning("/usr appears to be on its own filesystem and is not already mounted. This is not a supported setup. "
                            "Some things will probably break (sometimes even silently) in mysterious ways. "
                            "Consult http://freedesktop.org/wiki/Software/systemd/separate-usr-is-broken for more information.");
}
1308
d3b1c508 1309static int enforce_syscall_archs(Set *archs) {
349cc4a5 1310#if HAVE_SECCOMP
d3b1c508
LP
1311 int r;
1312
83f12b27
FS
1313 if (!is_seccomp_available())
1314 return 0;
1315
469830d1 1316 r = seccomp_restrict_archs(arg_syscall_archs);
d3b1c508 1317 if (r < 0)
469830d1 1318 return log_error_errno(r, "Failed to enforce system call architecture restrication: %m");
d3b1c508 1319#endif
469830d1 1320 return 0;
d3b1c508
LP
1321}
1322
b6e2f329
LP
1323static int status_welcome(void) {
1324 _cleanup_free_ char *pretty_name = NULL, *ansi_color = NULL;
1325 int r;
1326
5ca02bfc 1327 if (!show_status_on(arg_show_status))
fd8c85c6
LP
1328 return 0;
1329
d58ad743
LP
1330 r = parse_os_release(NULL,
1331 "PRETTY_NAME", &pretty_name,
1332 "ANSI_COLOR", &ansi_color,
1333 NULL);
1334 if (r < 0)
1335 log_full_errno(r == -ENOENT ? LOG_DEBUG : LOG_WARNING, r,
1336 "Failed to read os-release file, ignoring: %m");
b6e2f329 1337
dc9b5816 1338 if (log_get_show_color())
a885727a 1339 return status_printf(NULL, 0,
dc9b5816
ZJS
1340 "\nWelcome to \x1B[%sm%s\x1B[0m!\n",
1341 isempty(ansi_color) ? "1" : ansi_color,
1342 isempty(pretty_name) ? "Linux" : pretty_name);
1343 else
a885727a 1344 return status_printf(NULL, 0,
dc9b5816
ZJS
1345 "\nWelcome to %s!\n",
1346 isempty(pretty_name) ? "Linux" : pretty_name);
b6e2f329
LP
1347}
1348
fdd25311
LP
1349static int write_container_id(void) {
1350 const char *c;
19854865 1351 int r;
fdd25311
LP
1352
1353 c = getenv("container");
1354 if (isempty(c))
1355 return 0;
1356
8612da97
LP
1357 RUN_WITH_UMASK(0022)
1358 r = write_string_file("/run/systemd/container", c, WRITE_STRING_FILE_CREATE);
19854865 1359 if (r < 0)
f1f849b0 1360 return log_warning_errno(r, "Failed to write /run/systemd/container, ignoring: %m");
19854865
LP
1361
1362 return 1;
1363}
1364
1365static int bump_unix_max_dgram_qlen(void) {
1366 _cleanup_free_ char *qlen = NULL;
1367 unsigned long v;
1368 int r;
1369
3130fca5
LP
1370 /* Let's bump the net.unix.max_dgram_qlen sysctl. The kernel default of 16 is simply too low. We set the value
1371 * really really early during boot, so that it is actually applied to all our sockets, including the
1372 * $NOTIFY_SOCKET one. */
19854865
LP
1373
1374 r = read_one_line_file("/proc/sys/net/unix/max_dgram_qlen", &qlen);
1375 if (r < 0)
875622c3 1376 return log_full_errno(r == -ENOENT ? LOG_DEBUG : LOG_WARNING, r, "Failed to read AF_UNIX datagram queue length, ignoring: %m");
19854865
LP
1377
1378 r = safe_atolu(qlen, &v);
1379 if (r < 0)
3130fca5 1380 return log_warning_errno(r, "Failed to parse AF_UNIX datagram queue length '%s', ignoring: %m", qlen);
19854865
LP
1381
1382 if (v >= DEFAULT_UNIX_MAX_DGRAM_QLEN)
1383 return 0;
1384
57512c89 1385 r = write_string_filef("/proc/sys/net/unix/max_dgram_qlen", WRITE_STRING_FILE_DISABLE_BUFFER, "%lu", DEFAULT_UNIX_MAX_DGRAM_QLEN);
19854865
LP
1386 if (r < 0)
1387 return log_full_errno(IN_SET(r, -EROFS, -EPERM, -EACCES) ? LOG_DEBUG : LOG_WARNING, r,
1388 "Failed to bump AF_UNIX datagram queue length, ignoring: %m");
1389
1390 return 1;
fdd25311
LP
1391}
1392
32391275
FB
1393static int fixup_environment(void) {
1394 _cleanup_free_ char *term = NULL;
4dc63c4b 1395 const char *t;
32391275
FB
1396 int r;
1397
43db615b
LP
1398 /* Only fix up the environment when we are started as PID 1 */
1399 if (getpid_cached() != 1)
1400 return 0;
1401
1402 /* We expect the environment to be set correctly if run inside a container. */
84af7821
LP
1403 if (detect_container() > 0)
1404 return 0;
1405
43db615b
LP
1406 /* When started as PID1, the kernel uses /dev/console for our stdios and uses TERM=linux whatever the backend
1407 * device used by the console. We try to make a better guess here since some consoles might not have support
1408 * for color mode for example.
32391275 1409 *
43db615b 1410 * However if TERM was configured through the kernel command line then leave it alone. */
1d84ad94 1411 r = proc_cmdline_get_key("TERM", 0, &term);
32391275
FB
1412 if (r < 0)
1413 return r;
32391275 1414
4dc63c4b
LP
1415 t = term ?: default_term_for_tty("/dev/console");
1416
1417 if (setenv("TERM", t, 1) < 0)
32391275
FB
1418 return -errno;
1419
9d48671c 1420 /* The kernels sets HOME=/ for init. Let's undo this. */
44ee03d1
ZJS
1421 if (path_equal_ptr(getenv("HOME"), "/"))
1422 assert_se(unsetenv("HOME") == 0);
9d48671c 1423
32391275
FB
1424 return 0;
1425}
1426
6808a0bc
LP
static void redirect_telinit(int argc, char *argv[]) {

        /* SysV compatibility: invoking init as a user is identical to invoking telinit. Hence, if we are
         * not PID 1 and our program name contains "init", execute the systemctl binary with our own
         * argument vector. Without HAVE_SYSV_COMPAT this function does nothing. */

#if HAVE_SYSV_COMPAT
        if (getpid_cached() == 1 || !strstr(program_invocation_short_name, "init"))
                return;

        execv(SYSTEMCTL_BINARY_PATH, argv);

        /* execv() only returns on failure */
        log_error_errno(errno, "Failed to exec " SYSTEMCTL_BINARY_PATH ": %m");
        exit(EXIT_FAILURE);
#endif
}
1443
4a36297c
LP
1444static int become_shutdown(
1445 const char *shutdown_verb,
7eb35049 1446 int retval) {
4a36297c
LP
1447
1448 char log_level[DECIMAL_STR_MAX(int) + 1],
e73c54b8
JK
1449 exit_code[DECIMAL_STR_MAX(uint8_t) + 1],
1450 timeout[DECIMAL_STR_MAX(usec_t) + 1];
4a36297c 1451
e73c54b8 1452 const char* command_line[13] = {
4a36297c
LP
1453 SYSTEMD_SHUTDOWN_BINARY_PATH,
1454 shutdown_verb,
e73c54b8 1455 "--timeout", timeout,
4a36297c
LP
1456 "--log-level", log_level,
1457 "--log-target",
1458 };
1459
1460 _cleanup_strv_free_ char **env_block = NULL;
e73c54b8 1461 size_t pos = 7;
4a36297c 1462 int r;
acafd7d8 1463 usec_t watchdog_timer = 0;
4a36297c 1464
7eb35049 1465 assert(shutdown_verb);
234519ae 1466 assert(!command_line[pos]);
4a36297c
LP
1467 env_block = strv_copy(environ);
1468
1469 xsprintf(log_level, "%d", log_get_max_level());
e73c54b8 1470 xsprintf(timeout, "%" PRI_USEC "us", arg_default_timeout_stop_usec);
4a36297c
LP
1471
1472 switch (log_get_target()) {
1473
1474 case LOG_TARGET_KMSG:
1475 case LOG_TARGET_JOURNAL_OR_KMSG:
1476 case LOG_TARGET_SYSLOG_OR_KMSG:
1477 command_line[pos++] = "kmsg";
1478 break;
1479
1480 case LOG_TARGET_NULL:
1481 command_line[pos++] = "null";
1482 break;
1483
1484 case LOG_TARGET_CONSOLE:
1485 default:
1486 command_line[pos++] = "console";
1487 break;
1488 };
1489
1490 if (log_get_show_color())
1491 command_line[pos++] = "--log-color";
1492
1493 if (log_get_show_location())
1494 command_line[pos++] = "--log-location";
1495
c5673ed0
DS
1496 if (log_get_show_time())
1497 command_line[pos++] = "--log-time";
1498
4a36297c
LP
1499 if (streq(shutdown_verb, "exit")) {
1500 command_line[pos++] = "--exit-code";
1501 command_line[pos++] = exit_code;
1502 xsprintf(exit_code, "%d", retval);
1503 }
1504
1505 assert(pos < ELEMENTSOF(command_line));
1506
acafd7d8 1507 if (streq(shutdown_verb, "reboot"))
65224c1d 1508 watchdog_timer = arg_reboot_watchdog;
acafd7d8
LB
1509 else if (streq(shutdown_verb, "kexec"))
1510 watchdog_timer = arg_kexec_watchdog;
1511
1512 if (watchdog_timer > 0 && watchdog_timer != USEC_INFINITY) {
7eb35049 1513
4a36297c
LP
1514 char *e;
1515
acafd7d8 1516 /* If we reboot or kexec let's set the shutdown
4a36297c
LP
1517 * watchdog and tell the shutdown binary to
1518 * repeatedly ping it */
acafd7d8 1519 r = watchdog_set_timeout(&watchdog_timer);
4a36297c
LP
1520 watchdog_close(r < 0);
1521
1522 /* Tell the binary how often to ping, ignore failure */
acafd7d8 1523 if (asprintf(&e, "WATCHDOG_USEC="USEC_FMT, watchdog_timer) > 0)
8a2c1fbf
EJ
1524 (void) strv_consume(&env_block, e);
1525
1526 if (arg_watchdog_device &&
1527 asprintf(&e, "WATCHDOG_DEVICE=%s", arg_watchdog_device) > 0)
1528 (void) strv_consume(&env_block, e);
4a36297c
LP
1529 } else
1530 watchdog_close(true);
1531
1532 /* Avoid the creation of new processes forked by the
1533 * kernel; at this point, we will not listen to the
1534 * signals anyway */
1535 if (detect_container() <= 0)
1536 (void) cg_uninstall_release_agent(SYSTEMD_CGROUP_CONTROLLER);
1537
1538 execve(SYSTEMD_SHUTDOWN_BINARY_PATH, (char **) command_line, env_block);
1539 return -errno;
1540}
1541
e839bafd
LP
static void initialize_clock(void) {
        bool rtc_in_localtime;
        int r;

        /* Called very early on, before we parse the kernel command line or otherwise figure out why we
         * are running, but only once. */

        rtc_in_localtime = clock_is_localtime(NULL) > 0;

        if (rtc_in_localtime) {
                int delta_minutes;

                /* The very first call of settimeofday() also does a time warp in the kernel.
                 *
                 * In the rtc-in-local time mode, we set the kernel's timezone, and rely on external tools
                 * to take care of maintaining the RTC and do all adjustments. This matches the behavior of
                 * Windows, which leaves the RTC alone if the registry tells that the RTC runs in UTC. */
                r = clock_set_timezone(&delta_minutes);
                if (r >= 0)
                        log_info("RTC configured in localtime, applying delta of %i minutes to system time.", delta_minutes);
                else
                        log_error_errno(r, "Failed to apply local time delta, ignoring: %m");

        } else if (!in_initrd()) {
                /* Do a dummy very first call to seal the kernel's time warp magic.
                 *
                 * Do not call this from inside the initrd. The initrd might not carry /etc/adjtime with
                 * LOCAL, but the real system could be set up that way. In such case, we need to delay the
                 * time-warp or the sealing until we reach the real system.
                 *
                 * Do not set the kernel's timezone. The concept of local time cannot be supported
                 * reliably, the time will jump or be incorrect at every daylight saving time change. All
                 * kernel local time concepts will be treated as UTC that way. */
                (void) clock_reset_timewarp();
        }

        /* In any case, apply the epoch to the clock */
        r = clock_apply_epoch();
        if (r > 0)
                log_info("System time before build time, advancing clock.");
        else if (r < 0)
                log_error_errno(r, "Current system time is before build time, but cannot correct: %m");
}
1584
3753325b
LP
1585static void apply_clock_update(void) {
1586 struct timespec ts;
1587
1588 /* This is called later than initialize_clock(), i.e. after we parsed configuration files/kernel
1589 * command line and such. */
1590
1591 if (arg_clock_usec == 0)
1592 return;
1593
45250e66
LP
1594 if (getpid_cached() != 1)
1595 return;
1596
3753325b
LP
1597 if (clock_settime(CLOCK_REALTIME, timespec_store(&ts, arg_clock_usec)) < 0)
1598 log_error_errno(errno, "Failed to set system clock to time specified on kernel command line: %m");
1599 else {
1600 char buf[FORMAT_TIMESTAMP_MAX];
1601
1602 log_info("Set system clock to %s, as specified on the kernel command line.",
1603 format_timestamp(buf, sizeof(buf), arg_clock_usec));
1604 }
1605}
1606
d247f232
LP
1607static void cmdline_take_random_seed(void) {
1608 _cleanup_close_ int random_fd = -1;
1609 size_t suggested;
1610 int r;
1611
1612 if (arg_random_seed_size == 0)
1613 return;
1614
1615 if (getpid_cached() != 1)
1616 return;
1617
1618 assert(arg_random_seed);
1619 suggested = random_pool_size();
1620
1621 if (arg_random_seed_size < suggested)
1622 log_warning("Random seed specified on kernel command line has size %zu, but %zu bytes required to fill entropy pool.",
1623 arg_random_seed_size, suggested);
1624
1625 random_fd = open("/dev/urandom", O_WRONLY|O_CLOEXEC|O_NOCTTY);
1626 if (random_fd < 0) {
1627 log_warning_errno(errno, "Failed to open /dev/urandom for writing, ignoring: %m");
1628 return;
1629 }
1630
1631 r = random_write_entropy(random_fd, arg_random_seed, arg_random_seed_size, true);
1632 if (r < 0) {
1633 log_warning_errno(r, "Failed to credit entropy specified on kernel command line, ignoring: %m");
1634 return;
1635 }
1636
1637 log_notice("Successfully credited entropy passed on kernel command line.\n"
1638 "Note that the seed provided this way is accessible to unprivileged programs. This functionality should not be used outside of testing environments.");
1639}
1640
static void initialize_coredump(bool skip_setup) {

        /* Sets up core dump handling for PID 1. Compiled to a no-op without ENABLE_COREDUMP. */

#if ENABLE_COREDUMP
        if (getpid_cached() != 1)
                return;

        /* Lift the limit on the core dump size, so that coredump handlers such as systemd-coredump
         * (which honour the limit) process core dumps of system services by default. */
        if (setrlimit(RLIMIT_CORE, &RLIMIT_MAKE_CONST(RLIM_INFINITY)) < 0)
                log_warning_errno(errno, "Failed to set RLIMIT_CORE: %m");

        if (skip_setup)
                return;

        /* At the same time, turn off the core_pattern logic by default, so that no coredumps are stored
         * until the systemd-coredump tool is enabled via sysctl. This can be changed via the kernel
         * command line later, so that core dumps can still be generated during early startup and in
         * the initramfs. */
        disable_coredumps();
#endif
}
1659
c6885f5f
FB
1660static void initialize_core_pattern(bool skip_setup) {
1661 int r;
1662
1663 if (skip_setup || !arg_early_core_pattern)
1664 return;
1665
1666 if (getpid_cached() != 1)
1667 return;
1668
57512c89 1669 r = write_string_file("/proc/sys/kernel/core_pattern", arg_early_core_pattern, WRITE_STRING_FILE_DISABLE_BUFFER);
c6885f5f
FB
1670 if (r < 0)
1671 log_warning_errno(r, "Failed to write '%s' to /proc/sys/kernel/core_pattern, ignoring: %m", arg_early_core_pattern);
1672}
1673
61fbbac1
ZJS
1674static void update_cpu_affinity(bool skip_setup) {
1675 _cleanup_free_ char *mask = NULL;
1676
1677 if (skip_setup || !arg_cpu_affinity.set)
1678 return;
1679
1680 assert(arg_cpu_affinity.allocated > 0);
1681
1682 mask = cpu_set_to_string(&arg_cpu_affinity);
1683 log_debug("Setting CPU affinity to %s.", strnull(mask));
1684
1685 if (sched_setaffinity(0, arg_cpu_affinity.allocated, arg_cpu_affinity.set) < 0)
1686 log_warning_errno(errno, "Failed to set CPU affinity: %m");
1687}
1688
b070c7c0
MS
1689static void update_numa_policy(bool skip_setup) {
1690 int r;
1691 _cleanup_free_ char *nodes = NULL;
1692 const char * policy = NULL;
1693
1694 if (skip_setup || !mpol_is_valid(numa_policy_get_type(&arg_numa_policy)))
1695 return;
1696
1697 if (DEBUG_LOGGING) {
1698 policy = mpol_to_string(numa_policy_get_type(&arg_numa_policy));
1699 nodes = cpu_set_to_range_string(&arg_numa_policy.nodes);
1700 log_debug("Setting NUMA policy to %s, with nodes %s.", strnull(policy), strnull(nodes));
1701 }
1702
1703 r = apply_numa_policy(&arg_numa_policy);
1704 if (r == -EOPNOTSUPP)
1705 log_debug_errno(r, "NUMA support not available, ignoring.");
1706 else if (r < 0)
1707 log_warning_errno(r, "Failed to set NUMA memory policy: %m");
1708}
1709
3c7878f9
LP
1710static void do_reexecute(
1711 int argc,
1712 char *argv[],
1713 const struct rlimit *saved_rlimit_nofile,
1714 const struct rlimit *saved_rlimit_memlock,
1715 FDSet *fds,
1716 const char *switch_root_dir,
1717 const char *switch_root_init,
1718 const char **ret_error_message) {
1719
1720 unsigned i, j, args_size;
1721 const char **args;
1722 int r;
1723
1724 assert(saved_rlimit_nofile);
1725 assert(saved_rlimit_memlock);
1726 assert(ret_error_message);
1727
1728 /* Close and disarm the watchdog, so that the new instance can reinitialize it, but doesn't get rebooted while
1729 * we do that */
1730 watchdog_close(true);
1731
ddfa8b0b
LP
1732 /* Reset RLIMIT_NOFILE + RLIMIT_MEMLOCK back to the kernel defaults, so that the new systemd can pass
1733 * the kernel default to its child processes */
1734 if (saved_rlimit_nofile->rlim_cur != 0)
3c7878f9 1735 (void) setrlimit(RLIMIT_NOFILE, saved_rlimit_nofile);
ddfa8b0b 1736 if (saved_rlimit_memlock->rlim_cur != RLIM_INFINITY)
3c7878f9
LP
1737 (void) setrlimit(RLIMIT_MEMLOCK, saved_rlimit_memlock);
1738
1739 if (switch_root_dir) {
1740 /* Kill all remaining processes from the initrd, but don't wait for them, so that we can handle the
1741 * SIGCHLD for them after deserializing. */
e73c54b8 1742 broadcast_signal(SIGTERM, false, true, arg_default_timeout_stop_usec);
3c7878f9
LP
1743
1744 /* And switch root with MS_MOVE, because we remove the old directory afterwards and detach it. */
1745 r = switch_root(switch_root_dir, "/mnt", true, MS_MOVE);
1746 if (r < 0)
1747 log_error_errno(r, "Failed to switch root, trying to continue: %m");
1748 }
1749
1750 args_size = MAX(6, argc+1);
1751 args = newa(const char*, args_size);
1752
1753 if (!switch_root_init) {
1754 char sfd[DECIMAL_STR_MAX(int) + 1];
1755
1756 /* First try to spawn ourselves with the right path, and with full serialization. We do this only if
1757 * the user didn't specify an explicit init to spawn. */
1758
1759 assert(arg_serialization);
1760 assert(fds);
1761
1762 xsprintf(sfd, "%i", fileno(arg_serialization));
1763
1764 i = 0;
1765 args[i++] = SYSTEMD_BINARY_PATH;
1766 if (switch_root_dir)
1767 args[i++] = "--switched-root";
1768 args[i++] = arg_system ? "--system" : "--user";
1769 args[i++] = "--deserialize";
1770 args[i++] = sfd;
1771 args[i++] = NULL;
1772
1773 assert(i <= args_size);
1774
1775 /*
1776 * We want valgrind to print its memory usage summary before reexecution. Valgrind won't do this is on
1777 * its own on exec(), but it will do it on exit(). Hence, to ensure we get a summary here, fork() off
1778 * a child, let it exit() cleanly, so that it prints the summary, and wait() for it in the parent,
1779 * before proceeding into the exec().
1780 */
1781 valgrind_summary_hack();
1782
1783 (void) execv(args[0], (char* const*) args);
1784 log_debug_errno(errno, "Failed to execute our own binary, trying fallback: %m");
1785 }
1786
1787 /* Try the fallback, if there is any, without any serialization. We pass the original argv[] and envp[]. (Well,
1788 * modulo the ordering changes due to getopt() in argv[], and some cleanups in envp[], but let's hope that
1789 * doesn't matter.) */
1790
1791 arg_serialization = safe_fclose(arg_serialization);
1792 fds = fdset_free(fds);
1793
1794 /* Reopen the console */
1795 (void) make_console_stdio();
1796
1797 for (j = 1, i = 1; j < (unsigned) argc; j++)
1798 args[i++] = argv[j];
1799 args[i++] = NULL;
1800 assert(i <= args_size);
1801
5238e957 1802 /* Re-enable any blocked signals, especially important if we switch from initial ramdisk to init=... */
3c7878f9
LP
1803 (void) reset_all_signal_handlers();
1804 (void) reset_signal_mask();
595225af 1805 (void) rlimit_nofile_safe();
3c7878f9
LP
1806
1807 if (switch_root_init) {
1808 args[0] = switch_root_init;
a5cede8c 1809 (void) execve(args[0], (char* const*) args, saved_env);
3c7878f9
LP
1810 log_warning_errno(errno, "Failed to execute configured init, trying fallback: %m");
1811 }
1812
1813 args[0] = "/sbin/init";
1814 (void) execv(args[0], (char* const*) args);
1815 r = -errno;
1816
1817 manager_status_printf(NULL, STATUS_TYPE_EMERGENCY,
1818 ANSI_HIGHLIGHT_RED " !! " ANSI_NORMAL,
1819 "Failed to execute /sbin/init");
1820
1821 if (r == -ENOENT) {
1822 log_warning("No /sbin/init, trying fallback");
1823
1824 args[0] = "/bin/sh";
1825 args[1] = NULL;
a5cede8c 1826 (void) execve(args[0], (char* const*) args, saved_env);
3c7878f9
LP
1827 log_error_errno(errno, "Failed to execute /bin/sh, giving up: %m");
1828 } else
1829 log_warning_errno(r, "Failed to execute /sbin/init, giving up: %m");
1830
1831 *ret_error_message = "Failed to execute fallback shell";
1832}
1833
7eb35049
LP
1834static int invoke_main_loop(
1835 Manager *m,
a9fd4cd1
FB
1836 const struct rlimit *saved_rlimit_nofile,
1837 const struct rlimit *saved_rlimit_memlock,
7eb35049
LP
1838 bool *ret_reexecute,
1839 int *ret_retval, /* Return parameters relevant for shutting down */
1840 const char **ret_shutdown_verb, /* … */
1841 FDSet **ret_fds, /* Return parameters for reexecuting */
1842 char **ret_switch_root_dir, /* … */
1843 char **ret_switch_root_init, /* … */
1844 const char **ret_error_message) {
1845
1846 int r;
1847
1848 assert(m);
a9fd4cd1
FB
1849 assert(saved_rlimit_nofile);
1850 assert(saved_rlimit_memlock);
7eb35049
LP
1851 assert(ret_reexecute);
1852 assert(ret_retval);
1853 assert(ret_shutdown_verb);
1854 assert(ret_fds);
1855 assert(ret_switch_root_dir);
1856 assert(ret_switch_root_init);
1857 assert(ret_error_message);
1858
1859 for (;;) {
1860 r = manager_loop(m);
1861 if (r < 0) {
1862 *ret_error_message = "Failed to run main loop";
1863 return log_emergency_errno(r, "Failed to run main loop: %m");
1864 }
1865
3ca4d0b3 1866 switch ((ManagerObjective) r) {
7eb35049 1867
a6ecbf83 1868 case MANAGER_RELOAD: {
bda7d78b 1869 LogTarget saved_log_target;
a6ecbf83
FB
1870 int saved_log_level;
1871
7eb35049
LP
1872 log_info("Reloading.");
1873
3fe91079 1874 /* First, save any overridden log level/target, then parse the configuration file, which might
bda7d78b
FB
1875 * change the log level to new settings. */
1876
a6ecbf83 1877 saved_log_level = m->log_level_overridden ? log_get_max_level() : -1;
bda7d78b 1878 saved_log_target = m->log_target_overridden ? log_get_target() : _LOG_TARGET_INVALID;
a6ecbf83 1879
a9fd4cd1 1880 (void) parse_configuration(saved_rlimit_nofile, saved_rlimit_memlock);
7eb35049
LP
1881
1882 set_manager_defaults(m);
986935cf 1883 set_manager_settings(m);
7eb35049 1884
61fbbac1 1885 update_cpu_affinity(false);
b070c7c0 1886 update_numa_policy(false);
61fbbac1 1887
a6ecbf83
FB
1888 if (saved_log_level >= 0)
1889 manager_override_log_level(m, saved_log_level);
bda7d78b
FB
1890 if (saved_log_target >= 0)
1891 manager_override_log_target(m, saved_log_target);
a6ecbf83 1892
7eb35049
LP
1893 r = manager_reload(m);
1894 if (r < 0)
7a35fa24
LP
1895 /* Reloading failed before the point of no return. Let's continue running as if nothing happened. */
1896 m->objective = MANAGER_OK;
7eb35049
LP
1897
1898 break;
a6ecbf83 1899 }
7eb35049
LP
1900
1901 case MANAGER_REEXECUTE:
1902
1903 r = prepare_reexecute(m, &arg_serialization, ret_fds, false);
1904 if (r < 0) {
1905 *ret_error_message = "Failed to prepare for reexecution";
1906 return r;
1907 }
1908
1909 log_notice("Reexecuting.");
1910
1911 *ret_reexecute = true;
1912 *ret_retval = EXIT_SUCCESS;
1913 *ret_shutdown_verb = NULL;
1914 *ret_switch_root_dir = *ret_switch_root_init = NULL;
1915
1916 return 0;
1917
1918 case MANAGER_SWITCH_ROOT:
1919 if (!m->switch_root_init) {
1920 r = prepare_reexecute(m, &arg_serialization, ret_fds, true);
1921 if (r < 0) {
1922 *ret_error_message = "Failed to prepare for reexecution";
1923 return r;
1924 }
1925 } else
1926 *ret_fds = NULL;
1927
1928 log_notice("Switching root.");
1929
1930 *ret_reexecute = true;
1931 *ret_retval = EXIT_SUCCESS;
1932 *ret_shutdown_verb = NULL;
1933
1934 /* Steal the switch root parameters */
49052946
YW
1935 *ret_switch_root_dir = TAKE_PTR(m->switch_root);
1936 *ret_switch_root_init = TAKE_PTR(m->switch_root_init);
7eb35049
LP
1937
1938 return 0;
1939
1940 case MANAGER_EXIT:
1941
1942 if (MANAGER_IS_USER(m)) {
1943 log_debug("Exit.");
1944
1945 *ret_reexecute = false;
1946 *ret_retval = m->return_value;
1947 *ret_shutdown_verb = NULL;
1948 *ret_fds = NULL;
1949 *ret_switch_root_dir = *ret_switch_root_init = NULL;
1950
1951 return 0;
1952 }
1953
1954 _fallthrough_;
1955 case MANAGER_REBOOT:
1956 case MANAGER_POWEROFF:
1957 case MANAGER_HALT:
1958 case MANAGER_KEXEC: {
af41e508
LP
1959 static const char * const table[_MANAGER_OBJECTIVE_MAX] = {
1960 [MANAGER_EXIT] = "exit",
1961 [MANAGER_REBOOT] = "reboot",
7eb35049 1962 [MANAGER_POWEROFF] = "poweroff",
af41e508
LP
1963 [MANAGER_HALT] = "halt",
1964 [MANAGER_KEXEC] = "kexec",
7eb35049
LP
1965 };
1966
1967 log_notice("Shutting down.");
1968
1969 *ret_reexecute = false;
1970 *ret_retval = m->return_value;
af41e508 1971 assert_se(*ret_shutdown_verb = table[m->objective]);
7eb35049
LP
1972 *ret_fds = NULL;
1973 *ret_switch_root_dir = *ret_switch_root_init = NULL;
1974
1975 return 0;
1976 }
1977
1978 default:
af41e508 1979 assert_not_reached("Unknown or unexpected manager objective.");
7eb35049
LP
1980 }
1981 }
1982}
1983
31aef7ff
LP
1984static void log_execution_mode(bool *ret_first_boot) {
1985 assert(ret_first_boot);
1986
1987 if (arg_system) {
1988 int v;
1989
91b79ba8
ZJS
1990 log_info("systemd " GIT_VERSION " running in %ssystem mode. (%s)",
1991 arg_action == ACTION_TEST ? "test " : "",
1992 systemd_features);
31aef7ff
LP
1993
1994 v = detect_virtualization();
1995 if (v > 0)
1996 log_info("Detected virtualization %s.", virtualization_to_string(v));
1997
1998 log_info("Detected architecture %s.", architecture_to_string(uname_architecture()));
1999
2000 if (in_initrd()) {
2001 *ret_first_boot = false;
2002 log_info("Running in initial RAM disk.");
2003 } else {
583cef3b
HS
2004 int r;
2005 _cleanup_free_ char *id_text = NULL;
2006
2007 /* Let's check whether we are in first boot. We use /etc/machine-id as flag file
2008 * for this: If it is missing or contains the value "uninitialized", this is the
2009 * first boot. In any other case, it is not. This allows container managers and
2010 * installers to provision a couple of files already. If the container manager
2011 * wants to provision the machine ID itself it should pass $container_uuid to PID 1. */
2012
2013 r = read_one_line_file("/etc/machine-id", &id_text);
2014 if (r < 0 || streq(id_text, "uninitialized")) {
2015 if (r < 0 && r != -ENOENT)
2016 log_warning_errno(r, "Unexpected error while reading /etc/machine-id, ignoring: %m");
2017
2018 *ret_first_boot = true;
2019 log_info("Detected first boot.");
2020 } else {
2021 *ret_first_boot = false;
2022 log_debug("Detected initialized system, this is not the first boot.");
2023 }
31aef7ff
LP
2024 }
2025 } else {
b9e90f3a
LP
2026 if (DEBUG_LOGGING) {
2027 _cleanup_free_ char *t;
31aef7ff 2028
b9e90f3a 2029 t = uid_to_name(getuid());
91b79ba8
ZJS
2030 log_debug("systemd " GIT_VERSION " running in %suser mode for user " UID_FMT "/%s. (%s)",
2031 arg_action == ACTION_TEST ? " test" : "",
2032 getuid(), strna(t), systemd_features);
b9e90f3a 2033 }
31aef7ff
LP
2034
2035 *ret_first_boot = false;
2036 }
2037}
2038
5afbaa36
LP
2039static int initialize_runtime(
2040 bool skip_setup,
3023f2fe 2041 bool first_boot,
5afbaa36
LP
2042 struct rlimit *saved_rlimit_nofile,
2043 struct rlimit *saved_rlimit_memlock,
2044 const char **ret_error_message) {
5afbaa36
LP
2045 int r;
2046
2047 assert(ret_error_message);
2048
2049 /* Sets up various runtime parameters. Many of these initializations are conditionalized:
2050 *
2051 * - Some only apply to --system instances
2052 * - Some only apply to --user instances
2053 * - Some only apply when we first start up, but not when we reexecute
2054 */
2055
2d776038
LP
2056 if (arg_action != ACTION_RUN)
2057 return 0;
2058
61fbbac1 2059 update_cpu_affinity(skip_setup);
b070c7c0 2060 update_numa_policy(skip_setup);
61fbbac1 2061
3c3c6cb9 2062 if (arg_system) {
5238e957 2063 /* Make sure we leave a core dump without panicking the kernel. */
3c3c6cb9 2064 install_crash_handler();
5afbaa36 2065
3c3c6cb9 2066 if (!skip_setup) {
143fadf3 2067 r = mount_cgroup_controllers();
3c3c6cb9
LP
2068 if (r < 0) {
2069 *ret_error_message = "Failed to mount cgroup hierarchies";
2070 return r;
2071 }
2072
2073 status_welcome();
2074 hostname_setup();
3023f2fe
HS
2075 /* Force transient machine-id on first boot. */
2076 machine_id_setup(NULL, first_boot, arg_machine_id, NULL);
df883de9 2077 (void) loopback_setup();
3c3c6cb9 2078 bump_unix_max_dgram_qlen();
a8b627aa 2079 bump_file_max_and_nr_open();
3c3c6cb9
LP
2080 test_usr();
2081 write_container_id();
2082 }
8a2c1fbf 2083
3c3c6cb9
LP
2084 if (arg_watchdog_device) {
2085 r = watchdog_set_device(arg_watchdog_device);
2086 if (r < 0)
2087 log_warning_errno(r, "Failed to set watchdog device to %s, ignoring: %m", arg_watchdog_device);
2088 }
32429805
LP
2089 } else {
2090 _cleanup_free_ char *p = NULL;
2091
2092 /* Create the runtime directory and place the inaccessible device nodes there, if we run in
2093 * user mode. In system mode mount_setup() already did that. */
2094
2095 r = xdg_user_runtime_dir(&p, "/systemd");
2096 if (r < 0) {
2097 *ret_error_message = "$XDG_RUNTIME_DIR is not set";
2098 return log_emergency_errno(r, "Failed to determine $XDG_RUNTIME_DIR path: %m");
2099 }
2100
e813a74a 2101 (void) mkdir_p_label(p, 0755);
32429805 2102 (void) make_inaccessible_nodes(p, UID_INVALID, GID_INVALID);
3c3c6cb9 2103 }
5afbaa36
LP
2104
2105 if (arg_timer_slack_nsec != NSEC_INFINITY)
2106 if (prctl(PR_SET_TIMERSLACK, arg_timer_slack_nsec) < 0)
3a671cd1 2107 log_warning_errno(errno, "Failed to adjust timer slack, ignoring: %m");
5afbaa36
LP
2108
2109 if (arg_system && !cap_test_all(arg_capability_bounding_set)) {
2110 r = capability_bounding_set_drop_usermode(arg_capability_bounding_set);
2111 if (r < 0) {
2112 *ret_error_message = "Failed to drop capability bounding set of usermode helpers";
2113 return log_emergency_errno(r, "Failed to drop capability bounding set of usermode helpers: %m");
2114 }
2115
2116 r = capability_bounding_set_drop(arg_capability_bounding_set, true);
2117 if (r < 0) {
2118 *ret_error_message = "Failed to drop capability bounding set";
2119 return log_emergency_errno(r, "Failed to drop capability bounding set: %m");
2120 }
2121 }
2122
39362f6f
JB
2123 if (arg_system && arg_no_new_privs) {
2124 if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) < 0) {
2125 *ret_error_message = "Failed to disable new privileges";
2126 return log_emergency_errno(errno, "Failed to disable new privileges: %m");
2127 }
2128 }
2129
5afbaa36
LP
2130 if (arg_syscall_archs) {
2131 r = enforce_syscall_archs(arg_syscall_archs);
2132 if (r < 0) {
2133 *ret_error_message = "Failed to set syscall architectures";
2134 return r;
2135 }
2136 }
2137
2138 if (!arg_system)
2139 /* Become reaper of our children */
2140 if (prctl(PR_SET_CHILD_SUBREAPER, 1) < 0)
2141 log_warning_errno(errno, "Failed to make us a subreaper: %m");
2142
a17c1712
LP
2143 /* Bump up RLIMIT_NOFILE for systemd itself */
2144 (void) bump_rlimit_nofile(saved_rlimit_nofile);
2145 (void) bump_rlimit_memlock(saved_rlimit_memlock);
5afbaa36
LP
2146
2147 return 0;
2148}
2149
6acca5fc
LP
2150static int do_queue_default_job(
2151 Manager *m,
2152 const char **ret_error_message) {
2153
2154 _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
f1d075dc
ZJS
2155 const char *unit;
2156 Job *job;
2157 Unit *target;
6acca5fc
LP
2158 int r;
2159
8755dbad 2160 if (arg_default_unit)
f1d075dc 2161 unit = arg_default_unit;
8755dbad 2162 else if (in_initrd())
f1d075dc 2163 unit = SPECIAL_INITRD_TARGET;
8755dbad 2164 else
f1d075dc 2165 unit = SPECIAL_DEFAULT_TARGET;
8755dbad 2166
f1d075dc 2167 log_debug("Activating default unit: %s", unit);
8755dbad 2168
f1d075dc 2169 r = manager_load_startable_unit_or_warn(m, unit, NULL, &target);
8755dbad
ZJS
2170 if (r < 0 && in_initrd() && !arg_default_unit) {
2171 /* Fall back to default.target, which we used to always use by default. Only do this if no
2172 * explicit configuration was given. */
2173
2174 log_info("Falling back to " SPECIAL_DEFAULT_TARGET ".");
6acca5fc 2175
8755dbad
ZJS
2176 r = manager_load_startable_unit_or_warn(m, SPECIAL_DEFAULT_TARGET, NULL, &target);
2177 }
4109ede7 2178 if (r < 0) {
8755dbad 2179 log_info("Falling back to " SPECIAL_RESCUE_TARGET ".");
6acca5fc 2180
4109ede7 2181 r = manager_load_startable_unit_or_warn(m, SPECIAL_RESCUE_TARGET, NULL, &target);
6acca5fc 2182 if (r < 0) {
8755dbad
ZJS
2183 *ret_error_message = r == -ERFKILL ? SPECIAL_RESCUE_TARGET " masked"
2184 : "Failed to load " SPECIAL_RESCUE_TARGET;
4109ede7 2185 return r;
6acca5fc
LP
2186 }
2187 }
2188
2189 assert(target->load_state == UNIT_LOADED);
2190
f1d075dc 2191 r = manager_add_job(m, JOB_START, target, JOB_ISOLATE, NULL, &error, &job);
6acca5fc
LP
2192 if (r == -EPERM) {
2193 log_debug_errno(r, "Default target could not be isolated, starting instead: %s", bus_error_message(&error, r));
2194
2195 sd_bus_error_free(&error);
2196
f1d075dc 2197 r = manager_add_job(m, JOB_START, target, JOB_REPLACE, NULL, &error, &job);
6acca5fc
LP
2198 if (r < 0) {
2199 *ret_error_message = "Failed to start default target";
2200 return log_emergency_errno(r, "Failed to start default target: %s", bus_error_message(&error, r));
2201 }
2202
2203 } else if (r < 0) {
2204 *ret_error_message = "Failed to isolate default target";
2205 return log_emergency_errno(r, "Failed to isolate default target: %s", bus_error_message(&error, r));
c86c31d9
ZJS
2206 } else
2207 log_info("Queued %s job for default target %s.",
2208 job_type_to_string(job->type),
2209 unit_status_string(job->unit));
6acca5fc 2210
f1d075dc 2211 m->default_unit_job_id = job->id;
6acca5fc
LP
2212
2213 return 0;
2214}
2215
a9fd4cd1
FB
/* Reads the current RLIMIT_NOFILE and RLIMIT_MEMLOCK settings into the two supplied structures. If a
 * limit cannot be read a warning is logged and the corresponding structure is left as it was. */
static void save_rlimits(
                struct rlimit *saved_rlimit_nofile,
                struct rlimit *saved_rlimit_memlock) {

        int k;

        assert(saved_rlimit_nofile);
        assert(saved_rlimit_memlock);

        k = getrlimit(RLIMIT_NOFILE, saved_rlimit_nofile);
        if (k < 0)
                log_warning_errno(errno, "Reading RLIMIT_NOFILE failed, ignoring: %m");

        k = getrlimit(RLIMIT_MEMLOCK, saved_rlimit_memlock);
        if (k < 0)
                log_warning_errno(errno, "Reading RLIMIT_MEMLOCK failed, ignoring: %m");
}
2228
2229static void fallback_rlimit_nofile(const struct rlimit *saved_rlimit_nofile) {
2230 struct rlimit *rl;
2231
2232 if (arg_default_rlimit[RLIMIT_NOFILE])
2233 return;
2234
2235 /* Make sure forked processes get limits based on the original kernel setting */
2236
2237 rl = newdup(struct rlimit, saved_rlimit_nofile, 1);
2238 if (!rl) {
2239 log_oom();
2240 return;
2241 }
2242
2243 /* Bump the hard limit for system services to a substantially higher value. The default
2244 * hard limit current kernels set is pretty low (4K), mostly for historical
2245 * reasons. According to kernel developers, the fd handling in recent kernels has been
2246 * optimized substantially enough, so that we can bump the limit now, without paying too
2247 * high a price in memory or performance. Note however that we only bump the hard limit,
2248 * not the soft limit. That's because select() works the way it works, and chokes on fds
2249 * >= 1024. If we'd bump the soft limit globally, it might accidentally happen to
2250 * unexpecting programs that they get fds higher than what they can process using
2251 * select(). By only bumping the hard limit but leaving the low limit as it is we avoid
2252 * this pitfall: programs that are written by folks aware of the select() problem in mind
2253 * (and thus use poll()/epoll instead of select(), the way everybody should) can
2254 * explicitly opt into high fds by bumping their soft limit beyond 1024, to the hard limit
2255 * we pass. */
2256 if (arg_system) {
2257 int nr;
2258
2259 /* Get the underlying absolute limit the kernel enforces */
2260 nr = read_nr_open();
2261
2262 rl->rlim_max = MIN((rlim_t) nr, MAX(rl->rlim_max, (rlim_t) HIGH_RLIMIT_NOFILE));
2263 }
2264
2265 /* If for some reason we were invoked with a soft limit above 1024 (which should never
2266 * happen!, but who knows what we get passed in from pam_limit when invoked as --user
2267 * instance), then lower what we pass on to not confuse our children */
2268 rl->rlim_cur = MIN(rl->rlim_cur, (rlim_t) FD_SETSIZE);
2269
2270 arg_default_rlimit[RLIMIT_NOFILE] = rl;
2271}
2272
2273static void fallback_rlimit_memlock(const struct rlimit *saved_rlimit_memlock) {
2274 struct rlimit *rl;
2275
2276 /* Pass the original value down to invoked processes */
2277
2278 if (arg_default_rlimit[RLIMIT_MEMLOCK])
2279 return;
2280
2281 rl = newdup(struct rlimit, saved_rlimit_memlock, 1);
2282 if (!rl) {
2283 log_oom();
2284 return;
2285 }
2286
2287 arg_default_rlimit[RLIMIT_MEMLOCK] = rl;
2288}
2289
fb39af4c
ZJS
2290static void reset_arguments(void) {
2291 /* Frees/resets arg_* variables, with a few exceptions commented below. */
970777b5
LP
2292
2293 arg_default_unit = mfree(arg_default_unit);
fb39af4c
ZJS
2294
2295 /* arg_system — ignore */
2296
2297 arg_dump_core = true;
2298 arg_crash_chvt = -1;
2299 arg_crash_shell = false;
2300 arg_crash_reboot = false;
970777b5 2301 arg_confirm_spawn = mfree(arg_confirm_spawn);
fb39af4c 2302 arg_show_status = _SHOW_STATUS_INVALID;
36cf4507 2303 arg_status_unit_format = STATUS_UNIT_FORMAT_DEFAULT;
fb39af4c
ZJS
2304 arg_switched_root = false;
2305 arg_pager_flags = 0;
2306 arg_service_watchdogs = true;
2307 arg_default_std_output = EXEC_OUTPUT_JOURNAL;
2308 arg_default_std_error = EXEC_OUTPUT_INHERIT;
2309 arg_default_restart_usec = DEFAULT_RESTART_USEC;
2310 arg_default_timeout_start_usec = DEFAULT_TIMEOUT_USEC;
2311 arg_default_timeout_stop_usec = DEFAULT_TIMEOUT_USEC;
2312 arg_default_timeout_abort_usec = DEFAULT_TIMEOUT_USEC;
2313 arg_default_timeout_abort_set = false;
2314 arg_default_start_limit_interval = DEFAULT_START_LIMIT_INTERVAL;
2315 arg_default_start_limit_burst = DEFAULT_START_LIMIT_BURST;
2316 arg_runtime_watchdog = 0;
65224c1d 2317 arg_reboot_watchdog = 10 * USEC_PER_MINUTE;
acafd7d8 2318 arg_kexec_watchdog = 0;
fb39af4c
ZJS
2319 arg_early_core_pattern = NULL;
2320 arg_watchdog_device = NULL;
2321
970777b5 2322 arg_default_environment = strv_free(arg_default_environment);
fb39af4c
ZJS
2323 rlimit_free_all(arg_default_rlimit);
2324
2325 arg_capability_bounding_set = CAP_ALL;
2326 arg_no_new_privs = false;
2327 arg_timer_slack_nsec = NSEC_INFINITY;
2328 arg_default_timer_accuracy_usec = 1 * USEC_PER_MINUTE;
2329
970777b5 2330 arg_syscall_archs = set_free(arg_syscall_archs);
61fbbac1 2331
fb39af4c
ZJS
2332 /* arg_serialization — ignore */
2333
2334 arg_default_cpu_accounting = -1;
2335 arg_default_io_accounting = false;
2336 arg_default_ip_accounting = false;
2337 arg_default_blockio_accounting = false;
2338 arg_default_memory_accounting = MEMORY_ACCOUNTING_DEFAULT;
2339 arg_default_tasks_accounting = true;
3a0f06c4 2340 arg_default_tasks_max = DEFAULT_TASKS_MAX;
fb39af4c
ZJS
2341 arg_machine_id = (sd_id128_t) {};
2342 arg_cad_burst_action = EMERGENCY_ACTION_REBOOT_FORCE;
2343 arg_default_oom_policy = OOM_STOP;
2344
61fbbac1 2345 cpu_set_reset(&arg_cpu_affinity);
b070c7c0 2346 numa_policy_reset(&arg_numa_policy);
d247f232
LP
2347
2348 arg_random_seed = mfree(arg_random_seed);
2349 arg_random_seed_size = 0;
33d943d1 2350 arg_clock_usec = 0;
970777b5
LP
2351}
2352
a9fd4cd1
FB
2353static int parse_configuration(const struct rlimit *saved_rlimit_nofile,
2354 const struct rlimit *saved_rlimit_memlock) {
97d1fb94
LP
2355 int r;
2356
a9fd4cd1
FB
2357 assert(saved_rlimit_nofile);
2358 assert(saved_rlimit_memlock);
2359
fb39af4c
ZJS
2360 /* Assign configuration defaults */
2361 reset_arguments();
2362
97d1fb94 2363 r = parse_config_file();
470a5e6d
ZJS
2364 if (r < 0)
2365 log_warning_errno(r, "Failed to parse config file, ignoring: %m");
97d1fb94
LP
2366
2367 if (arg_system) {
2368 r = proc_cmdline_parse(parse_proc_cmdline_item, NULL, 0);
2369 if (r < 0)
2370 log_warning_errno(r, "Failed to parse kernel command line, ignoring: %m");
2371 }
2372
a9fd4cd1
FB
2373 /* Initialize some default rlimits for services if they haven't been configured */
2374 fallback_rlimit_nofile(saved_rlimit_nofile);
2375 fallback_rlimit_memlock(saved_rlimit_memlock);
2376
97d1fb94
LP
2377 /* Note that this also parses bits from the kernel command line, including "debug". */
2378 log_parse_environment();
2379
db33214b 2380 /* Initialize the show status setting if it hasn't been set explicitly yet */
7a293242 2381 if (arg_show_status == _SHOW_STATUS_INVALID)
db33214b
LP
2382 arg_show_status = SHOW_STATUS_YES;
2383
97d1fb94
LP
2384 return 0;
2385}
2386
b0d7c989
LP
2387static int safety_checks(void) {
2388
febf46a4 2389 if (getpid_cached() == 1 &&
baaa35ad
ZJS
2390 arg_action != ACTION_RUN)
2391 return log_error_errno(SYNTHETIC_ERRNO(EPERM),
2392 "Unsupported execution mode while PID 1.");
febf46a4
LP
2393
2394 if (getpid_cached() == 1 &&
baaa35ad
ZJS
2395 !arg_system)
2396 return log_error_errno(SYNTHETIC_ERRNO(EPERM),
2397 "Can't run --user mode as PID 1.");
febf46a4
LP
2398
2399 if (arg_action == ACTION_RUN &&
2400 arg_system &&
baaa35ad
ZJS
2401 getpid_cached() != 1)
2402 return log_error_errno(SYNTHETIC_ERRNO(EPERM),
2403 "Can't run system mode unless PID 1.");
febf46a4 2404
b0d7c989 2405 if (arg_action == ACTION_TEST &&
baaa35ad
ZJS
2406 geteuid() == 0)
2407 return log_error_errno(SYNTHETIC_ERRNO(EPERM),
2408 "Don't run test mode as root.");
b0d7c989
LP
2409
2410 if (!arg_system &&
2411 arg_action == ACTION_RUN &&
baaa35ad
ZJS
2412 sd_booted() <= 0)
2413 return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP),
2414 "Trying to run as user instance, but the system has not been booted with systemd.");
b0d7c989
LP
2415
2416 if (!arg_system &&
2417 arg_action == ACTION_RUN &&
baaa35ad
ZJS
2418 !getenv("XDG_RUNTIME_DIR"))
2419 return log_error_errno(SYNTHETIC_ERRNO(EUNATCH),
2420 "Trying to run as user instance, but $XDG_RUNTIME_DIR is not set.");
b0d7c989
LP
2421
2422 if (arg_system &&
2423 arg_action == ACTION_RUN &&
baaa35ad
ZJS
2424 running_in_chroot() > 0)
2425 return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP),
2426 "Cannot be run in a chroot() environment.");
b0d7c989
LP
2427
2428 return 0;
2429}
2430
74da609f
LP
2431static int initialize_security(
2432 bool *loaded_policy,
2433 dual_timestamp *security_start_timestamp,
2434 dual_timestamp *security_finish_timestamp,
2435 const char **ret_error_message) {
2436
2437 int r;
2438
2439 assert(loaded_policy);
2440 assert(security_start_timestamp);
2441 assert(security_finish_timestamp);
2442 assert(ret_error_message);
2443
2444 dual_timestamp_get(security_start_timestamp);
2445
97149f40 2446 r = mac_selinux_setup(loaded_policy);
74da609f
LP
2447 if (r < 0) {
2448 *ret_error_message = "Failed to load SELinux policy";
2449 return r;
2450 }
2451
2452 r = mac_smack_setup(loaded_policy);
2453 if (r < 0) {
2454 *ret_error_message = "Failed to load SMACK policy";
2455 return r;
2456 }
2457
2ffadd3c
Y
2458 r = mac_apparmor_setup();
2459 if (r < 0) {
2460 *ret_error_message = "Failed to load AppArmor policy";
2461 return r;
2462 }
2463
74da609f
LP
2464 r = ima_setup();
2465 if (r < 0) {
2466 *ret_error_message = "Failed to load IMA policy";
2467 return r;
2468 }
2469
2470 dual_timestamp_get(security_finish_timestamp);
2471 return 0;
2472}
2473
263162da
LP
2474static void test_summary(Manager *m) {
2475 assert(m);
2476
2477 printf("-> By units:\n");
2478 manager_dump_units(m, stdout, "\t");
2479
2480 printf("-> By jobs:\n");
2481 manager_dump_jobs(m, stdout, "\t");
2482}
2483
efeb853f
LP
2484static int collect_fds(FDSet **ret_fds, const char **ret_error_message) {
2485 int r;
2486
2487 assert(ret_fds);
2488 assert(ret_error_message);
2489
2490 r = fdset_new_fill(ret_fds);
2491 if (r < 0) {
2492 *ret_error_message = "Failed to allocate fd set";
2493 return log_emergency_errno(r, "Failed to allocate fd set: %m");
2494 }
2495
2496 fdset_cloexec(*ret_fds, true);
2497
2498 if (arg_serialization)
2499 assert_se(fdset_remove(*ret_fds, fileno(arg_serialization)) >= 0);
2500
2501 return 0;
2502}
2503
2e51b31c
LP
2504static void setup_console_terminal(bool skip_setup) {
2505
2506 if (!arg_system)
2507 return;
2508
2509 /* Become a session leader if we aren't one yet. */
2510 (void) setsid();
2511
2512 /* If we are init, we connect stdin/stdout/stderr to /dev/null and make sure we don't have a controlling
2513 * tty. */
2514 (void) release_terminal();
2515
2516 /* Reset the console, but only if this is really init and we are freshly booted */
2517 if (getpid_cached() == 1 && !skip_setup)
2518 (void) console_setup();
2519}
2520
aa40ff07
LP
/* Takes a quick look at the command line, long before it is parsed properly, to find out whether
 * this is a reexecution or a regular boot-up.
 *
 * Returns true if the extensive setup shall be skipped, which is the case when "--deserialize" is
 * present. "--switched-root" anywhere on the command line overrides that and always yields false:
 * after switching root all the special setup is done regardless of deserialization. */
static bool early_skip_setup_check(int argc, char *argv[]) {
        bool deserializing = false;
        int k;

        for (k = 1; k < argc; k++) {
                const char *arg = argv[k];

                if (streq(arg, "--switched-root"))
                        return false; /* Root was switched, so do not skip the setup. */

                if (streq(arg, "--deserialize"))
                        deserializing = true;
        }

        /* Deserializing means we are reexecuting, so the extensive setup was done before. */
        return deserializing;
}
2538
0e06a031
LP
2539static int save_env(void) {
2540 char **l;
2541
2542 l = strv_copy(environ);
2543 if (!l)
2544 return -ENOMEM;
2545
2546 strv_free_and_replace(saved_env, l);
2547 return 0;
2548}
2549
/* Entry point of the service manager.
 *
 * In order, this function: redirects to telinit for SysV compatibility, takes early timestamps, determines
 * whether we are being reexecuted (skip_setup), saves the command line and environment, configures logging
 * (differently for PID 1 on the host, PID 1 in a container, and non-PID-1 instances), mounts the API file
 * systems when running as PID 1, parses configuration and command line, handles the non-"run" actions
 * (help, version, dumps, introspection), then allocates and starts the Manager and enters the main loop.
 *
 * All error paths and the regular exit converge on the "finish" label, which frees the manager and then
 * either reexecutes, executes the shutdown binary, or (as PID 1) calls freeze_or_exit_or_reboot().
 *
 * Returns retval, which starts out as EXIT_FAILURE and is updated by the individual actions and by
 * invoke_main_loop(). */
60918275 2550int main(int argc, char *argv[]) {
625e8690
LP
2551
2552 dual_timestamp initrd_timestamp = DUAL_TIMESTAMP_NULL, userspace_timestamp = DUAL_TIMESTAMP_NULL, kernel_timestamp = DUAL_TIMESTAMP_NULL,
2553 security_start_timestamp = DUAL_TIMESTAMP_NULL, security_finish_timestamp = DUAL_TIMESTAMP_NULL;
ddfa8b0b
LP
2554 struct rlimit saved_rlimit_nofile = RLIMIT_MAKE_CONST(0),
2555 saved_rlimit_memlock = RLIMIT_MAKE_CONST(RLIM_INFINITY); /* The original rlimits we passed
2556 * in. Note we use different values
2557 * for the two that indicate whether
2558 * these fields are initialized! */
625e8690
LP
2559 bool skip_setup, loaded_policy = false, queue_default_job = false, first_boot = false, reexecute = false;
2560 char *switch_root_dir = NULL, *switch_root_init = NULL;
9d76d730 2561 usec_t before_startup, after_startup;
625e8690 2562 static char systemd[] = "systemd";
9d76d730 2563 char timespan[FORMAT_TIMESPAN_MAX];
625e8690
LP
2564 const char *shutdown_verb = NULL, *error_message = NULL;
2565 int r, retval = EXIT_FAILURE;
2566 Manager *m = NULL;
a16e1123 2567 FDSet *fds = NULL;
27b14a22 2568
d72a8f10 2569 /* SysV compatibility: redirect init → telinit */
6808a0bc 2570 redirect_telinit(argc, argv);
2cb1a60d 2571
d72a8f10 2572 /* Take timestamps early on */
c3a170f3
HH
2573 dual_timestamp_from_monotonic(&kernel_timestamp, 0);
2574 dual_timestamp_get(&userspace_timestamp);
2575
d72a8f10
LP
2576 /* Figure out whether we need to initialize the system, or if we already did that because we are
2577 * reexecuting */
aa40ff07 2578 skip_setup = early_skip_setup_check(argc, argv);
d03bc1b8 2579
d72a8f10
LP
2580 /* If we get started via the /sbin/init symlink then we are called 'init'. After a subsequent reexecution we
2581 * are then called 'systemd'. That is confusing, hence let's call us systemd right-away. */
f3b6a3ed 2582 program_invocation_short_name = systemd;
eee8b7ab 2583 (void) prctl(PR_SET_NAME, systemd);
5d6b1584 2584
d72a8f10 2585 /* Save the original command line */
36fea155 2586 save_argc_argv(argc, argv);
f3b6a3ed 2587
0e06a031
LP
2588 /* Save the original environment as we might need to restore it if we're requested to execute another
2589 * system manager later. */
2590 r = save_env();
2591 if (r < 0) {
2592 error_message = "Failed to copy environment block";
2593 goto finish;
2594 }
a5cede8c 2595
6fdb8de4 2596 /* Make sure that if the user says "syslog" we actually log to the journal. */
c1dc6153 2597 log_set_upgrade_syslog_to_journal(true);
bbe63281 2598
df0ff127 2599 if (getpid_cached() == 1) {
b5752d23
LP
2600 /* When we run as PID 1 force system mode */
2601 arg_system = true;
2602
48a601fe 2603 /* Disable the umask logic */
90dc8c2e
MG
2604 umask(0);
2605
92890452
LP
2606 /* Make sure that at least initially we do not ever log to journald/syslogd, because it might not be
2607 * activated yet (even though the log socket for it exists). */
d075092f
LP
2608 log_set_prohibit_ipc(true);
2609
48a601fe
LP
2610 /* Always reopen /dev/console when running as PID 1 or one of its pre-execve() children. This is
2611 * important so that we never end up logging to any foreign stderr, for example if we have to log in a
2612 * child process right before execve()'ing the actual binary, at a point in time where socket
2613 * activation stderr/stdout are already set up. */
2614 log_set_always_reopen_console(true);
48a601fe 2615
92890452 2616 if (detect_container() <= 0) {
4f8d551f 2617
92890452 2618 /* Running outside of a container as PID 1 */
92890452
LP
2619 log_set_target(LOG_TARGET_KMSG);
2620 log_open();
a866073d 2621
92890452
LP
2622 if (in_initrd())
2623 initrd_timestamp = userspace_timestamp;
c3ba6250 2624
92890452
LP
2625 if (!skip_setup) {
2626 r = mount_setup_early();
2627 if (r < 0) {
2628 error_message = "Failed to mount early API filesystems";
2629 goto finish;
2630 }
2631
0a2eef1e
LP
2632 /* Let's open the log backend a second time, in case the first time didn't
2633 * work. Quite possibly we have mounted /dev just now, so /dev/kmsg became
2634 * available, and it previously wasn't. */
2635 log_open();
2636
6123dfaa
ZJS
2637 disable_printk_ratelimit();
2638
 /* On failure initialize_security() is handed &error_message, hence we jump to finish without
 * setting one here. */
92890452
LP
2639 r = initialize_security(
2640 &loaded_policy,
2641 &security_start_timestamp,
2642 &security_finish_timestamp,
2643 &error_message);
2644 if (r < 0)
2645 goto finish;
d723cd65 2646 }
eee8b7ab 2647
92890452 2648 if (mac_selinux_init() < 0) {
a9ba0e32 2649 error_message = "Failed to initialize SELinux support";
96694e99 2650 goto finish;
92890452 2651 }
0b3325e7 2652
92890452
LP
2653 if (!skip_setup)
2654 initialize_clock();
2655
2656 /* Set the default for later on, but don't actually open the logs like this for now. Note that
2657 * if we are transitioning from the initrd there might still be journal fd open, and we
2658 * shouldn't attempt opening that before we parsed /proc/cmdline which might redirect output
2659 * elsewhere. */
2660 log_set_target(LOG_TARGET_JOURNAL_OR_KMSG);
2661
2662 } else {
2663 /* Running inside a container, as PID 1 */
92890452
LP
2664 log_set_target(LOG_TARGET_CONSOLE);
2665 log_open();
2666
2667 /* For later on, see above... */
2668 log_set_target(LOG_TARGET_JOURNAL);
2669
45250e66 2670 /* clear the kernel timestamp, because we are in a container */
92890452 2671 kernel_timestamp = DUAL_TIMESTAMP_NULL;
cb6531be 2672 }
7948c4df 2673
92890452 2674 initialize_coredump(skip_setup);
a866073d 2675
92890452
LP
2676 r = fixup_environment();
2677 if (r < 0) {
2678 log_emergency_errno(r, "Failed to fix up PID 1 environment: %m");
2679 error_message = "Failed to fix up PID1 environment";
2680 goto finish;
2681 }
a866073d 2682
92890452
LP
2683 /* Try to figure out if we can use colors with the console. No need to do that for user instances since
2684 * they never log into the console. */
3a18b604 2685 log_show_color(colors_enabled());
92890452 2686
c76cf844
AK
2687 r = make_null_stdio();
2688 if (r < 0)
92890452 2689 log_warning_errno(r, "Failed to redirect standard streams to /dev/null, ignoring: %m");
f84f9974 2690
a132bef0 2691 /* Load the kernel modules early. */
2e75e2a8
DM
2692 if (!skip_setup)
2693 kmod_setup();
2e75e2a8 2694
3196e423 2695 /* Mount /proc, /sys and friends, so that /proc/cmdline and /proc/$PID/fd is available. */
f74349d8 2696 r = mount_setup(loaded_policy, skip_setup);
cb6531be
ZJS
2697 if (r < 0) {
2698 error_message = "Failed to mount API filesystems";
8efe3c01 2699 goto finish;
cb6531be 2700 }
c18ecf03
LP
2701
2702 /* The efivarfs is now mounted, let's read the random seed off it */
2703 (void) efi_take_random_seed();
209b2592
FB
2704
2705 /* Cache command-line options passed from EFI variables */
2706 if (!skip_setup)
2707 (void) cache_efi_options_variable();
3196e423
LP
2708 } else {
2709 /* Running as user instance */
2710 arg_system = false;
2711 log_set_target(LOG_TARGET_AUTO);
2712 log_open();
2713
2714 /* clear the kernel timestamp, because we are not PID 1 */
2715 kernel_timestamp = DUAL_TIMESTAMP_NULL;
2716
2717 if (mac_selinux_init() < 0) {
2718 error_message = "Failed to initialize SELinux support";
2719 goto finish;
2720 }
0c85a4f3 2721 }
4ade7963 2722
a9fd4cd1
FB
2723 /* Save the original RLIMIT_NOFILE/RLIMIT_MEMLOCK so that we can reset it later when
2724 * transitioning from the initrd to the main systemd or suchlike. */
2725 save_rlimits(&saved_rlimit_nofile, &saved_rlimit_memlock);
2726
4ade7963 2727 /* Reset all signal handlers. */
ce30c8dc
LP
2728 (void) reset_all_signal_handlers();
2729 (void) ignore_signals(SIGNALS_IGNORE, -1);
078e4539 2730
ffe5c01e
FB
2731 (void) parse_configuration(&saved_rlimit_nofile, &saved_rlimit_memlock);
2732
2733 r = parse_argv(argc, argv);
2734 if (r < 0) {
2735 error_message = "Failed to parse commandline arguments";
f170852a 2736 goto finish;
ffe5c01e 2737 }
10c961b9 2738
 /* NOTE(review): no error_message is set on this path; presumably safety_checks() logs on its own —
 * confirm. */
b0d7c989
LP
2739 r = safety_checks();
2740 if (r < 0)
fe783b03 2741 goto finish;
fe783b03 2742
5c08257b 2743 if (IN_SET(arg_action, ACTION_TEST, ACTION_HELP, ACTION_DUMP_CONFIGURATION_ITEMS, ACTION_DUMP_BUS_PROPERTIES, ACTION_BUS_INTROSPECT))
0221d68a 2744 (void) pager_open(arg_pager_flags);
b0d7c989
LP
2745
 /* Anything but a regular run must not trigger the one-time setup steps guarded by skip_setup below. */
2746 if (arg_action != ACTION_RUN)
74e7579c 2747 skip_setup = true;
b87c2aa6 2748
fa0f4d8a 2749 if (arg_action == ACTION_HELP) {
37ec0fdd 2750 retval = help() < 0 ? EXIT_FAILURE : EXIT_SUCCESS;
f170852a 2751 goto finish;
9ba0bc4e
ZJS
2752 } else if (arg_action == ACTION_VERSION) {
2753 retval = version();
2754 goto finish;
fa0f4d8a 2755 } else if (arg_action == ACTION_DUMP_CONFIGURATION_ITEMS) {
e537352b 2756 unit_dump_config_items(stdout);
22f4096c 2757 retval = EXIT_SUCCESS;
e537352b 2758 goto finish;
bbc1acab
YW
2759 } else if (arg_action == ACTION_DUMP_BUS_PROPERTIES) {
2760 dump_bus_properties(stdout);
2761 retval = EXIT_SUCCESS;
2762 goto finish;
5c08257b
ZJS
2763 } else if (arg_action == ACTION_BUS_INTROSPECT) {
2764 r = bus_manager_introspect_implementations(stdout, arg_bus_introspect);
2765 retval = r >= 0 ? EXIT_SUCCESS : EXIT_FAILURE;
2766 goto finish;
f170852a
LP
2767 }
2768
 /* All other actions have jumped to finish above, only these two may remain. */
4c701096 2769 assert_se(IN_SET(arg_action, ACTION_RUN, ACTION_TEST));
f170852a 2770
5a2e0c62
LP
2771 /* Move out of the way, so that we won't block unmounts */
2772 assert_se(chdir("/") == 0);
2773
dea374e8 2774 if (arg_action == ACTION_RUN) {
d247f232
LP
2775 if (!skip_setup) {
2776 /* Apply the systemd.clock_usec= kernel command line switch */
45250e66 2777 apply_clock_update();
a70c72a0 2778
d247f232
LP
2779 /* Apply random seed from kernel command line */
2780 cmdline_take_random_seed();
2781 }
2782
c6885f5f
FB
2783 /* A core pattern might have been specified via the cmdline. */
2784 initialize_core_pattern(skip_setup);
2785
efeb853f 2786 /* Close logging fds, in order not to confuse collecting passed fds and terminal logic below */
a70c72a0
LP
2787 log_close();
2788
2789 /* Remember open file descriptors for later deserialization */
efeb853f
LP
2790 r = collect_fds(&fds, &error_message);
2791 if (r < 0)
dea374e8 2792 goto finish;
a16e1123 2793
2e51b31c
LP
2794 /* Give up any control of the console, but make sure it's initialized. */
2795 setup_console_terminal(skip_setup);
56d96fc0 2796
a70c72a0
LP
2797 /* Open the logging devices, if possible and necessary */
2798 log_open();
56d96fc0 2799 }
4ade7963 2800
31aef7ff 2801 log_execution_mode(&first_boot);
a5dab5ce 2802
2d776038 2803 r = initialize_runtime(skip_setup,
3023f2fe 2804 first_boot,
2d776038
LP
2805 &saved_rlimit_nofile,
2806 &saved_rlimit_memlock,
2807 &error_message);
2808 if (r < 0)
2809 goto finish;
4096d6f5 2810
e0a3da1f
ZJS
2811 r = manager_new(arg_system ? UNIT_FILE_SYSTEM : UNIT_FILE_USER,
2812 arg_action == ACTION_TEST ? MANAGER_TEST_FULL : 0,
2813 &m);
e96d6be7 2814 if (r < 0) {
da927ba9 2815 log_emergency_errno(r, "Failed to allocate manager object: %m");
cb6531be 2816 error_message = "Failed to allocate manager object";
60918275
LP
2817 goto finish;
2818 }
2819
 /* Hand the timestamps collected so far over to the manager object. */
9f9f0342
LP
2820 m->timestamps[MANAGER_TIMESTAMP_KERNEL] = kernel_timestamp;
2821 m->timestamps[MANAGER_TIMESTAMP_INITRD] = initrd_timestamp;
2822 m->timestamps[MANAGER_TIMESTAMP_USERSPACE] = userspace_timestamp;
d4ee7bd8
YW
2823 m->timestamps[manager_timestamp_initrd_mangle(MANAGER_TIMESTAMP_SECURITY_START)] = security_start_timestamp;
2824 m->timestamps[manager_timestamp_initrd_mangle(MANAGER_TIMESTAMP_SECURITY_FINISH)] = security_finish_timestamp;
9e58ff9c 2825
85cb4151 2826 set_manager_defaults(m);
7b46fc6a 2827 set_manager_settings(m);
fd130612 2828 manager_set_first_boot(m, first_boot);
27d340c7 2829
bf4df7c3 2830 /* Remember whether we should queue the default job (this has to be evaluated here, since
 * arg_serialization is closed and reset right after manager_startup() below) */
d3b1c508 2831 queue_default_job = !arg_serialization || arg_switched_root;
bf4df7c3 2832
9d76d730
LP
2833 before_startup = now(CLOCK_MONOTONIC);
2834
d3b1c508 2835 r = manager_startup(m, arg_serialization, fds);
58f88d92 2836 if (r < 0) {
cefb3eda 2837 error_message = "Failed to start up manager";
58f88d92
ZJS
2838 goto finish;
2839 }
a16e1123 2840
6acca5fc 2841 /* This will close all file descriptors that were opened, but not claimed by any unit. */
2feceb5e 2842 fds = fdset_free(fds);
74ca738f 2843 arg_serialization = safe_fclose(arg_serialization);
bf4df7c3
LP
2844
2845 if (queue_default_job) {
6acca5fc 2846 r = do_queue_default_job(m, &error_message);
718db961 2847 if (r < 0)
37d88da7 2848 goto finish;
6acca5fc 2849 }
ab17a050 2850
6acca5fc 2851 after_startup = now(CLOCK_MONOTONIC);
60918275 2852
6acca5fc
LP
2853 log_full(arg_action == ACTION_TEST ? LOG_INFO : LOG_DEBUG,
2854 "Loaded units and determined initial transaction in %s.",
2855 format_timespan(timespan, sizeof(timespan), after_startup - before_startup, 100 * USEC_PER_MSEC));
07672f49 2856
6acca5fc 2857 if (arg_action == ACTION_TEST) {
263162da 2858 test_summary(m);
6acca5fc
LP
2859 retval = EXIT_SUCCESS;
2860 goto finish;
e965d56d 2861 }
d46de8a1 2862
 /* The return value is ignored on purpose: the outcome is reported through the output parameters
 * (reexecute, retval, shutdown_verb, fds, switch_root_*, error_message) which are evaluated below. */
3046b6db 2863 (void) invoke_main_loop(m,
a9fd4cd1
FB
2864 &saved_rlimit_nofile,
2865 &saved_rlimit_memlock,
3046b6db
LP
2866 &reexecute,
2867 &retval,
2868 &shutdown_verb,
2869 &fds,
2870 &switch_root_dir,
2871 &switch_root_init,
2872 &error_message);
f170852a 2873
 /* Common exit path, reached via goto from the error and early-exit paths above as well as after the
 * main loop returned. */
60918275 2874finish:
b87c2aa6
ZJS
2875 pager_close();
2876
92890452 2877 if (m) {
 /* Copy the watchdog settings out of the manager before it is freed. */
986935cf
FB
2878 arg_reboot_watchdog = manager_get_watchdog(m, WATCHDOG_REBOOT);
2879 arg_kexec_watchdog = manager_get_watchdog(m, WATCHDOG_KEXEC);
92890452
LP
2880 m = manager_free(m);
2881 }
60918275 2882
cc56fafe 2883 mac_selinux_finish();
b2bb3dbe 2884
3c7878f9
LP
2885 if (reexecute)
2886 do_reexecute(argc, argv,
2887 &saved_rlimit_nofile,
2888 &saved_rlimit_memlock,
2889 fds,
2890 switch_root_dir,
2891 switch_root_init,
2892 &error_message); /* This only returns if reexecution failed */
a16e1123 2893
74ca738f 2894 arg_serialization = safe_fclose(arg_serialization);
2feceb5e 2895 fds = fdset_free(fds);
a16e1123 2896
0e06a031
LP
2897 saved_env = strv_free(saved_env);
2898
349cc4a5 2899#if HAVE_VALGRIND_VALGRIND_H
54b434b1
LP
2900 /* If we are PID 1 and running under valgrind, then let's exit
2901 * here explicitly. valgrind will only generate nice output on
2902 * exit(), not on exec(), hence let's do the former not the
2903 * latter here. */
8a2c1fbf
EJ
2904 if (getpid_cached() == 1 && RUNNING_ON_VALGRIND) {
2905 /* Cleanup watchdog_device strings for valgrind. We need them
2906 * in become_shutdown() so normally we cannot free them yet. */
2907 watchdog_free_device();
2908 arg_watchdog_device = mfree(arg_watchdog_device);
7d9eea2b 2909 reset_arguments();
27fe58b7 2910 return retval;
8a2c1fbf 2911 }
54b434b1
LP
2912#endif
2913
7e11a95e
EV
2914#if HAS_FEATURE_ADDRESS_SANITIZER
2915 __lsan_do_leak_check();
2916#endif
2917
b9080b03 2918 if (shutdown_verb) {
 /* NOTE(review): the unconditional error logging below suggests become_shutdown() only returns on
 * failure — confirm against its implementation. */
7eb35049 2919 r = become_shutdown(shutdown_verb, retval);
4a36297c 2920 log_error_errno(r, "Failed to execute shutdown binary, %s: %m", getpid_cached() == 1 ? "freezing" : "quitting");
9b9881d7 2921 error_message = "Failed to execute shutdown binary";
b9080b03
FF
2922 }
2923
8a2c1fbf
EJ
2924 watchdog_free_device();
2925 arg_watchdog_device = mfree(arg_watchdog_device);
2926
df0ff127 2927 if (getpid_cached() == 1) {
 /* As PID 1, show the pending error message (if any) as emergency status before
 * freeze_or_exit_or_reboot() takes over. */
cb6531be
ZJS
2928 if (error_message)
2929 manager_status_printf(NULL, STATUS_TYPE_EMERGENCY,
1fc464f6 2930 ANSI_HIGHLIGHT_RED "!!!!!!" ANSI_NORMAL,
bb259772
LP
2931 "%s.", error_message);
2932 freeze_or_exit_or_reboot();
cb6531be 2933 }
c3b3c274 2934
7d9eea2b 2935 reset_arguments();
60918275
LP
2936 return retval;
2937}