]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/core/main.c
Merge pull request #18007 from fw-strlen/ipv6_masq_and_dnat
[thirdparty/systemd.git] / src / core / main.c
CommitLineData
db9ecf05 1/* SPDX-License-Identifier: LGPL-2.1-or-later */
a7334b09 2
60918275 3#include <errno.h>
3dfc9763 4#include <fcntl.h>
f170852a 5#include <getopt.h>
664f88a7 6#include <sys/mount.h>
3dfc9763 7#include <sys/prctl.h>
b9e74c39 8#include <sys/reboot.h>
3dfc9763 9#include <unistd.h>
349cc4a5 10#if HAVE_SECCOMP
b64a3d86
LP
11#include <seccomp.h>
12#endif
349cc4a5 13#if HAVE_VALGRIND_VALGRIND_H
3dfc9763
LP
14#include <valgrind/valgrind.h>
15#endif
54b434b1 16
718db961 17#include "sd-bus.h"
cf0fbc49 18#include "sd-daemon.h"
b2e7486c 19#include "sd-messages.h"
3dfc9763 20
b5efdb8a 21#include "alloc-util.h"
2ffadd3c 22#include "apparmor-setup.h"
d9d93745 23#include "architecture.h"
3dfc9763
LP
24#include "build.h"
25#include "bus-error.h"
26#include "bus-util.h"
430f0182 27#include "capability-util.h"
a88c5b8a 28#include "cgroup-util.h"
24efb112 29#include "clock-util.h"
3dfc9763 30#include "conf-parser.h"
618234a5 31#include "cpu-set-util.h"
3dfc9763 32#include "dbus-manager.h"
c18ecf03 33#include "dbus.h"
3dfc9763 34#include "def.h"
32429805 35#include "dev-setup.h"
c18ecf03 36#include "efi-random.h"
209b2592 37#include "efivars.h"
eee8b7ab 38#include "emergency-action.h"
3dfc9763 39#include "env-util.h"
57b7a260 40#include "exit-status.h"
3ffd4af2 41#include "fd-util.h"
3dfc9763 42#include "fdset.h"
718db961 43#include "fileio.h"
f97b34a6 44#include "format-util.h"
f4f15635 45#include "fs-util.h"
d247f232 46#include "hexdecoct.h"
3dfc9763
LP
47#include "hostname-setup.h"
48#include "ima-setup.h"
49#include "killall.h"
50#include "kmod-setup.h"
eefc66aa 51#include "limits-util.h"
d7b8eec7 52#include "load-fragment.h"
3dfc9763 53#include "log.h"
b6e66135 54#include "loopback-setup.h"
b6e66135 55#include "machine-id-setup.h"
3dfc9763 56#include "manager.h"
32429805 57#include "mkdir.h"
3dfc9763 58#include "mount-setup.h"
d58ad743 59#include "os-util.h"
3dfc9763 60#include "pager.h"
614b022c 61#include "parse-argument.h"
6bedfcbb 62#include "parse-util.h"
7d5ceb64 63#include "path-util.h"
294bf0c3 64#include "pretty-print.h"
4e731273 65#include "proc-cmdline.h"
3dfc9763 66#include "process-util.h"
d247f232 67#include "random-util.h"
8869a0b4 68#include "raw-clone.h"
78f22b97 69#include "rlimit-util.h"
349cc4a5 70#if HAVE_SECCOMP
83f12b27
FS
71#include "seccomp-util.h"
72#endif
b6e66135 73#include "selinux-setup.h"
3dfc9763
LP
74#include "selinux-util.h"
75#include "signal-util.h"
ffbd2c4d 76#include "smack-setup.h"
3dfc9763 77#include "special.h"
8fcde012 78#include "stat-util.h"
15a5e950 79#include "stdio-util.h"
3dfc9763
LP
80#include "strv.h"
81#include "switch-root.h"
a8b627aa 82#include "sysctl-util.h"
3dfc9763 83#include "terminal-util.h"
8612da97 84#include "umask-util.h"
b1d4f8e1 85#include "user-util.h"
9ce17593 86#include "util.h"
3dfc9763
LP
87#include "virt.h"
88#include "watchdog.h"
b6e66135 89
7e11a95e
EV
90#if HAS_FEATURE_ADDRESS_SANITIZER
91#include <sanitizer/lsan_interface.h>
92#endif
93
3a0f06c4
ZJS
/* Built-in default for the task limit, expressed as the fraction 15/100. */
#define DEFAULT_TASKS_MAX ((TasksMax) { 15U, 100U }) /* 15% */

/* The operation this invocation should carry out. ACTION_RUN is the default. */
static enum {
        ACTION_RUN,
        ACTION_HELP,
        ACTION_VERSION,
        ACTION_TEST,
        ACTION_DUMP_CONFIGURATION_ITEMS,
        ACTION_DUMP_BUS_PROPERTIES,
        ACTION_BUS_INTROSPECT,
} arg_action = ACTION_RUN;
fb39af4c 105
5c08257b
ZJS
106static const char *arg_bus_introspect = NULL;
107
45250e66
LP
108/* Those variables are initialized to 0 automatically, so we avoid uninitialized memory access. Real
109 * defaults are assigned in reset_arguments() below. */
fb39af4c
ZJS
110static char *arg_default_unit;
111static bool arg_system;
112static bool arg_dump_core;
113static int arg_crash_chvt;
114static bool arg_crash_shell;
115static bool arg_crash_reboot;
116static char *arg_confirm_spawn;
117static ShowStatus arg_show_status;
36cf4507 118static StatusUnitFormat arg_status_unit_format;
fb39af4c
ZJS
119static bool arg_switched_root;
120static PagerFlags arg_pager_flags;
121static bool arg_service_watchdogs;
122static ExecOutput arg_default_std_output;
123static ExecOutput arg_default_std_error;
124static usec_t arg_default_restart_usec;
125static usec_t arg_default_timeout_start_usec;
126static usec_t arg_default_timeout_stop_usec;
127static usec_t arg_default_timeout_abort_usec;
128static bool arg_default_timeout_abort_set;
129static usec_t arg_default_start_limit_interval;
130static unsigned arg_default_start_limit_burst;
131static usec_t arg_runtime_watchdog;
65224c1d 132static usec_t arg_reboot_watchdog;
acafd7d8 133static usec_t arg_kexec_watchdog;
fb39af4c
ZJS
134static char *arg_early_core_pattern;
135static char *arg_watchdog_device;
136static char **arg_default_environment;
137static struct rlimit *arg_default_rlimit[_RLIMIT_MAX];
138static uint64_t arg_capability_bounding_set;
139static bool arg_no_new_privs;
140static nsec_t arg_timer_slack_nsec;
141static usec_t arg_default_timer_accuracy_usec;
142static Set* arg_syscall_archs;
143static FILE* arg_serialization;
144static int arg_default_cpu_accounting;
145static bool arg_default_io_accounting;
146static bool arg_default_ip_accounting;
147static bool arg_default_blockio_accounting;
148static bool arg_default_memory_accounting;
149static bool arg_default_tasks_accounting;
3a0f06c4 150static TasksMax arg_default_tasks_max;
fb39af4c
ZJS
151static sd_id128_t arg_machine_id;
152static EmergencyAction arg_cad_burst_action;
153static OOMPolicy arg_default_oom_policy;
154static CPUSet arg_cpu_affinity;
b070c7c0 155static NUMAPolicy arg_numa_policy;
3753325b 156static usec_t arg_clock_usec;
d247f232
LP
157static void *arg_random_seed;
158static size_t arg_random_seed_size;
61fbbac1 159
0e06a031
LP
160/* A copy of the original environment block */
161static char **saved_env = NULL;
162
a9fd4cd1
FB
163static int parse_configuration(const struct rlimit *saved_rlimit_nofile,
164 const struct rlimit *saved_rlimit_memlock);
4fc935ca 165
bb259772
LP
166_noreturn_ static void freeze_or_exit_or_reboot(void) {
167
c3b6a348
LP
168 /* If we are running in a container, let's prefer exiting, after all we can propagate an exit code to
169 * the container manager, and thus inform it that something went wrong. */
bb259772
LP
170 if (detect_container() > 0) {
171 log_emergency("Exiting PID 1...");
c3b6a348 172 _exit(EXIT_EXCEPTION);
bb259772 173 }
b9e74c39
LP
174
175 if (arg_crash_reboot) {
176 log_notice("Rebooting in 10s...");
177 (void) sleep(10);
178
179 log_notice("Rebooting now...");
180 (void) reboot(RB_AUTOBOOT);
181 log_emergency_errno(errno, "Failed to reboot: %m");
182 }
183
184 log_emergency("Freezing execution.");
185 freeze();
186}
187
848e863a 188_noreturn_ static void crash(int sig) {
7d06dad9
MS
189 struct sigaction sa;
190 pid_t pid;
97c4f35c 191
df0ff127 192 if (getpid_cached() != 1)
abb26902 193 /* Pass this on immediately, if this is not PID 1 */
92ca4cac 194 (void) raise(sig);
abb26902 195 else if (!arg_dump_core)
4104970e 196 log_emergency("Caught <%s>, not dumping core.", signal_to_string(sig));
97c4f35c 197 else {
7d06dad9 198 sa = (struct sigaction) {
189d5bac 199 .sa_handler = nop_signal_handler,
b92bea5d
ZJS
200 .sa_flags = SA_NOCLDSTOP|SA_RESTART,
201 };
97c4f35c 202
6f5e3f35 203 /* We want to wait for the core process, hence let's enable SIGCHLD */
92ca4cac 204 (void) sigaction(SIGCHLD, &sa, NULL);
6f5e3f35 205
8869a0b4 206 pid = raw_clone(SIGCHLD);
e62d8c39 207 if (pid < 0)
56f64d95 208 log_emergency_errno(errno, "Caught <%s>, cannot fork for core dump: %m", signal_to_string(sig));
97c4f35c 209 else if (pid == 0) {
97c4f35c 210 /* Enable default signal handler for core dump */
15a90032 211
92ca4cac
LP
212 sa = (struct sigaction) {
213 .sa_handler = SIG_DFL,
214 };
215 (void) sigaction(sig, &sa, NULL);
97c4f35c 216
15a90032
LP
217 /* Don't limit the coredump size */
218 (void) setrlimit(RLIMIT_CORE, &RLIMIT_MAKE_CONST(RLIM_INFINITY));
97c4f35c
LP
219
220 /* Just to be sure... */
e62d9b81 221 (void) chdir("/");
97c4f35c
LP
222
223 /* Raise the signal again */
ee05e779 224 pid = raw_getpid();
92ca4cac 225 (void) kill(pid, sig); /* raise() would kill the parent */
97c4f35c
LP
226
227 assert_not_reached("We shouldn't be here...");
bb85a582 228 _exit(EXIT_EXCEPTION);
4fc935ca 229 } else {
8e12a6ae
LP
230 siginfo_t status;
231 int r;
4fc935ca
LP
232
233 /* Order things nicely. */
e62d8c39
ZJS
234 r = wait_for_terminate(pid, &status);
235 if (r < 0)
da927ba9 236 log_emergency_errno(r, "Caught <%s>, waitpid() failed: %m", signal_to_string(sig));
e1714f02
ZJS
237 else if (status.si_code != CLD_DUMPED) {
238 const char *s = status.si_code == CLD_EXITED
e04ed6db 239 ? exit_status_to_string(status.si_status, EXIT_STATUS_LIBC)
e1714f02
ZJS
240 : signal_to_string(status.si_status);
241
ee05e779
ZJS
242 log_emergency("Caught <%s>, core dump failed (child "PID_FMT", code=%s, status=%i/%s).",
243 signal_to_string(sig),
e1714f02
ZJS
244 pid,
245 sigchld_code_to_string(status.si_code),
246 status.si_status, strna(s));
247 } else
248 log_emergency("Caught <%s>, dumped core as pid "PID_FMT".",
249 signal_to_string(sig), pid);
97c4f35c
LP
250 }
251 }
252
b9e74c39 253 if (arg_crash_chvt >= 0)
92ca4cac 254 (void) chvt(arg_crash_chvt);
601f6a1e 255
7d06dad9
MS
256 sa = (struct sigaction) {
257 .sa_handler = SIG_IGN,
258 .sa_flags = SA_NOCLDSTOP|SA_NOCLDWAIT|SA_RESTART,
259 };
260
261 /* Let the kernel reap children for us */
262 (void) sigaction(SIGCHLD, &sa, NULL);
8c43883a 263
7d06dad9 264 if (arg_crash_shell) {
b9e74c39 265 log_notice("Executing crash shell in 10s...");
92ca4cac 266 (void) sleep(10);
4fc935ca 267
8869a0b4 268 pid = raw_clone(SIGCHLD);
cd3bd60a 269 if (pid < 0)
56f64d95 270 log_emergency_errno(errno, "Failed to fork off crash shell: %m");
6f5e3f35 271 else if (pid == 0) {
b9e74c39 272 (void) setsid();
92ca4cac 273 (void) make_console_stdio();
595225af 274 (void) rlimit_nofile_safe();
92ca4cac 275 (void) execle("/bin/sh", "/bin/sh", NULL, environ);
6f5e3f35 276
ee05e779 277 log_emergency_errno(errno, "execle() failed: %m");
bb85a582 278 _exit(EXIT_EXCEPTION);
b9e74c39
LP
279 } else {
280 log_info("Spawned crash shell as PID "PID_FMT".", pid);
4cf0b03b 281 (void) wait_for_terminate(pid, NULL);
b9e74c39 282 }
4fc935ca
LP
283 }
284
bb259772 285 freeze_or_exit_or_reboot();
97c4f35c
LP
286}
287
288static void install_crash_handler(void) {
297d563d 289 static const struct sigaction sa = {
b92bea5d 290 .sa_handler = crash,
297d563d 291 .sa_flags = SA_NODEFER, /* So that we can raise the signal again from the signal handler */
b92bea5d 292 };
297d563d 293 int r;
97c4f35c 294
297d563d
LP
295 /* We ignore the return value here, since, we don't mind if we
296 * cannot set up a crash handler */
297 r = sigaction_many(&sa, SIGNALS_CRASH_HANDLER, -1);
298 if (r < 0)
299 log_debug_errno(r, "I had trouble setting up the crash handler, ignoring: %m");
97c4f35c 300}
f170852a 301
56d96fc0
LP
302static int console_setup(void) {
303 _cleanup_close_ int tty_fd = -1;
304 int r;
80876c20 305
512947d4 306 tty_fd = open_terminal("/dev/console", O_WRONLY|O_NOCTTY|O_CLOEXEC);
23bbb0de
MS
307 if (tty_fd < 0)
308 return log_error_errno(tty_fd, "Failed to open /dev/console: %m");
80876c20 309
56d96fc0
LP
310 /* We don't want to force text mode. plymouth may be showing
311 * pictures already from initrd. */
512947d4 312 r = reset_terminal_fd(tty_fd, false);
23bbb0de
MS
313 if (r < 0)
314 return log_error_errno(r, "Failed to reset /dev/console: %m");
843d2643 315
56d96fc0 316 return 0;
80876c20
LP
317}
318
ee48dbd5 319static int set_machine_id(const char *m) {
e042eab7 320 sd_id128_t t;
8b26cdbd 321 assert(m);
ee48dbd5 322
e042eab7 323 if (sd_id128_from_string(m, &t) < 0)
ee48dbd5
NC
324 return -EINVAL;
325
e042eab7 326 if (sd_id128_is_null(t))
ee48dbd5
NC
327 return -EINVAL;
328
e042eab7 329 arg_machine_id = t;
ee48dbd5
NC
330 return 0;
331}
332
96287a49 333static int parse_proc_cmdline_item(const char *key, const char *value, void *data) {
059cb385 334 int r;
f170852a 335
059cb385 336 assert(key);
5192bd19 337
1d84ad94 338 if (STR_IN_SET(key, "systemd.unit", "rd.systemd.unit")) {
bf4df7c3 339
1d84ad94
LP
340 if (proc_cmdline_value_missing(key, value))
341 return 0;
bf4df7c3 342
1d84ad94
LP
343 if (!unit_name_is_valid(value, UNIT_NAME_PLAIN|UNIT_NAME_INSTANCE))
344 log_warning("Unit name specified on %s= is not valid, ignoring: %s", key, value);
cd57038a
ZJS
345 else if (in_initrd() == !!startswith(key, "rd."))
346 return free_and_strdup_warn(&arg_default_unit, value);
f170852a 347
1d84ad94 348 } else if (proc_cmdline_key_streq(key, "systemd.dump_core")) {
4fc935ca 349
1d84ad94 350 r = value ? parse_boolean(value) : true;
fb472900 351 if (r < 0)
5e1ee764 352 log_warning_errno(r, "Failed to parse dump core switch %s, ignoring: %m", value);
4fc935ca 353 else
fa0f4d8a 354 arg_dump_core = r;
4fc935ca 355
c6885f5f
FB
356 } else if (proc_cmdline_key_streq(key, "systemd.early_core_pattern")) {
357
358 if (proc_cmdline_value_missing(key, value))
359 return 0;
360
361 if (path_is_absolute(value))
614b022c 362 (void) parse_path_argument(value, false, &arg_early_core_pattern);
c6885f5f
FB
363 else
364 log_warning("Specified core pattern '%s' is not an absolute path, ignoring.", value);
365
1d84ad94 366 } else if (proc_cmdline_key_streq(key, "systemd.crash_chvt")) {
b9e74c39 367
1d84ad94
LP
368 if (!value)
369 arg_crash_chvt = 0; /* turn on */
5e1ee764 370 else {
a07a7324 371 r = parse_crash_chvt(value, &arg_crash_chvt);
5e1ee764
YW
372 if (r < 0)
373 log_warning_errno(r, "Failed to parse crash chvt switch %s, ignoring: %m", value);
374 }
b9e74c39 375
1d84ad94 376 } else if (proc_cmdline_key_streq(key, "systemd.crash_shell")) {
4fc935ca 377
1d84ad94 378 r = value ? parse_boolean(value) : true;
fb472900 379 if (r < 0)
5e1ee764 380 log_warning_errno(r, "Failed to parse crash shell switch %s, ignoring: %m", value);
4fc935ca 381 else
fa0f4d8a 382 arg_crash_shell = r;
5e7ee61c 383
1d84ad94 384 } else if (proc_cmdline_key_streq(key, "systemd.crash_reboot")) {
5e7ee61c 385
1d84ad94 386 r = value ? parse_boolean(value) : true;
b9e74c39 387 if (r < 0)
5e1ee764 388 log_warning_errno(r, "Failed to parse crash reboot switch %s, ignoring: %m", value);
5e7ee61c 389 else
b9e74c39 390 arg_crash_reboot = r;
5e7ee61c 391
1d84ad94
LP
392 } else if (proc_cmdline_key_streq(key, "systemd.confirm_spawn")) {
393 char *s;
7d5ceb64 394
1d84ad94 395 r = parse_confirm_spawn(value, &s);
059cb385 396 if (r < 0)
5e1ee764
YW
397 log_warning_errno(r, "Failed to parse confirm_spawn switch %s, ignoring: %m", value);
398 else
399 free_and_replace(arg_confirm_spawn, s);
601f6a1e 400
2a12e32e
JK
401 } else if (proc_cmdline_key_streq(key, "systemd.service_watchdogs")) {
402
403 r = value ? parse_boolean(value) : true;
404 if (r < 0)
5e1ee764 405 log_warning_errno(r, "Failed to parse service watchdog switch %s, ignoring: %m", value);
2a12e32e
JK
406 else
407 arg_service_watchdogs = r;
408
1d84ad94 409 } else if (proc_cmdline_key_streq(key, "systemd.show_status")) {
9e58ff9c 410
1d84ad94
LP
411 if (value) {
412 r = parse_show_status(value, &arg_show_status);
413 if (r < 0)
5e1ee764 414 log_warning_errno(r, "Failed to parse show status switch %s, ignoring: %m", value);
1d84ad94
LP
415 } else
416 arg_show_status = SHOW_STATUS_YES;
059cb385 417
36cf4507
ZJS
418 } else if (proc_cmdline_key_streq(key, "systemd.status_unit_format")) {
419
420 if (proc_cmdline_value_missing(key, value))
421 return 0;
422
423 r = status_unit_format_from_string(value);
424 if (r < 0)
425 log_warning_errno(r, "Failed to parse %s=%s, ignoring: %m", key, value);
426 else
427 arg_status_unit_format = r;
428
1d84ad94
LP
429 } else if (proc_cmdline_key_streq(key, "systemd.default_standard_output")) {
430
431 if (proc_cmdline_value_missing(key, value))
432 return 0;
0a494f1f 433
059cb385 434 r = exec_output_from_string(value);
fb472900 435 if (r < 0)
5e1ee764 436 log_warning_errno(r, "Failed to parse default standard output switch %s, ignoring: %m", value);
0a494f1f
LP
437 else
438 arg_default_std_output = r;
0a494f1f 439
1d84ad94
LP
440 } else if (proc_cmdline_key_streq(key, "systemd.default_standard_error")) {
441
442 if (proc_cmdline_value_missing(key, value))
443 return 0;
059cb385
LP
444
445 r = exec_output_from_string(value);
fb472900 446 if (r < 0)
5e1ee764 447 log_warning_errno(r, "Failed to parse default standard error switch %s, ignoring: %m", value);
0a494f1f
LP
448 else
449 arg_default_std_error = r;
9e7c5357 450
1d84ad94
LP
451 } else if (streq(key, "systemd.setenv")) {
452
453 if (proc_cmdline_value_missing(key, value))
454 return 0;
059cb385 455
b70935ac
ZJS
456 if (!env_assignment_is_valid(value))
457 log_warning("Environment variable assignment '%s' is not valid. Ignoring.", value);
458 else {
459 r = strv_env_replace_strdup(&arg_default_environment, value);
460 if (r < 0)
1d84ad94 461 return log_oom();
b70935ac 462 }
9e58ff9c 463
1d84ad94
LP
464 } else if (proc_cmdline_key_streq(key, "systemd.machine_id")) {
465
466 if (proc_cmdline_value_missing(key, value))
467 return 0;
468
469 r = set_machine_id(value);
470 if (r < 0)
5e1ee764 471 log_warning_errno(r, "MachineID '%s' is not valid, ignoring: %m", value);
ee48dbd5 472
1d84ad94
LP
473 } else if (proc_cmdline_key_streq(key, "systemd.default_timeout_start_sec")) {
474
475 if (proc_cmdline_value_missing(key, value))
476 return 0;
477
478 r = parse_sec(value, &arg_default_timeout_start_usec);
479 if (r < 0)
5e1ee764 480 log_warning_errno(r, "Failed to parse default start timeout '%s', ignoring: %m", value);
1d84ad94
LP
481
482 if (arg_default_timeout_start_usec <= 0)
483 arg_default_timeout_start_usec = USEC_INFINITY;
ee48dbd5 484
68d58f38
LP
485 } else if (proc_cmdline_key_streq(key, "systemd.cpu_affinity")) {
486
487 if (proc_cmdline_value_missing(key, value))
488 return 0;
489
490 r = parse_cpu_set(value, &arg_cpu_affinity);
491 if (r < 0)
162392b7 492 log_warning_errno(r, "Failed to parse CPU affinity mask '%s', ignoring: %m", value);
68d58f38 493
8a2c1fbf
EJ
494 } else if (proc_cmdline_key_streq(key, "systemd.watchdog_device")) {
495
496 if (proc_cmdline_value_missing(key, value))
497 return 0;
498
614b022c 499 (void) parse_path_argument(value, false, &arg_watchdog_device);
8a2c1fbf 500
3753325b
LP
501 } else if (proc_cmdline_key_streq(key, "systemd.clock_usec")) {
502
503 if (proc_cmdline_value_missing(key, value))
504 return 0;
505
506 r = safe_atou64(value, &arg_clock_usec);
507 if (r < 0)
508 log_warning_errno(r, "Failed to parse systemd.clock_usec= argument, ignoring: %s", value);
509
d247f232
LP
510 } else if (proc_cmdline_key_streq(key, "systemd.random_seed")) {
511 void *p;
512 size_t sz;
513
514 if (proc_cmdline_value_missing(key, value))
515 return 0;
516
517 r = unbase64mem(value, (size_t) -1, &p, &sz);
518 if (r < 0)
519 log_warning_errno(r, "Failed to parse systemd.random_seed= argument, ignoring: %s", value);
520
521 free(arg_random_seed);
522 arg_random_seed = sz > 0 ? p : mfree(p);
523 arg_random_seed_size = sz;
524
059cb385 525 } else if (streq(key, "quiet") && !value) {
d7b15e0a 526
7a293242 527 if (arg_show_status == _SHOW_STATUS_INVALID)
0d066dd1 528 arg_show_status = SHOW_STATUS_ERROR;
059cb385
LP
529
530 } else if (streq(key, "debug") && !value) {
d7b15e0a 531
1de1c9c3
LP
532 /* Note that log_parse_environment() handles 'debug'
533 * too, and sets the log level to LOG_DEBUG. */
d7b15e0a 534
75f86906 535 if (detect_container() > 0)
b2103dcc 536 log_set_target(LOG_TARGET_CONSOLE);
059cb385 537
dcd61450 538 } else if (!value) {
e2c9a131 539 const char *target;
f170852a 540
ceae6295 541 /* Compatible with SysV, but supported independently even if SysV compatibility is disabled. */
e2c9a131
EV
542 target = runlevel_to_target(key);
543 if (target)
cd57038a 544 return free_and_strdup_warn(&arg_default_unit, target);
f170852a
LP
545 }
546
547 return 0;
548}
549
e8e581bf
ZJS
550#define DEFINE_SETTER(name, func, descr) \
551 static int name(const char *unit, \
552 const char *filename, \
553 unsigned line, \
554 const char *section, \
71a61510 555 unsigned section_line, \
e8e581bf
ZJS
556 const char *lvalue, \
557 int ltype, \
558 const char *rvalue, \
559 void *data, \
560 void *userdata) { \
561 \
562 int r; \
563 \
564 assert(filename); \
565 assert(lvalue); \
566 assert(rvalue); \
567 \
568 r = func(rvalue); \
569 if (r < 0) \
d1cefe0a
LP
570 log_syntax(unit, LOG_ERR, filename, line, r, \
571 "Invalid " descr "'%s': %m", \
572 rvalue); \
e8e581bf
ZJS
573 \
574 return 0; \
575 }
487393e9 576
a6ecbf83
FB
577DEFINE_SETTER(config_parse_level2, log_set_max_level_from_string, "log level");
578DEFINE_SETTER(config_parse_target, log_set_target_from_string, "target");
c5673ed0 579DEFINE_SETTER(config_parse_color, log_show_color_from_string, "color");
a6ecbf83 580DEFINE_SETTER(config_parse_location, log_show_location_from_string, "location");
c5673ed0 581DEFINE_SETTER(config_parse_time, log_show_time_from_string, "time");
487393e9 582
a61d6874
ZJS
583static int config_parse_default_timeout_abort(
584 const char *unit,
585 const char *filename,
586 unsigned line,
587 const char *section,
588 unsigned section_line,
589 const char *lvalue,
590 int ltype,
591 const char *rvalue,
592 void *data,
593 void *userdata) {
594 int r;
595
596 r = config_parse_timeout_abort(unit, filename, line, section, section_line, lvalue, ltype, rvalue,
597 &arg_default_timeout_abort_usec, userdata);
598 if (r >= 0)
599 arg_default_timeout_abort_set = r;
600 return 0;
601}
487393e9 602
a61d6874 603static int parse_config_file(void) {
f975e971 604 const ConfigTableItem items[] = {
a61d6874
ZJS
605 { "Manager", "LogLevel", config_parse_level2, 0, NULL },
606 { "Manager", "LogTarget", config_parse_target, 0, NULL },
607 { "Manager", "LogColor", config_parse_color, 0, NULL },
608 { "Manager", "LogLocation", config_parse_location, 0, NULL },
c5673ed0 609 { "Manager", "LogTime", config_parse_time, 0, NULL },
a61d6874
ZJS
610 { "Manager", "DumpCore", config_parse_bool, 0, &arg_dump_core },
611 { "Manager", "CrashChVT", /* legacy */ config_parse_crash_chvt, 0, &arg_crash_chvt },
612 { "Manager", "CrashChangeVT", config_parse_crash_chvt, 0, &arg_crash_chvt },
613 { "Manager", "CrashShell", config_parse_bool, 0, &arg_crash_shell },
614 { "Manager", "CrashReboot", config_parse_bool, 0, &arg_crash_reboot },
615 { "Manager", "ShowStatus", config_parse_show_status, 0, &arg_show_status },
616 { "Manager", "StatusUnitFormat", config_parse_status_unit_format, 0, &arg_status_unit_format },
617 { "Manager", "CPUAffinity", config_parse_cpu_affinity2, 0, &arg_cpu_affinity },
618 { "Manager", "NUMAPolicy", config_parse_numa_policy, 0, &arg_numa_policy.type },
619 { "Manager", "NUMAMask", config_parse_numa_mask, 0, &arg_numa_policy },
620 { "Manager", "JoinControllers", config_parse_warn_compat, DISABLED_CONFIGURATION, NULL },
621 { "Manager", "RuntimeWatchdogSec", config_parse_sec, 0, &arg_runtime_watchdog },
622 { "Manager", "RebootWatchdogSec", config_parse_sec, 0, &arg_reboot_watchdog },
623 { "Manager", "ShutdownWatchdogSec", config_parse_sec, 0, &arg_reboot_watchdog }, /* obsolete alias */
624 { "Manager", "KExecWatchdogSec", config_parse_sec, 0, &arg_kexec_watchdog },
625 { "Manager", "WatchdogDevice", config_parse_path, 0, &arg_watchdog_device },
626 { "Manager", "CapabilityBoundingSet", config_parse_capability_set, 0, &arg_capability_bounding_set },
627 { "Manager", "NoNewPrivileges", config_parse_bool, 0, &arg_no_new_privs },
349cc4a5 628#if HAVE_SECCOMP
a61d6874 629 { "Manager", "SystemCallArchitectures", config_parse_syscall_archs, 0, &arg_syscall_archs },
89fffa27 630#endif
a61d6874
ZJS
631 { "Manager", "TimerSlackNSec", config_parse_nsec, 0, &arg_timer_slack_nsec },
632 { "Manager", "DefaultTimerAccuracySec", config_parse_sec, 0, &arg_default_timer_accuracy_usec },
633 { "Manager", "DefaultStandardOutput", config_parse_output_restricted, 0, &arg_default_std_output },
634 { "Manager", "DefaultStandardError", config_parse_output_restricted, 0, &arg_default_std_error },
635 { "Manager", "DefaultTimeoutStartSec", config_parse_sec, 0, &arg_default_timeout_start_usec },
636 { "Manager", "DefaultTimeoutStopSec", config_parse_sec, 0, &arg_default_timeout_stop_usec },
637 { "Manager", "DefaultTimeoutAbortSec", config_parse_default_timeout_abort, 0, NULL },
638 { "Manager", "DefaultRestartSec", config_parse_sec, 0, &arg_default_restart_usec },
639 { "Manager", "DefaultStartLimitInterval", config_parse_sec, 0, &arg_default_start_limit_interval }, /* obsolete alias */
640 { "Manager", "DefaultStartLimitIntervalSec", config_parse_sec, 0, &arg_default_start_limit_interval },
641 { "Manager", "DefaultStartLimitBurst", config_parse_unsigned, 0, &arg_default_start_limit_burst },
642 { "Manager", "DefaultEnvironment", config_parse_environ, 0, &arg_default_environment },
643 { "Manager", "DefaultLimitCPU", config_parse_rlimit, RLIMIT_CPU, arg_default_rlimit },
644 { "Manager", "DefaultLimitFSIZE", config_parse_rlimit, RLIMIT_FSIZE, arg_default_rlimit },
645 { "Manager", "DefaultLimitDATA", config_parse_rlimit, RLIMIT_DATA, arg_default_rlimit },
646 { "Manager", "DefaultLimitSTACK", config_parse_rlimit, RLIMIT_STACK, arg_default_rlimit },
647 { "Manager", "DefaultLimitCORE", config_parse_rlimit, RLIMIT_CORE, arg_default_rlimit },
648 { "Manager", "DefaultLimitRSS", config_parse_rlimit, RLIMIT_RSS, arg_default_rlimit },
649 { "Manager", "DefaultLimitNOFILE", config_parse_rlimit, RLIMIT_NOFILE, arg_default_rlimit },
650 { "Manager", "DefaultLimitAS", config_parse_rlimit, RLIMIT_AS, arg_default_rlimit },
651 { "Manager", "DefaultLimitNPROC", config_parse_rlimit, RLIMIT_NPROC, arg_default_rlimit },
652 { "Manager", "DefaultLimitMEMLOCK", config_parse_rlimit, RLIMIT_MEMLOCK, arg_default_rlimit },
653 { "Manager", "DefaultLimitLOCKS", config_parse_rlimit, RLIMIT_LOCKS, arg_default_rlimit },
654 { "Manager", "DefaultLimitSIGPENDING", config_parse_rlimit, RLIMIT_SIGPENDING, arg_default_rlimit },
655 { "Manager", "DefaultLimitMSGQUEUE", config_parse_rlimit, RLIMIT_MSGQUEUE, arg_default_rlimit },
656 { "Manager", "DefaultLimitNICE", config_parse_rlimit, RLIMIT_NICE, arg_default_rlimit },
657 { "Manager", "DefaultLimitRTPRIO", config_parse_rlimit, RLIMIT_RTPRIO, arg_default_rlimit },
658 { "Manager", "DefaultLimitRTTIME", config_parse_rlimit, RLIMIT_RTTIME, arg_default_rlimit },
659 { "Manager", "DefaultCPUAccounting", config_parse_tristate, 0, &arg_default_cpu_accounting },
660 { "Manager", "DefaultIOAccounting", config_parse_bool, 0, &arg_default_io_accounting },
661 { "Manager", "DefaultIPAccounting", config_parse_bool, 0, &arg_default_ip_accounting },
662 { "Manager", "DefaultBlockIOAccounting", config_parse_bool, 0, &arg_default_blockio_accounting },
663 { "Manager", "DefaultMemoryAccounting", config_parse_bool, 0, &arg_default_memory_accounting },
664 { "Manager", "DefaultTasksAccounting", config_parse_bool, 0, &arg_default_tasks_accounting },
665 { "Manager", "DefaultTasksMax", config_parse_tasks_max, 0, &arg_default_tasks_max },
666 { "Manager", "CtrlAltDelBurstAction", config_parse_emergency_action, 0, &arg_cad_burst_action },
667 { "Manager", "DefaultOOMPolicy", config_parse_oom_policy, 0, &arg_default_oom_policy },
d3b1c508 668 {}
487393e9
LP
669 };
670
1b907b5c 671 const char *fn, *conf_dirs_nulstr;
487393e9 672
463d0d15 673 fn = arg_system ?
75eb6154
LP
674 PKGSYSCONFDIR "/system.conf" :
675 PKGSYSCONFDIR "/user.conf";
676
463d0d15 677 conf_dirs_nulstr = arg_system ?
75eb6154
LP
678 CONF_PATHS_NULSTR("systemd/system.conf.d") :
679 CONF_PATHS_NULSTR("systemd/user.conf.d");
680
4f9ff96a
LP
681 (void) config_parse_many_nulstr(
682 fn, conf_dirs_nulstr,
683 "Manager\0",
684 config_item_table_lookup, items,
685 CONFIG_PARSE_WARN,
686 NULL,
687 NULL);
36c16a7c
LP
688
689 /* Traditionally "0" was used to turn off the default unit timeouts. Fix this up so that we used USEC_INFINITY
690 * like everywhere else. */
691 if (arg_default_timeout_start_usec <= 0)
692 arg_default_timeout_start_usec = USEC_INFINITY;
693 if (arg_default_timeout_stop_usec <= 0)
694 arg_default_timeout_stop_usec = USEC_INFINITY;
487393e9 695
487393e9
LP
696 return 0;
697}
698
85cb4151 699static void set_manager_defaults(Manager *m) {
06af2a04
TB
700
701 assert(m);
702
5b65ae15
LP
703 /* Propagates the various default unit property settings into the manager object, i.e. properties that do not
704 * affect the manager itself, but are just what newly allocated units will have set if they haven't set
705 * anything else. (Also see set_manager_settings() for the settings that affect the manager's own behaviour) */
706
06af2a04
TB
707 m->default_timer_accuracy_usec = arg_default_timer_accuracy_usec;
708 m->default_std_output = arg_default_std_output;
709 m->default_std_error = arg_default_std_error;
710 m->default_timeout_start_usec = arg_default_timeout_start_usec;
711 m->default_timeout_stop_usec = arg_default_timeout_stop_usec;
dc653bf4
JK
712 m->default_timeout_abort_usec = arg_default_timeout_abort_usec;
713 m->default_timeout_abort_set = arg_default_timeout_abort_set;
06af2a04
TB
714 m->default_restart_usec = arg_default_restart_usec;
715 m->default_start_limit_interval = arg_default_start_limit_interval;
716 m->default_start_limit_burst = arg_default_start_limit_burst;
a88c5b8a
CD
717
718 /* On 4.15+ with unified hierarchy, CPU accounting is essentially free as it doesn't require the CPU
719 * controller to be enabled, so the default is to enable it unless we got told otherwise. */
720 if (arg_default_cpu_accounting >= 0)
721 m->default_cpu_accounting = arg_default_cpu_accounting;
722 else
723 m->default_cpu_accounting = cpu_accounting_is_cheap();
724
13c31542 725 m->default_io_accounting = arg_default_io_accounting;
377bfd2d 726 m->default_ip_accounting = arg_default_ip_accounting;
06af2a04
TB
727 m->default_blockio_accounting = arg_default_blockio_accounting;
728 m->default_memory_accounting = arg_default_memory_accounting;
03a7b521 729 m->default_tasks_accounting = arg_default_tasks_accounting;
0af20ea2 730 m->default_tasks_max = arg_default_tasks_max;
afcfaa69 731 m->default_oom_policy = arg_default_oom_policy;
06af2a04 732
79a224c4
LP
733 (void) manager_set_default_rlimits(m, arg_default_rlimit);
734
735 (void) manager_default_environment(m);
736 (void) manager_transient_environment_add(m, arg_default_environment);
06af2a04
TB
737}
738
7b46fc6a
LP
739static void set_manager_settings(Manager *m) {
740
741 assert(m);
742
986935cf
FB
743 /* Propagates the various manager settings into the manager object, i.e. properties that
744 * effect the manager itself (as opposed to just being inherited into newly allocated
745 * units, see set_manager_defaults() above). */
5b65ae15 746
7b46fc6a 747 m->confirm_spawn = arg_confirm_spawn;
2a12e32e 748 m->service_watchdogs = arg_service_watchdogs;
7b46fc6a
LP
749 m->cad_burst_action = arg_cad_burst_action;
750
986935cf
FB
751 manager_set_watchdog(m, WATCHDOG_RUNTIME, arg_runtime_watchdog);
752 manager_set_watchdog(m, WATCHDOG_REBOOT, arg_reboot_watchdog);
753 manager_set_watchdog(m, WATCHDOG_KEXEC, arg_kexec_watchdog);
754
7365a296 755 manager_set_show_status(m, arg_show_status, "commandline");
36cf4507 756 m->status_unit_format = arg_status_unit_format;
7b46fc6a
LP
757}
758
f170852a 759static int parse_argv(int argc, char *argv[]) {
f170852a
LP
760 enum {
761 ARG_LOG_LEVEL = 0x100,
762 ARG_LOG_TARGET,
bbe63281
LP
763 ARG_LOG_COLOR,
764 ARG_LOG_LOCATION,
c5673ed0 765 ARG_LOG_TIME,
2f198e2f 766 ARG_UNIT,
edb9aaa8 767 ARG_SYSTEM,
af2d49f7 768 ARG_USER,
e537352b 769 ARG_TEST,
b87c2aa6 770 ARG_NO_PAGER,
9ba0bc4e 771 ARG_VERSION,
80876c20 772 ARG_DUMP_CONFIGURATION_ITEMS,
bbc1acab 773 ARG_DUMP_BUS_PROPERTIES,
5c08257b 774 ARG_BUS_INTROSPECT,
9e58ff9c 775 ARG_DUMP_CORE,
b9e74c39 776 ARG_CRASH_CHVT,
9e58ff9c 777 ARG_CRASH_SHELL,
b9e74c39 778 ARG_CRASH_REBOOT,
a16e1123 779 ARG_CONFIRM_SPAWN,
9e58ff9c 780 ARG_SHOW_STATUS,
4288f619 781 ARG_DESERIALIZE,
2660882b 782 ARG_SWITCHED_ROOT,
0a494f1f 783 ARG_DEFAULT_STD_OUTPUT,
ee48dbd5 784 ARG_DEFAULT_STD_ERROR,
2a12e32e
JK
785 ARG_MACHINE_ID,
786 ARG_SERVICE_WATCHDOGS,
f170852a
LP
787 };
788
789 static const struct option options[] = {
a16e1123
LP
790 { "log-level", required_argument, NULL, ARG_LOG_LEVEL },
791 { "log-target", required_argument, NULL, ARG_LOG_TARGET },
bbe63281
LP
792 { "log-color", optional_argument, NULL, ARG_LOG_COLOR },
793 { "log-location", optional_argument, NULL, ARG_LOG_LOCATION },
c5673ed0 794 { "log-time", optional_argument, NULL, ARG_LOG_TIME },
2f198e2f 795 { "unit", required_argument, NULL, ARG_UNIT },
edb9aaa8 796 { "system", no_argument, NULL, ARG_SYSTEM },
af2d49f7 797 { "user", no_argument, NULL, ARG_USER },
a16e1123 798 { "test", no_argument, NULL, ARG_TEST },
b87c2aa6 799 { "no-pager", no_argument, NULL, ARG_NO_PAGER },
a16e1123 800 { "help", no_argument, NULL, 'h' },
9ba0bc4e 801 { "version", no_argument, NULL, ARG_VERSION },
a16e1123 802 { "dump-configuration-items", no_argument, NULL, ARG_DUMP_CONFIGURATION_ITEMS },
bbc1acab 803 { "dump-bus-properties", no_argument, NULL, ARG_DUMP_BUS_PROPERTIES },
5c08257b 804 { "bus-introspect", required_argument, NULL, ARG_BUS_INTROSPECT },
a5d87bf0 805 { "dump-core", optional_argument, NULL, ARG_DUMP_CORE },
b9e74c39 806 { "crash-chvt", required_argument, NULL, ARG_CRASH_CHVT },
a5d87bf0 807 { "crash-shell", optional_argument, NULL, ARG_CRASH_SHELL },
b9e74c39 808 { "crash-reboot", optional_argument, NULL, ARG_CRASH_REBOOT },
a5d87bf0 809 { "confirm-spawn", optional_argument, NULL, ARG_CONFIRM_SPAWN },
6e98720f 810 { "show-status", optional_argument, NULL, ARG_SHOW_STATUS },
a16e1123 811 { "deserialize", required_argument, NULL, ARG_DESERIALIZE },
2660882b 812 { "switched-root", no_argument, NULL, ARG_SWITCHED_ROOT },
0a494f1f
LP
813 { "default-standard-output", required_argument, NULL, ARG_DEFAULT_STD_OUTPUT, },
814 { "default-standard-error", required_argument, NULL, ARG_DEFAULT_STD_ERROR, },
ee48dbd5 815 { "machine-id", required_argument, NULL, ARG_MACHINE_ID },
2a12e32e 816 { "service-watchdogs", required_argument, NULL, ARG_SERVICE_WATCHDOGS },
fb472900 817 {}
f170852a
LP
818 };
819
820 int c, r;
9a9ca408 821 bool user_arg_seen = false;
f170852a
LP
822
823 assert(argc >= 1);
824 assert(argv);
825
df0ff127 826 if (getpid_cached() == 1)
b770165a
LP
827 opterr = 0;
828
099663ff 829 while ((c = getopt_long(argc, argv, "hDbsz:", options, NULL)) >= 0)
f170852a
LP
830
831 switch (c) {
832
833 case ARG_LOG_LEVEL:
fb472900 834 r = log_set_max_level_from_string(optarg);
2b5107e1
ZJS
835 if (r < 0)
836 return log_error_errno(r, "Failed to parse log level \"%s\": %m", optarg);
f170852a
LP
837
838 break;
839
840 case ARG_LOG_TARGET:
fb472900 841 r = log_set_target_from_string(optarg);
2b5107e1
ZJS
842 if (r < 0)
843 return log_error_errno(r, "Failed to parse log target \"%s\": %m", optarg);
f170852a
LP
844
845 break;
846
bbe63281
LP
847 case ARG_LOG_COLOR:
848
d0b170c8 849 if (optarg) {
fb472900 850 r = log_show_color_from_string(optarg);
2b5107e1
ZJS
851 if (r < 0)
852 return log_error_errno(r, "Failed to parse log color setting \"%s\": %m",
853 optarg);
d0b170c8
LP
854 } else
855 log_show_color(true);
bbe63281
LP
856
857 break;
858
859 case ARG_LOG_LOCATION:
d0b170c8 860 if (optarg) {
fb472900 861 r = log_show_location_from_string(optarg);
2b5107e1
ZJS
862 if (r < 0)
863 return log_error_errno(r, "Failed to parse log location setting \"%s\": %m",
864 optarg);
d0b170c8
LP
865 } else
866 log_show_location(true);
bbe63281
LP
867
868 break;
869
c5673ed0
DS
870 case ARG_LOG_TIME:
871
872 if (optarg) {
873 r = log_show_time_from_string(optarg);
874 if (r < 0)
875 return log_error_errno(r, "Failed to parse log time setting \"%s\": %m",
876 optarg);
877 } else
878 log_show_time(true);
879
880 break;
881
0a494f1f 882 case ARG_DEFAULT_STD_OUTPUT:
fb472900 883 r = exec_output_from_string(optarg);
2b5107e1
ZJS
884 if (r < 0)
885 return log_error_errno(r, "Failed to parse default standard output setting \"%s\": %m",
886 optarg);
887 arg_default_std_output = r;
0a494f1f
LP
888 break;
889
890 case ARG_DEFAULT_STD_ERROR:
fb472900 891 r = exec_output_from_string(optarg);
2b5107e1
ZJS
892 if (r < 0)
893 return log_error_errno(r, "Failed to parse default standard error output setting \"%s\": %m",
894 optarg);
895 arg_default_std_error = r;
0a494f1f
LP
896 break;
897
2f198e2f 898 case ARG_UNIT:
e6e242ad 899 r = free_and_strdup(&arg_default_unit, optarg);
23bbb0de 900 if (r < 0)
2b5107e1 901 return log_error_errno(r, "Failed to set default unit \"%s\": %m", optarg);
f170852a
LP
902
903 break;
904
edb9aaa8 905 case ARG_SYSTEM:
463d0d15 906 arg_system = true;
edb9aaa8 907 break;
a5dab5ce 908
af2d49f7 909 case ARG_USER:
463d0d15 910 arg_system = false;
9a9ca408 911 user_arg_seen = true;
a5dab5ce 912 break;
a5dab5ce 913
e965d56d 914 case ARG_TEST:
fa0f4d8a 915 arg_action = ACTION_TEST;
b87c2aa6
ZJS
916 break;
917
918 case ARG_NO_PAGER:
0221d68a 919 arg_pager_flags |= PAGER_DISABLE;
e965d56d
LP
920 break;
921
9ba0bc4e
ZJS
922 case ARG_VERSION:
923 arg_action = ACTION_VERSION;
924 break;
925
e537352b 926 case ARG_DUMP_CONFIGURATION_ITEMS:
fa0f4d8a 927 arg_action = ACTION_DUMP_CONFIGURATION_ITEMS;
e537352b
LP
928 break;
929
bbc1acab
YW
930 case ARG_DUMP_BUS_PROPERTIES:
931 arg_action = ACTION_DUMP_BUS_PROPERTIES;
932 break;
933
5c08257b
ZJS
934 case ARG_BUS_INTROSPECT:
935 arg_bus_introspect = optarg;
936 arg_action = ACTION_BUS_INTROSPECT;
937 break;
938
9e58ff9c 939 case ARG_DUMP_CORE:
b9e74c39
LP
940 if (!optarg)
941 arg_dump_core = true;
942 else {
943 r = parse_boolean(optarg);
944 if (r < 0)
2b5107e1
ZJS
945 return log_error_errno(r, "Failed to parse dump core boolean: \"%s\": %m",
946 optarg);
b9e74c39 947 arg_dump_core = r;
a5d87bf0 948 }
b9e74c39
LP
949 break;
950
951 case ARG_CRASH_CHVT:
a07a7324 952 r = parse_crash_chvt(optarg, &arg_crash_chvt);
b9e74c39 953 if (r < 0)
2b5107e1
ZJS
954 return log_error_errno(r, "Failed to parse crash virtual terminal index: \"%s\": %m",
955 optarg);
9e58ff9c
LP
956 break;
957
958 case ARG_CRASH_SHELL:
b9e74c39
LP
959 if (!optarg)
960 arg_crash_shell = true;
961 else {
962 r = parse_boolean(optarg);
963 if (r < 0)
2b5107e1
ZJS
964 return log_error_errno(r, "Failed to parse crash shell boolean: \"%s\": %m",
965 optarg);
b9e74c39
LP
966 arg_crash_shell = r;
967 }
968 break;
969
970 case ARG_CRASH_REBOOT:
971 if (!optarg)
972 arg_crash_reboot = true;
973 else {
974 r = parse_boolean(optarg);
975 if (r < 0)
2b5107e1
ZJS
976 return log_error_errno(r, "Failed to parse crash shell boolean: \"%s\": %m",
977 optarg);
b9e74c39 978 arg_crash_reboot = r;
a5d87bf0 979 }
9e58ff9c
LP
980 break;
981
80876c20 982 case ARG_CONFIRM_SPAWN:
7d5ceb64
FB
983 arg_confirm_spawn = mfree(arg_confirm_spawn);
984
985 r = parse_confirm_spawn(optarg, &arg_confirm_spawn);
986 if (r < 0)
2b5107e1
ZJS
987 return log_error_errno(r, "Failed to parse confirm spawn option: \"%s\": %m",
988 optarg);
80876c20
LP
989 break;
990
2a12e32e
JK
991 case ARG_SERVICE_WATCHDOGS:
992 r = parse_boolean(optarg);
993 if (r < 0)
2b5107e1
ZJS
994 return log_error_errno(r, "Failed to parse service watchdogs boolean: \"%s\": %m",
995 optarg);
2a12e32e
JK
996 arg_service_watchdogs = r;
997 break;
998
9e58ff9c 999 case ARG_SHOW_STATUS:
d450b6f2
ZJS
1000 if (optarg) {
1001 r = parse_show_status(optarg, &arg_show_status);
ac7ec288 1002 if (r < 0)
2b5107e1
ZJS
1003 return log_error_errno(r, "Failed to parse show status boolean: \"%s\": %m",
1004 optarg);
d450b6f2
ZJS
1005 } else
1006 arg_show_status = SHOW_STATUS_YES;
6e98720f 1007 break;
a5d87bf0 1008
a16e1123
LP
1009 case ARG_DESERIALIZE: {
1010 int fd;
1011 FILE *f;
1012
01e10de3 1013 r = safe_atoi(optarg, &fd);
2b5107e1
ZJS
1014 if (r < 0)
1015 log_error_errno(r, "Failed to parse deserialize option \"%s\": %m", optarg);
baaa35ad
ZJS
1016 if (fd < 0)
1017 return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
1018 "Invalid deserialize fd: %d",
1019 fd);
a16e1123 1020
b9e74c39 1021 (void) fd_cloexec(fd, true);
01e10de3
LP
1022
1023 f = fdopen(fd, "r");
4a62c710 1024 if (!f)
2b5107e1 1025 return log_error_errno(errno, "Failed to open serialization fd %d: %m", fd);
a16e1123 1026
74ca738f 1027 safe_fclose(arg_serialization);
d3b1c508 1028 arg_serialization = f;
a16e1123
LP
1029
1030 break;
1031 }
1032
2660882b 1033 case ARG_SWITCHED_ROOT:
bf4df7c3 1034 arg_switched_root = true;
d03bc1b8
HH
1035 break;
1036
ee48dbd5
NC
1037 case ARG_MACHINE_ID:
1038 r = set_machine_id(optarg);
54500613 1039 if (r < 0)
2b5107e1 1040 return log_error_errno(r, "MachineID '%s' is not valid: %m", optarg);
ee48dbd5
NC
1041 break;
1042
f170852a 1043 case 'h':
fa0f4d8a 1044 arg_action = ACTION_HELP;
f170852a
LP
1045 break;
1046
1d2e23ab
LP
1047 case 'D':
1048 log_set_max_level(LOG_DEBUG);
1049 break;
1050
099663ff
LP
1051 case 'b':
1052 case 's':
1053 case 'z':
cd57038a
ZJS
1054 /* Just to eat away the sysvinit kernel cmdline args that we'll parse in
1055 * parse_proc_cmdline_item() or ignore, without any getopt() error messages.
1056 */
099663ff 1057 case '?':
df0ff127 1058 if (getpid_cached() != 1)
099663ff 1059 return -EINVAL;
601185b4
ZJS
1060 else
1061 return 0;
099663ff 1062
601185b4
ZJS
1063 default:
1064 assert_not_reached("Unhandled option code.");
f170852a
LP
1065 }
1066
d7a0f1f4 1067 if (optind < argc && getpid_cached() != 1)
9a9ca408
ZJS
1068 /* Hmm, when we aren't run as init system let's complain about excess arguments */
1069 return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Excess arguments.");
1070
1071 if (arg_action == ACTION_RUN && !arg_system && !user_arg_seen)
baaa35ad 1072 return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
9a9ca408 1073 "Explicit --user argument required to run as user manager.");
d821e6d6 1074
f170852a
LP
1075 return 0;
1076}
1077
/* Prints the command line usage summary to stdout.
 *
 * Returns 0 on success, or the result of log_oom() if the man page reference could not be prepared. */
static int help(void) {
        _cleanup_free_ char *link = NULL;
        int r;

        r = terminal_urlify_man("systemd", "1", &link);
        if (r < 0)
                return log_oom();

        /* The format string takes, in this order: the program name, a highlight on/off pair around the
         * summary line, an underline on/off pair around the "Options" heading, and finally the man
         * page reference prepared above.
         *
         * NOTE(review): parse_argv() registers the VT switch as "crash-chvt"; confirm the "--crash-vt"
         * spelling listed below is actually accepted. */
        printf("%s [OPTIONS...]\n\n"
               "%sStarts and monitors system and user services.%s\n\n"
               "This program takes no positional arguments.\n\n"
               "%sOptions%s:\n"
               " -h --help Show this help\n"
               " --version Show version\n"
               " --test Determine initial transaction, dump it and exit\n"
               " --system In combination with --test: operate as system service manager\n"
               " --user In combination with --test: operate as per-user service manager\n"
               " --no-pager Do not pipe output into a pager\n"
               " --dump-configuration-items Dump understood unit configuration items\n"
               " --dump-bus-properties Dump exposed bus properties\n"
               " --bus-introspect=PATH Write XML introspection data\n"
               " --unit=UNIT Set default unit\n"
               " --dump-core[=BOOL] Dump core on crash\n"
               " --crash-vt=NR Change to specified VT on crash\n"
               " --crash-reboot[=BOOL] Reboot on crash\n"
               " --crash-shell[=BOOL] Run shell on crash\n"
               " --confirm-spawn[=BOOL] Ask for confirmation when spawning processes\n"
               " --show-status[=BOOL] Show status updates on the console during bootup\n"
               " --log-target=TARGET Set log target (console, journal, kmsg, journal-or-kmsg, null)\n"
               " --log-level=LEVEL Set log level (debug, info, notice, warning, err, crit, alert, emerg)\n"
               " --log-color[=BOOL] Highlight important log messages\n"
               " --log-location[=BOOL] Include code location in log messages\n"
               " --log-time[=BOOL] Prefix log messages with current time\n"
               " --default-standard-output= Set default standard output for services\n"
               " --default-standard-error= Set default standard error output for services\n"
               "\nSee the %s for details.\n",
               program_invocation_short_name,
               ansi_highlight(),
               ansi_normal(),
               ansi_underline(),
               ansi_normal(),
               link);

        return 0;
}
1123
2cc856ac
LP
1124static int prepare_reexecute(
1125 Manager *m,
1126 FILE **ret_f,
1127 FDSet **ret_fds,
1128 bool switching_root) {
1129
48b90859
LP
1130 _cleanup_fdset_free_ FDSet *fds = NULL;
1131 _cleanup_fclose_ FILE *f = NULL;
a16e1123
LP
1132 int r;
1133
1134 assert(m);
2cc856ac
LP
1135 assert(ret_f);
1136 assert(ret_fds);
a16e1123 1137
6b78f9b4 1138 r = manager_open_serialization(m, &f);
48b90859
LP
1139 if (r < 0)
1140 return log_error_errno(r, "Failed to create serialization file: %m");
a16e1123 1141
71445ae7 1142 /* Make sure nothing is really destructed when we shut down */
313cefa1 1143 m->n_reloading++;
718db961 1144 bus_manager_send_reloading(m, true);
71445ae7 1145
6b78f9b4 1146 fds = fdset_new();
48b90859
LP
1147 if (!fds)
1148 return log_oom();
a16e1123 1149
b3680f49 1150 r = manager_serialize(m, f, fds, switching_root);
48b90859 1151 if (r < 0)
d68c645b 1152 return r;
a16e1123 1153
48b90859
LP
1154 if (fseeko(f, 0, SEEK_SET) == (off_t) -1)
1155 return log_error_errno(errno, "Failed to rewind serialization fd: %m");
a16e1123 1156
6b78f9b4 1157 r = fd_cloexec(fileno(f), false);
48b90859
LP
1158 if (r < 0)
1159 return log_error_errno(r, "Failed to disable O_CLOEXEC for serialization: %m");
a16e1123 1160
6b78f9b4 1161 r = fdset_cloexec(fds, false);
48b90859
LP
1162 if (r < 0)
1163 return log_error_errno(r, "Failed to disable O_CLOEXEC for serialization fds: %m");
a16e1123 1164
2cc856ac
LP
1165 *ret_f = TAKE_PTR(f);
1166 *ret_fds = TAKE_PTR(fds);
a16e1123 1167
48b90859 1168 return 0;
a16e1123
LP
1169}
1170
a8b627aa
LP
static void bump_file_max_and_nr_open(void) {

        /* Raises the fs.file-max and fs.nr_open sysctls as far as the kernel lets us. On current kernels
         * large numbers of file descriptors are no longer a performance problem and their memory is
         * properly tracked by memcg, hence counting and limiting them in two more layers is unnecessary
         * and only complicates things. This function thus turns off 2 of the 4 levels of limits on file
         * descriptors, and makes RLIMIT_NOFILE (soft + hard) the only ones that really matter. */

#if BUMP_PROC_SYS_FS_FILE_MAX || BUMP_PROC_SYS_FS_NR_OPEN
        int r;
#endif

#if BUMP_PROC_SYS_FS_FILE_MAX
        /* Since kernel 5.2 the maximum accepted here is LONG_MAX, hence use that. (Previously things
         * were different, but the operation would fail silently.) */
        r = sysctl_writef("fs/file-max", "%li\n", LONG_MAX);
        if (r < 0)
                log_full_errno(IN_SET(r, -EROFS, -EPERM, -EACCES) ? LOG_DEBUG : LOG_WARNING, r, "Failed to bump fs.file-max, ignoring: %m");
#endif

#if BUMP_PROC_SYS_FS_NR_OPEN
        /* The kernel enforces a minimum and a maximum on fs.nr_open, but does not tell us what they
         * are. The maximum depends on the architecture and on kernel implementation details we do not
         * want to replicate in userspace. Hence we can only try and hope: start with INT_MAX and keep
         * halving the candidate until the kernel accepts it. Ugly? Yes, absolutely, but kernel APIs are
         * kernel APIs, so what can we do... */

        for (int candidate = INT_MAX;; candidate /= 2) {
                int current;

                candidate &= ~(__SIZEOF_POINTER__ - 1); /* Round down to next multiple of the pointer size */
                if (candidate < 1024) {
                        log_warning("Can't bump fs.nr_open, value too small.");
                        break;
                }

                current = read_nr_open();
                if (current < 0) {
                        log_error_errno(current, "Failed to read fs.nr_open: %m");
                        break;
                }
                if (current >= candidate) { /* Already larger */
                        log_debug("Skipping bump, value is already larger.");
                        break;
                }

                r = sysctl_writef("fs/nr_open", "%i\n", candidate);
                if (r == -EINVAL) {
                        /* Rejected as out of range: the loop step halves the candidate for the next try */
                        log_debug("Couldn't write fs.nr_open as %i, halving it.", candidate);
                        continue;
                }

                if (r < 0)
                        log_full_errno(IN_SET(r, -EROFS, -EPERM, -EACCES) ? LOG_DEBUG : LOG_WARNING, r, "Failed to bump fs.nr_open, ignoring: %m");
                else
                        log_debug("Successfully bumped fs.nr_open to %i", candidate);

                break;
        }
#endif
}
1236
/* Raises RLIMIT_NOFILE (soft and hard) for ourselves up to the value read_nr_open() reports, but never
 * below what we inherited in *saved_rlimit. Returns 0 if nothing needed to be done or the limit was
 * set, and the negative error from setrlimit_closest() otherwise (after logging a warning). */
static int bump_rlimit_nofile(struct rlimit *saved_rlimit) {
        struct rlimit wanted;
        int nr_open, r;

        /* The underlying absolute limit the kernel enforces */
        nr_open = read_nr_open();

        /* The limits we'd like to have. Never go lower than what we inherited. */
        wanted = (struct rlimit) {
                .rlim_cur = MAX((rlim_t) nr_open, saved_rlimit->rlim_cur),
                .rlim_max = MAX((rlim_t) nr_open, saved_rlimit->rlim_max),
        };

        if (wanted.rlim_max > saved_rlimit->rlim_max ||
            wanted.rlim_cur > saved_rlimit->rlim_cur) {

                /* Bump both the hard and the soft limit substantially, all the way up to the maximum
                 * the kernel allows. */
                r = setrlimit_closest(RLIMIT_NOFILE, &wanted);
                if (r < 0)
                        return log_warning_errno(r, "Setting RLIMIT_NOFILE failed, ignoring: %m");

                return 0;
        }

        /* Nothing would change, take the shortcut. */
        log_debug("RLIMIT_NOFILE is already as high or higher than we need it, not bumping.");
        return 0;
}
1265
fb3ae275 1266static int bump_rlimit_memlock(struct rlimit *saved_rlimit) {
cda7faa9 1267 struct rlimit new_rlimit;
04d1ee0f 1268 uint64_t mm;
fb3ae275
LP
1269 int r;
1270
a17c1712 1271 /* BPF_MAP_TYPE_LPM_TRIE bpf maps are charged against RLIMIT_MEMLOCK, even if we have CAP_IPC_LOCK which should
6e3c443b 1272 * normally disable such checks. We need them to implement IPAddressAllow= and IPAddressDeny=, hence let's bump
a17c1712 1273 * the value high enough for our user. */
fb3ae275 1274
cda7faa9
LP
1275 /* Using MAX() on resource limits only is safe if RLIM_INFINITY is > 0. POSIX declares that rlim_t
1276 * must be unsigned, hence this is a given, but let's make this clear here. */
1277 assert_cc(RLIM_INFINITY > 0);
1278
04d1ee0f 1279 mm = physical_memory() / 8; /* Let's scale how much we allow to be locked by the amount of physical
2d4f8cf4 1280 * RAM. We allow an eighth to be locked by us, just to pick a value. */
04d1ee0f 1281
cda7faa9 1282 new_rlimit = (struct rlimit) {
04d1ee0f
LP
1283 .rlim_cur = MAX3(HIGH_RLIMIT_MEMLOCK, saved_rlimit->rlim_cur, mm),
1284 .rlim_max = MAX3(HIGH_RLIMIT_MEMLOCK, saved_rlimit->rlim_max, mm),
cda7faa9
LP
1285 };
1286
1287 if (saved_rlimit->rlim_max >= new_rlimit.rlim_cur &&
1288 saved_rlimit->rlim_cur >= new_rlimit.rlim_max) {
1289 log_debug("RLIMIT_MEMLOCK is already as high or higher than we need it, not bumping.");
1290 return 0;
1291 }
1292
1293 r = setrlimit_closest(RLIMIT_MEMLOCK, &new_rlimit);
fb3ae275
LP
1294 if (r < 0)
1295 return log_warning_errno(r, "Setting RLIMIT_MEMLOCK failed, ignoring: %m");
1296
1297 return 0;
1298}
1299
static void test_usr(void) {

        /* Warns if /usr looks like a separate file system that has not been mounted yet, i.e. if the
         * directory turns out to be empty. Anything else (populated, or the check failing) is fine. */

        if (dir_is_empty("/usr") > 0)
                log_warning("/usr appears to be on its own filesystem and is not already mounted. This is not a supported setup. "
                            "Some things will probably break (sometimes even silently) in mysterious ways. "
                            "Consult http://freedesktop.org/wiki/Software/systemd/separate-usr-is-broken for more information.");
}
1311
d3b1c508 1312static int enforce_syscall_archs(Set *archs) {
349cc4a5 1313#if HAVE_SECCOMP
d3b1c508
LP
1314 int r;
1315
83f12b27
FS
1316 if (!is_seccomp_available())
1317 return 0;
1318
469830d1 1319 r = seccomp_restrict_archs(arg_syscall_archs);
d3b1c508 1320 if (r < 0)
469830d1 1321 return log_error_errno(r, "Failed to enforce system call architecture restrication: %m");
d3b1c508 1322#endif
469830d1 1323 return 0;
d3b1c508
LP
1324}
1325
b6e2f329
LP
1326static int status_welcome(void) {
1327 _cleanup_free_ char *pretty_name = NULL, *ansi_color = NULL;
1328 int r;
1329
5ca02bfc 1330 if (!show_status_on(arg_show_status))
fd8c85c6
LP
1331 return 0;
1332
d58ad743
LP
1333 r = parse_os_release(NULL,
1334 "PRETTY_NAME", &pretty_name,
1335 "ANSI_COLOR", &ansi_color,
1336 NULL);
1337 if (r < 0)
1338 log_full_errno(r == -ENOENT ? LOG_DEBUG : LOG_WARNING, r,
1339 "Failed to read os-release file, ignoring: %m");
b6e2f329 1340
dc9b5816 1341 if (log_get_show_color())
a885727a 1342 return status_printf(NULL, 0,
dc9b5816
ZJS
1343 "\nWelcome to \x1B[%sm%s\x1B[0m!\n",
1344 isempty(ansi_color) ? "1" : ansi_color,
1345 isempty(pretty_name) ? "Linux" : pretty_name);
1346 else
a885727a 1347 return status_printf(NULL, 0,
dc9b5816
ZJS
1348 "\nWelcome to %s!\n",
1349 isempty(pretty_name) ? "Linux" : pretty_name);
b6e2f329
LP
1350}
1351
fdd25311
LP
1352static int write_container_id(void) {
1353 const char *c;
19854865 1354 int r;
fdd25311
LP
1355
1356 c = getenv("container");
1357 if (isempty(c))
1358 return 0;
1359
8612da97
LP
1360 RUN_WITH_UMASK(0022)
1361 r = write_string_file("/run/systemd/container", c, WRITE_STRING_FILE_CREATE);
19854865 1362 if (r < 0)
f1f849b0 1363 return log_warning_errno(r, "Failed to write /run/systemd/container, ignoring: %m");
19854865
LP
1364
1365 return 1;
1366}
1367
1368static int bump_unix_max_dgram_qlen(void) {
1369 _cleanup_free_ char *qlen = NULL;
1370 unsigned long v;
1371 int r;
1372
3130fca5
LP
1373 /* Let's bump the net.unix.max_dgram_qlen sysctl. The kernel default of 16 is simply too low. We set the value
1374 * really really early during boot, so that it is actually applied to all our sockets, including the
1375 * $NOTIFY_SOCKET one. */
19854865
LP
1376
1377 r = read_one_line_file("/proc/sys/net/unix/max_dgram_qlen", &qlen);
1378 if (r < 0)
875622c3 1379 return log_full_errno(r == -ENOENT ? LOG_DEBUG : LOG_WARNING, r, "Failed to read AF_UNIX datagram queue length, ignoring: %m");
19854865
LP
1380
1381 r = safe_atolu(qlen, &v);
1382 if (r < 0)
3130fca5 1383 return log_warning_errno(r, "Failed to parse AF_UNIX datagram queue length '%s', ignoring: %m", qlen);
19854865
LP
1384
1385 if (v >= DEFAULT_UNIX_MAX_DGRAM_QLEN)
1386 return 0;
1387
57512c89 1388 r = write_string_filef("/proc/sys/net/unix/max_dgram_qlen", WRITE_STRING_FILE_DISABLE_BUFFER, "%lu", DEFAULT_UNIX_MAX_DGRAM_QLEN);
19854865
LP
1389 if (r < 0)
1390 return log_full_errno(IN_SET(r, -EROFS, -EPERM, -EACCES) ? LOG_DEBUG : LOG_WARNING, r,
1391 "Failed to bump AF_UNIX datagram queue length, ignoring: %m");
1392
1393 return 1;
fdd25311
LP
1394}
1395
32391275
FB
1396static int fixup_environment(void) {
1397 _cleanup_free_ char *term = NULL;
4dc63c4b 1398 const char *t;
32391275
FB
1399 int r;
1400
43db615b
LP
1401 /* Only fix up the environment when we are started as PID 1 */
1402 if (getpid_cached() != 1)
1403 return 0;
1404
1405 /* We expect the environment to be set correctly if run inside a container. */
84af7821
LP
1406 if (detect_container() > 0)
1407 return 0;
1408
43db615b
LP
1409 /* When started as PID1, the kernel uses /dev/console for our stdios and uses TERM=linux whatever the backend
1410 * device used by the console. We try to make a better guess here since some consoles might not have support
1411 * for color mode for example.
32391275 1412 *
43db615b 1413 * However if TERM was configured through the kernel command line then leave it alone. */
1d84ad94 1414 r = proc_cmdline_get_key("TERM", 0, &term);
32391275
FB
1415 if (r < 0)
1416 return r;
32391275 1417
4dc63c4b
LP
1418 t = term ?: default_term_for_tty("/dev/console");
1419
1420 if (setenv("TERM", t, 1) < 0)
32391275
FB
1421 return -errno;
1422
9d48671c 1423 /* The kernels sets HOME=/ for init. Let's undo this. */
44ee03d1
ZJS
1424 if (path_equal_ptr(getenv("HOME"), "/"))
1425 assert_se(unsetenv("HOME") == 0);
9d48671c 1426
32391275
FB
1427 return 0;
1428}
1429
6808a0bc
LP
static void redirect_telinit(int argc, char *argv[]) {

        /* SysV compatibility: invoking "init" as a regular process is the same as invoking telinit.
         * Hence, if we are not PID 1 and were called under a name containing "init", hand the command
         * line over to systemctl. Exits with failure if that cannot be executed. */

#if HAVE_SYSV_COMPAT
        if (getpid_cached() == 1 || !strstr(program_invocation_short_name, "init"))
                return;

        execv(SYSTEMCTL_BINARY_PATH, argv);

        /* Only reached if the exec failed */
        log_error_errno(errno, "Failed to exec " SYSTEMCTL_BINARY_PATH ": %m");
        exit(EXIT_FAILURE);
#endif
}
1446
4a36297c
LP
1447static int become_shutdown(
1448 const char *shutdown_verb,
7eb35049 1449 int retval) {
4a36297c
LP
1450
1451 char log_level[DECIMAL_STR_MAX(int) + 1],
e73c54b8
JK
1452 exit_code[DECIMAL_STR_MAX(uint8_t) + 1],
1453 timeout[DECIMAL_STR_MAX(usec_t) + 1];
4a36297c 1454
e73c54b8 1455 const char* command_line[13] = {
4a36297c
LP
1456 SYSTEMD_SHUTDOWN_BINARY_PATH,
1457 shutdown_verb,
e73c54b8 1458 "--timeout", timeout,
4a36297c
LP
1459 "--log-level", log_level,
1460 "--log-target",
1461 };
1462
1463 _cleanup_strv_free_ char **env_block = NULL;
e73c54b8 1464 size_t pos = 7;
4a36297c 1465 int r;
acafd7d8 1466 usec_t watchdog_timer = 0;
4a36297c 1467
7eb35049 1468 assert(shutdown_verb);
234519ae 1469 assert(!command_line[pos]);
4a36297c
LP
1470 env_block = strv_copy(environ);
1471
1472 xsprintf(log_level, "%d", log_get_max_level());
e73c54b8 1473 xsprintf(timeout, "%" PRI_USEC "us", arg_default_timeout_stop_usec);
4a36297c
LP
1474
1475 switch (log_get_target()) {
1476
1477 case LOG_TARGET_KMSG:
1478 case LOG_TARGET_JOURNAL_OR_KMSG:
1479 case LOG_TARGET_SYSLOG_OR_KMSG:
1480 command_line[pos++] = "kmsg";
1481 break;
1482
1483 case LOG_TARGET_NULL:
1484 command_line[pos++] = "null";
1485 break;
1486
1487 case LOG_TARGET_CONSOLE:
1488 default:
1489 command_line[pos++] = "console";
1490 break;
1491 };
1492
1493 if (log_get_show_color())
1494 command_line[pos++] = "--log-color";
1495
1496 if (log_get_show_location())
1497 command_line[pos++] = "--log-location";
1498
c5673ed0
DS
1499 if (log_get_show_time())
1500 command_line[pos++] = "--log-time";
1501
4a36297c
LP
1502 if (streq(shutdown_verb, "exit")) {
1503 command_line[pos++] = "--exit-code";
1504 command_line[pos++] = exit_code;
1505 xsprintf(exit_code, "%d", retval);
1506 }
1507
1508 assert(pos < ELEMENTSOF(command_line));
1509
acafd7d8 1510 if (streq(shutdown_verb, "reboot"))
65224c1d 1511 watchdog_timer = arg_reboot_watchdog;
acafd7d8
LB
1512 else if (streq(shutdown_verb, "kexec"))
1513 watchdog_timer = arg_kexec_watchdog;
1514
1515 if (watchdog_timer > 0 && watchdog_timer != USEC_INFINITY) {
7eb35049 1516
4a36297c
LP
1517 char *e;
1518
acafd7d8 1519 /* If we reboot or kexec let's set the shutdown
4a36297c
LP
1520 * watchdog and tell the shutdown binary to
1521 * repeatedly ping it */
acafd7d8 1522 r = watchdog_set_timeout(&watchdog_timer);
4a36297c
LP
1523 watchdog_close(r < 0);
1524
1525 /* Tell the binary how often to ping, ignore failure */
acafd7d8 1526 if (asprintf(&e, "WATCHDOG_USEC="USEC_FMT, watchdog_timer) > 0)
8a2c1fbf
EJ
1527 (void) strv_consume(&env_block, e);
1528
1529 if (arg_watchdog_device &&
1530 asprintf(&e, "WATCHDOG_DEVICE=%s", arg_watchdog_device) > 0)
1531 (void) strv_consume(&env_block, e);
4a36297c
LP
1532 } else
1533 watchdog_close(true);
1534
1535 /* Avoid the creation of new processes forked by the
1536 * kernel; at this point, we will not listen to the
1537 * signals anyway */
1538 if (detect_container() <= 0)
1539 (void) cg_uninstall_release_agent(SYSTEMD_CGROUP_CONTROLLER);
1540
1541 execve(SYSTEMD_SHUTDOWN_BINARY_PATH, (char **) command_line, env_block);
1542 return -errno;
1543}
1544
e839bafd
LP
static void initialize_clock(void) {
        int r;

        /* Early, one-time clock setup. This runs before the kernel command line is parsed and before we
         * have otherwise figured out why we are running. */

        if (clock_is_localtime(NULL) > 0) {
                int delta_minutes;

                /* The very first call of settimeofday() also does a time warp in the kernel.
                 *
                 * In the rtc-in-local time mode, we set the kernel's timezone, and rely on external
                 * tools to take care of maintaining the RTC and do all adjustments. This matches the
                 * behavior of Windows, which leaves the RTC alone if the registry tells that the RTC
                 * runs in UTC. */
                r = clock_set_timezone(&delta_minutes);
                if (r >= 0)
                        log_info("RTC configured in localtime, applying delta of %i minutes to system time.", delta_minutes);
                else
                        log_error_errno(r, "Failed to apply local time delta, ignoring: %m");

        } else if (!in_initrd()) {
                /* Do a dummy very first call to seal the kernel's time warp magic.
                 *
                 * Do not call this from inside the initrd. The initrd might not carry /etc/adjtime with
                 * LOCAL, but the real system could be set up that way. In such case, we need to delay
                 * the time-warp or the sealing until we reach the real system.
                 *
                 * Do not set the kernel's timezone. The concept of local time cannot be supported
                 * reliably, the time will jump or be incorrect at every daylight saving time change. All
                 * kernel local time concepts will be treated as UTC that way. */
                (void) clock_reset_timewarp();
        }

        r = clock_apply_epoch();
        if (r > 0)
                log_info("System time before build time, advancing clock.");
        else if (r < 0)
                log_error_errno(r, "Current system time is before build time, but cannot correct: %m");
}
1587
3753325b
LP
1588static void apply_clock_update(void) {
1589 struct timespec ts;
1590
1591 /* This is called later than initialize_clock(), i.e. after we parsed configuration files/kernel
1592 * command line and such. */
1593
1594 if (arg_clock_usec == 0)
1595 return;
1596
45250e66
LP
1597 if (getpid_cached() != 1)
1598 return;
1599
3753325b
LP
1600 if (clock_settime(CLOCK_REALTIME, timespec_store(&ts, arg_clock_usec)) < 0)
1601 log_error_errno(errno, "Failed to set system clock to time specified on kernel command line: %m");
1602 else {
1603 char buf[FORMAT_TIMESTAMP_MAX];
1604
1605 log_info("Set system clock to %s, as specified on the kernel command line.",
1606 format_timestamp(buf, sizeof(buf), arg_clock_usec));
1607 }
1608}
1609
d247f232 1610static void cmdline_take_random_seed(void) {
d247f232
LP
1611 size_t suggested;
1612 int r;
1613
1614 if (arg_random_seed_size == 0)
1615 return;
1616
1617 if (getpid_cached() != 1)
1618 return;
1619
1620 assert(arg_random_seed);
1621 suggested = random_pool_size();
1622
1623 if (arg_random_seed_size < suggested)
1624 log_warning("Random seed specified on kernel command line has size %zu, but %zu bytes required to fill entropy pool.",
1625 arg_random_seed_size, suggested);
1626
61bd7d1e 1627 r = random_write_entropy(-1, arg_random_seed, arg_random_seed_size, true);
d247f232
LP
1628 if (r < 0) {
1629 log_warning_errno(r, "Failed to credit entropy specified on kernel command line, ignoring: %m");
1630 return;
1631 }
1632
1633 log_notice("Successfully credited entropy passed on kernel command line.\n"
1634 "Note that the seed provided this way is accessible to unprivileged programs. This functionality should not be used outside of testing environments.");
1635}
1636
/* Core dump setup done when running as PID 1 (no-op if built without coredump support): lifts
 * RLIMIT_CORE and, unless skip_setup is set, disables core dumps via disable_coredumps(). */
static void initialize_coredump(bool skip_setup) {
#if ENABLE_COREDUMP
        if (getpid_cached() != 1)
                return;

        /* Lift the core dump size limit, so that coredump handlers which honour the limit (such as
         * systemd-coredump) process core dumps of system services by default. */
        if (setrlimit(RLIMIT_CORE, &RLIMIT_MAKE_CONST(RLIM_INFINITY)) < 0)
                log_warning_errno(errno, "Failed to set RLIMIT_CORE: %m");

        if (skip_setup)
                return;

        /* At the same time turn off the core_pattern logic by default, so that no coredumps are stored
         * until the systemd-coredump tool is enabled via sysctl. This may still be overridden through
         * the kernel command line later, so that core dumps can be generated during early startup and
         * in the initramfs. */
        disable_coredumps();
#endif
}
1655
c6885f5f
FB
1656static void initialize_core_pattern(bool skip_setup) {
1657 int r;
1658
1659 if (skip_setup || !arg_early_core_pattern)
1660 return;
1661
1662 if (getpid_cached() != 1)
1663 return;
1664
57512c89 1665 r = write_string_file("/proc/sys/kernel/core_pattern", arg_early_core_pattern, WRITE_STRING_FILE_DISABLE_BUFFER);
c6885f5f
FB
1666 if (r < 0)
1667 log_warning_errno(r, "Failed to write '%s' to /proc/sys/kernel/core_pattern, ignoring: %m", arg_early_core_pattern);
1668}
1669
61fbbac1
ZJS
1670static void update_cpu_affinity(bool skip_setup) {
1671 _cleanup_free_ char *mask = NULL;
1672
1673 if (skip_setup || !arg_cpu_affinity.set)
1674 return;
1675
1676 assert(arg_cpu_affinity.allocated > 0);
1677
1678 mask = cpu_set_to_string(&arg_cpu_affinity);
1679 log_debug("Setting CPU affinity to %s.", strnull(mask));
1680
1681 if (sched_setaffinity(0, arg_cpu_affinity.allocated, arg_cpu_affinity.set) < 0)
1682 log_warning_errno(errno, "Failed to set CPU affinity: %m");
1683}
1684
b070c7c0
MS
1685static void update_numa_policy(bool skip_setup) {
1686 int r;
1687 _cleanup_free_ char *nodes = NULL;
1688 const char * policy = NULL;
1689
1690 if (skip_setup || !mpol_is_valid(numa_policy_get_type(&arg_numa_policy)))
1691 return;
1692
1693 if (DEBUG_LOGGING) {
1694 policy = mpol_to_string(numa_policy_get_type(&arg_numa_policy));
1695 nodes = cpu_set_to_range_string(&arg_numa_policy.nodes);
1696 log_debug("Setting NUMA policy to %s, with nodes %s.", strnull(policy), strnull(nodes));
1697 }
1698
1699 r = apply_numa_policy(&arg_numa_policy);
1700 if (r == -EOPNOTSUPP)
1701 log_debug_errno(r, "NUMA support not available, ignoring.");
1702 else if (r < 0)
1703 log_warning_errno(r, "Failed to set NUMA memory policy: %m");
1704}
1705
3c7878f9
LP
1706static void do_reexecute(
1707 int argc,
1708 char *argv[],
1709 const struct rlimit *saved_rlimit_nofile,
1710 const struct rlimit *saved_rlimit_memlock,
1711 FDSet *fds,
1712 const char *switch_root_dir,
1713 const char *switch_root_init,
1714 const char **ret_error_message) {
1715
1716 unsigned i, j, args_size;
1717 const char **args;
1718 int r;
1719
1720 assert(saved_rlimit_nofile);
1721 assert(saved_rlimit_memlock);
1722 assert(ret_error_message);
1723
1724 /* Close and disarm the watchdog, so that the new instance can reinitialize it, but doesn't get rebooted while
1725 * we do that */
1726 watchdog_close(true);
1727
ddfa8b0b
LP
1728 /* Reset RLIMIT_NOFILE + RLIMIT_MEMLOCK back to the kernel defaults, so that the new systemd can pass
1729 * the kernel default to its child processes */
1730 if (saved_rlimit_nofile->rlim_cur != 0)
3c7878f9 1731 (void) setrlimit(RLIMIT_NOFILE, saved_rlimit_nofile);
ddfa8b0b 1732 if (saved_rlimit_memlock->rlim_cur != RLIM_INFINITY)
3c7878f9
LP
1733 (void) setrlimit(RLIMIT_MEMLOCK, saved_rlimit_memlock);
1734
1735 if (switch_root_dir) {
1736 /* Kill all remaining processes from the initrd, but don't wait for them, so that we can handle the
1737 * SIGCHLD for them after deserializing. */
e73c54b8 1738 broadcast_signal(SIGTERM, false, true, arg_default_timeout_stop_usec);
3c7878f9
LP
1739
1740 /* And switch root with MS_MOVE, because we remove the old directory afterwards and detach it. */
1741 r = switch_root(switch_root_dir, "/mnt", true, MS_MOVE);
1742 if (r < 0)
1743 log_error_errno(r, "Failed to switch root, trying to continue: %m");
1744 }
1745
1746 args_size = MAX(6, argc+1);
1747 args = newa(const char*, args_size);
1748
1749 if (!switch_root_init) {
1750 char sfd[DECIMAL_STR_MAX(int) + 1];
1751
1752 /* First try to spawn ourselves with the right path, and with full serialization. We do this only if
1753 * the user didn't specify an explicit init to spawn. */
1754
1755 assert(arg_serialization);
1756 assert(fds);
1757
1758 xsprintf(sfd, "%i", fileno(arg_serialization));
1759
1760 i = 0;
1761 args[i++] = SYSTEMD_BINARY_PATH;
1762 if (switch_root_dir)
1763 args[i++] = "--switched-root";
1764 args[i++] = arg_system ? "--system" : "--user";
1765 args[i++] = "--deserialize";
1766 args[i++] = sfd;
1767 args[i++] = NULL;
1768
1769 assert(i <= args_size);
1770
1771 /*
1772 * We want valgrind to print its memory usage summary before reexecution. Valgrind won't do this is on
1773 * its own on exec(), but it will do it on exit(). Hence, to ensure we get a summary here, fork() off
1774 * a child, let it exit() cleanly, so that it prints the summary, and wait() for it in the parent,
1775 * before proceeding into the exec().
1776 */
1777 valgrind_summary_hack();
1778
1779 (void) execv(args[0], (char* const*) args);
1780 log_debug_errno(errno, "Failed to execute our own binary, trying fallback: %m");
1781 }
1782
1783 /* Try the fallback, if there is any, without any serialization. We pass the original argv[] and envp[]. (Well,
1784 * modulo the ordering changes due to getopt() in argv[], and some cleanups in envp[], but let's hope that
1785 * doesn't matter.) */
1786
1787 arg_serialization = safe_fclose(arg_serialization);
1788 fds = fdset_free(fds);
1789
1790 /* Reopen the console */
1791 (void) make_console_stdio();
1792
1793 for (j = 1, i = 1; j < (unsigned) argc; j++)
1794 args[i++] = argv[j];
1795 args[i++] = NULL;
1796 assert(i <= args_size);
1797
5238e957 1798 /* Re-enable any blocked signals, especially important if we switch from initial ramdisk to init=... */
3c7878f9
LP
1799 (void) reset_all_signal_handlers();
1800 (void) reset_signal_mask();
595225af 1801 (void) rlimit_nofile_safe();
3c7878f9
LP
1802
1803 if (switch_root_init) {
1804 args[0] = switch_root_init;
a5cede8c 1805 (void) execve(args[0], (char* const*) args, saved_env);
3c7878f9
LP
1806 log_warning_errno(errno, "Failed to execute configured init, trying fallback: %m");
1807 }
1808
1809 args[0] = "/sbin/init";
1810 (void) execv(args[0], (char* const*) args);
1811 r = -errno;
1812
1813 manager_status_printf(NULL, STATUS_TYPE_EMERGENCY,
1814 ANSI_HIGHLIGHT_RED " !! " ANSI_NORMAL,
1815 "Failed to execute /sbin/init");
1816
1817 if (r == -ENOENT) {
1818 log_warning("No /sbin/init, trying fallback");
1819
1820 args[0] = "/bin/sh";
1821 args[1] = NULL;
a5cede8c 1822 (void) execve(args[0], (char* const*) args, saved_env);
3c7878f9
LP
1823 log_error_errno(errno, "Failed to execute /bin/sh, giving up: %m");
1824 } else
1825 log_warning_errno(r, "Failed to execute /sbin/init, giving up: %m");
1826
1827 *ret_error_message = "Failed to execute fallback shell";
1828}
1829
7eb35049
LP
1830static int invoke_main_loop(
1831 Manager *m,
a9fd4cd1
FB
1832 const struct rlimit *saved_rlimit_nofile,
1833 const struct rlimit *saved_rlimit_memlock,
7eb35049
LP
1834 bool *ret_reexecute,
1835 int *ret_retval, /* Return parameters relevant for shutting down */
1836 const char **ret_shutdown_verb, /* … */
1837 FDSet **ret_fds, /* Return parameters for reexecuting */
1838 char **ret_switch_root_dir, /* … */
1839 char **ret_switch_root_init, /* … */
1840 const char **ret_error_message) {
1841
1842 int r;
1843
1844 assert(m);
a9fd4cd1
FB
1845 assert(saved_rlimit_nofile);
1846 assert(saved_rlimit_memlock);
7eb35049
LP
1847 assert(ret_reexecute);
1848 assert(ret_retval);
1849 assert(ret_shutdown_verb);
1850 assert(ret_fds);
1851 assert(ret_switch_root_dir);
1852 assert(ret_switch_root_init);
1853 assert(ret_error_message);
1854
1855 for (;;) {
1856 r = manager_loop(m);
1857 if (r < 0) {
1858 *ret_error_message = "Failed to run main loop";
1859 return log_emergency_errno(r, "Failed to run main loop: %m");
1860 }
1861
3ca4d0b3 1862 switch ((ManagerObjective) r) {
7eb35049 1863
a6ecbf83 1864 case MANAGER_RELOAD: {
bda7d78b 1865 LogTarget saved_log_target;
a6ecbf83
FB
1866 int saved_log_level;
1867
7eb35049
LP
1868 log_info("Reloading.");
1869
3fe91079 1870 /* First, save any overridden log level/target, then parse the configuration file, which might
bda7d78b
FB
1871 * change the log level to new settings. */
1872
a6ecbf83 1873 saved_log_level = m->log_level_overridden ? log_get_max_level() : -1;
bda7d78b 1874 saved_log_target = m->log_target_overridden ? log_get_target() : _LOG_TARGET_INVALID;
a6ecbf83 1875
a9fd4cd1 1876 (void) parse_configuration(saved_rlimit_nofile, saved_rlimit_memlock);
7eb35049
LP
1877
1878 set_manager_defaults(m);
986935cf 1879 set_manager_settings(m);
7eb35049 1880
61fbbac1 1881 update_cpu_affinity(false);
b070c7c0 1882 update_numa_policy(false);
61fbbac1 1883
a6ecbf83
FB
1884 if (saved_log_level >= 0)
1885 manager_override_log_level(m, saved_log_level);
bda7d78b
FB
1886 if (saved_log_target >= 0)
1887 manager_override_log_target(m, saved_log_target);
a6ecbf83 1888
7eb35049
LP
1889 r = manager_reload(m);
1890 if (r < 0)
7a35fa24
LP
1891 /* Reloading failed before the point of no return. Let's continue running as if nothing happened. */
1892 m->objective = MANAGER_OK;
7eb35049
LP
1893
1894 break;
a6ecbf83 1895 }
7eb35049
LP
1896
1897 case MANAGER_REEXECUTE:
1898
1899 r = prepare_reexecute(m, &arg_serialization, ret_fds, false);
1900 if (r < 0) {
1901 *ret_error_message = "Failed to prepare for reexecution";
1902 return r;
1903 }
1904
1905 log_notice("Reexecuting.");
1906
1907 *ret_reexecute = true;
1908 *ret_retval = EXIT_SUCCESS;
1909 *ret_shutdown_verb = NULL;
1910 *ret_switch_root_dir = *ret_switch_root_init = NULL;
1911
1912 return 0;
1913
1914 case MANAGER_SWITCH_ROOT:
1915 if (!m->switch_root_init) {
1916 r = prepare_reexecute(m, &arg_serialization, ret_fds, true);
1917 if (r < 0) {
1918 *ret_error_message = "Failed to prepare for reexecution";
1919 return r;
1920 }
1921 } else
1922 *ret_fds = NULL;
1923
1924 log_notice("Switching root.");
1925
1926 *ret_reexecute = true;
1927 *ret_retval = EXIT_SUCCESS;
1928 *ret_shutdown_verb = NULL;
1929
1930 /* Steal the switch root parameters */
49052946
YW
1931 *ret_switch_root_dir = TAKE_PTR(m->switch_root);
1932 *ret_switch_root_init = TAKE_PTR(m->switch_root_init);
7eb35049
LP
1933
1934 return 0;
1935
1936 case MANAGER_EXIT:
1937
1938 if (MANAGER_IS_USER(m)) {
1939 log_debug("Exit.");
1940
1941 *ret_reexecute = false;
1942 *ret_retval = m->return_value;
1943 *ret_shutdown_verb = NULL;
1944 *ret_fds = NULL;
1945 *ret_switch_root_dir = *ret_switch_root_init = NULL;
1946
1947 return 0;
1948 }
1949
1950 _fallthrough_;
1951 case MANAGER_REBOOT:
1952 case MANAGER_POWEROFF:
1953 case MANAGER_HALT:
1954 case MANAGER_KEXEC: {
af41e508
LP
1955 static const char * const table[_MANAGER_OBJECTIVE_MAX] = {
1956 [MANAGER_EXIT] = "exit",
1957 [MANAGER_REBOOT] = "reboot",
7eb35049 1958 [MANAGER_POWEROFF] = "poweroff",
af41e508
LP
1959 [MANAGER_HALT] = "halt",
1960 [MANAGER_KEXEC] = "kexec",
7eb35049
LP
1961 };
1962
1963 log_notice("Shutting down.");
1964
1965 *ret_reexecute = false;
1966 *ret_retval = m->return_value;
af41e508 1967 assert_se(*ret_shutdown_verb = table[m->objective]);
7eb35049
LP
1968 *ret_fds = NULL;
1969 *ret_switch_root_dir = *ret_switch_root_init = NULL;
1970
1971 return 0;
1972 }
1973
1974 default:
af41e508 1975 assert_not_reached("Unknown or unexpected manager objective.");
7eb35049
LP
1976 }
1977 }
1978}
1979
31aef7ff
LP
1980static void log_execution_mode(bool *ret_first_boot) {
1981 assert(ret_first_boot);
1982
1983 if (arg_system) {
1984 int v;
1985
91b79ba8
ZJS
1986 log_info("systemd " GIT_VERSION " running in %ssystem mode. (%s)",
1987 arg_action == ACTION_TEST ? "test " : "",
1988 systemd_features);
31aef7ff
LP
1989
1990 v = detect_virtualization();
1991 if (v > 0)
1992 log_info("Detected virtualization %s.", virtualization_to_string(v));
1993
1994 log_info("Detected architecture %s.", architecture_to_string(uname_architecture()));
1995
1996 if (in_initrd()) {
1997 *ret_first_boot = false;
1998 log_info("Running in initial RAM disk.");
1999 } else {
583cef3b
HS
2000 int r;
2001 _cleanup_free_ char *id_text = NULL;
2002
2003 /* Let's check whether we are in first boot. We use /etc/machine-id as flag file
2004 * for this: If it is missing or contains the value "uninitialized", this is the
2005 * first boot. In any other case, it is not. This allows container managers and
2006 * installers to provision a couple of files already. If the container manager
2007 * wants to provision the machine ID itself it should pass $container_uuid to PID 1. */
2008
2009 r = read_one_line_file("/etc/machine-id", &id_text);
2010 if (r < 0 || streq(id_text, "uninitialized")) {
2011 if (r < 0 && r != -ENOENT)
2012 log_warning_errno(r, "Unexpected error while reading /etc/machine-id, ignoring: %m");
2013
2014 *ret_first_boot = true;
2015 log_info("Detected first boot.");
2016 } else {
2017 *ret_first_boot = false;
2018 log_debug("Detected initialized system, this is not the first boot.");
2019 }
31aef7ff
LP
2020 }
2021 } else {
b9e90f3a
LP
2022 if (DEBUG_LOGGING) {
2023 _cleanup_free_ char *t;
31aef7ff 2024
b9e90f3a 2025 t = uid_to_name(getuid());
91b79ba8
ZJS
2026 log_debug("systemd " GIT_VERSION " running in %suser mode for user " UID_FMT "/%s. (%s)",
2027 arg_action == ACTION_TEST ? " test" : "",
2028 getuid(), strna(t), systemd_features);
b9e90f3a 2029 }
31aef7ff
LP
2030
2031 *ret_first_boot = false;
2032 }
2033}
2034
5afbaa36
LP
2035static int initialize_runtime(
2036 bool skip_setup,
3023f2fe 2037 bool first_boot,
5afbaa36
LP
2038 struct rlimit *saved_rlimit_nofile,
2039 struct rlimit *saved_rlimit_memlock,
2040 const char **ret_error_message) {
5afbaa36
LP
2041 int r;
2042
2043 assert(ret_error_message);
2044
2045 /* Sets up various runtime parameters. Many of these initializations are conditionalized:
2046 *
2047 * - Some only apply to --system instances
2048 * - Some only apply to --user instances
2049 * - Some only apply when we first start up, but not when we reexecute
2050 */
2051
2d776038
LP
2052 if (arg_action != ACTION_RUN)
2053 return 0;
2054
61fbbac1 2055 update_cpu_affinity(skip_setup);
b070c7c0 2056 update_numa_policy(skip_setup);
61fbbac1 2057
3c3c6cb9 2058 if (arg_system) {
5238e957 2059 /* Make sure we leave a core dump without panicking the kernel. */
3c3c6cb9 2060 install_crash_handler();
5afbaa36 2061
3c3c6cb9 2062 if (!skip_setup) {
143fadf3 2063 r = mount_cgroup_controllers();
3c3c6cb9
LP
2064 if (r < 0) {
2065 *ret_error_message = "Failed to mount cgroup hierarchies";
2066 return r;
2067 }
2068
2069 status_welcome();
b6fad306 2070 (void) hostname_setup(true);
3023f2fe
HS
2071 /* Force transient machine-id on first boot. */
2072 machine_id_setup(NULL, first_boot, arg_machine_id, NULL);
df883de9 2073 (void) loopback_setup();
3c3c6cb9 2074 bump_unix_max_dgram_qlen();
a8b627aa 2075 bump_file_max_and_nr_open();
3c3c6cb9
LP
2076 test_usr();
2077 write_container_id();
2078 }
8a2c1fbf 2079
3c3c6cb9
LP
2080 if (arg_watchdog_device) {
2081 r = watchdog_set_device(arg_watchdog_device);
2082 if (r < 0)
2083 log_warning_errno(r, "Failed to set watchdog device to %s, ignoring: %m", arg_watchdog_device);
2084 }
32429805
LP
2085 } else {
2086 _cleanup_free_ char *p = NULL;
2087
2088 /* Create the runtime directory and place the inaccessible device nodes there, if we run in
2089 * user mode. In system mode mount_setup() already did that. */
2090
2091 r = xdg_user_runtime_dir(&p, "/systemd");
2092 if (r < 0) {
2093 *ret_error_message = "$XDG_RUNTIME_DIR is not set";
2094 return log_emergency_errno(r, "Failed to determine $XDG_RUNTIME_DIR path: %m");
2095 }
2096
e813a74a 2097 (void) mkdir_p_label(p, 0755);
32429805 2098 (void) make_inaccessible_nodes(p, UID_INVALID, GID_INVALID);
3c3c6cb9 2099 }
5afbaa36
LP
2100
2101 if (arg_timer_slack_nsec != NSEC_INFINITY)
2102 if (prctl(PR_SET_TIMERSLACK, arg_timer_slack_nsec) < 0)
3a671cd1 2103 log_warning_errno(errno, "Failed to adjust timer slack, ignoring: %m");
5afbaa36
LP
2104
2105 if (arg_system && !cap_test_all(arg_capability_bounding_set)) {
2106 r = capability_bounding_set_drop_usermode(arg_capability_bounding_set);
2107 if (r < 0) {
2108 *ret_error_message = "Failed to drop capability bounding set of usermode helpers";
2109 return log_emergency_errno(r, "Failed to drop capability bounding set of usermode helpers: %m");
2110 }
2111
2112 r = capability_bounding_set_drop(arg_capability_bounding_set, true);
2113 if (r < 0) {
2114 *ret_error_message = "Failed to drop capability bounding set";
2115 return log_emergency_errno(r, "Failed to drop capability bounding set: %m");
2116 }
2117 }
2118
39362f6f
JB
2119 if (arg_system && arg_no_new_privs) {
2120 if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) < 0) {
2121 *ret_error_message = "Failed to disable new privileges";
2122 return log_emergency_errno(errno, "Failed to disable new privileges: %m");
2123 }
2124 }
2125
5afbaa36
LP
2126 if (arg_syscall_archs) {
2127 r = enforce_syscall_archs(arg_syscall_archs);
2128 if (r < 0) {
2129 *ret_error_message = "Failed to set syscall architectures";
2130 return r;
2131 }
2132 }
2133
2134 if (!arg_system)
2135 /* Become reaper of our children */
2136 if (prctl(PR_SET_CHILD_SUBREAPER, 1) < 0)
2137 log_warning_errno(errno, "Failed to make us a subreaper: %m");
2138
a17c1712
LP
2139 /* Bump up RLIMIT_NOFILE for systemd itself */
2140 (void) bump_rlimit_nofile(saved_rlimit_nofile);
2141 (void) bump_rlimit_memlock(saved_rlimit_memlock);
5afbaa36
LP
2142
2143 return 0;
2144}
2145
6acca5fc
LP
2146static int do_queue_default_job(
2147 Manager *m,
2148 const char **ret_error_message) {
2149
2150 _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
f1d075dc
ZJS
2151 const char *unit;
2152 Job *job;
2153 Unit *target;
6acca5fc
LP
2154 int r;
2155
8755dbad 2156 if (arg_default_unit)
f1d075dc 2157 unit = arg_default_unit;
8755dbad 2158 else if (in_initrd())
f1d075dc 2159 unit = SPECIAL_INITRD_TARGET;
8755dbad 2160 else
f1d075dc 2161 unit = SPECIAL_DEFAULT_TARGET;
8755dbad 2162
f1d075dc 2163 log_debug("Activating default unit: %s", unit);
8755dbad 2164
f1d075dc 2165 r = manager_load_startable_unit_or_warn(m, unit, NULL, &target);
8755dbad
ZJS
2166 if (r < 0 && in_initrd() && !arg_default_unit) {
2167 /* Fall back to default.target, which we used to always use by default. Only do this if no
2168 * explicit configuration was given. */
2169
2170 log_info("Falling back to " SPECIAL_DEFAULT_TARGET ".");
6acca5fc 2171
8755dbad
ZJS
2172 r = manager_load_startable_unit_or_warn(m, SPECIAL_DEFAULT_TARGET, NULL, &target);
2173 }
4109ede7 2174 if (r < 0) {
8755dbad 2175 log_info("Falling back to " SPECIAL_RESCUE_TARGET ".");
6acca5fc 2176
4109ede7 2177 r = manager_load_startable_unit_or_warn(m, SPECIAL_RESCUE_TARGET, NULL, &target);
6acca5fc 2178 if (r < 0) {
8755dbad
ZJS
2179 *ret_error_message = r == -ERFKILL ? SPECIAL_RESCUE_TARGET " masked"
2180 : "Failed to load " SPECIAL_RESCUE_TARGET;
4109ede7 2181 return r;
6acca5fc
LP
2182 }
2183 }
2184
2185 assert(target->load_state == UNIT_LOADED);
2186
f1d075dc 2187 r = manager_add_job(m, JOB_START, target, JOB_ISOLATE, NULL, &error, &job);
6acca5fc
LP
2188 if (r == -EPERM) {
2189 log_debug_errno(r, "Default target could not be isolated, starting instead: %s", bus_error_message(&error, r));
2190
2191 sd_bus_error_free(&error);
2192
f1d075dc 2193 r = manager_add_job(m, JOB_START, target, JOB_REPLACE, NULL, &error, &job);
6acca5fc
LP
2194 if (r < 0) {
2195 *ret_error_message = "Failed to start default target";
2196 return log_emergency_errno(r, "Failed to start default target: %s", bus_error_message(&error, r));
2197 }
2198
2199 } else if (r < 0) {
2200 *ret_error_message = "Failed to isolate default target";
2201 return log_emergency_errno(r, "Failed to isolate default target: %s", bus_error_message(&error, r));
c86c31d9
ZJS
2202 } else
2203 log_info("Queued %s job for default target %s.",
2204 job_type_to_string(job->type),
2205 unit_status_string(job->unit));
6acca5fc 2206
f1d075dc 2207 m->default_unit_job_id = job->id;
6acca5fc
LP
2208
2209 return 0;
2210}
2211
a9fd4cd1
FB
/* Reads the current RLIMIT_NOFILE and RLIMIT_MEMLOCK into the two supplied structures. A failing
 * getrlimit() is logged as a warning; nothing else is done for that limit. */
static void save_rlimits(
                struct rlimit *saved_rlimit_nofile,
                struct rlimit *saved_rlimit_memlock) {

        int k;

        assert(saved_rlimit_nofile);
        assert(saved_rlimit_memlock);

        k = getrlimit(RLIMIT_NOFILE, saved_rlimit_nofile);
        if (k < 0)
                log_warning_errno(errno, "Reading RLIMIT_NOFILE failed, ignoring: %m");

        k = getrlimit(RLIMIT_MEMLOCK, saved_rlimit_memlock);
        if (k < 0)
                log_warning_errno(errno, "Reading RLIMIT_MEMLOCK failed, ignoring: %m");
}
2224
2225static void fallback_rlimit_nofile(const struct rlimit *saved_rlimit_nofile) {
2226 struct rlimit *rl;
2227
2228 if (arg_default_rlimit[RLIMIT_NOFILE])
2229 return;
2230
2231 /* Make sure forked processes get limits based on the original kernel setting */
2232
2233 rl = newdup(struct rlimit, saved_rlimit_nofile, 1);
2234 if (!rl) {
2235 log_oom();
2236 return;
2237 }
2238
2239 /* Bump the hard limit for system services to a substantially higher value. The default
2240 * hard limit current kernels set is pretty low (4K), mostly for historical
2241 * reasons. According to kernel developers, the fd handling in recent kernels has been
2242 * optimized substantially enough, so that we can bump the limit now, without paying too
2243 * high a price in memory or performance. Note however that we only bump the hard limit,
2244 * not the soft limit. That's because select() works the way it works, and chokes on fds
2245 * >= 1024. If we'd bump the soft limit globally, it might accidentally happen to
2246 * unexpecting programs that they get fds higher than what they can process using
2247 * select(). By only bumping the hard limit but leaving the low limit as it is we avoid
2248 * this pitfall: programs that are written by folks aware of the select() problem in mind
2249 * (and thus use poll()/epoll instead of select(), the way everybody should) can
2250 * explicitly opt into high fds by bumping their soft limit beyond 1024, to the hard limit
2251 * we pass. */
2252 if (arg_system) {
2253 int nr;
2254
2255 /* Get the underlying absolute limit the kernel enforces */
2256 nr = read_nr_open();
2257
2258 rl->rlim_max = MIN((rlim_t) nr, MAX(rl->rlim_max, (rlim_t) HIGH_RLIMIT_NOFILE));
2259 }
2260
2261 /* If for some reason we were invoked with a soft limit above 1024 (which should never
2262 * happen!, but who knows what we get passed in from pam_limit when invoked as --user
2263 * instance), then lower what we pass on to not confuse our children */
2264 rl->rlim_cur = MIN(rl->rlim_cur, (rlim_t) FD_SETSIZE);
2265
2266 arg_default_rlimit[RLIMIT_NOFILE] = rl;
2267}
2268
2269static void fallback_rlimit_memlock(const struct rlimit *saved_rlimit_memlock) {
2270 struct rlimit *rl;
2271
2272 /* Pass the original value down to invoked processes */
2273
2274 if (arg_default_rlimit[RLIMIT_MEMLOCK])
2275 return;
2276
2277 rl = newdup(struct rlimit, saved_rlimit_memlock, 1);
2278 if (!rl) {
2279 log_oom();
2280 return;
2281 }
2282
2283 arg_default_rlimit[RLIMIT_MEMLOCK] = rl;
2284}
2285
fb39af4c
ZJS
2286static void reset_arguments(void) {
2287 /* Frees/resets arg_* variables, with a few exceptions commented below. */
970777b5
LP
2288
2289 arg_default_unit = mfree(arg_default_unit);
fb39af4c
ZJS
2290
2291 /* arg_system — ignore */
2292
2293 arg_dump_core = true;
2294 arg_crash_chvt = -1;
2295 arg_crash_shell = false;
2296 arg_crash_reboot = false;
970777b5 2297 arg_confirm_spawn = mfree(arg_confirm_spawn);
fb39af4c 2298 arg_show_status = _SHOW_STATUS_INVALID;
36cf4507 2299 arg_status_unit_format = STATUS_UNIT_FORMAT_DEFAULT;
fb39af4c
ZJS
2300 arg_switched_root = false;
2301 arg_pager_flags = 0;
2302 arg_service_watchdogs = true;
2303 arg_default_std_output = EXEC_OUTPUT_JOURNAL;
2304 arg_default_std_error = EXEC_OUTPUT_INHERIT;
2305 arg_default_restart_usec = DEFAULT_RESTART_USEC;
2306 arg_default_timeout_start_usec = DEFAULT_TIMEOUT_USEC;
2307 arg_default_timeout_stop_usec = DEFAULT_TIMEOUT_USEC;
2308 arg_default_timeout_abort_usec = DEFAULT_TIMEOUT_USEC;
2309 arg_default_timeout_abort_set = false;
2310 arg_default_start_limit_interval = DEFAULT_START_LIMIT_INTERVAL;
2311 arg_default_start_limit_burst = DEFAULT_START_LIMIT_BURST;
2312 arg_runtime_watchdog = 0;
65224c1d 2313 arg_reboot_watchdog = 10 * USEC_PER_MINUTE;
acafd7d8 2314 arg_kexec_watchdog = 0;
fb39af4c
ZJS
2315 arg_early_core_pattern = NULL;
2316 arg_watchdog_device = NULL;
2317
970777b5 2318 arg_default_environment = strv_free(arg_default_environment);
fb39af4c
ZJS
2319 rlimit_free_all(arg_default_rlimit);
2320
2321 arg_capability_bounding_set = CAP_ALL;
2322 arg_no_new_privs = false;
2323 arg_timer_slack_nsec = NSEC_INFINITY;
2324 arg_default_timer_accuracy_usec = 1 * USEC_PER_MINUTE;
2325
970777b5 2326 arg_syscall_archs = set_free(arg_syscall_archs);
61fbbac1 2327
fb39af4c
ZJS
2328 /* arg_serialization — ignore */
2329
2330 arg_default_cpu_accounting = -1;
2331 arg_default_io_accounting = false;
2332 arg_default_ip_accounting = false;
2333 arg_default_blockio_accounting = false;
2334 arg_default_memory_accounting = MEMORY_ACCOUNTING_DEFAULT;
2335 arg_default_tasks_accounting = true;
3a0f06c4 2336 arg_default_tasks_max = DEFAULT_TASKS_MAX;
fb39af4c
ZJS
2337 arg_machine_id = (sd_id128_t) {};
2338 arg_cad_burst_action = EMERGENCY_ACTION_REBOOT_FORCE;
2339 arg_default_oom_policy = OOM_STOP;
2340
61fbbac1 2341 cpu_set_reset(&arg_cpu_affinity);
b070c7c0 2342 numa_policy_reset(&arg_numa_policy);
d247f232
LP
2343
2344 arg_random_seed = mfree(arg_random_seed);
2345 arg_random_seed_size = 0;
33d943d1 2346 arg_clock_usec = 0;
970777b5
LP
2347}
2348
a9fd4cd1
FB
2349static int parse_configuration(const struct rlimit *saved_rlimit_nofile,
2350 const struct rlimit *saved_rlimit_memlock) {
97d1fb94
LP
2351 int r;
2352
a9fd4cd1
FB
2353 assert(saved_rlimit_nofile);
2354 assert(saved_rlimit_memlock);
2355
fb39af4c
ZJS
2356 /* Assign configuration defaults */
2357 reset_arguments();
2358
97d1fb94 2359 r = parse_config_file();
470a5e6d
ZJS
2360 if (r < 0)
2361 log_warning_errno(r, "Failed to parse config file, ignoring: %m");
97d1fb94
LP
2362
2363 if (arg_system) {
2364 r = proc_cmdline_parse(parse_proc_cmdline_item, NULL, 0);
2365 if (r < 0)
2366 log_warning_errno(r, "Failed to parse kernel command line, ignoring: %m");
2367 }
2368
a9fd4cd1
FB
2369 /* Initialize some default rlimits for services if they haven't been configured */
2370 fallback_rlimit_nofile(saved_rlimit_nofile);
2371 fallback_rlimit_memlock(saved_rlimit_memlock);
2372
97d1fb94
LP
2373 /* Note that this also parses bits from the kernel command line, including "debug". */
2374 log_parse_environment();
2375
db33214b 2376 /* Initialize the show status setting if it hasn't been set explicitly yet */
7a293242 2377 if (arg_show_status == _SHOW_STATUS_INVALID)
db33214b
LP
2378 arg_show_status = SHOW_STATUS_YES;
2379
97d1fb94
LP
2380 return 0;
2381}
2382
b0d7c989
LP
2383static int safety_checks(void) {
2384
febf46a4 2385 if (getpid_cached() == 1 &&
baaa35ad
ZJS
2386 arg_action != ACTION_RUN)
2387 return log_error_errno(SYNTHETIC_ERRNO(EPERM),
2388 "Unsupported execution mode while PID 1.");
febf46a4
LP
2389
2390 if (getpid_cached() == 1 &&
baaa35ad
ZJS
2391 !arg_system)
2392 return log_error_errno(SYNTHETIC_ERRNO(EPERM),
2393 "Can't run --user mode as PID 1.");
febf46a4
LP
2394
2395 if (arg_action == ACTION_RUN &&
2396 arg_system &&
baaa35ad
ZJS
2397 getpid_cached() != 1)
2398 return log_error_errno(SYNTHETIC_ERRNO(EPERM),
2399 "Can't run system mode unless PID 1.");
febf46a4 2400
b0d7c989 2401 if (arg_action == ACTION_TEST &&
baaa35ad
ZJS
2402 geteuid() == 0)
2403 return log_error_errno(SYNTHETIC_ERRNO(EPERM),
2404 "Don't run test mode as root.");
b0d7c989
LP
2405
2406 if (!arg_system &&
2407 arg_action == ACTION_RUN &&
baaa35ad
ZJS
2408 sd_booted() <= 0)
2409 return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP),
2410 "Trying to run as user instance, but the system has not been booted with systemd.");
b0d7c989
LP
2411
2412 if (!arg_system &&
2413 arg_action == ACTION_RUN &&
baaa35ad
ZJS
2414 !getenv("XDG_RUNTIME_DIR"))
2415 return log_error_errno(SYNTHETIC_ERRNO(EUNATCH),
2416 "Trying to run as user instance, but $XDG_RUNTIME_DIR is not set.");
b0d7c989
LP
2417
2418 if (arg_system &&
2419 arg_action == ACTION_RUN &&
baaa35ad
ZJS
2420 running_in_chroot() > 0)
2421 return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP),
2422 "Cannot be run in a chroot() environment.");
b0d7c989
LP
2423
2424 return 0;
2425}
2426
74da609f
LP
2427static int initialize_security(
2428 bool *loaded_policy,
2429 dual_timestamp *security_start_timestamp,
2430 dual_timestamp *security_finish_timestamp,
2431 const char **ret_error_message) {
2432
2433 int r;
2434
2435 assert(loaded_policy);
2436 assert(security_start_timestamp);
2437 assert(security_finish_timestamp);
2438 assert(ret_error_message);
2439
2440 dual_timestamp_get(security_start_timestamp);
2441
97149f40 2442 r = mac_selinux_setup(loaded_policy);
74da609f
LP
2443 if (r < 0) {
2444 *ret_error_message = "Failed to load SELinux policy";
2445 return r;
2446 }
2447
2448 r = mac_smack_setup(loaded_policy);
2449 if (r < 0) {
2450 *ret_error_message = "Failed to load SMACK policy";
2451 return r;
2452 }
2453
2ffadd3c
Y
2454 r = mac_apparmor_setup();
2455 if (r < 0) {
2456 *ret_error_message = "Failed to load AppArmor policy";
2457 return r;
2458 }
2459
74da609f
LP
2460 r = ima_setup();
2461 if (r < 0) {
2462 *ret_error_message = "Failed to load IMA policy";
2463 return r;
2464 }
2465
2466 dual_timestamp_get(security_finish_timestamp);
2467 return 0;
2468}
2469
263162da
LP
2470static void test_summary(Manager *m) {
2471 assert(m);
2472
2473 printf("-> By units:\n");
2474 manager_dump_units(m, stdout, "\t");
2475
2476 printf("-> By jobs:\n");
2477 manager_dump_jobs(m, stdout, "\t");
2478}
2479
efeb853f
LP
2480static int collect_fds(FDSet **ret_fds, const char **ret_error_message) {
2481 int r;
2482
2483 assert(ret_fds);
2484 assert(ret_error_message);
2485
2486 r = fdset_new_fill(ret_fds);
2487 if (r < 0) {
2488 *ret_error_message = "Failed to allocate fd set";
2489 return log_emergency_errno(r, "Failed to allocate fd set: %m");
2490 }
2491
2492 fdset_cloexec(*ret_fds, true);
2493
2494 if (arg_serialization)
2495 assert_se(fdset_remove(*ret_fds, fileno(arg_serialization)) >= 0);
2496
2497 return 0;
2498}
2499
2e51b31c
LP
2500static void setup_console_terminal(bool skip_setup) {
2501
2502 if (!arg_system)
2503 return;
2504
2505 /* Become a session leader if we aren't one yet. */
2506 (void) setsid();
2507
2508 /* If we are init, we connect stdin/stdout/stderr to /dev/null and make sure we don't have a controlling
2509 * tty. */
2510 (void) release_terminal();
2511
2512 /* Reset the console, but only if this is really init and we are freshly booted */
2513 if (getpid_cached() == 1 && !skip_setup)
2514 (void) console_setup();
2515}
2516
aa40ff07
LP
/* Quick peek at the command line (full parsing happens much later) to tell a reexecution from a
 * normal boot-up.
 *
 * Returns true if "--deserialize" is present and "--switched-root" is not, i.e. if the extensive
 * setup may be skipped. After a root switch the special setup is done anyway, even though
 * deserialization takes place in that case, too. */
static bool early_skip_setup_check(int argc, char *argv[]) {
        bool deserializing = false;
        int pos;

        for (pos = 1; pos < argc; pos++) {
                const char *arg = argv[pos];

                /* We switched root: never skip the setup, regardless of what else is passed. */
                if (streq(arg, "--switched-root"))
                        return false;

                if (streq(arg, "--deserialize"))
                        deserializing = true;
        }

        /* Deserializing means we are reexecuting, hence the extensive setup can be avoided. */
        return deserializing;
}
2534
0e06a031
LP
2535static int save_env(void) {
2536 char **l;
2537
2538 l = strv_copy(environ);
2539 if (!l)
2540 return -ENOMEM;
2541
2542 strv_free_and_replace(saved_env, l);
2543 return 0;
2544}
2545
/* Program entry point.
 *
 * Rough order of operations, as visible below:
 *   1. Take timestamps, decide whether the one-time system setup may be skipped (re-execution), save
 *      the original command line and environment.
 *   2. Early setup, which differs depending on whether we run as PID 1 (and then again on whether
 *      detect_container() reports a container) or as any other PID.
 *   3. Parse configuration and the command line, run safety checks, and handle the actions that
 *      only print something and exit.
 *   4. Collect inherited fds, set up the console, initialize the runtime, allocate and start the
 *      Manager, optionally queue the default job, and enter the main loop.
 *   5. "finish": free the manager, then either re-execute, execute the shutdown binary, or fall
 *      through to freeze_or_exit_or_reboot() (PID 1) or a plain return.
 *
 * Returns retval, which starts out as EXIT_FAILURE and is updated by the individual actions and by
 * invoke_main_loop(). error_message, when set, is shown on the emergency status channel if we are
 * PID 1. */
60918275 2546int main(int argc, char *argv[]) {
625e8690
LP
2547
2548 dual_timestamp initrd_timestamp = DUAL_TIMESTAMP_NULL, userspace_timestamp = DUAL_TIMESTAMP_NULL, kernel_timestamp = DUAL_TIMESTAMP_NULL,
2549 security_start_timestamp = DUAL_TIMESTAMP_NULL, security_finish_timestamp = DUAL_TIMESTAMP_NULL;
ddfa8b0b
LP
2550 struct rlimit saved_rlimit_nofile = RLIMIT_MAKE_CONST(0),
2551 saved_rlimit_memlock = RLIMIT_MAKE_CONST(RLIM_INFINITY); /* The original rlimits we passed
2552 * in. Note we use different values
2553 * for the two that indicate whether
2554 * these fields are initialized! */
625e8690
LP
2555 bool skip_setup, loaded_policy = false, queue_default_job = false, first_boot = false, reexecute = false;
2556 char *switch_root_dir = NULL, *switch_root_init = NULL;
9d76d730 2557 usec_t before_startup, after_startup;
625e8690 2558 static char systemd[] = "systemd";
9d76d730 2559 char timespan[FORMAT_TIMESPAN_MAX];
625e8690
LP
2560 const char *shutdown_verb = NULL, *error_message = NULL;
2561 int r, retval = EXIT_FAILURE;
2562 Manager *m = NULL;
a16e1123 2563 FDSet *fds = NULL;
27b14a22 2564
d72a8f10 2565 /* SysV compatibility: redirect init → telinit */
6808a0bc 2566 redirect_telinit(argc, argv);
2cb1a60d 2567
d72a8f10 2568 /* Take timestamps early on */
c3a170f3
HH
2569 dual_timestamp_from_monotonic(&kernel_timestamp, 0);
2570 dual_timestamp_get(&userspace_timestamp);
2571
d72a8f10
LP
2572 /* Figure out whether we need to initialize the system, or if we already did that because we are
2573 * reexecuting */
aa40ff07 2574 skip_setup = early_skip_setup_check(argc, argv);
d03bc1b8 2575
d72a8f10
LP
2576 /* If we get started via the /sbin/init symlink then we are called 'init'. After a subsequent reexecution we
2577 * are then called 'systemd'. That is confusing, hence let's call us systemd right-away. */
f3b6a3ed 2578 program_invocation_short_name = systemd;
eee8b7ab 2579 (void) prctl(PR_SET_NAME, systemd);
5d6b1584 2580
d72a8f10 2581 /* Save the original command line */
36fea155 2582 save_argc_argv(argc, argv);
f3b6a3ed 2583
0e06a031
LP
2584 /* Save the original environment as we might need to restore it if we're requested to execute another
2585 * system manager later. */
2586 r = save_env();
2587 if (r < 0) {
2588 error_message = "Failed to copy environment block";
2589 goto finish;
2590 }
a5cede8c 2591
6fdb8de4 2592 /* Make sure that if the user says "syslog" we actually log to the journal. */
c1dc6153 2593 log_set_upgrade_syslog_to_journal(true);
bbe63281 2594
df0ff127 2595 if (getpid_cached() == 1) {
b5752d23
LP
2596 /* When we run as PID 1 force system mode */
2597 arg_system = true;
2598
48a601fe 2599 /* Disable the umask logic */
90dc8c2e
MG
2600 umask(0);
2601
92890452
LP
2602 /* Make sure that at least initially we do not ever log to journald/syslogd, because it might not be
2603 * activated yet (even though the log socket for it exists). */
d075092f
LP
2604 log_set_prohibit_ipc(true);
2605
48a601fe
LP
2606 /* Always reopen /dev/console when running as PID 1 or one of its pre-execve() children. This is
2607 * important so that we never end up logging to any foreign stderr, for example if we have to log in a
2608 * child process right before execve()'ing the actual binary, at a point in time where socket
2609 * activation stderr/stdout are already set up. */
2610 log_set_always_reopen_console(true);
48a601fe 2611
92890452 2612 if (detect_container() <= 0) {
4f8d551f 2613
92890452 2614 /* Running outside of a container as PID 1 */
92890452
LP
2615 log_set_target(LOG_TARGET_KMSG);
2616 log_open();
a866073d 2617
92890452
LP
2618 if (in_initrd())
2619 initrd_timestamp = userspace_timestamp;
c3ba6250 2620
92890452
LP
2621 if (!skip_setup) {
2622 r = mount_setup_early();
2623 if (r < 0) {
2624 error_message = "Failed to mount early API filesystems";
2625 goto finish;
2626 }
2627
0a2eef1e
LP
2628 /* Let's open the log backend a second time, in case the first time didn't
2629 * work. Quite possibly we have mounted /dev just now, so /dev/kmsg became
2630 * available, and it previously wasn't. */
2631 log_open();
2632
6123dfaa
ZJS
2633 disable_printk_ratelimit();
2634
92890452
LP
2635 r = initialize_security(
2636 &loaded_policy,
2637 &security_start_timestamp,
2638 &security_finish_timestamp,
2639 &error_message);
2640 if (r < 0)
2641 goto finish;
d723cd65 2642 }
eee8b7ab 2643
92890452 2644 if (mac_selinux_init() < 0) {
a9ba0e32 2645 error_message = "Failed to initialize SELinux support";
96694e99 2646 goto finish;
92890452 2647 }
0b3325e7 2648
92890452
LP
2649 if (!skip_setup)
2650 initialize_clock();
2651
2652 /* Set the default for later on, but don't actually open the logs like this for now. Note that
2653 * if we are transitioning from the initrd there might still be journal fd open, and we
2654 * shouldn't attempt opening that before we parsed /proc/cmdline which might redirect output
2655 * elsewhere. */
2656 log_set_target(LOG_TARGET_JOURNAL_OR_KMSG);
2657
2658 } else {
2659 /* Running inside a container, as PID 1 */
92890452
LP
2660 log_set_target(LOG_TARGET_CONSOLE);
2661 log_open();
2662
2663 /* For later on, see above... */
2664 log_set_target(LOG_TARGET_JOURNAL);
2665
45250e66 2666 /* clear the kernel timestamp, because we are in a container */
92890452 2667 kernel_timestamp = DUAL_TIMESTAMP_NULL;
cb6531be 2668 }
7948c4df 2669
92890452 2670 initialize_coredump(skip_setup);
a866073d 2671
92890452
LP
2672 r = fixup_environment();
2673 if (r < 0) {
2674 log_emergency_errno(r, "Failed to fix up PID 1 environment: %m");
2675 error_message = "Failed to fix up PID1 environment";
2676 goto finish;
2677 }
a866073d 2678
92890452
LP
2679 /* Try to figure out if we can use colors with the console. No need to do that for user instances since
2680 * they never log into the console. */
3a18b604 2681 log_show_color(colors_enabled());
92890452 2682
c76cf844
AK
2683 r = make_null_stdio();
2684 if (r < 0)
92890452 2685 log_warning_errno(r, "Failed to redirect standard streams to /dev/null, ignoring: %m");
f84f9974 2686
a132bef0 2687 /* Load the kernel modules early. */
2e75e2a8
DM
2688 if (!skip_setup)
2689 kmod_setup();
2e75e2a8 2690
3196e423 2691 /* Mount /proc, /sys and friends, so that /proc/cmdline and /proc/$PID/fd is available. */
f74349d8 2692 r = mount_setup(loaded_policy, skip_setup);
cb6531be
ZJS
2693 if (r < 0) {
2694 error_message = "Failed to mount API filesystems";
8efe3c01 2695 goto finish;
cb6531be 2696 }
c18ecf03
LP
2697
2698 /* The efivarfs is now mounted, let's read the random seed off it */
2699 (void) efi_take_random_seed();
209b2592
FB
2700
2701 /* Cache command-line options passed from EFI variables */
2702 if (!skip_setup)
2703 (void) cache_efi_options_variable();
3196e423
LP
2704 } else {
2705 /* Running as user instance */
2706 arg_system = false;
2707 log_set_target(LOG_TARGET_AUTO);
2708 log_open();
2709
2710 /* clear the kernel timestamp, because we are not PID 1 */
2711 kernel_timestamp = DUAL_TIMESTAMP_NULL;
2712
2713 if (mac_selinux_init() < 0) {
2714 error_message = "Failed to initialize SELinux support";
2715 goto finish;
2716 }
0c85a4f3 2717 }
4ade7963 2718
a9fd4cd1
FB
2719 /* Save the original RLIMIT_NOFILE/RLIMIT_MEMLOCK so that we can reset it later when
2720 * transitioning from the initrd to the main systemd or suchlike. */
2721 save_rlimits(&saved_rlimit_nofile, &saved_rlimit_memlock);
2722
4ade7963 2723 /* Reset all signal handlers. */
ce30c8dc
LP
2724 (void) reset_all_signal_handlers();
2725 (void) ignore_signals(SIGNALS_IGNORE, -1);
078e4539 2726
ffe5c01e
FB
2727 (void) parse_configuration(&saved_rlimit_nofile, &saved_rlimit_memlock);
2728
2729 r = parse_argv(argc, argv);
2730 if (r < 0) {
2731 error_message = "Failed to parse commandline arguments";
f170852a 2732 goto finish;
ffe5c01e 2733 }
10c961b9 2734
b0d7c989
LP
2735 r = safety_checks();
2736 if (r < 0)
fe783b03 2737 goto finish;
fe783b03 2738
5c08257b 2739 if (IN_SET(arg_action, ACTION_TEST, ACTION_HELP, ACTION_DUMP_CONFIGURATION_ITEMS, ACTION_DUMP_BUS_PROPERTIES, ACTION_BUS_INTROSPECT))
0221d68a 2740 (void) pager_open(arg_pager_flags);
b0d7c989
LP
2741
/* Every action other than an actual run is treated like a reexecution with regard to setup: all steps
 * below that are gated on !skip_setup are suppressed. */
2742 if (arg_action != ACTION_RUN)
74e7579c 2743 skip_setup = true;
b87c2aa6 2744
fa0f4d8a 2745 if (arg_action == ACTION_HELP) {
37ec0fdd 2746 retval = help() < 0 ? EXIT_FAILURE : EXIT_SUCCESS;
f170852a 2747 goto finish;
9ba0bc4e
ZJS
2748 } else if (arg_action == ACTION_VERSION) {
2749 retval = version();
2750 goto finish;
fa0f4d8a 2751 } else if (arg_action == ACTION_DUMP_CONFIGURATION_ITEMS) {
e537352b 2752 unit_dump_config_items(stdout);
22f4096c 2753 retval = EXIT_SUCCESS;
e537352b 2754 goto finish;
bbc1acab
YW
2755 } else if (arg_action == ACTION_DUMP_BUS_PROPERTIES) {
2756 dump_bus_properties(stdout);
2757 retval = EXIT_SUCCESS;
2758 goto finish;
5c08257b
ZJS
2759 } else if (arg_action == ACTION_BUS_INTROSPECT) {
2760 r = bus_manager_introspect_implementations(stdout, arg_bus_introspect);
2761 retval = r >= 0 ? EXIT_SUCCESS : EXIT_FAILURE;
2762 goto finish;
f170852a
LP
2763 }
2764
/* Each action handled in the chain above has already jumped to "finish", hence only these two can
 * be left at this point. */
4c701096 2765 assert_se(IN_SET(arg_action, ACTION_RUN, ACTION_TEST));
f170852a 2766
5a2e0c62
LP
2767 /* Move out of the way, so that we won't block unmounts */
2768 assert_se(chdir("/") == 0);
2769
dea374e8 2770 if (arg_action == ACTION_RUN) {
d247f232
LP
2771 if (!skip_setup) {
2772 /* Apply the systemd.clock_usec= kernel command line switch */
45250e66 2773 apply_clock_update();
a70c72a0 2774
d247f232
LP
2775 /* Apply random seed from kernel command line */
2776 cmdline_take_random_seed();
2777 }
2778
c6885f5f
FB
2779 /* A core pattern might have been specified via the cmdline. */
2780 initialize_core_pattern(skip_setup);
2781
efeb853f 2782 /* Close logging fds, in order not to confuse collecting passed fds and terminal logic below */
a70c72a0
LP
2783 log_close();
2784
2785 /* Remember open file descriptors for later deserialization */
efeb853f
LP
2786 r = collect_fds(&fds, &error_message);
2787 if (r < 0)
dea374e8 2788 goto finish;
a16e1123 2789
2e51b31c
LP
2790 /* Give up any control of the console, but make sure it's initialized. */
2791 setup_console_terminal(skip_setup);
56d96fc0 2792
a70c72a0
LP
2793 /* Open the logging devices, if possible and necessary */
2794 log_open();
56d96fc0 2795 }
4ade7963 2796
31aef7ff 2797 log_execution_mode(&first_boot);
a5dab5ce 2798
2d776038 2799 r = initialize_runtime(skip_setup,
3023f2fe 2800 first_boot,
2d776038
LP
2801 &saved_rlimit_nofile,
2802 &saved_rlimit_memlock,
2803 &error_message);
2804 if (r < 0)
2805 goto finish;
4096d6f5 2806
e0a3da1f
ZJS
2807 r = manager_new(arg_system ? UNIT_FILE_SYSTEM : UNIT_FILE_USER,
2808 arg_action == ACTION_TEST ? MANAGER_TEST_FULL : 0,
2809 &m);
e96d6be7 2810 if (r < 0) {
da927ba9 2811 log_emergency_errno(r, "Failed to allocate manager object: %m");
cb6531be 2812 error_message = "Failed to allocate manager object";
60918275
LP
2813 goto finish;
2814 }
2815
/* Hand the timestamps collected during early boot over to the freshly allocated manager */
9f9f0342
LP
2816 m->timestamps[MANAGER_TIMESTAMP_KERNEL] = kernel_timestamp;
2817 m->timestamps[MANAGER_TIMESTAMP_INITRD] = initrd_timestamp;
2818 m->timestamps[MANAGER_TIMESTAMP_USERSPACE] = userspace_timestamp;
d4ee7bd8
YW
2819 m->timestamps[manager_timestamp_initrd_mangle(MANAGER_TIMESTAMP_SECURITY_START)] = security_start_timestamp;
2820 m->timestamps[manager_timestamp_initrd_mangle(MANAGER_TIMESTAMP_SECURITY_FINISH)] = security_finish_timestamp;
9e58ff9c 2821
85cb4151 2822 set_manager_defaults(m);
7b46fc6a 2823 set_manager_settings(m);
fd130612 2824 manager_set_first_boot(m, first_boot);
27d340c7 2825
bf4df7c3 2826 /* Remember whether we should queue the default job */
d3b1c508 2827 queue_default_job = !arg_serialization || arg_switched_root;
bf4df7c3 2828
9d76d730
LP
2829 before_startup = now(CLOCK_MONOTONIC);
2830
d3b1c508 2831 r = manager_startup(m, arg_serialization, fds);
58f88d92 2832 if (r < 0) {
cefb3eda 2833 error_message = "Failed to start up manager";
58f88d92
ZJS
2834 goto finish;
2835 }
a16e1123 2836
6acca5fc 2837 /* This will close all file descriptors that were opened, but not claimed by any unit. */
2feceb5e 2838 fds = fdset_free(fds);
74ca738f 2839 arg_serialization = safe_fclose(arg_serialization);
bf4df7c3
LP
2840
2841 if (queue_default_job) {
6acca5fc 2842 r = do_queue_default_job(m, &error_message);
718db961 2843 if (r < 0)
37d88da7 2844 goto finish;
6acca5fc 2845 }
ab17a050 2846
6acca5fc 2847 after_startup = now(CLOCK_MONOTONIC);
60918275 2848
6acca5fc
LP
2849 log_full(arg_action == ACTION_TEST ? LOG_INFO : LOG_DEBUG,
2850 "Loaded units and determined initial transaction in %s.",
2851 format_timespan(timespan, sizeof(timespan), after_startup - before_startup, 100 * USEC_PER_MSEC));
07672f49 2852
6acca5fc 2853 if (arg_action == ACTION_TEST) {
263162da 2854 test_summary(m);
6acca5fc
LP
2855 retval = EXIT_SUCCESS;
2856 goto finish;
e965d56d 2857 }
d46de8a1 2858
/* The return value is deliberately ignored; results are handed back through the output parameters
 * (reexecute, retval, shutdown_verb, fds, switch_root_dir, switch_root_init, error_message). */
3046b6db 2859 (void) invoke_main_loop(m,
a9fd4cd1
FB
2860 &saved_rlimit_nofile,
2861 &saved_rlimit_memlock,
3046b6db
LP
2862 &reexecute,
2863 &retval,
2864 &shutdown_verb,
2865 &fds,
2866 &switch_root_dir,
2867 &switch_root_init,
2868 &error_message);
f170852a 2869
/* Common exit path: reached via goto on any failure or early-exit action above, and by falling through
 * once invoke_main_loop() has returned. */
60918275 2870finish:
b87c2aa6
ZJS
2871 pager_close();
2872
92890452 2873 if (m) {
986935cf
FB
2874 arg_reboot_watchdog = manager_get_watchdog(m, WATCHDOG_REBOOT);
2875 arg_kexec_watchdog = manager_get_watchdog(m, WATCHDOG_KEXEC);
92890452
LP
2876 m = manager_free(m);
2877 }
60918275 2878
cc56fafe 2879 mac_selinux_finish();
b2bb3dbe 2880
3c7878f9
LP
2881 if (reexecute)
2882 do_reexecute(argc, argv,
2883 &saved_rlimit_nofile,
2884 &saved_rlimit_memlock,
2885 fds,
2886 switch_root_dir,
2887 switch_root_init,
2888 &error_message); /* This only returns if reexecution failed */
a16e1123 2889
74ca738f 2890 arg_serialization = safe_fclose(arg_serialization);
2feceb5e 2891 fds = fdset_free(fds);
a16e1123 2892
0e06a031
LP
2893 saved_env = strv_free(saved_env);
2894
349cc4a5 2895#if HAVE_VALGRIND_VALGRIND_H
54b434b1
LP
2896 /* If we are PID 1 and running under valgrind, then let's exit
2897 * here explicitly. valgrind will only generate nice output on
2898 * exit(), not on exec(), hence let's do the former not the
2899 * latter here. */
8a2c1fbf
EJ
2900 if (getpid_cached() == 1 && RUNNING_ON_VALGRIND) {
2901 /* Cleanup watchdog_device strings for valgrind. We need them
2902 * in become_shutdown() so normally we cannot free them yet. */
2903 watchdog_free_device();
2904 arg_watchdog_device = mfree(arg_watchdog_device);
7d9eea2b 2905 reset_arguments();
27fe58b7 2906 return retval;
8a2c1fbf 2907 }
54b434b1
LP
2908#endif
2909
7e11a95e
EV
2910#if HAS_FEATURE_ADDRESS_SANITIZER
2911 __lsan_do_leak_check();
2912#endif
2913
b9080b03 2914 if (shutdown_verb) {
/* NOTE(review): presumably become_shutdown() only returns on failure, which is why its result is
 * logged as an error unconditionally — confirm against become_shutdown(). */
7eb35049 2915 r = become_shutdown(shutdown_verb, retval);
4a36297c 2916 log_error_errno(r, "Failed to execute shutdown binary, %s: %m", getpid_cached() == 1 ? "freezing" : "quitting");
9b9881d7 2917 error_message = "Failed to execute shutdown binary";
b9080b03
FF
2918 }
2919
8a2c1fbf
EJ
2920 watchdog_free_device();
2921 arg_watchdog_device = mfree(arg_watchdog_device);
2922
df0ff127 2923 if (getpid_cached() == 1) {
cb6531be
ZJS
2924 if (error_message)
2925 manager_status_printf(NULL, STATUS_TYPE_EMERGENCY,
1fc464f6 2926 ANSI_HIGHLIGHT_RED "!!!!!!" ANSI_NORMAL,
bb259772
LP
2927 "%s.", error_message);
/* NOTE(review): presumably this does not return when we are PID 1 — confirm against
 * freeze_or_exit_or_reboot(). */
2928 freeze_or_exit_or_reboot();
cb6531be 2929 }
c3b3c274 2930
7d9eea2b 2931 reset_arguments();
60918275
LP
2932 return retval;
2933}