]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/core/main.c
TODO: add entry about alias logging
[thirdparty/systemd.git] / src / core / main.c
CommitLineData
db9ecf05 1/* SPDX-License-Identifier: LGPL-2.1-or-later */
a7334b09 2
60918275 3#include <errno.h>
3dfc9763 4#include <fcntl.h>
f170852a 5#include <getopt.h>
664f88a7 6#include <sys/mount.h>
3dfc9763 7#include <sys/prctl.h>
b9e74c39 8#include <sys/reboot.h>
3dfc9763 9#include <unistd.h>
349cc4a5 10#if HAVE_SECCOMP
b64a3d86
LP
11#include <seccomp.h>
12#endif
349cc4a5 13#if HAVE_VALGRIND_VALGRIND_H
3dfc9763
LP
14#include <valgrind/valgrind.h>
15#endif
54b434b1 16
718db961 17#include "sd-bus.h"
cf0fbc49 18#include "sd-daemon.h"
b2e7486c 19#include "sd-messages.h"
3dfc9763 20
b5efdb8a 21#include "alloc-util.h"
2ffadd3c 22#include "apparmor-setup.h"
d9d93745 23#include "architecture.h"
3dfc9763
LP
24#include "build.h"
25#include "bus-error.h"
26#include "bus-util.h"
430f0182 27#include "capability-util.h"
a88c5b8a 28#include "cgroup-util.h"
24efb112 29#include "clock-util.h"
3dfc9763 30#include "conf-parser.h"
618234a5 31#include "cpu-set-util.h"
3dfc9763 32#include "dbus-manager.h"
c18ecf03 33#include "dbus.h"
3dfc9763 34#include "def.h"
32429805 35#include "dev-setup.h"
c18ecf03 36#include "efi-random.h"
209b2592 37#include "efivars.h"
eee8b7ab 38#include "emergency-action.h"
3dfc9763 39#include "env-util.h"
57b7a260 40#include "exit-status.h"
3ffd4af2 41#include "fd-util.h"
3dfc9763 42#include "fdset.h"
718db961 43#include "fileio.h"
f97b34a6 44#include "format-util.h"
f4f15635 45#include "fs-util.h"
d247f232 46#include "hexdecoct.h"
3dfc9763
LP
47#include "hostname-setup.h"
48#include "ima-setup.h"
49#include "killall.h"
50#include "kmod-setup.h"
eefc66aa 51#include "limits-util.h"
d7b8eec7 52#include "load-fragment.h"
3dfc9763 53#include "log.h"
b6e66135 54#include "loopback-setup.h"
b6e66135 55#include "machine-id-setup.h"
3dfc9763 56#include "manager.h"
32429805 57#include "mkdir.h"
3dfc9763 58#include "mount-setup.h"
d58ad743 59#include "os-util.h"
3dfc9763 60#include "pager.h"
614b022c 61#include "parse-argument.h"
6bedfcbb 62#include "parse-util.h"
7d5ceb64 63#include "path-util.h"
294bf0c3 64#include "pretty-print.h"
4e731273 65#include "proc-cmdline.h"
3dfc9763 66#include "process-util.h"
d247f232 67#include "random-util.h"
8869a0b4 68#include "raw-clone.h"
78f22b97 69#include "rlimit-util.h"
349cc4a5 70#if HAVE_SECCOMP
83f12b27
FS
71#include "seccomp-util.h"
72#endif
b6e66135 73#include "selinux-setup.h"
3dfc9763
LP
74#include "selinux-util.h"
75#include "signal-util.h"
ffbd2c4d 76#include "smack-setup.h"
3dfc9763 77#include "special.h"
8fcde012 78#include "stat-util.h"
15a5e950 79#include "stdio-util.h"
3dfc9763
LP
80#include "strv.h"
81#include "switch-root.h"
a8b627aa 82#include "sysctl-util.h"
3dfc9763 83#include "terminal-util.h"
8612da97 84#include "umask-util.h"
b1d4f8e1 85#include "user-util.h"
9ce17593 86#include "util.h"
3dfc9763
LP
87#include "virt.h"
88#include "watchdog.h"
b6e66135 89
7e11a95e
EV
90#if HAS_FEATURE_ADDRESS_SANITIZER
91#include <sanitizer/lsan_interface.h>
92#endif
93
3a0f06c4
ZJS
/* A TasksMax compound literal holding the pair { 15U, 100U }, i.e. 15%. */
#define DEFAULT_TASKS_MAX ((TasksMax) { 15U, 100U }) /* 15% */

/* The top-level action selected for this invocation. It starts out as ACTION_RUN; whatever changes it
 * lives outside this part of the file. */
static enum {
        ACTION_RUN,
        ACTION_HELP,
        ACTION_VERSION,
        ACTION_TEST,
        ACTION_DUMP_CONFIGURATION_ITEMS,
        ACTION_DUMP_BUS_PROPERTIES,
        ACTION_BUS_INTROSPECT,
} arg_action = ACTION_RUN;

/* Unset (NULL) until something assigns it. */
static const char *arg_bus_introspect = NULL;
107
45250e66
LP
108/* Those variables are initialized to 0 automatically, so we avoid uninitialized memory access. Real
109 * defaults are assigned in reset_arguments() below. */
fb39af4c
ZJS
110static char *arg_default_unit;
111static bool arg_system;
112static bool arg_dump_core;
113static int arg_crash_chvt;
114static bool arg_crash_shell;
115static bool arg_crash_reboot;
116static char *arg_confirm_spawn;
117static ShowStatus arg_show_status;
36cf4507 118static StatusUnitFormat arg_status_unit_format;
fb39af4c
ZJS
119static bool arg_switched_root;
120static PagerFlags arg_pager_flags;
121static bool arg_service_watchdogs;
122static ExecOutput arg_default_std_output;
123static ExecOutput arg_default_std_error;
124static usec_t arg_default_restart_usec;
125static usec_t arg_default_timeout_start_usec;
126static usec_t arg_default_timeout_stop_usec;
127static usec_t arg_default_timeout_abort_usec;
128static bool arg_default_timeout_abort_set;
129static usec_t arg_default_start_limit_interval;
130static unsigned arg_default_start_limit_burst;
131static usec_t arg_runtime_watchdog;
65224c1d 132static usec_t arg_reboot_watchdog;
acafd7d8 133static usec_t arg_kexec_watchdog;
fb39af4c
ZJS
134static char *arg_early_core_pattern;
135static char *arg_watchdog_device;
136static char **arg_default_environment;
d55ed7de 137static char **arg_manager_environment;
fb39af4c
ZJS
138static struct rlimit *arg_default_rlimit[_RLIMIT_MAX];
139static uint64_t arg_capability_bounding_set;
140static bool arg_no_new_privs;
141static nsec_t arg_timer_slack_nsec;
142static usec_t arg_default_timer_accuracy_usec;
143static Set* arg_syscall_archs;
144static FILE* arg_serialization;
145static int arg_default_cpu_accounting;
146static bool arg_default_io_accounting;
147static bool arg_default_ip_accounting;
148static bool arg_default_blockio_accounting;
149static bool arg_default_memory_accounting;
150static bool arg_default_tasks_accounting;
3a0f06c4 151static TasksMax arg_default_tasks_max;
fb39af4c
ZJS
152static sd_id128_t arg_machine_id;
153static EmergencyAction arg_cad_burst_action;
154static OOMPolicy arg_default_oom_policy;
155static CPUSet arg_cpu_affinity;
b070c7c0 156static NUMAPolicy arg_numa_policy;
3753325b 157static usec_t arg_clock_usec;
d247f232
LP
158static void *arg_random_seed;
159static size_t arg_random_seed_size;
61fbbac1 160
0e06a031
LP
161/* A copy of the original environment block */
162static char **saved_env = NULL;
163
a9fd4cd1
FB
164static int parse_configuration(const struct rlimit *saved_rlimit_nofile,
165 const struct rlimit *saved_rlimit_memlock);
4fc935ca 166
f70e6fb4
ZJS
167static int manager_find_user_config_paths(char ***ret_files, char ***ret_dirs) {
168 _cleanup_free_ char *base = NULL;
169 _cleanup_strv_free_ char **files = NULL, **dirs = NULL;
170 int r;
171
172 r = xdg_user_config_dir(&base, "/systemd");
173 if (r < 0)
174 return r;
175
176 r = strv_extendf(&files, "%s/user.conf", base);
177 if (r < 0)
178 return r;
179
180 r = strv_extend(&files, PKGSYSCONFDIR "/user.conf");
181 if (r < 0)
182 return r;
183
184 r = strv_consume(&dirs, TAKE_PTR(base));
185 if (r < 0)
186 return r;
187
188 r = strv_extend_strv(&dirs, CONF_PATHS_STRV("systemd"), false);
189 if (r < 0)
190 return r;
191
192 *ret_files = TAKE_PTR(files);
193 *ret_dirs = TAKE_PTR(dirs);
194 return 0;
195}
196
bb259772
LP
197_noreturn_ static void freeze_or_exit_or_reboot(void) {
198
c3b6a348
LP
199 /* If we are running in a container, let's prefer exiting, after all we can propagate an exit code to
200 * the container manager, and thus inform it that something went wrong. */
bb259772
LP
201 if (detect_container() > 0) {
202 log_emergency("Exiting PID 1...");
c3b6a348 203 _exit(EXIT_EXCEPTION);
bb259772 204 }
b9e74c39
LP
205
206 if (arg_crash_reboot) {
207 log_notice("Rebooting in 10s...");
208 (void) sleep(10);
209
210 log_notice("Rebooting now...");
211 (void) reboot(RB_AUTOBOOT);
212 log_emergency_errno(errno, "Failed to reboot: %m");
213 }
214
215 log_emergency("Freezing execution.");
216 freeze();
217}
218
848e863a 219_noreturn_ static void crash(int sig) {
7d06dad9
MS
220 struct sigaction sa;
221 pid_t pid;
97c4f35c 222
df0ff127 223 if (getpid_cached() != 1)
abb26902 224 /* Pass this on immediately, if this is not PID 1 */
92ca4cac 225 (void) raise(sig);
abb26902 226 else if (!arg_dump_core)
4104970e 227 log_emergency("Caught <%s>, not dumping core.", signal_to_string(sig));
97c4f35c 228 else {
7d06dad9 229 sa = (struct sigaction) {
189d5bac 230 .sa_handler = nop_signal_handler,
b92bea5d
ZJS
231 .sa_flags = SA_NOCLDSTOP|SA_RESTART,
232 };
97c4f35c 233
6f5e3f35 234 /* We want to wait for the core process, hence let's enable SIGCHLD */
92ca4cac 235 (void) sigaction(SIGCHLD, &sa, NULL);
6f5e3f35 236
8869a0b4 237 pid = raw_clone(SIGCHLD);
e62d8c39 238 if (pid < 0)
56f64d95 239 log_emergency_errno(errno, "Caught <%s>, cannot fork for core dump: %m", signal_to_string(sig));
97c4f35c 240 else if (pid == 0) {
97c4f35c 241 /* Enable default signal handler for core dump */
15a90032 242
92ca4cac
LP
243 sa = (struct sigaction) {
244 .sa_handler = SIG_DFL,
245 };
246 (void) sigaction(sig, &sa, NULL);
97c4f35c 247
15a90032
LP
248 /* Don't limit the coredump size */
249 (void) setrlimit(RLIMIT_CORE, &RLIMIT_MAKE_CONST(RLIM_INFINITY));
97c4f35c
LP
250
251 /* Just to be sure... */
e62d9b81 252 (void) chdir("/");
97c4f35c
LP
253
254 /* Raise the signal again */
ee05e779 255 pid = raw_getpid();
92ca4cac 256 (void) kill(pid, sig); /* raise() would kill the parent */
97c4f35c
LP
257
258 assert_not_reached("We shouldn't be here...");
bb85a582 259 _exit(EXIT_EXCEPTION);
4fc935ca 260 } else {
8e12a6ae
LP
261 siginfo_t status;
262 int r;
4fc935ca
LP
263
264 /* Order things nicely. */
e62d8c39
ZJS
265 r = wait_for_terminate(pid, &status);
266 if (r < 0)
da927ba9 267 log_emergency_errno(r, "Caught <%s>, waitpid() failed: %m", signal_to_string(sig));
e1714f02
ZJS
268 else if (status.si_code != CLD_DUMPED) {
269 const char *s = status.si_code == CLD_EXITED
e04ed6db 270 ? exit_status_to_string(status.si_status, EXIT_STATUS_LIBC)
e1714f02
ZJS
271 : signal_to_string(status.si_status);
272
ee05e779
ZJS
273 log_emergency("Caught <%s>, core dump failed (child "PID_FMT", code=%s, status=%i/%s).",
274 signal_to_string(sig),
e1714f02
ZJS
275 pid,
276 sigchld_code_to_string(status.si_code),
277 status.si_status, strna(s));
278 } else
279 log_emergency("Caught <%s>, dumped core as pid "PID_FMT".",
280 signal_to_string(sig), pid);
97c4f35c
LP
281 }
282 }
283
b9e74c39 284 if (arg_crash_chvt >= 0)
92ca4cac 285 (void) chvt(arg_crash_chvt);
601f6a1e 286
7d06dad9
MS
287 sa = (struct sigaction) {
288 .sa_handler = SIG_IGN,
289 .sa_flags = SA_NOCLDSTOP|SA_NOCLDWAIT|SA_RESTART,
290 };
291
292 /* Let the kernel reap children for us */
293 (void) sigaction(SIGCHLD, &sa, NULL);
8c43883a 294
7d06dad9 295 if (arg_crash_shell) {
b9e74c39 296 log_notice("Executing crash shell in 10s...");
92ca4cac 297 (void) sleep(10);
4fc935ca 298
8869a0b4 299 pid = raw_clone(SIGCHLD);
cd3bd60a 300 if (pid < 0)
56f64d95 301 log_emergency_errno(errno, "Failed to fork off crash shell: %m");
6f5e3f35 302 else if (pid == 0) {
b9e74c39 303 (void) setsid();
92ca4cac 304 (void) make_console_stdio();
595225af 305 (void) rlimit_nofile_safe();
92ca4cac 306 (void) execle("/bin/sh", "/bin/sh", NULL, environ);
6f5e3f35 307
ee05e779 308 log_emergency_errno(errno, "execle() failed: %m");
bb85a582 309 _exit(EXIT_EXCEPTION);
b9e74c39
LP
310 } else {
311 log_info("Spawned crash shell as PID "PID_FMT".", pid);
4cf0b03b 312 (void) wait_for_terminate(pid, NULL);
b9e74c39 313 }
4fc935ca
LP
314 }
315
bb259772 316 freeze_or_exit_or_reboot();
97c4f35c
LP
317}
318
319static void install_crash_handler(void) {
297d563d 320 static const struct sigaction sa = {
b92bea5d 321 .sa_handler = crash,
297d563d 322 .sa_flags = SA_NODEFER, /* So that we can raise the signal again from the signal handler */
b92bea5d 323 };
297d563d 324 int r;
97c4f35c 325
9c274488
LP
326 /* We ignore the return value here, since, we don't mind if we cannot set up a crash handler */
327 r = sigaction_many(&sa, SIGNALS_CRASH_HANDLER);
297d563d
LP
328 if (r < 0)
329 log_debug_errno(r, "I had trouble setting up the crash handler, ignoring: %m");
97c4f35c 330}
f170852a 331
56d96fc0
LP
332static int console_setup(void) {
333 _cleanup_close_ int tty_fd = -1;
334 int r;
80876c20 335
512947d4 336 tty_fd = open_terminal("/dev/console", O_WRONLY|O_NOCTTY|O_CLOEXEC);
23bbb0de
MS
337 if (tty_fd < 0)
338 return log_error_errno(tty_fd, "Failed to open /dev/console: %m");
80876c20 339
56d96fc0
LP
340 /* We don't want to force text mode. plymouth may be showing
341 * pictures already from initrd. */
512947d4 342 r = reset_terminal_fd(tty_fd, false);
23bbb0de
MS
343 if (r < 0)
344 return log_error_errno(r, "Failed to reset /dev/console: %m");
843d2643 345
56d96fc0 346 return 0;
80876c20
LP
347}
348
ee48dbd5 349static int set_machine_id(const char *m) {
e042eab7 350 sd_id128_t t;
8b26cdbd 351 assert(m);
ee48dbd5 352
e042eab7 353 if (sd_id128_from_string(m, &t) < 0)
ee48dbd5
NC
354 return -EINVAL;
355
e042eab7 356 if (sd_id128_is_null(t))
ee48dbd5
NC
357 return -EINVAL;
358
e042eab7 359 arg_machine_id = t;
ee48dbd5
NC
360 return 0;
361}
362
96287a49 363static int parse_proc_cmdline_item(const char *key, const char *value, void *data) {
059cb385 364 int r;
f170852a 365
059cb385 366 assert(key);
5192bd19 367
1d84ad94 368 if (STR_IN_SET(key, "systemd.unit", "rd.systemd.unit")) {
bf4df7c3 369
1d84ad94
LP
370 if (proc_cmdline_value_missing(key, value))
371 return 0;
bf4df7c3 372
1d84ad94
LP
373 if (!unit_name_is_valid(value, UNIT_NAME_PLAIN|UNIT_NAME_INSTANCE))
374 log_warning("Unit name specified on %s= is not valid, ignoring: %s", key, value);
cd57038a
ZJS
375 else if (in_initrd() == !!startswith(key, "rd."))
376 return free_and_strdup_warn(&arg_default_unit, value);
f170852a 377
1d84ad94 378 } else if (proc_cmdline_key_streq(key, "systemd.dump_core")) {
4fc935ca 379
1d84ad94 380 r = value ? parse_boolean(value) : true;
fb472900 381 if (r < 0)
5e1ee764 382 log_warning_errno(r, "Failed to parse dump core switch %s, ignoring: %m", value);
4fc935ca 383 else
fa0f4d8a 384 arg_dump_core = r;
4fc935ca 385
c6885f5f
FB
386 } else if (proc_cmdline_key_streq(key, "systemd.early_core_pattern")) {
387
388 if (proc_cmdline_value_missing(key, value))
389 return 0;
390
391 if (path_is_absolute(value))
614b022c 392 (void) parse_path_argument(value, false, &arg_early_core_pattern);
c6885f5f
FB
393 else
394 log_warning("Specified core pattern '%s' is not an absolute path, ignoring.", value);
395
1d84ad94 396 } else if (proc_cmdline_key_streq(key, "systemd.crash_chvt")) {
b9e74c39 397
1d84ad94
LP
398 if (!value)
399 arg_crash_chvt = 0; /* turn on */
5e1ee764 400 else {
a07a7324 401 r = parse_crash_chvt(value, &arg_crash_chvt);
5e1ee764
YW
402 if (r < 0)
403 log_warning_errno(r, "Failed to parse crash chvt switch %s, ignoring: %m", value);
404 }
b9e74c39 405
1d84ad94 406 } else if (proc_cmdline_key_streq(key, "systemd.crash_shell")) {
4fc935ca 407
1d84ad94 408 r = value ? parse_boolean(value) : true;
fb472900 409 if (r < 0)
5e1ee764 410 log_warning_errno(r, "Failed to parse crash shell switch %s, ignoring: %m", value);
4fc935ca 411 else
fa0f4d8a 412 arg_crash_shell = r;
5e7ee61c 413
1d84ad94 414 } else if (proc_cmdline_key_streq(key, "systemd.crash_reboot")) {
5e7ee61c 415
1d84ad94 416 r = value ? parse_boolean(value) : true;
b9e74c39 417 if (r < 0)
5e1ee764 418 log_warning_errno(r, "Failed to parse crash reboot switch %s, ignoring: %m", value);
5e7ee61c 419 else
b9e74c39 420 arg_crash_reboot = r;
5e7ee61c 421
1d84ad94
LP
422 } else if (proc_cmdline_key_streq(key, "systemd.confirm_spawn")) {
423 char *s;
7d5ceb64 424
1d84ad94 425 r = parse_confirm_spawn(value, &s);
059cb385 426 if (r < 0)
5e1ee764
YW
427 log_warning_errno(r, "Failed to parse confirm_spawn switch %s, ignoring: %m", value);
428 else
429 free_and_replace(arg_confirm_spawn, s);
601f6a1e 430
2a12e32e
JK
431 } else if (proc_cmdline_key_streq(key, "systemd.service_watchdogs")) {
432
433 r = value ? parse_boolean(value) : true;
434 if (r < 0)
5e1ee764 435 log_warning_errno(r, "Failed to parse service watchdog switch %s, ignoring: %m", value);
2a12e32e
JK
436 else
437 arg_service_watchdogs = r;
438
1d84ad94 439 } else if (proc_cmdline_key_streq(key, "systemd.show_status")) {
9e58ff9c 440
1d84ad94
LP
441 if (value) {
442 r = parse_show_status(value, &arg_show_status);
443 if (r < 0)
5e1ee764 444 log_warning_errno(r, "Failed to parse show status switch %s, ignoring: %m", value);
1d84ad94
LP
445 } else
446 arg_show_status = SHOW_STATUS_YES;
059cb385 447
36cf4507
ZJS
448 } else if (proc_cmdline_key_streq(key, "systemd.status_unit_format")) {
449
450 if (proc_cmdline_value_missing(key, value))
451 return 0;
452
453 r = status_unit_format_from_string(value);
454 if (r < 0)
455 log_warning_errno(r, "Failed to parse %s=%s, ignoring: %m", key, value);
456 else
457 arg_status_unit_format = r;
458
1d84ad94
LP
459 } else if (proc_cmdline_key_streq(key, "systemd.default_standard_output")) {
460
461 if (proc_cmdline_value_missing(key, value))
462 return 0;
0a494f1f 463
059cb385 464 r = exec_output_from_string(value);
fb472900 465 if (r < 0)
5e1ee764 466 log_warning_errno(r, "Failed to parse default standard output switch %s, ignoring: %m", value);
0a494f1f
LP
467 else
468 arg_default_std_output = r;
0a494f1f 469
1d84ad94
LP
470 } else if (proc_cmdline_key_streq(key, "systemd.default_standard_error")) {
471
472 if (proc_cmdline_value_missing(key, value))
473 return 0;
059cb385
LP
474
475 r = exec_output_from_string(value);
fb472900 476 if (r < 0)
5e1ee764 477 log_warning_errno(r, "Failed to parse default standard error switch %s, ignoring: %m", value);
0a494f1f
LP
478 else
479 arg_default_std_error = r;
9e7c5357 480
1d84ad94
LP
481 } else if (streq(key, "systemd.setenv")) {
482
483 if (proc_cmdline_value_missing(key, value))
484 return 0;
059cb385 485
b70935ac
ZJS
486 if (!env_assignment_is_valid(value))
487 log_warning("Environment variable assignment '%s' is not valid. Ignoring.", value);
488 else {
489 r = strv_env_replace_strdup(&arg_default_environment, value);
490 if (r < 0)
1d84ad94 491 return log_oom();
b70935ac 492 }
9e58ff9c 493
1d84ad94
LP
494 } else if (proc_cmdline_key_streq(key, "systemd.machine_id")) {
495
496 if (proc_cmdline_value_missing(key, value))
497 return 0;
498
499 r = set_machine_id(value);
500 if (r < 0)
5e1ee764 501 log_warning_errno(r, "MachineID '%s' is not valid, ignoring: %m", value);
ee48dbd5 502
1d84ad94
LP
503 } else if (proc_cmdline_key_streq(key, "systemd.default_timeout_start_sec")) {
504
505 if (proc_cmdline_value_missing(key, value))
506 return 0;
507
508 r = parse_sec(value, &arg_default_timeout_start_usec);
509 if (r < 0)
5e1ee764 510 log_warning_errno(r, "Failed to parse default start timeout '%s', ignoring: %m", value);
1d84ad94
LP
511
512 if (arg_default_timeout_start_usec <= 0)
513 arg_default_timeout_start_usec = USEC_INFINITY;
ee48dbd5 514
68d58f38
LP
515 } else if (proc_cmdline_key_streq(key, "systemd.cpu_affinity")) {
516
517 if (proc_cmdline_value_missing(key, value))
518 return 0;
519
520 r = parse_cpu_set(value, &arg_cpu_affinity);
521 if (r < 0)
162392b7 522 log_warning_errno(r, "Failed to parse CPU affinity mask '%s', ignoring: %m", value);
68d58f38 523
8a2c1fbf
EJ
524 } else if (proc_cmdline_key_streq(key, "systemd.watchdog_device")) {
525
526 if (proc_cmdline_value_missing(key, value))
527 return 0;
528
614b022c 529 (void) parse_path_argument(value, false, &arg_watchdog_device);
8a2c1fbf 530
3753325b
LP
531 } else if (proc_cmdline_key_streq(key, "systemd.clock_usec")) {
532
533 if (proc_cmdline_value_missing(key, value))
534 return 0;
535
536 r = safe_atou64(value, &arg_clock_usec);
537 if (r < 0)
538 log_warning_errno(r, "Failed to parse systemd.clock_usec= argument, ignoring: %s", value);
539
d247f232
LP
540 } else if (proc_cmdline_key_streq(key, "systemd.random_seed")) {
541 void *p;
542 size_t sz;
543
544 if (proc_cmdline_value_missing(key, value))
545 return 0;
546
f5fbe71d 547 r = unbase64mem(value, SIZE_MAX, &p, &sz);
d247f232
LP
548 if (r < 0)
549 log_warning_errno(r, "Failed to parse systemd.random_seed= argument, ignoring: %s", value);
550
551 free(arg_random_seed);
552 arg_random_seed = sz > 0 ? p : mfree(p);
553 arg_random_seed_size = sz;
554
059cb385 555 } else if (streq(key, "quiet") && !value) {
d7b15e0a 556
7a293242 557 if (arg_show_status == _SHOW_STATUS_INVALID)
0d066dd1 558 arg_show_status = SHOW_STATUS_ERROR;
059cb385
LP
559
560 } else if (streq(key, "debug") && !value) {
d7b15e0a 561
1de1c9c3
LP
562 /* Note that log_parse_environment() handles 'debug'
563 * too, and sets the log level to LOG_DEBUG. */
d7b15e0a 564
75f86906 565 if (detect_container() > 0)
b2103dcc 566 log_set_target(LOG_TARGET_CONSOLE);
059cb385 567
dcd61450 568 } else if (!value) {
e2c9a131 569 const char *target;
f170852a 570
ceae6295 571 /* Compatible with SysV, but supported independently even if SysV compatibility is disabled. */
e2c9a131
EV
572 target = runlevel_to_target(key);
573 if (target)
cd57038a 574 return free_and_strdup_warn(&arg_default_unit, target);
f170852a
LP
575 }
576
577 return 0;
578}
579
e8e581bf
ZJS
/* Generates a config parser callback called "name" that passes the right-hand side of an assignment to
 * func(). If func() returns a negative value, a syntax error mentioning descr and the offending value
 * is logged. The generated callback returns 0 in all cases, so a bad value never aborts parsing. */
#define DEFINE_SETTER(name, func, descr)                              \
        static int name(const char *unit,                             \
                        const char *filename,                         \
                        unsigned line,                                \
                        const char *section,                          \
                        unsigned section_line,                        \
                        const char *lvalue,                           \
                        int ltype,                                    \
                        const char *rvalue,                           \
                        void *data,                                   \
                        void *userdata) {                             \
                                                                      \
                int r;                                                \
                                                                      \
                assert(filename);                                     \
                assert(lvalue);                                       \
                assert(rvalue);                                       \
                                                                      \
                r = func(rvalue);                                     \
                if (r < 0)                                            \
                        log_syntax(unit, LOG_ERR, filename, line, r,  \
                                   "Invalid " descr " '%s': %m",      \
                                   rvalue);                           \
                                                                      \
                return 0;                                             \
        }

/* One callback per logging setting; they are referenced from the table in parse_config_file(). */
DEFINE_SETTER(config_parse_level2, log_set_max_level_from_string, "log level");
DEFINE_SETTER(config_parse_target, log_set_target_from_string, "target");
DEFINE_SETTER(config_parse_color, log_show_color_from_string, "color");
DEFINE_SETTER(config_parse_location, log_show_location_from_string, "location");
DEFINE_SETTER(config_parse_time, log_show_time_from_string, "time");
487393e9 612
a61d6874
ZJS
613static int config_parse_default_timeout_abort(
614 const char *unit,
615 const char *filename,
616 unsigned line,
617 const char *section,
618 unsigned section_line,
619 const char *lvalue,
620 int ltype,
621 const char *rvalue,
622 void *data,
623 void *userdata) {
624 int r;
625
626 r = config_parse_timeout_abort(unit, filename, line, section, section_line, lvalue, ltype, rvalue,
627 &arg_default_timeout_abort_usec, userdata);
628 if (r >= 0)
629 arg_default_timeout_abort_set = r;
630 return 0;
631}
487393e9 632
a61d6874 633static int parse_config_file(void) {
f975e971 634 const ConfigTableItem items[] = {
a61d6874
ZJS
635 { "Manager", "LogLevel", config_parse_level2, 0, NULL },
636 { "Manager", "LogTarget", config_parse_target, 0, NULL },
637 { "Manager", "LogColor", config_parse_color, 0, NULL },
638 { "Manager", "LogLocation", config_parse_location, 0, NULL },
c5673ed0 639 { "Manager", "LogTime", config_parse_time, 0, NULL },
a61d6874
ZJS
640 { "Manager", "DumpCore", config_parse_bool, 0, &arg_dump_core },
641 { "Manager", "CrashChVT", /* legacy */ config_parse_crash_chvt, 0, &arg_crash_chvt },
642 { "Manager", "CrashChangeVT", config_parse_crash_chvt, 0, &arg_crash_chvt },
643 { "Manager", "CrashShell", config_parse_bool, 0, &arg_crash_shell },
644 { "Manager", "CrashReboot", config_parse_bool, 0, &arg_crash_reboot },
645 { "Manager", "ShowStatus", config_parse_show_status, 0, &arg_show_status },
646 { "Manager", "StatusUnitFormat", config_parse_status_unit_format, 0, &arg_status_unit_format },
647 { "Manager", "CPUAffinity", config_parse_cpu_affinity2, 0, &arg_cpu_affinity },
648 { "Manager", "NUMAPolicy", config_parse_numa_policy, 0, &arg_numa_policy.type },
649 { "Manager", "NUMAMask", config_parse_numa_mask, 0, &arg_numa_policy },
650 { "Manager", "JoinControllers", config_parse_warn_compat, DISABLED_CONFIGURATION, NULL },
651 { "Manager", "RuntimeWatchdogSec", config_parse_sec, 0, &arg_runtime_watchdog },
652 { "Manager", "RebootWatchdogSec", config_parse_sec, 0, &arg_reboot_watchdog },
653 { "Manager", "ShutdownWatchdogSec", config_parse_sec, 0, &arg_reboot_watchdog }, /* obsolete alias */
654 { "Manager", "KExecWatchdogSec", config_parse_sec, 0, &arg_kexec_watchdog },
655 { "Manager", "WatchdogDevice", config_parse_path, 0, &arg_watchdog_device },
656 { "Manager", "CapabilityBoundingSet", config_parse_capability_set, 0, &arg_capability_bounding_set },
657 { "Manager", "NoNewPrivileges", config_parse_bool, 0, &arg_no_new_privs },
349cc4a5 658#if HAVE_SECCOMP
a61d6874 659 { "Manager", "SystemCallArchitectures", config_parse_syscall_archs, 0, &arg_syscall_archs },
89fffa27 660#endif
a61d6874
ZJS
661 { "Manager", "TimerSlackNSec", config_parse_nsec, 0, &arg_timer_slack_nsec },
662 { "Manager", "DefaultTimerAccuracySec", config_parse_sec, 0, &arg_default_timer_accuracy_usec },
663 { "Manager", "DefaultStandardOutput", config_parse_output_restricted, 0, &arg_default_std_output },
664 { "Manager", "DefaultStandardError", config_parse_output_restricted, 0, &arg_default_std_error },
665 { "Manager", "DefaultTimeoutStartSec", config_parse_sec, 0, &arg_default_timeout_start_usec },
666 { "Manager", "DefaultTimeoutStopSec", config_parse_sec, 0, &arg_default_timeout_stop_usec },
667 { "Manager", "DefaultTimeoutAbortSec", config_parse_default_timeout_abort, 0, NULL },
668 { "Manager", "DefaultRestartSec", config_parse_sec, 0, &arg_default_restart_usec },
669 { "Manager", "DefaultStartLimitInterval", config_parse_sec, 0, &arg_default_start_limit_interval }, /* obsolete alias */
670 { "Manager", "DefaultStartLimitIntervalSec", config_parse_sec, 0, &arg_default_start_limit_interval },
671 { "Manager", "DefaultStartLimitBurst", config_parse_unsigned, 0, &arg_default_start_limit_burst },
672 { "Manager", "DefaultEnvironment", config_parse_environ, 0, &arg_default_environment },
d55ed7de 673 { "Manager", "ManagerEnvironment", config_parse_environ, 0, &arg_manager_environment },
a61d6874
ZJS
674 { "Manager", "DefaultLimitCPU", config_parse_rlimit, RLIMIT_CPU, arg_default_rlimit },
675 { "Manager", "DefaultLimitFSIZE", config_parse_rlimit, RLIMIT_FSIZE, arg_default_rlimit },
676 { "Manager", "DefaultLimitDATA", config_parse_rlimit, RLIMIT_DATA, arg_default_rlimit },
677 { "Manager", "DefaultLimitSTACK", config_parse_rlimit, RLIMIT_STACK, arg_default_rlimit },
678 { "Manager", "DefaultLimitCORE", config_parse_rlimit, RLIMIT_CORE, arg_default_rlimit },
679 { "Manager", "DefaultLimitRSS", config_parse_rlimit, RLIMIT_RSS, arg_default_rlimit },
680 { "Manager", "DefaultLimitNOFILE", config_parse_rlimit, RLIMIT_NOFILE, arg_default_rlimit },
681 { "Manager", "DefaultLimitAS", config_parse_rlimit, RLIMIT_AS, arg_default_rlimit },
682 { "Manager", "DefaultLimitNPROC", config_parse_rlimit, RLIMIT_NPROC, arg_default_rlimit },
683 { "Manager", "DefaultLimitMEMLOCK", config_parse_rlimit, RLIMIT_MEMLOCK, arg_default_rlimit },
684 { "Manager", "DefaultLimitLOCKS", config_parse_rlimit, RLIMIT_LOCKS, arg_default_rlimit },
685 { "Manager", "DefaultLimitSIGPENDING", config_parse_rlimit, RLIMIT_SIGPENDING, arg_default_rlimit },
686 { "Manager", "DefaultLimitMSGQUEUE", config_parse_rlimit, RLIMIT_MSGQUEUE, arg_default_rlimit },
687 { "Manager", "DefaultLimitNICE", config_parse_rlimit, RLIMIT_NICE, arg_default_rlimit },
688 { "Manager", "DefaultLimitRTPRIO", config_parse_rlimit, RLIMIT_RTPRIO, arg_default_rlimit },
689 { "Manager", "DefaultLimitRTTIME", config_parse_rlimit, RLIMIT_RTTIME, arg_default_rlimit },
690 { "Manager", "DefaultCPUAccounting", config_parse_tristate, 0, &arg_default_cpu_accounting },
691 { "Manager", "DefaultIOAccounting", config_parse_bool, 0, &arg_default_io_accounting },
692 { "Manager", "DefaultIPAccounting", config_parse_bool, 0, &arg_default_ip_accounting },
693 { "Manager", "DefaultBlockIOAccounting", config_parse_bool, 0, &arg_default_blockio_accounting },
694 { "Manager", "DefaultMemoryAccounting", config_parse_bool, 0, &arg_default_memory_accounting },
695 { "Manager", "DefaultTasksAccounting", config_parse_bool, 0, &arg_default_tasks_accounting },
696 { "Manager", "DefaultTasksMax", config_parse_tasks_max, 0, &arg_default_tasks_max },
697 { "Manager", "CtrlAltDelBurstAction", config_parse_emergency_action, 0, &arg_cad_burst_action },
698 { "Manager", "DefaultOOMPolicy", config_parse_oom_policy, 0, &arg_default_oom_policy },
d3b1c508 699 {}
487393e9
LP
700 };
701
e94a009c
YW
702 _cleanup_strv_free_ char **files = NULL, **dirs = NULL;
703 const char *suffix;
f70e6fb4 704 int r;
75eb6154 705
e94a009c 706 if (arg_system)
f70e6fb4 707 suffix = "system.conf.d";
e94a009c
YW
708 else {
709 r = manager_find_user_config_paths(&files, &dirs);
f70e6fb4
ZJS
710 if (r < 0)
711 return log_error_errno(r, "Failed to determine config file paths: %m");
e94a009c 712
f70e6fb4
ZJS
713 suffix = "user.conf.d";
714 }
75eb6154 715
f70e6fb4 716 (void) config_parse_many(
e94a009c
YW
717 (const char* const*) (files ?: STRV_MAKE(PKGSYSCONFDIR "/system.conf")),
718 (const char* const*) (dirs ?: CONF_PATHS_STRV("systemd")),
719 suffix,
4f9ff96a
LP
720 "Manager\0",
721 config_item_table_lookup, items,
722 CONFIG_PARSE_WARN,
723 NULL,
724 NULL);
36c16a7c 725
f70e6fb4
ZJS
726 /* Traditionally "0" was used to turn off the default unit timeouts. Fix this up so that we use
727 * USEC_INFINITY like everywhere else. */
36c16a7c
LP
728 if (arg_default_timeout_start_usec <= 0)
729 arg_default_timeout_start_usec = USEC_INFINITY;
730 if (arg_default_timeout_stop_usec <= 0)
731 arg_default_timeout_stop_usec = USEC_INFINITY;
487393e9 732
487393e9
LP
733 return 0;
734}
735
85cb4151 736static void set_manager_defaults(Manager *m) {
06af2a04
TB
737
738 assert(m);
739
5b65ae15
LP
740 /* Propagates the various default unit property settings into the manager object, i.e. properties that do not
741 * affect the manager itself, but are just what newly allocated units will have set if they haven't set
742 * anything else. (Also see set_manager_settings() for the settings that affect the manager's own behaviour) */
743
06af2a04
TB
744 m->default_timer_accuracy_usec = arg_default_timer_accuracy_usec;
745 m->default_std_output = arg_default_std_output;
746 m->default_std_error = arg_default_std_error;
747 m->default_timeout_start_usec = arg_default_timeout_start_usec;
748 m->default_timeout_stop_usec = arg_default_timeout_stop_usec;
dc653bf4
JK
749 m->default_timeout_abort_usec = arg_default_timeout_abort_usec;
750 m->default_timeout_abort_set = arg_default_timeout_abort_set;
06af2a04
TB
751 m->default_restart_usec = arg_default_restart_usec;
752 m->default_start_limit_interval = arg_default_start_limit_interval;
753 m->default_start_limit_burst = arg_default_start_limit_burst;
a88c5b8a
CD
754
755 /* On 4.15+ with unified hierarchy, CPU accounting is essentially free as it doesn't require the CPU
756 * controller to be enabled, so the default is to enable it unless we got told otherwise. */
757 if (arg_default_cpu_accounting >= 0)
758 m->default_cpu_accounting = arg_default_cpu_accounting;
759 else
760 m->default_cpu_accounting = cpu_accounting_is_cheap();
761
13c31542 762 m->default_io_accounting = arg_default_io_accounting;
377bfd2d 763 m->default_ip_accounting = arg_default_ip_accounting;
06af2a04
TB
764 m->default_blockio_accounting = arg_default_blockio_accounting;
765 m->default_memory_accounting = arg_default_memory_accounting;
03a7b521 766 m->default_tasks_accounting = arg_default_tasks_accounting;
0af20ea2 767 m->default_tasks_max = arg_default_tasks_max;
afcfaa69 768 m->default_oom_policy = arg_default_oom_policy;
06af2a04 769
79a224c4
LP
770 (void) manager_set_default_rlimits(m, arg_default_rlimit);
771
772 (void) manager_default_environment(m);
773 (void) manager_transient_environment_add(m, arg_default_environment);
06af2a04
TB
774}
775
7b46fc6a
LP
776static void set_manager_settings(Manager *m) {
777
778 assert(m);
779
986935cf
FB
780 /* Propagates the various manager settings into the manager object, i.e. properties that
781 * effect the manager itself (as opposed to just being inherited into newly allocated
782 * units, see set_manager_defaults() above). */
5b65ae15 783
7b46fc6a 784 m->confirm_spawn = arg_confirm_spawn;
2a12e32e 785 m->service_watchdogs = arg_service_watchdogs;
7b46fc6a
LP
786 m->cad_burst_action = arg_cad_burst_action;
787
986935cf
FB
788 manager_set_watchdog(m, WATCHDOG_RUNTIME, arg_runtime_watchdog);
789 manager_set_watchdog(m, WATCHDOG_REBOOT, arg_reboot_watchdog);
790 manager_set_watchdog(m, WATCHDOG_KEXEC, arg_kexec_watchdog);
791
7365a296 792 manager_set_show_status(m, arg_show_status, "commandline");
36cf4507 793 m->status_unit_format = arg_status_unit_format;
7b46fc6a
LP
794}
795
f170852a 796static int parse_argv(int argc, char *argv[]) {
f170852a
LP
797 enum {
798 ARG_LOG_LEVEL = 0x100,
799 ARG_LOG_TARGET,
bbe63281
LP
800 ARG_LOG_COLOR,
801 ARG_LOG_LOCATION,
c5673ed0 802 ARG_LOG_TIME,
2f198e2f 803 ARG_UNIT,
edb9aaa8 804 ARG_SYSTEM,
af2d49f7 805 ARG_USER,
e537352b 806 ARG_TEST,
b87c2aa6 807 ARG_NO_PAGER,
9ba0bc4e 808 ARG_VERSION,
80876c20 809 ARG_DUMP_CONFIGURATION_ITEMS,
bbc1acab 810 ARG_DUMP_BUS_PROPERTIES,
5c08257b 811 ARG_BUS_INTROSPECT,
9e58ff9c 812 ARG_DUMP_CORE,
b9e74c39 813 ARG_CRASH_CHVT,
9e58ff9c 814 ARG_CRASH_SHELL,
b9e74c39 815 ARG_CRASH_REBOOT,
a16e1123 816 ARG_CONFIRM_SPAWN,
9e58ff9c 817 ARG_SHOW_STATUS,
4288f619 818 ARG_DESERIALIZE,
2660882b 819 ARG_SWITCHED_ROOT,
0a494f1f 820 ARG_DEFAULT_STD_OUTPUT,
ee48dbd5 821 ARG_DEFAULT_STD_ERROR,
2a12e32e
JK
822 ARG_MACHINE_ID,
823 ARG_SERVICE_WATCHDOGS,
f170852a
LP
824 };
825
826 static const struct option options[] = {
a16e1123
LP
827 { "log-level", required_argument, NULL, ARG_LOG_LEVEL },
828 { "log-target", required_argument, NULL, ARG_LOG_TARGET },
bbe63281
LP
829 { "log-color", optional_argument, NULL, ARG_LOG_COLOR },
830 { "log-location", optional_argument, NULL, ARG_LOG_LOCATION },
c5673ed0 831 { "log-time", optional_argument, NULL, ARG_LOG_TIME },
2f198e2f 832 { "unit", required_argument, NULL, ARG_UNIT },
edb9aaa8 833 { "system", no_argument, NULL, ARG_SYSTEM },
af2d49f7 834 { "user", no_argument, NULL, ARG_USER },
a16e1123 835 { "test", no_argument, NULL, ARG_TEST },
b87c2aa6 836 { "no-pager", no_argument, NULL, ARG_NO_PAGER },
a16e1123 837 { "help", no_argument, NULL, 'h' },
9ba0bc4e 838 { "version", no_argument, NULL, ARG_VERSION },
a16e1123 839 { "dump-configuration-items", no_argument, NULL, ARG_DUMP_CONFIGURATION_ITEMS },
bbc1acab 840 { "dump-bus-properties", no_argument, NULL, ARG_DUMP_BUS_PROPERTIES },
5c08257b 841 { "bus-introspect", required_argument, NULL, ARG_BUS_INTROSPECT },
a5d87bf0 842 { "dump-core", optional_argument, NULL, ARG_DUMP_CORE },
b9e74c39 843 { "crash-chvt", required_argument, NULL, ARG_CRASH_CHVT },
a5d87bf0 844 { "crash-shell", optional_argument, NULL, ARG_CRASH_SHELL },
b9e74c39 845 { "crash-reboot", optional_argument, NULL, ARG_CRASH_REBOOT },
a5d87bf0 846 { "confirm-spawn", optional_argument, NULL, ARG_CONFIRM_SPAWN },
6e98720f 847 { "show-status", optional_argument, NULL, ARG_SHOW_STATUS },
a16e1123 848 { "deserialize", required_argument, NULL, ARG_DESERIALIZE },
2660882b 849 { "switched-root", no_argument, NULL, ARG_SWITCHED_ROOT },
0a494f1f
LP
850 { "default-standard-output", required_argument, NULL, ARG_DEFAULT_STD_OUTPUT, },
851 { "default-standard-error", required_argument, NULL, ARG_DEFAULT_STD_ERROR, },
ee48dbd5 852 { "machine-id", required_argument, NULL, ARG_MACHINE_ID },
2a12e32e 853 { "service-watchdogs", required_argument, NULL, ARG_SERVICE_WATCHDOGS },
fb472900 854 {}
f170852a
LP
855 };
856
857 int c, r;
9a9ca408 858 bool user_arg_seen = false;
f170852a
LP
859
860 assert(argc >= 1);
861 assert(argv);
862
df0ff127 863 if (getpid_cached() == 1)
b770165a
LP
864 opterr = 0;
865
099663ff 866 while ((c = getopt_long(argc, argv, "hDbsz:", options, NULL)) >= 0)
f170852a
LP
867
868 switch (c) {
869
870 case ARG_LOG_LEVEL:
fb472900 871 r = log_set_max_level_from_string(optarg);
2b5107e1
ZJS
872 if (r < 0)
873 return log_error_errno(r, "Failed to parse log level \"%s\": %m", optarg);
f170852a
LP
874
875 break;
876
877 case ARG_LOG_TARGET:
fb472900 878 r = log_set_target_from_string(optarg);
2b5107e1
ZJS
879 if (r < 0)
880 return log_error_errno(r, "Failed to parse log target \"%s\": %m", optarg);
f170852a
LP
881
882 break;
883
bbe63281
LP
884 case ARG_LOG_COLOR:
885
d0b170c8 886 if (optarg) {
fb472900 887 r = log_show_color_from_string(optarg);
2b5107e1
ZJS
888 if (r < 0)
889 return log_error_errno(r, "Failed to parse log color setting \"%s\": %m",
890 optarg);
d0b170c8
LP
891 } else
892 log_show_color(true);
bbe63281
LP
893
894 break;
895
896 case ARG_LOG_LOCATION:
d0b170c8 897 if (optarg) {
fb472900 898 r = log_show_location_from_string(optarg);
2b5107e1
ZJS
899 if (r < 0)
900 return log_error_errno(r, "Failed to parse log location setting \"%s\": %m",
901 optarg);
d0b170c8
LP
902 } else
903 log_show_location(true);
bbe63281
LP
904
905 break;
906
c5673ed0
DS
907 case ARG_LOG_TIME:
908
909 if (optarg) {
910 r = log_show_time_from_string(optarg);
911 if (r < 0)
912 return log_error_errno(r, "Failed to parse log time setting \"%s\": %m",
913 optarg);
914 } else
915 log_show_time(true);
916
917 break;
918
0a494f1f 919 case ARG_DEFAULT_STD_OUTPUT:
fb472900 920 r = exec_output_from_string(optarg);
2b5107e1
ZJS
921 if (r < 0)
922 return log_error_errno(r, "Failed to parse default standard output setting \"%s\": %m",
923 optarg);
924 arg_default_std_output = r;
0a494f1f
LP
925 break;
926
927 case ARG_DEFAULT_STD_ERROR:
fb472900 928 r = exec_output_from_string(optarg);
2b5107e1
ZJS
929 if (r < 0)
930 return log_error_errno(r, "Failed to parse default standard error output setting \"%s\": %m",
931 optarg);
932 arg_default_std_error = r;
0a494f1f
LP
933 break;
934
2f198e2f 935 case ARG_UNIT:
e6e242ad 936 r = free_and_strdup(&arg_default_unit, optarg);
23bbb0de 937 if (r < 0)
2b5107e1 938 return log_error_errno(r, "Failed to set default unit \"%s\": %m", optarg);
f170852a
LP
939
940 break;
941
edb9aaa8 942 case ARG_SYSTEM:
463d0d15 943 arg_system = true;
edb9aaa8 944 break;
a5dab5ce 945
af2d49f7 946 case ARG_USER:
463d0d15 947 arg_system = false;
9a9ca408 948 user_arg_seen = true;
a5dab5ce 949 break;
a5dab5ce 950
e965d56d 951 case ARG_TEST:
fa0f4d8a 952 arg_action = ACTION_TEST;
b87c2aa6
ZJS
953 break;
954
955 case ARG_NO_PAGER:
0221d68a 956 arg_pager_flags |= PAGER_DISABLE;
e965d56d
LP
957 break;
958
9ba0bc4e
ZJS
959 case ARG_VERSION:
960 arg_action = ACTION_VERSION;
961 break;
962
e537352b 963 case ARG_DUMP_CONFIGURATION_ITEMS:
fa0f4d8a 964 arg_action = ACTION_DUMP_CONFIGURATION_ITEMS;
e537352b
LP
965 break;
966
bbc1acab
YW
967 case ARG_DUMP_BUS_PROPERTIES:
968 arg_action = ACTION_DUMP_BUS_PROPERTIES;
969 break;
970
5c08257b
ZJS
971 case ARG_BUS_INTROSPECT:
972 arg_bus_introspect = optarg;
973 arg_action = ACTION_BUS_INTROSPECT;
974 break;
975
9e58ff9c 976 case ARG_DUMP_CORE:
599c7c54
ZJS
977 r = parse_boolean_argument("--dump-core", optarg, &arg_dump_core);
978 if (r < 0)
979 return r;
b9e74c39
LP
980 break;
981
982 case ARG_CRASH_CHVT:
a07a7324 983 r = parse_crash_chvt(optarg, &arg_crash_chvt);
b9e74c39 984 if (r < 0)
2b5107e1
ZJS
985 return log_error_errno(r, "Failed to parse crash virtual terminal index: \"%s\": %m",
986 optarg);
9e58ff9c
LP
987 break;
988
989 case ARG_CRASH_SHELL:
599c7c54
ZJS
990 r = parse_boolean_argument("--crash-shell", optarg, &arg_crash_shell);
991 if (r < 0)
992 return r;
b9e74c39
LP
993 break;
994
995 case ARG_CRASH_REBOOT:
599c7c54
ZJS
996 r = parse_boolean_argument("--crash-reboot", optarg, &arg_crash_reboot);
997 if (r < 0)
998 return r;
9e58ff9c
LP
999 break;
1000
80876c20 1001 case ARG_CONFIRM_SPAWN:
7d5ceb64
FB
1002 arg_confirm_spawn = mfree(arg_confirm_spawn);
1003
1004 r = parse_confirm_spawn(optarg, &arg_confirm_spawn);
1005 if (r < 0)
2b5107e1
ZJS
1006 return log_error_errno(r, "Failed to parse confirm spawn option: \"%s\": %m",
1007 optarg);
80876c20
LP
1008 break;
1009
2a12e32e 1010 case ARG_SERVICE_WATCHDOGS:
599c7c54 1011 r = parse_boolean_argument("--service-watchdogs=", optarg, &arg_service_watchdogs);
2a12e32e 1012 if (r < 0)
599c7c54 1013 return r;
2a12e32e
JK
1014 break;
1015
9e58ff9c 1016 case ARG_SHOW_STATUS:
d450b6f2
ZJS
1017 if (optarg) {
1018 r = parse_show_status(optarg, &arg_show_status);
ac7ec288 1019 if (r < 0)
2b5107e1
ZJS
1020 return log_error_errno(r, "Failed to parse show status boolean: \"%s\": %m",
1021 optarg);
d450b6f2
ZJS
1022 } else
1023 arg_show_status = SHOW_STATUS_YES;
6e98720f 1024 break;
a5d87bf0 1025
a16e1123
LP
1026 case ARG_DESERIALIZE: {
1027 int fd;
1028 FILE *f;
1029
01e10de3 1030 r = safe_atoi(optarg, &fd);
2b5107e1
ZJS
1031 if (r < 0)
1032 log_error_errno(r, "Failed to parse deserialize option \"%s\": %m", optarg);
baaa35ad
ZJS
1033 if (fd < 0)
1034 return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
1035 "Invalid deserialize fd: %d",
1036 fd);
a16e1123 1037
b9e74c39 1038 (void) fd_cloexec(fd, true);
01e10de3
LP
1039
1040 f = fdopen(fd, "r");
4a62c710 1041 if (!f)
2b5107e1 1042 return log_error_errno(errno, "Failed to open serialization fd %d: %m", fd);
a16e1123 1043
74ca738f 1044 safe_fclose(arg_serialization);
d3b1c508 1045 arg_serialization = f;
a16e1123
LP
1046
1047 break;
1048 }
1049
2660882b 1050 case ARG_SWITCHED_ROOT:
bf4df7c3 1051 arg_switched_root = true;
d03bc1b8
HH
1052 break;
1053
ee48dbd5
NC
1054 case ARG_MACHINE_ID:
1055 r = set_machine_id(optarg);
54500613 1056 if (r < 0)
2b5107e1 1057 return log_error_errno(r, "MachineID '%s' is not valid: %m", optarg);
ee48dbd5
NC
1058 break;
1059
f170852a 1060 case 'h':
fa0f4d8a 1061 arg_action = ACTION_HELP;
f170852a
LP
1062 break;
1063
1d2e23ab
LP
1064 case 'D':
1065 log_set_max_level(LOG_DEBUG);
1066 break;
1067
099663ff
LP
1068 case 'b':
1069 case 's':
1070 case 'z':
cd57038a
ZJS
1071 /* Just to eat away the sysvinit kernel cmdline args that we'll parse in
1072 * parse_proc_cmdline_item() or ignore, without any getopt() error messages.
1073 */
099663ff 1074 case '?':
df0ff127 1075 if (getpid_cached() != 1)
099663ff 1076 return -EINVAL;
601185b4
ZJS
1077 else
1078 return 0;
099663ff 1079
601185b4
ZJS
1080 default:
1081 assert_not_reached("Unhandled option code.");
f170852a
LP
1082 }
1083
d7a0f1f4 1084 if (optind < argc && getpid_cached() != 1)
9a9ca408
ZJS
1085 /* Hmm, when we aren't run as init system let's complain about excess arguments */
1086 return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Excess arguments.");
1087
1088 if (arg_action == ACTION_RUN && !arg_system && !user_arg_seen)
baaa35ad 1089 return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
9a9ca408 1090 "Explicit --user argument required to run as user manager.");
d821e6d6 1091
f170852a
LP
1092 return 0;
1093}
1094
1095static int help(void) {
37ec0fdd
LP
1096 _cleanup_free_ char *link = NULL;
1097 int r;
1098
1099 r = terminal_urlify_man("systemd", "1", &link);
1100 if (r < 0)
1101 return log_oom();
f170852a 1102
2e33c433 1103 printf("%s [OPTIONS...]\n\n"
7ae47326
ZJS
1104 "%sStarts and monitors system and user services.%s\n\n"
1105 "This program takes no positional arguments.\n\n"
1106 "%sOptions%s:\n"
e537352b 1107 " -h --help Show this help\n"
cb4069d9 1108 " --version Show version\n"
cd69e88b
LP
1109 " --test Determine initial transaction, dump it and exit\n"
1110 " --system In combination with --test: operate as system service manager\n"
1111 " --user In combination with --test: operate as per-user service manager\n"
b87c2aa6 1112 " --no-pager Do not pipe output into a pager\n"
80876c20 1113 " --dump-configuration-items Dump understood unit configuration items\n"
bbc1acab 1114 " --dump-bus-properties Dump exposed bus properties\n"
5c08257b 1115 " --bus-introspect=PATH Write XML introspection data\n"
9e58ff9c 1116 " --unit=UNIT Set default unit\n"
b9e74c39
LP
1117 " --dump-core[=BOOL] Dump core on crash\n"
1118 " --crash-vt=NR Change to specified VT on crash\n"
1119 " --crash-reboot[=BOOL] Reboot on crash\n"
1120 " --crash-shell[=BOOL] Run shell on crash\n"
1121 " --confirm-spawn[=BOOL] Ask for confirmation when spawning processes\n"
1122 " --show-status[=BOOL] Show status updates on the console during bootup\n"
c1dc6153 1123 " --log-target=TARGET Set log target (console, journal, kmsg, journal-or-kmsg, null)\n"
9e58ff9c 1124 " --log-level=LEVEL Set log level (debug, info, notice, warning, err, crit, alert, emerg)\n"
b9e74c39
LP
1125 " --log-color[=BOOL] Highlight important log messages\n"
1126 " --log-location[=BOOL] Include code location in log messages\n"
c5673ed0 1127 " --log-time[=BOOL] Prefix log messages with current time\n"
0a494f1f 1128 " --default-standard-output= Set default standard output for services\n"
37ec0fdd 1129 " --default-standard-error= Set default standard error output for services\n"
bc556335
DDM
1130 "\nSee the %s for details.\n",
1131 program_invocation_short_name,
1132 ansi_highlight(),
1133 ansi_normal(),
1134 ansi_underline(),
1135 ansi_normal(),
1136 link);
f170852a
LP
1137
1138 return 0;
1139}
1140
2cc856ac
LP
1141static int prepare_reexecute(
1142 Manager *m,
1143 FILE **ret_f,
1144 FDSet **ret_fds,
1145 bool switching_root) {
1146
48b90859
LP
1147 _cleanup_fdset_free_ FDSet *fds = NULL;
1148 _cleanup_fclose_ FILE *f = NULL;
a16e1123
LP
1149 int r;
1150
1151 assert(m);
2cc856ac
LP
1152 assert(ret_f);
1153 assert(ret_fds);
a16e1123 1154
6b78f9b4 1155 r = manager_open_serialization(m, &f);
48b90859
LP
1156 if (r < 0)
1157 return log_error_errno(r, "Failed to create serialization file: %m");
a16e1123 1158
71445ae7 1159 /* Make sure nothing is really destructed when we shut down */
313cefa1 1160 m->n_reloading++;
718db961 1161 bus_manager_send_reloading(m, true);
71445ae7 1162
6b78f9b4 1163 fds = fdset_new();
48b90859
LP
1164 if (!fds)
1165 return log_oom();
a16e1123 1166
b3680f49 1167 r = manager_serialize(m, f, fds, switching_root);
48b90859 1168 if (r < 0)
d68c645b 1169 return r;
a16e1123 1170
48b90859
LP
1171 if (fseeko(f, 0, SEEK_SET) == (off_t) -1)
1172 return log_error_errno(errno, "Failed to rewind serialization fd: %m");
a16e1123 1173
6b78f9b4 1174 r = fd_cloexec(fileno(f), false);
48b90859
LP
1175 if (r < 0)
1176 return log_error_errno(r, "Failed to disable O_CLOEXEC for serialization: %m");
a16e1123 1177
6b78f9b4 1178 r = fdset_cloexec(fds, false);
48b90859
LP
1179 if (r < 0)
1180 return log_error_errno(r, "Failed to disable O_CLOEXEC for serialization fds: %m");
a16e1123 1181
2cc856ac
LP
1182 *ret_f = TAKE_PTR(f);
1183 *ret_fds = TAKE_PTR(fds);
a16e1123 1184
48b90859 1185 return 0;
a16e1123
LP
1186}
1187
a8b627aa
LP
static void bump_file_max_and_nr_open(void) {

        /* Bumps the fs.file-max and fs.nr_open sysctls to their respective maximums. On current kernels
         * large numbers of file descriptors are no longer a performance problem and their memory is
         * properly tracked by memcg, hence counting and limiting them in another two layers of limits
         * is unnecessary and just complicates things. This function thus turns off 2 of the 4 levels of
         * limits on file descriptors, and makes RLIMIT_NOFILE (soft + hard) the only ones that really
         * matter. */

#if BUMP_PROC_SYS_FS_FILE_MAX || BUMP_PROC_SYS_FS_NR_OPEN
        int r;
#endif

#if BUMP_PROC_SYS_FS_FILE_MAX
        /* Since 5.2 the maximum the kernel allows for this is LONG_MAX, hence use that. (Previously
         * things were different, but the operation would fail silently.) */
        r = sysctl_writef("fs/file-max", "%li\n", LONG_MAX);
        if (r < 0)
                log_full_errno(IN_SET(r, -EROFS, -EPERM, -EACCES) ? LOG_DEBUG : LOG_WARNING, r, "Failed to bump fs.file-max, ignoring: %m");
#endif

#if BUMP_PROC_SYS_FS_NR_OPEN
        int candidate = INT_MAX;

        /* The kernel enforces maximum and minimum values on fs.nr_open, but does not tell us what they
         * are. The expression by which the maximum is determined depends on the architecture and on
         * kernel implementation details, hence is nothing we want to copy to userspace. Since the
         * maximum is not exposed to us we can only try and hope: start with INT_MAX and keep halving
         * the value until we find one that works. Ugly? Yes, absolutely, but kernel APIs are kernel
         * APIs, so what can we do... */

        for (;;) {
                int cur_nr_open;

                candidate &= ~(__SIZEOF_POINTER__ - 1); /* Round down to next multiple of the pointer size */
                if (candidate < 1024) {
                        log_warning("Can't bump fs.nr_open, value too small.");
                        break;
                }

                cur_nr_open = read_nr_open();
                if (cur_nr_open < 0) {
                        log_error_errno(cur_nr_open, "Failed to read fs.nr_open: %m");
                        break;
                }
                if (cur_nr_open >= candidate) { /* Nothing to gain */
                        log_debug("Skipping bump, value is already larger.");
                        break;
                }

                r = sysctl_writef("fs/nr_open", "%i\n", candidate);
                if (r >= 0) {
                        log_debug("Successfully bumped fs.nr_open to %i", candidate);
                        break;
                }
                if (r != -EINVAL) {
                        log_full_errno(IN_SET(r, -EROFS, -EPERM, -EACCES) ? LOG_DEBUG : LOG_WARNING, r, "Failed to bump fs.nr_open, ignoring: %m");
                        break;
                }

                /* EINVAL: the kernel didn't like the value, try a smaller one */
                log_debug("Couldn't write fs.nr_open as %i, halving it.", candidate);
                candidate /= 2;
        }
#endif
}
1253
/* Raises RLIMIT_NOFILE (soft and hard) for ourselves to the value read_nr_open() reports, unless the
 * limits passed in saved_rlimit are that high already. Returns 0 if nothing had to be done or the
 * limit was bumped, and the (negative) result of the logged warning if setting the limit failed. */
static int bump_rlimit_nofile(struct rlimit *saved_rlimit) {
        struct rlimit bumped;
        int nr_open, r;

        /* The underlying absolute limit the kernel enforces */
        nr_open = read_nr_open();

        /* Determine the limits we want, but never go below what we inherited. */
        bumped = (struct rlimit) {
                .rlim_cur = MAX((rlim_t) nr_open, saved_rlimit->rlim_cur),
                .rlim_max = MAX((rlim_t) nr_open, saved_rlimit->rlim_max),
        };

        /* If that's what we have already, there's nothing to do. */
        if (saved_rlimit->rlim_cur >= bumped.rlim_cur &&
            saved_rlimit->rlim_max >= bumped.rlim_max) {
                log_debug("RLIMIT_NOFILE is already as high or higher than we need it, not bumping.");
                return 0;
        }

        /* Bump the limit substantially, all the way to the maximum the kernel allows, for both the
         * soft and the hard limit. */
        r = setrlimit_closest(RLIMIT_NOFILE, &bumped);
        if (r < 0)
                return log_warning_errno(r, "Setting RLIMIT_NOFILE failed, ignoring: %m");

        return 0;
}
1282
fb3ae275 1283static int bump_rlimit_memlock(struct rlimit *saved_rlimit) {
cda7faa9 1284 struct rlimit new_rlimit;
04d1ee0f 1285 uint64_t mm;
fb3ae275
LP
1286 int r;
1287
a17c1712 1288 /* BPF_MAP_TYPE_LPM_TRIE bpf maps are charged against RLIMIT_MEMLOCK, even if we have CAP_IPC_LOCK which should
6e3c443b 1289 * normally disable such checks. We need them to implement IPAddressAllow= and IPAddressDeny=, hence let's bump
a17c1712 1290 * the value high enough for our user. */
fb3ae275 1291
cda7faa9
LP
1292 /* Using MAX() on resource limits only is safe if RLIM_INFINITY is > 0. POSIX declares that rlim_t
1293 * must be unsigned, hence this is a given, but let's make this clear here. */
1294 assert_cc(RLIM_INFINITY > 0);
1295
60dcf3dc
LP
1296 mm = physical_memory_scale(1, 8); /* Let's scale how much we allow to be locked by the amount of physical
1297 * RAM. We allow an eighth to be locked by us, just to pick a value. */
04d1ee0f 1298
cda7faa9 1299 new_rlimit = (struct rlimit) {
04d1ee0f
LP
1300 .rlim_cur = MAX3(HIGH_RLIMIT_MEMLOCK, saved_rlimit->rlim_cur, mm),
1301 .rlim_max = MAX3(HIGH_RLIMIT_MEMLOCK, saved_rlimit->rlim_max, mm),
cda7faa9
LP
1302 };
1303
1304 if (saved_rlimit->rlim_max >= new_rlimit.rlim_cur &&
1305 saved_rlimit->rlim_cur >= new_rlimit.rlim_max) {
1306 log_debug("RLIMIT_MEMLOCK is already as high or higher than we need it, not bumping.");
1307 return 0;
1308 }
1309
1310 r = setrlimit_closest(RLIMIT_MEMLOCK, &new_rlimit);
fb3ae275
LP
1311 if (r < 0)
1312 return log_warning_errno(r, "Setting RLIMIT_MEMLOCK failed, ignoring: %m");
1313
1314 return 0;
1315}
1316
static void test_usr(void) {

        /* Warns if /usr looks like it is neither on the same file system as / nor mounted yet, which
         * we detect by the directory being empty. */

        if (dir_is_empty("/usr") > 0)
                log_warning("/usr appears to be on its own filesystem and is not already mounted. This is not a supported setup. "
                            "Some things will probably break (sometimes even silently) in mysterious ways. "
                            "Consult http://freedesktop.org/wiki/Software/systemd/separate-usr-is-broken for more information.");
}
1328
d3b1c508 1329static int enforce_syscall_archs(Set *archs) {
349cc4a5 1330#if HAVE_SECCOMP
d3b1c508
LP
1331 int r;
1332
83f12b27
FS
1333 if (!is_seccomp_available())
1334 return 0;
1335
469830d1 1336 r = seccomp_restrict_archs(arg_syscall_archs);
d3b1c508 1337 if (r < 0)
469830d1 1338 return log_error_errno(r, "Failed to enforce system call architecture restrication: %m");
d3b1c508 1339#endif
469830d1 1340 return 0;
d3b1c508
LP
1341}
1342
b6e2f329
LP
1343static int status_welcome(void) {
1344 _cleanup_free_ char *pretty_name = NULL, *ansi_color = NULL;
1345 int r;
1346
5ca02bfc 1347 if (!show_status_on(arg_show_status))
fd8c85c6
LP
1348 return 0;
1349
d58ad743
LP
1350 r = parse_os_release(NULL,
1351 "PRETTY_NAME", &pretty_name,
209c1470 1352 "ANSI_COLOR", &ansi_color);
d58ad743
LP
1353 if (r < 0)
1354 log_full_errno(r == -ENOENT ? LOG_DEBUG : LOG_WARNING, r,
1355 "Failed to read os-release file, ignoring: %m");
b6e2f329 1356
dc9b5816 1357 if (log_get_show_color())
a885727a 1358 return status_printf(NULL, 0,
dc9b5816
ZJS
1359 "\nWelcome to \x1B[%sm%s\x1B[0m!\n",
1360 isempty(ansi_color) ? "1" : ansi_color,
1361 isempty(pretty_name) ? "Linux" : pretty_name);
1362 else
a885727a 1363 return status_printf(NULL, 0,
dc9b5816
ZJS
1364 "\nWelcome to %s!\n",
1365 isempty(pretty_name) ? "Linux" : pretty_name);
b6e2f329
LP
1366}
1367
fdd25311
LP
1368static int write_container_id(void) {
1369 const char *c;
7756528e 1370 int r = 0; /* avoid false maybe-uninitialized warning */
fdd25311
LP
1371
1372 c = getenv("container");
1373 if (isempty(c))
1374 return 0;
1375
8612da97
LP
1376 RUN_WITH_UMASK(0022)
1377 r = write_string_file("/run/systemd/container", c, WRITE_STRING_FILE_CREATE);
19854865 1378 if (r < 0)
f1f849b0 1379 return log_warning_errno(r, "Failed to write /run/systemd/container, ignoring: %m");
19854865
LP
1380
1381 return 1;
1382}
1383
1384static int bump_unix_max_dgram_qlen(void) {
1385 _cleanup_free_ char *qlen = NULL;
1386 unsigned long v;
1387 int r;
1388
3130fca5
LP
1389 /* Let's bump the net.unix.max_dgram_qlen sysctl. The kernel default of 16 is simply too low. We set the value
1390 * really really early during boot, so that it is actually applied to all our sockets, including the
1391 * $NOTIFY_SOCKET one. */
19854865
LP
1392
1393 r = read_one_line_file("/proc/sys/net/unix/max_dgram_qlen", &qlen);
1394 if (r < 0)
875622c3 1395 return log_full_errno(r == -ENOENT ? LOG_DEBUG : LOG_WARNING, r, "Failed to read AF_UNIX datagram queue length, ignoring: %m");
19854865
LP
1396
1397 r = safe_atolu(qlen, &v);
1398 if (r < 0)
3130fca5 1399 return log_warning_errno(r, "Failed to parse AF_UNIX datagram queue length '%s', ignoring: %m", qlen);
19854865
LP
1400
1401 if (v >= DEFAULT_UNIX_MAX_DGRAM_QLEN)
1402 return 0;
1403
57512c89 1404 r = write_string_filef("/proc/sys/net/unix/max_dgram_qlen", WRITE_STRING_FILE_DISABLE_BUFFER, "%lu", DEFAULT_UNIX_MAX_DGRAM_QLEN);
19854865
LP
1405 if (r < 0)
1406 return log_full_errno(IN_SET(r, -EROFS, -EPERM, -EACCES) ? LOG_DEBUG : LOG_WARNING, r,
1407 "Failed to bump AF_UNIX datagram queue length, ignoring: %m");
1408
1409 return 1;
fdd25311
LP
1410}
1411
32391275
FB
1412static int fixup_environment(void) {
1413 _cleanup_free_ char *term = NULL;
4dc63c4b 1414 const char *t;
32391275
FB
1415 int r;
1416
43db615b
LP
1417 /* Only fix up the environment when we are started as PID 1 */
1418 if (getpid_cached() != 1)
1419 return 0;
1420
1421 /* We expect the environment to be set correctly if run inside a container. */
84af7821
LP
1422 if (detect_container() > 0)
1423 return 0;
1424
43db615b
LP
1425 /* When started as PID1, the kernel uses /dev/console for our stdios and uses TERM=linux whatever the backend
1426 * device used by the console. We try to make a better guess here since some consoles might not have support
1427 * for color mode for example.
32391275 1428 *
43db615b 1429 * However if TERM was configured through the kernel command line then leave it alone. */
1d84ad94 1430 r = proc_cmdline_get_key("TERM", 0, &term);
32391275
FB
1431 if (r < 0)
1432 return r;
32391275 1433
4dc63c4b
LP
1434 t = term ?: default_term_for_tty("/dev/console");
1435
1436 if (setenv("TERM", t, 1) < 0)
32391275
FB
1437 return -errno;
1438
9d48671c 1439 /* The kernels sets HOME=/ for init. Let's undo this. */
44ee03d1
ZJS
1440 if (path_equal_ptr(getenv("HOME"), "/"))
1441 assert_se(unsetenv("HOME") == 0);
9d48671c 1442
32391275
FB
1443 return 0;
1444}
1445
6808a0bc
LP
static void redirect_telinit(int argc, char *argv[]) {

        /* SysV compatibility: there, calling init as a user is identical to calling telinit. Hence, if
         * we are invoked as "init" but are not PID 1, hand the command line over to systemctl. This
         * either does not return or is a no-op. */

#if HAVE_SYSV_COMPAT
        if (getpid_cached() == 1 || !invoked_as(argv, "init"))
                return;

        execv(SYSTEMCTL_BINARY_PATH, argv);

        /* We only get here if the exec failed */
        log_error_errno(errno, "Failed to exec " SYSTEMCTL_BINARY_PATH ": %m");
        exit(EXIT_FAILURE);
#endif
}
1462
4a36297c
LP
1463static int become_shutdown(
1464 const char *shutdown_verb,
7eb35049 1465 int retval) {
4a36297c
LP
1466
1467 char log_level[DECIMAL_STR_MAX(int) + 1],
e73c54b8
JK
1468 exit_code[DECIMAL_STR_MAX(uint8_t) + 1],
1469 timeout[DECIMAL_STR_MAX(usec_t) + 1];
4a36297c 1470
e73c54b8 1471 const char* command_line[13] = {
4a36297c
LP
1472 SYSTEMD_SHUTDOWN_BINARY_PATH,
1473 shutdown_verb,
e73c54b8 1474 "--timeout", timeout,
4a36297c
LP
1475 "--log-level", log_level,
1476 "--log-target",
1477 };
1478
1479 _cleanup_strv_free_ char **env_block = NULL;
e73c54b8 1480 size_t pos = 7;
4a36297c 1481 int r;
acafd7d8 1482 usec_t watchdog_timer = 0;
4a36297c 1483
7eb35049 1484 assert(shutdown_verb);
234519ae 1485 assert(!command_line[pos]);
4a36297c
LP
1486 env_block = strv_copy(environ);
1487
1488 xsprintf(log_level, "%d", log_get_max_level());
e73c54b8 1489 xsprintf(timeout, "%" PRI_USEC "us", arg_default_timeout_stop_usec);
4a36297c
LP
1490
1491 switch (log_get_target()) {
1492
1493 case LOG_TARGET_KMSG:
1494 case LOG_TARGET_JOURNAL_OR_KMSG:
1495 case LOG_TARGET_SYSLOG_OR_KMSG:
1496 command_line[pos++] = "kmsg";
1497 break;
1498
1499 case LOG_TARGET_NULL:
1500 command_line[pos++] = "null";
1501 break;
1502
1503 case LOG_TARGET_CONSOLE:
1504 default:
1505 command_line[pos++] = "console";
1506 break;
1507 };
1508
1509 if (log_get_show_color())
1510 command_line[pos++] = "--log-color";
1511
1512 if (log_get_show_location())
1513 command_line[pos++] = "--log-location";
1514
c5673ed0
DS
1515 if (log_get_show_time())
1516 command_line[pos++] = "--log-time";
1517
4a36297c
LP
1518 if (streq(shutdown_verb, "exit")) {
1519 command_line[pos++] = "--exit-code";
1520 command_line[pos++] = exit_code;
1521 xsprintf(exit_code, "%d", retval);
1522 }
1523
1524 assert(pos < ELEMENTSOF(command_line));
1525
acafd7d8 1526 if (streq(shutdown_verb, "reboot"))
65224c1d 1527 watchdog_timer = arg_reboot_watchdog;
acafd7d8
LB
1528 else if (streq(shutdown_verb, "kexec"))
1529 watchdog_timer = arg_kexec_watchdog;
1530
1531 if (watchdog_timer > 0 && watchdog_timer != USEC_INFINITY) {
7eb35049 1532
4a36297c
LP
1533 char *e;
1534
acafd7d8 1535 /* If we reboot or kexec let's set the shutdown
4a36297c
LP
1536 * watchdog and tell the shutdown binary to
1537 * repeatedly ping it */
acafd7d8 1538 r = watchdog_set_timeout(&watchdog_timer);
4a36297c
LP
1539 watchdog_close(r < 0);
1540
1541 /* Tell the binary how often to ping, ignore failure */
acafd7d8 1542 if (asprintf(&e, "WATCHDOG_USEC="USEC_FMT, watchdog_timer) > 0)
8a2c1fbf
EJ
1543 (void) strv_consume(&env_block, e);
1544
1545 if (arg_watchdog_device &&
1546 asprintf(&e, "WATCHDOG_DEVICE=%s", arg_watchdog_device) > 0)
1547 (void) strv_consume(&env_block, e);
4a36297c
LP
1548 } else
1549 watchdog_close(true);
1550
1551 /* Avoid the creation of new processes forked by the
1552 * kernel; at this point, we will not listen to the
1553 * signals anyway */
1554 if (detect_container() <= 0)
1555 (void) cg_uninstall_release_agent(SYSTEMD_CGROUP_CONTROLLER);
1556
1557 execve(SYSTEMD_SHUTDOWN_BINARY_PATH, (char **) command_line, env_block);
1558 return -errno;
1559}
1560
e839bafd
LP
1561static void initialize_clock(void) {
1562 int r;
1563
3753325b
LP
1564 /* This is called very early on, before we parse the kernel command line or otherwise figure out why
1565 * we are running, but only once. */
1566
e839bafd
LP
1567 if (clock_is_localtime(NULL) > 0) {
1568 int min;
1569
1570 /*
1571 * The very first call of settimeofday() also does a time warp in the kernel.
1572 *
1573 * In the rtc-in-local time mode, we set the kernel's timezone, and rely on external tools to take care
1574 * of maintaining the RTC and do all adjustments. This matches the behavior of Windows, which leaves
1575 * the RTC alone if the registry tells that the RTC runs in UTC.
1576 */
1577 r = clock_set_timezone(&min);
1578 if (r < 0)
1579 log_error_errno(r, "Failed to apply local time delta, ignoring: %m");
1580 else
1581 log_info("RTC configured in localtime, applying delta of %i minutes to system time.", min);
1582
d46b79bb 1583 } else if (!in_initrd())
e839bafd
LP
1584 /*
1585 * Do a dummy very first call to seal the kernel's time warp magic.
1586 *
1587 * Do not call this from inside the initrd. The initrd might not carry /etc/adjtime with LOCAL, but the
1588 * real system could be set up that way. In such case, we need to delay the time-warp or the sealing
1589 * until we reach the real system.
1590 *
1591 * Do no set the kernel's timezone. The concept of local time cannot be supported reliably, the time
1592 * will jump or be incorrect at every daylight saving time change. All kernel local time concepts will
1593 * be treated as UTC that way.
1594 */
1595 (void) clock_reset_timewarp();
e839bafd
LP
1596
1597 r = clock_apply_epoch();
1598 if (r < 0)
1599 log_error_errno(r, "Current system time is before build time, but cannot correct: %m");
1600 else if (r > 0)
1601 log_info("System time before build time, advancing clock.");
1602}
1603
3753325b
LP
1604static void apply_clock_update(void) {
1605 struct timespec ts;
1606
1607 /* This is called later than initialize_clock(), i.e. after we parsed configuration files/kernel
1608 * command line and such. */
1609
1610 if (arg_clock_usec == 0)
1611 return;
1612
45250e66
LP
1613 if (getpid_cached() != 1)
1614 return;
1615
3753325b
LP
1616 if (clock_settime(CLOCK_REALTIME, timespec_store(&ts, arg_clock_usec)) < 0)
1617 log_error_errno(errno, "Failed to set system clock to time specified on kernel command line: %m");
1618 else {
1619 char buf[FORMAT_TIMESTAMP_MAX];
1620
1621 log_info("Set system clock to %s, as specified on the kernel command line.",
1622 format_timestamp(buf, sizeof(buf), arg_clock_usec));
1623 }
1624}
1625
d247f232 1626static void cmdline_take_random_seed(void) {
d247f232
LP
1627 size_t suggested;
1628 int r;
1629
1630 if (arg_random_seed_size == 0)
1631 return;
1632
1633 if (getpid_cached() != 1)
1634 return;
1635
1636 assert(arg_random_seed);
1637 suggested = random_pool_size();
1638
1639 if (arg_random_seed_size < suggested)
1640 log_warning("Random seed specified on kernel command line has size %zu, but %zu bytes required to fill entropy pool.",
1641 arg_random_seed_size, suggested);
1642
61bd7d1e 1643 r = random_write_entropy(-1, arg_random_seed, arg_random_seed_size, true);
d247f232
LP
1644 if (r < 0) {
1645 log_warning_errno(r, "Failed to credit entropy specified on kernel command line, ignoring: %m");
1646 return;
1647 }
1648
1649 log_notice("Successfully credited entropy passed on kernel command line.\n"
1650 "Note that the seed provided this way is accessible to unprivileged programs. This functionality should not be used outside of testing environments.");
1651}
1652
/* Core dump defaults for PID 1; compiled to a no-op unless ENABLE_COREDUMP is set. */
static void initialize_coredump(bool skip_setup) {
#if ENABLE_COREDUMP
        if (getpid_cached() != 1)
                return;

        /* Lift the core size limit, so that coredump handlers which honour the limit (such as
         * systemd-coredump) process core dumps of system services by default. */
        if (setrlimit(RLIMIT_CORE, &RLIMIT_MAKE_CONST(RLIM_INFINITY)) < 0)
                log_warning_errno(errno, "Failed to set RLIMIT_CORE: %m");

        if (skip_setup)
                return;

        /* On a fresh boot, switch the core_pattern logic off by default, so that no coredumps are
         * stored until systemd-coredump is enabled via sysctl. The kernel command line may still
         * change this later, so that core dumps can be generated during early startup and in the
         * initramfs. */
        disable_coredumps();
#endif
}
1671
c6885f5f
FB
1672static void initialize_core_pattern(bool skip_setup) {
1673 int r;
1674
1675 if (skip_setup || !arg_early_core_pattern)
1676 return;
1677
1678 if (getpid_cached() != 1)
1679 return;
1680
57512c89 1681 r = write_string_file("/proc/sys/kernel/core_pattern", arg_early_core_pattern, WRITE_STRING_FILE_DISABLE_BUFFER);
c6885f5f
FB
1682 if (r < 0)
1683 log_warning_errno(r, "Failed to write '%s' to /proc/sys/kernel/core_pattern, ignoring: %m", arg_early_core_pattern);
1684}
1685
61fbbac1
ZJS
1686static void update_cpu_affinity(bool skip_setup) {
1687 _cleanup_free_ char *mask = NULL;
1688
1689 if (skip_setup || !arg_cpu_affinity.set)
1690 return;
1691
1692 assert(arg_cpu_affinity.allocated > 0);
1693
1694 mask = cpu_set_to_string(&arg_cpu_affinity);
1695 log_debug("Setting CPU affinity to %s.", strnull(mask));
1696
1697 if (sched_setaffinity(0, arg_cpu_affinity.allocated, arg_cpu_affinity.set) < 0)
1698 log_warning_errno(errno, "Failed to set CPU affinity: %m");
1699}
1700
b070c7c0
MS
1701static void update_numa_policy(bool skip_setup) {
1702 int r;
1703 _cleanup_free_ char *nodes = NULL;
1704 const char * policy = NULL;
1705
1706 if (skip_setup || !mpol_is_valid(numa_policy_get_type(&arg_numa_policy)))
1707 return;
1708
1709 if (DEBUG_LOGGING) {
1710 policy = mpol_to_string(numa_policy_get_type(&arg_numa_policy));
1711 nodes = cpu_set_to_range_string(&arg_numa_policy.nodes);
1712 log_debug("Setting NUMA policy to %s, with nodes %s.", strnull(policy), strnull(nodes));
1713 }
1714
1715 r = apply_numa_policy(&arg_numa_policy);
1716 if (r == -EOPNOTSUPP)
1717 log_debug_errno(r, "NUMA support not available, ignoring.");
1718 else if (r < 0)
1719 log_warning_errno(r, "Failed to set NUMA memory policy: %m");
1720}
1721
3c7878f9
LP
1722static void do_reexecute(
1723 int argc,
1724 char *argv[],
1725 const struct rlimit *saved_rlimit_nofile,
1726 const struct rlimit *saved_rlimit_memlock,
1727 FDSet *fds,
1728 const char *switch_root_dir,
1729 const char *switch_root_init,
1730 const char **ret_error_message) {
1731
1732 unsigned i, j, args_size;
1733 const char **args;
1734 int r;
1735
1736 assert(saved_rlimit_nofile);
1737 assert(saved_rlimit_memlock);
1738 assert(ret_error_message);
1739
1740 /* Close and disarm the watchdog, so that the new instance can reinitialize it, but doesn't get rebooted while
1741 * we do that */
1742 watchdog_close(true);
1743
ddfa8b0b
LP
1744 /* Reset RLIMIT_NOFILE + RLIMIT_MEMLOCK back to the kernel defaults, so that the new systemd can pass
1745 * the kernel default to its child processes */
1746 if (saved_rlimit_nofile->rlim_cur != 0)
3c7878f9 1747 (void) setrlimit(RLIMIT_NOFILE, saved_rlimit_nofile);
ddfa8b0b 1748 if (saved_rlimit_memlock->rlim_cur != RLIM_INFINITY)
3c7878f9
LP
1749 (void) setrlimit(RLIMIT_MEMLOCK, saved_rlimit_memlock);
1750
1751 if (switch_root_dir) {
1752 /* Kill all remaining processes from the initrd, but don't wait for them, so that we can handle the
1753 * SIGCHLD for them after deserializing. */
e73c54b8 1754 broadcast_signal(SIGTERM, false, true, arg_default_timeout_stop_usec);
3c7878f9
LP
1755
1756 /* And switch root with MS_MOVE, because we remove the old directory afterwards and detach it. */
1757 r = switch_root(switch_root_dir, "/mnt", true, MS_MOVE);
1758 if (r < 0)
1759 log_error_errno(r, "Failed to switch root, trying to continue: %m");
1760 }
1761
1762 args_size = MAX(6, argc+1);
1763 args = newa(const char*, args_size);
1764
1765 if (!switch_root_init) {
1766 char sfd[DECIMAL_STR_MAX(int) + 1];
1767
1768 /* First try to spawn ourselves with the right path, and with full serialization. We do this only if
1769 * the user didn't specify an explicit init to spawn. */
1770
1771 assert(arg_serialization);
1772 assert(fds);
1773
1774 xsprintf(sfd, "%i", fileno(arg_serialization));
1775
1776 i = 0;
1777 args[i++] = SYSTEMD_BINARY_PATH;
1778 if (switch_root_dir)
1779 args[i++] = "--switched-root";
1780 args[i++] = arg_system ? "--system" : "--user";
1781 args[i++] = "--deserialize";
1782 args[i++] = sfd;
1783 args[i++] = NULL;
1784
1785 assert(i <= args_size);
1786
1787 /*
1788 * We want valgrind to print its memory usage summary before reexecution. Valgrind won't do this is on
1789 * its own on exec(), but it will do it on exit(). Hence, to ensure we get a summary here, fork() off
1790 * a child, let it exit() cleanly, so that it prints the summary, and wait() for it in the parent,
1791 * before proceeding into the exec().
1792 */
1793 valgrind_summary_hack();
1794
1795 (void) execv(args[0], (char* const*) args);
1796 log_debug_errno(errno, "Failed to execute our own binary, trying fallback: %m");
1797 }
1798
1799 /* Try the fallback, if there is any, without any serialization. We pass the original argv[] and envp[]. (Well,
1800 * modulo the ordering changes due to getopt() in argv[], and some cleanups in envp[], but let's hope that
1801 * doesn't matter.) */
1802
1803 arg_serialization = safe_fclose(arg_serialization);
1804 fds = fdset_free(fds);
1805
1806 /* Reopen the console */
1807 (void) make_console_stdio();
1808
1809 for (j = 1, i = 1; j < (unsigned) argc; j++)
1810 args[i++] = argv[j];
1811 args[i++] = NULL;
1812 assert(i <= args_size);
1813
5238e957 1814 /* Re-enable any blocked signals, especially important if we switch from initial ramdisk to init=... */
3c7878f9
LP
1815 (void) reset_all_signal_handlers();
1816 (void) reset_signal_mask();
595225af 1817 (void) rlimit_nofile_safe();
3c7878f9
LP
1818
1819 if (switch_root_init) {
1820 args[0] = switch_root_init;
a5cede8c 1821 (void) execve(args[0], (char* const*) args, saved_env);
3c7878f9
LP
1822 log_warning_errno(errno, "Failed to execute configured init, trying fallback: %m");
1823 }
1824
1825 args[0] = "/sbin/init";
1826 (void) execv(args[0], (char* const*) args);
1827 r = -errno;
1828
1829 manager_status_printf(NULL, STATUS_TYPE_EMERGENCY,
1830 ANSI_HIGHLIGHT_RED " !! " ANSI_NORMAL,
1831 "Failed to execute /sbin/init");
1832
1833 if (r == -ENOENT) {
1834 log_warning("No /sbin/init, trying fallback");
1835
1836 args[0] = "/bin/sh";
1837 args[1] = NULL;
a5cede8c 1838 (void) execve(args[0], (char* const*) args, saved_env);
3c7878f9
LP
1839 log_error_errno(errno, "Failed to execute /bin/sh, giving up: %m");
1840 } else
1841 log_warning_errno(r, "Failed to execute /sbin/init, giving up: %m");
1842
1843 *ret_error_message = "Failed to execute fallback shell";
1844}
1845
7eb35049
LP
1846static int invoke_main_loop(
1847 Manager *m,
a9fd4cd1
FB
1848 const struct rlimit *saved_rlimit_nofile,
1849 const struct rlimit *saved_rlimit_memlock,
7eb35049
LP
1850 bool *ret_reexecute,
1851 int *ret_retval, /* Return parameters relevant for shutting down */
1852 const char **ret_shutdown_verb, /* … */
1853 FDSet **ret_fds, /* Return parameters for reexecuting */
1854 char **ret_switch_root_dir, /* … */
1855 char **ret_switch_root_init, /* … */
1856 const char **ret_error_message) {
1857
1858 int r;
1859
1860 assert(m);
a9fd4cd1
FB
1861 assert(saved_rlimit_nofile);
1862 assert(saved_rlimit_memlock);
7eb35049
LP
1863 assert(ret_reexecute);
1864 assert(ret_retval);
1865 assert(ret_shutdown_verb);
1866 assert(ret_fds);
1867 assert(ret_switch_root_dir);
1868 assert(ret_switch_root_init);
1869 assert(ret_error_message);
1870
1871 for (;;) {
1872 r = manager_loop(m);
1873 if (r < 0) {
1874 *ret_error_message = "Failed to run main loop";
1875 return log_emergency_errno(r, "Failed to run main loop: %m");
1876 }
1877
3ca4d0b3 1878 switch ((ManagerObjective) r) {
7eb35049 1879
a6ecbf83 1880 case MANAGER_RELOAD: {
bda7d78b 1881 LogTarget saved_log_target;
a6ecbf83
FB
1882 int saved_log_level;
1883
7eb35049
LP
1884 log_info("Reloading.");
1885
3fe91079 1886 /* First, save any overridden log level/target, then parse the configuration file, which might
bda7d78b
FB
1887 * change the log level to new settings. */
1888
a6ecbf83 1889 saved_log_level = m->log_level_overridden ? log_get_max_level() : -1;
bda7d78b 1890 saved_log_target = m->log_target_overridden ? log_get_target() : _LOG_TARGET_INVALID;
a6ecbf83 1891
a9fd4cd1 1892 (void) parse_configuration(saved_rlimit_nofile, saved_rlimit_memlock);
7eb35049
LP
1893
1894 set_manager_defaults(m);
986935cf 1895 set_manager_settings(m);
7eb35049 1896
61fbbac1 1897 update_cpu_affinity(false);
b070c7c0 1898 update_numa_policy(false);
61fbbac1 1899
a6ecbf83
FB
1900 if (saved_log_level >= 0)
1901 manager_override_log_level(m, saved_log_level);
bda7d78b
FB
1902 if (saved_log_target >= 0)
1903 manager_override_log_target(m, saved_log_target);
a6ecbf83 1904
7eb35049
LP
1905 r = manager_reload(m);
1906 if (r < 0)
7a35fa24
LP
1907 /* Reloading failed before the point of no return. Let's continue running as if nothing happened. */
1908 m->objective = MANAGER_OK;
7eb35049
LP
1909
1910 break;
a6ecbf83 1911 }
7eb35049
LP
1912
1913 case MANAGER_REEXECUTE:
1914
1915 r = prepare_reexecute(m, &arg_serialization, ret_fds, false);
1916 if (r < 0) {
1917 *ret_error_message = "Failed to prepare for reexecution";
1918 return r;
1919 }
1920
1921 log_notice("Reexecuting.");
1922
1923 *ret_reexecute = true;
1924 *ret_retval = EXIT_SUCCESS;
1925 *ret_shutdown_verb = NULL;
1926 *ret_switch_root_dir = *ret_switch_root_init = NULL;
1927
1928 return 0;
1929
1930 case MANAGER_SWITCH_ROOT:
1931 if (!m->switch_root_init) {
1932 r = prepare_reexecute(m, &arg_serialization, ret_fds, true);
1933 if (r < 0) {
1934 *ret_error_message = "Failed to prepare for reexecution";
1935 return r;
1936 }
1937 } else
1938 *ret_fds = NULL;
1939
1940 log_notice("Switching root.");
1941
1942 *ret_reexecute = true;
1943 *ret_retval = EXIT_SUCCESS;
1944 *ret_shutdown_verb = NULL;
1945
1946 /* Steal the switch root parameters */
49052946
YW
1947 *ret_switch_root_dir = TAKE_PTR(m->switch_root);
1948 *ret_switch_root_init = TAKE_PTR(m->switch_root_init);
7eb35049
LP
1949
1950 return 0;
1951
1952 case MANAGER_EXIT:
1953
1954 if (MANAGER_IS_USER(m)) {
1955 log_debug("Exit.");
1956
1957 *ret_reexecute = false;
1958 *ret_retval = m->return_value;
1959 *ret_shutdown_verb = NULL;
1960 *ret_fds = NULL;
1961 *ret_switch_root_dir = *ret_switch_root_init = NULL;
1962
1963 return 0;
1964 }
1965
1966 _fallthrough_;
1967 case MANAGER_REBOOT:
1968 case MANAGER_POWEROFF:
1969 case MANAGER_HALT:
1970 case MANAGER_KEXEC: {
af41e508
LP
1971 static const char * const table[_MANAGER_OBJECTIVE_MAX] = {
1972 [MANAGER_EXIT] = "exit",
1973 [MANAGER_REBOOT] = "reboot",
7eb35049 1974 [MANAGER_POWEROFF] = "poweroff",
af41e508
LP
1975 [MANAGER_HALT] = "halt",
1976 [MANAGER_KEXEC] = "kexec",
7eb35049
LP
1977 };
1978
1979 log_notice("Shutting down.");
1980
1981 *ret_reexecute = false;
1982 *ret_retval = m->return_value;
af41e508 1983 assert_se(*ret_shutdown_verb = table[m->objective]);
7eb35049
LP
1984 *ret_fds = NULL;
1985 *ret_switch_root_dir = *ret_switch_root_init = NULL;
1986
1987 return 0;
1988 }
1989
1990 default:
af41e508 1991 assert_not_reached("Unknown or unexpected manager objective.");
7eb35049
LP
1992 }
1993 }
1994}
1995
31aef7ff
LP
1996static void log_execution_mode(bool *ret_first_boot) {
1997 assert(ret_first_boot);
1998
1999 if (arg_system) {
2000 int v;
2001
e7b18106 2002 log_info("systemd " GIT_VERSION " running in %ssystem mode (%s)",
91b79ba8
ZJS
2003 arg_action == ACTION_TEST ? "test " : "",
2004 systemd_features);
31aef7ff
LP
2005
2006 v = detect_virtualization();
2007 if (v > 0)
2008 log_info("Detected virtualization %s.", virtualization_to_string(v));
2009
2010 log_info("Detected architecture %s.", architecture_to_string(uname_architecture()));
2011
2012 if (in_initrd()) {
2013 *ret_first_boot = false;
2014 log_info("Running in initial RAM disk.");
2015 } else {
583cef3b
HS
2016 int r;
2017 _cleanup_free_ char *id_text = NULL;
2018
2019 /* Let's check whether we are in first boot. We use /etc/machine-id as flag file
2020 * for this: If it is missing or contains the value "uninitialized", this is the
2021 * first boot. In any other case, it is not. This allows container managers and
2022 * installers to provision a couple of files already. If the container manager
2023 * wants to provision the machine ID itself it should pass $container_uuid to PID 1. */
2024
2025 r = read_one_line_file("/etc/machine-id", &id_text);
2026 if (r < 0 || streq(id_text, "uninitialized")) {
2027 if (r < 0 && r != -ENOENT)
2028 log_warning_errno(r, "Unexpected error while reading /etc/machine-id, ignoring: %m");
2029
2030 *ret_first_boot = true;
2031 log_info("Detected first boot.");
2032 } else {
2033 *ret_first_boot = false;
2034 log_debug("Detected initialized system, this is not the first boot.");
2035 }
31aef7ff
LP
2036 }
2037 } else {
b9e90f3a 2038 if (DEBUG_LOGGING) {
c2b2df60 2039 _cleanup_free_ char *t = NULL;
31aef7ff 2040
b9e90f3a 2041 t = uid_to_name(getuid());
91b79ba8
ZJS
2042 log_debug("systemd " GIT_VERSION " running in %suser mode for user " UID_FMT "/%s. (%s)",
2043 arg_action == ACTION_TEST ? " test" : "",
2044 getuid(), strna(t), systemd_features);
b9e90f3a 2045 }
31aef7ff
LP
2046
2047 *ret_first_boot = false;
2048 }
2049}
2050
5afbaa36
LP
2051static int initialize_runtime(
2052 bool skip_setup,
3023f2fe 2053 bool first_boot,
5afbaa36
LP
2054 struct rlimit *saved_rlimit_nofile,
2055 struct rlimit *saved_rlimit_memlock,
2056 const char **ret_error_message) {
5afbaa36
LP
2057 int r;
2058
2059 assert(ret_error_message);
2060
2061 /* Sets up various runtime parameters. Many of these initializations are conditionalized:
2062 *
2063 * - Some only apply to --system instances
2064 * - Some only apply to --user instances
2065 * - Some only apply when we first start up, but not when we reexecute
2066 */
2067
2d776038
LP
2068 if (arg_action != ACTION_RUN)
2069 return 0;
2070
61fbbac1 2071 update_cpu_affinity(skip_setup);
b070c7c0 2072 update_numa_policy(skip_setup);
61fbbac1 2073
3c3c6cb9 2074 if (arg_system) {
5238e957 2075 /* Make sure we leave a core dump without panicking the kernel. */
3c3c6cb9 2076 install_crash_handler();
5afbaa36 2077
3c3c6cb9 2078 if (!skip_setup) {
143fadf3 2079 r = mount_cgroup_controllers();
3c3c6cb9
LP
2080 if (r < 0) {
2081 *ret_error_message = "Failed to mount cgroup hierarchies";
2082 return r;
2083 }
2084
2085 status_welcome();
b6fad306 2086 (void) hostname_setup(true);
3023f2fe
HS
2087 /* Force transient machine-id on first boot. */
2088 machine_id_setup(NULL, first_boot, arg_machine_id, NULL);
df883de9 2089 (void) loopback_setup();
3c3c6cb9 2090 bump_unix_max_dgram_qlen();
a8b627aa 2091 bump_file_max_and_nr_open();
3c3c6cb9
LP
2092 test_usr();
2093 write_container_id();
2094 }
8a2c1fbf 2095
3c3c6cb9
LP
2096 if (arg_watchdog_device) {
2097 r = watchdog_set_device(arg_watchdog_device);
2098 if (r < 0)
2099 log_warning_errno(r, "Failed to set watchdog device to %s, ignoring: %m", arg_watchdog_device);
2100 }
32429805
LP
2101 } else {
2102 _cleanup_free_ char *p = NULL;
2103
2104 /* Create the runtime directory and place the inaccessible device nodes there, if we run in
2105 * user mode. In system mode mount_setup() already did that. */
2106
2107 r = xdg_user_runtime_dir(&p, "/systemd");
2108 if (r < 0) {
2109 *ret_error_message = "$XDG_RUNTIME_DIR is not set";
2110 return log_emergency_errno(r, "Failed to determine $XDG_RUNTIME_DIR path: %m");
2111 }
2112
e813a74a 2113 (void) mkdir_p_label(p, 0755);
32429805 2114 (void) make_inaccessible_nodes(p, UID_INVALID, GID_INVALID);
3c3c6cb9 2115 }
5afbaa36
LP
2116
2117 if (arg_timer_slack_nsec != NSEC_INFINITY)
2118 if (prctl(PR_SET_TIMERSLACK, arg_timer_slack_nsec) < 0)
3a671cd1 2119 log_warning_errno(errno, "Failed to adjust timer slack, ignoring: %m");
5afbaa36
LP
2120
2121 if (arg_system && !cap_test_all(arg_capability_bounding_set)) {
2122 r = capability_bounding_set_drop_usermode(arg_capability_bounding_set);
2123 if (r < 0) {
2124 *ret_error_message = "Failed to drop capability bounding set of usermode helpers";
2125 return log_emergency_errno(r, "Failed to drop capability bounding set of usermode helpers: %m");
2126 }
2127
2128 r = capability_bounding_set_drop(arg_capability_bounding_set, true);
2129 if (r < 0) {
2130 *ret_error_message = "Failed to drop capability bounding set";
2131 return log_emergency_errno(r, "Failed to drop capability bounding set: %m");
2132 }
2133 }
2134
39362f6f
JB
2135 if (arg_system && arg_no_new_privs) {
2136 if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) < 0) {
2137 *ret_error_message = "Failed to disable new privileges";
2138 return log_emergency_errno(errno, "Failed to disable new privileges: %m");
2139 }
2140 }
2141
5afbaa36
LP
2142 if (arg_syscall_archs) {
2143 r = enforce_syscall_archs(arg_syscall_archs);
2144 if (r < 0) {
2145 *ret_error_message = "Failed to set syscall architectures";
2146 return r;
2147 }
2148 }
2149
2150 if (!arg_system)
2151 /* Become reaper of our children */
2152 if (prctl(PR_SET_CHILD_SUBREAPER, 1) < 0)
2153 log_warning_errno(errno, "Failed to make us a subreaper: %m");
2154
a17c1712
LP
2155 /* Bump up RLIMIT_NOFILE for systemd itself */
2156 (void) bump_rlimit_nofile(saved_rlimit_nofile);
2157 (void) bump_rlimit_memlock(saved_rlimit_memlock);
5afbaa36
LP
2158
2159 return 0;
2160}
2161
6acca5fc
LP
2162static int do_queue_default_job(
2163 Manager *m,
2164 const char **ret_error_message) {
2165
2166 _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
f1d075dc
ZJS
2167 const char *unit;
2168 Job *job;
2169 Unit *target;
6acca5fc
LP
2170 int r;
2171
8755dbad 2172 if (arg_default_unit)
f1d075dc 2173 unit = arg_default_unit;
8755dbad 2174 else if (in_initrd())
f1d075dc 2175 unit = SPECIAL_INITRD_TARGET;
8755dbad 2176 else
f1d075dc 2177 unit = SPECIAL_DEFAULT_TARGET;
8755dbad 2178
f1d075dc 2179 log_debug("Activating default unit: %s", unit);
8755dbad 2180
f1d075dc 2181 r = manager_load_startable_unit_or_warn(m, unit, NULL, &target);
8755dbad
ZJS
2182 if (r < 0 && in_initrd() && !arg_default_unit) {
2183 /* Fall back to default.target, which we used to always use by default. Only do this if no
2184 * explicit configuration was given. */
2185
2186 log_info("Falling back to " SPECIAL_DEFAULT_TARGET ".");
6acca5fc 2187
8755dbad
ZJS
2188 r = manager_load_startable_unit_or_warn(m, SPECIAL_DEFAULT_TARGET, NULL, &target);
2189 }
4109ede7 2190 if (r < 0) {
8755dbad 2191 log_info("Falling back to " SPECIAL_RESCUE_TARGET ".");
6acca5fc 2192
4109ede7 2193 r = manager_load_startable_unit_or_warn(m, SPECIAL_RESCUE_TARGET, NULL, &target);
6acca5fc 2194 if (r < 0) {
8755dbad
ZJS
2195 *ret_error_message = r == -ERFKILL ? SPECIAL_RESCUE_TARGET " masked"
2196 : "Failed to load " SPECIAL_RESCUE_TARGET;
4109ede7 2197 return r;
6acca5fc
LP
2198 }
2199 }
2200
2201 assert(target->load_state == UNIT_LOADED);
2202
f1d075dc 2203 r = manager_add_job(m, JOB_START, target, JOB_ISOLATE, NULL, &error, &job);
6acca5fc
LP
2204 if (r == -EPERM) {
2205 log_debug_errno(r, "Default target could not be isolated, starting instead: %s", bus_error_message(&error, r));
2206
2207 sd_bus_error_free(&error);
2208
f1d075dc 2209 r = manager_add_job(m, JOB_START, target, JOB_REPLACE, NULL, &error, &job);
6acca5fc
LP
2210 if (r < 0) {
2211 *ret_error_message = "Failed to start default target";
2212 return log_emergency_errno(r, "Failed to start default target: %s", bus_error_message(&error, r));
2213 }
2214
2215 } else if (r < 0) {
2216 *ret_error_message = "Failed to isolate default target";
2217 return log_emergency_errno(r, "Failed to isolate default target: %s", bus_error_message(&error, r));
c86c31d9
ZJS
2218 } else
2219 log_info("Queued %s job for default target %s.",
2220 job_type_to_string(job->type),
2221 unit_status_string(job->unit));
6acca5fc 2222
f1d075dc 2223 m->default_unit_job_id = job->id;
6acca5fc
LP
2224
2225 return 0;
2226}
2227
a9fd4cd1
FB
/* Stores the current RLIMIT_NOFILE and RLIMIT_MEMLOCK settings in the two output structures. If
 * reading a limit fails, a warning is logged and the respective structure is left as it was. */
static void save_rlimits(struct rlimit *saved_rlimit_nofile,
                         struct rlimit *saved_rlimit_memlock) {
        int k;

        assert(saved_rlimit_nofile);
        assert(saved_rlimit_memlock);

        k = getrlimit(RLIMIT_NOFILE, saved_rlimit_nofile);
        if (k < 0)
                log_warning_errno(errno, "Reading RLIMIT_NOFILE failed, ignoring: %m");

        k = getrlimit(RLIMIT_MEMLOCK, saved_rlimit_memlock);
        if (k < 0)
                log_warning_errno(errno, "Reading RLIMIT_MEMLOCK failed, ignoring: %m");
}
2240
2241static void fallback_rlimit_nofile(const struct rlimit *saved_rlimit_nofile) {
2242 struct rlimit *rl;
2243
2244 if (arg_default_rlimit[RLIMIT_NOFILE])
2245 return;
2246
2247 /* Make sure forked processes get limits based on the original kernel setting */
2248
2249 rl = newdup(struct rlimit, saved_rlimit_nofile, 1);
2250 if (!rl) {
2251 log_oom();
2252 return;
2253 }
2254
2255 /* Bump the hard limit for system services to a substantially higher value. The default
2256 * hard limit current kernels set is pretty low (4K), mostly for historical
2257 * reasons. According to kernel developers, the fd handling in recent kernels has been
2258 * optimized substantially enough, so that we can bump the limit now, without paying too
2259 * high a price in memory or performance. Note however that we only bump the hard limit,
2260 * not the soft limit. That's because select() works the way it works, and chokes on fds
2261 * >= 1024. If we'd bump the soft limit globally, it might accidentally happen to
2262 * unexpecting programs that they get fds higher than what they can process using
2263 * select(). By only bumping the hard limit but leaving the low limit as it is we avoid
2264 * this pitfall: programs that are written by folks aware of the select() problem in mind
2265 * (and thus use poll()/epoll instead of select(), the way everybody should) can
2266 * explicitly opt into high fds by bumping their soft limit beyond 1024, to the hard limit
2267 * we pass. */
2268 if (arg_system) {
2269 int nr;
2270
2271 /* Get the underlying absolute limit the kernel enforces */
2272 nr = read_nr_open();
2273
2274 rl->rlim_max = MIN((rlim_t) nr, MAX(rl->rlim_max, (rlim_t) HIGH_RLIMIT_NOFILE));
2275 }
2276
2277 /* If for some reason we were invoked with a soft limit above 1024 (which should never
2278 * happen!, but who knows what we get passed in from pam_limit when invoked as --user
2279 * instance), then lower what we pass on to not confuse our children */
2280 rl->rlim_cur = MIN(rl->rlim_cur, (rlim_t) FD_SETSIZE);
2281
2282 arg_default_rlimit[RLIMIT_NOFILE] = rl;
2283}
2284
2285static void fallback_rlimit_memlock(const struct rlimit *saved_rlimit_memlock) {
2286 struct rlimit *rl;
2287
2288 /* Pass the original value down to invoked processes */
2289
2290 if (arg_default_rlimit[RLIMIT_MEMLOCK])
2291 return;
2292
2293 rl = newdup(struct rlimit, saved_rlimit_memlock, 1);
2294 if (!rl) {
2295 log_oom();
2296 return;
2297 }
2298
2299 arg_default_rlimit[RLIMIT_MEMLOCK] = rl;
2300}
2301
d55ed7de
ZJS
2302static void setenv_manager_environment(void) {
2303 char **p;
2304 int r;
2305
2306 STRV_FOREACH(p, arg_manager_environment) {
2307 log_debug("Setting '%s' in our own environment.", *p);
2308
2309 r = putenv_dup(*p, true);
2310 if (r < 0)
2311 log_warning_errno(errno, "Failed to setenv \"%s\", ignoring: %m", *p);
2312 }
2313}
2314
fb39af4c
ZJS
2315static void reset_arguments(void) {
2316 /* Frees/resets arg_* variables, with a few exceptions commented below. */
970777b5
LP
2317
2318 arg_default_unit = mfree(arg_default_unit);
fb39af4c
ZJS
2319
2320 /* arg_system — ignore */
2321
2322 arg_dump_core = true;
2323 arg_crash_chvt = -1;
2324 arg_crash_shell = false;
2325 arg_crash_reboot = false;
970777b5 2326 arg_confirm_spawn = mfree(arg_confirm_spawn);
fb39af4c 2327 arg_show_status = _SHOW_STATUS_INVALID;
36cf4507 2328 arg_status_unit_format = STATUS_UNIT_FORMAT_DEFAULT;
fb39af4c
ZJS
2329 arg_switched_root = false;
2330 arg_pager_flags = 0;
2331 arg_service_watchdogs = true;
2332 arg_default_std_output = EXEC_OUTPUT_JOURNAL;
2333 arg_default_std_error = EXEC_OUTPUT_INHERIT;
2334 arg_default_restart_usec = DEFAULT_RESTART_USEC;
2335 arg_default_timeout_start_usec = DEFAULT_TIMEOUT_USEC;
2336 arg_default_timeout_stop_usec = DEFAULT_TIMEOUT_USEC;
2337 arg_default_timeout_abort_usec = DEFAULT_TIMEOUT_USEC;
2338 arg_default_timeout_abort_set = false;
2339 arg_default_start_limit_interval = DEFAULT_START_LIMIT_INTERVAL;
2340 arg_default_start_limit_burst = DEFAULT_START_LIMIT_BURST;
2341 arg_runtime_watchdog = 0;
65224c1d 2342 arg_reboot_watchdog = 10 * USEC_PER_MINUTE;
acafd7d8 2343 arg_kexec_watchdog = 0;
fb39af4c
ZJS
2344 arg_early_core_pattern = NULL;
2345 arg_watchdog_device = NULL;
2346
970777b5 2347 arg_default_environment = strv_free(arg_default_environment);
d55ed7de 2348 arg_manager_environment = strv_free(arg_manager_environment);
fb39af4c
ZJS
2349 rlimit_free_all(arg_default_rlimit);
2350
2351 arg_capability_bounding_set = CAP_ALL;
2352 arg_no_new_privs = false;
2353 arg_timer_slack_nsec = NSEC_INFINITY;
2354 arg_default_timer_accuracy_usec = 1 * USEC_PER_MINUTE;
2355
970777b5 2356 arg_syscall_archs = set_free(arg_syscall_archs);
61fbbac1 2357
fb39af4c
ZJS
2358 /* arg_serialization — ignore */
2359
2360 arg_default_cpu_accounting = -1;
2361 arg_default_io_accounting = false;
2362 arg_default_ip_accounting = false;
2363 arg_default_blockio_accounting = false;
2364 arg_default_memory_accounting = MEMORY_ACCOUNTING_DEFAULT;
2365 arg_default_tasks_accounting = true;
3a0f06c4 2366 arg_default_tasks_max = DEFAULT_TASKS_MAX;
fb39af4c
ZJS
2367 arg_machine_id = (sd_id128_t) {};
2368 arg_cad_burst_action = EMERGENCY_ACTION_REBOOT_FORCE;
2369 arg_default_oom_policy = OOM_STOP;
2370
61fbbac1 2371 cpu_set_reset(&arg_cpu_affinity);
b070c7c0 2372 numa_policy_reset(&arg_numa_policy);
d247f232
LP
2373
2374 arg_random_seed = mfree(arg_random_seed);
2375 arg_random_seed_size = 0;
33d943d1 2376 arg_clock_usec = 0;
970777b5
LP
2377}
2378
a9fd4cd1
FB
2379static int parse_configuration(const struct rlimit *saved_rlimit_nofile,
2380 const struct rlimit *saved_rlimit_memlock) {
97d1fb94
LP
2381 int r;
2382
a9fd4cd1
FB
2383 assert(saved_rlimit_nofile);
2384 assert(saved_rlimit_memlock);
2385
fb39af4c
ZJS
2386 /* Assign configuration defaults */
2387 reset_arguments();
2388
97d1fb94 2389 r = parse_config_file();
470a5e6d
ZJS
2390 if (r < 0)
2391 log_warning_errno(r, "Failed to parse config file, ignoring: %m");
97d1fb94
LP
2392
2393 if (arg_system) {
2394 r = proc_cmdline_parse(parse_proc_cmdline_item, NULL, 0);
2395 if (r < 0)
2396 log_warning_errno(r, "Failed to parse kernel command line, ignoring: %m");
2397 }
2398
a9fd4cd1
FB
2399 /* Initialize some default rlimits for services if they haven't been configured */
2400 fallback_rlimit_nofile(saved_rlimit_nofile);
2401 fallback_rlimit_memlock(saved_rlimit_memlock);
2402
97d1fb94
LP
2403 /* Note that this also parses bits from the kernel command line, including "debug". */
2404 log_parse_environment();
2405
db33214b 2406 /* Initialize the show status setting if it hasn't been set explicitly yet */
7a293242 2407 if (arg_show_status == _SHOW_STATUS_INVALID)
db33214b
LP
2408 arg_show_status = SHOW_STATUS_YES;
2409
d55ed7de
ZJS
2410 /* Push variables into the manager environment block */
2411 setenv_manager_environment();
2412
97d1fb94
LP
2413 return 0;
2414}
2415
b0d7c989
LP
2416static int safety_checks(void) {
2417
febf46a4 2418 if (getpid_cached() == 1 &&
baaa35ad
ZJS
2419 arg_action != ACTION_RUN)
2420 return log_error_errno(SYNTHETIC_ERRNO(EPERM),
2421 "Unsupported execution mode while PID 1.");
febf46a4
LP
2422
2423 if (getpid_cached() == 1 &&
baaa35ad
ZJS
2424 !arg_system)
2425 return log_error_errno(SYNTHETIC_ERRNO(EPERM),
2426 "Can't run --user mode as PID 1.");
febf46a4
LP
2427
2428 if (arg_action == ACTION_RUN &&
2429 arg_system &&
baaa35ad
ZJS
2430 getpid_cached() != 1)
2431 return log_error_errno(SYNTHETIC_ERRNO(EPERM),
2432 "Can't run system mode unless PID 1.");
febf46a4 2433
b0d7c989 2434 if (arg_action == ACTION_TEST &&
baaa35ad
ZJS
2435 geteuid() == 0)
2436 return log_error_errno(SYNTHETIC_ERRNO(EPERM),
2437 "Don't run test mode as root.");
b0d7c989
LP
2438
2439 if (!arg_system &&
2440 arg_action == ACTION_RUN &&
baaa35ad
ZJS
2441 sd_booted() <= 0)
2442 return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP),
2443 "Trying to run as user instance, but the system has not been booted with systemd.");
b0d7c989
LP
2444
2445 if (!arg_system &&
2446 arg_action == ACTION_RUN &&
baaa35ad
ZJS
2447 !getenv("XDG_RUNTIME_DIR"))
2448 return log_error_errno(SYNTHETIC_ERRNO(EUNATCH),
2449 "Trying to run as user instance, but $XDG_RUNTIME_DIR is not set.");
b0d7c989
LP
2450
2451 if (arg_system &&
2452 arg_action == ACTION_RUN &&
baaa35ad
ZJS
2453 running_in_chroot() > 0)
2454 return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP),
2455 "Cannot be run in a chroot() environment.");
b0d7c989
LP
2456
2457 return 0;
2458}
2459
74da609f
LP
2460static int initialize_security(
2461 bool *loaded_policy,
2462 dual_timestamp *security_start_timestamp,
2463 dual_timestamp *security_finish_timestamp,
2464 const char **ret_error_message) {
2465
2466 int r;
2467
2468 assert(loaded_policy);
2469 assert(security_start_timestamp);
2470 assert(security_finish_timestamp);
2471 assert(ret_error_message);
2472
2473 dual_timestamp_get(security_start_timestamp);
2474
97149f40 2475 r = mac_selinux_setup(loaded_policy);
74da609f
LP
2476 if (r < 0) {
2477 *ret_error_message = "Failed to load SELinux policy";
2478 return r;
2479 }
2480
2481 r = mac_smack_setup(loaded_policy);
2482 if (r < 0) {
2483 *ret_error_message = "Failed to load SMACK policy";
2484 return r;
2485 }
2486
2ffadd3c
Y
2487 r = mac_apparmor_setup();
2488 if (r < 0) {
2489 *ret_error_message = "Failed to load AppArmor policy";
2490 return r;
2491 }
2492
74da609f
LP
2493 r = ima_setup();
2494 if (r < 0) {
2495 *ret_error_message = "Failed to load IMA policy";
2496 return r;
2497 }
2498
2499 dual_timestamp_get(security_finish_timestamp);
2500 return 0;
2501}
2502
263162da
LP
2503static void test_summary(Manager *m) {
2504 assert(m);
2505
2506 printf("-> By units:\n");
2507 manager_dump_units(m, stdout, "\t");
2508
2509 printf("-> By jobs:\n");
2510 manager_dump_jobs(m, stdout, "\t");
2511}
2512
efeb853f
LP
2513static int collect_fds(FDSet **ret_fds, const char **ret_error_message) {
2514 int r;
2515
2516 assert(ret_fds);
2517 assert(ret_error_message);
2518
2519 r = fdset_new_fill(ret_fds);
2520 if (r < 0) {
2521 *ret_error_message = "Failed to allocate fd set";
2522 return log_emergency_errno(r, "Failed to allocate fd set: %m");
2523 }
2524
2525 fdset_cloexec(*ret_fds, true);
2526
2527 if (arg_serialization)
2528 assert_se(fdset_remove(*ret_fds, fileno(arg_serialization)) >= 0);
2529
2530 return 0;
2531}
2532
2e51b31c
LP
2533static void setup_console_terminal(bool skip_setup) {
2534
2535 if (!arg_system)
2536 return;
2537
2538 /* Become a session leader if we aren't one yet. */
2539 (void) setsid();
2540
2541 /* If we are init, we connect stdin/stdout/stderr to /dev/null and make sure we don't have a controlling
2542 * tty. */
2543 (void) release_terminal();
2544
2545 /* Reset the console, but only if this is really init and we are freshly booted */
2546 if (getpid_cached() == 1 && !skip_setup)
2547 (void) console_setup();
2548}
2549
aa40ff07
LP
/* Quick peek at the command line, long before the real option parsing, to tell a reexecution apart from a
 * regular boot.
 *
 * Returns true if "--deserialize" is present (we are reexecuting, so the extensive setup can be skipped),
 * unless "--switched-root" is present too: after switching root the special setup is always done, even
 * though deserialization happens as well. argv[0] is not inspected. */
static bool early_skip_setup_check(int argc, char *argv[]) {
        bool reexecuting = false;

        for (int idx = 1; idx < argc; idx++) {
                const char *arg = argv[idx];

                if (streq(arg, "--switched-root"))
                        return false; /* Switched root: never skip the setup. */

                if (streq(arg, "--deserialize"))
                        reexecuting = true;
        }

        return reexecuting;
}
2565
0e06a031
LP
2566static int save_env(void) {
2567 char **l;
2568
2569 l = strv_copy(environ);
2570 if (!l)
2571 return -ENOMEM;
2572
2573 strv_free_and_replace(saved_env, l);
2574 return 0;
2575}
2576
/* Entry point of the manager binary.
 *
 * Rough sequence, as visible in this function:
 *   1. Take early timestamps, remember command line and environment.
 *   2. Set up logging. As PID 1 additionally do early mounts, security policy loading, clock and
 *      coredump initialization, module loading and API file system mounts (most of it only when
 *      skip_setup is false, i.e. when not reexecuting).
 *   3. Parse configuration and command line, run the safety checks.
 *   4. Handle the one-shot actions (help, version, the dumps, bus introspection) and leave.
 *   5. For ACTION_RUN/ACTION_TEST: create the Manager, start it up, optionally queue the default job and
 *      (for ACTION_RUN only) enter invoke_main_loop().
 *   6. "finish": free the manager, then either reexecute, execute the shutdown binary, or — as PID 1 —
 *      call freeze_or_exit_or_reboot().
 *
 * Every error path stores a human readable text in error_message and jumps to "finish". Returns retval,
 * which starts out as EXIT_FAILURE. */
int main(int argc, char *argv[]) {

        dual_timestamp initrd_timestamp = DUAL_TIMESTAMP_NULL, userspace_timestamp = DUAL_TIMESTAMP_NULL, kernel_timestamp = DUAL_TIMESTAMP_NULL,
                security_start_timestamp = DUAL_TIMESTAMP_NULL, security_finish_timestamp = DUAL_TIMESTAMP_NULL;
        struct rlimit saved_rlimit_nofile = RLIMIT_MAKE_CONST(0),
                saved_rlimit_memlock = RLIMIT_MAKE_CONST(RLIM_INFINITY); /* The original rlimits we passed
                                                                          * in. Note we use different values
                                                                          * for the two that indicate whether
                                                                          * these fields are initialized! */
        bool skip_setup, loaded_policy = false, queue_default_job = false, first_boot = false, reexecute = false;
        char *switch_root_dir = NULL, *switch_root_init = NULL;
        usec_t before_startup, after_startup;
        static char systemd[] = "systemd";
        char timespan[FORMAT_TIMESPAN_MAX];
        const char *shutdown_verb = NULL, *error_message = NULL;
        int r, retval = EXIT_FAILURE;
        Manager *m = NULL;
        FDSet *fds = NULL;

        /* SysV compatibility: redirect init → telinit */
        redirect_telinit(argc, argv);

        /* Take timestamps early on */
        dual_timestamp_from_monotonic(&kernel_timestamp, 0);
        dual_timestamp_get(&userspace_timestamp);

        /* Figure out whether we need to initialize the system, or if we already did that because we are
         * reexecuting */
        skip_setup = early_skip_setup_check(argc, argv);

        /* If we get started via the /sbin/init symlink then we are called 'init'. After a subsequent reexecution we
         * are then called 'systemd'. That is confusing, hence let's call us systemd right-away. */
        program_invocation_short_name = systemd;
        (void) prctl(PR_SET_NAME, systemd);

        /* Save the original command line */
        save_argc_argv(argc, argv);

        /* Save the original environment as we might need to restore it if we're requested to execute another
         * system manager later. */
        r = save_env();
        if (r < 0) {
                error_message = "Failed to copy environment block";
                goto finish;
        }

        /* Make sure that if the user says "syslog" we actually log to the journal. */
        log_set_upgrade_syslog_to_journal(true);

        if (getpid_cached() == 1) {
                /* When we run as PID 1 force system mode */
                arg_system = true;

                /* Disable the umask logic */
                umask(0);

                /* Make sure that at least initially we do not ever log to journald/syslogd, because it might not be
                 * activated yet (even though the log socket for it exists). */
                log_set_prohibit_ipc(true);

                /* Always reopen /dev/console when running as PID 1 or one of its pre-execve() children. This is
                 * important so that we never end up logging to any foreign stderr, for example if we have to log in a
                 * child process right before execve()'ing the actual binary, at a point in time where socket
                 * activation stderr/stdout are already set up. */
                log_set_always_reopen_console(true);

                if (detect_container() <= 0) {

                        /* Running outside of a container as PID 1 */
                        log_set_target(LOG_TARGET_KMSG);
                        log_open();

                        if (in_initrd())
                                initrd_timestamp = userspace_timestamp;

                        if (!skip_setup) {
                                r = mount_setup_early();
                                if (r < 0) {
                                        error_message = "Failed to mount early API filesystems";
                                        goto finish;
                                }

                                /* Let's open the log backend a second time, in case the first time didn't
                                 * work. Quite possibly we have mounted /dev just now, so /dev/kmsg became
                                 * available, and it previously wasn't. */
                                log_open();

                                disable_printk_ratelimit();

                                /* On failure initialize_security() has already filled in error_message. */
                                r = initialize_security(
                                                &loaded_policy,
                                                &security_start_timestamp,
                                                &security_finish_timestamp,
                                                &error_message);
                                if (r < 0)
                                        goto finish;
                        }

                        if (mac_selinux_init() < 0) {
                                error_message = "Failed to initialize SELinux support";
                                goto finish;
                        }

                        if (!skip_setup)
                                initialize_clock();

                        /* Set the default for later on, but don't actually open the logs like this for now. Note that
                         * if we are transitioning from the initrd there might still be journal fd open, and we
                         * shouldn't attempt opening that before we parsed /proc/cmdline which might redirect output
                         * elsewhere. */
                        log_set_target(LOG_TARGET_JOURNAL_OR_KMSG);

                } else {
                        /* Running inside a container, as PID 1 */
                        log_set_target(LOG_TARGET_CONSOLE);
                        log_open();

                        /* For later on, see above... */
                        log_set_target(LOG_TARGET_JOURNAL);

                        /* clear the kernel timestamp, because we are in a container */
                        kernel_timestamp = DUAL_TIMESTAMP_NULL;
                }

                initialize_coredump(skip_setup);

                r = fixup_environment();
                if (r < 0) {
                        log_emergency_errno(r, "Failed to fix up PID 1 environment: %m");
                        error_message = "Failed to fix up PID1 environment";
                        goto finish;
                }

                /* Try to figure out if we can use colors with the console. No need to do that for user instances since
                 * they never log into the console. */
                log_show_color(colors_enabled());

                /* Failing to redirect stdio is not fatal, we only warn about it. */
                r = make_null_stdio();
                if (r < 0)
                        log_warning_errno(r, "Failed to redirect standard streams to /dev/null, ignoring: %m");

                /* Load the kernel modules early. */
                if (!skip_setup)
                        kmod_setup();

                /* Mount /proc, /sys and friends, so that /proc/cmdline and /proc/$PID/fd is available. */
                r = mount_setup(loaded_policy, skip_setup);
                if (r < 0) {
                        error_message = "Failed to mount API filesystems";
                        goto finish;
                }

                /* The efivarfs is now mounted, let's read the random seed off it */
                (void) efi_take_random_seed();

                /* Cache command-line options passed from EFI variables */
                if (!skip_setup)
                        (void) cache_efi_options_variable();
        } else {
                /* Running as user instance */
                arg_system = false;
                log_set_target(LOG_TARGET_AUTO);
                log_open();

                /* clear the kernel timestamp, because we are not PID 1 */
                kernel_timestamp = DUAL_TIMESTAMP_NULL;

                if (mac_selinux_init() < 0) {
                        error_message = "Failed to initialize SELinux support";
                        goto finish;
                }
        }

        /* Save the original RLIMIT_NOFILE/RLIMIT_MEMLOCK so that we can reset it later when
         * transitioning from the initrd to the main systemd or suchlike. */
        save_rlimits(&saved_rlimit_nofile, &saved_rlimit_memlock);

        /* Reset all signal handlers. */
        (void) reset_all_signal_handlers();
        (void) ignore_signals(SIGNALS_IGNORE);

        /* The result of parsing the configuration is deliberately ignored; only command line parsing
         * failures are treated as fatal below. */
        (void) parse_configuration(&saved_rlimit_nofile, &saved_rlimit_memlock);

        r = parse_argv(argc, argv);
        if (r < 0) {
                error_message = "Failed to parse commandline arguments";
                goto finish;
        }

        r = safety_checks();
        if (r < 0)
                goto finish;

        /* The actions listed here get a pager; it is closed again first thing under "finish". */
        if (IN_SET(arg_action, ACTION_TEST, ACTION_HELP, ACTION_DUMP_CONFIGURATION_ITEMS, ACTION_DUMP_BUS_PROPERTIES, ACTION_BUS_INTROSPECT))
                (void) pager_open(arg_pager_flags);

        /* Anything but a real run never performs the system setup steps guarded by skip_setup. */
        if (arg_action != ACTION_RUN)
                skip_setup = true;

        /* One-shot actions: carry them out and leave via the common cleanup path. */
        if (arg_action == ACTION_HELP) {
                retval = help() < 0 ? EXIT_FAILURE : EXIT_SUCCESS;
                goto finish;
        } else if (arg_action == ACTION_VERSION) {
                retval = version();
                goto finish;
        } else if (arg_action == ACTION_DUMP_CONFIGURATION_ITEMS) {
                unit_dump_config_items(stdout);
                retval = EXIT_SUCCESS;
                goto finish;
        } else if (arg_action == ACTION_DUMP_BUS_PROPERTIES) {
                dump_bus_properties(stdout);
                retval = EXIT_SUCCESS;
                goto finish;
        } else if (arg_action == ACTION_BUS_INTROSPECT) {
                r = bus_manager_introspect_implementations(stdout, arg_bus_introspect);
                retval = r >= 0 ? EXIT_SUCCESS : EXIT_FAILURE;
                goto finish;
        }

        /* From here on only the two actions that need a Manager object remain. */
        assert_se(IN_SET(arg_action, ACTION_RUN, ACTION_TEST));

        /* Move out of the way, so that we won't block unmounts */
        assert_se(chdir("/") == 0);

        if (arg_action == ACTION_RUN) {
                if (!skip_setup) {
                        /* Apply the systemd.clock_usec= kernel command line switch */
                        apply_clock_update();

                        /* Apply random seed from kernel command line */
                        cmdline_take_random_seed();
                }

                /* A core pattern might have been specified via the cmdline. */
                initialize_core_pattern(skip_setup);

                /* Close logging fds, in order not to confuse collecting passed fds and terminal logic below */
                log_close();

                /* Remember open file descriptors for later deserialization */
                r = collect_fds(&fds, &error_message);
                if (r < 0)
                        goto finish;

                /* Give up any control of the console, but make sure it's initialized. */
                setup_console_terminal(skip_setup);

                /* Open the logging devices, if possible and necessary */
                log_open();
        }

        log_execution_mode(&first_boot);

        r = initialize_runtime(skip_setup,
                               first_boot,
                               &saved_rlimit_nofile,
                               &saved_rlimit_memlock,
                               &error_message);
        if (r < 0)
                goto finish;

        r = manager_new(arg_system ? UNIT_FILE_SYSTEM : UNIT_FILE_USER,
                        arg_action == ACTION_TEST ? MANAGER_TEST_FULL : 0,
                        &m);
        if (r < 0) {
                log_emergency_errno(r, "Failed to allocate manager object: %m");
                error_message = "Failed to allocate manager object";
                goto finish;
        }

        /* Hand the timestamps collected earlier in this function over to the manager. */
        m->timestamps[MANAGER_TIMESTAMP_KERNEL] = kernel_timestamp;
        m->timestamps[MANAGER_TIMESTAMP_INITRD] = initrd_timestamp;
        m->timestamps[MANAGER_TIMESTAMP_USERSPACE] = userspace_timestamp;
        m->timestamps[manager_timestamp_initrd_mangle(MANAGER_TIMESTAMP_SECURITY_START)] = security_start_timestamp;
        m->timestamps[manager_timestamp_initrd_mangle(MANAGER_TIMESTAMP_SECURITY_FINISH)] = security_finish_timestamp;

        set_manager_defaults(m);
        set_manager_settings(m);
        manager_set_first_boot(m, first_boot);

        /* Remember whether we should queue the default job */
        queue_default_job = !arg_serialization || arg_switched_root;

        /* before_startup/after_startup bracket the start-up phase; the difference is logged below. */
        before_startup = now(CLOCK_MONOTONIC);

        r = manager_startup(m, arg_serialization, fds);
        if (r < 0) {
                error_message = "Failed to start up manager";
                goto finish;
        }

        /* This will close all file descriptors that were opened, but not claimed by any unit. */
        fds = fdset_free(fds);
        arg_serialization = safe_fclose(arg_serialization);

        if (queue_default_job) {
                r = do_queue_default_job(m, &error_message);
                if (r < 0)
                        goto finish;
        }

        after_startup = now(CLOCK_MONOTONIC);

        log_full(arg_action == ACTION_TEST ? LOG_INFO : LOG_DEBUG,
                 "Loaded units and determined initial transaction in %s.",
                 format_timespan(timespan, sizeof(timespan), after_startup - before_startup, 100 * USEC_PER_MSEC));

        /* A test run stops here: print the summary instead of entering the main loop. */
        if (arg_action == ACTION_TEST) {
                test_summary(m);
                retval = EXIT_SUCCESS;
                goto finish;
        }

        /* The return value is ignored; results are reported through the output parameters
         * (reexecute, retval, shutdown_verb, fds, switch_root_*, error_message) evaluated below. */
        (void) invoke_main_loop(m,
                                &saved_rlimit_nofile,
                                &saved_rlimit_memlock,
                                &reexecute,
                                &retval,
                                &shutdown_verb,
                                &fds,
                                &switch_root_dir,
                                &switch_root_init,
                                &error_message);

finish:
        pager_close();

        if (m) {
                /* Copy the watchdog settings out of the manager before it is freed. */
                arg_reboot_watchdog = manager_get_watchdog(m, WATCHDOG_REBOOT);
                arg_kexec_watchdog = manager_get_watchdog(m, WATCHDOG_KEXEC);
                m = manager_free(m);
        }

        mac_selinux_finish();

        if (reexecute)
                do_reexecute(argc, argv,
                             &saved_rlimit_nofile,
                             &saved_rlimit_memlock,
                             fds,
                             switch_root_dir,
                             switch_root_init,
                             &error_message); /* This only returns if reexecution failed */

        arg_serialization = safe_fclose(arg_serialization);
        fds = fdset_free(fds);

        saved_env = strv_free(saved_env);

#if HAVE_VALGRIND_VALGRIND_H
        /* If we are PID 1 and running under valgrind, then let's exit
         * here explicitly. valgrind will only generate nice output on
         * exit(), not on exec(), hence let's do the former not the
         * latter here. */
        if (getpid_cached() == 1 && RUNNING_ON_VALGRIND) {
                /* Cleanup watchdog_device strings for valgrind. We need them
                 * in become_shutdown() so normally we cannot free them yet. */
                watchdog_free_device();
                arg_watchdog_device = mfree(arg_watchdog_device);
                reset_arguments();
                return retval;
        }
#endif

#if HAS_FEATURE_ADDRESS_SANITIZER
        __lsan_do_leak_check();
#endif

        if (shutdown_verb) {
                /* If become_shutdown() returns at all, that is treated as a failure to execute the
                 * shutdown binary and reported as such. */
                r = become_shutdown(shutdown_verb, retval);
                log_error_errno(r, "Failed to execute shutdown binary, %s: %m", getpid_cached() == 1 ? "freezing" : "quitting");
                error_message = "Failed to execute shutdown binary";
        }

        watchdog_free_device();
        arg_watchdog_device = mfree(arg_watchdog_device);

        if (getpid_cached() == 1) {
                /* As PID 1 show the pending error (if any) as an emergency status message before
                 * handing over to freeze_or_exit_or_reboot(). */
                if (error_message)
                        manager_status_printf(NULL, STATUS_TYPE_EMERGENCY,
                                              ANSI_HIGHLIGHT_RED "!!!!!!" ANSI_NORMAL,
                                              "%s.", error_message);
                freeze_or_exit_or_reboot();
        }

        reset_arguments();
        return retval;
}