]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/core/main.c
manager: print status text of the service when waiting for a job
[thirdparty/systemd.git] / src / core / main.c
CommitLineData
db9ecf05 1/* SPDX-License-Identifier: LGPL-2.1-or-later */
a7334b09 2
60918275 3#include <errno.h>
3dfc9763 4#include <fcntl.h>
f170852a 5#include <getopt.h>
664f88a7 6#include <sys/mount.h>
3dfc9763 7#include <sys/prctl.h>
b9e74c39 8#include <sys/reboot.h>
3dfc9763 9#include <unistd.h>
349cc4a5 10#if HAVE_SECCOMP
b64a3d86
LP
11#include <seccomp.h>
12#endif
349cc4a5 13#if HAVE_VALGRIND_VALGRIND_H
3dfc9763
LP
14#include <valgrind/valgrind.h>
15#endif
54b434b1 16
718db961 17#include "sd-bus.h"
cf0fbc49 18#include "sd-daemon.h"
b2e7486c 19#include "sd-messages.h"
3dfc9763 20
b5efdb8a 21#include "alloc-util.h"
2ffadd3c 22#include "apparmor-setup.h"
d9d93745 23#include "architecture.h"
3dfc9763
LP
24#include "build.h"
25#include "bus-error.h"
26#include "bus-util.h"
430f0182 27#include "capability-util.h"
a88c5b8a 28#include "cgroup-util.h"
24efb112 29#include "clock-util.h"
3dfc9763 30#include "conf-parser.h"
618234a5 31#include "cpu-set-util.h"
3dfc9763 32#include "dbus-manager.h"
c18ecf03 33#include "dbus.h"
3dfc9763 34#include "def.h"
32429805 35#include "dev-setup.h"
c18ecf03 36#include "efi-random.h"
209b2592 37#include "efivars.h"
eee8b7ab 38#include "emergency-action.h"
3dfc9763 39#include "env-util.h"
57b7a260 40#include "exit-status.h"
3ffd4af2 41#include "fd-util.h"
3dfc9763 42#include "fdset.h"
718db961 43#include "fileio.h"
f97b34a6 44#include "format-util.h"
f4f15635 45#include "fs-util.h"
d247f232 46#include "hexdecoct.h"
3dfc9763
LP
47#include "hostname-setup.h"
48#include "ima-setup.h"
49#include "killall.h"
50#include "kmod-setup.h"
eefc66aa 51#include "limits-util.h"
d7b8eec7 52#include "load-fragment.h"
3dfc9763 53#include "log.h"
b6e66135 54#include "loopback-setup.h"
b6e66135 55#include "machine-id-setup.h"
3dfc9763 56#include "manager.h"
2a341bb9 57#include "manager-dump.h"
a01ba4b2 58#include "manager-serialize.h"
32429805 59#include "mkdir.h"
3dfc9763 60#include "mount-setup.h"
d58ad743 61#include "os-util.h"
3dfc9763 62#include "pager.h"
614b022c 63#include "parse-argument.h"
6bedfcbb 64#include "parse-util.h"
7d5ceb64 65#include "path-util.h"
294bf0c3 66#include "pretty-print.h"
4e731273 67#include "proc-cmdline.h"
3dfc9763 68#include "process-util.h"
d247f232 69#include "random-util.h"
8869a0b4 70#include "raw-clone.h"
78f22b97 71#include "rlimit-util.h"
349cc4a5 72#if HAVE_SECCOMP
83f12b27
FS
73#include "seccomp-util.h"
74#endif
b6e66135 75#include "selinux-setup.h"
3dfc9763
LP
76#include "selinux-util.h"
77#include "signal-util.h"
ffbd2c4d 78#include "smack-setup.h"
3dfc9763 79#include "special.h"
8fcde012 80#include "stat-util.h"
15a5e950 81#include "stdio-util.h"
3dfc9763
LP
82#include "strv.h"
83#include "switch-root.h"
a8b627aa 84#include "sysctl-util.h"
3dfc9763 85#include "terminal-util.h"
8612da97 86#include "umask-util.h"
b1d4f8e1 87#include "user-util.h"
9ce17593 88#include "util.h"
3dfc9763
LP
89#include "virt.h"
90#include "watchdog.h"
b6e66135 91
7e11a95e
EV
92#if HAS_FEATURE_ADDRESS_SANITIZER
93#include <sanitizer/lsan_interface.h>
94#endif
95
3a0f06c4
ZJS
96#define DEFAULT_TASKS_MAX ((TasksMax) { 15U, 100U }) /* 15% */
97
f170852a
LP
98static enum {
99 ACTION_RUN,
e965d56d 100 ACTION_HELP,
9ba0bc4e 101 ACTION_VERSION,
e537352b 102 ACTION_TEST,
bbc1acab
YW
103 ACTION_DUMP_CONFIGURATION_ITEMS,
104 ACTION_DUMP_BUS_PROPERTIES,
5c08257b 105 ACTION_BUS_INTROSPECT,
fa0f4d8a 106} arg_action = ACTION_RUN;
fb39af4c 107
5c08257b
ZJS
108static const char *arg_bus_introspect = NULL;
109
45250e66
LP
110/* Those variables are initialized to 0 automatically, so we avoid uninitialized memory access. Real
111 * defaults are assigned in reset_arguments() below. */
fb39af4c
ZJS
112static char *arg_default_unit;
113static bool arg_system;
114static bool arg_dump_core;
115static int arg_crash_chvt;
116static bool arg_crash_shell;
117static bool arg_crash_reboot;
118static char *arg_confirm_spawn;
119static ShowStatus arg_show_status;
36cf4507 120static StatusUnitFormat arg_status_unit_format;
fb39af4c
ZJS
121static bool arg_switched_root;
122static PagerFlags arg_pager_flags;
123static bool arg_service_watchdogs;
124static ExecOutput arg_default_std_output;
125static ExecOutput arg_default_std_error;
126static usec_t arg_default_restart_usec;
127static usec_t arg_default_timeout_start_usec;
128static usec_t arg_default_timeout_stop_usec;
129static usec_t arg_default_timeout_abort_usec;
130static bool arg_default_timeout_abort_set;
131static usec_t arg_default_start_limit_interval;
132static unsigned arg_default_start_limit_burst;
133static usec_t arg_runtime_watchdog;
65224c1d 134static usec_t arg_reboot_watchdog;
acafd7d8 135static usec_t arg_kexec_watchdog;
fb39af4c
ZJS
136static char *arg_early_core_pattern;
137static char *arg_watchdog_device;
138static char **arg_default_environment;
d55ed7de 139static char **arg_manager_environment;
fb39af4c
ZJS
140static struct rlimit *arg_default_rlimit[_RLIMIT_MAX];
141static uint64_t arg_capability_bounding_set;
142static bool arg_no_new_privs;
143static nsec_t arg_timer_slack_nsec;
144static usec_t arg_default_timer_accuracy_usec;
145static Set* arg_syscall_archs;
146static FILE* arg_serialization;
147static int arg_default_cpu_accounting;
148static bool arg_default_io_accounting;
149static bool arg_default_ip_accounting;
150static bool arg_default_blockio_accounting;
151static bool arg_default_memory_accounting;
152static bool arg_default_tasks_accounting;
3a0f06c4 153static TasksMax arg_default_tasks_max;
fb39af4c
ZJS
154static sd_id128_t arg_machine_id;
155static EmergencyAction arg_cad_burst_action;
156static OOMPolicy arg_default_oom_policy;
157static CPUSet arg_cpu_affinity;
b070c7c0 158static NUMAPolicy arg_numa_policy;
3753325b 159static usec_t arg_clock_usec;
d247f232
LP
160static void *arg_random_seed;
161static size_t arg_random_seed_size;
61fbbac1 162
0e06a031
LP
163/* A copy of the original environment block */
164static char **saved_env = NULL;
165
a9fd4cd1
FB
166static int parse_configuration(const struct rlimit *saved_rlimit_nofile,
167 const struct rlimit *saved_rlimit_memlock);
4fc935ca 168
f70e6fb4
ZJS
169static int manager_find_user_config_paths(char ***ret_files, char ***ret_dirs) {
170 _cleanup_free_ char *base = NULL;
171 _cleanup_strv_free_ char **files = NULL, **dirs = NULL;
172 int r;
173
174 r = xdg_user_config_dir(&base, "/systemd");
175 if (r < 0)
176 return r;
177
178 r = strv_extendf(&files, "%s/user.conf", base);
179 if (r < 0)
180 return r;
181
182 r = strv_extend(&files, PKGSYSCONFDIR "/user.conf");
183 if (r < 0)
184 return r;
185
186 r = strv_consume(&dirs, TAKE_PTR(base));
187 if (r < 0)
188 return r;
189
190 r = strv_extend_strv(&dirs, CONF_PATHS_STRV("systemd"), false);
191 if (r < 0)
192 return r;
193
194 *ret_files = TAKE_PTR(files);
195 *ret_dirs = TAKE_PTR(dirs);
196 return 0;
197}
198
bb259772
LP
199_noreturn_ static void freeze_or_exit_or_reboot(void) {
200
c3b6a348
LP
201 /* If we are running in a container, let's prefer exiting, after all we can propagate an exit code to
202 * the container manager, and thus inform it that something went wrong. */
bb259772
LP
203 if (detect_container() > 0) {
204 log_emergency("Exiting PID 1...");
c3b6a348 205 _exit(EXIT_EXCEPTION);
bb259772 206 }
b9e74c39
LP
207
208 if (arg_crash_reboot) {
209 log_notice("Rebooting in 10s...");
210 (void) sleep(10);
211
212 log_notice("Rebooting now...");
213 (void) reboot(RB_AUTOBOOT);
214 log_emergency_errno(errno, "Failed to reboot: %m");
215 }
216
217 log_emergency("Freezing execution.");
218 freeze();
219}
220
848e863a 221_noreturn_ static void crash(int sig) {
7d06dad9
MS
222 struct sigaction sa;
223 pid_t pid;
97c4f35c 224
df0ff127 225 if (getpid_cached() != 1)
abb26902 226 /* Pass this on immediately, if this is not PID 1 */
92ca4cac 227 (void) raise(sig);
abb26902 228 else if (!arg_dump_core)
4104970e 229 log_emergency("Caught <%s>, not dumping core.", signal_to_string(sig));
97c4f35c 230 else {
7d06dad9 231 sa = (struct sigaction) {
189d5bac 232 .sa_handler = nop_signal_handler,
b92bea5d
ZJS
233 .sa_flags = SA_NOCLDSTOP|SA_RESTART,
234 };
97c4f35c 235
6f5e3f35 236 /* We want to wait for the core process, hence let's enable SIGCHLD */
92ca4cac 237 (void) sigaction(SIGCHLD, &sa, NULL);
6f5e3f35 238
8869a0b4 239 pid = raw_clone(SIGCHLD);
e62d8c39 240 if (pid < 0)
56f64d95 241 log_emergency_errno(errno, "Caught <%s>, cannot fork for core dump: %m", signal_to_string(sig));
97c4f35c 242 else if (pid == 0) {
97c4f35c 243 /* Enable default signal handler for core dump */
15a90032 244
92ca4cac
LP
245 sa = (struct sigaction) {
246 .sa_handler = SIG_DFL,
247 };
248 (void) sigaction(sig, &sa, NULL);
97c4f35c 249
15a90032
LP
250 /* Don't limit the coredump size */
251 (void) setrlimit(RLIMIT_CORE, &RLIMIT_MAKE_CONST(RLIM_INFINITY));
97c4f35c
LP
252
253 /* Just to be sure... */
e62d9b81 254 (void) chdir("/");
97c4f35c
LP
255
256 /* Raise the signal again */
ee05e779 257 pid = raw_getpid();
92ca4cac 258 (void) kill(pid, sig); /* raise() would kill the parent */
97c4f35c
LP
259
260 assert_not_reached("We shouldn't be here...");
bb85a582 261 _exit(EXIT_EXCEPTION);
4fc935ca 262 } else {
8e12a6ae
LP
263 siginfo_t status;
264 int r;
4fc935ca
LP
265
266 /* Order things nicely. */
e62d8c39
ZJS
267 r = wait_for_terminate(pid, &status);
268 if (r < 0)
da927ba9 269 log_emergency_errno(r, "Caught <%s>, waitpid() failed: %m", signal_to_string(sig));
e1714f02
ZJS
270 else if (status.si_code != CLD_DUMPED) {
271 const char *s = status.si_code == CLD_EXITED
e04ed6db 272 ? exit_status_to_string(status.si_status, EXIT_STATUS_LIBC)
e1714f02
ZJS
273 : signal_to_string(status.si_status);
274
ee05e779
ZJS
275 log_emergency("Caught <%s>, core dump failed (child "PID_FMT", code=%s, status=%i/%s).",
276 signal_to_string(sig),
e1714f02
ZJS
277 pid,
278 sigchld_code_to_string(status.si_code),
279 status.si_status, strna(s));
280 } else
281 log_emergency("Caught <%s>, dumped core as pid "PID_FMT".",
282 signal_to_string(sig), pid);
97c4f35c
LP
283 }
284 }
285
b9e74c39 286 if (arg_crash_chvt >= 0)
92ca4cac 287 (void) chvt(arg_crash_chvt);
601f6a1e 288
7d06dad9
MS
289 sa = (struct sigaction) {
290 .sa_handler = SIG_IGN,
291 .sa_flags = SA_NOCLDSTOP|SA_NOCLDWAIT|SA_RESTART,
292 };
293
294 /* Let the kernel reap children for us */
295 (void) sigaction(SIGCHLD, &sa, NULL);
8c43883a 296
7d06dad9 297 if (arg_crash_shell) {
b9e74c39 298 log_notice("Executing crash shell in 10s...");
92ca4cac 299 (void) sleep(10);
4fc935ca 300
8869a0b4 301 pid = raw_clone(SIGCHLD);
cd3bd60a 302 if (pid < 0)
56f64d95 303 log_emergency_errno(errno, "Failed to fork off crash shell: %m");
6f5e3f35 304 else if (pid == 0) {
b9e74c39 305 (void) setsid();
92ca4cac 306 (void) make_console_stdio();
595225af 307 (void) rlimit_nofile_safe();
92ca4cac 308 (void) execle("/bin/sh", "/bin/sh", NULL, environ);
6f5e3f35 309
ee05e779 310 log_emergency_errno(errno, "execle() failed: %m");
bb85a582 311 _exit(EXIT_EXCEPTION);
b9e74c39
LP
312 } else {
313 log_info("Spawned crash shell as PID "PID_FMT".", pid);
4cf0b03b 314 (void) wait_for_terminate(pid, NULL);
b9e74c39 315 }
4fc935ca
LP
316 }
317
bb259772 318 freeze_or_exit_or_reboot();
97c4f35c
LP
319}
320
321static void install_crash_handler(void) {
297d563d 322 static const struct sigaction sa = {
b92bea5d 323 .sa_handler = crash,
297d563d 324 .sa_flags = SA_NODEFER, /* So that we can raise the signal again from the signal handler */
b92bea5d 325 };
297d563d 326 int r;
97c4f35c 327
9c274488
LP
328 /* We ignore the return value here, since, we don't mind if we cannot set up a crash handler */
329 r = sigaction_many(&sa, SIGNALS_CRASH_HANDLER);
297d563d
LP
330 if (r < 0)
331 log_debug_errno(r, "I had trouble setting up the crash handler, ignoring: %m");
97c4f35c 332}
f170852a 333
56d96fc0
LP
334static int console_setup(void) {
335 _cleanup_close_ int tty_fd = -1;
336 int r;
80876c20 337
512947d4 338 tty_fd = open_terminal("/dev/console", O_WRONLY|O_NOCTTY|O_CLOEXEC);
23bbb0de
MS
339 if (tty_fd < 0)
340 return log_error_errno(tty_fd, "Failed to open /dev/console: %m");
80876c20 341
56d96fc0
LP
342 /* We don't want to force text mode. plymouth may be showing
343 * pictures already from initrd. */
512947d4 344 r = reset_terminal_fd(tty_fd, false);
23bbb0de
MS
345 if (r < 0)
346 return log_error_errno(r, "Failed to reset /dev/console: %m");
843d2643 347
56d96fc0 348 return 0;
80876c20
LP
349}
350
ee48dbd5 351static int set_machine_id(const char *m) {
e042eab7 352 sd_id128_t t;
8b26cdbd 353 assert(m);
ee48dbd5 354
e042eab7 355 if (sd_id128_from_string(m, &t) < 0)
ee48dbd5
NC
356 return -EINVAL;
357
e042eab7 358 if (sd_id128_is_null(t))
ee48dbd5
NC
359 return -EINVAL;
360
e042eab7 361 arg_machine_id = t;
ee48dbd5
NC
362 return 0;
363}
364
96287a49 365static int parse_proc_cmdline_item(const char *key, const char *value, void *data) {
059cb385 366 int r;
f170852a 367
059cb385 368 assert(key);
5192bd19 369
1d84ad94 370 if (STR_IN_SET(key, "systemd.unit", "rd.systemd.unit")) {
bf4df7c3 371
1d84ad94
LP
372 if (proc_cmdline_value_missing(key, value))
373 return 0;
bf4df7c3 374
1d84ad94
LP
375 if (!unit_name_is_valid(value, UNIT_NAME_PLAIN|UNIT_NAME_INSTANCE))
376 log_warning("Unit name specified on %s= is not valid, ignoring: %s", key, value);
cd57038a
ZJS
377 else if (in_initrd() == !!startswith(key, "rd."))
378 return free_and_strdup_warn(&arg_default_unit, value);
f170852a 379
1d84ad94 380 } else if (proc_cmdline_key_streq(key, "systemd.dump_core")) {
4fc935ca 381
1d84ad94 382 r = value ? parse_boolean(value) : true;
fb472900 383 if (r < 0)
5e1ee764 384 log_warning_errno(r, "Failed to parse dump core switch %s, ignoring: %m", value);
4fc935ca 385 else
fa0f4d8a 386 arg_dump_core = r;
4fc935ca 387
c6885f5f
FB
388 } else if (proc_cmdline_key_streq(key, "systemd.early_core_pattern")) {
389
390 if (proc_cmdline_value_missing(key, value))
391 return 0;
392
393 if (path_is_absolute(value))
614b022c 394 (void) parse_path_argument(value, false, &arg_early_core_pattern);
c6885f5f
FB
395 else
396 log_warning("Specified core pattern '%s' is not an absolute path, ignoring.", value);
397
1d84ad94 398 } else if (proc_cmdline_key_streq(key, "systemd.crash_chvt")) {
b9e74c39 399
1d84ad94
LP
400 if (!value)
401 arg_crash_chvt = 0; /* turn on */
5e1ee764 402 else {
a07a7324 403 r = parse_crash_chvt(value, &arg_crash_chvt);
5e1ee764
YW
404 if (r < 0)
405 log_warning_errno(r, "Failed to parse crash chvt switch %s, ignoring: %m", value);
406 }
b9e74c39 407
1d84ad94 408 } else if (proc_cmdline_key_streq(key, "systemd.crash_shell")) {
4fc935ca 409
1d84ad94 410 r = value ? parse_boolean(value) : true;
fb472900 411 if (r < 0)
5e1ee764 412 log_warning_errno(r, "Failed to parse crash shell switch %s, ignoring: %m", value);
4fc935ca 413 else
fa0f4d8a 414 arg_crash_shell = r;
5e7ee61c 415
1d84ad94 416 } else if (proc_cmdline_key_streq(key, "systemd.crash_reboot")) {
5e7ee61c 417
1d84ad94 418 r = value ? parse_boolean(value) : true;
b9e74c39 419 if (r < 0)
5e1ee764 420 log_warning_errno(r, "Failed to parse crash reboot switch %s, ignoring: %m", value);
5e7ee61c 421 else
b9e74c39 422 arg_crash_reboot = r;
5e7ee61c 423
1d84ad94
LP
424 } else if (proc_cmdline_key_streq(key, "systemd.confirm_spawn")) {
425 char *s;
7d5ceb64 426
1d84ad94 427 r = parse_confirm_spawn(value, &s);
059cb385 428 if (r < 0)
5e1ee764
YW
429 log_warning_errno(r, "Failed to parse confirm_spawn switch %s, ignoring: %m", value);
430 else
431 free_and_replace(arg_confirm_spawn, s);
601f6a1e 432
2a12e32e
JK
433 } else if (proc_cmdline_key_streq(key, "systemd.service_watchdogs")) {
434
435 r = value ? parse_boolean(value) : true;
436 if (r < 0)
5e1ee764 437 log_warning_errno(r, "Failed to parse service watchdog switch %s, ignoring: %m", value);
2a12e32e
JK
438 else
439 arg_service_watchdogs = r;
440
1d84ad94 441 } else if (proc_cmdline_key_streq(key, "systemd.show_status")) {
9e58ff9c 442
1d84ad94
LP
443 if (value) {
444 r = parse_show_status(value, &arg_show_status);
445 if (r < 0)
5e1ee764 446 log_warning_errno(r, "Failed to parse show status switch %s, ignoring: %m", value);
1d84ad94
LP
447 } else
448 arg_show_status = SHOW_STATUS_YES;
059cb385 449
36cf4507
ZJS
450 } else if (proc_cmdline_key_streq(key, "systemd.status_unit_format")) {
451
452 if (proc_cmdline_value_missing(key, value))
453 return 0;
454
455 r = status_unit_format_from_string(value);
456 if (r < 0)
457 log_warning_errno(r, "Failed to parse %s=%s, ignoring: %m", key, value);
458 else
459 arg_status_unit_format = r;
460
1d84ad94
LP
461 } else if (proc_cmdline_key_streq(key, "systemd.default_standard_output")) {
462
463 if (proc_cmdline_value_missing(key, value))
464 return 0;
0a494f1f 465
059cb385 466 r = exec_output_from_string(value);
fb472900 467 if (r < 0)
5e1ee764 468 log_warning_errno(r, "Failed to parse default standard output switch %s, ignoring: %m", value);
0a494f1f
LP
469 else
470 arg_default_std_output = r;
0a494f1f 471
1d84ad94
LP
472 } else if (proc_cmdline_key_streq(key, "systemd.default_standard_error")) {
473
474 if (proc_cmdline_value_missing(key, value))
475 return 0;
059cb385
LP
476
477 r = exec_output_from_string(value);
fb472900 478 if (r < 0)
5e1ee764 479 log_warning_errno(r, "Failed to parse default standard error switch %s, ignoring: %m", value);
0a494f1f
LP
480 else
481 arg_default_std_error = r;
9e7c5357 482
1d84ad94
LP
483 } else if (streq(key, "systemd.setenv")) {
484
485 if (proc_cmdline_value_missing(key, value))
486 return 0;
059cb385 487
b70935ac
ZJS
488 if (!env_assignment_is_valid(value))
489 log_warning("Environment variable assignment '%s' is not valid. Ignoring.", value);
490 else {
491 r = strv_env_replace_strdup(&arg_default_environment, value);
492 if (r < 0)
1d84ad94 493 return log_oom();
b70935ac 494 }
9e58ff9c 495
1d84ad94
LP
496 } else if (proc_cmdline_key_streq(key, "systemd.machine_id")) {
497
498 if (proc_cmdline_value_missing(key, value))
499 return 0;
500
501 r = set_machine_id(value);
502 if (r < 0)
5e1ee764 503 log_warning_errno(r, "MachineID '%s' is not valid, ignoring: %m", value);
ee48dbd5 504
1d84ad94
LP
505 } else if (proc_cmdline_key_streq(key, "systemd.default_timeout_start_sec")) {
506
507 if (proc_cmdline_value_missing(key, value))
508 return 0;
509
510 r = parse_sec(value, &arg_default_timeout_start_usec);
511 if (r < 0)
5e1ee764 512 log_warning_errno(r, "Failed to parse default start timeout '%s', ignoring: %m", value);
1d84ad94
LP
513
514 if (arg_default_timeout_start_usec <= 0)
515 arg_default_timeout_start_usec = USEC_INFINITY;
ee48dbd5 516
68d58f38
LP
517 } else if (proc_cmdline_key_streq(key, "systemd.cpu_affinity")) {
518
519 if (proc_cmdline_value_missing(key, value))
520 return 0;
521
522 r = parse_cpu_set(value, &arg_cpu_affinity);
523 if (r < 0)
162392b7 524 log_warning_errno(r, "Failed to parse CPU affinity mask '%s', ignoring: %m", value);
68d58f38 525
8a2c1fbf
EJ
526 } else if (proc_cmdline_key_streq(key, "systemd.watchdog_device")) {
527
528 if (proc_cmdline_value_missing(key, value))
529 return 0;
530
614b022c 531 (void) parse_path_argument(value, false, &arg_watchdog_device);
8a2c1fbf 532
3753325b
LP
533 } else if (proc_cmdline_key_streq(key, "systemd.clock_usec")) {
534
535 if (proc_cmdline_value_missing(key, value))
536 return 0;
537
538 r = safe_atou64(value, &arg_clock_usec);
539 if (r < 0)
540 log_warning_errno(r, "Failed to parse systemd.clock_usec= argument, ignoring: %s", value);
541
d247f232
LP
542 } else if (proc_cmdline_key_streq(key, "systemd.random_seed")) {
543 void *p;
544 size_t sz;
545
546 if (proc_cmdline_value_missing(key, value))
547 return 0;
548
f5fbe71d 549 r = unbase64mem(value, SIZE_MAX, &p, &sz);
d247f232
LP
550 if (r < 0)
551 log_warning_errno(r, "Failed to parse systemd.random_seed= argument, ignoring: %s", value);
552
553 free(arg_random_seed);
554 arg_random_seed = sz > 0 ? p : mfree(p);
555 arg_random_seed_size = sz;
556
059cb385 557 } else if (streq(key, "quiet") && !value) {
d7b15e0a 558
7a293242 559 if (arg_show_status == _SHOW_STATUS_INVALID)
0d066dd1 560 arg_show_status = SHOW_STATUS_ERROR;
059cb385
LP
561
562 } else if (streq(key, "debug") && !value) {
d7b15e0a 563
1de1c9c3
LP
564 /* Note that log_parse_environment() handles 'debug'
565 * too, and sets the log level to LOG_DEBUG. */
d7b15e0a 566
75f86906 567 if (detect_container() > 0)
b2103dcc 568 log_set_target(LOG_TARGET_CONSOLE);
059cb385 569
dcd61450 570 } else if (!value) {
e2c9a131 571 const char *target;
f170852a 572
ceae6295 573 /* Compatible with SysV, but supported independently even if SysV compatibility is disabled. */
e2c9a131
EV
574 target = runlevel_to_target(key);
575 if (target)
cd57038a 576 return free_and_strdup_warn(&arg_default_unit, target);
f170852a
LP
577 }
578
579 return 0;
580}
581
e8e581bf
ZJS
582#define DEFINE_SETTER(name, func, descr) \
583 static int name(const char *unit, \
584 const char *filename, \
585 unsigned line, \
586 const char *section, \
71a61510 587 unsigned section_line, \
e8e581bf
ZJS
588 const char *lvalue, \
589 int ltype, \
590 const char *rvalue, \
591 void *data, \
592 void *userdata) { \
593 \
594 int r; \
595 \
596 assert(filename); \
597 assert(lvalue); \
598 assert(rvalue); \
599 \
600 r = func(rvalue); \
601 if (r < 0) \
d1cefe0a
LP
602 log_syntax(unit, LOG_ERR, filename, line, r, \
603 "Invalid " descr "'%s': %m", \
604 rvalue); \
e8e581bf
ZJS
605 \
606 return 0; \
607 }
487393e9 608
a6ecbf83
FB
609DEFINE_SETTER(config_parse_level2, log_set_max_level_from_string, "log level");
610DEFINE_SETTER(config_parse_target, log_set_target_from_string, "target");
c5673ed0 611DEFINE_SETTER(config_parse_color, log_show_color_from_string, "color");
a6ecbf83 612DEFINE_SETTER(config_parse_location, log_show_location_from_string, "location");
c5673ed0 613DEFINE_SETTER(config_parse_time, log_show_time_from_string, "time");
487393e9 614
a61d6874
ZJS
615static int config_parse_default_timeout_abort(
616 const char *unit,
617 const char *filename,
618 unsigned line,
619 const char *section,
620 unsigned section_line,
621 const char *lvalue,
622 int ltype,
623 const char *rvalue,
624 void *data,
625 void *userdata) {
626 int r;
627
628 r = config_parse_timeout_abort(unit, filename, line, section, section_line, lvalue, ltype, rvalue,
629 &arg_default_timeout_abort_usec, userdata);
630 if (r >= 0)
631 arg_default_timeout_abort_set = r;
632 return 0;
633}
487393e9 634
a61d6874 635static int parse_config_file(void) {
f975e971 636 const ConfigTableItem items[] = {
a61d6874
ZJS
637 { "Manager", "LogLevel", config_parse_level2, 0, NULL },
638 { "Manager", "LogTarget", config_parse_target, 0, NULL },
639 { "Manager", "LogColor", config_parse_color, 0, NULL },
640 { "Manager", "LogLocation", config_parse_location, 0, NULL },
c5673ed0 641 { "Manager", "LogTime", config_parse_time, 0, NULL },
a61d6874
ZJS
642 { "Manager", "DumpCore", config_parse_bool, 0, &arg_dump_core },
643 { "Manager", "CrashChVT", /* legacy */ config_parse_crash_chvt, 0, &arg_crash_chvt },
644 { "Manager", "CrashChangeVT", config_parse_crash_chvt, 0, &arg_crash_chvt },
645 { "Manager", "CrashShell", config_parse_bool, 0, &arg_crash_shell },
646 { "Manager", "CrashReboot", config_parse_bool, 0, &arg_crash_reboot },
647 { "Manager", "ShowStatus", config_parse_show_status, 0, &arg_show_status },
648 { "Manager", "StatusUnitFormat", config_parse_status_unit_format, 0, &arg_status_unit_format },
649 { "Manager", "CPUAffinity", config_parse_cpu_affinity2, 0, &arg_cpu_affinity },
650 { "Manager", "NUMAPolicy", config_parse_numa_policy, 0, &arg_numa_policy.type },
651 { "Manager", "NUMAMask", config_parse_numa_mask, 0, &arg_numa_policy },
652 { "Manager", "JoinControllers", config_parse_warn_compat, DISABLED_CONFIGURATION, NULL },
653 { "Manager", "RuntimeWatchdogSec", config_parse_sec, 0, &arg_runtime_watchdog },
654 { "Manager", "RebootWatchdogSec", config_parse_sec, 0, &arg_reboot_watchdog },
655 { "Manager", "ShutdownWatchdogSec", config_parse_sec, 0, &arg_reboot_watchdog }, /* obsolete alias */
656 { "Manager", "KExecWatchdogSec", config_parse_sec, 0, &arg_kexec_watchdog },
657 { "Manager", "WatchdogDevice", config_parse_path, 0, &arg_watchdog_device },
658 { "Manager", "CapabilityBoundingSet", config_parse_capability_set, 0, &arg_capability_bounding_set },
659 { "Manager", "NoNewPrivileges", config_parse_bool, 0, &arg_no_new_privs },
349cc4a5 660#if HAVE_SECCOMP
a61d6874 661 { "Manager", "SystemCallArchitectures", config_parse_syscall_archs, 0, &arg_syscall_archs },
89fffa27 662#endif
a61d6874
ZJS
663 { "Manager", "TimerSlackNSec", config_parse_nsec, 0, &arg_timer_slack_nsec },
664 { "Manager", "DefaultTimerAccuracySec", config_parse_sec, 0, &arg_default_timer_accuracy_usec },
665 { "Manager", "DefaultStandardOutput", config_parse_output_restricted, 0, &arg_default_std_output },
666 { "Manager", "DefaultStandardError", config_parse_output_restricted, 0, &arg_default_std_error },
667 { "Manager", "DefaultTimeoutStartSec", config_parse_sec, 0, &arg_default_timeout_start_usec },
668 { "Manager", "DefaultTimeoutStopSec", config_parse_sec, 0, &arg_default_timeout_stop_usec },
669 { "Manager", "DefaultTimeoutAbortSec", config_parse_default_timeout_abort, 0, NULL },
670 { "Manager", "DefaultRestartSec", config_parse_sec, 0, &arg_default_restart_usec },
671 { "Manager", "DefaultStartLimitInterval", config_parse_sec, 0, &arg_default_start_limit_interval }, /* obsolete alias */
672 { "Manager", "DefaultStartLimitIntervalSec", config_parse_sec, 0, &arg_default_start_limit_interval },
673 { "Manager", "DefaultStartLimitBurst", config_parse_unsigned, 0, &arg_default_start_limit_burst },
674 { "Manager", "DefaultEnvironment", config_parse_environ, 0, &arg_default_environment },
d55ed7de 675 { "Manager", "ManagerEnvironment", config_parse_environ, 0, &arg_manager_environment },
a61d6874
ZJS
676 { "Manager", "DefaultLimitCPU", config_parse_rlimit, RLIMIT_CPU, arg_default_rlimit },
677 { "Manager", "DefaultLimitFSIZE", config_parse_rlimit, RLIMIT_FSIZE, arg_default_rlimit },
678 { "Manager", "DefaultLimitDATA", config_parse_rlimit, RLIMIT_DATA, arg_default_rlimit },
679 { "Manager", "DefaultLimitSTACK", config_parse_rlimit, RLIMIT_STACK, arg_default_rlimit },
680 { "Manager", "DefaultLimitCORE", config_parse_rlimit, RLIMIT_CORE, arg_default_rlimit },
681 { "Manager", "DefaultLimitRSS", config_parse_rlimit, RLIMIT_RSS, arg_default_rlimit },
682 { "Manager", "DefaultLimitNOFILE", config_parse_rlimit, RLIMIT_NOFILE, arg_default_rlimit },
683 { "Manager", "DefaultLimitAS", config_parse_rlimit, RLIMIT_AS, arg_default_rlimit },
684 { "Manager", "DefaultLimitNPROC", config_parse_rlimit, RLIMIT_NPROC, arg_default_rlimit },
685 { "Manager", "DefaultLimitMEMLOCK", config_parse_rlimit, RLIMIT_MEMLOCK, arg_default_rlimit },
686 { "Manager", "DefaultLimitLOCKS", config_parse_rlimit, RLIMIT_LOCKS, arg_default_rlimit },
687 { "Manager", "DefaultLimitSIGPENDING", config_parse_rlimit, RLIMIT_SIGPENDING, arg_default_rlimit },
688 { "Manager", "DefaultLimitMSGQUEUE", config_parse_rlimit, RLIMIT_MSGQUEUE, arg_default_rlimit },
689 { "Manager", "DefaultLimitNICE", config_parse_rlimit, RLIMIT_NICE, arg_default_rlimit },
690 { "Manager", "DefaultLimitRTPRIO", config_parse_rlimit, RLIMIT_RTPRIO, arg_default_rlimit },
691 { "Manager", "DefaultLimitRTTIME", config_parse_rlimit, RLIMIT_RTTIME, arg_default_rlimit },
692 { "Manager", "DefaultCPUAccounting", config_parse_tristate, 0, &arg_default_cpu_accounting },
693 { "Manager", "DefaultIOAccounting", config_parse_bool, 0, &arg_default_io_accounting },
694 { "Manager", "DefaultIPAccounting", config_parse_bool, 0, &arg_default_ip_accounting },
695 { "Manager", "DefaultBlockIOAccounting", config_parse_bool, 0, &arg_default_blockio_accounting },
696 { "Manager", "DefaultMemoryAccounting", config_parse_bool, 0, &arg_default_memory_accounting },
697 { "Manager", "DefaultTasksAccounting", config_parse_bool, 0, &arg_default_tasks_accounting },
698 { "Manager", "DefaultTasksMax", config_parse_tasks_max, 0, &arg_default_tasks_max },
699 { "Manager", "CtrlAltDelBurstAction", config_parse_emergency_action, 0, &arg_cad_burst_action },
700 { "Manager", "DefaultOOMPolicy", config_parse_oom_policy, 0, &arg_default_oom_policy },
d3b1c508 701 {}
487393e9
LP
702 };
703
e94a009c
YW
704 _cleanup_strv_free_ char **files = NULL, **dirs = NULL;
705 const char *suffix;
f70e6fb4 706 int r;
75eb6154 707
e94a009c 708 if (arg_system)
f70e6fb4 709 suffix = "system.conf.d";
e94a009c
YW
710 else {
711 r = manager_find_user_config_paths(&files, &dirs);
f70e6fb4
ZJS
712 if (r < 0)
713 return log_error_errno(r, "Failed to determine config file paths: %m");
e94a009c 714
f70e6fb4
ZJS
715 suffix = "user.conf.d";
716 }
75eb6154 717
f70e6fb4 718 (void) config_parse_many(
e94a009c
YW
719 (const char* const*) (files ?: STRV_MAKE(PKGSYSCONFDIR "/system.conf")),
720 (const char* const*) (dirs ?: CONF_PATHS_STRV("systemd")),
721 suffix,
4f9ff96a
LP
722 "Manager\0",
723 config_item_table_lookup, items,
724 CONFIG_PARSE_WARN,
725 NULL,
726 NULL);
36c16a7c 727
f70e6fb4
ZJS
728 /* Traditionally "0" was used to turn off the default unit timeouts. Fix this up so that we use
729 * USEC_INFINITY like everywhere else. */
36c16a7c
LP
730 if (arg_default_timeout_start_usec <= 0)
731 arg_default_timeout_start_usec = USEC_INFINITY;
732 if (arg_default_timeout_stop_usec <= 0)
733 arg_default_timeout_stop_usec = USEC_INFINITY;
487393e9 734
487393e9
LP
735 return 0;
736}
737
85cb4151 738static void set_manager_defaults(Manager *m) {
06af2a04
TB
739
740 assert(m);
741
5b65ae15
LP
742 /* Propagates the various default unit property settings into the manager object, i.e. properties that do not
743 * affect the manager itself, but are just what newly allocated units will have set if they haven't set
744 * anything else. (Also see set_manager_settings() for the settings that affect the manager's own behaviour) */
745
06af2a04
TB
746 m->default_timer_accuracy_usec = arg_default_timer_accuracy_usec;
747 m->default_std_output = arg_default_std_output;
748 m->default_std_error = arg_default_std_error;
749 m->default_timeout_start_usec = arg_default_timeout_start_usec;
750 m->default_timeout_stop_usec = arg_default_timeout_stop_usec;
dc653bf4
JK
751 m->default_timeout_abort_usec = arg_default_timeout_abort_usec;
752 m->default_timeout_abort_set = arg_default_timeout_abort_set;
06af2a04
TB
753 m->default_restart_usec = arg_default_restart_usec;
754 m->default_start_limit_interval = arg_default_start_limit_interval;
755 m->default_start_limit_burst = arg_default_start_limit_burst;
a88c5b8a
CD
756
757 /* On 4.15+ with unified hierarchy, CPU accounting is essentially free as it doesn't require the CPU
758 * controller to be enabled, so the default is to enable it unless we got told otherwise. */
759 if (arg_default_cpu_accounting >= 0)
760 m->default_cpu_accounting = arg_default_cpu_accounting;
761 else
762 m->default_cpu_accounting = cpu_accounting_is_cheap();
763
13c31542 764 m->default_io_accounting = arg_default_io_accounting;
377bfd2d 765 m->default_ip_accounting = arg_default_ip_accounting;
06af2a04
TB
766 m->default_blockio_accounting = arg_default_blockio_accounting;
767 m->default_memory_accounting = arg_default_memory_accounting;
03a7b521 768 m->default_tasks_accounting = arg_default_tasks_accounting;
0af20ea2 769 m->default_tasks_max = arg_default_tasks_max;
afcfaa69 770 m->default_oom_policy = arg_default_oom_policy;
06af2a04 771
79a224c4
LP
772 (void) manager_set_default_rlimits(m, arg_default_rlimit);
773
774 (void) manager_default_environment(m);
775 (void) manager_transient_environment_add(m, arg_default_environment);
06af2a04
TB
776}
777
7b46fc6a
LP
778static void set_manager_settings(Manager *m) {
779
780 assert(m);
781
986935cf
FB
782 /* Propagates the various manager settings into the manager object, i.e. properties that
783 * effect the manager itself (as opposed to just being inherited into newly allocated
784 * units, see set_manager_defaults() above). */
5b65ae15 785
7b46fc6a 786 m->confirm_spawn = arg_confirm_spawn;
2a12e32e 787 m->service_watchdogs = arg_service_watchdogs;
7b46fc6a
LP
788 m->cad_burst_action = arg_cad_burst_action;
789
986935cf
FB
790 manager_set_watchdog(m, WATCHDOG_RUNTIME, arg_runtime_watchdog);
791 manager_set_watchdog(m, WATCHDOG_REBOOT, arg_reboot_watchdog);
792 manager_set_watchdog(m, WATCHDOG_KEXEC, arg_kexec_watchdog);
793
7365a296 794 manager_set_show_status(m, arg_show_status, "commandline");
36cf4507 795 m->status_unit_format = arg_status_unit_format;
7b46fc6a
LP
796}
797
f170852a 798static int parse_argv(int argc, char *argv[]) {
f170852a
LP
799 enum {
800 ARG_LOG_LEVEL = 0x100,
801 ARG_LOG_TARGET,
bbe63281
LP
802 ARG_LOG_COLOR,
803 ARG_LOG_LOCATION,
c5673ed0 804 ARG_LOG_TIME,
2f198e2f 805 ARG_UNIT,
edb9aaa8 806 ARG_SYSTEM,
af2d49f7 807 ARG_USER,
e537352b 808 ARG_TEST,
b87c2aa6 809 ARG_NO_PAGER,
9ba0bc4e 810 ARG_VERSION,
80876c20 811 ARG_DUMP_CONFIGURATION_ITEMS,
bbc1acab 812 ARG_DUMP_BUS_PROPERTIES,
5c08257b 813 ARG_BUS_INTROSPECT,
9e58ff9c 814 ARG_DUMP_CORE,
b9e74c39 815 ARG_CRASH_CHVT,
9e58ff9c 816 ARG_CRASH_SHELL,
b9e74c39 817 ARG_CRASH_REBOOT,
a16e1123 818 ARG_CONFIRM_SPAWN,
9e58ff9c 819 ARG_SHOW_STATUS,
4288f619 820 ARG_DESERIALIZE,
2660882b 821 ARG_SWITCHED_ROOT,
0a494f1f 822 ARG_DEFAULT_STD_OUTPUT,
ee48dbd5 823 ARG_DEFAULT_STD_ERROR,
2a12e32e
JK
824 ARG_MACHINE_ID,
825 ARG_SERVICE_WATCHDOGS,
f170852a
LP
826 };
827
828 static const struct option options[] = {
a16e1123
LP
829 { "log-level", required_argument, NULL, ARG_LOG_LEVEL },
830 { "log-target", required_argument, NULL, ARG_LOG_TARGET },
bbe63281
LP
831 { "log-color", optional_argument, NULL, ARG_LOG_COLOR },
832 { "log-location", optional_argument, NULL, ARG_LOG_LOCATION },
c5673ed0 833 { "log-time", optional_argument, NULL, ARG_LOG_TIME },
2f198e2f 834 { "unit", required_argument, NULL, ARG_UNIT },
edb9aaa8 835 { "system", no_argument, NULL, ARG_SYSTEM },
af2d49f7 836 { "user", no_argument, NULL, ARG_USER },
a16e1123 837 { "test", no_argument, NULL, ARG_TEST },
b87c2aa6 838 { "no-pager", no_argument, NULL, ARG_NO_PAGER },
a16e1123 839 { "help", no_argument, NULL, 'h' },
9ba0bc4e 840 { "version", no_argument, NULL, ARG_VERSION },
a16e1123 841 { "dump-configuration-items", no_argument, NULL, ARG_DUMP_CONFIGURATION_ITEMS },
bbc1acab 842 { "dump-bus-properties", no_argument, NULL, ARG_DUMP_BUS_PROPERTIES },
5c08257b 843 { "bus-introspect", required_argument, NULL, ARG_BUS_INTROSPECT },
a5d87bf0 844 { "dump-core", optional_argument, NULL, ARG_DUMP_CORE },
b9e74c39 845 { "crash-chvt", required_argument, NULL, ARG_CRASH_CHVT },
a5d87bf0 846 { "crash-shell", optional_argument, NULL, ARG_CRASH_SHELL },
b9e74c39 847 { "crash-reboot", optional_argument, NULL, ARG_CRASH_REBOOT },
a5d87bf0 848 { "confirm-spawn", optional_argument, NULL, ARG_CONFIRM_SPAWN },
6e98720f 849 { "show-status", optional_argument, NULL, ARG_SHOW_STATUS },
a16e1123 850 { "deserialize", required_argument, NULL, ARG_DESERIALIZE },
2660882b 851 { "switched-root", no_argument, NULL, ARG_SWITCHED_ROOT },
0a494f1f
LP
852 { "default-standard-output", required_argument, NULL, ARG_DEFAULT_STD_OUTPUT, },
853 { "default-standard-error", required_argument, NULL, ARG_DEFAULT_STD_ERROR, },
ee48dbd5 854 { "machine-id", required_argument, NULL, ARG_MACHINE_ID },
2a12e32e 855 { "service-watchdogs", required_argument, NULL, ARG_SERVICE_WATCHDOGS },
fb472900 856 {}
f170852a
LP
857 };
858
859 int c, r;
9a9ca408 860 bool user_arg_seen = false;
f170852a
LP
861
862 assert(argc >= 1);
863 assert(argv);
864
df0ff127 865 if (getpid_cached() == 1)
b770165a
LP
866 opterr = 0;
867
099663ff 868 while ((c = getopt_long(argc, argv, "hDbsz:", options, NULL)) >= 0)
f170852a
LP
869
870 switch (c) {
871
872 case ARG_LOG_LEVEL:
fb472900 873 r = log_set_max_level_from_string(optarg);
2b5107e1
ZJS
874 if (r < 0)
875 return log_error_errno(r, "Failed to parse log level \"%s\": %m", optarg);
f170852a
LP
876
877 break;
878
879 case ARG_LOG_TARGET:
fb472900 880 r = log_set_target_from_string(optarg);
2b5107e1
ZJS
881 if (r < 0)
882 return log_error_errno(r, "Failed to parse log target \"%s\": %m", optarg);
f170852a
LP
883
884 break;
885
bbe63281
LP
886 case ARG_LOG_COLOR:
887
d0b170c8 888 if (optarg) {
fb472900 889 r = log_show_color_from_string(optarg);
2b5107e1
ZJS
890 if (r < 0)
891 return log_error_errno(r, "Failed to parse log color setting \"%s\": %m",
892 optarg);
d0b170c8
LP
893 } else
894 log_show_color(true);
bbe63281
LP
895
896 break;
897
898 case ARG_LOG_LOCATION:
d0b170c8 899 if (optarg) {
fb472900 900 r = log_show_location_from_string(optarg);
2b5107e1
ZJS
901 if (r < 0)
902 return log_error_errno(r, "Failed to parse log location setting \"%s\": %m",
903 optarg);
d0b170c8
LP
904 } else
905 log_show_location(true);
bbe63281
LP
906
907 break;
908
c5673ed0
DS
909 case ARG_LOG_TIME:
910
911 if (optarg) {
912 r = log_show_time_from_string(optarg);
913 if (r < 0)
914 return log_error_errno(r, "Failed to parse log time setting \"%s\": %m",
915 optarg);
916 } else
917 log_show_time(true);
918
919 break;
920
0a494f1f 921 case ARG_DEFAULT_STD_OUTPUT:
fb472900 922 r = exec_output_from_string(optarg);
2b5107e1
ZJS
923 if (r < 0)
924 return log_error_errno(r, "Failed to parse default standard output setting \"%s\": %m",
925 optarg);
926 arg_default_std_output = r;
0a494f1f
LP
927 break;
928
929 case ARG_DEFAULT_STD_ERROR:
fb472900 930 r = exec_output_from_string(optarg);
2b5107e1
ZJS
931 if (r < 0)
932 return log_error_errno(r, "Failed to parse default standard error output setting \"%s\": %m",
933 optarg);
934 arg_default_std_error = r;
0a494f1f
LP
935 break;
936
2f198e2f 937 case ARG_UNIT:
e6e242ad 938 r = free_and_strdup(&arg_default_unit, optarg);
23bbb0de 939 if (r < 0)
2b5107e1 940 return log_error_errno(r, "Failed to set default unit \"%s\": %m", optarg);
f170852a
LP
941
942 break;
943
edb9aaa8 944 case ARG_SYSTEM:
463d0d15 945 arg_system = true;
edb9aaa8 946 break;
a5dab5ce 947
af2d49f7 948 case ARG_USER:
463d0d15 949 arg_system = false;
9a9ca408 950 user_arg_seen = true;
a5dab5ce 951 break;
a5dab5ce 952
e965d56d 953 case ARG_TEST:
fa0f4d8a 954 arg_action = ACTION_TEST;
b87c2aa6
ZJS
955 break;
956
957 case ARG_NO_PAGER:
0221d68a 958 arg_pager_flags |= PAGER_DISABLE;
e965d56d
LP
959 break;
960
9ba0bc4e
ZJS
961 case ARG_VERSION:
962 arg_action = ACTION_VERSION;
963 break;
964
e537352b 965 case ARG_DUMP_CONFIGURATION_ITEMS:
fa0f4d8a 966 arg_action = ACTION_DUMP_CONFIGURATION_ITEMS;
e537352b
LP
967 break;
968
bbc1acab
YW
969 case ARG_DUMP_BUS_PROPERTIES:
970 arg_action = ACTION_DUMP_BUS_PROPERTIES;
971 break;
972
5c08257b
ZJS
973 case ARG_BUS_INTROSPECT:
974 arg_bus_introspect = optarg;
975 arg_action = ACTION_BUS_INTROSPECT;
976 break;
977
9e58ff9c 978 case ARG_DUMP_CORE:
599c7c54
ZJS
979 r = parse_boolean_argument("--dump-core", optarg, &arg_dump_core);
980 if (r < 0)
981 return r;
b9e74c39
LP
982 break;
983
984 case ARG_CRASH_CHVT:
a07a7324 985 r = parse_crash_chvt(optarg, &arg_crash_chvt);
b9e74c39 986 if (r < 0)
2b5107e1
ZJS
987 return log_error_errno(r, "Failed to parse crash virtual terminal index: \"%s\": %m",
988 optarg);
9e58ff9c
LP
989 break;
990
991 case ARG_CRASH_SHELL:
599c7c54
ZJS
992 r = parse_boolean_argument("--crash-shell", optarg, &arg_crash_shell);
993 if (r < 0)
994 return r;
b9e74c39
LP
995 break;
996
997 case ARG_CRASH_REBOOT:
599c7c54
ZJS
998 r = parse_boolean_argument("--crash-reboot", optarg, &arg_crash_reboot);
999 if (r < 0)
1000 return r;
9e58ff9c
LP
1001 break;
1002
80876c20 1003 case ARG_CONFIRM_SPAWN:
7d5ceb64
FB
1004 arg_confirm_spawn = mfree(arg_confirm_spawn);
1005
1006 r = parse_confirm_spawn(optarg, &arg_confirm_spawn);
1007 if (r < 0)
2b5107e1
ZJS
1008 return log_error_errno(r, "Failed to parse confirm spawn option: \"%s\": %m",
1009 optarg);
80876c20
LP
1010 break;
1011
2a12e32e 1012 case ARG_SERVICE_WATCHDOGS:
599c7c54 1013 r = parse_boolean_argument("--service-watchdogs=", optarg, &arg_service_watchdogs);
2a12e32e 1014 if (r < 0)
599c7c54 1015 return r;
2a12e32e
JK
1016 break;
1017
9e58ff9c 1018 case ARG_SHOW_STATUS:
d450b6f2
ZJS
1019 if (optarg) {
1020 r = parse_show_status(optarg, &arg_show_status);
ac7ec288 1021 if (r < 0)
2b5107e1
ZJS
1022 return log_error_errno(r, "Failed to parse show status boolean: \"%s\": %m",
1023 optarg);
d450b6f2
ZJS
1024 } else
1025 arg_show_status = SHOW_STATUS_YES;
6e98720f 1026 break;
a5d87bf0 1027
a16e1123
LP
1028 case ARG_DESERIALIZE: {
1029 int fd;
1030 FILE *f;
1031
01e10de3 1032 r = safe_atoi(optarg, &fd);
2b5107e1
ZJS
1033 if (r < 0)
1034 log_error_errno(r, "Failed to parse deserialize option \"%s\": %m", optarg);
baaa35ad
ZJS
1035 if (fd < 0)
1036 return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
1037 "Invalid deserialize fd: %d",
1038 fd);
a16e1123 1039
b9e74c39 1040 (void) fd_cloexec(fd, true);
01e10de3
LP
1041
1042 f = fdopen(fd, "r");
4a62c710 1043 if (!f)
2b5107e1 1044 return log_error_errno(errno, "Failed to open serialization fd %d: %m", fd);
a16e1123 1045
74ca738f 1046 safe_fclose(arg_serialization);
d3b1c508 1047 arg_serialization = f;
a16e1123
LP
1048
1049 break;
1050 }
1051
2660882b 1052 case ARG_SWITCHED_ROOT:
bf4df7c3 1053 arg_switched_root = true;
d03bc1b8
HH
1054 break;
1055
ee48dbd5
NC
1056 case ARG_MACHINE_ID:
1057 r = set_machine_id(optarg);
54500613 1058 if (r < 0)
2b5107e1 1059 return log_error_errno(r, "MachineID '%s' is not valid: %m", optarg);
ee48dbd5
NC
1060 break;
1061
f170852a 1062 case 'h':
fa0f4d8a 1063 arg_action = ACTION_HELP;
f170852a
LP
1064 break;
1065
1d2e23ab
LP
1066 case 'D':
1067 log_set_max_level(LOG_DEBUG);
1068 break;
1069
099663ff
LP
1070 case 'b':
1071 case 's':
1072 case 'z':
cd57038a
ZJS
1073 /* Just to eat away the sysvinit kernel cmdline args that we'll parse in
1074 * parse_proc_cmdline_item() or ignore, without any getopt() error messages.
1075 */
099663ff 1076 case '?':
df0ff127 1077 if (getpid_cached() != 1)
099663ff 1078 return -EINVAL;
601185b4
ZJS
1079 else
1080 return 0;
099663ff 1081
601185b4
ZJS
1082 default:
1083 assert_not_reached("Unhandled option code.");
f170852a
LP
1084 }
1085
d7a0f1f4 1086 if (optind < argc && getpid_cached() != 1)
9a9ca408
ZJS
1087 /* Hmm, when we aren't run as init system let's complain about excess arguments */
1088 return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Excess arguments.");
1089
1090 if (arg_action == ACTION_RUN && !arg_system && !user_arg_seen)
baaa35ad 1091 return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
9a9ca408 1092 "Explicit --user argument required to run as user manager.");
d821e6d6 1093
f170852a
LP
1094 return 0;
1095}
1096
1097static int help(void) {
37ec0fdd
LP
1098 _cleanup_free_ char *link = NULL;
1099 int r;
1100
1101 r = terminal_urlify_man("systemd", "1", &link);
1102 if (r < 0)
1103 return log_oom();
f170852a 1104
2e33c433 1105 printf("%s [OPTIONS...]\n\n"
7ae47326
ZJS
1106 "%sStarts and monitors system and user services.%s\n\n"
1107 "This program takes no positional arguments.\n\n"
1108 "%sOptions%s:\n"
e537352b 1109 " -h --help Show this help\n"
cb4069d9 1110 " --version Show version\n"
cd69e88b
LP
1111 " --test Determine initial transaction, dump it and exit\n"
1112 " --system In combination with --test: operate as system service manager\n"
1113 " --user In combination with --test: operate as per-user service manager\n"
b87c2aa6 1114 " --no-pager Do not pipe output into a pager\n"
80876c20 1115 " --dump-configuration-items Dump understood unit configuration items\n"
bbc1acab 1116 " --dump-bus-properties Dump exposed bus properties\n"
5c08257b 1117 " --bus-introspect=PATH Write XML introspection data\n"
9e58ff9c 1118 " --unit=UNIT Set default unit\n"
b9e74c39
LP
1119 " --dump-core[=BOOL] Dump core on crash\n"
1120 " --crash-vt=NR Change to specified VT on crash\n"
1121 " --crash-reboot[=BOOL] Reboot on crash\n"
1122 " --crash-shell[=BOOL] Run shell on crash\n"
1123 " --confirm-spawn[=BOOL] Ask for confirmation when spawning processes\n"
1124 " --show-status[=BOOL] Show status updates on the console during bootup\n"
c1dc6153 1125 " --log-target=TARGET Set log target (console, journal, kmsg, journal-or-kmsg, null)\n"
9e58ff9c 1126 " --log-level=LEVEL Set log level (debug, info, notice, warning, err, crit, alert, emerg)\n"
b9e74c39
LP
1127 " --log-color[=BOOL] Highlight important log messages\n"
1128 " --log-location[=BOOL] Include code location in log messages\n"
c5673ed0 1129 " --log-time[=BOOL] Prefix log messages with current time\n"
0a494f1f 1130 " --default-standard-output= Set default standard output for services\n"
37ec0fdd 1131 " --default-standard-error= Set default standard error output for services\n"
bc556335
DDM
1132 "\nSee the %s for details.\n",
1133 program_invocation_short_name,
1134 ansi_highlight(),
1135 ansi_normal(),
1136 ansi_underline(),
1137 ansi_normal(),
1138 link);
f170852a
LP
1139
1140 return 0;
1141}
1142
2cc856ac
LP
1143static int prepare_reexecute(
1144 Manager *m,
1145 FILE **ret_f,
1146 FDSet **ret_fds,
1147 bool switching_root) {
1148
48b90859
LP
1149 _cleanup_fdset_free_ FDSet *fds = NULL;
1150 _cleanup_fclose_ FILE *f = NULL;
a16e1123
LP
1151 int r;
1152
1153 assert(m);
2cc856ac
LP
1154 assert(ret_f);
1155 assert(ret_fds);
a16e1123 1156
6b78f9b4 1157 r = manager_open_serialization(m, &f);
48b90859
LP
1158 if (r < 0)
1159 return log_error_errno(r, "Failed to create serialization file: %m");
a16e1123 1160
71445ae7 1161 /* Make sure nothing is really destructed when we shut down */
313cefa1 1162 m->n_reloading++;
718db961 1163 bus_manager_send_reloading(m, true);
71445ae7 1164
6b78f9b4 1165 fds = fdset_new();
48b90859
LP
1166 if (!fds)
1167 return log_oom();
a16e1123 1168
b3680f49 1169 r = manager_serialize(m, f, fds, switching_root);
48b90859 1170 if (r < 0)
d68c645b 1171 return r;
a16e1123 1172
48b90859
LP
1173 if (fseeko(f, 0, SEEK_SET) == (off_t) -1)
1174 return log_error_errno(errno, "Failed to rewind serialization fd: %m");
a16e1123 1175
6b78f9b4 1176 r = fd_cloexec(fileno(f), false);
48b90859
LP
1177 if (r < 0)
1178 return log_error_errno(r, "Failed to disable O_CLOEXEC for serialization: %m");
a16e1123 1179
6b78f9b4 1180 r = fdset_cloexec(fds, false);
48b90859
LP
1181 if (r < 0)
1182 return log_error_errno(r, "Failed to disable O_CLOEXEC for serialization fds: %m");
a16e1123 1183
2cc856ac
LP
1184 *ret_f = TAKE_PTR(f);
1185 *ret_fds = TAKE_PTR(fds);
a16e1123 1186
48b90859 1187 return 0;
a16e1123
LP
1188}
1189
a8b627aa
LP
static void bump_file_max_and_nr_open(void) {

        /* Raises the fs.file-max and fs.nr_open sysctls as far as the kernel lets us, each step guarded by
         * its own build-time switch. The intent is to take two of the four layers of file descriptor
         * limits out of the picture, so that only RLIMIT_NOFILE (soft + hard) really matters. All
         * failures are logged and otherwise ignored. */

#if BUMP_PROC_SYS_FS_FILE_MAX || BUMP_PROC_SYS_FS_NR_OPEN
        int r;
#endif

#if BUMP_PROC_SYS_FS_FILE_MAX
        /* The maximum the kernel allows for this since 5.2 is LONG_MAX, use that. (Previously things were
         * different, but the operation would fail silently.) */
        r = sysctl_writef("fs/file-max", "%li\n", LONG_MAX);
        if (r < 0)
                log_full_errno(IN_SET(r, -EROFS, -EPERM, -EACCES) ? LOG_DEBUG : LOG_WARNING, r, "Failed to bump fs.file-max, ignoring: %m");
#endif

#if BUMP_PROC_SYS_FS_NR_OPEN
        int candidate = INT_MAX;

        /* The kernel enforces a minimum and a maximum on fs.nr_open but does not tell us what they are,
         * hence we probe: start at INT_MAX and halve the value for as long as the kernel refuses it with
         * EINVAL. */

        for (;;) {
                int current;

                candidate &= ~(__SIZEOF_POINTER__ - 1); /* Round down to next multiple of the pointer size */
                if (candidate < 1024) {
                        log_warning("Can't bump fs.nr_open, value too small.");
                        break;
                }

                current = read_nr_open();
                if (current < 0) {
                        log_error_errno(current, "Failed to read fs.nr_open: %m");
                        break;
                }
                if (current >= candidate) { /* Already larger */
                        log_debug("Skipping bump, value is already larger.");
                        break;
                }

                r = sysctl_writef("fs/nr_open", "%i\n", candidate);
                if (r >= 0) {
                        log_debug("Successfully bumped fs.nr_open to %i", candidate);
                        break;
                }
                if (r != -EINVAL) {
                        log_full_errno(IN_SET(r, -EROFS, -EPERM, -EACCES) ? LOG_DEBUG : LOG_WARNING, r, "Failed to bump fs.nr_open, ignoring: %m");
                        break;
                }

                /* Value rejected as invalid: retry with half of it. */
                log_debug("Couldn't write fs.nr_open as %i, halving it.", candidate);
                candidate /= 2;
        }
#endif
}
1255
/* Raises RLIMIT_NOFILE (soft and hard) for ourselves to the value reported by read_nr_open(), never
 * going below the limits passed in saved_rlimit.
 *
 * Returns 0 if the limit was raised or did not need raising, or the negative errno-style value of the
 * failed setrlimit_closest() call. */
static int bump_rlimit_nofile(struct rlimit *saved_rlimit) {
        struct rlimit target;
        rlim_t kernel_max;
        int r;

        /* Get the underlying absolute limit the kernel enforces */
        kernel_max = (rlim_t) read_nr_open();

        /* Calculate the new limits to use for us. Never lower from what we inherited. */
        target = (struct rlimit) {
                .rlim_cur = MAX(kernel_max, saved_rlimit->rlim_cur),
                .rlim_max = MAX(kernel_max, saved_rlimit->rlim_max),
        };

        /* Only touch the limit if at least one of the two values would actually grow. */
        if (target.rlim_max > saved_rlimit->rlim_max ||
            target.rlim_cur > saved_rlimit->rlim_cur) {

                /* Bump up the resource limit for ourselves substantially, all the way to the maximum the
                 * kernel allows, for both hard and soft. */
                r = setrlimit_closest(RLIMIT_NOFILE, &target);
                if (r < 0)
                        return log_warning_errno(r, "Setting RLIMIT_NOFILE failed, ignoring: %m");

                return 0;
        }

        log_debug("RLIMIT_NOFILE is already as high or higher than we need it, not bumping.");
        return 0;
}
1284
fb3ae275 1285static int bump_rlimit_memlock(struct rlimit *saved_rlimit) {
cda7faa9 1286 struct rlimit new_rlimit;
04d1ee0f 1287 uint64_t mm;
fb3ae275
LP
1288 int r;
1289
a17c1712 1290 /* BPF_MAP_TYPE_LPM_TRIE bpf maps are charged against RLIMIT_MEMLOCK, even if we have CAP_IPC_LOCK which should
6e3c443b 1291 * normally disable such checks. We need them to implement IPAddressAllow= and IPAddressDeny=, hence let's bump
a17c1712 1292 * the value high enough for our user. */
fb3ae275 1293
cda7faa9
LP
1294 /* Using MAX() on resource limits only is safe if RLIM_INFINITY is > 0. POSIX declares that rlim_t
1295 * must be unsigned, hence this is a given, but let's make this clear here. */
1296 assert_cc(RLIM_INFINITY > 0);
1297
60dcf3dc
LP
1298 mm = physical_memory_scale(1, 8); /* Let's scale how much we allow to be locked by the amount of physical
1299 * RAM. We allow an eighth to be locked by us, just to pick a value. */
04d1ee0f 1300
cda7faa9 1301 new_rlimit = (struct rlimit) {
04d1ee0f
LP
1302 .rlim_cur = MAX3(HIGH_RLIMIT_MEMLOCK, saved_rlimit->rlim_cur, mm),
1303 .rlim_max = MAX3(HIGH_RLIMIT_MEMLOCK, saved_rlimit->rlim_max, mm),
cda7faa9
LP
1304 };
1305
1306 if (saved_rlimit->rlim_max >= new_rlimit.rlim_cur &&
1307 saved_rlimit->rlim_cur >= new_rlimit.rlim_max) {
1308 log_debug("RLIMIT_MEMLOCK is already as high or higher than we need it, not bumping.");
1309 return 0;
1310 }
1311
1312 r = setrlimit_closest(RLIMIT_MEMLOCK, &new_rlimit);
fb3ae275
LP
1313 if (r < 0)
1314 return log_warning_errno(r, "Setting RLIMIT_MEMLOCK failed, ignoring: %m");
1315
1316 return 0;
1317}
1318
static void test_usr(void) {

        /* Warns if /usr looks like a separate file system that has not been mounted yet, which we detect
         * by the directory being empty. Any other result of the emptiness check (not empty, or an error)
         * is silently accepted. */

        if (dir_is_empty("/usr") > 0)
                log_warning("/usr appears to be on its own filesystem and is not already mounted. This is not a supported setup. "
                            "Some things will probably break (sometimes even silently) in mysterious ways. "
                            "Consult http://freedesktop.org/wiki/Software/systemd/separate-usr-is-broken for more information.");
}
1330
d3b1c508 1331static int enforce_syscall_archs(Set *archs) {
349cc4a5 1332#if HAVE_SECCOMP
d3b1c508
LP
1333 int r;
1334
83f12b27
FS
1335 if (!is_seccomp_available())
1336 return 0;
1337
469830d1 1338 r = seccomp_restrict_archs(arg_syscall_archs);
d3b1c508 1339 if (r < 0)
469830d1 1340 return log_error_errno(r, "Failed to enforce system call architecture restrication: %m");
d3b1c508 1341#endif
469830d1 1342 return 0;
d3b1c508
LP
1343}
1344
b6e2f329
LP
1345static int status_welcome(void) {
1346 _cleanup_free_ char *pretty_name = NULL, *ansi_color = NULL;
1347 int r;
1348
5ca02bfc 1349 if (!show_status_on(arg_show_status))
fd8c85c6
LP
1350 return 0;
1351
d58ad743
LP
1352 r = parse_os_release(NULL,
1353 "PRETTY_NAME", &pretty_name,
209c1470 1354 "ANSI_COLOR", &ansi_color);
d58ad743
LP
1355 if (r < 0)
1356 log_full_errno(r == -ENOENT ? LOG_DEBUG : LOG_WARNING, r,
1357 "Failed to read os-release file, ignoring: %m");
b6e2f329 1358
dc9b5816 1359 if (log_get_show_color())
a885727a 1360 return status_printf(NULL, 0,
dc9b5816
ZJS
1361 "\nWelcome to \x1B[%sm%s\x1B[0m!\n",
1362 isempty(ansi_color) ? "1" : ansi_color,
1363 isempty(pretty_name) ? "Linux" : pretty_name);
1364 else
a885727a 1365 return status_printf(NULL, 0,
dc9b5816
ZJS
1366 "\nWelcome to %s!\n",
1367 isempty(pretty_name) ? "Linux" : pretty_name);
b6e2f329
LP
1368}
1369
fdd25311
LP
1370static int write_container_id(void) {
1371 const char *c;
7756528e 1372 int r = 0; /* avoid false maybe-uninitialized warning */
fdd25311
LP
1373
1374 c = getenv("container");
1375 if (isempty(c))
1376 return 0;
1377
8612da97
LP
1378 RUN_WITH_UMASK(0022)
1379 r = write_string_file("/run/systemd/container", c, WRITE_STRING_FILE_CREATE);
19854865 1380 if (r < 0)
f1f849b0 1381 return log_warning_errno(r, "Failed to write /run/systemd/container, ignoring: %m");
19854865
LP
1382
1383 return 1;
1384}
1385
1386static int bump_unix_max_dgram_qlen(void) {
1387 _cleanup_free_ char *qlen = NULL;
1388 unsigned long v;
1389 int r;
1390
3130fca5
LP
1391 /* Let's bump the net.unix.max_dgram_qlen sysctl. The kernel default of 16 is simply too low. We set the value
1392 * really really early during boot, so that it is actually applied to all our sockets, including the
1393 * $NOTIFY_SOCKET one. */
19854865
LP
1394
1395 r = read_one_line_file("/proc/sys/net/unix/max_dgram_qlen", &qlen);
1396 if (r < 0)
875622c3 1397 return log_full_errno(r == -ENOENT ? LOG_DEBUG : LOG_WARNING, r, "Failed to read AF_UNIX datagram queue length, ignoring: %m");
19854865
LP
1398
1399 r = safe_atolu(qlen, &v);
1400 if (r < 0)
3130fca5 1401 return log_warning_errno(r, "Failed to parse AF_UNIX datagram queue length '%s', ignoring: %m", qlen);
19854865
LP
1402
1403 if (v >= DEFAULT_UNIX_MAX_DGRAM_QLEN)
1404 return 0;
1405
57512c89 1406 r = write_string_filef("/proc/sys/net/unix/max_dgram_qlen", WRITE_STRING_FILE_DISABLE_BUFFER, "%lu", DEFAULT_UNIX_MAX_DGRAM_QLEN);
19854865
LP
1407 if (r < 0)
1408 return log_full_errno(IN_SET(r, -EROFS, -EPERM, -EACCES) ? LOG_DEBUG : LOG_WARNING, r,
1409 "Failed to bump AF_UNIX datagram queue length, ignoring: %m");
1410
1411 return 1;
fdd25311
LP
1412}
1413
32391275
FB
1414static int fixup_environment(void) {
1415 _cleanup_free_ char *term = NULL;
4dc63c4b 1416 const char *t;
32391275
FB
1417 int r;
1418
43db615b
LP
1419 /* Only fix up the environment when we are started as PID 1 */
1420 if (getpid_cached() != 1)
1421 return 0;
1422
1423 /* We expect the environment to be set correctly if run inside a container. */
84af7821
LP
1424 if (detect_container() > 0)
1425 return 0;
1426
43db615b
LP
1427 /* When started as PID1, the kernel uses /dev/console for our stdios and uses TERM=linux whatever the backend
1428 * device used by the console. We try to make a better guess here since some consoles might not have support
1429 * for color mode for example.
32391275 1430 *
43db615b 1431 * However if TERM was configured through the kernel command line then leave it alone. */
1d84ad94 1432 r = proc_cmdline_get_key("TERM", 0, &term);
32391275
FB
1433 if (r < 0)
1434 return r;
32391275 1435
4dc63c4b
LP
1436 t = term ?: default_term_for_tty("/dev/console");
1437
1438 if (setenv("TERM", t, 1) < 0)
32391275
FB
1439 return -errno;
1440
9d48671c 1441 /* The kernels sets HOME=/ for init. Let's undo this. */
44ee03d1
ZJS
1442 if (path_equal_ptr(getenv("HOME"), "/"))
1443 assert_se(unsetenv("HOME") == 0);
9d48671c 1444
32391275
FB
1445 return 0;
1446}
1447
6808a0bc
LP
static void redirect_telinit(int argc, char *argv[]) {

        /* SysV compatibility: being called as "init" while not being PID 1 is treated like a call to
         * telinit, hence we hand the command line over to systemctl. Does not return in that case. Compiled
         * out entirely without HAVE_SYSV_COMPAT. */

#if HAVE_SYSV_COMPAT
        if (getpid_cached() == 1 || !invoked_as(argv, "init"))
                return;

        execv(SYSTEMCTL_BINARY_PATH, argv);

        /* execv() only comes back on failure. */
        log_error_errno(errno, "Failed to exec " SYSTEMCTL_BINARY_PATH ": %m");
        exit(EXIT_FAILURE);
#endif
}
1464
4a36297c
LP
1465static int become_shutdown(
1466 const char *shutdown_verb,
7eb35049 1467 int retval) {
4a36297c
LP
1468
1469 char log_level[DECIMAL_STR_MAX(int) + 1],
e73c54b8
JK
1470 exit_code[DECIMAL_STR_MAX(uint8_t) + 1],
1471 timeout[DECIMAL_STR_MAX(usec_t) + 1];
4a36297c 1472
e73c54b8 1473 const char* command_line[13] = {
4a36297c
LP
1474 SYSTEMD_SHUTDOWN_BINARY_PATH,
1475 shutdown_verb,
e73c54b8 1476 "--timeout", timeout,
4a36297c
LP
1477 "--log-level", log_level,
1478 "--log-target",
1479 };
1480
1481 _cleanup_strv_free_ char **env_block = NULL;
e73c54b8 1482 size_t pos = 7;
4a36297c 1483 int r;
acafd7d8 1484 usec_t watchdog_timer = 0;
4a36297c 1485
7eb35049 1486 assert(shutdown_verb);
234519ae 1487 assert(!command_line[pos]);
4a36297c
LP
1488 env_block = strv_copy(environ);
1489
1490 xsprintf(log_level, "%d", log_get_max_level());
e73c54b8 1491 xsprintf(timeout, "%" PRI_USEC "us", arg_default_timeout_stop_usec);
4a36297c
LP
1492
1493 switch (log_get_target()) {
1494
1495 case LOG_TARGET_KMSG:
1496 case LOG_TARGET_JOURNAL_OR_KMSG:
1497 case LOG_TARGET_SYSLOG_OR_KMSG:
1498 command_line[pos++] = "kmsg";
1499 break;
1500
1501 case LOG_TARGET_NULL:
1502 command_line[pos++] = "null";
1503 break;
1504
1505 case LOG_TARGET_CONSOLE:
1506 default:
1507 command_line[pos++] = "console";
1508 break;
1509 };
1510
1511 if (log_get_show_color())
1512 command_line[pos++] = "--log-color";
1513
1514 if (log_get_show_location())
1515 command_line[pos++] = "--log-location";
1516
c5673ed0
DS
1517 if (log_get_show_time())
1518 command_line[pos++] = "--log-time";
1519
4a36297c
LP
1520 if (streq(shutdown_verb, "exit")) {
1521 command_line[pos++] = "--exit-code";
1522 command_line[pos++] = exit_code;
1523 xsprintf(exit_code, "%d", retval);
1524 }
1525
1526 assert(pos < ELEMENTSOF(command_line));
1527
acafd7d8 1528 if (streq(shutdown_verb, "reboot"))
65224c1d 1529 watchdog_timer = arg_reboot_watchdog;
acafd7d8
LB
1530 else if (streq(shutdown_verb, "kexec"))
1531 watchdog_timer = arg_kexec_watchdog;
1532
1533 if (watchdog_timer > 0 && watchdog_timer != USEC_INFINITY) {
7eb35049 1534
4a36297c
LP
1535 char *e;
1536
acafd7d8 1537 /* If we reboot or kexec let's set the shutdown
4a36297c
LP
1538 * watchdog and tell the shutdown binary to
1539 * repeatedly ping it */
acafd7d8 1540 r = watchdog_set_timeout(&watchdog_timer);
4a36297c
LP
1541 watchdog_close(r < 0);
1542
1543 /* Tell the binary how often to ping, ignore failure */
acafd7d8 1544 if (asprintf(&e, "WATCHDOG_USEC="USEC_FMT, watchdog_timer) > 0)
8a2c1fbf
EJ
1545 (void) strv_consume(&env_block, e);
1546
1547 if (arg_watchdog_device &&
1548 asprintf(&e, "WATCHDOG_DEVICE=%s", arg_watchdog_device) > 0)
1549 (void) strv_consume(&env_block, e);
4a36297c
LP
1550 } else
1551 watchdog_close(true);
1552
1553 /* Avoid the creation of new processes forked by the
1554 * kernel; at this point, we will not listen to the
1555 * signals anyway */
1556 if (detect_container() <= 0)
1557 (void) cg_uninstall_release_agent(SYSTEMD_CGROUP_CONTROLLER);
1558
1559 execve(SYSTEMD_SHUTDOWN_BINARY_PATH, (char **) command_line, env_block);
1560 return -errno;
1561}
1562
e839bafd
LP
1563static void initialize_clock(void) {
1564 int r;
1565
3753325b
LP
1566 /* This is called very early on, before we parse the kernel command line or otherwise figure out why
1567 * we are running, but only once. */
1568
e839bafd
LP
1569 if (clock_is_localtime(NULL) > 0) {
1570 int min;
1571
1572 /*
1573 * The very first call of settimeofday() also does a time warp in the kernel.
1574 *
1575 * In the rtc-in-local time mode, we set the kernel's timezone, and rely on external tools to take care
1576 * of maintaining the RTC and do all adjustments. This matches the behavior of Windows, which leaves
1577 * the RTC alone if the registry tells that the RTC runs in UTC.
1578 */
1579 r = clock_set_timezone(&min);
1580 if (r < 0)
1581 log_error_errno(r, "Failed to apply local time delta, ignoring: %m");
1582 else
1583 log_info("RTC configured in localtime, applying delta of %i minutes to system time.", min);
1584
d46b79bb 1585 } else if (!in_initrd())
e839bafd
LP
1586 /*
1587 * Do a dummy very first call to seal the kernel's time warp magic.
1588 *
1589 * Do not call this from inside the initrd. The initrd might not carry /etc/adjtime with LOCAL, but the
1590 * real system could be set up that way. In such case, we need to delay the time-warp or the sealing
1591 * until we reach the real system.
1592 *
1593 * Do no set the kernel's timezone. The concept of local time cannot be supported reliably, the time
1594 * will jump or be incorrect at every daylight saving time change. All kernel local time concepts will
1595 * be treated as UTC that way.
1596 */
1597 (void) clock_reset_timewarp();
e839bafd
LP
1598
1599 r = clock_apply_epoch();
1600 if (r < 0)
1601 log_error_errno(r, "Current system time is before build time, but cannot correct: %m");
1602 else if (r > 0)
1603 log_info("System time before build time, advancing clock.");
1604}
1605
3753325b
LP
1606static void apply_clock_update(void) {
1607 struct timespec ts;
1608
1609 /* This is called later than initialize_clock(), i.e. after we parsed configuration files/kernel
1610 * command line and such. */
1611
1612 if (arg_clock_usec == 0)
1613 return;
1614
45250e66
LP
1615 if (getpid_cached() != 1)
1616 return;
1617
3753325b
LP
1618 if (clock_settime(CLOCK_REALTIME, timespec_store(&ts, arg_clock_usec)) < 0)
1619 log_error_errno(errno, "Failed to set system clock to time specified on kernel command line: %m");
04f5c018 1620 else
3753325b 1621 log_info("Set system clock to %s, as specified on the kernel command line.",
04f5c018 1622 FORMAT_TIMESTAMP(arg_clock_usec));
3753325b
LP
1623}
1624
d247f232 1625static void cmdline_take_random_seed(void) {
d247f232
LP
1626 size_t suggested;
1627 int r;
1628
1629 if (arg_random_seed_size == 0)
1630 return;
1631
1632 if (getpid_cached() != 1)
1633 return;
1634
1635 assert(arg_random_seed);
1636 suggested = random_pool_size();
1637
1638 if (arg_random_seed_size < suggested)
1639 log_warning("Random seed specified on kernel command line has size %zu, but %zu bytes required to fill entropy pool.",
1640 arg_random_seed_size, suggested);
1641
61bd7d1e 1642 r = random_write_entropy(-1, arg_random_seed, arg_random_seed_size, true);
d247f232
LP
1643 if (r < 0) {
1644 log_warning_errno(r, "Failed to credit entropy specified on kernel command line, ignoring: %m");
1645 return;
1646 }
1647
1648 log_notice("Successfully credited entropy passed on kernel command line.\n"
1649 "Note that the seed provided this way is accessible to unprivileged programs. This functionality should not be used outside of testing environments.");
1650}
1651
/* Core dump setup for PID 1; compiled to a no-op unless ENABLE_COREDUMP is set.
 *
 * skip_setup: when true the RLIMIT_CORE bump is still applied, but disable_coredumps() is not
 * called. */
static void initialize_coredump(bool skip_setup) {
#if ENABLE_COREDUMP
        if (getpid_cached() != 1)
                return;

        /* Lift the core size limit, so that handlers which honour the limit (such as
         * systemd-coredump) process core dumps of system services by default. */
        if (setrlimit(RLIMIT_CORE, &RLIMIT_MAKE_CONST(RLIM_INFINITY)) < 0)
                log_warning_errno(errno, "Failed to set RLIMIT_CORE: %m");

        if (skip_setup)
                return;

        /* At the same time switch the core_pattern logic off by default, so that nothing is
         * stored until the systemd-coredump tool is enabled via sysctl. The kernel command line
         * may still change this later, so that core dumps can be generated during early startup
         * and in the initramfs. */
        disable_coredumps();
#endif
}
1670
c6885f5f
FB
1671static void initialize_core_pattern(bool skip_setup) {
1672 int r;
1673
1674 if (skip_setup || !arg_early_core_pattern)
1675 return;
1676
1677 if (getpid_cached() != 1)
1678 return;
1679
57512c89 1680 r = write_string_file("/proc/sys/kernel/core_pattern", arg_early_core_pattern, WRITE_STRING_FILE_DISABLE_BUFFER);
c6885f5f
FB
1681 if (r < 0)
1682 log_warning_errno(r, "Failed to write '%s' to /proc/sys/kernel/core_pattern, ignoring: %m", arg_early_core_pattern);
1683}
1684
61fbbac1
ZJS
1685static void update_cpu_affinity(bool skip_setup) {
1686 _cleanup_free_ char *mask = NULL;
1687
1688 if (skip_setup || !arg_cpu_affinity.set)
1689 return;
1690
1691 assert(arg_cpu_affinity.allocated > 0);
1692
1693 mask = cpu_set_to_string(&arg_cpu_affinity);
1694 log_debug("Setting CPU affinity to %s.", strnull(mask));
1695
1696 if (sched_setaffinity(0, arg_cpu_affinity.allocated, arg_cpu_affinity.set) < 0)
1697 log_warning_errno(errno, "Failed to set CPU affinity: %m");
1698}
1699
b070c7c0
MS
1700static void update_numa_policy(bool skip_setup) {
1701 int r;
1702 _cleanup_free_ char *nodes = NULL;
1703 const char * policy = NULL;
1704
1705 if (skip_setup || !mpol_is_valid(numa_policy_get_type(&arg_numa_policy)))
1706 return;
1707
1708 if (DEBUG_LOGGING) {
1709 policy = mpol_to_string(numa_policy_get_type(&arg_numa_policy));
1710 nodes = cpu_set_to_range_string(&arg_numa_policy.nodes);
1711 log_debug("Setting NUMA policy to %s, with nodes %s.", strnull(policy), strnull(nodes));
1712 }
1713
1714 r = apply_numa_policy(&arg_numa_policy);
1715 if (r == -EOPNOTSUPP)
1716 log_debug_errno(r, "NUMA support not available, ignoring.");
1717 else if (r < 0)
1718 log_warning_errno(r, "Failed to set NUMA memory policy: %m");
1719}
1720
3c7878f9
LP
1721static void do_reexecute(
1722 int argc,
1723 char *argv[],
1724 const struct rlimit *saved_rlimit_nofile,
1725 const struct rlimit *saved_rlimit_memlock,
1726 FDSet *fds,
1727 const char *switch_root_dir,
1728 const char *switch_root_init,
1729 const char **ret_error_message) {
1730
1731 unsigned i, j, args_size;
1732 const char **args;
1733 int r;
1734
1735 assert(saved_rlimit_nofile);
1736 assert(saved_rlimit_memlock);
1737 assert(ret_error_message);
1738
1739 /* Close and disarm the watchdog, so that the new instance can reinitialize it, but doesn't get rebooted while
1740 * we do that */
1741 watchdog_close(true);
1742
ddfa8b0b
LP
1743 /* Reset RLIMIT_NOFILE + RLIMIT_MEMLOCK back to the kernel defaults, so that the new systemd can pass
1744 * the kernel default to its child processes */
1745 if (saved_rlimit_nofile->rlim_cur != 0)
3c7878f9 1746 (void) setrlimit(RLIMIT_NOFILE, saved_rlimit_nofile);
ddfa8b0b 1747 if (saved_rlimit_memlock->rlim_cur != RLIM_INFINITY)
3c7878f9
LP
1748 (void) setrlimit(RLIMIT_MEMLOCK, saved_rlimit_memlock);
1749
1750 if (switch_root_dir) {
1751 /* Kill all remaining processes from the initrd, but don't wait for them, so that we can handle the
1752 * SIGCHLD for them after deserializing. */
e73c54b8 1753 broadcast_signal(SIGTERM, false, true, arg_default_timeout_stop_usec);
3c7878f9
LP
1754
1755 /* And switch root with MS_MOVE, because we remove the old directory afterwards and detach it. */
1756 r = switch_root(switch_root_dir, "/mnt", true, MS_MOVE);
1757 if (r < 0)
1758 log_error_errno(r, "Failed to switch root, trying to continue: %m");
1759 }
1760
1761 args_size = MAX(6, argc+1);
1762 args = newa(const char*, args_size);
1763
1764 if (!switch_root_init) {
1765 char sfd[DECIMAL_STR_MAX(int) + 1];
1766
1767 /* First try to spawn ourselves with the right path, and with full serialization. We do this only if
1768 * the user didn't specify an explicit init to spawn. */
1769
1770 assert(arg_serialization);
1771 assert(fds);
1772
1773 xsprintf(sfd, "%i", fileno(arg_serialization));
1774
1775 i = 0;
1776 args[i++] = SYSTEMD_BINARY_PATH;
1777 if (switch_root_dir)
1778 args[i++] = "--switched-root";
1779 args[i++] = arg_system ? "--system" : "--user";
1780 args[i++] = "--deserialize";
1781 args[i++] = sfd;
1782 args[i++] = NULL;
1783
1784 assert(i <= args_size);
1785
1786 /*
1787 * We want valgrind to print its memory usage summary before reexecution. Valgrind won't do this is on
1788 * its own on exec(), but it will do it on exit(). Hence, to ensure we get a summary here, fork() off
1789 * a child, let it exit() cleanly, so that it prints the summary, and wait() for it in the parent,
1790 * before proceeding into the exec().
1791 */
1792 valgrind_summary_hack();
1793
1794 (void) execv(args[0], (char* const*) args);
1795 log_debug_errno(errno, "Failed to execute our own binary, trying fallback: %m");
1796 }
1797
1798 /* Try the fallback, if there is any, without any serialization. We pass the original argv[] and envp[]. (Well,
1799 * modulo the ordering changes due to getopt() in argv[], and some cleanups in envp[], but let's hope that
1800 * doesn't matter.) */
1801
1802 arg_serialization = safe_fclose(arg_serialization);
1803 fds = fdset_free(fds);
1804
1805 /* Reopen the console */
1806 (void) make_console_stdio();
1807
1808 for (j = 1, i = 1; j < (unsigned) argc; j++)
1809 args[i++] = argv[j];
1810 args[i++] = NULL;
1811 assert(i <= args_size);
1812
5238e957 1813 /* Re-enable any blocked signals, especially important if we switch from initial ramdisk to init=... */
3c7878f9
LP
1814 (void) reset_all_signal_handlers();
1815 (void) reset_signal_mask();
595225af 1816 (void) rlimit_nofile_safe();
3c7878f9
LP
1817
1818 if (switch_root_init) {
1819 args[0] = switch_root_init;
a5cede8c 1820 (void) execve(args[0], (char* const*) args, saved_env);
3c7878f9
LP
1821 log_warning_errno(errno, "Failed to execute configured init, trying fallback: %m");
1822 }
1823
1824 args[0] = "/sbin/init";
1825 (void) execv(args[0], (char* const*) args);
1826 r = -errno;
1827
1828 manager_status_printf(NULL, STATUS_TYPE_EMERGENCY,
1829 ANSI_HIGHLIGHT_RED " !! " ANSI_NORMAL,
1830 "Failed to execute /sbin/init");
1831
1832 if (r == -ENOENT) {
1833 log_warning("No /sbin/init, trying fallback");
1834
1835 args[0] = "/bin/sh";
1836 args[1] = NULL;
a5cede8c 1837 (void) execve(args[0], (char* const*) args, saved_env);
3c7878f9
LP
1838 log_error_errno(errno, "Failed to execute /bin/sh, giving up: %m");
1839 } else
1840 log_warning_errno(r, "Failed to execute /sbin/init, giving up: %m");
1841
1842 *ret_error_message = "Failed to execute fallback shell";
1843}
1844
7eb35049
LP
1845static int invoke_main_loop(
1846 Manager *m,
a9fd4cd1
FB
1847 const struct rlimit *saved_rlimit_nofile,
1848 const struct rlimit *saved_rlimit_memlock,
7eb35049
LP
1849 bool *ret_reexecute,
1850 int *ret_retval, /* Return parameters relevant for shutting down */
1851 const char **ret_shutdown_verb, /* … */
1852 FDSet **ret_fds, /* Return parameters for reexecuting */
1853 char **ret_switch_root_dir, /* … */
1854 char **ret_switch_root_init, /* … */
1855 const char **ret_error_message) {
1856
1857 int r;
1858
1859 assert(m);
a9fd4cd1
FB
1860 assert(saved_rlimit_nofile);
1861 assert(saved_rlimit_memlock);
7eb35049
LP
1862 assert(ret_reexecute);
1863 assert(ret_retval);
1864 assert(ret_shutdown_verb);
1865 assert(ret_fds);
1866 assert(ret_switch_root_dir);
1867 assert(ret_switch_root_init);
1868 assert(ret_error_message);
1869
1870 for (;;) {
1871 r = manager_loop(m);
1872 if (r < 0) {
1873 *ret_error_message = "Failed to run main loop";
1874 return log_emergency_errno(r, "Failed to run main loop: %m");
1875 }
1876
3ca4d0b3 1877 switch ((ManagerObjective) r) {
7eb35049 1878
a6ecbf83 1879 case MANAGER_RELOAD: {
bda7d78b 1880 LogTarget saved_log_target;
a6ecbf83
FB
1881 int saved_log_level;
1882
7eb35049
LP
1883 log_info("Reloading.");
1884
3fe91079 1885 /* First, save any overridden log level/target, then parse the configuration file, which might
bda7d78b
FB
1886 * change the log level to new settings. */
1887
a6ecbf83 1888 saved_log_level = m->log_level_overridden ? log_get_max_level() : -1;
bda7d78b 1889 saved_log_target = m->log_target_overridden ? log_get_target() : _LOG_TARGET_INVALID;
a6ecbf83 1890
a9fd4cd1 1891 (void) parse_configuration(saved_rlimit_nofile, saved_rlimit_memlock);
7eb35049
LP
1892
1893 set_manager_defaults(m);
986935cf 1894 set_manager_settings(m);
7eb35049 1895
61fbbac1 1896 update_cpu_affinity(false);
b070c7c0 1897 update_numa_policy(false);
61fbbac1 1898
a6ecbf83
FB
1899 if (saved_log_level >= 0)
1900 manager_override_log_level(m, saved_log_level);
bda7d78b
FB
1901 if (saved_log_target >= 0)
1902 manager_override_log_target(m, saved_log_target);
a6ecbf83 1903
7eb35049
LP
1904 r = manager_reload(m);
1905 if (r < 0)
7a35fa24
LP
1906 /* Reloading failed before the point of no return. Let's continue running as if nothing happened. */
1907 m->objective = MANAGER_OK;
7eb35049
LP
1908
1909 break;
a6ecbf83 1910 }
7eb35049
LP
1911
1912 case MANAGER_REEXECUTE:
1913
1914 r = prepare_reexecute(m, &arg_serialization, ret_fds, false);
1915 if (r < 0) {
1916 *ret_error_message = "Failed to prepare for reexecution";
1917 return r;
1918 }
1919
1920 log_notice("Reexecuting.");
1921
1922 *ret_reexecute = true;
1923 *ret_retval = EXIT_SUCCESS;
1924 *ret_shutdown_verb = NULL;
1925 *ret_switch_root_dir = *ret_switch_root_init = NULL;
1926
1927 return 0;
1928
1929 case MANAGER_SWITCH_ROOT:
1930 if (!m->switch_root_init) {
1931 r = prepare_reexecute(m, &arg_serialization, ret_fds, true);
1932 if (r < 0) {
1933 *ret_error_message = "Failed to prepare for reexecution";
1934 return r;
1935 }
1936 } else
1937 *ret_fds = NULL;
1938
1939 log_notice("Switching root.");
1940
1941 *ret_reexecute = true;
1942 *ret_retval = EXIT_SUCCESS;
1943 *ret_shutdown_verb = NULL;
1944
1945 /* Steal the switch root parameters */
49052946
YW
1946 *ret_switch_root_dir = TAKE_PTR(m->switch_root);
1947 *ret_switch_root_init = TAKE_PTR(m->switch_root_init);
7eb35049
LP
1948
1949 return 0;
1950
1951 case MANAGER_EXIT:
1952
1953 if (MANAGER_IS_USER(m)) {
1954 log_debug("Exit.");
1955
1956 *ret_reexecute = false;
1957 *ret_retval = m->return_value;
1958 *ret_shutdown_verb = NULL;
1959 *ret_fds = NULL;
1960 *ret_switch_root_dir = *ret_switch_root_init = NULL;
1961
1962 return 0;
1963 }
1964
1965 _fallthrough_;
1966 case MANAGER_REBOOT:
1967 case MANAGER_POWEROFF:
1968 case MANAGER_HALT:
1969 case MANAGER_KEXEC: {
af41e508
LP
1970 static const char * const table[_MANAGER_OBJECTIVE_MAX] = {
1971 [MANAGER_EXIT] = "exit",
1972 [MANAGER_REBOOT] = "reboot",
7eb35049 1973 [MANAGER_POWEROFF] = "poweroff",
af41e508
LP
1974 [MANAGER_HALT] = "halt",
1975 [MANAGER_KEXEC] = "kexec",
7eb35049
LP
1976 };
1977
1978 log_notice("Shutting down.");
1979
1980 *ret_reexecute = false;
1981 *ret_retval = m->return_value;
af41e508 1982 assert_se(*ret_shutdown_verb = table[m->objective]);
7eb35049
LP
1983 *ret_fds = NULL;
1984 *ret_switch_root_dir = *ret_switch_root_init = NULL;
1985
1986 return 0;
1987 }
1988
1989 default:
af41e508 1990 assert_not_reached("Unknown or unexpected manager objective.");
7eb35049
LP
1991 }
1992 }
1993}
1994
31aef7ff
LP
1995static void log_execution_mode(bool *ret_first_boot) {
1996 assert(ret_first_boot);
1997
1998 if (arg_system) {
1999 int v;
2000
e7b18106 2001 log_info("systemd " GIT_VERSION " running in %ssystem mode (%s)",
91b79ba8
ZJS
2002 arg_action == ACTION_TEST ? "test " : "",
2003 systemd_features);
31aef7ff
LP
2004
2005 v = detect_virtualization();
2006 if (v > 0)
2007 log_info("Detected virtualization %s.", virtualization_to_string(v));
2008
2009 log_info("Detected architecture %s.", architecture_to_string(uname_architecture()));
2010
2011 if (in_initrd()) {
2012 *ret_first_boot = false;
2013 log_info("Running in initial RAM disk.");
2014 } else {
583cef3b
HS
2015 int r;
2016 _cleanup_free_ char *id_text = NULL;
2017
2018 /* Let's check whether we are in first boot. We use /etc/machine-id as flag file
2019 * for this: If it is missing or contains the value "uninitialized", this is the
2020 * first boot. In any other case, it is not. This allows container managers and
2021 * installers to provision a couple of files already. If the container manager
2022 * wants to provision the machine ID itself it should pass $container_uuid to PID 1. */
2023
2024 r = read_one_line_file("/etc/machine-id", &id_text);
2025 if (r < 0 || streq(id_text, "uninitialized")) {
2026 if (r < 0 && r != -ENOENT)
2027 log_warning_errno(r, "Unexpected error while reading /etc/machine-id, ignoring: %m");
2028
2029 *ret_first_boot = true;
2030 log_info("Detected first boot.");
2031 } else {
2032 *ret_first_boot = false;
2033 log_debug("Detected initialized system, this is not the first boot.");
2034 }
31aef7ff
LP
2035 }
2036 } else {
b9e90f3a 2037 if (DEBUG_LOGGING) {
c2b2df60 2038 _cleanup_free_ char *t = NULL;
31aef7ff 2039
b9e90f3a 2040 t = uid_to_name(getuid());
91b79ba8
ZJS
2041 log_debug("systemd " GIT_VERSION " running in %suser mode for user " UID_FMT "/%s. (%s)",
2042 arg_action == ACTION_TEST ? " test" : "",
2043 getuid(), strna(t), systemd_features);
b9e90f3a 2044 }
31aef7ff
LP
2045
2046 *ret_first_boot = false;
2047 }
2048}
2049
5afbaa36
LP
2050static int initialize_runtime(
2051 bool skip_setup,
3023f2fe 2052 bool first_boot,
5afbaa36
LP
2053 struct rlimit *saved_rlimit_nofile,
2054 struct rlimit *saved_rlimit_memlock,
2055 const char **ret_error_message) {
5afbaa36
LP
2056 int r;
2057
2058 assert(ret_error_message);
2059
2060 /* Sets up various runtime parameters. Many of these initializations are conditionalized:
2061 *
2062 * - Some only apply to --system instances
2063 * - Some only apply to --user instances
2064 * - Some only apply when we first start up, but not when we reexecute
2065 */
2066
2d776038
LP
2067 if (arg_action != ACTION_RUN)
2068 return 0;
2069
61fbbac1 2070 update_cpu_affinity(skip_setup);
b070c7c0 2071 update_numa_policy(skip_setup);
61fbbac1 2072
3c3c6cb9 2073 if (arg_system) {
5238e957 2074 /* Make sure we leave a core dump without panicking the kernel. */
3c3c6cb9 2075 install_crash_handler();
5afbaa36 2076
3c3c6cb9 2077 if (!skip_setup) {
143fadf3 2078 r = mount_cgroup_controllers();
3c3c6cb9
LP
2079 if (r < 0) {
2080 *ret_error_message = "Failed to mount cgroup hierarchies";
2081 return r;
2082 }
2083
2084 status_welcome();
b6fad306 2085 (void) hostname_setup(true);
3023f2fe
HS
2086 /* Force transient machine-id on first boot. */
2087 machine_id_setup(NULL, first_boot, arg_machine_id, NULL);
df883de9 2088 (void) loopback_setup();
3c3c6cb9 2089 bump_unix_max_dgram_qlen();
a8b627aa 2090 bump_file_max_and_nr_open();
3c3c6cb9
LP
2091 test_usr();
2092 write_container_id();
2093 }
8a2c1fbf 2094
3c3c6cb9
LP
2095 if (arg_watchdog_device) {
2096 r = watchdog_set_device(arg_watchdog_device);
2097 if (r < 0)
2098 log_warning_errno(r, "Failed to set watchdog device to %s, ignoring: %m", arg_watchdog_device);
2099 }
32429805
LP
2100 } else {
2101 _cleanup_free_ char *p = NULL;
2102
2103 /* Create the runtime directory and place the inaccessible device nodes there, if we run in
2104 * user mode. In system mode mount_setup() already did that. */
2105
2106 r = xdg_user_runtime_dir(&p, "/systemd");
2107 if (r < 0) {
2108 *ret_error_message = "$XDG_RUNTIME_DIR is not set";
2109 return log_emergency_errno(r, "Failed to determine $XDG_RUNTIME_DIR path: %m");
2110 }
2111
e813a74a 2112 (void) mkdir_p_label(p, 0755);
32429805 2113 (void) make_inaccessible_nodes(p, UID_INVALID, GID_INVALID);
3c3c6cb9 2114 }
5afbaa36
LP
2115
2116 if (arg_timer_slack_nsec != NSEC_INFINITY)
2117 if (prctl(PR_SET_TIMERSLACK, arg_timer_slack_nsec) < 0)
3a671cd1 2118 log_warning_errno(errno, "Failed to adjust timer slack, ignoring: %m");
5afbaa36
LP
2119
2120 if (arg_system && !cap_test_all(arg_capability_bounding_set)) {
2121 r = capability_bounding_set_drop_usermode(arg_capability_bounding_set);
2122 if (r < 0) {
2123 *ret_error_message = "Failed to drop capability bounding set of usermode helpers";
2124 return log_emergency_errno(r, "Failed to drop capability bounding set of usermode helpers: %m");
2125 }
2126
2127 r = capability_bounding_set_drop(arg_capability_bounding_set, true);
2128 if (r < 0) {
2129 *ret_error_message = "Failed to drop capability bounding set";
2130 return log_emergency_errno(r, "Failed to drop capability bounding set: %m");
2131 }
2132 }
2133
39362f6f
JB
2134 if (arg_system && arg_no_new_privs) {
2135 if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) < 0) {
2136 *ret_error_message = "Failed to disable new privileges";
2137 return log_emergency_errno(errno, "Failed to disable new privileges: %m");
2138 }
2139 }
2140
5afbaa36
LP
2141 if (arg_syscall_archs) {
2142 r = enforce_syscall_archs(arg_syscall_archs);
2143 if (r < 0) {
2144 *ret_error_message = "Failed to set syscall architectures";
2145 return r;
2146 }
2147 }
2148
2149 if (!arg_system)
2150 /* Become reaper of our children */
2151 if (prctl(PR_SET_CHILD_SUBREAPER, 1) < 0)
2152 log_warning_errno(errno, "Failed to make us a subreaper: %m");
2153
a17c1712
LP
2154 /* Bump up RLIMIT_NOFILE for systemd itself */
2155 (void) bump_rlimit_nofile(saved_rlimit_nofile);
2156 (void) bump_rlimit_memlock(saved_rlimit_memlock);
5afbaa36
LP
2157
2158 return 0;
2159}
2160
6acca5fc
LP
2161static int do_queue_default_job(
2162 Manager *m,
2163 const char **ret_error_message) {
2164
2165 _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
f1d075dc
ZJS
2166 const char *unit;
2167 Job *job;
2168 Unit *target;
6acca5fc
LP
2169 int r;
2170
8755dbad 2171 if (arg_default_unit)
f1d075dc 2172 unit = arg_default_unit;
8755dbad 2173 else if (in_initrd())
f1d075dc 2174 unit = SPECIAL_INITRD_TARGET;
8755dbad 2175 else
f1d075dc 2176 unit = SPECIAL_DEFAULT_TARGET;
8755dbad 2177
f1d075dc 2178 log_debug("Activating default unit: %s", unit);
8755dbad 2179
f1d075dc 2180 r = manager_load_startable_unit_or_warn(m, unit, NULL, &target);
8755dbad
ZJS
2181 if (r < 0 && in_initrd() && !arg_default_unit) {
2182 /* Fall back to default.target, which we used to always use by default. Only do this if no
2183 * explicit configuration was given. */
2184
2185 log_info("Falling back to " SPECIAL_DEFAULT_TARGET ".");
6acca5fc 2186
8755dbad
ZJS
2187 r = manager_load_startable_unit_or_warn(m, SPECIAL_DEFAULT_TARGET, NULL, &target);
2188 }
4109ede7 2189 if (r < 0) {
8755dbad 2190 log_info("Falling back to " SPECIAL_RESCUE_TARGET ".");
6acca5fc 2191
4109ede7 2192 r = manager_load_startable_unit_or_warn(m, SPECIAL_RESCUE_TARGET, NULL, &target);
6acca5fc 2193 if (r < 0) {
8755dbad
ZJS
2194 *ret_error_message = r == -ERFKILL ? SPECIAL_RESCUE_TARGET " masked"
2195 : "Failed to load " SPECIAL_RESCUE_TARGET;
4109ede7 2196 return r;
6acca5fc
LP
2197 }
2198 }
2199
2200 assert(target->load_state == UNIT_LOADED);
2201
f1d075dc 2202 r = manager_add_job(m, JOB_START, target, JOB_ISOLATE, NULL, &error, &job);
6acca5fc
LP
2203 if (r == -EPERM) {
2204 log_debug_errno(r, "Default target could not be isolated, starting instead: %s", bus_error_message(&error, r));
2205
2206 sd_bus_error_free(&error);
2207
f1d075dc 2208 r = manager_add_job(m, JOB_START, target, JOB_REPLACE, NULL, &error, &job);
6acca5fc
LP
2209 if (r < 0) {
2210 *ret_error_message = "Failed to start default target";
2211 return log_emergency_errno(r, "Failed to start default target: %s", bus_error_message(&error, r));
2212 }
2213
2214 } else if (r < 0) {
2215 *ret_error_message = "Failed to isolate default target";
2216 return log_emergency_errno(r, "Failed to isolate default target: %s", bus_error_message(&error, r));
c86c31d9
ZJS
2217 } else
2218 log_info("Queued %s job for default target %s.",
2219 job_type_to_string(job->type),
04d232d8 2220 unit_status_string(job->unit, NULL));
6acca5fc 2221
f1d075dc 2222 m->default_unit_job_id = job->id;
6acca5fc
LP
2223
2224 return 0;
2225}
2226
a9fd4cd1
FB
/* Reads the current RLIMIT_NOFILE and RLIMIT_MEMLOCK into the two supplied structures. A failed
 * getrlimit() is logged and ignored; the corresponding output is not written by this function
 * in that case. */
static void save_rlimits(struct rlimit *saved_rlimit_nofile,
                         struct rlimit *saved_rlimit_memlock) {

        int k;

        assert(saved_rlimit_nofile);
        assert(saved_rlimit_memlock);

        k = getrlimit(RLIMIT_NOFILE, saved_rlimit_nofile);
        if (k < 0)
                log_warning_errno(errno, "Reading RLIMIT_NOFILE failed, ignoring: %m");

        k = getrlimit(RLIMIT_MEMLOCK, saved_rlimit_memlock);
        if (k < 0)
                log_warning_errno(errno, "Reading RLIMIT_MEMLOCK failed, ignoring: %m");
}
2239
2240static void fallback_rlimit_nofile(const struct rlimit *saved_rlimit_nofile) {
2241 struct rlimit *rl;
2242
2243 if (arg_default_rlimit[RLIMIT_NOFILE])
2244 return;
2245
2246 /* Make sure forked processes get limits based on the original kernel setting */
2247
2248 rl = newdup(struct rlimit, saved_rlimit_nofile, 1);
2249 if (!rl) {
2250 log_oom();
2251 return;
2252 }
2253
2254 /* Bump the hard limit for system services to a substantially higher value. The default
2255 * hard limit current kernels set is pretty low (4K), mostly for historical
2256 * reasons. According to kernel developers, the fd handling in recent kernels has been
2257 * optimized substantially enough, so that we can bump the limit now, without paying too
2258 * high a price in memory or performance. Note however that we only bump the hard limit,
2259 * not the soft limit. That's because select() works the way it works, and chokes on fds
2260 * >= 1024. If we'd bump the soft limit globally, it might accidentally happen to
2261 * unexpecting programs that they get fds higher than what they can process using
2262 * select(). By only bumping the hard limit but leaving the low limit as it is we avoid
2263 * this pitfall: programs that are written by folks aware of the select() problem in mind
2264 * (and thus use poll()/epoll instead of select(), the way everybody should) can
2265 * explicitly opt into high fds by bumping their soft limit beyond 1024, to the hard limit
2266 * we pass. */
2267 if (arg_system) {
2268 int nr;
2269
2270 /* Get the underlying absolute limit the kernel enforces */
2271 nr = read_nr_open();
2272
2273 rl->rlim_max = MIN((rlim_t) nr, MAX(rl->rlim_max, (rlim_t) HIGH_RLIMIT_NOFILE));
2274 }
2275
2276 /* If for some reason we were invoked with a soft limit above 1024 (which should never
2277 * happen!, but who knows what we get passed in from pam_limit when invoked as --user
2278 * instance), then lower what we pass on to not confuse our children */
2279 rl->rlim_cur = MIN(rl->rlim_cur, (rlim_t) FD_SETSIZE);
2280
2281 arg_default_rlimit[RLIMIT_NOFILE] = rl;
2282}
2283
2284static void fallback_rlimit_memlock(const struct rlimit *saved_rlimit_memlock) {
2285 struct rlimit *rl;
2286
2287 /* Pass the original value down to invoked processes */
2288
2289 if (arg_default_rlimit[RLIMIT_MEMLOCK])
2290 return;
2291
2292 rl = newdup(struct rlimit, saved_rlimit_memlock, 1);
2293 if (!rl) {
2294 log_oom();
2295 return;
2296 }
2297
2298 arg_default_rlimit[RLIMIT_MEMLOCK] = rl;
2299}
2300
d55ed7de
ZJS
2301static void setenv_manager_environment(void) {
2302 char **p;
2303 int r;
2304
2305 STRV_FOREACH(p, arg_manager_environment) {
2306 log_debug("Setting '%s' in our own environment.", *p);
2307
2308 r = putenv_dup(*p, true);
2309 if (r < 0)
2310 log_warning_errno(errno, "Failed to setenv \"%s\", ignoring: %m", *p);
2311 }
2312}
2313
fb39af4c
ZJS
2314static void reset_arguments(void) {
2315 /* Frees/resets arg_* variables, with a few exceptions commented below. */
970777b5
LP
2316
2317 arg_default_unit = mfree(arg_default_unit);
fb39af4c
ZJS
2318
2319 /* arg_system — ignore */
2320
2321 arg_dump_core = true;
2322 arg_crash_chvt = -1;
2323 arg_crash_shell = false;
2324 arg_crash_reboot = false;
970777b5 2325 arg_confirm_spawn = mfree(arg_confirm_spawn);
fb39af4c 2326 arg_show_status = _SHOW_STATUS_INVALID;
36cf4507 2327 arg_status_unit_format = STATUS_UNIT_FORMAT_DEFAULT;
fb39af4c
ZJS
2328 arg_switched_root = false;
2329 arg_pager_flags = 0;
2330 arg_service_watchdogs = true;
2331 arg_default_std_output = EXEC_OUTPUT_JOURNAL;
2332 arg_default_std_error = EXEC_OUTPUT_INHERIT;
2333 arg_default_restart_usec = DEFAULT_RESTART_USEC;
2334 arg_default_timeout_start_usec = DEFAULT_TIMEOUT_USEC;
2335 arg_default_timeout_stop_usec = DEFAULT_TIMEOUT_USEC;
2336 arg_default_timeout_abort_usec = DEFAULT_TIMEOUT_USEC;
2337 arg_default_timeout_abort_set = false;
2338 arg_default_start_limit_interval = DEFAULT_START_LIMIT_INTERVAL;
2339 arg_default_start_limit_burst = DEFAULT_START_LIMIT_BURST;
2340 arg_runtime_watchdog = 0;
65224c1d 2341 arg_reboot_watchdog = 10 * USEC_PER_MINUTE;
acafd7d8 2342 arg_kexec_watchdog = 0;
fb39af4c
ZJS
2343 arg_early_core_pattern = NULL;
2344 arg_watchdog_device = NULL;
2345
970777b5 2346 arg_default_environment = strv_free(arg_default_environment);
d55ed7de 2347 arg_manager_environment = strv_free(arg_manager_environment);
fb39af4c
ZJS
2348 rlimit_free_all(arg_default_rlimit);
2349
2350 arg_capability_bounding_set = CAP_ALL;
2351 arg_no_new_privs = false;
2352 arg_timer_slack_nsec = NSEC_INFINITY;
2353 arg_default_timer_accuracy_usec = 1 * USEC_PER_MINUTE;
2354
970777b5 2355 arg_syscall_archs = set_free(arg_syscall_archs);
61fbbac1 2356
fb39af4c
ZJS
2357 /* arg_serialization — ignore */
2358
2359 arg_default_cpu_accounting = -1;
2360 arg_default_io_accounting = false;
2361 arg_default_ip_accounting = false;
2362 arg_default_blockio_accounting = false;
2363 arg_default_memory_accounting = MEMORY_ACCOUNTING_DEFAULT;
2364 arg_default_tasks_accounting = true;
3a0f06c4 2365 arg_default_tasks_max = DEFAULT_TASKS_MAX;
fb39af4c
ZJS
2366 arg_machine_id = (sd_id128_t) {};
2367 arg_cad_burst_action = EMERGENCY_ACTION_REBOOT_FORCE;
2368 arg_default_oom_policy = OOM_STOP;
2369
61fbbac1 2370 cpu_set_reset(&arg_cpu_affinity);
b070c7c0 2371 numa_policy_reset(&arg_numa_policy);
d247f232
LP
2372
2373 arg_random_seed = mfree(arg_random_seed);
2374 arg_random_seed_size = 0;
33d943d1 2375 arg_clock_usec = 0;
970777b5
LP
2376}
2377
a9fd4cd1
FB
2378static int parse_configuration(const struct rlimit *saved_rlimit_nofile,
2379 const struct rlimit *saved_rlimit_memlock) {
97d1fb94
LP
2380 int r;
2381
a9fd4cd1
FB
2382 assert(saved_rlimit_nofile);
2383 assert(saved_rlimit_memlock);
2384
fb39af4c
ZJS
2385 /* Assign configuration defaults */
2386 reset_arguments();
2387
97d1fb94 2388 r = parse_config_file();
470a5e6d
ZJS
2389 if (r < 0)
2390 log_warning_errno(r, "Failed to parse config file, ignoring: %m");
97d1fb94
LP
2391
2392 if (arg_system) {
2393 r = proc_cmdline_parse(parse_proc_cmdline_item, NULL, 0);
2394 if (r < 0)
2395 log_warning_errno(r, "Failed to parse kernel command line, ignoring: %m");
2396 }
2397
a9fd4cd1
FB
2398 /* Initialize some default rlimits for services if they haven't been configured */
2399 fallback_rlimit_nofile(saved_rlimit_nofile);
2400 fallback_rlimit_memlock(saved_rlimit_memlock);
2401
97d1fb94
LP
2402 /* Note that this also parses bits from the kernel command line, including "debug". */
2403 log_parse_environment();
2404
db33214b 2405 /* Initialize the show status setting if it hasn't been set explicitly yet */
7a293242 2406 if (arg_show_status == _SHOW_STATUS_INVALID)
db33214b
LP
2407 arg_show_status = SHOW_STATUS_YES;
2408
d55ed7de
ZJS
2409 /* Push variables into the manager environment block */
2410 setenv_manager_environment();
2411
97d1fb94
LP
2412 return 0;
2413}
2414
b0d7c989
LP
2415static int safety_checks(void) {
2416
febf46a4 2417 if (getpid_cached() == 1 &&
baaa35ad
ZJS
2418 arg_action != ACTION_RUN)
2419 return log_error_errno(SYNTHETIC_ERRNO(EPERM),
2420 "Unsupported execution mode while PID 1.");
febf46a4
LP
2421
2422 if (getpid_cached() == 1 &&
baaa35ad
ZJS
2423 !arg_system)
2424 return log_error_errno(SYNTHETIC_ERRNO(EPERM),
2425 "Can't run --user mode as PID 1.");
febf46a4
LP
2426
2427 if (arg_action == ACTION_RUN &&
2428 arg_system &&
baaa35ad
ZJS
2429 getpid_cached() != 1)
2430 return log_error_errno(SYNTHETIC_ERRNO(EPERM),
2431 "Can't run system mode unless PID 1.");
febf46a4 2432
b0d7c989 2433 if (arg_action == ACTION_TEST &&
baaa35ad
ZJS
2434 geteuid() == 0)
2435 return log_error_errno(SYNTHETIC_ERRNO(EPERM),
2436 "Don't run test mode as root.");
b0d7c989
LP
2437
2438 if (!arg_system &&
2439 arg_action == ACTION_RUN &&
baaa35ad
ZJS
2440 sd_booted() <= 0)
2441 return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP),
2442 "Trying to run as user instance, but the system has not been booted with systemd.");
b0d7c989
LP
2443
2444 if (!arg_system &&
2445 arg_action == ACTION_RUN &&
baaa35ad
ZJS
2446 !getenv("XDG_RUNTIME_DIR"))
2447 return log_error_errno(SYNTHETIC_ERRNO(EUNATCH),
2448 "Trying to run as user instance, but $XDG_RUNTIME_DIR is not set.");
b0d7c989
LP
2449
2450 if (arg_system &&
2451 arg_action == ACTION_RUN &&
baaa35ad
ZJS
2452 running_in_chroot() > 0)
2453 return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP),
2454 "Cannot be run in a chroot() environment.");
b0d7c989
LP
2455
2456 return 0;
2457}
2458
74da609f
LP
2459static int initialize_security(
2460 bool *loaded_policy,
2461 dual_timestamp *security_start_timestamp,
2462 dual_timestamp *security_finish_timestamp,
2463 const char **ret_error_message) {
2464
2465 int r;
2466
2467 assert(loaded_policy);
2468 assert(security_start_timestamp);
2469 assert(security_finish_timestamp);
2470 assert(ret_error_message);
2471
2472 dual_timestamp_get(security_start_timestamp);
2473
97149f40 2474 r = mac_selinux_setup(loaded_policy);
74da609f
LP
2475 if (r < 0) {
2476 *ret_error_message = "Failed to load SELinux policy";
2477 return r;
2478 }
2479
2480 r = mac_smack_setup(loaded_policy);
2481 if (r < 0) {
2482 *ret_error_message = "Failed to load SMACK policy";
2483 return r;
2484 }
2485
2ffadd3c
Y
2486 r = mac_apparmor_setup();
2487 if (r < 0) {
2488 *ret_error_message = "Failed to load AppArmor policy";
2489 return r;
2490 }
2491
74da609f
LP
2492 r = ima_setup();
2493 if (r < 0) {
2494 *ret_error_message = "Failed to load IMA policy";
2495 return r;
2496 }
2497
2498 dual_timestamp_get(security_finish_timestamp);
2499 return 0;
2500}
2501
efeb853f
LP
2502static int collect_fds(FDSet **ret_fds, const char **ret_error_message) {
2503 int r;
2504
2505 assert(ret_fds);
2506 assert(ret_error_message);
2507
2508 r = fdset_new_fill(ret_fds);
2509 if (r < 0) {
2510 *ret_error_message = "Failed to allocate fd set";
2511 return log_emergency_errno(r, "Failed to allocate fd set: %m");
2512 }
2513
2514 fdset_cloexec(*ret_fds, true);
2515
2516 if (arg_serialization)
2517 assert_se(fdset_remove(*ret_fds, fileno(arg_serialization)) >= 0);
2518
2519 return 0;
2520}
2521
2e51b31c
LP
2522static void setup_console_terminal(bool skip_setup) {
2523
2524 if (!arg_system)
2525 return;
2526
2527 /* Become a session leader if we aren't one yet. */
2528 (void) setsid();
2529
2530 /* If we are init, we connect stdin/stdout/stderr to /dev/null and make sure we don't have a controlling
2531 * tty. */
2532 (void) release_terminal();
2533
2534 /* Reset the console, but only if this is really init and we are freshly booted */
2535 if (getpid_cached() == 1 && !skip_setup)
2536 (void) console_setup();
2537}
2538
aa40ff07
LP
/* Quick peek at the command line, long before it is fully parsed, to tell a reexecution from a
 * normal boot-up.
 *
 * Returns true (skip the extensive setup) if "--deserialize" is present and "--switched-root" is
 * not. After a root switch the special setup is done anyway, even though deserialization takes
 * place in that case too. */
static bool early_skip_setup_check(int argc, char *argv[]) {
        bool deserializing = false;

        for (int k = 1; k < argc; k++) {
                const char *a = argv[k];

                if (streq(a, "--switched-root"))
                        return false; /* If we switched root, don't skip the setup. */

                if (streq(a, "--deserialize"))
                        deserializing = true;
        }

        /* Deserializing means we are reexecuting, hence the extensive setup can be avoided */
        return deserializing;
}
2554
0e06a031
LP
2555static int save_env(void) {
2556 char **l;
2557
2558 l = strv_copy(environ);
2559 if (!l)
2560 return -ENOMEM;
2561
2562 strv_free_and_replace(saved_env, l);
2563 return 0;
2564}
2565
60918275 2566int main(int argc, char *argv[]) {
625e8690
LP
2567
2568 dual_timestamp initrd_timestamp = DUAL_TIMESTAMP_NULL, userspace_timestamp = DUAL_TIMESTAMP_NULL, kernel_timestamp = DUAL_TIMESTAMP_NULL,
2569 security_start_timestamp = DUAL_TIMESTAMP_NULL, security_finish_timestamp = DUAL_TIMESTAMP_NULL;
ddfa8b0b
LP
2570 struct rlimit saved_rlimit_nofile = RLIMIT_MAKE_CONST(0),
2571 saved_rlimit_memlock = RLIMIT_MAKE_CONST(RLIM_INFINITY); /* The original rlimits we passed
2572 * in. Note we use different values
2573 * for the two that indicate whether
2574 * these fields are initialized! */
625e8690
LP
2575 bool skip_setup, loaded_policy = false, queue_default_job = false, first_boot = false, reexecute = false;
2576 char *switch_root_dir = NULL, *switch_root_init = NULL;
9d76d730 2577 usec_t before_startup, after_startup;
625e8690 2578 static char systemd[] = "systemd";
625e8690
LP
2579 const char *shutdown_verb = NULL, *error_message = NULL;
2580 int r, retval = EXIT_FAILURE;
2581 Manager *m = NULL;
a16e1123 2582 FDSet *fds = NULL;
27b14a22 2583
d72a8f10 2584 /* SysV compatibility: redirect init → telinit */
6808a0bc 2585 redirect_telinit(argc, argv);
2cb1a60d 2586
d72a8f10 2587 /* Take timestamps early on */
c3a170f3
HH
2588 dual_timestamp_from_monotonic(&kernel_timestamp, 0);
2589 dual_timestamp_get(&userspace_timestamp);
2590
d72a8f10
LP
2591 /* Figure out whether we need to do initialize the system, or if we already did that because we are
2592 * reexecuting */
aa40ff07 2593 skip_setup = early_skip_setup_check(argc, argv);
d03bc1b8 2594
d72a8f10
LP
2595 /* If we get started via the /sbin/init symlink then we are called 'init'. After a subsequent reexecution we
2596 * are then called 'systemd'. That is confusing, hence let's call us systemd right-away. */
f3b6a3ed 2597 program_invocation_short_name = systemd;
eee8b7ab 2598 (void) prctl(PR_SET_NAME, systemd);
5d6b1584 2599
d72a8f10 2600 /* Save the original command line */
36fea155 2601 save_argc_argv(argc, argv);
f3b6a3ed 2602
0e06a031
LP
2603 /* Save the original environment as we might need to restore it if we're requested to execute another
2604 * system manager later. */
2605 r = save_env();
2606 if (r < 0) {
2607 error_message = "Failed to copy environment block";
2608 goto finish;
2609 }
a5cede8c 2610
6fdb8de4 2611 /* Make sure that if the user says "syslog" we actually log to the journal. */
c1dc6153 2612 log_set_upgrade_syslog_to_journal(true);
bbe63281 2613
df0ff127 2614 if (getpid_cached() == 1) {
b5752d23
LP
2615 /* When we run as PID 1 force system mode */
2616 arg_system = true;
2617
48a601fe 2618 /* Disable the umask logic */
90dc8c2e
MG
2619 umask(0);
2620
92890452
LP
2621 /* Make sure that at least initially we do not ever log to journald/syslogd, because it might not be
2622 * activated yet (even though the log socket for it exists). */
d075092f
LP
2623 log_set_prohibit_ipc(true);
2624
48a601fe
LP
2625 /* Always reopen /dev/console when running as PID 1 or one of its pre-execve() children. This is
2626 * important so that we never end up logging to any foreign stderr, for example if we have to log in a
2627 * child process right before execve()'ing the actual binary, at a point in time where socket
2628 * activation stderr/stdout area already set up. */
2629 log_set_always_reopen_console(true);
48a601fe 2630
92890452 2631 if (detect_container() <= 0) {
4f8d551f 2632
92890452 2633 /* Running outside of a container as PID 1 */
92890452
LP
2634 log_set_target(LOG_TARGET_KMSG);
2635 log_open();
a866073d 2636
92890452
LP
2637 if (in_initrd())
2638 initrd_timestamp = userspace_timestamp;
c3ba6250 2639
92890452
LP
2640 if (!skip_setup) {
2641 r = mount_setup_early();
2642 if (r < 0) {
2643 error_message = "Failed to mount early API filesystems";
2644 goto finish;
2645 }
2646
0a2eef1e
LP
2647 /* Let's open the log backend a second time, in case the first time didn't
2648 * work. Quite possibly we have mounted /dev just now, so /dev/kmsg became
2649 * available, and it previously wasn't. */
2650 log_open();
2651
6123dfaa
ZJS
2652 disable_printk_ratelimit();
2653
92890452
LP
2654 r = initialize_security(
2655 &loaded_policy,
2656 &security_start_timestamp,
2657 &security_finish_timestamp,
2658 &error_message);
2659 if (r < 0)
2660 goto finish;
d723cd65 2661 }
eee8b7ab 2662
92890452 2663 if (mac_selinux_init() < 0) {
a9ba0e32 2664 error_message = "Failed to initialize SELinux support";
96694e99 2665 goto finish;
92890452 2666 }
0b3325e7 2667
92890452
LP
2668 if (!skip_setup)
2669 initialize_clock();
2670
2671 /* Set the default for later on, but don't actually open the logs like this for now. Note that
2672 * if we are transitioning from the initrd there might still be journal fd open, and we
2673 * shouldn't attempt opening that before we parsed /proc/cmdline which might redirect output
2674 * elsewhere. */
2675 log_set_target(LOG_TARGET_JOURNAL_OR_KMSG);
2676
2677 } else {
2678 /* Running inside a container, as PID 1 */
92890452
LP
2679 log_set_target(LOG_TARGET_CONSOLE);
2680 log_open();
2681
2682 /* For later on, see above... */
2683 log_set_target(LOG_TARGET_JOURNAL);
2684
45250e66 2685 /* clear the kernel timestamp, because we are in a container */
92890452 2686 kernel_timestamp = DUAL_TIMESTAMP_NULL;
cb6531be 2687 }
7948c4df 2688
92890452 2689 initialize_coredump(skip_setup);
a866073d 2690
92890452
LP
2691 r = fixup_environment();
2692 if (r < 0) {
2693 log_emergency_errno(r, "Failed to fix up PID 1 environment: %m");
2694 error_message = "Failed to fix up PID1 environment";
2695 goto finish;
2696 }
a866073d 2697
92890452
LP
2698 /* Try to figure out if we can use colors with the console. No need to do that for user instances since
2699 * they never log into the console. */
3a18b604 2700 log_show_color(colors_enabled());
92890452 2701
c76cf844
AK
2702 r = make_null_stdio();
2703 if (r < 0)
92890452 2704 log_warning_errno(r, "Failed to redirect standard streams to /dev/null, ignoring: %m");
f84f9974 2705
a132bef0 2706 /* Load the kernel modules early. */
2e75e2a8
DM
2707 if (!skip_setup)
2708 kmod_setup();
2e75e2a8 2709
3196e423 2710 /* Mount /proc, /sys and friends, so that /proc/cmdline and /proc/$PID/fd is available. */
f74349d8 2711 r = mount_setup(loaded_policy, skip_setup);
cb6531be
ZJS
2712 if (r < 0) {
2713 error_message = "Failed to mount API filesystems";
8efe3c01 2714 goto finish;
cb6531be 2715 }
c18ecf03
LP
2716
2717 /* The efivarfs is now mounted, let's read the random seed off it */
2718 (void) efi_take_random_seed();
209b2592
FB
2719
2720 /* Cache command-line options passed from EFI variables */
2721 if (!skip_setup)
2722 (void) cache_efi_options_variable();
3196e423
LP
2723 } else {
2724 /* Running as user instance */
2725 arg_system = false;
2726 log_set_target(LOG_TARGET_AUTO);
2727 log_open();
2728
2729 /* clear the kernel timestamp, because we are not PID 1 */
2730 kernel_timestamp = DUAL_TIMESTAMP_NULL;
2731
2732 if (mac_selinux_init() < 0) {
2733 error_message = "Failed to initialize SELinux support";
2734 goto finish;
2735 }
0c85a4f3 2736 }
4ade7963 2737
a9fd4cd1
FB
2738 /* Save the original RLIMIT_NOFILE/RLIMIT_MEMLOCK so that we can reset it later when
2739 * transitioning from the initrd to the main systemd or suchlike. */
2740 save_rlimits(&saved_rlimit_nofile, &saved_rlimit_memlock);
2741
4ade7963 2742 /* Reset all signal handlers. */
ce30c8dc 2743 (void) reset_all_signal_handlers();
9c274488 2744 (void) ignore_signals(SIGNALS_IGNORE);
078e4539 2745
ffe5c01e
FB
2746 (void) parse_configuration(&saved_rlimit_nofile, &saved_rlimit_memlock);
2747
2748 r = parse_argv(argc, argv);
2749 if (r < 0) {
2750 error_message = "Failed to parse commandline arguments";
f170852a 2751 goto finish;
ffe5c01e 2752 }
10c961b9 2753
b0d7c989
LP
2754 r = safety_checks();
2755 if (r < 0)
fe783b03 2756 goto finish;
fe783b03 2757
5c08257b 2758 if (IN_SET(arg_action, ACTION_TEST, ACTION_HELP, ACTION_DUMP_CONFIGURATION_ITEMS, ACTION_DUMP_BUS_PROPERTIES, ACTION_BUS_INTROSPECT))
0221d68a 2759 (void) pager_open(arg_pager_flags);
b0d7c989
LP
2760
2761 if (arg_action != ACTION_RUN)
74e7579c 2762 skip_setup = true;
b87c2aa6 2763
fa0f4d8a 2764 if (arg_action == ACTION_HELP) {
37ec0fdd 2765 retval = help() < 0 ? EXIT_FAILURE : EXIT_SUCCESS;
f170852a 2766 goto finish;
9ba0bc4e
ZJS
2767 } else if (arg_action == ACTION_VERSION) {
2768 retval = version();
2769 goto finish;
fa0f4d8a 2770 } else if (arg_action == ACTION_DUMP_CONFIGURATION_ITEMS) {
e537352b 2771 unit_dump_config_items(stdout);
22f4096c 2772 retval = EXIT_SUCCESS;
e537352b 2773 goto finish;
bbc1acab
YW
2774 } else if (arg_action == ACTION_DUMP_BUS_PROPERTIES) {
2775 dump_bus_properties(stdout);
2776 retval = EXIT_SUCCESS;
2777 goto finish;
5c08257b
ZJS
2778 } else if (arg_action == ACTION_BUS_INTROSPECT) {
2779 r = bus_manager_introspect_implementations(stdout, arg_bus_introspect);
2780 retval = r >= 0 ? EXIT_SUCCESS : EXIT_FAILURE;
2781 goto finish;
f170852a
LP
2782 }
2783
4c701096 2784 assert_se(IN_SET(arg_action, ACTION_RUN, ACTION_TEST));
f170852a 2785
5a2e0c62
LP
2786 /* Move out of the way, so that we won't block unmounts */
2787 assert_se(chdir("/") == 0);
2788
dea374e8 2789 if (arg_action == ACTION_RUN) {
d247f232
LP
2790 if (!skip_setup) {
2791 /* Apply the systemd.clock_usec= kernel command line switch */
45250e66 2792 apply_clock_update();
a70c72a0 2793
d247f232
LP
2794 /* Apply random seed from kernel command line */
2795 cmdline_take_random_seed();
2796 }
2797
c6885f5f
FB
2798 /* A core pattern might have been specified via the cmdline. */
2799 initialize_core_pattern(skip_setup);
2800
efeb853f 2801 /* Close logging fds, in order not to confuse collecting passed fds and terminal logic below */
a70c72a0
LP
2802 log_close();
2803
2804 /* Remember open file descriptors for later deserialization */
efeb853f
LP
2805 r = collect_fds(&fds, &error_message);
2806 if (r < 0)
dea374e8 2807 goto finish;
a16e1123 2808
2e51b31c
LP
2809 /* Give up any control of the console, but make sure its initialized. */
2810 setup_console_terminal(skip_setup);
56d96fc0 2811
a70c72a0
LP
2812 /* Open the logging devices, if possible and necessary */
2813 log_open();
56d96fc0 2814 }
4ade7963 2815
31aef7ff 2816 log_execution_mode(&first_boot);
a5dab5ce 2817
2d776038 2818 r = initialize_runtime(skip_setup,
3023f2fe 2819 first_boot,
2d776038
LP
2820 &saved_rlimit_nofile,
2821 &saved_rlimit_memlock,
2822 &error_message);
2823 if (r < 0)
2824 goto finish;
4096d6f5 2825
e0a3da1f
ZJS
2826 r = manager_new(arg_system ? UNIT_FILE_SYSTEM : UNIT_FILE_USER,
2827 arg_action == ACTION_TEST ? MANAGER_TEST_FULL : 0,
2828 &m);
e96d6be7 2829 if (r < 0) {
da927ba9 2830 log_emergency_errno(r, "Failed to allocate manager object: %m");
cb6531be 2831 error_message = "Failed to allocate manager object";
60918275
LP
2832 goto finish;
2833 }
2834
9f9f0342
LP
2835 m->timestamps[MANAGER_TIMESTAMP_KERNEL] = kernel_timestamp;
2836 m->timestamps[MANAGER_TIMESTAMP_INITRD] = initrd_timestamp;
2837 m->timestamps[MANAGER_TIMESTAMP_USERSPACE] = userspace_timestamp;
d4ee7bd8
YW
2838 m->timestamps[manager_timestamp_initrd_mangle(MANAGER_TIMESTAMP_SECURITY_START)] = security_start_timestamp;
2839 m->timestamps[manager_timestamp_initrd_mangle(MANAGER_TIMESTAMP_SECURITY_FINISH)] = security_finish_timestamp;
9e58ff9c 2840
85cb4151 2841 set_manager_defaults(m);
7b46fc6a 2842 set_manager_settings(m);
fd130612 2843 manager_set_first_boot(m, first_boot);
27d340c7 2844
bf4df7c3 2845 /* Remember whether we should queue the default job */
d3b1c508 2846 queue_default_job = !arg_serialization || arg_switched_root;
bf4df7c3 2847
9d76d730
LP
2848 before_startup = now(CLOCK_MONOTONIC);
2849
d3b1c508 2850 r = manager_startup(m, arg_serialization, fds);
58f88d92 2851 if (r < 0) {
cefb3eda 2852 error_message = "Failed to start up manager";
58f88d92
ZJS
2853 goto finish;
2854 }
a16e1123 2855
6acca5fc 2856 /* This will close all file descriptors that were opened, but not claimed by any unit. */
2feceb5e 2857 fds = fdset_free(fds);
74ca738f 2858 arg_serialization = safe_fclose(arg_serialization);
bf4df7c3
LP
2859
2860 if (queue_default_job) {
6acca5fc 2861 r = do_queue_default_job(m, &error_message);
718db961 2862 if (r < 0)
37d88da7 2863 goto finish;
6acca5fc 2864 }
ab17a050 2865
6acca5fc 2866 after_startup = now(CLOCK_MONOTONIC);
60918275 2867
6acca5fc
LP
2868 log_full(arg_action == ACTION_TEST ? LOG_INFO : LOG_DEBUG,
2869 "Loaded units and determined initial transaction in %s.",
5291f26d 2870 FORMAT_TIMESPAN(after_startup - before_startup, 100 * USEC_PER_MSEC));
07672f49 2871
6acca5fc 2872 if (arg_action == ACTION_TEST) {
2a341bb9 2873 manager_test_summary(m);
6acca5fc
LP
2874 retval = EXIT_SUCCESS;
2875 goto finish;
e965d56d 2876 }
d46de8a1 2877
3046b6db 2878 (void) invoke_main_loop(m,
a9fd4cd1
FB
2879 &saved_rlimit_nofile,
2880 &saved_rlimit_memlock,
3046b6db
LP
2881 &reexecute,
2882 &retval,
2883 &shutdown_verb,
2884 &fds,
2885 &switch_root_dir,
2886 &switch_root_init,
2887 &error_message);
f170852a 2888
60918275 2889finish:
b87c2aa6
ZJS
2890 pager_close();
2891
92890452 2892 if (m) {
986935cf
FB
2893 arg_reboot_watchdog = manager_get_watchdog(m, WATCHDOG_REBOOT);
2894 arg_kexec_watchdog = manager_get_watchdog(m, WATCHDOG_KEXEC);
92890452
LP
2895 m = manager_free(m);
2896 }
60918275 2897
cc56fafe 2898 mac_selinux_finish();
b2bb3dbe 2899
3c7878f9
LP
2900 if (reexecute)
2901 do_reexecute(argc, argv,
2902 &saved_rlimit_nofile,
2903 &saved_rlimit_memlock,
2904 fds,
2905 switch_root_dir,
2906 switch_root_init,
2907 &error_message); /* This only returns if reexecution failed */
a16e1123 2908
74ca738f 2909 arg_serialization = safe_fclose(arg_serialization);
2feceb5e 2910 fds = fdset_free(fds);
a16e1123 2911
0e06a031
LP
2912 saved_env = strv_free(saved_env);
2913
349cc4a5 2914#if HAVE_VALGRIND_VALGRIND_H
54b434b1
LP
2915 /* If we are PID 1 and running under valgrind, then let's exit
2916 * here explicitly. valgrind will only generate nice output on
2917 * exit(), not on exec(), hence let's do the former not the
2918 * latter here. */
8a2c1fbf
EJ
2919 if (getpid_cached() == 1 && RUNNING_ON_VALGRIND) {
2920 /* Cleanup watchdog_device strings for valgrind. We need them
2921 * in become_shutdown() so normally we cannot free them yet. */
2922 watchdog_free_device();
2923 arg_watchdog_device = mfree(arg_watchdog_device);
7d9eea2b 2924 reset_arguments();
27fe58b7 2925 return retval;
8a2c1fbf 2926 }
54b434b1
LP
2927#endif
2928
7e11a95e
EV
2929#if HAS_FEATURE_ADDRESS_SANITIZER
2930 __lsan_do_leak_check();
2931#endif
2932
b9080b03 2933 if (shutdown_verb) {
7eb35049 2934 r = become_shutdown(shutdown_verb, retval);
4a36297c 2935 log_error_errno(r, "Failed to execute shutdown binary, %s: %m", getpid_cached() == 1 ? "freezing" : "quitting");
9b9881d7 2936 error_message = "Failed to execute shutdown binary";
b9080b03
FF
2937 }
2938
8a2c1fbf
EJ
2939 watchdog_free_device();
2940 arg_watchdog_device = mfree(arg_watchdog_device);
2941
df0ff127 2942 if (getpid_cached() == 1) {
cb6531be
ZJS
2943 if (error_message)
2944 manager_status_printf(NULL, STATUS_TYPE_EMERGENCY,
1fc464f6 2945 ANSI_HIGHLIGHT_RED "!!!!!!" ANSI_NORMAL,
bb259772
LP
2946 "%s.", error_message);
2947 freeze_or_exit_or_reboot();
cb6531be 2948 }
c3b3c274 2949
7d9eea2b 2950 reset_arguments();
60918275
LP
2951 return retval;
2952}