]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/core/main.c
tree-wide: Various forward header cleanups
[thirdparty/systemd.git] / src / core / main.c
CommitLineData
db9ecf05 1/* SPDX-License-Identifier: LGPL-2.1-or-later */
a7334b09 2
3dfc9763 3#include <fcntl.h>
f170852a 4#include <getopt.h>
d4a402e4 5#include <linux/oom.h>
836e4e7e 6#include <stdlib.h>
664f88a7 7#include <sys/mount.h>
3dfc9763 8#include <sys/prctl.h>
40efaaed 9#include <sys/utsname.h>
3dfc9763 10#include <unistd.h>
836e4e7e 11
349cc4a5 12#if HAVE_VALGRIND_VALGRIND_H
50b35193 13# include <valgrind/valgrind.h>
3dfc9763 14#endif
54b434b1 15
718db961 16#include "sd-bus.h"
cf0fbc49 17#include "sd-daemon.h"
b2e7486c 18#include "sd-messages.h"
3dfc9763 19
b5efdb8a 20#include "alloc-util.h"
2ffadd3c 21#include "apparmor-setup.h"
d9d93745 22#include "architecture.h"
ee617a4e 23#include "argv-util.h"
3dfc9763
LP
24#include "build.h"
25#include "bus-error.h"
430f0182 26#include "capability-util.h"
ea25672d 27#include "cgroup-setup.h"
aaa27e2e 28#include "chase.h"
24efb112 29#include "clock-util.h"
4d47aa8c 30#include "clock-warp.h"
3dfc9763 31#include "conf-parser.h"
024469dd 32#include "confidential-virt.h"
5d1e57b8 33#include "constants.h"
3f37a825 34#include "copy.h"
836e4e7e 35#include "coredump-util.h"
618234a5 36#include "cpu-set-util.h"
898c9a6f 37#include "crash-handler.h"
c18ecf03 38#include "dbus.h"
1cf40697 39#include "dbus-manager.h"
32429805 40#include "dev-setup.h"
c18ecf03 41#include "efi-random.h"
eee8b7ab 42#include "emergency-action.h"
3dfc9763 43#include "env-util.h"
98c28313 44#include "escape.h"
3ffd4af2 45#include "fd-util.h"
3dfc9763 46#include "fdset.h"
718db961 47#include "fileio.h"
f97b34a6 48#include "format-util.h"
6339d3e6 49#include "getopt-defs.h"
d247f232 50#include "hexdecoct.h"
3dfc9763 51#include "hostname-setup.h"
836e4e7e 52#include "id128-util.h"
3dfc9763 53#include "ima-setup.h"
4b9a4b01 54#include "import-creds.h"
baa6a42d 55#include "initrd-util.h"
98c28313 56#include "io-util.h"
394c6141 57#include "ipe-setup.h"
3dfc9763
LP
58#include "killall.h"
59#include "kmod-setup.h"
1e35e81b 60#include "label-util.h"
eefc66aa 61#include "limits-util.h"
d7b8eec7 62#include "load-fragment.h"
3dfc9763 63#include "log.h"
b6e66135 64#include "loopback-setup.h"
b6e66135 65#include "machine-id-setup.h"
898c9a6f 66#include "main.h"
3dfc9763 67#include "manager.h"
2a341bb9 68#include "manager-dump.h"
a01ba4b2 69#include "manager-serialize.h"
35cd0ba5 70#include "mkdir-label.h"
3dfc9763 71#include "mount-setup.h"
ffc1ec73 72#include "mount-util.h"
d58ad743 73#include "os-util.h"
98c28313 74#include "osc-context.h"
3dfc9763 75#include "pager.h"
614b022c 76#include "parse-argument.h"
6bedfcbb 77#include "parse-util.h"
7d5ceb64 78#include "path-util.h"
294bf0c3 79#include "pretty-print.h"
4e731273 80#include "proc-cmdline.h"
3dfc9763 81#include "process-util.h"
d247f232 82#include "random-util.h"
78f22b97 83#include "rlimit-util.h"
8c28dd24 84#include "rm-rf.h"
83f12b27 85#include "seccomp-util.h"
b6e66135 86#include "selinux-setup.h"
3dfc9763 87#include "selinux-util.h"
5d1e57b8 88#include "serialize.h"
836e4e7e 89#include "set.h"
3dfc9763 90#include "signal-util.h"
ffbd2c4d 91#include "smack-setup.h"
3dfc9763 92#include "special.h"
8fcde012 93#include "stat-util.h"
15a5e950 94#include "stdio-util.h"
3dfc9763
LP
95#include "strv.h"
96#include "switch-root.h"
a8b627aa 97#include "sysctl-util.h"
3dfc9763 98#include "terminal-util.h"
b10abe4b 99#include "time-util.h"
8612da97 100#include "umask-util.h"
836e4e7e 101#include "unit-name.h"
b1d4f8e1 102#include "user-util.h"
bdb577f5 103#include "version.h"
3dfc9763
LP
104#include "virt.h"
105#include "watchdog.h"
b6e66135 106
7e11a95e
EV
107#if HAS_FEATURE_ADDRESS_SANITIZER
108#include <sanitizer/lsan_interface.h>
109#endif
110
f170852a
LP
111static enum {
112 ACTION_RUN,
e965d56d 113 ACTION_HELP,
9ba0bc4e 114 ACTION_VERSION,
e537352b 115 ACTION_TEST,
bbc1acab
YW
116 ACTION_DUMP_CONFIGURATION_ITEMS,
117 ACTION_DUMP_BUS_PROPERTIES,
5c08257b 118 ACTION_BUS_INTROSPECT,
fa0f4d8a 119} arg_action = ACTION_RUN;
fb39af4c 120
5c08257b
ZJS
121static const char *arg_bus_introspect = NULL;
122
45250e66
LP
123/* Those variables are initialized to 0 automatically, so we avoid uninitialized memory access. Real
124 * defaults are assigned in reset_arguments() below. */
fb39af4c 125static char *arg_default_unit;
4870133b 126static RuntimeScope arg_runtime_scope;
898c9a6f
LP
127bool arg_dump_core;
128int arg_crash_chvt;
129bool arg_crash_shell;
7a66f215 130CrashAction arg_crash_action;
fb39af4c
ZJS
131static char *arg_confirm_spawn;
132static ShowStatus arg_show_status;
36cf4507 133static StatusUnitFormat arg_status_unit_format;
fb39af4c
ZJS
134static bool arg_switched_root;
135static PagerFlags arg_pager_flags;
136static bool arg_service_watchdogs;
c9e120e0 137static UnitDefaults arg_defaults;
fb39af4c 138static usec_t arg_runtime_watchdog;
65224c1d 139static usec_t arg_reboot_watchdog;
acafd7d8 140static usec_t arg_kexec_watchdog;
5717062e 141static usec_t arg_pretimeout_watchdog;
fb39af4c 142static char *arg_early_core_pattern;
aff3a9e1 143static char *arg_watchdog_pretimeout_governor;
fb39af4c
ZJS
144static char *arg_watchdog_device;
145static char **arg_default_environment;
d55ed7de 146static char **arg_manager_environment;
fb39af4c
ZJS
147static uint64_t arg_capability_bounding_set;
148static bool arg_no_new_privs;
ffc1ec73 149static int arg_protect_system;
fb39af4c 150static nsec_t arg_timer_slack_nsec;
fb39af4c
ZJS
151static Set* arg_syscall_archs;
152static FILE* arg_serialization;
fb39af4c 153static sd_id128_t arg_machine_id;
274a38c7 154static bool arg_machine_id_from_firmware = false;
fb39af4c 155static EmergencyAction arg_cad_burst_action;
fb39af4c 156static CPUSet arg_cpu_affinity;
b070c7c0 157static NUMAPolicy arg_numa_policy;
3753325b 158static usec_t arg_clock_usec;
d247f232
LP
159static void *arg_random_seed;
160static size_t arg_random_seed_size;
856bfaeb
LB
161static usec_t arg_reload_limit_interval_sec;
162static unsigned arg_reload_limit_burst;
61fbbac1 163
0e06a031
LP
164/* A copy of the original environment block */
165static char **saved_env = NULL;
166
a9fd4cd1
FB
167static int parse_configuration(const struct rlimit *saved_rlimit_nofile,
168 const struct rlimit *saved_rlimit_memlock);
4fc935ca 169
42efe5be 170static DEFINE_CONFIG_PARSE_ENUM_WITH_DEFAULT(config_parse_crash_action, crash_action, CrashAction, CRASH_FREEZE);
7a66f215 171
f70e6fb4
ZJS
172static int manager_find_user_config_paths(char ***ret_files, char ***ret_dirs) {
173 _cleanup_free_ char *base = NULL;
174 _cleanup_strv_free_ char **files = NULL, **dirs = NULL;
175 int r;
176
60cd6deb 177 r = xdg_user_config_dir("/systemd", &base);
f70e6fb4
ZJS
178 if (r < 0)
179 return r;
180
181 r = strv_extendf(&files, "%s/user.conf", base);
182 if (r < 0)
183 return r;
184
185 r = strv_extend(&files, PKGSYSCONFDIR "/user.conf");
186 if (r < 0)
187 return r;
188
189 r = strv_consume(&dirs, TAKE_PTR(base));
190 if (r < 0)
191 return r;
192
193 r = strv_extend_strv(&dirs, CONF_PATHS_STRV("systemd"), false);
194 if (r < 0)
195 return r;
196
197 *ret_files = TAKE_PTR(files);
198 *ret_dirs = TAKE_PTR(dirs);
199 return 0;
200}
201
b4112281
LP
/* Queries the window size of the terminal referenced by tty_fd and exports it to the process
 * environment as $COLUMNS and $LINES.
 *
 * Returns 1 if both variables were set. In every other case (ioctl failure, both dimensions reported
 * as zero, or a failure to set either variable) both variables are removed from the environment, so
 * that no stale or half-updated pair is left behind, and 0 is returned. Failures are only logged at
 * debug level; no error is ever returned. */
static int save_console_winsize_in_environment(int tty_fd) {
        int r;

        assert(tty_fd >= 0);

        struct winsize ws = {};
        if (ioctl(tty_fd, TIOCGWINSZ, &ws) < 0) {
                /* Include %m so that the errno we pass actually shows up in the message. */
                log_debug_errno(errno, "Failed to acquire console window size, ignoring: %m");
                goto unset;
        }

        /* Only give up if *both* dimensions are unset. */
        if (ws.ws_col <= 0 && ws.ws_row <= 0) {
                log_debug("No console window size set, ignoring.");
                goto unset;
        }

        r = setenvf("COLUMNS", /* overwrite= */ true, "%u", ws.ws_col);
        if (r < 0) {
                log_debug_errno(r, "Failed to set $COLUMNS, ignoring: %m");
                goto unset;
        }

        r = setenvf("LINES", /* overwrite= */ true, "%u", ws.ws_row);
        if (r < 0) {
                log_debug_errno(r, "Failed to set $LINES, ignoring: %m");
                goto unset;
        }

        log_debug("Recorded console dimensions in environment: $COLUMNS=%u $LINES=%u.", ws.ws_col, ws.ws_row);
        return 1;

unset:
        (void) unsetenv("COLUMNS");
        (void) unsetenv("LINES");
        return 0;
}
238
56d96fc0 239static int console_setup(void) {
2736295d
LP
240
241 if (getpid_cached() != 1)
242 return 0;
243
254d1313 244 _cleanup_close_ int tty_fd = -EBADF;
80876c20 245
2736295d 246 tty_fd = open_terminal("/dev/console", O_RDWR|O_NOCTTY|O_CLOEXEC);
23bbb0de 247 if (tty_fd < 0)
42ba9974 248 return log_error_errno(tty_fd, "Failed to open %s: %m", "/dev/console");
80876c20 249
2736295d
LP
250 /* We don't want to force text mode. Plymouth may be showing pictures already from initrd. */
251 reset_dev_console_fd(tty_fd, /* switch_to_text= */ false);
b4112281
LP
252
253 save_console_winsize_in_environment(tty_fd);
254
56d96fc0 255 return 0;
80876c20
LP
256}
257
1da7dcf4
DT
258static int parse_timeout(const char *value, usec_t *ret) {
259 int r = 0;
260
261 assert(value);
262 assert(ret);
263
264 if (streq(value, "default"))
265 *ret = USEC_INFINITY;
266 else if (streq(value, "off"))
267 *ret = 0;
268 else
269 r = parse_sec(value, ret);
270
271 return r;
272}
273
96287a49 274static int parse_proc_cmdline_item(const char *key, const char *value, void *data) {
059cb385 275 int r;
f170852a 276
059cb385 277 assert(key);
5192bd19 278
1d84ad94 279 if (STR_IN_SET(key, "systemd.unit", "rd.systemd.unit")) {
bf4df7c3 280
1d84ad94
LP
281 if (proc_cmdline_value_missing(key, value))
282 return 0;
bf4df7c3 283
1d84ad94
LP
284 if (!unit_name_is_valid(value, UNIT_NAME_PLAIN|UNIT_NAME_INSTANCE))
285 log_warning("Unit name specified on %s= is not valid, ignoring: %s", key, value);
cd57038a
ZJS
286 else if (in_initrd() == !!startswith(key, "rd."))
287 return free_and_strdup_warn(&arg_default_unit, value);
f170852a 288
1d84ad94 289 } else if (proc_cmdline_key_streq(key, "systemd.dump_core")) {
4fc935ca 290
1d84ad94 291 r = value ? parse_boolean(value) : true;
fb472900 292 if (r < 0)
5e1ee764 293 log_warning_errno(r, "Failed to parse dump core switch %s, ignoring: %m", value);
4fc935ca 294 else
fa0f4d8a 295 arg_dump_core = r;
4fc935ca 296
c6885f5f
FB
297 } else if (proc_cmdline_key_streq(key, "systemd.early_core_pattern")) {
298
299 if (proc_cmdline_value_missing(key, value))
300 return 0;
301
302 if (path_is_absolute(value))
614b022c 303 (void) parse_path_argument(value, false, &arg_early_core_pattern);
c6885f5f
FB
304 else
305 log_warning("Specified core pattern '%s' is not an absolute path, ignoring.", value);
306
1d84ad94 307 } else if (proc_cmdline_key_streq(key, "systemd.crash_chvt")) {
b9e74c39 308
1d84ad94
LP
309 if (!value)
310 arg_crash_chvt = 0; /* turn on */
5e1ee764 311 else {
a07a7324 312 r = parse_crash_chvt(value, &arg_crash_chvt);
5e1ee764
YW
313 if (r < 0)
314 log_warning_errno(r, "Failed to parse crash chvt switch %s, ignoring: %m", value);
315 }
b9e74c39 316
1d84ad94 317 } else if (proc_cmdline_key_streq(key, "systemd.crash_shell")) {
4fc935ca 318
1d84ad94 319 r = value ? parse_boolean(value) : true;
fb472900 320 if (r < 0)
5e1ee764 321 log_warning_errno(r, "Failed to parse crash shell switch %s, ignoring: %m", value);
4fc935ca 322 else
fa0f4d8a 323 arg_crash_shell = r;
5e7ee61c 324
1d84ad94 325 } else if (proc_cmdline_key_streq(key, "systemd.crash_reboot")) {
5e7ee61c 326
1d84ad94 327 r = value ? parse_boolean(value) : true;
b9e74c39 328 if (r < 0)
5e1ee764 329 log_warning_errno(r, "Failed to parse crash reboot switch %s, ignoring: %m", value);
5e7ee61c 330 else
7a66f215
DDM
331 arg_crash_action = r ? CRASH_REBOOT : CRASH_FREEZE;
332
333 } else if (proc_cmdline_key_streq(key, "systemd.crash_action")) {
334
335 if (proc_cmdline_value_missing(key, value))
336 return 0;
337
338 r = crash_action_from_string(value);
339 if (r < 0)
340 log_warning_errno(r, "Failed to parse crash action switch %s, ignoring: %m", value);
341 else
342 arg_crash_action = r;
5e7ee61c 343
1d84ad94
LP
344 } else if (proc_cmdline_key_streq(key, "systemd.confirm_spawn")) {
345 char *s;
7d5ceb64 346
1d84ad94 347 r = parse_confirm_spawn(value, &s);
059cb385 348 if (r < 0)
5e1ee764
YW
349 log_warning_errno(r, "Failed to parse confirm_spawn switch %s, ignoring: %m", value);
350 else
351 free_and_replace(arg_confirm_spawn, s);
601f6a1e 352
2a12e32e
JK
353 } else if (proc_cmdline_key_streq(key, "systemd.service_watchdogs")) {
354
355 r = value ? parse_boolean(value) : true;
356 if (r < 0)
5e1ee764 357 log_warning_errno(r, "Failed to parse service watchdog switch %s, ignoring: %m", value);
2a12e32e
JK
358 else
359 arg_service_watchdogs = r;
360
1d84ad94 361 } else if (proc_cmdline_key_streq(key, "systemd.show_status")) {
9e58ff9c 362
1d84ad94
LP
363 if (value) {
364 r = parse_show_status(value, &arg_show_status);
365 if (r < 0)
5e1ee764 366 log_warning_errno(r, "Failed to parse show status switch %s, ignoring: %m", value);
1d84ad94
LP
367 } else
368 arg_show_status = SHOW_STATUS_YES;
059cb385 369
36cf4507
ZJS
370 } else if (proc_cmdline_key_streq(key, "systemd.status_unit_format")) {
371
372 if (proc_cmdline_value_missing(key, value))
373 return 0;
374
375 r = status_unit_format_from_string(value);
376 if (r < 0)
377 log_warning_errno(r, "Failed to parse %s=%s, ignoring: %m", key, value);
378 else
379 arg_status_unit_format = r;
380
1d84ad94
LP
381 } else if (proc_cmdline_key_streq(key, "systemd.default_standard_output")) {
382
383 if (proc_cmdline_value_missing(key, value))
384 return 0;
0a494f1f 385
059cb385 386 r = exec_output_from_string(value);
fb472900 387 if (r < 0)
5e1ee764 388 log_warning_errno(r, "Failed to parse default standard output switch %s, ignoring: %m", value);
0a494f1f 389 else
c9e120e0 390 arg_defaults.std_output = r;
0a494f1f 391
1d84ad94
LP
392 } else if (proc_cmdline_key_streq(key, "systemd.default_standard_error")) {
393
394 if (proc_cmdline_value_missing(key, value))
395 return 0;
059cb385
LP
396
397 r = exec_output_from_string(value);
fb472900 398 if (r < 0)
5e1ee764 399 log_warning_errno(r, "Failed to parse default standard error switch %s, ignoring: %m", value);
0a494f1f 400 else
c9e120e0 401 arg_defaults.std_error = r;
9e7c5357 402
1d84ad94
LP
403 } else if (streq(key, "systemd.setenv")) {
404
405 if (proc_cmdline_value_missing(key, value))
406 return 0;
059cb385 407
b70935ac
ZJS
408 if (!env_assignment_is_valid(value))
409 log_warning("Environment variable assignment '%s' is not valid. Ignoring.", value);
410 else {
411 r = strv_env_replace_strdup(&arg_default_environment, value);
412 if (r < 0)
1d84ad94 413 return log_oom();
b70935ac 414 }
9e58ff9c 415
1d84ad94
LP
416 } else if (proc_cmdline_key_streq(key, "systemd.machine_id")) {
417
418 if (proc_cmdline_value_missing(key, value))
419 return 0;
420
274a38c7
MJ
421 if (streq(value, "firmware"))
422 arg_machine_id_from_firmware = true;
423 else {
424 r = id128_from_string_nonzero(value, &arg_machine_id);
425 if (r < 0)
426 log_warning_errno(r, "MachineID '%s' is not valid, ignoring: %m", value);
427 else
428 arg_machine_id_from_firmware = false;
429 }
1d84ad94
LP
430 } else if (proc_cmdline_key_streq(key, "systemd.default_timeout_start_sec")) {
431
432 if (proc_cmdline_value_missing(key, value))
433 return 0;
434
c9e120e0 435 r = parse_sec(value, &arg_defaults.timeout_start_usec);
1d84ad94 436 if (r < 0)
5e1ee764 437 log_warning_errno(r, "Failed to parse default start timeout '%s', ignoring: %m", value);
1d84ad94 438
c9e120e0
LP
439 if (arg_defaults.timeout_start_usec <= 0)
440 arg_defaults.timeout_start_usec = USEC_INFINITY;
ee48dbd5 441
6b818cd7
DDM
442 } else if (proc_cmdline_key_streq(key, "systemd.default_device_timeout_sec")) {
443
444 if (proc_cmdline_value_missing(key, value))
445 return 0;
446
c9e120e0 447 r = parse_sec(value, &arg_defaults.device_timeout_usec);
6b818cd7
DDM
448 if (r < 0)
449 log_warning_errno(r, "Failed to parse default device timeout '%s', ignoring: %m", value);
450
c9e120e0
LP
451 if (arg_defaults.device_timeout_usec <= 0)
452 arg_defaults.device_timeout_usec = USEC_INFINITY;
6b818cd7 453
68d58f38
LP
454 } else if (proc_cmdline_key_streq(key, "systemd.cpu_affinity")) {
455
456 if (proc_cmdline_value_missing(key, value))
457 return 0;
458
459 r = parse_cpu_set(value, &arg_cpu_affinity);
460 if (r < 0)
162392b7 461 log_warning_errno(r, "Failed to parse CPU affinity mask '%s', ignoring: %m", value);
68d58f38 462
8a2c1fbf
EJ
463 } else if (proc_cmdline_key_streq(key, "systemd.watchdog_device")) {
464
465 if (proc_cmdline_value_missing(key, value))
466 return 0;
467
614b022c 468 (void) parse_path_argument(value, false, &arg_watchdog_device);
8a2c1fbf 469
b3aa73e4
FB
470 } else if (proc_cmdline_key_streq(key, "systemd.watchdog_sec")) {
471
472 if (proc_cmdline_value_missing(key, value))
473 return 0;
474
1da7dcf4
DT
475 r = parse_timeout(value, &arg_runtime_watchdog);
476 if (r < 0) {
477 log_warning_errno(r, "Failed to parse systemd.watchdog_sec= argument '%s', ignoring: %m", value);
478 return 0;
8a85c5b6
FB
479 }
480
481 arg_kexec_watchdog = arg_reboot_watchdog = arg_runtime_watchdog;
b3aa73e4 482
5717062e
CK
483 } else if (proc_cmdline_key_streq(key, "systemd.watchdog_pre_sec")) {
484
485 if (proc_cmdline_value_missing(key, value))
486 return 0;
487
1da7dcf4
DT
488 r = parse_timeout(value, &arg_pretimeout_watchdog);
489 if (r < 0) {
490 log_warning_errno(r, "Failed to parse systemd.watchdog_pre_sec= argument '%s', ignoring: %m", value);
491 return 0;
5717062e
CK
492 }
493
aff3a9e1
LB
494 } else if (proc_cmdline_key_streq(key, "systemd.watchdog_pretimeout_governor")) {
495
496 if (proc_cmdline_value_missing(key, value) || isempty(value)) {
497 arg_watchdog_pretimeout_governor = mfree(arg_watchdog_pretimeout_governor);
498 return 0;
499 }
500
501 if (!string_is_safe(value)) {
502 log_warning("Watchdog pretimeout governor '%s' is not valid, ignoring.", value);
503 return 0;
504 }
505
506 return free_and_strdup_warn(&arg_watchdog_pretimeout_governor, value);
507
3753325b
LP
508 } else if (proc_cmdline_key_streq(key, "systemd.clock_usec")) {
509
510 if (proc_cmdline_value_missing(key, value))
511 return 0;
512
513 r = safe_atou64(value, &arg_clock_usec);
514 if (r < 0)
515 log_warning_errno(r, "Failed to parse systemd.clock_usec= argument, ignoring: %s", value);
516
d247f232
LP
517 } else if (proc_cmdline_key_streq(key, "systemd.random_seed")) {
518 void *p;
519 size_t sz;
520
521 if (proc_cmdline_value_missing(key, value))
522 return 0;
523
bdd2036e 524 r = unbase64mem(value, &p, &sz);
d247f232
LP
525 if (r < 0)
526 log_warning_errno(r, "Failed to parse systemd.random_seed= argument, ignoring: %s", value);
527
528 free(arg_random_seed);
529 arg_random_seed = sz > 0 ? p : mfree(p);
530 arg_random_seed_size = sz;
531
856bfaeb
LB
532 } else if (proc_cmdline_key_streq(key, "systemd.reload_limit_interval_sec")) {
533
534 if (proc_cmdline_value_missing(key, value))
535 return 0;
536
537 r = parse_sec(value, &arg_reload_limit_interval_sec);
538 if (r < 0) {
539 log_warning_errno(r, "Failed to parse systemd.reload_limit_interval_sec= argument '%s', ignoring: %m", value);
540 return 0;
541 }
542
543 } else if (proc_cmdline_key_streq(key, "systemd.reload_limit_burst")) {
544
545 if (proc_cmdline_value_missing(key, value))
546 return 0;
547
548 r = safe_atou(value, &arg_reload_limit_burst);
549 if (r < 0) {
550 log_warning_errno(r, "Failed to parse systemd.reload_limit_burst= argument '%s', ignoring: %m", value);
551 return 0;
552 }
553
059cb385 554 } else if (streq(key, "quiet") && !value) {
d7b15e0a 555
7a293242 556 if (arg_show_status == _SHOW_STATUS_INVALID)
0d066dd1 557 arg_show_status = SHOW_STATUS_ERROR;
059cb385
LP
558
559 } else if (streq(key, "debug") && !value) {
d7b15e0a 560
1de1c9c3
LP
561 /* Note that log_parse_environment() handles 'debug'
562 * too, and sets the log level to LOG_DEBUG. */
d7b15e0a 563
75f86906 564 if (detect_container() > 0)
b2103dcc 565 log_set_target(LOG_TARGET_CONSOLE);
059cb385 566
dcd61450 567 } else if (!value) {
e2c9a131 568 const char *target;
f170852a 569
ceae6295 570 /* Compatible with SysV, but supported independently even if SysV compatibility is disabled. */
e2c9a131
EV
571 target = runlevel_to_target(key);
572 if (target)
cd57038a 573 return free_and_strdup_warn(&arg_default_unit, target);
f170852a
LP
574 }
575
576 return 0;
577}
578
e8e581bf
ZJS
/* Generates a static config parser callback called 'name' that hands the right-hand side of the
 * setting to 'func'. If 'func' returns a negative value the failure is reported through log_syntax(),
 * with 'descr' naming the kind of value. The generated callback always returns 0, i.e. a bad value
 * never aborts parsing. The 'data' and 'userdata' arguments are not used. */
#define DEFINE_SETTER(name, func, descr)                              \
        static int name(const char *unit,                             \
                        const char *filename,                         \
                        unsigned line,                                \
                        const char *section,                          \
                        unsigned section_line,                        \
                        const char *lvalue,                           \
                        int ltype,                                    \
                        const char *rvalue,                           \
                        void *data,                                   \
                        void *userdata) {                             \
                                                                      \
                int r;                                                \
                                                                      \
                assert(filename);                                     \
                assert(lvalue);                                       \
                assert(rvalue);                                       \
                                                                      \
                r = func(rvalue);                                     \
                if (r < 0)                                            \
                        log_syntax(unit, LOG_ERR, filename, line, r,  \
                                   "Invalid " descr " '%s': %m",      \
                                   rvalue);                           \
                                                                      \
                return 0;                                             \
        }

DEFINE_SETTER(config_parse_level2, log_set_max_level_from_string, "log level");
DEFINE_SETTER(config_parse_target, log_set_target_from_string, "target");
DEFINE_SETTER(config_parse_color, log_show_color_from_string, "color");
DEFINE_SETTER(config_parse_location, log_show_location_from_string, "location");
DEFINE_SETTER(config_parse_time, log_show_time_from_string, "time");
487393e9 611
a61d6874
ZJS
612static int config_parse_default_timeout_abort(
613 const char *unit,
614 const char *filename,
615 unsigned line,
616 const char *section,
617 unsigned section_line,
618 const char *lvalue,
619 int ltype,
620 const char *rvalue,
621 void *data,
622 void *userdata) {
623 int r;
624
c9e120e0
LP
625 r = config_parse_timeout_abort(
626 unit,
627 filename,
628 line,
629 section,
630 section_line,
631 lvalue,
632 ltype,
633 rvalue,
634 &arg_defaults.timeout_abort_usec,
635 userdata);
a61d6874 636 if (r >= 0)
c9e120e0 637 arg_defaults.timeout_abort_set = r;
a61d6874
ZJS
638 return 0;
639}
487393e9 640
d4a402e4
LP
641static int config_parse_oom_score_adjust(
642 const char *unit,
643 const char *filename,
644 unsigned line,
645 const char *section,
646 unsigned section_line,
647 const char *lvalue,
648 int ltype,
649 const char *rvalue,
650 void *data,
651 void *userdata) {
652
653 int oa, r;
654
655 if (isempty(rvalue)) {
c9e120e0 656 arg_defaults.oom_score_adjust_set = false;
d4a402e4
LP
657 return 0;
658 }
659
660 r = parse_oom_score_adjust(rvalue, &oa);
0ddf4aca
MY
661 if (r < 0)
662 return log_syntax_parse_error(unit, filename, line, r, lvalue, rvalue);
d4a402e4 663
c9e120e0
LP
664 arg_defaults.oom_score_adjust = oa;
665 arg_defaults.oom_score_adjust_set = true;
d4a402e4
LP
666
667 return 0;
668}
669
ffc1ec73
LP
/* Config parser callback for the manager's own ProtectSystem= setting.
 *
 * 'data' must point to an int, which receives -1 for an empty value or "auto", and otherwise the
 * result of parse_boolean(). On a parse failure the target is left alone and the result of
 * log_syntax_parse_error() is returned; in all other cases 0 is returned. */
static int config_parse_protect_system_pid1(
                const char *unit,
                const char *filename,
                unsigned line,
                const char *section,
                unsigned section_line,
                const char *lvalue,
                int ltype,
                const char *rvalue,
                void *data,
                void *userdata) {

        int *protect = ASSERT_PTR(data);
        int b;

        /* Modelled after the per-service ProtectSystem= setting, but more restricted in one respect and
         * more automatic in another: for now only yes/no is understood (neither "strict" nor "full"),
         * and the logic is turned on automatically in the initrd unless configured otherwise.
         *
         * This might be extended later to get closer to what the per-service ProtectSystem= can do.
         * That is not trivial though, because of ordering constraints: besides /usr/ not much is
         * mounted yet at the moment this logic is enabled. */

        if (isempty(rvalue) || streq(rvalue, "auto")) {
                *protect = -1;
                return 0;
        }

        b = parse_boolean(rvalue);
        if (b < 0)
                return log_syntax_parse_error(unit, filename, line, b, lvalue, rvalue);

        *protect = b;
        return 0;
}
704
7a66f215
DDM
705static int config_parse_crash_reboot(
706 const char *unit,
707 const char *filename,
708 unsigned line,
709 const char *section,
710 unsigned section_line,
711 const char *lvalue,
712 int ltype,
713 const char *rvalue,
714 void *data,
715 void *userdata) {
716
717 CrashAction *v = ASSERT_PTR(data);
718 int r;
719
720 if (isempty(rvalue)) {
721 *v = CRASH_REBOOT;
722 return 0;
723 }
724
725 r = parse_boolean(rvalue);
0ddf4aca
MY
726 if (r < 0)
727 return log_syntax_parse_error(unit, filename, line, r, lvalue, rvalue);
7a66f215
DDM
728
729 *v = r > 0 ? CRASH_REBOOT : CRASH_FREEZE;
730 return 0;
731}
732
a61d6874 733static int parse_config_file(void) {
f975e971 734 const ConfigTableItem items[] = {
3f87eaa5
YW
735 { "Manager", "LogLevel", config_parse_level2, 0, NULL },
736 { "Manager", "LogTarget", config_parse_target, 0, NULL },
737 { "Manager", "LogColor", config_parse_color, 0, NULL },
738 { "Manager", "LogLocation", config_parse_location, 0, NULL },
739 { "Manager", "LogTime", config_parse_time, 0, NULL },
740 { "Manager", "DumpCore", config_parse_bool, 0, &arg_dump_core },
741 { "Manager", "CrashChVT", /* legacy */ config_parse_crash_chvt, 0, &arg_crash_chvt },
742 { "Manager", "CrashChangeVT", config_parse_crash_chvt, 0, &arg_crash_chvt },
743 { "Manager", "CrashShell", config_parse_bool, 0, &arg_crash_shell },
7a66f215
DDM
744 { "Manager", "CrashReboot", config_parse_crash_reboot, 0, &arg_crash_action },
745 { "Manager", "CrashAction", config_parse_crash_action, 0, &arg_crash_action },
3f87eaa5
YW
746 { "Manager", "ShowStatus", config_parse_show_status, 0, &arg_show_status },
747 { "Manager", "StatusUnitFormat", config_parse_status_unit_format, 0, &arg_status_unit_format },
fe3ada07 748 { "Manager", "CPUAffinity", config_parse_cpu_set, 0, &arg_cpu_affinity },
3f87eaa5 749 { "Manager", "NUMAPolicy", config_parse_numa_policy, 0, &arg_numa_policy.type },
0b9ae2d3 750 { "Manager", "NUMAMask", config_parse_numa_mask, 0, &arg_numa_policy.nodes },
62b5bd3c 751 { "Manager", "JoinControllers", config_parse_warn_compat, DISABLED_LEGACY, NULL },
3f87eaa5
YW
752 { "Manager", "RuntimeWatchdogSec", config_parse_watchdog_sec, 0, &arg_runtime_watchdog },
753 { "Manager", "RuntimeWatchdogPreSec", config_parse_watchdog_sec, 0, &arg_pretimeout_watchdog },
754 { "Manager", "RebootWatchdogSec", config_parse_watchdog_sec, 0, &arg_reboot_watchdog },
755 { "Manager", "ShutdownWatchdogSec", config_parse_watchdog_sec, 0, &arg_reboot_watchdog }, /* obsolete alias */
756 { "Manager", "KExecWatchdogSec", config_parse_watchdog_sec, 0, &arg_kexec_watchdog },
757 { "Manager", "WatchdogDevice", config_parse_path, 0, &arg_watchdog_device },
758 { "Manager", "RuntimeWatchdogPreGovernor", config_parse_string, CONFIG_PARSE_STRING_SAFE, &arg_watchdog_pretimeout_governor },
759 { "Manager", "CapabilityBoundingSet", config_parse_capability_set, 0, &arg_capability_bounding_set },
760 { "Manager", "NoNewPrivileges", config_parse_bool, 0, &arg_no_new_privs },
ffc1ec73 761 { "Manager", "ProtectSystem", config_parse_protect_system_pid1, 0, &arg_protect_system },
349cc4a5 762#if HAVE_SECCOMP
3f87eaa5 763 { "Manager", "SystemCallArchitectures", config_parse_syscall_archs, 0, &arg_syscall_archs },
6aa2c555
LP
764#else
765 { "Manager", "SystemCallArchitectures", config_parse_warn_compat, DISABLED_CONFIGURATION, NULL },
766
89fffa27 767#endif
3f87eaa5 768 { "Manager", "TimerSlackNSec", config_parse_nsec, 0, &arg_timer_slack_nsec },
c9e120e0
LP
769 { "Manager", "DefaultTimerAccuracySec", config_parse_sec, 0, &arg_defaults.timer_accuracy_usec },
770 { "Manager", "DefaultStandardOutput", config_parse_output_restricted, 0, &arg_defaults.std_output },
771 { "Manager", "DefaultStandardError", config_parse_output_restricted, 0, &arg_defaults.std_error },
772 { "Manager", "DefaultTimeoutStartSec", config_parse_sec, 0, &arg_defaults.timeout_start_usec },
773 { "Manager", "DefaultTimeoutStopSec", config_parse_sec, 0, &arg_defaults.timeout_stop_usec },
3f87eaa5 774 { "Manager", "DefaultTimeoutAbortSec", config_parse_default_timeout_abort, 0, NULL },
c9e120e0
LP
775 { "Manager", "DefaultDeviceTimeoutSec", config_parse_sec, 0, &arg_defaults.device_timeout_usec },
776 { "Manager", "DefaultRestartSec", config_parse_sec, 0, &arg_defaults.restart_usec },
14702b9c
DDM
777 { "Manager", "DefaultStartLimitInterval", config_parse_sec, 0, &arg_defaults.start_limit.interval}, /* obsolete alias */
778 { "Manager", "DefaultStartLimitIntervalSec", config_parse_sec, 0, &arg_defaults.start_limit.interval},
779 { "Manager", "DefaultStartLimitBurst", config_parse_unsigned, 0, &arg_defaults.start_limit.burst },
30bbdf07 780 { "Manager", "DefaultRestrictSUIDSGID", config_parse_bool, 0, &arg_defaults.restrict_suid_sgid },
4870133b
LP
781 { "Manager", "DefaultEnvironment", config_parse_environ, arg_runtime_scope, &arg_default_environment },
782 { "Manager", "ManagerEnvironment", config_parse_environ, arg_runtime_scope, &arg_manager_environment },
c9e120e0
LP
783 { "Manager", "DefaultLimitCPU", config_parse_rlimit, RLIMIT_CPU, arg_defaults.rlimit },
784 { "Manager", "DefaultLimitFSIZE", config_parse_rlimit, RLIMIT_FSIZE, arg_defaults.rlimit },
785 { "Manager", "DefaultLimitDATA", config_parse_rlimit, RLIMIT_DATA, arg_defaults.rlimit },
786 { "Manager", "DefaultLimitSTACK", config_parse_rlimit, RLIMIT_STACK, arg_defaults.rlimit },
787 { "Manager", "DefaultLimitCORE", config_parse_rlimit, RLIMIT_CORE, arg_defaults.rlimit },
788 { "Manager", "DefaultLimitRSS", config_parse_rlimit, RLIMIT_RSS, arg_defaults.rlimit },
789 { "Manager", "DefaultLimitNOFILE", config_parse_rlimit, RLIMIT_NOFILE, arg_defaults.rlimit },
790 { "Manager", "DefaultLimitAS", config_parse_rlimit, RLIMIT_AS, arg_defaults.rlimit },
791 { "Manager", "DefaultLimitNPROC", config_parse_rlimit, RLIMIT_NPROC, arg_defaults.rlimit },
792 { "Manager", "DefaultLimitMEMLOCK", config_parse_rlimit, RLIMIT_MEMLOCK, arg_defaults.rlimit },
793 { "Manager", "DefaultLimitLOCKS", config_parse_rlimit, RLIMIT_LOCKS, arg_defaults.rlimit },
794 { "Manager", "DefaultLimitSIGPENDING", config_parse_rlimit, RLIMIT_SIGPENDING, arg_defaults.rlimit },
795 { "Manager", "DefaultLimitMSGQUEUE", config_parse_rlimit, RLIMIT_MSGQUEUE, arg_defaults.rlimit },
796 { "Manager", "DefaultLimitNICE", config_parse_rlimit, RLIMIT_NICE, arg_defaults.rlimit },
797 { "Manager", "DefaultLimitRTPRIO", config_parse_rlimit, RLIMIT_RTPRIO, arg_defaults.rlimit },
798 { "Manager", "DefaultLimitRTTIME", config_parse_rlimit, RLIMIT_RTTIME, arg_defaults.rlimit },
29da53dd 799 { "Manager", "DefaultCPUAccounting", config_parse_warn_compat, DISABLED_LEGACY, NULL },
c9e120e0
LP
800 { "Manager", "DefaultIOAccounting", config_parse_bool, 0, &arg_defaults.io_accounting },
801 { "Manager", "DefaultIPAccounting", config_parse_bool, 0, &arg_defaults.ip_accounting },
98d64ff5 802 { "Manager", "DefaultBlockIOAccounting", config_parse_warn_compat, DISABLED_LEGACY, NULL },
c9e120e0
LP
803 { "Manager", "DefaultMemoryAccounting", config_parse_bool, 0, &arg_defaults.memory_accounting },
804 { "Manager", "DefaultTasksAccounting", config_parse_bool, 0, &arg_defaults.tasks_accounting },
805 { "Manager", "DefaultTasksMax", config_parse_tasks_max, 0, &arg_defaults.tasks_max },
806 { "Manager", "DefaultMemoryPressureThresholdSec", config_parse_sec, 0, &arg_defaults.memory_pressure_threshold_usec },
807 { "Manager", "DefaultMemoryPressureWatch", config_parse_memory_pressure_watch, 0, &arg_defaults.memory_pressure_watch },
4870133b 808 { "Manager", "CtrlAltDelBurstAction", config_parse_emergency_action, arg_runtime_scope, &arg_cad_burst_action },
c9e120e0 809 { "Manager", "DefaultOOMPolicy", config_parse_oom_policy, 0, &arg_defaults.oom_policy },
3f87eaa5 810 { "Manager", "DefaultOOMScoreAdjust", config_parse_oom_score_adjust, 0, NULL },
856bfaeb
LB
811 { "Manager", "ReloadLimitIntervalSec", config_parse_sec, 0, &arg_reload_limit_interval_sec },
812 { "Manager", "ReloadLimitBurst", config_parse_unsigned, 0, &arg_reload_limit_burst },
aa5ae971 813#if ENABLE_SMACK
c9e120e0 814 { "Manager", "DefaultSmackProcessLabel", config_parse_string, 0, &arg_defaults.smack_process_label },
aa5ae971
ŁS
815#else
816 { "Manager", "DefaultSmackProcessLabel", config_parse_warn_compat, DISABLED_CONFIGURATION, NULL },
817#endif
d3b1c508 818 {}
487393e9
LP
819 };
820
4870133b 821 if (arg_runtime_scope == RUNTIME_SCOPE_SYSTEM)
6378f257
ZJS
822 (void) config_parse_standard_file_with_dropins(
823 "systemd/system.conf",
824 "Manager\0",
825 config_item_table_lookup, items,
826 CONFIG_PARSE_WARN,
827 /* userdata= */ NULL);
e94a009c 828 else {
07e0ffc8
FB
829 _cleanup_strv_free_ char **files = NULL, **dirs = NULL;
830 int r;
831
4870133b
LP
832 assert(arg_runtime_scope == RUNTIME_SCOPE_USER);
833
e94a009c 834 r = manager_find_user_config_paths(&files, &dirs);
f70e6fb4
ZJS
835 if (r < 0)
836 return log_error_errno(r, "Failed to determine config file paths: %m");
e94a009c 837
07e0ffc8
FB
838 (void) config_parse_many(
839 (const char* const*) files,
840 (const char* const*) dirs,
841 "user.conf.d",
947f59ba 842 /* root = */ NULL,
07e0ffc8
FB
843 "Manager\0",
844 config_item_table_lookup, items,
845 CONFIG_PARSE_WARN,
846 NULL, NULL, NULL);
f70e6fb4 847 }
75eb6154 848
f70e6fb4
ZJS
849 /* Traditionally "0" was used to turn off the default unit timeouts. Fix this up so that we use
850 * USEC_INFINITY like everywhere else. */
c9e120e0
LP
851 if (arg_defaults.timeout_start_usec <= 0)
852 arg_defaults.timeout_start_usec = USEC_INFINITY;
853 if (arg_defaults.timeout_stop_usec <= 0)
854 arg_defaults.timeout_stop_usec = USEC_INFINITY;
487393e9 855
487393e9
LP
856 return 0;
857}
858
85cb4151 859static void set_manager_defaults(Manager *m) {
bfb27b06 860 int r;
06af2a04
TB
861
862 assert(m);
863
bfb27b06
LP
864 /* Propagates the various default unit property settings into the manager object, i.e. properties
865 * that do not affect the manager itself, but are just what newly allocated units will have set if
866 * they haven't set anything else. (Also see set_manager_settings() for the settings that affect the
867 * manager's own behaviour) */
868
869 r = manager_set_unit_defaults(m, &arg_defaults);
870 if (r < 0)
871 log_warning_errno(r, "Failed to set manager defaults, ignoring: %m");
872
873 r = manager_default_environment(m);
874 if (r < 0)
875 log_warning_errno(r, "Failed to set manager default environment, ignoring: %m");
876
877 r = manager_transient_environment_add(m, arg_default_environment);
878 if (r < 0)
879 log_warning_errno(r, "Failed to add to transient environment, ignoring: %m");
06af2a04
TB
880}
881
7b46fc6a 882static void set_manager_settings(Manager *m) {
aff3a9e1 883 int r;
7b46fc6a
LP
884
885 assert(m);
886
986935cf 887 /* Propagates the various manager settings into the manager object, i.e. properties that
4db8663b 888 * affect the manager itself (as opposed to just being inherited into newly allocated
986935cf 889 * units, see set_manager_defaults() above). */
5b65ae15 890
7b46fc6a 891 m->confirm_spawn = arg_confirm_spawn;
2a12e32e 892 m->service_watchdogs = arg_service_watchdogs;
7b46fc6a 893 m->cad_burst_action = arg_cad_burst_action;
856bfaeb
LB
894 /* Note that we don't do structured initialization here, otherwise it will reset the rate limit
895 * counter on every daemon-reload. */
8312b17a
LB
896 m->reload_reexec_ratelimit.interval = arg_reload_limit_interval_sec;
897 m->reload_reexec_ratelimit.burst = arg_reload_limit_burst;
7b46fc6a 898
986935cf
FB
899 manager_set_watchdog(m, WATCHDOG_RUNTIME, arg_runtime_watchdog);
900 manager_set_watchdog(m, WATCHDOG_REBOOT, arg_reboot_watchdog);
901 manager_set_watchdog(m, WATCHDOG_KEXEC, arg_kexec_watchdog);
5717062e 902 manager_set_watchdog(m, WATCHDOG_PRETIMEOUT, arg_pretimeout_watchdog);
aff3a9e1
LB
903 r = manager_set_watchdog_pretimeout_governor(m, arg_watchdog_pretimeout_governor);
904 if (r < 0)
905 log_warning_errno(r, "Failed to set watchdog pretimeout governor to '%s', ignoring: %m", arg_watchdog_pretimeout_governor);
986935cf 906
7c52d523 907 manager_set_show_status(m, arg_show_status, "command line");
36cf4507 908 m->status_unit_format = arg_status_unit_format;
7b46fc6a
LP
909}
910
f170852a 911static int parse_argv(int argc, char *argv[]) {
f170852a 912 enum {
6339d3e6
YW
913 COMMON_GETOPT_ARGS,
914 SYSTEMD_GETOPT_ARGS,
f170852a
LP
915 };
916
917 static const struct option options[] = {
6339d3e6
YW
918 COMMON_GETOPT_OPTIONS,
919 SYSTEMD_GETOPT_OPTIONS,
fb472900 920 {}
f170852a
LP
921 };
922
923 int c, r;
9a9ca408 924 bool user_arg_seen = false;
f170852a
LP
925
926 assert(argc >= 1);
927 assert(argv);
928
df0ff127 929 if (getpid_cached() == 1)
b770165a
LP
930 opterr = 0;
931
6339d3e6 932 while ((c = getopt_long(argc, argv, SYSTEMD_GETOPT_SHORT_OPTIONS, options, NULL)) >= 0)
f170852a
LP
933
934 switch (c) {
935
936 case ARG_LOG_LEVEL:
fb472900 937 r = log_set_max_level_from_string(optarg);
2b5107e1
ZJS
938 if (r < 0)
939 return log_error_errno(r, "Failed to parse log level \"%s\": %m", optarg);
f170852a
LP
940
941 break;
942
943 case ARG_LOG_TARGET:
fb472900 944 r = log_set_target_from_string(optarg);
2b5107e1
ZJS
945 if (r < 0)
946 return log_error_errno(r, "Failed to parse log target \"%s\": %m", optarg);
f170852a
LP
947
948 break;
949
bbe63281
LP
950 case ARG_LOG_COLOR:
951
d0b170c8 952 if (optarg) {
fb472900 953 r = log_show_color_from_string(optarg);
2b5107e1
ZJS
954 if (r < 0)
955 return log_error_errno(r, "Failed to parse log color setting \"%s\": %m",
956 optarg);
d0b170c8
LP
957 } else
958 log_show_color(true);
bbe63281
LP
959
960 break;
961
962 case ARG_LOG_LOCATION:
d0b170c8 963 if (optarg) {
fb472900 964 r = log_show_location_from_string(optarg);
2b5107e1
ZJS
965 if (r < 0)
966 return log_error_errno(r, "Failed to parse log location setting \"%s\": %m",
967 optarg);
d0b170c8
LP
968 } else
969 log_show_location(true);
bbe63281
LP
970
971 break;
972
c5673ed0
DS
973 case ARG_LOG_TIME:
974
975 if (optarg) {
976 r = log_show_time_from_string(optarg);
977 if (r < 0)
978 return log_error_errno(r, "Failed to parse log time setting \"%s\": %m",
979 optarg);
980 } else
981 log_show_time(true);
982
983 break;
984
0a494f1f 985 case ARG_DEFAULT_STD_OUTPUT:
fb472900 986 r = exec_output_from_string(optarg);
2b5107e1
ZJS
987 if (r < 0)
988 return log_error_errno(r, "Failed to parse default standard output setting \"%s\": %m",
989 optarg);
c9e120e0 990 arg_defaults.std_output = r;
0a494f1f
LP
991 break;
992
993 case ARG_DEFAULT_STD_ERROR:
fb472900 994 r = exec_output_from_string(optarg);
2b5107e1
ZJS
995 if (r < 0)
996 return log_error_errno(r, "Failed to parse default standard error output setting \"%s\": %m",
997 optarg);
c9e120e0 998 arg_defaults.std_error = r;
0a494f1f
LP
999 break;
1000
2f198e2f 1001 case ARG_UNIT:
e6e242ad 1002 r = free_and_strdup(&arg_default_unit, optarg);
23bbb0de 1003 if (r < 0)
2b5107e1 1004 return log_error_errno(r, "Failed to set default unit \"%s\": %m", optarg);
f170852a
LP
1005
1006 break;
1007
edb9aaa8 1008 case ARG_SYSTEM:
4870133b 1009 arg_runtime_scope = RUNTIME_SCOPE_SYSTEM;
edb9aaa8 1010 break;
a5dab5ce 1011
af2d49f7 1012 case ARG_USER:
4870133b 1013 arg_runtime_scope = RUNTIME_SCOPE_USER;
9a9ca408 1014 user_arg_seen = true;
a5dab5ce 1015 break;
a5dab5ce 1016
e965d56d 1017 case ARG_TEST:
fa0f4d8a 1018 arg_action = ACTION_TEST;
b87c2aa6
ZJS
1019 break;
1020
1021 case ARG_NO_PAGER:
0221d68a 1022 arg_pager_flags |= PAGER_DISABLE;
e965d56d
LP
1023 break;
1024
9ba0bc4e
ZJS
1025 case ARG_VERSION:
1026 arg_action = ACTION_VERSION;
1027 break;
1028
e537352b 1029 case ARG_DUMP_CONFIGURATION_ITEMS:
fa0f4d8a 1030 arg_action = ACTION_DUMP_CONFIGURATION_ITEMS;
e537352b
LP
1031 break;
1032
bbc1acab
YW
1033 case ARG_DUMP_BUS_PROPERTIES:
1034 arg_action = ACTION_DUMP_BUS_PROPERTIES;
1035 break;
1036
5c08257b
ZJS
1037 case ARG_BUS_INTROSPECT:
1038 arg_bus_introspect = optarg;
1039 arg_action = ACTION_BUS_INTROSPECT;
1040 break;
1041
9e58ff9c 1042 case ARG_DUMP_CORE:
599c7c54
ZJS
1043 r = parse_boolean_argument("--dump-core", optarg, &arg_dump_core);
1044 if (r < 0)
1045 return r;
b9e74c39
LP
1046 break;
1047
1048 case ARG_CRASH_CHVT:
a07a7324 1049 r = parse_crash_chvt(optarg, &arg_crash_chvt);
b9e74c39 1050 if (r < 0)
2b5107e1
ZJS
1051 return log_error_errno(r, "Failed to parse crash virtual terminal index: \"%s\": %m",
1052 optarg);
9e58ff9c
LP
1053 break;
1054
1055 case ARG_CRASH_SHELL:
599c7c54
ZJS
1056 r = parse_boolean_argument("--crash-shell", optarg, &arg_crash_shell);
1057 if (r < 0)
1058 return r;
b9e74c39
LP
1059 break;
1060
1061 case ARG_CRASH_REBOOT:
7a66f215 1062 r = parse_boolean_argument("--crash-reboot", optarg, NULL);
599c7c54
ZJS
1063 if (r < 0)
1064 return r;
7a66f215
DDM
1065 arg_crash_action = r > 0 ? CRASH_REBOOT : CRASH_FREEZE;
1066 break;
1067
1068 case ARG_CRASH_ACTION:
1069 r = crash_action_from_string(optarg);
1070 if (r < 0)
1071 return log_error_errno(r, "Failed to parse crash action \"%s\": %m", optarg);
1072 arg_crash_action = r;
9e58ff9c
LP
1073 break;
1074
80876c20 1075 case ARG_CONFIRM_SPAWN:
7d5ceb64
FB
1076 arg_confirm_spawn = mfree(arg_confirm_spawn);
1077
1078 r = parse_confirm_spawn(optarg, &arg_confirm_spawn);
1079 if (r < 0)
2b5107e1
ZJS
1080 return log_error_errno(r, "Failed to parse confirm spawn option: \"%s\": %m",
1081 optarg);
80876c20
LP
1082 break;
1083
2a12e32e 1084 case ARG_SERVICE_WATCHDOGS:
599c7c54 1085 r = parse_boolean_argument("--service-watchdogs=", optarg, &arg_service_watchdogs);
2a12e32e 1086 if (r < 0)
599c7c54 1087 return r;
2a12e32e
JK
1088 break;
1089
9e58ff9c 1090 case ARG_SHOW_STATUS:
d450b6f2
ZJS
1091 if (optarg) {
1092 r = parse_show_status(optarg, &arg_show_status);
ac7ec288 1093 if (r < 0)
2b5107e1
ZJS
1094 return log_error_errno(r, "Failed to parse show status boolean: \"%s\": %m",
1095 optarg);
d450b6f2
ZJS
1096 } else
1097 arg_show_status = SHOW_STATUS_YES;
6e98720f 1098 break;
a5d87bf0 1099
a16e1123
LP
1100 case ARG_DESERIALIZE: {
1101 int fd;
1102 FILE *f;
1103
e652663a 1104 fd = parse_fd(optarg);
baaa35ad 1105 if (fd < 0)
d2132d3d 1106 return log_error_errno(fd, "Failed to parse serialization fd \"%s\": %m", optarg);
a16e1123 1107
b9e74c39 1108 (void) fd_cloexec(fd, true);
01e10de3
LP
1109
1110 f = fdopen(fd, "r");
4a62c710 1111 if (!f)
2b5107e1 1112 return log_error_errno(errno, "Failed to open serialization fd %d: %m", fd);
a16e1123 1113
74ca738f 1114 safe_fclose(arg_serialization);
d3b1c508 1115 arg_serialization = f;
a16e1123
LP
1116
1117 break;
1118 }
1119
2660882b 1120 case ARG_SWITCHED_ROOT:
bf4df7c3 1121 arg_switched_root = true;
d03bc1b8
HH
1122 break;
1123
ee48dbd5 1124 case ARG_MACHINE_ID:
aea3f594 1125 r = id128_from_string_nonzero(optarg, &arg_machine_id);
54500613 1126 if (r < 0)
2b5107e1 1127 return log_error_errno(r, "MachineID '%s' is not valid: %m", optarg);
ee48dbd5
NC
1128 break;
1129
f170852a 1130 case 'h':
fa0f4d8a 1131 arg_action = ACTION_HELP;
f170852a
LP
1132 break;
1133
1d2e23ab
LP
1134 case 'D':
1135 log_set_max_level(LOG_DEBUG);
1136 break;
1137
099663ff
LP
1138 case 'b':
1139 case 's':
1140 case 'z':
cd57038a
ZJS
1141 /* Just to eat away the sysvinit kernel cmdline args that we'll parse in
1142 * parse_proc_cmdline_item() or ignore, without any getopt() error messages.
1143 */
099663ff 1144 case '?':
df0ff127 1145 if (getpid_cached() != 1)
099663ff 1146 return -EINVAL;
601185b4
ZJS
1147 else
1148 return 0;
099663ff 1149
601185b4 1150 default:
04499a70 1151 assert_not_reached();
f170852a
LP
1152 }
1153
d7a0f1f4 1154 if (optind < argc && getpid_cached() != 1)
9a9ca408
ZJS
1155 /* Hmm, when we aren't run as init system let's complain about excess arguments */
1156 return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Excess arguments.");
1157
4870133b 1158 if (arg_action == ACTION_RUN && arg_runtime_scope == RUNTIME_SCOPE_USER && !user_arg_seen)
baaa35ad 1159 return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
9a9ca408 1160 "Explicit --user argument required to run as user manager.");
d821e6d6 1161
f170852a
LP
1162 return 0;
1163}
1164
1165static int help(void) {
37ec0fdd
LP
1166 _cleanup_free_ char *link = NULL;
1167 int r;
1168
1169 r = terminal_urlify_man("systemd", "1", &link);
1170 if (r < 0)
1171 return log_oom();
f170852a 1172
2e33c433 1173 printf("%s [OPTIONS...]\n\n"
7ae47326
ZJS
1174 "%sStarts and monitors system and user services.%s\n\n"
1175 "This program takes no positional arguments.\n\n"
1176 "%sOptions%s:\n"
e537352b 1177 " -h --help Show this help\n"
cb4069d9 1178 " --version Show version\n"
cd69e88b 1179 " --test Determine initial transaction, dump it and exit\n"
03b3b55e
ZJS
1180 " --system Combined with --test: operate in system mode\n"
1181 " --user Combined with --test: operate in user mode\n"
80876c20 1182 " --dump-configuration-items Dump understood unit configuration items\n"
bbc1acab 1183 " --dump-bus-properties Dump exposed bus properties\n"
5c08257b 1184 " --bus-introspect=PATH Write XML introspection data\n"
9e58ff9c 1185 " --unit=UNIT Set default unit\n"
b9e74c39
LP
1186 " --dump-core[=BOOL] Dump core on crash\n"
1187 " --crash-vt=NR Change to specified VT on crash\n"
7a66f215 1188 " --crash-action=ACTION Specify what to do on crash\n"
b9e74c39
LP
1189 " --crash-shell[=BOOL] Run shell on crash\n"
1190 " --confirm-spawn[=BOOL] Ask for confirmation when spawning processes\n"
03b3b55e
ZJS
1191 " --show-status[=BOOL] Show status updates on the console during boot\n"
1192 " --log-target=TARGET Set log target (console, journal, kmsg,\n"
1193 " journal-or-kmsg, null)\n"
1194 " --log-level=LEVEL Set log level (debug, info, notice, warning,\n"
1195 " err, crit, alert, emerg)\n"
b9e74c39
LP
1196 " --log-color[=BOOL] Highlight important log messages\n"
1197 " --log-location[=BOOL] Include code location in log messages\n"
c5673ed0 1198 " --log-time[=BOOL] Prefix log messages with current time\n"
0a494f1f 1199 " --default-standard-output= Set default standard output for services\n"
37ec0fdd 1200 " --default-standard-error= Set default standard error output for services\n"
03b3b55e 1201 " --no-pager Do not pipe output into a pager\n"
bc556335
DDM
1202 "\nSee the %s for details.\n",
1203 program_invocation_short_name,
1204 ansi_highlight(),
1205 ansi_normal(),
1206 ansi_underline(),
1207 ansi_normal(),
1208 link);
f170852a
LP
1209
1210 return 0;
1211}
1212
2cc856ac
LP
1213static int prepare_reexecute(
1214 Manager *m,
1215 FILE **ret_f,
1216 FDSet **ret_fds,
1217 bool switching_root) {
1218
48b90859
LP
1219 _cleanup_fdset_free_ FDSet *fds = NULL;
1220 _cleanup_fclose_ FILE *f = NULL;
a16e1123
LP
1221 int r;
1222
1223 assert(m);
2cc856ac
LP
1224 assert(ret_f);
1225 assert(ret_fds);
a16e1123 1226
71445ae7 1227 /* Make sure nothing is really destructed when we shut down */
313cefa1 1228 m->n_reloading++;
718db961 1229 bus_manager_send_reloading(m, true);
71445ae7 1230
5d1e57b8
LP
1231 r = manager_open_serialization(m, &f);
1232 if (r < 0)
1233 return log_error_errno(r, "Failed to create serialization file: %m");
1234
6b78f9b4 1235 fds = fdset_new();
48b90859
LP
1236 if (!fds)
1237 return log_oom();
a16e1123 1238
b3680f49 1239 r = manager_serialize(m, f, fds, switching_root);
48b90859 1240 if (r < 0)
d68c645b 1241 return r;
a16e1123 1242
5d1e57b8
LP
1243 r = finish_serialization_file(f);
1244 if (r < 0)
1245 return log_error_errno(r, "Failed to finish serialization file: %m");
a16e1123 1246
6b78f9b4 1247 r = fd_cloexec(fileno(f), false);
48b90859
LP
1248 if (r < 0)
1249 return log_error_errno(r, "Failed to disable O_CLOEXEC for serialization: %m");
a16e1123 1250
6b78f9b4 1251 r = fdset_cloexec(fds, false);
48b90859
LP
1252 if (r < 0)
1253 return log_error_errno(r, "Failed to disable O_CLOEXEC for serialization fds: %m");
a16e1123 1254
2cc856ac
LP
1255 *ret_f = TAKE_PTR(f);
1256 *ret_fds = TAKE_PTR(fds);
a16e1123 1257
48b90859 1258 return 0;
a16e1123
LP
1259}
1260
a8b627aa
LP
static void bump_file_max_and_nr_open(void) {

        /* Raises the fs.file-max and fs.nr_open sysctls to their respective maximums. On current kernels
         * large numbers of file descriptors are no longer a performance problem and their memory is
         * properly tracked by memcg, so counting and limiting them in two further layers is unnecessary
         * and only complicates things. This turns off 2 of the 4 levels of limits on file descriptors
         * and leaves RLIMIT_NOFILE (soft + hard) as the only ones that really matter.
         *
         * Failures are logged and otherwise ignored. */

#if BUMP_PROC_SYS_FS_FILE_MAX || BUMP_PROC_SYS_FS_NR_OPEN
        int r;
#endif

#if BUMP_PROC_SYS_FS_FILE_MAX
        /* Since kernel 5.2 the maximum accepted here is LONG_MAX, hence write that. (Earlier kernels
         * behaved differently, but there the operation would fail silently.) */
        r = sysctl_write("fs/file-max", LONG_MAX_STR);
        if (r < 0)
                log_full_errno(ERRNO_IS_NEG_FS_WRITE_REFUSED(r) ? LOG_DEBUG : LOG_WARNING, r,
                               "Failed to bump fs.file-max, ignoring: %m");
#endif

#if BUMP_PROC_SYS_FS_NR_OPEN
        /* The kernel enforces a maximum and a minimum for fs.nr_open, but does not expose them; they are
         * hardcoded in fs/file.c. Hopefully they never change, but we cannot be sure. Hence start with
         * the hardcoded maximum and keep halving the value for as long as the kernel rejects it. */

        for (unsigned candidate = NR_OPEN_MAXIMUM; candidate >= NR_OPEN_MINIMUM; candidate /= 2) {
                unsigned current = read_nr_open();

                if (current >= candidate) { /* Already larger */
                        log_debug("Skipping bump, value is already larger.");
                        break;
                }

                r = sysctl_writef("fs/nr_open", "%u", candidate);
                if (r >= 0) {
                        log_debug("Successfully bumped fs.nr_open to %u", candidate);
                        break;
                }

                if (r != -EINVAL) {
                        /* Any error other than "invalid value" ends the attempt. */
                        log_full_errno(ERRNO_IS_NEG_FS_WRITE_REFUSED(r) ? LOG_DEBUG : LOG_WARNING, r,
                                       "Failed to bump fs.nr_open, ignoring: %m");
                        break;
                }

                log_debug("Couldn't write fs.nr_open as %u, halving it.", candidate);
        }
#endif
}
1310
static int bump_rlimit_nofile(const struct rlimit *saved_rlimit) {
        struct rlimit new_rlimit;
        int r;

        /* Raises our own RLIMIT_NOFILE (soft and hard) to the value read via read_nr_open(), without
         * ever going below the limits we inherited (saved_rlimit).
         *
         * Returns 0 if the limit was raised or was already high enough, and the negative error from
         * setrlimit_closest() otherwise (which is also logged as a warning). */

        /* The underlying absolute limit the kernel enforces. */
        unsigned nr_open = read_nr_open();

        /* The limits we want for ourselves: the larger of the kernel maximum and the inherited value. */
        new_rlimit = (struct rlimit) {
                .rlim_cur = MAX((rlim_t) nr_open, saved_rlimit->rlim_cur),
                .rlim_max = MAX((rlim_t) nr_open, saved_rlimit->rlim_max),
        };

        /* Nothing to do if neither limit would go up. */
        if (new_rlimit.rlim_max <= saved_rlimit->rlim_max &&
            new_rlimit.rlim_cur <= saved_rlimit->rlim_cur) {
                log_debug("RLIMIT_NOFILE is already as high or higher than we need it, not bumping.");
                return 0;
        }

        /* Go all the way to the maximum the kernel allows, for both the hard and the soft limit. */
        r = setrlimit_closest(RLIMIT_NOFILE, &new_rlimit);
        if (r < 0)
                return log_warning_errno(r, "Setting RLIMIT_NOFILE failed, ignoring: %m");

        return 0;
}
1339
eadb4f19 1340static int bump_rlimit_memlock(const struct rlimit *saved_rlimit) {
cda7faa9 1341 struct rlimit new_rlimit;
04d1ee0f 1342 uint64_t mm;
fb3ae275
LP
1343 int r;
1344
ff3a7019
ZJS
1345 /* BPF_MAP_TYPE_LPM_TRIE bpf maps are charged against RLIMIT_MEMLOCK, even if we have CAP_IPC_LOCK
1346 * which should normally disable such checks. We need them to implement IPAddressAllow= and
1347 * IPAddressDeny=, hence let's bump the value high enough for our user. */
fb3ae275 1348
cda7faa9
LP
1349 /* Using MAX() on resource limits only is safe if RLIM_INFINITY is > 0. POSIX declares that rlim_t
1350 * must be unsigned, hence this is a given, but let's make this clear here. */
1351 assert_cc(RLIM_INFINITY > 0);
1352
ff3a7019
ZJS
1353 mm = physical_memory_scale(1, 8); /* Let's scale how much we allow to be locked by the amount of
1354 * physical RAM. We allow an eighth to be locked by us, just to
1355 * pick a value. */
04d1ee0f 1356
cda7faa9 1357 new_rlimit = (struct rlimit) {
04d1ee0f
LP
1358 .rlim_cur = MAX3(HIGH_RLIMIT_MEMLOCK, saved_rlimit->rlim_cur, mm),
1359 .rlim_max = MAX3(HIGH_RLIMIT_MEMLOCK, saved_rlimit->rlim_max, mm),
cda7faa9
LP
1360 };
1361
1362 if (saved_rlimit->rlim_max >= new_rlimit.rlim_cur &&
1363 saved_rlimit->rlim_cur >= new_rlimit.rlim_max) {
1364 log_debug("RLIMIT_MEMLOCK is already as high or higher than we need it, not bumping.");
1365 return 0;
1366 }
1367
1368 r = setrlimit_closest(RLIMIT_MEMLOCK, &new_rlimit);
fb3ae275
LP
1369 if (r < 0)
1370 return log_warning_errno(r, "Setting RLIMIT_MEMLOCK failed, ignoring: %m");
1371
1372 return 0;
1373}
1374
d3b1c508 1375static int enforce_syscall_archs(Set *archs) {
349cc4a5 1376#if HAVE_SECCOMP
d3b1c508
LP
1377 int r;
1378
83f12b27
FS
1379 if (!is_seccomp_available())
1380 return 0;
1381
469830d1 1382 r = seccomp_restrict_archs(arg_syscall_archs);
d3b1c508 1383 if (r < 0)
ef6bf967 1384 return log_error_errno(r, "Failed to enforce system call architecture restriction: %m");
d3b1c508 1385#endif
469830d1 1386 return 0;
d3b1c508
LP
1387}
1388
4bd03515
ZJS
1389static int os_release_status(void) {
1390 _cleanup_free_ char *pretty_name = NULL, *name = NULL, *version = NULL,
1391 *ansi_color = NULL, *support_end = NULL;
b6e2f329
LP
1392 int r;
1393
d58ad743
LP
1394 r = parse_os_release(NULL,
1395 "PRETTY_NAME", &pretty_name,
4bd03515
ZJS
1396 "NAME", &name,
1397 "VERSION", &version,
1398 "ANSI_COLOR", &ansi_color,
1399 "SUPPORT_END", &support_end);
d58ad743 1400 if (r < 0)
4bd03515
ZJS
1401 return log_full_errno(r == -ENOENT ? LOG_DEBUG : LOG_WARNING, r,
1402 "Failed to read os-release file, ignoring: %m");
b6e2f329 1403
02b7005e 1404 const char *label = os_release_pretty_name(pretty_name, name);
f499191c 1405 const char *color = empty_to_null(ansi_color) ?: "1";
4bd03515
ZJS
1406
1407 if (show_status_on(arg_show_status)) {
f499191c
LP
1408 if (in_initrd()) {
1409 if (log_get_show_color())
1410 status_printf(NULL, 0,
1411 ANSI_HIGHLIGHT "Booting initrd of " ANSI_NORMAL "\x1B[%sm%s" ANSI_NORMAL ANSI_HIGHLIGHT "." ANSI_NORMAL,
1412 color, label);
1413 else
1414 status_printf(NULL, 0,
1415 "Booting initrd of %s...", label);
1416 } else {
1417 if (log_get_show_color())
1418 status_printf(NULL, 0,
1419 "\n" ANSI_HIGHLIGHT "Welcome to " ANSI_NORMAL "\x1B[%sm%s" ANSI_NORMAL ANSI_HIGHLIGHT "!" ANSI_NORMAL "\n",
1420 color, label);
1421 else
1422 status_printf(NULL, 0,
1423 "\nWelcome to %s!\n",
1424 label);
1425 }
4bd03515
ZJS
1426 }
1427
de695626 1428 if (support_end && os_release_support_ended(support_end, /* quiet = */ false, /* ret_eol = */ NULL) > 0)
4bd03515
ZJS
1429 /* pretty_name may include the version already, so we'll print the version only if we
1430 * have it and we're not using pretty_name. */
1431 status_printf(ANSI_HIGHLIGHT_RED " !! " ANSI_NORMAL, 0,
1432 "This OS version (%s%s%s) is past its end-of-support date (%s)",
1433 label,
1434 (pretty_name || !version) ? "" : " version ",
1435 (pretty_name || !version) ? "" : version,
1436 support_end);
1437
1438 return 0;
b6e2f329
LP
1439}
1440
3f37a825 1441static int setup_os_release(RuntimeScope scope) {
20817cd2
MY
1442 char os_release_dst[STRLEN("/run/user//systemd/propagate/.os-release-stage/os-release") + DECIMAL_STR_MAX(uid_t)] =
1443 "/run/systemd/propagate/.os-release-stage/os-release";
3f37a825
LB
1444 const char *os_release_src = "/etc/os-release";
1445 int r;
1446
20817cd2
MY
1447 assert(IN_SET(scope, RUNTIME_SCOPE_SYSTEM, RUNTIME_SCOPE_USER));
1448
3f37a825
LB
1449 if (access("/etc/os-release", F_OK) < 0) {
1450 if (errno != ENOENT)
1451 log_debug_errno(errno, "Failed to check if /etc/os-release exists, ignoring: %m");
1452
1453 os_release_src = "/usr/lib/os-release";
1454 }
1455
20817cd2
MY
1456 if (scope == RUNTIME_SCOPE_USER)
1457 xsprintf(os_release_dst, "/run/user/" UID_FMT "/systemd/propagate/.os-release-stage/os-release", geteuid());
3f37a825
LB
1458
1459 r = mkdir_parents_label(os_release_dst, 0755);
bf85c239 1460 if (r < 0)
20817cd2 1461 return log_debug_errno(r, "Failed to create parent directory of '%s', ignoring: %m", os_release_dst);
3f37a825 1462
663e2756 1463 r = copy_file_atomic(os_release_src, os_release_dst, 0644, COPY_MAC_CREATE|COPY_REPLACE);
3f37a825 1464 if (r < 0)
20817cd2
MY
1465 return log_debug_errno(r, "Failed to copy '%s' to '%s', ignoring: %m",
1466 os_release_src, os_release_dst);
3f37a825
LB
1467
1468 return 0;
1469}
1470
fdd25311
LP
1471static int write_container_id(void) {
1472 const char *c;
7756528e 1473 int r = 0; /* avoid false maybe-uninitialized warning */
fdd25311
LP
1474
1475 c = getenv("container");
1476 if (isempty(c))
1477 return 0;
1478
2053593f 1479 WITH_UMASK(0022)
8612da97 1480 r = write_string_file("/run/systemd/container", c, WRITE_STRING_FILE_CREATE);
19854865 1481 if (r < 0)
f1f849b0 1482 return log_warning_errno(r, "Failed to write /run/systemd/container, ignoring: %m");
19854865
LP
1483
1484 return 1;
1485}
1486
14a40a6d
MY
1487static int write_boot_or_shutdown_osc(const char *type) {
1488 int r;
1489
1490 assert(STRPTR_IN_SET(type, "boot", "shutdown"));
1491
1492 if (getenv_terminal_is_dumb())
1493 return 0;
1494
1495 _cleanup_close_ int fd = open_terminal("/dev/console", O_WRONLY|O_NOCTTY|O_CLOEXEC);
1496 if (fd < 0)
1497 return log_debug_errno(fd, "Failed to open /dev/console to print %s OSC, ignoring: %m", type);
1498
1499 _cleanup_free_ char *seq = NULL;
1500 if (streq(type, "boot"))
1501 r = osc_context_open_boot(&seq);
1502 else
1503 r = osc_context_close(SD_ID128_ALLF, &seq);
1504 if (r < 0)
1505 return log_debug_errno(r, "Failed to acquire %s OSC sequence, ignoring: %m", type);
1506
1507 r = loop_write(fd, seq, SIZE_MAX);
1508 if (r < 0)
1509 return log_debug_errno(r, "Failed to write %s OSC sequence, ignoring: %m", type);
1510
1511 if (DEBUG_LOGGING) {
1512 _cleanup_free_ char *h = cescape(seq);
1513 log_debug("OSC sequence for %s successfully written: %s", type, strna(h));
1514 }
1515
1516 return 0;
1517}
1518
19854865
LP
1519static int bump_unix_max_dgram_qlen(void) {
1520 _cleanup_free_ char *qlen = NULL;
1521 unsigned long v;
1522 int r;
1523
ff3a7019
ZJS
1524 /* Let's bump the net.unix.max_dgram_qlen sysctl. The kernel default of 16 is simply too low. We set
1525 * the value really really early during boot, so that it is actually applied to all our sockets,
1526 * including the $NOTIFY_SOCKET one. */
19854865
LP
1527
1528 r = read_one_line_file("/proc/sys/net/unix/max_dgram_qlen", &qlen);
1529 if (r < 0)
ff3a7019
ZJS
1530 return log_full_errno(r == -ENOENT ? LOG_DEBUG : LOG_WARNING, r,
1531 "Failed to read AF_UNIX datagram queue length, ignoring: %m");
19854865
LP
1532
1533 r = safe_atolu(qlen, &v);
1534 if (r < 0)
3130fca5 1535 return log_warning_errno(r, "Failed to parse AF_UNIX datagram queue length '%s', ignoring: %m", qlen);
19854865
LP
1536
1537 if (v >= DEFAULT_UNIX_MAX_DGRAM_QLEN)
1538 return 0;
1539
27c8ca43 1540 r = sysctl_write("net/unix/max_dgram_qlen", STRINGIFY(DEFAULT_UNIX_MAX_DGRAM_QLEN));
19854865 1541 if (r < 0)
e28d408c 1542 return log_full_errno(ERRNO_IS_NEG_FS_WRITE_REFUSED(r) ? LOG_DEBUG : LOG_WARNING, r,
19854865
LP
1543 "Failed to bump AF_UNIX datagram queue length, ignoring: %m");
1544
1545 return 1;
fdd25311
LP
1546}
1547
32391275 1548static int fixup_environment(void) {
32391275
FB
1549 int r;
1550
43db615b
LP
1551 /* Only fix up the environment when we are started as PID 1 */
1552 if (getpid_cached() != 1)
1553 return 0;
1554
1555 /* We expect the environment to be set correctly if run inside a container. */
84af7821
LP
1556 if (detect_container() > 0)
1557 return 0;
1558
ff3a7019
ZJS
1559 /* When started as PID1, the kernel uses /dev/console for our stdios and uses TERM=linux whatever the
1560 * backend device used by the console. We try to make a better guess here since some consoles might
1561 * not have support for color mode for example.
32391275 1562 *
43db615b 1563 * However if TERM was configured through the kernel command line then leave it alone. */
19aff5f7 1564 _cleanup_free_ char *term = NULL;
1d84ad94 1565 r = proc_cmdline_get_key("TERM", 0, &term);
32391275
FB
1566 if (r < 0)
1567 return r;
19aff5f7
LP
1568 if (r > 0) {
1569 /* If we pick up $TERM, then also pick up $COLORTERM, $NO_COLOR */
1570 FOREACH_STRING(v, "COLORTERM", "NO_COLOR") {
1571 _cleanup_free_ char *vv = NULL;
1572 r = proc_cmdline_get_key(v, 0, &vv);
1573 if (r < 0)
1574 return r;
1575 if (r > 0 && setenv(v, vv, /* overwrite= */ true) < 0)
1576 return -errno;
1577 }
1578 } else {
1579 /* If no $TERM is set then look for the per-tty variable instead */
153d5dfd
DDM
1580 r = proc_cmdline_get_key("systemd.tty.term.console", 0, &term);
1581 if (r < 0)
1582 return r;
1583 }
1584
ad6ca4a6
ZJS
1585 if (!term)
1586 (void) query_term_for_tty("/dev/console", &term);
1587
1588 if (setenv("TERM", term ?: FALLBACK_TERM, /* overwrite= */ true) < 0)
32391275
FB
1589 return -errno;
1590
9d48671c 1591 /* The kernels sets HOME=/ for init. Let's undo this. */
1934242b 1592 if (path_equal(getenv("HOME"), "/"))
44ee03d1 1593 assert_se(unsetenv("HOME") == 0);
9d48671c 1594
32391275
FB
1595 return 0;
1596}
1597
b22d392d 1598static int become_shutdown(int objective, int retval) {
b22d392d
VC
1599 static const char* const table[_MANAGER_OBJECTIVE_MAX] = {
1600 [MANAGER_EXIT] = "exit",
1601 [MANAGER_REBOOT] = "reboot",
1602 [MANAGER_POWEROFF] = "poweroff",
1603 [MANAGER_HALT] = "halt",
1604 [MANAGER_KEXEC] = "kexec",
1605 };
4a36297c 1606
e8815abf 1607 char timeout[STRLEN("--timeout=") + DECIMAL_STR_MAX(usec_t) + STRLEN("us")],
4688b089 1608 exit_code[STRLEN("--exit-code=") + DECIMAL_STR_MAX(uint8_t)];
4a36297c
LP
1609
1610 _cleanup_strv_free_ char **env_block = NULL;
e8815abf 1611 _cleanup_free_ char *max_log_levels = NULL;
f16890f8 1612 usec_t watchdog_timer = 0;
4a36297c
LP
1613 int r;
1614
b22d392d
VC
1615 assert(objective >= 0 && objective < _MANAGER_OBJECTIVE_MAX);
1616 assert(table[objective]);
4a36297c 1617
c9e120e0 1618 xsprintf(timeout, "--timeout=%" PRI_USEC "us", arg_defaults.timeout_stop_usec);
4a36297c 1619
e8815abf 1620 const char* command_line[11] = {
4688b089
ZJS
1621 SYSTEMD_SHUTDOWN_BINARY_PATH,
1622 table[objective],
4688b089
ZJS
1623 timeout,
1624 /* Note that the last position is a terminator and must contain NULL. */
1625 };
e8815abf 1626 size_t pos = 3;
4688b089
ZJS
1627
1628 assert(command_line[pos-1]);
1629 assert(!command_line[pos]);
1630
e8815abf
DDM
1631 (void) log_max_levels_to_string(log_get_max_level(), &max_log_levels);
1632
1633 if (max_log_levels) {
1634 command_line[pos++] = "--log-level";
1635 command_line[pos++] = max_log_levels;
1636 }
1637
4a36297c
LP
1638 switch (log_get_target()) {
1639
1640 case LOG_TARGET_KMSG:
1641 case LOG_TARGET_JOURNAL_OR_KMSG:
1642 case LOG_TARGET_SYSLOG_OR_KMSG:
d2ebd50d 1643 command_line[pos++] = "--log-target=kmsg";
4a36297c
LP
1644 break;
1645
1646 case LOG_TARGET_NULL:
6b7f150b 1647 command_line[pos++] = "--log-target=null";
4a36297c
LP
1648 break;
1649
1650 case LOG_TARGET_CONSOLE:
1651 default:
d2ebd50d 1652 command_line[pos++] = "--log-target=console";
4a36297c
LP
1653 };
1654
1655 if (log_get_show_color())
1656 command_line[pos++] = "--log-color";
1657
1658 if (log_get_show_location())
1659 command_line[pos++] = "--log-location";
1660
c5673ed0
DS
1661 if (log_get_show_time())
1662 command_line[pos++] = "--log-time";
1663
3f92250f
DDM
1664 xsprintf(exit_code, "--exit-code=%d", retval);
1665 command_line[pos++] = exit_code;
4a36297c 1666
14ecfc1c 1667 assert(pos < ELEMENTSOF(command_line));
4688b089
ZJS
1668
1669 /* The watchdog: */
4a36297c 1670
b22d392d 1671 if (objective == MANAGER_REBOOT)
65224c1d 1672 watchdog_timer = arg_reboot_watchdog;
b22d392d 1673 else if (objective == MANAGER_KEXEC)
acafd7d8
LB
1674 watchdog_timer = arg_kexec_watchdog;
1675
f16890f8 1676 /* If we reboot or kexec let's set the shutdown watchdog and tell the
5717062e
CK
1677 * shutdown binary to repeatedly ping it.
1678 * Disable the pretimeout watchdog, as we do not support it from the shutdown binary. */
1679 (void) watchdog_setup_pretimeout(0);
aff3a9e1 1680 (void) watchdog_setup_pretimeout_governor(NULL);
f16890f8 1681 r = watchdog_setup(watchdog_timer);
d46aab13 1682 watchdog_close(/* disarm= */ r < 0);
4a36297c 1683
4688b089
ZJS
1684 /* The environment block: */
1685
1686 env_block = strv_copy(environ);
1687
f16890f8
FB
1688 /* Tell the binary how often to ping, ignore failure */
1689 (void) strv_extendf(&env_block, "WATCHDOG_USEC="USEC_FMT, watchdog_timer);
8a2c1fbf 1690
4b20ae9a
LP
1691 /* Make sure that tools that look for $WATCHDOG_USEC (and might get started by the exitrd) don't get
1692 * confused by the variable, because the sd_watchdog_enabled() protocol uses the same variable for
1693 * the same purposes. */
1694 (void) strv_extendf(&env_block, "WATCHDOG_PID=" PID_FMT, getpid_cached());
1695
f16890f8
FB
1696 if (arg_watchdog_device)
1697 (void) strv_extendf(&env_block, "WATCHDOG_DEVICE=%s", arg_watchdog_device);
4a36297c 1698
14a40a6d
MY
1699 (void) write_boot_or_shutdown_osc("shutdown");
1700
4a36297c
LP
1701 execve(SYSTEMD_SHUTDOWN_BINARY_PATH, (char **) command_line, env_block);
1702 return -errno;
1703}
1704
20fa2bb8 1705static void initialize_clock_timewarp(void) {
e839bafd
LP
1706 int r;
1707
3753325b
LP
1708 /* This is called very early on, before we parse the kernel command line or otherwise figure out why
1709 * we are running, but only once. */
1710
e839bafd
LP
1711 if (clock_is_localtime(NULL) > 0) {
1712 int min;
1713
ff3a7019 1714 /* The very first call of settimeofday() also does a time warp in the kernel.
e839bafd 1715 *
ff3a7019 1716 * In the rtc-in-local time mode, we set the kernel's timezone, and rely on external tools to
20fa2bb8 1717 * take care of maintaining the RTC and do all adjustments. This matches the behavior of
ff3a7019 1718 * Windows, which leaves the RTC alone if the registry tells that the RTC runs in UTC.
e839bafd
LP
1719 */
1720 r = clock_set_timezone(&min);
1721 if (r < 0)
1722 log_error_errno(r, "Failed to apply local time delta, ignoring: %m");
1723 else
1724 log_info("RTC configured in localtime, applying delta of %i minutes to system time.", min);
1725
d46b79bb 1726 } else if (!in_initrd())
e839bafd
LP
1727 /*
1728 * Do a dummy very first call to seal the kernel's time warp magic.
1729 *
ff3a7019
ZJS
1730 * Do not call this from inside the initrd. The initrd might not carry /etc/adjtime with
1731 * LOCAL, but the real system could be set up that way. In such case, we need to delay the
1732 * time-warp or the sealing until we reach the real system.
e839bafd 1733 *
ff3a7019
ZJS
1734 * Do no set the kernel's timezone. The concept of local time cannot be supported reliably,
1735 * the time will jump or be incorrect at every daylight saving time change. All kernel local
1736 * time concepts will be treated as UTC that way.
e839bafd
LP
1737 */
1738 (void) clock_reset_timewarp();
e839bafd
LP
1739}
1740
3753325b 1741static void apply_clock_update(void) {
20fa2bb8
ZJS
1742 /* This is called later than clock_apply_epoch(), i.e. after we have parsed
1743 * configuration files/kernel command line and such. */
3753325b
LP
1744
1745 if (arg_clock_usec == 0)
1746 return;
1747
45250e66
LP
1748 if (getpid_cached() != 1)
1749 return;
1750
52bb308c 1751 if (clock_settime(CLOCK_REALTIME, TIMESPEC_STORE(arg_clock_usec)) < 0)
3753325b 1752 log_error_errno(errno, "Failed to set system clock to time specified on kernel command line: %m");
04f5c018 1753 else
3753325b 1754 log_info("Set system clock to %s, as specified on the kernel command line.",
04f5c018 1755 FORMAT_TIMESTAMP(arg_clock_usec));
3753325b
LP
1756}
1757
d247f232 1758static void cmdline_take_random_seed(void) {
d247f232
LP
1759 size_t suggested;
1760 int r;
1761
1762 if (arg_random_seed_size == 0)
1763 return;
1764
1765 if (getpid_cached() != 1)
1766 return;
1767
1768 assert(arg_random_seed);
1769 suggested = random_pool_size();
1770
1771 if (arg_random_seed_size < suggested)
1772 log_warning("Random seed specified on kernel command line has size %zu, but %zu bytes required to fill entropy pool.",
1773 arg_random_seed_size, suggested);
1774
61bd7d1e 1775 r = random_write_entropy(-1, arg_random_seed, arg_random_seed_size, true);
d247f232
LP
1776 if (r < 0) {
1777 log_warning_errno(r, "Failed to credit entropy specified on kernel command line, ignoring: %m");
1778 return;
1779 }
1780
1781 log_notice("Successfully credited entropy passed on kernel command line.\n"
ff3a7019
ZJS
1782 "Note that the seed provided this way is accessible to unprivileged programs. "
1783 "This functionality should not be used outside of testing environments.");
d247f232
LP
1784}
1785
1e41242e 1786static void initialize_coredump(bool skip_setup) {
1e41242e
LP
1787 if (getpid_cached() != 1)
1788 return;
1789
ff3a7019
ZJS
1790 /* Don't limit the core dump size, so that coredump handlers such as systemd-coredump (which honour
1791 * the limit) will process core dumps for system services by default. */
1e41242e
LP
1792 if (setrlimit(RLIMIT_CORE, &RLIMIT_MAKE_CONST(RLIM_INFINITY)) < 0)
1793 log_warning_errno(errno, "Failed to set RLIMIT_CORE: %m");
1794
ff3a7019
ZJS
1795 /* But at the same time, turn off the core_pattern logic by default, so that no coredumps are stored
1796 * until the systemd-coredump tool is enabled via sysctl. However it can be changed via the kernel
32e27670 1797 * command line later so core dumps can still be generated during early startup and in initrd. */
1e41242e 1798 if (!skip_setup)
e557b1a6 1799 disable_coredumps();
1e41242e
LP
1800}
1801
c6885f5f
FB
1802static void initialize_core_pattern(bool skip_setup) {
1803 int r;
1804
1805 if (skip_setup || !arg_early_core_pattern)
1806 return;
1807
1808 if (getpid_cached() != 1)
1809 return;
1810
57512c89 1811 r = write_string_file("/proc/sys/kernel/core_pattern", arg_early_core_pattern, WRITE_STRING_FILE_DISABLE_BUFFER);
c6885f5f 1812 if (r < 0)
ff3a7019
ZJS
1813 log_warning_errno(r, "Failed to write '%s' to /proc/sys/kernel/core_pattern, ignoring: %m",
1814 arg_early_core_pattern);
c6885f5f
FB
1815}
1816
ffc1ec73
LP
1817static void apply_protect_system(bool skip_setup) {
1818 int r;
1819
1820 if (skip_setup || getpid_cached() != 1 || arg_protect_system == 0)
1821 return;
1822
1823 if (arg_protect_system < 0 && !in_initrd()) {
1824 log_debug("ProtectSystem=auto selected, but not running in an initrd, skipping.");
1825 return;
1826 }
1827
1828 r = make_mount_point("/usr");
1829 if (r < 0) {
1830 log_warning_errno(r, "Failed to make /usr/ a mount point, ignoring: %m");
1831 return;
1832 }
1833
1834 if (mount_nofollow_verbose(
1835 LOG_WARNING,
1836 /* what= */ NULL,
1837 "/usr",
1838 /* fstype= */ NULL,
1839 MS_BIND|MS_REMOUNT|MS_RDONLY,
1840 /* options= */ NULL) < 0)
1841 return;
1842
1843 log_info("Successfully made /usr/ read-only.");
1844}
1845
61fbbac1
ZJS
1846static void update_cpu_affinity(bool skip_setup) {
1847 _cleanup_free_ char *mask = NULL;
1848
1849 if (skip_setup || !arg_cpu_affinity.set)
1850 return;
1851
1852 assert(arg_cpu_affinity.allocated > 0);
1853
667030bf
ZJS
1854 mask = cpu_set_to_range_string(&arg_cpu_affinity);
1855 log_debug("Setting CPU affinity to {%s}.", strnull(mask));
61fbbac1
ZJS
1856
1857 if (sched_setaffinity(0, arg_cpu_affinity.allocated, arg_cpu_affinity.set) < 0)
6b1fa539 1858 log_warning_errno(errno, "Failed to set CPU affinity, ignoring: %m");
61fbbac1
ZJS
1859}
1860
b070c7c0
MS
1861static void update_numa_policy(bool skip_setup) {
1862 int r;
1863 _cleanup_free_ char *nodes = NULL;
1864 const char * policy = NULL;
1865
1866 if (skip_setup || !mpol_is_valid(numa_policy_get_type(&arg_numa_policy)))
1867 return;
1868
1869 if (DEBUG_LOGGING) {
1870 policy = mpol_to_string(numa_policy_get_type(&arg_numa_policy));
1871 nodes = cpu_set_to_range_string(&arg_numa_policy.nodes);
667030bf 1872 log_debug("Setting NUMA policy to %s, with nodes {%s}.", strnull(policy), strnull(nodes));
b070c7c0
MS
1873 }
1874
1875 r = apply_numa_policy(&arg_numa_policy);
1876 if (r == -EOPNOTSUPP)
1877 log_debug_errno(r, "NUMA support not available, ignoring.");
1878 else if (r < 0)
6b1fa539 1879 log_warning_errno(r, "Failed to set NUMA memory policy, ignoring: %m");
b070c7c0
MS
1880}
1881
19fd72df
LP
static void filter_args(
                const char* dst[],
                size_t *dst_index,
                char **src,
                int argc) {

        assert(dst);
        assert(dst_index);

        /* Appends the entries src[1..argc-1] to dst[], starting at *dst_index and advancing it, while
         * leaving out the options that must not be handed to a reexecuted instance. No bounds check is
         * done here, the caller has to size dst[] appropriately. */

        for (int i = 1; i < argc; i++) {
                const char *arg = src[i];

                /* Switches without an argument that are dropped. */
                if (STR_IN_SET(arg,
                               "--switched-root",
                               "--system",
                               "--user"))
                        continue;

                /* "--option=value" forms that are dropped. Target unit designators are among them: we
                 * already acted upon that information and queued the appropriate jobs, and don't want
                 * to redo all this after reexecution. */
                if (startswith(arg, "--deserialize=") || startswith(arg, "--unit="))
                        continue;

                /* "--option value" forms of the same options: skip the separate value too. */
                if (STR_IN_SET(arg, "--deserialize", "--unit")) {
                        i++;
                        continue;
                }

                /* Anything else is a good old option, pass it over to the new instance. */
                dst[(*dst_index)++] = arg;
        }
}
1919
05be3e8b
LB
1920static void finish_remaining_processes(ManagerObjective objective) {
1921 assert(objective >= 0 && objective < _MANAGER_OBJECTIVE_MAX);
1922
1923 /* Kill all remaining processes from the initrd, but don't wait for them, so that we can handle the
1924 * SIGCHLD for them after deserializing. */
1925 if (IN_SET(objective, MANAGER_SWITCH_ROOT, MANAGER_SOFT_REBOOT))
c9e120e0 1926 broadcast_signal(SIGTERM, /* wait_for_exit= */ false, /* send_sighup= */ true, arg_defaults.timeout_stop_usec);
05be3e8b 1927
559214cb
LB
1928 /* On soft reboot really make sure nothing is left. Note that this will skip cgroups
1929 * of units that were configured with SurviveFinalKillSignal=yes. */
05be3e8b 1930 if (objective == MANAGER_SOFT_REBOOT)
c9e120e0 1931 broadcast_signal(SIGKILL, /* wait_for_exit= */ false, /* send_sighup= */ false, arg_defaults.timeout_stop_usec);
05be3e8b
LB
1932}
1933
1e3eee8c
ZJS
1934static int do_reexecute(
1935 ManagerObjective objective,
3c7878f9 1936 int argc,
846f1da4 1937 char* argv[],
3c7878f9
LP
1938 const struct rlimit *saved_rlimit_nofile,
1939 const struct rlimit *saved_rlimit_memlock,
1940 FDSet *fds,
1941 const char *switch_root_dir,
1942 const char *switch_root_init,
b004393d 1943 uint64_t saved_capability_ambient_set,
3c7878f9
LP
1944 const char **ret_error_message) {
1945
19fd72df 1946 size_t i, args_size;
3c7878f9
LP
1947 const char **args;
1948 int r;
1949
13ffc607 1950 assert(IN_SET(objective, MANAGER_REEXECUTE, MANAGER_SWITCH_ROOT, MANAGER_SOFT_REBOOT));
19fd72df 1951 assert(argc >= 0);
3c7878f9
LP
1952 assert(saved_rlimit_nofile);
1953 assert(saved_rlimit_memlock);
1954 assert(ret_error_message);
1955
ab596e4c
ZJS
1956 /* Close and disarm the watchdog, so that the new instance can reinitialize it, but the machine
1957 * doesn't get rebooted while we do that. */
1958 watchdog_close(/* disarm= */ true);
3c7878f9 1959
13ffc607
LP
1960 if (!switch_root_dir && objective == MANAGER_SOFT_REBOOT) {
1961 /* If no switch root dir is specified, then check if /run/nextroot/ qualifies and use that */
1962 r = path_is_os_tree("/run/nextroot");
1963 if (r < 0 && r != -ENOENT)
1964 log_debug_errno(r, "Failed to determine if /run/nextroot/ is a valid OS tree, ignoring: %m");
1965 else if (r > 0)
1966 switch_root_dir = "/run/nextroot";
1967 }
3c7878f9 1968
93e19483
MY
1969 if (switch_root_dir) {
1970 /* If we're supposed to switch root, preemptively check the existence of a usable init.
1971 * Otherwise the system might end up in a completely undebuggable state afterwards. */
1972 if (switch_root_init) {
1973 r = chase_and_access(switch_root_init, switch_root_dir, CHASE_PREFIX_ROOT, X_OK, /* ret_path = */ NULL);
1974 if (r < 0)
1975 log_warning_errno(r, "Failed to chase configured init %s/%s: %m",
1976 switch_root_dir, switch_root_init);
1977 } else {
1978 r = chase_and_access(SYSTEMD_BINARY_PATH, switch_root_dir, CHASE_PREFIX_ROOT, X_OK, /* ret_path = */ NULL);
1979 if (r < 0)
1980 log_debug_errno(r, "Failed to chase our own binary %s/%s: %m",
1981 switch_root_dir, SYSTEMD_BINARY_PATH);
1982 }
1983
1984 if (r < 0) {
1985 r = chase_and_access("/sbin/init", switch_root_dir, CHASE_PREFIX_ROOT, X_OK, /* ret_path = */ NULL);
1986 if (r < 0) {
1987 *ret_error_message = "Switch root target contains no usable init";
1988 return log_error_errno(r, "Failed to chase %s/sbin/init", switch_root_dir);
1989 }
1990 }
1991 }
1992
1993 /* Reset RLIMIT_NOFILE + RLIMIT_MEMLOCK back to the kernel defaults, so that the new systemd can pass
1994 * the kernel default to its child processes */
1995 if (saved_rlimit_nofile->rlim_cur != 0)
1996 (void) setrlimit(RLIMIT_NOFILE, saved_rlimit_nofile);
1997 if (saved_rlimit_memlock->rlim_cur != RLIM_INFINITY)
1998 (void) setrlimit(RLIMIT_MEMLOCK, saved_rlimit_memlock);
1999
2000 finish_remaining_processes(objective);
2001
13ffc607 2002 if (switch_root_dir) {
13ffc607
LP
2003 r = switch_root(/* new_root= */ switch_root_dir,
2004 /* old_root_after= */ NULL,
b12d41a8 2005 /* flags= */ (objective == MANAGER_SWITCH_ROOT ? SWITCH_ROOT_DESTROY_OLD_ROOT : 0) |
c2d62118 2006 (objective == MANAGER_SOFT_REBOOT ? 0 : SWITCH_ROOT_RECURSIVE_RUN));
3c7878f9
LP
2007 if (r < 0)
2008 log_error_errno(r, "Failed to switch root, trying to continue: %m");
2009 }
2010
b004393d 2011 r = capability_ambient_set_apply(saved_capability_ambient_set, /* also_inherit= */ false);
e0ebc81b 2012 if (r < 0)
c4c416b1 2013 log_warning_errno(r, "Failed to apply the starting ambient set, ignoring: %m");
e0ebc81b 2014
d2ebd50d 2015 args_size = argc + 5;
3c7878f9
LP
2016 args = newa(const char*, args_size);
2017
2018 if (!switch_root_init) {
d2ebd50d 2019 char sfd[STRLEN("--deserialize=") + DECIMAL_STR_MAX(int)];
3c7878f9 2020
ff3a7019
ZJS
2021 /* First try to spawn ourselves with the right path, and with full serialization. We do this
2022 * only if the user didn't specify an explicit init to spawn. */
3c7878f9
LP
2023
2024 assert(arg_serialization);
2025 assert(fds);
2026
d2ebd50d 2027 xsprintf(sfd, "--deserialize=%i", fileno(arg_serialization));
3c7878f9 2028
846f1da4 2029 i = 1; /* Leave args[0] empty for now. */
846f1da4 2030
06afda6b
FS
2031 /* Put our stuff first to make sure it always gets parsed in case
2032 * we get weird stuff from the kernel cmdline (like --) */
13ffc607 2033 if (IN_SET(objective, MANAGER_SWITCH_ROOT, MANAGER_SOFT_REBOOT))
3c7878f9 2034 args[i++] = "--switched-root";
40d73340 2035 args[i++] = runtime_scope_cmdline_option_to_string(arg_runtime_scope);
3c7878f9 2036 args[i++] = sfd;
06afda6b
FS
2037
2038 filter_args(args, &i, argv, argc);
2039
3c7878f9
LP
2040 args[i++] = NULL;
2041
2042 assert(i <= args_size);
2043
2044 /*
50b35193
ZJS
2045 * We want valgrind to print its memory usage summary before reexecution. Valgrind won't do
2046 * this is on its own on exec(), but it will do it on exit(). Hence, to ensure we get a
ff3a7019
ZJS
2047 * summary here, fork() off a child, let it exit() cleanly, so that it prints the summary,
2048 * and wait() for it in the parent, before proceeding into the exec().
3c7878f9
LP
2049 */
2050 valgrind_summary_hack();
2051
846f1da4 2052 args[0] = SYSTEMD_BINARY_PATH;
3c7878f9 2053 (void) execv(args[0], (char* const*) args);
1e3eee8c
ZJS
2054
2055 if (objective == MANAGER_REEXECUTE) {
2056 *ret_error_message = "Failed to execute our own binary";
2057 return log_error_errno(errno, "Failed to execute our own binary %s: %m", args[0]);
2058 }
2059
846f1da4 2060 log_debug_errno(errno, "Failed to execute our own binary %s, trying fallback: %m", args[0]);
3c7878f9
LP
2061 }
2062
ff3a7019
ZJS
2063 /* Try the fallback, if there is any, without any serialization. We pass the original argv[] and
2064 * envp[]. (Well, modulo the ordering changes due to getopt() in argv[], and some cleanups in envp[],
2065 * but let's hope that doesn't matter.) */
3c7878f9
LP
2066
2067 arg_serialization = safe_fclose(arg_serialization);
2068 fds = fdset_free(fds);
2069
8c28dd24
FB
2070 /* Drop /run/systemd directory. Some of its content can be used as a flag indicating that systemd is
2071 * the init system but we might be replacing it with something different. If systemd is used again it
2072 * will recreate the directory and its content anyway. */
2073 r = rm_rf("/run/systemd.pre-switch-root", REMOVE_ROOT|REMOVE_MISSING_OK);
2074 if (r < 0)
2075 log_warning_errno(r, "Failed to prepare /run/systemd.pre-switch-root/, ignoring: %m");
2076
2077 r = RET_NERRNO(rename("/run/systemd", "/run/systemd.pre-switch-root"));
2078 if (r < 0)
2079 log_warning_errno(r, "Failed to move /run/systemd/ to /run/systemd.pre-switch-root/, ignoring: %m");
2080
3c7878f9
LP
2081 /* Reopen the console */
2082 (void) make_console_stdio();
2083
846f1da4
ZJS
2084 i = 1; /* Leave args[0] empty for now. */
2085 for (int j = 1; j <= argc; j++)
3c7878f9 2086 args[i++] = argv[j];
3c7878f9
LP
2087 assert(i <= args_size);
2088
55c041b4 2089 /* Re-enable any blocked signals, especially important if we switch from initrd to init=... */
3c7878f9
LP
2090 (void) reset_all_signal_handlers();
2091 (void) reset_signal_mask();
595225af 2092 (void) rlimit_nofile_safe();
3c7878f9
LP
2093
2094 if (switch_root_init) {
2095 args[0] = switch_root_init;
a5cede8c 2096 (void) execve(args[0], (char* const*) args, saved_env);
846f1da4 2097 log_warning_errno(errno, "Failed to execute configured init %s, trying fallback: %m", args[0]);
3c7878f9
LP
2098 }
2099
2100 args[0] = "/sbin/init";
2101 (void) execv(args[0], (char* const*) args);
2102 r = -errno;
fce19eb6 2103 *ret_error_message = "Failed to execute /sbin/init";
3c7878f9
LP
2104
2105 if (r == -ENOENT) {
fce19eb6
MY
2106 manager_status_printf(NULL, STATUS_TYPE_EMERGENCY,
2107 ANSI_HIGHLIGHT_RED " !! " ANSI_NORMAL,
2108 "%s", *ret_error_message);
2109
93e19483 2110 log_warning_errno(r, "No /sbin/init, trying fallback shell");
3c7878f9
LP
2111
2112 args[0] = "/bin/sh";
2113 args[1] = NULL;
a5cede8c 2114 (void) execve(args[0], (char* const*) args, saved_env);
93e19483 2115 r = -errno;
fce19eb6 2116 *ret_error_message = "Failed to execute fallback shell";
93e19483
MY
2117 }
2118
fce19eb6 2119 return log_error_errno(r, "%s, giving up: %m", *ret_error_message);
3c7878f9
LP
2120}
2121
7eb35049
LP
2122static int invoke_main_loop(
2123 Manager *m,
a9fd4cd1
FB
2124 const struct rlimit *saved_rlimit_nofile,
2125 const struct rlimit *saved_rlimit_memlock,
7eb35049 2126 int *ret_retval, /* Return parameters relevant for shutting down */
7eb35049
LP
2127 FDSet **ret_fds, /* Return parameters for reexecuting */
2128 char **ret_switch_root_dir, /* … */
2129 char **ret_switch_root_init, /* … */
2130 const char **ret_error_message) {
2131
2132 int r;
2133
2134 assert(m);
a9fd4cd1
FB
2135 assert(saved_rlimit_nofile);
2136 assert(saved_rlimit_memlock);
7eb35049 2137 assert(ret_retval);
7eb35049
LP
2138 assert(ret_fds);
2139 assert(ret_switch_root_dir);
2140 assert(ret_switch_root_init);
2141 assert(ret_error_message);
2142
2143 for (;;) {
5409c6fc
ZJS
2144 int objective = manager_loop(m);
2145 if (objective < 0) {
7eb35049 2146 *ret_error_message = "Failed to run main loop";
ad5db940
OJ
2147 return log_struct_errno(LOG_EMERG, objective,
2148 LOG_MESSAGE("Failed to run main loop: %m"),
3cf6a3a3 2149 LOG_MESSAGE_ID(SD_MESSAGE_CORE_MAINLOOP_FAILED_STR));
7eb35049
LP
2150 }
2151
b3f54861
LB
2152 /* Ensure shutdown timestamp is taken even when bypassing the job engine */
2153 if (IN_SET(objective,
2154 MANAGER_SOFT_REBOOT,
2155 MANAGER_REBOOT,
2156 MANAGER_KEXEC,
2157 MANAGER_HALT,
2158 MANAGER_POWEROFF) &&
2159 !dual_timestamp_is_set(m->timestamps + MANAGER_TIMESTAMP_SHUTDOWN_START))
2160 dual_timestamp_now(m->timestamps + MANAGER_TIMESTAMP_SHUTDOWN_START);
2161
5409c6fc 2162 switch (objective) {
7eb35049 2163
a6ecbf83 2164 case MANAGER_RELOAD: {
bda7d78b 2165 LogTarget saved_log_target;
a6ecbf83
FB
2166 int saved_log_level;
2167
dd0ab174
LP
2168 manager_send_reloading(m);
2169
af2fb2f2 2170 log_info("Reloading...");
7eb35049 2171
ff3a7019
ZJS
2172 /* First, save any overridden log level/target, then parse the configuration file,
2173 * which might change the log level to new settings. */
bda7d78b 2174
a6ecbf83 2175 saved_log_level = m->log_level_overridden ? log_get_max_level() : -1;
bda7d78b 2176 saved_log_target = m->log_target_overridden ? log_get_target() : _LOG_TARGET_INVALID;
a6ecbf83 2177
a9fd4cd1 2178 (void) parse_configuration(saved_rlimit_nofile, saved_rlimit_memlock);
7eb35049
LP
2179
2180 set_manager_defaults(m);
986935cf 2181 set_manager_settings(m);
7eb35049 2182
61fbbac1 2183 update_cpu_affinity(false);
b070c7c0 2184 update_numa_policy(false);
61fbbac1 2185
a6ecbf83
FB
2186 if (saved_log_level >= 0)
2187 manager_override_log_level(m, saved_log_level);
bda7d78b
FB
2188 if (saved_log_target >= 0)
2189 manager_override_log_target(m, saved_log_target);
a6ecbf83 2190
5409c6fc 2191 if (manager_reload(m) < 0)
ff3a7019
ZJS
2192 /* Reloading failed before the point of no return.
2193 * Let's continue running as if nothing happened. */
7a35fa24 2194 m->objective = MANAGER_OK;
af2fb2f2
LB
2195 else
2196 log_info("Reloading finished in " USEC_FMT " ms.",
2197 usec_sub_unsigned(now(CLOCK_MONOTONIC), m->timestamps[MANAGER_TIMESTAMP_UNITS_LOAD].monotonic) / USEC_PER_MSEC);
7eb35049 2198
5409c6fc 2199 continue;
a6ecbf83 2200 }
7eb35049
LP
2201
2202 case MANAGER_REEXECUTE:
dd0ab174
LP
2203
2204 manager_send_reloading(m); /* From the perspective of the manager calling us this is
2205 * pretty much the same as a reload */
2206
7eb35049
LP
2207 r = prepare_reexecute(m, &arg_serialization, ret_fds, false);
2208 if (r < 0) {
2209 *ret_error_message = "Failed to prepare for reexecution";
2210 return r;
2211 }
2212
2213 log_notice("Reexecuting.");
2214
590e0e3b 2215 *ret_retval = EXIT_FAILURE;
7eb35049
LP
2216 *ret_switch_root_dir = *ret_switch_root_init = NULL;
2217
5409c6fc 2218 return objective;
7eb35049
LP
2219
2220 case MANAGER_SWITCH_ROOT:
dd0ab174
LP
2221
2222 manager_send_reloading(m); /* From the perspective of the manager calling us this is
2223 * pretty much the same as a reload */
2224
d35fe8c0
FB
2225 manager_set_switching_root(m, true);
2226
7eb35049
LP
2227 if (!m->switch_root_init) {
2228 r = prepare_reexecute(m, &arg_serialization, ret_fds, true);
2229 if (r < 0) {
2230 *ret_error_message = "Failed to prepare for reexecution";
2231 return r;
2232 }
2233 } else
2234 *ret_fds = NULL;
2235
2236 log_notice("Switching root.");
2237
590e0e3b 2238 *ret_retval = EXIT_FAILURE;
7eb35049
LP
2239
2240 /* Steal the switch root parameters */
49052946
YW
2241 *ret_switch_root_dir = TAKE_PTR(m->switch_root);
2242 *ret_switch_root_init = TAKE_PTR(m->switch_root_init);
7eb35049 2243
5409c6fc 2244 return objective;
7eb35049 2245
13ffc607
LP
2246 case MANAGER_SOFT_REBOOT:
2247 manager_send_reloading(m);
2248 manager_set_switching_root(m, true);
2249
2250 r = prepare_reexecute(m, &arg_serialization, ret_fds, /* switching_root= */ true);
2251 if (r < 0) {
2252 *ret_error_message = "Failed to prepare for reexecution";
2253 return r;
2254 }
2255
2256 log_notice("Soft-rebooting.");
2257
590e0e3b 2258 *ret_retval = EXIT_FAILURE;
13ffc607
LP
2259 *ret_switch_root_dir = TAKE_PTR(m->switch_root);
2260 *ret_switch_root_init = NULL;
2261
2262 return objective;
2263
7eb35049 2264 case MANAGER_EXIT:
7eb35049
LP
2265 if (MANAGER_IS_USER(m)) {
2266 log_debug("Exit.");
2267
7eb35049 2268 *ret_retval = m->return_value;
7eb35049
LP
2269 *ret_fds = NULL;
2270 *ret_switch_root_dir = *ret_switch_root_init = NULL;
2271
5409c6fc 2272 return objective;
7eb35049
LP
2273 }
2274
2275 _fallthrough_;
2276 case MANAGER_REBOOT:
2277 case MANAGER_POWEROFF:
2278 case MANAGER_HALT:
2279 case MANAGER_KEXEC: {
7eb35049
LP
2280 log_notice("Shutting down.");
2281
7eb35049 2282 *ret_retval = m->return_value;
7eb35049
LP
2283 *ret_fds = NULL;
2284 *ret_switch_root_dir = *ret_switch_root_init = NULL;
2285
5409c6fc 2286 return objective;
7eb35049
LP
2287 }
2288
2289 default:
04499a70 2290 assert_not_reached();
7eb35049
LP
2291 }
2292 }
2293}
2294
31aef7ff 2295static void log_execution_mode(bool *ret_first_boot) {
7cd43e34 2296 bool first_boot = false;
4870133b 2297 int r;
7cd43e34 2298
31aef7ff
LP
2299 assert(ret_first_boot);
2300
4870133b
LP
2301 switch (arg_runtime_scope) {
2302
2303 case RUNTIME_SCOPE_SYSTEM: {
40efaaed 2304 struct utsname uts;
31aef7ff
LP
2305 int v;
2306
e7b18106 2307 log_info("systemd " GIT_VERSION " running in %ssystem mode (%s)",
91b79ba8
ZJS
2308 arg_action == ACTION_TEST ? "test " : "",
2309 systemd_features);
31aef7ff
LP
2310
2311 v = detect_virtualization();
2312 if (v > 0)
2313 log_info("Detected virtualization %s.", virtualization_to_string(v));
2314
024469dd
DB
2315 v = detect_confidential_virtualization();
2316 if (v > 0)
2317 log_info("Detected confidential virtualization %s.", confidential_virtualization_to_string(v));
2318
31aef7ff
LP
2319 log_info("Detected architecture %s.", architecture_to_string(uname_architecture()));
2320
7cd43e34 2321 if (in_initrd())
55c041b4 2322 log_info("Running in initrd.");
7cd43e34 2323 else {
583cef3b
HS
2324 _cleanup_free_ char *id_text = NULL;
2325
7cd43e34 2326 /* Let's check whether we are in first boot. First, check if an override was
7c52d523 2327 * specified on the kernel command line. If yes, we honour that. */
7cd43e34 2328
78266a54 2329 r = proc_cmdline_get_bool("systemd.condition_first_boot", /* flags = */ 0, &first_boot);
7cd43e34 2330 if (r < 0)
78266a54 2331 log_debug_errno(r, "Failed to parse systemd.condition_first_boot= kernel command line argument, ignoring: %m");
7cd43e34
ZJS
2332
2333 if (r > 0)
2334 log_full(first_boot ? LOG_INFO : LOG_DEBUG,
7c52d523 2335 "Kernel command line argument says we are %s first boot.",
7cd43e34
ZJS
2336 first_boot ? "in" : "not in");
2337 else {
2338 /* Second, perform autodetection. We use /etc/machine-id as flag file for
2339 * this: If it is missing or contains the value "uninitialized", this is the
2340 * first boot. In other cases, it is not. This allows container managers and
2341 * installers to provision a couple of files in /etc but still permit the
2342 * first-boot initialization to occur. If the container manager wants to
2343 * provision the machine ID it should pass $container_uuid to PID 1. */
2344
2345 r = read_one_line_file("/etc/machine-id", &id_text);
2346 if (r < 0 || streq(id_text, "uninitialized")) {
2347 if (r < 0 && r != -ENOENT)
60e4b429 2348 log_warning_errno(r, "Unexpected error while reading /etc/machine-id, assuming first boot: %m");
7cd43e34
ZJS
2349
2350 first_boot = true;
2351 log_info("Detected first boot.");
2352 } else
2353 log_debug("Detected initialized system, this is not the first boot.");
583cef3b 2354 }
31aef7ff 2355 }
40efaaed 2356
5180394b 2357 assert_se(uname(&uts) >= 0);
40efaaed
LP
2358
2359 if (strverscmp_improved(uts.release, KERNEL_BASELINE_VERSION) < 0)
2360 log_warning("Warning! Reported kernel version %s is older than systemd's required baseline kernel version %s. "
2361 "Your mileage may vary.", uts.release, KERNEL_BASELINE_VERSION);
2362 else
2363 log_debug("Kernel version %s, our baseline is %s", uts.release, KERNEL_BASELINE_VERSION);
4870133b
LP
2364
2365 break;
2366 }
2367
2368 case RUNTIME_SCOPE_USER:
b9e90f3a 2369 if (DEBUG_LOGGING) {
c2b2df60 2370 _cleanup_free_ char *t = NULL;
31aef7ff 2371
b9e90f3a 2372 t = uid_to_name(getuid());
91b79ba8
ZJS
2373 log_debug("systemd " GIT_VERSION " running in %suser mode for user " UID_FMT "/%s. (%s)",
2374 arg_action == ACTION_TEST ? " test" : "",
2375 getuid(), strna(t), systemd_features);
b9e90f3a 2376 }
4870133b
LP
2377
2378 break;
2379
2380 default:
2381 assert_not_reached();
31aef7ff 2382 }
7cd43e34
ZJS
2383
2384 *ret_first_boot = first_boot;
31aef7ff
LP
2385}
2386
5afbaa36
LP
2387static int initialize_runtime(
2388 bool skip_setup,
3023f2fe 2389 bool first_boot,
5afbaa36
LP
2390 struct rlimit *saved_rlimit_nofile,
2391 struct rlimit *saved_rlimit_memlock,
b004393d 2392 uint64_t *saved_ambient_set,
5afbaa36 2393 const char **ret_error_message) {
87c4f0f8 2394
5afbaa36
LP
2395 int r;
2396
b004393d 2397 assert(saved_ambient_set);
5afbaa36
LP
2398 assert(ret_error_message);
2399
2400 /* Sets up various runtime parameters. Many of these initializations are conditionalized:
2401 *
2402 * - Some only apply to --system instances
2403 * - Some only apply to --user instances
2404 * - Some only apply when we first start up, but not when we reexecute
2405 */
2406
2d776038
LP
2407 if (arg_action != ACTION_RUN)
2408 return 0;
2409
61fbbac1 2410 update_cpu_affinity(skip_setup);
b070c7c0 2411 update_numa_policy(skip_setup);
61fbbac1 2412
4870133b
LP
2413 switch (arg_runtime_scope) {
2414
2415 case RUNTIME_SCOPE_SYSTEM:
5238e957 2416 /* Make sure we leave a core dump without panicking the kernel. */
3c3c6cb9 2417 install_crash_handler();
5afbaa36 2418
3c3c6cb9 2419 if (!skip_setup) {
87c4f0f8
MY
2420 /* Check that /usr/ is either on the same file system as / or mounted already. */
2421 if (dir_is_empty("/usr", /* ignore_hidden_or_backup = */ true) > 0) {
2422 *ret_error_message = "Refusing to run in unsupported environment where /usr/ is not populated";
2423 return -ENOEXEC;
2424 }
2425
deb0d489
LP
2426 /* Pull credentials from various sources into a common credential directory (we do
2427 * this here, before setting up the machine ID, so that we can use credential info
2428 * for setting up the machine ID) */
2429 (void) import_credentials();
2430
4bd03515 2431 (void) os_release_status();
0c7a2a55
MY
2432 (void) machine_id_setup(/* root = */ NULL, arg_machine_id,
2433 (first_boot ? MACHINE_ID_SETUP_FORCE_TRANSIENT : 0) |
2434 (arg_machine_id_from_firmware ? MACHINE_ID_SETUP_FORCE_FIRMWARE : 0),
1cce9d93 2435 /* ret = */ NULL);
af9c45d5 2436 (void) hostname_setup(/* really = */ true);
df883de9 2437 (void) loopback_setup();
87c4f0f8 2438
3c3c6cb9 2439 bump_unix_max_dgram_qlen();
a8b627aa 2440 bump_file_max_and_nr_open();
87c4f0f8 2441
3c3c6cb9 2442 write_container_id();
3f37a825 2443
98c28313
LP
2444 (void) write_boot_or_shutdown_osc("boot");
2445
3f37a825
LB
2446 /* Copy os-release to the propagate directory, so that we update it for services running
2447 * under RootDirectory=/RootImage= when we do a soft reboot. */
2448 r = setup_os_release(RUNTIME_SCOPE_SYSTEM);
2449 if (r < 0)
2450 log_warning_errno(r, "Failed to copy os-release for propagation, ignoring: %m");
3c3c6cb9 2451 }
8a2c1fbf 2452
82d7a151
YW
2453 r = watchdog_set_device(arg_watchdog_device);
2454 if (r < 0)
2455 log_warning_errno(r, "Failed to set watchdog device to %s, ignoring: %m", arg_watchdog_device);
4870133b 2456
0acf4dfe
MY
2457 if (!cap_test_all(arg_capability_bounding_set)) {
2458 r = capability_bounding_set_drop_usermode(arg_capability_bounding_set);
2459 if (r < 0) {
2460 *ret_error_message = "Failed to drop capability bounding set of usermode helpers";
2461 return log_struct_errno(LOG_EMERG, r,
2462 LOG_MESSAGE("Failed to drop capability bounding set of usermode helpers: %m"),
3cf6a3a3 2463 LOG_MESSAGE_ID(SD_MESSAGE_CORE_CAPABILITY_BOUNDING_USER_STR));
0acf4dfe
MY
2464 }
2465
2466 r = capability_bounding_set_drop(arg_capability_bounding_set, true);
2467 if (r < 0) {
2468 *ret_error_message = "Failed to drop capability bounding set";
2469 return log_struct_errno(LOG_EMERG, r,
2470 LOG_MESSAGE("Failed to drop capability bounding set: %m"),
3cf6a3a3 2471 LOG_MESSAGE_ID(SD_MESSAGE_CORE_CAPABILITY_BOUNDING_STR));
0acf4dfe
MY
2472 }
2473 }
2474
2475 if (arg_no_new_privs) {
2476 if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) < 0) {
2477 *ret_error_message = "Failed to disable new privileges";
2478 return log_struct_errno(LOG_EMERG, errno,
2479 LOG_MESSAGE("Failed to disable new privileges: %m"),
3cf6a3a3 2480 LOG_MESSAGE_ID(SD_MESSAGE_CORE_DISABLE_PRIVILEGES_STR));
0acf4dfe
MY
2481 }
2482 }
2483
4870133b
LP
2484 break;
2485
2486 case RUNTIME_SCOPE_USER: {
32429805
LP
2487 _cleanup_free_ char *p = NULL;
2488
2489 /* Create the runtime directory and place the inaccessible device nodes there, if we run in
2490 * user mode. In system mode mount_setup() already did that. */
2491
60cd6deb 2492 r = xdg_user_runtime_dir("/systemd", &p);
32429805
LP
2493 if (r < 0) {
2494 *ret_error_message = "$XDG_RUNTIME_DIR is not set";
ad5db940
OJ
2495 return log_struct_errno(LOG_EMERG, r,
2496 LOG_MESSAGE("Failed to determine $XDG_RUNTIME_DIR path: %m"),
3cf6a3a3 2497 LOG_MESSAGE_ID(SD_MESSAGE_CORE_NO_XDGDIR_PATH_STR));
32429805
LP
2498 }
2499
91003256
MY
2500 if (!skip_setup) {
2501 (void) mkdir_p_label(p, 0755);
2502 (void) make_inaccessible_nodes(p, UID_INVALID, GID_INVALID);
2503
2504 r = setup_os_release(RUNTIME_SCOPE_USER);
2505 if (r < 0)
2506 log_warning_errno(r, "Failed to copy os-release for propagation, ignoring: %m");
2507 }
2508
4870133b
LP
2509 break;
2510 }
2511
2512 default:
2513 assert_not_reached();
3c3c6cb9 2514 }
5afbaa36 2515
e0ebc81b
ŁS
2516 /* The two operations on the ambient set are meant for a user session manager. They do not affect
2517 * system manager operation, because by default it starts with an empty ambient set.
2518 *
2519 * Preserve the ambient set for later use with sd-executor processes. */
b004393d 2520 r = capability_get_ambient(saved_ambient_set);
e0ebc81b 2521 if (r < 0)
c4c416b1 2522 log_warning_errno(r, "Failed to save ambient capabilities, ignoring: %m");
e0ebc81b
ŁS
2523
2524 /* Clear ambient capabilities, so services do not inherit them implicitly. Dropping them does
2525 * not affect the permitted and effective sets which are important for the manager itself to
2526 * operate. */
88a26e10
LP
2527 r = capability_ambient_set_apply(0, /* also_inherit= */ false);
2528 if (r < 0)
2529 log_warning_errno(r, "Failed to reset ambient capability set, ignoring: %m");
e0ebc81b 2530
5afbaa36
LP
2531 if (arg_timer_slack_nsec != NSEC_INFINITY)
2532 if (prctl(PR_SET_TIMERSLACK, arg_timer_slack_nsec) < 0)
3a671cd1 2533 log_warning_errno(errno, "Failed to adjust timer slack, ignoring: %m");
5afbaa36 2534
5afbaa36
LP
2535 if (arg_syscall_archs) {
2536 r = enforce_syscall_archs(arg_syscall_archs);
2537 if (r < 0) {
2538 *ret_error_message = "Failed to set syscall architectures";
2539 return r;
2540 }
2541 }
2542
8c3fe1b5
LP
2543 r = make_reaper_process(true);
2544 if (r < 0)
2545 log_warning_errno(r, "Failed to make us a subreaper, ignoring: %m");
5afbaa36 2546
a17c1712
LP
2547 /* Bump up RLIMIT_NOFILE for systemd itself */
2548 (void) bump_rlimit_nofile(saved_rlimit_nofile);
2549 (void) bump_rlimit_memlock(saved_rlimit_memlock);
5afbaa36
LP
2550
2551 return 0;
2552}
2553
6acca5fc
LP
2554static int do_queue_default_job(
2555 Manager *m,
2556 const char **ret_error_message) {
2557
2558 _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
f1d075dc
ZJS
2559 const char *unit;
2560 Job *job;
2561 Unit *target;
6acca5fc
LP
2562 int r;
2563
8755dbad 2564 if (arg_default_unit)
f1d075dc 2565 unit = arg_default_unit;
8755dbad 2566 else if (in_initrd())
f1d075dc 2567 unit = SPECIAL_INITRD_TARGET;
8755dbad 2568 else
f1d075dc 2569 unit = SPECIAL_DEFAULT_TARGET;
8755dbad 2570
f1d075dc 2571 log_debug("Activating default unit: %s", unit);
8755dbad 2572
f1d075dc 2573 r = manager_load_startable_unit_or_warn(m, unit, NULL, &target);
8755dbad
ZJS
2574 if (r < 0 && in_initrd() && !arg_default_unit) {
2575 /* Fall back to default.target, which we used to always use by default. Only do this if no
2576 * explicit configuration was given. */
2577
2b2ca7ff 2578 log_info("Falling back to %s.", SPECIAL_DEFAULT_TARGET);
6acca5fc 2579
8755dbad
ZJS
2580 r = manager_load_startable_unit_or_warn(m, SPECIAL_DEFAULT_TARGET, NULL, &target);
2581 }
4109ede7 2582 if (r < 0) {
2b2ca7ff 2583 log_info("Falling back to %s.", SPECIAL_RESCUE_TARGET);
6acca5fc 2584
4109ede7 2585 r = manager_load_startable_unit_or_warn(m, SPECIAL_RESCUE_TARGET, NULL, &target);
6acca5fc 2586 if (r < 0) {
8755dbad
ZJS
2587 *ret_error_message = r == -ERFKILL ? SPECIAL_RESCUE_TARGET " masked"
2588 : "Failed to load " SPECIAL_RESCUE_TARGET;
4109ede7 2589 return r;
6acca5fc
LP
2590 }
2591 }
2592
2593 assert(target->load_state == UNIT_LOADED);
2594
d993ad6c 2595 r = manager_add_job(m, JOB_START, target, JOB_ISOLATE, &error, &job);
6acca5fc
LP
2596 if (r == -EPERM) {
2597 log_debug_errno(r, "Default target could not be isolated, starting instead: %s", bus_error_message(&error, r));
2598
2599 sd_bus_error_free(&error);
2600
d993ad6c 2601 r = manager_add_job(m, JOB_START, target, JOB_REPLACE, &error, &job);
6acca5fc
LP
2602 if (r < 0) {
2603 *ret_error_message = "Failed to start default target";
ad5db940
OJ
2604 return log_struct_errno(LOG_EMERG, r,
2605 LOG_MESSAGE("Failed to start default target: %s", bus_error_message(&error, r)),
3cf6a3a3 2606 LOG_MESSAGE_ID(SD_MESSAGE_CORE_START_TARGET_FAILED_STR));
6acca5fc
LP
2607 }
2608
2609 } else if (r < 0) {
2610 *ret_error_message = "Failed to isolate default target";
ad5db940
OJ
2611 return log_struct_errno(LOG_EMERG, r,
2612 LOG_MESSAGE("Failed to isolate default target: %s", bus_error_message(&error, r)),
3cf6a3a3 2613 LOG_MESSAGE_ID(SD_MESSAGE_CORE_ISOLATE_TARGET_FAILED_STR));
c86c31d9
ZJS
2614 } else
2615 log_info("Queued %s job for default target %s.",
2616 job_type_to_string(job->type),
04d232d8 2617 unit_status_string(job->unit, NULL));
6acca5fc 2618
f1d075dc 2619 m->default_unit_job_id = job->id;
6acca5fc
LP
2620
2621 return 0;
2622}
2623
a9fd4cd1
FB
/* Queries the current RLIMIT_NOFILE and RLIMIT_MEMLOCK settings of this process and stores them in the
 * two caller-provided structures. A failing query is only logged as a warning; the corresponding
 * structure is then left to whatever getrlimit() did with it. */
static void save_rlimits(struct rlimit *saved_rlimit_nofile,
                         struct rlimit *saved_rlimit_memlock) {
        int k;

        assert(saved_rlimit_nofile);
        assert(saved_rlimit_memlock);

        k = getrlimit(RLIMIT_NOFILE, saved_rlimit_nofile);
        if (k < 0)
                log_warning_errno(errno, "Reading RLIMIT_NOFILE failed, ignoring: %m");

        k = getrlimit(RLIMIT_MEMLOCK, saved_rlimit_memlock);
        if (k < 0)
                log_warning_errno(errno, "Reading RLIMIT_MEMLOCK failed, ignoring: %m");
}
2636
2637static void fallback_rlimit_nofile(const struct rlimit *saved_rlimit_nofile) {
2638 struct rlimit *rl;
2639
c9e120e0 2640 if (arg_defaults.rlimit[RLIMIT_NOFILE])
a9fd4cd1
FB
2641 return;
2642
2643 /* Make sure forked processes get limits based on the original kernel setting */
2644
2645 rl = newdup(struct rlimit, saved_rlimit_nofile, 1);
2646 if (!rl) {
2647 log_oom();
2648 return;
2649 }
2650
2651 /* Bump the hard limit for system services to a substantially higher value. The default
2652 * hard limit current kernels set is pretty low (4K), mostly for historical
2653 * reasons. According to kernel developers, the fd handling in recent kernels has been
2654 * optimized substantially enough, so that we can bump the limit now, without paying too
2655 * high a price in memory or performance. Note however that we only bump the hard limit,
2656 * not the soft limit. That's because select() works the way it works, and chokes on fds
2657 * >= 1024. If we'd bump the soft limit globally, it might accidentally happen to
2658 * unexpecting programs that they get fds higher than what they can process using
2659 * select(). By only bumping the hard limit but leaving the low limit as it is we avoid
2660 * this pitfall: programs that are written by folks aware of the select() problem in mind
2661 * (and thus use poll()/epoll instead of select(), the way everybody should) can
2662 * explicitly opt into high fds by bumping their soft limit beyond 1024, to the hard limit
2663 * we pass. */
cfba9b9e
YW
2664 if (arg_runtime_scope == RUNTIME_SCOPE_SYSTEM)
2665 rl->rlim_max = MIN((rlim_t) read_nr_open(), MAX(rl->rlim_max, (rlim_t) HIGH_RLIMIT_NOFILE));
a9fd4cd1
FB
2666
2667 /* If for some reason we were invoked with a soft limit above 1024 (which should never
2668 * happen!, but who knows what we get passed in from pam_limit when invoked as --user
2669 * instance), then lower what we pass on to not confuse our children */
2670 rl->rlim_cur = MIN(rl->rlim_cur, (rlim_t) FD_SETSIZE);
2671
c9e120e0 2672 arg_defaults.rlimit[RLIMIT_NOFILE] = rl;
a9fd4cd1
FB
2673}
2674
2675static void fallback_rlimit_memlock(const struct rlimit *saved_rlimit_memlock) {
2676 struct rlimit *rl;
2677
2678 /* Pass the original value down to invoked processes */
2679
c9e120e0 2680 if (arg_defaults.rlimit[RLIMIT_MEMLOCK])
a9fd4cd1
FB
2681 return;
2682
2683 rl = newdup(struct rlimit, saved_rlimit_memlock, 1);
2684 if (!rl) {
2685 log_oom();
2686 return;
2687 }
2688
4870133b 2689 if (arg_runtime_scope == RUNTIME_SCOPE_SYSTEM) {
852b6250
LP
2690 /* Raise the default limit to 8M also on old kernels and in containers (8M is the kernel
2691 * default for this since kernel 5.16) */
2692 rl->rlim_max = MAX(rl->rlim_max, (rlim_t) DEFAULT_RLIMIT_MEMLOCK);
2693 rl->rlim_cur = MAX(rl->rlim_cur, (rlim_t) DEFAULT_RLIMIT_MEMLOCK);
2694 }
2695
c9e120e0 2696 arg_defaults.rlimit[RLIMIT_MEMLOCK] = rl;
a9fd4cd1
FB
2697}
2698
d55ed7de 2699static void setenv_manager_environment(void) {
d55ed7de
ZJS
2700 int r;
2701
2702 STRV_FOREACH(p, arg_manager_environment) {
2703 log_debug("Setting '%s' in our own environment.", *p);
2704
2705 r = putenv_dup(*p, true);
2706 if (r < 0)
ac10f7e2 2707 log_warning_errno(r, "Failed to setenv \"%s\", ignoring: %m", *p);
d55ed7de
ZJS
2708 }
2709}
2710
fb39af4c
ZJS
2711static void reset_arguments(void) {
2712 /* Frees/resets arg_* variables, with a few exceptions commented below. */
970777b5
LP
2713
2714 arg_default_unit = mfree(arg_default_unit);
fb39af4c 2715
4870133b 2716 /* arg_runtime_scope — ignore */
fb39af4c
ZJS
2717
2718 arg_dump_core = true;
2719 arg_crash_chvt = -1;
2720 arg_crash_shell = false;
7a66f215 2721 arg_crash_action = CRASH_FREEZE;
970777b5 2722 arg_confirm_spawn = mfree(arg_confirm_spawn);
fb39af4c 2723 arg_show_status = _SHOW_STATUS_INVALID;
36cf4507 2724 arg_status_unit_format = STATUS_UNIT_FORMAT_DEFAULT;
fb39af4c
ZJS
2725 arg_switched_root = false;
2726 arg_pager_flags = 0;
2727 arg_service_watchdogs = true;
c9e120e0
LP
2728
2729 unit_defaults_done(&arg_defaults);
ea09a416 2730 unit_defaults_init(&arg_defaults, arg_runtime_scope);
c9e120e0 2731
fb39af4c 2732 arg_runtime_watchdog = 0;
65224c1d 2733 arg_reboot_watchdog = 10 * USEC_PER_MINUTE;
acafd7d8 2734 arg_kexec_watchdog = 0;
5717062e 2735 arg_pretimeout_watchdog = 0;
919ea64f
ŁS
2736 arg_early_core_pattern = mfree(arg_early_core_pattern);
2737 arg_watchdog_device = mfree(arg_watchdog_device);
aff3a9e1 2738 arg_watchdog_pretimeout_governor = mfree(arg_watchdog_pretimeout_governor);
fb39af4c 2739
970777b5 2740 arg_default_environment = strv_free(arg_default_environment);
d55ed7de 2741 arg_manager_environment = strv_free(arg_manager_environment);
fb39af4c 2742
3fd5190b 2743 arg_capability_bounding_set = CAP_MASK_UNSET;
fb39af4c 2744 arg_no_new_privs = false;
ffc1ec73 2745 arg_protect_system = -1;
fb39af4c 2746 arg_timer_slack_nsec = NSEC_INFINITY;
fb39af4c 2747
970777b5 2748 arg_syscall_archs = set_free(arg_syscall_archs);
61fbbac1 2749
fb39af4c
ZJS
2750 /* arg_serialization — ignore */
2751
fb39af4c
ZJS
2752 arg_machine_id = (sd_id128_t) {};
2753 arg_cad_burst_action = EMERGENCY_ACTION_REBOOT_FORCE;
fb39af4c 2754
296fe3d5 2755 cpu_set_done(&arg_cpu_affinity);
b070c7c0 2756 numa_policy_reset(&arg_numa_policy);
d247f232
LP
2757
2758 arg_random_seed = mfree(arg_random_seed);
2759 arg_random_seed_size = 0;
33d943d1 2760 arg_clock_usec = 0;
d4a402e4 2761
856bfaeb
LB
2762 arg_reload_limit_interval_sec = 0;
2763 arg_reload_limit_burst = 0;
d4a402e4
LP
2764}
2765
2766static void determine_default_oom_score_adjust(void) {
2767 int r, a, b;
2768
2769 /* Run our services at slightly higher OOM score than ourselves. But let's be conservative here, and
2770 * do this only if we don't run as root (i.e. only if we are run in user mode, for an unprivileged
2771 * user). */
2772
c9e120e0 2773 if (arg_defaults.oom_score_adjust_set)
d4a402e4
LP
2774 return;
2775
2776 if (getuid() == 0)
2777 return;
2778
2779 r = get_oom_score_adjust(&a);
2780 if (r < 0)
2781 return (void) log_warning_errno(r, "Failed to determine current OOM score adjustment value, ignoring: %m");
2782
2783 assert_cc(100 <= OOM_SCORE_ADJ_MAX);
2784 b = a >= OOM_SCORE_ADJ_MAX - 100 ? OOM_SCORE_ADJ_MAX : a + 100;
2785
2786 if (a == b)
2787 return;
2788
c9e120e0
LP
2789 arg_defaults.oom_score_adjust = b;
2790 arg_defaults.oom_score_adjust_set = true;
970777b5
LP
2791}
2792
a9fd4cd1
FB
2793static int parse_configuration(const struct rlimit *saved_rlimit_nofile,
2794 const struct rlimit *saved_rlimit_memlock) {
97d1fb94
LP
2795 int r;
2796
a9fd4cd1
FB
2797 assert(saved_rlimit_nofile);
2798 assert(saved_rlimit_memlock);
2799
fb39af4c
ZJS
2800 /* Assign configuration defaults */
2801 reset_arguments();
2802
97d1fb94 2803 r = parse_config_file();
470a5e6d
ZJS
2804 if (r < 0)
2805 log_warning_errno(r, "Failed to parse config file, ignoring: %m");
97d1fb94 2806
4870133b 2807 if (arg_runtime_scope == RUNTIME_SCOPE_SYSTEM) {
97d1fb94
LP
2808 r = proc_cmdline_parse(parse_proc_cmdline_item, NULL, 0);
2809 if (r < 0)
2810 log_warning_errno(r, "Failed to parse kernel command line, ignoring: %m");
2811 }
2812
8f2c5dea 2813 /* Initialize the show status setting if it hasn't been explicitly set yet */
7a293242 2814 if (arg_show_status == _SHOW_STATUS_INVALID)
db33214b
LP
2815 arg_show_status = SHOW_STATUS_YES;
2816
d55ed7de
ZJS
2817 /* Push variables into the manager environment block */
2818 setenv_manager_environment();
2819
8f2c5dea 2820 /* Parse log environment variables to take into account any new environment variables.
2821 * Note that this also parses bits from the kernel command line, including "debug". */
a4303b40
DDM
2822 log_parse_environment();
2823
8f2c5dea 2824 /* Initialize some default rlimits for services if they haven't been configured */
2825 fallback_rlimit_nofile(saved_rlimit_nofile);
2826 fallback_rlimit_memlock(saved_rlimit_memlock);
2827
2828 /* Slightly raise the OOM score for our services if we are running for unprivileged users. */
2829 determine_default_oom_score_adjust();
2830
97d1fb94
LP
2831 return 0;
2832}
2833
b0d7c989
LP
2834static int safety_checks(void) {
2835
febf46a4 2836 if (getpid_cached() == 1 &&
baaa35ad
ZJS
2837 arg_action != ACTION_RUN)
2838 return log_error_errno(SYNTHETIC_ERRNO(EPERM),
2839 "Unsupported execution mode while PID 1.");
febf46a4
LP
2840
2841 if (getpid_cached() == 1 &&
4870133b 2842 arg_runtime_scope == RUNTIME_SCOPE_USER)
baaa35ad
ZJS
2843 return log_error_errno(SYNTHETIC_ERRNO(EPERM),
2844 "Can't run --user mode as PID 1.");
febf46a4
LP
2845
2846 if (arg_action == ACTION_RUN &&
4870133b 2847 arg_runtime_scope == RUNTIME_SCOPE_SYSTEM &&
baaa35ad
ZJS
2848 getpid_cached() != 1)
2849 return log_error_errno(SYNTHETIC_ERRNO(EPERM),
2850 "Can't run system mode unless PID 1.");
febf46a4 2851
b0d7c989 2852 if (arg_action == ACTION_TEST &&
baaa35ad
ZJS
2853 geteuid() == 0)
2854 return log_error_errno(SYNTHETIC_ERRNO(EPERM),
2855 "Don't run test mode as root.");
b0d7c989 2856
4870133b
LP
2857 switch (arg_runtime_scope) {
2858
2859 case RUNTIME_SCOPE_USER:
2860
2861 if (arg_action == ACTION_RUN &&
2862 sd_booted() <= 0)
2863 return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP),
2864 "Trying to run as user instance, but the system has not been booted with systemd.");
2865
2866 if (arg_action == ACTION_RUN &&
2867 !getenv("XDG_RUNTIME_DIR"))
2868 return log_error_errno(SYNTHETIC_ERRNO(EUNATCH),
2869 "Trying to run as user instance, but $XDG_RUNTIME_DIR is not set.");
2870
2871 break;
b0d7c989 2872
4870133b
LP
2873 case RUNTIME_SCOPE_SYSTEM:
2874 if (arg_action == ACTION_RUN &&
2875 running_in_chroot() > 0)
2876 return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP),
2877 "Cannot be run in a chroot() environment.");
2878 break;
b0d7c989 2879
4870133b
LP
2880 default:
2881 assert_not_reached();
2882 }
b0d7c989
LP
2883
2884 return 0;
2885}
2886
74da609f
LP
2887static int initialize_security(
2888 bool *loaded_policy,
2889 dual_timestamp *security_start_timestamp,
2890 dual_timestamp *security_finish_timestamp,
2891 const char **ret_error_message) {
2892
2893 int r;
2894
2895 assert(loaded_policy);
2896 assert(security_start_timestamp);
2897 assert(security_finish_timestamp);
2898 assert(ret_error_message);
2899
fa5a0251 2900 dual_timestamp_now(security_start_timestamp);
74da609f 2901
97149f40 2902 r = mac_selinux_setup(loaded_policy);
74da609f
LP
2903 if (r < 0) {
2904 *ret_error_message = "Failed to load SELinux policy";
2905 return r;
2906 }
2907
2908 r = mac_smack_setup(loaded_policy);
2909 if (r < 0) {
2910 *ret_error_message = "Failed to load SMACK policy";
2911 return r;
2912 }
2913
2ffadd3c
Y
2914 r = mac_apparmor_setup();
2915 if (r < 0) {
2916 *ret_error_message = "Failed to load AppArmor policy";
2917 return r;
2918 }
2919
74da609f
LP
2920 r = ima_setup();
2921 if (r < 0) {
2922 *ret_error_message = "Failed to load IMA policy";
2923 return r;
2924 }
2925
394c6141
LB
2926 r = ipe_setup();
2927 if (r < 0) {
2928 *ret_error_message = "Failed to load IPE policy";
2929 return r;
2930 }
2931
fa5a0251 2932 dual_timestamp_now(security_finish_timestamp);
74da609f
LP
2933 return 0;
2934}
2935
efeb853f
LP
2936static int collect_fds(FDSet **ret_fds, const char **ret_error_message) {
2937 int r;
2938
2939 assert(ret_fds);
2940 assert(ret_error_message);
2941
a3dff21a
LP
2942 /* Pick up all fds passed to us. We apply a filter here: we only take the fds that have O_CLOEXEC
2943 * off. All fds passed via execve() to us must have O_CLOEXEC off, and our own code and dependencies
2944 * should be clean enough to set O_CLOEXEC universally. Thus checking the bit should be a safe
2945 * mechanism to distinguish passed in fds from our own.
2946 *
2947 * Why bother? Some subsystems we initialize early, specifically selinux might keep fds open in our
2948 * process behind our back. We should not take possession of that (and then accidentally close
2949 * it). SELinux thankfully sets O_CLOEXEC on its fds, so this test should work. */
2950 r = fdset_new_fill(/* filter_cloexec= */ 0, ret_fds);
efeb853f
LP
2951 if (r < 0) {
2952 *ret_error_message = "Failed to allocate fd set";
ad5db940
OJ
2953 return log_struct_errno(LOG_EMERG, r,
2954 LOG_MESSAGE("Failed to allocate fd set: %m"),
3cf6a3a3 2955 LOG_MESSAGE_ID(SD_MESSAGE_CORE_FD_SET_FAILED_STR));
efeb853f
LP
2956 }
2957
a3dff21a
LP
2958 /* The serialization fd should have O_CLOEXEC turned on already, let's verify that we didn't pick it up here */
2959 assert_se(!arg_serialization || !fdset_contains(*ret_fds, fileno(arg_serialization)));
efeb853f
LP
2960
2961 return 0;
2962}
2963
2e51b31c
LP
2964static void setup_console_terminal(bool skip_setup) {
2965
4870133b 2966 if (arg_runtime_scope != RUNTIME_SCOPE_SYSTEM)
2e51b31c
LP
2967 return;
2968
ff3a7019
ZJS
2969 /* If we are init, we connect stdin/stdout/stderr to /dev/null and make sure we don't have a
2970 * controlling tty. */
1de12823 2971 terminal_detach_session();
2e51b31c
LP
2972
2973 /* Reset the console, but only if this is really init and we are freshly booted */
2736295d 2974 if (!skip_setup)
2e51b31c
LP
2975 (void) console_setup();
2976}
2977
aa40ff07
LP
/* Takes a quick look at the command line (the full parsing happens much later) to tell a reexecution
 * from a normal bootup. Returns true if a "--deserialize" or "--deserialize=…" argument is present and
 * "--switched-root" is not: when deserializing we are reexecuting and can skip the extensive setup,
 * but after switching root all the special setup is done anyway, even though we deserialize then
 * too. */
static bool early_skip_setup_check(int argc, char *argv[]) {
        bool deserializing = false;

        for (int i = 1; i < argc; i++) {
                const char *arg = argv[i];

                /* Switched root: never skip the setup, whatever else is on the command line */
                if (streq(arg, "--switched-root"))
                        return false;

                if (streq(arg, "--deserialize") || startswith(arg, "--deserialize="))
                        deserializing = true;
        }

        return deserializing;
}
2993
0e06a031
LP
2994static int save_env(void) {
2995 char **l;
2996
2997 l = strv_copy(environ);
2998 if (!l)
eda75b2c 2999 return log_oom();
0e06a031
LP
3000
3001 strv_free_and_replace(saved_env, l);
3002 return 0;
3003}
3004
60918275 3005int main(int argc, char *argv[]) {
5409c6fc
ZJS
3006 dual_timestamp
3007 initrd_timestamp = DUAL_TIMESTAMP_NULL,
3008 userspace_timestamp = DUAL_TIMESTAMP_NULL,
3009 kernel_timestamp = DUAL_TIMESTAMP_NULL,
3010 security_start_timestamp = DUAL_TIMESTAMP_NULL,
3011 security_finish_timestamp = DUAL_TIMESTAMP_NULL;
ddfa8b0b
LP
3012 struct rlimit saved_rlimit_nofile = RLIMIT_MAKE_CONST(0),
3013 saved_rlimit_memlock = RLIMIT_MAKE_CONST(RLIM_INFINITY); /* The original rlimits we passed
3014 * in. Note we use different values
3015 * for the two that indicate whether
3016 * these fields are initialized! */
5409c6fc 3017 bool skip_setup, loaded_policy = false, queue_default_job = false, first_boot = false;
625e8690 3018 char *switch_root_dir = NULL, *switch_root_init = NULL;
9d76d730 3019 usec_t before_startup, after_startup;
625e8690 3020 static char systemd[] = "systemd";
b22d392d 3021 const char *error_message = NULL;
103018ec 3022 uint64_t saved_ambient_set = 0;
625e8690
LP
3023 int r, retval = EXIT_FAILURE;
3024 Manager *m = NULL;
a16e1123 3025 FDSet *fds = NULL;
27b14a22 3026
61b9769b 3027 assert_se(argc > 0 && !isempty(argv[0]));
cf3095ac 3028
d72a8f10 3029 /* Take timestamps early on */
c3a170f3 3030 dual_timestamp_from_monotonic(&kernel_timestamp, 0);
fa5a0251 3031 dual_timestamp_now(&userspace_timestamp);
c3a170f3 3032
d72a8f10 3033 /* Figure out whether we need to initialize the system, or if we already did that because we are
ff3a7019 3034 * reexecuting. */
aa40ff07 3035 skip_setup = early_skip_setup_check(argc, argv);
d03bc1b8 3036
ff3a7019
ZJS
3037 /* If we get started via the /sbin/init symlink then we are called 'init'. After a subsequent
3038 * reexecution we are then called 'systemd'. That is confusing, hence let's call us systemd
3039 * right-away. */
f3b6a3ed 3040 program_invocation_short_name = systemd;
eee8b7ab 3041 (void) prctl(PR_SET_NAME, systemd);
5d6b1584 3042
d72a8f10 3043 /* Save the original command line */
36fea155 3044 save_argc_argv(argc, argv);
f3b6a3ed 3045
0e06a031
LP
3046 /* Save the original environment as we might need to restore it if we're requested to execute another
3047 * system manager later. */
3048 r = save_env();
3049 if (r < 0) {
3050 error_message = "Failed to copy environment block";
3051 goto finish;
3052 }
a5cede8c 3053
6fdb8de4 3054 /* Make sure that if the user says "syslog" we actually log to the journal. */
c1dc6153 3055 log_set_upgrade_syslog_to_journal(true);
bbe63281 3056
df0ff127 3057 if (getpid_cached() == 1) {
b5752d23 3058 /* When we run as PID 1 force system mode */
4870133b 3059 arg_runtime_scope = RUNTIME_SCOPE_SYSTEM;
b5752d23 3060
48a601fe 3061 /* Disable the umask logic */
90dc8c2e
MG
3062 umask(0);
3063
ff3a7019
ZJS
3064 /* Make sure that at least initially we do not ever log to journald/syslogd, because it might
3065 * not be activated yet (even though the log socket for it exists). */
d075092f
LP
3066 log_set_prohibit_ipc(true);
3067
ff3a7019
ZJS
3068 /* Always reopen /dev/console when running as PID 1 or one of its pre-execve() children. This
3069 * is important so that we never end up logging to any foreign stderr, for example if we have
3070 * to log in a child process right before execve()'ing the actual binary, at a point in time
3071 * where socket activation stderr/stdout are already set up. */
48a601fe 3072 log_set_always_reopen_console(true);
48a601fe 3073
92890452 3074 if (detect_container() <= 0) {
4f8d551f 3075
92890452 3076 /* Running outside of a container as PID 1 */
1e344c1d 3077 log_set_target_and_open(LOG_TARGET_KMSG);
a866073d 3078
92890452
LP
3079 if (in_initrd())
3080 initrd_timestamp = userspace_timestamp;
c3ba6250 3081
92890452
LP
3082 if (!skip_setup) {
3083 r = mount_setup_early();
3084 if (r < 0) {
3085 error_message = "Failed to mount early API filesystems";
3086 goto finish;
3087 }
d2f57745
DDM
3088 }
3089
3090 /* We might have just mounted /proc, so let's try to parse the kernel
3091 * command line log arguments immediately. */
3092 log_parse_environment();
92890452 3093
d2f57745
DDM
3094 /* Let's open the log backend a second time, in case the first time didn't
3095 * work. Quite possibly we have mounted /dev just now, so /dev/kmsg became
3096 * available, and it previously wasn't. */
3097 log_open();
0a2eef1e 3098
d2f57745 3099 if (!skip_setup) {
6123dfaa
ZJS
3100 disable_printk_ratelimit();
3101
92890452
LP
3102 r = initialize_security(
3103 &loaded_policy,
3104 &security_start_timestamp,
3105 &security_finish_timestamp,
3106 &error_message);
3107 if (r < 0)
3108 goto finish;
d723cd65 3109 }
eee8b7ab 3110
550f4718
MY
3111 r = mac_init();
3112 if (r < 0) {
a452c807 3113 error_message = "Failed to initialize MAC support";
96694e99 3114 goto finish;
92890452 3115 }
0b3325e7 3116
92890452 3117 if (!skip_setup)
20fa2bb8
ZJS
3118 initialize_clock_timewarp();
3119
3120 clock_apply_epoch(/* allow_backwards= */ !skip_setup);
92890452 3121
ff3a7019
ZJS
3122 /* Set the default for later on, but don't actually open the logs like this for
3123 * now. Note that if we are transitioning from the initrd there might still be
3124 * journal fd open, and we shouldn't attempt opening that before we parsed
3125 * /proc/cmdline which might redirect output elsewhere. */
92890452
LP
3126 log_set_target(LOG_TARGET_JOURNAL_OR_KMSG);
3127
3128 } else {
3129 /* Running inside a container, as PID 1 */
1e344c1d 3130 log_set_target_and_open(LOG_TARGET_CONSOLE);
92890452
LP
3131
3132 /* For later on, see above... */
3133 log_set_target(LOG_TARGET_JOURNAL);
3134
45250e66 3135 /* clear the kernel timestamp, because we are in a container */
92890452 3136 kernel_timestamp = DUAL_TIMESTAMP_NULL;
cb6531be 3137 }
7948c4df 3138
92890452 3139 initialize_coredump(skip_setup);
a866073d 3140
92890452
LP
3141 r = fixup_environment();
3142 if (r < 0) {
ad5db940
OJ
3143 log_struct_errno(LOG_EMERG, r,
3144 LOG_MESSAGE("Failed to fix up PID 1 environment: %m"),
3cf6a3a3 3145 LOG_MESSAGE_ID(SD_MESSAGE_CORE_PID1_ENVIRONMENT_STR));
92890452
LP
3146 error_message = "Failed to fix up PID1 environment";
3147 goto finish;
3148 }
a866073d 3149
ff3a7019
ZJS
3150 /* Try to figure out if we can use colors with the console. No need to do that for user
3151 * instances since they never log into the console. */
3a18b604 3152 log_show_color(colors_enabled());
92890452 3153
c76cf844
AK
3154 r = make_null_stdio();
3155 if (r < 0)
92890452 3156 log_warning_errno(r, "Failed to redirect standard streams to /dev/null, ignoring: %m");
f84f9974 3157
a132bef0 3158 /* Load the kernel modules early. */
2e75e2a8 3159 if (!skip_setup)
e921a00d 3160 (void) kmod_setup();
2e75e2a8 3161
3196e423 3162 /* Mount /proc, /sys and friends, so that /proc/cmdline and /proc/$PID/fd is available. */
f74349d8 3163 r = mount_setup(loaded_policy, skip_setup);
cb6531be
ZJS
3164 if (r < 0) {
3165 error_message = "Failed to mount API filesystems";
8efe3c01 3166 goto finish;
cb6531be 3167 }
c18ecf03 3168
0be72218
JD
3169 /* The efivarfs is now mounted, let's lock down the system token. */
3170 lock_down_efi_variables();
3196e423
LP
3171 } else {
3172 /* Running as user instance */
4870133b 3173 arg_runtime_scope = RUNTIME_SCOPE_USER;
2a646b1d 3174 log_set_always_reopen_console(true);
1e344c1d 3175 log_set_target_and_open(LOG_TARGET_AUTO);
3196e423
LP
3176
3177 /* clear the kernel timestamp, because we are not PID 1 */
3178 kernel_timestamp = DUAL_TIMESTAMP_NULL;
3179
550f4718
MY
3180 r = mac_init();
3181 if (r < 0) {
a452c807 3182 error_message = "Failed to initialize MAC support";
3196e423
LP
3183 goto finish;
3184 }
0c85a4f3 3185 }
4ade7963 3186
a9fd4cd1
FB
3187 /* Save the original RLIMIT_NOFILE/RLIMIT_MEMLOCK so that we can reset it later when
3188 * transitioning from the initrd to the main systemd or suchlike. */
3189 save_rlimits(&saved_rlimit_nofile, &saved_rlimit_memlock);
3190
4ade7963 3191 /* Reset all signal handlers. */
ce30c8dc 3192 (void) reset_all_signal_handlers();
9c274488 3193 (void) ignore_signals(SIGNALS_IGNORE);
078e4539 3194
ffe5c01e
FB
3195 (void) parse_configuration(&saved_rlimit_nofile, &saved_rlimit_memlock);
3196
3197 r = parse_argv(argc, argv);
3198 if (r < 0) {
7c52d523 3199 error_message = "Failed to parse command line arguments";
f170852a 3200 goto finish;
ffe5c01e 3201 }
10c961b9 3202
b0d7c989
LP
3203 r = safety_checks();
3204 if (r < 0)
fe783b03 3205 goto finish;
fe783b03 3206
5c08257b 3207 if (IN_SET(arg_action, ACTION_TEST, ACTION_HELP, ACTION_DUMP_CONFIGURATION_ITEMS, ACTION_DUMP_BUS_PROPERTIES, ACTION_BUS_INTROSPECT))
384c2c32 3208 pager_open(arg_pager_flags);
b0d7c989
LP
3209
3210 if (arg_action != ACTION_RUN)
74e7579c 3211 skip_setup = true;
b87c2aa6 3212
fa0f4d8a 3213 if (arg_action == ACTION_HELP) {
37ec0fdd 3214 retval = help() < 0 ? EXIT_FAILURE : EXIT_SUCCESS;
f170852a 3215 goto finish;
9ba0bc4e
ZJS
3216 } else if (arg_action == ACTION_VERSION) {
3217 retval = version();
3218 goto finish;
fa0f4d8a 3219 } else if (arg_action == ACTION_DUMP_CONFIGURATION_ITEMS) {
e537352b 3220 unit_dump_config_items(stdout);
22f4096c 3221 retval = EXIT_SUCCESS;
e537352b 3222 goto finish;
bbc1acab
YW
3223 } else if (arg_action == ACTION_DUMP_BUS_PROPERTIES) {
3224 dump_bus_properties(stdout);
3225 retval = EXIT_SUCCESS;
3226 goto finish;
5c08257b
ZJS
3227 } else if (arg_action == ACTION_BUS_INTROSPECT) {
3228 r = bus_manager_introspect_implementations(stdout, arg_bus_introspect);
3229 retval = r >= 0 ? EXIT_SUCCESS : EXIT_FAILURE;
3230 goto finish;
f170852a
LP
3231 }
3232
4c701096 3233 assert_se(IN_SET(arg_action, ACTION_RUN, ACTION_TEST));
f170852a 3234
5a2e0c62
LP
3235 /* Move out of the way, so that we won't block unmounts */
3236 assert_se(chdir("/") == 0);
3237
dea374e8 3238 if (arg_action == ACTION_RUN) {
d247f232
LP
3239 if (!skip_setup) {
3240 /* Apply the systemd.clock_usec= kernel command line switch */
45250e66 3241 apply_clock_update();
a70c72a0 3242
d247f232
LP
3243 /* Apply random seed from kernel command line */
3244 cmdline_take_random_seed();
3245 }
3246
ffc1ec73 3247 /* A core pattern might have been specified via the cmdline. */
c6885f5f
FB
3248 initialize_core_pattern(skip_setup);
3249
ffc1ec73
LP
3250 /* Make /usr/ read-only */
3251 apply_protect_system(skip_setup);
3252
efeb853f 3253 /* Close logging fds, in order not to confuse collecting passed fds and terminal logic below */
a70c72a0
LP
3254 log_close();
3255
3256 /* Remember open file descriptors for later deserialization */
efeb853f
LP
3257 r = collect_fds(&fds, &error_message);
3258 if (r < 0)
dea374e8 3259 goto finish;
a16e1123 3260
2e51b31c
LP
3261 /* Give up any control of the console, but make sure it's initialized. */
3262 setup_console_terminal(skip_setup);
56d96fc0 3263
a70c72a0
LP
3264 /* Open the logging devices, if possible and necessary */
3265 log_open();
56d96fc0 3266 }
4ade7963 3267
31aef7ff 3268 log_execution_mode(&first_boot);
a5dab5ce 3269
2b61489e
MY
3270 r = cg_has_legacy();
3271 if (r < 0) {
3272 error_message = "Failed to check cgroup hierarchy";
3273 goto finish;
3274 }
3275 if (r > 0) {
3276 r = log_full_errno(LOG_EMERG, SYNTHETIC_ERRNO(EPROTO),
3277 "Detected cgroup v1 hierarchy at /sys/fs/cgroup/, which is no longer supported by current version of systemd.\n"
3278 "Please instruct your initrd to mount cgroup v2 (unified) hierarchy,\n"
3279 "possibly by removing any stale kernel command line options, such as:\n"
3280 " systemd.legacy_systemd_cgroup_controller=1\n"
3281 " systemd.unified_cgroup_hierarchy=0");
3282
3283 error_message = "Detected unsupported legacy cgroup hierarchy, refusing execution";
3284 goto finish;
3285 }
3286
2d776038 3287 r = initialize_runtime(skip_setup,
3023f2fe 3288 first_boot,
2d776038
LP
3289 &saved_rlimit_nofile,
3290 &saved_rlimit_memlock,
b004393d 3291 &saved_ambient_set,
2d776038
LP
3292 &error_message);
3293 if (r < 0)
3294 goto finish;
4096d6f5 3295
4870133b 3296 r = manager_new(arg_runtime_scope,
e0a3da1f
ZJS
3297 arg_action == ACTION_TEST ? MANAGER_TEST_FULL : 0,
3298 &m);
e96d6be7 3299 if (r < 0) {
ad5db940
OJ
3300 log_struct_errno(LOG_EMERG, r,
3301 LOG_MESSAGE("Failed to allocate manager object: %m"),
3cf6a3a3 3302 LOG_MESSAGE_ID(SD_MESSAGE_CORE_MANAGER_ALLOCATE_STR));
cb6531be 3303 error_message = "Failed to allocate manager object";
60918275
LP
3304 goto finish;
3305 }
3306
9f9f0342
LP
3307 m->timestamps[MANAGER_TIMESTAMP_KERNEL] = kernel_timestamp;
3308 m->timestamps[MANAGER_TIMESTAMP_INITRD] = initrd_timestamp;
3309 m->timestamps[MANAGER_TIMESTAMP_USERSPACE] = userspace_timestamp;
d4ee7bd8
YW
3310 m->timestamps[manager_timestamp_initrd_mangle(MANAGER_TIMESTAMP_SECURITY_START)] = security_start_timestamp;
3311 m->timestamps[manager_timestamp_initrd_mangle(MANAGER_TIMESTAMP_SECURITY_FINISH)] = security_finish_timestamp;
9e58ff9c 3312
b004393d 3313 m->saved_ambient_set = saved_ambient_set;
e0ebc81b 3314
85cb4151 3315 set_manager_defaults(m);
7b46fc6a 3316 set_manager_settings(m);
fd130612 3317 manager_set_first_boot(m, first_boot);
d35fe8c0 3318 manager_set_switching_root(m, arg_switched_root);
27d340c7 3319
bf4df7c3 3320 /* Remember whether we should queue the default job */
d3b1c508 3321 queue_default_job = !arg_serialization || arg_switched_root;
bf4df7c3 3322
9d76d730
LP
3323 before_startup = now(CLOCK_MONOTONIC);
3324
2a7cf953 3325 r = manager_startup(m, arg_serialization, fds, /* root= */ NULL);
58f88d92 3326 if (r < 0) {
cefb3eda 3327 error_message = "Failed to start up manager";
58f88d92
ZJS
3328 goto finish;
3329 }
a16e1123 3330
6acca5fc 3331 /* This will close all file descriptors that were opened, but not claimed by any unit. */
2feceb5e 3332 fds = fdset_free(fds);
74ca738f 3333 arg_serialization = safe_fclose(arg_serialization);
bf4df7c3
LP
3334
3335 if (queue_default_job) {
6acca5fc 3336 r = do_queue_default_job(m, &error_message);
718db961 3337 if (r < 0)
37d88da7 3338 goto finish;
6acca5fc 3339 }
ab17a050 3340
6acca5fc 3341 after_startup = now(CLOCK_MONOTONIC);
60918275 3342
6acca5fc
LP
3343 log_full(arg_action == ACTION_TEST ? LOG_INFO : LOG_DEBUG,
3344 "Loaded units and determined initial transaction in %s.",
5291f26d 3345 FORMAT_TIMESPAN(after_startup - before_startup, 100 * USEC_PER_MSEC));
07672f49 3346
6acca5fc 3347 if (arg_action == ACTION_TEST) {
2a341bb9 3348 manager_test_summary(m);
6acca5fc
LP
3349 retval = EXIT_SUCCESS;
3350 goto finish;
e965d56d 3351 }
d46de8a1 3352
5409c6fc
ZJS
3353 r = invoke_main_loop(m,
3354 &saved_rlimit_nofile,
3355 &saved_rlimit_memlock,
3356 &retval,
5409c6fc
ZJS
3357 &fds,
3358 &switch_root_dir,
3359 &switch_root_init,
3360 &error_message);
33a06bbc
MY
3361 /* MANAGER_OK and MANAGER_RELOAD are not expected here. */
3362 assert(r < 0 || IN_SET(r, MANAGER_REEXECUTE, MANAGER_EXIT) ||
3363 (arg_runtime_scope == RUNTIME_SCOPE_SYSTEM &&
3364 IN_SET(r, MANAGER_REBOOT,
3365 MANAGER_SOFT_REBOOT,
3366 MANAGER_POWEROFF,
3367 MANAGER_HALT,
3368 MANAGER_KEXEC,
3369 MANAGER_SWITCH_ROOT)));
f170852a 3370
60918275 3371finish:
b87c2aa6
ZJS
3372 pager_close();
3373
92890452 3374 if (m) {
986935cf
FB
3375 arg_reboot_watchdog = manager_get_watchdog(m, WATCHDOG_REBOOT);
3376 arg_kexec_watchdog = manager_get_watchdog(m, WATCHDOG_KEXEC);
92890452
LP
3377 m = manager_free(m);
3378 }
60918275 3379
cc56fafe 3380 mac_selinux_finish();
b2bb3dbe 3381
13ffc607 3382 if (IN_SET(r, MANAGER_REEXECUTE, MANAGER_SWITCH_ROOT, MANAGER_SOFT_REBOOT))
1e3eee8c
ZJS
3383 r = do_reexecute(r,
3384 argc, argv,
3385 &saved_rlimit_nofile,
3386 &saved_rlimit_memlock,
3387 fds,
3388 switch_root_dir,
3389 switch_root_init,
b004393d 3390 saved_ambient_set,
1e3eee8c 3391 &error_message); /* This only returns if reexecution failed */
a16e1123 3392
74ca738f 3393 arg_serialization = safe_fclose(arg_serialization);
2feceb5e 3394 fds = fdset_free(fds);
a16e1123 3395
0e06a031
LP
3396 saved_env = strv_free(saved_env);
3397
349cc4a5 3398#if HAVE_VALGRIND_VALGRIND_H
54b434b1
LP
3399 /* If we are PID 1 and running under valgrind, then let's exit
3400 * here explicitly. valgrind will only generate nice output on
3401 * exit(), not on exec(), hence let's do the former not the
3402 * latter here. */
8a2c1fbf
EJ
3403 if (getpid_cached() == 1 && RUNNING_ON_VALGRIND) {
3404 /* Cleanup watchdog_device strings for valgrind. We need them
3405 * in become_shutdown() so normally we cannot free them yet. */
3406 watchdog_free_device();
7d9eea2b 3407 reset_arguments();
27fe58b7 3408 return retval;
8a2c1fbf 3409 }
54b434b1
LP
3410#endif
3411
7e11a95e 3412#if HAS_FEATURE_ADDRESS_SANITIZER
ae5ce7e3
FS
3413 /* At this stage we most likely don't have stdout/stderr open, so the following
3414 * LSan check would not print any actionable information and would just crash
3415 * PID 1. To make this a bit more helpful, let's try to open /dev/console,
3416 * and if we succeed redirect LSan's report there. */
7b19bd60
FS
3417 if (getpid_cached() == 1) {
3418 _cleanup_close_ int tty_fd = -EBADF;
ae5ce7e3 3419
7b19bd60
FS
3420 tty_fd = open_terminal("/dev/console", O_WRONLY|O_NOCTTY|O_CLOEXEC);
3421 if (tty_fd >= 0)
3422 __sanitizer_set_report_fd((void*) (intptr_t) tty_fd);
ae5ce7e3 3423
7b19bd60
FS
3424 __lsan_do_leak_check();
3425 }
7e11a95e
EV
3426#endif
3427
88eec29d 3428 if (r < 0)
fb44dc64
LP
3429 (void) sd_notifyf(/* unset_environment= */ false,
3430 "ERRNO=%i", -r);
88eec29d 3431
5409c6fc
ZJS
3432 /* Try to invoke the shutdown binary unless we already failed.
3433 * If we failed above, we want to freeze after finishing cleanup. */
4870133b
LP
3434 if (arg_runtime_scope == RUNTIME_SCOPE_SYSTEM &&
3435 IN_SET(r, MANAGER_EXIT, MANAGER_REBOOT, MANAGER_POWEROFF, MANAGER_HALT, MANAGER_KEXEC)) {
b22d392d 3436 r = become_shutdown(r, retval);
4a36297c 3437 log_error_errno(r, "Failed to execute shutdown binary, %s: %m", getpid_cached() == 1 ? "freezing" : "quitting");
9b9881d7 3438 error_message = "Failed to execute shutdown binary";
b9080b03
FF
3439 }
3440
3a89cb84
DDM
3441 /* This is primarily useful when running systemd in a VM, as it provides the user running the VM with
3442 * a mechanism to pick up systemd's exit status in the VM. */
fb44dc64
LP
3443 (void) sd_notifyf(/* unset_environment= */ false,
3444 "EXIT_STATUS=%i", retval);
3a89cb84 3445
8a2c1fbf
EJ
3446 watchdog_free_device();
3447 arg_watchdog_device = mfree(arg_watchdog_device);
3448
df0ff127 3449 if (getpid_cached() == 1) {
cb6531be
ZJS
3450 if (error_message)
3451 manager_status_printf(NULL, STATUS_TYPE_EMERGENCY,
1fc464f6 3452 ANSI_HIGHLIGHT_RED "!!!!!!" ANSI_NORMAL,
bb259772
LP
3453 "%s.", error_message);
3454 freeze_or_exit_or_reboot();
cb6531be 3455 }
c3b3c274 3456
7d9eea2b 3457 reset_arguments();
60918275
LP
3458 return retval;
3459}