src/core/main.c

   1 /* SPDX-License-Identifier: LGPL-2.1-or-later */
   2
   3 #include <errno.h>
   4 #include <fcntl.h>
   5 #include <getopt.h>
   6 #include <sys/mount.h>
   7 #include <sys/prctl.h>
   8 #include <sys/reboot.h>
   9 #include <unistd.h>
  10 #if HAVE_SECCOMP
  11 #include <seccomp.h>
  12 #endif
  13 #if HAVE_VALGRIND_VALGRIND_H
  14 #include <valgrind/valgrind.h>
  15 #endif
  16
  17 #include "sd-bus.h"
  18 #include "sd-daemon.h"
  19 #include "sd-messages.h"
  20
  21 #include "alloc-util.h"
  22 #include "apparmor-setup.h"
  23 #include "architecture.h"
  24 #include "build.h"
  25 #include "bus-error.h"
  26 #include "bus-util.h"
  27 #include "capability-util.h"
  28 #include "cgroup-util.h"
  29 #include "clock-util.h"
  30 #include "conf-parser.h"
  31 #include "cpu-set-util.h"
  32 #include "dbus-manager.h"
  33 #include "dbus.h"
  34 #include "def.h"
  35 #include "dev-setup.h"
  36 #include "efi-random.h"
  37 #include "efivars.h"
  38 #include "emergency-action.h"
  39 #include "env-util.h"
  40 #include "exit-status.h"
  41 #include "fd-util.h"
  42 #include "fdset.h"
  43 #include "fileio.h"
  44 #include "format-util.h"
  45 #include "fs-util.h"
  46 #include "hexdecoct.h"
  47 #include "hostname-setup.h"
  48 #include "ima-setup.h"
  49 #include "killall.h"
  50 #include "kmod-setup.h"
  51 #include "limits-util.h"
  52 #include "load-fragment.h"
  53 #include "log.h"
  54 #include "loopback-setup.h"
  55 #include "machine-id-setup.h"
  56 #include "manager.h"
  57 #include "manager-dump.h"
  58 #include "manager-serialize.h"
  59 #include "mkdir.h"
  60 #include "mount-setup.h"
  61 #include "os-util.h"
  62 #include "pager.h"
  63 #include "parse-argument.h"
  64 #include "parse-util.h"
  65 #include "path-util.h"
  66 #include "pretty-print.h"
  67 #include "proc-cmdline.h"
  68 #include "process-util.h"
  69 #include "random-util.h"
  70 #include "raw-clone.h"
  71 #include "rlimit-util.h"
  72 #if HAVE_SECCOMP
  73 #include "seccomp-util.h"
  74 #endif
  75 #include "selinux-setup.h"
  76 #include "selinux-util.h"
  77 #include "signal-util.h"
  78 #include "smack-setup.h"
  79 #include "special.h"
  80 #include "stat-util.h"
  81 #include "stdio-util.h"
  82 #include "strv.h"
  83 #include "switch-root.h"
  84 #include "sysctl-util.h"
  85 #include "terminal-util.h"
  86 #include "time-util.h"
  87 #include "umask-util.h"
  88 #include "user-util.h"
  89 #include "util.h"
  90 #include "virt.h"
  91 #include "watchdog.h"
  92
  93 #if HAS_FEATURE_ADDRESS_SANITIZER
  94 #include <sanitizer/lsan_interface.h>
  95 #endif
  96
  97 #define DEFAULT_TASKS_MAX ((TasksMax) { 15U, 100U }) /* 15% */
  98
  99 static enum {
 100         ACTION_RUN,
 101         ACTION_HELP,
 102         ACTION_VERSION,
 103         ACTION_TEST,
 104         ACTION_DUMP_CONFIGURATION_ITEMS,
 105         ACTION_DUMP_BUS_PROPERTIES,
 106         ACTION_BUS_INTROSPECT,
 107 } arg_action = ACTION_RUN;
 108
 109 static const char *arg_bus_introspect = NULL;
 110
 111 /* Those variables are initialized to 0 automatically, so we avoid uninitialized memory access.  Real
 112  * defaults are assigned in reset_arguments() below. */
 113 static char *arg_default_unit;
 114 static bool arg_system;
 115 static bool arg_dump_core;
 116 static int arg_crash_chvt;
 117 static bool arg_crash_shell;
 118 static bool arg_crash_reboot;
 119 static char *arg_confirm_spawn;
 120 static ShowStatus arg_show_status;
 121 static StatusUnitFormat arg_status_unit_format;
 122 static bool arg_switched_root;
 123 static PagerFlags arg_pager_flags;
 124 static bool arg_service_watchdogs;
 125 static ExecOutput arg_default_std_output;
 126 static ExecOutput arg_default_std_error;
 127 static usec_t arg_default_restart_usec;
 128 static usec_t arg_default_timeout_start_usec;
 129 static usec_t arg_default_timeout_stop_usec;
 130 static usec_t arg_default_timeout_abort_usec;
 131 static bool arg_default_timeout_abort_set;
 132 static usec_t arg_default_start_limit_interval;
 133 static unsigned arg_default_start_limit_burst;
 134 static usec_t arg_runtime_watchdog;
 135 static usec_t arg_reboot_watchdog;
 136 static usec_t arg_kexec_watchdog;
 137 static char *arg_early_core_pattern;
 138 static char *arg_watchdog_device;
 139 static char **arg_default_environment;
 140 static char **arg_manager_environment;
 141 static struct rlimit *arg_default_rlimit[_RLIMIT_MAX];
 142 static uint64_t arg_capability_bounding_set;
 143 static bool arg_no_new_privs;
 144 static nsec_t arg_timer_slack_nsec;
 145 static usec_t arg_default_timer_accuracy_usec;
 146 static Set* arg_syscall_archs;
 147 static FILE* arg_serialization;
 148 static int arg_default_cpu_accounting;
 149 static bool arg_default_io_accounting;
 150 static bool arg_default_ip_accounting;
 151 static bool arg_default_blockio_accounting;
 152 static bool arg_default_memory_accounting;
 153 static bool arg_default_tasks_accounting;
 154 static TasksMax arg_default_tasks_max;
 155 static sd_id128_t arg_machine_id;
 156 static EmergencyAction arg_cad_burst_action;
 157 static OOMPolicy arg_default_oom_policy;
 158 static CPUSet arg_cpu_affinity;
 159 static NUMAPolicy arg_numa_policy;
 160 static usec_t arg_clock_usec;
 161 static void *arg_random_seed;
 162 static size_t arg_random_seed_size;
 163
 164 /* A copy of the original environment block */
 165 static char **saved_env = NULL;
 166
 167 static int parse_configuration(const struct rlimit *saved_rlimit_nofile,
 168                                const struct rlimit *saved_rlimit_memlock);
 169
 170 static int manager_find_user_config_paths(char ***ret_files, char ***ret_dirs) {
 171         _cleanup_free_ char *base = NULL;
 172         _cleanup_strv_free_ char **files = NULL, **dirs = NULL;
 173         int r;
 174
 175         r = xdg_user_config_dir(&base, "/systemd");
 176         if (r < 0)
 177                 return r;
 178
 179         r = strv_extendf(&files, "%s/user.conf", base);
 180         if (r < 0)
 181                 return r;
 182
 183         r = strv_extend(&files, PKGSYSCONFDIR "/user.conf");
 184         if (r < 0)
 185                 return r;
 186
 187         r = strv_consume(&dirs, TAKE_PTR(base));
 188         if (r < 0)
 189                 return r;
 190
 191         r = strv_extend_strv(&dirs, CONF_PATHS_STRV("systemd"), false);
 192         if (r < 0)
 193                 return r;
 194
 195         *ret_files = TAKE_PTR(files);
 196         *ret_dirs = TAKE_PTR(dirs);
 197         return 0;
 198 }
 199
 200 _noreturn_ static void freeze_or_exit_or_reboot(void) {
 201
 202         /* If we are running in a container, let's prefer exiting, after all we can propagate an exit code to
 203          * the container manager, and thus inform it that something went wrong. */
 204         if (detect_container() > 0) {
 205                 log_emergency("Exiting PID 1...");
 206                 _exit(EXIT_EXCEPTION);
 207         }
 208
 209         if (arg_crash_reboot) {
 210                 log_notice("Rebooting in 10s...");
 211                 (void) sleep(10);
 212
 213                 log_notice("Rebooting now...");
 214                 (void) reboot(RB_AUTOBOOT);
 215                 log_emergency_errno(errno, "Failed to reboot: %m");
 216         }
 217
 218         log_emergency("Freezing execution.");
 219         freeze();
 220 }
 221
 222 _noreturn_ static void crash(int sig) {
 223         struct sigaction sa;
 224         pid_t pid;
 225
 226         if (getpid_cached() != 1)
 227                 /* Pass this on immediately, if this is not PID 1 */
 228                 (void) raise(sig);
 229         else if (!arg_dump_core)
 230                 log_emergency("Caught <%s>, not dumping core.", signal_to_string(sig));
 231         else {
 232                 sa = (struct sigaction) {
 233                         .sa_handler = nop_signal_handler,
 234                         .sa_flags = SA_NOCLDSTOP|SA_RESTART,
 235                 };
 236
 237                 /* We want to wait for the core process, hence let's enable SIGCHLD */
 238                 (void) sigaction(SIGCHLD, &sa, NULL);
 239
 240                 pid = raw_clone(SIGCHLD);
 241                 if (pid < 0)
 242                         log_emergency_errno(errno, "Caught <%s>, cannot fork for core dump: %m", signal_to_string(sig));
 243                 else if (pid == 0) {
 244                         /* Enable default signal handler for core dump */
 245
 246                         sa = (struct sigaction) {
 247                                 .sa_handler = SIG_DFL,
 248                         };
 249                         (void) sigaction(sig, &sa, NULL);
 250
 251                         /* Don't limit the coredump size */
 252                         (void) setrlimit(RLIMIT_CORE, &RLIMIT_MAKE_CONST(RLIM_INFINITY));
 253
 254                         /* Just to be sure... */
 255                         (void) chdir("/");
 256
 257                         /* Raise the signal again */
 258                         pid = raw_getpid();
 259                         (void) kill(pid, sig); /* raise() would kill the parent */
 260
 261                         assert_not_reached();
 262                         _exit(EXIT_EXCEPTION);
 263                 } else {
 264                         siginfo_t status;
 265                         int r;
 266
 267                         /* Order things nicely. */
 268                         r = wait_for_terminate(pid, &status);
 269                         if (r < 0)
 270                                 log_emergency_errno(r, "Caught <%s>, waitpid() failed: %m", signal_to_string(sig));
 271                         else if (status.si_code != CLD_DUMPED) {
 272                                 const char *s = status.si_code == CLD_EXITED
 273                                         ? exit_status_to_string(status.si_status, EXIT_STATUS_LIBC)
 274                                         : signal_to_string(status.si_status);
 275
 276                                 log_emergency("Caught <%s>, core dump failed (child "PID_FMT", code=%s, status=%i/%s).",
 277                                               signal_to_string(sig),
 278                                               pid,
 279                                               sigchld_code_to_string(status.si_code),
 280                                               status.si_status, strna(s));
 281                         } else
 282                                 log_emergency("Caught <%s>, dumped core as pid "PID_FMT".",
 283                                               signal_to_string(sig), pid);
 284                 }
 285         }
 286
 287         if (arg_crash_chvt >= 0)
 288                 (void) chvt(arg_crash_chvt);
 289
 290         sa = (struct sigaction) {
 291                 .sa_handler = SIG_IGN,
 292                 .sa_flags = SA_NOCLDSTOP|SA_NOCLDWAIT|SA_RESTART,
 293         };
 294
 295         /* Let the kernel reap children for us */
 296         (void) sigaction(SIGCHLD, &sa, NULL);
 297
 298         if (arg_crash_shell) {
 299                 log_notice("Executing crash shell in 10s...");
 300                 (void) sleep(10);
 301
 302                 pid = raw_clone(SIGCHLD);
 303                 if (pid < 0)
 304                         log_emergency_errno(errno, "Failed to fork off crash shell: %m");
 305                 else if (pid == 0) {
 306                         (void) setsid();
 307                         (void) make_console_stdio();
 308                         (void) rlimit_nofile_safe();
 309                         (void) execle("/bin/sh", "/bin/sh", NULL, environ);
 310
 311                         log_emergency_errno(errno, "execle() failed: %m");
 312                         _exit(EXIT_EXCEPTION);
 313                 } else {
 314                         log_info("Spawned crash shell as PID "PID_FMT".", pid);
 315                         (void) wait_for_terminate(pid, NULL);
 316                 }
 317         }
 318
 319         freeze_or_exit_or_reboot();
 320 }
 321
 322 static void install_crash_handler(void) {
 323         static const struct sigaction sa = {
 324                 .sa_handler = crash,
 325                 .sa_flags = SA_NODEFER, /* So that we can raise the signal again from the signal handler */
 326         };
 327         int r;
 328
 329         /* We ignore the return value here, since, we don't mind if we cannot set up a crash handler */
 330         r = sigaction_many(&sa, SIGNALS_CRASH_HANDLER);
 331         if (r < 0)
 332                 log_debug_errno(r, "I had trouble setting up the crash handler, ignoring: %m");
 333 }
 334
 335 static int console_setup(void) {
 336         _cleanup_close_ int tty_fd = -1;
 337         int r;
 338
 339         tty_fd = open_terminal("/dev/console", O_WRONLY|O_NOCTTY|O_CLOEXEC);
 340         if (tty_fd < 0)
 341                 return log_error_errno(tty_fd, "Failed to open /dev/console: %m");
 342
 343         /* We don't want to force text mode.  plymouth may be showing
 344          * pictures already from initrd. */
 345         r = reset_terminal_fd(tty_fd, false);
 346         if (r < 0)
 347                 return log_error_errno(r, "Failed to reset /dev/console: %m");
 348
 349         return 0;
 350 }
 351
 352 static int set_machine_id(const char *m) {
 353         sd_id128_t t;
 354         assert(m);
 355
 356         if (sd_id128_from_string(m, &t) < 0)
 357                 return -EINVAL;
 358
 359         if (sd_id128_is_null(t))
 360                 return -EINVAL;
 361
 362         arg_machine_id = t;
 363         return 0;
 364 }
 365
 366 static int parse_proc_cmdline_item(const char *key, const char *value, void *data) {
 367         int r;
 368
 369         assert(key);
 370
 371         if (STR_IN_SET(key, "systemd.unit", "rd.systemd.unit")) {
 372
 373                 if (proc_cmdline_value_missing(key, value))
 374                         return 0;
 375
 376                 if (!unit_name_is_valid(value, UNIT_NAME_PLAIN|UNIT_NAME_INSTANCE))
 377                         log_warning("Unit name specified on %s= is not valid, ignoring: %s", key, value);
 378                 else if (in_initrd() == !!startswith(key, "rd."))
 379                         return free_and_strdup_warn(&arg_default_unit, value);
 380
 381         } else if (proc_cmdline_key_streq(key, "systemd.dump_core")) {
 382
 383                 r = value ? parse_boolean(value) : true;
 384                 if (r < 0)
 385                         log_warning_errno(r, "Failed to parse dump core switch %s, ignoring: %m", value);
 386                 else
 387                         arg_dump_core = r;
 388
 389         } else if (proc_cmdline_key_streq(key, "systemd.early_core_pattern")) {
 390
 391                 if (proc_cmdline_value_missing(key, value))
 392                         return 0;
 393
 394                 if (path_is_absolute(value))
 395                         (void) parse_path_argument(value, false, &arg_early_core_pattern);
 396                 else
 397                         log_warning("Specified core pattern '%s' is not an absolute path, ignoring.", value);
 398
 399         } else if (proc_cmdline_key_streq(key, "systemd.crash_chvt")) {
 400
 401                 if (!value)
 402                         arg_crash_chvt = 0; /* turn on */
 403                 else {
 404                         r = parse_crash_chvt(value, &arg_crash_chvt);
 405                         if (r < 0)
 406                                 log_warning_errno(r, "Failed to parse crash chvt switch %s, ignoring: %m", value);
 407                 }
 408
 409         } else if (proc_cmdline_key_streq(key, "systemd.crash_shell")) {
 410
 411                 r = value ? parse_boolean(value) : true;
 412                 if (r < 0)
 413                         log_warning_errno(r, "Failed to parse crash shell switch %s, ignoring: %m", value);
 414                 else
 415                         arg_crash_shell = r;
 416
 417         } else if (proc_cmdline_key_streq(key, "systemd.crash_reboot")) {
 418
 419                 r = value ? parse_boolean(value) : true;
 420                 if (r < 0)
 421                         log_warning_errno(r, "Failed to parse crash reboot switch %s, ignoring: %m", value);
 422                 else
 423                         arg_crash_reboot = r;
 424
 425         } else if (proc_cmdline_key_streq(key, "systemd.confirm_spawn")) {
 426                 char *s;
 427
 428                 r = parse_confirm_spawn(value, &s);
 429                 if (r < 0)
 430                         log_warning_errno(r, "Failed to parse confirm_spawn switch %s, ignoring: %m", value);
 431                 else
 432                         free_and_replace(arg_confirm_spawn, s);
 433
 434         } else if (proc_cmdline_key_streq(key, "systemd.service_watchdogs")) {
 435
 436                 r = value ? parse_boolean(value) : true;
 437                 if (r < 0)
 438                         log_warning_errno(r, "Failed to parse service watchdog switch %s, ignoring: %m", value);
 439                 else
 440                         arg_service_watchdogs = r;
 441
 442         } else if (proc_cmdline_key_streq(key, "systemd.show_status")) {
 443
 444                 if (value) {
 445                         r = parse_show_status(value, &arg_show_status);
 446                         if (r < 0)
 447                                 log_warning_errno(r, "Failed to parse show status switch %s, ignoring: %m", value);
 448                 } else
 449                         arg_show_status = SHOW_STATUS_YES;
 450
 451         } else if (proc_cmdline_key_streq(key, "systemd.status_unit_format")) {
 452
 453                 if (proc_cmdline_value_missing(key, value))
 454                         return 0;
 455
 456                 r = status_unit_format_from_string(value);
 457                 if (r < 0)
 458                         log_warning_errno(r, "Failed to parse %s=%s, ignoring: %m", key, value);
 459                 else
 460                         arg_status_unit_format = r;
 461
 462         } else if (proc_cmdline_key_streq(key, "systemd.default_standard_output")) {
 463
 464                 if (proc_cmdline_value_missing(key, value))
 465                         return 0;
 466
 467                 r = exec_output_from_string(value);
 468                 if (r < 0)
 469                         log_warning_errno(r, "Failed to parse default standard output switch %s, ignoring: %m", value);
 470                 else
 471                         arg_default_std_output = r;
 472
 473         } else if (proc_cmdline_key_streq(key, "systemd.default_standard_error")) {
 474
 475                 if (proc_cmdline_value_missing(key, value))
 476                         return 0;
 477
 478                 r = exec_output_from_string(value);
 479                 if (r < 0)
 480                         log_warning_errno(r, "Failed to parse default standard error switch %s, ignoring: %m", value);
 481                 else
 482                         arg_default_std_error = r;
 483
 484         } else if (streq(key, "systemd.setenv")) {
 485
 486                 if (proc_cmdline_value_missing(key, value))
 487                         return 0;
 488
 489                 if (!env_assignment_is_valid(value))
 490                         log_warning("Environment variable assignment '%s' is not valid. Ignoring.", value);
 491                 else {
 492                         r = strv_env_replace_strdup(&arg_default_environment, value);
 493                         if (r < 0)
 494                                 return log_oom();
 495                 }
 496
 497         } else if (proc_cmdline_key_streq(key, "systemd.machine_id")) {
 498
 499                 if (proc_cmdline_value_missing(key, value))
 500                         return 0;
 501
 502                 r = set_machine_id(value);
 503                 if (r < 0)
 504                         log_warning_errno(r, "MachineID '%s' is not valid, ignoring: %m", value);
 505
 506         } else if (proc_cmdline_key_streq(key, "systemd.default_timeout_start_sec")) {
 507
 508                 if (proc_cmdline_value_missing(key, value))
 509                         return 0;
 510
 511                 r = parse_sec(value, &arg_default_timeout_start_usec);
 512                 if (r < 0)
 513                         log_warning_errno(r, "Failed to parse default start timeout '%s', ignoring: %m", value);
 514
 515                 if (arg_default_timeout_start_usec <= 0)
 516                         arg_default_timeout_start_usec = USEC_INFINITY;
 517
 518         } else if (proc_cmdline_key_streq(key, "systemd.cpu_affinity")) {
 519
 520                 if (proc_cmdline_value_missing(key, value))
 521                         return 0;
 522
 523                 r = parse_cpu_set(value, &arg_cpu_affinity);
 524                 if (r < 0)
 525                         log_warning_errno(r, "Failed to parse CPU affinity mask '%s', ignoring: %m", value);
 526
 527         } else if (proc_cmdline_key_streq(key, "systemd.watchdog_device")) {
 528
 529                 if (proc_cmdline_value_missing(key, value))
 530                         return 0;
 531
 532                 (void) parse_path_argument(value, false, &arg_watchdog_device);
 533
 534         } else if (proc_cmdline_key_streq(key, "systemd.clock_usec")) {
 535
 536                 if (proc_cmdline_value_missing(key, value))
 537                         return 0;
 538
 539                 r = safe_atou64(value, &arg_clock_usec);
 540                 if (r < 0)
 541                         log_warning_errno(r, "Failed to parse systemd.clock_usec= argument, ignoring: %s", value);
 542
 543         } else if (proc_cmdline_key_streq(key, "systemd.random_seed")) {
 544                 void *p;
 545                 size_t sz;
 546
 547                 if (proc_cmdline_value_missing(key, value))
 548                         return 0;
 549
 550                 r = unbase64mem(value, SIZE_MAX, &p, &sz);
 551                 if (r < 0)
 552                         log_warning_errno(r, "Failed to parse systemd.random_seed= argument, ignoring: %s", value);
 553
 554                 free(arg_random_seed);
 555                 arg_random_seed = sz > 0 ? p : mfree(p);
 556                 arg_random_seed_size = sz;
 557
 558         } else if (streq(key, "quiet") && !value) {
 559
 560                 if (arg_show_status == _SHOW_STATUS_INVALID)
 561                         arg_show_status = SHOW_STATUS_ERROR;
 562
 563         } else if (streq(key, "debug") && !value) {
 564
 565                 /* Note that log_parse_environment() handles 'debug'
 566                  * too, and sets the log level to LOG_DEBUG. */
 567
 568                 if (detect_container() > 0)
 569                         log_set_target(LOG_TARGET_CONSOLE);
 570
 571         } else if (!value) {
 572                 const char *target;
 573
 574                 /* Compatible with SysV, but supported independently even if SysV compatibility is disabled. */
 575                 target = runlevel_to_target(key);
 576                 if (target)
 577                         return free_and_strdup_warn(&arg_default_unit, target);
 578         }
 579
 580         return 0;
 581 }
 582
 583 #define DEFINE_SETTER(name, func, descr)                              \
 584         static int name(const char *unit,                             \
 585                         const char *filename,                         \
 586                         unsigned line,                                \
 587                         const char *section,                          \
 588                         unsigned section_line,                        \
 589                         const char *lvalue,                           \
 590                         int ltype,                                    \
 591                         const char *rvalue,                           \
 592                         void *data,                                   \
 593                         void *userdata) {                             \
 594                                                                       \
 595                 int r;                                                \
 596                                                                       \
 597                 assert(filename);                                     \
 598                 assert(lvalue);                                       \
 599                 assert(rvalue);                                       \
 600                                                                       \
 601                 r = func(rvalue);                                     \
 602                 if (r < 0)                                            \
 603                         log_syntax(unit, LOG_ERR, filename, line, r,  \
 604                                    "Invalid " descr "'%s': %m",       \
 605                                    rvalue);                           \
 606                                                                       \
 607                 return 0;                                             \
 608         }
 609
 610 DEFINE_SETTER(config_parse_level2, log_set_max_level_from_string, "log level");
 611 DEFINE_SETTER(config_parse_target, log_set_target_from_string, "target");
 612 DEFINE_SETTER(config_parse_color, log_show_color_from_string, "color");
 613 DEFINE_SETTER(config_parse_location, log_show_location_from_string, "location");
 614 DEFINE_SETTER(config_parse_time, log_show_time_from_string, "time");
 615
 616 static int config_parse_default_timeout_abort(
 617                 const char *unit,
 618                 const char *filename,
 619                 unsigned line,
 620                 const char *section,
 621                 unsigned section_line,
 622                 const char *lvalue,
 623                 int ltype,
 624                 const char *rvalue,
 625                 void *data,
 626                 void *userdata) {
 627         int r;
 628
 629         r = config_parse_timeout_abort(unit, filename, line, section, section_line, lvalue, ltype, rvalue,
 630                                        &arg_default_timeout_abort_usec, userdata);
 631         if (r >= 0)
 632                 arg_default_timeout_abort_set = r;
 633         return 0;
 634 }
 635
 636 static int parse_config_file(void) {
 637         const ConfigTableItem items[] = {
 638                 { "Manager", "LogLevel",                     config_parse_level2,                0, NULL                                   },
 639                 { "Manager", "LogTarget",                    config_parse_target,                0, NULL                                   },
 640                 { "Manager", "LogColor",                     config_parse_color,                 0, NULL                                   },
 641                 { "Manager", "LogLocation",                  config_parse_location,              0, NULL                                   },
 642                 { "Manager", "LogTime",                      config_parse_time,                  0, NULL                                   },
 643                 { "Manager", "DumpCore",                     config_parse_bool,                  0, &arg_dump_core                         },
 644                 { "Manager", "CrashChVT", /* legacy */       config_parse_crash_chvt,            0, &arg_crash_chvt                        },
 645                 { "Manager", "CrashChangeVT",                config_parse_crash_chvt,            0, &arg_crash_chvt                        },
 646                 { "Manager", "CrashShell",                   config_parse_bool,                  0, &arg_crash_shell                       },
 647                 { "Manager", "CrashReboot",                  config_parse_bool,                  0, &arg_crash_reboot                      },
 648                 { "Manager", "ShowStatus",                   config_parse_show_status,           0, &arg_show_status                       },
 649                 { "Manager", "StatusUnitFormat",             config_parse_status_unit_format,    0, &arg_status_unit_format                },
 650                 { "Manager", "CPUAffinity",                  config_parse_cpu_affinity2,         0, &arg_cpu_affinity                      },
 651                 { "Manager", "NUMAPolicy",                   config_parse_numa_policy,           0, &arg_numa_policy.type                  },
 652                 { "Manager", "NUMAMask",                     config_parse_numa_mask,             0, &arg_numa_policy                       },
 653                 { "Manager", "JoinControllers",              config_parse_warn_compat,           DISABLED_CONFIGURATION, NULL              },
 654                 { "Manager", "RuntimeWatchdogSec",           config_parse_sec,                   0, &arg_runtime_watchdog                  },
 655                 { "Manager", "RebootWatchdogSec",            config_parse_sec,                   0, &arg_reboot_watchdog                   },
 656                 { "Manager", "ShutdownWatchdogSec",          config_parse_sec,                   0, &arg_reboot_watchdog                   }, /* obsolete alias */
 657                 { "Manager", "KExecWatchdogSec",             config_parse_sec,                   0, &arg_kexec_watchdog                    },
 658                 { "Manager", "WatchdogDevice",               config_parse_path,                  0, &arg_watchdog_device                   },
 659                 { "Manager", "CapabilityBoundingSet",        config_parse_capability_set,        0, &arg_capability_bounding_set           },
 660                 { "Manager", "NoNewPrivileges",              config_parse_bool,                  0, &arg_no_new_privs                      },
 661 #if HAVE_SECCOMP
 662                 { "Manager", "SystemCallArchitectures",      config_parse_syscall_archs,         0, &arg_syscall_archs                     },
 663 #endif
 664                 { "Manager", "TimerSlackNSec",               config_parse_nsec,                  0, &arg_timer_slack_nsec                  },
 665                 { "Manager", "DefaultTimerAccuracySec",      config_parse_sec,                   0, &arg_default_timer_accuracy_usec       },
 666                 { "Manager", "DefaultStandardOutput",        config_parse_output_restricted,     0, &arg_default_std_output                },
 667                 { "Manager", "DefaultStandardError",         config_parse_output_restricted,     0, &arg_default_std_error                 },
 668                 { "Manager", "DefaultTimeoutStartSec",       config_parse_sec,                   0, &arg_default_timeout_start_usec        },
 669                 { "Manager", "DefaultTimeoutStopSec",        config_parse_sec,                   0, &arg_default_timeout_stop_usec         },
 670                 { "Manager", "DefaultTimeoutAbortSec",       config_parse_default_timeout_abort, 0, NULL         },
 671                 { "Manager", "DefaultRestartSec",            config_parse_sec,                   0, &arg_default_restart_usec              },
 672                 { "Manager", "DefaultStartLimitInterval",    config_parse_sec,                   0, &arg_default_start_limit_interval      }, /* obsolete alias */
 673                 { "Manager", "DefaultStartLimitIntervalSec", config_parse_sec,                   0, &arg_default_start_limit_interval      },
 674                 { "Manager", "DefaultStartLimitBurst",       config_parse_unsigned,              0, &arg_default_start_limit_burst         },
 675                 { "Manager", "DefaultEnvironment",           config_parse_environ,               0, &arg_default_environment               },
 676                 { "Manager", "ManagerEnvironment",           config_parse_environ,               0, &arg_manager_environment               },
 677                 { "Manager", "DefaultLimitCPU",              config_parse_rlimit,                RLIMIT_CPU, arg_default_rlimit            },
 678                 { "Manager", "DefaultLimitFSIZE",            config_parse_rlimit,                RLIMIT_FSIZE, arg_default_rlimit          },
 679                 { "Manager", "DefaultLimitDATA",             config_parse_rlimit,                RLIMIT_DATA, arg_default_rlimit           },
 680                 { "Manager", "DefaultLimitSTACK",            config_parse_rlimit,                RLIMIT_STACK, arg_default_rlimit          },
 681                 { "Manager", "DefaultLimitCORE",             config_parse_rlimit,                RLIMIT_CORE, arg_default_rlimit           },
 682                 { "Manager", "DefaultLimitRSS",              config_parse_rlimit,                RLIMIT_RSS, arg_default_rlimit            },
 683                 { "Manager", "DefaultLimitNOFILE",           config_parse_rlimit,                RLIMIT_NOFILE, arg_default_rlimit         },
 684                 { "Manager", "DefaultLimitAS",               config_parse_rlimit,                RLIMIT_AS, arg_default_rlimit             },
 685                 { "Manager", "DefaultLimitNPROC",            config_parse_rlimit,                RLIMIT_NPROC, arg_default_rlimit          },
 686                 { "Manager", "DefaultLimitMEMLOCK",          config_parse_rlimit,                RLIMIT_MEMLOCK, arg_default_rlimit        },
 687                 { "Manager", "DefaultLimitLOCKS",            config_parse_rlimit,                RLIMIT_LOCKS, arg_default_rlimit          },
 688                 { "Manager", "DefaultLimitSIGPENDING",       config_parse_rlimit,                RLIMIT_SIGPENDING, arg_default_rlimit     },
 689                 { "Manager", "DefaultLimitMSGQUEUE",         config_parse_rlimit,                RLIMIT_MSGQUEUE, arg_default_rlimit       },
 690                 { "Manager", "DefaultLimitNICE",             config_parse_rlimit,                RLIMIT_NICE, arg_default_rlimit           },
 691                 { "Manager", "DefaultLimitRTPRIO",           config_parse_rlimit,                RLIMIT_RTPRIO, arg_default_rlimit         },
 692                 { "Manager", "DefaultLimitRTTIME",           config_parse_rlimit,                RLIMIT_RTTIME, arg_default_rlimit         },
 693                 { "Manager", "DefaultCPUAccounting",         config_parse_tristate,              0, &arg_default_cpu_accounting            },
 694                 { "Manager", "DefaultIOAccounting",          config_parse_bool,                  0, &arg_default_io_accounting             },
 695                 { "Manager", "DefaultIPAccounting",          config_parse_bool,                  0, &arg_default_ip_accounting             },
 696                 { "Manager", "DefaultBlockIOAccounting",     config_parse_bool,                  0, &arg_default_blockio_accounting        },
 697                 { "Manager", "DefaultMemoryAccounting",      config_parse_bool,                  0, &arg_default_memory_accounting         },
 698                 { "Manager", "DefaultTasksAccounting",       config_parse_bool,                  0, &arg_default_tasks_accounting          },
 699                 { "Manager", "DefaultTasksMax",              config_parse_tasks_max,             0, &arg_default_tasks_max                 },
 700                 { "Manager", "CtrlAltDelBurstAction",        config_parse_emergency_action,      0, &arg_cad_burst_action                  },
 701                 { "Manager", "DefaultOOMPolicy",             config_parse_oom_policy,            0, &arg_default_oom_policy                },
 702                 {}
 703         };
 704
 705         _cleanup_strv_free_ char **files = NULL, **dirs = NULL;
 706         const char *suffix;
 707         int r;
 708
 709         if (arg_system)
 710                 suffix = "system.conf.d";
 711         else {
 712                 r = manager_find_user_config_paths(&files, &dirs);
 713                 if (r < 0)
 714                         return log_error_errno(r, "Failed to determine config file paths: %m");
 715
 716                 suffix = "user.conf.d";
 717         }
 718
 719         (void) config_parse_many(
 720                         (const char* const*) (files ?: STRV_MAKE(PKGSYSCONFDIR "/system.conf")),
 721                         (const char* const*) (dirs ?: CONF_PATHS_STRV("systemd")),
 722                         suffix,
 723                         "Manager\0",
 724                         config_item_table_lookup, items,
 725                         CONFIG_PARSE_WARN,
 726                         NULL,
 727                         NULL);
 728
 729         /* Traditionally "0" was used to turn off the default unit timeouts. Fix this up so that we use
 730          * USEC_INFINITY like everywhere else. */
 731         if (arg_default_timeout_start_usec <= 0)
 732                 arg_default_timeout_start_usec = USEC_INFINITY;
 733         if (arg_default_timeout_stop_usec <= 0)
 734                 arg_default_timeout_stop_usec = USEC_INFINITY;
 735
 736         return 0;
 737 }
 738
 739 static void set_manager_defaults(Manager *m) {
 740
 741         assert(m);
 742
 743         /* Propagates the various default unit property settings into the manager object, i.e. properties that do not
 744          * affect the manager itself, but are just what newly allocated units will have set if they haven't set
 745          * anything else. (Also see set_manager_settings() for the settings that affect the manager's own behaviour) */
 746
 747         m->default_timer_accuracy_usec = arg_default_timer_accuracy_usec;
 748         m->default_std_output = arg_default_std_output;
 749         m->default_std_error = arg_default_std_error;
 750         m->default_timeout_start_usec = arg_default_timeout_start_usec;
 751         m->default_timeout_stop_usec = arg_default_timeout_stop_usec;
 752         m->default_timeout_abort_usec = arg_default_timeout_abort_usec;
 753         m->default_timeout_abort_set = arg_default_timeout_abort_set;
 754         m->default_restart_usec = arg_default_restart_usec;
 755         m->default_start_limit_interval = arg_default_start_limit_interval;
 756         m->default_start_limit_burst = arg_default_start_limit_burst;
 757
 758         /* On 4.15+ with unified hierarchy, CPU accounting is essentially free as it doesn't require the CPU
 759          * controller to be enabled, so the default is to enable it unless we got told otherwise. */
 760         if (arg_default_cpu_accounting >= 0)
 761                 m->default_cpu_accounting = arg_default_cpu_accounting;
 762         else
 763                 m->default_cpu_accounting = cpu_accounting_is_cheap();
 764
 765         m->default_io_accounting = arg_default_io_accounting;
 766         m->default_ip_accounting = arg_default_ip_accounting;
 767         m->default_blockio_accounting = arg_default_blockio_accounting;
 768         m->default_memory_accounting = arg_default_memory_accounting;
 769         m->default_tasks_accounting = arg_default_tasks_accounting;
 770         m->default_tasks_max = arg_default_tasks_max;
 771         m->default_oom_policy = arg_default_oom_policy;
 772
 773         (void) manager_set_default_rlimits(m, arg_default_rlimit);
 774
 775         (void) manager_default_environment(m);
 776         (void) manager_transient_environment_add(m, arg_default_environment);
 777 }
 778
 779 static void set_manager_settings(Manager *m) {
 780
 781         assert(m);
 782
 783         /* Propagates the various manager settings into the manager object, i.e. properties that
 784          * effect the manager itself (as opposed to just being inherited into newly allocated
 785          * units, see set_manager_defaults() above). */
 786
 787         m->confirm_spawn = arg_confirm_spawn;
 788         m->service_watchdogs = arg_service_watchdogs;
 789         m->cad_burst_action = arg_cad_burst_action;
 790
 791         manager_set_watchdog(m, WATCHDOG_RUNTIME, arg_runtime_watchdog);
 792         manager_set_watchdog(m, WATCHDOG_REBOOT, arg_reboot_watchdog);
 793         manager_set_watchdog(m, WATCHDOG_KEXEC, arg_kexec_watchdog);
 794
 795         manager_set_show_status(m, arg_show_status, "commandline");
 796         m->status_unit_format = arg_status_unit_format;
 797 }
 798
 799 static int parse_argv(int argc, char *argv[]) {
 800         enum {
 801                 ARG_LOG_LEVEL = 0x100,
 802                 ARG_LOG_TARGET,
 803                 ARG_LOG_COLOR,
 804                 ARG_LOG_LOCATION,
 805                 ARG_LOG_TIME,
 806                 ARG_UNIT,
 807                 ARG_SYSTEM,
 808                 ARG_USER,
 809                 ARG_TEST,
 810                 ARG_NO_PAGER,
 811                 ARG_VERSION,
 812                 ARG_DUMP_CONFIGURATION_ITEMS,
 813                 ARG_DUMP_BUS_PROPERTIES,
 814                 ARG_BUS_INTROSPECT,
 815                 ARG_DUMP_CORE,
 816                 ARG_CRASH_CHVT,
 817                 ARG_CRASH_SHELL,
 818                 ARG_CRASH_REBOOT,
 819                 ARG_CONFIRM_SPAWN,
 820                 ARG_SHOW_STATUS,
 821                 ARG_DESERIALIZE,
 822                 ARG_SWITCHED_ROOT,
 823                 ARG_DEFAULT_STD_OUTPUT,
 824                 ARG_DEFAULT_STD_ERROR,
 825                 ARG_MACHINE_ID,
 826                 ARG_SERVICE_WATCHDOGS,
 827         };
 828
 829         static const struct option options[] = {
 830                 { "log-level",                required_argument, NULL, ARG_LOG_LEVEL                },
 831                 { "log-target",               required_argument, NULL, ARG_LOG_TARGET               },
 832                 { "log-color",                optional_argument, NULL, ARG_LOG_COLOR                },
 833                 { "log-location",             optional_argument, NULL, ARG_LOG_LOCATION             },
 834                 { "log-time",                 optional_argument, NULL, ARG_LOG_TIME                 },
 835                 { "unit",                     required_argument, NULL, ARG_UNIT                     },
 836                 { "system",                   no_argument,       NULL, ARG_SYSTEM                   },
 837                 { "user",                     no_argument,       NULL, ARG_USER                     },
 838                 { "test",                     no_argument,       NULL, ARG_TEST                     },
 839                 { "no-pager",                 no_argument,       NULL, ARG_NO_PAGER                 },
 840                 { "help",                     no_argument,       NULL, 'h'                          },
 841                 { "version",                  no_argument,       NULL, ARG_VERSION                  },
 842                 { "dump-configuration-items", no_argument,       NULL, ARG_DUMP_CONFIGURATION_ITEMS },
 843                 { "dump-bus-properties",      no_argument,       NULL, ARG_DUMP_BUS_PROPERTIES      },
 844                 { "bus-introspect",           required_argument, NULL, ARG_BUS_INTROSPECT           },
 845                 { "dump-core",                optional_argument, NULL, ARG_DUMP_CORE                },
 846                 { "crash-chvt",               required_argument, NULL, ARG_CRASH_CHVT               },
 847                 { "crash-shell",              optional_argument, NULL, ARG_CRASH_SHELL              },
 848                 { "crash-reboot",             optional_argument, NULL, ARG_CRASH_REBOOT             },
 849                 { "confirm-spawn",            optional_argument, NULL, ARG_CONFIRM_SPAWN            },
 850                 { "show-status",              optional_argument, NULL, ARG_SHOW_STATUS              },
 851                 { "deserialize",              required_argument, NULL, ARG_DESERIALIZE              },
 852                 { "switched-root",            no_argument,       NULL, ARG_SWITCHED_ROOT            },
 853                 { "default-standard-output",  required_argument, NULL, ARG_DEFAULT_STD_OUTPUT,      },
 854                 { "default-standard-error",   required_argument, NULL, ARG_DEFAULT_STD_ERROR,       },
 855                 { "machine-id",               required_argument, NULL, ARG_MACHINE_ID               },
 856                 { "service-watchdogs",        required_argument, NULL, ARG_SERVICE_WATCHDOGS        },
 857                 {}
 858         };
 859
 860         int c, r;
 861         bool user_arg_seen = false;
 862
 863         assert(argc >= 1);
 864         assert(argv);
 865
 866         if (getpid_cached() == 1)
 867                 opterr = 0;
 868
 869         while ((c = getopt_long(argc, argv, "hDbsz:", options, NULL)) >= 0)
 870
 871                 switch (c) {
 872
 873                 case ARG_LOG_LEVEL:
 874                         r = log_set_max_level_from_string(optarg);
 875                         if (r < 0)
 876                                 return log_error_errno(r, "Failed to parse log level \"%s\": %m", optarg);
 877
 878                         break;
 879
 880                 case ARG_LOG_TARGET:
 881                         r = log_set_target_from_string(optarg);
 882                         if (r < 0)
 883                                 return log_error_errno(r, "Failed to parse log target \"%s\": %m", optarg);
 884
 885                         break;
 886
 887                 case ARG_LOG_COLOR:
 888
 889                         if (optarg) {
 890                                 r = log_show_color_from_string(optarg);
 891                                 if (r < 0)
 892                                         return log_error_errno(r, "Failed to parse log color setting \"%s\": %m",
 893                                                                optarg);
 894                         } else
 895                                 log_show_color(true);
 896
 897                         break;
 898
 899                 case ARG_LOG_LOCATION:
 900                         if (optarg) {
 901                                 r = log_show_location_from_string(optarg);
 902                                 if (r < 0)
 903                                         return log_error_errno(r, "Failed to parse log location setting \"%s\": %m",
 904                                                                optarg);
 905                         } else
 906                                 log_show_location(true);
 907
 908                         break;
 909
 910                 case ARG_LOG_TIME:
 911
 912                         if (optarg) {
 913                                 r = log_show_time_from_string(optarg);
 914                                 if (r < 0)
 915                                         return log_error_errno(r, "Failed to parse log time setting \"%s\": %m",
 916                                                                optarg);
 917                         } else
 918                                 log_show_time(true);
 919
 920                         break;
 921
 922                 case ARG_DEFAULT_STD_OUTPUT:
 923                         r = exec_output_from_string(optarg);
 924                         if (r < 0)
 925                                 return log_error_errno(r, "Failed to parse default standard output setting \"%s\": %m",
 926                                                        optarg);
 927                         arg_default_std_output = r;
 928                         break;
 929
 930                 case ARG_DEFAULT_STD_ERROR:
 931                         r = exec_output_from_string(optarg);
 932                         if (r < 0)
 933                                 return log_error_errno(r, "Failed to parse default standard error output setting \"%s\": %m",
 934                                                        optarg);
 935                         arg_default_std_error = r;
 936                         break;
 937
 938                 case ARG_UNIT:
 939                         r = free_and_strdup(&arg_default_unit, optarg);
 940                         if (r < 0)
 941                                 return log_error_errno(r, "Failed to set default unit \"%s\": %m", optarg);
 942
 943                         break;
 944
 945                 case ARG_SYSTEM:
 946                         arg_system = true;
 947                         break;
 948
 949                 case ARG_USER:
 950                         arg_system = false;
 951                         user_arg_seen = true;
 952                         break;
 953
 954                 case ARG_TEST:
 955                         arg_action = ACTION_TEST;
 956                         break;
 957
 958                 case ARG_NO_PAGER:
 959                         arg_pager_flags |= PAGER_DISABLE;
 960                         break;
 961
 962                 case ARG_VERSION:
 963                         arg_action = ACTION_VERSION;
 964                         break;
 965
 966                 case ARG_DUMP_CONFIGURATION_ITEMS:
 967                         arg_action = ACTION_DUMP_CONFIGURATION_ITEMS;
 968                         break;
 969
 970                 case ARG_DUMP_BUS_PROPERTIES:
 971                         arg_action = ACTION_DUMP_BUS_PROPERTIES;
 972                         break;
 973
 974                 case ARG_BUS_INTROSPECT:
 975                         arg_bus_introspect = optarg;
 976                         arg_action = ACTION_BUS_INTROSPECT;
 977                         break;
 978
 979                 case ARG_DUMP_CORE:
 980                         r = parse_boolean_argument("--dump-core", optarg, &arg_dump_core);
 981                         if (r < 0)
 982                                 return r;
 983                         break;
 984
 985                 case ARG_CRASH_CHVT:
 986                         r = parse_crash_chvt(optarg, &arg_crash_chvt);
 987                         if (r < 0)
 988                                 return log_error_errno(r, "Failed to parse crash virtual terminal index: \"%s\": %m",
 989                                                        optarg);
 990                         break;
 991
 992                 case ARG_CRASH_SHELL:
 993                         r = parse_boolean_argument("--crash-shell", optarg, &arg_crash_shell);
 994                         if (r < 0)
 995                                 return r;
 996                         break;
 997
 998                 case ARG_CRASH_REBOOT:
 999                         r = parse_boolean_argument("--crash-reboot", optarg, &arg_crash_reboot);
1000                         if (r < 0)
1001                                 return r;
1002                         break;
1003
1004                 case ARG_CONFIRM_SPAWN:
1005                         arg_confirm_spawn = mfree(arg_confirm_spawn);
1006
1007                         r = parse_confirm_spawn(optarg, &arg_confirm_spawn);
1008                         if (r < 0)
1009                                 return log_error_errno(r, "Failed to parse confirm spawn option: \"%s\": %m",
1010                                                        optarg);
1011                         break;
1012
1013                 case ARG_SERVICE_WATCHDOGS:
1014                         r = parse_boolean_argument("--service-watchdogs=", optarg, &arg_service_watchdogs);
1015                         if (r < 0)
1016                                 return r;
1017                         break;
1018
1019                 case ARG_SHOW_STATUS:
1020                         if (optarg) {
1021                                 r = parse_show_status(optarg, &arg_show_status);
1022                                 if (r < 0)
1023                                         return log_error_errno(r, "Failed to parse show status boolean: \"%s\": %m",
1024                                                                optarg);
1025                         } else
1026                                 arg_show_status = SHOW_STATUS_YES;
1027                         break;
1028
1029                 case ARG_DESERIALIZE: {
1030                         int fd;
1031                         FILE *f;
1032
1033                         r = safe_atoi(optarg, &fd);
1034                         if (r < 0)
1035                                 log_error_errno(r, "Failed to parse deserialize option \"%s\": %m", optarg);
1036                         if (fd < 0)
1037                                 return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
1038                                                        "Invalid deserialize fd: %d",
1039                                                        fd);
1040
1041                         (void) fd_cloexec(fd, true);
1042
1043                         f = fdopen(fd, "r");
1044                         if (!f)
1045                                 return log_error_errno(errno, "Failed to open serialization fd %d: %m", fd);
1046
1047                         safe_fclose(arg_serialization);
1048                         arg_serialization = f;
1049
1050                         break;
1051                 }
1052
1053                 case ARG_SWITCHED_ROOT:
1054                         arg_switched_root = true;
1055                         break;
1056
1057                 case ARG_MACHINE_ID:
1058                         r = set_machine_id(optarg);
1059                         if (r < 0)
1060                                 return log_error_errno(r, "MachineID '%s' is not valid: %m", optarg);
1061                         break;
1062
1063                 case 'h':
1064                         arg_action = ACTION_HELP;
1065                         break;
1066
1067                 case 'D':
1068                         log_set_max_level(LOG_DEBUG);
1069                         break;
1070
1071                 case 'b':
1072                 case 's':
1073                 case 'z':
1074                         /* Just to eat away the sysvinit kernel cmdline args that we'll parse in
1075                          * parse_proc_cmdline_item() or ignore, without any getopt() error messages.
1076                          */
1077                 case '?':
1078                         if (getpid_cached() != 1)
1079                                 return -EINVAL;
1080                         else
1081                                 return 0;
1082
1083                 default:
1084                         assert_not_reached();
1085                 }
1086
1087         if (optind < argc && getpid_cached() != 1)
1088                 /* Hmm, when we aren't run as init system let's complain about excess arguments */
1089                 return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Excess arguments.");
1090
1091         if (arg_action == ACTION_RUN && !arg_system && !user_arg_seen)
1092                 return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
1093                                        "Explicit --user argument required to run as user manager.");
1094
1095         return 0;
1096 }
1097
1098 static int help(void) {
1099         _cleanup_free_ char *link = NULL;
1100         int r;
1101
1102         r = terminal_urlify_man("systemd", "1", &link);
1103         if (r < 0)
1104                 return log_oom();
1105
1106         printf("%s [OPTIONS...]\n\n"
1107                "%sStarts and monitors system and user services.%s\n\n"
1108                "This program takes no positional arguments.\n\n"
1109                "%sOptions%s:\n"
1110                "  -h --help                      Show this help\n"
1111                "     --version                   Show version\n"
1112                "     --test                      Determine initial transaction, dump it and exit\n"
1113                "     --system                    In combination with --test: operate as system service manager\n"
1114                "     --user                      In combination with --test: operate as per-user service manager\n"
1115                "     --no-pager                  Do not pipe output into a pager\n"
1116                "     --dump-configuration-items  Dump understood unit configuration items\n"
1117                "     --dump-bus-properties       Dump exposed bus properties\n"
1118                "     --bus-introspect=PATH       Write XML introspection data\n"
1119                "     --unit=UNIT                 Set default unit\n"
1120                "     --dump-core[=BOOL]          Dump core on crash\n"
1121                "     --crash-vt=NR               Change to specified VT on crash\n"
1122                "     --crash-reboot[=BOOL]       Reboot on crash\n"
1123                "     --crash-shell[=BOOL]        Run shell on crash\n"
1124                "     --confirm-spawn[=BOOL]      Ask for confirmation when spawning processes\n"
1125                "     --show-status[=BOOL]        Show status updates on the console during bootup\n"
1126                "     --log-target=TARGET         Set log target (console, journal, kmsg, journal-or-kmsg, null)\n"
1127                "     --log-level=LEVEL           Set log level (debug, info, notice, warning, err, crit, alert, emerg)\n"
1128                "     --log-color[=BOOL]          Highlight important log messages\n"
1129                "     --log-location[=BOOL]       Include code location in log messages\n"
1130                "     --log-time[=BOOL]           Prefix log messages with current time\n"
1131                "     --default-standard-output=  Set default standard output for services\n"
1132                "     --default-standard-error=   Set default standard error output for services\n"
1133                "\nSee the %s for details.\n",
1134                program_invocation_short_name,
1135                ansi_highlight(),
1136                ansi_normal(),
1137                ansi_underline(),
1138                ansi_normal(),
1139                link);
1140
1141         return 0;
1142 }
1143
1144 static int prepare_reexecute(
1145                 Manager *m,
1146                 FILE **ret_f,
1147                 FDSet **ret_fds,
1148                 bool switching_root) {
1149
1150         _cleanup_fdset_free_ FDSet *fds = NULL;
1151         _cleanup_fclose_ FILE *f = NULL;
1152         int r;
1153
1154         assert(m);
1155         assert(ret_f);
1156         assert(ret_fds);
1157
1158         r = manager_open_serialization(m, &f);
1159         if (r < 0)
1160                 return log_error_errno(r, "Failed to create serialization file: %m");
1161
1162         /* Make sure nothing is really destructed when we shut down */
1163         m->n_reloading++;
1164         bus_manager_send_reloading(m, true);
1165
1166         fds = fdset_new();
1167         if (!fds)
1168                 return log_oom();
1169
1170         r = manager_serialize(m, f, fds, switching_root);
1171         if (r < 0)
1172                 return r;
1173
1174         if (fseeko(f, 0, SEEK_SET) == (off_t) -1)
1175                 return log_error_errno(errno, "Failed to rewind serialization fd: %m");
1176
1177         r = fd_cloexec(fileno(f), false);
1178         if (r < 0)
1179                 return log_error_errno(r, "Failed to disable O_CLOEXEC for serialization: %m");
1180
1181         r = fdset_cloexec(fds, false);
1182         if (r < 0)
1183                 return log_error_errno(r, "Failed to disable O_CLOEXEC for serialization fds: %m");
1184
1185         *ret_f = TAKE_PTR(f);
1186         *ret_fds = TAKE_PTR(fds);
1187
1188         return 0;
1189 }
1190
static void bump_file_max_and_nr_open(void) {

        /* Raises the fs.file-max and fs.nr_open sysctls as far as the kernel lets us. On current kernels
         * large numbers of file descriptors are no longer a performance problem and their memory is
         * properly tracked by memcg, so counting and limiting them in two further layers is unnecessary and
         * only complicates things. This effectively turns off 2 of the 4 levels of limits on file
         * descriptors and leaves RLIMIT_NOFILE (soft + hard) as the only ones that really matter.
         *
         * Each half is compiled in only if the respective BUMP_PROC_SYS_FS_* build option is set. All
         * failures are logged and otherwise ignored. */

#if BUMP_PROC_SYS_FS_FILE_MAX || BUMP_PROC_SYS_FS_NR_OPEN
        int r;
#endif

#if BUMP_PROC_SYS_FS_FILE_MAX
        /* Since 5.2 the kernel accepts values up to LONG_MAX here, hence use that. (Earlier kernels
         * behaved differently, but the write would then fail silently.) */
        r = sysctl_writef("fs/file-max", "%li\n", LONG_MAX);
        if (r < 0)
                log_full_errno(IN_SET(r, -EROFS, -EPERM, -EACCES) ? LOG_DEBUG : LOG_WARNING,
                               r,
                               "Failed to bump fs.file-max, ignoring: %m");
#endif

#if BUMP_PROC_SYS_FS_NR_OPEN
        /* The kernel enforces minimum and maximum values on fs.nr_open, but does not tell us what they
         * are. The maximum is computed by an architecture-dependent expression we do not want to duplicate
         * in userspace, as it depends on kernel implementation details. Hence all we can do is probe:
         * start at INT_MAX and halve the candidate each time the kernel rejects it with EINVAL, until one
         * is accepted. Ugly, but there is no better interface. */

        for (int candidate = INT_MAX;; candidate /= 2) {
                int current;

                /* Round down to a multiple of the pointer size */
                candidate &= ~(__SIZEOF_POINTER__ - 1);
                if (candidate < 1024) {
                        log_warning("Can't bump fs.nr_open, value too small.");
                        break;
                }

                current = read_nr_open();
                if (current < 0) {
                        log_error_errno(current, "Failed to read fs.nr_open: %m");
                        break;
                }
                if (current >= candidate) {
                        /* What is configured already is at least as large as what we would write. */
                        log_debug("Skipping bump, value is already larger.");
                        break;
                }

                r = sysctl_writef("fs/nr_open", "%i\n", candidate);
                if (r == -EINVAL) {
                        /* Rejected as out of range: retry with half the value (see the loop header). */
                        log_debug("Couldn't write fs.nr_open as %i, halving it.", candidate);
                        continue;
                }

                if (r < 0)
                        log_full_errno(IN_SET(r, -EROFS, -EPERM, -EACCES) ? LOG_DEBUG : LOG_WARNING,
                                       r,
                                       "Failed to bump fs.nr_open, ignoring: %m");
                else
                        log_debug("Successfully bumped fs.nr_open to %i", candidate);

                break;
        }
#endif
}
1256
/* Raises our own RLIMIT_NOFILE (soft and hard) to the value reported by read_nr_open(), without ever going
 * below the limits passed in via saved_rlimit.
 *
 * Returns 0 if nothing had to be changed or the limit was set; if setting the limit fails, a warning is
 * logged and the result of log_warning_errno() is returned. */
static int bump_rlimit_nofile(struct rlimit *saved_rlimit) {
        int r;

        /* The absolute upper bound the kernel enforces */
        int nr = read_nr_open();

        /* What we would like to have: that bound, or what we inherited if that was already higher. */
        struct rlimit new_rlimit = {
                .rlim_cur = MAX((rlim_t) nr, saved_rlimit->rlim_cur),
                .rlim_max = MAX((rlim_t) nr, saved_rlimit->rlim_max),
        };

        /* Nothing to do unless at least one of the two limits would actually go up. */
        if (!(saved_rlimit->rlim_max < new_rlimit.rlim_max ||
              saved_rlimit->rlim_cur < new_rlimit.rlim_cur)) {
                log_debug("RLIMIT_NOFILE is already as high or higher than we need it, not bumping.");
                return 0;
        }

        /* Bump both the soft and the hard limit for ourselves, all the way up to what the kernel allows. */
        r = setrlimit_closest(RLIMIT_NOFILE, &new_rlimit);
        if (r < 0)
                return log_warning_errno(r, "Setting RLIMIT_NOFILE failed, ignoring: %m");

        return 0;
}
1285
1286 static int bump_rlimit_memlock(struct rlimit *saved_rlimit) {
1287         struct rlimit new_rlimit;
1288         uint64_t mm;
1289         int r;
1290
1291         /* BPF_MAP_TYPE_LPM_TRIE bpf maps are charged against RLIMIT_MEMLOCK, even if we have CAP_IPC_LOCK
1292          * which should normally disable such checks. We need them to implement IPAddressAllow= and
1293          * IPAddressDeny=, hence let's bump the value high enough for our user. */
1294
1295         /* Using MAX() on resource limits only is safe if RLIM_INFINITY is > 0. POSIX declares that rlim_t
1296          * must be unsigned, hence this is a given, but let's make this clear here. */
1297         assert_cc(RLIM_INFINITY > 0);
1298
1299         mm = physical_memory_scale(1, 8); /* Let's scale how much we allow to be locked by the amount of
1300                                            * physical RAM. We allow an eighth to be locked by us, just to
1301                                            * pick a value. */
1302
1303         new_rlimit = (struct rlimit) {
1304                 .rlim_cur = MAX3(HIGH_RLIMIT_MEMLOCK, saved_rlimit->rlim_cur, mm),
1305                 .rlim_max = MAX3(HIGH_RLIMIT_MEMLOCK, saved_rlimit->rlim_max, mm),
1306         };
1307
1308         if (saved_rlimit->rlim_max >= new_rlimit.rlim_cur &&
1309             saved_rlimit->rlim_cur >= new_rlimit.rlim_max) {
1310                 log_debug("RLIMIT_MEMLOCK is already as high or higher than we need it, not bumping.");
1311                 return 0;
1312         }
1313
1314         r = setrlimit_closest(RLIMIT_MEMLOCK, &new_rlimit);
1315         if (r < 0)
1316                 return log_warning_errno(r, "Setting RLIMIT_MEMLOCK failed, ignoring: %m");
1317
1318         return 0;
1319 }
1320
/* Sanity check for split-/usr setups: /usr must either live on the root file system or already be mounted.
 * If dir_is_empty() reports the directory as empty we log a warning; any other result (not empty, or an
 * error) is silently accepted. */
static void test_usr(void) {

        if (dir_is_empty("/usr") > 0)
                log_warning("/usr appears to be on its own filesystem and is not already mounted. This is not a supported setup. "
                            "Some things will probably break (sometimes even silently) in mysterious ways. "
                            "Consult http://freedesktop.org/wiki/Software/systemd/separate-usr-is-broken for more information.");
}
1332
1333 static int enforce_syscall_archs(Set *archs) {
1334 #if HAVE_SECCOMP
1335         int r;
1336
1337         if (!is_seccomp_available())
1338                 return 0;
1339
1340         r = seccomp_restrict_archs(arg_syscall_archs);
1341         if (r < 0)
1342                 return log_error_errno(r, "Failed to enforce system call architecture restrication: %m");
1343 #endif
1344         return 0;
1345 }
1346
1347 static int status_welcome(void) {
1348         _cleanup_free_ char *pretty_name = NULL, *ansi_color = NULL;
1349         int r;
1350
1351         if (!show_status_on(arg_show_status))
1352                 return 0;
1353
1354         r = parse_os_release(NULL,
1355                              "PRETTY_NAME", &pretty_name,
1356                              "ANSI_COLOR", &ansi_color);
1357         if (r < 0)
1358                 log_full_errno(r == -ENOENT ? LOG_DEBUG : LOG_WARNING, r,
1359                                "Failed to read os-release file, ignoring: %m");
1360
1361         if (log_get_show_color())
1362                 return status_printf(NULL, 0,
1363                                      "\nWelcome to \x1B[%sm%s\x1B[0m!\n",
1364                                      isempty(ansi_color) ? "1" : ansi_color,
1365                                      isempty(pretty_name) ? "Linux" : pretty_name);
1366         else
1367                 return status_printf(NULL, 0,
1368                                      "\nWelcome to %s!\n",
1369                                      isempty(pretty_name) ? "Linux" : pretty_name);
1370 }
1371
1372 static int write_container_id(void) {
1373         const char *c;
1374         int r = 0;  /* avoid false maybe-uninitialized warning */
1375
1376         c = getenv("container");
1377         if (isempty(c))
1378                 return 0;
1379
1380         RUN_WITH_UMASK(0022)
1381                 r = write_string_file("/run/systemd/container", c, WRITE_STRING_FILE_CREATE);
1382         if (r < 0)
1383                 return log_warning_errno(r, "Failed to write /run/systemd/container, ignoring: %m");
1384
1385         return 1;
1386 }
1387
1388 static int bump_unix_max_dgram_qlen(void) {
1389         _cleanup_free_ char *qlen = NULL;
1390         unsigned long v;
1391         int r;
1392
1393         /* Let's bump the net.unix.max_dgram_qlen sysctl. The kernel default of 16 is simply too low. We set
1394          * the value really really early during boot, so that it is actually applied to all our sockets,
1395          * including the $NOTIFY_SOCKET one. */
1396
1397         r = read_one_line_file("/proc/sys/net/unix/max_dgram_qlen", &qlen);
1398         if (r < 0)
1399                 return log_full_errno(r == -ENOENT ? LOG_DEBUG : LOG_WARNING, r,
1400                                       "Failed to read AF_UNIX datagram queue length, ignoring: %m");
1401
1402         r = safe_atolu(qlen, &v);
1403         if (r < 0)
1404                 return log_warning_errno(r, "Failed to parse AF_UNIX datagram queue length '%s', ignoring: %m", qlen);
1405
1406         if (v >= DEFAULT_UNIX_MAX_DGRAM_QLEN)
1407                 return 0;
1408
1409         r = write_string_filef("/proc/sys/net/unix/max_dgram_qlen", WRITE_STRING_FILE_DISABLE_BUFFER,
1410                                "%lu", DEFAULT_UNIX_MAX_DGRAM_QLEN);
1411         if (r < 0)
1412                 return log_full_errno(IN_SET(r, -EROFS, -EPERM, -EACCES) ? LOG_DEBUG : LOG_WARNING, r,
1413                                       "Failed to bump AF_UNIX datagram queue length, ignoring: %m");
1414
1415         return 1;
1416 }
1417
1418 static int fixup_environment(void) {
1419         _cleanup_free_ char *term = NULL;
1420         const char *t;
1421         int r;
1422
1423         /* Only fix up the environment when we are started as PID 1 */
1424         if (getpid_cached() != 1)
1425                 return 0;
1426
1427         /* We expect the environment to be set correctly if run inside a container. */
1428         if (detect_container() > 0)
1429                 return 0;
1430
1431         /* When started as PID1, the kernel uses /dev/console for our stdios and uses TERM=linux whatever the
1432          * backend device used by the console. We try to make a better guess here since some consoles might
1433          * not have support for color mode for example.
1434          *
1435          * However if TERM was configured through the kernel command line then leave it alone. */
1436         r = proc_cmdline_get_key("TERM", 0, &term);
1437         if (r < 0)
1438                 return r;
1439
1440         t = term ?: default_term_for_tty("/dev/console");
1441
1442         if (setenv("TERM", t, 1) < 0)
1443                 return -errno;
1444
1445         /* The kernels sets HOME=/ for init. Let's undo this. */
1446         if (path_equal_ptr(getenv("HOME"), "/"))
1447                 assert_se(unsetenv("HOME") == 0);
1448
1449         return 0;
1450 }
1451
/* SysV compatibility: when we are invoked under the name "init" but are not PID 1, behave like telinit by
 * exec()ing systemctl with the same argument vector. Only returns if neither condition applies (or if built
 * without SysV compatibility); a failed exec terminates the process with EXIT_FAILURE. */
static void redirect_telinit(int argc, char *argv[]) {

#if HAVE_SYSV_COMPAT
        if (getpid_cached() != 1 && invoked_as(argv, "init")) {
                execv(SYSTEMCTL_BINARY_PATH, argv);

                /* We only get here if the exec failed */
                log_error_errno(errno, "Failed to exec " SYSTEMCTL_BINARY_PATH ": %m");
                exit(EXIT_FAILURE);
        }
#endif
}
1468
1469 static int become_shutdown(
1470                 const char *shutdown_verb,
1471                 int retval) {
1472
1473         char log_level[DECIMAL_STR_MAX(int) + 1],
1474                 exit_code[DECIMAL_STR_MAX(uint8_t) + 1],
1475                 timeout[DECIMAL_STR_MAX(usec_t) + 1];
1476
1477         const char* command_line[13] = {
1478                 SYSTEMD_SHUTDOWN_BINARY_PATH,
1479                 shutdown_verb,
1480                 "--timeout", timeout,
1481                 "--log-level", log_level,
1482                 "--log-target",
1483         };
1484
1485         _cleanup_strv_free_ char **env_block = NULL;
1486         size_t pos = 7;
1487         int r;
1488         usec_t watchdog_timer = 0;
1489
1490         assert(shutdown_verb);
1491         assert(!command_line[pos]);
1492         env_block = strv_copy(environ);
1493
1494         xsprintf(log_level, "%d", log_get_max_level());
1495         xsprintf(timeout, "%" PRI_USEC "us", arg_default_timeout_stop_usec);
1496
1497         switch (log_get_target()) {
1498
1499         case LOG_TARGET_KMSG:
1500         case LOG_TARGET_JOURNAL_OR_KMSG:
1501         case LOG_TARGET_SYSLOG_OR_KMSG:
1502                 command_line[pos++] = "kmsg";
1503                 break;
1504
1505         case LOG_TARGET_NULL:
1506                 command_line[pos++] = "null";
1507                 break;
1508
1509         case LOG_TARGET_CONSOLE:
1510         default:
1511                 command_line[pos++] = "console";
1512                 break;
1513         };
1514
1515         if (log_get_show_color())
1516                 command_line[pos++] = "--log-color";
1517
1518         if (log_get_show_location())
1519                 command_line[pos++] = "--log-location";
1520
1521         if (log_get_show_time())
1522                 command_line[pos++] = "--log-time";
1523
1524         if (streq(shutdown_verb, "exit")) {
1525                 command_line[pos++] = "--exit-code";
1526                 command_line[pos++] = exit_code;
1527                 xsprintf(exit_code, "%d", retval);
1528         }
1529
1530         assert(pos < ELEMENTSOF(command_line));
1531
1532         if (streq(shutdown_verb, "reboot"))
1533                 watchdog_timer = arg_reboot_watchdog;
1534         else if (streq(shutdown_verb, "kexec"))
1535                 watchdog_timer = arg_kexec_watchdog;
1536
1537         if (watchdog_timer > 0 && watchdog_timer != USEC_INFINITY) {
1538
1539                 char *e;
1540
1541                 /* If we reboot or kexec let's set the shutdown
1542                  * watchdog and tell the shutdown binary to
1543                  * repeatedly ping it */
1544                 r = watchdog_set_timeout(&watchdog_timer);
1545                 watchdog_close(r < 0);
1546
1547                 /* Tell the binary how often to ping, ignore failure */
1548                 if (asprintf(&e, "WATCHDOG_USEC="USEC_FMT, watchdog_timer) > 0)
1549                         (void) strv_consume(&env_block, e);
1550
1551                 if (arg_watchdog_device &&
1552                     asprintf(&e, "WATCHDOG_DEVICE=%s", arg_watchdog_device) > 0)
1553                         (void) strv_consume(&env_block, e);
1554         } else
1555                 watchdog_close(true);
1556
1557         /* Avoid the creation of new processes forked by the
1558          * kernel; at this point, we will not listen to the
1559          * signals anyway */
1560         if (detect_container() <= 0)
1561                 (void) cg_uninstall_release_agent(SYSTEMD_CGROUP_CONTROLLER);
1562
1563         execve(SYSTEMD_SHUTDOWN_BINARY_PATH, (char **) command_line, env_block);
1564         return -errno;
1565 }
1566
1567 static void initialize_clock(void) {
1568         int r;
1569
1570         /* This is called very early on, before we parse the kernel command line or otherwise figure out why
1571          * we are running, but only once. */
1572
1573         if (clock_is_localtime(NULL) > 0) {
1574                 int min;
1575
1576                 /* The very first call of settimeofday() also does a time warp in the kernel.
1577                  *
1578                  * In the rtc-in-local time mode, we set the kernel's timezone, and rely on external tools to
1579                  * take care of maintaining the RTC and do all adjustments.  This matches the behavior of
1580                  * Windows, which leaves the RTC alone if the registry tells that the RTC runs in UTC.
1581                  */
1582                 r = clock_set_timezone(&min);
1583                 if (r < 0)
1584                         log_error_errno(r, "Failed to apply local time delta, ignoring: %m");
1585                 else
1586                         log_info("RTC configured in localtime, applying delta of %i minutes to system time.", min);
1587
1588         } else if (!in_initrd())
1589                 /*
1590                  * Do a dummy very first call to seal the kernel's time warp magic.
1591                  *
1592                  * Do not call this from inside the initrd. The initrd might not carry /etc/adjtime with
1593                  * LOCAL, but the real system could be set up that way. In such case, we need to delay the
1594                  * time-warp or the sealing until we reach the real system.
1595                  *
1596                  * Do no set the kernel's timezone. The concept of local time cannot be supported reliably,
1597                  * the time will jump or be incorrect at every daylight saving time change. All kernel local
1598                  * time concepts will be treated as UTC that way.
1599                  */
1600                 (void) clock_reset_timewarp();
1601
1602         ClockChangeDirection change_dir;
1603         r = clock_apply_epoch(&change_dir);
1604         if (r > 0 && change_dir == CLOCK_CHANGE_FORWARD)
1605                 log_info("System time before build time, advancing clock.");
1606         else if (r > 0 && change_dir == CLOCK_CHANGE_BACKWARD)
1607                 log_info("System time is further ahead than %s after build time, resetting clock to build time.",
1608                          FORMAT_TIMESPAN(CLOCK_VALID_RANGE_USEC_MAX, USEC_PER_DAY));
1609         else if (r < 0 && change_dir == CLOCK_CHANGE_FORWARD)
1610                 log_error_errno(r, "Current system time is before build time, but cannot correct: %m");
1611         else if (r < 0 && change_dir == CLOCK_CHANGE_BACKWARD)
1612                 log_error_errno(r, "Current system time is further ahead %s after build time, but cannot correct: %m",
1613                                 FORMAT_TIMESPAN(CLOCK_VALID_RANGE_USEC_MAX, USEC_PER_DAY));
1614 }
1615
1616 static void apply_clock_update(void) {
1617         struct timespec ts;
1618
1619         /* This is called later than initialize_clock(), i.e. after we parsed configuration files/kernel
1620          * command line and such. */
1621
1622         if (arg_clock_usec == 0)
1623                 return;
1624
1625         if (getpid_cached() != 1)
1626                 return;
1627
1628         if (clock_settime(CLOCK_REALTIME, timespec_store(&ts, arg_clock_usec)) < 0)
1629                 log_error_errno(errno, "Failed to set system clock to time specified on kernel command line: %m");
1630         else
1631                 log_info("Set system clock to %s, as specified on the kernel command line.",
1632                          FORMAT_TIMESTAMP(arg_clock_usec));
1633 }
1634
1635 static void cmdline_take_random_seed(void) {
1636         size_t suggested;
1637         int r;
1638
1639         if (arg_random_seed_size == 0)
1640                 return;
1641
1642         if (getpid_cached() != 1)
1643                 return;
1644
1645         assert(arg_random_seed);
1646         suggested = random_pool_size();
1647
1648         if (arg_random_seed_size < suggested)
1649                 log_warning("Random seed specified on kernel command line has size %zu, but %zu bytes required to fill entropy pool.",
1650                             arg_random_seed_size, suggested);
1651
1652         r = random_write_entropy(-1, arg_random_seed, arg_random_seed_size, true);
1653         if (r < 0) {
1654                 log_warning_errno(r, "Failed to credit entropy specified on kernel command line, ignoring: %m");
1655                 return;
1656         }
1657
1658         log_notice("Successfully credited entropy passed on kernel command line.\n"
1659                    "Note that the seed provided this way is accessible to unprivileged programs. "
1660                    "This functionality should not be used outside of testing environments.");
1661 }
1662
/* Core dump defaults for PID 1 (no-op for any other PID, or when built without coredump support): lift
 * RLIMIT_CORE to infinity and, unless skip_setup is true, call disable_coredumps(). */
static void initialize_coredump(bool skip_setup) {
#if ENABLE_COREDUMP
        if (getpid_cached() == 1) {
                /* Don't limit the core dump size, so that coredump handlers such as systemd-coredump (which
                 * honour the limit) will process core dumps for system services by default. */
                if (setrlimit(RLIMIT_CORE, &RLIMIT_MAKE_CONST(RLIM_INFINITY)) < 0)
                        log_warning_errno(errno, "Failed to set RLIMIT_CORE: %m");

                /* But at the same time, turn off the core_pattern logic by default, so that no coredumps
                 * are stored until the systemd-coredump tool is enabled via sysctl. However it can be
                 * changed via the kernel command line later so core dumps can still be generated during
                 * early startup and in initramfs. */
                if (!skip_setup)
                        disable_coredumps();
        }
#endif
}
1680
1681 static void initialize_core_pattern(bool skip_setup) {
1682         int r;
1683
1684         if (skip_setup || !arg_early_core_pattern)
1685                 return;
1686
1687         if (getpid_cached() != 1)
1688                 return;
1689
1690         r = write_string_file("/proc/sys/kernel/core_pattern", arg_early_core_pattern, WRITE_STRING_FILE_DISABLE_BUFFER);
1691         if (r < 0)
1692                 log_warning_errno(r, "Failed to write '%s' to /proc/sys/kernel/core_pattern, ignoring: %m",
1693                                   arg_early_core_pattern);
1694 }
1695
1696 static void update_cpu_affinity(bool skip_setup) {
1697         _cleanup_free_ char *mask = NULL;
1698
1699         if (skip_setup || !arg_cpu_affinity.set)
1700                 return;
1701
1702         assert(arg_cpu_affinity.allocated > 0);
1703
1704         mask = cpu_set_to_string(&arg_cpu_affinity);
1705         log_debug("Setting CPU affinity to %s.", strnull(mask));
1706
1707         if (sched_setaffinity(0, arg_cpu_affinity.allocated, arg_cpu_affinity.set) < 0)
1708                 log_warning_errno(errno, "Failed to set CPU affinity: %m");
1709 }
1710
1711 static void update_numa_policy(bool skip_setup) {
1712         int r;
1713         _cleanup_free_ char *nodes = NULL;
1714         const char * policy = NULL;
1715
1716         if (skip_setup || !mpol_is_valid(numa_policy_get_type(&arg_numa_policy)))
1717                 return;
1718
1719         if (DEBUG_LOGGING) {
1720                 policy = mpol_to_string(numa_policy_get_type(&arg_numa_policy));
1721                 nodes = cpu_set_to_range_string(&arg_numa_policy.nodes);
1722                 log_debug("Setting NUMA policy to %s, with nodes %s.", strnull(policy), strnull(nodes));
1723         }
1724
1725         r = apply_numa_policy(&arg_numa_policy);
1726         if (r == -EOPNOTSUPP)
1727                 log_debug_errno(r, "NUMA support not available, ignoring.");
1728         else if (r < 0)
1729                 log_warning_errno(r, "Failed to set NUMA memory policy: %m");
1730 }
1731
/* Appends to dst[], starting at index *dst_index, those entries of src[1..argc-1] that should be handed to
 * a re-executed instance, and advances *dst_index accordingly. Dropped are the mode switches, serialization
 * switches, and anything that selects a unit to start. The caller must provide enough room in dst; the
 * strings themselves are not copied. */
static void filter_args(
                const char* dst[],
                size_t *dst_index,
                char **src,
                int argc) {

        assert(dst);
        assert(dst_index);

        for (int i = 1; i < argc; i++) {
                const char *arg = src[i];

                /* Mode switches: the caller adds the right ones itself. */
                if (STR_IN_SET(arg,
                               "--switched-root",
                               "--system",
                               "--user"))
                        continue;

                /* Serialization fd, in both the "--opt=value" and the "--opt value" form. */
                if (startswith(arg, "--deserialize="))
                        continue;
                if (streq(arg, "--deserialize")) {
                        i++;                            /* The value is a separate argument, drop it as well */
                        continue;
                }

                /* Skip target unit designators. We already acted upon this information and have queued
                 * appropriate jobs. We don't want to redo all this after reexecution. */
                if (startswith(arg, "--unit="))
                        continue;
                if (streq(arg, "--unit")) {
                        i++;                            /* The value is a separate argument, drop it as well */
                        continue;
                }

                if (startswith(arg,
                               in_initrd() ? "rd.systemd.unit=" : "systemd.unit="))
                        continue;

                if (runlevel_to_target(arg))
                        continue;

                /* Everything else is passed over to the new instance unchanged. */
                dst[*dst_index] = arg;
                (*dst_index)++;
        }
}
1776
1777 static void do_reexecute(
1778                 int argc,
1779                 char* argv[],
1780                 const struct rlimit *saved_rlimit_nofile,
1781                 const struct rlimit *saved_rlimit_memlock,
1782                 FDSet *fds,
1783                 const char *switch_root_dir,
1784                 const char *switch_root_init,
1785                 const char **ret_error_message) {
1786
1787         size_t i, args_size;
1788         const char **args;
1789         int r;
1790
1791         assert(argc >= 0);
1792         assert(saved_rlimit_nofile);
1793         assert(saved_rlimit_memlock);
1794         assert(ret_error_message);
1795
1796         /* Close and disarm the watchdog, so that the new instance can reinitialize it, but doesn't get
1797          * rebooted while we do that */
1798         watchdog_close(true);
1799
1800         /* Reset RLIMIT_NOFILE + RLIMIT_MEMLOCK back to the kernel defaults, so that the new systemd can pass
1801          * the kernel default to its child processes */
1802         if (saved_rlimit_nofile->rlim_cur != 0)
1803                 (void) setrlimit(RLIMIT_NOFILE, saved_rlimit_nofile);
1804         if (saved_rlimit_memlock->rlim_cur != RLIM_INFINITY)
1805                 (void) setrlimit(RLIMIT_MEMLOCK, saved_rlimit_memlock);
1806
1807         if (switch_root_dir) {
1808                 /* Kill all remaining processes from the initrd, but don't wait for them, so that we can
1809                  * handle the SIGCHLD for them after deserializing. */
1810                 broadcast_signal(SIGTERM, false, true, arg_default_timeout_stop_usec);
1811
1812                 /* And switch root with MS_MOVE, because we remove the old directory afterwards and detach it. */
1813                 r = switch_root(switch_root_dir, "/mnt", true, MS_MOVE);
1814                 if (r < 0)
1815                         log_error_errno(r, "Failed to switch root, trying to continue: %m");
1816         }
1817
1818         args_size = argc + 6;
1819         args = newa(const char*, args_size);
1820
1821         if (!switch_root_init) {
1822                 char sfd[DECIMAL_STR_MAX(int)];
1823
1824                 /* First try to spawn ourselves with the right path, and with full serialization. We do this
1825                  * only if the user didn't specify an explicit init to spawn. */
1826
1827                 assert(arg_serialization);
1828                 assert(fds);
1829
1830                 xsprintf(sfd, "%i", fileno(arg_serialization));
1831
1832                 i = 1;         /* Leave args[0] empty for now. */
1833                 filter_args(args, &i, argv, argc);
1834
1835                 if (switch_root_dir)
1836                         args[i++] = "--switched-root";
1837                 args[i++] = arg_system ? "--system" : "--user";
1838                 args[i++] = "--deserialize";
1839                 args[i++] = sfd;
1840                 args[i++] = NULL;
1841
1842                 assert(i <= args_size);
1843
1844                 /*
1845                  * We want valgrind to print its memory usage summary before reexecution.  Valgrind won't do
1846                  * this is on its own on exec(), but it will do it on exit().  Hence, to ensure we get a
1847                  * summary here, fork() off a child, let it exit() cleanly, so that it prints the summary,
1848                  * and wait() for it in the parent, before proceeding into the exec().
1849                  */
1850                 valgrind_summary_hack();
1851
1852                 args[0] = SYSTEMD_BINARY_PATH;
1853                 (void) execv(args[0], (char* const*) args);
1854                 log_debug_errno(errno, "Failed to execute our own binary %s, trying fallback: %m", args[0]);
1855         }
1856
1857         /* Try the fallback, if there is any, without any serialization. We pass the original argv[] and
1858          * envp[]. (Well, modulo the ordering changes due to getopt() in argv[], and some cleanups in envp[],
1859          * but let's hope that doesn't matter.) */
1860
1861         arg_serialization = safe_fclose(arg_serialization);
1862         fds = fdset_free(fds);
1863
1864         /* Reopen the console */
1865         (void) make_console_stdio();
1866
1867         i = 1;         /* Leave args[0] empty for now. */
1868         for (int j = 1; j <= argc; j++)
1869                 args[i++] = argv[j];
1870         assert(i <= args_size);
1871
1872         /* Re-enable any blocked signals, especially important if we switch from initial ramdisk to init=... */
1873         (void) reset_all_signal_handlers();
1874         (void) reset_signal_mask();
1875         (void) rlimit_nofile_safe();
1876
1877         if (switch_root_init) {
1878                 args[0] = switch_root_init;
1879                 (void) execve(args[0], (char* const*) args, saved_env);
1880                 log_warning_errno(errno, "Failed to execute configured init %s, trying fallback: %m", args[0]);
1881         }
1882
1883         args[0] = "/sbin/init";
1884         (void) execv(args[0], (char* const*) args);
1885         r = -errno;
1886
1887         manager_status_printf(NULL, STATUS_TYPE_EMERGENCY,
1888                               ANSI_HIGHLIGHT_RED "  !!  " ANSI_NORMAL,
1889                               "Failed to execute /sbin/init");
1890
1891         if (r == -ENOENT) {
1892                 log_warning("No /sbin/init, trying fallback");
1893
1894                 args[0] = "/bin/sh";
1895                 args[1] = NULL;
1896                 (void) execve(args[0], (char* const*) args, saved_env);
1897                 log_error_errno(errno, "Failed to execute /bin/sh, giving up: %m");
1898         } else
1899                 log_warning_errno(r, "Failed to execute /sbin/init, giving up: %m");
1900
1901         *ret_error_message = "Failed to execute fallback shell";
1902 }
1903
1904 static int invoke_main_loop(
1905                 Manager *m,
1906                 const struct rlimit *saved_rlimit_nofile,
1907                 const struct rlimit *saved_rlimit_memlock,
1908                 bool *ret_reexecute,
1909                 int *ret_retval,                   /* Return parameters relevant for shutting down */
1910                 const char **ret_shutdown_verb,    /* … */
1911                 FDSet **ret_fds,                   /* Return parameters for reexecuting */
1912                 char **ret_switch_root_dir,        /* … */
1913                 char **ret_switch_root_init,       /* … */
1914                 const char **ret_error_message) {
1915
1916         int r;
1917
1918         assert(m);
1919         assert(saved_rlimit_nofile);
1920         assert(saved_rlimit_memlock);
1921         assert(ret_reexecute);
1922         assert(ret_retval);
1923         assert(ret_shutdown_verb);
1924         assert(ret_fds);
1925         assert(ret_switch_root_dir);
1926         assert(ret_switch_root_init);
1927         assert(ret_error_message);
1928
1929         for (;;) {
1930                 r = manager_loop(m);
1931                 if (r < 0) {
1932                         *ret_error_message = "Failed to run main loop";
1933                         return log_emergency_errno(r, "Failed to run main loop: %m");
1934                 }
1935
1936                 switch ((ManagerObjective) r) {
1937
1938                 case MANAGER_RELOAD: {
1939                         LogTarget saved_log_target;
1940                         int saved_log_level;
1941
1942                         log_info("Reloading.");
1943
1944                         /* First, save any overridden log level/target, then parse the configuration file,
1945                          * which might change the log level to new settings. */
1946
1947                         saved_log_level = m->log_level_overridden ? log_get_max_level() : -1;
1948                         saved_log_target = m->log_target_overridden ? log_get_target() : _LOG_TARGET_INVALID;
1949
1950                         (void) parse_configuration(saved_rlimit_nofile, saved_rlimit_memlock);
1951
1952                         set_manager_defaults(m);
1953                         set_manager_settings(m);
1954
1955                         update_cpu_affinity(false);
1956                         update_numa_policy(false);
1957
1958                         if (saved_log_level >= 0)
1959                                 manager_override_log_level(m, saved_log_level);
1960                         if (saved_log_target >= 0)
1961                                 manager_override_log_target(m, saved_log_target);
1962
1963                         r = manager_reload(m);
1964                         if (r < 0)
1965                                 /* Reloading failed before the point of no return.
1966                                  * Let's continue running as if nothing happened. */
1967                                 m->objective = MANAGER_OK;
1968
1969                         break;
1970                 }
1971
1972                 case MANAGER_REEXECUTE:
1973
1974                         r = prepare_reexecute(m, &arg_serialization, ret_fds, false);
1975                         if (r < 0) {
1976                                 *ret_error_message = "Failed to prepare for reexecution";
1977                                 return r;
1978                         }
1979
1980                         log_notice("Reexecuting.");
1981
1982                         *ret_reexecute = true;
1983                         *ret_retval = EXIT_SUCCESS;
1984                         *ret_shutdown_verb = NULL;
1985                         *ret_switch_root_dir = *ret_switch_root_init = NULL;
1986
1987                         return 0;
1988
1989                 case MANAGER_SWITCH_ROOT:
1990                         if (!m->switch_root_init) {
1991                                 r = prepare_reexecute(m, &arg_serialization, ret_fds, true);
1992                                 if (r < 0) {
1993                                         *ret_error_message = "Failed to prepare for reexecution";
1994                                         return r;
1995                                 }
1996                         } else
1997                                 *ret_fds = NULL;
1998
1999                         log_notice("Switching root.");
2000
2001                         *ret_reexecute = true;
2002                         *ret_retval = EXIT_SUCCESS;
2003                         *ret_shutdown_verb = NULL;
2004
2005                         /* Steal the switch root parameters */
2006                         *ret_switch_root_dir = TAKE_PTR(m->switch_root);
2007                         *ret_switch_root_init = TAKE_PTR(m->switch_root_init);
2008
2009                         return 0;
2010
2011                 case MANAGER_EXIT:
2012
2013                         if (MANAGER_IS_USER(m)) {
2014                                 log_debug("Exit.");
2015
2016                                 *ret_reexecute = false;
2017                                 *ret_retval = m->return_value;
2018                                 *ret_shutdown_verb = NULL;
2019                                 *ret_fds = NULL;
2020                                 *ret_switch_root_dir = *ret_switch_root_init = NULL;
2021
2022                                 return 0;
2023                         }
2024
2025                         _fallthrough_;
2026                 case MANAGER_REBOOT:
2027                 case MANAGER_POWEROFF:
2028                 case MANAGER_HALT:
2029                 case MANAGER_KEXEC: {
2030                         static const char * const table[_MANAGER_OBJECTIVE_MAX] = {
2031                                 [MANAGER_EXIT]     = "exit",
2032                                 [MANAGER_REBOOT]   = "reboot",
2033                                 [MANAGER_POWEROFF] = "poweroff",
2034                                 [MANAGER_HALT]     = "halt",
2035                                 [MANAGER_KEXEC]    = "kexec",
2036                         };
2037
2038                         log_notice("Shutting down.");
2039
2040                         *ret_reexecute = false;
2041                         *ret_retval = m->return_value;
2042                         assert_se(*ret_shutdown_verb = table[m->objective]);
2043                         *ret_fds = NULL;
2044                         *ret_switch_root_dir = *ret_switch_root_init = NULL;
2045
2046                         return 0;
2047                 }
2048
2049                 default:
2050                         assert_not_reached();
2051                 }
2052         }
2053 }
2054
2055 static void log_execution_mode(bool *ret_first_boot) {
2056         assert(ret_first_boot);
2057
2058         if (arg_system) {
2059                 int v;
2060
2061                 log_info("systemd " GIT_VERSION " running in %ssystem mode (%s)",
2062                          arg_action == ACTION_TEST ? "test " : "",
2063                          systemd_features);
2064
2065                 v = detect_virtualization();
2066                 if (v > 0)
2067                         log_info("Detected virtualization %s.", virtualization_to_string(v));
2068
2069                 log_info("Detected architecture %s.", architecture_to_string(uname_architecture()));
2070
2071                 if (in_initrd()) {
2072                         *ret_first_boot = false;
2073                         log_info("Running in initial RAM disk.");
2074                 } else {
2075                         int r;
2076                         _cleanup_free_ char *id_text = NULL;
2077
2078                         /* Let's check whether we are in first boot.  We use /etc/machine-id as flag file
2079                          * for this: If it is missing or contains the value "uninitialized", this is the
2080                          * first boot.  In any other case, it is not.  This allows container managers and
2081                          * installers to provision a couple of files already.  If the container manager
2082                          * wants to provision the machine ID itself it should pass $container_uuid to PID 1. */
2083
2084                         r = read_one_line_file("/etc/machine-id", &id_text);
2085                         if (r < 0 || streq(id_text, "uninitialized")) {
2086                                 if (r < 0 && r != -ENOENT)
2087                                         log_warning_errno(r, "Unexpected error while reading /etc/machine-id, ignoring: %m");
2088
2089                                 *ret_first_boot = true;
2090                                 log_info("Detected first boot.");
2091                         } else {
2092                                 *ret_first_boot = false;
2093                                 log_debug("Detected initialized system, this is not the first boot.");
2094                         }
2095                 }
2096         } else {
2097                 if (DEBUG_LOGGING) {
2098                         _cleanup_free_ char *t = NULL;
2099
2100                         t = uid_to_name(getuid());
2101                         log_debug("systemd " GIT_VERSION " running in %suser mode for user " UID_FMT "/%s. (%s)",
2102                                   arg_action == ACTION_TEST ? " test" : "",
2103                                   getuid(), strna(t), systemd_features);
2104                 }
2105
2106                 *ret_first_boot = false;
2107         }
2108 }
2109
2110 static int initialize_runtime(
2111                 bool skip_setup,
2112                 bool first_boot,
2113                 struct rlimit *saved_rlimit_nofile,
2114                 struct rlimit *saved_rlimit_memlock,
2115                 const char **ret_error_message) {
2116         int r;
2117
2118         assert(ret_error_message);
2119
2120         /* Sets up various runtime parameters. Many of these initializations are conditionalized:
2121          *
2122          * - Some only apply to --system instances
2123          * - Some only apply to --user instances
2124          * - Some only apply when we first start up, but not when we reexecute
2125          */
2126
2127         if (arg_action != ACTION_RUN)
2128                 return 0;
2129
2130         update_cpu_affinity(skip_setup);
2131         update_numa_policy(skip_setup);
2132
2133         if (arg_system) {
2134                 /* Make sure we leave a core dump without panicking the kernel. */
2135                 install_crash_handler();
2136
2137                 if (!skip_setup) {
2138                         r = mount_cgroup_controllers();
2139                         if (r < 0) {
2140                                 *ret_error_message = "Failed to mount cgroup hierarchies";
2141                                 return r;
2142                         }
2143
2144                         status_welcome();
2145                         (void) hostname_setup(true);
2146                         /* Force transient machine-id on first boot. */
2147                         machine_id_setup(NULL, first_boot, arg_machine_id, NULL);
2148                         (void) loopback_setup();
2149                         bump_unix_max_dgram_qlen();
2150                         bump_file_max_and_nr_open();
2151                         test_usr();
2152                         write_container_id();
2153                 }
2154
2155                 if (arg_watchdog_device) {
2156                         r = watchdog_set_device(arg_watchdog_device);
2157                         if (r < 0)
2158                                 log_warning_errno(r, "Failed to set watchdog device to %s, ignoring: %m", arg_watchdog_device);
2159                 }
2160         } else {
2161                 _cleanup_free_ char *p = NULL;
2162
2163                 /* Create the runtime directory and place the inaccessible device nodes there, if we run in
2164                  * user mode. In system mode mount_setup() already did that. */
2165
2166                 r = xdg_user_runtime_dir(&p, "/systemd");
2167                 if (r < 0) {
2168                         *ret_error_message = "$XDG_RUNTIME_DIR is not set";
2169                         return log_emergency_errno(r, "Failed to determine $XDG_RUNTIME_DIR path: %m");
2170                 }
2171
2172                 (void) mkdir_p_label(p, 0755);
2173                 (void) make_inaccessible_nodes(p, UID_INVALID, GID_INVALID);
2174         }
2175
2176         if (arg_timer_slack_nsec != NSEC_INFINITY)
2177                 if (prctl(PR_SET_TIMERSLACK, arg_timer_slack_nsec) < 0)
2178                         log_warning_errno(errno, "Failed to adjust timer slack, ignoring: %m");
2179
2180         if (arg_system && !cap_test_all(arg_capability_bounding_set)) {
2181                 r = capability_bounding_set_drop_usermode(arg_capability_bounding_set);
2182                 if (r < 0) {
2183                         *ret_error_message = "Failed to drop capability bounding set of usermode helpers";
2184                         return log_emergency_errno(r, "Failed to drop capability bounding set of usermode helpers: %m");
2185                 }
2186
2187                 r = capability_bounding_set_drop(arg_capability_bounding_set, true);
2188                 if (r < 0) {
2189                         *ret_error_message = "Failed to drop capability bounding set";
2190                         return log_emergency_errno(r, "Failed to drop capability bounding set: %m");
2191                 }
2192         }
2193
2194         if (arg_system && arg_no_new_privs) {
2195                 if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) < 0) {
2196                         *ret_error_message = "Failed to disable new privileges";
2197                         return log_emergency_errno(errno, "Failed to disable new privileges: %m");
2198                 }
2199         }
2200
2201         if (arg_syscall_archs) {
2202                 r = enforce_syscall_archs(arg_syscall_archs);
2203                 if (r < 0) {
2204                         *ret_error_message = "Failed to set syscall architectures";
2205                         return r;
2206                 }
2207         }
2208
2209         if (!arg_system)
2210                 /* Become reaper of our children */
2211                 if (prctl(PR_SET_CHILD_SUBREAPER, 1) < 0)
2212                         log_warning_errno(errno, "Failed to make us a subreaper: %m");
2213
2214         /* Bump up RLIMIT_NOFILE for systemd itself */
2215         (void) bump_rlimit_nofile(saved_rlimit_nofile);
2216         (void) bump_rlimit_memlock(saved_rlimit_memlock);
2217
2218         return 0;
2219 }
2220
2221 static int do_queue_default_job(
2222                 Manager *m,
2223                 const char **ret_error_message) {
2224
2225         _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
2226         const char *unit;
2227         Job *job;
2228         Unit *target;
2229         int r;
2230
2231         if (arg_default_unit)
2232                 unit = arg_default_unit;
2233         else if (in_initrd())
2234                 unit = SPECIAL_INITRD_TARGET;
2235         else
2236                 unit = SPECIAL_DEFAULT_TARGET;
2237
2238         log_debug("Activating default unit: %s", unit);
2239
2240         r = manager_load_startable_unit_or_warn(m, unit, NULL, &target);
2241         if (r < 0 && in_initrd() && !arg_default_unit) {
2242                 /* Fall back to default.target, which we used to always use by default. Only do this if no
2243                  * explicit configuration was given. */
2244
2245                 log_info("Falling back to " SPECIAL_DEFAULT_TARGET ".");
2246
2247                 r = manager_load_startable_unit_or_warn(m, SPECIAL_DEFAULT_TARGET, NULL, &target);
2248         }
2249         if (r < 0) {
2250                 log_info("Falling back to " SPECIAL_RESCUE_TARGET ".");
2251
2252                 r = manager_load_startable_unit_or_warn(m, SPECIAL_RESCUE_TARGET, NULL, &target);
2253                 if (r < 0) {
2254                         *ret_error_message = r == -ERFKILL ? SPECIAL_RESCUE_TARGET " masked"
2255                                                            : "Failed to load " SPECIAL_RESCUE_TARGET;
2256                         return r;
2257                 }
2258         }
2259
2260         assert(target->load_state == UNIT_LOADED);
2261
2262         r = manager_add_job(m, JOB_START, target, JOB_ISOLATE, NULL, &error, &job);
2263         if (r == -EPERM) {
2264                 log_debug_errno(r, "Default target could not be isolated, starting instead: %s", bus_error_message(&error, r));
2265
2266                 sd_bus_error_free(&error);
2267
2268                 r = manager_add_job(m, JOB_START, target, JOB_REPLACE, NULL, &error, &job);
2269                 if (r < 0) {
2270                         *ret_error_message = "Failed to start default target";
2271                         return log_emergency_errno(r, "Failed to start default target: %s", bus_error_message(&error, r));
2272                 }
2273
2274         } else if (r < 0) {
2275                 *ret_error_message = "Failed to isolate default target";
2276                 return log_emergency_errno(r, "Failed to isolate default target: %s", bus_error_message(&error, r));
2277         } else
2278                 log_info("Queued %s job for default target %s.",
2279                          job_type_to_string(job->type),
2280                          unit_status_string(job->unit, NULL));
2281
2282         m->default_unit_job_id = job->id;
2283
2284         return 0;
2285 }
2286
/* Records the current RLIMIT_NOFILE and RLIMIT_MEMLOCK settings of this process in the two supplied
 * structures. If a limit cannot be read a warning is logged and the respective structure is left as
 * it was passed in. */
static void save_rlimits(struct rlimit *saved_rlimit_nofile,
                         struct rlimit *saved_rlimit_memlock) {

        int k;

        assert(saved_rlimit_nofile);
        assert(saved_rlimit_memlock);

        k = getrlimit(RLIMIT_NOFILE, saved_rlimit_nofile);
        if (k < 0)
                log_warning_errno(errno, "Reading RLIMIT_NOFILE failed, ignoring: %m");

        k = getrlimit(RLIMIT_MEMLOCK, saved_rlimit_memlock);
        if (k < 0)
                log_warning_errno(errno, "Reading RLIMIT_MEMLOCK failed, ignoring: %m");
}
2299
2300 static void fallback_rlimit_nofile(const struct rlimit *saved_rlimit_nofile) {
2301         struct rlimit *rl;
2302
2303         if (arg_default_rlimit[RLIMIT_NOFILE])
2304                 return;
2305
2306         /* Make sure forked processes get limits based on the original kernel setting */
2307
2308         rl = newdup(struct rlimit, saved_rlimit_nofile, 1);
2309         if (!rl) {
2310                 log_oom();
2311                 return;
2312         }
2313
2314         /* Bump the hard limit for system services to a substantially higher value. The default
2315          * hard limit current kernels set is pretty low (4K), mostly for historical
2316          * reasons. According to kernel developers, the fd handling in recent kernels has been
2317          * optimized substantially enough, so that we can bump the limit now, without paying too
2318          * high a price in memory or performance. Note however that we only bump the hard limit,
2319          * not the soft limit. That's because select() works the way it works, and chokes on fds
2320          * >= 1024. If we'd bump the soft limit globally, it might accidentally happen to
2321          * unexpecting programs that they get fds higher than what they can process using
2322          * select(). By only bumping the hard limit but leaving the low limit as it is we avoid
2323          * this pitfall:  programs that are written by folks aware of the select() problem in mind
2324          * (and thus use poll()/epoll instead of select(), the way everybody should) can
2325          * explicitly opt into high fds by bumping their soft limit beyond 1024, to the hard limit
2326          * we pass. */
2327         if (arg_system) {
2328                 int nr;
2329
2330                 /* Get the underlying absolute limit the kernel enforces */
2331                 nr = read_nr_open();
2332
2333                 rl->rlim_max = MIN((rlim_t) nr, MAX(rl->rlim_max, (rlim_t) HIGH_RLIMIT_NOFILE));
2334         }
2335
2336         /* If for some reason we were invoked with a soft limit above 1024 (which should never
2337          * happen!, but who knows what we get passed in from pam_limit when invoked as --user
2338          * instance), then lower what we pass on to not confuse our children */
2339         rl->rlim_cur = MIN(rl->rlim_cur, (rlim_t) FD_SETSIZE);
2340
2341         arg_default_rlimit[RLIMIT_NOFILE] = rl;
2342 }
2343
2344 static void fallback_rlimit_memlock(const struct rlimit *saved_rlimit_memlock) {
2345         struct rlimit *rl;
2346
2347         /* Pass the original value down to invoked processes */
2348
2349         if (arg_default_rlimit[RLIMIT_MEMLOCK])
2350                 return;
2351
2352         rl = newdup(struct rlimit, saved_rlimit_memlock, 1);
2353         if (!rl) {
2354                 log_oom();
2355                 return;
2356         }
2357
2358         arg_default_rlimit[RLIMIT_MEMLOCK] = rl;
2359 }
2360
2361 static void setenv_manager_environment(void) {
2362         char **p;
2363         int r;
2364
2365         STRV_FOREACH(p, arg_manager_environment) {
2366                 log_debug("Setting '%s' in our own environment.", *p);
2367
2368                 r = putenv_dup(*p, true);
2369                 if (r < 0)
2370                         log_warning_errno(errno, "Failed to setenv \"%s\", ignoring: %m", *p);
2371         }
2372 }
2373
2374 static void reset_arguments(void) {
2375         /* Frees/resets arg_* variables, with a few exceptions commented below. */
2376
2377         arg_default_unit = mfree(arg_default_unit);
2378
2379         /* arg_system — ignore */
2380
2381         arg_dump_core = true;
2382         arg_crash_chvt = -1;
2383         arg_crash_shell = false;
2384         arg_crash_reboot = false;
2385         arg_confirm_spawn = mfree(arg_confirm_spawn);
2386         arg_show_status = _SHOW_STATUS_INVALID;
2387         arg_status_unit_format = STATUS_UNIT_FORMAT_DEFAULT;
2388         arg_switched_root = false;
2389         arg_pager_flags = 0;
2390         arg_service_watchdogs = true;
2391         arg_default_std_output = EXEC_OUTPUT_JOURNAL;
2392         arg_default_std_error = EXEC_OUTPUT_INHERIT;
2393         arg_default_restart_usec = DEFAULT_RESTART_USEC;
2394         arg_default_timeout_start_usec = DEFAULT_TIMEOUT_USEC;
2395         arg_default_timeout_stop_usec = DEFAULT_TIMEOUT_USEC;
2396         arg_default_timeout_abort_usec = DEFAULT_TIMEOUT_USEC;
2397         arg_default_timeout_abort_set = false;
2398         arg_default_start_limit_interval = DEFAULT_START_LIMIT_INTERVAL;
2399         arg_default_start_limit_burst = DEFAULT_START_LIMIT_BURST;
2400         arg_runtime_watchdog = 0;
2401         arg_reboot_watchdog = 10 * USEC_PER_MINUTE;
2402         arg_kexec_watchdog = 0;
2403         arg_early_core_pattern = NULL;
2404         arg_watchdog_device = NULL;
2405
2406         arg_default_environment = strv_free(arg_default_environment);
2407         arg_manager_environment = strv_free(arg_manager_environment);
2408         rlimit_free_all(arg_default_rlimit);
2409
2410         arg_capability_bounding_set = CAP_ALL;
2411         arg_no_new_privs = false;
2412         arg_timer_slack_nsec = NSEC_INFINITY;
2413         arg_default_timer_accuracy_usec = 1 * USEC_PER_MINUTE;
2414
2415         arg_syscall_archs = set_free(arg_syscall_archs);
2416
2417         /* arg_serialization — ignore */
2418
2419         arg_default_cpu_accounting = -1;
2420         arg_default_io_accounting = false;
2421         arg_default_ip_accounting = false;
2422         arg_default_blockio_accounting = false;
2423         arg_default_memory_accounting = MEMORY_ACCOUNTING_DEFAULT;
2424         arg_default_tasks_accounting = true;
2425         arg_default_tasks_max = DEFAULT_TASKS_MAX;
2426         arg_machine_id = (sd_id128_t) {};
2427         arg_cad_burst_action = EMERGENCY_ACTION_REBOOT_FORCE;
2428         arg_default_oom_policy = OOM_STOP;
2429
2430         cpu_set_reset(&arg_cpu_affinity);
2431         numa_policy_reset(&arg_numa_policy);
2432
2433         arg_random_seed = mfree(arg_random_seed);
2434         arg_random_seed_size = 0;
2435         arg_clock_usec = 0;
2436 }
2437
2438 static int parse_configuration(const struct rlimit *saved_rlimit_nofile,
2439                                const struct rlimit *saved_rlimit_memlock) {
2440         int r;
2441
2442         assert(saved_rlimit_nofile);
2443         assert(saved_rlimit_memlock);
2444
2445         /* Assign configuration defaults */
2446         reset_arguments();
2447
2448         r = parse_config_file();
2449         if (r < 0)
2450                 log_warning_errno(r, "Failed to parse config file, ignoring: %m");
2451
2452         if (arg_system) {
2453                 r = proc_cmdline_parse(parse_proc_cmdline_item, NULL, 0);
2454                 if (r < 0)
2455                         log_warning_errno(r, "Failed to parse kernel command line, ignoring: %m");
2456         }
2457
2458         /* Initialize some default rlimits for services if they haven't been configured */
2459         fallback_rlimit_nofile(saved_rlimit_nofile);
2460         fallback_rlimit_memlock(saved_rlimit_memlock);
2461
2462         /* Note that this also parses bits from the kernel command line, including "debug". */
2463         log_parse_environment();
2464
2465         /* Initialize the show status setting if it hasn't been set explicitly yet */
2466         if (arg_show_status == _SHOW_STATUS_INVALID)
2467                 arg_show_status = SHOW_STATUS_YES;
2468
2469         /* Push variables into the manager environment block */
2470         setenv_manager_environment();
2471
2472         return 0;
2473 }
2474
2475 static int safety_checks(void) {
2476
2477         if (getpid_cached() == 1 &&
2478             arg_action != ACTION_RUN)
2479                 return log_error_errno(SYNTHETIC_ERRNO(EPERM),
2480                                        "Unsupported execution mode while PID 1.");
2481
2482         if (getpid_cached() == 1 &&
2483             !arg_system)
2484                 return log_error_errno(SYNTHETIC_ERRNO(EPERM),
2485                                        "Can't run --user mode as PID 1.");
2486
2487         if (arg_action == ACTION_RUN &&
2488             arg_system &&
2489             getpid_cached() != 1)
2490                 return log_error_errno(SYNTHETIC_ERRNO(EPERM),
2491                                        "Can't run system mode unless PID 1.");
2492
2493         if (arg_action == ACTION_TEST &&
2494             geteuid() == 0)
2495                 return log_error_errno(SYNTHETIC_ERRNO(EPERM),
2496                                        "Don't run test mode as root.");
2497
2498         if (!arg_system &&
2499             arg_action == ACTION_RUN &&
2500             sd_booted() <= 0)
2501                 return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP),
2502                                        "Trying to run as user instance, but the system has not been booted with systemd.");
2503
2504         if (!arg_system &&
2505             arg_action == ACTION_RUN &&
2506             !getenv("XDG_RUNTIME_DIR"))
2507                 return log_error_errno(SYNTHETIC_ERRNO(EUNATCH),
2508                                        "Trying to run as user instance, but $XDG_RUNTIME_DIR is not set.");
2509
2510         if (arg_system &&
2511             arg_action == ACTION_RUN &&
2512             running_in_chroot() > 0)
2513                 return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP),
2514                                        "Cannot be run in a chroot() environment.");
2515
2516         return 0;
2517 }
2518
2519 static int initialize_security(
2520                 bool *loaded_policy,
2521                 dual_timestamp *security_start_timestamp,
2522                 dual_timestamp *security_finish_timestamp,
2523                 const char **ret_error_message) {
2524
2525         int r;
2526
2527         assert(loaded_policy);
2528         assert(security_start_timestamp);
2529         assert(security_finish_timestamp);
2530         assert(ret_error_message);
2531
2532         dual_timestamp_get(security_start_timestamp);
2533
2534         r = mac_selinux_setup(loaded_policy);
2535         if (r < 0) {
2536                 *ret_error_message = "Failed to load SELinux policy";
2537                 return r;
2538         }
2539
2540         r = mac_smack_setup(loaded_policy);
2541         if (r < 0) {
2542                 *ret_error_message = "Failed to load SMACK policy";
2543                 return r;
2544         }
2545
2546         r = mac_apparmor_setup();
2547         if (r < 0) {
2548                 *ret_error_message = "Failed to load AppArmor policy";
2549                 return r;
2550         }
2551
2552         r = ima_setup();
2553         if (r < 0) {
2554                 *ret_error_message = "Failed to load IMA policy";
2555                 return r;
2556         }
2557
2558         dual_timestamp_get(security_finish_timestamp);
2559         return 0;
2560 }
2561
2562 static int collect_fds(FDSet **ret_fds, const char **ret_error_message) {
2563         int r;
2564
2565         assert(ret_fds);
2566         assert(ret_error_message);
2567
2568         r = fdset_new_fill(ret_fds);
2569         if (r < 0) {
2570                 *ret_error_message = "Failed to allocate fd set";
2571                 return log_emergency_errno(r, "Failed to allocate fd set: %m");
2572         }
2573
2574         fdset_cloexec(*ret_fds, true);
2575
2576         if (arg_serialization)
2577                 assert_se(fdset_remove(*ret_fds, fileno(arg_serialization)) >= 0);
2578
2579         return 0;
2580 }
2581
2582 static void setup_console_terminal(bool skip_setup) {
2583
2584         if (!arg_system)
2585                 return;
2586
2587         /* Become a session leader if we aren't one yet. */
2588         (void) setsid();
2589
2590         /* If we are init, we connect stdin/stdout/stderr to /dev/null and make sure we don't have a
2591          * controlling tty. */
2592         (void) release_terminal();
2593
2594         /* Reset the console, but only if this is really init and we are freshly booted */
2595         if (getpid_cached() == 1 && !skip_setup)
2596                 (void) console_setup();
2597 }
2598
/* Takes a quick peek at the command line (the full parsing happens much later) to determine whether
 * this is a reexecution rather than a normal boot-up, in which case the extensive set-up can be
 * skipped.
 *
 * Returns true if "--deserialize" was passed and "--switched-root" was not. After switching root
 * all the special set-up is done anyway, even though deserialization takes place in that case too. */
static bool early_skip_setup_check(int argc, char *argv[]) {
        bool deserializing = false;

        for (int i = 1; i < argc; i++) {
                const char *arg = argv[i];

                /* Switching root always wins, regardless of what else is specified. */
                if (streq(arg, "--switched-root"))
                        return false;

                if (streq(arg, "--deserialize"))
                        deserializing = true;
        }

        /* Deserializing means we are reexecuting. */
        return deserializing;
}
2614
2615 static int save_env(void) {
2616         char **l;
2617
2618         l = strv_copy(environ);
2619         if (!l)
2620                 return -ENOMEM;
2621
2622         strv_free_and_replace(saved_env, l);
2623         return 0;
2624 }
2625
2626 int main(int argc, char *argv[]) {
2627
2628         dual_timestamp initrd_timestamp = DUAL_TIMESTAMP_NULL, userspace_timestamp = DUAL_TIMESTAMP_NULL, kernel_timestamp = DUAL_TIMESTAMP_NULL,
2629                 security_start_timestamp = DUAL_TIMESTAMP_NULL, security_finish_timestamp = DUAL_TIMESTAMP_NULL;
2630         struct rlimit saved_rlimit_nofile = RLIMIT_MAKE_CONST(0),
2631                 saved_rlimit_memlock = RLIMIT_MAKE_CONST(RLIM_INFINITY); /* The original rlimits we passed
2632                                                                           * in. Note we use different values
2633                                                                           * for the two that indicate whether
2634                                                                           * these fields are initialized! */
2635         bool skip_setup, loaded_policy = false, queue_default_job = false, first_boot = false, reexecute = false;
2636         char *switch_root_dir = NULL, *switch_root_init = NULL;
2637         usec_t before_startup, after_startup;
2638         static char systemd[] = "systemd";
2639         const char *shutdown_verb = NULL, *error_message = NULL;
2640         int r, retval = EXIT_FAILURE;
2641         Manager *m = NULL;
2642         FDSet *fds = NULL;
2643
2644         /* SysV compatibility: redirect init → telinit */
2645         redirect_telinit(argc, argv);
2646
2647         /* Take timestamps early on */
2648         dual_timestamp_from_monotonic(&kernel_timestamp, 0);
2649         dual_timestamp_get(&userspace_timestamp);
2650
2651         /* Figure out whether we need to do initialize the system, or if we already did that because we are
2652          * reexecuting. */
2653         skip_setup = early_skip_setup_check(argc, argv);
2654
2655         /* If we get started via the /sbin/init symlink then we are called 'init'. After a subsequent
2656          * reexecution we are then called 'systemd'. That is confusing, hence let's call us systemd
2657          * right-away. */
2658         program_invocation_short_name = systemd;
2659         (void) prctl(PR_SET_NAME, systemd);
2660
2661         /* Save the original command line */
2662         save_argc_argv(argc, argv);
2663
2664         /* Save the original environment as we might need to restore it if we're requested to execute another
2665          * system manager later. */
2666         r = save_env();
2667         if (r < 0) {
2668                 error_message = "Failed to copy environment block";
2669                 goto finish;
2670         }
2671
2672         /* Make sure that if the user says "syslog" we actually log to the journal. */
2673         log_set_upgrade_syslog_to_journal(true);
2674
2675         if (getpid_cached() == 1) {
2676                 /* When we run as PID 1 force system mode */
2677                 arg_system = true;
2678
2679                 /* Disable the umask logic */
2680                 umask(0);
2681
2682                 /* Make sure that at least initially we do not ever log to journald/syslogd, because it might
2683                  * not be activated yet (even though the log socket for it exists). */
2684                 log_set_prohibit_ipc(true);
2685
2686                 /* Always reopen /dev/console when running as PID 1 or one of its pre-execve() children. This
2687                  * is important so that we never end up logging to any foreign stderr, for example if we have
2688                  * to log in a child process right before execve()'ing the actual binary, at a point in time
2689                  * where socket activation stderr/stdout area already set up. */
2690                 log_set_always_reopen_console(true);
2691
2692                 if (detect_container() <= 0) {
2693
2694                         /* Running outside of a container as PID 1 */
2695                         log_set_target(LOG_TARGET_KMSG);
2696                         log_open();
2697
2698                         if (in_initrd())
2699                                 initrd_timestamp = userspace_timestamp;
2700
2701                         if (!skip_setup) {
2702                                 r = mount_setup_early();
2703                                 if (r < 0) {
2704                                         error_message = "Failed to mount early API filesystems";
2705                                         goto finish;
2706                                 }
2707
2708                                 /* Let's open the log backend a second time, in case the first time didn't
2709                                  * work. Quite possibly we have mounted /dev just now, so /dev/kmsg became
2710                                  * available, and it previously wasn't. */
2711                                 log_open();
2712
2713                                 disable_printk_ratelimit();
2714
2715                                 r = initialize_security(
2716                                                 &loaded_policy,
2717                                                 &security_start_timestamp,
2718                                                 &security_finish_timestamp,
2719                                                 &error_message);
2720                                 if (r < 0)
2721                                         goto finish;
2722                         }
2723
2724                         if (mac_selinux_init() < 0) {
2725                                 error_message = "Failed to initialize SELinux support";
2726                                 goto finish;
2727                         }
2728
2729                         if (!skip_setup)
2730                                 initialize_clock();
2731
2732                         /* Set the default for later on, but don't actually open the logs like this for
2733                          * now. Note that if we are transitioning from the initrd there might still be
2734                          * journal fd open, and we shouldn't attempt opening that before we parsed
2735                          * /proc/cmdline which might redirect output elsewhere. */
2736                         log_set_target(LOG_TARGET_JOURNAL_OR_KMSG);
2737
2738                 } else {
2739                         /* Running inside a container, as PID 1 */
2740                         log_set_target(LOG_TARGET_CONSOLE);
2741                         log_open();
2742
2743                         /* For later on, see above... */
2744                         log_set_target(LOG_TARGET_JOURNAL);
2745
2746                         /* clear the kernel timestamp, because we are in a container */
2747                         kernel_timestamp = DUAL_TIMESTAMP_NULL;
2748                 }
2749
2750                 initialize_coredump(skip_setup);
2751
2752                 r = fixup_environment();
2753                 if (r < 0) {
2754                         log_emergency_errno(r, "Failed to fix up PID 1 environment: %m");
2755                         error_message = "Failed to fix up PID1 environment";
2756                         goto finish;
2757                 }
2758
2759                 /* Try to figure out if we can use colors with the console. No need to do that for user
2760                  * instances since they never log into the console. */
2761                 log_show_color(colors_enabled());
2762
2763                 r = make_null_stdio();
2764                 if (r < 0)
2765                         log_warning_errno(r, "Failed to redirect standard streams to /dev/null, ignoring: %m");
2766
2767                 /* Load the kernel modules early. */
2768                 if (!skip_setup)
2769                         kmod_setup();
2770
2771                 /* Mount /proc, /sys and friends, so that /proc/cmdline and /proc/$PID/fd is available. */
2772                 r = mount_setup(loaded_policy, skip_setup);
2773                 if (r < 0) {
2774                         error_message = "Failed to mount API filesystems";
2775                         goto finish;
2776                 }
2777
2778                 /* The efivarfs is now mounted, let's read the random seed off it */
2779                 (void) efi_take_random_seed();
2780
2781                 /* Cache command-line options passed from EFI variables */
2782                 if (!skip_setup)
2783                         (void) cache_efi_options_variable();
2784         } else {
2785                 /* Running as user instance */
2786                 arg_system = false;
2787                 log_set_target(LOG_TARGET_AUTO);
2788                 log_open();
2789
2790                 /* clear the kernel timestamp, because we are not PID 1 */
2791                 kernel_timestamp = DUAL_TIMESTAMP_NULL;
2792
2793                 if (mac_selinux_init() < 0) {
2794                         error_message = "Failed to initialize SELinux support";
2795                         goto finish;
2796                 }
2797         }
2798
2799         /* Save the original RLIMIT_NOFILE/RLIMIT_MEMLOCK so that we can reset it later when
2800          * transitioning from the initrd to the main systemd or suchlike. */
2801         save_rlimits(&saved_rlimit_nofile, &saved_rlimit_memlock);
2802
2803         /* Reset all signal handlers. */
2804         (void) reset_all_signal_handlers();
2805         (void) ignore_signals(SIGNALS_IGNORE);
2806
2807         (void) parse_configuration(&saved_rlimit_nofile, &saved_rlimit_memlock);
2808
2809         r = parse_argv(argc, argv);
2810         if (r < 0) {
2811                 error_message = "Failed to parse commandline arguments";
2812                 goto finish;
2813         }
2814
2815         r = safety_checks();
2816         if (r < 0)
2817                 goto finish;
2818
2819         if (IN_SET(arg_action, ACTION_TEST, ACTION_HELP, ACTION_DUMP_CONFIGURATION_ITEMS, ACTION_DUMP_BUS_PROPERTIES, ACTION_BUS_INTROSPECT))
2820                 (void) pager_open(arg_pager_flags);
2821
2822         if (arg_action != ACTION_RUN)
2823                 skip_setup = true;
2824
2825         if (arg_action == ACTION_HELP) {
2826                 retval = help() < 0 ? EXIT_FAILURE : EXIT_SUCCESS;
2827                 goto finish;
2828         } else if (arg_action == ACTION_VERSION) {
2829                 retval = version();
2830                 goto finish;
2831         } else if (arg_action == ACTION_DUMP_CONFIGURATION_ITEMS) {
2832                 unit_dump_config_items(stdout);
2833                 retval = EXIT_SUCCESS;
2834                 goto finish;
2835         } else if (arg_action == ACTION_DUMP_BUS_PROPERTIES) {
2836                 dump_bus_properties(stdout);
2837                 retval = EXIT_SUCCESS;
2838                 goto finish;
2839         } else if (arg_action == ACTION_BUS_INTROSPECT) {
2840                 r = bus_manager_introspect_implementations(stdout, arg_bus_introspect);
2841                 retval = r >= 0 ? EXIT_SUCCESS : EXIT_FAILURE;
2842                 goto finish;
2843         }
2844
2845         assert_se(IN_SET(arg_action, ACTION_RUN, ACTION_TEST));
2846
2847         /* Move out of the way, so that we won't block unmounts */
2848         assert_se(chdir("/") == 0);
2849
2850         if (arg_action == ACTION_RUN) {
2851                 if (!skip_setup) {
2852                         /* Apply the systemd.clock_usec= kernel command line switch */
2853                         apply_clock_update();
2854
2855                         /* Apply random seed from kernel command line */
2856                         cmdline_take_random_seed();
2857                 }
2858
2859                 /* A core pattern might have been specified via the cmdline.  */
2860                 initialize_core_pattern(skip_setup);
2861
2862                 /* Close logging fds, in order not to confuse collecting passed fds and terminal logic below */
2863                 log_close();
2864
2865                 /* Remember open file descriptors for later deserialization */
2866                 r = collect_fds(&fds, &error_message);
2867                 if (r < 0)
2868                         goto finish;
2869
2870                 /* Give up any control of the console, but make sure it's initialized. */
2871                 setup_console_terminal(skip_setup);
2872
2873                 /* Open the logging devices, if possible and necessary */
2874                 log_open();
2875         }
2876
2877         log_execution_mode(&first_boot);
2878
2879         r = initialize_runtime(skip_setup,
2880                                first_boot,
2881                                &saved_rlimit_nofile,
2882                                &saved_rlimit_memlock,
2883                                &error_message);
2884         if (r < 0)
2885                 goto finish;
2886
2887         r = manager_new(arg_system ? UNIT_FILE_SYSTEM : UNIT_FILE_USER,
2888                         arg_action == ACTION_TEST ? MANAGER_TEST_FULL : 0,
2889                         &m);
2890         if (r < 0) {
2891                 log_emergency_errno(r, "Failed to allocate manager object: %m");
2892                 error_message = "Failed to allocate manager object";
2893                 goto finish;
2894         }
2895
2896         m->timestamps[MANAGER_TIMESTAMP_KERNEL] = kernel_timestamp;
2897         m->timestamps[MANAGER_TIMESTAMP_INITRD] = initrd_timestamp;
2898         m->timestamps[MANAGER_TIMESTAMP_USERSPACE] = userspace_timestamp;
2899         m->timestamps[manager_timestamp_initrd_mangle(MANAGER_TIMESTAMP_SECURITY_START)] = security_start_timestamp;
2900         m->timestamps[manager_timestamp_initrd_mangle(MANAGER_TIMESTAMP_SECURITY_FINISH)] = security_finish_timestamp;
2901
2902         set_manager_defaults(m);
2903         set_manager_settings(m);
2904         manager_set_first_boot(m, first_boot);
2905
2906         /* Remember whether we should queue the default job */
2907         queue_default_job = !arg_serialization || arg_switched_root;
2908
2909         before_startup = now(CLOCK_MONOTONIC);
2910
2911         r = manager_startup(m, arg_serialization, fds, /* root= */ NULL);
2912         if (r < 0) {
2913                 error_message = "Failed to start up manager";
2914                 goto finish;
2915         }
2916
2917         /* This will close all file descriptors that were opened, but not claimed by any unit. */
2918         fds = fdset_free(fds);
2919         arg_serialization = safe_fclose(arg_serialization);
2920
2921         if (queue_default_job) {
2922                 r = do_queue_default_job(m, &error_message);
2923                 if (r < 0)
2924                         goto finish;
2925         }
2926
2927         after_startup = now(CLOCK_MONOTONIC);
2928
2929         log_full(arg_action == ACTION_TEST ? LOG_INFO : LOG_DEBUG,
2930                  "Loaded units and determined initial transaction in %s.",
2931                  FORMAT_TIMESPAN(after_startup - before_startup, 100 * USEC_PER_MSEC));
2932
2933         if (arg_action == ACTION_TEST) {
2934                 manager_test_summary(m);
2935                 retval = EXIT_SUCCESS;
2936                 goto finish;
2937         }
2938
2939         (void) invoke_main_loop(m,
2940                                 &saved_rlimit_nofile,
2941                                 &saved_rlimit_memlock,
2942                                 &reexecute,
2943                                 &retval,
2944                                 &shutdown_verb,
2945                                 &fds,
2946                                 &switch_root_dir,
2947                                 &switch_root_init,
2948                                 &error_message);
2949
2950 finish:
2951         pager_close();
2952
2953         if (m) {
2954                 arg_reboot_watchdog = manager_get_watchdog(m, WATCHDOG_REBOOT);
2955                 arg_kexec_watchdog = manager_get_watchdog(m, WATCHDOG_KEXEC);
2956                 m = manager_free(m);
2957         }
2958
2959         mac_selinux_finish();
2960
2961         if (reexecute)
2962                 do_reexecute(argc, argv,
2963                              &saved_rlimit_nofile,
2964                              &saved_rlimit_memlock,
2965                              fds,
2966                              switch_root_dir,
2967                              switch_root_init,
2968                              &error_message); /* This only returns if reexecution failed */
2969
2970         arg_serialization = safe_fclose(arg_serialization);
2971         fds = fdset_free(fds);
2972
2973         saved_env = strv_free(saved_env);
2974
2975 #if HAVE_VALGRIND_VALGRIND_H
2976         /* If we are PID 1 and running under valgrind, then let's exit
2977          * here explicitly. valgrind will only generate nice output on
2978          * exit(), not on exec(), hence let's do the former not the
2979          * latter here. */
2980         if (getpid_cached() == 1 && RUNNING_ON_VALGRIND) {
2981                 /* Cleanup watchdog_device strings for valgrind. We need them
2982                  * in become_shutdown() so normally we cannot free them yet. */
2983                 watchdog_free_device();
2984                 arg_watchdog_device = mfree(arg_watchdog_device);
2985                 reset_arguments();
2986                 return retval;
2987         }
2988 #endif
2989
2990 #if HAS_FEATURE_ADDRESS_SANITIZER
2991         __lsan_do_leak_check();
2992 #endif
2993
2994         if (shutdown_verb) {
2995                 r = become_shutdown(shutdown_verb, retval);
2996                 log_error_errno(r, "Failed to execute shutdown binary, %s: %m", getpid_cached() == 1 ? "freezing" : "quitting");
2997                 error_message = "Failed to execute shutdown binary";
2998         }
2999
3000         watchdog_free_device();
3001         arg_watchdog_device = mfree(arg_watchdog_device);
3002
3003         if (getpid_cached() == 1) {
3004                 if (error_message)
3005                         manager_status_printf(NULL, STATUS_TYPE_EMERGENCY,
3006                                               ANSI_HIGHLIGHT_RED "!!!!!!" ANSI_NORMAL,
3007                                               "%s.", error_message);
3008                 freeze_or_exit_or_reboot();
3009         }
3010
3011         reset_arguments();
3012         return retval;
3013 }