src/core/main.c

   1 /* SPDX-License-Identifier: LGPL-2.1-or-later */
   2
   3 #include <errno.h>
   4 #include <fcntl.h>
   5 #include <getopt.h>
   6 #include <sys/mount.h>
   7 #include <sys/prctl.h>
   8 #include <sys/reboot.h>
   9 #include <unistd.h>
  10 #if HAVE_SECCOMP
  11 #include <seccomp.h>
  12 #endif
  13 #if HAVE_VALGRIND_VALGRIND_H
  14 #include <valgrind/valgrind.h>
  15 #endif
  16
  17 #include "sd-bus.h"
  18 #include "sd-daemon.h"
  19 #include "sd-messages.h"
  20
  21 #include "alloc-util.h"
  22 #include "apparmor-setup.h"
  23 #include "architecture.h"
  24 #include "build.h"
  25 #include "bus-error.h"
  26 #include "bus-util.h"
  27 #include "capability-util.h"
  28 #include "cgroup-util.h"
  29 #include "clock-util.h"
  30 #include "conf-parser.h"
  31 #include "cpu-set-util.h"
  32 #include "dbus-manager.h"
  33 #include "dbus.h"
  34 #include "def.h"
  35 #include "dev-setup.h"
  36 #include "efi-random.h"
  37 #include "efivars.h"
  38 #include "emergency-action.h"
  39 #include "env-util.h"
  40 #include "exit-status.h"
  41 #include "fd-util.h"
  42 #include "fdset.h"
  43 #include "fileio.h"
  44 #include "format-util.h"
  45 #include "fs-util.h"
  46 #include "hexdecoct.h"
  47 #include "hostname-setup.h"
  48 #include "ima-setup.h"
  49 #include "killall.h"
  50 #include "kmod-setup.h"
  51 #include "limits-util.h"
  52 #include "load-fragment.h"
  53 #include "log.h"
  54 #include "loopback-setup.h"
  55 #include "machine-id-setup.h"
  56 #include "manager.h"
  57 #include "mkdir.h"
  58 #include "mount-setup.h"
  59 #include "os-util.h"
  60 #include "pager.h"
  61 #include "parse-argument.h"
  62 #include "parse-util.h"
  63 #include "path-util.h"
  64 #include "pretty-print.h"
  65 #include "proc-cmdline.h"
  66 #include "process-util.h"
  67 #include "random-util.h"
  68 #include "raw-clone.h"
  69 #include "rlimit-util.h"
  70 #if HAVE_SECCOMP
  71 #include "seccomp-util.h"
  72 #endif
  73 #include "selinux-setup.h"
  74 #include "selinux-util.h"
  75 #include "signal-util.h"
  76 #include "smack-setup.h"
  77 #include "special.h"
  78 #include "stat-util.h"
  79 #include "stdio-util.h"
  80 #include "strv.h"
  81 #include "switch-root.h"
  82 #include "sysctl-util.h"
  83 #include "terminal-util.h"
  84 #include "umask-util.h"
  85 #include "user-util.h"
  86 #include "util.h"
  87 #include "virt.h"
  88 #include "watchdog.h"
  89
  90 #if HAS_FEATURE_ADDRESS_SANITIZER
  91 #include <sanitizer/lsan_interface.h>
  92 #endif
  93
/* Default TasksMax value, written as a compound literal: 15U over 100U. */
#define DEFAULT_TASKS_MAX ((TasksMax) { 15U, 100U }) /* 15% */

/* The operation this invocation should perform. It is initialized to ACTION_RUN here; the other
 * enumerators name the alternative modes (help, version, test, and the various dump/introspect
 * actions). */
static enum {
        ACTION_RUN,
        ACTION_HELP,
        ACTION_VERSION,
        ACTION_TEST,
        ACTION_DUMP_CONFIGURATION_ITEMS,
        ACTION_DUMP_BUS_PROPERTIES,
        ACTION_BUS_INTROSPECT,
} arg_action = ACTION_RUN;
 105
/* NOTE(review): presumably the argument that goes with ACTION_BUS_INTROSPECT; confirm against the
 * command line parser. */
static const char *arg_bus_introspect = NULL;

/* These variables have static storage duration and are hence zero-initialized automatically, so we
 * never access uninitialized memory. The real defaults are assigned in reset_arguments() below. */
static char *arg_default_unit;
static bool arg_system;
static bool arg_dump_core;
static int arg_crash_chvt;
static bool arg_crash_shell;
static bool arg_crash_reboot;
static char *arg_confirm_spawn;
static ShowStatus arg_show_status;
static StatusUnitFormat arg_status_unit_format;
static bool arg_switched_root;
static PagerFlags arg_pager_flags;
static bool arg_service_watchdogs;
static ExecOutput arg_default_std_output;
static ExecOutput arg_default_std_error;
static usec_t arg_default_restart_usec;
static usec_t arg_default_timeout_start_usec;
static usec_t arg_default_timeout_stop_usec;
static usec_t arg_default_timeout_abort_usec;
static bool arg_default_timeout_abort_set;
static usec_t arg_default_start_limit_interval;
static unsigned arg_default_start_limit_burst;
static usec_t arg_runtime_watchdog;
static usec_t arg_reboot_watchdog;
static usec_t arg_kexec_watchdog;
static char *arg_early_core_pattern;
static char *arg_watchdog_device;
static char **arg_default_environment;
static struct rlimit *arg_default_rlimit[_RLIMIT_MAX];
static uint64_t arg_capability_bounding_set;
static bool arg_no_new_privs;
static nsec_t arg_timer_slack_nsec;
static usec_t arg_default_timer_accuracy_usec;
static Set* arg_syscall_archs;
static FILE* arg_serialization;
static int arg_default_cpu_accounting;
static bool arg_default_io_accounting;
static bool arg_default_ip_accounting;
static bool arg_default_blockio_accounting;
static bool arg_default_memory_accounting;
static bool arg_default_tasks_accounting;
static TasksMax arg_default_tasks_max;
static sd_id128_t arg_machine_id;
static EmergencyAction arg_cad_burst_action;
static OOMPolicy arg_default_oom_policy;
static CPUSet arg_cpu_affinity;
static NUMAPolicy arg_numa_policy;
static usec_t arg_clock_usec;
static void *arg_random_seed;
static size_t arg_random_seed_size;

/* A copy of the original environment block */
static char **saved_env = NULL;

/* Forward declaration, so that code placed before the definition can call it. */
static int parse_configuration(const struct rlimit *saved_rlimit_nofile,
                               const struct rlimit *saved_rlimit_memlock);
 165
 166 _noreturn_ static void freeze_or_exit_or_reboot(void) {
 167
 168         /* If we are running in a container, let's prefer exiting, after all we can propagate an exit code to
 169          * the container manager, and thus inform it that something went wrong. */
 170         if (detect_container() > 0) {
 171                 log_emergency("Exiting PID 1...");
 172                 _exit(EXIT_EXCEPTION);
 173         }
 174
 175         if (arg_crash_reboot) {
 176                 log_notice("Rebooting in 10s...");
 177                 (void) sleep(10);
 178
 179                 log_notice("Rebooting now...");
 180                 (void) reboot(RB_AUTOBOOT);
 181                 log_emergency_errno(errno, "Failed to reboot: %m");
 182         }
 183
 184         log_emergency("Freezing execution.");
 185         freeze();
 186 }
 187
 188 _noreturn_ static void crash(int sig) {
 189         struct sigaction sa;
 190         pid_t pid;
 191
 192         if (getpid_cached() != 1)
 193                 /* Pass this on immediately, if this is not PID 1 */
 194                 (void) raise(sig);
 195         else if (!arg_dump_core)
 196                 log_emergency("Caught <%s>, not dumping core.", signal_to_string(sig));
 197         else {
 198                 sa = (struct sigaction) {
 199                         .sa_handler = nop_signal_handler,
 200                         .sa_flags = SA_NOCLDSTOP|SA_RESTART,
 201                 };
 202
 203                 /* We want to wait for the core process, hence let's enable SIGCHLD */
 204                 (void) sigaction(SIGCHLD, &sa, NULL);
 205
 206                 pid = raw_clone(SIGCHLD);
 207                 if (pid < 0)
 208                         log_emergency_errno(errno, "Caught <%s>, cannot fork for core dump: %m", signal_to_string(sig));
 209                 else if (pid == 0) {
 210                         /* Enable default signal handler for core dump */
 211
 212                         sa = (struct sigaction) {
 213                                 .sa_handler = SIG_DFL,
 214                         };
 215                         (void) sigaction(sig, &sa, NULL);
 216
 217                         /* Don't limit the coredump size */
 218                         (void) setrlimit(RLIMIT_CORE, &RLIMIT_MAKE_CONST(RLIM_INFINITY));
 219
 220                         /* Just to be sure... */
 221                         (void) chdir("/");
 222
 223                         /* Raise the signal again */
 224                         pid = raw_getpid();
 225                         (void) kill(pid, sig); /* raise() would kill the parent */
 226
 227                         assert_not_reached("We shouldn't be here...");
 228                         _exit(EXIT_EXCEPTION);
 229                 } else {
 230                         siginfo_t status;
 231                         int r;
 232
 233                         /* Order things nicely. */
 234                         r = wait_for_terminate(pid, &status);
 235                         if (r < 0)
 236                                 log_emergency_errno(r, "Caught <%s>, waitpid() failed: %m", signal_to_string(sig));
 237                         else if (status.si_code != CLD_DUMPED) {
 238                                 const char *s = status.si_code == CLD_EXITED
 239                                         ? exit_status_to_string(status.si_status, EXIT_STATUS_LIBC)
 240                                         : signal_to_string(status.si_status);
 241
 242                                 log_emergency("Caught <%s>, core dump failed (child "PID_FMT", code=%s, status=%i/%s).",
 243                                               signal_to_string(sig),
 244                                               pid,
 245                                               sigchld_code_to_string(status.si_code),
 246                                               status.si_status, strna(s));
 247                         } else
 248                                 log_emergency("Caught <%s>, dumped core as pid "PID_FMT".",
 249                                               signal_to_string(sig), pid);
 250                 }
 251         }
 252
 253         if (arg_crash_chvt >= 0)
 254                 (void) chvt(arg_crash_chvt);
 255
 256         sa = (struct sigaction) {
 257                 .sa_handler = SIG_IGN,
 258                 .sa_flags = SA_NOCLDSTOP|SA_NOCLDWAIT|SA_RESTART,
 259         };
 260
 261         /* Let the kernel reap children for us */
 262         (void) sigaction(SIGCHLD, &sa, NULL);
 263
 264         if (arg_crash_shell) {
 265                 log_notice("Executing crash shell in 10s...");
 266                 (void) sleep(10);
 267
 268                 pid = raw_clone(SIGCHLD);
 269                 if (pid < 0)
 270                         log_emergency_errno(errno, "Failed to fork off crash shell: %m");
 271                 else if (pid == 0) {
 272                         (void) setsid();
 273                         (void) make_console_stdio();
 274                         (void) rlimit_nofile_safe();
 275                         (void) execle("/bin/sh", "/bin/sh", NULL, environ);
 276
 277                         log_emergency_errno(errno, "execle() failed: %m");
 278                         _exit(EXIT_EXCEPTION);
 279                 } else {
 280                         log_info("Spawned crash shell as PID "PID_FMT".", pid);
 281                         (void) wait_for_terminate(pid, NULL);
 282                 }
 283         }
 284
 285         freeze_or_exit_or_reboot();
 286 }
 287
 288 static void install_crash_handler(void) {
 289         static const struct sigaction sa = {
 290                 .sa_handler = crash,
 291                 .sa_flags = SA_NODEFER, /* So that we can raise the signal again from the signal handler */
 292         };
 293         int r;
 294
 295         /* We ignore the return value here, since, we don't mind if we
 296          * cannot set up a crash handler */
 297         r = sigaction_many(&sa, SIGNALS_CRASH_HANDLER, -1);
 298         if (r < 0)
 299                 log_debug_errno(r, "I had trouble setting up the crash handler, ignoring: %m");
 300 }
 301
 302 static int console_setup(void) {
 303         _cleanup_close_ int tty_fd = -1;
 304         int r;
 305
 306         tty_fd = open_terminal("/dev/console", O_WRONLY|O_NOCTTY|O_CLOEXEC);
 307         if (tty_fd < 0)
 308                 return log_error_errno(tty_fd, "Failed to open /dev/console: %m");
 309
 310         /* We don't want to force text mode.  plymouth may be showing
 311          * pictures already from initrd. */
 312         r = reset_terminal_fd(tty_fd, false);
 313         if (r < 0)
 314                 return log_error_errno(r, "Failed to reset /dev/console: %m");
 315
 316         return 0;
 317 }
 318
 319 static int set_machine_id(const char *m) {
 320         sd_id128_t t;
 321         assert(m);
 322
 323         if (sd_id128_from_string(m, &t) < 0)
 324                 return -EINVAL;
 325
 326         if (sd_id128_is_null(t))
 327                 return -EINVAL;
 328
 329         arg_machine_id = t;
 330         return 0;
 331 }
 332
 333 static int parse_proc_cmdline_item(const char *key, const char *value, void *data) {
 334         int r;
 335
 336         assert(key);
 337
 338         if (STR_IN_SET(key, "systemd.unit", "rd.systemd.unit")) {
 339
 340                 if (proc_cmdline_value_missing(key, value))
 341                         return 0;
 342
 343                 if (!unit_name_is_valid(value, UNIT_NAME_PLAIN|UNIT_NAME_INSTANCE))
 344                         log_warning("Unit name specified on %s= is not valid, ignoring: %s", key, value);
 345                 else if (in_initrd() == !!startswith(key, "rd."))
 346                         return free_and_strdup_warn(&arg_default_unit, value);
 347
 348         } else if (proc_cmdline_key_streq(key, "systemd.dump_core")) {
 349
 350                 r = value ? parse_boolean(value) : true;
 351                 if (r < 0)
 352                         log_warning_errno(r, "Failed to parse dump core switch %s, ignoring: %m", value);
 353                 else
 354                         arg_dump_core = r;
 355
 356         } else if (proc_cmdline_key_streq(key, "systemd.early_core_pattern")) {
 357
 358                 if (proc_cmdline_value_missing(key, value))
 359                         return 0;
 360
 361                 if (path_is_absolute(value))
 362                         (void) parse_path_argument(value, false, &arg_early_core_pattern);
 363                 else
 364                         log_warning("Specified core pattern '%s' is not an absolute path, ignoring.", value);
 365
 366         } else if (proc_cmdline_key_streq(key, "systemd.crash_chvt")) {
 367
 368                 if (!value)
 369                         arg_crash_chvt = 0; /* turn on */
 370                 else {
 371                         r = parse_crash_chvt(value, &arg_crash_chvt);
 372                         if (r < 0)
 373                                 log_warning_errno(r, "Failed to parse crash chvt switch %s, ignoring: %m", value);
 374                 }
 375
 376         } else if (proc_cmdline_key_streq(key, "systemd.crash_shell")) {
 377
 378                 r = value ? parse_boolean(value) : true;
 379                 if (r < 0)
 380                         log_warning_errno(r, "Failed to parse crash shell switch %s, ignoring: %m", value);
 381                 else
 382                         arg_crash_shell = r;
 383
 384         } else if (proc_cmdline_key_streq(key, "systemd.crash_reboot")) {
 385
 386                 r = value ? parse_boolean(value) : true;
 387                 if (r < 0)
 388                         log_warning_errno(r, "Failed to parse crash reboot switch %s, ignoring: %m", value);
 389                 else
 390                         arg_crash_reboot = r;
 391
 392         } else if (proc_cmdline_key_streq(key, "systemd.confirm_spawn")) {
 393                 char *s;
 394
 395                 r = parse_confirm_spawn(value, &s);
 396                 if (r < 0)
 397                         log_warning_errno(r, "Failed to parse confirm_spawn switch %s, ignoring: %m", value);
 398                 else
 399                         free_and_replace(arg_confirm_spawn, s);
 400
 401         } else if (proc_cmdline_key_streq(key, "systemd.service_watchdogs")) {
 402
 403                 r = value ? parse_boolean(value) : true;
 404                 if (r < 0)
 405                         log_warning_errno(r, "Failed to parse service watchdog switch %s, ignoring: %m", value);
 406                 else
 407                         arg_service_watchdogs = r;
 408
 409         } else if (proc_cmdline_key_streq(key, "systemd.show_status")) {
 410
 411                 if (value) {
 412                         r = parse_show_status(value, &arg_show_status);
 413                         if (r < 0)
 414                                 log_warning_errno(r, "Failed to parse show status switch %s, ignoring: %m", value);
 415                 } else
 416                         arg_show_status = SHOW_STATUS_YES;
 417
 418         } else if (proc_cmdline_key_streq(key, "systemd.status_unit_format")) {
 419
 420                 if (proc_cmdline_value_missing(key, value))
 421                         return 0;
 422
 423                 r = status_unit_format_from_string(value);
 424                 if (r < 0)
 425                         log_warning_errno(r, "Failed to parse %s=%s, ignoring: %m", key, value);
 426                 else
 427                         arg_status_unit_format = r;
 428
 429         } else if (proc_cmdline_key_streq(key, "systemd.default_standard_output")) {
 430
 431                 if (proc_cmdline_value_missing(key, value))
 432                         return 0;
 433
 434                 r = exec_output_from_string(value);
 435                 if (r < 0)
 436                         log_warning_errno(r, "Failed to parse default standard output switch %s, ignoring: %m", value);
 437                 else
 438                         arg_default_std_output = r;
 439
 440         } else if (proc_cmdline_key_streq(key, "systemd.default_standard_error")) {
 441
 442                 if (proc_cmdline_value_missing(key, value))
 443                         return 0;
 444
 445                 r = exec_output_from_string(value);
 446                 if (r < 0)
 447                         log_warning_errno(r, "Failed to parse default standard error switch %s, ignoring: %m", value);
 448                 else
 449                         arg_default_std_error = r;
 450
 451         } else if (streq(key, "systemd.setenv")) {
 452
 453                 if (proc_cmdline_value_missing(key, value))
 454                         return 0;
 455
 456                 if (!env_assignment_is_valid(value))
 457                         log_warning("Environment variable assignment '%s' is not valid. Ignoring.", value);
 458                 else {
 459                         r = strv_env_replace_strdup(&arg_default_environment, value);
 460                         if (r < 0)
 461                                 return log_oom();
 462                 }
 463
 464         } else if (proc_cmdline_key_streq(key, "systemd.machine_id")) {
 465
 466                 if (proc_cmdline_value_missing(key, value))
 467                         return 0;
 468
 469                 r = set_machine_id(value);
 470                 if (r < 0)
 471                         log_warning_errno(r, "MachineID '%s' is not valid, ignoring: %m", value);
 472
 473         } else if (proc_cmdline_key_streq(key, "systemd.default_timeout_start_sec")) {
 474
 475                 if (proc_cmdline_value_missing(key, value))
 476                         return 0;
 477
 478                 r = parse_sec(value, &arg_default_timeout_start_usec);
 479                 if (r < 0)
 480                         log_warning_errno(r, "Failed to parse default start timeout '%s', ignoring: %m", value);
 481
 482                 if (arg_default_timeout_start_usec <= 0)
 483                         arg_default_timeout_start_usec = USEC_INFINITY;
 484
 485         } else if (proc_cmdline_key_streq(key, "systemd.cpu_affinity")) {
 486
 487                 if (proc_cmdline_value_missing(key, value))
 488                         return 0;
 489
 490                 r = parse_cpu_set(value, &arg_cpu_affinity);
 491                 if (r < 0)
 492                         log_warning_errno(r, "Failed to parse CPU affinity mask '%s', ignoring: %m", value);
 493
 494         } else if (proc_cmdline_key_streq(key, "systemd.watchdog_device")) {
 495
 496                 if (proc_cmdline_value_missing(key, value))
 497                         return 0;
 498
 499                 (void) parse_path_argument(value, false, &arg_watchdog_device);
 500
 501         } else if (proc_cmdline_key_streq(key, "systemd.clock_usec")) {
 502
 503                 if (proc_cmdline_value_missing(key, value))
 504                         return 0;
 505
 506                 r = safe_atou64(value, &arg_clock_usec);
 507                 if (r < 0)
 508                         log_warning_errno(r, "Failed to parse systemd.clock_usec= argument, ignoring: %s", value);
 509
 510         } else if (proc_cmdline_key_streq(key, "systemd.random_seed")) {
 511                 void *p;
 512                 size_t sz;
 513
 514                 if (proc_cmdline_value_missing(key, value))
 515                         return 0;
 516
 517                 r = unbase64mem(value, (size_t) -1, &p, &sz);
 518                 if (r < 0)
 519                         log_warning_errno(r, "Failed to parse systemd.random_seed= argument, ignoring: %s", value);
 520
 521                 free(arg_random_seed);
 522                 arg_random_seed = sz > 0 ? p : mfree(p);
 523                 arg_random_seed_size = sz;
 524
 525         } else if (streq(key, "quiet") && !value) {
 526
 527                 if (arg_show_status == _SHOW_STATUS_INVALID)
 528                         arg_show_status = SHOW_STATUS_ERROR;
 529
 530         } else if (streq(key, "debug") && !value) {
 531
 532                 /* Note that log_parse_environment() handles 'debug'
 533                  * too, and sets the log level to LOG_DEBUG. */
 534
 535                 if (detect_container() > 0)
 536                         log_set_target(LOG_TARGET_CONSOLE);
 537
 538         } else if (!value) {
 539                 const char *target;
 540
 541                 /* Compatible with SysV, but supported independently even if SysV compatibility is disabled. */
 542                 target = runlevel_to_target(key);
 543                 if (target)
 544                         return free_and_strdup_warn(&arg_default_unit, target);
 545         }
 546
 547         return 0;
 548 }
 549
 550 #define DEFINE_SETTER(name, func, descr)                              \
 551         static int name(const char *unit,                             \
 552                         const char *filename,                         \
 553                         unsigned line,                                \
 554                         const char *section,                          \
 555                         unsigned section_line,                        \
 556                         const char *lvalue,                           \
 557                         int ltype,                                    \
 558                         const char *rvalue,                           \
 559                         void *data,                                   \
 560                         void *userdata) {                             \
 561                                                                       \
 562                 int r;                                                \
 563                                                                       \
 564                 assert(filename);                                     \
 565                 assert(lvalue);                                       \
 566                 assert(rvalue);                                       \
 567                                                                       \
 568                 r = func(rvalue);                                     \
 569                 if (r < 0)                                            \
 570                         log_syntax(unit, LOG_ERR, filename, line, r,  \
 571                                    "Invalid " descr "'%s': %m",       \
 572                                    rvalue);                           \
 573                                                                       \
 574                 return 0;                                             \
 575         }
 576
 577 DEFINE_SETTER(config_parse_level2, log_set_max_level_from_string, "log level");
 578 DEFINE_SETTER(config_parse_target, log_set_target_from_string, "target");
 579 DEFINE_SETTER(config_parse_color, log_show_color_from_string, "color");
 580 DEFINE_SETTER(config_parse_location, log_show_location_from_string, "location");
 581 DEFINE_SETTER(config_parse_time, log_show_time_from_string, "time");
 582
 583 static int config_parse_default_timeout_abort(
 584                 const char *unit,
 585                 const char *filename,
 586                 unsigned line,
 587                 const char *section,
 588                 unsigned section_line,
 589                 const char *lvalue,
 590                 int ltype,
 591                 const char *rvalue,
 592                 void *data,
 593                 void *userdata) {
 594         int r;
 595
 596         r = config_parse_timeout_abort(unit, filename, line, section, section_line, lvalue, ltype, rvalue,
 597                                        &arg_default_timeout_abort_usec, userdata);
 598         if (r >= 0)
 599                 arg_default_timeout_abort_set = r;
 600         return 0;
 601 }
 602
 603 static int parse_config_file(void) {
 604         const ConfigTableItem items[] = {
 605                 { "Manager", "LogLevel",                     config_parse_level2,                0, NULL                                   },
 606                 { "Manager", "LogTarget",                    config_parse_target,                0, NULL                                   },
 607                 { "Manager", "LogColor",                     config_parse_color,                 0, NULL                                   },
 608                 { "Manager", "LogLocation",                  config_parse_location,              0, NULL                                   },
 609                 { "Manager", "LogTime",                      config_parse_time,                  0, NULL                                   },
 610                 { "Manager", "DumpCore",                     config_parse_bool,                  0, &arg_dump_core                         },
 611                 { "Manager", "CrashChVT", /* legacy */       config_parse_crash_chvt,            0, &arg_crash_chvt                        },
 612                 { "Manager", "CrashChangeVT",                config_parse_crash_chvt,            0, &arg_crash_chvt                        },
 613                 { "Manager", "CrashShell",                   config_parse_bool,                  0, &arg_crash_shell                       },
 614                 { "Manager", "CrashReboot",                  config_parse_bool,                  0, &arg_crash_reboot                      },
 615                 { "Manager", "ShowStatus",                   config_parse_show_status,           0, &arg_show_status                       },
 616                 { "Manager", "StatusUnitFormat",             config_parse_status_unit_format,    0, &arg_status_unit_format                },
 617                 { "Manager", "CPUAffinity",                  config_parse_cpu_affinity2,         0, &arg_cpu_affinity                      },
 618                 { "Manager", "NUMAPolicy",                   config_parse_numa_policy,           0, &arg_numa_policy.type                  },
 619                 { "Manager", "NUMAMask",                     config_parse_numa_mask,             0, &arg_numa_policy                       },
 620                 { "Manager", "JoinControllers",              config_parse_warn_compat,           DISABLED_CONFIGURATION, NULL              },
 621                 { "Manager", "RuntimeWatchdogSec",           config_parse_sec,                   0, &arg_runtime_watchdog                  },
 622                 { "Manager", "RebootWatchdogSec",            config_parse_sec,                   0, &arg_reboot_watchdog                   },
 623                 { "Manager", "ShutdownWatchdogSec",          config_parse_sec,                   0, &arg_reboot_watchdog                   }, /* obsolete alias */
 624                 { "Manager", "KExecWatchdogSec",             config_parse_sec,                   0, &arg_kexec_watchdog                    },
 625                 { "Manager", "WatchdogDevice",               config_parse_path,                  0, &arg_watchdog_device                   },
 626                 { "Manager", "CapabilityBoundingSet",        config_parse_capability_set,        0, &arg_capability_bounding_set           },
 627                 { "Manager", "NoNewPrivileges",              config_parse_bool,                  0, &arg_no_new_privs                      },
 628 #if HAVE_SECCOMP
 629                 { "Manager", "SystemCallArchitectures",      config_parse_syscall_archs,         0, &arg_syscall_archs                     },
 630 #endif
 631                 { "Manager", "TimerSlackNSec",               config_parse_nsec,                  0, &arg_timer_slack_nsec                  },
 632                 { "Manager", "DefaultTimerAccuracySec",      config_parse_sec,                   0, &arg_default_timer_accuracy_usec       },
 633                 { "Manager", "DefaultStandardOutput",        config_parse_output_restricted,     0, &arg_default_std_output                },
 634                 { "Manager", "DefaultStandardError",         config_parse_output_restricted,     0, &arg_default_std_error                 },
 635                 { "Manager", "DefaultTimeoutStartSec",       config_parse_sec,                   0, &arg_default_timeout_start_usec        },
 636                 { "Manager", "DefaultTimeoutStopSec",        config_parse_sec,                   0, &arg_default_timeout_stop_usec         },
 637                 { "Manager", "DefaultTimeoutAbortSec",       config_parse_default_timeout_abort, 0, NULL         },
 638                 { "Manager", "DefaultRestartSec",            config_parse_sec,                   0, &arg_default_restart_usec              },
 639                 { "Manager", "DefaultStartLimitInterval",    config_parse_sec,                   0, &arg_default_start_limit_interval      }, /* obsolete alias */
 640                 { "Manager", "DefaultStartLimitIntervalSec", config_parse_sec,                   0, &arg_default_start_limit_interval      },
 641                 { "Manager", "DefaultStartLimitBurst",       config_parse_unsigned,              0, &arg_default_start_limit_burst         },
 642                 { "Manager", "DefaultEnvironment",           config_parse_environ,               0, &arg_default_environment               },
 643                 { "Manager", "DefaultLimitCPU",              config_parse_rlimit,                RLIMIT_CPU, arg_default_rlimit            },
 644                 { "Manager", "DefaultLimitFSIZE",            config_parse_rlimit,                RLIMIT_FSIZE, arg_default_rlimit          },
 645                 { "Manager", "DefaultLimitDATA",             config_parse_rlimit,                RLIMIT_DATA, arg_default_rlimit           },
 646                 { "Manager", "DefaultLimitSTACK",            config_parse_rlimit,                RLIMIT_STACK, arg_default_rlimit          },
 647                 { "Manager", "DefaultLimitCORE",             config_parse_rlimit,                RLIMIT_CORE, arg_default_rlimit           },
 648                 { "Manager", "DefaultLimitRSS",              config_parse_rlimit,                RLIMIT_RSS, arg_default_rlimit            },
 649                 { "Manager", "DefaultLimitNOFILE",           config_parse_rlimit,                RLIMIT_NOFILE, arg_default_rlimit         },
 650                 { "Manager", "DefaultLimitAS",               config_parse_rlimit,                RLIMIT_AS, arg_default_rlimit             },
 651                 { "Manager", "DefaultLimitNPROC",            config_parse_rlimit,                RLIMIT_NPROC, arg_default_rlimit          },
 652                 { "Manager", "DefaultLimitMEMLOCK",          config_parse_rlimit,                RLIMIT_MEMLOCK, arg_default_rlimit        },
 653                 { "Manager", "DefaultLimitLOCKS",            config_parse_rlimit,                RLIMIT_LOCKS, arg_default_rlimit          },
 654                 { "Manager", "DefaultLimitSIGPENDING",       config_parse_rlimit,                RLIMIT_SIGPENDING, arg_default_rlimit     },
 655                 { "Manager", "DefaultLimitMSGQUEUE",         config_parse_rlimit,                RLIMIT_MSGQUEUE, arg_default_rlimit       },
 656                 { "Manager", "DefaultLimitNICE",             config_parse_rlimit,                RLIMIT_NICE, arg_default_rlimit           },
 657                 { "Manager", "DefaultLimitRTPRIO",           config_parse_rlimit,                RLIMIT_RTPRIO, arg_default_rlimit         },
 658                 { "Manager", "DefaultLimitRTTIME",           config_parse_rlimit,                RLIMIT_RTTIME, arg_default_rlimit         },
 659                 { "Manager", "DefaultCPUAccounting",         config_parse_tristate,              0, &arg_default_cpu_accounting            },
 660                 { "Manager", "DefaultIOAccounting",          config_parse_bool,                  0, &arg_default_io_accounting             },
 661                 { "Manager", "DefaultIPAccounting",          config_parse_bool,                  0, &arg_default_ip_accounting             },
 662                 { "Manager", "DefaultBlockIOAccounting",     config_parse_bool,                  0, &arg_default_blockio_accounting        },
 663                 { "Manager", "DefaultMemoryAccounting",      config_parse_bool,                  0, &arg_default_memory_accounting         },
 664                 { "Manager", "DefaultTasksAccounting",       config_parse_bool,                  0, &arg_default_tasks_accounting          },
 665                 { "Manager", "DefaultTasksMax",              config_parse_tasks_max,             0, &arg_default_tasks_max                 },
 666                 { "Manager", "CtrlAltDelBurstAction",        config_parse_emergency_action,      0, &arg_cad_burst_action                  },
 667                 { "Manager", "DefaultOOMPolicy",             config_parse_oom_policy,            0, &arg_default_oom_policy                },
 668                 {}
 669         };
 670
 671         const char *fn, *conf_dirs_nulstr;
 672
 673         fn = arg_system ?
 674                 PKGSYSCONFDIR "/system.conf" :
 675                 PKGSYSCONFDIR "/user.conf";
 676
 677         conf_dirs_nulstr = arg_system ?
 678                 CONF_PATHS_NULSTR("systemd/system.conf.d") :
 679                 CONF_PATHS_NULSTR("systemd/user.conf.d");
 680
 681         (void) config_parse_many_nulstr(
 682                         fn, conf_dirs_nulstr,
 683                         "Manager\0",
 684                         config_item_table_lookup, items,
 685                         CONFIG_PARSE_WARN,
 686                         NULL,
 687                         NULL);
 688
 689         /* Traditionally "0" was used to turn off the default unit timeouts. Fix this up so that we used USEC_INFINITY
 690          * like everywhere else. */
 691         if (arg_default_timeout_start_usec <= 0)
 692                 arg_default_timeout_start_usec = USEC_INFINITY;
 693         if (arg_default_timeout_stop_usec <= 0)
 694                 arg_default_timeout_stop_usec = USEC_INFINITY;
 695
 696         return 0;
 697 }
 698
 699 static void set_manager_defaults(Manager *m) {
 700
 701         assert(m);
 702
 703         /* Propagates the various default unit property settings into the manager object, i.e. properties that do not
 704          * affect the manager itself, but are just what newly allocated units will have set if they haven't set
 705          * anything else. (Also see set_manager_settings() for the settings that affect the manager's own behaviour) */
 706
 707         m->default_timer_accuracy_usec = arg_default_timer_accuracy_usec;
 708         m->default_std_output = arg_default_std_output;
 709         m->default_std_error = arg_default_std_error;
 710         m->default_timeout_start_usec = arg_default_timeout_start_usec;
 711         m->default_timeout_stop_usec = arg_default_timeout_stop_usec;
 712         m->default_timeout_abort_usec = arg_default_timeout_abort_usec;
 713         m->default_timeout_abort_set = arg_default_timeout_abort_set;
 714         m->default_restart_usec = arg_default_restart_usec;
 715         m->default_start_limit_interval = arg_default_start_limit_interval;
 716         m->default_start_limit_burst = arg_default_start_limit_burst;
 717
 718         /* On 4.15+ with unified hierarchy, CPU accounting is essentially free as it doesn't require the CPU
 719          * controller to be enabled, so the default is to enable it unless we got told otherwise. */
 720         if (arg_default_cpu_accounting >= 0)
 721                 m->default_cpu_accounting = arg_default_cpu_accounting;
 722         else
 723                 m->default_cpu_accounting = cpu_accounting_is_cheap();
 724
 725         m->default_io_accounting = arg_default_io_accounting;
 726         m->default_ip_accounting = arg_default_ip_accounting;
 727         m->default_blockio_accounting = arg_default_blockio_accounting;
 728         m->default_memory_accounting = arg_default_memory_accounting;
 729         m->default_tasks_accounting = arg_default_tasks_accounting;
 730         m->default_tasks_max = arg_default_tasks_max;
 731         m->default_oom_policy = arg_default_oom_policy;
 732
 733         (void) manager_set_default_rlimits(m, arg_default_rlimit);
 734
 735         (void) manager_default_environment(m);
 736         (void) manager_transient_environment_add(m, arg_default_environment);
 737 }
 738
 739 static void set_manager_settings(Manager *m) {
 740
 741         assert(m);
 742
 743         /* Propagates the various manager settings into the manager object, i.e. properties that
 744          * effect the manager itself (as opposed to just being inherited into newly allocated
 745          * units, see set_manager_defaults() above). */
 746
 747         m->confirm_spawn = arg_confirm_spawn;
 748         m->service_watchdogs = arg_service_watchdogs;
 749         m->cad_burst_action = arg_cad_burst_action;
 750
 751         manager_set_watchdog(m, WATCHDOG_RUNTIME, arg_runtime_watchdog);
 752         manager_set_watchdog(m, WATCHDOG_REBOOT, arg_reboot_watchdog);
 753         manager_set_watchdog(m, WATCHDOG_KEXEC, arg_kexec_watchdog);
 754
 755         manager_set_show_status(m, arg_show_status, "commandline");
 756         m->status_unit_format = arg_status_unit_format;
 757 }
 758
 759 static int parse_argv(int argc, char *argv[]) {
 760         enum {
 761                 ARG_LOG_LEVEL = 0x100,
 762                 ARG_LOG_TARGET,
 763                 ARG_LOG_COLOR,
 764                 ARG_LOG_LOCATION,
 765                 ARG_LOG_TIME,
 766                 ARG_UNIT,
 767                 ARG_SYSTEM,
 768                 ARG_USER,
 769                 ARG_TEST,
 770                 ARG_NO_PAGER,
 771                 ARG_VERSION,
 772                 ARG_DUMP_CONFIGURATION_ITEMS,
 773                 ARG_DUMP_BUS_PROPERTIES,
 774                 ARG_BUS_INTROSPECT,
 775                 ARG_DUMP_CORE,
 776                 ARG_CRASH_CHVT,
 777                 ARG_CRASH_SHELL,
 778                 ARG_CRASH_REBOOT,
 779                 ARG_CONFIRM_SPAWN,
 780                 ARG_SHOW_STATUS,
 781                 ARG_DESERIALIZE,
 782                 ARG_SWITCHED_ROOT,
 783                 ARG_DEFAULT_STD_OUTPUT,
 784                 ARG_DEFAULT_STD_ERROR,
 785                 ARG_MACHINE_ID,
 786                 ARG_SERVICE_WATCHDOGS,
 787         };
 788
 789         static const struct option options[] = {
 790                 { "log-level",                required_argument, NULL, ARG_LOG_LEVEL                },
 791                 { "log-target",               required_argument, NULL, ARG_LOG_TARGET               },
 792                 { "log-color",                optional_argument, NULL, ARG_LOG_COLOR                },
 793                 { "log-location",             optional_argument, NULL, ARG_LOG_LOCATION             },
 794                 { "log-time",                 optional_argument, NULL, ARG_LOG_TIME                 },
 795                 { "unit",                     required_argument, NULL, ARG_UNIT                     },
 796                 { "system",                   no_argument,       NULL, ARG_SYSTEM                   },
 797                 { "user",                     no_argument,       NULL, ARG_USER                     },
 798                 { "test",                     no_argument,       NULL, ARG_TEST                     },
 799                 { "no-pager",                 no_argument,       NULL, ARG_NO_PAGER                 },
 800                 { "help",                     no_argument,       NULL, 'h'                          },
 801                 { "version",                  no_argument,       NULL, ARG_VERSION                  },
 802                 { "dump-configuration-items", no_argument,       NULL, ARG_DUMP_CONFIGURATION_ITEMS },
 803                 { "dump-bus-properties",      no_argument,       NULL, ARG_DUMP_BUS_PROPERTIES      },
 804                 { "bus-introspect",           required_argument, NULL, ARG_BUS_INTROSPECT           },
 805                 { "dump-core",                optional_argument, NULL, ARG_DUMP_CORE                },
 806                 { "crash-chvt",               required_argument, NULL, ARG_CRASH_CHVT               },
 807                 { "crash-shell",              optional_argument, NULL, ARG_CRASH_SHELL              },
 808                 { "crash-reboot",             optional_argument, NULL, ARG_CRASH_REBOOT             },
 809                 { "confirm-spawn",            optional_argument, NULL, ARG_CONFIRM_SPAWN            },
 810                 { "show-status",              optional_argument, NULL, ARG_SHOW_STATUS              },
 811                 { "deserialize",              required_argument, NULL, ARG_DESERIALIZE              },
 812                 { "switched-root",            no_argument,       NULL, ARG_SWITCHED_ROOT            },
 813                 { "default-standard-output",  required_argument, NULL, ARG_DEFAULT_STD_OUTPUT,      },
 814                 { "default-standard-error",   required_argument, NULL, ARG_DEFAULT_STD_ERROR,       },
 815                 { "machine-id",               required_argument, NULL, ARG_MACHINE_ID               },
 816                 { "service-watchdogs",        required_argument, NULL, ARG_SERVICE_WATCHDOGS        },
 817                 {}
 818         };
 819
 820         int c, r;
 821         bool user_arg_seen = false;
 822
 823         assert(argc >= 1);
 824         assert(argv);
 825
 826         if (getpid_cached() == 1)
 827                 opterr = 0;
 828
 829         while ((c = getopt_long(argc, argv, "hDbsz:", options, NULL)) >= 0)
 830
 831                 switch (c) {
 832
 833                 case ARG_LOG_LEVEL:
 834                         r = log_set_max_level_from_string(optarg);
 835                         if (r < 0)
 836                                 return log_error_errno(r, "Failed to parse log level \"%s\": %m", optarg);
 837
 838                         break;
 839
 840                 case ARG_LOG_TARGET:
 841                         r = log_set_target_from_string(optarg);
 842                         if (r < 0)
 843                                 return log_error_errno(r, "Failed to parse log target \"%s\": %m", optarg);
 844
 845                         break;
 846
 847                 case ARG_LOG_COLOR:
 848
 849                         if (optarg) {
 850                                 r = log_show_color_from_string(optarg);
 851                                 if (r < 0)
 852                                         return log_error_errno(r, "Failed to parse log color setting \"%s\": %m",
 853                                                                optarg);
 854                         } else
 855                                 log_show_color(true);
 856
 857                         break;
 858
 859                 case ARG_LOG_LOCATION:
 860                         if (optarg) {
 861                                 r = log_show_location_from_string(optarg);
 862                                 if (r < 0)
 863                                         return log_error_errno(r, "Failed to parse log location setting \"%s\": %m",
 864                                                                optarg);
 865                         } else
 866                                 log_show_location(true);
 867
 868                         break;
 869
 870                 case ARG_LOG_TIME:
 871
 872                         if (optarg) {
 873                                 r = log_show_time_from_string(optarg);
 874                                 if (r < 0)
 875                                         return log_error_errno(r, "Failed to parse log time setting \"%s\": %m",
 876                                                                optarg);
 877                         } else
 878                                 log_show_time(true);
 879
 880                         break;
 881
 882                 case ARG_DEFAULT_STD_OUTPUT:
 883                         r = exec_output_from_string(optarg);
 884                         if (r < 0)
 885                                 return log_error_errno(r, "Failed to parse default standard output setting \"%s\": %m",
 886                                                        optarg);
 887                         arg_default_std_output = r;
 888                         break;
 889
 890                 case ARG_DEFAULT_STD_ERROR:
 891                         r = exec_output_from_string(optarg);
 892                         if (r < 0)
 893                                 return log_error_errno(r, "Failed to parse default standard error output setting \"%s\": %m",
 894                                                        optarg);
 895                         arg_default_std_error = r;
 896                         break;
 897
 898                 case ARG_UNIT:
 899                         r = free_and_strdup(&arg_default_unit, optarg);
 900                         if (r < 0)
 901                                 return log_error_errno(r, "Failed to set default unit \"%s\": %m", optarg);
 902
 903                         break;
 904
 905                 case ARG_SYSTEM:
 906                         arg_system = true;
 907                         break;
 908
 909                 case ARG_USER:
 910                         arg_system = false;
 911                         user_arg_seen = true;
 912                         break;
 913
 914                 case ARG_TEST:
 915                         arg_action = ACTION_TEST;
 916                         break;
 917
 918                 case ARG_NO_PAGER:
 919                         arg_pager_flags |= PAGER_DISABLE;
 920                         break;
 921
 922                 case ARG_VERSION:
 923                         arg_action = ACTION_VERSION;
 924                         break;
 925
 926                 case ARG_DUMP_CONFIGURATION_ITEMS:
 927                         arg_action = ACTION_DUMP_CONFIGURATION_ITEMS;
 928                         break;
 929
 930                 case ARG_DUMP_BUS_PROPERTIES:
 931                         arg_action = ACTION_DUMP_BUS_PROPERTIES;
 932                         break;
 933
 934                 case ARG_BUS_INTROSPECT:
 935                         arg_bus_introspect = optarg;
 936                         arg_action = ACTION_BUS_INTROSPECT;
 937                         break;
 938
 939                 case ARG_DUMP_CORE:
 940                         if (!optarg)
 941                                 arg_dump_core = true;
 942                         else {
 943                                 r = parse_boolean(optarg);
 944                                 if (r < 0)
 945                                         return log_error_errno(r, "Failed to parse dump core boolean: \"%s\": %m",
 946                                                                optarg);
 947                                 arg_dump_core = r;
 948                         }
 949                         break;
 950
 951                 case ARG_CRASH_CHVT:
 952                         r = parse_crash_chvt(optarg, &arg_crash_chvt);
 953                         if (r < 0)
 954                                 return log_error_errno(r, "Failed to parse crash virtual terminal index: \"%s\": %m",
 955                                                        optarg);
 956                         break;
 957
 958                 case ARG_CRASH_SHELL:
 959                         if (!optarg)
 960                                 arg_crash_shell = true;
 961                         else {
 962                                 r = parse_boolean(optarg);
 963                                 if (r < 0)
 964                                         return log_error_errno(r, "Failed to parse crash shell boolean: \"%s\": %m",
 965                                                                optarg);
 966                                 arg_crash_shell = r;
 967                         }
 968                         break;
 969
 970                 case ARG_CRASH_REBOOT:
 971                         if (!optarg)
 972                                 arg_crash_reboot = true;
 973                         else {
 974                                 r = parse_boolean(optarg);
 975                                 if (r < 0)
 976                                         return log_error_errno(r, "Failed to parse crash shell boolean: \"%s\": %m",
 977                                                                optarg);
 978                                 arg_crash_reboot = r;
 979                         }
 980                         break;
 981
 982                 case ARG_CONFIRM_SPAWN:
 983                         arg_confirm_spawn = mfree(arg_confirm_spawn);
 984
 985                         r = parse_confirm_spawn(optarg, &arg_confirm_spawn);
 986                         if (r < 0)
 987                                 return log_error_errno(r, "Failed to parse confirm spawn option: \"%s\": %m",
 988                                                        optarg);
 989                         break;
 990
 991                 case ARG_SERVICE_WATCHDOGS:
 992                         r = parse_boolean(optarg);
 993                         if (r < 0)
 994                                 return log_error_errno(r, "Failed to parse service watchdogs boolean: \"%s\": %m",
 995                                                        optarg);
 996                         arg_service_watchdogs = r;
 997                         break;
 998
 999                 case ARG_SHOW_STATUS:
1000                         if (optarg) {
1001                                 r = parse_show_status(optarg, &arg_show_status);
1002                                 if (r < 0)
1003                                         return log_error_errno(r, "Failed to parse show status boolean: \"%s\": %m",
1004                                                                optarg);
1005                         } else
1006                                 arg_show_status = SHOW_STATUS_YES;
1007                         break;
1008
1009                 case ARG_DESERIALIZE: {
1010                         int fd;
1011                         FILE *f;
1012
1013                         r = safe_atoi(optarg, &fd);
1014                         if (r < 0)
1015                                 log_error_errno(r, "Failed to parse deserialize option \"%s\": %m", optarg);
1016                         if (fd < 0)
1017                                 return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
1018                                                        "Invalid deserialize fd: %d",
1019                                                        fd);
1020
1021                         (void) fd_cloexec(fd, true);
1022
1023                         f = fdopen(fd, "r");
1024                         if (!f)
1025                                 return log_error_errno(errno, "Failed to open serialization fd %d: %m", fd);
1026
1027                         safe_fclose(arg_serialization);
1028                         arg_serialization = f;
1029
1030                         break;
1031                 }
1032
1033                 case ARG_SWITCHED_ROOT:
1034                         arg_switched_root = true;
1035                         break;
1036
1037                 case ARG_MACHINE_ID:
1038                         r = set_machine_id(optarg);
1039                         if (r < 0)
1040                                 return log_error_errno(r, "MachineID '%s' is not valid: %m", optarg);
1041                         break;
1042
1043                 case 'h':
1044                         arg_action = ACTION_HELP;
1045                         break;
1046
1047                 case 'D':
1048                         log_set_max_level(LOG_DEBUG);
1049                         break;
1050
1051                 case 'b':
1052                 case 's':
1053                 case 'z':
1054                         /* Just to eat away the sysvinit kernel cmdline args that we'll parse in
1055                          * parse_proc_cmdline_item() or ignore, without any getopt() error messages.
1056                          */
1057                 case '?':
1058                         if (getpid_cached() != 1)
1059                                 return -EINVAL;
1060                         else
1061                                 return 0;
1062
1063                 default:
1064                         assert_not_reached("Unhandled option code.");
1065                 }
1066
1067         if (optind < argc && getpid_cached() != 1)
1068                 /* Hmm, when we aren't run as init system let's complain about excess arguments */
1069                 return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Excess arguments.");
1070
1071         if (arg_action == ACTION_RUN && !arg_system && !user_arg_seen)
1072                 return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
1073                                        "Explicit --user argument required to run as user manager.");
1074
1075         return 0;
1076 }
1077
1078 static int help(void) {
1079         _cleanup_free_ char *link = NULL;
1080         int r;
1081
1082         r = terminal_urlify_man("systemd", "1", &link);
1083         if (r < 0)
1084                 return log_oom();
1085
1086         printf("%s [OPTIONS...]\n\n"
1087                "%sStarts and monitors system and user services.%s\n\n"
1088                "This program takes no positional arguments.\n\n"
1089                "%sOptions%s:\n"
1090                "  -h --help                      Show this help\n"
1091                "     --version                   Show version\n"
1092                "     --test                      Determine initial transaction, dump it and exit\n"
1093                "     --system                    In combination with --test: operate as system service manager\n"
1094                "     --user                      In combination with --test: operate as per-user service manager\n"
1095                "     --no-pager                  Do not pipe output into a pager\n"
1096                "     --dump-configuration-items  Dump understood unit configuration items\n"
1097                "     --dump-bus-properties       Dump exposed bus properties\n"
1098                "     --bus-introspect=PATH       Write XML introspection data\n"
1099                "     --unit=UNIT                 Set default unit\n"
1100                "     --dump-core[=BOOL]          Dump core on crash\n"
1101                "     --crash-vt=NR               Change to specified VT on crash\n"
1102                "     --crash-reboot[=BOOL]       Reboot on crash\n"
1103                "     --crash-shell[=BOOL]        Run shell on crash\n"
1104                "     --confirm-spawn[=BOOL]      Ask for confirmation when spawning processes\n"
1105                "     --show-status[=BOOL]        Show status updates on the console during bootup\n"
1106                "     --log-target=TARGET         Set log target (console, journal, kmsg, journal-or-kmsg, null)\n"
1107                "     --log-level=LEVEL           Set log level (debug, info, notice, warning, err, crit, alert, emerg)\n"
1108                "     --log-color[=BOOL]          Highlight important log messages\n"
1109                "     --log-location[=BOOL]       Include code location in log messages\n"
1110                "     --log-time[=BOOL]           Prefix log messages with current time\n"
1111                "     --default-standard-output=  Set default standard output for services\n"
1112                "     --default-standard-error=   Set default standard error output for services\n"
1113                "\nSee the %s for details.\n",
1114                program_invocation_short_name,
1115                ansi_highlight(),
1116                ansi_normal(),
1117                ansi_underline(),
1118                ansi_normal(),
1119                link);
1120
1121         return 0;
1122 }
1123
1124 static int prepare_reexecute(
1125                 Manager *m,
1126                 FILE **ret_f,
1127                 FDSet **ret_fds,
1128                 bool switching_root) {
1129
1130         _cleanup_fdset_free_ FDSet *fds = NULL;
1131         _cleanup_fclose_ FILE *f = NULL;
1132         int r;
1133
1134         assert(m);
1135         assert(ret_f);
1136         assert(ret_fds);
1137
1138         r = manager_open_serialization(m, &f);
1139         if (r < 0)
1140                 return log_error_errno(r, "Failed to create serialization file: %m");
1141
1142         /* Make sure nothing is really destructed when we shut down */
1143         m->n_reloading++;
1144         bus_manager_send_reloading(m, true);
1145
1146         fds = fdset_new();
1147         if (!fds)
1148                 return log_oom();
1149
1150         r = manager_serialize(m, f, fds, switching_root);
1151         if (r < 0)
1152                 return r;
1153
1154         if (fseeko(f, 0, SEEK_SET) == (off_t) -1)
1155                 return log_error_errno(errno, "Failed to rewind serialization fd: %m");
1156
1157         r = fd_cloexec(fileno(f), false);
1158         if (r < 0)
1159                 return log_error_errno(r, "Failed to disable O_CLOEXEC for serialization: %m");
1160
1161         r = fdset_cloexec(fds, false);
1162         if (r < 0)
1163                 return log_error_errno(r, "Failed to disable O_CLOEXEC for serialization fds: %m");
1164
1165         *ret_f = TAKE_PTR(f);
1166         *ret_fds = TAKE_PTR(fds);
1167
1168         return 0;
1169 }
1170
static void bump_file_max_and_nr_open(void) {

        /* Raises the fs.file-max and fs.nr_open sysctls to their respective maximums. On current kernels
         * large numbers of file descriptors are no longer a performance problem and their memory is
         * properly tracked by memcg, so counting and limiting them in another two layers is unnecessary
         * and only complicates things. This function hence turns off 2 of the 4 levels of limits on file
         * descriptors, leaving RLIMIT_NOFILE (soft + hard) as the only ones that really matter.
         *
         * Which of the two sysctls are touched is decided at build time. Failures are logged and
         * otherwise ignored. */

#if BUMP_PROC_SYS_FS_FILE_MAX || BUMP_PROC_SYS_FS_NR_OPEN
        int r;
#endif

#if BUMP_PROC_SYS_FS_FILE_MAX
        /* Since 5.2 the maximum the kernel allows for this is LONG_MAX, hence use that. (Previously
         * things were different, but the operation would fail silently.) */
        r = sysctl_writef("fs/file-max", "%li\n", LONG_MAX);
        if (r < 0)
                log_full_errno(IN_SET(r, -EROFS, -EPERM, -EACCES) ? LOG_DEBUG : LOG_WARNING, r, "Failed to bump fs.file-max, ignoring: %m");
#endif

#if BUMP_PROC_SYS_FS_NR_OPEN
        int candidate = INT_MAX;

        /* The kernel enforces a minimum and a maximum on fs.nr_open, but does not tell us what they are.
         * The maximum is computed by an architecture dependent expression that is an implementation
         * detail of the kernel, and hence nothing we want to copy to userspace. Thus all we can do is
         * try: start with INT_MAX and keep halving the value until the kernel accepts it. Ugly, but there
         * is no better interface. */

        while (true) {
                int current;

                /* Round down to next multiple of the pointer size */
                candidate &= ~(__SIZEOF_POINTER__ - 1);
                if (candidate < 1024) {
                        log_warning("Can't bump fs.nr_open, value too small.");
                        break;
                }

                current = read_nr_open();
                if (current < 0) {
                        log_error_errno(current, "Failed to read fs.nr_open: %m");
                        break;
                }
                if (candidate <= current) {
                        /* What is in effect is already at least as large as what we would write */
                        log_debug("Skipping bump, value is already larger.");
                        break;
                }

                r = sysctl_writef("fs/nr_open", "%i\n", candidate);
                if (r >= 0) {
                        log_debug("Successfully bumped fs.nr_open to %i", candidate);
                        break;
                }
                if (r != -EINVAL) {
                        log_full_errno(IN_SET(r, -EROFS, -EPERM, -EACCES) ? LOG_DEBUG : LOG_WARNING, r, "Failed to bump fs.nr_open, ignoring: %m");
                        break;
                }

                /* EINVAL: the value was refused, try again with half of it */
                log_debug("Couldn't write fs.nr_open as %i, halving it.", candidate);
                candidate /= 2;
        }
#endif
}
1236
/* Raises our own RLIMIT_NOFILE, soft and hard, to the value read_nr_open() reports, but never below the
 * limits passed in saved_rlimit. Returns 0 if nothing had to be changed or the limit was set, and the
 * result of log_warning_errno() if setting it failed. */
static int bump_rlimit_nofile(struct rlimit *saved_rlimit) {
        struct rlimit wanted;
        rlim_t kernel_max;
        int r;

        /* The underlying absolute limit the kernel enforces */
        kernel_max = (rlim_t) read_nr_open();

        /* What we would like to have for ourselves. Never go below what we inherited. */
        wanted = (struct rlimit) {
                .rlim_cur = MAX(kernel_max, saved_rlimit->rlim_cur),
                .rlim_max = MAX(kernel_max, saved_rlimit->rlim_max),
        };

        /* If neither value would be raised there is nothing to do. */
        if (wanted.rlim_cur <= saved_rlimit->rlim_cur &&
            wanted.rlim_max <= saved_rlimit->rlim_max) {
                log_debug("RLIMIT_NOFILE is already as high or higher than we need it, not bumping.");
                return 0;
        }

        /* Substantially bump the limit for ourselves, all the way up to the maximum the kernel allows,
         * for both the soft and the hard limit. */
        r = setrlimit_closest(RLIMIT_NOFILE, &wanted);
        if (r < 0)
                return log_warning_errno(r, "Setting RLIMIT_NOFILE failed, ignoring: %m");

        return 0;
}
1265
1266 static int bump_rlimit_memlock(struct rlimit *saved_rlimit) {
1267         struct rlimit new_rlimit;
1268         uint64_t mm;
1269         int r;
1270
1271         /* BPF_MAP_TYPE_LPM_TRIE bpf maps are charged against RLIMIT_MEMLOCK, even if we have CAP_IPC_LOCK which should
1272          * normally disable such checks. We need them to implement IPAddressAllow= and IPAddressDeny=, hence let's bump
1273          * the value high enough for our user. */
1274
1275         /* Using MAX() on resource limits only is safe if RLIM_INFINITY is > 0. POSIX declares that rlim_t
1276          * must be unsigned, hence this is a given, but let's make this clear here. */
1277         assert_cc(RLIM_INFINITY > 0);
1278
1279         mm = physical_memory() / 8; /* Let's scale how much we allow to be locked by the amount of physical
1280                                      * RAM. We allow an eighth to be locked by us, just to pick a value. */
1281
1282         new_rlimit = (struct rlimit) {
1283                 .rlim_cur = MAX3(HIGH_RLIMIT_MEMLOCK, saved_rlimit->rlim_cur, mm),
1284                 .rlim_max = MAX3(HIGH_RLIMIT_MEMLOCK, saved_rlimit->rlim_max, mm),
1285         };
1286
1287         if (saved_rlimit->rlim_max >= new_rlimit.rlim_cur &&
1288             saved_rlimit->rlim_cur >= new_rlimit.rlim_max) {
1289                 log_debug("RLIMIT_MEMLOCK is already as high or higher than we need it, not bumping.");
1290                 return 0;
1291         }
1292
1293         r = setrlimit_closest(RLIMIT_MEMLOCK, &new_rlimit);
1294         if (r < 0)
1295                 return log_warning_errno(r, "Setting RLIMIT_MEMLOCK failed, ignoring: %m");
1296
1297         return 0;
1298 }
1299
/* Warns if /usr looks like an empty mount point, i.e. it is neither part of the root file system nor
 * mounted yet. Only logs, never fails. */
static void test_usr(void) {

        /* Anything but "yes, it is empty" (including errors) means there is nothing to complain about. */
        if (dir_is_empty("/usr") > 0)
                log_warning("/usr appears to be on its own filesystem and is not already mounted. This is not a supported setup. "
                            "Some things will probably break (sometimes even silently) in mysterious ways. "
                            "Consult http://freedesktop.org/wiki/Software/systemd/separate-usr-is-broken for more information.");
}
1311
1312 static int enforce_syscall_archs(Set *archs) {
1313 #if HAVE_SECCOMP
1314         int r;
1315
1316         if (!is_seccomp_available())
1317                 return 0;
1318
1319         r = seccomp_restrict_archs(arg_syscall_archs);
1320         if (r < 0)
1321                 return log_error_errno(r, "Failed to enforce system call architecture restrication: %m");
1322 #endif
1323         return 0;
1324 }
1325
1326 static int status_welcome(void) {
1327         _cleanup_free_ char *pretty_name = NULL, *ansi_color = NULL;
1328         int r;
1329
1330         if (!show_status_on(arg_show_status))
1331                 return 0;
1332
1333         r = parse_os_release(NULL,
1334                              "PRETTY_NAME", &pretty_name,
1335                              "ANSI_COLOR", &ansi_color,
1336                              NULL);
1337         if (r < 0)
1338                 log_full_errno(r == -ENOENT ? LOG_DEBUG : LOG_WARNING, r,
1339                                "Failed to read os-release file, ignoring: %m");
1340
1341         if (log_get_show_color())
1342                 return status_printf(NULL, 0,
1343                                      "\nWelcome to \x1B[%sm%s\x1B[0m!\n",
1344                                      isempty(ansi_color) ? "1" : ansi_color,
1345                                      isempty(pretty_name) ? "Linux" : pretty_name);
1346         else
1347                 return status_printf(NULL, 0,
1348                                      "\nWelcome to %s!\n",
1349                                      isempty(pretty_name) ? "Linux" : pretty_name);
1350 }
1351
1352 static int write_container_id(void) {
1353         const char *c;
1354         int r;
1355
1356         c = getenv("container");
1357         if (isempty(c))
1358                 return 0;
1359
1360         RUN_WITH_UMASK(0022)
1361                 r = write_string_file("/run/systemd/container", c, WRITE_STRING_FILE_CREATE);
1362         if (r < 0)
1363                 return log_warning_errno(r, "Failed to write /run/systemd/container, ignoring: %m");
1364
1365         return 1;
1366 }
1367
1368 static int bump_unix_max_dgram_qlen(void) {
1369         _cleanup_free_ char *qlen = NULL;
1370         unsigned long v;
1371         int r;
1372
1373         /* Let's bump the net.unix.max_dgram_qlen sysctl. The kernel default of 16 is simply too low. We set the value
1374          * really really early during boot, so that it is actually applied to all our sockets, including the
1375          * $NOTIFY_SOCKET one. */
1376
1377         r = read_one_line_file("/proc/sys/net/unix/max_dgram_qlen", &qlen);
1378         if (r < 0)
1379                 return log_full_errno(r == -ENOENT ? LOG_DEBUG : LOG_WARNING, r, "Failed to read AF_UNIX datagram queue length, ignoring: %m");
1380
1381         r = safe_atolu(qlen, &v);
1382         if (r < 0)
1383                 return log_warning_errno(r, "Failed to parse AF_UNIX datagram queue length '%s', ignoring: %m", qlen);
1384
1385         if (v >= DEFAULT_UNIX_MAX_DGRAM_QLEN)
1386                 return 0;
1387
1388         r = write_string_filef("/proc/sys/net/unix/max_dgram_qlen", WRITE_STRING_FILE_DISABLE_BUFFER, "%lu", DEFAULT_UNIX_MAX_DGRAM_QLEN);
1389         if (r < 0)
1390                 return log_full_errno(IN_SET(r, -EROFS, -EPERM, -EACCES) ? LOG_DEBUG : LOG_WARNING, r,
1391                                       "Failed to bump AF_UNIX datagram queue length, ignoring: %m");
1392
1393         return 1;
1394 }
1395
1396 static int fixup_environment(void) {
1397         _cleanup_free_ char *term = NULL;
1398         const char *t;
1399         int r;
1400
1401         /* Only fix up the environment when we are started as PID 1 */
1402         if (getpid_cached() != 1)
1403                 return 0;
1404
1405         /* We expect the environment to be set correctly if run inside a container. */
1406         if (detect_container() > 0)
1407                 return 0;
1408
1409         /* When started as PID1, the kernel uses /dev/console for our stdios and uses TERM=linux whatever the backend
1410          * device used by the console. We try to make a better guess here since some consoles might not have support
1411          * for color mode for example.
1412          *
1413          * However if TERM was configured through the kernel command line then leave it alone. */
1414         r = proc_cmdline_get_key("TERM", 0, &term);
1415         if (r < 0)
1416                 return r;
1417
1418         t = term ?: default_term_for_tty("/dev/console");
1419
1420         if (setenv("TERM", t, 1) < 0)
1421                 return -errno;
1422
1423         /* The kernels sets HOME=/ for init. Let's undo this. */
1424         if (path_equal_ptr(getenv("HOME"), "/"))
1425                 assert_se(unsetenv("HOME") == 0);
1426
1427         return 0;
1428 }
1429
/* SysV compatibility: when not running as PID 1 and invoked under a name containing "init", behave like
 * telinit by executing SYSTEMCTL_BINARY_PATH with the original argv[]. If that exec fails, logs an error
 * and exits with EXIT_FAILURE. Compiles to a no-op without HAVE_SYSV_COMPAT. */
static void redirect_telinit(int argc, char *argv[]) {

#if HAVE_SYSV_COMPAT
        /* The real init process, or a binary not invoked as "init": not our business. */
        if (getpid_cached() == 1 || !strstr(program_invocation_short_name, "init"))
                return;

        execv(SYSTEMCTL_BINARY_PATH, argv);

        /* Only reached if execv() failed. */
        log_error_errno(errno, "Failed to exec " SYSTEMCTL_BINARY_PATH ": %m");
        exit(EXIT_FAILURE);
#endif
}
1446
1447 static int become_shutdown(
1448                 const char *shutdown_verb,
1449                 int retval) {
1450
1451         char log_level[DECIMAL_STR_MAX(int) + 1],
1452                 exit_code[DECIMAL_STR_MAX(uint8_t) + 1],
1453                 timeout[DECIMAL_STR_MAX(usec_t) + 1];
1454
1455         const char* command_line[13] = {
1456                 SYSTEMD_SHUTDOWN_BINARY_PATH,
1457                 shutdown_verb,
1458                 "--timeout", timeout,
1459                 "--log-level", log_level,
1460                 "--log-target",
1461         };
1462
1463         _cleanup_strv_free_ char **env_block = NULL;
1464         size_t pos = 7;
1465         int r;
1466         usec_t watchdog_timer = 0;
1467
1468         assert(shutdown_verb);
1469         assert(!command_line[pos]);
1470         env_block = strv_copy(environ);
1471
1472         xsprintf(log_level, "%d", log_get_max_level());
1473         xsprintf(timeout, "%" PRI_USEC "us", arg_default_timeout_stop_usec);
1474
1475         switch (log_get_target()) {
1476
1477         case LOG_TARGET_KMSG:
1478         case LOG_TARGET_JOURNAL_OR_KMSG:
1479         case LOG_TARGET_SYSLOG_OR_KMSG:
1480                 command_line[pos++] = "kmsg";
1481                 break;
1482
1483         case LOG_TARGET_NULL:
1484                 command_line[pos++] = "null";
1485                 break;
1486
1487         case LOG_TARGET_CONSOLE:
1488         default:
1489                 command_line[pos++] = "console";
1490                 break;
1491         };
1492
1493         if (log_get_show_color())
1494                 command_line[pos++] = "--log-color";
1495
1496         if (log_get_show_location())
1497                 command_line[pos++] = "--log-location";
1498
1499         if (log_get_show_time())
1500                 command_line[pos++] = "--log-time";
1501
1502         if (streq(shutdown_verb, "exit")) {
1503                 command_line[pos++] = "--exit-code";
1504                 command_line[pos++] = exit_code;
1505                 xsprintf(exit_code, "%d", retval);
1506         }
1507
1508         assert(pos < ELEMENTSOF(command_line));
1509
1510         if (streq(shutdown_verb, "reboot"))
1511                 watchdog_timer = arg_reboot_watchdog;
1512         else if (streq(shutdown_verb, "kexec"))
1513                 watchdog_timer = arg_kexec_watchdog;
1514
1515         if (watchdog_timer > 0 && watchdog_timer != USEC_INFINITY) {
1516
1517                 char *e;
1518
1519                 /* If we reboot or kexec let's set the shutdown
1520                  * watchdog and tell the shutdown binary to
1521                  * repeatedly ping it */
1522                 r = watchdog_set_timeout(&watchdog_timer);
1523                 watchdog_close(r < 0);
1524
1525                 /* Tell the binary how often to ping, ignore failure */
1526                 if (asprintf(&e, "WATCHDOG_USEC="USEC_FMT, watchdog_timer) > 0)
1527                         (void) strv_consume(&env_block, e);
1528
1529                 if (arg_watchdog_device &&
1530                     asprintf(&e, "WATCHDOG_DEVICE=%s", arg_watchdog_device) > 0)
1531                         (void) strv_consume(&env_block, e);
1532         } else
1533                 watchdog_close(true);
1534
1535         /* Avoid the creation of new processes forked by the
1536          * kernel; at this point, we will not listen to the
1537          * signals anyway */
1538         if (detect_container() <= 0)
1539                 (void) cg_uninstall_release_agent(SYSTEMD_CGROUP_CONTROLLER);
1540
1541         execve(SYSTEMD_SHUTDOWN_BINARY_PATH, (char **) command_line, env_block);
1542         return -errno;
1543 }
1544
1545 static void initialize_clock(void) {
1546         int r;
1547
1548         /* This is called very early on, before we parse the kernel command line or otherwise figure out why
1549          * we are running, but only once. */
1550
1551         if (clock_is_localtime(NULL) > 0) {
1552                 int min;
1553
1554                 /*
1555                  * The very first call of settimeofday() also does a time warp in the kernel.
1556                  *
1557                  * In the rtc-in-local time mode, we set the kernel's timezone, and rely on external tools to take care
1558                  * of maintaining the RTC and do all adjustments.  This matches the behavior of Windows, which leaves
1559                  * the RTC alone if the registry tells that the RTC runs in UTC.
1560                  */
1561                 r = clock_set_timezone(&min);
1562                 if (r < 0)
1563                         log_error_errno(r, "Failed to apply local time delta, ignoring: %m");
1564                 else
1565                         log_info("RTC configured in localtime, applying delta of %i minutes to system time.", min);
1566
1567         } else if (!in_initrd())
1568                 /*
1569                  * Do a dummy very first call to seal the kernel's time warp magic.
1570                  *
1571                  * Do not call this from inside the initrd. The initrd might not carry /etc/adjtime with LOCAL, but the
1572                  * real system could be set up that way. In such case, we need to delay the time-warp or the sealing
1573                  * until we reach the real system.
1574                  *
1575                  * Do no set the kernel's timezone. The concept of local time cannot be supported reliably, the time
1576                  * will jump or be incorrect at every daylight saving time change. All kernel local time concepts will
1577                  * be treated as UTC that way.
1578                  */
1579                 (void) clock_reset_timewarp();
1580
1581         r = clock_apply_epoch();
1582         if (r < 0)
1583                 log_error_errno(r, "Current system time is before build time, but cannot correct: %m");
1584         else if (r > 0)
1585                 log_info("System time before build time, advancing clock.");
1586 }
1587
1588 static void apply_clock_update(void) {
1589         struct timespec ts;
1590
1591         /* This is called later than initialize_clock(), i.e. after we parsed configuration files/kernel
1592          * command line and such. */
1593
1594         if (arg_clock_usec == 0)
1595                 return;
1596
1597         if (getpid_cached() != 1)
1598                 return;
1599
1600         if (clock_settime(CLOCK_REALTIME, timespec_store(&ts, arg_clock_usec)) < 0)
1601                 log_error_errno(errno, "Failed to set system clock to time specified on kernel command line: %m");
1602         else {
1603                 char buf[FORMAT_TIMESTAMP_MAX];
1604
1605                 log_info("Set system clock to %s, as specified on the kernel command line.",
1606                          format_timestamp(buf, sizeof(buf), arg_clock_usec));
1607         }
1608 }
1609
1610 static void cmdline_take_random_seed(void) {
1611         size_t suggested;
1612         int r;
1613
1614         if (arg_random_seed_size == 0)
1615                 return;
1616
1617         if (getpid_cached() != 1)
1618                 return;
1619
1620         assert(arg_random_seed);
1621         suggested = random_pool_size();
1622
1623         if (arg_random_seed_size < suggested)
1624                 log_warning("Random seed specified on kernel command line has size %zu, but %zu bytes required to fill entropy pool.",
1625                             arg_random_seed_size, suggested);
1626
1627         r = random_write_entropy(-1, arg_random_seed, arg_random_seed_size, true);
1628         if (r < 0) {
1629                 log_warning_errno(r, "Failed to credit entropy specified on kernel command line, ignoring: %m");
1630                 return;
1631         }
1632
1633         log_notice("Successfully credited entropy passed on kernel command line.\n"
1634                    "Note that the seed provided this way is accessible to unprivileged programs. This functionality should not be used outside of testing environments.");
1635 }
1636
/* Early core dump setup for PID 1: lifts RLIMIT_CORE to infinity and, unless skip_setup is true, calls
 * disable_coredumps(). Compiles to a no-op without ENABLE_COREDUMP. */
static void initialize_coredump(bool skip_setup) {
#if ENABLE_COREDUMP
        if (getpid_cached() != 1)
                return;

        /* Do not limit the core dump size, so that coredump handlers that honour the limit (such as
         * systemd-coredump) process core dumps of system services by default. */
        if (setrlimit(RLIMIT_CORE, &RLIMIT_MAKE_CONST(RLIM_INFINITY)) < 0)
                log_warning_errno(errno, "Failed to set RLIMIT_CORE: %m");

        if (skip_setup)
                return;

        /* At the same time turn off the core_pattern logic by default, so that no coredumps are stored
         * until systemd-coredump is enabled via sysctl. This may be changed later through the kernel
         * command line, so that core dumps can still be generated during early startup and in the
         * initramfs. */
        disable_coredumps();
#endif
}
1655
1656 static void initialize_core_pattern(bool skip_setup) {
1657         int r;
1658
1659         if (skip_setup || !arg_early_core_pattern)
1660                 return;
1661
1662         if (getpid_cached() != 1)
1663                 return;
1664
1665         r = write_string_file("/proc/sys/kernel/core_pattern", arg_early_core_pattern, WRITE_STRING_FILE_DISABLE_BUFFER);
1666         if (r < 0)
1667                 log_warning_errno(r, "Failed to write '%s' to /proc/sys/kernel/core_pattern, ignoring: %m", arg_early_core_pattern);
1668 }
1669
1670 static void update_cpu_affinity(bool skip_setup) {
1671         _cleanup_free_ char *mask = NULL;
1672
1673         if (skip_setup || !arg_cpu_affinity.set)
1674                 return;
1675
1676         assert(arg_cpu_affinity.allocated > 0);
1677
1678         mask = cpu_set_to_string(&arg_cpu_affinity);
1679         log_debug("Setting CPU affinity to %s.", strnull(mask));
1680
1681         if (sched_setaffinity(0, arg_cpu_affinity.allocated, arg_cpu_affinity.set) < 0)
1682                 log_warning_errno(errno, "Failed to set CPU affinity: %m");
1683 }
1684
1685 static void update_numa_policy(bool skip_setup) {
1686         int r;
1687         _cleanup_free_ char *nodes = NULL;
1688         const char * policy = NULL;
1689
1690         if (skip_setup || !mpol_is_valid(numa_policy_get_type(&arg_numa_policy)))
1691                 return;
1692
1693         if (DEBUG_LOGGING) {
1694                 policy = mpol_to_string(numa_policy_get_type(&arg_numa_policy));
1695                 nodes = cpu_set_to_range_string(&arg_numa_policy.nodes);
1696                 log_debug("Setting NUMA policy to %s, with nodes %s.", strnull(policy), strnull(nodes));
1697         }
1698
1699         r = apply_numa_policy(&arg_numa_policy);
1700         if (r == -EOPNOTSUPP)
1701                 log_debug_errno(r, "NUMA support not available, ignoring.");
1702         else if (r < 0)
1703                 log_warning_errno(r, "Failed to set NUMA memory policy: %m");
1704 }
1705
1706 static void do_reexecute(
1707                 int argc,
1708                 char *argv[],
1709                 const struct rlimit *saved_rlimit_nofile,
1710                 const struct rlimit *saved_rlimit_memlock,
1711                 FDSet *fds,
1712                 const char *switch_root_dir,
1713                 const char *switch_root_init,
1714                 const char **ret_error_message) {
1715
1716         unsigned i, j, args_size;
1717         const char **args;
1718         int r;
1719
1720         assert(saved_rlimit_nofile);
1721         assert(saved_rlimit_memlock);
1722         assert(ret_error_message);
1723
1724         /* Close and disarm the watchdog, so that the new instance can reinitialize it, but doesn't get rebooted while
1725          * we do that */
1726         watchdog_close(true);
1727
1728         /* Reset RLIMIT_NOFILE + RLIMIT_MEMLOCK back to the kernel defaults, so that the new systemd can pass
1729          * the kernel default to its child processes */
1730         if (saved_rlimit_nofile->rlim_cur != 0)
1731                 (void) setrlimit(RLIMIT_NOFILE, saved_rlimit_nofile);
1732         if (saved_rlimit_memlock->rlim_cur != RLIM_INFINITY)
1733                 (void) setrlimit(RLIMIT_MEMLOCK, saved_rlimit_memlock);
1734
1735         if (switch_root_dir) {
1736                 /* Kill all remaining processes from the initrd, but don't wait for them, so that we can handle the
1737                  * SIGCHLD for them after deserializing. */
1738                 broadcast_signal(SIGTERM, false, true, arg_default_timeout_stop_usec);
1739
1740                 /* And switch root with MS_MOVE, because we remove the old directory afterwards and detach it. */
1741                 r = switch_root(switch_root_dir, "/mnt", true, MS_MOVE);
1742                 if (r < 0)
1743                         log_error_errno(r, "Failed to switch root, trying to continue: %m");
1744         }
1745
1746         args_size = MAX(6, argc+1);
1747         args = newa(const char*, args_size);
1748
1749         if (!switch_root_init) {
1750                 char sfd[DECIMAL_STR_MAX(int) + 1];
1751
1752                 /* First try to spawn ourselves with the right path, and with full serialization. We do this only if
1753                  * the user didn't specify an explicit init to spawn. */
1754
1755                 assert(arg_serialization);
1756                 assert(fds);
1757
1758                 xsprintf(sfd, "%i", fileno(arg_serialization));
1759
1760                 i = 0;
1761                 args[i++] = SYSTEMD_BINARY_PATH;
1762                 if (switch_root_dir)
1763                         args[i++] = "--switched-root";
1764                 args[i++] = arg_system ? "--system" : "--user";
1765                 args[i++] = "--deserialize";
1766                 args[i++] = sfd;
1767                 args[i++] = NULL;
1768
1769                 assert(i <= args_size);
1770
1771                 /*
1772                  * We want valgrind to print its memory usage summary before reexecution.  Valgrind won't do this is on
1773                  * its own on exec(), but it will do it on exit().  Hence, to ensure we get a summary here, fork() off
1774                  * a child, let it exit() cleanly, so that it prints the summary, and wait() for it in the parent,
1775                  * before proceeding into the exec().
1776                  */
1777                 valgrind_summary_hack();
1778
1779                 (void) execv(args[0], (char* const*) args);
1780                 log_debug_errno(errno, "Failed to execute our own binary, trying fallback: %m");
1781         }
1782
1783         /* Try the fallback, if there is any, without any serialization. We pass the original argv[] and envp[]. (Well,
1784          * modulo the ordering changes due to getopt() in argv[], and some cleanups in envp[], but let's hope that
1785          * doesn't matter.) */
1786
1787         arg_serialization = safe_fclose(arg_serialization);
1788         fds = fdset_free(fds);
1789
1790         /* Reopen the console */
1791         (void) make_console_stdio();
1792
1793         for (j = 1, i = 1; j < (unsigned) argc; j++)
1794                 args[i++] = argv[j];
1795         args[i++] = NULL;
1796         assert(i <= args_size);
1797
1798         /* Re-enable any blocked signals, especially important if we switch from initial ramdisk to init=... */
1799         (void) reset_all_signal_handlers();
1800         (void) reset_signal_mask();
1801         (void) rlimit_nofile_safe();
1802
1803         if (switch_root_init) {
1804                 args[0] = switch_root_init;
1805                 (void) execve(args[0], (char* const*) args, saved_env);
1806                 log_warning_errno(errno, "Failed to execute configured init, trying fallback: %m");
1807         }
1808
1809         args[0] = "/sbin/init";
1810         (void) execv(args[0], (char* const*) args);
1811         r = -errno;
1812
1813         manager_status_printf(NULL, STATUS_TYPE_EMERGENCY,
1814                               ANSI_HIGHLIGHT_RED "  !!  " ANSI_NORMAL,
1815                               "Failed to execute /sbin/init");
1816
1817         if (r == -ENOENT) {
1818                 log_warning("No /sbin/init, trying fallback");
1819
1820                 args[0] = "/bin/sh";
1821                 args[1] = NULL;
1822                 (void) execve(args[0], (char* const*) args, saved_env);
1823                 log_error_errno(errno, "Failed to execute /bin/sh, giving up: %m");
1824         } else
1825                 log_warning_errno(r, "Failed to execute /sbin/init, giving up: %m");
1826
1827         *ret_error_message = "Failed to execute fallback shell";
1828 }
1829
1830 static int invoke_main_loop(
1831                 Manager *m,
1832                 const struct rlimit *saved_rlimit_nofile,
1833                 const struct rlimit *saved_rlimit_memlock,
1834                 bool *ret_reexecute,
1835                 int *ret_retval,                   /* Return parameters relevant for shutting down */
1836                 const char **ret_shutdown_verb,    /* … */
1837                 FDSet **ret_fds,                   /* Return parameters for reexecuting */
1838                 char **ret_switch_root_dir,        /* … */
1839                 char **ret_switch_root_init,       /* … */
1840                 const char **ret_error_message) {
1841
1842         int r;
1843
1844         assert(m);
1845         assert(saved_rlimit_nofile);
1846         assert(saved_rlimit_memlock);
1847         assert(ret_reexecute);
1848         assert(ret_retval);
1849         assert(ret_shutdown_verb);
1850         assert(ret_fds);
1851         assert(ret_switch_root_dir);
1852         assert(ret_switch_root_init);
1853         assert(ret_error_message);
1854
1855         for (;;) {
1856                 r = manager_loop(m);
1857                 if (r < 0) {
1858                         *ret_error_message = "Failed to run main loop";
1859                         return log_emergency_errno(r, "Failed to run main loop: %m");
1860                 }
1861
1862                 switch ((ManagerObjective) r) {
1863
1864                 case MANAGER_RELOAD: {
1865                         LogTarget saved_log_target;
1866                         int saved_log_level;
1867
1868                         log_info("Reloading.");
1869
1870                         /* First, save any overridden log level/target, then parse the configuration file, which might
1871                          * change the log level to new settings. */
1872
1873                         saved_log_level = m->log_level_overridden ? log_get_max_level() : -1;
1874                         saved_log_target = m->log_target_overridden ? log_get_target() : _LOG_TARGET_INVALID;
1875
1876                         (void) parse_configuration(saved_rlimit_nofile, saved_rlimit_memlock);
1877
1878                         set_manager_defaults(m);
1879                         set_manager_settings(m);
1880
1881                         update_cpu_affinity(false);
1882                         update_numa_policy(false);
1883
1884                         if (saved_log_level >= 0)
1885                                 manager_override_log_level(m, saved_log_level);
1886                         if (saved_log_target >= 0)
1887                                 manager_override_log_target(m, saved_log_target);
1888
1889                         r = manager_reload(m);
1890                         if (r < 0)
1891                                 /* Reloading failed before the point of no return. Let's continue running as if nothing happened. */
1892                                 m->objective = MANAGER_OK;
1893
1894                         break;
1895                 }
1896
1897                 case MANAGER_REEXECUTE:
1898
1899                         r = prepare_reexecute(m, &arg_serialization, ret_fds, false);
1900                         if (r < 0) {
1901                                 *ret_error_message = "Failed to prepare for reexecution";
1902                                 return r;
1903                         }
1904
1905                         log_notice("Reexecuting.");
1906
1907                         *ret_reexecute = true;
1908                         *ret_retval = EXIT_SUCCESS;
1909                         *ret_shutdown_verb = NULL;
1910                         *ret_switch_root_dir = *ret_switch_root_init = NULL;
1911
1912                         return 0;
1913
1914                 case MANAGER_SWITCH_ROOT:
1915                         if (!m->switch_root_init) {
1916                                 r = prepare_reexecute(m, &arg_serialization, ret_fds, true);
1917                                 if (r < 0) {
1918                                         *ret_error_message = "Failed to prepare for reexecution";
1919                                         return r;
1920                                 }
1921                         } else
1922                                 *ret_fds = NULL;
1923
1924                         log_notice("Switching root.");
1925
1926                         *ret_reexecute = true;
1927                         *ret_retval = EXIT_SUCCESS;
1928                         *ret_shutdown_verb = NULL;
1929
1930                         /* Steal the switch root parameters */
1931                         *ret_switch_root_dir = TAKE_PTR(m->switch_root);
1932                         *ret_switch_root_init = TAKE_PTR(m->switch_root_init);
1933
1934                         return 0;
1935
1936                 case MANAGER_EXIT:
1937
1938                         if (MANAGER_IS_USER(m)) {
1939                                 log_debug("Exit.");
1940
1941                                 *ret_reexecute = false;
1942                                 *ret_retval = m->return_value;
1943                                 *ret_shutdown_verb = NULL;
1944                                 *ret_fds = NULL;
1945                                 *ret_switch_root_dir = *ret_switch_root_init = NULL;
1946
1947                                 return 0;
1948                         }
1949
1950                         _fallthrough_;
1951                 case MANAGER_REBOOT:
1952                 case MANAGER_POWEROFF:
1953                 case MANAGER_HALT:
1954                 case MANAGER_KEXEC: {
1955                         static const char * const table[_MANAGER_OBJECTIVE_MAX] = {
1956                                 [MANAGER_EXIT]     = "exit",
1957                                 [MANAGER_REBOOT]   = "reboot",
1958                                 [MANAGER_POWEROFF] = "poweroff",
1959                                 [MANAGER_HALT]     = "halt",
1960                                 [MANAGER_KEXEC]    = "kexec",
1961                         };
1962
1963                         log_notice("Shutting down.");
1964
1965                         *ret_reexecute = false;
1966                         *ret_retval = m->return_value;
1967                         assert_se(*ret_shutdown_verb = table[m->objective]);
1968                         *ret_fds = NULL;
1969                         *ret_switch_root_dir = *ret_switch_root_init = NULL;
1970
1971                         return 0;
1972                 }
1973
1974                 default:
1975                         assert_not_reached("Unknown or unexpected manager objective.");
1976                 }
1977         }
1978 }
1979
1980 static void log_execution_mode(bool *ret_first_boot) {
1981         assert(ret_first_boot);
1982
1983         if (arg_system) {
1984                 int v;
1985
1986                 log_info("systemd " GIT_VERSION " running in %ssystem mode. (%s)",
1987                          arg_action == ACTION_TEST ? "test " : "",
1988                          systemd_features);
1989
1990                 v = detect_virtualization();
1991                 if (v > 0)
1992                         log_info("Detected virtualization %s.", virtualization_to_string(v));
1993
1994                 log_info("Detected architecture %s.", architecture_to_string(uname_architecture()));
1995
1996                 if (in_initrd()) {
1997                         *ret_first_boot = false;
1998                         log_info("Running in initial RAM disk.");
1999                 } else {
2000                         int r;
2001                         _cleanup_free_ char *id_text = NULL;
2002
2003                         /* Let's check whether we are in first boot.  We use /etc/machine-id as flag file
2004                          * for this: If it is missing or contains the value "uninitialized", this is the
2005                          * first boot.  In any other case, it is not.  This allows container managers and
2006                          * installers to provision a couple of files already.  If the container manager
2007                          * wants to provision the machine ID itself it should pass $container_uuid to PID 1. */
2008
2009                         r = read_one_line_file("/etc/machine-id", &id_text);
2010                         if (r < 0 || streq(id_text, "uninitialized")) {
2011                                 if (r < 0 && r != -ENOENT)
2012                                         log_warning_errno(r, "Unexpected error while reading /etc/machine-id, ignoring: %m");
2013
2014                                 *ret_first_boot = true;
2015                                 log_info("Detected first boot.");
2016                         } else {
2017                                 *ret_first_boot = false;
2018                                 log_debug("Detected initialized system, this is not the first boot.");
2019                         }
2020                 }
2021         } else {
2022                 if (DEBUG_LOGGING) {
2023                         _cleanup_free_ char *t;
2024
2025                         t = uid_to_name(getuid());
2026                         log_debug("systemd " GIT_VERSION " running in %suser mode for user " UID_FMT "/%s. (%s)",
2027                                   arg_action == ACTION_TEST ? " test" : "",
2028                                   getuid(), strna(t), systemd_features);
2029                 }
2030
2031                 *ret_first_boot = false;
2032         }
2033 }
2034
2035 static int initialize_runtime(
2036                 bool skip_setup,
2037                 bool first_boot,
2038                 struct rlimit *saved_rlimit_nofile,
2039                 struct rlimit *saved_rlimit_memlock,
2040                 const char **ret_error_message) {
2041         int r;
2042
2043         assert(ret_error_message);
2044
2045         /* Sets up various runtime parameters. Many of these initializations are conditionalized:
2046          *
2047          * - Some only apply to --system instances
2048          * - Some only apply to --user instances
2049          * - Some only apply when we first start up, but not when we reexecute
2050          */
2051
2052         if (arg_action != ACTION_RUN)
2053                 return 0;
2054
2055         update_cpu_affinity(skip_setup);
2056         update_numa_policy(skip_setup);
2057
2058         if (arg_system) {
2059                 /* Make sure we leave a core dump without panicking the kernel. */
2060                 install_crash_handler();
2061
2062                 if (!skip_setup) {
2063                         r = mount_cgroup_controllers();
2064                         if (r < 0) {
2065                                 *ret_error_message = "Failed to mount cgroup hierarchies";
2066                                 return r;
2067                         }
2068
2069                         status_welcome();
2070                         (void) hostname_setup(true);
2071                         /* Force transient machine-id on first boot. */
2072                         machine_id_setup(NULL, first_boot, arg_machine_id, NULL);
2073                         (void) loopback_setup();
2074                         bump_unix_max_dgram_qlen();
2075                         bump_file_max_and_nr_open();
2076                         test_usr();
2077                         write_container_id();
2078                 }
2079
2080                 if (arg_watchdog_device) {
2081                         r = watchdog_set_device(arg_watchdog_device);
2082                         if (r < 0)
2083                                 log_warning_errno(r, "Failed to set watchdog device to %s, ignoring: %m", arg_watchdog_device);
2084                 }
2085         } else {
2086                 _cleanup_free_ char *p = NULL;
2087
2088                 /* Create the runtime directory and place the inaccessible device nodes there, if we run in
2089                  * user mode. In system mode mount_setup() already did that. */
2090
2091                 r = xdg_user_runtime_dir(&p, "/systemd");
2092                 if (r < 0) {
2093                         *ret_error_message = "$XDG_RUNTIME_DIR is not set";
2094                         return log_emergency_errno(r, "Failed to determine $XDG_RUNTIME_DIR path: %m");
2095                 }
2096
2097                 (void) mkdir_p_label(p, 0755);
2098                 (void) make_inaccessible_nodes(p, UID_INVALID, GID_INVALID);
2099         }
2100
2101         if (arg_timer_slack_nsec != NSEC_INFINITY)
2102                 if (prctl(PR_SET_TIMERSLACK, arg_timer_slack_nsec) < 0)
2103                         log_warning_errno(errno, "Failed to adjust timer slack, ignoring: %m");
2104
2105         if (arg_system && !cap_test_all(arg_capability_bounding_set)) {
2106                 r = capability_bounding_set_drop_usermode(arg_capability_bounding_set);
2107                 if (r < 0) {
2108                         *ret_error_message = "Failed to drop capability bounding set of usermode helpers";
2109                         return log_emergency_errno(r, "Failed to drop capability bounding set of usermode helpers: %m");
2110                 }
2111
2112                 r = capability_bounding_set_drop(arg_capability_bounding_set, true);
2113                 if (r < 0) {
2114                         *ret_error_message = "Failed to drop capability bounding set";
2115                         return log_emergency_errno(r, "Failed to drop capability bounding set: %m");
2116                 }
2117         }
2118
2119         if (arg_system && arg_no_new_privs) {
2120                 if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) < 0) {
2121                         *ret_error_message = "Failed to disable new privileges";
2122                         return log_emergency_errno(errno, "Failed to disable new privileges: %m");
2123                 }
2124         }
2125
2126         if (arg_syscall_archs) {
2127                 r = enforce_syscall_archs(arg_syscall_archs);
2128                 if (r < 0) {
2129                         *ret_error_message = "Failed to set syscall architectures";
2130                         return r;
2131                 }
2132         }
2133
2134         if (!arg_system)
2135                 /* Become reaper of our children */
2136                 if (prctl(PR_SET_CHILD_SUBREAPER, 1) < 0)
2137                         log_warning_errno(errno, "Failed to make us a subreaper: %m");
2138
2139         /* Bump up RLIMIT_NOFILE for systemd itself */
2140         (void) bump_rlimit_nofile(saved_rlimit_nofile);
2141         (void) bump_rlimit_memlock(saved_rlimit_memlock);
2142
2143         return 0;
2144 }
2145
2146 static int do_queue_default_job(
2147                 Manager *m,
2148                 const char **ret_error_message) {
2149
2150         _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
2151         const char *unit;
2152         Job *job;
2153         Unit *target;
2154         int r;
2155
2156         if (arg_default_unit)
2157                 unit = arg_default_unit;
2158         else if (in_initrd())
2159                 unit = SPECIAL_INITRD_TARGET;
2160         else
2161                 unit = SPECIAL_DEFAULT_TARGET;
2162
2163         log_debug("Activating default unit: %s", unit);
2164
2165         r = manager_load_startable_unit_or_warn(m, unit, NULL, &target);
2166         if (r < 0 && in_initrd() && !arg_default_unit) {
2167                 /* Fall back to default.target, which we used to always use by default. Only do this if no
2168                  * explicit configuration was given. */
2169
2170                 log_info("Falling back to " SPECIAL_DEFAULT_TARGET ".");
2171
2172                 r = manager_load_startable_unit_or_warn(m, SPECIAL_DEFAULT_TARGET, NULL, &target);
2173         }
2174         if (r < 0) {
2175                 log_info("Falling back to " SPECIAL_RESCUE_TARGET ".");
2176
2177                 r = manager_load_startable_unit_or_warn(m, SPECIAL_RESCUE_TARGET, NULL, &target);
2178                 if (r < 0) {
2179                         *ret_error_message = r == -ERFKILL ? SPECIAL_RESCUE_TARGET " masked"
2180                                                            : "Failed to load " SPECIAL_RESCUE_TARGET;
2181                         return r;
2182                 }
2183         }
2184
2185         assert(target->load_state == UNIT_LOADED);
2186
2187         r = manager_add_job(m, JOB_START, target, JOB_ISOLATE, NULL, &error, &job);
2188         if (r == -EPERM) {
2189                 log_debug_errno(r, "Default target could not be isolated, starting instead: %s", bus_error_message(&error, r));
2190
2191                 sd_bus_error_free(&error);
2192
2193                 r = manager_add_job(m, JOB_START, target, JOB_REPLACE, NULL, &error, &job);
2194                 if (r < 0) {
2195                         *ret_error_message = "Failed to start default target";
2196                         return log_emergency_errno(r, "Failed to start default target: %s", bus_error_message(&error, r));
2197                 }
2198
2199         } else if (r < 0) {
2200                 *ret_error_message = "Failed to isolate default target";
2201                 return log_emergency_errno(r, "Failed to isolate default target: %s", bus_error_message(&error, r));
2202         } else
2203                 log_info("Queued %s job for default target %s.",
2204                          job_type_to_string(job->type),
2205                          unit_status_string(job->unit));
2206
2207         m->default_unit_job_id = job->id;
2208
2209         return 0;
2210 }
2211
/* Stores the current RLIMIT_NOFILE and RLIMIT_MEMLOCK settings of this process in the two supplied
 * structures. If a limit cannot be read a warning is logged and the corresponding structure is left to
 * getrlimit()'s failure behaviour. */
static void save_rlimits(struct rlimit *saved_rlimit_nofile,
                         struct rlimit *saved_rlimit_memlock) {

        int k;

        assert(saved_rlimit_nofile);
        assert(saved_rlimit_memlock);

        k = getrlimit(RLIMIT_NOFILE, saved_rlimit_nofile);
        if (k < 0)
                log_warning_errno(errno, "Reading RLIMIT_NOFILE failed, ignoring: %m");

        k = getrlimit(RLIMIT_MEMLOCK, saved_rlimit_memlock);
        if (k < 0)
                log_warning_errno(errno, "Reading RLIMIT_MEMLOCK failed, ignoring: %m");
}
2224
2225 static void fallback_rlimit_nofile(const struct rlimit *saved_rlimit_nofile) {
2226         struct rlimit *rl;
2227
2228         if (arg_default_rlimit[RLIMIT_NOFILE])
2229                 return;
2230
2231         /* Make sure forked processes get limits based on the original kernel setting */
2232
2233         rl = newdup(struct rlimit, saved_rlimit_nofile, 1);
2234         if (!rl) {
2235                 log_oom();
2236                 return;
2237         }
2238
2239         /* Bump the hard limit for system services to a substantially higher value. The default
2240          * hard limit current kernels set is pretty low (4K), mostly for historical
2241          * reasons. According to kernel developers, the fd handling in recent kernels has been
2242          * optimized substantially enough, so that we can bump the limit now, without paying too
2243          * high a price in memory or performance. Note however that we only bump the hard limit,
2244          * not the soft limit. That's because select() works the way it works, and chokes on fds
2245          * >= 1024. If we'd bump the soft limit globally, it might accidentally happen to
2246          * unexpecting programs that they get fds higher than what they can process using
2247          * select(). By only bumping the hard limit but leaving the low limit as it is we avoid
2248          * this pitfall:  programs that are written by folks aware of the select() problem in mind
2249          * (and thus use poll()/epoll instead of select(), the way everybody should) can
2250          * explicitly opt into high fds by bumping their soft limit beyond 1024, to the hard limit
2251          * we pass. */
2252         if (arg_system) {
2253                 int nr;
2254
2255                 /* Get the underlying absolute limit the kernel enforces */
2256                 nr = read_nr_open();
2257
2258                 rl->rlim_max = MIN((rlim_t) nr, MAX(rl->rlim_max, (rlim_t) HIGH_RLIMIT_NOFILE));
2259         }
2260
2261         /* If for some reason we were invoked with a soft limit above 1024 (which should never
2262          * happen!, but who knows what we get passed in from pam_limit when invoked as --user
2263          * instance), then lower what we pass on to not confuse our children */
2264         rl->rlim_cur = MIN(rl->rlim_cur, (rlim_t) FD_SETSIZE);
2265
2266         arg_default_rlimit[RLIMIT_NOFILE] = rl;
2267 }
2268
2269 static void fallback_rlimit_memlock(const struct rlimit *saved_rlimit_memlock) {
2270         struct rlimit *rl;
2271
2272         /* Pass the original value down to invoked processes */
2273
2274         if (arg_default_rlimit[RLIMIT_MEMLOCK])
2275                 return;
2276
2277         rl = newdup(struct rlimit, saved_rlimit_memlock, 1);
2278         if (!rl) {
2279                 log_oom();
2280                 return;
2281         }
2282
2283         arg_default_rlimit[RLIMIT_MEMLOCK] = rl;
2284 }
2285
2286 static void reset_arguments(void) {
2287         /* Frees/resets arg_* variables, with a few exceptions commented below. */
2288
2289         arg_default_unit = mfree(arg_default_unit);
2290
2291         /* arg_system — ignore */
2292
2293         arg_dump_core = true;
2294         arg_crash_chvt = -1;
2295         arg_crash_shell = false;
2296         arg_crash_reboot = false;
2297         arg_confirm_spawn = mfree(arg_confirm_spawn);
2298         arg_show_status = _SHOW_STATUS_INVALID;
2299         arg_status_unit_format = STATUS_UNIT_FORMAT_DEFAULT;
2300         arg_switched_root = false;
2301         arg_pager_flags = 0;
2302         arg_service_watchdogs = true;
2303         arg_default_std_output = EXEC_OUTPUT_JOURNAL;
2304         arg_default_std_error = EXEC_OUTPUT_INHERIT;
2305         arg_default_restart_usec = DEFAULT_RESTART_USEC;
2306         arg_default_timeout_start_usec = DEFAULT_TIMEOUT_USEC;
2307         arg_default_timeout_stop_usec = DEFAULT_TIMEOUT_USEC;
2308         arg_default_timeout_abort_usec = DEFAULT_TIMEOUT_USEC;
2309         arg_default_timeout_abort_set = false;
2310         arg_default_start_limit_interval = DEFAULT_START_LIMIT_INTERVAL;
2311         arg_default_start_limit_burst = DEFAULT_START_LIMIT_BURST;
2312         arg_runtime_watchdog = 0;
2313         arg_reboot_watchdog = 10 * USEC_PER_MINUTE;
2314         arg_kexec_watchdog = 0;
2315         arg_early_core_pattern = NULL;
2316         arg_watchdog_device = NULL;
2317
2318         arg_default_environment = strv_free(arg_default_environment);
2319         rlimit_free_all(arg_default_rlimit);
2320
2321         arg_capability_bounding_set = CAP_ALL;
2322         arg_no_new_privs = false;
2323         arg_timer_slack_nsec = NSEC_INFINITY;
2324         arg_default_timer_accuracy_usec = 1 * USEC_PER_MINUTE;
2325
2326         arg_syscall_archs = set_free(arg_syscall_archs);
2327
2328         /* arg_serialization — ignore */
2329
2330         arg_default_cpu_accounting = -1;
2331         arg_default_io_accounting = false;
2332         arg_default_ip_accounting = false;
2333         arg_default_blockio_accounting = false;
2334         arg_default_memory_accounting = MEMORY_ACCOUNTING_DEFAULT;
2335         arg_default_tasks_accounting = true;
2336         arg_default_tasks_max = DEFAULT_TASKS_MAX;
2337         arg_machine_id = (sd_id128_t) {};
2338         arg_cad_burst_action = EMERGENCY_ACTION_REBOOT_FORCE;
2339         arg_default_oom_policy = OOM_STOP;
2340
2341         cpu_set_reset(&arg_cpu_affinity);
2342         numa_policy_reset(&arg_numa_policy);
2343
2344         arg_random_seed = mfree(arg_random_seed);
2345         arg_random_seed_size = 0;
2346         arg_clock_usec = 0;
2347 }
2348
2349 static int parse_configuration(const struct rlimit *saved_rlimit_nofile,
2350                                const struct rlimit *saved_rlimit_memlock) {
2351         int r;
2352
2353         assert(saved_rlimit_nofile);
2354         assert(saved_rlimit_memlock);
2355
2356         /* Assign configuration defaults */
2357         reset_arguments();
2358
2359         r = parse_config_file();
2360         if (r < 0)
2361                 log_warning_errno(r, "Failed to parse config file, ignoring: %m");
2362
2363         if (arg_system) {
2364                 r = proc_cmdline_parse(parse_proc_cmdline_item, NULL, 0);
2365                 if (r < 0)
2366                         log_warning_errno(r, "Failed to parse kernel command line, ignoring: %m");
2367         }
2368
2369         /* Initialize some default rlimits for services if they haven't been configured */
2370         fallback_rlimit_nofile(saved_rlimit_nofile);
2371         fallback_rlimit_memlock(saved_rlimit_memlock);
2372
2373         /* Note that this also parses bits from the kernel command line, including "debug". */
2374         log_parse_environment();
2375
2376         /* Initialize the show status setting if it hasn't been set explicitly yet */
2377         if (arg_show_status == _SHOW_STATUS_INVALID)
2378                 arg_show_status = SHOW_STATUS_YES;
2379
2380         return 0;
2381 }
2382
2383 static int safety_checks(void) {
2384
2385         if (getpid_cached() == 1 &&
2386             arg_action != ACTION_RUN)
2387                 return log_error_errno(SYNTHETIC_ERRNO(EPERM),
2388                                        "Unsupported execution mode while PID 1.");
2389
2390         if (getpid_cached() == 1 &&
2391             !arg_system)
2392                 return log_error_errno(SYNTHETIC_ERRNO(EPERM),
2393                                        "Can't run --user mode as PID 1.");
2394
2395         if (arg_action == ACTION_RUN &&
2396             arg_system &&
2397             getpid_cached() != 1)
2398                 return log_error_errno(SYNTHETIC_ERRNO(EPERM),
2399                                        "Can't run system mode unless PID 1.");
2400
2401         if (arg_action == ACTION_TEST &&
2402             geteuid() == 0)
2403                 return log_error_errno(SYNTHETIC_ERRNO(EPERM),
2404                                        "Don't run test mode as root.");
2405
2406         if (!arg_system &&
2407             arg_action == ACTION_RUN &&
2408             sd_booted() <= 0)
2409                 return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP),
2410                                        "Trying to run as user instance, but the system has not been booted with systemd.");
2411
2412         if (!arg_system &&
2413             arg_action == ACTION_RUN &&
2414             !getenv("XDG_RUNTIME_DIR"))
2415                 return log_error_errno(SYNTHETIC_ERRNO(EUNATCH),
2416                                        "Trying to run as user instance, but $XDG_RUNTIME_DIR is not set.");
2417
2418         if (arg_system &&
2419             arg_action == ACTION_RUN &&
2420             running_in_chroot() > 0)
2421                 return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP),
2422                                        "Cannot be run in a chroot() environment.");
2423
2424         return 0;
2425 }
2426
2427 static int initialize_security(
2428                 bool *loaded_policy,
2429                 dual_timestamp *security_start_timestamp,
2430                 dual_timestamp *security_finish_timestamp,
2431                 const char **ret_error_message) {
2432
2433         int r;
2434
2435         assert(loaded_policy);
2436         assert(security_start_timestamp);
2437         assert(security_finish_timestamp);
2438         assert(ret_error_message);
2439
2440         dual_timestamp_get(security_start_timestamp);
2441
2442         r = mac_selinux_setup(loaded_policy);
2443         if (r < 0) {
2444                 *ret_error_message = "Failed to load SELinux policy";
2445                 return r;
2446         }
2447
2448         r = mac_smack_setup(loaded_policy);
2449         if (r < 0) {
2450                 *ret_error_message = "Failed to load SMACK policy";
2451                 return r;
2452         }
2453
2454         r = mac_apparmor_setup();
2455         if (r < 0) {
2456                 *ret_error_message = "Failed to load AppArmor policy";
2457                 return r;
2458         }
2459
2460         r = ima_setup();
2461         if (r < 0) {
2462                 *ret_error_message = "Failed to load IMA policy";
2463                 return r;
2464         }
2465
2466         dual_timestamp_get(security_finish_timestamp);
2467         return 0;
2468 }
2469
2470 static void test_summary(Manager *m) {
2471         assert(m);
2472
2473         printf("-> By units:\n");
2474         manager_dump_units(m, stdout, "\t");
2475
2476         printf("-> By jobs:\n");
2477         manager_dump_jobs(m, stdout, "\t");
2478 }
2479
2480 static int collect_fds(FDSet **ret_fds, const char **ret_error_message) {
2481         int r;
2482
2483         assert(ret_fds);
2484         assert(ret_error_message);
2485
2486         r = fdset_new_fill(ret_fds);
2487         if (r < 0) {
2488                 *ret_error_message = "Failed to allocate fd set";
2489                 return log_emergency_errno(r, "Failed to allocate fd set: %m");
2490         }
2491
2492         fdset_cloexec(*ret_fds, true);
2493
2494         if (arg_serialization)
2495                 assert_se(fdset_remove(*ret_fds, fileno(arg_serialization)) >= 0);
2496
2497         return 0;
2498 }
2499
2500 static void setup_console_terminal(bool skip_setup) {
2501
2502         if (!arg_system)
2503                 return;
2504
2505         /* Become a session leader if we aren't one yet. */
2506         (void) setsid();
2507
2508         /* If we are init, we connect stdin/stdout/stderr to /dev/null and make sure we don't have a controlling
2509          * tty. */
2510         (void) release_terminal();
2511
2512         /* Reset the console, but only if this is really init and we are freshly booted */
2513         if (getpid_cached() == 1 && !skip_setup)
2514                 (void) console_setup();
2515 }
2516
/* Takes a quick peek at the command line, long before the full parsing is done, to find out whether this
 * is a reexecution or a regular boot-up.
 *
 * Returns true if "--deserialize" was passed, as then we are reexecuting and the extensive setup can be
 * avoided. Returns false otherwise, and also whenever "--switched-root" was passed: after switching root
 * all the special setup is done anyway, even though deserialization takes place in that case too. */
static bool early_skip_setup_check(int argc, char *argv[]) {
        bool deserializing = false;

        /* argv[0] is our own name, start with the first real argument */
        for (int n = 1; n < argc; n++) {
                const char *arg = argv[n];

                /* Switching root overrides everything else, no need to look any further. */
                if (streq(arg, "--switched-root"))
                        return false;

                if (streq(arg, "--deserialize"))
                        deserializing = true;
        }

        return deserializing;
}
2534
2535 static int save_env(void) {
2536         char **l;
2537
2538         l = strv_copy(environ);
2539         if (!l)
2540                 return -ENOMEM;
2541
2542         strv_free_and_replace(saved_env, l);
2543         return 0;
2544 }
2545
2546 int main(int argc, char *argv[]) {
2547
2548         dual_timestamp initrd_timestamp = DUAL_TIMESTAMP_NULL, userspace_timestamp = DUAL_TIMESTAMP_NULL, kernel_timestamp = DUAL_TIMESTAMP_NULL,
2549                 security_start_timestamp = DUAL_TIMESTAMP_NULL, security_finish_timestamp = DUAL_TIMESTAMP_NULL;
2550         struct rlimit saved_rlimit_nofile = RLIMIT_MAKE_CONST(0),
2551                 saved_rlimit_memlock = RLIMIT_MAKE_CONST(RLIM_INFINITY); /* The original rlimits we passed
2552                                                                           * in. Note we use different values
2553                                                                           * for the two that indicate whether
2554                                                                           * these fields are initialized! */
2555         bool skip_setup, loaded_policy = false, queue_default_job = false, first_boot = false, reexecute = false;
2556         char *switch_root_dir = NULL, *switch_root_init = NULL;
2557         usec_t before_startup, after_startup;
2558         static char systemd[] = "systemd";
2559         char timespan[FORMAT_TIMESPAN_MAX];
2560         const char *shutdown_verb = NULL, *error_message = NULL;
2561         int r, retval = EXIT_FAILURE;
2562         Manager *m = NULL;
2563         FDSet *fds = NULL;
2564
2565         /* SysV compatibility: redirect init → telinit */
2566         redirect_telinit(argc, argv);
2567
2568         /* Take timestamps early on */
2569         dual_timestamp_from_monotonic(&kernel_timestamp, 0);
2570         dual_timestamp_get(&userspace_timestamp);
2571
2572         /* Figure out whether we need to do initialize the system, or if we already did that because we are
2573          * reexecuting */
2574         skip_setup = early_skip_setup_check(argc, argv);
2575
2576         /* If we get started via the /sbin/init symlink then we are called 'init'. After a subsequent reexecution we
2577          * are then called 'systemd'. That is confusing, hence let's call us systemd right-away. */
2578         program_invocation_short_name = systemd;
2579         (void) prctl(PR_SET_NAME, systemd);
2580
2581         /* Save the original command line */
2582         save_argc_argv(argc, argv);
2583
2584         /* Save the original environment as we might need to restore it if we're requested to execute another
2585          * system manager later. */
2586         r = save_env();
2587         if (r < 0) {
2588                 error_message = "Failed to copy environment block";
2589                 goto finish;
2590         }
2591
2592         /* Make sure that if the user says "syslog" we actually log to the journal. */
2593         log_set_upgrade_syslog_to_journal(true);
2594
2595         if (getpid_cached() == 1) {
2596                 /* When we run as PID 1 force system mode */
2597                 arg_system = true;
2598
2599                 /* Disable the umask logic */
2600                 umask(0);
2601
2602                 /* Make sure that at least initially we do not ever log to journald/syslogd, because it might not be
2603                  * activated yet (even though the log socket for it exists). */
2604                 log_set_prohibit_ipc(true);
2605
2606                 /* Always reopen /dev/console when running as PID 1 or one of its pre-execve() children. This is
2607                  * important so that we never end up logging to any foreign stderr, for example if we have to log in a
2608                  * child process right before execve()'ing the actual binary, at a point in time where socket
2609                  * activation stderr/stdout area already set up. */
2610                 log_set_always_reopen_console(true);
2611
2612                 if (detect_container() <= 0) {
2613
2614                         /* Running outside of a container as PID 1 */
2615                         log_set_target(LOG_TARGET_KMSG);
2616                         log_open();
2617
2618                         if (in_initrd())
2619                                 initrd_timestamp = userspace_timestamp;
2620
2621                         if (!skip_setup) {
2622                                 r = mount_setup_early();
2623                                 if (r < 0) {
2624                                         error_message = "Failed to mount early API filesystems";
2625                                         goto finish;
2626                                 }
2627
2628                                 /* Let's open the log backend a second time, in case the first time didn't
2629                                  * work. Quite possibly we have mounted /dev just now, so /dev/kmsg became
2630                                  * available, and it previously wasn't. */
2631                                 log_open();
2632
2633                                 disable_printk_ratelimit();
2634
2635                                 r = initialize_security(
2636                                                 &loaded_policy,
2637                                                 &security_start_timestamp,
2638                                                 &security_finish_timestamp,
2639                                                 &error_message);
2640                                 if (r < 0)
2641                                         goto finish;
2642                         }
2643
2644                         if (mac_selinux_init() < 0) {
2645                                 error_message = "Failed to initialize SELinux support";
2646                                 goto finish;
2647                         }
2648
2649                         if (!skip_setup)
2650                                 initialize_clock();
2651
2652                         /* Set the default for later on, but don't actually open the logs like this for now. Note that
2653                          * if we are transitioning from the initrd there might still be journal fd open, and we
2654                          * shouldn't attempt opening that before we parsed /proc/cmdline which might redirect output
2655                          * elsewhere. */
2656                         log_set_target(LOG_TARGET_JOURNAL_OR_KMSG);
2657
2658                 } else {
2659                         /* Running inside a container, as PID 1 */
2660                         log_set_target(LOG_TARGET_CONSOLE);
2661                         log_open();
2662
2663                         /* For later on, see above... */
2664                         log_set_target(LOG_TARGET_JOURNAL);
2665
2666                         /* clear the kernel timestamp, because we are in a container */
2667                         kernel_timestamp = DUAL_TIMESTAMP_NULL;
2668                 }
2669
2670                 initialize_coredump(skip_setup);
2671
2672                 r = fixup_environment();
2673                 if (r < 0) {
2674                         log_emergency_errno(r, "Failed to fix up PID 1 environment: %m");
2675                         error_message = "Failed to fix up PID1 environment";
2676                         goto finish;
2677                 }
2678
2679                 /* Try to figure out if we can use colors with the console. No need to do that for user instances since
2680                  * they never log into the console. */
2681                 log_show_color(colors_enabled());
2682
2683                 r = make_null_stdio();
2684                 if (r < 0)
2685                         log_warning_errno(r, "Failed to redirect standard streams to /dev/null, ignoring: %m");
2686
2687                 /* Load the kernel modules early. */
2688                 if (!skip_setup)
2689                         kmod_setup();
2690
2691                 /* Mount /proc, /sys and friends, so that /proc/cmdline and /proc/$PID/fd is available. */
2692                 r = mount_setup(loaded_policy, skip_setup);
2693                 if (r < 0) {
2694                         error_message = "Failed to mount API filesystems";
2695                         goto finish;
2696                 }
2697
2698                 /* The efivarfs is now mounted, let's read the random seed off it */
2699                 (void) efi_take_random_seed();
2700
2701                 /* Cache command-line options passed from EFI variables */
2702                 if (!skip_setup)
2703                         (void) cache_efi_options_variable();
2704         } else {
2705                 /* Running as user instance */
2706                 arg_system = false;
2707                 log_set_target(LOG_TARGET_AUTO);
2708                 log_open();
2709
2710                 /* clear the kernel timestamp, because we are not PID 1 */
2711                 kernel_timestamp = DUAL_TIMESTAMP_NULL;
2712
2713                 if (mac_selinux_init() < 0) {
2714                         error_message = "Failed to initialize SELinux support";
2715                         goto finish;
2716                 }
2717         }
2718
2719         /* Save the original RLIMIT_NOFILE/RLIMIT_MEMLOCK so that we can reset it later when
2720          * transitioning from the initrd to the main systemd or suchlike. */
2721         save_rlimits(&saved_rlimit_nofile, &saved_rlimit_memlock);
2722
2723         /* Reset all signal handlers. */
2724         (void) reset_all_signal_handlers();
2725         (void) ignore_signals(SIGNALS_IGNORE, -1);
2726
2727         (void) parse_configuration(&saved_rlimit_nofile, &saved_rlimit_memlock);
2728
2729         r = parse_argv(argc, argv);
2730         if (r < 0) {
2731                 error_message = "Failed to parse commandline arguments";
2732                 goto finish;
2733         }
2734
2735         r = safety_checks();
2736         if (r < 0)
2737                 goto finish;
2738
2739         if (IN_SET(arg_action, ACTION_TEST, ACTION_HELP, ACTION_DUMP_CONFIGURATION_ITEMS, ACTION_DUMP_BUS_PROPERTIES, ACTION_BUS_INTROSPECT))
2740                 (void) pager_open(arg_pager_flags);
2741
2742         if (arg_action != ACTION_RUN)
2743                 skip_setup = true;
2744
2745         if (arg_action == ACTION_HELP) {
2746                 retval = help() < 0 ? EXIT_FAILURE : EXIT_SUCCESS;
2747                 goto finish;
2748         } else if (arg_action == ACTION_VERSION) {
2749                 retval = version();
2750                 goto finish;
2751         } else if (arg_action == ACTION_DUMP_CONFIGURATION_ITEMS) {
2752                 unit_dump_config_items(stdout);
2753                 retval = EXIT_SUCCESS;
2754                 goto finish;
2755         } else if (arg_action == ACTION_DUMP_BUS_PROPERTIES) {
2756                 dump_bus_properties(stdout);
2757                 retval = EXIT_SUCCESS;
2758                 goto finish;
2759         } else if (arg_action == ACTION_BUS_INTROSPECT) {
2760                 r = bus_manager_introspect_implementations(stdout, arg_bus_introspect);
2761                 retval = r >= 0 ? EXIT_SUCCESS : EXIT_FAILURE;
2762                 goto finish;
2763         }
2764
2765         assert_se(IN_SET(arg_action, ACTION_RUN, ACTION_TEST));
2766
2767         /* Move out of the way, so that we won't block unmounts */
2768         assert_se(chdir("/") == 0);
2769
2770         if (arg_action == ACTION_RUN) {
2771                 if (!skip_setup) {
2772                         /* Apply the systemd.clock_usec= kernel command line switch */
2773                         apply_clock_update();
2774
2775                         /* Apply random seed from kernel command line */
2776                         cmdline_take_random_seed();
2777                 }
2778
2779                 /* A core pattern might have been specified via the cmdline.  */
2780                 initialize_core_pattern(skip_setup);
2781
2782                 /* Close logging fds, in order not to confuse collecting passed fds and terminal logic below */
2783                 log_close();
2784
2785                 /* Remember open file descriptors for later deserialization */
2786                 r = collect_fds(&fds, &error_message);
2787                 if (r < 0)
2788                         goto finish;
2789
2790                 /* Give up any control of the console, but make sure it's initialized. */
2791                 setup_console_terminal(skip_setup);
2792
2793                 /* Open the logging devices, if possible and necessary */
2794                 log_open();
2795         }
2796
2797         log_execution_mode(&first_boot);
2798
2799         r = initialize_runtime(skip_setup,
2800                                first_boot,
2801                                &saved_rlimit_nofile,
2802                                &saved_rlimit_memlock,
2803                                &error_message);
2804         if (r < 0)
2805                 goto finish;
2806
2807         r = manager_new(arg_system ? UNIT_FILE_SYSTEM : UNIT_FILE_USER,
2808                         arg_action == ACTION_TEST ? MANAGER_TEST_FULL : 0,
2809                         &m);
2810         if (r < 0) {
2811                 log_emergency_errno(r, "Failed to allocate manager object: %m");
2812                 error_message = "Failed to allocate manager object";
2813                 goto finish;
2814         }
2815
2816         m->timestamps[MANAGER_TIMESTAMP_KERNEL] = kernel_timestamp;
2817         m->timestamps[MANAGER_TIMESTAMP_INITRD] = initrd_timestamp;
2818         m->timestamps[MANAGER_TIMESTAMP_USERSPACE] = userspace_timestamp;
2819         m->timestamps[manager_timestamp_initrd_mangle(MANAGER_TIMESTAMP_SECURITY_START)] = security_start_timestamp;
2820         m->timestamps[manager_timestamp_initrd_mangle(MANAGER_TIMESTAMP_SECURITY_FINISH)] = security_finish_timestamp;
2821
2822         set_manager_defaults(m);
2823         set_manager_settings(m);
2824         manager_set_first_boot(m, first_boot);
2825
2826         /* Remember whether we should queue the default job */
2827         queue_default_job = !arg_serialization || arg_switched_root;
2828
2829         before_startup = now(CLOCK_MONOTONIC);
2830
2831         r = manager_startup(m, arg_serialization, fds);
2832         if (r < 0) {
2833                 error_message = "Failed to start up manager";
2834                 goto finish;
2835         }
2836
2837         /* This will close all file descriptors that were opened, but not claimed by any unit. */
2838         fds = fdset_free(fds);
2839         arg_serialization = safe_fclose(arg_serialization);
2840
2841         if (queue_default_job) {
2842                 r = do_queue_default_job(m, &error_message);
2843                 if (r < 0)
2844                         goto finish;
2845         }
2846
2847         after_startup = now(CLOCK_MONOTONIC);
2848
2849         log_full(arg_action == ACTION_TEST ? LOG_INFO : LOG_DEBUG,
2850                  "Loaded units and determined initial transaction in %s.",
2851                  format_timespan(timespan, sizeof(timespan), after_startup - before_startup, 100 * USEC_PER_MSEC));
2852
2853         if (arg_action == ACTION_TEST) {
2854                 test_summary(m);
2855                 retval = EXIT_SUCCESS;
2856                 goto finish;
2857         }
2858
2859         (void) invoke_main_loop(m,
2860                                 &saved_rlimit_nofile,
2861                                 &saved_rlimit_memlock,
2862                                 &reexecute,
2863                                 &retval,
2864                                 &shutdown_verb,
2865                                 &fds,
2866                                 &switch_root_dir,
2867                                 &switch_root_init,
2868                                 &error_message);
2869
2870 finish:
2871         pager_close();
2872
2873         if (m) {
2874                 arg_reboot_watchdog = manager_get_watchdog(m, WATCHDOG_REBOOT);
2875                 arg_kexec_watchdog = manager_get_watchdog(m, WATCHDOG_KEXEC);
2876                 m = manager_free(m);
2877         }
2878
2879         mac_selinux_finish();
2880
2881         if (reexecute)
2882                 do_reexecute(argc, argv,
2883                              &saved_rlimit_nofile,
2884                              &saved_rlimit_memlock,
2885                              fds,
2886                              switch_root_dir,
2887                              switch_root_init,
2888                              &error_message); /* This only returns if reexecution failed */
2889
2890         arg_serialization = safe_fclose(arg_serialization);
2891         fds = fdset_free(fds);
2892
2893         saved_env = strv_free(saved_env);
2894
2895 #if HAVE_VALGRIND_VALGRIND_H
2896         /* If we are PID 1 and running under valgrind, then let's exit
2897          * here explicitly. valgrind will only generate nice output on
2898          * exit(), not on exec(), hence let's do the former not the
2899          * latter here. */
2900         if (getpid_cached() == 1 && RUNNING_ON_VALGRIND) {
2901                 /* Cleanup watchdog_device strings for valgrind. We need them
2902                  * in become_shutdown() so normally we cannot free them yet. */
2903                 watchdog_free_device();
2904                 arg_watchdog_device = mfree(arg_watchdog_device);
2905                 reset_arguments();
2906                 return retval;
2907         }
2908 #endif
2909
2910 #if HAS_FEATURE_ADDRESS_SANITIZER
2911         __lsan_do_leak_check();
2912 #endif
2913
2914         if (shutdown_verb) {
2915                 r = become_shutdown(shutdown_verb, retval);
2916                 log_error_errno(r, "Failed to execute shutdown binary, %s: %m", getpid_cached() == 1 ? "freezing" : "quitting");
2917                 error_message = "Failed to execute shutdown binary";
2918         }
2919
2920         watchdog_free_device();
2921         arg_watchdog_device = mfree(arg_watchdog_device);
2922
2923         if (getpid_cached() == 1) {
2924                 if (error_message)
2925                         manager_status_printf(NULL, STATUS_TYPE_EMERGENCY,
2926                                               ANSI_HIGHLIGHT_RED "!!!!!!" ANSI_NORMAL,
2927                                               "%s.", error_message);
2928                 freeze_or_exit_or_reboot();
2929         }
2930
2931         reset_arguments();
2932         return retval;
2933 }