git.ipfire.org Git - thirdparty/systemd.git/blobdiff - src/core/main.c
main: uid_to_name() might theoretically fail, handle that
[thirdparty/systemd.git] / src / core / main.c
index 5d0f385d7bdcc7bb9f042a69269a51eeb8c6ea88..4b395601a04c57b6e379bc6d90c19603f040cbae 100644 (file)
 #include <sys/reboot.h>
 #include <sys/stat.h>
 #include <unistd.h>
-#ifdef HAVE_SECCOMP
+#if HAVE_SECCOMP
 #include <seccomp.h>
 #endif
-#ifdef HAVE_VALGRIND_VALGRIND_H
+#if HAVE_VALGRIND_VALGRIND_H
 #include <valgrind/valgrind.h>
 #endif
 
@@ -74,7 +74,7 @@
 #include "process-util.h"
 #include "raw-clone.h"
 #include "rlimit-util.h"
-#ifdef HAVE_SECCOMP
+#if HAVE_SECCOMP
 #include "seccomp-util.h"
 #endif
 #include "selinux-setup.h"
@@ -128,6 +128,7 @@ static Set* arg_syscall_archs = NULL;
 static FILE* arg_serialization = NULL;
 static bool arg_default_cpu_accounting = false;
 static bool arg_default_io_accounting = false;
+static bool arg_default_ip_accounting = false;
 static bool arg_default_blockio_accounting = false;
 static bool arg_default_memory_accounting = false;
 static bool arg_default_tasks_accounting = true;
@@ -716,7 +717,7 @@ static int parse_config_file(void) {
                 { "Manager", "RuntimeWatchdogSec",        config_parse_sec,              0, &arg_runtime_watchdog                  },
                 { "Manager", "ShutdownWatchdogSec",       config_parse_sec,              0, &arg_shutdown_watchdog                 },
                 { "Manager", "CapabilityBoundingSet",     config_parse_capability_set,   0, &arg_capability_bounding_set           },
-#ifdef HAVE_SECCOMP
+#if HAVE_SECCOMP
                 { "Manager", "SystemCallArchitectures",   config_parse_syscall_archs,    0, &arg_syscall_archs                     },
 #endif
                 { "Manager", "TimerSlackNSec",            config_parse_nsec,             0, &arg_timer_slack_nsec                  },
@@ -748,6 +749,7 @@ static int parse_config_file(void) {
                 { "Manager", "DefaultLimitRTTIME",        config_parse_limit,            RLIMIT_RTTIME, arg_default_rlimit         },
                 { "Manager", "DefaultCPUAccounting",      config_parse_bool,             0, &arg_default_cpu_accounting            },
                 { "Manager", "DefaultIOAccounting",       config_parse_bool,             0, &arg_default_io_accounting             },
+                { "Manager", "DefaultIPAccounting",       config_parse_bool,             0, &arg_default_ip_accounting             },
                 { "Manager", "DefaultBlockIOAccounting",  config_parse_bool,             0, &arg_default_blockio_accounting        },
                 { "Manager", "DefaultMemoryAccounting",   config_parse_bool,             0, &arg_default_memory_accounting         },
                 { "Manager", "DefaultTasksAccounting",    config_parse_bool,             0, &arg_default_tasks_accounting          },
@@ -766,7 +768,7 @@ static int parse_config_file(void) {
                 CONF_PATHS_NULSTR("systemd/system.conf.d") :
                 CONF_PATHS_NULSTR("systemd/user.conf.d");
 
-        config_parse_many_nulstr(fn, conf_dirs_nulstr, "Manager\0", config_item_table_lookup, items, false, NULL);
+        (void) config_parse_many_nulstr(fn, conf_dirs_nulstr, "Manager\0", config_item_table_lookup, items, CONFIG_PARSE_WARN, NULL);
 
         /* Traditionally "0" was used to turn off the default unit timeouts. Fix this up so that we used USEC_INFINITY
          * like everywhere else. */
@@ -778,7 +780,7 @@ static int parse_config_file(void) {
         return 0;
 }
 
-static void manager_set_defaults(Manager *m) {
+static void set_manager_defaults(Manager *m) {
 
         assert(m);
 
@@ -792,6 +794,7 @@ static void manager_set_defaults(Manager *m) {
         m->default_start_limit_burst = arg_default_start_limit_burst;
         m->default_cpu_accounting = arg_default_cpu_accounting;
         m->default_io_accounting = arg_default_io_accounting;
+        m->default_ip_accounting = arg_default_ip_accounting;
         m->default_blockio_accounting = arg_default_blockio_accounting;
         m->default_memory_accounting = arg_default_memory_accounting;
         m->default_tasks_accounting = arg_default_tasks_accounting;
@@ -801,6 +804,18 @@ static void manager_set_defaults(Manager *m) {
         manager_environment_add(m, NULL, arg_default_environment);
 }
 
+static void set_manager_settings(Manager *m) {
+        /* Copies the parsed confirm-spawn, watchdog, cad_burst_action and show-status settings into the manager. */
+        assert(m);
+
+        m->confirm_spawn = arg_confirm_spawn;
+        m->runtime_watchdog = arg_runtime_watchdog;
+        m->shutdown_watchdog = arg_shutdown_watchdog;
+        m->cad_burst_action = arg_cad_burst_action;
+        /* Unlike the fields above, show-status is applied through its setter rather than assigned directly. */
+        manager_set_show_status(m, arg_show_status);
+}
+
 static int parse_argv(int argc, char *argv[]) {
 
         enum {
@@ -1202,6 +1217,26 @@ static int bump_rlimit_nofile(struct rlimit *saved_rlimit) {
         return 0;
 }
 
+static int bump_rlimit_memlock(struct rlimit *saved_rlimit) {
+        int r;
+        /* Stores the current RLIMIT_MEMLOCK in saved_rlimit, then requests a 16 MiB limit via setrlimit_closest(). */
+        assert(saved_rlimit);
+        assert(getuid() == 0);
+
+        /* BPF_MAP_TYPE_LPM_TRIE bpf maps are charged against RLIMIT_MEMLOCK, even though we have CAP_IPC_LOCK which
+         * should normally disable such checks. We need them to implement IPAccessAllow= and IPAccessDeny=, hence let's
+         * bump the value high enough for the root user. */
+
+        if (getrlimit(RLIMIT_MEMLOCK, saved_rlimit) < 0)
+                return log_warning_errno(errno, "Reading RLIMIT_MEMLOCK failed, ignoring: %m");
+        /* If reading failed we returned above without changing the limit; otherwise ask for 1024*1024*16. */
+        r = setrlimit_closest(RLIMIT_MEMLOCK, &RLIMIT_MAKE_CONST(1024ULL*1024ULL*16ULL));
+        if (r < 0)
+                return log_warning_errno(r, "Setting RLIMIT_MEMLOCK failed, ignoring: %m");
+        /* Both failure paths return the result of log_warning_errno(); success returns 0. */
+        return 0;
+}
+
 static void test_usr(void) {
 
         /* Check that /usr is not a separate fs */
@@ -1241,7 +1276,7 @@ oom:
 }
 
 static int enforce_syscall_archs(Set *archs) {
-#ifdef HAVE_SECCOMP
+#if HAVE_SECCOMP
         int r;
 
         if (!is_seccomp_available())
@@ -1364,6 +1399,161 @@ static int fixup_environment(void) {
         return 0;
 }
 
+static void redirect_telinit(int argc, char *argv[]) {
+        /* When not PID 1 and invoked under a name containing init, replaces this process with SYSTEMCTL_BINARY_PATH. */
+        /* This is compatibility support for SysV, where calling init as a user is identical to telinit. */
+        /* Without HAVE_SYSV_COMPAT the body below is compiled out and this function does nothing. */
+#if HAVE_SYSV_COMPAT
+        if (getpid_cached() == 1)
+                return;
+
+        if (!strstr(program_invocation_short_name, "init"))
+                return;
+        /* argv is passed through unchanged. execv() only returns on failure, so what follows is the error path. */
+        execv(SYSTEMCTL_BINARY_PATH, argv);
+        log_error_errno(errno, "Failed to exec " SYSTEMCTL_BINARY_PATH ": %m");
+        exit(1);
+#endif
+}
+
+static int become_shutdown(
+                const char *shutdown_verb,
+                int retval,
+                bool arm_reboot_watchdog) {
+        /* Builds argv and envp for SYSTEMD_SHUTDOWN_BINARY_PATH and execve()s it. Returns -errno if the exec fails. */
+        char log_level[DECIMAL_STR_MAX(int) + 1],
+                exit_code[DECIMAL_STR_MAX(uint8_t) + 1];
+        /* 11 slots: 5 fixed entries, the log target, 2 optional flags, --exit-code plus its value, and the NULL end. */
+        const char* command_line[11] = {
+                SYSTEMD_SHUTDOWN_BINARY_PATH,
+                shutdown_verb,
+                "--log-level", log_level,
+                "--log-target",
+        };
+
+        _cleanup_strv_free_ char **env_block = NULL;
+        size_t pos = 5;
+        int r;
+        /* pos indexes the first slot the initializer above left unset, i.e. the value for --log-target. */
+        assert(command_line[pos] == NULL);
+        env_block = strv_copy(environ);
+        /* NOTE(review): the strv_copy() result is used unchecked; confirm a NULL env_block is acceptable below. */
+        xsprintf(log_level, "%d", log_get_max_level());
+        /* Map the current log target onto one of the three target names passed to the shutdown binary. */
+        switch (log_get_target()) {
+
+        case LOG_TARGET_KMSG:
+        case LOG_TARGET_JOURNAL_OR_KMSG:
+        case LOG_TARGET_SYSLOG_OR_KMSG:
+                command_line[pos++] = "kmsg";
+                break;
+
+        case LOG_TARGET_NULL:
+                command_line[pos++] = "null";
+                break;
+
+        case LOG_TARGET_CONSOLE:
+        default:
+                command_line[pos++] = "console";
+                break;
+        };
+
+        if (log_get_show_color())
+                command_line[pos++] = "--log-color";
+
+        if (log_get_show_location())
+                command_line[pos++] = "--log-location";
+        /* retval is only forwarded when the verb is exit. */
+        if (streq(shutdown_verb, "exit")) {
+                command_line[pos++] = "--exit-code";
+                command_line[pos++] = exit_code;
+                xsprintf(exit_code, "%d", retval);
+        }
+        /* At least one slot must remain unset so that the argument vector stays NULL-terminated. */
+        assert(pos < ELEMENTSOF(command_line));
+
+        if (arm_reboot_watchdog && arg_shutdown_watchdog > 0 && arg_shutdown_watchdog != USEC_INFINITY) {
+                char *e;
+
+                /* If we reboot let's set the shutdown
+                 * watchdog and tell the shutdown binary to
+                 * repeatedly ping it */
+                r = watchdog_set_timeout(&arg_shutdown_watchdog);
+                watchdog_close(r < 0);
+
+                /* Tell the binary how often to ping, ignore failure */
+                if (asprintf(&e, "WATCHDOG_USEC="USEC_FMT, arg_shutdown_watchdog) > 0)
+                        (void) strv_push(&env_block, e);
+        } else
+                watchdog_close(true);
+
+        /* Avoid the creation of new processes forked by the
+         * kernel; at this point, we will not listen to the
+         * signals anyway */
+        if (detect_container() <= 0)
+                (void) cg_uninstall_release_agent(SYSTEMD_CGROUP_CONTROLLER);
+        /* execve() only returns on error, so reaching the return below always means failure. */
+        execve(SYSTEMD_SHUTDOWN_BINARY_PATH, (char **) command_line, env_block);
+        return -errno;
+}
+
+static void initialize_clock(void) {
+        int r;
+        /* Sets the kernel timezone or seals the time warp, then applies the build-time epoch. Failures are only logged. */
+        if (clock_is_localtime(NULL) > 0) {
+                int min;
+
+                /*
+                 * The very first call of settimeofday() also does a time warp in the kernel.
+                 *
+                 * In the rtc-in-local time mode, we set the kernel's timezone, and rely on external tools to take care
+                 * of maintaining the RTC and do all adjustments.  This matches the behavior of Windows, which leaves
+                 * the RTC alone if the registry tells that the RTC runs in UTC.
+                 */
+                r = clock_set_timezone(&min);
+                if (r < 0)
+                        log_error_errno(r, "Failed to apply local time delta, ignoring: %m");
+                else
+                        log_info("RTC configured in localtime, applying delta of %i minutes to system time.", min);
+
+        } else if (!in_initrd()) {
+                /*
+                 * Do a dummy very first call to seal the kernel's time warp magic.
+                 *
+                 * Do not call this from inside the initrd. The initrd might not carry /etc/adjtime with LOCAL, but the
+                 * real system could be set up that way. In such case, we need to delay the time-warp or the sealing
+                 * until we reach the real system.
+                 *
+                 * Do not set the kernel's timezone. The concept of local time cannot be supported reliably, the time
+                 * will jump or be incorrect at every daylight saving time change. All kernel local time concepts will
+                 * be treated as UTC that way.
+                 */
+                (void) clock_reset_timewarp();
+        }
+        /* Runs on every path above: r < 0 is logged as an error, r > 0 is logged as the clock having been advanced. */
+        r = clock_apply_epoch();
+        if (r < 0)
+                log_error_errno(r, "Current system time is before build time, but cannot correct: %m");
+        else if (r > 0)
+                log_info("System time before build time, advancing clock.");
+}
+
+static void initialize_coredump(bool skip_setup) {
+        /* PID 1 only: lifts RLIMIT_CORE to infinity and, unless skip_setup is set, writes |/bin/false to core_pattern. */
+        if (getpid_cached() != 1)
+                return;
+
+        /* Don't limit the core dump size, so that coredump handlers such as systemd-coredump (which honour the limit)
+         * will process core dumps for system services by default. */
+        if (setrlimit(RLIMIT_CORE, &RLIMIT_MAKE_CONST(RLIM_INFINITY)) < 0)
+                log_warning_errno(errno, "Failed to set RLIMIT_CORE: %m");
+
+        /* But at the same time, turn off the core_pattern logic by default, so that no coredumps are stored
+         * until the systemd-coredump tool is enabled via sysctl. */
+        if (!skip_setup)
+                (void) write_string_file("/proc/sys/kernel/core_pattern", "|/bin/false", 0);
+}
+
 int main(int argc, char *argv[]) {
         Manager *m = NULL;
         int r, retval = EXIT_FAILURE;
@@ -1383,21 +1573,12 @@ int main(int argc, char *argv[]) {
         bool loaded_policy = false;
         bool arm_reboot_watchdog = false;
         bool queue_default_job = false;
-        bool empty_etc = false;
+        bool first_boot = false;
         char *switch_root_dir = NULL, *switch_root_init = NULL;
-        struct rlimit saved_rlimit_nofile = RLIMIT_MAKE_CONST(0);
+        struct rlimit saved_rlimit_nofile = RLIMIT_MAKE_CONST(0), saved_rlimit_memlock = RLIMIT_MAKE_CONST((rlim_t) -1);
         const char *error_message = NULL;
 
-#ifdef HAVE_SYSV_COMPAT
-        if (getpid_cached() != 1 && strstr(program_invocation_short_name, "init")) {
-                /* This is compatibility support for SysV, where
-                 * calling init as a user is identical to telinit. */
-
-                execv(SYSTEMCTL_BINARY_PATH, argv);
-                log_error_errno(errno, "Failed to exec " SYSTEMCTL_BINARY_PATH ": %m");
-                return 1;
-        }
-#endif
+        redirect_telinit(argc, argv);
 
         dual_timestamp_from_monotonic(&kernel_timestamp, 0);
         dual_timestamp_get(&userspace_timestamp);
@@ -1472,46 +1653,8 @@ int main(int argc, char *argv[]) {
                         goto finish;
                 }
 
-                if (!skip_setup) {
-                        if (clock_is_localtime(NULL) > 0) {
-                                int min;
-
-                                /*
-                                 * The very first call of settimeofday() also does a time warp in the kernel.
-                                 *
-                                 * In the rtc-in-local time mode, we set the kernel's timezone, and rely on
-                                 * external tools to take care of maintaining the RTC and do all adjustments.
-                                 * This matches the behavior of Windows, which leaves the RTC alone if the
-                                 * registry tells that the RTC runs in UTC.
-                                 */
-                                r = clock_set_timezone(&min);
-                                if (r < 0)
-                                        log_error_errno(r, "Failed to apply local time delta, ignoring: %m");
-                                else
-                                        log_info("RTC configured in localtime, applying delta of %i minutes to system time.", min);
-                        } else if (!in_initrd()) {
-                                /*
-                                 * Do a dummy very first call to seal the kernel's time warp magic.
-                                 *
-                                 * Do not call this from inside the initrd. The initrd might not
-                                 * carry /etc/adjtime with LOCAL, but the real system could be set up
-                                 * that way. In such case, we need to delay the time-warp or the sealing
-                                 * until we reach the real system.
-                                 *
-                                 * Do no set the kernel's timezone. The concept of local time cannot
-                                 * be supported reliably, the time will jump or be incorrect at every daylight
-                                 * saving time change. All kernel local time concepts will be treated
-                                 * as UTC that way.
-                                 */
-                                (void) clock_reset_timewarp();
-                        }
-
-                        r = clock_apply_epoch();
-                        if (r < 0)
-                                log_error_errno(r, "Current system time is before build time, but cannot correct: %m");
-                        else if (r > 0)
-                                log_info("System time before build time, advancing clock.");
-                }
+                if (!skip_setup)
+                        initialize_clock();
 
                 /* Set the default for later on, but don't actually
                  * open the logs like this for now. Note that if we
@@ -1545,17 +1688,7 @@ int main(int argc, char *argv[]) {
                 kernel_timestamp = DUAL_TIMESTAMP_NULL;
         }
 
-        if (getpid_cached() == 1) {
-                /* Don't limit the core dump size, so that coredump handlers such as systemd-coredump (which honour the limit)
-                 * will process core dumps for system services by default. */
-                if (setrlimit(RLIMIT_CORE, &RLIMIT_MAKE_CONST(RLIM_INFINITY)) < 0)
-                        log_warning_errno(errno, "Failed to set RLIMIT_CORE: %m");
-
-                /* But at the same time, turn off the core_pattern logic by default, so that no coredumps are stored
-                 * until the systemd-coredump tool is enabled via sysctl. */
-                if (!skip_setup)
-                        (void) write_string_file("/proc/sys/kernel/core_pattern", "|/bin/false", 0);
-        }
+        initialize_coredump(skip_setup);
 
         if (arg_system) {
                 if (fixup_environment() < 0) {
@@ -1582,7 +1715,7 @@ int main(int argc, char *argv[]) {
          * /proc/$PID/fd is available. */
         if (getpid_cached() == 1) {
 
-                /* Load the kernel modules early, so that we kdbus.ko is loaded before kdbusfs shall be mounted */
+                /* Load the kernel modules early. */
                 if (!skip_setup)
                         kmod_setup();
 
@@ -1649,7 +1782,7 @@ int main(int argc, char *argv[]) {
                 goto finish;
         }
 
-        if (arg_action == ACTION_TEST || arg_action == ACTION_HELP) {
+        if (IN_SET(arg_action, ACTION_TEST, ACTION_HELP)) {
                 pager_open(arg_no_pager, false);
                 skip_setup = true;
         }
@@ -1673,26 +1806,28 @@ int main(int argc, char *argv[]) {
                 goto finish;
         }
 
-        assert_se(arg_action == ACTION_RUN || arg_action == ACTION_TEST);
+        assert_se(IN_SET(arg_action, ACTION_RUN, ACTION_TEST));
 
         /* Close logging fds, in order not to confuse fdset below */
         log_close();
 
         /* Remember open file descriptors for later deserialization */
-        r = fdset_new_fill(&fds);
-        if (r < 0) {
-                log_emergency_errno(r, "Failed to allocate fd set: %m");
-                error_message = "Failed to allocate fd set";
-                goto finish;
-        } else
-                fdset_cloexec(fds, true);
+        if (arg_action == ACTION_RUN) {
+                r = fdset_new_fill(&fds);
+                if (r < 0) {
+                        log_emergency_errno(r, "Failed to allocate fd set: %m");
+                        error_message = "Failed to allocate fd set";
+                        goto finish;
+                } else
+                        fdset_cloexec(fds, true);
 
-        if (arg_serialization)
-                assert_se(fdset_remove(fds, fileno(arg_serialization)) >= 0);
+                if (arg_serialization)
+                        assert_se(fdset_remove(fds, fileno(arg_serialization)) >= 0);
 
-        if (arg_system)
-                /* Become a session leader if we aren't one yet. */
-                setsid();
+                if (arg_system)
+                        /* Become a session leader if we aren't one yet. */
+                        setsid();
+        }
 
         /* Move out of the way, so that we won't block unmounts */
         assert_se(chdir("/") == 0);
@@ -1742,105 +1877,98 @@ int main(int argc, char *argv[]) {
 
                 if (in_initrd())
                         log_info("Running in initial RAM disk.");
-
-                /* Let's check whether /etc is already populated. We
-                 * don't actually really check for that, but use
-                 * /etc/machine-id as flag file. This allows container
-                 * managers and installers to provision a couple of
-                 * files already. If the container manager wants to
-                 * provision the machine ID itself it should pass
-                 * $container_uuid to PID 1. */
-
-                empty_etc = access("/etc/machine-id", F_OK) < 0;
-                if (empty_etc)
-                        log_info("Running with unpopulated /etc.");
+                else {
+                        /* Let's check whether we are in first boot, i.e. whether /etc is still unpopulated. We use
+                         * /etc/machine-id as flag file, for this: if it exists we assume /etc is populated, if it
+                         * doesn't it's unpopulated. This allows container managers and installers to provision a
+                         * couple of files already. If the container manager wants to provision the machine ID itself
+                         * it should pass $container_uuid to PID 1. */
+
+                        first_boot = access("/etc/machine-id", F_OK) < 0;
+                        if (first_boot)
+                                log_info("Running with unpopulated /etc.");
+                }
         } else {
                 _cleanup_free_ char *t;
 
                 t = uid_to_name(getuid());
-                log_debug(PACKAGE_STRING " running in %suser mode for user "UID_FMT"/%s. (" SYSTEMD_FEATURES ")",
-                          arg_action == ACTION_TEST ? " test" : "", getuid(), t);
+                log_debug(PACKAGE_STRING " running in %suser mode for user " UID_FMT "/%s. (" SYSTEMD_FEATURES ")",
+                          arg_action == ACTION_TEST ? " test" : "", getuid(), strna(t));
         }
 
-        if (arg_system && !skip_setup) {
-                if (arg_show_status > 0)
-                        status_welcome();
+        if (arg_action == ACTION_RUN) {
+                if (arg_system && !skip_setup) {
+                        if (arg_show_status > 0)
+                                status_welcome();
 
-                hostname_setup();
-                machine_id_setup(NULL, arg_machine_id, NULL);
-                loopback_setup();
-                bump_unix_max_dgram_qlen();
+                        hostname_setup();
+                        machine_id_setup(NULL, arg_machine_id, NULL);
+                        loopback_setup();
+                        bump_unix_max_dgram_qlen();
 
-                test_usr();
-        }
+                        test_usr();
+                }
 
-        if (arg_system && arg_runtime_watchdog > 0 && arg_runtime_watchdog != USEC_INFINITY)
-                watchdog_set_timeout(&arg_runtime_watchdog);
+                if (arg_system && arg_runtime_watchdog > 0 && arg_runtime_watchdog != USEC_INFINITY)
+                        watchdog_set_timeout(&arg_runtime_watchdog);
 
-        if (arg_timer_slack_nsec != NSEC_INFINITY)
-                if (prctl(PR_SET_TIMERSLACK, arg_timer_slack_nsec) < 0)
-                        log_error_errno(errno, "Failed to adjust timer slack: %m");
+                if (arg_timer_slack_nsec != NSEC_INFINITY)
+                        if (prctl(PR_SET_TIMERSLACK, arg_timer_slack_nsec) < 0)
+                                log_error_errno(errno, "Failed to adjust timer slack: %m");
 
-        if (arg_system && !cap_test_all(arg_capability_bounding_set)) {
-                r = capability_bounding_set_drop_usermode(arg_capability_bounding_set);
-                if (r < 0) {
-                        log_emergency_errno(r, "Failed to drop capability bounding set of usermode helpers: %m");
-                        error_message = "Failed to drop capability bounding set of usermode helpers";
-                        goto finish;
-                }
-                r = capability_bounding_set_drop(arg_capability_bounding_set, true);
-                if (r < 0) {
-                        log_emergency_errno(r, "Failed to drop capability bounding set: %m");
-                        error_message = "Failed to drop capability bounding set";
-                        goto finish;
+                if (arg_system && !cap_test_all(arg_capability_bounding_set)) {
+                        r = capability_bounding_set_drop_usermode(arg_capability_bounding_set);
+                        if (r < 0) {
+                                log_emergency_errno(r, "Failed to drop capability bounding set of usermode helpers: %m");
+                                error_message = "Failed to drop capability bounding set of usermode helpers";
+                                goto finish;
+                        }
+                        r = capability_bounding_set_drop(arg_capability_bounding_set, true);
+                        if (r < 0) {
+                                log_emergency_errno(r, "Failed to drop capability bounding set: %m");
+                                error_message = "Failed to drop capability bounding set";
+                                goto finish;
+                        }
                 }
-        }
 
-        if (arg_syscall_archs) {
-                r = enforce_syscall_archs(arg_syscall_archs);
-                if (r < 0) {
-                        error_message = "Failed to set syscall architectures";
-                        goto finish;
+                if (arg_syscall_archs) {
+                        r = enforce_syscall_archs(arg_syscall_archs);
+                        if (r < 0) {
+                                error_message = "Failed to set syscall architectures";
+                                goto finish;
+                        }
                 }
-        }
 
-        if (!arg_system)
-                /* Become reaper of our children */
-                if (prctl(PR_SET_CHILD_SUBREAPER, 1) < 0)
-                        log_warning_errno(errno, "Failed to make us a subreaper: %m");
+                if (!arg_system)
+                        /* Become reaper of our children */
+                        if (prctl(PR_SET_CHILD_SUBREAPER, 1) < 0)
+                                log_warning_errno(errno, "Failed to make us a subreaper: %m");
 
-        if (arg_system) {
-                (void) bump_rlimit_nofile(&saved_rlimit_nofile);
-
-                if (empty_etc) {
-                        r = unit_file_preset_all(UNIT_FILE_SYSTEM, 0, NULL, UNIT_FILE_PRESET_ENABLE_ONLY, NULL, 0);
-                        if (r < 0)
-                                log_full_errno(r == -EEXIST ? LOG_NOTICE : LOG_WARNING, r, "Failed to populate /etc with preset unit settings, ignoring: %m");
-                        else
-                                log_info("Populated /etc with preset unit settings.");
+                if (arg_system) {
+                        /* Bump up RLIMIT_NOFILE for systemd itself */
+                        (void) bump_rlimit_nofile(&saved_rlimit_nofile);
+                        (void) bump_rlimit_memlock(&saved_rlimit_memlock);
                 }
         }
 
-        r = manager_new(arg_system ? UNIT_FILE_SYSTEM : UNIT_FILE_USER, arg_action == ACTION_TEST, &m);
+        r = manager_new(arg_system ? UNIT_FILE_SYSTEM : UNIT_FILE_USER,
+                        arg_action == ACTION_TEST ? MANAGER_TEST_FULL : 0,
+                        &m);
         if (r < 0) {
                 log_emergency_errno(r, "Failed to allocate manager object: %m");
                 error_message = "Failed to allocate manager object";
                 goto finish;
         }
 
-        m->confirm_spawn = arg_confirm_spawn;
-        m->runtime_watchdog = arg_runtime_watchdog;
-        m->shutdown_watchdog = arg_shutdown_watchdog;
         m->userspace_timestamp = userspace_timestamp;
         m->kernel_timestamp = kernel_timestamp;
         m->initrd_timestamp = initrd_timestamp;
         m->security_start_timestamp = security_start_timestamp;
         m->security_finish_timestamp = security_finish_timestamp;
-        m->cad_burst_action = arg_cad_burst_action;
 
-        manager_set_defaults(m);
-        manager_set_show_status(m, arg_show_status);
-        manager_set_first_boot(m, empty_etc);
+        set_manager_defaults(m);
+        set_manager_settings(m);
+        manager_set_first_boot(m, first_boot);
 
         /* Remember whether we should queue the default job */
         queue_default_job = !arg_serialization || arg_switched_root;
@@ -1869,7 +1997,7 @@ int main(int argc, char *argv[]) {
                 r = manager_load_unit(m, arg_default_unit, NULL, &error, &target);
                 if (r < 0)
                         log_error("Failed to load default target: %s", bus_error_message(&error, r));
-                else if (target->load_state == UNIT_ERROR || target->load_state == UNIT_NOT_FOUND)
+                else if (IN_SET(target->load_state, UNIT_ERROR, UNIT_NOT_FOUND))
                         log_error_errno(target->load_error, "Failed to load default target: %m");
                 else if (target->load_state == UNIT_MASKED)
                         log_error("Default target masked.");
@@ -1882,7 +2010,7 @@ int main(int argc, char *argv[]) {
                                 log_emergency("Failed to load rescue target: %s", bus_error_message(&error, r));
                                 error_message = "Failed to load rescue target";
                                 goto finish;
-                        } else if (target->load_state == UNIT_ERROR || target->load_state == UNIT_NOT_FOUND) {
+                        } else if (IN_SET(target->load_state, UNIT_ERROR, UNIT_NOT_FOUND)) {
                                 log_emergency_errno(target->load_error, "Failed to load rescue target: %m");
                                 error_message = "Failed to load rescue target";
                                 goto finish;
@@ -1950,7 +2078,7 @@ int main(int argc, char *argv[]) {
                         if (r < 0)
                                 log_error("Failed to parse config file.");
 
-                        manager_set_defaults(m);
+                        set_manager_defaults(m);
 
                         r = manager_reload(m);
                         if (r < 0)
@@ -2050,6 +2178,8 @@ finish:
                  * its child processes */
                 if (saved_rlimit_nofile.rlim_cur > 0)
                         (void) setrlimit(RLIMIT_NOFILE, &saved_rlimit_nofile);
+                if (saved_rlimit_memlock.rlim_cur != (rlim_t) -1)
+                        (void) setrlimit(RLIMIT_MEMLOCK, &saved_rlimit_memlock);
 
                 if (switch_root_dir) {
                         /* Kill all remaining processes from the
@@ -2133,6 +2263,10 @@ finish:
                 args[0] = "/sbin/init";
                 (void) execv(args[0], (char* const*) args);
 
+                manager_status_printf(NULL, STATUS_TYPE_EMERGENCY,
+                        ANSI_HIGHLIGHT_RED "  !!  " ANSI_NORMAL,
+                        "Failed to execute /sbin/init");
+
                 if (errno == ENOENT) {
                         log_warning("No /sbin/init, trying fallback");
 
@@ -2142,12 +2276,14 @@ finish:
                         log_error_errno(errno, "Failed to execute /bin/sh, giving up: %m");
                 } else
                         log_warning_errno(errno, "Failed to execute /sbin/init, giving up: %m");
+
+                error_message = "Failed to execute fallback shell";
         }
 
         arg_serialization = safe_fclose(arg_serialization);
         fds = fdset_free(fds);
 
-#ifdef HAVE_VALGRIND_VALGRIND_H
+#if HAVE_VALGRIND_VALGRIND_H
         /* If we are PID 1 and running under valgrind, then let's exit
          * here explicitly. valgrind will only generate nice output on
          * exit(), not on exec(), hence let's do the former not the
@@ -2157,78 +2293,10 @@ finish:
 #endif
 
         if (shutdown_verb) {
-                char log_level[DECIMAL_STR_MAX(int) + 1];
-                char exit_code[DECIMAL_STR_MAX(uint8_t) + 1];
-                const char* command_line[11] = {
-                        SYSTEMD_SHUTDOWN_BINARY_PATH,
-                        shutdown_verb,
-                        "--log-level", log_level,
-                        "--log-target",
-                };
-                unsigned pos = 5;
-                _cleanup_strv_free_ char **env_block = NULL;
-
-                assert(command_line[pos] == NULL);
-                env_block = strv_copy(environ);
-
-                xsprintf(log_level, "%d", log_get_max_level());
-
-                switch (log_get_target()) {
-
-                case LOG_TARGET_KMSG:
-                case LOG_TARGET_JOURNAL_OR_KMSG:
-                case LOG_TARGET_SYSLOG_OR_KMSG:
-                        command_line[pos++] = "kmsg";
-                        break;
-
-                case LOG_TARGET_NULL:
-                        command_line[pos++] = "null";
-                        break;
-
-                case LOG_TARGET_CONSOLE:
-                default:
-                        command_line[pos++] = "console";
-                        break;
-                };
-
-                if (log_get_show_color())
-                        command_line[pos++] = "--log-color";
-
-                if (log_get_show_location())
-                        command_line[pos++] = "--log-location";
-
-                if (streq(shutdown_verb, "exit")) {
-                        command_line[pos++] = "--exit-code";
-                        command_line[pos++] = exit_code;
-                        xsprintf(exit_code, "%d", retval);
-                }
-
-                assert(pos < ELEMENTSOF(command_line));
-
-                if (arm_reboot_watchdog && arg_shutdown_watchdog > 0 && arg_shutdown_watchdog != USEC_INFINITY) {
-                        char *e;
-
-                        /* If we reboot let's set the shutdown
-                         * watchdog and tell the shutdown binary to
-                         * repeatedly ping it */
-                        r = watchdog_set_timeout(&arg_shutdown_watchdog);
-                        watchdog_close(r < 0);
-
-                        /* Tell the binary how often to ping, ignore failure */
-                        if (asprintf(&e, "WATCHDOG_USEC="USEC_FMT, arg_shutdown_watchdog) > 0)
-                                (void) strv_push(&env_block, e);
-                } else
-                        watchdog_close(true);
-
-                /* Avoid the creation of new processes forked by the
-                 * kernel; at this point, we will not listen to the
-                 * signals anyway */
-                if (detect_container() <= 0)
-                        (void) cg_uninstall_release_agent(SYSTEMD_CGROUP_CONTROLLER);
+                r = become_shutdown(shutdown_verb, retval, arm_reboot_watchdog);
 
-                execve(SYSTEMD_SHUTDOWN_BINARY_PATH, (char **) command_line, env_block);
-                log_error_errno(errno, "Failed to execute shutdown binary, %s: %m",
-                          getpid_cached() == 1 ? "freezing" : "quitting");
+                log_error_errno(r, "Failed to execute shutdown binary, %s: %m", getpid_cached() == 1 ? "freezing" : "quitting");
+                error_message = "Failed to execute shutdown binary";
         }
 
         if (getpid_cached() == 1) {