X-Git-Url: http://git.ipfire.org/?a=blobdiff_plain;f=src%2Fcore%2Fmain.c;h=839dc062ff780700c9213beec517468c6ed66bcb;hb=595225af7a4f663788d26b8720e994fed71f9410;hp=078decb432b37a9ed821a468bcf9606d17006a85;hpb=9d0798a2edf05438499c7cb5def592d52358f397;p=thirdparty%2Fsystemd.git diff --git a/src/core/main.c b/src/core/main.c index 078decb432b..839dc062ff7 100644 --- a/src/core/main.c +++ b/src/core/main.c @@ -28,6 +28,7 @@ #include "bus-error.h" #include "bus-util.h" #include "capability-util.h" +#include "cgroup-util.h" #include "clock-util.h" #include "conf-parser.h" #include "cpu-set-util.h" @@ -57,6 +58,7 @@ #include "pager.h" #include "parse-util.h" #include "path-util.h" +#include "pretty-print.h" #include "proc-cmdline.h" #include "process-util.h" #include "raw-clone.h" @@ -73,6 +75,7 @@ #include "stdio-util.h" #include "strv.h" #include "switch-root.h" +#include "sysctl-util.h" #include "terminal-util.h" #include "umask-util.h" #include "user-util.h" @@ -97,9 +100,8 @@ static bool arg_crash_reboot = false; static char *arg_confirm_spawn = NULL; static ShowStatus arg_show_status = _SHOW_STATUS_INVALID; static bool arg_switched_root = false; -static bool arg_no_pager = false; +static PagerFlags arg_pager_flags = 0; static bool arg_service_watchdogs = true; -static char ***arg_join_controllers = NULL; static ExecOutput arg_default_std_output = EXEC_OUTPUT_JOURNAL; static ExecOutput arg_default_std_error = EXEC_OUTPUT_INHERIT; static usec_t arg_default_restart_usec = DEFAULT_RESTART_USEC; @@ -109,6 +111,7 @@ static usec_t arg_default_start_limit_interval = DEFAULT_START_LIMIT_INTERVAL; static unsigned arg_default_start_limit_burst = DEFAULT_START_LIMIT_BURST; static usec_t arg_runtime_watchdog = 0; static usec_t arg_shutdown_watchdog = 10 * USEC_PER_MINUTE; +static char *arg_early_core_pattern = NULL; static char *arg_watchdog_device = NULL; static char **arg_default_environment = NULL; static struct rlimit *arg_default_rlimit[_RLIMIT_MAX] = {}; @@ -118,7 +121,7 @@ 
static nsec_t arg_timer_slack_nsec = NSEC_INFINITY; static usec_t arg_default_timer_accuracy_usec = 1 * USEC_PER_MINUTE; static Set* arg_syscall_archs = NULL; static FILE* arg_serialization = NULL; -static bool arg_default_cpu_accounting = false; +static int arg_default_cpu_accounting = -1; static bool arg_default_io_accounting = false; static bool arg_default_ip_accounting = false; static bool arg_default_blockio_accounting = false; @@ -128,7 +131,14 @@ static uint64_t arg_default_tasks_max = UINT64_MAX; static sd_id128_t arg_machine_id = {}; static EmergencyAction arg_cad_burst_action = EMERGENCY_ACTION_REBOOT_FORCE; -_noreturn_ static void freeze_or_reboot(void) { +_noreturn_ static void freeze_or_exit_or_reboot(void) { + + /* If we are running in a container, let's prefer exiting, after all we can propagate an exit code to the + * container manager, and thus inform it that something went wrong. */ + if (detect_container() > 0) { + log_emergency("Exiting PID 1..."); + exit(EXIT_EXCEPTION); + } if (arg_crash_reboot) { log_notice("Rebooting in 10s..."); @@ -183,7 +193,7 @@ _noreturn_ static void crash(int sig) { (void) kill(pid, sig); /* raise() would kill the parent */ assert_not_reached("We shouldn't be here..."); - _exit(EXIT_FAILURE); + _exit(EXIT_EXCEPTION); } else { siginfo_t status; int r; @@ -226,17 +236,18 @@ _noreturn_ static void crash(int sig) { else if (pid == 0) { (void) setsid(); (void) make_console_stdio(); + (void) rlimit_nofile_safe(); (void) execle("/bin/sh", "/bin/sh", NULL, environ); log_emergency_errno(errno, "execle() failed: %m"); - _exit(EXIT_FAILURE); + _exit(EXIT_EXCEPTION); } else { log_info("Spawned crash shell as PID "PID_FMT".", pid); (void) wait_for_terminate(pid, NULL); } } - freeze_or_reboot(); + freeze_or_exit_or_reboot(); } static void install_crash_handler(void) { @@ -347,22 +358,35 @@ static int parse_proc_cmdline_item(const char *key, const char *value, void *dat r = value ?
parse_boolean(value) : true; if (r < 0) - log_warning("Failed to parse dump core switch %s. Ignoring.", value); + log_warning_errno(r, "Failed to parse dump core switch %s, ignoring: %m", value); else arg_dump_core = r; + } else if (proc_cmdline_key_streq(key, "systemd.early_core_pattern")) { + + if (proc_cmdline_value_missing(key, value)) + return 0; + + if (path_is_absolute(value)) + (void) parse_path_argument_and_warn(value, false, &arg_early_core_pattern); + else + log_warning("Specified core pattern '%s' is not an absolute path, ignoring.", value); + } else if (proc_cmdline_key_streq(key, "systemd.crash_chvt")) { if (!value) arg_crash_chvt = 0; /* turn on */ - else if (parse_crash_chvt(value) < 0) - log_warning("Failed to parse crash chvt switch %s. Ignoring.", value); + else { + r = parse_crash_chvt(value); + if (r < 0) + log_warning_errno(r, "Failed to parse crash chvt switch %s, ignoring: %m", value); + } } else if (proc_cmdline_key_streq(key, "systemd.crash_shell")) { r = value ? parse_boolean(value) : true; if (r < 0) - log_warning("Failed to parse crash shell switch %s. Ignoring.", value); + log_warning_errno(r, "Failed to parse crash shell switch %s, ignoring: %m", value); else arg_crash_shell = r; @@ -370,7 +394,7 @@ static int parse_proc_cmdline_item(const char *key, const char *value, void *dat r = value ? parse_boolean(value) : true; if (r < 0) - log_warning("Failed to parse crash reboot switch %s. Ignoring.", value); + log_warning_errno(r, "Failed to parse crash reboot switch %s, ignoring: %m", value); else arg_crash_reboot = r; @@ -379,17 +403,15 @@ static int parse_proc_cmdline_item(const char *key, const char *value, void *dat r = parse_confirm_spawn(value, &s); if (r < 0) - log_warning_errno(r, "Failed to parse confirm_spawn switch %s. 
Ignoring.", value); - else { - free(arg_confirm_spawn); - arg_confirm_spawn = s; - } + log_warning_errno(r, "Failed to parse confirm_spawn switch %s, ignoring: %m", value); + else + free_and_replace(arg_confirm_spawn, s); } else if (proc_cmdline_key_streq(key, "systemd.service_watchdogs")) { r = value ? parse_boolean(value) : true; if (r < 0) - log_warning("Failed to parse service watchdog switch %s. Ignoring.", value); + log_warning_errno(r, "Failed to parse service watchdog switch %s, ignoring: %m", value); else arg_service_watchdogs = r; @@ -398,7 +420,7 @@ static int parse_proc_cmdline_item(const char *key, const char *value, void *dat if (value) { r = parse_show_status(value, &arg_show_status); if (r < 0) - log_warning("Failed to parse show status switch %s. Ignoring.", value); + log_warning_errno(r, "Failed to parse show status switch %s, ignoring: %m", value); } else arg_show_status = SHOW_STATUS_YES; @@ -409,7 +431,7 @@ static int parse_proc_cmdline_item(const char *key, const char *value, void *dat r = exec_output_from_string(value); if (r < 0) - log_warning("Failed to parse default standard output switch %s. Ignoring.", value); + log_warning_errno(r, "Failed to parse default standard output switch %s, ignoring: %m", value); else arg_default_std_output = r; @@ -420,7 +442,7 @@ static int parse_proc_cmdline_item(const char *key, const char *value, void *dat r = exec_output_from_string(value); if (r < 0) - log_warning("Failed to parse default standard error switch %s. Ignoring.", value); + log_warning_errno(r, "Failed to parse default standard error switch %s, ignoring: %m", value); else arg_default_std_error = r; @@ -447,7 +469,7 @@ static int parse_proc_cmdline_item(const char *key, const char *value, void *dat r = set_machine_id(value); if (r < 0) - log_warning("MachineID '%s' is not valid. 
Ignoring.", value); + log_warning_errno(r, "MachineID '%s' is not valid, ignoring: %m", value); } else if (proc_cmdline_key_streq(key, "systemd.default_timeout_start_sec")) { @@ -456,7 +478,7 @@ static int parse_proc_cmdline_item(const char *key, const char *value, void *dat r = parse_sec(value, &arg_default_timeout_start_usec); if (r < 0) - log_warning_errno(r, "Failed to parse default start timeout: %s, ignoring.", value); + log_warning_errno(r, "Failed to parse default start timeout '%s', ignoring: %m", value); if (arg_default_timeout_start_usec <= 0) arg_default_timeout_start_usec = USEC_INFINITY; @@ -466,7 +488,7 @@ static int parse_proc_cmdline_item(const char *key, const char *value, void *dat if (proc_cmdline_value_missing(key, value)) return 0; - parse_path_argument_and_warn(value, false, &arg_watchdog_device); + (void) parse_path_argument_and_warn(value, false, &arg_watchdog_device); } else if (streq(key, "quiet") && !value) { @@ -654,7 +676,7 @@ static int parse_config_file(void) { { "Manager", "CrashReboot", config_parse_bool, 0, &arg_crash_reboot }, { "Manager", "ShowStatus", config_parse_show_status, 0, &arg_show_status }, { "Manager", "CPUAffinity", config_parse_cpu_affinity2, 0, NULL }, - { "Manager", "JoinControllers", config_parse_join_controllers, 0, &arg_join_controllers }, + { "Manager", "JoinControllers", config_parse_warn_compat, DISABLED_CONFIGURATION, NULL }, { "Manager", "RuntimeWatchdogSec", config_parse_sec, 0, &arg_runtime_watchdog }, { "Manager", "ShutdownWatchdogSec", config_parse_sec, 0, &arg_shutdown_watchdog }, { "Manager", "WatchdogDevice", config_parse_path, 0, &arg_watchdog_device }, @@ -690,7 +712,7 @@ static int parse_config_file(void) { { "Manager", "DefaultLimitNICE", config_parse_rlimit, RLIMIT_NICE, arg_default_rlimit }, { "Manager", "DefaultLimitRTPRIO", config_parse_rlimit, RLIMIT_RTPRIO, arg_default_rlimit }, { "Manager", "DefaultLimitRTTIME", config_parse_rlimit, RLIMIT_RTTIME, arg_default_rlimit }, - { "Manager", 
"DefaultCPUAccounting", config_parse_bool, 0, &arg_default_cpu_accounting }, + { "Manager", "DefaultCPUAccounting", config_parse_tristate, 0, &arg_default_cpu_accounting }, { "Manager", "DefaultIOAccounting", config_parse_bool, 0, &arg_default_io_accounting }, { "Manager", "DefaultIPAccounting", config_parse_bool, 0, &arg_default_ip_accounting }, { "Manager", "DefaultBlockIOAccounting", config_parse_bool, 0, &arg_default_blockio_accounting }, @@ -739,7 +761,14 @@ static void set_manager_defaults(Manager *m) { m->default_restart_usec = arg_default_restart_usec; m->default_start_limit_interval = arg_default_start_limit_interval; m->default_start_limit_burst = arg_default_start_limit_burst; - m->default_cpu_accounting = arg_default_cpu_accounting; + + /* On 4.15+ with unified hierarchy, CPU accounting is essentially free as it doesn't require the CPU + * controller to be enabled, so the default is to enable it unless we got told otherwise. */ + if (arg_default_cpu_accounting >= 0) + m->default_cpu_accounting = arg_default_cpu_accounting; + else + m->default_cpu_accounting = cpu_accounting_is_cheap(); + m->default_io_accounting = arg_default_io_accounting; m->default_ip_accounting = arg_default_ip_accounting; m->default_blockio_accounting = arg_default_blockio_accounting; @@ -747,8 +776,10 @@ static void set_manager_defaults(Manager *m) { m->default_tasks_accounting = arg_default_tasks_accounting; m->default_tasks_max = arg_default_tasks_max; - manager_set_default_rlimits(m, arg_default_rlimit); - manager_environment_add(m, NULL, arg_default_environment); + (void) manager_set_default_rlimits(m, arg_default_rlimit); + + (void) manager_default_environment(m); + (void) manager_transient_environment_add(m, arg_default_environment); } static void set_manager_settings(Manager *m) { @@ -909,7 +940,7 @@ static int parse_argv(int argc, char *argv[]) { break; case ARG_NO_PAGER: - arg_no_pager = true; + arg_pager_flags |= PAGER_DISABLE; break; case ARG_VERSION: @@ -1001,10 
+1032,10 @@ static int parse_argv(int argc, char *argv[]) { r = safe_atoi(optarg, &fd); if (r < 0) log_error_errno(r, "Failed to parse deserialize option \"%s\": %m", optarg); - if (fd < 0) { - log_error("Invalid deserialize fd: %d", fd); - return -EINVAL; - } + if (fd < 0) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), + "Invalid deserialize fd: %d", + fd); (void) fd_cloexec(fd, true); @@ -1058,8 +1089,8 @@ static int parse_argv(int argc, char *argv[]) { /* Hmm, when we aren't run as init system * let's complain about excess arguments */ - log_error("Excess arguments."); - return -EINVAL; + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), + "Excess arguments."); } return 0; @@ -1104,14 +1135,19 @@ static int help(void) { return 0; } -static int prepare_reexecute(Manager *m, FILE **_f, FDSet **_fds, bool switching_root) { +static int prepare_reexecute( + Manager *m, + FILE **ret_f, + FDSet **ret_fds, + bool switching_root) { + _cleanup_fdset_free_ FDSet *fds = NULL; _cleanup_fclose_ FILE *f = NULL; int r; assert(m); - assert(_f); - assert(_fds); + assert(ret_f); + assert(ret_fds); r = manager_open_serialization(m, &f); if (r < 0) @@ -1127,7 +1163,7 @@ static int prepare_reexecute(Manager *m, FILE **_f, FDSet **_fds, bool switching r = manager_serialize(m, f, fds, switching_root); if (r < 0) - return log_error_errno(r, "Failed to serialize state: %m"); + return r; if (fseeko(f, 0, SEEK_SET) == (off_t) -1) return log_error_errno(errno, "Failed to rewind serialization fd: %m"); @@ -1140,24 +1176,108 @@ static int prepare_reexecute(Manager *m, FILE **_f, FDSet **_fds, bool switching if (r < 0) return log_error_errno(r, "Failed to disable O_CLOEXEC for serialization fds: %m"); - *_f = TAKE_PTR(f); - *_fds = TAKE_PTR(fds); + *ret_f = TAKE_PTR(f); + *ret_fds = TAKE_PTR(fds); return 0; } +static void bump_file_max_and_nr_open(void) { + + /* Let's bump fs.file-max and fs.nr_open to their respective maximums. 
On current kernels large numbers of file + * descriptors are no longer a performance problem and their memory is properly tracked by memcg, thus counting + * them and limiting them in another two layers of limits is unnecessary and just complicates things. This + * function hence turns off 2 of the 4 levels of limits on file descriptors, and makes RLIMIT_NOFILE (soft + + * hard) the only ones that really matter. */ + +#if BUMP_PROC_SYS_FS_FILE_MAX || BUMP_PROC_SYS_FS_NR_OPEN + _cleanup_free_ char *t = NULL; + int r; +#endif + +#if BUMP_PROC_SYS_FS_FILE_MAX + /* I so wanted to use STRINGIFY(ULONG_MAX) here, but alas we can't as glibc/gcc define that as + * "(0x7fffffffffffffffL * 2UL + 1UL)". Seriously. 😢 */ + if (asprintf(&t, "%lu\n", ULONG_MAX) < 0) { + log_oom(); + return; + } + + r = sysctl_write("fs/file-max", t); + if (r < 0) + log_full_errno(IN_SET(r, -EROFS, -EPERM, -EACCES) ? LOG_DEBUG : LOG_WARNING, r, "Failed to bump fs.file-max, ignoring: %m"); +#endif + +#if BUMP_PROC_SYS_FS_FILE_MAX && BUMP_PROC_SYS_FS_NR_OPEN + t = mfree(t); +#endif + +#if BUMP_PROC_SYS_FS_NR_OPEN + int v = INT_MAX; + + /* Arg! The kernel enforces maximum and minimum values on the fs.nr_open, but we don't really know what they + * are. The expression by which the maximum is determined is dependent on the architecture, and is something we + * don't really want to copy to userspace, as it is dependent on implementation details of the kernel. Since + * the kernel doesn't expose the maximum value to us, we can only try and hope. Hence, let's start with + * INT_MAX, and then keep halving the value until we find one that works. Ugly? Yes, absolutely, but kernel + * APIs are kernel APIs, so what can we do...
🤯 */ + + for (;;) { + int k; + + v &= ~(__SIZEOF_POINTER__ - 1); /* Round down to next multiple of the pointer size */ + if (v < 1024) { + log_warning("Can't bump fs.nr_open, value too small."); + break; + } + + k = read_nr_open(); + if (k < 0) { + log_error_errno(k, "Failed to read fs.nr_open: %m"); + break; + } + if (k >= v) { /* Already larger */ + log_debug("Skipping bump, value is already larger."); + break; + } + + if (asprintf(&t, "%i\n", v) < 0) { + log_oom(); + return; + } + + r = sysctl_write("fs/nr_open", t); + t = mfree(t); + if (r == -EINVAL) { + log_debug("Couldn't write fs.nr_open as %i, halving it.", v); + v /= 2; + continue; + } + if (r < 0) { + log_full_errno(IN_SET(r, -EROFS, -EPERM, -EACCES) ? LOG_DEBUG : LOG_WARNING, r, "Failed to bump fs.nr_open, ignoring: %m"); + break; + } + + log_debug("Successfully bumped fs.nr_open to %i", v); + break; + } +#endif +} + static int bump_rlimit_nofile(struct rlimit *saved_rlimit) { int r, nr; assert(saved_rlimit); - /* Save the original RLIMIT_NOFILE so that we can reset it - * later when transitioning from the initrd to the main + /* Save the original RLIMIT_NOFILE so that we can reset it later when transitioning from the initrd to the main * systemd or suchlike. */ if (getrlimit(RLIMIT_NOFILE, saved_rlimit) < 0) return log_warning_errno(errno, "Reading RLIMIT_NOFILE failed, ignoring: %m"); - /* Make sure forked processes get the default kernel setting */ + /* Get the underlying absolute limit the kernel enforces */ + nr = read_nr_open(); + + /* Make sure forked processes get limits based on the original kernel setting */ if (!arg_default_rlimit[RLIMIT_NOFILE]) { struct rlimit *rl; @@ -1165,11 +1285,25 @@ static int bump_rlimit_nofile(struct rlimit *saved_rlimit) { if (!rl) return log_oom(); + /* Bump the hard limit for system services to a substantially higher value. The default hard limit + * current kernels set is pretty low (4K), mostly for historical reasons. 
According to kernel + * developers, the fd handling in recent kernels has been optimized substantially enough, so that we + * can bump the limit now, without paying too high a price in memory or performance. Note however that + * we only bump the hard limit, not the soft limit. That's because select() works the way it works, and + * chokes on fds >= 1024. If we'd bump the soft limit globally, it might accidentally happen to + * unsuspecting programs that they get fds higher than what they can process using select(). By only + * bumping the hard limit but leaving the low limit as it is we avoid this pitfall: programs that are + * written by folks with the select() problem in mind (and thus use poll()/epoll instead of + * select(), the way everybody should) can explicitly opt into high fds by bumping their soft limit + * beyond 1024, to the hard limit we pass. */ + if (arg_system) + rl->rlim_max = MIN((rlim_t) nr, MAX(rl->rlim_max, (rlim_t) HIGH_RLIMIT_NOFILE)); + arg_default_rlimit[RLIMIT_NOFILE] = rl; } - /* Bump up the resource limit for ourselves substantially, all the way to the maximum the kernel allows */ - nr = read_nr_open(); + /* Bump up the resource limit for ourselves substantially, all the way to the maximum the kernel allows, for + * both hard and soft. */ r = setrlimit_closest(RLIMIT_NOFILE, &RLIMIT_MAKE_CONST(nr)); if (r < 0) return log_warning_errno(r, "Setting RLIMIT_NOFILE failed, ignoring: %m"); @@ -1181,16 +1315,15 @@ static int bump_rlimit_memlock(struct rlimit *saved_rlimit) { int r; assert(saved_rlimit); - assert(getuid() == 0); - /* BPF_MAP_TYPE_LPM_TRIE bpf maps are charged against RLIMIT_MEMLOCK, even though we have CAP_IPC_LOCK which - * should normally disable such checks. We need them to implement IPAccessAllow= and IPAccessDeny=, hence let's - * bump the value high enough for the root user.
*/ + /* BPF_MAP_TYPE_LPM_TRIE bpf maps are charged against RLIMIT_MEMLOCK, even if we have CAP_IPC_LOCK which should + * normally disable such checks. We need them to implement IPAccessAllow= and IPAccessDeny=, hence let's bump + * the value high enough for our user. */ if (getrlimit(RLIMIT_MEMLOCK, saved_rlimit) < 0) return log_warning_errno(errno, "Reading RLIMIT_MEMLOCK failed, ignoring: %m"); - r = setrlimit_closest(RLIMIT_MEMLOCK, &RLIMIT_MAKE_CONST(1024ULL*1024ULL*16ULL)); + r = setrlimit_closest(RLIMIT_MEMLOCK, &RLIMIT_MAKE_CONST(HIGH_RLIMIT_MEMLOCK)); if (r < 0) return log_warning_errno(r, "Setting RLIMIT_MEMLOCK failed, ignoring: %m"); @@ -1239,12 +1372,12 @@ static int status_welcome(void) { "Failed to read os-release file, ignoring: %m"); if (log_get_show_color()) - return status_printf(NULL, false, false, + return status_printf(NULL, 0, "\nWelcome to \x1B[%sm%s\x1B[0m!\n", isempty(ansi_color) ? "1" : ansi_color, isempty(pretty_name) ? "Linux" : pretty_name); else - return status_printf(NULL, false, false, + return status_printf(NULL, 0, "\nWelcome to %s!\n", isempty(pretty_name) ? "Linux" : pretty_name); } @@ -1276,7 +1409,7 @@ static int bump_unix_max_dgram_qlen(void) { r = read_one_line_file("/proc/sys/net/unix/max_dgram_qlen", &qlen); if (r < 0) - return log_warning_errno(r, "Failed to read AF_UNIX datagram queue length, ignoring: %m"); + return log_full_errno(r == -ENOENT ? LOG_DEBUG : LOG_WARNING, r, "Failed to read AF_UNIX datagram queue length, ignoring: %m"); r = safe_atolu(qlen, &v); if (r < 0) @@ -1285,7 +1418,7 @@ static int bump_unix_max_dgram_qlen(void) { if (v >= DEFAULT_UNIX_MAX_DGRAM_QLEN) return 0; - r = write_string_filef("/proc/sys/net/unix/max_dgram_qlen", 0, "%lu", DEFAULT_UNIX_MAX_DGRAM_QLEN); + r = write_string_filef("/proc/sys/net/unix/max_dgram_qlen", WRITE_STRING_FILE_DISABLE_BUFFER, "%lu", DEFAULT_UNIX_MAX_DGRAM_QLEN); if (r < 0) return log_full_errno(IN_SET(r, -EROFS, -EPERM, -EACCES) ? 
LOG_DEBUG : LOG_WARNING, r, "Failed to bump AF_UNIX datagram queue length, ignoring: %m"); @@ -1482,13 +1615,29 @@ static void initialize_coredump(bool skip_setup) { if (setrlimit(RLIMIT_CORE, &RLIMIT_MAKE_CONST(RLIM_INFINITY)) < 0) log_warning_errno(errno, "Failed to set RLIMIT_CORE: %m"); - /* But at the same time, turn off the core_pattern logic by default, so that no coredumps are stored - * until the systemd-coredump tool is enabled via sysctl. */ + /* But at the same time, turn off the core_pattern logic by default, so that no + * coredumps are stored until the systemd-coredump tool is enabled via + * sysctl. However it can be changed via the kernel command line later so core + * dumps can still be generated during early startup and in initramfs. */ if (!skip_setup) disable_coredumps(); #endif } +static void initialize_core_pattern(bool skip_setup) { + int r; + + if (skip_setup || !arg_early_core_pattern) + return; + + if (getpid_cached() != 1) + return; + + r = write_string_file("/proc/sys/kernel/core_pattern", arg_early_core_pattern, WRITE_STRING_FILE_DISABLE_BUFFER); + if (r < 0) + log_warning_errno(r, "Failed to write '%s' to /proc/sys/kernel/core_pattern, ignoring: %m", arg_early_core_pattern); +} + static void do_reexecute( int argc, char *argv[], @@ -1585,6 +1734,7 @@ static void do_reexecute( /* Reenable any blocked signals, especially important if we switch from initial ramdisk to init=... */ (void) reset_all_signal_handlers(); (void) reset_signal_mask(); + (void) rlimit_nofile_safe(); if (switch_root_init) { args[0] = switch_root_init; @@ -1641,7 +1791,7 @@ static int invoke_main_loop( return log_emergency_errno(r, "Failed to run main loop: %m"); } - switch (m->exit_code) { + switch ((ManagerObjective) r) { case MANAGER_RELOAD: { LogTarget saved_log_target; @@ -1668,7 +1818,8 @@ static int invoke_main_loop( r = manager_reload(m); if (r < 0) - log_warning_errno(r, "Failed to reload, ignoring: %m"); + /* Reloading failed before the point of no return. 
Let's continue running as if nothing happened. */ + m->objective = MANAGER_OK; break; } @@ -1732,19 +1883,19 @@ static int invoke_main_loop( case MANAGER_POWEROFF: case MANAGER_HALT: case MANAGER_KEXEC: { - static const char * const table[_MANAGER_EXIT_CODE_MAX] = { - [MANAGER_EXIT] = "exit", - [MANAGER_REBOOT] = "reboot", + static const char * const table[_MANAGER_OBJECTIVE_MAX] = { + [MANAGER_EXIT] = "exit", + [MANAGER_REBOOT] = "reboot", [MANAGER_POWEROFF] = "poweroff", - [MANAGER_HALT] = "halt", - [MANAGER_KEXEC] = "kexec" + [MANAGER_HALT] = "halt", + [MANAGER_KEXEC] = "kexec", }; log_notice("Shutting down."); *ret_reexecute = false; *ret_retval = m->return_value; - assert_se(*ret_shutdown_verb = table[m->exit_code]); + assert_se(*ret_shutdown_verb = table[m->objective]); *ret_fds = NULL; *ret_switch_root_dir = *ret_switch_root_init = NULL; @@ -1752,7 +1903,7 @@ static int invoke_main_loop( } default: - assert_not_reached("Unknown exit code."); + assert_not_reached("Unknown or unexpected manager objective."); } } } @@ -1824,7 +1975,7 @@ static int initialize_runtime( install_crash_handler(); if (!skip_setup) { - r = mount_cgroup_controllers(arg_join_controllers); + r = mount_cgroup_controllers(); if (r < 0) { *ret_error_message = "Failed to mount cgroup hierarchies"; return r; @@ -1835,6 +1986,7 @@ static int initialize_runtime( machine_id_setup(NULL, arg_machine_id, NULL); loopback_setup(); bump_unix_max_dgram_qlen(); + bump_file_max_and_nr_open(); test_usr(); write_container_id(); } @@ -1887,11 +2039,9 @@ static int initialize_runtime( if (prctl(PR_SET_CHILD_SUBREAPER, 1) < 0) log_warning_errno(errno, "Failed to make us a subreaper: %m"); - if (arg_system) { - /* Bump up RLIMIT_NOFILE for systemd itself */ - (void) bump_rlimit_nofile(saved_rlimit_nofile); - (void) bump_rlimit_memlock(saved_rlimit_memlock); - } + /* Bump up RLIMIT_NOFILE for systemd itself */ + (void) bump_rlimit_nofile(saved_rlimit_nofile); + (void) bump_rlimit_memlock(saved_rlimit_memlock); 
return 0; } @@ -1950,7 +2100,6 @@ static void free_arguments(void) { arg_default_unit = mfree(arg_default_unit); arg_confirm_spawn = mfree(arg_confirm_spawn); - arg_join_controllers = strv_free_free(arg_join_controllers); arg_default_environment = strv_free(arg_default_environment); arg_syscall_archs = set_free(arg_syscall_archs); } @@ -2002,50 +2151,43 @@ static int load_configuration(int argc, char **argv, const char **ret_error_mess static int safety_checks(void) { if (getpid_cached() == 1 && - arg_action != ACTION_RUN) { - log_error("Unsupported execution mode while PID 1."); - return -EPERM; - } + arg_action != ACTION_RUN) + return log_error_errno(SYNTHETIC_ERRNO(EPERM), + "Unsupported execution mode while PID 1."); if (getpid_cached() == 1 && - !arg_system) { - log_error("Can't run --user mode as PID 1."); - return -EPERM; - } + !arg_system) + return log_error_errno(SYNTHETIC_ERRNO(EPERM), + "Can't run --user mode as PID 1."); if (arg_action == ACTION_RUN && arg_system && - getpid_cached() != 1) { - log_error("Can't run system mode unless PID 1."); - return -EPERM; - } + getpid_cached() != 1) + return log_error_errno(SYNTHETIC_ERRNO(EPERM), + "Can't run system mode unless PID 1."); if (arg_action == ACTION_TEST && - geteuid() == 0) { - log_error("Don't run test mode as root."); - return -EPERM; - } + geteuid() == 0) + return log_error_errno(SYNTHETIC_ERRNO(EPERM), + "Don't run test mode as root."); if (!arg_system && arg_action == ACTION_RUN && - sd_booted() <= 0) { - log_error("Trying to run as user instance, but the system has not been booted with systemd."); - return -EOPNOTSUPP; - } + sd_booted() <= 0) + return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP), + "Trying to run as user instance, but the system has not been booted with systemd."); if (!arg_system && arg_action == ACTION_RUN && - !getenv("XDG_RUNTIME_DIR")) { - log_error("Trying to run as user instance, but $XDG_RUNTIME_DIR is not set."); - return -EUNATCH; - } + !getenv("XDG_RUNTIME_DIR")) + 
return log_error_errno(SYNTHETIC_ERRNO(EUNATCH), + "Trying to run as user instance, but $XDG_RUNTIME_DIR is not set."); if (arg_system && arg_action == ACTION_RUN && - running_in_chroot() > 0) { - log_error("Cannot be run in a chroot() environment."); - return -EOPNOTSUPP; - } + running_in_chroot() > 0) + return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP), + "Cannot be run in a chroot() environment."); return 0; } @@ -2317,7 +2459,7 @@ int main(int argc, char *argv[]) { goto finish; if (IN_SET(arg_action, ACTION_TEST, ACTION_HELP, ACTION_DUMP_CONFIGURATION_ITEMS, ACTION_DUMP_BUS_PROPERTIES)) - (void) pager_open(arg_no_pager, false); + (void) pager_open(arg_pager_flags); if (arg_action != ACTION_RUN) skip_setup = true; @@ -2345,6 +2487,9 @@ int main(int argc, char *argv[]) { if (arg_action == ACTION_RUN) { + /* A core pattern might have been specified via the cmdline. */ + initialize_core_pattern(skip_setup); + /* Close logging fds, in order not to confuse collecting passed fds and terminal logic below */ log_close(); @@ -2395,7 +2540,6 @@ int main(int argc, char *argv[]) { r = manager_startup(m, arg_serialization, fds); if (r < 0) { - log_error_errno(r, "Failed to fully start up daemon: %m"); error_message = "Failed to start up manager"; goto finish; } @@ -2481,8 +2625,8 @@ finish: if (error_message) manager_status_printf(NULL, STATUS_TYPE_EMERGENCY, ANSI_HIGHLIGHT_RED "!!!!!!" ANSI_NORMAL, - "%s, freezing.", error_message); - freeze_or_reboot(); + "%s.", error_message); + freeze_or_exit_or_reboot(); } return retval;