]> git.ipfire.org Git - thirdparty/systemd.git/blobdiff - src/nspawn/nspawn.c
nspawn: make use of log_set_open_when_needed() in nspawn too
[thirdparty/systemd.git] / src / nspawn / nspawn.c
index 52c6ba1c0cc1d7835511cc96f9c4d873c3b5c883..b1add0f1590b567362bc550d9602e4ab52dd7881 100644 (file)
@@ -43,6 +43,7 @@
 #include "capability-util.h"
 #include "cgroup-util.h"
 #include "copy.h"
+#include "cpu-set-util.h"
 #include "dev-setup.h"
 #include "dissect-image.h"
 #include "env-util.h"
@@ -75,6 +76,7 @@
 #include "nspawn-settings.h"
 #include "nspawn-setuid.h"
 #include "nspawn-stub-pid1.h"
+#include "pager.h"
 #include "parse-util.h"
 #include "path-util.h"
 #include "process-util.h"
@@ -88,6 +90,7 @@
 #include "socket-util.h"
 #include "stat-util.h"
 #include "stdio-util.h"
+#include "string-table.h"
 #include "string-util.h"
 #include "strv.h"
 #include "terminal-util.h"
@@ -204,8 +207,15 @@ static char **arg_syscall_whitelist = NULL;
 static char **arg_syscall_blacklist = NULL;
 static struct rlimit *arg_rlimit[_RLIMIT_MAX] = {};
 static bool arg_no_new_privileges = false;
+static int arg_oom_score_adjust = 0;
+static bool arg_oom_score_adjust_set = false;
+static cpu_set_t *arg_cpuset = NULL;
+static unsigned arg_cpuset_ncpus = 0;
 
 static void help(void) {
+
+        (void) pager_open(false, false);
+
         printf("%s [OPTIONS...] [PATH] [ARGUMENTS...]\n\n"
                "Spawn a minimal namespace container for debugging, testing and building.\n\n"
                "  -h --help                 Show this help\n"
@@ -270,6 +280,9 @@ static void help(void) {
                "     --system-call-filter=LIST|~LIST\n"
                "                            Permit/prohibit specific system calls\n"
                "     --rlimit=NAME=LIMIT    Set a resource limit for the payload\n"
+               "     --oom-score-adjust=VALUE\n"
+               "                            Adjust the OOM score value for the payload\n"
+               "     --cpu-affinity=CPUS    Adjust the CPU affinity of the container\n"
                "     --kill-signal=SIGNAL   Select signal to use for shutting down PID 1\n"
                "     --link-journal=MODE    Link up guest journal, one of no, auto, guest, \n"
                "                            host, try-guest, try-host\n"
@@ -448,6 +461,8 @@ static int parse_argv(int argc, char *argv[]) {
                 ARG_RLIMIT,
                 ARG_HOSTNAME,
                 ARG_NO_NEW_PRIVILEGES,
+                ARG_OOM_SCORE_ADJUST,
+                ARG_CPU_AFFINITY,
         };
 
         static const struct option options[] = {
@@ -504,6 +519,8 @@ static int parse_argv(int argc, char *argv[]) {
                 { "root-hash",              required_argument, NULL, ARG_ROOT_HASH              },
                 { "system-call-filter",     required_argument, NULL, ARG_SYSTEM_CALL_FILTER     },
                 { "rlimit",                 required_argument, NULL, ARG_RLIMIT                 },
+                { "oom-score-adjust",       required_argument, NULL, ARG_OOM_SCORE_ADJUST       },
+                { "cpu-affinity",           required_argument, NULL, ARG_CPU_AFFINITY           },
                 {}
         };
 
@@ -900,7 +917,10 @@ static int parse_argv(int argc, char *argv[]) {
 
                         if (!optarg)
                                 arg_volatile_mode = VOLATILE_YES;
-                        else {
+                        else if (streq(optarg, "help")) {
+                                DUMP_STRING_TABLE(volatile_mode, VolatileMode, _VOLATILE_MODE_MAX);
+                                return 0;
+                        } else {
                                 VolatileMode m;
 
                                 m = volatile_mode_from_string(optarg);
@@ -1008,6 +1028,11 @@ static int parse_argv(int argc, char *argv[]) {
                         break;
 
                 case ARG_KILL_SIGNAL:
+                        if (streq(optarg, "help")) {
+                                DUMP_STRING_TABLE(signal, int, _NSIG);
+                                return 0;
+                        }
+
                         arg_kill_signal = signal_from_string(optarg);
                         if (arg_kill_signal < 0) {
                                 log_error("Cannot parse signal: %s", optarg);
@@ -1137,6 +1162,11 @@ static int parse_argv(int argc, char *argv[]) {
                         char *name;
                         int rl;
 
+                        if (streq(optarg, "help")) {
+                                DUMP_STRING_TABLE(rlimit, int, _RLIMIT_MAX);
+                                return 0;
+                        }
+
                         eq = strchr(optarg, '=');
                         if (!eq) {
                                 log_error("--rlimit= expects an '=' assignment.");
@@ -1167,6 +1197,31 @@ static int parse_argv(int argc, char *argv[]) {
                         break;
                 }
 
+                case ARG_OOM_SCORE_ADJUST:
+                        r = parse_oom_score_adjust(optarg, &arg_oom_score_adjust);
+                        if (r < 0)
+                                return log_error_errno(r, "Failed to parse --oom-score-adjust= parameter: %s", optarg);
+
+                        arg_oom_score_adjust_set = true;
+                        arg_settings_mask |= SETTING_OOM_SCORE_ADJUST;
+                        break;
+
+                case ARG_CPU_AFFINITY: {
+                        _cleanup_cpu_free_ cpu_set_t *cpuset = NULL;
+
+                        r = parse_cpu_set(optarg, &cpuset);
+                        if (r < 0)
+                                return log_error_errno(r, "Failed to parse CPU affinity mask: %s", optarg);
+
+                        if (arg_cpuset)
+                                CPU_FREE(arg_cpuset);
+
+                        arg_cpuset = TAKE_PTR(cpuset);
+                        arg_cpuset_ncpus = r;
+                        arg_settings_mask |= SETTING_CPU_AFFINITY;
+                        break;
+                }
+
                 case '?':
                         return -EINVAL;
 
@@ -1791,12 +1846,14 @@ static int on_address_change(sd_netlink *rtnl, sd_netlink_message *m, void *user
 }
 
 static int setup_hostname(void) {
+        int r;
 
         if ((arg_clone_ns_flags & CLONE_NEWUTS) == 0)
                 return 0;
 
-        if (sethostname_idempotent(arg_hostname ?: arg_machine) < 0)
-                return -errno;
+        r = sethostname_idempotent(arg_hostname ?: arg_machine);
+        if (r < 0)
+                return log_error_errno(r, "Failed to set hostname: %m");
 
         return 0;
 }
@@ -2449,11 +2506,21 @@ static int inner_child(
                 rtnl_socket = safe_close(rtnl_socket);
         }
 
+        if (arg_oom_score_adjust_set) {
+                r = set_oom_score_adjust(arg_oom_score_adjust);
+                if (r < 0)
+                        return log_error_errno(r, "Failed to adjust OOM score: %m");
+        }
+
+        if (arg_cpuset)
+                if (sched_setaffinity(0, CPU_ALLOC_SIZE(arg_cpuset_ncpus), arg_cpuset) < 0)
+                        return log_error_errno(errno, "Failed to set CPU affinity: %m");
+
         r = drop_capabilities();
         if (r < 0)
                 return log_error_errno(r, "drop_capabilities() failed: %m");
 
-        setup_hostname();
+        (void) setup_hostname();
 
         if (arg_personality != PERSONALITY_INVALID) {
                 r = safe_personality(arg_personality);
@@ -2530,15 +2597,12 @@ static int inner_child(
                         return r;
         }
 
-        /* Now, explicitly close the log, so that we
-         * then can close all remaining fds. Closing
-         * the log explicitly first has the benefit
-         * that the logging subsystem knows about it,
-         * and is thus ready to be reopened should we
-         * need it again. Note that the other fds
-         * closed here are at least the locking and
-         * barrier fds. */
+        /* Now, explicitly close the log, so that we then can close all remaining fds. Closing the log explicitly first
+         * has the benefit that the logging subsystem knows about it, and is thus ready to be reopened should we need
+         * it again. Note that the other fds closed here are at least the locking and barrier fds. */
         log_close();
+        log_set_open_when_needed(true);
+
         (void) fdset_close_others(fds);
 
         if (arg_start_mode == START_BOOT) {
@@ -2576,9 +2640,7 @@ static int inner_child(
                 exec_target = "/bin/bash, /bin/sh";
         }
 
-        r = -errno;
-        (void) log_open();
-        return log_error_errno(r, "execv(%s) failed: %m", exec_target);
+        return log_error_errno(errno, "execv(%s) failed: %m", exec_target);
 }
 
 static int setup_sd_notify_child(void) {
@@ -3121,79 +3183,14 @@ static int setup_sd_notify_parent(sd_event *event, int fd, pid_t *inner_child_pi
         return 0;
 }
 
-static int load_settings(void) {
-        _cleanup_(settings_freep) Settings *settings = NULL;
-        _cleanup_fclose_ FILE *f = NULL;
-        _cleanup_free_ char *p = NULL;
-        const char *fn, *i;
-        int r, rl;
-
-        /* If all settings are masked, there's no point in looking for
-         * the settings file */
-        if ((arg_settings_mask & _SETTINGS_MASK_ALL) == _SETTINGS_MASK_ALL)
-                return 0;
-
-        fn = strjoina(arg_machine, ".nspawn");
-
-        /* We first look in the admin's directories in /etc and /run */
-        FOREACH_STRING(i, "/etc/systemd/nspawn", "/run/systemd/nspawn") {
-                _cleanup_free_ char *j = NULL;
-
-                j = strjoin(i, "/", fn);
-                if (!j)
-                        return log_oom();
-
-                f = fopen(j, "re");
-                if (f) {
-                        p = TAKE_PTR(j);
-
-                        /* By default, we trust configuration from /etc and /run */
-                        if (arg_settings_trusted < 0)
-                                arg_settings_trusted = true;
-
-                        break;
-                }
-
-                if (errno != ENOENT)
-                        return log_error_errno(errno, "Failed to open %s: %m", j);
-        }
-
-        if (!f) {
-                /* After that, let's look for a file next to the
-                 * actual image we shall boot. */
-
-                if (arg_image) {
-                        p = file_in_same_dir(arg_image, fn);
-                        if (!p)
-                                return log_oom();
-                } else if (arg_directory) {
-                        p = file_in_same_dir(arg_directory, fn);
-                        if (!p)
-                                return log_oom();
-                }
-
-                if (p) {
-                        f = fopen(p, "re");
-                        if (!f && errno != ENOENT)
-                                return log_error_errno(errno, "Failed to open %s: %m", p);
-
-                        /* By default, we do not trust configuration from /var/lib/machines */
-                        if (arg_settings_trusted < 0)
-                                arg_settings_trusted = false;
-                }
-        }
-
-        if (!f)
-                return 0;
-
-        log_debug("Settings are trusted: %s", yes_no(arg_settings_trusted));
+static int merge_settings(Settings *settings, const char *path) {
+        int rl;
 
-        r = settings_load(f, p, &settings);
-        if (r < 0)
-                return r;
+        assert(settings);
+        assert(path);
 
-        /* Copy over bits from the settings, unless they have been
-         * explicitly masked by command line switches. */
+        /* Copy over bits from the settings, unless they have been explicitly masked by command line switches. Note
+         * that this steals the fields of the Settings* structure, and hence modifies it. */
 
         if ((arg_settings_mask & SETTING_START_MODE) == 0 &&
             settings->start_mode >= 0) {
@@ -3228,7 +3225,7 @@ static int load_settings(void) {
 
                 if (!arg_settings_trusted && plus != 0) {
                         if (settings->capability != 0)
-                                log_warning("Ignoring Capability= setting, file %s is not trusted.", p);
+                                log_warning("Ignoring Capability= setting, file %s is not trusted.", path);
                 } else
                         arg_caps_retain |= plus;
 
@@ -3247,7 +3244,7 @@ static int load_settings(void) {
             !sd_id128_is_null(settings->machine_id)) {
 
                 if (!arg_settings_trusted)
-                        log_warning("Ignoring MachineID= setting, file %s is not trusted.", p);
+                        log_warning("Ignoring MachineID= setting, file %s is not trusted.", path);
                 else
                         arg_uuid = settings->machine_id;
         }
@@ -3264,7 +3261,7 @@ static int load_settings(void) {
             settings->n_custom_mounts > 0) {
 
                 if (!arg_settings_trusted)
-                        log_warning("Ignoring TemporaryFileSystem=, Bind= and BindReadOnly= settings, file %s is not trusted.", p);
+                        log_warning("Ignoring TemporaryFileSystem=, Bind= and BindReadOnly= settings, file %s is not trusted.", path);
                 else {
                         custom_mount_free_all(arg_custom_mounts, arg_n_custom_mounts);
                         arg_custom_mounts = TAKE_PTR(settings->custom_mounts);
@@ -3284,7 +3281,7 @@ static int load_settings(void) {
              settings->network_veth_extra)) {
 
                 if (!arg_settings_trusted)
-                        log_warning("Ignoring network settings, file %s is not trusted.", p);
+                        log_warning("Ignoring network settings, file %s is not trusted.", path);
                 else {
                         arg_network_veth = settings_network_veth(settings);
                         arg_private_network = settings_private_network(settings);
@@ -3303,7 +3300,7 @@ static int load_settings(void) {
             settings->expose_ports) {
 
                 if (!arg_settings_trusted)
-                        log_warning("Ignoring Port= setting, file %s is not trusted.", p);
+                        log_warning("Ignoring Port= setting, file %s is not trusted.", path);
                 else {
                         expose_port_free_all(arg_expose_ports);
                         arg_expose_ports = TAKE_PTR(settings->expose_ports);
@@ -3314,7 +3311,7 @@ static int load_settings(void) {
             settings->userns_mode != _USER_NAMESPACE_MODE_INVALID) {
 
                 if (!arg_settings_trusted)
-                        log_warning("Ignoring PrivateUsers= and PrivateUsersChown= settings, file %s is not trusted.", p);
+                        log_warning("Ignoring PrivateUsers= and PrivateUsersChown= settings, file %s is not trusted.", path);
                 else {
                         arg_userns_mode = settings->userns_mode;
                         arg_uid_shift = settings->uid_shift;
@@ -3329,7 +3326,7 @@ static int load_settings(void) {
         if ((arg_settings_mask & SETTING_SYSCALL_FILTER) == 0) {
 
                 if (!arg_settings_trusted && !strv_isempty(arg_syscall_whitelist))
-                        log_warning("Ignoring SystemCallFilter= settings, file %s is not trusted.", p);
+                        log_warning("Ignoring SystemCallFilter= settings, file %s is not trusted.", path);
                 else {
                         strv_free_and_replace(arg_syscall_whitelist, settings->syscall_whitelist);
                         strv_free_and_replace(arg_syscall_blacklist, settings->syscall_blacklist);
@@ -3344,7 +3341,7 @@ static int load_settings(void) {
                         continue;
 
                 if (!arg_settings_trusted) {
-                        log_warning("Ignoring Limit%s= setting, file '%s' is not trusted.", rlimit_to_string(rl), p);
+                        log_warning("Ignoring Limit%s= setting, file '%s' is not trusted.", rlimit_to_string(rl), path);
                         continue;
                 }
 
@@ -3359,9 +3356,107 @@ static int load_settings(void) {
             settings->no_new_privileges >= 0)
                 arg_no_new_privileges = settings->no_new_privileges;
 
+        if ((arg_settings_mask & SETTING_OOM_SCORE_ADJUST) == 0 &&
+            settings->oom_score_adjust_set) {
+
+                if (!arg_settings_trusted)
+                        log_warning("Ignoring OOMScoreAdjust= setting, file '%s' is not trusted.", path);
+                else {
+                        arg_oom_score_adjust = settings->oom_score_adjust;
+                        arg_oom_score_adjust_set = true;
+                }
+        }
+
+        if ((arg_settings_mask & SETTING_CPU_AFFINITY) == 0 &&
+            settings->cpuset) {
+
+                if (!arg_settings_trusted)
+                        log_warning("Ignoring CPUAffinity= setting, file '%s' is not trusted.", path);
+                else {
+                        if (arg_cpuset)
+                                CPU_FREE(arg_cpuset);
+                        arg_cpuset = TAKE_PTR(settings->cpuset);
+                        arg_cpuset_ncpus = settings->cpuset_ncpus;
+                }
+        }
+
         return 0;
 }
 
+static int load_settings(void) {
+        _cleanup_(settings_freep) Settings *settings = NULL;
+        _cleanup_fclose_ FILE *f = NULL;
+        _cleanup_free_ char *p = NULL;
+        const char *fn, *i;
+        int r;
+
+        /* If all settings are masked, there's no point in looking for
+         * the settings file */
+        if ((arg_settings_mask & _SETTINGS_MASK_ALL) == _SETTINGS_MASK_ALL)
+                return 0;
+
+        fn = strjoina(arg_machine, ".nspawn");
+
+        /* We first look in the admin's directories in /etc and /run */
+        FOREACH_STRING(i, "/etc/systemd/nspawn", "/run/systemd/nspawn") {
+                _cleanup_free_ char *j = NULL;
+
+                j = strjoin(i, "/", fn);
+                if (!j)
+                        return log_oom();
+
+                f = fopen(j, "re");
+                if (f) {
+                        p = TAKE_PTR(j);
+
+                        /* By default, we trust configuration from /etc and /run */
+                        if (arg_settings_trusted < 0)
+                                arg_settings_trusted = true;
+
+                        break;
+                }
+
+                if (errno != ENOENT)
+                        return log_error_errno(errno, "Failed to open %s: %m", j);
+        }
+
+        if (!f) {
+                /* After that, let's look for a file next to the
+                 * actual image we shall boot. */
+
+                if (arg_image) {
+                        p = file_in_same_dir(arg_image, fn);
+                        if (!p)
+                                return log_oom();
+                } else if (arg_directory) {
+                        p = file_in_same_dir(arg_directory, fn);
+                        if (!p)
+                                return log_oom();
+                }
+
+                if (p) {
+                        f = fopen(p, "re");
+                        if (!f && errno != ENOENT)
+                                return log_error_errno(errno, "Failed to open %s: %m", p);
+
+                        /* By default, we do not trust configuration from /var/lib/machines */
+                        if (arg_settings_trusted < 0)
+                                arg_settings_trusted = false;
+                }
+        }
+
+        if (!f)
+                return 0;
+
+        log_debug("Settings are trusted: %s", yes_no(arg_settings_trusted));
+
+        r = settings_load(f, p, &settings);
+        if (r < 0)
+                return r;
+
+        return merge_settings(settings, p);
+}
+
 static int run(int master,
                const char* console,
                DissectedImage *dissected_image,
@@ -3783,20 +3878,20 @@ static int run(int master,
                    "STATUS=Container running.\n"
                    "X_NSPAWN_LEADER_PID=" PID_FMT, *pid);
         if (!arg_notify_ready)
-                sd_notify(false, "READY=1\n");
+                (void) sd_notify(false, "READY=1\n");
 
         if (arg_kill_signal > 0) {
                 /* Try to kill the init system on SIGINT or SIGTERM */
-                sd_event_add_signal(event, NULL, SIGINT, on_orderly_shutdown, PID_TO_PTR(*pid));
-                sd_event_add_signal(event, NULL, SIGTERM, on_orderly_shutdown, PID_TO_PTR(*pid));
+                (void) sd_event_add_signal(event, NULL, SIGINT, on_orderly_shutdown, PID_TO_PTR(*pid));
+                (void) sd_event_add_signal(event, NULL, SIGTERM, on_orderly_shutdown, PID_TO_PTR(*pid));
         } else {
                 /* Immediately exit */
-                sd_event_add_signal(event, NULL, SIGINT, NULL, NULL);
-                sd_event_add_signal(event, NULL, SIGTERM, NULL, NULL);
+                (void) sd_event_add_signal(event, NULL, SIGINT, NULL, NULL);
+                (void) sd_event_add_signal(event, NULL, SIGTERM, NULL, NULL);
         }
 
         /* Exit when the child exits */
-        sd_event_add_signal(event, NULL, SIGCHLD, on_sigchld, PID_TO_PTR(*pid));
+        (void) sd_event_add_signal(event, NULL, SIGCHLD, on_sigchld, PID_TO_PTR(*pid));
 
         if (arg_expose_ports) {
                 r = expose_port_watch_rtnl(event, rtnl_socket_pair[0], on_address_change, exposed, &rtnl);
@@ -4094,17 +4189,30 @@ int main(int argc, char *argv[]) {
                 }
 
                 if (arg_start_mode == START_BOOT) {
-                        if (path_is_os_tree(arg_directory) <= 0) {
-                                log_error("Directory %s doesn't look like an OS root directory (os-release file is missing). Refusing.", arg_directory);
+                       const char *p;
+
+                       if (arg_pivot_root_new)
+                               p = prefix_roota(arg_directory, arg_pivot_root_new);
+                       else
+                               p = arg_directory;
+
+                        if (path_is_os_tree(p) <= 0) {
+                                log_error("Directory %s doesn't look like an OS root directory (os-release file is missing). Refusing.", p);
                                 r = -EINVAL;
                                 goto finish;
                         }
                 } else {
-                        const char *p;
+                        const char *p, *q;
+
+                       if (arg_pivot_root_new)
+                               p = prefix_roota(arg_directory, arg_pivot_root_new);
+                       else
+                               p = arg_directory;
 
-                        p = strjoina(arg_directory, "/usr/");
-                        if (laccess(p, F_OK) < 0) {
-                                log_error("Directory %s doesn't look like it has an OS tree. Refusing.", arg_directory);
+                        q = strjoina(p, "/usr/");
+
+                        if (laccess(q, F_OK) < 0) {
+                                log_error("Directory %s doesn't look like it has an OS tree. Refusing.", p);
                                 r = -EINVAL;
                                 goto finish;
                         }
@@ -4284,6 +4392,8 @@ finish:
         if (pid > 0)
                 (void) wait_for_terminate(pid, NULL);
 
+        pager_close();
+
         if (remove_directory && arg_directory) {
                 int k;
 
@@ -4335,6 +4445,7 @@ finish:
         expose_port_free_all(arg_expose_ports);
         free(arg_root_hash);
         rlimit_free_all(arg_rlimit);
+        arg_cpuset = cpu_set_mfree(arg_cpuset);
 
         return r < 0 ? EXIT_FAILURE : ret;
 }