#include "capability-util.h"
#include "cgroup-util.h"
#include "copy.h"
+#include "cpu-set-util.h"
#include "dev-setup.h"
#include "dissect-image.h"
#include "env-util.h"
#include "nspawn-settings.h"
#include "nspawn-setuid.h"
#include "nspawn-stub-pid1.h"
+#include "pager.h"
#include "parse-util.h"
#include "path-util.h"
#include "process-util.h"
#include "socket-util.h"
#include "stat-util.h"
#include "stdio-util.h"
+#include "string-table.h"
#include "string-util.h"
#include "strv.h"
#include "terminal-util.h"
static char **arg_syscall_blacklist = NULL;
static struct rlimit *arg_rlimit[_RLIMIT_MAX] = {};
static bool arg_no_new_privileges = false;
+static int arg_oom_score_adjust = 0;
+static bool arg_oom_score_adjust_set = false;
+static cpu_set_t *arg_cpuset = NULL;
+static unsigned arg_cpuset_ncpus = 0;
static void help(void) {
+
+ (void) pager_open(false, false);
+
printf("%s [OPTIONS...] [PATH] [ARGUMENTS...]\n\n"
"Spawn a minimal namespace container for debugging, testing and building.\n\n"
" -h --help Show this help\n"
" --system-call-filter=LIST|~LIST\n"
" Permit/prohibit specific system calls\n"
" --rlimit=NAME=LIMIT Set a resource limit for the payload\n"
+ " --oom-score-adjust=VALUE\n"
+ " Adjust the OOM score value for the payload\n"
+ " --cpu-affinity=CPUS Adjust the CPU affinity of the container\n"
" --kill-signal=SIGNAL Select signal to use for shutting down PID 1\n"
" --link-journal=MODE Link up guest journal, one of no, auto, guest, \n"
" host, try-guest, try-host\n"
ARG_RLIMIT,
ARG_HOSTNAME,
ARG_NO_NEW_PRIVILEGES,
+ ARG_OOM_SCORE_ADJUST,
+ ARG_CPU_AFFINITY,
};
static const struct option options[] = {
{ "root-hash", required_argument, NULL, ARG_ROOT_HASH },
{ "system-call-filter", required_argument, NULL, ARG_SYSTEM_CALL_FILTER },
{ "rlimit", required_argument, NULL, ARG_RLIMIT },
+ { "oom-score-adjust", required_argument, NULL, ARG_OOM_SCORE_ADJUST },
+ { "cpu-affinity", required_argument, NULL, ARG_CPU_AFFINITY },
{}
};
if (!optarg)
arg_volatile_mode = VOLATILE_YES;
- else {
+ else if (streq(optarg, "help")) {
+ DUMP_STRING_TABLE(volatile_mode, VolatileMode, _VOLATILE_MODE_MAX);
+ return 0;
+ } else {
VolatileMode m;
m = volatile_mode_from_string(optarg);
break;
case ARG_KILL_SIGNAL:
+ if (streq(optarg, "help")) {
+ DUMP_STRING_TABLE(signal, int, _NSIG);
+ return 0;
+ }
+
arg_kill_signal = signal_from_string(optarg);
if (arg_kill_signal < 0) {
log_error("Cannot parse signal: %s", optarg);
char *name;
int rl;
+ if (streq(optarg, "help")) {
+ DUMP_STRING_TABLE(rlimit, int, _RLIMIT_MAX);
+ return 0;
+ }
+
eq = strchr(optarg, '=');
if (!eq) {
log_error("--rlimit= expects an '=' assignment.");
break;
}
+ case ARG_OOM_SCORE_ADJUST:
+ r = parse_oom_score_adjust(optarg, &arg_oom_score_adjust);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse --oom-score-adjust= parameter: %s", optarg);
+
+ arg_oom_score_adjust_set = true;
+ arg_settings_mask |= SETTING_OOM_SCORE_ADJUST;
+ break;
+
+ case ARG_CPU_AFFINITY: {
+ _cleanup_cpu_free_ cpu_set_t *cpuset = NULL;
+
+ r = parse_cpu_set(optarg, &cpuset);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse CPU affinity mask: %s", optarg);
+
+ if (arg_cpuset)
+ CPU_FREE(arg_cpuset);
+
+ arg_cpuset = TAKE_PTR(cpuset);
+ arg_cpuset_ncpus = r;
+ arg_settings_mask |= SETTING_CPU_AFFINITY;
+ break;
+ }
+
case '?':
return -EINVAL;
}
static int setup_hostname(void) {
+ int r; /* Purpose of this function: set the hostname to arg_hostname, or to arg_machine when
+         * arg_hostname is NULL, but only if CLONE_NEWUTS is part of arg_clone_ns_flags; otherwise it
+         * returns 0 without calling sethostname_idempotent(). r holds that call's result. */
if ((arg_clone_ns_flags & CLONE_NEWUTS) == 0)
return 0;
- if (sethostname_idempotent(arg_hostname ?: arg_machine) < 0)
- return -errno;
+ r = sethostname_idempotent(arg_hostname ?: arg_machine);
+ if (r < 0) /* a negative result is treated as failure: log it and hand log_error_errno()'s result to the caller */
+ return log_error_errno(r, "Failed to set hostname: %m");
return 0;
}
rtnl_socket = safe_close(rtnl_socket);
}
+ if (arg_oom_score_adjust_set) {
+ r = set_oom_score_adjust(arg_oom_score_adjust);
+ if (r < 0)
+ return log_error_errno(r, "Failed to adjust OOM score: %m");
+ }
+
+ if (arg_cpuset)
+ if (sched_setaffinity(0, CPU_ALLOC_SIZE(arg_cpuset_ncpus), arg_cpuset) < 0)
+ return log_error_errno(errno, "Failed to set CPU affinity: %m");
+
r = drop_capabilities();
if (r < 0)
return log_error_errno(r, "drop_capabilities() failed: %m");
- setup_hostname();
+ (void) setup_hostname();
if (arg_personality != PERSONALITY_INVALID) {
r = safe_personality(arg_personality);
return r;
}
- /* Now, explicitly close the log, so that we
- * then can close all remaining fds. Closing
- * the log explicitly first has the benefit
- * that the logging subsystem knows about it,
- * and is thus ready to be reopened should we
- * need it again. Note that the other fds
- * closed here are at least the locking and
- * barrier fds. */
+ /* Now, explicitly close the log, so that we then can close all remaining fds. Closing the log explicitly first
+ * has the benefit that the logging subsystem knows about it, and is thus ready to be reopened should we need
+ * it again. Note that the other fds closed here are at least the locking and barrier fds. */
log_close();
+ log_set_open_when_needed(true);
+
(void) fdset_close_others(fds);
if (arg_start_mode == START_BOOT) {
exec_target = "/bin/bash, /bin/sh";
}
- r = -errno;
- (void) log_open();
- return log_error_errno(r, "execv(%s) failed: %m", exec_target);
+ return log_error_errno(errno, "execv(%s) failed: %m", exec_target);
}
static int setup_sd_notify_child(void) {
return 0;
}
-static int load_settings(void) {
- _cleanup_(settings_freep) Settings *settings = NULL;
- _cleanup_fclose_ FILE *f = NULL;
- _cleanup_free_ char *p = NULL;
- const char *fn, *i;
- int r, rl;
-
- /* If all settings are masked, there's no point in looking for
- * the settings file */
- if ((arg_settings_mask & _SETTINGS_MASK_ALL) == _SETTINGS_MASK_ALL)
- return 0;
-
- fn = strjoina(arg_machine, ".nspawn");
-
- /* We first look in the admin's directories in /etc and /run */
- FOREACH_STRING(i, "/etc/systemd/nspawn", "/run/systemd/nspawn") {
- _cleanup_free_ char *j = NULL;
-
- j = strjoin(i, "/", fn);
- if (!j)
- return log_oom();
-
- f = fopen(j, "re");
- if (f) {
- p = TAKE_PTR(j);
-
- /* By default, we trust configuration from /etc and /run */
- if (arg_settings_trusted < 0)
- arg_settings_trusted = true;
-
- break;
- }
-
- if (errno != ENOENT)
- return log_error_errno(errno, "Failed to open %s: %m", j);
- }
-
- if (!f) {
- /* After that, let's look for a file next to the
- * actual image we shall boot. */
-
- if (arg_image) {
- p = file_in_same_dir(arg_image, fn);
- if (!p)
- return log_oom();
- } else if (arg_directory) {
- p = file_in_same_dir(arg_directory, fn);
- if (!p)
- return log_oom();
- }
-
- if (p) {
- f = fopen(p, "re");
- if (!f && errno != ENOENT)
- return log_error_errno(errno, "Failed to open %s: %m", p);
-
- /* By default, we do not trust configuration from /var/lib/machines */
- if (arg_settings_trusted < 0)
- arg_settings_trusted = false;
- }
- }
-
- if (!f)
- return 0;
-
- log_debug("Settings are trusted: %s", yes_no(arg_settings_trusted));
+static int merge_settings(Settings *settings, const char *path) {
+ int rl;
- r = settings_load(f, p, &settings);
- if (r < 0)
- return r;
+ assert(settings);
+ assert(path);
- /* Copy over bits from the settings, unless they have been
- * explicitly masked by command line switches. */
+ /* Copy over bits from the settings, unless they have been explicitly masked by command line switches. Note
+ * that this steals the fields of the Settings* structure, and hence modifies it. */
if ((arg_settings_mask & SETTING_START_MODE) == 0 &&
settings->start_mode >= 0) {
if (!arg_settings_trusted && plus != 0) {
if (settings->capability != 0)
- log_warning("Ignoring Capability= setting, file %s is not trusted.", p);
+ log_warning("Ignoring Capability= setting, file %s is not trusted.", path);
} else
arg_caps_retain |= plus;
!sd_id128_is_null(settings->machine_id)) {
if (!arg_settings_trusted)
- log_warning("Ignoring MachineID= setting, file %s is not trusted.", p);
+ log_warning("Ignoring MachineID= setting, file %s is not trusted.", path);
else
arg_uuid = settings->machine_id;
}
settings->n_custom_mounts > 0) {
if (!arg_settings_trusted)
- log_warning("Ignoring TemporaryFileSystem=, Bind= and BindReadOnly= settings, file %s is not trusted.", p);
+ log_warning("Ignoring TemporaryFileSystem=, Bind= and BindReadOnly= settings, file %s is not trusted.", path);
else {
custom_mount_free_all(arg_custom_mounts, arg_n_custom_mounts);
arg_custom_mounts = TAKE_PTR(settings->custom_mounts);
settings->network_veth_extra)) {
if (!arg_settings_trusted)
- log_warning("Ignoring network settings, file %s is not trusted.", p);
+ log_warning("Ignoring network settings, file %s is not trusted.", path);
else {
arg_network_veth = settings_network_veth(settings);
arg_private_network = settings_private_network(settings);
settings->expose_ports) {
if (!arg_settings_trusted)
- log_warning("Ignoring Port= setting, file %s is not trusted.", p);
+ log_warning("Ignoring Port= setting, file %s is not trusted.", path);
else {
expose_port_free_all(arg_expose_ports);
arg_expose_ports = TAKE_PTR(settings->expose_ports);
settings->userns_mode != _USER_NAMESPACE_MODE_INVALID) {
if (!arg_settings_trusted)
- log_warning("Ignoring PrivateUsers= and PrivateUsersChown= settings, file %s is not trusted.", p);
+ log_warning("Ignoring PrivateUsers= and PrivateUsersChown= settings, file %s is not trusted.", path);
else {
arg_userns_mode = settings->userns_mode;
arg_uid_shift = settings->uid_shift;
if ((arg_settings_mask & SETTING_SYSCALL_FILTER) == 0) {
if (!arg_settings_trusted && !strv_isempty(arg_syscall_whitelist))
- log_warning("Ignoring SystemCallFilter= settings, file %s is not trusted.", p);
+ log_warning("Ignoring SystemCallFilter= settings, file %s is not trusted.", path);
else {
strv_free_and_replace(arg_syscall_whitelist, settings->syscall_whitelist);
strv_free_and_replace(arg_syscall_blacklist, settings->syscall_blacklist);
continue;
if (!arg_settings_trusted) {
- log_warning("Ignoring Limit%s= setting, file '%s' is not trusted.", rlimit_to_string(rl), p);
+ log_warning("Ignoring Limit%s= setting, file '%s' is not trusted.", rlimit_to_string(rl), path);
continue;
}
settings->no_new_privileges >= 0)
arg_no_new_privileges = settings->no_new_privileges;
+ if ((arg_settings_mask & SETTING_OOM_SCORE_ADJUST) == 0 &&
+ settings->oom_score_adjust_set) {
+
+ if (!arg_settings_trusted)
+ log_warning("Ignoring OOMScoreAdjust= setting, file '%s' is not trusted.", path);
+ else {
+ arg_oom_score_adjust = settings->oom_score_adjust;
+ arg_oom_score_adjust_set = true;
+ }
+ }
+
+ if ((arg_settings_mask & SETTING_CPU_AFFINITY) == 0 &&
+ settings->cpuset) {
+
+ if (!arg_settings_trusted)
+ log_warning("Ignoring CPUAffinity= setting, file '%s' is not trusted.", path);
+ else {
+ if (arg_cpuset)
+ CPU_FREE(arg_cpuset);
+ arg_cpuset = TAKE_PTR(settings->cpuset);
+ arg_cpuset_ncpus = settings->cpuset_ncpus;
+ }
+ }
+
return 0;
}
+/* Locates the per-machine "<machine>.nspawn" settings file, parses it and merges it into the arg_*
+ * variables.
+ *
+ * The administrator directories /etc/systemd/nspawn and /run/systemd/nspawn are searched first; a file
+ * found there defaults to trusted. Failing that, a file next to the image (or, when no image is set,
+ * next to the directory) is tried and defaults to untrusted. A non-negative arg_settings_trusted is
+ * never overridden.
+ *
+ * Returns 0 when every setting is already masked or no settings file exists. Otherwise returns the
+ * result of log_oom()/log_error_errno() when the lookup fails, a negative settings_load() result, or
+ * whatever merge_settings() returns. */
+static int load_settings(void) {
+        _cleanup_(settings_freep) Settings *settings = NULL;
+        _cleanup_fclose_ FILE *f = NULL;
+        _cleanup_free_ char *p = NULL;
+        const char *fn, *i;
+        int r;
+
+        /* Once every setting is masked a settings file has nothing left to influence. */
+        if ((arg_settings_mask & _SETTINGS_MASK_ALL) == _SETTINGS_MASK_ALL)
+                return 0;
+
+        fn = strjoina(arg_machine, ".nspawn");
+
+        /* First choice: the administrator's directories in /etc and /run. */
+        FOREACH_STRING(i, "/etc/systemd/nspawn", "/run/systemd/nspawn") {
+                _cleanup_free_ char *candidate = NULL;
+
+                candidate = strjoin(i, "/", fn);
+                if (!candidate)
+                        return log_oom();
+
+                f = fopen(candidate, "re");
+                if (!f) {
+                        /* A missing file just means "try the next directory"; anything else is fatal. */
+                        if (errno != ENOENT)
+                                return log_error_errno(errno, "Failed to open %s: %m", candidate);
+                } else {
+                        p = TAKE_PTR(candidate);
+
+                        /* Configuration from /etc and /run is trusted unless told otherwise. */
+                        if (arg_settings_trusted < 0)
+                                arg_settings_trusted = true;
+
+                        break;
+                }
+        }
+
+        if (!f) {
+                /* Second choice: a file sitting next to the image or directory we are about to boot.
+                 * The image takes precedence over the directory. */
+                const char *sibling = arg_image ?: arg_directory;
+
+                if (sibling) {
+                        p = file_in_same_dir(sibling, fn);
+                        if (!p)
+                                return log_oom();
+
+                        f = fopen(p, "re");
+                        if (!f && errno != ENOENT)
+                                return log_error_errno(errno, "Failed to open %s: %m", p);
+
+                        /* Configuration from /var/lib/machines is not trusted by default. */
+                        if (arg_settings_trusted < 0)
+                                arg_settings_trusted = false;
+                }
+        }
+
+        /* No settings file anywhere: nothing to merge. */
+        if (!f)
+                return 0;
+
+        log_debug("Settings are trusted: %s", yes_no(arg_settings_trusted));
+
+        r = settings_load(f, p, &settings);
+        if (r < 0)
+                return r;
+
+        return merge_settings(settings, p);
+}
+
static int run(int master,
const char* console,
DissectedImage *dissected_image,
"STATUS=Container running.\n"
"X_NSPAWN_LEADER_PID=" PID_FMT, *pid);
if (!arg_notify_ready)
- sd_notify(false, "READY=1\n");
+ (void) sd_notify(false, "READY=1\n");
if (arg_kill_signal > 0) {
/* Try to kill the init system on SIGINT or SIGTERM */
- sd_event_add_signal(event, NULL, SIGINT, on_orderly_shutdown, PID_TO_PTR(*pid));
- sd_event_add_signal(event, NULL, SIGTERM, on_orderly_shutdown, PID_TO_PTR(*pid));
+ (void) sd_event_add_signal(event, NULL, SIGINT, on_orderly_shutdown, PID_TO_PTR(*pid));
+ (void) sd_event_add_signal(event, NULL, SIGTERM, on_orderly_shutdown, PID_TO_PTR(*pid));
} else {
/* Immediately exit */
- sd_event_add_signal(event, NULL, SIGINT, NULL, NULL);
- sd_event_add_signal(event, NULL, SIGTERM, NULL, NULL);
+ (void) sd_event_add_signal(event, NULL, SIGINT, NULL, NULL);
+ (void) sd_event_add_signal(event, NULL, SIGTERM, NULL, NULL);
}
/* Exit when the child exits */
- sd_event_add_signal(event, NULL, SIGCHLD, on_sigchld, PID_TO_PTR(*pid));
+ (void) sd_event_add_signal(event, NULL, SIGCHLD, on_sigchld, PID_TO_PTR(*pid));
if (arg_expose_ports) {
r = expose_port_watch_rtnl(event, rtnl_socket_pair[0], on_address_change, exposed, &rtnl);
}
if (arg_start_mode == START_BOOT) {
- if (path_is_os_tree(arg_directory) <= 0) {
- log_error("Directory %s doesn't look like an OS root directory (os-release file is missing). Refusing.", arg_directory);
+ const char *p;
+
+ if (arg_pivot_root_new)
+ p = prefix_roota(arg_directory, arg_pivot_root_new);
+ else
+ p = arg_directory;
+
+ if (path_is_os_tree(p) <= 0) {
+ log_error("Directory %s doesn't look like an OS root directory (os-release file is missing). Refusing.", p);
r = -EINVAL;
goto finish;
}
} else {
- const char *p;
+ const char *p, *q;
+
+ if (arg_pivot_root_new)
+ p = prefix_roota(arg_directory, arg_pivot_root_new);
+ else
+ p = arg_directory;
- p = strjoina(arg_directory, "/usr/");
- if (laccess(p, F_OK) < 0) {
- log_error("Directory %s doesn't look like it has an OS tree. Refusing.", arg_directory);
+ q = strjoina(p, "/usr/");
+
+ if (laccess(q, F_OK) < 0) {
+ log_error("Directory %s doesn't look like it has an OS tree. Refusing.", p);
r = -EINVAL;
goto finish;
}
if (pid > 0)
(void) wait_for_terminate(pid, NULL);
+ pager_close();
+
if (remove_directory && arg_directory) {
int k;
expose_port_free_all(arg_expose_ports);
free(arg_root_hash);
rlimit_free_all(arg_rlimit);
+ arg_cpuset = cpu_set_mfree(arg_cpuset);
return r < 0 ? EXIT_FAILURE : ret;
}