static const char *arg_selinux_context = NULL;
static const char *arg_selinux_apifs_context = NULL;
static char *arg_slice = NULL;
-static bool arg_private_network; /* initialized depending on arg_privileged in run() */
+static bool arg_private_network = false;
static bool arg_read_only = false;
static StartMode arg_start_mode = START_PID1;
static bool arg_ephemeral = false;
static ExposePort *arg_expose_ports = NULL;
static char **arg_property = NULL;
static sd_bus_message *arg_property_message = NULL;
-static UserNamespaceMode arg_userns_mode; /* initialized depending on arg_privileged in run() */
+static UserNamespaceMode arg_userns_mode = _USER_NAMESPACE_MODE_INVALID;
static uid_t arg_uid_shift = UID_INVALID, arg_uid_range = 0x10000U;
static unsigned arg_delegate_container_ranges = 0;
static UserNamespaceOwnership arg_userns_ownership = _USER_NAMESPACE_OWNERSHIP_INVALID;
static Architecture arg_architecture = _ARCHITECTURE_INVALID;
static ImagePolicy *arg_image_policy = NULL;
static char *arg_background = NULL;
-static bool arg_privileged = false;
+static RuntimeScope arg_runtime_scope = _RUNTIME_SCOPE_INVALID;
static bool arg_cleanup = false;
static bool arg_ask_password = true;
" -b --boot Boot up full system (i.e. invoke init)\n"
" --chdir=PATH Set working directory in the container\n"
" -E --setenv=NAME[=VALUE] Pass an environment variable to PID 1\n"
- " -u --user=USER Run the command under specified user or UID\n"
+ " -u --uid=USER Run the command under specified user or UID\n"
" --kill-signal=SIGNAL Select signal to use for shutting down PID 1\n"
" --notify-ready=BOOLEAN Receive notifications from the child init process\n"
" --suppress-sync=BOOLEAN\n"
" --load-credential=ID:PATH\n"
" Load credential to pass to container from file or\n"
" AF_UNIX stream socket.\n"
+ "\n%3$sOther:%4$s\n"
+ " --system Run in the system service manager scope\n"
+ " --user Run in the user service manager scope\n"
"\nSee the %2$s for details.\n",
program_invocation_short_name,
link,
ARG_CLEANUP,
ARG_NO_ASK_PASSWORD,
ARG_MSTACK,
+ ARG_USER,
+ ARG_SYSTEM,
};
static const struct option options[] = {
{ "directory", required_argument, NULL, 'D' },
{ "template", required_argument, NULL, ARG_TEMPLATE },
{ "ephemeral", no_argument, NULL, 'x' },
- { "user", required_argument, NULL, 'u' },
+ { "uid", required_argument, NULL, 'u' },
+ { "user", optional_argument, NULL, ARG_USER },
{ "private-network", no_argument, NULL, ARG_PRIVATE_NETWORK },
{ "as-pid2", no_argument, NULL, 'a' },
{ "boot", no_argument, NULL, 'b' },
{ "cleanup", no_argument, NULL, ARG_CLEANUP },
{ "no-ask-password", no_argument, NULL, ARG_NO_ASK_PASSWORD },
{ "mstack", required_argument, NULL, ARG_MSTACK },
+ { "system", no_argument, NULL, ARG_SYSTEM },
{}
};
assert(argc >= 0);
assert(argv);
+        /* --user= used to require an argument (the container user to run as). It has been repurposed to
+         * optionally set the runtime scope, with --uid= replacing the old container user functionality.
+         * To maintain backwards compatibility with the space-separated form (--user NAME), stitch them
+         * together into --user=NAME before getopt sees them. Without this, getopt's optional_argument
+         * handling would interpret --user NAME as --user (no arg) followed by a positional argument.
+         * Operate on a copy to avoid modifying the caller's argv.
+         *
+         * No deprecation warning is logged here: the stitched --user=NAME form is already reported as
+         * deprecated when the option itself is parsed, and warning here as well would print the notice
+         * twice for a single invocation.
+         *
+         * NOTE(review): this scan only stops at "--". If option parsing is meant to stop at the first
+         * positional argument, a "--user WORD" pair inside the payload command line would presumably be
+         * rewritten too — confirm, and bound the scan accordingly if so. */
+        _cleanup_strv_free_ char **argv_copy = NULL;
+        for (int i = 1; i < argc - 1; i++) {
+                if (streq(argv[i], "--"))
+                        break; /* Respect end-of-options sentinel */
+                if (!streq(argv[i], "--user"))
+                        continue;
+                if (argv[i + 1][0] == '-')
+                        continue; /* Next arg is an option, not a username */
+
+                /* Deep copy so we can freely replace and free entries */
+                if (!argv_copy) {
+                        argv_copy = strv_copy(argv);
+                        if (!argv_copy)
+                                return log_oom();
+                        argv = argv_copy;
+                }
+
+                /* Build the replacement before touching the vector: if the allocation fails the vector
+                 * must stay intact, as a NULL stored mid-array would end the NULL-terminated vector
+                 * early and the cleanup would presumably never release the entries behind it. */
+                char *stitched = strjoin("--user=", argv[i + 1]);
+                if (!stitched)
+                        return log_oom();
+
+                /* Release "--user" and NAME, and put "--user=NAME" into the first of the two slots */
+                free(argv[i]);
+                free(argv[i + 1]);
+                argv[i] = stitched;
+
+                /* Close the gap left by NAME (the element count covers the trailing NULL as well) and
+                 * shrink argc accordingly */
+                memmove(argv + i + 1, argv + i + 2, (argc - i - 1) * sizeof(char*));
+                argc--;
+        }
+
/* Resetting to 0 forces the invocation of an internal initialization routine of getopt_long()
* that checks for GNU extensions in optstring ('-' or '+' at the beginning). */
optind = 0;
case 'U':
if (userns_supported()) {
- /* Note that arg_userns_ownership is implied by USER_NAMESPACE_PICK further down. */
- arg_userns_mode = arg_privileged ? USER_NAMESPACE_PICK : USER_NAMESPACE_MANAGED;
+ /* Note that arg_userns_ownership is implied by USER_NAMESPACE_PICK further down.
+ * We use _USER_NAMESPACE_MODE_INVALID as a marker so that the final resolution
+ * (PICK vs MANAGED) is deferred to after the getopt loop where arg_runtime_scope
+ * has its final value regardless of option order. */
+ arg_userns_mode = _USER_NAMESPACE_MODE_INVALID;
arg_uid_shift = UID_INVALID;
arg_uid_range = UINT32_C(0x10000);
arg_ask_password = false;
break;
+                case ARG_USER:
+                        /* Without a value, --user selects the user service manager scope */
+                        if (!optarg) {
+                                arg_runtime_scope = RUNTIME_SCOPE_USER;
+                                break;
+                        }
+
+                        /* With a value, --user=NAME is a deprecated alias for --uid=NAME */
+                        log_warning("--user=NAME is deprecated, use --uid=NAME instead.");
+
+                        r = free_and_strdup(&arg_user, optarg);
+                        if (r < 0)
+                                return log_oom();
+
+                        arg_settings_mask |= SETTING_USER;
+                        break;
+
+                case ARG_SYSTEM:
+                        /* --system selects the system service manager scope */
+                        arg_runtime_scope = RUNTIME_SCOPE_SYSTEM;
+                        break;
+
case '?':
return -EINVAL;
* --directory=". */
arg_directory = TAKE_PTR(arg_template);
+        /* Unless --user/--system picked a scope explicitly, infer it from the calling UID: root gets the
+         * system scope, everybody else the user scope. */
+        if (arg_runtime_scope < 0) {
+                if (getuid() == 0)
+                        arg_runtime_scope = RUNTIME_SCOPE_SYSTEM;
+                else
+                        arg_runtime_scope = RUNTIME_SCOPE_USER;
+        }
+
+        /* _USER_NAMESPACE_MODE_INVALID is both the initial value and what -U stores in order to defer the
+         * PICK vs MANAGED decision to this point, where arg_runtime_scope has its final value. The user
+         * scope always gets a managed user namespace; the system scope gets PICK if SETTING_USERNS was
+         * recorded and no user namespace otherwise.
+         *
+         * NOTE(review): this relies on SETTING_USERNS meaning "-U was given" whenever the mode is still
+         * unset — confirm that no other option sets that flag while leaving the mode untouched. */
+        if (arg_userns_mode == _USER_NAMESPACE_MODE_INVALID)
+                arg_userns_mode =
+                        arg_runtime_scope == RUNTIME_SCOPE_USER ? USER_NAMESPACE_MANAGED :
+                        FLAGS_SET(arg_settings_mask, SETTING_USERNS) ? USER_NAMESPACE_PICK :
+                        USER_NAMESPACE_NO;
+
+        /* If networking was not configured explicitly, imply private networking for unprivileged
+         * operation, since the kernel otherwise refuses mounting sysfs. */
+        if (!FLAGS_SET(arg_settings_mask, SETTING_NETWORK))
+                arg_private_network = arg_runtime_scope == RUNTIME_SCOPE_USER;
+
arg_caps_retain |= plus;
arg_caps_retain |= arg_private_network ? UINT64_C(1) << CAP_NET_ADMIN : 0;
arg_caps_retain &= ~minus;
/* We can mount selinuxfs only if we are privileged and can do so before userns. In managed mode we
* have to enter the userns earlier, hence cannot do that. */
- /* SET_FLAG(arg_mount_settings, MOUNT_PRIVILEGED, arg_privileged); */
+ /* SET_FLAG(arg_mount_settings, MOUNT_PRIVILEGED, arg_runtime_scope == RUNTIME_SCOPE_SYSTEM); */
SET_FLAG(arg_mount_settings, MOUNT_PRIVILEGED, arg_userns_mode != USER_NAMESPACE_MANAGED);
SET_FLAG(arg_mount_settings, MOUNT_USE_USERNS, arg_userns_mode != USER_NAMESPACE_NO);
if (arg_private_network)
SET_FLAG(arg_mount_settings, MOUNT_APPLY_APIVFS_NETNS, arg_private_network);
- if (!arg_privileged && arg_userns_mode != USER_NAMESPACE_MANAGED)
- return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Unprivileged operation requires managed user namespaces, as otherwise no UID range can be acquired.");
+ if (arg_runtime_scope != RUNTIME_SCOPE_SYSTEM && arg_userns_mode != USER_NAMESPACE_MANAGED)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "User-scoped operation requires managed user namespaces, as otherwise no UID range can be acquired.");
if (arg_userns_mode == USER_NAMESPACE_MANAGED && !arg_private_network)
return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Managed user namespace operation requires private networking, as otherwise /sys/ may not be mounted.");
if (arg_machine) {
_cleanup_(image_unrefp) Image *i = NULL;
- r = image_find(arg_privileged ? RUNTIME_SCOPE_SYSTEM : RUNTIME_SCOPE_USER,
+ r = image_find(arg_runtime_scope,
IMAGE_MACHINE, arg_machine, NULL, &i);
if (r == -ENOENT)
return log_error_errno(r, "No image for machine '%s'.", arg_machine);
_SD_PATH_INVALID,
};
- const uint64_t *q = arg_privileged ? lookup_dir_system : lookup_dir_user;
+ const uint64_t *q = arg_runtime_scope == RUNTIME_SCOPE_SYSTEM ? lookup_dir_system : lookup_dir_user;
for (; *q != _SD_PATH_INVALID; q++) {
_cleanup_free_ char *cd = NULL;
r = sd_path_lookup(*q, "systemd/nspawn", &cd);
/* Registration always happens on the system bus */
_cleanup_(sd_bus_flush_close_unrefp) sd_bus *system_bus = NULL;
- if (arg_register != 0 || (arg_privileged && !arg_keep_unit)) {
+ if (arg_register != 0 || (arg_runtime_scope == RUNTIME_SCOPE_SYSTEM && !arg_keep_unit)) {
r = sd_bus_default_system(&system_bus);
if (r < 0)
return log_error_errno(r, "Failed to open system bus: %m");
_cleanup_(sd_bus_unrefp) sd_bus *runtime_bus = NULL;
if (arg_register != 0 || !arg_keep_unit) {
- if (arg_privileged)
+ if (arg_runtime_scope == RUNTIME_SCOPE_SYSTEM)
runtime_bus = sd_bus_ref(system_bus);
else {
r = sd_bus_default_user(&user_bus);
bool registered_system = false, registered_runtime = false;
if (arg_register != 0) {
r = register_machine_with_fallback_and_log(
- arg_privileged ? RUNTIME_SCOPE_SYSTEM : _RUNTIME_SCOPE_INVALID,
+ arg_runtime_scope == RUNTIME_SCOPE_SYSTEM ? RUNTIME_SCOPE_SYSTEM : _RUNTIME_SCOPE_INVALID,
system_bus,
runtime_bus,
arg_machine,
return 0;
}
-static void initialize_defaults(void) {
- arg_privileged = getuid() == 0;
-
- /* If running unprivileged default to systemd-nsresourced operation */
- arg_userns_mode = arg_privileged ? USER_NAMESPACE_NO : USER_NAMESPACE_MANAGED;
-
- /* Imply private networking for unprivileged operation, since kernel otherwise refuses mounting sysfs */
- arg_private_network = !arg_privileged;
-}
-
static void cleanup_propagation_and_export_directories(void) {
const char *p;
- if (!arg_machine || !arg_privileged)
+ if (!arg_machine || arg_runtime_scope != RUNTIME_SCOPE_SYSTEM)
return;
p = strjoina("/run/systemd/nspawn/propagate/", arg_machine);
log_setup();
- initialize_defaults();
-
r = parse_argv(argc, argv);
if (r <= 0)
goto finish;
r = create_ephemeral_snapshot(
arg_directory,
- arg_privileged ? RUNTIME_SCOPE_SYSTEM : RUNTIME_SCOPE_USER,
+ arg_runtime_scope,
arg_read_only,
&tree_global_lock,
&tree_local_lock,
goto finish;
r = image_path_lock(
- arg_privileged ? RUNTIME_SCOPE_SYSTEM : RUNTIME_SCOPE_USER,
+ arg_runtime_scope,
arg_directory,
(arg_read_only ? LOCK_SH : LOCK_EX) | LOCK_NB,
- arg_privileged ? &tree_global_lock : NULL,
+ arg_runtime_scope == RUNTIME_SCOPE_SYSTEM ? &tree_global_lock : NULL,
&tree_local_lock);
if (r == -EBUSY) {
log_error_errno(r, "Directory tree %s is currently busy.", arg_directory);
/* Always take an exclusive lock on our own ephemeral copy. */
r = image_path_lock(
- arg_privileged ? RUNTIME_SCOPE_SYSTEM : RUNTIME_SCOPE_USER,
+ arg_runtime_scope,
np,
LOCK_EX|LOCK_NB,
- arg_privileged ? &tree_global_lock : NULL,
+ arg_runtime_scope == RUNTIME_SCOPE_SYSTEM ? &tree_global_lock : NULL,
&tree_local_lock);
if (r < 0) {
log_error_errno(r, "Failed to create image lock: %m");
remove_image = true;
} else {
r = image_path_lock(
- arg_privileged ? RUNTIME_SCOPE_SYSTEM : RUNTIME_SCOPE_USER,
+ arg_runtime_scope,
arg_image,
(arg_read_only ? LOCK_SH : LOCK_EX) | LOCK_NB,
- arg_privileged ? &tree_global_lock : NULL,
+ arg_runtime_scope == RUNTIME_SCOPE_SYSTEM ? &tree_global_lock : NULL,
&tree_local_lock);
if (r == -EBUSY) {
log_error_errno(r, "Disk image %s is currently busy.", arg_image);