From: Daan De Meyer Date: Sun, 29 Mar 2026 11:10:42 +0000 (+0000) Subject: nspawn: rename --user= to --uid= and repurpose --user/--system for runtime scope X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=e7fb7296f56dacc24054cddb2e1f0aa55ee7dc94;p=thirdparty%2Fsystemd.git nspawn: rename --user= to --uid= and repurpose --user/--system for runtime scope Rename nspawn's --user=NAME option to --uid=NAME for selecting the container user. The -u short option is preserved. --user=NAME and --user NAME are still accepted but emit a deprecation warning. A pre-parsing step stitches the space-separated --user NAME form into --user=NAME before getopt sees it, preserving backwards compatibility despite --user now being an optional_argument. Repurpose --user (without argument) and --system as standalone switches for selecting the runtime scope (user vs system service manager). Replace all uses of the arg_privileged boolean with arg_runtime_scope comparisons throughout nspawn. The default scope is auto-detected from the real UID. Co-developed-by: Claude Opus 4.6 --- diff --git a/NEWS b/NEWS index c717a355ecd..71ad3455230 100644 --- a/NEWS +++ b/NEWS @@ -4,6 +4,13 @@ CHANGES WITH 261 in spe: Feature Removals and Incompatible Changes: + * systemd-nspawn's --user= option has been renamed to --uid=. The -u + short option continues to work. The old --user NAME and --user=NAME + forms (with and without "=") are still accepted but deprecated; a warning + is emitted suggesting --uid=NAME. The --user option (without an argument) + has been repurposed as a standalone switch to select + the user service manager scope, matching --system. + * It was discovered that systemd-stub does not measure all the events it measures to the TPM to the hardware CC registers (e.g. Intel TDX RTMRs) using EFI_CC_MEASUREMENT_PROTOCOL. 
In particular, devicetree, diff --git a/man/systemd-nspawn.xml b/man/systemd-nspawn.xml index e973b914dd0..5c7acf51594 100644 --- a/man/systemd-nspawn.xml +++ b/man/systemd-nspawn.xml @@ -629,13 +629,16 @@ - + After transitioning into the container, change to the specified user defined in the container's user database. Like all other systemd-nspawn features, this is not a security feature and - provides protection against accidental destructive operations only. + provides protection against accidental destructive operations only. This option was previously named + . The old name is still accepted but deprecated. The short option + is compatible with both old and new versions of + systemd-nspawn. - Note that if credentials are used in combination with a non-root + Note that if credentials are used in combination with a non-root (e.g.: or ), then must be used, and or must not be used, as the credentials would otherwise be unreadable @@ -1922,6 +1925,23 @@ After=sys-subsystem-net-devices-ens1.device Other + + + + + Specify whether to run in system or user scope. This controls which service manager and + machined instance to interact with, as well as operational defaults such as user namespace mode and + private networking. If neither is specified, is implied when running as + root, otherwise. + + Note: for backwards compatibility, followed by a positional argument + is interpreted as the deprecated form (now + ). To use for scope selection when positional + arguments follow, separate them with --. 
+ + + + diff --git a/shell-completion/bash/systemd-nspawn b/shell-completion/bash/systemd-nspawn index 692020aa62c..08ff25d906c 100644 --- a/shell-completion/bash/systemd-nspawn +++ b/shell-completion/bash/systemd-nspawn @@ -69,8 +69,8 @@ _systemd_nspawn() { local -A OPTS=( [STANDALONE]='-h --help --version --private-network -b --boot --read-only -q --quiet --share-system --keep-unit -n --network-veth -j -x --ephemeral -a --as-pid2 -U --suppress-sync=yes - --cleanup' - [ARG]='-D --directory -u --user --uuid --capability --drop-capability --link-journal --bind --bind-ro + --cleanup --user --system' + [ARG]='-D --directory -u --uid --uuid --capability --drop-capability --link-journal --bind --bind-ro -M --machine -S --slice -E --setenv -Z --selinux-context -L --selinux-apifs-context --register --network-interface --network-bridge --personality -i --image --image-policy --tmpfs --volatile --network-macvlan --kill-signal --template --notify-ready --root-hash --chdir @@ -88,7 +88,7 @@ _systemd_nspawn() { compopt -o nospace comps=$(compgen -S/ -A directory -- "$cur" ) ;; - --user|-u) + --uid|-u) comps=$( __get_users ) ;; --uuid|--root-hash) diff --git a/shell-completion/zsh/_systemd-nspawn b/shell-completion/zsh/_systemd-nspawn index f613db908e3..fa79b7f8d86 100644 --- a/shell-completion/zsh/_systemd-nspawn +++ b/shell-completion/zsh/_systemd-nspawn @@ -22,7 +22,9 @@ _arguments \ '(--ephemeral -x)'{--ephemeral,-x}'[Run container with snapshot of root directory, and remove it after exit.]' \ '(--image -i)'{--image=,-i+}'[Disk image to mount the root directory for the container from.]:disk image: _files' \ '(--boot -b)'{--boot,-b}'[Automatically search for an init binary and invoke it instead of a shell or a user supplied program.]' \ - '(--user -u)'{--user=,-u+}'[Run the command under specified user, create home directory and cd into it.]:user:_users' \ + '(--uid -u)'{--uid=,-u+}'[Run the command under specified user, create home directory and cd into it.]:user:_users' \ 
+ '--user[Run in user service manager scope]' \ + '--system[Run in system service manager scope]' \ '(--machine -M)'{--machine=,-M+}'[Sets the machine name for this container.]: : _message "container name"' \ '--uuid=[Set the specified uuid for the container.]: : _message "container UUID"' \ '(--slice -S)'{--slice=,-S+}'[Make the container part of the specified slice, instead of the default machine.slice.]: : _message slice' \ diff --git a/src/nspawn/nspawn.c b/src/nspawn/nspawn.c index 62676e935f0..ff5f79a7cb6 100644 --- a/src/nspawn/nspawn.c +++ b/src/nspawn/nspawn.c @@ -154,7 +154,7 @@ static char *arg_hostname = NULL; /* The name the payload sees by default */ static const char *arg_selinux_context = NULL; static const char *arg_selinux_apifs_context = NULL; static char *arg_slice = NULL; -static bool arg_private_network; /* initialized depending on arg_privileged in run() */ +static bool arg_private_network = false; static bool arg_read_only = false; static StartMode arg_start_mode = START_PID1; static bool arg_ephemeral = false; @@ -213,7 +213,7 @@ static VolatileMode arg_volatile_mode = VOLATILE_NO; static ExposePort *arg_expose_ports = NULL; static char **arg_property = NULL; static sd_bus_message *arg_property_message = NULL; -static UserNamespaceMode arg_userns_mode; /* initialized depending on arg_privileged in run() */ +static UserNamespaceMode arg_userns_mode = _USER_NAMESPACE_MODE_INVALID; static uid_t arg_uid_shift = UID_INVALID, arg_uid_range = 0x10000U; static unsigned arg_delegate_container_ranges = 0; static UserNamespaceOwnership arg_userns_ownership = _USER_NAMESPACE_OWNERSHIP_INVALID; @@ -254,7 +254,7 @@ static char *arg_settings_filename = NULL; static Architecture arg_architecture = _ARCHITECTURE_INVALID; static ImagePolicy *arg_image_policy = NULL; static char *arg_background = NULL; -static bool arg_privileged = false; +static RuntimeScope arg_runtime_scope = _RUNTIME_SCOPE_INVALID; static bool arg_cleanup = false; static bool 
arg_ask_password = true; @@ -408,7 +408,7 @@ static int help(void) { " -b --boot Boot up full system (i.e. invoke init)\n" " --chdir=PATH Set working directory in the container\n" " -E --setenv=NAME[=VALUE] Pass an environment variable to PID 1\n" - " -u --user=USER Run the command under specified user or UID\n" + " -u --uid=USER Run the command under specified user or UID\n" " --kill-signal=SIGNAL Select signal to use for shutting down PID 1\n" " --notify-ready=BOOLEAN Receive notifications from the child init process\n" " --suppress-sync=BOOLEAN\n" @@ -522,6 +522,9 @@ static int help(void) { " --load-credential=ID:PATH\n" " Load credential to pass to container from file or\n" " AF_UNIX stream socket.\n" + "\n%3$sOther:%4$s\n" + " --system Run in the system service manager scope\n" + " --user Run in the user service manager scope\n" "\nSee the %2$s for details.\n", program_invocation_short_name, link, @@ -750,6 +753,8 @@ static int parse_argv(int argc, char *argv[]) { ARG_CLEANUP, ARG_NO_ASK_PASSWORD, ARG_MSTACK, + ARG_USER, + ARG_SYSTEM, }; static const struct option options[] = { @@ -758,7 +763,8 @@ static int parse_argv(int argc, char *argv[]) { { "directory", required_argument, NULL, 'D' }, { "template", required_argument, NULL, ARG_TEMPLATE }, { "ephemeral", no_argument, NULL, 'x' }, - { "user", required_argument, NULL, 'u' }, + { "uid", required_argument, NULL, 'u' }, + { "user", optional_argument, NULL, ARG_USER }, { "private-network", no_argument, NULL, ARG_PRIVATE_NETWORK }, { "as-pid2", no_argument, NULL, 'a' }, { "boot", no_argument, NULL, 'b' }, @@ -831,6 +837,7 @@ static int parse_argv(int argc, char *argv[]) { { "cleanup", no_argument, NULL, ARG_CLEANUP }, { "no-ask-password", no_argument, NULL, ARG_NO_ASK_PASSWORD }, { "mstack", required_argument, NULL, ARG_MSTACK }, + { "system", no_argument, NULL, ARG_SYSTEM }, {} }; @@ -841,6 +848,43 @@ static int parse_argv(int argc, char *argv[]) { assert(argc >= 0); assert(argv); + /* --user= used to require 
an argument (the container user to run as). It has been repurposed to + * optionally set the runtime scope, with --uid= replacing the old container user functionality. + * To maintain backwards compatibility with the space-separated form (--user NAME), stitch them + * together into --user=NAME before getopt sees them. Without this, getopt's optional_argument + * handling would interpret --user NAME as --user (no arg) followed by a positional argument. + * Operate on a copy to avoid modifying the caller's argv. */ + _cleanup_strv_free_ char **argv_copy = NULL; + for (int i = 1; i < argc - 1; i++) { + if (streq(argv[i], "--")) + break; /* Respect end-of-options sentinel */ + if (!streq(argv[i], "--user")) + continue; + if (argv[i + 1][0] == '-') + continue; /* Next arg is an option, not a username */ + + /* Deep copy so we can freely replace and free entries */ + if (!argv_copy) { + argv_copy = strv_copy(argv); + if (!argv_copy) + return log_oom(); + argv = argv_copy; + } + + log_warning("--user NAME is deprecated, use --uid=NAME instead."); + + /* Stitch "--user" and the following argument into "--user=NAME" */ + free(argv[i]); + argv[i] = strjoin("--user=", argv[i + 1]); + if (!argv[i]) + return log_oom(); + + /* Remove the now-consumed argument and shrink argc accordingly */ + free(argv[i + 1]); + memmove(argv + i + 1, argv + i + 2, (argc - i - 1) * sizeof(char*)); + argc--; + } + /* Resetting to 0 forces the invocation of an internal initialization routine of getopt_long() * that checks for GNU extensions in optstring ('-' or '+' at the beginning). */ optind = 0; @@ -1230,8 +1274,11 @@ static int parse_argv(int argc, char *argv[]) { case 'U': if (userns_supported()) { - /* Note that arg_userns_ownership is implied by USER_NAMESPACE_PICK further down. */ - arg_userns_mode = arg_privileged ? USER_NAMESPACE_PICK : USER_NAMESPACE_MANAGED; + /* Note that arg_userns_ownership is implied by USER_NAMESPACE_PICK further down. 
+ * We use _USER_NAMESPACE_MODE_INVALID as a marker so that the final resolution + * (PICK vs MANAGED) is deferred to after the getopt loop where arg_runtime_scope + * has its final value regardless of option order. */ + arg_userns_mode = _USER_NAMESPACE_MODE_INVALID; arg_uid_shift = UID_INVALID; arg_uid_range = UINT32_C(0x10000); @@ -1600,6 +1647,24 @@ static int parse_argv(int argc, char *argv[]) { arg_ask_password = false; break; + case ARG_USER: + if (optarg) { + /* --user=NAME is a deprecated alias for --uid=NAME */ + log_warning("--user=NAME is deprecated, use --uid=NAME instead."); + + r = free_and_strdup(&arg_user, optarg); + if (r < 0) + return log_oom(); + + arg_settings_mask |= SETTING_USER; + } else + arg_runtime_scope = RUNTIME_SCOPE_USER; + break; + + case ARG_SYSTEM: + arg_runtime_scope = RUNTIME_SCOPE_SYSTEM; + break; + case '?': return -EINVAL; @@ -1623,6 +1688,26 @@ static int parse_argv(int argc, char *argv[]) { * --directory=". */ arg_directory = TAKE_PTR(arg_template); + /* Derive runtime scope from UID if not explicitly set via --user/--system */ + if (arg_runtime_scope < 0) + arg_runtime_scope = getuid() == 0 ? RUNTIME_SCOPE_SYSTEM : RUNTIME_SCOPE_USER; + + if (arg_userns_mode == _USER_NAMESPACE_MODE_INVALID) { + /* -U sets arg_userns_mode to _USER_NAMESPACE_MODE_INVALID to defer the PICK vs MANAGED + * resolution to here where arg_runtime_scope has its final value. */ + if (arg_runtime_scope == RUNTIME_SCOPE_USER) + arg_userns_mode = USER_NAMESPACE_MANAGED; + else if (FLAGS_SET(arg_settings_mask, SETTING_USERNS)) + arg_userns_mode = USER_NAMESPACE_PICK; + else + arg_userns_mode = USER_NAMESPACE_NO; + } + + if (!FLAGS_SET(arg_settings_mask, SETTING_NETWORK)) + /* Imply private networking for unprivileged operation, since kernel otherwise + * refuses mounting sysfs. */ + arg_private_network = arg_runtime_scope == RUNTIME_SCOPE_USER; + arg_caps_retain |= plus; arg_caps_retain |= arg_private_network ? 
UINT64_C(1) << CAP_NET_ADMIN : 0; arg_caps_retain &= ~minus; @@ -1650,7 +1735,7 @@ static int verify_arguments(void) { /* We can mount selinuxfs only if we are privileged and can do so before userns. In managed mode we * have to enter the userns earlier, hence cannot do that. */ - /* SET_FLAG(arg_mount_settings, MOUNT_PRIVILEGED, arg_privileged); */ + /* SET_FLAG(arg_mount_settings, MOUNT_PRIVILEGED, arg_runtime_scope == RUNTIME_SCOPE_SYSTEM); */ SET_FLAG(arg_mount_settings, MOUNT_PRIVILEGED, arg_userns_mode != USER_NAMESPACE_MANAGED); SET_FLAG(arg_mount_settings, MOUNT_USE_USERNS, arg_userns_mode != USER_NAMESPACE_NO); @@ -1658,8 +1743,8 @@ static int verify_arguments(void) { if (arg_private_network) SET_FLAG(arg_mount_settings, MOUNT_APPLY_APIVFS_NETNS, arg_private_network); - if (!arg_privileged && arg_userns_mode != USER_NAMESPACE_MANAGED) - return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Unprivileged operation requires managed user namespaces, as otherwise no UID range can be acquired."); + if (arg_runtime_scope != RUNTIME_SCOPE_SYSTEM && arg_userns_mode != USER_NAMESPACE_MANAGED) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "User-scoped operation requires managed user namespaces, as otherwise no UID range can be acquired."); if (arg_userns_mode == USER_NAMESPACE_MANAGED && !arg_private_network) return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Managed user namespace operation requires private networking, as otherwise /sys/ may not be mounted."); @@ -3211,7 +3296,7 @@ static int determine_names(void) { if (arg_machine) { _cleanup_(image_unrefp) Image *i = NULL; - r = image_find(arg_privileged ? RUNTIME_SCOPE_SYSTEM : RUNTIME_SCOPE_USER, + r = image_find(arg_runtime_scope, IMAGE_MACHINE, arg_machine, NULL, &i); if (r == -ENOENT) return log_error_errno(r, "No image for machine '%s'.", arg_machine); @@ -5183,7 +5268,7 @@ static int load_settings(void) { _SD_PATH_INVALID, }; - const uint64_t *q = arg_privileged ? 
lookup_dir_system : lookup_dir_user; + const uint64_t *q = arg_runtime_scope == RUNTIME_SCOPE_SYSTEM ? lookup_dir_system : lookup_dir_user; for (; *q != _SD_PATH_INVALID; q++) { _cleanup_free_ char *cd = NULL; r = sd_path_lookup(*q, "systemd/nspawn", &cd); @@ -5614,7 +5699,7 @@ static int run_container( /* Registration always happens on the system bus */ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *system_bus = NULL; - if (arg_register != 0 || (arg_privileged && !arg_keep_unit)) { + if (arg_register != 0 || (arg_runtime_scope == RUNTIME_SCOPE_SYSTEM && !arg_keep_unit)) { r = sd_bus_default_system(&system_bus); if (r < 0) return log_error_errno(r, "Failed to open system bus: %m"); @@ -5630,7 +5715,7 @@ static int run_container( _cleanup_(sd_bus_unrefp) sd_bus *runtime_bus = NULL; if (arg_register != 0 || !arg_keep_unit) { - if (arg_privileged) + if (arg_runtime_scope == RUNTIME_SCOPE_SYSTEM) runtime_bus = sd_bus_ref(system_bus); else { r = sd_bus_default_user(&user_bus); @@ -5700,7 +5785,7 @@ static int run_container( bool registered_system = false, registered_runtime = false; if (arg_register != 0) { r = register_machine_with_fallback_and_log( - arg_privileged ? RUNTIME_SCOPE_SYSTEM : _RUNTIME_SCOPE_INVALID, + arg_runtime_scope == RUNTIME_SCOPE_SYSTEM ? RUNTIME_SCOPE_SYSTEM : _RUNTIME_SCOPE_INVALID, system_bus, runtime_bus, arg_machine, @@ -6081,20 +6166,10 @@ static int cant_be_in_netns(void) { return 0; } -static void initialize_defaults(void) { - arg_privileged = getuid() == 0; - - /* If running unprivileged default to systemd-nsresourced operation */ - arg_userns_mode = arg_privileged ? 
USER_NAMESPACE_NO : USER_NAMESPACE_MANAGED; - - /* Imply private networking for unprivileged operation, since kernel otherwise refuses mounting sysfs */ - arg_private_network = !arg_privileged; -} - static void cleanup_propagation_and_export_directories(void) { const char *p; - if (!arg_machine || !arg_privileged) + if (!arg_machine || arg_runtime_scope != RUNTIME_SCOPE_SYSTEM) return; p = strjoina("/run/systemd/nspawn/propagate/", arg_machine); @@ -6138,8 +6213,6 @@ static int run(int argc, char *argv[]) { log_setup(); - initialize_defaults(); - r = parse_argv(argc, argv); if (r <= 0) goto finish; @@ -6294,7 +6367,7 @@ static int run(int argc, char *argv[]) { r = create_ephemeral_snapshot( arg_directory, - arg_privileged ? RUNTIME_SCOPE_SYSTEM : RUNTIME_SCOPE_USER, + arg_runtime_scope, arg_read_only, &tree_global_lock, &tree_local_lock, @@ -6315,10 +6388,10 @@ static int run(int argc, char *argv[]) { goto finish; r = image_path_lock( - arg_privileged ? RUNTIME_SCOPE_SYSTEM : RUNTIME_SCOPE_USER, + arg_runtime_scope, arg_directory, (arg_read_only ? LOCK_SH : LOCK_EX) | LOCK_NB, - arg_privileged ? &tree_global_lock : NULL, + arg_runtime_scope == RUNTIME_SCOPE_SYSTEM ? &tree_global_lock : NULL, &tree_local_lock); if (r == -EBUSY) { log_error_errno(r, "Directory tree %s is currently busy.", arg_directory); @@ -6446,10 +6519,10 @@ static int run(int argc, char *argv[]) { /* Always take an exclusive lock on our own ephemeral copy. */ r = image_path_lock( - arg_privileged ? RUNTIME_SCOPE_SYSTEM : RUNTIME_SCOPE_USER, + arg_runtime_scope, np, LOCK_EX|LOCK_NB, - arg_privileged ? &tree_global_lock : NULL, + arg_runtime_scope == RUNTIME_SCOPE_SYSTEM ? &tree_global_lock : NULL, &tree_local_lock); if (r < 0) { log_error_errno(r, "Failed to create image lock: %m"); @@ -6474,10 +6547,10 @@ static int run(int argc, char *argv[]) { remove_image = true; } else { r = image_path_lock( - arg_privileged ? 
RUNTIME_SCOPE_SYSTEM : RUNTIME_SCOPE_USER, + arg_runtime_scope, arg_image, (arg_read_only ? LOCK_SH : LOCK_EX) | LOCK_NB, - arg_privileged ? &tree_global_lock : NULL, + arg_runtime_scope == RUNTIME_SCOPE_SYSTEM ? &tree_global_lock : NULL, &tree_local_lock); if (r == -EBUSY) { log_error_errno(r, "Disk image %s is currently busy.", arg_image); diff --git a/src/vmspawn/vmspawn.c b/src/vmspawn/vmspawn.c index f91c36193dd..ae324bb218b 100644 --- a/src/vmspawn/vmspawn.c +++ b/src/vmspawn/vmspawn.c @@ -220,8 +220,8 @@ static int help(void) { " -q --quiet Do not show status information\n" " --no-pager Do not pipe output into a pager\n" " --no-ask-password Do not prompt for password\n" - " --user Interact with user manager\n" - " --system Interact with system manager\n" + " --system Run in the system service manager scope\n" + " --user Run in the user service manager scope\n" "\n%3$sImage:%4$s\n" " -D --directory=PATH Root directory for the VM\n" " -x --ephemeral Run VM with snapshot of the disk or directory\n" diff --git a/test/units/TEST-13-NSPAWN.nspawn.sh b/test/units/TEST-13-NSPAWN.nspawn.sh index d5cc05b89f5..2868cc54ef3 100755 --- a/test/units/TEST-13-NSPAWN.nspawn.sh +++ b/test/units/TEST-13-NSPAWN.nspawn.sh @@ -202,7 +202,7 @@ testcase_sanity() { systemd-nspawn --register=no --directory="$root" bash -xec '[[ $$ -eq 1 ]]' systemd-nspawn --register=no --directory="$root" --as-pid2 bash -xec '[[ $$ -eq 2 ]]' - # --user= + # --uid= # "Fake" getent passwd's bare minimum, so we don't have to pull it in # with all the DSO shenanigans cat >"$root/bin/getent" <<\EOF @@ -222,6 +222,9 @@ EOF # as bash isn't invoked with the necessary environment variables for that. 
useradd --root="$root" --uid 1000 --user-group --create-home testuser systemd-nspawn --register=no --directory="$root" bash -xec '[[ $USER == root ]]' + systemd-nspawn --register=no --directory="$root" --uid=testuser bash -xec '[[ $USER == testuser ]]' + # Backward compat: --user NAME (space-separated) and --user=testuser should still work + systemd-nspawn --register=no --directory="$root" --user testuser bash -xec '[[ $USER == testuser ]]' systemd-nspawn --register=no --directory="$root" --user=testuser bash -xec '[[ $USER == testuser ]]' # --settings= + .nspawn files @@ -335,10 +338,10 @@ EOF --load-credential=cred.path:/tmp/cred.path \ --set-credential="cred.set:hello world" \ bash -xec '[[ "$("$root/bin/getent" <<\EOF @@ -933,7 +936,7 @@ EOF systemd-nspawn --register=no \ --directory="$root" \ -U \ - --user=testuser \ + --uid=testuser \ --bind=/tmp/owneridmap/bind:/home/testuser:owneridmap \ ${COVERAGE_BUILD_DIR:+--bind="$COVERAGE_BUILD_DIR"} \ bash -c "$cmd" |& tee nspawn.out; then