static int outer_child(
Barrier *barrier,
const char *directory,
+ int mount_fd,
DissectedImage *dissected_image,
int fd_outer_socket,
int fd_inner_socket,
if (r < 0)
return r;
- if (dissected_image) {
+ /* Put the root dir into the target directory now. One of three mechanisms is used: we have a
+ * single mount fd (typically unprivileged --directory= mode), or we have a fully dissected
+ * image (--image= mode), or we have a regular path. */
+ if (mount_fd >= 0) {
+ assert(arg_directory);
+ assert(!arg_image);
+
+ if (move_mount(mount_fd, "", AT_FDCWD, directory, MOVE_MOUNT_F_EMPTY_PATH) < 0)
+ return log_error_errno(errno, "Failed to attach root directory: %m");
+
+ mount_fd = safe_close(mount_fd);
+ log_debug("Successfully attached root directory to '%s'.", directory);
+
+ } else if (dissected_image) {
+ assert(!arg_directory);
+ assert(arg_image);
+
/* If we are operating on a disk image, then mount its root directory now, but leave out the
* rest. We can read the UID shift from it if we need to. Further down we'll mount the rest,
* but then with the uid shift known. That way we can mount VFAT file systems shifted to the
(arg_start_mode == START_BOOT ? DISSECT_IMAGE_VALIDATE_OS : 0));
if (r < 0)
return r;
+ } else {
+ assert(arg_directory);
+ assert(!arg_image);
+
+ r = mount_nofollow_verbose(LOG_ERR, arg_directory, directory, /* fstype= */ NULL, MS_BIND|MS_REC, /* options= */ NULL);
+ if (r < 0)
+ return r;
}
r = determine_uid_shift(directory);
"Selected user namespace base " UID_FMT " and range " UID_FMT ".", arg_uid_shift, arg_uid_range);
}
- if (path_equal(directory, "/")) {
- /* If the directory we shall boot is the host, let's operate on a bind mount at a different
- * place, so that we can make changes to its mount structure (for example, to implement
- * --volatile=) without this interfering with our ability to access files such as
- * /etc/localtime to copy into the container. Note that we use a fixed place for this
- * (instead of a temporary directory, since we are living in our own mount namespace here
- * already, and thus don't need to be afraid of colliding with anyone else's mounts). */
- (void) mkdir_p("/run/systemd/nspawn-root", 0755);
-
- r = mount_nofollow_verbose(LOG_ERR, "/", "/run/systemd/nspawn-root", NULL, MS_BIND|MS_REC, NULL);
- if (r < 0)
- return r;
-
- directory = "/run/systemd/nspawn-root";
- }
-
- /* Make sure we always have a mount that we can move to root later on. */
- r = make_mount_point(directory);
- if (r < 0)
- return r;
-
/* So the whole tree is now MS_SLAVE, i.e. we'll still receive mount/umount events from the host
* mount namespace. For the directory we are going to run our container let's turn this off, so that
* we'll live in our own little world from now on, and propagation from the host may only happen via
}
static int run_container(
+ const char *directory,
+ int mount_fd,
DissectedImage *dissected_image,
int userns_fd,
FDSet *fds,
(void) reset_signal_mask();
r = outer_child(&barrier,
- arg_directory,
+ directory,
+ mount_fd,
dissected_image,
fd_outer_socket_pair[1],
fd_inner_socket_pair[1],
}
static int run(int argc, char *argv[]) {
- bool remove_directory = false, remove_image = false, veth_created = false, remove_tmprootdir = false;
- _cleanup_close_ int master = -EBADF, userns_fd = -EBADF;
+ bool remove_directory = false, remove_image = false, veth_created = false;
+ _cleanup_close_ int master = -EBADF, userns_fd = -EBADF, mount_fd = -EBADF;
_cleanup_fdset_free_ FDSet *fds = NULL;
int r, n_fd_passed, ret = EXIT_SUCCESS;
char veth_name[IFNAMSIZ] = "";
struct ExposeArgs expose_args = {};
_cleanup_(release_lock_file) LockFile tree_global_lock = LOCK_FILE_INIT, tree_local_lock = LOCK_FILE_INIT;
- char tmprootdir[] = "/tmp/nspawn-root-XXXXXX";
+ _cleanup_(rmdir_and_freep) char *rootdir = NULL;
_cleanup_(loop_device_unrefp) LoopDevice *loop = NULL;
_cleanup_(dissected_image_unrefp) DissectedImage *dissected_image = NULL;
_cleanup_(fw_ctx_freep) FirewallContext *fw_ctx = NULL;
if (arg_console_mode == CONSOLE_PIPE) /* if we pass STDERR on to the container, don't add our own logs into it too */
arg_quiet = true;
- if (arg_directory) {
- assert(!arg_image);
+ if (!arg_privileged) {
+ /* if we are unprivileged, let's allocate a 64K userns first */
- if (!arg_privileged) {
- r = log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP), "Invoking container from plain directory tree is currently not supported if called without privileges.");
+ _cleanup_free_ char *userns_name = NULL;
+ if (asprintf(&userns_name, "nspawn-" PID_FMT "-%s", getpid_cached(), arg_machine) < 0) {
+ r = log_oom();
goto finish;
}
+ userns_fd = nsresource_allocate_userns(userns_name, UINT64_C(0x10000));
+ if (userns_fd < 0) {
+ r = log_error_errno(userns_fd, "Failed to allocate user namespace with 64K users: %m");
+ goto finish;
+ }
+ }
+
+ if (arg_directory) {
+ assert(!arg_image);
+
/* Safety precaution: let's not allow running images from the live host OS image, as long as
* /var from the host will propagate into container dynamically (because bad things happen if
* two systems write to the same /var). Let's allow it for the special cases where /var is
}
}
+ if (!arg_privileged) {
+ r = mountfsd_mount_directory(
+ arg_directory,
+ userns_fd,
+ determine_dissect_image_flags(),
+ &mount_fd);
+ if (r < 0)
+ goto finish;
+ }
} else {
DissectImageFlags dissect_image_flags =
determine_dissect_image_flags();
dissect_image_flags |= DISSECT_IMAGE_NO_PARTITION_TABLE;
}
- if (!mkdtemp(tmprootdir)) {
- r = log_error_errno(errno, "Failed to create temporary directory: %m");
- goto finish;
- }
-
- remove_tmprootdir = true;
-
- arg_directory = strdup(tmprootdir);
- if (!arg_directory) {
- r = log_oom();
- goto finish;
- }
-
if (arg_privileged) {
r = loop_device_make_by_path(
arg_image,
if (r < 0)
goto finish;
} else {
- _cleanup_free_ char *userns_name = strjoin("nspawn-", arg_machine);
- if (!userns_name) {
- r = log_oom();
- goto finish;
- }
-
- /* if we are unprivileged, let's allocate a 64K userns first */
- userns_fd = nsresource_allocate_userns(userns_name, UINT64_C(0x10000));
- if (userns_fd < 0) {
- r = log_error_errno(userns_fd, "Failed to allocate user namespace with 64K users: %m");
- goto finish;
- }
-
r = mountfsd_mount_image(
arg_image,
userns_fd,
arg_architecture = dissected_image_architecture(dissected_image);
}
- r = custom_mount_prepare_all(arg_directory, arg_custom_mounts, arg_n_custom_mounts);
+ /* Create a temporary place to mount stuff. */
+ r = mkdtemp_malloc("/tmp/nspawn-root-XXXXXX", &rootdir);
+ if (r < 0) {
+ log_error_errno(r, "Failed to create temporary directory: %m");
+ goto finish;
+ }
+
+ r = custom_mount_prepare_all(rootdir, arg_custom_mounts, arg_n_custom_mounts);
if (r < 0)
goto finish;
}
for (;;) {
r = run_container(
+ rootdir,
+ mount_fd,
dissected_image,
userns_fd,
fds,
log_warning_errno(errno, "Can't remove image file '%s', ignoring: %m", arg_image);
}
- if (remove_tmprootdir) {
- if (rmdir(tmprootdir) < 0)
- log_debug_errno(errno, "Can't remove temporary root directory '%s', ignoring: %m", tmprootdir);
- }
-
if (arg_machine && arg_privileged) {
const char *p;