if (rootfs->bdev_type && !strequal(rootfs->bdev_type, "dir"))
return syserror_set(-EINVAL, "Idmapped rootfs currently only supports the \"dir\" storage driver");
-
- fd_userns = open_at(-EBADF, rootfs->mnt_opts.userns_path,
- PROTECT_OPEN_WITH_TRAILING_SYMLINKS, 0, 0);
- if (fd_userns < 0)
- return syserror("Failed to open user namespace");
}
if (rootfs->path) {
return 0;
}
+int lxc_rootfs_prepare_parent(struct lxc_handler *handler)
+{
+ __do_close int dfd_idmapped = -EBADF, fd_userns = -EBADF;
+ struct lxc_rootfs *rootfs = &handler->conf->rootfs;
+ struct lxc_storage *storage = rootfs->storage;
+ int ret;
+ const char *path_source;
+ /*
+  * Create a detached idmapped mount for the rootfs source path and send
+  * its file descriptor over handler->data_sock[0].
+  *
+  * Returns 0 when the fd was sent, or when one of the early checks below
+  * decides there is nothing to do. On failure the value produced by
+  * syserror()/syserror_set() is returned (presumably a negative errno
+  * value - confirm against those macros).
+  *
+  * NOTE(review): both fds are declared __do_close, so they are presumably
+  * closed automatically when this function returns - confirm.
+  *
+  * First check: nothing to do when no id mapping is configured.
+  */
+ if (lxc_list_empty(&handler->conf->id_map))
+ return 0;
+ /* Nothing to do when the rootfs mount options carry no userns path. */
+ if (is_empty_string(rootfs->mnt_opts.userns_path))
+ return 0;
+ /* Nothing to do when conf->rootfs_setup is already set. */
+ if (handler->conf->rootfs_setup)
+ return 0;
+ /* A block-device backed rootfs is rejected with -EOPNOTSUPP. */
+ if (rootfs_is_blockdev(handler->conf))
+ return syserror_set(-EOPNOTSUPP, "Idmapped mounts on block-backed storage not yet supported");
+ /* Likewise rejected when can_use_bind_mounts() reports false. */
+ if (!can_use_bind_mounts())
+ return syserror_set(-EOPNOTSUPP, "Kernel does not support the new mount api");
+ /*
+  * Pick the user namespace fd: with userns_self set, duplicate
+  * handler->nsfd[LXC_NS_USER]; otherwise open the configured userns_path.
+  */
+ if (rootfs->mnt_opts.userns_self)
+ fd_userns = dup_cloexec(handler->nsfd[LXC_NS_USER]);
+ else
+ fd_userns = open_at(-EBADF, rootfs->mnt_opts.userns_path,
+ PROTECT_OPEN_WITH_TRAILING_SYMLINKS, 0, 0);
+ if (fd_userns < 0)
+ return syserror("Failed to open user namespace");
+ /*
+  * NOTE(review): storage is dereferenced here without a NULL check -
+  * confirm rootfs->storage is always set once userns_path is non-empty.
+  */
+ path_source = lxc_storage_get_path(storage->src, storage->type);
+
+ dfd_idmapped = create_detached_idmapped_mount(path_source, fd_userns, true);
+ if (dfd_idmapped < 0)
+ return syserror("Failed to create detached idmapped mount");
+ /*
+  * Send the mount fd over data_sock[0]; lxc_rootfs_prepare_child()
+  * receives it on data_sock[1].
+  */
+ ret = lxc_abstract_unix_send_fds(handler->data_sock[0], &dfd_idmapped, 1, NULL, 0);
+ if (ret < 0)
+ return syserror("Failed to send detached idmapped mount fd");
+
+ TRACE("Created detached idmapped mount %d", dfd_idmapped);
+ return 0;
+}
+
static int add_shmount_to_list(struct lxc_conf *conf)
{
char new_mount[PATH_MAX];
if (is_empty_string(opts->userns_path))
return syserror_set(-EINVAL, "Missing idmap path for \"idmap=<path>\" LXC specific mount option");
- fd_userns = open(opts->userns_path, O_RDONLY | O_NOCTTY | O_CLOEXEC);
- if (fd_userns < 0)
- return syserror("Failed to open user namespace");
+ if (strequal(opts->userns_path, "container")) {
+ opts->userns_self = 1;
+ } else {
+ fd_userns = open(opts->userns_path, O_RDONLY | O_NOCTTY | O_CLOEXEC);
+ if (fd_userns < 0)
+ return syserror("Failed to open user namespace");
+ }
TRACE("Parse LXC specific mount option %d->\"idmap=%s\"", fd_userns, opts->userns_path);
break;
new->rootfs.dfd_dev = -EBADF;
new->rootfs.dfd_host = -EBADF;
new->rootfs.fd_path_pin = -EBADF;
+ new->rootfs.dfd_idmapped = -EBADF;
new->rootfs.mnt_opts.userns_fd = -EBADF;
new->logfd = -1;
lxc_list_init(&new->cgroup);
return ret;
}
+static int lxc_rootfs_prepare_child(struct lxc_handler *handler)
+{
+ struct lxc_rootfs *rootfs = &handler->conf->rootfs;
+ int dfd_idmapped = -EBADF;
+ int ret;
+ /*
+  * Receive the detached idmapped mount fd on handler->data_sock[1] and
+  * store it in rootfs->dfd_idmapped.
+  *
+  * The three early returns below use the same conditions as the first
+  * three checks in lxc_rootfs_prepare_parent(), which sends the fd on
+  * data_sock[0]; when any of them holds no fd is expected.
+  *
+  * Returns 0 on success or when nothing is expected; otherwise the value
+  * produced by syserror() (presumably a negative errno value - confirm).
+  *
+  * NOTE(review): the parent has further failure paths (block device,
+  * missing bind-mount support, open/mount errors) that return before
+  * anything is sent - confirm the receive below cannot block forever in
+  * those cases.
+  */
+ if (lxc_list_empty(&handler->conf->id_map))
+ return 0;
+ /* No userns path in the rootfs mount options: no idmapped rootfs. */
+ if (is_empty_string(rootfs->mnt_opts.userns_path))
+ return 0;
+ /* conf->rootfs_setup already set: nothing to receive. */
+ if (handler->conf->rootfs_setup)
+ return 0;
+
+ ret = lxc_abstract_unix_recv_one_fd(handler->data_sock[1], &dfd_idmapped, NULL, 0);
+ if (ret < 0)
+ return syserror("Failed to receive idmapped mount fd");
+ /*
+  * Store the fd in the rootfs; it is closed elsewhere through
+  * close_prot_errno_disarm(rootfs->dfd_idmapped).
+  */
+ rootfs->dfd_idmapped = dfd_idmapped;
+ TRACE("Received detached idmapped mount %d", rootfs->dfd_idmapped);
+ return 0;
+}
+
int lxc_setup(struct lxc_handler *handler)
{
int ret;
const char *lxcpath = handler->lxcpath, *name = handler->name;
struct lxc_conf *lxc_conf = handler->conf;
+ ret = lxc_rootfs_prepare_child(handler);
+ if (ret < 0)
+ return syserror("Failed to prepare rootfs");
+
ret = lxc_setup_rootfs_prepare_root(lxc_conf, name, lxcpath);
if (ret < 0)
return log_error(-1, "Failed to setup rootfs");
int create_file : 1;
int optional : 1;
int relative : 1;
+ int userns_self : 1;
char userns_path[PATH_MAX];
int userns_fd;
unsigned long mnt_flags;
char *path;
int fd_path_pin;
+ int dfd_idmapped;
int dfd_mnt;
char *mount;
__hidden extern int lxc_rootfs_prepare(struct lxc_conf *conf, bool userns);
__hidden extern void lxc_storage_put(struct lxc_conf *conf);
__hidden extern int lxc_rootfs_init(struct lxc_conf *conf, bool userns);
+__hidden extern int lxc_rootfs_prepare_parent(struct lxc_handler *handler);
__hidden extern int lxc_map_ids(struct lxc_list *idmap, pid_t pid);
__hidden extern int lxc_create_tty(const char *name, struct lxc_conf *conf);
__hidden extern void lxc_delete_tty(struct lxc_tty_info *ttys);
return !is_empty_string(rootfs->path) ? rootfs->mount : s;
}
-static inline bool idmapped_rootfs_mnt(const struct lxc_rootfs *rootfs)
-{
- return rootfs->mnt_opts.userns_fd >= 0;
-}
-
static inline void put_lxc_mount_options(struct lxc_mount_options *mnt_opts)
{
mnt_opts->create_dir = 0;
mnt_opts->create_file = 0;
mnt_opts->optional = 0;
mnt_opts->relative = 0;
+ mnt_opts->userns_self = 0;
mnt_opts->userns_path[0] = '\0';
mnt_opts->mnt_flags = 0;
mnt_opts->prop_flags = 0;
close_prot_errno_disarm(rootfs->mnt_opts.userns_fd);
if (unpin)
close_prot_errno_disarm(rootfs->fd_path_pin);
+ close_prot_errno_disarm(rootfs->dfd_idmapped);
put_lxc_mount_options(&rootfs->mnt_opts);
storage_put(rootfs->storage);
rootfs->storage = NULL;
struct lxc_mount_attr attr = {
.attr_set = MOUNT_ATTR_IDMAP,
.userns_fd = userns_fd,
+ .propagation = MS_SLAVE,
};
int ret;
}
}
+ ret = lxc_rootfs_prepare_parent(handler);
+ if (ret) {
+ ERROR("Failed to prepare rootfs");
+ goto out_delete_net;
+ }
+
if (!lxc_sync_wake_child(handler, START_SYNC_STARTUP))
goto out_delete_net;
if (geteuid() == 0 && !lxc_list_empty(&conf->id_map)) {
/*
- * This handles two cases: mounting real block devices and
- * creating idmapped mounts. The block device case should be
- * obivous, i.e. no real filesystem can currently be mounted
- * from inside a user namespace.
- *
- * Idmapped mounts can currently only be created if the caller
- * is privileged wrt to the user namespace in which the
- * underlying block device has been mounted in. This basically
- * (with few exceptions) means we need to be CAP_SYS_ADMIN in
- * the initial user namespace since almost no interesting
- * filesystems can be mounted inside of user namespaces. This
- * is way we need to do the rootfs setup here. In the future
- * this may change.
+ * Most filesystems can't be mounted inside a userns so handle them here.
*/
- if (idmapped_rootfs_mnt(&conf->rootfs) || rootfs_is_blockdev(conf)) {
+ if (rootfs_is_blockdev(conf)) {
ret = unshare(CLONE_NEWNS);
if (ret < 0) {
ERROR("Failed to unshare CLONE_NEWNS");
int dir_mount(struct lxc_storage *bdev)
{
- struct lxc_mount_options *mnt_opts = &bdev->rootfs->mnt_opts;
+ struct lxc_rootfs *rootfs = bdev->rootfs;
+ struct lxc_mount_options *mnt_opts = &rootfs->mnt_opts;
__do_free char *mntdata = NULL;
unsigned long mflags = 0;
int ret;
src = lxc_storage_get_path(bdev->src, bdev->type);
+ if (rootfs->dfd_idmapped >= 0 && !can_use_bind_mounts())
+ return syserror_set(-EOPNOTSUPP, "Idmapped mount requested but kernel doesn't support new mount API");
+
if (can_use_bind_mounts()) {
__do_close int fd_source = -EBADF, fd_target = -EBADF;
- fd_source = open_at(-EBADF, src, PROTECT_OPATH_DIRECTORY, 0, 0);
- if (fd_source < 0)
- return syserror("Failed to open \"%s\"", src);
-
fd_target = open_at(-EBADF, bdev->dest, PROTECT_OPATH_DIRECTORY, 0, 0);
if (fd_target < 0)
return syserror("Failed to open \"%s\"", bdev->dest);
- ret = fd_mount_idmapped(fd_source, "", PROTECT_OPATH_DIRECTORY,
- PROTECT_LOOKUP_BENEATH, fd_target, "",
- PROTECT_OPATH_DIRECTORY,
- PROTECT_LOOKUP_BENEATH, 0,
- mnt_opts->userns_fd, true);
+ if (rootfs->dfd_idmapped >= 0) {
+ ret = move_detached_mount(rootfs->dfd_idmapped, fd_target, "",
+ PROTECT_OPATH_DIRECTORY,
+ PROTECT_LOOKUP_BENEATH);
+ } else {
+ fd_source = open_at(-EBADF, src, PROTECT_OPATH_DIRECTORY, 0, 0);
+ if (fd_source < 0)
+ return syserror("Failed to open \"%s\"", src);
+
+ ret = fd_bind_mount(fd_source, "",
+ PROTECT_OPATH_DIRECTORY,
+ PROTECT_LOOKUP_BENEATH, fd_target,
+ "", PROTECT_OPATH_DIRECTORY,
+ PROTECT_LOOKUP_BENEATH, 0, true);
+ }
if (ret < 0)
return syserror("Failed to mount \"%s\" onto \"%s\"", src, bdev->dest);
} else {