From: Christian Brauner Date: Sun, 25 Apr 2021 10:23:56 +0000 (+0200) Subject: conf: improve idmapped mounts support X-Git-Tag: lxc-5.0.0~190^2 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=refs%2Fpull%2F3811%2Fhead;p=thirdparty%2Flxc.git conf: improve idmapped mounts support Setting up a detached idmapped mount is a privileged operation; mounting it doesn't have to be. Signed-off-by: Christian Brauner --- diff --git a/src/lxc/conf.c b/src/lxc/conf.c index 195fa4940..10b20a04b 100644 --- a/src/lxc/conf.c +++ b/src/lxc/conf.c @@ -545,11 +545,6 @@ int lxc_rootfs_init(struct lxc_conf *conf, bool userns) if (rootfs->bdev_type && !strequal(rootfs->bdev_type, "dir")) return syserror_set(-EINVAL, "Idmapped rootfs currently only supports the \"dir\" storage driver"); - - fd_userns = open_at(-EBADF, rootfs->mnt_opts.userns_path, - PROTECT_OPEN_WITH_TRAILING_SYMLINKS, 0, 0); - if (fd_userns < 0) - return syserror("Failed to open user namespace"); } if (rootfs->path) { @@ -613,6 +608,51 @@ out: return 0; } +int lxc_rootfs_prepare_parent(struct lxc_handler *handler) +{ + __do_close int dfd_idmapped = -EBADF, fd_userns = -EBADF; + struct lxc_rootfs *rootfs = &handler->conf->rootfs; + struct lxc_storage *storage = rootfs->storage; + int ret; + const char *path_source; + + if (lxc_list_empty(&handler->conf->id_map)) + return 0; + + if (is_empty_string(rootfs->mnt_opts.userns_path)) + return 0; + + if (handler->conf->rootfs_setup) + return 0; + + if (rootfs_is_blockdev(handler->conf)) + return syserror_set(-EOPNOTSUPP, "Idmapped mounts on block-backed storage not yet supported"); + + if (!can_use_bind_mounts()) + return syserror_set(-EOPNOTSUPP, "Kernel does not support the new mount api"); + + if (rootfs->mnt_opts.userns_self) + fd_userns = dup_cloexec(handler->nsfd[LXC_NS_USER]); + else + fd_userns = open_at(-EBADF, rootfs->mnt_opts.userns_path, + PROTECT_OPEN_WITH_TRAILING_SYMLINKS, 0, 0); + if (fd_userns < 0) + return syserror("Failed to open user
namespace"); + + path_source = lxc_storage_get_path(storage->src, storage->type); + + dfd_idmapped = create_detached_idmapped_mount(path_source, fd_userns, true); + if (dfd_idmapped < 0) + return syserror("Failed to create detached idmapped mount"); + + ret = lxc_abstract_unix_send_fds(handler->data_sock[0], &dfd_idmapped, 1, NULL, 0); + if (ret < 0) + return syserror("Failed to send detached idmapped mount fd"); + + TRACE("Created detached idmapped mount %d", dfd_idmapped); + return 0; +} + static int add_shmount_to_list(struct lxc_conf *conf) { char new_mount[PATH_MAX]; @@ -2197,9 +2237,13 @@ int parse_lxc_mntopts(struct lxc_mount_options *opts, char *mnt_opts) if (is_empty_string(opts->userns_path)) return syserror_set(-EINVAL, "Missing idmap path for \"idmap=\" LXC specific mount option"); - fd_userns = open(opts->userns_path, O_RDONLY | O_NOCTTY | O_CLOEXEC); - if (fd_userns < 0) - return syserror("Failed to open user namespace"); + if (strequal(opts->userns_path, "container")) { + opts->userns_self = 1; + } else { + fd_userns = open(opts->userns_path, O_RDONLY | O_NOCTTY | O_CLOEXEC); + if (fd_userns < 0) + return syserror("Failed to open user namespace"); + } TRACE("Parse LXC specific mount option %d->\"idmap=%s\"", fd_userns, opts->userns_path); break; @@ -2790,6 +2834,7 @@ struct lxc_conf *lxc_conf_init(void) new->rootfs.dfd_dev = -EBADF; new->rootfs.dfd_host = -EBADF; new->rootfs.fd_path_pin = -EBADF; + new->rootfs.dfd_idmapped = -EBADF; new->rootfs.mnt_opts.userns_fd = -EBADF; new->logfd = -1; lxc_list_init(&new->cgroup); @@ -3523,12 +3568,40 @@ static int lxc_setup_keyring(struct lsm_ops *lsm_ops, const struct lxc_conf *con return ret; } +static int lxc_rootfs_prepare_child(struct lxc_handler *handler) +{ + struct lxc_rootfs *rootfs = &handler->conf->rootfs; + int dfd_idmapped = -EBADF; + int ret; + + if (lxc_list_empty(&handler->conf->id_map)) + return 0; + + if (is_empty_string(rootfs->mnt_opts.userns_path)) + return 0; + + if 
(handler->conf->rootfs_setup) + return 0; + + ret = lxc_abstract_unix_recv_one_fd(handler->data_sock[1], &dfd_idmapped, NULL, 0); + if (ret < 0) + return syserror("Failed to receive idmapped mount fd"); + + rootfs->dfd_idmapped = dfd_idmapped; + TRACE("Received detached idmapped mount %d", rootfs->dfd_idmapped); + return 0; +} + int lxc_setup(struct lxc_handler *handler) { int ret; const char *lxcpath = handler->lxcpath, *name = handler->name; struct lxc_conf *lxc_conf = handler->conf; + ret = lxc_rootfs_prepare_child(handler); + if (ret < 0) + return syserror("Failed to prepare rootfs"); + ret = lxc_setup_rootfs_prepare_root(lxc_conf, name, lxcpath); if (ret < 0) return log_error(-1, "Failed to setup rootfs"); diff --git a/src/lxc/conf.h b/src/lxc/conf.h index fe70f5bf4..969f75b9b 100644 --- a/src/lxc/conf.h +++ b/src/lxc/conf.h @@ -198,6 +198,7 @@ struct lxc_mount_options { int create_file : 1; int optional : 1; int relative : 1; + int userns_self : 1; char userns_path[PATH_MAX]; int userns_fd; unsigned long mnt_flags; @@ -221,6 +222,7 @@ struct lxc_rootfs { char *path; int fd_path_pin; + int dfd_idmapped; int dfd_mnt; char *mount; @@ -506,6 +508,7 @@ __hidden extern int lxc_storage_prepare(struct lxc_conf *conf); __hidden extern int lxc_rootfs_prepare(struct lxc_conf *conf, bool userns); __hidden extern void lxc_storage_put(struct lxc_conf *conf); __hidden extern int lxc_rootfs_init(struct lxc_conf *conf, bool userns); +__hidden extern int lxc_rootfs_prepare_parent(struct lxc_handler *handler); __hidden extern int lxc_map_ids(struct lxc_list *idmap, pid_t pid); __hidden extern int lxc_create_tty(const char *name, struct lxc_conf *conf); __hidden extern void lxc_delete_tty(struct lxc_tty_info *ttys); @@ -581,17 +584,13 @@ static inline const char *get_rootfs_mnt(const struct lxc_rootfs *rootfs) return !is_empty_string(rootfs->path) ? 
rootfs->mount : s; } -static inline bool idmapped_rootfs_mnt(const struct lxc_rootfs *rootfs) -{ - return rootfs->mnt_opts.userns_fd >= 0; -} - static inline void put_lxc_mount_options(struct lxc_mount_options *mnt_opts) { mnt_opts->create_dir = 0; mnt_opts->create_file = 0; mnt_opts->optional = 0; mnt_opts->relative = 0; + mnt_opts->userns_self = 0; mnt_opts->userns_path[0] = '\0'; mnt_opts->mnt_flags = 0; mnt_opts->prop_flags = 0; @@ -609,6 +608,7 @@ static inline void put_lxc_rootfs(struct lxc_rootfs *rootfs, bool unpin) close_prot_errno_disarm(rootfs->mnt_opts.userns_fd); if (unpin) close_prot_errno_disarm(rootfs->fd_path_pin); + close_prot_errno_disarm(rootfs->dfd_idmapped); put_lxc_mount_options(&rootfs->mnt_opts); storage_put(rootfs->storage); rootfs->storage = NULL; diff --git a/src/lxc/mount_utils.c b/src/lxc/mount_utils.c index 95da60f07..f6032af9e 100644 --- a/src/lxc/mount_utils.c +++ b/src/lxc/mount_utils.c @@ -243,6 +243,7 @@ int create_detached_idmapped_mount(const char *path, int userns_fd, bool recursi struct lxc_mount_attr attr = { .attr_set = MOUNT_ATTR_IDMAP, .userns_fd = userns_fd, + .propagation = MS_SLAVE, }; int ret; diff --git a/src/lxc/start.c b/src/lxc/start.c index a18ac1bd2..a6e4e57d1 100644 --- a/src/lxc/start.c +++ b/src/lxc/start.c @@ -1786,6 +1786,12 @@ static int lxc_spawn(struct lxc_handler *handler) } } + ret = lxc_rootfs_prepare_parent(handler); + if (ret) { + ERROR("Failed to prepare rootfs"); + goto out_delete_net; + } + if (!lxc_sync_wake_child(handler, START_SYNC_STARTUP)) goto out_delete_net; @@ -2043,21 +2049,9 @@ int __lxc_start(struct lxc_handler *handler, struct lxc_operations *ops, if (geteuid() == 0 && !lxc_list_empty(&conf->id_map)) { /* - * This handles two cases: mounting real block devices and - * creating idmapped mounts. The block device case should be - * obivous, i.e. no real filesystem can currently be mounted - * from inside a user namespace. 
- * - * Idmapped mounts can currently only be created if the caller - * is privileged wrt to the user namespace in which the - * underlying block device has been mounted in. This basically - * (with few exceptions) means we need to be CAP_SYS_ADMIN in - * the initial user namespace since almost no interesting - * filesystems can be mounted inside of user namespaces. This - * is way we need to do the rootfs setup here. In the future - * this may change. + * Most filesystems can't be mounted inside a userns so handle them here. */ - if (idmapped_rootfs_mnt(&conf->rootfs) || rootfs_is_blockdev(conf)) { + if (rootfs_is_blockdev(conf)) { ret = unshare(CLONE_NEWNS); if (ret < 0) { ERROR("Failed to unshare CLONE_NEWNS"); diff --git a/src/lxc/storage/dir.c b/src/lxc/storage/dir.c index eacf928e0..d16a75e40 100644 --- a/src/lxc/storage/dir.c +++ b/src/lxc/storage/dir.c @@ -127,7 +127,8 @@ bool dir_detect(const char *path) int dir_mount(struct lxc_storage *bdev) { - struct lxc_mount_options *mnt_opts = &bdev->rootfs->mnt_opts; + struct lxc_rootfs *rootfs = bdev->rootfs; + struct lxc_mount_options *mnt_opts = &rootfs->mnt_opts; __do_free char *mntdata = NULL; unsigned long mflags = 0; int ret; @@ -141,22 +142,31 @@ int dir_mount(struct lxc_storage *bdev) src = lxc_storage_get_path(bdev->src, bdev->type); + if (rootfs->dfd_idmapped >= 0 && !can_use_bind_mounts()) + return syserror_set(-EOPNOTSUPP, "Idmapped mount requested but kernel doesn't support new mount API"); + if (can_use_bind_mounts()) { __do_close int fd_source = -EBADF, fd_target = -EBADF; - fd_source = open_at(-EBADF, src, PROTECT_OPATH_DIRECTORY, 0, 0); - if (fd_source < 0) - return syserror("Failed to open \"%s\"", src); - fd_target = open_at(-EBADF, bdev->dest, PROTECT_OPATH_DIRECTORY, 0, 0); if (fd_target < 0) return syserror("Failed to open \"%s\"", bdev->dest); - ret = fd_mount_idmapped(fd_source, "", PROTECT_OPATH_DIRECTORY, - PROTECT_LOOKUP_BENEATH, fd_target, "", - PROTECT_OPATH_DIRECTORY, - 
PROTECT_LOOKUP_BENEATH, 0, - mnt_opts->userns_fd, true); + if (rootfs->dfd_idmapped >= 0) { + ret = move_detached_mount(rootfs->dfd_idmapped, fd_target, "", + PROTECT_OPATH_DIRECTORY, + PROTECT_LOOKUP_BENEATH); + } else { + fd_source = open_at(-EBADF, src, PROTECT_OPATH_DIRECTORY, 0, 0); + if (fd_source < 0) + return syserror("Failed to open \"%s\"", src); + + ret = fd_bind_mount(fd_source, "", + PROTECT_OPATH_DIRECTORY, + PROTECT_LOOKUP_BENEATH, fd_target, + "", PROTECT_OPATH_DIRECTORY, + PROTECT_LOOKUP_BENEATH, 0, true); + } if (ret < 0) return syserror("Failed to mount \"%s\" onto \"%s\"", src, bdev->dest); } else {