]> git.ipfire.org Git - thirdparty/lxc.git/commitdiff
conf: improve idmapped mounts support 3811/head
author Christian Brauner <christian.brauner@ubuntu.com>
Sun, 25 Apr 2021 10:23:56 +0000 (12:23 +0200)
committer Christian Brauner <christian.brauner@ubuntu.com>
Sun, 25 Apr 2021 11:55:50 +0000 (13:55 +0200)
Setting up a detached idmapped mount is a privileged operation; mounting
it doesn't have to be.

Signed-off-by: Christian Brauner <christian.brauner@ubuntu.com>
src/lxc/conf.c
src/lxc/conf.h
src/lxc/mount_utils.c
src/lxc/start.c
src/lxc/storage/dir.c

index 195fa4940c19ae37ba65abc1ec744588c15a73c5..10b20a04b2457fadee9c1165fa1d75718293403e 100644 (file)
@@ -545,11 +545,6 @@ int lxc_rootfs_init(struct lxc_conf *conf, bool userns)
 
                if (rootfs->bdev_type && !strequal(rootfs->bdev_type, "dir"))
                        return syserror_set(-EINVAL, "Idmapped rootfs currently only supports the \"dir\" storage driver");
-
-               fd_userns = open_at(-EBADF, rootfs->mnt_opts.userns_path,
-                                   PROTECT_OPEN_WITH_TRAILING_SYMLINKS, 0, 0);
-               if (fd_userns < 0)
-                       return syserror("Failed to open user namespace");
        }
 
        if (rootfs->path) {
@@ -613,6 +608,51 @@ out:
        return 0;
 }
 
+int lxc_rootfs_prepare_parent(struct lxc_handler *handler) /* Create a detached idmapped mount of the rootfs source and send its fd over handler->data_sock[0]. */
+{ /* Returns 0 on success or when no idmapped rootfs is requested; otherwise the result of syserror()/syserror_set(). */
+       __do_close int dfd_idmapped = -EBADF, fd_userns = -EBADF;
+       struct lxc_rootfs *rootfs = &handler->conf->rootfs;
+       struct lxc_storage *storage = rootfs->storage;
+       int ret;
+       const char *path_source;
+       /* Nothing to do without an id mapping. */
+       if (lxc_list_empty(&handler->conf->id_map))
+               return 0;
+       /* Nothing to do when no "idmap=" userns path was configured for the rootfs. */
+       if (is_empty_string(rootfs->mnt_opts.userns_path))
+               return 0;
+       /* Nothing to do when the rootfs has already been set up. */
+       if (handler->conf->rootfs_setup)
+               return 0;
+       /* From here on an idmapped rootfs was requested, so unsupported setups are hard errors. */
+       if (rootfs_is_blockdev(handler->conf))
+               return syserror_set(-EOPNOTSUPP, "Idmapped mounts on block-backed storage not yet supported");
+
+       if (!can_use_bind_mounts())
+               return syserror_set(-EOPNOTSUPP, "Kernel does not support the new mount api");
+       /* userns_self: duplicate the user namespace fd stored in handler->nsfd[]; otherwise open the configured path. */
+       if (rootfs->mnt_opts.userns_self)
+               fd_userns = dup_cloexec(handler->nsfd[LXC_NS_USER]);
+       else
+               fd_userns = open_at(-EBADF, rootfs->mnt_opts.userns_path,
+                                   PROTECT_OPEN_WITH_TRAILING_SYMLINKS, 0, 0);
+       if (fd_userns < 0)
+               return syserror("Failed to open user namespace");
+       /* NOTE(review): storage is dereferenced without a NULL check - confirm rootfs->storage is always set when this runs. */
+       path_source = lxc_storage_get_path(storage->src, storage->type);
+       /* The trailing "true" is presumably the "recursive" flag of create_detached_idmapped_mount() - TODO confirm. */
+       dfd_idmapped = create_detached_idmapped_mount(path_source, fd_userns, true);
+       if (dfd_idmapped < 0)
+               return syserror("Failed to create detached idmapped mount");
+       /* Send exactly one fd and no payload data. */
+       ret = lxc_abstract_unix_send_fds(handler->data_sock[0], &dfd_idmapped, 1, NULL, 0);
+       if (ret < 0)
+               return syserror("Failed to send detached idmapped mount fd");
+       /* Both fds are declared __do_close; presumably they are closed automatically on return - TODO confirm. */
+       TRACE("Created detached idmapped mount %d", dfd_idmapped);
+       return 0;
+}
+
 static int add_shmount_to_list(struct lxc_conf *conf)
 {
        char new_mount[PATH_MAX];
@@ -2197,9 +2237,13 @@ int parse_lxc_mntopts(struct lxc_mount_options *opts, char *mnt_opts)
                        if (is_empty_string(opts->userns_path))
                                return syserror_set(-EINVAL, "Missing idmap path for \"idmap=<path>\" LXC specific mount option");
 
-                       fd_userns = open(opts->userns_path, O_RDONLY | O_NOCTTY | O_CLOEXEC);
-                       if (fd_userns < 0)
-                               return syserror("Failed to open user namespace");
+                       if (strequal(opts->userns_path, "container")) {
+                               opts->userns_self = 1;
+                       } else {
+                               fd_userns = open(opts->userns_path, O_RDONLY | O_NOCTTY | O_CLOEXEC);
+                               if (fd_userns < 0)
+                                       return syserror("Failed to open user namespace");
+                       }
 
                        TRACE("Parse LXC specific mount option %d->\"idmap=%s\"", fd_userns, opts->userns_path);
                        break;
@@ -2790,6 +2834,7 @@ struct lxc_conf *lxc_conf_init(void)
        new->rootfs.dfd_dev = -EBADF;
        new->rootfs.dfd_host = -EBADF;
        new->rootfs.fd_path_pin = -EBADF;
+       new->rootfs.dfd_idmapped = -EBADF;
        new->rootfs.mnt_opts.userns_fd = -EBADF;
        new->logfd = -1;
        lxc_list_init(&new->cgroup);
@@ -3523,12 +3568,40 @@ static int lxc_setup_keyring(struct lsm_ops *lsm_ops, const struct lxc_conf *con
        return ret;
 }
 
+static int lxc_rootfs_prepare_child(struct lxc_handler *handler) /* Receive one idmapped mount fd over handler->data_sock[1] and store it in rootfs->dfd_idmapped. */
+{ /* Returns 0 on success or when no idmapped rootfs is requested; otherwise the result of syserror(). */
+       struct lxc_rootfs *rootfs = &handler->conf->rootfs;
+       int dfd_idmapped = -EBADF;
+       int ret;
+       /* The three early returns below use the same conditions as lxc_rootfs_prepare_parent(), so no receive is attempted when that function returns early without sending. */
+       if (lxc_list_empty(&handler->conf->id_map))
+               return 0;
+
+       if (is_empty_string(rootfs->mnt_opts.userns_path))
+               return 0;
+
+       if (handler->conf->rootfs_setup)
+               return 0;
+       /* Receive a single fd and no payload data. */
+       ret = lxc_abstract_unix_recv_one_fd(handler->data_sock[1], &dfd_idmapped, NULL, 0);
+       if (ret < 0)
+               return syserror("Failed to receive idmapped mount fd");
+       /* The fd is now stored in the rootfs struct, hence no __do_close on the local variable. */
+       rootfs->dfd_idmapped = dfd_idmapped;
+       TRACE("Received detached idmapped mount %d", rootfs->dfd_idmapped);
+       return 0;
+}
+
 int lxc_setup(struct lxc_handler *handler)
 {
        int ret;
        const char *lxcpath = handler->lxcpath, *name = handler->name;
        struct lxc_conf *lxc_conf = handler->conf;
 
+       ret = lxc_rootfs_prepare_child(handler);
+       if (ret < 0)
+               return syserror("Failed to prepare rootfs");
+
        ret = lxc_setup_rootfs_prepare_root(lxc_conf, name, lxcpath);
        if (ret < 0)
                return log_error(-1, "Failed to setup rootfs");
index fe70f5bf453836eb107512196d4ae96cd40ff8ee..969f75b9b4c20f5e5dead3c9298f3ffdcb988f42 100644 (file)
@@ -198,6 +198,7 @@ struct lxc_mount_options {
        int create_file : 1;
        int optional : 1;
        int relative : 1;
+       int userns_self : 1;
        char userns_path[PATH_MAX];
        int userns_fd;
        unsigned long mnt_flags;
@@ -221,6 +222,7 @@ struct lxc_rootfs {
 
        char *path;
        int fd_path_pin;
+       int dfd_idmapped;
 
        int dfd_mnt;
        char *mount;
@@ -506,6 +508,7 @@ __hidden extern int lxc_storage_prepare(struct lxc_conf *conf);
 __hidden extern int lxc_rootfs_prepare(struct lxc_conf *conf, bool userns);
 __hidden extern void lxc_storage_put(struct lxc_conf *conf);
 __hidden extern int lxc_rootfs_init(struct lxc_conf *conf, bool userns);
+__hidden extern int lxc_rootfs_prepare_parent(struct lxc_handler *handler);
 __hidden extern int lxc_map_ids(struct lxc_list *idmap, pid_t pid);
 __hidden extern int lxc_create_tty(const char *name, struct lxc_conf *conf);
 __hidden extern void lxc_delete_tty(struct lxc_tty_info *ttys);
@@ -581,17 +584,13 @@ static inline const char *get_rootfs_mnt(const struct lxc_rootfs *rootfs)
        return !is_empty_string(rootfs->path) ? rootfs->mount : s;
 }
 
-static inline bool idmapped_rootfs_mnt(const struct lxc_rootfs *rootfs)
-{
-       return rootfs->mnt_opts.userns_fd >= 0;
-}
-
 static inline void put_lxc_mount_options(struct lxc_mount_options *mnt_opts)
 {
        mnt_opts->create_dir = 0;
        mnt_opts->create_file = 0;
        mnt_opts->optional = 0;
        mnt_opts->relative = 0;
+       mnt_opts->userns_self = 0;
        mnt_opts->userns_path[0] = '\0';
        mnt_opts->mnt_flags = 0;
        mnt_opts->prop_flags = 0;
@@ -609,6 +608,7 @@ static inline void put_lxc_rootfs(struct lxc_rootfs *rootfs, bool unpin)
                close_prot_errno_disarm(rootfs->mnt_opts.userns_fd);
                if (unpin)
                        close_prot_errno_disarm(rootfs->fd_path_pin);
+               close_prot_errno_disarm(rootfs->dfd_idmapped);
                put_lxc_mount_options(&rootfs->mnt_opts);
                storage_put(rootfs->storage);
                rootfs->storage = NULL;
index 95da60f073e04c513a68ea54d024e9a04a1076d4..f6032af9e5b05d8c6e636bba3a054bfaf79ad302 100644 (file)
@@ -243,6 +243,7 @@ int create_detached_idmapped_mount(const char *path, int userns_fd, bool recursi
        struct lxc_mount_attr attr = {
                .attr_set       = MOUNT_ATTR_IDMAP,
                .userns_fd      = userns_fd,
+               .propagation    = MS_SLAVE,
 
        };
        int ret;
index a18ac1bd2129775d000871105bd7cef4fac9b3f8..a6e4e57d141de3c5c79c812c2171aae77e13dee4 100644 (file)
@@ -1786,6 +1786,12 @@ static int lxc_spawn(struct lxc_handler *handler)
                }
        }
 
+       ret = lxc_rootfs_prepare_parent(handler);
+       if (ret) {
+               ERROR("Failed to prepare rootfs");
+               goto out_delete_net;
+       }
+
        if (!lxc_sync_wake_child(handler, START_SYNC_STARTUP))
                goto out_delete_net;
 
@@ -2043,21 +2049,9 @@ int __lxc_start(struct lxc_handler *handler, struct lxc_operations *ops,
 
        if (geteuid() == 0 && !lxc_list_empty(&conf->id_map)) {
                /*
-                * This handles two cases: mounting real block devices and
-                * creating idmapped mounts. The block device case should be
-                * obivous, i.e. no real filesystem can currently be mounted
-                * from inside a user namespace.
-                *
-                * Idmapped mounts can currently only be created if the caller
-                * is privileged wrt to the user namespace in which the
-                * underlying block device has been mounted in. This basically
-                * (with few exceptions) means we need to be CAP_SYS_ADMIN in
-                * the initial user namespace since almost no interesting
-                * filesystems can be mounted inside of user namespaces. This
-                * is way we need to do the rootfs setup here. In the future
-                * this may change.
+                * Most filesystems can't be mounted inside a userns so handle them here.
                 */
-               if (idmapped_rootfs_mnt(&conf->rootfs) || rootfs_is_blockdev(conf)) {
+               if (rootfs_is_blockdev(conf)) {
                        ret = unshare(CLONE_NEWNS);
                        if (ret < 0) {
                                ERROR("Failed to unshare CLONE_NEWNS");
index eacf928e027992971410d86b0207a4c7a8cbddba..d16a75e40735fd3d7b6ea93efccc3dd1de9f44bc 100644 (file)
@@ -127,7 +127,8 @@ bool dir_detect(const char *path)
 
 int dir_mount(struct lxc_storage *bdev)
 {
-       struct lxc_mount_options *mnt_opts = &bdev->rootfs->mnt_opts;
+       struct lxc_rootfs *rootfs = bdev->rootfs;
+       struct lxc_mount_options *mnt_opts = &rootfs->mnt_opts;
        __do_free char *mntdata = NULL;
        unsigned long mflags = 0;
        int ret;
@@ -141,22 +142,31 @@ int dir_mount(struct lxc_storage *bdev)
 
        src = lxc_storage_get_path(bdev->src, bdev->type);
 
+       if (rootfs->dfd_idmapped >= 0 && !can_use_bind_mounts())
+               return syserror_set(-EOPNOTSUPP, "Idmapped mount requested but kernel doesn't support new mount API");
+
        if (can_use_bind_mounts()) {
                __do_close int fd_source = -EBADF, fd_target = -EBADF;
 
-               fd_source = open_at(-EBADF, src, PROTECT_OPATH_DIRECTORY, 0, 0);
-               if (fd_source < 0)
-                       return syserror("Failed to open \"%s\"", src);
-
                fd_target = open_at(-EBADF, bdev->dest, PROTECT_OPATH_DIRECTORY, 0, 0);
                if (fd_target < 0)
                        return syserror("Failed to open \"%s\"", bdev->dest);
 
-               ret = fd_mount_idmapped(fd_source, "", PROTECT_OPATH_DIRECTORY,
-                                       PROTECT_LOOKUP_BENEATH, fd_target, "",
-                                       PROTECT_OPATH_DIRECTORY,
-                                       PROTECT_LOOKUP_BENEATH, 0,
-                                       mnt_opts->userns_fd, true);
+               if (rootfs->dfd_idmapped >= 0) {
+                       ret = move_detached_mount(rootfs->dfd_idmapped, fd_target, "",
+                                                 PROTECT_OPATH_DIRECTORY,
+                                                 PROTECT_LOOKUP_BENEATH);
+               } else {
+                       fd_source = open_at(-EBADF, src, PROTECT_OPATH_DIRECTORY, 0, 0);
+                       if (fd_source < 0)
+                               return syserror("Failed to open \"%s\"", src);
+
+                       ret = fd_bind_mount(fd_source, "",
+                                           PROTECT_OPATH_DIRECTORY,
+                                           PROTECT_LOOKUP_BENEATH, fd_target,
+                                           "", PROTECT_OPATH_DIRECTORY,
+                                           PROTECT_LOOKUP_BENEATH, 0, true);
+               }
                if (ret < 0)
                        return syserror("Failed to mount \"%s\" onto \"%s\"", src, bdev->dest);
        } else {