git.ipfire.org Git - thirdparty/lxc.git/commitdiff
conf: support idmapping directories
author: Christian Brauner <christian.brauner@ubuntu.com>
Wed, 17 Mar 2021 10:39:13 +0000 (11:39 +0100)
committer: Christian Brauner <christian.brauner@ubuntu.com>
Wed, 21 Apr 2021 08:05:59 +0000 (10:05 +0200)
Signed-off-by: Christian Brauner <christian.brauner@ubuntu.com>
src/lxc/conf.c
src/lxc/conf.h
src/lxc/mount_utils.c
src/lxc/mount_utils.h
src/lxc/start.c
src/lxc/storage/dir.c

diff --git a/src/lxc/conf.c b/src/lxc/conf.c
index 562e232aa1467419276e65b594ab6f32dac8c50e..e3096d257cbde3dd73746469662747954a68cb32 100644 (file)
@@ -488,11 +488,18 @@ int run_script(const char *name, const char *section, const char *script, ...)
  */
 int lxc_rootfs_prepare(struct lxc_rootfs *rootfs, bool userns)
 {
-       __do_close int dfd_path = -EBADF, fd_pin = -EBADF;
+       __do_close int dfd_path = -EBADF, fd_pin = -EBADF, fd_userns = -EBADF;
        int ret;
        struct stat st;
        struct statfs stfs;
 
+       if (!is_empty_string(rootfs->mnt_opts.userns_path)) {
+               fd_userns = open_at(-EBADF, rootfs->mnt_opts.userns_path,
+                                   PROTECT_OPEN_WITH_TRAILING_SYMLINKS, 0, 0);
+               if (fd_userns < 0)
+                       return syserror("Failed to open user namespace");
+       }
+
        if (rootfs->path) {
                if (rootfs->bdev_type &&
                    (strequal(rootfs->bdev_type, "overlay") ||
@@ -504,13 +511,17 @@ int lxc_rootfs_prepare(struct lxc_rootfs *rootfs, bool userns)
                dfd_path = open_at(-EBADF, "/", PROTECT_OPATH_FILE, PROTECT_LOOKUP_ABSOLUTE, 0);
        }
        if (dfd_path < 0)
-               return log_error_errno(-errno, errno, "Failed to open \"%s\"", rootfs->path);
+               return syserror("Failed to open \"%s\"", rootfs->path);
 
-       if (!rootfs->path)
-               return log_trace(0, "Not pinning because container does not have a rootfs");
+       if (!rootfs->path) {
+               TRACE("Not pinning because container does not have a rootfs");
+               goto out;
+       }
 
-       if (userns)
-               return log_trace(0, "Not pinning because container runs in user namespace");
+       if (userns) {
+               TRACE("Not pinning because container runs in user namespace");
+               goto out;
+       }
 
        ret = fstat(dfd_path, &st);
        if (ret < 0)
@@ -524,7 +535,7 @@ int lxc_rootfs_prepare(struct lxc_rootfs *rootfs, bool userns)
                         PROTECT_LOOKUP_BENEATH,
                         S_IWUSR | S_IRUSR);
        if (fd_pin < 0)
-               return log_error_errno(-errno, errno, "Failed to pin rootfs");
+               return syserror("Failed to pin rootfs");
 
        TRACE("Pinned rootfs %d(.lxc_keep)", fd_pin);
 
@@ -546,6 +557,7 @@ int lxc_rootfs_prepare(struct lxc_rootfs *rootfs, bool userns)
 
 out:
        rootfs->fd_path_pin = move_fd(fd_pin);
+       rootfs->mnt_opts.userns_fd = move_fd(fd_userns);
        return 0;
 }
 
@@ -2105,6 +2117,7 @@ const char *lxc_mount_options_info[LXC_MOUNT_MAX] = {
 /* Remove "optional", "create=dir", and "create=file" from mntopt */
 int parse_lxc_mntopts(struct lxc_mount_options *opts, char *mnt_opts)
 {
+       __do_close int fd_userns = -EBADF;
 
        for (size_t i = LXC_MOUNT_CREATE_DIR; i < LXC_MOUNT_MAX; i++) {
                const char *opt_name = lxc_mount_options_info[i];
@@ -2140,7 +2153,12 @@ int parse_lxc_mntopts(struct lxc_mount_options *opts, char *mnt_opts)
                        if (is_empty_string(opts->userns_path))
                                return syserror_set(-EINVAL, "Missing idmap path for \"idmap=<path>\" LXC specific mount option");
 
-                       TRACE("Parse LXC specific mount option \"idmap=%s\"", opts->userns_path);
+                       close_prot_errno_disarm(fd_userns);
+                       fd_userns = open(opts->userns_path, O_RDONLY | O_NOCTTY | O_CLOEXEC);
+                       if (fd_userns < 0)
+                               return syserror("Failed to open user namespace");
+
+                       TRACE("Parse LXC specific mount option %d->\"idmap=%s\"", fd_userns, opts->userns_path);
                        break;
                default:
                        return syserror_set(-EINVAL, "Unknown LXC specific mount option");
@@ -2726,6 +2744,7 @@ struct lxc_conf *lxc_conf_init(void)
        new->rootfs.dfd_dev = -EBADF;
        new->rootfs.dfd_host = -EBADF;
        new->rootfs.fd_path_pin = -EBADF;
+       new->rootfs.mnt_opts.userns_fd = -EBADF;
        new->logfd = -1;
        lxc_list_init(&new->cgroup);
        lxc_list_init(&new->cgroup2);
diff --git a/src/lxc/conf.h b/src/lxc/conf.h
index 91b191095c26dbbebff3800dc168447095331afb..05e4197cbefbed0873d600b72995569808df5501 100644 (file)
@@ -198,6 +198,7 @@ struct lxc_mount_options {
        int optional : 1;
        int relative : 1;
        char userns_path[PATH_MAX];
+       int userns_fd;
 };
 
 /* Defines a structure to store the rootfs location, the
@@ -575,12 +576,18 @@ static inline const char *get_rootfs_mnt(const struct lxc_rootfs *rootfs)
        return !is_empty_string(rootfs->path) ? rootfs->mount : s;
 }
 
+static inline bool idmapped_rootfs_mnt(const struct lxc_rootfs *rootfs)
+{
+       return rootfs->mnt_opts.userns_fd >= 0;
+}
+
 static inline void put_lxc_rootfs(struct lxc_rootfs *rootfs, bool unpin)
 {
        if (rootfs) {
                close_prot_errno_disarm(rootfs->dfd_host);
                close_prot_errno_disarm(rootfs->dfd_mnt);
                close_prot_errno_disarm(rootfs->dfd_dev);
+               close_prot_errno_disarm(rootfs->mnt_opts.userns_fd);
                if (unpin)
                        close_prot_errno_disarm(rootfs->fd_path_pin);
        }
diff --git a/src/lxc/mount_utils.c b/src/lxc/mount_utils.c
index 05d129ac14e7e30630b5702772974ade085ad771..f866d92f50320547a9526d52ad3f0f29b8542fa8 100644 (file)
@@ -236,12 +236,15 @@ int fs_attach(int fd_fs,
        return 0;
 }
 
-int fd_bind_mount(int dfd_from, const char *path_from,
-                 __u64 o_flags_from, __u64 resolve_flags_from,
-                 int dfd_to, const char *path_to,
-                 __u64 o_flags_to, __u64 resolve_flags_to,
-                 unsigned int attr_flags, bool recursive)
+static int __fd_bind_mount(int dfd_from, const char *path_from,
+                          __u64 o_flags_from, __u64 resolve_flags_from,
+                          int dfd_to, const char *path_to, __u64 o_flags_to,
+                          __u64 resolve_flags_to, unsigned int attr_flags,
+                          int userns_fd, bool recursive)
 {
+       struct lxc_mount_attr attr = {
+               .attr_set = attr_flags,
+       };
        __do_close int __fd_from = -EBADF, __fd_to = -EBADF;
        __do_close int fd_tree_from = -EBADF;
        unsigned int open_tree_flags = AT_EMPTY_PATH | OPEN_TREE_CLONE | OPEN_TREE_CLOEXEC;
@@ -266,7 +269,20 @@ int fd_bind_mount(int dfd_from, const char *path_from,
 
        fd_tree_from = open_tree(fd_from, "", open_tree_flags);
        if (fd_tree_from < 0)
-               return log_error_errno(-errno, errno, "Failed to create detached mount");
+               return syserror("Failed to create detached mount");
+
+       if (userns_fd >= 0) {
+               attr.attr_set   |= MOUNT_ATTR_IDMAP;
+               attr.userns_fd  = userns_fd;
+       }
+
+       if (attr.attr_set) {
+               ret = mount_setattr(fd_tree_from, "",
+                                   AT_EMPTY_PATH | AT_RECURSIVE,
+                                   &attr, sizeof(attr));
+               if (ret < 0)
+                       return syserror("Failed to change mount attributes");
+       }
 
        if (!is_empty_string(path_to)) {
                struct lxc_open_how how = {
@@ -284,12 +300,34 @@ int fd_bind_mount(int dfd_from, const char *path_from,
 
        ret = move_mount(fd_tree_from, "", fd_to, "", MOVE_MOUNT_F_EMPTY_PATH | MOVE_MOUNT_T_EMPTY_PATH);
        if (ret)
-               return log_error_errno(-errno, errno, "Failed to attach detached mount %d to filesystem at %d", fd_tree_from, fd_to);
+               return syserror("Failed to attach detached mount %d to filesystem at %d", fd_tree_from, fd_to);
 
        TRACE("Attach detached mount %d to filesystem at %d", fd_tree_from, fd_to);
        return 0;
 }
 
+int fd_mount_idmapped(int dfd_from, const char *path_from,
+                     __u64 o_flags_from, __u64 resolve_flags_from,
+                     int dfd_to, const char *path_to,
+                     __u64 o_flags_to, __u64 resolve_flags_to,
+                     unsigned int attr_flags, int userns_fd, bool recursive)
+{
+       return __fd_bind_mount(dfd_from, path_from, o_flags_from, resolve_flags_from,
+                              dfd_to, path_to, o_flags_to, resolve_flags_to,
+                              attr_flags, userns_fd, recursive);
+}
+
+int fd_bind_mount(int dfd_from, const char *path_from,
+                 __u64 o_flags_from, __u64 resolve_flags_from,
+                 int dfd_to, const char *path_to,
+                 __u64 o_flags_to, __u64 resolve_flags_to,
+                 unsigned int attr_flags, bool recursive)
+{
+       return __fd_bind_mount(dfd_from, path_from, o_flags_from, resolve_flags_from,
+                              dfd_to, path_to, o_flags_to, resolve_flags_to,
+                              attr_flags, -EBADF, recursive);
+}
+
 int calc_remount_flags_new(int dfd_from, const char *path_from,
                           __u64 o_flags_from, __u64 resolve_flags_from,
                           bool remount, unsigned long cur_flags,
diff --git a/src/lxc/mount_utils.h b/src/lxc/mount_utils.h
index 7edc15fd2d069f6a0942cfb85ffe9979d053362c..1fb724be0ccaa22bf992dcbc6bda76f7fd4081b8 100644 (file)
@@ -189,6 +189,13 @@ __hidden extern int fd_bind_mount(int dfd_from, const char *path_from,
                                  __u64 o_flags_to, __u64 resolve_flags_to,
                                  unsigned int attr_flags, bool recursive);
 
+__hidden extern int fd_mount_idmapped(int dfd_from, const char *path_from,
+                                     __u64 o_flags_from, __u64 resolve_flags_from,
+                                     int dfd_to, const char *path_to,
+                                     __u64 o_flags_to, __u64 resolve_flags_to,
+                                     unsigned int attr_flags, int userns_fd,
+                                     bool recursive);
+
 __hidden extern int calc_remount_flags_new(int dfd_from, const char *path_from,
                                           __u64 o_flags_from,
                                           __u64 resolve_flags_from,
diff --git a/src/lxc/start.c b/src/lxc/start.c
index 3dc579e1d03e83e38a694bf96da3a420bd38468c..e4e7e365a769e8b4bd9b8889e052f64b7a834d8b 100644 (file)
@@ -1645,16 +1645,6 @@ static int lxc_spawn(struct lxc_handler *handler)
                goto out_delete_net;
        }
 
-       /* If the rootfs is not a blockdev, prevent the container from marking
-        * it readonly.
-        * If the container is unprivileged then skip rootfs pinning.
-        */
-       ret = lxc_rootfs_prepare(&conf->rootfs, wants_to_map_ids);
-       if (ret) {
-               ERROR("Failed to handle rootfs pinning for container \"%s\"", handler->name);
-               goto out_delete_net;
-       }
-
        /* Create a process in a new set of namespaces. */
        if (share_ns) {
                pid_t attacher_pid;
@@ -2040,9 +2030,20 @@ int __lxc_start(struct lxc_handler *handler, struct lxc_operations *ops,
                goto out_abort;
        }
 
+       /* If the rootfs is not a blockdev, prevent the container from marking
+        * it readonly.
+        * If the container is unprivileged then skip rootfs pinning.
+        */
+       ret = lxc_rootfs_prepare(&conf->rootfs, !lxc_list_empty(&conf->id_map));
+       if (ret) {
+               ERROR("Failed to handle rootfs pinning for container \"%s\"", handler->name);
+               ret = -1;
+               goto out_abort;
+       }
+
        if (geteuid() == 0 && !lxc_list_empty(&conf->id_map)) {
                /* If the backing store is a device, mount it here and now. */
-               if (rootfs_is_blockdev(conf)) {
+               if (idmapped_rootfs_mnt(&conf->rootfs) || rootfs_is_blockdev(conf)) {
                        ret = unshare(CLONE_NEWNS);
                        if (ret < 0) {
                                ERROR("Failed to unshare CLONE_NEWNS");
diff --git a/src/lxc/storage/dir.c b/src/lxc/storage/dir.c
index c1ec5d17dc5f9d04ec2f79229621c6319498ea01..ea92928f1736468e83095fe9ee78e52e1e0e012a 100644 (file)
@@ -148,23 +148,46 @@ int dir_mount(struct lxc_storage *bdev)
 
        src = lxc_storage_get_path(bdev->src, bdev->type);
 
-       ret = mount(src, bdev->dest, "bind", MS_BIND | MS_REC | mntflags | pflags, mntdata);
-       if (ret < 0)
-               return log_error_errno(-errno, errno, "Failed to mount \"%s\" on \"%s\"", src, bdev->dest);
-
-       if (ret == 0 && (mntflags & MS_RDONLY)) {
-               mflags = add_required_remount_flags(src, bdev->dest, MS_BIND | MS_REC | mntflags | pflags | MS_REMOUNT);
-               ret = mount(src, bdev->dest, "bind", mflags, mntdata);
+       if (can_use_bind_mounts()) {
+               __do_close int fd_source = -EBADF, fd_target = -EBADF;
+
+               fd_source = open_at(-EBADF, src, PROTECT_OPATH_DIRECTORY, PROTECT_LOOKUP_ABSOLUTE, 0);
+               if (fd_source < 0)
+                       return syserror("Failed to open \"%s\"", src);
+
+               fd_target = open_at(-EBADF, bdev->dest, PROTECT_OPATH_DIRECTORY, PROTECT_LOOKUP_ABSOLUTE, 0);
+               if (fd_target < 0)
+                       return syserror("Failed to open \"%s\"", bdev->dest);
+
+               ret = fd_mount_idmapped(fd_source, "", PROTECT_OPATH_DIRECTORY,
+                                       PROTECT_LOOKUP_BENEATH, fd_target, "",
+                                       PROTECT_OPATH_DIRECTORY,
+                                       PROTECT_LOOKUP_BENEATH, 0,
+                                       bdev->rootfs->mnt_opts.userns_fd, true);
+               if (ret < 0)
+                       return syserror("Failed to mount \"%s\" onto \"%s\"", src, bdev->dest);
+       } else {
+               ret = mount(src, bdev->dest, "bind", MS_BIND | MS_REC | mntflags | pflags, mntdata);
                if (ret < 0)
-                       return log_error_errno(-errno, errno, "Failed to remount \"%s\" on \"%s\" read-only with options \"%s\", mount flags \"%lu\", and propagation flags \"%lu\"",
-                                              src ? src : "(none)", bdev->dest ? bdev->dest : "(none)", mntdata, mflags, pflags);
-               else
-                       DEBUG("Remounted \"%s\" on \"%s\" read-only with options \"%s\", mount flags \"%lu\", and propagation flags \"%lu\"",
-                             src ? src : "(none)", bdev->dest ? bdev->dest : "(none)", mntdata, mflags, pflags);
+                       return log_error_errno(-errno, errno, "Failed to mount \"%s\" on \"%s\"", src, bdev->dest);
+
+               if (ret == 0 && (mntflags & MS_RDONLY)) {
+                       mflags = add_required_remount_flags(src, bdev->dest, MS_BIND | MS_REC | mntflags | pflags | MS_REMOUNT);
+
+                       ret = mount(src, bdev->dest, "bind", mflags, mntdata);
+                       if (ret < 0)
+                               return log_error_errno(-errno, errno, "Failed to remount \"%s\" on \"%s\" read-only with options \"%s\", mount flags \"%lu\", and propagation flags \"%lu\"",
+                                                      src ? src : "(none)", bdev->dest ? bdev->dest : "(none)", mntdata, mflags, pflags);
+                       else
+                               DEBUG("Remounted \"%s\" on \"%s\" read-only with options \"%s\", mount flags \"%lu\", and propagation flags \"%lu\"",
+                                     src ? src : "(none)", bdev->dest ? bdev->dest : "(none)", mntdata, mflags, pflags);
+               }
+
+               TRACE("Mounted \"%s\" on \"%s\" with options \"%s\", mount flags \"%lu\", and propagation flags \"%lu\"",
+                     src ? src : "(none)", bdev->dest ? bdev->dest : "(none)", mntdata, mflags, pflags);
        }
 
-       TRACE("Mounted \"%s\" on \"%s\" with options \"%s\", mount flags \"%lu\", and propagation flags \"%lu\"",
-             src ? src : "(none)", bdev->dest ? bdev->dest : "(none)", mntdata, mflags, pflags);
+       TRACE("Mounted \"%s\" onto \"%s\"", src, bdev->dest);
        return 0;
 }