]> git.ipfire.org Git - thirdparty/util-linux.git/commitdiff
libmount: accept X-mount.idmap=
authorChristian Brauner <christian.brauner@ubuntu.com>
Wed, 1 Jun 2022 13:52:07 +0000 (15:52 +0200)
committerKarel Zak <kzak@redhat.com>
Tue, 3 Jan 2023 11:53:12 +0000 (12:53 +0100)
This adds a new mount option X-mount.idmap. This mount option can be
used to create an idmapped mount.

An idmapped mount allows changing the ownership of all files located under
a mount according to the ID-mapping associated with a user namespace.

The ownership change is tied to the lifetime and localized to the
relevant mount. The relevant ID-mapping can be specified in two ways:

* A user can specify the ID-mapping directly.
  The ID-mapping must be specified using the syntax
  id-type:id-mount:id-host:id-range
  Specifying "u" as the id-type prefix creates a UID-mapping, "g"
  creates a GID-mapping and omitting id-type or specifying "b"
  creates both a UID- and GID-mapping.
  The id-mount parameter indicates the starting ID in the new mount.
  The id-host parameter indicates the starting ID in the filesystem.
  The id-range parameter indicates how many IDs are to be mapped.
  It is possible to specify multiple ID-mappings.
  The individual ID-mappings must be separated by spaces.

  For example, the ID-mapping
  X-mount.idmap=u:1000:0:1 g:1001:1:2 5000:1000:2
  creates an idmapped mount where UID 0 is mapped to UID 1000, GID 1 is
  mapped to GID 1001, GID 2 is mapped to GID 1002, UID and GID 1000 are
  mapped to 5000, and UID and GID 1001 are mapped to 5001 in the mount.

  When an ID-mapping is specified directly a new user namespace will be
  allocated with the requested ID-mapping.
  The newly created user namespace will be attached to the mount.

* A user can specify a user namespace file.
  The user namespace will then be attached to the mount and the
  ID-mapping of the user namespace will become the ID-mapping of the
  mount.
  For example, *X-mount.idmap=/proc/PID/ns/user* will attach the user
  namespace of the process PID to the mount.

Even more details about idmapped mounts can be found in the
mount_setattr(2) manpage of the linux-manpage project.

[kzak@redhat.com: - port Christian's patch to the new libmount hooks API]

Signed-off-by: Christian Brauner (Microsoft) <brauner@kernel.org>
Signed-off-by: Karel Zak <kzak@redhat.com>
libmount/meson.build
libmount/src/Makemodule.am
libmount/src/context_mount.c
libmount/src/hook_idmap.c [new file with mode: 0644]
libmount/src/hooks.c
libmount/src/libmount.h.in
libmount/src/mountP.h
sys-utils/mount.8.adoc

index a1d40265959d572378ae07d2835a95451d92c8ce..1367ab291ee6073f46db965e10b0d96ef3483931 100644 (file)
@@ -47,6 +47,7 @@ if LINUX
     src/hook_mkdir.c
     src/hook_subdir.c
     src/hook_owner.c
+    src/hook_idmap.c
     src/monitor.c
 '''.split()
 endif
index ba229dace78c5219bbe029be71f87c779581fc79..2881ae576292c2f0560db22f9327fae99eda0cbf 100644 (file)
@@ -36,6 +36,7 @@ libmount_la_SOURCES += \
        libmount/src/hook_mkdir.c \
        libmount/src/hook_subdir.c \
        libmount/src/hook_owner.c \
+       libmount/src/hook_idmap.c \
        libmount/src/monitor.c
 
 if HAVE_BTRFS
index 32563a8e37cedc6f3c5bf91cf15b4e3eb9bcc94a..56cfb281a10a863b6ea93361685537e992dbc519 100644 (file)
@@ -1614,6 +1614,12 @@ int mnt_context_get_mount_excode(
                        return MNT_EX_SYSERR;
                }
 
+               if (rc == -MNT_ERR_IDMAP) {
+                       if (buf)
+                               snprintf(buf, bufsz, _("filesystem was mounted, but failed to attach idmapping"));
+                       return MNT_EX_SYSERR;
+               }
+
                if (rc < 0)
                        return mnt_context_get_generic_excode(rc, buf, bufsz,
                                _("filesystem was mounted, but any subsequent operation failed: %m"));
diff --git a/libmount/src/hook_idmap.c b/libmount/src/hook_idmap.c
new file mode 100644 (file)
index 0000000..487f561
--- /dev/null
@@ -0,0 +1,482 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+/*
+ * This file is part of libmount from util-linux project.
+ *
+ * Copyright (C) 2022 Karel Zak <kzak@redhat.com>
+ * Copyright (C) 2022 Christian Brauner (Microsoft) <brauner@kernel.org>
+ *
+ * libmount is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or
+ * (at your option) any later version.
+ *
+ *
+ * This is X-mount.idmap= implementation.
+ */
+#include <stdbool.h>
+#include <sys/socket.h>
+#include <sys/wait.h>
+#include <inttypes.h>
+
+#include "mountP.h"
+#include "strutils.h"
+#include "all-io.h"
+#include "mount-api-utils.h"
+#include "include/namespace.h"
+
+#ifdef HAVE_LINUX_NSFS_H
+# include <linux/nsfs.h>
+#endif
+
+#ifdef UL_HAVE_MOUNT_API
+
+typedef enum idmap_type_t {
+       ID_TYPE_UID,    /* uidmap entry */
+       ID_TYPE_GID,    /* gidmap entry */
+       ID_TYPE_UIDGID, /* uidmap and gidmap entry; keep last, map_ids() iterates ID_TYPE_UID..ID_TYPE_GID only */
+} idmap_type_t;
+
+struct id_map {
+       idmap_type_t map_type;          /* which map(s) this entry goes into */
+       uint32_t nsid;                  /* id-mount: first column written to {u,g}id_map */
+       uint32_t hostid;                /* id-host: second column written to {u,g}id_map */
+       uint32_t range;                 /* id-range: third column written to {u,g}id_map */
+       struct list_head map_head;      /* links the entry into hook_data.id_map */
+};
+
+struct hook_data {
+       int userns_fd;                  /* user namespace descriptor, -1 while not open */
+       struct list_head id_map;        /* list of struct id_map entries parsed from the option */
+};
+
+/*
+ * Allocate zeroed hook data with an empty ID-mapping list and no open
+ * namespace descriptor. Returns NULL if the allocation fails.
+ */
+static inline struct hook_data *new_hook_data(void)
+{
+       struct hook_data *data = calloc(1, sizeof(*data));
+
+       if (data) {
+               INIT_LIST_HEAD(&data->id_map);
+               data->userns_fd = -1;
+       }
+       return data;
+}
+
+/*
+ * Release hook data: close the namespace descriptor (if open), free every
+ * queued ID-mapping entry and finally the structure itself. NULL is accepted.
+ */
+static inline void free_hook_data(struct hook_data *hd)
+{
+       struct list_head *cur, *next;
+
+       if (!hd)
+               return;
+
+       if (hd->userns_fd >= 0) {
+               close(hd->userns_fd);
+               hd->userns_fd = -1;
+       }
+
+       list_for_each_safe(cur, next, &hd->id_map) {
+               struct id_map *entry = list_entry(cur, struct id_map, map_head);
+
+               /* unlink first, "next" was saved by the _safe iterator */
+               list_del(&entry->map_head);
+               free(entry);
+       }
+       INIT_LIST_HEAD(&hd->id_map);
+       free(hd);
+}
+
+/*
+ * Write @buf (@buf_size bytes) to /proc/@pid/uid_map when @map_type is
+ * ID_TYPE_UID, otherwise to /proc/@pid/gid_map.
+ *
+ * A caller without effective UID 0 that writes a gidmap first writes "deny"
+ * to /proc/@pid/setgroups; a missing setgroups file (ENOENT) is tolerated.
+ *
+ * Returns 0 on success; otherwise -1 or the non-zero result of write_all().
+ */
+static int write_id_mapping(idmap_type_t map_type, pid_t pid, const char *buf,
+                           size_t buf_size)
+{
+       int fd = -1, rc = -1, setgroups_fd = -1;
+       char path[PATH_MAX];
+
+       if (geteuid() != 0 && map_type == ID_TYPE_GID) {
+               snprintf(path, sizeof(path), "/proc/%d/setgroups", pid);
+
+               setgroups_fd = open(path, O_WRONLY | O_CLOEXEC | O_NOCTTY);
+               if (setgroups_fd < 0 && errno != ENOENT)
+                       goto err;
+
+               if (setgroups_fd >= 0) {
+                       rc = write_all(setgroups_fd, "deny\n", strlen("deny\n"));
+                       if (rc)
+                               goto err;
+               }
+       }
+
+       snprintf(path, sizeof(path), "/proc/%d/%cid_map", pid,
+                map_type == ID_TYPE_UID ? 'u' : 'g');
+
+       fd = open(path, O_WRONLY | O_CLOEXEC | O_NOCTTY);
+       if (fd < 0) {
+               /*
+                * rc is 0 here if the setgroups write above succeeded;
+                * reset it so the failed open is not reported as success.
+                */
+               rc = -1;
+               goto err;
+       }
+
+       rc = write_all(fd, buf, buf_size);
+
+err:
+       if (fd >= 0)
+               close(fd);
+       if (setgroups_fd >= 0)
+               close(setgroups_fd);
+
+       return rc;
+}
+
+/*
+ * Write the uid_map and/or gid_map of process @pid from the entries in @idmap.
+ *
+ * For each of ID_TYPE_UID and ID_TYPE_GID the matching entries (plus every
+ * ID_TYPE_UIDGID entry) are formatted as "nsid hostid range" lines and handed
+ * to write_id_mapping() in one buffer. A map type without any entry is
+ * skipped.
+ *
+ * Returns 0 on success, -1 on failure. errno is set to EINVAL when the
+ * formatted entries do not fit into the 4k buffer.
+ */
+static int map_ids(struct list_head *idmap, pid_t pid)
+{
+       int fill, left;
+       char *pos;
+       int rc = 0;
+       char mapbuf[4096] = {};
+       struct list_head *p;
+
+       for (idmap_type_t type = ID_TYPE_UID; type <= ID_TYPE_GID; type++) {
+               bool had_entry = false;
+
+               pos = mapbuf;
+               list_for_each(p, idmap) {
+                       struct id_map *map = list_entry(p, struct id_map, map_head);
+
+                       /*
+                        * If the map type is ID_TYPE_UIDGID we need to include
+                        * it in both gid- and uidmap.
+                        */
+                       if (map->map_type != ID_TYPE_UIDGID && map->map_type != type)
+                               continue;
+
+                       had_entry = true;
+
+                       left = sizeof(mapbuf) - (pos - mapbuf);
+                       fill = snprintf(pos, left,
+                                       "%" PRIu32 " %" PRIu32 " %" PRIu32 "\n",
+                                       map->nsid, map->hostid, map->range);
+                       /*
+                        * The kernel only takes <= 4k for writes to
+                        * /proc/<pid>/{g,u}id_map.
+                        *
+                        * snprintf() returns the length it wanted to write,
+                        * so fill >= left means the entry was truncated;
+                        * advancing pos by it would run past mapbuf.
+                        */
+                       if (fill <= 0 || fill >= left)
+                               return errno = EINVAL, -1;
+
+                       pos += fill;
+               }
+               if (!had_entry)
+                       continue;
+
+               rc = write_id_mapping(type, pid, mapbuf, pos - mapbuf);
+               if (rc < 0)
+                       return -1;
+
+               /* start the next map type from a clean buffer */
+               memset(mapbuf, 0, sizeof(mapbuf));
+       }
+
+       return 0;
+}
+
+/*
+ * Reap child @pid, retrying when waitpid() is interrupted by a signal.
+ *
+ * Returns 0 if the child exited normally with status 0, -1 otherwise
+ * (including when waitpid() itself fails).
+ */
+static int wait_for_pid(pid_t pid)
+{
+       int status = 0;
+       pid_t rc;
+
+       do {
+               rc = waitpid(pid, &status, 0);
+       } while (rc < 0 && errno == EINTR);
+
+       /* waitpid() failed for good; status was never filled in */
+       if (rc < 0)
+               return -1;
+
+       if (!WIFEXITED(status) || WEXITSTATUS(status) != 0)
+               return -1;
+
+       return 0;
+}
+
+/*
+ * Create a new user namespace carrying the ID-mappings listed in @idmap and
+ * return a descriptor for it.
+ *
+ * A forked child unshares a user namespace, notifies the parent over a
+ * socketpair and exits. The parent then writes the ID maps for the child's
+ * PID, opens /proc/<pid>/ns/user and reaps the child.
+ *
+ * Returns the namespace descriptor on success, -errno if socketpair() fails,
+ * and -1 on any other failure.
+ */
+static int get_userns_fd_from_idmap(struct list_head *idmap)
+{
+       int fd_userns = -1;
+       ssize_t rc = -1;
+       char c = '1';
+       pid_t pid;
+       int sock_fds[2];
+       char path[PATH_MAX];
+
+       rc = socketpair(PF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0, sock_fds);
+       if (rc < 0)
+               return -errno;
+
+       pid = fork();
+       if (pid < 0)
+               goto err_close_sock;
+
+       if (pid == 0) {
+               close(sock_fds[1]);
+
+               rc = unshare(CLONE_NEWUSER);
+               if (rc < 0)
+                       _exit(EXIT_FAILURE);
+
+               /* tell the parent the namespace exists */
+               rc = write_all(sock_fds[0], &c, 1);
+               if (rc)
+                       _exit(EXIT_FAILURE);
+
+               close(sock_fds[0]);
+
+               _exit(EXIT_SUCCESS);
+       }
+       close(sock_fds[0]);
+       sock_fds[0] = -1;
+
+       rc = read_all(sock_fds[1], &c, 1);
+       if (rc != 1)
+               goto err_wait;
+
+       rc = map_ids(idmap, pid);
+       if (rc < 0)
+               goto err_wait;
+
+       snprintf(path, sizeof(path), "/proc/%d/ns/user", pid);
+       fd_userns = open(path, O_RDONLY | O_CLOEXEC | O_NOCTTY);
+
+err_wait:
+       rc = wait_for_pid(pid);
+
+err_close_sock:
+       /* 0 is a valid descriptor; only -1 marks the end as already closed */
+       if (sock_fds[0] >= 0)
+               close(sock_fds[0]);
+       close(sock_fds[1]);
+
+       /* a child that did not exit cleanly invalidates the namespace */
+       if (rc < 0 && fd_userns >= 0) {
+               close(fd_userns);
+               fd_userns = -1;
+       }
+
+       return fd_userns;
+}
+
+/*
+ * Open @path read-only and, where NS_GET_OWNER_UID is available, probe that
+ * it refers to a user namespace. Returns the descriptor, or -1 on failure.
+ */
+static int open_userns(const char *path)
+{
+       int fd = open(path, O_RDONLY | O_CLOEXEC | O_NOCTTY);
+
+       if (fd < 0)
+               return -1;
+
+#if defined(NS_GET_OWNER_UID)
+       {
+               uid_t owner = -1;
+
+               /*
+                * Best-effort check only: the ioctl is expected to fail on
+                * anything that is not a user namespace. Should a wrong file
+                * slip through, mount_setattr() will reject it later.
+                */
+               if (ioctl(fd, NS_GET_OWNER_UID, &owner) < 0) {
+                       close(fd);
+                       return -1;
+               }
+       }
+#endif
+       return fd;
+}
+
+/*
+ * Create an idmapped mount based on context target, unmounting the
+ * non-idmapped target mount and attaching the detached idmapped mount target.
+ *
+ * @data is the struct hook_data prepared by hook_prepare_options(); its
+ * userns_fd must be open.
+ *
+ * Returns 0 on success, -MNT_ERR_IDMAP if any step fails.
+ */
+static int hook_mount_post(
+                       struct libmnt_context *cxt,
+                       const struct libmnt_hookset *hs,
+                       void *data)
+{
+       struct hook_data *hd = (struct hook_data *) data;
+       struct mount_attr attr = {
+               .attr_set       = MOUNT_ATTR_IDMAP
+       };
+       const char *target;
+       int recursive;
+       int fd_tree = -1;
+       int rc;
+
+       /* validate the pointers before anything dereferences them */
+       assert(cxt);
+       assert(hd);
+       assert(hd->userns_fd >= 0);
+
+       target = mnt_fs_get_target(cxt->fs);
+       assert(target);
+
+       recursive = cxt->mountflags & MS_REC;
+       attr.userns_fd = hd->userns_fd;
+
+       DBG(HOOK, ul_debugobj(hs, " attaching namespace to %s", target));
+
+       /*
+        * Once a mount has been attached to the filesystem it can't be
+        * idmapped anymore. So create a new detached mount.
+        */
+       fd_tree = open_tree(-1, target,
+                           OPEN_TREE_CLONE | OPEN_TREE_CLOEXEC |
+                           (recursive ? AT_RECURSIVE : 0));
+       if (fd_tree < 0) {
+               DBG(HOOK, ul_debugobj(hs, " failed to open tree"));
+               return -MNT_ERR_IDMAP;
+       }
+
+       /* Attach the idmapping to the mount. */
+       rc = mount_setattr(fd_tree, "",
+                          AT_EMPTY_PATH | (recursive ? AT_RECURSIVE : 0),
+                          &attr, sizeof(attr));
+       if (rc < 0) {
+               DBG(HOOK, ul_debugobj(hs, " failed to set attributes"));
+               goto done;
+       }
+       /* Unmount the old, non-idmapped mount we just cloned and idmapped. */
+       rc = umount(target);
+       if (rc < 0) {
+               DBG(HOOK, ul_debugobj(hs, " failed to set umount target"));
+               goto done;
+       }
+
+       /* Attach the idmapped mount. */
+       rc = move_mount(fd_tree, "", -1, target, MOVE_MOUNT_F_EMPTY_PATH);
+       if (rc)
+               DBG(HOOK, ul_debugobj(hs, " failed to set move mount"));
+
+done:
+       close(fd_tree);
+       if (rc < 0)
+               return -MNT_ERR_IDMAP;
+
+       return 0;
+}
+
+/*
+ * Process X-mount.idmap= mount option
+ *
+ * The value is either a path to a user namespace file (it starts with '/')
+ * or a space-separated list of [id-type:]id-mount:id-host:id-range entries.
+ * On success a MNT_STAGE_MOUNT_POST hook (hook_mount_post) is registered with
+ * the prepared hook data.
+ *
+ * Returns 0 on success or when the option is absent, -ENOMEM if an early
+ * allocation fails and -MNT_ERR_MOUNTOPT on any other failure.
+ */
+static int hook_prepare_options(
+                       struct libmnt_context *cxt,
+                       const struct libmnt_hookset *hs,
+                       void *data __attribute__((__unused__)))
+{
+       struct hook_data *hd = NULL;
+       int rc;
+       char *value, *buf = NULL;
+       size_t valsz;
+       char *saveptr = NULL, *tok;
+
+       const char *o = mnt_fs_get_user_options(cxt->fs);
+       if (!o)
+               return 0;
+
+       rc = mnt_optstr_get_option(o, "X-mount.idmap", &value, &valsz);
+       if (rc < 0)
+               return -MNT_ERR_MOUNTOPT;
+       if (rc > 0)
+               return 0;       /* not found */
+
+       if (!valsz)
+               return errno = EINVAL, -MNT_ERR_MOUNTOPT;
+
+       /*
+        * @value points into the (const) option string and is only @valsz
+        * bytes long, it is not NUL-terminated and may be followed by other
+        * options. Work on a private copy so that open() sees only the path
+        * and strtok_r() does not modify the options stored in cxt->fs.
+        */
+       buf = strndup(value, valsz);
+       if (!buf)
+               return -ENOMEM;
+
+       hd = new_hook_data();
+       if (!hd) {
+               free(buf);
+               return -ENOMEM;
+       }
+
+       /* Has the user given us a path to a user namespace? */
+       if (*buf == '/') {
+               hd->userns_fd = open_userns(buf);
+               if (hd->userns_fd < 0)
+                       goto err;
+               goto done;
+       }
+
+       /*
+        * This is an explicit ID-mapping list of the form:
+        * [id-type]:id-mount:id-host:id-range [...]
+        *
+        * We split the list into separate ID-mapping entries. The individual
+        * ID-mapping entries are separated by ' '.
+        *
+        * A long while ago I made the kernel support up to 340 individual
+        * ID-mappings. So users have quite a bit of freedom here.
+        */
+       for (tok = strtok_r(buf, " ", &saveptr); tok;
+            tok = strtok_r(NULL, " ", &saveptr)) {
+               struct id_map *idmap;
+               idmap_type_t map_type;
+               uint32_t nsid = UINT_MAX, hostid = UINT_MAX, range = UINT_MAX;
+
+               if (startswith(tok, "b:")) {
+                       /* b:id-mount:id-host:id-range */
+                       map_type = ID_TYPE_UIDGID;
+                       tok += 2;
+               } else if (startswith(tok, "g:")) {
+                       /* g:id-mount:id-host:id-range */
+                       map_type = ID_TYPE_GID;
+                       tok += 2;
+               } else if (startswith(tok, "u:")) {
+                       /* u:id-mount:id-host:id-range */
+                       map_type = ID_TYPE_UID;
+                       tok += 2;
+               } else {
+                       /*
+                        * id-mount:id-host:id-range
+                        *
+                        * If the user didn't specify it explicitly then they
+                        * want this to be both a gid- and uidmap.
+                        */
+                       map_type = ID_TYPE_UIDGID;
+               }
+
+               /* id-mount:id-host:id-range; SCNu32 is the scanf counterpart of PRIu32 */
+               rc = sscanf(tok, "%" SCNu32 ":%" SCNu32 ":%" SCNu32, &nsid,
+                           &hostid, &range);
+               if (rc != 3)
+                       goto err;
+
+               idmap = calloc(1, sizeof(*idmap));
+               if (!idmap)
+                       goto err;
+
+               idmap->map_type = map_type;
+               idmap->nsid = nsid;
+               idmap->hostid = hostid;
+               idmap->range = range;
+               INIT_LIST_HEAD(&idmap->map_head);
+               list_add_tail(&idmap->map_head, &hd->id_map);
+       }
+
+       hd->userns_fd = get_userns_fd_from_idmap(&hd->id_map);
+       if (hd->userns_fd < 0)
+               goto err;
+
+done:
+       /* the parsed entries hold plain numbers, the copy is no longer needed */
+       free(buf);
+       buf = NULL;
+
+       /* define post-mount hook to enter the namespace */
+       DBG(HOOK, ul_debugobj(hs, " wanted new user namespace"));
+       rc = mnt_context_append_hook(cxt, hs,
+                               MNT_STAGE_MOUNT_POST,
+                               hd, hook_mount_post);
+       if (rc < 0)
+               goto err;
+       return 0;
+
+err:
+       DBG(HOOK, ul_debugobj(hs, " failed to setup idmap"));
+       free(buf);
+       free_hook_data(hd);
+       return -MNT_ERR_MOUNTOPT;
+}
+
+
+/* de-initialize this module: unregister our hooks and free their data */
+static int hookset_deinit(struct libmnt_context *cxt, const struct libmnt_hookset *hs)
+{
+       DBG(HOOK, ul_debugobj(hs, "deinit '%s'", hs->name));
+
+       /* keep removing until mnt_context_remove_hook() reports non-zero */
+       for (;;) {
+               void *data = NULL;
+
+               if (mnt_context_remove_hook(cxt, hs, 0, &data) != 0)
+                       break;
+               if (data)
+                       free_hook_data((struct hook_data *) data);
+       }
+
+       return 0;
+}
+
+const struct libmnt_hookset hookset_idmap =
+{
+       .name = "__idmap",
+
+       .firststage = MNT_STAGE_PREP_OPTIONS,   /* presumably the stage at which firstcall runs; confirm in hooks.c */
+       .firstcall = hook_prepare_options,      /* parses X-mount.idmap= and registers hook_mount_post */
+
+       .deinit = hookset_deinit                /* removes our hooks and frees their hook_data */
+};
+
+#endif /* UL_HAVE_MOUNT_API */
index 7e9e9f35b21aef8aac96f617ec32b06e4f42b119..cea6c25b3a7282bcef018d530f2b9557e5dacbe6 100644 (file)
@@ -19,6 +19,7 @@
  */
 
 #include "mountP.h"
+#include "mount-api-utils.h"
 
 /* built-in hooksets */
 static const struct libmnt_hookset *hooksets[] =
@@ -27,6 +28,9 @@ static const struct libmnt_hookset *hooksets[] =
        &hookset_mkdir,
        &hookset_subdir,
        &hookset_mount_legacy,
+#ifdef UL_HAVE_MOUNT_API
+       &hookset_idmap,
+#endif
        &hookset_owner
 #endif
 };
index 8ca58a0b5f47f52b3a540c01b8b2eb32864fbf7a..5ce20d66f97e0c26c05262df2b51008391d1fed9 100644 (file)
@@ -232,6 +232,12 @@ enum {
  * filesystem mounted, but subsequent X-mount.mode= chmod(2) failed
  */
 #define MNT_ERR_CHMOD    5012
+/**
+ * MNT_ERR_IDMAP:
+ *
+ * filesystem mounted, but subsequent X-mount.idmap= failed
+ */
+#define MNT_ERR_IDMAP    5013
 
 
 /*
index c8364a7bf2112d6475905eed300b55b265a031b3..d25442e42f9b21c751b162c059c88dd9a57c2e49 100644 (file)
@@ -307,6 +307,7 @@ extern const struct libmnt_hookset hookset_mount_legacy;
 extern const struct libmnt_hookset hookset_mkdir;
 extern const struct libmnt_hookset hookset_subdir;
 extern const struct libmnt_hookset hookset_owner;
+extern const struct libmnt_hookset hookset_idmap;
 
 extern int mnt_context_deinit_hooksets(struct libmnt_context *cxt);
 extern const struct libmnt_hookset *mnt_context_get_hookset(struct libmnt_context *cxt, const char *name);
index 4265162124007e407a72da16743e987d8c6de024..9936e2f925a6e653068518d59150eacb64b5ed71 100644 (file)
@@ -642,6 +642,33 @@ Set _mountpoint_'s ownership after mounting. Names resolved in the target mount
 *X-mount.mode*=_mode_::
 Set _mountpoint_'s mode after mounting.
 
+*X-mount.idmap*=__id-type__:__id-mount__:__id-host__:__id-range__ [__id-type__:__id-mount__:__id-host__:__id-range__], *X-mount.idmap*=__file__::
+Use this option to create an idmapped mount.
+An idmapped mount allows changing the ownership of all files located under a mount according to the ID-mapping associated with a user namespace.
+The ownership change is tied to the lifetime and localized to the relevant mount.
+The relevant ID-mapping can be specified in two ways:
++
+* A user can specify the ID-mapping directly.
++
+The ID-mapping must be specified using the syntax __id-type__:__id-mount__:__id-host__:__id-range__.
+Specifying *u* as the __id-type__ prefix creates a UID-mapping, *g* creates a GID-mapping and omitting __id-type__ or specifying *b* creates both a UID- and GID-mapping.
+The __id-mount__ parameter indicates the starting ID in the new mount.
+The __id-host__ parameter indicates the starting ID in the filesystem.
+The __id-range__ parameter indicates how many IDs are to be mapped.
+It is possible to specify multiple ID-mappings.
+The individual ID-mappings must be separated by spaces.
++
+For example, the ID-mapping *X-mount.idmap=u:1000:0:1 g:1001:1:2 5000:1000:2* creates an idmapped mount where
+UID 0 is mapped to UID 1000, GID 1 is mapped to GID 1001, GID 2 is mapped to GID 1002, UID and GID 1000 are mapped to 5000, and UID and GID 1001 are mapped to 5001 in the mount.
++
+When an ID-mapping is specified directly a new user namespace will be allocated with the requested ID-mapping.
+The newly created user namespace will be attached to the mount.
+* A user can specify a user namespace file.
++
+The user namespace will then be attached to the mount and the ID-mapping of the user namespace will become the ID-mapping of the mount.
++
+For example, *X-mount.idmap=/proc/PID/ns/user* will attach the user namespace of the process PID to the mount.
+
 *nosymfollow*::
 Do not follow symlinks when resolving paths. Symlinks can still be created, and *readlink*(1), *readlink*(2), *realpath*(1), and *realpath*(3) all still work properly.