From: Christian Brauner Date: Wed, 1 Jun 2022 13:52:07 +0000 (+0200) Subject: libmount: accept X-mount.idmap= X-Git-Tag: v2.39-rc1~335 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=0bbc62dda53f9de3e9ee9b4d823d906979e337e9;p=thirdparty%2Futil-linux.git libmount: accept X-mount.idmap= This adds a new mount option X-mount.idmap. This mount option can be used to create an idmapped mount. An idmapped mount allows changing the ownership of all files located under a mount according to the ID-mapping associated with a user namespace. The ownership change is tied to the lifetime of, and localized to, the relevant mount. The relevant ID-mapping can be specified in two ways: * A user can specify the ID-mapping directly. The ID-mapping must be specified using the syntax id-type:id-mount:id-host:id-range. Specifying "u" as the id-type prefix creates a UID-mapping, "g" creates a GID-mapping and omitting id-type or specifying "b" creates both a UID- and GID-mapping. The id-mount parameter indicates the starting ID in the new mount. The id-host parameter indicates the starting ID in the filesystem. The id-range parameter indicates how many IDs are to be mapped. It is possible to specify multiple ID-mappings. The individual ID-mappings must be separated by spaces. For example, the ID-mapping X-mount.idmap=u:1000:0:1 g:1001:1:2 5000:1000:2 creates an idmapped mount where UID 0 is mapped to UID 1000, GID 1 is mapped to GID 1001, GID 2 is mapped to GID 1002, UID and GID 1000 are mapped to 5000, and UID and GID 1001 are mapped to 5001 in the mount. When an ID-mapping is specified directly, a new user namespace will be allocated with the requested ID-mapping. The newly created user namespace will be attached to the mount. * A user can specify a user namespace file. The user namespace will then be attached to the mount and the ID-mapping of the user namespace will become the ID-mapping of the mount. 
For example, *X-mount.idmap=/proc/PID/ns/user* will attach the user namespace of the process PID to the mount. Even more details about idmapped mounts can be found in the mount_setattr(2) manpage of the linux-manpage project. [kzak@redhat.com: - port Christian's patch to the new libmount hooks API] Signed-off-by: Christian Brauner (Microsoft) Signed-off-by: Karel Zak --- diff --git a/libmount/meson.build b/libmount/meson.build index a1d4026595..1367ab291e 100644 --- a/libmount/meson.build +++ b/libmount/meson.build @@ -47,6 +47,7 @@ if LINUX src/hook_mkdir.c src/hook_subdir.c src/hook_owner.c + src/hook_idmap.c src/monitor.c '''.split() endif diff --git a/libmount/src/Makemodule.am b/libmount/src/Makemodule.am index ba229dace7..2881ae5762 100644 --- a/libmount/src/Makemodule.am +++ b/libmount/src/Makemodule.am @@ -36,6 +36,7 @@ libmount_la_SOURCES += \ libmount/src/hook_mkdir.c \ libmount/src/hook_subdir.c \ libmount/src/hook_owner.c \ + libmount/src/hook_idmap.c \ libmount/src/monitor.c if HAVE_BTRFS diff --git a/libmount/src/context_mount.c b/libmount/src/context_mount.c index 32563a8e37..56cfb281a1 100644 --- a/libmount/src/context_mount.c +++ b/libmount/src/context_mount.c @@ -1614,6 +1614,12 @@ int mnt_context_get_mount_excode( return MNT_EX_SYSERR; } + if (rc == -MNT_ERR_IDMAP) { + if (buf) + snprintf(buf, bufsz, _("filesystem was mounted, but failed to attach idmapping")); + return MNT_EX_SYSERR; + } + if (rc < 0) return mnt_context_get_generic_excode(rc, buf, bufsz, _("filesystem was mounted, but any subsequent operation failed: %m")); diff --git a/libmount/src/hook_idmap.c b/libmount/src/hook_idmap.c new file mode 100644 index 0000000000..487f561a2f --- /dev/null +++ b/libmount/src/hook_idmap.c @@ -0,0 +1,482 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +/* + * This file is part of libmount from util-linux project. 
+ * + * Copyright (C) 2022 Karel Zak + * Copyright (C) 2022 Christian Brauner (Microsoft) + * + * libmount is free software; you can redistribute it and/or modify it + * under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * (at your option) any later version. + * + * + * This is X-mount.idmap= implementation. + */ +#include +#include +#include +#include + +#include "mountP.h" +#include "strutils.h" +#include "all-io.h" +#include "mount-api-utils.h" +#include "include/namespace.h" + +#ifdef HAVE_LINUX_NSFS_H +# include +#endif + +#ifdef UL_HAVE_MOUNT_API + +typedef enum idmap_type_t { + ID_TYPE_UID, /* uidmap entry */ + ID_TYPE_GID, /* gidmap entry */ + ID_TYPE_UIDGID, /* uidmap and gidmap entry */ +} idmap_type_t; + +struct id_map { + idmap_type_t map_type; + uint32_t nsid; + uint32_t hostid; + uint32_t range; + struct list_head map_head; +}; + +struct hook_data { + int userns_fd; + struct list_head id_map; +}; + +static inline struct hook_data *new_hook_data(void) +{ + struct hook_data *hd = calloc(1, sizeof(*hd)); + + if (!hd) + return NULL; + + INIT_LIST_HEAD(&hd->id_map); + hd->userns_fd = -1; + return hd; +} + +static inline void free_hook_data(struct hook_data *hd) +{ + struct list_head *p, *pnext; + struct id_map *idmap; + + if (!hd) + return; + + if (hd->userns_fd >= 0) { + close(hd->userns_fd); + hd->userns_fd = -1; + } + + list_for_each_safe(p, pnext, &hd->id_map) { + idmap = list_entry(p, struct id_map, map_head); + list_del(&idmap->map_head); + free(idmap); + } + INIT_LIST_HEAD(&hd->id_map); + free(hd); +} + +static int write_id_mapping(idmap_type_t map_type, pid_t pid, const char *buf, + size_t buf_size) +{ + int fd = -1, rc = -1, setgroups_fd = -1; + char path[PATH_MAX]; + + if (geteuid() != 0 && map_type == ID_TYPE_GID) { + snprintf(path, sizeof(path), "/proc/%d/setgroups", pid); + + setgroups_fd = open(path, O_WRONLY | O_CLOEXEC | O_NOCTTY); + if 
(setgroups_fd < 0 && errno != ENOENT) + goto err; + + if (setgroups_fd >= 0) { + rc = write_all(setgroups_fd, "deny\n", strlen("deny\n")); + if (rc) + goto err; + } + } + + snprintf(path, sizeof(path), "/proc/%d/%cid_map", pid, + map_type == ID_TYPE_UID ? 'u' : 'g'); + + fd = open(path, O_WRONLY | O_CLOEXEC | O_NOCTTY); + if (fd < 0) + goto err; + + rc = write_all(fd, buf, buf_size); + +err: + if (fd >= 0) + close(fd); + if (setgroups_fd >= 0) + close(setgroups_fd); + + return rc; +} + +static int map_ids(struct list_head *idmap, pid_t pid) +{ + int fill, left; + char *pos; + int rc = 0; + char mapbuf[4096] = {}; + struct list_head *p; + + for (idmap_type_t type = ID_TYPE_UID; type <= ID_TYPE_GID; type++) { + bool had_entry = false; + + pos = mapbuf; + list_for_each(p, idmap) { + struct id_map *map = list_entry(p, struct id_map, map_head); + + /* + * If the map type is ID_TYPE_UIDGID we need to include + * it in both gid- and uidmap. + */ + if (map->map_type != ID_TYPE_UIDGID && map->map_type != type) + continue; + + had_entry = true; + + left = sizeof(mapbuf) - (pos - mapbuf); + fill = snprintf(pos, left, + "%" PRIu32 " %" PRIu32 " %" PRIu32 "\n", + map->nsid, map->hostid, map->range); + /* + * The kernel only takes <= 4k for writes to + * /proc//{g,u}id_map + */ + if (fill <= 0) + return errno = EINVAL, -1; + + pos += fill; + } + if (!had_entry) + continue; + + rc = write_id_mapping(type, pid, mapbuf, pos - mapbuf); + if (rc < 0) + return -1; + + memset(mapbuf, 0, sizeof(mapbuf)); + } + + return 0; +} + +static int wait_for_pid(pid_t pid) +{ + int status, rc; + + do { + rc = waitpid(pid, &status, 0); + } while (rc < 0 && errno == EINTR); + + if (!WIFEXITED(status) || WEXITSTATUS(status) != 0) + return -1; + + return 0; +} + +static int get_userns_fd_from_idmap(struct list_head *idmap) +{ + int fd_userns = -1; + ssize_t rc = -1; + char c = '1'; + pid_t pid; + int sock_fds[2]; + char path[PATH_MAX]; + + rc = socketpair(PF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0, 
sock_fds); + if (rc < 0) + return -errno; + + pid = fork(); + if (pid < 0) + goto err_close_sock; + + if (pid == 0) { + close(sock_fds[1]); + + rc = unshare(CLONE_NEWUSER); + if (rc < 0) + _exit(EXIT_FAILURE); + + rc = write_all(sock_fds[0], &c, 1); + if (rc) + _exit(EXIT_FAILURE); + + close(sock_fds[0]); + + _exit(EXIT_SUCCESS); + } + close(sock_fds[0]); + sock_fds[0] = -1; + + rc = read_all(sock_fds[1], &c, 1); + if (rc != 1) + goto err_wait; + + rc = map_ids(idmap, pid); + if (rc < 0) + goto err_wait; + + snprintf(path, sizeof(path), "/proc/%d/ns/user", pid); + fd_userns = open(path, O_RDONLY | O_CLOEXEC | O_NOCTTY); + +err_wait: + rc = wait_for_pid(pid); + +err_close_sock: + if (sock_fds[0] > 0) + close(sock_fds[0]); + close(sock_fds[1]); + + if (rc < 0 && fd_userns >= 0) { + close(fd_userns); + fd_userns = -1; + } + + return fd_userns; +} + +static int open_userns(const char *path) +{ + + int userns_fd; + + userns_fd = open(path, O_RDONLY | O_CLOEXEC | O_NOCTTY); + if (userns_fd < 0) + return -1; + +#if defined(NS_GET_OWNER_UID) + /* + * We use NS_GET_OWNER_UID to verify that this is a user namespace. + * This is on a best-effort basis. If this isn't a userns then + * mount_setattr() will tell us to go away later. + */ + if (ioctl(userns_fd, NS_GET_OWNER_UID, &(uid_t){-1}) < 0) { + close(userns_fd); + return -1; + } +#endif + return userns_fd; +} + +/* + * Create an idmapped mount based on context target, unmounting the + * non-idmapped target mount and attaching the detached idmapped mount target. 
+ */ +static int hook_mount_post( + struct libmnt_context *cxt, + const struct libmnt_hookset *hs, + void *data) +{ + struct hook_data *hd = (struct hook_data *) data; + struct mount_attr attr = { + .attr_set = MOUNT_ATTR_IDMAP, + .userns_fd = hd->userns_fd + }; + const int recursive = cxt->mountflags & MS_REC; + const char *target = mnt_fs_get_target(cxt->fs); + int fd_tree = -1; + int rc; + + assert(cxt); + assert(hd); + assert(target); + assert(hd->userns_fd >= 0); + + DBG(HOOK, ul_debugobj(hs, " attaching namespace to %s", target)); + + /* + * Once a mount has been attached to the filesystem it can't be + * idmapped anymore. So create a new detached mount. + */ + fd_tree = open_tree(-1, target, + OPEN_TREE_CLONE | OPEN_TREE_CLOEXEC | + (recursive ? AT_RECURSIVE : 0)); + if (fd_tree < 0) { + DBG(HOOK, ul_debugobj(hs, " failed to open tree")); + return -MNT_ERR_IDMAP; + } + + /* Attach the idmapping to the mount. */ + rc = mount_setattr(fd_tree, "", + AT_EMPTY_PATH | (recursive ? AT_RECURSIVE : 0), + &attr, sizeof(attr)); + if (rc < 0) { + DBG(HOOK, ul_debugobj(hs, " failed to set attributes")); + goto done; + } + /* Unmount the old, non-idmapped mount we just cloned and idmapped. */ + rc = umount(target); + if (rc < 0) { + DBG(HOOK, ul_debugobj(hs, " failed to set umount target")); + goto done; + } + + /* Attach the idmapped mount. 
*/ + rc = move_mount(fd_tree, "", -1, target, MOVE_MOUNT_F_EMPTY_PATH); + if (rc) + DBG(HOOK, ul_debugobj(hs, " failed to set move mount")); + +done: + close(fd_tree); + if (rc < 0) + return -MNT_ERR_IDMAP; + + return 0; +} + +/* + * Process X-mount.idmap= mount option + */ +static int hook_prepare_options( + struct libmnt_context *cxt, + const struct libmnt_hookset *hs, + void *data __attribute__((__unused__))) +{ + struct hook_data *hd = NULL; + int rc; + char *value; + size_t valsz; + char *saveptr = NULL, *tok; + + const char *o = mnt_fs_get_user_options(cxt->fs); + if (!o) + return 0; + + rc = mnt_optstr_get_option(o, "X-mount.idmap", &value, &valsz); + if (rc < 0) + return -MNT_ERR_MOUNTOPT; + if (rc > 0) + return 0; /* not found */ + + if (!valsz) + return errno = EINVAL, -MNT_ERR_MOUNTOPT; + + hd = new_hook_data(); + if (!hd) + return -ENOMEM; + + /* Has the user given us a path to a user namespace? */ + if (*value == '/') { + hd->userns_fd = open_userns(value); + if (hd->userns_fd < 0) + goto err; + goto done; + } + + /* + * This is an explicit ID-mapping list of the form: + * [id-type]:id-mount:id-host:id-range [...] + * + * We split the list into separate ID-mapping entries. The individual + * ID-mapping entries are separated by ' '. + * + * A long while ago I made the kernel support up to 340 individual + * ID-mappings. So users have quite a bit of freedom here. 
+ */ + for (tok = strtok_r(value, " ", &saveptr); tok; + tok = strtok_r(NULL, " ", &saveptr)) { + struct id_map *idmap; + idmap_type_t map_type; + uint32_t nsid = UINT_MAX, hostid = UINT_MAX, range = UINT_MAX; + + if (startswith(tok, "b:")) { + /* b:id-mount:id-host:id-range */ + map_type = ID_TYPE_UIDGID; + tok += 2; + } else if (startswith(tok, "g:")) { + /* g:id-mount:id-host:id-range */ + map_type = ID_TYPE_GID; + tok += 2; + } else if (startswith(tok, "u:")) { + /* u:id-mount:id-host:id-range */ + map_type = ID_TYPE_UID; + tok += 2; + } else { + /* + * id-mount:id-host:id-range + * + * If the user didn't specify it explicitly then they + * want this to be both a gid- and uidmap. + */ + map_type = ID_TYPE_UIDGID; + } + + /* id-mount:id-host:id-range */ + rc = sscanf(tok, "%" PRIu32 ":%" PRIu32 ":%" PRIu32, &nsid, + &hostid, &range); + if (rc != 3) + goto err; + + idmap = calloc(1, sizeof(*idmap)); + if (!idmap) + goto err; + + idmap->map_type = map_type; + idmap->nsid = nsid; + idmap->hostid = hostid; + idmap->range = range; + INIT_LIST_HEAD(&idmap->map_head); + list_add_tail(&idmap->map_head, &hd->id_map); + } + + hd->userns_fd = get_userns_fd_from_idmap(&hd->id_map); + if (hd->userns_fd < 0) + goto err; + +done: + /* define post-mount hook to enter the namespace */ + DBG(HOOK, ul_debugobj(hs, " wanted new user namespace")); + rc = mnt_context_append_hook(cxt, hs, + MNT_STAGE_MOUNT_POST, + hd, hook_mount_post); + if (rc < 0) + goto err; + return 0; + +err: + DBG(HOOK, ul_debugobj(hs, " failed to setup idmap")); + free_hook_data(hd); + return -MNT_ERR_MOUNTOPT; +} + + +/* de-initiallize this module */ +static int hookset_deinit(struct libmnt_context *cxt, const struct libmnt_hookset *hs) +{ + void *data; + + DBG(HOOK, ul_debugobj(hs, "deinit '%s'", hs->name)); + + /* remove all our hooks and free hook data */ + while (mnt_context_remove_hook(cxt, hs, 0, &data) == 0) { + if (data) + free_hook_data((struct hook_data *) data); + data = NULL; + } + + return 0; +} + 
+const struct libmnt_hookset hookset_idmap = +{ + .name = "__idmap", + + .firststage = MNT_STAGE_PREP_OPTIONS, + .firstcall = hook_prepare_options, + + .deinit = hookset_deinit +}; + +#endif /* UL_HAVE_MOUNT_API */ diff --git a/libmount/src/hooks.c b/libmount/src/hooks.c index 7e9e9f35b2..cea6c25b3a 100644 --- a/libmount/src/hooks.c +++ b/libmount/src/hooks.c @@ -19,6 +19,7 @@ */ #include "mountP.h" +#include "mount-api-utils.h" /* built-in hooksets */ static const struct libmnt_hookset *hooksets[] = @@ -27,6 +28,9 @@ static const struct libmnt_hookset *hooksets[] = &hookset_mkdir, &hookset_subdir, &hookset_mount_legacy, +#ifdef UL_HAVE_MOUNT_API + &hookset_idmap, +#endif &hookset_owner #endif }; diff --git a/libmount/src/libmount.h.in b/libmount/src/libmount.h.in index 8ca58a0b5f..5ce20d66f9 100644 --- a/libmount/src/libmount.h.in +++ b/libmount/src/libmount.h.in @@ -232,6 +232,12 @@ enum { * filesystem mounted, but subsequent X-mount.mode= chmod(2) failed */ #define MNT_ERR_CHMOD 5012 +/** + * MNT_ERR_IDMAP: + * + * filesystem mounted, but subsequent X-mount.idmap= failed + */ +#define MNT_ERR_IDMAP 5013 /* diff --git a/libmount/src/mountP.h b/libmount/src/mountP.h index c8364a7bf2..d25442e42f 100644 --- a/libmount/src/mountP.h +++ b/libmount/src/mountP.h @@ -307,6 +307,7 @@ extern const struct libmnt_hookset hookset_mount_legacy; extern const struct libmnt_hookset hookset_mkdir; extern const struct libmnt_hookset hookset_subdir; extern const struct libmnt_hookset hookset_owner; +extern const struct libmnt_hookset hookset_idmap; extern int mnt_context_deinit_hooksets(struct libmnt_context *cxt); extern const struct libmnt_hookset *mnt_context_get_hookset(struct libmnt_context *cxt, const char *name); diff --git a/sys-utils/mount.8.adoc b/sys-utils/mount.8.adoc index 4265162124..9936e2f925 100644 --- a/sys-utils/mount.8.adoc +++ b/sys-utils/mount.8.adoc @@ -642,6 +642,33 @@ Set _mountpoint_'s ownership after mounting. 
Names resolved in the target mount *X-mount.mode*=_mode_:: Set _mountpoint_'s mode after mounting. +*X-mount.idmap*=__id-type__:__id-mount__:__id-host__:__id-range__ [__id-type__:__id-mount__:__id-host__:__id-range__], *X-mount.idmap*=__file__:: +Use this option to create an idmapped mount. +An idmapped mount allows changing the ownership of all files located under a mount according to the ID-mapping associated with a user namespace. +The ownership change is tied to the lifetime of, and localized to, the relevant mount. +The relevant ID-mapping can be specified in two ways: ++ +* A user can specify the ID-mapping directly. ++ +The ID-mapping must be specified using the syntax __id-type__:__id-mount__:__id-host__:__id-range__. +Specifying *u* as the __id-type__ prefix creates a UID-mapping, *g* creates a GID-mapping and omitting __id-type__ or specifying *b* creates both a UID- and GID-mapping. +The __id-mount__ parameter indicates the starting ID in the new mount. +The __id-host__ parameter indicates the starting ID in the filesystem. +The __id-range__ parameter indicates how many IDs are to be mapped. +It is possible to specify multiple ID-mappings. +The individual ID-mappings must be separated by spaces. ++ +For example, the ID-mapping *X-mount.idmap=u:1000:0:1 g:1001:1:2 5000:1000:2* creates an idmapped mount where +UID 0 is mapped to UID 1000, GID 1 is mapped to GID 1001, GID 2 is mapped to GID 1002, UID and GID 1000 are mapped to 5000, and UID and GID 1001 are mapped to 5001 in the mount. ++ +When an ID-mapping is specified directly, a new user namespace will be allocated with the requested ID-mapping. +The newly created user namespace will be attached to the mount. +* A user can specify a user namespace file. ++ +The user namespace will then be attached to the mount and the ID-mapping of the user namespace will become the ID-mapping of the mount. ++ +For example, *X-mount.idmap=/proc/PID/ns/user* will attach the user namespace of the process PID to the mount. 
+ *nosymfollow*:: Do not follow symlinks when resolving paths. Symlinks can still be created, and *readlink*(1), *readlink*(2), *realpath*(1), and *realpath*(3) all still work properly.