1 /* SPDX-License-Identifier: LGPL-2.1-or-later */
3 * This file is part of libmount from util-linux project.
5 * Copyright (C) 2022 Karel Zak <kzak@redhat.com>
6 * Copyright (C) 2022 Christian Brauner (Microsoft) <brauner@kernel.org>
8 * libmount is free software; you can redistribute it and/or modify it
9 * under the terms of the GNU Lesser General Public License as published by
10 * the Free Software Foundation; either version 2.1 of the License, or
11 * (at your option) any later version.
14 * This is the X-mount.idmap= implementation.
16 * Please see the comment in libmount/src/hooks.c to understand how hooks work.
19 #include <sys/socket.h>
21 #include <sys/ioctl.h>
22 #include <sys/mount.h>
27 #include "namespace.h"
28 #include "mount-api-utils.h"
32 #ifdef HAVE_LINUX_NSFS_H
33 # include <linux/nsfs.h>
36 #if defined(HAVE_MOUNTFD_API) && defined(HAVE_LINUX_MOUNT_H)
38 typedef enum idmap_type_t
{
39 ID_TYPE_UID
, /* uidmap entry */
40 ID_TYPE_GID
, /* gidmap entry */
41 ID_TYPE_UIDGID
, /* uidmap and gidmap entry */
45 idmap_type_t map_type
;
49 struct list_head map_head
;
54 struct list_head id_map
;
57 static inline struct hook_data
*new_hook_data(void)
59 struct hook_data
*hd
= calloc(1, sizeof(*hd
));
64 INIT_LIST_HEAD(&hd
->id_map
);
69 static inline void free_hook_data(struct hook_data
*hd
)
71 struct list_head
*p
, *pnext
;
77 if (hd
->userns_fd
>= 0) {
82 list_for_each_safe(p
, pnext
, &hd
->id_map
) {
83 idmap
= list_entry(p
, struct id_map
, map_head
);
84 list_del(&idmap
->map_head
);
87 INIT_LIST_HEAD(&hd
->id_map
);
91 static int write_id_mapping(idmap_type_t map_type
, pid_t pid
, const char *buf
,
94 int fd
= -1, rc
= -1, setgroups_fd
= -1;
97 if (geteuid() != 0 && map_type
== ID_TYPE_GID
) {
98 snprintf(path
, sizeof(path
), "/proc/%d/setgroups", pid
);
100 setgroups_fd
= open(path
, O_WRONLY
| O_CLOEXEC
| O_NOCTTY
);
101 if (setgroups_fd
< 0 && errno
!= ENOENT
)
104 if (setgroups_fd
>= 0) {
105 rc
= write_all(setgroups_fd
, "deny\n", strlen("deny\n"));
111 snprintf(path
, sizeof(path
), "/proc/%d/%cid_map", pid
,
112 map_type
== ID_TYPE_UID
? 'u' : 'g');
114 fd
= open(path
, O_WRONLY
| O_CLOEXEC
| O_NOCTTY
);
118 rc
= write_all(fd
, buf
, buf_size
);
123 if (setgroups_fd
>= 0)
129 static int map_ids(struct list_head
*idmap
, pid_t pid
)
134 char mapbuf
[4096] = {};
137 for (idmap_type_t type
= ID_TYPE_UID
; type
<= ID_TYPE_GID
; type
++) {
138 bool had_entry
= false;
141 list_for_each(p
, idmap
) {
142 struct id_map
*map
= list_entry(p
, struct id_map
, map_head
);
145 * If the map type is ID_TYPE_UIDGID we need to include
146 * it in both gid- and uidmap.
148 if (map
->map_type
!= ID_TYPE_UIDGID
&& map
->map_type
!= type
)
153 left
= sizeof(mapbuf
) - (pos
- mapbuf
);
154 fill
= snprintf(pos
, left
,
155 "%" PRIu32
" %" PRIu32
" %" PRIu32
"\n",
156 map
->nsid
, map
->hostid
, map
->range
);
158 * The kernel only takes <= 4k for writes to
159 * /proc/<pid>/{g,u}id_map
162 return errno
= EINVAL
, -1;
169 rc
= write_id_mapping(type
, pid
, mapbuf
, pos
- mapbuf
);
173 memset(mapbuf
, 0, sizeof(mapbuf
));
179 static int wait_for_pid(pid_t pid
)
184 rc
= waitpid(pid
, &status
, 0);
185 } while (rc
< 0 && errno
== EINTR
);
187 if (!WIFEXITED(status
) || WEXITSTATUS(status
) != 0)
193 static int get_userns_fd_from_idmap(struct list_head
*idmap
)
202 rc
= socketpair(PF_LOCAL
, SOCK_STREAM
| SOCK_CLOEXEC
, 0, sock_fds
);
213 rc
= unshare(CLONE_NEWUSER
);
217 /* Let parent know we're ready to have the idmapping written. */
218 rc
= write_all(sock_fds
[0], &c
, 1);
222 /* Hang around until the parent has persisted our namespace. */
223 rc
= read_all(sock_fds
[0], &c
, 1);
234 /* Wait for child to set up a new namespace. */
235 rc
= read_all(sock_fds
[1], &c
, 1);
241 rc
= map_ids(idmap
, pid
);
247 snprintf(path
, sizeof(path
), "/proc/%d/ns/user", pid
);
248 fd_userns
= open(path
, O_RDONLY
| O_CLOEXEC
| O_NOCTTY
);
250 /* Let child know we've persisted its namespace. */
251 (void)write_all(sock_fds
[1], &c
, 1);
254 rc
= wait_for_pid(pid
);
261 if (rc
< 0 && fd_userns
>= 0) {
269 static int open_userns(const char *path
)
274 userns_fd
= open(path
, O_RDONLY
| O_CLOEXEC
| O_NOCTTY
);
278 #if defined(NS_GET_OWNER_UID)
280 * We use NS_GET_OWNER_UID to verify that this is a user namespace.
281 * This is on a best-effort basis. If this isn't a userns then
282 * mount_setattr() will tell us to go away later.
284 if (ioctl(userns_fd
, NS_GET_OWNER_UID
, &(uid_t
){-1}) < 0) {
293 * Create an idmapped mount based on context target, unmounting the
294 * non-idmapped target mount and attaching the detached idmapped mount target.
296 static int hook_mount_post(
297 struct libmnt_context
*cxt
,
298 const struct libmnt_hookset
*hs
,
301 struct hook_data
*hd
= (struct hook_data
*) data
;
302 struct mount_attr attr
= {
303 .attr_set
= MOUNT_ATTR_IDMAP
,
304 .userns_fd
= hd
->userns_fd
306 const int recursive
= mnt_optlist_is_recursive(cxt
->optlist
);
307 const char *target
= mnt_fs_get_target(cxt
->fs
);
309 int rc
, is_private
= 1;
313 assert(hd
->userns_fd
>= 0);
315 DBG(HOOK
, ul_debugobj(hs
, " attaching namespace to %s", target
));
318 * Once a mount has been attached to the filesystem it can't be
319 * idmapped anymore. So create a new detached mount.
321 #ifdef USE_LIBMOUNT_MOUNTFD_SUPPORT
323 struct libmnt_sysapi
*api
= mnt_context_get_sysapi(cxt
);
325 if (api
&& api
->fd_tree
>= 0) {
326 fd_tree
= api
->fd_tree
;
328 DBG(HOOK
, ul_debugobj(hs
, " reuse tree FD"));
333 fd_tree
= open_tree(-1, target
,
334 OPEN_TREE_CLONE
| OPEN_TREE_CLOEXEC
|
335 (recursive
? AT_RECURSIVE
: 0));
337 DBG(HOOK
, ul_debugobj(hs
, " failed to open tree"));
338 return -MNT_ERR_IDMAP
;
341 /* Attach the idmapping to the mount. */
342 rc
= mount_setattr(fd_tree
, "",
343 AT_EMPTY_PATH
| (recursive
? AT_RECURSIVE
: 0),
344 &attr
, sizeof(attr
));
346 DBG(HOOK
, ul_debugobj(hs
, " failed to set attributes"));
350 /* Attach the idmapped mount. */
352 /* Unmount the old, non-idmapped mount we just cloned and idmapped. */
353 umount2(target
, MNT_DETACH
);
355 rc
= move_mount(fd_tree
, "", -1, target
, MOVE_MOUNT_F_EMPTY_PATH
);
357 DBG(HOOK
, ul_debugobj(hs
, " failed to set move mount"));
363 return -MNT_ERR_IDMAP
;
369 * Process X-mount.idmap= mount option
371 static int hook_prepare_options(
372 struct libmnt_context
*cxt
,
373 const struct libmnt_hookset
*hs
,
374 void *data
__attribute__((__unused__
)))
376 struct hook_data
*hd
= NULL
;
377 struct libmnt_optlist
*ol
;
378 struct libmnt_opt
*opt
;
380 const char *value
= NULL
;
381 char *saveptr
= NULL
, *tok
, *buf
= NULL
;
383 ol
= mnt_context_get_optlist(cxt
);
387 opt
= mnt_optlist_get_named(ol
, "X-mount.idmap", cxt
->map_userspace
);
391 value
= mnt_opt_get_value(opt
);
393 value
= skip_blank(value
);
394 if (!value
|| !*value
)
395 return errno
= EINVAL
, -MNT_ERR_MOUNTOPT
;
397 hd
= new_hook_data();
401 /* Has the user given us a path to a user namespace? */
403 hd
->userns_fd
= open_userns(value
);
404 if (hd
->userns_fd
< 0)
414 * This is an explicit ID-mapping list of the form:
415 * [id-type]:id-mount:id-host:id-range [...]
417 * We split the list into separate ID-mapping entries. The individual
418 * ID-mapping entries are separated by ' '.
420 * A long while ago I made the kernel support up to 340 individual
421 * ID-mappings. So users have quite a bit of freedom here.
423 for (tok
= strtok_r(buf
, " ", &saveptr
); tok
;
424 tok
= strtok_r(NULL
, " ", &saveptr
)) {
425 struct id_map
*idmap
;
426 idmap_type_t map_type
;
427 uint32_t nsid
= UINT_MAX
, hostid
= UINT_MAX
, range
= UINT_MAX
;
429 if (startswith(tok
, "b:")) {
430 /* b:id-mount:id-host:id-range */
431 map_type
= ID_TYPE_UIDGID
;
433 } else if (startswith(tok
, "g:")) {
434 /* g:id-mount:id-host:id-range */
435 map_type
= ID_TYPE_GID
;
437 } else if (startswith(tok
, "u:")) {
438 /* u:id-mount:id-host:id-range */
439 map_type
= ID_TYPE_UID
;
443 * id-mount:id-host:id-range
445 * If the user didn't specify it explicitly then they
446 * want this to be both a gid- and uidmap.
448 map_type
= ID_TYPE_UIDGID
;
451 /* id-mount:id-host:id-range */
452 rc
= sscanf(tok
, "%" PRIu32
":%" PRIu32
":%" PRIu32
, &nsid
,
457 idmap
= calloc(1, sizeof(*idmap
));
461 idmap
->map_type
= map_type
;
463 idmap
->hostid
= hostid
;
464 idmap
->range
= range
;
465 INIT_LIST_HEAD(&idmap
->map_head
);
466 list_add_tail(&idmap
->map_head
, &hd
->id_map
);
469 hd
->userns_fd
= get_userns_fd_from_idmap(&hd
->id_map
);
470 if (hd
->userns_fd
< 0)
474 /* define post-mount hook to enter the namespace */
475 DBG(HOOK
, ul_debugobj(hs
, " wanted new user namespace"));
476 cxt
->force_clone
= 1; /* require OPEN_TREE_CLONE */
477 rc
= mnt_context_append_hook(cxt
, hs
,
478 MNT_STAGE_MOUNT_POST
,
479 hd
, hook_mount_post
);
487 DBG(HOOK
, ul_debugobj(hs
, " failed to setup idmap"));
490 return -MNT_ERR_MOUNTOPT
;
494 /* de-initialize this module */
495 static int hookset_deinit(struct libmnt_context
*cxt
, const struct libmnt_hookset
*hs
)
499 DBG(HOOK
, ul_debugobj(hs
, "deinit '%s'", hs
->name
));
501 /* remove all our hooks and free hook data */
502 while (mnt_context_remove_hook(cxt
, hs
, 0, &data
) == 0) {
504 free_hook_data((struct hook_data
*) data
);
511 const struct libmnt_hookset hookset_idmap
=
515 .firststage
= MNT_STAGE_PREP_OPTIONS
,
516 .firstcall
= hook_prepare_options
,
518 .deinit
= hookset_deinit
521 #endif /* HAVE_MOUNTFD_API && HAVE_LINUX_MOUNT_H */