1 /* SPDX-License-Identifier: LGPL-2.1-or-later */
3 * This file is part of libmount from util-linux project.
5 * Copyright (C) 2022 Karel Zak <kzak@redhat.com>
6 * Copyright (C) 2022 Christian Brauner (Microsoft) <brauner@kernel.org>
8 * libmount is free software; you can redistribute it and/or modify it
9 * under the terms of the GNU Lesser General Public License as published by
10 * the Free Software Foundation; either version 2.1 of the License, or
11 * (at your option) any later version.
14 * This is X-mount.idmap= implementation.
16 * Please, see the comment in libmount/src/hooks.c to understand how hooks work.
19 #include <sys/socket.h>
26 #include "namespace.h"
28 #ifdef HAVE_LINUX_NSFS_H
29 # include <linux/nsfs.h>
32 #ifdef HAVE_MOUNTFD_API
/*
 * Kind of an ID-mapping entry: it applies to the uidmap, to the gidmap, or
 * to both (see the per-enumerator comments).
 */
34 typedef enum idmap_type_t
{
35 ID_TYPE_UID
, /* uidmap entry */
36 ID_TYPE_GID
, /* gidmap entry */
37 ID_TYPE_UIDGID
, /* uidmap and gidmap entry */
/*
 * Members of struct id_map (the file accesses them as map->map_type and via
 * list_entry(p, struct id_map, map_head)): the kind of the mapping and the
 * list node that links the entry into a list.
 * NOTE(review): the struct header and its remaining members are not visible
 * in this listing.
 */
41 idmap_type_t map_type
;
45 struct list_head map_head
;
/*
 * Member of struct hook_data (used as hd->id_map): head of the list onto
 * which struct id_map entries are appended with list_add_tail().
 * NOTE(review): the struct header and its other members are not visible in
 * this listing.
 */
50 struct list_head id_map
;
/*
 * Allocate a zero-filled struct hook_data with calloc() and initialise its
 * (empty) id_map list head.
 * NOTE(review): the allocation-failure check and the return statement are
 * not visible in this listing -- confirm against the full source.
 */
53 static inline struct hook_data
*new_hook_data(void)
55 struct hook_data
*hd
= calloc(1, sizeof(*hd
));
60 INIT_LIST_HEAD(&hd
->id_map
);
/*
 * Release hook data: act on hd->userns_fd when it holds a valid descriptor
 * (>= 0), unlink every struct id_map entry from hd->id_map with list_del()
 * (the _safe iterator permits removal while walking), then re-initialise
 * the list head.
 * NOTE(review): presumably the descriptor is closed and the entries and hd
 * itself are freed; those statements are not visible in this listing.
 */
65 static inline void free_hook_data(struct hook_data
*hd
)
67 struct list_head
*p
, *pnext
;
73 if (hd
->userns_fd
>= 0) {
78 list_for_each_safe(p
, pnext
, &hd
->id_map
) {
79 idmap
= list_entry(p
, struct id_map
, map_head
);
80 list_del(&idmap
->map_head
);
83 INIT_LIST_HEAD(&hd
->id_map
);
/*
 * Write one prepared mapping buffer (buf, buf_size bytes) to
 * /proc/<pid>/uid_map when map_type is ID_TYPE_UID, or to
 * /proc/<pid>/gid_map for any other map_type.
 *
 * When the caller is not root (geteuid() != 0) and map_type is ID_TYPE_GID,
 * /proc/<pid>/setgroups is opened first and "deny\n" is written to it. A
 * failed open with errno other than ENOENT is special-cased (the branch
 * body is not visible here).
 *
 * NOTE(review): the error branches, the descriptor clean-up and the return
 * statement are not visible in this listing; rc and both descriptors start
 * at -1.
 */
87 static int write_id_mapping(idmap_type_t map_type
, pid_t pid
, const char *buf
,
90 int fd
= -1, rc
= -1, setgroups_fd
= -1;
93 if (geteuid() != 0 && map_type
== ID_TYPE_GID
) {
94 snprintf(path
, sizeof(path
), "/proc/%d/setgroups", pid
);
96 setgroups_fd
= open(path
, O_WRONLY
| O_CLOEXEC
| O_NOCTTY
);
97 if (setgroups_fd
< 0 && errno
!= ENOENT
)
100 if (setgroups_fd
>= 0) {
101 rc
= write_all(setgroups_fd
, "deny\n", strlen("deny\n"));
107 snprintf(path
, sizeof(path
), "/proc/%d/%cid_map", pid
,
108 map_type
== ID_TYPE_UID
? 'u' : 'g');
110 fd
= open(path
, O_WRONLY
| O_CLOEXEC
| O_NOCTTY
);
114 rc
= write_all(fd
, buf
, buf_size
);
119 if (setgroups_fd
>= 0)
/*
 * Serialise the ID-mapping list and hand it to write_id_mapping(), once for
 * ID_TYPE_UID and once for ID_TYPE_GID.
 *
 * On each pass the entries are formatted as "nsid hostid range\n" lines
 * into a 4096-byte buffer; entries of type ID_TYPE_UIDGID are meant to be
 * included in both maps. The buffer is zeroed with memset() after it has
 * been written. One visible failure path sets errno to EINVAL and returns
 * -1; it sits next to the note about the kernel's 4k write limit.
 *
 * NOTE(review): several statements (the skip of non-matching entries, the
 * overflow test, the advance of pos, the final return) are not visible in
 * this listing.
 */
125 static int map_ids(struct list_head
*idmap
, pid_t pid
)
130 char mapbuf
[4096] = {};
133 for (idmap_type_t type
= ID_TYPE_UID
; type
<= ID_TYPE_GID
; type
++) {
134 bool had_entry
= false;
137 list_for_each(p
, idmap
) {
138 struct id_map
*map
= list_entry(p
, struct id_map
, map_head
);
141 * If the map type is ID_TYPE_UIDGID we need to include
142 * it in both gid- and uidmap.
144 if (map
->map_type
!= ID_TYPE_UIDGID
&& map
->map_type
!= type
)
149 left
= sizeof(mapbuf
) - (pos
- mapbuf
);
150 fill
= snprintf(pos
, left
,
151 "%" PRIu32
" %" PRIu32
" %" PRIu32
"\n",
152 map
->nsid
, map
->hostid
, map
->range
);
154 * The kernel only takes <= 4k for writes to
155 * /proc/<pid>/{g,u}id_map
158 return errno
= EINVAL
, -1;
165 rc
= write_id_mapping(type
, pid
, mapbuf
, pos
- mapbuf
);
169 memset(mapbuf
, 0, sizeof(mapbuf
));
/*
 * Reap the child "pid" with waitpid(), retrying while the call fails with
 * EINTR, and then test whether it exited normally with status 0.
 * NOTE(review): the values returned for success and failure are not
 * visible in this listing.
 */
175 static int wait_for_pid(pid_t pid
)
180 rc
= waitpid(pid
, &status
, 0);
181 } while (rc
< 0 && errno
== EINTR
);
183 if (!WIFEXITED(status
) || WEXITSTATUS(status
) != 0)
/*
 * Obtain a user-namespace file descriptor that carries the given ID
 * mappings.
 *
 * Visible sequence: a PF_LOCAL stream socketpair is created for
 * synchronisation; one side calls unshare(CLONE_NEWUSER), sends one byte on
 * sock_fds[0] and then blocks reading one byte; the other side waits for
 * that byte on sock_fds[1], writes the mappings with map_ids(), opens
 * /proc/<pid>/ns/user, sends one byte back and reaps the peer with
 * wait_for_pid().
 *
 * NOTE(review): the process creation (presumably fork()), the error
 * branches, the descriptor clean-up and the return statement are not
 * visible in this listing.
 */
189 static int get_userns_fd_from_idmap(struct list_head
*idmap
)
198 rc
= socketpair(PF_LOCAL
, SOCK_STREAM
| SOCK_CLOEXEC
, 0, sock_fds
);
209 rc
= unshare(CLONE_NEWUSER
);
213 /* Let parent know we're ready to have the idmapping written. */
214 rc
= write_all(sock_fds
[0], &c
, 1);
218 /* Hang around until the parent has persisted our namespace. */
219 rc
= read_all(sock_fds
[0], &c
, 1);
230 /* Wait for child to set up a new namespace. */
231 rc
= read_all(sock_fds
[1], &c
, 1);
237 rc
= map_ids(idmap
, pid
);
243 snprintf(path
, sizeof(path
), "/proc/%d/ns/user", pid
);
244 fd_userns
= open(path
, O_RDONLY
| O_CLOEXEC
| O_NOCTTY
);
246 /* Let child know we've persisted its namespace. */
247 (void)write_all(sock_fds
[1], &c
, 1);
250 rc
= wait_for_pid(pid
);
257 if (rc
< 0 && fd_userns
>= 0) {
/*
 * Open "path" read-only (O_CLOEXEC | O_NOCTTY) as a candidate user
 * namespace. When NS_GET_OWNER_UID is defined the descriptor is probed with
 * that ioctl as a best-effort sanity check.
 * NOTE(review): the failure handling and the return statement are not
 * visible in this listing.
 */
265 static int open_userns(const char *path
)
270 userns_fd
= open(path
, O_RDONLY
| O_CLOEXEC
| O_NOCTTY
);
274 #if defined(NS_GET_OWNER_UID)
276 * We use NS_GET_OWNER_UID to verify that this is a user namespace.
277 * This is on a best-effort basis. If this isn't a userns then
278 * mount_setattr() will tell us to go away later.
280 if (ioctl(userns_fd
, NS_GET_OWNER_UID
, &(uid_t
){-1}) < 0) {
289 * Create an idmapped mount based on context target, unmounting the
290 * non-idmapped target mount and attaching the detached idmapped mount target.
/*
 * Post-mount hook (registered for MNT_STAGE_MOUNT_POST by
 * hook_prepare_options()): turn the freshly mounted target into an
 * idmapped mount.
 *
 * "data" is the struct hook_data whose userns_fd (asserted to be >= 0) is
 * placed into a struct mount_attr together with MOUNT_ATTR_IDMAP.
 *
 * Visible steps:
 *   - obtain a tree descriptor: api->fd_tree is reused when it is valid
 *     (USE_LIBMOUNT_MOUNTFD_SUPPORT builds); a detached clone is opened
 *     with open_tree(OPEN_TREE_CLONE | OPEN_TREE_CLOEXEC);
 *   - attach the idmapping with mount_setattr();
 *   - lazily unmount the original target with umount2(MNT_DETACH);
 *   - attach the idmapped tree at target with move_mount().
 * AT_RECURSIVE is added to open_tree() and mount_setattr() when the option
 * list is recursive. The visible failure returns use -MNT_ERR_IDMAP.
 *
 * NOTE(review): the result of umount2() is not checked on the visible
 * line. The branch conditions, the descriptor clean-up and the success
 * return are not visible in this listing.
 */
292 static int hook_mount_post(
293 struct libmnt_context
*cxt
,
294 const struct libmnt_hookset
*hs
,
297 struct hook_data
*hd
= (struct hook_data
*) data
;
298 struct mount_attr attr
= {
299 .attr_set
= MOUNT_ATTR_IDMAP
,
300 .userns_fd
= hd
->userns_fd
302 const int recursive
= mnt_optlist_is_recursive(cxt
->optlist
);
303 const char *target
= mnt_fs_get_target(cxt
->fs
);
305 int rc
, is_private
= 1;
309 assert(hd
->userns_fd
>= 0);
311 DBG(HOOK
, ul_debugobj(hs
, " attaching namespace to %s", target
));
314 * Once a mount has been attached to the filesystem it can't be
315 * idmapped anymore. So create a new detached mount.
317 #ifdef USE_LIBMOUNT_MOUNTFD_SUPPORT
319 struct libmnt_sysapi
*api
= mnt_context_get_sysapi(cxt
);
321 if (api
&& api
->fd_tree
>= 0) {
322 fd_tree
= api
->fd_tree
;
324 DBG(HOOK
, ul_debugobj(hs
, " reuse tree FD"));
329 fd_tree
= open_tree(-1, target
,
330 OPEN_TREE_CLONE
| OPEN_TREE_CLOEXEC
|
331 (recursive
? AT_RECURSIVE
: 0));
333 DBG(HOOK
, ul_debugobj(hs
, " failed to open tree"));
334 return -MNT_ERR_IDMAP
;
337 /* Attach the idmapping to the mount. */
338 rc
= mount_setattr(fd_tree
, "",
339 AT_EMPTY_PATH
| (recursive
? AT_RECURSIVE
: 0),
340 &attr
, sizeof(attr
));
342 DBG(HOOK
, ul_debugobj(hs
, " failed to set attributes"));
346 /* Attach the idmapped mount. */
348 /* Unmount the old, non-idmapped mount we just cloned and idmapped. */
349 umount2(target
, MNT_DETACH
);
351 rc
= move_mount(fd_tree
, "", -1, target
, MOVE_MOUNT_F_EMPTY_PATH
);
353 DBG(HOOK
, ul_debugobj(hs
, " failed to set move mount"));
359 return -MNT_ERR_IDMAP
;
365 * Process X-mount.idmap= mount option
/*
 * First hook of this hookset (see hookset_idmap.firstcall): process the
 * X-mount.idmap= mount option.
 *
 * The option is looked up in the context's option list using
 * cxt->map_userspace. Its value is handled in one of two ways:
 *   - as a path to an existing user namespace, opened with open_userns();
 *   - as a space-separated list of "[id-type]:id-mount:id-host:id-range"
 *     entries. The prefixes "b:", "g:" and "u:" select ID_TYPE_UIDGID,
 *     ID_TYPE_GID and ID_TYPE_UID; without a prefix the entry is treated as
 *     ID_TYPE_UIDGID. Each entry is parsed with sscanf(), stored in a
 *     calloc()ed struct id_map and appended to hd->id_map; the list is then
 *     turned into a namespace descriptor by get_userns_fd_from_idmap().
 *
 * On the visible success path cxt->force_clone is set and hook_mount_post()
 * is registered for MNT_STAGE_MOUNT_POST with the hook data as argument.
 * The visible error returns use -MNT_ERR_MOUNTOPT (one of them also sets
 * errno to EINVAL).
 *
 * NOTE(review): the conditions that select between the two value formats,
 * the parse-error checks, the clean-up of buf/hd and the success return are
 * not visible in this listing.
 */
367 static int hook_prepare_options(
368 struct libmnt_context
*cxt
,
369 const struct libmnt_hookset
*hs
,
370 void *data
__attribute__((__unused__
)))
372 struct hook_data
*hd
= NULL
;
373 struct libmnt_optlist
*ol
;
374 struct libmnt_opt
*opt
;
376 const char *value
= NULL
;
377 char *saveptr
= NULL
, *tok
, *buf
= NULL
;
379 ol
= mnt_context_get_optlist(cxt
);
383 opt
= mnt_optlist_get_named(ol
, "X-mount.idmap", cxt
->map_userspace
);
386 value
= mnt_opt_get_value(opt
);
389 return errno
= EINVAL
, -MNT_ERR_MOUNTOPT
;
391 hd
= new_hook_data();
395 /* Has the user given us a path to a user namespace? */
397 hd
->userns_fd
= open_userns(value
);
398 if (hd
->userns_fd
< 0)
408 * This is an explicit ID-mapping list of the form:
409 * [id-type]:id-mount:id-host:id-range [...]
411 * We split the list into separate ID-mapping entries. The individual
412 * ID-mapping entries are separated by ' '.
414 * A long while ago I made the kernel support up to 340 individual
415 * ID-mappings. So users have quite a bit of freedom here.
417 for (tok
= strtok_r(buf
, " ", &saveptr
); tok
;
418 tok
= strtok_r(NULL
, " ", &saveptr
)) {
419 struct id_map
*idmap
;
420 idmap_type_t map_type
;
421 uint32_t nsid
= UINT_MAX
, hostid
= UINT_MAX
, range
= UINT_MAX
;
423 if (startswith(tok
, "b:")) {
424 /* b:id-mount:id-host:id-range */
425 map_type
= ID_TYPE_UIDGID
;
427 } else if (startswith(tok
, "g:")) {
428 /* g:id-mount:id-host:id-range */
429 map_type
= ID_TYPE_GID
;
431 } else if (startswith(tok
, "u:")) {
432 /* u:id-mount:id-host:id-range */
433 map_type
= ID_TYPE_UID
;
437 * id-mount:id-host:id-range
439 * If the user didn't specify it explicitly then they
440 * want this to be both a gid- and uidmap.
442 map_type
= ID_TYPE_UIDGID
;
445 /* id-mount:id-host:id-range */
446 rc
= sscanf(tok
, "%" PRIu32
":%" PRIu32
":%" PRIu32
, &nsid
,
451 idmap
= calloc(1, sizeof(*idmap
));
455 idmap
->map_type
= map_type
;
457 idmap
->hostid
= hostid
;
458 idmap
->range
= range
;
459 INIT_LIST_HEAD(&idmap
->map_head
);
460 list_add_tail(&idmap
->map_head
, &hd
->id_map
);
463 hd
->userns_fd
= get_userns_fd_from_idmap(&hd
->id_map
);
464 if (hd
->userns_fd
< 0)
468 /* define post-mount hook to enter the namespace */
469 DBG(HOOK
, ul_debugobj(hs
, " wanted new user namespace"));
470 cxt
->force_clone
= 1; /* require OPEN_TREE_CLONE */
471 rc
= mnt_context_append_hook(cxt
, hs
,
472 MNT_STAGE_MOUNT_POST
,
473 hd
, hook_mount_post
);
481 DBG(HOOK
, ul_debugobj(hs
, " failed to setup idmap"));
484 return -MNT_ERR_MOUNTOPT
;
488 /* de-initialize this module */
/*
 * Hookset clean-up callback (see hookset_idmap.deinit): keep removing this
 * hookset's hooks from the context while mnt_context_remove_hook() returns
 * 0, and release the data attached to each one with free_hook_data().
 * NOTE(review): the declaration of "data" and the return statement are not
 * visible in this listing.
 */
489 static int hookset_deinit(struct libmnt_context
*cxt
, const struct libmnt_hookset
*hs
)
493 DBG(HOOK
, ul_debugobj(hs
, "deinit '%s'", hs
->name
));
495 /* remove all our hooks and free hook data */
496 while (mnt_context_remove_hook(cxt
, hs
, 0, &data
) == 0) {
498 free_hook_data((struct hook_data
*) data
);
/*
 * Hookset descriptor for the X-mount.idmap= implementation: the first hook,
 * hook_prepare_options(), runs at MNT_STAGE_PREP_OPTIONS and
 * hookset_deinit() is the clean-up callback.
 * NOTE(review): other initialisers (if any) are not visible in this
 * listing.
 */
505 const struct libmnt_hookset hookset_idmap
=
509 .firststage
= MNT_STAGE_PREP_OPTIONS
,
510 .firstcall
= hook_prepare_options
,
512 .deinit
= hookset_deinit
515 #endif /* HAVE_MOUNTFD_API */