]> git.ipfire.org Git - thirdparty/util-linux.git/blob - libmount/src/hook_idmap.c
Merge branch 'lsns--Q' of https://github.com/masatake/util-linux
[thirdparty/util-linux.git] / libmount / src / hook_idmap.c
1 /* SPDX-License-Identifier: LGPL-2.1-or-later */
2 /*
3 * This file is part of libmount from util-linux project.
4 *
5 * Copyright (C) 2022 Karel Zak <kzak@redhat.com>
6 * Copyright (C) 2022 Christian Brauner (Microsoft) <brauner@kernel.org>
7 *
8 * libmount is free software; you can redistribute it and/or modify it
9 * under the terms of the GNU Lesser General Public License as published by
10 * the Free Software Foundation; either version 2.1 of the License, or
11 * (at your option) any later version.
12 *
13 *
14 * This is X-mount.idmap= implementation.
15 *
16 * Please, see the comment in libmount/src/hooks.c to understand how hooks work.
17 */
18 #include <stdbool.h>
19 #include <sys/socket.h>
20 #include <sys/wait.h>
21 #include <sys/ioctl.h>
22 #include <sys/mount.h>
23 #include <inttypes.h>
24
25 #include "strutils.h"
26 #include "all-io.h"
27 #include "namespace.h"
28 #include "mount-api-utils.h"
29
30 #include "mountP.h"
31
32 #ifdef HAVE_LINUX_NSFS_H
33 # include <linux/nsfs.h>
34 #endif
35
36 #if defined(HAVE_MOUNTFD_API) && defined(HAVE_LINUX_MOUNT_H)
37
/*
 * Kind of ID map an X-mount.idmap= entry belongs to.
 *
 * The order matters: map_ids() iterates from ID_TYPE_UID to ID_TYPE_GID.
 */
typedef enum idmap_type_t {
	ID_TYPE_UID = 0,	/* entry goes to uid_map only */
	ID_TYPE_GID = 1,	/* entry goes to gid_map only */
	ID_TYPE_UIDGID = 2,	/* entry goes to both uid_map and gid_map */
} idmap_type_t;
43
44 struct id_map {
45 idmap_type_t map_type;
46 uint32_t nsid;
47 uint32_t hostid;
48 uint32_t range;
49 struct list_head map_head;
50 };
51
52 struct hook_data {
53 int userns_fd;
54 struct list_head id_map;
55 };
56
57 static inline struct hook_data *new_hook_data(void)
58 {
59 struct hook_data *hd = calloc(1, sizeof(*hd));
60
61 if (!hd)
62 return NULL;
63
64 INIT_LIST_HEAD(&hd->id_map);
65 hd->userns_fd = -1;
66 return hd;
67 }
68
69 static inline void free_hook_data(struct hook_data *hd)
70 {
71 struct list_head *p, *pnext;
72 struct id_map *idmap;
73
74 if (!hd)
75 return;
76
77 if (hd->userns_fd >= 0) {
78 close(hd->userns_fd);
79 hd->userns_fd = -1;
80 }
81
82 list_for_each_safe(p, pnext, &hd->id_map) {
83 idmap = list_entry(p, struct id_map, map_head);
84 list_del(&idmap->map_head);
85 free(idmap);
86 }
87 INIT_LIST_HEAD(&hd->id_map);
88 free(hd);
89 }
90
91 static int write_id_mapping(idmap_type_t map_type, pid_t pid, const char *buf,
92 size_t buf_size)
93 {
94 int fd = -1, rc = -1, setgroups_fd = -1;
95 char path[PATH_MAX];
96
97 if (geteuid() != 0 && map_type == ID_TYPE_GID) {
98 snprintf(path, sizeof(path), "/proc/%d/setgroups", pid);
99
100 setgroups_fd = open(path, O_WRONLY | O_CLOEXEC | O_NOCTTY);
101 if (setgroups_fd < 0 && errno != ENOENT)
102 goto err;
103
104 if (setgroups_fd >= 0) {
105 rc = write_all(setgroups_fd, "deny\n", strlen("deny\n"));
106 if (rc)
107 goto err;
108 }
109 }
110
111 snprintf(path, sizeof(path), "/proc/%d/%cid_map", pid,
112 map_type == ID_TYPE_UID ? 'u' : 'g');
113
114 fd = open(path, O_WRONLY | O_CLOEXEC | O_NOCTTY);
115 if (fd < 0)
116 goto err;
117
118 rc = write_all(fd, buf, buf_size);
119
120 err:
121 if (fd >= 0)
122 close(fd);
123 if (setgroups_fd >= 0)
124 close(setgroups_fd);
125
126 return rc;
127 }
128
129 static int map_ids(struct list_head *idmap, pid_t pid)
130 {
131 int fill, left;
132 char *pos;
133 int rc = 0;
134 char mapbuf[4096] = {};
135 struct list_head *p;
136
137 for (idmap_type_t type = ID_TYPE_UID; type <= ID_TYPE_GID; type++) {
138 bool had_entry = false;
139
140 pos = mapbuf;
141 list_for_each(p, idmap) {
142 struct id_map *map = list_entry(p, struct id_map, map_head);
143
144 /*
145 * If the map type is ID_TYPE_UIDGID we need to include
146 * it in both gid- and uidmap.
147 */
148 if (map->map_type != ID_TYPE_UIDGID && map->map_type != type)
149 continue;
150
151 had_entry = true;
152
153 left = sizeof(mapbuf) - (pos - mapbuf);
154 fill = snprintf(pos, left,
155 "%" PRIu32 " %" PRIu32 " %" PRIu32 "\n",
156 map->nsid, map->hostid, map->range);
157 /*
158 * The kernel only takes <= 4k for writes to
159 * /proc/<pid>/{g,u}id_map
160 */
161 if (fill <= 0)
162 return errno = EINVAL, -1;
163
164 pos += fill;
165 }
166 if (!had_entry)
167 continue;
168
169 rc = write_id_mapping(type, pid, mapbuf, pos - mapbuf);
170 if (rc < 0)
171 return -1;
172
173 memset(mapbuf, 0, sizeof(mapbuf));
174 }
175
176 return 0;
177 }
178
/*
 * Wait for child @pid to terminate, restarting waitpid() when it is
 * interrupted by a signal.
 *
 * Returns 0 if the child exited normally with status 0, and -1 if waitpid()
 * failed, the child was terminated abnormally or it exited with a non-zero
 * status.
 */
static int wait_for_pid(pid_t pid)
{
	int status = 0;
	pid_t rc;

	do {
		rc = waitpid(pid, &status, 0);
	} while (rc < 0 && errno == EINTR);

	/* status is meaningless if waitpid() itself failed (e.g. ECHILD) */
	if (rc < 0)
		return -1;

	if (!WIFEXITED(status) || WEXITSTATUS(status) != 0)
		return -1;

	return 0;
}
192
193 static int get_userns_fd_from_idmap(struct list_head *idmap)
194 {
195 int fd_userns = -1;
196 ssize_t rc = -1;
197 char c = '1';
198 pid_t pid;
199 int sock_fds[2];
200 char path[PATH_MAX];
201
202 rc = socketpair(PF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0, sock_fds);
203 if (rc < 0)
204 return -errno;
205
206 pid = fork();
207 if (pid < 0)
208 goto err_close_sock;
209
210 if (pid == 0) {
211 close(sock_fds[1]);
212
213 rc = unshare(CLONE_NEWUSER);
214 if (rc < 0)
215 _exit(EXIT_FAILURE);
216
217 /* Let parent know we're ready to have the idmapping written. */
218 rc = write_all(sock_fds[0], &c, 1);
219 if (rc)
220 _exit(EXIT_FAILURE);
221
222 /* Hang around until the parent has persisted our namespace. */
223 rc = read_all(sock_fds[0], &c, 1);
224 if (rc != 1)
225 _exit(EXIT_FAILURE);
226
227 close(sock_fds[0]);
228
229 _exit(EXIT_SUCCESS);
230 }
231 close(sock_fds[0]);
232 sock_fds[0] = -1;
233
234 /* Wait for child to set up a new namespace. */
235 rc = read_all(sock_fds[1], &c, 1);
236 if (rc != 1) {
237 kill(pid, SIGKILL);
238 goto err_wait;
239 }
240
241 rc = map_ids(idmap, pid);
242 if (rc < 0) {
243 kill(pid, SIGKILL);
244 goto err_wait;
245 }
246
247 snprintf(path, sizeof(path), "/proc/%d/ns/user", pid);
248 fd_userns = open(path, O_RDONLY | O_CLOEXEC | O_NOCTTY);
249
250 /* Let child know we've persisted its namespace. */
251 (void)write_all(sock_fds[1], &c, 1);
252
253 err_wait:
254 rc = wait_for_pid(pid);
255
256 err_close_sock:
257 if (sock_fds[0] > 0)
258 close(sock_fds[0]);
259 close(sock_fds[1]);
260
261 if (rc < 0 && fd_userns >= 0) {
262 close(fd_userns);
263 fd_userns = -1;
264 }
265
266 return fd_userns;
267 }
268
269 static int open_userns(const char *path)
270 {
271
272 int userns_fd;
273
274 userns_fd = open(path, O_RDONLY | O_CLOEXEC | O_NOCTTY);
275 if (userns_fd < 0)
276 return -1;
277
278 #if defined(NS_GET_OWNER_UID)
279 /*
280 * We use NS_GET_OWNER_UID to verify that this is a user namespace.
281 * This is on a best-effort basis. If this isn't a userns then
282 * mount_setattr() will tell us to go away later.
283 */
284 if (ioctl(userns_fd, NS_GET_OWNER_UID, &(uid_t){-1}) < 0) {
285 close(userns_fd);
286 return -1;
287 }
288 #endif
289 return userns_fd;
290 }
291
292 /*
293 * Create an idmapped mount based on context target, unmounting the
294 * non-idmapped target mount and attaching the detached idmapped mount target.
295 */
296 static int hook_mount_post(
297 struct libmnt_context *cxt,
298 const struct libmnt_hookset *hs,
299 void *data)
300 {
301 struct hook_data *hd = (struct hook_data *) data;
302 struct mount_attr attr = {
303 .attr_set = MOUNT_ATTR_IDMAP,
304 .userns_fd = hd->userns_fd
305 };
306 const int recursive = mnt_optlist_is_recursive(cxt->optlist);
307 const char *target = mnt_fs_get_target(cxt->fs);
308 int fd_tree = -1;
309 int rc, is_private = 1;
310
311 assert(hd);
312 assert(target);
313 assert(hd->userns_fd >= 0);
314
315 DBG(HOOK, ul_debugobj(hs, " attaching namespace to %s", target));
316
317 /*
318 * Once a mount has been attached to the filesystem it can't be
319 * idmapped anymore. So create a new detached mount.
320 */
321 #ifdef USE_LIBMOUNT_MOUNTFD_SUPPORT
322 {
323 struct libmnt_sysapi *api = mnt_context_get_sysapi(cxt);
324
325 if (api && api->fd_tree >= 0) {
326 fd_tree = api->fd_tree;
327 is_private = 0;
328 DBG(HOOK, ul_debugobj(hs, " reuse tree FD"));
329 }
330 }
331 #endif
332 if (fd_tree < 0)
333 fd_tree = open_tree(-1, target,
334 OPEN_TREE_CLONE | OPEN_TREE_CLOEXEC |
335 (recursive ? AT_RECURSIVE : 0));
336 if (fd_tree < 0) {
337 DBG(HOOK, ul_debugobj(hs, " failed to open tree"));
338 return -MNT_ERR_IDMAP;
339 }
340
341 /* Attach the idmapping to the mount. */
342 rc = mount_setattr(fd_tree, "",
343 AT_EMPTY_PATH | (recursive ? AT_RECURSIVE : 0),
344 &attr, sizeof(attr));
345 if (rc < 0) {
346 DBG(HOOK, ul_debugobj(hs, " failed to set attributes"));
347 goto done;
348 }
349
350 /* Attach the idmapped mount. */
351 if (is_private) {
352 /* Unmount the old, non-idmapped mount we just cloned and idmapped. */
353 umount2(target, MNT_DETACH);
354
355 rc = move_mount(fd_tree, "", -1, target, MOVE_MOUNT_F_EMPTY_PATH);
356 if (rc)
357 DBG(HOOK, ul_debugobj(hs, " failed to set move mount"));
358 }
359 done:
360 if (is_private)
361 close(fd_tree);
362 if (rc < 0)
363 return -MNT_ERR_IDMAP;
364
365 return 0;
366 }
367
368 /*
369 * Process X-mount.idmap= mount option
370 */
371 static int hook_prepare_options(
372 struct libmnt_context *cxt,
373 const struct libmnt_hookset *hs,
374 void *data __attribute__((__unused__)))
375 {
376 struct hook_data *hd = NULL;
377 struct libmnt_optlist *ol;
378 struct libmnt_opt *opt;
379 int rc;
380 const char *value = NULL;
381 char *saveptr = NULL, *tok, *buf = NULL;
382
383 ol = mnt_context_get_optlist(cxt);
384 if (!ol)
385 return 0;
386
387 opt = mnt_optlist_get_named(ol, "X-mount.idmap", cxt->map_userspace);
388 if (!opt)
389 return 0;
390
391 value = mnt_opt_get_value(opt);
392 if (value)
393 value = skip_blank(value);
394 if (!value || !*value)
395 return errno = EINVAL, -MNT_ERR_MOUNTOPT;
396
397 hd = new_hook_data();
398 if (!hd)
399 return -ENOMEM;
400
401 /* Has the user given us a path to a user namespace? */
402 if (*value == '/') {
403 hd->userns_fd = open_userns(value);
404 if (hd->userns_fd < 0)
405 goto err;
406 goto done;
407 }
408
409 buf = strdup(value);
410 if (!buf)
411 goto err;
412
413 /*
414 * This is an explicit ID-mapping list of the form:
415 * [id-type]:id-mount:id-host:id-range [...]
416 *
417 * We split the list into separate ID-mapping entries. The individual
418 * ID-mapping entries are separated by ' '.
419 *
420 * A long while ago I made the kernel support up to 340 individual
421 * ID-mappings. So users have quite a bit of freedom here.
422 */
423 for (tok = strtok_r(buf, " ", &saveptr); tok;
424 tok = strtok_r(NULL, " ", &saveptr)) {
425 struct id_map *idmap;
426 idmap_type_t map_type;
427 uint32_t nsid = UINT_MAX, hostid = UINT_MAX, range = UINT_MAX;
428
429 if (startswith(tok, "b:")) {
430 /* b:id-mount:id-host:id-range */
431 map_type = ID_TYPE_UIDGID;
432 tok += 2;
433 } else if (startswith(tok, "g:")) {
434 /* g:id-mount:id-host:id-range */
435 map_type = ID_TYPE_GID;
436 tok += 2;
437 } else if (startswith(tok, "u:")) {
438 /* u:id-mount:id-host:id-range */
439 map_type = ID_TYPE_UID;
440 tok += 2;
441 } else {
442 /*
443 * id-mount:id-host:id-range
444 *
445 * If the user didn't specify it explicitly then they
446 * want this to be both a gid- and uidmap.
447 */
448 map_type = ID_TYPE_UIDGID;
449 }
450
451 /* id-mount:id-host:id-range */
452 rc = sscanf(tok, "%" PRIu32 ":%" PRIu32 ":%" PRIu32, &nsid,
453 &hostid, &range);
454 if (rc != 3)
455 goto err;
456
457 idmap = calloc(1, sizeof(*idmap));
458 if (!idmap)
459 goto err;
460
461 idmap->map_type = map_type;
462 idmap->nsid = nsid;
463 idmap->hostid = hostid;
464 idmap->range = range;
465 INIT_LIST_HEAD(&idmap->map_head);
466 list_add_tail(&idmap->map_head, &hd->id_map);
467 }
468
469 hd->userns_fd = get_userns_fd_from_idmap(&hd->id_map);
470 if (hd->userns_fd < 0)
471 goto err;
472
473 done:
474 /* define post-mount hook to enter the namespace */
475 DBG(HOOK, ul_debugobj(hs, " wanted new user namespace"));
476 cxt->force_clone = 1; /* require OPEN_TREE_CLONE */
477 rc = mnt_context_append_hook(cxt, hs,
478 MNT_STAGE_MOUNT_POST,
479 hd, hook_mount_post);
480 if (rc < 0)
481 goto err;
482
483 free(buf);
484 return 0;
485
486 err:
487 DBG(HOOK, ul_debugobj(hs, " failed to setup idmap"));
488 free_hook_data(hd);
489 free(buf);
490 return -MNT_ERR_MOUNTOPT;
491 }
492
493
494 /* de-initiallize this module */
495 static int hookset_deinit(struct libmnt_context *cxt, const struct libmnt_hookset *hs)
496 {
497 void *data;
498
499 DBG(HOOK, ul_debugobj(hs, "deinit '%s'", hs->name));
500
501 /* remove all our hooks and free hook data */
502 while (mnt_context_remove_hook(cxt, hs, 0, &data) == 0) {
503 if (data)
504 free_hook_data((struct hook_data *) data);
505 data = NULL;
506 }
507
508 return 0;
509 }
510
511 const struct libmnt_hookset hookset_idmap =
512 {
513 .name = "__idmap",
514
515 .firststage = MNT_STAGE_PREP_OPTIONS,
516 .firstcall = hook_prepare_options,
517
518 .deinit = hookset_deinit
519 };
520
521 #endif /* HAVE_MOUNTFD_API && HAVE_LINUX_MOUNT_H */