]> git.ipfire.org Git - thirdparty/util-linux.git/blob - libmount/src/hook_idmap.c
libmount: (idmap) reuse tree FD, fix umount
[thirdparty/util-linux.git] / libmount / src / hook_idmap.c
1 /* SPDX-License-Identifier: LGPL-2.1-or-later */
2 /*
3 * This file is part of libmount from util-linux project.
4 *
5 * Copyright (C) 2022 Karel Zak <kzak@redhat.com>
6 * Copyright (C) 2022 Christian Brauner (Microsoft) <brauner@kernel.org>
7 *
8 * libmount is free software; you can redistribute it and/or modify it
9 * under the terms of the GNU Lesser General Public License as published by
10 * the Free Software Foundation; either version 2.1 of the License, or
11 * (at your option) any later version.
12 *
13 *
14 * This is X-mount.idmap= implementation.
15 *
16 * Please, see the comment in libmount/src/hooks.c to understand how hooks work.
17 */
18 #include <stdbool.h>
19 #include <sys/socket.h>
20 #include <sys/wait.h>
21 #include <inttypes.h>
22
23 #include "mountP.h"
24 #include "strutils.h"
25 #include "all-io.h"
26 #include "namespace.h"
27
28 #ifdef HAVE_LINUX_NSFS_H
29 # include <linux/nsfs.h>
30 #endif
31
32 #ifdef HAVE_MOUNTFD_API
33
/*
 * Selects which ID map a mapping entry is written to. map_ids() iterates
 * from ID_TYPE_UID to ID_TYPE_GID, so the numeric order matters.
 */
typedef enum idmap_type_t {
	ID_TYPE_UID    = 0,	/* entry goes to the uidmap only */
	ID_TYPE_GID    = 1,	/* entry goes to the gidmap only */
	ID_TYPE_UIDGID = 2,	/* entry goes to both the uidmap and the gidmap */
} idmap_type_t;
39
40 struct id_map {
41 idmap_type_t map_type;
42 uint32_t nsid;
43 uint32_t hostid;
44 uint32_t range;
45 struct list_head map_head;
46 };
47
48 struct hook_data {
49 int userns_fd;
50 struct list_head id_map;
51 };
52
53 static inline struct hook_data *new_hook_data(void)
54 {
55 struct hook_data *hd = calloc(1, sizeof(*hd));
56
57 if (!hd)
58 return NULL;
59
60 INIT_LIST_HEAD(&hd->id_map);
61 hd->userns_fd = -1;
62 return hd;
63 }
64
65 static inline void free_hook_data(struct hook_data *hd)
66 {
67 struct list_head *p, *pnext;
68 struct id_map *idmap;
69
70 if (!hd)
71 return;
72
73 if (hd->userns_fd >= 0) {
74 close(hd->userns_fd);
75 hd->userns_fd = -1;
76 }
77
78 list_for_each_safe(p, pnext, &hd->id_map) {
79 idmap = list_entry(p, struct id_map, map_head);
80 list_del(&idmap->map_head);
81 free(idmap);
82 }
83 INIT_LIST_HEAD(&hd->id_map);
84 free(hd);
85 }
86
87 static int write_id_mapping(idmap_type_t map_type, pid_t pid, const char *buf,
88 size_t buf_size)
89 {
90 int fd = -1, rc = -1, setgroups_fd = -1;
91 char path[PATH_MAX];
92
93 if (geteuid() != 0 && map_type == ID_TYPE_GID) {
94 snprintf(path, sizeof(path), "/proc/%d/setgroups", pid);
95
96 setgroups_fd = open(path, O_WRONLY | O_CLOEXEC | O_NOCTTY);
97 if (setgroups_fd < 0 && errno != ENOENT)
98 goto err;
99
100 if (setgroups_fd >= 0) {
101 rc = write_all(setgroups_fd, "deny\n", strlen("deny\n"));
102 if (rc)
103 goto err;
104 }
105 }
106
107 snprintf(path, sizeof(path), "/proc/%d/%cid_map", pid,
108 map_type == ID_TYPE_UID ? 'u' : 'g');
109
110 fd = open(path, O_WRONLY | O_CLOEXEC | O_NOCTTY);
111 if (fd < 0)
112 goto err;
113
114 rc = write_all(fd, buf, buf_size);
115
116 err:
117 if (fd >= 0)
118 close(fd);
119 if (setgroups_fd >= 0)
120 close(setgroups_fd);
121
122 return rc;
123 }
124
125 static int map_ids(struct list_head *idmap, pid_t pid)
126 {
127 int fill, left;
128 char *pos;
129 int rc = 0;
130 char mapbuf[4096] = {};
131 struct list_head *p;
132
133 for (idmap_type_t type = ID_TYPE_UID; type <= ID_TYPE_GID; type++) {
134 bool had_entry = false;
135
136 pos = mapbuf;
137 list_for_each(p, idmap) {
138 struct id_map *map = list_entry(p, struct id_map, map_head);
139
140 /*
141 * If the map type is ID_TYPE_UIDGID we need to include
142 * it in both gid- and uidmap.
143 */
144 if (map->map_type != ID_TYPE_UIDGID && map->map_type != type)
145 continue;
146
147 had_entry = true;
148
149 left = sizeof(mapbuf) - (pos - mapbuf);
150 fill = snprintf(pos, left,
151 "%" PRIu32 " %" PRIu32 " %" PRIu32 "\n",
152 map->nsid, map->hostid, map->range);
153 /*
154 * The kernel only takes <= 4k for writes to
155 * /proc/<pid>/{g,u}id_map
156 */
157 if (fill <= 0)
158 return errno = EINVAL, -1;
159
160 pos += fill;
161 }
162 if (!had_entry)
163 continue;
164
165 rc = write_id_mapping(type, pid, mapbuf, pos - mapbuf);
166 if (rc < 0)
167 return -1;
168
169 memset(mapbuf, 0, sizeof(mapbuf));
170 }
171
172 return 0;
173 }
174
/*
 * Wait for child @pid to terminate, restarting waitpid() when it is
 * interrupted by a signal.
 *
 * Returns 0 if the child exited normally with status 0. Returns -1 if
 * waitpid() failed, or if the child was terminated by a signal or exited
 * with a non-zero status.
 */
static int wait_for_pid(pid_t pid)
{
	int status = 0;
	pid_t rc;

	do {
		rc = waitpid(pid, &status, 0);
	} while (rc < 0 && errno == EINTR);

	/* status is not filled in when waitpid() fails; do not inspect it */
	if (rc < 0)
		return -1;

	if (!WIFEXITED(status) || WEXITSTATUS(status) != 0)
		return -1;

	return 0;
}
188
189 static int get_userns_fd_from_idmap(struct list_head *idmap)
190 {
191 int fd_userns = -1;
192 ssize_t rc = -1;
193 char c = '1';
194 pid_t pid;
195 int sock_fds[2];
196 char path[PATH_MAX];
197
198 rc = socketpair(PF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0, sock_fds);
199 if (rc < 0)
200 return -errno;
201
202 pid = fork();
203 if (pid < 0)
204 goto err_close_sock;
205
206 if (pid == 0) {
207 close(sock_fds[1]);
208
209 rc = unshare(CLONE_NEWUSER);
210 if (rc < 0)
211 _exit(EXIT_FAILURE);
212
213 /* Let parent know we're ready to have the idmapping written. */
214 rc = write_all(sock_fds[0], &c, 1);
215 if (rc)
216 _exit(EXIT_FAILURE);
217
218 /* Hang around until the parent has persisted our namespace. */
219 rc = read_all(sock_fds[0], &c, 1);
220 if (rc != 1)
221 _exit(EXIT_FAILURE);
222
223 close(sock_fds[0]);
224
225 _exit(EXIT_SUCCESS);
226 }
227 close(sock_fds[0]);
228 sock_fds[0] = -1;
229
230 /* Wait for child to set up a new namespace. */
231 rc = read_all(sock_fds[1], &c, 1);
232 if (rc != 1) {
233 kill(pid, SIGKILL);
234 goto err_wait;
235 }
236
237 rc = map_ids(idmap, pid);
238 if (rc < 0) {
239 kill(pid, SIGKILL);
240 goto err_wait;
241 }
242
243 snprintf(path, sizeof(path), "/proc/%d/ns/user", pid);
244 fd_userns = open(path, O_RDONLY | O_CLOEXEC | O_NOCTTY);
245
246 /* Let child know we've persisted its namespace. */
247 (void)write_all(sock_fds[1], &c, 1);
248
249 err_wait:
250 rc = wait_for_pid(pid);
251
252 err_close_sock:
253 if (sock_fds[0] > 0)
254 close(sock_fds[0]);
255 close(sock_fds[1]);
256
257 if (rc < 0 && fd_userns >= 0) {
258 close(fd_userns);
259 fd_userns = -1;
260 }
261
262 return fd_userns;
263 }
264
265 static int open_userns(const char *path)
266 {
267
268 int userns_fd;
269
270 userns_fd = open(path, O_RDONLY | O_CLOEXEC | O_NOCTTY);
271 if (userns_fd < 0)
272 return -1;
273
274 #if defined(NS_GET_OWNER_UID)
275 /*
276 * We use NS_GET_OWNER_UID to verify that this is a user namespace.
277 * This is on a best-effort basis. If this isn't a userns then
278 * mount_setattr() will tell us to go away later.
279 */
280 if (ioctl(userns_fd, NS_GET_OWNER_UID, &(uid_t){-1}) < 0) {
281 close(userns_fd);
282 return -1;
283 }
284 #endif
285 return userns_fd;
286 }
287
288 /*
289 * Create an idmapped mount based on context target, unmounting the
290 * non-idmapped target mount and attaching the detached idmapped mount target.
291 */
292 static int hook_mount_post(
293 struct libmnt_context *cxt,
294 const struct libmnt_hookset *hs,
295 void *data)
296 {
297 struct hook_data *hd = (struct hook_data *) data;
298 struct mount_attr attr = {
299 .attr_set = MOUNT_ATTR_IDMAP,
300 .userns_fd = hd->userns_fd
301 };
302 const int recursive = mnt_optlist_is_recursive(cxt->optlist);
303 const char *target = mnt_fs_get_target(cxt->fs);
304 int fd_tree = -1;
305 int rc, is_private = 1;
306
307 assert(hd);
308 assert(target);
309 assert(hd->userns_fd >= 0);
310
311 DBG(HOOK, ul_debugobj(hs, " attaching namespace to %s", target));
312
313 /*
314 * Once a mount has been attached to the filesystem it can't be
315 * idmapped anymore. So create a new detached mount.
316 */
317 #ifdef USE_LIBMOUNT_MOUNTFD_SUPPORT
318 {
319 struct libmnt_sysapi *api = mnt_context_get_sysapi(cxt);
320
321 if (api && api->fd_tree >= 0) {
322 fd_tree = api->fd_tree;
323 is_private = 0;
324 DBG(HOOK, ul_debugobj(hs, " reuse tree FD"));
325 }
326 }
327 #endif
328 if (fd_tree < 0)
329 fd_tree = open_tree(-1, target,
330 OPEN_TREE_CLONE | OPEN_TREE_CLOEXEC |
331 (recursive ? AT_RECURSIVE : 0));
332 if (fd_tree < 0) {
333 DBG(HOOK, ul_debugobj(hs, " failed to open tree"));
334 return -MNT_ERR_IDMAP;
335 }
336
337 /* Attach the idmapping to the mount. */
338 rc = mount_setattr(fd_tree, "",
339 AT_EMPTY_PATH | (recursive ? AT_RECURSIVE : 0),
340 &attr, sizeof(attr));
341 if (rc < 0) {
342 DBG(HOOK, ul_debugobj(hs, " failed to set attributes"));
343 goto done;
344 }
345
346 /* Attach the idmapped mount. */
347 if (is_private) {
348 /* Unmount the old, non-idmapped mount we just cloned and idmapped. */
349 umount2(target, MNT_DETACH);
350
351 rc = move_mount(fd_tree, "", -1, target, MOVE_MOUNT_F_EMPTY_PATH);
352 if (rc)
353 DBG(HOOK, ul_debugobj(hs, " failed to set move mount"));
354 }
355 done:
356 if (is_private)
357 close(fd_tree);
358 if (rc < 0)
359 return -MNT_ERR_IDMAP;
360
361 return 0;
362 }
363
364 /*
365 * Process X-mount.idmap= mount option
366 */
367 static int hook_prepare_options(
368 struct libmnt_context *cxt,
369 const struct libmnt_hookset *hs,
370 void *data __attribute__((__unused__)))
371 {
372 struct hook_data *hd = NULL;
373 struct libmnt_optlist *ol;
374 struct libmnt_opt *opt;
375 int rc;
376 const char *value = NULL;
377 char *saveptr = NULL, *tok, *buf = NULL;
378
379 ol = mnt_context_get_optlist(cxt);
380 if (!ol)
381 return 0;
382
383 opt = mnt_optlist_get_named(ol, "X-mount.idmap", cxt->map_userspace);
384 if (!opt)
385 return 0;
386 value = mnt_opt_get_value(opt);
387
388 if (!value)
389 return errno = EINVAL, -MNT_ERR_MOUNTOPT;
390
391 hd = new_hook_data();
392 if (!hd)
393 return -ENOMEM;
394
395 /* Has the user given us a path to a user namespace? */
396 if (*value == '/') {
397 hd->userns_fd = open_userns(value);
398 if (hd->userns_fd < 0)
399 goto err;
400 goto done;
401 }
402
403 buf = strdup(value);
404 if (!buf)
405 goto err;
406
407 /*
408 * This is an explicit ID-mapping list of the form:
409 * [id-type]:id-mount:id-host:id-range [...]
410 *
411 * We split the list into separate ID-mapping entries. The individual
412 * ID-mapping entries are separated by ' '.
413 *
414 * A long while ago I made the kernel support up to 340 individual
415 * ID-mappings. So users have quite a bit of freedom here.
416 */
417 for (tok = strtok_r(buf, " ", &saveptr); tok;
418 tok = strtok_r(NULL, " ", &saveptr)) {
419 struct id_map *idmap;
420 idmap_type_t map_type;
421 uint32_t nsid = UINT_MAX, hostid = UINT_MAX, range = UINT_MAX;
422
423 if (startswith(tok, "b:")) {
424 /* b:id-mount:id-host:id-range */
425 map_type = ID_TYPE_UIDGID;
426 tok += 2;
427 } else if (startswith(tok, "g:")) {
428 /* g:id-mount:id-host:id-range */
429 map_type = ID_TYPE_GID;
430 tok += 2;
431 } else if (startswith(tok, "u:")) {
432 /* u:id-mount:id-host:id-range */
433 map_type = ID_TYPE_UID;
434 tok += 2;
435 } else {
436 /*
437 * id-mount:id-host:id-range
438 *
439 * If the user didn't specify it explicitly then they
440 * want this to be both a gid- and uidmap.
441 */
442 map_type = ID_TYPE_UIDGID;
443 }
444
445 /* id-mount:id-host:id-range */
446 rc = sscanf(tok, "%" PRIu32 ":%" PRIu32 ":%" PRIu32, &nsid,
447 &hostid, &range);
448 if (rc != 3)
449 goto err;
450
451 idmap = calloc(1, sizeof(*idmap));
452 if (!idmap)
453 goto err;
454
455 idmap->map_type = map_type;
456 idmap->nsid = nsid;
457 idmap->hostid = hostid;
458 idmap->range = range;
459 INIT_LIST_HEAD(&idmap->map_head);
460 list_add_tail(&idmap->map_head, &hd->id_map);
461 }
462
463 hd->userns_fd = get_userns_fd_from_idmap(&hd->id_map);
464 if (hd->userns_fd < 0)
465 goto err;
466
467 done:
468 /* define post-mount hook to enter the namespace */
469 DBG(HOOK, ul_debugobj(hs, " wanted new user namespace"));
470 cxt->force_clone = 1; /* require OPEN_TREE_CLONE */
471 rc = mnt_context_append_hook(cxt, hs,
472 MNT_STAGE_MOUNT_POST,
473 hd, hook_mount_post);
474 if (rc < 0)
475 goto err;
476
477 free(buf);
478 return 0;
479
480 err:
481 DBG(HOOK, ul_debugobj(hs, " failed to setup idmap"));
482 free_hook_data(hd);
483 free(buf);
484 return -MNT_ERR_MOUNTOPT;
485 }
486
487
488 /* de-initiallize this module */
489 static int hookset_deinit(struct libmnt_context *cxt, const struct libmnt_hookset *hs)
490 {
491 void *data;
492
493 DBG(HOOK, ul_debugobj(hs, "deinit '%s'", hs->name));
494
495 /* remove all our hooks and free hook data */
496 while (mnt_context_remove_hook(cxt, hs, 0, &data) == 0) {
497 if (data)
498 free_hook_data((struct hook_data *) data);
499 data = NULL;
500 }
501
502 return 0;
503 }
504
505 const struct libmnt_hookset hookset_idmap =
506 {
507 .name = "__idmap",
508
509 .firststage = MNT_STAGE_PREP_OPTIONS,
510 .firstcall = hook_prepare_options,
511
512 .deinit = hookset_deinit
513 };
514
515 #endif /* HAVE_MOUNTFD_API */