]>
Commit | Line | Data |
---|---|---|
0bbc62dd CB |
1 | /* SPDX-License-Identifier: LGPL-2.1-or-later */ |
2 | /* | |
3 | * This file is part of libmount from util-linux project. | |
4 | * | |
5 | * Copyright (C) 2022 Karel Zak <kzak@redhat.com> | |
6 | * Copyright (C) 2022 Christian Brauner (Microsoft) <brauner@kernel.org> | |
7 | * | |
8 | * libmount is free software; you can redistribute it and/or modify it | |
9 | * under the terms of the GNU Lesser General Public License as published by | |
10 | * the Free Software Foundation; either version 2.1 of the License, or | |
11 | * (at your option) any later version. | |
12 | * | |
13 | * | |
14 | * This is the X-mount.idmap= implementation. | |
7f014eda KZ |
15 | * |
16 | * Please see the comment in libmount/src/hooks.c to understand how hooks work. | |
0bbc62dd CB |
17 | */ |
18 | #include <stdbool.h> | |
19 | #include <sys/socket.h> | |
20 | #include <sys/wait.h> | |
21 | #include <inttypes.h> | |
22 | ||
23 | #include "mountP.h" | |
24 | #include "strutils.h" | |
25 | #include "all-io.h" | |
91868488 | 26 | #include "namespace.h" |
0bbc62dd CB |
27 | |
28 | #ifdef HAVE_LINUX_NSFS_H | |
29 | # include <linux/nsfs.h> | |
30 | #endif | |
31 | ||
9040c090 | 32 | #ifdef HAVE_MOUNTFD_API |
0bbc62dd CB |
33 | |
/*
 * Kind of a single ID-mapping entry.
 *
 * The numeric values are spelled out because map_ids() walks the types
 * from ID_TYPE_UID up to and including ID_TYPE_GID.
 */
typedef enum idmap_type_t {
	ID_TYPE_UID    = 0,	/* entry goes to the uidmap only */
	ID_TYPE_GID    = 1,	/* entry goes to the gidmap only */
	ID_TYPE_UIDGID = 2,	/* entry goes to both the uidmap and the gidmap */
} idmap_type_t;
39 | ||
40 | struct id_map { | |
41 | idmap_type_t map_type; | |
42 | uint32_t nsid; | |
43 | uint32_t hostid; | |
44 | uint32_t range; | |
45 | struct list_head map_head; | |
46 | }; | |
47 | ||
48 | struct hook_data { | |
49 | int userns_fd; | |
50 | struct list_head id_map; | |
51 | }; | |
52 | ||
53 | static inline struct hook_data *new_hook_data(void) | |
54 | { | |
55 | struct hook_data *hd = calloc(1, sizeof(*hd)); | |
56 | ||
57 | if (!hd) | |
58 | return NULL; | |
59 | ||
60 | INIT_LIST_HEAD(&hd->id_map); | |
61 | hd->userns_fd = -1; | |
62 | return hd; | |
63 | } | |
64 | ||
65 | static inline void free_hook_data(struct hook_data *hd) | |
66 | { | |
67 | struct list_head *p, *pnext; | |
68 | struct id_map *idmap; | |
69 | ||
70 | if (!hd) | |
71 | return; | |
72 | ||
73 | if (hd->userns_fd >= 0) { | |
74 | close(hd->userns_fd); | |
75 | hd->userns_fd = -1; | |
76 | } | |
77 | ||
78 | list_for_each_safe(p, pnext, &hd->id_map) { | |
79 | idmap = list_entry(p, struct id_map, map_head); | |
80 | list_del(&idmap->map_head); | |
81 | free(idmap); | |
82 | } | |
83 | INIT_LIST_HEAD(&hd->id_map); | |
84 | free(hd); | |
85 | } | |
86 | ||
87 | static int write_id_mapping(idmap_type_t map_type, pid_t pid, const char *buf, | |
88 | size_t buf_size) | |
89 | { | |
90 | int fd = -1, rc = -1, setgroups_fd = -1; | |
91 | char path[PATH_MAX]; | |
92 | ||
93 | if (geteuid() != 0 && map_type == ID_TYPE_GID) { | |
94 | snprintf(path, sizeof(path), "/proc/%d/setgroups", pid); | |
95 | ||
96 | setgroups_fd = open(path, O_WRONLY | O_CLOEXEC | O_NOCTTY); | |
97 | if (setgroups_fd < 0 && errno != ENOENT) | |
98 | goto err; | |
99 | ||
100 | if (setgroups_fd >= 0) { | |
101 | rc = write_all(setgroups_fd, "deny\n", strlen("deny\n")); | |
102 | if (rc) | |
103 | goto err; | |
104 | } | |
105 | } | |
106 | ||
107 | snprintf(path, sizeof(path), "/proc/%d/%cid_map", pid, | |
108 | map_type == ID_TYPE_UID ? 'u' : 'g'); | |
109 | ||
110 | fd = open(path, O_WRONLY | O_CLOEXEC | O_NOCTTY); | |
111 | if (fd < 0) | |
112 | goto err; | |
113 | ||
114 | rc = write_all(fd, buf, buf_size); | |
115 | ||
116 | err: | |
117 | if (fd >= 0) | |
118 | close(fd); | |
119 | if (setgroups_fd >= 0) | |
120 | close(setgroups_fd); | |
121 | ||
122 | return rc; | |
123 | } | |
124 | ||
125 | static int map_ids(struct list_head *idmap, pid_t pid) | |
126 | { | |
127 | int fill, left; | |
128 | char *pos; | |
129 | int rc = 0; | |
130 | char mapbuf[4096] = {}; | |
131 | struct list_head *p; | |
132 | ||
133 | for (idmap_type_t type = ID_TYPE_UID; type <= ID_TYPE_GID; type++) { | |
134 | bool had_entry = false; | |
135 | ||
136 | pos = mapbuf; | |
137 | list_for_each(p, idmap) { | |
138 | struct id_map *map = list_entry(p, struct id_map, map_head); | |
139 | ||
140 | /* | |
141 | * If the map type is ID_TYPE_UIDGID we need to include | |
142 | * it in both gid- and uidmap. | |
143 | */ | |
144 | if (map->map_type != ID_TYPE_UIDGID && map->map_type != type) | |
145 | continue; | |
146 | ||
147 | had_entry = true; | |
148 | ||
149 | left = sizeof(mapbuf) - (pos - mapbuf); | |
150 | fill = snprintf(pos, left, | |
151 | "%" PRIu32 " %" PRIu32 " %" PRIu32 "\n", | |
152 | map->nsid, map->hostid, map->range); | |
153 | /* | |
154 | * The kernel only takes <= 4k for writes to | |
155 | * /proc/<pid>/{g,u}id_map | |
156 | */ | |
157 | if (fill <= 0) | |
158 | return errno = EINVAL, -1; | |
159 | ||
160 | pos += fill; | |
161 | } | |
162 | if (!had_entry) | |
163 | continue; | |
164 | ||
165 | rc = write_id_mapping(type, pid, mapbuf, pos - mapbuf); | |
166 | if (rc < 0) | |
167 | return -1; | |
168 | ||
169 | memset(mapbuf, 0, sizeof(mapbuf)); | |
170 | } | |
171 | ||
172 | return 0; | |
173 | } | |
174 | ||
/*
 * Wait for child @pid to terminate, restarting the wait when it is
 * interrupted by a signal.
 *
 * Returns 0 if the child exited normally with status 0; -1 if waitpid()
 * failed, the child was terminated abnormally, or it exited with a
 * non-zero status.
 */
static int wait_for_pid(pid_t pid)
{
	int status = 0;
	pid_t rc;

	do {
		rc = waitpid(pid, &status, 0);
	} while (rc < 0 && errno == EINTR);

	/* status is meaningful only if waitpid() actually reaped the child */
	if (rc < 0)
		return -1;

	if (!WIFEXITED(status) || WEXITSTATUS(status) != 0)
		return -1;

	return 0;
}
188 | ||
189 | static int get_userns_fd_from_idmap(struct list_head *idmap) | |
190 | { | |
191 | int fd_userns = -1; | |
192 | ssize_t rc = -1; | |
193 | char c = '1'; | |
194 | pid_t pid; | |
195 | int sock_fds[2]; | |
196 | char path[PATH_MAX]; | |
197 | ||
198 | rc = socketpair(PF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0, sock_fds); | |
199 | if (rc < 0) | |
200 | return -errno; | |
201 | ||
202 | pid = fork(); | |
203 | if (pid < 0) | |
204 | goto err_close_sock; | |
205 | ||
206 | if (pid == 0) { | |
207 | close(sock_fds[1]); | |
208 | ||
209 | rc = unshare(CLONE_NEWUSER); | |
210 | if (rc < 0) | |
211 | _exit(EXIT_FAILURE); | |
212 | ||
81a73d92 | 213 | /* Let parent know we're ready to have the idmapping written. */ |
0bbc62dd CB |
214 | rc = write_all(sock_fds[0], &c, 1); |
215 | if (rc) | |
216 | _exit(EXIT_FAILURE); | |
217 | ||
81a73d92 CB |
218 | /* Hang around until the parent has persisted our namespace. */ |
219 | rc = read_all(sock_fds[0], &c, 1); | |
220 | if (rc != 1) | |
221 | _exit(EXIT_FAILURE); | |
222 | ||
0bbc62dd CB |
223 | close(sock_fds[0]); |
224 | ||
225 | _exit(EXIT_SUCCESS); | |
226 | } | |
227 | close(sock_fds[0]); | |
228 | sock_fds[0] = -1; | |
229 | ||
81a73d92 | 230 | /* Wait for child to set up a new namespace. */ |
0bbc62dd | 231 | rc = read_all(sock_fds[1], &c, 1); |
648e2d6c CB |
232 | if (rc != 1) { |
233 | kill(pid, SIGKILL); | |
0bbc62dd | 234 | goto err_wait; |
648e2d6c | 235 | } |
0bbc62dd CB |
236 | |
237 | rc = map_ids(idmap, pid); | |
648e2d6c CB |
238 | if (rc < 0) { |
239 | kill(pid, SIGKILL); | |
0bbc62dd | 240 | goto err_wait; |
648e2d6c | 241 | } |
0bbc62dd CB |
242 | |
243 | snprintf(path, sizeof(path), "/proc/%d/ns/user", pid); | |
244 | fd_userns = open(path, O_RDONLY | O_CLOEXEC | O_NOCTTY); | |
245 | ||
81a73d92 | 246 | /* Let child know we've persisted its namespace. */ |
567700df | 247 | (void)write_all(sock_fds[1], &c, 1); |
81a73d92 | 248 | |
0bbc62dd CB |
249 | err_wait: |
250 | rc = wait_for_pid(pid); | |
251 | ||
252 | err_close_sock: | |
253 | if (sock_fds[0] > 0) | |
254 | close(sock_fds[0]); | |
255 | close(sock_fds[1]); | |
256 | ||
257 | if (rc < 0 && fd_userns >= 0) { | |
258 | close(fd_userns); | |
259 | fd_userns = -1; | |
260 | } | |
261 | ||
262 | return fd_userns; | |
263 | } | |
264 | ||
265 | static int open_userns(const char *path) | |
266 | { | |
267 | ||
268 | int userns_fd; | |
269 | ||
270 | userns_fd = open(path, O_RDONLY | O_CLOEXEC | O_NOCTTY); | |
271 | if (userns_fd < 0) | |
272 | return -1; | |
273 | ||
274 | #if defined(NS_GET_OWNER_UID) | |
275 | /* | |
276 | * We use NS_GET_OWNER_UID to verify that this is a user namespace. | |
277 | * This is on a best-effort basis. If this isn't a userns then | |
278 | * mount_setattr() will tell us to go away later. | |
279 | */ | |
280 | if (ioctl(userns_fd, NS_GET_OWNER_UID, &(uid_t){-1}) < 0) { | |
281 | close(userns_fd); | |
282 | return -1; | |
283 | } | |
284 | #endif | |
285 | return userns_fd; | |
286 | } | |
287 | ||
288 | /* | |
289 | * Create an idmapped mount based on context target, unmounting the | |
290 | * non-idmapped target mount and attaching the detached idmapped mount target. | |
291 | */ | |
292 | static int hook_mount_post( | |
293 | struct libmnt_context *cxt, | |
294 | const struct libmnt_hookset *hs, | |
295 | void *data) | |
296 | { | |
297 | struct hook_data *hd = (struct hook_data *) data; | |
298 | struct mount_attr attr = { | |
299 | .attr_set = MOUNT_ATTR_IDMAP, | |
300 | .userns_fd = hd->userns_fd | |
301 | }; | |
4255ebeb | 302 | const int recursive = mnt_optlist_is_recursive(cxt->optlist); |
0bbc62dd CB |
303 | const char *target = mnt_fs_get_target(cxt->fs); |
304 | int fd_tree = -1; | |
8149f7b6 | 305 | int rc, is_private = 1; |
0bbc62dd | 306 | |
0bbc62dd CB |
307 | assert(hd); |
308 | assert(target); | |
309 | assert(hd->userns_fd >= 0); | |
310 | ||
311 | DBG(HOOK, ul_debugobj(hs, " attaching namespace to %s", target)); | |
312 | ||
313 | /* | |
314 | * Once a mount has been attached to the filesystem it can't be | |
315 | * idmapped anymore. So create a new detached mount. | |
316 | */ | |
8149f7b6 KZ |
317 | #ifdef USE_LIBMOUNT_MOUNTFD_SUPPORT |
318 | { | |
319 | struct libmnt_sysapi *api = mnt_context_get_sysapi(cxt); | |
320 | ||
321 | if (api && api->fd_tree >= 0) { | |
322 | fd_tree = api->fd_tree; | |
323 | is_private = 0; | |
324 | DBG(HOOK, ul_debugobj(hs, " reuse tree FD")); | |
325 | } | |
326 | } | |
327 | #endif | |
328 | if (fd_tree < 0) | |
329 | fd_tree = open_tree(-1, target, | |
0bbc62dd CB |
330 | OPEN_TREE_CLONE | OPEN_TREE_CLOEXEC | |
331 | (recursive ? AT_RECURSIVE : 0)); | |
332 | if (fd_tree < 0) { | |
333 | DBG(HOOK, ul_debugobj(hs, " failed to open tree")); | |
334 | return -MNT_ERR_IDMAP; | |
335 | } | |
336 | ||
337 | /* Attach the idmapping to the mount. */ | |
338 | rc = mount_setattr(fd_tree, "", | |
339 | AT_EMPTY_PATH | (recursive ? AT_RECURSIVE : 0), | |
340 | &attr, sizeof(attr)); | |
341 | if (rc < 0) { | |
342 | DBG(HOOK, ul_debugobj(hs, " failed to set attributes")); | |
343 | goto done; | |
344 | } | |
0bbc62dd CB |
345 | |
346 | /* Attach the idmapped mount. */ | |
8149f7b6 KZ |
347 | if (is_private) { |
348 | /* Unmount the old, non-idmapped mount we just cloned and idmapped. */ | |
349 | umount2(target, MNT_DETACH); | |
0bbc62dd | 350 | |
8149f7b6 KZ |
351 | rc = move_mount(fd_tree, "", -1, target, MOVE_MOUNT_F_EMPTY_PATH); |
352 | if (rc) | |
353 | DBG(HOOK, ul_debugobj(hs, " failed to set move mount")); | |
354 | } | |
0bbc62dd | 355 | done: |
8149f7b6 KZ |
356 | if (is_private) |
357 | close(fd_tree); | |
0bbc62dd CB |
358 | if (rc < 0) |
359 | return -MNT_ERR_IDMAP; | |
360 | ||
361 | return 0; | |
362 | } | |
363 | ||
364 | /* | |
365 | * Process X-mount.idmap= mount option | |
366 | */ | |
367 | static int hook_prepare_options( | |
368 | struct libmnt_context *cxt, | |
369 | const struct libmnt_hookset *hs, | |
370 | void *data __attribute__((__unused__))) | |
371 | { | |
372 | struct hook_data *hd = NULL; | |
ccf46146 KZ |
373 | struct libmnt_optlist *ol; |
374 | struct libmnt_opt *opt; | |
0bbc62dd | 375 | int rc; |
ccf46146 KZ |
376 | const char *value = NULL; |
377 | char *saveptr = NULL, *tok, *buf = NULL; | |
0bbc62dd | 378 | |
ccf46146 KZ |
379 | ol = mnt_context_get_optlist(cxt); |
380 | if (!ol) | |
0bbc62dd CB |
381 | return 0; |
382 | ||
ccf46146 KZ |
383 | opt = mnt_optlist_get_named(ol, "X-mount.idmap", cxt->map_userspace); |
384 | if (!opt) | |
385 | return 0; | |
386 | value = mnt_opt_get_value(opt); | |
0bbc62dd | 387 | |
ccf46146 | 388 | if (!value) |
0bbc62dd CB |
389 | return errno = EINVAL, -MNT_ERR_MOUNTOPT; |
390 | ||
391 | hd = new_hook_data(); | |
392 | if (!hd) | |
393 | return -ENOMEM; | |
394 | ||
395 | /* Has the user given us a path to a user namespace? */ | |
396 | if (*value == '/') { | |
397 | hd->userns_fd = open_userns(value); | |
398 | if (hd->userns_fd < 0) | |
399 | goto err; | |
400 | goto done; | |
401 | } | |
402 | ||
ccf46146 KZ |
403 | buf = strdup(value); |
404 | if (!buf) | |
ad3330ff | 405 | goto err; |
ccf46146 | 406 | |
0bbc62dd CB |
407 | /* |
408 | * This is an explicit ID-mapping list of the form: | |
409 | * [id-type]:id-mount:id-host:id-range [...] | |
410 | * | |
411 | * We split the list into separate ID-mapping entries. The individual | |
412 | * ID-mapping entries are separated by ' '. | |
413 | * | |
414 | * A long while ago I made the kernel support up to 340 individual | |
415 | * ID-mappings. So users have quite a bit of freedom here. | |
416 | */ | |
ccf46146 | 417 | for (tok = strtok_r(buf, " ", &saveptr); tok; |
0bbc62dd CB |
418 | tok = strtok_r(NULL, " ", &saveptr)) { |
419 | struct id_map *idmap; | |
420 | idmap_type_t map_type; | |
421 | uint32_t nsid = UINT_MAX, hostid = UINT_MAX, range = UINT_MAX; | |
422 | ||
423 | if (startswith(tok, "b:")) { | |
424 | /* b:id-mount:id-host:id-range */ | |
425 | map_type = ID_TYPE_UIDGID; | |
426 | tok += 2; | |
427 | } else if (startswith(tok, "g:")) { | |
428 | /* g:id-mount:id-host:id-range */ | |
429 | map_type = ID_TYPE_GID; | |
430 | tok += 2; | |
431 | } else if (startswith(tok, "u:")) { | |
432 | /* u:id-mount:id-host:id-range */ | |
433 | map_type = ID_TYPE_UID; | |
434 | tok += 2; | |
435 | } else { | |
436 | /* | |
437 | * id-mount:id-host:id-range | |
438 | * | |
439 | * If the user didn't specify it explicitly then they | |
440 | * want this to be both a gid- and uidmap. | |
441 | */ | |
442 | map_type = ID_TYPE_UIDGID; | |
443 | } | |
444 | ||
445 | /* id-mount:id-host:id-range */ | |
446 | rc = sscanf(tok, "%" PRIu32 ":%" PRIu32 ":%" PRIu32, &nsid, | |
447 | &hostid, &range); | |
448 | if (rc != 3) | |
449 | goto err; | |
450 | ||
451 | idmap = calloc(1, sizeof(*idmap)); | |
452 | if (!idmap) | |
453 | goto err; | |
454 | ||
455 | idmap->map_type = map_type; | |
456 | idmap->nsid = nsid; | |
457 | idmap->hostid = hostid; | |
458 | idmap->range = range; | |
459 | INIT_LIST_HEAD(&idmap->map_head); | |
460 | list_add_tail(&idmap->map_head, &hd->id_map); | |
461 | } | |
462 | ||
463 | hd->userns_fd = get_userns_fd_from_idmap(&hd->id_map); | |
464 | if (hd->userns_fd < 0) | |
465 | goto err; | |
466 | ||
467 | done: | |
468 | /* define post-mount hook to enter the namespace */ | |
469 | DBG(HOOK, ul_debugobj(hs, " wanted new user namespace")); | |
8149f7b6 | 470 | cxt->force_clone = 1; /* require OPEN_TREE_CLONE */ |
0bbc62dd CB |
471 | rc = mnt_context_append_hook(cxt, hs, |
472 | MNT_STAGE_MOUNT_POST, | |
473 | hd, hook_mount_post); | |
474 | if (rc < 0) | |
475 | goto err; | |
ad3330ff KZ |
476 | |
477 | free(buf); | |
0bbc62dd CB |
478 | return 0; |
479 | ||
480 | err: | |
481 | DBG(HOOK, ul_debugobj(hs, " failed to setup idmap")); | |
482 | free_hook_data(hd); | |
ccf46146 | 483 | free(buf); |
0bbc62dd CB |
484 | return -MNT_ERR_MOUNTOPT; |
485 | } | |
486 | ||
487 | ||
488 | /* de-initiallize this module */ | |
489 | static int hookset_deinit(struct libmnt_context *cxt, const struct libmnt_hookset *hs) | |
490 | { | |
491 | void *data; | |
492 | ||
493 | DBG(HOOK, ul_debugobj(hs, "deinit '%s'", hs->name)); | |
494 | ||
495 | /* remove all our hooks and free hook data */ | |
496 | while (mnt_context_remove_hook(cxt, hs, 0, &data) == 0) { | |
497 | if (data) | |
498 | free_hook_data((struct hook_data *) data); | |
499 | data = NULL; | |
500 | } | |
501 | ||
502 | return 0; | |
503 | } | |
504 | ||
505 | const struct libmnt_hookset hookset_idmap = | |
506 | { | |
507 | .name = "__idmap", | |
508 | ||
509 | .firststage = MNT_STAGE_PREP_OPTIONS, | |
510 | .firstcall = hook_prepare_options, | |
511 | ||
512 | .deinit = hookset_deinit | |
513 | }; | |
514 | ||
9040c090 | 515 | #endif /* HAVE_MOUNTFD_API */ |