]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/shared/mount-util.c
Merge pull request #14329 from anitazha/user_invocation_id
[thirdparty/systemd.git] / src / shared / mount-util.c
1 /* SPDX-License-Identifier: LGPL-2.1+ */
2
3 #include <errno.h>
4 #include <stdlib.h>
5 #include <sys/mount.h>
6 #include <sys/stat.h>
7 #include <sys/statvfs.h>
8 #include <unistd.h>
9
10 #include "alloc-util.h"
11 #include "extract-word.h"
12 #include "fd-util.h"
13 #include "fileio.h"
14 #include "fs-util.h"
15 #include "hashmap.h"
16 #include "libmount-util.h"
17 #include "mount-util.h"
18 #include "mountpoint-util.h"
19 #include "parse-util.h"
20 #include "path-util.h"
21 #include "set.h"
22 #include "stdio-util.h"
23 #include "string-util.h"
24 #include "strv.h"
25
26 int umount_recursive(const char *prefix, int flags) {
27 int n = 0, r;
28 bool again;
29
30 /* Try to umount everything recursively below a
31 * directory. Also, take care of stacked mounts, and keep
32 * unmounting them until they are gone. */
33
34 do {
35 _cleanup_(mnt_free_tablep) struct libmnt_table *table = NULL;
36 _cleanup_(mnt_free_iterp) struct libmnt_iter *iter = NULL;
37
38 again = false;
39
40 r = libmount_parse("/proc/self/mountinfo", NULL, &table, &iter);
41 if (r < 0)
42 return log_debug_errno(r, "Failed to parse /proc/self/mountinfo: %m");
43
44 for (;;) {
45 struct libmnt_fs *fs;
46 const char *path;
47
48 r = mnt_table_next_fs(table, iter, &fs);
49 if (r == 1)
50 break;
51 if (r < 0)
52 return log_debug_errno(r, "Failed to get next entry from /proc/self/mountinfo: %m");
53
54 path = mnt_fs_get_target(fs);
55 if (!path)
56 continue;
57
58 if (!path_startswith(path, prefix))
59 continue;
60
61 if (umount2(path, flags) < 0) {
62 r = log_debug_errno(errno, "Failed to umount %s: %m", path);
63 continue;
64 }
65
66 log_debug("Successfully unmounted %s", path);
67
68 again = true;
69 n++;
70
71 break;
72 }
73
74 } while (again);
75
76 return n;
77 }
78
79 /* Get the mount flags for the mountpoint at "path" from "table" */
80 static int get_mount_flags(const char *path, unsigned long *flags, struct libmnt_table *table) {
81 struct statvfs buf = {};
82 struct libmnt_fs *fs = NULL;
83 const char *opts = NULL;
84 int r = 0;
85
86 fs = mnt_table_find_target(table, path, MNT_ITER_FORWARD);
87 if (!fs) {
88 log_warning("Could not find '%s' in mount table", path);
89 goto fallback;
90 }
91
92 opts = mnt_fs_get_vfs_options(fs);
93 r = mnt_optstr_get_flags(opts, flags, mnt_get_builtin_optmap(MNT_LINUX_MAP));
94 if (r != 0) {
95 log_warning_errno(r, "Could not get flags for '%s': %m", path);
96 goto fallback;
97 }
98
99 /* relatime is default and trying to set it in an unprivileged container causes EPERM */
100 *flags &= ~MS_RELATIME;
101 return 0;
102
103 fallback:
104 if (statvfs(path, &buf) < 0)
105 return -errno;
106
107 *flags = buf.f_flag;
108 return 0;
109 }
110
111 /* Use this function only if you do not have direct access to /proc/self/mountinfo but the caller can open it
112 * for you. This is the case when /proc is masked or not mounted. Otherwise, use bind_remount_recursive. */
113 int bind_remount_recursive_with_mountinfo(
114 const char *prefix,
115 unsigned long new_flags,
116 unsigned long flags_mask,
117 char **blacklist,
118 FILE *proc_self_mountinfo) {
119
120 _cleanup_set_free_free_ Set *done = NULL;
121 _cleanup_free_ char *cleaned = NULL;
122 int r;
123
124 assert(proc_self_mountinfo);
125
126 /* Recursively remount a directory (and all its submounts) read-only or read-write. If the directory is already
127 * mounted, we reuse the mount and simply mark it MS_BIND|MS_RDONLY (or remove the MS_RDONLY for read-write
128 * operation). If it isn't we first make it one. Afterwards we apply MS_BIND|MS_RDONLY (or remove MS_RDONLY) to
129 * all submounts we can access, too. When mounts are stacked on the same mount point we only care for each
130 * individual "top-level" mount on each point, as we cannot influence/access the underlying mounts anyway. We
131 * do not have any effect on future submounts that might get propagated, they migt be writable. This includes
132 * future submounts that have been triggered via autofs.
133 *
134 * If the "blacklist" parameter is specified it may contain a list of subtrees to exclude from the
135 * remount operation. Note that we'll ignore the blacklist for the top-level path. */
136
137 cleaned = strdup(prefix);
138 if (!cleaned)
139 return -ENOMEM;
140
141 path_simplify(cleaned, false);
142
143 done = set_new(&path_hash_ops);
144 if (!done)
145 return -ENOMEM;
146
147 for (;;) {
148 _cleanup_set_free_free_ Set *todo = NULL;
149 _cleanup_(mnt_free_tablep) struct libmnt_table *table = NULL;
150 _cleanup_(mnt_free_iterp) struct libmnt_iter *iter = NULL;
151 bool top_autofs = false;
152 char *x;
153 unsigned long orig_flags;
154
155 todo = set_new(&path_hash_ops);
156 if (!todo)
157 return -ENOMEM;
158
159 rewind(proc_self_mountinfo);
160
161 r = libmount_parse("/proc/self/mountinfo", proc_self_mountinfo, &table, &iter);
162 if (r < 0)
163 return log_debug_errno(r, "Failed to parse /proc/self/mountinfo: %m");
164
165 for (;;) {
166 struct libmnt_fs *fs;
167 const char *path, *type;
168
169 r = mnt_table_next_fs(table, iter, &fs);
170 if (r == 1)
171 break;
172 if (r < 0)
173 return log_debug_errno(r, "Failed to get next entry from /proc/self/mountinfo: %m");
174
175 path = mnt_fs_get_target(fs);
176 type = mnt_fs_get_fstype(fs);
177 if (!path || !type)
178 continue;
179
180 if (!path_startswith(path, cleaned))
181 continue;
182
183 /* Ignore this mount if it is blacklisted, but only if it isn't the top-level mount
184 * we shall operate on. */
185 if (!path_equal(path, cleaned)) {
186 bool blacklisted = false;
187 char **i;
188
189 STRV_FOREACH(i, blacklist) {
190 if (path_equal(*i, cleaned))
191 continue;
192
193 if (!path_startswith(*i, cleaned))
194 continue;
195
196 if (path_startswith(path, *i)) {
197 blacklisted = true;
198 log_debug("Not remounting %s blacklisted by %s, called for %s",
199 path, *i, cleaned);
200 break;
201 }
202 }
203 if (blacklisted)
204 continue;
205 }
206
207 /* Let's ignore autofs mounts. If they aren't
208 * triggered yet, we want to avoid triggering
209 * them, as we don't make any guarantees for
210 * future submounts anyway. If they are
211 * already triggered, then we will find
212 * another entry for this. */
213 if (streq(type, "autofs")) {
214 top_autofs = top_autofs || path_equal(path, cleaned);
215 continue;
216 }
217
218 if (!set_contains(done, path)) {
219 r = set_put_strdup(todo, path);
220 if (r < 0)
221 return r;
222 }
223 }
224
225 /* If we have no submounts to process anymore and if
226 * the root is either already done, or an autofs, we
227 * are done */
228 if (set_isempty(todo) &&
229 (top_autofs || set_contains(done, cleaned)))
230 return 0;
231
232 if (!set_contains(done, cleaned) &&
233 !set_contains(todo, cleaned)) {
234 /* The prefix directory itself is not yet a mount, make it one. */
235 if (mount(cleaned, cleaned, NULL, MS_BIND|MS_REC, NULL) < 0)
236 return -errno;
237
238 orig_flags = 0;
239 (void) get_mount_flags(cleaned, &orig_flags, table);
240 orig_flags &= ~MS_RDONLY;
241
242 if (mount(NULL, cleaned, NULL, (orig_flags & ~flags_mask)|MS_BIND|MS_REMOUNT|new_flags, NULL) < 0)
243 return -errno;
244
245 log_debug("Made top-level directory %s a mount point.", prefix);
246
247 r = set_put_strdup(done, cleaned);
248 if (r < 0)
249 return r;
250 }
251
252 while ((x = set_steal_first(todo))) {
253
254 r = set_consume(done, x);
255 if (IN_SET(r, 0, -EEXIST))
256 continue;
257 if (r < 0)
258 return r;
259
260 /* Deal with mount points that are obstructed by a later mount */
261 r = path_is_mount_point(x, NULL, 0);
262 if (IN_SET(r, 0, -ENOENT))
263 continue;
264 if (IN_SET(r, -EACCES, -EPERM)) {
265 /* Even if root user invoke this, submounts under private FUSE or NFS mount points
266 * may not be acceessed. E.g.,
267 *
268 * $ bindfs --no-allow-other ~/mnt/mnt ~/mnt/mnt
269 * $ bindfs --no-allow-other ~/mnt ~/mnt
270 *
271 * Then, root user cannot access the mount point ~/mnt/mnt.
272 * In such cases, the submounts are ignored, as we have no way to manage them. */
273 log_debug_errno(r, "Failed to determine '%s' is mount point or not, ignoring: %m", x);
274 continue;
275 }
276 if (r < 0)
277 return r;
278
279 /* Try to reuse the original flag set */
280 orig_flags = 0;
281 (void) get_mount_flags(x, &orig_flags, table);
282 orig_flags &= ~MS_RDONLY;
283
284 if (mount(NULL, x, NULL, (orig_flags & ~flags_mask)|MS_BIND|MS_REMOUNT|new_flags, NULL) < 0)
285 return -errno;
286
287 log_debug("Remounted %s read-only.", x);
288 }
289 }
290 }
291
292 int bind_remount_recursive(const char *prefix, unsigned long new_flags, unsigned long flags_mask, char **blacklist) {
293 _cleanup_fclose_ FILE *proc_self_mountinfo = NULL;
294 int r;
295
296 r = fopen_unlocked("/proc/self/mountinfo", "re", &proc_self_mountinfo);
297 if (r < 0)
298 return r;
299
300 return bind_remount_recursive_with_mountinfo(prefix, new_flags, flags_mask, blacklist, proc_self_mountinfo);
301 }
302
/* Makes the mount at 'path' the new root of the calling process: change into it, move the mount over "/",
 * chroot() into the current directory and finally change to the new "/". The steps run strictly in this
 * order and the first one that fails stops the sequence. Returns 0 on success, or -errno of the failing
 * call. */
int mount_move_root(const char *path) {
        assert(path);

        /* || short-circuits, hence errno always belongs to the first call that failed. */
        if (chdir(path) < 0 ||
            mount(path, "/", NULL, MS_MOVE, NULL) < 0 ||
            chroot(".") < 0 ||
            chdir("/") < 0)
                return -errno;

        return 0;
}
320
321 int repeat_unmount(const char *path, int flags) {
322 bool done = false;
323
324 assert(path);
325
326 /* If there are multiple mounts on a mount point, this
327 * removes them all */
328
329 for (;;) {
330 if (umount2(path, flags) < 0) {
331
332 if (errno == EINVAL)
333 return done;
334
335 return -errno;
336 }
337
338 done = true;
339 }
340 }
341
342 int mode_to_inaccessible_node(const char *runtime_dir, mode_t mode, char **dest) {
343 /* This function maps a node type to a corresponding inaccessible file node. These nodes are created during
344 * early boot by PID 1. In some cases we lacked the privs to create the character and block devices (maybe
345 * because we run in an userns environment, or miss CAP_SYS_MKNOD, or run with a devices policy that excludes
346 * device nodes with major and minor of 0), but that's fine, in that case we use an AF_UNIX file node instead,
347 * which is not the same, but close enough for most uses. And most importantly, the kernel allows bind mounts
348 * from socket nodes to any non-directory file nodes, and that's the most important thing that matters. */
349 _cleanup_free_ char *d = NULL;
350 const char *node = NULL;
351 char *tmp;
352
353 assert(dest);
354
355 switch(mode & S_IFMT) {
356 case S_IFREG:
357 node = "/inaccessible/reg";
358 break;
359
360 case S_IFDIR:
361 node = "/inaccessible/dir";
362 break;
363
364 case S_IFCHR:
365 d = path_join(runtime_dir, "/inaccessible/chr");
366 if (!d)
367 return log_oom();
368
369 if (access(d, F_OK) == 0) {
370 *dest = TAKE_PTR(d);
371 return 0;
372 }
373
374 node = "/inaccessible/sock";
375 break;
376
377 case S_IFBLK:
378 d = path_join(runtime_dir, "/inaccessible/blk");
379 if (!d)
380 return log_oom();
381
382 if (access(d, F_OK) == 0) {
383 *dest = TAKE_PTR(d);
384 return 0;
385 }
386
387 node = "/inaccessible/sock";
388 break;
389
390 case S_IFIFO:
391 node = "/inaccessible/fifo";
392 break;
393
394 case S_IFSOCK:
395 node = "/inaccessible/sock";
396 break;
397 }
398
399 if (!node)
400 return -EINVAL;
401
402 tmp = path_join(runtime_dir, node);
403 if (!tmp)
404 return log_oom();
405
406 *dest = tmp;
407 return 0;
408 }
409
410 #define FLAG(name) (flags & name ? STRINGIFY(name) "|" : "")
411 static char* mount_flags_to_string(long unsigned flags) {
412 char *x;
413 _cleanup_free_ char *y = NULL;
414 long unsigned overflow;
415
416 overflow = flags & ~(MS_RDONLY |
417 MS_NOSUID |
418 MS_NODEV |
419 MS_NOEXEC |
420 MS_SYNCHRONOUS |
421 MS_REMOUNT |
422 MS_MANDLOCK |
423 MS_DIRSYNC |
424 MS_NOATIME |
425 MS_NODIRATIME |
426 MS_BIND |
427 MS_MOVE |
428 MS_REC |
429 MS_SILENT |
430 MS_POSIXACL |
431 MS_UNBINDABLE |
432 MS_PRIVATE |
433 MS_SLAVE |
434 MS_SHARED |
435 MS_RELATIME |
436 MS_KERNMOUNT |
437 MS_I_VERSION |
438 MS_STRICTATIME |
439 MS_LAZYTIME);
440
441 if (flags == 0 || overflow != 0)
442 if (asprintf(&y, "%lx", overflow) < 0)
443 return NULL;
444
445 x = strjoin(FLAG(MS_RDONLY),
446 FLAG(MS_NOSUID),
447 FLAG(MS_NODEV),
448 FLAG(MS_NOEXEC),
449 FLAG(MS_SYNCHRONOUS),
450 FLAG(MS_REMOUNT),
451 FLAG(MS_MANDLOCK),
452 FLAG(MS_DIRSYNC),
453 FLAG(MS_NOATIME),
454 FLAG(MS_NODIRATIME),
455 FLAG(MS_BIND),
456 FLAG(MS_MOVE),
457 FLAG(MS_REC),
458 FLAG(MS_SILENT),
459 FLAG(MS_POSIXACL),
460 FLAG(MS_UNBINDABLE),
461 FLAG(MS_PRIVATE),
462 FLAG(MS_SLAVE),
463 FLAG(MS_SHARED),
464 FLAG(MS_RELATIME),
465 FLAG(MS_KERNMOUNT),
466 FLAG(MS_I_VERSION),
467 FLAG(MS_STRICTATIME),
468 FLAG(MS_LAZYTIME),
469 y);
470 if (!x)
471 return NULL;
472 if (!y)
473 x[strlen(x) - 1] = '\0'; /* truncate the last | */
474 return x;
475 }
476
477 int mount_verbose(
478 int error_log_level,
479 const char *what,
480 const char *where,
481 const char *type,
482 unsigned long flags,
483 const char *options) {
484
485 _cleanup_free_ char *fl = NULL, *o = NULL;
486 unsigned long f;
487 int r;
488
489 r = mount_option_mangle(options, flags, &f, &o);
490 if (r < 0)
491 return log_full_errno(error_log_level, r,
492 "Failed to mangle mount options %s: %m",
493 strempty(options));
494
495 fl = mount_flags_to_string(f);
496
497 if ((f & MS_REMOUNT) && !what && !type)
498 log_debug("Remounting %s (%s \"%s\")...",
499 where, strnull(fl), strempty(o));
500 else if (!what && !type)
501 log_debug("Mounting %s (%s \"%s\")...",
502 where, strnull(fl), strempty(o));
503 else if ((f & MS_BIND) && !type)
504 log_debug("Bind-mounting %s on %s (%s \"%s\")...",
505 what, where, strnull(fl), strempty(o));
506 else if (f & MS_MOVE)
507 log_debug("Moving mount %s → %s (%s \"%s\")...",
508 what, where, strnull(fl), strempty(o));
509 else
510 log_debug("Mounting %s on %s (%s \"%s\")...",
511 strna(type), where, strnull(fl), strempty(o));
512 if (mount(what, where, type, f, o) < 0)
513 return log_full_errno(error_log_level, errno,
514 "Failed to mount %s (type %s) on %s (%s \"%s\"): %m",
515 strna(what), strna(type), where, strnull(fl), strempty(o));
516 return 0;
517 }
518
/* Wrapper around umount() that announces the operation at debug level and logs a failure at error level.
 * Returns 0 on success, or the result of log_error_errno() on failure. */
int umount_verbose(const char *what) {
        log_debug("Umounting %s...", what);

        if (umount(what) >= 0)
                return 0;

        return log_error_errno(errno, "Failed to unmount %s: %m", what);
}
525
526 int mount_option_mangle(
527 const char *options,
528 unsigned long mount_flags,
529 unsigned long *ret_mount_flags,
530 char **ret_remaining_options) {
531
532 const struct libmnt_optmap *map;
533 _cleanup_free_ char *ret = NULL;
534 const char *p;
535 int r;
536
537 /* This extracts mount flags from the mount options, and store
538 * non-mount-flag options to '*ret_remaining_options'.
539 * E.g.,
540 * "rw,nosuid,nodev,relatime,size=1630748k,mode=700,uid=1000,gid=1000"
541 * is split to MS_NOSUID|MS_NODEV|MS_RELATIME and
542 * "size=1630748k,mode=700,uid=1000,gid=1000".
543 * See more examples in test-mount-utils.c.
544 *
545 * Note that if 'options' does not contain any non-mount-flag options,
546 * then '*ret_remaining_options' is set to NULL instead of empty string.
547 * Note that this does not check validity of options stored in
548 * '*ret_remaining_options'.
549 * Note that if 'options' is NULL, then this just copies 'mount_flags'
550 * to '*ret_mount_flags'. */
551
552 assert(ret_mount_flags);
553 assert(ret_remaining_options);
554
555 map = mnt_get_builtin_optmap(MNT_LINUX_MAP);
556 if (!map)
557 return -EINVAL;
558
559 p = options;
560 for (;;) {
561 _cleanup_free_ char *word = NULL;
562 const struct libmnt_optmap *ent;
563
564 r = extract_first_word(&p, &word, ",", EXTRACT_UNQUOTE);
565 if (r < 0)
566 return r;
567 if (r == 0)
568 break;
569
570 for (ent = map; ent->name; ent++) {
571 /* All entries in MNT_LINUX_MAP do not take any argument.
572 * Thus, ent->name does not contain "=" or "[=]". */
573 if (!streq(word, ent->name))
574 continue;
575
576 if (!(ent->mask & MNT_INVERT))
577 mount_flags |= ent->id;
578 else if (mount_flags & ent->id)
579 mount_flags ^= ent->id;
580
581 break;
582 }
583
584 /* If 'word' is not a mount flag, then store it in '*ret_remaining_options'. */
585 if (!ent->name && !strextend_with_separator(&ret, ",", word, NULL))
586 return -ENOMEM;
587 }
588
589 *ret_mount_flags = mount_flags;
590 *ret_remaining_options = TAKE_PTR(ret);
591
592 return 0;
593 }