]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/shared/mount-util.c
f3ee656c0f179374262638f06882f9770132c1f2
[thirdparty/systemd.git] / src / shared / mount-util.c
1 /* SPDX-License-Identifier: LGPL-2.1+ */
2
3 #include <errno.h>
4 #include <stdlib.h>
5 #include <sys/mount.h>
6 #include <sys/stat.h>
7 #include <sys/statvfs.h>
8 #include <unistd.h>
9
10 #include "alloc-util.h"
11 #include "extract-word.h"
12 #include "fd-util.h"
13 #include "fileio.h"
14 #include "fs-util.h"
15 #include "hashmap.h"
16 #include "libmount-util.h"
17 #include "mount-util.h"
18 #include "mountpoint-util.h"
19 #include "parse-util.h"
20 #include "path-util.h"
21 #include "set.h"
22 #include "stdio-util.h"
23 #include "string-util.h"
24 #include "strv.h"
25
26 int umount_recursive(const char *prefix, int flags) {
27 int n = 0, r;
28 bool again;
29
30 /* Try to umount everything recursively below a
31 * directory. Also, take care of stacked mounts, and keep
32 * unmounting them until they are gone. */
33
34 do {
35 _cleanup_(mnt_free_tablep) struct libmnt_table *table = NULL;
36 _cleanup_(mnt_free_iterp) struct libmnt_iter *iter = NULL;
37
38 again = false;
39
40 r = libmount_parse("/proc/self/mountinfo", NULL, &table, &iter);
41 if (r < 0)
42 return log_debug_errno(r, "Failed to parse /proc/self/mountinfo: %m");
43
44 for (;;) {
45 struct libmnt_fs *fs;
46 const char *path;
47
48 r = mnt_table_next_fs(table, iter, &fs);
49 if (r == 1)
50 break;
51 if (r < 0)
52 return log_debug_errno(r, "Failed to get next entry from /proc/self/mountinfo: %m");
53
54 path = mnt_fs_get_target(fs);
55 if (!path)
56 continue;
57
58 if (!path_startswith(path, prefix))
59 continue;
60
61 if (umount2(path, flags) < 0) {
62 r = log_debug_errno(errno, "Failed to umount %s: %m", path);
63 continue;
64 }
65
66 log_debug("Successfully unmounted %s", path);
67
68 again = true;
69 n++;
70
71 break;
72 }
73
74 } while (again);
75
76 return n;
77 }
78
79 static int get_mount_flags(
80 struct libmnt_table *table,
81 const char *path,
82 unsigned long *ret) {
83 struct libmnt_fs *fs;
84 struct statvfs buf;
85 const char *opts;
86 int r = 0;
87
88 /* Get the mount flags for the mountpoint at "path" from "table". We have a fallback using statvfs()
89 * in place (which provides us with mostly the same info), but it's just a fallback, since using it
90 * means triggering autofs or NFS mounts, which we'd rather avoid needlessly. */
91
92 fs = mnt_table_find_target(table, path, MNT_ITER_FORWARD);
93 if (!fs) {
94 log_debug("Could not find '%s' in mount table, ignoring.", path);
95 goto fallback;
96 }
97
98 opts = mnt_fs_get_vfs_options(fs);
99 if (!opts) {
100 *ret = 0;
101 return 0;
102 }
103
104 r = mnt_optstr_get_flags(opts, ret, mnt_get_builtin_optmap(MNT_LINUX_MAP));
105 if (r != 0) {
106 log_debug_errno(r, "Could not get flags for '%s', ignoring: %m", path);
107 goto fallback;
108 }
109
110 /* MS_RELATIME is default and trying to set it in an unprivileged container causes EPERM */
111 *ret &= ~MS_RELATIME;
112 return 0;
113
114 fallback:
115 if (statvfs(path, &buf) < 0)
116 return -errno;
117
118 /* The statvfs() flags and the mount flags mostly have the same values, but for some cases do
119 * not. Hence map the flags manually. (Strictly speaking, ST_RELATIME/MS_RELATIME is the most
120 * prominent one that doesn't match, but that's the one we mask away anyway, see above.) */
121
122 *ret =
123 FLAGS_SET(buf.f_flag, ST_RDONLY) * MS_RDONLY |
124 FLAGS_SET(buf.f_flag, ST_NODEV) * MS_NODEV |
125 FLAGS_SET(buf.f_flag, ST_NOEXEC) * MS_NOEXEC |
126 FLAGS_SET(buf.f_flag, ST_NOSUID) * MS_NOSUID |
127 FLAGS_SET(buf.f_flag, ST_NOATIME) * MS_NOATIME |
128 FLAGS_SET(buf.f_flag, ST_NODIRATIME) * MS_NODIRATIME;
129
130 return 0;
131 }
132
133 /* Use this function only if you do not have direct access to /proc/self/mountinfo but the caller can open it
134 * for you. This is the case when /proc is masked or not mounted. Otherwise, use bind_remount_recursive. */
135 int bind_remount_recursive_with_mountinfo(
136 const char *prefix,
137 unsigned long new_flags,
138 unsigned long flags_mask,
139 char **blacklist,
140 FILE *proc_self_mountinfo) {
141
142 _cleanup_set_free_free_ Set *done = NULL;
143 _cleanup_free_ char *simplified = NULL;
144 int r;
145
146 assert(prefix);
147 assert(proc_self_mountinfo);
148
149 /* Recursively remount a directory (and all its submounts) read-only or read-write. If the directory is already
150 * mounted, we reuse the mount and simply mark it MS_BIND|MS_RDONLY (or remove the MS_RDONLY for read-write
151 * operation). If it isn't we first make it one. Afterwards we apply MS_BIND|MS_RDONLY (or remove MS_RDONLY) to
152 * all submounts we can access, too. When mounts are stacked on the same mount point we only care for each
153 * individual "top-level" mount on each point, as we cannot influence/access the underlying mounts anyway. We
154 * do not have any effect on future submounts that might get propagated, they might be writable. This includes
155 * future submounts that have been triggered via autofs.
156 *
157 * If the "blacklist" parameter is specified it may contain a list of subtrees to exclude from the
158 * remount operation. Note that we'll ignore the blacklist for the top-level path. */
159
160 simplified = strdup(prefix);
161 if (!simplified)
162 return -ENOMEM;
163
164 path_simplify(simplified, false);
165
166 done = set_new(&path_hash_ops);
167 if (!done)
168 return -ENOMEM;
169
170 for (;;) {
171 _cleanup_set_free_free_ Set *todo = NULL;
172 _cleanup_(mnt_free_tablep) struct libmnt_table *table = NULL;
173 _cleanup_(mnt_free_iterp) struct libmnt_iter *iter = NULL;
174 bool top_autofs = false;
175 char *x;
176 unsigned long orig_flags;
177
178 todo = set_new(&path_hash_ops);
179 if (!todo)
180 return -ENOMEM;
181
182 rewind(proc_self_mountinfo);
183
184 r = libmount_parse("/proc/self/mountinfo", proc_self_mountinfo, &table, &iter);
185 if (r < 0)
186 return log_debug_errno(r, "Failed to parse /proc/self/mountinfo: %m");
187
188 for (;;) {
189 struct libmnt_fs *fs;
190 const char *path, *type;
191
192 r = mnt_table_next_fs(table, iter, &fs);
193 if (r == 1)
194 break;
195 if (r < 0)
196 return log_debug_errno(r, "Failed to get next entry from /proc/self/mountinfo: %m");
197
198 path = mnt_fs_get_target(fs);
199 type = mnt_fs_get_fstype(fs);
200 if (!path || !type)
201 continue;
202
203 if (!path_startswith(path, simplified))
204 continue;
205
206 /* Ignore this mount if it is blacklisted, but only if it isn't the top-level mount
207 * we shall operate on. */
208 if (!path_equal(path, simplified)) {
209 bool blacklisted = false;
210 char **i;
211
212 STRV_FOREACH(i, blacklist) {
213 if (path_equal(*i, simplified))
214 continue;
215
216 if (!path_startswith(*i, simplified))
217 continue;
218
219 if (path_startswith(path, *i)) {
220 blacklisted = true;
221 log_debug("Not remounting %s blacklisted by %s, called for %s",
222 path, *i, simplified);
223 break;
224 }
225 }
226 if (blacklisted)
227 continue;
228 }
229
230 /* Let's ignore autofs mounts. If they aren't
231 * triggered yet, we want to avoid triggering
232 * them, as we don't make any guarantees for
233 * future submounts anyway. If they are
234 * already triggered, then we will find
235 * another entry for this. */
236 if (streq(type, "autofs")) {
237 top_autofs = top_autofs || path_equal(path, simplified);
238 continue;
239 }
240
241 if (!set_contains(done, path)) {
242 r = set_put_strdup(&todo, path);
243 if (r < 0)
244 return r;
245 }
246 }
247
248 /* If we have no submounts to process anymore and if
249 * the root is either already done, or an autofs, we
250 * are done */
251 if (set_isempty(todo) &&
252 (top_autofs || set_contains(done, simplified)))
253 return 0;
254
255 if (!set_contains(done, simplified) &&
256 !set_contains(todo, simplified)) {
257 /* The prefix directory itself is not yet a mount, make it one. */
258 if (mount(simplified, simplified, NULL, MS_BIND|MS_REC, NULL) < 0)
259 return -errno;
260
261 orig_flags = 0;
262 (void) get_mount_flags(table, simplified, &orig_flags);
263
264 if (mount(NULL, simplified, NULL, (orig_flags & ~flags_mask)|MS_BIND|MS_REMOUNT|new_flags, NULL) < 0)
265 return -errno;
266
267 log_debug("Made top-level directory %s a mount point.", prefix);
268
269 r = set_put_strdup(&done, simplified);
270 if (r < 0)
271 return r;
272 }
273
274 while ((x = set_steal_first(todo))) {
275
276 r = set_consume(done, x);
277 if (IN_SET(r, 0, -EEXIST))
278 continue;
279 if (r < 0)
280 return r;
281
282 /* Deal with mount points that are obstructed by a later mount */
283 r = path_is_mount_point(x, NULL, 0);
284 if (IN_SET(r, 0, -ENOENT))
285 continue;
286 if (IN_SET(r, -EACCES, -EPERM)) {
287 /* Even if root user invoke this, submounts under private FUSE or NFS mount points
288 * may not be acceessed. E.g.,
289 *
290 * $ bindfs --no-allow-other ~/mnt/mnt ~/mnt/mnt
291 * $ bindfs --no-allow-other ~/mnt ~/mnt
292 *
293 * Then, root user cannot access the mount point ~/mnt/mnt.
294 * In such cases, the submounts are ignored, as we have no way to manage them. */
295 log_debug_errno(r, "Failed to determine '%s' is mount point or not, ignoring: %m", x);
296 continue;
297 }
298 if (r < 0)
299 return r;
300
301 /* Try to reuse the original flag set */
302 orig_flags = 0;
303 (void) get_mount_flags(table, x, &orig_flags);
304
305 if (mount(NULL, x, NULL, (orig_flags & ~flags_mask)|MS_BIND|MS_REMOUNT|new_flags, NULL) < 0)
306 return -errno;
307
308 log_debug("Remounted %s read-only.", x);
309 }
310 }
311 }
312
313 int bind_remount_recursive(
314 const char *prefix,
315 unsigned long new_flags,
316 unsigned long flags_mask,
317 char **blacklist) {
318
319 _cleanup_fclose_ FILE *proc_self_mountinfo = NULL;
320 int r;
321
322 r = fopen_unlocked("/proc/self/mountinfo", "re", &proc_self_mountinfo);
323 if (r < 0)
324 return r;
325
326 return bind_remount_recursive_with_mountinfo(prefix, new_flags, flags_mask, blacklist, proc_self_mountinfo);
327 }
328
329 int bind_remount_one_with_mountinfo(
330 const char *path,
331 unsigned long new_flags,
332 unsigned long flags_mask,
333 FILE *proc_self_mountinfo) {
334
335 _cleanup_(mnt_free_tablep) struct libmnt_table *table = NULL;
336 unsigned long orig_flags = 0;
337 int r;
338
339 assert(path);
340 assert(proc_self_mountinfo);
341
342 rewind(proc_self_mountinfo);
343
344 table = mnt_new_table();
345 if (!table)
346 return -ENOMEM;
347
348 r = mnt_table_parse_stream(table, proc_self_mountinfo, "/proc/self/mountinfo");
349 if (r < 0)
350 return r;
351
352 /* Try to reuse the original flag set */
353 (void) get_mount_flags(table, path, &orig_flags);
354
355 if (mount(NULL, path, NULL, (orig_flags & ~flags_mask)|MS_BIND|MS_REMOUNT|new_flags, NULL) < 0)
356 return -errno;
357
358 return 0;
359 }
360
/* Makes the file system mounted at 'path' the new root of the calling process: changes into 'path', moves
 * that mount onto "/" with MS_MOVE, chroot()s into the current directory and finally changes to the new "/".
 *
 * The steps are executed strictly in this order and the sequence stops at the first one that fails.
 * Returns 0 on success, or -errno of the failing step. */
int mount_move_root(const char *path) {
        assert(path);

        /* The || chain short-circuits, hence errno below always belongs to the first failing call */
        if (chdir(path) < 0 ||
            mount(path, "/", NULL, MS_MOVE, NULL) < 0 ||
            chroot(".") < 0 ||
            chdir("/") < 0)
                return -errno;

        return 0;
}
378
/* Calls umount2() on 'path' over and over until it fails, so that all mounts stacked on that mount point
 * are removed.
 *
 * If the terminating failure is EINVAL (which umount2() reports, among other cases, when the target is not
 * a mount point) the result is 1 if at least one unmount succeeded before and 0 if none did. Any other
 * failure is returned as -errno. */
int repeat_unmount(const char *path, int flags) {
        bool unmounted_any = false;

        assert(path);

        while (umount2(path, flags) >= 0)
                unmounted_any = true;

        /* errno still refers to the umount2() call that ended the loop */
        return errno == EINVAL ? unmounted_any : -errno;
}
399
400 int mode_to_inaccessible_node(const char *runtime_dir, mode_t mode, char **dest) {
401 /* This function maps a node type to a corresponding inaccessible file node. These nodes are created during
402 * early boot by PID 1. In some cases we lacked the privs to create the character and block devices (maybe
403 * because we run in an userns environment, or miss CAP_SYS_MKNOD, or run with a devices policy that excludes
404 * device nodes with major and minor of 0), but that's fine, in that case we use an AF_UNIX file node instead,
405 * which is not the same, but close enough for most uses. And most importantly, the kernel allows bind mounts
406 * from socket nodes to any non-directory file nodes, and that's the most important thing that matters. */
407 _cleanup_free_ char *d = NULL;
408 const char *node = NULL;
409 char *tmp;
410
411 assert(dest);
412
413 switch(mode & S_IFMT) {
414 case S_IFREG:
415 node = "/inaccessible/reg";
416 break;
417
418 case S_IFDIR:
419 node = "/inaccessible/dir";
420 break;
421
422 case S_IFCHR:
423 d = path_join(runtime_dir, "/inaccessible/chr");
424 if (!d)
425 return log_oom();
426
427 if (access(d, F_OK) == 0) {
428 *dest = TAKE_PTR(d);
429 return 0;
430 }
431
432 node = "/inaccessible/sock";
433 break;
434
435 case S_IFBLK:
436 d = path_join(runtime_dir, "/inaccessible/blk");
437 if (!d)
438 return log_oom();
439
440 if (access(d, F_OK) == 0) {
441 *dest = TAKE_PTR(d);
442 return 0;
443 }
444
445 node = "/inaccessible/sock";
446 break;
447
448 case S_IFIFO:
449 node = "/inaccessible/fifo";
450 break;
451
452 case S_IFSOCK:
453 node = "/inaccessible/sock";
454 break;
455 }
456
457 if (!node)
458 return -EINVAL;
459
460 tmp = path_join(runtime_dir, node);
461 if (!tmp)
462 return log_oom();
463
464 *dest = tmp;
465 return 0;
466 }
467
468 #define FLAG(name) (flags & name ? STRINGIFY(name) "|" : "")
469 static char* mount_flags_to_string(long unsigned flags) {
470 char *x;
471 _cleanup_free_ char *y = NULL;
472 long unsigned overflow;
473
474 overflow = flags & ~(MS_RDONLY |
475 MS_NOSUID |
476 MS_NODEV |
477 MS_NOEXEC |
478 MS_SYNCHRONOUS |
479 MS_REMOUNT |
480 MS_MANDLOCK |
481 MS_DIRSYNC |
482 MS_NOATIME |
483 MS_NODIRATIME |
484 MS_BIND |
485 MS_MOVE |
486 MS_REC |
487 MS_SILENT |
488 MS_POSIXACL |
489 MS_UNBINDABLE |
490 MS_PRIVATE |
491 MS_SLAVE |
492 MS_SHARED |
493 MS_RELATIME |
494 MS_KERNMOUNT |
495 MS_I_VERSION |
496 MS_STRICTATIME |
497 MS_LAZYTIME);
498
499 if (flags == 0 || overflow != 0)
500 if (asprintf(&y, "%lx", overflow) < 0)
501 return NULL;
502
503 x = strjoin(FLAG(MS_RDONLY),
504 FLAG(MS_NOSUID),
505 FLAG(MS_NODEV),
506 FLAG(MS_NOEXEC),
507 FLAG(MS_SYNCHRONOUS),
508 FLAG(MS_REMOUNT),
509 FLAG(MS_MANDLOCK),
510 FLAG(MS_DIRSYNC),
511 FLAG(MS_NOATIME),
512 FLAG(MS_NODIRATIME),
513 FLAG(MS_BIND),
514 FLAG(MS_MOVE),
515 FLAG(MS_REC),
516 FLAG(MS_SILENT),
517 FLAG(MS_POSIXACL),
518 FLAG(MS_UNBINDABLE),
519 FLAG(MS_PRIVATE),
520 FLAG(MS_SLAVE),
521 FLAG(MS_SHARED),
522 FLAG(MS_RELATIME),
523 FLAG(MS_KERNMOUNT),
524 FLAG(MS_I_VERSION),
525 FLAG(MS_STRICTATIME),
526 FLAG(MS_LAZYTIME),
527 y);
528 if (!x)
529 return NULL;
530 if (!y)
531 x[strlen(x) - 1] = '\0'; /* truncate the last | */
532 return x;
533 }
534
535 int mount_verbose(
536 int error_log_level,
537 const char *what,
538 const char *where,
539 const char *type,
540 unsigned long flags,
541 const char *options) {
542
543 _cleanup_free_ char *fl = NULL, *o = NULL;
544 unsigned long f;
545 int r;
546
547 r = mount_option_mangle(options, flags, &f, &o);
548 if (r < 0)
549 return log_full_errno(error_log_level, r,
550 "Failed to mangle mount options %s: %m",
551 strempty(options));
552
553 fl = mount_flags_to_string(f);
554
555 if ((f & MS_REMOUNT) && !what && !type)
556 log_debug("Remounting %s (%s \"%s\")...",
557 where, strnull(fl), strempty(o));
558 else if (!what && !type)
559 log_debug("Mounting %s (%s \"%s\")...",
560 where, strnull(fl), strempty(o));
561 else if ((f & MS_BIND) && !type)
562 log_debug("Bind-mounting %s on %s (%s \"%s\")...",
563 what, where, strnull(fl), strempty(o));
564 else if (f & MS_MOVE)
565 log_debug("Moving mount %s → %s (%s \"%s\")...",
566 what, where, strnull(fl), strempty(o));
567 else
568 log_debug("Mounting %s on %s (%s \"%s\")...",
569 strna(type), where, strnull(fl), strempty(o));
570 if (mount(what, where, type, f, o) < 0)
571 return log_full_errno(error_log_level, errno,
572 "Failed to mount %s (type %s) on %s (%s \"%s\"): %m",
573 strna(what), strna(type), where, strnull(fl), strempty(o));
574 return 0;
575 }
576
/* Wrapper around umount() that announces the operation at debug level and logs a failure at error level.
 *
 * Returns 0 on success, otherwise the value produced by log_error_errno() for the umount() error. */
int umount_verbose(const char *what) {
        log_debug("Umounting %s...", what);

        if (umount(what) >= 0)
                return 0;

        return log_error_errno(errno, "Failed to unmount %s: %m", what);
}
583
584 int mount_option_mangle(
585 const char *options,
586 unsigned long mount_flags,
587 unsigned long *ret_mount_flags,
588 char **ret_remaining_options) {
589
590 const struct libmnt_optmap *map;
591 _cleanup_free_ char *ret = NULL;
592 const char *p;
593 int r;
594
595 /* This extracts mount flags from the mount options, and store
596 * non-mount-flag options to '*ret_remaining_options'.
597 * E.g.,
598 * "rw,nosuid,nodev,relatime,size=1630748k,mode=700,uid=1000,gid=1000"
599 * is split to MS_NOSUID|MS_NODEV|MS_RELATIME and
600 * "size=1630748k,mode=700,uid=1000,gid=1000".
601 * See more examples in test-mount-utils.c.
602 *
603 * Note that if 'options' does not contain any non-mount-flag options,
604 * then '*ret_remaining_options' is set to NULL instead of empty string.
605 * Note that this does not check validity of options stored in
606 * '*ret_remaining_options'.
607 * Note that if 'options' is NULL, then this just copies 'mount_flags'
608 * to '*ret_mount_flags'. */
609
610 assert(ret_mount_flags);
611 assert(ret_remaining_options);
612
613 map = mnt_get_builtin_optmap(MNT_LINUX_MAP);
614 if (!map)
615 return -EINVAL;
616
617 p = options;
618 for (;;) {
619 _cleanup_free_ char *word = NULL;
620 const struct libmnt_optmap *ent;
621
622 r = extract_first_word(&p, &word, ",", EXTRACT_UNQUOTE);
623 if (r < 0)
624 return r;
625 if (r == 0)
626 break;
627
628 for (ent = map; ent->name; ent++) {
629 /* All entries in MNT_LINUX_MAP do not take any argument.
630 * Thus, ent->name does not contain "=" or "[=]". */
631 if (!streq(word, ent->name))
632 continue;
633
634 if (!(ent->mask & MNT_INVERT))
635 mount_flags |= ent->id;
636 else if (mount_flags & ent->id)
637 mount_flags ^= ent->id;
638
639 break;
640 }
641
642 /* If 'word' is not a mount flag, then store it in '*ret_remaining_options'. */
643 if (!ent->name && !strextend_with_separator(&ret, ",", word, NULL))
644 return -ENOMEM;
645 }
646
647 *ret_mount_flags = mount_flags;
648 *ret_remaining_options = TAKE_PTR(ret);
649
650 return 0;
651 }