1 /* SPDX-License-Identifier: LGPL-2.1+ */
3 This file is part of systemd.
5 Copyright 2010 Lennart Poettering
7 systemd is free software; you can redistribute it and/or modify it
8 under the terms of the GNU Lesser General Public License as published by
9 the Free Software Foundation; either version 2.1 of the License, or
10 (at your option) any later version.
12 systemd is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 Lesser General Public License for more details.
17 You should have received a copy of the GNU Lesser General Public License
18 along with systemd; If not, see <http://www.gnu.org/licenses/>.
24 #include <sys/mount.h>
26 #include <sys/statvfs.h>
29 #include "alloc-util.h"
35 #include "mount-util.h"
36 #include "parse-util.h"
37 #include "path-util.h"
39 #include "stdio-util.h"
40 #include "string-util.h"
43 static int fd_fdinfo_mnt_id(int fd
, const char *filename
, int flags
, int *mnt_id
) {
44 char path
[strlen("/proc/self/fdinfo/") + DECIMAL_STR_MAX(int)];
45 _cleanup_free_
char *fdinfo
= NULL
;
46 _cleanup_close_
int subfd
= -1;
50 if ((flags
& AT_EMPTY_PATH
) && isempty(filename
))
51 xsprintf(path
, "/proc/self/fdinfo/%i", fd
);
53 subfd
= openat(fd
, filename
, O_CLOEXEC
|O_PATH
);
57 xsprintf(path
, "/proc/self/fdinfo/%i", subfd
);
60 r
= read_full_file(path
, &fdinfo
, NULL
);
61 if (r
== -ENOENT
) /* The fdinfo directory is a relatively new addition */
66 p
= startswith(fdinfo
, "mnt_id:");
68 p
= strstr(fdinfo
, "\nmnt_id:");
69 if (!p
) /* The mnt_id field is a relatively new addition */
75 p
+= strspn(p
, WHITESPACE
);
76 p
[strcspn(p
, WHITESPACE
)] = 0;
78 return safe_atoi(p
, mnt_id
);
81 int fd_is_mount_point(int fd
, const char *filename
, int flags
) {
82 union file_handle_union h
= FILE_HANDLE_INIT
, h_parent
= FILE_HANDLE_INIT
;
83 int mount_id
= -1, mount_id_parent
= -1;
84 bool nosupp
= false, check_st_dev
= true;
91 /* First we will try the name_to_handle_at() syscall, which
92 * tells us the mount id and an opaque file "handle". It is
93 * not supported everywhere though (kernel compile-time
94 * option, not all file systems are hooked up). If it works
95 * the mount id is usually good enough to tell us whether
96 * something is a mount point.
98 * If that didn't work we will try to read the mount id from
99 * /proc/self/fdinfo/<fd>. This is almost as good as
100 * name_to_handle_at(); however, it does not return the
101 * opaque file handle. The opaque file handle is pretty useful
102 * to detect the root directory, which we should always
103 * consider a mount point. Hence we use this only as
104 * fallback. Exporting the mnt_id in fdinfo is a pretty recent
107 * As last fallback we do traditional fstat() based st_dev
108 * comparisons. This is how things were traditionally done,
109 * but unionfs breaks this since it exposes file
110 * systems with a variety of st_dev reported. Also, btrfs
111 * subvolumes have different st_dev, even though they aren't
112 * real mounts of their own. */
114 r
= name_to_handle_at(fd
, filename
, &h
.handle
, &mount_id
, flags
);
116 if (IN_SET(errno
, ENOSYS
, EACCES
, EPERM
))
117 /* This kernel does not support name_to_handle_at() at all, or the syscall was blocked (maybe
118 * through seccomp, because we are running inside of a container?): fall back to simpler
120 goto fallback_fdinfo
;
121 else if (errno
== EOPNOTSUPP
)
122 /* This kernel or file system does not support
123 * name_to_handle_at(), hence let's see if the
124 * upper fs supports it (in which case it is a
125 * mount point), otherwise fallback to the
126 * traditional stat() logic */
132 r
= name_to_handle_at(fd
, "", &h_parent
.handle
, &mount_id_parent
, AT_EMPTY_PATH
);
134 if (errno
== EOPNOTSUPP
) {
136 /* Neither parent nor child do name_to_handle_at()?
137 We have no choice but to fall back. */
138 goto fallback_fdinfo
;
140 /* The parent can't do name_to_handle_at() but the
141 * directory we are interested in can?
142 * If so, it must be a mount point. */
148 /* The parent can do name_to_handle_at() but the
149 * directory we are interested in can't? If so, it
150 * must be a mount point. */
154 /* If the file handle for the directory we are
155 * interested in and its parent are identical, we
156 * assume this is the root directory, which is a mount
159 if (h
.handle
.handle_bytes
== h_parent
.handle
.handle_bytes
&&
160 h
.handle
.handle_type
== h_parent
.handle
.handle_type
&&
161 memcmp(h
.handle
.f_handle
, h_parent
.handle
.f_handle
, h
.handle
.handle_bytes
) == 0)
164 return mount_id
!= mount_id_parent
;
167 r
= fd_fdinfo_mnt_id(fd
, filename
, flags
, &mount_id
);
168 if (IN_SET(r
, -EOPNOTSUPP
, -EACCES
, -EPERM
))
173 r
= fd_fdinfo_mnt_id(fd
, "", AT_EMPTY_PATH
, &mount_id_parent
);
177 if (mount_id
!= mount_id_parent
)
180 /* Hmm, so, the mount ids are the same. This leaves one
181 * special case though for the root file system. For that,
182 * let's see if the parent directory has the same inode as we
183 * are interested in. Hence, let's also do fstat() checks now,
184 * too, but avoid the st_dev comparisons, since they aren't
185 * that useful on unionfs mounts. */
186 check_st_dev
= false;
189 /* yay for fstatat() taking a different set of flags than the other
191 if (flags
& AT_SYMLINK_FOLLOW
)
192 flags
&= ~AT_SYMLINK_FOLLOW
;
194 flags
|= AT_SYMLINK_NOFOLLOW
;
195 if (fstatat(fd
, filename
, &a
, flags
) < 0)
198 if (fstatat(fd
, "", &b
, AT_EMPTY_PATH
) < 0)
201 /* A directory with same device and inode as its parent? Must
202 * be the root directory */
203 if (a
.st_dev
== b
.st_dev
&&
204 a
.st_ino
== b
.st_ino
)
207 return check_st_dev
&& (a
.st_dev
!= b
.st_dev
);
210 /* flags can be AT_SYMLINK_FOLLOW or 0 */
211 int path_is_mount_point(const char *t
, const char *root
, int flags
) {
212 _cleanup_free_
char *canonical
= NULL
, *parent
= NULL
;
213 _cleanup_close_
int fd
= -1;
218 if (path_equal(t
, "/"))
221 /* we need to resolve symlinks manually, we can't just rely on
222 * fd_is_mount_point() to do that for us; if we have a structure like
223 * /bin -> /usr/bin/ and /usr is a mount point, then the parent that we
224 * look at needs to be /usr, not /. */
225 if (flags
& AT_SYMLINK_FOLLOW
) {
226 r
= chase_symlinks(t
, root
, 0, &canonical
);
233 parent
= dirname_malloc(t
);
237 fd
= openat(AT_FDCWD
, parent
, O_DIRECTORY
|O_CLOEXEC
|O_PATH
);
241 return fd_is_mount_point(fd
, basename(t
), flags
);
244 int umount_recursive(const char *prefix
, int flags
) {
248 /* Try to umount everything recursively below a
249 * directory. Also, take care of stacked mounts, and keep
250 * unmounting them until they are gone. */
253 _cleanup_fclose_
FILE *proc_self_mountinfo
= NULL
;
258 proc_self_mountinfo
= fopen("/proc/self/mountinfo", "re");
259 if (!proc_self_mountinfo
)
263 _cleanup_free_
char *path
= NULL
, *p
= NULL
;
266 k
= fscanf(proc_self_mountinfo
,
267 "%*s " /* (1) mount id */
268 "%*s " /* (2) parent id */
269 "%*s " /* (3) major:minor */
270 "%*s " /* (4) root */
271 "%ms " /* (5) mount point */
272 "%*s" /* (6) mount options */
273 "%*[^-]" /* (7) optional fields */
274 "- " /* (8) separator */
275 "%*s " /* (9) file system type */
276 "%*s" /* (10) mount source */
277 "%*s" /* (11) mount options 2 */
278 "%*[^\n]", /* some rubbish at the end */
287 r
= cunescape(path
, UNESCAPE_RELAX
, &p
);
291 if (!path_startswith(p
, prefix
))
294 if (umount2(p
, flags
) < 0) {
295 r
= log_debug_errno(errno
, "Failed to umount %s: %m", p
);
299 log_debug("Successfully unmounted %s", p
);
312 static int get_mount_flags(const char *path
, unsigned long *flags
) {
315 if (statvfs(path
, &buf
) < 0)
321 /* Use this function only if you do not have direct access to /proc/self/mountinfo
322 * and need the caller to open it for you. This is the case when /proc is
323 * masked or not mounted. Otherwise, use bind_remount_recursive. */
324 int bind_remount_recursive_with_mountinfo(const char *prefix
, bool ro
, char **blacklist
, FILE *proc_self_mountinfo
) {
325 _cleanup_set_free_free_ Set
*done
= NULL
;
326 _cleanup_free_
char *cleaned
= NULL
;
329 assert(proc_self_mountinfo
);
331 /* Recursively remount a directory (and all its submounts) read-only or read-write. If the directory is already
332 * mounted, we reuse the mount and simply mark it MS_BIND|MS_RDONLY (or remove the MS_RDONLY for read-write
333 * operation). If it isn't we first make it one. Afterwards we apply MS_BIND|MS_RDONLY (or remove MS_RDONLY) to
334 * all submounts we can access, too. When mounts are stacked on the same mount point we only care for each
335 * individual "top-level" mount on each point, as we cannot influence/access the underlying mounts anyway. We
336 * do not have any effect on future submounts that might get propagated, they might be writable. This includes
337 * future submounts that have been triggered via autofs.
339 * If the "blacklist" parameter is specified it may contain a list of subtrees to exclude from the
340 * remount operation. Note that we'll ignore the blacklist for the top-level path. */
342 cleaned
= strdup(prefix
);
346 path_kill_slashes(cleaned
);
348 done
= set_new(&string_hash_ops
);
353 _cleanup_set_free_free_ Set
*todo
= NULL
;
354 bool top_autofs
= false;
356 unsigned long orig_flags
;
358 todo
= set_new(&string_hash_ops
);
362 rewind(proc_self_mountinfo
);
365 _cleanup_free_
char *path
= NULL
, *p
= NULL
, *type
= NULL
;
368 k
= fscanf(proc_self_mountinfo
,
369 "%*s " /* (1) mount id */
370 "%*s " /* (2) parent id */
371 "%*s " /* (3) major:minor */
372 "%*s " /* (4) root */
373 "%ms " /* (5) mount point */
374 "%*s" /* (6) mount options (superblock) */
375 "%*[^-]" /* (7) optional fields */
376 "- " /* (8) separator */
377 "%ms " /* (9) file system type */
378 "%*s" /* (10) mount source */
379 "%*s" /* (11) mount options (bind mount) */
380 "%*[^\n]", /* some rubbish at the end */
390 r
= cunescape(path
, UNESCAPE_RELAX
, &p
);
394 if (!path_startswith(p
, cleaned
))
397 /* Ignore this mount if it is blacklisted, but only if it isn't the top-level mount we shall
399 if (!path_equal(cleaned
, p
)) {
400 bool blacklisted
= false;
403 STRV_FOREACH(i
, blacklist
) {
405 if (path_equal(*i
, cleaned
))
408 if (!path_startswith(*i
, cleaned
))
411 if (path_startswith(p
, *i
)) {
413 log_debug("Not remounting %s, because blacklisted by %s, called for %s", p
, *i
, cleaned
);
421 /* Let's ignore autofs mounts. If they aren't
422 * triggered yet, we want to avoid triggering
423 * them, as we don't make any guarantees for
424 * future submounts anyway. If they are
425 * already triggered, then we will find
426 * another entry for this. */
427 if (streq(type
, "autofs")) {
428 top_autofs
= top_autofs
|| path_equal(cleaned
, p
);
432 if (!set_contains(done
, p
)) {
433 r
= set_consume(todo
, p
);
442 /* If we have no submounts to process anymore and if
443 * the root is either already done, or an autofs, we
445 if (set_isempty(todo
) &&
446 (top_autofs
|| set_contains(done
, cleaned
)))
449 if (!set_contains(done
, cleaned
) &&
450 !set_contains(todo
, cleaned
)) {
451 /* The prefix directory itself is not yet a mount, make it one. */
452 if (mount(cleaned
, cleaned
, NULL
, MS_BIND
|MS_REC
, NULL
) < 0)
456 (void) get_mount_flags(cleaned
, &orig_flags
);
457 orig_flags
&= ~MS_RDONLY
;
459 if (mount(NULL
, prefix
, NULL
, orig_flags
|MS_BIND
|MS_REMOUNT
|(ro
? MS_RDONLY
: 0), NULL
) < 0)
462 log_debug("Made top-level directory %s a mount point.", prefix
);
468 r
= set_consume(done
, x
);
473 while ((x
= set_steal_first(todo
))) {
475 r
= set_consume(done
, x
);
476 if (IN_SET(r
, 0, -EEXIST
))
481 /* Deal with mount points that are obstructed by a later mount */
482 r
= path_is_mount_point(x
, NULL
, 0);
483 if (IN_SET(r
, 0, -ENOENT
))
488 /* Try to reuse the original flag set */
490 (void) get_mount_flags(x
, &orig_flags
);
491 orig_flags
&= ~MS_RDONLY
;
493 if (mount(NULL
, x
, NULL
, orig_flags
|MS_BIND
|MS_REMOUNT
|(ro
? MS_RDONLY
: 0), NULL
) < 0)
496 log_debug("Remounted %s read-only.", x
);
501 int bind_remount_recursive(const char *prefix
, bool ro
, char **blacklist
) {
502 _cleanup_fclose_
FILE *proc_self_mountinfo
= NULL
;
504 proc_self_mountinfo
= fopen("/proc/self/mountinfo", "re");
505 if (!proc_self_mountinfo
)
508 return bind_remount_recursive_with_mountinfo(prefix
, ro
, blacklist
, proc_self_mountinfo
);
511 int mount_move_root(const char *path
) {
517 if (mount(path
, "/", NULL
, MS_MOVE
, NULL
) < 0)
529 bool fstype_is_network(const char *fstype
) {
532 x
= startswith(fstype
, "fuse.");
536 return STR_IN_SET(fstype
,
548 "pvfs2", /* OrangeFS */
553 bool fstype_is_api_vfs(const char *fstype
) {
554 return STR_IN_SET(fstype
,
577 bool fstype_is_ro(const char *fstype
) {
578 /* All Linux file systems that are necessarily read-only */
579 return STR_IN_SET(fstype
,
585 bool fstype_can_discard(const char *fstype
) {
586 return STR_IN_SET(fstype
,
593 int repeat_unmount(const char *path
, int flags
) {
598 /* If there are multiple mounts on a mount point, this
599 * removes them all */
602 if (umount2(path
, flags
) < 0) {
614 const char* mode_to_inaccessible_node(mode_t mode
) {
615 /* This function maps a node type to the corresponding inaccessible node type.
616 * Character and block inaccessible devices may not be created (because major=0 and minor=0),
617 * in which case we map character and block devices to the inaccessible node type socket. */
618 switch(mode
& S_IFMT
) {
620 return "/run/systemd/inaccessible/reg";
622 return "/run/systemd/inaccessible/dir";
624 if (access("/run/systemd/inaccessible/chr", F_OK
) == 0)
625 return "/run/systemd/inaccessible/chr";
626 return "/run/systemd/inaccessible/sock";
628 if (access("/run/systemd/inaccessible/blk", F_OK
) == 0)
629 return "/run/systemd/inaccessible/blk";
630 return "/run/systemd/inaccessible/sock";
632 return "/run/systemd/inaccessible/fifo";
634 return "/run/systemd/inaccessible/sock";
639 #define FLAG(name) (flags & name ? STRINGIFY(name) "|" : "")
640 static char* mount_flags_to_string(long unsigned flags
) {
642 _cleanup_free_
char *y
= NULL
;
643 long unsigned overflow
;
645 overflow
= flags
& ~(MS_RDONLY
|
670 if (flags
== 0 || overflow
!= 0)
671 if (asprintf(&y
, "%lx", overflow
) < 0)
674 x
= strjoin(FLAG(MS_RDONLY
),
678 FLAG(MS_SYNCHRONOUS
),
696 FLAG(MS_STRICTATIME
),
702 x
[strlen(x
) - 1] = '\0'; /* truncate the last | */
712 const char *options
) {
714 _cleanup_free_
char *fl
= NULL
;
716 fl
= mount_flags_to_string(flags
);
718 if ((flags
& MS_REMOUNT
) && !what
&& !type
)
719 log_debug("Remounting %s (%s \"%s\")...",
720 where
, strnull(fl
), strempty(options
));
721 else if (!what
&& !type
)
722 log_debug("Mounting %s (%s \"%s\")...",
723 where
, strnull(fl
), strempty(options
));
724 else if ((flags
& MS_BIND
) && !type
)
725 log_debug("Bind-mounting %s on %s (%s \"%s\")...",
726 what
, where
, strnull(fl
), strempty(options
));
727 else if (flags
& MS_MOVE
)
728 log_debug("Moving mount %s → %s (%s \"%s\")...",
729 what
, where
, strnull(fl
), strempty(options
));
731 log_debug("Mounting %s on %s (%s \"%s\")...",
732 strna(type
), where
, strnull(fl
), strempty(options
));
733 if (mount(what
, where
, type
, flags
, options
) < 0)
734 return log_full_errno(error_log_level
, errno
,
735 "Failed to mount %s on %s (%s \"%s\"): %m",
736 strna(type
), where
, strnull(fl
), strempty(options
));
740 int umount_verbose(const char *what
) {
741 log_debug("Umounting %s...", what
);
742 if (umount(what
) < 0)
743 return log_error_errno(errno
, "Failed to unmount %s: %m", what
);
747 const char *mount_propagation_flags_to_string(unsigned long flags
) {
749 switch (flags
& (MS_SHARED
|MS_SLAVE
|MS_PRIVATE
)) {
764 int mount_propagation_flags_from_string(const char *name
, unsigned long *ret
) {
768 else if (streq(name
, "shared"))
770 else if (streq(name
, "slave"))
772 else if (streq(name
, "private"))