2 This file is part of systemd.
4 Copyright 2010 Lennart Poettering
6 systemd is free software; you can redistribute it and/or modify it
7 under the terms of the GNU Lesser General Public License as published by
8 the Free Software Foundation; either version 2.1 of the License, or
9 (at your option) any later version.
11 systemd is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
16 You should have received a copy of the GNU Lesser General Public License
17 along with systemd; If not, see <http://www.gnu.org/licenses/>.
23 #include <sys/mount.h>
25 #include <sys/statvfs.h>
28 #include "alloc-util.h"
34 #include "mount-util.h"
35 #include "parse-util.h"
36 #include "path-util.h"
38 #include "stdio-util.h"
39 #include "string-util.h"
/* Determines the mount ID of a file by parsing the "mnt_id:" field of a /proc/self/fdinfo/<fd> entry.
 *
 * If AT_EMPTY_PATH is set in flags and filename is empty, the entry of fd itself is read. Otherwise
 * filename is opened relative to fd with O_CLOEXEC|O_PATH and the entry of that descriptor is read.
 * The value found is parsed with safe_atoi() into *mnt_id, and the result of safe_atoi() is returned.
 *
 * NOTE(review): the local declarations and the error-return statements of this function are not
 * visible in this excerpt, so the failure paths are intentionally not described here. */
42 static int fd_fdinfo_mnt_id(int fd
, const char *filename
, int flags
, int *mnt_id
) {
43 char path
[strlen("/proc/self/fdinfo/") + DECIMAL_STR_MAX(int)];
44 _cleanup_free_
char *fdinfo
= NULL
;
45 _cleanup_close_
int subfd
= -1;
/* No file name given and AT_EMPTY_PATH set: the descriptor we were passed is the object to inspect */
49 if ((flags
& AT_EMPTY_PATH
) && isempty(filename
))
50 xsprintf(path
, "/proc/self/fdinfo/%i", fd
);
/* Open the named file relative to fd as an O_PATH descriptor and inspect that one */
52 subfd
= openat(fd
, filename
, O_CLOEXEC
|O_PATH
);
56 xsprintf(path
, "/proc/self/fdinfo/%i", subfd
);
59 r
= read_full_file(path
, &fdinfo
, NULL
);
60 if (r
== -ENOENT
) /* The fdinfo directory is a relatively new addition */
/* The field is either the very first thing in the file ... */
65 p
= startswith(fdinfo
, "mnt_id:");
/* ... or it starts a later line */
67 p
= strstr(fdinfo
, "\nmnt_id:");
68 if (!p
) /* The mnt_id field is a relatively new addition */
/* Skip the whitespace in front of the value, then cut the string at the first whitespace after it */
74 p
+= strspn(p
, WHITESPACE
);
75 p
[strcspn(p
, WHITESPACE
)] = 0;
77 return safe_atoi(p
, mnt_id
);
/* Checks whether the file named by filename, looked up relative to the directory fd, is a mount point.
 * fd itself is used as the parent to compare against (it is queried with an empty name and
 * AT_EMPTY_PATH below).
 *
 * Three strategies are visible, in this order: name_to_handle_at(), the mount ID obtained through
 * fd_fdinfo_mnt_id(), and finally fstatat() based comparisons.
 *
 * NOTE(review): a number of lines (local declarations, error returns, labels, closing braces) are not
 * visible in this excerpt; only the statements shown are described. */
80 int fd_is_mount_point(int fd
, const char *filename
, int flags
) {
81 union file_handle_union h
= FILE_HANDLE_INIT
, h_parent
= FILE_HANDLE_INIT
;
82 int mount_id
= -1, mount_id_parent
= -1;
83 bool nosupp
= false, check_st_dev
= true;
90 /* First we will try the name_to_handle_at() syscall, which
91 * tells us the mount id and an opaque file "handle". It is
92 * not supported everywhere though (kernel compile-time
93 * option, not all file systems are hooked up). If it works
94 * the mount id is usually good enough to tell us whether
95 * something is a mount point.
97 * If that didn't work we will try to read the mount id from
98 * /proc/self/fdinfo/<fd>. This is almost as good as
99 * name_to_handle_at(), however, does not return the
100 * opaque file handle. The opaque file handle is pretty useful
101 * to detect the root directory, which we should always
102 * consider a mount point. Hence we use this only as
103 * fallback. Exporting the mnt_id in fdinfo is a pretty recent addition.
106 * As last fallback we do traditional fstat() based st_dev
107 * comparisons. This is how things were traditionally done,
108 * but unionfs breaks this since it exposes file
109 * systems with a variety of st_dev reported. Also, btrfs
110 * subvolumes have different st_dev, even though they aren't
111 * real mounts of their own. */
113 r
= name_to_handle_at(fd
, filename
, &h
.handle
, &mount_id
, flags
);
115 if (IN_SET(errno
, ENOSYS
, EACCES
, EPERM
))
116 /* This kernel does not support name_to_handle_at() at all, or the syscall was blocked (maybe
117 * through seccomp, because we are running inside of a container?): fall back to simpler
119 goto fallback_fdinfo
;
120 else if (errno
== EOPNOTSUPP
)
121 /* This kernel or file system does not support
122 * name_to_handle_at(), hence let's see if the
123 * upper fs supports it (in which case it is a
124 * mount point), otherwise fallback to the
125 * traditional stat() logic */
/* Same query for fd itself, i.e. for the parent directory */
131 r
= name_to_handle_at(fd
, "", &h_parent
.handle
, &mount_id_parent
, AT_EMPTY_PATH
);
133 if (errno
== EOPNOTSUPP
) {
135 /* Neither parent nor child do name_to_handle_at()?
136 We have no choice but to fall back. */
137 goto fallback_fdinfo
;
139 /* The parent can't do name_to_handle_at() but the
140 * directory we are interested in can?
141 * If so, it must be a mount point. */
147 /* The parent can do name_to_handle_at() but the
148 * directory we are interested in can't? If so, it
149 * must be a mount point. */
153 /* If the file handle for the directory we are
154 * interested in and its parent are identical, we
155 * assume this is the root directory, which is a mount
158 if (h
.handle
.handle_bytes
== h_parent
.handle
.handle_bytes
&&
159 h
.handle
.handle_type
== h_parent
.handle
.handle_type
&&
160 memcmp(h
.handle
.f_handle
, h_parent
.handle
.f_handle
, h
.handle
.handle_bytes
) == 0)
163 return mount_id
!= mount_id_parent
;
166 r
= fd_fdinfo_mnt_id(fd
, filename
, flags
, &mount_id
);
167 if (IN_SET(r
, -EOPNOTSUPP
, -EACCES
, -EPERM
))
172 r
= fd_fdinfo_mnt_id(fd
, "", AT_EMPTY_PATH
, &mount_id_parent
);
176 if (mount_id
!= mount_id_parent
)
179 /* Hmm, so, the mount ids are the same. This leaves one
180 * special case though for the root file system. For that,
181 * let's see if the parent directory has the same inode as we
182 * are interested in. Hence, let's also do fstat() checks now,
183 * too, but avoid the st_dev comparisons, since they aren't
184 * that useful on unionfs mounts. */
185 check_st_dev
= false;
188 /* yay for fstatat() taking a different set of flags than the other
190 if (flags
& AT_SYMLINK_FOLLOW
)
191 flags
&= ~AT_SYMLINK_FOLLOW
;
193 flags
|= AT_SYMLINK_NOFOLLOW
;
194 if (fstatat(fd
, filename
, &a
, flags
) < 0)
197 if (fstatat(fd
, "", &b
, AT_EMPTY_PATH
) < 0)
200 /* A directory with same device and inode as its parent? Must
201 * be the root directory */
202 if (a
.st_dev
== b
.st_dev
&&
203 a
.st_ino
== b
.st_ino
)
/* Report a mount point only if the st_dev comparison is still enabled and the device numbers differ */
206 return check_st_dev
&& (a
.st_dev
!= b
.st_dev
);
209 /* flags can be AT_SYMLINK_FOLLOW or 0 */
/* Path based front end for fd_is_mount_point(): opens the parent directory of t (as computed by
 * dirname_malloc()) with O_DIRECTORY|O_CLOEXEC|O_PATH and asks fd_is_mount_point() about basename(t)
 * inside it, passing flags through. With AT_SYMLINK_FOLLOW set, chase_symlinks() is called on t
 * (relative to root) first.
 *
 * NOTE(review): the handling of the "/" special case and of the chase_symlinks() result is not visible
 * in this excerpt. */
210 int path_is_mount_point(const char *t
, const char *root
, int flags
) {
211 _cleanup_free_
char *canonical
= NULL
, *parent
= NULL
;
212 _cleanup_close_
int fd
= -1;
217 if (path_equal(t
, "/"))
220 /* we need to resolve symlinks manually, we can't just rely on
221 * fd_is_mount_point() to do that for us; if we have a structure like
222 * /bin -> /usr/bin/ and /usr is a mount point, then the parent that we
223 * look at needs to be /usr, not /. */
224 if (flags
& AT_SYMLINK_FOLLOW
) {
225 r
= chase_symlinks(t
, root
, 0, &canonical
);
232 parent
= dirname_malloc(t
);
236 fd
= openat(AT_FDCWD
, parent
, O_DIRECTORY
|O_CLOEXEC
|O_PATH
);
240 return fd_is_mount_point(fd
, basename(t
), flags
);
/* Reads the mount point column of /proc/self/mountinfo, unescapes each entry with cunescape() and
 * calls umount2(p, flags) on every entry for which path_startswith(p, prefix) holds. Failures and
 * successes of the individual umount2() calls are logged at debug level.
 *
 * NOTE(review): the loop constructs and the return statements are not visible in this excerpt. */
243 int umount_recursive(const char *prefix
, int flags
) {
247 /* Try to umount everything recursively below a
248 * directory. Also, take care of stacked mounts, and keep
249 * unmounting them until they are gone. */
252 _cleanup_fclose_
FILE *proc_self_mountinfo
= NULL
;
257 proc_self_mountinfo
= fopen("/proc/self/mountinfo", "re");
258 if (!proc_self_mountinfo
)
262 _cleanup_free_
char *path
= NULL
, *p
= NULL
;
/* Only field (5), the mount point, is captured; every other field is parsed and discarded */
265 k
= fscanf(proc_self_mountinfo
,
266 "%*s " /* (1) mount id */
267 "%*s " /* (2) parent id */
268 "%*s " /* (3) major:minor */
269 "%*s " /* (4) root */
270 "%ms " /* (5) mount point */
271 "%*s" /* (6) mount options */
272 "%*[^-]" /* (7) optional fields */
273 "- " /* (8) separator */
274 "%*s " /* (9) file system type */
275 "%*s" /* (10) mount source */
276 "%*s" /* (11) mount options 2 */
277 "%*[^\n]", /* some rubbish at the end */
286 r
= cunescape(path
, UNESCAPE_RELAX
, &p
);
290 if (!path_startswith(p
, prefix
))
293 if (umount2(p
, flags
) < 0) {
294 r
= log_debug_errno(errno
, "Failed to umount %s: %m", p
);
298 log_debug("Successfully unmounted %s", p
);
/* Queries the file system containing path with statvfs().
 * NOTE(review): the declaration of buf, the error return and the assignment to *flags are not visible
 * in this excerpt; presumably the flags reported by statvfs() are handed back through *flags — confirm. */
311 static int get_mount_flags(const char *path
, unsigned long *flags
) {
314 if (statvfs(path
, &buf
) < 0)
320 /* Use this function only if you do not have direct access to /proc/self/mountinfo
321 * and need the caller to open it for you. This is the case when /proc is
322 * masked or not mounted. Otherwise, use bind_remount_recursive. */
323 int bind_remount_recursive_with_mountinfo(const char *prefix
, bool ro
, char **blacklist
, FILE *proc_self_mountinfo
) {
324 _cleanup_set_free_free_ Set
*done
= NULL
;
325 _cleanup_free_
char *cleaned
= NULL
;
328 assert(proc_self_mountinfo
);
330 /* Recursively remount a directory (and all its submounts) read-only or read-write. If the directory is already
331 * mounted, we reuse the mount and simply mark it MS_BIND|MS_RDONLY (or remove the MS_RDONLY for read-write
332 * operation). If it isn't we first make it one. Afterwards we apply MS_BIND|MS_RDONLY (or remove MS_RDONLY) to
333 * all submounts we can access, too. When mounts are stacked on the same mount point we only care for each
334 * individual "top-level" mount on each point, as we cannot influence/access the underlying mounts anyway. We
335 * do not have any effect on future submounts that might get propagated, they might be writable. This includes
336 * future submounts that have been triggered via autofs.
338 * If the "blacklist" parameter is specified it may contain a list of subtrees to exclude from the
339 * remount operation. Note that we'll ignore the blacklist for the top-level path. */
/* Work on a private copy of the prefix so that path_kill_slashes() can modify it in place */
341 cleaned
= strdup(prefix
);
345 path_kill_slashes(cleaned
);
/* "done" collects the paths that have already been handled across all passes */
347 done
= set_new(&string_hash_ops
);
352 _cleanup_set_free_free_ Set
*todo
= NULL
;
353 bool top_autofs
= false;
355 unsigned long orig_flags
;
/* "todo" collects the paths found in the current pass that are not in "done" yet */
357 todo
= set_new(&string_hash_ops
);
/* The stream is handed in by the caller, hence start reading from its beginning again */
361 rewind(proc_self_mountinfo
);
364 _cleanup_free_
char *path
= NULL
, *p
= NULL
, *type
= NULL
;
/* Fields (5), the mount point, and (9), the file system type, are captured; the rest is discarded */
367 k
= fscanf(proc_self_mountinfo
,
368 "%*s " /* (1) mount id */
369 "%*s " /* (2) parent id */
370 "%*s " /* (3) major:minor */
371 "%*s " /* (4) root */
372 "%ms " /* (5) mount point */
373 "%*s" /* (6) mount options (superblock) */
374 "%*[^-]" /* (7) optional fields */
375 "- " /* (8) separator */
376 "%ms " /* (9) file system type */
377 "%*s" /* (10) mount source */
378 "%*s" /* (11) mount options (bind mount) */
379 "%*[^\n]", /* some rubbish at the end */
389 r
= cunescape(path
, UNESCAPE_RELAX
, &p
);
393 if (!path_startswith(p
, cleaned
))
396 /* Ignore this mount if it is blacklisted, but only if it isn't the top-level mount we shall
398 if (!path_equal(cleaned
, p
)) {
399 bool blacklisted
= false;
402 STRV_FOREACH(i
, blacklist
) {
404 if (path_equal(*i
, cleaned
))
407 if (!path_startswith(*i
, cleaned
))
410 if (path_startswith(p
, *i
)) {
412 log_debug("Not remounting %s, because blacklisted by %s, called for %s", p
, *i
, cleaned
);
420 /* Let's ignore autofs mounts. If they aren't
421 * triggered yet, we want to avoid triggering
422 * them, as we don't make any guarantees for
423 * future submounts anyway. If they are
424 * already triggered, then we will find
425 * another entry for this. */
426 if (streq(type
, "autofs")) {
427 top_autofs
= top_autofs
|| path_equal(cleaned
, p
);
431 if (!set_contains(done
, p
)) {
432 r
= set_consume(todo
, p
);
441 /* If we have no submounts to process anymore and if
442 * the root is either already done, or an autofs, we
444 if (set_isempty(todo
) &&
445 (top_autofs
|| set_contains(done
, cleaned
)))
448 if (!set_contains(done
, cleaned
) &&
449 !set_contains(todo
, cleaned
)) {
450 /* The prefix directory itself is not yet a mount, make it one. */
451 if (mount(cleaned
, cleaned
, NULL
, MS_BIND
|MS_REC
, NULL
) < 0)
/* Best effort: the result of get_mount_flags() is deliberately ignored via the (void) cast */
455 (void) get_mount_flags(cleaned
, &orig_flags
);
456 orig_flags
&= ~MS_RDONLY
;
/* NOTE(review): this remount and the log message below use "prefix", while the bind mount above
 * uses "cleaned" — confirm whether "cleaned" is what is meant here, too. */
458 if (mount(NULL
, prefix
, NULL
, orig_flags
|MS_BIND
|MS_REMOUNT
|(ro
? MS_RDONLY
: 0), NULL
) < 0)
461 log_debug("Made top-level directory %s a mount point.", prefix
);
467 r
= set_consume(done
, x
);
/* Drain the todo set, moving each path over into "done" and remounting it */
472 while ((x
= set_steal_first(todo
))) {
474 r
= set_consume(done
, x
);
475 if (IN_SET(r
, 0, -EEXIST
))
480 /* Deal with mount points that are obstructed by a later mount */
481 r
= path_is_mount_point(x
, NULL
, 0);
482 if (IN_SET(r
, 0, -ENOENT
))
487 /* Try to reuse the original flag set */
489 (void) get_mount_flags(x
, &orig_flags
);
490 orig_flags
&= ~MS_RDONLY
;
492 if (mount(NULL
, x
, NULL
, orig_flags
|MS_BIND
|MS_REMOUNT
|(ro
? MS_RDONLY
: 0), NULL
) < 0)
/* NOTE(review): the message says "read-only" regardless of the value of "ro" */
495 log_debug("Remounted %s read-only.", x
);
/* Convenience wrapper: opens /proc/self/mountinfo itself (mode "re") and passes the stream, together
 * with prefix, ro and blacklist, on to bind_remount_recursive_with_mountinfo(), returning its result.
 * The stream is declared _cleanup_fclose_.
 * NOTE(review): the return statement for a failed fopen() is not visible in this excerpt. */
500 int bind_remount_recursive(const char *prefix
, bool ro
, char **blacklist
) {
501 _cleanup_fclose_
FILE *proc_self_mountinfo
= NULL
;
503 proc_self_mountinfo
= fopen("/proc/self/mountinfo", "re");
504 if (!proc_self_mountinfo
)
507 return bind_remount_recursive_with_mountinfo(prefix
, ro
, blacklist
, proc_self_mountinfo
);
/* Moves the mount at path onto "/" by calling mount() with MS_MOVE.
 * NOTE(review): the statements before and after this call (and all return statements) are not
 * visible in this excerpt. */
510 int mount_move_root(const char *path
) {
516 if (mount(path
, "/", NULL
, MS_MOVE
, NULL
) < 0)
/* Returns whether fstype is listed in this function's static NUL-separated table, as decided by
 * nulstr_contains(). Before the lookup the name is checked for a "fuse." prefix.
 * NOTE(review): most table entries and the handling of the startswith() result are not visible in
 * this excerpt. */
528 bool fstype_is_network(const char *fstype
) {
529 static const char table
[] =
541 "pvfs2\0" /* OrangeFS */
548 x
= startswith(fstype
, "fuse.");
552 return nulstr_contains(table
, fstype
);
/* Returns whether fstype is listed in this function's static table, as decided by nulstr_contains().
 * NOTE(review): the table entries are not visible in this excerpt. */
555 bool fstype_is_api_vfs(const char *fstype
) {
556 static const char table
[] =
579 return nulstr_contains(table
, fstype
);
/* Returns whether fstype is listed in this function's static table, as decided by nulstr_contains().
 * NOTE(review): the table entries are not visible in this excerpt. */
582 bool fstype_is_ro(const char *fstype
) {
584 /* All Linux file systems that are necessarily read-only */
586 static const char table
[] =
592 return nulstr_contains(table
, fstype
);
/* Returns whether fstype is listed in this function's static table, as decided by nulstr_contains().
 * NOTE(review): the table entries are not visible in this excerpt; judging by the name they are the
 * file systems that support discard — confirm. */
595 bool fstype_can_discard(const char *fstype
) {
597 static const char table
[] =
604 return nulstr_contains(table
, fstype
);
/* Calls umount2(path, flags) on a mount point that may carry several stacked mounts.
 * NOTE(review): the loop and the errno handling that end the repetition are not visible in this
 * excerpt. */
607 int repeat_unmount(const char *path
, int flags
) {
612 /* If there are multiple mounts on a mount point, this
613 * removes them all */
616 if (umount2(path
, flags
) < 0) {
/* Returns the path of a node below /run/systemd/inaccessible/ selected by the file type bits of mode
 * (mode & S_IFMT). The returned strings are string literals. For the "chr" and "blk" nodes the
 * function first checks with access(..., F_OK) that the node exists and returns the "sock" node
 * otherwise.
 * NOTE(review): the case labels are not visible in this excerpt, so the exact mapping from file type
 * to path is not restated here. */
628 const char* mode_to_inaccessible_node(mode_t mode
) {
629 /* This function maps a node type to the correspondent inaccessible node type.
630 * Character and block inaccessible devices may not be created (because major=0 and minor=0),
631 * in such case we map character and block devices to the inaccessible node type socket. */
632 switch(mode
& S_IFMT
) {
634 return "/run/systemd/inaccessible/reg";
636 return "/run/systemd/inaccessible/dir";
638 if (access("/run/systemd/inaccessible/chr", F_OK
) == 0)
639 return "/run/systemd/inaccessible/chr";
640 return "/run/systemd/inaccessible/sock";
642 if (access("/run/systemd/inaccessible/blk", F_OK
) == 0)
643 return "/run/systemd/inaccessible/blk";
644 return "/run/systemd/inaccessible/sock";
646 return "/run/systemd/inaccessible/fifo";
648 return "/run/systemd/inaccessible/sock";
/* FLAG(name) expands to the stringified flag name followed by "|" if that flag is set in the local
 * variable "flags", and to the empty string otherwise. */
653 #define FLAG(name) (flags & name ? STRINGIFY(name) "|" : "")
/* Builds a string that lists the names of the mount flags set in "flags", separated by "|".
 * Bits outside the set of known flags are collected in "overflow" and formatted as a hex number
 * (also when flags is 0). The result is assembled with strjoin().
 * NOTE(review): the full flag list, the error returns and the final return are not visible in this
 * excerpt. */
654 static char* mount_flags_to_string(long unsigned flags
) {
656 _cleanup_free_
char *y
= NULL
;
657 long unsigned overflow
;
659 overflow
= flags
& ~(MS_RDONLY
|
684 if (flags
== 0 || overflow
!= 0)
685 if (asprintf(&y
, "%lx", overflow
) < 0)
688 x
= strjoin(FLAG(MS_RDONLY
),
692 FLAG(MS_SYNCHRONOUS
),
710 FLAG(MS_STRICTATIME
),
716 x
[strlen(x
) - 1] = '\0'; /* truncate the last | */
726 const char *options
) {
/* Logs at debug level what kind of mount operation is about to happen (remount, plain mount,
 * bind mount, move, or mount of a given type), then calls mount(what, where, type, flags, options).
 * If mount() fails, the failure is logged at error_log_level and the result of log_full_errno() is
 * returned. "fl" holds the flags rendered by mount_flags_to_string() and is declared
 * _cleanup_free_; strnull() is applied to it wherever it is printed.
 * NOTE(review): the leading lines of the signature and the final return are not visible in this
 * excerpt. */
728 _cleanup_free_
char *fl
= NULL
;
730 fl
= mount_flags_to_string(flags
);
732 if ((flags
& MS_REMOUNT
) && !what
&& !type
)
733 log_debug("Remounting %s (%s \"%s\")...",
734 where
, strnull(fl
), strempty(options
));
735 else if (!what
&& !type
)
736 log_debug("Mounting %s (%s \"%s\")...",
737 where
, strnull(fl
), strempty(options
));
738 else if ((flags
& MS_BIND
) && !type
)
739 log_debug("Bind-mounting %s on %s (%s \"%s\")...",
740 what
, where
, strnull(fl
), strempty(options
));
741 else if (flags
& MS_MOVE
)
742 log_debug("Moving mount %s → %s (%s \"%s\")...",
743 what
, where
, strnull(fl
), strempty(options
));
745 log_debug("Mounting %s on %s (%s \"%s\")...",
746 strna(type
), where
, strnull(fl
), strempty(options
));
747 if (mount(what
, where
, type
, flags
, options
) < 0)
748 return log_full_errno(error_log_level
, errno
,
749 "Failed to mount %s on %s (%s \"%s\"): %m",
750 strna(type
), where
, strnull(fl
), strempty(options
));
/* Logs at debug level that "what" is about to be unmounted, then calls umount(what). If umount()
 * fails, the failure is logged with log_error_errno() and that call's result is returned.
 * NOTE(review): the return statement for the success case is not visible in this excerpt. */
754 int umount_verbose(const char *what
) {
755 log_debug("Umounting %s...", what
);
756 if (umount(what
) < 0)
757 return log_error_errno(errno
, "Failed to unmount %s: %m", what
);
/* Maps the propagation bits of flags (only MS_SHARED, MS_SLAVE and MS_PRIVATE are looked at) to a
 * string.
 * NOTE(review): the case labels and the returned strings are not visible in this excerpt. */
761 const char *mount_propagation_flags_to_string(unsigned long flags
) {
763 switch (flags
& (MS_SHARED
|MS_SLAVE
|MS_PRIVATE
)) {
778 int mount_propagation_flags_from_string(const char *name
, unsigned long *ret
) {
782 else if (streq(name
, "shared"))
784 else if (streq(name
, "slave"))
786 else if (streq(name
, "private"))