1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
4 This file is part of systemd.
6 Copyright 2010 Lennart Poettering
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
23 #include <sys/mount.h>
24 #include <sys/statvfs.h>
29 #include "mount-util.h"
30 #include "parse-util.h"
31 #include "path-util.h"
33 #include "stdio-util.h"
34 #include "string-util.h"
/* Determines the mount ID of a file by reading the "mnt_id:" field of
 * the matching /proc/self/fdinfo/<fd> entry.
 *
 * fd:       descriptor that is either inspected directly or used as
 *           the base for openat()
 * filename: name to open relative to fd
 * flags:    when AT_EMPTY_PATH is set and filename is empty, fd itself
 *           is looked up and no additional descriptor is opened
 * mnt_id:   receives the integer parsed by safe_atoi()
 *
 * On the path visible here the result of safe_atoi() is returned.
 *
 * NOTE(review): the declarations of r and p and the bodies of the
 * error branches are not part of this excerpt. fd_is_mount_point()
 * compares the result against -EOPNOTSUPP, so presumably that is what
 * the two "relatively new addition" cases produce -- confirm against
 * the complete file. */
37 static int fd_fdinfo_mnt_id(int fd
, const char *filename
, int flags
, int *mnt_id
) {
38 char path
[strlen("/proc/self/fdinfo/") + DECIMAL_STR_MAX(int)];
39 _cleanup_free_
char *fdinfo
= NULL
;
40 _cleanup_close_
int subfd
= -1;
/* AT_EMPTY_PATH together with an empty name refers to fd itself */
44 if ((flags
& AT_EMPTY_PATH
) && isempty(filename
))
45 xsprintf(path
, "/proc/self/fdinfo/%i", fd
);
/* Otherwise obtain an O_PATH descriptor for the named entry and look
 * at the fdinfo file of that descriptor instead */
47 subfd
= openat(fd
, filename
, O_RDONLY
|O_CLOEXEC
|O_NOCTTY
|O_PATH
);
51 xsprintf(path
, "/proc/self/fdinfo/%i", subfd
);
54 r
= read_full_file(path
, &fdinfo
, NULL
);
55 if (r
== -ENOENT
) /* The fdinfo directory is a relatively new addition */
/* The field is either the very first thing in the file... */
60 p
= startswith(fdinfo
, "mnt_id:");
/* ...or it begins a later line, in which case a newline precedes it */
62 p
= strstr(fdinfo
, "\nmnt_id:");
63 if (!p
) /* The mnt_id field is a relatively new addition */
/* Skip the whitespace in front of the value, then cut the string off
 * at the first whitespace after it so only the number is parsed */
69 p
+= strspn(p
, WHITESPACE
);
70 p
[strcspn(p
, WHITESPACE
)] = 0;
72 return safe_atoi(p
, mnt_id
);
/* Checks whether the entry "filename" located below the directory
 * referred to by "fd" is a mount point. The directory fd itself,
 * addressed as "" with AT_EMPTY_PATH, serves as the parent that the
 * entry is compared against.
 *
 * fd:       descriptor of the parent directory
 * filename: name of the entry relative to fd
 * flags:    handed to name_to_handle_at() and fd_fdinfo_mnt_id()
 *           unchanged; before the fstatat() call AT_SYMLINK_FOLLOW is
 *           cleared if set, and AT_SYMLINK_NOFOLLOW is set (presumably
 *           in an else branch that is not part of this excerpt)
 *
 * The return statements visible here yield the outcome of a comparison
 * (mount ids differ, or st_dev differs while check_st_dev is still
 * true).
 *
 * NOTE(review): this excerpt lacks a number of lines, among them the
 * declarations of r, a and b, the fallback labels, several return
 * statements and the closing lines of a few comments (for instance the
 * one ending in "which is a mount"). What the function returns on
 * errors or in the "must be a mount point" cases therefore cannot be
 * confirmed from here. */
76 int fd_is_mount_point(int fd
, const char *filename
, int flags
) {
77 union file_handle_union h
= FILE_HANDLE_INIT
, h_parent
= FILE_HANDLE_INIT
;
78 int mount_id
= -1, mount_id_parent
= -1;
79 bool nosupp
= false, check_st_dev
= true;
86 /* First we will try the name_to_handle_at() syscall, which
87 * tells us the mount id and an opaque file "handle". It is
88 * not supported everywhere though (kernel compile-time
89 * option, not all file systems are hooked up). If it works
90 * the mount id is usually good enough to tell us whether
91 * something is a mount point.
93 * If that didn't work we will try to read the mount id from
94 * /proc/self/fdinfo/<fd>. This is almost as good as
95 * name_to_handle_at(), however, does not return the
96 * opaque file handle. The opaque file handle is pretty useful
97 * to detect the root directory, which we should always
98 * consider a mount point. Hence we use this only as
99 * fallback. Exporting the mnt_id in fdinfo is a pretty recent kernel addition.
102 * As last fallback we do traditional fstat() based st_dev
103 * comparisons. This is how things were traditionally done,
104 * but unionfs breaks this since it exposes file
105 * systems with a variety of st_dev reported. Also, btrfs
106 * subvolumes have different st_dev, even though they aren't
107 * real mounts of their own. */
/* Strategy 1: handle and mount id of the entry itself */
109 r
= name_to_handle_at(fd
, filename
, &h
.handle
, &mount_id
, flags
);
112 /* This kernel does not support name_to_handle_at()
113 * fall back to simpler logic. */
114 goto fallback_fdinfo
;
115 else if (errno
== EOPNOTSUPP
)
116 /* This kernel or file system does not support
117 * name_to_handle_at(), hence let's see if the
118 * upper fs supports it (in which case it is a
119 * mount point), otherwise fallback to the
120 * traditional stat() logic */
126 r
= name_to_handle_at(fd
, "", &h_parent
.handle
, &mount_id_parent
, AT_EMPTY_PATH
);
128 if (errno
== EOPNOTSUPP
) {
130 /* Neither parent nor child do name_to_handle_at()?
131 We have no choice but to fall back. */
132 goto fallback_fdinfo
;
134 /* The parent can't do name_to_handle_at() but the
135 * directory we are interested in can?
136 * If so, it must be a mount point. */
142 /* The parent can do name_to_handle_at() but the
143 * directory we are interested in can't? If so, it
144 * must be a mount point. */
148 /* If the file handle for the directory we are
149 * interested in and its parent are identical, we
150 * assume this is the root directory, which is a mount
153 if (h
.handle
.handle_bytes
== h_parent
.handle
.handle_bytes
&&
154 h
.handle
.handle_type
== h_parent
.handle
.handle_type
&&
155 memcmp(h
.handle
.f_handle
, h_parent
.handle
.f_handle
, h
.handle
.handle_bytes
) == 0)
158 return mount_id
!= mount_id_parent
;
161 r
= fd_fdinfo_mnt_id(fd
, filename
, flags
, &mount_id
);
162 if (r
== -EOPNOTSUPP
)
167 r
= fd_fdinfo_mnt_id(fd
, "", AT_EMPTY_PATH
, &mount_id_parent
);
171 if (mount_id
!= mount_id_parent
)
174 /* Hmm, so, the mount ids are the same. This leaves one
175 * special case though for the root file system. For that,
176 * let's see if the parent directory has the same inode as we
177 * are interested in. Hence, let's also do fstat() checks now,
178 * too, but avoid the st_dev comparisons, since they aren't
179 * that useful on unionfs mounts. */
180 check_st_dev
= false;
183 /* yay for fstatat() taking a different set of flags than the other
185 if (flags
& AT_SYMLINK_FOLLOW
)
186 flags
&= ~AT_SYMLINK_FOLLOW
;
188 flags
|= AT_SYMLINK_NOFOLLOW
;
189 if (fstatat(fd
, filename
, &a
, flags
) < 0)
192 if (fstatat(fd
, "", &b
, AT_EMPTY_PATH
) < 0)
195 /* A directory with same device and inode as its parent? Must
196 * be the root directory */
197 if (a
.st_dev
== b
.st_dev
&&
198 a
.st_ino
== b
.st_ino
)
/* Not the same inode as the parent: a differing st_dev is the only
 * remaining indicator, and it is only consulted when check_st_dev was
 * not switched off above */
201 return check_st_dev
&& (a
.st_dev
!= b
.st_dev
);
/* Path-based front end for fd_is_mount_point(): opens the parent
 * directory of t with O_PATH and asks fd_is_mount_point() about the
 * last component of t relative to that descriptor.
 *
 * t:     path to examine; it is first compared against "/" with
 *        path_equal() (the outcome of that branch is not visible in
 *        this excerpt)
 * flags: with AT_SYMLINK_FOLLOW the path is run through
 *        canonicalize_file_name() first; the value is also passed on
 *        to fd_is_mount_point()
 *
 * Returns whatever fd_is_mount_point() returns on the visible path.
 *
 * NOTE(review): error handling and the statement that presumably
 * switches t over to the canonicalized path are not part of this
 * excerpt -- confirm against the complete file. */
204 /* flags can be AT_SYMLINK_FOLLOW or 0 */
205 int path_is_mount_point(const char *t
, int flags
) {
206 _cleanup_close_
int fd
= -1;
207 _cleanup_free_
char *canonical
= NULL
, *parent
= NULL
;
211 if (path_equal(t
, "/"))
214 /* we need to resolve symlinks manually, we can't just rely on
215 * fd_is_mount_point() to do that for us; if we have a structure like
216 * /bin -> /usr/bin/ and /usr is a mount point, then the parent that we
217 * look at needs to be /usr, not /. */
218 if (flags
& AT_SYMLINK_FOLLOW
) {
219 canonical
= canonicalize_file_name(t
);
226 parent
= dirname_malloc(t
);
/* O_PATH suffices here: the descriptor only serves as the directory
 * that the lookups in fd_is_mount_point() are relative to */
230 fd
= openat(AT_FDCWD
, parent
, O_RDONLY
|O_NONBLOCK
|O_DIRECTORY
|O_CLOEXEC
|O_PATH
);
234 return fd_is_mount_point(fd
, basename(t
), flags
);
/* Unmounts the mount points found in /proc/self/mountinfo whose path
 * lies at or below prefix.
 *
 * prefix: path that every candidate mount point is tested against with
 *         path_startswith()
 * flags:  passed unchanged to umount2()
 *
 * Each mountinfo record is read with fscanf(); only field (5), the
 * mount point, is captured. It is unescaped with cunescape() before
 * being compared and unmounted.
 *
 * NOTE(review): the loop constructs, the handling of fscanf(),
 * cunescape() and umount2() failures and the return value are not part
 * of this excerpt. */
237 int umount_recursive(const char *prefix
, int flags
) {
241 /* Try to umount everything recursively below a
242 * directory. Also, take care of stacked mounts, and keep
243 * unmounting them until they are gone. */
246 _cleanup_fclose_
FILE *proc_self_mountinfo
= NULL
;
251 proc_self_mountinfo
= fopen("/proc/self/mountinfo", "re");
252 if (!proc_self_mountinfo
)
256 _cleanup_free_
char *path
= NULL
, *p
= NULL
;
259 k
= fscanf(proc_self_mountinfo
,
260 "%*s " /* (1) mount id */
261 "%*s " /* (2) parent id */
262 "%*s " /* (3) major:minor */
263 "%*s " /* (4) root */
264 "%ms " /* (5) mount point */
265 "%*s" /* (6) mount options */
266 "%*[^-]" /* (7) optional fields */
267 "- " /* (8) separator */
268 "%*s " /* (9) file system type */
269 "%*s" /* (10) mount source */
270 "%*s" /* (11) mount options 2 */
271 "%*[^\n]", /* some rubbish at the end */
/* The mount point field is escaped in mountinfo; p receives the
 * unescaped path */
280 r
= cunescape(path
, UNESCAPE_RELAX
, &p
);
284 if (!path_startswith(p
, prefix
))
287 if (umount2(p
, flags
) < 0) {
/* Looks up file system information for path via statvfs().
 *
 * path:  file system path handed to statvfs()
 * flags: output parameter; presumably receives the mount flags taken
 *        from the statvfs result -- the assignment is not part of this
 *        excerpt, TODO confirm
 *
 * NOTE(review): the declaration of buf and both the error and success
 * returns are not visible here. bind_remount_recursive() calls this
 * with the result explicitly cast to void. */
303 static int get_mount_flags(const char *path
, unsigned long *flags
) {
306 if (statvfs(path
, &buf
) < 0)
/* Remounts prefix and the mounts at or below it with
 * MS_BIND|MS_REMOUNT, adding MS_RDONLY when ro is true and leaving it
 * out otherwise.
 *
 * prefix: directory to start from; a copy ("cleaned") is normalized
 *         with path_kill_slashes() and used for the path comparisons
 * ro:     true to request read-only, false for read-write
 *
 * Bookkeeping visible here: "done" collects the paths that have
 * already been handled, while "todo" is filled from
 * /proc/self/mountinfo with every mount point that starts with
 * "cleaned" and is not yet in "done". Entries are then taken out of
 * "todo" one by one, moved into "done" and remounted.
 *
 * NOTE(review): the loop constructs, error returns, the return value
 * and the closing lines of one comment ("...or an autofs, we") are not
 * part of this excerpt. */
312 int bind_remount_recursive(const char *prefix
, bool ro
) {
313 _cleanup_set_free_free_ Set
*done
= NULL
;
314 _cleanup_free_
char *cleaned
= NULL
;
317 /* Recursively remount a directory (and all its submounts)
318 * read-only or read-write. If the directory is already
319 * mounted, we reuse the mount and simply mark it
320 * MS_BIND|MS_RDONLY (or remove the MS_RDONLY for read-write
321 * operation). If it isn't we first make it one. Afterwards we
322 * apply MS_BIND|MS_RDONLY (or remove MS_RDONLY) to all
323 * submounts we can access, too. When mounts are stacked on
324 * the same mount point we only care for each individual
325 * "top-level" mount on each point, as we cannot
326 * influence/access the underlying mounts anyway. We do not
327 * have any effect on future submounts that might get
328 * propagated, they might be writable. This includes future
329 * submounts that have been triggered via autofs. */
331 cleaned
= strdup(prefix
);
335 path_kill_slashes(cleaned
);
337 done
= set_new(&string_hash_ops
);
342 _cleanup_fclose_
FILE *proc_self_mountinfo
= NULL
;
343 _cleanup_set_free_free_ Set
*todo
= NULL
;
344 bool top_autofs
= false;
346 unsigned long orig_flags
;
348 todo
= set_new(&string_hash_ops
);
352 proc_self_mountinfo
= fopen("/proc/self/mountinfo", "re");
353 if (!proc_self_mountinfo
)
357 _cleanup_free_
char *path
= NULL
, *p
= NULL
, *type
= NULL
;
360 k
= fscanf(proc_self_mountinfo
,
361 "%*s " /* (1) mount id */
362 "%*s " /* (2) parent id */
363 "%*s " /* (3) major:minor */
364 "%*s " /* (4) root */
365 "%ms " /* (5) mount point */
366 "%*s" /* (6) mount options (superblock) */
367 "%*[^-]" /* (7) optional fields */
368 "- " /* (8) separator */
369 "%ms " /* (9) file system type */
370 "%*s" /* (10) mount source */
371 "%*s" /* (11) mount options (bind mount) */
372 "%*[^\n]", /* some rubbish at the end */
382 r
= cunescape(path
, UNESCAPE_RELAX
, &p
);
386 /* Let's ignore autofs mounts. If they aren't
387 * triggered yet, we want to avoid triggering
388 * them, as we don't make any guarantees for
389 * future submounts anyway. If they are
390 * already triggered, then we will find
391 * another entry for this. */
392 if (streq(type
, "autofs")) {
393 top_autofs
= top_autofs
|| path_equal(cleaned
, p
);
397 if (path_startswith(p
, cleaned
) &&
398 !set_contains(done
, p
)) {
400 r
= set_consume(todo
, p
);
410 /* If we have no submounts to process anymore and if
411 * the root is either already done, or an autofs, we
413 if (set_isempty(todo
) &&
414 (top_autofs
|| set_contains(done
, cleaned
)))
417 if (!set_contains(done
, cleaned
) &&
418 !set_contains(todo
, cleaned
)) {
419 /* The prefix directory itself is not yet a
420 * mount, make it one. */
421 if (mount(cleaned
, cleaned
, NULL
, MS_BIND
|MS_REC
, NULL
) < 0)
425 (void) get_mount_flags(cleaned
, &orig_flags
);
426 orig_flags
&= ~MS_RDONLY
;
/* NOTE(review): this remount is addressed to "prefix", whereas the
 * neighbouring calls operate on the normalized copy "cleaned" --
 * confirm that the difference is intended. */
428 if (mount(NULL
, prefix
, NULL
, orig_flags
|MS_BIND
|MS_REMOUNT
|(ro
? MS_RDONLY
: 0), NULL
) < 0)
435 r
= set_consume(done
, x
);
440 while ((x
= set_steal_first(todo
))) {
442 r
= set_consume(done
, x
);
443 if (r
== -EEXIST
|| r
== 0)
448 /* Try to reuse the original flag set, but
449 * don't care for errors, in case of
450 * obstructed mounts */
452 (void) get_mount_flags(x
, &orig_flags
);
453 orig_flags
&= ~MS_RDONLY
;
455 if (mount(NULL
, x
, NULL
, orig_flags
|MS_BIND
|MS_REMOUNT
|(ro
? MS_RDONLY
: 0), NULL
) < 0) {
457 /* Deal with mount points that are
458 * obstructed by a later mount */
468 int mount_move_root(const char *path
) {
474 if (mount(path
, "/", NULL
, MS_MOVE
, NULL
) < 0)