]>
git.ipfire.org Git - thirdparty/systemd.git/blob - src/basic/mountpoint-util.c
681da7402457c2508fb85bd35fe9dcb4439ee573
1 /* SPDX-License-Identifier: LGPL-2.1+ */
7 #include "alloc-util.h"
11 #include "missing_stat.h"
12 #include "missing_syscall.h"
13 #include "mountpoint-util.h"
14 #include "parse-util.h"
15 #include "path-util.h"
16 #include "stat-util.h"
17 #include "stdio-util.h"
20 /* This is the original MAX_HANDLE_SZ definition from the kernel, when the API was introduced. We use that in place of
21 * any more currently defined value to future-proof things: if the size is increased in the API headers, and our code
22 * is recompiled then it would cease working on old kernels, as those refuse any sizes larger than this value with
23 * EINVAL right away. Hence, let's disconnect ourselves from any such API changes, and stick to the original definition
24 * from when it was introduced. We use it as a start value only anyway (see below), and hence should be able to deal
25 * with large file handles anyway. */
26 #define ORIGINAL_MAX_HANDLE_SZ 128
28 int name_to_handle_at_loop (
31 struct file_handle
** ret_handle
,
35 _cleanup_free_
struct file_handle
* h
= NULL
;
36 size_t n
= ORIGINAL_MAX_HANDLE_SZ
;
38 assert (( flags
& ~( AT_SYMLINK_FOLLOW
| AT_EMPTY_PATH
)) == 0 );
40 /* We need to invoke name_to_handle_at() in a loop, given that it might return EOVERFLOW when the specified
41 * buffer is too small. Note that in contrast to what the docs might suggest, MAX_HANDLE_SZ is only good as a
42 * start value, it is not an upper bound on the buffer size required.
44 * This improves on raw name_to_handle_at() also in one other regard: ret_handle and ret_mnt_id can be passed
45 * as NULL if there's no interest in either. */
50 h
= malloc0 ( offsetof ( struct file_handle
, f_handle
) + n
);
56 if ( name_to_handle_at ( fd
, path
, h
, & mnt_id
, flags
) >= 0 ) {
59 * ret_handle
= TAKE_PTR ( h
);
66 if ( errno
!= EOVERFLOW
)
69 if (! ret_handle
&& ret_mnt_id
&& mnt_id
>= 0 ) {
71 /* As it appears, name_to_handle_at() fills in mnt_id even when it returns EOVERFLOW when the
72 * buffer is too small, but that's undocumented. Hence, let's make use of this if it appears to
73 * be filled in, and the caller was interested in only the mount ID and nothing else. */
79 /* If name_to_handle_at() didn't increase the byte size, then this EOVERFLOW is caused by something
80 * else (apparently EOVERFLOW is returned for untriggered nfs4 mounts sometimes), not by the too small
81 * buffer. In that case, propagate EOVERFLOW. */
82 if ( h
-> handle_bytes
<= n
)
85 /* The buffer was too small. Size the new buffer by what name_to_handle_at() returned. */
87 if ( offsetof ( struct file_handle
, f_handle
) + n
< n
) /* check for addition overflow */
94 static int fd_fdinfo_mnt_id ( int fd
, const char * filename
, int flags
, int * ret_mnt_id
) {
95 char path
[ STRLEN ( "/proc/self/fdinfo/" ) + DECIMAL_STR_MAX ( int )];
96 _cleanup_free_
char * fdinfo
= NULL
;
97 _cleanup_close_
int subfd
= - 1 ;
102 assert (( flags
& ~( AT_SYMLINK_FOLLOW
| AT_EMPTY_PATH
)) == 0 );
104 if (( flags
& AT_EMPTY_PATH
) && isempty ( filename
))
105 xsprintf ( path
, "/proc/self/fdinfo/%i" , fd
);
107 subfd
= openat ( fd
, filename
, O_CLOEXEC
| O_PATH
|( flags
& AT_SYMLINK_FOLLOW
? 0 : O_NOFOLLOW
));
111 xsprintf ( path
, "/proc/self/fdinfo/%i" , subfd
);
114 r
= read_full_file ( path
, & fdinfo
, NULL
);
115 if ( r
== - ENOENT
) /* The fdinfo directory is a relatively new addition */
120 p
= startswith ( fdinfo
, "mnt_id:" );
122 p
= strstr ( fdinfo
, " \n mnt_id:" );
123 if (! p
) /* The mnt_id field is a relatively new addition */
129 p
+= strspn ( p
, WHITESPACE
);
130 p
[ strcspn ( p
, WHITESPACE
)] = 0 ;
132 return safe_atoi ( p
, ret_mnt_id
);
135 int fd_is_mount_point ( int fd
, const char * filename
, int flags
) {
136 _cleanup_free_
struct file_handle
* h
= NULL
, * h_parent
= NULL
;
137 int mount_id
= - 1 , mount_id_parent
= - 1 ;
138 bool nosupp
= false , check_st_dev
= true ;
139 STRUCT_STATX_DEFINE ( sx
);
145 assert (( flags
& ~( AT_SYMLINK_FOLLOW
| AT_EMPTY_PATH
)) == 0 );
147 /* First we will try statx()'s STATX_ATTR_MOUNT_ROOT attribute, which is our ideal API, available
150 * If that fails, our second try is the name_to_handle_at() syscall, which tells us the mount id and
151 * an opaque file "handle". It is not supported everywhere though (kernel compile-time option, not
152 * all file systems are hooked up). If it works the mount id is usually good enough to tell us
153 * whether something is a mount point.
155 * If that didn't work we will try to read the mount id from /proc/self/fdinfo/<fd>. This is almost
156 * as good as name_to_handle_at(), however, does not return the opaque file handle. The opaque file
157 * handle is pretty useful to detect the root directory, which we should always consider a mount
158 * point. Hence we use this only as fallback. Exporting the mnt_id in fdinfo is a pretty recent
161 * As last fallback we do traditional fstat() based st_dev comparisons. This is how things were
162 * traditionally done, but unionfs breaks this since it exposes file systems with a variety of st_dev
163 * reported. Also, btrfs subvolumes have different st_dev, even though they aren't real mounts of
166 if ( statx ( fd
, filename
, ( FLAGS_SET ( flags
, AT_SYMLINK_FOLLOW
) ? 0 : AT_SYMLINK_NOFOLLOW
) |
167 ( flags
& AT_EMPTY_PATH
) |
168 AT_NO_AUTOMOUNT
, 0 , & sx
) < 0 ) {
169 if (! ERRNO_IS_NOT_SUPPORTED ( errno
) && ! ERRNO_IS_PRIVILEGE ( errno
))
172 /* If statx() is not available or forbidden, fall back to name_to_handle_at() below */
173 } else if ( FLAGS_SET ( sx
. stx_attributes_mask
, STATX_ATTR_MOUNT_ROOT
)) /* yay! */
174 return FLAGS_SET ( sx
. stx_attributes
, STATX_ATTR_MOUNT_ROOT
);
176 r
= name_to_handle_at_loop ( fd
, filename
, & h
, & mount_id
, flags
);
177 if ( IN_SET ( r
, - ENOSYS
, - EACCES
, - EPERM
, - EOVERFLOW
, - EINVAL
))
178 /* This kernel does not support name_to_handle_at() at all (ENOSYS), or the syscall was blocked
179 * (EACCES/EPERM; maybe through seccomp, because we are running inside of a container?), or the mount
180 * point is not triggered yet (EOVERFLOW, think nfs4), or some general name_to_handle_at() flakiness
181 * (EINVAL): fall back to simpler logic. */
182 goto fallback_fdinfo
;
183 else if ( r
== - EOPNOTSUPP
)
184 /* This kernel or file system does not support name_to_handle_at(), hence let's see if the upper fs
185 * supports it (in which case it is a mount point), otherwise fall back to the traditional stat()
191 r
= name_to_handle_at_loop ( fd
, "" , & h_parent
, & mount_id_parent
, AT_EMPTY_PATH
);
192 if ( r
== - EOPNOTSUPP
) {
194 /* Neither parent nor child do name_to_handle_at()? We have no choice but to fall back. */
195 goto fallback_fdinfo
;
197 /* The parent can't do name_to_handle_at() but the directory we are interested in can? If so,
198 * it must be a mount point. */
203 /* The parent can do name_to_handle_at() but the
204 * directory we are interested in can't? If so, it
205 * must be a mount point. */
209 /* If the file handle for the directory we are
210 * interested in and its parent are identical, we
211 * assume this is the root directory, which is a mount
214 if ( h
-> handle_bytes
== h_parent
-> handle_bytes
&&
215 h
-> handle_type
== h_parent
-> handle_type
&&
216 memcmp ( h
-> f_handle
, h_parent
-> f_handle
, h
-> handle_bytes
) == 0 )
219 return mount_id
!= mount_id_parent
;
222 r
= fd_fdinfo_mnt_id ( fd
, filename
, flags
, & mount_id
);
223 if ( IN_SET ( r
, - EOPNOTSUPP
, - EACCES
, - EPERM
))
228 r
= fd_fdinfo_mnt_id ( fd
, "" , AT_EMPTY_PATH
, & mount_id_parent
);
232 if ( mount_id
!= mount_id_parent
)
235 /* Hmm, so, the mount ids are the same. This leaves one
236 * special case though for the root file system. For that,
237 * let's see if the parent directory has the same inode as we
238 * are interested in. Hence, let's also do fstat() checks now,
239 * too, but avoid the st_dev comparisons, since they aren't
240 * that useful on unionfs mounts. */
241 check_st_dev
= false ;
244 /* yay for fstatat() taking a different set of flags than the other
246 if ( flags
& AT_SYMLINK_FOLLOW
)
247 flags
&= ~ AT_SYMLINK_FOLLOW
;
249 flags
|= AT_SYMLINK_NOFOLLOW
;
250 if ( fstatat ( fd
, filename
, & a
, flags
) < 0 )
253 if ( fstatat ( fd
, "" , & b
, AT_EMPTY_PATH
) < 0 )
256 /* A directory with same device and inode as its parent? Must
257 * be the root directory */
258 if ( a
. st_dev
== b
. st_dev
&&
259 a
. st_ino
== b
. st_ino
)
262 return check_st_dev
&& ( a
. st_dev
!= b
. st_dev
);
265 /* flags can be AT_SYMLINK_FOLLOW or 0 */
266 int path_is_mount_point ( const char * t
, const char * root
, int flags
) {
267 _cleanup_free_
char * canonical
= NULL
;
268 _cleanup_close_
int fd
= - 1 ;
272 assert (( flags
& ~ AT_SYMLINK_FOLLOW
) == 0 );
274 if ( path_equal ( t
, "/" ))
277 /* we need to resolve symlinks manually, we can't just rely on
278 * fd_is_mount_point() to do that for us; if we have a structure like
279 * /bin -> /usr/bin/ and /usr is a mount point, then the parent that we
280 * look at needs to be /usr, not /. */
281 if ( flags
& AT_SYMLINK_FOLLOW
) {
282 r
= chase_symlinks ( t
, root
, CHASE_TRAIL_SLASH
, & canonical
, NULL
);
289 fd
= open_parent ( t
, O_PATH
| O_CLOEXEC
, 0 );
293 return fd_is_mount_point ( fd
, last_path_component ( t
), flags
);
296 int path_get_mnt_id ( const char * path
, int * ret
) {
297 STRUCT_NEW_STATX_DEFINE ( buf
);
300 if ( statx ( AT_FDCWD
, path
, AT_SYMLINK_NOFOLLOW
| AT_NO_AUTOMOUNT
, STATX_MNT_ID
, & buf
. sx
) < 0 ) {
301 if (! ERRNO_IS_NOT_SUPPORTED ( errno
) && ! ERRNO_IS_PRIVILEGE ( errno
))
304 /* Fall back to name_to_handle_at() and then fdinfo if statx is not supported or we lack
307 } else if ( FLAGS_SET ( buf
. nsx
. stx_mask
, STATX_MNT_ID
)) {
308 * ret
= buf
. nsx
. stx_mnt_id
;
312 r
= name_to_handle_at_loop ( AT_FDCWD
, path
, NULL
, ret
, 0 );
313 if ( IN_SET ( r
, - EOPNOTSUPP
, - ENOSYS
, - EACCES
, - EPERM
, - EOVERFLOW
, - EINVAL
)) /* kernel/fs don't support this, or seccomp blocks access, or untriggered mount, or name_to_handle_at() is flaky */
314 return fd_fdinfo_mnt_id ( AT_FDCWD
, path
, 0 , ret
);
319 bool fstype_is_network ( const char * fstype
) {
322 x
= startswith ( fstype
, "fuse." );
326 return STR_IN_SET ( fstype
,
340 "pvfs2" , /* OrangeFS */
346 bool fstype_is_api_vfs ( const char * fstype
) {
347 return STR_IN_SET ( fstype
,
370 bool fstype_is_blockdev_backed ( const char * fstype
) {
373 x
= startswith ( fstype
, "fuse." );
377 return ! streq ( fstype
, "9p" ) && ! fstype_is_network ( fstype
) && ! fstype_is_api_vfs ( fstype
);
380 bool fstype_is_ro ( const char * fstype
) {
381 /* All Linux file systems that are necessarily read-only */
382 return STR_IN_SET ( fstype
,
388 bool fstype_can_discard ( const char * fstype
) {
389 return STR_IN_SET ( fstype
,
396 bool fstype_can_uid_gid ( const char * fstype
) {
398 /* All file systems that have a uid=/gid= mount option that fixates the owners of all files and directories,
399 * current and future. */
401 return STR_IN_SET ( fstype
,
413 int dev_is_devtmpfs ( void ) {
414 _cleanup_fclose_
FILE * proc_self_mountinfo
= NULL
;
418 r
= path_get_mnt_id ( "/dev" , & mount_id
);
422 r
= fopen_unlocked ( "/proc/self/mountinfo" , "re" , & proc_self_mountinfo
);
427 _cleanup_free_
char * line
= NULL
;
430 r
= read_line ( proc_self_mountinfo
, LONG_LINE_MAX
, & line
);
436 if ( sscanf ( line
, "%i" , & mid
) != 1 )
442 e
= strstr ( line
, " - " );
446 /* accept any name that starts with the currently expected type */
447 if ( startswith ( e
+ 3 , "devtmpfs" ))
454 const char * mount_propagation_flags_to_string ( unsigned long flags
) {
456 switch ( flags
& ( MS_SHARED
| MS_SLAVE
| MS_PRIVATE
)) {
470 int mount_propagation_flags_from_string ( const char * name
, unsigned long * ret
) {
474 else if ( streq ( name
, "shared" ))
476 else if ( streq ( name
, "slave" ))
478 else if ( streq ( name
, "private" ))