]>
git.ipfire.org Git - thirdparty/systemd.git/blob - src/basic/mountpoint-util.c
1 /* SPDX-License-Identifier: LGPL-2.1+ */
7 #include "alloc-util.h"
12 #include "mountpoint-util.h"
13 #include "parse-util.h"
14 #include "path-util.h"
15 #include "stdio-util.h"
18 /* This is the original MAX_HANDLE_SZ definition from the kernel, when the API was introduced. We use that in place of
19 * any more currently defined value to future-proof things: if the size is increased in the API headers, and our code
20 * is recompiled then it would cease working on old kernels, as those refuse any sizes larger than this value with
21 * EINVAL right away. Hence, let's disconnect ourselves from any such API changes, and stick to the original definition
22 * from when it was introduced. We use it as a start value only anyway (see below), and hence should be able to deal
23 * with large file handles anyway. */
24 #define ORIGINAL_MAX_HANDLE_SZ 128
26 int name_to_handle_at_loop (
29 struct file_handle
** ret_handle
,
33 _cleanup_free_
struct file_handle
* h
= NULL
;
34 size_t n
= ORIGINAL_MAX_HANDLE_SZ
;
36 /* We need to invoke name_to_handle_at() in a loop, given that it might return EOVERFLOW when the specified
37 * buffer is too small. Note that in contrast to what the docs might suggest, MAX_HANDLE_SZ is only good as a
38 * start value, it is not an upper bound on the buffer size required.
40 * This improves on raw name_to_handle_at() also in one other regard: ret_handle and ret_mnt_id can be passed
41 * as NULL if there's no interest in either. */
46 h
= malloc0 ( offsetof ( struct file_handle
, f_handle
) + n
);
52 if ( name_to_handle_at ( fd
, path
, h
, & mnt_id
, flags
) >= 0 ) {
55 * ret_handle
= TAKE_PTR ( h
);
62 if ( errno
!= EOVERFLOW
)
65 if (! ret_handle
&& ret_mnt_id
&& mnt_id
>= 0 ) {
67 /* As it appears, name_to_handle_at() fills in mnt_id even when it returns EOVERFLOW when the
68 * buffer is too small, but that's undocumented. Hence, let's make use of this if it appears to
69 * be filled in, and the caller was interested in only the mount ID and nothing else. */
75 /* If name_to_handle_at() didn't increase the byte size, then this EOVERFLOW is caused by something
76 * else (apparently EOVERFLOW is returned for untriggered nfs4 mounts sometimes), not by the too small
77 * buffer. In that case propagate EOVERFLOW */
78 if ( h
-> handle_bytes
<= n
)
81 /* The buffer was too small. Size the new buffer by what name_to_handle_at() returned. */
83 if ( offsetof ( struct file_handle
, f_handle
) + n
< n
) /* check for addition overflow */
90 static int fd_fdinfo_mnt_id ( int fd
, const char * filename
, int flags
, int * mnt_id
) {
91 char path
[ STRLEN ( "/proc/self/fdinfo/" ) + DECIMAL_STR_MAX ( int )];
92 _cleanup_free_
char * fdinfo
= NULL
;
93 _cleanup_close_
int subfd
= - 1 ;
97 if (( flags
& AT_EMPTY_PATH
) && isempty ( filename
))
98 xsprintf ( path
, "/proc/self/fdinfo/%i" , fd
);
100 subfd
= openat ( fd
, filename
, O_CLOEXEC
| O_PATH
|( flags
& AT_SYMLINK_FOLLOW
? 0 : O_NOFOLLOW
));
104 xsprintf ( path
, "/proc/self/fdinfo/%i" , subfd
);
107 r
= read_full_file ( path
, & fdinfo
, NULL
);
108 if ( r
== - ENOENT
) /* The fdinfo directory is a relatively new addition */
113 p
= startswith ( fdinfo
, "mnt_id:" );
115 p
= strstr ( fdinfo
, " \n mnt_id:" );
116 if (! p
) /* The mnt_id field is a relatively new addition */
122 p
+= strspn ( p
, WHITESPACE
);
123 p
[ strcspn ( p
, WHITESPACE
)] = 0 ;
125 return safe_atoi ( p
, mnt_id
);
128 int fd_is_mount_point ( int fd
, const char * filename
, int flags
) {
129 _cleanup_free_
struct file_handle
* h
= NULL
, * h_parent
= NULL
;
130 int mount_id
= - 1 , mount_id_parent
= - 1 ;
131 bool nosupp
= false , check_st_dev
= true ;
138 /* First we will try the name_to_handle_at() syscall, which
139 * tells us the mount id and an opaque file "handle". It is
140 * not supported everywhere though (kernel compile-time
141 * option, not all file systems are hooked up). If it works
142 * the mount id is usually good enough to tell us whether
143 * something is a mount point.
145 * If that didn't work we will try to read the mount id from
146 * /proc/self/fdinfo/<fd>. This is almost as good as
147 * name_to_handle_at(), however, does not return the
148 * opaque file handle. The opaque file handle is pretty useful
149 * to detect the root directory, which we should always
150 * consider a mount point. Hence we use this only as
151 * fallback. Exporting the mnt_id in fdinfo is a pretty recent
154 * As a last fallback we do traditional fstat()-based st_dev
155 * comparisons. This is how things were traditionally done,
156 * but unionfs breaks this since it exposes file
157 * systems with a variety of st_dev reported. Also, btrfs
158 * subvolumes have different st_dev, even though they aren't
159 * real mounts of their own. */
161 r
= name_to_handle_at_loop ( fd
, filename
, & h
, & mount_id
, flags
);
162 if ( IN_SET ( r
, - ENOSYS
, - EACCES
, - EPERM
, - EOVERFLOW
, - EINVAL
))
163 /* This kernel does not support name_to_handle_at() at all (ENOSYS), or the syscall was blocked
164 * (EACCES/EPERM; maybe through seccomp, because we are running inside of a container?), or the mount
165 * point is not triggered yet (EOVERFLOW, think nfs4), or some general name_to_handle_at() flakiness
166 * (EINVAL): fall back to simpler logic. */
167 goto fallback_fdinfo
;
168 else if ( r
== - EOPNOTSUPP
)
169 /* This kernel or file system does not support name_to_handle_at(), hence let's see if the upper fs
170 * supports it (in which case it is a mount point), otherwise fallback to the traditional stat()
176 r
= name_to_handle_at_loop ( fd
, "" , & h_parent
, & mount_id_parent
, AT_EMPTY_PATH
);
177 if ( r
== - EOPNOTSUPP
) {
179 /* Neither parent nor child do name_to_handle_at()? We have no choice but to fall back. */
180 goto fallback_fdinfo
;
182 /* The parent can't do name_to_handle_at() but the directory we are interested in can? If so,
183 * it must be a mount point. */
188 /* The parent can do name_to_handle_at() but the
189 * directory we are interested in can't? If so, it
190 * must be a mount point. */
194 /* If the file handle for the directory we are
195 * interested in and its parent are identical, we
196 * assume this is the root directory, which is a mount
199 if ( h
-> handle_bytes
== h_parent
-> handle_bytes
&&
200 h
-> handle_type
== h_parent
-> handle_type
&&
201 memcmp ( h
-> f_handle
, h_parent
-> f_handle
, h
-> handle_bytes
) == 0 )
204 return mount_id
!= mount_id_parent
;
207 r
= fd_fdinfo_mnt_id ( fd
, filename
, flags
, & mount_id
);
208 if ( IN_SET ( r
, - EOPNOTSUPP
, - EACCES
, - EPERM
))
213 r
= fd_fdinfo_mnt_id ( fd
, "" , AT_EMPTY_PATH
, & mount_id_parent
);
217 if ( mount_id
!= mount_id_parent
)
220 /* Hmm, so, the mount ids are the same. This leaves one
221 * special case though for the root file system. For that,
222 * let's see if the parent directory has the same inode as we
223 * are interested in. Hence, let's also do fstat() checks now,
224 * too, but avoid the st_dev comparisons, since they aren't
225 * that useful on unionfs mounts. */
226 check_st_dev
= false ;
229 /* yay for fstatat() taking a different set of flags than the other
231 if ( flags
& AT_SYMLINK_FOLLOW
)
232 flags
&= ~ AT_SYMLINK_FOLLOW
;
234 flags
|= AT_SYMLINK_NOFOLLOW
;
235 if ( fstatat ( fd
, filename
, & a
, flags
) < 0 )
238 if ( fstatat ( fd
, "" , & b
, AT_EMPTY_PATH
) < 0 )
241 /* A directory with same device and inode as its parent? Must
242 * be the root directory */
243 if ( a
. st_dev
== b
. st_dev
&&
244 a
. st_ino
== b
. st_ino
)
247 return check_st_dev
&& ( a
. st_dev
!= b
. st_dev
);
250 /* flags can be AT_SYMLINK_FOLLOW or 0 */
251 int path_is_mount_point ( const char * t
, const char * root
, int flags
) {
252 _cleanup_free_
char * canonical
= NULL
;
253 _cleanup_close_
int fd
= - 1 ;
257 assert (( flags
& ~ AT_SYMLINK_FOLLOW
) == 0 );
259 if ( path_equal ( t
, "/" ))
262 /* we need to resolve symlinks manually, we can't just rely on
263 * fd_is_mount_point() to do that for us; if we have a structure like
264 * /bin -> /usr/bin/ and /usr is a mount point, then the parent that we
265 * look at needs to be /usr, not /. */
266 if ( flags
& AT_SYMLINK_FOLLOW
) {
267 r
= chase_symlinks ( t
, root
, CHASE_TRAIL_SLASH
, & canonical
);
274 fd
= open_parent ( t
, O_PATH
| O_CLOEXEC
, 0 );
278 return fd_is_mount_point ( fd
, last_path_component ( t
), flags
);
281 int path_get_mnt_id ( const char * path
, int * ret
) {
284 r
= name_to_handle_at_loop ( AT_FDCWD
, path
, NULL
, ret
, 0 );
285 if ( IN_SET ( r
, - EOPNOTSUPP
, - ENOSYS
, - EACCES
, - EPERM
, - EOVERFLOW
, - EINVAL
)) /* kernel/fs don't support this, or seccomp blocks access, or untriggered mount, or name_to_handle_at() is flaky */
286 return fd_fdinfo_mnt_id ( AT_FDCWD
, path
, 0 , ret
);
291 bool fstype_is_network ( const char * fstype
) {
294 x
= startswith ( fstype
, "fuse." );
298 return STR_IN_SET ( fstype
,
310 "pvfs2" , /* OrangeFS */
315 bool fstype_is_api_vfs ( const char * fstype
) {
316 return STR_IN_SET ( fstype
,
339 bool fstype_is_ro ( const char * fstype
) {
340 /* All Linux file systems that are necessarily read-only */
341 return STR_IN_SET ( fstype
,
347 bool fstype_can_discard ( const char * fstype
) {
348 return STR_IN_SET ( fstype
,
355 bool fstype_can_uid_gid ( const char * fstype
) {
357 /* All file systems that have a uid=/gid= mount option that fixates the owners of all files and directories,
358 * current and future. */
360 return STR_IN_SET ( fstype
,
371 int dev_is_devtmpfs ( void ) {
372 _cleanup_fclose_
FILE * proc_self_mountinfo
= NULL
;
376 r
= path_get_mnt_id ( "/dev" , & mount_id
);
380 r
= fopen_unlocked ( "/proc/self/mountinfo" , "re" , & proc_self_mountinfo
);
385 _cleanup_free_
char * line
= NULL
;
388 r
= read_line ( proc_self_mountinfo
, LONG_LINE_MAX
, & line
);
394 if ( sscanf ( line
, "%i" , & mid
) != 1 )
400 e
= strstr ( line
, " - " );
404 /* accept any name that starts with the currently expected type */
405 if ( startswith ( e
+ 3 , "devtmpfs" ))
412 const char * mount_propagation_flags_to_string ( unsigned long flags
) {
414 switch ( flags
& ( MS_SHARED
| MS_SLAVE
| MS_PRIVATE
)) {
428 int mount_propagation_flags_from_string ( const char * name
, unsigned long * ret
) {
432 else if ( streq ( name
, "shared" ))
434 else if ( streq ( name
, "slave" ))
436 else if ( streq ( name
, "private" ))