]>
git.ipfire.org Git - thirdparty/systemd.git/blob - src/basic/mountpoint-util.c
1 /* SPDX-License-Identifier: LGPL-2.1+ */
8 #include "alloc-util.h"
13 #include "mountpoint-util.h"
14 #include "parse-util.h"
15 #include "path-util.h"
16 #include "stdio-util.h"
19 /* This is the original MAX_HANDLE_SZ definition from the kernel, when the API was introduced. We use that in place of
20 * any more recently defined value to future-proof things: if the size is increased in the API headers, and our code
21 * is recompiled then it would cease working on old kernels, as those refuse any sizes larger than this value with
22 * EINVAL right-away. Hence, let's disconnect ourselves from any such API changes, and stick to the original definition
23 * from when it was introduced. We use it only as a start value (see below), and hence should be able to deal
24 * with large file handles anyway. */
25 #define ORIGINAL_MAX_HANDLE_SZ 128
27 int name_to_handle_at_loop (
30 struct file_handle
** ret_handle
,
34 _cleanup_free_
struct file_handle
* h
= NULL
;
35 size_t n
= ORIGINAL_MAX_HANDLE_SZ
;
37 /* We need to invoke name_to_handle_at() in a loop, given that it might return EOVERFLOW when the specified
38 * buffer is too small. Note that in contrast to what the docs might suggest, MAX_HANDLE_SZ is only good as a
39 * start value, it is not an upper bound on the buffer size required.
41 * This improves on raw name_to_handle_at() also in one other regard: ret_handle and ret_mnt_id can be passed
42 * as NULL if there's no interest in either. */
47 h
= malloc0 ( offsetof ( struct file_handle
, f_handle
) + n
);
53 if ( name_to_handle_at ( fd
, path
, h
, & mnt_id
, flags
) >= 0 ) {
56 * ret_handle
= TAKE_PTR ( h
);
63 if ( errno
!= EOVERFLOW
)
66 if (! ret_handle
&& ret_mnt_id
&& mnt_id
>= 0 ) {
68 /* As it appears, name_to_handle_at() fills in mnt_id even when it returns EOVERFLOW when the
69 * buffer is too small, but that's undocumented. Hence, let's make use of this if it appears to
70 * be filled in, and the caller was interested in only the mount ID and nothing else. */
76 /* If name_to_handle_at() didn't increase the byte size, then this EOVERFLOW is caused by something
77 * else (apparently EOVERFLOW is returned for untriggered nfs4 mounts sometimes), not by the too small
78 * buffer. In that case propagate EOVERFLOW */
79 if ( h
-> handle_bytes
<= n
)
82 /* The buffer was too small. Size the new buffer by what name_to_handle_at() returned. */
84 if ( offsetof ( struct file_handle
, f_handle
) + n
< n
) /* check for addition overflow */
91 static int fd_fdinfo_mnt_id ( int fd
, const char * filename
, int flags
, int * mnt_id
) {
92 char path
[ STRLEN ( "/proc/self/fdinfo/" ) + DECIMAL_STR_MAX ( int )];
93 _cleanup_free_
char * fdinfo
= NULL
;
94 _cleanup_close_
int subfd
= - 1 ;
98 if (( flags
& AT_EMPTY_PATH
) && isempty ( filename
))
99 xsprintf ( path
, "/proc/self/fdinfo/%i" , fd
);
101 subfd
= openat ( fd
, filename
, O_CLOEXEC
| O_PATH
|( flags
& AT_SYMLINK_FOLLOW
? 0 : O_NOFOLLOW
));
105 xsprintf ( path
, "/proc/self/fdinfo/%i" , subfd
);
108 r
= read_full_file ( path
, & fdinfo
, NULL
);
109 if ( r
== - ENOENT
) /* The fdinfo directory is a relatively new addition */
114 p
= startswith ( fdinfo
, "mnt_id:" );
116 p
= strstr ( fdinfo
, " \n mnt_id:" );
117 if (! p
) /* The mnt_id field is a relatively new addition */
123 p
+= strspn ( p
, WHITESPACE
);
124 p
[ strcspn ( p
, WHITESPACE
)] = 0 ;
126 return safe_atoi ( p
, mnt_id
);
129 int fd_is_mount_point ( int fd
, const char * filename
, int flags
) {
130 _cleanup_free_
struct file_handle
* h
= NULL
, * h_parent
= NULL
;
131 int mount_id
= - 1 , mount_id_parent
= - 1 ;
132 bool nosupp
= false , check_st_dev
= true ;
139 /* First we will try the name_to_handle_at() syscall, which
140 * tells us the mount id and an opaque file "handle". It is
141 * not supported everywhere though (kernel compile-time
142 * option, not all file systems are hooked up). If it works
143 * the mount id is usually good enough to tell us whether
144 * something is a mount point.
146 * If that didn't work we will try to read the mount id from
147 * /proc/self/fdinfo/<fd>. This is almost as good as
148 * name_to_handle_at(), but it does not return the
149 * opaque file handle. The opaque file handle is pretty useful
150 * to detect the root directory, which we should always
151 * consider a mount point. Hence we use this only as
152 * fallback. Exporting the mnt_id in fdinfo is a pretty recent
155 * As a last fallback we do traditional fstat() based st_dev
156 * comparisons. This is how things were traditionally done,
157 * but unionfs breaks this since it exposes file
158 * systems with a variety of st_dev reported. Also, btrfs
159 * subvolumes have different st_dev, even though they aren't
160 * real mounts of their own. */
162 r
= name_to_handle_at_loop ( fd
, filename
, & h
, & mount_id
, flags
);
163 if ( IN_SET ( r
, - ENOSYS
, - EACCES
, - EPERM
, - EOVERFLOW
, - EINVAL
))
164 /* This kernel does not support name_to_handle_at() at all (ENOSYS), or the syscall was blocked
165 * (EACCES/EPERM; maybe through seccomp, because we are running inside of a container?), or the mount
166 * point is not triggered yet (EOVERFLOW, think nfs4), or some general name_to_handle_at() flakiness
167 * (EINVAL): fall back to simpler logic. */
168 goto fallback_fdinfo
;
169 else if ( r
== - EOPNOTSUPP
)
170 /* This kernel or file system does not support name_to_handle_at(), hence let's see if the upper fs
171 * supports it (in which case it is a mount point), otherwise fall back to the traditional stat()
177 r
= name_to_handle_at_loop ( fd
, "" , & h_parent
, & mount_id_parent
, AT_EMPTY_PATH
);
178 if ( r
== - EOPNOTSUPP
) {
180 /* Neither parent nor child do name_to_handle_at()? We have no choice but to fall back. */
181 goto fallback_fdinfo
;
183 /* The parent can't do name_to_handle_at() but the directory we are interested in can? If so,
184 * it must be a mount point. */
189 /* The parent can do name_to_handle_at() but the
190 * directory we are interested in can't? If so, it
191 * must be a mount point. */
195 /* If the file handle for the directory we are
196 * interested in and its parent are identical, we
197 * assume this is the root directory, which is a mount
200 if ( h
-> handle_bytes
== h_parent
-> handle_bytes
&&
201 h
-> handle_type
== h_parent
-> handle_type
&&
202 memcmp ( h
-> f_handle
, h_parent
-> f_handle
, h
-> handle_bytes
) == 0 )
205 return mount_id
!= mount_id_parent
;
208 r
= fd_fdinfo_mnt_id ( fd
, filename
, flags
, & mount_id
);
209 if ( IN_SET ( r
, - EOPNOTSUPP
, - EACCES
, - EPERM
))
214 r
= fd_fdinfo_mnt_id ( fd
, "" , AT_EMPTY_PATH
, & mount_id_parent
);
218 if ( mount_id
!= mount_id_parent
)
221 /* Hmm, so, the mount ids are the same. This leaves one
222 * special case though for the root file system. For that,
223 * let's see if the parent directory has the same inode as we
224 * are interested in. Hence, let's also do fstat() checks now,
225 * too, but avoid the st_dev comparisons, since they aren't
226 * that useful on unionfs mounts. */
227 check_st_dev
= false ;
230 /* yay for fstatat() taking a different set of flags than the other
232 if ( flags
& AT_SYMLINK_FOLLOW
)
233 flags
&= ~ AT_SYMLINK_FOLLOW
;
235 flags
|= AT_SYMLINK_NOFOLLOW
;
236 if ( fstatat ( fd
, filename
, & a
, flags
) < 0 )
239 if ( fstatat ( fd
, "" , & b
, AT_EMPTY_PATH
) < 0 )
242 /* A directory with same device and inode as its parent? Must
243 * be the root directory */
244 if ( a
. st_dev
== b
. st_dev
&&
245 a
. st_ino
== b
. st_ino
)
248 return check_st_dev
&& ( a
. st_dev
!= b
. st_dev
);
251 /* flags can be AT_SYMLINK_FOLLOW or 0 */
252 int path_is_mount_point ( const char * t
, const char * root
, int flags
) {
253 _cleanup_free_
char * canonical
= NULL
;
254 _cleanup_close_
int fd
= - 1 ;
258 assert (( flags
& ~ AT_SYMLINK_FOLLOW
) == 0 );
260 if ( path_equal ( t
, "/" ))
263 /* we need to resolve symlinks manually, we can't just rely on
264 * fd_is_mount_point() to do that for us; if we have a structure like
265 * /bin -> /usr/bin/ and /usr is a mount point, then the parent that we
266 * look at needs to be /usr, not /. */
267 if ( flags
& AT_SYMLINK_FOLLOW
) {
268 r
= chase_symlinks ( t
, root
, CHASE_TRAIL_SLASH
, & canonical
);
275 fd
= open_parent ( t
, O_PATH
| O_CLOEXEC
, 0 );
279 return fd_is_mount_point ( fd
, last_path_component ( t
), flags
);
282 int path_get_mnt_id ( const char * path
, int * ret
) {
285 r
= name_to_handle_at_loop ( AT_FDCWD
, path
, NULL
, ret
, 0 );
286 if ( IN_SET ( r
, - EOPNOTSUPP
, - ENOSYS
, - EACCES
, - EPERM
, - EOVERFLOW
, - EINVAL
)) /* kernel/fs don't support this, or seccomp blocks access, or untriggered mount, or name_to_handle_at() is flaky */
287 return fd_fdinfo_mnt_id ( AT_FDCWD
, path
, 0 , ret
);
292 bool fstype_is_network ( const char * fstype
) {
295 x
= startswith ( fstype
, "fuse." );
299 return STR_IN_SET ( fstype
,
311 "pvfs2" , /* OrangeFS */
316 bool fstype_is_api_vfs ( const char * fstype
) {
317 return STR_IN_SET ( fstype
,
340 bool fstype_is_ro ( const char * fstype
) {
341 /* All Linux file systems that are necessarily read-only */
342 return STR_IN_SET ( fstype
,
348 bool fstype_can_discard ( const char * fstype
) {
349 return STR_IN_SET ( fstype
,
356 bool fstype_can_uid_gid ( const char * fstype
) {
358 /* All file systems that have a uid=/gid= mount option that fixates the owners of all files and directories,
359 * current and future. */
361 return STR_IN_SET ( fstype
,
372 int dev_is_devtmpfs ( void ) {
373 _cleanup_fclose_
FILE * proc_self_mountinfo
= NULL
;
377 r
= path_get_mnt_id ( "/dev" , & mount_id
);
381 r
= fopen_unlocked ( "/proc/self/mountinfo" , "re" , & proc_self_mountinfo
);
386 _cleanup_free_
char * line
= NULL
;
389 r
= read_line ( proc_self_mountinfo
, LONG_LINE_MAX
, & line
);
395 if ( sscanf ( line
, "%i" , & mid
) != 1 )
401 e
= strstr ( line
, " - " );
405 /* accept any name that starts with the currently expected type */
406 if ( startswith ( e
+ 3 , "devtmpfs" ))
413 const char * mount_propagation_flags_to_string ( unsigned long flags
) {
415 switch ( flags
& ( MS_SHARED
| MS_SLAVE
| MS_PRIVATE
)) {
429 int mount_propagation_flags_from_string ( const char * name
, unsigned long * ret
) {
433 else if ( streq ( name
, "shared" ))
435 else if ( streq ( name
, "slave" ))
437 else if ( streq ( name
, "private" ))