]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/basic/mountpoint-util.c
Merge pull request #11827 from keszybz/pkgconfig-variables
[thirdparty/systemd.git] / src / basic / mountpoint-util.c
1 /* SPDX-License-Identifier: LGPL-2.1+ */
2
3 #include <errno.h>
4 #include <fcntl.h>
5 #include <stdio_ext.h>
6 #include <sys/mount.h>
7
8 #include "alloc-util.h"
9 #include "fd-util.h"
10 #include "fileio.h"
11 #include "fs-util.h"
12 #include "missing.h"
13 #include "mountpoint-util.h"
14 #include "parse-util.h"
15 #include "path-util.h"
16 #include "stdio-util.h"
17 #include "strv.h"
18
/* This is the original MAX_HANDLE_SZ definition from the kernel, from when the API was introduced. We use it instead
 * of whatever value the current API headers define, to future-proof things: if the size is increased in the API
 * headers and our code is recompiled, it would cease working on old kernels, as those refuse any size larger than
 * this value with EINVAL right away. Hence, let's disconnect ourselves from any such API changes and stick to the
 * original definition from when it was introduced. We use it as a start value only anyway (see below), and hence
 * should be able to deal with larger file handles regardless. */
25 #define ORIGINAL_MAX_HANDLE_SZ 128
26
27 int name_to_handle_at_loop(
28 int fd,
29 const char *path,
30 struct file_handle **ret_handle,
31 int *ret_mnt_id,
32 int flags) {
33
34 _cleanup_free_ struct file_handle *h = NULL;
35 size_t n = ORIGINAL_MAX_HANDLE_SZ;
36
37 /* We need to invoke name_to_handle_at() in a loop, given that it might return EOVERFLOW when the specified
38 * buffer is too small. Note that in contrast to what the docs might suggest, MAX_HANDLE_SZ is only good as a
39 * start value, it is not an upper bound on the buffer size required.
40 *
41 * This improves on raw name_to_handle_at() also in one other regard: ret_handle and ret_mnt_id can be passed
42 * as NULL if there's no interest in either. */
43
44 for (;;) {
45 int mnt_id = -1;
46
47 h = malloc0(offsetof(struct file_handle, f_handle) + n);
48 if (!h)
49 return -ENOMEM;
50
51 h->handle_bytes = n;
52
53 if (name_to_handle_at(fd, path, h, &mnt_id, flags) >= 0) {
54
55 if (ret_handle)
56 *ret_handle = TAKE_PTR(h);
57
58 if (ret_mnt_id)
59 *ret_mnt_id = mnt_id;
60
61 return 0;
62 }
63 if (errno != EOVERFLOW)
64 return -errno;
65
66 if (!ret_handle && ret_mnt_id && mnt_id >= 0) {
67
68 /* As it appears, name_to_handle_at() fills in mnt_id even when it returns EOVERFLOW when the
69 * buffer is too small, but that's undocumented. Hence, let's make use of this if it appears to
70 * be filled in, and the caller was interested in only the mount ID an nothing else. */
71
72 *ret_mnt_id = mnt_id;
73 return 0;
74 }
75
76 /* If name_to_handle_at() didn't increase the byte size, then this EOVERFLOW is caused by something
77 * else (apparently EOVERFLOW is returned for untriggered nfs4 mounts sometimes), not by the too small
78 * buffer. In that case propagate EOVERFLOW */
79 if (h->handle_bytes <= n)
80 return -EOVERFLOW;
81
82 /* The buffer was too small. Size the new buffer by what name_to_handle_at() returned. */
83 n = h->handle_bytes;
84 if (offsetof(struct file_handle, f_handle) + n < n) /* check for addition overflow */
85 return -EOVERFLOW;
86
87 h = mfree(h);
88 }
89 }
90
91 static int fd_fdinfo_mnt_id(int fd, const char *filename, int flags, int *mnt_id) {
92 char path[STRLEN("/proc/self/fdinfo/") + DECIMAL_STR_MAX(int)];
93 _cleanup_free_ char *fdinfo = NULL;
94 _cleanup_close_ int subfd = -1;
95 char *p;
96 int r;
97
98 if ((flags & AT_EMPTY_PATH) && isempty(filename))
99 xsprintf(path, "/proc/self/fdinfo/%i", fd);
100 else {
101 subfd = openat(fd, filename, O_CLOEXEC|O_PATH|(flags & AT_SYMLINK_FOLLOW ? 0 : O_NOFOLLOW));
102 if (subfd < 0)
103 return -errno;
104
105 xsprintf(path, "/proc/self/fdinfo/%i", subfd);
106 }
107
108 r = read_full_file(path, &fdinfo, NULL);
109 if (r == -ENOENT) /* The fdinfo directory is a relatively new addition */
110 return -EOPNOTSUPP;
111 if (r < 0)
112 return r;
113
114 p = startswith(fdinfo, "mnt_id:");
115 if (!p) {
116 p = strstr(fdinfo, "\nmnt_id:");
117 if (!p) /* The mnt_id field is a relatively new addition */
118 return -EOPNOTSUPP;
119
120 p += 8;
121 }
122
123 p += strspn(p, WHITESPACE);
124 p[strcspn(p, WHITESPACE)] = 0;
125
126 return safe_atoi(p, mnt_id);
127 }
128
129 int fd_is_mount_point(int fd, const char *filename, int flags) {
130 _cleanup_free_ struct file_handle *h = NULL, *h_parent = NULL;
131 int mount_id = -1, mount_id_parent = -1;
132 bool nosupp = false, check_st_dev = true;
133 struct stat a, b;
134 int r;
135
136 assert(fd >= 0);
137 assert(filename);
138
139 /* First we will try the name_to_handle_at() syscall, which
140 * tells us the mount id and an opaque file "handle". It is
141 * not supported everywhere though (kernel compile-time
142 * option, not all file systems are hooked up). If it works
143 * the mount id is usually good enough to tell us whether
144 * something is a mount point.
145 *
146 * If that didn't work we will try to read the mount id from
147 * /proc/self/fdinfo/<fd>. This is almost as good as
148 * name_to_handle_at(), however, does not return the
149 * opaque file handle. The opaque file handle is pretty useful
150 * to detect the root directory, which we should always
151 * consider a mount point. Hence we use this only as
152 * fallback. Exporting the mnt_id in fdinfo is a pretty recent
153 * kernel addition.
154 *
155 * As last fallback we do traditional fstat() based st_dev
156 * comparisons. This is how things were traditionally done,
157 * but unionfs breaks this since it exposes file
158 * systems with a variety of st_dev reported. Also, btrfs
159 * subvolumes have different st_dev, even though they aren't
160 * real mounts of their own. */
161
162 r = name_to_handle_at_loop(fd, filename, &h, &mount_id, flags);
163 if (IN_SET(r, -ENOSYS, -EACCES, -EPERM, -EOVERFLOW, -EINVAL))
164 /* This kernel does not support name_to_handle_at() at all (ENOSYS), or the syscall was blocked
165 * (EACCES/EPERM; maybe through seccomp, because we are running inside of a container?), or the mount
166 * point is not triggered yet (EOVERFLOW, think nfs4), or some general name_to_handle_at() flakiness
167 * (EINVAL): fall back to simpler logic. */
168 goto fallback_fdinfo;
169 else if (r == -EOPNOTSUPP)
170 /* This kernel or file system does not support name_to_handle_at(), hence let's see if the upper fs
171 * supports it (in which case it is a mount point), otherwise fallback to the traditional stat()
172 * logic */
173 nosupp = true;
174 else if (r < 0)
175 return r;
176
177 r = name_to_handle_at_loop(fd, "", &h_parent, &mount_id_parent, AT_EMPTY_PATH);
178 if (r == -EOPNOTSUPP) {
179 if (nosupp)
180 /* Neither parent nor child do name_to_handle_at()? We have no choice but to fall back. */
181 goto fallback_fdinfo;
182 else
183 /* The parent can't do name_to_handle_at() but the directory we are interested in can? If so,
184 * it must be a mount point. */
185 return 1;
186 } else if (r < 0)
187 return r;
188
189 /* The parent can do name_to_handle_at() but the
190 * directory we are interested in can't? If so, it
191 * must be a mount point. */
192 if (nosupp)
193 return 1;
194
195 /* If the file handle for the directory we are
196 * interested in and its parent are identical, we
197 * assume this is the root directory, which is a mount
198 * point. */
199
200 if (h->handle_bytes == h_parent->handle_bytes &&
201 h->handle_type == h_parent->handle_type &&
202 memcmp(h->f_handle, h_parent->f_handle, h->handle_bytes) == 0)
203 return 1;
204
205 return mount_id != mount_id_parent;
206
207 fallback_fdinfo:
208 r = fd_fdinfo_mnt_id(fd, filename, flags, &mount_id);
209 if (IN_SET(r, -EOPNOTSUPP, -EACCES, -EPERM))
210 goto fallback_fstat;
211 if (r < 0)
212 return r;
213
214 r = fd_fdinfo_mnt_id(fd, "", AT_EMPTY_PATH, &mount_id_parent);
215 if (r < 0)
216 return r;
217
218 if (mount_id != mount_id_parent)
219 return 1;
220
221 /* Hmm, so, the mount ids are the same. This leaves one
222 * special case though for the root file system. For that,
223 * let's see if the parent directory has the same inode as we
224 * are interested in. Hence, let's also do fstat() checks now,
225 * too, but avoid the st_dev comparisons, since they aren't
226 * that useful on unionfs mounts. */
227 check_st_dev = false;
228
229 fallback_fstat:
230 /* yay for fstatat() taking a different set of flags than the other
231 * _at() above */
232 if (flags & AT_SYMLINK_FOLLOW)
233 flags &= ~AT_SYMLINK_FOLLOW;
234 else
235 flags |= AT_SYMLINK_NOFOLLOW;
236 if (fstatat(fd, filename, &a, flags) < 0)
237 return -errno;
238
239 if (fstatat(fd, "", &b, AT_EMPTY_PATH) < 0)
240 return -errno;
241
242 /* A directory with same device and inode as its parent? Must
243 * be the root directory */
244 if (a.st_dev == b.st_dev &&
245 a.st_ino == b.st_ino)
246 return 1;
247
248 return check_st_dev && (a.st_dev != b.st_dev);
249 }
250
251 /* flags can be AT_SYMLINK_FOLLOW or 0 */
252 int path_is_mount_point(const char *t, const char *root, int flags) {
253 _cleanup_free_ char *canonical = NULL;
254 _cleanup_close_ int fd = -1;
255 int r;
256
257 assert(t);
258 assert((flags & ~AT_SYMLINK_FOLLOW) == 0);
259
260 if (path_equal(t, "/"))
261 return 1;
262
263 /* we need to resolve symlinks manually, we can't just rely on
264 * fd_is_mount_point() to do that for us; if we have a structure like
265 * /bin -> /usr/bin/ and /usr is a mount point, then the parent that we
266 * look at needs to be /usr, not /. */
267 if (flags & AT_SYMLINK_FOLLOW) {
268 r = chase_symlinks(t, root, CHASE_TRAIL_SLASH, &canonical);
269 if (r < 0)
270 return r;
271
272 t = canonical;
273 }
274
275 fd = open_parent(t, O_PATH|O_CLOEXEC, 0);
276 if (fd < 0)
277 return -errno;
278
279 return fd_is_mount_point(fd, last_path_component(t), flags);
280 }
281
282 int path_get_mnt_id(const char *path, int *ret) {
283 int r;
284
285 r = name_to_handle_at_loop(AT_FDCWD, path, NULL, ret, 0);
286 if (IN_SET(r, -EOPNOTSUPP, -ENOSYS, -EACCES, -EPERM, -EOVERFLOW, -EINVAL)) /* kernel/fs don't support this, or seccomp blocks access, or untriggered mount, or name_to_handle_at() is flaky */
287 return fd_fdinfo_mnt_id(AT_FDCWD, path, 0, ret);
288
289 return r;
290 }
291
/* Returns true if the file system type is one of the network file systems listed below. A leading "fuse." prefix
 * is stripped before the comparison. */
bool fstype_is_network(const char *fstype) {
        const char *stripped = startswith(fstype, "fuse.");
        const char *name = stripped ? stripped : fstype;

        return STR_IN_SET(name,
                          "afs", "cifs", "smbfs", "sshfs",
                          "ncpfs", "ncp",
                          "nfs", "nfs4",
                          "gfs", "gfs2", "glusterfs",
                          "pvfs2", /* OrangeFS */
                          "ocfs2", "lustre");
}
315
/* Returns true if the file system type is one of the API virtual file systems listed below. */
bool fstype_is_api_vfs(const char *fstype) {
        return STR_IN_SET(fstype,
                          "autofs", "bpf", "cgroup", "cgroup2",
                          "configfs", "cpuset", "debugfs", "devpts",
                          "devtmpfs", "efivarfs", "fusectl", "hugetlbfs",
                          "mqueue", "proc", "pstore", "ramfs",
                          "securityfs", "sysfs", "tmpfs", "tracefs");
}
339
/* Returns true for the Linux file systems that are necessarily read-only. */
bool fstype_is_ro(const char *fstype) {
        return STR_IN_SET(fstype, "DM_verity_hash", "iso9660", "squashfs");
}
347
/* Returns true if the file system type is in the list of types treated here as supporting discard. */
bool fstype_can_discard(const char *fstype) {
        return STR_IN_SET(fstype, "btrfs", "ext4", "vfat", "xfs");
}
355
/* Returns true for the file systems that have a uid=/gid= mount option which fixates the owners of all files
 * and directories, current and future. */
bool fstype_can_uid_gid(const char *fstype) {
        return STR_IN_SET(fstype,
                          "adfs", "fat", "hfs", "hpfs",
                          "iso9660", "msdos", "ntfs", "vfat");
}
371
372 int dev_is_devtmpfs(void) {
373 _cleanup_fclose_ FILE *proc_self_mountinfo = NULL;
374 int mount_id, r;
375 char *e;
376
377 r = path_get_mnt_id("/dev", &mount_id);
378 if (r < 0)
379 return r;
380
381 proc_self_mountinfo = fopen("/proc/self/mountinfo", "re");
382 if (!proc_self_mountinfo)
383 return -errno;
384
385 (void) __fsetlocking(proc_self_mountinfo, FSETLOCKING_BYCALLER);
386
387 for (;;) {
388 _cleanup_free_ char *line = NULL;
389 int mid;
390
391 r = read_line(proc_self_mountinfo, LONG_LINE_MAX, &line);
392 if (r < 0)
393 return r;
394 if (r == 0)
395 break;
396
397 if (sscanf(line, "%i", &mid) != 1)
398 continue;
399
400 if (mid != mount_id)
401 continue;
402
403 e = strstr(line, " - ");
404 if (!e)
405 continue;
406
407 /* accept any name that starts with the currently expected type */
408 if (startswith(e + 3, "devtmpfs"))
409 return true;
410 }
411
412 return false;
413 }
414
/* Maps the propagation bits of a mount flags value to a string.
 *
 * Only MS_SHARED, MS_SLAVE and MS_PRIVATE are looked at; all other bits are ignored. Returns "" if none of the
 * three is set, the matching name if exactly one is set, and NULL if more than one is set. */
const char *mount_propagation_flags_to_string(unsigned long flags) {
        unsigned long propagation = flags & (MS_SHARED|MS_SLAVE|MS_PRIVATE);

        if (propagation == 0)
                return "";
        if (propagation == MS_SHARED)
                return "shared";
        if (propagation == MS_SLAVE)
                return "slave";
        if (propagation == MS_PRIVATE)
                return "private";

        /* More than one propagation bit set: there is no name for that. */
        return NULL;
}
430
/* Parses a mount propagation name into the matching MS_* flag.
 *
 * An empty or NULL name yields 0; "shared", "slave" and "private" yield MS_SHARED, MS_SLAVE and MS_PRIVATE.
 * On success the value is stored in *ret and 0 is returned. Any other name returns -EINVAL and leaves *ret
 * untouched. */
int mount_propagation_flags_from_string(const char *name, unsigned long *ret) {
        unsigned long value;

        if (isempty(name))
                value = 0;
        else if (streq(name, "shared"))
                value = MS_SHARED;
        else if (streq(name, "slave"))
                value = MS_SLAVE;
        else if (streq(name, "private"))
                value = MS_PRIVATE;
        else
                return -EINVAL;

        *ret = value;
        return 0;
}
444 }