]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/basic/mountpoint-util.c
Handle d_type == DT_UNKNOWN correctly
[thirdparty/systemd.git] / src / basic / mountpoint-util.c
CommitLineData
049af8ad
ZJS
1/* SPDX-License-Identifier: LGPL-2.1+ */
2
3#include <errno.h>
4#include <fcntl.h>
049af8ad
ZJS
5#include <sys/mount.h>
6
7#include "alloc-util.h"
8#include "fd-util.h"
9#include "fileio.h"
10#include "fs-util.h"
11#include "missing.h"
12#include "mountpoint-util.h"
13#include "parse-util.h"
14#include "path-util.h"
15#include "stdio-util.h"
16#include "strv.h"
17
/* Initial file handle buffer size, frozen at the value MAX_HANDLE_SZ had in the kernel when the
 * name_to_handle_at() API first appeared. We deliberately do not track the current header value: should the
 * headers ever raise it and we get rebuilt, old kernels would reject the larger size outright with EINVAL.
 * Since this is only used as the starting size (the buffer is grown on demand, see name_to_handle_at_loop()),
 * larger file handles are still handled fine. */
#define ORIGINAL_MAX_HANDLE_SZ 128
25
26int name_to_handle_at_loop(
27 int fd,
28 const char *path,
29 struct file_handle **ret_handle,
30 int *ret_mnt_id,
31 int flags) {
32
33 _cleanup_free_ struct file_handle *h = NULL;
34 size_t n = ORIGINAL_MAX_HANDLE_SZ;
35
36 /* We need to invoke name_to_handle_at() in a loop, given that it might return EOVERFLOW when the specified
37 * buffer is too small. Note that in contrast to what the docs might suggest, MAX_HANDLE_SZ is only good as a
38 * start value, it is not an upper bound on the buffer size required.
39 *
40 * This improves on raw name_to_handle_at() also in one other regard: ret_handle and ret_mnt_id can be passed
41 * as NULL if there's no interest in either. */
42
43 for (;;) {
44 int mnt_id = -1;
45
46 h = malloc0(offsetof(struct file_handle, f_handle) + n);
47 if (!h)
48 return -ENOMEM;
49
50 h->handle_bytes = n;
51
52 if (name_to_handle_at(fd, path, h, &mnt_id, flags) >= 0) {
53
54 if (ret_handle)
55 *ret_handle = TAKE_PTR(h);
56
57 if (ret_mnt_id)
58 *ret_mnt_id = mnt_id;
59
60 return 0;
61 }
62 if (errno != EOVERFLOW)
63 return -errno;
64
65 if (!ret_handle && ret_mnt_id && mnt_id >= 0) {
66
67 /* As it appears, name_to_handle_at() fills in mnt_id even when it returns EOVERFLOW when the
68 * buffer is too small, but that's undocumented. Hence, let's make use of this if it appears to
69 * be filled in, and the caller was interested in only the mount ID an nothing else. */
70
71 *ret_mnt_id = mnt_id;
72 return 0;
73 }
74
75 /* If name_to_handle_at() didn't increase the byte size, then this EOVERFLOW is caused by something
76 * else (apparently EOVERFLOW is returned for untriggered nfs4 mounts sometimes), not by the too small
77 * buffer. In that case propagate EOVERFLOW */
78 if (h->handle_bytes <= n)
79 return -EOVERFLOW;
80
81 /* The buffer was too small. Size the new buffer by what name_to_handle_at() returned. */
82 n = h->handle_bytes;
83 if (offsetof(struct file_handle, f_handle) + n < n) /* check for addition overflow */
84 return -EOVERFLOW;
85
86 h = mfree(h);
87 }
88}
89
90static int fd_fdinfo_mnt_id(int fd, const char *filename, int flags, int *mnt_id) {
91 char path[STRLEN("/proc/self/fdinfo/") + DECIMAL_STR_MAX(int)];
92 _cleanup_free_ char *fdinfo = NULL;
93 _cleanup_close_ int subfd = -1;
94 char *p;
95 int r;
96
97 if ((flags & AT_EMPTY_PATH) && isempty(filename))
98 xsprintf(path, "/proc/self/fdinfo/%i", fd);
99 else {
be24321f 100 subfd = openat(fd, filename, O_CLOEXEC|O_PATH|(flags & AT_SYMLINK_FOLLOW ? 0 : O_NOFOLLOW));
049af8ad
ZJS
101 if (subfd < 0)
102 return -errno;
103
104 xsprintf(path, "/proc/self/fdinfo/%i", subfd);
105 }
106
107 r = read_full_file(path, &fdinfo, NULL);
108 if (r == -ENOENT) /* The fdinfo directory is a relatively new addition */
109 return -EOPNOTSUPP;
110 if (r < 0)
111 return r;
112
113 p = startswith(fdinfo, "mnt_id:");
114 if (!p) {
115 p = strstr(fdinfo, "\nmnt_id:");
116 if (!p) /* The mnt_id field is a relatively new addition */
117 return -EOPNOTSUPP;
118
119 p += 8;
120 }
121
122 p += strspn(p, WHITESPACE);
123 p[strcspn(p, WHITESPACE)] = 0;
124
125 return safe_atoi(p, mnt_id);
126}
127
128int fd_is_mount_point(int fd, const char *filename, int flags) {
129 _cleanup_free_ struct file_handle *h = NULL, *h_parent = NULL;
130 int mount_id = -1, mount_id_parent = -1;
131 bool nosupp = false, check_st_dev = true;
132 struct stat a, b;
133 int r;
134
135 assert(fd >= 0);
136 assert(filename);
137
138 /* First we will try the name_to_handle_at() syscall, which
139 * tells us the mount id and an opaque file "handle". It is
140 * not supported everywhere though (kernel compile-time
141 * option, not all file systems are hooked up). If it works
142 * the mount id is usually good enough to tell us whether
143 * something is a mount point.
144 *
145 * If that didn't work we will try to read the mount id from
146 * /proc/self/fdinfo/<fd>. This is almost as good as
147 * name_to_handle_at(), however, does not return the
148 * opaque file handle. The opaque file handle is pretty useful
149 * to detect the root directory, which we should always
150 * consider a mount point. Hence we use this only as
151 * fallback. Exporting the mnt_id in fdinfo is a pretty recent
152 * kernel addition.
153 *
154 * As last fallback we do traditional fstat() based st_dev
155 * comparisons. This is how things were traditionally done,
156 * but unionfs breaks this since it exposes file
157 * systems with a variety of st_dev reported. Also, btrfs
158 * subvolumes have different st_dev, even though they aren't
159 * real mounts of their own. */
160
161 r = name_to_handle_at_loop(fd, filename, &h, &mount_id, flags);
162 if (IN_SET(r, -ENOSYS, -EACCES, -EPERM, -EOVERFLOW, -EINVAL))
163 /* This kernel does not support name_to_handle_at() at all (ENOSYS), or the syscall was blocked
164 * (EACCES/EPERM; maybe through seccomp, because we are running inside of a container?), or the mount
165 * point is not triggered yet (EOVERFLOW, think nfs4), or some general name_to_handle_at() flakiness
166 * (EINVAL): fall back to simpler logic. */
167 goto fallback_fdinfo;
168 else if (r == -EOPNOTSUPP)
169 /* This kernel or file system does not support name_to_handle_at(), hence let's see if the upper fs
170 * supports it (in which case it is a mount point), otherwise fallback to the traditional stat()
171 * logic */
172 nosupp = true;
173 else if (r < 0)
174 return r;
175
176 r = name_to_handle_at_loop(fd, "", &h_parent, &mount_id_parent, AT_EMPTY_PATH);
177 if (r == -EOPNOTSUPP) {
178 if (nosupp)
179 /* Neither parent nor child do name_to_handle_at()? We have no choice but to fall back. */
180 goto fallback_fdinfo;
181 else
182 /* The parent can't do name_to_handle_at() but the directory we are interested in can? If so,
183 * it must be a mount point. */
184 return 1;
185 } else if (r < 0)
186 return r;
187
188 /* The parent can do name_to_handle_at() but the
189 * directory we are interested in can't? If so, it
190 * must be a mount point. */
191 if (nosupp)
192 return 1;
193
194 /* If the file handle for the directory we are
195 * interested in and its parent are identical, we
196 * assume this is the root directory, which is a mount
197 * point. */
198
199 if (h->handle_bytes == h_parent->handle_bytes &&
200 h->handle_type == h_parent->handle_type &&
201 memcmp(h->f_handle, h_parent->f_handle, h->handle_bytes) == 0)
202 return 1;
203
204 return mount_id != mount_id_parent;
205
206fallback_fdinfo:
207 r = fd_fdinfo_mnt_id(fd, filename, flags, &mount_id);
208 if (IN_SET(r, -EOPNOTSUPP, -EACCES, -EPERM))
209 goto fallback_fstat;
210 if (r < 0)
211 return r;
212
213 r = fd_fdinfo_mnt_id(fd, "", AT_EMPTY_PATH, &mount_id_parent);
214 if (r < 0)
215 return r;
216
217 if (mount_id != mount_id_parent)
218 return 1;
219
220 /* Hmm, so, the mount ids are the same. This leaves one
221 * special case though for the root file system. For that,
222 * let's see if the parent directory has the same inode as we
223 * are interested in. Hence, let's also do fstat() checks now,
224 * too, but avoid the st_dev comparisons, since they aren't
225 * that useful on unionfs mounts. */
226 check_st_dev = false;
227
228fallback_fstat:
229 /* yay for fstatat() taking a different set of flags than the other
230 * _at() above */
231 if (flags & AT_SYMLINK_FOLLOW)
232 flags &= ~AT_SYMLINK_FOLLOW;
233 else
234 flags |= AT_SYMLINK_NOFOLLOW;
235 if (fstatat(fd, filename, &a, flags) < 0)
236 return -errno;
237
238 if (fstatat(fd, "", &b, AT_EMPTY_PATH) < 0)
239 return -errno;
240
241 /* A directory with same device and inode as its parent? Must
242 * be the root directory */
243 if (a.st_dev == b.st_dev &&
244 a.st_ino == b.st_ino)
245 return 1;
246
247 return check_st_dev && (a.st_dev != b.st_dev);
248}
249
250/* flags can be AT_SYMLINK_FOLLOW or 0 */
251int path_is_mount_point(const char *t, const char *root, int flags) {
252 _cleanup_free_ char *canonical = NULL;
253 _cleanup_close_ int fd = -1;
254 int r;
255
256 assert(t);
257 assert((flags & ~AT_SYMLINK_FOLLOW) == 0);
258
259 if (path_equal(t, "/"))
260 return 1;
261
262 /* we need to resolve symlinks manually, we can't just rely on
263 * fd_is_mount_point() to do that for us; if we have a structure like
264 * /bin -> /usr/bin/ and /usr is a mount point, then the parent that we
265 * look at needs to be /usr, not /. */
266 if (flags & AT_SYMLINK_FOLLOW) {
267 r = chase_symlinks(t, root, CHASE_TRAIL_SLASH, &canonical);
268 if (r < 0)
269 return r;
270
271 t = canonical;
272 }
273
274 fd = open_parent(t, O_PATH|O_CLOEXEC, 0);
275 if (fd < 0)
276 return -errno;
277
278 return fd_is_mount_point(fd, last_path_component(t), flags);
279}
280
281int path_get_mnt_id(const char *path, int *ret) {
282 int r;
283
284 r = name_to_handle_at_loop(AT_FDCWD, path, NULL, ret, 0);
285 if (IN_SET(r, -EOPNOTSUPP, -ENOSYS, -EACCES, -EPERM, -EOVERFLOW, -EINVAL)) /* kernel/fs don't support this, or seccomp blocks access, or untriggered mount, or name_to_handle_at() is flaky */
286 return fd_fdinfo_mnt_id(AT_FDCWD, path, 0, ret);
287
288 return r;
289}
290
/* Returns true if 'fstype' names one of the network file systems listed below. A leading "fuse." prefix is
 * stripped before the comparison, so that e.g. "fuse.sshfs" is matched as "sshfs". */
bool fstype_is_network(const char *fstype) {
        const char *without_prefix = startswith(fstype, "fuse.");
        const char *name = without_prefix ? without_prefix : fstype;

        return STR_IN_SET(name,
                          "afs",
                          "cifs",
                          "smbfs",
                          "sshfs",
                          "ncpfs",
                          "ncp",
                          "nfs",
                          "nfs4",
                          "gfs",
                          "gfs2",
                          "glusterfs",
                          "pvfs2", /* OrangeFS */
                          "ocfs2",
                          "lustre");
}
314
/* Returns true if 'fstype' is one of the file system types enumerated below. */
bool fstype_is_api_vfs(const char *fstype) {
        bool is_api_vfs;

        is_api_vfs = STR_IN_SET(fstype,
                                "autofs", "bpf", "cgroup", "cgroup2",
                                "configfs", "cpuset", "debugfs", "devpts",
                                "devtmpfs", "efivarfs", "fusectl", "hugetlbfs",
                                "mqueue", "proc", "pstore", "ramfs",
                                "securityfs", "sysfs", "tmpfs", "tracefs");

        return is_api_vfs;
}
338
/* Returns true if 'fstype' is a Linux file system type that is necessarily read-only. */
bool fstype_is_ro(const char *fstype) {
        bool always_read_only;

        always_read_only = STR_IN_SET(fstype, "DM_verity_hash", "iso9660", "squashfs");

        return always_read_only;
}
346
/* Returns true if 'fstype' is one of the file system types enumerated below. */
bool fstype_can_discard(const char *fstype) {
        bool discard_capable;

        discard_capable = STR_IN_SET(fstype, "btrfs", "ext4", "vfat", "xfs");

        return discard_capable;
}
354
/* Returns true if 'fstype' is a file system that has a uid=/gid= mount option which fixates the owners of
 * all files and directories, current and future. */
bool fstype_can_uid_gid(const char *fstype) {
        bool has_owner_options;

        has_owner_options = STR_IN_SET(fstype,
                                       "adfs", "exfat", "fat",
                                       "hfs", "hpfs", "iso9660",
                                       "msdos", "ntfs", "vfat");

        return has_owner_options;
}
371
372int dev_is_devtmpfs(void) {
373 _cleanup_fclose_ FILE *proc_self_mountinfo = NULL;
374 int mount_id, r;
375 char *e;
376
377 r = path_get_mnt_id("/dev", &mount_id);
378 if (r < 0)
379 return r;
380
fdeea3f4
ZJS
381 r = fopen_unlocked("/proc/self/mountinfo", "re", &proc_self_mountinfo);
382 if (r < 0)
383 return r;
049af8ad
ZJS
384
385 for (;;) {
386 _cleanup_free_ char *line = NULL;
387 int mid;
388
389 r = read_line(proc_self_mountinfo, LONG_LINE_MAX, &line);
390 if (r < 0)
391 return r;
392 if (r == 0)
393 break;
394
395 if (sscanf(line, "%i", &mid) != 1)
396 continue;
397
398 if (mid != mount_id)
399 continue;
400
401 e = strstr(line, " - ");
402 if (!e)
403 continue;
404
405 /* accept any name that starts with the currently expected type */
406 if (startswith(e + 3, "devtmpfs"))
407 return true;
408 }
409
410 return false;
411}
412
/* Maps the mount propagation bits in 'flags' to a name.
 *
 * Only MS_SHARED, MS_SLAVE and MS_PRIVATE are considered, all other bits are ignored. Returns "" if none of
 * the three is set, "shared", "slave" or "private" if exactly that one is set, and NULL for any combination
 * of more than one of them. The returned string is a constant and must not be freed. */
const char *mount_propagation_flags_to_string(unsigned long flags) {
        static const struct {
                unsigned long bits;
                const char *name;
        } table[] = {
                { 0,          ""        },
                { MS_SHARED,  "shared"  },
                { MS_SLAVE,   "slave"   },
                { MS_PRIVATE, "private" },
        };
        unsigned long propagation = flags & (MS_SHARED|MS_SLAVE|MS_PRIVATE);

        for (size_t i = 0; i < sizeof(table) / sizeof(table[0]); i++)
                if (table[i].bits == propagation)
                        return table[i].name;

        return NULL;
}
428
/* Parses a mount propagation name into the matching MS_* flag.
 *
 * An empty or NULL 'name' yields 0; "shared", "slave" and "private" yield MS_SHARED, MS_SLAVE and MS_PRIVATE
 * respectively. On success the value is stored in *ret and 0 is returned. For any other name -EINVAL is
 * returned and *ret is left untouched. */
int mount_propagation_flags_from_string(const char *name, unsigned long *ret) {
        unsigned long parsed;

        if (isempty(name))
                parsed = 0;
        else if (streq(name, "shared"))
                parsed = MS_SHARED;
        else if (streq(name, "slave"))
                parsed = MS_SLAVE;
        else if (streq(name, "private"))
                parsed = MS_PRIVATE;
        else
                return -EINVAL;

        *ret = parsed;
        return 0;
}