]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/basic/mountpoint-util.c
Add fopen_unlocked() wrapper
[thirdparty/systemd.git] / src / basic / mountpoint-util.c
1 /* SPDX-License-Identifier: LGPL-2.1+ */
2
3 #include <errno.h>
4 #include <fcntl.h>
5 #include <stdio_ext.h>
6 #include <sys/mount.h>
7
8 #include "alloc-util.h"
9 #include "fd-util.h"
10 #include "fileio.h"
11 #include "fs-util.h"
12 #include "missing.h"
13 #include "mountpoint-util.h"
14 #include "parse-util.h"
15 #include "path-util.h"
16 #include "stdio-util.h"
17 #include "strv.h"
18
/* This is the original MAX_HANDLE_SZ definition from the kernel, as of when the API was introduced. We use it in
 * place of any more recent value to future-proof things: if the size is increased in the API headers and our code
 * is recompiled, it would cease working on old kernels, as those refuse any size larger than this value with
 * EINVAL right away. Hence, let's decouple ourselves from any such API changes and stick to the original
 * definition. We only use it as a starting value anyway (see below), and hence can still deal with larger file
 * handles. */
25 #define ORIGINAL_MAX_HANDLE_SZ 128
26
27 int name_to_handle_at_loop(
28 int fd,
29 const char *path,
30 struct file_handle **ret_handle,
31 int *ret_mnt_id,
32 int flags) {
33
34 _cleanup_free_ struct file_handle *h = NULL;
35 size_t n = ORIGINAL_MAX_HANDLE_SZ;
36
37 /* We need to invoke name_to_handle_at() in a loop, given that it might return EOVERFLOW when the specified
38 * buffer is too small. Note that in contrast to what the docs might suggest, MAX_HANDLE_SZ is only good as a
39 * start value, it is not an upper bound on the buffer size required.
40 *
41 * This improves on raw name_to_handle_at() also in one other regard: ret_handle and ret_mnt_id can be passed
42 * as NULL if there's no interest in either. */
43
44 for (;;) {
45 int mnt_id = -1;
46
47 h = malloc0(offsetof(struct file_handle, f_handle) + n);
48 if (!h)
49 return -ENOMEM;
50
51 h->handle_bytes = n;
52
53 if (name_to_handle_at(fd, path, h, &mnt_id, flags) >= 0) {
54
55 if (ret_handle)
56 *ret_handle = TAKE_PTR(h);
57
58 if (ret_mnt_id)
59 *ret_mnt_id = mnt_id;
60
61 return 0;
62 }
63 if (errno != EOVERFLOW)
64 return -errno;
65
66 if (!ret_handle && ret_mnt_id && mnt_id >= 0) {
67
68 /* As it appears, name_to_handle_at() fills in mnt_id even when it returns EOVERFLOW when the
69 * buffer is too small, but that's undocumented. Hence, let's make use of this if it appears to
70 * be filled in, and the caller was interested in only the mount ID an nothing else. */
71
72 *ret_mnt_id = mnt_id;
73 return 0;
74 }
75
76 /* If name_to_handle_at() didn't increase the byte size, then this EOVERFLOW is caused by something
77 * else (apparently EOVERFLOW is returned for untriggered nfs4 mounts sometimes), not by the too small
78 * buffer. In that case propagate EOVERFLOW */
79 if (h->handle_bytes <= n)
80 return -EOVERFLOW;
81
82 /* The buffer was too small. Size the new buffer by what name_to_handle_at() returned. */
83 n = h->handle_bytes;
84 if (offsetof(struct file_handle, f_handle) + n < n) /* check for addition overflow */
85 return -EOVERFLOW;
86
87 h = mfree(h);
88 }
89 }
90
91 static int fd_fdinfo_mnt_id(int fd, const char *filename, int flags, int *mnt_id) {
92 char path[STRLEN("/proc/self/fdinfo/") + DECIMAL_STR_MAX(int)];
93 _cleanup_free_ char *fdinfo = NULL;
94 _cleanup_close_ int subfd = -1;
95 char *p;
96 int r;
97
98 if ((flags & AT_EMPTY_PATH) && isempty(filename))
99 xsprintf(path, "/proc/self/fdinfo/%i", fd);
100 else {
101 subfd = openat(fd, filename, O_CLOEXEC|O_PATH|(flags & AT_SYMLINK_FOLLOW ? 0 : O_NOFOLLOW));
102 if (subfd < 0)
103 return -errno;
104
105 xsprintf(path, "/proc/self/fdinfo/%i", subfd);
106 }
107
108 r = read_full_file(path, &fdinfo, NULL);
109 if (r == -ENOENT) /* The fdinfo directory is a relatively new addition */
110 return -EOPNOTSUPP;
111 if (r < 0)
112 return r;
113
114 p = startswith(fdinfo, "mnt_id:");
115 if (!p) {
116 p = strstr(fdinfo, "\nmnt_id:");
117 if (!p) /* The mnt_id field is a relatively new addition */
118 return -EOPNOTSUPP;
119
120 p += 8;
121 }
122
123 p += strspn(p, WHITESPACE);
124 p[strcspn(p, WHITESPACE)] = 0;
125
126 return safe_atoi(p, mnt_id);
127 }
128
129 int fd_is_mount_point(int fd, const char *filename, int flags) {
130 _cleanup_free_ struct file_handle *h = NULL, *h_parent = NULL;
131 int mount_id = -1, mount_id_parent = -1;
132 bool nosupp = false, check_st_dev = true;
133 struct stat a, b;
134 int r;
135
136 assert(fd >= 0);
137 assert(filename);
138
139 /* First we will try the name_to_handle_at() syscall, which
140 * tells us the mount id and an opaque file "handle". It is
141 * not supported everywhere though (kernel compile-time
142 * option, not all file systems are hooked up). If it works
143 * the mount id is usually good enough to tell us whether
144 * something is a mount point.
145 *
146 * If that didn't work we will try to read the mount id from
147 * /proc/self/fdinfo/<fd>. This is almost as good as
148 * name_to_handle_at(), however, does not return the
149 * opaque file handle. The opaque file handle is pretty useful
150 * to detect the root directory, which we should always
151 * consider a mount point. Hence we use this only as
152 * fallback. Exporting the mnt_id in fdinfo is a pretty recent
153 * kernel addition.
154 *
155 * As last fallback we do traditional fstat() based st_dev
156 * comparisons. This is how things were traditionally done,
157 * but unionfs breaks this since it exposes file
158 * systems with a variety of st_dev reported. Also, btrfs
159 * subvolumes have different st_dev, even though they aren't
160 * real mounts of their own. */
161
162 r = name_to_handle_at_loop(fd, filename, &h, &mount_id, flags);
163 if (IN_SET(r, -ENOSYS, -EACCES, -EPERM, -EOVERFLOW, -EINVAL))
164 /* This kernel does not support name_to_handle_at() at all (ENOSYS), or the syscall was blocked
165 * (EACCES/EPERM; maybe through seccomp, because we are running inside of a container?), or the mount
166 * point is not triggered yet (EOVERFLOW, think nfs4), or some general name_to_handle_at() flakiness
167 * (EINVAL): fall back to simpler logic. */
168 goto fallback_fdinfo;
169 else if (r == -EOPNOTSUPP)
170 /* This kernel or file system does not support name_to_handle_at(), hence let's see if the upper fs
171 * supports it (in which case it is a mount point), otherwise fallback to the traditional stat()
172 * logic */
173 nosupp = true;
174 else if (r < 0)
175 return r;
176
177 r = name_to_handle_at_loop(fd, "", &h_parent, &mount_id_parent, AT_EMPTY_PATH);
178 if (r == -EOPNOTSUPP) {
179 if (nosupp)
180 /* Neither parent nor child do name_to_handle_at()? We have no choice but to fall back. */
181 goto fallback_fdinfo;
182 else
183 /* The parent can't do name_to_handle_at() but the directory we are interested in can? If so,
184 * it must be a mount point. */
185 return 1;
186 } else if (r < 0)
187 return r;
188
189 /* The parent can do name_to_handle_at() but the
190 * directory we are interested in can't? If so, it
191 * must be a mount point. */
192 if (nosupp)
193 return 1;
194
195 /* If the file handle for the directory we are
196 * interested in and its parent are identical, we
197 * assume this is the root directory, which is a mount
198 * point. */
199
200 if (h->handle_bytes == h_parent->handle_bytes &&
201 h->handle_type == h_parent->handle_type &&
202 memcmp(h->f_handle, h_parent->f_handle, h->handle_bytes) == 0)
203 return 1;
204
205 return mount_id != mount_id_parent;
206
207 fallback_fdinfo:
208 r = fd_fdinfo_mnt_id(fd, filename, flags, &mount_id);
209 if (IN_SET(r, -EOPNOTSUPP, -EACCES, -EPERM))
210 goto fallback_fstat;
211 if (r < 0)
212 return r;
213
214 r = fd_fdinfo_mnt_id(fd, "", AT_EMPTY_PATH, &mount_id_parent);
215 if (r < 0)
216 return r;
217
218 if (mount_id != mount_id_parent)
219 return 1;
220
221 /* Hmm, so, the mount ids are the same. This leaves one
222 * special case though for the root file system. For that,
223 * let's see if the parent directory has the same inode as we
224 * are interested in. Hence, let's also do fstat() checks now,
225 * too, but avoid the st_dev comparisons, since they aren't
226 * that useful on unionfs mounts. */
227 check_st_dev = false;
228
229 fallback_fstat:
230 /* yay for fstatat() taking a different set of flags than the other
231 * _at() above */
232 if (flags & AT_SYMLINK_FOLLOW)
233 flags &= ~AT_SYMLINK_FOLLOW;
234 else
235 flags |= AT_SYMLINK_NOFOLLOW;
236 if (fstatat(fd, filename, &a, flags) < 0)
237 return -errno;
238
239 if (fstatat(fd, "", &b, AT_EMPTY_PATH) < 0)
240 return -errno;
241
242 /* A directory with same device and inode as its parent? Must
243 * be the root directory */
244 if (a.st_dev == b.st_dev &&
245 a.st_ino == b.st_ino)
246 return 1;
247
248 return check_st_dev && (a.st_dev != b.st_dev);
249 }
250
251 /* flags can be AT_SYMLINK_FOLLOW or 0 */
252 int path_is_mount_point(const char *t, const char *root, int flags) {
253 _cleanup_free_ char *canonical = NULL;
254 _cleanup_close_ int fd = -1;
255 int r;
256
257 assert(t);
258 assert((flags & ~AT_SYMLINK_FOLLOW) == 0);
259
260 if (path_equal(t, "/"))
261 return 1;
262
263 /* we need to resolve symlinks manually, we can't just rely on
264 * fd_is_mount_point() to do that for us; if we have a structure like
265 * /bin -> /usr/bin/ and /usr is a mount point, then the parent that we
266 * look at needs to be /usr, not /. */
267 if (flags & AT_SYMLINK_FOLLOW) {
268 r = chase_symlinks(t, root, CHASE_TRAIL_SLASH, &canonical);
269 if (r < 0)
270 return r;
271
272 t = canonical;
273 }
274
275 fd = open_parent(t, O_PATH|O_CLOEXEC, 0);
276 if (fd < 0)
277 return -errno;
278
279 return fd_is_mount_point(fd, last_path_component(t), flags);
280 }
281
282 int path_get_mnt_id(const char *path, int *ret) {
283 int r;
284
285 r = name_to_handle_at_loop(AT_FDCWD, path, NULL, ret, 0);
286 if (IN_SET(r, -EOPNOTSUPP, -ENOSYS, -EACCES, -EPERM, -EOVERFLOW, -EINVAL)) /* kernel/fs don't support this, or seccomp blocks access, or untriggered mount, or name_to_handle_at() is flaky */
287 return fd_fdinfo_mnt_id(AT_FDCWD, path, 0, ret);
288
289 return r;
290 }
291
/* Returns true if fstype is one of the file system types listed below. A leading "fuse." prefix is stripped
 * before the comparison, so that e.g. "fuse.sshfs" is matched as "sshfs". */
bool fstype_is_network(const char *fstype) {
        const char *unprefixed, *name;

        unprefixed = startswith(fstype, "fuse.");
        name = unprefixed ? unprefixed : fstype;

        return STR_IN_SET(name,
                          "afs", "cifs", "smbfs", "sshfs",
                          "ncpfs", "ncp",
                          "nfs", "nfs4",
                          "gfs", "gfs2", "glusterfs",
                          "pvfs2", /* OrangeFS */
                          "ocfs2",
                          "lustre");
}
315
/* Returns true if fstype exactly matches one of the file system types in the list below. */
bool fstype_is_api_vfs(const char *fstype) {
        return STR_IN_SET(fstype,
                          "autofs", "bpf",
                          "cgroup", "cgroup2",
                          "configfs", "cpuset", "debugfs",
                          "devpts", "devtmpfs",
                          "efivarfs", "fusectl", "hugetlbfs",
                          "mqueue", "proc", "pstore",
                          "ramfs", "securityfs", "sysfs",
                          "tmpfs", "tracefs");
}
339
/* Returns true for every Linux file system type that is read-only by necessity. */
bool fstype_is_ro(const char *fstype) {
        return STR_IN_SET(fstype, "DM_verity_hash", "iso9660", "squashfs");
}
347
/* Returns true if fstype exactly matches one of the file system types in the list below. */
bool fstype_can_discard(const char *fstype) {
        return STR_IN_SET(fstype, "btrfs", "ext4", "vfat", "xfs");
}
355
/* Returns true for the file systems that offer a uid=/gid= mount option which fixates the owners of all files
 * and directories, both current and future ones. */
bool fstype_can_uid_gid(const char *fstype) {
        return STR_IN_SET(fstype,
                          "adfs", "fat", "hfs", "hpfs",
                          "iso9660", "msdos", "ntfs", "vfat");
}
371
372 int dev_is_devtmpfs(void) {
373 _cleanup_fclose_ FILE *proc_self_mountinfo = NULL;
374 int mount_id, r;
375 char *e;
376
377 r = path_get_mnt_id("/dev", &mount_id);
378 if (r < 0)
379 return r;
380
381 r = fopen_unlocked("/proc/self/mountinfo", "re", &proc_self_mountinfo);
382 if (r < 0)
383 return r;
384
385 for (;;) {
386 _cleanup_free_ char *line = NULL;
387 int mid;
388
389 r = read_line(proc_self_mountinfo, LONG_LINE_MAX, &line);
390 if (r < 0)
391 return r;
392 if (r == 0)
393 break;
394
395 if (sscanf(line, "%i", &mid) != 1)
396 continue;
397
398 if (mid != mount_id)
399 continue;
400
401 e = strstr(line, " - ");
402 if (!e)
403 continue;
404
405 /* accept any name that starts with the currently expected type */
406 if (startswith(e + 3, "devtmpfs"))
407 return true;
408 }
409
410 return false;
411 }
412
/* Maps the propagation bits (MS_SHARED, MS_SLAVE, MS_PRIVATE) contained in flags to a string; all other bits
 * are ignored. Returns "" if none of the three is set, and NULL if more than one of them is set. */
const char *mount_propagation_flags_to_string(unsigned long flags) {
        unsigned long propagation = flags & (MS_SHARED|MS_SLAVE|MS_PRIVATE);

        if (propagation == 0)
                return "";
        if (propagation == MS_SHARED)
                return "shared";
        if (propagation == MS_SLAVE)
                return "slave";
        if (propagation == MS_PRIVATE)
                return "private";

        /* A combination of several propagation bits has no string representation */
        return NULL;
}
428
/* Parses a propagation mode name into the matching mount flag: an empty or NULL name yields 0, "shared" yields
 * MS_SHARED, "slave" yields MS_SLAVE and "private" yields MS_PRIVATE.
 *
 * Returns 0 and stores the flag in *ret on success; returns -EINVAL and leaves *ret untouched for any other
 * name. */
int mount_propagation_flags_from_string(const char *name, unsigned long *ret) {
        unsigned long parsed;

        if (isempty(name))
                parsed = 0;
        else if (streq(name, "shared"))
                parsed = MS_SHARED;
        else if (streq(name, "slave"))
                parsed = MS_SLAVE;
        else if (streq(name, "private"))
                parsed = MS_PRIVATE;
        else
                return -EINVAL;

        *ret = parsed;
        return 0;
}