]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/basic/mountpoint-util.c
Make fopen_temporary and fopen_temporary_label unlocked
[thirdparty/systemd.git] / src / basic / mountpoint-util.c
1 /* SPDX-License-Identifier: LGPL-2.1+ */
2
3 #include <errno.h>
4 #include <fcntl.h>
5 #include <sys/mount.h>
6
7 #include "alloc-util.h"
8 #include "fd-util.h"
9 #include "fileio.h"
10 #include "fs-util.h"
11 #include "missing.h"
12 #include "mountpoint-util.h"
13 #include "parse-util.h"
14 #include "path-util.h"
15 #include "stdio-util.h"
16 #include "strv.h"
17
/* This is the original MAX_HANDLE_SZ definition from the kernel, as of when the API was introduced. We use it in place
 * of whatever value the API headers currently define, in order to future-proof things: if the size is increased in
 * the API headers and our code is recompiled, it would cease working on old kernels, as those refuse any size larger
 * than this value with EINVAL right away. Hence, let's disconnect ourselves from any such API changes and stick to
 * the original definition from when it was introduced. We use it as a start value only (see below), and hence are
 * still able to deal with larger file handles. */
#define ORIGINAL_MAX_HANDLE_SZ 128
25
26 int name_to_handle_at_loop(
27 int fd,
28 const char *path,
29 struct file_handle **ret_handle,
30 int *ret_mnt_id,
31 int flags) {
32
33 _cleanup_free_ struct file_handle *h = NULL;
34 size_t n = ORIGINAL_MAX_HANDLE_SZ;
35
36 /* We need to invoke name_to_handle_at() in a loop, given that it might return EOVERFLOW when the specified
37 * buffer is too small. Note that in contrast to what the docs might suggest, MAX_HANDLE_SZ is only good as a
38 * start value, it is not an upper bound on the buffer size required.
39 *
40 * This improves on raw name_to_handle_at() also in one other regard: ret_handle and ret_mnt_id can be passed
41 * as NULL if there's no interest in either. */
42
43 for (;;) {
44 int mnt_id = -1;
45
46 h = malloc0(offsetof(struct file_handle, f_handle) + n);
47 if (!h)
48 return -ENOMEM;
49
50 h->handle_bytes = n;
51
52 if (name_to_handle_at(fd, path, h, &mnt_id, flags) >= 0) {
53
54 if (ret_handle)
55 *ret_handle = TAKE_PTR(h);
56
57 if (ret_mnt_id)
58 *ret_mnt_id = mnt_id;
59
60 return 0;
61 }
62 if (errno != EOVERFLOW)
63 return -errno;
64
65 if (!ret_handle && ret_mnt_id && mnt_id >= 0) {
66
67 /* As it appears, name_to_handle_at() fills in mnt_id even when it returns EOVERFLOW when the
68 * buffer is too small, but that's undocumented. Hence, let's make use of this if it appears to
69 * be filled in, and the caller was interested in only the mount ID an nothing else. */
70
71 *ret_mnt_id = mnt_id;
72 return 0;
73 }
74
75 /* If name_to_handle_at() didn't increase the byte size, then this EOVERFLOW is caused by something
76 * else (apparently EOVERFLOW is returned for untriggered nfs4 mounts sometimes), not by the too small
77 * buffer. In that case propagate EOVERFLOW */
78 if (h->handle_bytes <= n)
79 return -EOVERFLOW;
80
81 /* The buffer was too small. Size the new buffer by what name_to_handle_at() returned. */
82 n = h->handle_bytes;
83 if (offsetof(struct file_handle, f_handle) + n < n) /* check for addition overflow */
84 return -EOVERFLOW;
85
86 h = mfree(h);
87 }
88 }
89
90 static int fd_fdinfo_mnt_id(int fd, const char *filename, int flags, int *mnt_id) {
91 char path[STRLEN("/proc/self/fdinfo/") + DECIMAL_STR_MAX(int)];
92 _cleanup_free_ char *fdinfo = NULL;
93 _cleanup_close_ int subfd = -1;
94 char *p;
95 int r;
96
97 if ((flags & AT_EMPTY_PATH) && isempty(filename))
98 xsprintf(path, "/proc/self/fdinfo/%i", fd);
99 else {
100 subfd = openat(fd, filename, O_CLOEXEC|O_PATH|(flags & AT_SYMLINK_FOLLOW ? 0 : O_NOFOLLOW));
101 if (subfd < 0)
102 return -errno;
103
104 xsprintf(path, "/proc/self/fdinfo/%i", subfd);
105 }
106
107 r = read_full_file(path, &fdinfo, NULL);
108 if (r == -ENOENT) /* The fdinfo directory is a relatively new addition */
109 return -EOPNOTSUPP;
110 if (r < 0)
111 return r;
112
113 p = startswith(fdinfo, "mnt_id:");
114 if (!p) {
115 p = strstr(fdinfo, "\nmnt_id:");
116 if (!p) /* The mnt_id field is a relatively new addition */
117 return -EOPNOTSUPP;
118
119 p += 8;
120 }
121
122 p += strspn(p, WHITESPACE);
123 p[strcspn(p, WHITESPACE)] = 0;
124
125 return safe_atoi(p, mnt_id);
126 }
127
int fd_is_mount_point(int fd, const char *filename, int flags) {
        _cleanup_free_ struct file_handle *h = NULL, *h_parent = NULL;
        int mount_id = -1, mount_id_parent = -1;
        bool nosupp = false, check_st_dev = true;
        struct stat a, b;
        int r;

        assert(fd >= 0);
        assert(filename);

        /* Checks whether the entry "filename", looked up relative to the directory fd, is a mount point, by
         * comparing it against fd itself (which is referred to as the "parent" below).
         *
         * Returns 1 if it is considered a mount point, 0 if it is not, and a negative errno-style code on
         * failure.
         *
         * First we will try the name_to_handle_at() syscall, which
         * tells us the mount id and an opaque file "handle". It is
         * not supported everywhere though (kernel compile-time
         * option, not all file systems are hooked up). If it works
         * the mount id is usually good enough to tell us whether
         * something is a mount point.
         *
         * If that didn't work we will try to read the mount id from
         * /proc/self/fdinfo/<fd>. This is almost as good as
         * name_to_handle_at(), however, does not return the
         * opaque file handle. The opaque file handle is pretty useful
         * to detect the root directory, which we should always
         * consider a mount point. Hence we use this only as
         * fallback. Exporting the mnt_id in fdinfo is a pretty recent
         * kernel addition.
         *
         * As last fallback we do traditional fstat() based st_dev
         * comparisons. This is how things were traditionally done,
         * but unionfs breaks this since it exposes file
         * systems with a variety of st_dev reported. Also, btrfs
         * subvolumes have different st_dev, even though they aren't
         * real mounts of their own. */

        /* Stage 1: name_to_handle_at() on the entry itself. */
        r = name_to_handle_at_loop(fd, filename, &h, &mount_id, flags);
        if (IN_SET(r, -ENOSYS, -EACCES, -EPERM, -EOVERFLOW, -EINVAL))
                /* This kernel does not support name_to_handle_at() at all (ENOSYS), or the syscall was blocked
                 * (EACCES/EPERM; maybe through seccomp, because we are running inside of a container?), or the mount
                 * point is not triggered yet (EOVERFLOW, think nfs4), or some general name_to_handle_at() flakiness
                 * (EINVAL): fall back to simpler logic. */
                goto fallback_fdinfo;
        else if (r == -EOPNOTSUPP)
                /* This kernel or file system does not support name_to_handle_at(), hence let's see if the upper fs
                 * supports it (in which case it is a mount point), otherwise fallback to the traditional stat()
                 * logic */
                nosupp = true;
        else if (r < 0)
                return r;

        /* ... and the same for the parent, i.e. fd itself (empty path + AT_EMPTY_PATH). */
        r = name_to_handle_at_loop(fd, "", &h_parent, &mount_id_parent, AT_EMPTY_PATH);
        if (r == -EOPNOTSUPP) {
                if (nosupp)
                        /* Neither parent nor child do name_to_handle_at()? We have no choice but to fall back. */
                        goto fallback_fdinfo;
                else
                        /* The parent can't do name_to_handle_at() but the directory we are interested in can? If so,
                         * it must be a mount point. */
                        return 1;
        } else if (r < 0)
                return r;

        /* The parent can do name_to_handle_at() but the
         * directory we are interested in can't? If so, it
         * must be a mount point. */
        if (nosupp)
                return 1;

        /* If the file handle for the directory we are
         * interested in and its parent are identical, we
         * assume this is the root directory, which is a mount
         * point. */

        if (h->handle_bytes == h_parent->handle_bytes &&
            h->handle_type == h_parent->handle_type &&
            memcmp(h->f_handle, h_parent->f_handle, h->handle_bytes) == 0)
                return 1;

        /* Both handles are available and differ: the mount IDs decide. */
        return mount_id != mount_id_parent;

fallback_fdinfo:
        /* Stage 2: compare the mount IDs exported in /proc/self/fdinfo/. If that is unavailable or inaccessible
         * for the entry, move on to the fstatat() based logic. */
        r = fd_fdinfo_mnt_id(fd, filename, flags, &mount_id);
        if (IN_SET(r, -EOPNOTSUPP, -EACCES, -EPERM))
                goto fallback_fstat;
        if (r < 0)
                return r;

        r = fd_fdinfo_mnt_id(fd, "", AT_EMPTY_PATH, &mount_id_parent);
        if (r < 0)
                return r;

        if (mount_id != mount_id_parent)
                return 1;

        /* Hmm, so, the mount ids are the same. This leaves one
         * special case though for the root file system. For that,
         * let's see if the parent directory has the same inode as we
         * are interested in. Hence, let's also do fstat() checks now,
         * too, but avoid the st_dev comparisons, since they aren't
         * that useful on unionfs mounts. */
        check_st_dev = false;

fallback_fstat:
        /* Stage 3: fstatat() based comparison.
         *
         * yay for fstatat() taking a different set of flags than the other
         * _at() above: translate "AT_SYMLINK_FOLLOW set/unset" into "AT_SYMLINK_NOFOLLOW unset/set". */
        if (flags & AT_SYMLINK_FOLLOW)
                flags &= ~AT_SYMLINK_FOLLOW;
        else
                flags |= AT_SYMLINK_NOFOLLOW;
        if (fstatat(fd, filename, &a, flags) < 0)
                return -errno;

        if (fstatat(fd, "", &b, AT_EMPTY_PATH) < 0)
                return -errno;

        /* A directory with same device and inode as its parent? Must
         * be the root directory */
        if (a.st_dev == b.st_dev &&
            a.st_ino == b.st_ino)
                return 1;

        /* The device numbers are only consulted if we got here without matching fdinfo mount IDs (see above). */
        return check_st_dev && (a.st_dev != b.st_dev);
}
249
250 /* flags can be AT_SYMLINK_FOLLOW or 0 */
251 int path_is_mount_point(const char *t, const char *root, int flags) {
252 _cleanup_free_ char *canonical = NULL;
253 _cleanup_close_ int fd = -1;
254 int r;
255
256 assert(t);
257 assert((flags & ~AT_SYMLINK_FOLLOW) == 0);
258
259 if (path_equal(t, "/"))
260 return 1;
261
262 /* we need to resolve symlinks manually, we can't just rely on
263 * fd_is_mount_point() to do that for us; if we have a structure like
264 * /bin -> /usr/bin/ and /usr is a mount point, then the parent that we
265 * look at needs to be /usr, not /. */
266 if (flags & AT_SYMLINK_FOLLOW) {
267 r = chase_symlinks(t, root, CHASE_TRAIL_SLASH, &canonical);
268 if (r < 0)
269 return r;
270
271 t = canonical;
272 }
273
274 fd = open_parent(t, O_PATH|O_CLOEXEC, 0);
275 if (fd < 0)
276 return -errno;
277
278 return fd_is_mount_point(fd, last_path_component(t), flags);
279 }
280
281 int path_get_mnt_id(const char *path, int *ret) {
282 int r;
283
284 r = name_to_handle_at_loop(AT_FDCWD, path, NULL, ret, 0);
285 if (IN_SET(r, -EOPNOTSUPP, -ENOSYS, -EACCES, -EPERM, -EOVERFLOW, -EINVAL)) /* kernel/fs don't support this, or seccomp blocks access, or untriggered mount, or name_to_handle_at() is flaky */
286 return fd_fdinfo_mnt_id(AT_FDCWD, path, 0, ret);
287
288 return r;
289 }
290
bool fstype_is_network(const char *fstype) {
        const char *fuse_subtype;

        /* Returns true if fstype is one of the file system types listed below. A "fuse." prefix is stripped
         * first, so that FUSE file systems are matched by the name following the prefix. */

        fuse_subtype = startswith(fstype, "fuse.");

        return STR_IN_SET(fuse_subtype ? fuse_subtype : fstype,
                          "afs", "cifs", "smbfs", "sshfs",
                          "ncpfs", "ncp",
                          "nfs", "nfs4",
                          "gfs", "gfs2", "glusterfs",
                          "pvfs2", /* OrangeFS */
                          "ocfs2", "lustre");
}
314
bool fstype_is_api_vfs(const char *fstype) {

        /* Returns true if fstype is one of the file system types listed below (such as proc, sysfs or
         * devtmpfs), false otherwise. */

        return STR_IN_SET(fstype,
                          "autofs", "bpf",
                          "cgroup", "cgroup2", "configfs", "cpuset",
                          "debugfs", "devpts", "devtmpfs",
                          "efivarfs", "fusectl", "hugetlbfs",
                          "mqueue", "proc", "pstore", "ramfs",
                          "securityfs", "sysfs",
                          "tmpfs", "tracefs");
}
338
bool fstype_is_ro(const char *fstype) {

        /* Returns true if fstype is one of the Linux file systems that are necessarily read-only. */

        return STR_IN_SET(fstype, "DM_verity_hash", "iso9660", "squashfs");
}
346
bool fstype_can_discard(const char *fstype) {

        /* Returns true if fstype is one of the file system types listed below; judging by the function name,
         * these are the ones treated as supporting discard. */

        return STR_IN_SET(fstype, "btrfs", "ext4", "vfat", "xfs");
}
354
bool fstype_can_uid_gid(const char *fstype) {

        /* Returns true for every file system that has a uid=/gid= mount option that fixates the owners of all
         * files and directories, current and future. */

        return STR_IN_SET(fstype,
                          "adfs", "fat", "hfs", "hpfs",
                          "iso9660", "msdos", "ntfs", "vfat");
}
370
371 int dev_is_devtmpfs(void) {
372 _cleanup_fclose_ FILE *proc_self_mountinfo = NULL;
373 int mount_id, r;
374 char *e;
375
376 r = path_get_mnt_id("/dev", &mount_id);
377 if (r < 0)
378 return r;
379
380 r = fopen_unlocked("/proc/self/mountinfo", "re", &proc_self_mountinfo);
381 if (r < 0)
382 return r;
383
384 for (;;) {
385 _cleanup_free_ char *line = NULL;
386 int mid;
387
388 r = read_line(proc_self_mountinfo, LONG_LINE_MAX, &line);
389 if (r < 0)
390 return r;
391 if (r == 0)
392 break;
393
394 if (sscanf(line, "%i", &mid) != 1)
395 continue;
396
397 if (mid != mount_id)
398 continue;
399
400 e = strstr(line, " - ");
401 if (!e)
402 continue;
403
404 /* accept any name that starts with the currently expected type */
405 if (startswith(e + 3, "devtmpfs"))
406 return true;
407 }
408
409 return false;
410 }
411
const char *mount_propagation_flags_to_string(unsigned long flags) {
        /* Only the three propagation bits are looked at; everything else in flags is ignored. */
        unsigned long propagation = flags & (MS_SHARED|MS_SLAVE|MS_PRIVATE);

        /* Maps the propagation bits to a name: "" if none is set, "shared", "slave" or "private" if exactly that
         * one is set, and NULL for any combination of more than one of them. */

        if (propagation == 0)
                return "";

        if (propagation == MS_SHARED)
                return "shared";

        if (propagation == MS_SLAVE)
                return "slave";

        if (propagation == MS_PRIVATE)
                return "private";

        return NULL;
}
427
int mount_propagation_flags_from_string(const char *name, unsigned long *ret) {
        unsigned long parsed;

        /* Parses a propagation mode name into the matching MS_* flag: an empty name yields 0, "shared", "slave"
         * and "private" yield MS_SHARED, MS_SLAVE and MS_PRIVATE.
         *
         * Returns 0 on success, and -EINVAL for any other name, in which case *ret is left untouched. */

        if (isempty(name))
                parsed = 0;
        else if (streq(name, "shared"))
                parsed = MS_SHARED;
        else if (streq(name, "slave"))
                parsed = MS_SLAVE;
        else if (streq(name, "private"))
                parsed = MS_PRIVATE;
        else
                return -EINVAL;

        *ret = parsed;
        return 0;
}