]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/basic/mountpoint-util.c
mountpoint-util: move 'norecovery' detection into its own helper call
[thirdparty/systemd.git] / src / basic / mountpoint-util.c
CommitLineData
db9ecf05 1/* SPDX-License-Identifier: LGPL-2.1-or-later */
049af8ad
ZJS
2
3#include <errno.h>
4#include <fcntl.h>
049af8ad 5#include <sys/mount.h>
7d3b157b
LP
6#if WANT_LINUX_FS_H
7#include <linux/fs.h>
8#endif
049af8ad
ZJS
9
10#include "alloc-util.h"
f4351959 11#include "chase-symlinks.h"
049af8ad
ZJS
12#include "fd-util.h"
13#include "fileio.h"
659d1924 14#include "filesystems.h"
049af8ad 15#include "fs-util.h"
7d3b157b
LP
16#include "missing_fs.h"
17#include "missing_mount.h"
69b3fa14
LP
18#include "missing_stat.h"
19#include "missing_syscall.h"
8bab8029 20#include "mkdir.h"
049af8ad 21#include "mountpoint-util.h"
659d1924 22#include "nulstr-util.h"
049af8ad
ZJS
23#include "parse-util.h"
24#include "path-util.h"
7cd296c2 25#include "stat-util.h"
049af8ad
ZJS
26#include "stdio-util.h"
27#include "strv.h"
8bab8029 28#include "user-util.h"
049af8ad
ZJS
29
/* This is the original MAX_HANDLE_SZ definition from the kernel, when the API was introduced. We use that in
 * place of any more currently defined value to future-proof things: if the size is increased in the API
 * headers, and our code is recompiled, then it would cease working on old kernels, as those refuse any sizes
 * larger than this value with EINVAL right away. Hence, let's disconnect ourselves from any such API changes,
 * and stick to the original definition from when it was introduced. We use it as a start value only anyway
 * (see below), and hence should be able to deal with large file handles anyway. */
#define ORIGINAL_MAX_HANDLE_SZ 128
37
38int name_to_handle_at_loop(
39 int fd,
40 const char *path,
41 struct file_handle **ret_handle,
42 int *ret_mnt_id,
43 int flags) {
44
049af8ad
ZJS
45 size_t n = ORIGINAL_MAX_HANDLE_SZ;
46
ffaf45e4
LP
47 assert((flags & ~(AT_SYMLINK_FOLLOW|AT_EMPTY_PATH)) == 0);
48
049af8ad
ZJS
49 /* We need to invoke name_to_handle_at() in a loop, given that it might return EOVERFLOW when the specified
50 * buffer is too small. Note that in contrast to what the docs might suggest, MAX_HANDLE_SZ is only good as a
51 * start value, it is not an upper bound on the buffer size required.
52 *
53 * This improves on raw name_to_handle_at() also in one other regard: ret_handle and ret_mnt_id can be passed
54 * as NULL if there's no interest in either. */
55
56 for (;;) {
229b0086 57 _cleanup_free_ struct file_handle *h = NULL;
049af8ad
ZJS
58 int mnt_id = -1;
59
60 h = malloc0(offsetof(struct file_handle, f_handle) + n);
61 if (!h)
62 return -ENOMEM;
63
64 h->handle_bytes = n;
65
66 if (name_to_handle_at(fd, path, h, &mnt_id, flags) >= 0) {
67
68 if (ret_handle)
69 *ret_handle = TAKE_PTR(h);
70
71 if (ret_mnt_id)
72 *ret_mnt_id = mnt_id;
73
74 return 0;
75 }
76 if (errno != EOVERFLOW)
77 return -errno;
78
79 if (!ret_handle && ret_mnt_id && mnt_id >= 0) {
80
81 /* As it appears, name_to_handle_at() fills in mnt_id even when it returns EOVERFLOW when the
82 * buffer is too small, but that's undocumented. Hence, let's make use of this if it appears to
83 * be filled in, and the caller was interested in only the mount ID an nothing else. */
84
85 *ret_mnt_id = mnt_id;
86 return 0;
87 }
88
89 /* If name_to_handle_at() didn't increase the byte size, then this EOVERFLOW is caused by something
90 * else (apparently EOVERFLOW is returned for untriggered nfs4 mounts sometimes), not by the too small
91 * buffer. In that case propagate EOVERFLOW */
92 if (h->handle_bytes <= n)
93 return -EOVERFLOW;
94
95 /* The buffer was too small. Size the new buffer by what name_to_handle_at() returned. */
96 n = h->handle_bytes;
97 if (offsetof(struct file_handle, f_handle) + n < n) /* check for addition overflow */
98 return -EOVERFLOW;
049af8ad
ZJS
99 }
100}
101
ffaf45e4 102static int fd_fdinfo_mnt_id(int fd, const char *filename, int flags, int *ret_mnt_id) {
049af8ad
ZJS
103 char path[STRLEN("/proc/self/fdinfo/") + DECIMAL_STR_MAX(int)];
104 _cleanup_free_ char *fdinfo = NULL;
254d1313 105 _cleanup_close_ int subfd = -EBADF;
049af8ad
ZJS
106 char *p;
107 int r;
108
ffaf45e4
LP
109 assert(ret_mnt_id);
110 assert((flags & ~(AT_SYMLINK_FOLLOW|AT_EMPTY_PATH)) == 0);
111
049af8ad
ZJS
112 if ((flags & AT_EMPTY_PATH) && isempty(filename))
113 xsprintf(path, "/proc/self/fdinfo/%i", fd);
114 else {
be24321f 115 subfd = openat(fd, filename, O_CLOEXEC|O_PATH|(flags & AT_SYMLINK_FOLLOW ? 0 : O_NOFOLLOW));
049af8ad
ZJS
116 if (subfd < 0)
117 return -errno;
118
119 xsprintf(path, "/proc/self/fdinfo/%i", subfd);
120 }
121
627055ce 122 r = read_full_virtual_file(path, &fdinfo, NULL);
049af8ad
ZJS
123 if (r == -ENOENT) /* The fdinfo directory is a relatively new addition */
124 return -EOPNOTSUPP;
125 if (r < 0)
126 return r;
127
128 p = startswith(fdinfo, "mnt_id:");
129 if (!p) {
130 p = strstr(fdinfo, "\nmnt_id:");
131 if (!p) /* The mnt_id field is a relatively new addition */
132 return -EOPNOTSUPP;
133
134 p += 8;
135 }
136
137 p += strspn(p, WHITESPACE);
138 p[strcspn(p, WHITESPACE)] = 0;
139
ffaf45e4 140 return safe_atoi(p, ret_mnt_id);
049af8ad
ZJS
141}
142
95231c72
LP
143static bool filename_possibly_with_slash_suffix(const char *s) {
144 const char *slash, *copied;
145
146 /* Checks whether the specified string is either file name, or a filename with a suffix of
147 * slashes. But nothing else.
148 *
149 * this is OK: foo, bar, foo/, bar/, foo//, bar///
150 * this is not OK: "", "/", "/foo", "foo/bar", ".", ".." … */
151
152 slash = strchr(s, '/');
153 if (!slash)
154 return filename_is_valid(s);
155
44571456 156 if (slash - s > PATH_MAX) /* We want to allocate on the stack below, hence do a size check first */
95231c72
LP
157 return false;
158
159 if (slash[strspn(slash, "/")] != 0) /* Check that the suffix consist only of one or more slashes */
160 return false;
161
2f82562b 162 copied = strndupa_safe(s, slash - s);
95231c72
LP
163 return filename_is_valid(copied);
164}
165
964ccab8
FB
/* Classifies a negative errno-style return value of name_to_handle_at(): returns false for the "acceptable"
 * errors listed below (callers then fall back to another method), true for everything else. */
static bool is_name_to_handle_at_fatal_error(int err) {
        /* name_to_handle_at() can return "acceptable" errors that are due to the context. For
         * example the kernel does not support name_to_handle_at() at all (ENOSYS), or the syscall
         * was blocked (EACCES/EPERM; maybe through seccomp, because we are running inside of a
         * container), or the mount point is not triggered yet (EOVERFLOW, think nfs4), or some
         * general name_to_handle_at() flakiness (EINVAL). However other errors are not supposed to
         * happen and therefore are considered fatal ones. */

        assert(err < 0);

        return !IN_SET(err, -EOPNOTSUPP, -ENOSYS, -EACCES, -EPERM, -EOVERFLOW, -EINVAL);
}
178
049af8ad
ZJS
179int fd_is_mount_point(int fd, const char *filename, int flags) {
180 _cleanup_free_ struct file_handle *h = NULL, *h_parent = NULL;
181 int mount_id = -1, mount_id_parent = -1;
182 bool nosupp = false, check_st_dev = true;
7cd296c2 183 STRUCT_STATX_DEFINE(sx);
049af8ad
ZJS
184 struct stat a, b;
185 int r;
186
187 assert(fd >= 0);
71c943dc
LP
188 assert((flags & ~AT_SYMLINK_FOLLOW) == 0);
189
190 if (!filename) {
191 /* If the file name is specified as NULL we'll see if the specified 'fd' is a mount
192 * point. That's only supported if the kernel supports statx(), or if the inode specified via
193 * 'fd' refers to a directory. Otherwise, we'll have to fail (ENOTDIR), because we have no
194 * kernel API to query the information we need. */
195 flags |= AT_EMPTY_PATH;
196 filename = "";
197 } else if (!filename_possibly_with_slash_suffix(filename))
198 /* Insist that the specified filename is actually a filename, and not a path, i.e. some inode further
199 * up or down the tree then immediately below the specified directory fd. */
95231c72
LP
200 return -EINVAL;
201
5f104080
LP
202 /* First we will try statx()' STATX_ATTR_MOUNT_ROOT attribute, which is our ideal API, available
203 * since kernel 5.8.
204 *
205 * If that fails, our second try is the name_to_handle_at() syscall, which tells us the mount id and
206 * an opaque file "handle". It is not supported everywhere though (kernel compile-time option, not
207 * all file systems are hooked up). If it works the mount id is usually good enough to tell us
208 * whether something is a mount point.
049af8ad 209 *
5f104080
LP
210 * If that didn't work we will try to read the mount id from /proc/self/fdinfo/<fd>. This is almost
211 * as good as name_to_handle_at(), however, does not return the opaque file handle. The opaque file
212 * handle is pretty useful to detect the root directory, which we should always consider a mount
213 * point. Hence we use this only as fallback. Exporting the mnt_id in fdinfo is a pretty recent
049af8ad
ZJS
214 * kernel addition.
215 *
5f104080
LP
216 * As last fallback we do traditional fstat() based st_dev comparisons. This is how things were
217 * traditionally done, but unionfs breaks this since it exposes file systems with a variety of st_dev
218 * reported. Also, btrfs subvolumes have different st_dev, even though they aren't real mounts of
219 * their own. */
220
221 if (statx(fd, filename, (FLAGS_SET(flags, AT_SYMLINK_FOLLOW) ? 0 : AT_SYMLINK_NOFOLLOW) |
222 (flags & AT_EMPTY_PATH) |
b898474f 223 AT_NO_AUTOMOUNT, STATX_TYPE, &sx) < 0) {
5f104080
LP
224 if (!ERRNO_IS_NOT_SUPPORTED(errno) && !ERRNO_IS_PRIVILEGE(errno))
225 return -errno;
226
2aed63f4 227 /* If statx() is not available or forbidden, fall back to name_to_handle_at() below */
5f104080
LP
228 } else if (FLAGS_SET(sx.stx_attributes_mask, STATX_ATTR_MOUNT_ROOT)) /* yay! */
229 return FLAGS_SET(sx.stx_attributes, STATX_ATTR_MOUNT_ROOT);
b898474f
LP
230 else if (FLAGS_SET(sx.stx_mask, STATX_TYPE) && S_ISLNK(sx.stx_mode))
231 return false; /* symlinks are never mount points */
049af8ad
ZJS
232
233 r = name_to_handle_at_loop(fd, filename, &h, &mount_id, flags);
964ccab8
FB
234 if (r < 0) {
235 if (is_name_to_handle_at_fatal_error(r))
236 return r;
237 if (r != -EOPNOTSUPP)
238 goto fallback_fdinfo;
239
240 /* This kernel or file system does not support name_to_handle_at(), hence let's see
241 * if the upper fs supports it (in which case it is a mount point), otherwise fall
242 * back to the traditional stat() logic */
049af8ad 243 nosupp = true;
964ccab8 244 }
049af8ad 245
71c943dc
LP
246 if (isempty(filename))
247 r = name_to_handle_at_loop(fd, "..", &h_parent, &mount_id_parent, 0); /* can't work for non-directories 😢 */
248 else
249 r = name_to_handle_at_loop(fd, "", &h_parent, &mount_id_parent, AT_EMPTY_PATH);
964ccab8
FB
250 if (r < 0) {
251 if (is_name_to_handle_at_fatal_error(r))
252 return r;
253 if (r != -EOPNOTSUPP)
254 goto fallback_fdinfo;
049af8ad 255 if (nosupp)
964ccab8 256 /* Both the parent and the directory can't do name_to_handle_at() */
049af8ad 257 goto fallback_fdinfo;
049af8ad 258
964ccab8
FB
259 /* The parent can't do name_to_handle_at() but the directory we are
260 * interested in can? If so, it must be a mount point. */
261 return 1;
262 }
263
264 /* The parent can do name_to_handle_at() but the directory we are interested in can't? If
265 * so, it must be a mount point. */
049af8ad
ZJS
266 if (nosupp)
267 return 1;
268
964ccab8
FB
269 /* If the file handle for the directory we are interested in and its parent are identical,
270 * we assume this is the root directory, which is a mount point. */
049af8ad
ZJS
271
272 if (h->handle_bytes == h_parent->handle_bytes &&
273 h->handle_type == h_parent->handle_type &&
274 memcmp(h->f_handle, h_parent->f_handle, h->handle_bytes) == 0)
275 return 1;
276
277 return mount_id != mount_id_parent;
278
279fallback_fdinfo:
280 r = fd_fdinfo_mnt_id(fd, filename, flags, &mount_id);
281 if (IN_SET(r, -EOPNOTSUPP, -EACCES, -EPERM))
282 goto fallback_fstat;
283 if (r < 0)
284 return r;
285
71c943dc
LP
286 if (isempty(filename))
287 r = fd_fdinfo_mnt_id(fd, "..", 0, &mount_id_parent); /* can't work for non-directories 😢 */
288 else
289 r = fd_fdinfo_mnt_id(fd, "", AT_EMPTY_PATH, &mount_id_parent);
049af8ad
ZJS
290 if (r < 0)
291 return r;
292
293 if (mount_id != mount_id_parent)
294 return 1;
295
9d0619de
LP
296 /* Hmm, so, the mount ids are the same. This leaves one special case though for the root file
297 * system. For that, let's see if the parent directory has the same inode as we are interested
298 * in. Hence, let's also do fstat() checks now, too, but avoid the st_dev comparisons, since they
299 * aren't that useful on unionfs mounts. */
049af8ad
ZJS
300 check_st_dev = false;
301
302fallback_fstat:
9d0619de 303 /* yay for fstatat() taking a different set of flags than the other _at() above */
049af8ad
ZJS
304 if (flags & AT_SYMLINK_FOLLOW)
305 flags &= ~AT_SYMLINK_FOLLOW;
306 else
307 flags |= AT_SYMLINK_NOFOLLOW;
308 if (fstatat(fd, filename, &a, flags) < 0)
309 return -errno;
b898474f
LP
310 if (S_ISLNK(a.st_mode)) /* Symlinks are never mount points */
311 return false;
049af8ad 312
71c943dc
LP
313 if (isempty(filename))
314 r = fstatat(fd, "..", &b, 0);
315 else
316 r = fstatat(fd, "", &b, AT_EMPTY_PATH);
317 if (r < 0)
049af8ad
ZJS
318 return -errno;
319
a9dac7a6
LP
320 /* A directory with same device and inode as its parent? Must be the root directory */
321 if (stat_inode_same(&a, &b))
049af8ad
ZJS
322 return 1;
323
324 return check_st_dev && (a.st_dev != b.st_dev);
325}
326
327/* flags can be AT_SYMLINK_FOLLOW or 0 */
328int path_is_mount_point(const char *t, const char *root, int flags) {
329 _cleanup_free_ char *canonical = NULL;
254d1313 330 _cleanup_close_ int fd = -EBADF;
049af8ad
ZJS
331 int r;
332
333 assert(t);
334 assert((flags & ~AT_SYMLINK_FOLLOW) == 0);
335
336 if (path_equal(t, "/"))
337 return 1;
338
339 /* we need to resolve symlinks manually, we can't just rely on
340 * fd_is_mount_point() to do that for us; if we have a structure like
341 * /bin -> /usr/bin/ and /usr is a mount point, then the parent that we
342 * look at needs to be /usr, not /. */
343 if (flags & AT_SYMLINK_FOLLOW) {
a5648b80 344 r = chase_symlinks(t, root, CHASE_TRAIL_SLASH, &canonical, NULL);
049af8ad
ZJS
345 if (r < 0)
346 return r;
347
348 t = canonical;
349 }
350
351 fd = open_parent(t, O_PATH|O_CLOEXEC, 0);
352 if (fd < 0)
89a5385f 353 return fd;
049af8ad
ZJS
354
355 return fd_is_mount_point(fd, last_path_component(t), flags);
356}
357
358int path_get_mnt_id(const char *path, int *ret) {
7cd296c2 359 STRUCT_NEW_STATX_DEFINE(buf);
049af8ad
ZJS
360 int r;
361
69b3fa14
LP
362 if (statx(AT_FDCWD, path, AT_SYMLINK_NOFOLLOW|AT_NO_AUTOMOUNT, STATX_MNT_ID, &buf.sx) < 0) {
363 if (!ERRNO_IS_NOT_SUPPORTED(errno) && !ERRNO_IS_PRIVILEGE(errno))
364 return -errno;
365
366 /* Fall back to name_to_handle_at() and then fdinfo if statx is not supported or we lack
367 * privileges */
368
369 } else if (FLAGS_SET(buf.nsx.stx_mask, STATX_MNT_ID)) {
370 *ret = buf.nsx.stx_mnt_id;
371 return 0;
372 }
373
049af8ad 374 r = name_to_handle_at_loop(AT_FDCWD, path, NULL, ret, 0);
964ccab8
FB
375 if (r == 0 || is_name_to_handle_at_fatal_error(r))
376 return r;
049af8ad 377
964ccab8 378 return fd_fdinfo_mnt_id(AT_FDCWD, path, 0, ret);
049af8ad
ZJS
379}
380
381bool fstype_is_network(const char *fstype) {
382 const char *x;
383
384 x = startswith(fstype, "fuse.");
385 if (x)
386 fstype = x;
387
659d1924
ILG
388 if (nulstr_contains(filesystem_sets[FILESYSTEM_SET_NETWORK].value, fstype))
389 return true;
390
391 /* Filesystems not present in the internal database */
049af8ad 392 return STR_IN_SET(fstype,
659d1924 393 "davfs",
049af8ad 394 "glusterfs",
137d4487 395 "lustre",
659d1924 396 "sshfs");
049af8ad
ZJS
397}
398
d72f4a38
TB
/* Returns true if 'fstype' is one of the file systems listed below that may need quotacheck and/or quotaon
 * to be run. */
bool fstype_needs_quota(const char *fstype) {
       /* 1. quotacheck needs to be run for some filesystems after they are mounted
        *    if the filesystem was not unmounted cleanly.
        * 2. You may need to run quotaon to enable quota usage tracking and/or
        *    enforcement.
        * ext2     - needs 1) and 2)
        * ext3     - needs 2) if configured using usrjquota/grpjquota mount options
        * ext4     - needs 1) if created without journal, needs 2) if created without QUOTA
        *            filesystem feature
        * reiserfs - needs 2).
        * jfs      - needs 2)
        * f2fs     - needs 2) if configured using usrjquota/grpjquota/prjjquota mount options
        * xfs      - nothing needed
        * gfs2     - nothing needed
        * ocfs2    - nothing needed
        * btrfs    - nothing needed
        * for reference see filesystem and quota manpages */
        return STR_IN_SET(fstype,
                          "ext2",
                          "ext3",
                          "ext4",
                          "reiserfs",
                          "jfs",
                          "f2fs");
}
424
049af8ad 425bool fstype_is_api_vfs(const char *fstype) {
659d1924
ILG
426 const FilesystemSet *fs;
427
428 FOREACH_POINTER(fs,
429 filesystem_sets + FILESYSTEM_SET_BASIC_API,
430 filesystem_sets + FILESYSTEM_SET_AUXILIARY_API,
431 filesystem_sets + FILESYSTEM_SET_PRIVILEGED_API,
432 filesystem_sets + FILESYSTEM_SET_TEMPORARY)
433 if (nulstr_contains(fs->value, fstype))
434 return true;
435
436 /* Filesystems not present in the internal database */
049af8ad
ZJS
437 return STR_IN_SET(fstype,
438 "autofs",
049af8ad 439 "cpuset",
659d1924 440 "devtmpfs");
049af8ad
ZJS
441}
442
ac2474e4
Y
/* Returns true unless 'fstype' (with a leading "fuse." prefix stripped) is "9p", a network file system or
 * an API VFS, as determined by fstype_is_network() and fstype_is_api_vfs(). */
bool fstype_is_blockdev_backed(const char *fstype) {
        const char *x;

        x = startswith(fstype, "fuse.");
        if (x)
                fstype = x;

        return !streq(fstype, "9p") && !fstype_is_network(fstype) && !fstype_is_api_vfs(fstype);
}
452
049af8ad
ZJS
/* Returns true if 'fstype' is one of the file systems listed below that can only be mounted read-only. */
bool fstype_is_ro(const char *fstype) {
        /* All Linux file systems that are necessarily read-only */
        return STR_IN_SET(fstype,
                          "DM_verity_hash",
                          "cramfs",
                          "erofs",
                          "iso9660",
                          "squashfs");
}
462
/* Returns true if 'fstype' supports the "discard" mount option. The answer of mount_option_supported() is
 * used whenever it is non-negative; otherwise a built-in list is consulted. */
bool fstype_can_discard(const char *fstype) {
        int r;

        assert(fstype);

        /* On new kernels we can just ask the kernel */
        r = mount_option_supported(fstype, "discard", NULL);
        if (r >= 0)
                return r;

        /* Couldn't determine it at runtime, use the static list */
        return STR_IN_SET(fstype,
                          "btrfs",
                          "f2fs",
                          "ext4",
                          "vfat",
                          "xfs");
}
480
034ebc47
LP
/* Returns true if 'fstype' supports the "norecovery" mount option. The answer of mount_option_supported()
 * is used whenever it is non-negative; otherwise a built-in list is consulted. */
bool fstype_can_norecovery(const char *fstype) {
        int r;

        assert(fstype);

        /* On new kernels we can just ask the kernel */
        r = mount_option_supported(fstype, "norecovery", NULL);
        if (r >= 0)
                return r;

        /* Couldn't determine it at runtime, use the static list */
        return STR_IN_SET(fstype,
                          "ext3",
                          "ext4",
                          "xfs",
                          "btrfs");
}
497
049af8ad
ZJS
/* Returns true if 'fstype' is one of the file systems listed below that accept uid=/gid= mount options. */
bool fstype_can_uid_gid(const char *fstype) {

        /* All file systems that have a uid=/gid= mount option that fixates the owners of all files and directories,
         * current and future. */

        return STR_IN_SET(fstype,
                          "adfs",
                          "exfat",
                          "fat",
                          "hfs",
                          "hpfs",
                          "iso9660",
                          "msdos",
                          "ntfs",
                          "vfat");
}
514
515int dev_is_devtmpfs(void) {
516 _cleanup_fclose_ FILE *proc_self_mountinfo = NULL;
517 int mount_id, r;
518 char *e;
519
520 r = path_get_mnt_id("/dev", &mount_id);
521 if (r < 0)
522 return r;
523
fdeea3f4
ZJS
524 r = fopen_unlocked("/proc/self/mountinfo", "re", &proc_self_mountinfo);
525 if (r < 0)
526 return r;
049af8ad
ZJS
527
528 for (;;) {
529 _cleanup_free_ char *line = NULL;
530 int mid;
531
532 r = read_line(proc_self_mountinfo, LONG_LINE_MAX, &line);
533 if (r < 0)
534 return r;
535 if (r == 0)
536 break;
537
538 if (sscanf(line, "%i", &mid) != 1)
539 continue;
540
541 if (mid != mount_id)
542 continue;
543
544 e = strstr(line, " - ");
545 if (!e)
546 continue;
547
548 /* accept any name that starts with the currently expected type */
549 if (startswith(e + 3, "devtmpfs"))
550 return true;
551 }
552
553 return false;
554}
555
61ef3051
ZJS
/* Like mount(), but the target is given as a file descriptor: the mount is performed on the path that
 * FORMAT_PROC_FD_PATH() produces for 'target_fd'.
 *
 * Returns 0 on success, -ENOSYS if mount() failed with ENOENT and proc_mounted() returns 0, -ENOENT for
 * any other ENOENT failure, and -errno otherwise. */
int mount_fd(const char *source,
             int target_fd,
             const char *filesystemtype,
             unsigned long mountflags,
             const void *data) {

        if (mount(source, FORMAT_PROC_FD_PATH(target_fd), filesystemtype, mountflags, data) < 0) {
                if (errno != ENOENT)
                        return -errno;

                /* ENOENT can mean two things: either that the source is missing, or that /proc/ isn't
                 * mounted. Check for the latter to generate better error messages. */
                if (proc_mounted() == 0)
                        return -ENOSYS;

                return -ENOENT;
        }

        return 0;
}
576
577int mount_nofollow(
578 const char *source,
579 const char *target,
580 const char *filesystemtype,
581 unsigned long mountflags,
582 const void *data) {
583
254d1313 584 _cleanup_close_ int fd = -EBADF;
61ef3051
ZJS
585
586 /* In almost all cases we want to manipulate the mount table without following symlinks, hence
587 * mount_nofollow() is usually the way to go. The only exceptions are environments where /proc/ is
588 * not available yet, since we need /proc/self/fd/ for this logic to work. i.e. during the early
589 * initialization of namespacing/container stuff where /proc is not yet mounted (and maybe even the
590 * fs to mount) we can only use traditional mount() directly.
591 *
592 * Note that this disables following only for the final component of the target, i.e symlinks within
593 * the path of the target are honoured, as are symlinks in the source path everywhere. */
594
595 fd = open(target, O_PATH|O_CLOEXEC|O_NOFOLLOW);
596 if (fd < 0)
597 return -errno;
598
599 return mount_fd(source, fd, filesystemtype, mountflags, data);
600}
601
/* Maps the propagation bits of 'flags' (MS_SHARED, MS_SLAVE, MS_PRIVATE; all other bits are ignored) to a
 * constant string: "" if none is set, "shared", "slave" or "private" if exactly one is set, and NULL if
 * more than one is set. */
const char *mount_propagation_flag_to_string(unsigned long flags) {

        switch (flags & (MS_SHARED|MS_SLAVE|MS_PRIVATE)) {
        case 0:
                return "";
        case MS_SHARED:
                return "shared";
        case MS_SLAVE:
                return "slave";
        case MS_PRIVATE:
                return "private";
        default:
                /* A combination of several propagation flags has no name */
                return NULL;
        }
}
617
/* Parses a propagation mode name into the matching MS_* flag and stores it in *ret. An empty or NULL-like
 * name (as judged by isempty()) yields 0; "shared", "slave" and "private" yield MS_SHARED, MS_SLAVE and
 * MS_PRIVATE.
 *
 * Returns 0 on success, -EINVAL for any other name (in which case *ret is not written). */
int mount_propagation_flag_from_string(const char *name, unsigned long *ret) {

        if (isempty(name))
                *ret = 0;
        else if (streq(name, "shared"))
                *ret = MS_SHARED;
        else if (streq(name, "slave"))
                *ret = MS_SLAVE;
        else if (streq(name, "private"))
                *ret = MS_PRIVATE;
        else
                return -EINVAL;
        return 0;
}
edac5c46
YW
632
/* Returns true if 'flag' is exactly 0, MS_SHARED, MS_PRIVATE or MS_SLAVE. */
bool mount_propagation_flag_is_valid(unsigned long flag) {
        return IN_SET(flag, 0, MS_SHARED, MS_PRIVATE, MS_SLAVE);
}
7d3b157b
LP
636
637unsigned long ms_nosymfollow_supported(void) {
638 _cleanup_close_ int fsfd = -EBADF, mntfd = -EBADF;
639 static int cache = -1;
640
641 /* Returns MS_NOSYMFOLLOW if it is supported, zero otherwise. */
642
643 if (cache >= 0)
644 return cache ? MS_NOSYMFOLLOW : 0;
645
646 /* Checks if MS_NOSYMFOLLOW is supported (which was added in 5.10). We use the new mount API's
647 * mount_setattr() call for that, which was added in 5.12, which is close enough. */
648
649 fsfd = fsopen("tmpfs", FSOPEN_CLOEXEC);
650 if (fsfd < 0) {
651 if (ERRNO_IS_NOT_SUPPORTED(errno))
652 goto not_supported;
653
654 log_debug_errno(errno, "Failed to open superblock context for tmpfs: %m");
655 return 0;
656 }
657
658 if (fsconfig(fsfd, FSCONFIG_CMD_CREATE, NULL, NULL, 0) < 0) {
659 if (ERRNO_IS_NOT_SUPPORTED(errno))
660 goto not_supported;
661
662 log_debug_errno(errno, "Failed to create tmpfs superblock: %m");
663 return 0;
664 }
665
666 mntfd = fsmount(fsfd, FSMOUNT_CLOEXEC, 0);
667 if (mntfd < 0) {
668 if (ERRNO_IS_NOT_SUPPORTED(errno))
669 goto not_supported;
670
671 log_debug_errno(errno, "Failed to turn superblock fd into mount fd: %m");
672 return 0;
673 }
674
675 if (mount_setattr(mntfd, "", AT_EMPTY_PATH|AT_RECURSIVE,
676 &(struct mount_attr) {
677 .attr_set = MOUNT_ATTR_NOSYMFOLLOW,
678 }, sizeof(struct mount_attr)) < 0) {
679 if (ERRNO_IS_NOT_SUPPORTED(errno))
680 goto not_supported;
681
682 log_debug_errno(errno, "Failed to set MOUNT_ATTR_NOSYMFOLLOW mount attribute: %m");
683 return 0;
684 }
685
686 cache = true;
687 return MS_NOSYMFOLLOW;
688
689not_supported:
690 cache = false;
691 return 0;
692}
117e7034
LP
693
694int mount_option_supported(const char *fstype, const char *key, const char *value) {
695 _cleanup_close_ int fd = -EBADF;
696 int r;
697
698 /* Checks if the specified file system supports a mount option. Returns > 0 if it suppors it, == 0 if
699 * it does not. Return -EAGAIN if we can't determine it. And any other error otherwise. */
700
701 assert(fstype);
702 assert(key);
703
704 fd = fsopen(fstype, FSOPEN_CLOEXEC);
705 if (fd < 0) {
706 if (ERRNO_IS_NOT_SUPPORTED(errno))
707 return -EAGAIN; /* new mount API not available → don't know */
708
709 return log_debug_errno(errno, "Failed to open superblock context for '%s': %m", fstype);
710 }
711
712 /* Various file systems have not been converted to the new mount API yet. For such file systems
713 * fsconfig() with FSCONFIG_SET_STRING/FSCONFIG_SET_FLAG never fail. Which sucks, because we want to
714 * use it for testing support, after all. Let's hence do a check if the file system got converted yet
715 * first. */
716 if (fsconfig(fd, FSCONFIG_SET_FD, "adefinitelynotexistingmountoption", NULL, fd) < 0) {
717 /* If FSCONFIG_SET_FD is not supported for the fs, then the file system was not converted to
718 * the new mount API yet. If it returns EINVAL the mount option doesn't exist, but the fstype
719 * is converted. */
720 if (errno == EOPNOTSUPP)
721 return -EAGAIN; /* FSCONFIG_SET_FD not supported on the fs, hence not converted to new mount API → don't know */
722 if (errno != EINVAL)
723 return log_debug_errno(errno, "Failed to check if file system has been converted to new mount API: %m");
724
725 /* So FSCONFIG_SET_FD worked, but the option didn't exist (we got EINVAL), this means the fs
726 * is converted. Let's now ask the actual question we wonder about. */
727 } else
728 return log_debug_errno(SYNTHETIC_ERRNO(EAGAIN), "FSCONFIG_SET_FD worked unexpectedly for '%s', whoa!", fstype);
729
730 if (value)
731 r = fsconfig(fd, FSCONFIG_SET_STRING, key, value, 0);
732 else
733 r = fsconfig(fd, FSCONFIG_SET_FLAG, key, NULL, 0);
734 if (r < 0) {
735 if (errno == EINVAL)
736 return false; /* EINVAL means option not supported. */
737
738 return log_debug_errno(errno, "Failed to set '%s%s%s' on '%s' superblock context: %m",
739 key, value ? "=" : "", strempty(value), fstype);
740 }
741
742 return true; /* works! */
743}