[thirdparty/systemd.git] / src / basic / mountpoint-util.c

/* SPDX-License-Identifier: LGPL-2.1+ */

#include <errno.h>
#include <fcntl.h>
#include <sys/mount.h>

#include "alloc-util.h"
#include "fd-util.h"
#include "fileio.h"
#include "fs-util.h"
#include "missing_stat.h"
#include "missing_syscall.h"
#include "mountpoint-util.h"
#include "parse-util.h"
#include "path-util.h"
#include "stat-util.h"
#include "stdio-util.h"
#include "strv.h"

/* This is the original MAX_HANDLE_SZ definition from the kernel, when the API was introduced. We use that in place of
 * any more currently defined value to future-proof things: if the size is increased in the API headers, and our code
 * is recompiled then it would cease working on old kernels, as those refuse any sizes larger than this value with
 * EINVAL right away. Hence, let's disconnect ourselves from any such API changes, and stick to the original definition
 * from when it was introduced. We use it as a start value only anyway (see name_to_handle_at_loop() below), and hence
 * should be able to deal with larger file handles regardless. */
#define ORIGINAL_MAX_HANDLE_SZ 128

int name_to_handle_at_loop(
                int fd,
                const char *path,
                struct file_handle **ret_handle,
                int *ret_mnt_id,
                int flags) {

        _cleanup_free_ struct file_handle *h = NULL;
        size_t n = ORIGINAL_MAX_HANDLE_SZ;

        assert((flags & ~(AT_SYMLINK_FOLLOW|AT_EMPTY_PATH)) == 0);

        /* We need to invoke name_to_handle_at() in a loop, given that it might return EOVERFLOW when the specified
         * buffer is too small. Note that in contrast to what the docs might suggest, MAX_HANDLE_SZ is only good as a
         * start value, it is not an upper bound on the buffer size required.
         *
         * This improves on raw name_to_handle_at() also in one other regard: ret_handle and ret_mnt_id can be passed
         * as NULL if there's no interest in either. */

        for (;;) {
                int mnt_id = -1;

                h = malloc0(offsetof(struct file_handle, f_handle) + n);
                if (!h)
                        return -ENOMEM;

                h->handle_bytes = n;

                if (name_to_handle_at(fd, path, h, &mnt_id, flags) >= 0) {

                        if (ret_handle)
                                *ret_handle = TAKE_PTR(h);

                        if (ret_mnt_id)
                                *ret_mnt_id = mnt_id;

                        return 0;
                }
                if (errno != EOVERFLOW)
                        return -errno;

                if (!ret_handle && ret_mnt_id && mnt_id >= 0) {

                        /* As it appears, name_to_handle_at() fills in mnt_id even when it returns EOVERFLOW when the
                         * buffer is too small, but that's undocumented. Hence, let's make use of this if it appears to
                         * be filled in, and the caller was interested in only the mount ID an nothing else. */

                        *ret_mnt_id = mnt_id;
                        return 0;
                }

                /* If name_to_handle_at() didn't increase the byte size, then this EOVERFLOW is caused by something
                 * else (apparently EOVERFLOW is returned for untriggered nfs4 mounts sometimes), not by the too small
                 * buffer. In that case propagate EOVERFLOW */
                if (h->handle_bytes <= n)
                        return -EOVERFLOW;

                /* The buffer was too small. Size the new buffer by what name_to_handle_at() returned. */
                n = h->handle_bytes;
                if (offsetof(struct file_handle, f_handle) + n < n) /* check for addition overflow */
                        return -EOVERFLOW;

                h = mfree(h);
        }
}

static int fd_fdinfo_mnt_id(int fd, const char *filename, int flags, int *ret_mnt_id) {
        char path[STRLEN("/proc/self/fdinfo/") + DECIMAL_STR_MAX(int)];
        _cleanup_free_ char *fdinfo = NULL;
        _cleanup_close_ int subfd = -1;
        char *p;
        int r;

        assert(ret_mnt_id);
        assert((flags & ~(AT_SYMLINK_FOLLOW|AT_EMPTY_PATH)) == 0);

        if ((flags & AT_EMPTY_PATH) && isempty(filename))
                xsprintf(path, "/proc/self/fdinfo/%i", fd);
        else {
                subfd = openat(fd, filename, O_CLOEXEC|O_PATH|(flags & AT_SYMLINK_FOLLOW ? 0 : O_NOFOLLOW));
                if (subfd < 0)
                        return -errno;

                xsprintf(path, "/proc/self/fdinfo/%i", subfd);
        }

        r = read_full_file(path, &fdinfo, NULL);
        if (r == -ENOENT) /* The fdinfo directory is a relatively new addition */
                return -EOPNOTSUPP;
        if (r < 0)
                return r;

        p = startswith(fdinfo, "mnt_id:");
        if (!p) {
                p = strstr(fdinfo, "\nmnt_id:");
                if (!p) /* The mnt_id field is a relatively new addition */
                        return -EOPNOTSUPP;

                p += 8;
        }

        p += strspn(p, WHITESPACE);
        p[strcspn(p, WHITESPACE)] = 0;

        return safe_atoi(p, ret_mnt_id);
}

/* Checks whether 'filename', looked up relative to the directory file descriptor 'fd', is a mount point.
 *
 * 'fd' must be valid and 'filename' non-NULL; 'flags' may only contain AT_SYMLINK_FOLLOW and AT_EMPTY_PATH
 * (all three are asserted below).
 *
 * Returns > 0 if it is a mount point, 0 if it is not, and a negative errno-style value on failure. */
int fd_is_mount_point(int fd, const char *filename, int flags) {
        _cleanup_free_ struct file_handle *h = NULL, *h_parent = NULL;
        int mount_id = -1, mount_id_parent = -1;
        bool nosupp = false, check_st_dev = true;
        STRUCT_STATX_DEFINE(sx);
        struct stat a, b;
        int r;

        assert(fd >= 0);
        assert(filename);
        assert((flags & ~(AT_SYMLINK_FOLLOW|AT_EMPTY_PATH)) == 0);

        /* First we will try statx()' STATX_ATTR_MOUNT_ROOT attribute, which is our ideal API, available
         * since kernel 5.8.
         *
         * If that fails, our second try is the name_to_handle_at() syscall, which tells us the mount id and
         * an opaque file "handle". It is not supported everywhere though (kernel compile-time option, not
         * all file systems are hooked up). If it works the mount id is usually good enough to tell us
         * whether something is a mount point.
         *
         * If that didn't work we will try to read the mount id from /proc/self/fdinfo/<fd>. This is almost
         * as good as name_to_handle_at(), however, does not return the opaque file handle. The opaque file
         * handle is pretty useful to detect the root directory, which we should always consider a mount
         * point. Hence we use this only as fallback. Exporting the mnt_id in fdinfo is a pretty recent
         * kernel addition.
         *
         * As last fallback we do traditional fstat() based st_dev comparisons. This is how things were
         * traditionally done, but unionfs breaks this since it exposes file systems with a variety of st_dev
         * reported. Also, btrfs subvolumes have different st_dev, even though they aren't real mounts of
         * their own. */

        /* statx() wants AT_SYMLINK_NOFOLLOW where our callers pass (or omit) AT_SYMLINK_FOLLOW, hence the
         * flag is translated here. */
        if (statx(fd, filename, (FLAGS_SET(flags, AT_SYMLINK_FOLLOW) ? 0 : AT_SYMLINK_NOFOLLOW) |
                                (flags & AT_EMPTY_PATH) |
                                AT_NO_AUTOMOUNT, 0, &sx) < 0) {
                if (!ERRNO_IS_NOT_SUPPORTED(errno) && !ERRNO_IS_PRIVILEGE(errno))
                        return -errno;

                /* If statx() is not available or forbidden, fall back to name_to_handle_at() below */
        } else if (FLAGS_SET(sx.stx_attributes_mask, STATX_ATTR_MOUNT_ROOT)) /* yay! */
                return FLAGS_SET(sx.stx_attributes, STATX_ATTR_MOUNT_ROOT);

        /* Second attempt: file handle and mount ID of the entry itself. */
        r = name_to_handle_at_loop(fd, filename, &h, &mount_id, flags);
        if (IN_SET(r, -ENOSYS, -EACCES, -EPERM, -EOVERFLOW, -EINVAL))
                /* This kernel does not support name_to_handle_at() at all (ENOSYS), or the syscall was blocked
                 * (EACCES/EPERM; maybe through seccomp, because we are running inside of a container?), or the mount
                 * point is not triggered yet (EOVERFLOW, think nfs4), or some general name_to_handle_at() flakiness
                 * (EINVAL): fall back to simpler logic. */
                goto fallback_fdinfo;
        else if (r == -EOPNOTSUPP)
                /* This kernel or file system does not support name_to_handle_at(), hence let's see if the upper fs
                 * supports it (in which case it is a mount point), otherwise fall back to the traditional stat()
                 * logic */
                nosupp = true;
        else if (r < 0)
                return r;

        /* Now the same for the parent, i.e. the directory 'fd' itself refers to. */
        r = name_to_handle_at_loop(fd, "", &h_parent, &mount_id_parent, AT_EMPTY_PATH);
        if (r == -EOPNOTSUPP) {
                if (nosupp)
                        /* Neither parent nor child do name_to_handle_at()?  We have no choice but to fall back. */
                        goto fallback_fdinfo;
                else
                        /* The parent can't do name_to_handle_at() but the directory we are interested in can?  If so,
                         * it must be a mount point. */
                        return 1;
        } else if (r < 0)
                return r;

        /* The parent can do name_to_handle_at() but the
         * directory we are interested in can't? If so, it
         * must be a mount point. */
        if (nosupp)
                return 1;

        /* If the file handle for the directory we are
         * interested in and its parent are identical, we
         * assume this is the root directory, which is a mount
         * point. */

        if (h->handle_bytes == h_parent->handle_bytes &&
            h->handle_type == h_parent->handle_type &&
            memcmp(h->f_handle, h_parent->f_handle, h->handle_bytes) == 0)
                return 1;

        return mount_id != mount_id_parent;

fallback_fdinfo:
        /* Third attempt: compare the mount IDs exported via /proc/self/fdinfo/. */
        r = fd_fdinfo_mnt_id(fd, filename, flags, &mount_id);
        if (IN_SET(r, -EOPNOTSUPP, -EACCES, -EPERM))
                /* fdinfo (or its mnt_id field) is unavailable or inaccessible: only the stat data is left. */
                goto fallback_fstat;
        if (r < 0)
                return r;

        r = fd_fdinfo_mnt_id(fd, "", AT_EMPTY_PATH, &mount_id_parent);
        if (r < 0)
                return r;

        if (mount_id != mount_id_parent)
                return 1;

        /* Hmm, so, the mount ids are the same. This leaves one
         * special case though for the root file system. For that,
         * let's see if the parent directory has the same inode as we
         * are interested in. Hence, let's also do fstat() checks now,
         * too, but avoid the st_dev comparisons, since they aren't
         * that useful on unionfs mounts. */
        check_st_dev = false;

fallback_fstat:
        /* yay for fstatat() taking a different set of flags than the other
         * _at() above */
        if (flags & AT_SYMLINK_FOLLOW)
                flags &= ~AT_SYMLINK_FOLLOW;
        else
                flags |= AT_SYMLINK_NOFOLLOW;
        if (fstatat(fd, filename, &a, flags) < 0)
                return -errno;

        if (fstatat(fd, "", &b, AT_EMPTY_PATH) < 0)
                return -errno;

        /* A directory with same device and inode as its parent? Must
         * be the root directory */
        if (a.st_dev == b.st_dev &&
            a.st_ino == b.st_ino)
                return 1;

        /* When we got here via the fdinfo path check_st_dev is false, and differing device numbers are
         * deliberately not taken as evidence of a mount point. */
        return check_st_dev && (a.st_dev != b.st_dev);
}

/* flags can be AT_SYMLINK_FOLLOW or 0 */
int path_is_mount_point(const char *t, const char *root, int flags) {
        _cleanup_free_ char *canonical = NULL;
        _cleanup_close_ int fd = -1;
        int r;

        assert(t);
        assert((flags & ~AT_SYMLINK_FOLLOW) == 0);

        if (path_equal(t, "/"))
                return 1;

        /* we need to resolve symlinks manually, we can't just rely on
         * fd_is_mount_point() to do that for us; if we have a structure like
         * /bin -> /usr/bin/ and /usr is a mount point, then the parent that we
         * look at needs to be /usr, not /. */
        if (flags & AT_SYMLINK_FOLLOW) {
                r = chase_symlinks(t, root, CHASE_TRAIL_SLASH, &canonical, NULL);
                if (r < 0)
                        return r;

                t = canonical;
        }

        fd = open_parent(t, O_PATH|O_CLOEXEC, 0);
        if (fd < 0)
                return fd;

        return fd_is_mount_point(fd, last_path_component(t), flags);
}

/* Determines the mount ID of 'path' (without following a trailing symlink or triggering automounts in the
 * statx() attempt) and stores it in '*ret'.
 *
 * Tries statx() with STATX_MNT_ID first, then name_to_handle_at(), then /proc/self/fdinfo/.
 * Returns 0 on success, a negative errno-style value on failure. */
int path_get_mnt_id(const char *path, int *ret) {
        STRUCT_NEW_STATX_DEFINE(buf);
        int r;

        if (statx(AT_FDCWD, path, AT_SYMLINK_NOFOLLOW|AT_NO_AUTOMOUNT, STATX_MNT_ID, &buf.sx) >= 0) {

                if (FLAGS_SET(buf.nsx.stx_mask, STATX_MNT_ID)) {
                        *ret = buf.nsx.stx_mnt_id;
                        return 0;
                }

                /* statx() worked, but the kernel did not fill in the mount ID: try the fallbacks below. */

        } else if (!(ERRNO_IS_NOT_SUPPORTED(errno) || ERRNO_IS_PRIVILEGE(errno)))
                return -errno;

        /* Either statx() is not supported, or we lack privileges, or it gave us no mount ID: fall back to
         * name_to_handle_at() and then fdinfo. */

        r = name_to_handle_at_loop(AT_FDCWD, path, NULL, ret, 0);
        if (!IN_SET(r, -EOPNOTSUPP, -ENOSYS, -EACCES, -EPERM, -EOVERFLOW, -EINVAL))
                return r;

        /* kernel/fs don't support this, or seccomp blocks access, or untriggered mount, or
         * name_to_handle_at() is flaky */
        return fd_fdinfo_mnt_id(AT_FDCWD, path, 0, ret);
}

/* Returns true if 'fstype' names one of the network file systems listed below. A leading "fuse." prefix is
 * ignored for the comparison. */
bool fstype_is_network(const char *fstype) {
        const char *unprefixed;

        unprefixed = startswith(fstype, "fuse.");
        if (!unprefixed)
                unprefixed = fstype;

        /* Kept in alphabetical order */
        return STR_IN_SET(unprefixed,
                          "afs",
                          "ceph",
                          "cifs",
                          "davfs",
                          "gfs",
                          "gfs2",
                          "glusterfs",
                          "lustre",
                          "ncp",
                          "ncpfs",
                          "nfs",
                          "nfs4",
                          "ocfs2",
                          "pvfs2", /* OrangeFS */
                          "smb3",
                          "smbfs",
                          "sshfs");
}

/* Returns true if 'fstype' names one of the API/virtual file systems listed below. */
bool fstype_is_api_vfs(const char *fstype) {
        /* Kept in alphabetical order */
        return STR_IN_SET(fstype,
                          "autofs", "bpf", "cgroup", "cgroup2",
                          "configfs", "cpuset", "debugfs", "devpts",
                          "devtmpfs", "efivarfs", "fusectl", "hugetlbfs",
                          "mqueue", "proc", "pstore", "ramfs",
                          "securityfs", "sysfs", "tmpfs", "tracefs");
}

/* Returns true unless 'fstype' (with a leading "fuse." prefix ignored) is "9p", a network file system or
 * an API virtual file system. */
bool fstype_is_blockdev_backed(const char *fstype) {
        const char *unprefixed;

        unprefixed = startswith(fstype, "fuse.");
        if (!unprefixed)
                unprefixed = fstype;

        if (streq(unprefixed, "9p"))
                return false;

        if (fstype_is_network(unprefixed))
                return false;

        return !fstype_is_api_vfs(unprefixed);
}

/* Returns true for the Linux file systems that are necessarily read-only. */
bool fstype_is_ro(const char *fstype) {
        return STR_IN_SET(fstype, "DM_verity_hash", "iso9660", "squashfs");
}

/* Returns true if 'fstype' is one of the file systems listed here as supporting discard. */
bool fstype_can_discard(const char *fstype) {
        return STR_IN_SET(fstype, "btrfs", "ext4", "vfat", "xfs");
}

/* Returns true for file systems that have a uid=/gid= mount option which fixates the owners of all files
 * and directories, current and future. */
bool fstype_can_uid_gid(const char *fstype) {
        /* Kept in alphabetical order */
        return STR_IN_SET(fstype,
                          "adfs", "exfat", "fat",
                          "hfs", "hpfs", "iso9660",
                          "msdos", "ntfs", "vfat");
}

int dev_is_devtmpfs(void) {
        _cleanup_fclose_ FILE *proc_self_mountinfo = NULL;
        int mount_id, r;
        char *e;

        r = path_get_mnt_id("/dev", &mount_id);
        if (r < 0)
                return r;

        r = fopen_unlocked("/proc/self/mountinfo", "re", &proc_self_mountinfo);
        if (r < 0)
                return r;

        for (;;) {
                _cleanup_free_ char *line = NULL;
                int mid;

                r = read_line(proc_self_mountinfo, LONG_LINE_MAX, &line);
                if (r < 0)
                        return r;
                if (r == 0)
                        break;

                if (sscanf(line, "%i", &mid) != 1)
                        continue;

                if (mid != mount_id)
                        continue;

                e = strstr(line, " - ");
                if (!e)
                        continue;

                /* accept any name that starts with the currently expected type */
                if (startswith(e + 3, "devtmpfs"))
                        return true;
        }

        return false;
}

/* Maps the propagation bits (MS_SHARED, MS_SLAVE, MS_PRIVATE) of 'flags' to a string; all other bits are
 * ignored.
 *
 * Returns "" if none of the three bits is set, "shared", "slave" or "private" if exactly one is set, and
 * NULL if more than one is set. */
const char *mount_propagation_flags_to_string(unsigned long flags) {
        unsigned long propagation = flags & (MS_SHARED|MS_SLAVE|MS_PRIVATE);

        if (propagation == 0)
                return "";

        if (propagation == MS_SHARED)
                return "shared";

        if (propagation == MS_SLAVE)
                return "slave";

        if (propagation == MS_PRIVATE)
                return "private";

        /* A combination of several propagation bits has no string representation. */
        return NULL;
}

/* Parses a propagation mode name into the matching MS_* flag and stores it in '*ret'.
 *
 * An empty or NULL 'name' yields 0; "shared", "slave" and "private" yield MS_SHARED, MS_SLAVE and
 * MS_PRIVATE. Returns 0 on success, or -EINVAL for any other name, in which case '*ret' is not written. */
int mount_propagation_flags_from_string(const char *name, unsigned long *ret) {
        unsigned long parsed;

        if (isempty(name))
                parsed = 0;
        else if (streq(name, "shared"))
                parsed = MS_SHARED;
        else if (streq(name, "slave"))
                parsed = MS_SLAVE;
        else if (streq(name, "private"))
                parsed = MS_PRIVATE;
        else
                return -EINVAL;

        *ret = parsed;
        return 0;
}
Commit	Line	Data
049af8ad ZJS	1	/* SPDX-License-Identifier: LGPL-2.1+ */
	2
	3	#include <errno.h>
	4	#include <fcntl.h>
049af8ad ZJS	5	#include <sys/mount.h>
	6
	7	#include "alloc-util.h"
	8	#include "fd-util.h"
	9	#include "fileio.h"
	10	#include "fs-util.h"
69b3fa14 LP	11	#include "missing_stat.h"
69b3fa14 LP	12	#include "missing_syscall.h"
049af8ad ZJS	13	#include "mountpoint-util.h"
	14	#include "parse-util.h"
	15	#include "path-util.h"
7cd296c2	16	#include "stat-util.h"
049af8ad ZJS	17	#include "stdio-util.h"
	18	#include "strv.h"
	19
	20	/* This is the original MAX_HANDLE_SZ definition from the kernel, when the API was introduced. We use that in place of
	21	* any more currently defined value to future-proof things: if the size is increased in the API headers, and our code
	22	* is recompiled then it would cease working on old kernels, as those refuse any sizes larger than this value with
	23	* EINVAL right-away. Hence, let's disconnect ourselves from any such API changes, and stick to the original definition
	24	* from when it was introduced. We use it as a start value only anyway (see below), and hence should be able to deal
	25	* with large file handles anyway. */
	26	#define ORIGINAL_MAX_HANDLE_SZ 128
	27
	28	int name_to_handle_at_loop(
	29	int fd,
	30	const char *path,
	31	struct file_handle **ret_handle,
	32	int *ret_mnt_id,
	33	int flags) {
	34
	35	_cleanup_free_ struct file_handle *h = NULL;
	36	size_t n = ORIGINAL_MAX_HANDLE_SZ;
	37
ffaf45e4 LP	38	assert((flags & ~(AT_SYMLINK_FOLLOW\|AT_EMPTY_PATH)) == 0);
ffaf45e4 LP	39
049af8ad ZJS	40	/* We need to invoke name_to_handle_at() in a loop, given that it might return EOVERFLOW when the specified
	41	* buffer is too small. Note that in contrast to what the docs might suggest, MAX_HANDLE_SZ is only good as a
	42	* start value, it is not an upper bound on the buffer size required.
	43	*
	44	* This improves on raw name_to_handle_at() also in one other regard: ret_handle and ret_mnt_id can be passed
	45	* as NULL if there's no interest in either. */
	46
	47	for (;;) {
	48	int mnt_id = -1;
	49
	50	h = malloc0(offsetof(struct file_handle, f_handle) + n);
	51	if (!h)
	52	return -ENOMEM;
	53
	54	h->handle_bytes = n;
	55
	56	if (name_to_handle_at(fd, path, h, &mnt_id, flags) >= 0) {
	57
	58	if (ret_handle)
	59	*ret_handle = TAKE_PTR(h);
	60
	61	if (ret_mnt_id)
	62	*ret_mnt_id = mnt_id;
	63
	64	return 0;
	65	}
	66	if (errno != EOVERFLOW)
	67	return -errno;
	68
	69	if (!ret_handle && ret_mnt_id && mnt_id >= 0) {
	70
	71	/* As it appears, name_to_handle_at() fills in mnt_id even when it returns EOVERFLOW when the
	72	* buffer is too small, but that's undocumented. Hence, let's make use of this if it appears to
	73	* be filled in, and the caller was interested in only the mount ID an nothing else. */
	74
	75	*ret_mnt_id = mnt_id;
	76	return 0;
	77	}
	78
	79	/* If name_to_handle_at() didn't increase the byte size, then this EOVERFLOW is caused by something
	80	* else (apparently EOVERFLOW is returned for untriggered nfs4 mounts sometimes), not by the too small
	81	* buffer. In that case propagate EOVERFLOW */
	82	if (h->handle_bytes <= n)
	83	return -EOVERFLOW;
	84
	85	/* The buffer was too small. Size the new buffer by what name_to_handle_at() returned. */
	86	n = h->handle_bytes;
	87	if (offsetof(struct file_handle, f_handle) + n < n) /* check for addition overflow */
	88	return -EOVERFLOW;
	89
	90	h = mfree(h);
	91	}
	92	}
	93
ffaf45e4	94	static int fd_fdinfo_mnt_id(int fd, const char filename, int flags, int ret_mnt_id) {
049af8ad ZJS	95	char path[STRLEN("/proc/self/fdinfo/") + DECIMAL_STR_MAX(int)];
	96	_cleanup_free_ char *fdinfo = NULL;
	97	_cleanup_close_ int subfd = -1;
	98	char *p;
	99	int r;
	100
ffaf45e4 LP	101	assert(ret_mnt_id);
	102	assert((flags & ~(AT_SYMLINK_FOLLOW\|AT_EMPTY_PATH)) == 0);
	103
049af8ad ZJS	104	if ((flags & AT_EMPTY_PATH) && isempty(filename))
	105	xsprintf(path, "/proc/self/fdinfo/%i", fd);
	106	else {
be24321f	107	subfd = openat(fd, filename, O_CLOEXEC\|O_PATH\|(flags & AT_SYMLINK_FOLLOW ? 0 : O_NOFOLLOW));
049af8ad ZJS	108	if (subfd < 0)
	109	return -errno;
	110
	111	xsprintf(path, "/proc/self/fdinfo/%i", subfd);
	112	}
	113
	114	r = read_full_file(path, &fdinfo, NULL);
	115	if (r == -ENOENT) /* The fdinfo directory is a relatively new addition */
	116	return -EOPNOTSUPP;
	117	if (r < 0)
	118	return r;
	119
	120	p = startswith(fdinfo, "mnt_id:");
	121	if (!p) {
	122	p = strstr(fdinfo, "\nmnt_id:");
	123	if (!p) /* The mnt_id field is a relatively new addition */
	124	return -EOPNOTSUPP;
	125
	126	p += 8;
	127	}
	128
	129	p += strspn(p, WHITESPACE);
	130	p[strcspn(p, WHITESPACE)] = 0;
	131
ffaf45e4	132	return safe_atoi(p, ret_mnt_id);
049af8ad ZJS	133	}
	134
	135	int fd_is_mount_point(int fd, const char *filename, int flags) {
	136	_cleanup_free_ struct file_handle h = NULL, h_parent = NULL;
	137	int mount_id = -1, mount_id_parent = -1;
	138	bool nosupp = false, check_st_dev = true;
7cd296c2	139	STRUCT_STATX_DEFINE(sx);
049af8ad ZJS	140	struct stat a, b;
	141	int r;
	142
	143	assert(fd >= 0);
	144	assert(filename);
5f104080	145	assert((flags & ~(AT_SYMLINK_FOLLOW\|AT_EMPTY_PATH)) == 0);
049af8ad	146
5f104080 LP	147	/* First we will try statx()' STATX_ATTR_MOUNT_ROOT attribute, which is our ideal API, available
	148	* since kernel 5.8.
	149	*
	150	* If that fails, our second try is the name_to_handle_at() syscall, which tells us the mount id and
	151	* an opaque file "handle". It is not supported everywhere though (kernel compile-time option, not
	152	* all file systems are hooked up). If it works the mount id is usually good enough to tell us
	153	* whether something is a mount point.
049af8ad	154	*
5f104080 LP	155	* If that didn't work we will try to read the mount id from /proc/self/fdinfo/<fd>. This is almost
	156	* as good as name_to_handle_at(), however, does not return the opaque file handle. The opaque file
	157	* handle is pretty useful to detect the root directory, which we should always consider a mount
	158	* point. Hence we use this only as fallback. Exporting the mnt_id in fdinfo is a pretty recent
049af8ad ZJS	159	* kernel addition.
049af8ad ZJS	160	*
5f104080 LP	161	* As last fallback we do traditional fstat() based st_dev comparisons. This is how things were
	162	* traditionally done, but unionfs breaks this since it exposes file systems with a variety of st_dev
	163	* reported. Also, btrfs subvolumes have different st_dev, even though they aren't real mounts of
	164	* their own. */
	165
	166	if (statx(fd, filename, (FLAGS_SET(flags, AT_SYMLINK_FOLLOW) ? 0 : AT_SYMLINK_NOFOLLOW) \|
	167	(flags & AT_EMPTY_PATH) \|
	168	AT_NO_AUTOMOUNT, 0, &sx) < 0) {
	169	if (!ERRNO_IS_NOT_SUPPORTED(errno) && !ERRNO_IS_PRIVILEGE(errno))
	170	return -errno;
	171
2aed63f4	172	/* If statx() is not available or forbidden, fall back to name_to_handle_at() below */
5f104080 LP	173	} else if (FLAGS_SET(sx.stx_attributes_mask, STATX_ATTR_MOUNT_ROOT)) /* yay! */
5f104080 LP	174	return FLAGS_SET(sx.stx_attributes, STATX_ATTR_MOUNT_ROOT);
049af8ad ZJS	175
	176	r = name_to_handle_at_loop(fd, filename, &h, &mount_id, flags);
	177	if (IN_SET(r, -ENOSYS, -EACCES, -EPERM, -EOVERFLOW, -EINVAL))
	178	/* This kernel does not support name_to_handle_at() at all (ENOSYS), or the syscall was blocked
	179	* (EACCES/EPERM; maybe through seccomp, because we are running inside of a container?), or the mount
	180	* point is not triggered yet (EOVERFLOW, think nfs4), or some general name_to_handle_at() flakiness
	181	* (EINVAL): fall back to simpler logic. */
	182	goto fallback_fdinfo;
	183	else if (r == -EOPNOTSUPP)
	184	/* This kernel or file system does not support name_to_handle_at(), hence let's see if the upper fs
2aed63f4	185	* supports it (in which case it is a mount point), otherwise fall back to the traditional stat()
049af8ad ZJS	186	* logic */
	187	nosupp = true;
	188	else if (r < 0)
	189	return r;
	190
	191	r = name_to_handle_at_loop(fd, "", &h_parent, &mount_id_parent, AT_EMPTY_PATH);
	192	if (r == -EOPNOTSUPP) {
	193	if (nosupp)
	194	/* Neither parent nor child do name_to_handle_at()? We have no choice but to fall back. */
	195	goto fallback_fdinfo;
	196	else
	197	/* The parent can't do name_to_handle_at() but the directory we are interested in can? If so,
	198	* it must be a mount point. */
	199	return 1;
	200	} else if (r < 0)
	201	return r;
	202
	203	/* The parent can do name_to_handle_at() but the
	204	* directory we are interested in can't? If so, it
	205	* must be a mount point. */
	206	if (nosupp)
	207	return 1;
	208
	209	/* If the file handle for the directory we are
	210	* interested in and its parent are identical, we
	211	* assume this is the root directory, which is a mount
	212	* point. */
	213
	214	if (h->handle_bytes == h_parent->handle_bytes &&
	215	h->handle_type == h_parent->handle_type &&
	216	memcmp(h->f_handle, h_parent->f_handle, h->handle_bytes) == 0)
	217	return 1;
	218
	219	return mount_id != mount_id_parent;
	220
	221	fallback_fdinfo:
	222	r = fd_fdinfo_mnt_id(fd, filename, flags, &mount_id);
	223	if (IN_SET(r, -EOPNOTSUPP, -EACCES, -EPERM))
	224	goto fallback_fstat;
	225	if (r < 0)
	226	return r;
	227
	228	r = fd_fdinfo_mnt_id(fd, "", AT_EMPTY_PATH, &mount_id_parent);
	229	if (r < 0)
	230	return r;
	231
	232	if (mount_id != mount_id_parent)
	233	return 1;
	234
	235	/* Hmm, so, the mount ids are the same. This leaves one
	236	* special case though for the root file system. For that,
	237	* let's see if the parent directory has the same inode as we
	238	* are interested in. Hence, let's also do fstat() checks now,
	239	* too, but avoid the st_dev comparisons, since they aren't
	240	* that useful on unionfs mounts. */
	241	check_st_dev = false;
	242
	243	fallback_fstat:
	244	/* yay for fstatat() taking a different set of flags than the other
	245	* _at() above */
	246	if (flags & AT_SYMLINK_FOLLOW)
	247	flags &= ~AT_SYMLINK_FOLLOW;
	248	else
	249	flags \|= AT_SYMLINK_NOFOLLOW;
250	if (fstatat(fd, filename, &a, flags) < 0)
251	return -errno;
252
253	if (fstatat(fd, "", &b, AT_EMPTY_PATH) < 0)
254	return -errno;
255
256	/* A directory with same device and inode as its parent? Must
257	* be the root directory */
258	if (a.st_dev == b.st_dev &&
259	a.st_ino == b.st_ino)
260	return 1;
261
262	return check_st_dev && (a.st_dev != b.st_dev);
263	}
264
265	/* flags can be AT_SYMLINK_FOLLOW or 0 */
266	int path_is_mount_point(const char t, const char root, int flags) {
267	_cleanup_free_ char *canonical = NULL;
268	_cleanup_close_ int fd = -1;
269	int r;
270
271	assert(t);
272	assert((flags & ~AT_SYMLINK_FOLLOW) == 0);
273
274	if (path_equal(t, "/"))
275	return 1;
276
277	/* we need to resolve symlinks manually, we can't just rely on
278	* fd_is_mount_point() to do that for us; if we have a structure like
279	* /bin -> /usr/bin/ and /usr is a mount point, then the parent that we
280	* look at needs to be /usr, not /. */
281	if (flags & AT_SYMLINK_FOLLOW) {
a5648b80	282	r = chase_symlinks(t, root, CHASE_TRAIL_SLASH, &canonical, NULL);
049af8ad ZJS	283	if (r < 0)
	284	return r;
	285
	286	t = canonical;
	287	}
	288
	289	fd = open_parent(t, O_PATH\|O_CLOEXEC, 0);
	290	if (fd < 0)
89a5385f	291	return fd;
049af8ad ZJS	292
	293	return fd_is_mount_point(fd, last_path_component(t), flags);
	294	}
	295
	296	int path_get_mnt_id(const char path, int ret) {
7cd296c2	297	STRUCT_NEW_STATX_DEFINE(buf);
049af8ad ZJS	298	int r;
049af8ad ZJS	299
69b3fa14 LP	300	if (statx(AT_FDCWD, path, AT_SYMLINK_NOFOLLOW\|AT_NO_AUTOMOUNT, STATX_MNT_ID, &buf.sx) < 0) {
	301	if (!ERRNO_IS_NOT_SUPPORTED(errno) && !ERRNO_IS_PRIVILEGE(errno))
	302	return -errno;
	303
	304	/* Fall back to name_to_handle_at() and then fdinfo if statx is not supported or we lack
	305	* privileges */
	306
	307	} else if (FLAGS_SET(buf.nsx.stx_mask, STATX_MNT_ID)) {
	308	*ret = buf.nsx.stx_mnt_id;
	309	return 0;
	310	}
	311
049af8ad ZJS	312	r = name_to_handle_at_loop(AT_FDCWD, path, NULL, ret, 0);
	313	if (IN_SET(r, -EOPNOTSUPP, -ENOSYS, -EACCES, -EPERM, -EOVERFLOW, -EINVAL)) /* kernel/fs don't support this, or seccomp blocks access, or untriggered mount, or name_to_handle_at() is flaky */
	314	return fd_fdinfo_mnt_id(AT_FDCWD, path, 0, ret);
	315
	316	return r;
	317	}
	318
	319	bool fstype_is_network(const char *fstype) {
	320	const char *x;
	321
	322	x = startswith(fstype, "fuse.");
	323	if (x)
	324	fstype = x;
	325
	326	return STR_IN_SET(fstype,
	327	"afs",
c4742de6	328	"ceph",
049af8ad	329	"cifs",
ff7d6a74	330	"smb3",
049af8ad ZJS	331	"smbfs",
	332	"sshfs",
	333	"ncpfs",
	334	"ncp",
	335	"nfs",
	336	"nfs4",
	337	"gfs",
	338	"gfs2",
	339	"glusterfs",
	340	"pvfs2", /* OrangeFS */
	341	"ocfs2",
137d4487	342	"lustre",
137d4487	343	"davfs");
049af8ad ZJS	344	}
	345
	346	bool fstype_is_api_vfs(const char *fstype) {
	347	return STR_IN_SET(fstype,
	348	"autofs",
	349	"bpf",
	350	"cgroup",
	351	"cgroup2",
	352	"configfs",
	353	"cpuset",
	354	"debugfs",
	355	"devpts",
	356	"devtmpfs",
	357	"efivarfs",
	358	"fusectl",
	359	"hugetlbfs",
	360	"mqueue",
	361	"proc",
	362	"pstore",
	363	"ramfs",
	364	"securityfs",
	365	"sysfs",
	366	"tmpfs",
	367	"tracefs");
	368	}
	369
ac2474e4 Y	370	bool fstype_is_blockdev_backed(const char *fstype) {
	371	const char *x;
	372
	373	x = startswith(fstype, "fuse.");
	374	if (x)
	375	fstype = x;
	376
	377	return !streq(fstype, "9p") && !fstype_is_network(fstype) && !fstype_is_api_vfs(fstype);
	378	}
	379
049af8ad ZJS	380	bool fstype_is_ro(const char *fstype) {
	381	/* All Linux file systems that are necessarily read-only */
	382	return STR_IN_SET(fstype,
	383	"DM_verity_hash",
	384	"iso9660",
	385	"squashfs");
	386	}
	387
	388	bool fstype_can_discard(const char *fstype) {
	389	return STR_IN_SET(fstype,
	390	"btrfs",
	391	"ext4",
	392	"vfat",
	393	"xfs");
	394	}
	395
	396	bool fstype_can_uid_gid(const char *fstype) {
	397
	398	/* All file systems that have a uid=/gid= mount option that fixates the owners of all files and directories,
	399	* current and future. */
	400
	401	return STR_IN_SET(fstype,
	402	"adfs",
5797a122	403	"exfat",
049af8ad ZJS	404	"fat",
	405	"hfs",
	406	"hpfs",
	407	"iso9660",
	408	"msdos",
	409	"ntfs",
	410	"vfat");
	411	}
	412
	413	int dev_is_devtmpfs(void) {
	414	_cleanup_fclose_ FILE *proc_self_mountinfo = NULL;
	415	int mount_id, r;
	416	char *e;
	417
	418	r = path_get_mnt_id("/dev", &mount_id);
	419	if (r < 0)
	420	return r;
	421
fdeea3f4 ZJS	422	r = fopen_unlocked("/proc/self/mountinfo", "re", &proc_self_mountinfo);
	423	if (r < 0)
	424	return r;
049af8ad ZJS	425
	426	for (;;) {
	427	_cleanup_free_ char *line = NULL;
	428	int mid;
	429
	430	r = read_line(proc_self_mountinfo, LONG_LINE_MAX, &line);
	431	if (r < 0)
	432	return r;
	433	if (r == 0)
	434	break;
	435
	436	if (sscanf(line, "%i", &mid) != 1)
	437	continue;
	438
	439	if (mid != mount_id)
	440	continue;
	441
	442	e = strstr(line, " - ");
	443	if (!e)
	444	continue;
	445
	446	/* accept any name that starts with the currently expected type */
	447	if (startswith(e + 3, "devtmpfs"))
	448	return true;
	449	}
	450
	451	return false;
	452	}
	453
	454	const char *mount_propagation_flags_to_string(unsigned long flags) {
	455
	456	switch (flags & (MS_SHARED\|MS_SLAVE\|MS_PRIVATE)) {
	457	case 0:
	458	return "";
	459	case MS_SHARED:
	460	return "shared";
	461	case MS_SLAVE:
	462	return "slave";
	463	case MS_PRIVATE:
	464	return "private";
	465	}
	466
	467	return NULL;
	468	}
	469
	470	int mount_propagation_flags_from_string(const char name, unsigned long ret) {
	471
	472	if (isempty(name))
	473	*ret = 0;
	474	else if (streq(name, "shared"))
	475	*ret = MS_SHARED;
	476	else if (streq(name, "slave"))
	477	*ret = MS_SLAVE;
	478	else if (streq(name, "private"))
	479	*ret = MS_PRIVATE;
	480	else
	481	return -EINVAL;
	482	return 0;
	483	}