From: Yu Watanabe Date: Mon, 5 Jan 2026 04:24:37 +0000 (+0900) Subject: tree-wide: statx() supports STATX_ATTR_MOUNT_ROOT since kernel 5.8 X-Git-Tag: v260-rc1~408^2~7 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=a98a6eb95cc980edab4b0f9c59e6573edc7ffe0c;p=thirdparty%2Fsystemd.git tree-wide: statx() supports STATX_ATTR_MOUNT_ROOT since kernel 5.8 Our baseline on kernel is 5.10, hence we can unconditionally use it. --- diff --git a/src/basic/mountpoint-util.c b/src/basic/mountpoint-util.c index b7c48709319..b02746b14cb 100644 --- a/src/basic/mountpoint-util.c +++ b/src/basic/mountpoint-util.c @@ -198,7 +198,6 @@ bool file_handle_equal(const struct file_handle *a, const struct file_handle *b) } int is_mount_point_at(int fd, const char *filename, int flags) { - bool fd_is_self; int r; assert(fd >= 0 || fd == AT_FDCWD); @@ -216,36 +215,13 @@ int is_mount_point_at(int fd, const char *filename, int flags) { filename = ""; } - fd_is_self = true; - } else if (STR_IN_SET(filename, ".", "./")) - fd_is_self = true; - else { + } else if (!STR_IN_SET(filename, ".", "./")) { /* Insist that the specified filename is actually a filename, and not a path, i.e. some inode * further up or down the tree then immediately below the specified directory fd. */ if (!filename_possibly_with_slash_suffix(filename)) return -EINVAL; - - fd_is_self = false; } - /* First we will try statx()' STATX_ATTR_MOUNT_ROOT attribute, which is our ideal API, available - * since kernel 5.8. - * - * If that fails, our second try is the name_to_handle_at() syscall, which tells us the mount id and - * an opaque file "handle". It is not supported everywhere though (kernel compile-time option, not - * all file systems are hooked up). If it works the mount id is usually good enough to tell us - * whether something is a mount point. - * - * If that didn't work we will try to read the mount id from /proc/self/fdinfo/. This is almost - * as good as name_to_handle_at(), however, does not return the opaque file handle. The opaque file - * handle is pretty useful to detect the root directory, which we should always consider a mount - * point. Hence we use this only as fallback. - * - * Note that traditionally the check is done via fstat()-based st_dev comparisons. However, various - * file systems don't guarantee same st_dev across single fs anymore, e.g. unionfs exposes file systems - * with a variety of st_dev reported. Also, btrfs subvolumes have different st_dev, even though - * they aren't real mounts of their own. */ - struct statx sx = {}; /* explicitly initialize the struct to make msan silent. */ if (statx(fd, filename, at_flags_normalize_nofollow(flags) | @@ -255,93 +231,11 @@ int is_mount_point_at(int fd, const char *filename, int flags) { &sx) < 0) return -errno; - if (FLAGS_SET(sx.stx_attributes_mask, STATX_ATTR_MOUNT_ROOT)) /* yay! */ - return FLAGS_SET(sx.stx_attributes, STATX_ATTR_MOUNT_ROOT); - - _cleanup_free_ struct file_handle *h = NULL, *h_parent = NULL; - int mount_id = -1, mount_id_parent = -1; - bool nosupp = false; - - r = name_to_handle_at_try_fid(fd, filename, &h, &mount_id, flags); - if (r < 0) { - if (is_name_to_handle_at_fatal_error(r)) - return r; - if (!ERRNO_IS_NOT_SUPPORTED(r)) - goto fallback_fdinfo; - - /* This file system does not support name_to_handle_at(), hence let's see if the upper fs - * supports it (in which case it is a mount point), otherwise fall back to the fdinfo logic. */ - nosupp = true; - } - - if (fd_is_self) - r = name_to_handle_at_try_fid(fd, "..", &h_parent, &mount_id_parent, 0); /* can't work for non-directories 😢 */ - else - r = name_to_handle_at_try_fid(fd, "", &h_parent, &mount_id_parent, AT_EMPTY_PATH); - if (r < 0) { - if (is_name_to_handle_at_fatal_error(r)) - return r; - if (!ERRNO_IS_NOT_SUPPORTED(r)) - goto fallback_fdinfo; - if (nosupp) - /* Both the parent and the directory can't do name_to_handle_at() */ - goto fallback_fdinfo; - - /* The parent can't do name_to_handle_at() but the directory we are - * interested in can? If so, it must be a mount point. */ - return 1; - } - - /* The parent can do name_to_handle_at() but the directory we are interested in can't? If - * so, it must be a mount point. */ - if (nosupp) - return 1; - - /* If the file handle for the directory we are interested in and its parent are identical, - * we assume this is the root directory, which is a mount point. */ - if (file_handle_equal(h_parent, h)) - return 1; - - return mount_id != mount_id_parent; - -fallback_fdinfo: - r = fd_fdinfo_mnt_id(fd, filename, flags, &mount_id); - if (r < 0) - return r; - - if (fd_is_self) - r = fd_fdinfo_mnt_id(fd, "..", 0, &mount_id_parent); /* can't work for non-directories 😢 */ - else - r = fd_fdinfo_mnt_id(fd, "", AT_EMPTY_PATH, &mount_id_parent); + r = statx_warn_mount_root(&sx, LOG_DEBUG); if (r < 0) return r; - if (mount_id != mount_id_parent) - return 1; - - /* Hmm, so, the mount ids are the same. This leaves one special case though for the root file - * system. For that, let's see if the parent directory has the same inode as we are interested - * in. */ - - struct stat a, b; - - /* yay for fstatat() taking a different set of flags than the other _at() above */ - if (fstatat(fd, filename, &a, at_flags_normalize_nofollow(flags)) < 0) - return -errno; - - if (fd_is_self) - r = fstatat(fd, "..", &b, 0); - else - r = fstatat(fd, "", &b, AT_EMPTY_PATH); - if (r < 0) - return -errno; - - /* A directory with same device and inode as its parent must be the root directory. Otherwise - * not a mount point. - * - * NB: we avoid inode_same_at() here because it internally attempts name_to_handle_at_try_fid() first, - * which is redundant. */ - return stat_inode_same(&a, &b); + return FLAGS_SET(sx.stx_attributes, STATX_ATTR_MOUNT_ROOT); } /* flags can be AT_SYMLINK_FOLLOW or 0 */ diff --git a/src/basic/stat-util.c b/src/basic/stat-util.c index 9f2587faddd..791e99d978f 100644 --- a/src/basic/stat-util.c +++ b/src/basic/stat-util.c @@ -572,3 +572,14 @@ mode_t inode_type_from_string(const char *s) { return MODE_INVALID; } + +int statx_warn_mount_root(const struct statx *sx, int log_level) { + assert(sx); + + /* The STATX_ATTR_MOUNT_ROOT flag is supported since kernel v5.8. */ + if (!FLAGS_SET(sx->stx_attributes_mask, STATX_ATTR_MOUNT_ROOT)) + return log_full_errno(log_level, SYNTHETIC_ERRNO(ENOSYS), + "statx() did not set STATX_ATTR_MOUNT_ROOT, running on an old kernel?"); + + return 0; +} diff --git a/src/basic/stat-util.h b/src/basic/stat-util.h index 395e391786c..a0a4489f74c 100644 --- a/src/basic/stat-util.h +++ b/src/basic/stat-util.h @@ -117,3 +117,5 @@ static inline bool inode_type_can_hardlink(mode_t m) { * type). */ return IN_SET(m & S_IFMT, S_IFSOCK, S_IFLNK, S_IFREG, S_IFBLK, S_IFCHR, S_IFIFO); } + +int statx_warn_mount_root(const struct statx *sx, int log_level); diff --git a/src/shared/find-esp.c b/src/shared/find-esp.c index ca8d99922de..8e1737eebb9 100644 --- a/src/shared/find-esp.c +++ b/src/shared/find-esp.c @@ -268,7 +268,7 @@ static int verify_fsroot_dir( bool searching = FLAGS_SET(flags, VERIFY_ESP_SEARCHING), unprivileged_mode = FLAGS_SET(flags, VERIFY_ESP_UNPRIVILEGED_MODE); _cleanup_free_ char *f = NULL; - struct statx sxa, sxb; + struct statx sx; int r; /* Checks if the specified directory is at the root of its file system, and returns device @@ -287,49 +287,30 @@ static int verify_fsroot_dir( if (statx(dir_fd, strempty(f), AT_SYMLINK_NOFOLLOW|(isempty(f) ? AT_EMPTY_PATH : 0), - STATX_TYPE|STATX_INO|STATX_MNT_ID, &sxa) < 0) + STATX_TYPE|STATX_INO|STATX_MNT_ID, &sx) < 0) return log_full_errno((searching && errno == ENOENT) || (unprivileged_mode && ERRNO_IS_PRIVILEGE(errno)) ? LOG_DEBUG : LOG_ERR, errno, "Failed to determine block device node of \"%s\": %m", path); - if (!S_ISDIR(sxa.stx_mode)) + if (!S_ISDIR(sx.stx_mode)) return log_error_errno(SYNTHETIC_ERRNO(ENOTDIR), "Path \"%s\" is not a directory", path); - if (FLAGS_SET(sxa.stx_attributes_mask, STATX_ATTR_MOUNT_ROOT)) { - - /* If we have STATX_ATTR_MOUNT_ROOT, we are happy, that's all we need. We operate under the - * assumption that a top of a mount point is also the top of the file system. (Which of - * course is strictly speaking not always true...) */ - - if (!FLAGS_SET(sxa.stx_attributes, STATX_ATTR_MOUNT_ROOT)) - return log_full_errno(searching ? LOG_DEBUG : LOG_ERR, - SYNTHETIC_ERRNO(searching ? EADDRNOTAVAIL : ENODEV), - "Directory \"%s\" is not the root of the file system.", path); - - goto success; - } - - /* Now let's look at the parent */ - if (statx(dir_fd, "", AT_EMPTY_PATH, STATX_TYPE|STATX_INO|STATX_MNT_ID, &sxb) < 0) - return log_full_errno(unprivileged_mode && ERRNO_IS_PRIVILEGE(errno) ? LOG_DEBUG : LOG_ERR, errno, - "Failed to determine block device node of parent of \"%s\": %m", path); - - if (statx_inode_same(&sxa, &sxb)) /* for the root dir inode nr for both inodes will be the same */ - goto success; + r = statx_warn_mount_root(&sx, LOG_ERR); + if (r < 0) + return r; - if (statx_mount_same(&sxa, &sxb)) + if (!FLAGS_SET(sx.stx_attributes, STATX_ATTR_MOUNT_ROOT)) return log_full_errno(searching ? LOG_DEBUG : LOG_ERR, SYNTHETIC_ERRNO(searching ? EADDRNOTAVAIL : ENODEV), "Directory \"%s\" is not the root of the file system.", path); -success: if (!ret_dev) return 0; - if (sxa.stx_dev_major == 0) /* Hmm, maybe a btrfs device, and the caller asked for the backing device? Then let's try to get it. */ + if (sx.stx_dev_major == 0) /* Hmm, maybe a btrfs device, and the caller asked for the backing device? Then let's try to get it. */ return btrfs_get_block_device_at(dir_fd, strempty(f), ret_dev); - *ret_dev = makedev(sxa.stx_dev_major, sxa.stx_dev_minor); + *ret_dev = makedev(sx.stx_dev_major, sx.stx_dev_minor); return 0; } diff --git a/src/tmpfiles/tmpfiles.c b/src/tmpfiles/tmpfiles.c index 90fe4b3d75c..a2ab8ded9bd 100644 --- a/src/tmpfiles/tmpfiles.c +++ b/src/tmpfiles/tmpfiles.c @@ -559,7 +559,7 @@ static int opendir_and_stat( bool *ret_mountpoint) { _cleanup_closedir_ DIR *d = NULL; - struct statx sx1; + struct statx sx; int r; assert(path); @@ -586,21 +586,16 @@ static int opendir_and_stat( return 0; } - if (statx(dirfd(d), "", AT_EMPTY_PATH, STATX_MODE|STATX_INO|STATX_ATIME|STATX_MTIME, &sx1) < 0) + if (statx(dirfd(d), "", AT_EMPTY_PATH, STATX_MODE|STATX_INO|STATX_ATIME|STATX_MTIME, &sx) < 0) return log_error_errno(errno, "statx(%s) failed: %m", path); - if (FLAGS_SET(sx1.stx_attributes_mask, STATX_ATTR_MOUNT_ROOT)) - *ret_mountpoint = FLAGS_SET(sx1.stx_attributes, STATX_ATTR_MOUNT_ROOT); - else { - struct statx sx2; - if (statx(dirfd(d), "..", 0, STATX_INO, &sx2) < 0) - return log_error_errno(errno, "statx(%s/..) failed: %m", path); - - *ret_mountpoint = !statx_mount_same(&sx1, &sx2); - } + r = statx_warn_mount_root(&sx, LOG_ERR); + if (r < 0) + return r; + *ret_mountpoint = FLAGS_SET(sx.stx_attributes, STATX_ATTR_MOUNT_ROOT); *ret = TAKE_PTR(d); - *ret_sx = sx1; + *ret_sx = sx; return 1; } @@ -713,35 +708,13 @@ static int dir_cleanup( continue; } - if (FLAGS_SET(sx.stx_attributes_mask, STATX_ATTR_MOUNT_ROOT)) { - /* Yay, we have the mount point API, use it */ - if (FLAGS_SET(sx.stx_attributes, STATX_ATTR_MOUNT_ROOT)) { - log_debug("Ignoring \"%s/%s\": different mount points.", p, de->d_name); - continue; - } - } else { - /* So we might have statx() but the STATX_ATTR_MOUNT_ROOT flag is not supported, fall - * back to traditional stx_dev checking. */ - if (sx.stx_dev_major != rootdev_major || - sx.stx_dev_minor != rootdev_minor) { - log_debug("Ignoring \"%s/%s\": different filesystem.", p, de->d_name); - continue; - } - - /* Try to detect bind mounts of the same filesystem instance; they do not differ in - * device major/minors. This type of query is not supported on all kernels or - * filesystem types though. */ - if (S_ISDIR(sx.stx_mode)) { - int q; + r = statx_warn_mount_root(&sx, LOG_ERR); + if (r < 0) + return r; - q = is_mount_point_at(dirfd(d), de->d_name, 0); - if (q < 0) - log_debug_errno(q, "Failed to determine whether \"%s/%s\" is a mount point, ignoring: %m", p, de->d_name); - else if (q > 0) { - log_debug("Ignoring \"%s/%s\": different mount of the same filesystem.", p, de->d_name); - continue; - } - } + if (FLAGS_SET(sx.stx_attributes, STATX_ATTR_MOUNT_ROOT)) { + log_debug("Ignoring \"%s/%s\": different mount points.", p, de->d_name); + continue; } atime_nsec = FLAGS_SET(sx.stx_mask, STATX_ATIME) ? statx_timestamp_load_nsec(&sx.stx_atime) : 0;