From ae19f7546ccb038966fc121e287e1a1385ba94bb Mon Sep 17 00:00:00 2001 From: Karel Zak Date: Tue, 15 Apr 2025 12:29:37 +0200 Subject: [PATCH] libmount: (subdir) support detached open_tree() (>=6.15) The latest kernel can open a directory as a tree (open_tree()) on a detached mount tree. This means we do not need to unshare and attach the root of the filesystem to a private temporary directory. All this machinery can be replaced by one open_tree() call. Old version: fsopen("ext4", FSOPEN_CLOEXEC) = 3 unshare(CLONE_NEWNS) = 0 fsconfig(3, FSCONFIG_SET_STRING, "source", "/dev/sdc", 0) = 0 fsconfig(3, FSCONFIG_CMD_CREATE, NULL, NULL, 0) = 0 fsmount(3, FSMOUNT_CLOEXEC, 0) = 6 move_mount(6, "", AT_FDCWD, "/run/mount/tmptgt", MOVE_MOUNT_F_EMPTY_PATH) = 0 open_tree(6, "subdir", OPEN_TREE_CLONE|OPEN_TREE_CLOEXEC) = 7 setns(4, CLONE_NEWNS) = 0 move_mount(7, "", AT_FDCWD, "/mnt/test", MOVE_MOUNT_F_EMPTY_PATH) = 0 setns(5, CLONE_NEWNS) = 0 umount2("/run/mount/tmptgt", 0) = 0 setns(4, CLONE_NEWNS) = 0 New version: fsopen("ext4", FSOPEN_CLOEXEC) = 3 fsconfig(3, FSCONFIG_SET_STRING, "source", "/dev/sda", 0) = 0 fsconfig(3, FSCONFIG_CMD_CREATE, NULL, NULL, 0) = 0 fsmount(3, FSMOUNT_CLOEXEC, 0) = 4 open_tree(4, "subdir", OPEN_TREE_CLONE|OPEN_TREE_CLOEXEC|AT_SYMLINK_NOFOLLOW|AT_NO_AUTOMOUNT|AT_RECURSIVE) = 5 move_mount(5, "", AT_FDCWD, "/mnt/test", MOVE_MOUNT_F_EMPTY_PATH) = 0 Note that this commit does not completely deactivate hook_subdir.c because it is better to keep things in one place. It only adds a "subdir" to the struct libmnt_sysapi to force hook_mount.c to call open_tree(subdir), but all the logic and checks are still in hook_subdir.c. 
Signed-off-by: Karel Zak --- libmount/src/hook_mount.c | 26 ++++++++-- libmount/src/hook_subdir.c | 103 +++++++++++++++++++++++++++++-------- libmount/src/mountP.h | 3 ++ 3 files changed, 105 insertions(+), 27 deletions(-) diff --git a/libmount/src/hook_mount.c b/libmount/src/hook_mount.c index 1ffd19e83..9f722b27f 100644 --- a/libmount/src/hook_mount.c +++ b/libmount/src/hook_mount.c @@ -90,7 +90,9 @@ static void free_hookset_data( struct libmnt_context *cxt, close_sysapi_fds(api); + free(api->subdir); free(api); + mnt_context_set_hookset_data(cxt, hs, NULL); } @@ -224,8 +226,6 @@ static int open_fs_configuration_context(struct libmnt_context *cxt, struct libmnt_sysapi *api, const char *type) { - DBG(HOOK, ul_debug(" new FS '%s'", type)); - if (!type) return -EINVAL; @@ -285,10 +285,26 @@ static int hook_create_mount(struct libmnt_context *cxt, } if (!rc) { - api->fd_tree = fsmount(api->fd_fs, FSMOUNT_CLOEXEC, 0); - hookset_set_syscall_status(cxt, "fsmount", api->fd_tree >= 0); - if (api->fd_tree < 0) + int fd = fsmount(api->fd_fs, FSMOUNT_CLOEXEC, 0); + hookset_set_syscall_status(cxt, "fsmount", fd >= 0); + + if (fd >= 0 && api->subdir) { + /* + * subdir for Linux >= 6.15, see hook_subdir.c for more details. + */ + DBG(HOOK, ul_debugobj(hs, "opening subdir (detached) '%s'", api->subdir)); + int sub_fd = open_tree(fd, api->subdir, + AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW | + AT_RECURSIVE | OPEN_TREE_CLOEXEC | + OPEN_TREE_CLONE); + hookset_set_syscall_status(cxt, "open_tree", sub_fd >= 0); + close(fd); + fd = sub_fd; + } + + if (fd < 0) rc = -errno; + api->fd_tree = fd; } if (rc) diff --git a/libmount/src/hook_subdir.c b/libmount/src/hook_subdir.c index 7cbb2c88d..5f419338e 100644 --- a/libmount/src/hook_subdir.c +++ b/libmount/src/hook_subdir.c @@ -10,8 +10,26 @@ * (at your option) any later version. * * - * This is X-mount.subdir= implementation. The code uses global hookset data - * rather than per-callback (hook) data. 
+ * This is the implementation of X-mount.subdir=. The code uses global hookset + * data rather than per-callback (hook) data. + * + * Note that functionality varies significantly depending on the kernel version + * and available kernel mount interface: + * + * Supported scenarios: + * + * A) mount(2): + * - Unshare, mount the filesystem to a private temporary mount point + * - Bind mount subdirectory to the final target + * + * B) FD-based for Linux: + * - Unshare, attach to a temporary mount point + * - Open attached subdirectory and move to the final target + * + * C) FD-based for Linux >= 6.15 (with detached tree operations support): + * - hook_subdir.c only initializes api->subdir (according to X-mount.subdir=) + * - hook_mount.c opens detached subdirectory as a tree and later attaches to + * the final target * * Please, see the comment in libmount/src/hooks.c to understand how hooks work. */ @@ -19,6 +37,7 @@ #include "mountP.h" #include "fileutils.h" +#include "linux_version.h" struct hookset_data { char *subdir; @@ -54,10 +73,14 @@ static struct hookset_data *new_hookset_data( { struct hookset_data *hsd = calloc(1, sizeof(struct hookset_data)); - if (hsd && mnt_context_set_hookset_data(cxt, hs, hsd) != 0) { - /* probably ENOMEM problem */ - free(hsd); - hsd = NULL; + if (hsd) { + hsd->new_ns_fd = hsd->old_ns_fd = -1; + + if (mnt_context_set_hookset_data(cxt, hs, hsd) != 0) { + /* probably ENOMEM problem */ + free(hsd); + hsd = NULL; + } } return hsd; } @@ -85,8 +108,6 @@ static int tmptgt_unshare(struct hookset_data *hsd) #ifdef USE_LIBMOUNT_SUPPORT_NAMESPACES int rc = 0; - hsd->old_ns_fd = hsd->new_ns_fd = -1; - /* create directory */ rc = ul_mkdir_p(MNT_PATH_TMPTGT, S_IRWXU); if (rc) @@ -137,6 +158,9 @@ fail: static int tmptgt_cleanup(struct hookset_data *hsd) { #ifdef USE_LIBMOUNT_SUPPORT_NAMESPACES + if (!hsd || !hsd->subdir) + return 0; + if (!hsd->tmp_umounted) { umount(MNT_PATH_TMPTGT); hsd->tmp_umounted = 1; @@ -167,9 +191,13 @@ static int 
do_mount_subdir( const char *root) { int rc = 0; - const char *subdir = hsd->subdir; + const char *subdir; const char *target; + if (!hsd || !hsd->subdir) + return 0; + subdir = hsd->subdir; + #ifdef USE_LIBMOUNT_MOUNTFD_SUPPORT struct libmnt_sysapi *api = mnt_context_get_sysapi(cxt); @@ -186,18 +214,18 @@ static int do_mount_subdir( target = mnt_fs_get_target(cxt->fs); #ifdef USE_LIBMOUNT_MOUNTFD_SUPPORT - if (api && api->fd_tree >= 0) { - /* FD based way - unfortunately, it's impossible to open - * sub-directory on not-yet attached mount. It means - * hook_mount.c attaches FS to temporary directory, and we - * clone and move the subdir, and umount the old unshared - * temporary tree. + if (api && api->fd_tree >= 0 && !api->subdir) { + /* This is for older kernels with an FD-based mount API, but without + * support for detached open_tree() functionality. + * + * We attach the filesystem to a temporary directory, clone and + * move the subdirectory, then unmount the old unshared temporary + * tree. * - * The old mount(2) way does the same, but by BIND. - */ + * The old mount(2) method does the same, but using BIND. */ int fd; - DBG(HOOK, ul_debug("attach subdir '%s'", subdir)); + DBG(HOOK, ul_debug("opening subdir (ateched) '%s'", subdir)); fd = open_tree(api->fd_tree, subdir, OPEN_TREE_CLOEXEC | OPEN_TREE_CLONE); mnt_context_syscall_save_status(cxt, "open_tree", fd >= 0); @@ -211,6 +239,8 @@ static int do_mount_subdir( */ setns(hsd->old_ns_fd, CLONE_NEWNS); + DBG(HOOK, ul_debug("move_mount(to=%s)", target)); + rc = move_mount(fd, "", AT_FDCWD, target, MOVE_MOUNT_F_EMPTY_PATH); mnt_context_syscall_save_status(cxt, "move_mount", rc == 0); if (rc) @@ -253,7 +283,11 @@ static int do_mount_subdir( return rc; } - +/* + * This callback is invoked after mounting on old kernels, when the new + * filesystem is already attached to the temporary directory. It then moves the + * subdirectory to the final target in the original desired namespace. 
+ */ static int hook_mount_post( struct libmnt_context *cxt, const struct libmnt_hookset *hs, @@ -276,6 +310,12 @@ static int hook_mount_post( return rc; } +/* + * This callback is invoked before mounting, when all other hooks are + * initialized, and the FD-based API is ready (unless disabled by the user). + * + * See the description at the beginning of the file for supported scenarios. + */ static int hook_mount_pre( struct libmnt_context *cxt, const struct libmnt_hookset *hs, @@ -285,8 +325,25 @@ static int hook_mount_pre( int rc = 0; hsd = mnt_context_get_hookset_data(cxt, hs); - if (!hsd) + if (!hsd || !hsd->subdir) + return 0; + +#ifdef USE_LIBMOUNT_MOUNTFD_SUPPORT + /* + * Linux >= 6.15 can open subdir on a detached tree. Therefore, + * all of hook_subdir.c can be replaced by a single open_tree() call. + */ + struct libmnt_sysapi *api = mnt_context_get_sysapi(cxt); + + if (api + && cxt->helper == NULL + && get_linux_version() >= KERNEL_VERSION(6, 15, 0)) { + DBG(HOOK, ul_debugobj(hs, "detached subdir open (ignore hook)")); + api->subdir = hsd->subdir; + hsd->subdir = NULL; return 0; + } +#endif /* create unhared temporary target */ hsd->org_target = strdup(mnt_fs_get_target(cxt->fs)); @@ -352,8 +409,10 @@ static int is_subdir_required(struct libmnt_context *cxt, int *rc, char **subdir return *rc == 0; } -/* this is the initial callback used to check mount options and define next - * actions if necessary */ +/* + * This is the initial callback used to check mount options and define + * the next actions if necessary. 
+ */ static int hook_prepare_target( struct libmnt_context *cxt, const struct libmnt_hookset *hs, diff --git a/libmount/src/mountP.h b/libmount/src/mountP.h index a4ce7c042..f4d80c5df 100644 --- a/libmount/src/mountP.h +++ b/libmount/src/mountP.h @@ -730,6 +730,9 @@ struct libmnt_sysapi { int fd_fs; /* FD from fsopen() or fspick() */ int fd_tree; /* FD from fsmount() or open_tree() */ + char *subdir; /* Linux >= 6.15 can directly open subdir; + * hook_subdir sets this variable */ + unsigned int is_new_fs : 1 ; /* fd_fs comes from fsopen() */ }; -- 2.47.3