git.ipfire.org Git - thirdparty/util-linux.git/commitdiff
libmount: (subdir) support detached open_tree() (>=6.15)
author Karel Zak <kzak@redhat.com>
Tue, 15 Apr 2025 10:29:37 +0000 (12:29 +0200)
committer Karel Zak <kzak@redhat.com>
Wed, 16 Apr 2025 08:21:23 +0000 (10:21 +0200)
The latest kernel can open a directory as a tree (open_tree()) on a
detached mount tree. This means we do not need to unshare and
attach the root of the filesystem to a private temporary directory. All
this machinery can be replaced by one open_tree() call.

Old version:
fsopen("ext4", FSOPEN_CLOEXEC)          = 3
unshare(CLONE_NEWNS)                    = 0
fsconfig(3, FSCONFIG_SET_STRING, "source", "/dev/sdc", 0) = 0
fsconfig(3, FSCONFIG_CMD_CREATE, NULL, NULL, 0) = 0
fsmount(3, FSMOUNT_CLOEXEC, 0)          = 6
move_mount(6, "", AT_FDCWD, "/run/mount/tmptgt", MOVE_MOUNT_F_EMPTY_PATH) = 0
open_tree(6, "subdir", OPEN_TREE_CLONE|OPEN_TREE_CLOEXEC) = 7
setns(4, CLONE_NEWNS)                   = 0
move_mount(7, "", AT_FDCWD, "/mnt/test", MOVE_MOUNT_F_EMPTY_PATH) = 0
setns(5, CLONE_NEWNS)                   = 0
umount2("/run/mount/tmptgt", 0)         = 0
setns(4, CLONE_NEWNS)                   = 0

New version:
fsopen("ext4", FSOPEN_CLOEXEC)          = 3
fsconfig(3, FSCONFIG_SET_STRING, "source", "/dev/sda", 0) = 0
fsconfig(3, FSCONFIG_CMD_CREATE, NULL, NULL, 0) = 0
fsmount(3, FSMOUNT_CLOEXEC, 0)          = 4
open_tree(4, "subdir", OPEN_TREE_CLONE|OPEN_TREE_CLOEXEC|AT_SYMLINK_NOFOLLOW|AT_NO_AUTOMOUNT|AT_RECURSIVE) = 5
move_mount(5, "", AT_FDCWD, "/mnt/test", MOVE_MOUNT_F_EMPTY_PATH) = 0

Note that this commit does not completely deactivate hook_subdir.c
because it is better to keep things in one place. It only adds a "subdir"
to the struct libmnt_sysapi to force hook_mount.c to call open_tree(subdir),
but all the logic and checks are still in hook_subdir.c.

Signed-off-by: Karel Zak <kzak@redhat.com>
libmount/src/hook_mount.c
libmount/src/hook_subdir.c
libmount/src/mountP.h

index 1ffd19e836f19d4ca0d2f9835d3a2dd4c092df95..9f722b27f2b76e0d1196d026d2757d1a2ce27ce3 100644 (file)
@@ -90,7 +90,9 @@ static void free_hookset_data(        struct libmnt_context *cxt,
 
        close_sysapi_fds(api);
 
+       free(api->subdir);
        free(api);
+
        mnt_context_set_hookset_data(cxt, hs, NULL);
 }
 
@@ -224,8 +226,6 @@ static int open_fs_configuration_context(struct libmnt_context *cxt,
                                         struct libmnt_sysapi *api,
                                         const char *type)
 {
-       DBG(HOOK, ul_debug(" new FS '%s'", type));
-
        if (!type)
                return -EINVAL;
 
@@ -285,10 +285,26 @@ static int hook_create_mount(struct libmnt_context *cxt,
        }
 
        if (!rc) {
-               api->fd_tree = fsmount(api->fd_fs, FSMOUNT_CLOEXEC, 0);
-               hookset_set_syscall_status(cxt, "fsmount", api->fd_tree >= 0);
-               if (api->fd_tree < 0)
+               int fd = fsmount(api->fd_fs, FSMOUNT_CLOEXEC, 0);
+               hookset_set_syscall_status(cxt, "fsmount", fd >= 0);
+
+               if (fd >= 0 && api->subdir) {
+                       /*
+                        * subdir for Linux >= 6.15, see hook_subdir.c for more details.
+                        */
+                       DBG(HOOK, ul_debugobj(hs, "opening subdir (detached) '%s'", api->subdir));
+                       int sub_fd = open_tree(fd, api->subdir,
+                                       AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW |
+                                       AT_RECURSIVE | OPEN_TREE_CLOEXEC |
+                                       OPEN_TREE_CLONE);
+                       hookset_set_syscall_status(cxt, "open_tree", sub_fd >= 0);
+                       close(fd);
+                       fd = sub_fd;
+               }
+
+               if (fd < 0)
                        rc = -errno;
+               api->fd_tree = fd;
        }
 
        if (rc)
index 7cbb2c88d465a5f80ab67a0c145f7c39739826f0..5f419338eb9b053d30656393b6b90bf43c33a8be 100644 (file)
  * (at your option) any later version.
  *
  *
- * This is X-mount.subdir= implementation. The code uses global hookset data
- * rather than per-callback (hook) data.
+ * This is the implementation of X-mount.subdir=. The code uses global hookset
+ * data rather than per-callback (hook) data.
+ *
+ * Note that functionality varies significantly depending on the kernel version
+ * and available kernel mount interface:
+ *
+ * Supported scenarios:
+ *
+ * A) mount(2):
+ *     - Unshare, mount the filesystem to a private temporary mount point
+ *     - Bind mount subdirectory to the final target
+ *
+ * B) FD-based for Linux:
+ *     - Unshare, attach to a temporary mount point
+ *     - Open attached subdirectory and move to the final target
+ *
+ * C) FD-based for Linux >= 6.15 (with detached tree operations support):
+ *     - hook_subdir.c only initializes api->subdir (according to X-mount.subdir=)
+ *     - hook_mount.c opens detached subdirectory as a tree and later attaches to
+ *       the final target
  *
  * Please, see the comment in libmount/src/hooks.c to understand how hooks work.
  */
@@ -19,6 +37,7 @@
 
 #include "mountP.h"
 #include "fileutils.h"
+#include "linux_version.h"
 
 struct hookset_data {
        char *subdir;
@@ -54,10 +73,14 @@ static struct hookset_data *new_hookset_data(
 {
        struct hookset_data *hsd = calloc(1, sizeof(struct hookset_data));
 
-       if (hsd && mnt_context_set_hookset_data(cxt, hs, hsd) != 0) {
-               /* probably ENOMEM problem */
-               free(hsd);
-               hsd = NULL;
+       if (hsd) {
+               hsd->new_ns_fd = hsd->old_ns_fd = -1;
+
+               if (mnt_context_set_hookset_data(cxt, hs, hsd) != 0) {
+                       /* probably ENOMEM problem */
+                       free(hsd);
+                       hsd = NULL;
+               }
        }
        return hsd;
 }
@@ -85,8 +108,6 @@ static int tmptgt_unshare(struct hookset_data *hsd)
 #ifdef USE_LIBMOUNT_SUPPORT_NAMESPACES
        int rc = 0;
 
-       hsd->old_ns_fd = hsd->new_ns_fd = -1;
-
        /* create directory */
        rc = ul_mkdir_p(MNT_PATH_TMPTGT, S_IRWXU);
        if (rc)
@@ -137,6 +158,9 @@ fail:
 static int tmptgt_cleanup(struct hookset_data *hsd)
 {
 #ifdef USE_LIBMOUNT_SUPPORT_NAMESPACES
+       if (!hsd || !hsd->subdir)
+               return 0;
+
        if (!hsd->tmp_umounted) {
                umount(MNT_PATH_TMPTGT);
                hsd->tmp_umounted = 1;
@@ -167,9 +191,13 @@ static int do_mount_subdir(
                        const char *root)
 {
        int rc = 0;
-       const char *subdir = hsd->subdir;
+       const char *subdir;
        const char *target;
 
+       if (!hsd || !hsd->subdir)
+               return 0;
+       subdir = hsd->subdir;
+
 #ifdef USE_LIBMOUNT_MOUNTFD_SUPPORT
        struct libmnt_sysapi *api = mnt_context_get_sysapi(cxt);
 
@@ -186,18 +214,18 @@ static int do_mount_subdir(
        target = mnt_fs_get_target(cxt->fs);
 
 #ifdef USE_LIBMOUNT_MOUNTFD_SUPPORT
-       if (api && api->fd_tree >= 0) {
-               /* FD based way - unfortunately, it's impossible to open
-                * sub-directory on not-yet attached mount. It means
-                * hook_mount.c attaches FS to temporary directory, and we
-                * clone and move the subdir, and umount the old unshared
-                * temporary tree.
+       if (api && api->fd_tree >= 0 && !api->subdir) {
+               /* This is for older kernels with an FD-based mount API, but without
+                * support for detached open_tree() functionality.
+                *
+                * We attach the filesystem to a temporary directory, clone and
+                * move the subdirectory, then unmount the old unshared temporary
+                * tree.
                 *
-                * The old mount(2) way does the same, but by BIND.
-                */
+                * The old mount(2) method does the same, but using BIND. */
                int fd;
 
-               DBG(HOOK, ul_debug("attach subdir '%s'", subdir));
+               DBG(HOOK, ul_debug("opening subdir (ateched) '%s'", subdir));
                fd = open_tree(api->fd_tree, subdir,
                                        OPEN_TREE_CLOEXEC | OPEN_TREE_CLONE);
                mnt_context_syscall_save_status(cxt, "open_tree", fd >= 0);
@@ -211,6 +239,8 @@ static int do_mount_subdir(
                         */
                        setns(hsd->old_ns_fd, CLONE_NEWNS);
 
+                       DBG(HOOK, ul_debug("move_mount(to=%s)", target));
+
                        rc = move_mount(fd, "", AT_FDCWD, target, MOVE_MOUNT_F_EMPTY_PATH);
                        mnt_context_syscall_save_status(cxt, "move_mount", rc == 0);
                        if (rc)
@@ -253,7 +283,11 @@ static int do_mount_subdir(
        return rc;
 }
 
-
+/*
+ * This callback is invoked after mounting on old kernels, when the new
+ * filesystem is already attached to the temporary directory. It then moves the
+ * subdirectory to the final target in the original desired namespace.
+ */
 static int hook_mount_post(
                        struct libmnt_context *cxt,
                        const struct libmnt_hookset *hs,
@@ -276,6 +310,12 @@ static int hook_mount_post(
        return rc;
 }
 
+/*
+ * This callback is invoked before mounting, when all other hooks are
+ * initialized, and the FD-based API is ready (unless disabled by the user).
+ *
+ * See the description at the beginning of the file for supported scenarios.
+ */
 static int hook_mount_pre(
                        struct libmnt_context *cxt,
                        const struct libmnt_hookset *hs,
@@ -285,8 +325,25 @@ static int hook_mount_pre(
        int rc = 0;
 
        hsd = mnt_context_get_hookset_data(cxt, hs);
-       if (!hsd)
+       if (!hsd || !hsd->subdir)
+               return 0;
+
+#ifdef USE_LIBMOUNT_MOUNTFD_SUPPORT
+       /*
+        * Linux >= 6.15 can open subdir on a detached tree. Therefore,
+        * all of hook_subdir.c can be replaced by a single open_tree() call.
+        */
+       struct libmnt_sysapi *api = mnt_context_get_sysapi(cxt);
+
+       if (api
+           && cxt->helper == NULL
+           && get_linux_version() >= KERNEL_VERSION(6, 15, 0)) {
+               DBG(HOOK, ul_debugobj(hs, "detached subdir open (ignore hook)"));
+               api->subdir = hsd->subdir;
+               hsd->subdir = NULL;
                return 0;
+       }
+#endif
 
        /* create unhared temporary target */
        hsd->org_target = strdup(mnt_fs_get_target(cxt->fs));
@@ -352,8 +409,10 @@ static int is_subdir_required(struct libmnt_context *cxt, int *rc, char **subdir
        return *rc == 0;
 }
 
-/* this is the initial callback used to check mount options and define next
- * actions if necessary */
+/*
+ * This is the initial callback used to check mount options and define
+ * the next actions if necessary.
+ */
 static int hook_prepare_target(
                        struct libmnt_context *cxt,
                        const struct libmnt_hookset *hs,
index a4ce7c04268be3add45ed192399dac5768885a0c..f4d80c5dfc0887c368c345b734193aceeb6760e5 100644 (file)
@@ -730,6 +730,9 @@ struct libmnt_sysapi {
        int     fd_fs;          /* FD from fsopen() or fspick() */
        int     fd_tree;        /* FD from fsmount() or open_tree() */
 
+       char    *subdir;        /* Linux >= 6.15 can directly open subdir;
+                                * hook_subdir sets this variable */
+
        unsigned int is_new_fs : 1 ;    /* fd_fs comes from fsopen() */
 };