]> git.ipfire.org Git - thirdparty/util-linux.git/commitdiff
libmount: (subdir) support unshared sessions too
author: Karel Zak <kzak@redhat.com>
Tue, 2 May 2023 11:27:52 +0000 (13:27 +0200)
committer: Karel Zak <kzak@redhat.com>
Tue, 2 May 2023 11:27:52 +0000 (13:27 +0200)
* enter original namespace for the final move_mount(), it means that
  the original unshare() (to create the temporary mount) will be
  ineffective for this final step. God bless FD based APIs (setns()
  and open_tree())! ;-)

* improve cleanup code (call umount only once)

* remember private namespace

* update note in man page

Addresses: https://github.com/util-linux/util-linux/pull/2198
Signed-off-by: Karel Zak <kzak@redhat.com>
libmount/src/hook_subdir.c
sys-utils/mount.8.adoc
tests/ts/mount/subdir

index aee0baf3392cd7789b0527eae5b0cae5344ea0c3..7da563b85d4650549c7bf1f30be0993840704c0d 100644 (file)
 #include "fileutils.h"
 #include "mount-api-utils.h"
 
-static int tmptgt_cleanup(int old_ns_fd);
-
 struct hookset_data {
        char *subdir;
        char *org_target;
        int old_ns_fd;
+       int new_ns_fd;
+       unsigned int tmp_umounted : 1;
 };
 
+static int tmptgt_cleanup(struct hookset_data *);
+
 static void free_hookset_data( struct libmnt_context *cxt,
                                const struct libmnt_hookset *hs)
 {
@@ -37,7 +39,7 @@ static void free_hookset_data(        struct libmnt_context *cxt,
        if (!hsd)
                return;
        if (hsd->old_ns_fd >= 0)
-               tmptgt_cleanup(hsd->old_ns_fd);
+               tmptgt_cleanup(hsd);
 
        free(hsd->org_target);
        free(hsd->subdir);
@@ -79,29 +81,27 @@ static int hookset_deinit(struct libmnt_context *cxt, const struct libmnt_hookse
  * Initialize MNT_PATH_TMPTGT; mkdir, create a new namespace and
  * mark (bind mount) the directory as private.
  */
-static int tmptgt_unshare(int *old_ns_fd)
+static int tmptgt_unshare(struct hookset_data *hsd)
 {
 #ifdef USE_LIBMOUNT_SUPPORT_NAMESPACES
-       int rc = 0, fd = -1;
+       int rc = 0;
 
-       assert(old_ns_fd);
+       hsd->old_ns_fd = hsd->new_ns_fd = -1;
 
-       *old_ns_fd = -1;
+       /* create directory */
+       rc = ul_mkdir_p(MNT_PATH_TMPTGT, S_IRWXU);
+       if (rc)
+               goto fail;
 
        /* remember the current namespace */
-       fd = open("/proc/self/ns/mnt", O_RDONLY | O_CLOEXEC);
-       if (fd < 0)
+       hsd->old_ns_fd = open("/proc/self/ns/mnt", O_RDONLY | O_CLOEXEC);
+       if (hsd->old_ns_fd < 0)
                goto fail;
 
        /* create new namespace */
        if (unshare(CLONE_NEWNS) != 0)
                goto fail;
 
-       /* create directory */
-       rc = ul_mkdir_p(MNT_PATH_TMPTGT, S_IRWXU);
-       if (rc)
-               goto fail;
-
        /* try to set top-level directory as private, this is possible if
         * MNT_RUNTIME_TOPDIR (/run) is a separated filesystem. */
        if (mount("none", MNT_RUNTIME_TOPDIR, NULL, MS_PRIVATE, NULL) != 0) {
@@ -113,14 +113,18 @@ static int tmptgt_unshare(int *old_ns_fd)
                        goto fail;
        }
 
+       /* remember the new namespace */
+       hsd->new_ns_fd = open("/proc/self/ns/mnt", O_RDONLY | O_CLOEXEC);
+       if (hsd->new_ns_fd < 0)
+               goto fail;
+
        DBG(UTILS, ul_debug(MNT_PATH_TMPTGT " unshared"));
-       *old_ns_fd = fd;
        return 0;
 fail:
        if (rc == 0)
                rc = errno ? -errno : -EINVAL;
 
-       tmptgt_cleanup(fd);
+       tmptgt_cleanup(hsd);
        DBG(UTILS, ul_debug(MNT_PATH_TMPTGT " unshare failed"));
        return rc;
 #else
@@ -131,16 +135,23 @@ fail:
 /*
  * Clean up MNT_PATH_TMPTGT; umount and switch back to old namespace
  */
-static int tmptgt_cleanup(int old_ns_fd)
+static int tmptgt_cleanup(struct hookset_data *hsd)
 {
 #ifdef USE_LIBMOUNT_SUPPORT_NAMESPACES
-       umount(MNT_PATH_TMPTGT);
+       if (!hsd->tmp_umounted) {
+               umount(MNT_PATH_TMPTGT);
+               hsd->tmp_umounted = 1;
+       }
+
+       if (hsd->new_ns_fd >= 0)
+               close(hsd->new_ns_fd);
 
-       if (old_ns_fd >= 0) {
-               setns(old_ns_fd, CLONE_NEWNS);
-               close(old_ns_fd);
+       if (hsd->old_ns_fd >= 0) {
+               setns(hsd->old_ns_fd, CLONE_NEWNS);
+               close(hsd->old_ns_fd);
        }
 
+       hsd->new_ns_fd = hsd->old_ns_fd = -1;
        DBG(UTILS, ul_debug(MNT_PATH_TMPTGT " cleanup done"));
        return 0;
 #else
@@ -148,13 +159,17 @@ static int tmptgt_cleanup(int old_ns_fd)
 #endif
 }
 
+/*
+ * Attach (move) MNT_PATH_TMPTGT/subdir to the parental namespace.
+ */
 static int do_mount_subdir(
                        struct libmnt_context *cxt,
+                       struct hookset_data *hsd,
                        const char *root,
-                       const char *subdir,
                        const char *target)
 {
        int rc = 0;
+       const char *subdir = hsd->subdir;
 
 #ifdef USE_LIBMOUNT_MOUNTFD_SUPPORT
        struct libmnt_sysapi *api;
@@ -162,9 +177,10 @@ static int do_mount_subdir(
        api = mnt_context_get_sysapi(cxt);
        if (api) {
                /* FD based way - unfortunately, it's impossible to open
-                * sub-directory on not-yet attached mount. It means hook_mount.c
-                * attaches to FS to temporary directory, and we clone
-                * and move the subdir, and umount the old temporary tree.
+                * sub-directory on not-yet attached mount. It means
+                * hook_mount.c attaches FS to temporary directory, and we
+                * clone and move the subdir, and umount the old unshared
+                * temporary tree.
                 *
                 * The old mount(2) way does the same, but by BIND.
                 */
@@ -178,10 +194,19 @@ static int do_mount_subdir(
                        rc = -errno;
 
                if (!rc) {
+                       /* Note that the original parental namespace could be
+                        * private, in this case, it will not see our final mount,
+                        * so we need to move to the original namespace.
+                        */
+                       setns(hsd->old_ns_fd, CLONE_NEWNS);
+
                        rc = move_mount(fd, "", AT_FDCWD, target, MOVE_MOUNT_F_EMPTY_PATH);
                        set_syscall_status(cxt, "move_mount", rc == 0);
                        if (rc)
                                rc = -errno;
+
+                       /* And move back to our private namespace to cleanup */
+                       setns(hsd->new_ns_fd, CLONE_NEWNS);
                }
                if (!rc) {
                        close(api->fd_tree);
@@ -211,6 +236,8 @@ static int do_mount_subdir(
                set_syscall_status(cxt, "umount", rc == 0);
                if (rc)
                        rc = -errno;
+               hsd->tmp_umounted = 1;
+
        }
 
        return rc;
@@ -233,14 +260,13 @@ static int hook_mount_post(
        mnt_fs_set_target(cxt->fs, hsd->org_target);
 
        /* bind subdir to the real target, umount temporary target */
-       rc = do_mount_subdir(cxt, MNT_PATH_TMPTGT,
-                       hsd->subdir,
+       rc = do_mount_subdir(cxt, hsd,
+                       MNT_PATH_TMPTGT,
                        mnt_fs_get_target(cxt->fs));
        if (rc)
                return rc;
 
-       tmptgt_cleanup(hsd->old_ns_fd);
-       hsd->old_ns_fd = -1;
+       tmptgt_cleanup(hsd);
 
        return rc;
 }
@@ -262,7 +288,7 @@ static int hook_mount_pre(
        if (!hsd->org_target)
                rc = -ENOMEM;
        if (!rc)
-               rc = tmptgt_unshare(&hsd->old_ns_fd);
+               rc = tmptgt_unshare(hsd);
        if (!rc)
                mnt_fs_set_target(cxt->fs, MNT_PATH_TMPTGT);
        if (!rc)
index 9c7bed22576b3937b6efdd02a09c9105c72fb14f..511853b4a7b39c834cc317ef77c9c2c3cc37dad7 100644 (file)
@@ -700,7 +700,7 @@ Allow to make a target directory (mountpoint) if it does not exist yet. The opti
 **X-mount.subdir=**__directory__::
 Allow mounting sub-directory from a filesystem instead of the root directory. For now, this feature is implemented by temporary filesystem root directory mount in unshared namespace and then bind the sub-directory to the final mount point and umount the root of the filesystem. The sub-directory mount shows up atomically for the rest of the system although it is implemented by multiple *mount*(2) syscalls.
 +
-Note that this feature will not work in session with an unshared private mount namespace (after *unshare --mount*). If you want to use it in a mount namespace, then it requires *unshare --mount --propagation shared*.
+Note that this feature will not work in a session with an unshared private mount namespace (after *unshare --mount*) on old kernels, or with a *mount*(8) built without support for the file-descriptor-based kernel mount API. In this case, you need *unshare --mount --propagation shared*.
 +
 This feature is EXPERIMENTAL.
 
index 6443d3407367c579512e37d66953df10f08d6e35..fc1ff2decd5cb04f500d25fab7b8a7aad056d46b 100755 (executable)
@@ -26,9 +26,6 @@ ts_check_test_command "$TS_CMD_FINDMNT"
 ts_skip_nonroot
 ts_check_losetup
 
-prop=$($TS_CMD_FINDMNT --task "$$" -n -o PROPAGATION "/")
-[[ "$prop" == *"private"* ]] && ts_skip "unshared session"
-
 ts_device_init
 DEVICE=$TS_LODEV