From: Karel Zak Date: Tue, 2 May 2023 11:27:52 +0000 (+0200) Subject: libmount: (subdir) support unshared sessions too X-Git-Tag: v2.39~38 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=1a0f793e58cf2e4dfcf59d6df38b5b6fec7c3036;p=thirdparty%2Futil-linux.git libmount: (subdir) support unshared sessions too * enter original namespace for the final move_mount(), it means that the original unshare() (to create the temporary mount) will be ineffective for this final step. God bless FD based APIs (setns() and open_tree())! ;-) * improve cleanup code (call umount only once) * remember private namespace * update note in man page Addresses: https://github.com/util-linux/util-linux/pull/2198 Signed-off-by: Karel Zak --- diff --git a/libmount/src/hook_subdir.c b/libmount/src/hook_subdir.c index aee0baf339..7da563b85d 100644 --- a/libmount/src/hook_subdir.c +++ b/libmount/src/hook_subdir.c @@ -21,14 +21,16 @@ #include "fileutils.h" #include "mount-api-utils.h" -static int tmptgt_cleanup(int old_ns_fd); - struct hookset_data { char *subdir; char *org_target; int old_ns_fd; + int new_ns_fd; + unsigned int tmp_umounted : 1; }; +static int tmptgt_cleanup(struct hookset_data *); + static void free_hookset_data( struct libmnt_context *cxt, const struct libmnt_hookset *hs) { @@ -37,7 +39,7 @@ static void free_hookset_data( struct libmnt_context *cxt, if (!hsd) return; if (hsd->old_ns_fd >= 0) - tmptgt_cleanup(hsd->old_ns_fd); + tmptgt_cleanup(hsd); free(hsd->org_target); free(hsd->subdir); @@ -79,29 +81,27 @@ static int hookset_deinit(struct libmnt_context *cxt, const struct libmnt_hookse * Initialize MNT_PATH_TMPTGT; mkdir, create a new namespace and * mark (bind mount) the directory as private. 
*/ -static int tmptgt_unshare(int *old_ns_fd) +static int tmptgt_unshare(struct hookset_data *hsd) { #ifdef USE_LIBMOUNT_SUPPORT_NAMESPACES - int rc = 0, fd = -1; + int rc = 0; - assert(old_ns_fd); + hsd->old_ns_fd = hsd->new_ns_fd = -1; - *old_ns_fd = -1; + /* create directory */ + rc = ul_mkdir_p(MNT_PATH_TMPTGT, S_IRWXU); + if (rc) + goto fail; /* remember the current namespace */ - fd = open("/proc/self/ns/mnt", O_RDONLY | O_CLOEXEC); - if (fd < 0) + hsd->old_ns_fd = open("/proc/self/ns/mnt", O_RDONLY | O_CLOEXEC); + if (hsd->old_ns_fd < 0) goto fail; /* create new namespace */ if (unshare(CLONE_NEWNS) != 0) goto fail; - /* create directory */ - rc = ul_mkdir_p(MNT_PATH_TMPTGT, S_IRWXU); - if (rc) - goto fail; - /* try to set top-level directory as private, this is possible if * MNT_RUNTIME_TOPDIR (/run) is a separated filesystem. */ if (mount("none", MNT_RUNTIME_TOPDIR, NULL, MS_PRIVATE, NULL) != 0) { @@ -113,14 +113,18 @@ static int tmptgt_unshare(int *old_ns_fd) goto fail; } + /* remember the new namespace */ + hsd->new_ns_fd = open("/proc/self/ns/mnt", O_RDONLY | O_CLOEXEC); + if (hsd->new_ns_fd < 0) + goto fail; + DBG(UTILS, ul_debug(MNT_PATH_TMPTGT " unshared")); - *old_ns_fd = fd; return 0; fail: if (rc == 0) rc = errno ? 
-errno : -EINVAL; - tmptgt_cleanup(fd); + tmptgt_cleanup(hsd); DBG(UTILS, ul_debug(MNT_PATH_TMPTGT " unshare failed")); return rc; #else @@ -131,16 +135,23 @@ fail: /* * Clean up MNT_PATH_TMPTGT; umount and switch back to old namespace */ -static int tmptgt_cleanup(int old_ns_fd) +static int tmptgt_cleanup(struct hookset_data *hsd) { #ifdef USE_LIBMOUNT_SUPPORT_NAMESPACES - umount(MNT_PATH_TMPTGT); + if (!hsd->tmp_umounted) { + umount(MNT_PATH_TMPTGT); + hsd->tmp_umounted = 1; + } + + if (hsd->new_ns_fd >= 0) + close(hsd->new_ns_fd); - if (old_ns_fd >= 0) { - setns(old_ns_fd, CLONE_NEWNS); - close(old_ns_fd); + if (hsd->old_ns_fd >= 0) { + setns(hsd->old_ns_fd, CLONE_NEWNS); + close(hsd->old_ns_fd); } + hsd->new_ns_fd = hsd->old_ns_fd = -1; DBG(UTILS, ul_debug(MNT_PATH_TMPTGT " cleanup done")); return 0; #else @@ -148,13 +159,17 @@ static int tmptgt_cleanup(int old_ns_fd) #endif } +/* + * Attach (move) MNT_PATH_TMPTGT/subdir to the parental namespace. + */ static int do_mount_subdir( struct libmnt_context *cxt, + struct hookset_data *hsd, const char *root, - const char *subdir, const char *target) { int rc = 0; + const char *subdir = hsd->subdir; #ifdef USE_LIBMOUNT_MOUNTFD_SUPPORT struct libmnt_sysapi *api; @@ -162,9 +177,10 @@ static int do_mount_subdir( api = mnt_context_get_sysapi(cxt); if (api) { /* FD based way - unfortunately, it's impossible to open - * sub-directory on not-yet attached mount. It means hook_mount.c - * attaches to FS to temporary directory, and we clone - * and move the subdir, and umount the old temporary tree. + * sub-directory on not-yet attached mount. It means + * hook_mount.c attaches FS to temporary directory, and we + * clone and move the subdir, and umount the old unshared + * temporary tree. * * The old mount(2) way does the same, but by BIND. 
*/ @@ -178,10 +194,19 @@ static int do_mount_subdir( rc = -errno; if (!rc) { + /* Note that the original parental namespace could be + * private, in this case, it will not see our final mount, + * so we need to move to the original namespace. + */ + setns(hsd->old_ns_fd, CLONE_NEWNS); + rc = move_mount(fd, "", AT_FDCWD, target, MOVE_MOUNT_F_EMPTY_PATH); set_syscall_status(cxt, "move_mount", rc == 0); if (rc) rc = -errno; + + /* And move back to our private namespace to cleanup */ + setns(hsd->new_ns_fd, CLONE_NEWNS); } if (!rc) { close(api->fd_tree); @@ -211,6 +236,8 @@ static int do_mount_subdir( set_syscall_status(cxt, "umount", rc == 0); if (rc) rc = -errno; + hsd->tmp_umounted = 1; + } return rc; @@ -233,14 +260,13 @@ static int hook_mount_post( mnt_fs_set_target(cxt->fs, hsd->org_target); /* bind subdir to the real target, umount temporary target */ - rc = do_mount_subdir(cxt, MNT_PATH_TMPTGT, - hsd->subdir, + rc = do_mount_subdir(cxt, hsd, + MNT_PATH_TMPTGT, mnt_fs_get_target(cxt->fs)); if (rc) return rc; - tmptgt_cleanup(hsd->old_ns_fd); - hsd->old_ns_fd = -1; + tmptgt_cleanup(hsd); return rc; } @@ -262,7 +288,7 @@ static int hook_mount_pre( if (!hsd->org_target) rc = -ENOMEM; if (!rc) - rc = tmptgt_unshare(&hsd->old_ns_fd); + rc = tmptgt_unshare(hsd); if (!rc) mnt_fs_set_target(cxt->fs, MNT_PATH_TMPTGT); if (!rc) diff --git a/sys-utils/mount.8.adoc b/sys-utils/mount.8.adoc index 9c7bed2257..511853b4a7 100644 --- a/sys-utils/mount.8.adoc +++ b/sys-utils/mount.8.adoc @@ -700,7 +700,7 @@ Allow to make a target directory (mountpoint) if it does not exist yet. The opti **X-mount.subdir=**__directory__:: Allow mounting sub-directory from a filesystem instead of the root directory. For now, this feature is implemented by temporary filesystem root directory mount in unshared namespace and then bind the sub-directory to the final mount point and umount the root of the filesystem. 
The sub-directory mount shows up atomically for the rest of the system although it is implemented by multiple *mount*(2) syscalls. + -Note that this feature will not work in session with an unshared private mount namespace (after *unshare --mount*). If you want to use it in a mount namespace, then it requires *unshare --mount --propagation shared*. +Note that this feature will not work in a session with an unshared private mount namespace (after *unshare --mount*) on old kernels or with *mount*(8) without support for the file-descriptor-based mount kernel API. In this case, you need *unshare --mount --propagation shared*. + This feature is EXPERIMENTAL. diff --git a/tests/ts/mount/subdir b/tests/ts/mount/subdir index 6443d34073..fc1ff2decd 100755 --- a/tests/ts/mount/subdir +++ b/tests/ts/mount/subdir @@ -26,9 +26,6 @@ ts_check_test_command "$TS_CMD_FINDMNT" ts_skip_nonroot ts_check_losetup -prop=$($TS_CMD_FINDMNT --task "$$" -n -o PROPAGATION "/") -[[ "$prop" == *"private"* ]] && ts_skip "unshared session" - ts_device_init DEVICE=$TS_LODEV