From: Karel Zak Date: Wed, 11 Aug 2021 13:57:37 +0000 (+0200) Subject: libmount: add X-mount.subdir= X-Git-Tag: v2.38-rc1~309 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=e311e731be92d96a674be0609da4b7ef44c16522;p=thirdparty%2Futil-linux.git libmount: add X-mount.subdir= Like btrfs subvol=, but more generic and for all filesystems. This feature is marked as EXPERIMENTAL (may be removed). # mount /dev/sdc /mnt/test -o X-mount.subdir=AAA # findmnt /dev/sdc TARGET SOURCE FSTYPE OPTIONS /mnt/test /dev/sdc[/AAA] ext4 rw,relatime,stripe=512 Implemented as: * open current namespace from /proc/self/ns/mnt * mkdir /tmp/mount/mount.<pid> * unshare mount namespace * create mount node from /tmp/mount/mount.<pid> by mount(MS_BIND) * mark /tmp/mount/mount.<pid> private by mount(MS_PRIVATE|MS_REC) * bind mount /tmp/mount/mount.<pid>/subdir to <target> * umount /tmp/mount/mount.<pid> * rmdir /tmp/mount/mount.<pid> * setns() to the original namespace Note that /tmp/mount/mount.<pid> conversion to mount node and call for MS_PRIVATE are visible (propagated to the system) if /tmp is a shared filesystem, the rest (all operations with the desired filesystem) is atomic for a parental namespace. Maybe one day it will be possible to reimplement it in a more elegant way with new mount kernel APIs (open_tree(), etc.). 
Fixes: https://github.com/karelzak/util-linux/issues/1103 Signed-off-by: Karel Zak --- diff --git a/libmount/src/context.c b/libmount/src/context.c index 3b32224e38..8d1250f7c9 100644 --- a/libmount/src/context.c +++ b/libmount/src/context.c @@ -153,6 +153,8 @@ int mnt_reset_context(struct libmnt_context *cxt) free(cxt->helper); free(cxt->orig_user); + free(cxt->subdir); + free(cxt->tmptgt); cxt->fs = NULL; cxt->mtab = NULL; @@ -162,6 +164,8 @@ int mnt_reset_context(struct libmnt_context *cxt) cxt->mountflags = 0; cxt->user_mountflags = 0; cxt->mountdata = NULL; + cxt->tmptgt = NULL; + cxt->subdir = NULL; cxt->flags = MNT_FL_DEFAULT; /* free additional mounts list */ @@ -292,6 +296,10 @@ struct libmnt_context *mnt_copy_context(struct libmnt_context *o) goto failed; if (strdup_between_structs(n, o, orig_user)) goto failed; + if (strdup_between_structs(n, o, subdir)) + goto failed; + if (strdup_between_structs(n, o, tmptgt)) + goto failed; n->mountflags = o->mountflags; n->mountdata = o->mountdata; @@ -1860,6 +1868,31 @@ end: return rc; } +static int is_subdir_required(struct libmnt_context *cxt, int *rc) +{ + char *dir; + size_t sz; + + assert(cxt); + assert(rc); + + *rc = 0; + + if (!cxt->fs + || !cxt->fs->user_optstr + || mnt_optstr_get_option(cxt->fs->user_optstr, + "X-mount.subdir", &dir, &sz) != 0) + return 0; + + cxt->subdir = strndup(dir, sz); + if (!cxt->subdir) + *rc = -ENOMEM; + else if (asprintf(&cxt->tmptgt, "%s/mount.%d", MNT_TMPDIR, getpid()) < 0) + *rc = -ENOMEM; + + return *rc == 0; +} + static int is_mkdir_required(const char *tgt, struct libmnt_fs *fs, mode_t *mode, int *rc) { char *mstr = NULL; @@ -1974,6 +2007,17 @@ int mnt_context_prepare_target(struct libmnt_context *cxt) } } + /* X-mount.subdir= target */ + if (rc == 0 + && cxt->action == MNT_ACT_MOUNT + && (cxt->user_mountflags & MNT_MS_XFSTABCOMM) + && is_subdir_required(cxt, &rc)) { + + DBG(CXT, ul_debugobj(cxt, "subdir %s required, temporary target: %s", + cxt->subdir, cxt->tmptgt)); + } + 
+ if (!mnt_context_switch_ns(cxt, ns_old)) return -MNT_ERR_NAMESPACE; diff --git a/libmount/src/context_mount.c b/libmount/src/context_mount.c index 55ebf79451..78e5aa87aa 100644 --- a/libmount/src/context_mount.c +++ b/libmount/src/context_mount.c @@ -753,6 +753,29 @@ static int do_mount_additional(struct libmnt_context *cxt, return 0; } +static int do_mount_subdir(struct libmnt_context *cxt, + const char *root, + const char *subdir, + const char *target) +{ + char *src = NULL; + int rc = 0; + + if (asprintf(&src, "%s/%s", root, subdir) < 0) + return -ENOMEM; + + DBG(CXT, ul_debugobj(cxt, "mount subdir %s to %s", src, target)); + if (mount(src, target, NULL, MS_BIND | MS_REC, NULL) != 0) + rc = -MNT_ERR_APPLYFLAGS; + + DBG(CXT, ul_debugobj(cxt, "umount old root %s", root)); + if (umount(root) != 0) + rc = -MNT_ERR_APPLYFLAGS; + + free(src); + return rc; +} + /* * The default is to use fstype from cxt->fs, this could be overwritten by * @try_type argument. If @try_type is specified then mount with MS_SILENT. @@ -763,7 +786,7 @@ static int do_mount_additional(struct libmnt_context *cxt, */ static int do_mount(struct libmnt_context *cxt, const char *try_type) { - int rc = 0; + int rc = 0, old_ns_fd = -1; const char *src, *target, *type; unsigned long flags; @@ -806,12 +829,6 @@ static int do_mount(struct libmnt_context *cxt, const char *try_type) if (try_type) flags |= MS_SILENT; - DBG(CXT, ul_debugobj(cxt, "%smount(2) " - "[source=%s, target=%s, type=%s, " - " mountflags=0x%08lx, mountdata=%s]", - mnt_context_is_fake(cxt) ? "(FAKE) " : "", - src, target, type, - flags, cxt->mountdata ? "yes" : "")); if (mnt_context_is_fake(cxt)) { /* @@ -819,6 +836,12 @@ static int do_mount(struct libmnt_context *cxt, const char *try_type) */ cxt->syscall_status = 0; + DBG(CXT, ul_debugobj(cxt, "FAKE mount(2) " + "[source=%s, target=%s, type=%s, " + " mountflags=0x%08lx, mountdata=%s]", + src, target, type, + flags, cxt->mountdata ? 
"yes" : "")); + } else if (mnt_context_propagation_only(cxt)) { /* * propagation flags *only* @@ -829,13 +852,30 @@ static int do_mount(struct libmnt_context *cxt, const char *try_type) /* * regular mount */ + + /* create unhared temporary target */ + if (cxt->subdir) { + rc = mnt_unshared_mkdir(cxt->tmptgt, + S_IRWXU, &old_ns_fd); + if (rc) + return rc; + target = cxt->tmptgt; + } + + DBG(CXT, ul_debugobj(cxt, "mount(2) " + "[source=%s, target=%s, type=%s, " + " mountflags=0x%08lx, mountdata=%s]", + src, target, type, + flags, cxt->mountdata ? "yes" : "")); + if (mount(src, target, type, flags, cxt->mountdata)) { cxt->syscall_status = -errno; DBG(CXT, ul_debugobj(cxt, "mount(2) failed [errno=%d %m]", -cxt->syscall_status)); - return -cxt->syscall_status; + rc = -cxt->syscall_status; + goto done; } - DBG(CXT, ul_debugobj(cxt, " success")); + DBG(CXT, ul_debugobj(cxt, " mount(2) success")); cxt->syscall_status = 0; /* @@ -845,7 +885,20 @@ static int do_mount(struct libmnt_context *cxt, const char *try_type) && do_mount_additional(cxt, target, flags, NULL)) { /* TODO: call umount? 
*/ - return -MNT_ERR_APPLYFLAGS; + rc = -MNT_ERR_APPLYFLAGS; + goto done; + } + + /* + * bind subdir to the real target, umount temporary target + */ + if (cxt->subdir) { + target = mnt_fs_get_target(cxt->fs); + rc = do_mount_subdir(cxt, cxt->tmptgt, cxt->subdir, target); + if (rc) + goto done; + mnt_unshared_rmdir(cxt->tmptgt, old_ns_fd); + old_ns_fd = -1; } } @@ -855,6 +908,10 @@ static int do_mount(struct libmnt_context *cxt, const char *try_type) rc = mnt_fs_set_fstype(fs, try_type); } +done: + if (old_ns_fd >= 0) + mnt_unshared_rmdir(cxt->tmptgt, old_ns_fd); + return rc; } diff --git a/libmount/src/mountP.h b/libmount/src/mountP.h index b98d28875a..02e6d6dc3c 100644 --- a/libmount/src/mountP.h +++ b/libmount/src/mountP.h @@ -74,6 +74,7 @@ UL_DEBUG_DECLARE_MASK(libmount); #define MNT_MNTTABDIR_EXT ".fstab" /* library private paths */ +#define MNT_TMPDIR "/tmp/mount" #define MNT_RUNTIME_TOPDIR "/run" #define MNT_RUNTIME_TOPDIR_OLD "/dev" @@ -123,6 +124,9 @@ extern char *mnt_get_kernel_cmdline_option(const char *name); extern int mnt_stat_mountpoint(const char *target, struct stat *st); extern int mnt_lstat_mountpoint(const char *target, struct stat *st); +extern int mnt_unshared_mkdir(const char *path, mode_t mode, int *old_ns_fd); +extern int mnt_unshared_rmdir(const char *path, int old_ns_fd); + /* tab.c */ extern int is_mountinfo(struct libmnt_table *tb); extern int mnt_table_set_parser_fltrcb( struct libmnt_table *tb, @@ -289,6 +293,9 @@ struct libmnt_context char *fstype_pattern; /* for mnt_match_fstype() */ char *optstr_pattern; /* for mnt_match_options() */ + char *subdir; /* X-mount.subdir= */ + char *tmptgt; /* (unshared) private mount target */ + struct libmnt_fs *fs; /* filesystem description (type, mountpoint, device, ...) 
*/ struct libmnt_fs *fs_template; /* used for @fs on mnt_reset_context() */ diff --git a/libmount/src/utils.c b/libmount/src/utils.c index c321a98811..93a88e6ad2 100644 --- a/libmount/src/utils.c +++ b/libmount/src/utils.c @@ -1143,6 +1143,77 @@ done: return 1; } +/* + * like ul_mkdir_p(), but create a new namespace and mark (bind mount) + * the directory as private. + */ +int mnt_unshared_mkdir(const char *path, mode_t mode, int *old_ns_fd) +{ + int rc = 0, fd = -1, mounted = 0; + + *old_ns_fd = -1; + + if (!path || !old_ns_fd) + return -EINVAL; + + /* create directory */ + rc = ul_mkdir_p(path, mode); + if (rc) + goto fail; + + /* remember the current namespace */ + fd = open("/proc/self/ns/mnt", O_RDONLY | O_CLOEXEC); + if (fd < 0) + goto fail; + + /* create new namespace */ + if (unshare(CLONE_NEWNS) != 0) + goto fail; + + /* make the directory private */ + mounted = mount(path, path, "none", MS_BIND, NULL) == 0; + if (!mounted) + goto fail; + if (mount("none", path, NULL, MS_PRIVATE, NULL) != 0) + goto fail; + + DBG(UTILS, ul_debug(" %s unshared", path)); + *old_ns_fd = fd; + return 0; +fail: + if (rc == 0) + rc = errno ? 
-errno : -EINVAL; + if (mounted) + umount(path); + if (fd >= 0) { + setns(fd, CLONE_NEWNS); /* restore original NS */ + close(fd); + } + rmdir(path); + DBG(UTILS, ul_debug(" %s unshare failed", path)); + return rc; +} + +/* + * umount, rmdir and switch back to old namespace + */ +int mnt_unshared_rmdir(const char *path, int old_ns_fd) +{ + if (!path) + return -EINVAL; + + umount(path); + rmdir(path); + + if (old_ns_fd >= 0) { + setns(old_ns_fd, CLONE_NEWNS); + close(old_ns_fd); + } + + DBG(UTILS, ul_debug(" %s removed", path)); + return 0; +} + #ifdef TEST_PROGRAM static int test_match_fstype(struct libmnt_test *ts, int argc, char *argv[]) { diff --git a/sys-utils/mount.8.adoc b/sys-utils/mount.8.adoc index 5a79e543d6..a4e5bdd66e 100644 --- a/sys-utils/mount.8.adoc +++ b/sys-utils/mount.8.adoc @@ -627,6 +627,9 @@ Note that before util-linux v2.30 the x-* options have not been maintained by li *X-mount.mkdir*[=_mode_]:: Allow to make a target directory (mountpoint) if it does not exit yet. The optional argument _mode_ specifies the filesystem access mode used for *mkdir*(2) in octal notation. The default mode is 0755. This functionality is supported only for root users or when mount executed without suid permissions. The option is also supported as x-mount.mkdir, this notation is deprecated since v2.30. +**X-mount.subdir=**__directory__:: +Allow mounting sub-directory from a filesystem instead of the root directory. For now, this feature is implemented by temporary filesystem root directory mount in unshared namespace and then bind the sub-directory to the final mount point and umount the root of the filesystem. The sub-directory mount shows up atomically for the rest of the system although it is implemented by multiple mount(2) syscalls. This feature is EXPERIMENTAL. + *nosymfollow*:: Do not follow symlinks when resolving paths. Symlinks can still be created, and *readlink*(1), *readlink*(2), *realpath*(1), and *realpath*(3) all still work properly.