From f63a2c4821566fb9c343391aa0186fd8b32e4d8e Mon Sep 17 00:00:00 2001 From: Yu Watanabe Date: Sat, 18 Feb 2023 08:45:24 +0900 Subject: [PATCH] mount-util: introduce remount_sysfs() This is useful when creating a new network namespace. Unlike procfs, we need to remount sysfs, otherwise properties of the network interfaces in the main network namespace are still accessible through the old sysfs, e.g. /sys/class/net/eth0. All sub-mounts previously mounted on the sysfs are moved onto the new sysfs mount. The function will be used in later commits. --- src/shared/mount-util.c | 185 +++++++++++++++++++++++++++++++++++++ src/shared/mount-util.h | 8 ++ src/test/test-mount-util.c | 147 ++++++++++++++++++++++++++++- 3 files changed, 339 insertions(+), 1 deletion(-) diff --git a/src/shared/mount-util.c b/src/shared/mount-util.c index 8aad531a4d9..e583261f456 100644 --- a/src/shared/mount-util.c +++ b/src/shared/mount-util.c @@ -34,6 +34,7 @@ #include "path-util.h" #include "process-util.h" #include "set.h" +#include "sort-util.h" #include "stat-util.h" #include "stdio-util.h" #include "string-table.h" @@ -1167,6 +1168,190 @@ int remount_idmap( return 0; } +typedef struct SubMount { /* One mount found below the remount target, kept so that it can be re-attached later. */ + char *path; /* Copy of the mount target reported by libmount; owned by this entry. */ + int mount_fd; /* Fd obtained from open_tree() for that target; owned by this entry. */ +} SubMount; +/* Releases the path and the mount fd of a single entry and stores back whatever mfree() and safe_close() return. */ +static void sub_mount_clear(SubMount *s) { + assert(s); + + s->path = mfree(s->path); + s->mount_fd = safe_close(s->mount_fd); +} +/* Clears the first n entries of the array and then frees the array itself. Registered as the CLEANUP_ARRAY() destructor in get_sub_mounts() and remount_and_move_sub_mounts(). */ +static void sub_mount_array_free(SubMount *s, size_t n) { + assert(s || n == 0); + + for (size_t i = 0; i < n; i++) + sub_mount_clear(s + i); + + free(s); +} +/* Comparator handed to typesafe_qsort(): orders two entries by path_compare() on their paths. Both paths must be set. */ +static int sub_mount_compare(const SubMount *a, const SubMount *b) { + assert(a); + assert(b); + assert(a->path); + assert(b->path); + + return path_compare(a->path, b->path); +} +/* Clears every entry whose path starts with the path of the most recently kept entry (index m). The caller sorts the array first. Cleared entries stay in the array and are skipped by move_sub_mounts(). Presumably nested mounts can be dropped because the kept entry was opened with AT_RECURSIVE - TODO confirm. */ +static void sub_mount_drop(SubMount *s, size_t n) { + assert(s || n == 0); + + for (size_t m = 0, i = 1; i < n; i++) { + if (path_startswith(s[i].path, s[m].path)) + sub_mount_clear(s + i); + else + m = i; + } +} +/* Collects the mounts listed in /proc/self/mountinfo whose target lies below prefix and opens each of them with open_tree(). Entries are skipped when the target is missing, when the mount ID from libmount differs from the one path_get_mnt_id() reports for the path, or when open_tree() fails with ENOENT. The result is sorted by path and passed through sub_mount_drop(). On success the array and its length are stored in ret_mounts and ret_n_mounts and 0 is returned; remount_and_move_sub_mounts() releases them with sub_mount_array_free(). On failure the value of the failing log call is returned. */ +static int get_sub_mounts(const char *prefix, SubMount **ret_mounts, 
size_t *ret_n_mounts) { + _cleanup_(mnt_free_tablep) struct libmnt_table *table = NULL; + _cleanup_(mnt_free_iterp) struct libmnt_iter *iter = NULL; + SubMount *mounts = NULL; + size_t n = 0; + int r; + + CLEANUP_ARRAY(mounts, n, sub_mount_array_free); + + assert(prefix); + assert(ret_mounts); + assert(ret_n_mounts); + + r = libmount_parse("/proc/self/mountinfo", NULL, &table, &iter); + if (r < 0) + return log_debug_errno(r, "Failed to parse /proc/self/mountinfo: %m"); + + for (;;) { + _cleanup_close_ int mount_fd = -EBADF; + _cleanup_free_ char *p = NULL; + struct libmnt_fs *fs; + const char *path; + int id1, id2; + + r = mnt_table_next_fs(table, iter, &fs); + if (r == 1) + break; /* EOF */ + if (r < 0) + return log_debug_errno(r, "Failed to get next entry from /proc/self/mountinfo: %m"); + + path = mnt_fs_get_target(fs); + if (!path) + continue; + /* Only keep targets for which path_startswith() yields a non-empty remainder; presumably this excludes prefix itself as well as unrelated paths. */ + if (isempty(path_startswith(path, prefix))) + continue; + /* Cross-check the mount ID from the table against the one reported for the path right now. */ + id1 = mnt_fs_get_id(fs); + r = path_get_mnt_id(path, &id2); + if (r < 0) { + log_debug_errno(r, "Failed to get mount ID of '%s', ignoring: %m", path); + continue; + } + if (id1 != id2) { + /* The path may be hidden by another over-mount or already remounted. */ + log_debug("The mount IDs of '%s' obtained by libmount and path_get_mnt_id() are different (%i vs %i), ignoring.", + path, id1, id2); + continue; + } + /* Open the mount with OPEN_TREE_CLONE so that the fd can be handed to move_mount() later; see open_tree(2) for the exact semantics of the flags. */ + mount_fd = open_tree(AT_FDCWD, path, OPEN_TREE_CLONE | OPEN_TREE_CLOEXEC | AT_RECURSIVE); + if (mount_fd < 0) { + if (errno == ENOENT) /* The path may be hidden by another over-mount or already unmounted. 
*/ + continue; + + return log_debug_errno(errno, "Failed to open tree of mounted filesystem '%s': %m", path); + } + /* Append a copy of the path together with the fd. TAKE_PTR() and TAKE_FD() are used so that the _cleanup_ handlers on p and mount_fd presumably no longer release them. */ + p = strdup(path); + if (!p) + return log_oom_debug(); + + if (!GREEDY_REALLOC(mounts, n + 1)) + return log_oom_debug(); + + mounts[n++] = (SubMount) { + .path = TAKE_PTR(p), + .mount_fd = TAKE_FD(mount_fd), + }; + } + /* sub_mount_drop() compares each entry with an earlier one, so the array has to be sorted by path first. */ + typesafe_qsort(mounts, n, sub_mount_compare); + sub_mount_drop(mounts, n); + + *ret_mounts = TAKE_PTR(mounts); + *ret_n_mounts = n; + return 0; +} +/* Attaches the mount fd of every remaining entry at its recorded path with move_mount(). Entries without a path or with a negative fd are skipped. Stops at the first move_mount() failure and returns the value of the log call; returns 0 otherwise. */ +static int move_sub_mounts(SubMount *mounts, size_t n) { + assert(mounts || n == 0); + + for (size_t i = 0; i < n; i++) { + if (!mounts[i].path || mounts[i].mount_fd < 0) + continue; + /* Best effort: the result of creating the mount point is deliberately ignored. */ + (void) mkdir_p_label(mounts[i].path, 0755); + + if (move_mount(mounts[i].mount_fd, "", AT_FDCWD, mounts[i].path, MOVE_MOUNT_F_EMPTY_PATH) < 0) + return log_debug_errno(errno, "Failed to move mount_fd to '%s': %m", mounts[i].path); + } + + return 0; +} +/* Mounts what (of the given type, flags and options) on where. If where is not a mount point yet, this is a plain mount. Otherwise the sub-mounts below where are collected, where is unmounted recursively, the new filesystem is mounted, and the collected sub-mounts are moved back. Returns the result of the first failing step, or that of move_sub_mounts(). */ +int remount_and_move_sub_mounts( + const char *what, + const char *where, + const char *type, + unsigned long flags, + const char *options) { + + SubMount *mounts = NULL; /* avoid false maybe-uninitialized warning */ + size_t n = 0; /* avoid false maybe-uninitialized warning */ + int r; + + CLEANUP_ARRAY(mounts, n, sub_mount_array_free); + + assert(where); + + /* This is useful when creating a new network namespace. Unlike procfs, we need to remount sysfs, + * otherwise properties of the network interfaces in the main network namespace are still accessible + * through the old sysfs, e.g. /sys/class/net/eth0. All sub-mounts previously mounted on the sysfs + * are moved onto the new sysfs mount. */ + + r = path_is_mount_point(where, NULL, 0); + if (r < 0) + return log_debug_errno(r, "Failed to determine if '%s' is a mountpoint: %m", where); + if (r == 0) + /* Shortcut. Simply mount the requested filesystem. */ + return mount_nofollow_verbose(LOG_DEBUG, what, where, type, flags, options); + + /* Get the list of sub-mounts and duplicate them. 
*/ + r = get_sub_mounts(where, &mounts, &n); + if (r < 0) + return r; + + /* Then, unmount the mount and its sub-mounts. The result is deliberately ignored. */ + (void) umount_recursive(where, 0); + + /* Remount the target filesystem. */ + r = mount_nofollow_verbose(LOG_DEBUG, what, where, type, flags, options); + if (r < 0) + return r; + + /* Finally, move all the sub-mounts onto the new target mount point. */ + return move_sub_mounts(mounts, n); +} +/* Convenience wrapper: mounts a fresh sysfs on where with MS_NOSUID|MS_NOEXEC|MS_NODEV via remount_and_move_sub_mounts(). */ +int remount_sysfs(const char *where) { + return remount_and_move_sub_mounts("sysfs", where, "sysfs", MS_NOSUID|MS_NOEXEC|MS_NODEV, NULL); +} + int make_mount_point_inode_from_stat(const struct stat *st, const char *dest, mode_t mode) { assert(st); assert(dest); diff --git a/src/shared/mount-util.h b/src/shared/mount-util.h index 7554bf828e1..84ea4b63927 100644 --- a/src/shared/mount-util.h +++ b/src/shared/mount-util.h @@ -105,6 +105,14 @@ typedef enum RemountIdmapping { int remount_idmap(const char *p, uid_t uid_shift, uid_t uid_range, uid_t owner, RemountIdmapping idmapping); +int remount_and_move_sub_mounts( + const char *what, + const char *where, + const char *type, + unsigned long flags, + const char *options); +int remount_sysfs(const char *where); + /* Creates a mount point (not parents) based on the source path or stat - ie, a file or a directory */ int make_mount_point_inode_from_stat(const struct stat *st, const char *dest, mode_t mode); int make_mount_point_inode_from_path(const char *source, const char *dest, mode_t mode); diff --git a/src/test/test-mount-util.c b/src/test/test-mount-util.c index fddf70584f0..405cdf557a8 100644 --- a/src/test/test-mount-util.c +++ b/src/test/test-mount-util.c @@ -8,6 +8,7 @@ #include "fd-util.h" #include "fileio.h" #include "fs-util.h" +#include "missing_magic.h" #include "missing_mount.h" #include "mkdir.h" #include "mount-util.h" @@ -16,11 +17,142 @@ #include "path-util.h" #include "process-util.h" #include "rm-rf.h" +#include "stat-util.h" #include "string-util.h" #include "strv.h" #include "tests.h" 
#include "tmpfile-util.h" +TEST(remount_and_move_sub_mounts) { + int r; + /* Builds a tree of tmpfs mounts in a forked child (FORK_NEW_MOUNTNS), remounts the top-level tmpfs with remount_and_move_sub_mounts() and checks which memo files are still reachable afterwards. Skipped unless running as root with CAP_SYS_ADMIN. */ + if (geteuid() != 0 || have_effective_cap(CAP_SYS_ADMIN) <= 0) + return (void) log_tests_skipped("not running privileged"); + + r = safe_fork("(remount-and-move-sub-mounts)", + FORK_RESET_SIGNALS | + FORK_CLOSE_ALL_FDS | + FORK_DEATHSIG | + FORK_WAIT | + FORK_REOPEN_LOG | + FORK_LOG | + FORK_NEW_MOUNTNS | + FORK_MOUNTNS_SLAVE, + NULL); + assert_se(r >= 0); + if (r == 0) { + _cleanup_free_ char *d = NULL, *fn = NULL; + + assert_se(mkdtemp_malloc(NULL, &d) >= 0); + /* The main fs: a tmpfs on the temporary directory, holding a memo file that contains the directory path. */ + assert_se(mount_nofollow_verbose(LOG_DEBUG, "tmpfs", d, "tmpfs", MS_NOSUID|MS_NODEV, NULL) >= 0); + + assert_se(fn = path_join(d, "memo")); + assert_se(write_string_file(fn, d, WRITE_STRING_FILE_CREATE|WRITE_STRING_FILE_AVOID_NEWLINE) >= 0); + assert_se(access(fn, F_OK) >= 0); + + /* Create the fs tree: a tmpfs on each path, each holding a memo file that contains the path of its own mount point. */ + FOREACH_STRING(p, "sub1", "sub1/hoge", "sub1/foo", "sub2", "sub2/aaa", "sub2/bbb") { + _cleanup_free_ char *where = NULL, *filename = NULL; + + assert_se(where = path_join(d, p)); + assert_se(mkdir_p(where, 0755) >= 0); + assert_se(mount_nofollow_verbose(LOG_DEBUG, "tmpfs", where, "tmpfs", MS_NOSUID|MS_NODEV, NULL) >= 0); + + assert_se(filename = path_join(where, "memo")); + assert_se(write_string_file(filename, where, WRITE_STRING_FILE_CREATE|WRITE_STRING_FILE_AVOID_NEWLINE) >= 0); + assert_se(access(filename, F_OK) >= 0); + } + + /* Hide sub1 by mounting another tmpfs over it, then add new sub-mounts on top of the over-mount. */ + FOREACH_STRING(p, "sub1", "sub1/hogehoge", "sub1/foofoo") { + _cleanup_free_ char *where = NULL, *filename = NULL; + + assert_se(where = path_join(d, p)); + assert_se(mkdir_p(where, 0755) >= 0); + assert_se(mount_nofollow_verbose(LOG_DEBUG, "tmpfs", where, "tmpfs", MS_NOSUID|MS_NODEV, NULL) >= 0); + + assert_se(filename = path_join(where, "memo")); + assert_se(write_string_file(filename, where, WRITE_STRING_FILE_CREATE|WRITE_STRING_FILE_AVOID_NEWLINE) >= 0); + assert_se(access(filename, F_OK) >= 0); + } + + /* Remount the main fs. 
*/ + r = remount_and_move_sub_mounts("tmpfs", d, "tmpfs", MS_NOSUID|MS_NODEV, NULL); + if (r == -EINVAL || (r < 0 && ERRNO_IS_NOT_SUPPORTED(r))) { + log_tests_skipped_errno(r, "The kernel seems too old: %m"); + _exit(EXIT_SUCCESS); + } + /* NOTE(review): any other return value of the call above is not checked here. */ + /* Check that the file in the main fs does not exist anymore. */ + assert_se(access(fn, F_OK) < 0 && errno == ENOENT); + + /* Check that the files in the visible sub-mounts are kept and still contain their own path. */ + FOREACH_STRING(p, "sub1", "sub1/hogehoge", "sub1/foofoo", "sub2", "sub2/aaa", "sub2/bbb") { + _cleanup_free_ char *where = NULL, *filename = NULL, *content = NULL; + + assert_se(where = path_join(d, p)); + assert_se(filename = path_join(where, "memo")); + assert_se(read_full_file(filename, &content, NULL) >= 0); + assert_se(streq(content, where)); + } + + /* Check that the sub-mounts previously hidden below the over-mounted sub1 are dropped, i.e. their paths do not exist. NOTE(review): nothing is unmounted at this point, the paths are only probed. */ + FOREACH_STRING(p, "sub1/hoge", "sub1/foo") { + _cleanup_free_ char *where = NULL; + + assert_se(where = path_join(d, p)); + assert_se(access(where, F_OK) < 0 && errno == ENOENT); + } + + _exit(EXIT_SUCCESS); + } +} + +TEST(remount_sysfs) { + int r; + /* Checks that remount_sysfs() hides the interfaces of the main network namespace after unshare(CLONE_NEWNET). Skipped unless running as root with CAP_SYS_ADMIN, with sysfs on /sys and with the dummy-test-mnt interface present. */ + if (geteuid() != 0 || have_effective_cap(CAP_SYS_ADMIN) <= 0) + return (void) log_tests_skipped("not running privileged"); + + if (path_is_fs_type("/sys", SYSFS_MAGIC) <= 0) + return (void) log_tests_skipped("sysfs is not mounted on /sys"); + + if (access("/sys/class/net/dummy-test-mnt", F_OK) < 0) + return (void) log_tests_skipped_errno(errno, "The network interface dummy-test-mnt does not exit"); + /* NOTE(review): the skip message above says exit; presumably exist was meant. */ + r = safe_fork("(remount-sysfs)", + FORK_RESET_SIGNALS | + FORK_CLOSE_ALL_FDS | + FORK_DEATHSIG | + FORK_WAIT | + FORK_REOPEN_LOG | + FORK_LOG | + FORK_NEW_MOUNTNS | + FORK_MOUNTNS_SLAVE, + NULL); + assert_se(r >= 0); + if (r == 0) { + assert_se(unshare(CLONE_NEWNET) >= 0); + + /* Even unshare()ed, the interfaces in the main namespace can be accessed through sysfs. 
*/ + assert_se(access("/sys/class/net/lo", F_OK) >= 0); + assert_se(access("/sys/class/net/dummy-test-mnt", F_OK) >= 0); + + r = remount_sysfs("/sys"); + if (r == -EINVAL || (r < 0 && ERRNO_IS_NOT_SUPPORTED(r))) { + log_tests_skipped_errno(r, "The kernel seems too old: %m"); + _exit(EXIT_SUCCESS); + } + /* NOTE(review): any other return value of remount_sysfs() is not checked here. */ + /* After remounting sysfs, the interfaces in the main namespace cannot be accessed: lo is still expected to be there, the dummy interface is not. */ + assert_se(access("/sys/class/net/lo", F_OK) >= 0); + assert_se(access("/sys/class/net/dummy-test-mnt", F_OK) < 0 && errno == ENOENT); + + _exit(EXIT_SUCCESS); + } +} + TEST(mount_option_mangle) { char *opts = NULL; unsigned long f; @@ -256,4 +388,17 @@ TEST(make_mount_point_inode) { assert_se(!(S_IXOTH & st.st_mode)); } -DEFINE_TEST_MAIN(LOG_DEBUG); +static int intro(void) { + /* Create a dummy network interface for testing remount_sysfs(). The result of system() is ignored; TEST(remount_sysfs) is skipped when the interface is missing. */ + (void) system("ip link add dummy-test-mnt type dummy"); + + return 0; +} +/* Removes the dummy network interface created by intro(); the result of system() is ignored. */ +static int outro(void) { + (void) system("ip link del dummy-test-mnt"); + + return 0; +} + +DEFINE_TEST_MAIN_FULL(LOG_DEBUG, intro, outro); -- 2.39.2