]> git.ipfire.org Git - thirdparty/systemd.git/commitdiff
mount-util: introduce remount_sysfs()
author: Yu Watanabe <watanabe.yu+github@gmail.com>
Fri, 17 Feb 2023 23:45:24 +0000 (08:45 +0900)
committer: Yu Watanabe <watanabe.yu+github@gmail.com>
Thu, 23 Feb 2023 06:09:13 +0000 (15:09 +0900)
This is useful when creating a new network namespace. Unlike procfs,
we need to remount sysfs, otherwise properties of the network interfaces
in the main network namespace are still accessible through the old sysfs,
e.g. /sys/class/net/eth0. All sub-mounts previously mounted on the sysfs
are moved onto the new sysfs mount.

The function will be used in later commits.

src/shared/mount-util.c
src/shared/mount-util.h
src/test/test-mount-util.c

index 8aad531a4d9e8d59de4239b3b15f29fb8be10487..e583261f456fc4e35900e97accf3bd36cc9aacc6 100644 (file)
@@ -34,6 +34,7 @@
 #include "path-util.h"
 #include "process-util.h"
 #include "set.h"
+#include "sort-util.h"
 #include "stat-util.h"
 #include "stdio-util.h"
 #include "string-table.h"
@@ -1167,6 +1168,190 @@ int remount_idmap(
         return 0;
 }
 
+typedef struct SubMount {
+        char *path;   /* copy of the mount point path (strdup() in get_sub_mounts()); released with mfree() in sub_mount_clear() */
+        int mount_fd; /* fd obtained from open_tree() in get_sub_mounts(); closed with safe_close() in sub_mount_clear() */
+} SubMount;
+
+/* Releases what a single SubMount entry holds: the mount fd is closed and the path is freed. Both
+ * fields are overwritten with the values returned by safe_close() and mfree(). */
+static void sub_mount_clear(SubMount *s) {
+        assert(s);
+
+        s->mount_fd = safe_close(s->mount_fd);
+        s->path = mfree(s->path);
+}
+
+/* Clears each of the n entries of the array, then frees the array itself. A NULL array is accepted
+ * only together with n == 0. */
+static void sub_mount_array_free(SubMount *s, size_t n) {
+        size_t i = 0;
+
+        assert(s || n == 0);
+
+        while (i < n)
+                sub_mount_clear(&s[i++]);
+
+        free(s);
+}
+
+/* Comparison callback handed to typesafe_qsort() in get_sub_mounts(): orders two entries by their
+ * paths using path_compare(). Both entries must carry a non-NULL path. */
+static int sub_mount_compare(const SubMount *a, const SubMount *b) {
+        const char *lhs, *rhs;
+
+        assert(a);
+        assert(b);
+        assert(a->path);
+        assert(b->path);
+
+        lhs = a->path;
+        rhs = b->path;
+
+        return path_compare(lhs, rhs);
+}
+
+/* Walks the array once and clears every entry whose path starts with the path of the most recent entry
+ * that was kept. get_sub_mounts() calls this right after sorting the array with sub_mount_compare().
+ * Cleared entries stay in the array; the element count is not changed. */
+static void sub_mount_drop(SubMount *s, size_t n) {
+        size_t kept = 0; /* index of the last entry that was not cleared */
+
+        assert(s || n == 0);
+
+        for (size_t i = 1; i < n; i++) {
+                if (!path_startswith(s[i].path, s[kept].path)) {
+                        kept = i;
+                        continue;
+                }
+
+                sub_mount_clear(&s[i]);
+        }
+}
+
+/* Collects the mounts listed in /proc/self/mountinfo whose target lies below 'prefix'. Entries for
+ * which path_startswith() yields NULL or an empty remainder are skipped, as are entries whose mount ID
+ * in the table differs from the one path_get_mnt_id() reports for the same path. For every remaining
+ * entry the mount tree is opened with open_tree(OPEN_TREE_CLONE | AT_RECURSIVE) and stored together
+ * with a copy of its path.
+ *
+ * The resulting array is sorted by path and then passed through sub_mount_drop(), so it may contain
+ * cleared entries (no path, no fd). On success the array and its length are returned through
+ * 'ret_mounts' and 'ret_n_mounts'; remount_and_move_sub_mounts() releases them with
+ * sub_mount_array_free(). Returns 0 on success and a negative value on failure. */
+static int get_sub_mounts(const char *prefix, SubMount **ret_mounts, size_t *ret_n_mounts) {
+        _cleanup_(mnt_free_tablep) struct libmnt_table *table = NULL;
+        _cleanup_(mnt_free_iterp) struct libmnt_iter *iter = NULL;
+        SubMount *list = NULL;
+        size_t n_list = 0;
+        int r;
+
+        CLEANUP_ARRAY(list, n_list, sub_mount_array_free);
+
+        assert(prefix);
+        assert(ret_mounts);
+        assert(ret_n_mounts);
+
+        r = libmount_parse("/proc/self/mountinfo", NULL, &table, &iter);
+        if (r < 0)
+                return log_debug_errno(r, "Failed to parse /proc/self/mountinfo: %m");
+
+        for (;;) {
+                _cleanup_close_ int fd = -EBADF;
+                _cleanup_free_ char *copy = NULL;
+                const char *path, *rel;
+                struct libmnt_fs *fs;
+                int table_id, path_id;
+
+                r = mnt_table_next_fs(table, iter, &fs);
+                if (r < 0)
+                        return log_debug_errno(r, "Failed to get next entry from /proc/self/mountinfo: %m");
+                if (r == 1)
+                        break; /* EOF */
+
+                path = mnt_fs_get_target(fs);
+                if (!path)
+                        continue;
+
+                /* Only entries with a non-empty remainder after the prefix are of interest. */
+                rel = path_startswith(path, prefix);
+                if (isempty(rel))
+                        continue;
+
+                table_id = mnt_fs_get_id(fs);
+                r = path_get_mnt_id(path, &path_id);
+                if (r < 0) {
+                        log_debug_errno(r, "Failed to get mount ID of '%s', ignoring: %m", path);
+                        continue;
+                }
+                if (table_id != path_id) {
+                        /* The path may be hidden by another over-mount or already remounted. */
+                        log_debug("The mount IDs of '%s' obtained by libmount and path_get_mnt_id() are different (%i vs %i), ignoring.",
+                                  path, table_id, path_id);
+                        continue;
+                }
+
+                fd = open_tree(AT_FDCWD, path, OPEN_TREE_CLONE | OPEN_TREE_CLOEXEC | AT_RECURSIVE);
+                if (fd < 0 && errno == ENOENT)
+                        /* The path may be hidden by another over-mount or already unmounted. */
+                        continue;
+                if (fd < 0)
+                        return log_debug_errno(errno, "Failed to open tree of mounted filesystem '%s': %m", path);
+
+                copy = strdup(path);
+                if (!copy)
+                        return log_oom_debug();
+
+                if (!GREEDY_REALLOC(list, n_list + 1))
+                        return log_oom_debug();
+
+                /* The new entry takes over both the path copy and the fd. */
+                list[n_list++] = (SubMount) {
+                        .path = TAKE_PTR(copy),
+                        .mount_fd = TAKE_FD(fd),
+                };
+        }
+
+        typesafe_qsort(list, n_list, sub_mount_compare);
+        sub_mount_drop(list, n_list);
+
+        *ret_mounts = TAKE_PTR(list);
+        *ret_n_mounts = n_list;
+        return 0;
+}
+
+/* Moves every entry of the array that still has a path and a valid fd onto its recorded path with
+ * move_mount(); other entries are skipped. The target directory is created beforehand, ignoring the
+ * result of that attempt. Stops at the first move_mount() failure and returns the logged error;
+ * returns 0 otherwise. */
+static int move_sub_mounts(SubMount *mounts, size_t n) {
+        assert(mounts || n == 0);
+
+        for (size_t i = 0; i < n; i++) {
+                const SubMount *m = &mounts[i];
+                int k;
+
+                if (!m->path || m->mount_fd < 0)
+                        continue;
+
+                (void) mkdir_p_label(m->path, 0755);
+
+                k = move_mount(m->mount_fd, "", AT_FDCWD, m->path, MOVE_MOUNT_F_EMPTY_PATH);
+                if (k < 0)
+                        return log_debug_errno(errno, "Failed to move mount_fd to '%s': %m", m->path);
+        }
+
+        return 0;
+}
+
+/* Mounts a new instance of the filesystem described by what/type/flags/options on 'where', keeping
+ * the sub-mounts that were mounted below 'where' before.
+ *
+ * If 'where' is not a mount point, the filesystem is simply mounted there. Otherwise the sub-mounts
+ * are collected with get_sub_mounts(), the old mount is unmounted recursively, the new filesystem is
+ * mounted, and the collected sub-mounts are moved onto it.
+ *
+ * Returns a negative value on failure; otherwise the result of the last step that was executed. */
+int remount_and_move_sub_mounts(
+                const char *what,
+                const char *where,
+                const char *type,
+                unsigned long flags,
+                const char *options) {
+
+        SubMount *mounts = NULL; /* avoid false maybe-uninitialized warning */
+        size_t n = 0; /* avoid false maybe-uninitialized warning */
+        int r;
+
+        CLEANUP_ARRAY(mounts, n, sub_mount_array_free);
+
+        assert(where);
+
+        /* This is useful when creating a new network namespace. Unlike procfs, we need to remount sysfs,
+         * otherwise properties of the network interfaces in the main network namespace are still accessible
+         * through the old sysfs, e.g. /sys/class/net/eth0. All sub-mounts previously mounted on the sysfs
+         * are moved onto the new sysfs mount. */
+
+        r = path_is_mount_point(where, NULL, 0);
+        if (r < 0)
+                return log_debug_errno(r, "Failed to determine if '%s' is a mountpoint: %m", where);
+        if (r == 0)
+                /* Shortcut. Simply mount the requested filesystem. */
+                return mount_nofollow_verbose(LOG_DEBUG, what, where, type, flags, options);
+
+        /* Get the list of sub-mounts and duplicate them. */
+        r = get_sub_mounts(where, &mounts, &n);
+        if (r < 0)
+                return r;
+
+        /* Then, unmount the old mount and its sub-mounts. This stays best-effort: a failure is logged
+         * and the new filesystem is mounted regardless. */
+        r = umount_recursive(where, 0);
+        if (r < 0)
+                log_debug_errno(r, "Failed to unmount '%s' recursively, ignoring: %m", where);
+
+        /* Mount the new instance of the target filesystem. */
+        r = mount_nofollow_verbose(LOG_DEBUG, what, where, type, flags, options);
+        if (r < 0)
+                return r;
+
+        /* Finally, move all the sub-mounts onto the new target mount point. */
+        return move_sub_mounts(mounts, n);
+}
+
+/* Remounts sysfs on 'where' through remount_and_move_sub_mounts(), using "sysfs" as both source and
+ * filesystem type, the flags MS_NOSUID|MS_NOEXEC|MS_NODEV and no extra mount options. */
+int remount_sysfs(const char *where) {
+        const unsigned long sysfs_flags = MS_NOSUID|MS_NOEXEC|MS_NODEV;
+
+        return remount_and_move_sub_mounts("sysfs", where, "sysfs", sysfs_flags, NULL);
+}
+
 int make_mount_point_inode_from_stat(const struct stat *st, const char *dest, mode_t mode) {
         assert(st);
         assert(dest);
index 7554bf828e123f0595738154a253f4dd6f497adc..84ea4b6392773f388e132b7f0c5f877ccd1c9043 100644 (file)
@@ -105,6 +105,14 @@ typedef enum RemountIdmapping {
 
 int remount_idmap(const char *p, uid_t uid_shift, uid_t uid_range, uid_t owner, RemountIdmapping idmapping);
 
+/* Mounts 'what' of the given type on 'where'; the implementation is in mount-util.c. */
+int remount_and_move_sub_mounts(const char *what, const char *where, const char *type, unsigned long flags, const char *options);
+/* Calls remount_and_move_sub_mounts() for a sysfs instance on 'where'. */
+int remount_sysfs(const char *where);
+
 /* Creates a mount point (not parents) based on the source path or stat - ie, a file or a directory */
 int make_mount_point_inode_from_stat(const struct stat *st, const char *dest, mode_t mode);
 int make_mount_point_inode_from_path(const char *source, const char *dest, mode_t mode);
index fddf70584f02b5ea0a825d56df23b059ab51e51a..405cdf557a821ed362f5c29258b8645b815a6066 100644 (file)
@@ -8,6 +8,7 @@
 #include "fd-util.h"
 #include "fileio.h"
 #include "fs-util.h"
+#include "missing_magic.h"
 #include "missing_mount.h"
 #include "mkdir.h"
 #include "mount-util.h"
 #include "path-util.h"
 #include "process-util.h"
 #include "rm-rf.h"
+#include "stat-util.h"
 #include "string-util.h"
 #include "strv.h"
 #include "tests.h"
 #include "tmpfile-util.h"
 
+/* Exercises remount_and_move_sub_mounts() on a tmpfs tree inside a forked child that runs in its own
+ * mount namespace. The tree contains two levels of sub-mounts, with the first "sub1" mount hidden by
+ * a second mount on the same path. After remounting the top-level tmpfs, the visible sub-mounts must
+ * still be present and the hidden ones must be gone. Skipped when not privileged, or when the call
+ * fails with -EINVAL or a "not supported" error. */
+TEST(remount_and_move_sub_mounts) {
+        int r;
+
+        if (geteuid() != 0 || have_effective_cap(CAP_SYS_ADMIN) <= 0)
+                return (void) log_tests_skipped("not running privileged");
+
+        r = safe_fork("(remount-and-move-sub-mounts)",
+                      FORK_RESET_SIGNALS |
+                      FORK_CLOSE_ALL_FDS |
+                      FORK_DEATHSIG |
+                      FORK_WAIT |
+                      FORK_REOPEN_LOG |
+                      FORK_LOG |
+                      FORK_NEW_MOUNTNS |
+                      FORK_MOUNTNS_SLAVE,
+                      NULL);
+        assert_se(r >= 0);
+        if (r == 0) {
+                _cleanup_free_ char *d = NULL, *fn = NULL;
+
+                assert_se(mkdtemp_malloc(NULL, &d) >= 0);
+
+                assert_se(mount_nofollow_verbose(LOG_DEBUG, "tmpfs", d, "tmpfs", MS_NOSUID|MS_NODEV, NULL) >= 0);
+
+                /* Marker file in the main fs; it must disappear once the main fs is remounted. */
+                assert_se(fn = path_join(d, "memo"));
+                assert_se(write_string_file(fn, d, WRITE_STRING_FILE_CREATE|WRITE_STRING_FILE_AVOID_NEWLINE) >= 0);
+                assert_se(access(fn, F_OK) >= 0);
+
+                /* Create fs tree */
+                FOREACH_STRING(p, "sub1", "sub1/hoge", "sub1/foo", "sub2", "sub2/aaa", "sub2/bbb") {
+                        _cleanup_free_ char *where = NULL, *filename = NULL;
+
+                        assert_se(where = path_join(d, p));
+                        assert_se(mkdir_p(where, 0755) >= 0);
+                        assert_se(mount_nofollow_verbose(LOG_DEBUG, "tmpfs", where, "tmpfs", MS_NOSUID|MS_NODEV, NULL) >= 0);
+
+                        /* Each mount gets a marker file containing its own mount path. */
+                        assert_se(filename = path_join(where, "memo"));
+                        assert_se(write_string_file(filename, where, WRITE_STRING_FILE_CREATE|WRITE_STRING_FILE_AVOID_NEWLINE) >= 0);
+                        assert_se(access(filename, F_OK) >= 0);
+                }
+
+                /* Hide sub1. */
+                FOREACH_STRING(p, "sub1", "sub1/hogehoge", "sub1/foofoo") {
+                        _cleanup_free_ char *where = NULL, *filename = NULL;
+
+                        assert_se(where = path_join(d, p));
+                        assert_se(mkdir_p(where, 0755) >= 0);
+                        assert_se(mount_nofollow_verbose(LOG_DEBUG, "tmpfs", where, "tmpfs", MS_NOSUID|MS_NODEV, NULL) >= 0);
+
+                        assert_se(filename = path_join(where, "memo"));
+                        assert_se(write_string_file(filename, where, WRITE_STRING_FILE_CREATE|WRITE_STRING_FILE_AVOID_NEWLINE) >= 0);
+                        assert_se(access(filename, F_OK) >= 0);
+                }
+
+                /* Remount the main fs. */
+                r = remount_and_move_sub_mounts("tmpfs", d, "tmpfs", MS_NOSUID|MS_NODEV, NULL);
+                if (r == -EINVAL || (r < 0 && ERRNO_IS_NOT_SUPPORTED(r))) {
+                        log_tests_skipped_errno(r, "The kernel seems too old: %m");
+                        _exit(EXIT_SUCCESS);
+                }
+                /* Any other failure is a real error; report it here rather than through the checks below. */
+                assert_se(r >= 0);
+
+                /* Check the file in the main fs does not exist. */
+                assert_se(access(fn, F_OK) < 0 && errno == ENOENT);
+
+                /* Check the files in sub-mounts are kept. */
+                FOREACH_STRING(p, "sub1", "sub1/hogehoge", "sub1/foofoo", "sub2", "sub2/aaa", "sub2/bbb") {
+                        _cleanup_free_ char *where = NULL, *filename = NULL, *content = NULL;
+
+                        assert_se(where = path_join(d, p));
+                        assert_se(filename = path_join(where, "memo"));
+                        assert_se(read_full_file(filename, &content, NULL) >= 0);
+                        assert_se(streq(content, where));
+                }
+
+                /* Check that the sub-mounts which were hidden below the first sub1 mount are dropped. */
+                FOREACH_STRING(p, "sub1/hoge", "sub1/foo") {
+                        _cleanup_free_ char *where = NULL;
+
+                        assert_se(where = path_join(d, p));
+                        assert_se(access(where, F_OK) < 0 && errno == ENOENT);
+                }
+
+                _exit(EXIT_SUCCESS);
+        }
+}
+
+/* Exercises remount_sysfs() in a forked child with its own mount namespace: after unshare(CLONE_NEWNET)
+ * the dummy interface of the original network namespace is still visible below /sys, and it must no
+ * longer be visible once sysfs has been remounted. Skipped when not privileged, when /sys is not
+ * sysfs, when the dummy interface is missing, or when remount_sysfs() fails with -EINVAL or a
+ * "not supported" error. */
+TEST(remount_sysfs) {
+        int r;
+
+        if (geteuid() != 0 || have_effective_cap(CAP_SYS_ADMIN) <= 0)
+                return (void) log_tests_skipped("not running privileged");
+
+        if (path_is_fs_type("/sys", SYSFS_MAGIC) <= 0)
+                return (void) log_tests_skipped("sysfs is not mounted on /sys");
+
+        if (access("/sys/class/net/dummy-test-mnt", F_OK) < 0)
+                return (void) log_tests_skipped_errno(errno, "The network interface dummy-test-mnt does not exist: %m");
+
+        r = safe_fork("(remount-sysfs)",
+                      FORK_RESET_SIGNALS |
+                      FORK_CLOSE_ALL_FDS |
+                      FORK_DEATHSIG |
+                      FORK_WAIT |
+                      FORK_REOPEN_LOG |
+                      FORK_LOG |
+                      FORK_NEW_MOUNTNS |
+                      FORK_MOUNTNS_SLAVE,
+                      NULL);
+        assert_se(r >= 0);
+        if (r == 0) {
+                assert_se(unshare(CLONE_NEWNET) >= 0);
+
+                /* Even unshare()ed, the interfaces in the main namespace can be accessed through sysfs. */
+                assert_se(access("/sys/class/net/lo", F_OK) >= 0);
+                assert_se(access("/sys/class/net/dummy-test-mnt", F_OK) >= 0);
+
+                r = remount_sysfs("/sys");
+                if (r == -EINVAL || (r < 0 && ERRNO_IS_NOT_SUPPORTED(r))) {
+                        log_tests_skipped_errno(r, "The kernel seems too old: %m");
+                        _exit(EXIT_SUCCESS);
+                }
+                /* Any other failure is a real error; report it here rather than through the checks below. */
+                assert_se(r >= 0);
+
+                /* After remounting sysfs, the interfaces in the main namespace cannot be accessed. */
+                assert_se(access("/sys/class/net/lo", F_OK) >= 0);
+                assert_se(access("/sys/class/net/dummy-test-mnt", F_OK) < 0 && errno == ENOENT);
+
+                _exit(EXIT_SUCCESS);
+        }
+}
+
 TEST(mount_option_mangle) {
         char *opts = NULL;
         unsigned long f;
@@ -256,4 +388,17 @@ TEST(make_mount_point_inode) {
         assert_se(!(S_IXOTH & st.st_mode));
 }
 
-DEFINE_TEST_MAIN(LOG_DEBUG);
+/* Passed to DEFINE_TEST_MAIN_FULL() as the intro hook. Tries to add the dummy network interface that
+ * TEST(remount_sysfs) looks for; the exit status of the command is ignored. Always returns 0. */
+static int intro(void) {
+        static const char cmd[] = "ip link add dummy-test-mnt type dummy";
+
+        /* Create a dummy network interface for testing remount_sysfs(). */
+        (void) system(cmd);
+
+        return 0;
+}
+
+/* Passed to DEFINE_TEST_MAIN_FULL() as the outro hook. Tries to delete the dummy network interface
+ * added by intro(); the exit status of the command is ignored. Always returns 0. */
+static int outro(void) {
+        static const char cmd[] = "ip link del dummy-test-mnt";
+
+        (void) system(cmd);
+
+        return 0;
+}
+
+DEFINE_TEST_MAIN_FULL(LOG_DEBUG, intro, outro);