git.ipfire.org Git - thirdparty/systemd.git/commitdiff
Merge pull request #28764 from yuwata/core-namespace
author Yu Watanabe <watanabe.yu+github@gmail.com>
Wed, 23 Aug 2023 03:43:29 +0000 (12:43 +0900)
committer GitHub <noreply@github.com>
Wed, 23 Aug 2023 03:43:29 +0000 (12:43 +0900)
core/namespace: cleanups

src/core/namespace.c
src/shared/mount-util.c
src/shared/mount-util.h
src/test/test-mount-util.c

index 86406007ad68d66f43ddadfc5c8bce700bc80542..2197287fd08afa3824269b02018ce6e27ec6447b 100644 (file)
@@ -209,26 +209,30 @@ static const MountEntry protect_system_strict_table[] = {
 };
 
 static const char * const mount_mode_table[_MOUNT_MODE_MAX] = {
-        [INACCESSIBLE]         = "inaccessible",
-        [OVERLAY_MOUNT]        = "overlay",
-        [BIND_MOUNT]           = "bind",
-        [BIND_MOUNT_RECURSIVE] = "rbind",
-        [PRIVATE_TMP]          = "private-tmp",
-        [PRIVATE_DEV]          = "private-dev",
-        [BIND_DEV]             = "bind-dev",
-        [EMPTY_DIR]            = "empty",
-        [PRIVATE_SYSFS]        = "private-sysfs",
-        [BIND_SYSFS]           = "bind-sysfs",
-        [PROCFS]               = "procfs",
-        [READONLY]             = "read-only",
-        [READWRITE]            = "read-write",
-        [TMPFS]                = "tmpfs",
-        [MOUNT_IMAGES]         = "mount-images",
-        [READWRITE_IMPLICIT]   = "rw-implicit",
-        [EXEC]                 = "exec",
-        [NOEXEC]               = "noexec",
-        [MQUEUEFS]             = "mqueuefs",
-        [MKDIR]                = "mkdir",
+        [INACCESSIBLE]          = "inaccessible",
+        [OVERLAY_MOUNT]         = "overlay",
+        [MOUNT_IMAGES]          = "mount-images",
+        [BIND_MOUNT]            = "bind",
+        [BIND_MOUNT_RECURSIVE]  = "rbind",
+        [PRIVATE_TMP]           = "private-tmp",
+        [PRIVATE_TMP_READONLY]  = "private-tmp-read-only",
+        [PRIVATE_DEV]           = "private-dev",
+        [BIND_DEV]              = "bind-dev",
+        [EMPTY_DIR]             = "empty",
+        [PRIVATE_SYSFS]         = "private-sysfs",
+        [BIND_SYSFS]            = "bind-sysfs",
+        [PROCFS]                = "procfs",
+        [READONLY]              = "read-only",
+        [READWRITE]             = "read-write",
+        [NOEXEC]                = "noexec",
+        [EXEC]                  = "exec",
+        [TMPFS]                 = "tmpfs",
+        [RUN]                   = "run",
+        [EXTENSION_DIRECTORIES] = "extension-directories",
+        [EXTENSION_IMAGES]      = "extension-images",
+        [MQUEUEFS]              = "mqueuefs",
+        [READWRITE_IMPLICIT]    = "read-write-implicit",
+        [MKDIR]                 = "mkdir",
 };
 
 /* Helper struct for naming simplicity and reusability */
@@ -1047,34 +1051,7 @@ static int mount_bind_dev(const MountEntry *m) {
         if (r > 0) /* make this a NOP if /dev is already a mount point */
                 return 0;
 
-        r = mount_nofollow_verbose(LOG_DEBUG, "/dev", mount_entry_path(m), NULL, MS_BIND|MS_REC, NULL);
-        if (r < 0)
-                return r;
-
-        return 1;
-}
-
-static int mount_private_sysfs(const MountEntry *m) {
-        const char *p = mount_entry_path(ASSERT_PTR(m));
-        int r;
-
-        (void) mkdir_p_label(p, 0755);
-
-        r = remount_sysfs(p);
-        if (r < 0 && (ERRNO_IS_PRIVILEGE(r) || ERRNO_IS_NOT_SUPPORTED(r))) {
-                /* Running with an unprivileged user (PrivateUsers=yes), or the kernel seems old. Falling
-                 * back to bind mount the host's version so that we get all child mounts of it, too. */
-
-                log_debug_errno(r, "Failed to remount sysfs on %s, falling back to bind mount: %m", p);
-
-                (void) umount_recursive(p, 0);
-
-                r = mount_nofollow_verbose(LOG_DEBUG, "/sys", p, NULL, MS_BIND|MS_REC, NULL);
-        }
-        if (r < 0)
-                return log_debug_errno(r, "Failed to remount sysfs on %s: %m", p);
-
-        return 1;
+        return mount_nofollow_verbose(LOG_DEBUG, "/dev", mount_entry_path(m), NULL, MS_BIND|MS_REC, NULL);
 }
 
 static int mount_bind_sysfs(const MountEntry *m) {
@@ -1091,11 +1068,34 @@ static int mount_bind_sysfs(const MountEntry *m) {
                 return 0;
 
         /* Bind mount the host's version so that we get all child mounts of it, too. */
-        r = mount_nofollow_verbose(LOG_DEBUG, "/sys", mount_entry_path(m), NULL, MS_BIND|MS_REC, NULL);
-        if (r < 0)
+        return mount_nofollow_verbose(LOG_DEBUG, "/sys", mount_entry_path(m), NULL, MS_BIND|MS_REC, NULL);
+}
+
+static int mount_private_sysfs(const MountEntry *m) {
+        const char *entry_path = mount_entry_path(ASSERT_PTR(m));
+        int r, n;
+
+        (void) mkdir_p_label(entry_path, 0755);
+
+        n = umount_recursive(entry_path, 0);
+
+        r = mount_nofollow_verbose(LOG_DEBUG, "sysfs", entry_path, "sysfs", MS_NOSUID|MS_NOEXEC|MS_NODEV, NULL);
+        if (ERRNO_IS_NEG_PRIVILEGE(r)) {
+                /* When we do not have enough privileges to mount sysfs, fall back to use existing /sys. */
+
+                if (n > 0)
+                        /* /sys or some of sub-mounts are umounted in the above. Refuse incomplete tree.
+                         * Propagate the original error code returned by mount() in the above. */
+                        return r;
+
+                return mount_bind_sysfs(m);
+
+        } else if (r < 0)
                 return r;
 
-        return 1;
+        /* We mounted a new instance now. Let's bind mount the children over now. */
+        (void) bind_mount_submounts("/sys", entry_path);
+        return 0;
 }
 
 static int mount_procfs(const MountEntry *m, const NamespaceInfo *ns_info) {
@@ -1154,34 +1154,32 @@ static int mount_procfs(const MountEntry *m, const NamespaceInfo *ns_info) {
                  * means we really don't want to use it, since it would affect our host's /proc
                  * mount. Hence let's gracefully fallback to a classic, unrestricted version. */
                 r = mount_nofollow_verbose(LOG_DEBUG, "proc", entry_path, "proc", MS_NOSUID|MS_NOEXEC|MS_NODEV, NULL);
-        if (r == -EPERM) {
-                /* When we do not have enough privileges to mount /proc, fallback to use existing /proc. */
+        if (ERRNO_IS_NEG_PRIVILEGE(r)) {
+                /* When we do not have enough privileges to mount /proc, fall back to use existing /proc. */
 
                 if (n > 0)
                         /* /proc or some of sub-mounts are umounted in the above. Refuse incomplete tree.
                          * Propagate the original error code returned by mount() in the above. */
-                        return -EPERM;
+                        return r;
 
                 r = path_is_mount_point(entry_path, NULL, 0);
                 if (r < 0)
                         return log_debug_errno(r, "Unable to determine whether /proc is already mounted: %m");
-                if (r == 0) {
-                        /* We lack permissions to mount a new instance of /proc, and it is not already
-                         * mounted. But we can access the host's, so as a final fallback bind-mount it to
-                         * the destination, as most likely we are inside a user manager in an unprivileged
-                         * user namespace. */
-                        r = mount_nofollow_verbose(LOG_DEBUG, "/proc", entry_path, NULL, MS_BIND|MS_REC, NULL);
-                        if (r < 0)
-                                return -EPERM;
-                }
+                if (r > 0)
+                        return 0;
+
+                /* We lack permissions to mount a new instance of /proc, and it is not already mounted. But
+                 * we can access the host's, so as a final fallback bind-mount it to the destination, as most
+                 * likely we are inside a user manager in an unprivileged user namespace. */
+                return mount_nofollow_verbose(LOG_DEBUG, "/proc", entry_path, NULL, MS_BIND|MS_REC, NULL);
+
         } else if (r < 0)
                 return r;
-        else
-                /* We mounted a new instance now. Let's bind mount the children over now. This matters for
-                 * nspawn where a bunch of files are overmounted, in particular the boot id */
-                (void) bind_mount_submounts("/proc", entry_path);
 
-        return 1;
+        /* We mounted a new instance now. Let's bind mount the children over now. This matters for nspawn
+         * where a bunch of files are overmounted, in particular the boot id */
+        (void) bind_mount_submounts("/proc", entry_path);
+        return 0;
 }
 
 static int mount_tmpfs(const MountEntry *m) {
@@ -1207,7 +1205,7 @@ static int mount_tmpfs(const MountEntry *m) {
         if (r < 0)
                 return log_debug_errno(r, "Failed to fix label of '%s' as '%s': %m", entry_path, inner_path);
 
-        return 1;
+        return 0;
 }
 
 static int mount_run(const MountEntry *m) {
@@ -1305,7 +1303,7 @@ static int mount_image(
         if (r < 0)
                 return log_debug_errno(r, "Failed to mount image %s on %s: %m", mount_entry_source(m), mount_entry_path(m));
 
-        return 1;
+        return 0;
 }
 
 static int mount_overlay(const MountEntry *m) {
@@ -1321,10 +1319,8 @@ static int mount_overlay(const MountEntry *m) {
         r = mount_nofollow_verbose(LOG_DEBUG, "overlay", mount_entry_path(m), "overlay", MS_RDONLY, options);
         if (r == -ENOENT && m->ignore)
                 return 0;
-        if (r < 0)
-                return r;
 
-        return 1;
+        return r;
 }
 
 static int follow_symlink(
index f0bf821430613ffd6c90314d3d44fc01a6709277..20f93b7c8930354ed8b7a680df7ae7d080efa422 100644 (file)
@@ -1235,7 +1235,6 @@ static void sub_mount_drop(SubMount *s, size_t n) {
 
 static int get_sub_mounts(
                 const char *prefix,
-                bool clone_tree,
                 SubMount **ret_mounts,
                 size_t *ret_n_mounts) {
         _cleanup_(mnt_free_tablep) struct libmnt_table *table = NULL;
@@ -1287,10 +1286,7 @@ static int get_sub_mounts(
                         continue;
                 }
 
-                if (clone_tree)
-                        mount_fd = open_tree(AT_FDCWD, path, OPEN_TREE_CLONE | OPEN_TREE_CLOEXEC | AT_RECURSIVE);
-                else
-                        mount_fd = open(path, O_CLOEXEC|O_PATH);
+                mount_fd = open(path, O_CLOEXEC|O_PATH);
                 if (mount_fd < 0) {
                         if (errno == ENOENT) /* The path may be hidden by another over-mount or already unmounted. */
                                 continue;
@@ -1319,66 +1315,6 @@ static int get_sub_mounts(
         return 0;
 }
 
-static int move_sub_mounts(SubMount *mounts, size_t n) {
-        assert(mounts || n == 0);
-
-        for (size_t i = 0; i < n; i++) {
-                if (!mounts[i].path || mounts[i].mount_fd < 0)
-                        continue;
-
-                (void) mkdir_p_label(mounts[i].path, 0755);
-
-                if (move_mount(mounts[i].mount_fd, "", AT_FDCWD, mounts[i].path, MOVE_MOUNT_F_EMPTY_PATH) < 0)
-                        return log_debug_errno(errno, "Failed to move mount_fd to '%s': %m", mounts[i].path);
-        }
-
-        return 0;
-}
-
-int remount_and_move_sub_mounts(
-                const char *what,
-                const char *where,
-                const char *type,
-                unsigned long flags,
-                const char *options) {
-
-        SubMount *mounts = NULL;
-        size_t n = 0;
-        int r;
-
-        CLEANUP_ARRAY(mounts, n, sub_mount_array_free);
-
-        assert(where);
-
-        /* This is useful when creating a new network namespace. Unlike procfs, we need to remount sysfs,
-         * otherwise properties of the network interfaces in the main network namespace are still accessible
-         * through the old sysfs, e.g. /sys/class/net/eth0. All sub-mounts previously mounted on the sysfs
-         * are moved onto the new sysfs mount. */
-
-        r = path_is_mount_point(where, NULL, 0);
-        if (r < 0)
-                return log_debug_errno(r, "Failed to determine if '%s' is a mountpoint: %m", where);
-        if (r == 0)
-                /* Shortcut. Simply mount the requested filesystem. */
-                return mount_nofollow_verbose(LOG_DEBUG, what, where, type, flags, options);
-
-        /* Get the list of sub-mounts and duplicate them. */
-        r = get_sub_mounts(where, /* clone_tree= */ true, &mounts, &n);
-        if (r < 0)
-                return r;
-
-        /* Then, remount the mount and its sub-mounts. */
-        (void) umount_recursive(where, 0);
-
-        /* Remount the target filesystem. */
-        r = mount_nofollow_verbose(LOG_DEBUG, what, where, type, flags, options);
-        if (r < 0)
-                return r;
-
-        /* Finally, move the all sub-mounts on the new target mount point. */
-        return move_sub_mounts(mounts, n);
-}
-
 int bind_mount_submounts(
                 const char *source,
                 const char *target) {
@@ -1395,7 +1331,7 @@ int bind_mount_submounts(
 
         CLEANUP_ARRAY(mounts, n, sub_mount_array_free);
 
-        r = get_sub_mounts(source, /* clone_tree= */ false, &mounts, &n);
+        r = get_sub_mounts(source, &mounts, &n);
         if (r < 0)
                 return r;
 
@@ -1430,10 +1366,6 @@ int bind_mount_submounts(
         return ret;
 }
 
-int remount_sysfs(const char *where) {
-        return remount_and_move_sub_mounts("sysfs", where, "sysfs", MS_NOSUID|MS_NOEXEC|MS_NODEV, NULL);
-}
-
 int make_mount_point_inode_from_stat(const struct stat *st, const char *dest, mode_t mode) {
         assert(st);
         assert(dest);
index 7ee6750044bca8e358409d2ad33af589bc07a821..fb496e156b2c94c237d1ac4658be587db84cd79a 100644 (file)
@@ -124,14 +124,6 @@ int make_userns(uid_t uid_shift, uid_t uid_range, uid_t owner, RemountIdmapping
 int remount_idmap_fd(const char *p, int userns_fd);
 int remount_idmap(const char *p, uid_t uid_shift, uid_t uid_range, uid_t owner, RemountIdmapping idmapping);
 
-int remount_and_move_sub_mounts(
-                const char *what,
-                const char *where,
-                const char *type,
-                unsigned long flags,
-                const char *options);
-int remount_sysfs(const char *where);
-
 int bind_mount_submounts(
                 const char *source,
                 const char *target);
index 0898e68cb5d50cad3e96da2f9c5a5d17640b0cae..1cfc1f3ae552ceda604317200a1b7ead94102d62 100644 (file)
 #include "tests.h"
 #include "tmpfile-util.h"
 
-TEST(remount_and_move_sub_mounts) {
-        int r;
-
-        if (geteuid() != 0 || have_effective_cap(CAP_SYS_ADMIN) <= 0)
-                return (void) log_tests_skipped("not running privileged");
-
-        r = safe_fork("(remount-and-move-sub-mounts)",
-                      FORK_RESET_SIGNALS |
-                      FORK_CLOSE_ALL_FDS |
-                      FORK_DEATHSIG |
-                      FORK_WAIT |
-                      FORK_REOPEN_LOG |
-                      FORK_LOG |
-                      FORK_NEW_MOUNTNS |
-                      FORK_MOUNTNS_SLAVE,
-                      NULL);
-        assert_se(r >= 0);
-        if (r == 0) {
-                _cleanup_free_ char *d = NULL, *fn = NULL;
-
-                assert_se(mkdtemp_malloc(NULL, &d) >= 0);
-
-                assert_se(mount_nofollow_verbose(LOG_DEBUG, "tmpfs", d, "tmpfs", MS_NOSUID|MS_NODEV, NULL) >= 0);
-
-                assert_se(fn = path_join(d, "memo"));
-                assert_se(write_string_file(fn, d, WRITE_STRING_FILE_CREATE|WRITE_STRING_FILE_AVOID_NEWLINE) >= 0);
-                assert_se(access(fn, F_OK) >= 0);
-
-                /* Create fs tree */
-                FOREACH_STRING(p, "sub1", "sub1/hoge", "sub1/foo", "sub2", "sub2/aaa", "sub2/bbb") {
-                        _cleanup_free_ char *where = NULL, *filename = NULL;
-
-                        assert_se(where = path_join(d, p));
-                        assert_se(mkdir_p(where, 0755) >= 0);
-                        assert_se(mount_nofollow_verbose(LOG_DEBUG, "tmpfs", where, "tmpfs", MS_NOSUID|MS_NODEV, NULL) >= 0);
-
-                        assert_se(filename = path_join(where, "memo"));
-                        assert_se(write_string_file(filename, where, WRITE_STRING_FILE_CREATE|WRITE_STRING_FILE_AVOID_NEWLINE) >= 0);
-                        assert_se(access(filename, F_OK) >= 0);
-                }
-
-                /* Hide sub1. */
-                FOREACH_STRING(p, "sub1", "sub1/hogehoge", "sub1/foofoo") {
-                        _cleanup_free_ char *where = NULL, *filename = NULL;
-
-                        assert_se(where = path_join(d, p));
-                        assert_se(mkdir_p(where, 0755) >= 0);
-                        assert_se(mount_nofollow_verbose(LOG_DEBUG, "tmpfs", where, "tmpfs", MS_NOSUID|MS_NODEV, NULL) >= 0);
-
-                        assert_se(filename = path_join(where, "memo"));
-                        assert_se(write_string_file(filename, where, WRITE_STRING_FILE_CREATE|WRITE_STRING_FILE_AVOID_NEWLINE) >= 0);
-                        assert_se(access(filename, F_OK) >= 0);
-                }
-
-                /* Remount the main fs. */
-                r = remount_and_move_sub_mounts("tmpfs", d, "tmpfs", MS_NOSUID|MS_NODEV, NULL);
-                if (r == -EINVAL || (r < 0 && ERRNO_IS_NOT_SUPPORTED(r))) {
-                        log_tests_skipped_errno(r, "The kernel seems too old: %m");
-                        _exit(EXIT_SUCCESS);
-                }
-
-                /* Check the file in the main fs does not exist. */
-                assert_se(access(fn, F_OK) < 0 && errno == ENOENT);
-
-                /* Check the files in sub-mounts are kept. */
-                FOREACH_STRING(p, "sub1", "sub1/hogehoge", "sub1/foofoo", "sub2", "sub2/aaa", "sub2/bbb") {
-                        _cleanup_free_ char *where = NULL, *filename = NULL, *content = NULL;
-
-                        assert_se(where = path_join(d, p));
-                        assert_se(filename = path_join(where, "memo"));
-                        assert_se(read_full_file(filename, &content, NULL) >= 0);
-                        assert_se(streq(content, where));
-                }
-
-                /* umount sub1, and check if the previously hidden sub-mounts are dropped. */
-                FOREACH_STRING(p, "sub1/hoge", "sub1/foo") {
-                        _cleanup_free_ char *where = NULL;
-
-                        assert_se(where = path_join(d, p));
-                        assert_se(access(where, F_OK) < 0 && errno == ENOENT);
-                }
-
-                _exit(EXIT_SUCCESS);
-        }
-}
-
-TEST(remount_sysfs) {
-        int r;
-
-        if (geteuid() != 0 || have_effective_cap(CAP_SYS_ADMIN) <= 0)
-                return (void) log_tests_skipped("not running privileged");
-
-        if (path_is_fs_type("/sys", SYSFS_MAGIC) <= 0)
-                return (void) log_tests_skipped("sysfs is not mounted on /sys");
-
-        if (access("/sys/class/net/dummy-test-mnt", F_OK) < 0)
-                return (void) log_tests_skipped_errno(errno, "The network interface dummy-test-mnt does not exit");
-
-        r = safe_fork("(remount-sysfs)",
-                      FORK_RESET_SIGNALS |
-                      FORK_CLOSE_ALL_FDS |
-                      FORK_DEATHSIG |
-                      FORK_WAIT |
-                      FORK_REOPEN_LOG |
-                      FORK_LOG |
-                      FORK_NEW_MOUNTNS |
-                      FORK_MOUNTNS_SLAVE,
-                      NULL);
-        assert_se(r >= 0);
-        if (r == 0) {
-                assert_se(unshare(CLONE_NEWNET) >= 0);
-
-                /* Even unshare()ed, the interfaces in the main namespace can be accessed through sysfs. */
-                assert_se(access("/sys/class/net/lo", F_OK) >= 0);
-                assert_se(access("/sys/class/net/dummy-test-mnt", F_OK) >= 0);
-
-                r = remount_sysfs("/sys");
-                if (r == -EINVAL || (r < 0 && ERRNO_IS_NOT_SUPPORTED(r))) {
-                        log_tests_skipped_errno(r, "The kernel seems too old: %m");
-                        _exit(EXIT_SUCCESS);
-                }
-
-                /* After remounting sysfs, the interfaces in the main namespace cannot be accessed. */
-                assert_se(access("/sys/class/net/lo", F_OK) >= 0);
-                assert_se(access("/sys/class/net/dummy-test-mnt", F_OK) < 0 && errno == ENOENT);
-
-                _exit(EXIT_SUCCESS);
-        }
-}
-
 TEST(mount_option_mangle) {
         char *opts = NULL;
         unsigned long f;
@@ -636,17 +506,4 @@ TEST(bind_mount_submounts) {
         assert_se(umount_recursive(b, 0) >= 0);
 }
 
-static int intro(void) {
-         /* Create a dummy network interface for testing remount_sysfs(). */
-        (void) system("ip link add dummy-test-mnt type dummy");
-
-        return 0;
-}
-
-static int outro(void) {
-        (void) system("ip link del dummy-test-mnt");
-
-        return 0;
-}
-
-DEFINE_TEST_MAIN_FULL(LOG_DEBUG, intro, outro);
+DEFINE_TEST_MAIN(LOG_DEBUG);