git.ipfire.org Git - thirdparty/systemd.git/commitdiff
mount-util: use new mount API for bind mount tunnel
author Luca Boccassi <bluca@debian.org>
Tue, 26 Sep 2023 23:01:45 +0000 (00:01 +0100)
committer Luca Boccassi <bluca@debian.org>
Mon, 2 Oct 2023 13:02:32 +0000 (14:02 +0100)
If the new mount API is available, bind mounting into a namespace can be
simplified a lot.

src/shared/mount-util.c

index 20f93b7c8930354ed8b7a680df7ae7d080efa422..8acbadcaf9f8dd451e3ea8ea38ac8bea0aed6dea 100644 (file)
@@ -797,12 +797,16 @@ int mount_option_mangle(
         return 0;
 }
 
-static int mount_in_namespace(
-                pid_t target,
+static int mount_in_namespace_legacy(
+                const char *chased_src_path,
+                int chased_src_fd,
+                struct stat *chased_src_st,
                 const char *propagate_path,
                 const char *incoming_path,
-                const char *src,
                 const char *dest,
+                int pidns_fd,
+                int mntns_fd,
+                int root_fd,
                 bool read_only,
                 bool make_file_or_directory,
                 const MountOptions *options,
@@ -810,52 +814,29 @@ static int mount_in_namespace(
                 bool is_image) {
 
         _cleanup_close_pair_ int errno_pipe_fd[2] = PIPE_EBADF;
-        _cleanup_close_ int mntns_fd = -EBADF, root_fd = -EBADF, pidns_fd = -EBADF, chased_src_fd = -EBADF;
         char mount_slave[] = "/tmp/propagate.XXXXXX", *mount_tmp, *mount_outside, *p;
         bool mount_slave_created = false, mount_slave_mounted = false,
                 mount_tmp_created = false, mount_tmp_mounted = false,
                 mount_outside_created = false, mount_outside_mounted = false;
-        _cleanup_free_ char *chased_src_path = NULL;
-        struct stat st;
         pid_t child;
         int r;
 
-        assert(target > 0);
+        assert(chased_src_path);
+        assert(chased_src_fd >= 0);
+        assert(chased_src_st);
         assert(propagate_path);
         assert(incoming_path);
-        assert(src);
         assert(dest);
+        assert(pidns_fd >= 0);
+        assert(mntns_fd >= 0);
+        assert(root_fd >= 0);
         assert(!options || is_image);
 
-        r = namespace_open(target, &pidns_fd, &mntns_fd, NULL, NULL, &root_fd);
-        if (r < 0)
-                return log_debug_errno(r, "Failed to retrieve FDs of the target process' namespace: %m");
-
-        r = in_same_namespace(target, 0, NAMESPACE_MOUNT);
-        if (r < 0)
-                return log_debug_errno(r, "Failed to determine if mount namespaces are equal: %m");
-        /* We can't add new mounts at runtime if the process wasn't started in a namespace */
-        if (r > 0)
-                return log_debug_errno(SYNTHETIC_ERRNO(EINVAL), "Failed to activate bind mount in target, not running in a mount namespace");
-
-        /* One day, when bind mounting /proc/self/fd/n works across namespace boundaries we should rework
-         * this logic to make use of it... */
-
         p = strjoina(propagate_path, "/");
         r = laccess(p, F_OK);
         if (r < 0)
                 return log_debug_errno(r == -ENOENT ? SYNTHETIC_ERRNO(EOPNOTSUPP) : r, "Target does not allow propagation of mount points");
 
-        r = chase(src, NULL, 0, &chased_src_path, &chased_src_fd);
-        if (r < 0)
-                return log_debug_errno(r, "Failed to resolve source path of %s: %m", src);
-        log_debug("Chased source path of %s to %s", src, chased_src_path);
-
-        if (fstat(chased_src_fd, &st) < 0)
-                return log_debug_errno(errno, "Failed to stat() resolved source path %s: %m", src);
-        if (S_ISLNK(st.st_mode)) /* This shouldn't really happen, given that we just chased the symlinks above, but let's better be safe… */
-                return log_debug_errno(SYNTHETIC_ERRNO(EOPNOTSUPP), "Source directory %s can't be a symbolic link", src);
-
         /* Our goal is to install a new bind mount into the container,
            possibly read-only. This is irritatingly complex
            unfortunately, currently.
@@ -885,7 +866,7 @@ static int mount_in_namespace(
         if (is_image)
                 r = mkdir_p(mount_tmp, 0700);
         else
-                r = make_mount_point_inode_from_stat(&st, mount_tmp, 0700);
+                r = make_mount_point_inode_from_stat(chased_src_st, mount_tmp, 0700);
         if (r < 0) {
                 log_debug_errno(r, "Failed to create temporary mount point %s: %m", mount_tmp);
                 goto finish;
@@ -913,7 +894,7 @@ static int mount_in_namespace(
          * right-away. */
 
         mount_outside = strjoina(propagate_path, "/XXXXXX");
-        if (is_image || S_ISDIR(st.st_mode))
+        if (is_image || S_ISDIR(chased_src_st->st_mode))
                 r = mkdtemp(mount_outside) ? 0 : -errno;
         else {
                 r = mkostemp_safe(mount_outside);
@@ -933,7 +914,7 @@ static int mount_in_namespace(
         mount_outside_mounted = true;
         mount_tmp_mounted = false;
 
-        if (is_image || S_ISDIR(st.st_mode))
+        if (is_image || S_ISDIR(chased_src_st->st_mode))
                 (void) rmdir(mount_tmp);
         else
                 (void) unlink(mount_tmp);
@@ -962,7 +943,7 @@ static int mount_in_namespace(
                 if (make_file_or_directory) {
                         if (!is_image) {
                                 (void) mkdir_parents(dest, 0755);
-                                (void) make_mount_point_inode_from_stat(&st, dest, 0700);
+                                (void) make_mount_point_inode_from_stat(chased_src_st, dest, 0700);
                         } else
                                 (void) mkdir_p(dest, 0755);
                 }
@@ -1012,7 +993,7 @@ finish:
         if (mount_outside_mounted)
                 (void) umount_verbose(LOG_DEBUG, mount_outside, UMOUNT_NOFOLLOW);
         if (mount_outside_created) {
-                if (is_image || S_ISDIR(st.st_mode))
+                if (is_image || S_ISDIR(chased_src_st->st_mode))
                         (void) rmdir(mount_outside);
                 else
                         (void) unlink(mount_outside);
@@ -1021,7 +1002,7 @@ finish:
         if (mount_tmp_mounted)
                 (void) umount_verbose(LOG_DEBUG, mount_tmp, UMOUNT_NOFOLLOW);
         if (mount_tmp_created) {
-                if (is_image || S_ISDIR(st.st_mode))
+                if (is_image || S_ISDIR(chased_src_st->st_mode))
                         (void) rmdir(mount_tmp);
                 else
                         (void) unlink(mount_tmp);
@@ -1035,6 +1016,139 @@ finish:
         return r;
 }
 
+static int mount_in_namespace(
+                pid_t target,
+                const char *propagate_path,
+                const char *incoming_path,
+                const char *src,
+                const char *dest,
+                bool read_only,
+                bool make_file_or_directory,
+                const MountOptions *options,
+                const ImagePolicy *image_policy,
+                bool is_image) {
+
+        _cleanup_close_pair_ int errno_pipe_fd[2] = PIPE_EBADF;
+        _cleanup_close_ int mntns_fd = -EBADF, root_fd = -EBADF, pidns_fd = -EBADF, chased_src_fd = -EBADF,
+                            new_mount_fd = -EBADF;
+        _cleanup_free_ char *chased_src_path = NULL;
+        struct stat st;
+        pid_t child;
+        int r;
+
+        assert(target > 0);
+        assert(propagate_path);
+        assert(incoming_path);
+        assert(src);
+        assert(dest);
+        assert(!options || is_image);
+
+        r = namespace_open(target, &pidns_fd, &mntns_fd, NULL, NULL, &root_fd);
+        if (r < 0)
+                return log_debug_errno(r, "Failed to retrieve FDs of the target process' namespace: %m");
+
+        r = in_same_namespace(target, 0, NAMESPACE_MOUNT);
+        if (r < 0)
+                return log_debug_errno(r, "Failed to determine if mount namespaces are equal: %m");
+        /* We can't add new mounts at runtime if the process wasn't started in a namespace */
+        if (r > 0)
+                return log_debug_errno(SYNTHETIC_ERRNO(EINVAL), "Failed to activate bind mount in target, not running in a mount namespace");
+
+        r = chase(src, NULL, 0, &chased_src_path, &chased_src_fd);
+        if (r < 0)
+                return log_debug_errno(r, "Failed to resolve source path of %s: %m", src);
+        log_debug("Chased source path of %s to %s", src, chased_src_path);
+
+        if (fstat(chased_src_fd, &st) < 0)
+                return log_debug_errno(errno, "Failed to stat() resolved source path %s: %m", src);
+        if (S_ISLNK(st.st_mode)) /* This shouldn't really happen, given that we just chased the symlinks above, but let's better be safe… */
+                return log_debug_errno(SYNTHETIC_ERRNO(EOPNOTSUPP), "Source directory %s can't be a symbolic link", src);
+
+        if (is_image || !mount_new_api_supported()) /* Fallback if we can't use the new mount API */
+                return mount_in_namespace_legacy(
+                                chased_src_path,
+                                chased_src_fd,
+                                &st,
+                                propagate_path,
+                                incoming_path,
+                                dest,
+                                pidns_fd,
+                                mntns_fd,
+                                root_fd,
+                                read_only,
+                                make_file_or_directory,
+                                options,
+                                image_policy,
+                                is_image);
+
+        new_mount_fd = open_tree(
+                        chased_src_fd,
+                        "",
+                        OPEN_TREE_CLONE|OPEN_TREE_CLOEXEC|AT_SYMLINK_NOFOLLOW|AT_EMPTY_PATH);
+        if (new_mount_fd < 0)
+                return log_debug_errno(
+                                errno,
+                                "Failed to open mount point \"%s\": %m",
+                                chased_src_path);
+
+        if (read_only && mount_setattr(new_mount_fd, "", AT_EMPTY_PATH,
+                                       &(struct mount_attr) {
+                                               .attr_set = MOUNT_ATTR_RDONLY,
+                                       }, MOUNT_ATTR_SIZE_VER0) < 0)
+                return log_debug_errno(
+                                errno,
+                                "Failed to set mount flags for \"%s\": %m",
+                                chased_src_path);
+
+        if (pipe2(errno_pipe_fd, O_CLOEXEC|O_NONBLOCK) < 0)
+                return log_debug_errno(errno, "Failed to create pipe: %m");
+
+        r = namespace_fork("(sd-bindmnt)",
+                           "(sd-bindmnt-inner)",
+                           /* except_fds= */ NULL,
+                           /* n_except_fds= */ 0,
+                           FORK_RESET_SIGNALS|FORK_DEATHSIG,
+                           pidns_fd,
+                           mntns_fd,
+                           /* netns_fd= */ -1,
+                           /* userns_fd= */ -1,
+                           root_fd,
+                           &child);
+        if (r < 0)
+                return log_debug_errno(r, "Failed to fork off: %m");
+        if (r == 0) {
+                errno_pipe_fd[0] = safe_close(errno_pipe_fd[0]);
+
+                if (make_file_or_directory) {
+                        (void) mkdir_parents(dest, 0755);
+                        (void) make_mount_point_inode_from_stat(&st, dest, 0700);
+                }
+
+                if (move_mount(new_mount_fd, "", -EBADF, dest, MOVE_MOUNT_F_EMPTY_PATH) < 0) {
+                        (void) write(errno_pipe_fd[1], &errno, sizeof(errno));
+                        errno_pipe_fd[1] = safe_close(errno_pipe_fd[1]);
+
+                        _exit(EXIT_FAILURE);
+                }
+
+                _exit(EXIT_SUCCESS);
+        }
+
+        errno_pipe_fd[1] = safe_close(errno_pipe_fd[1]);
+
+        r = wait_for_terminate_and_check("(sd-bindmnt)", child, 0);
+        if (r < 0)
+                return log_debug_errno(r, "Failed to wait for child: %m");
+        if (r != EXIT_SUCCESS) {
+                if (read(errno_pipe_fd[0], &r, sizeof(r)) == sizeof(r))
+                        return log_debug_errno(r, "Failed to mount: %m");
+
+                return log_debug_errno(SYNTHETIC_ERRNO(EPROTO), "Child failed.");
+        }
+
+        return 0;
+}
+
 int bind_mount_in_namespace(
                 pid_t target,
                 const char *propagate_path,