From: Luca Boccassi Date: Tue, 26 Sep 2023 23:01:45 +0000 (+0100) Subject: mount-util: use new mount API for bind mount tunnel X-Git-Tag: v255-rc1~360^2~4 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=a7e0199ecef8c0f8c413c069306cf6cc85a445ff;p=thirdparty%2Fsystemd.git mount-util: use new mount API for bind mount tunnel If we have the new mount API we can simplify bind mounting into a namespace a lot --- diff --git a/src/shared/mount-util.c b/src/shared/mount-util.c index 20f93b7c893..8acbadcaf9f 100644 --- a/src/shared/mount-util.c +++ b/src/shared/mount-util.c @@ -797,12 +797,16 @@ int mount_option_mangle( return 0; } -static int mount_in_namespace( - pid_t target, +static int mount_in_namespace_legacy( + const char *chased_src_path, + int chased_src_fd, + struct stat *chased_src_st, const char *propagate_path, const char *incoming_path, - const char *src, const char *dest, + int pidns_fd, + int mntns_fd, + int root_fd, bool read_only, bool make_file_or_directory, const MountOptions *options, @@ -810,52 +814,29 @@ static int mount_in_namespace( bool is_image) { _cleanup_close_pair_ int errno_pipe_fd[2] = PIPE_EBADF; - _cleanup_close_ int mntns_fd = -EBADF, root_fd = -EBADF, pidns_fd = -EBADF, chased_src_fd = -EBADF; char mount_slave[] = "/tmp/propagate.XXXXXX", *mount_tmp, *mount_outside, *p; bool mount_slave_created = false, mount_slave_mounted = false, mount_tmp_created = false, mount_tmp_mounted = false, mount_outside_created = false, mount_outside_mounted = false; - _cleanup_free_ char *chased_src_path = NULL; - struct stat st; pid_t child; int r; - assert(target > 0); + assert(chased_src_path); + assert(chased_src_fd >= 0); + assert(chased_src_st); assert(propagate_path); assert(incoming_path); - assert(src); assert(dest); + assert(pidns_fd >= 0); + assert(mntns_fd >= 0); + assert(root_fd >= 0); assert(!options || is_image); - r = namespace_open(target, &pidns_fd, &mntns_fd, NULL, NULL, &root_fd); - if (r < 0) - return log_debug_errno(r, "Failed to retrieve FDs of the target process' namespace: %m"); - - r = in_same_namespace(target, 0, NAMESPACE_MOUNT); - if (r < 0) - return log_debug_errno(r, "Failed to determine if mount namespaces are equal: %m"); - /* We can't add new mounts at runtime if the process wasn't started in a namespace */ - if (r > 0) - return log_debug_errno(SYNTHETIC_ERRNO(EINVAL), "Failed to activate bind mount in target, not running in a mount namespace"); - - /* One day, when bind mounting /proc/self/fd/n works across namespace boundaries we should rework - * this logic to make use of it... */ - p = strjoina(propagate_path, "/"); r = laccess(p, F_OK); if (r < 0) return log_debug_errno(r == -ENOENT ? SYNTHETIC_ERRNO(EOPNOTSUPP) : r, "Target does not allow propagation of mount points"); - r = chase(src, NULL, 0, &chased_src_path, &chased_src_fd); - if (r < 0) - return log_debug_errno(r, "Failed to resolve source path of %s: %m", src); - log_debug("Chased source path of %s to %s", src, chased_src_path); - - if (fstat(chased_src_fd, &st) < 0) - return log_debug_errno(errno, "Failed to stat() resolved source path %s: %m", src); - if (S_ISLNK(st.st_mode)) /* This shouldn't really happen, given that we just chased the symlinks above, but let's better be safe… */ - return log_debug_errno(SYNTHETIC_ERRNO(EOPNOTSUPP), "Source directory %s can't be a symbolic link", src); - /* Our goal is to install a new bind mount into the container, possibly read-only. This is irritatingly complex unfortunately, currently. @@ -885,7 +866,7 @@ static int mount_in_namespace( if (is_image) r = mkdir_p(mount_tmp, 0700); else - r = make_mount_point_inode_from_stat(&st, mount_tmp, 0700); + r = make_mount_point_inode_from_stat(chased_src_st, mount_tmp, 0700); if (r < 0) { log_debug_errno(r, "Failed to create temporary mount point %s: %m", mount_tmp); goto finish; @@ -913,7 +894,7 @@ static int mount_in_namespace( * right-away. */ mount_outside = strjoina(propagate_path, "/XXXXXX"); - if (is_image || S_ISDIR(st.st_mode)) + if (is_image || S_ISDIR(chased_src_st->st_mode)) r = mkdtemp(mount_outside) ? 0 : -errno; else { r = mkostemp_safe(mount_outside); @@ -933,7 +914,7 @@ static int mount_in_namespace( mount_outside_mounted = true; mount_tmp_mounted = false; - if (is_image || S_ISDIR(st.st_mode)) + if (is_image || S_ISDIR(chased_src_st->st_mode)) (void) rmdir(mount_tmp); else (void) unlink(mount_tmp); @@ -962,7 +943,7 @@ static int mount_in_namespace( if (make_file_or_directory) { if (!is_image) { (void) mkdir_parents(dest, 0755); - (void) make_mount_point_inode_from_stat(&st, dest, 0700); + (void) make_mount_point_inode_from_stat(chased_src_st, dest, 0700); } else (void) mkdir_p(dest, 0755); } @@ -1012,7 +993,7 @@ finish: if (mount_outside_mounted) (void) umount_verbose(LOG_DEBUG, mount_outside, UMOUNT_NOFOLLOW); if (mount_outside_created) { - if (is_image || S_ISDIR(st.st_mode)) + if (is_image || S_ISDIR(chased_src_st->st_mode)) (void) rmdir(mount_outside); else (void) unlink(mount_outside); @@ -1021,7 +1002,7 @@ finish: if (mount_tmp_mounted) (void) umount_verbose(LOG_DEBUG, mount_tmp, UMOUNT_NOFOLLOW); if (mount_tmp_created) { - if (is_image || S_ISDIR(st.st_mode)) + if (is_image || S_ISDIR(chased_src_st->st_mode)) (void) rmdir(mount_tmp); else (void) unlink(mount_tmp); @@ -1035,6 +1016,139 @@ finish: return r; } +static int mount_in_namespace( + pid_t target, + const char *propagate_path, + const char *incoming_path, + const char *src, + const char *dest, + bool read_only, + bool make_file_or_directory, + const MountOptions *options, + const ImagePolicy *image_policy, + bool is_image) { + + _cleanup_close_pair_ int errno_pipe_fd[2] = PIPE_EBADF; + _cleanup_close_ int mntns_fd = -EBADF, root_fd = -EBADF, pidns_fd = -EBADF, chased_src_fd = -EBADF, + new_mount_fd = -EBADF; + _cleanup_free_ char *chased_src_path = NULL; + struct stat st; + pid_t child; + int r; + + assert(target > 0); + assert(propagate_path); + assert(incoming_path); + assert(src); + assert(dest); + assert(!options || is_image); + + r = namespace_open(target, &pidns_fd, &mntns_fd, NULL, NULL, &root_fd); + if (r < 0) + return log_debug_errno(r, "Failed to retrieve FDs of the target process' namespace: %m"); + + r = in_same_namespace(target, 0, NAMESPACE_MOUNT); + if (r < 0) + return log_debug_errno(r, "Failed to determine if mount namespaces are equal: %m"); + /* We can't add new mounts at runtime if the process wasn't started in a namespace */ + if (r > 0) + return log_debug_errno(SYNTHETIC_ERRNO(EINVAL), "Failed to activate bind mount in target, not running in a mount namespace"); + + r = chase(src, NULL, 0, &chased_src_path, &chased_src_fd); + if (r < 0) + return log_debug_errno(r, "Failed to resolve source path of %s: %m", src); + log_debug("Chased source path of %s to %s", src, chased_src_path); + + if (fstat(chased_src_fd, &st) < 0) + return log_debug_errno(errno, "Failed to stat() resolved source path %s: %m", src); + if (S_ISLNK(st.st_mode)) /* This shouldn't really happen, given that we just chased the symlinks above, but let's better be safe… */ + return log_debug_errno(SYNTHETIC_ERRNO(EOPNOTSUPP), "Source directory %s can't be a symbolic link", src); + + if (is_image || !mount_new_api_supported()) /* Fallback if we can't use the new mount API */ + return mount_in_namespace_legacy( + chased_src_path, + chased_src_fd, + &st, + propagate_path, + incoming_path, + dest, + pidns_fd, + mntns_fd, + root_fd, + read_only, + make_file_or_directory, + options, + image_policy, + is_image); + + new_mount_fd = open_tree( + chased_src_fd, + "", + OPEN_TREE_CLONE|OPEN_TREE_CLOEXEC|AT_SYMLINK_NOFOLLOW|AT_EMPTY_PATH); + if (new_mount_fd < 0) + return log_debug_errno( + errno, + "Failed to open mount point \"%s\": %m", + chased_src_path); + + if (read_only && mount_setattr(new_mount_fd, "", AT_EMPTY_PATH, + &(struct mount_attr) { + .attr_set = MOUNT_ATTR_RDONLY, + }, MOUNT_ATTR_SIZE_VER0) < 0) + return log_debug_errno( + errno, + "Failed to set mount flags for \"%s\": %m", + chased_src_path); + + if (pipe2(errno_pipe_fd, O_CLOEXEC|O_NONBLOCK) < 0) + return log_debug_errno(errno, "Failed to create pipe: %m"); + + r = namespace_fork("(sd-bindmnt)", + "(sd-bindmnt-inner)", + /* except_fds= */ NULL, + /* n_except_fds= */ 0, + FORK_RESET_SIGNALS|FORK_DEATHSIG, + pidns_fd, + mntns_fd, + /* netns_fd= */ -1, + /* userns_fd= */ -1, + root_fd, + &child); + if (r < 0) + return log_debug_errno(r, "Failed to fork off: %m"); + if (r == 0) { + errno_pipe_fd[0] = safe_close(errno_pipe_fd[0]); + + if (make_file_or_directory) { + (void) mkdir_parents(dest, 0755); + (void) make_mount_point_inode_from_stat(&st, dest, 0700); + } + + if (move_mount(new_mount_fd, "", -EBADF, dest, MOVE_MOUNT_F_EMPTY_PATH) < 0) { + (void) write(errno_pipe_fd[1], &errno, sizeof(errno)); + errno_pipe_fd[1] = safe_close(errno_pipe_fd[1]); + + _exit(EXIT_FAILURE); + } + + _exit(EXIT_SUCCESS); + } + + errno_pipe_fd[1] = safe_close(errno_pipe_fd[1]); + + r = wait_for_terminate_and_check("(sd-bindmnt)", child, 0); + if (r < 0) + return log_debug_errno(r, "Failed to wait for child: %m"); + if (r != EXIT_SUCCESS) { + if (read(errno_pipe_fd[0], &r, sizeof(r)) == sizeof(r)) + return log_debug_errno(r, "Failed to mount: %m"); + + return log_debug_errno(SYNTHETIC_ERRNO(EPROTO), "Child failed."); + } + + return 0; +} + int bind_mount_in_namespace( pid_t target, const char *propagate_path,