From: Lennart Poettering Date: Fri, 5 Jan 2024 15:43:41 +0000 (+0100) Subject: nspawn: expose a dir in the container where it can bind AF_UNIX sockets that will... X-Git-Tag: v256-rc1~1185^2~3 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=613fb4b6010932efcaa93375eb4e5a06d4087052;p=thirdparty%2Fsystemd.git nspawn: expose a dir in the container where it can bind AF_UNIX sockets that will appear on the host --- diff --git a/src/nspawn/nspawn.c b/src/nspawn/nspawn.c index 4df0a0092d2..7ec9889870c 100644 --- a/src/nspawn/nspawn.c +++ b/src/nspawn/nspawn.c @@ -2,7 +2,6 @@ #include #include -#include #include #if HAVE_SELINUX #include @@ -10,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -17,6 +17,8 @@ #include #include +#include /* Must be included after */ + #include "sd-bus.h" #include "sd-daemon.h" #include "sd-id128.h" @@ -3608,6 +3610,102 @@ static int setup_notify_child(void) { return TAKE_FD(fd); } +static int setup_unix_export_dir_outside(char **ret) { + int r; + + assert(ret); + + _cleanup_free_ char *p = NULL; + p = path_join("/run/systemd/nspawn/unix-export", arg_machine); + if (!p) + return log_oom(); + + r = path_is_mount_point(p, /* root= */ NULL, 0); + if (r > 0) + return log_error_errno(SYNTHETIC_ERRNO(EEXIST), "Mount point '%s' exists already, refusing.", p); + if (r < 0 && r != -ENOENT) + return log_error_errno(r, "Failed to detect if '%s' is a mount point: %m", p); + + r = mkdir_p(p, 0755); + if (r < 0) + return log_error_errno(r, "Failed to create '%s': %m", p); + + _cleanup_(rmdir_and_freep) char *q = TAKE_PTR(p); + + /* Mount the "unix export" directory really tiny, just 64 inodes. We mark the superblock writable + * (since the container shall bind sockets into it). */ + r = mount_nofollow_verbose( + LOG_ERR, + "tmpfs", + q, + "tmpfs", + MS_NODEV|MS_NOEXEC|MS_NOSUID|ms_nosymfollow_supported(), + "size=4M,nr_inodes=64,mode=0755"); + if (r < 0) + return r; + + _cleanup_(umount_and_rmdir_and_freep) char *w = TAKE_PTR(q); + + /* After creating the superblock we change the bind mount to be read-only. This means that the fs + * itself is writable, but not through the mount accessible from the host. */ + r = mount_nofollow_verbose( + LOG_ERR, + /* source= */ NULL, + w, + /* fstype= */ NULL, + MS_BIND|MS_REMOUNT|MS_RDONLY|MS_NODEV|MS_NOEXEC|MS_NOSUID|ms_nosymfollow_supported(), + /* options= */ NULL); + if (r < 0) + return r; + + *ret = TAKE_PTR(w); + return 0; +} + +static int setup_unix_export_host_inside(const char *directory, const char *unix_export_path) { + int r; + + assert(directory); + assert(unix_export_path); + + r = make_run_host(directory); + if (r < 0) + return r; + + _cleanup_free_ char *p = path_join(directory, "run/host/unix-export"); + if (!p) + return log_oom(); + + if (mkdir(p, 0755) < 0) + return log_error_errno(errno, "Failed to create '%s': %m", p); + + r = mount_nofollow_verbose( + LOG_ERR, + unix_export_path, + p, + /* fstype= */ NULL, + MS_BIND, + /* options= */ NULL); + if (r < 0) + return r; + + r = mount_nofollow_verbose( + LOG_ERR, + /* source= */ NULL, + p, + /* fstype= */ NULL, + MS_BIND|MS_REMOUNT|MS_NODEV|MS_NOEXEC|MS_NOSUID|ms_nosymfollow_supported(), + /* options= */ NULL); + if (r < 0) + return r; + + r = userns_lchown(p, 0, 0); + if (r < 0) + return log_error_errno(r, "Failed to chown '%s': %m", p); + + return 0; +} + static int outer_child( Barrier *barrier, const char *directory, @@ -3615,7 +3713,8 @@ static int outer_child( int fd_outer_socket, int fd_inner_socket, FDSet *fds, - int netns_fd) { + int netns_fd, + const char *unix_export_path) { _cleanup_(bind_user_context_freep) BindUserContext *bind_user_context = NULL; _cleanup_strv_free_ char **os_release_pairs = NULL; @@ -3909,6 +4008,10 @@ static int outer_child( p = prefix_roota(directory, "/run/host"); (void) make_inaccessible_nodes(p, arg_uid_shift, arg_uid_shift); + r = setup_unix_export_host_inside(directory, unix_export_path); + if (r < 0) + return r; + r = setup_pts(directory); if (r < 0) return r; @@ -4760,6 +4863,7 @@ static int run_container( _cleanup_close_ int notify_socket = -EBADF, mntns_fd = -EBADF, fd_kmsg_fifo = -EBADF; _cleanup_(barrier_destroy) Barrier barrier = BARRIER_NULL; _cleanup_(sd_event_source_unrefp) sd_event_source *notify_event_source = NULL; + _cleanup_(umount_and_rmdir_and_freep) char *unix_export_host_dir = NULL; _cleanup_(sd_event_unrefp) sd_event *event = NULL; _cleanup_(pty_forward_freep) PTYForward *forward = NULL; _cleanup_(sd_netlink_unrefp) sd_netlink *rtnl = NULL; @@ -4775,6 +4879,11 @@ static int run_container( assert_se(sigemptyset(&mask_chld) == 0); assert_se(sigaddset(&mask_chld, SIGCHLD) == 0); + /* Set up the unix export host directory on the host first */ + r = setup_unix_export_dir_outside(&unix_export_host_dir); + if (r < 0) + return r; + if (arg_userns_mode == USER_NAMESPACE_PICK) { /* When we shall pick the UID/GID range, let's first lock /etc/passwd, so that we can safely * check with getpwuid() if the specific user already exists. Note that /etc might be @@ -4845,7 +4954,8 @@ static int run_container( fd_outer_socket_pair[1], fd_inner_socket_pair[1], fds, - child_netns_fd); + child_netns_fd, + unix_export_host_dir); if (r < 0) _exit(EXIT_FAILURE); @@ -5919,6 +6029,10 @@ finish: p = strjoina("/run/systemd/nspawn/propagate/", arg_machine); (void) rm_rf(p, REMOVE_ROOT); + + p = strjoina("/run/systemd/nspawn/unix-export/", arg_machine); + (void) umount2(p, MNT_DETACH|UMOUNT_NOFOLLOW); + (void) rmdir(p); } expose_port_flush(&fw_ctx, arg_expose_ports, AF_INET, &expose_args.address4);