From: Lennart Poettering Date: Fri, 28 Nov 2025 15:28:42 +0000 (+0100) Subject: mountfsd,nsresource: allow recycling mountfsd/nsresourced client connections X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=68ed4f3c669b3b175222eb6e3ba11cc34fa03b12;p=thirdparty%2Fsystemd.git mountfsd,nsresource: allow recycling mountfsd/nsresourced client connections So far we opened a new Varlink connection for every mountfsd/nsresourced method call. Given each tool only does a very small number of calls (usually 1…5) on them and the connections are cheap, this is not too wasteful. Nonetheless, let's do something about it, and allow reusing the connection for multiple calls. This not only makes things a bit more efficient, but has one more important benefit: Varlink connections pin the security context of the client when connecting. This means that varlink method calls done with a connection established while some code was privileged will still operate as privileged once privs are dropped, until the connection is closed. This pinning effect is really nice, as it gives us behaviour in a "capability system" like scheme. Later code is going to use that to continue doing certain priv userns ops even after unsharing userns and becoming fully unpriv. 
--- diff --git a/src/core/exec-invoke.c b/src/core/exec-invoke.c index 06e6a3081fc..888a2555fbb 100644 --- a/src/core/exec-invoke.c +++ b/src/core/exec-invoke.c @@ -14,6 +14,7 @@ #include #include "sd-messages.h" +#include "sd-varlink.h" #include "apparmor-util.h" /* IWYU pragma: keep */ #include "argv-util.h" @@ -2397,6 +2398,7 @@ static int setup_private_users_child(int unshare_ready_fd, const char *uid_map, } static int setup_private_users( + sd_varlink *nsresource_link, PrivateUsers private_users, uid_t saved_uid, /* service manager uid */ gid_t saved_gid, /* service manager gid */ @@ -2438,7 +2440,10 @@ static int setup_private_users( if (uid_is_valid(*uid) || uid_is_valid(*gid)) return log_debug_errno(SYNTHETIC_ERRNO(EPERM), "When allocating dynamic user namespace range, target UID/GID must be root, refusing."); - _cleanup_close_ int userns_fd = nsresource_allocate_userns(/* name= */ NULL, NSRESOURCE_UIDS_64K); + _cleanup_close_ int userns_fd = nsresource_allocate_userns( + nsresource_link, + /* name= */ NULL, + NSRESOURCE_UIDS_64K); if (userns_fd < 0) return userns_fd; @@ -3786,6 +3791,7 @@ static int apply_mount_namespace( PidRef *bpffs_pidref, int bpffs_socket_fd, int bpffs_errno_pipe, + sd_varlink *mountfsd_link, char **reterr_path) { _cleanup_(verity_settings_done) VeritySettings verity = VERITY_SETTINGS_DEFAULT; @@ -4005,6 +4011,8 @@ static int apply_mount_namespace( .bpffs_pidref = bpffs_pidref, .bpffs_socket_fd = bpffs_socket_fd, .bpffs_errno_pipe = bpffs_errno_pipe, + + .mountfsd_link = mountfsd_link, }; r = setup_namespace(&parameters, reterr_path); @@ -4659,6 +4667,7 @@ static int setup_delegated_namespaces( PidRef *bpffs_pidref, int bpffs_socket_fd, int bpffs_errno_pipe, + sd_varlink *mountfsd_link, int *reterr_exit_status) { int r; @@ -4773,18 +4782,20 @@ static int setup_delegated_namespaces( exec_namespace_is_delegated(context, params, have_cap_sys_admin, CLONE_NEWNS) == delegate) { _cleanup_free_ char *error_path = NULL; - r = 
apply_mount_namespace(command->flags, - context, - params, - runtime, + r = apply_mount_namespace( + command->flags, + context, + params, + runtime, memory_pressure_path, - needs_sandboxing, - uid, - gid, - bpffs_pidref, - bpffs_socket_fd, - bpffs_errno_pipe, - &error_path); + needs_sandboxing, + uid, + gid, + bpffs_pidref, + bpffs_socket_fd, + bpffs_errno_pipe, + mountfsd_link, + &error_path); if (r < 0) { *reterr_exit_status = EXIT_NAMESPACE; return log_error_errno(r, "Failed to set up mount namespacing%s%s: %m", @@ -5744,6 +5755,24 @@ int exec_invoke( } } + _cleanup_(sd_varlink_unrefp) sd_varlink *mountfsd_link = NULL, *nsresource_link = NULL; + if (needs_sandboxing && + exec_context_get_effective_private_users(context, params) == PRIVATE_USERS_MANAGED) { + + /* In managed mode we need to allocate a userns via nsresource, and then assign mounts to + * it. We must do so with our original privileges (since after creating the userns, we might + * simply not have the necessary privs for the IPC calls anymore), hence do this here, ahead + * of time. */ + + r = mountfsd_connect(&mountfsd_link); + if (r < 0) + return log_error_errno(r, "Failed to connect to mountfsd: %m"); + + r = nsresource_connect(&nsresource_link); + if (r < 0) + return log_error_errno(r, "Failed to connect to nsresourced: %m"); + } + needs_mount_namespace = exec_needs_mount_namespace(context, params, runtime); for (ExecDirectoryType dt = 0; dt < _EXEC_DIRECTORY_TYPE_MAX; dt++) { @@ -5939,6 +5968,7 @@ int exec_invoke( /* The kernel requires /proc/pid/setgroups be set to "deny" prior to writing /proc/pid/gid_map in * unprivileged user namespaces. 
*/ r = setup_private_users( + nsresource_link, pu, saved_uid, saved_gid, @@ -5977,6 +6007,7 @@ int exec_invoke( &bpffs_pidref, bpffs_socket_fd, bpffs_errno_pipe, + mountfsd_link, exit_status); if (r < 0) return r; @@ -6027,6 +6058,7 @@ int exec_invoke( PrivateUsers pu = exec_context_get_effective_private_users(context, params); r = setup_private_users( + nsresource_link, pu, saved_uid, saved_gid, @@ -6071,10 +6103,15 @@ int exec_invoke( &bpffs_pidref, bpffs_socket_fd, bpffs_errno_pipe, + mountfsd_link, exit_status); if (r < 0) return r; + /* We are done now with the nsresourced/mountfsd shenanigans, let's close the connections */ + nsresource_link = sd_varlink_unref(nsresource_link); + mountfsd_link = sd_varlink_unref(mountfsd_link); + /* Kill unnecessary process, for the case that e.g. when the bpffs mount point is hidden. */ pidref_done_sigkill_wait(&bpffs_pidref); diff --git a/src/core/namespace.c b/src/core/namespace.c index e348d26c43b..504f80cb635 100644 --- a/src/core/namespace.c +++ b/src/core/namespace.c @@ -2667,6 +2667,7 @@ int setup_namespace(const NamespaceParameters *p, char **reterr_path) { return log_debug_errno(userns_fd, "Failed to open our own user namespace: %m"); r = mountfsd_mount_image( + p->mountfsd_link, p->root_image, userns_fd, p->root_image_options, @@ -2678,7 +2679,6 @@ int setup_namespace(const NamespaceParameters *p, char **reterr_path) { return r; } } - } else if (p->root_mstack) { if (namespace_read_only(p)) mstack_flags |= MSTACK_RDONLY; @@ -2693,7 +2693,13 @@ int setup_namespace(const NamespaceParameters *p, char **reterr_path) { return log_debug_errno(userns_fd, "Failed to open our own user namespace: %m"); } - r = mstack_open_images(mstack, userns_fd, p->root_image_policy, /* image_filter= */ NULL, mstack_flags); + r = mstack_open_images( + mstack, + p->mountfsd_link, + userns_fd, + p->root_image_policy, + /* image_filter= */ NULL, + mstack_flags); if (r < 0) return r; } diff --git a/src/core/namespace.h b/src/core/namespace.h 
index 4b62debf2fc..26b0bf8ff2d 100644 --- a/src/core/namespace.h +++ b/src/core/namespace.h @@ -205,6 +205,8 @@ typedef struct NamespaceParameters { PidRef *bpffs_pidref; int bpffs_socket_fd; int bpffs_errno_pipe; + + sd_varlink *mountfsd_link; } NamespaceParameters; int setup_namespace(const NamespaceParameters *p, char **reterr_path); diff --git a/src/dissect/dissect.c b/src/dissect/dissect.c index 7303cfc3bd8..33ddb53d0a6 100644 --- a/src/dissect/dissect.c +++ b/src/dissect/dissect.c @@ -2247,12 +2247,16 @@ static int run(int argc, char *argv[]) { /* Don't run things in private userns, if the mount shall be attached to the host * or if we're copying from/to the host. */ if (!IN_SET(arg_action, ACTION_MOUNT, ACTION_WITH, ACTION_COPY_FROM, ACTION_COPY_TO)) { - userns_fd = nsresource_allocate_userns(/* name= */ NULL, NSRESOURCE_UIDS_64K); /* allocate 64K users by default */ + userns_fd = nsresource_allocate_userns( + /* vl= */ NULL, + /* name= */ NULL, + NSRESOURCE_UIDS_64K); /* allocate 64K users by default */ if (userns_fd < 0) return log_error_errno(userns_fd, "Failed to allocate user namespace with 64K users: %m"); } r = mountfsd_mount_image( + /* vl= */ NULL, arg_image, userns_fd, /* options= */ NULL, diff --git a/src/import/export-tar.c b/src/import/export-tar.c index 81fab3714bd..22f731de574 100644 --- a/src/import/export-tar.c +++ b/src/import/export-tar.c @@ -363,7 +363,12 @@ int tar_export_start( return log_error_errno(r, "Failed to open '%s': %m", p); _cleanup_close_ int mapped_fd = -EBADF; - r = mountfsd_mount_directory_fd(directory_fd, e->userns_fd, DISSECT_IMAGE_FOREIGN_UID, &mapped_fd); + r = mountfsd_mount_directory_fd( + /* vl= */ NULL, + directory_fd, + e->userns_fd, + DISSECT_IMAGE_FOREIGN_UID, + &mapped_fd); if (r < 0) return log_error_errno(r, "Failed to mount directory via mountfsd: %m"); diff --git a/src/import/import-common.c b/src/import/import-common.c index 0c448355fdc..0a5144f94ec 100644 --- a/src/import/import-common.c +++ 
b/src/import/import-common.c @@ -375,7 +375,10 @@ int import_make_foreign_userns(int *userns_fd) { if (*userns_fd >= 0) return 0; - *userns_fd = nsresource_allocate_userns(/* name= */ NULL, NSRESOURCE_UIDS_64K); /* allocate 64K users */ + *userns_fd = nsresource_allocate_userns( + /* vl= */ NULL, + /* name= */ NULL, + NSRESOURCE_UIDS_64K); /* allocate 64K users */ if (*userns_fd < 0) return log_error_errno(*userns_fd, "Failed to allocate transient user namespace: %m"); diff --git a/src/import/import-tar.c b/src/import/import-tar.c index b3d3fc61040..5e74de896e9 100644 --- a/src/import/import-tar.c +++ b/src/import/import-tar.c @@ -4,6 +4,7 @@ #include "sd-daemon.h" #include "sd-event.h" +#include "sd-varlink.h" #include "alloc-util.h" #include "btrfs-util.h" @@ -256,12 +257,27 @@ static int tar_import_fork_tar(TarImport *i) { if (r < 0) return r; + _cleanup_(sd_varlink_unrefp) sd_varlink *mountfsd_link = NULL; + r = mountfsd_connect(&mountfsd_link); + if (r < 0) + return log_error_errno(r, "Failed to connect to mountfsd: %m"); + _cleanup_close_ int directory_fd = -EBADF; - r = mountfsd_make_directory(d, MODE_INVALID, /* flags= */ 0, &directory_fd); + r = mountfsd_make_directory( + mountfsd_link, + d, + MODE_INVALID, + /* flags= */ 0, + &directory_fd); if (r < 0) return log_error_errno(r, "Failed to make directory via mountfsd: %m"); - r = mountfsd_mount_directory_fd(directory_fd, i->userns_fd, DISSECT_IMAGE_FOREIGN_UID, &i->tree_fd); + r = mountfsd_mount_directory_fd( + mountfsd_link, + directory_fd, + i->userns_fd, + DISSECT_IMAGE_FOREIGN_UID, + &i->tree_fd); if (r < 0) return log_error_errno(r, "Failed mount directory via mountfsd: %m"); } else { diff --git a/src/import/pull-oci.c b/src/import/pull-oci.c index d55bd43f570..3abb8777d1f 100644 --- a/src/import/pull-oci.c +++ b/src/import/pull-oci.c @@ -464,12 +464,27 @@ static int oci_pull_job_on_open_disk(PullJob *j) { if (r < 0) return r; + _cleanup_(sd_varlink_unrefp) sd_varlink *mountfsd_link = NULL; + r = 
mountfsd_connect(&mountfsd_link); + if (r < 0) + return log_error_errno(r, "Failed to connect to mountfsd: %m"); + _cleanup_close_ int directory_fd = -EBADF; - r = mountfsd_make_directory(st->temp_path, MODE_INVALID, /* flags= */ 0, &directory_fd); + r = mountfsd_make_directory( + mountfsd_link, + st->temp_path, + MODE_INVALID, + /* flags= */ 0, + &directory_fd); if (r < 0) return log_error_errno(r, "Failed to make directory via mountfsd: %m"); - r = mountfsd_mount_directory_fd(directory_fd, i->userns_fd, DISSECT_IMAGE_FOREIGN_UID, &st->tree_fd); + r = mountfsd_mount_directory_fd( + mountfsd_link, + directory_fd, + i->userns_fd, + DISSECT_IMAGE_FOREIGN_UID, + &st->tree_fd); if (r < 0) return log_error_errno(r, "Failed to mount directory via mountsd: %m"); } else { @@ -1140,6 +1155,7 @@ static int oci_pull_save_mstack(OciPull *i) { return r; r = mountfsd_make_directory_fd( + /* vl= */ NULL, dir_fd, "rw", 0755, diff --git a/src/import/pull-tar.c b/src/import/pull-tar.c index b3d80921067..ae763879b78 100644 --- a/src/import/pull-tar.c +++ b/src/import/pull-tar.c @@ -2,6 +2,7 @@ #include "sd-daemon.h" #include "sd-event.h" +#include "sd-varlink.h" #include "alloc-util.h" #include "btrfs-util.h" @@ -276,6 +277,11 @@ static int tar_pull_make_local_copy(TarPull *p) { if (r < 0) return r; + _cleanup_(sd_varlink_unrefp) sd_varlink *mountfsd_link = NULL; + r = mountfsd_connect(&mountfsd_link); + if (r < 0) + return log_error_errno(r, "Failed to connect to mountsd: %m"); + /* Usually, tar_pull_job_on_open_disk_tar() would allocate ->tree_fd for us, but if * already downloaded the image before, and are just making a copy of the original * download, we need to open ->tree_fd now */ @@ -294,18 +300,33 @@ static int tar_pull_make_local_copy(TarPull *p) { "Image tree '%s' is not owned by the foreign UID range, refusing.", p->final_path); - r = mountfsd_mount_directory_fd(directory_fd, p->userns_fd, DISSECT_IMAGE_FOREIGN_UID, &p->tree_fd); + r = mountfsd_mount_directory_fd( + 
mountfsd_link, + directory_fd, + p->userns_fd, + DISSECT_IMAGE_FOREIGN_UID, + &p->tree_fd); if (r < 0) return log_error_errno(r, "Failed to mount directory via mountfsd: %m"); } _cleanup_close_ int directory_fd = -EBADF; - r = mountfsd_make_directory(t, MODE_INVALID, /* flags= */ 0, &directory_fd); + r = mountfsd_make_directory( + mountfsd_link, + t, + MODE_INVALID, + /* flags= */ 0, + &directory_fd); if (r < 0) return log_error_errno(r, "Failed to make directory via mountfsd: %m"); _cleanup_close_ int copy_fd = -EBADF; - r = mountfsd_mount_directory_fd(directory_fd, p->userns_fd, DISSECT_IMAGE_FOREIGN_UID, &copy_fd); + r = mountfsd_mount_directory_fd( + mountfsd_link, + directory_fd, + p->userns_fd, + DISSECT_IMAGE_FOREIGN_UID, + &copy_fd); if (r < 0) return log_error_errno(r, "Failed to mount directory via mountfsd: %m"); @@ -611,12 +632,27 @@ static int tar_pull_job_on_open_disk_tar(PullJob *j) { if (r < 0) return r; + _cleanup_(sd_varlink_unrefp) sd_varlink *mountfsd_link = NULL; + r = mountfsd_connect(&mountfsd_link); + if (r < 0) + return log_error_errno(r, "Failed to connect to mountfsd: %m"); + _cleanup_close_ int directory_fd = -EBADF; - r = mountfsd_make_directory(where, MODE_INVALID, /* flags= */ 0, &directory_fd); + r = mountfsd_make_directory( + mountfsd_link, + where, + MODE_INVALID, + /* flags= */ 0, + &directory_fd); if (r < 0) return log_error_errno(r, "Failed to make directory via mountfsd: %m"); - r = mountfsd_mount_directory_fd(directory_fd, p->userns_fd, DISSECT_IMAGE_FOREIGN_UID, &p->tree_fd); + r = mountfsd_mount_directory_fd( + mountfsd_link, + directory_fd, + p->userns_fd, + DISSECT_IMAGE_FOREIGN_UID, + &p->tree_fd); if (r < 0) return log_error_errno(r, "Failed to mount directory via mountfsd: %m"); } else { diff --git a/src/mountfsd/mountwork.c b/src/mountfsd/mountwork.c index 7b8812f03a7..25f1a845516 100644 --- a/src/mountfsd/mountwork.c +++ b/src/mountfsd/mountwork.c @@ -662,6 +662,7 @@ static int vl_method_mount_image( if (r < 0) return r; + 
_cleanup_(sd_varlink_unrefp) sd_varlink *nsresource_link = NULL; for (PartitionDesignator d = 0; d < _PARTITION_DESIGNATOR_MAX; d++) { DissectedPartition *pp = di->partitions + d; int fd_idx; @@ -673,7 +674,14 @@ static int vl_method_mount_image( continue; if (userns_fd >= 0) { - r = nsresource_add_mount(userns_fd, pp->fsmount_fd); + + if (!nsresource_link) { + r = nsresource_connect(&nsresource_link); + if (r < 0) + return r; + } + + r = nsresource_add_mount(nsresource_link, userns_fd, pp->fsmount_fd); if (r < 0) return r; } @@ -1206,7 +1214,7 @@ static int vl_method_mount_directory( } if (userns_fd >= 0) { - r = nsresource_add_mount(userns_fd, mount_fd); + r = nsresource_add_mount(/* vl= */ NULL, userns_fd, mount_fd); if (r < 0) return r; } diff --git a/src/mstack/mstack-tool.c b/src/mstack/mstack-tool.c index 01a71d6e567..bc86d8565a3 100644 --- a/src/mstack/mstack-tool.c +++ b/src/mstack/mstack-tool.c @@ -361,6 +361,7 @@ static int mount_mstack(void) { /* dir_fd= */ -EBADF, arg_where, /* temp_mount_dir= */ NULL, /* auto-create temporary directory */ + /* mountfsd_link= */ NULL, /* userns_fd= */ -EBADF, arg_image_policy, arg_image_filter, diff --git a/src/nspawn/nspawn-cgroup.c b/src/nspawn/nspawn-cgroup.c index 4206fe94e52..f1d45bbb9b8 100644 --- a/src/nspawn/nspawn-cgroup.c +++ b/src/nspawn/nspawn-cgroup.c @@ -108,7 +108,10 @@ int create_subcgroup( if (r < 0) return log_error_errno(r, "Failed to add process " PID_FMT " to cgroup %s: %m", pid->pid, payload); - r = nsresource_add_cgroup(userns_fd, cgroup_fd); + r = nsresource_add_cgroup( + /* vl= */ NULL, + userns_fd, + cgroup_fd); if (r < 0) return log_error_errno(r, "Failed to add cgroup %s to userns: %m", payload); } else { diff --git a/src/nspawn/nspawn.c b/src/nspawn/nspawn.c index 47300ff7e3c..08afa171ae8 100644 --- a/src/nspawn/nspawn.c +++ b/src/nspawn/nspawn.c @@ -19,6 +19,7 @@ #include "sd-id128.h" #include "sd-netlink.h" #include "sd-path.h" +#include "sd-varlink.h" #include "alloc-util.h" #include 
"barrier.h" @@ -5442,7 +5443,13 @@ static int run_container( } else { _cleanup_free_ char *host_ifname = NULL; - r = nsresource_add_netif_veth(userns_fd, child_netns_fd, /* namespace_ifname= */ NULL, &host_ifname, /* ret_namespace_ifname= */ NULL); + r = nsresource_add_netif_veth( + /* vl= */ NULL, + userns_fd, + child_netns_fd, + /* namespace_ifname= */ NULL, + &host_ifname, + /* ret_namespace_ifname= */ NULL); if (r < 0) return log_error_errno(r, "Failed to add network interface to container: %m"); @@ -6030,6 +6037,7 @@ static int run(int argc, char *argv[]) { _cleanup_(mstack_freep) MStack *mstack = NULL; _cleanup_(sd_netlink_unrefp) sd_netlink *nfnl = NULL; _cleanup_(pidref_done) PidRef pid = PIDREF_NULL; + _cleanup_(sd_varlink_unrefp) sd_varlink *nsresource_link = NULL, *mountfsd_link = NULL; log_setup(); @@ -6132,13 +6140,28 @@ static int run(int argc, char *argv[]) { if (arg_userns_mode == USER_NAMESPACE_MANAGED) { /* Let's allocate a 64K userns first, if managed mode is chosen */ + r = nsresource_connect(&nsresource_link); + if (r < 0) { + log_error_errno(r, "Failed to connect to nsresourced: %m"); + goto finish; + } + + r = mountfsd_connect(&mountfsd_link); + if (r < 0) { + log_error_errno(r, "Failed to connect to mountsd: %m"); + goto finish; + } + _cleanup_free_ char *userns_name = NULL; if (asprintf(&userns_name, "nspawn-" PID_FMT "-%s", getpid_cached(), arg_machine) < 0) { r = log_oom(); goto finish; } - userns_fd = nsresource_allocate_userns(userns_name, NSRESOURCE_UIDS_64K); /* allocate 64K UIDs */ + userns_fd = nsresource_allocate_userns( + nsresource_link, + userns_name, + NSRESOURCE_UIDS_64K); /* allocate 64K UIDs */ if (userns_fd < 0) { r = log_error_errno(userns_fd, "Failed to allocate user namespace with 64K users: %m"); goto finish; @@ -6293,6 +6316,7 @@ static int run(int argc, char *argv[]) { if (userns_fd >= 0) { r = mountfsd_mount_directory( + mountfsd_link, arg_directory, userns_fd, determine_dissect_image_flags(), @@ -6443,6 +6467,7 @@ 
static int run(int argc, char *argv[]) { goto finish; } else { r = mountfsd_mount_image( + mountfsd_link, arg_image, userns_fd, /* options= */ NULL, @@ -6485,6 +6510,7 @@ static int run(int argc, char *argv[]) { r = mstack_open_images( mstack, + mountfsd_link, userns_fd, arg_image_policy, /* image_filter= */ NULL, @@ -6507,6 +6533,9 @@ static int run(int argc, char *argv[]) { if (r < 0) goto finish; + mountfsd_link = sd_varlink_unref(mountfsd_link); + nsresource_link = sd_varlink_unref(nsresource_link); + if (!arg_quiet) { const char *t = arg_mstack ?: arg_image ?: arg_directory; _cleanup_free_ char *u = NULL; @@ -6547,7 +6576,8 @@ static int run(int argc, char *argv[]) { mstack, userns_fd, fds, - veth_name, &veth_created, + veth_name, + &veth_created, &expose_args, &master, &pid, &ret); if (r <= 0) diff --git a/src/portable/portable.c b/src/portable/portable.c index 191125b9c58..2fe015d94ca 100644 --- a/src/portable/portable.c +++ b/src/portable/portable.c @@ -6,6 +6,7 @@ #include "sd-bus.h" #include "sd-messages.h" +#include "sd-varlink.h" #include "bus-common-errors.h" #include "bus-error.h" @@ -483,12 +484,20 @@ static int portable_extract_by_path( return log_error_errno(r, "Failed to extract image name from path '%s': %m", path); if (scope == RUNTIME_SCOPE_USER && uid_is_foreign(st.st_uid)) { - _cleanup_close_ int userns_fd = nsresource_allocate_userns(/* name= */ NULL, NSRESOURCE_UIDS_64K); + _cleanup_close_ int userns_fd = nsresource_allocate_userns( + /* vl= */ NULL, + /* name= */ NULL, + NSRESOURCE_UIDS_64K); if (userns_fd < 0) return log_debug_errno(userns_fd, "Failed to allocate user namespace: %m"); _cleanup_close_ int mfd = -EBADF; - r = mountfsd_mount_directory_fd(rfd, userns_fd, DISSECT_IMAGE_FOREIGN_UID, &mfd); + r = mountfsd_mount_directory_fd( + /* vl= */ NULL, + rfd, + userns_fd, + DISSECT_IMAGE_FOREIGN_UID, + &mfd); if (r < 0) return r; @@ -604,11 +613,15 @@ static int portable_extract_by_path( return log_debug_errno(r, "Failed to create 
temporary directory: %m"); if (scope == RUNTIME_SCOPE_USER) { - userns_fd = nsresource_allocate_userns(/* name= */ NULL, NSRESOURCE_UIDS_64K); + userns_fd = nsresource_allocate_userns( + /* vl= */ NULL, + /* name= */ NULL, + NSRESOURCE_UIDS_64K); if (userns_fd < 0) return log_debug_errno(userns_fd, "Failed to allocate user namespace: %m"); r = mountfsd_mount_image_fd( + /* vl= */ NULL, rfd, userns_fd, /* options= */ NULL, @@ -1808,7 +1821,10 @@ static int install_image( if (flags & PORTABLE_MIXED_COPY_LINK) { if (scope == RUNTIME_SCOPE_USER) { - _cleanup_close_ int userns_fd = nsresource_allocate_userns(/* name= */ NULL, NSRESOURCE_UIDS_64K); + _cleanup_close_ int userns_fd = nsresource_allocate_userns( + /* vl= */ NULL, + /* name= */ NULL, + NSRESOURCE_UIDS_64K); if (userns_fd < 0) return log_debug_errno(userns_fd, "Failed to allocate user namespace: %m"); @@ -1820,21 +1836,31 @@ static int install_image( if (fstat(fd, &st) < 0) return log_error_errno(errno, "Failed to stat '%s': %m", image_path); + _cleanup_(sd_varlink_unrefp) sd_varlink *mountfsd_link = NULL; + r = mountfsd_connect(&mountfsd_link); + if (r < 0) + return r; + _cleanup_close_ int tree_fd = -EBADF; if (uid_is_foreign(st.st_uid)) { - r = mountfsd_mount_directory_fd(fd, userns_fd, DISSECT_IMAGE_FOREIGN_UID, &tree_fd); + r = mountfsd_mount_directory_fd( + mountfsd_link, + fd, + userns_fd, + DISSECT_IMAGE_FOREIGN_UID, + &tree_fd); if (r < 0) return r; } else tree_fd = TAKE_FD(fd); _cleanup_close_ int directory_fd = -EBADF; - r = mountfsd_make_directory(target, MODE_INVALID, /* flags= */ 0, &directory_fd); + r = mountfsd_make_directory(mountfsd_link, target, MODE_INVALID, /* flags= */ 0, &directory_fd); if (r < 0) return r; _cleanup_close_ int copy_fd = -EBADF; - r = mountfsd_mount_directory_fd(directory_fd, userns_fd, DISSECT_IMAGE_FOREIGN_UID, &copy_fd); + r = mountfsd_mount_directory_fd(mountfsd_link, directory_fd, userns_fd, DISSECT_IMAGE_FOREIGN_UID, &copy_fd); if (r < 0) return r; diff --git 
a/src/shared/discover-image.c b/src/shared/discover-image.c index 064a3e8002a..0575310a2c2 100644 --- a/src/shared/discover-image.c +++ b/src/shared/discover-image.c @@ -11,6 +11,7 @@ #include "sd-json.h" #include "sd-path.h" +#include "sd-varlink.h" #include "alloc-util.h" #include "blockdev-util.h" @@ -1279,12 +1280,16 @@ static int unprivileged_remove(Image *i) { assert(i); - _cleanup_close_ int userns_fd = nsresource_allocate_userns(/* name= */ NULL, /* size= */ NSRESOURCE_UIDS_64K); + _cleanup_close_ int userns_fd = nsresource_allocate_userns( + /* vl= */ NULL, + /* name= */ NULL, + /* size= */ NSRESOURCE_UIDS_64K); if (userns_fd < 0) return log_debug_errno(userns_fd, "Failed to allocate transient user namespace: %m"); _cleanup_close_ int tree_fd = -EBADF; r = mountfsd_mount_directory( + /* vl= */ NULL, i->path, userns_fd, DISSECT_IMAGE_FOREIGN_UID, @@ -1623,13 +1628,22 @@ static int unprivileged_clone(Image *i, const char *new_path) { assert(i); assert(new_path); - _cleanup_close_ int userns_fd = nsresource_allocate_userns(/* name= */ NULL, /* size= */ NSRESOURCE_UIDS_64K); + _cleanup_close_ int userns_fd = nsresource_allocate_userns( + /* vl= */ NULL, + /* name= */ NULL, + /* size= */ NSRESOURCE_UIDS_64K); if (userns_fd < 0) return log_debug_errno(userns_fd, "Failed to allocate transient user namespace: %m"); + _cleanup_(sd_varlink_unrefp) sd_varlink *link = NULL; + r = mountfsd_connect(&link); + if (r < 0) + return r; + /* Map original image */ _cleanup_close_ int tree_fd = -EBADF; r = mountfsd_mount_directory( + link, i->path, userns_fd, DISSECT_IMAGE_FOREIGN_UID, @@ -1640,6 +1654,7 @@ static int unprivileged_clone(Image *i, const char *new_path) { /* Make new image */ _cleanup_close_ int new_fd = -EBADF; r = mountfsd_make_directory( + link, new_path, MODE_INVALID, /* flags= */ 0, @@ -1650,6 +1665,7 @@ static int unprivileged_clone(Image *i, const char *new_path) { /* Mount new image */ _cleanup_close_ int target_fd = -EBADF; r = 
mountfsd_mount_directory_fd( + link, new_fd, userns_fd, DISSECT_IMAGE_FOREIGN_UID, @@ -1657,6 +1673,8 @@ static int unprivileged_clone(Image *i, const char *new_path) { if (r < 0) return r; + link = sd_varlink_unref(link); + /* Fork off child that moves into userns and does the copying */ return copy_tree_at_foreign(tree_fd, target_fd, userns_fd); } diff --git a/src/shared/dissect-image.c b/src/shared/dissect-image.c index 0c0274fbc52..eb4b6bd6099 100644 --- a/src/shared/dissect-image.c +++ b/src/shared/dissect-image.c @@ -4958,6 +4958,7 @@ int verity_dissect_and_mount( return log_debug_errno(userns_fd, "Failed to open our own user namespace: %m"); r = mountfsd_mount_image( + /* vl= */ NULL, src_fd >= 0 ? FORMAT_PROC_FD_PATH(src_fd) : src, userns_fd, options, @@ -5125,7 +5126,30 @@ static void mount_image_reply_parameters_done(MountImageReplyParameters *p) { #endif +int mountfsd_connect(sd_varlink **ret) { + int r; + + assert(ret); + + _cleanup_(sd_varlink_unrefp) sd_varlink *vl = NULL; + r = sd_varlink_connect_address(&vl, "/run/systemd/io.systemd.MountFileSystem"); + if (r < 0) + return log_debug_errno(r, "Failed to connect to mountfsd: %m"); + + r = sd_varlink_set_allow_fd_passing_input(vl, true); + if (r < 0) + return log_debug_errno(r, "Failed to enable varlink fd passing for read: %m"); + + r = sd_varlink_set_allow_fd_passing_output(vl, true); + if (r < 0) + return log_debug_errno(r, "Failed to enable varlink fd passing for write: %m"); + + *ret = TAKE_PTR(vl); + return 0; +} + int mountfsd_mount_image_fd( + sd_varlink *vl, int image_fd, int userns_fd, const MountOptions *options, @@ -5149,7 +5173,6 @@ int mountfsd_mount_image_fd( _cleanup_(dissected_image_unrefp) DissectedImage *di = NULL; _cleanup_close_ int verity_data_fd = -EBADF; - _cleanup_(sd_varlink_unrefp) sd_varlink *vl = NULL; _cleanup_free_ char *ps = NULL; const char *error_id; int r; @@ -5157,17 +5180,14 @@ int mountfsd_mount_image_fd( assert(image_fd >= 0); assert(ret); - r = 
sd_varlink_connect_address(&vl, "/run/systemd/io.systemd.MountFileSystem"); - if (r < 0) - return log_error_errno(r, "Failed to connect to mountfsd: %m"); - - r = sd_varlink_set_allow_fd_passing_input(vl, true); - if (r < 0) - return log_error_errno(r, "Failed to enable varlink fd passing for read: %m"); + _cleanup_(sd_varlink_unrefp) sd_varlink *_vl = NULL; + if (!vl) { + r = mountfsd_connect(&_vl); + if (r < 0) + return r; - r = sd_varlink_set_allow_fd_passing_output(vl, true); - if (r < 0) - return log_error_errno(r, "Failed to enable varlink fd passing for write: %m"); + vl = _vl; + } _cleanup_close_ int reopened_fd = -EBADF; @@ -5337,6 +5357,7 @@ int mountfsd_mount_image_fd( } int mountfsd_mount_image( + sd_varlink *vl, const char *path, int userns_fd, const MountOptions *options, @@ -5355,7 +5376,7 @@ int mountfsd_mount_image( return log_debug_errno(errno, "Failed to open '%s': %m", path); _cleanup_(dissected_image_unrefp) DissectedImage *di = NULL; - r = mountfsd_mount_image_fd(image_fd, userns_fd, options, image_policy, verity, flags, &di); + r = mountfsd_mount_image_fd(vl, image_fd, userns_fd, options, image_policy, verity, flags, &di); if (r < 0) return r; @@ -5370,6 +5391,7 @@ int mountfsd_mount_image( } int mountfsd_mount_directory_fd( + sd_varlink *vl, int directory_fd, int userns_fd, DissectImageFlags flags, @@ -5383,18 +5405,14 @@ int mountfsd_mount_directory_fd( /* Pick one identity, not both, that makes no sense. 
*/ assert(!FLAGS_SET(flags, DISSECT_IMAGE_FOREIGN_UID|DISSECT_IMAGE_IDENTITY_UID)); - _cleanup_(sd_varlink_unrefp) sd_varlink *vl = NULL; - r = sd_varlink_connect_address(&vl, "/run/systemd/io.systemd.MountFileSystem"); - if (r < 0) - return log_error_errno(r, "Failed to connect to mountfsd: %m"); - - r = sd_varlink_set_allow_fd_passing_input(vl, true); - if (r < 0) - return log_error_errno(r, "Failed to enable varlink fd passing for read: %m"); + _cleanup_(sd_varlink_unrefp) sd_varlink *_vl = NULL; + if (!vl) { + r = mountfsd_connect(&_vl); + if (r < 0) + return r; - r = sd_varlink_set_allow_fd_passing_output(vl, true); - if (r < 0) - return log_error_errno(r, "Failed to enable varlink fd passing for write: %m"); + vl = _vl; + } r = sd_varlink_push_dup_fd(vl, directory_fd); if (r < 0) @@ -5441,6 +5459,7 @@ int mountfsd_mount_directory_fd( } int mountfsd_mount_directory( + sd_varlink *vl, const char *path, int userns_fd, DissectImageFlags flags, @@ -5453,10 +5472,11 @@ int mountfsd_mount_directory( if (directory_fd < 0) return log_debug_errno(errno, "Failed to open '%s': %m", path); - return mountfsd_mount_directory_fd(directory_fd, userns_fd, flags, ret_mount_fd); + return mountfsd_mount_directory_fd(vl, directory_fd, userns_fd, flags, ret_mount_fd); } int mountfsd_make_directory_fd( + sd_varlink *vl, int parent_fd, const char *name, mode_t mode, @@ -5468,18 +5488,14 @@ int mountfsd_make_directory_fd( assert(parent_fd >= 0); assert(name); - _cleanup_(sd_varlink_unrefp) sd_varlink *vl = NULL; - r = sd_varlink_connect_address(&vl, "/run/systemd/io.systemd.MountFileSystem"); - if (r < 0) - return log_error_errno(r, "Failed to connect to mountfsd: %m"); - - r = sd_varlink_set_allow_fd_passing_input(vl, true); - if (r < 0) - return log_error_errno(r, "Failed to enable varlink fd passing for read: %m"); + _cleanup_(sd_varlink_unrefp) sd_varlink *_vl = NULL; + if (!vl) { + r = mountfsd_connect(&_vl); + if (r < 0) + return r; - r = 
sd_varlink_set_allow_fd_passing_output(vl, true); - if (r < 0) - return log_error_errno(r, "Failed to enable varlink fd passing for write: %m"); + vl = _vl; + } r = sd_varlink_push_dup_fd(vl, parent_fd); if (r < 0) @@ -5519,6 +5535,7 @@ int mountfsd_make_directory_fd( } int mountfsd_make_directory( + sd_varlink *vl, const char *path, mode_t mode, DissectImageFlags flags, @@ -5540,7 +5557,7 @@ int mountfsd_make_directory( if (fd < 0) return log_debug_errno(r, "Failed to open '%s': %m", parent); - return mountfsd_make_directory_fd(fd, dirname, mode, flags, ret_directory_fd); + return mountfsd_make_directory_fd(vl, fd, dirname, mode, flags, ret_directory_fd); } int copy_tree_at_foreign(int source_fd, int target_fd, int userns_fd) { @@ -5600,6 +5617,7 @@ int remove_tree_foreign(const char *path, int userns_fd) { _cleanup_close_ int tree_fd = -EBADF; r = mountfsd_mount_directory( + /* vl= */ NULL, path, userns_fd, DISSECT_IMAGE_FOREIGN_UID, @@ -5611,7 +5629,7 @@ int remove_tree_foreign(const char *path, int userns_fd) { "rm-tree", /* stdio_fds= */ NULL, (int[]) { userns_fd, tree_fd }, 2, - FORK_RESET_SIGNALS|FORK_CLOSE_ALL_FDS|FORK_DEATHSIG_SIGTERM|FORK_LOG|FORK_REOPEN_LOG|FORK_WAIT, + FORK_RESET_SIGNALS|FORK_CLOSE_ALL_FDS|FORK_DEATHSIG_SIGTERM|FORK_WAIT|FORK_REOPEN_LOG, /* ret= */ NULL); if (r < 0) return r; @@ -5625,19 +5643,19 @@ int remove_tree_foreign(const char *path, int userns_fd) { userns_fd, /* root_fd= */ -EBADF); if (r < 0) { - log_error_errno(r, "Failed to join user namespace: %m"); + log_debug_errno(r, "Failed to join user namespace: %m"); _exit(EXIT_FAILURE); } _cleanup_close_ int dfd = fd_reopen(tree_fd, O_DIRECTORY|O_CLOEXEC); if (dfd < 0) { - log_error_errno(r, "Failed to reopen tree fd: %m"); + log_debug_errno(r, "Failed to reopen tree fd: %m"); _exit(EXIT_FAILURE); } r = rm_rf_children(dfd, REMOVE_PHYSICAL|REMOVE_SUBVOLUME|REMOVE_CHMOD, /* root_dev= */ NULL); if (r < 0) - log_warning_errno(r, "Failed to empty '%s' directory in foreign UID mode, 
ignoring: %m", path); + log_debug_errno(r, "Failed to empty '%s' directory in foreign UID mode, ignoring: %m", path); _exit(EXIT_SUCCESS); } diff --git a/src/shared/dissect-image.h b/src/shared/dissect-image.h index aa8c6a2737b..09d7db5952b 100644 --- a/src/shared/dissect-image.h +++ b/src/shared/dissect-image.h @@ -269,13 +269,23 @@ static inline const char* dissected_partition_fstype(const DissectedPartition *m int get_common_dissect_directory(char **ret); -int mountfsd_mount_image_fd(int image_fd, int userns_fd, const MountOptions *options, const ImagePolicy *image_policy, const VeritySettings *verity, DissectImageFlags flags, DissectedImage **ret); -int mountfsd_mount_image(const char *path, int userns_fd, const MountOptions *options, const ImagePolicy *image_policy, const VeritySettings *verity, DissectImageFlags flags, DissectedImage **ret); -int mountfsd_mount_directory_fd(int directory_fd, int userns_fd, DissectImageFlags flags, int *ret_mount_fd); -int mountfsd_mount_directory(const char *path, int userns_fd, DissectImageFlags flags, int *ret_mount_fd); - -int mountfsd_make_directory_fd(int parent_fd, const char *name, mode_t mode, DissectImageFlags flags, int *ret_directory_fd); -int mountfsd_make_directory(const char *path, mode_t mode, DissectImageFlags flags, int *ret_directory_fd); +int mountfsd_connect(sd_varlink **ret); + +/* All the calls below take a 'link' parameter, that may be an already established Varlink connection object + * towards systemd-mountfsd, previously created via mountfsd_connect(). This serves two purposes: first of + * all allows more efficient resource usage, as this allows recycling already allocated resources for + * multiple calls. Secondly, the user credentials are pinned at time of mountfsd_connect(), and the caller + * hence can drop privileges afterwards while keeping open the connection and still execute relevant + * operations under the original identity, until the connection is closed. 
The 'link' parameter may be passed + * as NULL in which case a short-lived connection is created, just to execute the requested operation. */ + +int mountfsd_mount_image_fd(sd_varlink *vl, int image_fd, int userns_fd, const MountOptions *options, const ImagePolicy *image_policy, const VeritySettings *verity, DissectImageFlags flags, DissectedImage **ret); +int mountfsd_mount_image(sd_varlink *vl, const char *path, int userns_fd, const MountOptions *options, const ImagePolicy *image_policy, const VeritySettings *verity, DissectImageFlags flags, DissectedImage **ret); +int mountfsd_mount_directory_fd(sd_varlink *vl, int directory_fd, int userns_fd, DissectImageFlags flags, int *ret_mount_fd); +int mountfsd_mount_directory(sd_varlink *vl, const char *path, int userns_fd, DissectImageFlags flags, int *ret_mount_fd); + +int mountfsd_make_directory_fd(sd_varlink *vl, int parent_fd, const char *name, mode_t mode, DissectImageFlags flags, int *ret_directory_fd); +int mountfsd_make_directory(sd_varlink *vl, const char *path, mode_t mode, DissectImageFlags flags, int *ret_directory_fd); int copy_tree_at_foreign(int source_fd, int target_fd, int userns_fd); int remove_tree_foreign(const char *path, int userns_fd); diff --git a/src/shared/mstack.c b/src/shared/mstack.c index 8b0beb21ade..9cbc3163191 100644 --- a/src/shared/mstack.c +++ b/src/shared/mstack.c @@ -4,6 +4,8 @@ #include #include +#include "sd-varlink.h" + #include "alloc-util.h" #include "chase.h" #include "dissect-image.h" @@ -515,6 +517,7 @@ static const char *mount_name(MStackMount *m) { int mstack_open_images( MStack *mstack, + sd_varlink *mountfsd_link, int userns_fd, const ImagePolicy *image_policy, const ImageFilter *image_filter, @@ -524,6 +527,16 @@ int mstack_open_images( assert(mstack); + _cleanup_(sd_varlink_unrefp) sd_varlink *_vl = NULL; + if (userns_fd >= 0 && !mountfsd_link) { + /* Use a single connection for all mounts */ + r = mountfsd_connect(&_vl); + if (r < 0) + return r; + + mountfsd_link = 
_vl; + } + FOREACH_ARRAY(m, mstack->mounts, mstack->n_mounts) { DissectImageFlags dissect_image_flags = @@ -549,6 +562,7 @@ int mstack_open_images( if (userns_fd >= 0) { r = mountfsd_mount_image_fd( + mountfsd_link, m->what_fd, userns_fd, /* options= */ NULL, @@ -642,6 +656,7 @@ int mstack_open_images( if (userns_fd >= 0) { r = mountfsd_mount_directory_fd( + mountfsd_link, m->what_fd, userns_fd, dissect_image_flags, @@ -1070,6 +1085,7 @@ int mstack_apply( int dir_fd, const char *where, const char *temp_mount_dir, + sd_varlink *link, int userns_fd, const ImagePolicy *image_policy, const ImageFilter *image_filter, @@ -1084,7 +1100,7 @@ int mstack_apply( if (r < 0) return r; - r = mstack_open_images(&mstack, userns_fd, image_policy, image_filter, flags); + r = mstack_open_images(&mstack, link, userns_fd, image_policy, image_filter, flags); if (r < 0) return r; diff --git a/src/shared/mstack.h b/src/shared/mstack.h index e526f17d983..b71ff86940b 100644 --- a/src/shared/mstack.h +++ b/src/shared/mstack.h @@ -51,12 +51,12 @@ MStack *mstack_free(MStack *mstack); DEFINE_TRIVIAL_CLEANUP_FUNC(MStack*, mstack_free); int mstack_load(const char *dir, int dir_fd, MStack **ret); -int mstack_open_images(MStack *mstack, int userns_fd, const ImagePolicy *image_policy, const ImageFilter *image_filter, MStackFlags flags); +int mstack_open_images(MStack *mstack, sd_varlink *mountfsd_link, int userns_fd, const ImagePolicy *image_policy, const ImageFilter *image_filter, MStackFlags flags); int mstack_make_mounts(MStack *mstack, const char *temp_mount_dir, MStackFlags flags); int mstack_bind_mounts(MStack *mstack, const char *where, int where_fd, MStackFlags flags, int *ret_root_fd); /* The four calls above in one */ -int mstack_apply(const char *dir, int dir_fd, const char *where, const char *temp_mount_dir, int userns_fd, const ImagePolicy *image_policy, const ImageFilter *image_filter, MStackFlags flags, int *ret_root_fd); +int mstack_apply(const char *dir, int dir_fd, const char 
*where, const char *temp_mount_dir, sd_varlink *mountfsd_link, int userns_fd, const ImagePolicy *image_policy, const ImageFilter *image_filter, MStackFlags flags, int *ret_root_fd); int mstack_is_read_only(MStack *mstack); int mstack_is_foreign_uid_owned(MStack *mstack); diff --git a/src/shared/nsresource.c b/src/shared/nsresource.c index 651d9bdf4d6..615f99eff10 100644 --- a/src/shared/nsresource.c +++ b/src/shared/nsresource.c @@ -57,8 +57,25 @@ static int make_pid_name(char **ret) { return 0; } -int nsresource_allocate_userns(const char *name, uint64_t size) { +int nsresource_connect(sd_varlink **ret) { + int r; + + assert(ret); + _cleanup_(sd_varlink_unrefp) sd_varlink *vl = NULL; + r = sd_varlink_connect_address(&vl, "/run/systemd/io.systemd.NamespaceResource"); + if (r < 0) + return log_debug_errno(r, "Failed to connect to namespace resource manager: %m"); + + r = sd_varlink_set_allow_fd_passing_output(vl, true); + if (r < 0) + return log_debug_errno(r, "Failed to enable varlink fd passing for write: %m"); + + *ret = TAKE_PTR(vl); + return 0; +} + +int nsresource_allocate_userns(sd_varlink *vl, const char *name, uint64_t size) { _cleanup_close_ int userns_fd = -EBADF; _cleanup_free_ char *_name = NULL; const char *error_id; @@ -77,13 +94,14 @@ int nsresource_allocate_userns(const char *name, uint64_t size) { if (size <= 0 || size > UINT64_C(0x100000000)) /* Note: the server actually only allows allocating 1 or 64K right now */ return -EINVAL; - r = sd_varlink_connect_address(&vl, "/run/systemd/io.systemd.NamespaceResource"); - if (r < 0) - return log_debug_errno(r, "Failed to connect to namespace resource manager: %m"); + _cleanup_(sd_varlink_unrefp) sd_varlink *_vl = NULL; + if (!vl) { + r = nsresource_connect(&_vl); + if (r < 0) + return r; - r = sd_varlink_set_allow_fd_passing_output(vl, true); - if (r < 0) - return log_debug_errno(r, "Failed to enable varlink fd passing for write: %m"); + vl = _vl; + } userns_fd = userns_acquire_empty(); if (userns_fd < 
0) @@ -113,8 +131,7 @@ int nsresource_allocate_userns(const char *name, uint64_t size) { return TAKE_FD(userns_fd); } -int nsresource_register_userns(const char *name, int userns_fd) { - _cleanup_(sd_varlink_unrefp) sd_varlink *vl = NULL; +int nsresource_register_userns(sd_varlink *vl, const char *name, int userns_fd) { _cleanup_close_ int _userns_fd = -EBADF; _cleanup_free_ char *_name = NULL; const char *error_id; @@ -138,13 +155,14 @@ int nsresource_register_userns(const char *name, int userns_fd) { userns_fd = _userns_fd; } - r = sd_varlink_connect_address(&vl, "/run/systemd/io.systemd.NamespaceResource"); - if (r < 0) - return log_debug_errno(r, "Failed to connect to namespace resource manager: %m"); + _cleanup_(sd_varlink_unrefp) sd_varlink *_vl = NULL; + if (!vl) { + r = nsresource_connect(&_vl); + if (r < 0) + return r; - r = sd_varlink_set_allow_fd_passing_output(vl, true); - if (r < 0) - return log_debug_errno(r, "Failed to enable varlink fd passing for write: %m"); + vl = _vl; + } userns_fd_idx = sd_varlink_push_dup_fd(vl, userns_fd); if (userns_fd_idx < 0) @@ -169,8 +187,7 @@ int nsresource_register_userns(const char *name, int userns_fd) { return 0; } -int nsresource_add_mount(int userns_fd, int mount_fd) { - _cleanup_(sd_varlink_unrefp) sd_varlink *vl = NULL; +int nsresource_add_mount(sd_varlink *vl, int userns_fd, int mount_fd) { _cleanup_close_ int _userns_fd = -EBADF; int r, userns_fd_idx, mount_fd_idx; const char *error_id; @@ -185,13 +202,14 @@ int nsresource_add_mount(int userns_fd, int mount_fd) { userns_fd = _userns_fd; } - r = sd_varlink_connect_address(&vl, "/run/systemd/io.systemd.NamespaceResource"); - if (r < 0) - return log_error_errno(r, "Failed to connect to namespace resource manager: %m"); + _cleanup_(sd_varlink_unrefp) sd_varlink *_vl = NULL; + if (!vl) { + r = nsresource_connect(&_vl); + if (r < 0) + return r; - r = sd_varlink_set_allow_fd_passing_output(vl, true); - if (r < 0) - return log_error_errno(r, "Failed to enable varlink 
fd passing for write: %m"); + vl = _vl; + } userns_fd_idx = sd_varlink_push_dup_fd(vl, userns_fd); if (userns_fd_idx < 0) @@ -221,8 +239,7 @@ int nsresource_add_mount(int userns_fd, int mount_fd) { return 1; } -int nsresource_add_cgroup(int userns_fd, int cgroup_fd) { - _cleanup_(sd_varlink_unrefp) sd_varlink *vl = NULL; +int nsresource_add_cgroup(sd_varlink *vl, int userns_fd, int cgroup_fd) { _cleanup_close_ int _userns_fd = -EBADF; int r, userns_fd_idx, cgroup_fd_idx; const char *error_id; @@ -237,13 +254,14 @@ int nsresource_add_cgroup(int userns_fd, int cgroup_fd) { userns_fd = _userns_fd; } - r = sd_varlink_connect_address(&vl, "/run/systemd/io.systemd.NamespaceResource"); - if (r < 0) - return log_debug_errno(r, "Failed to connect to namespace resource manager: %m"); + _cleanup_(sd_varlink_unrefp) sd_varlink *_vl = NULL; + if (!vl) { + r = nsresource_connect(&_vl); + if (r < 0) + return r; - r = sd_varlink_set_allow_fd_passing_output(vl, true); - if (r < 0) - return log_debug_errno(r, "Failed to enable varlink fd passing for write: %m"); + vl = _vl; + } userns_fd_idx = sd_varlink_push_dup_fd(vl, userns_fd); if (userns_fd_idx < 0) @@ -287,6 +305,7 @@ static void interface_params_done(InterfaceParams *p) { } int nsresource_add_netif_veth( + sd_varlink *vl, int userns_fd, int netns_fd, const char *namespace_ifname, @@ -294,7 +313,6 @@ int nsresource_add_netif_veth( char **ret_namespace_ifname) { _cleanup_close_ int _userns_fd = -EBADF, _netns_fd = -EBADF; - _cleanup_(sd_varlink_unrefp) sd_varlink *vl = NULL; int r, userns_fd_idx, netns_fd_idx; const char *error_id; @@ -314,13 +332,14 @@ int nsresource_add_netif_veth( netns_fd = _netns_fd; } - r = sd_varlink_connect_address(&vl, "/run/systemd/io.systemd.NamespaceResource"); - if (r < 0) - return log_debug_errno(r, "Failed to connect to namespace resource manager: %m"); + _cleanup_(sd_varlink_unrefp) sd_varlink *_vl = NULL; + if (!vl) { + r = nsresource_connect(&_vl); + if (r < 0) + return r; - r = 
sd_varlink_set_allow_fd_passing_output(vl, true); - if (r < 0) - return log_debug_errno(r, "Failed to enable varlink fd passing for write: %m"); + vl = _vl; + } userns_fd_idx = sd_varlink_push_dup_fd(vl, userns_fd); if (userns_fd_idx < 0) @@ -368,11 +387,11 @@ int nsresource_add_netif_veth( } int nsresource_add_netif_tap( + sd_varlink *vl, int userns_fd, char **ret_host_ifname) { _cleanup_close_ int _userns_fd = -EBADF; - _cleanup_(sd_varlink_unrefp) sd_varlink *vl = NULL; int r, userns_fd_idx; const char *error_id; @@ -384,13 +403,14 @@ int nsresource_add_netif_tap( userns_fd = _userns_fd; } - r = sd_varlink_connect_address(&vl, "/run/systemd/io.systemd.NamespaceResource"); - if (r < 0) - return log_debug_errno(r, "Failed to connect to namespace resource manager: %m"); + _cleanup_(sd_varlink_unrefp) sd_varlink *_vl = NULL; + if (!vl) { + r = nsresource_connect(&_vl); + if (r < 0) + return r; - r = sd_varlink_set_allow_fd_passing_output(vl, true); - if (r < 0) - return log_debug_errno(r, "Failed to enable varlink fd passing for write: %m"); + vl = _vl; + } r = sd_varlink_set_allow_fd_passing_input(vl, true); if (r < 0) diff --git a/src/shared/nsresource.h b/src/shared/nsresource.h index 136b1f85ac1..93957a10c82 100644 --- a/src/shared/nsresource.h +++ b/src/shared/nsresource.h @@ -7,9 +7,19 @@ #define NSRESOURCE_UIDS_64K 0x10000U #define NSRESOURCE_UIDS_1 1U -int nsresource_allocate_userns(const char *name, uint64_t size); -int nsresource_register_userns(const char *name, int userns_fd); -int nsresource_add_mount(int userns_fd, int mount_fd); -int nsresource_add_cgroup(int userns_fd, int cgroup_fd); -int nsresource_add_netif_veth(int userns_fd, int netns_fd, const char *namespace_ifname, char **ret_host_ifname, char **ret_namespace_ifname); -int nsresource_add_netif_tap(int userns_fd, char **ret_host_ifname); +int nsresource_connect(sd_varlink **ret); + +/* All the calls below take a 'link' parameter, that may be an already established Varlink connection object + * 
towards systemd-nsresourced, previously created via nsresource_connect(). This serves two purposes: first + * of all allows more efficient resource usage, as this allows recycling already allocated resources for + * multiple calls. Secondly, the user credentials are pinned at time of nsresource_connect(), and the caller + * hence can drop privileges afterwards while keeping open the connection and still execute relevant + * operations under the original identity, until the connection is closed. The 'link' parameter may be passed + * as NULL in which case a short-lived connection is created, just to execute the requested operation. */ + +int nsresource_allocate_userns(sd_varlink *vl, const char *name, uint64_t size); +int nsresource_register_userns(sd_varlink *vl, const char *name, int userns_fd); +int nsresource_add_mount(sd_varlink *vl, int userns_fd, int mount_fd); +int nsresource_add_cgroup(sd_varlink *vl, int userns_fd, int cgroup_fd); +int nsresource_add_netif_veth(sd_varlink *vl, int userns_fd, int netns_fd, const char *namespace_ifname, char **ret_host_ifname, char **ret_namespace_ifname); +int nsresource_add_netif_tap(sd_varlink *vl, int userns_fd, char **ret_host_ifname); diff --git a/src/test/test-mstack.c b/src/test/test-mstack.c index 15400af511e..7c370fcbe97 100644 --- a/src/test/test-mstack.c +++ b/src/test/test-mstack.c @@ -90,6 +90,7 @@ TEST(mstack) { ASSERT_OK(mstack_open_images( mstack, + /* mountfsd_link= */ NULL, /* userns_fd= */ -EBADF, /* image_policy= */ NULL, /* image_filter= */ NULL, diff --git a/src/test/test-nsresource.c b/src/test/test-nsresource.c index aacd776d7ff..b8845b8ffeb 100644 --- a/src/test/test-nsresource.c +++ b/src/test/test-nsresource.c @@ -2,6 +2,8 @@ #include +#include "sd-varlink.h" + #include "errno-util.h" #include "fd-util.h" #include "namespace-util.h" @@ -16,13 +18,18 @@ TEST(delegatetap) { return (void) log_tests_skipped_errno(userns_fd, "User namespaces not available"); ASSERT_OK(userns_fd); - r = 
nsresource_register_userns("foobar", userns_fd); + _cleanup_(sd_varlink_unrefp) sd_varlink *link = NULL; + r = nsresource_connect(&link); if (ERRNO_IS_NEG_DISCONNECT(r) || r == -ENOENT || ERRNO_IS_NEG_NOT_SUPPORTED(r)) return (void) log_tests_skipped_errno(r, "systemd-nsresourced cannot be reached"); + + r = nsresource_register_userns(link, "foobar", userns_fd); + if (ERRNO_IS_NEG_NOT_SUPPORTED(r)) + return (void) log_tests_skipped_errno(r, "systemd-nsresourced does not work"); ASSERT_OK(r); _cleanup_free_ char *ifname = NULL; - _cleanup_close_ int tap_fd = nsresource_add_netif_tap(userns_fd, &ifname); + _cleanup_close_ int tap_fd = nsresource_add_netif_tap(link, userns_fd, &ifname); if (ERRNO_IS_NEG_NOT_SUPPORTED(tap_fd)) return (void) log_tests_skipped_errno(tap_fd, "tap device support not available"); ASSERT_OK(tap_fd); diff --git a/src/vmspawn/vmspawn.c b/src/vmspawn/vmspawn.c index 0c957b48aae..7cbe7fd5f93 100644 --- a/src/vmspawn/vmspawn.c +++ b/src/vmspawn/vmspawn.c @@ -12,6 +12,7 @@ #include "sd-daemon.h" #include "sd-event.h" #include "sd-id128.h" +#include "sd-varlink.h" #include "alloc-util.h" #include "architecture.h" @@ -2041,11 +2042,16 @@ static int run_virtual_machine(int kvm_device_fd, int vhost_device_fd) { if (asprintf(&userns_name, "vmspawn-" PID_FMT "-%s", getpid_cached(), arg_machine) < 0) return log_oom(); - r = nsresource_register_userns(userns_name, delegate_userns_fd); + _cleanup_(sd_varlink_unrefp) sd_varlink *nsresource_link = NULL; + r = nsresource_connect(&nsresource_link); + if (r < 0) + return log_error_errno(r, "Failed to connect to nsresourced: %m"); + + r = nsresource_register_userns(nsresource_link, userns_name, delegate_userns_fd); if (r < 0) return log_error_errno(r, "Failed to register user namespace with systemd-nsresourced: %m"); - tap_fd = nsresource_add_netif_tap(delegate_userns_fd, /* ret_host_ifname= */ NULL); + tap_fd = nsresource_add_netif_tap(nsresource_link, delegate_userns_fd, /* ret_host_ifname= */ NULL); if 
(tap_fd < 0) return log_error_errno(tap_fd, "Failed to allocate network tap device: %m");