]> git.ipfire.org Git - thirdparty/systemd.git/commitdiff
mountfsd,nsresource: allow recycling mountfsd/nsresourced client connections
authorLennart Poettering <lennart@amutable.com>
Fri, 28 Nov 2025 15:28:42 +0000 (16:28 +0100)
committerLennart Poettering <lennart@amutable.com>
Thu, 19 Feb 2026 14:08:19 +0000 (15:08 +0100)
So far we opened a new Varlink connection for every mountfsd/nsresourced
method call. Given that each tool only does a very small number of calls
(usually 1…5) on them, and that connections are cheap, this is not too
wasteful. Nonetheless, let's do something about it, and allow reusing
the connection for multiple calls.

This not only makes things a bit more efficient, but has one more
important benefit: Varlink connections pin the security context of the
client when connecting. This means that Varlink method calls done with a
connection established while some code was privileged will still operate
as privileged once privileges are dropped, until the connection is closed.
This pinning effect is really nice, as it gives us behaviour similar to
a "capability system". Later code is going to use that to continue doing
certain privileged userns operations even after unsharing the userns and
becoming fully unprivileged.

24 files changed:
src/core/exec-invoke.c
src/core/namespace.c
src/core/namespace.h
src/dissect/dissect.c
src/import/export-tar.c
src/import/import-common.c
src/import/import-tar.c
src/import/pull-oci.c
src/import/pull-tar.c
src/mountfsd/mountwork.c
src/mstack/mstack-tool.c
src/nspawn/nspawn-cgroup.c
src/nspawn/nspawn.c
src/portable/portable.c
src/shared/discover-image.c
src/shared/dissect-image.c
src/shared/dissect-image.h
src/shared/mstack.c
src/shared/mstack.h
src/shared/nsresource.c
src/shared/nsresource.h
src/test/test-mstack.c
src/test/test-nsresource.c
src/vmspawn/vmspawn.c

index 06e6a3081fc4c3e84ff0d3c94b7d41668ee96aa5..888a2555fbbea28f86cb417985b78ffaa0251da6 100644 (file)
@@ -14,6 +14,7 @@
 #include <unistd.h>
 
 #include "sd-messages.h"
+#include "sd-varlink.h"
 
 #include "apparmor-util.h"      /* IWYU pragma: keep */
 #include "argv-util.h"
@@ -2397,6 +2398,7 @@ static int setup_private_users_child(int unshare_ready_fd, const char *uid_map,
 }
 
 static int setup_private_users(
+                sd_varlink *nsresource_link,
                 PrivateUsers private_users,
                 uid_t saved_uid,    /* service manager uid */
                 gid_t saved_gid,    /* service manager gid */
@@ -2438,7 +2440,10 @@ static int setup_private_users(
                 if (uid_is_valid(*uid) || uid_is_valid(*gid))
                         return log_debug_errno(SYNTHETIC_ERRNO(EPERM), "When allocating dynamic user namespace range, target UID/GID must be root, refusing.");
 
-                _cleanup_close_ int userns_fd = nsresource_allocate_userns(/* name= */ NULL, NSRESOURCE_UIDS_64K);
+                _cleanup_close_ int userns_fd = nsresource_allocate_userns(
+                                nsresource_link,
+                                /* name= */ NULL,
+                                NSRESOURCE_UIDS_64K);
                 if (userns_fd < 0)
                         return userns_fd;
 
@@ -3786,6 +3791,7 @@ static int apply_mount_namespace(
                 PidRef *bpffs_pidref,
                 int bpffs_socket_fd,
                 int bpffs_errno_pipe,
+                sd_varlink *mountfsd_link,
                 char **reterr_path) {
 
         _cleanup_(verity_settings_done) VeritySettings verity = VERITY_SETTINGS_DEFAULT;
@@ -4005,6 +4011,8 @@ static int apply_mount_namespace(
                 .bpffs_pidref = bpffs_pidref,
                 .bpffs_socket_fd = bpffs_socket_fd,
                 .bpffs_errno_pipe = bpffs_errno_pipe,
+
+                .mountfsd_link = mountfsd_link,
         };
 
         r = setup_namespace(&parameters, reterr_path);
@@ -4659,6 +4667,7 @@ static int setup_delegated_namespaces(
                 PidRef *bpffs_pidref,
                 int bpffs_socket_fd,
                 int bpffs_errno_pipe,
+                sd_varlink *mountfsd_link,
                 int *reterr_exit_status) {
 
         int r;
@@ -4773,18 +4782,20 @@ static int setup_delegated_namespaces(
             exec_namespace_is_delegated(context, params, have_cap_sys_admin, CLONE_NEWNS) == delegate) {
                 _cleanup_free_ char *error_path = NULL;
 
-                r = apply_mount_namespace(command->flags,
-                                          context,
-                                          params,
-                                          runtime,
+                r = apply_mount_namespace(
+                                command->flags,
+                                context,
+                                params,
+                                runtime,
                                           memory_pressure_path,
-                                          needs_sandboxing,
-                                          uid,
-                                          gid,
-                                          bpffs_pidref,
-                                          bpffs_socket_fd,
-                                          bpffs_errno_pipe,
-                                          &error_path);
+                                needs_sandboxing,
+                                uid,
+                                gid,
+                                bpffs_pidref,
+                                bpffs_socket_fd,
+                                bpffs_errno_pipe,
+                                mountfsd_link,
+                                &error_path);
                 if (r < 0) {
                         *reterr_exit_status = EXIT_NAMESPACE;
                         return log_error_errno(r, "Failed to set up mount namespacing%s%s: %m",
@@ -5744,6 +5755,24 @@ int exec_invoke(
                 }
         }
 
+        _cleanup_(sd_varlink_unrefp) sd_varlink *mountfsd_link = NULL, *nsresource_link = NULL;
+        if (needs_sandboxing &&
+            exec_context_get_effective_private_users(context, params) == PRIVATE_USERS_MANAGED) {
+
+                /* In managed mode we need to allocate a userns via nsresource, and then assign mounts to
+                 * it. We must do so with our original privileges (since after creating the userns, we might
+                 * simply not have the necessary privs for the IPC calls anymore), hence do this here, ahead
+                 * of time. */
+
+                r = mountfsd_connect(&mountfsd_link);
+                if (r < 0)
+                        return log_error_errno(r, "Failed to connect to mountfsd: %m");
+
+                r = nsresource_connect(&nsresource_link);
+                if (r < 0)
+                        return log_error_errno(r, "Failed to connect to nsresourced: %m");
+        }
+
         needs_mount_namespace = exec_needs_mount_namespace(context, params, runtime);
 
         for (ExecDirectoryType dt = 0; dt < _EXEC_DIRECTORY_TYPE_MAX; dt++) {
@@ -5939,6 +5968,7 @@ int exec_invoke(
                 /* The kernel requires /proc/pid/setgroups be set to "deny" prior to writing /proc/pid/gid_map in
                  * unprivileged user namespaces. */
                 r = setup_private_users(
+                                nsresource_link,
                                 pu,
                                 saved_uid,
                                 saved_gid,
@@ -5977,6 +6007,7 @@ int exec_invoke(
                         &bpffs_pidref,
                         bpffs_socket_fd,
                         bpffs_errno_pipe,
+                        mountfsd_link,
                         exit_status);
         if (r < 0)
                 return r;
@@ -6027,6 +6058,7 @@ int exec_invoke(
                 PrivateUsers pu = exec_context_get_effective_private_users(context, params);
 
                 r = setup_private_users(
+                                nsresource_link,
                                 pu,
                                 saved_uid,
                                 saved_gid,
@@ -6071,10 +6103,15 @@ int exec_invoke(
                         &bpffs_pidref,
                         bpffs_socket_fd,
                         bpffs_errno_pipe,
+                        mountfsd_link,
                         exit_status);
         if (r < 0)
                 return r;
 
+        /* We are done now with the nsresourced/mountfsd shenanigans, let's close the connections */
+        nsresource_link = sd_varlink_unref(nsresource_link);
+        mountfsd_link = sd_varlink_unref(mountfsd_link);
+
         /* Kill unnecessary process, for the case that e.g. when the bpffs mount point is hidden. */
         pidref_done_sigkill_wait(&bpffs_pidref);
 
index e348d26c43b91e0e78420fdc681fd3e400339b44..504f80cb6358c7aa6225f1d10f72e320169a2fba 100644 (file)
@@ -2667,6 +2667,7 @@ int setup_namespace(const NamespaceParameters *p, char **reterr_path) {
                                         return log_debug_errno(userns_fd, "Failed to open our own user namespace: %m");
 
                                 r = mountfsd_mount_image(
+                                                p->mountfsd_link,
                                                 p->root_image,
                                                 userns_fd,
                                                 p->root_image_options,
@@ -2678,7 +2679,6 @@ int setup_namespace(const NamespaceParameters *p, char **reterr_path) {
                                         return r;
                         }
                 }
-
         } else if (p->root_mstack) {
                 if (namespace_read_only(p))
                         mstack_flags |= MSTACK_RDONLY;
@@ -2693,7 +2693,13 @@ int setup_namespace(const NamespaceParameters *p, char **reterr_path) {
                                 return log_debug_errno(userns_fd, "Failed to open our own user namespace: %m");
                 }
 
-                r = mstack_open_images(mstack, userns_fd, p->root_image_policy, /* image_filter= */ NULL, mstack_flags);
+                r = mstack_open_images(
+                                mstack,
+                                p->mountfsd_link,
+                                userns_fd,
+                                p->root_image_policy,
+                                /* image_filter= */ NULL,
+                                mstack_flags);
                 if (r < 0)
                         return r;
         }
index 4b62debf2fc3149f2e642c78a47c989ea63fd872..26b0bf8ff2dcef6467a89a15868349598492cb46 100644 (file)
@@ -205,6 +205,8 @@ typedef struct NamespaceParameters {
         PidRef *bpffs_pidref;
         int bpffs_socket_fd;
         int bpffs_errno_pipe;
+
+        sd_varlink *mountfsd_link;
 } NamespaceParameters;
 
 int setup_namespace(const NamespaceParameters *p, char **reterr_path);
index 7303cfc3bd87103a20df2a05cd76932152e595c8..33ddb53d0a607a21185cba631bbb49e106bcab5f 100644 (file)
@@ -2247,12 +2247,16 @@ static int run(int argc, char *argv[]) {
                         /* Don't run things in private userns, if the mount shall be attached to the host
                          * or if we're copying from/to the host. */
                         if (!IN_SET(arg_action, ACTION_MOUNT, ACTION_WITH, ACTION_COPY_FROM, ACTION_COPY_TO)) {
-                                userns_fd = nsresource_allocate_userns(/* name= */ NULL, NSRESOURCE_UIDS_64K); /* allocate 64K users by default */
+                                userns_fd = nsresource_allocate_userns(
+                                                /* vl= */ NULL,
+                                                /* name= */ NULL,
+                                                NSRESOURCE_UIDS_64K); /* allocate 64K users by default */
                                 if (userns_fd < 0)
                                         return log_error_errno(userns_fd, "Failed to allocate user namespace with 64K users: %m");
                         }
 
                         r = mountfsd_mount_image(
+                                        /* vl= */ NULL,
                                         arg_image,
                                         userns_fd,
                                         /* options= */ NULL,
index 81fab3714bd5bbb0099573872daa0124f82f41a2..22f731de5742a7b0a52a9de362571d89fe995d44 100644 (file)
@@ -363,7 +363,12 @@ int tar_export_start(
                         return log_error_errno(r, "Failed to open '%s': %m", p);
 
                 _cleanup_close_ int mapped_fd = -EBADF;
-                r = mountfsd_mount_directory_fd(directory_fd, e->userns_fd, DISSECT_IMAGE_FOREIGN_UID, &mapped_fd);
+                r = mountfsd_mount_directory_fd(
+                                /* vl= */ NULL,
+                                directory_fd,
+                                e->userns_fd,
+                                DISSECT_IMAGE_FOREIGN_UID,
+                                &mapped_fd);
                 if (r < 0)
                         return log_error_errno(r, "Failed to mount directory via mountfsd: %m");
 
index 0c448355fdcb41d172c989c2fda7693cb69b25fd..0a5144f94ecd6ec84b3ad35ed69c80d42f0f33ee 100644 (file)
@@ -375,7 +375,10 @@ int import_make_foreign_userns(int *userns_fd) {
         if (*userns_fd >= 0)
                 return 0;
 
-        *userns_fd = nsresource_allocate_userns(/* name= */ NULL, NSRESOURCE_UIDS_64K); /* allocate 64K users */
+        *userns_fd = nsresource_allocate_userns(
+                        /* vl= */ NULL,
+                        /* name= */ NULL,
+                        NSRESOURCE_UIDS_64K); /* allocate 64K users */
         if (*userns_fd < 0)
                 return log_error_errno(*userns_fd, "Failed to allocate transient user namespace: %m");
 
index b3d3fc61040c121d10a7637eb8d5549aded1d1df..5e74de896e99c1602cd9fb20505e20749d214b61 100644 (file)
@@ -4,6 +4,7 @@
 
 #include "sd-daemon.h"
 #include "sd-event.h"
+#include "sd-varlink.h"
 
 #include "alloc-util.h"
 #include "btrfs-util.h"
@@ -256,12 +257,27 @@ static int tar_import_fork_tar(TarImport *i) {
                 if (r < 0)
                         return r;
 
+                _cleanup_(sd_varlink_unrefp) sd_varlink *mountfsd_link = NULL;
+                r = mountfsd_connect(&mountfsd_link);
+                if (r < 0)
+                        return log_error_errno(r, "Failed to connect to mountfsd: %m");
+
                 _cleanup_close_ int directory_fd = -EBADF;
-                r = mountfsd_make_directory(d, MODE_INVALID, /* flags= */ 0, &directory_fd);
+                r = mountfsd_make_directory(
+                                mountfsd_link,
+                                d,
+                                MODE_INVALID,
+                                /* flags= */ 0,
+                                &directory_fd);
                 if (r < 0)
                         return log_error_errno(r, "Failed to make directory via mountfsd: %m");
 
-                r = mountfsd_mount_directory_fd(directory_fd, i->userns_fd, DISSECT_IMAGE_FOREIGN_UID, &i->tree_fd);
+                r = mountfsd_mount_directory_fd(
+                                mountfsd_link,
+                                directory_fd,
+                                i->userns_fd,
+                                DISSECT_IMAGE_FOREIGN_UID,
+                                &i->tree_fd);
                 if (r < 0)
                         return log_error_errno(r, "Failed mount directory via mountfsd: %m");
         } else {
index d55bd43f5702039bf68fd4df0642bf5dffa5538b..3abb8777d1f064f86c54fbec502f0d2ecf11315a 100644 (file)
@@ -464,12 +464,27 @@ static int oci_pull_job_on_open_disk(PullJob *j) {
                 if (r < 0)
                         return r;
 
+                _cleanup_(sd_varlink_unrefp) sd_varlink *mountfsd_link = NULL;
+                r = mountfsd_connect(&mountfsd_link);
+                if (r < 0)
+                        return log_error_errno(r, "Failed to connect to mountfsd: %m");
+
                 _cleanup_close_ int directory_fd = -EBADF;
-                r = mountfsd_make_directory(st->temp_path, MODE_INVALID, /* flags= */ 0, &directory_fd);
+                r = mountfsd_make_directory(
+                                mountfsd_link,
+                                st->temp_path,
+                                MODE_INVALID,
+                                /* flags= */ 0,
+                                &directory_fd);
                 if (r < 0)
                         return log_error_errno(r, "Failed to make directory via mountfsd: %m");
 
-                r = mountfsd_mount_directory_fd(directory_fd, i->userns_fd, DISSECT_IMAGE_FOREIGN_UID, &st->tree_fd);
+                r = mountfsd_mount_directory_fd(
+                                mountfsd_link,
+                                directory_fd,
+                                i->userns_fd,
+                                DISSECT_IMAGE_FOREIGN_UID,
+                                &st->tree_fd);
                 if (r < 0)
                         return log_error_errno(r, "Failed to mount directory via mountsd: %m");
         } else {
@@ -1140,6 +1155,7 @@ static int oci_pull_save_mstack(OciPull *i) {
                                 return r;
 
                         r = mountfsd_make_directory_fd(
+                                        /* vl= */ NULL,
                                         dir_fd,
                                         "rw",
                                         0755,
index b3d80921067ae18e14f30aae2b1e9359894da0fa..ae763879b78b21580321ddbd920d61472a4e7044 100644 (file)
@@ -2,6 +2,7 @@
 
 #include "sd-daemon.h"
 #include "sd-event.h"
+#include "sd-varlink.h"
 
 #include "alloc-util.h"
 #include "btrfs-util.h"
@@ -276,6 +277,11 @@ static int tar_pull_make_local_copy(TarPull *p) {
                         if (r < 0)
                                 return r;
 
+                        _cleanup_(sd_varlink_unrefp) sd_varlink *mountfsd_link = NULL;
+                        r = mountfsd_connect(&mountfsd_link);
+                        if (r < 0)
+                                return log_error_errno(r, "Failed to connect to mountsd: %m");
+
                         /* Usually, tar_pull_job_on_open_disk_tar() would allocate ->tree_fd for us, but if
                          * already downloaded the image before, and are just making a copy of the original
                          * download, we need to open ->tree_fd now */
@@ -294,18 +300,33 @@ static int tar_pull_make_local_copy(TarPull *p) {
                                                         "Image tree '%s' is not owned by the foreign UID range, refusing.",
                                                         p->final_path);
 
-                                r = mountfsd_mount_directory_fd(directory_fd, p->userns_fd, DISSECT_IMAGE_FOREIGN_UID, &p->tree_fd);
+                                r = mountfsd_mount_directory_fd(
+                                                mountfsd_link,
+                                                directory_fd,
+                                                p->userns_fd,
+                                                DISSECT_IMAGE_FOREIGN_UID,
+                                                &p->tree_fd);
                                 if (r < 0)
                                         return log_error_errno(r, "Failed to mount directory via mountfsd: %m");
                         }
 
                         _cleanup_close_ int directory_fd = -EBADF;
-                        r = mountfsd_make_directory(t, MODE_INVALID, /* flags= */ 0, &directory_fd);
+                        r = mountfsd_make_directory(
+                                        mountfsd_link,
+                                        t,
+                                        MODE_INVALID,
+                                        /* flags= */ 0,
+                                        &directory_fd);
                         if (r < 0)
                                 return log_error_errno(r, "Failed to make directory via mountfsd: %m");
 
                         _cleanup_close_ int copy_fd = -EBADF;
-                        r = mountfsd_mount_directory_fd(directory_fd, p->userns_fd, DISSECT_IMAGE_FOREIGN_UID, &copy_fd);
+                        r = mountfsd_mount_directory_fd(
+                                        mountfsd_link,
+                                        directory_fd,
+                                        p->userns_fd,
+                                        DISSECT_IMAGE_FOREIGN_UID,
+                                        &copy_fd);
                         if (r < 0)
                                 return log_error_errno(r, "Failed to mount directory via mountfsd: %m");
 
@@ -611,12 +632,27 @@ static int tar_pull_job_on_open_disk_tar(PullJob *j) {
                 if (r < 0)
                         return r;
 
+                _cleanup_(sd_varlink_unrefp) sd_varlink *mountfsd_link = NULL;
+                r = mountfsd_connect(&mountfsd_link);
+                if (r < 0)
+                        return log_error_errno(r, "Failed to connect to mountfsd: %m");
+
                 _cleanup_close_ int directory_fd = -EBADF;
-                r = mountfsd_make_directory(where, MODE_INVALID, /* flags= */ 0, &directory_fd);
+                r = mountfsd_make_directory(
+                                mountfsd_link,
+                                where,
+                                MODE_INVALID,
+                                /* flags= */ 0,
+                                &directory_fd);
                 if (r < 0)
                         return log_error_errno(r, "Failed to make directory via mountfsd: %m");
 
-                r = mountfsd_mount_directory_fd(directory_fd, p->userns_fd, DISSECT_IMAGE_FOREIGN_UID, &p->tree_fd);
+                r = mountfsd_mount_directory_fd(
+                                mountfsd_link,
+                                directory_fd,
+                                p->userns_fd,
+                                DISSECT_IMAGE_FOREIGN_UID,
+                                &p->tree_fd);
                 if (r < 0)
                         return log_error_errno(r, "Failed to mount directory via mountfsd: %m");
         } else {
index 7b8812f03a76b26547d7f2a7621423b9c4c6d212..25f1a8455164afc33b13f256a6087489e598df2b 100644 (file)
@@ -662,6 +662,7 @@ static int vl_method_mount_image(
         if (r < 0)
                 return r;
 
+        _cleanup_(sd_varlink_unrefp) sd_varlink *nsresource_link = NULL;
         for (PartitionDesignator d = 0; d < _PARTITION_DESIGNATOR_MAX; d++) {
                 DissectedPartition *pp = di->partitions + d;
                 int fd_idx;
@@ -673,7 +674,14 @@ static int vl_method_mount_image(
                         continue;
 
                 if (userns_fd >= 0) {
-                        r = nsresource_add_mount(userns_fd, pp->fsmount_fd);
+
+                        if (!nsresource_link) {
+                                r = nsresource_connect(&nsresource_link);
+                                if (r < 0)
+                                        return r;
+                        }
+
+                        r = nsresource_add_mount(nsresource_link, userns_fd, pp->fsmount_fd);
                         if (r < 0)
                                 return r;
                 }
@@ -1206,7 +1214,7 @@ static int vl_method_mount_directory(
         }
 
         if (userns_fd >= 0) {
-                r = nsresource_add_mount(userns_fd, mount_fd);
+                r = nsresource_add_mount(/* vl= */ NULL, userns_fd, mount_fd);
                 if (r < 0)
                         return r;
         }
index 01a71d6e567a2b66dd6cf16cc83aaceb002f803d..bc86d8565a37861862558a965b6be8e301fd22cc 100644 (file)
@@ -361,6 +361,7 @@ static int mount_mstack(void) {
                         /* dir_fd= */ -EBADF,
                         arg_where,
                         /* temp_mount_dir= */ NULL,  /* auto-create temporary directory */
+                        /* mountfsd_link= */ NULL,
                         /* userns_fd= */ -EBADF,
                         arg_image_policy,
                         arg_image_filter,
index 4206fe94e529a1730eecea17cc714712f3e44b12..f1d45bbb9b882e3c7c3d06d1fe2551d97f54a773 100644 (file)
@@ -108,7 +108,10 @@ int create_subcgroup(
                 if (r < 0)
                         return log_error_errno(r, "Failed to add process " PID_FMT " to cgroup %s: %m", pid->pid, payload);
 
-                r = nsresource_add_cgroup(userns_fd, cgroup_fd);
+                r = nsresource_add_cgroup(
+                                /* vl= */ NULL,
+                                userns_fd,
+                                cgroup_fd);
                 if (r < 0)
                         return log_error_errno(r, "Failed to add cgroup %s to userns: %m", payload);
         } else {
index 47300ff7e3c13362124cbb6f6853a34ac5886539..08afa171ae886fb9a26c3cdb13a8b939dc080836 100644 (file)
@@ -19,6 +19,7 @@
 #include "sd-id128.h"
 #include "sd-netlink.h"
 #include "sd-path.h"
+#include "sd-varlink.h"
 
 #include "alloc-util.h"
 #include "barrier.h"
@@ -5442,7 +5443,13 @@ static int run_container(
                         } else {
                                 _cleanup_free_ char *host_ifname = NULL;
 
-                                r = nsresource_add_netif_veth(userns_fd, child_netns_fd, /* namespace_ifname= */ NULL, &host_ifname, /* ret_namespace_ifname= */ NULL);
+                                r = nsresource_add_netif_veth(
+                                                /* vl= */ NULL,
+                                                userns_fd,
+                                                child_netns_fd,
+                                                /* namespace_ifname= */ NULL,
+                                                &host_ifname,
+                                                /* ret_namespace_ifname= */ NULL);
                                 if (r < 0)
                                         return log_error_errno(r, "Failed to add network interface to container: %m");
 
@@ -6030,6 +6037,7 @@ static int run(int argc, char *argv[]) {
         _cleanup_(mstack_freep) MStack *mstack = NULL;
         _cleanup_(sd_netlink_unrefp) sd_netlink *nfnl = NULL;
         _cleanup_(pidref_done) PidRef pid = PIDREF_NULL;
+        _cleanup_(sd_varlink_unrefp) sd_varlink *nsresource_link = NULL, *mountfsd_link = NULL;
 
         log_setup();
 
@@ -6132,13 +6140,28 @@ static int run(int argc, char *argv[]) {
         if (arg_userns_mode == USER_NAMESPACE_MANAGED) {
                 /* Let's allocate a 64K userns first, if managed mode is chosen */
 
+                r = nsresource_connect(&nsresource_link);
+                if (r < 0) {
+                        log_error_errno(r, "Failed to connect to nsresourced: %m");
+                        goto finish;
+                }
+
+                r = mountfsd_connect(&mountfsd_link);
+                if (r < 0) {
+                        log_error_errno(r, "Failed to connect to mountsd: %m");
+                        goto finish;
+                }
+
                 _cleanup_free_ char *userns_name = NULL;
                 if (asprintf(&userns_name, "nspawn-" PID_FMT "-%s", getpid_cached(), arg_machine) < 0) {
                         r = log_oom();
                         goto finish;
                 }
 
-                userns_fd = nsresource_allocate_userns(userns_name, NSRESOURCE_UIDS_64K); /* allocate 64K UIDs */
+                userns_fd = nsresource_allocate_userns(
+                                nsresource_link,
+                                userns_name,
+                                NSRESOURCE_UIDS_64K); /* allocate 64K UIDs */
                 if (userns_fd < 0) {
                         r = log_error_errno(userns_fd, "Failed to allocate user namespace with 64K users: %m");
                         goto finish;
@@ -6293,6 +6316,7 @@ static int run(int argc, char *argv[]) {
 
                 if (userns_fd >= 0) {
                         r = mountfsd_mount_directory(
+                                        mountfsd_link,
                                         arg_directory,
                                         userns_fd,
                                         determine_dissect_image_flags(),
@@ -6443,6 +6467,7 @@ static int run(int argc, char *argv[]) {
                                 goto finish;
                 } else {
                         r = mountfsd_mount_image(
+                                        mountfsd_link,
                                         arg_image,
                                         userns_fd,
                                         /* options= */ NULL,
@@ -6485,6 +6510,7 @@ static int run(int argc, char *argv[]) {
 
                 r = mstack_open_images(
                                 mstack,
+                                mountfsd_link,
                                 userns_fd,
                                 arg_image_policy,
                                 /* image_filter= */ NULL,
@@ -6507,6 +6533,9 @@ static int run(int argc, char *argv[]) {
         if (r < 0)
                 goto finish;
 
+        mountfsd_link = sd_varlink_unref(mountfsd_link);
+        nsresource_link = sd_varlink_unref(nsresource_link);
+
         if (!arg_quiet) {
                 const char *t = arg_mstack ?: arg_image ?: arg_directory;
                 _cleanup_free_ char *u = NULL;
@@ -6547,7 +6576,8 @@ static int run(int argc, char *argv[]) {
                                 mstack,
                                 userns_fd,
                                 fds,
-                                veth_name, &veth_created,
+                                veth_name,
+                                &veth_created,
                                 &expose_args, &master,
                                 &pid, &ret);
                 if (r <= 0)
index 191125b9c58bfb65057d2f59c9ea74f58d1de3d7..2fe015d94ca0d63f23fd1973db40e5a2c119aa3e 100644 (file)
@@ -6,6 +6,7 @@
 
 #include "sd-bus.h"
 #include "sd-messages.h"
+#include "sd-varlink.h"
 
 #include "bus-common-errors.h"
 #include "bus-error.h"
@@ -483,12 +484,20 @@ static int portable_extract_by_path(
                         return log_error_errno(r, "Failed to extract image name from path '%s': %m", path);
 
                 if (scope == RUNTIME_SCOPE_USER && uid_is_foreign(st.st_uid)) {
-                        _cleanup_close_ int userns_fd = nsresource_allocate_userns(/* name= */ NULL, NSRESOURCE_UIDS_64K);
+                        _cleanup_close_ int userns_fd = nsresource_allocate_userns(
+                                        /* vl= */ NULL,
+                                        /* name= */ NULL,
+                                        NSRESOURCE_UIDS_64K);
                         if (userns_fd < 0)
                                 return log_debug_errno(userns_fd, "Failed to allocate user namespace: %m");
 
                         _cleanup_close_ int mfd = -EBADF;
-                        r = mountfsd_mount_directory_fd(rfd, userns_fd, DISSECT_IMAGE_FOREIGN_UID, &mfd);
+                        r = mountfsd_mount_directory_fd(
+                                        /* vl= */ NULL,
+                                        rfd,
+                                        userns_fd,
+                                        DISSECT_IMAGE_FOREIGN_UID,
+                                        &mfd);
                         if (r < 0)
                                 return r;
 
@@ -604,11 +613,15 @@ static int portable_extract_by_path(
                         return log_debug_errno(r, "Failed to create temporary directory: %m");
 
                 if (scope == RUNTIME_SCOPE_USER) {
-                        userns_fd = nsresource_allocate_userns(/* name= */ NULL, NSRESOURCE_UIDS_64K);
+                        userns_fd = nsresource_allocate_userns(
+                                        /* vl= */ NULL,
+                                        /* name= */ NULL,
+                                        NSRESOURCE_UIDS_64K);
                         if (userns_fd < 0)
                                 return log_debug_errno(userns_fd, "Failed to allocate user namespace: %m");
 
                         r = mountfsd_mount_image_fd(
+                                        /* vl= */ NULL,
                                         rfd,
                                         userns_fd,
                                         /* options= */ NULL,
@@ -1808,7 +1821,10 @@ static int install_image(
 
         if (flags & PORTABLE_MIXED_COPY_LINK) {
                 if (scope == RUNTIME_SCOPE_USER) {
-                        _cleanup_close_ int userns_fd = nsresource_allocate_userns(/* name= */ NULL, NSRESOURCE_UIDS_64K);
+                        _cleanup_close_ int userns_fd = nsresource_allocate_userns(
+                                        /* vl= */ NULL,
+                                        /* name= */ NULL,
+                                        NSRESOURCE_UIDS_64K);
                         if (userns_fd < 0)
                                 return log_debug_errno(userns_fd, "Failed to allocate user namespace: %m");
 
@@ -1820,21 +1836,31 @@ static int install_image(
                         if (fstat(fd, &st) < 0)
                                 return log_error_errno(errno, "Failed to stat '%s': %m", image_path);
 
+                        _cleanup_(sd_varlink_unrefp) sd_varlink *mountfsd_link = NULL;
+                        r = mountfsd_connect(&mountfsd_link);
+                        if (r < 0)
+                                return r;
+
                         _cleanup_close_ int tree_fd = -EBADF;
                         if (uid_is_foreign(st.st_uid)) {
-                                r = mountfsd_mount_directory_fd(fd, userns_fd, DISSECT_IMAGE_FOREIGN_UID, &tree_fd);
+                                r = mountfsd_mount_directory_fd(
+                                                mountfsd_link,
+                                                fd,
+                                                userns_fd,
+                                                DISSECT_IMAGE_FOREIGN_UID,
+                                                &tree_fd);
                                 if (r < 0)
                                         return r;
                         } else
                                 tree_fd = TAKE_FD(fd);
 
                         _cleanup_close_ int directory_fd = -EBADF;
-                        r = mountfsd_make_directory(target, MODE_INVALID, /* flags= */ 0, &directory_fd);
+                        r = mountfsd_make_directory(mountfsd_link, target, MODE_INVALID, /* flags= */ 0, &directory_fd);
                         if (r < 0)
                                 return r;
 
                         _cleanup_close_ int copy_fd = -EBADF;
-                        r = mountfsd_mount_directory_fd(directory_fd, userns_fd, DISSECT_IMAGE_FOREIGN_UID, &copy_fd);
+                        r = mountfsd_mount_directory_fd(mountfsd_link, directory_fd, userns_fd, DISSECT_IMAGE_FOREIGN_UID, &copy_fd);
                         if (r < 0)
                                 return r;
 
index 064a3e8002a4df708e713def7915a1cd376cd714..0575310a2c200db620653da68acc6bf24a02d028 100644 (file)
@@ -11,6 +11,7 @@
 
 #include "sd-json.h"
 #include "sd-path.h"
+#include "sd-varlink.h"
 
 #include "alloc-util.h"
 #include "blockdev-util.h"
@@ -1279,12 +1280,16 @@ static int unprivileged_remove(Image *i) {
 
         assert(i);
 
-        _cleanup_close_ int userns_fd = nsresource_allocate_userns(/* name= */ NULL, /* size= */ NSRESOURCE_UIDS_64K);
+        _cleanup_close_ int userns_fd = nsresource_allocate_userns(
+                        /* vl= */ NULL,
+                        /* name= */ NULL,
+                        /* size= */ NSRESOURCE_UIDS_64K);
         if (userns_fd < 0)
                 return log_debug_errno(userns_fd, "Failed to allocate transient user namespace: %m");
 
         _cleanup_close_ int tree_fd = -EBADF;
         r = mountfsd_mount_directory(
+                        /* vl= */ NULL,
                         i->path,
                         userns_fd,
                         DISSECT_IMAGE_FOREIGN_UID,
@@ -1623,13 +1628,22 @@ static int unprivileged_clone(Image *i, const char *new_path) {
         assert(i);
         assert(new_path);
 
-        _cleanup_close_ int userns_fd = nsresource_allocate_userns(/* name= */ NULL, /* size= */ NSRESOURCE_UIDS_64K);
+        _cleanup_close_ int userns_fd = nsresource_allocate_userns(
+                        /* vl= */ NULL,
+                        /* name= */ NULL,
+                        /* size= */ NSRESOURCE_UIDS_64K);
         if (userns_fd < 0)
                 return log_debug_errno(userns_fd, "Failed to allocate transient user namespace: %m");
 
+        _cleanup_(sd_varlink_unrefp) sd_varlink *link = NULL;
+        r = mountfsd_connect(&link);
+        if (r < 0)
+                return r;
+
         /* Map original image */
         _cleanup_close_ int tree_fd = -EBADF;
         r = mountfsd_mount_directory(
+                        link,
                         i->path,
                         userns_fd,
                         DISSECT_IMAGE_FOREIGN_UID,
@@ -1640,6 +1654,7 @@ static int unprivileged_clone(Image *i, const char *new_path) {
         /* Make new image */
         _cleanup_close_ int new_fd = -EBADF;
         r = mountfsd_make_directory(
+                        link,
                         new_path,
                         MODE_INVALID,
                         /* flags= */ 0,
@@ -1650,6 +1665,7 @@ static int unprivileged_clone(Image *i, const char *new_path) {
         /* Mount new image */
         _cleanup_close_ int target_fd = -EBADF;
         r = mountfsd_mount_directory_fd(
+                        link,
                         new_fd,
                         userns_fd,
                         DISSECT_IMAGE_FOREIGN_UID,
@@ -1657,6 +1673,8 @@ static int unprivileged_clone(Image *i, const char *new_path) {
         if (r < 0)
                 return r;
 
+        link = sd_varlink_unref(link);
+
         /* Fork off child that moves into userns and does the copying */
         return copy_tree_at_foreign(tree_fd, target_fd, userns_fd);
 }
index 0c0274fbc52ea2bde1aa2f8568f6fa6df8e89003..eb4b6bd609967a59532c7cb07e1174c189f54be6 100644 (file)
@@ -4958,6 +4958,7 @@ int verity_dissect_and_mount(
                                 return log_debug_errno(userns_fd, "Failed to open our own user namespace: %m");
 
                         r = mountfsd_mount_image(
+                                        /* vl= */ NULL,
                                         src_fd >= 0 ? FORMAT_PROC_FD_PATH(src_fd) : src,
                                         userns_fd,
                                         options,
@@ -5125,7 +5126,30 @@ static void mount_image_reply_parameters_done(MountImageReplyParameters *p) {
 
 #endif
 
+int mountfsd_connect(sd_varlink **ret) {
+        int r;
+        /* Opens a Varlink connection to mountfsd. Returns 0 on success, the logged error on failure. */
+        assert(ret);
+
+        _cleanup_(sd_varlink_unrefp) sd_varlink *vl = NULL;
+        r = sd_varlink_connect_address(&vl, "/run/systemd/io.systemd.MountFileSystem");
+        if (r < 0)
+                return log_debug_errno(r, "Failed to connect to mountfsd: %m");
+        /* Allow file descriptors to be passed over the connection in both directions. */
+        r = sd_varlink_set_allow_fd_passing_input(vl, true);
+        if (r < 0)
+                return log_debug_errno(r, "Failed to enable varlink fd passing for read: %m");
+
+        r = sd_varlink_set_allow_fd_passing_output(vl, true);
+        if (r < 0)
+                return log_debug_errno(r, "Failed to enable varlink fd passing for write: %m");
+        /* Success: pass the connection object on to the caller. */
+        *ret = TAKE_PTR(vl);
+        return 0;
+}
+
 int mountfsd_mount_image_fd(
+                sd_varlink *vl,
                 int image_fd,
                 int userns_fd,
                 const MountOptions *options,
@@ -5149,7 +5173,6 @@ int mountfsd_mount_image_fd(
 
         _cleanup_(dissected_image_unrefp) DissectedImage *di = NULL;
         _cleanup_close_ int verity_data_fd = -EBADF;
-        _cleanup_(sd_varlink_unrefp) sd_varlink *vl = NULL;
         _cleanup_free_ char *ps = NULL;
         const char *error_id;
         int r;
@@ -5157,17 +5180,14 @@ int mountfsd_mount_image_fd(
         assert(image_fd >= 0);
         assert(ret);
 
-        r = sd_varlink_connect_address(&vl, "/run/systemd/io.systemd.MountFileSystem");
-        if (r < 0)
-                return log_error_errno(r, "Failed to connect to mountfsd: %m");
-
-        r = sd_varlink_set_allow_fd_passing_input(vl, true);
-        if (r < 0)
-                return log_error_errno(r, "Failed to enable varlink fd passing for read: %m");
+        _cleanup_(sd_varlink_unrefp) sd_varlink *_vl = NULL;
+        if (!vl) {
+                r = mountfsd_connect(&_vl);
+                if (r < 0)
+                        return r;
 
-        r = sd_varlink_set_allow_fd_passing_output(vl, true);
-        if (r < 0)
-                return log_error_errno(r, "Failed to enable varlink fd passing for write: %m");
+                vl = _vl;
+        }
 
         _cleanup_close_ int reopened_fd = -EBADF;
 
@@ -5337,6 +5357,7 @@ int mountfsd_mount_image_fd(
 }
 
 int mountfsd_mount_image(
+                sd_varlink *vl,
                 const char *path,
                 int userns_fd,
                 const MountOptions *options,
@@ -5355,7 +5376,7 @@ int mountfsd_mount_image(
                 return log_debug_errno(errno, "Failed to open '%s': %m", path);
 
         _cleanup_(dissected_image_unrefp) DissectedImage *di = NULL;
-        r = mountfsd_mount_image_fd(image_fd, userns_fd, options, image_policy, verity, flags, &di);
+        r = mountfsd_mount_image_fd(vl, image_fd, userns_fd, options, image_policy, verity, flags, &di);
         if (r < 0)
                 return r;
 
@@ -5370,6 +5391,7 @@ int mountfsd_mount_image(
 }
 
 int mountfsd_mount_directory_fd(
+                sd_varlink *vl,
                 int directory_fd,
                 int userns_fd,
                 DissectImageFlags flags,
@@ -5383,18 +5405,14 @@ int mountfsd_mount_directory_fd(
         /* Pick one identity, not both, that makes no sense. */
         assert(!FLAGS_SET(flags, DISSECT_IMAGE_FOREIGN_UID|DISSECT_IMAGE_IDENTITY_UID));
 
-        _cleanup_(sd_varlink_unrefp) sd_varlink *vl = NULL;
-        r = sd_varlink_connect_address(&vl, "/run/systemd/io.systemd.MountFileSystem");
-        if (r < 0)
-                return log_error_errno(r, "Failed to connect to mountfsd: %m");
-
-        r = sd_varlink_set_allow_fd_passing_input(vl, true);
-        if (r < 0)
-                return log_error_errno(r, "Failed to enable varlink fd passing for read: %m");
+        _cleanup_(sd_varlink_unrefp) sd_varlink *_vl = NULL;
+        if (!vl) {
+                r = mountfsd_connect(&_vl);
+                if (r < 0)
+                        return r;
 
-        r = sd_varlink_set_allow_fd_passing_output(vl, true);
-        if (r < 0)
-                return log_error_errno(r, "Failed to enable varlink fd passing for write: %m");
+                vl = _vl;
+        }
 
         r = sd_varlink_push_dup_fd(vl, directory_fd);
         if (r < 0)
@@ -5441,6 +5459,7 @@ int mountfsd_mount_directory_fd(
 }
 
 int mountfsd_mount_directory(
+                sd_varlink *vl,
                 const char *path,
                 int userns_fd,
                 DissectImageFlags flags,
@@ -5453,10 +5472,11 @@ int mountfsd_mount_directory(
         if (directory_fd < 0)
                 return log_debug_errno(errno, "Failed to open '%s': %m", path);
 
-        return mountfsd_mount_directory_fd(directory_fd, userns_fd, flags, ret_mount_fd);
+        return mountfsd_mount_directory_fd(vl, directory_fd, userns_fd, flags, ret_mount_fd);
 }
 
 int mountfsd_make_directory_fd(
+                sd_varlink *vl,
                 int parent_fd,
                 const char *name,
                 mode_t mode,
@@ -5468,18 +5488,14 @@ int mountfsd_make_directory_fd(
         assert(parent_fd >= 0);
         assert(name);
 
-        _cleanup_(sd_varlink_unrefp) sd_varlink *vl = NULL;
-        r = sd_varlink_connect_address(&vl, "/run/systemd/io.systemd.MountFileSystem");
-        if (r < 0)
-                return log_error_errno(r, "Failed to connect to mountfsd: %m");
-
-        r = sd_varlink_set_allow_fd_passing_input(vl, true);
-        if (r < 0)
-                return log_error_errno(r, "Failed to enable varlink fd passing for read: %m");
+        _cleanup_(sd_varlink_unrefp) sd_varlink *_vl = NULL;
+        if (!vl) {
+                r = mountfsd_connect(&_vl);
+                if (r < 0)
+                        return r;
 
-        r = sd_varlink_set_allow_fd_passing_output(vl, true);
-        if (r < 0)
-                return log_error_errno(r, "Failed to enable varlink fd passing for write: %m");
+                vl = _vl;
+        }
 
         r = sd_varlink_push_dup_fd(vl, parent_fd);
         if (r < 0)
@@ -5519,6 +5535,7 @@ int mountfsd_make_directory_fd(
 }
 
 int mountfsd_make_directory(
+                sd_varlink *vl,
                 const char *path,
                 mode_t mode,
                 DissectImageFlags flags,
@@ -5540,7 +5557,7 @@ int mountfsd_make_directory(
         if (fd < 0)
                 return log_debug_errno(r, "Failed to open '%s': %m", parent);
 
-        return mountfsd_make_directory_fd(fd, dirname, mode, flags, ret_directory_fd);
+        return mountfsd_make_directory_fd(vl, fd, dirname, mode, flags, ret_directory_fd);
 }
 
 int copy_tree_at_foreign(int source_fd, int target_fd, int userns_fd) {
@@ -5600,6 +5617,7 @@ int remove_tree_foreign(const char *path, int userns_fd) {
 
         _cleanup_close_ int tree_fd = -EBADF;
         r = mountfsd_mount_directory(
+                        /* vl= */ NULL,
                         path,
                         userns_fd,
                         DISSECT_IMAGE_FOREIGN_UID,
@@ -5611,7 +5629,7 @@ int remove_tree_foreign(const char *path, int userns_fd) {
                         "rm-tree",
                         /* stdio_fds= */ NULL,
                         (int[]) { userns_fd, tree_fd }, 2,
-                        FORK_RESET_SIGNALS|FORK_CLOSE_ALL_FDS|FORK_DEATHSIG_SIGTERM|FORK_LOG|FORK_REOPEN_LOG|FORK_WAIT,
+                        FORK_RESET_SIGNALS|FORK_CLOSE_ALL_FDS|FORK_DEATHSIG_SIGTERM|FORK_WAIT|FORK_REOPEN_LOG,
                         /* ret= */ NULL);
         if (r < 0)
                 return r;
@@ -5625,19 +5643,19 @@ int remove_tree_foreign(const char *path, int userns_fd) {
                                 userns_fd,
                                 /* root_fd= */ -EBADF);
                 if (r < 0) {
-                        log_error_errno(r, "Failed to join user namespace: %m");
+                        log_debug_errno(r, "Failed to join user namespace: %m");
                         _exit(EXIT_FAILURE);
                 }
 
                 _cleanup_close_ int dfd = fd_reopen(tree_fd, O_DIRECTORY|O_CLOEXEC);
                 if (dfd < 0) {
-                        log_error_errno(r, "Failed to reopen tree fd: %m");
+                        log_debug_errno(r, "Failed to reopen tree fd: %m");
                         _exit(EXIT_FAILURE);
                 }
 
                 r = rm_rf_children(dfd, REMOVE_PHYSICAL|REMOVE_SUBVOLUME|REMOVE_CHMOD, /* root_dev= */ NULL);
                 if (r < 0)
-                        log_warning_errno(r, "Failed to empty '%s' directory in foreign UID mode, ignoring: %m", path);
+                        log_debug_errno(r, "Failed to empty '%s' directory in foreign UID mode, ignoring: %m", path);
 
                 _exit(EXIT_SUCCESS);
         }
index aa8c6a2737b4bbdce8bba844da4749e7d9ceb255..09d7db5952b60ee880f7061e653dddcea9a314a9 100644 (file)
@@ -269,13 +269,23 @@ static inline const char* dissected_partition_fstype(const DissectedPartition *m
 
 int get_common_dissect_directory(char **ret);
 
-int mountfsd_mount_image_fd(int image_fd, int userns_fd, const MountOptions *options, const ImagePolicy *image_policy, const VeritySettings *verity, DissectImageFlags flags, DissectedImage **ret);
-int mountfsd_mount_image(const char *path, int userns_fd, const MountOptions *options, const ImagePolicy *image_policy, const VeritySettings *verity, DissectImageFlags flags, DissectedImage **ret);
-int mountfsd_mount_directory_fd(int directory_fd, int userns_fd, DissectImageFlags flags, int *ret_mount_fd);
-int mountfsd_mount_directory(const char *path, int userns_fd, DissectImageFlags flags, int *ret_mount_fd);
-
-int mountfsd_make_directory_fd(int parent_fd, const char *name, mode_t mode, DissectImageFlags flags, int *ret_directory_fd);
-int mountfsd_make_directory(const char *path, mode_t mode, DissectImageFlags flags, int *ret_directory_fd);
+int mountfsd_connect(sd_varlink **ret);
+
+/* All the calls below take a 'vl' parameter, which may be an already established Varlink connection object
+ * towards systemd-mountfsd, previously created via mountfsd_connect(). This serves two purposes: first, it
+ * allows more efficient resource usage, as already allocated resources can be recycled for multiple
+ * calls. Secondly, the user credentials are pinned at the time of mountfsd_connect(), and the caller can
+ * hence drop privileges afterwards while keeping the connection open, and still execute relevant
+ * operations under the original identity, until the connection is closed. The 'vl' parameter may be passed
+ * as NULL, in which case a short-lived connection is created, just to execute the requested operation. */
+
+int mountfsd_mount_image_fd(sd_varlink *vl, int image_fd, int userns_fd, const MountOptions *options, const ImagePolicy *image_policy, const VeritySettings *verity, DissectImageFlags flags, DissectedImage **ret);
+int mountfsd_mount_image(sd_varlink *vl, const char *path, int userns_fd, const MountOptions *options, const ImagePolicy *image_policy, const VeritySettings *verity, DissectImageFlags flags, DissectedImage **ret);
+int mountfsd_mount_directory_fd(sd_varlink *vl, int directory_fd, int userns_fd, DissectImageFlags flags, int *ret_mount_fd);
+int mountfsd_mount_directory(sd_varlink *vl, const char *path, int userns_fd, DissectImageFlags flags, int *ret_mount_fd);
+
+int mountfsd_make_directory_fd(sd_varlink *vl, int parent_fd, const char *name, mode_t mode, DissectImageFlags flags, int *ret_directory_fd);
+int mountfsd_make_directory(sd_varlink *vl, const char *path, mode_t mode, DissectImageFlags flags, int *ret_directory_fd);
 
 int copy_tree_at_foreign(int source_fd, int target_fd, int userns_fd);
 int remove_tree_foreign(const char *path, int userns_fd);
index 8b0beb21ade55ab18180696d76d3842b3052c35c..9cbc316319114fe586a1f3530ed5e9f2301a6e3b 100644 (file)
@@ -4,6 +4,8 @@
 #include <sys/mount.h>
 #include <unistd.h>
 
+#include "sd-varlink.h"
+
 #include "alloc-util.h"
 #include "chase.h"
 #include "dissect-image.h"
@@ -515,6 +517,7 @@ static const char *mount_name(MStackMount *m) {
 
 int mstack_open_images(
                 MStack *mstack,
+                sd_varlink *mountfsd_link,
                 int userns_fd,
                 const ImagePolicy *image_policy,
                 const ImageFilter *image_filter,
@@ -524,6 +527,16 @@ int mstack_open_images(
 
         assert(mstack);
 
+        _cleanup_(sd_varlink_unrefp) sd_varlink *_vl = NULL;
+        if (userns_fd >= 0 && !mountfsd_link) {
+                /* Use a single connection for all mounts */
+                r = mountfsd_connect(&_vl);
+                if (r < 0)
+                        return r;
+
+                mountfsd_link = _vl;
+        }
+
         FOREACH_ARRAY(m, mstack->mounts, mstack->n_mounts) {
 
                 DissectImageFlags dissect_image_flags =
@@ -549,6 +562,7 @@ int mstack_open_images(
 
                         if (userns_fd >= 0) {
                                 r = mountfsd_mount_image_fd(
+                                                mountfsd_link,
                                                 m->what_fd,
                                                 userns_fd,
                                                 /* options= */ NULL,
@@ -642,6 +656,7 @@ int mstack_open_images(
 
                         if (userns_fd >= 0) {
                                 r = mountfsd_mount_directory_fd(
+                                                mountfsd_link,
                                                 m->what_fd,
                                                 userns_fd,
                                                 dissect_image_flags,
@@ -1070,6 +1085,7 @@ int mstack_apply(
                 int dir_fd,
                 const char *where,
                 const char *temp_mount_dir,
+                sd_varlink *link,
                 int userns_fd,
                 const ImagePolicy *image_policy,
                 const ImageFilter *image_filter,
@@ -1084,7 +1100,7 @@ int mstack_apply(
         if (r < 0)
                 return r;
 
-        r = mstack_open_images(&mstack, userns_fd, image_policy, image_filter, flags);
+        r = mstack_open_images(&mstack, link, userns_fd, image_policy, image_filter, flags);
         if (r < 0)
                 return r;
 
index e526f17d98316547f6255e48879c352a56c74d7f..b71ff86940b1ff39c843b7f46cb30f667baa9127 100644 (file)
@@ -51,12 +51,12 @@ MStack *mstack_free(MStack *mstack);
 DEFINE_TRIVIAL_CLEANUP_FUNC(MStack*, mstack_free);
 
 int mstack_load(const char *dir, int dir_fd, MStack **ret);
-int mstack_open_images(MStack *mstack, int userns_fd, const ImagePolicy *image_policy, const ImageFilter *image_filter, MStackFlags flags);
+int mstack_open_images(MStack *mstack, sd_varlink *mountfsd_link, int userns_fd, const ImagePolicy *image_policy, const ImageFilter *image_filter, MStackFlags flags);
 int mstack_make_mounts(MStack *mstack, const char *temp_mount_dir, MStackFlags flags);
 int mstack_bind_mounts(MStack *mstack, const char *where, int where_fd, MStackFlags flags, int *ret_root_fd);
 
 /* The four calls above in one */
-int mstack_apply(const char *dir, int dir_fd, const char *where, const char *temp_mount_dir, int userns_fd, const ImagePolicy *image_policy, const ImageFilter *image_filter, MStackFlags flags, int *ret_root_fd);
+int mstack_apply(const char *dir, int dir_fd, const char *where, const char *temp_mount_dir, sd_varlink *mountfsd_link, int userns_fd, const ImagePolicy *image_policy, const ImageFilter *image_filter, MStackFlags flags, int *ret_root_fd);
 
 int mstack_is_read_only(MStack *mstack);
 int mstack_is_foreign_uid_owned(MStack *mstack);
index 651d9bdf4d64256e81ee9dbde54f90d3815aee83..615f99eff10879ca7e1b84691a18fd0c4298ec1f 100644 (file)
@@ -57,8 +57,25 @@ static int make_pid_name(char **ret) {
         return 0;
 }
 
-int nsresource_allocate_userns(const char *name, uint64_t size) {
+int nsresource_connect(sd_varlink **ret) {
+        int r;
+        /* Opens a Varlink connection to nsresourced. Returns 0 on success, the logged error on failure. */
+        assert(ret);
+
         _cleanup_(sd_varlink_unrefp) sd_varlink *vl = NULL;
+        r = sd_varlink_connect_address(&vl, "/run/systemd/io.systemd.NamespaceResource");
+        if (r < 0)
+                return log_debug_errno(r, "Failed to connect to namespace resource manager: %m");
+        /* Only output fd passing is enabled here; nsresource_add_netif_tap() additionally enables input. */
+        r = sd_varlink_set_allow_fd_passing_output(vl, true);
+        if (r < 0)
+                return log_debug_errno(r, "Failed to enable varlink fd passing for write: %m");
+        /* Success: pass the connection object on to the caller. */
+        *ret = TAKE_PTR(vl);
+        return 0;
+}
+
+int nsresource_allocate_userns(sd_varlink *vl, const char *name, uint64_t size) {
         _cleanup_close_ int userns_fd = -EBADF;
         _cleanup_free_ char *_name = NULL;
         const char *error_id;
@@ -77,13 +94,14 @@ int nsresource_allocate_userns(const char *name, uint64_t size) {
         if (size <= 0 || size > UINT64_C(0x100000000)) /* Note: the server actually only allows allocating 1 or 64K right now */
                 return -EINVAL;
 
-        r = sd_varlink_connect_address(&vl, "/run/systemd/io.systemd.NamespaceResource");
-        if (r < 0)
-                return log_debug_errno(r, "Failed to connect to namespace resource manager: %m");
+        _cleanup_(sd_varlink_unrefp) sd_varlink *_vl = NULL;
+        if (!vl) {
+                r = nsresource_connect(&_vl);
+                if (r < 0)
+                        return r;
 
-        r = sd_varlink_set_allow_fd_passing_output(vl, true);
-        if (r < 0)
-                return log_debug_errno(r, "Failed to enable varlink fd passing for write: %m");
+                vl = _vl;
+        }
 
         userns_fd = userns_acquire_empty();
         if (userns_fd < 0)
@@ -113,8 +131,7 @@ int nsresource_allocate_userns(const char *name, uint64_t size) {
         return TAKE_FD(userns_fd);
 }
 
-int nsresource_register_userns(const char *name, int userns_fd) {
-        _cleanup_(sd_varlink_unrefp) sd_varlink *vl = NULL;
+int nsresource_register_userns(sd_varlink *vl, const char *name, int userns_fd) {
         _cleanup_close_ int _userns_fd = -EBADF;
         _cleanup_free_ char *_name = NULL;
         const char *error_id;
@@ -138,13 +155,14 @@ int nsresource_register_userns(const char *name, int userns_fd) {
                 userns_fd = _userns_fd;
         }
 
-        r = sd_varlink_connect_address(&vl, "/run/systemd/io.systemd.NamespaceResource");
-        if (r < 0)
-                return log_debug_errno(r, "Failed to connect to namespace resource manager: %m");
+        _cleanup_(sd_varlink_unrefp) sd_varlink *_vl = NULL;
+        if (!vl) {
+                r = nsresource_connect(&_vl);
+                if (r < 0)
+                        return r;
 
-        r = sd_varlink_set_allow_fd_passing_output(vl, true);
-        if (r < 0)
-                return log_debug_errno(r, "Failed to enable varlink fd passing for write: %m");
+                vl = _vl;
+        }
 
         userns_fd_idx = sd_varlink_push_dup_fd(vl, userns_fd);
         if (userns_fd_idx < 0)
@@ -169,8 +187,7 @@ int nsresource_register_userns(const char *name, int userns_fd) {
         return 0;
 }
 
-int nsresource_add_mount(int userns_fd, int mount_fd) {
-        _cleanup_(sd_varlink_unrefp) sd_varlink *vl = NULL;
+int nsresource_add_mount(sd_varlink *vl, int userns_fd, int mount_fd) {
         _cleanup_close_ int _userns_fd = -EBADF;
         int r, userns_fd_idx, mount_fd_idx;
         const char *error_id;
@@ -185,13 +202,14 @@ int nsresource_add_mount(int userns_fd, int mount_fd) {
                 userns_fd = _userns_fd;
         }
 
-        r = sd_varlink_connect_address(&vl, "/run/systemd/io.systemd.NamespaceResource");
-        if (r < 0)
-                return log_error_errno(r, "Failed to connect to namespace resource manager: %m");
+        _cleanup_(sd_varlink_unrefp) sd_varlink *_vl = NULL;
+        if (!vl) {
+                r = nsresource_connect(&_vl);
+                if (r < 0)
+                        return r;
 
-        r = sd_varlink_set_allow_fd_passing_output(vl, true);
-        if (r < 0)
-                return log_error_errno(r, "Failed to enable varlink fd passing for write: %m");
+                vl = _vl;
+        }
 
         userns_fd_idx = sd_varlink_push_dup_fd(vl, userns_fd);
         if (userns_fd_idx < 0)
@@ -221,8 +239,7 @@ int nsresource_add_mount(int userns_fd, int mount_fd) {
         return 1;
 }
 
-int nsresource_add_cgroup(int userns_fd, int cgroup_fd) {
-        _cleanup_(sd_varlink_unrefp) sd_varlink *vl = NULL;
+int nsresource_add_cgroup(sd_varlink *vl, int userns_fd, int cgroup_fd) {
         _cleanup_close_ int _userns_fd = -EBADF;
         int r, userns_fd_idx, cgroup_fd_idx;
         const char *error_id;
@@ -237,13 +254,14 @@ int nsresource_add_cgroup(int userns_fd, int cgroup_fd) {
                 userns_fd = _userns_fd;
         }
 
-        r = sd_varlink_connect_address(&vl, "/run/systemd/io.systemd.NamespaceResource");
-        if (r < 0)
-                return log_debug_errno(r, "Failed to connect to namespace resource manager: %m");
+        _cleanup_(sd_varlink_unrefp) sd_varlink *_vl = NULL;
+        if (!vl) {
+                r = nsresource_connect(&_vl);
+                if (r < 0)
+                        return r;
 
-        r = sd_varlink_set_allow_fd_passing_output(vl, true);
-        if (r < 0)
-                return log_debug_errno(r, "Failed to enable varlink fd passing for write: %m");
+                vl = _vl;
+        }
 
         userns_fd_idx = sd_varlink_push_dup_fd(vl, userns_fd);
         if (userns_fd_idx < 0)
@@ -287,6 +305,7 @@ static void interface_params_done(InterfaceParams *p) {
 }
 
 int nsresource_add_netif_veth(
+                sd_varlink *vl,
                 int userns_fd,
                 int netns_fd,
                 const char *namespace_ifname,
@@ -294,7 +313,6 @@ int nsresource_add_netif_veth(
                 char **ret_namespace_ifname) {
 
         _cleanup_close_ int _userns_fd = -EBADF, _netns_fd = -EBADF;
-        _cleanup_(sd_varlink_unrefp) sd_varlink *vl = NULL;
         int r, userns_fd_idx, netns_fd_idx;
         const char *error_id;
 
@@ -314,13 +332,14 @@ int nsresource_add_netif_veth(
                 netns_fd = _netns_fd;
         }
 
-        r = sd_varlink_connect_address(&vl, "/run/systemd/io.systemd.NamespaceResource");
-        if (r < 0)
-                return log_debug_errno(r, "Failed to connect to namespace resource manager: %m");
+        _cleanup_(sd_varlink_unrefp) sd_varlink *_vl = NULL;
+        if (!vl) {
+                r = nsresource_connect(&_vl);
+                if (r < 0)
+                        return r;
 
-        r = sd_varlink_set_allow_fd_passing_output(vl, true);
-        if (r < 0)
-                return log_debug_errno(r, "Failed to enable varlink fd passing for write: %m");
+                vl = _vl;
+        }
 
         userns_fd_idx = sd_varlink_push_dup_fd(vl, userns_fd);
         if (userns_fd_idx < 0)
@@ -368,11 +387,11 @@ int nsresource_add_netif_veth(
 }
 
 int nsresource_add_netif_tap(
+                sd_varlink *vl,
                 int userns_fd,
                 char **ret_host_ifname) {
 
         _cleanup_close_ int _userns_fd = -EBADF;
-        _cleanup_(sd_varlink_unrefp) sd_varlink *vl = NULL;
         int r, userns_fd_idx;
         const char *error_id;
 
@@ -384,13 +403,14 @@ int nsresource_add_netif_tap(
                 userns_fd = _userns_fd;
         }
 
-        r = sd_varlink_connect_address(&vl, "/run/systemd/io.systemd.NamespaceResource");
-        if (r < 0)
-                return log_debug_errno(r, "Failed to connect to namespace resource manager: %m");
+        _cleanup_(sd_varlink_unrefp) sd_varlink *_vl = NULL;
+        if (!vl) {
+                r = nsresource_connect(&_vl);
+                if (r < 0)
+                        return r;
 
-        r = sd_varlink_set_allow_fd_passing_output(vl, true);
-        if (r < 0)
-                return log_debug_errno(r, "Failed to enable varlink fd passing for write: %m");
+                vl = _vl;
+        }
 
         r = sd_varlink_set_allow_fd_passing_input(vl, true);
         if (r < 0)
index 136b1f85ac12c8f9aa545efa894629c4cce424b2..93957a10c8237fc45c4a3474771df59e17cf170f 100644 (file)
@@ -7,9 +7,19 @@
 #define NSRESOURCE_UIDS_64K 0x10000U
 #define NSRESOURCE_UIDS_1 1U
 
-int nsresource_allocate_userns(const char *name, uint64_t size);
-int nsresource_register_userns(const char *name, int userns_fd);
-int nsresource_add_mount(int userns_fd, int mount_fd);
-int nsresource_add_cgroup(int userns_fd, int cgroup_fd);
-int nsresource_add_netif_veth(int userns_fd, int netns_fd, const char *namespace_ifname, char **ret_host_ifname, char **ret_namespace_ifname);
-int nsresource_add_netif_tap(int userns_fd, char **ret_host_ifname);
+int nsresource_connect(sd_varlink **ret);
+
+/* All the calls below take a 'vl' parameter, which may be an already established Varlink connection object
+ * towards systemd-nsresourced, previously created via nsresource_connect(). This serves two purposes: first
+ * of all, it allows more efficient resource usage, as already allocated resources can be recycled for
+ * multiple calls. Secondly, the user credentials are pinned at the time of nsresource_connect(), and the
+ * caller hence can drop privileges afterwards while keeping the connection open and still execute relevant
+ * operations under the original identity, until the connection is closed. The 'vl' parameter may be passed
+ * as NULL, in which case a short-lived connection is created, just to execute the requested operation. */
+
+int nsresource_allocate_userns(sd_varlink *vl, const char *name, uint64_t size);
+int nsresource_register_userns(sd_varlink *vl, const char *name, int userns_fd);
+int nsresource_add_mount(sd_varlink *vl, int userns_fd, int mount_fd);
+int nsresource_add_cgroup(sd_varlink *vl, int userns_fd, int cgroup_fd);
+int nsresource_add_netif_veth(sd_varlink *vl, int userns_fd, int netns_fd, const char *namespace_ifname, char **ret_host_ifname, char **ret_namespace_ifname);
+int nsresource_add_netif_tap(sd_varlink *vl, int userns_fd, char **ret_host_ifname);
index 15400af511e8b2d829344156fe79d22377dd2134..7c370fcbe979ada1ed1e954897e9fc351b60662e 100644 (file)
@@ -90,6 +90,7 @@ TEST(mstack) {
 
                         ASSERT_OK(mstack_open_images(
                                                   mstack,
+                                                  /* mountfsd_link= */ NULL,
                                                   /* userns_fd= */ -EBADF,
                                                   /* image_policy= */ NULL,
                                                   /* image_filter= */ NULL,
index aacd776d7ffbba7d21c5d54fc13260c0ba1e7dab..b8845b8ffebc97e2447fed74eb64058ccf15b294 100644 (file)
@@ -2,6 +2,8 @@
 
 #include <net/if.h>
 
+#include "sd-varlink.h"
+
 #include "errno-util.h"
 #include "fd-util.h"
 #include "namespace-util.h"
@@ -16,13 +18,18 @@ TEST(delegatetap) {
                 return (void) log_tests_skipped_errno(userns_fd, "User namespaces not available");
         ASSERT_OK(userns_fd);
 
-        r = nsresource_register_userns("foobar", userns_fd);
+        _cleanup_(sd_varlink_unrefp) sd_varlink *link = NULL;
+        r = nsresource_connect(&link);
         if (ERRNO_IS_NEG_DISCONNECT(r) || r == -ENOENT || ERRNO_IS_NEG_NOT_SUPPORTED(r))
                 return (void) log_tests_skipped_errno(r, "systemd-nsresourced cannot be reached");
+
+        r = nsresource_register_userns(link, "foobar", userns_fd);
+        if (ERRNO_IS_NEG_NOT_SUPPORTED(r))
+                return (void) log_tests_skipped_errno(r, "systemd-nsresourced does not work");
         ASSERT_OK(r);
 
         _cleanup_free_ char *ifname = NULL;
-        _cleanup_close_ int tap_fd = nsresource_add_netif_tap(userns_fd, &ifname);
+        _cleanup_close_ int tap_fd = nsresource_add_netif_tap(link, userns_fd, &ifname);
         if (ERRNO_IS_NEG_NOT_SUPPORTED(tap_fd))
                 return (void) log_tests_skipped_errno(tap_fd, "tap device support not available");
         ASSERT_OK(tap_fd);
index 0c957b48aaeacde38cea41da1d1b353df5be1d8d..7cbe7fd5f93cada6ebbf294d7f6df90006db3cff 100644 (file)
@@ -12,6 +12,7 @@
 #include "sd-daemon.h"
 #include "sd-event.h"
 #include "sd-id128.h"
+#include "sd-varlink.h"
 
 #include "alloc-util.h"
 #include "architecture.h"
@@ -2041,11 +2042,16 @@ static int run_virtual_machine(int kvm_device_fd, int vhost_device_fd) {
                         if (asprintf(&userns_name, "vmspawn-" PID_FMT "-%s", getpid_cached(), arg_machine) < 0)
                                 return log_oom();
 
-                        r = nsresource_register_userns(userns_name, delegate_userns_fd);
+                        _cleanup_(sd_varlink_unrefp) sd_varlink *nsresource_link = NULL;
+                        r = nsresource_connect(&nsresource_link);
+                        if (r < 0)
+                                return log_error_errno(r, "Failed to connect to nsresourced: %m");
+
+                        r = nsresource_register_userns(nsresource_link, userns_name, delegate_userns_fd);
                         if (r < 0)
                                 return log_error_errno(r, "Failed to register user namespace with systemd-nsresourced: %m");
 
-                        tap_fd = nsresource_add_netif_tap(delegate_userns_fd, /* ret_host_ifname= */ NULL);
+                        tap_fd = nsresource_add_netif_tap(nsresource_link, delegate_userns_fd, /* ret_host_ifname= */ NULL);
                         if (tap_fd < 0)
                                 return log_error_errno(tap_fd, "Failed to allocate network tap device: %m");