]> git.ipfire.org Git - thirdparty/systemd.git/commitdiff
mntfsd: add api to mount dirs for containers
authorLennart Poettering <lennart@poettering.net>
Fri, 8 Nov 2024 11:15:16 +0000 (12:15 +0100)
committerLennart Poettering <lennart@poettering.net>
Thu, 23 Jan 2025 20:48:02 +0000 (21:48 +0100)
systemd-mountfsd so far provided a MountImage() API call for mounting a
disk image and returning a set of mount fds. This complements the API
with a new MountDirectory() API call, that operates on a directory
instead of an image file. Now, what makes this interesting is that it
applies an idmapping from the foreign UID range to the provided target
userns – and in which case unprivileged operation is allowed (well,
under some conditions: in particular the client must own a parent dir of
the provided path).

This allows container managers to run fully unprivileged from
directories – as long as those directories are owned by the foreign UID
range. Basic operation is like this:

1. acquire a transient userns from systemd-nsresourced with 64K users
2. ask systemd-mountfsd for an idmapped mount of the container dir
   matching that userns
3. join the userns and bind the mount fd as root.

Note that we have to drop various sandboxing knobs from the mountfsd
service file for this to work, since the kernel's security checks that
try to ensure that an obstructed /proc/ cannot be circumvented via
mounting a new procfs will otherwise prohibit mountfsd from duplicating
the mounts properly.

src/mountfsd/io.systemd.mount-file-system.policy
src/mountfsd/mountwork.c
src/shared/varlink-io.systemd.MountFileSystem.c
units/systemd-mountfsd.service.in

index 6a151eb437476986cee09b8fcdd41b68f32ee907..78613bfdaf6bdfc57ac7cd6c7e39cc4a3d5d0cc0 100644 (file)
 
                 <annotate key="org.freedesktop.policykit.imply">io.systemd.mount-file-system.mount-image-privately</annotate>
         </action>
+
+        <!-- Allow mounting directories into the host user namespace -->
+        <action id="io.systemd.mount-file-system.mount-directory">
+                <!-- If the directory is owned by the user (or by the foreign UID range, with a parent
+                     directory owned by the user), impose few restrictions: active sessions are allowed
+                     right away, all other clients need auth_admin_keep authentication -->
+                <description gettext-domain="systemd">Allow mounting of directory</description>
+                <message gettext-domain="systemd">Authentication is required for an application to mount directory $(directory).</message>
+                <defaults>
+                        <allow_any>auth_admin_keep</allow_any>
+                        <allow_inactive>auth_admin_keep</allow_inactive>
+                        <allow_active>yes</allow_active>
+                </defaults>
+        </action>
+
+        <action id="io.systemd.mount-file-system.mount-untrusted-directory">
+                <!-- If the directory is owned by another user, always require administrator
+                     authentication. Holding this action implies the mount-directory action. -->
+                <description gettext-domain="systemd">Allow mounting of untrusted directory</description>
+                <message gettext-domain="systemd">Authentication is required for an application to mount directory $(directory) which is not owned by the user.</message>
+                <defaults>
+                        <allow_any>auth_admin</allow_any>
+                        <allow_inactive>auth_admin</allow_inactive>
+                        <allow_active>auth_admin</allow_active>
+                </defaults>
+
+                <annotate key="org.freedesktop.policykit.imply">io.systemd.mount-file-system.mount-directory</annotate>
+        </action>
+
+        <!-- Allow mounting directories into a private user namespace -->
+        <action id="io.systemd.mount-file-system.mount-directory-privately">
+                <!-- Mounting into a private user namespace: every default below is "yes", i.e. no
+                     authentication is requested for this action -->
+                <description gettext-domain="systemd">Allow private mounting of directory</description>
+                <message gettext-domain="systemd">Authentication is required for an application to privately mount directory $(directory).</message>
+                <defaults>
+                        <allow_any>yes</allow_any>
+                        <allow_inactive>yes</allow_inactive>
+                        <allow_active>yes</allow_active>
+                </defaults>
+        </action>
+
+        <action id="io.systemd.mount-file-system.mount-untrusted-directory-privately">
+                <!-- Private mounting of a directory that is not owned by the user: always require
+                     administrator authentication. Holding this action implies the
+                     mount-directory-privately action. -->
+                <description gettext-domain="systemd">Allow private mounting of untrusted directory</description>
+                <message gettext-domain="systemd">Authentication is required for an application to privately mount directory $(directory) which is not owned by the user.</message>
+                <defaults>
+                        <allow_any>auth_admin</allow_any>
+                        <allow_inactive>auth_admin</allow_inactive>
+                        <allow_active>auth_admin</allow_active>
+                </defaults>
+
+                <annotate key="org.freedesktop.policykit.imply">io.systemd.mount-file-system.mount-directory-privately</annotate>
+        </action>
 </policyconfig>
index 31f2d47546c38146170539f29508e48844ae0a59..2fd610f5d65202d49bcdd715008efe5c0aab585c 100644 (file)
@@ -1,5 +1,10 @@
 /* SPDX-License-Identifier: LGPL-2.1-or-later */
 
+#include <sys/mount.h>
+#if WANT_LINUX_FS_H
+#include <linux/fs.h>
+#endif
+
 #include "sd-daemon.h"
 #include "sd-varlink.h"
 
 #include "json-util.h"
 #include "main-func.h"
 #include "missing_loop.h"
+#include "missing_mount.h"
+#include "missing_syscall.h"
 #include "namespace-util.h"
 #include "nsresource.h"
 #include "nulstr-util.h"
 #include "os-util.h"
 #include "process-util.h"
 #include "stat-util.h"
+#include "string-table.h"
+#include "uid-classification.h"
+#include "uid-range.h"
 #include "user-util.h"
 #include "varlink-io.systemd.MountFileSystem.h"
 #include "varlink-util.h"
@@ -532,6 +542,349 @@ static int vl_method_mount_image(
                         SD_JSON_BUILD_PAIR_CONDITION(!sd_id128_is_null(di->image_uuid), "imageUuid", SD_JSON_BUILD_UUID(di->image_uuid)));
 }
 
+/* Selects which UID/GID mapping is applied to the mount handed out for a directory. */
+typedef enum MountMapMode {
+        MOUNT_MAP_AUTO = 0,     /* determine automatically from directory ownership and caller */
+        MOUNT_MAP_ROOT,         /* map caller's UID to root in namespace (map 1 UID only) */
+        MOUNT_MAP_FOREIGN,      /* map foreign UID range to base in namespace (map 64K) */
+        MOUNT_MAP_IDENTITY,     /* apply identity mapping (map 64K) */
+        _MOUNT_MAP_MODE_MAX,    /* number of valid modes; sizes the string table */
+        _MOUNT_MAP_MODE_INVALID = -EINVAL,
+} MountMapMode;
+
+/* String name of each MountMapMode value, as accepted in the "mode" parameter of MountDirectory(). */
+static const char *const mount_map_mode_table[_MOUNT_MAP_MODE_MAX] = {
+        [MOUNT_MAP_AUTO]     = "auto",
+        [MOUNT_MAP_ROOT]     = "root",
+        [MOUNT_MAP_FOREIGN]  = "foreign",
+        [MOUNT_MAP_IDENTITY] = "identity",
+};
+
+/* Presumably provides the file-local mount_map_mode_to_string()/mount_map_mode_from_string() helpers
+ * backed by the table above; both names are used further down in this file. */
+DEFINE_PRIVATE_STRING_TABLE_LOOKUP(mount_map_mode, MountMapMode);
+
+/* Parsed input parameters of a MountDirectory() call, filled in via the JSON dispatch table of
+ * vl_method_mount_directory(). */
+typedef struct MountDirectoryParameters {
+        MountMapMode mode;           /* "mode"; MOUNT_MAP_AUTO if the client did not specify one */
+        unsigned directory_fd_idx;   /* "directoryFileDescriptor": index of the passed directory fd; UINT_MAX if unset */
+        unsigned userns_fd_idx;      /* "userNamespaceFileDescriptor": index of the passed userns fd; UINT_MAX if unset */
+        int read_only;               /* "readOnly" tristate: > 0 requests a read-only mount, -1 means unset */
+} MountDirectoryParameters;
+
+/* Result of validate_directory_fd(): categorizes who owns a directory relative to the connecting peer.
+ * Negative values are used by validate_directory_fd() to report errno-style errors instead. */
+typedef enum DirectoryOwnership {
+        DIRECTORY_IS_ROOT_PEER_OWNED,  /* This is returned if the directory is owned by the root user and the peer is root */
+        DIRECTORY_IS_ROOT_OWNED,       /* This is returned if the directory is owned by the root user (and the peer user is not root) */
+        DIRECTORY_IS_PEER_OWNED,       /* This is returned if the directory is owned by the peer user (who is not root) */
+        DIRECTORY_IS_FOREIGN_OWNED,    /* This is returned if the directory is owned by the foreign UID range */
+        DIRECTORY_IS_OTHERWISE_OWNED,  /* This is returned if the directory is owned by something else */
+        _DIRECTORY_OWNERSHIP_MAX,
+        _DIRECTORY_OWNERSHIP_ERRNO_MAX = -ERRNO_MAX, /* Guarantee the whole negative errno range fits */
+} DirectoryOwnership;
+
+static MountMapMode default_mount_map_mode(DirectoryOwnership ownership) {
+        /* Picks the mapping mode to use when the client did not request one explicitly, based on the
+         * ownership of the base tree. */
+
+        /* Peer-owned tree: map the peer's UID to root in the container */
+        if (ownership == DIRECTORY_IS_PEER_OWNED)
+                return MOUNT_MAP_ROOT;
+
+        /* Foreign-owned tree: map the foreign UID range to the container's UID range */
+        if (ownership == DIRECTORY_IS_FOREIGN_OWNED)
+                return MOUNT_MAP_FOREIGN;
+
+        /* Root-owned or otherwise-owned tree: don't map */
+        if (ownership == DIRECTORY_IS_ROOT_PEER_OWNED ||
+            ownership == DIRECTORY_IS_ROOT_OWNED ||
+            ownership == DIRECTORY_IS_OTHERWISE_OWNED)
+                return MOUNT_MAP_IDENTITY;
+
+        /* Any other value (e.g. a negative error) has no sensible default */
+        return _MOUNT_MAP_MODE_INVALID;
+}
+
+/* Presumably defines dispatch_mount_directory_mode(), a JSON dispatch callback that converts a string
+ * into a MountMapMode via mount_map_mode_from_string(); it handles the "mode" field of MountDirectory(). */
+static JSON_DISPATCH_ENUM_DEFINE(dispatch_mount_directory_mode, MountMapMode, mount_map_mode_from_string);
+
+static DirectoryOwnership validate_directory_fd(int fd, uid_t peer_uid) {
+        int r, fl;
+
+        assert(fd >= 0);
+
+        /* Checks if the specified directory fd looks sane. Returns a DirectoryOwnership that categorizes the
+         * ownership situation in comparison to the peer's UID, or a negative errno-style value on failure.
+         *
+         * Note one key difference to image validation (as implemented above): for regular files if the
+         * client provided us with an open fd it implies the client has access, as well as what kind of
+         * access (i.e. ro or rw). But for directories this doesn't work the same way, as directories are
+         * always opened read-only only. Hence we use a different mechanism to validate access to them: we
+         * check if the directory is owned by the peer UID or by the foreign UID range (in the latter case
+         * one of the parent directories must be owned by the peer though).
+         *
+         * The caller retains ownership of 'fd'; it is never closed here. */
+
+        struct stat st;
+        if (fstat(fd, &st) < 0)
+                return log_debug_errno(errno, "Failed to stat() directory fd: %m");
+
+        r = stat_verify_directory(&st);
+        if (r < 0)
+                return r;
+
+        fl = fd_verify_safe_flags_full(fd, O_DIRECTORY);
+        if (fl < 0)
+                return log_debug_errno(fl, "Directory file descriptor has unsafe flags set: %m");
+
+        if (st.st_uid == 0) {
+                if (peer_uid == 0) {
+                        log_debug("Directory file descriptor points to root owned directory, who is also the peer.");
+                        return DIRECTORY_IS_ROOT_PEER_OWNED;
+                }
+                log_debug("Directory file descriptor points to root owned directory.");
+                return DIRECTORY_IS_ROOT_OWNED;
+        }
+        if (st.st_uid == peer_uid) {
+                log_debug("Directory file descriptor points to peer owned directory.");
+                return DIRECTORY_IS_PEER_OWNED;
+        }
+
+        /* For bind mounted directories we check if they are either owned by the client's UID, or by the
+         * foreign UID set, but in that case the parent directory must be owned by the client's UID, or some
+         * directory iteratively up the chain */
+
+        _cleanup_close_ int parent_fd = -EBADF;
+        unsigned n_level;
+        for (n_level = 0; n_level < 16; n_level++) {
+                /* Stop iteration if we find a directory up the tree that is neither owned by the user, nor is from the foreign UID range */
+                if (!uid_is_foreign(st.st_uid) || !gid_is_foreign(st.st_gid)) {
+                        log_debug("Directory file descriptor points to directory which itself or its parents is neither owned by foreign UID range nor by the user.");
+                        return DIRECTORY_IS_OTHERWISE_OWNED;
+                }
+
+                /* If the peer is root, then it doesn't matter if we find a parent owned by root, let's shortcut things. */
+                if (peer_uid == 0) {
+                        log_debug("Directory file descriptor is owned by foreign UID range, and peer is root.");
+                        return DIRECTORY_IS_FOREIGN_OWNED;
+                }
+
+                /* Go one level up, relative to the directory inspected in this iteration */
+                _cleanup_close_ int new_parent_fd = openat(fd, "..", O_DIRECTORY|O_PATH|O_CLOEXEC);
+                if (new_parent_fd < 0)
+                        return log_debug_errno(errno, "Failed to open parent directory of directory file descriptor: %m");
+
+                struct stat new_st;
+                if (fstat(new_parent_fd, &new_st) < 0)
+                        return log_debug_errno(errno, "Failed to stat parent directory of directory file descriptor: %m");
+
+                /* Safety check to see if we hit the root dir */
+                if (stat_inode_same(&st, &new_st)) {
+                        log_debug("Directory file descriptor is owned by foreign UID range, but didn't find parent directory that is owned by peer among ancestors.");
+                        return DIRECTORY_IS_OTHERWISE_OWNED;
+                }
+
+                if (new_st.st_uid == peer_uid) { /* Parent inode is owned by the peer. That's good! Everything's fine. */
+                        log_debug("Directory file descriptor is owned by foreign UID range, and ancestor is owned by peer.");
+                        return DIRECTORY_IS_FOREIGN_OWNED;
+                }
+
+                /* Continue the walk from the parent we just looked at. Without advancing 'fd' every
+                 * iteration would reopen the parent of the *original* directory, and the same-inode
+                 * check above would then abort the search after a single level. 'parent_fd' owns the
+                 * descriptor (and closes it on return); 'fd' merely borrows it from here on. */
+                close_and_replace(parent_fd, new_parent_fd);
+                fd = parent_fd;
+                st = new_st;
+        }
+
+        log_debug("Failed to find peer owned parent directory after %u levels, refusing.", n_level);
+        return DIRECTORY_IS_OTHERWISE_OWNED;
+}
+
+static int vl_method_mount_directory(
+                sd_varlink *link,
+                sd_json_variant *parameters,
+                sd_varlink_method_flags_t flags,
+                void *userdata) {
+
+        /* Implements the io.systemd.MountFileSystem.MountDirectory() Varlink method. Rough sequence:
+         *
+         *   1. parse the parameters and duplicate the directory fd (and optional userns fd) passed by the client
+         *   2. classify the directory's ownership relative to the peer and validate the userns
+         *   3. pick a mapping mode if the client requested none, then run the polkit check
+         *   4. clone the directory into a detached mount via open_tree(), optionally make it read-only and
+         *      apply an ID mapping
+         *   5. if a userns was supplied, register the mount with it via nsresource_add_mount()
+         *   6. push the mount fd to the client and reply with its index
+         *
+         * 'userdata' must point to the polkit registry hashmap. */
+
+        static const sd_json_dispatch_field dispatch_table[] = {
+                { "mode",                        SD_JSON_VARIANT_STRING,   dispatch_mount_directory_mode, offsetof(MountDirectoryParameters, mode),             0                 },
+                { "directoryFileDescriptor",     SD_JSON_VARIANT_UNSIGNED, sd_json_dispatch_uint,         offsetof(MountDirectoryParameters, directory_fd_idx), SD_JSON_MANDATORY },
+                { "userNamespaceFileDescriptor", SD_JSON_VARIANT_UNSIGNED, sd_json_dispatch_uint,         offsetof(MountDirectoryParameters, userns_fd_idx),    0                 },
+                { "readOnly",                    SD_JSON_VARIANT_BOOLEAN,  sd_json_dispatch_tristate,     offsetof(MountDirectoryParameters, read_only),        0                 },
+                VARLINK_DISPATCH_POLKIT_FIELD,
+                {}
+        };
+
+        MountDirectoryParameters p = {
+                .mode = MOUNT_MAP_AUTO,
+                .directory_fd_idx = UINT_MAX,
+                .userns_fd_idx = UINT_MAX,
+                .read_only = -1,
+        };
+        _cleanup_close_ int directory_fd = -EBADF, userns_fd = -EBADF;
+        Hashmap **polkit_registry = ASSERT_PTR(userdata);
+        int r;
+
+        r = sd_varlink_dispatch(link, parameters, dispatch_table, &p);
+        if (r != 0)
+                return r;
+
+        if (p.directory_fd_idx == UINT_MAX)
+                return sd_varlink_error_invalid_parameter_name(link, "directoryFileDescriptor");
+
+        directory_fd = sd_varlink_peek_dup_fd(link, p.directory_fd_idx);
+        if (directory_fd < 0)
+                return log_debug_errno(directory_fd, "Failed to peek directory fd from client: %m");
+
+        /* The user namespace fd is optional; without it userns_fd stays negative */
+        if (p.userns_fd_idx != UINT_MAX) {
+                userns_fd = sd_varlink_peek_dup_fd(link, p.userns_fd_idx);
+                if (userns_fd < 0)
+                        return log_debug_errno(userns_fd, "Failed to peek user namespace fd from client: %m");
+        }
+
+        uid_t peer_uid;
+        r = sd_varlink_get_peer_uid(link, &peer_uid);
+        if (r < 0)
+                return log_debug_errno(r, "Failed to get client UID: %m");
+
+        DirectoryOwnership owned_by = validate_directory_fd(directory_fd, peer_uid);
+        if (owned_by < 0)
+                return owned_by;
+
+        r = validate_userns(link, &userns_fd);
+        if (r != 0)
+                return r;
+
+        /* If no mode is specified, pick sensible default */
+        if (p.mode <= 0) {
+                p.mode = default_mount_map_mode(owned_by);
+                assert(p.mode > 0);
+        }
+
+        /* The path is only used for logging and as a polkit detail, hence failure to acquire it is ignored */
+        _cleanup_free_ char *directory_path = NULL;
+        (void) fd_get_path(directory_fd, &directory_path);
+
+        log_debug("Mounting '%s' with mapping mode: %s", strna(directory_path), mount_map_mode_to_string(p.mode));
+
+        const char *polkit_details[] = {
+                "read_only", one_zero(p.read_only > 0),
+                "directory", strna(directory_path),
+                NULL,
+        };
+
+        const char *polkit_action, *polkit_untrusted_action;
+        PolkitFlags polkit_flags;
+        if (userns_fd < 0) {
+                /* Mount into the host user namespace */
+                polkit_action = "io.systemd.mount-file-system.mount-directory";
+                polkit_untrusted_action = "io.systemd.mount-file-system.mount-untrusted-directory";
+                polkit_flags = 0;
+        } else {
+                /* Mount into a private user namespace */
+                polkit_action = "io.systemd.mount-file-system.mount-directory-privately";
+                polkit_untrusted_action = "io.systemd.mount-file-system.mount-untrusted-directory-privately";
+
+                /* If polkit is not around, let's allow privately mounting trusted directories by default
+                 * (the flag is only passed on for trusted directories, see below) */
+                polkit_flags = POLKIT_DEFAULT_ALLOW;
+        }
+
+        /* We consider a directory "trusted" if it is owned by the peer or the foreign UID range */
+        bool trusted_directory = IN_SET(owned_by, DIRECTORY_IS_ROOT_PEER_OWNED, DIRECTORY_IS_PEER_OWNED, DIRECTORY_IS_FOREIGN_OWNED);
+
+        /* Acquire the regular action privilege for trusted directories, and the stricter "untrusted" action
+         * privilege (with no polkit flags) for everything else */
+        r = varlink_verify_polkit_async_full(
+                        link,
+                        /* bus= */ NULL,
+                        trusted_directory ? polkit_action : polkit_untrusted_action,
+                        polkit_details,
+                        /* good_user= */ UID_INVALID,
+                        trusted_directory ? polkit_flags : 0,
+                        polkit_registry);
+        if (r <= 0)
+                return r;
+
+        /* Generate the common dissection directory here. We are not going to use it, but the clients might,
+         * and they likely are unprivileged, hence cannot create it themselves. Hence let's just create it
+         * here, if it is missing. */
+        r = get_common_dissect_directory(NULL);
+        if (r < 0)
+                return r;
+
+        /* Clone the directory into a new detached mount we can adjust and hand out */
+        _cleanup_close_ int mount_fd = open_tree(directory_fd, "", OPEN_TREE_CLONE|OPEN_TREE_CLOEXEC|AT_SYMLINK_NOFOLLOW|AT_EMPTY_PATH);
+        if (mount_fd < 0)
+                return log_debug_errno(errno, "Failed to issue open_tree() of provided directory '%s': %m", strna(directory_path));
+
+        /* Read-only is applied only if explicitly requested; an unset tristate (-1) leaves the mount writable */
+        if (p.read_only > 0 && mount_setattr(
+                            mount_fd, "", AT_EMPTY_PATH,
+                            &(struct mount_attr) {
+                                    .attr_set = MOUNT_ATTR_RDONLY,
+                            }, MOUNT_ATTR_SIZE_VER0) < 0)
+                return log_debug_errno(errno, "Failed to enable read-only mode: %m");
+
+        /* In identity mode no ID mapping is attached to the mount at all */
+        if (p.mode != MOUNT_MAP_IDENTITY) {
+                uid_t start;
+
+                if (userns_fd >= 0) {
+                        _cleanup_(uid_range_freep) UIDRange *uid_range_outside = NULL, *uid_range_inside = NULL, *gid_range_outside = NULL, *gid_range_inside = NULL;
+                        r = uid_range_load_userns_by_fd(userns_fd, UID_RANGE_USERNS_OUTSIDE, &uid_range_outside);
+                        if (r < 0)
+                                return log_debug_errno(r, "Failed to load outside UID range of provided userns: %m");
+                        r = uid_range_load_userns_by_fd(userns_fd, UID_RANGE_USERNS_INSIDE, &uid_range_inside);
+                        if (r < 0)
+                                return log_debug_errno(r, "Failed to load inside UID range of provided userns: %m");
+                        r = uid_range_load_userns_by_fd(userns_fd, GID_RANGE_USERNS_OUTSIDE, &gid_range_outside);
+                        if (r < 0)
+                                return log_debug_errno(r, "Failed to load outside GID range of provided userns: %m");
+                        r = uid_range_load_userns_by_fd(userns_fd, GID_RANGE_USERNS_INSIDE, &gid_range_inside);
+                        if (r < 0)
+                                return log_debug_errno(r, "Failed to load inside GID range of provided userns: %m");
+
+                        /* Be very strict for now: UID and GID mappings must be identical, and consist of
+                         * exactly one 64K range that starts at 0 on the inside */
+                        if (!uid_range_equal(uid_range_outside, gid_range_outside) ||
+                            !uid_range_equal(uid_range_inside, gid_range_inside) ||
+                            uid_range_outside->n_entries != 1 ||
+                            uid_range_outside->entries[0].nr != 0x10000 ||
+                            uid_range_inside->n_entries != 1 ||
+                            uid_range_inside->entries[0].start != 0 ||
+                            uid_range_inside->entries[0].nr != 0x10000)
+                                return sd_varlink_error_invalid_parameter_name(link, "userNamespaceFileDescriptor");
+
+                        start = uid_range_outside->entries[0].start;
+                } else
+                        start = 0;
+
+                /* Build a single-line ID map: root mode covers just the peer's UID, foreign mode covers the
+                 * 64K foreign UID range; both target 'start'. */
+                _cleanup_free_ char *new_uid_map = NULL;
+                switch (p.mode) {
+                case MOUNT_MAP_ROOT:
+                        r = strextendf(&new_uid_map, UID_FMT " " UID_FMT " " UID_FMT,
+                                       peer_uid, start, (uid_t) 1);
+                        break;
+                case MOUNT_MAP_FOREIGN:
+                        r = strextendf(&new_uid_map, UID_FMT " " UID_FMT " " UID_FMT,
+                                       (uid_t) FOREIGN_UID_MIN, start, (uid_t) 0x10000);
+                        break;
+                default:
+                        assert_not_reached();
+                }
+                if (r < 0)
+                        return r;
+
+                /* The same map string is used for both the UID and the GID mapping */
+                _cleanup_close_ int idmap_userns_fd = userns_acquire(new_uid_map, new_uid_map);
+                if (idmap_userns_fd < 0)
+                        return log_debug_errno(idmap_userns_fd, "Failed to acquire user namespace for id mapping: %m");
+
+                if (mount_setattr(mount_fd, "", AT_EMPTY_PATH,
+                                  &(struct mount_attr) {
+                                          .attr_set = MOUNT_ATTR_IDMAP,
+                                          .userns_fd = idmap_userns_fd,
+                                          .propagation = MS_PRIVATE,
+                                  }, MOUNT_ATTR_SIZE_VER0) < 0)
+                        return log_debug_errno(errno, "Failed to enable id mapping: %m");
+        }
+
+        if (userns_fd >= 0) {
+                r = nsresource_add_mount(userns_fd, mount_fd);
+                if (r < 0)
+                        return r;
+        }
+
+        int fd_idx = sd_varlink_push_fd(link, mount_fd);
+        if (fd_idx < 0)
+                return fd_idx;
+
+        /* The fd was pushed successfully, hence it must no longer be closed by our cleanup handler */
+        TAKE_FD(mount_fd);
+
+        return sd_varlink_replybo(
+                        link,
+                        SD_JSON_BUILD_PAIR("mountFileDescriptor", SD_JSON_BUILD_INTEGER(fd_idx)));
+}
+
 static int process_connection(sd_varlink_server *server, int _fd) {
         _cleanup_close_ int fd = TAKE_FD(_fd); /* always take possession */
         _cleanup_(sd_varlink_close_unrefp) sd_varlink *vl = NULL;
@@ -601,7 +954,8 @@ static int run(int argc, char *argv[]) {
 
         r = sd_varlink_server_bind_method_many(
                         server,
-                        "io.systemd.MountFileSystem.MountImage", vl_method_mount_image);
+                        "io.systemd.MountFileSystem.MountImage",     vl_method_mount_image,
+                        "io.systemd.MountFileSystem.MountDirectory", vl_method_mount_directory);
         if (r < 0)
                 return log_error_errno(r, "Failed to bind methods: %m");
 
index 43b812b0d234c9a42580f8ece6d1915db2583aa7..78e7ce06ab4259567072b742addda620bb50871a 100644 (file)
@@ -49,6 +49,31 @@ static SD_VARLINK_DEFINE_METHOD(
                 SD_VARLINK_DEFINE_OUTPUT(imageName, SD_VARLINK_STRING, SD_VARLINK_NULLABLE),
                 SD_VARLINK_DEFINE_OUTPUT(imageUuid, SD_VARLINK_STRING, SD_VARLINK_NULLABLE));
 
+/* Varlink IDL declaration of the MountMapMode enum; used as the type of the "mode" input of
+ * MountDirectory() below. */
+static SD_VARLINK_DEFINE_ENUM_TYPE(
+                MountMapMode,
+                SD_VARLINK_FIELD_COMMENT("Map the caller's UID to root in the user namespace, do not map anything else."),
+                SD_VARLINK_DEFINE_ENUM_VALUE(root),
+                SD_VARLINK_FIELD_COMMENT("Map the foreign UID range to the base UID range in the user namespace (i.e. UID zero and above), covering 64K users."),
+                SD_VARLINK_DEFINE_ENUM_VALUE(foreign),
+                SD_VARLINK_FIELD_COMMENT("Apply an identity (1:1) mapping, but limit it to 64K users."),
+                SD_VARLINK_DEFINE_ENUM_VALUE(identity),
+                SD_VARLINK_FIELD_COMMENT("Determine automatically based on provided directory and caller."),
+                SD_VARLINK_DEFINE_ENUM_VALUE(auto));
+
+/* Varlink IDL declaration of the MountDirectory() method: takes a directory fd (plus optional user
+ * namespace fd, read-only flag and mapping mode) and returns the index of a mount fd. */
+static SD_VARLINK_DEFINE_METHOD(
+                MountDirectory,
+                SD_VARLINK_FIELD_COMMENT("Directory file descriptor of the directory to assign to the user namespace. Must be a regular, i.e. non-O_PATH file descriptor."),
+                SD_VARLINK_DEFINE_INPUT(directoryFileDescriptor, SD_VARLINK_INT, 0),
+                SD_VARLINK_FIELD_COMMENT("File descriptor to the user namespace to assign this directory to. If not specified uses the host user namespace."),
+                SD_VARLINK_DEFINE_INPUT(userNamespaceFileDescriptor, SD_VARLINK_INT, SD_VARLINK_NULLABLE),
+                SD_VARLINK_FIELD_COMMENT("Whether to mark the resulting mount file descriptor as read-only. If not specified defaults to false."),
+                SD_VARLINK_DEFINE_INPUT(readOnly, SD_VARLINK_BOOL, SD_VARLINK_NULLABLE),
+                SD_VARLINK_FIELD_COMMENT("Which kind of UID/GID mapping to apply to the resulting mount file descriptor."),
+                SD_VARLINK_DEFINE_INPUT_BY_TYPE(mode, MountMapMode, SD_VARLINK_NULLABLE),
+                VARLINK_DEFINE_POLKIT_INPUT,
+                SD_VARLINK_FIELD_COMMENT("The freshly allocated mount file descriptor for the mount."),
+                SD_VARLINK_DEFINE_OUTPUT(mountFileDescriptor, SD_VARLINK_INT, 0));
+
 static SD_VARLINK_DEFINE_ERROR(IncompatibleImage);
 static SD_VARLINK_DEFINE_ERROR(MultipleRootPartitionsFound);
 static SD_VARLINK_DEFINE_ERROR(RootPartitionNotFound);
@@ -59,9 +84,17 @@ static SD_VARLINK_DEFINE_ERROR(VerityFailure);
 SD_VARLINK_DEFINE_INTERFACE(
                 io_systemd_MountFileSystem,
                 "io.systemd.MountFileSystem",
+                SD_VARLINK_INTERFACE_COMMENT("APIs for unpriviliged mounting."),
+                SD_VARLINK_SYMBOL_COMMENT("Encodes the designated purpose of a partition."),
                 &vl_type_PartitionDesignator,
+                SD_VARLINK_SYMBOL_COMMENT("Information about a specific partition."),
                 &vl_type_PartitionInfo,
+                SD_VARLINK_SYMBOL_COMMENT("Selects the type of UID/GID mapping to apply."),
+                &vl_type_MountMapMode,
+                SD_VARLINK_SYMBOL_COMMENT("Takes a disk image file descriptor as input, returns a set of mount file descriptors for it."),
                 &vl_method_MountImage,
+                SD_VARLINK_SYMBOL_COMMENT("Takes a directory file descriptor as input, returns a mount file descriptor."),
+                &vl_method_MountDirectory,
                 &vl_error_IncompatibleImage,
                 &vl_error_MultipleRootPartitionsFound,
                 &vl_error_RootPartitionNotFound,
index ef413684c3660a2ed2352fb2052483331d9dcc6c..00a0827fbcd1e0e55d8fb63cc927e607c92e830e 100644 (file)
@@ -25,13 +25,7 @@ LimitNOFILE={{HIGH_RLIMIT_NOFILE}}
 LockPersonality=yes
 MemoryDenyWriteExecute=yes
 NoNewPrivileges=yes
-ProtectProc=invisible
-ProtectControlGroups=yes
-ProtectHome=yes
 ProtectHostname=yes
-ProtectKernelLogs=yes
-ProtectKernelModules=yes
-ProtectSystem=strict
 RestrictAddressFamilies=AF_UNIX AF_NETLINK AF_INET AF_INET6
 RestrictRealtime=yes
 RestrictSUIDSGID=yes