]> git.ipfire.org Git - thirdparty/systemd.git/commitdiff
core: add id-mapped mount support for Exec directories 34078/head
authorAndres Beltran <abeltran@microsoft.com>
Mon, 5 Aug 2024 23:43:29 +0000 (23:43 +0000)
committerAndres Beltran <abeltran@microsoft.com>
Fri, 1 Nov 2024 18:45:28 +0000 (18:45 +0000)
man/systemd.exec.xml
src/core/exec-invoke.c
src/core/execute.h
src/core/namespace.c
src/core/namespace.h
src/shared/dissect-image.c
src/shared/mount-util.c
src/shared/mount-util.h
test/units/TEST-34-DYNAMICUSERMIGRATE.sh

index d5f85ed85c75b4821b2b36749c0662deaa2fdfdb..30a926c9a08e3d8c8aebd186c5ab9f4417cc166e 100644 (file)
@@ -1476,6 +1476,13 @@ CapabilityBoundingSet=~CAP_B CAP_C</programlisting>
         below the locations defined in the following table. Also, the corresponding environment variable will
         be defined with the full paths of the directories. If multiple directories are set, then in the
         environment variable the paths are concatenated with colon (<literal>:</literal>).</para>
+
+        <para>If <varname>DynamicUser=</varname> is used, and if the kernel version supports
+        <ulink url="https://lwn.net/Articles/896255/">id-mapped mounts</ulink>, the specified directories will
+        be owned by "nobody" in the host namespace and will be mapped to (and will be owned by) the service's
+        UID/GID in its own namespace. For backward compatibility, existing directories created without id-mapped
+        mounts will be kept untouched.</para>
+
         <table>
           <title>Automatic directory creation and environment variables</title>
           <tgroup cols='4'>
index 968e9bd7c4dbdd92ada0cdf18b4d775403a14c6e..4e70c84374637222ec63039e8e266c5ba1113d05 100644 (file)
@@ -2554,7 +2554,8 @@ static int setup_exec_directory(
                 /* Lock down the access mode (we use chmod_and_chown() to make this idempotent. We don't
                  * specify UID/GID here, so that path_chown_recursive() can optimize things depending on the
                  * current UID/GID ownership.) */
-                r = chmod_and_chown(pp ?: p, context->directories[type].mode, UID_INVALID, GID_INVALID);
+                const char *target_dir = pp ?: p;
+                r = chmod_and_chown(target_dir, context->directories[type].mode, UID_INVALID, GID_INVALID);
                 if (r < 0)
                         goto fail;
 
@@ -2563,12 +2564,51 @@ static int setup_exec_directory(
                 if (params->runtime_scope != RUNTIME_SCOPE_SYSTEM)
                         continue;
 
-                /* Then, change the ownership of the whole tree, if necessary. When dynamic users are used we
+                int idmapping_supported = is_idmapping_supported(target_dir);
+                if (idmapping_supported < 0) {
+                        r = log_debug_errno(idmapping_supported, "Unable to determine if ID mapping is supported on mount '%s': %m", target_dir);
+                        goto fail;
+                }
+
+                log_debug("ID-mapping is%ssupported for exec directory %s", idmapping_supported ? " " : " not ", target_dir);
+
+                /* Change the ownership of the whole tree, if necessary. When dynamic users are used we
                  * drop the suid/sgid bits, since we really don't want SUID/SGID files for dynamic UID/GID
                  * assignments to exist. */
-                r = path_chown_recursive(pp ?: p, uid, gid, context->dynamic_user ? 01777 : 07777, AT_SYMLINK_FOLLOW);
-                if (r < 0)
-                        goto fail;
+                uid_t chown_uid = uid;
+                gid_t chown_gid = gid;
+                bool do_chown = false;
+
+                if (uid == 0 || gid == 0 || !idmapping_supported) {
+                        do_chown = true;
+                        i->idmapped = false;
+                } else {
+                        /* Use 'nobody' uid/gid for exec directories if ID-mapping is supported. For backward compatibility,
+                         * continue doing chmod/chown if the directory was chmod/chowned before (if uid/gid is not 'nobody') */
+                        struct stat st;
+                        r = RET_NERRNO(stat(target_dir, &st));
+                        if (r < 0)
+                                goto fail;
+
+                        if (st.st_uid == UID_NOBODY && st.st_gid == GID_NOBODY) {
+                                do_chown = false;
+                                i->idmapped = true;
+                       } else if (exec_directory_is_private(context, type) && st.st_uid == 0 && st.st_gid == 0) {
+                                chown_uid = UID_NOBODY;
+                                chown_gid = GID_NOBODY;
+                                do_chown = true;
+                                i->idmapped = true;
+                        } else {
+                                do_chown = true;
+                                i->idmapped = false;
+                        }
+                }
+
+                if (do_chown) {
+                        r = path_chown_recursive(target_dir, chown_uid, chown_gid, context->dynamic_user ? 01777 : 07777, AT_SYMLINK_FOLLOW);
+                        if (r < 0)
+                                goto fail;
+                }
         }
 
         /* If we are not going to run in a namespace, set up the symlinks - otherwise
@@ -2620,6 +2660,8 @@ static int setup_smack(
 static int compile_bind_mounts(
                 const ExecContext *context,
                 const ExecParameters *params,
+                uid_t exec_directory_uid, /* only used for id-mapped mounts Exec directories */
+                gid_t exec_directory_gid, /* only used for id-mapped mounts Exec directories */
                 BindMount **ret_bind_mounts,
                 size_t *ret_n_bind_mounts,
                 char ***ret_empty_directories) {
@@ -2721,6 +2763,9 @@ static int compile_bind_mounts(
                                 .nosuid = context->dynamic_user, /* don't allow suid/sgid when DynamicUser= is on */
                                 .recursive = true,
                                 .read_only = FLAGS_SET(i->flags, EXEC_DIRECTORY_READ_ONLY),
+                                .idmapped = i->idmapped,
+                                .uid = exec_directory_uid,
+                                .gid = exec_directory_gid,
                         };
                 }
         }
@@ -3057,7 +3102,9 @@ static int apply_mount_namespace(
                 ExecRuntime *runtime,
                 const char *memory_pressure_path,
                 bool needs_sandboxing,
-                char **reterr_path) {
+                char **reterr_path,
+                uid_t exec_directory_uid,
+                gid_t exec_directory_gid) {
 
         _cleanup_(verity_settings_done) VeritySettings verity = VERITY_SETTINGS_DEFAULT;
         _cleanup_strv_free_ char **empty_directories = NULL, **symlinks = NULL,
@@ -3095,7 +3142,7 @@ static int apply_mount_namespace(
                         return r;
         }
 
-        r = compile_bind_mounts(context, params, &bind_mounts, &n_bind_mounts, &empty_directories);
+        r = compile_bind_mounts(context, params, exec_directory_uid, exec_directory_gid, &bind_mounts, &n_bind_mounts, &empty_directories);
         if (r < 0)
                 return r;
 
@@ -4886,7 +4933,9 @@ int exec_invoke(
                                           runtime,
                                           memory_pressure_path,
                                           needs_sandboxing,
-                                          &error_path);
+                                          &error_path,
+                                          uid,
+                                          gid);
                 if (r < 0) {
                         *exit_status = EXIT_NAMESPACE;
                         return log_exec_error_errno(context, params, r, "Failed to set up mount namespacing%s%s: %m",
index 2f5abc9785bd7f68dc806e84574a72066e62ea42..7274c68d3d8394de4f697c3da3b463f6e84d9bd6 100644 (file)
@@ -163,6 +163,7 @@ typedef struct ExecDirectoryItem {
         char *path;
         char **symlinks;
         ExecDirectoryFlags flags;
+        bool idmapped;
 } ExecDirectoryItem;
 
 typedef struct ExecDirectory {
index a0d3dc0cbb9b966ba6a86111df502d0335537040..91c905f2fecd3894ee49802386ebbcd65095a01e 100644 (file)
@@ -113,6 +113,9 @@ typedef struct MountEntry {
         LIST_HEAD(MountOptions, image_options_const);
         char **overlay_layers;
         VeritySettings verity;
+        bool idmapped;
+        uid_t idmap_uid;
+        gid_t idmap_gid;
 } MountEntry;
 
 typedef struct MountList {
@@ -467,6 +470,9 @@ static int append_bind_mounts(MountList *ml, const BindMount *binds, size_t n) {
                         .flags = b->nodev ? MS_NODEV : 0,
                         .source_const = b->source,
                         .ignore = b->ignore_enoent,
+                        .idmapped = b->idmapped,
+                        .idmap_uid = b->uid,
+                        .idmap_gid = b->gid,
                 };
         }
 
@@ -1892,6 +1898,45 @@ static int apply_one_mount(
         }
 
         log_debug("Successfully mounted %s to %s", what, mount_entry_path(m));
+
+        /* Take care of id-mapped mounts */
+        if (m->idmapped && uid_is_valid(m->idmap_uid) && gid_is_valid(m->idmap_gid)) {
+                _cleanup_close_ int userns_fd = -EBADF;
+                _cleanup_free_ char *uid_map = NULL, *gid_map = NULL;
+
+                log_debug("Setting an id-mapped mount on %s", mount_entry_path(m));
+
+                /* Do mapping from nobody (in setup_exec_directory()) -> this uid */
+                if (strextendf(&uid_map, UID_FMT " " UID_FMT " " UID_FMT "\n", UID_NOBODY, (uid_t)m->idmap_uid, (uid_t)1u) < 0)
+                        return log_oom();
+
+                /* Consider StateDirectory=xxx aaa xxx:aaa/222
+                 * To allow for later symlink creation (by root) in create_symlinks_from_tuples(), map root as well. */
+                if (m->idmap_uid != (uid_t)0) {
+                        if (strextendf(&uid_map, UID_FMT " " UID_FMT " " UID_FMT "\n", (uid_t)0, (uid_t)0, (uid_t)1u) < 0)
+                                return log_oom();
+                }
+
+                if (strextendf(&gid_map, GID_FMT " " GID_FMT " " GID_FMT "\n", GID_NOBODY, (gid_t)m->idmap_gid, (gid_t)1u) < 0)
+                        return log_oom();
+
+                if (m->idmap_gid != (gid_t)0) {
+                        if (strextendf(&gid_map, GID_FMT " " GID_FMT " " GID_FMT "\n", (gid_t)0, (gid_t)0, (gid_t)1u) < 0)
+                                return log_oom();
+                }
+
+                userns_fd = userns_acquire(uid_map, gid_map);
+                if (userns_fd < 0)
+                        return log_error_errno(userns_fd, "Failed to allocate user namespace: %m");
+
+                /* Drop SUID, add NOEXEC for the mount to avoid root exploits */
+                r = remount_idmap_fd(STRV_MAKE(mount_entry_path(m)), userns_fd, MOUNT_ATTR_NOSUID | MOUNT_ATTR_NOEXEC | MOUNT_ATTR_NODEV);
+                if (r < 0)
+                        return log_error_errno(r, "Failed to create an id-mapped mount: %m");
+
+                log_debug("ID-mapped mount created successfully for %s from " UID_FMT " to " UID_FMT "", mount_entry_path(m), UID_NOBODY, m->idmap_uid);
+        }
+
         return 1;
 }
 
index fa4f7c7cf671b427d17de64b490f96123fe34a85..7b6e892cc29bdaf491117bdd659227c1a3e57454 100644 (file)
@@ -87,6 +87,9 @@ struct BindMount {
         bool noexec;
         bool recursive;
         bool ignore_enoent;
+        bool idmapped;
+        uid_t uid;
+        gid_t gid;
 };
 
 struct TemporaryFileSystem {
index 86fd1ef865d55fc1cb24ee59b9da21fc6bcd62a4..f12a93d0dcf36eeb9f0badeb7ed9a901477d2472 100644 (file)
@@ -2089,7 +2089,7 @@ static int mount_partition(
                                 (void) fs_grow(node, -EBADF, p);
 
                         if (userns_fd >= 0) {
-                                r = remount_idmap_fd(STRV_MAKE(p), userns_fd);
+                                r = remount_idmap_fd(STRV_MAKE(p), userns_fd, /* extra_mount_attr_set= */ 0);
                                 if (r < 0)
                                         return r;
                         }
index 20b402e3d33d2bcfcce233c85ba98f9730096d9d..3c89a18790c871139badbeaa2167bbd55b0a4bd3 100644 (file)
@@ -1386,7 +1386,8 @@ int make_userns(uid_t uid_shift, uid_t uid_range, uid_t source_owner, uid_t dest
 
 int remount_idmap_fd(
                 char **paths,
-                int userns_fd) {
+                int userns_fd,
+                uint64_t extra_mount_attr_set) {
 
         int r;
 
@@ -1423,7 +1424,7 @@ int remount_idmap_fd(
                 /* Set the user namespace mapping attribute on the cloned mount point */
                 if (mount_setattr(mntfd, "", AT_EMPTY_PATH,
                                   &(struct mount_attr) {
-                                          .attr_set = MOUNT_ATTR_IDMAP,
+                                          .attr_set = MOUNT_ATTR_IDMAP | extra_mount_attr_set,
                                           .userns_fd = userns_fd,
                                   }, sizeof(struct mount_attr)) < 0)
                         return log_debug_errno(errno, "Failed to change bind mount attributes for clone of '%s': %m", paths[i]);
@@ -1460,7 +1461,7 @@ int remount_idmap(
         if (userns_fd < 0)
                 return userns_fd;
 
-        return remount_idmap_fd(p, userns_fd);
+        return remount_idmap_fd(p, userns_fd, /* extra_mount_attr_set= */ 0);
 }
 
 static void sub_mount_clear(SubMount *s) {
index 765dbf72f88e837197ae65954fed4fe28ad96d90..eb068d5b44f4a2a097a7d0567fb37cc49358ddb4 100644 (file)
@@ -163,7 +163,7 @@ typedef enum RemountIdmapping {
 } RemountIdmapping;
 
 int make_userns(uid_t uid_shift, uid_t uid_range, uid_t host_owner, uid_t dest_owner, RemountIdmapping idmapping);
-int remount_idmap_fd(char **p, int userns_fd);
+int remount_idmap_fd(char **p, int userns_fd, uint64_t extra_mount_attr_set);
 int remount_idmap(char **p, uid_t uid_shift, uid_t uid_range, uid_t host_owner, uid_t dest_owner, RemountIdmapping idmapping);
 
 int bind_mount_submounts(
index d6bdfb074167203b324b81935257b5b5ff0fd2fa..e5aee475af7b3d3a545e9b383319801eaf29dbd5 100755 (executable)
@@ -162,6 +162,71 @@ EOF
     systemctl start testservice-34-check-writable.service
 }
 
+test_check_idmapped_mounts() {
+    rm -rf /var/lib/testidmapped /var/lib/private/testidmapped
+
+    cat >/run/systemd/system/testservice-34-check-idmapped.service <<\EOF
+[Unit]
+Description=Check id-mapped directories when DynamicUser=yes with StateDirectory
+
+[Service]
+# Relevant only for sanitizer runs
+EnvironmentFile=-/usr/lib/systemd/systemd-asan-env
+Type=oneshot
+
+MountAPIVFS=yes
+DynamicUser=yes
+PrivateUsers=yes
+TemporaryFileSystem=/run /var/opt /var/lib /vol
+UMask=0000
+StateDirectory=testidmapped:sampleservice
+ExecStart=/bin/bash -c ' \
+    set -eux; \
+    set -o pipefail; \
+    touch /var/lib/sampleservice/testfile; \
+    [[ $(awk "NR==2 {print \$1}" /proc/self/uid_map) == $(stat -c "%%u" /var/lib/private/testidmapped/testfile) ]]; \
+'
+EOF
+
+    systemctl daemon-reload
+    systemctl start testservice-34-check-idmapped.service
+
+    [[ $(stat -c "%u" /var/lib/private/testidmapped/testfile) == 65534 ]]
+}
+
+test_check_idmapped_mounts_root() {
+    rm -rf /var/lib/testidmapped /var/lib/private/testidmapped
+
+    cat >/run/systemd/system/testservice-34-check-idmapped.service <<\EOF
+[Unit]
+Description=Check id-mapped directories when DynamicUser=no with StateDirectory
+
+[Service]
+# Relevant only for sanitizer runs
+EnvironmentFile=-/usr/lib/systemd/systemd-asan-env
+Type=oneshot
+
+MountAPIVFS=yes
+User=root
+DynamicUser=no
+PrivateUsers=no
+TemporaryFileSystem=/run /var/opt /var/lib /vol
+UMask=0000
+StateDirectory=testidmapped:sampleservice
+ExecStart=/bin/bash -c ' \
+    set -eux; \
+    set -o pipefail; \
+    touch /var/lib/sampleservice/testfile; \
+    [[ 0 == $(stat -c "%%u" /var/lib/testidmapped/testfile) ]]; \
+'
+EOF
+
+    systemctl daemon-reload
+    systemctl start testservice-34-check-idmapped.service
+
+    [[ $(stat -c "%u" /var/lib/testidmapped/testfile) == 0 ]]
+}
+
 test_directory "StateDirectory" "/var/lib"
 test_directory "RuntimeDirectory" "/run"
 test_directory "CacheDirectory" "/var/cache"
@@ -169,6 +234,11 @@ test_directory "LogsDirectory" "/var/log"
 
 test_check_writable
 
+if systemd-analyze compare-versions "$(uname -r)" ge 5.12; then
+    test_check_idmapped_mounts
+    test_check_idmapped_mounts_root
+fi
+
 systemd-analyze log-level info
 
 touch /testok