below the locations defined in the following table. Also, the corresponding environment variable will
be defined with the full paths of the directories. If multiple directories are set, then in the
environment variable the paths are concatenated with colon (<literal>:</literal>).</para>
+
+ <para>If <varname>DynamicUser=</varname> is used, and if the kernel version supports
+ <ulink url="https://lwn.net/Articles/896255/">id-mapped mounts</ulink>, the specified directories will
+ be owned by "nobody" in the host namespace and will be mapped to (and will be owned by) the service's
+ UID/GID in its own namespace. For backward compatibility, existing directories created without id-mapped
+ mounts will be kept untouched.</para>
+
<table>
<title>Automatic directory creation and environment variables</title>
<tgroup cols='4'>
/* Lock down the access mode (we use chmod_and_chown() to make this idempotent. We don't
* specify UID/GID here, so that path_chown_recursive() can optimize things depending on the
* current UID/GID ownership.) */
- r = chmod_and_chown(pp ?: p, context->directories[type].mode, UID_INVALID, GID_INVALID);
+ const char *target_dir = pp ?: p;
+ r = chmod_and_chown(target_dir, context->directories[type].mode, UID_INVALID, GID_INVALID);
if (r < 0)
goto fail;
if (params->runtime_scope != RUNTIME_SCOPE_SYSTEM)
continue;
- /* Then, change the ownership of the whole tree, if necessary. When dynamic users are used we
+ int idmapping_supported = is_idmapping_supported(target_dir);
+ if (idmapping_supported < 0) {
+ r = log_debug_errno(idmapping_supported, "Unable to determine if ID mapping is supported on mount '%s': %m", target_dir);
+ goto fail;
+ }
+
+ log_debug("ID-mapping is%ssupported for exec directory %s", idmapping_supported ? " " : " not ", target_dir);
+
+ /* Change the ownership of the whole tree, if necessary. When dynamic users are used we
* drop the suid/sgid bits, since we really don't want SUID/SGID files for dynamic UID/GID
* assignments to exist. */
- r = path_chown_recursive(pp ?: p, uid, gid, context->dynamic_user ? 01777 : 07777, AT_SYMLINK_FOLLOW);
- if (r < 0)
- goto fail;
+ uid_t chown_uid = uid;
+ gid_t chown_gid = gid;
+ bool do_chown = false;
+
+ if (uid == 0 || gid == 0 || !idmapping_supported) {
+ do_chown = true;
+ i->idmapped = false;
+ } else {
+ /* Use 'nobody' uid/gid for exec directories if ID-mapping is supported. For backward compatibility,
+ * continue doing chmod/chown if the directory was chmod/chowned before (if uid/gid is not 'nobody') */
+ struct stat st;
+ r = RET_NERRNO(stat(target_dir, &st));
+ if (r < 0)
+ goto fail;
+
+ if (st.st_uid == UID_NOBODY && st.st_gid == GID_NOBODY) {
+ do_chown = false;
+ i->idmapped = true;
+ } else if (exec_directory_is_private(context, type) && st.st_uid == 0 && st.st_gid == 0) {
+ chown_uid = UID_NOBODY;
+ chown_gid = GID_NOBODY;
+ do_chown = true;
+ i->idmapped = true;
+ } else {
+ do_chown = true;
+ i->idmapped = false;
+ }
+ }
+
+ if (do_chown) {
+ r = path_chown_recursive(target_dir, chown_uid, chown_gid, context->dynamic_user ? 01777 : 07777, AT_SYMLINK_FOLLOW);
+ if (r < 0)
+ goto fail;
+ }
}
/* If we are not going to run in a namespace, set up the symlinks - otherwise
static int compile_bind_mounts(
const ExecContext *context,
const ExecParameters *params,
+ uid_t exec_directory_uid, /* only used for id-mapped mounts Exec directories */
+ gid_t exec_directory_gid, /* only used for id-mapped mounts Exec directories */
BindMount **ret_bind_mounts,
size_t *ret_n_bind_mounts,
char ***ret_empty_directories) {
.nosuid = context->dynamic_user, /* don't allow suid/sgid when DynamicUser= is on */
.recursive = true,
.read_only = FLAGS_SET(i->flags, EXEC_DIRECTORY_READ_ONLY),
+ .idmapped = i->idmapped,
+ .uid = exec_directory_uid,
+ .gid = exec_directory_gid,
};
}
}
ExecRuntime *runtime,
const char *memory_pressure_path,
bool needs_sandboxing,
- char **reterr_path) {
+ char **reterr_path,
+ uid_t exec_directory_uid,
+ gid_t exec_directory_gid) {
_cleanup_(verity_settings_done) VeritySettings verity = VERITY_SETTINGS_DEFAULT;
_cleanup_strv_free_ char **empty_directories = NULL, **symlinks = NULL,
return r;
}
- r = compile_bind_mounts(context, params, &bind_mounts, &n_bind_mounts, &empty_directories);
+ r = compile_bind_mounts(context, params, exec_directory_uid, exec_directory_gid, &bind_mounts, &n_bind_mounts, &empty_directories);
if (r < 0)
return r;
runtime,
memory_pressure_path,
needs_sandboxing,
- &error_path);
+ &error_path,
+ uid,
+ gid);
if (r < 0) {
*exit_status = EXIT_NAMESPACE;
return log_exec_error_errno(context, params, r, "Failed to set up mount namespacing%s%s: %m",
char *path;
char **symlinks;
ExecDirectoryFlags flags;
+ bool idmapped;
} ExecDirectoryItem;
typedef struct ExecDirectory {
LIST_HEAD(MountOptions, image_options_const);
char **overlay_layers;
VeritySettings verity;
+ bool idmapped;
+ uid_t idmap_uid;
+ gid_t idmap_gid;
} MountEntry;
typedef struct MountList {
.flags = b->nodev ? MS_NODEV : 0,
.source_const = b->source,
.ignore = b->ignore_enoent,
+ .idmapped = b->idmapped,
+ .idmap_uid = b->uid,
+ .idmap_gid = b->gid,
};
}
}
log_debug("Successfully mounted %s to %s", what, mount_entry_path(m));
+
+ /* Take care of id-mapped mounts */
+ if (m->idmapped && uid_is_valid(m->idmap_uid) && gid_is_valid(m->idmap_gid)) {
+ _cleanup_close_ int userns_fd = -EBADF;
+ _cleanup_free_ char *uid_map = NULL, *gid_map = NULL;
+
+ log_debug("Setting an id-mapped mount on %s", mount_entry_path(m));
+
+ /* Do mapping from nobody (in setup_exec_directory()) -> this uid */
+ if (strextendf(&uid_map, UID_FMT " " UID_FMT " " UID_FMT "\n", UID_NOBODY, (uid_t)m->idmap_uid, (uid_t)1u) < 0)
+ return log_oom();
+
+ /* Consider StateDirectory=xxx aaa xxx:aaa/222
+ * To allow for later symlink creation (by root) in create_symlinks_from_tuples(), map root as well. */
+ if (m->idmap_uid != (uid_t)0) {
+ if (strextendf(&uid_map, UID_FMT " " UID_FMT " " UID_FMT "\n", (uid_t)0, (uid_t)0, (uid_t)1u) < 0)
+ return log_oom();
+ }
+
+ if (strextendf(&gid_map, GID_FMT " " GID_FMT " " GID_FMT "\n", GID_NOBODY, (gid_t)m->idmap_gid, (gid_t)1u) < 0)
+ return log_oom();
+
+ if (m->idmap_gid != (gid_t)0) {
+ if (strextendf(&gid_map, GID_FMT " " GID_FMT " " GID_FMT "\n", (gid_t)0, (gid_t)0, (gid_t)1u) < 0)
+ return log_oom();
+ }
+
+ userns_fd = userns_acquire(uid_map, gid_map);
+ if (userns_fd < 0)
+ return log_error_errno(userns_fd, "Failed to allocate user namespace: %m");
+
+ /* Drop SUID, add NOEXEC for the mount to avoid root exploits */
+ r = remount_idmap_fd(STRV_MAKE(mount_entry_path(m)), userns_fd, MOUNT_ATTR_NOSUID | MOUNT_ATTR_NOEXEC | MOUNT_ATTR_NODEV);
+ if (r < 0)
+ return log_error_errno(r, "Failed to create an id-mapped mount: %m");
+
+ log_debug("ID-mapped mount created successfully for %s from " UID_FMT " to " UID_FMT "", mount_entry_path(m), UID_NOBODY, m->idmap_uid);
+ }
+
return 1;
}
bool noexec;
bool recursive;
bool ignore_enoent;
+ bool idmapped;
+ uid_t uid;
+ gid_t gid;
};
struct TemporaryFileSystem {
(void) fs_grow(node, -EBADF, p);
if (userns_fd >= 0) {
- r = remount_idmap_fd(STRV_MAKE(p), userns_fd);
+ r = remount_idmap_fd(STRV_MAKE(p), userns_fd, /* extra_mount_attr_set= */ 0);
if (r < 0)
return r;
}
int remount_idmap_fd(
char **paths,
- int userns_fd) {
+ int userns_fd,
+ uint64_t extra_mount_attr_set) {
int r;
/* Set the user namespace mapping attribute on the cloned mount point */
if (mount_setattr(mntfd, "", AT_EMPTY_PATH,
&(struct mount_attr) {
- .attr_set = MOUNT_ATTR_IDMAP,
+ .attr_set = MOUNT_ATTR_IDMAP | extra_mount_attr_set,
.userns_fd = userns_fd,
}, sizeof(struct mount_attr)) < 0)
return log_debug_errno(errno, "Failed to change bind mount attributes for clone of '%s': %m", paths[i]);
if (userns_fd < 0)
return userns_fd;
- return remount_idmap_fd(p, userns_fd);
+ return remount_idmap_fd(p, userns_fd, /* extra_mount_attr_set= */ 0);
}
static void sub_mount_clear(SubMount *s) {
} RemountIdmapping;
int make_userns(uid_t uid_shift, uid_t uid_range, uid_t host_owner, uid_t dest_owner, RemountIdmapping idmapping);
-int remount_idmap_fd(char **p, int userns_fd);
+int remount_idmap_fd(char **p, int userns_fd, uint64_t extra_mount_attr_set);
int remount_idmap(char **p, uid_t uid_shift, uid_t uid_range, uid_t host_owner, uid_t dest_owner, RemountIdmapping idmapping);
int bind_mount_submounts(
systemctl start testservice-34-check-writable.service
}
+test_check_idmapped_mounts() {
+ rm -rf /var/lib/testidmapped /var/lib/private/testidmapped
+
+ cat >/run/systemd/system/testservice-34-check-idmapped.service <<\EOF
+[Unit]
+Description=Check id-mapped directories when DynamicUser=yes with StateDirectory
+
+[Service]
+# Relevant only for sanitizer runs
+EnvironmentFile=-/usr/lib/systemd/systemd-asan-env
+Type=oneshot
+
+MountAPIVFS=yes
+DynamicUser=yes
+PrivateUsers=yes
+TemporaryFileSystem=/run /var/opt /var/lib /vol
+UMask=0000
+StateDirectory=testidmapped:sampleservice
+ExecStart=/bin/bash -c ' \
+ set -eux; \
+ set -o pipefail; \
+ touch /var/lib/sampleservice/testfile; \
+ [[ $(awk "NR==2 {print \$1}" /proc/self/uid_map) == $(stat -c "%%u" /var/lib/private/testidmapped/testfile) ]]; \
+'
+EOF
+
+ systemctl daemon-reload
+ systemctl start testservice-34-check-idmapped.service
+
+ [[ $(stat -c "%u" /var/lib/private/testidmapped/testfile) == 65534 ]]
+}
+
+test_check_idmapped_mounts_root() {
+ rm -rf /var/lib/testidmapped /var/lib/private/testidmapped
+
+ cat >/run/systemd/system/testservice-34-check-idmapped.service <<\EOF
+[Unit]
+Description=Check id-mapped directories when DynamicUser=no with StateDirectory
+
+[Service]
+# Relevant only for sanitizer runs
+EnvironmentFile=-/usr/lib/systemd/systemd-asan-env
+Type=oneshot
+
+MountAPIVFS=yes
+User=root
+DynamicUser=no
+PrivateUsers=no
+TemporaryFileSystem=/run /var/opt /var/lib /vol
+UMask=0000
+StateDirectory=testidmapped:sampleservice
+ExecStart=/bin/bash -c ' \
+ set -eux; \
+ set -o pipefail; \
+ touch /var/lib/sampleservice/testfile; \
+ [[ 0 == $(stat -c "%%u" /var/lib/testidmapped/testfile) ]]; \
+'
+EOF
+
+ systemctl daemon-reload
+ systemctl start testservice-34-check-idmapped.service
+
+ [[ $(stat -c "%u" /var/lib/testidmapped/testfile) == 0 ]]
+}
+
test_directory "StateDirectory" "/var/lib"
test_directory "RuntimeDirectory" "/run"
test_directory "CacheDirectory" "/var/cache"
test_check_writable
+if systemd-analyze compare-versions "$(uname -r)" ge 5.12; then
+ test_check_idmapped_mounts
+ test_check_idmapped_mounts_root
+fi
+
systemd-analyze log-level info
touch /testok