From 24c73c79204bd22d243430bc0d9c2a7d814250bf Mon Sep 17 00:00:00 2001 From: Mike Yuan Date: Fri, 23 Jan 2026 18:51:28 +0100 Subject: [PATCH] core/exec-invoke: switch PRIVATE_USERS_FULL to direct "0 0 UINT32_MAX" mapping The two-extent "0 0 1" + "1 1 UINT32_MAX-1" mapping was a workaround for the uid_map/gid_map heuristic in running_in_userns() of systemd < 258, and was only kept for version N-1 compatibility. Dropping it was intended for v259. Let's get it done now, in v260. --- src/core/exec-invoke.c | 22 +++++----------------- test/units/TEST-07-PID1.private-users.sh | 4 ++-- 2 files changed, 7 insertions(+), 19 deletions(-) diff --git a/src/core/exec-invoke.c b/src/core/exec-invoke.c index 4f6ed63be67..b35f3fbf97e 100644 --- a/src/core/exec-invoke.c +++ b/src/core/exec-invoke.c @@ -2421,28 +2421,16 @@ static int setup_private_users(PrivateUsers private_users, uid_t ouid, gid_t ogi if (!uid_map) return -ENOMEM; } else if (private_users == PRIVATE_USERS_FULL) { - /* Map all UID/GID from original to new user namespace. We can't use `0 0 UINT32_MAX` because - * this is the same UID/GID map as the init user namespace and systemd's running_in_userns() - * checks whether its in a user namespace by comparing uid_map/gid_map to `0 0 UINT32_MAX`. - * Thus, we still map all UIDs/GIDs but do it using two extents to differentiate the new user - * namespace from the init namespace: - * 0 0 1 - * 1 1 UINT32_MAX - 1 - * - * systemd will remove the heuristic in running_in_userns() and use namespace inodes in version 258 - * (PR #35382). But some users may be running a container image with older systemd < 258 so we keep - * this uid_map/gid_map hack until version 259 for version N-1 compatibility. - * - * TODO: Switch to `0 0 UINT32_MAX` in systemd v259. + /* Map all UID/GID from original to new user namespace. * * Note the kernel defines the UID range between 0 and UINT32_MAX so we map all UIDs even though * the UID range beyond INT32_MAX (e.g. i.e. the range above the signed 32-bit range) is * icky. For example, setfsuid() returns the old UID as signed integer. But units can decide to * use these UIDs/GIDs so we need to map them. 
*/ - r = asprintf(&uid_map, "0 0 1\n" - "1 1 " UID_FMT "\n", (uid_t) (UINT32_MAX - 1)); + r = asprintf(&uid_map, "0 0 " UID_FMT "\n", (uid_t) UINT32_MAX); if (r < 0) return -ENOMEM; + /* Can only set up multiple mappings with CAP_SETUID. */ } else if (have_effective_cap(CAP_SETUID) > 0 && uid != ouid && uid_is_valid(uid)) { r = asprintf(&uid_map, @@ -2464,10 +2452,10 @@ static int setup_private_users(PrivateUsers private_users, uid_t ouid, gid_t ogi if (!gid_map) return -ENOMEM; } else if (private_users == PRIVATE_USERS_FULL) { - r = asprintf(&gid_map, "0 0 1\n" - "1 1 " GID_FMT "\n", (gid_t) (UINT32_MAX - 1)); + r = asprintf(&gid_map, "0 0 " GID_FMT "\n", (gid_t) UINT32_MAX); if (r < 0) return -ENOMEM; + /* Can only set up multiple mappings with CAP_SETGID. */ } else if (have_effective_cap(CAP_SETGID) > 0 && gid != ogid && gid_is_valid(gid)) { r = asprintf(&gid_map, diff --git a/test/units/TEST-07-PID1.private-users.sh b/test/units/TEST-07-PID1.private-users.sh index e788f52a2f7..16b56cbc6e7 100755 --- a/test/units/TEST-07-PID1.private-users.sh +++ b/test/units/TEST-07-PID1.private-users.sh @@ -12,6 +12,6 @@ systemd-run -p PrivateUsersEx=self --wait bash -c 'test "$(cat /proc/self/gid_ma systemd-run -p PrivateUsersEx=self --wait bash -c 'test "$(cat /proc/self/setgroups)" == "deny"' systemd-run -p PrivateUsersEx=identity --wait bash -c 'test "$(cat /proc/self/uid_map)" == " 0 0 65536"' systemd-run -p PrivateUsersEx=identity --wait bash -c 'test "$(cat /proc/self/gid_map)" == " 0 0 65536"' -systemd-run -p PrivateUsersEx=full --wait bash -c 'test "$(cat /proc/self/uid_map | tr -d "\n")" == " 0 0 1 1 1 4294967294"' -systemd-run -p PrivateUsersEx=full --wait bash -c 'test "$(cat /proc/self/gid_map | tr -d "\n")" == " 0 0 1 1 1 4294967294"' +systemd-run -p PrivateUsersEx=full --wait bash -c 'test "$(cat /proc/self/uid_map)" == " 0 0 4294967295"' +systemd-run -p PrivateUsersEx=full --wait bash -c 'test "$(cat /proc/self/gid_map)" == " 0 0 4294967295"' systemd-run -p 
PrivateUsersEx=full --wait bash -c 'test "$(cat /proc/self/setgroups)" == "allow"' -- 2.47.3