<varlistentry>
<term><varname>PrivateUsers=</varname></term>
- <listitem><para>Takes a boolean argument or one of <literal>self</literal> or
- <literal>identity</literal>. Defaults to false. If enabled, sets up a new user namespace for the
+ <listitem><para>Takes a boolean argument or one of <literal>self</literal>, <literal>identity</literal>,
+ or <literal>full</literal>. Defaults to false. If enabled, sets up a new user namespace for the
executed processes and configures a user and group mapping. If set to a true value or
<literal>self</literal>, a minimal user and group mapping is configured that maps the
<literal>root</literal> user and group as well as the unit's own user and group to themselves and
since all UIDs/GIDs are chosen identically it does provide process capability isolation, and hence is
often a good choice if proper user namespacing with distinct UID maps is not appropriate.</para>
+ <para>If the parameter is <literal>full</literal>, user namespacing is set up with an identity
+ mapping for all UIDs/GIDs. Similar to <literal>identity</literal>, this does not provide UID/GID
+ isolation, but it does provide process capability isolation.</para>
+
<para>If this mode is enabled, all unit processes are run without privileges in the host user
namespace (regardless of whether the unit's own user/group is <literal>root</literal> or not). Specifically
this means that the process will have zero process capabilities on the host's user namespace, but
uid_map = strdup("0 0 65536\n");
if (!uid_map)
return -ENOMEM;
+ } else if (private_users == PRIVATE_USERS_FULL) {
+ /* Map all UID/GID from original to new user namespace. We can't use `0 0 UINT32_MAX` because
+ * this is the same UID/GID map as the init user namespace and systemd's running_in_userns()
+ * checks whether it is in a user namespace by comparing uid_map/gid_map to `0 0 UINT32_MAX`.
+ * Thus, we still map all UIDs/GIDs but do it using two extents to differentiate the new user
+ * namespace from the init namespace:
+ * 0 0 1
+ * 1 1 UINT32_MAX - 1
+ *
+ * systemd will remove the heuristic in running_in_userns() and use namespace inodes in version 258
+ * (PR #35382). But some users may be running a container image with older systemd < 258 so we keep
+ * this uid_map/gid_map hack until version 259 for version N-1 compatibility.
+ *
+ * TODO: Switch to `0 0 UINT32_MAX` in systemd v259.
+ *
+ * Note the kernel defines the UID range between 0 and UINT32_MAX so we map all UIDs even though
+ * the UID range beyond INT32_MAX (i.e. the range above the signed 32-bit range) is
+ * icky. For example, setfsuid() returns the old UID as signed integer. But units can decide to
+ * use these UIDs/GIDs so we need to map them. */
+ r = asprintf(&uid_map, "0 0 1\n"
+ "1 1 " UID_FMT "\n", (uid_t) (UINT32_MAX - 1));
+ if (r < 0)
+ return -ENOMEM;
/* Can only set up multiple mappings with CAP_SETUID. */
} else if (have_effective_cap(CAP_SETUID) > 0 && uid != ouid && uid_is_valid(uid)) {
r = asprintf(&uid_map,
gid_map = strdup("0 0 65536\n");
if (!gid_map)
return -ENOMEM;
+ } else if (private_users == PRIVATE_USERS_FULL) {
+ r = asprintf(&gid_map, "0 0 1\n"
+ "1 1 " GID_FMT "\n", (gid_t) (UINT32_MAX - 1));
+ if (r < 0)
+ return -ENOMEM;
/* Can only set up multiple mappings with CAP_SETGID. */
} else if (have_effective_cap(CAP_SETGID) > 0 && gid != ogid && gid_is_valid(gid)) {
r = asprintf(&gid_map,
[PRIVATE_USERS_NO] = "no",
[PRIVATE_USERS_SELF] = "self",
[PRIVATE_USERS_IDENTITY] = "identity",
+ [PRIVATE_USERS_FULL] = "full",
};
DEFINE_STRING_TABLE_LOOKUP_WITH_BOOLEAN(private_users, PrivateUsers, PRIVATE_USERS_SELF);
PRIVATE_USERS_NO,
PRIVATE_USERS_SELF,
PRIVATE_USERS_IDENTITY,
+ PRIVATE_USERS_FULL,
_PRIVATE_USERS_MAX,
_PRIVATE_USERS_INVALID = -EINVAL,
} PrivateUsers;
systemd-run -p PrivateUsersEx=self --wait bash -c 'test "$(cat /proc/self/gid_map)" == " 0 0 1"'
systemd-run -p PrivateUsersEx=identity --wait bash -c 'test "$(cat /proc/self/uid_map)" == " 0 0 65536"'
systemd-run -p PrivateUsersEx=identity --wait bash -c 'test "$(cat /proc/self/gid_map)" == " 0 0 65536"'
+systemd-run -p PrivateUsersEx=full --wait bash -c 'test "$(cat /proc/self/uid_map | tr -d "\n")" == " 0 0 1 1 1 4294967294"'
+systemd-run -p PrivateUsersEx=full --wait bash -c 'test "$(cat /proc/self/gid_map | tr -d "\n")" == " 0 0 1 1 1 4294967294"'