often a good choice if proper user namespacing with distinct UID maps is not appropriate.</para>
<para>If the parameter is <literal>full</literal>, user namespacing is set up with an identity
- mapping for all UIDs/GIDs. Similar to <literal>identity</literal>, this does not provide UID/GID
- isolation, but it does provide process capability isolation.</para>
+ mapping for all UIDs/GIDs. In addition, for system services, <literal>full</literal> allows the unit
+ to use the <function>setgroups()</function> system call (by setting
+ <filename>/proc/<replaceable>pid</replaceable>/setgroups</filename> to <literal>allow</literal>).
+ Similar to <literal>identity</literal>, this does not provide UID/GID isolation, but it does provide
+ process capability isolation.</para>
<para>If this mode is enabled, all unit processes are run without privileges in the host user
namespace (regardless if the unit's own user/group is <literal>root</literal> or not). Specifically
return 0;
}
-static int setup_private_users(PrivateUsers private_users, uid_t ouid, gid_t ogid, uid_t uid, gid_t gid) {
+static int setup_private_users(PrivateUsers private_users, uid_t ouid, gid_t ogid, uid_t uid, gid_t gid, bool allow_setgroups) {
_cleanup_free_ char *uid_map = NULL, *gid_map = NULL;
_cleanup_close_pair_ int errno_pipe[2] = EBADF_PAIR;
_cleanup_close_ int unshare_ready_fd = -EBADF;
if (read(unshare_ready_fd, &c, sizeof(c)) < 0)
report_errno_and_exit(errno_pipe[1], -errno);
- /* Disable the setgroups() system call in the child user namespace, for good. */
+ /* Disable the setgroups() system call in the child user namespace, for good, unless the caller asked
+ * for it to remain allowed (PrivateUsers=full under the system service manager). */
a = procfs_file_alloca(ppid, "setgroups");
fd = open(a, O_WRONLY|O_CLOEXEC);
if (fd < 0) {
/* If the file is missing the kernel is too old, let's continue anyway. */
} else {
- if (write(fd, "deny\n", 5) < 0) {
- r = log_debug_errno(errno, "Failed to write \"deny\" to %s: %m", a);
+ const char *setgroups = allow_setgroups ? "allow\n" : "deny\n";
+ if (write(fd, setgroups, strlen(setgroups)) < 0) {
+ r = log_debug_errno(errno, "Failed to write '%s' to %s: %m", setgroups, a);
report_errno_and_exit(errno_pipe[1], r);
}
if (pu == PRIVATE_USERS_NO)
pu = PRIVATE_USERS_SELF;
- r = setup_private_users(pu, saved_uid, saved_gid, uid, gid);
+ /* The kernel requires /proc/pid/setgroups be set to "deny" prior to writing /proc/pid/gid_map in
+ * unprivileged user namespaces. */
+ r = setup_private_users(pu, saved_uid, saved_gid, uid, gid, /* allow_setgroups= */ false);
/* If it was requested explicitly and we can't set it up, fail early. Otherwise, continue and let
* the actual requested operations fail (or silently continue). */
if (r < 0 && context->private_users != PRIVATE_USERS_NO) {
* different user namespace). */
if (needs_sandboxing && !userns_set_up) {
- r = setup_private_users(context->private_users, saved_uid, saved_gid, uid, gid);
+ r = setup_private_users(context->private_users, saved_uid, saved_gid, uid, gid,
+ /* allow_setgroups= */ context->private_users == PRIVATE_USERS_FULL);
if (r < 0) {
*exit_status = EXIT_USER;
return log_exec_error_errno(context, params, r, "Failed to set up user namespacing: %m");
systemd-run -p PrivateUsers=yes --wait bash -c 'test "$(cat /proc/self/uid_map)" == " 0 0 1"'
systemd-run -p PrivateUsers=yes --wait bash -c 'test "$(cat /proc/self/gid_map)" == " 0 0 1"'
+systemd-run -p PrivateUsersEx=yes --wait bash -c 'test "$(cat /proc/self/setgroups)" == "deny"'
systemd-run -p PrivateUsersEx=self --wait bash -c 'test "$(cat /proc/self/uid_map)" == " 0 0 1"'
systemd-run -p PrivateUsersEx=self --wait bash -c 'test "$(cat /proc/self/gid_map)" == " 0 0 1"'
+systemd-run -p PrivateUsersEx=self --wait bash -c 'test "$(cat /proc/self/setgroups)" == "deny"'
systemd-run -p PrivateUsersEx=identity --wait bash -c 'test "$(cat /proc/self/uid_map)" == " 0 0 65536"'
systemd-run -p PrivateUsersEx=identity --wait bash -c 'test "$(cat /proc/self/gid_map)" == " 0 0 65536"'
systemd-run -p PrivateUsersEx=full --wait bash -c 'test "$(cat /proc/self/uid_map | tr -d "\n")" == " 0 0 1 1 1 4294967294"'
systemd-run -p PrivateUsersEx=full --wait bash -c 'test "$(cat /proc/self/gid_map | tr -d "\n")" == " 0 0 1 1 1 4294967294"'
+systemd-run -p PrivateUsersEx=full --wait bash -c 'test "$(cat /proc/self/setgroups)" == "allow"'