int kmsg_socket,
int rtnl_socket,
int master_pty_socket,
- FDSet *fds) {
+ FDSet *fds,
+ char **os_release_pairs) {
_cleanup_free_ char *home = NULL;
char as_uuid[ID128_UUID_STRING_MAX];
/* Wait until the parent wrote the UID map */
if (!barrier_place_and_sync(barrier)) /* #2 */
- return log_error_errno(SYNTHETIC_ERRNO(ESRCH),
- "Parent died too early");
- }
+ return log_error_errno(SYNTHETIC_ERRNO(ESRCH), "Parent died too early");
- r = reset_uid_gid();
- if (r < 0)
- return log_error_errno(r, "Couldn't become new root: %m");
+ /* Become the new root user inside our namespace */
+ r = reset_uid_gid();
+ if (r < 0)
+ return log_error_errno(r, "Couldn't become new root: %m");
+
+ /* Creating a new user namespace means all MS_SHARED mounts become MS_SLAVE. Let's put them
+ * back to MS_SHARED here, since that's what we want as the default. (This will not reconnect
+ * propagation, but simply create new peer groups for all our mounts.) */
+ r = mount_verbose(LOG_ERR, NULL, "/", NULL, MS_SHARED|MS_REC, NULL);
+ if (r < 0)
+ return r;
+ }
r = mount_all(NULL,
arg_mount_settings | MOUNT_IN_USERNS,
if (asprintf((char **)(envp + n_env++), "NOTIFY_SOCKET=%s", NSPAWN_NOTIFY_SOCKET_PATH) < 0)
return log_oom();
- env_use = strv_env_merge(2, envp, arg_setenv);
+ env_use = strv_env_merge(3, envp, os_release_pairs, arg_setenv);
if (!env_use)
return log_oom();
FDSet *fds,
int netns_fd) {
+ _cleanup_strv_free_ char **os_release_pairs = NULL;
_cleanup_close_ int fd = -1;
const char *p;
pid_t pid;
log_debug("Outer child is initializing.");
+ r = load_os_release_pairs_with_prefix("/", "container_host_", &os_release_pairs);
+ if (r < 0)
+ log_debug_errno(r, "Failed to read os-release from host for container, ignoring: %m");
+
if (prctl(PR_SET_PDEATHSIG, SIGKILL) < 0)
return log_error_errno(errno, "PR_SET_PDEATHSIG failed: %m");
if (r < 0)
return r;
- /* Mark everything as slave, so that we still
- * receive mounts from the real root, but don't
- * propagate mounts to the real root. */
+ /* Mark everything as slave, so that we still receive mounts from the real root, but don't propagate
+ * mounts to the real root. */
r = mount_verbose(LOG_ERR, NULL, "/", NULL, MS_SLAVE|MS_REC, NULL);
if (r < 0)
return r;
notify_socket = safe_close(notify_socket);
uid_shift_socket = safe_close(uid_shift_socket);
- /* The inner child has all namespaces that are
- * requested, so that we all are owned by the user if
- * user namespaces are turned on. */
+ /* The inner child has all namespaces that are requested, so that all of them are owned by
+ * the user if user namespaces are turned on. */
if (arg_network_namespace_path) {
r = namespace_enter(-1, -1, netns_fd, -1, -1);
return log_error_errno(r, "Failed to join network namespace: %m");
}
- r = inner_child(barrier, directory, secondary, kmsg_socket, rtnl_socket, master_pty_socket, fds);
+ r = inner_child(barrier, directory, secondary, kmsg_socket, rtnl_socket, master_pty_socket, fds, os_release_pairs);
if (r < 0)
_exit(EXIT_FAILURE);
if (!barrier_place_and_sync(&barrier)) /* #5 */
return log_error_errno(SYNTHETIC_ERRNO(ESRCH), "Child died too early.");
- /* At this point we have made use of the UID we picked, and thus nss-mymachines
+ /* At this point we have made use of the UID we picked, and thus nss-systemd/systemd-machined.service
* will make them appear in getpwuid(), thus we can release the /etc/passwd lock. */
etc_passwd_lock = safe_close(etc_passwd_lock);