]> git.ipfire.org Git - people/ms/pakfire.git/blobdiff - src/libpakfire/jail.c
jail: Commit some disabled code to set up a PTY
[people/ms/pakfire.git] / src / libpakfire / jail.c
index fff8c7e671a8a2c9dfc8cfecbace24036c0fe496..472fcb2ac33f31e98cdb38ded3b51258cf98bbc4 100644 (file)
@@ -35,7 +35,6 @@
 #include <sys/personality.h>
 #include <sys/prctl.h>
 #include <sys/resource.h>
-#include <sys/signalfd.h>
 #include <sys/timerfd.h>
 #include <sys/types.h>
 #include <sys/wait.h>
@@ -55,6 +54,7 @@
 #include <pakfire/jail.h>
 #include <pakfire/logging.h>
 #include <pakfire/mount.h>
+#include <pakfire/os.h>
 #include <pakfire/pakfire.h>
 #include <pakfire/path.h>
 #include <pakfire/private.h>
@@ -129,12 +129,12 @@ struct pakfire_log_buffer {
 struct pakfire_jail_exec {
        int flags;
 
-       // PID (of the child)
-       pid_t pid;
-       int pidfd;
+       // PIDs (of the children)
+       int pidfd1;
+       int pidfd2;
 
-       // Process status (from waitid)
-       siginfo_t status;
+       // Socket to pass FDs
+       int socket[2];
 
        // FD to notify the client that the parent has finished initialization
        int completed_fd;
@@ -148,7 +148,9 @@ struct pakfire_jail_exec {
                // Logging
                int log_INFO[2];
                int log_ERROR[2];
+#ifdef ENABLE_DEBUG
                int log_DEBUG[2];
+#endif /* ENABLE_DEBUG */
        } pipes;
 
        // Communicate
@@ -166,11 +168,17 @@ struct pakfire_jail_exec {
                // Logging
                struct pakfire_log_buffer log_INFO;
                struct pakfire_log_buffer log_ERROR;
+#ifdef ENABLE_DEBUG
                struct pakfire_log_buffer log_DEBUG;
+#endif /* ENABLE_DEBUG */
        } buffers;
 
        struct pakfire_cgroup* cgroup;
        struct pakfire_cgroup_stats cgroup_stats;
+
+       // Console
+       char console[PATH_MAX];
+       int consolefd;
 };
 
 static int clone3(struct clone_args* args, size_t size) {
@@ -525,32 +533,6 @@ ERROR:
        return -1;
 }
 
-// Signals
-
-static int pakfire_jail_handle_signals(struct pakfire_jail* jail) {
-       sigset_t mask;
-       int r;
-
-       sigemptyset(&mask);
-       sigaddset(&mask, SIGINT);
-
-       // Block signals
-       r = sigprocmask(SIG_BLOCK, &mask, NULL);
-       if (r < 0) {
-               ERROR(jail->pakfire, "Failed to block signals: %m\n");
-               return r;
-       }
-
-       // Create a file descriptor
-       r = signalfd(-1, &mask, SFD_NONBLOCK|SFD_CLOEXEC);
-       if (r < 0) {
-               ERROR(jail->pakfire, "Failed to create signalfd: %m\n");
-               return r;
-       }
-
-       return r;
-}
-
 /*
        This function replaces any logging in the child process.
 
@@ -754,6 +736,67 @@ static int pakfire_jail_get_pipe_to_write(struct pakfire_jail* jail, int (*fds)[
        return -1;
 }
 
+static int pakfire_jail_recv_fd(struct pakfire_jail* jail, int socket, int* fd) {
+       const size_t payload_length = sizeof(fd);
+       char buffer[CMSG_SPACE(payload_length)];
+       int r;
+
+       struct msghdr msg = {
+               .msg_control    = buffer,
+               .msg_controllen = sizeof(buffer),
+       };
+
+       // Receive the message
+       r = recvmsg(socket, &msg, 0);
+       if (r) {
+               CTX_ERROR(jail->ctx, "Could not receive file descriptor: %s\n", strerror(errno));
+               return -errno;
+       }
+
+       // Fetch the payload
+       struct cmsghdr* cmsg = CMSG_FIRSTHDR(&msg);
+       if (!cmsg)
+               return -EBADMSG;
+
+       *fd = *((int*)CMSG_DATA(cmsg));
+
+       CTX_DEBUG(jail->ctx, "Received fd %d from socket %d\n", *fd, socket);
+
+       return 0;
+}
+
+static int pakfire_jail_send_fd(struct pakfire_jail* jail, int socket, int fd) {
+       const size_t payload_length = sizeof(fd);
+       char buffer[CMSG_SPACE(payload_length)];
+       int r;
+
+       CTX_DEBUG(jail->ctx, "Sending fd %d to socket %d\n", fd, socket);
+
+       // Header
+       struct msghdr msg = {
+               .msg_control    = buffer,
+               .msg_controllen = sizeof(buffer),
+       };
+
+       // Payload
+       struct cmsghdr* cmsg = CMSG_FIRSTHDR(&msg);
+       cmsg->cmsg_level = SOL_SOCKET;
+       cmsg->cmsg_type  = SCM_RIGHTS;
+       cmsg->cmsg_len   = CMSG_LEN(payload_length);
+
+       // Set payload
+       *((int*)CMSG_DATA(cmsg)) = fd;
+
+       // Send the message
+       r = sendmsg(socket, &msg, 0);
+       if (r) {
+               CTX_ERROR(jail->ctx, "Could not send file descriptor: %s\n", strerror(errno));
+               return -errno;
+       }
+
+       return 0;
+}
+
 static int pakfire_jail_log(struct pakfire* pakfire, void* data, int priority,
                const char* line, const size_t length) {
        // Pass everything to the parent logger
@@ -762,19 +805,86 @@ static int pakfire_jail_log(struct pakfire* pakfire, void* data, int priority,
        return 0;
 }
 
+static int pakfire_jail_epoll_add_fd(struct pakfire_jail* jail, int epollfd, int fd, int events) {
+       struct epoll_event event = {
+               .events = events|EPOLLHUP,
+               .data   = {
+                       .fd = fd,
+               },
+       };
+       int r;
+
+       // Read flags
+       int flags = fcntl(fd, F_GETFL, 0);
+
+       // Set modified flags
+       r  = fcntl(fd, F_SETFL, flags|O_NONBLOCK);
+       if (r < 0) {
+               CTX_ERROR(jail->ctx, "Could not set file descriptor %d into non-blocking mode: %s\n",
+                       fd, strerror(errno));
+               return -errno;
+       }
+
+       // Add the file descriptor to the loop
+       r = epoll_ctl(epollfd, EPOLL_CTL_ADD, fd, &event);
+       if (r < 0) {
+               ERROR(jail->pakfire, "Could not add file descriptor %d to epoll(): %s\n",
+                       fd, strerror(errno));
+               return -errno;
+       }
+
+       return 0;
+}
+
+static int pakfire_jail_setup_child2(struct pakfire_jail* jail, struct pakfire_jail_exec* ctx);
+
+static int pakfire_jail_wait_on_child(struct pakfire_jail* jail, int pidfd) {
+       siginfo_t status = {};
+       int r;
+
+       // Call waitid() and store the result
+       r = waitid(P_PIDFD, pidfd, &status, WEXITED);
+       if (r) {
+               CTX_ERROR(jail->ctx, "waitid() failed: %s\n", strerror(errno));
+               return -errno;
+       }
+
+       switch (status.si_code) {
+               // If the process exited normally, we return the exit code
+               case CLD_EXITED:
+                       CTX_DEBUG(jail->ctx, "The child process exited with code %d\n", status.si_status);
+                       return status.si_status;
+
+               case CLD_KILLED:
+                       CTX_ERROR(jail->ctx, "The child process was killed\n");
+                       return 139;
+
+               case CLD_DUMPED:
+                       CTX_ERROR(jail->ctx, "The child process terminated abnormally\n");
+                       return 139;
+
+               // Log anything else
+               default:
+                       CTX_ERROR(jail->ctx, "Unknown child exit code: %d\n", status.si_code);
+                       break;
+       }
+
+       return -EBADMSG;
+}
+
 static int pakfire_jail_wait(struct pakfire_jail* jail, struct pakfire_jail_exec* ctx) {
        int epollfd = -1;
-       struct epoll_event ev;
        struct epoll_event events[EPOLL_MAX_EVENTS];
-       struct signalfd_siginfo siginfo;
        char garbage[8];
        int r = 0;
 
+       // Fetch the UNIX domain socket
+       const int socket_recv = pakfire_jail_get_pipe_to_read(jail, &ctx->socket);
+
        // Fetch file descriptors from context
        const int stdin = pakfire_jail_get_pipe_to_write(jail, &ctx->pipes.stdin);
        const int stdout = pakfire_jail_get_pipe_to_read(jail, &ctx->pipes.stdout);
        const int stderr = pakfire_jail_get_pipe_to_read(jail, &ctx->pipes.stderr);
-       const int pidfd  = ctx->pidfd;
 
        // Timer
        const int timerfd = pakfire_jail_create_timer(jail);
@@ -782,14 +892,37 @@ static int pakfire_jail_wait(struct pakfire_jail* jail, struct pakfire_jail_exec
        // Logging
        const int log_INFO  = pakfire_jail_get_pipe_to_read(jail, &ctx->pipes.log_INFO);
        const int log_ERROR = pakfire_jail_get_pipe_to_read(jail, &ctx->pipes.log_ERROR);
+#ifdef ENABLE_DEBUG
        const int log_DEBUG = pakfire_jail_get_pipe_to_read(jail, &ctx->pipes.log_DEBUG);
-
-       // Signals
-       const int signalfd = pakfire_jail_handle_signals(jail);
+#endif /* ENABLE_DEBUG */
 
        // Make a list of all file descriptors we are interested in
-       const int fds[] = {
-               stdin, stdout, stderr, pidfd, timerfd, signalfd, log_INFO, log_ERROR, log_DEBUG,
+       const struct pakfire_wait_fds {
+               const int fd;
+               const int events;
+       } fds[] = {
+               { socket_recv, EPOLLIN },
+
+               // Standard input/output
+               { stdin,  EPOLLOUT },
+               { stdout, EPOLLIN },
+               { stderr, EPOLLIN },
+
+               // Timer
+               { timerfd, EPOLLIN },
+
+               // Child Processes
+               { ctx->pidfd1, EPOLLIN },
+
+               // Log Pipes
+               { log_INFO, EPOLLIN },
+               { log_ERROR, EPOLLIN },
+#ifdef ENABLE_DEBUG
+               { log_DEBUG, EPOLLIN },
+#endif /* ENABLE_DEBUG */
+
+               // Sentinel
+               { -1, 0 },
        };
 
        // Setup epoll
@@ -801,41 +934,21 @@ static int pakfire_jail_wait(struct pakfire_jail* jail, struct pakfire_jail_exec
        }
 
        // Turn file descriptors into non-blocking mode and add them to epoll()
-       for (unsigned int i = 0; i < sizeof(fds) / sizeof(*fds); i++) {
-               int fd = fds[i];
-
+       for (const struct pakfire_wait_fds* fd = fds; fd->events; fd++) {
                // Skip fds which were not initialized
-               if (fd < 0)
+               if (fd->fd < 0)
                        continue;
 
-               ev.events = EPOLLHUP;
-
-               if (fd == stdin)
-                       ev.events |= EPOLLOUT;
-               else
-                       ev.events |= EPOLLIN;
-
-               // Read flags
-               int flags = fcntl(fd, F_GETFL, 0);
-
-               // Set modified flags
-               if (fcntl(fd, F_SETFL, flags|O_NONBLOCK) < 0) {
-                       ERROR(jail->pakfire,
-                               "Could not set file descriptor %d into non-blocking mode: %m\n", fd);
-                       r = 1;
-                       goto ERROR;
-               }
-
-               ev.data.fd = fd;
-
-               if (epoll_ctl(epollfd, EPOLL_CTL_ADD, fd, &ev) < 0) {
-                       ERROR(jail->pakfire, "Could not add file descriptor %d to epoll(): %m\n", fd);
-                       r = 1;
+               // Add the FD to the event loop
+               r = pakfire_jail_epoll_add_fd(jail, epollfd, fd->fd, fd->events);
+               if (r)
                        goto ERROR;
-               }
        }
 
        int ended = 0;
+       int exit = 0;
+
+       CTX_DEBUG(jail->ctx, "Launching main loop...\n");
 
        // Loop for as long as the process is alive
        while (!ended) {
@@ -862,18 +975,34 @@ static int pakfire_jail_wait(struct pakfire_jail* jail, struct pakfire_jail_exec
 
                        // Check if there is any data to be read
                        if (e & EPOLLIN) {
-                               // Handle any changes to the PIDFD
-                               if (fd == pidfd) {
-                                       // Call waidid() and store the result
-                                       r = waitid(P_PIDFD, ctx->pidfd, &ctx->status, WEXITED);
+                               // Monitor the first child process
+                               if (fd == ctx->pidfd1) {
+                                       r = pakfire_jail_wait_on_child(jail, ctx->pidfd1);
                                        if (r) {
-                                               ERROR(jail->pakfire, "waitid() failed: %m\n");
+                                               CTX_ERROR(jail->ctx, "The first child exited with an error\n");
+                                               goto ERROR;
+                                       }
+
+                                       close(ctx->pidfd1);
+                                       ctx->pidfd1 = -1;
+
+                                       continue;
+
+                               // Monitor the second child process
+                               } else if (fd == ctx->pidfd2) {
+                                       exit = pakfire_jail_wait_on_child(jail, ctx->pidfd2);
+                                       if (exit < 0) {
+                                               CTX_ERROR(jail->ctx, "The second child exited with an error\n");
                                                goto ERROR;
                                        }
 
+                                       close(ctx->pidfd2);
+                                       ctx->pidfd2 = -1;
+
                                        // Mark that we have ended so that we will process the remaining
                                        // events from epoll() now, but won't restart the outer loop.
                                        ended = 1;
+
                                        continue;
 
                                // Handle timer events
@@ -893,7 +1022,7 @@ static int pakfire_jail_wait(struct pakfire_jail* jail, struct pakfire_jail_exec
                                                DEBUG(jail->pakfire, "Terminating process...\n");
 
                                                // Send SIGTERM to the process
-                                               r = pidfd_send_signal(pidfd, SIGKILL, NULL, 0);
+                                               r = pidfd_send_signal(ctx->pidfd2, SIGKILL, NULL, 0);
                                                if (r) {
                                                        ERROR(jail->pakfire, "Could not kill process: %m\n");
                                                        goto ERROR;
@@ -903,33 +1032,22 @@ static int pakfire_jail_wait(struct pakfire_jail* jail, struct pakfire_jail_exec
                                        // There is nothing else to do
                                        continue;
 
-                               // Handle signals
-                               } else if (fd == signalfd) {
-                                       // Read the signal
-                                       r = read(signalfd, &siginfo, sizeof(siginfo));
-                                       if (r < 1) {
-                                               ERROR(jail->pakfire, "Could not read signal: %m\n");
+                               // Handle socket messages
+                               } else if (fd == socket_recv) {
+                                       // Receive the FD of the second child process
+                                       r = pakfire_jail_recv_fd(jail, socket_recv, &ctx->pidfd2);
+                                       if (r)
                                                goto ERROR;
-                                       }
-
-                                       DEBUG(jail->pakfire, "Received signal %u\n", siginfo.ssi_signo);
 
-                                       // Handle signals
-                                       switch (siginfo.ssi_signo) {
-                                               // Pass SIGINT down to the child process
-                                               case SIGINT:
-                                                       r = pidfd_send_signal(pidfd, siginfo.ssi_signo, NULL, 0);
-                                                       if (r) {
-                                                               ERROR(jail->pakfire, "Could not send signal to process: %m\n");
-                                                               goto ERROR;
-                                                       }
-                                                       break;
+                                       // Add it to the event loop
+                                       r = pakfire_jail_epoll_add_fd(jail, epollfd, ctx->pidfd2, EPOLLIN);
+                                       if (r)
+                                               goto ERROR;
 
-                                               default:
-                                                       ERROR(jail->pakfire, "Received unhandled signal %u\n",
-                                                               siginfo.ssi_signo);
-                                                       break;
-                                       }
+                                       // Setup the child process
+                                       r = pakfire_jail_setup_child2(jail, ctx);
+                                       if (r)
+                                               goto ERROR;
 
                                        // Don't fall through to log processing
                                        continue;
@@ -947,11 +1065,13 @@ static int pakfire_jail_wait(struct pakfire_jail* jail, struct pakfire_jail_exec
 
                                        callback = pakfire_jail_log;
 
+#ifdef ENABLE_DEBUG
                                } else if (fd == log_DEBUG) {
                                        buffer = &ctx->buffers.log_DEBUG;
                                        priority = LOG_DEBUG;
 
                                        callback = pakfire_jail_log;
+#endif /* ENABLE_DEBUG */
 
                                // Handle anything from the log pipes
                                } else if (fd == stdout) {
@@ -1021,13 +1141,16 @@ static int pakfire_jail_wait(struct pakfire_jail* jail, struct pakfire_jail_exec
                }
        }
 
+       // Return the exit code
+       r = exit;
+
 ERROR:
+       CTX_DEBUG(jail->ctx, "Main loop terminated\n");
+
        if (epollfd >= 0)
                close(epollfd);
        if (timerfd >= 0)
                close(timerfd);
-       if (signalfd >= 0)
-               close(signalfd);
 
        return r;
 }
@@ -1332,7 +1455,17 @@ static int pakfire_jail_mount(struct pakfire_jail* jail, struct pakfire_jail_exe
                flags |= PAKFIRE_MOUNT_LOOP_DEVICES;
 
        // Mount all default stuff
-       r = pakfire_mount_all(jail->pakfire, flags);
+       r = pakfire_mount_all(jail->pakfire, PAKFIRE_MNTNS_OUTER, flags);
+       if (r)
+               return r;
+
+       // Populate /dev
+       r = pakfire_populate_dev(jail->pakfire, flags);
+       if (r)
+               return r;
+
+       // Mount the interpreter (if needed)
+       r = pakfire_mount_interpreter(jail->pakfire);
        if (r)
                return r;
 
@@ -1354,9 +1487,6 @@ static int pakfire_jail_mount(struct pakfire_jail* jail, struct pakfire_jail_exe
                        return r;
        }
 
-       // Log all mountpoints
-       pakfire_mount_list(jail->pakfire);
-
        return 0;
 }
 
@@ -1502,7 +1632,7 @@ static int pakfire_jail_setup_gid_mapping(struct pakfire_jail* jail, pid_t pid)
                        "0 %lu %lu\n", subgid->id, subgid->length);
        } else {
                r = pakfire_file_write(jail->pakfire, path, 0, 0, 0,
-                       "0 %lu 1\n%1 %lu %lu\n", gid, subgid->id, subgid->length);
+                       "0 %lu 1\n1 %lu %lu\n", gid, subgid->id, subgid->length);
        }
 
        if (r) {
@@ -1515,38 +1645,19 @@ static int pakfire_jail_setup_gid_mapping(struct pakfire_jail* jail, pid_t pid)
 
 static int pakfire_jail_setgroups(struct pakfire_jail* jail, pid_t pid) {
        char path[PATH_MAX];
-       int r = 1;
+       int r;
 
        // Make path
        r = pakfire_string_format(path, "/proc/%d/setgroups", pid);
        if (r)
                return r;
 
-       // Open file for writing
-       FILE* f = fopen(path, "w");
-       if (!f) {
-               ERROR(jail->pakfire, "Could not open %s for writing: %m\n", path);
-               goto ERROR;
-       }
-
-       // Write content
-       int bytes_written = fprintf(f, "deny\n");
-       if (bytes_written <= 0) {
-               ERROR(jail->pakfire, "Could not write to %s: %m\n", path);
-               goto ERROR;
-       }
-
-       r = fclose(f);
-       f = NULL;
+       r = pakfire_file_write(jail->pakfire, path, 0, 0, 0, "deny\n");
        if (r) {
-               ERROR(jail->pakfire, "Could not close %s: %m\n", path);
-               goto ERROR;
+               CTX_ERROR(jail->ctx, "Could not set setgroups to deny: %s\n", strerror(errno));
+               r = -errno;
        }
 
-ERROR:
-       if (f)
-               fclose(f);
-
        return r;
 }
 
@@ -1557,10 +1668,10 @@ static int pakfire_jail_send_signal(struct pakfire_jail* jail, int fd) {
        DEBUG(jail->pakfire, "Sending signal...\n");
 
        // Write to the file descriptor
-       ssize_t bytes_written = write(fd, &val, sizeof(val));
-       if (bytes_written < 0 || (size_t)bytes_written < sizeof(val)) {
-               ERROR(jail->pakfire, "Could not send signal: %m\n");
-               r = 1;
+       r = eventfd_write(fd, val);
+       if (r < 0) {
+               ERROR(jail->pakfire, "Could not send signal: %s\n", strerror(errno));
+               r = -errno;
        }
 
        // Close the file descriptor
@@ -1575,10 +1686,10 @@ static int pakfire_jail_wait_for_signal(struct pakfire_jail* jail, int fd) {
 
        DEBUG(jail->pakfire, "Waiting for signal...\n");
 
-       ssize_t bytes_read = read(fd, &val, sizeof(val));
-       if (bytes_read < 0 || (size_t)bytes_read < sizeof(val)) {
-               ERROR(jail->pakfire, "Error waiting for signal: %m\n");
-               r = 1;
+       r = eventfd_read(fd, &val);
+       if (r < 0) {
+               ERROR(jail->pakfire, "Error waiting for signal: %s\n", strerror(errno));
+               r = -errno;
        }
 
        // Close the file descriptor
@@ -1587,38 +1698,6 @@ static int pakfire_jail_wait_for_signal(struct pakfire_jail* jail, int fd) {
        return r;
 }
 
-/*
-       Performs the initialisation that needs to happen in the parent part
-*/
-static int pakfire_jail_parent(struct pakfire_jail* jail, struct pakfire_jail_exec* ctx) {
-       int r;
-
-       // Setup UID mapping
-       r = pakfire_jail_setup_uid_mapping(jail, ctx->pid);
-       if (r)
-               return r;
-
-       // Write "deny" to /proc/PID/setgroups
-       r = pakfire_jail_setgroups(jail, ctx->pid);
-       if (r)
-               return r;
-
-       // Setup GID mapping
-       r = pakfire_jail_setup_gid_mapping(jail, ctx->pid);
-       if (r)
-               return r;
-
-       // Parent has finished initialisation
-       DEBUG(jail->pakfire, "Parent has finished initialization\n");
-
-       // Send signal to client
-       r = pakfire_jail_send_signal(jail, ctx->completed_fd);
-       if (r)
-               return r;
-
-       return 0;
-}
-
 static int pakfire_jail_switch_root(struct pakfire_jail* jail, const char* root) {
        int r;
 
@@ -1646,44 +1725,103 @@ static int pakfire_jail_switch_root(struct pakfire_jail* jail, const char* root)
        return 0;
 }
 
-static int pakfire_jail_child(struct pakfire_jail* jail, struct pakfire_jail_exec* ctx,
-               const char* argv[]) {
+#if 0
+static int pakfire_jail_open_pty(struct pakfire_jail* jail, struct pakfire_jail_exec* ctx) {
        int r;
 
-       // Redirect any logging to our log pipe
-       pakfire_ctx_set_log_callback(jail->ctx, pakfire_jail_log_redirect, &ctx->pipes);
+       // Allocate a new PTY
+       ctx->consolefd = posix_openpt(O_RDWR|O_NONBLOCK|O_NOCTTY|O_CLOEXEC);
+       if (ctx->consolefd < 0)
+               return -errno;
 
-       // Fetch my own PID
-       pid_t pid = getpid();
+       // Fetch the path
+       r = ptsname_r(ctx->consolefd, ctx->console, sizeof(ctx->console));
+       if (r)
+               return -r;
 
-       DEBUG(jail->pakfire, "Launched child process in jail with PID %d\n", pid);
+       CTX_DEBUG(jail->ctx, "Allocated console at %s (%d)\n", ctx->console, ctx->consolefd);
 
-       // Wait for the parent to finish initialization
-       r = pakfire_jail_wait_for_signal(jail, ctx->completed_fd);
+       // Create a symlink
+       r = pakfire_symlink(jail->ctx, "/dev/console", ctx->console);
        if (r)
                return r;
 
-       // Die with parent
-       r = prctl(PR_SET_PDEATHSIG, SIGKILL, 0, 0, 0);
+       return r;
+}
+#endif
+
+/*
+       Called by the parent that sets up the second child process...
+*/
+static int pakfire_jail_setup_child2(
+               struct pakfire_jail* jail, struct pakfire_jail_exec* ctx) {
+       pid_t pid = -1;
+       int r;
+
+       // Fetch the PID
+       r = pidfd_get_pid(ctx->pidfd2, &pid);
        if (r) {
-               ERROR(jail->pakfire, "Could not configure to die with parent: %m\n");
-               return 126;
+               CTX_ERROR(jail->ctx, "Could not fetch PID: %s\n", strerror(-r));
+               return r;
        }
 
+       // Setup UID mapping
+       r = pakfire_jail_setup_uid_mapping(jail, pid);
+       if (r)
+               return r;
+
+       // Write "deny" to /proc/PID/setgroups
+       r = pakfire_jail_setgroups(jail, pid);
+       if (r)
+               return r;
+
+       // Setup GID mapping
+       r = pakfire_jail_setup_gid_mapping(jail, pid);
+       if (r)
+               return r;
+
+       // Parent has finished initialisation
+       DEBUG(jail->pakfire, "Parent has finished initialization\n");
+
+       // Send signal to client
+       r = pakfire_jail_send_signal(jail, ctx->completed_fd);
+       if (r)
+               return r;
+
+       return 0;
+}
+
+/*
+       Child 2 is launched in their own user/mount/etc. namespace.
+*/
+static int pakfire_jail_child2(struct pakfire_jail* jail,
+               struct pakfire_jail_exec* ctx, const char* argv[]) {
+       int r;
+
+       // Fetch my own PID
+       pid_t pid = getpid();
+
+       CTX_DEBUG(jail->ctx, "Launched child process in jail with PID %d\n", pid);
+
        // Make this process dumpable
        r = prctl (PR_SET_DUMPABLE, 1, 0, 0, 0);
        if (r) {
-               ERROR(jail->pakfire, "Could not make the process dumpable: %m\n");
+               CTX_ERROR(jail->ctx, "Could not make the process dumpable: %m\n");
                return 126;
        }
 
        // Don't drop any capabilities on setuid()
        r = prctl(PR_SET_KEEPCAPS, 1);
        if (r) {
-               ERROR(jail->pakfire, "Could not set PR_SET_KEEPCAPS: %m\n");
+               CTX_ERROR(jail->ctx, "Could not set PR_SET_KEEPCAPS: %m\n");
                return 126;
        }
 
+       // Wait for the parent to finish initialization
+       r = pakfire_jail_wait_for_signal(jail, ctx->completed_fd);
+       if (r)
+               return r;
+
        // Fetch UID/GID
        uid_t uid = getuid();
        gid_t gid = getgid();
@@ -1695,42 +1833,43 @@ static int pakfire_jail_child(struct pakfire_jail* jail, struct pakfire_jail_exe
        DEBUG(jail->pakfire, "  UID: %u (effective %u)\n", uid, euid);
        DEBUG(jail->pakfire, "  GID: %u (effective %u)\n", gid, egid);
 
-       // Check if we are (effectively running as root)
+       // Log all mountpoints
+       pakfire_mount_list(jail->ctx);
+
+       // Fail if we are not PID 1
+       if (pid != 1) {
+               CTX_ERROR(jail->ctx, "Child process is not PID 1\n");
+               return 126;
+       }
+
+       // Fail if we are not running as root
        if (uid || gid || euid || egid) {
                ERROR(jail->pakfire, "Child process is not running as root\n");
                return 126;
        }
 
-       const char* root = pakfire_get_path(jail->pakfire);
-       const char* arch = pakfire_get_effective_arch(jail->pakfire);
-
-       // Change mount propagation to slave to receive anything from the parent namespace
-       r = pakfire_mount_change_propagation(jail->ctx, "/", MS_SLAVE);
-       if (r)
-               return r;
-
-       // Make root a mountpoint in the new mount namespace
-       r = pakfire_mount_make_mounpoint(jail->pakfire, root);
-       if (r)
-               return r;
-
-       // Change mount propagation to private
-       r = pakfire_mount_change_propagation(jail->ctx, root, MS_PRIVATE);
+       // Mount all default stuff
+       r = pakfire_mount_all(jail->pakfire, PAKFIRE_MNTNS_INNER, 0);
        if (r)
-               return r;
+               return 126;
 
-       // Change root (unless root is /)
-       if (!pakfire_on_root(jail->pakfire)) {
-               // Mount everything
-               r = pakfire_jail_mount(jail, ctx);
-               if (r)
-                       return r;
+#if 0
+       // Create a new session
+       r = setsid();
+       if (r < 0) {
+               CTX_ERROR(jail->ctx, "Could not create a new session: %s\n", strerror(errno));
+               return 126;
+       }
 
-               // chroot()
-               r = pakfire_jail_switch_root(jail, root);
-               if (r)
-                       return r;
+       // Allocate a new PTY
+       r = pakfire_jail_open_pty(jail, ctx);
+       if (r) {
+               CTX_ERROR(jail->ctx, "Could not allocate a new PTY: %s\n", strerror(-r));
+               return 126;
        }
+#endif
+
+       const char* arch = pakfire_get_effective_arch(jail->pakfire);
 
        // Set personality
        unsigned long persona = pakfire_arch_personality(arch);
@@ -1738,7 +1877,7 @@ static int pakfire_jail_child(struct pakfire_jail* jail, struct pakfire_jail_exe
                r = personality(persona);
                if (r < 0) {
                        ERROR(jail->pakfire, "Could not set personality (%x)\n", (unsigned int)persona);
-                       return 1;
+                       return 126;
                }
        }
 
@@ -1822,12 +1961,12 @@ static int pakfire_jail_child(struct pakfire_jail* jail, struct pakfire_jail_exe
        if (r)
                return r;
 
-       DEBUG(jail->pakfire, "Child process initialization done\n");
-       DEBUG(jail->pakfire, "Launching command:\n");
+       CTX_DEBUG(jail->ctx, "Child process initialization done\n");
+       CTX_DEBUG(jail->ctx, "Launching command:\n");
 
        // Log argv
        for (unsigned int i = 0; argv[i]; i++)
-               DEBUG(jail->pakfire, "  argv[%u] = %s\n", i, argv[i]);
+               CTX_DEBUG(jail->ctx, "  argv[%u] = %s\n", i, argv[i]);
 
        // exec() command
        r = execvpe(argv[0], (char**)argv, jail->env);
@@ -1847,20 +1986,131 @@ static int pakfire_jail_child(struct pakfire_jail* jail, struct pakfire_jail_exe
                                r = 1;
                }
 
-               ERROR(jail->pakfire, "Could not execve(%s): %m\n", argv[0]);
+               CTX_ERROR(jail->ctx, "Could not execve(%s): %m\n", argv[0]);
        }
 
        // We should not get here
        return r;
 }
 
+/*
+       Child 1 is launched in a new mount namespace...
+*/
+static int pakfire_jail_child1(struct pakfire_jail* jail,
+               struct pakfire_jail_exec* ctx, const char* argv[]) {
+       int r;
+
+       // Redirect any logging to our log pipe
+       pakfire_ctx_set_log_callback(jail->ctx, pakfire_jail_log_redirect, &ctx->pipes);
+
+       CTX_DEBUG(jail->ctx, "First child process launched\n");
+
+       const int socket_send = pakfire_jail_get_pipe_to_write(jail, &ctx->socket);
+
+       const char* root = pakfire_get_path(jail->pakfire);
+
+       // Die with parent
+       r = prctl(PR_SET_PDEATHSIG, SIGKILL, 0, 0, 0);
+       if (r) {
+               CTX_ERROR(jail->ctx, "Could not configure to die with parent: %s\n", strerror(errno));
+               goto ERROR;
+       }
+
+       // Change mount propagation so that we will receive, but don't propagate back
+       r = pakfire_mount_change_propagation(jail->ctx, "/", MS_SLAVE);
+       if (r) {
+               CTX_ERROR(jail->ctx, "Could not change mount propagation to SLAVE: %s\n", strerror(r));
+               goto ERROR;
+       }
+
+       // Make root a mountpoint in the new mount namespace
+       r = pakfire_mount_make_mounpoint(jail->pakfire, root);
+       if (r)
+               goto ERROR;
+
+       // Make everything private
+       r = pakfire_mount_change_propagation(jail->ctx, root, MS_PRIVATE);
+       if (r) {
+               CTX_ERROR(jail->ctx, "Could not change mount propagation to PRIVATE: %s\n", strerror(r));
+               goto ERROR;
+       }
+
+       // Mount everything
+       r = pakfire_jail_mount(jail, ctx);
+       if (r)
+               goto ERROR;
+
+       // XXX setup keyring
+
+
+
+       // chroot()
+       r = pakfire_jail_switch_root(jail, root);
+       if (r)
+               goto ERROR;
+
+       // Change mount propagation so that we will propagate everything down
+       r = pakfire_mount_change_propagation(jail->ctx, "/", MS_SHARED);
+       if (r) {
+               CTX_ERROR(jail->ctx, "Could not change mount propagation to SHARED: %s\n", strerror(r));
+               goto ERROR;
+       }
+
+       // Configure child process
+       struct clone_args args = {
+               .flags =
+                       CLONE_NEWCGROUP |
+                       CLONE_NEWIPC |
+                       CLONE_NEWNS |
+                       CLONE_NEWPID |
+                       CLONE_NEWTIME |
+                       CLONE_NEWUSER |
+                       CLONE_NEWUTS |
+                       CLONE_PIDFD,
+               .exit_signal = SIGCHLD,
+               .pidfd = (long long unsigned int)&ctx->pidfd2,
+       };
+
+       // Launch the process into the configured cgroup
+       if (ctx->cgroup) {
+               args.flags |= CLONE_INTO_CGROUP;
+
+               // Clone into this cgroup
+               args.cgroup = pakfire_cgroup_fd(ctx->cgroup);
+       }
+
+       // Setup networking
+       if (!pakfire_jail_exec_has_flag(ctx, PAKFIRE_JAIL_HAS_NETWORKING))
+               args.flags |= CLONE_NEWNET;
+
+       // Fork the second child process
+       pid_t pid = clone3(&args, sizeof(args));
+       if (pid < 0) {
+               CTX_ERROR(jail->ctx, "Could not fork the first child process: %s\n", strerror(errno));
+               r = -errno;
+               goto ERROR;
+
+       // Child process
+       } else if (pid == 0) {
+               r = pakfire_jail_child2(jail, ctx, argv);
+               _exit(r);
+       }
+
+       // Send the pidfd of the child to the first parent
+       r = pakfire_jail_send_fd(jail, socket_send, ctx->pidfd2);
+       if (r)
+               goto ERROR;
+
+ERROR:
+       return r;
+}
+
 // Run a command in the jail
 static int __pakfire_jail_exec(struct pakfire_jail* jail, const char* argv[],
                const int interactive,
                pakfire_jail_communicate_in  communicate_in,
                pakfire_jail_communicate_out communicate_out,
                void* data, int flags) {
-       int exit = -1;
        int r;
 
        // Check if argv is valid
@@ -1873,13 +2123,17 @@ static int __pakfire_jail_exec(struct pakfire_jail* jail, const char* argv[],
        struct pakfire_jail_exec ctx = {
                .flags = flags,
 
+               .socket = { -1, -1 },
+
                .pipes = {
                        .stdin     = { -1, -1 },
                        .stdout    = { -1, -1 },
                        .stderr    = { -1, -1 },
                        .log_INFO  = { -1, -1 },
                        .log_ERROR = { -1, -1 },
+#ifdef ENABLE_DEBUG
                        .log_DEBUG = { -1, -1 },
+#endif /* ENABLE_DEBUG */
                },
 
                .communicate = {
@@ -1888,15 +2142,33 @@ static int __pakfire_jail_exec(struct pakfire_jail* jail, const char* argv[],
                        .data = data,
                },
 
-               .pidfd = -1,
+               // PIDs
+               .pidfd1 = -1,
+               .pidfd2 = -1,
        };
 
        DEBUG(jail->pakfire, "Executing jail...\n");
 
+       // Become the subreaper
+       r = prctl(PR_SET_CHILD_SUBREAPER, 1, 0, 0, 0);
+       if (r < 0) {
+               CTX_ERROR(jail->ctx, "Failed to become the sub-reaper: %s\n", strerror(errno));
+               r = -errno;
+               goto ERROR;
+       }
+
        // Enable networking in interactive mode
        if (interactive)
                ctx.flags |= PAKFIRE_JAIL_HAS_NETWORKING;
 
+       // Create a UNIX domain socket
+       r = socketpair(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC, 0, ctx.socket);
+       if (r < 0) {
+               CTX_ERROR(jail->ctx, "Could not create UNIX socket: %s\n", strerror(errno));
+               r = -errno;
+               goto ERROR;
+       }
+
        /*
                Setup a file descriptor which can be used to notify the client that the parent
                has completed configuration.
@@ -1945,25 +2217,8 @@ static int __pakfire_jail_exec(struct pakfire_jail* jail, const char* argv[],
                goto ERROR;
 #endif /* ENABLE_DEBUG */
 
-       // Configure child process
-       struct clone_args args = {
-               .flags =
-                       CLONE_NEWCGROUP |
-                       CLONE_NEWIPC |
-                       CLONE_NEWNS |
-                       CLONE_NEWPID |
-                       CLONE_NEWTIME |
-                       CLONE_NEWUSER |
-                       CLONE_NEWUTS |
-                       CLONE_PIDFD,
-               .exit_signal = SIGCHLD,
-               .pidfd = (long long unsigned int)&ctx.pidfd,
-       };
-
        // Launch the process in a cgroup that is a leaf of the configured cgroup
        if (jail->cgroup) {
-               args.flags |= CLONE_INTO_CGROUP;
-
                // Fetch our UUID
                const char* uuid = pakfire_jail_uuid(jail);
 
@@ -1973,76 +2228,49 @@ static int __pakfire_jail_exec(struct pakfire_jail* jail, const char* argv[],
                        ERROR(jail->pakfire, "Could not create cgroup for jail: %m\n");
                        goto ERROR;
                }
-
-               // Clone into this cgroup
-               args.cgroup = pakfire_cgroup_fd(ctx.cgroup);
        }
 
-       // Setup networking
-       if (!pakfire_jail_exec_has_flag(&ctx, PAKFIRE_JAIL_HAS_NETWORKING)) {
-               args.flags |= CLONE_NEWNET;
-       }
+       /*
+               Initially, we will set up a new mount namespace and launch a child process in it.
 
-       // Fork this process
-       ctx.pid = clone3(&args, sizeof(args));
-       if (ctx.pid < 0) {
-               ERROR(jail->pakfire, "Could not clone: %m\n");
-               return -1;
+               This process remains in the user/ipc/time/etc. namespace and will set up
+               the mount namespace.
+       */
+
+       // Configure child process
+       struct clone_args args = {
+               .flags =
+                       CLONE_NEWNS |
+                       CLONE_PIDFD |
+                       CLONE_CLEAR_SIGHAND,
+               .exit_signal = SIGCHLD,
+               .pidfd = (long long unsigned int)&ctx.pidfd1,
+       };
+
+       // Fork the first child process
+       pid_t pid = clone3(&args, sizeof(args));
+       if (pid < 0) {
+               CTX_ERROR(jail->ctx, "Could not fork the first child process: %s\n", strerror(errno));
+               r = -errno;
+               goto ERROR;
 
        // Child process
-       } else if (ctx.pid == 0) {
-               r = pakfire_jail_child(jail, &ctx, argv);
+       } else if (pid == 0) {
+               r = pakfire_jail_child1(jail, &ctx, argv);
                _exit(r);
        }
 
        // Parent process
-       r = pakfire_jail_parent(jail, &ctx);
-       if (r)
-               goto ERROR;
-
-       DEBUG(jail->pakfire, "Waiting for PID %d to finish its work\n", ctx.pid);
-
-       // Read output of the child process
        r = pakfire_jail_wait(jail, &ctx);
        if (r)
                goto ERROR;
 
-       // Handle exit status
-       switch (ctx.status.si_code) {
-               case CLD_EXITED:
-                       DEBUG(jail->pakfire, "The child process exited with code %d\n",
-                               ctx.status.si_status);
-
-                       // Pass exit code
-                       exit = ctx.status.si_status;
-                       break;
-
-               case CLD_KILLED:
-                       ERROR(jail->pakfire, "The child process was killed\n");
-                       exit = 139;
-                       break;
-
-               case CLD_DUMPED:
-                       ERROR(jail->pakfire, "The child process terminated abnormally\n");
-                       break;
-
-               // Log anything else
-               default:
-                       ERROR(jail->pakfire, "Unknown child exit code: %d\n", ctx.status.si_code);
-                       break;
-       }
-
 ERROR:
        // Destroy the temporary cgroup (if any)
        if (ctx.cgroup) {
                // Read cgroup stats
-               r = pakfire_cgroup_stat(ctx.cgroup, &ctx.cgroup_stats);
-               if (r) {
-                       ERROR(jail->pakfire, "Could not read cgroup stats: %m\n");
-               } else {
-                       pakfire_cgroup_stat_dump(ctx.cgroup, &ctx.cgroup_stats);
-               }
-
+               pakfire_cgroup_stat(ctx.cgroup, &ctx.cgroup_stats);
+               pakfire_cgroup_stat_dump(ctx.cgroup, &ctx.cgroup_stats);
                pakfire_cgroup_destroy(ctx.cgroup);
                pakfire_cgroup_unref(ctx.cgroup);
        }
@@ -2051,13 +2279,20 @@ ERROR:
        pakfire_jail_close_pipe(jail, ctx.pipes.stdin);
        pakfire_jail_close_pipe(jail, ctx.pipes.stdout);
        pakfire_jail_close_pipe(jail, ctx.pipes.stderr);
-       if (ctx.pidfd >= 0)
-               close(ctx.pidfd);
        pakfire_jail_close_pipe(jail, ctx.pipes.log_INFO);
        pakfire_jail_close_pipe(jail, ctx.pipes.log_ERROR);
+#ifdef ENABLE_DEBUG
        pakfire_jail_close_pipe(jail, ctx.pipes.log_DEBUG);
+#endif /* ENABLE_DEBUG */
+       if (ctx.pidfd1 >= 0)
+               close(ctx.pidfd1);
+       if (ctx.pidfd2 >= 0)
+               close(ctx.pidfd2);
+
+       // Close sockets
+       pakfire_jail_close_pipe(jail, ctx.socket);
 
-       return exit;
+       return r;
 }
 
 PAKFIRE_EXPORT int pakfire_jail_exec(