#include <pakfire/jail.h>
#include <pakfire/logging.h>
#include <pakfire/mount.h>
-#include <pakfire/os.h>
#include <pakfire/pakfire.h>
#include <pakfire/path.h>
#include <pakfire/private.h>
struct pakfire_jail_exec {
int flags;
- // PIDs (of the children)
- int pidfd1;
- int pidfd2;
+ // PID (of the child)
+ pid_t pid;
+ int pidfd;
- // Socket to pass FDs
- int socket[2];
+ // Process status (from waitid)
+ siginfo_t status;
// FD to notify the client that the parent has finished initialization
int completed_fd;
return -1;
}
-static int pakfire_jail_recv_fd(struct pakfire_jail* jail, int socket, int* fd) {
- const size_t payload_length = sizeof(fd);
- char buffer[CMSG_SPACE(payload_length)];
- int r;
-
- struct msghdr msg = {
- .msg_control = buffer,
- .msg_controllen = sizeof(buffer),
- };
-
- // Receive the message
- r = recvmsg(socket, &msg, 0);
- if (r) {
- CTX_ERROR(jail->ctx, "Could not receive file descriptor: %s\n", strerror(errno));
- return -errno;
- }
-
- // Fetch the payload
- struct cmsghdr* cmsg = CMSG_FIRSTHDR(&msg);
- if (!cmsg)
- return -EBADMSG;
-
- *fd = *((int*)CMSG_DATA(cmsg));
-
- CTX_DEBUG(jail->ctx, "Received fd %d from socket %d\n", *fd, socket);
-
- return 0;
-}
-
-static int pakfire_jail_send_fd(struct pakfire_jail* jail, int socket, int fd) {
- const size_t payload_length = sizeof(fd);
- char buffer[CMSG_SPACE(payload_length)];
- int r;
-
- CTX_DEBUG(jail->ctx, "Sending fd %d to socket %d\n", fd, socket);
-
- // Header
- struct msghdr msg = {
- .msg_control = buffer,
- .msg_controllen = sizeof(buffer),
- };
-
- // Payload
- struct cmsghdr* cmsg = CMSG_FIRSTHDR(&msg);
- cmsg->cmsg_level = SOL_SOCKET;
- cmsg->cmsg_type = SCM_RIGHTS;
- cmsg->cmsg_len = CMSG_LEN(payload_length);
-
- // Set payload
- *((int*)CMSG_DATA(cmsg)) = fd;
-
- // Send the message
- r = sendmsg(socket, &msg, 0);
- if (r) {
- CTX_ERROR(jail->ctx, "Could not send file descriptor: %s\n", strerror(errno));
- return -errno;
- }
-
- return 0;
-}
-
static int pakfire_jail_log(struct pakfire* pakfire, void* data, int priority,
const char* line, const size_t length) {
// Pass everything to the parent logger
return 0;
}
-static int pakfire_jail_setup_child2(struct pakfire_jail* jail, struct pakfire_jail_exec* ctx);
-
-static int pakfire_jail_wait_on_child(struct pakfire_jail* jail, int pidfd) {
- siginfo_t status = {};
- int r;
-
- // Call waitid() and store the result
- r = waitid(P_PIDFD, pidfd, &status, WEXITED);
- if (r) {
- CTX_ERROR(jail->ctx, "waitid() failed: %s\n", strerror(errno));
- return -errno;
- }
-
- switch (status.si_code) {
- // If the process exited normally, we return the exit code
- case CLD_EXITED:
- CTX_DEBUG(jail->ctx, "The child process exited with code %d\n", status.si_status);
- return status.si_status;
-
- case CLD_KILLED:
- CTX_ERROR(jail->ctx, "The child process was killed\n");
- return 139;
-
- case CLD_DUMPED:
- CTX_ERROR(jail->ctx, "The child process terminated abnormally\n");
- return 139;
-
- // Log anything else
- default:
- CTX_ERROR(jail->ctx, "Unknown child exit code: %d\n", status.si_code);
- break;
- }
-
- return -EBADMSG;
-}
-
static int pakfire_jail_wait(struct pakfire_jail* jail, struct pakfire_jail_exec* ctx) {
int epollfd = -1;
struct epoll_event events[EPOLL_MAX_EVENTS];
char garbage[8];
int r = 0;
- // Fetch the UNIX domain socket
- const int socket_recv = pakfire_jail_get_pipe_to_read(jail, &ctx->socket);
-
// Fetch file descriptors from context
const int stdin = pakfire_jail_get_pipe_to_write(jail, &ctx->pipes.stdin);
const int stdout = pakfire_jail_get_pipe_to_read(jail, &ctx->pipes.stdout);
const int stderr = pakfire_jail_get_pipe_to_read(jail, &ctx->pipes.stderr);
+ const int pidfd = ctx->pidfd;
// Timer
const int timerfd = pakfire_jail_create_timer(jail);
const int fd;
const int events;
} fds[] = {
- { socket_recv, EPOLLIN },
-
// Standard input/output
{ stdin, EPOLLOUT },
{ stdout, EPOLLIN },
// Timer
{ timerfd, EPOLLIN },
- // Child Processes
- { ctx->pidfd1, EPOLLIN },
+ // Child Process
+ { ctx->pidfd, EPOLLIN },
// Log Pipes
{ log_INFO, EPOLLIN },
}
int ended = 0;
- int exit = 0;
-
- CTX_DEBUG(jail->ctx, "Launching main loop...\n");
// Loop for as long as the process is alive
while (!ended) {
// Check if there is any data to be read
if (e & EPOLLIN) {
- // Monitor the first child process
- if (fd == ctx->pidfd1) {
- r = pakfire_jail_wait_on_child(jail, ctx->pidfd1);
+ // Handle any changes to the PIDFD
+ if (fd == pidfd) {
+ // Call waitid() and store the result
+ r = waitid(P_PIDFD, ctx->pidfd, &ctx->status, WEXITED);
if (r) {
- CTX_ERROR(jail->ctx, "The first child exited with an error\n");
- goto ERROR;
- }
-
- close(ctx->pidfd1);
- ctx->pidfd1 = -1;
-
- continue;
-
- // Monitor the second child process
- } else if (fd == ctx->pidfd2) {
- exit = pakfire_jail_wait_on_child(jail, ctx->pidfd2);
- if (exit < 0) {
- CTX_ERROR(jail->ctx, "The second child exited with an error\n");
+ ERROR(jail->pakfire, "waitid() failed: %m\n");
goto ERROR;
}
- close(ctx->pidfd2);
- ctx->pidfd2 = -1;
-
// Mark that we have ended so that we will process the remaining
// events from epoll() now, but won't restart the outer loop.
ended = 1;
-
continue;
// Handle timer events
DEBUG(jail->pakfire, "Terminating process...\n");
// Send SIGTERM to the process
- r = pidfd_send_signal(ctx->pidfd2, SIGKILL, NULL, 0);
+ r = pidfd_send_signal(pidfd, SIGKILL, NULL, 0);
if (r) {
ERROR(jail->pakfire, "Could not kill process: %m\n");
goto ERROR;
}
}
- // There is nothing else to do
- continue;
-
- // Handle socket messages
- } else if (fd == socket_recv) {
- // Receive the FD of the second child process
- r = pakfire_jail_recv_fd(jail, socket_recv, &ctx->pidfd2);
- if (r)
- goto ERROR;
-
- // Add it to the event loop
- r = pakfire_jail_epoll_add_fd(jail, epollfd, ctx->pidfd2, EPOLLIN);
- if (r)
- goto ERROR;
-
- // Setup the child process
- r = pakfire_jail_setup_child2(jail, ctx);
- if (r)
- goto ERROR;
-
// Don't fall through to log processing
continue;
}
}
- // Return the exit code
- r = exit;
-
ERROR:
- CTX_DEBUG(jail->ctx, "Main loop terminated\n");
-
if (epollfd >= 0)
close(epollfd);
if (timerfd >= 0)
return r;
}
+/*
+	Parent-side half of the jail setup, run after the child has been cloned.
+
+	Sets up the UID mapping, writes "deny" to /proc/PID/setgroups and sets up
+	the GID mapping for ctx->pid (in that order), then signals
+	ctx->completed_fd, which the child waits on before it continues.
+
+	Returns 0 on success, or the first non-zero result of a failing step.
+*/
+static int pakfire_jail_parent(struct pakfire_jail* jail, struct pakfire_jail_exec* ctx) {
+	const pid_t child = ctx->pid;
+	int status;
+
+	// Set up the UID mapping for the child
+	status = pakfire_jail_setup_uid_mapping(jail, child);
+	if (status != 0)
+		return status;
+
+	// Write "deny" to /proc/PID/setgroups; this happens before the GID
+	// mapping is written (see user_namespaces(7))
+	status = pakfire_jail_setgroups(jail, child);
+	if (status != 0)
+		return status;
+
+	// Set up the GID mapping for the child
+	status = pakfire_jail_setup_gid_mapping(jail, child);
+	if (status != 0)
+		return status;
+
+	// Everything the parent has to do is done at this point
+	DEBUG(jail->pakfire, "Parent has finished initialization\n");
+
+	// Notify the client; the result of the notification is our result
+	return pakfire_jail_send_signal(jail, ctx->completed_fd);
+}
+
static int pakfire_jail_switch_root(struct pakfire_jail* jail, const char* root) {
int r;
}
#endif
-/*
- Called by the parent that sets up the second child process...
-*/
-static int pakfire_jail_setup_child2(
- struct pakfire_jail* jail, struct pakfire_jail_exec* ctx) {
- pid_t pid = -1;
+static int pakfire_jail_child(struct pakfire_jail* jail, struct pakfire_jail_exec* ctx,
+ const char* argv[]) {
int r;
- // Fetch the PID
- r = pidfd_get_pid(ctx->pidfd2, &pid);
- if (r) {
- CTX_ERROR(jail->ctx, "Could not fetch PID: %s\n", strerror(-r));
- return r;
- }
-
- // Setup UID mapping
- r = pakfire_jail_setup_uid_mapping(jail, pid);
- if (r)
- return r;
-
- // Write "deny" to /proc/PID/setgroups
- r = pakfire_jail_setgroups(jail, pid);
- if (r)
- return r;
+ // Redirect any logging to our log pipe
+ pakfire_ctx_set_log_callback(jail->ctx, pakfire_jail_log_redirect, &ctx->pipes);
- // Setup GID mapping
- r = pakfire_jail_setup_gid_mapping(jail, pid);
- if (r)
- return r;
+ // Fetch my own PID
+ pid_t pid = getpid();
- // Parent has finished initialisation
- DEBUG(jail->pakfire, "Parent has finished initialization\n");
+ DEBUG(jail->pakfire, "Launched child process in jail with PID %d\n", pid);
- // Send signal to client
- r = pakfire_jail_send_signal(jail, ctx->completed_fd);
+ // Wait for the parent to finish initialization
+ r = pakfire_jail_wait_for_signal(jail, ctx->completed_fd);
if (r)
return r;
- return 0;
-}
-
-/*
- Child 2 is launched in their own user/mount/etc. namespace.
-*/
-static int pakfire_jail_child2(struct pakfire_jail* jail,
- struct pakfire_jail_exec* ctx, const char* argv[]) {
- int r;
-
- // Fetch my own PID
- pid_t pid = getpid();
-
- CTX_DEBUG(jail->ctx, "Launched child process in jail with PID %d\n", pid);
+ // Die with parent
+ r = prctl(PR_SET_PDEATHSIG, SIGKILL, 0, 0, 0);
+ if (r) {
+ ERROR(jail->pakfire, "Could not configure to die with parent: %m\n");
+ return 126;
+ }
// Make this process dumpable
r = prctl (PR_SET_DUMPABLE, 1, 0, 0, 0);
if (r) {
- CTX_ERROR(jail->ctx, "Could not make the process dumpable: %m\n");
+ ERROR(jail->pakfire, "Could not make the process dumpable: %m\n");
return 126;
}
// Don't drop any capabilities on setuid()
r = prctl(PR_SET_KEEPCAPS, 1);
if (r) {
- CTX_ERROR(jail->ctx, "Could not set PR_SET_KEEPCAPS: %m\n");
+ ERROR(jail->pakfire, "Could not set PR_SET_KEEPCAPS: %m\n");
return 126;
}
- // Wait for the parent to finish initialization
- r = pakfire_jail_wait_for_signal(jail, ctx->completed_fd);
- if (r)
- return r;
-
// Fetch UID/GID
uid_t uid = getuid();
gid_t gid = getgid();
}
#endif
+ const char* root = pakfire_get_path(jail->pakfire);
const char* arch = pakfire_get_effective_arch(jail->pakfire);
+ // Change mount propagation to slave to receive anything from the parent namespace
+ r = pakfire_mount_change_propagation(jail->ctx, "/", MS_SLAVE);
+ if (r)
+ return r;
+
+ // Make root a mountpoint in the new mount namespace
+ r = pakfire_mount_make_mounpoint(jail->pakfire, root);
+ if (r)
+ return r;
+
+ // Change mount propagation to private
+ r = pakfire_mount_change_propagation(jail->ctx, root, MS_PRIVATE);
+ if (r)
+ return r;
+
+ // Change root (unless root is /)
+ if (!pakfire_on_root(jail->pakfire)) {
+ // Mount everything
+ r = pakfire_jail_mount(jail, ctx);
+ if (r)
+ return r;
+
+ // chroot()
+ r = pakfire_jail_switch_root(jail, root);
+ if (r)
+ return r;
+ }
+
// Set personality
unsigned long persona = pakfire_arch_personality(arch);
if (persona) {
r = personality(persona);
if (r < 0) {
ERROR(jail->pakfire, "Could not set personality (%x)\n", (unsigned int)persona);
- return 126;
+ return 1;
}
}
if (r)
return r;
- CTX_DEBUG(jail->ctx, "Child process initialization done\n");
- CTX_DEBUG(jail->ctx, "Launching command:\n");
+ DEBUG(jail->pakfire, "Child process initialization done\n");
+ DEBUG(jail->pakfire, "Launching command:\n");
// Log argv
for (unsigned int i = 0; argv[i]; i++)
- CTX_DEBUG(jail->ctx, " argv[%u] = %s\n", i, argv[i]);
+ DEBUG(jail->pakfire, " argv[%u] = %s\n", i, argv[i]);
// exec() command
r = execvpe(argv[0], (char**)argv, jail->env);
r = 1;
}
- CTX_ERROR(jail->ctx, "Could not execve(%s): %m\n", argv[0]);
+ ERROR(jail->pakfire, "Could not execve(%s): %m\n", argv[0]);
}
// We should not get here
return r;
}
-/*
- Child 1 is launched in a new mount namespace...
-*/
-static int pakfire_jail_child1(struct pakfire_jail* jail,
- struct pakfire_jail_exec* ctx, const char* argv[]) {
- int r;
-
- // Redirect any logging to our log pipe
- pakfire_ctx_set_log_callback(jail->ctx, pakfire_jail_log_redirect, &ctx->pipes);
-
- CTX_DEBUG(jail->ctx, "First child process launched\n");
-
- const int socket_send = pakfire_jail_get_pipe_to_write(jail, &ctx->socket);
-
- const char* root = pakfire_get_path(jail->pakfire);
-
- // Die with parent
- r = prctl(PR_SET_PDEATHSIG, SIGKILL, 0, 0, 0);
- if (r) {
- CTX_ERROR(jail->ctx, "Could not configure to die with parent: %s\n", strerror(errno));
- goto ERROR;
- }
-
- // Change mount propagation so that we will receive, but don't propagate back
- r = pakfire_mount_change_propagation(jail->ctx, "/", MS_SLAVE);
- if (r) {
- CTX_ERROR(jail->ctx, "Could not change mount propagation to SLAVE: %s\n", strerror(r));
- goto ERROR;
- }
-
- // Make root a mountpoint in the new mount namespace
- r = pakfire_mount_make_mounpoint(jail->pakfire, root);
- if (r)
- goto ERROR;
-
- // Make everything private
- r = pakfire_mount_change_propagation(jail->ctx, root, MS_PRIVATE);
- if (r) {
- CTX_ERROR(jail->ctx, "Could not change mount propagation to PRIVATE: %s\n", strerror(r));
- goto ERROR;
- }
-
- // Mount everything
- r = pakfire_jail_mount(jail, ctx);
- if (r)
- goto ERROR;
-
- // XXX setup keyring
-
-
-
- // chroot()
- r = pakfire_jail_switch_root(jail, root);
- if (r)
- goto ERROR;
-
- // Change mount propagation so that we will propagate everything down
- r = pakfire_mount_change_propagation(jail->ctx, "/", MS_SHARED);
- if (r) {
- CTX_ERROR(jail->ctx, "Could not change mount propagation to SHARED: %s\n", strerror(r));
- goto ERROR;
- }
-
- // Configure child process
- struct clone_args args = {
- .flags =
- CLONE_NEWCGROUP |
- CLONE_NEWIPC |
- CLONE_NEWNS |
- CLONE_NEWPID |
- CLONE_NEWTIME |
- CLONE_NEWUSER |
- CLONE_NEWUTS |
- CLONE_PIDFD,
- .exit_signal = SIGCHLD,
- .pidfd = (long long unsigned int)&ctx->pidfd2,
- };
-
- // Launch the process into the configured cgroup
- if (ctx->cgroup) {
- args.flags |= CLONE_INTO_CGROUP;
-
- // Clone into this cgroup
- args.cgroup = pakfire_cgroup_fd(ctx->cgroup);
- }
-
- // Setup networking
- if (!pakfire_jail_exec_has_flag(ctx, PAKFIRE_JAIL_HAS_NETWORKING))
- args.flags |= CLONE_NEWNET;
-
- // Fork the second child process
- pid_t pid = clone3(&args, sizeof(args));
- if (pid < 0) {
- CTX_ERROR(jail->ctx, "Could not fork the first child process: %s\n", strerror(errno));
- r = -errno;
- goto ERROR;
-
- // Child process
- } else if (pid == 0) {
- r = pakfire_jail_child2(jail, ctx, argv);
- _exit(r);
- }
-
- // Send the pidfd of the child to the first parent
- r = pakfire_jail_send_fd(jail, socket_send, ctx->pidfd2);
- if (r)
- goto ERROR;
-
-ERROR:
- return r;
-}
-
// Run a command in the jail
static int __pakfire_jail_exec(struct pakfire_jail* jail, const char* argv[],
const int interactive,
pakfire_jail_communicate_in communicate_in,
pakfire_jail_communicate_out communicate_out,
void* data, int flags) {
+ int exit = -1;
int r;
// Check if argv is valid
struct pakfire_jail_exec ctx = {
.flags = flags,
- .socket = { -1, -1 },
-
.pipes = {
.stdin = { -1, -1 },
.stdout = { -1, -1 },
.data = data,
},
- // PIDs
- .pidfd1 = -1,
- .pidfd2 = -1,
+ .pidfd = -1,
};
DEBUG(jail->pakfire, "Executing jail...\n");
- // Become the subreaper
- r = prctl(PR_SET_CHILD_SUBREAPER, 1, 0, 0, 0);
- if (r < 0) {
- CTX_ERROR(jail->ctx, "Failed to become the sub-reaper: %s\n", strerror(errno));
- r = -errno;
- goto ERROR;
- }
-
// Enable networking in interactive mode
if (interactive)
ctx.flags |= PAKFIRE_JAIL_HAS_NETWORKING;
- // Create a UNIX domain socket
- r = socketpair(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC, 0, ctx.socket);
- if (r < 0) {
- CTX_ERROR(jail->ctx, "Could not create UNIX socket: %s\n", strerror(errno));
- r = -errno;
- goto ERROR;
- }
-
/*
Setup a file descriptor which can be used to notify the client that the parent
has completed configuration.
goto ERROR;
#endif /* ENABLE_DEBUG */
+ // Configure child process
+ struct clone_args args = {
+ .flags =
+ CLONE_NEWCGROUP |
+ CLONE_NEWIPC |
+ CLONE_NEWNS |
+ CLONE_NEWPID |
+ CLONE_NEWTIME |
+ CLONE_NEWUSER |
+ CLONE_NEWUTS |
+ CLONE_PIDFD,
+ .exit_signal = SIGCHLD,
+ .pidfd = (long long unsigned int)&ctx.pidfd,
+ };
+
// Launch the process in a cgroup that is a leaf of the configured cgroup
if (jail->cgroup) {
+ args.flags |= CLONE_INTO_CGROUP;
+
// Fetch our UUID
const char* uuid = pakfire_jail_uuid(jail);
ERROR(jail->pakfire, "Could not create cgroup for jail: %m\n");
goto ERROR;
}
- }
- /*
- Initially, we will set up a new mount namespace and launch a child process in it.
-
- This process remains in the user/ipc/time/etc. namespace and will set up
- the mount namespace.
- */
+ // Clone into this cgroup
+ args.cgroup = pakfire_cgroup_fd(ctx.cgroup);
+ }
- // Configure child process
- struct clone_args args = {
- .flags =
- CLONE_NEWNS |
- CLONE_PIDFD |
- CLONE_CLEAR_SIGHAND,
- .exit_signal = SIGCHLD,
- .pidfd = (long long unsigned int)&ctx.pidfd1,
- };
+ // Setup networking
+ if (!pakfire_jail_exec_has_flag(&ctx, PAKFIRE_JAIL_HAS_NETWORKING)) {
+ args.flags |= CLONE_NEWNET;
+ }
- // Fork the first child process
- pid_t pid = clone3(&args, sizeof(args));
- if (pid < 0) {
- CTX_ERROR(jail->ctx, "Could not fork the first child process: %s\n", strerror(errno));
- r = -errno;
- goto ERROR;
+ // Fork this process
+ ctx.pid = clone3(&args, sizeof(args));
+ if (ctx.pid < 0) {
+ ERROR(jail->pakfire, "Could not clone: %m\n");
+ return -1;
// Child process
- } else if (pid == 0) {
- r = pakfire_jail_child1(jail, &ctx, argv);
+ } else if (ctx.pid == 0) {
+ r = pakfire_jail_child(jail, &ctx, argv);
_exit(r);
}
// Parent process
+ r = pakfire_jail_parent(jail, &ctx);
+ if (r)
+ goto ERROR;
+
+ DEBUG(jail->pakfire, "Waiting for PID %d to finish its work\n", ctx.pid);
+
+ // Read output of the child process
r = pakfire_jail_wait(jail, &ctx);
if (r)
goto ERROR;
+ // Handle exit status
+ switch (ctx.status.si_code) {
+ case CLD_EXITED:
+ DEBUG(jail->pakfire, "The child process exited with code %d\n",
+ ctx.status.si_status);
+
+ // Pass exit code
+ exit = ctx.status.si_status;
+ break;
+
+ case CLD_KILLED:
+ ERROR(jail->pakfire, "The child process was killed\n");
+ exit = 139;
+ break;
+
+ case CLD_DUMPED:
+ ERROR(jail->pakfire, "The child process terminated abnormally\n");
+ break;
+
+ // Log anything else
+ default:
+ ERROR(jail->pakfire, "Unknown child exit code: %d\n", ctx.status.si_code);
+ break;
+ }
+
ERROR:
// Destroy the temporary cgroup (if any)
if (ctx.cgroup) {
pakfire_jail_close_pipe(jail, ctx.pipes.stdin);
pakfire_jail_close_pipe(jail, ctx.pipes.stdout);
pakfire_jail_close_pipe(jail, ctx.pipes.stderr);
+ if (ctx.pidfd >= 0)
+ close(ctx.pidfd);
pakfire_jail_close_pipe(jail, ctx.pipes.log_INFO);
pakfire_jail_close_pipe(jail, ctx.pipes.log_ERROR);
#ifdef ENABLE_DEBUG
pakfire_jail_close_pipe(jail, ctx.pipes.log_DEBUG);
#endif /* ENABLE_DEBUG */
- if (ctx.pidfd1 >= 0)
- close(ctx.pidfd1);
- if (ctx.pidfd2 >= 0)
- close(ctx.pidfd2);
- // Close sockets
- pakfire_jail_close_pipe(jail, ctx.socket);
-
- return r;
+ return exit;
}
PAKFIRE_EXPORT int pakfire_jail_exec(