#include <pakfire/jail.h>
#include <pakfire/logging.h>
#include <pakfire/mount.h>
+#include <pakfire/os.h>
#include <pakfire/pakfire.h>
+#include <pakfire/path.h>
#include <pakfire/private.h>
#include <pakfire/pwd.h>
#include <pakfire/string.h>
};
struct pakfire_jail {
+ struct pakfire_ctx* ctx;
struct pakfire* pakfire;
int nrefs;
// Mountpoints
struct pakfire_jail_mountpoint mountpoints[MAX_MOUNTPOINTS];
unsigned int num_mountpoints;
+
+ // Callbacks
+ struct pakfire_jail_callbacks {
+ // Log
+ pakfire_jail_log_callback log;
+ void* log_data;
+ } callbacks;
};
struct pakfire_log_buffer {
struct pakfire_jail_exec {
int flags;
- // PID (of the child)
- pid_t pid;
- int pidfd;
+ // PIDs (of the children)
+ int pidfd1;
+ int pidfd2;
- // Process status (from waitid)
- siginfo_t status;
+ // Socket to pass FDs
+ int socket[2];
// FD to notify the client that the parent has finished initialization
int completed_fd;
// Logging
int log_INFO[2];
int log_ERROR[2];
+#ifdef ENABLE_DEBUG
int log_DEBUG[2];
+#endif /* ENABLE_DEBUG */
} pipes;
// Communicate
// Logging
struct pakfire_log_buffer log_INFO;
struct pakfire_log_buffer log_ERROR;
+#ifdef ENABLE_DEBUG
struct pakfire_log_buffer log_DEBUG;
+#endif /* ENABLE_DEBUG */
} buffers;
struct pakfire_cgroup* cgroup;
struct pakfire_cgroup_stats cgroup_stats;
+
+ // Console
+ char console[PATH_MAX];
+ int consolefd;
};
static int clone3(struct clone_args* args, size_t size) {
return syscall(SYS_pidfd_send_signal, pidfd, sig, info, flags);
}
+/*
+	Invokes the pivot_root system call directly through syscall() and
+	passes its result back to the caller.
+*/
+static int pivot_root(const char* new_root, const char* old_root) {
+	const long result = syscall(SYS_pivot_root, new_root, old_root);
+
+	return (int)result;
+}
+
static int pakfire_jail_exec_has_flag(
const struct pakfire_jail_exec* ctx, const enum pakfire_jail_exec_flags flag) {
return ctx->flags & flag;
if (jail->cgroup)
pakfire_cgroup_unref(jail->cgroup);
-
- pakfire_unref(jail->pakfire);
+ if (jail->pakfire)
+ pakfire_unref(jail->pakfire);
+ if (jail->ctx)
+ pakfire_ctx_unref(jail->ctx);
free(jail);
}
PAKFIRE_EXPORT int pakfire_jail_create(struct pakfire_jail** jail, struct pakfire* pakfire) {
int r;
- const char* arch = pakfire_get_arch(pakfire);
+ const char* arch = pakfire_get_effective_arch(pakfire);
// Allocate a new jail
struct pakfire_jail* j = calloc(1, sizeof(*j));
if (!j)
return 1;
+ // Reference context
+ j->ctx = pakfire_ctx(pakfire);
+
// Reference Pakfire
j->pakfire = pakfire_ref(pakfire);
DEBUG(j->pakfire, "Allocated new jail at %p\n", j);
+ // Set the default logging callback
+ pakfire_jail_set_log_callback(j, pakfire_jail_default_log_callback, NULL);
+
// Set default environment
for (const struct environ* e = ENV; e->key; e++) {
r = pakfire_jail_set_env(j, e->key, e->val);
}
// Enable all CPU features that CPU has to offer
- if (!pakfire_arch_supported_by_host(arch)) {
+ if (!pakfire_arch_is_supported_by_host(arch)) {
r = pakfire_jail_set_env(j, "QEMU_CPU", "max");
if (r)
goto ERROR;
return NULL;
}
+// Logging Callback
+
+/*
+	Stores the log callback of the jail together with the opaque data
+	pointer that belongs to it. Both values replace whatever was set before.
+*/
+PAKFIRE_EXPORT void pakfire_jail_set_log_callback(struct pakfire_jail* jail,
+		pakfire_jail_log_callback callback, void* data) {
+	struct pakfire_jail_callbacks* callbacks = &jail->callbacks;
+
+	callbacks->log_data = data;
+	callbacks->log      = callback;
+}
+
// Resource Limits
PAKFIRE_EXPORT int pakfire_jail_nice(struct pakfire_jail* jail, int nice) {
jail->timeout.it_value.tv_sec = timeout;
if (timeout > 0)
- DEBUG(jail->pakfire, "Timeout set to %d second(s)\n", timeout);
+ DEBUG(jail->pakfire, "Timeout set to %u second(s)\n", timeout);
else
DEBUG(jail->pakfire, "Timeout disabled\n");
return fd;
ERROR:
- if (fd > 0)
+ if (fd >= 0)
close(fd);
return -1;
All log messages will be sent to the parent process through their respective pipes.
*/
-static void pakfire_jail_log(void* data, int priority, const char* file,
+static void pakfire_jail_log_redirect(void* data, int priority, const char* file,
int line, const char* fn, const char* format, va_list args) {
struct pakfire_jail_pipes* pipes = (struct pakfire_jail_pipes*)data;
int fd;
}
// Send the log message
- if (fd)
+ if (fd >= 0)
vdprintf(fd, format, args);
}
close(fd);
// Reset the file-descriptor so it won't be closed again later
- ctx->pipes.stdin[1] = 0;
+ ctx->pipes.stdin[1] = -1;
// Report success
r = 0;
static void pakfire_jail_close_pipe(struct pakfire_jail* jail, int fds[2]) {
for (unsigned int i = 0; i < 2; i++)
- if (fds[i])
+ if (fds[i] >= 0)
close(fds[i]);
}
int* fd_write = &(*fds)[1];
// Close the write end of the pipe
- if (*fd_write) {
+ if (*fd_write >= 0) {
close(*fd_write);
*fd_write = -1;
}
// Return the read end
- return *fd_read;
+ if (*fd_read >= 0)
+ return *fd_read;
+
+ return -1;
}
static int pakfire_jail_get_pipe_to_write(struct pakfire_jail* jail, int (*fds)[2]) {
int* fd_write = &(*fds)[1];
// Close the read end of the pipe
- if (*fd_read) {
+ if (*fd_read >= 0) {
close(*fd_read);
*fd_read = -1;
}
// Return the write end
- return *fd_write;
+ if (*fd_write >= 0)
+ return *fd_write;
+
+ return -1;
+}
+
+/*
+	Receives a single file descriptor that has been sent as SCM_RIGHTS
+	ancillary data over the given UNIX domain socket.
+
+	On success, the received file descriptor is stored in fd and zero is
+	returned. On failure, a negative errno value is returned (-EBADMSG if the
+	message did not carry exactly one file descriptor) and fd is not modified.
+*/
+static int pakfire_jail_recv_fd(struct pakfire_jail* jail, int socket, int* fd) {
+	// The control buffer holds exactly one int (not a pointer) and is
+	// aligned so that it can be accessed as struct cmsghdr
+	union {
+		char buffer[CMSG_SPACE(sizeof(*fd))];
+		struct cmsghdr align;
+	} control = { .buffer = { 0 } };
+	ssize_t bytes_received;
+	int r;
+
+	struct msghdr msg = {
+		.msg_control    = control.buffer,
+		.msg_controllen = sizeof(control.buffer),
+	};
+
+	// Receive the message (recvmsg() returns the number of bytes or -1 on error)
+	bytes_received = recvmsg(socket, &msg, 0);
+	if (bytes_received < 0) {
+		// Save errno before logging has a chance to overwrite it
+		r = -errno;
+
+		CTX_ERROR(jail->ctx, "Could not receive file descriptor: %s\n", strerror(-r));
+		return r;
+	}
+
+	// Fetch the payload
+	struct cmsghdr* cmsg = CMSG_FIRSTHDR(&msg);
+	if (!cmsg)
+		return -EBADMSG;
+
+	// Check that the control message actually carries one file descriptor
+	if (cmsg->cmsg_level != SOL_SOCKET || cmsg->cmsg_type != SCM_RIGHTS
+			|| cmsg->cmsg_len != CMSG_LEN(sizeof(*fd)))
+		return -EBADMSG;
+
+	// Copy the payload instead of dereferencing a casted pointer
+	memcpy(fd, CMSG_DATA(cmsg), sizeof(*fd));
+
+	CTX_DEBUG(jail->ctx, "Received fd %d from socket %d\n", *fd, socket);
+
+	return 0;
+}
+
+/*
+	Sends the file descriptor fd as SCM_RIGHTS ancillary data over the given
+	UNIX domain socket. The message carries no regular data (no iovec is set).
+
+	Returns zero on success or a negative errno value on failure.
+*/
+static int pakfire_jail_send_fd(struct pakfire_jail* jail, int socket, int fd) {
+	// Zero-initialized control buffer which is aligned so that it can be
+	// accessed as struct cmsghdr
+	union {
+		char buffer[CMSG_SPACE(sizeof(fd))];
+		struct cmsghdr align;
+	} control = { .buffer = { 0 } };
+	ssize_t bytes_sent;
+	int r;
+
+	CTX_DEBUG(jail->ctx, "Sending fd %d to socket %d\n", fd, socket);
+
+	// Header
+	struct msghdr msg = {
+		.msg_control    = control.buffer,
+		.msg_controllen = sizeof(control.buffer),
+	};
+
+	// Payload
+	struct cmsghdr* cmsg = CMSG_FIRSTHDR(&msg);
+	cmsg->cmsg_level = SOL_SOCKET;
+	cmsg->cmsg_type  = SCM_RIGHTS;
+	cmsg->cmsg_len   = CMSG_LEN(sizeof(fd));
+
+	// Set payload (copied instead of written through a casted pointer)
+	memcpy(CMSG_DATA(cmsg), &fd, sizeof(fd));
+
+	// Send the message (sendmsg() returns the number of bytes or -1 on error)
+	bytes_sent = sendmsg(socket, &msg, 0);
+	if (bytes_sent < 0) {
+		// Save errno before logging has a chance to overwrite it
+		r = -errno;
+
+		CTX_ERROR(jail->ctx, "Could not send file descriptor: %s\n", strerror(-r));
+		return r;
+	}
+
+	return 0;
+}
+
+/*
+	Log callback which hands at most length bytes of line over to
+	pakfire_log_condition() using the given priority.
+
+	The data pointer is not used. Always returns zero.
+*/
+static int pakfire_jail_log(struct pakfire* pakfire, void* data, int priority,
+		const char* line, const size_t length) {
+	// The precision for "%.*s" has to be passed as an int
+	const int precision = (int)length;
+
+	// Pass everything to the parent logger
+	pakfire_log_condition(pakfire, priority, 0, "%.*s", precision, line);
+
+	return 0;
+}
+
+/*
+	Switches fd into non-blocking mode and registers it with epollfd.
+
+	The file descriptor is monitored for the given events and additionally
+	for EPOLLHUP. The file descriptor itself is stored as the event data.
+
+	Returns zero on success or a negative errno value on failure.
+*/
+static int pakfire_jail_epoll_add_fd(struct pakfire_jail* jail, int epollfd, int fd, int events) {
+	struct epoll_event event = {
+		.events = events|EPOLLHUP,
+		.data = {
+			.fd = fd,
+		},
+	};
+	int r;
+
+	// Read flags
+	int flags = fcntl(fd, F_GETFL, 0);
+	if (flags < 0) {
+		// Save errno before logging has a chance to overwrite it
+		r = -errno;
+
+		CTX_ERROR(jail->ctx, "Could not read flags of file descriptor %d: %s\n",
+			fd, strerror(-r));
+		return r;
+	}
+
+	// Set modified flags
+	r = fcntl(fd, F_SETFL, flags|O_NONBLOCK);
+	if (r < 0) {
+		r = -errno;
+
+		CTX_ERROR(jail->ctx, "Could not set file descriptor %d into non-blocking mode: %s\n",
+			fd, strerror(-r));
+		return r;
+	}
+
+	// Add the file descriptor to the loop
+	r = epoll_ctl(epollfd, EPOLL_CTL_ADD, fd, &event);
+	if (r < 0) {
+		r = -errno;
+
+		CTX_ERROR(jail->ctx, "Could not add file descriptor %d to epoll(): %s\n",
+			fd, strerror(-r));
+		return r;
+	}
+
+	return 0;
+}
+
+static int pakfire_jail_setup_child2(struct pakfire_jail* jail, struct pakfire_jail_exec* ctx);
+
+/*
+	Collects the exit status of the child process that is referenced by pidfd.
+
+	Returns:
+	  - the exit code if the process exited normally,
+	  - 139 if the process was killed or dumped core,
+	  - a negative errno value if waitid() failed,
+	  - -EBADMSG for any other status code.
+*/
+static int pakfire_jail_wait_on_child(struct pakfire_jail* jail, int pidfd) {
+	siginfo_t status = {};
+	int r;
+
+	// Call waitid() and store the result
+	r = waitid(P_PIDFD, pidfd, &status, WEXITED);
+	if (r) {
+		// Save errno before logging has a chance to overwrite it. Otherwise
+		// we might return zero which would look like a successful exit.
+		r = -errno;
+
+		CTX_ERROR(jail->ctx, "waitid() failed: %s\n", strerror(-r));
+		return r;
+	}
+
+	switch (status.si_code) {
+		// If the process exited normally, we return the exit code
+		case CLD_EXITED:
+			CTX_DEBUG(jail->ctx, "The child process exited with code %d\n", status.si_status);
+			return status.si_status;
+
+		case CLD_KILLED:
+			CTX_ERROR(jail->ctx, "The child process was killed\n");
+			return 139;
+
+		case CLD_DUMPED:
+			CTX_ERROR(jail->ctx, "The child process terminated abnormally\n");
+			return 139;
+
+		// Log anything else
+		default:
+			CTX_ERROR(jail->ctx, "Unknown child exit code: %d\n", status.si_code);
+			break;
+	}
+
+	return -EBADMSG;
}
static int pakfire_jail_wait(struct pakfire_jail* jail, struct pakfire_jail_exec* ctx) {
int epollfd = -1;
- struct epoll_event ev;
struct epoll_event events[EPOLL_MAX_EVENTS];
char garbage[8];
int r = 0;
+ // Fetch the UNIX domain socket
+ const int socket_recv = pakfire_jail_get_pipe_to_read(jail, &ctx->socket);
+
// Fetch file descriptors from context
const int stdin = pakfire_jail_get_pipe_to_write(jail, &ctx->pipes.stdin);
const int stdout = pakfire_jail_get_pipe_to_read(jail, &ctx->pipes.stdout);
const int stderr = pakfire_jail_get_pipe_to_read(jail, &ctx->pipes.stderr);
- const int pidfd = ctx->pidfd;
// Timer
const int timerfd = pakfire_jail_create_timer(jail);
// Logging
const int log_INFO = pakfire_jail_get_pipe_to_read(jail, &ctx->pipes.log_INFO);
const int log_ERROR = pakfire_jail_get_pipe_to_read(jail, &ctx->pipes.log_ERROR);
+#ifdef ENABLE_DEBUG
const int log_DEBUG = pakfire_jail_get_pipe_to_read(jail, &ctx->pipes.log_DEBUG);
+#endif /* ENABLE_DEBUG */
// Make a list of all file descriptors we are interested in
- int fds[] = {
- stdin, stdout, stderr, pidfd, timerfd, log_INFO, log_ERROR, log_DEBUG,
+ const struct pakfire_wait_fds {
+ const int fd;
+ const int events;
+ } fds[] = {
+ { socket_recv, EPOLLIN },
+
+ // Standard input/output
+ { stdin, EPOLLOUT },
+ { stdout, EPOLLIN },
+ { stderr, EPOLLIN },
+
+ // Timer
+ { timerfd, EPOLLIN },
+
+ // Child Processes
+ { ctx->pidfd1, EPOLLIN },
+
+ // Log Pipes
+ { log_INFO, EPOLLIN },
+ { log_ERROR, EPOLLIN },
+#ifdef ENABLE_DEBUG
+ { log_DEBUG, EPOLLIN },
+#endif /* ENABLE_DEBUG */
+
+ // Sentinel
+ { -1, 0 },
};
// Setup epoll
}
// Turn file descriptors into non-blocking mode and add them to epoll()
- for (unsigned int i = 0; i < sizeof(fds) / sizeof(*fds); i++) {
- int fd = fds[i];
-
+ for (const struct pakfire_wait_fds* fd = fds; fd->events; fd++) {
// Skip fds which were not initialized
- if (fd < 0)
+ if (fd->fd < 0)
continue;
- ev.events = EPOLLHUP;
-
- if (fd == stdin)
- ev.events |= EPOLLOUT;
- else
- ev.events |= EPOLLIN;
-
- // Read flags
- int flags = fcntl(fd, F_GETFL, 0);
-
- // Set modified flags
- if (fcntl(fd, F_SETFL, flags|O_NONBLOCK) < 0) {
- ERROR(jail->pakfire,
- "Could not set file descriptor %d into non-blocking mode: %m\n", fd);
- r = 1;
- goto ERROR;
- }
-
- ev.data.fd = fd;
-
- if (epoll_ctl(epollfd, EPOLL_CTL_ADD, fd, &ev) < 0) {
- ERROR(jail->pakfire, "Could not add file descriptor %d to epoll(): %m\n", fd);
- r = 1;
+ // Add the FD to the event loop
+ r = pakfire_jail_epoll_add_fd(jail, epollfd, fd->fd, fd->events);
+ if (r)
goto ERROR;
- }
}
int ended = 0;
+ int exit = 0;
+
+ CTX_DEBUG(jail->ctx, "Launching main loop...\n");
// Loop for as long as the process is alive
while (!ended) {
// Check if there is any data to be read
if (e & EPOLLIN) {
- // Handle any changes to the PIDFD
- if (fd == pidfd) {
- // Call waidid() and store the result
- r = waitid(P_PIDFD, ctx->pidfd, &ctx->status, WEXITED);
+ // Monitor the first child process
+ if (fd == ctx->pidfd1) {
+ r = pakfire_jail_wait_on_child(jail, ctx->pidfd1);
if (r) {
- ERROR(jail->pakfire, "waitid() failed: %m\n");
+ CTX_ERROR(jail->ctx, "The first child exited with an error\n");
goto ERROR;
}
+ close(ctx->pidfd1);
+ ctx->pidfd1 = -1;
+
+ continue;
+
+ // Monitor the second child process
+ } else if (fd == ctx->pidfd2) {
+ exit = pakfire_jail_wait_on_child(jail, ctx->pidfd2);
+ if (exit < 0) {
+ CTX_ERROR(jail->ctx, "The second child exited with an error\n");
+ goto ERROR;
+ }
+
+ close(ctx->pidfd2);
+ ctx->pidfd2 = -1;
+
// Mark that we have ended so that we will process the remaining
// events from epoll() now, but won't restart the outer loop.
ended = 1;
+
continue;
// Handle timer events
DEBUG(jail->pakfire, "Terminating process...\n");
// Send SIGTERM to the process
- r = pidfd_send_signal(pidfd, SIGKILL, NULL, 0);
+ r = pidfd_send_signal(ctx->pidfd2, SIGKILL, NULL, 0);
if (r) {
ERROR(jail->pakfire, "Could not kill process: %m\n");
goto ERROR;
// There is nothing else to do
continue;
+ // Handle socket messages
+ } else if (fd == socket_recv) {
+ // Receive the FD of the second child process
+ r = pakfire_jail_recv_fd(jail, socket_recv, &ctx->pidfd2);
+ if (r)
+ goto ERROR;
+
+ // Add it to the event loop
+ r = pakfire_jail_epoll_add_fd(jail, epollfd, ctx->pidfd2, EPOLLIN);
+ if (r)
+ goto ERROR;
+
+ // Setup the child process
+ r = pakfire_jail_setup_child2(jail, ctx);
+ if (r)
+ goto ERROR;
+
+ // Don't fall through to log processing
+ continue;
+
// Handle logging messages
} else if (fd == log_INFO) {
buffer = &ctx->buffers.log_INFO;
priority = LOG_INFO;
- callback = pakfire_jail_default_log_callback;
+ callback = pakfire_jail_log;
} else if (fd == log_ERROR) {
buffer = &ctx->buffers.log_ERROR;
priority = LOG_ERR;
- callback = pakfire_jail_default_log_callback;
+ callback = pakfire_jail_log;
+#ifdef ENABLE_DEBUG
} else if (fd == log_DEBUG) {
buffer = &ctx->buffers.log_DEBUG;
priority = LOG_DEBUG;
- callback = pakfire_jail_default_log_callback;
+ callback = pakfire_jail_log;
+#endif /* ENABLE_DEBUG */
// Handle anything from the log pipes
} else if (fd == stdout) {
buffer = &ctx->buffers.stdout;
priority = LOG_INFO;
- callback = ctx->communicate.out;
- data = ctx->communicate.data;
+ // Send any output to the default logger if no callback is set
+ if (ctx->communicate.out) {
+ callback = ctx->communicate.out;
+ data = ctx->communicate.data;
+ } else {
+ callback = jail->callbacks.log;
+ data = jail->callbacks.log_data;
+ }
} else if (fd == stderr) {
buffer = &ctx->buffers.stderr;
priority = LOG_ERR;
- callback = ctx->communicate.out;
- data = ctx->communicate.data;
+ // Send any output to the default logger if no callback is set
+ if (ctx->communicate.out) {
+ callback = ctx->communicate.out;
+ data = ctx->communicate.data;
+ } else {
+ callback = jail->callbacks.log;
+ data = jail->callbacks.log_data;
+ }
} else {
DEBUG(jail->pakfire, "Received invalid file descriptor %d\n", fd);
}
}
+ // Return the exit code
+ r = exit;
+
ERROR:
- if (epollfd > 0)
+ CTX_DEBUG(jail->ctx, "Main loop terminated\n");
+
+ if (epollfd >= 0)
close(epollfd);
- if (timerfd > 0)
+ if (timerfd >= 0)
close(timerfd);
return r;
// Bind-mount all paths read-only
for (const char** path = paths; *path; path++) {
r = pakfire_bind(jail->pakfire, *path, NULL, MS_RDONLY);
- if (r)
+ if (r) {
+ switch (errno) {
+ // Ignore if we don't have permission
+ case EPERM:
+ continue;
+
+ default:
+ break;
+ }
return r;
+ }
}
return 0;
flags |= PAKFIRE_MOUNT_LOOP_DEVICES;
// Mount all default stuff
- r = pakfire_mount_all(jail->pakfire, flags);
+ r = pakfire_mount_all(jail->pakfire, PAKFIRE_MNTNS_OUTER, flags);
+ if (r)
+ return r;
+
+ // Populate /dev
+ r = pakfire_populate_dev(jail->pakfire, flags);
+ if (r)
+ return r;
+
+ // Mount the interpreter (if needed)
+ r = pakfire_mount_interpreter(jail->pakfire);
if (r)
return r;
return r;
}
- // Log all mountpoints
- pakfire_mount_list(jail->pakfire);
-
return 0;
}
"0 %lu %lu\n", subgid->id, subgid->length);
} else {
r = pakfire_file_write(jail->pakfire, path, 0, 0, 0,
- "0 %lu 1\n%1 %lu %lu\n", gid, subgid->id, subgid->length);
+ "0 %lu 1\n1 %lu %lu\n", gid, subgid->id, subgid->length);
}
if (r) {
static int pakfire_jail_setgroups(struct pakfire_jail* jail, pid_t pid) {
char path[PATH_MAX];
- int r = 1;
+ int r;
// Make path
r = pakfire_string_format(path, "/proc/%d/setgroups", pid);
if (r)
return r;
- // Open file for writing
- FILE* f = fopen(path, "w");
- if (!f) {
- ERROR(jail->pakfire, "Could not open %s for writing: %m\n", path);
- goto ERROR;
- }
-
- // Write content
- int bytes_written = fprintf(f, "deny\n");
- if (bytes_written <= 0) {
- ERROR(jail->pakfire, "Could not write to %s: %m\n", path);
- goto ERROR;
- }
-
- r = fclose(f);
- f = NULL;
+ r = pakfire_file_write(jail->pakfire, path, 0, 0, 0, "deny\n");
if (r) {
- ERROR(jail->pakfire, "Could not close %s: %m\n", path);
- goto ERROR;
+ CTX_ERROR(jail->ctx, "Could not set setgroups to deny: %s\n", strerror(errno));
+ r = -errno;
}
-ERROR:
- if (f)
- fclose(f);
-
return r;
}
DEBUG(jail->pakfire, "Sending signal...\n");
// Write to the file descriptor
- ssize_t bytes_written = write(fd, &val, sizeof(val));
- if (bytes_written < 0 || (size_t)bytes_written < sizeof(val)) {
- ERROR(jail->pakfire, "Could not send signal: %m\n");
- r = 1;
+ r = eventfd_write(fd, val);
+ if (r < 0) {
+ ERROR(jail->pakfire, "Could not send signal: %s\n", strerror(errno));
+ r = -errno;
}
// Close the file descriptor
DEBUG(jail->pakfire, "Waiting for signal...\n");
- ssize_t bytes_read = read(fd, &val, sizeof(val));
- if (bytes_read < 0 || (size_t)bytes_read < sizeof(val)) {
- ERROR(jail->pakfire, "Error waiting for signal: %m\n");
- r = 1;
+ r = eventfd_read(fd, &val);
+ if (r < 0) {
+ ERROR(jail->pakfire, "Error waiting for signal: %s\n", strerror(errno));
+ r = -errno;
}
// Close the file descriptor
return r;
}
+/*
+	Makes root the new root filesystem of the calling process.
+
+	This changes the working directory to root, calls pivot_root(".", ".")
+	and then detaches the old root filesystem with umount2(".", MNT_DETACH).
+
+	Returns zero on success. If any step fails, the error is logged and the
+	return value of the failed call is passed back.
+*/
+static int pakfire_jail_switch_root(struct pakfire_jail* jail, const char* root) {
+	// Enter the directory that is going to be the new root
+	int result = chdir(root);
+	if (result != 0) {
+		ERROR(jail->pakfire, "chdir(%s) failed: %m\n", root);
+		return result;
+	}
+
+	// Swap the root filesystem
+	result = pivot_root(".", ".");
+	if (result != 0) {
+		ERROR(jail->pakfire, "Failed changing into the new root directory %s: %m\n", root);
+		return result;
+	}
+
+	// Get rid of the old root
+	result = umount2(".", MNT_DETACH);
+	if (result != 0)
+		ERROR(jail->pakfire, "Could not umount the old root filesystem: %m\n");
+
+	// This is zero if everything succeeded
+	return result;
+}
+
+#if 0
+/*
+ Allocates a new pseudo terminal for the jail (currently compiled out by #if 0).
+
+ The master file descriptor is stored in ctx->consolefd and the path that
+ ptsname_r() reports is stored in ctx->console. Afterwards pakfire_symlink()
+ is called with "/dev/console" and that path.
+
+ Returns zero on success and a non-zero value on failure.
+*/
+static int pakfire_jail_open_pty(struct pakfire_jail* jail, struct pakfire_jail_exec* ctx) {
+ int r;
+
+ // Allocate a new PTY
+ ctx->consolefd = posix_openpt(O_RDWR|O_NONBLOCK|O_NOCTTY|O_CLOEXEC);
+ if (ctx->consolefd < 0)
+ return -errno;
+
+ // Fetch the path
+ // NOTE(review): presumably ptsname_r() returns a positive error number on
+ // failure which is why the result is negated here - confirm
+ r = ptsname_r(ctx->consolefd, ctx->console, sizeof(ctx->console));
+ if (r)
+ return -r;
+
+ CTX_DEBUG(jail->ctx, "Allocated console at %s (%d)\n", ctx->console, ctx->consolefd);
+
+ // Create a symlink
+ // NOTE(review): the argument order (link path vs. target) of pakfire_symlink()
+ // is not visible from here - verify against its declaration
+ r = pakfire_symlink(jail->ctx, "/dev/console", ctx->console);
+ if (r)
+ return r;
+
+ // r is always zero at this point
+ return r;
+}
+#endif
+
/*
- Performs the initialisation that needs to happen in the parent part
+ Called by the parent that sets up the second child process...
*/
-static int pakfire_jail_parent(struct pakfire_jail* jail, struct pakfire_jail_exec* ctx) {
+static int pakfire_jail_setup_child2(
+ struct pakfire_jail* jail, struct pakfire_jail_exec* ctx) {
+ pid_t pid = -1;
int r;
+ // Fetch the PID
+ r = pidfd_get_pid(ctx->pidfd2, &pid);
+ if (r) {
+ CTX_ERROR(jail->ctx, "Could not fetch PID: %s\n", strerror(-r));
+ return r;
+ }
+
// Setup UID mapping
- r = pakfire_jail_setup_uid_mapping(jail, ctx->pid);
+ r = pakfire_jail_setup_uid_mapping(jail, pid);
if (r)
return r;
// Write "deny" to /proc/PID/setgroups
- r = pakfire_jail_setgroups(jail, ctx->pid);
+ r = pakfire_jail_setgroups(jail, pid);
if (r)
return r;
// Setup GID mapping
- r = pakfire_jail_setup_gid_mapping(jail, ctx->pid);
+ r = pakfire_jail_setup_gid_mapping(jail, pid);
if (r)
return r;
return 0;
}
-static int pakfire_jail_child(struct pakfire_jail* jail, struct pakfire_jail_exec* ctx,
- const char* argv[]) {
+/*
+ Child 2 is launched in their own user/mount/etc. namespace.
+*/
+static int pakfire_jail_child2(struct pakfire_jail* jail,
+ struct pakfire_jail_exec* ctx, const char* argv[]) {
int r;
- // Redirect any logging to our log pipe
- pakfire_set_log_callback(jail->pakfire, pakfire_jail_log, &ctx->pipes);
+ // Fetch my own PID
+ pid_t pid = getpid();
- // Die with parent
- r = prctl(PR_SET_PDEATHSIG, SIGKILL, 0, 0, 0);
+ CTX_DEBUG(jail->ctx, "Launched child process in jail with PID %d\n", pid);
+
+ // Make this process dumpable
+ r = prctl (PR_SET_DUMPABLE, 1, 0, 0, 0);
if (r) {
- ERROR(jail->pakfire, "Could not configure to die with parent: %m\n");
+ CTX_ERROR(jail->ctx, "Could not make the process dumpable: %m\n");
return 126;
}
- // Fetch my own PID
- pid_t pid = getpid();
-
- DEBUG(jail->pakfire, "Launched child process in jail with PID %d\n", pid);
+ // Don't drop any capabilities on setuid()
+ r = prctl(PR_SET_KEEPCAPS, 1);
+ if (r) {
+ CTX_ERROR(jail->ctx, "Could not set PR_SET_KEEPCAPS: %m\n");
+ return 126;
+ }
// Wait for the parent to finish initialization
r = pakfire_jail_wait_for_signal(jail, ctx->completed_fd);
if (r)
return r;
- // Perform further initialization
-
// Fetch UID/GID
uid_t uid = getuid();
gid_t gid = getgid();
uid_t euid = geteuid();
gid_t egid = getegid();
- DEBUG(jail->pakfire, " UID: %d (effective %d)\n", uid, euid);
- DEBUG(jail->pakfire, " GID: %d (effective %d)\n", gid, egid);
+ DEBUG(jail->pakfire, " UID: %u (effective %u)\n", uid, euid);
+ DEBUG(jail->pakfire, " GID: %u (effective %u)\n", gid, egid);
- // Check if we are (effectively running as root)
+ // Log all mountpoints
+ pakfire_mount_list(jail->ctx);
+
+ // Fail if we are not PID 1
+ if (pid != 1) {
+ CTX_ERROR(jail->ctx, "Child process is not PID 1\n");
+ return 126;
+ }
+
+ // Fail if we are not running as root
if (uid || gid || euid || egid) {
ERROR(jail->pakfire, "Child process is not running as root\n");
return 126;
}
- const char* root = pakfire_get_path(jail->pakfire);
- const char* arch = pakfire_get_arch(jail->pakfire);
-
- // Change root (unless root is /)
- if (!pakfire_on_root(jail->pakfire)) {
- // Mount everything
- r = pakfire_jail_mount(jail, ctx);
- if (r)
- return r;
+ // Mount all default stuff
+ r = pakfire_mount_all(jail->pakfire, PAKFIRE_MNTNS_INNER, 0);
+ if (r)
+ return 126;
- // Call chroot()
- r = chroot(root);
- if (r) {
- ERROR(jail->pakfire, "chroot() to %s failed: %m\n", root);
- return 1;
- }
+#if 0
+ // Create a new session
+ r = setsid();
+ if (r < 0) {
+ CTX_ERROR(jail->ctx, "Could not create a new session: %s\n", strerror(errno));
+ return 126;
+ }
- // Change directory to /
- r = chdir("/");
- if (r) {
- ERROR(jail->pakfire, "chdir() after chroot() failed: %m\n");
- return 1;
- }
+ // Allocate a new PTY
+ r = pakfire_jail_open_pty(jail, ctx);
+ if (r) {
+ CTX_ERROR(jail->ctx, "Could not allocate a new PTY: %s\n", strerror(-r));
+ return 126;
}
+#endif
+
+ const char* arch = pakfire_get_effective_arch(jail->pakfire);
// Set personality
unsigned long persona = pakfire_arch_personality(arch);
r = personality(persona);
if (r < 0) {
ERROR(jail->pakfire, "Could not set personality (%x)\n", (unsigned int)persona);
- return 1;
+ return 126;
}
}
#endif /* ENABLE_DEBUG */
// Connect standard input
- if (ctx->pipes.stdin[0]) {
+ if (ctx->pipes.stdin[0] >= 0) {
r = dup2(ctx->pipes.stdin[0], STDIN_FILENO);
if (r < 0) {
ERROR(jail->pakfire, "Could not connect fd %d to stdin: %m\n",
}
// Connect standard output and error
- if (ctx->pipes.stdout[1] && ctx->pipes.stderr[1]) {
+ if (ctx->pipes.stdout[1] >= 0 && ctx->pipes.stderr[1] >= 0) {
r = dup2(ctx->pipes.stdout[1], STDOUT_FILENO);
if (r < 0) {
ERROR(jail->pakfire, "Could not connect fd %d to stdout: %m\n",
if (r)
return r;
- // Don't drop any capabilities on execve()
- r = prctl(PR_SET_KEEPCAPS, 1);
- if (r) {
- ERROR(jail->pakfire, "Could not set PR_SET_KEEPCAPS: %m\n");
- return r;
- }
-
// Set capabilities
r = pakfire_jail_set_capabilities(jail);
if (r)
if (r)
return r;
- DEBUG(jail->pakfire, "Child process initialization done\n");
- DEBUG(jail->pakfire, "Launching command:\n");
+ CTX_DEBUG(jail->ctx, "Child process initialization done\n");
+ CTX_DEBUG(jail->ctx, "Launching command:\n");
// Log argv
for (unsigned int i = 0; argv[i]; i++)
- DEBUG(jail->pakfire, " argv[%d] = %s\n", i, argv[i]);
+ CTX_DEBUG(jail->ctx, " argv[%u] = %s\n", i, argv[i]);
// exec() command
r = execvpe(argv[0], (char**)argv, jail->env);
r = 1;
}
- ERROR(jail->pakfire, "Could not execve(%s): %m\n", argv[0]);
+ CTX_ERROR(jail->ctx, "Could not execve(%s): %m\n", argv[0]);
}
// We should not get here
return r;
}
+/*
+	Child 1 is launched in a new mount namespace...
+
+	It redirects logging into the log pipes, prepares the mounts, switches
+	into the new root and then forks the second child process into its own
+	set of namespaces. The pidfd of the second child is sent to the parent
+	over the UNIX domain socket.
+
+	In the first child, this returns zero on success and a non-zero value
+	on failure. The second child never returns from this function as it
+	calls _exit() with the result of pakfire_jail_child2().
+*/
+static int pakfire_jail_child1(struct pakfire_jail* jail,
+		struct pakfire_jail_exec* ctx, const char* argv[]) {
+	int r;
+
+	// Redirect any logging to our log pipe
+	pakfire_ctx_set_log_callback(jail->ctx, pakfire_jail_log_redirect, &ctx->pipes);
+
+	CTX_DEBUG(jail->ctx, "First child process launched\n");
+
+	const int socket_send = pakfire_jail_get_pipe_to_write(jail, &ctx->socket);
+
+	const char* root = pakfire_get_path(jail->pakfire);
+
+	// Die with parent
+	r = prctl(PR_SET_PDEATHSIG, SIGKILL, 0, 0, 0);
+	if (r) {
+		CTX_ERROR(jail->ctx, "Could not configure to die with parent: %s\n", strerror(errno));
+		goto ERROR;
+	}
+
+	// Change mount propagation so that we will receive, but don't propagate back
+	// NOTE(review): strerror(r) here and below assumes that
+	// pakfire_mount_change_propagation() returns a positive errno value.
+	// If it returns -1 or a negative errno value, the logged message is
+	// wrong - confirm against its implementation.
+	r = pakfire_mount_change_propagation(jail->ctx, "/", MS_SLAVE);
+	if (r) {
+		CTX_ERROR(jail->ctx, "Could not change mount propagation to SLAVE: %s\n", strerror(r));
+		goto ERROR;
+	}
+
+	// Make root a mountpoint in the new mount namespace
+	r = pakfire_mount_make_mounpoint(jail->pakfire, root);
+	if (r)
+		goto ERROR;
+
+	// Make everything private
+	r = pakfire_mount_change_propagation(jail->ctx, root, MS_PRIVATE);
+	if (r) {
+		CTX_ERROR(jail->ctx, "Could not change mount propagation to PRIVATE: %s\n", strerror(r));
+		goto ERROR;
+	}
+
+	// Mount everything
+	r = pakfire_jail_mount(jail, ctx);
+	if (r)
+		goto ERROR;
+
+	// XXX setup keyring
+
+	// Switch into the new root (using pivot_root, not chroot)
+	r = pakfire_jail_switch_root(jail, root);
+	if (r)
+		goto ERROR;
+
+	// Change mount propagation so that we will propagate everything down
+	r = pakfire_mount_change_propagation(jail->ctx, "/", MS_SHARED);
+	if (r) {
+		CTX_ERROR(jail->ctx, "Could not change mount propagation to SHARED: %s\n", strerror(r));
+		goto ERROR;
+	}
+
+	// Configure child process
+	struct clone_args args = {
+		.flags =
+			CLONE_NEWCGROUP |
+			CLONE_NEWIPC |
+			CLONE_NEWNS |
+			CLONE_NEWPID |
+			CLONE_NEWTIME |
+			CLONE_NEWUSER |
+			CLONE_NEWUTS |
+			CLONE_PIDFD,
+		.exit_signal = SIGCHLD,
+		.pidfd = (long long unsigned int)&ctx->pidfd2,
+	};
+
+	// Launch the process into the configured cgroup
+	if (ctx->cgroup) {
+		args.flags |= CLONE_INTO_CGROUP;
+
+		// Clone into this cgroup
+		args.cgroup = pakfire_cgroup_fd(ctx->cgroup);
+	}
+
+	// Put the process into its own network namespace unless networking is enabled
+	if (!pakfire_jail_exec_has_flag(ctx, PAKFIRE_JAIL_HAS_NETWORKING))
+		args.flags |= CLONE_NEWNET;
+
+	// Fork the second child process
+	pid_t pid = clone3(&args, sizeof(args));
+	if (pid < 0) {
+		// Save errno before logging has a chance to overwrite it
+		r = -errno;
+
+		CTX_ERROR(jail->ctx, "Could not fork the second child process: %s\n", strerror(-r));
+		goto ERROR;
+
+	// Child process
+	} else if (pid == 0) {
+		r = pakfire_jail_child2(jail, ctx, argv);
+		_exit(r);
+	}
+
+	// Send the pidfd of the child to the first parent
+	r = pakfire_jail_send_fd(jail, socket_send, ctx->pidfd2);
+	if (r)
+		goto ERROR;
+
+ERROR:
+	return r;
+}
+
// Run a command in the jail
static int __pakfire_jail_exec(struct pakfire_jail* jail, const char* argv[],
const int interactive,
pakfire_jail_communicate_in communicate_in,
pakfire_jail_communicate_out communicate_out,
void* data, int flags) {
- int exit = -1;
int r;
// Check if argv is valid
return -1;
}
- // Send any output to the default logger if no callback is set
- if (!communicate_out)
- communicate_out = pakfire_jail_default_log_callback;
-
// Initialize context for this call
struct pakfire_jail_exec ctx = {
.flags = flags,
+ .socket = { -1, -1 },
+
.pipes = {
- .stdin = { -1, -1 },
- .stdout = { -1, -1 },
- .stderr = { -1, -1 },
+ .stdin = { -1, -1 },
+ .stdout = { -1, -1 },
+ .stderr = { -1, -1 },
+ .log_INFO = { -1, -1 },
+ .log_ERROR = { -1, -1 },
+#ifdef ENABLE_DEBUG
+ .log_DEBUG = { -1, -1 },
+#endif /* ENABLE_DEBUG */
},
.communicate = {
.data = data,
},
- .pidfd = -1,
+ // PIDs
+ .pidfd1 = -1,
+ .pidfd2 = -1,
};
DEBUG(jail->pakfire, "Executing jail...\n");
+ // Become the subreaper
+ r = prctl(PR_SET_CHILD_SUBREAPER, 1, 0, 0, 0);
+ if (r < 0) {
+ CTX_ERROR(jail->ctx, "Failed to become the sub-reaper: %s\n", strerror(errno));
+ r = -errno;
+ goto ERROR;
+ }
+
// Enable networking in interactive mode
if (interactive)
ctx.flags |= PAKFIRE_JAIL_HAS_NETWORKING;
+ // Create a UNIX domain socket
+ r = socketpair(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC, 0, ctx.socket);
+ if (r < 0) {
+ CTX_ERROR(jail->ctx, "Could not create UNIX socket: %s\n", strerror(errno));
+ r = -errno;
+ goto ERROR;
+ }
+
/*
Setup a file descriptor which can be used to notify the client that the parent
has completed configuration.
goto ERROR;
#endif /* ENABLE_DEBUG */
- // Configure child process
- struct clone_args args = {
- .flags =
- CLONE_NEWCGROUP |
- CLONE_NEWIPC |
- CLONE_NEWNS |
- CLONE_NEWPID |
- CLONE_NEWUSER |
- CLONE_NEWUTS |
- CLONE_PIDFD,
- .exit_signal = SIGCHLD,
- .pidfd = (long long unsigned int)&ctx.pidfd,
- };
-
// Launch the process in a cgroup that is a leaf of the configured cgroup
if (jail->cgroup) {
- args.flags |= CLONE_INTO_CGROUP;
-
// Fetch our UUID
const char* uuid = pakfire_jail_uuid(jail);
ERROR(jail->pakfire, "Could not create cgroup for jail: %m\n");
goto ERROR;
}
-
- // Clone into this cgroup
- args.cgroup = pakfire_cgroup_fd(ctx.cgroup);
}
- // Setup networking
- if (!pakfire_jail_exec_has_flag(&ctx, PAKFIRE_JAIL_HAS_NETWORKING)) {
- args.flags |= CLONE_NEWNET;
- }
+ /*
+ Initially, we will set up a new mount namespace and launch a child process in it.
- // Fork this process
- ctx.pid = clone3(&args, sizeof(args));
- if (ctx.pid < 0) {
- ERROR(jail->pakfire, "Could not clone: %m\n");
- return -1;
+ This process remains in the user/ipc/time/etc. namespace and will set up
+ the mount namespace.
+ */
+
+ // Configure child process
+ struct clone_args args = {
+ .flags =
+ CLONE_NEWNS |
+ CLONE_PIDFD |
+ CLONE_CLEAR_SIGHAND,
+ .exit_signal = SIGCHLD,
+ .pidfd = (long long unsigned int)&ctx.pidfd1,
+ };
+
+ // Fork the first child process
+ pid_t pid = clone3(&args, sizeof(args));
+ if (pid < 0) {
+ CTX_ERROR(jail->ctx, "Could not fork the first child process: %s\n", strerror(errno));
+ r = -errno;
+ goto ERROR;
// Child process
- } else if (ctx.pid == 0) {
- r = pakfire_jail_child(jail, &ctx, argv);
+ } else if (pid == 0) {
+ r = pakfire_jail_child1(jail, &ctx, argv);
_exit(r);
}
// Parent process
- r = pakfire_jail_parent(jail, &ctx);
- if (r)
- goto ERROR;
-
- DEBUG(jail->pakfire, "Waiting for PID %d to finish its work\n", ctx.pid);
-
- // Read output of the child process
r = pakfire_jail_wait(jail, &ctx);
if (r)
goto ERROR;
- // Handle exit status
- switch (ctx.status.si_code) {
- case CLD_EXITED:
- DEBUG(jail->pakfire, "The child process exited with code %d\n",
- ctx.status.si_status);
-
- // Pass exit code
- exit = ctx.status.si_status;
- break;
-
- case CLD_KILLED:
- ERROR(jail->pakfire, "The child process was killed\n");
- exit = 139;
- break;
-
- case CLD_DUMPED:
- ERROR(jail->pakfire, "The child process terminated abnormally\n");
- break;
-
- // Log anything else
- default:
- ERROR(jail->pakfire, "Unknown child exit code: %d\n", ctx.status.si_code);
- break;
- }
-
ERROR:
// Destroy the temporary cgroup (if any)
if (ctx.cgroup) {
// Read cgroup stats
- r = pakfire_cgroup_stat(ctx.cgroup, &ctx.cgroup_stats);
- if (r) {
- ERROR(jail->pakfire, "Could not read cgroup stats: %m\n");
- } else {
- pakfire_cgroup_stat_dump(ctx.cgroup, &ctx.cgroup_stats);
- }
-
+ pakfire_cgroup_stat(ctx.cgroup, &ctx.cgroup_stats);
+ pakfire_cgroup_stat_dump(ctx.cgroup, &ctx.cgroup_stats);
pakfire_cgroup_destroy(ctx.cgroup);
pakfire_cgroup_unref(ctx.cgroup);
}
pakfire_jail_close_pipe(jail, ctx.pipes.stdin);
pakfire_jail_close_pipe(jail, ctx.pipes.stdout);
pakfire_jail_close_pipe(jail, ctx.pipes.stderr);
- if (ctx.pidfd)
- close(ctx.pidfd);
pakfire_jail_close_pipe(jail, ctx.pipes.log_INFO);
pakfire_jail_close_pipe(jail, ctx.pipes.log_ERROR);
+#ifdef ENABLE_DEBUG
pakfire_jail_close_pipe(jail, ctx.pipes.log_DEBUG);
+#endif /* ENABLE_DEBUG */
+ if (ctx.pidfd1 >= 0)
+ close(ctx.pidfd1);
+ if (ctx.pidfd2 >= 0)
+ close(ctx.pidfd2);
+
+ // Close sockets
+ pakfire_jail_close_pipe(jail, ctx.socket);
- return exit;
+ return r;
}
PAKFIRE_EXPORT int pakfire_jail_exec(
const char* root = pakfire_get_path(jail->pakfire);
// Write the scriptlet to disk
- r = pakfire_path_join(path, root, PAKFIRE_TMP_DIR "/pakfire-script.XXXXXX");
+ r = pakfire_path_append(path, root, PAKFIRE_TMP_DIR "/pakfire-script.XXXXXX");
if (r)
goto ERROR;
}
int pakfire_jail_shell(struct pakfire_jail* jail) {
+ int r;
+
const char* argv[] = {
"/bin/bash", "--login", NULL,
};
// Execute /bin/bash
- return pakfire_jail_exec_interactive(jail, argv, 0);
+ r = pakfire_jail_exec_interactive(jail, argv, 0);
+
+ // Raise any errors
+ if (r < 0)
+ return r;
+
+ // Ignore any return codes from the shell
+ return 0;
}
static int pakfire_jail_run_if_possible(struct pakfire* pakfire, const char** argv) {