#include <sys/personality.h>
#include <sys/prctl.h>
#include <sys/resource.h>
+#include <sys/signalfd.h>
+#include <sys/timerfd.h>
#include <sys/types.h>
#include <sys/wait.h>
+// libnl3
+#include <net/if.h>
+#include <netlink/route/link.h>
+
// libseccomp
#include <seccomp.h>
#include <pakfire/jail.h>
#include <pakfire/logging.h>
#include <pakfire/mount.h>
+#include <pakfire/os.h>
#include <pakfire/pakfire.h>
+#include <pakfire/path.h>
#include <pakfire/private.h>
#include <pakfire/pwd.h>
#include <pakfire/string.h>
{ "LANG", "C.utf-8" },
{ "PATH", "/usr/local/sbin:/usr/sbin:/sbin:/usr/local/bin:/usr/bin:/bin", },
{ "TERM", "vt100" },
+
+ // Tell everything that it is running inside a Pakfire container
+ { "container", "pakfire" },
{ NULL, NULL },
};
};
struct pakfire_jail {
+ struct pakfire_ctx* ctx;
struct pakfire* pakfire;
int nrefs;
uuid_t uuid;
char __uuid[UUID_STR_LEN];
- // Flags
- int flags;
-
// Resource Limits
int nice;
+ // Timeout
+ struct itimerspec timeout;
+
// CGroup
struct pakfire_cgroup* cgroup;
// Mountpoints
struct pakfire_jail_mountpoint mountpoints[MAX_MOUNTPOINTS];
unsigned int num_mountpoints;
+
+ // Callbacks
+ struct pakfire_jail_callbacks {
+ // Log
+ pakfire_jail_log_callback log;
+ void* log_data;
+ } callbacks;
};
struct pakfire_log_buffer {
size_t used;
};
-enum pakfire_jail_exec_flags {
- PAKFIRE_JAIL_HAS_NETWORKING = (1 << 0),
-};
-
struct pakfire_jail_exec {
int flags;
- // PID (of the child)
- pid_t pid;
- int pidfd;
+ // PIDs (of the children)
+ int pidfd1;
+ int pidfd2;
- // Process status (from waitid)
- siginfo_t status;
+ // Socket to pass FDs
+ int socket[2];
// FD to notify the client that the parent has finished initialization
int completed_fd;
// Logging
int log_INFO[2];
int log_ERROR[2];
+#ifdef ENABLE_DEBUG
int log_DEBUG[2];
+#endif /* ENABLE_DEBUG */
} pipes;
// Communicate
// Logging
struct pakfire_log_buffer log_INFO;
struct pakfire_log_buffer log_ERROR;
+#ifdef ENABLE_DEBUG
struct pakfire_log_buffer log_DEBUG;
+#endif /* ENABLE_DEBUG */
} buffers;
struct pakfire_cgroup* cgroup;
return syscall(__NR_clone3, args, size);
}
+/*
+	Thin wrapper that invokes the pidfd_send_signal system call directly
+	through syscall(). All arguments are passed through unchanged and the
+	result of syscall() is returned.
+*/
+static int pidfd_send_signal(int pidfd, int sig, siginfo_t* info, unsigned int flags) {
+	const long result = syscall(SYS_pidfd_send_signal, pidfd, sig, info, flags);
+
+	return result;
+}
+
+/*
+	Thin wrapper that invokes the pivot_root system call directly through
+	syscall(). Both paths are passed through unchanged and the result of
+	syscall() is returned.
+*/
+static int pivot_root(const char* new_root, const char* old_root) {
+	const long result = syscall(SYS_pivot_root, new_root, old_root);
+
+	return result;
+}
+
static int pakfire_jail_exec_has_flag(
const struct pakfire_jail_exec* ctx, const enum pakfire_jail_exec_flags flag) {
return ctx->flags & flag;
if (jail->cgroup)
pakfire_cgroup_unref(jail->cgroup);
-
- pakfire_unref(jail->pakfire);
+ if (jail->pakfire)
+ pakfire_unref(jail->pakfire);
+ if (jail->ctx)
+ pakfire_ctx_unref(jail->ctx);
free(jail);
}
return 0;
}
+/*
+	Returns the UUID of the jail as a string.
+
+	The string is formatted with uuid_unparse_lower() on first use and
+	cached inside the jail, so the returned pointer belongs to the jail.
+*/
+static const char* pakfire_jail_uuid(struct pakfire_jail* jail) {
+	char* cached = jail->__uuid;
+
+	// An empty buffer means that the UUID has not been formatted yet
+	if (cached[0] == '\0')
+		uuid_unparse_lower(jail->uuid, cached);
+
+	return cached;
+}
+
static int pakfire_jail_setup_interactive_env(struct pakfire_jail* jail) {
// Set PS1
int r = pakfire_jail_set_env(jail, "PS1", "pakfire-jail \\w> ");
return 0;
}
-PAKFIRE_EXPORT int pakfire_jail_create(struct pakfire_jail** jail,
- struct pakfire* pakfire, int flags) {
+PAKFIRE_EXPORT int pakfire_jail_create(struct pakfire_jail** jail, struct pakfire* pakfire) {
int r;
+ const char* arch = pakfire_get_effective_arch(pakfire);
+
// Allocate a new jail
struct pakfire_jail* j = calloc(1, sizeof(*j));
if (!j)
return 1;
+ // Reference context
+ j->ctx = pakfire_ctx(pakfire);
+
// Reference Pakfire
j->pakfire = pakfire_ref(pakfire);
// Initialize reference counter
j->nrefs = 1;
- // Store flags
- j->flags = flags;
-
// Generate a random UUID
uuid_generate_random(j->uuid);
DEBUG(j->pakfire, "Allocated new jail at %p\n", j);
+ // Set the default logging callback
+ pakfire_jail_set_log_callback(j, pakfire_jail_default_log_callback, NULL);
+
// Set default environment
for (const struct environ* e = ENV; e->key; e++) {
r = pakfire_jail_set_env(j, e->key, e->val);
goto ERROR;
}
+ // Enable all CPU features that CPU has to offer
+ if (!pakfire_arch_is_supported_by_host(arch)) {
+ r = pakfire_jail_set_env(j, "QEMU_CPU", "max");
+ if (r)
+ goto ERROR;
+ }
+
+ // Set container UUID
+ r = pakfire_jail_set_env(j, "container_uuid", pakfire_jail_uuid(j));
+ if (r)
+ goto ERROR;
+
+ // Disable systemctl to talk to systemd
+ if (!pakfire_on_root(j->pakfire)) {
+ r = pakfire_jail_set_env(j, "SYSTEMD_OFFLINE", "1");
+ if (r)
+ goto ERROR;
+ }
+
// Done
*jail = j;
return 0;
return NULL;
}
-static const char* pakfire_jail_uuid(struct pakfire_jail* jail) {
- if (!*jail->__uuid)
- uuid_unparse_lower(jail->uuid, jail->__uuid);
+// Logging Callback
- return jail->__uuid;
+/*
+	Stores the given log callback together with its opaque data pointer
+	in the jail, replacing whatever was stored before.
+*/
+PAKFIRE_EXPORT void pakfire_jail_set_log_callback(struct pakfire_jail* jail,
+		pakfire_jail_log_callback callback, void* data) {
+	struct pakfire_jail_callbacks* callbacks = &jail->callbacks;
+
+	callbacks->log_data = data;
+	callbacks->log = callback;
}
// Resource Limits
return -1;
}
- char buffer[strlen(key) + 2];
- pakfire_string_format(buffer, "%s=", key);
+ const size_t length = strlen(key);
for (unsigned int i = 0; jail->env[i]; i++) {
- if (pakfire_string_startswith(jail->env[i], buffer))
+ if ((pakfire_string_startswith(jail->env[i], key)
+ && *(jail->env[i] + length) == '=')) {
return i;
+ }
}
// Nothing found
return 0;
}
+// Timeout
+
+/*
+	Stores the timeout (in seconds) in the jail. Only the seconds part of
+	the timer's initial expiration is written.
+
+	A value of zero disables the timeout. Always returns zero.
+*/
+PAKFIRE_EXPORT int pakfire_jail_set_timeout(
+		struct pakfire_jail* jail, unsigned int timeout) {
+	// Store value
+	jail->timeout.it_value.tv_sec = timeout;
+
+	// Zero means that no timeout is wanted
+	if (timeout == 0) {
+		DEBUG(jail->pakfire, "Timeout disabled\n");
+		return 0;
+	}
+
+	DEBUG(jail->pakfire, "Timeout set to %u second(s)\n", timeout);
+
+	return 0;
+}
+
+/*
+	Creates a timerfd on CLOCK_MONOTONIC and arms it with the timeout that
+	is stored in the jail.
+
+	Returns the file descriptor of the armed timer, or -1 if no timeout
+	has been configured or the timer could not be created or armed.
+*/
+static int pakfire_jail_create_timer(struct pakfire_jail* jail) {
+	// Without a timeout there is no timer to create
+	if (jail->timeout.it_value.tv_sec == 0)
+		return -1;
+
+	// Create a new timer
+	const int timer = timerfd_create(CLOCK_MONOTONIC, 0);
+	if (timer < 0) {
+		ERROR(jail->pakfire, "Could not create timer: %m\n");
+		return -1;
+	}
+
+	// Arm the timer and give up the file descriptor if that fails
+	if (timerfd_settime(timer, 0, &jail->timeout, NULL)) {
+		ERROR(jail->pakfire, "Could not arm timer: %m\n");
+		close(timer);
+		return -1;
+	}
+
+	return timer;
+}
+
+// Signals
+
+#if 0
+/*
+	Blocks SIGINT for the calling process and creates a non-blocking,
+	close-on-exec signalfd for it.
+
+	Returns the new file descriptor, or a negative value on error.
+*/
+static int pakfire_jail_handle_signals(struct pakfire_jail* jail) {
+	sigset_t signals;
+	int fd;
+	int r;
+
+	// The set of signals that we want to receive through the descriptor
+	sigemptyset(&signals);
+	sigaddset(&signals, SIGINT);
+
+	// Block signals
+	r = sigprocmask(SIG_BLOCK, &signals, NULL);
+	if (r < 0) {
+		ERROR(jail->pakfire, "Failed to block signals: %m\n");
+		return r;
+	}
+
+	// Create a file descriptor
+	fd = signalfd(-1, &signals, SFD_NONBLOCK|SFD_CLOEXEC);
+	if (fd < 0)
+		ERROR(jail->pakfire, "Failed to create signalfd: %m\n");
+
+	return fd;
+}
+#endif
+
/*
This function replaces any logging in the child process.
All log messages will be sent to the parent process through their respective pipes.
*/
-static void pakfire_jail_log(void* data, int priority, const char* file,
+static void pakfire_jail_log_redirect(void* data, int priority, const char* file,
int line, const char* fn, const char* format, va_list args) {
struct pakfire_jail_pipes* pipes = (struct pakfire_jail_pipes*)data;
int fd;
}
// Send the log message
- if (fd)
+ if (fd >= 0)
vdprintf(fd, format, args);
}
close(fd);
// Reset the file-descriptor so it won't be closed again later
- ctx->pipes.stdin[1] = 0;
+ ctx->pipes.stdin[1] = -1;
// Report success
r = 0;
static void pakfire_jail_close_pipe(struct pakfire_jail* jail, int fds[2]) {
for (unsigned int i = 0; i < 2; i++)
- if (fds[i])
+ if (fds[i] >= 0)
close(fds[i]);
}
int* fd_write = &(*fds)[1];
// Close the write end of the pipe
- if (*fd_write) {
+ if (*fd_write >= 0) {
close(*fd_write);
- *fd_write = 0;
+ *fd_write = -1;
}
// Return the read end
- return *fd_read;
+ if (*fd_read >= 0)
+ return *fd_read;
+
+ return -1;
}
static int pakfire_jail_get_pipe_to_write(struct pakfire_jail* jail, int (*fds)[2]) {
int* fd_write = &(*fds)[1];
// Close the read end of the pipe
- if (*fd_read) {
+ if (*fd_read >= 0) {
close(*fd_read);
- *fd_read = 0;
+ *fd_read = -1;
}
// Return the write end
- return *fd_write;
+ if (*fd_write >= 0)
+ return *fd_write;
+
+ return -1;
+}
+
+/*
+	Receives a single file descriptor which has been passed over a socket
+	as SCM_RIGHTS ancillary data.
+
+	On success, the descriptor is stored in *fd and zero is returned.
+	Otherwise a negative errno code is returned and *fd is left untouched:
+	-EBADMSG is returned if the message did not carry exactly one file
+	descriptor (this includes the peer having closed the socket).
+*/
+static int pakfire_jail_recv_fd(struct pakfire_jail* jail, int socket, int* fd) {
+	// The payload is the file descriptor itself and not a pointer to it
+	const size_t payload_length = sizeof(*fd);
+
+	// The union makes sure that the buffer is aligned for struct cmsghdr
+	union {
+		char buffer[CMSG_SPACE(sizeof(int))];
+		struct cmsghdr align;
+	} control;
+
+	memset(&control, 0, sizeof(control));
+
+	struct msghdr msg = {
+		.msg_control    = control.buffer,
+		.msg_controllen = sizeof(control.buffer),
+	};
+
+	// Receive the message (recvmsg() returns the number of bytes on success)
+	const ssize_t bytes_read = recvmsg(socket, &msg, 0);
+	if (bytes_read < 0) {
+		// Save errno because logging might overwrite it
+		const int err = errno;
+
+		CTX_ERROR(jail->ctx, "Could not receive file descriptor: %s\n", strerror(err));
+		return -err;
+	}
+
+	// Fail if the ancillary data did not fit into the buffer
+	if (msg.msg_flags & MSG_CTRUNC)
+		return -EBADMSG;
+
+	// Fetch the payload
+	struct cmsghdr* cmsg = CMSG_FIRSTHDR(&msg);
+	if (!cmsg)
+		return -EBADMSG;
+
+	// Only accept exactly one file descriptor
+	if (cmsg->cmsg_level != SOL_SOCKET || cmsg->cmsg_type != SCM_RIGHTS
+			|| cmsg->cmsg_len != CMSG_LEN(payload_length))
+		return -EBADMSG;
+
+	// Copy the payload because CMSG_DATA() is not necessarily aligned for int
+	memcpy(fd, CMSG_DATA(cmsg), payload_length);
+
+	CTX_DEBUG(jail->ctx, "Received fd %d from socket %d\n", *fd, socket);
+
+	return 0;
+}
+
+/*
+	Sends a single file descriptor over a socket as SCM_RIGHTS ancillary
+	data. The message carries no regular data.
+
+	Returns zero on success or a negative errno code if sendmsg() failed.
+*/
+static int pakfire_jail_send_fd(struct pakfire_jail* jail, int socket, int fd) {
+	const size_t payload_length = sizeof(fd);
+
+	// The union makes sure that the buffer is aligned for struct cmsghdr
+	union {
+		char buffer[CMSG_SPACE(sizeof(int))];
+		struct cmsghdr align;
+	} control;
+
+	// Don't pass any uninitialized padding to the kernel
+	memset(&control, 0, sizeof(control));
+
+	CTX_DEBUG(jail->ctx, "Sending fd %d to socket %d\n", fd, socket);
+
+	// Header
+	struct msghdr msg = {
+		.msg_control    = control.buffer,
+		.msg_controllen = sizeof(control.buffer),
+	};
+
+	// Payload
+	struct cmsghdr* cmsg = CMSG_FIRSTHDR(&msg);
+	cmsg->cmsg_level = SOL_SOCKET;
+	cmsg->cmsg_type  = SCM_RIGHTS;
+	cmsg->cmsg_len   = CMSG_LEN(payload_length);
+
+	// Set payload (copied because CMSG_DATA() is not necessarily aligned for int)
+	memcpy(CMSG_DATA(cmsg), &fd, payload_length);
+
+	// Send the message (sendmsg() returns the number of bytes on success)
+	const ssize_t bytes_sent = sendmsg(socket, &msg, 0);
+	if (bytes_sent < 0) {
+		// Save errno because logging might overwrite it
+		const int err = errno;
+
+		CTX_ERROR(jail->ctx, "Could not send file descriptor: %s\n", strerror(err));
+		return -err;
+	}
+
+	return 0;
+}
+
+/*
+	Log callback which forwards a line of the given length to
+	pakfire_log_condition() using the given priority.
+
+	The data pointer is not used. Always returns zero.
+*/
+static int pakfire_jail_log(struct pakfire* pakfire, void* data, int priority,
+		const char* line, const size_t length) {
+	// The line is printed with an explicit length
+	const int precision = (int)length;
+
+	// Pass everything to the parent logger
+	pakfire_log_condition(pakfire, priority, 0, "%.*s", precision, line);
+
+	return 0;
+}
+
+/*
+	Switches the given file descriptor into non-blocking mode and adds it
+	to the epoll instance. EPOLLHUP is always added to the requested events.
+
+	Returns zero on success or a negative errno code on failure.
+*/
+static int pakfire_jail_epoll_add_fd(struct pakfire_jail* jail, int epollfd, int fd, int events) {
+	struct epoll_event event = {
+		.events = events|EPOLLHUP,
+		.data   = {
+			.fd = fd,
+		},
+	};
+	int err;
+	int r;
+
+	// Read flags (never pass a failed result on to F_SETFL)
+	const int flags = fcntl(fd, F_GETFL, 0);
+	if (flags < 0) {
+		// Save errno because logging might overwrite it
+		err = errno;
+
+		CTX_ERROR(jail->ctx, "Could not read flags of file descriptor %d: %s\n",
+			fd, strerror(err));
+		return -err;
+	}
+
+	// Set modified flags
+	r = fcntl(fd, F_SETFL, flags|O_NONBLOCK);
+	if (r < 0) {
+		err = errno;
+
+		CTX_ERROR(jail->ctx, "Could not set file descriptor %d into non-blocking mode: %s\n",
+			fd, strerror(err));
+		return -err;
+	}
+
+	// Add the file descriptor to the loop
+	r = epoll_ctl(epollfd, EPOLL_CTL_ADD, fd, &event);
+	if (r < 0) {
+		err = errno;
+
+		ERROR(jail->pakfire, "Could not add file descriptor %d to epoll(): %s\n",
+			fd, strerror(err));
+		return -err;
+	}
+
+	return 0;
+}
+
+static int pakfire_jail_setup_child2(struct pakfire_jail* jail, struct pakfire_jail_exec* ctx);
+
+/*
+	Waits for the child process that the given pidfd refers to and
+	translates its status into a return value:
+
+	  * the exit code if the process exited normally
+	  * 139 if the process was killed by a signal or dumped core
+	  * a negative errno code if waitid() failed
+	  * -EBADMSG for any other status
+*/
+static int pakfire_jail_wait_on_child(struct pakfire_jail* jail, int pidfd) {
+	siginfo_t status = {};
+	int r;
+
+	// Call waitid() and store the result
+	r = waitid(P_PIDFD, pidfd, &status, WEXITED);
+	if (r) {
+		// Save errno because logging might overwrite it
+		const int err = errno;
+
+		CTX_ERROR(jail->ctx, "waitid() failed: %s\n", strerror(err));
+		return -err;
+	}
+
+	switch (status.si_code) {
+		// If the process exited normally, we return the exit code
+		case CLD_EXITED:
+			CTX_DEBUG(jail->ctx, "The child process exited with code %d\n", status.si_status);
+			return status.si_status;
+
+		case CLD_KILLED:
+			CTX_ERROR(jail->ctx, "The child process was killed\n");
+			return 139;
+
+		case CLD_DUMPED:
+			CTX_ERROR(jail->ctx, "The child process terminated abnormally\n");
+			return 139;
+
+		// Log anything else
+		default:
+			CTX_ERROR(jail->ctx, "Unknown child exit code: %d\n", status.si_code);
+			break;
+	}
+
+	return -EBADMSG;
}
static int pakfire_jail_wait(struct pakfire_jail* jail, struct pakfire_jail_exec* ctx) {
int epollfd = -1;
- struct epoll_event ev;
struct epoll_event events[EPOLL_MAX_EVENTS];
+ char garbage[8];
int r = 0;
+ // Fetch the UNIX domain socket
+ const int socket_recv = pakfire_jail_get_pipe_to_read(jail, &ctx->socket);
+
// Fetch file descriptors from context
const int stdin = pakfire_jail_get_pipe_to_write(jail, &ctx->pipes.stdin);
const int stdout = pakfire_jail_get_pipe_to_read(jail, &ctx->pipes.stdout);
const int stderr = pakfire_jail_get_pipe_to_read(jail, &ctx->pipes.stderr);
- const int pidfd = ctx->pidfd;
+
+ // Timer
+ const int timerfd = pakfire_jail_create_timer(jail);
// Logging
const int log_INFO = pakfire_jail_get_pipe_to_read(jail, &ctx->pipes.log_INFO);
const int log_ERROR = pakfire_jail_get_pipe_to_read(jail, &ctx->pipes.log_ERROR);
+#ifdef ENABLE_DEBUG
const int log_DEBUG = pakfire_jail_get_pipe_to_read(jail, &ctx->pipes.log_DEBUG);
+#endif /* ENABLE_DEBUG */
+
+#if 0
+ // Signals
+ const int signalfd = pakfire_jail_handle_signals(jail);
+#endif
// Make a list of all file descriptors we are interested in
- int fds[] = {
- stdin, stdout, stderr, pidfd, log_INFO, log_ERROR, log_DEBUG,
+ const struct pakfire_wait_fds {
+ const int fd;
+ const int events;
+ } fds[] = {
+ { socket_recv, EPOLLIN },
+
+ // Standard input/output
+ { stdin, EPOLLOUT },
+ { stdout, EPOLLIN },
+ { stderr, EPOLLIN },
+
+ // Timer
+ { timerfd, EPOLLIN },
+
+ // Child Processes
+ { ctx->pidfd1, EPOLLIN },
+
+#if 0
+ // Signals
+ { signafd, EPOLLIN },
+#endif
+
+ // Log Pipes
+ { log_INFO, EPOLLIN },
+ { log_ERROR, EPOLLIN },
+#ifdef ENABLE_DEBUG
+ { log_DEBUG, EPOLLIN },
+#endif /* ENABLE_DEBUG */
+
+ // Sentinel
+ { -1, 0 },
};
// Setup epoll
}
// Turn file descriptors into non-blocking mode and add them to epoll()
- for (unsigned int i = 0; i < sizeof(fds) / sizeof(*fds); i++) {
- int fd = fds[i];
-
+ for (const struct pakfire_wait_fds* fd = fds; fd->events; fd++) {
// Skip fds which were not initialized
- if (fd <= 0)
+ if (fd->fd < 0)
continue;
- ev.events = EPOLLHUP;
-
- if (fd == stdin)
- ev.events |= EPOLLOUT;
- else
- ev.events |= EPOLLIN;
-
- // Read flags
- int flags = fcntl(fd, F_GETFL, 0);
-
- // Set modified flags
- if (fcntl(fd, F_SETFL, flags|O_NONBLOCK) < 0) {
- ERROR(jail->pakfire,
- "Could not set file descriptor %d into non-blocking mode: %m\n", fd);
- r = 1;
- goto ERROR;
- }
-
- ev.data.fd = fd;
-
- if (epoll_ctl(epollfd, EPOLL_CTL_ADD, fd, &ev) < 0) {
- ERROR(jail->pakfire, "Could not add file descriptor %d to epoll(): %m\n", fd);
- r = 1;
+ // Add the FD to the event loop
+ r = pakfire_jail_epoll_add_fd(jail, epollfd, fd->fd, fd->events);
+ if (r)
goto ERROR;
- }
}
int ended = 0;
+ int exit = 0;
+
+ CTX_DEBUG(jail->ctx, "Launching main loop...\n");
// Loop for as long as the process is alive
while (!ended) {
// Check if there is any data to be read
if (e & EPOLLIN) {
- // Handle any changes to the PIDFD
- if (fd == pidfd) {
- // Call waidid() and store the result
- r = waitid(P_PIDFD, ctx->pidfd, &ctx->status, WEXITED);
+ // Monitor the first child process
+ if (fd == ctx->pidfd1) {
+ r = pakfire_jail_wait_on_child(jail, ctx->pidfd1);
if (r) {
- ERROR(jail->pakfire, "waitid() failed: %m\n");
+ CTX_ERROR(jail->ctx, "The first child exited with an error\n");
+ goto ERROR;
+ }
+
+ close(ctx->pidfd1);
+ ctx->pidfd1 = -1;
+
+ continue;
+
+ // Monitor the second child process
+ } else if (fd == ctx->pidfd2) {
+ exit = pakfire_jail_wait_on_child(jail, ctx->pidfd2);
+ if (exit < 0) {
+ CTX_ERROR(jail->ctx, "The second child exited with an error\n");
goto ERROR;
}
+ close(ctx->pidfd2);
+ ctx->pidfd2 = -1;
+
// Mark that we have ended so that we will process the remaining
// events from epoll() now, but won't restart the outer loop.
ended = 1;
+
+ continue;
+
+ // Handle timer events
+ } else if (fd == timerfd) {
+ DEBUG(jail->pakfire, "Timer event received\n");
+
+ // Disarm the timer
+ r = read(timerfd, garbage, sizeof(garbage));
+ if (r < 1) {
+ ERROR(jail->pakfire, "Could not disarm timer: %m\n");
+ r = 1;
+ goto ERROR;
+ }
+
+ // Terminate the process if it hasn't already ended
+ if (!ended) {
+ DEBUG(jail->pakfire, "Terminating process...\n");
+
+ // Send SIGKILL to the process
+ r = pidfd_send_signal(ctx->pidfd2, SIGKILL, NULL, 0);
+ if (r) {
+ ERROR(jail->pakfire, "Could not kill process: %m\n");
+ goto ERROR;
+ }
+ }
+
+ // There is nothing else to do
+ continue;
+
+#if 0
+ // Handle signals
+ } else if (fd == signalfd) {
+ // Read the signal
+ r = read(signalfd, &siginfo, sizeof(siginfo));
+ if (r < 1) {
+ ERROR(jail->pakfire, "Could not read signal: %m\n");
+ goto ERROR;
+ }
+
+ DEBUG(jail->pakfire, "Received signal %u\n", siginfo.ssi_signo);
+
+ // Handle signals
+ switch (siginfo.ssi_signo) {
+ // Pass SIGINT down to the child process
+ case SIGINT:
+ r = pidfd_send_signal(pidfd, siginfo.ssi_signo, NULL, 0);
+ if (r) {
+ ERROR(jail->pakfire, "Could not send signal to process: %m\n");
+ goto ERROR;
+ }
+ break;
+
+ default:
+ ERROR(jail->pakfire, "Received unhandled signal %u\n",
+ siginfo.ssi_signo);
+ break;
+ }
+
+ // Don't fall through to log processing
+ continue;
+#endif
+
+ // Handle socket messages
+ } else if (fd == socket_recv) {
+ // Receive the FD of the second child process
+ r = pakfire_jail_recv_fd(jail, socket_recv, &ctx->pidfd2);
+ if (r)
+ goto ERROR;
+
+ // Add it to the event loop
+ r = pakfire_jail_epoll_add_fd(jail, epollfd, ctx->pidfd2, EPOLLIN);
+ if (r)
+ goto ERROR;
+
+ // Setup the child process
+ r = pakfire_jail_setup_child2(jail, ctx);
+ if (r)
+ goto ERROR;
+
+ // Don't fall through to log processing
continue;
// Handle logging messages
buffer = &ctx->buffers.log_INFO;
priority = LOG_INFO;
- callback = pakfire_jail_default_log_callback;
+ callback = pakfire_jail_log;
} else if (fd == log_ERROR) {
buffer = &ctx->buffers.log_ERROR;
priority = LOG_ERR;
- callback = pakfire_jail_default_log_callback;
+ callback = pakfire_jail_log;
+#ifdef ENABLE_DEBUG
} else if (fd == log_DEBUG) {
buffer = &ctx->buffers.log_DEBUG;
priority = LOG_DEBUG;
- callback = pakfire_jail_default_log_callback;
+ callback = pakfire_jail_log;
+#endif /* ENABLE_DEBUG */
// Handle anything from the log pipes
} else if (fd == stdout) {
buffer = &ctx->buffers.stdout;
priority = LOG_INFO;
- callback = ctx->communicate.out;
- data = ctx->communicate.data;
+ // Send any output to the default logger if no callback is set
+ if (ctx->communicate.out) {
+ callback = ctx->communicate.out;
+ data = ctx->communicate.data;
+ } else {
+ callback = jail->callbacks.log;
+ data = jail->callbacks.log_data;
+ }
} else if (fd == stderr) {
buffer = &ctx->buffers.stderr;
priority = LOG_ERR;
- callback = ctx->communicate.out;
- data = ctx->communicate.data;
+ // Send any output to the default logger if no callback is set
+ if (ctx->communicate.out) {
+ callback = ctx->communicate.out;
+ data = ctx->communicate.data;
+ } else {
+ callback = jail->callbacks.log;
+ data = jail->callbacks.log_data;
+ }
} else {
DEBUG(jail->pakfire, "Received invalid file descriptor %d\n", fd);
}
}
+ // Return the exit code
+ r = exit;
+
ERROR:
- if (epollfd > 0)
+ CTX_DEBUG(jail->ctx, "Main loop terminated\n");
+
+ if (epollfd >= 0)
close(epollfd);
+ if (timerfd >= 0)
+ close(timerfd);
+#if 0
+ if (signalfd >= 0)
+ close(signalfd);
+#endif
return r;
}
// Capabilities
-static int pakfire_jail_drop_capabilities(struct pakfire_jail* jail) {
- const int capabilities[] = {
- // Deny access to the kernel's audit system
- CAP_AUDIT_CONTROL,
- CAP_AUDIT_READ,
- CAP_AUDIT_WRITE,
-
- // Deny suspending block devices
- CAP_BLOCK_SUSPEND,
-
- // Deny any stuff with BPF
- CAP_BPF,
-
- // Deny checkpoint restore
- CAP_CHECKPOINT_RESTORE,
-
- // Deny opening files by inode number (open_by_handle_at)
- CAP_DAC_READ_SEARCH,
+// Logs all capabilities of the current process
+static int pakfire_jail_show_capabilities(struct pakfire_jail* jail) {
+ cap_t caps = NULL;
+ char* name = NULL;
+ cap_flag_value_t value_e;
+ cap_flag_value_t value_i;
+ cap_flag_value_t value_p;
+ int r;
- // Deny setting SUID bits
- CAP_FSETID,
+ // Fetch PID
+ pid_t pid = getpid();
- // Deny locking more memory
- CAP_IPC_LOCK,
+ // Fetch all capabilities
+ caps = cap_get_proc();
+ if (!caps) {
+ ERROR(jail->pakfire, "Could not fetch capabilities: %m\n");
+ r = 1;
+ goto ERROR;
+ }
- // Deny modifying any Apparmor/SELinux/SMACK configuration
- CAP_MAC_ADMIN,
- CAP_MAC_OVERRIDE,
+ DEBUG(jail->pakfire, "Capabilities of PID %d:\n", pid);
- // Deny creating any special devices
- CAP_MKNOD,
+ // Iterate over all capabilities
+ for (unsigned int cap = 0; cap_valid(cap); cap++) {
+ name = cap_to_name(cap);
- // Deny setting any capabilities
- CAP_SETFCAP,
+ // Fetch effective value
+ r = cap_get_flag(caps, cap, CAP_EFFECTIVE, &value_e);
+ if (r)
+ goto ERROR;
- // Deny reading from syslog
- CAP_SYSLOG,
+ // Fetch inheritable value
+ r = cap_get_flag(caps, cap, CAP_INHERITABLE, &value_i);
+ if (r)
+ goto ERROR;
- // Deny any admin actions (mount, sethostname, ...)
- CAP_SYS_ADMIN,
+ // Fetch permitted value
+ r = cap_get_flag(caps, cap, CAP_PERMITTED, &value_p);
+ if (r)
+ goto ERROR;
- // Deny rebooting the system
- CAP_SYS_BOOT,
+ DEBUG(jail->pakfire,
+ " %-24s : %c%c%c\n",
+ name,
+ (value_e == CAP_SET) ? 'e' : '-',
+ (value_i == CAP_SET) ? 'i' : '-',
+ (value_p == CAP_SET) ? 'p' : '-'
+ );
- // Deny loading kernel modules
- CAP_SYS_MODULE,
+ // Free name
+ cap_free(name);
+ name = NULL;
+ }
- // Deny setting nice level
- CAP_SYS_NICE,
+ // Success
+ r = 0;
- // Deny access to /proc/kcore, /dev/mem, /dev/kmem
- CAP_SYS_RAWIO,
+ERROR:
+ if (name)
+ cap_free(name);
+ if (caps)
+ cap_free(caps);
- // Deny circumventing any resource limits
- CAP_SYS_RESOURCE,
+ return r;
+}
- // Deny setting the system time
- CAP_SYS_TIME,
+static int pakfire_jail_set_capabilities(struct pakfire_jail* jail) {
+ cap_t caps = NULL;
+ char* name = NULL;
+ int r;
- // Deny playing with suspend
- CAP_WAKE_ALARM,
+ // Fetch capabilities
+ caps = cap_get_proc();
+ if (!caps) {
+ ERROR(jail->pakfire, "Could not read capabilities: %m\n");
+ r = 1;
+ goto ERROR;
+ }
- 0,
- };
+ // Walk through all capabilities
+ for (cap_value_t cap = 0; cap_valid(cap); cap++) {
+ cap_value_t _caps[] = { cap };
- DEBUG(jail->pakfire, "Dropping capabilities...\n");
+ // Fetch the name of the capability
+ name = cap_to_name(cap);
- size_t num_caps = 0;
- int r;
+ r = cap_set_flag(caps, CAP_EFFECTIVE, 1, _caps, CAP_SET);
+ if (r) {
+ ERROR(jail->pakfire, "Could not set %s: %m\n", name);
+ goto ERROR;
+ }
- // Drop any capabilities
- for (const int* cap = capabilities; *cap; cap++) {
- r = prctl(PR_CAPBSET_DROP, *cap, 0, 0, 0);
+ r = cap_set_flag(caps, CAP_INHERITABLE, 1, _caps, CAP_SET);
if (r) {
- ERROR(jail->pakfire, "Could not drop capability %d: %m\n", *cap);
- return r;
+ ERROR(jail->pakfire, "Could not set %s: %m\n", name);
+ goto ERROR;
}
- num_caps++;
- }
+ r = cap_set_flag(caps, CAP_PERMITTED, 1, _caps, CAP_SET);
+ if (r) {
+ ERROR(jail->pakfire, "Could not set %s: %m\n", name);
+ goto ERROR;
+ }
- // Fetch any capabilities
- cap_t caps = cap_get_proc();
- if (!caps) {
- ERROR(jail->pakfire, "Could not read capabilities: %m\n");
- return 1;
+ // Free name
+ cap_free(name);
+ name = NULL;
}
- /*
- Set inheritable capabilities
-
- This ensures that no processes will be able to gain any of the listed
- capabilities again.
- */
- r = cap_set_flag(caps, CAP_INHERITABLE, num_caps, capabilities, CAP_CLEAR);
+ // Restore all capabilities
+ r = cap_set_proc(caps);
if (r) {
- ERROR(jail->pakfire, "cap_set_flag() failed: %m\n");
+ ERROR(jail->pakfire, "Restoring capabilities failed: %m\n");
goto ERROR;
}
- // Restore capabilities
- r = cap_set_proc(caps);
- if (r) {
- ERROR(jail->pakfire, "Could not restore capabilities: %m\n");
- goto ERROR;
+ // Add all capabilities to the ambient set
+ for (unsigned int cap = 0; cap_valid(cap); cap++) {
+ name = cap_to_name(cap);
+
+ // Raise the capability
+ r = prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_RAISE, cap, 0, 0);
+ if (r) {
+ ERROR(jail->pakfire, "Could not set ambient capability %s: %m\n", name);
+ goto ERROR;
+ }
+
+ // Free name
+ cap_free(name);
+ name = NULL;
}
+ // Success
+ r = 0;
+
ERROR:
+ if (name)
+ cap_free(name);
if (caps)
cap_free(caps);
// Bind-mount all paths read-only
for (const char** path = paths; *path; path++) {
r = pakfire_bind(jail->pakfire, *path, NULL, MS_RDONLY);
- if (r)
+ if (r) {
+ switch (errno) {
+ // Ignore if we don't have permission
+ case EPERM:
+ continue;
+
+ default:
+ break;
+ }
return r;
+ }
}
return 0;
*/
static int pakfire_jail_mount(struct pakfire_jail* jail, struct pakfire_jail_exec* ctx) {
struct pakfire_jail_mountpoint* mp = NULL;
+ int flags = 0;
int r;
+ // Enable loop devices
+ if (pakfire_jail_exec_has_flag(ctx, PAKFIRE_JAIL_HAS_LOOP_DEVICES))
+ flags |= PAKFIRE_MOUNT_LOOP_DEVICES;
+
// Mount all default stuff
- r = pakfire_mount_all(jail->pakfire);
+ r = pakfire_mount_all(jail->pakfire, flags);
if (r)
return r;
return 0;
}
+// Networking
+
+/*
+	Brings up the loopback interface ("lo") using netlink.
+
+	Returns zero on success, and also if there is no "lo" interface, in
+	which case only a message is logged. A non-zero value is returned if
+	any of the netlink operations failed.
+
+	All netlink objects which are allocated here are released again before
+	the function returns.
+*/
+static int pakfire_jail_setup_loopback(struct pakfire_jail* jail) {
+	struct nl_sock* nl = NULL;
+	struct nl_cache* cache = NULL;
+	struct rtnl_link* link = NULL;
+	struct rtnl_link* change = NULL;
+	int r;
+
+	DEBUG(jail->pakfire, "Setting up loopback...\n");
+
+	// Allocate a netlink socket
+	nl = nl_socket_alloc();
+	if (!nl) {
+		ERROR(jail->pakfire, "Could not allocate a netlink socket: %m\n");
+		r = 1;
+		goto ERROR;
+	}
+
+	// Connect the socket
+	r = nl_connect(nl, NETLINK_ROUTE);
+	if (r) {
+		ERROR(jail->pakfire, "Could not connect netlink socket: %s\n", nl_geterror(r));
+		goto ERROR;
+	}
+
+	// Allocate the netlink cache
+	r = rtnl_link_alloc_cache(nl, AF_UNSPEC, &cache);
+	if (r < 0) {
+		ERROR(jail->pakfire, "Unable to allocate netlink cache: %s\n", nl_geterror(r));
+		goto ERROR;
+	}
+
+	// Fetch loopback interface (this returns a reference that we have to drop)
+	link = rtnl_link_get_by_name(cache, "lo");
+	if (!link) {
+		ERROR(jail->pakfire, "Could not find lo interface. Ignoring.\n");
+		r = 0;
+		goto ERROR;
+	}
+
+	// Allocate a new link
+	change = rtnl_link_alloc();
+	if (!change) {
+		ERROR(jail->pakfire, "Could not allocate change link\n");
+		r = 1;
+		goto ERROR;
+	}
+
+	// Set the link to UP
+	rtnl_link_set_flags(change, IFF_UP);
+
+	// Apply any changes
+	r = rtnl_link_change(nl, link, change, 0);
+	if (r) {
+		ERROR(jail->pakfire, "Unable to activate loopback: %s\n", nl_geterror(r));
+		goto ERROR;
+	}
+
+	// Success
+	r = 0;
+
+ERROR:
+	// Release everything in the reverse order of allocation
+	if (change)
+		rtnl_link_put(change);
+	if (link)
+		rtnl_link_put(link);
+	if (cache)
+		nl_cache_free(cache);
+	if (nl)
+		nl_socket_free(nl);
+
+	return r;
+}
+
// UID/GID Mapping
static int pakfire_jail_setup_uid_mapping(struct pakfire_jail* jail, pid_t pid) {
"0 %lu %lu\n", subgid->id, subgid->length);
} else {
r = pakfire_file_write(jail->pakfire, path, 0, 0, 0,
- "0 %lu 1\n%1 %lu %lu\n", gid, subgid->id, subgid->length);
+ "0 %lu 1\n1 %lu %lu\n", gid, subgid->id, subgid->length);
}
if (r) {
DEBUG(jail->pakfire, "Sending signal...\n");
// Write to the file descriptor
- ssize_t bytes_written = write(fd, &val, sizeof(val));
- if (bytes_written < 0 || (size_t)bytes_written < sizeof(val)) {
- ERROR(jail->pakfire, "Could not send signal: %m\n");
- r = 1;
+ r = eventfd_write(fd, val);
+ if (r < 0) {
+ ERROR(jail->pakfire, "Could not send signal: %s\n", strerror(errno));
+ r = -errno;
}
// Close the file descriptor
DEBUG(jail->pakfire, "Waiting for signal...\n");
- ssize_t bytes_read = read(fd, &val, sizeof(val));
- if (bytes_read < 0 || (size_t)bytes_read < sizeof(val)) {
- ERROR(jail->pakfire, "Error waiting for signal: %m\n");
- r = 1;
+ r = eventfd_read(fd, &val);
+ if (r < 0) {
+ ERROR(jail->pakfire, "Error waiting for signal: %s\n", strerror(errno));
+ r = -errno;
}
// Close the file descriptor
return r;
}
+/*
+	Makes the given directory the new root of the calling process.
+
+	This happens in three steps: change into the directory, call
+	pivot_root(".", "."), and finally detach whatever is mounted
+	at "." afterwards.
+
+	Returns zero on success or the result of the failed call otherwise.
+*/
+static int pakfire_jail_switch_root(struct pakfire_jail* jail, const char* root) {
+	int r;
+
+	// Change to the new root
+	r = chdir(root);
+	if (r != 0) {
+		ERROR(jail->pakfire, "chdir(%s) failed: %m\n", root);
+		return r;
+	}
+
+	// Switch Root!
+	r = pivot_root(".", ".");
+	if (r != 0) {
+		ERROR(jail->pakfire, "Failed changing into the new root directory %s: %m\n", root);
+		return r;
+	}
+
+	// Umount the old root
+	r = umount2(".", MNT_DETACH);
+	if (r != 0)
+		ERROR(jail->pakfire, "Could not umount the old root filesystem: %m\n");
+
+	return r;
+}
+
/*
- Performs the initialisation that needs to happen in the parent part
+ Called by the parent that sets up the second child process...
*/
-static int pakfire_jail_parent(struct pakfire_jail* jail, struct pakfire_jail_exec* ctx) {
+static int pakfire_jail_setup_child2(
+ struct pakfire_jail* jail, struct pakfire_jail_exec* ctx) {
+ pid_t pid = -1;
int r;
+ // Fetch the PID
+ r = pidfd_get_pid(ctx->pidfd2, &pid);
+ if (r) {
+ CTX_ERROR(jail->ctx, "Could not fetch PID: %s\n", strerror(-r));
+ return r;
+ }
+
// Setup UID mapping
- r = pakfire_jail_setup_uid_mapping(jail, ctx->pid);
+ r = pakfire_jail_setup_uid_mapping(jail, pid);
if (r)
return r;
// Write "deny" to /proc/PID/setgroups
- r = pakfire_jail_setgroups(jail, ctx->pid);
+ r = pakfire_jail_setgroups(jail, pid);
if (r)
return r;
// Setup GID mapping
- r = pakfire_jail_setup_gid_mapping(jail, ctx->pid);
+ r = pakfire_jail_setup_gid_mapping(jail, pid);
if (r)
return r;
return 0;
}
-static int pakfire_jail_child(struct pakfire_jail* jail, struct pakfire_jail_exec* ctx,
- const char* argv[]) {
+/*
+ Child 2 is launched in their own user/mount/etc. namespace.
+*/
+static int pakfire_jail_child2(struct pakfire_jail* jail,
+ struct pakfire_jail_exec* ctx, const char* argv[]) {
int r;
- // Redirect any logging to our log pipe
- pakfire_set_log_callback(jail->pakfire, pakfire_jail_log, &ctx->pipes);
+ // Fetch my own PID
+ pid_t pid = getpid();
+
+ CTX_DEBUG(jail->ctx, "Launched child process in jail with PID %d\n", pid);
// Die with parent
r = prctl(PR_SET_PDEATHSIG, SIGKILL, 0, 0, 0);
if (r) {
- ERROR(jail->pakfire, "Could not configure to die with parent: %m\n");
+ CTX_ERROR(jail->ctx, "Could not configure to die with parent: %m\n");
return 126;
}
- // Fetch my own PID
- pid_t pid = getpid();
+ // Make this process dumpable
+ r = prctl (PR_SET_DUMPABLE, 1, 0, 0, 0);
+ if (r) {
+ CTX_ERROR(jail->ctx, "Could not make the process dumpable: %m\n");
+ return 126;
+ }
- DEBUG(jail->pakfire, "Launched child process in jail with PID %d\n", pid);
+ // Don't drop any capabilities on setuid()
+ r = prctl(PR_SET_KEEPCAPS, 1);
+ if (r) {
+ CTX_ERROR(jail->ctx, "Could not set PR_SET_KEEPCAPS: %m\n");
+ return 126;
+ }
// Wait for the parent to finish initialization
r = pakfire_jail_wait_for_signal(jail, ctx->completed_fd);
if (r)
return r;
- // Perform further initialization
-
// Fetch UID/GID
uid_t uid = getuid();
gid_t gid = getgid();
uid_t euid = geteuid();
gid_t egid = getegid();
- DEBUG(jail->pakfire, " UID: %d (effective %d)\n", uid, euid);
- DEBUG(jail->pakfire, " GID: %d (effective %d)\n", gid, egid);
+ DEBUG(jail->pakfire, " UID: %u (effective %u)\n", uid, euid);
+ DEBUG(jail->pakfire, " GID: %u (effective %u)\n", gid, egid);
+
+ // Fail if we are not PID 1
+ if (pid != 1) {
+ CTX_ERROR(jail->ctx, "Child process is not PID 1\n");
+ //return 126;
+ }
- // Check if we are (effectively running as root)
+ // Fail if we are not running as root
if (uid || gid || euid || egid) {
ERROR(jail->pakfire, "Child process is not running as root\n");
- return 126;
+ //return 126;
}
- const char* root = pakfire_get_path(jail->pakfire);
- const char* arch = pakfire_get_arch(jail->pakfire);
-
- // Change root (unless root is /)
- if (!pakfire_on_root(jail->pakfire)) {
- // Mount everything
- r = pakfire_jail_mount(jail, ctx);
- if (r)
- return r;
-
- // Call chroot()
- r = chroot(root);
- if (r) {
- ERROR(jail->pakfire, "chroot() to %s failed: %m\n", root);
- return 1;
- }
-
- // Change directory to /
- r = chdir("/");
- if (r) {
- ERROR(jail->pakfire, "chdir() after chroot() failed: %m\n");
- return 1;
- }
- }
+ const char* arch = pakfire_get_effective_arch(jail->pakfire);
// Set personality
unsigned long persona = pakfire_arch_personality(arch);
r = personality(persona);
if (r < 0) {
ERROR(jail->pakfire, "Could not set personality (%x)\n", (unsigned int)persona);
- return 1;
+ return 126;
}
}
+ // Setup networking
+ if (!pakfire_jail_exec_has_flag(ctx, PAKFIRE_JAIL_HAS_NETWORKING)) {
+ r = pakfire_jail_setup_loopback(jail);
+ if (r)
+ return 1;
+ }
+
// Set nice level
if (jail->nice) {
DEBUG(jail->pakfire, "Setting nice level to %d\n", jail->nice);
#endif /* ENABLE_DEBUG */
// Connect standard input
- if (ctx->pipes.stdin[0]) {
+ if (ctx->pipes.stdin[0] >= 0) {
r = dup2(ctx->pipes.stdin[0], STDIN_FILENO);
if (r < 0) {
ERROR(jail->pakfire, "Could not connect fd %d to stdin: %m\n",
}
// Connect standard output and error
- if (ctx->pipes.stdout[1] && ctx->pipes.stderr[1]) {
+ if (ctx->pipes.stdout[1] >= 0 && ctx->pipes.stderr[1] >= 0) {
r = dup2(ctx->pipes.stdout[1], STDOUT_FILENO);
if (r < 0) {
ERROR(jail->pakfire, "Could not connect fd %d to stdout: %m\n",
if (r)
return r;
- // Drop capabilities
- r = pakfire_jail_drop_capabilities(jail);
+ // Set capabilities
+ r = pakfire_jail_set_capabilities(jail);
+ if (r)
+ return r;
+
+ // Show capabilities
+ r = pakfire_jail_show_capabilities(jail);
if (r)
return r;
if (r)
return r;
- DEBUG(jail->pakfire, "Child process initialization done\n");
- DEBUG(jail->pakfire, "Launching command:\n");
+ CTX_DEBUG(jail->ctx, "Child process initialization done\n");
+ CTX_DEBUG(jail->ctx, "Launching command:\n");
// Log argv
for (unsigned int i = 0; argv[i]; i++)
- DEBUG(jail->pakfire, " argv[%d] = %s\n", i, argv[i]);
+ CTX_DEBUG(jail->ctx, " argv[%u] = %s\n", i, argv[i]);
// exec() command
r = execvpe(argv[0], (char**)argv, jail->env);
- if (r < 0)
- ERROR(jail->pakfire, "Could not execve(): %m\n");
+ if (r < 0) {
+ // Translate errno into regular exit code
+ switch (errno) {
+ case ENOENT:
+ // Ignore if the command doesn't exist
+ if (ctx->flags & PAKFIRE_JAIL_NOENT_OK)
+ r = 0;
+ else
+ r = 127;
- // Translate errno into regular exit code
- switch (errno) {
- case ENOENT:
- r = 127;
- break;
+ break;
- default:
- r = 1;
+ default:
+ r = 1;
+ }
+
+ CTX_ERROR(jail->ctx, "Could not execve(%s): %m\n", argv[0]);
}
// We should not get here
return r;
}
+/*
+	Child 1 is launched in a new mount namespace.
+
+	It redirects logging to the log pipes, prepares the mount tree (propagation,
+	mounts, switching root) and then clones the second child into new
+	cgroup/IPC/mount/PID/time/user/UTS namespaces (plus a new network namespace
+	unless PAKFIRE_JAIL_HAS_NETWORKING is set). The second child runs
+	pakfire_jail_child2(); its pidfd is sent to the first parent over ctx->socket.
+
+	Returns zero if all steps succeeded and non-zero otherwise. The caller
+	passes the returned value to _exit().
+*/
+static int pakfire_jail_child1(struct pakfire_jail* jail,
+		struct pakfire_jail_exec* ctx, const char* argv[]) {
+	int r;
+
+	// Redirect any logging to our log pipe
+	pakfire_ctx_set_log_callback(jail->ctx, pakfire_jail_log_redirect, &ctx->pipes);
+
+	CTX_DEBUG(jail->ctx, "First child process launched\n");
+
+	const int socket_send = pakfire_jail_get_pipe_to_write(jail, &ctx->socket);
+
+	const char* root = pakfire_get_path(jail->pakfire);
+
+	// Die with parent
+	r = prctl(PR_SET_PDEATHSIG, SIGKILL, 0, 0, 0);
+	if (r) {
+		CTX_ERROR(jail->ctx, "Could not configure to die with parent: %s\n", strerror(errno));
+		goto ERROR;
+	}
+
+	// Change mount propagation so that we will receive, but don't propagate back
+	r = pakfire_mount_change_propagation(jail->ctx, "/", MS_SLAVE);
+	if (r) {
+		CTX_ERROR(jail->ctx, "Could not change mount propagation to SLAVE: %s\n", strerror(r));
+		goto ERROR;
+	}
+
+	// Make root a mountpoint in the new mount namespace
+	r = pakfire_mount_make_mounpoint(jail->pakfire, root);
+	if (r)
+		goto ERROR;
+
+	// Make everything private
+	r = pakfire_mount_change_propagation(jail->ctx, root, MS_PRIVATE);
+	if (r) {
+		CTX_ERROR(jail->ctx, "Could not change mount propagation to PRIVATE: %s\n", strerror(r));
+		goto ERROR;
+	}
+
+	// Mount everything
+	r = pakfire_jail_mount(jail, ctx);
+	if (r)
+		goto ERROR;
+
+	// chroot()
+	r = pakfire_jail_switch_root(jail, root);
+	if (r)
+		goto ERROR;
+
+	// Change mount propagation so that we will propagate everything down
+	r = pakfire_mount_change_propagation(jail->ctx, "/", MS_SHARED);
+	if (r) {
+		CTX_ERROR(jail->ctx, "Could not change mount propagation to SHARED: %s\n", strerror(r));
+		goto ERROR;
+	}
+
+	// Configure child process
+	struct clone_args args = {
+		.flags =
+			CLONE_NEWCGROUP |
+			CLONE_NEWIPC |
+			CLONE_NEWNS |
+			CLONE_NEWPID |
+			CLONE_NEWTIME |
+			CLONE_NEWUSER |
+			CLONE_NEWUTS |
+			CLONE_PIDFD,
+		.exit_signal = SIGCHLD,
+		.pidfd = (long long unsigned int)&ctx->pidfd2,
+	};
+
+	// Launch the process into the configured cgroup
+	if (ctx->cgroup) {
+		args.flags |= CLONE_INTO_CGROUP;
+
+		// Clone into this cgroup
+		args.cgroup = pakfire_cgroup_fd(ctx->cgroup);
+	}
+
+	// Setup networking
+	if (!pakfire_jail_exec_has_flag(ctx, PAKFIRE_JAIL_HAS_NETWORKING))
+		args.flags |= CLONE_NEWNET;
+
+	// Fork the second child process
+	pid_t pid = clone3(&args, sizeof(args));
+	if (pid < 0) {
+		// Save errno first, because logging may overwrite it
+		r = -errno;
+
+		CTX_ERROR(jail->ctx, "Could not fork the second child process: %s\n", strerror(-r));
+		goto ERROR;
+
+	// Child process
+	} else if (pid == 0) {
+		r = pakfire_jail_child2(jail, ctx, argv);
+		_exit(r);
+	}
+
+	// Send the pidfd of the child to the first parent
+	r = pakfire_jail_send_fd(jail, socket_send, ctx->pidfd2);
+	if (r)
+		goto ERROR;
+
+ERROR:
+	return r;
+}
+
// Run a command in the jail
static int __pakfire_jail_exec(struct pakfire_jail* jail, const char* argv[],
const int interactive,
pakfire_jail_communicate_in communicate_in,
pakfire_jail_communicate_out communicate_out,
- void* data) {
- int exit = -1;
+ void* data, int flags) {
int r;
// Check if argv is valid
return -1;
}
- // Send any output to the default logger if no callback is set
- if (!communicate_out)
- communicate_out = pakfire_jail_default_log_callback;
-
// Initialize context for this call
struct pakfire_jail_exec ctx = {
- .flags = 0,
+ .flags = flags,
+
+ .socket = { -1, -1 },
.pipes = {
- .stdin = { 0, 0 },
- .stdout = { 0, 0 },
- .stderr = { 0, 0 },
+ .stdin = { -1, -1 },
+ .stdout = { -1, -1 },
+ .stderr = { -1, -1 },
+ .log_INFO = { -1, -1 },
+ .log_ERROR = { -1, -1 },
+#ifdef ENABLE_DEBUG
+ .log_DEBUG = { -1, -1 },
+#endif /* ENABLE_DEBUG */
},
.communicate = {
.out = communicate_out,
.data = data,
},
+
+ // PIDs
+ .pidfd1 = -1,
+ .pidfd2 = -1,
};
DEBUG(jail->pakfire, "Executing jail...\n");
+ // Become the subreaper
+ r = prctl(PR_SET_CHILD_SUBREAPER, 1, 0, 0, 0);
+ if (r < 0) {
+ CTX_ERROR(jail->ctx, "Failed to become the sub-reaper: %s\n", strerror(errno));
+ r = -errno;
+ goto ERROR;
+ }
+
// Enable networking in interactive mode
if (interactive)
ctx.flags |= PAKFIRE_JAIL_HAS_NETWORKING;
+ // Create a UNIX domain socket
+ r = socketpair(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC, 0, ctx.socket);
+ if (r < 0) {
+ CTX_ERROR(jail->ctx, "Could not create UNIX socket: %s\n", strerror(errno));
+ r = -errno;
+ goto ERROR;
+ }
+
/*
Setup a file descriptor which can be used to notify the client that the parent
has completed configuration.
goto ERROR;
#endif /* ENABLE_DEBUG */
- // Configure child process
- struct clone_args args = {
- .flags =
- CLONE_NEWCGROUP |
- CLONE_NEWIPC |
- CLONE_NEWNS |
- CLONE_NEWPID |
- CLONE_NEWUSER |
- CLONE_NEWUTS |
- CLONE_PIDFD,
- .exit_signal = SIGCHLD,
- .pidfd = (long long unsigned int)&ctx.pidfd,
- };
-
// Launch the process in a cgroup that is a leaf of the configured cgroup
if (jail->cgroup) {
- args.flags |= CLONE_INTO_CGROUP;
-
// Fetch our UUID
const char* uuid = pakfire_jail_uuid(jail);
ERROR(jail->pakfire, "Could not create cgroup for jail: %m\n");
goto ERROR;
}
-
- // Clone into this cgroup
- args.cgroup = pakfire_cgroup_fd(ctx.cgroup);
}
- // Setup networking
- if (!pakfire_jail_exec_has_flag(&ctx, PAKFIRE_JAIL_HAS_NETWORKING)) {
- args.flags |= CLONE_NEWNET;
- }
+ /*
+ Initially, we will set up a new mount namespace and launch a child process in it.
- // Fork this process
- ctx.pid = clone3(&args, sizeof(args));
- if (ctx.pid < 0) {
- ERROR(jail->pakfire, "Could not clone: %m\n");
- return -1;
+ This process remains in the user/ipc/time/etc. namespace and will set up
+ the mount namespace.
+ */
+
+ // Configure child process
+ struct clone_args args = {
+ .flags =
+ CLONE_NEWNS |
+ CLONE_PIDFD |
+ CLONE_CLEAR_SIGHAND,
+ .exit_signal = SIGCHLD,
+ .pidfd = (long long unsigned int)&ctx.pidfd1,
+ };
+
+ // Fork the first child process
+ pid_t pid = clone3(&args, sizeof(args));
+ if (pid < 0) {
+ CTX_ERROR(jail->ctx, "Could not fork the first child process: %s\n", strerror(errno));
+ r = -errno;
+ goto ERROR;
// Child process
- } else if (ctx.pid == 0) {
- r = pakfire_jail_child(jail, &ctx, argv);
+ } else if (pid == 0) {
+ r = pakfire_jail_child1(jail, &ctx, argv);
_exit(r);
}
// Parent process
- r = pakfire_jail_parent(jail, &ctx);
- if (r)
- goto ERROR;
-
- DEBUG(jail->pakfire, "Waiting for PID %d to finish its work\n", ctx.pid);
-
- // Read output of the child process
r = pakfire_jail_wait(jail, &ctx);
if (r)
goto ERROR;
- // Handle exit status
- switch (ctx.status.si_code) {
- case CLD_EXITED:
- DEBUG(jail->pakfire, "The child process exited with code %d\n",
- ctx.status.si_status);
-
- // Pass exit code
- exit = ctx.status.si_status;
- break;
-
- case CLD_KILLED:
- ERROR(jail->pakfire, "The child process was killed\n");
- exit = 139;
- break;
-
- case CLD_DUMPED:
- ERROR(jail->pakfire, "The child process terminated abnormally\n");
- break;
-
- // Log anything else
- default:
- ERROR(jail->pakfire, "Unknown child exit code: %d\n", ctx.status.si_code);
- break;
- }
-
ERROR:
// Destroy the temporary cgroup (if any)
if (ctx.cgroup) {
+#if 0
+ // XXX this is currently disabled because it overwrites r
// Read cgroup stats
r = pakfire_cgroup_stat(ctx.cgroup, &ctx.cgroup_stats);
if (r) {
} else {
pakfire_cgroup_stat_dump(ctx.cgroup, &ctx.cgroup_stats);
}
+#endif
pakfire_cgroup_destroy(ctx.cgroup);
pakfire_cgroup_unref(ctx.cgroup);
pakfire_jail_close_pipe(jail, ctx.pipes.stdin);
pakfire_jail_close_pipe(jail, ctx.pipes.stdout);
pakfire_jail_close_pipe(jail, ctx.pipes.stderr);
- if (ctx.pidfd)
- close(ctx.pidfd);
pakfire_jail_close_pipe(jail, ctx.pipes.log_INFO);
pakfire_jail_close_pipe(jail, ctx.pipes.log_ERROR);
+#ifdef ENABLE_DEBUG
pakfire_jail_close_pipe(jail, ctx.pipes.log_DEBUG);
+#endif /* ENABLE_DEBUG */
+ if (ctx.pidfd1 >= 0)
+ close(ctx.pidfd1);
+ if (ctx.pidfd2 >= 0)
+ close(ctx.pidfd2);
+
+ // Close sockets
+ pakfire_jail_close_pipe(jail, ctx.socket);
- return exit;
+ return r;
}
PAKFIRE_EXPORT int pakfire_jail_exec(
const char* argv[],
pakfire_jail_communicate_in callback_in,
pakfire_jail_communicate_out callback_out,
- void* data) {
- return __pakfire_jail_exec(jail, argv, 0, callback_in, callback_out, data);
+ void* data, int flags) {
+ return __pakfire_jail_exec(jail, argv, 0, callback_in, callback_out, data, flags);
}
static int pakfire_jail_exec_interactive(
- struct pakfire_jail* jail, const char* argv[]) {
+ struct pakfire_jail* jail, const char* argv[], int flags) {
int r;
// Setup interactive stuff
if (r)
return r;
- return __pakfire_jail_exec(jail, argv, 1, NULL, NULL, NULL);
+ return __pakfire_jail_exec(jail, argv, 1, NULL, NULL, NULL, flags);
}
int pakfire_jail_exec_script(struct pakfire_jail* jail,
const char* root = pakfire_get_path(jail->pakfire);
// Write the scriptlet to disk
- r = pakfire_path_join(path, root, PAKFIRE_TMP_DIR "/pakfire-script.XXXXXX");
+ r = pakfire_path_append(path, root, PAKFIRE_TMP_DIR "/pakfire-script.XXXXXX");
if (r)
goto ERROR;
argv[i] = args[i-1];
// Run the script
- r = pakfire_jail_exec(jail, argv, callback_in, callback_out, data);
+ r = pakfire_jail_exec(jail, argv, callback_in, callback_out, data, 0);
ERROR:
if (argv)
int r;
// Create a new jail
- r = pakfire_jail_create(&jail, pakfire, flags);
+ r = pakfire_jail_create(&jail, pakfire);
if (r)
goto ERROR;
// Execute the command
- r = pakfire_jail_exec(jail, argv, NULL, pakfire_jail_capture_stdout, output);
+ r = pakfire_jail_exec(jail, argv, NULL, pakfire_jail_capture_stdout, output, 0);
ERROR:
if (jail)
int r;
// Create a new jail
- r = pakfire_jail_create(&jail, pakfire, flags);
+ r = pakfire_jail_create(&jail, pakfire);
if (r)
goto ERROR;
}
int pakfire_jail_shell(struct pakfire_jail* jail) {
+ int r; // Result of the interactive run
+
const char* argv[] = {
"/bin/bash", "--login", NULL,
};
// Execute /bin/bash
- return pakfire_jail_exec_interactive(jail, argv);
+ r = pakfire_jail_exec_interactive(jail, argv, 0); // No extra exec flags
+
+ // Raise any errors: negative values are passed on to the caller unchanged
+ if (r < 0)
+ return r;
+
+ // Ignore any return codes from the shell: every non-negative result is reported as success
+ return 0;
}
-int pakfire_jail_ldconfig(struct pakfire* pakfire) {
+static int pakfire_jail_run_if_possible(struct pakfire* pakfire, const char** argv) { // Runs argv through pakfire_jail_run() only if argv[0] is executable
char path[PATH_MAX];
+ int r;
- const char* ldconfig = "/sbin/ldconfig";
-
- // Check if ldconfig exists before calling it to avoid overhead
- int r = pakfire_path(pakfire, path, "%s", ldconfig);
+ r = pakfire_path(pakfire, path, "%s", *argv); // Build the path of the command (argv[0]) to check; presumably relative to the Pakfire root - TODO confirm
if (r)
return r;
- // Check if ldconfig is executable
+ // Check if the file is executable; if it is not, skip it and report success
r = access(path, X_OK);
if (r) {
- DEBUG(pakfire, "%s is not executable. Skipping...\n", ldconfig);
+ DEBUG(pakfire, "%s is not executable. Skipping...\n", *argv);
return 0;
}
+ return pakfire_jail_run(pakfire, argv, 0, NULL); // Run the command and pass on its result
+}
+
+int pakfire_jail_ldconfig(struct pakfire* pakfire) { // Runs /sbin/ldconfig if it is executable
const char* argv[] = {
- ldconfig, NULL,
+ "/sbin/ldconfig",
+ NULL,
};
- // Run ldconfig
- return pakfire_jail_run(pakfire, argv, 0, NULL);
+ return pakfire_jail_run_if_possible(pakfire, argv); // Returns 0 when the binary is not executable and therefore skipped
+}
+
+int pakfire_jail_run_systemd_tmpfiles(struct pakfire* pakfire) { // Runs "systemd-tmpfiles --create" if the binary is executable
+ const char* argv[] = {
+ "/usr/bin/systemd-tmpfiles",
+ "--create",
+ NULL,
+ };
+
+ return pakfire_jail_run_if_possible(pakfire, argv); // Returns 0 when the binary is not executable and therefore skipped
}