1 /*#############################################################################
3 # Pakfire - The IPFire package management system #
4 # Copyright (C) 2022 Pakfire development team #
6 # This program is free software: you can redistribute it and/or modify #
7 # it under the terms of the GNU General Public License as published by #
8 # the Free Software Foundation, either version 3 of the License, or #
9 # (at your option) any later version. #
11 # This program is distributed in the hope that it will be useful, #
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of #
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
14 # GNU General Public License for more details. #
16 # You should have received a copy of the GNU General Public License #
17 # along with this program. If not, see <http://www.gnu.org/licenses/>. #
19 #############################################################################*/
23 #include <linux/capability.h>
24 #include <linux/sched.h>
26 #include <linux/wait.h>
31 #include <sys/capability.h>
32 #include <sys/epoll.h>
33 #include <sys/eventfd.h>
34 #include <sys/mount.h>
35 #include <sys/personality.h>
36 #include <sys/prctl.h>
37 #include <sys/resource.h>
38 #include <sys/timerfd.h>
39 #include <sys/types.h>
44 #include <netlink/route/link.h>
52 #include <pakfire/arch.h>
53 #include <pakfire/cgroup.h>
54 #include <pakfire/jail.h>
55 #include <pakfire/logging.h>
56 #include <pakfire/mount.h>
57 #include <pakfire/os.h>
58 #include <pakfire/pakfire.h>
59 #include <pakfire/path.h>
60 #include <pakfire/private.h>
61 #include <pakfire/pwd.h>
62 #include <pakfire/string.h>
63 #include <pakfire/util.h>
// Size (in bytes) of each log buffer. The expression is parenthesised so that
// the macro expands correctly inside larger expressions (e.g. x / BUFFER_SIZE).
#define BUFFER_SIZE      (1024 * 64)
// Number of slots in the jail's environment array
#define ENVIRON_SIZE     128
// Maximum number of events fetched per epoll_wait() call
#define EPOLL_MAX_EVENTS 2
// Maximum number of custom bind-mounts per jail
#define MAX_MOUNTPOINTS  8
70 // The default environment that will be set for every command
71 static const struct environ
{
76 { "LANG", "C.utf-8" },
77 { "PATH", "/usr/local/sbin:/usr/sbin:/sbin:/usr/local/bin:/usr/bin:/bin", },
80 // Tell everything that it is running inside a Pakfire container
81 { "container", "pakfire" },
85 struct pakfire_jail_mountpoint
{
86 char source
[PATH_MAX
];
87 char target
[PATH_MAX
];
92 struct pakfire_ctx
* ctx
;
93 struct pakfire
* pakfire
;
96 // A unique ID for each jail
98 char __uuid
[UUID_STR_LEN
];
104 struct itimerspec timeout
;
107 struct pakfire_cgroup
* cgroup
;
110 char* env
[ENVIRON_SIZE
];
113 struct pakfire_jail_mountpoint mountpoints
[MAX_MOUNTPOINTS
];
114 unsigned int num_mountpoints
;
117 struct pakfire_jail_callbacks
{
119 pakfire_jail_log_callback log
;
124 struct pakfire_log_buffer
{
125 char data
[BUFFER_SIZE
];
129 struct pakfire_jail_exec
{
132 // PIDs (of the children)
136 // Socket to pass FDs
139 // FD to notify the client that the parent has finished initialization
143 struct pakfire_jail_pipes
{
153 #endif /* ENABLE_DEBUG */
157 struct pakfire_jail_communicate
{
158 pakfire_jail_communicate_in in
;
159 pakfire_jail_communicate_out out
;
164 struct pakfire_jail_buffers
{
165 struct pakfire_log_buffer stdout
;
166 struct pakfire_log_buffer stderr
;
169 struct pakfire_log_buffer log_INFO
;
170 struct pakfire_log_buffer log_ERROR
;
172 struct pakfire_log_buffer log_DEBUG
;
173 #endif /* ENABLE_DEBUG */
176 struct pakfire_cgroup
* cgroup
;
177 struct pakfire_cgroup_stats cgroup_stats
;
/*
	Thin wrapper around the raw clone3 system call.

	Hands "args" and "size" to syscall() unchanged and returns its result.
*/
static int clone3(struct clone_args* args, size_t size) {
	const long result = syscall(__NR_clone3, args, size);

	return result;
}
/*
	Thin wrapper around the raw pidfd_send_signal system call.

	All arguments are forwarded to syscall() unchanged and its result is returned.
*/
static int pidfd_send_signal(int pidfd, int sig, siginfo_t* info, unsigned int flags) {
	const long result = syscall(SYS_pidfd_send_signal, pidfd, sig, info, flags);

	return result;
}
/*
	Thin wrapper around the raw pivot_root system call.

	Both paths are forwarded to syscall() unchanged and its result is returned.
*/
static int pivot_root(const char* new_root, const char* old_root) {
	const long result = syscall(SYS_pivot_root, new_root, old_root);

	return result;
}
192 static int pakfire_jail_exec_has_flag(
193 const struct pakfire_jail_exec
* ctx
, const enum pakfire_jail_exec_flags flag
) {
194 return ctx
->flags
& flag
;
197 static void pakfire_jail_free(struct pakfire_jail
* jail
) {
198 DEBUG(jail
->pakfire
, "Freeing jail at %p\n", jail
);
201 for (unsigned int i
= 0; jail
->env
[i
]; i
++)
205 pakfire_cgroup_unref(jail
->cgroup
);
207 pakfire_unref(jail
->pakfire
);
209 pakfire_ctx_unref(jail
->ctx
);
214 Passes any log messages on to the default pakfire log callback
216 static int pakfire_jail_default_log_callback(struct pakfire
* pakfire
, void* data
,
217 int priority
, const char* line
, size_t length
) {
220 INFO(pakfire
, "%s", line
);
224 ERROR(pakfire
, "%s", line
);
229 DEBUG(pakfire
, "%s", line
);
237 static const char* pakfire_jail_uuid(struct pakfire_jail
* jail
) {
239 uuid_unparse_lower(jail
->uuid
, jail
->__uuid
);
244 static int pakfire_jail_setup_interactive_env(struct pakfire_jail
* jail
) {
246 int r
= pakfire_jail_set_env(jail
, "PS1", "pakfire-jail \\w> ");
251 char* TERM
= secure_getenv("TERM");
253 r
= pakfire_jail_set_env(jail
, "TERM", TERM
);
259 char* LANG
= secure_getenv("LANG");
261 r
= pakfire_jail_set_env(jail
, "LANG", LANG
);
269 PAKFIRE_EXPORT
int pakfire_jail_create(struct pakfire_jail
** jail
, struct pakfire
* pakfire
) {
272 const char* arch
= pakfire_get_effective_arch(pakfire
);
274 // Allocate a new jail
275 struct pakfire_jail
* j
= calloc(1, sizeof(*j
));
280 j
->ctx
= pakfire_ctx(pakfire
);
283 j
->pakfire
= pakfire_ref(pakfire
);
285 // Initialize reference counter
288 // Generate a random UUID
289 uuid_generate_random(j
->uuid
);
291 DEBUG(j
->pakfire
, "Allocated new jail at %p\n", j
);
293 // Set the default logging callback
294 pakfire_jail_set_log_callback(j
, pakfire_jail_default_log_callback
, NULL
);
296 // Set default environment
297 for (const struct environ
* e
= ENV
; e
->key
; e
++) {
298 r
= pakfire_jail_set_env(j
, e
->key
, e
->val
);
303 // Enable all CPU features that CPU has to offer
304 if (!pakfire_arch_is_supported_by_host(arch
)) {
305 r
= pakfire_jail_set_env(j
, "QEMU_CPU", "max");
310 // Set container UUID
311 r
= pakfire_jail_set_env(j
, "container_uuid", pakfire_jail_uuid(j
));
315 // Disable systemctl to talk to systemd
316 if (!pakfire_on_root(j
->pakfire
)) {
317 r
= pakfire_jail_set_env(j
, "SYSTEMD_OFFLINE", "1");
327 pakfire_jail_free(j
);
332 PAKFIRE_EXPORT
struct pakfire_jail
* pakfire_jail_ref(struct pakfire_jail
* jail
) {
338 PAKFIRE_EXPORT
struct pakfire_jail
* pakfire_jail_unref(struct pakfire_jail
* jail
) {
339 if (--jail
->nrefs
> 0)
342 pakfire_jail_free(jail
);
348 PAKFIRE_EXPORT
void pakfire_jail_set_log_callback(struct pakfire_jail
* jail
,
349 pakfire_jail_log_callback callback
, void* data
) {
350 jail
->callbacks
.log
= callback
;
351 jail
->callbacks
.log_data
= data
;
356 PAKFIRE_EXPORT
int pakfire_jail_nice(struct pakfire_jail
* jail
, int nice
) {
357 // Check if nice level is in range
358 if (nice
< -19 || nice
> 20) {
369 int pakfire_jail_set_cgroup(struct pakfire_jail
* jail
, struct pakfire_cgroup
* cgroup
) {
370 // Free any previous cgroup
372 pakfire_cgroup_unref(jail
->cgroup
);
376 // Set any new cgroup
378 DEBUG(jail
->pakfire
, "Setting cgroup %p\n", cgroup
);
380 jail
->cgroup
= pakfire_cgroup_ref(cgroup
);
389 // Returns the length of the environment
390 static unsigned int pakfire_jail_env_length(struct pakfire_jail
* jail
) {
393 // Count everything in the environment
394 for (char** e
= jail
->env
; *e
; e
++)
400 // Finds an existing environment variable and returns its index or -1 if not found
401 static int pakfire_jail_find_env(struct pakfire_jail
* jail
, const char* key
) {
407 const size_t length
= strlen(key
);
409 for (unsigned int i
= 0; jail
->env
[i
]; i
++) {
410 if ((pakfire_string_startswith(jail
->env
[i
], key
)
411 && *(jail
->env
[i
] + length
) == '=')) {
420 // Returns the value of an environment variable or NULL
421 PAKFIRE_EXPORT
const char* pakfire_jail_get_env(struct pakfire_jail
* jail
,
423 int i
= pakfire_jail_find_env(jail
, key
);
427 return jail
->env
[i
] + strlen(key
) + 1;
430 // Sets an environment variable
431 PAKFIRE_EXPORT
int pakfire_jail_set_env(struct pakfire_jail
* jail
,
432 const char* key
, const char* value
) {
433 // Find the index where to write this value to
434 int i
= pakfire_jail_find_env(jail
, key
);
436 i
= pakfire_jail_env_length(jail
);
438 // Return -ENOSPC when the environment is full
439 if (i
>= ENVIRON_SIZE
) {
444 // Free any previous value
448 // Format and set environment variable
449 asprintf(&jail
->env
[i
], "%s=%s", key
, value
);
451 DEBUG(jail
->pakfire
, "Set environment variable: %s\n", jail
->env
[i
]);
456 // Imports an environment
457 PAKFIRE_EXPORT
int pakfire_jail_import_env(struct pakfire_jail
* jail
, const char* env
[]) {
465 // Copy environment variables
466 for (unsigned int i
= 0; env
[i
]; i
++) {
467 r
= pakfire_string_partition(env
[i
], "=", &key
, &val
);
472 r
= pakfire_jail_set_env(jail
, key
, val
);
489 PAKFIRE_EXPORT
int pakfire_jail_set_timeout(
490 struct pakfire_jail
* jail
, unsigned int timeout
) {
492 jail
->timeout
.it_value
.tv_sec
= timeout
;
495 DEBUG(jail
->pakfire
, "Timeout set to %u second(s)\n", timeout
);
497 DEBUG(jail
->pakfire
, "Timeout disabled\n");
502 static int pakfire_jail_create_timer(struct pakfire_jail
* jail
) {
505 // Nothing to do if no timeout has been set
506 if (!jail
->timeout
.it_value
.tv_sec
)
509 // Create a new timer
510 const int fd
= timerfd_create(CLOCK_MONOTONIC
, 0);
512 ERROR(jail
->pakfire
, "Could not create timer: %m\n");
517 r
= timerfd_settime(fd
, 0, &jail
->timeout
, NULL
);
519 ERROR(jail
->pakfire
, "Could not arm timer: %m\n");
533 This function replaces any logging in the child process.
535 All log messages will be sent to the parent process through their respective pipes.
537 static void pakfire_jail_log_redirect(void* data
, int priority
, const char* file
,
538 int line
, const char* fn
, const char* format
, va_list args
) {
539 struct pakfire_jail_pipes
* pipes
= (struct pakfire_jail_pipes
*)data
;
544 fd
= pipes
->log_INFO
[1];
548 fd
= pipes
->log_ERROR
[1];
553 fd
= pipes
->log_DEBUG
[1];
555 #endif /* ENABLE_DEBUG */
557 // Ignore any messages of an unknown priority
562 // Send the log message
564 vdprintf(fd
, format
, args
);
567 static int pakfire_jail_log_buffer_is_full(const struct pakfire_log_buffer
* buffer
) {
568 return (sizeof(buffer
->data
) == buffer
->used
);
572 This function reads as much data as it can from the file descriptor.
573 If it finds a whole line in it, it will send it to the logger and repeat the process.
574 If no newline character is found, it will try to read more data until it finds one.
576 static int pakfire_jail_handle_log(struct pakfire_jail
* jail
,
577 struct pakfire_jail_exec
* ctx
, int priority
, int fd
,
578 struct pakfire_log_buffer
* buffer
, pakfire_jail_communicate_out callback
, void* data
) {
579 char line
[BUFFER_SIZE
+ 1];
581 // Fill up buffer from fd
582 if (buffer
->used
< sizeof(buffer
->data
)) {
583 ssize_t bytes_read
= read(fd
, buffer
->data
+ buffer
->used
,
584 sizeof(buffer
->data
) - buffer
->used
);
587 if (bytes_read
< 0) {
588 ERROR(jail
->pakfire
, "Could not read from fd %d: %m\n", fd
);
592 // Update buffer size
593 buffer
->used
+= bytes_read
;
596 // See if we have any lines that we can write
597 while (buffer
->used
) {
598 // Search for the end of the first line
599 char* eol
= memchr(buffer
->data
, '\n', buffer
->used
);
603 // If the buffer is full, we send the content to the logger and try again
604 // This should not happen in practice
605 if (pakfire_jail_log_buffer_is_full(buffer
)) {
606 DEBUG(jail
->pakfire
, "Logging buffer is full. Sending all content\n");
608 eol
= buffer
->data
+ sizeof(buffer
->data
) - 1;
610 // Otherwise we might have only read parts of the output
615 // Find the length of the string
616 size_t length
= eol
- buffer
->data
+ 1;
618 // Copy the line into the buffer
619 memcpy(line
, buffer
->data
, length
);
621 // Terminate the string
626 int r
= callback(jail
->pakfire
, data
, priority
, line
, length
);
628 ERROR(jail
->pakfire
, "The logging callback returned an error: %d\n", r
);
633 // Remove line from buffer
634 memmove(buffer
->data
, buffer
->data
+ length
, buffer
->used
- length
);
635 buffer
->used
-= length
;
641 static int pakfire_jail_stream_stdin(struct pakfire_jail
* jail
,
642 struct pakfire_jail_exec
* ctx
, const int fd
) {
645 // Nothing to do if there is no stdin callback set
646 if (!ctx
->communicate
.in
) {
647 DEBUG(jail
->pakfire
, "Callback for standard input is not set\n");
651 // Skip if the writing pipe has already been closed
652 if (!ctx
->pipes
.stdin
[1])
655 DEBUG(jail
->pakfire
, "Streaming standard input...\n");
657 // Calling the callback
658 r
= ctx
->communicate
.in(jail
->pakfire
, ctx
->communicate
.data
, fd
);
660 DEBUG(jail
->pakfire
, "Standard input callback finished: %d\n", r
);
662 // The callback signaled that it has written everything
664 DEBUG(jail
->pakfire
, "Closing standard input pipe\n");
666 // Close the file-descriptor
669 // Reset the file-descriptor so it won't be closed again later
670 ctx
->pipes
.stdin
[1] = -1;
679 static int pakfire_jail_setup_pipe(struct pakfire_jail
* jail
, int (*fds
)[2], const int flags
) {
680 int r
= pipe2(*fds
, flags
);
682 ERROR(jail
->pakfire
, "Could not setup pipe: %m\n");
689 static void pakfire_jail_close_pipe(struct pakfire_jail
* jail
, int fds
[2]) {
690 for (unsigned int i
= 0; i
< 2; i
++)
696 This is a convenience function that closes the write end of a pipe and
697 returns the read end.
699 static int pakfire_jail_get_pipe_to_read(struct pakfire_jail
* jail
, int (*fds
)[2]) {
700 // Give the variables easier names to avoid confusion
701 int* fd_read
= &(*fds
)[0];
702 int* fd_write
= &(*fds
)[1];
704 // Close the write end of the pipe
705 if (*fd_write
>= 0) {
710 // Return the read end
717 static int pakfire_jail_get_pipe_to_write(struct pakfire_jail
* jail
, int (*fds
)[2]) {
718 // Give the variables easier names to avoid confusion
719 int* fd_read
= &(*fds
)[0];
720 int* fd_write
= &(*fds
)[1];
722 // Close the read end of the pipe
728 // Return the write end
735 static int pakfire_jail_recv_fd(struct pakfire_jail
* jail
, int socket
, int* fd
) {
736 const size_t payload_length
= sizeof(fd
);
737 char buffer
[CMSG_SPACE(payload_length
)];
740 struct msghdr msg
= {
741 .msg_control
= buffer
,
742 .msg_controllen
= sizeof(buffer
),
745 // Receive the message
746 r
= recvmsg(socket
, &msg
, 0);
748 CTX_ERROR(jail
->ctx
, "Could not receive file descriptor: %s\n", strerror(errno
));
753 struct cmsghdr
* cmsg
= CMSG_FIRSTHDR(&msg
);
757 *fd
= *((int*)CMSG_DATA(cmsg
));
759 CTX_DEBUG(jail
->ctx
, "Received fd %d from socket %d\n", *fd
, socket
);
764 static int pakfire_jail_send_fd(struct pakfire_jail
* jail
, int socket
, int fd
) {
765 const size_t payload_length
= sizeof(fd
);
766 char buffer
[CMSG_SPACE(payload_length
)];
769 CTX_DEBUG(jail
->ctx
, "Sending fd %d to socket %d\n", fd
, socket
);
772 struct msghdr msg
= {
773 .msg_control
= buffer
,
774 .msg_controllen
= sizeof(buffer
),
778 struct cmsghdr
* cmsg
= CMSG_FIRSTHDR(&msg
);
779 cmsg
->cmsg_level
= SOL_SOCKET
;
780 cmsg
->cmsg_type
= SCM_RIGHTS
;
781 cmsg
->cmsg_len
= CMSG_LEN(payload_length
);
784 *((int*)CMSG_DATA(cmsg
)) = fd
;
787 r
= sendmsg(socket
, &msg
, 0);
789 CTX_ERROR(jail
->ctx
, "Could not send file descriptor: %s\n", strerror(errno
));
796 static int pakfire_jail_log(struct pakfire
* pakfire
, void* data
, int priority
,
797 const char* line
, const size_t length
) {
798 // Pass everything to the parent logger
799 pakfire_log_condition(pakfire
, priority
, 0, "%.*s", (int)length
, line
);
804 static int pakfire_jail_epoll_add_fd(struct pakfire_jail
* jail
, int epollfd
, int fd
, int events
) {
805 struct epoll_event event
= {
806 .events
= events
|EPOLLHUP
,
814 int flags
= fcntl(fd
, F_GETFL
, 0);
816 // Set modified flags
817 r
= fcntl(fd
, F_SETFL
, flags
|O_NONBLOCK
);
819 CTX_ERROR(jail
->ctx
, "Could not set file descriptor %d into non-blocking mode: %s\n",
820 fd
, strerror(errno
));
824 // Add the file descriptor to the loop
825 r
= epoll_ctl(epollfd
, EPOLL_CTL_ADD
, fd
, &event
);
827 ERROR(jail
->pakfire
, "Could not add file descriptor %d to epoll(): %s\n",
828 fd
, strerror(errno
));
835 static int pakfire_jail_setup_child2(struct pakfire_jail
* jail
, struct pakfire_jail_exec
* ctx
);
837 static int pakfire_jail_wait_on_child(struct pakfire_jail
* jail
, int pidfd
) {
838 siginfo_t status
= {};
841 // Call waitid() and store the result
842 r
= waitid(P_PIDFD
, pidfd
, &status
, WEXITED
);
844 CTX_ERROR(jail
->ctx
, "waitid() failed: %s\n", strerror(errno
));
848 switch (status
.si_code
) {
849 // If the process exited normally, we return the exit code
851 CTX_DEBUG(jail
->ctx
, "The child process exited with code %d\n", status
.si_status
);
852 return status
.si_status
;
855 CTX_ERROR(jail
->ctx
, "The child process was killed\n");
859 CTX_ERROR(jail
->ctx
, "The child process terminated abnormally\n");
864 CTX_ERROR(jail
->ctx
, "Unknown child exit code: %d\n", status
.si_code
);
871 static int pakfire_jail_wait(struct pakfire_jail
* jail
, struct pakfire_jail_exec
* ctx
) {
873 struct epoll_event events
[EPOLL_MAX_EVENTS
];
877 // Fetch the UNIX domain socket
878 const int socket_recv
= pakfire_jail_get_pipe_to_read(jail
, &ctx
->socket
);
880 // Fetch file descriptors from context
881 const int stdin
= pakfire_jail_get_pipe_to_write(jail
, &ctx
->pipes
.stdin
);
882 const int stdout
= pakfire_jail_get_pipe_to_read(jail
, &ctx
->pipes
.stdout
);
883 const int stderr
= pakfire_jail_get_pipe_to_read(jail
, &ctx
->pipes
.stderr
);
886 const int timerfd
= pakfire_jail_create_timer(jail
);
889 const int log_INFO
= pakfire_jail_get_pipe_to_read(jail
, &ctx
->pipes
.log_INFO
);
890 const int log_ERROR
= pakfire_jail_get_pipe_to_read(jail
, &ctx
->pipes
.log_ERROR
);
892 const int log_DEBUG
= pakfire_jail_get_pipe_to_read(jail
, &ctx
->pipes
.log_DEBUG
);
893 #endif /* ENABLE_DEBUG */
895 // Make a list of all file descriptors we are interested in
896 const struct pakfire_wait_fds
{
900 { socket_recv
, EPOLLIN
},
902 // Standard input/output
908 { timerfd
, EPOLLIN
},
911 { ctx
->pidfd1
, EPOLLIN
},
914 { log_INFO
, EPOLLIN
},
915 { log_ERROR
, EPOLLIN
},
917 { log_DEBUG
, EPOLLIN
},
918 #endif /* ENABLE_DEBUG */
925 epollfd
= epoll_create1(0);
927 ERROR(jail
->pakfire
, "Could not initialize epoll(): %m\n");
932 // Turn file descriptors into non-blocking mode and add them to epoll()
933 for (const struct pakfire_wait_fds
* fd
= fds
; fd
->events
; fd
++) {
934 // Skip fds which were not initialized
938 // Add the FD to the event loop
939 r
= pakfire_jail_epoll_add_fd(jail
, epollfd
, fd
->fd
, fd
->events
);
947 CTX_DEBUG(jail
->ctx
, "Launching main loop...\n");
949 // Loop for as long as the process is alive
951 int num
= epoll_wait(epollfd
, events
, EPOLL_MAX_EVENTS
, -1);
953 // Ignore if epoll_wait() has been interrupted
957 ERROR(jail
->pakfire
, "epoll_wait() failed: %m\n");
963 for (int i
= 0; i
< num
; i
++) {
964 int e
= events
[i
].events
;
965 int fd
= events
[i
].data
.fd
;
967 struct pakfire_log_buffer
* buffer
= NULL
;
968 pakfire_jail_communicate_out callback
= NULL
;
972 // Check if there is any data to be read
974 // Monitor the first child process
975 if (fd
== ctx
->pidfd1
) {
976 r
= pakfire_jail_wait_on_child(jail
, ctx
->pidfd1
);
978 CTX_ERROR(jail
->ctx
, "The first child exited with an error\n");
987 // Monitor the second child process
988 } else if (fd
== ctx
->pidfd2
) {
989 exit
= pakfire_jail_wait_on_child(jail
, ctx
->pidfd2
);
991 CTX_ERROR(jail
->ctx
, "The second child exited with an error\n");
998 // Mark that we have ended so that we will process the remaining
999 // events from epoll() now, but won't restart the outer loop.
1004 // Handle timer events
1005 } else if (fd
== timerfd
) {
1006 DEBUG(jail
->pakfire
, "Timer event received\n");
1009 r
= read(timerfd
, garbage
, sizeof(garbage
));
1011 ERROR(jail
->pakfire
, "Could not disarm timer: %m\n");
1016 // Terminate the process if it hasn't already ended
1018 DEBUG(jail
->pakfire
, "Terminating process...\n");
1020 // Send SIGKILL to the process
1021 r
= pidfd_send_signal(ctx
->pidfd2
, SIGKILL
, NULL
, 0);
1023 ERROR(jail
->pakfire
, "Could not kill process: %m\n");
1028 // There is nothing else to do
1031 // Handle socket messages
1032 } else if (fd
== socket_recv
) {
1033 // Receive the FD of the second child process
1034 r
= pakfire_jail_recv_fd(jail
, socket_recv
, &ctx
->pidfd2
);
1038 // Add it to the event loop
1039 r
= pakfire_jail_epoll_add_fd(jail
, epollfd
, ctx
->pidfd2
, EPOLLIN
);
1043 // Setup the child process
1044 r
= pakfire_jail_setup_child2(jail
, ctx
);
1048 // Don't fall through to log processing
1051 // Handle logging messages
1052 } else if (fd
== log_INFO
) {
1053 buffer
= &ctx
->buffers
.log_INFO
;
1054 priority
= LOG_INFO
;
1056 callback
= pakfire_jail_log
;
1058 } else if (fd
== log_ERROR
) {
1059 buffer
= &ctx
->buffers
.log_ERROR
;
1062 callback
= pakfire_jail_log
;
1065 } else if (fd
== log_DEBUG
) {
1066 buffer
= &ctx
->buffers
.log_DEBUG
;
1067 priority
= LOG_DEBUG
;
1069 callback
= pakfire_jail_log
;
1070 #endif /* ENABLE_DEBUG */
1072 // Handle anything from the log pipes
1073 } else if (fd
== stdout
) {
1074 buffer
= &ctx
->buffers
.stdout
;
1075 priority
= LOG_INFO
;
1077 // Send any output to the default logger if no callback is set
1078 if (ctx
->communicate
.out
) {
1079 callback
= ctx
->communicate
.out
;
1080 data
= ctx
->communicate
.data
;
1082 callback
= jail
->callbacks
.log
;
1083 data
= jail
->callbacks
.log_data
;
1086 } else if (fd
== stderr
) {
1087 buffer
= &ctx
->buffers
.stderr
;
1090 // Send any output to the default logger if no callback is set
1091 if (ctx
->communicate
.out
) {
1092 callback
= ctx
->communicate
.out
;
1093 data
= ctx
->communicate
.data
;
1095 callback
= jail
->callbacks
.log
;
1096 data
= jail
->callbacks
.log_data
;
1100 DEBUG(jail
->pakfire
, "Received invalid file descriptor %d\n", fd
);
1105 r
= pakfire_jail_handle_log(jail
, ctx
, priority
, fd
, buffer
, callback
, data
);
1111 // Handle standard input
1113 r
= pakfire_jail_stream_stdin(jail
, ctx
, fd
);
1116 // Ignore if we filled up the buffer
1121 ERROR(jail
->pakfire
, "Could not write to stdin: %m\n");
1128 // Check if any file descriptors have been closed
1130 // Remove the file descriptor
1131 r
= epoll_ctl(epollfd
, EPOLL_CTL_DEL
, fd
, NULL
);
1133 ERROR(jail
->pakfire
, "Could not remove closed file-descriptor %d: %m\n", fd
);
1140 // Return the exit code
1144 CTX_DEBUG(jail
->ctx
, "Main loop terminated\n");
1154 int pakfire_jail_capture_stdout(struct pakfire
* pakfire
, void* data
,
1155 int priority
, const char* line
, size_t length
) {
1156 char** output
= (char**)data
;
1159 // Append everything from stdout to a buffer
1160 if (output
&& priority
== LOG_INFO
) {
1161 r
= asprintf(output
, "%s%s", (output
&& *output
) ? *output
: "", line
);
1167 // Send everything else to the default logger
1168 return pakfire_jail_default_log_callback(pakfire
, NULL
, priority
, line
, length
);
1173 // Logs all capabilities of the current process
1174 static int pakfire_jail_show_capabilities(struct pakfire_jail
* jail
) {
1177 cap_flag_value_t value_e
;
1178 cap_flag_value_t value_i
;
1179 cap_flag_value_t value_p
;
1183 pid_t pid
= getpid();
1185 // Fetch all capabilities
1186 caps
= cap_get_proc();
1188 ERROR(jail
->pakfire
, "Could not fetch capabilities: %m\n");
1193 DEBUG(jail
->pakfire
, "Capabilities of PID %d:\n", pid
);
1195 // Iterate over all capabilities
1196 for (unsigned int cap
= 0; cap_valid(cap
); cap
++) {
1197 name
= cap_to_name(cap
);
1199 // Fetch effective value
1200 r
= cap_get_flag(caps
, cap
, CAP_EFFECTIVE
, &value_e
);
1204 // Fetch inheritable value
1205 r
= cap_get_flag(caps
, cap
, CAP_INHERITABLE
, &value_i
);
1209 // Fetch permitted value
1210 r
= cap_get_flag(caps
, cap
, CAP_PERMITTED
, &value_p
);
1214 DEBUG(jail
->pakfire
,
1215 " %-24s : %c%c%c\n",
1217 (value_e
== CAP_SET
) ? 'e' : '-',
1218 (value_i
== CAP_SET
) ? 'i' : '-',
1219 (value_p
== CAP_SET
) ? 'p' : '-'
1239 static int pakfire_jail_set_capabilities(struct pakfire_jail
* jail
) {
1244 // Fetch capabilities
1245 caps
= cap_get_proc();
1247 ERROR(jail
->pakfire
, "Could not read capabilities: %m\n");
1252 // Walk through all capabilities
1253 for (cap_value_t cap
= 0; cap_valid(cap
); cap
++) {
1254 cap_value_t _caps
[] = { cap
};
1256 // Fetch the name of the capability
1257 name
= cap_to_name(cap
);
1259 r
= cap_set_flag(caps
, CAP_EFFECTIVE
, 1, _caps
, CAP_SET
);
1261 ERROR(jail
->pakfire
, "Could not set %s: %m\n", name
);
1265 r
= cap_set_flag(caps
, CAP_INHERITABLE
, 1, _caps
, CAP_SET
);
1267 ERROR(jail
->pakfire
, "Could not set %s: %m\n", name
);
1271 r
= cap_set_flag(caps
, CAP_PERMITTED
, 1, _caps
, CAP_SET
);
1273 ERROR(jail
->pakfire
, "Could not set %s: %m\n", name
);
1282 // Restore all capabilities
1283 r
= cap_set_proc(caps
);
1285 ERROR(jail
->pakfire
, "Restoring capabilities failed: %m\n");
1289 // Add all capabilities to the ambient set
1290 for (unsigned int cap
= 0; cap_valid(cap
); cap
++) {
1291 name
= cap_to_name(cap
);
1293 // Raise the capability
1294 r
= prctl(PR_CAP_AMBIENT
, PR_CAP_AMBIENT_RAISE
, cap
, 0, 0);
1296 ERROR(jail
->pakfire
, "Could not set ambient capability %s: %m\n", name
);
1319 static int pakfire_jail_limit_syscalls(struct pakfire_jail
* jail
) {
1320 const int syscalls
[] = {
1321 // The kernel's keyring isn't namespaced
1324 SCMP_SYS(request_key
),
1326 // Disable userfaultfd
1327 SCMP_SYS(userfaultfd
),
1329 // Disable perf which could leak a lot of information about the host
1330 SCMP_SYS(perf_event_open
),
1336 DEBUG(jail
->pakfire
, "Applying syscall filter...\n");
1338 // Setup a syscall filter which allows everything by default
1339 scmp_filter_ctx ctx
= seccomp_init(SCMP_ACT_ALLOW
);
1341 ERROR(jail
->pakfire
, "Could not setup seccomp filter: %m\n");
1346 for (const int* syscall
= syscalls
; *syscall
; syscall
++) {
1347 r
= seccomp_rule_add(ctx
, SCMP_ACT_ERRNO(EPERM
), *syscall
, 0);
1349 ERROR(jail
->pakfire
, "Could not configure syscall %d: %m\n", *syscall
);
1354 // Load syscall filter into the kernel
1355 r
= seccomp_load(ctx
);
1357 ERROR(jail
->pakfire
, "Could not load syscall filter into the kernel: %m\n");
1363 seccomp_release(ctx
);
// Records a source/target pair in the jail's table of custom mountpoints.
//
// The pair is stored in the entry at index jail->num_mountpoints; the table
// counts as full once num_mountpoints has reached MAX_MOUNTPOINTS. Both
// source and target must be non-NULL. The two paths are copied into the
// entry with pakfire_string_set() and the counter is incremented afterwards.
// pakfire_jail_mount() later passes each entry to pakfire_bind().
//
// NOTE(review): neither the handling of the flags argument nor the return
// values are visible in this view - confirm against the full file that
// flags ends up in mp->flags, which pakfire_jail_mount() reads.
1370 PAKFIRE_EXPORT
int pakfire_jail_bind(struct pakfire_jail
* jail
,
1371 const char* source
, const char* target
, int flags
) {
1372 struct pakfire_jail_mountpoint
* mp
= NULL
;
1375 // Check if there is any space left
1376 if (jail
->num_mountpoints
>= MAX_MOUNTPOINTS
) {
1381 // Check for valid inputs
1382 if (!source
|| !target
) {
1387 // Select the next free slot
1388 mp
= &jail
->mountpoints
[jail
->num_mountpoints
];
// Copy the source path into the slot
1391 r
= pakfire_string_set(mp
->source
, source
);
1393 ERROR(jail
->pakfire
, "Could not copy source: %m\n");
// Copy the target path into the slot
1398 r
= pakfire_string_set(mp
->target
, target
);
1400 ERROR(jail
->pakfire
, "Could not copy target: %m\n");
1407 // Increment counter
1408 jail
->num_mountpoints
++;
// Bind-mounts a fixed list of paths into the jail read-only.
//
// Every entry of the local paths array is passed to pakfire_bind() with a
// NULL target and MS_RDONLY. The array is walked until an entry is NULL.
//
// NOTE(review): the contents of paths (and its NULL terminator) are not
// visible in this view; per the trailing comment a permission error from
// pakfire_bind() is ignored, but that check is not visible either.
1413 static int pakfire_jail_mount_networking(struct pakfire_jail
* jail
) {
1416 const char* paths
[] = {
1422 // Bind-mount all paths read-only
1423 for (const char** path
= paths
; *path
; path
++) {
1424 r
= pakfire_bind(jail
->pakfire
, *path
, NULL
, MS_RDONLY
);
1427 // Ignore if we don't have permission
1442 Mounts everything that we require in the new namespace
// Performs the mounts needed for one jail execution, in this order:
//   - pakfire_mount_all(), with PAKFIRE_MOUNT_LOOP_DEVICES added to the
//     flags when ctx has PAKFIRE_JAIL_HAS_LOOP_DEVICES set
//   - pakfire_jail_mount_networking(), only when ctx has
//     PAKFIRE_JAIL_HAS_NETWORKING set
//   - pakfire_bind() for each of the jail->num_mountpoints entries of
//     jail->mountpoints, using the stored source, target and flags
//   - pakfire_mount_list() to log the resulting mountpoints
//
// NOTE(review): the declaration of flags and the handling of r after each
// step are not visible in this view.
1444 static int pakfire_jail_mount(struct pakfire_jail
* jail
, struct pakfire_jail_exec
* ctx
) {
1445 struct pakfire_jail_mountpoint
* mp
= NULL
;
1449 // Enable loop devices
1450 if (pakfire_jail_exec_has_flag(ctx
, PAKFIRE_JAIL_HAS_LOOP_DEVICES
))
1451 flags
|= PAKFIRE_MOUNT_LOOP_DEVICES
;
1453 // Mount all default stuff
1454 r
= pakfire_mount_all(jail
->pakfire
, flags
);
1458 // Mount networking stuff
1459 if (pakfire_jail_exec_has_flag(ctx
, PAKFIRE_JAIL_HAS_NETWORKING
)) {
1460 r
= pakfire_jail_mount_networking(jail
);
1465 // Mount all custom stuff
1466 for (unsigned int i
= 0; i
< jail
->num_mountpoints
; i
++) {
1468 mp
= &jail
->mountpoints
[i
];
// Bind-mount this entry with the values stored in its slot
1471 r
= pakfire_bind(jail
->pakfire
, mp
->source
, mp
->target
, mp
->flags
);
1476 // Log all mountpoints
1477 pakfire_mount_list(jail
->pakfire
);
// Brings the "lo" interface up using libnl's route API.
//
// A netlink socket is allocated and connected to NETLINK_ROUTE, a link cache
// is fetched, "lo" is looked up by name, and a second rtnl_link object that
// only has IFF_UP set is passed to rtnl_link_change() as the requested
// change. libnl error codes are turned into text with nl_geterror().
//
// NOTE(review): the release of nl, cache, link and change is not visible in
// this view - confirm that every exit path frees them.
// NOTE(review): the "Could not find lo interface. Ignoring." message suggests
// a missing interface is not treated as fatal - confirm the return value.
1484 static int pakfire_jail_setup_loopback(struct pakfire_jail
* jail
) {
1485 struct nl_sock
* nl
= NULL
;
1486 struct nl_cache
* cache
= NULL
;
1487 struct rtnl_link
* link
= NULL
;
1488 struct rtnl_link
* change
= NULL
;
1491 DEBUG(jail
->pakfire
, "Setting up loopback...\n");
1493 // Allocate a netlink socket
1494 nl
= nl_socket_alloc();
1496 ERROR(jail
->pakfire
, "Could not allocate a netlink socket: %m\n");
1501 // Connect the socket
1502 r
= nl_connect(nl
, NETLINK_ROUTE
);
1504 ERROR(jail
->pakfire
, "Could not connect netlink socket: %s\n", nl_geterror(r
));
1508 // Allocate the netlink cache
1509 r
= rtnl_link_alloc_cache(nl
, AF_UNSPEC
, &cache
);
1511 ERROR(jail
->pakfire
, "Unable to allocate netlink cache: %s\n", nl_geterror(r
));
1515 // Fetch loopback interface
1516 link
= rtnl_link_get_by_name(cache
, "lo");
1518 ERROR(jail
->pakfire
, "Could not find lo interface. Ignoring.\n");
1523 // Allocate a new link
1524 change
= rtnl_link_alloc();
1526 ERROR(jail
->pakfire
, "Could not allocate change link\n");
1531 // Set the link to UP
1532 rtnl_link_set_flags(change
, IFF_UP
);
1534 // Apply any changes
1535 r
= rtnl_link_change(nl
, link
, change
, 0);
1537 ERROR(jail
->pakfire
, "Unable to activate loopback: %s\n", nl_geterror(r
));
// Writes the UID map of process pid to /proc/<pid>/uid_map.
//
// Nothing is mapped when pakfire_on_root() reports that we operate on /.
// Two map layouts are written below:
//   "0 <subuid->id> <subuid->length>"
//       a single range that starts at UID 0 inside the jail
//   "0 <uid> 1" followed by "1 <subuid->id> <subuid->length>"
//       UID 0 inside the jail maps to uid, the sub-UID range starts at 1
// The existing comment ties the first layout to running as root and the
// second to running as a non-privileged user; the condition that selects
// between them is not visible in this view.
//
// NOTE(review): uid is a uid_t but is formatted with %lu. If
// pakfire_file_write() is printf-style, the argument needs a matching type
// or a cast to unsigned long - confirm. The types of subuid->id and
// subuid->length should be checked against %lu as well.
// NOTE(review): subuid is dereferenced below; a NULL check is not visible.
1553 static int pakfire_jail_setup_uid_mapping(struct pakfire_jail
* jail
, pid_t pid
) {
1554 char path
[PATH_MAX
];
1557 // Skip mapping anything when running on /
1558 if (pakfire_on_root(jail
->pakfire
))
1562 r
= pakfire_string_format(path
, "/proc/%d/uid_map", pid
);
1567 const uid_t uid
= pakfire_uid(jail
->pakfire
);
1570 const struct pakfire_subid
* subuid
= pakfire_subuid(jail
->pakfire
);
1574 /* When running as root, we will map the entire range.
1576 When running as a non-privileged user, we will map the root user inside the jail
1577 to the user's UID outside of the jail, and we will map the rest starting from one.
1582 r
= pakfire_file_write(jail
->pakfire
, path
, 0, 0, 0,
1583 "0 %lu %lu\n", subuid
->id
, subuid
->length
);
1585 r
= pakfire_file_write(jail
->pakfire
, path
, 0, 0, 0,
1586 "0 %lu 1\n1 %lu %lu\n", uid
, subuid
->id
, subuid
->length
);
1590 ERROR(jail
->pakfire
, "Could not map UIDs: %m\n");
// Writes the GID map of process pid to /proc/<pid>/gid_map.
//
// Nothing is mapped when pakfire_on_root() reports that we operate on /.
// Two map layouts are written below:
//   "0 <subgid->id> <subgid->length>"
//       a single range that starts at GID 0 inside the jail
//   "0 <gid> 1" followed by "1 <subgid->id> <subgid->length>"
//       GID 0 inside the jail maps to gid, the sub-GID range starts at 1
// The condition that selects between the two layouts is not visible in this
// view.
//
// NOTE(review): gid is a gid_t but is formatted with %lu. If
// pakfire_file_write() is printf-style, the argument needs a matching type
// or a cast to unsigned long - confirm. The types of subgid->id and
// subgid->length should be checked against %lu as well.
// NOTE(review): subgid is dereferenced below; a NULL check is not visible.
1597 static int pakfire_jail_setup_gid_mapping(struct pakfire_jail
* jail
, pid_t pid
) {
1598 char path
[PATH_MAX
];
1601 // Skip mapping anything when running on /
1602 if (pakfire_on_root(jail
->pakfire
))
1606 const gid_t gid
= pakfire_gid(jail
->pakfire
);
1609 const struct pakfire_subid
* subgid
= pakfire_subgid(jail
->pakfire
);
// Build the path of the map file of the target process
1614 r
= pakfire_string_format(path
, "/proc/%d/gid_map", pid
);
1620 r
= pakfire_file_write(jail
->pakfire
, path
, 0, 0, 0,
1621 "0 %lu %lu\n", subgid
->id
, subgid
->length
);
1623 r
= pakfire_file_write(jail
->pakfire
, path
, 0, 0, 0,
1624 "0 %lu 1\n1 %lu %lu\n", gid
, subgid
->id
, subgid
->length
);
1628 ERROR(jail
->pakfire
, "Could not map GIDs: %m\n");
// Writes "deny" to /proc/<pid>/setgroups for process pid.
//
// The file is opened for writing with fopen() and "deny\n" is written with
// fprintf(); a result of zero or less counts as a write failure. Failures to
// open, write or close the file are logged together with the path.
//
// NOTE(review): the fclose() call and the return values are not visible in
// this view - confirm that the stream is closed on the write-error path too.
1635 static int pakfire_jail_setgroups(struct pakfire_jail
* jail
, pid_t pid
) {
1636 char path
[PATH_MAX
];
1640 r
= pakfire_string_format(path
, "/proc/%d/setgroups", pid
);
1644 // Open file for writing
1645 FILE* f
= fopen(path
, "w");
1647 ERROR(jail
->pakfire
, "Could not open %s for writing: %m\n", path
);
// fprintf() returns the number of bytes written, or a negative value on error
1652 int bytes_written
= fprintf(f
, "deny\n");
1653 if (bytes_written
<= 0) {
1654 ERROR(jail
->pakfire
, "Could not write to %s: %m\n", path
);
1661 ERROR(jail
->pakfire
, "Could not close %s: %m\n", path
);
// Notifies whoever reads the eventfd fd by writing the value 1 to it with
// eventfd_write(). A failure is logged with strerror(errno).
//
// NOTE(review): per the trailing comment the descriptor is closed afterwards;
// the close() call itself is not visible in this view.
1672 static int pakfire_jail_send_signal(struct pakfire_jail
* jail
, int fd
) {
1673 const uint64_t val
= 1;
1676 DEBUG(jail
->pakfire
, "Sending signal...\n");
1678 // Write to the file descriptor
1679 r
= eventfd_write(fd
, val
);
1681 ERROR(jail
->pakfire
, "Could not send signal: %s\n", strerror(errno
));
1685 // Close the file descriptor
// Counterpart of pakfire_jail_send_signal(): reads the eventfd fd with
// eventfd_read() into val. A failure is logged with strerror(errno).
//
// NOTE(review): the declaration of val and the close() announced by the
// trailing comment are not visible in this view.
1691 static int pakfire_jail_wait_for_signal(struct pakfire_jail
* jail
, int fd
) {
1695 DEBUG(jail
->pakfire
, "Waiting for signal...\n");
// Read the counter that the other side has written
1697 r
= eventfd_read(fd
, &val
);
1699 ERROR(jail
->pakfire
, "Error waiting for signal: %s\n", strerror(errno
));
1703 // Close the file descriptor
// Makes root the root filesystem of the calling process.
//
// Visible steps: change into root (only the "chdir(%s) failed" message is
// visible, not the call), call pivot_root(".", ".") and then detach the old
// root with umount2(".", MNT_DETACH).
//
// NOTE(review): pivot_root(".", ".") presumably relies on the old root
// ending up mounted at "." so that the umount2() below removes it -
// confirm against pivot_root(2).
1709 static int pakfire_jail_switch_root(struct pakfire_jail
* jail
, const char* root
) {
1712 // Change to the new root
1715 ERROR(jail
->pakfire
, "chdir(%s) failed: %m\n", root
);
// Swap the root filesystem; new root and put_old are both "."
1720 r
= pivot_root(".", ".");
1722 ERROR(jail
->pakfire
, "Failed changing into the new root directory %s: %m\n", root
);
1726 // Umount the old root
1727 r
= umount2(".", MNT_DETACH
);
1729 ERROR(jail
->pakfire
, "Could not umount the old root filesystem: %m\n");
1737 Called by the parent that sets up the second child process...
// Parent-side setup for the second child process.
//
// Resolves the PID behind ctx->pidfd2 with pidfd_get_pid(), then, in this
// order, writes the UID map, writes "deny" to /proc/PID/setgroups and writes
// the GID map for that PID. Finally ctx->completed_fd is signalled;
// pakfire_jail_child2() waits on the same descriptor before it continues.
//
// NOTE(review): the error from pidfd_get_pid() is printed with strerror(-r),
// so it presumably returns a negative errno value - confirm.
1739 static int pakfire_jail_setup_child2(
1740 struct pakfire_jail
* jail
, struct pakfire_jail_exec
* ctx
) {
// Translate the pidfd of the second child into a PID
1745 r
= pidfd_get_pid(ctx
->pidfd2
, &pid
);
1747 CTX_ERROR(jail
->ctx
, "Could not fetch PID: %s\n", strerror(-r
));
1751 // Setup UID mapping
1752 r
= pakfire_jail_setup_uid_mapping(jail
, pid
);
1756 // Write "deny" to /proc/PID/setgroups
1757 r
= pakfire_jail_setgroups(jail
, pid
);
1761 // Setup GID mapping
1762 r
= pakfire_jail_setup_gid_mapping(jail
, pid
);
1766 // Parent has finished initialisation
1767 DEBUG(jail
->pakfire
, "Parent has finished initialization\n");
1769 // Send signal to client
1770 r
= pakfire_jail_send_signal(jail
, ctx
->completed_fd
);
1778 Child 2 is launched in their own user/mount/etc. namespace.
// Entry point of the second child process; it ends by exec'ing argv.
//
// Visible steps, in order:
//   - prctl(): PR_SET_PDEATHSIG with SIGKILL, PR_SET_DUMPABLE and
//     PR_SET_KEEPCAPS
//   - wait on ctx->completed_fd until the parent has finished initialization
//   - log the real/effective UID and GID and fail unless all four are 0
//     (an error message for "not PID 1" is visible as well)
//   - set the personality that belongs to the effective architecture
//   - call pakfire_jail_setup_loopback() when PAKFIRE_JAIL_HAS_NETWORKING
//     is NOT set
//   - apply jail->nice with setpriority()
//   - close the read ends of the log pipes, dup2() the stdin pipe onto
//     STDIN_FILENO and the stdout/stderr pipes onto STDOUT_FILENO and
//     STDERR_FILENO, then close the original pipe descriptors
//   - reset the open file limit, set and show the capabilities and apply the
//     syscall filter
//   - execvpe(argv[0], argv, jail->env)
//
// NOTE(review): how a failed execvpe() is turned into an exit code is only
// partly visible (PAKFIRE_JAIL_NOENT_OK is checked) - see the full file.
1780 static int pakfire_jail_child2(struct pakfire_jail
* jail
,
1781 struct pakfire_jail_exec
* ctx
, const char* argv
[]) {
1785 pid_t pid
= getpid();
1787 CTX_DEBUG(jail
->ctx
, "Launched child process in jail with PID %d\n", pid
);
// Ask the kernel to send SIGKILL to this process when its parent dies
1790 r
= prctl(PR_SET_PDEATHSIG
, SIGKILL
, 0, 0, 0);
1792 CTX_ERROR(jail
->ctx
, "Could not configure to die with parent: %m\n");
1796 // Make this process dumpable
1797 r
= prctl (PR_SET_DUMPABLE
, 1, 0, 0, 0);
1799 CTX_ERROR(jail
->ctx
, "Could not make the process dumpable: %m\n");
1803 // Don't drop any capabilities on setuid()
1804 r
= prctl(PR_SET_KEEPCAPS
, 1);
1806 CTX_ERROR(jail
->ctx
, "Could not set PR_SET_KEEPCAPS: %m\n");
1810 // Wait for the parent to finish initialization
1811 r
= pakfire_jail_wait_for_signal(jail
, ctx
->completed_fd
);
1816 uid_t uid
= getuid();
1817 gid_t gid
= getgid();
1820 uid_t euid
= geteuid();
1821 gid_t egid
= getegid();
1823 DEBUG(jail
->pakfire
, " UID: %u (effective %u)\n", uid
, euid
);
1824 DEBUG(jail
->pakfire
, " GID: %u (effective %u)\n", gid
, egid
);
1826 // Fail if we are not PID 1
1828 CTX_ERROR(jail
->ctx
, "Child process is not PID 1\n");
1832 // Fail if we are not running as root
1833 if (uid
|| gid
|| euid
|| egid
) {
1834 ERROR(jail
->pakfire
, "Child process is not running as root\n");
1838 const char* arch
= pakfire_get_effective_arch(jail
->pakfire
);
1841 unsigned long persona
= pakfire_arch_personality(arch
);
1843 r
= personality(persona
);
1845 ERROR(jail
->pakfire
, "Could not set personality (%x)\n", (unsigned int)persona
);
// Without networking, pakfire_jail_child1() adds CLONE_NEWNET to the clone
// flags; loopback is set up here under that same condition
1851 if (!pakfire_jail_exec_has_flag(ctx
, PAKFIRE_JAIL_HAS_NETWORKING
)) {
1852 r
= pakfire_jail_setup_loopback(jail
);
1859 DEBUG(jail
->pakfire
, "Setting nice level to %d\n", jail
->nice
);
1861 r
= setpriority(PRIO_PROCESS
, pid
, jail
->nice
);
1863 ERROR(jail
->pakfire
, "Could not set nice level: %m\n");
1868 // Close other end of log pipes
1869 close(ctx
->pipes
.log_INFO
[0]);
1870 close(ctx
->pipes
.log_ERROR
[0]);
1872 close(ctx
->pipes
.log_DEBUG
[0]);
1873 #endif /* ENABLE_DEBUG */
1875 // Connect standard input
1876 if (ctx
->pipes
.stdin
[0] >= 0) {
1877 r
= dup2(ctx
->pipes
.stdin
[0], STDIN_FILENO
);
1879 ERROR(jail
->pakfire
, "Could not connect fd %d to stdin: %m\n",
1880 ctx
->pipes
.stdin
[0]);
1886 // Connect standard output and error
// Both pipes must be open; stdout and stderr are only redirected together
1887 if (ctx
->pipes
.stdout
[1] >= 0 && ctx
->pipes
.stderr
[1] >= 0) {
1888 r
= dup2(ctx
->pipes
.stdout
[1], STDOUT_FILENO
);
1890 ERROR(jail
->pakfire
, "Could not connect fd %d to stdout: %m\n",
1891 ctx
->pipes
.stdout
[1]);
1896 r
= dup2(ctx
->pipes
.stderr
[1], STDERR_FILENO
);
1898 ERROR(jail
->pakfire
, "Could not connect fd %d to stderr: %m\n",
1899 ctx
->pipes
.stderr
[1]);
1904 // Close the pipe (as we have moved the original file descriptors)
1905 pakfire_jail_close_pipe(jail
, ctx
->pipes
.stdin
);
1906 pakfire_jail_close_pipe(jail
, ctx
->pipes
.stdout
);
1907 pakfire_jail_close_pipe(jail
, ctx
->pipes
.stderr
);
1910 // Reset open file limit (http://0pointer.net/blog/file-descriptor-limits.html)
1911 r
= pakfire_rlimit_reset_nofile(jail
->pakfire
);
1916 r
= pakfire_jail_set_capabilities(jail
);
1920 // Show capabilities
1921 r
= pakfire_jail_show_capabilities(jail
);
1926 r
= pakfire_jail_limit_syscalls(jail
);
1930 CTX_DEBUG(jail
->ctx
, "Child process initialization done\n");
1931 CTX_DEBUG(jail
->ctx
, "Launching command:\n");
1934 for (unsigned int i
= 0; argv
[i
]; i
++)
1935 CTX_DEBUG(jail
->ctx
, " argv[%u] = %s\n", i
, argv
[i
]);
// argv[0] is resolved through PATH by execvpe(); the environment is jail->env
1938 r
= execvpe(argv
[0], (char**)argv
, jail
->env
);
1940 // Translate errno into regular exit code
1943 // Ignore if the command doesn't exist
1944 if (ctx
->flags
& PAKFIRE_JAIL_NOENT_OK
)
1955 CTX_ERROR(jail
->ctx
, "Could not execve(%s): %m\n", argv
[0]);
1958 // We should not get here
1963 Child 1 is launched in a new mount namespace...
// Entry point of the first child process.
//
// Visible steps, in order:
//   - redirect logging to the log pipes via pakfire_jail_log_redirect
//   - prctl(PR_SET_PDEATHSIG) with SIGKILL
//   - prepare the mount tree: propagation of "/" set to MS_SLAVE, root made
//     a mountpoint and set to MS_PRIVATE, pakfire_jail_mount(),
//     pakfire_jail_switch_root(), propagation of "/" set to MS_SHARED
//   - clone3() the second child, which runs pakfire_jail_child2();
//     CLONE_INTO_CGROUP and the cgroup fd are added under a condition that
//     is not visible here, CLONE_NEWNET is added when
//     PAKFIRE_JAIL_HAS_NETWORKING is not set
//   - send ctx->pidfd2 over the write end of ctx->socket
//
// NOTE(review): the clone3() failure message below says "first child
// process" although this call creates the second child - the message looks
// copied from __pakfire_jail_exec().
// NOTE(review): mount propagation errors are printed with strerror(r); this
// assumes pakfire_mount_change_propagation() returns a positive errno value.
1965 static int pakfire_jail_child1(struct pakfire_jail
* jail
,
1966 struct pakfire_jail_exec
* ctx
, const char* argv
[]) {
1969 // Redirect any logging to our log pipe
1970 pakfire_ctx_set_log_callback(jail
->ctx
, pakfire_jail_log_redirect
, &ctx
->pipes
);
1972 CTX_DEBUG(jail
->ctx
, "First child process launched\n");
1974 const int socket_send
= pakfire_jail_get_pipe_to_write(jail
, &ctx
->socket
);
1976 const char* root
= pakfire_get_path(jail
->pakfire
);
// Ask the kernel to send SIGKILL to this process when its parent dies
1979 r
= prctl(PR_SET_PDEATHSIG
, SIGKILL
, 0, 0, 0);
1981 CTX_ERROR(jail
->ctx
, "Could not configure to die with parent: %s\n", strerror(errno
));
1985 // Change mount propagation so that we will receive, but don't propagate back
1986 r
= pakfire_mount_change_propagation(jail
->ctx
, "/", MS_SLAVE
);
1988 CTX_ERROR(jail
->ctx
, "Could not change mount propagation to SLAVE: %s\n", strerror(r
));
1992 // Make root a mountpoint in the new mount namespace
1993 r
= pakfire_mount_make_mounpoint(jail
->pakfire
, root
);
1997 // Make everything private
1998 r
= pakfire_mount_change_propagation(jail
->ctx
, root
, MS_PRIVATE
);
2000 CTX_ERROR(jail
->ctx
, "Could not change mount propagation to PRIVATE: %s\n", strerror(r
));
// Mount everything below root, then make root the new /
2005 r
= pakfire_jail_mount(jail
, ctx
);
2010 r
= pakfire_jail_switch_root(jail
, root
);
2014 // Change mount propagation so that we will propagate everything down
2015 r
= pakfire_mount_change_propagation(jail
->ctx
, "/", MS_SHARED
);
2017 CTX_ERROR(jail
->ctx
, "Could not change mount propagation to SHARED: %s\n", strerror(r
));
2021 // Configure child process
2022 struct clone_args args
= {
2032 .exit_signal
= SIGCHLD
,
2033 .pidfd
= (long long unsigned int)&ctx
->pidfd2
,
2036 // Launch the process into the configured cgroup
2038 args
.flags
|= CLONE_INTO_CGROUP
;
2040 // Clone into this cgroup
2041 args
.cgroup
= pakfire_cgroup_fd(ctx
->cgroup
);
// Without networking the second child gets its own network namespace
2045 if (!pakfire_jail_exec_has_flag(ctx
, PAKFIRE_JAIL_HAS_NETWORKING
))
2046 args
.flags
|= CLONE_NEWNET
;
2048 // Fork the second child process
2049 pid_t pid
= clone3(&args
, sizeof(args
));
2051 CTX_ERROR(jail
->ctx
, "Could not fork the first child process: %s\n", strerror(errno
));
2056 } else if (pid
== 0) {
2057 r
= pakfire_jail_child2(jail
, ctx
, argv
);
2061 // Send the pidfd of the child to the first parent
2062 r
= pakfire_jail_send_fd(jail
, socket_send
, ctx
->pidfd2
);
2070 // Run a command in the jail
// Common implementation behind pakfire_jail_exec() and
// pakfire_jail_exec_interactive().
//
// Parameters:
//   jail            - the jail to run in
//   argv            - NULL-terminated command; argv and argv[0] must be set
//   interactive     - callers in this file pass 1 for an interactive run
//                     and 0 otherwise
//   communicate_in  - stored as ctx.communicate.in; a stdin pipe is only
//                     created when it is set
//   communicate_out - stored as ctx.communicate.out
//   data, flags     - their use is not visible in this view
//
// Visible steps, in order:
//   - initialize the per-call context with every descriptor pair set to -1
//   - become the child subreaper (PR_SET_CHILD_SUBREAPER)
//   - set PAKFIRE_JAIL_HAS_NETWORKING in interactive mode
//   - create a SOCK_DGRAM UNIX socket pair and an eventfd (completed_fd),
//     both close-on-exec
//   - when not running interactively, create the stdin/stdout/stderr pipes
//   - create the log pipes with O_CLOEXEC
//   - create a child cgroup of jail->cgroup that is named after the jail UUID
//   - clone3() the first child, which runs pakfire_jail_child1(); the
//     parent calls pakfire_jail_wait()
//   - afterwards: read and dump the cgroup stats, destroy and unref the
//     cgroup, close all pipes, both pidfds and the socket
//
// NOTE(review): the existing "XXX" comment warns that reading the cgroup
// stats overwrites r, i.e. possibly the result of pakfire_jail_wait() -
// confirm what this function returns in that case.
2071 static int __pakfire_jail_exec(struct pakfire_jail
* jail
, const char* argv
[],
2072 const int interactive
,
2073 pakfire_jail_communicate_in communicate_in
,
2074 pakfire_jail_communicate_out communicate_out
,
2075 void* data
, int flags
) {
2078 // Check if argv is valid
2079 if (!argv
|| !argv
[0]) {
2084 // Initialize context for this call
2085 struct pakfire_jail_exec ctx
= {
2088 .socket
= { -1, -1 },
2091 .stdin
= { -1, -1 },
2092 .stdout
= { -1, -1 },
2093 .stderr
= { -1, -1 },
2094 .log_INFO
= { -1, -1 },
2095 .log_ERROR
= { -1, -1 },
2097 .log_DEBUG
= { -1, -1 },
2098 #endif /* ENABLE_DEBUG */
2102 .in
= communicate_in
,
2103 .out
= communicate_out
,
2112 DEBUG(jail
->pakfire
, "Executing jail...\n");
2114 // Become the subreaper
2115 r
= prctl(PR_SET_CHILD_SUBREAPER
, 1, 0, 0, 0);
2117 CTX_ERROR(jail
->ctx
, "Failed to become the sub-reaper: %s\n", strerror(errno
));
2122 // Enable networking in interactive mode
2124 ctx
.flags
|= PAKFIRE_JAIL_HAS_NETWORKING
;
2126 // Create a UNIX domain socket
2127 r
= socketpair(AF_UNIX
, SOCK_DGRAM
|SOCK_CLOEXEC
, 0, ctx
.socket
);
2129 CTX_ERROR(jail
->ctx
, "Could not create UNIX socket: %s\n", strerror(errno
));
2135 Setup a file descriptor which can be used to notify the client that the parent
2136 has completed configuration.
2138 ctx
.completed_fd
= eventfd(0, EFD_CLOEXEC
);
2139 if (ctx
.completed_fd
< 0) {
2140 ERROR(jail
->pakfire
, "eventfd() failed: %m\n");
2144 // Create pipes to communicate with child process if we are not running interactively
2146 // stdin (only if callback is set)
2147 if (ctx
.communicate
.in
) {
2148 r
= pakfire_jail_setup_pipe(jail
, &ctx
.pipes
.stdin
, 0);
2154 r
= pakfire_jail_setup_pipe(jail
, &ctx
.pipes
.stdout
, 0);
2159 r
= pakfire_jail_setup_pipe(jail
, &ctx
.pipes
.stderr
, 0);
2164 // Setup pipes for logging
2166 r
= pakfire_jail_setup_pipe(jail
, &ctx
.pipes
.log_INFO
, O_CLOEXEC
);
2171 r
= pakfire_jail_setup_pipe(jail
, &ctx
.pipes
.log_ERROR
, O_CLOEXEC
);
2177 r
= pakfire_jail_setup_pipe(jail
, &ctx
.pipes
.log_DEBUG
, O_CLOEXEC
);
2180 #endif /* ENABLE_DEBUG */
2182 // Launch the process in a cgroup that is a leaf of the configured cgroup
2185 const char* uuid
= pakfire_jail_uuid(jail
);
2187 // Create a temporary cgroup
2188 r
= pakfire_cgroup_child(&ctx
.cgroup
, jail
->cgroup
, uuid
, 0);
2190 ERROR(jail
->pakfire
, "Could not create cgroup for jail: %m\n");
2196 Initially, we will set up a new mount namespace and launch a child process in it.
2198 This process remains in the user/ipc/time/etc. namespace and will set up
2199 the mount namespace.
2202 // Configure child process
2203 struct clone_args args
= {
2207 CLONE_CLEAR_SIGHAND
,
2208 .exit_signal
= SIGCHLD
,
2209 .pidfd
= (long long unsigned int)&ctx
.pidfd1
,
2212 // Fork the first child process
2213 pid_t pid
= clone3(&args
, sizeof(args
));
2215 CTX_ERROR(jail
->ctx
, "Could not fork the first child process: %s\n", strerror(errno
));
2220 } else if (pid
== 0) {
2221 r
= pakfire_jail_child1(jail
, &ctx
, argv
);
// Parent side: hand over to pakfire_jail_wait()
2226 r
= pakfire_jail_wait(jail
, &ctx
);
2231 // Destroy the temporary cgroup (if any)
2234 // XXX this is currently disabled because it overwrites r
2235 // Read cgroup stats
2236 r
= pakfire_cgroup_stat(ctx
.cgroup
, &ctx
.cgroup_stats
);
2238 ERROR(jail
->pakfire
, "Could not read cgroup stats: %m\n");
2240 pakfire_cgroup_stat_dump(ctx
.cgroup
, &ctx
.cgroup_stats
);
2244 pakfire_cgroup_destroy(ctx
.cgroup
);
2245 pakfire_cgroup_unref(ctx
.cgroup
);
2248 // Close any file descriptors
2249 pakfire_jail_close_pipe(jail
, ctx
.pipes
.stdin
);
2250 pakfire_jail_close_pipe(jail
, ctx
.pipes
.stdout
);
2251 pakfire_jail_close_pipe(jail
, ctx
.pipes
.stderr
);
2252 pakfire_jail_close_pipe(jail
, ctx
.pipes
.log_INFO
);
2253 pakfire_jail_close_pipe(jail
, ctx
.pipes
.log_ERROR
);
2255 pakfire_jail_close_pipe(jail
, ctx
.pipes
.log_DEBUG
);
2256 #endif /* ENABLE_DEBUG */
2257 if (ctx
.pidfd1
>= 0)
2259 if (ctx
.pidfd2
>= 0)
2263 pakfire_jail_close_pipe(jail
, ctx
.socket
);
// Public entry point for running a command in the jail non-interactively.
//
// Forwards to __pakfire_jail_exec() with interactive set to 0 and passes
// argv, both callbacks, data and flags through unchanged. The return value
// is whatever __pakfire_jail_exec() returns.
2268 PAKFIRE_EXPORT
int pakfire_jail_exec(
2269 struct pakfire_jail
* jail
,
2271 pakfire_jail_communicate_in callback_in
,
2272 pakfire_jail_communicate_out callback_out
,
2273 void* data
, int flags
) {
2274 return __pakfire_jail_exec(jail
, argv
, 0, callback_in
, callback_out
, data
, flags
);
// Runs argv in the jail in interactive mode.
//
// pakfire_jail_setup_interactive_env() is called first; the command is then
// run through __pakfire_jail_exec() with interactive set to 1 and without
// any communication callbacks or callback data.
2277 static int pakfire_jail_exec_interactive(
2278 struct pakfire_jail
* jail
, const char* argv
[], int flags
) {
2281 // Setup interactive stuff
2282 r
= pakfire_jail_setup_interactive_env(jail
);
2286 return __pakfire_jail_exec(jail
, argv
, 1, NULL
, NULL
, NULL
, flags
);
// Writes a script to a temporary file inside the jail's root and runs it.
//
// The file is created from the template
// <root>/PAKFIRE_TMP_DIR/pakfire-script.XXXXXX with mode 0700. The command
// line is the script path (made relative to root when root is set) followed
// by the entries of args, and is run through pakfire_jail_exec() with
// callback_in, callback_out and data. Per the trailing comment the script is
// removed from disk afterwards.
//
// NOTE(review): the script is written with fprintf("%s"), which stops at the
// first NUL byte and needs script to be NUL-terminated, while the debug
// output above it limits the text to size bytes with "%.*s" - confirm which
// contract callers rely on.
// NOTE(review): the loop that counts the arguments dereferences args; a
// NULL check for args is not visible in this view.
2289 int pakfire_jail_exec_script(struct pakfire_jail
* jail
,
2293 pakfire_jail_communicate_in callback_in
,
2294 pakfire_jail_communicate_out callback_out
,
2296 char path
[PATH_MAX
];
2297 const char** argv
= NULL
;
2301 const char* root
= pakfire_get_path(jail
->pakfire
);
2303 // Write the scriptlet to disk
2304 r
= pakfire_path_append(path
, root
, PAKFIRE_TMP_DIR
"/pakfire-script.XXXXXX");
2308 // Create a temporary file
2309 f
= pakfire_mktemp(path
, 0700);
2311 ERROR(jail
->pakfire
, "Could not create temporary file: %m\n");
2315 DEBUG(jail
->pakfire
, "Writing script to %s:\n%.*s\n", path
, (int)size
, script
);
2318 r
= fprintf(f
, "%s", script
);
2320 ERROR(jail
->pakfire
, "Could not write script to file %s: %m\n", path
);
2327 ERROR(jail
->pakfire
, "Could not close script file %s: %m\n", path
);
2333 // Count how many arguments were passed
// Starts at 1 because argv[0] is the script itself
2334 unsigned int argc
= 1;
2336 for (const char** arg
= args
; *arg
; arg
++)
// One extra, zero-initialized element terminates the array
2340 argv
= calloc(argc
+ 1, sizeof(*argv
));
2342 ERROR(jail
->pakfire
, "Could not allocate argv: %m\n");
// Inside the jail the script is addressed relative to the jail's root
2347 argv
[0] = (root
) ? pakfire_path_relpath(root
, path
) : path
;
2350 for (unsigned int i
= 1; i
< argc
; i
++)
2351 argv
[i
] = args
[i
-1];
2354 r
= pakfire_jail_exec(jail
, argv
, callback_in
, callback_out
, data
, 0);
2362 // Remove script from disk
2370 A convenience function that creates a new jail, runs the given command and destroys
// Creates a jail for pakfire, runs argv in it and drops the jail reference.
//
// The command is run through pakfire_jail_exec() without an input callback;
// pakfire_jail_capture_stdout is installed as the output callback with
// output as its data pointer.
//
// NOTE(review): the flags parameter is not forwarded - the visible call
// passes a literal 0 to pakfire_jail_exec(). Confirm whether that is
// intended.
2373 int pakfire_jail_run(struct pakfire
* pakfire
, const char* argv
[], int flags
, char** output
) {
2374 struct pakfire_jail
* jail
= NULL
;
2377 // Create a new jail
2378 r
= pakfire_jail_create(&jail
, pakfire
);
2382 // Execute the command
2383 r
= pakfire_jail_exec(jail
, argv
, NULL
, pakfire_jail_capture_stdout
, output
, 0);
2387 pakfire_jail_unref(jail
);
// Creates a jail for pakfire, runs the given script in it through
// pakfire_jail_exec_script() and drops the jail reference.
//
// script and length describe the script text, argv holds the arguments that
// are handed to the script. No communication callbacks are installed.
//
// NOTE(review): the flags parameter does not appear in the visible call to
// pakfire_jail_exec_script() - confirm whether it is meant to be unused.
2392 int pakfire_jail_run_script(struct pakfire
* pakfire
,
2393 const char* script
, const size_t length
, const char* argv
[], int flags
) {
2394 struct pakfire_jail
* jail
= NULL
;
2397 // Create a new jail
2398 r
= pakfire_jail_create(&jail
, pakfire
);
2402 // Execute the command
2403 r
= pakfire_jail_exec_script(jail
, script
, length
, argv
, NULL
, NULL
, NULL
);
2407 pakfire_jail_unref(jail
);
// Starts an interactive login shell ("/bin/bash --login") in the jail via
// pakfire_jail_exec_interactive() with flags set to 0.
//
// Per the trailing comment the return code of the shell is ignored; the
// statements that implement this are not visible in this view.
2412 int pakfire_jail_shell(struct pakfire_jail
* jail
) {
2415 const char* argv
[] = {
2416 "/bin/bash", "--login", NULL
,
2419 // Execute /bin/bash
2420 r
= pakfire_jail_exec_interactive(jail
, argv
, 0);
2426 // Ignore any return codes from the shell
// Runs argv in a fresh jail, but only if argv[0] is an executable file.
//
// pakfire_path() builds a path for argv[0] into the local buffer, and
// access(X_OK) is checked on that path. When the check fails, a debug
// message is logged and the command is skipped; otherwise the command is run
// with pakfire_jail_run() without flags and without capturing output.
//
// NOTE(review): the value returned when the command is skipped is not
// visible in this view.
2430 static int pakfire_jail_run_if_possible(struct pakfire
* pakfire
, const char** argv
) {
2431 char path
[PATH_MAX
];
// Build the path that is checked below from argv[0]
2434 r
= pakfire_path(pakfire
, path
, "%s", *argv
);
2438 // Check if the file is executable
2439 r
= access(path
, X_OK
);
2441 DEBUG(pakfire
, "%s is not executable. Skipping...\n", *argv
);
2445 return pakfire_jail_run(pakfire
, argv
, 0, NULL
);
// Runs the command stored in the local argv array through
// pakfire_jail_run_if_possible(), which skips it when the program is not
// executable.
//
// NOTE(review): the contents of argv are not visible in this view; going by
// the function name it is presumably the ldconfig command line - confirm.
2448 int pakfire_jail_ldconfig(struct pakfire
* pakfire
) {
2449 const char* argv
[] = {
2454 return pakfire_jail_run_if_possible(pakfire
, argv
);
// Runs /usr/bin/systemd-tmpfiles through pakfire_jail_run_if_possible(),
// which skips the command when the program is not executable.
//
// NOTE(review): only the program path is visible in argv; any further
// arguments and the NULL terminator are not visible in this view.
2457 int pakfire_jail_run_systemd_tmpfiles(struct pakfire
* pakfire
) {
2458 const char* argv
[] = {
2459 "/usr/bin/systemd-tmpfiles",
2464 return pakfire_jail_run_if_possible(pakfire
, argv
);