1 /*#############################################################################
3 # Pakfire - The IPFire package management system #
4 # Copyright (C) 2022 Pakfire development team #
6 # This program is free software: you can redistribute it and/or modify #
7 # it under the terms of the GNU General Public License as published by #
8 # the Free Software Foundation, either version 3 of the License, or #
9 # (at your option) any later version. #
11 # This program is distributed in the hope that it will be useful, #
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of #
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
14 # GNU General Public License for more details. #
16 # You should have received a copy of the GNU General Public License #
17 # along with this program. If not, see <http://www.gnu.org/licenses/>. #
19 #############################################################################*/
23 #include <linux/capability.h>
24 #include <linux/sched.h>
26 #include <linux/wait.h>
31 #include <sys/capability.h>
32 #include <sys/epoll.h>
33 #include <sys/eventfd.h>
34 #include <sys/mount.h>
35 #include <sys/personality.h>
36 #include <sys/prctl.h>
37 #include <sys/resource.h>
38 #include <sys/signalfd.h>
39 #include <sys/timerfd.h>
40 #include <sys/types.h>
45 #include <netlink/route/link.h>
53 #include <pakfire/arch.h>
54 #include <pakfire/cgroup.h>
55 #include <pakfire/jail.h>
56 #include <pakfire/logging.h>
57 #include <pakfire/mount.h>
58 #include <pakfire/os.h>
59 #include <pakfire/pakfire.h>
60 #include <pakfire/path.h>
61 #include <pakfire/private.h>
62 #include <pakfire/pwd.h>
63 #include <pakfire/string.h>
64 #include <pakfire/util.h>
// Size in bytes of each log buffer. The expression is parenthesized so that
// the macro expands safely inside larger expressions (e.g. x / BUFFER_SIZE).
#define BUFFER_SIZE      (1024 * 64)
// Number of slots in the environment array of a jail
#define ENVIRON_SIZE     128
// Number of events that are fetched by one call to epoll_wait()
#define EPOLL_MAX_EVENTS 2
// Number of slots in the mountpoint array of a jail
#define MAX_MOUNTPOINTS  8
71 // The default environment that will be set for every command
72 static const struct environ
{
77 { "LANG", "C.utf-8" },
78 { "PATH", "/usr/local/sbin:/usr/sbin:/sbin:/usr/local/bin:/usr/bin:/bin", },
81 // Tell everything that it is running inside a Pakfire container
82 { "container", "pakfire" },
86 struct pakfire_jail_mountpoint
{
87 char source
[PATH_MAX
];
88 char target
[PATH_MAX
];
93 struct pakfire_ctx
* ctx
;
94 struct pakfire
* pakfire
;
97 // A unique ID for each jail
99 char __uuid
[UUID_STR_LEN
];
105 struct itimerspec timeout
;
108 struct pakfire_cgroup
* cgroup
;
111 char* env
[ENVIRON_SIZE
];
114 struct pakfire_jail_mountpoint mountpoints
[MAX_MOUNTPOINTS
];
115 unsigned int num_mountpoints
;
118 struct pakfire_jail_callbacks
{
120 pakfire_jail_log_callback log
;
125 struct pakfire_log_buffer
{
126 char data
[BUFFER_SIZE
];
130 struct pakfire_jail_exec
{
133 // PIDs (of the children)
137 // Socket to pass FDs
140 // FD to notify the client that the parent has finished initialization
144 struct pakfire_jail_pipes
{
154 #endif /* ENABLE_DEBUG */
158 struct pakfire_jail_communicate
{
159 pakfire_jail_communicate_in in
;
160 pakfire_jail_communicate_out out
;
165 struct pakfire_jail_buffers
{
166 struct pakfire_log_buffer stdout
;
167 struct pakfire_log_buffer stderr
;
170 struct pakfire_log_buffer log_INFO
;
171 struct pakfire_log_buffer log_ERROR
;
173 struct pakfire_log_buffer log_DEBUG
;
174 #endif /* ENABLE_DEBUG */
177 struct pakfire_cgroup
* cgroup
;
178 struct pakfire_cgroup_stats cgroup_stats
;
/*
	Thin wrapper that invokes the clone3 system call through syscall().

	The result of syscall() is returned converted to int.
*/
static int clone3(struct clone_args* args, size_t size) {
	const long result = syscall(__NR_clone3, args, size);

	return result;
}
185 static int pidfd_send_signal(int pidfd
, int sig
, siginfo_t
* info
, unsigned int flags
) {
186 return syscall(SYS_pidfd_send_signal
, pidfd
, sig
, info
, flags
);
/*
	Thin wrapper that invokes the pivot_root system call through syscall().

	The result of syscall() is returned converted to int.
*/
static int pivot_root(const char* new_root, const char* old_root) {
	const long result = syscall(SYS_pivot_root, new_root, old_root);

	return result;
}
193 static int pakfire_jail_exec_has_flag(
194 const struct pakfire_jail_exec
* ctx
, const enum pakfire_jail_exec_flags flag
) {
195 return ctx
->flags
& flag
;
198 static void pakfire_jail_free(struct pakfire_jail
* jail
) {
199 DEBUG(jail
->pakfire
, "Freeing jail at %p\n", jail
);
202 for (unsigned int i
= 0; jail
->env
[i
]; i
++)
206 pakfire_cgroup_unref(jail
->cgroup
);
208 pakfire_unref(jail
->pakfire
);
210 pakfire_ctx_unref(jail
->ctx
);
/*
	Passes any log messages on to the default pakfire log callback

	Messages are dispatched by priority to INFO(), ERROR() or (in debug builds)
	DEBUG(); any other priority is dropped. Always returns 0.
*/
static int pakfire_jail_default_log_callback(struct pakfire* pakfire, void* data,
		int priority, const char* line, size_t length) {
	if (priority == LOG_INFO) {
		INFO(pakfire, "%s", line);

	} else if (priority == LOG_ERR) {
		ERROR(pakfire, "%s", line);
	}
#ifdef ENABLE_DEBUG
	else if (priority == LOG_DEBUG) {
		DEBUG(pakfire, "%s", line);
	}
#endif

	return 0;
}
238 static const char* pakfire_jail_uuid(struct pakfire_jail
* jail
) {
240 uuid_unparse_lower(jail
->uuid
, jail
->__uuid
);
245 static int pakfire_jail_setup_interactive_env(struct pakfire_jail
* jail
) {
247 int r
= pakfire_jail_set_env(jail
, "PS1", "pakfire-jail \\w> ");
252 char* TERM
= secure_getenv("TERM");
254 r
= pakfire_jail_set_env(jail
, "TERM", TERM
);
260 char* LANG
= secure_getenv("LANG");
262 r
= pakfire_jail_set_env(jail
, "LANG", LANG
);
270 PAKFIRE_EXPORT
int pakfire_jail_create(struct pakfire_jail
** jail
, struct pakfire
* pakfire
) {
273 const char* arch
= pakfire_get_effective_arch(pakfire
);
275 // Allocate a new jail
276 struct pakfire_jail
* j
= calloc(1, sizeof(*j
));
281 j
->ctx
= pakfire_ctx(pakfire
);
284 j
->pakfire
= pakfire_ref(pakfire
);
286 // Initialize reference counter
289 // Generate a random UUID
290 uuid_generate_random(j
->uuid
);
292 DEBUG(j
->pakfire
, "Allocated new jail at %p\n", j
);
294 // Set the default logging callback
295 pakfire_jail_set_log_callback(j
, pakfire_jail_default_log_callback
, NULL
);
297 // Set default environment
298 for (const struct environ
* e
= ENV
; e
->key
; e
++) {
299 r
= pakfire_jail_set_env(j
, e
->key
, e
->val
);
304 // Enable all CPU features that CPU has to offer
305 if (!pakfire_arch_is_supported_by_host(arch
)) {
306 r
= pakfire_jail_set_env(j
, "QEMU_CPU", "max");
311 // Set container UUID
312 r
= pakfire_jail_set_env(j
, "container_uuid", pakfire_jail_uuid(j
));
316 // Disable systemctl to talk to systemd
317 if (!pakfire_on_root(j
->pakfire
)) {
318 r
= pakfire_jail_set_env(j
, "SYSTEMD_OFFLINE", "1");
328 pakfire_jail_free(j
);
333 PAKFIRE_EXPORT
struct pakfire_jail
* pakfire_jail_ref(struct pakfire_jail
* jail
) {
339 PAKFIRE_EXPORT
struct pakfire_jail
* pakfire_jail_unref(struct pakfire_jail
* jail
) {
340 if (--jail
->nrefs
> 0)
343 pakfire_jail_free(jail
);
349 PAKFIRE_EXPORT
void pakfire_jail_set_log_callback(struct pakfire_jail
* jail
,
350 pakfire_jail_log_callback callback
, void* data
) {
351 jail
->callbacks
.log
= callback
;
352 jail
->callbacks
.log_data
= data
;
357 PAKFIRE_EXPORT
int pakfire_jail_nice(struct pakfire_jail
* jail
, int nice
) {
358 // Check if nice level is in range
359 if (nice
< -19 || nice
> 20) {
370 int pakfire_jail_set_cgroup(struct pakfire_jail
* jail
, struct pakfire_cgroup
* cgroup
) {
371 // Free any previous cgroup
373 pakfire_cgroup_unref(jail
->cgroup
);
377 // Set any new cgroup
379 DEBUG(jail
->pakfire
, "Setting cgroup %p\n", cgroup
);
381 jail
->cgroup
= pakfire_cgroup_ref(cgroup
);
390 // Returns the length of the environment
391 static unsigned int pakfire_jail_env_length(struct pakfire_jail
* jail
) {
394 // Count everything in the environment
395 for (char** e
= jail
->env
; *e
; e
++)
401 // Finds an existing environment variable and returns its index or -1 if not found
402 static int pakfire_jail_find_env(struct pakfire_jail
* jail
, const char* key
) {
408 const size_t length
= strlen(key
);
410 for (unsigned int i
= 0; jail
->env
[i
]; i
++) {
411 if ((pakfire_string_startswith(jail
->env
[i
], key
)
412 && *(jail
->env
[i
] + length
) == '=')) {
421 // Returns the value of an environment variable or NULL
422 PAKFIRE_EXPORT
const char* pakfire_jail_get_env(struct pakfire_jail
* jail
,
424 int i
= pakfire_jail_find_env(jail
, key
);
428 return jail
->env
[i
] + strlen(key
) + 1;
431 // Sets an environment variable
432 PAKFIRE_EXPORT
int pakfire_jail_set_env(struct pakfire_jail
* jail
,
433 const char* key
, const char* value
) {
434 // Find the index where to write this value to
435 int i
= pakfire_jail_find_env(jail
, key
);
437 i
= pakfire_jail_env_length(jail
);
439 // Return -ENOSPC when the environment is full
440 if (i
>= ENVIRON_SIZE
) {
445 // Free any previous value
449 // Format and set environment variable
450 asprintf(&jail
->env
[i
], "%s=%s", key
, value
);
452 DEBUG(jail
->pakfire
, "Set environment variable: %s\n", jail
->env
[i
]);
457 // Imports an environment
458 PAKFIRE_EXPORT
int pakfire_jail_import_env(struct pakfire_jail
* jail
, const char* env
[]) {
466 // Copy environment variables
467 for (unsigned int i
= 0; env
[i
]; i
++) {
468 r
= pakfire_string_partition(env
[i
], "=", &key
, &val
);
473 r
= pakfire_jail_set_env(jail
, key
, val
);
490 PAKFIRE_EXPORT
int pakfire_jail_set_timeout(
491 struct pakfire_jail
* jail
, unsigned int timeout
) {
493 jail
->timeout
.it_value
.tv_sec
= timeout
;
496 DEBUG(jail
->pakfire
, "Timeout set to %u second(s)\n", timeout
);
498 DEBUG(jail
->pakfire
, "Timeout disabled\n");
503 static int pakfire_jail_create_timer(struct pakfire_jail
* jail
) {
506 // Nothing to do if no timeout has been set
507 if (!jail
->timeout
.it_value
.tv_sec
)
510 // Create a new timer
511 const int fd
= timerfd_create(CLOCK_MONOTONIC
, 0);
513 ERROR(jail
->pakfire
, "Could not create timer: %m\n");
518 r
= timerfd_settime(fd
, 0, &jail
->timeout
, NULL
);
520 ERROR(jail
->pakfire
, "Could not arm timer: %m\n");
536 static int pakfire_jail_handle_signals(struct pakfire_jail
* jail
) {
541 sigaddset(&mask
, SIGINT
);
544 r
= sigprocmask(SIG_BLOCK
, &mask
, NULL
);
546 ERROR(jail
->pakfire
, "Failed to block signals: %m\n");
550 // Create a file descriptor
551 r
= signalfd(-1, &mask
, SFD_NONBLOCK
|SFD_CLOEXEC
);
553 ERROR(jail
->pakfire
, "Failed to create signalfd: %m\n");
562 This function replaces any logging in the child process.
564 All log messages will be sent to the parent process through their respective pipes.
566 static void pakfire_jail_log_redirect(void* data
, int priority
, const char* file
,
567 int line
, const char* fn
, const char* format
, va_list args
) {
568 struct pakfire_jail_pipes
* pipes
= (struct pakfire_jail_pipes
*)data
;
573 fd
= pipes
->log_INFO
[1];
577 fd
= pipes
->log_ERROR
[1];
582 fd
= pipes
->log_DEBUG
[1];
584 #endif /* ENABLE_DEBUG */
586 // Ignore any messages of an unknown priority
591 // Send the log message
593 vdprintf(fd
, format
, args
);
596 static int pakfire_jail_log_buffer_is_full(const struct pakfire_log_buffer
* buffer
) {
597 return (sizeof(buffer
->data
) == buffer
->used
);
601 This function reads as much data as it can from the file descriptor.
602 If it finds a whole line in it, it will send it to the logger and repeat the process.
	If no newline character is found, it will try to read more data until it finds one.
605 static int pakfire_jail_handle_log(struct pakfire_jail
* jail
,
606 struct pakfire_jail_exec
* ctx
, int priority
, int fd
,
607 struct pakfire_log_buffer
* buffer
, pakfire_jail_communicate_out callback
, void* data
) {
608 char line
[BUFFER_SIZE
+ 1];
610 // Fill up buffer from fd
611 if (buffer
->used
< sizeof(buffer
->data
)) {
612 ssize_t bytes_read
= read(fd
, buffer
->data
+ buffer
->used
,
613 sizeof(buffer
->data
) - buffer
->used
);
616 if (bytes_read
< 0) {
617 ERROR(jail
->pakfire
, "Could not read from fd %d: %m\n", fd
);
621 // Update buffer size
622 buffer
->used
+= bytes_read
;
625 // See if we have any lines that we can write
626 while (buffer
->used
) {
627 // Search for the end of the first line
628 char* eol
= memchr(buffer
->data
, '\n', buffer
->used
);
632 // If the buffer is full, we send the content to the logger and try again
		// This should not happen in practice
634 if (pakfire_jail_log_buffer_is_full(buffer
)) {
635 DEBUG(jail
->pakfire
, "Logging buffer is full. Sending all content\n");
637 eol
= buffer
->data
+ sizeof(buffer
->data
) - 1;
639 // Otherwise we might have only read parts of the output
644 // Find the length of the string
645 size_t length
= eol
- buffer
->data
+ 1;
647 // Copy the line into the buffer
648 memcpy(line
, buffer
->data
, length
);
650 // Terminate the string
655 int r
= callback(jail
->pakfire
, data
, priority
, line
, length
);
657 ERROR(jail
->pakfire
, "The logging callback returned an error: %d\n", r
);
662 // Remove line from buffer
663 memmove(buffer
->data
, buffer
->data
+ length
, buffer
->used
- length
);
664 buffer
->used
-= length
;
670 static int pakfire_jail_stream_stdin(struct pakfire_jail
* jail
,
671 struct pakfire_jail_exec
* ctx
, const int fd
) {
674 // Nothing to do if there is no stdin callback set
675 if (!ctx
->communicate
.in
) {
676 DEBUG(jail
->pakfire
, "Callback for standard input is not set\n");
680 // Skip if the writing pipe has already been closed
681 if (!ctx
->pipes
.stdin
[1])
684 DEBUG(jail
->pakfire
, "Streaming standard input...\n");
686 // Calling the callback
687 r
= ctx
->communicate
.in(jail
->pakfire
, ctx
->communicate
.data
, fd
);
689 DEBUG(jail
->pakfire
, "Standard input callback finished: %d\n", r
);
691 // The callback signaled that it has written everything
693 DEBUG(jail
->pakfire
, "Closing standard input pipe\n");
695 // Close the file-descriptor
698 // Reset the file-descriptor so it won't be closed again later
699 ctx
->pipes
.stdin
[1] = -1;
708 static int pakfire_jail_setup_pipe(struct pakfire_jail
* jail
, int (*fds
)[2], const int flags
) {
709 int r
= pipe2(*fds
, flags
);
711 ERROR(jail
->pakfire
, "Could not setup pipe: %m\n");
// Closes both ends of a pipe, skipping any end that holds a negative
// file descriptor
static void pakfire_jail_close_pipe(struct pakfire_jail* jail, int fds[2]) {
	const int* end = fds + 2;

	for (const int* fd = fds; fd < end; fd++) {
		if (*fd < 0)
			continue;

		close(*fd);
	}
}
/*
	This is a convenience function to fetch the reading end of a pipe and
	closes the write end.

	Returns the read end, or -1 if the read end is not open.
*/
static int pakfire_jail_get_pipe_to_read(struct pakfire_jail* jail, int (*fds)[2]) {
	int* const pipe = *fds;

	// Index 0 is the end we keep, index 1 is the end we give up
	enum { KEEP = 0, DROP = 1 };

	// Close the write end of the pipe and mark it as closed
	if (pipe[DROP] >= 0) {
		close(pipe[DROP]);
		pipe[DROP] = -1;
	}

	// Return the read end
	return (pipe[KEEP] >= 0) ? pipe[KEEP] : -1;
}
/*
	This is a convenience function to fetch the writing end of a pipe and
	closes the read end.

	Returns the write end, or -1 if the write end is not open.
*/
static int pakfire_jail_get_pipe_to_write(struct pakfire_jail* jail, int (*fds)[2]) {
	int* const pipe = *fds;

	// Index 1 is the end we keep, index 0 is the end we give up
	enum { DROP = 0, KEEP = 1 };

	// Close the read end of the pipe and mark it as closed
	if (pipe[DROP] >= 0) {
		close(pipe[DROP]);
		pipe[DROP] = -1;
	}

	// Return the write end
	return (pipe[KEEP] >= 0) ? pipe[KEEP] : -1;
}
764 static int pakfire_jail_recv_fd(struct pakfire_jail
* jail
, int socket
, int* fd
) {
765 const size_t payload_length
= sizeof(fd
);
766 char buffer
[CMSG_SPACE(payload_length
)];
769 struct msghdr msg
= {
770 .msg_control
= buffer
,
771 .msg_controllen
= sizeof(buffer
),
774 // Receive the message
775 r
= recvmsg(socket
, &msg
, 0);
777 CTX_ERROR(jail
->ctx
, "Could not receive file descriptor: %s\n", strerror(errno
));
782 struct cmsghdr
* cmsg
= CMSG_FIRSTHDR(&msg
);
786 *fd
= *((int*)CMSG_DATA(cmsg
));
788 CTX_DEBUG(jail
->ctx
, "Received fd %d from socket %d\n", *fd
, socket
);
793 static int pakfire_jail_send_fd(struct pakfire_jail
* jail
, int socket
, int fd
) {
794 const size_t payload_length
= sizeof(fd
);
795 char buffer
[CMSG_SPACE(payload_length
)];
798 CTX_DEBUG(jail
->ctx
, "Sending fd %d to socket %d\n", fd
, socket
);
801 struct msghdr msg
= {
802 .msg_control
= buffer
,
803 .msg_controllen
= sizeof(buffer
),
807 struct cmsghdr
* cmsg
= CMSG_FIRSTHDR(&msg
);
808 cmsg
->cmsg_level
= SOL_SOCKET
;
809 cmsg
->cmsg_type
= SCM_RIGHTS
;
810 cmsg
->cmsg_len
= CMSG_LEN(payload_length
);
813 *((int*)CMSG_DATA(cmsg
)) = fd
;
816 r
= sendmsg(socket
, &msg
, 0);
818 CTX_ERROR(jail
->ctx
, "Could not send file descriptor: %s\n", strerror(errno
));
/*
	Output callback which forwards a line to pakfire_log_condition() using the
	given priority. At most length bytes of line are logged.

	Always returns 0.
*/
static int pakfire_jail_log(struct pakfire* pakfire, void* data, int priority,
		const char* line, const size_t length) {
	// The precision of %.*s has to be passed as an int
	const int precision = (int)length;

	// Pass everything to the parent logger
	pakfire_log_condition(pakfire, priority, 0, "%.*s", precision, line);

	return 0;
}
833 static int pakfire_jail_epoll_add_fd(struct pakfire_jail
* jail
, int epollfd
, int fd
, int events
) {
834 struct epoll_event event
= {
835 .events
= events
|EPOLLHUP
,
843 int flags
= fcntl(fd
, F_GETFL
, 0);
845 // Set modified flags
846 r
= fcntl(fd
, F_SETFL
, flags
|O_NONBLOCK
);
848 CTX_ERROR(jail
->ctx
, "Could not set file descriptor %d into non-blocking mode: %s\n",
849 fd
, strerror(errno
));
853 // Add the file descriptor to the loop
854 r
= epoll_ctl(epollfd
, EPOLL_CTL_ADD
, fd
, &event
);
856 ERROR(jail
->pakfire
, "Could not add file descriptor %d to epoll(): %s\n",
857 fd
, strerror(errno
));
864 static int pakfire_jail_setup_child2(struct pakfire_jail
* jail
, struct pakfire_jail_exec
* ctx
);
866 static int pakfire_jail_wait_on_child(struct pakfire_jail
* jail
, int pidfd
) {
867 siginfo_t status
= {};
870 // Call waitid() and store the result
871 r
= waitid(P_PIDFD
, pidfd
, &status
, WEXITED
);
873 CTX_ERROR(jail
->ctx
, "waitid() failed: %s\n", strerror(errno
));
877 switch (status
.si_code
) {
878 // If the process exited normally, we return the exit code
880 CTX_DEBUG(jail
->ctx
, "The child process exited with code %d\n", status
.si_status
);
881 return status
.si_status
;
884 CTX_ERROR(jail
->ctx
, "The child process was killed\n");
888 CTX_ERROR(jail
->ctx
, "The child process terminated abnormally\n");
893 CTX_ERROR(jail
->ctx
, "Unknown child exit code: %d\n", status
.si_code
);
900 static int pakfire_jail_wait(struct pakfire_jail
* jail
, struct pakfire_jail_exec
* ctx
) {
902 struct epoll_event events
[EPOLL_MAX_EVENTS
];
906 // Fetch the UNIX domain socket
907 const int socket_recv
= pakfire_jail_get_pipe_to_read(jail
, &ctx
->socket
);
909 // Fetch file descriptors from context
910 const int stdin
= pakfire_jail_get_pipe_to_write(jail
, &ctx
->pipes
.stdin
);
911 const int stdout
= pakfire_jail_get_pipe_to_read(jail
, &ctx
->pipes
.stdout
);
912 const int stderr
= pakfire_jail_get_pipe_to_read(jail
, &ctx
->pipes
.stderr
);
915 const int timerfd
= pakfire_jail_create_timer(jail
);
918 const int log_INFO
= pakfire_jail_get_pipe_to_read(jail
, &ctx
->pipes
.log_INFO
);
919 const int log_ERROR
= pakfire_jail_get_pipe_to_read(jail
, &ctx
->pipes
.log_ERROR
);
921 const int log_DEBUG
= pakfire_jail_get_pipe_to_read(jail
, &ctx
->pipes
.log_DEBUG
);
922 #endif /* ENABLE_DEBUG */
926 const int signalfd
= pakfire_jail_handle_signals(jail
);
929 // Make a list of all file descriptors we are interested in
930 const struct pakfire_wait_fds
{
934 { socket_recv
, EPOLLIN
},
936 // Standard input/output
942 { timerfd
, EPOLLIN
},
945 { ctx
->pidfd1
, EPOLLIN
},
949 { signafd
, EPOLLIN
},
953 { log_INFO
, EPOLLIN
},
954 { log_ERROR
, EPOLLIN
},
956 { log_DEBUG
, EPOLLIN
},
957 #endif /* ENABLE_DEBUG */
964 epollfd
= epoll_create1(0);
966 ERROR(jail
->pakfire
, "Could not initialize epoll(): %m\n");
971 // Turn file descriptors into non-blocking mode and add them to epoll()
972 for (const struct pakfire_wait_fds
* fd
= fds
; fd
->events
; fd
++) {
973 // Skip fds which were not initialized
977 // Add the FD to the event loop
978 r
= pakfire_jail_epoll_add_fd(jail
, epollfd
, fd
->fd
, fd
->events
);
986 CTX_DEBUG(jail
->ctx
, "Launching main loop...\n");
988 // Loop for as long as the process is alive
990 int num
= epoll_wait(epollfd
, events
, EPOLL_MAX_EVENTS
, -1);
992 // Ignore if epoll_wait() has been interrupted
996 ERROR(jail
->pakfire
, "epoll_wait() failed: %m\n");
1002 for (int i
= 0; i
< num
; i
++) {
1003 int e
= events
[i
].events
;
1004 int fd
= events
[i
].data
.fd
;
1006 struct pakfire_log_buffer
* buffer
= NULL
;
1007 pakfire_jail_communicate_out callback
= NULL
;
1011 // Check if there is any data to be read
1013 // Monitor the first child process
1014 if (fd
== ctx
->pidfd1
) {
1015 r
= pakfire_jail_wait_on_child(jail
, ctx
->pidfd1
);
1017 CTX_ERROR(jail
->ctx
, "The first child exited with an error\n");
1026 // Monitor the second child process
1027 } else if (fd
== ctx
->pidfd2
) {
1028 exit
= pakfire_jail_wait_on_child(jail
, ctx
->pidfd2
);
1030 CTX_ERROR(jail
->ctx
, "The second child exited with an error\n");
1037 // Mark that we have ended so that we will process the remaining
1038 // events from epoll() now, but won't restart the outer loop.
1043 // Handle timer events
1044 } else if (fd
== timerfd
) {
1045 DEBUG(jail
->pakfire
, "Timer event received\n");
1048 r
= read(timerfd
, garbage
, sizeof(garbage
));
1050 ERROR(jail
->pakfire
, "Could not disarm timer: %m\n");
1055 // Terminate the process if it hasn't already ended
1057 DEBUG(jail
->pakfire
, "Terminating process...\n");
					// Send SIGKILL to the process
1060 r
= pidfd_send_signal(ctx
->pidfd2
, SIGKILL
, NULL
, 0);
1062 ERROR(jail
->pakfire
, "Could not kill process: %m\n");
1067 // There is nothing else to do
1072 } else if (fd
== signalfd
) {
1074 r
= read(signalfd
, &siginfo
, sizeof(siginfo
));
1076 ERROR(jail
->pakfire
, "Could not read signal: %m\n");
1080 DEBUG(jail
->pakfire
, "Received signal %u\n", siginfo
.ssi_signo
);
1083 switch (siginfo
.ssi_signo
) {
1084 // Pass SIGINT down to the child process
1086 r
= pidfd_send_signal(pidfd
, siginfo
.ssi_signo
, NULL
, 0);
1088 ERROR(jail
->pakfire
, "Could not send signal to process: %m\n");
1094 ERROR(jail
->pakfire
, "Received unhandled signal %u\n",
1099 // Don't fall through to log processing
1103 // Handle socket messages
1104 } else if (fd
== socket_recv
) {
1105 // Receive the FD of the second child process
1106 r
= pakfire_jail_recv_fd(jail
, socket_recv
, &ctx
->pidfd2
);
1110 // Add it to the event loop
1111 r
= pakfire_jail_epoll_add_fd(jail
, epollfd
, ctx
->pidfd2
, EPOLLIN
);
1115 // Setup the child process
1116 r
= pakfire_jail_setup_child2(jail
, ctx
);
1120 // Don't fall through to log processing
1123 // Handle logging messages
1124 } else if (fd
== log_INFO
) {
1125 buffer
= &ctx
->buffers
.log_INFO
;
1126 priority
= LOG_INFO
;
1128 callback
= pakfire_jail_log
;
1130 } else if (fd
== log_ERROR
) {
1131 buffer
= &ctx
->buffers
.log_ERROR
;
1134 callback
= pakfire_jail_log
;
1137 } else if (fd
== log_DEBUG
) {
1138 buffer
= &ctx
->buffers
.log_DEBUG
;
1139 priority
= LOG_DEBUG
;
1141 callback
= pakfire_jail_log
;
1142 #endif /* ENABLE_DEBUG */
				// Handle anything from the standard output/error pipes
1145 } else if (fd
== stdout
) {
1146 buffer
= &ctx
->buffers
.stdout
;
1147 priority
= LOG_INFO
;
1149 // Send any output to the default logger if no callback is set
1150 if (ctx
->communicate
.out
) {
1151 callback
= ctx
->communicate
.out
;
1152 data
= ctx
->communicate
.data
;
1154 callback
= jail
->callbacks
.log
;
1155 data
= jail
->callbacks
.log_data
;
1158 } else if (fd
== stderr
) {
1159 buffer
= &ctx
->buffers
.stderr
;
1162 // Send any output to the default logger if no callback is set
1163 if (ctx
->communicate
.out
) {
1164 callback
= ctx
->communicate
.out
;
1165 data
= ctx
->communicate
.data
;
1167 callback
= jail
->callbacks
.log
;
1168 data
= jail
->callbacks
.log_data
;
1172 DEBUG(jail
->pakfire
, "Received invalid file descriptor %d\n", fd
);
1177 r
= pakfire_jail_handle_log(jail
, ctx
, priority
, fd
, buffer
, callback
, data
);
1183 // Handle standard input
1185 r
= pakfire_jail_stream_stdin(jail
, ctx
, fd
);
1188 // Ignore if we filled up the buffer
1193 ERROR(jail
->pakfire
, "Could not write to stdin: %m\n");
1200 // Check if any file descriptors have been closed
1202 // Remove the file descriptor
1203 r
= epoll_ctl(epollfd
, EPOLL_CTL_DEL
, fd
, NULL
);
1205 ERROR(jail
->pakfire
, "Could not remove closed file-descriptor %d: %m\n", fd
);
1212 // Return the exit code
1216 CTX_DEBUG(jail
->ctx
, "Main loop terminated\n");
/*
	Output callback which collects everything that has been written to
	standard output in a heap-allocated string.

	data must either be NULL or point to a char* which is either NULL or a
	string that has been allocated on the heap. Every line with priority
	LOG_INFO is appended to it; the previous string is freed and replaced by
	the new one. The caller is responsible for freeing the final string.

	Everything else is passed on to the default log callback.

	Returns 0 on success, 1 if the new string could not be allocated (in which
	case the previous string is left untouched), or the result of the default
	log callback.
*/
int pakfire_jail_capture_stdout(struct pakfire* pakfire, void* data,
		int priority, const char* line, size_t length) {
	char** output = (char**)data;
	char* joined = NULL;
	int r;

	// Append everything from stdout to a buffer
	if (output && priority == LOG_INFO) {
		// Build the new string in a temporary so that the old one can be freed
		r = asprintf(&joined, "%s%s", (*output) ? *output : "", line);
		if (r < 0)
			return 1;

		// Replace the previous string which would otherwise be leaked
		free(*output);
		*output = joined;

		return 0;
	}

	// Send everything else to the default logger
	return pakfire_jail_default_log_callback(pakfire, NULL, priority, line, length);
}
1249 // Logs all capabilities of the current process
1250 static int pakfire_jail_show_capabilities(struct pakfire_jail
* jail
) {
1253 cap_flag_value_t value_e
;
1254 cap_flag_value_t value_i
;
1255 cap_flag_value_t value_p
;
1259 pid_t pid
= getpid();
1261 // Fetch all capabilities
1262 caps
= cap_get_proc();
1264 ERROR(jail
->pakfire
, "Could not fetch capabilities: %m\n");
1269 DEBUG(jail
->pakfire
, "Capabilities of PID %d:\n", pid
);
1271 // Iterate over all capabilities
1272 for (unsigned int cap
= 0; cap_valid(cap
); cap
++) {
1273 name
= cap_to_name(cap
);
1275 // Fetch effective value
1276 r
= cap_get_flag(caps
, cap
, CAP_EFFECTIVE
, &value_e
);
1280 // Fetch inheritable value
1281 r
= cap_get_flag(caps
, cap
, CAP_INHERITABLE
, &value_i
);
1285 // Fetch permitted value
1286 r
= cap_get_flag(caps
, cap
, CAP_PERMITTED
, &value_p
);
1290 DEBUG(jail
->pakfire
,
1291 " %-24s : %c%c%c\n",
1293 (value_e
== CAP_SET
) ? 'e' : '-',
1294 (value_i
== CAP_SET
) ? 'i' : '-',
1295 (value_p
== CAP_SET
) ? 'p' : '-'
1315 static int pakfire_jail_set_capabilities(struct pakfire_jail
* jail
) {
1320 // Fetch capabilities
1321 caps
= cap_get_proc();
1323 ERROR(jail
->pakfire
, "Could not read capabilities: %m\n");
1328 // Walk through all capabilities
1329 for (cap_value_t cap
= 0; cap_valid(cap
); cap
++) {
1330 cap_value_t _caps
[] = { cap
};
1332 // Fetch the name of the capability
1333 name
= cap_to_name(cap
);
1335 r
= cap_set_flag(caps
, CAP_EFFECTIVE
, 1, _caps
, CAP_SET
);
1337 ERROR(jail
->pakfire
, "Could not set %s: %m\n", name
);
1341 r
= cap_set_flag(caps
, CAP_INHERITABLE
, 1, _caps
, CAP_SET
);
1343 ERROR(jail
->pakfire
, "Could not set %s: %m\n", name
);
1347 r
= cap_set_flag(caps
, CAP_PERMITTED
, 1, _caps
, CAP_SET
);
1349 ERROR(jail
->pakfire
, "Could not set %s: %m\n", name
);
1358 // Restore all capabilities
1359 r
= cap_set_proc(caps
);
1361 ERROR(jail
->pakfire
, "Restoring capabilities failed: %m\n");
1365 // Add all capabilities to the ambient set
1366 for (unsigned int cap
= 0; cap_valid(cap
); cap
++) {
1367 name
= cap_to_name(cap
);
1369 // Raise the capability
1370 r
= prctl(PR_CAP_AMBIENT
, PR_CAP_AMBIENT_RAISE
, cap
, 0, 0);
1372 ERROR(jail
->pakfire
, "Could not set ambient capability %s: %m\n", name
);
1395 static int pakfire_jail_limit_syscalls(struct pakfire_jail
* jail
) {
1396 const int syscalls
[] = {
1397 // The kernel's keyring isn't namespaced
1400 SCMP_SYS(request_key
),
1402 // Disable userfaultfd
1403 SCMP_SYS(userfaultfd
),
1405 // Disable perf which could leak a lot of information about the host
1406 SCMP_SYS(perf_event_open
),
1412 DEBUG(jail
->pakfire
, "Applying syscall filter...\n");
1414 // Setup a syscall filter which allows everything by default
1415 scmp_filter_ctx ctx
= seccomp_init(SCMP_ACT_ALLOW
);
1417 ERROR(jail
->pakfire
, "Could not setup seccomp filter: %m\n");
1422 for (const int* syscall
= syscalls
; *syscall
; syscall
++) {
1423 r
= seccomp_rule_add(ctx
, SCMP_ACT_ERRNO(EPERM
), *syscall
, 0);
1425 ERROR(jail
->pakfire
, "Could not configure syscall %d: %m\n", *syscall
);
1430 // Load syscall filter into the kernel
1431 r
= seccomp_load(ctx
);
1433 ERROR(jail
->pakfire
, "Could not load syscall filter into the kernel: %m\n");
1439 seccomp_release(ctx
);
/*
	Registers a bind-mount (source -> target) with the jail.

	The jail keeps its mountpoints in a table which is checked against
	MAX_MOUNTPOINTS before anything is stored. source and target must both be
	non-NULL. The strings are copied into the next free slot with
	pakfire_string_set() and the slot only becomes used once num_mountpoints
	has been incremented at the very end.
*/
1446 PAKFIRE_EXPORT
int pakfire_jail_bind(struct pakfire_jail
* jail
,
1447 const char* source
, const char* target
, int flags
) {
1448 struct pakfire_jail_mountpoint
* mp
= NULL
;
1451 // Check if there is any space left
1452 if (jail
->num_mountpoints
>= MAX_MOUNTPOINTS
) {
1457 // Check for valid inputs
1458 if (!source
|| !target
) {
1463 // Select the next free slot
1464 mp
= &jail
->mountpoints
[jail
->num_mountpoints
];
// Copy the source path into the slot
1467 r
= pakfire_string_set(mp
->source
, source
);
1469 ERROR(jail
->pakfire
, "Could not copy source: %m\n");
// Copy the target path into the slot
1474 r
= pakfire_string_set(mp
->target
, target
);
1476 ERROR(jail
->pakfire
, "Could not copy target: %m\n");
1483 // Increment counter
1484 jail
->num_mountpoints
++;
/*
	Bind-mounts every entry of the local paths list read-only (MS_RDONLY)
	using pakfire_bind(). NULL is passed as the target argument.

	The list is walked until a NULL entry is reached.
*/
1489 static int pakfire_jail_mount_networking(struct pakfire_jail
* jail
) {
1492 const char* paths
[] = {
1498 // Bind-mount all paths read-only
1499 for (const char** path
= paths
; *path
; path
++) {
1500 r
= pakfire_bind(jail
->pakfire
, *path
, NULL
, MS_RDONLY
);
1503 // Ignore if we don't have permission
1518 Mounts everything that we require in the new namespace
/*
	Performs all mounts for one jail execution, in this order:

	  1. the default mounts (pakfire_mount_all), with
	     PAKFIRE_MOUNT_LOOP_DEVICES added to the flags when the execution
	     context has PAKFIRE_JAIL_HAS_LOOP_DEVICES set
	  2. the networking related mounts, only when the execution context has
	     PAKFIRE_JAIL_HAS_NETWORKING set
	  3. every mountpoint that has been registered with the jail

	Finally, the resulting mountpoints are logged.
*/
1520 static int pakfire_jail_mount(struct pakfire_jail
* jail
, struct pakfire_jail_exec
* ctx
) {
1521 struct pakfire_jail_mountpoint
* mp
= NULL
;
1525 // Enable loop devices
1526 if (pakfire_jail_exec_has_flag(ctx
, PAKFIRE_JAIL_HAS_LOOP_DEVICES
))
1527 flags
|= PAKFIRE_MOUNT_LOOP_DEVICES
;
1529 // Mount all default stuff
1530 r
= pakfire_mount_all(jail
->pakfire
, flags
);
1534 // Mount networking stuff
1535 if (pakfire_jail_exec_has_flag(ctx
, PAKFIRE_JAIL_HAS_NETWORKING
)) {
1536 r
= pakfire_jail_mount_networking(jail
);
1541 // Mount all custom stuff
1542 for (unsigned int i
= 0; i
< jail
->num_mountpoints
; i
++) {
1544 mp
= &jail
->mountpoints
[i
];
// Bind-mount the registered source onto its target using the stored flags
1547 r
= pakfire_bind(jail
->pakfire
, mp
->source
, mp
->target
, mp
->flags
);
1552 // Log all mountpoints
1553 pakfire_mount_list(jail
->pakfire
);
/*
	Brings up the "lo" interface using libnl.

	A netlink socket is allocated and connected to NETLINK_ROUTE, the link
	cache is populated, and "lo" is looked up by name. A second, empty link
	object carries the requested change (IFF_UP) which is then applied with
	rtnl_link_change().

	libnl calls report failures through their return code, which is why those
	messages are formatted with nl_geterror() instead of %m.
*/
1560 static int pakfire_jail_setup_loopback(struct pakfire_jail
* jail
) {
1561 struct nl_sock
* nl
= NULL
;
1562 struct nl_cache
* cache
= NULL
;
1563 struct rtnl_link
* link
= NULL
;
1564 struct rtnl_link
* change
= NULL
;
1567 DEBUG(jail
->pakfire
, "Setting up loopback...\n");
1569 // Allocate a netlink socket
1570 nl
= nl_socket_alloc();
1572 ERROR(jail
->pakfire
, "Could not allocate a netlink socket: %m\n");
1577 // Connect the socket
1578 r
= nl_connect(nl
, NETLINK_ROUTE
);
1580 ERROR(jail
->pakfire
, "Could not connect netlink socket: %s\n", nl_geterror(r
));
1584 // Allocate the netlink cache
1585 r
= rtnl_link_alloc_cache(nl
, AF_UNSPEC
, &cache
);
1587 ERROR(jail
->pakfire
, "Unable to allocate netlink cache: %s\n", nl_geterror(r
));
1591 // Fetch loopback interface
1592 link
= rtnl_link_get_by_name(cache
, "lo");
1594 ERROR(jail
->pakfire
, "Could not find lo interface. Ignoring.\n");
1599 // Allocate a new link
1600 change
= rtnl_link_alloc();
1602 ERROR(jail
->pakfire
, "Could not allocate change link\n");
1607 // Set the link to UP
1608 rtnl_link_set_flags(change
, IFF_UP
);
1610 // Apply any changes
1611 r
= rtnl_link_change(nl
, link
, change
, 0);
1613 ERROR(jail
->pakfire
, "Unable to activate loopback: %s\n", nl_geterror(r
));
/*
	Writes the UID mapping of the process pid to /proc/<pid>/uid_map.

	Nothing is mapped when Pakfire is running on /.

	Two layouts are written, depending on the case described in the comment
	further down:

	  "0 <subuid->id> <subuid->length>"
	  "0 <uid> 1" followed by "1 <subuid->id> <subuid->length>"

	NOTE(review): uid is declared as uid_t but formatted with %lu. This
	assumes that pakfire_file_write() forwards its arguments printf-style and
	that the widths match - confirm, or cast the arguments.
*/
1629 static int pakfire_jail_setup_uid_mapping(struct pakfire_jail
* jail
, pid_t pid
) {
1630 char path
[PATH_MAX
];
1633 // Skip mapping anything when running on /
1634 if (pakfire_on_root(jail
->pakfire
))
// Compose the path of the map file of the target process
1638 r
= pakfire_string_format(path
, "/proc/%d/uid_map", pid
);
// The UID as reported by pakfire_uid()
1643 const uid_t uid
= pakfire_uid(jail
->pakfire
);
// The subordinate UID range as reported by pakfire_subuid()
1646 const struct pakfire_subid
* subuid
= pakfire_subuid(jail
->pakfire
);
1650 /* When running as root, we will map the entire range.
1652 When running as a non-privileged user, we will map the root user inside the jail
1653 to the user's UID outside of the jail, and we will map the rest starting from one.
1658 r
= pakfire_file_write(jail
->pakfire
, path
, 0, 0, 0,
1659 "0 %lu %lu\n", subuid
->id
, subuid
->length
);
1661 r
= pakfire_file_write(jail
->pakfire
, path
, 0, 0, 0,
1662 "0 %lu 1\n1 %lu %lu\n", uid
, subuid
->id
, subuid
->length
);
1666 ERROR(jail
->pakfire
, "Could not map UIDs: %m\n");
/*
	Writes the GID mapping of the process pid to /proc/<pid>/gid_map.

	Nothing is mapped when Pakfire is running on /.

	This mirrors pakfire_jail_setup_uid_mapping() and writes one of these
	two layouts:

	  "0 <subgid->id> <subgid->length>"
	  "0 <gid> 1" followed by "1 <subgid->id> <subgid->length>"

	Every line of a gid_map file consists of three plain numbers. The second
	line of the two-line layout used to start with a stray '%', which turned
	"%1 " into a conversion specification and broke the line. It now starts
	with the literal ID 1, exactly like the UID mapping does.
*/
1673 static int pakfire_jail_setup_gid_mapping(struct pakfire_jail
* jail
, pid_t pid
) {
1674 char path
[PATH_MAX
];
1677 // Skip mapping anything when running on /
1678 if (pakfire_on_root(jail
->pakfire
))
// The GID as reported by pakfire_gid()
1682 const gid_t gid
= pakfire_gid(jail
->pakfire
);
// The subordinate GID range as reported by pakfire_subgid()
1685 const struct pakfire_subid
* subgid
= pakfire_subgid(jail
->pakfire
);
// Compose the path of the map file of the target process
1690 r
= pakfire_string_format(path
, "/proc/%d/gid_map", pid
);
1696 r
= pakfire_file_write(jail
->pakfire
, path
, 0, 0, 0,
1697 "0 %lu %lu\n", subgid
->id
, subgid
->length
);
1699 r
= pakfire_file_write(jail
->pakfire
, path
, 0, 0, 0,
1700 "0 %lu 1\n1 %lu %lu\n", gid
, subgid
->id
, subgid
->length
);
1704 ERROR(jail
->pakfire
, "Could not map GIDs: %m\n");
/*
	Writes "deny" to /proc/<pid>/setgroups.

	The file is opened for writing with fopen(). A result of fprintf() that
	is zero or negative is treated as an error, and a failure to close the
	file is reported as well.
*/
1711 static int pakfire_jail_setgroups(struct pakfire_jail
* jail
, pid_t pid
) {
1712 char path
[PATH_MAX
];
// Compose the path of the setgroups file of the target process
1716 r
= pakfire_string_format(path
, "/proc/%d/setgroups", pid
);
1720 // Open file for writing
1721 FILE* f
= fopen(path
, "w");
1723 ERROR(jail
->pakfire
, "Could not open %s for writing: %m\n", path
);
1728 int bytes_written
= fprintf(f
, "deny\n");
1729 if (bytes_written
<= 0) {
1730 ERROR(jail
->pakfire
, "Could not write to %s: %m\n", path
);
1737 ERROR(jail
->pakfire
, "Could not close %s: %m\n", path
);
/*
	Notifies whoever is waiting on the event file descriptor fd by writing
	the value 1 to it with eventfd_write().

	This is the sending side of pakfire_jail_wait_for_signal().
*/
1748 static int pakfire_jail_send_signal(struct pakfire_jail
* jail
, int fd
) {
1749 const uint64_t val
= 1;
1752 DEBUG(jail
->pakfire
, "Sending signal...\n");
1754 // Write to the file descriptor
1755 r
= eventfd_write(fd
, val
);
1757 ERROR(jail
->pakfire
, "Could not send signal: %s\n", strerror(errno
));
1761 // Close the file descriptor
/*
	Reads from the event file descriptor fd with eventfd_read().

	This is the receiving side of pakfire_jail_send_signal(). The value that
	has been read is stored in val.
*/
1767 static int pakfire_jail_wait_for_signal(struct pakfire_jail
* jail
, int fd
) {
1771 DEBUG(jail
->pakfire
, "Waiting for signal...\n");
1773 r
= eventfd_read(fd
, &val
);
1775 ERROR(jail
->pakfire
, "Error waiting for signal: %s\n", strerror(errno
));
1779 // Close the file descriptor
/*
	Makes root the root directory of the calling process.

	After changing into the new root, pivot_root(".", ".") is called, which
	leaves the old root stacked at the same location. It is therefore
	detached right afterwards with umount2(".", MNT_DETACH).
*/
1785 static int pakfire_jail_switch_root(struct pakfire_jail
* jail
, const char* root
) {
1788 // Change to the new root
1791 ERROR(jail
->pakfire
, "chdir(%s) failed: %m\n", root
);
// Use the current directory as both the new root and the place for the old one
1796 r
= pivot_root(".", ".");
1798 ERROR(jail
->pakfire
, "Failed changing into the new root directory %s: %m\n", root
);
1802 // Umount the old root
1803 r
= umount2(".", MNT_DETACH
);
1805 ERROR(jail
->pakfire
, "Could not umount the old root filesystem: %m\n");
1813 Called by the parent that sets up the second child process...
/*
	Configures the second child process from the outside.

	The PID is resolved from ctx->pidfd2. Then, in this order, the UID mapping
	is written, "deny" is written to /proc/PID/setgroups, and the GID mapping
	is written. Once that is done, the child is notified through
	ctx->completed_fd.
*/
1815 static int pakfire_jail_setup_child2(
1816 struct pakfire_jail
* jail
, struct pakfire_jail_exec
* ctx
) {
// Resolve the PID that belongs to the pidfd of the second child
1821 r
= pidfd_get_pid(ctx
->pidfd2
, &pid
);
1823 CTX_ERROR(jail
->ctx
, "Could not fetch PID: %s\n", strerror(-r
));
1827 // Setup UID mapping
1828 r
= pakfire_jail_setup_uid_mapping(jail
, pid
);
1832 // Write "deny" to /proc/PID/setgroups
1833 r
= pakfire_jail_setgroups(jail
, pid
);
1837 // Setup GID mapping
1838 r
= pakfire_jail_setup_gid_mapping(jail
, pid
);
1842 // Parent has finished initialisation
1843 DEBUG(jail
->pakfire
, "Parent has finished initialization\n");
1845 // Send signal to client
1846 r
= pakfire_jail_send_signal(jail
, ctx
->completed_fd
);
1854 Child 2 is launched in their own user/mount/etc. namespace.
/*
	Entry point of the second child process which finally executes argv.

	In order of appearance, this function:

	  - asks the kernel to send SIGKILL when the parent dies, makes the
	    process dumpable and sets PR_SET_KEEPCAPS
	  - waits on ctx->completed_fd until the parent has finished its part
	  - verifies that it is PID 1 and that all of UID, GID, EUID and EGID
	    are zero
	  - sets the personality that belongs to the effective architecture
	  - sets up the loopback interface unless the execution context has
	    PAKFIRE_JAIL_HAS_NETWORKING set
	  - applies the nice level of the jail
	  - closes the read ends of the log pipes and connects the standard
	    input/output/error pipes using dup2()
	  - resets the open file limit, sets and shows the capabilities and
	    applies the syscall filter
	  - calls execvpe() with the environment of the jail

	A failing execvpe() is translated into an exit code, taking
	PAKFIRE_JAIL_NOENT_OK into account.
*/
1856 static int pakfire_jail_child2(struct pakfire_jail
* jail
,
1857 struct pakfire_jail_exec
* ctx
, const char* argv
[]) {
1861 pid_t pid
= getpid();
1863 CTX_DEBUG(jail
->ctx
, "Launched child process in jail with PID %d\n", pid
);
// Have the kernel send SIGKILL to this process when the parent dies
1866 r
= prctl(PR_SET_PDEATHSIG
, SIGKILL
, 0, 0, 0);
1868 CTX_ERROR(jail
->ctx
, "Could not configure to die with parent: %m\n");
1872 // Make this process dumpable
1873 r
= prctl (PR_SET_DUMPABLE
, 1, 0, 0, 0);
1875 CTX_ERROR(jail
->ctx
, "Could not make the process dumpable: %m\n");
1879 // Don't drop any capabilities on setuid()
1880 r
= prctl(PR_SET_KEEPCAPS
, 1);
1882 CTX_ERROR(jail
->ctx
, "Could not set PR_SET_KEEPCAPS: %m\n");
1886 // Wait for the parent to finish initialization
1887 r
= pakfire_jail_wait_for_signal(jail
, ctx
->completed_fd
);
// Fetch the real and effective IDs for the checks below
1892 uid_t uid
= getuid();
1893 gid_t gid
= getgid();
1896 uid_t euid
= geteuid();
1897 gid_t egid
= getegid();
1899 DEBUG(jail
->pakfire
, " UID: %u (effective %u)\n", uid
, euid
);
1900 DEBUG(jail
->pakfire
, " GID: %u (effective %u)\n", gid
, egid
);
1902 // Fail if we are not PID 1
1904 CTX_ERROR(jail
->ctx
, "Child process is not PID 1\n");
1908 // Fail if we are not running as root
1909 if (uid
|| gid
|| euid
|| egid
) {
1910 ERROR(jail
->pakfire
, "Child process is not running as root\n");
// Select the personality that matches the effective architecture
1914 const char* arch
= pakfire_get_effective_arch(jail
->pakfire
);
1917 unsigned long persona
= pakfire_arch_personality(arch
);
1919 r
= personality(persona
);
1921 ERROR(jail
->pakfire
, "Could not set personality (%x)\n", (unsigned int)persona
);
// Without the networking flag, only the loopback interface is brought up
1927 if (!pakfire_jail_exec_has_flag(ctx
, PAKFIRE_JAIL_HAS_NETWORKING
)) {
1928 r
= pakfire_jail_setup_loopback(jail
);
1935 DEBUG(jail
->pakfire
, "Setting nice level to %d\n", jail
->nice
);
1937 r
= setpriority(PRIO_PROCESS
, pid
, jail
->nice
);
1939 ERROR(jail
->pakfire
, "Could not set nice level: %m\n");
1944 // Close other end of log pipes
1945 close(ctx
->pipes
.log_INFO
[0]);
1946 close(ctx
->pipes
.log_ERROR
[0]);
1948 close(ctx
->pipes
.log_DEBUG
[0]);
1949 #endif /* ENABLE_DEBUG */
1951 // Connect standard input
1952 if (ctx
->pipes
.stdin
[0] >= 0) {
1953 r
= dup2(ctx
->pipes
.stdin
[0], STDIN_FILENO
);
1955 ERROR(jail
->pakfire
, "Could not connect fd %d to stdin: %m\n",
1956 ctx
->pipes
.stdin
[0]);
1962 // Connect standard output and error
1963 if (ctx
->pipes
.stdout
[1] >= 0 && ctx
->pipes
.stderr
[1] >= 0) {
1964 r
= dup2(ctx
->pipes
.stdout
[1], STDOUT_FILENO
);
1966 ERROR(jail
->pakfire
, "Could not connect fd %d to stdout: %m\n",
1967 ctx
->pipes
.stdout
[1]);
1972 r
= dup2(ctx
->pipes
.stderr
[1], STDERR_FILENO
);
1974 ERROR(jail
->pakfire
, "Could not connect fd %d to stderr: %m\n",
1975 ctx
->pipes
.stderr
[1]);
1980 // Close the pipe (as we have moved the original file descriptors)
1981 pakfire_jail_close_pipe(jail
, ctx
->pipes
.stdin
);
1982 pakfire_jail_close_pipe(jail
, ctx
->pipes
.stdout
);
1983 pakfire_jail_close_pipe(jail
, ctx
->pipes
.stderr
);
1986 // Reset open file limit (http://0pointer.net/blog/file-descriptor-limits.html)
1987 r
= pakfire_rlimit_reset_nofile(jail
->pakfire
);
// Configure the capabilities of this process
1992 r
= pakfire_jail_set_capabilities(jail
);
1996 // Show capabilities
1997 r
= pakfire_jail_show_capabilities(jail
);
// Apply the syscall filter
2002 r
= pakfire_jail_limit_syscalls(jail
);
2006 CTX_DEBUG(jail
->ctx
, "Child process initialization done\n");
2007 CTX_DEBUG(jail
->ctx
, "Launching command:\n");
// Log every argument of the command line
2010 for (unsigned int i
= 0; argv
[i
]; i
++)
2011 CTX_DEBUG(jail
->ctx
, " argv[%u] = %s\n", i
, argv
[i
]);
// Replace this process with the command, using the environment of the jail
2014 r
= execvpe(argv
[0], (char**)argv
, jail
->env
);
2016 // Translate errno into regular exit code
2019 // Ignore if the command doesn't exist
2020 if (ctx
->flags
& PAKFIRE_JAIL_NOENT_OK
)
2031 CTX_ERROR(jail
->ctx
, "Could not execve(%s): %m\n", argv
[0]);
2034 // We should not get here
2039 Child 1 is launched in a new mount namespace...
/*
	Entry point of the first child process which prepares the mount namespace
	and then forks the second child process.

	In order of appearance, this function:

	  - redirects all logging into the log pipes of the execution context
	  - asks the kernel to send SIGKILL when the parent dies
	  - changes the mount propagation of / to MS_SLAVE, makes the Pakfire
	    root a mountpoint and changes its propagation to MS_PRIVATE
	  - mounts everything and switches into the new root
	  - changes the mount propagation of / to MS_SHARED
	  - forks the second child with clone3(), optionally into the cgroup of
	    the execution context, and with CLONE_NEWNET unless the execution
	    context has PAKFIRE_JAIL_HAS_NETWORKING set
	  - sends the pidfd of the second child to the first parent

	The message that is logged when clone3() fails now names the second child
	process. It used to be a copy of the message in __pakfire_jail_exec() and
	claimed that forking the first child had failed.
*/
2041 static int pakfire_jail_child1(struct pakfire_jail
* jail
,
2042 struct pakfire_jail_exec
* ctx
, const char* argv
[]) {
2045 // Redirect any logging to our log pipe
2046 pakfire_ctx_set_log_callback(jail
->ctx
, pakfire_jail_log_redirect
, &ctx
->pipes
);
2048 CTX_DEBUG(jail
->ctx
, "First child process launched\n");
// The end of the socket that is used to pass the pidfd to the parent
2050 const int socket_send
= pakfire_jail_get_pipe_to_write(jail
, &ctx
->socket
);
2052 const char* root
= pakfire_get_path(jail
->pakfire
);
// Have the kernel send SIGKILL to this process when the parent dies
2055 r
= prctl(PR_SET_PDEATHSIG
, SIGKILL
, 0, 0, 0);
2057 CTX_ERROR(jail
->ctx
, "Could not configure to die with parent: %s\n", strerror(errno
));
2061 // Change mount propagation so that we will receive, but don't propagate back
2062 r
= pakfire_mount_change_propagation(jail
->ctx
, "/", MS_SLAVE
);
2064 CTX_ERROR(jail
->ctx
, "Could not change mount propagation to SLAVE: %s\n", strerror(r
));
2068 // Make root a mountpoint in the new mount namespace
2069 r
= pakfire_mount_make_mounpoint(jail
->pakfire
, root
);
2073 // Make everything private
2074 r
= pakfire_mount_change_propagation(jail
->ctx
, root
, MS_PRIVATE
);
2076 CTX_ERROR(jail
->ctx
, "Could not change mount propagation to PRIVATE: %s\n", strerror(r
));
// Mount everything that is required inside the jail
2081 r
= pakfire_jail_mount(jail
, ctx
);
// Make the Pakfire root the new root directory
2086 r
= pakfire_jail_switch_root(jail
, root
);
2090 // Change mount propagation so that we will propagate everything down
2091 r
= pakfire_mount_change_propagation(jail
->ctx
, "/", MS_SHARED
);
2093 CTX_ERROR(jail
->ctx
, "Could not change mount propagation to SHARED: %s\n", strerror(r
));
2097 // Configure child process
2098 struct clone_args args
= {
2108 .exit_signal
= SIGCHLD
,
2109 .pidfd
= (long long unsigned int)&ctx
->pidfd2
,
2112 // Launch the process into the configured cgroup
2114 args
.flags
|= CLONE_INTO_CGROUP
;
2116 // Clone into this cgroup
2117 args
.cgroup
= pakfire_cgroup_fd(ctx
->cgroup
);
// Without the networking flag, the child gets its own network namespace
2121 if (!pakfire_jail_exec_has_flag(ctx
, PAKFIRE_JAIL_HAS_NETWORKING
))
2122 args
.flags
|= CLONE_NEWNET
;
2124 // Fork the second child process
2125 pid_t pid
= clone3(&args
, sizeof(args
));
2127 CTX_ERROR(jail
->ctx
, "Could not fork the second child process: %s\n", strerror(errno
));
2132 } else if (pid
== 0) {
2133 r
= pakfire_jail_child2(jail
, ctx
, argv
);
2137 // Send the pidfd of the child to the first parent
2138 r
= pakfire_jail_send_fd(jail
, socket_send
, ctx
->pidfd2
);
2146 // Run a command in the jail
/*
	Runs argv inside the jail and waits for it to finish.

	argv must not be NULL and must have at least one element.

	In order of appearance, this function:

	  - initializes the execution context with all file descriptors set
	    to -1 and the communication callbacks set to communicate_in and
	    communicate_out
	  - makes the calling process a child subreaper
	  - creates a UNIX domain socket pair and an eventfd which is used to
	    tell the child that the parent has completed its configuration
	  - sets up the pipes for standard input (only with communicate_in set),
	    standard output/error and logging
	  - creates a temporary cgroup named after the UUID of the jail as a
	    child of the cgroup of the jail
	  - forks the first child process with clone3() which continues in
	    pakfire_jail_child1() while the parent calls pakfire_jail_wait()
	  - reads and dumps the cgroup statistics, destroys the temporary cgroup
	    and closes all file descriptors
*/
2147 static int __pakfire_jail_exec(struct pakfire_jail
* jail
, const char* argv
[],
2148 const int interactive
,
2149 pakfire_jail_communicate_in communicate_in
,
2150 pakfire_jail_communicate_out communicate_out
,
2151 void* data
, int flags
) {
2154 // Check if argv is valid
2155 if (!argv
|| !argv
[0]) {
2160 // Initialize context for this call
2161 struct pakfire_jail_exec ctx
= {
2164 .socket
= { -1, -1 },
2167 .stdin
= { -1, -1 },
2168 .stdout
= { -1, -1 },
2169 .stderr
= { -1, -1 },
2170 .log_INFO
= { -1, -1 },
2171 .log_ERROR
= { -1, -1 },
2173 .log_DEBUG
= { -1, -1 },
2174 #endif /* ENABLE_DEBUG */
2178 .in
= communicate_in
,
2179 .out
= communicate_out
,
2188 DEBUG(jail
->pakfire
, "Executing jail...\n");
2190 // Become the subreaper
2191 r
= prctl(PR_SET_CHILD_SUBREAPER
, 1, 0, 0, 0);
2193 CTX_ERROR(jail
->ctx
, "Failed to become the sub-reaper: %s\n", strerror(errno
));
2198 // Enable networking in interactive mode
2200 ctx
.flags
|= PAKFIRE_JAIL_HAS_NETWORKING
;
2202 // Create a UNIX domain socket
2203 r
= socketpair(AF_UNIX
, SOCK_DGRAM
|SOCK_CLOEXEC
, 0, ctx
.socket
);
2205 CTX_ERROR(jail
->ctx
, "Could not create UNIX socket: %s\n", strerror(errno
));
2211 Setup a file descriptor which can be used to notify the client that the parent
2212 has completed configuration.
2214 ctx
.completed_fd
= eventfd(0, EFD_CLOEXEC
);
2215 if (ctx
.completed_fd
< 0) {
2216 ERROR(jail
->pakfire
, "eventfd() failed: %m\n");
2220 // Create pipes to communicate with child process if we are not running interactively
2222 // stdin (only if callback is set)
2223 if (ctx
.communicate
.in
) {
2224 r
= pakfire_jail_setup_pipe(jail
, &ctx
.pipes
.stdin
, 0);
// stdout
2230 r
= pakfire_jail_setup_pipe(jail
, &ctx
.pipes
.stdout
, 0);
// stderr
2235 r
= pakfire_jail_setup_pipe(jail
, &ctx
.pipes
.stderr
, 0);
2240 // Setup pipes for logging
2242 r
= pakfire_jail_setup_pipe(jail
, &ctx
.pipes
.log_INFO
, O_CLOEXEC
);
2247 r
= pakfire_jail_setup_pipe(jail
, &ctx
.pipes
.log_ERROR
, O_CLOEXEC
);
2253 r
= pakfire_jail_setup_pipe(jail
, &ctx
.pipes
.log_DEBUG
, O_CLOEXEC
);
2256 #endif /* ENABLE_DEBUG */
2258 // Launch the process in a cgroup that is a leaf of the configured cgroup
2261 const char* uuid
= pakfire_jail_uuid(jail
);
2263 // Create a temporary cgroup
2264 r
= pakfire_cgroup_child(&ctx
.cgroup
, jail
->cgroup
, uuid
, 0);
2266 ERROR(jail
->pakfire
, "Could not create cgroup for jail: %m\n");
2272 Initially, we will set up a new mount namespace and launch a child process in it.
2274 This process remains in the user/ipc/time/etc. namespace and will set up
2275 the mount namespace.
2278 // Configure child process
2279 struct clone_args args
= {
2283 CLONE_CLEAR_SIGHAND
,
2284 .exit_signal
= SIGCHLD
,
2285 .pidfd
= (long long unsigned int)&ctx
.pidfd1
,
2288 // Fork the first child process
2289 pid_t pid
= clone3(&args
, sizeof(args
));
2291 CTX_ERROR(jail
->ctx
, "Could not fork the first child process: %s\n", strerror(errno
));
2296 } else if (pid
== 0) {
2297 r
= pakfire_jail_child1(jail
, &ctx
, argv
);
// Parent: wait for the jail
2302 r
= pakfire_jail_wait(jail
, &ctx
);
2307 // Destroy the temporary cgroup (if any)
2310 // XXX this is currently disabled because it overwrites r
2311 // Read cgroup stats
2312 r
= pakfire_cgroup_stat(ctx
.cgroup
, &ctx
.cgroup_stats
);
2314 ERROR(jail
->pakfire
, "Could not read cgroup stats: %m\n");
2316 pakfire_cgroup_stat_dump(ctx
.cgroup
, &ctx
.cgroup_stats
);
2320 pakfire_cgroup_destroy(ctx
.cgroup
);
2321 pakfire_cgroup_unref(ctx
.cgroup
);
2324 // Close any file descriptors
2325 pakfire_jail_close_pipe(jail
, ctx
.pipes
.stdin
);
2326 pakfire_jail_close_pipe(jail
, ctx
.pipes
.stdout
);
2327 pakfire_jail_close_pipe(jail
, ctx
.pipes
.stderr
);
2328 pakfire_jail_close_pipe(jail
, ctx
.pipes
.log_INFO
);
2329 pakfire_jail_close_pipe(jail
, ctx
.pipes
.log_ERROR
);
2331 pakfire_jail_close_pipe(jail
, ctx
.pipes
.log_DEBUG
);
2332 #endif /* ENABLE_DEBUG */
2333 if (ctx
.pidfd1
>= 0)
2335 if (ctx
.pidfd2
>= 0)
2339 pakfire_jail_close_pipe(jail
, ctx
.socket
);
/*
	Runs argv in the jail in non-interactive mode.

	This is a thin wrapper around __pakfire_jail_exec() which passes 0 as
	the interactive argument and hands everything else through unchanged.
*/
2344 PAKFIRE_EXPORT
int pakfire_jail_exec(
2345 struct pakfire_jail
* jail
,
2347 pakfire_jail_communicate_in callback_in
,
2348 pakfire_jail_communicate_out callback_out
,
2349 void* data
, int flags
) {
2350 return __pakfire_jail_exec(jail
, argv
, 0, callback_in
, callback_out
, data
, flags
);
/*
	Runs argv in the jail in interactive mode.

	The interactive environment is set up first. Then __pakfire_jail_exec()
	is called with 1 as the interactive argument, and with neither
	communication callbacks nor callback data.
*/
2353 static int pakfire_jail_exec_interactive(
2354 struct pakfire_jail
* jail
, const char* argv
[], int flags
) {
2357 // Setup interactive stuff
2358 r
= pakfire_jail_setup_interactive_env(jail
);
2362 return __pakfire_jail_exec(jail
, argv
, 1, NULL
, NULL
, NULL
, flags
);
/*
	Writes a script into a temporary file and executes it inside the jail.

	The file is created from the template PAKFIRE_TMP_DIR
	"/pakfire-script.XXXXXX" below the Pakfire root with mode 0700. The
	command line consists of the path of the script (relative to the root,
	if there is one) followed by all elements of the NULL-terminated args
	list. It is executed with pakfire_jail_exec() and the script is removed
	from disk afterwards.

	The script is passed as a buffer with an explicit size. Therefore it is
	written using a precision of size bytes - exactly like it is logged - so
	that a buffer which is not NUL-terminated at size is never read beyond
	its end.
*/
2365 int pakfire_jail_exec_script(struct pakfire_jail
* jail
,
2369 pakfire_jail_communicate_in callback_in
,
2370 pakfire_jail_communicate_out callback_out
,
2372 char path
[PATH_MAX
];
2373 const char** argv
= NULL
;
2377 const char* root
= pakfire_get_path(jail
->pakfire
);
2379 // Write the scriptlet to disk
2380 r
= pakfire_path_append(path
, root
, PAKFIRE_TMP_DIR
"/pakfire-script.XXXXXX");
2384 // Create a temporary file
2385 f
= pakfire_mktemp(path
, 0700);
2387 ERROR(jail
->pakfire
, "Could not create temporary file: %m\n");
2391 DEBUG(jail
->pakfire
, "Writing script to %s:\n%.*s\n", path
, (int)size
, script
);
// Write no more than size bytes of the script
2394 r
= fprintf(f
, "%.*s", (int)size
, script
);
2396 ERROR(jail
->pakfire
, "Could not write script to file %s: %m\n", path
);
2403 ERROR(jail
->pakfire
, "Could not close script file %s: %m\n", path
);
2409 // Count how many arguments were passed
2410 unsigned int argc
= 1;
2412 for (const char** arg
= args
; *arg
; arg
++)
// Allocate one extra element so that the array ends with NULL
2416 argv
= calloc(argc
+ 1, sizeof(*argv
));
2418 ERROR(jail
->pakfire
, "Could not allocate argv: %m\n");
// The script itself is the command
2423 argv
[0] = (root
) ? pakfire_path_relpath(root
, path
) : path
;
// Append all arguments, shifted by one
2426 for (unsigned int i
= 1; i
< argc
; i
++)
2427 argv
[i
] = args
[i
-1];
2430 r
= pakfire_jail_exec(jail
, argv
, callback_in
, callback_out
, data
, 0);
2438 // Remove script from disk
2446 A convenience function that creates a new jail, runs the given command and destroys
/*
	Creates a jail, executes argv in it and drops the reference to the jail
	again.

	The command is executed with pakfire_jail_capture_stdout as the output
	callback and output as its callback data.

	NOTE(review): pakfire_jail_exec() is called with 0 instead of flags, so
	the flags argument does not appear to have any effect - confirm whether
	this is intended.
*/
2449 int pakfire_jail_run(struct pakfire
* pakfire
, const char* argv
[], int flags
, char** output
) {
2450 struct pakfire_jail
* jail
= NULL
;
2453 // Create a new jail
2454 r
= pakfire_jail_create(&jail
, pakfire
);
2458 // Execute the command
2459 r
= pakfire_jail_exec(jail
, argv
, NULL
, pakfire_jail_capture_stdout
, output
, 0);
2463 pakfire_jail_unref(jail
);
/*
	Creates a jail, executes a script of the given length in it and drops
	the reference to the jail again.

	The script is executed with pakfire_jail_exec_script(), without any
	communication callbacks or callback data. argv is handed over as the
	list of arguments for the script.

	NOTE(review): flags is not part of the call to
	pakfire_jail_exec_script(), so it does not appear to have any effect -
	confirm whether this is intended.
*/
2468 int pakfire_jail_run_script(struct pakfire
* pakfire
,
2469 const char* script
, const size_t length
, const char* argv
[], int flags
) {
2470 struct pakfire_jail
* jail
= NULL
;
2473 // Create a new jail
2474 r
= pakfire_jail_create(&jail
, pakfire
);
2478 // Execute the command
2479 r
= pakfire_jail_exec_script(jail
, script
, length
, argv
, NULL
, NULL
, NULL
);
2483 pakfire_jail_unref(jail
);
/*
	Launches an interactive login shell ("/bin/bash --login") in the jail
	using pakfire_jail_exec_interactive() without any extra flags.

	As the comment at the end states, return codes from the shell are
	ignored.
*/
2488 int pakfire_jail_shell(struct pakfire_jail
* jail
) {
2491 const char* argv
[] = {
2492 "/bin/bash", "--login", NULL
,
2495 // Execute /bin/bash
2496 r
= pakfire_jail_exec_interactive(jail
, argv
, 0);
2502 // Ignore any return codes from the shell
/*
	Runs argv in a new jail, but only if the command exists.

	The first element of argv is resolved to a path using pakfire_path() and
	checked with access(X_OK). A debug message is logged for a command that
	is not executable. Otherwise the command is run with pakfire_jail_run()
	without any flags and without capturing its output.
*/
2506 static int pakfire_jail_run_if_possible(struct pakfire
* pakfire
, const char** argv
) {
2507 char path
[PATH_MAX
];
// Resolve the command using pakfire_path()
2510 r
= pakfire_path(pakfire
, path
, "%s", *argv
);
2514 // Check if the file is executable
2515 r
= access(path
, X_OK
);
2517 DEBUG(pakfire
, "%s is not executable. Skipping...\n", *argv
);
2521 return pakfire_jail_run(pakfire
, argv
, 0, NULL
);
/*
	Runs the command from the local argv list using
	pakfire_jail_run_if_possible(), i.e. only if it is executable.
*/
2524 int pakfire_jail_ldconfig(struct pakfire
* pakfire
) {
2525 const char* argv
[] = {
2530 return pakfire_jail_run_if_possible(pakfire
, argv
);
/*
	Runs /usr/bin/systemd-tmpfiles using pakfire_jail_run_if_possible(),
	i.e. only if it is executable.
*/
2533 int pakfire_jail_run_systemd_tmpfiles(struct pakfire
* pakfire
) {
2534 const char* argv
[] = {
2535 "/usr/bin/systemd-tmpfiles",
2540 return pakfire_jail_run_if_possible(pakfire
, argv
);