1 /*#############################################################################
3 # Pakfire - The IPFire package management system #
4 # Copyright (C) 2022 Pakfire development team #
6 # This program is free software: you can redistribute it and/or modify #
7 # it under the terms of the GNU General Public License as published by #
8 # the Free Software Foundation, either version 3 of the License, or #
9 # (at your option) any later version. #
11 # This program is distributed in the hope that it will be useful, #
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of #
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
14 # GNU General Public License for more details. #
16 # You should have received a copy of the GNU General Public License #
17 # along with this program. If not, see <http://www.gnu.org/licenses/>. #
19 #############################################################################*/
#include <errno.h>
23 #include <linux/capability.h>
24 #include <linux/sched.h>
26 #include <linux/wait.h>
#include <stdlib.h>
31 #include <sys/capability.h>
32 #include <sys/epoll.h>
33 #include <sys/eventfd.h>
34 #include <sys/mount.h>
35 #include <sys/personality.h>
36 #include <sys/prctl.h>
37 #include <sys/resource.h>
38 #include <sys/timerfd.h>
39 #include <sys/types.h>
44 #include <netlink/route/link.h>
52 #include <pakfire/arch.h>
53 #include <pakfire/cgroup.h>
54 #include <pakfire/jail.h>
55 #include <pakfire/logging.h>
56 #include <pakfire/mount.h>
57 #include <pakfire/pakfire.h>
58 #include <pakfire/private.h>
59 #include <pakfire/pwd.h>
60 #include <pakfire/string.h>
61 #include <pakfire/util.h>
// Size in bytes of each log buffer. The expression is parenthesised so that
// the macro expands correctly inside larger expressions (division, modulo, ...)
#define BUFFER_SIZE      (1024 * 64)

// Number of slots in the environment array of a jail
#define ENVIRON_SIZE     128

// Maximum number of events fetched by a single epoll_wait() call
#define EPOLL_MAX_EVENTS 2

// Maximum number of custom bind-mounts per jail
#define MAX_MOUNTPOINTS  8
68 // The default environment that will be set for every command
69 static const struct environ
{
74 { "LANG", "C.utf-8" },
75 { "PATH", "/usr/local/sbin:/usr/sbin:/sbin:/usr/local/bin:/usr/bin:/bin", },
78 // Tell everything that it is running inside a Pakfire container
79 { "container", "pakfire" },
83 struct pakfire_jail_mountpoint
{
84 char source
[PATH_MAX
];
85 char target
[PATH_MAX
];
90 struct pakfire
* pakfire
;
93 // A unique ID for each jail
95 char __uuid
[UUID_STR_LEN
];
101 struct itimerspec timeout
;
104 struct pakfire_cgroup
* cgroup
;
107 char* env
[ENVIRON_SIZE
];
110 struct pakfire_jail_mountpoint mountpoints
[MAX_MOUNTPOINTS
];
111 unsigned int num_mountpoints
;
114 struct pakfire_log_buffer
{
115 char data
[BUFFER_SIZE
];
119 struct pakfire_jail_exec
{
122 // PID (of the child)
126 // Process status (from waitid)
129 // FD to notify the client that the parent has finished initialization
133 struct pakfire_jail_pipes
{
145 struct pakfire_jail_communicate
{
146 pakfire_jail_communicate_in in
;
147 pakfire_jail_communicate_out out
;
152 struct pakfire_jail_buffers
{
153 struct pakfire_log_buffer stdout
;
154 struct pakfire_log_buffer stderr
;
157 struct pakfire_log_buffer log_INFO
;
158 struct pakfire_log_buffer log_ERROR
;
159 struct pakfire_log_buffer log_DEBUG
;
162 struct pakfire_cgroup
* cgroup
;
163 struct pakfire_cgroup_stats cgroup_stats
;
/*
	Thin wrapper that invokes the clone3 system call through syscall().

	Returns whatever syscall() returned, converted to int.
*/
static int clone3(struct clone_args* args, size_t size) {
	const long result = syscall(__NR_clone3, args, size);

	return result;
}
170 static int pidfd_send_signal(int pidfd
, int sig
, siginfo_t
* info
, unsigned int flags
) {
171 return syscall(SYS_pidfd_send_signal
, pidfd
, sig
, info
, flags
);
174 static int pakfire_jail_exec_has_flag(
175 const struct pakfire_jail_exec
* ctx
, const enum pakfire_jail_exec_flags flag
) {
176 return ctx
->flags
& flag
;
179 static void pakfire_jail_free(struct pakfire_jail
* jail
) {
180 DEBUG(jail
->pakfire
, "Freeing jail at %p\n", jail
);
183 for (unsigned int i
= 0; jail
->env
[i
]; i
++)
187 pakfire_cgroup_unref(jail
->cgroup
);
189 pakfire_unref(jail
->pakfire
);
194 Passes any log messages on to the default pakfire log callback
196 static int pakfire_jail_default_log_callback(struct pakfire
* pakfire
, void* data
,
197 int priority
, const char* line
, size_t length
) {
200 INFO(pakfire
, "%s", line
);
204 ERROR(pakfire
, "%s", line
);
209 DEBUG(pakfire
, "%s", line
);
217 static const char* pakfire_jail_uuid(struct pakfire_jail
* jail
) {
219 uuid_unparse_lower(jail
->uuid
, jail
->__uuid
);
224 static int pakfire_jail_setup_interactive_env(struct pakfire_jail
* jail
) {
226 int r
= pakfire_jail_set_env(jail
, "PS1", "pakfire-jail \\w> ");
231 char* TERM
= secure_getenv("TERM");
233 r
= pakfire_jail_set_env(jail
, "TERM", TERM
);
239 char* LANG
= secure_getenv("LANG");
241 r
= pakfire_jail_set_env(jail
, "LANG", LANG
);
249 PAKFIRE_EXPORT
int pakfire_jail_create(struct pakfire_jail
** jail
, struct pakfire
* pakfire
) {
252 const char* arch
= pakfire_get_arch(pakfire
);
254 // Allocate a new jail
255 struct pakfire_jail
* j
= calloc(1, sizeof(*j
));
260 j
->pakfire
= pakfire_ref(pakfire
);
262 // Initialize reference counter
265 // Generate a random UUID
266 uuid_generate_random(j
->uuid
);
268 DEBUG(j
->pakfire
, "Allocated new jail at %p\n", j
);
270 // Set default environment
271 for (const struct environ
* e
= ENV
; e
->key
; e
++) {
272 r
= pakfire_jail_set_env(j
, e
->key
, e
->val
);
277 // Enable all CPU features that the CPU has to offer
278 if (!pakfire_arch_supported_by_host(arch
)) {
279 r
= pakfire_jail_set_env(j
, "QEMU_CPU", "max");
284 // Set container UUID
285 r
= pakfire_jail_set_env(j
, "container_uuid", pakfire_jail_uuid(j
));
289 // Disable systemctl to talk to systemd
290 if (!pakfire_on_root(j
->pakfire
)) {
291 r
= pakfire_jail_set_env(j
, "SYSTEMD_OFFLINE", "1");
301 pakfire_jail_free(j
);
306 PAKFIRE_EXPORT
struct pakfire_jail
* pakfire_jail_ref(struct pakfire_jail
* jail
) {
312 PAKFIRE_EXPORT
struct pakfire_jail
* pakfire_jail_unref(struct pakfire_jail
* jail
) {
313 if (--jail
->nrefs
> 0)
316 pakfire_jail_free(jail
);
322 PAKFIRE_EXPORT
int pakfire_jail_nice(struct pakfire_jail
* jail
, int nice
) {
323 // Check if nice level is in range
324 if (nice
< -19 || nice
> 20) {
335 int pakfire_jail_set_cgroup(struct pakfire_jail
* jail
, struct pakfire_cgroup
* cgroup
) {
336 // Free any previous cgroup
338 pakfire_cgroup_unref(jail
->cgroup
);
342 // Set any new cgroup
344 DEBUG(jail
->pakfire
, "Setting cgroup %p\n", cgroup
);
346 jail
->cgroup
= pakfire_cgroup_ref(cgroup
);
355 // Returns the length of the environment
356 static unsigned int pakfire_jail_env_length(struct pakfire_jail
* jail
) {
359 // Count everything in the environment
360 for (char** e
= jail
->env
; *e
; e
++)
366 // Finds an existing environment variable and returns its index or -1 if not found
367 static int pakfire_jail_find_env(struct pakfire_jail
* jail
, const char* key
) {
373 char buffer
[strlen(key
) + 2];
374 pakfire_string_format(buffer
, "%s=", key
);
376 for (unsigned int i
= 0; jail
->env
[i
]; i
++) {
377 if (pakfire_string_startswith(jail
->env
[i
], buffer
))
385 // Returns the value of an environment variable or NULL
386 PAKFIRE_EXPORT
const char* pakfire_jail_get_env(struct pakfire_jail
* jail
,
388 int i
= pakfire_jail_find_env(jail
, key
);
392 return jail
->env
[i
] + strlen(key
) + 1;
395 // Sets an environment variable
396 PAKFIRE_EXPORT
int pakfire_jail_set_env(struct pakfire_jail
* jail
,
397 const char* key
, const char* value
) {
398 // Find the index where to write this value to
399 int i
= pakfire_jail_find_env(jail
, key
);
401 i
= pakfire_jail_env_length(jail
);
403 // Return -ENOSPC when the environment is full
404 if (i
>= ENVIRON_SIZE
) {
409 // Free any previous value
413 // Format and set environment variable
414 asprintf(&jail
->env
[i
], "%s=%s", key
, value
);
416 DEBUG(jail
->pakfire
, "Set environment variable: %s\n", jail
->env
[i
]);
421 // Imports an environment
422 PAKFIRE_EXPORT
int pakfire_jail_import_env(struct pakfire_jail
* jail
, const char* env
[]) {
430 // Copy environment variables
431 for (unsigned int i
= 0; env
[i
]; i
++) {
432 r
= pakfire_string_partition(env
[i
], "=", &key
, &val
);
437 r
= pakfire_jail_set_env(jail
, key
, val
);
454 PAKFIRE_EXPORT
int pakfire_jail_set_timeout(
455 struct pakfire_jail
* jail
, unsigned int timeout
) {
457 jail
->timeout
.it_value
.tv_sec
= timeout
;
460 DEBUG(jail
->pakfire
, "Timeout set to %d second(s)\n", timeout
);
462 DEBUG(jail
->pakfire
, "Timeout disabled\n");
467 static int pakfire_jail_create_timer(struct pakfire_jail
* jail
) {
470 // Nothing to do if no timeout has been set
471 if (!jail
->timeout
.it_value
.tv_sec
)
474 // Create a new timer
475 const int fd
= timerfd_create(CLOCK_MONOTONIC
, 0);
477 ERROR(jail
->pakfire
, "Could not create timer: %m\n");
482 r
= timerfd_settime(fd
, 0, &jail
->timeout
, NULL
);
484 ERROR(jail
->pakfire
, "Could not arm timer: %m\n");
498 This function replaces any logging in the child process.
500 All log messages will be sent to the parent process through their respective pipes.
502 static void pakfire_jail_log(void* data
, int priority
, const char* file
,
503 int line
, const char* fn
, const char* format
, va_list args
) {
504 struct pakfire_jail_pipes
* pipes
= (struct pakfire_jail_pipes
*)data
;
509 fd
= pipes
->log_INFO
[1];
513 fd
= pipes
->log_ERROR
[1];
518 fd
= pipes
->log_DEBUG
[1];
520 #endif /* ENABLE_DEBUG */
522 // Ignore any messages of an unknown priority
527 // Send the log message
529 vdprintf(fd
, format
, args
);
532 static int pakfire_jail_log_buffer_is_full(const struct pakfire_log_buffer
* buffer
) {
533 return (sizeof(buffer
->data
) == buffer
->used
);
537 This function reads as much data as it can from the file descriptor.
538 If it finds a whole line in it, it will send it to the logger and repeat the process.
539 If no newline character is found, it will try to read more data until it finds one.
541 static int pakfire_jail_handle_log(struct pakfire_jail
* jail
,
542 struct pakfire_jail_exec
* ctx
, int priority
, int fd
,
543 struct pakfire_log_buffer
* buffer
, pakfire_jail_communicate_out callback
, void* data
) {
544 char line
[BUFFER_SIZE
+ 1];
546 // Fill up buffer from fd
547 if (buffer
->used
< sizeof(buffer
->data
)) {
548 ssize_t bytes_read
= read(fd
, buffer
->data
+ buffer
->used
,
549 sizeof(buffer
->data
) - buffer
->used
);
552 if (bytes_read
< 0) {
553 ERROR(jail
->pakfire
, "Could not read from fd %d: %m\n", fd
);
557 // Update buffer size
558 buffer
->used
+= bytes_read
;
561 // See if we have any lines that we can write
562 while (buffer
->used
) {
563 // Search for the end of the first line
564 char* eol
= memchr(buffer
->data
, '\n', buffer
->used
);
568 // If the buffer is full, we send the content to the logger and try again
569 // This should not happen in practice
570 if (pakfire_jail_log_buffer_is_full(buffer
)) {
571 DEBUG(jail
->pakfire
, "Logging buffer is full. Sending all content\n");
573 eol
= buffer
->data
+ sizeof(buffer
->data
) - 1;
575 // Otherwise we might have only read parts of the output
580 // Find the length of the string
581 size_t length
= eol
- buffer
->data
+ 1;
583 // Copy the line into the buffer
584 memcpy(line
, buffer
->data
, length
);
586 // Terminate the string
591 int r
= callback(jail
->pakfire
, data
, priority
, line
, length
);
593 ERROR(jail
->pakfire
, "The logging callback returned an error: %d\n", r
);
598 // Remove line from buffer
599 memmove(buffer
->data
, buffer
->data
+ length
, buffer
->used
- length
);
600 buffer
->used
-= length
;
606 static int pakfire_jail_stream_stdin(struct pakfire_jail
* jail
,
607 struct pakfire_jail_exec
* ctx
, const int fd
) {
610 // Nothing to do if there is no stdin callback set
611 if (!ctx
->communicate
.in
) {
612 DEBUG(jail
->pakfire
, "Callback for standard input is not set\n");
616 // Skip if the writing pipe has already been closed
617 if (!ctx
->pipes
.stdin
[1])
620 DEBUG(jail
->pakfire
, "Streaming standard input...\n");
622 // Calling the callback
623 r
= ctx
->communicate
.in(jail
->pakfire
, ctx
->communicate
.data
, fd
);
625 DEBUG(jail
->pakfire
, "Standard input callback finished: %d\n", r
);
627 // The callback signaled that it has written everything
629 DEBUG(jail
->pakfire
, "Closing standard input pipe\n");
631 // Close the file-descriptor
634 // Reset the file-descriptor so it won't be closed again later
635 ctx
->pipes
.stdin
[1] = 0;
644 static int pakfire_jail_setup_pipe(struct pakfire_jail
* jail
, int (*fds
)[2], const int flags
) {
645 int r
= pipe2(*fds
, flags
);
647 ERROR(jail
->pakfire
, "Could not setup pipe: %m\n");
654 static void pakfire_jail_close_pipe(struct pakfire_jail
* jail
, int fds
[2]) {
655 for (unsigned int i
= 0; i
< 2; i
++)
661 This is a convenience function to fetch the reading end of a pipe and
662 closes the write end.
664 static int pakfire_jail_get_pipe_to_read(struct pakfire_jail
* jail
, int (*fds
)[2]) {
665 // Give the variables easier names to avoid confusion
666 int* fd_read
= &(*fds
)[0];
667 int* fd_write
= &(*fds
)[1];
669 // Close the write end of the pipe
675 // Return the read end
679 static int pakfire_jail_get_pipe_to_write(struct pakfire_jail
* jail
, int (*fds
)[2]) {
680 // Give the variables easier names to avoid confusion
681 int* fd_read
= &(*fds
)[0];
682 int* fd_write
= &(*fds
)[1];
684 // Close the read end of the pipe
690 // Return the write end
694 static int pakfire_jail_wait(struct pakfire_jail
* jail
, struct pakfire_jail_exec
* ctx
) {
696 struct epoll_event ev
;
697 struct epoll_event events
[EPOLL_MAX_EVENTS
];
701 // Fetch file descriptors from context
702 const int stdin
= pakfire_jail_get_pipe_to_write(jail
, &ctx
->pipes
.stdin
);
703 const int stdout
= pakfire_jail_get_pipe_to_read(jail
, &ctx
->pipes
.stdout
);
704 const int stderr
= pakfire_jail_get_pipe_to_read(jail
, &ctx
->pipes
.stderr
);
705 const int pidfd
= ctx
->pidfd
;
708 const int timerfd
= pakfire_jail_create_timer(jail
);
711 const int log_INFO
= pakfire_jail_get_pipe_to_read(jail
, &ctx
->pipes
.log_INFO
);
712 const int log_ERROR
= pakfire_jail_get_pipe_to_read(jail
, &ctx
->pipes
.log_ERROR
);
713 const int log_DEBUG
= pakfire_jail_get_pipe_to_read(jail
, &ctx
->pipes
.log_DEBUG
);
715 // Make a list of all file descriptors we are interested in
717 stdin
, stdout
, stderr
, pidfd
, timerfd
, log_INFO
, log_ERROR
, log_DEBUG
,
721 epollfd
= epoll_create1(0);
723 ERROR(jail
->pakfire
, "Could not initialize epoll(): %m\n");
728 // Turn file descriptors into non-blocking mode and add them to epoll()
729 for (unsigned int i
= 0; i
< sizeof(fds
) / sizeof(*fds
); i
++) {
732 // Skip fds which were not initialized
736 ev
.events
= EPOLLHUP
;
739 ev
.events
|= EPOLLOUT
;
741 ev
.events
|= EPOLLIN
;
744 int flags
= fcntl(fd
, F_GETFL
, 0);
746 // Set modified flags
747 if (fcntl(fd
, F_SETFL
, flags
|O_NONBLOCK
) < 0) {
749 "Could not set file descriptor %d into non-blocking mode: %m\n", fd
);
756 if (epoll_ctl(epollfd
, EPOLL_CTL_ADD
, fd
, &ev
) < 0) {
757 ERROR(jail
->pakfire
, "Could not add file descriptor %d to epoll(): %m\n", fd
);
765 // Loop for as long as the process is alive
767 int num
= epoll_wait(epollfd
, events
, EPOLL_MAX_EVENTS
, -1);
769 // Ignore if epoll_wait() has been interrupted
773 ERROR(jail
->pakfire
, "epoll_wait() failed: %m\n");
779 for (int i
= 0; i
< num
; i
++) {
780 int e
= events
[i
].events
;
781 int fd
= events
[i
].data
.fd
;
783 struct pakfire_log_buffer
* buffer
= NULL
;
784 pakfire_jail_communicate_out callback
= NULL
;
788 // Check if there is any data to be read
790 // Handle any changes to the PIDFD
792 // Call waitid() and store the result
793 r
= waitid(P_PIDFD
, ctx
->pidfd
, &ctx
->status
, WEXITED
);
795 ERROR(jail
->pakfire
, "waitid() failed: %m\n");
799 // Mark that we have ended so that we will process the remaining
800 // events from epoll() now, but won't restart the outer loop.
804 // Handle timer events
805 } else if (fd
== timerfd
) {
806 DEBUG(jail
->pakfire
, "Timer event received\n");
809 r
= read(timerfd
, garbage
, sizeof(garbage
));
811 ERROR(jail
->pakfire
, "Could not disarm timer: %m\n");
816 // Terminate the process if it hasn't already ended
818 DEBUG(jail
->pakfire
, "Terminating process...\n");
820 // Send SIGKILL to the process
821 r
= pidfd_send_signal(pidfd
, SIGKILL
, NULL
, 0);
823 ERROR(jail
->pakfire
, "Could not kill process: %m\n");
828 // There is nothing else to do
831 // Handle logging messages
832 } else if (fd
== log_INFO
) {
833 buffer
= &ctx
->buffers
.log_INFO
;
836 callback
= pakfire_jail_default_log_callback
;
838 } else if (fd
== log_ERROR
) {
839 buffer
= &ctx
->buffers
.log_ERROR
;
842 callback
= pakfire_jail_default_log_callback
;
844 } else if (fd
== log_DEBUG
) {
845 buffer
= &ctx
->buffers
.log_DEBUG
;
846 priority
= LOG_DEBUG
;
848 callback
= pakfire_jail_default_log_callback
;
850 // Handle anything from standard output and standard error
851 } else if (fd
== stdout
) {
852 buffer
= &ctx
->buffers
.stdout
;
855 callback
= ctx
->communicate
.out
;
856 data
= ctx
->communicate
.data
;
858 } else if (fd
== stderr
) {
859 buffer
= &ctx
->buffers
.stderr
;
862 callback
= ctx
->communicate
.out
;
863 data
= ctx
->communicate
.data
;
866 DEBUG(jail
->pakfire
, "Received invalid file descriptor %d\n", fd
);
871 r
= pakfire_jail_handle_log(jail
, ctx
, priority
, fd
, buffer
, callback
, data
);
877 // Handle standard input
879 r
= pakfire_jail_stream_stdin(jail
, ctx
, fd
);
882 // Ignore if we filled up the buffer
887 ERROR(jail
->pakfire
, "Could not write to stdin: %m\n");
894 // Check if any file descriptors have been closed
896 // Remove the file descriptor
897 r
= epoll_ctl(epollfd
, EPOLL_CTL_DEL
, fd
, NULL
);
899 ERROR(jail
->pakfire
, "Could not remove closed file-descriptor %d: %m\n", fd
);
/*
	Output callback that collects standard output in a string.

	data is interpreted as a pointer to a char* buffer. Every line that
	arrives with priority LOG_INFO is appended to that buffer, which is
	reallocated on each call. Anything else (or everything if data is NULL)
	is passed on to the default log callback.

	NOTE(review): The buffer is released with free() when it is replaced,
	so *data is assumed to be NULL or heap-allocated on entry - confirm
	against callers.

	Returns zero on success and a non-zero value if the buffer could not be
	extended, in which case the previous content is left untouched.
*/
int pakfire_jail_capture_stdout(struct pakfire* pakfire, void* data,
		int priority, const char* line, size_t length) {
	char** output = (char**)data;
	char* joined = NULL;
	int r;

	// Append everything from stdout to a buffer
	if (output && priority == LOG_INFO) {
		// Write the result into a separate pointer. Formatting straight into
		// *output would overwrite the only reference to the old buffer (and
		// leak it) and leave *output undefined if asprintf() failed.
		r = asprintf(&joined, "%s%s", (*output) ? *output : "", line);
		if (r < 0)
			return 1;

		// Replace the previous buffer
		free(*output);
		*output = joined;

		return 0;
	}

	// Send everything else to the default logger
	return pakfire_jail_default_log_callback(pakfire, NULL, priority, line, length);
}
934 static int pakfire_jail_drop_capabilities(struct pakfire_jail
* jail
) {
935 const int capabilities
[] = {
936 // Deny access to the kernel's audit system
941 // Deny suspending block devices
944 // Deny any stuff with BPF
947 // Deny checkpoint restore
948 CAP_CHECKPOINT_RESTORE
,
950 // Deny opening files by inode number (open_by_handle_at)
953 // Deny setting SUID bits
956 // Deny locking more memory
959 // Deny modifying any Apparmor/SELinux/SMACK configuration
963 // Deny creating any special devices
966 // Deny reading from syslog
969 // Deny any admin actions (mount, sethostname, ...)
972 // Deny rebooting the system
975 // Deny loading kernel modules
978 // Deny setting nice level
981 // Deny access to /proc/kcore, /dev/mem, /dev/kmem
984 // Deny circumventing any resource limits
987 // Deny setting the system time
990 // Deny playing with suspend
996 DEBUG(jail
->pakfire
, "Dropping capabilities...\n");
1001 // Drop any capabilities
1002 for (const int* cap
= capabilities
; *cap
; cap
++) {
1003 r
= prctl(PR_CAPBSET_DROP
, *cap
, 0, 0, 0);
1005 ERROR(jail
->pakfire
, "Could not drop capability %d: %m\n", *cap
);
1012 // Fetch any capabilities
1013 cap_t caps
= cap_get_proc();
1015 ERROR(jail
->pakfire
, "Could not read capabilities: %m\n");
1020 Set inheritable capabilities
1022 This ensures that no processes will be able to gain any of the listed
1025 r
= cap_set_flag(caps
, CAP_INHERITABLE
, num_caps
, capabilities
, CAP_CLEAR
);
1027 ERROR(jail
->pakfire
, "cap_set_flag() failed: %m\n");
1031 // Restore capabilities
1032 r
= cap_set_proc(caps
);
1034 ERROR(jail
->pakfire
, "Could not restore capabilities: %m\n");
1047 static int pakfire_jail_limit_syscalls(struct pakfire_jail
* jail
) {
1048 const int syscalls
[] = {
1049 // The kernel's keyring isn't namespaced
1052 SCMP_SYS(request_key
),
1054 // Disable userfaultfd
1055 SCMP_SYS(userfaultfd
),
1057 // Disable perf which could leak a lot of information about the host
1058 SCMP_SYS(perf_event_open
),
1064 DEBUG(jail
->pakfire
, "Applying syscall filter...\n");
1066 // Setup a syscall filter which allows everything by default
1067 scmp_filter_ctx ctx
= seccomp_init(SCMP_ACT_ALLOW
);
1069 ERROR(jail
->pakfire
, "Could not setup seccomp filter: %m\n");
1074 for (const int* syscall
= syscalls
; *syscall
; syscall
++) {
1075 r
= seccomp_rule_add(ctx
, SCMP_ACT_ERRNO(EPERM
), *syscall
, 0);
1077 ERROR(jail
->pakfire
, "Could not configure syscall %d: %m\n", *syscall
);
1082 // Load syscall filter into the kernel
1083 r
= seccomp_load(ctx
);
1085 ERROR(jail
->pakfire
, "Could not load syscall filter into the kernel: %m\n");
1091 seccomp_release(ctx
);
1098 PAKFIRE_EXPORT
int pakfire_jail_bind(struct pakfire_jail
* jail
,
1099 const char* source
, const char* target
, int flags
) {
1100 struct pakfire_jail_mountpoint
* mp
= NULL
;
1103 // Check if there is any space left
1104 if (jail
->num_mountpoints
>= MAX_MOUNTPOINTS
) {
1109 // Check for valid inputs
1110 if (!source
|| !target
) {
1115 // Select the next free slot
1116 mp
= &jail
->mountpoints
[jail
->num_mountpoints
];
1119 r
= pakfire_string_set(mp
->source
, source
);
1121 ERROR(jail
->pakfire
, "Could not copy source: %m\n");
1126 r
= pakfire_string_set(mp
->target
, target
);
1128 ERROR(jail
->pakfire
, "Could not copy target: %m\n");
1135 // Increment counter
1136 jail
->num_mountpoints
++;
1141 static int pakfire_jail_mount_networking(struct pakfire_jail
* jail
) {
1144 const char* paths
[] = {
1150 // Bind-mount all paths read-only
1151 for (const char** path
= paths
; *path
; path
++) {
1152 r
= pakfire_bind(jail
->pakfire
, *path
, NULL
, MS_RDONLY
);
1161 Mounts everything that we require in the new namespace
1163 static int pakfire_jail_mount(struct pakfire_jail
* jail
, struct pakfire_jail_exec
* ctx
) {
1164 struct pakfire_jail_mountpoint
* mp
= NULL
;
1168 // Enable loop devices
1169 if (pakfire_jail_exec_has_flag(ctx
, PAKFIRE_JAIL_HAS_LOOP_DEVICES
))
1170 flags
|= PAKFIRE_MOUNT_LOOP_DEVICES
;
1172 // Mount all default stuff
1173 r
= pakfire_mount_all(jail
->pakfire
, flags
);
1177 // Mount networking stuff
1178 if (pakfire_jail_exec_has_flag(ctx
, PAKFIRE_JAIL_HAS_NETWORKING
)) {
1179 r
= pakfire_jail_mount_networking(jail
);
1184 // Mount all custom stuff
1185 for (unsigned int i
= 0; i
< jail
->num_mountpoints
; i
++) {
1187 mp
= &jail
->mountpoints
[i
];
1190 r
= pakfire_bind(jail
->pakfire
, mp
->source
, mp
->target
, mp
->flags
);
1195 // Log all mountpoints
1196 pakfire_mount_list(jail
->pakfire
);
1203 static int pakfire_jail_setup_loopback(struct pakfire_jail
* jail
) {
1204 struct nl_sock
* nl
= NULL
;
1205 struct nl_cache
* cache
= NULL
;
1206 struct rtnl_link
* link
= NULL
;
1207 struct rtnl_link
* change
= NULL
;
1210 DEBUG(jail
->pakfire
, "Setting up loopback...\n");
1212 // Allocate a netlink socket
1213 nl
= nl_socket_alloc();
1215 ERROR(jail
->pakfire
, "Could not allocate a netlink socket: %m\n");
1220 // Connect the socket
1221 r
= nl_connect(nl
, NETLINK_ROUTE
);
1223 ERROR(jail
->pakfire
, "Could not connect netlink socket: %s\n", nl_geterror(r
));
1227 // Allocate the netlink cache
1228 r
= rtnl_link_alloc_cache(nl
, AF_UNSPEC
, &cache
);
1230 ERROR(jail
->pakfire
, "Unable to allocate netlink cache: %s\n", nl_geterror(r
));
1234 // Fetch loopback interface
1235 link
= rtnl_link_get_by_name(cache
, "lo");
1237 ERROR(jail
->pakfire
, "Could not find lo interface. Ignoring.\n");
1242 // Allocate a new link
1243 change
= rtnl_link_alloc();
1245 ERROR(jail
->pakfire
, "Could not allocate change link\n");
1250 // Set the link to UP
1251 rtnl_link_set_flags(change
, IFF_UP
);
1253 // Apply any changes
1254 r
= rtnl_link_change(nl
, link
, change
, 0);
1256 ERROR(jail
->pakfire
, "Unable to activate loopback: %s\n", nl_geterror(r
));
1272 static int pakfire_jail_setup_uid_mapping(struct pakfire_jail
* jail
, pid_t pid
) {
1273 char path
[PATH_MAX
];
1276 // Skip mapping anything when running on /
1277 if (pakfire_on_root(jail
->pakfire
))
1281 r
= pakfire_string_format(path
, "/proc/%d/uid_map", pid
);
1286 const uid_t uid
= pakfire_uid(jail
->pakfire
);
1289 const struct pakfire_subid
* subuid
= pakfire_subuid(jail
->pakfire
);
1293 /* When running as root, we will map the entire range.
1295 When running as a non-privileged user, we will map the root user inside the jail
1296 to the user's UID outside of the jail, and we will map the rest starting from one.
1301 r
= pakfire_file_write(jail
->pakfire
, path
, 0, 0, 0,
1302 "0 %lu %lu\n", subuid
->id
, subuid
->length
);
1304 r
= pakfire_file_write(jail
->pakfire
, path
, 0, 0, 0,
1305 "0 %lu 1\n1 %lu %lu\n", uid
, subuid
->id
, subuid
->length
);
1309 ERROR(jail
->pakfire
, "Could not map UIDs: %m\n");
1316 static int pakfire_jail_setup_gid_mapping(struct pakfire_jail
* jail
, pid_t pid
) {
1317 char path
[PATH_MAX
];
1320 // Skip mapping anything when running on /
1321 if (pakfire_on_root(jail
->pakfire
))
1325 const gid_t gid
= pakfire_gid(jail
->pakfire
);
1328 const struct pakfire_subid
* subgid
= pakfire_subgid(jail
->pakfire
);
1333 r
= pakfire_string_format(path
, "/proc/%d/gid_map", pid
);
1339 r
= pakfire_file_write(jail
->pakfire
, path
, 0, 0, 0,
1340 "0 %lu %lu\n", subgid
->id
, subgid
->length
);
1342 r
= pakfire_file_write(jail
->pakfire
, path
, 0, 0, 0,
1343 "0 %lu 1\n%1 %lu %lu\n", gid
, subgid
->id
, subgid
->length
);
1347 ERROR(jail
->pakfire
, "Could not map GIDs: %m\n");
1354 static int pakfire_jail_setgroups(struct pakfire_jail
* jail
, pid_t pid
) {
1355 char path
[PATH_MAX
];
1359 r
= pakfire_string_format(path
, "/proc/%d/setgroups", pid
);
1363 // Open file for writing
1364 FILE* f
= fopen(path
, "w");
1366 ERROR(jail
->pakfire
, "Could not open %s for writing: %m\n", path
);
1371 int bytes_written
= fprintf(f
, "deny\n");
1372 if (bytes_written
<= 0) {
1373 ERROR(jail
->pakfire
, "Could not write to %s: %m\n", path
);
1380 ERROR(jail
->pakfire
, "Could not close %s: %m\n", path
);
1391 static int pakfire_jail_send_signal(struct pakfire_jail
* jail
, int fd
) {
1392 const uint64_t val
= 1;
1395 DEBUG(jail
->pakfire
, "Sending signal...\n");
1397 // Write to the file descriptor
1398 ssize_t bytes_written
= write(fd
, &val
, sizeof(val
));
1399 if (bytes_written
< 0 || (size_t)bytes_written
< sizeof(val
)) {
1400 ERROR(jail
->pakfire
, "Could not send signal: %m\n");
1404 // Close the file descriptor
1410 static int pakfire_jail_wait_for_signal(struct pakfire_jail
* jail
, int fd
) {
1414 DEBUG(jail
->pakfire
, "Waiting for signal...\n");
1416 ssize_t bytes_read
= read(fd
, &val
, sizeof(val
));
1417 if (bytes_read
< 0 || (size_t)bytes_read
< sizeof(val
)) {
1418 ERROR(jail
->pakfire
, "Error waiting for signal: %m\n");
1422 // Close the file descriptor
1429 Performs the initialisation that needs to happen in the parent part
1431 static int pakfire_jail_parent(struct pakfire_jail
* jail
, struct pakfire_jail_exec
* ctx
) {
1434 // Setup UID mapping
1435 r
= pakfire_jail_setup_uid_mapping(jail
, ctx
->pid
);
1439 // Write "deny" to /proc/PID/setgroups
1440 r
= pakfire_jail_setgroups(jail
, ctx
->pid
);
1444 // Setup GID mapping
1445 r
= pakfire_jail_setup_gid_mapping(jail
, ctx
->pid
);
1449 // Parent has finished initialisation
1450 DEBUG(jail
->pakfire
, "Parent has finished initialization\n");
1452 // Send signal to client
1453 r
= pakfire_jail_send_signal(jail
, ctx
->completed_fd
);
1460 static int pakfire_jail_child(struct pakfire_jail
* jail
, struct pakfire_jail_exec
* ctx
,
1461 const char* argv
[]) {
1464 // Redirect any logging to our log pipe
1465 pakfire_set_log_callback(jail
->pakfire
, pakfire_jail_log
, &ctx
->pipes
);
1468 r
= prctl(PR_SET_PDEATHSIG
, SIGKILL
, 0, 0, 0);
1470 ERROR(jail
->pakfire
, "Could not configure to die with parent: %m\n");
1475 pid_t pid
= getpid();
1477 DEBUG(jail
->pakfire
, "Launched child process in jail with PID %d\n", pid
);
1479 // Wait for the parent to finish initialization
1480 r
= pakfire_jail_wait_for_signal(jail
, ctx
->completed_fd
);
1484 // Perform further initialization
1487 uid_t uid
= getuid();
1488 gid_t gid
= getgid();
1491 uid_t euid
= geteuid();
1492 gid_t egid
= getegid();
1494 DEBUG(jail
->pakfire
, " UID: %d (effective %d)\n", uid
, euid
);
1495 DEBUG(jail
->pakfire
, " GID: %d (effective %d)\n", gid
, egid
);
1497 // Check if we are (effectively) running as root
1498 if (uid
|| gid
|| euid
|| egid
) {
1499 ERROR(jail
->pakfire
, "Child process is not running as root\n");
1503 const char* root
= pakfire_get_path(jail
->pakfire
);
1504 const char* arch
= pakfire_get_arch(jail
->pakfire
);
1506 // Change root (unless root is /)
1507 if (!pakfire_on_root(jail
->pakfire
)) {
1509 r
= pakfire_jail_mount(jail
, ctx
);
1516 ERROR(jail
->pakfire
, "chroot() to %s failed: %m\n", root
);
1520 // Change directory to /
1523 ERROR(jail
->pakfire
, "chdir() after chroot() failed: %m\n");
1529 unsigned long persona
= pakfire_arch_personality(arch
);
1531 r
= personality(persona
);
1533 ERROR(jail
->pakfire
, "Could not set personality (%x)\n", (unsigned int)persona
);
1539 if (!pakfire_jail_exec_has_flag(ctx
, PAKFIRE_JAIL_HAS_NETWORKING
)) {
1540 r
= pakfire_jail_setup_loopback(jail
);
1547 DEBUG(jail
->pakfire
, "Setting nice level to %d\n", jail
->nice
);
1549 r
= setpriority(PRIO_PROCESS
, pid
, jail
->nice
);
1551 ERROR(jail
->pakfire
, "Could not set nice level: %m\n");
1556 // Close other end of log pipes
1557 close(ctx
->pipes
.log_INFO
[0]);
1558 close(ctx
->pipes
.log_ERROR
[0]);
1560 close(ctx
->pipes
.log_DEBUG
[0]);
1561 #endif /* ENABLE_DEBUG */
1563 // Connect standard input
1564 if (ctx
->pipes
.stdin
[0]) {
1565 r
= dup2(ctx
->pipes
.stdin
[0], STDIN_FILENO
);
1567 ERROR(jail
->pakfire
, "Could not connect fd %d to stdin: %m\n",
1568 ctx
->pipes
.stdin
[0]);
1574 // Connect standard output and error
1575 if (ctx
->pipes
.stdout
[1] && ctx
->pipes
.stderr
[1]) {
1576 r
= dup2(ctx
->pipes
.stdout
[1], STDOUT_FILENO
);
1578 ERROR(jail
->pakfire
, "Could not connect fd %d to stdout: %m\n",
1579 ctx
->pipes
.stdout
[1]);
1584 r
= dup2(ctx
->pipes
.stderr
[1], STDERR_FILENO
);
1586 ERROR(jail
->pakfire
, "Could not connect fd %d to stderr: %m\n",
1587 ctx
->pipes
.stderr
[1]);
1592 // Close the pipe (as we have moved the original file descriptors)
1593 pakfire_jail_close_pipe(jail
, ctx
->pipes
.stdin
);
1594 pakfire_jail_close_pipe(jail
, ctx
->pipes
.stdout
);
1595 pakfire_jail_close_pipe(jail
, ctx
->pipes
.stderr
);
1598 // Reset open file limit (http://0pointer.net/blog/file-descriptor-limits.html)
1599 r
= pakfire_rlimit_reset_nofile(jail
->pakfire
);
1603 // Drop capabilities
1604 r
= pakfire_jail_drop_capabilities(jail
);
1609 r
= pakfire_jail_limit_syscalls(jail
);
1613 DEBUG(jail
->pakfire
, "Child process initialization done\n");
1614 DEBUG(jail
->pakfire
, "Launching command:\n");
1617 for (unsigned int i
= 0; argv
[i
]; i
++)
1618 DEBUG(jail
->pakfire
, " argv[%d] = %s\n", i
, argv
[i
]);
1621 r
= execvpe(argv
[0], (char**)argv
, jail
->env
);
1623 // Translate errno into regular exit code
1626 // Ignore if the command doesn't exist
1627 if (ctx
->flags
& PAKFIRE_JAIL_NOENT_OK
)
1638 ERROR(jail
->pakfire
, "Could not execve(%s): %m\n", argv
[0]);
1641 // We should not get here
1645 // Run a command in the jail
1646 static int __pakfire_jail_exec(struct pakfire_jail
* jail
, const char* argv
[],
1647 const int interactive
,
1648 pakfire_jail_communicate_in communicate_in
,
1649 pakfire_jail_communicate_out communicate_out
,
1650 void* data
, int flags
) {
1654 // Check if argv is valid
1655 if (!argv
|| !argv
[0]) {
1660 // Send any output to the default logger if no callback is set
1661 if (!communicate_out
)
1662 communicate_out
= pakfire_jail_default_log_callback
;
1664 // Initialize context for this call
1665 struct pakfire_jail_exec ctx
= {
1675 .in
= communicate_in
,
1676 .out
= communicate_out
,
1681 DEBUG(jail
->pakfire
, "Executing jail...\n");
1683 // Enable networking in interactive mode
1685 ctx
.flags
|= PAKFIRE_JAIL_HAS_NETWORKING
;
1688 Setup a file descriptor which can be used to notify the client that the parent
1689 has completed configuration.
1691 ctx
.completed_fd
= eventfd(0, EFD_CLOEXEC
);
1692 if (ctx
.completed_fd
< 0) {
1693 ERROR(jail
->pakfire
, "eventfd() failed: %m\n");
1697 // Create pipes to communicate with child process if we are not running interactively
1699 // stdin (only if callback is set)
1700 if (ctx
.communicate
.in
) {
1701 r
= pakfire_jail_setup_pipe(jail
, &ctx
.pipes
.stdin
, 0);
1707 r
= pakfire_jail_setup_pipe(jail
, &ctx
.pipes
.stdout
, 0);
1712 r
= pakfire_jail_setup_pipe(jail
, &ctx
.pipes
.stderr
, 0);
1717 // Setup pipes for logging
1719 r
= pakfire_jail_setup_pipe(jail
, &ctx
.pipes
.log_INFO
, O_CLOEXEC
);
1724 r
= pakfire_jail_setup_pipe(jail
, &ctx
.pipes
.log_ERROR
, O_CLOEXEC
);
1730 r
= pakfire_jail_setup_pipe(jail
, &ctx
.pipes
.log_DEBUG
, O_CLOEXEC
);
1733 #endif /* ENABLE_DEBUG */
1735 // Configure child process
1736 struct clone_args args
= {
1745 .exit_signal
= SIGCHLD
,
1746 .pidfd
= (long long unsigned int)&ctx
.pidfd
,
1749 // Launch the process in a cgroup that is a leaf of the configured cgroup
1751 args
.flags
|= CLONE_INTO_CGROUP
;
1754 const char* uuid
= pakfire_jail_uuid(jail
);
1756 // Create a temporary cgroup
1757 r
= pakfire_cgroup_child(&ctx
.cgroup
, jail
->cgroup
, uuid
, 0);
1759 ERROR(jail
->pakfire
, "Could not create cgroup for jail: %m\n");
1763 // Clone into this cgroup
1764 args
.cgroup
= pakfire_cgroup_fd(ctx
.cgroup
);
1768 if (!pakfire_jail_exec_has_flag(&ctx
, PAKFIRE_JAIL_HAS_NETWORKING
)) {
1769 args
.flags
|= CLONE_NEWNET
;
1772 // Fork this process
1773 ctx
.pid
= clone3(&args
, sizeof(args
));
1775 ERROR(jail
->pakfire
, "Could not clone: %m\n");
1779 } else if (ctx
.pid
== 0) {
1780 r
= pakfire_jail_child(jail
, &ctx
, argv
);
1785 r
= pakfire_jail_parent(jail
, &ctx
);
1789 DEBUG(jail
->pakfire
, "Waiting for PID %d to finish its work\n", ctx
.pid
);
1791 // Read output of the child process
1792 r
= pakfire_jail_wait(jail
, &ctx
);
1796 // Handle exit status
1797 switch (ctx
.status
.si_code
) {
1799 DEBUG(jail
->pakfire
, "The child process exited with code %d\n",
1800 ctx
.status
.si_status
);
1803 exit
= ctx
.status
.si_status
;
1807 ERROR(jail
->pakfire
, "The child process was killed\n");
1812 ERROR(jail
->pakfire
, "The child process terminated abnormally\n");
1815 // Log anything else
1817 ERROR(jail
->pakfire
, "Unknown child exit code: %d\n", ctx
.status
.si_code
);
1822 // Destroy the temporary cgroup (if any)
1824 // Read cgroup stats
1825 r
= pakfire_cgroup_stat(ctx
.cgroup
, &ctx
.cgroup_stats
);
1827 ERROR(jail
->pakfire
, "Could not read cgroup stats: %m\n");
1829 pakfire_cgroup_stat_dump(ctx
.cgroup
, &ctx
.cgroup_stats
);
1832 pakfire_cgroup_destroy(ctx
.cgroup
);
1833 pakfire_cgroup_unref(ctx
.cgroup
);
1836 // Close any file descriptors
1837 pakfire_jail_close_pipe(jail
, ctx
.pipes
.stdin
);
1838 pakfire_jail_close_pipe(jail
, ctx
.pipes
.stdout
);
1839 pakfire_jail_close_pipe(jail
, ctx
.pipes
.stderr
);
1842 pakfire_jail_close_pipe(jail
, ctx
.pipes
.log_INFO
);
1843 pakfire_jail_close_pipe(jail
, ctx
.pipes
.log_ERROR
);
1844 pakfire_jail_close_pipe(jail
, ctx
.pipes
.log_DEBUG
);
1849 PAKFIRE_EXPORT
int pakfire_jail_exec(
1850 struct pakfire_jail
* jail
,
1852 pakfire_jail_communicate_in callback_in
,
1853 pakfire_jail_communicate_out callback_out
,
1854 void* data
, int flags
) {
1855 return __pakfire_jail_exec(jail
, argv
, 0, callback_in
, callback_out
, data
, flags
);
1858 static int pakfire_jail_exec_interactive(
1859 struct pakfire_jail
* jail
, const char* argv
[], int flags
) {
1862 // Setup interactive stuff
1863 r
= pakfire_jail_setup_interactive_env(jail
);
1867 return __pakfire_jail_exec(jail
, argv
, 1, NULL
, NULL
, NULL
, flags
);
1870 int pakfire_jail_exec_script(struct pakfire_jail
* jail
,
1874 pakfire_jail_communicate_in callback_in
,
1875 pakfire_jail_communicate_out callback_out
,
1877 char path
[PATH_MAX
];
1878 const char** argv
= NULL
;
1882 const char* root
= pakfire_get_path(jail
->pakfire
);
1884 // Write the scriptlet to disk
1885 r
= pakfire_path_join(path
, root
, PAKFIRE_TMP_DIR
"/pakfire-script.XXXXXX");
1889 // Create a temporary file
1890 f
= pakfire_mktemp(path
, 0700);
1892 ERROR(jail
->pakfire
, "Could not create temporary file: %m\n");
1896 DEBUG(jail
->pakfire
, "Writing script to %s:\n%.*s\n", path
, (int)size
, script
);
1899 r
= fprintf(f
, "%s", script
);
1901 ERROR(jail
->pakfire
, "Could not write script to file %s: %m\n", path
);
1908 ERROR(jail
->pakfire
, "Could not close script file %s: %m\n", path
);
1914 // Count how many arguments were passed
1915 unsigned int argc
= 1;
1917 for (const char** arg
= args
; *arg
; arg
++)
1921 argv
= calloc(argc
+ 1, sizeof(*argv
));
1923 ERROR(jail
->pakfire
, "Could not allocate argv: %m\n");
1928 argv
[0] = (root
) ? pakfire_path_relpath(root
, path
) : path
;
1931 for (unsigned int i
= 1; i
< argc
; i
++)
1932 argv
[i
] = args
[i
-1];
1935 r
= pakfire_jail_exec(jail
, argv
, callback_in
, callback_out
, data
, 0);
1943 // Remove script from disk
1951 A convenience function that creates a new jail, runs the given command and destroys
1954 int pakfire_jail_run(struct pakfire
* pakfire
, const char* argv
[], int flags
, char** output
) {
1955 struct pakfire_jail
* jail
= NULL
;
1958 // Create a new jail
1959 r
= pakfire_jail_create(&jail
, pakfire
);
1963 // Execute the command
1964 r
= pakfire_jail_exec(jail
, argv
, NULL
, pakfire_jail_capture_stdout
, output
, 0);
1968 pakfire_jail_unref(jail
);
/*
	Creates a new jail, runs the given script in it through
	pakfire_jail_exec_script() and drops the jail again.

	script/length describe the script text and argv holds the arguments that
	are passed to it. No communication callbacks are installed.

	Note that flags is accepted but not used by this function.

	Returns the error of pakfire_jail_create() if that fails, otherwise the
	result of pakfire_jail_exec_script().
*/
int pakfire_jail_run_script(struct pakfire* pakfire,
		const char* script, const size_t length, const char* argv[], int flags) {
	struct pakfire_jail* jail = NULL;
	int r;

	// Create a new jail
	r = pakfire_jail_create(&jail, pakfire);
	if (r)
		goto ERROR;

	// Execute the command
	r = pakfire_jail_exec_script(jail, script, length, argv, NULL, NULL, NULL);

ERROR:
	if (jail)
		pakfire_jail_unref(jail);

	return r;
}
/*
	Starts "/bin/bash --login" inside the jail using the interactive
	execution path, with flags set to 0.

	Returns the result of pakfire_jail_exec_interactive().
*/
int pakfire_jail_shell(struct pakfire_jail* jail) {
	const char* argv[] = {
		"/bin/bash", "--login", NULL,
	};

	// Execute /bin/bash
	return pakfire_jail_exec_interactive(jail, argv, 0);
}
2002 static int pakfire_jail_run_if_possible(struct pakfire
* pakfire
, const char** argv
) {
2003 char path
[PATH_MAX
];
2006 r
= pakfire_path(pakfire
, path
, "%s", *argv
);
2010 // Check if the file is executable
2011 r
= access(path
, X_OK
);
2013 DEBUG(pakfire
, "%s is not executable. Skipping...\n", *argv
);
2017 return pakfire_jail_run(pakfire
, argv
, 0, NULL
);
// Hands the command stored in the local argv array to
// pakfire_jail_run_if_possible() and returns that function's result.
// NOTE(review): the entries of the argv initializer are not visible here;
// presumably this is the ldconfig command line - confirm before relying on it.
2020 int pakfire_jail_ldconfig(struct pakfire
* pakfire
) {
2021 const char* argv
[] = {
2026 return pakfire_jail_run_if_possible(pakfire
, argv
);
2029 int pakfire_jail_run_systemd_tmpfiles(struct pakfire
* pakfire
) {
2030 const char* argv
[] = {
2031 "/usr/bin/systemd-tmpfiles",
2036 return pakfire_jail_run_if_possible(pakfire
, argv
);