1 /*#############################################################################
3 # Pakfire - The IPFire package management system #
4 # Copyright (C) 2022 Pakfire development team #
6 # This program is free software: you can redistribute it and/or modify #
7 # it under the terms of the GNU General Public License as published by #
8 # the Free Software Foundation, either version 3 of the License, or #
9 # (at your option) any later version. #
11 # This program is distributed in the hope that it will be useful, #
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of #
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
14 # GNU General Public License for more details. #
16 # You should have received a copy of the GNU General Public License #
17 # along with this program. If not, see <http://www.gnu.org/licenses/>. #
19 #############################################################################*/
22 #include <linux/capability.h>
23 #include <linux/fcntl.h>
24 #include <linux/sched.h>
25 #include <linux/wait.h>
30 #include <sys/capability.h>
31 #include <sys/epoll.h>
32 #include <sys/eventfd.h>
33 #include <sys/personality.h>
34 #include <sys/prctl.h>
35 #include <sys/resource.h>
36 #include <sys/types.h>
45 #include <pakfire/arch.h>
46 #include <pakfire/cgroup.h>
47 #include <pakfire/jail.h>
48 #include <pakfire/logging.h>
49 #include <pakfire/mount.h>
50 #include <pakfire/pakfire.h>
51 #include <pakfire/private.h>
52 #include <pakfire/pwd.h>
53 #include <pakfire/util.h>
// Size (in bytes) of each buffer that collects output read from the child process.
// The expansion is parenthesised so that the macro behaves like a single value
// inside larger expressions (e.g. x / BUFFER_SIZE).
#define BUFFER_SIZE      (1024 * 64)

// Number of slots in a jail's environment array
#define ENVIRON_SIZE     128

// Maximum number of events fetched by a single call to epoll_wait()
#define EPOLL_MAX_EVENTS 2
// Table of key/value pairs; pakfire_jail_create() walks it until an entry with a
// NULL key and passes each pair to pakfire_jail_set_env().
// NOTE(review): only the LANG entry is visible in this extract.
59 // The default environment that will be set for every command
60 static const struct environ
{
64 { "LANG", "en_US.utf-8" },
// Members of the jail object.
// NOTE(review): the struct tag line and some members (e.g. nrefs, flags, nice,
// uuid, log_data, which are used elsewhere in this file) are not visible here.

// The pakfire instance this jail belongs to; referenced in pakfire_jail_create()
// and released in pakfire_jail_free()
70 struct pakfire
* pakfire
;
73 // A unique ID for each jail
// String representation of the UUID, written by pakfire_jail_uuid()
75 char __uuid
[UUID_STR_LEN
];
// Parent cgroup configured through pakfire_jail_set_cgroup()
84 struct pakfire_cgroup
* cgroup
;
// "KEY=VALUE" strings; all loops over this array stop at the first NULL entry
87 char* env
[ENVIRON_SIZE
];
// Callback that receives the standard output/error of the jailed process
90 pakfire_jail_log_callback log_callback
;
// Fixed-size byte buffer that accumulates data read from a pipe until
// pakfire_jail_handle_log() can hand complete lines to a callback.
// NOTE(review): the "used" fill counter referenced elsewhere is not visible here.
94 struct pakfire_log_buffer
{
95 char data
[BUFFER_SIZE
];
// Per-execution state that is shared between the parent and the cloned child.
// NOTE(review): several member declarations are missing from this extract.
99 struct pakfire_jail_exec
{
100 // PID (of the child)
104 // Process status (from waitid)
107 // FD to notify the client that the parent has finished initialization
// Pipe pairs that carry the child's output and log messages to the parent
111 struct pakfire_jail_pipes
{
// One line buffer per pipe, consumed by pakfire_jail_handle_log()
122 struct pakfire_jail_buffers
{
123 struct pakfire_log_buffer stdout
;
124 struct pakfire_log_buffer stderr
;
127 struct pakfire_log_buffer log_INFO
;
128 struct pakfire_log_buffer log_ERROR
;
129 struct pakfire_log_buffer log_DEBUG
;
// Temporary cgroup created for this execution, and the statistics read from it
132 struct pakfire_cgroup
* cgroup
;
133 struct pakfire_cgroup_stats cgroup_stats
;
/*
	Thin wrapper around the clone3 system call.

	Passes "args" and "size" straight to the kernel and returns whatever
	syscall() returned.
*/
static int clone3(struct clone_args* args, size_t size) {
	const long result = syscall(__NR_clone3, args, size);

	return result;
}
// Releases a jail: walks the environment until the first NULL entry, then drops
// the references to the cgroup and to the pakfire instance.
// NOTE(review): the loop body and the release of the jail itself are not
// visible in this extract.
140 static void pakfire_jail_free(struct pakfire_jail
* jail
) {
141 DEBUG(jail
->pakfire
, "Freeing jail at %p\n", jail
);
// Walk all environment entries
144 for (unsigned int i
= 0; jail
->env
[i
]; i
++)
148 pakfire_cgroup_unref(jail
->cgroup
);
150 pakfire_unref(jail
->pakfire
);
155 Passes any log messages on to the default pakfire log callback
// Default log callback: forwards "line" to pakfire's INFO, ERROR or DEBUG logging macro.
// NOTE(review): the code that selects the macro from "priority" is not visible
// in this extract — presumably a switch on the syslog priority; confirm.
157 static int pakfire_jail_default_log_callback(struct pakfire
* pakfire
, void* data
,
158 int priority
, const char* line
, size_t length
) {
161 INFO(pakfire
, "%s", line
);
165 ERROR(pakfire
, "%s", line
);
170 DEBUG(pakfire
, "%s", line
);
// Prepares the environment of an interactive jail: sets a prompt (PS1) and copies
// TERM and LANG from the calling process into the jail.
// NOTE(review): the error checks and any NULL checks on the secure_getenv()
// results are not visible in this extract.
178 static int pakfire_jail_setup_interactive_env(struct pakfire_jail
* jail
) {
// Set the shell prompt
180 int r
= pakfire_jail_set_env(jail
, "PS1", "pakfire-jail \\w> ");
// Copy TERM from the caller's environment
185 char* TERM
= secure_getenv("TERM");
187 r
= pakfire_jail_set_env(jail
, "TERM", TERM
);
// Copy LANG from the caller's environment
193 char* LANG
= secure_getenv("LANG");
195 r
= pakfire_jail_set_env(jail
, "LANG", LANG
);
// Creates a new jail: allocates a zeroed object, takes a reference to "pakfire",
// generates a random UUID, installs the default log callback, applies the
// default environment from ENV and, for interactive jails, the interactive
// environment. The visible cleanup path frees the jail again.
// NOTE(review): error checks, the storing of "flags", and the assignment of the
// result to *jail are not visible in this extract.
203 PAKFIRE_EXPORT
int pakfire_jail_create(struct pakfire_jail
** jail
,
204 struct pakfire
* pakfire
, int flags
) {
207 // Allocate a new jail
208 struct pakfire_jail
* j
= calloc(1, sizeof(*j
));
// Keep a reference to the pakfire instance
213 j
->pakfire
= pakfire_ref(pakfire
);
215 // Initialize reference counter
221 // Generate a random UUID
222 uuid_generate_random(j
->uuid
);
224 DEBUG(j
->pakfire
, "Allocated new jail at %p\n", j
);
226 // Set default log callback
227 r
= pakfire_jail_set_log_callback(j
, pakfire_jail_default_log_callback
, NULL
);
231 // Set default environment
232 for (const struct environ
* e
= ENV
; e
->key
; e
++) {
233 r
= pakfire_jail_set_env(j
, e
->key
, e
->val
);
238 // Setup interactive stuff
239 if (j
->flags
& PAKFIRE_JAIL_INTERACTIVE
) {
240 r
= pakfire_jail_setup_interactive_env(j
);
// Release the partially initialised jail
250 pakfire_jail_free(j
);
// NOTE(review): the body is not visible in this extract — presumably this
// increments jail->nrefs and returns the jail; confirm against the full file.
255 PAKFIRE_EXPORT
struct pakfire_jail
* pakfire_jail_ref(struct pakfire_jail
* jail
) {
// Drops one reference to the jail. The jail is freed once the decremented
// counter is no longer greater than zero.
// NOTE(review): the return statements are not visible in this extract.
261 PAKFIRE_EXPORT
struct pakfire_jail
* pakfire_jail_unref(struct pakfire_jail
* jail
) {
// Still referenced by somebody else?
262 if (--jail
->nrefs
> 0)
265 pakfire_jail_free(jail
);
269 static int pakfire_jail_has_flag(struct pakfire_jail
* jail
, int flag
) {
270 return jail
->flags
& flag
;
// Formats the jail's UUID as a lower-case string into jail->__uuid.
// NOTE(review): the return statement is not visible — presumably it returns
// jail->__uuid; confirm.
273 static const char* pakfire_jail_uuid(struct pakfire_jail
* jail
) {
275 uuid_unparse_lower(jail
->uuid
, jail
->__uuid
);
282 PAKFIRE_EXPORT
int pakfire_jail_nice(struct pakfire_jail
* jail
, int nice
) {
283 // Check if nice level is in range
284 if (nice
< -19 || nice
> 20) {
// Replaces the jail's cgroup: drops the reference to the previous cgroup and
// takes a new reference to "cgroup".
// NOTE(review): the conditions guarding both steps and the return value are
// not visible in this extract.
295 int pakfire_jail_set_cgroup(struct pakfire_jail
* jail
, struct pakfire_cgroup
* cgroup
) {
296 // Free any previous cgroup
298 pakfire_cgroup_unref(jail
->cgroup
);
302 // Set any new cgroup
304 DEBUG(jail
->pakfire
, "Setting cgroup %p\n", cgroup
);
306 jail
->cgroup
= pakfire_cgroup_ref(cgroup
);
// Walks jail->env from the start until the first NULL entry.
// NOTE(review): the counter and the return statement are not visible in this extract.
315 // Returns the length of the environment
316 static unsigned int pakfire_jail_env_length(struct pakfire_jail
* jail
) {
319 // Count everything in the environment
320 for (char** e
= jail
->env
; *e
; e
++)
326 // Finds an existing environment variable and returns its index or -1 if not found
327 static int pakfire_jail_find_env(struct pakfire_jail
* jail
, const char* key
) {
// Build the prefix "KEY=" - the buffer holds the key, the '=' and the terminating NUL
333 char buffer
[strlen(key
) + 2];
334 pakfire_string_format(buffer
, "%s=", key
);
// Compare the prefix against every entry until the first NULL entry
336 for (unsigned int i
= 0; jail
->env
[i
]; i
++) {
337 if (pakfire_string_startswith(jail
->env
[i
], buffer
))
// The returned pointer points into the jail's own "KEY=VALUE" string.
// NOTE(review): the check for a negative index is not visible in this extract.
345 // Returns the value of an environment variable or NULL
346 PAKFIRE_EXPORT
const char* pakfire_jail_get_env(struct pakfire_jail
* jail
,
348 int i
= pakfire_jail_find_env(jail
, key
);
// Skip the key and the '=' so that only the value is returned
352 return jail
->env
[i
] + strlen(key
) + 1;
355 // Sets an environment variable
356 PAKFIRE_EXPORT
int pakfire_jail_set_env(struct pakfire_jail
* jail
,
357 const char* key
, const char* value
) {
358 // Find the index where to write this value to
359 int i
= pakfire_jail_find_env(jail
, key
);
361 i
= pakfire_jail_env_length(jail
);
363 // Return -ENOSPC when the environment is full
364 if (i
>= ENVIRON_SIZE
) {
369 // Free any previous value
373 // Format and set environment variable
374 asprintf(&jail
->env
[i
], "%s=%s", key
, value
);
376 DEBUG(jail
->pakfire
, "Set environment variable: %s\n", jail
->env
[i
]);
// Splits every "KEY=VALUE" string of the NULL-terminated array "env" at "=" and
// stores the pair with pakfire_jail_set_env().
// NOTE(review): declarations, error handling and the cleanup of key/val are
// not visible in this extract.
381 // Imports an environment
382 PAKFIRE_EXPORT
int pakfire_jail_import_env(struct pakfire_jail
* jail
, const char* env
[]) {
390 // Copy environment variables
391 for (unsigned int i
= 0; env
[i
]; i
++) {
// Split the entry into key and value
392 r
= pakfire_string_partition(env
[i
], "=", &key
, &val
);
// Store the variable in the jail
397 r
= pakfire_jail_set_env(jail
, key
, val
);
// Stores the callback that receives the output of the jailed process, together
// with an opaque pointer that is passed back to the callback.
// NOTE(review): the return statement is not visible in this extract.
414 PAKFIRE_EXPORT
int pakfire_jail_set_log_callback(struct pakfire_jail
* jail
,
415 pakfire_jail_log_callback callback
, void* data
) {
416 jail
->log_callback
= callback
;
417 jail
->log_data
= data
;
423 This function replaces any logging in the child process.
425 All log messages will be sent to the parent process through their respective pipes.
// Log callback installed in the child: "data" is the pipe set of the current
// execution. The write end of the INFO, ERROR or DEBUG pipe is selected and the
// formatted message is written to it with vdprintf().
// NOTE(review): the code selecting the pipe from "priority" is not visible here.
427 static void pakfire_jail_log(void* data
, int priority
, const char* file
,
428 int line
, const char* fn
, const char* format
, va_list args
) {
429 struct pakfire_jail_pipes
* pipes
= (struct pakfire_jail_pipes
*)data
;
// Index 1 is the write end of each pipe
434 fd
= pipes
->log_INFO
[1];
438 fd
= pipes
->log_ERROR
[1];
443 fd
= pipes
->log_DEBUG
[1];
445 #endif /* ENABLE_DEBUG */
447 // Ignore any messages of an unknown priority
452 // Send the log message
454 vdprintf(fd
, format
, args
);
457 static int pakfire_jail_log_buffer_is_full(const struct pakfire_log_buffer
* buffer
) {
458 return (sizeof(buffer
->data
) == buffer
->used
);
462 This function reads as much data as it can from the file descriptor.
463 If it finds a whole line in it, it will send it to the logger and repeat the process.
464 If no newline character is found, it will try to read more data until it finds one.
// Reads from "fd" into "buffer" and passes every complete line to "callback"
// (with "data" and "priority"). Consumed lines are removed from the front of
// the buffer.
// NOTE(review): several statements (string termination, error returns, the
// handling of a missing newline) are not visible in this extract.
466 static int pakfire_jail_handle_log(struct pakfire_jail
* jail
,
467 struct pakfire_jail_exec
* ctx
, int priority
, int fd
,
468 struct pakfire_log_buffer
* buffer
, pakfire_jail_log_callback callback
, void* data
) {
// Scratch space for one line: a full buffer plus one extra byte
469 char line
[BUFFER_SIZE
+ 1];
471 // Fill up buffer from fd
472 if (buffer
->used
< sizeof(buffer
->data
)) {
// Read at most as many bytes as there is space left in the buffer
473 ssize_t bytes_read
= read(fd
, buffer
->data
+ buffer
->used
,
474 sizeof(buffer
->data
) - buffer
->used
);
477 if (bytes_read
< 0) {
478 ERROR(jail
->pakfire
, "Could not read from fd %d: %m\n", fd
);
482 // Update buffer size
483 buffer
->used
+= bytes_read
;
486 // See if we have any lines that we can write
487 while (buffer
->used
) {
488 // Search for the end of the first line
489 char* eol
= memchr(buffer
->data
, '\n', buffer
->used
);
493 // If the buffer is full, we send the content to the logger and try again
494 // This should not happen in practice
495 if (pakfire_jail_log_buffer_is_full(buffer
)) {
496 DEBUG(jail
->pakfire
, "Logging buffer is full. Sending all content\n");
// Treat the last byte of the buffer as the end of the line
498 eol
= buffer
->data
+ sizeof(buffer
->data
) - 1;
500 // Otherwise we might have only read parts of the output
505 // Find the length of the string
// (includes the byte that eol points to)
506 size_t length
= eol
- buffer
->data
+ 1;
508 // Copy the line into the buffer
509 memcpy(line
, buffer
->data
, length
);
511 // Terminate the string
// Hand the line over to the callback
516 int r
= callback(jail
->pakfire
, data
, priority
, line
, length
);
518 ERROR(jail
->pakfire
, "The logging callback returned an error: %d\n", r
);
523 // Remove line from buffer
524 memmove(buffer
->data
, buffer
->data
+ length
, buffer
->used
- length
);
525 buffer
->used
-= length
;
// Creates a pipe with pipe2() using "flags" and stores both ends in *fds.
// NOTE(review): the error check and the return statements are not visible here.
531 static int pakfire_jail_setup_pipe(struct pakfire_jail
* jail
, int (*fds
)[2], const int flags
) {
532 int r
= pipe2(*fds
, flags
);
534 ERROR(jail
->pakfire
, "Could not setup pipe: %m\n");
// Iterates over both ends of a pipe.
// NOTE(review): the loop body is not visible in this extract — presumably it
// closes each open file descriptor; confirm.
541 static void pakfire_jail_close_pipe(struct pakfire_jail
* jail
, int fds
[2]) {
542 for (unsigned int i
= 0; i
< 2; i
++)
548 This is a convenience function that fetches the read end of a pipe and
549 closes the write end.
// Used by the parent to obtain the file descriptor it reads the child's data from.
// NOTE(review): the close() call and the return statement are not visible here.
551 static int pakfire_jail_get_pipe(struct pakfire_jail
* jail
, int (*fds
)[2]) {
552 // Give the variables easier names to avoid confusion
// Index 0 is the read end, index 1 is the write end
553 int* fd_read
= &(*fds
)[0];
554 int* fd_write
= &(*fds
)[1];
556 // Close the write end of the pipe
562 // Return the read end
// Parent-side event loop: registers the pidfd and the read ends of all pipes
// with epoll, then dispatches every event. Activity on the pidfd collects the
// child's exit status with waitid(); activity on a pipe is passed to
// pakfire_jail_handle_log() with the matching buffer and callback.
// NOTE(review): many statements (declarations, loop headers, error paths,
// priority assignments, cleanup) are not visible in this extract.
566 static int pakfire_jail_wait(struct pakfire_jail
* jail
, struct pakfire_jail_exec
* ctx
) {
568 struct epoll_event ev
;
569 struct epoll_event events
[EPOLL_MAX_EVENTS
];
572 // Fetch file descriptors from context
573 const int stdout
= pakfire_jail_get_pipe(jail
, &ctx
->pipes
.stdout
);
574 const int stderr
= pakfire_jail_get_pipe(jail
, &ctx
->pipes
.stderr
);
575 const int pidfd
= ctx
->pidfd
;
578 const int log_INFO
= pakfire_jail_get_pipe(jail
, &ctx
->pipes
.log_INFO
);
579 const int log_ERROR
= pakfire_jail_get_pipe(jail
, &ctx
->pipes
.log_ERROR
);
580 const int log_DEBUG
= pakfire_jail_get_pipe(jail
, &ctx
->pipes
.log_DEBUG
);
582 // Make a list of all file descriptors we are interested in
584 stdout
, stderr
, pidfd
, log_INFO
, log_ERROR
, log_DEBUG
,
// Create the epoll instance
588 epollfd
= epoll_create1(0);
590 ERROR(jail
->pakfire
, "Could not initialize epoll(): %m\n");
597 // Turn file descriptors into non-blocking mode and add them to epoll()
598 for (unsigned int i
= 0; i
< sizeof(fds
) / sizeof(*fds
); i
++) {
601 // Skip fds which were not initialized
607 if (epoll_ctl(epollfd
, EPOLL_CTL_ADD
, fd
, &ev
) < 0) {
608 ERROR(jail
->pakfire
, "Could not add file descriptor %d to epoll(): %m\n", fd
);
616 // Loop for as long as the process is alive
// Block without timeout until at least one file descriptor has an event
618 int num
= epoll_wait(epollfd
, events
, EPOLL_MAX_EVENTS
, -1);
620 // Ignore if epoll_wait() has been interrupted
624 ERROR(jail
->pakfire
, "epoll_wait() failed: %m\n");
// Process all events that have been returned
630 for (int i
= 0; i
< num
; i
++) {
631 int fd
= events
[i
].data
.fd
;
633 struct pakfire_log_buffer
* buffer
= NULL
;
634 pakfire_jail_log_callback callback
= NULL
;
638 // Handle any changes to the PIDFD
640 // Call waitid() and store the result
641 r
= waitid(P_PIDFD
, ctx
->pidfd
, &ctx
->status
, WEXITED
);
643 ERROR(jail
->pakfire
, "waitid() failed: %m\n");
647 // Mark that we have ended so that we will process the remaining
648 // events from epoll() now, but won't restart the outer loop.
652 // Handle logging messages
653 } else if (fd
== log_INFO
) {
654 buffer
= &ctx
->buffers
.log_INFO
;
657 callback
= pakfire_jail_default_log_callback
;
659 } else if (fd
== log_ERROR
) {
660 buffer
= &ctx
->buffers
.log_ERROR
;
663 callback
= pakfire_jail_default_log_callback
;
665 } else if (fd
== log_DEBUG
) {
666 buffer
= &ctx
->buffers
.log_DEBUG
;
667 priority
= LOG_DEBUG
;
669 callback
= pakfire_jail_default_log_callback
;
671 // Handle the child's standard output and standard error
672 } else if (fd
== stdout
) {
673 buffer
= &ctx
->buffers
.stdout
;
// Output of the command goes to the callback configured on the jail
676 callback
= jail
->log_callback
;
677 data
= jail
->log_data
;
679 } else if (fd
== stderr
) {
680 buffer
= &ctx
->buffers
.stderr
;
683 callback
= jail
->log_callback
;
684 data
= jail
->log_data
;
687 DEBUG(jail
->pakfire
, "Received invalid file descriptor %d\n", fd
);
// Read from the file descriptor and emit any complete lines
692 r
= pakfire_jail_handle_log(jail
, ctx
, priority
, fd
, buffer
, callback
, data
);
/*
	Log callback that collects the standard output of the jailed process.

	"data" is a pointer to a string (char**) to which every line that arrives
	with priority LOG_INFO is appended. The string is re-allocated for every
	line and the previous buffer is released, so that the caller only has to
	free *output once when done.

	Everything else is passed on to the default log callback.

	Returns 0 on success and 1 if no memory could be allocated. In that
	case, *output keeps its previous content.
*/
static int pakfire_jail_capture_stdout(struct pakfire* pakfire, void* data, int priority,
		const char* line, size_t length) {
	char** output = (char**)data;
	char* joined = NULL;
	int r;

	// Append everything from stdout to a buffer
	if (output && priority == LOG_INFO) {
		// Write into a temporary pointer because asprintf() leaves
		// its result undefined on failure
		r = asprintf(&joined, "%s%s", (*output) ? *output : "", line);
		if (r < 0)
			return 1;

		// Release the previous buffer and store the new one
		free(*output);
		*output = joined;

		return 0;
	}

	// Send everything else to the default logger
	return pakfire_jail_default_log_callback(pakfire, NULL, priority, line, length);
}
// Removes a fixed list of capabilities from the calling process: every listed
// capability is dropped from the bounding set with prctl(PR_CAPBSET_DROP) and
// afterwards cleared from the inheritable set using libcap.
// NOTE(review): most entries of the list, the declaration of num_caps, and
// the error paths are not visible in this extract.
724 static int pakfire_jail_drop_capabilities(struct pakfire_jail
* jail
) {
725 const int capabilities
[] = {
726 // Deny access to the kernel's audit system
731 // Deny suspending block devices
734 // Deny any stuff with BPF
737 // Deny checkpoint restore
738 CAP_CHECKPOINT_RESTORE
,
740 // Deny opening files by inode number (open_by_handle_at)
743 // Deny setting SUID bits
746 // Deny locking more memory
749 // Deny modifying any Apparmor/SELinux/SMACK configuration
753 // Deny creating any special devices
756 // Deny setting any capabilities
759 // Deny reading from syslog
762 // Deny any admin actions (mount, sethostname, ...)
765 // Deny rebooting the system
768 // Deny loading kernel modules
771 // Deny setting nice level
774 // Deny access to /proc/kcore, /dev/mem, /dev/kmem
777 // Deny circumventing any resource limits
780 // Deny setting the system time
783 // Deny playing with suspend
789 DEBUG(jail
->pakfire
, "Dropping capabilities...\n");
794 // Drop any capabilities
// The loop stops at the first entry that is zero
795 for (const int* cap
= capabilities
; *cap
; cap
++) {
796 r
= prctl(PR_CAPBSET_DROP
, *cap
, 0, 0, 0);
798 ERROR(jail
->pakfire
, "Could not drop capability %d: %m\n", *cap
);
805 // Fetch any capabilities
806 cap_t caps
= cap_get_proc();
808 ERROR(jail
->pakfire
, "Could not read capabilities: %m\n");
813 Set inheritable capabilities
815 This ensures that no processes will be able to gain any of the listed
// Clear all listed capabilities from the inheritable set
818 r
= cap_set_flag(caps
, CAP_INHERITABLE
, num_caps
, capabilities
, CAP_CLEAR
);
820 ERROR(jail
->pakfire
, "cap_set_flag() failed: %m\n");
824 // Restore capabilities
825 r
= cap_set_proc(caps
);
827 ERROR(jail
->pakfire
, "Could not restore capabilities: %m\n");
// Installs a seccomp filter that allows every system call by default and makes
// the listed system calls fail with EPERM.
// NOTE(review): part of the list and the error paths are not visible here.
840 static int pakfire_jail_limit_syscalls(struct pakfire_jail
* jail
) {
841 const int syscalls
[] = {
842 // The kernel's keyring isn't namespaced
845 SCMP_SYS(request_key
),
847 // Disable userfaultfd
848 SCMP_SYS(userfaultfd
),
850 // Disable perf which could leak a lot of information about the host
851 SCMP_SYS(perf_event_open
),
857 DEBUG(jail
->pakfire
, "Applying syscall filter...\n");
859 // Setup a syscall filter which allows everything by default
860 scmp_filter_ctx ctx
= seccomp_init(SCMP_ACT_ALLOW
);
862 ERROR(jail
->pakfire
, "Could not setup seccomp filter: %m\n");
// Add one rule per listed system call; the loop stops at the first zero entry
867 for (const int* syscall
= syscalls
; *syscall
; syscall
++) {
868 r
= seccomp_rule_add(ctx
, SCMP_ACT_ERRNO(EPERM
), *syscall
, 0);
870 ERROR(jail
->pakfire
, "Could not configure syscall %d: %m\n", *syscall
);
875 // Load syscall filter into the kernel
876 r
= seccomp_load(ctx
);
878 ERROR(jail
->pakfire
, "Could not load syscall filter into the kernel: %m\n");
// Free the filter context
884 seccomp_release(ctx
);
// Writes a single mapping line "0 <id> <length>" to the file at "path", i.e.
// ID 0 is mapped onto the start of the given subordinate ID range.
// NOTE(review): the error checks, the closing of the file and the return
// statements are not visible in this extract.
891 static int pakfire_jail_write_uidgid_mapping(struct pakfire_jail
* jail
,
892 const char* path
, const struct pakfire_subid
* subid
) {
895 // Open file for writing
896 FILE* f
= fopen(path
, "w");
898 ERROR(jail
->pakfire
, "Could not open %s for writing: %m\n", path
);
902 // Write configuration
903 int bytes_written
= fprintf(f
, "%d %u %lu\n", 0, subid
->id
, subid
->length
);
904 if (bytes_written
<= 0) {
905 ERROR(jail
->pakfire
, "Could not write UID/GID mapping: %m\n");
913 ERROR(jail
->pakfire
, "Could not write UID/GID mapping: %m\n");
// Writes the subordinate UID range of this pakfire instance to
// /proc/<pid>/uid_map of the child process.
// NOTE(review): declarations and early returns are not visible in this extract.
928 static int pakfire_jail_setup_uid_mapping(struct pakfire_jail
* jail
, pid_t pid
) {
932 // Skip mapping anything when running on /
933 if (pakfire_on_root(jail
->pakfire
))
// Fetch the subordinate UID range
937 const struct pakfire_subid
* subuid
= pakfire_subuid(jail
->pakfire
);
// Compose the path of the map file
942 r
= pakfire_string_format(path
, "/proc/%d/uid_map", pid
);
946 DEBUG(jail
->pakfire
, "Mapping UID range (%u - %lu)\n",
947 subuid
->id
, subuid
->id
+ subuid
->length
);
949 return pakfire_jail_write_uidgid_mapping(jail
, path
, subuid
);
// Writes the subordinate GID range of this pakfire instance to
// /proc/<pid>/gid_map of the child process.
// NOTE(review): declarations and early returns are not visible in this extract.
952 static int pakfire_jail_setup_gid_mapping(struct pakfire_jail
* jail
, pid_t pid
) {
956 // Skip mapping anything when running on /
957 if (pakfire_on_root(jail
->pakfire
))
// Fetch the subordinate GID range
961 const struct pakfire_subid
* subgid
= pakfire_subgid(jail
->pakfire
);
// Compose the path of the map file
966 r
= pakfire_string_format(path
, "/proc/%d/gid_map", pid
);
970 DEBUG(jail
->pakfire
, "Mapping GID range (%u - %lu)\n",
971 subgid
->id
, subgid
->id
+ subgid
->length
);
973 return pakfire_jail_write_uidgid_mapping(jail
, path
, subgid
);
// Writes "deny" to /proc/<pid>/setgroups of the child process.
// NOTE(review): declarations, the closing of the file and the return
// statements are not visible in this extract.
976 static int pakfire_jail_setgroups(struct pakfire_jail
* jail
, pid_t pid
) {
// Compose the path of the file
981 r
= pakfire_string_format(path
, "/proc/%d/setgroups", pid
);
985 // Open file for writing
986 FILE* f
= fopen(path
, "w");
988 ERROR(jail
->pakfire
, "Could not open %s for writing: %m\n", path
);
// Write the setting
993 int bytes_written
= fprintf(f
, "deny\n");
994 if (bytes_written
<= 0) {
995 ERROR(jail
->pakfire
, "Could not write to %s: %m\n", path
);
1002 ERROR(jail
->pakfire
, "Could not close %s: %m\n", path
);
// Notifies the other side by writing the 64-bit value 1 to "fd" (the parent
// passes the eventfd stored in ctx->completed_fd).
// NOTE(review): the close() call and the return statements are not visible here.
1013 static int pakfire_jail_send_signal(struct pakfire_jail
* jail
, int fd
) {
1014 const uint64_t val
= 1;
1017 DEBUG(jail
->pakfire
, "Sending signal...\n");
1019 // Write to the file descriptor
1020 ssize_t bytes_written
= write(fd
, &val
, sizeof(val
));
// Both an error and a short write count as failure
1021 if (bytes_written
< 0 || (size_t)bytes_written
< sizeof(val
)) {
1022 ERROR(jail
->pakfire
, "Could not send signal: %m\n");
1026 // Close the file descriptor
// Counterpart of pakfire_jail_send_signal(): reads one value from "fd".
// NOTE(review): the declaration of val, the close() call and the return
// statements are not visible in this extract.
1032 static int pakfire_jail_wait_for_signal(struct pakfire_jail
* jail
, int fd
) {
1036 DEBUG(jail
->pakfire
, "Waiting for signal...\n");
1038 ssize_t bytes_read
= read(fd
, &val
, sizeof(val
));
// Both an error and a short read count as failure
1039 if (bytes_read
< 0 || (size_t)bytes_read
< sizeof(val
)) {
1040 ERROR(jail
->pakfire
, "Error waiting for signal: %m\n");
1044 // Close the file descriptor
1051 Performs the initialisation that needs to happen in the parent part
// Runs in the parent after the child has been cloned: configures setgroups and
// the UID/GID mappings of the child and then notifies the child through
// ctx->completed_fd.
// NOTE(review): the error checks after each step are not visible in this extract.
1053 static int pakfire_jail_parent(struct pakfire_jail
* jail
, struct pakfire_jail_exec
* ctx
) {
1056 // Write "deny" to /proc/PID/setgroups
1057 r
= pakfire_jail_setgroups(jail
, ctx
->pid
);
1061 // Setup UID mapping
1062 r
= pakfire_jail_setup_uid_mapping(jail
, ctx
->pid
);
1066 // Setup GID mapping
1067 r
= pakfire_jail_setup_gid_mapping(jail
, ctx
->pid
);
1071 // Parent has finished initialisation
1072 DEBUG(jail
->pakfire
, "Parent has finished initialization\n");
1074 // Send signal to client
1075 r
= pakfire_jail_send_signal(jail
, ctx
->completed_fd
);
// Runs in the cloned child. Visible steps, in order: redirect logging into the
// log pipes, request SIGKILL when the parent dies, wait for the parent to finish
// its setup, verify that the process runs as root, mount and change root (unless
// running on /), set personality and nice level, connect stdout/stderr to the
// pipes, reset the open file limit, drop capabilities, apply the syscall filter
// and finally execute the command with the jail's environment.
// NOTE(review): error checks, the chroot()/chdir() calls themselves and the
// exit code translation are not visible in this extract.
1082 static int pakfire_jail_child(struct pakfire_jail
* jail
, struct pakfire_jail_exec
* ctx
,
1083 const char* argv
[]) {
1086 // Redirect any logging to our log pipe
1087 pakfire_set_log_callback(jail
->pakfire
, pakfire_jail_log
, &ctx
->pipes
);
// Have the kernel send SIGKILL to this process when the parent dies
1090 r
= prctl(PR_SET_PDEATHSIG
, SIGKILL
, 0, 0, 0);
1092 ERROR(jail
->pakfire
, "Could not configure to die with parent: %m\n");
1097 pid_t pid
= getpid();
1099 DEBUG(jail
->pakfire
, "Launched child process in jail with PID %d\n", pid
);
// Log the command line
1102 for (unsigned int i
= 0; argv
[i
]; i
++)
1103 DEBUG(jail
->pakfire
, " argv[%d] = %s\n", i
, argv
[i
]);
1105 // Wait for the parent to finish initialization
1106 r
= pakfire_jail_wait_for_signal(jail
, ctx
->completed_fd
);
1110 // Perform further initialization
// Fetch real and effective user/group IDs
1113 uid_t uid
= getuid();
1114 gid_t gid
= getgid();
1117 uid_t euid
= geteuid();
1118 gid_t egid
= getegid();
1120 DEBUG(jail
->pakfire
, " UID: %d (effective %d)\n", uid
, euid
);
1121 DEBUG(jail
->pakfire
, " GID: %d (effective %d)\n", gid
, egid
);
1123 // Check if we are (effectively running as root)
// Any non-zero ID means that the process is not root
1124 if (uid
|| gid
|| euid
|| egid
) {
1125 ERROR(jail
->pakfire
, "Child process is not running as root\n");
1129 const char* root
= pakfire_get_path(jail
->pakfire
);
1130 const char* arch
= pakfire_get_arch(jail
->pakfire
);
1132 // Change root (unless root is /)
1133 if (!pakfire_on_root(jail
->pakfire
)) {
// Mount everything first
1135 r
= pakfire_mount_all(jail
->pakfire
);
1139 // Log all mountpoints
1140 pakfire_mount_list(jail
->pakfire
);
1145 ERROR(jail
->pakfire
, "chroot() to %s failed: %m\n", root
);
1149 // Change directory to /
1152 ERROR(jail
->pakfire
, "chdir() after chroot() failed: %m\n");
// Set the personality that matches the architecture
1158 unsigned long persona
= pakfire_arch_personality(arch
);
1160 r
= personality(persona
);
1162 ERROR(jail
->pakfire
, "Could not set personality (%x)\n", (unsigned int)persona
);
1169 DEBUG(jail
->pakfire
, "Setting nice level to %d\n", jail
->nice
);
// Apply the nice level to this process
1171 r
= setpriority(PRIO_PROCESS
, pid
, jail
->nice
);
1173 ERROR(jail
->pakfire
, "Could not set nice level: %m\n");
1178 // Close other end of log pipes
1179 close(ctx
->pipes
.log_INFO
[0]);
1180 close(ctx
->pipes
.log_ERROR
[0]);
1182 close(ctx
->pipes
.log_DEBUG
[0]);
1183 #endif /* ENABLE_DEBUG */
1185 // Connect standard output and error
// Only done if both write ends are non-zero
1186 if (ctx
->pipes
.stdout
[1] && ctx
->pipes
.stderr
[1]) {
1187 r
= dup2(ctx
->pipes
.stdout
[1], STDOUT_FILENO
);
1189 ERROR(jail
->pakfire
, "Could not connect fd %d to stdout: %m\n",
1190 ctx
->pipes
.stdout
[1]);
1195 r
= dup2(ctx
->pipes
.stderr
[1], STDERR_FILENO
);
1197 ERROR(jail
->pakfire
, "Could not connect fd %d to stderr: %m\n",
1198 ctx
->pipes
.stderr
[1]);
1203 // Close the pipe (as we have moved the original file descriptors)
1204 pakfire_jail_close_pipe(jail
, ctx
->pipes
.stdout
);
1205 pakfire_jail_close_pipe(jail
, ctx
->pipes
.stderr
);
1208 // Reset open file limit (http://0pointer.net/blog/file-descriptor-limits.html)
1209 r
= pakfire_rlimit_reset_nofile(jail
->pakfire
);
1213 // Drop capabilities
1214 r
= pakfire_jail_drop_capabilities(jail
);
// Apply the syscall filter
1219 r
= pakfire_jail_limit_syscalls(jail
);
// Replace this process with the command
1224 r
= execvpe(argv
[0], (char**)argv
, jail
->env
);
1226 ERROR(jail
->pakfire
, "Could not execve(): %m\n");
1228 // Translate errno into regular exit code
1238 // We should not get here
// Visible steps: validate argv, create the eventfd used to release the child,
// create the pipes (stdout/stderr only for non-interactive jails), optionally
// create a temporary child cgroup named after the jail's UUID, clone3() the
// process, run pakfire_jail_child() in the child and pakfire_jail_parent()
// followed by pakfire_jail_wait() in the parent, evaluate the exit status, and
// finally clean up the cgroup, the pipes and the mounts.
// NOTE(review): error handling, the clone flags and the returned value are
// not fully visible in this extract.
1242 // Run a command in the jail
1243 static int __pakfire_jail_exec(struct pakfire_jail
* jail
, const char* argv
[]) {
1247 // Check if argv is valid
1248 if (!argv
|| !argv
[0]) {
1253 // Initialize context for this call
1254 struct pakfire_jail_exec ctx
= {
1261 DEBUG(jail
->pakfire
, "Executing jail...\n");
1264 Setup a file descriptor which can be used to notify the client that the parent
1265 has completed configuration.
1267 ctx
.completed_fd
= eventfd(0, EFD_CLOEXEC
);
1268 if (ctx
.completed_fd
< 0) {
1269 ERROR(jail
->pakfire
, "eventfd() failed: %m\n");
1273 // Create pipes to communicate with child process if we are not running interactively
1274 if (!pakfire_jail_has_flag(jail
, PAKFIRE_JAIL_INTERACTIVE
)) {
// These pipes are created without O_CLOEXEC
1276 r
= pakfire_jail_setup_pipe(jail
, &ctx
.pipes
.stdout
, 0);
1281 r
= pakfire_jail_setup_pipe(jail
, &ctx
.pipes
.stderr
, 0);
1286 // Setup pipes for logging
// The log pipes are closed automatically on exec
1288 r
= pakfire_jail_setup_pipe(jail
, &ctx
.pipes
.log_INFO
, O_CLOEXEC
);
1293 r
= pakfire_jail_setup_pipe(jail
, &ctx
.pipes
.log_ERROR
, O_CLOEXEC
);
1299 r
= pakfire_jail_setup_pipe(jail
, &ctx
.pipes
.log_DEBUG
, O_CLOEXEC
);
1302 #endif /* ENABLE_DEBUG */
1304 // Configure child process
1305 struct clone_args args
= {
1314 .exit_signal
= SIGCHLD
,
// clone3() stores the PID file descriptor of the child at this address
1315 .pidfd
= (long long unsigned int)&ctx
.pidfd
,
1318 // Launch the process in a cgroup that is a leaf of the configured cgroup
1320 args
.flags
|= CLONE_INTO_CGROUP
;
// The temporary cgroup is named after the UUID of the jail
1323 const char* uuid
= pakfire_jail_uuid(jail
);
1325 // Create a temporary cgroup
1326 r
= pakfire_cgroup_child(&ctx
.cgroup
, jail
->cgroup
, uuid
, 0);
1328 ERROR(jail
->pakfire
, "Could not create cgroup for jail: %m\n");
1332 // Clone into this cgroup
1333 args
.cgroup
= pakfire_cgroup_fd(ctx
.cgroup
);
1336 // Fork this process
1337 ctx
.pid
= clone3(&args
, sizeof(args
));
1339 ERROR(jail
->pakfire
, "Could not clone: %m\n");
// We are the child process
1343 } else if (ctx
.pid
== 0) {
1344 r
= pakfire_jail_child(jail
, &ctx
, argv
);
// Perform the parent's part of the initialisation
1349 r
= pakfire_jail_parent(jail
, &ctx
);
1353 DEBUG(jail
->pakfire
, "Waiting for PID %d to finish its work\n", ctx
.pid
);
1355 // Read output of the child process
1356 r
= pakfire_jail_wait(jail
, &ctx
);
1360 // Handle exit status
1361 switch (ctx
.status
.si_code
) {
1363 DEBUG(jail
->pakfire
, "The child process exited with code %d\n",
1364 ctx
.status
.si_status
);
// Remember the exit code of the child
1367 exit
= ctx
.status
.si_status
;
1372 ERROR(jail
->pakfire
, "The child process was killed\n");
1375 // Log anything else
1377 ERROR(jail
->pakfire
, "Unknown child exit code: %d\n", ctx
.status
.si_code
);
1382 // Destroy the temporary cgroup (if any)
1384 // Read cgroup stats
1385 r
= pakfire_cgroup_stat(ctx
.cgroup
, &ctx
.cgroup_stats
);
1387 ERROR(jail
->pakfire
, "Could not read cgroup stats: %m\n");
1389 pakfire_cgroup_stat_dump(ctx
.cgroup
, &ctx
.cgroup_stats
);
1392 pakfire_cgroup_destroy(ctx
.cgroup
);
1393 pakfire_cgroup_unref(ctx
.cgroup
);
1396 // Close any file descriptors
1397 pakfire_jail_close_pipe(jail
, ctx
.pipes
.stdout
);
1398 pakfire_jail_close_pipe(jail
, ctx
.pipes
.stderr
);
1401 pakfire_jail_close_pipe(jail
, ctx
.pipes
.log_INFO
);
1402 pakfire_jail_close_pipe(jail
, ctx
.pipes
.log_ERROR
);
1403 pakfire_jail_close_pipe(jail
, ctx
.pipes
.log_DEBUG
);
1406 // Umount everything
1407 if (!pakfire_on_root(jail
->pakfire
))
1408 pakfire_umount_all(jail
->pakfire
);
// Executes argv in the jail. The jail's log callback is saved, temporarily
// replaced by pakfire_jail_capture_stdout() (which appends to *output) and
// restored after the command has finished.
// NOTE(review): the condition that guards the capture and the return statement
// are not visible in this extract.
1414 PAKFIRE_EXPORT
int pakfire_jail_exec(struct pakfire_jail
* jail
,
1415 const char* argv
[], char** output
) {
1418 // Store logging callback
1419 pakfire_jail_log_callback log_callback
= jail
->log_callback
;
1420 void* log_data
= jail
->log_data
;
1422 // Capture output if requested by user
1424 pakfire_jail_set_log_callback(jail
, pakfire_jail_capture_stdout
, output
);
// Run the command
1427 r
= __pakfire_jail_exec(jail
, argv
);
1429 // Restore log callback
1430 pakfire_jail_set_log_callback(jail
, log_callback
, log_data
);
// Writes "script" ("size" bytes) to a temporary file below the pakfire root,
// makes it executable for its owner and executes it in the jail with "args"
// appended as arguments.
// NOTE(review): error paths, the close() call, any guard around the counting
// of "args", and the cleanup code are not visible in this extract.
1435 PAKFIRE_EXPORT
int pakfire_jail_exec_script(struct pakfire_jail
* jail
,
1436 const char* script
, const size_t size
, const char* args
[], char** output
) {
1437 char path
[PATH_MAX
];
1438 const char** argv
= NULL
;
1441 const char* root
= pakfire_get_path(jail
->pakfire
);
1443 // Write the scriptlet to disk
// Compose the template for mkstemp()
1444 r
= pakfire_path_join(path
, root
, "pakfire-script.XXXXXX");
1448 // Open a temporary file
1449 int fd
= mkstemp(path
);
1451 ERROR(jail
->pakfire
, "Could not open a temporary file: %m\n");
1456 DEBUG(jail
->pakfire
, "Writing script to %s:\n%.*s\n", path
, (int)size
, script
);
// Write the script; errors and short writes are both treated as failure
1459 ssize_t bytes_written
= write(fd
, script
, size
);
1460 if (bytes_written
< (ssize_t
)size
) {
1461 ERROR(jail
->pakfire
, "Could not write script to file %s: %m\n", path
);
1466 // Make the script executable
1467 r
= fchmod(fd
, S_IRUSR
|S_IWUSR
|S_IXUSR
);
1469 ERROR(jail
->pakfire
, "Could not set executable permissions on %s: %m\n", path
);
1476 ERROR(jail
->pakfire
, "Could not close script file %s: %m\n", path
);
1481 // Count how many arguments were passed
// Starts at one for the script itself
1482 unsigned int argc
= 1;
1484 for (const char** arg
= args
; *arg
; arg
++)
// Allocate one extra zeroed slot, which terminates the array
1488 argv
= calloc(argc
+ 1, sizeof(*argv
));
1490 ERROR(jail
->pakfire
, "Could not allocate argv: %m\n");
// The script path is made relative to the root when a root is set
1495 argv
[0] = (root
) ? pakfire_path_relpath(root
, path
) : path
;
// Copy the arguments behind the script path
1498 for (unsigned int i
= 1; i
< argc
; i
++)
1499 argv
[i
] = args
[i
-1];
// Run the script
1502 r
= pakfire_jail_exec(jail
, argv
, output
);
1508 // Remove script from disk
1516 A convenience function that creates a new jail, runs the given command and destroys
// Creates a jail with "flags", executes argv in it (collecting output in
// *output) and drops the reference to the jail again.
// NOTE(review): error checks and the return statement are not visible here.
1519 int pakfire_jail_run(struct pakfire
* pakfire
, const char* argv
[], int flags
, char** output
) {
1520 struct pakfire_jail
* jail
= NULL
;
1523 // Create a new jail
1524 r
= pakfire_jail_create(&jail
, pakfire
, flags
);
1528 // Execute the command
1529 r
= pakfire_jail_exec(jail
, argv
, output
);
// Release the jail
1533 pakfire_jail_unref(jail
);
// Creates a jail with "flags", executes the given script in it with
// pakfire_jail_exec_script() and drops the reference to the jail again.
// NOTE(review): error checks and the return statement are not visible here.
1538 int pakfire_jail_run_script(struct pakfire
* pakfire
,
1539 const char* script
, const size_t length
, const char* argv
[], int flags
, char** output
) {
1540 struct pakfire_jail
* jail
= NULL
;
1543 // Create a new jail
1544 r
= pakfire_jail_create(&jail
, pakfire
, flags
);
1548 // Execute the command
1549 r
= pakfire_jail_exec_script(jail
, script
, length
, argv
, output
);
// Release the jail
1553 pakfire_jail_unref(jail
);
// Launches a login shell (/bin/bash --login) in an interactive jail.
// No output is captured.
1559 int pakfire_jail_shell(struct pakfire
* pakfire
) {
1560 const char* argv
[] = {
1561 "/bin/bash", "--login", NULL
,
1564 // Execute /bin/bash
1565 return pakfire_jail_run(pakfire
, argv
, PAKFIRE_JAIL_INTERACTIVE
, NULL
);
// Runs a command in a non-interactive jail after checking with access() that
// /sbin/ldconfig, resolved inside the pakfire root, is executable.
// NOTE(review): the contents of argv, the early returns and the end of the
// function are not visible in this extract.
1568 int pakfire_jail_ldconfig(struct pakfire
* pakfire
) {
1569 char path
[PATH_MAX
];
1571 const char* ldconfig
= "/sbin/ldconfig";
1573 // Check if ldconfig exists before calling it to avoid overhead
// Resolve the path of ldconfig
1574 int r
= pakfire_make_path(pakfire
, path
, ldconfig
);
1578 // Check if ldconfig is executable
1579 r
= access(path
, X_OK
);
1581 DEBUG(pakfire
, "%s is not executable. Skipping...\n", ldconfig
);
1585 const char* argv
[] = {
1590 return pakfire_jail_run(pakfire
, argv
, 0, NULL
);