1 /*#############################################################################
3 # Pakfire - The IPFire package management system #
4 # Copyright (C) 2022 Pakfire development team #
6 # This program is free software: you can redistribute it and/or modify #
7 # it under the terms of the GNU General Public License as published by #
8 # the Free Software Foundation, either version 3 of the License, or #
9 # (at your option) any later version. #
11 # This program is distributed in the hope that it will be useful, #
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of #
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
14 # GNU General Public License for more details. #
16 # You should have received a copy of the GNU General Public License #
17 # along with this program. If not, see <http://www.gnu.org/licenses/>. #
19 #############################################################################*/
22 #include <linux/capability.h>
23 #include <linux/fcntl.h>
24 #include <linux/sched.h>
25 #include <linux/wait.h>
31 #include <sys/capability.h>
32 #include <sys/epoll.h>
33 #include <sys/eventfd.h>
34 #include <sys/personality.h>
35 #include <sys/prctl.h>
36 #include <sys/resource.h>
37 #include <sys/types.h>
46 #include <pakfire/arch.h>
47 #include <pakfire/cgroup.h>
48 #include <pakfire/jail.h>
49 #include <pakfire/logging.h>
50 #include <pakfire/mount.h>
51 #include <pakfire/pakfire.h>
52 #include <pakfire/private.h>
53 #include <pakfire/pwd.h>
54 #include <pakfire/string.h>
55 #include <pakfire/util.h>
// Capacity in bytes of a struct pakfire_log_buffer and of the scratch line
// buffer used when forwarding log lines. Parenthesized so that the macro
// expands correctly inside larger expressions (e.g. x / BUFFER_SIZE).
#define BUFFER_SIZE (1024 * 64)

// Number of slots in the jail's environment array
#define ENVIRON_SIZE 128

// Maximum number of events fetched by a single epoll_wait() call
#define EPOLL_MAX_EVENTS 2

// Maximum number of custom bind-mounts that can be registered per jail
#define MAX_MOUNTPOINTS 8
62 // The default environment that will be set for every command
63 static const struct environ
{
67 { "LANG", "en_US.utf-8" },
72 struct pakfire_jail_mountpoint
{
73 char source
[PATH_MAX
];
74 char target
[PATH_MAX
];
79 struct pakfire
* pakfire
;
82 // A unique ID for each jail
84 char __uuid
[UUID_STR_LEN
];
93 struct pakfire_cgroup
* cgroup
;
96 char* env
[ENVIRON_SIZE
];
99 pakfire_jail_log_callback log_callback
;
103 struct pakfire_jail_mountpoint mountpoints
[MAX_MOUNTPOINTS
];
104 unsigned int num_mountpoints
;
107 struct pakfire_log_buffer
{
108 char data
[BUFFER_SIZE
];
112 struct pakfire_jail_exec
{
113 // PID (of the child)
117 // Process status (from waitid)
120 // FD to notify the child that the parent has finished initialization
124 struct pakfire_jail_pipes
{
135 struct pakfire_jail_buffers
{
136 struct pakfire_log_buffer stdout
;
137 struct pakfire_log_buffer stderr
;
140 struct pakfire_log_buffer log_INFO
;
141 struct pakfire_log_buffer log_ERROR
;
142 struct pakfire_log_buffer log_DEBUG
;
145 struct pakfire_cgroup
* cgroup
;
146 struct pakfire_cgroup_stats cgroup_stats
;
/*
	Thin wrapper around the clone3 system call, which is invoked through
	syscall() using __NR_clone3.

	Returns the result of syscall(), converted to int.
*/
static int clone3(struct clone_args* args, size_t size) {
	const long result = syscall(__NR_clone3, args, size);

	return (int)result;
}
153 static void pakfire_jail_free(struct pakfire_jail
* jail
) {
154 DEBUG(jail
->pakfire
, "Freeing jail at %p\n", jail
);
157 for (unsigned int i
= 0; jail
->env
[i
]; i
++)
161 pakfire_cgroup_unref(jail
->cgroup
);
163 pakfire_unref(jail
->pakfire
);
168 Passes any log messages on to the default pakfire log callback
170 static int pakfire_jail_default_log_callback(struct pakfire
* pakfire
, void* data
,
171 int priority
, const char* line
, size_t length
) {
174 INFO(pakfire
, "%s", line
);
178 ERROR(pakfire
, "%s", line
);
183 DEBUG(pakfire
, "%s", line
);
191 static int pakfire_jail_setup_interactive_env(struct pakfire_jail
* jail
) {
193 int r
= pakfire_jail_set_env(jail
, "PS1", "pakfire-jail \\w> ");
198 char* TERM
= secure_getenv("TERM");
200 r
= pakfire_jail_set_env(jail
, "TERM", TERM
);
206 char* LANG
= secure_getenv("LANG");
208 r
= pakfire_jail_set_env(jail
, "LANG", LANG
);
216 PAKFIRE_EXPORT
int pakfire_jail_create(struct pakfire_jail
** jail
,
217 struct pakfire
* pakfire
, int flags
) {
220 // Allocate a new jail
221 struct pakfire_jail
* j
= calloc(1, sizeof(*j
));
226 j
->pakfire
= pakfire_ref(pakfire
);
228 // Initialize reference counter
234 // Generate a random UUID
235 uuid_generate_random(j
->uuid
);
237 DEBUG(j
->pakfire
, "Allocated new jail at %p\n", j
);
239 // Set default log callback
240 r
= pakfire_jail_set_log_callback(j
, pakfire_jail_default_log_callback
, NULL
);
244 // Set default environment
245 for (const struct environ
* e
= ENV
; e
->key
; e
++) {
246 r
= pakfire_jail_set_env(j
, e
->key
, e
->val
);
256 pakfire_jail_free(j
);
261 PAKFIRE_EXPORT
struct pakfire_jail
* pakfire_jail_ref(struct pakfire_jail
* jail
) {
267 PAKFIRE_EXPORT
struct pakfire_jail
* pakfire_jail_unref(struct pakfire_jail
* jail
) {
268 if (--jail
->nrefs
> 0)
271 pakfire_jail_free(jail
);
275 static const char* pakfire_jail_uuid(struct pakfire_jail
* jail
) {
277 uuid_unparse_lower(jail
->uuid
, jail
->__uuid
);
284 PAKFIRE_EXPORT
int pakfire_jail_nice(struct pakfire_jail
* jail
, int nice
) {
285 // Check if nice level is in range
286 if (nice
< -19 || nice
> 20) {
297 int pakfire_jail_set_cgroup(struct pakfire_jail
* jail
, struct pakfire_cgroup
* cgroup
) {
298 // Free any previous cgroup
300 pakfire_cgroup_unref(jail
->cgroup
);
304 // Set any new cgroup
306 DEBUG(jail
->pakfire
, "Setting cgroup %p\n", cgroup
);
308 jail
->cgroup
= pakfire_cgroup_ref(cgroup
);
317 // Returns the length of the environment
318 static unsigned int pakfire_jail_env_length(struct pakfire_jail
* jail
) {
321 // Count everything in the environment
322 for (char** e
= jail
->env
; *e
; e
++)
328 // Finds an existing environment variable and returns its index or -1 if not found
329 static int pakfire_jail_find_env(struct pakfire_jail
* jail
, const char* key
) {
335 char buffer
[strlen(key
) + 2];
336 pakfire_string_format(buffer
, "%s=", key
);
338 for (unsigned int i
= 0; jail
->env
[i
]; i
++) {
339 if (pakfire_string_startswith(jail
->env
[i
], buffer
))
347 // Returns the value of an environment variable or NULL
348 PAKFIRE_EXPORT
const char* pakfire_jail_get_env(struct pakfire_jail
* jail
,
350 int i
= pakfire_jail_find_env(jail
, key
);
354 return jail
->env
[i
] + strlen(key
) + 1;
357 // Sets an environment variable
358 PAKFIRE_EXPORT
int pakfire_jail_set_env(struct pakfire_jail
* jail
,
359 const char* key
, const char* value
) {
360 // Find the index where to write this value to
361 int i
= pakfire_jail_find_env(jail
, key
);
363 i
= pakfire_jail_env_length(jail
);
365 // Return -ENOSPC when the environment is full
366 if (i
>= ENVIRON_SIZE
) {
371 // Free any previous value
375 // Format and set environment variable
376 asprintf(&jail
->env
[i
], "%s=%s", key
, value
);
378 DEBUG(jail
->pakfire
, "Set environment variable: %s\n", jail
->env
[i
]);
383 // Imports an environment
384 PAKFIRE_EXPORT
int pakfire_jail_import_env(struct pakfire_jail
* jail
, const char* env
[]) {
392 // Copy environment variables
393 for (unsigned int i
= 0; env
[i
]; i
++) {
394 r
= pakfire_string_partition(env
[i
], "=", &key
, &val
);
399 r
= pakfire_jail_set_env(jail
, key
, val
);
416 PAKFIRE_EXPORT
int pakfire_jail_set_log_callback(struct pakfire_jail
* jail
,
417 pakfire_jail_log_callback callback
, void* data
) {
418 jail
->log_callback
= callback
;
419 jail
->log_data
= data
;
425 This function replaces any logging in the child process.
427 All log messages will be sent to the parent process through their respective pipes.
429 static void pakfire_jail_log(void* data
, int priority
, const char* file
,
430 int line
, const char* fn
, const char* format
, va_list args
) {
431 struct pakfire_jail_pipes
* pipes
= (struct pakfire_jail_pipes
*)data
;
436 fd
= pipes
->log_INFO
[1];
440 fd
= pipes
->log_ERROR
[1];
445 fd
= pipes
->log_DEBUG
[1];
447 #endif /* ENABLE_DEBUG */
449 // Ignore any messages of an unknown priority
454 // Send the log message
456 vdprintf(fd
, format
, args
);
459 static int pakfire_jail_log_buffer_is_full(const struct pakfire_log_buffer
* buffer
) {
460 return (sizeof(buffer
->data
) == buffer
->used
);
464 This function reads as much data as it can from the file descriptor.
465 If it finds a whole line in it, it will send it to the logger and repeat the process.
466 If no newline character is found, it will try to read more data until it finds one.
468 static int pakfire_jail_handle_log(struct pakfire_jail
* jail
,
469 struct pakfire_jail_exec
* ctx
, int priority
, int fd
,
470 struct pakfire_log_buffer
* buffer
, pakfire_jail_log_callback callback
, void* data
) {
471 char line
[BUFFER_SIZE
+ 1];
473 // Fill up buffer from fd
474 if (buffer
->used
< sizeof(buffer
->data
)) {
475 ssize_t bytes_read
= read(fd
, buffer
->data
+ buffer
->used
,
476 sizeof(buffer
->data
) - buffer
->used
);
479 if (bytes_read
< 0) {
480 ERROR(jail
->pakfire
, "Could not read from fd %d: %m\n", fd
);
484 // Update buffer size
485 buffer
->used
+= bytes_read
;
488 // See if we have any lines that we can write
489 while (buffer
->used
) {
490 // Search for the end of the first line
491 char* eol
= memchr(buffer
->data
, '\n', buffer
->used
);
495 // If the buffer is full, we send the content to the logger and try again
497 // This should not happen in practice
497 if (pakfire_jail_log_buffer_is_full(buffer
)) {
498 DEBUG(jail
->pakfire
, "Logging buffer is full. Sending all content\n");
500 eol
= buffer
->data
+ sizeof(buffer
->data
) - 1;
502 // Otherwise we might have only read parts of the output
507 // Find the length of the string
508 size_t length
= eol
- buffer
->data
+ 1;
510 // Copy the line into the buffer
511 memcpy(line
, buffer
->data
, length
);
513 // Terminate the string
518 int r
= callback(jail
->pakfire
, data
, priority
, line
, length
);
520 ERROR(jail
->pakfire
, "The logging callback returned an error: %d\n", r
);
525 // Remove line from buffer
526 memmove(buffer
->data
, buffer
->data
+ length
, buffer
->used
- length
);
527 buffer
->used
-= length
;
533 static int pakfire_jail_setup_pipe(struct pakfire_jail
* jail
, int (*fds
)[2], const int flags
) {
534 int r
= pipe2(*fds
, flags
);
536 ERROR(jail
->pakfire
, "Could not setup pipe: %m\n");
543 static void pakfire_jail_close_pipe(struct pakfire_jail
* jail
, int fds
[2]) {
544 for (unsigned int i
= 0; i
< 2; i
++)
550 This is a convenience function that closes the write end of a pipe and
551 returns its read end.
553 static int pakfire_jail_get_pipe(struct pakfire_jail
* jail
, int (*fds
)[2]) {
554 // Give the variables easier names to avoid confusion
555 int* fd_read
= &(*fds
)[0];
556 int* fd_write
= &(*fds
)[1];
558 // Close the write end of the pipe
564 // Return the read end
568 static int pakfire_jail_wait(struct pakfire_jail
* jail
, struct pakfire_jail_exec
* ctx
) {
570 struct epoll_event ev
;
571 struct epoll_event events
[EPOLL_MAX_EVENTS
];
574 // Fetch file descriptors from context
575 const int stdout
= pakfire_jail_get_pipe(jail
, &ctx
->pipes
.stdout
);
576 const int stderr
= pakfire_jail_get_pipe(jail
, &ctx
->pipes
.stderr
);
577 const int pidfd
= ctx
->pidfd
;
580 const int log_INFO
= pakfire_jail_get_pipe(jail
, &ctx
->pipes
.log_INFO
);
581 const int log_ERROR
= pakfire_jail_get_pipe(jail
, &ctx
->pipes
.log_ERROR
);
582 const int log_DEBUG
= pakfire_jail_get_pipe(jail
, &ctx
->pipes
.log_DEBUG
);
584 // Make a list of all file descriptors we are interested in
586 stdout
, stderr
, pidfd
, log_INFO
, log_ERROR
, log_DEBUG
,
590 epollfd
= epoll_create1(0);
592 ERROR(jail
->pakfire
, "Could not initialize epoll(): %m\n");
597 ev
.events
= EPOLLIN
|EPOLLHUP
;
599 // Turn file descriptors into non-blocking mode and add them to epoll()
600 for (unsigned int i
= 0; i
< sizeof(fds
) / sizeof(*fds
); i
++) {
603 // Skip fds which were not initialized
609 if (epoll_ctl(epollfd
, EPOLL_CTL_ADD
, fd
, &ev
) < 0) {
610 ERROR(jail
->pakfire
, "Could not add file descriptor %d to epoll(): %m\n", fd
);
618 // Loop for as long as the process is alive
620 int num
= epoll_wait(epollfd
, events
, EPOLL_MAX_EVENTS
, -1);
622 // Ignore if epoll_wait() has been interrupted
626 ERROR(jail
->pakfire
, "epoll_wait() failed: %m\n");
632 for (int i
= 0; i
< num
; i
++) {
633 int e
= events
[i
].events
;
634 int fd
= events
[i
].data
.fd
;
636 struct pakfire_log_buffer
* buffer
= NULL
;
637 pakfire_jail_log_callback callback
= NULL
;
641 // Check if there is any data to be read
643 // Handle any changes to the PIDFD
645 // Call waitid() and store the result
646 r
= waitid(P_PIDFD
, ctx
->pidfd
, &ctx
->status
, WEXITED
);
648 ERROR(jail
->pakfire
, "waitid() failed: %m\n");
652 // Mark that we have ended so that we will process the remaining
653 // events from epoll() now, but won't restart the outer loop.
657 // Handle logging messages
658 } else if (fd
== log_INFO
) {
659 buffer
= &ctx
->buffers
.log_INFO
;
662 callback
= pakfire_jail_default_log_callback
;
664 } else if (fd
== log_ERROR
) {
665 buffer
= &ctx
->buffers
.log_ERROR
;
668 callback
= pakfire_jail_default_log_callback
;
670 } else if (fd
== log_DEBUG
) {
671 buffer
= &ctx
->buffers
.log_DEBUG
;
672 priority
= LOG_DEBUG
;
674 callback
= pakfire_jail_default_log_callback
;
676 // Handle anything from the child's standard output and standard error pipes
677 } else if (fd
== stdout
) {
678 buffer
= &ctx
->buffers
.stdout
;
681 callback
= jail
->log_callback
;
682 data
= jail
->log_data
;
684 } else if (fd
== stderr
) {
685 buffer
= &ctx
->buffers
.stderr
;
688 callback
= jail
->log_callback
;
689 data
= jail
->log_data
;
692 DEBUG(jail
->pakfire
, "Received invalid file descriptor %d\n", fd
);
697 r
= pakfire_jail_handle_log(jail
, ctx
, priority
, fd
, buffer
, callback
, data
);
702 // Check if any file descriptors have been closed
704 // Remove the file descriptor
705 r
= epoll_ctl(epollfd
, EPOLL_CTL_DEL
, fd
, NULL
);
707 ERROR(jail
->pakfire
, "Could not remove closed file-descriptor %d: %m\n", fd
);
721 static int pakfire_jail_capture_stdout(struct pakfire
* pakfire
, void* data
, int priority
,
722 const char* line
, size_t length
) {
723 char** output
= (char**)data
;
726 // Append everything from stdout to a buffer
727 if (priority
== LOG_INFO
) {
728 r
= asprintf(output
, "%s%s", (output
&& *output
) ? *output
: "", line
);
734 // Send everything else to the default logger
735 return pakfire_jail_default_log_callback(pakfire
, NULL
, priority
, line
, length
);
740 static int pakfire_jail_drop_capabilities(struct pakfire_jail
* jail
) {
741 const int capabilities
[] = {
742 // Deny access to the kernel's audit system
747 // Deny suspending block devices
750 // Deny any stuff with BPF
753 // Deny checkpoint restore
754 CAP_CHECKPOINT_RESTORE
,
756 // Deny opening files by inode number (open_by_handle_at)
759 // Deny setting SUID bits
762 // Deny locking more memory
765 // Deny modifying any Apparmor/SELinux/SMACK configuration
769 // Deny creating any special devices
772 // Deny setting any capabilities
775 // Deny reading from syslog
778 // Deny any admin actions (mount, sethostname, ...)
781 // Deny rebooting the system
784 // Deny loading kernel modules
787 // Deny setting nice level
790 // Deny access to /proc/kcore, /dev/mem, /dev/kmem
793 // Deny circumventing any resource limits
796 // Deny setting the system time
799 // Deny playing with suspend
805 DEBUG(jail
->pakfire
, "Dropping capabilities...\n");
810 // Drop any capabilities
811 for (const int* cap
= capabilities
; *cap
; cap
++) {
812 r
= prctl(PR_CAPBSET_DROP
, *cap
, 0, 0, 0);
814 ERROR(jail
->pakfire
, "Could not drop capability %d: %m\n", *cap
);
821 // Fetch any capabilities
822 cap_t caps
= cap_get_proc();
824 ERROR(jail
->pakfire
, "Could not read capabilities: %m\n");
829 Set inheritable capabilities
831 This ensures that no processes will be able to gain any of the listed
834 r
= cap_set_flag(caps
, CAP_INHERITABLE
, num_caps
, capabilities
, CAP_CLEAR
);
836 ERROR(jail
->pakfire
, "cap_set_flag() failed: %m\n");
840 // Restore capabilities
841 r
= cap_set_proc(caps
);
843 ERROR(jail
->pakfire
, "Could not restore capabilities: %m\n");
856 static int pakfire_jail_limit_syscalls(struct pakfire_jail
* jail
) {
857 const int syscalls
[] = {
858 // The kernel's keyring isn't namespaced
861 SCMP_SYS(request_key
),
863 // Disable userfaultfd
864 SCMP_SYS(userfaultfd
),
866 // Disable perf which could leak a lot of information about the host
867 SCMP_SYS(perf_event_open
),
873 DEBUG(jail
->pakfire
, "Applying syscall filter...\n");
875 // Setup a syscall filter which allows everything by default
876 scmp_filter_ctx ctx
= seccomp_init(SCMP_ACT_ALLOW
);
878 ERROR(jail
->pakfire
, "Could not setup seccomp filter: %m\n");
883 for (const int* syscall
= syscalls
; *syscall
; syscall
++) {
884 r
= seccomp_rule_add(ctx
, SCMP_ACT_ERRNO(EPERM
), *syscall
, 0);
886 ERROR(jail
->pakfire
, "Could not configure syscall %d: %m\n", *syscall
);
891 // Load syscall filter into the kernel
892 r
= seccomp_load(ctx
);
894 ERROR(jail
->pakfire
, "Could not load syscall filter into the kernel: %m\n");
900 seccomp_release(ctx
);
907 PAKFIRE_EXPORT
int pakfire_jail_bind(struct pakfire_jail
* jail
,
908 const char* source
, const char* target
, int flags
) {
909 struct pakfire_jail_mountpoint
* mp
= NULL
;
912 // Check if there is any space left
913 if (jail
->num_mountpoints
>= MAX_MOUNTPOINTS
) {
918 // Check for valid inputs
919 if (!source
|| !target
) {
924 // Select the next free slot
925 mp
= &jail
->mountpoints
[jail
->num_mountpoints
];
928 r
= pakfire_string_set(mp
->source
, source
);
930 ERROR(jail
->pakfire
, "Could not copy source: %m\n");
935 r
= pakfire_string_set(mp
->target
, target
);
937 ERROR(jail
->pakfire
, "Could not copy target: %m\n");
945 jail
->num_mountpoints
++;
951 Mounts everything that we require in the new namespace
953 static int pakfire_jail_mount(struct pakfire_jail
* jail
) {
954 struct pakfire_jail_mountpoint
* mp
= NULL
;
957 // Mount all default stuff
958 r
= pakfire_mount_all(jail
->pakfire
);
962 // Mount all custom stuff
963 for (unsigned int i
= 0; i
< jail
->num_mountpoints
; i
++) {
965 mp
= &jail
->mountpoints
[i
];
968 r
= pakfire_bind(jail
->pakfire
, mp
->source
, mp
->target
, mp
->flags
);
973 // Log all mountpoints
974 pakfire_mount_list(jail
->pakfire
);
981 static int pakfire_jail_setup_uid_mapping(struct pakfire_jail
* jail
, pid_t pid
) {
985 // Skip mapping anything when running on /
986 if (pakfire_on_root(jail
->pakfire
))
990 r
= pakfire_string_format(path
, "/proc/%d/uid_map", pid
);
995 const uid_t uid
= pakfire_uid(jail
->pakfire
);
998 const struct pakfire_subid
* subuid
= pakfire_subuid(jail
->pakfire
);
1002 /* When running as root, we will map the entire range.
1004 When running as a non-privileged user, we will map the root user inside the jail
1005 to the user's UID outside of the jail, and we will map the rest starting from one.
1010 r
= pakfire_file_write(jail
->pakfire
, path
, 0, 0, 0,
1011 "0 %lu %lu\n", subuid
->id
, subuid
->length
);
1013 r
= pakfire_file_write(jail
->pakfire
, path
, 0, 0, 0,
1014 "0 %lu 1\n%1 %lu %lu\n", uid
, subuid
->id
, subuid
->length
);
1018 ERROR(jail
->pakfire
, "Could not map UIDs: %m\n");
1025 static int pakfire_jail_setup_gid_mapping(struct pakfire_jail
* jail
, pid_t pid
) {
1026 char path
[PATH_MAX
];
1029 // Skip mapping anything when running on /
1030 if (pakfire_on_root(jail
->pakfire
))
1034 const gid_t gid
= pakfire_gid(jail
->pakfire
);
1037 const struct pakfire_subid
* subgid
= pakfire_subgid(jail
->pakfire
);
1042 r
= pakfire_string_format(path
, "/proc/%d/gid_map", pid
);
1048 r
= pakfire_file_write(jail
->pakfire
, path
, 0, 0, 0,
1049 "0 %lu %lu\n", subgid
->id
, subgid
->length
);
1051 r
= pakfire_file_write(jail
->pakfire
, path
, 0, 0, 0,
1052 "0 %lu 1\n%1 %lu %lu\n", gid
, subgid
->id
, subgid
->length
);
1056 ERROR(jail
->pakfire
, "Could not map GIDs: %m\n");
1063 static int pakfire_jail_setgroups(struct pakfire_jail
* jail
, pid_t pid
) {
1064 char path
[PATH_MAX
];
1068 r
= pakfire_string_format(path
, "/proc/%d/setgroups", pid
);
1072 // Open file for writing
1073 FILE* f
= fopen(path
, "w");
1075 ERROR(jail
->pakfire
, "Could not open %s for writing: %m\n", path
);
1080 int bytes_written
= fprintf(f
, "deny\n");
1081 if (bytes_written
<= 0) {
1082 ERROR(jail
->pakfire
, "Could not write to %s: %m\n", path
);
1089 ERROR(jail
->pakfire
, "Could not close %s: %m\n", path
);
1100 static int pakfire_jail_send_signal(struct pakfire_jail
* jail
, int fd
) {
1101 const uint64_t val
= 1;
1104 DEBUG(jail
->pakfire
, "Sending signal...\n");
1106 // Write to the file descriptor
1107 ssize_t bytes_written
= write(fd
, &val
, sizeof(val
));
1108 if (bytes_written
< 0 || (size_t)bytes_written
< sizeof(val
)) {
1109 ERROR(jail
->pakfire
, "Could not send signal: %m\n");
1113 // Close the file descriptor
1119 static int pakfire_jail_wait_for_signal(struct pakfire_jail
* jail
, int fd
) {
1123 DEBUG(jail
->pakfire
, "Waiting for signal...\n");
1125 ssize_t bytes_read
= read(fd
, &val
, sizeof(val
));
1126 if (bytes_read
< 0 || (size_t)bytes_read
< sizeof(val
)) {
1127 ERROR(jail
->pakfire
, "Error waiting for signal: %m\n");
1131 // Close the file descriptor
1138 Performs the initialisation that needs to happen in the parent part
1140 static int pakfire_jail_parent(struct pakfire_jail
* jail
, struct pakfire_jail_exec
* ctx
) {
1143 // Setup UID mapping
1144 r
= pakfire_jail_setup_uid_mapping(jail
, ctx
->pid
);
1148 // Write "deny" to /proc/PID/setgroups
1149 r
= pakfire_jail_setgroups(jail
, ctx
->pid
);
1153 // Setup GID mapping
1154 r
= pakfire_jail_setup_gid_mapping(jail
, ctx
->pid
);
1158 // Parent has finished initialisation
1159 DEBUG(jail
->pakfire
, "Parent has finished initialization\n");
1161 // Send signal to the child
1162 r
= pakfire_jail_send_signal(jail
, ctx
->completed_fd
);
1169 static int pakfire_jail_child(struct pakfire_jail
* jail
, struct pakfire_jail_exec
* ctx
,
1170 const char* argv
[]) {
1173 // Redirect any logging to our log pipe
1174 pakfire_set_log_callback(jail
->pakfire
, pakfire_jail_log
, &ctx
->pipes
);
1177 r
= prctl(PR_SET_PDEATHSIG
, SIGKILL
, 0, 0, 0);
1179 ERROR(jail
->pakfire
, "Could not configure to die with parent: %m\n");
1184 pid_t pid
= getpid();
1186 DEBUG(jail
->pakfire
, "Launched child process in jail with PID %d\n", pid
);
1189 for (unsigned int i
= 0; argv
[i
]; i
++)
1190 DEBUG(jail
->pakfire
, " argv[%d] = %s\n", i
, argv
[i
]);
1192 // Wait for the parent to finish initialization
1193 r
= pakfire_jail_wait_for_signal(jail
, ctx
->completed_fd
);
1197 // Perform further initialization
1200 uid_t uid
= getuid();
1201 gid_t gid
= getgid();
1204 uid_t euid
= geteuid();
1205 gid_t egid
= getegid();
1207 DEBUG(jail
->pakfire
, " UID: %d (effective %d)\n", uid
, euid
);
1208 DEBUG(jail
->pakfire
, " GID: %d (effective %d)\n", gid
, egid
);
1210 // Check if we are (effectively) running as root
1211 if (uid
|| gid
|| euid
|| egid
) {
1212 ERROR(jail
->pakfire
, "Child process is not running as root\n");
1216 const char* root
= pakfire_get_path(jail
->pakfire
);
1217 const char* arch
= pakfire_get_arch(jail
->pakfire
);
1219 // Change root (unless root is /)
1220 if (!pakfire_on_root(jail
->pakfire
)) {
1222 r
= pakfire_jail_mount(jail
);
1229 ERROR(jail
->pakfire
, "chroot() to %s failed: %m\n", root
);
1233 // Change directory to /
1236 ERROR(jail
->pakfire
, "chdir() after chroot() failed: %m\n");
1242 unsigned long persona
= pakfire_arch_personality(arch
);
1244 r
= personality(persona
);
1246 ERROR(jail
->pakfire
, "Could not set personality (%x)\n", (unsigned int)persona
);
1253 DEBUG(jail
->pakfire
, "Setting nice level to %d\n", jail
->nice
);
1255 r
= setpriority(PRIO_PROCESS
, pid
, jail
->nice
);
1257 ERROR(jail
->pakfire
, "Could not set nice level: %m\n");
1262 // Close other end of log pipes
1263 close(ctx
->pipes
.log_INFO
[0]);
1264 close(ctx
->pipes
.log_ERROR
[0]);
1266 close(ctx
->pipes
.log_DEBUG
[0]);
1267 #endif /* ENABLE_DEBUG */
1269 // Connect standard output and error
1270 if (ctx
->pipes
.stdout
[1] && ctx
->pipes
.stderr
[1]) {
1271 r
= dup2(ctx
->pipes
.stdout
[1], STDOUT_FILENO
);
1273 ERROR(jail
->pakfire
, "Could not connect fd %d to stdout: %m\n",
1274 ctx
->pipes
.stdout
[1]);
1279 r
= dup2(ctx
->pipes
.stderr
[1], STDERR_FILENO
);
1281 ERROR(jail
->pakfire
, "Could not connect fd %d to stderr: %m\n",
1282 ctx
->pipes
.stderr
[1]);
1287 // Close the pipe (as we have moved the original file descriptors)
1288 pakfire_jail_close_pipe(jail
, ctx
->pipes
.stdout
);
1289 pakfire_jail_close_pipe(jail
, ctx
->pipes
.stderr
);
1292 // Reset open file limit (http://0pointer.net/blog/file-descriptor-limits.html)
1293 r
= pakfire_rlimit_reset_nofile(jail
->pakfire
);
1297 // Drop capabilities
1298 r
= pakfire_jail_drop_capabilities(jail
);
1303 r
= pakfire_jail_limit_syscalls(jail
);
1308 r
= execvpe(argv
[0], (char**)argv
, jail
->env
);
1310 ERROR(jail
->pakfire
, "Could not execve(): %m\n");
1312 // Translate errno into regular exit code
1322 // We should not get here
1326 // Run a command in the jail
1327 static int __pakfire_jail_exec(struct pakfire_jail
* jail
, const char* argv
[],
1328 const int interactive
) {
1332 // Check if argv is valid
1333 if (!argv
|| !argv
[0]) {
1338 // Initialize context for this call
1339 struct pakfire_jail_exec ctx
= {
1346 DEBUG(jail
->pakfire
, "Executing jail...\n");
1349 Set up a file descriptor which can be used to notify the child that the parent
1350 has completed configuration.
1352 ctx
.completed_fd
= eventfd(0, EFD_CLOEXEC
);
1353 if (ctx
.completed_fd
< 0) {
1354 ERROR(jail
->pakfire
, "eventfd() failed: %m\n");
1358 // Create pipes to communicate with child process if we are not running interactively
1361 r
= pakfire_jail_setup_pipe(jail
, &ctx
.pipes
.stdout
, 0);
1366 r
= pakfire_jail_setup_pipe(jail
, &ctx
.pipes
.stderr
, 0);
1371 // Setup pipes for logging
1373 r
= pakfire_jail_setup_pipe(jail
, &ctx
.pipes
.log_INFO
, O_CLOEXEC
);
1378 r
= pakfire_jail_setup_pipe(jail
, &ctx
.pipes
.log_ERROR
, O_CLOEXEC
);
1384 r
= pakfire_jail_setup_pipe(jail
, &ctx
.pipes
.log_DEBUG
, O_CLOEXEC
);
1387 #endif /* ENABLE_DEBUG */
1389 // Configure child process
1390 struct clone_args args
= {
1399 .exit_signal
= SIGCHLD
,
1400 .pidfd
= (long long unsigned int)&ctx
.pidfd
,
1403 // Launch the process in a cgroup that is a leaf of the configured cgroup
1405 args
.flags
|= CLONE_INTO_CGROUP
;
1408 const char* uuid
= pakfire_jail_uuid(jail
);
1410 // Create a temporary cgroup
1411 r
= pakfire_cgroup_child(&ctx
.cgroup
, jail
->cgroup
, uuid
, 0);
1413 ERROR(jail
->pakfire
, "Could not create cgroup for jail: %m\n");
1417 // Clone into this cgroup
1418 args
.cgroup
= pakfire_cgroup_fd(ctx
.cgroup
);
1421 // Fork this process
1422 ctx
.pid
= clone3(&args
, sizeof(args
));
1424 ERROR(jail
->pakfire
, "Could not clone: %m\n");
1428 } else if (ctx
.pid
== 0) {
1429 r
= pakfire_jail_child(jail
, &ctx
, argv
);
1434 r
= pakfire_jail_parent(jail
, &ctx
);
1438 DEBUG(jail
->pakfire
, "Waiting for PID %d to finish its work\n", ctx
.pid
);
1440 // Read output of the child process
1441 r
= pakfire_jail_wait(jail
, &ctx
);
1445 // Handle exit status
1446 switch (ctx
.status
.si_code
) {
1448 DEBUG(jail
->pakfire
, "The child process exited with code %d\n",
1449 ctx
.status
.si_status
);
1452 exit
= ctx
.status
.si_status
;
1457 ERROR(jail
->pakfire
, "The child process was killed\n");
1460 // Log anything else
1462 ERROR(jail
->pakfire
, "Unknown child exit code: %d\n", ctx
.status
.si_code
);
1467 // Destroy the temporary cgroup (if any)
1469 // Read cgroup stats
1470 r
= pakfire_cgroup_stat(ctx
.cgroup
, &ctx
.cgroup_stats
);
1472 ERROR(jail
->pakfire
, "Could not read cgroup stats: %m\n");
1474 pakfire_cgroup_stat_dump(ctx
.cgroup
, &ctx
.cgroup_stats
);
1477 pakfire_cgroup_destroy(ctx
.cgroup
);
1478 pakfire_cgroup_unref(ctx
.cgroup
);
1481 // Close any file descriptors
1482 pakfire_jail_close_pipe(jail
, ctx
.pipes
.stdout
);
1483 pakfire_jail_close_pipe(jail
, ctx
.pipes
.stderr
);
1486 pakfire_jail_close_pipe(jail
, ctx
.pipes
.log_INFO
);
1487 pakfire_jail_close_pipe(jail
, ctx
.pipes
.log_ERROR
);
1488 pakfire_jail_close_pipe(jail
, ctx
.pipes
.log_DEBUG
);
1493 PAKFIRE_EXPORT
int pakfire_jail_exec(struct pakfire_jail
* jail
,
1494 const char* argv
[], char** output
) {
1497 // Store logging callback
1498 pakfire_jail_log_callback log_callback
= jail
->log_callback
;
1499 void* log_data
= jail
->log_data
;
1501 // Capture output if requested by user
1503 pakfire_jail_set_log_callback(jail
, pakfire_jail_capture_stdout
, output
);
1506 r
= __pakfire_jail_exec(jail
, argv
, 0);
1508 // Restore log callback
1509 pakfire_jail_set_log_callback(jail
, log_callback
, log_data
);
1514 static int pakfire_jail_exec_interactive(
1515 struct pakfire_jail
* jail
, const char* argv
[]) {
1518 // Setup interactive stuff
1519 r
= pakfire_jail_setup_interactive_env(jail
);
1523 return __pakfire_jail_exec(jail
, argv
, 1);
1526 PAKFIRE_EXPORT
int pakfire_jail_exec_script(struct pakfire_jail
* jail
,
1527 const char* script
, const size_t size
, const char* args
[], char** output
) {
1528 char path
[PATH_MAX
];
1529 const char** argv
= NULL
;
1532 const char* root
= pakfire_get_path(jail
->pakfire
);
1534 // Write the scriptlet to disk
1535 r
= pakfire_path_join(path
, root
, "pakfire-script.XXXXXX");
1539 // Open a temporary file
1540 int fd
= mkstemp(path
);
1542 ERROR(jail
->pakfire
, "Could not open a temporary file: %m\n");
1547 DEBUG(jail
->pakfire
, "Writing script to %s:\n%.*s\n", path
, (int)size
, script
);
1550 ssize_t bytes_written
= write(fd
, script
, size
);
1551 if (bytes_written
< (ssize_t
)size
) {
1552 ERROR(jail
->pakfire
, "Could not write script to file %s: %m\n", path
);
1557 // Make the script executable
1558 r
= fchmod(fd
, S_IRUSR
|S_IWUSR
|S_IXUSR
);
1560 ERROR(jail
->pakfire
, "Could not set executable permissions on %s: %m\n", path
);
1567 ERROR(jail
->pakfire
, "Could not close script file %s: %m\n", path
);
1572 // Count how many arguments were passed
1573 unsigned int argc
= 1;
1575 for (const char** arg
= args
; *arg
; arg
++)
1579 argv
= calloc(argc
+ 1, sizeof(*argv
));
1581 ERROR(jail
->pakfire
, "Could not allocate argv: %m\n");
1586 argv
[0] = (root
) ? pakfire_path_relpath(root
, path
) : path
;
1589 for (unsigned int i
= 1; i
< argc
; i
++)
1590 argv
[i
] = args
[i
-1];
1593 r
= pakfire_jail_exec(jail
, argv
, output
);
1599 // Remove script from disk
1607 A convenience function that creates a new jail, runs the given command and destroys
1610 int pakfire_jail_run(struct pakfire
* pakfire
, const char* argv
[], int flags
, char** output
) {
1611 struct pakfire_jail
* jail
= NULL
;
1614 // Create a new jail
1615 r
= pakfire_jail_create(&jail
, pakfire
, flags
);
1619 // Execute the command
1620 r
= pakfire_jail_exec(jail
, argv
, output
);
1624 pakfire_jail_unref(jail
);
1629 int pakfire_jail_run_script(struct pakfire
* pakfire
,
1630 const char* script
, const size_t length
, const char* argv
[], int flags
, char** output
) {
1631 struct pakfire_jail
* jail
= NULL
;
1634 // Create a new jail
1635 r
= pakfire_jail_create(&jail
, pakfire
, flags
);
1639 // Execute the command
1640 r
= pakfire_jail_exec_script(jail
, script
, length
, argv
, output
);
1644 pakfire_jail_unref(jail
);
/*
	Runs "/bin/bash --login" in the jail through
	pakfire_jail_exec_interactive() and returns its result.
*/
int pakfire_jail_shell(struct pakfire_jail* jail) {
	const char* command[] = {
		"/bin/bash",
		"--login",
		NULL,
	};

	// Execute /bin/bash
	return pakfire_jail_exec_interactive(jail, command);
}
1658 int pakfire_jail_ldconfig(struct pakfire
* pakfire
) {
1659 char path
[PATH_MAX
];
1661 const char* ldconfig
= "/sbin/ldconfig";
1663 // Check if ldconfig exists before calling it to avoid overhead
1664 int r
= pakfire_path(pakfire
, path
, "%s", ldconfig
);
1668 // Check if ldconfig is executable
1669 r
= access(path
, X_OK
);
1671 DEBUG(pakfire
, "%s is not executable. Skipping...\n", ldconfig
);
1675 const char* argv
[] = {
1680 return pakfire_jail_run(pakfire
, argv
, 0, NULL
);