1 /*#############################################################################
3 # Pakfire - The IPFire package management system #
4 # Copyright (C) 2022 Pakfire development team #
6 # This program is free software: you can redistribute it and/or modify #
7 # it under the terms of the GNU General Public License as published by #
8 # the Free Software Foundation, either version 3 of the License, or #
9 # (at your option) any later version. #
11 # This program is distributed in the hope that it will be useful, #
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of #
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
14 # GNU General Public License for more details. #
16 # You should have received a copy of the GNU General Public License #
17 # along with this program. If not, see <http://www.gnu.org/licenses/>. #
19 #############################################################################*/
22 #include <linux/capability.h>
23 #include <linux/fcntl.h>
24 #include <linux/sched.h>
25 #include <linux/wait.h>
30 #include <sys/capability.h>
31 #include <sys/epoll.h>
32 #include <sys/eventfd.h>
33 #include <sys/personality.h>
34 #include <sys/prctl.h>
35 #include <sys/resource.h>
36 #include <sys/types.h>
45 #include <pakfire/arch.h>
46 #include <pakfire/cgroup.h>
47 #include <pakfire/jail.h>
48 #include <pakfire/logging.h>
49 #include <pakfire/mount.h>
50 #include <pakfire/pakfire.h>
51 #include <pakfire/private.h>
52 #include <pakfire/pwd.h>
53 #include <pakfire/string.h>
54 #include <pakfire/util.h>
// Capacity in bytes of each log buffer (64 KiB). The expression is wrapped in
// parentheses so that the macro behaves as a single value when it is used
// inside a larger expression (e.g. x % BUFFER_SIZE or x / BUFFER_SIZE).
#define BUFFER_SIZE (1024 * 64)
// Number of slots in the environment array of a jail
#define ENVIRON_SIZE 128
// Maximum number of events fetched by a single epoll_wait() call
#define EPOLL_MAX_EVENTS 2
// Maximum number of custom mountpoints that can be registered for a jail
#define MAX_MOUNTPOINTS 8
61 // The default environment that will be set for every command
62 static const struct environ
{
66 { "LANG", "en_US.utf-8" },
71 struct pakfire_jail_mountpoint
{
72 char source
[PATH_MAX
];
73 char target
[PATH_MAX
];
78 struct pakfire
* pakfire
;
81 // A unique ID for each jail
83 char __uuid
[UUID_STR_LEN
];
92 struct pakfire_cgroup
* cgroup
;
95 char* env
[ENVIRON_SIZE
];
98 pakfire_jail_log_callback log_callback
;
102 struct pakfire_jail_mountpoint mountpoints
[MAX_MOUNTPOINTS
];
103 unsigned int num_mountpoints
;
106 struct pakfire_log_buffer
{
107 char data
[BUFFER_SIZE
];
111 struct pakfire_jail_exec
{
112 // PID (of the child)
116 // Process status (from waitid)
119 // FD to notify the client that the parent has finished initialization
123 struct pakfire_jail_pipes
{
134 struct pakfire_jail_buffers
{
135 struct pakfire_log_buffer stdout
;
136 struct pakfire_log_buffer stderr
;
139 struct pakfire_log_buffer log_INFO
;
140 struct pakfire_log_buffer log_ERROR
;
141 struct pakfire_log_buffer log_DEBUG
;
144 struct pakfire_cgroup
* cgroup
;
145 struct pakfire_cgroup_stats cgroup_stats
;
/*
	Invokes the clone3 system call directly through syscall().

	args points to the clone_args structure and size is its size in bytes.
	The result of syscall() is handed back to the caller unchanged.
*/
static int clone3(struct clone_args* args, size_t size) {
	const long result = syscall(__NR_clone3, args, size);

	return result;
}
152 static void pakfire_jail_free(struct pakfire_jail
* jail
) {
153 DEBUG(jail
->pakfire
, "Freeing jail at %p\n", jail
);
156 for (unsigned int i
= 0; jail
->env
[i
]; i
++)
160 pakfire_cgroup_unref(jail
->cgroup
);
162 pakfire_unref(jail
->pakfire
);
167 Passes any log messages on to the default pakfire log callback
169 static int pakfire_jail_default_log_callback(struct pakfire
* pakfire
, void* data
,
170 int priority
, const char* line
, size_t length
) {
173 INFO(pakfire
, "%s", line
);
177 ERROR(pakfire
, "%s", line
);
182 DEBUG(pakfire
, "%s", line
);
190 static int pakfire_jail_setup_interactive_env(struct pakfire_jail
* jail
) {
192 int r
= pakfire_jail_set_env(jail
, "PS1", "pakfire-jail \\w> ");
197 char* TERM
= secure_getenv("TERM");
199 r
= pakfire_jail_set_env(jail
, "TERM", TERM
);
205 char* LANG
= secure_getenv("LANG");
207 r
= pakfire_jail_set_env(jail
, "LANG", LANG
);
215 PAKFIRE_EXPORT
int pakfire_jail_create(struct pakfire_jail
** jail
,
216 struct pakfire
* pakfire
, int flags
) {
219 // Allocate a new jail
220 struct pakfire_jail
* j
= calloc(1, sizeof(*j
));
225 j
->pakfire
= pakfire_ref(pakfire
);
227 // Initialize reference counter
233 // Generate a random UUID
234 uuid_generate_random(j
->uuid
);
236 DEBUG(j
->pakfire
, "Allocated new jail at %p\n", j
);
238 // Set default log callback
239 r
= pakfire_jail_set_log_callback(j
, pakfire_jail_default_log_callback
, NULL
);
243 // Set default environment
244 for (const struct environ
* e
= ENV
; e
->key
; e
++) {
245 r
= pakfire_jail_set_env(j
, e
->key
, e
->val
);
255 pakfire_jail_free(j
);
260 PAKFIRE_EXPORT
struct pakfire_jail
* pakfire_jail_ref(struct pakfire_jail
* jail
) {
266 PAKFIRE_EXPORT
struct pakfire_jail
* pakfire_jail_unref(struct pakfire_jail
* jail
) {
267 if (--jail
->nrefs
> 0)
270 pakfire_jail_free(jail
);
274 static const char* pakfire_jail_uuid(struct pakfire_jail
* jail
) {
276 uuid_unparse_lower(jail
->uuid
, jail
->__uuid
);
// Range-checks the nice level requested for a jail.
// Only the range check is visible in this view; presumably the value is then
// stored in jail->nice, which the child process later passes to setpriority().
// NOTE(review): setpriority(2) documents the valid nice range as -20..19, but
// the check below accepts -19..20. This looks off by one at both ends (-20 is
// rejected, 20 is accepted) - confirm and adjust the bounds.
283 PAKFIRE_EXPORT
int pakfire_jail_nice(struct pakfire_jail
* jail
, int nice
) {
284 // Check if nice level is in range
285 if (nice
< -19 || nice
> 20) {
296 int pakfire_jail_set_cgroup(struct pakfire_jail
* jail
, struct pakfire_cgroup
* cgroup
) {
297 // Free any previous cgroup
299 pakfire_cgroup_unref(jail
->cgroup
);
303 // Set any new cgroup
305 DEBUG(jail
->pakfire
, "Setting cgroup %p\n", cgroup
);
307 jail
->cgroup
= pakfire_cgroup_ref(cgroup
);
316 // Returns the length of the environment
317 static unsigned int pakfire_jail_env_length(struct pakfire_jail
* jail
) {
320 // Count everything in the environment
321 for (char** e
= jail
->env
; *e
; e
++)
327 // Finds an existing environment variable and returns its index or -1 if not found
// Looks for an entry of jail->env that starts with "<key>=".
328 static int pakfire_jail_find_env(struct pakfire_jail
* jail
, const char* key
) {
// Room for the key, the '=' character and the terminating NUL byte
334 char buffer
[strlen(key
) + 2];
// Build the "<key>=" prefix that an existing entry has to start with
335 pakfire_string_format(buffer
, "%s=", key
);
// Walk the environment until the first NULL entry is reached
337 for (unsigned int i
= 0; jail
->env
[i
]; i
++) {
338 if (pakfire_string_startswith(jail
->env
[i
], buffer
))
346 // Returns the value of an environment variable or NULL
// Looks up key with pakfire_jail_find_env() and returns a pointer into the
// stored "<key>=<value>" string. The result is not a copy.
347 PAKFIRE_EXPORT
const char* pakfire_jail_get_env(struct pakfire_jail
* jail
,
349 int i
= pakfire_jail_find_env(jail
, key
);
// Skip the key and the '=' character so that the result points at the value
353 return jail
->env
[i
] + strlen(key
) + 1;
356 // Sets an environment variable
// Stores "<key>=<value>" as a newly allocated string in jail->env.
// The slot is looked up with pakfire_jail_find_env(); the visible lines also
// compute the current length of the environment as an alternative index.
357 PAKFIRE_EXPORT
int pakfire_jail_set_env(struct pakfire_jail
* jail
,
358 const char* key
, const char* value
) {
359 // Find the index where to write this value to
360 int i
= pakfire_jail_find_env(jail
, key
);
362 i
= pakfire_jail_env_length(jail
);
364 // Return -ENOSPC when the environment is full
// NOTE(review): the loops over jail->env in this file stop at the first NULL
// entry. Index ENVIRON_SIZE - 1 passes this check, and filling it would leave
// no NULL terminator in the array - confirm whether the bound should be
// ENVIRON_SIZE - 1.
365 if (i
>= ENVIRON_SIZE
) {
370 // Free any previous value
374 // Format and set environment variable
// NOTE(review): the return value of asprintf() is not checked here. On failure
// the contents of jail->env[i] are undefined according to asprintf(3), and the
// pointer is still passed to DEBUG() below - confirm and handle the error.
375 asprintf(&jail
->env
[i
], "%s=%s", key
, value
);
377 DEBUG(jail
->pakfire
, "Set environment variable: %s\n", jail
->env
[i
]);
382 // Imports an environment
383 PAKFIRE_EXPORT
int pakfire_jail_import_env(struct pakfire_jail
* jail
, const char* env
[]) {
391 // Copy environment variables
392 for (unsigned int i
= 0; env
[i
]; i
++) {
393 r
= pakfire_string_partition(env
[i
], "=", &key
, &val
);
398 r
= pakfire_jail_set_env(jail
, key
, val
);
// Installs the callback that receives the output of the jailed process
// together with an opaque data pointer. Both values are stored in the jail
// and replace whatever was set before.
415 PAKFIRE_EXPORT
int pakfire_jail_set_log_callback(struct pakfire_jail
* jail
,
416 pakfire_jail_log_callback callback
, void* data
) {
417 jail
->log_callback
= callback
;
418 jail
->log_data
= data
;
424 This function replaces any logging in the child process.
426 All log messages will be sent to the parent process through their respective pipes.
428 static void pakfire_jail_log(void* data
, int priority
, const char* file
,
429 int line
, const char* fn
, const char* format
, va_list args
) {
430 struct pakfire_jail_pipes
* pipes
= (struct pakfire_jail_pipes
*)data
;
435 fd
= pipes
->log_INFO
[1];
439 fd
= pipes
->log_ERROR
[1];
444 fd
= pipes
->log_DEBUG
[1];
446 #endif /* ENABLE_DEBUG */
448 // Ignore any messages of an unknown priority
453 // Send the log message
455 vdprintf(fd
, format
, args
);
458 static int pakfire_jail_log_buffer_is_full(const struct pakfire_log_buffer
* buffer
) {
459 return (sizeof(buffer
->data
) == buffer
->used
);
463 This function reads as much data as it can from the file descriptor.
464 If it finds a whole line in it, it will send it to the logger and repeat the process.
465 If no newline character is found, it will try to read more data until it finds one.
467 static int pakfire_jail_handle_log(struct pakfire_jail
* jail
,
468 struct pakfire_jail_exec
* ctx
, int priority
, int fd
,
469 struct pakfire_log_buffer
* buffer
, pakfire_jail_log_callback callback
, void* data
) {
470 char line
[BUFFER_SIZE
+ 1];
472 // Fill up buffer from fd
473 if (buffer
->used
< sizeof(buffer
->data
)) {
474 ssize_t bytes_read
= read(fd
, buffer
->data
+ buffer
->used
,
475 sizeof(buffer
->data
) - buffer
->used
);
478 if (bytes_read
< 0) {
479 ERROR(jail
->pakfire
, "Could not read from fd %d: %m\n", fd
);
483 // Update buffer size
484 buffer
->used
+= bytes_read
;
487 // See if we have any lines that we can write
488 while (buffer
->used
) {
489 // Search for the end of the first line
490 char* eol
= memchr(buffer
->data
, '\n', buffer
->used
);
494 // If the buffer is full, we send the content to the logger and try again
495 // This should not happen in practice
496 if (pakfire_jail_log_buffer_is_full(buffer
)) {
497 DEBUG(jail
->pakfire
, "Logging buffer is full. Sending all content\n");
499 eol
= buffer
->data
+ sizeof(buffer
->data
) - 1;
501 // Otherwise we might have only read parts of the output
506 // Find the length of the string
507 size_t length
= eol
- buffer
->data
+ 1;
509 // Copy the line into the buffer
510 memcpy(line
, buffer
->data
, length
);
512 // Terminate the string
517 int r
= callback(jail
->pakfire
, data
, priority
, line
, length
);
519 ERROR(jail
->pakfire
, "The logging callback returned an error: %d\n", r
);
524 // Remove line from buffer
525 memmove(buffer
->data
, buffer
->data
+ length
, buffer
->used
- length
);
526 buffer
->used
-= length
;
532 static int pakfire_jail_setup_pipe(struct pakfire_jail
* jail
, int (*fds
)[2], const int flags
) {
533 int r
= pipe2(*fds
, flags
);
535 ERROR(jail
->pakfire
, "Could not setup pipe: %m\n");
542 static void pakfire_jail_close_pipe(struct pakfire_jail
* jail
, int fds
[2]) {
543 for (unsigned int i
= 0; i
< 2; i
++)
549 This is a convenience function to fetch the reading end of a pipe and
550 closes the write end.
552 static int pakfire_jail_get_pipe(struct pakfire_jail
* jail
, int (*fds
)[2]) {
553 // Give the variables easier names to avoid confusion
554 int* fd_read
= &(*fds
)[0];
555 int* fd_write
= &(*fds
)[1];
557 // Close the write end of the pipe
563 // Return the read end
567 static int pakfire_jail_wait(struct pakfire_jail
* jail
, struct pakfire_jail_exec
* ctx
) {
569 struct epoll_event ev
;
570 struct epoll_event events
[EPOLL_MAX_EVENTS
];
573 // Fetch file descriptors from context
574 const int stdout
= pakfire_jail_get_pipe(jail
, &ctx
->pipes
.stdout
);
575 const int stderr
= pakfire_jail_get_pipe(jail
, &ctx
->pipes
.stderr
);
576 const int pidfd
= ctx
->pidfd
;
579 const int log_INFO
= pakfire_jail_get_pipe(jail
, &ctx
->pipes
.log_INFO
);
580 const int log_ERROR
= pakfire_jail_get_pipe(jail
, &ctx
->pipes
.log_ERROR
);
581 const int log_DEBUG
= pakfire_jail_get_pipe(jail
, &ctx
->pipes
.log_DEBUG
);
583 // Make a list of all file descriptors we are interested in
585 stdout
, stderr
, pidfd
, log_INFO
, log_ERROR
, log_DEBUG
,
589 epollfd
= epoll_create1(0);
591 ERROR(jail
->pakfire
, "Could not initialize epoll(): %m\n");
596 ev
.events
= EPOLLIN
|EPOLLHUP
;
598 // Turn file descriptors into non-blocking mode and add them to epoll()
599 for (unsigned int i
= 0; i
< sizeof(fds
) / sizeof(*fds
); i
++) {
602 // Skip fds which were not initialized
608 if (epoll_ctl(epollfd
, EPOLL_CTL_ADD
, fd
, &ev
) < 0) {
609 ERROR(jail
->pakfire
, "Could not add file descriptor %d to epoll(): %m\n", fd
);
617 // Loop for as long as the process is alive
619 int num
= epoll_wait(epollfd
, events
, EPOLL_MAX_EVENTS
, -1);
621 // Ignore if epoll_wait() has been interrupted
625 ERROR(jail
->pakfire
, "epoll_wait() failed: %m\n");
631 for (int i
= 0; i
< num
; i
++) {
632 int e
= events
[i
].events
;
633 int fd
= events
[i
].data
.fd
;
635 struct pakfire_log_buffer
* buffer
= NULL
;
636 pakfire_jail_log_callback callback
= NULL
;
640 // Check if there is any data to be read
642 // Handle any changes to the PIDFD
644 // Call waitid() and store the result
645 r
= waitid(P_PIDFD
, ctx
->pidfd
, &ctx
->status
, WEXITED
);
647 ERROR(jail
->pakfire
, "waitid() failed: %m\n");
651 // Mark that we have ended so that we will process the remaining
652 // events from epoll() now, but won't restart the outer loop.
656 // Handle logging messages
657 } else if (fd
== log_INFO
) {
658 buffer
= &ctx
->buffers
.log_INFO
;
661 callback
= pakfire_jail_default_log_callback
;
663 } else if (fd
== log_ERROR
) {
664 buffer
= &ctx
->buffers
.log_ERROR
;
667 callback
= pakfire_jail_default_log_callback
;
669 } else if (fd
== log_DEBUG
) {
670 buffer
= &ctx
->buffers
.log_DEBUG
;
671 priority
= LOG_DEBUG
;
673 callback
= pakfire_jail_default_log_callback
;
675 // Handle anything from the standard output and standard error pipes
676 } else if (fd
== stdout
) {
677 buffer
= &ctx
->buffers
.stdout
;
680 callback
= jail
->log_callback
;
681 data
= jail
->log_data
;
683 } else if (fd
== stderr
) {
684 buffer
= &ctx
->buffers
.stderr
;
687 callback
= jail
->log_callback
;
688 data
= jail
->log_data
;
691 DEBUG(jail
->pakfire
, "Received invalid file descriptor %d\n", fd
);
696 r
= pakfire_jail_handle_log(jail
, ctx
, priority
, fd
, buffer
, callback
, data
);
701 // Check if any file descriptors have been closed
703 // Remove the file descriptor
704 r
= epoll_ctl(epollfd
, EPOLL_CTL_DEL
, fd
, NULL
);
706 ERROR(jail
->pakfire
, "Could not remove closed file-descriptor %d: %m\n", fd
);
// Log callback that collects LOG_INFO lines in a caller-provided string.
// pakfire_jail_exec() installs this callback with its output argument as data.
// Any other priority is forwarded to pakfire_jail_default_log_callback().
720 static int pakfire_jail_capture_stdout(struct pakfire
* pakfire
, void* data
, int priority
,
721 const char* line
, size_t length
) {
// data is the address of the string that the captured output is appended to
722 char** output
= (char**)data
;
725 // Append everything from stdout to a buffer
726 if (priority
== LOG_INFO
) {
// asprintf() allocates a new string (old content followed by line) and stores
// it in *output.
// NOTE(review): the previous *output is not freed in the lines visible here,
// so every appended line may leak the old allocation - confirm.
// NOTE(review): output is NULL-checked only inside the argument expression;
// asprintf(output, ...) itself would still write through a NULL pointer.
727 r
= asprintf(output
, "%s%s", (output
&& *output
) ? *output
: "", line
);
733 // Send everything else to the default logger
734 return pakfire_jail_default_log_callback(pakfire
, NULL
, priority
, line
, length
);
739 static int pakfire_jail_drop_capabilities(struct pakfire_jail
* jail
) {
740 const int capabilities
[] = {
741 // Deny access to the kernel's audit system
746 // Deny suspending block devices
749 // Deny any stuff with BPF
752 // Deny checkpoint restore
753 CAP_CHECKPOINT_RESTORE
,
755 // Deny opening files by inode number (open_by_handle_at)
758 // Deny setting SUID bits
761 // Deny locking more memory
764 // Deny modifying any Apparmor/SELinux/SMACK configuration
768 // Deny creating any special devices
771 // Deny setting any capabilities
774 // Deny reading from syslog
777 // Deny any admin actions (mount, sethostname, ...)
780 // Deny rebooting the system
783 // Deny loading kernel modules
786 // Deny setting nice level
789 // Deny access to /proc/kcore, /dev/mem, /dev/kmem
792 // Deny circumventing any resource limits
795 // Deny setting the system time
798 // Deny playing with suspend
804 DEBUG(jail
->pakfire
, "Dropping capabilities...\n");
809 // Drop any capabilities
810 for (const int* cap
= capabilities
; *cap
; cap
++) {
811 r
= prctl(PR_CAPBSET_DROP
, *cap
, 0, 0, 0);
813 ERROR(jail
->pakfire
, "Could not drop capability %d: %m\n", *cap
);
820 // Fetch any capabilities
821 cap_t caps
= cap_get_proc();
823 ERROR(jail
->pakfire
, "Could not read capabilities: %m\n");
828 Set inheritable capabilities
830 This ensures that no processes will be able to gain any of the listed
833 r
= cap_set_flag(caps
, CAP_INHERITABLE
, num_caps
, capabilities
, CAP_CLEAR
);
835 ERROR(jail
->pakfire
, "cap_set_flag() failed: %m\n");
839 // Restore capabilities
840 r
= cap_set_proc(caps
);
842 ERROR(jail
->pakfire
, "Could not restore capabilities: %m\n");
855 static int pakfire_jail_limit_syscalls(struct pakfire_jail
* jail
) {
856 const int syscalls
[] = {
857 // The kernel's keyring isn't namespaced
860 SCMP_SYS(request_key
),
862 // Disable userfaultfd
863 SCMP_SYS(userfaultfd
),
865 // Disable perf which could leak a lot of information about the host
866 SCMP_SYS(perf_event_open
),
872 DEBUG(jail
->pakfire
, "Applying syscall filter...\n");
874 // Setup a syscall filter which allows everything by default
875 scmp_filter_ctx ctx
= seccomp_init(SCMP_ACT_ALLOW
);
877 ERROR(jail
->pakfire
, "Could not setup seccomp filter: %m\n");
882 for (const int* syscall
= syscalls
; *syscall
; syscall
++) {
883 r
= seccomp_rule_add(ctx
, SCMP_ACT_ERRNO(EPERM
), *syscall
, 0);
885 ERROR(jail
->pakfire
, "Could not configure syscall %d: %m\n", *syscall
);
890 // Load syscall filter into the kernel
891 r
= seccomp_load(ctx
);
893 ERROR(jail
->pakfire
, "Could not load syscall filter into the kernel: %m\n");
899 seccomp_release(ctx
);
906 PAKFIRE_EXPORT
int pakfire_jail_bind(struct pakfire_jail
* jail
,
907 const char* source
, const char* target
, int flags
) {
908 struct pakfire_jail_mountpoint
* mp
= NULL
;
911 // Check if there is any space left
912 if (jail
->num_mountpoints
>= MAX_MOUNTPOINTS
) {
917 // Check for valid inputs
918 if (!source
|| !target
) {
923 // Select the next free slot
924 mp
= &jail
->mountpoints
[jail
->num_mountpoints
];
927 r
= pakfire_string_set(mp
->source
, source
);
929 ERROR(jail
->pakfire
, "Could not copy source: %m\n");
934 r
= pakfire_string_set(mp
->target
, target
);
936 ERROR(jail
->pakfire
, "Could not copy target: %m\n");
944 jail
->num_mountpoints
++;
950 Mounts everything that we require in the new namespace
952 static int pakfire_jail_mount(struct pakfire_jail
* jail
) {
953 struct pakfire_jail_mountpoint
* mp
= NULL
;
956 // Mount all default stuff
957 r
= pakfire_mount_all(jail
->pakfire
);
961 // Mount all custom stuff
962 for (unsigned int i
= 0; i
< jail
->num_mountpoints
; i
++) {
964 mp
= &jail
->mountpoints
[i
];
967 r
= pakfire_bind(jail
->pakfire
, mp
->source
, mp
->target
, mp
->flags
);
972 // Log all mountpoints
973 pakfire_mount_list(jail
->pakfire
);
980 static int pakfire_jail_write_uidgid_mapping(struct pakfire_jail
* jail
,
981 const char* path
, const struct pakfire_subid
* subid
) {
982 return pakfire_file_write(jail
->pakfire
, path
, 0, 0, 0,
983 "%d %u %lu\n", 0, subid
->id
, subid
->length
);
986 static int pakfire_jail_setup_uid_mapping(struct pakfire_jail
* jail
, pid_t pid
) {
990 // Skip mapping anything when running on /
991 if (pakfire_on_root(jail
->pakfire
))
995 const struct pakfire_subid
* subuid
= pakfire_subuid(jail
->pakfire
);
1000 r
= pakfire_string_format(path
, "/proc/%d/uid_map", pid
);
1004 DEBUG(jail
->pakfire
, "Mapping UID range (%u - %lu)\n",
1005 subuid
->id
, subuid
->id
+ subuid
->length
);
1007 return pakfire_jail_write_uidgid_mapping(jail
, path
, subuid
);
1010 static int pakfire_jail_setup_gid_mapping(struct pakfire_jail
* jail
, pid_t pid
) {
1011 char path
[PATH_MAX
];
1014 // Skip mapping anything when running on /
1015 if (pakfire_on_root(jail
->pakfire
))
1019 const struct pakfire_subid
* subgid
= pakfire_subgid(jail
->pakfire
);
1024 r
= pakfire_string_format(path
, "/proc/%d/gid_map", pid
);
1028 DEBUG(jail
->pakfire
, "Mapping GID range (%u - %lu)\n",
1029 subgid
->id
, subgid
->id
+ subgid
->length
);
1031 return pakfire_jail_write_uidgid_mapping(jail
, path
, subgid
);
// Writes "deny" to /proc/<pid>/setgroups of the given process.
// pakfire_jail_parent() calls this before it sets up the UID and GID mappings.
// NOTE(review): presumably required because an unprivileged process may only
// write gid_map after setgroups has been denied (see user_namespaces(7)) -
// confirm.
1034 static int pakfire_jail_setgroups(struct pakfire_jail
* jail
, pid_t pid
) {
1035 char path
[PATH_MAX
];
// Compose the path of the setgroups file of the process
1039 r
= pakfire_string_format(path
, "/proc/%d/setgroups", pid
);
1043 // Open file for writing
1044 FILE* f
= fopen(path
, "w");
1046 ERROR(jail
->pakfire
, "Could not open %s for writing: %m\n", path
);
// fprintf() returns the number of bytes written; zero or less is treated as an error
1051 int bytes_written
= fprintf(f
, "deny\n");
1052 if (bytes_written
<= 0) {
1053 ERROR(jail
->pakfire
, "Could not write to %s: %m\n", path
);
1060 ERROR(jail
->pakfire
, "Could not close %s: %m\n", path
);
// Notifies the other side by writing the 64 bit value 1 to fd.
// pakfire_jail_parent() calls this with ctx->completed_fd, which is created
// with eventfd() in __pakfire_jail_exec().
1071 static int pakfire_jail_send_signal(struct pakfire_jail
* jail
, int fd
) {
1072 const uint64_t val
= 1;
1075 DEBUG(jail
->pakfire
, "Sending signal...\n");
1077 // Write to the file descriptor
1078 ssize_t bytes_written
= write(fd
, &val
, sizeof(val
));
// A failed or short write is treated as an error
1079 if (bytes_written
< 0 || (size_t)bytes_written
< sizeof(val
)) {
1080 ERROR(jail
->pakfire
, "Could not send signal: %m\n");
1084 // Close the file descriptor
// Counterpart of pakfire_jail_send_signal(): reads a value of sizeof(val) bytes
// from fd. pakfire_jail_child() calls this with ctx->completed_fd to wait until
// the parent has finished its initialization.
// NOTE(review): the eventfd is created without EFD_NONBLOCK, so read() is
// expected to block until the parent has written - confirm against eventfd(2).
1090 static int pakfire_jail_wait_for_signal(struct pakfire_jail
* jail
, int fd
) {
1094 DEBUG(jail
->pakfire
, "Waiting for signal...\n");
1096 ssize_t bytes_read
= read(fd
, &val
, sizeof(val
));
// A failed or short read is treated as an error
1097 if (bytes_read
< 0 || (size_t)bytes_read
< sizeof(val
)) {
1098 ERROR(jail
->pakfire
, "Error waiting for signal: %m\n");
1102 // Close the file descriptor
1109 Performs the initialisation that needs to happen in the parent part
1111 static int pakfire_jail_parent(struct pakfire_jail
* jail
, struct pakfire_jail_exec
* ctx
) {
1114 // Write "deny" to /proc/PID/setgroups
1115 r
= pakfire_jail_setgroups(jail
, ctx
->pid
);
1119 // Setup UID mapping
1120 r
= pakfire_jail_setup_uid_mapping(jail
, ctx
->pid
);
1124 // Setup GID mapping
1125 r
= pakfire_jail_setup_gid_mapping(jail
, ctx
->pid
);
1129 // Parent has finished initialisation
1130 DEBUG(jail
->pakfire
, "Parent has finished initialization\n");
1132 // Send signal to client
1133 r
= pakfire_jail_send_signal(jail
, ctx
->completed_fd
);
1140 static int pakfire_jail_child(struct pakfire_jail
* jail
, struct pakfire_jail_exec
* ctx
,
1141 const char* argv
[]) {
1144 // Redirect any logging to our log pipe
1145 pakfire_set_log_callback(jail
->pakfire
, pakfire_jail_log
, &ctx
->pipes
);
1148 r
= prctl(PR_SET_PDEATHSIG
, SIGKILL
, 0, 0, 0);
1150 ERROR(jail
->pakfire
, "Could not configure to die with parent: %m\n");
1155 pid_t pid
= getpid();
1157 DEBUG(jail
->pakfire
, "Launched child process in jail with PID %d\n", pid
);
1160 for (unsigned int i
= 0; argv
[i
]; i
++)
1161 DEBUG(jail
->pakfire
, " argv[%d] = %s\n", i
, argv
[i
]);
1163 // Wait for the parent to finish initialization
1164 r
= pakfire_jail_wait_for_signal(jail
, ctx
->completed_fd
);
1168 // Perform further initialization
1171 uid_t uid
= getuid();
1172 gid_t gid
= getgid();
1175 uid_t euid
= geteuid();
1176 gid_t egid
= getegid();
1178 DEBUG(jail
->pakfire
, " UID: %d (effective %d)\n", uid
, euid
);
1179 DEBUG(jail
->pakfire
, " GID: %d (effective %d)\n", gid
, egid
);
1181 // Check if we are (effectively) running as root
1182 if (uid
|| gid
|| euid
|| egid
) {
1183 ERROR(jail
->pakfire
, "Child process is not running as root\n");
1187 const char* root
= pakfire_get_path(jail
->pakfire
);
1188 const char* arch
= pakfire_get_arch(jail
->pakfire
);
1190 // Change root (unless root is /)
1191 if (!pakfire_on_root(jail
->pakfire
)) {
1193 r
= pakfire_jail_mount(jail
);
1200 ERROR(jail
->pakfire
, "chroot() to %s failed: %m\n", root
);
1204 // Change directory to /
1207 ERROR(jail
->pakfire
, "chdir() after chroot() failed: %m\n");
1213 unsigned long persona
= pakfire_arch_personality(arch
);
1215 r
= personality(persona
);
1217 ERROR(jail
->pakfire
, "Could not set personality (%x)\n", (unsigned int)persona
);
1224 DEBUG(jail
->pakfire
, "Setting nice level to %d\n", jail
->nice
);
1226 r
= setpriority(PRIO_PROCESS
, pid
, jail
->nice
);
1228 ERROR(jail
->pakfire
, "Could not set nice level: %m\n");
1233 // Close other end of log pipes
1234 close(ctx
->pipes
.log_INFO
[0]);
1235 close(ctx
->pipes
.log_ERROR
[0]);
1237 close(ctx
->pipes
.log_DEBUG
[0]);
1238 #endif /* ENABLE_DEBUG */
1240 // Connect standard output and error
1241 if (ctx
->pipes
.stdout
[1] && ctx
->pipes
.stderr
[1]) {
1242 r
= dup2(ctx
->pipes
.stdout
[1], STDOUT_FILENO
);
1244 ERROR(jail
->pakfire
, "Could not connect fd %d to stdout: %m\n",
1245 ctx
->pipes
.stdout
[1]);
1250 r
= dup2(ctx
->pipes
.stderr
[1], STDERR_FILENO
);
1252 ERROR(jail
->pakfire
, "Could not connect fd %d to stderr: %m\n",
1253 ctx
->pipes
.stderr
[1]);
1258 // Close the pipe (as we have moved the original file descriptors)
1259 pakfire_jail_close_pipe(jail
, ctx
->pipes
.stdout
);
1260 pakfire_jail_close_pipe(jail
, ctx
->pipes
.stderr
);
1263 // Reset open file limit (http://0pointer.net/blog/file-descriptor-limits.html)
1264 r
= pakfire_rlimit_reset_nofile(jail
->pakfire
);
1268 // Drop capabilities
1269 r
= pakfire_jail_drop_capabilities(jail
);
1274 r
= pakfire_jail_limit_syscalls(jail
);
1279 r
= execvpe(argv
[0], (char**)argv
, jail
->env
);
1281 ERROR(jail
->pakfire
, "Could not execve(): %m\n");
1283 // Translate errno into regular exit code
1293 // We should not get here
1297 // Run a command in the jail
1298 static int __pakfire_jail_exec(struct pakfire_jail
* jail
, const char* argv
[],
1299 const int interactive
) {
1303 // Check if argv is valid
1304 if (!argv
|| !argv
[0]) {
1309 // Initialize context for this call
1310 struct pakfire_jail_exec ctx
= {
1317 DEBUG(jail
->pakfire
, "Executing jail...\n");
1320 Setup a file descriptor which can be used to notify the client that the parent
1321 has completed configuration.
1323 ctx
.completed_fd
= eventfd(0, EFD_CLOEXEC
);
1324 if (ctx
.completed_fd
< 0) {
1325 ERROR(jail
->pakfire
, "eventfd() failed: %m\n");
1329 // Create pipes to communicate with child process if we are not running interactively
1332 r
= pakfire_jail_setup_pipe(jail
, &ctx
.pipes
.stdout
, 0);
1337 r
= pakfire_jail_setup_pipe(jail
, &ctx
.pipes
.stderr
, 0);
1342 // Setup pipes for logging
1344 r
= pakfire_jail_setup_pipe(jail
, &ctx
.pipes
.log_INFO
, O_CLOEXEC
);
1349 r
= pakfire_jail_setup_pipe(jail
, &ctx
.pipes
.log_ERROR
, O_CLOEXEC
);
1355 r
= pakfire_jail_setup_pipe(jail
, &ctx
.pipes
.log_DEBUG
, O_CLOEXEC
);
1358 #endif /* ENABLE_DEBUG */
1360 // Configure child process
1361 struct clone_args args
= {
1370 .exit_signal
= SIGCHLD
,
1371 .pidfd
= (long long unsigned int)&ctx
.pidfd
,
1374 // Launch the process in a cgroup that is a leaf of the configured cgroup
1376 args
.flags
|= CLONE_INTO_CGROUP
;
1379 const char* uuid
= pakfire_jail_uuid(jail
);
1381 // Create a temporary cgroup
1382 r
= pakfire_cgroup_child(&ctx
.cgroup
, jail
->cgroup
, uuid
, 0);
1384 ERROR(jail
->pakfire
, "Could not create cgroup for jail: %m\n");
1388 // Clone into this cgroup
1389 args
.cgroup
= pakfire_cgroup_fd(ctx
.cgroup
);
1392 // Fork this process
1393 ctx
.pid
= clone3(&args
, sizeof(args
));
1395 ERROR(jail
->pakfire
, "Could not clone: %m\n");
1399 } else if (ctx
.pid
== 0) {
1400 r
= pakfire_jail_child(jail
, &ctx
, argv
);
1405 r
= pakfire_jail_parent(jail
, &ctx
);
1409 DEBUG(jail
->pakfire
, "Waiting for PID %d to finish its work\n", ctx
.pid
);
1411 // Read output of the child process
1412 r
= pakfire_jail_wait(jail
, &ctx
);
1416 // Handle exit status
1417 switch (ctx
.status
.si_code
) {
1419 DEBUG(jail
->pakfire
, "The child process exited with code %d\n",
1420 ctx
.status
.si_status
);
1423 exit
= ctx
.status
.si_status
;
1428 ERROR(jail
->pakfire
, "The child process was killed\n");
1431 // Log anything else
1433 ERROR(jail
->pakfire
, "Unknown child exit code: %d\n", ctx
.status
.si_code
);
1438 // Destroy the temporary cgroup (if any)
1440 // Read cgroup stats
1441 r
= pakfire_cgroup_stat(ctx
.cgroup
, &ctx
.cgroup_stats
);
1443 ERROR(jail
->pakfire
, "Could not read cgroup stats: %m\n");
1445 pakfire_cgroup_stat_dump(ctx
.cgroup
, &ctx
.cgroup_stats
);
1448 pakfire_cgroup_destroy(ctx
.cgroup
);
1449 pakfire_cgroup_unref(ctx
.cgroup
);
1452 // Close any file descriptors
1453 pakfire_jail_close_pipe(jail
, ctx
.pipes
.stdout
);
1454 pakfire_jail_close_pipe(jail
, ctx
.pipes
.stderr
);
1457 pakfire_jail_close_pipe(jail
, ctx
.pipes
.log_INFO
);
1458 pakfire_jail_close_pipe(jail
, ctx
.pipes
.log_ERROR
);
1459 pakfire_jail_close_pipe(jail
, ctx
.pipes
.log_DEBUG
);
1464 PAKFIRE_EXPORT
int pakfire_jail_exec(struct pakfire_jail
* jail
,
1465 const char* argv
[], char** output
) {
1468 // Store logging callback
1469 pakfire_jail_log_callback log_callback
= jail
->log_callback
;
1470 void* log_data
= jail
->log_data
;
1472 // Capture output if requested by user
1474 pakfire_jail_set_log_callback(jail
, pakfire_jail_capture_stdout
, output
);
1477 r
= __pakfire_jail_exec(jail
, argv
, 0);
1479 // Restore log callback
1480 pakfire_jail_set_log_callback(jail
, log_callback
, log_data
);
1485 static int pakfire_jail_exec_interactive(
1486 struct pakfire_jail
* jail
, const char* argv
[]) {
1489 // Setup interactive stuff
1490 r
= pakfire_jail_setup_interactive_env(jail
);
1494 return __pakfire_jail_exec(jail
, argv
, 1);
1497 PAKFIRE_EXPORT
int pakfire_jail_exec_script(struct pakfire_jail
* jail
,
1498 const char* script
, const size_t size
, const char* args
[], char** output
) {
1499 char path
[PATH_MAX
];
1500 const char** argv
= NULL
;
1503 const char* root
= pakfire_get_path(jail
->pakfire
);
1505 // Write the scriptlet to disk
1506 r
= pakfire_path_join(path
, root
, "pakfire-script.XXXXXX");
1510 // Open a temporary file
1511 int fd
= mkstemp(path
);
1513 ERROR(jail
->pakfire
, "Could not open a temporary file: %m\n");
1518 DEBUG(jail
->pakfire
, "Writing script to %s:\n%.*s\n", path
, (int)size
, script
);
1521 ssize_t bytes_written
= write(fd
, script
, size
);
1522 if (bytes_written
< (ssize_t
)size
) {
1523 ERROR(jail
->pakfire
, "Could not write script to file %s: %m\n", path
);
1528 // Make the script executable
1529 r
= fchmod(fd
, S_IRUSR
|S_IWUSR
|S_IXUSR
);
1531 ERROR(jail
->pakfire
, "Could not set executable permissions on %s: %m\n", path
);
1538 ERROR(jail
->pakfire
, "Could not close script file %s: %m\n", path
);
1543 // Count how many arguments were passed
1544 unsigned int argc
= 1;
1546 for (const char** arg
= args
; *arg
; arg
++)
1550 argv
= calloc(argc
+ 1, sizeof(*argv
));
1552 ERROR(jail
->pakfire
, "Could not allocate argv: %m\n");
1557 argv
[0] = (root
) ? pakfire_path_relpath(root
, path
) : path
;
1560 for (unsigned int i
= 1; i
< argc
; i
++)
1561 argv
[i
] = args
[i
-1];
1564 r
= pakfire_jail_exec(jail
, argv
, output
);
1570 // Remove script from disk
1578 A convenience function that creates a new jail, runs the given command and destroys
1581 int pakfire_jail_run(struct pakfire
* pakfire
, const char* argv
[], int flags
, char** output
) {
1582 struct pakfire_jail
* jail
= NULL
;
1585 // Create a new jail
1586 r
= pakfire_jail_create(&jail
, pakfire
, flags
);
1590 // Execute the command
1591 r
= pakfire_jail_exec(jail
, argv
, output
);
1595 pakfire_jail_unref(jail
);
1600 int pakfire_jail_run_script(struct pakfire
* pakfire
,
1601 const char* script
, const size_t length
, const char* argv
[], int flags
, char** output
) {
1602 struct pakfire_jail
* jail
= NULL
;
1605 // Create a new jail
1606 r
= pakfire_jail_create(&jail
, pakfire
, flags
);
1610 // Execute the command
1611 r
= pakfire_jail_exec_script(jail
, script
, length
, argv
, output
);
1615 pakfire_jail_unref(jail
);
/*
	Launches an interactive login shell (/bin/bash --login) inside the jail.

	Returns the result of pakfire_jail_exec_interactive().
*/
int pakfire_jail_shell(struct pakfire_jail* jail) {
	// NULL-terminated command line of the shell
	const char* shell[] = { "/bin/bash", "--login", NULL };

	return pakfire_jail_exec_interactive(jail, shell);
}
1629 int pakfire_jail_ldconfig(struct pakfire
* pakfire
) {
1630 char path
[PATH_MAX
];
1632 const char* ldconfig
= "/sbin/ldconfig";
1634 // Check if ldconfig exists before calling it to avoid overhead
1635 int r
= pakfire_make_path(pakfire
, path
, ldconfig
);
1639 // Check if ldconfig is executable
1640 r
= access(path
, X_OK
);
1642 DEBUG(pakfire
, "%s is not executable. Skipping...\n", ldconfig
);
1646 const char* argv
[] = {
1651 return pakfire_jail_run(pakfire
, argv
, 0, NULL
);