1 /*#############################################################################
3 # Pakfire - The IPFire package management system #
4 # Copyright (C) 2022 Pakfire development team #
6 # This program is free software: you can redistribute it and/or modify #
7 # it under the terms of the GNU General Public License as published by #
8 # the Free Software Foundation, either version 3 of the License, or #
9 # (at your option) any later version. #
11 # This program is distributed in the hope that it will be useful, #
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of #
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
14 # GNU General Public License for more details. #
16 # You should have received a copy of the GNU General Public License #
17 # along with this program. If not, see <http://www.gnu.org/licenses/>. #
19 #############################################################################*/
22 #include <linux/capability.h>
23 #include <linux/fcntl.h>
24 #include <linux/sched.h>
26 #include <linux/wait.h>
31 #include <sys/capability.h>
32 #include <sys/epoll.h>
33 #include <sys/eventfd.h>
34 #include <sys/personality.h>
35 #include <sys/prctl.h>
36 #include <sys/resource.h>
37 #include <sys/types.h>
46 #include <pakfire/arch.h>
47 #include <pakfire/cgroup.h>
48 #include <pakfire/jail.h>
49 #include <pakfire/logging.h>
50 #include <pakfire/mount.h>
51 #include <pakfire/pakfire.h>
52 #include <pakfire/private.h>
53 #include <pakfire/pwd.h>
54 #include <pakfire/string.h>
55 #include <pakfire/util.h>
// Size (in bytes) of each buffer that collects output of the child process.
// The expression is parenthesized so that the macro can be used safely
// inside larger expressions (e.g. with %, / or unary operators).
#define BUFFER_SIZE (1024 * 64)

// Number of slots in the environment array of a jail
#define ENVIRON_SIZE 128

// Maximum number of events that are fetched by one call to epoll_wait()
#define EPOLL_MAX_EVENTS 2

// Maximum number of custom mountpoints that can be stored in a jail
#define MAX_MOUNTPOINTS 8
62 // The default environment that will be set for every command
63 static const struct environ
{
67 { "LANG", "en_US.utf-8" },
68 { "PATH", "/usr/local/sbin:/usr/sbin:/sbin:/usr/local/bin:/usr/bin:/bin", },
73 struct pakfire_jail_mountpoint
{
74 char source
[PATH_MAX
];
75 char target
[PATH_MAX
];
80 struct pakfire
* pakfire
;
83 // A unique ID for each jail
85 char __uuid
[UUID_STR_LEN
];
94 struct pakfire_cgroup
* cgroup
;
97 char* env
[ENVIRON_SIZE
];
100 struct pakfire_jail_mountpoint mountpoints
[MAX_MOUNTPOINTS
];
101 unsigned int num_mountpoints
;
104 struct pakfire_log_buffer
{
105 char data
[BUFFER_SIZE
];
109 struct pakfire_jail_exec
{
110 // PID (of the child)
114 // Process status (from waitid)
117 // FD to notify the client that the parent has finished initialization
121 struct pakfire_jail_pipes
{
133 struct pakfire_jail_communicate
{
134 pakfire_jail_communicate_in in
;
135 pakfire_jail_communicate_out out
;
140 struct pakfire_jail_buffers
{
141 struct pakfire_log_buffer stdout
;
142 struct pakfire_log_buffer stderr
;
145 struct pakfire_log_buffer log_INFO
;
146 struct pakfire_log_buffer log_ERROR
;
147 struct pakfire_log_buffer log_DEBUG
;
150 struct pakfire_cgroup
* cgroup
;
151 struct pakfire_cgroup_stats cgroup_stats
;
/*
	Thin wrapper which issues the clone3 system call through syscall()
	and hands the result back to the caller.
*/
static int clone3(struct clone_args* args, size_t size) {
	const long result = syscall(__NR_clone3, args, size);

	return result;
}
158 static void pakfire_jail_free(struct pakfire_jail
* jail
) {
159 DEBUG(jail
->pakfire
, "Freeing jail at %p\n", jail
);
162 for (unsigned int i
= 0; jail
->env
[i
]; i
++)
166 pakfire_cgroup_unref(jail
->cgroup
);
168 pakfire_unref(jail
->pakfire
);
173 Passes any log messages on to the default pakfire log callback
175 static int pakfire_jail_default_log_callback(struct pakfire
* pakfire
, void* data
,
176 int priority
, const char* line
, size_t length
) {
179 INFO(pakfire
, "%s", line
);
183 ERROR(pakfire
, "%s", line
);
188 DEBUG(pakfire
, "%s", line
);
196 static int pakfire_jail_setup_interactive_env(struct pakfire_jail
* jail
) {
198 int r
= pakfire_jail_set_env(jail
, "PS1", "pakfire-jail \\w> ");
203 char* TERM
= secure_getenv("TERM");
205 r
= pakfire_jail_set_env(jail
, "TERM", TERM
);
211 char* LANG
= secure_getenv("LANG");
213 r
= pakfire_jail_set_env(jail
, "LANG", LANG
);
221 PAKFIRE_EXPORT
int pakfire_jail_create(struct pakfire_jail
** jail
,
222 struct pakfire
* pakfire
, int flags
) {
225 // Allocate a new jail
226 struct pakfire_jail
* j
= calloc(1, sizeof(*j
));
231 j
->pakfire
= pakfire_ref(pakfire
);
233 // Initialize reference counter
239 // Generate a random UUID
240 uuid_generate_random(j
->uuid
);
242 DEBUG(j
->pakfire
, "Allocated new jail at %p\n", j
);
244 // Set default environment
245 for (const struct environ
* e
= ENV
; e
->key
; e
++) {
246 r
= pakfire_jail_set_env(j
, e
->key
, e
->val
);
256 pakfire_jail_free(j
);
261 PAKFIRE_EXPORT
struct pakfire_jail
* pakfire_jail_ref(struct pakfire_jail
* jail
) {
267 PAKFIRE_EXPORT
struct pakfire_jail
* pakfire_jail_unref(struct pakfire_jail
* jail
) {
268 if (--jail
->nrefs
> 0)
271 pakfire_jail_free(jail
);
275 static const char* pakfire_jail_uuid(struct pakfire_jail
* jail
) {
277 uuid_unparse_lower(jail
->uuid
, jail
->__uuid
);
284 PAKFIRE_EXPORT
int pakfire_jail_nice(struct pakfire_jail
* jail
, int nice
) {
285 // Check if nice level is in range
286 if (nice
< -19 || nice
> 20) {
297 int pakfire_jail_set_cgroup(struct pakfire_jail
* jail
, struct pakfire_cgroup
* cgroup
) {
298 // Free any previous cgroup
300 pakfire_cgroup_unref(jail
->cgroup
);
304 // Set any new cgroup
306 DEBUG(jail
->pakfire
, "Setting cgroup %p\n", cgroup
);
308 jail
->cgroup
= pakfire_cgroup_ref(cgroup
);
317 // Returns the length of the environment
318 static unsigned int pakfire_jail_env_length(struct pakfire_jail
* jail
) {
321 // Count everything in the environment
322 for (char** e
= jail
->env
; *e
; e
++)
328 // Finds an existing environment variable and returns its index or -1 if not found
329 static int pakfire_jail_find_env(struct pakfire_jail
* jail
, const char* key
) {
335 char buffer
[strlen(key
) + 2];
336 pakfire_string_format(buffer
, "%s=", key
);
338 for (unsigned int i
= 0; jail
->env
[i
]; i
++) {
339 if (pakfire_string_startswith(jail
->env
[i
], buffer
))
347 // Returns the value of an environment variable or NULL
348 PAKFIRE_EXPORT
const char* pakfire_jail_get_env(struct pakfire_jail
* jail
,
350 int i
= pakfire_jail_find_env(jail
, key
);
354 return jail
->env
[i
] + strlen(key
) + 1;
357 // Sets an environment variable
358 PAKFIRE_EXPORT
int pakfire_jail_set_env(struct pakfire_jail
* jail
,
359 const char* key
, const char* value
) {
360 // Find the index where to write this value to
361 int i
= pakfire_jail_find_env(jail
, key
);
363 i
= pakfire_jail_env_length(jail
);
365 // Return -ENOSPC when the environment is full
366 if (i
>= ENVIRON_SIZE
) {
371 // Free any previous value
375 // Format and set environment variable
376 asprintf(&jail
->env
[i
], "%s=%s", key
, value
);
378 DEBUG(jail
->pakfire
, "Set environment variable: %s\n", jail
->env
[i
]);
383 // Imports an environment
384 PAKFIRE_EXPORT
int pakfire_jail_import_env(struct pakfire_jail
* jail
, const char* env
[]) {
392 // Copy environment variables
393 for (unsigned int i
= 0; env
[i
]; i
++) {
394 r
= pakfire_string_partition(env
[i
], "=", &key
, &val
);
399 r
= pakfire_jail_set_env(jail
, key
, val
);
415 This function replaces any logging in the child process.
417 All log messages will be sent to the parent process through their respective pipes.
419 static void pakfire_jail_log(void* data
, int priority
, const char* file
,
420 int line
, const char* fn
, const char* format
, va_list args
) {
421 struct pakfire_jail_pipes
* pipes
= (struct pakfire_jail_pipes
*)data
;
426 fd
= pipes
->log_INFO
[1];
430 fd
= pipes
->log_ERROR
[1];
435 fd
= pipes
->log_DEBUG
[1];
437 #endif /* ENABLE_DEBUG */
439 // Ignore any messages of an unknown priority
444 // Send the log message
446 vdprintf(fd
, format
, args
);
449 static int pakfire_jail_log_buffer_is_full(const struct pakfire_log_buffer
* buffer
) {
450 return (sizeof(buffer
->data
) == buffer
->used
);
454 This function reads as much data as it can from the file descriptor.
455 If it finds a whole line in it, it will send it to the logger and repeat the process.
456 If no newline character is found, it will try to read more data until it finds one.
458 static int pakfire_jail_handle_log(struct pakfire_jail
* jail
,
459 struct pakfire_jail_exec
* ctx
, int priority
, int fd
,
460 struct pakfire_log_buffer
* buffer
, pakfire_jail_communicate_out callback
, void* data
) {
461 char line
[BUFFER_SIZE
+ 1];
463 // Fill up buffer from fd
464 if (buffer
->used
< sizeof(buffer
->data
)) {
465 ssize_t bytes_read
= read(fd
, buffer
->data
+ buffer
->used
,
466 sizeof(buffer
->data
) - buffer
->used
);
469 if (bytes_read
< 0) {
470 ERROR(jail
->pakfire
, "Could not read from fd %d: %m\n", fd
);
474 // Update buffer size
475 buffer
->used
+= bytes_read
;
478 // See if we have any lines that we can write
479 while (buffer
->used
) {
480 // Search for the end of the first line
481 char* eol
= memchr(buffer
->data
, '\n', buffer
->used
);
485 // If the buffer is full, we send the content to the logger and try again
486 // This should not happen in practice
487 if (pakfire_jail_log_buffer_is_full(buffer
)) {
488 DEBUG(jail
->pakfire
, "Logging buffer is full. Sending all content\n");
490 eol
= buffer
->data
+ sizeof(buffer
->data
) - 1;
492 // Otherwise we might have only read parts of the output
497 // Find the length of the string
498 size_t length
= eol
- buffer
->data
+ 1;
500 // Copy the line into the buffer
501 memcpy(line
, buffer
->data
, length
);
503 // Terminate the string
508 int r
= callback(jail
->pakfire
, data
, priority
, line
, length
);
510 ERROR(jail
->pakfire
, "The logging callback returned an error: %d\n", r
);
515 // Remove line from buffer
516 memmove(buffer
->data
, buffer
->data
+ length
, buffer
->used
- length
);
517 buffer
->used
-= length
;
523 static int pakfire_jail_setup_pipe(struct pakfire_jail
* jail
, int (*fds
)[2], const int flags
) {
524 int r
= pipe2(*fds
, flags
);
526 ERROR(jail
->pakfire
, "Could not setup pipe: %m\n");
533 static void pakfire_jail_close_pipe(struct pakfire_jail
* jail
, int fds
[2]) {
534 for (unsigned int i
= 0; i
< 2; i
++)
540 This is a convenience function that fetches the reading end of a pipe and
541 closes the write end.
543 static int pakfire_jail_get_pipe(struct pakfire_jail
* jail
, int (*fds
)[2]) {
544 // Give the variables easier names to avoid confusion
545 int* fd_read
= &(*fds
)[0];
546 int* fd_write
= &(*fds
)[1];
548 // Close the write end of the pipe
554 // Return the read end
558 static int pakfire_jail_wait(struct pakfire_jail
* jail
, struct pakfire_jail_exec
* ctx
) {
560 struct epoll_event ev
;
561 struct epoll_event events
[EPOLL_MAX_EVENTS
];
564 // Fetch file descriptors from context
565 const int stdout
= pakfire_jail_get_pipe(jail
, &ctx
->pipes
.stdout
);
566 const int stderr
= pakfire_jail_get_pipe(jail
, &ctx
->pipes
.stderr
);
567 const int pidfd
= ctx
->pidfd
;
570 const int log_INFO
= pakfire_jail_get_pipe(jail
, &ctx
->pipes
.log_INFO
);
571 const int log_ERROR
= pakfire_jail_get_pipe(jail
, &ctx
->pipes
.log_ERROR
);
572 const int log_DEBUG
= pakfire_jail_get_pipe(jail
, &ctx
->pipes
.log_DEBUG
);
574 // Make a list of all file descriptors we are interested in
576 stdout
, stderr
, pidfd
, log_INFO
, log_ERROR
, log_DEBUG
,
580 epollfd
= epoll_create1(0);
582 ERROR(jail
->pakfire
, "Could not initialize epoll(): %m\n");
587 ev
.events
= EPOLLIN
|EPOLLHUP
;
589 // Turn file descriptors into non-blocking mode and add them to epoll()
590 for (unsigned int i
= 0; i
< sizeof(fds
) / sizeof(*fds
); i
++) {
593 // Skip fds which were not initialized
599 if (epoll_ctl(epollfd
, EPOLL_CTL_ADD
, fd
, &ev
) < 0) {
600 ERROR(jail
->pakfire
, "Could not add file descriptor %d to epoll(): %m\n", fd
);
608 // Loop for as long as the process is alive
610 int num
= epoll_wait(epollfd
, events
, EPOLL_MAX_EVENTS
, -1);
612 // Ignore if epoll_wait() has been interrupted
616 ERROR(jail
->pakfire
, "epoll_wait() failed: %m\n");
622 for (int i
= 0; i
< num
; i
++) {
623 int e
= events
[i
].events
;
624 int fd
= events
[i
].data
.fd
;
626 struct pakfire_log_buffer
* buffer
= NULL
;
627 pakfire_jail_communicate_out callback
= NULL
;
631 // Check if there is any data to be read
633 // Handle any changes to the PIDFD
635 // Call waitid() and store the result
636 r
= waitid(P_PIDFD
, ctx
->pidfd
, &ctx
->status
, WEXITED
);
638 ERROR(jail
->pakfire
, "waitid() failed: %m\n");
642 // Mark that we have ended so that we will process the remaining
643 // events from epoll() now, but won't restart the outer loop.
647 // Handle logging messages
648 } else if (fd
== log_INFO
) {
649 buffer
= &ctx
->buffers
.log_INFO
;
652 callback
= pakfire_jail_default_log_callback
;
654 } else if (fd
== log_ERROR
) {
655 buffer
= &ctx
->buffers
.log_ERROR
;
658 callback
= pakfire_jail_default_log_callback
;
660 } else if (fd
== log_DEBUG
) {
661 buffer
= &ctx
->buffers
.log_DEBUG
;
662 priority
= LOG_DEBUG
;
664 callback
= pakfire_jail_default_log_callback
;
666 // Handle anything from the standard output and standard error pipes
667 } else if (fd
== stdout
) {
668 buffer
= &ctx
->buffers
.stdout
;
671 callback
= ctx
->communicate
.out
;
672 data
= ctx
->communicate
.data
;
674 } else if (fd
== stderr
) {
675 buffer
= &ctx
->buffers
.stderr
;
678 callback
= ctx
->communicate
.out
;
679 data
= ctx
->communicate
.data
;
682 DEBUG(jail
->pakfire
, "Received invalid file descriptor %d\n", fd
);
687 r
= pakfire_jail_handle_log(jail
, ctx
, priority
, fd
, buffer
, callback
, data
);
692 // Check if any file descriptors have been closed
694 // Remove the file descriptor
695 r
= epoll_ctl(epollfd
, EPOLL_CTL_DEL
, fd
, NULL
);
697 ERROR(jail
->pakfire
, "Could not remove closed file-descriptor %d: %m\n", fd
);
/*
	Output callback that collects everything that was sent with priority
	LOG_INFO in a heap-allocated string.

	data is either NULL or points to a char* which is NULL or holds a string
	that was allocated on the heap. Every line is appended to that string.
	The previous string is released, so the caller only has to free() the
	final result.

	Anything with another priority (or everything if data is NULL) is passed
	on to the default log callback.

	Returns zero on success and non-zero on failure.
*/
int pakfire_jail_capture_stdout(struct pakfire* pakfire, void* data,
		int priority, const char* line, size_t length) {
	char** output = (char**)data;
	char* buffer = NULL;
	int r;

	// Append everything from stdout to a buffer
	if (output && priority == LOG_INFO) {
		// Format into a temporary pointer: asprintf() never frees what its
		// first argument held before and leaves it undefined on failure
		r = asprintf(&buffer, "%s%s", (*output) ? *output : "", line);
		if (r < 0)
			return 1;

		// Release the previous content and hand out the extended string
		free(*output);
		*output = buffer;

		return 0;
	}

	// Send everything else to the default logger
	return pakfire_jail_default_log_callback(pakfire, NULL, priority, line, length);
}
730 static int pakfire_jail_drop_capabilities(struct pakfire_jail
* jail
) {
731 const int capabilities
[] = {
732 // Deny access to the kernel's audit system
737 // Deny suspending block devices
740 // Deny any stuff with BPF
743 // Deny checkpoint restore
744 CAP_CHECKPOINT_RESTORE
,
746 // Deny opening files by inode number (open_by_handle_at)
749 // Deny setting SUID bits
752 // Deny locking more memory
755 // Deny modifying any Apparmor/SELinux/SMACK configuration
759 // Deny creating any special devices
762 // Deny setting any capabilities
765 // Deny reading from syslog
768 // Deny any admin actions (mount, sethostname, ...)
771 // Deny rebooting the system
774 // Deny loading kernel modules
777 // Deny setting nice level
780 // Deny access to /proc/kcore, /dev/mem, /dev/kmem
783 // Deny circumventing any resource limits
786 // Deny setting the system time
789 // Deny playing with suspend
795 DEBUG(jail
->pakfire
, "Dropping capabilities...\n");
800 // Drop any capabilities
801 for (const int* cap
= capabilities
; *cap
; cap
++) {
802 r
= prctl(PR_CAPBSET_DROP
, *cap
, 0, 0, 0);
804 ERROR(jail
->pakfire
, "Could not drop capability %d: %m\n", *cap
);
811 // Fetch any capabilities
812 cap_t caps
= cap_get_proc();
814 ERROR(jail
->pakfire
, "Could not read capabilities: %m\n");
819 Set inheritable capabilities
821 This ensures that no processes will be able to gain any of the listed
824 r
= cap_set_flag(caps
, CAP_INHERITABLE
, num_caps
, capabilities
, CAP_CLEAR
);
826 ERROR(jail
->pakfire
, "cap_set_flag() failed: %m\n");
830 // Restore capabilities
831 r
= cap_set_proc(caps
);
833 ERROR(jail
->pakfire
, "Could not restore capabilities: %m\n");
846 static int pakfire_jail_limit_syscalls(struct pakfire_jail
* jail
) {
847 const int syscalls
[] = {
848 // The kernel's keyring isn't namespaced
851 SCMP_SYS(request_key
),
853 // Disable userfaultfd
854 SCMP_SYS(userfaultfd
),
856 // Disable perf which could leak a lot of information about the host
857 SCMP_SYS(perf_event_open
),
863 DEBUG(jail
->pakfire
, "Applying syscall filter...\n");
865 // Setup a syscall filter which allows everything by default
866 scmp_filter_ctx ctx
= seccomp_init(SCMP_ACT_ALLOW
);
868 ERROR(jail
->pakfire
, "Could not setup seccomp filter: %m\n");
873 for (const int* syscall
= syscalls
; *syscall
; syscall
++) {
874 r
= seccomp_rule_add(ctx
, SCMP_ACT_ERRNO(EPERM
), *syscall
, 0);
876 ERROR(jail
->pakfire
, "Could not configure syscall %d: %m\n", *syscall
);
881 // Load syscall filter into the kernel
882 r
= seccomp_load(ctx
);
884 ERROR(jail
->pakfire
, "Could not load syscall filter into the kernel: %m\n");
890 seccomp_release(ctx
);
897 PAKFIRE_EXPORT
int pakfire_jail_bind(struct pakfire_jail
* jail
,
898 const char* source
, const char* target
, int flags
) {
899 struct pakfire_jail_mountpoint
* mp
= NULL
;
902 // Check if there is any space left
903 if (jail
->num_mountpoints
>= MAX_MOUNTPOINTS
) {
908 // Check for valid inputs
909 if (!source
|| !target
) {
914 // Select the next free slot
915 mp
= &jail
->mountpoints
[jail
->num_mountpoints
];
918 r
= pakfire_string_set(mp
->source
, source
);
920 ERROR(jail
->pakfire
, "Could not copy source: %m\n");
925 r
= pakfire_string_set(mp
->target
, target
);
927 ERROR(jail
->pakfire
, "Could not copy target: %m\n");
935 jail
->num_mountpoints
++;
941 Mounts everything that we require in the new namespace
943 static int pakfire_jail_mount(struct pakfire_jail
* jail
) {
944 struct pakfire_jail_mountpoint
* mp
= NULL
;
947 // Mount all default stuff
948 r
= pakfire_mount_all(jail
->pakfire
);
952 // Mount all custom stuff
953 for (unsigned int i
= 0; i
< jail
->num_mountpoints
; i
++) {
955 mp
= &jail
->mountpoints
[i
];
958 r
= pakfire_bind(jail
->pakfire
, mp
->source
, mp
->target
, mp
->flags
);
963 // Log all mountpoints
964 pakfire_mount_list(jail
->pakfire
);
971 static int pakfire_jail_setup_uid_mapping(struct pakfire_jail
* jail
, pid_t pid
) {
975 // Skip mapping anything when running on /
976 if (pakfire_on_root(jail
->pakfire
))
980 r
= pakfire_string_format(path
, "/proc/%d/uid_map", pid
);
985 const uid_t uid
= pakfire_uid(jail
->pakfire
);
988 const struct pakfire_subid
* subuid
= pakfire_subuid(jail
->pakfire
);
992 /* When running as root, we will map the entire range.
994 When running as a non-privileged user, we will map the root user inside the jail
995 to the user's UID outside of the jail, and we will map the rest starting from one.
1000 r
= pakfire_file_write(jail
->pakfire
, path
, 0, 0, 0,
1001 "0 %lu %lu\n", subuid
->id
, subuid
->length
);
1003 r
= pakfire_file_write(jail
->pakfire
, path
, 0, 0, 0,
1004 "0 %lu 1\n1 %lu %lu\n", uid
, subuid
->id
, subuid
->length
);
1008 ERROR(jail
->pakfire
, "Could not map UIDs: %m\n");
1015 static int pakfire_jail_setup_gid_mapping(struct pakfire_jail
* jail
, pid_t pid
) {
1016 char path
[PATH_MAX
];
1019 // Skip mapping anything when running on /
1020 if (pakfire_on_root(jail
->pakfire
))
1024 const gid_t gid
= pakfire_gid(jail
->pakfire
);
1027 const struct pakfire_subid
* subgid
= pakfire_subgid(jail
->pakfire
);
1032 r
= pakfire_string_format(path
, "/proc/%d/gid_map", pid
);
1038 r
= pakfire_file_write(jail
->pakfire
, path
, 0, 0, 0,
1039 "0 %lu %lu\n", subgid
->id
, subgid
->length
);
1041 r
= pakfire_file_write(jail
->pakfire
, path
, 0, 0, 0,
1042 "0 %lu 1\n%1 %lu %lu\n", gid
, subgid
->id
, subgid
->length
);
1046 ERROR(jail
->pakfire
, "Could not map GIDs: %m\n");
1053 static int pakfire_jail_setgroups(struct pakfire_jail
* jail
, pid_t pid
) {
1054 char path
[PATH_MAX
];
1058 r
= pakfire_string_format(path
, "/proc/%d/setgroups", pid
);
1062 // Open file for writing
1063 FILE* f
= fopen(path
, "w");
1065 ERROR(jail
->pakfire
, "Could not open %s for writing: %m\n", path
);
1070 int bytes_written
= fprintf(f
, "deny\n");
1071 if (bytes_written
<= 0) {
1072 ERROR(jail
->pakfire
, "Could not write to %s: %m\n", path
);
1079 ERROR(jail
->pakfire
, "Could not close %s: %m\n", path
);
1090 static int pakfire_jail_send_signal(struct pakfire_jail
* jail
, int fd
) {
1091 const uint64_t val
= 1;
1094 DEBUG(jail
->pakfire
, "Sending signal...\n");
1096 // Write to the file descriptor
1097 ssize_t bytes_written
= write(fd
, &val
, sizeof(val
));
1098 if (bytes_written
< 0 || (size_t)bytes_written
< sizeof(val
)) {
1099 ERROR(jail
->pakfire
, "Could not send signal: %m\n");
1103 // Close the file descriptor
1109 static int pakfire_jail_wait_for_signal(struct pakfire_jail
* jail
, int fd
) {
1113 DEBUG(jail
->pakfire
, "Waiting for signal...\n");
1115 ssize_t bytes_read
= read(fd
, &val
, sizeof(val
));
1116 if (bytes_read
< 0 || (size_t)bytes_read
< sizeof(val
)) {
1117 ERROR(jail
->pakfire
, "Error waiting for signal: %m\n");
1121 // Close the file descriptor
1127 static int pakfire_jail_stream_stdin(struct pakfire_jail
* jail
, struct pakfire_jail_exec
* ctx
) {
1130 // Nothing to do if there is no stdin callback set
1131 if (!ctx
->communicate
.in
) {
1132 DEBUG(jail
->pakfire
, "Callback for standard input is not set\n");
1136 int* fd
= &ctx
->pipes
.stdin
[1];
1138 DEBUG(jail
->pakfire
, "Streaming standard input...\n");
1140 // Calling the callback
1141 r
= ctx
->communicate
.in(jail
->pakfire
, ctx
->communicate
.data
, *fd
);
1143 DEBUG(jail
->pakfire
, "Standard input callback finished: %d\n", r
);
1145 // Close the file descriptor when we are done
1153 Performs the initialisation that needs to happen in the parent part
1155 static int pakfire_jail_parent(struct pakfire_jail
* jail
, struct pakfire_jail_exec
* ctx
) {
1158 // Setup UID mapping
1159 r
= pakfire_jail_setup_uid_mapping(jail
, ctx
->pid
);
1163 // Write "deny" to /proc/PID/setgroups
1164 r
= pakfire_jail_setgroups(jail
, ctx
->pid
);
1168 // Setup GID mapping
1169 r
= pakfire_jail_setup_gid_mapping(jail
, ctx
->pid
);
1173 // Parent has finished initialisation
1174 DEBUG(jail
->pakfire
, "Parent has finished initialization\n");
1176 // Send signal to client
1177 r
= pakfire_jail_send_signal(jail
, ctx
->completed_fd
);
1181 // Stream standard input
1182 r
= pakfire_jail_stream_stdin(jail
, ctx
);
1189 static int pakfire_jail_child(struct pakfire_jail
* jail
, struct pakfire_jail_exec
* ctx
,
1190 const char* argv
[]) {
1193 // Redirect any logging to our log pipe
1194 pakfire_set_log_callback(jail
->pakfire
, pakfire_jail_log
, &ctx
->pipes
);
1197 r
= prctl(PR_SET_PDEATHSIG
, SIGKILL
, 0, 0, 0);
1199 ERROR(jail
->pakfire
, "Could not configure to die with parent: %m\n");
1204 pid_t pid
= getpid();
1206 DEBUG(jail
->pakfire
, "Launched child process in jail with PID %d\n", pid
);
1208 // Wait for the parent to finish initialization
1209 r
= pakfire_jail_wait_for_signal(jail
, ctx
->completed_fd
);
1213 // Perform further initialization
1216 uid_t uid
= getuid();
1217 gid_t gid
= getgid();
1220 uid_t euid
= geteuid();
1221 gid_t egid
= getegid();
1223 DEBUG(jail
->pakfire
, " UID: %d (effective %d)\n", uid
, euid
);
1224 DEBUG(jail
->pakfire
, " GID: %d (effective %d)\n", gid
, egid
);
1226 // Check if we are (effectively running as root)
1227 if (uid
|| gid
|| euid
|| egid
) {
1228 ERROR(jail
->pakfire
, "Child process is not running as root\n");
1232 const char* root
= pakfire_get_path(jail
->pakfire
);
1233 const char* arch
= pakfire_get_arch(jail
->pakfire
);
1235 // Change root (unless root is /)
1236 if (!pakfire_on_root(jail
->pakfire
)) {
1238 r
= pakfire_jail_mount(jail
);
1245 ERROR(jail
->pakfire
, "chroot() to %s failed: %m\n", root
);
1249 // Change directory to /
1252 ERROR(jail
->pakfire
, "chdir() after chroot() failed: %m\n");
1258 unsigned long persona
= pakfire_arch_personality(arch
);
1260 r
= personality(persona
);
1262 ERROR(jail
->pakfire
, "Could not set personality (%x)\n", (unsigned int)persona
);
1269 DEBUG(jail
->pakfire
, "Setting nice level to %d\n", jail
->nice
);
1271 r
= setpriority(PRIO_PROCESS
, pid
, jail
->nice
);
1273 ERROR(jail
->pakfire
, "Could not set nice level: %m\n");
1278 // Close other end of log pipes
1279 close(ctx
->pipes
.log_INFO
[0]);
1280 close(ctx
->pipes
.log_ERROR
[0]);
1282 close(ctx
->pipes
.log_DEBUG
[0]);
1283 #endif /* ENABLE_DEBUG */
1285 // Connect standard input
1286 if (ctx
->pipes
.stdin
[0]) {
1287 r
= dup2(ctx
->pipes
.stdin
[0], STDIN_FILENO
);
1289 ERROR(jail
->pakfire
, "Could not connect fd %d to stdin: %m\n",
1290 ctx
->pipes
.stdin
[0]);
1296 // Connect standard output and error
1297 if (ctx
->pipes
.stdout
[1] && ctx
->pipes
.stderr
[1]) {
1298 r
= dup2(ctx
->pipes
.stdout
[1], STDOUT_FILENO
);
1300 ERROR(jail
->pakfire
, "Could not connect fd %d to stdout: %m\n",
1301 ctx
->pipes
.stdout
[1]);
1306 r
= dup2(ctx
->pipes
.stderr
[1], STDERR_FILENO
);
1308 ERROR(jail
->pakfire
, "Could not connect fd %d to stderr: %m\n",
1309 ctx
->pipes
.stderr
[1]);
1314 // Close the pipe (as we have moved the original file descriptors)
1315 pakfire_jail_close_pipe(jail
, ctx
->pipes
.stdin
);
1316 pakfire_jail_close_pipe(jail
, ctx
->pipes
.stdout
);
1317 pakfire_jail_close_pipe(jail
, ctx
->pipes
.stderr
);
1320 // Reset open file limit (http://0pointer.net/blog/file-descriptor-limits.html)
1321 r
= pakfire_rlimit_reset_nofile(jail
->pakfire
);
1325 // Drop capabilities
1326 r
= pakfire_jail_drop_capabilities(jail
);
1331 r
= pakfire_jail_limit_syscalls(jail
);
1335 DEBUG(jail
->pakfire
, "Child process initialization done\n");
1336 DEBUG(jail
->pakfire
, "Launching command:\n");
1339 for (unsigned int i
= 0; argv
[i
]; i
++)
1340 DEBUG(jail
->pakfire
, " argv[%d] = %s\n", i
, argv
[i
]);
1343 r
= execvpe(argv
[0], (char**)argv
, jail
->env
);
1345 ERROR(jail
->pakfire
, "Could not execve(): %m\n");
1347 // Translate errno into regular exit code
1357 // We should not get here
1361 // Run a command in the jail
1362 static int __pakfire_jail_exec(struct pakfire_jail
* jail
, const char* argv
[],
1363 const int interactive
,
1364 pakfire_jail_communicate_in communicate_in
,
1365 pakfire_jail_communicate_out communicate_out
,
1370 // Check if argv is valid
1371 if (!argv
|| !argv
[0]) {
1376 // Send any output to the default logger if no callback is set
1377 if (!communicate_out
)
1378 communicate_out
= pakfire_jail_default_log_callback
;
1380 // Initialize context for this call
1381 struct pakfire_jail_exec ctx
= {
1389 .in
= communicate_in
,
1390 .out
= communicate_out
,
1395 DEBUG(jail
->pakfire
, "Executing jail...\n");
1398 Setup a file descriptor which can be used to notify the client that the parent
1399 has completed configuration.
1401 ctx
.completed_fd
= eventfd(0, EFD_CLOEXEC
);
1402 if (ctx
.completed_fd
< 0) {
1403 ERROR(jail
->pakfire
, "eventfd() failed: %m\n");
1407 // Create pipes to communicate with child process if we are not running interactively
1409 // stdin (only if callback is set)
1410 if (ctx
.communicate
.in
) {
1411 r
= pakfire_jail_setup_pipe(jail
, &ctx
.pipes
.stdin
, 0);
1417 r
= pakfire_jail_setup_pipe(jail
, &ctx
.pipes
.stdout
, 0);
1422 r
= pakfire_jail_setup_pipe(jail
, &ctx
.pipes
.stderr
, 0);
1427 // Setup pipes for logging
1429 r
= pakfire_jail_setup_pipe(jail
, &ctx
.pipes
.log_INFO
, O_CLOEXEC
);
1434 r
= pakfire_jail_setup_pipe(jail
, &ctx
.pipes
.log_ERROR
, O_CLOEXEC
);
1440 r
= pakfire_jail_setup_pipe(jail
, &ctx
.pipes
.log_DEBUG
, O_CLOEXEC
);
1443 #endif /* ENABLE_DEBUG */
1445 // Configure child process
1446 struct clone_args args
= {
1455 .exit_signal
= SIGCHLD
,
1456 .pidfd
= (long long unsigned int)&ctx
.pidfd
,
1459 // Launch the process in a cgroup that is a leaf of the configured cgroup
1461 args
.flags
|= CLONE_INTO_CGROUP
;
1464 const char* uuid
= pakfire_jail_uuid(jail
);
1466 // Create a temporary cgroup
1467 r
= pakfire_cgroup_child(&ctx
.cgroup
, jail
->cgroup
, uuid
, 0);
1469 ERROR(jail
->pakfire
, "Could not create cgroup for jail: %m\n");
1473 // Clone into this cgroup
1474 args
.cgroup
= pakfire_cgroup_fd(ctx
.cgroup
);
1477 // Fork this process
1478 ctx
.pid
= clone3(&args
, sizeof(args
));
1480 ERROR(jail
->pakfire
, "Could not clone: %m\n");
1484 } else if (ctx
.pid
== 0) {
1485 r
= pakfire_jail_child(jail
, &ctx
, argv
);
1490 r
= pakfire_jail_parent(jail
, &ctx
);
1494 DEBUG(jail
->pakfire
, "Waiting for PID %d to finish its work\n", ctx
.pid
);
1496 // Read output of the child process
1497 r
= pakfire_jail_wait(jail
, &ctx
);
1501 // Handle exit status
1502 switch (ctx
.status
.si_code
) {
1504 DEBUG(jail
->pakfire
, "The child process exited with code %d\n",
1505 ctx
.status
.si_status
);
1508 exit
= ctx
.status
.si_status
;
1513 ERROR(jail
->pakfire
, "The child process was killed\n");
1516 // Log anything else
1518 ERROR(jail
->pakfire
, "Unknown child exit code: %d\n", ctx
.status
.si_code
);
1523 // Destroy the temporary cgroup (if any)
1525 // Read cgroup stats
1526 r
= pakfire_cgroup_stat(ctx
.cgroup
, &ctx
.cgroup_stats
);
1528 ERROR(jail
->pakfire
, "Could not read cgroup stats: %m\n");
1530 pakfire_cgroup_stat_dump(ctx
.cgroup
, &ctx
.cgroup_stats
);
1533 pakfire_cgroup_destroy(ctx
.cgroup
);
1534 pakfire_cgroup_unref(ctx
.cgroup
);
1537 // Close any file descriptors
1538 pakfire_jail_close_pipe(jail
, ctx
.pipes
.stdin
);
1539 pakfire_jail_close_pipe(jail
, ctx
.pipes
.stdout
);
1540 pakfire_jail_close_pipe(jail
, ctx
.pipes
.stderr
);
1543 pakfire_jail_close_pipe(jail
, ctx
.pipes
.log_INFO
);
1544 pakfire_jail_close_pipe(jail
, ctx
.pipes
.log_ERROR
);
1545 pakfire_jail_close_pipe(jail
, ctx
.pipes
.log_DEBUG
);
1550 PAKFIRE_EXPORT
int pakfire_jail_exec(
1551 struct pakfire_jail
* jail
,
1553 pakfire_jail_communicate_in callback_in
,
1554 pakfire_jail_communicate_out callback_out
,
1556 return __pakfire_jail_exec(jail
, argv
, 0, callback_in
, callback_out
, data
);
1559 static int pakfire_jail_exec_interactive(
1560 struct pakfire_jail
* jail
, const char* argv
[]) {
1563 // Setup interactive stuff
1564 r
= pakfire_jail_setup_interactive_env(jail
);
1568 return __pakfire_jail_exec(jail
, argv
, 1, NULL
, NULL
, NULL
);
1571 int pakfire_jail_exec_script(struct pakfire_jail
* jail
,
1575 pakfire_jail_communicate_in callback_in
,
1576 pakfire_jail_communicate_out callback_out
,
1578 char path
[PATH_MAX
];
1579 const char** argv
= NULL
;
1583 const char* root
= pakfire_get_path(jail
->pakfire
);
1585 // Write the scriptlet to disk
1586 r
= pakfire_path_join(path
, root
, PAKFIRE_TMP_DIR
"/pakfire-script.XXXXXX");
1590 // Create a temporary file
1591 f
= pakfire_mktemp(path
, 0700);
1593 ERROR(jail
->pakfire
, "Could not create temporary file: %m\n");
1597 DEBUG(jail
->pakfire
, "Writing script to %s:\n%.*s\n", path
, (int)size
, script
);
1600 r
= fprintf(f
, "%s", script
);
1602 ERROR(jail
->pakfire
, "Could not write script to file %s: %m\n", path
);
1609 ERROR(jail
->pakfire
, "Could not close script file %s: %m\n", path
);
1615 // Count how many arguments were passed
1616 unsigned int argc
= 1;
1618 for (const char** arg
= args
; *arg
; arg
++)
1622 argv
= calloc(argc
+ 1, sizeof(*argv
));
1624 ERROR(jail
->pakfire
, "Could not allocate argv: %m\n");
1629 argv
[0] = (root
) ? pakfire_path_relpath(root
, path
) : path
;
1632 for (unsigned int i
= 1; i
< argc
; i
++)
1633 argv
[i
] = args
[i
-1];
1636 r
= pakfire_jail_exec(jail
, argv
, callback_in
, callback_out
, data
);
1644 // Remove script from disk
1652 A convenience function that creates a new jail, runs the given command and destroys
1655 int pakfire_jail_run(struct pakfire
* pakfire
, const char* argv
[], int flags
, char** output
) {
1656 struct pakfire_jail
* jail
= NULL
;
1659 // Create a new jail
1660 r
= pakfire_jail_create(&jail
, pakfire
, flags
);
1664 // Execute the command
1665 r
= pakfire_jail_exec(jail
, argv
, NULL
, pakfire_jail_capture_stdout
, output
);
1669 pakfire_jail_unref(jail
);
1674 int pakfire_jail_run_script(struct pakfire
* pakfire
,
1675 const char* script
, const size_t length
, const char* argv
[], int flags
,
1676 pakfire_jail_communicate_in communicate_in
,
1677 pakfire_jail_communicate_out communicate_out
,
1679 struct pakfire_jail
* jail
= NULL
;
1682 // Create a new jail
1683 r
= pakfire_jail_create(&jail
, pakfire
, flags
);
1687 // Execute the command
1688 r
= pakfire_jail_exec_script(jail
, script
, length
, argv
,
1689 communicate_in
, communicate_out
, data
);
1693 pakfire_jail_unref(jail
);
/*
	Launches an interactive login shell (/bin/bash --login) in the jail
	and returns whatever the interactive execution returns.
*/
int pakfire_jail_shell(struct pakfire_jail* jail) {
	const char* argv[3];

	// Assemble the command line
	argv[0] = "/bin/bash";
	argv[1] = "--login";
	argv[2] = NULL;

	// Execute /bin/bash
	return pakfire_jail_exec_interactive(jail, argv);
}
1707 int pakfire_jail_ldconfig(struct pakfire
* pakfire
) {
1708 char path
[PATH_MAX
];
1710 const char* ldconfig
= "/sbin/ldconfig";
1712 // Check if ldconfig exists before calling it to avoid overhead
1713 int r
= pakfire_path(pakfire
, path
, "%s", ldconfig
);
1717 // Check if ldconfig is executable
1718 r
= access(path
, X_OK
);
1720 DEBUG(pakfire
, "%s is not executable. Skipping...\n", ldconfig
);
1724 const char* argv
[] = {
1729 return pakfire_jail_run(pakfire
, argv
, 0, NULL
);