1 /*#############################################################################
3 # Pakfire - The IPFire package management system #
4 # Copyright (C) 2022 Pakfire development team #
6 # This program is free software: you can redistribute it and/or modify #
7 # it under the terms of the GNU General Public License as published by #
8 # the Free Software Foundation, either version 3 of the License, or #
9 # (at your option) any later version. #
11 # This program is distributed in the hope that it will be useful, #
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of #
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
14 # GNU General Public License for more details. #
16 # You should have received a copy of the GNU General Public License #
17 # along with this program. If not, see <http://www.gnu.org/licenses/>. #
19 #############################################################################*/
22 #include <linux/capability.h>
23 #include <linux/fcntl.h>
24 #include <linux/sched.h>
26 #include <linux/wait.h>
31 #include <sys/capability.h>
32 #include <sys/epoll.h>
33 #include <sys/eventfd.h>
34 #include <sys/personality.h>
35 #include <sys/prctl.h>
36 #include <sys/resource.h>
37 #include <sys/types.h>
46 #include <pakfire/arch.h>
47 #include <pakfire/cgroup.h>
48 #include <pakfire/jail.h>
49 #include <pakfire/logging.h>
50 #include <pakfire/mount.h>
51 #include <pakfire/pakfire.h>
52 #include <pakfire/private.h>
53 #include <pakfire/pwd.h>
54 #include <pakfire/string.h>
55 #include <pakfire/util.h>
// Size in bytes of each log buffer. The value is parenthesised so that the
// macro expands as one value when it is used inside a larger expression.
#define BUFFER_SIZE (1024 * 64)
// Number of slots in the environment array of a jail
#define ENVIRON_SIZE 128
// Maximum number of events fetched by a single epoll_wait() call
#define EPOLL_MAX_EVENTS 2
// Number of slots in the array of custom mountpoints of a jail
#define MAX_MOUNTPOINTS 8
62 // The default environment that will be set for every command
63 static const struct environ
{
67 { "LANG", "en_US.utf-8" },
68 { "PATH", "/usr/local/sbin:/usr/sbin:/sbin:/usr/local/bin:/usr/bin:/bin", },
73 struct pakfire_jail_mountpoint
{
74 char source
[PATH_MAX
];
75 char target
[PATH_MAX
];
80 struct pakfire
* pakfire
;
83 // A unique ID for each jail
85 char __uuid
[UUID_STR_LEN
];
94 struct pakfire_cgroup
* cgroup
;
97 char* env
[ENVIRON_SIZE
];
100 pakfire_jail_log_callback log_callback
;
104 struct pakfire_jail_mountpoint mountpoints
[MAX_MOUNTPOINTS
];
105 unsigned int num_mountpoints
;
108 struct pakfire_log_buffer
{
109 char data
[BUFFER_SIZE
];
113 struct pakfire_jail_exec
{
114 // PID (of the child)
118 // Process status (from waitid)
121 // FD to notify the client that the parent has finished initialization
125 struct pakfire_jail_pipes
{
136 struct pakfire_jail_buffers
{
137 struct pakfire_log_buffer stdout
;
138 struct pakfire_log_buffer stderr
;
141 struct pakfire_log_buffer log_INFO
;
142 struct pakfire_log_buffer log_ERROR
;
143 struct pakfire_log_buffer log_DEBUG
;
146 struct pakfire_cgroup
* cgroup
;
147 struct pakfire_cgroup_stats cgroup_stats
;
/*
	Thin wrapper that issues the clone3 system call through syscall(),
	handing over the argument structure and its size unchanged.

	Returns whatever syscall() returned.
*/
static int clone3(struct clone_args* args, size_t size) {
	const long result = syscall(__NR_clone3, args, size);

	return result;
}
154 static void pakfire_jail_free(struct pakfire_jail
* jail
) {
155 DEBUG(jail
->pakfire
, "Freeing jail at %p\n", jail
);
158 for (unsigned int i
= 0; jail
->env
[i
]; i
++)
162 pakfire_cgroup_unref(jail
->cgroup
);
164 pakfire_unref(jail
->pakfire
);
169 Passes any log messages on to the default pakfire log callback
171 static int pakfire_jail_default_log_callback(struct pakfire
* pakfire
, void* data
,
172 int priority
, const char* line
, size_t length
) {
175 INFO(pakfire
, "%s", line
);
179 ERROR(pakfire
, "%s", line
);
184 DEBUG(pakfire
, "%s", line
);
192 static int pakfire_jail_setup_interactive_env(struct pakfire_jail
* jail
) {
194 int r
= pakfire_jail_set_env(jail
, "PS1", "pakfire-jail \\w> ");
199 char* TERM
= secure_getenv("TERM");
201 r
= pakfire_jail_set_env(jail
, "TERM", TERM
);
207 char* LANG
= secure_getenv("LANG");
209 r
= pakfire_jail_set_env(jail
, "LANG", LANG
);
217 PAKFIRE_EXPORT
int pakfire_jail_create(struct pakfire_jail
** jail
,
218 struct pakfire
* pakfire
, int flags
) {
221 // Allocate a new jail
222 struct pakfire_jail
* j
= calloc(1, sizeof(*j
));
227 j
->pakfire
= pakfire_ref(pakfire
);
229 // Initialize reference counter
235 // Generate a random UUID
236 uuid_generate_random(j
->uuid
);
238 DEBUG(j
->pakfire
, "Allocated new jail at %p\n", j
);
240 // Set default log callback
241 r
= pakfire_jail_set_log_callback(j
, pakfire_jail_default_log_callback
, NULL
);
245 // Set default environment
246 for (const struct environ
* e
= ENV
; e
->key
; e
++) {
247 r
= pakfire_jail_set_env(j
, e
->key
, e
->val
);
257 pakfire_jail_free(j
);
262 PAKFIRE_EXPORT
struct pakfire_jail
* pakfire_jail_ref(struct pakfire_jail
* jail
) {
268 PAKFIRE_EXPORT
struct pakfire_jail
* pakfire_jail_unref(struct pakfire_jail
* jail
) {
269 if (--jail
->nrefs
> 0)
272 pakfire_jail_free(jail
);
276 static const char* pakfire_jail_uuid(struct pakfire_jail
* jail
) {
278 uuid_unparse_lower(jail
->uuid
, jail
->__uuid
);
285 PAKFIRE_EXPORT
int pakfire_jail_nice(struct pakfire_jail
* jail
, int nice
) {
286 // Check if nice level is in range
287 if (nice
< -19 || nice
> 20) {
298 int pakfire_jail_set_cgroup(struct pakfire_jail
* jail
, struct pakfire_cgroup
* cgroup
) {
299 // Free any previous cgroup
301 pakfire_cgroup_unref(jail
->cgroup
);
305 // Set any new cgroup
307 DEBUG(jail
->pakfire
, "Setting cgroup %p\n", cgroup
);
309 jail
->cgroup
= pakfire_cgroup_ref(cgroup
);
318 // Returns the length of the environment
319 static unsigned int pakfire_jail_env_length(struct pakfire_jail
* jail
) {
322 // Count everything in the environment
323 for (char** e
= jail
->env
; *e
; e
++)
329 // Finds an existing environment variable and returns its index or -1 if not found
330 static int pakfire_jail_find_env(struct pakfire_jail
* jail
, const char* key
) {
336 char buffer
[strlen(key
) + 2];
337 pakfire_string_format(buffer
, "%s=", key
);
339 for (unsigned int i
= 0; jail
->env
[i
]; i
++) {
340 if (pakfire_string_startswith(jail
->env
[i
], buffer
))
348 // Returns the value of an environment variable or NULL
349 PAKFIRE_EXPORT
const char* pakfire_jail_get_env(struct pakfire_jail
* jail
,
351 int i
= pakfire_jail_find_env(jail
, key
);
355 return jail
->env
[i
] + strlen(key
) + 1;
358 // Sets an environment variable
359 PAKFIRE_EXPORT
int pakfire_jail_set_env(struct pakfire_jail
* jail
,
360 const char* key
, const char* value
) {
361 // Find the index where to write this value to
362 int i
= pakfire_jail_find_env(jail
, key
);
364 i
= pakfire_jail_env_length(jail
);
366 // Return -ENOSPC when the environment is full
367 if (i
>= ENVIRON_SIZE
) {
372 // Free any previous value
376 // Format and set environment variable
377 asprintf(&jail
->env
[i
], "%s=%s", key
, value
);
379 DEBUG(jail
->pakfire
, "Set environment variable: %s\n", jail
->env
[i
]);
384 // Imports an environment
385 PAKFIRE_EXPORT
int pakfire_jail_import_env(struct pakfire_jail
* jail
, const char* env
[]) {
393 // Copy environment variables
394 for (unsigned int i
= 0; env
[i
]; i
++) {
395 r
= pakfire_string_partition(env
[i
], "=", &key
, &val
);
400 r
= pakfire_jail_set_env(jail
, key
, val
);
417 PAKFIRE_EXPORT
int pakfire_jail_set_log_callback(struct pakfire_jail
* jail
,
418 pakfire_jail_log_callback callback
, void* data
) {
419 jail
->log_callback
= callback
;
420 jail
->log_data
= data
;
426 This function replaces any logging in the child process.
428 All log messages will be sent to the parent process through their respective pipes.
430 static void pakfire_jail_log(void* data
, int priority
, const char* file
,
431 int line
, const char* fn
, const char* format
, va_list args
) {
432 struct pakfire_jail_pipes
* pipes
= (struct pakfire_jail_pipes
*)data
;
437 fd
= pipes
->log_INFO
[1];
441 fd
= pipes
->log_ERROR
[1];
446 fd
= pipes
->log_DEBUG
[1];
448 #endif /* ENABLE_DEBUG */
450 // Ignore any messages of an unknown priority
455 // Send the log message
457 vdprintf(fd
, format
, args
);
460 static int pakfire_jail_log_buffer_is_full(const struct pakfire_log_buffer
* buffer
) {
461 return (sizeof(buffer
->data
) == buffer
->used
);
465 This function reads as much data as it can from the file descriptor.
466 If it finds a whole line in it, it will send it to the logger and repeat the process.
467 If no newline character is found, it will try to read more data until it finds one.
469 static int pakfire_jail_handle_log(struct pakfire_jail
* jail
,
470 struct pakfire_jail_exec
* ctx
, int priority
, int fd
,
471 struct pakfire_log_buffer
* buffer
, pakfire_jail_log_callback callback
, void* data
) {
472 char line
[BUFFER_SIZE
+ 1];
474 // Fill up buffer from fd
475 if (buffer
->used
< sizeof(buffer
->data
)) {
476 ssize_t bytes_read
= read(fd
, buffer
->data
+ buffer
->used
,
477 sizeof(buffer
->data
) - buffer
->used
);
480 if (bytes_read
< 0) {
481 ERROR(jail
->pakfire
, "Could not read from fd %d: %m\n", fd
);
485 // Update buffer size
486 buffer
->used
+= bytes_read
;
489 // See if we have any lines that we can write
490 while (buffer
->used
) {
491 // Search for the end of the first line
492 char* eol
= memchr(buffer
->data
, '\n', buffer
->used
);
496 // If the buffer is full, we send the content to the logger and try again
497 // This should not happen in practice
498 if (pakfire_jail_log_buffer_is_full(buffer
)) {
499 DEBUG(jail
->pakfire
, "Logging buffer is full. Sending all content\n");
501 eol
= buffer
->data
+ sizeof(buffer
->data
) - 1;
503 // Otherwise we might have only read parts of the output
508 // Find the length of the string
509 size_t length
= eol
- buffer
->data
+ 1;
511 // Copy the line into the buffer
512 memcpy(line
, buffer
->data
, length
);
514 // Terminate the string
519 int r
= callback(jail
->pakfire
, data
, priority
, line
, length
);
521 ERROR(jail
->pakfire
, "The logging callback returned an error: %d\n", r
);
526 // Remove line from buffer
527 memmove(buffer
->data
, buffer
->data
+ length
, buffer
->used
- length
);
528 buffer
->used
-= length
;
534 static int pakfire_jail_setup_pipe(struct pakfire_jail
* jail
, int (*fds
)[2], const int flags
) {
535 int r
= pipe2(*fds
, flags
);
537 ERROR(jail
->pakfire
, "Could not setup pipe: %m\n");
544 static void pakfire_jail_close_pipe(struct pakfire_jail
* jail
, int fds
[2]) {
545 for (unsigned int i
= 0; i
< 2; i
++)
551 This is a convenience function that fetches the reading end of a pipe and
552 closes the write end.
554 static int pakfire_jail_get_pipe(struct pakfire_jail
* jail
, int (*fds
)[2]) {
555 // Give the variables easier names to avoid confusion
556 int* fd_read
= &(*fds
)[0];
557 int* fd_write
= &(*fds
)[1];
559 // Close the write end of the pipe
565 // Return the read end
569 static int pakfire_jail_wait(struct pakfire_jail
* jail
, struct pakfire_jail_exec
* ctx
) {
571 struct epoll_event ev
;
572 struct epoll_event events
[EPOLL_MAX_EVENTS
];
575 // Fetch file descriptors from context
576 const int stdout
= pakfire_jail_get_pipe(jail
, &ctx
->pipes
.stdout
);
577 const int stderr
= pakfire_jail_get_pipe(jail
, &ctx
->pipes
.stderr
);
578 const int pidfd
= ctx
->pidfd
;
581 const int log_INFO
= pakfire_jail_get_pipe(jail
, &ctx
->pipes
.log_INFO
);
582 const int log_ERROR
= pakfire_jail_get_pipe(jail
, &ctx
->pipes
.log_ERROR
);
583 const int log_DEBUG
= pakfire_jail_get_pipe(jail
, &ctx
->pipes
.log_DEBUG
);
585 // Make a list of all file descriptors we are interested in
587 stdout
, stderr
, pidfd
, log_INFO
, log_ERROR
, log_DEBUG
,
591 epollfd
= epoll_create1(0);
593 ERROR(jail
->pakfire
, "Could not initialize epoll(): %m\n");
598 ev
.events
= EPOLLIN
|EPOLLHUP
;
600 // Turn file descriptors into non-blocking mode and add them to epoll()
601 for (unsigned int i
= 0; i
< sizeof(fds
) / sizeof(*fds
); i
++) {
604 // Skip fds which were not initialized
610 if (epoll_ctl(epollfd
, EPOLL_CTL_ADD
, fd
, &ev
) < 0) {
611 ERROR(jail
->pakfire
, "Could not add file descriptor %d to epoll(): %m\n", fd
);
619 // Loop for as long as the process is alive
621 int num
= epoll_wait(epollfd
, events
, EPOLL_MAX_EVENTS
, -1);
623 // Ignore if epoll_wait() has been interrupted
627 ERROR(jail
->pakfire
, "epoll_wait() failed: %m\n");
633 for (int i
= 0; i
< num
; i
++) {
634 int e
= events
[i
].events
;
635 int fd
= events
[i
].data
.fd
;
637 struct pakfire_log_buffer
* buffer
= NULL
;
638 pakfire_jail_log_callback callback
= NULL
;
642 // Check if there is any data to be read
644 // Handle any changes to the PIDFD
646 // Call waitid() and store the result
647 r
= waitid(P_PIDFD
, ctx
->pidfd
, &ctx
->status
, WEXITED
);
649 ERROR(jail
->pakfire
, "waitid() failed: %m\n");
653 // Mark that we have ended so that we will process the remaining
654 // events from epoll() now, but won't restart the outer loop.
658 // Handle logging messages
659 } else if (fd
== log_INFO
) {
660 buffer
= &ctx
->buffers
.log_INFO
;
663 callback
= pakfire_jail_default_log_callback
;
665 } else if (fd
== log_ERROR
) {
666 buffer
= &ctx
->buffers
.log_ERROR
;
669 callback
= pakfire_jail_default_log_callback
;
671 } else if (fd
== log_DEBUG
) {
672 buffer
= &ctx
->buffers
.log_DEBUG
;
673 priority
= LOG_DEBUG
;
675 callback
= pakfire_jail_default_log_callback
;
677 // Handle anything from the standard output and standard error pipes
678 } else if (fd
== stdout
) {
679 buffer
= &ctx
->buffers
.stdout
;
682 callback
= jail
->log_callback
;
683 data
= jail
->log_data
;
685 } else if (fd
== stderr
) {
686 buffer
= &ctx
->buffers
.stderr
;
689 callback
= jail
->log_callback
;
690 data
= jail
->log_data
;
693 DEBUG(jail
->pakfire
, "Received invalid file descriptor %d\n", fd
);
698 r
= pakfire_jail_handle_log(jail
, ctx
, priority
, fd
, buffer
, callback
, data
);
703 // Check if any file descriptors have been closed
705 // Remove the file descriptor
706 r
= epoll_ctl(epollfd
, EPOLL_CTL_DEL
, fd
, NULL
);
708 ERROR(jail
->pakfire
, "Could not remove closed file-descriptor %d: %m\n", fd
);
722 static int pakfire_jail_capture_stdout(struct pakfire
* pakfire
, void* data
, int priority
,
723 const char* line
, size_t length
) {
724 char** output
= (char**)data
;
727 // Append everything from stdout to a buffer
728 if (priority
== LOG_INFO
) {
729 r
= asprintf(output
, "%s%s", (output
&& *output
) ? *output
: "", line
);
735 // Send everything else to the default logger
736 return pakfire_jail_default_log_callback(pakfire
, NULL
, priority
, line
, length
);
741 static int pakfire_jail_drop_capabilities(struct pakfire_jail
* jail
) {
742 const int capabilities
[] = {
743 // Deny access to the kernel's audit system
748 // Deny suspending block devices
751 // Deny any stuff with BPF
754 // Deny checkpoint restore
755 CAP_CHECKPOINT_RESTORE
,
757 // Deny opening files by inode number (open_by_handle_at)
760 // Deny setting SUID bits
763 // Deny locking more memory
766 // Deny modifying any Apparmor/SELinux/SMACK configuration
770 // Deny creating any special devices
773 // Deny setting any capabilities
776 // Deny reading from syslog
779 // Deny any admin actions (mount, sethostname, ...)
782 // Deny rebooting the system
785 // Deny loading kernel modules
788 // Deny setting nice level
791 // Deny access to /proc/kcore, /dev/mem, /dev/kmem
794 // Deny circumventing any resource limits
797 // Deny setting the system time
800 // Deny playing with suspend
806 DEBUG(jail
->pakfire
, "Dropping capabilities...\n");
811 // Drop any capabilities
812 for (const int* cap
= capabilities
; *cap
; cap
++) {
813 r
= prctl(PR_CAPBSET_DROP
, *cap
, 0, 0, 0);
815 ERROR(jail
->pakfire
, "Could not drop capability %d: %m\n", *cap
);
822 // Fetch any capabilities
823 cap_t caps
= cap_get_proc();
825 ERROR(jail
->pakfire
, "Could not read capabilities: %m\n");
830 Set inheritable capabilities
832 This ensures that no processes will be able to gain any of the listed
835 r
= cap_set_flag(caps
, CAP_INHERITABLE
, num_caps
, capabilities
, CAP_CLEAR
);
837 ERROR(jail
->pakfire
, "cap_set_flag() failed: %m\n");
841 // Restore capabilities
842 r
= cap_set_proc(caps
);
844 ERROR(jail
->pakfire
, "Could not restore capabilities: %m\n");
857 static int pakfire_jail_limit_syscalls(struct pakfire_jail
* jail
) {
858 const int syscalls
[] = {
859 // The kernel's keyring isn't namespaced
862 SCMP_SYS(request_key
),
864 // Disable userfaultfd
865 SCMP_SYS(userfaultfd
),
867 // Disable perf which could leak a lot of information about the host
868 SCMP_SYS(perf_event_open
),
874 DEBUG(jail
->pakfire
, "Applying syscall filter...\n");
876 // Setup a syscall filter which allows everything by default
877 scmp_filter_ctx ctx
= seccomp_init(SCMP_ACT_ALLOW
);
879 ERROR(jail
->pakfire
, "Could not setup seccomp filter: %m\n");
884 for (const int* syscall
= syscalls
; *syscall
; syscall
++) {
885 r
= seccomp_rule_add(ctx
, SCMP_ACT_ERRNO(EPERM
), *syscall
, 0);
887 ERROR(jail
->pakfire
, "Could not configure syscall %d: %m\n", *syscall
);
892 // Load syscall filter into the kernel
893 r
= seccomp_load(ctx
);
895 ERROR(jail
->pakfire
, "Could not load syscall filter into the kernel: %m\n");
901 seccomp_release(ctx
);
908 PAKFIRE_EXPORT
int pakfire_jail_bind(struct pakfire_jail
* jail
,
909 const char* source
, const char* target
, int flags
) {
910 struct pakfire_jail_mountpoint
* mp
= NULL
;
913 // Check if there is any space left
914 if (jail
->num_mountpoints
>= MAX_MOUNTPOINTS
) {
919 // Check for valid inputs
920 if (!source
|| !target
) {
925 // Select the next free slot
926 mp
= &jail
->mountpoints
[jail
->num_mountpoints
];
929 r
= pakfire_string_set(mp
->source
, source
);
931 ERROR(jail
->pakfire
, "Could not copy source: %m\n");
936 r
= pakfire_string_set(mp
->target
, target
);
938 ERROR(jail
->pakfire
, "Could not copy target: %m\n");
946 jail
->num_mountpoints
++;
952 Mounts everything that we require in the new namespace
954 static int pakfire_jail_mount(struct pakfire_jail
* jail
) {
955 struct pakfire_jail_mountpoint
* mp
= NULL
;
958 // Mount all default stuff
959 r
= pakfire_mount_all(jail
->pakfire
);
963 // Mount all custom stuff
964 for (unsigned int i
= 0; i
< jail
->num_mountpoints
; i
++) {
966 mp
= &jail
->mountpoints
[i
];
969 r
= pakfire_bind(jail
->pakfire
, mp
->source
, mp
->target
, mp
->flags
);
974 // Log all mountpoints
975 pakfire_mount_list(jail
->pakfire
);
982 static int pakfire_jail_setup_uid_mapping(struct pakfire_jail
* jail
, pid_t pid
) {
986 // Skip mapping anything when running on /
987 if (pakfire_on_root(jail
->pakfire
))
991 r
= pakfire_string_format(path
, "/proc/%d/uid_map", pid
);
996 const uid_t uid
= pakfire_uid(jail
->pakfire
);
999 const struct pakfire_subid
* subuid
= pakfire_subuid(jail
->pakfire
);
1003 /* When running as root, we will map the entire range.
1005 When running as a non-privileged user, we will map the root user inside the jail
1006 to the user's UID outside of the jail, and we will map the rest starting from one.
1011 r
= pakfire_file_write(jail
->pakfire
, path
, 0, 0, 0,
1012 "0 %lu %lu\n", subuid
->id
, subuid
->length
);
1014 r
= pakfire_file_write(jail
->pakfire
, path
, 0, 0, 0,
1015 "0 %lu 1\n%1 %lu %lu\n", uid
, subuid
->id
, subuid
->length
);
1019 ERROR(jail
->pakfire
, "Could not map UIDs: %m\n");
1026 static int pakfire_jail_setup_gid_mapping(struct pakfire_jail
* jail
, pid_t pid
) {
1027 char path
[PATH_MAX
];
1030 // Skip mapping anything when running on /
1031 if (pakfire_on_root(jail
->pakfire
))
1035 const gid_t gid
= pakfire_gid(jail
->pakfire
);
1038 const struct pakfire_subid
* subgid
= pakfire_subgid(jail
->pakfire
);
1043 r
= pakfire_string_format(path
, "/proc/%d/gid_map", pid
);
1049 r
= pakfire_file_write(jail
->pakfire
, path
, 0, 0, 0,
1050 "0 %lu %lu\n", subgid
->id
, subgid
->length
);
1052 r
= pakfire_file_write(jail
->pakfire
, path
, 0, 0, 0,
1053 "0 %lu 1\n%1 %lu %lu\n", gid
, subgid
->id
, subgid
->length
);
1057 ERROR(jail
->pakfire
, "Could not map GIDs: %m\n");
1064 static int pakfire_jail_setgroups(struct pakfire_jail
* jail
, pid_t pid
) {
1065 char path
[PATH_MAX
];
1069 r
= pakfire_string_format(path
, "/proc/%d/setgroups", pid
);
1073 // Open file for writing
1074 FILE* f
= fopen(path
, "w");
1076 ERROR(jail
->pakfire
, "Could not open %s for writing: %m\n", path
);
1081 int bytes_written
= fprintf(f
, "deny\n");
1082 if (bytes_written
<= 0) {
1083 ERROR(jail
->pakfire
, "Could not write to %s: %m\n", path
);
1090 ERROR(jail
->pakfire
, "Could not close %s: %m\n", path
);
1101 static int pakfire_jail_send_signal(struct pakfire_jail
* jail
, int fd
) {
1102 const uint64_t val
= 1;
1105 DEBUG(jail
->pakfire
, "Sending signal...\n");
1107 // Write to the file descriptor
1108 ssize_t bytes_written
= write(fd
, &val
, sizeof(val
));
1109 if (bytes_written
< 0 || (size_t)bytes_written
< sizeof(val
)) {
1110 ERROR(jail
->pakfire
, "Could not send signal: %m\n");
1114 // Close the file descriptor
1120 static int pakfire_jail_wait_for_signal(struct pakfire_jail
* jail
, int fd
) {
1124 DEBUG(jail
->pakfire
, "Waiting for signal...\n");
1126 ssize_t bytes_read
= read(fd
, &val
, sizeof(val
));
1127 if (bytes_read
< 0 || (size_t)bytes_read
< sizeof(val
)) {
1128 ERROR(jail
->pakfire
, "Error waiting for signal: %m\n");
1132 // Close the file descriptor
1139 Performs the initialisation that needs to happen in the parent part
1141 static int pakfire_jail_parent(struct pakfire_jail
* jail
, struct pakfire_jail_exec
* ctx
) {
1144 // Setup UID mapping
1145 r
= pakfire_jail_setup_uid_mapping(jail
, ctx
->pid
);
1149 // Write "deny" to /proc/PID/setgroups
1150 r
= pakfire_jail_setgroups(jail
, ctx
->pid
);
1154 // Setup GID mapping
1155 r
= pakfire_jail_setup_gid_mapping(jail
, ctx
->pid
);
1159 // Parent has finished initialisation
1160 DEBUG(jail
->pakfire
, "Parent has finished initialization\n");
1162 // Send signal to client
1163 r
= pakfire_jail_send_signal(jail
, ctx
->completed_fd
);
1170 static int pakfire_jail_child(struct pakfire_jail
* jail
, struct pakfire_jail_exec
* ctx
,
1171 const char* argv
[]) {
1174 // Redirect any logging to our log pipe
1175 pakfire_set_log_callback(jail
->pakfire
, pakfire_jail_log
, &ctx
->pipes
);
1178 r
= prctl(PR_SET_PDEATHSIG
, SIGKILL
, 0, 0, 0);
1180 ERROR(jail
->pakfire
, "Could not configure to die with parent: %m\n");
1185 pid_t pid
= getpid();
1187 DEBUG(jail
->pakfire
, "Launched child process in jail with PID %d\n", pid
);
1190 for (unsigned int i
= 0; argv
[i
]; i
++)
1191 DEBUG(jail
->pakfire
, " argv[%d] = %s\n", i
, argv
[i
]);
1193 // Wait for the parent to finish initialization
1194 r
= pakfire_jail_wait_for_signal(jail
, ctx
->completed_fd
);
1198 // Perform further initialization
1201 uid_t uid
= getuid();
1202 gid_t gid
= getgid();
1205 uid_t euid
= geteuid();
1206 gid_t egid
= getegid();
1208 DEBUG(jail
->pakfire
, " UID: %d (effective %d)\n", uid
, euid
);
1209 DEBUG(jail
->pakfire
, " GID: %d (effective %d)\n", gid
, egid
);
1211 // Check if we are (effectively) running as root
1212 if (uid
|| gid
|| euid
|| egid
) {
1213 ERROR(jail
->pakfire
, "Child process is not running as root\n");
1217 const char* root
= pakfire_get_path(jail
->pakfire
);
1218 const char* arch
= pakfire_get_arch(jail
->pakfire
);
1220 // Change root (unless root is /)
1221 if (!pakfire_on_root(jail
->pakfire
)) {
1223 r
= pakfire_jail_mount(jail
);
1230 ERROR(jail
->pakfire
, "chroot() to %s failed: %m\n", root
);
1234 // Change directory to /
1237 ERROR(jail
->pakfire
, "chdir() after chroot() failed: %m\n");
1243 unsigned long persona
= pakfire_arch_personality(arch
);
1245 r
= personality(persona
);
1247 ERROR(jail
->pakfire
, "Could not set personality (%x)\n", (unsigned int)persona
);
1254 DEBUG(jail
->pakfire
, "Setting nice level to %d\n", jail
->nice
);
1256 r
= setpriority(PRIO_PROCESS
, pid
, jail
->nice
);
1258 ERROR(jail
->pakfire
, "Could not set nice level: %m\n");
1263 // Close other end of log pipes
1264 close(ctx
->pipes
.log_INFO
[0]);
1265 close(ctx
->pipes
.log_ERROR
[0]);
1267 close(ctx
->pipes
.log_DEBUG
[0]);
1268 #endif /* ENABLE_DEBUG */
1270 // Connect standard output and error
1271 if (ctx
->pipes
.stdout
[1] && ctx
->pipes
.stderr
[1]) {
1272 r
= dup2(ctx
->pipes
.stdout
[1], STDOUT_FILENO
);
1274 ERROR(jail
->pakfire
, "Could not connect fd %d to stdout: %m\n",
1275 ctx
->pipes
.stdout
[1]);
1280 r
= dup2(ctx
->pipes
.stderr
[1], STDERR_FILENO
);
1282 ERROR(jail
->pakfire
, "Could not connect fd %d to stderr: %m\n",
1283 ctx
->pipes
.stderr
[1]);
1288 // Close the pipe (as we have moved the original file descriptors)
1289 pakfire_jail_close_pipe(jail
, ctx
->pipes
.stdout
);
1290 pakfire_jail_close_pipe(jail
, ctx
->pipes
.stderr
);
1293 // Reset open file limit (http://0pointer.net/blog/file-descriptor-limits.html)
1294 r
= pakfire_rlimit_reset_nofile(jail
->pakfire
);
1298 // Drop capabilities
1299 r
= pakfire_jail_drop_capabilities(jail
);
1304 r
= pakfire_jail_limit_syscalls(jail
);
1309 r
= execvpe(argv
[0], (char**)argv
, jail
->env
);
1311 ERROR(jail
->pakfire
, "Could not execve(): %m\n");
1313 // Translate errno into regular exit code
1323 // We should not get here
1327 // Run a command in the jail
1328 static int __pakfire_jail_exec(struct pakfire_jail
* jail
, const char* argv
[],
1329 const int interactive
) {
1333 // Check if argv is valid
1334 if (!argv
|| !argv
[0]) {
1339 // Initialize context for this call
1340 struct pakfire_jail_exec ctx
= {
1347 DEBUG(jail
->pakfire
, "Executing jail...\n");
1350 Setup a file descriptor which can be used to notify the client that the parent
1351 has completed configuration.
1353 ctx
.completed_fd
= eventfd(0, EFD_CLOEXEC
);
1354 if (ctx
.completed_fd
< 0) {
1355 ERROR(jail
->pakfire
, "eventfd() failed: %m\n");
1359 // Create pipes to communicate with child process if we are not running interactively
1362 r
= pakfire_jail_setup_pipe(jail
, &ctx
.pipes
.stdout
, 0);
1367 r
= pakfire_jail_setup_pipe(jail
, &ctx
.pipes
.stderr
, 0);
1372 // Setup pipes for logging
1374 r
= pakfire_jail_setup_pipe(jail
, &ctx
.pipes
.log_INFO
, O_CLOEXEC
);
1379 r
= pakfire_jail_setup_pipe(jail
, &ctx
.pipes
.log_ERROR
, O_CLOEXEC
);
1385 r
= pakfire_jail_setup_pipe(jail
, &ctx
.pipes
.log_DEBUG
, O_CLOEXEC
);
1388 #endif /* ENABLE_DEBUG */
1390 // Configure child process
1391 struct clone_args args
= {
1400 .exit_signal
= SIGCHLD
,
1401 .pidfd
= (long long unsigned int)&ctx
.pidfd
,
1404 // Launch the process in a cgroup that is a leaf of the configured cgroup
1406 args
.flags
|= CLONE_INTO_CGROUP
;
1409 const char* uuid
= pakfire_jail_uuid(jail
);
1411 // Create a temporary cgroup
1412 r
= pakfire_cgroup_child(&ctx
.cgroup
, jail
->cgroup
, uuid
, 0);
1414 ERROR(jail
->pakfire
, "Could not create cgroup for jail: %m\n");
1418 // Clone into this cgroup
1419 args
.cgroup
= pakfire_cgroup_fd(ctx
.cgroup
);
1422 // Fork this process
1423 ctx
.pid
= clone3(&args
, sizeof(args
));
1425 ERROR(jail
->pakfire
, "Could not clone: %m\n");
1429 } else if (ctx
.pid
== 0) {
1430 r
= pakfire_jail_child(jail
, &ctx
, argv
);
1435 r
= pakfire_jail_parent(jail
, &ctx
);
1439 DEBUG(jail
->pakfire
, "Waiting for PID %d to finish its work\n", ctx
.pid
);
1441 // Read output of the child process
1442 r
= pakfire_jail_wait(jail
, &ctx
);
1446 // Handle exit status
1447 switch (ctx
.status
.si_code
) {
1449 DEBUG(jail
->pakfire
, "The child process exited with code %d\n",
1450 ctx
.status
.si_status
);
1453 exit
= ctx
.status
.si_status
;
1458 ERROR(jail
->pakfire
, "The child process was killed\n");
1461 // Log anything else
1463 ERROR(jail
->pakfire
, "Unknown child exit code: %d\n", ctx
.status
.si_code
);
1468 // Destroy the temporary cgroup (if any)
1470 // Read cgroup stats
1471 r
= pakfire_cgroup_stat(ctx
.cgroup
, &ctx
.cgroup_stats
);
1473 ERROR(jail
->pakfire
, "Could not read cgroup stats: %m\n");
1475 pakfire_cgroup_stat_dump(ctx
.cgroup
, &ctx
.cgroup_stats
);
1478 pakfire_cgroup_destroy(ctx
.cgroup
);
1479 pakfire_cgroup_unref(ctx
.cgroup
);
1482 // Close any file descriptors
1483 pakfire_jail_close_pipe(jail
, ctx
.pipes
.stdout
);
1484 pakfire_jail_close_pipe(jail
, ctx
.pipes
.stderr
);
1487 pakfire_jail_close_pipe(jail
, ctx
.pipes
.log_INFO
);
1488 pakfire_jail_close_pipe(jail
, ctx
.pipes
.log_ERROR
);
1489 pakfire_jail_close_pipe(jail
, ctx
.pipes
.log_DEBUG
);
1494 PAKFIRE_EXPORT
int pakfire_jail_exec(struct pakfire_jail
* jail
,
1495 const char* argv
[], char** output
) {
1498 // Store logging callback
1499 pakfire_jail_log_callback log_callback
= jail
->log_callback
;
1500 void* log_data
= jail
->log_data
;
1502 // Capture output if requested by user
1504 pakfire_jail_set_log_callback(jail
, pakfire_jail_capture_stdout
, output
);
1507 r
= __pakfire_jail_exec(jail
, argv
, 0);
1509 // Restore log callback
1510 pakfire_jail_set_log_callback(jail
, log_callback
, log_data
);
1515 static int pakfire_jail_exec_interactive(
1516 struct pakfire_jail
* jail
, const char* argv
[]) {
1519 // Setup interactive stuff
1520 r
= pakfire_jail_setup_interactive_env(jail
);
1524 return __pakfire_jail_exec(jail
, argv
, 1);
1527 PAKFIRE_EXPORT
int pakfire_jail_exec_script(struct pakfire_jail
* jail
,
1528 const char* script
, const size_t size
, const char* args
[], char** output
) {
1529 char path
[PATH_MAX
];
1530 const char** argv
= NULL
;
1533 const char* root
= pakfire_get_path(jail
->pakfire
);
1535 // Write the scriptlet to disk
1536 r
= pakfire_path_join(path
, root
, "pakfire-script.XXXXXX");
1540 // Open a temporary file
1541 int fd
= mkstemp(path
);
1543 ERROR(jail
->pakfire
, "Could not open a temporary file: %m\n");
1548 DEBUG(jail
->pakfire
, "Writing script to %s:\n%.*s\n", path
, (int)size
, script
);
1551 ssize_t bytes_written
= write(fd
, script
, size
);
1552 if (bytes_written
< (ssize_t
)size
) {
1553 ERROR(jail
->pakfire
, "Could not write script to file %s: %m\n", path
);
1558 // Make the script executable
1559 r
= fchmod(fd
, S_IRUSR
|S_IWUSR
|S_IXUSR
);
1561 ERROR(jail
->pakfire
, "Could not set executable permissions on %s: %m\n", path
);
1568 ERROR(jail
->pakfire
, "Could not close script file %s: %m\n", path
);
1573 // Count how many arguments were passed
1574 unsigned int argc
= 1;
1576 for (const char** arg
= args
; *arg
; arg
++)
1580 argv
= calloc(argc
+ 1, sizeof(*argv
));
1582 ERROR(jail
->pakfire
, "Could not allocate argv: %m\n");
1587 argv
[0] = (root
) ? pakfire_path_relpath(root
, path
) : path
;
1590 for (unsigned int i
= 1; i
< argc
; i
++)
1591 argv
[i
] = args
[i
-1];
1594 r
= pakfire_jail_exec(jail
, argv
, output
);
1600 // Remove script from disk
1608 A convenience function that creates a new jail, runs the given command and destroys
1611 int pakfire_jail_run(struct pakfire
* pakfire
, const char* argv
[], int flags
, char** output
) {
1612 struct pakfire_jail
* jail
= NULL
;
1615 // Create a new jail
1616 r
= pakfire_jail_create(&jail
, pakfire
, flags
);
1620 // Execute the command
1621 r
= pakfire_jail_exec(jail
, argv
, output
);
1625 pakfire_jail_unref(jail
);
1630 int pakfire_jail_run_script(struct pakfire
* pakfire
,
1631 const char* script
, const size_t length
, const char* argv
[], int flags
, char** output
) {
1632 struct pakfire_jail
* jail
= NULL
;
1635 // Create a new jail
1636 r
= pakfire_jail_create(&jail
, pakfire
, flags
);
1640 // Execute the command
1641 r
= pakfire_jail_exec_script(jail
, script
, length
, argv
, output
);
1645 pakfire_jail_unref(jail
);
/*
	Runs "/bin/bash --login" in the given jail by handing the command line
	to pakfire_jail_exec_interactive() and returns that function's result.
*/
int pakfire_jail_shell(struct pakfire_jail* jail) {
	// NULL-terminated command line of the login shell
	const char* shell[] = { "/bin/bash", "--login", NULL };

	return pakfire_jail_exec_interactive(jail, shell);
}
1659 int pakfire_jail_ldconfig(struct pakfire
* pakfire
) {
1660 char path
[PATH_MAX
];
1662 const char* ldconfig
= "/sbin/ldconfig";
1664 // Check if ldconfig exists before calling it to avoid overhead
1665 int r
= pakfire_path(pakfire
, path
, "%s", ldconfig
);
1669 // Check if ldconfig is executable
1670 r
= access(path
, X_OK
);
1672 DEBUG(pakfire
, "%s is not executable. Skipping...\n", ldconfig
);
1676 const char* argv
[] = {
1681 return pakfire_jail_run(pakfire
, argv
, 0, NULL
);