1 /*#############################################################################
3 # Pakfire - The IPFire package management system #
4 # Copyright (C) 2022 Pakfire development team #
6 # This program is free software: you can redistribute it and/or modify #
7 # it under the terms of the GNU General Public License as published by #
8 # the Free Software Foundation, either version 3 of the License, or #
9 # (at your option) any later version. #
11 # This program is distributed in the hope that it will be useful, #
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of #
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
14 # GNU General Public License for more details. #
16 # You should have received a copy of the GNU General Public License #
17 # along with this program. If not, see <http://www.gnu.org/licenses/>. #
19 #############################################################################*/
#include <errno.h>
#include <linux/capability.h>
#include <linux/sched.h>
#include <linux/wait.h>
#include <stdlib.h>
#include <sys/capability.h>
#include <sys/epoll.h>
#include <sys/eventfd.h>
#include <sys/mount.h>
#include <sys/personality.h>
#include <sys/prctl.h>
#include <sys/resource.h>
#include <sys/types.h>

#include <pakfire/arch.h>
#include <pakfire/cgroup.h>
#include <pakfire/jail.h>
#include <pakfire/logging.h>
#include <pakfire/mount.h>
#include <pakfire/pakfire.h>
#include <pakfire/private.h>
#include <pakfire/pwd.h>
#include <pakfire/string.h>
#include <pakfire/util.h>
// Size (in bytes) of each buffer that collects output from the child process.
// The expression is parenthesised so that the macro expands as one operand
// when it is used inside a larger expression (e.g. x / BUFFER_SIZE).
#define BUFFER_SIZE      (1024 * 64)

// Number of slots in the environment array of a jail
#define ENVIRON_SIZE     128

// Maximum number of events fetched by a single call to epoll_wait()
#define EPOLL_MAX_EVENTS 2

// Maximum number of custom mountpoints that can be stored per jail
#define MAX_MOUNTPOINTS  8
63 // The default environment that will be set for every command
64 static const struct environ
{
69 { "LANG", "en_US.utf-8" },
70 { "PATH", "/usr/local/sbin:/usr/sbin:/sbin:/usr/local/bin:/usr/bin:/bin", },
75 struct pakfire_jail_mountpoint
{
76 char source
[PATH_MAX
];
77 char target
[PATH_MAX
];
82 struct pakfire
* pakfire
;
85 // A unique ID for each jail
87 char __uuid
[UUID_STR_LEN
];
96 struct pakfire_cgroup
* cgroup
;
99 char* env
[ENVIRON_SIZE
];
102 struct pakfire_jail_mountpoint mountpoints
[MAX_MOUNTPOINTS
];
103 unsigned int num_mountpoints
;
106 struct pakfire_log_buffer
{
107 char data
[BUFFER_SIZE
];
111 enum pakfire_jail_exec_flags
{
112 PAKFIRE_JAIL_HAS_NETWORKING
= (1 << 0),
115 struct pakfire_jail_exec
{
118 // PID (of the child)
122 // Process status (from waitid)
125 // FD to notify the client that the parent has finished initialization
129 struct pakfire_jail_pipes
{
141 struct pakfire_jail_communicate
{
142 pakfire_jail_communicate_in in
;
143 pakfire_jail_communicate_out out
;
148 struct pakfire_jail_buffers
{
149 struct pakfire_log_buffer stdout
;
150 struct pakfire_log_buffer stderr
;
153 struct pakfire_log_buffer log_INFO
;
154 struct pakfire_log_buffer log_ERROR
;
155 struct pakfire_log_buffer log_DEBUG
;
158 struct pakfire_cgroup
* cgroup
;
159 struct pakfire_cgroup_stats cgroup_stats
;
/*
	Thin wrapper which invokes the clone3 system call through syscall().

	args and size are passed through unchanged and the result of syscall()
	is handed back to the caller.
*/
static int clone3(struct clone_args* args, size_t size) {
	const long result = syscall(__NR_clone3, args, size);

	return result;
}
166 static int pakfire_jail_exec_has_flag(
167 const struct pakfire_jail_exec
* ctx
, const enum pakfire_jail_exec_flags flag
) {
168 return ctx
->flags
& flag
;
171 static void pakfire_jail_free(struct pakfire_jail
* jail
) {
172 DEBUG(jail
->pakfire
, "Freeing jail at %p\n", jail
);
175 for (unsigned int i
= 0; jail
->env
[i
]; i
++)
179 pakfire_cgroup_unref(jail
->cgroup
);
181 pakfire_unref(jail
->pakfire
);
186 Passes any log messages on to the default pakfire log callback
188 static int pakfire_jail_default_log_callback(struct pakfire
* pakfire
, void* data
,
189 int priority
, const char* line
, size_t length
) {
192 INFO(pakfire
, "%s", line
);
196 ERROR(pakfire
, "%s", line
);
201 DEBUG(pakfire
, "%s", line
);
209 static int pakfire_jail_setup_interactive_env(struct pakfire_jail
* jail
) {
211 int r
= pakfire_jail_set_env(jail
, "PS1", "pakfire-jail \\w> ");
216 char* TERM
= secure_getenv("TERM");
218 r
= pakfire_jail_set_env(jail
, "TERM", TERM
);
224 char* LANG
= secure_getenv("LANG");
226 r
= pakfire_jail_set_env(jail
, "LANG", LANG
);
234 PAKFIRE_EXPORT
int pakfire_jail_create(struct pakfire_jail
** jail
,
235 struct pakfire
* pakfire
, int flags
) {
238 // Allocate a new jail
239 struct pakfire_jail
* j
= calloc(1, sizeof(*j
));
244 j
->pakfire
= pakfire_ref(pakfire
);
246 // Initialize reference counter
252 // Generate a random UUID
253 uuid_generate_random(j
->uuid
);
255 DEBUG(j
->pakfire
, "Allocated new jail at %p\n", j
);
257 // Set default environment
258 for (const struct environ
* e
= ENV
; e
->key
; e
++) {
259 r
= pakfire_jail_set_env(j
, e
->key
, e
->val
);
269 pakfire_jail_free(j
);
274 PAKFIRE_EXPORT
struct pakfire_jail
* pakfire_jail_ref(struct pakfire_jail
* jail
) {
280 PAKFIRE_EXPORT
struct pakfire_jail
* pakfire_jail_unref(struct pakfire_jail
* jail
) {
281 if (--jail
->nrefs
> 0)
284 pakfire_jail_free(jail
);
288 static const char* pakfire_jail_uuid(struct pakfire_jail
* jail
) {
290 uuid_unparse_lower(jail
->uuid
, jail
->__uuid
);
297 PAKFIRE_EXPORT
int pakfire_jail_nice(struct pakfire_jail
* jail
, int nice
) {
298 // Check if nice level is in range
299 if (nice
< -19 || nice
> 20) {
310 int pakfire_jail_set_cgroup(struct pakfire_jail
* jail
, struct pakfire_cgroup
* cgroup
) {
311 // Free any previous cgroup
313 pakfire_cgroup_unref(jail
->cgroup
);
317 // Set any new cgroup
319 DEBUG(jail
->pakfire
, "Setting cgroup %p\n", cgroup
);
321 jail
->cgroup
= pakfire_cgroup_ref(cgroup
);
330 // Returns the length of the environment
331 static unsigned int pakfire_jail_env_length(struct pakfire_jail
* jail
) {
334 // Count everything in the environment
335 for (char** e
= jail
->env
; *e
; e
++)
341 // Finds an existing environment variable and returns its index or -1 if not found
342 static int pakfire_jail_find_env(struct pakfire_jail
* jail
, const char* key
) {
348 char buffer
[strlen(key
) + 2];
349 pakfire_string_format(buffer
, "%s=", key
);
351 for (unsigned int i
= 0; jail
->env
[i
]; i
++) {
352 if (pakfire_string_startswith(jail
->env
[i
], buffer
))
360 // Returns the value of an environment variable or NULL
361 PAKFIRE_EXPORT
const char* pakfire_jail_get_env(struct pakfire_jail
* jail
,
363 int i
= pakfire_jail_find_env(jail
, key
);
367 return jail
->env
[i
] + strlen(key
) + 1;
370 // Sets an environment variable
371 PAKFIRE_EXPORT
int pakfire_jail_set_env(struct pakfire_jail
* jail
,
372 const char* key
, const char* value
) {
373 // Find the index where to write this value to
374 int i
= pakfire_jail_find_env(jail
, key
);
376 i
= pakfire_jail_env_length(jail
);
378 // Return -ENOSPC when the environment is full
379 if (i
>= ENVIRON_SIZE
) {
384 // Free any previous value
388 // Format and set environment variable
389 asprintf(&jail
->env
[i
], "%s=%s", key
, value
);
391 DEBUG(jail
->pakfire
, "Set environment variable: %s\n", jail
->env
[i
]);
396 // Imports an environment
397 PAKFIRE_EXPORT
int pakfire_jail_import_env(struct pakfire_jail
* jail
, const char* env
[]) {
405 // Copy environment variables
406 for (unsigned int i
= 0; env
[i
]; i
++) {
407 r
= pakfire_string_partition(env
[i
], "=", &key
, &val
);
412 r
= pakfire_jail_set_env(jail
, key
, val
);
428 This function replaces any logging in the child process.
430 All log messages will be sent to the parent process through their respective pipes.
432 static void pakfire_jail_log(void* data
, int priority
, const char* file
,
433 int line
, const char* fn
, const char* format
, va_list args
) {
434 struct pakfire_jail_pipes
* pipes
= (struct pakfire_jail_pipes
*)data
;
439 fd
= pipes
->log_INFO
[1];
443 fd
= pipes
->log_ERROR
[1];
448 fd
= pipes
->log_DEBUG
[1];
450 #endif /* ENABLE_DEBUG */
452 // Ignore any messages of an unknown priority
457 // Send the log message
459 vdprintf(fd
, format
, args
);
462 static int pakfire_jail_log_buffer_is_full(const struct pakfire_log_buffer
* buffer
) {
463 return (sizeof(buffer
->data
) == buffer
->used
);
467 This function reads as much data as it can from the file descriptor.
468 If it finds a whole line in it, it will send it to the logger and repeat the process.
	If no newline character is found, it will try to read more data until it finds one.
471 static int pakfire_jail_handle_log(struct pakfire_jail
* jail
,
472 struct pakfire_jail_exec
* ctx
, int priority
, int fd
,
473 struct pakfire_log_buffer
* buffer
, pakfire_jail_communicate_out callback
, void* data
) {
474 char line
[BUFFER_SIZE
+ 1];
476 // Fill up buffer from fd
477 if (buffer
->used
< sizeof(buffer
->data
)) {
478 ssize_t bytes_read
= read(fd
, buffer
->data
+ buffer
->used
,
479 sizeof(buffer
->data
) - buffer
->used
);
482 if (bytes_read
< 0) {
483 ERROR(jail
->pakfire
, "Could not read from fd %d: %m\n", fd
);
487 // Update buffer size
488 buffer
->used
+= bytes_read
;
491 // See if we have any lines that we can write
492 while (buffer
->used
) {
493 // Search for the end of the first line
494 char* eol
= memchr(buffer
->data
, '\n', buffer
->used
);
498 // If the buffer is full, we send the content to the logger and try again
			// This should not happen in practice
500 if (pakfire_jail_log_buffer_is_full(buffer
)) {
501 DEBUG(jail
->pakfire
, "Logging buffer is full. Sending all content\n");
503 eol
= buffer
->data
+ sizeof(buffer
->data
) - 1;
505 // Otherwise we might have only read parts of the output
510 // Find the length of the string
511 size_t length
= eol
- buffer
->data
+ 1;
513 // Copy the line into the buffer
514 memcpy(line
, buffer
->data
, length
);
516 // Terminate the string
521 int r
= callback(jail
->pakfire
, data
, priority
, line
, length
);
523 ERROR(jail
->pakfire
, "The logging callback returned an error: %d\n", r
);
528 // Remove line from buffer
529 memmove(buffer
->data
, buffer
->data
+ length
, buffer
->used
- length
);
530 buffer
->used
-= length
;
536 static int pakfire_jail_stream_stdin(struct pakfire_jail
* jail
,
537 struct pakfire_jail_exec
* ctx
, const int fd
) {
540 // Nothing to do if there is no stdin callback set
541 if (!ctx
->communicate
.in
) {
542 DEBUG(jail
->pakfire
, "Callback for standard input is not set\n");
546 // Skip if the writing pipe has already been closed
547 if (!ctx
->pipes
.stdin
[1])
550 DEBUG(jail
->pakfire
, "Streaming standard input...\n");
552 // Calling the callback
553 r
= ctx
->communicate
.in(jail
->pakfire
, ctx
->communicate
.data
, fd
);
555 DEBUG(jail
->pakfire
, "Standard input callback finished: %d\n", r
);
557 // The callback signaled that it has written everything
559 DEBUG(jail
->pakfire
, "Closing standard input pipe\n");
561 // Close the file-descriptor
564 // Reset the file-descriptor so it won't be closed again later
565 ctx
->pipes
.stdin
[1] = 0;
574 static int pakfire_jail_setup_pipe(struct pakfire_jail
* jail
, int (*fds
)[2], const int flags
) {
575 int r
= pipe2(*fds
, flags
);
577 ERROR(jail
->pakfire
, "Could not setup pipe: %m\n");
584 static void pakfire_jail_close_pipe(struct pakfire_jail
* jail
, int fds
[2]) {
585 for (unsigned int i
= 0; i
< 2; i
++)
591 This is a convenience function to fetch the reading end of a pipe and
592 closes the write end.
594 static int pakfire_jail_get_pipe_to_read(struct pakfire_jail
* jail
, int (*fds
)[2]) {
595 // Give the variables easier names to avoid confusion
596 int* fd_read
= &(*fds
)[0];
597 int* fd_write
= &(*fds
)[1];
599 // Close the write end of the pipe
605 // Return the read end
609 static int pakfire_jail_get_pipe_to_write(struct pakfire_jail
* jail
, int (*fds
)[2]) {
610 // Give the variables easier names to avoid confusion
611 int* fd_read
= &(*fds
)[0];
612 int* fd_write
= &(*fds
)[1];
614 // Close the read end of the pipe
620 // Return the write end
624 static int pakfire_jail_wait(struct pakfire_jail
* jail
, struct pakfire_jail_exec
* ctx
) {
626 struct epoll_event ev
;
627 struct epoll_event events
[EPOLL_MAX_EVENTS
];
630 // Fetch file descriptors from context
631 const int stdin
= pakfire_jail_get_pipe_to_write(jail
, &ctx
->pipes
.stdin
);
632 const int stdout
= pakfire_jail_get_pipe_to_read(jail
, &ctx
->pipes
.stdout
);
633 const int stderr
= pakfire_jail_get_pipe_to_read(jail
, &ctx
->pipes
.stderr
);
634 const int pidfd
= ctx
->pidfd
;
637 const int log_INFO
= pakfire_jail_get_pipe_to_read(jail
, &ctx
->pipes
.log_INFO
);
638 const int log_ERROR
= pakfire_jail_get_pipe_to_read(jail
, &ctx
->pipes
.log_ERROR
);
639 const int log_DEBUG
= pakfire_jail_get_pipe_to_read(jail
, &ctx
->pipes
.log_DEBUG
);
641 // Make a list of all file descriptors we are interested in
643 stdin
, stdout
, stderr
, pidfd
, log_INFO
, log_ERROR
, log_DEBUG
,
647 epollfd
= epoll_create1(0);
649 ERROR(jail
->pakfire
, "Could not initialize epoll(): %m\n");
654 // Turn file descriptors into non-blocking mode and add them to epoll()
655 for (unsigned int i
= 0; i
< sizeof(fds
) / sizeof(*fds
); i
++) {
658 // Skip fds which were not initialized
662 ev
.events
= EPOLLHUP
;
665 ev
.events
|= EPOLLOUT
;
667 ev
.events
|= EPOLLIN
;
670 int flags
= fcntl(fd
, F_GETFL
, 0);
672 // Set modified flags
673 if (fcntl(fd
, F_SETFL
, flags
|O_NONBLOCK
) < 0) {
675 "Could not set file descriptor %d into non-blocking mode: %m\n", fd
);
682 if (epoll_ctl(epollfd
, EPOLL_CTL_ADD
, fd
, &ev
) < 0) {
683 ERROR(jail
->pakfire
, "Could not add file descriptor %d to epoll(): %m\n", fd
);
691 // Loop for as long as the process is alive
693 int num
= epoll_wait(epollfd
, events
, EPOLL_MAX_EVENTS
, -1);
695 // Ignore if epoll_wait() has been interrupted
699 ERROR(jail
->pakfire
, "epoll_wait() failed: %m\n");
705 for (int i
= 0; i
< num
; i
++) {
706 int e
= events
[i
].events
;
707 int fd
= events
[i
].data
.fd
;
709 struct pakfire_log_buffer
* buffer
= NULL
;
710 pakfire_jail_communicate_out callback
= NULL
;
714 // Check if there is any data to be read
716 // Handle any changes to the PIDFD
					// Call waitid() and store the result
719 r
= waitid(P_PIDFD
, ctx
->pidfd
, &ctx
->status
, WEXITED
);
721 ERROR(jail
->pakfire
, "waitid() failed: %m\n");
725 // Mark that we have ended so that we will process the remaining
726 // events from epoll() now, but won't restart the outer loop.
730 // Handle logging messages
731 } else if (fd
== log_INFO
) {
732 buffer
= &ctx
->buffers
.log_INFO
;
735 callback
= pakfire_jail_default_log_callback
;
737 } else if (fd
== log_ERROR
) {
738 buffer
= &ctx
->buffers
.log_ERROR
;
741 callback
= pakfire_jail_default_log_callback
;
743 } else if (fd
== log_DEBUG
) {
744 buffer
= &ctx
->buffers
.log_DEBUG
;
745 priority
= LOG_DEBUG
;
747 callback
= pakfire_jail_default_log_callback
;
749 // Handle anything from the log pipes
750 } else if (fd
== stdout
) {
751 buffer
= &ctx
->buffers
.stdout
;
754 callback
= ctx
->communicate
.out
;
755 data
= ctx
->communicate
.data
;
757 } else if (fd
== stderr
) {
758 buffer
= &ctx
->buffers
.stderr
;
761 callback
= ctx
->communicate
.out
;
762 data
= ctx
->communicate
.data
;
765 DEBUG(jail
->pakfire
, "Received invalid file descriptor %d\n", fd
);
770 r
= pakfire_jail_handle_log(jail
, ctx
, priority
, fd
, buffer
, callback
, data
);
776 // Handle standard input
778 r
= pakfire_jail_stream_stdin(jail
, ctx
, fd
);
781 // Ignore if we filled up the buffer
786 ERROR(jail
->pakfire
, "Could not write to stdin: %m\n");
793 // Check if any file descriptors have been closed
795 // Remove the file descriptor
796 r
= epoll_ctl(epollfd
, EPOLL_CTL_DEL
, fd
, NULL
);
798 ERROR(jail
->pakfire
, "Could not remove closed file-descriptor %d: %m\n", fd
);
/*
	Output callback which collects all lines that are received with priority
	LOG_INFO in one heap-allocated string.

	data is interpreted as a char** where the collected output is stored.
	*data must either be NULL or point to a string allocated on the heap as it
	is released once the line has been appended. The caller owns the resulting
	string.

	The previous implementation passed the output pointer directly to asprintf()
	which overwrote it without releasing the old string (leaking memory on every
	call) and left it in an undefined state if asprintf() failed. The new string
	is now built first and only swapped in on success.

	Lines of any other priority (or all lines if data is NULL) are passed on to
	the default log callback.
*/
int pakfire_jail_capture_stdout(struct pakfire* pakfire, void* data,
		int priority, const char* line, size_t length) {
	char** output = (char**)data;

	// Append everything from stdout to a buffer
	if (output && priority == LOG_INFO) {
		char* previous = *output;
		char* combined = NULL;

		int r = asprintf(&combined, "%s%s", (previous) ? previous : "", line);

		// NOTE(review): the return codes of this branch are not visible in this
		// extract of the file; 1 on failure / 0 on success is reconstructed - confirm
		if (r < 0)
			return 1;

		// Release the old string and hand over the new one
		free(previous);
		*output = combined;

		return 0;
	}

	// Send everything else to the default logger
	return pakfire_jail_default_log_callback(pakfire, NULL, priority, line, length);
}
831 static int pakfire_jail_drop_capabilities(struct pakfire_jail
* jail
) {
832 const int capabilities
[] = {
833 // Deny access to the kernel's audit system
838 // Deny suspending block devices
841 // Deny any stuff with BPF
844 // Deny checkpoint restore
845 CAP_CHECKPOINT_RESTORE
,
847 // Deny opening files by inode number (open_by_handle_at)
850 // Deny setting SUID bits
853 // Deny locking more memory
856 // Deny modifying any Apparmor/SELinux/SMACK configuration
860 // Deny creating any special devices
863 // Deny setting any capabilities
866 // Deny reading from syslog
869 // Deny any admin actions (mount, sethostname, ...)
872 // Deny rebooting the system
875 // Deny loading kernel modules
878 // Deny setting nice level
881 // Deny access to /proc/kcore, /dev/mem, /dev/kmem
884 // Deny circumventing any resource limits
887 // Deny setting the system time
890 // Deny playing with suspend
896 DEBUG(jail
->pakfire
, "Dropping capabilities...\n");
901 // Drop any capabilities
902 for (const int* cap
= capabilities
; *cap
; cap
++) {
903 r
= prctl(PR_CAPBSET_DROP
, *cap
, 0, 0, 0);
905 ERROR(jail
->pakfire
, "Could not drop capability %d: %m\n", *cap
);
912 // Fetch any capabilities
913 cap_t caps
= cap_get_proc();
915 ERROR(jail
->pakfire
, "Could not read capabilities: %m\n");
920 Set inheritable capabilities
922 This ensures that no processes will be able to gain any of the listed
925 r
= cap_set_flag(caps
, CAP_INHERITABLE
, num_caps
, capabilities
, CAP_CLEAR
);
927 ERROR(jail
->pakfire
, "cap_set_flag() failed: %m\n");
931 // Restore capabilities
932 r
= cap_set_proc(caps
);
934 ERROR(jail
->pakfire
, "Could not restore capabilities: %m\n");
947 static int pakfire_jail_limit_syscalls(struct pakfire_jail
* jail
) {
948 const int syscalls
[] = {
949 // The kernel's keyring isn't namespaced
952 SCMP_SYS(request_key
),
954 // Disable userfaultfd
955 SCMP_SYS(userfaultfd
),
957 // Disable perf which could leak a lot of information about the host
958 SCMP_SYS(perf_event_open
),
964 DEBUG(jail
->pakfire
, "Applying syscall filter...\n");
966 // Setup a syscall filter which allows everything by default
967 scmp_filter_ctx ctx
= seccomp_init(SCMP_ACT_ALLOW
);
969 ERROR(jail
->pakfire
, "Could not setup seccomp filter: %m\n");
974 for (const int* syscall
= syscalls
; *syscall
; syscall
++) {
975 r
= seccomp_rule_add(ctx
, SCMP_ACT_ERRNO(EPERM
), *syscall
, 0);
977 ERROR(jail
->pakfire
, "Could not configure syscall %d: %m\n", *syscall
);
982 // Load syscall filter into the kernel
983 r
= seccomp_load(ctx
);
985 ERROR(jail
->pakfire
, "Could not load syscall filter into the kernel: %m\n");
991 seccomp_release(ctx
);
998 PAKFIRE_EXPORT
int pakfire_jail_bind(struct pakfire_jail
* jail
,
999 const char* source
, const char* target
, int flags
) {
1000 struct pakfire_jail_mountpoint
* mp
= NULL
;
1003 // Check if there is any space left
1004 if (jail
->num_mountpoints
>= MAX_MOUNTPOINTS
) {
1009 // Check for valid inputs
1010 if (!source
|| !target
) {
1015 // Select the next free slot
1016 mp
= &jail
->mountpoints
[jail
->num_mountpoints
];
1019 r
= pakfire_string_set(mp
->source
, source
);
1021 ERROR(jail
->pakfire
, "Could not copy source: %m\n");
1026 r
= pakfire_string_set(mp
->target
, target
);
1028 ERROR(jail
->pakfire
, "Could not copy target: %m\n");
1035 // Increment counter
1036 jail
->num_mountpoints
++;
1041 static int pakfire_jail_mount_networking(struct pakfire_jail
* jail
) {
1044 const char* paths
[] = {
1050 // Bind-mount all paths read-only
1051 for (const char** path
= paths
; *path
; path
++) {
1052 r
= pakfire_bind(jail
->pakfire
, *path
, NULL
, MS_RDONLY
);
1061 Mounts everything that we require in the new namespace
1063 static int pakfire_jail_mount(struct pakfire_jail
* jail
, struct pakfire_jail_exec
* ctx
) {
1064 struct pakfire_jail_mountpoint
* mp
= NULL
;
1067 // Mount all default stuff
1068 r
= pakfire_mount_all(jail
->pakfire
);
1072 // Mount networking stuff
1073 if (pakfire_jail_exec_has_flag(ctx
, PAKFIRE_JAIL_HAS_NETWORKING
)) {
1074 r
= pakfire_jail_mount_networking(jail
);
1079 // Mount all custom stuff
1080 for (unsigned int i
= 0; i
< jail
->num_mountpoints
; i
++) {
1082 mp
= &jail
->mountpoints
[i
];
1085 r
= pakfire_bind(jail
->pakfire
, mp
->source
, mp
->target
, mp
->flags
);
1090 // Log all mountpoints
1091 pakfire_mount_list(jail
->pakfire
);
1098 static int pakfire_jail_setup_uid_mapping(struct pakfire_jail
* jail
, pid_t pid
) {
1099 char path
[PATH_MAX
];
1102 // Skip mapping anything when running on /
1103 if (pakfire_on_root(jail
->pakfire
))
1107 r
= pakfire_string_format(path
, "/proc/%d/uid_map", pid
);
1112 const uid_t uid
= pakfire_uid(jail
->pakfire
);
1115 const struct pakfire_subid
* subuid
= pakfire_subuid(jail
->pakfire
);
1119 /* When running as root, we will map the entire range.
1121 When running as a non-privileged user, we will map the root user inside the jail
1122 to the user's UID outside of the jail, and we will map the rest starting from one.
1127 r
= pakfire_file_write(jail
->pakfire
, path
, 0, 0, 0,
1128 "0 %lu %lu\n", subuid
->id
, subuid
->length
);
1130 r
= pakfire_file_write(jail
->pakfire
, path
, 0, 0, 0,
1131 "0 %lu 1\n1 %lu %lu\n", uid
, subuid
->id
, subuid
->length
);
1135 ERROR(jail
->pakfire
, "Could not map UIDs: %m\n");
1142 static int pakfire_jail_setup_gid_mapping(struct pakfire_jail
* jail
, pid_t pid
) {
1143 char path
[PATH_MAX
];
1146 // Skip mapping anything when running on /
1147 if (pakfire_on_root(jail
->pakfire
))
1151 const gid_t gid
= pakfire_gid(jail
->pakfire
);
1154 const struct pakfire_subid
* subgid
= pakfire_subgid(jail
->pakfire
);
1159 r
= pakfire_string_format(path
, "/proc/%d/gid_map", pid
);
1165 r
= pakfire_file_write(jail
->pakfire
, path
, 0, 0, 0,
1166 "0 %lu %lu\n", subgid
->id
, subgid
->length
);
1168 r
= pakfire_file_write(jail
->pakfire
, path
, 0, 0, 0,
1169 "0 %lu 1\n%1 %lu %lu\n", gid
, subgid
->id
, subgid
->length
);
1173 ERROR(jail
->pakfire
, "Could not map GIDs: %m\n");
1180 static int pakfire_jail_setgroups(struct pakfire_jail
* jail
, pid_t pid
) {
1181 char path
[PATH_MAX
];
1185 r
= pakfire_string_format(path
, "/proc/%d/setgroups", pid
);
1189 // Open file for writing
1190 FILE* f
= fopen(path
, "w");
1192 ERROR(jail
->pakfire
, "Could not open %s for writing: %m\n", path
);
1197 int bytes_written
= fprintf(f
, "deny\n");
1198 if (bytes_written
<= 0) {
1199 ERROR(jail
->pakfire
, "Could not write to %s: %m\n", path
);
1206 ERROR(jail
->pakfire
, "Could not close %s: %m\n", path
);
1217 static int pakfire_jail_send_signal(struct pakfire_jail
* jail
, int fd
) {
1218 const uint64_t val
= 1;
1221 DEBUG(jail
->pakfire
, "Sending signal...\n");
1223 // Write to the file descriptor
1224 ssize_t bytes_written
= write(fd
, &val
, sizeof(val
));
1225 if (bytes_written
< 0 || (size_t)bytes_written
< sizeof(val
)) {
1226 ERROR(jail
->pakfire
, "Could not send signal: %m\n");
1230 // Close the file descriptor
1236 static int pakfire_jail_wait_for_signal(struct pakfire_jail
* jail
, int fd
) {
1240 DEBUG(jail
->pakfire
, "Waiting for signal...\n");
1242 ssize_t bytes_read
= read(fd
, &val
, sizeof(val
));
1243 if (bytes_read
< 0 || (size_t)bytes_read
< sizeof(val
)) {
1244 ERROR(jail
->pakfire
, "Error waiting for signal: %m\n");
1248 // Close the file descriptor
1255 Performs the initialisation that needs to happen in the parent part
1257 static int pakfire_jail_parent(struct pakfire_jail
* jail
, struct pakfire_jail_exec
* ctx
) {
1260 // Setup UID mapping
1261 r
= pakfire_jail_setup_uid_mapping(jail
, ctx
->pid
);
1265 // Write "deny" to /proc/PID/setgroups
1266 r
= pakfire_jail_setgroups(jail
, ctx
->pid
);
1270 // Setup GID mapping
1271 r
= pakfire_jail_setup_gid_mapping(jail
, ctx
->pid
);
1275 // Parent has finished initialisation
1276 DEBUG(jail
->pakfire
, "Parent has finished initialization\n");
1278 // Send signal to client
1279 r
= pakfire_jail_send_signal(jail
, ctx
->completed_fd
);
1286 static int pakfire_jail_child(struct pakfire_jail
* jail
, struct pakfire_jail_exec
* ctx
,
1287 const char* argv
[]) {
1290 // Redirect any logging to our log pipe
1291 pakfire_set_log_callback(jail
->pakfire
, pakfire_jail_log
, &ctx
->pipes
);
1294 r
= prctl(PR_SET_PDEATHSIG
, SIGKILL
, 0, 0, 0);
1296 ERROR(jail
->pakfire
, "Could not configure to die with parent: %m\n");
1301 pid_t pid
= getpid();
1303 DEBUG(jail
->pakfire
, "Launched child process in jail with PID %d\n", pid
);
1305 // Wait for the parent to finish initialization
1306 r
= pakfire_jail_wait_for_signal(jail
, ctx
->completed_fd
);
1310 // Perform further initialization
1313 uid_t uid
= getuid();
1314 gid_t gid
= getgid();
1317 uid_t euid
= geteuid();
1318 gid_t egid
= getegid();
1320 DEBUG(jail
->pakfire
, " UID: %d (effective %d)\n", uid
, euid
);
1321 DEBUG(jail
->pakfire
, " GID: %d (effective %d)\n", gid
, egid
);
1323 // Check if we are (effectively running as root)
1324 if (uid
|| gid
|| euid
|| egid
) {
1325 ERROR(jail
->pakfire
, "Child process is not running as root\n");
1329 const char* root
= pakfire_get_path(jail
->pakfire
);
1330 const char* arch
= pakfire_get_arch(jail
->pakfire
);
1332 // Change root (unless root is /)
1333 if (!pakfire_on_root(jail
->pakfire
)) {
1335 r
= pakfire_jail_mount(jail
, ctx
);
1342 ERROR(jail
->pakfire
, "chroot() to %s failed: %m\n", root
);
1346 // Change directory to /
1349 ERROR(jail
->pakfire
, "chdir() after chroot() failed: %m\n");
1355 unsigned long persona
= pakfire_arch_personality(arch
);
1357 r
= personality(persona
);
1359 ERROR(jail
->pakfire
, "Could not set personality (%x)\n", (unsigned int)persona
);
1366 DEBUG(jail
->pakfire
, "Setting nice level to %d\n", jail
->nice
);
1368 r
= setpriority(PRIO_PROCESS
, pid
, jail
->nice
);
1370 ERROR(jail
->pakfire
, "Could not set nice level: %m\n");
1375 // Close other end of log pipes
1376 close(ctx
->pipes
.log_INFO
[0]);
1377 close(ctx
->pipes
.log_ERROR
[0]);
1379 close(ctx
->pipes
.log_DEBUG
[0]);
1380 #endif /* ENABLE_DEBUG */
1382 // Connect standard input
1383 if (ctx
->pipes
.stdin
[0]) {
1384 r
= dup2(ctx
->pipes
.stdin
[0], STDIN_FILENO
);
1386 ERROR(jail
->pakfire
, "Could not connect fd %d to stdin: %m\n",
1387 ctx
->pipes
.stdin
[0]);
1393 // Connect standard output and error
1394 if (ctx
->pipes
.stdout
[1] && ctx
->pipes
.stderr
[1]) {
1395 r
= dup2(ctx
->pipes
.stdout
[1], STDOUT_FILENO
);
1397 ERROR(jail
->pakfire
, "Could not connect fd %d to stdout: %m\n",
1398 ctx
->pipes
.stdout
[1]);
1403 r
= dup2(ctx
->pipes
.stderr
[1], STDERR_FILENO
);
1405 ERROR(jail
->pakfire
, "Could not connect fd %d to stderr: %m\n",
1406 ctx
->pipes
.stderr
[1]);
1411 // Close the pipe (as we have moved the original file descriptors)
1412 pakfire_jail_close_pipe(jail
, ctx
->pipes
.stdin
);
1413 pakfire_jail_close_pipe(jail
, ctx
->pipes
.stdout
);
1414 pakfire_jail_close_pipe(jail
, ctx
->pipes
.stderr
);
1417 // Reset open file limit (http://0pointer.net/blog/file-descriptor-limits.html)
1418 r
= pakfire_rlimit_reset_nofile(jail
->pakfire
);
1422 // Drop capabilities
1423 r
= pakfire_jail_drop_capabilities(jail
);
1428 r
= pakfire_jail_limit_syscalls(jail
);
1432 DEBUG(jail
->pakfire
, "Child process initialization done\n");
1433 DEBUG(jail
->pakfire
, "Launching command:\n");
1436 for (unsigned int i
= 0; argv
[i
]; i
++)
1437 DEBUG(jail
->pakfire
, " argv[%d] = %s\n", i
, argv
[i
]);
1440 r
= execvpe(argv
[0], (char**)argv
, jail
->env
);
1442 ERROR(jail
->pakfire
, "Could not execve(): %m\n");
1444 // Translate errno into regular exit code
1454 // We should not get here
1458 // Run a command in the jail
1459 static int __pakfire_jail_exec(struct pakfire_jail
* jail
, const char* argv
[],
1460 const int interactive
,
1461 pakfire_jail_communicate_in communicate_in
,
1462 pakfire_jail_communicate_out communicate_out
,
1467 // Check if argv is valid
1468 if (!argv
|| !argv
[0]) {
1473 // Send any output to the default logger if no callback is set
1474 if (!communicate_out
)
1475 communicate_out
= pakfire_jail_default_log_callback
;
1477 // Initialize context for this call
1478 struct pakfire_jail_exec ctx
= {
1488 .in
= communicate_in
,
1489 .out
= communicate_out
,
1494 DEBUG(jail
->pakfire
, "Executing jail...\n");
1496 // Enable networking in interactive mode
1498 ctx
.flags
|= PAKFIRE_JAIL_HAS_NETWORKING
;
1501 Setup a file descriptor which can be used to notify the client that the parent
1502 has completed configuration.
1504 ctx
.completed_fd
= eventfd(0, EFD_CLOEXEC
);
1505 if (ctx
.completed_fd
< 0) {
1506 ERROR(jail
->pakfire
, "eventfd() failed: %m\n");
1510 // Create pipes to communicate with child process if we are not running interactively
1512 // stdin (only if callback is set)
1513 if (ctx
.communicate
.in
) {
1514 r
= pakfire_jail_setup_pipe(jail
, &ctx
.pipes
.stdin
, 0);
1520 r
= pakfire_jail_setup_pipe(jail
, &ctx
.pipes
.stdout
, 0);
1525 r
= pakfire_jail_setup_pipe(jail
, &ctx
.pipes
.stderr
, 0);
1530 // Setup pipes for logging
1532 r
= pakfire_jail_setup_pipe(jail
, &ctx
.pipes
.log_INFO
, O_CLOEXEC
);
1537 r
= pakfire_jail_setup_pipe(jail
, &ctx
.pipes
.log_ERROR
, O_CLOEXEC
);
1543 r
= pakfire_jail_setup_pipe(jail
, &ctx
.pipes
.log_DEBUG
, O_CLOEXEC
);
1546 #endif /* ENABLE_DEBUG */
1548 // Configure child process
1549 struct clone_args args
= {
1558 .exit_signal
= SIGCHLD
,
1559 .pidfd
= (long long unsigned int)&ctx
.pidfd
,
1562 // Launch the process in a cgroup that is a leaf of the configured cgroup
1564 args
.flags
|= CLONE_INTO_CGROUP
;
1567 const char* uuid
= pakfire_jail_uuid(jail
);
1569 // Create a temporary cgroup
1570 r
= pakfire_cgroup_child(&ctx
.cgroup
, jail
->cgroup
, uuid
, 0);
1572 ERROR(jail
->pakfire
, "Could not create cgroup for jail: %m\n");
1576 // Clone into this cgroup
1577 args
.cgroup
= pakfire_cgroup_fd(ctx
.cgroup
);
1581 if (!pakfire_jail_exec_has_flag(&ctx
, PAKFIRE_JAIL_HAS_NETWORKING
)) {
1582 args
.flags
|= CLONE_NEWNET
;
1585 // Fork this process
1586 ctx
.pid
= clone3(&args
, sizeof(args
));
1588 ERROR(jail
->pakfire
, "Could not clone: %m\n");
1592 } else if (ctx
.pid
== 0) {
1593 r
= pakfire_jail_child(jail
, &ctx
, argv
);
1598 r
= pakfire_jail_parent(jail
, &ctx
);
1602 DEBUG(jail
->pakfire
, "Waiting for PID %d to finish its work\n", ctx
.pid
);
1604 // Read output of the child process
1605 r
= pakfire_jail_wait(jail
, &ctx
);
1609 // Handle exit status
1610 switch (ctx
.status
.si_code
) {
1612 DEBUG(jail
->pakfire
, "The child process exited with code %d\n",
1613 ctx
.status
.si_status
);
1616 exit
= ctx
.status
.si_status
;
1620 ERROR(jail
->pakfire
, "The child process was killed\n");
1625 ERROR(jail
->pakfire
, "The child process terminated abnormally\n");
1628 // Log anything else
1630 ERROR(jail
->pakfire
, "Unknown child exit code: %d\n", ctx
.status
.si_code
);
1635 // Destroy the temporary cgroup (if any)
1637 // Read cgroup stats
1638 r
= pakfire_cgroup_stat(ctx
.cgroup
, &ctx
.cgroup_stats
);
1640 ERROR(jail
->pakfire
, "Could not read cgroup stats: %m\n");
1642 pakfire_cgroup_stat_dump(ctx
.cgroup
, &ctx
.cgroup_stats
);
1645 pakfire_cgroup_destroy(ctx
.cgroup
);
1646 pakfire_cgroup_unref(ctx
.cgroup
);
1649 // Close any file descriptors
1650 pakfire_jail_close_pipe(jail
, ctx
.pipes
.stdin
);
1651 pakfire_jail_close_pipe(jail
, ctx
.pipes
.stdout
);
1652 pakfire_jail_close_pipe(jail
, ctx
.pipes
.stderr
);
1655 pakfire_jail_close_pipe(jail
, ctx
.pipes
.log_INFO
);
1656 pakfire_jail_close_pipe(jail
, ctx
.pipes
.log_ERROR
);
1657 pakfire_jail_close_pipe(jail
, ctx
.pipes
.log_DEBUG
);
1662 PAKFIRE_EXPORT
int pakfire_jail_exec(
1663 struct pakfire_jail
* jail
,
1665 pakfire_jail_communicate_in callback_in
,
1666 pakfire_jail_communicate_out callback_out
,
1668 return __pakfire_jail_exec(jail
, argv
, 0, callback_in
, callback_out
, data
);
1671 static int pakfire_jail_exec_interactive(
1672 struct pakfire_jail
* jail
, const char* argv
[]) {
1675 // Setup interactive stuff
1676 r
= pakfire_jail_setup_interactive_env(jail
);
1680 return __pakfire_jail_exec(jail
, argv
, 1, NULL
, NULL
, NULL
);
1683 int pakfire_jail_exec_script(struct pakfire_jail
* jail
,
1687 pakfire_jail_communicate_in callback_in
,
1688 pakfire_jail_communicate_out callback_out
,
1690 char path
[PATH_MAX
];
1691 const char** argv
= NULL
;
1695 const char* root
= pakfire_get_path(jail
->pakfire
);
1697 // Write the scriptlet to disk
1698 r
= pakfire_path_join(path
, root
, PAKFIRE_TMP_DIR
"/pakfire-script.XXXXXX");
1702 // Create a temporary file
1703 f
= pakfire_mktemp(path
, 0700);
1705 ERROR(jail
->pakfire
, "Could not create temporary file: %m\n");
1709 DEBUG(jail
->pakfire
, "Writing script to %s:\n%.*s\n", path
, (int)size
, script
);
1712 r
= fprintf(f
, "%s", script
);
1714 ERROR(jail
->pakfire
, "Could not write script to file %s: %m\n", path
);
1721 ERROR(jail
->pakfire
, "Could not close script file %s: %m\n", path
);
1727 // Count how many arguments were passed
1728 unsigned int argc
= 1;
1730 for (const char** arg
= args
; *arg
; arg
++)
1734 argv
= calloc(argc
+ 1, sizeof(*argv
));
1736 ERROR(jail
->pakfire
, "Could not allocate argv: %m\n");
1741 argv
[0] = (root
) ? pakfire_path_relpath(root
, path
) : path
;
1744 for (unsigned int i
= 1; i
< argc
; i
++)
1745 argv
[i
] = args
[i
-1];
1748 r
= pakfire_jail_exec(jail
, argv
, callback_in
, callback_out
, data
);
1756 // Remove script from disk
1764 A convenience function that creates a new jail, runs the given command and destroys
1767 int pakfire_jail_run(struct pakfire
* pakfire
, const char* argv
[], int flags
, char** output
) {
1768 struct pakfire_jail
* jail
= NULL
;
1771 // Create a new jail
1772 r
= pakfire_jail_create(&jail
, pakfire
, flags
);
1776 // Execute the command
1777 r
= pakfire_jail_exec(jail
, argv
, NULL
, pakfire_jail_capture_stdout
, output
);
1781 pakfire_jail_unref(jail
);
1786 int pakfire_jail_run_script(struct pakfire
* pakfire
,
1787 const char* script
, const size_t length
, const char* argv
[], int flags
) {
1788 struct pakfire_jail
* jail
= NULL
;
1791 // Create a new jail
1792 r
= pakfire_jail_create(&jail
, pakfire
, flags
);
1796 // Execute the command
1797 r
= pakfire_jail_exec_script(jail
, script
, length
, argv
, NULL
, NULL
, NULL
);
1801 pakfire_jail_unref(jail
);
/*
	Launches an interactive login shell (/bin/bash --login) inside the jail
	and returns whatever pakfire_jail_exec_interactive() returns.
*/
int pakfire_jail_shell(struct pakfire_jail* jail) {
	const char* shell[3];

	// Command line of the shell (NULL-terminated)
	shell[0] = "/bin/bash";
	shell[1] = "--login";
	shell[2] = NULL;

	// Execute /bin/bash
	return pakfire_jail_exec_interactive(jail, shell);
}
1815 int pakfire_jail_ldconfig(struct pakfire
* pakfire
) {
1816 char path
[PATH_MAX
];
1818 const char* ldconfig
= "/sbin/ldconfig";
1820 // Check if ldconfig exists before calling it to avoid overhead
1821 int r
= pakfire_path(pakfire
, path
, "%s", ldconfig
);
1825 // Check if ldconfig is executable
1826 r
= access(path
, X_OK
);
1828 DEBUG(pakfire
, "%s is not executable. Skipping...\n", ldconfig
);
1832 const char* argv
[] = {
1837 return pakfire_jail_run(pakfire
, argv
, 0, NULL
);