]> git.ipfire.org Git - people/ms/pakfire.git/blob - src/libpakfire/jail.c
jail: Enable all QEMU CPU features by default
[people/ms/pakfire.git] / src / libpakfire / jail.c
1 /*#############################################################################
2 # #
3 # Pakfire - The IPFire package management system #
4 # Copyright (C) 2022 Pakfire development team #
5 # #
6 # This program is free software: you can redistribute it and/or modify #
7 # it under the terms of the GNU General Public License as published by #
8 # the Free Software Foundation, either version 3 of the License, or #
9 # (at your option) any later version. #
10 # #
11 # This program is distributed in the hope that it will be useful, #
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of #
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
14 # GNU General Public License for more details. #
15 # #
16 # You should have received a copy of the GNU General Public License #
17 # along with this program. If not, see <http://www.gnu.org/licenses/>. #
18 # #
19 #############################################################################*/
20
21 #include <errno.h>
22 #include <fcntl.h>
23 #include <linux/capability.h>
24 #include <linux/sched.h>
25 #include <sys/wait.h>
26 #include <linux/wait.h>
27 #include <sched.h>
28 #include <signal.h>
29 #include <stdlib.h>
30 #include <syscall.h>
31 #include <sys/capability.h>
32 #include <sys/epoll.h>
33 #include <sys/eventfd.h>
34 #include <sys/mount.h>
35 #include <sys/personality.h>
36 #include <sys/prctl.h>
37 #include <sys/resource.h>
38 #include <sys/timerfd.h>
39 #include <sys/types.h>
40 #include <sys/wait.h>
41
42 // libseccomp
43 #include <seccomp.h>
44
45 // libuuid
46 #include <uuid.h>
47
48 #include <pakfire/arch.h>
49 #include <pakfire/cgroup.h>
50 #include <pakfire/jail.h>
51 #include <pakfire/logging.h>
52 #include <pakfire/mount.h>
53 #include <pakfire/pakfire.h>
54 #include <pakfire/private.h>
55 #include <pakfire/pwd.h>
56 #include <pakfire/string.h>
57 #include <pakfire/util.h>
58
// Size in bytes (64 KiB) of every log buffer. The expression is parenthesised
// so that the macro expands correctly inside larger expressions
// (e.g. "x / BUFFER_SIZE" or "x % BUFFER_SIZE").
#define BUFFER_SIZE      (1024 * 64)

// Number of slots in a jail's environment array
#define ENVIRON_SIZE     128

// Maximum number of events fetched by a single epoll_wait() call
#define EPOLL_MAX_EVENTS 2

// Maximum number of custom bind-mounts that can be registered per jail
#define MAX_MOUNTPOINTS  8
63
64 // The default environment that will be set for every command
/*
	Default environment of every jail.

	The table is terminated by an entry whose key is NULL.
*/
static const struct environ {
	const char* key;
	const char* val;
} ENV[] = {
	{ .key = "HOME", .val = "/root" },
	{ .key = "LANG", .val = "C.utf-8" },
	{ .key = "PATH", .val = "/usr/local/sbin:/usr/sbin:/sbin:/usr/local/bin:/usr/bin:/bin" },
	{ .key = "TERM", .val = "vt100" },

	// Marks the environment as a Pakfire container
	{ .key = "container", .val = "pakfire" },

	// Sentinel
	{ .key = NULL, .val = NULL },
};
78
// One custom bind-mount: what to mount, where to mount it, and the mount flags
struct pakfire_jail_mountpoint {
	// Path that is being mounted
	char source[PATH_MAX];

	// Path the source is mounted onto
	char target[PATH_MAX];

	// Flags handed on when the mountpoint is bind-mounted
	int flags;
};
84
85 struct pakfire_jail {
86 struct pakfire* pakfire;
87 int nrefs;
88
89 // A unique ID for each jail
90 uuid_t uuid;
91 char __uuid[UUID_STR_LEN];
92
93 // Flags
94 int flags;
95
96 // Resource Limits
97 int nice;
98
99 // Timeout
100 struct itimerspec timeout;
101
102 // CGroup
103 struct pakfire_cgroup* cgroup;
104
105 // Environment
106 char* env[ENVIRON_SIZE];
107
108 // Mountpoints
109 struct pakfire_jail_mountpoint mountpoints[MAX_MOUNTPOINTS];
110 unsigned int num_mountpoints;
111 };
112
113 struct pakfire_log_buffer {
114 char data[BUFFER_SIZE];
115 size_t used;
116 };
117
// Flags that apply to a single execution inside a jail
enum pakfire_jail_exec_flags {
	// Networking-related files get bind-mounted into the jail
	PAKFIRE_JAIL_HAS_NETWORKING = 1 << 0,
};
121
122 struct pakfire_jail_exec {
123 int flags;
124
125 // PID (of the child)
126 pid_t pid;
127 int pidfd;
128
129 // Process status (from waitid)
130 siginfo_t status;
131
132 // FD to notify the client that the parent has finished initialization
133 int completed_fd;
134
135 // Log pipes
136 struct pakfire_jail_pipes {
137 int stdin[2];
138 int stdout[2];
139 int stderr[2];
140
141 // Logging
142 int log_INFO[2];
143 int log_ERROR[2];
144 int log_DEBUG[2];
145 } pipes;
146
147 // Communicate
148 struct pakfire_jail_communicate {
149 pakfire_jail_communicate_in in;
150 pakfire_jail_communicate_out out;
151 void* data;
152 } communicate;
153
154 // Log buffers
155 struct pakfire_jail_buffers {
156 struct pakfire_log_buffer stdout;
157 struct pakfire_log_buffer stderr;
158
159 // Logging
160 struct pakfire_log_buffer log_INFO;
161 struct pakfire_log_buffer log_ERROR;
162 struct pakfire_log_buffer log_DEBUG;
163 } buffers;
164
165 struct pakfire_cgroup* cgroup;
166 struct pakfire_cgroup_stats cgroup_stats;
167 };
168
// Thin wrapper that invokes the clone3 system call directly through syscall()
static int clone3(struct clone_args* args, size_t size) {
	const long result = syscall(__NR_clone3, args, size);

	return (int)result;
}
172
173 static int pidfd_send_signal(int pidfd, int sig, siginfo_t* info, unsigned int flags) {
174 return syscall(SYS_pidfd_send_signal, pidfd, sig, info, flags);
175 }
176
177 static int pakfire_jail_exec_has_flag(
178 const struct pakfire_jail_exec* ctx, const enum pakfire_jail_exec_flags flag) {
179 return ctx->flags & flag;
180 }
181
182 static void pakfire_jail_free(struct pakfire_jail* jail) {
183 DEBUG(jail->pakfire, "Freeing jail at %p\n", jail);
184
185 // Free environment
186 for (unsigned int i = 0; jail->env[i]; i++)
187 free(jail->env[i]);
188
189 if (jail->cgroup)
190 pakfire_cgroup_unref(jail->cgroup);
191
192 pakfire_unref(jail->pakfire);
193 free(jail);
194 }
195
/*
	Default output callback: forwards the line to the Pakfire logger that
	matches the given priority. Lines of any other priority are dropped.

	Always returns 0.
*/
static int pakfire_jail_default_log_callback(struct pakfire* pakfire, void* data,
		int priority, const char* line, size_t length) {
	if (priority == LOG_INFO)
		INFO(pakfire, "%s", line);

	else if (priority == LOG_ERR)
		ERROR(pakfire, "%s", line);

#ifdef ENABLE_DEBUG
	else if (priority == LOG_DEBUG)
		DEBUG(pakfire, "%s", line);
#endif

	return 0;
}
219
220 static const char* pakfire_jail_uuid(struct pakfire_jail* jail) {
221 if (!*jail->__uuid)
222 uuid_unparse_lower(jail->uuid, jail->__uuid);
223
224 return jail->__uuid;
225 }
226
/*
	Prepares the environment for interactive use: sets a prompt and copies
	TERM and LANG from the calling process if they are set.

	Returns 0 on success or the error returned by pakfire_jail_set_env().
*/
static int pakfire_jail_setup_interactive_env(struct pakfire_jail* jail) {
	// Variables that are taken over from the caller's environment
	static const char* const inherited[] = { "TERM", "LANG", NULL };

	// Set PS1
	int r = pakfire_jail_set_env(jail, "PS1", "pakfire-jail \\w> ");
	if (r)
		return r;

	for (const char* const* name = inherited; *name; name++) {
		char* value = secure_getenv(*name);

		// Skip anything that is not set
		if (!value)
			continue;

		r = pakfire_jail_set_env(jail, *name, value);
		if (r)
			return r;
	}

	return 0;
}
251
252 PAKFIRE_EXPORT int pakfire_jail_create(struct pakfire_jail** jail,
253 struct pakfire* pakfire, int flags) {
254 int r;
255
256 const char* arch = pakfire_get_arch(pakfire);
257
258 // Allocate a new jail
259 struct pakfire_jail* j = calloc(1, sizeof(*j));
260 if (!j)
261 return 1;
262
263 // Reference Pakfire
264 j->pakfire = pakfire_ref(pakfire);
265
266 // Initialize reference counter
267 j->nrefs = 1;
268
269 // Store flags
270 j->flags = flags;
271
272 // Generate a random UUID
273 uuid_generate_random(j->uuid);
274
275 DEBUG(j->pakfire, "Allocated new jail at %p\n", j);
276
277 // Set default environment
278 for (const struct environ* e = ENV; e->key; e++) {
279 r = pakfire_jail_set_env(j, e->key, e->val);
280 if (r)
281 goto ERROR;
282 }
283
284 // Enable all CPU features that CPU has to offer
285 if (!pakfire_arch_supported_by_host(arch)) {
286 r = pakfire_jail_set_env(j, "QEMU_CPU", "max");
287 if (r)
288 goto ERROR;
289 }
290
291 // Set container UUID
292 r = pakfire_jail_set_env(j, "container_uuid", pakfire_jail_uuid(j));
293 if (r)
294 goto ERROR;
295
296 // Disable systemctl to talk to systemd
297 if (!pakfire_on_root(j->pakfire)) {
298 r = pakfire_jail_set_env(j, "SYSTEMD_OFFLINE", "1");
299 if (r)
300 goto ERROR;
301 }
302
303 // Done
304 *jail = j;
305 return 0;
306
307 ERROR:
308 pakfire_jail_free(j);
309
310 return r;
311 }
312
313 PAKFIRE_EXPORT struct pakfire_jail* pakfire_jail_ref(struct pakfire_jail* jail) {
314 ++jail->nrefs;
315
316 return jail;
317 }
318
319 PAKFIRE_EXPORT struct pakfire_jail* pakfire_jail_unref(struct pakfire_jail* jail) {
320 if (--jail->nrefs > 0)
321 return jail;
322
323 pakfire_jail_free(jail);
324 return NULL;
325 }
326
327 // Resource Limits
328
329 PAKFIRE_EXPORT int pakfire_jail_nice(struct pakfire_jail* jail, int nice) {
330 // Check if nice level is in range
331 if (nice < -19 || nice > 20) {
332 errno = EINVAL;
333 return 1;
334 }
335
336 // Store nice level
337 jail->nice = nice;
338
339 return 0;
340 }
341
342 int pakfire_jail_set_cgroup(struct pakfire_jail* jail, struct pakfire_cgroup* cgroup) {
343 // Free any previous cgroup
344 if (jail->cgroup) {
345 pakfire_cgroup_unref(jail->cgroup);
346 jail->cgroup = NULL;
347 }
348
349 // Set any new cgroup
350 if (cgroup) {
351 DEBUG(jail->pakfire, "Setting cgroup %p\n", cgroup);
352
353 jail->cgroup = pakfire_cgroup_ref(cgroup);
354 }
355
356 // Done
357 return 0;
358 }
359
360 // Environment
361
362 // Returns the length of the environment
363 static unsigned int pakfire_jail_env_length(struct pakfire_jail* jail) {
364 unsigned int i = 0;
365
366 // Count everything in the environment
367 for (char** e = jail->env; *e; e++)
368 i++;
369
370 return i;
371 }
372
373 // Finds an existing environment variable and returns its index or -1 if not found
374 static int pakfire_jail_find_env(struct pakfire_jail* jail, const char* key) {
375 if (!key) {
376 errno = EINVAL;
377 return -1;
378 }
379
380 char buffer[strlen(key) + 2];
381 pakfire_string_format(buffer, "%s=", key);
382
383 for (unsigned int i = 0; jail->env[i]; i++) {
384 if (pakfire_string_startswith(jail->env[i], buffer))
385 return i;
386 }
387
388 // Nothing found
389 return -1;
390 }
391
392 // Returns the value of an environment variable or NULL
393 PAKFIRE_EXPORT const char* pakfire_jail_get_env(struct pakfire_jail* jail,
394 const char* key) {
395 int i = pakfire_jail_find_env(jail, key);
396 if (i < 0)
397 return NULL;
398
399 return jail->env[i] + strlen(key) + 1;
400 }
401
402 // Sets an environment variable
403 PAKFIRE_EXPORT int pakfire_jail_set_env(struct pakfire_jail* jail,
404 const char* key, const char* value) {
405 // Find the index where to write this value to
406 int i = pakfire_jail_find_env(jail, key);
407 if (i < 0)
408 i = pakfire_jail_env_length(jail);
409
410 // Return -ENOSPC when the environment is full
411 if (i >= ENVIRON_SIZE) {
412 errno = ENOSPC;
413 return -1;
414 }
415
416 // Free any previous value
417 if (jail->env[i])
418 free(jail->env[i]);
419
420 // Format and set environment variable
421 asprintf(&jail->env[i], "%s=%s", key, value);
422
423 DEBUG(jail->pakfire, "Set environment variable: %s\n", jail->env[i]);
424
425 return 0;
426 }
427
428 // Imports an environment
429 PAKFIRE_EXPORT int pakfire_jail_import_env(struct pakfire_jail* jail, const char* env[]) {
430 if (!env)
431 return 0;
432
433 char* key;
434 char* val;
435 int r;
436
437 // Copy environment variables
438 for (unsigned int i = 0; env[i]; i++) {
439 r = pakfire_string_partition(env[i], "=", &key, &val);
440 if (r)
441 continue;
442
443 // Set value
444 r = pakfire_jail_set_env(jail, key, val);
445
446 if (key)
447 free(key);
448 if (val)
449 free(val);
450
451 // Break on error
452 if (r)
453 return r;
454 }
455
456 return 0;
457 }
458
459 // Timeout
460
461 PAKFIRE_EXPORT int pakfire_jail_set_timeout(
462 struct pakfire_jail* jail, unsigned int timeout) {
463 // Store value
464 jail->timeout.it_value.tv_sec = timeout;
465
466 if (timeout > 0)
467 DEBUG(jail->pakfire, "Timeout set to %d second(s)\n", timeout);
468 else
469 DEBUG(jail->pakfire, "Timeout disabled\n");
470
471 return 0;
472 }
473
474 static int pakfire_jail_create_timer(struct pakfire_jail* jail) {
475 int r;
476
477 // Nothing to do if no timeout has been set
478 if (!jail->timeout.it_value.tv_sec)
479 return -1;
480
481 // Create a new timer
482 const int fd = timerfd_create(CLOCK_MONOTONIC, 0);
483 if (fd < 0) {
484 ERROR(jail->pakfire, "Could not create timer: %m\n");
485 goto ERROR;
486 }
487
488 // Arm timer
489 r = timerfd_settime(fd, 0, &jail->timeout, NULL);
490 if (r) {
491 ERROR(jail->pakfire, "Could not arm timer: %m\n");
492 goto ERROR;
493 }
494
495 return fd;
496
497 ERROR:
498 if (fd > 0)
499 close(fd);
500
501 return -1;
502 }
503
504 /*
505 This function replaces any logging in the child process.
506
507 All log messages will be sent to the parent process through their respective pipes.
508 */
509 static void pakfire_jail_log(void* data, int priority, const char* file,
510 int line, const char* fn, const char* format, va_list args) {
511 struct pakfire_jail_pipes* pipes = (struct pakfire_jail_pipes*)data;
512 int fd;
513
514 switch (priority) {
515 case LOG_INFO:
516 fd = pipes->log_INFO[1];
517 break;
518
519 case LOG_ERR:
520 fd = pipes->log_ERROR[1];
521 break;
522
523 #ifdef ENABLE_DEBUG
524 case LOG_DEBUG:
525 fd = pipes->log_DEBUG[1];
526 break;
527 #endif /* ENABLE_DEBUG */
528
529 // Ignore any messages of an unknown priority
530 default:
531 return;
532 }
533
534 // Send the log message
535 if (fd)
536 vdprintf(fd, format, args);
537 }
538
539 static int pakfire_jail_log_buffer_is_full(const struct pakfire_log_buffer* buffer) {
540 return (sizeof(buffer->data) == buffer->used);
541 }
542
543 /*
544 This function reads as much data as it can from the file descriptor.
545 If it finds a whole line in it, it will send it to the logger and repeat the process.
546 If not newline character is found, it will try to read more data until it finds one.
547 */
548 static int pakfire_jail_handle_log(struct pakfire_jail* jail,
549 struct pakfire_jail_exec* ctx, int priority, int fd,
550 struct pakfire_log_buffer* buffer, pakfire_jail_communicate_out callback, void* data) {
551 char line[BUFFER_SIZE + 1];
552
553 // Fill up buffer from fd
554 if (buffer->used < sizeof(buffer->data)) {
555 ssize_t bytes_read = read(fd, buffer->data + buffer->used,
556 sizeof(buffer->data) - buffer->used);
557
558 // Handle errors
559 if (bytes_read < 0) {
560 ERROR(jail->pakfire, "Could not read from fd %d: %m\n", fd);
561 return -1;
562 }
563
564 // Update buffer size
565 buffer->used += bytes_read;
566 }
567
568 // See if we have any lines that we can write
569 while (buffer->used) {
570 // Search for the end of the first line
571 char* eol = memchr(buffer->data, '\n', buffer->used);
572
573 // No newline found
574 if (!eol) {
575 // If the buffer is full, we send the content to the logger and try again
576 // This should not happen in practise
577 if (pakfire_jail_log_buffer_is_full(buffer)) {
578 DEBUG(jail->pakfire, "Logging buffer is full. Sending all content\n");
579
580 eol = buffer->data + sizeof(buffer->data) - 1;
581
582 // Otherwise we might have only read parts of the output
583 } else
584 break;
585 }
586
587 // Find the length of the string
588 size_t length = eol - buffer->data + 1;
589
590 // Copy the line into the buffer
591 memcpy(line, buffer->data, length);
592
593 // Terminate the string
594 line[length] = '\0';
595
596 // Log the line
597 if (callback) {
598 int r = callback(jail->pakfire, data, priority, line, length);
599 if (r) {
600 ERROR(jail->pakfire, "The logging callback returned an error: %d\n", r);
601 return r;
602 }
603 }
604
605 // Remove line from buffer
606 memmove(buffer->data, buffer->data + length, buffer->used - length);
607 buffer->used -= length;
608 }
609
610 return 0;
611 }
612
613 static int pakfire_jail_stream_stdin(struct pakfire_jail* jail,
614 struct pakfire_jail_exec* ctx, const int fd) {
615 int r;
616
617 // Nothing to do if there is no stdin callback set
618 if (!ctx->communicate.in) {
619 DEBUG(jail->pakfire, "Callback for standard input is not set\n");
620 return 0;
621 }
622
623 // Skip if the writing pipe has already been closed
624 if (!ctx->pipes.stdin[1])
625 return 0;
626
627 DEBUG(jail->pakfire, "Streaming standard input...\n");
628
629 // Calling the callback
630 r = ctx->communicate.in(jail->pakfire, ctx->communicate.data, fd);
631
632 DEBUG(jail->pakfire, "Standard input callback finished: %d\n", r);
633
634 // The callback signaled that it has written everything
635 if (r == EOF) {
636 DEBUG(jail->pakfire, "Closing standard input pipe\n");
637
638 // Close the file-descriptor
639 close(fd);
640
641 // Reset the file-descriptor so it won't be closed again later
642 ctx->pipes.stdin[1] = 0;
643
644 // Report success
645 r = 0;
646 }
647
648 return r;
649 }
650
651 static int pakfire_jail_setup_pipe(struct pakfire_jail* jail, int (*fds)[2], const int flags) {
652 int r = pipe2(*fds, flags);
653 if (r < 0) {
654 ERROR(jail->pakfire, "Could not setup pipe: %m\n");
655 return 1;
656 }
657
658 return 0;
659 }
660
// Closes both ends of a pipe. Ends that are zero are considered to be
// closed already and are skipped.
static void pakfire_jail_close_pipe(struct pakfire_jail* jail, int fds[2]) {
	// Read end
	if (fds[0])
		close(fds[0]);

	// Write end
	if (fds[1])
		close(fds[1]);
}
666
/*
	Convenience function for the reading side of a pipe: closes the write
	end (and resets it to zero) and returns the read end.
*/
static int pakfire_jail_get_pipe_to_read(struct pakfire_jail* jail, int (*fds)[2]) {
	int* const pipefd = *fds;

	// Index 1 is the write end which we do not need
	if (pipefd[1]) {
		close(pipefd[1]);
		pipefd[1] = 0;
	}

	// Index 0 is the read end
	return pipefd[0];
}
685
/*
	Convenience function for the writing side of a pipe: closes the read
	end (and resets it to zero) and returns the write end.
*/
static int pakfire_jail_get_pipe_to_write(struct pakfire_jail* jail, int (*fds)[2]) {
	int* const pipefd = *fds;

	// Index 0 is the read end which we do not need
	if (pipefd[0]) {
		close(pipefd[0]);
		pipefd[0] = 0;
	}

	// Index 1 is the write end
	return pipefd[1];
}
700
701 static int pakfire_jail_wait(struct pakfire_jail* jail, struct pakfire_jail_exec* ctx) {
702 int epollfd = -1;
703 struct epoll_event ev;
704 struct epoll_event events[EPOLL_MAX_EVENTS];
705 char garbage[8];
706 int r = 0;
707
708 // Fetch file descriptors from context
709 const int stdin = pakfire_jail_get_pipe_to_write(jail, &ctx->pipes.stdin);
710 const int stdout = pakfire_jail_get_pipe_to_read(jail, &ctx->pipes.stdout);
711 const int stderr = pakfire_jail_get_pipe_to_read(jail, &ctx->pipes.stderr);
712 const int pidfd = ctx->pidfd;
713
714 // Timer
715 const int timerfd = pakfire_jail_create_timer(jail);
716
717 // Logging
718 const int log_INFO = pakfire_jail_get_pipe_to_read(jail, &ctx->pipes.log_INFO);
719 const int log_ERROR = pakfire_jail_get_pipe_to_read(jail, &ctx->pipes.log_ERROR);
720 const int log_DEBUG = pakfire_jail_get_pipe_to_read(jail, &ctx->pipes.log_DEBUG);
721
722 // Make a list of all file descriptors we are interested in
723 int fds[] = {
724 stdin, stdout, stderr, pidfd, timerfd, log_INFO, log_ERROR, log_DEBUG,
725 };
726
727 // Setup epoll
728 epollfd = epoll_create1(0);
729 if (epollfd < 0) {
730 ERROR(jail->pakfire, "Could not initialize epoll(): %m\n");
731 r = 1;
732 goto ERROR;
733 }
734
735 // Turn file descriptors into non-blocking mode and add them to epoll()
736 for (unsigned int i = 0; i < sizeof(fds) / sizeof(*fds); i++) {
737 int fd = fds[i];
738
739 // Skip fds which were not initialized
740 if (fd <= 0)
741 continue;
742
743 ev.events = EPOLLHUP;
744
745 if (fd == stdin)
746 ev.events |= EPOLLOUT;
747 else
748 ev.events |= EPOLLIN;
749
750 // Read flags
751 int flags = fcntl(fd, F_GETFL, 0);
752
753 // Set modified flags
754 if (fcntl(fd, F_SETFL, flags|O_NONBLOCK) < 0) {
755 ERROR(jail->pakfire,
756 "Could not set file descriptor %d into non-blocking mode: %m\n", fd);
757 r = 1;
758 goto ERROR;
759 }
760
761 ev.data.fd = fd;
762
763 if (epoll_ctl(epollfd, EPOLL_CTL_ADD, fd, &ev) < 0) {
764 ERROR(jail->pakfire, "Could not add file descriptor %d to epoll(): %m\n", fd);
765 r = 1;
766 goto ERROR;
767 }
768 }
769
770 int ended = 0;
771
772 // Loop for as long as the process is alive
773 while (!ended) {
774 int num = epoll_wait(epollfd, events, EPOLL_MAX_EVENTS, -1);
775 if (num < 1) {
776 // Ignore if epoll_wait() has been interrupted
777 if (errno == EINTR)
778 continue;
779
780 ERROR(jail->pakfire, "epoll_wait() failed: %m\n");
781 r = 1;
782
783 goto ERROR;
784 }
785
786 for (int i = 0; i < num; i++) {
787 int e = events[i].events;
788 int fd = events[i].data.fd;
789
790 struct pakfire_log_buffer* buffer = NULL;
791 pakfire_jail_communicate_out callback = NULL;
792 void* data = NULL;
793 int priority;
794
795 // Check if there is any data to be read
796 if (e & EPOLLIN) {
797 // Handle any changes to the PIDFD
798 if (fd == pidfd) {
799 // Call waidid() and store the result
800 r = waitid(P_PIDFD, ctx->pidfd, &ctx->status, WEXITED);
801 if (r) {
802 ERROR(jail->pakfire, "waitid() failed: %m\n");
803 goto ERROR;
804 }
805
806 // Mark that we have ended so that we will process the remaining
807 // events from epoll() now, but won't restart the outer loop.
808 ended = 1;
809 continue;
810
811 // Handle timer events
812 } else if (fd == timerfd) {
813 DEBUG(jail->pakfire, "Timer event received\n");
814
815 // Disarm the timer
816 r = read(timerfd, garbage, sizeof(garbage));
817 if (r < 1) {
818 ERROR(jail->pakfire, "Could not disarm timer: %m\n");
819 r = 1;
820 goto ERROR;
821 }
822
823 // Terminate the process if it hasn't already ended
824 if (!ended) {
825 DEBUG(jail->pakfire, "Terminating process...\n");
826
827 // Send SIGTERM to the process
828 r = pidfd_send_signal(pidfd, SIGKILL, NULL, 0);
829 if (r) {
830 ERROR(jail->pakfire, "Could not kill process: %m\n");
831 goto ERROR;
832 }
833 }
834
835 // There is nothing else to do
836 continue;
837
838 // Handle logging messages
839 } else if (fd == log_INFO) {
840 buffer = &ctx->buffers.log_INFO;
841 priority = LOG_INFO;
842
843 callback = pakfire_jail_default_log_callback;
844
845 } else if (fd == log_ERROR) {
846 buffer = &ctx->buffers.log_ERROR;
847 priority = LOG_ERR;
848
849 callback = pakfire_jail_default_log_callback;
850
851 } else if (fd == log_DEBUG) {
852 buffer = &ctx->buffers.log_DEBUG;
853 priority = LOG_DEBUG;
854
855 callback = pakfire_jail_default_log_callback;
856
857 // Handle anything from the log pipes
858 } else if (fd == stdout) {
859 buffer = &ctx->buffers.stdout;
860 priority = LOG_INFO;
861
862 callback = ctx->communicate.out;
863 data = ctx->communicate.data;
864
865 } else if (fd == stderr) {
866 buffer = &ctx->buffers.stderr;
867 priority = LOG_ERR;
868
869 callback = ctx->communicate.out;
870 data = ctx->communicate.data;
871
872 } else {
873 DEBUG(jail->pakfire, "Received invalid file descriptor %d\n", fd);
874 continue;
875 }
876
877 // Handle log event
878 r = pakfire_jail_handle_log(jail, ctx, priority, fd, buffer, callback, data);
879 if (r)
880 goto ERROR;
881 }
882
883 if (e & EPOLLOUT) {
884 // Handle standard input
885 if (fd == stdin) {
886 r = pakfire_jail_stream_stdin(jail, ctx, fd);
887 if (r) {
888 switch (errno) {
889 // Ignore if we filled up the buffer
890 case EAGAIN:
891 break;
892
893 default:
894 ERROR(jail->pakfire, "Could not write to stdin: %m\n");
895 goto ERROR;
896 }
897 }
898 }
899 }
900
901 // Check if any file descriptors have been closed
902 if (e & EPOLLHUP) {
903 // Remove the file descriptor
904 r = epoll_ctl(epollfd, EPOLL_CTL_DEL, fd, NULL);
905 if (r) {
906 ERROR(jail->pakfire, "Could not remove closed file-descriptor %d: %m\n", fd);
907 goto ERROR;
908 }
909 }
910 }
911 }
912
913 ERROR:
914 if (epollfd > 0)
915 close(epollfd);
916 if (timerfd > 0)
917 close(timerfd);
918
919 return r;
920 }
921
/*
	Output callback that collects everything written to standard output.

	data is expected to point to a char* which holds the output captured so
	far (or NULL if there is none yet). Every LOG_INFO line is appended to it.
	Lines of any other priority - and everything if data is NULL - go to the
	default log callback.

	NOTE(review): the previous buffer is released with free(), so *data must
	be NULL or heap-allocated - confirm against callers.

	Returns 0 on success and 1 if the new buffer could not be allocated, in
	which case the output captured so far is left untouched.
*/
int pakfire_jail_capture_stdout(struct pakfire* pakfire, void* data,
		int priority, const char* line, size_t length) {
	char** output = (char**)data;

	// Append everything from stdout to a buffer
	if (output && priority == LOG_INFO) {
		char* combined = NULL;

		// Write into a temporary pointer since asprintf() leaves its
		// result undefined on failure
		int r = asprintf(&combined, "%s%s", *output ? *output : "", line);
		if (r < 0)
			return 1;

		// Replace the old buffer which would otherwise be leaked
		free(*output);
		*output = combined;

		return 0;
	}

	// Send everything else to the default logger
	return pakfire_jail_default_log_callback(pakfire, NULL, priority, line, length);
}
938
939 // Capabilities
940
941 static int pakfire_jail_drop_capabilities(struct pakfire_jail* jail) {
942 const int capabilities[] = {
943 // Deny access to the kernel's audit system
944 CAP_AUDIT_CONTROL,
945 CAP_AUDIT_READ,
946 CAP_AUDIT_WRITE,
947
948 // Deny suspending block devices
949 CAP_BLOCK_SUSPEND,
950
951 // Deny any stuff with BPF
952 CAP_BPF,
953
954 // Deny checkpoint restore
955 CAP_CHECKPOINT_RESTORE,
956
957 // Deny opening files by inode number (open_by_handle_at)
958 CAP_DAC_READ_SEARCH,
959
960 // Deny setting SUID bits
961 CAP_FSETID,
962
963 // Deny locking more memory
964 CAP_IPC_LOCK,
965
966 // Deny modifying any Apparmor/SELinux/SMACK configuration
967 CAP_MAC_ADMIN,
968 CAP_MAC_OVERRIDE,
969
970 // Deny creating any special devices
971 CAP_MKNOD,
972
973 // Deny setting any capabilities
974 CAP_SETFCAP,
975
976 // Deny reading from syslog
977 CAP_SYSLOG,
978
979 // Deny any admin actions (mount, sethostname, ...)
980 CAP_SYS_ADMIN,
981
982 // Deny rebooting the system
983 CAP_SYS_BOOT,
984
985 // Deny loading kernel modules
986 CAP_SYS_MODULE,
987
988 // Deny setting nice level
989 CAP_SYS_NICE,
990
991 // Deny access to /proc/kcore, /dev/mem, /dev/kmem
992 CAP_SYS_RAWIO,
993
994 // Deny circumventing any resource limits
995 CAP_SYS_RESOURCE,
996
997 // Deny setting the system time
998 CAP_SYS_TIME,
999
1000 // Deny playing with suspend
1001 CAP_WAKE_ALARM,
1002
1003 0,
1004 };
1005
1006 DEBUG(jail->pakfire, "Dropping capabilities...\n");
1007
1008 size_t num_caps = 0;
1009 int r;
1010
1011 // Drop any capabilities
1012 for (const int* cap = capabilities; *cap; cap++) {
1013 r = prctl(PR_CAPBSET_DROP, *cap, 0, 0, 0);
1014 if (r) {
1015 ERROR(jail->pakfire, "Could not drop capability %d: %m\n", *cap);
1016 return r;
1017 }
1018
1019 num_caps++;
1020 }
1021
1022 // Fetch any capabilities
1023 cap_t caps = cap_get_proc();
1024 if (!caps) {
1025 ERROR(jail->pakfire, "Could not read capabilities: %m\n");
1026 return 1;
1027 }
1028
1029 /*
1030 Set inheritable capabilities
1031
1032 This ensures that no processes will be able to gain any of the listed
1033 capabilities again.
1034 */
1035 r = cap_set_flag(caps, CAP_INHERITABLE, num_caps, capabilities, CAP_CLEAR);
1036 if (r) {
1037 ERROR(jail->pakfire, "cap_set_flag() failed: %m\n");
1038 goto ERROR;
1039 }
1040
1041 // Restore capabilities
1042 r = cap_set_proc(caps);
1043 if (r) {
1044 ERROR(jail->pakfire, "Could not restore capabilities: %m\n");
1045 goto ERROR;
1046 }
1047
1048 ERROR:
1049 if (caps)
1050 cap_free(caps);
1051
1052 return r;
1053 }
1054
1055 // Syscall Filter
1056
1057 static int pakfire_jail_limit_syscalls(struct pakfire_jail* jail) {
1058 const int syscalls[] = {
1059 // The kernel's keyring isn't namespaced
1060 SCMP_SYS(keyctl),
1061 SCMP_SYS(add_key),
1062 SCMP_SYS(request_key),
1063
1064 // Disable userfaultfd
1065 SCMP_SYS(userfaultfd),
1066
1067 // Disable perf which could leak a lot of information about the host
1068 SCMP_SYS(perf_event_open),
1069
1070 0,
1071 };
1072 int r = 1;
1073
1074 DEBUG(jail->pakfire, "Applying syscall filter...\n");
1075
1076 // Setup a syscall filter which allows everything by default
1077 scmp_filter_ctx ctx = seccomp_init(SCMP_ACT_ALLOW);
1078 if (!ctx) {
1079 ERROR(jail->pakfire, "Could not setup seccomp filter: %m\n");
1080 goto ERROR;
1081 }
1082
1083 // All all syscalls
1084 for (const int* syscall = syscalls; *syscall; syscall++) {
1085 r = seccomp_rule_add(ctx, SCMP_ACT_ERRNO(EPERM), *syscall, 0);
1086 if (r) {
1087 ERROR(jail->pakfire, "Could not configure syscall %d: %m\n", *syscall);
1088 goto ERROR;
1089 }
1090 }
1091
1092 // Load syscall filter into the kernel
1093 r = seccomp_load(ctx);
1094 if (r) {
1095 ERROR(jail->pakfire, "Could not load syscall filter into the kernel: %m\n");
1096 goto ERROR;
1097 }
1098
1099 ERROR:
1100 if (ctx)
1101 seccomp_release(ctx);
1102
1103 return r;
1104 }
1105
1106 // Mountpoints
1107
1108 PAKFIRE_EXPORT int pakfire_jail_bind(struct pakfire_jail* jail,
1109 const char* source, const char* target, int flags) {
1110 struct pakfire_jail_mountpoint* mp = NULL;
1111 int r;
1112
1113 // Check if there is any space left
1114 if (jail->num_mountpoints >= MAX_MOUNTPOINTS) {
1115 errno = ENOSPC;
1116 return 1;
1117 }
1118
1119 // Check for valid inputs
1120 if (!source || !target) {
1121 errno = EINVAL;
1122 return 1;
1123 }
1124
1125 // Select the next free slot
1126 mp = &jail->mountpoints[jail->num_mountpoints];
1127
1128 // Copy source
1129 r = pakfire_string_set(mp->source, source);
1130 if (r) {
1131 ERROR(jail->pakfire, "Could not copy source: %m\n");
1132 return r;
1133 }
1134
1135 // Copy target
1136 r = pakfire_string_set(mp->target, target);
1137 if (r) {
1138 ERROR(jail->pakfire, "Could not copy target: %m\n");
1139 return r;
1140 }
1141
1142 // Copy flags
1143 mp->flags = flags;
1144
1145 // Increment counter
1146 jail->num_mountpoints++;
1147
1148 return 0;
1149 }
1150
1151 static int pakfire_jail_mount_networking(struct pakfire_jail* jail) {
1152 int r;
1153
1154 const char* paths[] = {
1155 "/etc/hosts",
1156 "/etc/resolv.conf",
1157 NULL,
1158 };
1159
1160 // Bind-mount all paths read-only
1161 for (const char** path = paths; *path; path++) {
1162 r = pakfire_bind(jail->pakfire, *path, NULL, MS_RDONLY);
1163 if (r)
1164 return r;
1165 }
1166
1167 return 0;
1168 }
1169
1170 /*
1171 Mounts everything that we require in the new namespace
1172 */
1173 static int pakfire_jail_mount(struct pakfire_jail* jail, struct pakfire_jail_exec* ctx) {
1174 struct pakfire_jail_mountpoint* mp = NULL;
1175 int r;
1176
1177 // Mount all default stuff
1178 r = pakfire_mount_all(jail->pakfire);
1179 if (r)
1180 return r;
1181
1182 // Mount networking stuff
1183 if (pakfire_jail_exec_has_flag(ctx, PAKFIRE_JAIL_HAS_NETWORKING)) {
1184 r = pakfire_jail_mount_networking(jail);
1185 if (r)
1186 return r;
1187 }
1188
1189 // Mount all custom stuff
1190 for (unsigned int i = 0; i < jail->num_mountpoints; i++) {
1191 // Fetch mountpoint
1192 mp = &jail->mountpoints[i];
1193
1194 // Mount it
1195 r = pakfire_bind(jail->pakfire, mp->source, mp->target, mp->flags);
1196 if (r)
1197 return r;
1198 }
1199
1200 // Log all mountpoints
1201 pakfire_mount_list(jail->pakfire);
1202
1203 return 0;
1204 }
1205
1206 // UID/GID Mapping
1207
1208 static int pakfire_jail_setup_uid_mapping(struct pakfire_jail* jail, pid_t pid) {
1209 char path[PATH_MAX];
1210 int r;
1211
1212 // Skip mapping anything when running on /
1213 if (pakfire_on_root(jail->pakfire))
1214 return 0;
1215
1216 // Make path
1217 r = pakfire_string_format(path, "/proc/%d/uid_map", pid);
1218 if (r)
1219 return r;
1220
1221 // Fetch UID
1222 const uid_t uid = pakfire_uid(jail->pakfire);
1223
1224 // Fetch SUBUID
1225 const struct pakfire_subid* subuid = pakfire_subuid(jail->pakfire);
1226 if (!subuid)
1227 return 1;
1228
1229 /* When running as root, we will map the entire range.
1230
1231 When running as a non-privileged user, we will map the root user inside the jail
1232 to the user's UID outside of the jail, and we will map the rest starting from one.
1233 */
1234
1235 // Running as root
1236 if (uid == 0) {
1237 r = pakfire_file_write(jail->pakfire, path, 0, 0, 0,
1238 "0 %lu %lu\n", subuid->id, subuid->length);
1239 } else {
1240 r = pakfire_file_write(jail->pakfire, path, 0, 0, 0,
1241 "0 %lu 1\n1 %lu %lu\n", uid, subuid->id, subuid->length);
1242 }
1243
1244 if (r) {
1245 ERROR(jail->pakfire, "Could not map UIDs: %m\n");
1246 return r;
1247 }
1248
1249 return r;
1250 }
1251
1252 static int pakfire_jail_setup_gid_mapping(struct pakfire_jail* jail, pid_t pid) {
1253 char path[PATH_MAX];
1254 int r;
1255
1256 // Skip mapping anything when running on /
1257 if (pakfire_on_root(jail->pakfire))
1258 return 0;
1259
1260 // Fetch GID
1261 const gid_t gid = pakfire_gid(jail->pakfire);
1262
1263 // Fetch SUBGID
1264 const struct pakfire_subid* subgid = pakfire_subgid(jail->pakfire);
1265 if (!subgid)
1266 return 1;
1267
1268 // Make path
1269 r = pakfire_string_format(path, "/proc/%d/gid_map", pid);
1270 if (r)
1271 return r;
1272
1273 // Running as root
1274 if (gid == 0) {
1275 r = pakfire_file_write(jail->pakfire, path, 0, 0, 0,
1276 "0 %lu %lu\n", subgid->id, subgid->length);
1277 } else {
1278 r = pakfire_file_write(jail->pakfire, path, 0, 0, 0,
1279 "0 %lu 1\n%1 %lu %lu\n", gid, subgid->id, subgid->length);
1280 }
1281
1282 if (r) {
1283 ERROR(jail->pakfire, "Could not map GIDs: %m\n");
1284 return r;
1285 }
1286
1287 return r;
1288 }
1289
1290 static int pakfire_jail_setgroups(struct pakfire_jail* jail, pid_t pid) {
1291 char path[PATH_MAX];
1292 int r = 1;
1293
1294 // Make path
1295 r = pakfire_string_format(path, "/proc/%d/setgroups", pid);
1296 if (r)
1297 return r;
1298
1299 // Open file for writing
1300 FILE* f = fopen(path, "w");
1301 if (!f) {
1302 ERROR(jail->pakfire, "Could not open %s for writing: %m\n", path);
1303 goto ERROR;
1304 }
1305
1306 // Write content
1307 int bytes_written = fprintf(f, "deny\n");
1308 if (bytes_written <= 0) {
1309 ERROR(jail->pakfire, "Could not write to %s: %m\n", path);
1310 goto ERROR;
1311 }
1312
1313 r = fclose(f);
1314 f = NULL;
1315 if (r) {
1316 ERROR(jail->pakfire, "Could not close %s: %m\n", path);
1317 goto ERROR;
1318 }
1319
1320 ERROR:
1321 if (f)
1322 fclose(f);
1323
1324 return r;
1325 }
1326
1327 static int pakfire_jail_send_signal(struct pakfire_jail* jail, int fd) {
1328 const uint64_t val = 1;
1329 int r = 0;
1330
1331 DEBUG(jail->pakfire, "Sending signal...\n");
1332
1333 // Write to the file descriptor
1334 ssize_t bytes_written = write(fd, &val, sizeof(val));
1335 if (bytes_written < 0 || (size_t)bytes_written < sizeof(val)) {
1336 ERROR(jail->pakfire, "Could not send signal: %m\n");
1337 r = 1;
1338 }
1339
1340 // Close the file descriptor
1341 close(fd);
1342
1343 return r;
1344 }
1345
1346 static int pakfire_jail_wait_for_signal(struct pakfire_jail* jail, int fd) {
1347 uint64_t val = 0;
1348 int r = 0;
1349
1350 DEBUG(jail->pakfire, "Waiting for signal...\n");
1351
1352 ssize_t bytes_read = read(fd, &val, sizeof(val));
1353 if (bytes_read < 0 || (size_t)bytes_read < sizeof(val)) {
1354 ERROR(jail->pakfire, "Error waiting for signal: %m\n");
1355 r = 1;
1356 }
1357
1358 // Close the file descriptor
1359 close(fd);
1360
1361 return r;
1362 }
1363
1364 /*
1365 Performs the initialisation that needs to happen in the parent part
1366 */
1367 static int pakfire_jail_parent(struct pakfire_jail* jail, struct pakfire_jail_exec* ctx) {
1368 int r;
1369
1370 // Setup UID mapping
1371 r = pakfire_jail_setup_uid_mapping(jail, ctx->pid);
1372 if (r)
1373 return r;
1374
1375 // Write "deny" to /proc/PID/setgroups
1376 r = pakfire_jail_setgroups(jail, ctx->pid);
1377 if (r)
1378 return r;
1379
1380 // Setup GID mapping
1381 r = pakfire_jail_setup_gid_mapping(jail, ctx->pid);
1382 if (r)
1383 return r;
1384
1385 // Parent has finished initialisation
1386 DEBUG(jail->pakfire, "Parent has finished initialization\n");
1387
1388 // Send signal to client
1389 r = pakfire_jail_send_signal(jail, ctx->completed_fd);
1390 if (r)
1391 return r;
1392
1393 return 0;
1394 }
1395
1396 static int pakfire_jail_child(struct pakfire_jail* jail, struct pakfire_jail_exec* ctx,
1397 const char* argv[]) {
1398 int r;
1399
1400 // Redirect any logging to our log pipe
1401 pakfire_set_log_callback(jail->pakfire, pakfire_jail_log, &ctx->pipes);
1402
1403 // Die with parent
1404 r = prctl(PR_SET_PDEATHSIG, SIGKILL, 0, 0, 0);
1405 if (r) {
1406 ERROR(jail->pakfire, "Could not configure to die with parent: %m\n");
1407 return 126;
1408 }
1409
1410 // Fetch my own PID
1411 pid_t pid = getpid();
1412
1413 DEBUG(jail->pakfire, "Launched child process in jail with PID %d\n", pid);
1414
1415 // Wait for the parent to finish initialization
1416 r = pakfire_jail_wait_for_signal(jail, ctx->completed_fd);
1417 if (r)
1418 return r;
1419
1420 // Perform further initialization
1421
1422 // Fetch UID/GID
1423 uid_t uid = getuid();
1424 gid_t gid = getgid();
1425
1426 // Fetch EUID/EGID
1427 uid_t euid = geteuid();
1428 gid_t egid = getegid();
1429
1430 DEBUG(jail->pakfire, " UID: %d (effective %d)\n", uid, euid);
1431 DEBUG(jail->pakfire, " GID: %d (effective %d)\n", gid, egid);
1432
1433 // Check if we are (effectively running as root)
1434 if (uid || gid || euid || egid) {
1435 ERROR(jail->pakfire, "Child process is not running as root\n");
1436 return 126;
1437 }
1438
1439 const char* root = pakfire_get_path(jail->pakfire);
1440 const char* arch = pakfire_get_arch(jail->pakfire);
1441
1442 // Change root (unless root is /)
1443 if (!pakfire_on_root(jail->pakfire)) {
1444 // Mount everything
1445 r = pakfire_jail_mount(jail, ctx);
1446 if (r)
1447 return r;
1448
1449 // Call chroot()
1450 r = chroot(root);
1451 if (r) {
1452 ERROR(jail->pakfire, "chroot() to %s failed: %m\n", root);
1453 return 1;
1454 }
1455
1456 // Change directory to /
1457 r = chdir("/");
1458 if (r) {
1459 ERROR(jail->pakfire, "chdir() after chroot() failed: %m\n");
1460 return 1;
1461 }
1462 }
1463
1464 // Set personality
1465 unsigned long persona = pakfire_arch_personality(arch);
1466 if (persona) {
1467 r = personality(persona);
1468 if (r < 0) {
1469 ERROR(jail->pakfire, "Could not set personality (%x)\n", (unsigned int)persona);
1470 return 1;
1471 }
1472 }
1473
1474 // Set nice level
1475 if (jail->nice) {
1476 DEBUG(jail->pakfire, "Setting nice level to %d\n", jail->nice);
1477
1478 r = setpriority(PRIO_PROCESS, pid, jail->nice);
1479 if (r) {
1480 ERROR(jail->pakfire, "Could not set nice level: %m\n");
1481 return 1;
1482 }
1483 }
1484
1485 // Close other end of log pipes
1486 close(ctx->pipes.log_INFO[0]);
1487 close(ctx->pipes.log_ERROR[0]);
1488 #ifdef ENABLE_DEBUG
1489 close(ctx->pipes.log_DEBUG[0]);
1490 #endif /* ENABLE_DEBUG */
1491
1492 // Connect standard input
1493 if (ctx->pipes.stdin[0]) {
1494 r = dup2(ctx->pipes.stdin[0], STDIN_FILENO);
1495 if (r < 0) {
1496 ERROR(jail->pakfire, "Could not connect fd %d to stdin: %m\n",
1497 ctx->pipes.stdin[0]);
1498
1499 return 1;
1500 }
1501 }
1502
1503 // Connect standard output and error
1504 if (ctx->pipes.stdout[1] && ctx->pipes.stderr[1]) {
1505 r = dup2(ctx->pipes.stdout[1], STDOUT_FILENO);
1506 if (r < 0) {
1507 ERROR(jail->pakfire, "Could not connect fd %d to stdout: %m\n",
1508 ctx->pipes.stdout[1]);
1509
1510 return 1;
1511 }
1512
1513 r = dup2(ctx->pipes.stderr[1], STDERR_FILENO);
1514 if (r < 0) {
1515 ERROR(jail->pakfire, "Could not connect fd %d to stderr: %m\n",
1516 ctx->pipes.stderr[1]);
1517
1518 return 1;
1519 }
1520
1521 // Close the pipe (as we have moved the original file descriptors)
1522 pakfire_jail_close_pipe(jail, ctx->pipes.stdin);
1523 pakfire_jail_close_pipe(jail, ctx->pipes.stdout);
1524 pakfire_jail_close_pipe(jail, ctx->pipes.stderr);
1525 }
1526
1527 // Reset open file limit (http://0pointer.net/blog/file-descriptor-limits.html)
1528 r = pakfire_rlimit_reset_nofile(jail->pakfire);
1529 if (r)
1530 return r;
1531
1532 // Drop capabilities
1533 r = pakfire_jail_drop_capabilities(jail);
1534 if (r)
1535 return r;
1536
1537 // Filter syscalls
1538 r = pakfire_jail_limit_syscalls(jail);
1539 if (r)
1540 return r;
1541
1542 DEBUG(jail->pakfire, "Child process initialization done\n");
1543 DEBUG(jail->pakfire, "Launching command:\n");
1544
1545 // Log argv
1546 for (unsigned int i = 0; argv[i]; i++)
1547 DEBUG(jail->pakfire, " argv[%d] = %s\n", i, argv[i]);
1548
1549 // exec() command
1550 r = execvpe(argv[0], (char**)argv, jail->env);
1551 if (r < 0)
1552 ERROR(jail->pakfire, "Could not execve(): %m\n");
1553
1554 // Translate errno into regular exit code
1555 switch (errno) {
1556 case ENOENT:
1557 r = 127;
1558 break;
1559
1560 default:
1561 r = 1;
1562 }
1563
1564 // We should not get here
1565 return r;
1566 }
1567
1568 // Run a command in the jail
1569 static int __pakfire_jail_exec(struct pakfire_jail* jail, const char* argv[],
1570 const int interactive,
1571 pakfire_jail_communicate_in communicate_in,
1572 pakfire_jail_communicate_out communicate_out,
1573 void* data) {
1574 int exit = -1;
1575 int r;
1576
1577 // Check if argv is valid
1578 if (!argv || !argv[0]) {
1579 errno = EINVAL;
1580 return -1;
1581 }
1582
1583 // Send any output to the default logger if no callback is set
1584 if (!communicate_out)
1585 communicate_out = pakfire_jail_default_log_callback;
1586
1587 // Initialize context for this call
1588 struct pakfire_jail_exec ctx = {
1589 .flags = 0,
1590
1591 .pipes = {
1592 .stdin = { 0, 0 },
1593 .stdout = { 0, 0 },
1594 .stderr = { 0, 0 },
1595 },
1596
1597 .communicate = {
1598 .in = communicate_in,
1599 .out = communicate_out,
1600 .data = data,
1601 },
1602 };
1603
1604 DEBUG(jail->pakfire, "Executing jail...\n");
1605
1606 // Enable networking in interactive mode
1607 if (interactive)
1608 ctx.flags |= PAKFIRE_JAIL_HAS_NETWORKING;
1609
1610 /*
1611 Setup a file descriptor which can be used to notify the client that the parent
1612 has completed configuration.
1613 */
1614 ctx.completed_fd = eventfd(0, EFD_CLOEXEC);
1615 if (ctx.completed_fd < 0) {
1616 ERROR(jail->pakfire, "eventfd() failed: %m\n");
1617 return -1;
1618 }
1619
1620 // Create pipes to communicate with child process if we are not running interactively
1621 if (!interactive) {
1622 // stdin (only if callback is set)
1623 if (ctx.communicate.in) {
1624 r = pakfire_jail_setup_pipe(jail, &ctx.pipes.stdin, 0);
1625 if (r)
1626 goto ERROR;
1627 }
1628
1629 // stdout
1630 r = pakfire_jail_setup_pipe(jail, &ctx.pipes.stdout, 0);
1631 if (r)
1632 goto ERROR;
1633
1634 // stderr
1635 r = pakfire_jail_setup_pipe(jail, &ctx.pipes.stderr, 0);
1636 if (r)
1637 goto ERROR;
1638 }
1639
1640 // Setup pipes for logging
1641 // INFO
1642 r = pakfire_jail_setup_pipe(jail, &ctx.pipes.log_INFO, O_CLOEXEC);
1643 if (r)
1644 goto ERROR;
1645
1646 // ERROR
1647 r = pakfire_jail_setup_pipe(jail, &ctx.pipes.log_ERROR, O_CLOEXEC);
1648 if (r)
1649 goto ERROR;
1650
1651 #ifdef ENABLE_DEBUG
1652 // DEBUG
1653 r = pakfire_jail_setup_pipe(jail, &ctx.pipes.log_DEBUG, O_CLOEXEC);
1654 if (r)
1655 goto ERROR;
1656 #endif /* ENABLE_DEBUG */
1657
1658 // Configure child process
1659 struct clone_args args = {
1660 .flags =
1661 CLONE_NEWCGROUP |
1662 CLONE_NEWIPC |
1663 CLONE_NEWNS |
1664 CLONE_NEWPID |
1665 CLONE_NEWUSER |
1666 CLONE_NEWUTS |
1667 CLONE_PIDFD,
1668 .exit_signal = SIGCHLD,
1669 .pidfd = (long long unsigned int)&ctx.pidfd,
1670 };
1671
1672 // Launch the process in a cgroup that is a leaf of the configured cgroup
1673 if (jail->cgroup) {
1674 args.flags |= CLONE_INTO_CGROUP;
1675
1676 // Fetch our UUID
1677 const char* uuid = pakfire_jail_uuid(jail);
1678
1679 // Create a temporary cgroup
1680 r = pakfire_cgroup_child(&ctx.cgroup, jail->cgroup, uuid, 0);
1681 if (r) {
1682 ERROR(jail->pakfire, "Could not create cgroup for jail: %m\n");
1683 goto ERROR;
1684 }
1685
1686 // Clone into this cgroup
1687 args.cgroup = pakfire_cgroup_fd(ctx.cgroup);
1688 }
1689
1690 // Setup networking
1691 if (!pakfire_jail_exec_has_flag(&ctx, PAKFIRE_JAIL_HAS_NETWORKING)) {
1692 args.flags |= CLONE_NEWNET;
1693 }
1694
1695 // Fork this process
1696 ctx.pid = clone3(&args, sizeof(args));
1697 if (ctx.pid < 0) {
1698 ERROR(jail->pakfire, "Could not clone: %m\n");
1699 return -1;
1700
1701 // Child process
1702 } else if (ctx.pid == 0) {
1703 r = pakfire_jail_child(jail, &ctx, argv);
1704 _exit(r);
1705 }
1706
1707 // Parent process
1708 r = pakfire_jail_parent(jail, &ctx);
1709 if (r)
1710 goto ERROR;
1711
1712 DEBUG(jail->pakfire, "Waiting for PID %d to finish its work\n", ctx.pid);
1713
1714 // Read output of the child process
1715 r = pakfire_jail_wait(jail, &ctx);
1716 if (r)
1717 goto ERROR;
1718
1719 // Handle exit status
1720 switch (ctx.status.si_code) {
1721 case CLD_EXITED:
1722 DEBUG(jail->pakfire, "The child process exited with code %d\n",
1723 ctx.status.si_status);
1724
1725 // Pass exit code
1726 exit = ctx.status.si_status;
1727 break;
1728
1729 case CLD_KILLED:
1730 ERROR(jail->pakfire, "The child process was killed\n");
1731 exit = 139;
1732 break;
1733
1734 case CLD_DUMPED:
1735 ERROR(jail->pakfire, "The child process terminated abnormally\n");
1736 break;
1737
1738 // Log anything else
1739 default:
1740 ERROR(jail->pakfire, "Unknown child exit code: %d\n", ctx.status.si_code);
1741 break;
1742 }
1743
1744 ERROR:
1745 // Destroy the temporary cgroup (if any)
1746 if (ctx.cgroup) {
1747 // Read cgroup stats
1748 r = pakfire_cgroup_stat(ctx.cgroup, &ctx.cgroup_stats);
1749 if (r) {
1750 ERROR(jail->pakfire, "Could not read cgroup stats: %m\n");
1751 } else {
1752 pakfire_cgroup_stat_dump(ctx.cgroup, &ctx.cgroup_stats);
1753 }
1754
1755 pakfire_cgroup_destroy(ctx.cgroup);
1756 pakfire_cgroup_unref(ctx.cgroup);
1757 }
1758
1759 // Close any file descriptors
1760 pakfire_jail_close_pipe(jail, ctx.pipes.stdin);
1761 pakfire_jail_close_pipe(jail, ctx.pipes.stdout);
1762 pakfire_jail_close_pipe(jail, ctx.pipes.stderr);
1763 if (ctx.pidfd)
1764 close(ctx.pidfd);
1765 pakfire_jail_close_pipe(jail, ctx.pipes.log_INFO);
1766 pakfire_jail_close_pipe(jail, ctx.pipes.log_ERROR);
1767 pakfire_jail_close_pipe(jail, ctx.pipes.log_DEBUG);
1768
1769 return exit;
1770 }
1771
1772 PAKFIRE_EXPORT int pakfire_jail_exec(
1773 struct pakfire_jail* jail,
1774 const char* argv[],
1775 pakfire_jail_communicate_in callback_in,
1776 pakfire_jail_communicate_out callback_out,
1777 void* data) {
1778 return __pakfire_jail_exec(jail, argv, 0, callback_in, callback_out, data);
1779 }
1780
/*
	Executes argv in the jail in interactive mode, without any callbacks.

	The interactive environment is set up first; if that fails, its
	return value is passed on and nothing is executed.
*/
static int pakfire_jail_exec_interactive(
		struct pakfire_jail* jail, const char* argv[]) {
	// Setup interactive stuff
	const int r = pakfire_jail_setup_interactive_env(jail);

	if (r == 0)
		return __pakfire_jail_exec(jail, argv, 1, NULL, NULL, NULL);

	return r;
}
1792
1793 int pakfire_jail_exec_script(struct pakfire_jail* jail,
1794 const char* script,
1795 const size_t size,
1796 const char* args[],
1797 pakfire_jail_communicate_in callback_in,
1798 pakfire_jail_communicate_out callback_out,
1799 void* data) {
1800 char path[PATH_MAX];
1801 const char** argv = NULL;
1802 FILE* f = NULL;
1803 int r;
1804
1805 const char* root = pakfire_get_path(jail->pakfire);
1806
1807 // Write the scriptlet to disk
1808 r = pakfire_path_join(path, root, PAKFIRE_TMP_DIR "/pakfire-script.XXXXXX");
1809 if (r)
1810 goto ERROR;
1811
1812 // Create a temporary file
1813 f = pakfire_mktemp(path, 0700);
1814 if (!f) {
1815 ERROR(jail->pakfire, "Could not create temporary file: %m\n");
1816 goto ERROR;
1817 }
1818
1819 DEBUG(jail->pakfire, "Writing script to %s:\n%.*s\n", path, (int)size, script);
1820
1821 // Write data
1822 r = fprintf(f, "%s", script);
1823 if (r < 0) {
1824 ERROR(jail->pakfire, "Could not write script to file %s: %m\n", path);
1825 goto ERROR;
1826 }
1827
1828 // Close file
1829 r = fclose(f);
1830 if (r) {
1831 ERROR(jail->pakfire, "Could not close script file %s: %m\n", path);
1832 goto ERROR;
1833 }
1834
1835 f = NULL;
1836
1837 // Count how many arguments were passed
1838 unsigned int argc = 1;
1839 if (args) {
1840 for (const char** arg = args; *arg; arg++)
1841 argc++;
1842 }
1843
1844 argv = calloc(argc + 1, sizeof(*argv));
1845 if (!argv) {
1846 ERROR(jail->pakfire, "Could not allocate argv: %m\n");
1847 goto ERROR;
1848 }
1849
1850 // Set command
1851 argv[0] = (root) ? pakfire_path_relpath(root, path) : path;
1852
1853 // Copy args
1854 for (unsigned int i = 1; i < argc; i++)
1855 argv[i] = args[i-1];
1856
1857 // Run the script
1858 r = pakfire_jail_exec(jail, argv, callback_in, callback_out, data);
1859
1860 ERROR:
1861 if (argv)
1862 free(argv);
1863 if (f)
1864 fclose(f);
1865
1866 // Remove script from disk
1867 if (*path)
1868 unlink(path);
1869
1870 return r;
1871 }
1872
1873 /*
1874 A convenience function that creates a new jail, runs the given command and destroys
1875 the jail again.
1876 */
1877 int pakfire_jail_run(struct pakfire* pakfire, const char* argv[], int flags, char** output) {
1878 struct pakfire_jail* jail = NULL;
1879 int r;
1880
1881 // Create a new jail
1882 r = pakfire_jail_create(&jail, pakfire, flags);
1883 if (r)
1884 goto ERROR;
1885
1886 // Execute the command
1887 r = pakfire_jail_exec(jail, argv, NULL, pakfire_jail_capture_stdout, output);
1888
1889 ERROR:
1890 if (jail)
1891 pakfire_jail_unref(jail);
1892
1893 return r;
1894 }
1895
/*
	Convenience wrapper: creates a jail with the given flags, executes the
	script in it without any callbacks, and releases the jail again.

	Returns the error of pakfire_jail_create() if the jail could not be
	created, the return value of pakfire_jail_exec_script() otherwise.
*/
int pakfire_jail_run_script(struct pakfire* pakfire,
		const char* script, const size_t length, const char* argv[], int flags) {
	struct pakfire_jail* jail = NULL;

	// Create a new jail
	int r = pakfire_jail_create(&jail, pakfire, flags);

	// Execute the script (only if we have a jail)
	if (r == 0)
		r = pakfire_jail_exec_script(jail, script, length, argv, NULL, NULL, NULL);

	// Cleanup
	if (jail)
		pakfire_jail_unref(jail);

	return r;
}
1915
/*
	Launches "/bin/bash --login" interactively in the jail and returns
	whatever pakfire_jail_exec_interactive() returns.
*/
int pakfire_jail_shell(struct pakfire_jail* jail) {
	// The command line of the shell
	const char* shell[] = { "/bin/bash", "--login", NULL };

	return pakfire_jail_exec_interactive(jail, shell);
}
1924
1925 int pakfire_jail_ldconfig(struct pakfire* pakfire) {
1926 char path[PATH_MAX];
1927
1928 const char* ldconfig = "/sbin/ldconfig";
1929
1930 // Check if ldconfig exists before calling it to avoid overhead
1931 int r = pakfire_path(pakfire, path, "%s", ldconfig);
1932 if (r)
1933 return r;
1934
1935 // Check if ldconfig is executable
1936 r = access(path, X_OK);
1937 if (r) {
1938 DEBUG(pakfire, "%s is not executable. Skipping...\n", ldconfig);
1939 return 0;
1940 }
1941
1942 const char* argv[] = {
1943 ldconfig, NULL,
1944 };
1945
1946 // Run ldconfig
1947 return pakfire_jail_run(pakfire, argv, 0, NULL);
1948 }