]> git.ipfire.org Git - pakfire.git/blob - src/libpakfire/jail.c
f59d07d7ef9571ba9812eb6b6845843997121269
[pakfire.git] / src / libpakfire / jail.c
1 /*#############################################################################
2 # #
3 # Pakfire - The IPFire package management system #
4 # Copyright (C) 2022 Pakfire development team #
5 # #
6 # This program is free software: you can redistribute it and/or modify #
7 # it under the terms of the GNU General Public License as published by #
8 # the Free Software Foundation, either version 3 of the License, or #
9 # (at your option) any later version. #
10 # #
11 # This program is distributed in the hope that it will be useful, #
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of #
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
14 # GNU General Public License for more details. #
15 # #
16 # You should have received a copy of the GNU General Public License #
17 # along with this program. If not, see <http://www.gnu.org/licenses/>. #
18 # #
19 #############################################################################*/
20
21 #include <errno.h>
22 #include <fcntl.h>
23 #include <linux/capability.h>
24 #include <linux/sched.h>
25 #include <sys/wait.h>
26 #include <linux/wait.h>
27 #include <sched.h>
28 #include <signal.h>
29 #include <stdlib.h>
30 #include <syscall.h>
31 #include <sys/capability.h>
32 #include <sys/epoll.h>
33 #include <sys/eventfd.h>
34 #include <sys/mount.h>
35 #include <sys/personality.h>
36 #include <sys/prctl.h>
37 #include <sys/resource.h>
38 #include <sys/signalfd.h>
39 #include <sys/timerfd.h>
40 #include <sys/types.h>
41 #include <sys/wait.h>
42
43 // libnl3
44 #include <net/if.h>
45 #include <netlink/route/link.h>
46
47 // libseccomp
48 #include <seccomp.h>
49
50 // libuuid
51 #include <uuid.h>
52
53 #include <pakfire/arch.h>
54 #include <pakfire/cgroup.h>
55 #include <pakfire/jail.h>
56 #include <pakfire/logging.h>
57 #include <pakfire/mount.h>
58 #include <pakfire/pakfire.h>
59 #include <pakfire/path.h>
60 #include <pakfire/private.h>
61 #include <pakfire/pwd.h>
62 #include <pakfire/string.h>
63 #include <pakfire/util.h>
64
// Size of a log buffer in bytes (64 KiB). The expression is parenthesised so
// that the macro expands correctly inside larger expressions.
#define BUFFER_SIZE      (1024 * 64)
// Number of slots in the environment array of a jail
#define ENVIRON_SIZE     128
// Maximum number of events fetched by a single call to epoll_wait()
#define EPOLL_MAX_EVENTS 2
// Maximum number of bind-mounts that can be registered with a jail
#define MAX_MOUNTPOINTS  8
69
// Environment variables that every jail starts out with.
// The list is terminated by an entry whose key is NULL.
static const struct environ {
	const char* key;
	const char* val;
} ENV[] = {
	{ .key = "HOME", .val = "/root" },
	{ .key = "LANG", .val = "C.utf-8" },
	{ .key = "PATH", .val = "/usr/local/sbin:/usr/sbin:/sbin:/usr/local/bin:/usr/bin:/bin" },
	{ .key = "TERM", .val = "vt100" },

	// Lets processes detect that they are running inside a Pakfire container
	{ .key = "container", .val = "pakfire" },

	// Sentinel
	{ .key = NULL, .val = NULL },
};
84
// One bind-mount that has been registered with a jail
struct pakfire_jail_mountpoint {
	// Path that is being mounted
	char source[PATH_MAX];

	// Path where the source is being mounted to
	char target[PATH_MAX];

	// Mount flags as passed to pakfire_jail_bind()
	int flags;
};
90
91 struct pakfire_jail {
92 struct pakfire_ctx* ctx;
93 struct pakfire* pakfire;
94 int nrefs;
95
96 // A unique ID for each jail
97 uuid_t uuid;
98 char __uuid[UUID_STR_LEN];
99
100 // Resource Limits
101 int nice;
102
103 // Timeout
104 struct itimerspec timeout;
105
106 // CGroup
107 struct pakfire_cgroup* cgroup;
108
109 // Environment
110 char* env[ENVIRON_SIZE];
111
112 // Mountpoints
113 struct pakfire_jail_mountpoint mountpoints[MAX_MOUNTPOINTS];
114 unsigned int num_mountpoints;
115
116 // Callbacks
117 struct pakfire_jail_callbacks {
118 // Log
119 pakfire_jail_log_callback log;
120 void* log_data;
121 } callbacks;
122 };
123
124 struct pakfire_log_buffer {
125 char data[BUFFER_SIZE];
126 size_t used;
127 };
128
129 struct pakfire_jail_exec {
130 int flags;
131
132 // PID (of the child)
133 pid_t pid;
134 int pidfd;
135
136 // Process status (from waitid)
137 siginfo_t status;
138
139 // FD to notify the client that the parent has finished initialization
140 int completed_fd;
141
142 // Log pipes
143 struct pakfire_jail_pipes {
144 int stdin[2];
145 int stdout[2];
146 int stderr[2];
147
148 // Logging
149 int log_INFO[2];
150 int log_ERROR[2];
151 #ifdef ENABLE_DEBUG
152 int log_DEBUG[2];
153 #endif /* ENABLE_DEBUG */
154 } pipes;
155
156 // Communicate
157 struct pakfire_jail_communicate {
158 pakfire_jail_communicate_in in;
159 pakfire_jail_communicate_out out;
160 void* data;
161 } communicate;
162
163 // Log buffers
164 struct pakfire_jail_buffers {
165 struct pakfire_log_buffer stdout;
166 struct pakfire_log_buffer stderr;
167
168 // Logging
169 struct pakfire_log_buffer log_INFO;
170 struct pakfire_log_buffer log_ERROR;
171 #ifdef ENABLE_DEBUG
172 struct pakfire_log_buffer log_DEBUG;
173 #endif /* ENABLE_DEBUG */
174 } buffers;
175
176 struct pakfire_cgroup* cgroup;
177 struct pakfire_cgroup_stats cgroup_stats;
178 };
179
/*
	Invokes the clone3() system call directly.

	Returns whatever syscall() returns.
*/
static int clone3(struct clone_args* args, size_t size) {
	const long result = syscall(__NR_clone3, args, size);

	return result;
}
183
184 static int pidfd_send_signal(int pidfd, int sig, siginfo_t* info, unsigned int flags) {
185 return syscall(SYS_pidfd_send_signal, pidfd, sig, info, flags);
186 }
187
/*
	Invokes the pivot_root() system call directly.

	Returns whatever syscall() returns.
*/
static int pivot_root(const char* new_root, const char* old_root) {
	const long result = syscall(SYS_pivot_root, new_root, old_root);

	return result;
}
191
192 static int pakfire_jail_exec_has_flag(
193 const struct pakfire_jail_exec* ctx, const enum pakfire_jail_exec_flags flag) {
194 return ctx->flags & flag;
195 }
196
197 static void pakfire_jail_free(struct pakfire_jail* jail) {
198 DEBUG(jail->pakfire, "Freeing jail at %p\n", jail);
199
200 // Free environment
201 for (unsigned int i = 0; jail->env[i]; i++)
202 free(jail->env[i]);
203
204 if (jail->cgroup)
205 pakfire_cgroup_unref(jail->cgroup);
206 if (jail->pakfire)
207 pakfire_unref(jail->pakfire);
208 if (jail->ctx)
209 pakfire_ctx_unref(jail->ctx);
210 free(jail);
211 }
212
/*
	The default log callback: forwards the line to the Pakfire logger
	that matches the priority. Lines of any other priority are dropped.

	Always returns zero.
*/
static int pakfire_jail_default_log_callback(struct pakfire* pakfire, void* data,
		int priority, const char* line, size_t length) {
	if (priority == LOG_INFO) {
		INFO(pakfire, "%s", line);

	} else if (priority == LOG_ERR) {
		ERROR(pakfire, "%s", line);

#ifdef ENABLE_DEBUG
	} else if (priority == LOG_DEBUG) {
		DEBUG(pakfire, "%s", line);
#endif
	}

	return 0;
}
236
237 static const char* pakfire_jail_uuid(struct pakfire_jail* jail) {
238 if (!*jail->__uuid)
239 uuid_unparse_lower(jail->uuid, jail->__uuid);
240
241 return jail->__uuid;
242 }
243
/*
	Prepares the environment for an interactive session: sets the prompt
	and copies TERM and LANG from the calling environment if they are set.

	Returns zero on success or the error of pakfire_jail_set_env().
*/
static int pakfire_jail_setup_interactive_env(struct pakfire_jail* jail) {
	// Variables that are copied from the calling environment
	static const char* const inherited[] = { "TERM", "LANG", NULL };
	int r;

	// Set PS1
	r = pakfire_jail_set_env(jail, "PS1", "pakfire-jail \\w> ");
	if (r)
		return r;

	for (const char* const* name = inherited; *name; name++) {
		const char* value = secure_getenv(*name);

		// Skip anything that is not set
		if (!value)
			continue;

		r = pakfire_jail_set_env(jail, *name, value);
		if (r)
			return r;
	}

	return 0;
}
268
269 PAKFIRE_EXPORT int pakfire_jail_create(struct pakfire_jail** jail, struct pakfire* pakfire) {
270 int r;
271
272 const char* arch = pakfire_get_effective_arch(pakfire);
273
274 // Allocate a new jail
275 struct pakfire_jail* j = calloc(1, sizeof(*j));
276 if (!j)
277 return 1;
278
279 // Reference context
280 j->ctx = pakfire_ctx(pakfire);
281
282 // Reference Pakfire
283 j->pakfire = pakfire_ref(pakfire);
284
285 // Initialize reference counter
286 j->nrefs = 1;
287
288 // Generate a random UUID
289 uuid_generate_random(j->uuid);
290
291 DEBUG(j->pakfire, "Allocated new jail at %p\n", j);
292
293 // Set the default logging callback
294 pakfire_jail_set_log_callback(j, pakfire_jail_default_log_callback, NULL);
295
296 // Set default environment
297 for (const struct environ* e = ENV; e->key; e++) {
298 r = pakfire_jail_set_env(j, e->key, e->val);
299 if (r)
300 goto ERROR;
301 }
302
303 // Enable all CPU features that CPU has to offer
304 if (!pakfire_arch_is_supported_by_host(arch)) {
305 r = pakfire_jail_set_env(j, "QEMU_CPU", "max");
306 if (r)
307 goto ERROR;
308 }
309
310 // Set container UUID
311 r = pakfire_jail_set_env(j, "container_uuid", pakfire_jail_uuid(j));
312 if (r)
313 goto ERROR;
314
315 // Disable systemctl to talk to systemd
316 if (!pakfire_on_root(j->pakfire)) {
317 r = pakfire_jail_set_env(j, "SYSTEMD_OFFLINE", "1");
318 if (r)
319 goto ERROR;
320 }
321
322 // Done
323 *jail = j;
324 return 0;
325
326 ERROR:
327 pakfire_jail_free(j);
328
329 return r;
330 }
331
332 PAKFIRE_EXPORT struct pakfire_jail* pakfire_jail_ref(struct pakfire_jail* jail) {
333 ++jail->nrefs;
334
335 return jail;
336 }
337
338 PAKFIRE_EXPORT struct pakfire_jail* pakfire_jail_unref(struct pakfire_jail* jail) {
339 if (--jail->nrefs > 0)
340 return jail;
341
342 pakfire_jail_free(jail);
343 return NULL;
344 }
345
346 // Logging Callback
347
348 PAKFIRE_EXPORT void pakfire_jail_set_log_callback(struct pakfire_jail* jail,
349 pakfire_jail_log_callback callback, void* data) {
350 jail->callbacks.log = callback;
351 jail->callbacks.log_data = data;
352 }
353
354 // Resource Limits
355
356 PAKFIRE_EXPORT int pakfire_jail_nice(struct pakfire_jail* jail, int nice) {
357 // Check if nice level is in range
358 if (nice < -19 || nice > 20) {
359 errno = EINVAL;
360 return 1;
361 }
362
363 // Store nice level
364 jail->nice = nice;
365
366 return 0;
367 }
368
369 int pakfire_jail_set_cgroup(struct pakfire_jail* jail, struct pakfire_cgroup* cgroup) {
370 // Free any previous cgroup
371 if (jail->cgroup) {
372 pakfire_cgroup_unref(jail->cgroup);
373 jail->cgroup = NULL;
374 }
375
376 // Set any new cgroup
377 if (cgroup) {
378 DEBUG(jail->pakfire, "Setting cgroup %p\n", cgroup);
379
380 jail->cgroup = pakfire_cgroup_ref(cgroup);
381 }
382
383 // Done
384 return 0;
385 }
386
387 // Environment
388
389 // Returns the length of the environment
390 static unsigned int pakfire_jail_env_length(struct pakfire_jail* jail) {
391 unsigned int i = 0;
392
393 // Count everything in the environment
394 for (char** e = jail->env; *e; e++)
395 i++;
396
397 return i;
398 }
399
400 // Finds an existing environment variable and returns its index or -1 if not found
401 static int pakfire_jail_find_env(struct pakfire_jail* jail, const char* key) {
402 if (!key) {
403 errno = EINVAL;
404 return -1;
405 }
406
407 const size_t length = strlen(key);
408
409 for (unsigned int i = 0; jail->env[i]; i++) {
410 if ((pakfire_string_startswith(jail->env[i], key)
411 && *(jail->env[i] + length) == '=')) {
412 return i;
413 }
414 }
415
416 // Nothing found
417 return -1;
418 }
419
420 // Returns the value of an environment variable or NULL
421 PAKFIRE_EXPORT const char* pakfire_jail_get_env(struct pakfire_jail* jail,
422 const char* key) {
423 int i = pakfire_jail_find_env(jail, key);
424 if (i < 0)
425 return NULL;
426
427 return jail->env[i] + strlen(key) + 1;
428 }
429
430 // Sets an environment variable
431 PAKFIRE_EXPORT int pakfire_jail_set_env(struct pakfire_jail* jail,
432 const char* key, const char* value) {
433 // Find the index where to write this value to
434 int i = pakfire_jail_find_env(jail, key);
435 if (i < 0)
436 i = pakfire_jail_env_length(jail);
437
438 // Return -ENOSPC when the environment is full
439 if (i >= ENVIRON_SIZE) {
440 errno = ENOSPC;
441 return -1;
442 }
443
444 // Free any previous value
445 if (jail->env[i])
446 free(jail->env[i]);
447
448 // Format and set environment variable
449 asprintf(&jail->env[i], "%s=%s", key, value);
450
451 DEBUG(jail->pakfire, "Set environment variable: %s\n", jail->env[i]);
452
453 return 0;
454 }
455
456 // Imports an environment
457 PAKFIRE_EXPORT int pakfire_jail_import_env(struct pakfire_jail* jail, const char* env[]) {
458 if (!env)
459 return 0;
460
461 char* key;
462 char* val;
463 int r;
464
465 // Copy environment variables
466 for (unsigned int i = 0; env[i]; i++) {
467 r = pakfire_string_partition(env[i], "=", &key, &val);
468 if (r)
469 continue;
470
471 // Set value
472 r = pakfire_jail_set_env(jail, key, val);
473
474 if (key)
475 free(key);
476 if (val)
477 free(val);
478
479 // Break on error
480 if (r)
481 return r;
482 }
483
484 return 0;
485 }
486
487 // Timeout
488
489 PAKFIRE_EXPORT int pakfire_jail_set_timeout(
490 struct pakfire_jail* jail, unsigned int timeout) {
491 // Store value
492 jail->timeout.it_value.tv_sec = timeout;
493
494 if (timeout > 0)
495 DEBUG(jail->pakfire, "Timeout set to %u second(s)\n", timeout);
496 else
497 DEBUG(jail->pakfire, "Timeout disabled\n");
498
499 return 0;
500 }
501
502 static int pakfire_jail_create_timer(struct pakfire_jail* jail) {
503 int r;
504
505 // Nothing to do if no timeout has been set
506 if (!jail->timeout.it_value.tv_sec)
507 return -1;
508
509 // Create a new timer
510 const int fd = timerfd_create(CLOCK_MONOTONIC, 0);
511 if (fd < 0) {
512 ERROR(jail->pakfire, "Could not create timer: %m\n");
513 goto ERROR;
514 }
515
516 // Arm timer
517 r = timerfd_settime(fd, 0, &jail->timeout, NULL);
518 if (r) {
519 ERROR(jail->pakfire, "Could not arm timer: %m\n");
520 goto ERROR;
521 }
522
523 return fd;
524
525 ERROR:
526 if (fd >= 0)
527 close(fd);
528
529 return -1;
530 }
531
532 // Signals
533
534 static int pakfire_jail_handle_signals(struct pakfire_jail* jail) {
535 sigset_t mask;
536 int r;
537
538 sigemptyset(&mask);
539 sigaddset(&mask, SIGINT);
540
541 // Block signals
542 r = sigprocmask(SIG_BLOCK, &mask, NULL);
543 if (r < 0) {
544 ERROR(jail->pakfire, "Failed to block signals: %m\n");
545 return r;
546 }
547
548 // Create a file descriptor
549 r = signalfd(-1, &mask, SFD_NONBLOCK|SFD_CLOEXEC);
550 if (r < 0) {
551 ERROR(jail->pakfire, "Failed to create signalfd: %m\n");
552 return r;
553 }
554
555 return r;
556 }
557
558 /*
559 This function replaces any logging in the child process.
560
561 All log messages will be sent to the parent process through their respective pipes.
562 */
563 static void pakfire_jail_log_redirect(void* data, int priority, const char* file,
564 int line, const char* fn, const char* format, va_list args) {
565 struct pakfire_jail_pipes* pipes = (struct pakfire_jail_pipes*)data;
566 int fd;
567
568 switch (priority) {
569 case LOG_INFO:
570 fd = pipes->log_INFO[1];
571 break;
572
573 case LOG_ERR:
574 fd = pipes->log_ERROR[1];
575 break;
576
577 #ifdef ENABLE_DEBUG
578 case LOG_DEBUG:
579 fd = pipes->log_DEBUG[1];
580 break;
581 #endif /* ENABLE_DEBUG */
582
583 // Ignore any messages of an unknown priority
584 default:
585 return;
586 }
587
588 // Send the log message
589 if (fd >= 0)
590 vdprintf(fd, format, args);
591 }
592
593 static int pakfire_jail_log_buffer_is_full(const struct pakfire_log_buffer* buffer) {
594 return (sizeof(buffer->data) == buffer->used);
595 }
596
597 /*
598 This function reads as much data as it can from the file descriptor.
599 If it finds a whole line in it, it will send it to the logger and repeat the process.
600 If not newline character is found, it will try to read more data until it finds one.
601 */
602 static int pakfire_jail_handle_log(struct pakfire_jail* jail,
603 struct pakfire_jail_exec* ctx, int priority, int fd,
604 struct pakfire_log_buffer* buffer, pakfire_jail_communicate_out callback, void* data) {
605 char line[BUFFER_SIZE + 1];
606
607 // Fill up buffer from fd
608 if (buffer->used < sizeof(buffer->data)) {
609 ssize_t bytes_read = read(fd, buffer->data + buffer->used,
610 sizeof(buffer->data) - buffer->used);
611
612 // Handle errors
613 if (bytes_read < 0) {
614 ERROR(jail->pakfire, "Could not read from fd %d: %m\n", fd);
615 return -1;
616 }
617
618 // Update buffer size
619 buffer->used += bytes_read;
620 }
621
622 // See if we have any lines that we can write
623 while (buffer->used) {
624 // Search for the end of the first line
625 char* eol = memchr(buffer->data, '\n', buffer->used);
626
627 // No newline found
628 if (!eol) {
629 // If the buffer is full, we send the content to the logger and try again
630 // This should not happen in practise
631 if (pakfire_jail_log_buffer_is_full(buffer)) {
632 DEBUG(jail->pakfire, "Logging buffer is full. Sending all content\n");
633
634 eol = buffer->data + sizeof(buffer->data) - 1;
635
636 // Otherwise we might have only read parts of the output
637 } else
638 break;
639 }
640
641 // Find the length of the string
642 size_t length = eol - buffer->data + 1;
643
644 // Copy the line into the buffer
645 memcpy(line, buffer->data, length);
646
647 // Terminate the string
648 line[length] = '\0';
649
650 // Log the line
651 if (callback) {
652 int r = callback(jail->pakfire, data, priority, line, length);
653 if (r) {
654 ERROR(jail->pakfire, "The logging callback returned an error: %d\n", r);
655 return r;
656 }
657 }
658
659 // Remove line from buffer
660 memmove(buffer->data, buffer->data + length, buffer->used - length);
661 buffer->used -= length;
662 }
663
664 return 0;
665 }
666
667 static int pakfire_jail_stream_stdin(struct pakfire_jail* jail,
668 struct pakfire_jail_exec* ctx, const int fd) {
669 int r;
670
671 // Nothing to do if there is no stdin callback set
672 if (!ctx->communicate.in) {
673 DEBUG(jail->pakfire, "Callback for standard input is not set\n");
674 return 0;
675 }
676
677 // Skip if the writing pipe has already been closed
678 if (!ctx->pipes.stdin[1])
679 return 0;
680
681 DEBUG(jail->pakfire, "Streaming standard input...\n");
682
683 // Calling the callback
684 r = ctx->communicate.in(jail->pakfire, ctx->communicate.data, fd);
685
686 DEBUG(jail->pakfire, "Standard input callback finished: %d\n", r);
687
688 // The callback signaled that it has written everything
689 if (r == EOF) {
690 DEBUG(jail->pakfire, "Closing standard input pipe\n");
691
692 // Close the file-descriptor
693 close(fd);
694
695 // Reset the file-descriptor so it won't be closed again later
696 ctx->pipes.stdin[1] = -1;
697
698 // Report success
699 r = 0;
700 }
701
702 return r;
703 }
704
705 static int pakfire_jail_setup_pipe(struct pakfire_jail* jail, int (*fds)[2], const int flags) {
706 int r = pipe2(*fds, flags);
707 if (r < 0) {
708 ERROR(jail->pakfire, "Could not setup pipe: %m\n");
709 return 1;
710 }
711
712 return 0;
713 }
714
/*
	Closes both ends of a pipe. Ends that are negative are skipped.
*/
static void pakfire_jail_close_pipe(struct pakfire_jail* jail, int fds[2]) {
	const int* const end = fds + 2;

	for (const int* fd = fds; fd < end; fd++) {
		if (*fd < 0)
			continue;

		close(*fd);
	}
}
720
/*
	Prepares a pipe for reading: the write end is closed (and marked as
	closed by setting it to -1) and the read end is returned.

	Returns -1 if the read end is not open.
*/
static int pakfire_jail_get_pipe_to_read(struct pakfire_jail* jail, int (*fds)[2]) {
	int* const ends = *fds;

	// We won't write to this pipe
	if (ends[1] >= 0) {
		close(ends[1]);
		ends[1] = -1;
	}

	// Hand out the read end if there is one
	return (ends[0] >= 0) ? ends[0] : -1;
}
742
/*
	Prepares a pipe for writing: the read end is closed (and marked as
	closed by setting it to -1) and the write end is returned.

	Returns -1 if the write end is not open.
*/
static int pakfire_jail_get_pipe_to_write(struct pakfire_jail* jail, int (*fds)[2]) {
	int* const ends = *fds;

	// We won't read from this pipe
	if (ends[0] >= 0) {
		close(ends[0]);
		ends[0] = -1;
	}

	// Hand out the write end if there is one
	return (ends[1] >= 0) ? ends[1] : -1;
}
760
/*
	Output callback that passes a line that has been received from the
	child process on to the parent logger.

	Always returns zero.
*/
static int pakfire_jail_log(struct pakfire* pakfire, void* data, int priority,
		const char* line, const size_t length) {
	// Only log as many characters as the line is long
	const int precision = (int)length;

	pakfire_log_condition(pakfire, priority, 0, "%.*s", precision, line);

	return 0;
}
768
769 static int pakfire_jail_wait(struct pakfire_jail* jail, struct pakfire_jail_exec* ctx) {
770 int epollfd = -1;
771 struct epoll_event ev;
772 struct epoll_event events[EPOLL_MAX_EVENTS];
773 struct signalfd_siginfo siginfo;
774 char garbage[8];
775 int r = 0;
776
777 // Fetch file descriptors from context
778 const int stdin = pakfire_jail_get_pipe_to_write(jail, &ctx->pipes.stdin);
779 const int stdout = pakfire_jail_get_pipe_to_read(jail, &ctx->pipes.stdout);
780 const int stderr = pakfire_jail_get_pipe_to_read(jail, &ctx->pipes.stderr);
781 const int pidfd = ctx->pidfd;
782
783 // Timer
784 const int timerfd = pakfire_jail_create_timer(jail);
785
786 // Logging
787 const int log_INFO = pakfire_jail_get_pipe_to_read(jail, &ctx->pipes.log_INFO);
788 const int log_ERROR = pakfire_jail_get_pipe_to_read(jail, &ctx->pipes.log_ERROR);
789 #ifdef ENABLE_DEBUG
790 const int log_DEBUG = pakfire_jail_get_pipe_to_read(jail, &ctx->pipes.log_DEBUG);
791 #endif /* ENABLE_DEBUG */
792
793 // Signals
794 const int signalfd = pakfire_jail_handle_signals(jail);
795
796 // Make a list of all file descriptors we are interested in
797 const int fds[] = {
798 stdin,
799 stdout,
800 stderr,
801 pidfd,
802 timerfd,
803 signalfd,
804 log_INFO,
805 log_ERROR,
806 #ifdef ENABLE_DEBUG
807 log_DEBUG,
808 #endif /* ENABLE_DEBUG */
809 };
810
811 // Setup epoll
812 epollfd = epoll_create1(0);
813 if (epollfd < 0) {
814 ERROR(jail->pakfire, "Could not initialize epoll(): %m\n");
815 r = 1;
816 goto ERROR;
817 }
818
819 // Turn file descriptors into non-blocking mode and add them to epoll()
820 for (unsigned int i = 0; i < sizeof(fds) / sizeof(*fds); i++) {
821 int fd = fds[i];
822
823 // Skip fds which were not initialized
824 if (fd < 0)
825 continue;
826
827 ev.events = EPOLLHUP;
828
829 if (fd == stdin)
830 ev.events |= EPOLLOUT;
831 else
832 ev.events |= EPOLLIN;
833
834 // Read flags
835 int flags = fcntl(fd, F_GETFL, 0);
836
837 // Set modified flags
838 if (fcntl(fd, F_SETFL, flags|O_NONBLOCK) < 0) {
839 ERROR(jail->pakfire,
840 "Could not set file descriptor %d into non-blocking mode: %m\n", fd);
841 r = 1;
842 goto ERROR;
843 }
844
845 ev.data.fd = fd;
846
847 if (epoll_ctl(epollfd, EPOLL_CTL_ADD, fd, &ev) < 0) {
848 ERROR(jail->pakfire, "Could not add file descriptor %d to epoll(): %m\n", fd);
849 r = 1;
850 goto ERROR;
851 }
852 }
853
854 int ended = 0;
855
856 // Loop for as long as the process is alive
857 while (!ended) {
858 int num = epoll_wait(epollfd, events, EPOLL_MAX_EVENTS, -1);
859 if (num < 1) {
860 // Ignore if epoll_wait() has been interrupted
861 if (errno == EINTR)
862 continue;
863
864 ERROR(jail->pakfire, "epoll_wait() failed: %m\n");
865 r = 1;
866
867 goto ERROR;
868 }
869
870 for (int i = 0; i < num; i++) {
871 int e = events[i].events;
872 int fd = events[i].data.fd;
873
874 struct pakfire_log_buffer* buffer = NULL;
875 pakfire_jail_communicate_out callback = NULL;
876 void* data = NULL;
877 int priority;
878
879 // Check if there is any data to be read
880 if (e & EPOLLIN) {
881 // Handle any changes to the PIDFD
882 if (fd == pidfd) {
883 // Call waidid() and store the result
884 r = waitid(P_PIDFD, ctx->pidfd, &ctx->status, WEXITED);
885 if (r) {
886 ERROR(jail->pakfire, "waitid() failed: %m\n");
887 goto ERROR;
888 }
889
890 // Mark that we have ended so that we will process the remaining
891 // events from epoll() now, but won't restart the outer loop.
892 ended = 1;
893 continue;
894
895 // Handle timer events
896 } else if (fd == timerfd) {
897 DEBUG(jail->pakfire, "Timer event received\n");
898
899 // Disarm the timer
900 r = read(timerfd, garbage, sizeof(garbage));
901 if (r < 1) {
902 ERROR(jail->pakfire, "Could not disarm timer: %m\n");
903 r = 1;
904 goto ERROR;
905 }
906
907 // Terminate the process if it hasn't already ended
908 if (!ended) {
909 DEBUG(jail->pakfire, "Terminating process...\n");
910
911 // Send SIGTERM to the process
912 r = pidfd_send_signal(pidfd, SIGKILL, NULL, 0);
913 if (r) {
914 ERROR(jail->pakfire, "Could not kill process: %m\n");
915 goto ERROR;
916 }
917 }
918
919 // There is nothing else to do
920 continue;
921
922 // Handle signals
923 } else if (fd == signalfd) {
924 // Read the signal
925 r = read(signalfd, &siginfo, sizeof(siginfo));
926 if (r < 1) {
927 ERROR(jail->pakfire, "Could not read signal: %m\n");
928 goto ERROR;
929 }
930
931 DEBUG(jail->pakfire, "Received signal %u\n", siginfo.ssi_signo);
932
933 // Handle signals
934 switch (siginfo.ssi_signo) {
935 // Pass SIGINT down to the child process
936 case SIGINT:
937 r = pidfd_send_signal(pidfd, siginfo.ssi_signo, NULL, 0);
938 if (r) {
939 ERROR(jail->pakfire, "Could not send signal to process: %m\n");
940 goto ERROR;
941 }
942 break;
943
944 default:
945 ERROR(jail->pakfire, "Received unhandled signal %u\n",
946 siginfo.ssi_signo);
947 break;
948 }
949
950 // Don't fall through to log processing
951 continue;
952
953 // Handle logging messages
954 } else if (fd == log_INFO) {
955 buffer = &ctx->buffers.log_INFO;
956 priority = LOG_INFO;
957
958 callback = pakfire_jail_log;
959
960 } else if (fd == log_ERROR) {
961 buffer = &ctx->buffers.log_ERROR;
962 priority = LOG_ERR;
963
964 callback = pakfire_jail_log;
965
966 #ifdef ENABLE_DEBUG
967 } else if (fd == log_DEBUG) {
968 buffer = &ctx->buffers.log_DEBUG;
969 priority = LOG_DEBUG;
970
971 callback = pakfire_jail_log;
972 #endif /* ENABLE_DEBUG */
973
974 // Handle anything from the log pipes
975 } else if (fd == stdout) {
976 buffer = &ctx->buffers.stdout;
977 priority = LOG_INFO;
978
979 // Send any output to the default logger if no callback is set
980 if (ctx->communicate.out) {
981 callback = ctx->communicate.out;
982 data = ctx->communicate.data;
983 } else {
984 callback = jail->callbacks.log;
985 data = jail->callbacks.log_data;
986 }
987
988 } else if (fd == stderr) {
989 buffer = &ctx->buffers.stderr;
990 priority = LOG_ERR;
991
992 // Send any output to the default logger if no callback is set
993 if (ctx->communicate.out) {
994 callback = ctx->communicate.out;
995 data = ctx->communicate.data;
996 } else {
997 callback = jail->callbacks.log;
998 data = jail->callbacks.log_data;
999 }
1000
1001 } else {
1002 DEBUG(jail->pakfire, "Received invalid file descriptor %d\n", fd);
1003 continue;
1004 }
1005
1006 // Handle log event
1007 r = pakfire_jail_handle_log(jail, ctx, priority, fd, buffer, callback, data);
1008 if (r)
1009 goto ERROR;
1010 }
1011
1012 if (e & EPOLLOUT) {
1013 // Handle standard input
1014 if (fd == stdin) {
1015 r = pakfire_jail_stream_stdin(jail, ctx, fd);
1016 if (r) {
1017 switch (errno) {
1018 // Ignore if we filled up the buffer
1019 case EAGAIN:
1020 break;
1021
1022 default:
1023 ERROR(jail->pakfire, "Could not write to stdin: %m\n");
1024 goto ERROR;
1025 }
1026 }
1027 }
1028 }
1029
1030 // Check if any file descriptors have been closed
1031 if (e & EPOLLHUP) {
1032 // Remove the file descriptor
1033 r = epoll_ctl(epollfd, EPOLL_CTL_DEL, fd, NULL);
1034 if (r) {
1035 ERROR(jail->pakfire, "Could not remove closed file-descriptor %d: %m\n", fd);
1036 goto ERROR;
1037 }
1038 }
1039 }
1040 }
1041
1042 ERROR:
1043 if (epollfd >= 0)
1044 close(epollfd);
1045 if (timerfd >= 0)
1046 close(timerfd);
1047 if (signalfd >= 0)
1048 close(signalfd);
1049
1050 return r;
1051 }
1052
/*
	Output callback that collects standard output in a string.

	data is expected to be a pointer to a char* which is either NULL or
	points to the output that has been collected so far. Every line of
	priority LOG_INFO is appended to it; the previous string is freed
	and replaced by a newly allocated one.
	NOTE(review): this assumes that *data is NULL or heap-allocated -
	confirm against callers.

	Everything else is passed on to the default log callback.

	Returns zero on success, 1 if memory could not be allocated (in which
	case the collected output is left untouched), or the return value of
	the default log callback.
*/
int pakfire_jail_capture_stdout(struct pakfire* pakfire, void* data,
		int priority, const char* line, size_t length) {
	char** output = (char**)data;
	char* buffer = NULL;
	int r;

	// Append everything from stdout to a buffer
	if (output && priority == LOG_INFO) {
		// Build the new string separately so that the previous one
		// is neither leaked nor lost on failure
		r = asprintf(&buffer, "%s%s", (*output) ? *output : "", line);
		if (r < 0)
			return 1;

		// Replace the previous output
		free(*output);
		*output = buffer;

		return 0;
	}

	// Send everything else to the default logger
	return pakfire_jail_default_log_callback(pakfire, NULL, priority, line, length);
}
1069
1070 // Capabilities
1071
1072 // Logs all capabilities of the current process
1073 static int pakfire_jail_show_capabilities(struct pakfire_jail* jail) {
1074 cap_t caps = NULL;
1075 char* name = NULL;
1076 cap_flag_value_t value_e;
1077 cap_flag_value_t value_i;
1078 cap_flag_value_t value_p;
1079 int r;
1080
1081 // Fetch PID
1082 pid_t pid = getpid();
1083
1084 // Fetch all capabilities
1085 caps = cap_get_proc();
1086 if (!caps) {
1087 ERROR(jail->pakfire, "Could not fetch capabilities: %m\n");
1088 r = 1;
1089 goto ERROR;
1090 }
1091
1092 DEBUG(jail->pakfire, "Capabilities of PID %d:\n", pid);
1093
1094 // Iterate over all capabilities
1095 for (unsigned int cap = 0; cap_valid(cap); cap++) {
1096 name = cap_to_name(cap);
1097
1098 // Fetch effective value
1099 r = cap_get_flag(caps, cap, CAP_EFFECTIVE, &value_e);
1100 if (r)
1101 goto ERROR;
1102
1103 // Fetch inheritable value
1104 r = cap_get_flag(caps, cap, CAP_INHERITABLE, &value_i);
1105 if (r)
1106 goto ERROR;
1107
1108 // Fetch permitted value
1109 r = cap_get_flag(caps, cap, CAP_PERMITTED, &value_p);
1110 if (r)
1111 goto ERROR;
1112
1113 DEBUG(jail->pakfire,
1114 " %-24s : %c%c%c\n",
1115 name,
1116 (value_e == CAP_SET) ? 'e' : '-',
1117 (value_i == CAP_SET) ? 'i' : '-',
1118 (value_p == CAP_SET) ? 'p' : '-'
1119 );
1120
1121 // Free name
1122 cap_free(name);
1123 name = NULL;
1124 }
1125
1126 // Success
1127 r = 0;
1128
1129 ERROR:
1130 if (name)
1131 cap_free(name);
1132 if (caps)
1133 cap_free(caps);
1134
1135 return r;
1136 }
1137
1138 static int pakfire_jail_set_capabilities(struct pakfire_jail* jail) {
1139 cap_t caps = NULL;
1140 char* name = NULL;
1141 int r;
1142
1143 // Fetch capabilities
1144 caps = cap_get_proc();
1145 if (!caps) {
1146 ERROR(jail->pakfire, "Could not read capabilities: %m\n");
1147 r = 1;
1148 goto ERROR;
1149 }
1150
1151 // Walk through all capabilities
1152 for (cap_value_t cap = 0; cap_valid(cap); cap++) {
1153 cap_value_t _caps[] = { cap };
1154
1155 // Fetch the name of the capability
1156 name = cap_to_name(cap);
1157
1158 r = cap_set_flag(caps, CAP_EFFECTIVE, 1, _caps, CAP_SET);
1159 if (r) {
1160 ERROR(jail->pakfire, "Could not set %s: %m\n", name);
1161 goto ERROR;
1162 }
1163
1164 r = cap_set_flag(caps, CAP_INHERITABLE, 1, _caps, CAP_SET);
1165 if (r) {
1166 ERROR(jail->pakfire, "Could not set %s: %m\n", name);
1167 goto ERROR;
1168 }
1169
1170 r = cap_set_flag(caps, CAP_PERMITTED, 1, _caps, CAP_SET);
1171 if (r) {
1172 ERROR(jail->pakfire, "Could not set %s: %m\n", name);
1173 goto ERROR;
1174 }
1175
1176 // Free name
1177 cap_free(name);
1178 name = NULL;
1179 }
1180
1181 // Restore all capabilities
1182 r = cap_set_proc(caps);
1183 if (r) {
1184 ERROR(jail->pakfire, "Restoring capabilities failed: %m\n");
1185 goto ERROR;
1186 }
1187
1188 // Add all capabilities to the ambient set
1189 for (unsigned int cap = 0; cap_valid(cap); cap++) {
1190 name = cap_to_name(cap);
1191
1192 // Raise the capability
1193 r = prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_RAISE, cap, 0, 0);
1194 if (r) {
1195 ERROR(jail->pakfire, "Could not set ambient capability %s: %m\n", name);
1196 goto ERROR;
1197 }
1198
1199 // Free name
1200 cap_free(name);
1201 name = NULL;
1202 }
1203
1204 // Success
1205 r = 0;
1206
1207 ERROR:
1208 if (name)
1209 cap_free(name);
1210 if (caps)
1211 cap_free(caps);
1212
1213 return r;
1214 }
1215
1216 // Syscall Filter
1217
1218 static int pakfire_jail_limit_syscalls(struct pakfire_jail* jail) {
1219 const int syscalls[] = {
1220 // The kernel's keyring isn't namespaced
1221 SCMP_SYS(keyctl),
1222 SCMP_SYS(add_key),
1223 SCMP_SYS(request_key),
1224
1225 // Disable userfaultfd
1226 SCMP_SYS(userfaultfd),
1227
1228 // Disable perf which could leak a lot of information about the host
1229 SCMP_SYS(perf_event_open),
1230
1231 0,
1232 };
1233 int r = 1;
1234
1235 DEBUG(jail->pakfire, "Applying syscall filter...\n");
1236
1237 // Setup a syscall filter which allows everything by default
1238 scmp_filter_ctx ctx = seccomp_init(SCMP_ACT_ALLOW);
1239 if (!ctx) {
1240 ERROR(jail->pakfire, "Could not setup seccomp filter: %m\n");
1241 goto ERROR;
1242 }
1243
1244 // All all syscalls
1245 for (const int* syscall = syscalls; *syscall; syscall++) {
1246 r = seccomp_rule_add(ctx, SCMP_ACT_ERRNO(EPERM), *syscall, 0);
1247 if (r) {
1248 ERROR(jail->pakfire, "Could not configure syscall %d: %m\n", *syscall);
1249 goto ERROR;
1250 }
1251 }
1252
1253 // Load syscall filter into the kernel
1254 r = seccomp_load(ctx);
1255 if (r) {
1256 ERROR(jail->pakfire, "Could not load syscall filter into the kernel: %m\n");
1257 goto ERROR;
1258 }
1259
1260 ERROR:
1261 if (ctx)
1262 seccomp_release(ctx);
1263
1264 return r;
1265 }
1266
1267 // Mountpoints
1268
1269 PAKFIRE_EXPORT int pakfire_jail_bind(struct pakfire_jail* jail,
1270 const char* source, const char* target, int flags) {
1271 struct pakfire_jail_mountpoint* mp = NULL;
1272 int r;
1273
1274 // Check if there is any space left
1275 if (jail->num_mountpoints >= MAX_MOUNTPOINTS) {
1276 errno = ENOSPC;
1277 return 1;
1278 }
1279
1280 // Check for valid inputs
1281 if (!source || !target) {
1282 errno = EINVAL;
1283 return 1;
1284 }
1285
1286 // Select the next free slot
1287 mp = &jail->mountpoints[jail->num_mountpoints];
1288
1289 // Copy source
1290 r = pakfire_string_set(mp->source, source);
1291 if (r) {
1292 ERROR(jail->pakfire, "Could not copy source: %m\n");
1293 return r;
1294 }
1295
1296 // Copy target
1297 r = pakfire_string_set(mp->target, target);
1298 if (r) {
1299 ERROR(jail->pakfire, "Could not copy target: %m\n");
1300 return r;
1301 }
1302
1303 // Copy flags
1304 mp->flags = flags;
1305
1306 // Increment counter
1307 jail->num_mountpoints++;
1308
1309 return 0;
1310 }
1311
1312 static int pakfire_jail_mount_networking(struct pakfire_jail* jail) {
1313 int r;
1314
1315 const char* paths[] = {
1316 "/etc/hosts",
1317 "/etc/resolv.conf",
1318 NULL,
1319 };
1320
1321 // Bind-mount all paths read-only
1322 for (const char** path = paths; *path; path++) {
1323 r = pakfire_bind(jail->pakfire, *path, NULL, MS_RDONLY);
1324 if (r) {
1325 switch (errno) {
1326 // Ignore if we don't have permission
1327 case EPERM:
1328 continue;
1329
1330 default:
1331 break;
1332 }
1333 return r;
1334 }
1335 }
1336
1337 return 0;
1338 }
1339
1340 /*
1341 Mounts everything that we require in the new namespace
1342 */
1343 static int pakfire_jail_mount(struct pakfire_jail* jail, struct pakfire_jail_exec* ctx) {
1344 struct pakfire_jail_mountpoint* mp = NULL;
1345 int flags = 0;
1346 int r;
1347
1348 // Enable loop devices
1349 if (pakfire_jail_exec_has_flag(ctx, PAKFIRE_JAIL_HAS_LOOP_DEVICES))
1350 flags |= PAKFIRE_MOUNT_LOOP_DEVICES;
1351
1352 // Mount all default stuff
1353 r = pakfire_mount_all(jail->pakfire, flags);
1354 if (r)
1355 return r;
1356
1357 // Mount networking stuff
1358 if (pakfire_jail_exec_has_flag(ctx, PAKFIRE_JAIL_HAS_NETWORKING)) {
1359 r = pakfire_jail_mount_networking(jail);
1360 if (r)
1361 return r;
1362 }
1363
1364 // Mount all custom stuff
1365 for (unsigned int i = 0; i < jail->num_mountpoints; i++) {
1366 // Fetch mountpoint
1367 mp = &jail->mountpoints[i];
1368
1369 // Mount it
1370 r = pakfire_bind(jail->pakfire, mp->source, mp->target, mp->flags);
1371 if (r)
1372 return r;
1373 }
1374
1375 // Log all mountpoints
1376 pakfire_mount_list(jail->pakfire);
1377
1378 return 0;
1379 }
1380
1381 // Networking
1382
1383 static int pakfire_jail_setup_loopback(struct pakfire_jail* jail) {
1384 struct nl_sock* nl = NULL;
1385 struct nl_cache* cache = NULL;
1386 struct rtnl_link* link = NULL;
1387 struct rtnl_link* change = NULL;
1388 int r;
1389
1390 DEBUG(jail->pakfire, "Setting up loopback...\n");
1391
1392 // Allocate a netlink socket
1393 nl = nl_socket_alloc();
1394 if (!nl) {
1395 ERROR(jail->pakfire, "Could not allocate a netlink socket: %m\n");
1396 r = 1;
1397 goto ERROR;
1398 }
1399
1400 // Connect the socket
1401 r = nl_connect(nl, NETLINK_ROUTE);
1402 if (r) {
1403 ERROR(jail->pakfire, "Could not connect netlink socket: %s\n", nl_geterror(r));
1404 goto ERROR;
1405 }
1406
1407 // Allocate the netlink cache
1408 r = rtnl_link_alloc_cache(nl, AF_UNSPEC, &cache);
1409 if (r < 0) {
1410 ERROR(jail->pakfire, "Unable to allocate netlink cache: %s\n", nl_geterror(r));
1411 goto ERROR;
1412 }
1413
1414 // Fetch loopback interface
1415 link = rtnl_link_get_by_name(cache, "lo");
1416 if (!link) {
1417 ERROR(jail->pakfire, "Could not find lo interface. Ignoring.\n");
1418 r = 0;
1419 goto ERROR;
1420 }
1421
1422 // Allocate a new link
1423 change = rtnl_link_alloc();
1424 if (!change) {
1425 ERROR(jail->pakfire, "Could not allocate change link\n");
1426 r = 1;
1427 goto ERROR;
1428 }
1429
1430 // Set the link to UP
1431 rtnl_link_set_flags(change, IFF_UP);
1432
1433 // Apply any changes
1434 r = rtnl_link_change(nl, link, change, 0);
1435 if (r) {
1436 ERROR(jail->pakfire, "Unable to activate loopback: %s\n", nl_geterror(r));
1437 goto ERROR;
1438 }
1439
1440 // Success
1441 r = 0;
1442
1443 ERROR:
1444 if (nl)
1445 nl_socket_free(nl);
1446
1447 return r;
1448 }
1449
1450 // UID/GID Mapping
1451
1452 static int pakfire_jail_setup_uid_mapping(struct pakfire_jail* jail, pid_t pid) {
1453 char path[PATH_MAX];
1454 int r;
1455
1456 // Skip mapping anything when running on /
1457 if (pakfire_on_root(jail->pakfire))
1458 return 0;
1459
1460 // Make path
1461 r = pakfire_string_format(path, "/proc/%d/uid_map", pid);
1462 if (r)
1463 return r;
1464
1465 // Fetch UID
1466 const uid_t uid = pakfire_uid(jail->pakfire);
1467
1468 // Fetch SUBUID
1469 const struct pakfire_subid* subuid = pakfire_subuid(jail->pakfire);
1470 if (!subuid)
1471 return 1;
1472
1473 /* When running as root, we will map the entire range.
1474
1475 When running as a non-privileged user, we will map the root user inside the jail
1476 to the user's UID outside of the jail, and we will map the rest starting from one.
1477 */
1478
1479 // Running as root
1480 if (uid == 0) {
1481 r = pakfire_file_write(jail->pakfire, path, 0, 0, 0,
1482 "0 %lu %lu\n", subuid->id, subuid->length);
1483 } else {
1484 r = pakfire_file_write(jail->pakfire, path, 0, 0, 0,
1485 "0 %lu 1\n1 %lu %lu\n", uid, subuid->id, subuid->length);
1486 }
1487
1488 if (r) {
1489 ERROR(jail->pakfire, "Could not map UIDs: %m\n");
1490 return r;
1491 }
1492
1493 return r;
1494 }
1495
1496 static int pakfire_jail_setup_gid_mapping(struct pakfire_jail* jail, pid_t pid) {
1497 char path[PATH_MAX];
1498 int r;
1499
1500 // Skip mapping anything when running on /
1501 if (pakfire_on_root(jail->pakfire))
1502 return 0;
1503
1504 // Fetch GID
1505 const gid_t gid = pakfire_gid(jail->pakfire);
1506
1507 // Fetch SUBGID
1508 const struct pakfire_subid* subgid = pakfire_subgid(jail->pakfire);
1509 if (!subgid)
1510 return 1;
1511
1512 // Make path
1513 r = pakfire_string_format(path, "/proc/%d/gid_map", pid);
1514 if (r)
1515 return r;
1516
1517 // Running as root
1518 if (gid == 0) {
1519 r = pakfire_file_write(jail->pakfire, path, 0, 0, 0,
1520 "0 %lu %lu\n", subgid->id, subgid->length);
1521 } else {
1522 r = pakfire_file_write(jail->pakfire, path, 0, 0, 0,
1523 "0 %lu 1\n%1 %lu %lu\n", gid, subgid->id, subgid->length);
1524 }
1525
1526 if (r) {
1527 ERROR(jail->pakfire, "Could not map GIDs: %m\n");
1528 return r;
1529 }
1530
1531 return r;
1532 }
1533
1534 static int pakfire_jail_setgroups(struct pakfire_jail* jail, pid_t pid) {
1535 char path[PATH_MAX];
1536 int r = 1;
1537
1538 // Make path
1539 r = pakfire_string_format(path, "/proc/%d/setgroups", pid);
1540 if (r)
1541 return r;
1542
1543 // Open file for writing
1544 FILE* f = fopen(path, "w");
1545 if (!f) {
1546 ERROR(jail->pakfire, "Could not open %s for writing: %m\n", path);
1547 goto ERROR;
1548 }
1549
1550 // Write content
1551 int bytes_written = fprintf(f, "deny\n");
1552 if (bytes_written <= 0) {
1553 ERROR(jail->pakfire, "Could not write to %s: %m\n", path);
1554 goto ERROR;
1555 }
1556
1557 r = fclose(f);
1558 f = NULL;
1559 if (r) {
1560 ERROR(jail->pakfire, "Could not close %s: %m\n", path);
1561 goto ERROR;
1562 }
1563
1564 ERROR:
1565 if (f)
1566 fclose(f);
1567
1568 return r;
1569 }
1570
1571 static int pakfire_jail_send_signal(struct pakfire_jail* jail, int fd) {
1572 const uint64_t val = 1;
1573 int r = 0;
1574
1575 DEBUG(jail->pakfire, "Sending signal...\n");
1576
1577 // Write to the file descriptor
1578 ssize_t bytes_written = write(fd, &val, sizeof(val));
1579 if (bytes_written < 0 || (size_t)bytes_written < sizeof(val)) {
1580 ERROR(jail->pakfire, "Could not send signal: %m\n");
1581 r = 1;
1582 }
1583
1584 // Close the file descriptor
1585 close(fd);
1586
1587 return r;
1588 }
1589
1590 static int pakfire_jail_wait_for_signal(struct pakfire_jail* jail, int fd) {
1591 uint64_t val = 0;
1592 int r = 0;
1593
1594 DEBUG(jail->pakfire, "Waiting for signal...\n");
1595
1596 ssize_t bytes_read = read(fd, &val, sizeof(val));
1597 if (bytes_read < 0 || (size_t)bytes_read < sizeof(val)) {
1598 ERROR(jail->pakfire, "Error waiting for signal: %m\n");
1599 r = 1;
1600 }
1601
1602 // Close the file descriptor
1603 close(fd);
1604
1605 return r;
1606 }
1607
1608 /*
1609 Performs the initialisation that needs to happen in the parent part
1610 */
1611 static int pakfire_jail_parent(struct pakfire_jail* jail, struct pakfire_jail_exec* ctx) {
1612 int r;
1613
1614 // Setup UID mapping
1615 r = pakfire_jail_setup_uid_mapping(jail, ctx->pid);
1616 if (r)
1617 return r;
1618
1619 // Write "deny" to /proc/PID/setgroups
1620 r = pakfire_jail_setgroups(jail, ctx->pid);
1621 if (r)
1622 return r;
1623
1624 // Setup GID mapping
1625 r = pakfire_jail_setup_gid_mapping(jail, ctx->pid);
1626 if (r)
1627 return r;
1628
1629 // Parent has finished initialisation
1630 DEBUG(jail->pakfire, "Parent has finished initialization\n");
1631
1632 // Send signal to client
1633 r = pakfire_jail_send_signal(jail, ctx->completed_fd);
1634 if (r)
1635 return r;
1636
1637 return 0;
1638 }
1639
1640 static int pakfire_jail_switch_root(struct pakfire_jail* jail, const char* root) {
1641 int r;
1642
1643 // Change to the new root
1644 r = chdir(root);
1645 if (r) {
1646 ERROR(jail->pakfire, "chdir(%s) failed: %m\n", root);
1647 return r;
1648 }
1649
1650 // Switch Root!
1651 r = pivot_root(".", ".");
1652 if (r) {
1653 ERROR(jail->pakfire, "Failed changing into the new root directory %s: %m\n", root);
1654 return r;
1655 }
1656
1657 // Umount the old root
1658 r = umount2(".", MNT_DETACH);
1659 if (r) {
1660 ERROR(jail->pakfire, "Could not umount the old root filesystem: %m\n");
1661 return r;
1662 }
1663
1664 return 0;
1665 }
1666
1667 static int pakfire_jail_child(struct pakfire_jail* jail, struct pakfire_jail_exec* ctx,
1668 const char* argv[]) {
1669 int r;
1670
1671 // Redirect any logging to our log pipe
1672 pakfire_ctx_set_log_callback(jail->ctx, pakfire_jail_log_redirect, &ctx->pipes);
1673
1674 // Fetch my own PID
1675 pid_t pid = getpid();
1676
1677 DEBUG(jail->pakfire, "Launched child process in jail with PID %d\n", pid);
1678
1679 // Wait for the parent to finish initialization
1680 r = pakfire_jail_wait_for_signal(jail, ctx->completed_fd);
1681 if (r)
1682 return r;
1683
1684 // Die with parent
1685 r = prctl(PR_SET_PDEATHSIG, SIGKILL, 0, 0, 0);
1686 if (r) {
1687 ERROR(jail->pakfire, "Could not configure to die with parent: %m\n");
1688 return 126;
1689 }
1690
1691 // Make this process dumpable
1692 r = prctl (PR_SET_DUMPABLE, 1, 0, 0, 0);
1693 if (r) {
1694 ERROR(jail->pakfire, "Could not make the process dumpable: %m\n");
1695 return 126;
1696 }
1697
1698 // Don't drop any capabilities on setuid()
1699 r = prctl(PR_SET_KEEPCAPS, 1);
1700 if (r) {
1701 ERROR(jail->pakfire, "Could not set PR_SET_KEEPCAPS: %m\n");
1702 return 126;
1703 }
1704
1705 // Fetch UID/GID
1706 uid_t uid = getuid();
1707 gid_t gid = getgid();
1708
1709 // Fetch EUID/EGID
1710 uid_t euid = geteuid();
1711 gid_t egid = getegid();
1712
1713 DEBUG(jail->pakfire, " UID: %u (effective %u)\n", uid, euid);
1714 DEBUG(jail->pakfire, " GID: %u (effective %u)\n", gid, egid);
1715
1716 // Check if we are (effectively running as root)
1717 if (uid || gid || euid || egid) {
1718 ERROR(jail->pakfire, "Child process is not running as root\n");
1719 return 126;
1720 }
1721
1722 const char* root = pakfire_get_path(jail->pakfire);
1723 const char* arch = pakfire_get_effective_arch(jail->pakfire);
1724
1725 // Change mount propagation to slave to receive anything from the parent namespace
1726 r = pakfire_mount_change_propagation(jail->ctx, "/", MS_SLAVE);
1727 if (r)
1728 return r;
1729
1730 // Make root a mountpoint in the new mount namespace
1731 r = pakfire_mount_make_mounpoint(jail->pakfire, root);
1732 if (r)
1733 return r;
1734
1735 // Change mount propagation to private
1736 r = pakfire_mount_change_propagation(jail->ctx, root, MS_PRIVATE);
1737 if (r)
1738 return r;
1739
1740 // Change root (unless root is /)
1741 if (!pakfire_on_root(jail->pakfire)) {
1742 // Mount everything
1743 r = pakfire_jail_mount(jail, ctx);
1744 if (r)
1745 return r;
1746
1747 // chroot()
1748 r = pakfire_jail_switch_root(jail, root);
1749 if (r)
1750 return r;
1751 }
1752
1753 // Set personality
1754 unsigned long persona = pakfire_arch_personality(arch);
1755 if (persona) {
1756 r = personality(persona);
1757 if (r < 0) {
1758 ERROR(jail->pakfire, "Could not set personality (%x)\n", (unsigned int)persona);
1759 return 1;
1760 }
1761 }
1762
1763 // Setup networking
1764 if (!pakfire_jail_exec_has_flag(ctx, PAKFIRE_JAIL_HAS_NETWORKING)) {
1765 r = pakfire_jail_setup_loopback(jail);
1766 if (r)
1767 return 1;
1768 }
1769
1770 // Set nice level
1771 if (jail->nice) {
1772 DEBUG(jail->pakfire, "Setting nice level to %d\n", jail->nice);
1773
1774 r = setpriority(PRIO_PROCESS, pid, jail->nice);
1775 if (r) {
1776 ERROR(jail->pakfire, "Could not set nice level: %m\n");
1777 return 1;
1778 }
1779 }
1780
1781 // Close other end of log pipes
1782 close(ctx->pipes.log_INFO[0]);
1783 close(ctx->pipes.log_ERROR[0]);
1784 #ifdef ENABLE_DEBUG
1785 close(ctx->pipes.log_DEBUG[0]);
1786 #endif /* ENABLE_DEBUG */
1787
1788 // Connect standard input
1789 if (ctx->pipes.stdin[0] >= 0) {
1790 r = dup2(ctx->pipes.stdin[0], STDIN_FILENO);
1791 if (r < 0) {
1792 ERROR(jail->pakfire, "Could not connect fd %d to stdin: %m\n",
1793 ctx->pipes.stdin[0]);
1794
1795 return 1;
1796 }
1797 }
1798
1799 // Connect standard output and error
1800 if (ctx->pipes.stdout[1] >= 0 && ctx->pipes.stderr[1] >= 0) {
1801 r = dup2(ctx->pipes.stdout[1], STDOUT_FILENO);
1802 if (r < 0) {
1803 ERROR(jail->pakfire, "Could not connect fd %d to stdout: %m\n",
1804 ctx->pipes.stdout[1]);
1805
1806 return 1;
1807 }
1808
1809 r = dup2(ctx->pipes.stderr[1], STDERR_FILENO);
1810 if (r < 0) {
1811 ERROR(jail->pakfire, "Could not connect fd %d to stderr: %m\n",
1812 ctx->pipes.stderr[1]);
1813
1814 return 1;
1815 }
1816
1817 // Close the pipe (as we have moved the original file descriptors)
1818 pakfire_jail_close_pipe(jail, ctx->pipes.stdin);
1819 pakfire_jail_close_pipe(jail, ctx->pipes.stdout);
1820 pakfire_jail_close_pipe(jail, ctx->pipes.stderr);
1821 }
1822
1823 // Reset open file limit (http://0pointer.net/blog/file-descriptor-limits.html)
1824 r = pakfire_rlimit_reset_nofile(jail->pakfire);
1825 if (r)
1826 return r;
1827
1828 // Set capabilities
1829 r = pakfire_jail_set_capabilities(jail);
1830 if (r)
1831 return r;
1832
1833 // Show capabilities
1834 r = pakfire_jail_show_capabilities(jail);
1835 if (r)
1836 return r;
1837
1838 // Filter syscalls
1839 r = pakfire_jail_limit_syscalls(jail);
1840 if (r)
1841 return r;
1842
1843 DEBUG(jail->pakfire, "Child process initialization done\n");
1844 DEBUG(jail->pakfire, "Launching command:\n");
1845
1846 // Log argv
1847 for (unsigned int i = 0; argv[i]; i++)
1848 DEBUG(jail->pakfire, " argv[%u] = %s\n", i, argv[i]);
1849
1850 // exec() command
1851 r = execvpe(argv[0], (char**)argv, jail->env);
1852 if (r < 0) {
1853 // Translate errno into regular exit code
1854 switch (errno) {
1855 case ENOENT:
1856 // Ignore if the command doesn't exist
1857 if (ctx->flags & PAKFIRE_JAIL_NOENT_OK)
1858 r = 0;
1859 else
1860 r = 127;
1861
1862 break;
1863
1864 default:
1865 r = 1;
1866 }
1867
1868 ERROR(jail->pakfire, "Could not execve(%s): %m\n", argv[0]);
1869 }
1870
1871 // We should not get here
1872 return r;
1873 }
1874
1875 // Run a command in the jail
1876 static int __pakfire_jail_exec(struct pakfire_jail* jail, const char* argv[],
1877 const int interactive,
1878 pakfire_jail_communicate_in communicate_in,
1879 pakfire_jail_communicate_out communicate_out,
1880 void* data, int flags) {
1881 int exit = -1;
1882 int r;
1883
1884 // Check if argv is valid
1885 if (!argv || !argv[0]) {
1886 errno = EINVAL;
1887 return -1;
1888 }
1889
1890 // Initialize context for this call
1891 struct pakfire_jail_exec ctx = {
1892 .flags = flags,
1893
1894 .pipes = {
1895 .stdin = { -1, -1 },
1896 .stdout = { -1, -1 },
1897 .stderr = { -1, -1 },
1898 .log_INFO = { -1, -1 },
1899 .log_ERROR = { -1, -1 },
1900 #ifdef ENABLE_DEBUG
1901 .log_DEBUG = { -1, -1 },
1902 #endif /* ENABLE_DEBUG */
1903 },
1904
1905 .communicate = {
1906 .in = communicate_in,
1907 .out = communicate_out,
1908 .data = data,
1909 },
1910
1911 .pidfd = -1,
1912 };
1913
1914 DEBUG(jail->pakfire, "Executing jail...\n");
1915
1916 // Enable networking in interactive mode
1917 if (interactive)
1918 ctx.flags |= PAKFIRE_JAIL_HAS_NETWORKING;
1919
1920 /*
1921 Setup a file descriptor which can be used to notify the client that the parent
1922 has completed configuration.
1923 */
1924 ctx.completed_fd = eventfd(0, EFD_CLOEXEC);
1925 if (ctx.completed_fd < 0) {
1926 ERROR(jail->pakfire, "eventfd() failed: %m\n");
1927 return -1;
1928 }
1929
1930 // Create pipes to communicate with child process if we are not running interactively
1931 if (!interactive) {
1932 // stdin (only if callback is set)
1933 if (ctx.communicate.in) {
1934 r = pakfire_jail_setup_pipe(jail, &ctx.pipes.stdin, 0);
1935 if (r)
1936 goto ERROR;
1937 }
1938
1939 // stdout
1940 r = pakfire_jail_setup_pipe(jail, &ctx.pipes.stdout, 0);
1941 if (r)
1942 goto ERROR;
1943
1944 // stderr
1945 r = pakfire_jail_setup_pipe(jail, &ctx.pipes.stderr, 0);
1946 if (r)
1947 goto ERROR;
1948 }
1949
1950 // Setup pipes for logging
1951 // INFO
1952 r = pakfire_jail_setup_pipe(jail, &ctx.pipes.log_INFO, O_CLOEXEC);
1953 if (r)
1954 goto ERROR;
1955
1956 // ERROR
1957 r = pakfire_jail_setup_pipe(jail, &ctx.pipes.log_ERROR, O_CLOEXEC);
1958 if (r)
1959 goto ERROR;
1960
1961 #ifdef ENABLE_DEBUG
1962 // DEBUG
1963 r = pakfire_jail_setup_pipe(jail, &ctx.pipes.log_DEBUG, O_CLOEXEC);
1964 if (r)
1965 goto ERROR;
1966 #endif /* ENABLE_DEBUG */
1967
1968 // Configure child process
1969 struct clone_args args = {
1970 .flags =
1971 CLONE_NEWCGROUP |
1972 CLONE_NEWIPC |
1973 CLONE_NEWNS |
1974 CLONE_NEWPID |
1975 CLONE_NEWTIME |
1976 CLONE_NEWUSER |
1977 CLONE_NEWUTS |
1978 CLONE_PIDFD,
1979 .exit_signal = SIGCHLD,
1980 .pidfd = (long long unsigned int)&ctx.pidfd,
1981 };
1982
1983 // Launch the process in a cgroup that is a leaf of the configured cgroup
1984 if (jail->cgroup) {
1985 args.flags |= CLONE_INTO_CGROUP;
1986
1987 // Fetch our UUID
1988 const char* uuid = pakfire_jail_uuid(jail);
1989
1990 // Create a temporary cgroup
1991 r = pakfire_cgroup_child(&ctx.cgroup, jail->cgroup, uuid, 0);
1992 if (r) {
1993 ERROR(jail->pakfire, "Could not create cgroup for jail: %m\n");
1994 goto ERROR;
1995 }
1996
1997 // Clone into this cgroup
1998 args.cgroup = pakfire_cgroup_fd(ctx.cgroup);
1999 }
2000
2001 // Setup networking
2002 if (!pakfire_jail_exec_has_flag(&ctx, PAKFIRE_JAIL_HAS_NETWORKING)) {
2003 args.flags |= CLONE_NEWNET;
2004 }
2005
2006 // Fork this process
2007 ctx.pid = clone3(&args, sizeof(args));
2008 if (ctx.pid < 0) {
2009 ERROR(jail->pakfire, "Could not clone: %m\n");
2010 return -1;
2011
2012 // Child process
2013 } else if (ctx.pid == 0) {
2014 r = pakfire_jail_child(jail, &ctx, argv);
2015 _exit(r);
2016 }
2017
2018 // Parent process
2019 r = pakfire_jail_parent(jail, &ctx);
2020 if (r)
2021 goto ERROR;
2022
2023 DEBUG(jail->pakfire, "Waiting for PID %d to finish its work\n", ctx.pid);
2024
2025 // Read output of the child process
2026 r = pakfire_jail_wait(jail, &ctx);
2027 if (r)
2028 goto ERROR;
2029
2030 // Handle exit status
2031 switch (ctx.status.si_code) {
2032 case CLD_EXITED:
2033 DEBUG(jail->pakfire, "The child process exited with code %d\n",
2034 ctx.status.si_status);
2035
2036 // Pass exit code
2037 exit = ctx.status.si_status;
2038 break;
2039
2040 case CLD_KILLED:
2041 ERROR(jail->pakfire, "The child process was killed\n");
2042 exit = 139;
2043 break;
2044
2045 case CLD_DUMPED:
2046 ERROR(jail->pakfire, "The child process terminated abnormally\n");
2047 break;
2048
2049 // Log anything else
2050 default:
2051 ERROR(jail->pakfire, "Unknown child exit code: %d\n", ctx.status.si_code);
2052 break;
2053 }
2054
2055 ERROR:
2056 // Destroy the temporary cgroup (if any)
2057 if (ctx.cgroup) {
2058 // Read cgroup stats
2059 r = pakfire_cgroup_stat(ctx.cgroup, &ctx.cgroup_stats);
2060 if (r) {
2061 ERROR(jail->pakfire, "Could not read cgroup stats: %m\n");
2062 } else {
2063 pakfire_cgroup_stat_dump(ctx.cgroup, &ctx.cgroup_stats);
2064 }
2065
2066 pakfire_cgroup_destroy(ctx.cgroup);
2067 pakfire_cgroup_unref(ctx.cgroup);
2068 }
2069
2070 // Close any file descriptors
2071 pakfire_jail_close_pipe(jail, ctx.pipes.stdin);
2072 pakfire_jail_close_pipe(jail, ctx.pipes.stdout);
2073 pakfire_jail_close_pipe(jail, ctx.pipes.stderr);
2074 if (ctx.pidfd >= 0)
2075 close(ctx.pidfd);
2076 pakfire_jail_close_pipe(jail, ctx.pipes.log_INFO);
2077 pakfire_jail_close_pipe(jail, ctx.pipes.log_ERROR);
2078 #ifdef ENABLE_DEBUG
2079 pakfire_jail_close_pipe(jail, ctx.pipes.log_DEBUG);
2080 #endif /* ENABLE_DEBUG */
2081
2082 return exit;
2083 }
2084
2085 PAKFIRE_EXPORT int pakfire_jail_exec(
2086 struct pakfire_jail* jail,
2087 const char* argv[],
2088 pakfire_jail_communicate_in callback_in,
2089 pakfire_jail_communicate_out callback_out,
2090 void* data, int flags) {
2091 return __pakfire_jail_exec(jail, argv, 0, callback_in, callback_out, data, flags);
2092 }
2093
/*
	Runs argv in the jail in interactive mode (without any callbacks).

	Returns the error of pakfire_jail_setup_interactive_env() if that failed,
	and the result of __pakfire_jail_exec() otherwise.
*/
static int pakfire_jail_exec_interactive(
		struct pakfire_jail* jail, const char* argv[], int flags) {
	// Prepare the environment for interactive use
	const int rc = pakfire_jail_setup_interactive_env(jail);
	if (rc)
		return rc;

	const int interactive = 1;

	return __pakfire_jail_exec(jail, argv, interactive, NULL, NULL, NULL, flags);
}
2105
2106 int pakfire_jail_exec_script(struct pakfire_jail* jail,
2107 const char* script,
2108 const size_t size,
2109 const char* args[],
2110 pakfire_jail_communicate_in callback_in,
2111 pakfire_jail_communicate_out callback_out,
2112 void* data) {
2113 char path[PATH_MAX];
2114 const char** argv = NULL;
2115 FILE* f = NULL;
2116 int r;
2117
2118 const char* root = pakfire_get_path(jail->pakfire);
2119
2120 // Write the scriptlet to disk
2121 r = pakfire_path_append(path, root, PAKFIRE_TMP_DIR "/pakfire-script.XXXXXX");
2122 if (r)
2123 goto ERROR;
2124
2125 // Create a temporary file
2126 f = pakfire_mktemp(path, 0700);
2127 if (!f) {
2128 ERROR(jail->pakfire, "Could not create temporary file: %m\n");
2129 goto ERROR;
2130 }
2131
2132 DEBUG(jail->pakfire, "Writing script to %s:\n%.*s\n", path, (int)size, script);
2133
2134 // Write data
2135 r = fprintf(f, "%s", script);
2136 if (r < 0) {
2137 ERROR(jail->pakfire, "Could not write script to file %s: %m\n", path);
2138 goto ERROR;
2139 }
2140
2141 // Close file
2142 r = fclose(f);
2143 if (r) {
2144 ERROR(jail->pakfire, "Could not close script file %s: %m\n", path);
2145 goto ERROR;
2146 }
2147
2148 f = NULL;
2149
2150 // Count how many arguments were passed
2151 unsigned int argc = 1;
2152 if (args) {
2153 for (const char** arg = args; *arg; arg++)
2154 argc++;
2155 }
2156
2157 argv = calloc(argc + 1, sizeof(*argv));
2158 if (!argv) {
2159 ERROR(jail->pakfire, "Could not allocate argv: %m\n");
2160 goto ERROR;
2161 }
2162
2163 // Set command
2164 argv[0] = (root) ? pakfire_path_relpath(root, path) : path;
2165
2166 // Copy args
2167 for (unsigned int i = 1; i < argc; i++)
2168 argv[i] = args[i-1];
2169
2170 // Run the script
2171 r = pakfire_jail_exec(jail, argv, callback_in, callback_out, data, 0);
2172
2173 ERROR:
2174 if (argv)
2175 free(argv);
2176 if (f)
2177 fclose(f);
2178
2179 // Remove script from disk
2180 if (*path)
2181 unlink(path);
2182
2183 return r;
2184 }
2185
2186 /*
2187 A convenience function that creates a new jail, runs the given command and destroys
2188 the jail again.
2189 */
2190 int pakfire_jail_run(struct pakfire* pakfire, const char* argv[], int flags, char** output) {
2191 struct pakfire_jail* jail = NULL;
2192 int r;
2193
2194 // Create a new jail
2195 r = pakfire_jail_create(&jail, pakfire);
2196 if (r)
2197 goto ERROR;
2198
2199 // Execute the command
2200 r = pakfire_jail_exec(jail, argv, NULL, pakfire_jail_capture_stdout, output, 0);
2201
2202 ERROR:
2203 if (jail)
2204 pakfire_jail_unref(jail);
2205
2206 return r;
2207 }
2208
/*
	A convenience function that creates a new jail, runs the given script in
	it (without any callbacks) and destroys the jail again.

	Returns the error of pakfire_jail_create() if the jail could not be
	created, and the result of pakfire_jail_exec_script() otherwise.

	NOTE(review): the flags argument is currently unused - confirm whether
	that is intended.
*/
int pakfire_jail_run_script(struct pakfire* pakfire,
		const char* script, const size_t length, const char* argv[], int flags) {
	struct pakfire_jail* jail = NULL;

	// Set up the jail and, if that worked, launch the script right away
	int rc = pakfire_jail_create(&jail, pakfire);
	if (!rc)
		rc = pakfire_jail_exec_script(jail, script, length, argv, NULL, NULL, NULL);

	// Drop our reference to the jail
	if (jail)
		pakfire_jail_unref(jail);

	return rc;
}
2228
/*
	Launches an interactive login shell (/bin/bash) in the jail.

	Negative results are passed on as errors; any other result (i.e. the exit
	code of the shell) is ignored and zero is returned.
*/
int pakfire_jail_shell(struct pakfire_jail* jail) {
	const char* command[] = {
		"/bin/bash", "--login", NULL,
	};

	// Execute /bin/bash
	const int rc = pakfire_jail_exec_interactive(jail, command, 0);

	// Raise any errors, but ignore any return codes from the shell
	return (rc < 0) ? rc : 0;
}
2246
2247 static int pakfire_jail_run_if_possible(struct pakfire* pakfire, const char** argv) {
2248 char path[PATH_MAX];
2249 int r;
2250
2251 r = pakfire_path(pakfire, path, "%s", *argv);
2252 if (r)
2253 return r;
2254
2255 // Check if the file is executable
2256 r = access(path, X_OK);
2257 if (r) {
2258 DEBUG(pakfire, "%s is not executable. Skipping...\n", *argv);
2259 return 0;
2260 }
2261
2262 return pakfire_jail_run(pakfire, argv, 0, NULL);
2263 }
2264
/*
	Runs /sbin/ldconfig in a new jail (if it is executable) and returns the
	result of pakfire_jail_run_if_possible().
*/
int pakfire_jail_ldconfig(struct pakfire* pakfire) {
	const char* command[] = {
		"/sbin/ldconfig",
		NULL,
	};

	const int rc = pakfire_jail_run_if_possible(pakfire, command);

	return rc;
}
2273
/*
	Runs "/usr/bin/systemd-tmpfiles --create" in a new jail (if it is
	executable) and returns the result of pakfire_jail_run_if_possible().
*/
int pakfire_jail_run_systemd_tmpfiles(struct pakfire* pakfire) {
	const char* command[] = {
		"/usr/bin/systemd-tmpfiles",
		"--create",
		NULL,
	};

	const int rc = pakfire_jail_run_if_possible(pakfire, command);

	return rc;
}