]> git.ipfire.org Git - pakfire.git/blob - src/libpakfire/jail.c
jail: Use pivot_root() again instead of chroot()
[pakfire.git] / src / libpakfire / jail.c
1 /*#############################################################################
2 # #
3 # Pakfire - The IPFire package management system #
4 # Copyright (C) 2022 Pakfire development team #
5 # #
6 # This program is free software: you can redistribute it and/or modify #
7 # it under the terms of the GNU General Public License as published by #
8 # the Free Software Foundation, either version 3 of the License, or #
9 # (at your option) any later version. #
10 # #
11 # This program is distributed in the hope that it will be useful, #
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of #
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
14 # GNU General Public License for more details. #
15 # #
16 # You should have received a copy of the GNU General Public License #
17 # along with this program. If not, see <http://www.gnu.org/licenses/>. #
18 # #
19 #############################################################################*/
20
21 #include <errno.h>
22 #include <fcntl.h>
23 #include <linux/capability.h>
24 #include <linux/sched.h>
25 #include <sys/wait.h>
26 #include <linux/wait.h>
27 #include <sched.h>
28 #include <signal.h>
29 #include <stdlib.h>
30 #include <syscall.h>
31 #include <sys/capability.h>
32 #include <sys/epoll.h>
33 #include <sys/eventfd.h>
34 #include <sys/mount.h>
35 #include <sys/personality.h>
36 #include <sys/prctl.h>
37 #include <sys/resource.h>
38 #include <sys/signalfd.h>
39 #include <sys/timerfd.h>
40 #include <sys/types.h>
41 #include <sys/wait.h>
42
43 // libnl3
44 #include <net/if.h>
45 #include <netlink/route/link.h>
46
47 // libseccomp
48 #include <seccomp.h>
49
50 // libuuid
51 #include <uuid.h>
52
53 #include <pakfire/arch.h>
54 #include <pakfire/cgroup.h>
55 #include <pakfire/jail.h>
56 #include <pakfire/logging.h>
57 #include <pakfire/mount.h>
58 #include <pakfire/pakfire.h>
59 #include <pakfire/private.h>
60 #include <pakfire/pwd.h>
61 #include <pakfire/string.h>
62 #include <pakfire/util.h>
63
// Size in bytes of each log/output buffer. The expression is parenthesised so
// that the macro expands correctly inside larger expressions.
#define BUFFER_SIZE      (1024 * 64)
// Number of slots in the environment array of a jail
#define ENVIRON_SIZE     128
// Maximum number of events fetched by a single epoll_wait() call
#define EPOLL_MAX_EVENTS 2
// Maximum number of custom bind-mounts that can be registered with a jail
#define MAX_MOUNTPOINTS  8
68
// Default environment variables which are set in every new jail.
// The table is terminated by an entry whose key is NULL.
static const struct environ {
	const char* key;
	const char* val;
} ENV[] = {
	{ .key = "HOME", .val = "/root" },
	{ .key = "LANG", .val = "C.utf-8" },
	{ .key = "PATH", .val = "/usr/local/sbin:/usr/sbin:/sbin:/usr/local/bin:/usr/bin:/bin" },
	{ .key = "TERM", .val = "vt100" },

	// Announce to all processes that they are running inside a Pakfire container
	{ .key = "container", .val = "pakfire" },

	// Sentinel
	{ .key = NULL, .val = NULL },
};
83
// One custom bind-mount that has been registered with pakfire_jail_bind()
struct pakfire_jail_mountpoint {
	// Path that is going to be bind-mounted
	char source[PATH_MAX];

	// Path where the source is going to be mounted
	char target[PATH_MAX];

	// Mount flags that are passed on to pakfire_bind()
	int flags;
};
89
90 struct pakfire_jail {
91 struct pakfire* pakfire;
92 int nrefs;
93
94 // A unique ID for each jail
95 uuid_t uuid;
96 char __uuid[UUID_STR_LEN];
97
98 // Resource Limits
99 int nice;
100
101 // Timeout
102 struct itimerspec timeout;
103
104 // CGroup
105 struct pakfire_cgroup* cgroup;
106
107 // Environment
108 char* env[ENVIRON_SIZE];
109
110 // Mountpoints
111 struct pakfire_jail_mountpoint mountpoints[MAX_MOUNTPOINTS];
112 unsigned int num_mountpoints;
113 };
114
115 struct pakfire_log_buffer {
116 char data[BUFFER_SIZE];
117 size_t used;
118 };
119
120 struct pakfire_jail_exec {
121 int flags;
122
123 // PID (of the child)
124 pid_t pid;
125 int pidfd;
126
127 // Process status (from waitid)
128 siginfo_t status;
129
130 // FD to notify the client that the parent has finished initialization
131 int completed_fd;
132
133 // Log pipes
134 struct pakfire_jail_pipes {
135 int stdin[2];
136 int stdout[2];
137 int stderr[2];
138
139 // Logging
140 int log_INFO[2];
141 int log_ERROR[2];
142 int log_DEBUG[2];
143 } pipes;
144
145 // Communicate
146 struct pakfire_jail_communicate {
147 pakfire_jail_communicate_in in;
148 pakfire_jail_communicate_out out;
149 void* data;
150 } communicate;
151
152 // Log buffers
153 struct pakfire_jail_buffers {
154 struct pakfire_log_buffer stdout;
155 struct pakfire_log_buffer stderr;
156
157 // Logging
158 struct pakfire_log_buffer log_INFO;
159 struct pakfire_log_buffer log_ERROR;
160 struct pakfire_log_buffer log_DEBUG;
161 } buffers;
162
163 struct pakfire_cgroup* cgroup;
164 struct pakfire_cgroup_stats cgroup_stats;
165 };
166
// Thin wrapper which invokes the clone3 system call directly
static int clone3(struct clone_args* args, size_t size) {
	const long result = syscall(__NR_clone3, args, size);

	return result;
}
170
171 static int pidfd_send_signal(int pidfd, int sig, siginfo_t* info, unsigned int flags) {
172 return syscall(SYS_pidfd_send_signal, pidfd, sig, info, flags);
173 }
174
// Thin wrapper which invokes the pivot_root system call directly
static int pivot_root(const char* new_root, const char* old_root) {
	const long result = syscall(SYS_pivot_root, new_root, old_root);

	return result;
}
178
179 static int pakfire_jail_exec_has_flag(
180 const struct pakfire_jail_exec* ctx, const enum pakfire_jail_exec_flags flag) {
181 return ctx->flags & flag;
182 }
183
184 static void pakfire_jail_free(struct pakfire_jail* jail) {
185 DEBUG(jail->pakfire, "Freeing jail at %p\n", jail);
186
187 // Free environment
188 for (unsigned int i = 0; jail->env[i]; i++)
189 free(jail->env[i]);
190
191 if (jail->cgroup)
192 pakfire_cgroup_unref(jail->cgroup);
193
194 pakfire_unref(jail->pakfire);
195 free(jail);
196 }
197
/*
	Forwards a line to the Pakfire logger that matches its priority.

	Lines of any other priority are dropped. Always returns zero.
*/
static int pakfire_jail_default_log_callback(struct pakfire* pakfire, void* data,
		int priority, const char* line, size_t length) {
	if (priority == LOG_INFO) {
		INFO(pakfire, "%s", line);

	} else if (priority == LOG_ERR) {
		ERROR(pakfire, "%s", line);

#ifdef ENABLE_DEBUG
	} else if (priority == LOG_DEBUG) {
		DEBUG(pakfire, "%s", line);
#endif
	}

	return 0;
}
221
222 static const char* pakfire_jail_uuid(struct pakfire_jail* jail) {
223 if (!*jail->__uuid)
224 uuid_unparse_lower(jail->uuid, jail->__uuid);
225
226 return jail->__uuid;
227 }
228
// Sets a prompt and copies TERM and LANG from the calling process (when they
// are set) into the environment of the jail.
// Returns zero on success or the error of pakfire_jail_set_env().
static int pakfire_jail_setup_interactive_env(struct pakfire_jail* jail) {
	// Variables which are copied from the calling process (in this order)
	static const char* const inherited[] = { "TERM", "LANG", NULL };

	// Set PS1
	int r = pakfire_jail_set_env(jail, "PS1", "pakfire-jail \\w> ");
	if (r)
		return r;

	for (const char* const* name = inherited; *name; name++) {
		const char* value = secure_getenv(*name);

		// Skip anything that is not set
		if (!value)
			continue;

		r = pakfire_jail_set_env(jail, *name, value);
		if (r)
			return r;
	}

	return 0;
}
253
254 PAKFIRE_EXPORT int pakfire_jail_create(struct pakfire_jail** jail, struct pakfire* pakfire) {
255 int r;
256
257 const char* arch = pakfire_get_arch(pakfire);
258
259 // Allocate a new jail
260 struct pakfire_jail* j = calloc(1, sizeof(*j));
261 if (!j)
262 return 1;
263
264 // Reference Pakfire
265 j->pakfire = pakfire_ref(pakfire);
266
267 // Initialize reference counter
268 j->nrefs = 1;
269
270 // Generate a random UUID
271 uuid_generate_random(j->uuid);
272
273 DEBUG(j->pakfire, "Allocated new jail at %p\n", j);
274
275 // Set default environment
276 for (const struct environ* e = ENV; e->key; e++) {
277 r = pakfire_jail_set_env(j, e->key, e->val);
278 if (r)
279 goto ERROR;
280 }
281
282 // Enable all CPU features that CPU has to offer
283 if (!pakfire_arch_supported_by_host(arch)) {
284 r = pakfire_jail_set_env(j, "QEMU_CPU", "max");
285 if (r)
286 goto ERROR;
287 }
288
289 // Set container UUID
290 r = pakfire_jail_set_env(j, "container_uuid", pakfire_jail_uuid(j));
291 if (r)
292 goto ERROR;
293
294 // Disable systemctl to talk to systemd
295 if (!pakfire_on_root(j->pakfire)) {
296 r = pakfire_jail_set_env(j, "SYSTEMD_OFFLINE", "1");
297 if (r)
298 goto ERROR;
299 }
300
301 // Done
302 *jail = j;
303 return 0;
304
305 ERROR:
306 pakfire_jail_free(j);
307
308 return r;
309 }
310
311 PAKFIRE_EXPORT struct pakfire_jail* pakfire_jail_ref(struct pakfire_jail* jail) {
312 ++jail->nrefs;
313
314 return jail;
315 }
316
317 PAKFIRE_EXPORT struct pakfire_jail* pakfire_jail_unref(struct pakfire_jail* jail) {
318 if (--jail->nrefs > 0)
319 return jail;
320
321 pakfire_jail_free(jail);
322 return NULL;
323 }
324
325 // Resource Limits
326
327 PAKFIRE_EXPORT int pakfire_jail_nice(struct pakfire_jail* jail, int nice) {
328 // Check if nice level is in range
329 if (nice < -19 || nice > 20) {
330 errno = EINVAL;
331 return 1;
332 }
333
334 // Store nice level
335 jail->nice = nice;
336
337 return 0;
338 }
339
340 int pakfire_jail_set_cgroup(struct pakfire_jail* jail, struct pakfire_cgroup* cgroup) {
341 // Free any previous cgroup
342 if (jail->cgroup) {
343 pakfire_cgroup_unref(jail->cgroup);
344 jail->cgroup = NULL;
345 }
346
347 // Set any new cgroup
348 if (cgroup) {
349 DEBUG(jail->pakfire, "Setting cgroup %p\n", cgroup);
350
351 jail->cgroup = pakfire_cgroup_ref(cgroup);
352 }
353
354 // Done
355 return 0;
356 }
357
358 // Environment
359
360 // Returns the length of the environment
361 static unsigned int pakfire_jail_env_length(struct pakfire_jail* jail) {
362 unsigned int i = 0;
363
364 // Count everything in the environment
365 for (char** e = jail->env; *e; e++)
366 i++;
367
368 return i;
369 }
370
371 // Finds an existing environment variable and returns its index or -1 if not found
372 static int pakfire_jail_find_env(struct pakfire_jail* jail, const char* key) {
373 if (!key) {
374 errno = EINVAL;
375 return -1;
376 }
377
378 const size_t length = strlen(key);
379
380 for (unsigned int i = 0; jail->env[i]; i++) {
381 if ((pakfire_string_startswith(jail->env[i], key)
382 && *(jail->env[i] + length) == '=')) {
383 return i;
384 }
385 }
386
387 // Nothing found
388 return -1;
389 }
390
391 // Returns the value of an environment variable or NULL
392 PAKFIRE_EXPORT const char* pakfire_jail_get_env(struct pakfire_jail* jail,
393 const char* key) {
394 int i = pakfire_jail_find_env(jail, key);
395 if (i < 0)
396 return NULL;
397
398 return jail->env[i] + strlen(key) + 1;
399 }
400
401 // Sets an environment variable
402 PAKFIRE_EXPORT int pakfire_jail_set_env(struct pakfire_jail* jail,
403 const char* key, const char* value) {
404 // Find the index where to write this value to
405 int i = pakfire_jail_find_env(jail, key);
406 if (i < 0)
407 i = pakfire_jail_env_length(jail);
408
409 // Return -ENOSPC when the environment is full
410 if (i >= ENVIRON_SIZE) {
411 errno = ENOSPC;
412 return -1;
413 }
414
415 // Free any previous value
416 if (jail->env[i])
417 free(jail->env[i]);
418
419 // Format and set environment variable
420 asprintf(&jail->env[i], "%s=%s", key, value);
421
422 DEBUG(jail->pakfire, "Set environment variable: %s\n", jail->env[i]);
423
424 return 0;
425 }
426
427 // Imports an environment
428 PAKFIRE_EXPORT int pakfire_jail_import_env(struct pakfire_jail* jail, const char* env[]) {
429 if (!env)
430 return 0;
431
432 char* key;
433 char* val;
434 int r;
435
436 // Copy environment variables
437 for (unsigned int i = 0; env[i]; i++) {
438 r = pakfire_string_partition(env[i], "=", &key, &val);
439 if (r)
440 continue;
441
442 // Set value
443 r = pakfire_jail_set_env(jail, key, val);
444
445 if (key)
446 free(key);
447 if (val)
448 free(val);
449
450 // Break on error
451 if (r)
452 return r;
453 }
454
455 return 0;
456 }
457
458 // Timeout
459
460 PAKFIRE_EXPORT int pakfire_jail_set_timeout(
461 struct pakfire_jail* jail, unsigned int timeout) {
462 // Store value
463 jail->timeout.it_value.tv_sec = timeout;
464
465 if (timeout > 0)
466 DEBUG(jail->pakfire, "Timeout set to %d second(s)\n", timeout);
467 else
468 DEBUG(jail->pakfire, "Timeout disabled\n");
469
470 return 0;
471 }
472
473 static int pakfire_jail_create_timer(struct pakfire_jail* jail) {
474 int r;
475
476 // Nothing to do if no timeout has been set
477 if (!jail->timeout.it_value.tv_sec)
478 return -1;
479
480 // Create a new timer
481 const int fd = timerfd_create(CLOCK_MONOTONIC, 0);
482 if (fd < 0) {
483 ERROR(jail->pakfire, "Could not create timer: %m\n");
484 goto ERROR;
485 }
486
487 // Arm timer
488 r = timerfd_settime(fd, 0, &jail->timeout, NULL);
489 if (r) {
490 ERROR(jail->pakfire, "Could not arm timer: %m\n");
491 goto ERROR;
492 }
493
494 return fd;
495
496 ERROR:
497 if (fd > 0)
498 close(fd);
499
500 return -1;
501 }
502
503 // Signals
504
505 static int pakfire_jail_handle_signals(struct pakfire_jail* jail) {
506 sigset_t mask;
507 int r;
508
509 sigemptyset(&mask);
510 sigaddset(&mask, SIGINT);
511
512 // Block signals
513 r = sigprocmask(SIG_BLOCK, &mask, NULL);
514 if (r < 0) {
515 ERROR(jail->pakfire, "Failed to block signals: %m\n");
516 return r;
517 }
518
519 // Create a file descriptor
520 r = signalfd(-1, &mask, SFD_NONBLOCK|SFD_CLOEXEC);
521 if (r < 0) {
522 ERROR(jail->pakfire, "Failed to create signalfd: %m\n");
523 return r;
524 }
525
526 return r;
527 }
528
529 /*
530 This function replaces any logging in the child process.
531
532 All log messages will be sent to the parent process through their respective pipes.
533 */
534 static void pakfire_jail_log(void* data, int priority, const char* file,
535 int line, const char* fn, const char* format, va_list args) {
536 struct pakfire_jail_pipes* pipes = (struct pakfire_jail_pipes*)data;
537 int fd;
538
539 switch (priority) {
540 case LOG_INFO:
541 fd = pipes->log_INFO[1];
542 break;
543
544 case LOG_ERR:
545 fd = pipes->log_ERROR[1];
546 break;
547
548 #ifdef ENABLE_DEBUG
549 case LOG_DEBUG:
550 fd = pipes->log_DEBUG[1];
551 break;
552 #endif /* ENABLE_DEBUG */
553
554 // Ignore any messages of an unknown priority
555 default:
556 return;
557 }
558
559 // Send the log message
560 if (fd)
561 vdprintf(fd, format, args);
562 }
563
564 static int pakfire_jail_log_buffer_is_full(const struct pakfire_log_buffer* buffer) {
565 return (sizeof(buffer->data) == buffer->used);
566 }
567
568 /*
569 This function reads as much data as it can from the file descriptor.
570 If it finds a whole line in it, it will send it to the logger and repeat the process.
571 If not newline character is found, it will try to read more data until it finds one.
572 */
573 static int pakfire_jail_handle_log(struct pakfire_jail* jail,
574 struct pakfire_jail_exec* ctx, int priority, int fd,
575 struct pakfire_log_buffer* buffer, pakfire_jail_communicate_out callback, void* data) {
576 char line[BUFFER_SIZE + 1];
577
578 // Fill up buffer from fd
579 if (buffer->used < sizeof(buffer->data)) {
580 ssize_t bytes_read = read(fd, buffer->data + buffer->used,
581 sizeof(buffer->data) - buffer->used);
582
583 // Handle errors
584 if (bytes_read < 0) {
585 ERROR(jail->pakfire, "Could not read from fd %d: %m\n", fd);
586 return -1;
587 }
588
589 // Update buffer size
590 buffer->used += bytes_read;
591 }
592
593 // See if we have any lines that we can write
594 while (buffer->used) {
595 // Search for the end of the first line
596 char* eol = memchr(buffer->data, '\n', buffer->used);
597
598 // No newline found
599 if (!eol) {
600 // If the buffer is full, we send the content to the logger and try again
601 // This should not happen in practise
602 if (pakfire_jail_log_buffer_is_full(buffer)) {
603 DEBUG(jail->pakfire, "Logging buffer is full. Sending all content\n");
604
605 eol = buffer->data + sizeof(buffer->data) - 1;
606
607 // Otherwise we might have only read parts of the output
608 } else
609 break;
610 }
611
612 // Find the length of the string
613 size_t length = eol - buffer->data + 1;
614
615 // Copy the line into the buffer
616 memcpy(line, buffer->data, length);
617
618 // Terminate the string
619 line[length] = '\0';
620
621 // Log the line
622 if (callback) {
623 int r = callback(jail->pakfire, data, priority, line, length);
624 if (r) {
625 ERROR(jail->pakfire, "The logging callback returned an error: %d\n", r);
626 return r;
627 }
628 }
629
630 // Remove line from buffer
631 memmove(buffer->data, buffer->data + length, buffer->used - length);
632 buffer->used -= length;
633 }
634
635 return 0;
636 }
637
638 static int pakfire_jail_stream_stdin(struct pakfire_jail* jail,
639 struct pakfire_jail_exec* ctx, const int fd) {
640 int r;
641
642 // Nothing to do if there is no stdin callback set
643 if (!ctx->communicate.in) {
644 DEBUG(jail->pakfire, "Callback for standard input is not set\n");
645 return 0;
646 }
647
648 // Skip if the writing pipe has already been closed
649 if (!ctx->pipes.stdin[1])
650 return 0;
651
652 DEBUG(jail->pakfire, "Streaming standard input...\n");
653
654 // Calling the callback
655 r = ctx->communicate.in(jail->pakfire, ctx->communicate.data, fd);
656
657 DEBUG(jail->pakfire, "Standard input callback finished: %d\n", r);
658
659 // The callback signaled that it has written everything
660 if (r == EOF) {
661 DEBUG(jail->pakfire, "Closing standard input pipe\n");
662
663 // Close the file-descriptor
664 close(fd);
665
666 // Reset the file-descriptor so it won't be closed again later
667 ctx->pipes.stdin[1] = 0;
668
669 // Report success
670 r = 0;
671 }
672
673 return r;
674 }
675
676 static int pakfire_jail_setup_pipe(struct pakfire_jail* jail, int (*fds)[2], const int flags) {
677 int r = pipe2(*fds, flags);
678 if (r < 0) {
679 ERROR(jail->pakfire, "Could not setup pipe: %m\n");
680 return 1;
681 }
682
683 return 0;
684 }
685
// Closes both ends of a pipe.
//
// Ends which are zero (never opened) or negative (already closed and marked
// with -1) are skipped, so that close() is never called on an invalid file
// descriptor and errno is not clobbered.
static void pakfire_jail_close_pipe(struct pakfire_jail* jail, int fds[2]) {
	for (unsigned int i = 0; i < 2; i++) {
		if (fds[i] > 0)
			close(fds[i]);
	}
}
691
/*
	Convenience function for the side of a pipe that only reads:
	closes the write end (and marks it as -1) and returns the read end.
*/
static int pakfire_jail_get_pipe_to_read(struct pakfire_jail* jail, int (*fds)[2]) {
	int* const ends = *fds;

	// Close the write end of the pipe
	if (ends[1] != 0) {
		close(ends[1]);
		ends[1] = -1;
	}

	// Return the read end
	return ends[0];
}
710
/*
	Convenience function for the side of a pipe that only writes:
	closes the read end (and marks it as -1) and returns the write end.
*/
static int pakfire_jail_get_pipe_to_write(struct pakfire_jail* jail, int (*fds)[2]) {
	int* const ends = *fds;

	// Close the read end of the pipe
	if (ends[0] != 0) {
		close(ends[0]);
		ends[0] = -1;
	}

	// Return the write end
	return ends[1];
}
725
726 static int pakfire_jail_wait(struct pakfire_jail* jail, struct pakfire_jail_exec* ctx) {
727 int epollfd = -1;
728 struct epoll_event ev;
729 struct epoll_event events[EPOLL_MAX_EVENTS];
730 struct signalfd_siginfo siginfo;
731 char garbage[8];
732 int r = 0;
733
734 // Fetch file descriptors from context
735 const int stdin = pakfire_jail_get_pipe_to_write(jail, &ctx->pipes.stdin);
736 const int stdout = pakfire_jail_get_pipe_to_read(jail, &ctx->pipes.stdout);
737 const int stderr = pakfire_jail_get_pipe_to_read(jail, &ctx->pipes.stderr);
738 const int pidfd = ctx->pidfd;
739
740 // Timer
741 const int timerfd = pakfire_jail_create_timer(jail);
742
743 // Logging
744 const int log_INFO = pakfire_jail_get_pipe_to_read(jail, &ctx->pipes.log_INFO);
745 const int log_ERROR = pakfire_jail_get_pipe_to_read(jail, &ctx->pipes.log_ERROR);
746 const int log_DEBUG = pakfire_jail_get_pipe_to_read(jail, &ctx->pipes.log_DEBUG);
747
748 // Signals
749 const int signalfd = pakfire_jail_handle_signals(jail);
750
751 // Make a list of all file descriptors we are interested in
752 const int fds[] = {
753 stdin, stdout, stderr, pidfd, timerfd, signalfd, log_INFO, log_ERROR, log_DEBUG,
754 };
755
756 // Setup epoll
757 epollfd = epoll_create1(0);
758 if (epollfd < 0) {
759 ERROR(jail->pakfire, "Could not initialize epoll(): %m\n");
760 r = 1;
761 goto ERROR;
762 }
763
764 // Turn file descriptors into non-blocking mode and add them to epoll()
765 for (unsigned int i = 0; i < sizeof(fds) / sizeof(*fds); i++) {
766 int fd = fds[i];
767
768 // Skip fds which were not initialized
769 if (fd < 0)
770 continue;
771
772 ev.events = EPOLLHUP;
773
774 if (fd == stdin)
775 ev.events |= EPOLLOUT;
776 else
777 ev.events |= EPOLLIN;
778
779 // Read flags
780 int flags = fcntl(fd, F_GETFL, 0);
781
782 // Set modified flags
783 if (fcntl(fd, F_SETFL, flags|O_NONBLOCK) < 0) {
784 ERROR(jail->pakfire,
785 "Could not set file descriptor %d into non-blocking mode: %m\n", fd);
786 r = 1;
787 goto ERROR;
788 }
789
790 ev.data.fd = fd;
791
792 if (epoll_ctl(epollfd, EPOLL_CTL_ADD, fd, &ev) < 0) {
793 ERROR(jail->pakfire, "Could not add file descriptor %d to epoll(): %m\n", fd);
794 r = 1;
795 goto ERROR;
796 }
797 }
798
799 int ended = 0;
800
801 // Loop for as long as the process is alive
802 while (!ended) {
803 int num = epoll_wait(epollfd, events, EPOLL_MAX_EVENTS, -1);
804 if (num < 1) {
805 // Ignore if epoll_wait() has been interrupted
806 if (errno == EINTR)
807 continue;
808
809 ERROR(jail->pakfire, "epoll_wait() failed: %m\n");
810 r = 1;
811
812 goto ERROR;
813 }
814
815 for (int i = 0; i < num; i++) {
816 int e = events[i].events;
817 int fd = events[i].data.fd;
818
819 struct pakfire_log_buffer* buffer = NULL;
820 pakfire_jail_communicate_out callback = NULL;
821 void* data = NULL;
822 int priority;
823
824 // Check if there is any data to be read
825 if (e & EPOLLIN) {
826 // Handle any changes to the PIDFD
827 if (fd == pidfd) {
828 // Call waidid() and store the result
829 r = waitid(P_PIDFD, ctx->pidfd, &ctx->status, WEXITED);
830 if (r) {
831 ERROR(jail->pakfire, "waitid() failed: %m\n");
832 goto ERROR;
833 }
834
835 // Mark that we have ended so that we will process the remaining
836 // events from epoll() now, but won't restart the outer loop.
837 ended = 1;
838 continue;
839
840 // Handle timer events
841 } else if (fd == timerfd) {
842 DEBUG(jail->pakfire, "Timer event received\n");
843
844 // Disarm the timer
845 r = read(timerfd, garbage, sizeof(garbage));
846 if (r < 1) {
847 ERROR(jail->pakfire, "Could not disarm timer: %m\n");
848 r = 1;
849 goto ERROR;
850 }
851
852 // Terminate the process if it hasn't already ended
853 if (!ended) {
854 DEBUG(jail->pakfire, "Terminating process...\n");
855
856 // Send SIGTERM to the process
857 r = pidfd_send_signal(pidfd, SIGKILL, NULL, 0);
858 if (r) {
859 ERROR(jail->pakfire, "Could not kill process: %m\n");
860 goto ERROR;
861 }
862 }
863
864 // There is nothing else to do
865 continue;
866
867 // Handle signals
868 } else if (fd == signalfd) {
869 // Read the signal
870 r = read(signalfd, &siginfo, sizeof(siginfo));
871 if (r < 1) {
872 ERROR(jail->pakfire, "Could not read signal: %m\n");
873 goto ERROR;
874 }
875
876 DEBUG(jail->pakfire, "Received signal %d\n", siginfo.ssi_signo);
877
878 // Handle signals
879 switch (siginfo.ssi_signo) {
880 // Pass SIGINT down to the child process
881 case SIGINT:
882 r = pidfd_send_signal(pidfd, siginfo.ssi_signo, NULL, 0);
883 if (r) {
884 ERROR(jail->pakfire, "Could not send signal to process: %m\n");
885 goto ERROR;
886 }
887 break;
888
889 default:
890 ERROR(jail->pakfire, "Received unhandled signal %d\n",
891 siginfo.ssi_signo);
892 break;
893 }
894
895 // Don't fall through to log processing
896 continue;
897
898 // Handle logging messages
899 } else if (fd == log_INFO) {
900 buffer = &ctx->buffers.log_INFO;
901 priority = LOG_INFO;
902
903 callback = pakfire_jail_default_log_callback;
904
905 } else if (fd == log_ERROR) {
906 buffer = &ctx->buffers.log_ERROR;
907 priority = LOG_ERR;
908
909 callback = pakfire_jail_default_log_callback;
910
911 } else if (fd == log_DEBUG) {
912 buffer = &ctx->buffers.log_DEBUG;
913 priority = LOG_DEBUG;
914
915 callback = pakfire_jail_default_log_callback;
916
917 // Handle anything from the log pipes
918 } else if (fd == stdout) {
919 buffer = &ctx->buffers.stdout;
920 priority = LOG_INFO;
921
922 callback = ctx->communicate.out;
923 data = ctx->communicate.data;
924
925 } else if (fd == stderr) {
926 buffer = &ctx->buffers.stderr;
927 priority = LOG_ERR;
928
929 callback = ctx->communicate.out;
930 data = ctx->communicate.data;
931
932 } else {
933 DEBUG(jail->pakfire, "Received invalid file descriptor %d\n", fd);
934 continue;
935 }
936
937 // Handle log event
938 r = pakfire_jail_handle_log(jail, ctx, priority, fd, buffer, callback, data);
939 if (r)
940 goto ERROR;
941 }
942
943 if (e & EPOLLOUT) {
944 // Handle standard input
945 if (fd == stdin) {
946 r = pakfire_jail_stream_stdin(jail, ctx, fd);
947 if (r) {
948 switch (errno) {
949 // Ignore if we filled up the buffer
950 case EAGAIN:
951 break;
952
953 default:
954 ERROR(jail->pakfire, "Could not write to stdin: %m\n");
955 goto ERROR;
956 }
957 }
958 }
959 }
960
961 // Check if any file descriptors have been closed
962 if (e & EPOLLHUP) {
963 // Remove the file descriptor
964 r = epoll_ctl(epollfd, EPOLL_CTL_DEL, fd, NULL);
965 if (r) {
966 ERROR(jail->pakfire, "Could not remove closed file-descriptor %d: %m\n", fd);
967 goto ERROR;
968 }
969 }
970 }
971 }
972
973 ERROR:
974 if (epollfd > 0)
975 close(epollfd);
976 if (timerfd > 0)
977 close(timerfd);
978 if (signalfd > 0)
979 close(signalfd);
980
981 return r;
982 }
983
// Output callback which collects everything from standard output in a string.
//
// data is expected to point to a char* which is either NULL or a string
// allocated on the heap. Every line of priority LOG_INFO is appended to it;
// the string is re-allocated and the previous one is freed. Anything else
// (or everything, if data is NULL) is sent to the default log callback.
//
// Returns zero on success. If memory could not be allocated, 1 is returned
// and the string collected so far is left untouched.
int pakfire_jail_capture_stdout(struct pakfire* pakfire, void* data,
		int priority, const char* line, size_t length) {
	char** output = (char**)data;
	char* combined = NULL;

	// Append everything from stdout to a buffer
	if (output && priority == LOG_INFO) {
		// Format into a new string first so that the old one stays valid
		// if the allocation fails
		const int r = asprintf(&combined, "%s%s", *output ? *output : "", line);
		if (r < 0)
			return 1;

		// Release the previous string which would otherwise be leaked
		free(*output);
		*output = combined;

		return 0;
	}

	// Send everything else to the default logger
	return pakfire_jail_default_log_callback(pakfire, NULL, priority, line, length);
}
1000
1001 // Capabilities
1002
1003 // Logs all capabilities of the current process
1004 static int pakfire_jail_show_capabilities(struct pakfire_jail* jail) {
1005 cap_t caps = NULL;
1006 char* name = NULL;
1007 cap_flag_value_t value_e;
1008 cap_flag_value_t value_i;
1009 cap_flag_value_t value_p;
1010 int r;
1011
1012 // Fetch PID
1013 pid_t pid = getpid();
1014
1015 // Fetch all capabilities
1016 caps = cap_get_proc();
1017 if (!caps) {
1018 ERROR(jail->pakfire, "Could not fetch capabilities: %m\n");
1019 r = 1;
1020 goto ERROR;
1021 }
1022
1023 DEBUG(jail->pakfire, "Capabilities of PID %d:\n", pid);
1024
1025 // Iterate over all capabilities
1026 for (unsigned int cap = 0; cap_valid(cap); cap++) {
1027 name = cap_to_name(cap);
1028
1029 // Fetch effective value
1030 r = cap_get_flag(caps, cap, CAP_EFFECTIVE, &value_e);
1031 if (r)
1032 goto ERROR;
1033
1034 // Fetch inheritable value
1035 r = cap_get_flag(caps, cap, CAP_INHERITABLE, &value_i);
1036 if (r)
1037 goto ERROR;
1038
1039 // Fetch permitted value
1040 r = cap_get_flag(caps, cap, CAP_PERMITTED, &value_p);
1041 if (r)
1042 goto ERROR;
1043
1044 DEBUG(jail->pakfire,
1045 " %-24s : %c%c%c\n",
1046 name,
1047 (value_e == CAP_SET) ? 'e' : '-',
1048 (value_i == CAP_SET) ? 'i' : '-',
1049 (value_p == CAP_SET) ? 'p' : '-'
1050 );
1051
1052 // Free name
1053 cap_free(name);
1054 name = NULL;
1055 }
1056
1057 // Success
1058 r = 0;
1059
1060 ERROR:
1061 if (name)
1062 cap_free(name);
1063 if (caps)
1064 cap_free(caps);
1065
1066 return r;
1067 }
1068
1069 static int pakfire_jail_set_capabilities(struct pakfire_jail* jail) {
1070 cap_t caps = NULL;
1071 char* name = NULL;
1072 int r;
1073
1074 // Fetch capabilities
1075 caps = cap_get_proc();
1076 if (!caps) {
1077 ERROR(jail->pakfire, "Could not read capabilities: %m\n");
1078 r = 1;
1079 goto ERROR;
1080 }
1081
1082 // Walk through all capabilities
1083 for (cap_value_t cap = 0; cap_valid(cap); cap++) {
1084 cap_value_t _caps[] = { cap };
1085
1086 // Fetch the name of the capability
1087 name = cap_to_name(cap);
1088
1089 r = cap_set_flag(caps, CAP_EFFECTIVE, 1, _caps, CAP_SET);
1090 if (r) {
1091 ERROR(jail->pakfire, "Could not set %s: %m\n", name);
1092 goto ERROR;
1093 }
1094
1095 r = cap_set_flag(caps, CAP_INHERITABLE, 1, _caps, CAP_SET);
1096 if (r) {
1097 ERROR(jail->pakfire, "Could not set %s: %m\n", name);
1098 goto ERROR;
1099 }
1100
1101 r = cap_set_flag(caps, CAP_PERMITTED, 1, _caps, CAP_SET);
1102 if (r) {
1103 ERROR(jail->pakfire, "Could not set %s: %m\n", name);
1104 goto ERROR;
1105 }
1106
1107 // Free name
1108 cap_free(name);
1109 name = NULL;
1110 }
1111
1112 // Restore all capabilities
1113 r = cap_set_proc(caps);
1114 if (r) {
1115 ERROR(jail->pakfire, "Restoring capabilities failed: %m\n");
1116 goto ERROR;
1117 }
1118
1119 // Add all capabilities to the ambient set
1120 for (unsigned int cap = 0; cap_valid(cap); cap++) {
1121 name = cap_to_name(cap);
1122
1123 // Raise the capability
1124 r = prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_RAISE, cap, 0, 0);
1125 if (r) {
1126 ERROR(jail->pakfire, "Could not set ambient capability %s: %m\n", name);
1127 goto ERROR;
1128 }
1129
1130 // Free name
1131 cap_free(name);
1132 name = NULL;
1133 }
1134
1135 // Success
1136 r = 0;
1137
1138 ERROR:
1139 if (name)
1140 cap_free(name);
1141 if (caps)
1142 cap_free(caps);
1143
1144 return r;
1145 }
1146
1147 // Syscall Filter
1148
1149 static int pakfire_jail_limit_syscalls(struct pakfire_jail* jail) {
1150 const int syscalls[] = {
1151 // The kernel's keyring isn't namespaced
1152 SCMP_SYS(keyctl),
1153 SCMP_SYS(add_key),
1154 SCMP_SYS(request_key),
1155
1156 // Disable userfaultfd
1157 SCMP_SYS(userfaultfd),
1158
1159 // Disable perf which could leak a lot of information about the host
1160 SCMP_SYS(perf_event_open),
1161
1162 0,
1163 };
1164 int r = 1;
1165
1166 DEBUG(jail->pakfire, "Applying syscall filter...\n");
1167
1168 // Setup a syscall filter which allows everything by default
1169 scmp_filter_ctx ctx = seccomp_init(SCMP_ACT_ALLOW);
1170 if (!ctx) {
1171 ERROR(jail->pakfire, "Could not setup seccomp filter: %m\n");
1172 goto ERROR;
1173 }
1174
1175 // All all syscalls
1176 for (const int* syscall = syscalls; *syscall; syscall++) {
1177 r = seccomp_rule_add(ctx, SCMP_ACT_ERRNO(EPERM), *syscall, 0);
1178 if (r) {
1179 ERROR(jail->pakfire, "Could not configure syscall %d: %m\n", *syscall);
1180 goto ERROR;
1181 }
1182 }
1183
1184 // Load syscall filter into the kernel
1185 r = seccomp_load(ctx);
1186 if (r) {
1187 ERROR(jail->pakfire, "Could not load syscall filter into the kernel: %m\n");
1188 goto ERROR;
1189 }
1190
1191 ERROR:
1192 if (ctx)
1193 seccomp_release(ctx);
1194
1195 return r;
1196 }
1197
1198 // Mountpoints
1199
1200 PAKFIRE_EXPORT int pakfire_jail_bind(struct pakfire_jail* jail,
1201 const char* source, const char* target, int flags) {
1202 struct pakfire_jail_mountpoint* mp = NULL;
1203 int r;
1204
1205 // Check if there is any space left
1206 if (jail->num_mountpoints >= MAX_MOUNTPOINTS) {
1207 errno = ENOSPC;
1208 return 1;
1209 }
1210
1211 // Check for valid inputs
1212 if (!source || !target) {
1213 errno = EINVAL;
1214 return 1;
1215 }
1216
1217 // Select the next free slot
1218 mp = &jail->mountpoints[jail->num_mountpoints];
1219
1220 // Copy source
1221 r = pakfire_string_set(mp->source, source);
1222 if (r) {
1223 ERROR(jail->pakfire, "Could not copy source: %m\n");
1224 return r;
1225 }
1226
1227 // Copy target
1228 r = pakfire_string_set(mp->target, target);
1229 if (r) {
1230 ERROR(jail->pakfire, "Could not copy target: %m\n");
1231 return r;
1232 }
1233
1234 // Copy flags
1235 mp->flags = flags;
1236
1237 // Increment counter
1238 jail->num_mountpoints++;
1239
1240 return 0;
1241 }
1242
1243 static int pakfire_jail_mount_networking(struct pakfire_jail* jail) {
1244 int r;
1245
1246 const char* paths[] = {
1247 "/etc/hosts",
1248 "/etc/resolv.conf",
1249 NULL,
1250 };
1251
1252 // Bind-mount all paths read-only
1253 for (const char** path = paths; *path; path++) {
1254 r = pakfire_bind(jail->pakfire, *path, NULL, MS_RDONLY);
1255 if (r)
1256 return r;
1257 }
1258
1259 return 0;
1260 }
1261
1262 /*
1263 Mounts everything that we require in the new namespace
1264 */
1265 static int pakfire_jail_mount(struct pakfire_jail* jail, struct pakfire_jail_exec* ctx) {
1266 struct pakfire_jail_mountpoint* mp = NULL;
1267 int flags = 0;
1268 int r;
1269
1270 // Enable loop devices
1271 if (pakfire_jail_exec_has_flag(ctx, PAKFIRE_JAIL_HAS_LOOP_DEVICES))
1272 flags |= PAKFIRE_MOUNT_LOOP_DEVICES;
1273
1274 // Mount all default stuff
1275 r = pakfire_mount_all(jail->pakfire, flags);
1276 if (r)
1277 return r;
1278
1279 // Mount networking stuff
1280 if (pakfire_jail_exec_has_flag(ctx, PAKFIRE_JAIL_HAS_NETWORKING)) {
1281 r = pakfire_jail_mount_networking(jail);
1282 if (r)
1283 return r;
1284 }
1285
1286 // Mount all custom stuff
1287 for (unsigned int i = 0; i < jail->num_mountpoints; i++) {
1288 // Fetch mountpoint
1289 mp = &jail->mountpoints[i];
1290
1291 // Mount it
1292 r = pakfire_bind(jail->pakfire, mp->source, mp->target, mp->flags);
1293 if (r)
1294 return r;
1295 }
1296
1297 // Log all mountpoints
1298 pakfire_mount_list(jail->pakfire);
1299
1300 return 0;
1301 }
1302
1303 // Networking
1304
1305 static int pakfire_jail_setup_loopback(struct pakfire_jail* jail) {
1306 struct nl_sock* nl = NULL;
1307 struct nl_cache* cache = NULL;
1308 struct rtnl_link* link = NULL;
1309 struct rtnl_link* change = NULL;
1310 int r;
1311
1312 DEBUG(jail->pakfire, "Setting up loopback...\n");
1313
1314 // Allocate a netlink socket
1315 nl = nl_socket_alloc();
1316 if (!nl) {
1317 ERROR(jail->pakfire, "Could not allocate a netlink socket: %m\n");
1318 r = 1;
1319 goto ERROR;
1320 }
1321
1322 // Connect the socket
1323 r = nl_connect(nl, NETLINK_ROUTE);
1324 if (r) {
1325 ERROR(jail->pakfire, "Could not connect netlink socket: %s\n", nl_geterror(r));
1326 goto ERROR;
1327 }
1328
1329 // Allocate the netlink cache
1330 r = rtnl_link_alloc_cache(nl, AF_UNSPEC, &cache);
1331 if (r < 0) {
1332 ERROR(jail->pakfire, "Unable to allocate netlink cache: %s\n", nl_geterror(r));
1333 goto ERROR;
1334 }
1335
1336 // Fetch loopback interface
1337 link = rtnl_link_get_by_name(cache, "lo");
1338 if (!link) {
1339 ERROR(jail->pakfire, "Could not find lo interface. Ignoring.\n");
1340 r = 0;
1341 goto ERROR;
1342 }
1343
1344 // Allocate a new link
1345 change = rtnl_link_alloc();
1346 if (!change) {
1347 ERROR(jail->pakfire, "Could not allocate change link\n");
1348 r = 1;
1349 goto ERROR;
1350 }
1351
1352 // Set the link to UP
1353 rtnl_link_set_flags(change, IFF_UP);
1354
1355 // Apply any changes
1356 r = rtnl_link_change(nl, link, change, 0);
1357 if (r) {
1358 ERROR(jail->pakfire, "Unable to activate loopback: %s\n", nl_geterror(r));
1359 goto ERROR;
1360 }
1361
1362 // Success
1363 r = 0;
1364
1365 ERROR:
1366 if (nl)
1367 nl_socket_free(nl);
1368
1369 return r;
1370 }
1371
1372 // UID/GID Mapping
1373
1374 static int pakfire_jail_setup_uid_mapping(struct pakfire_jail* jail, pid_t pid) {
1375 char path[PATH_MAX];
1376 int r;
1377
1378 // Skip mapping anything when running on /
1379 if (pakfire_on_root(jail->pakfire))
1380 return 0;
1381
1382 // Make path
1383 r = pakfire_string_format(path, "/proc/%d/uid_map", pid);
1384 if (r)
1385 return r;
1386
1387 // Fetch UID
1388 const uid_t uid = pakfire_uid(jail->pakfire);
1389
1390 // Fetch SUBUID
1391 const struct pakfire_subid* subuid = pakfire_subuid(jail->pakfire);
1392 if (!subuid)
1393 return 1;
1394
1395 /* When running as root, we will map the entire range.
1396
1397 When running as a non-privileged user, we will map the root user inside the jail
1398 to the user's UID outside of the jail, and we will map the rest starting from one.
1399 */
1400
1401 // Running as root
1402 if (uid == 0) {
1403 r = pakfire_file_write(jail->pakfire, path, 0, 0, 0,
1404 "0 %lu %lu\n", subuid->id, subuid->length);
1405 } else {
1406 r = pakfire_file_write(jail->pakfire, path, 0, 0, 0,
1407 "0 %lu 1\n1 %lu %lu\n", uid, subuid->id, subuid->length);
1408 }
1409
1410 if (r) {
1411 ERROR(jail->pakfire, "Could not map UIDs: %m\n");
1412 return r;
1413 }
1414
1415 return r;
1416 }
1417
1418 static int pakfire_jail_setup_gid_mapping(struct pakfire_jail* jail, pid_t pid) {
1419 char path[PATH_MAX];
1420 int r;
1421
1422 // Skip mapping anything when running on /
1423 if (pakfire_on_root(jail->pakfire))
1424 return 0;
1425
1426 // Fetch GID
1427 const gid_t gid = pakfire_gid(jail->pakfire);
1428
1429 // Fetch SUBGID
1430 const struct pakfire_subid* subgid = pakfire_subgid(jail->pakfire);
1431 if (!subgid)
1432 return 1;
1433
1434 // Make path
1435 r = pakfire_string_format(path, "/proc/%d/gid_map", pid);
1436 if (r)
1437 return r;
1438
1439 // Running as root
1440 if (gid == 0) {
1441 r = pakfire_file_write(jail->pakfire, path, 0, 0, 0,
1442 "0 %lu %lu\n", subgid->id, subgid->length);
1443 } else {
1444 r = pakfire_file_write(jail->pakfire, path, 0, 0, 0,
1445 "0 %lu 1\n%1 %lu %lu\n", gid, subgid->id, subgid->length);
1446 }
1447
1448 if (r) {
1449 ERROR(jail->pakfire, "Could not map GIDs: %m\n");
1450 return r;
1451 }
1452
1453 return r;
1454 }
1455
1456 static int pakfire_jail_setgroups(struct pakfire_jail* jail, pid_t pid) {
1457 char path[PATH_MAX];
1458 int r = 1;
1459
1460 // Make path
1461 r = pakfire_string_format(path, "/proc/%d/setgroups", pid);
1462 if (r)
1463 return r;
1464
1465 // Open file for writing
1466 FILE* f = fopen(path, "w");
1467 if (!f) {
1468 ERROR(jail->pakfire, "Could not open %s for writing: %m\n", path);
1469 goto ERROR;
1470 }
1471
1472 // Write content
1473 int bytes_written = fprintf(f, "deny\n");
1474 if (bytes_written <= 0) {
1475 ERROR(jail->pakfire, "Could not write to %s: %m\n", path);
1476 goto ERROR;
1477 }
1478
1479 r = fclose(f);
1480 f = NULL;
1481 if (r) {
1482 ERROR(jail->pakfire, "Could not close %s: %m\n", path);
1483 goto ERROR;
1484 }
1485
1486 ERROR:
1487 if (f)
1488 fclose(f);
1489
1490 return r;
1491 }
1492
1493 static int pakfire_jail_send_signal(struct pakfire_jail* jail, int fd) {
1494 const uint64_t val = 1;
1495 int r = 0;
1496
1497 DEBUG(jail->pakfire, "Sending signal...\n");
1498
1499 // Write to the file descriptor
1500 ssize_t bytes_written = write(fd, &val, sizeof(val));
1501 if (bytes_written < 0 || (size_t)bytes_written < sizeof(val)) {
1502 ERROR(jail->pakfire, "Could not send signal: %m\n");
1503 r = 1;
1504 }
1505
1506 // Close the file descriptor
1507 close(fd);
1508
1509 return r;
1510 }
1511
1512 static int pakfire_jail_wait_for_signal(struct pakfire_jail* jail, int fd) {
1513 uint64_t val = 0;
1514 int r = 0;
1515
1516 DEBUG(jail->pakfire, "Waiting for signal...\n");
1517
1518 ssize_t bytes_read = read(fd, &val, sizeof(val));
1519 if (bytes_read < 0 || (size_t)bytes_read < sizeof(val)) {
1520 ERROR(jail->pakfire, "Error waiting for signal: %m\n");
1521 r = 1;
1522 }
1523
1524 // Close the file descriptor
1525 close(fd);
1526
1527 return r;
1528 }
1529
1530 /*
1531 Performs the initialisation that needs to happen in the parent part
1532 */
1533 static int pakfire_jail_parent(struct pakfire_jail* jail, struct pakfire_jail_exec* ctx) {
1534 int r;
1535
1536 // Setup UID mapping
1537 r = pakfire_jail_setup_uid_mapping(jail, ctx->pid);
1538 if (r)
1539 return r;
1540
1541 // Write "deny" to /proc/PID/setgroups
1542 r = pakfire_jail_setgroups(jail, ctx->pid);
1543 if (r)
1544 return r;
1545
1546 // Setup GID mapping
1547 r = pakfire_jail_setup_gid_mapping(jail, ctx->pid);
1548 if (r)
1549 return r;
1550
1551 // Parent has finished initialisation
1552 DEBUG(jail->pakfire, "Parent has finished initialization\n");
1553
1554 // Send signal to client
1555 r = pakfire_jail_send_signal(jail, ctx->completed_fd);
1556 if (r)
1557 return r;
1558
1559 return 0;
1560 }
1561
1562 static int pakfire_jail_switch_root(struct pakfire_jail* jail, const char* root) {
1563 int r;
1564
1565 // Change to the new root
1566 r = chdir(root);
1567 if (r) {
1568 ERROR(jail->pakfire, "chdir(%s) failed: %m\n", root);
1569 return r;
1570 }
1571
1572 // Switch Root!
1573 r = pivot_root(".", ".");
1574 if (r) {
1575 ERROR(jail->pakfire, "Failed changing into the new root directory %s: %m\n", root);
1576 return r;
1577 }
1578
1579 // Umount the old root
1580 r = umount2(".", MNT_DETACH);
1581 if (r) {
1582 ERROR(jail->pakfire, "Could not umount the old root filesystem: %m\n");
1583 return r;
1584 }
1585
1586 return 0;
1587 }
1588
1589 static int pakfire_jail_child(struct pakfire_jail* jail, struct pakfire_jail_exec* ctx,
1590 const char* argv[]) {
1591 int r;
1592
1593 // Redirect any logging to our log pipe
1594 pakfire_set_log_callback(jail->pakfire, pakfire_jail_log, &ctx->pipes);
1595
1596 // Die with parent
1597 r = prctl(PR_SET_PDEATHSIG, SIGKILL, 0, 0, 0);
1598 if (r) {
1599 ERROR(jail->pakfire, "Could not configure to die with parent: %m\n");
1600 return 126;
1601 }
1602
1603 // Fetch my own PID
1604 pid_t pid = getpid();
1605
1606 DEBUG(jail->pakfire, "Launched child process in jail with PID %d\n", pid);
1607
1608 // Wait for the parent to finish initialization
1609 r = pakfire_jail_wait_for_signal(jail, ctx->completed_fd);
1610 if (r)
1611 return r;
1612
1613 // Perform further initialization
1614
1615 // Fetch UID/GID
1616 uid_t uid = getuid();
1617 gid_t gid = getgid();
1618
1619 // Fetch EUID/EGID
1620 uid_t euid = geteuid();
1621 gid_t egid = getegid();
1622
1623 DEBUG(jail->pakfire, " UID: %d (effective %d)\n", uid, euid);
1624 DEBUG(jail->pakfire, " GID: %d (effective %d)\n", gid, egid);
1625
1626 // Check if we are (effectively running as root)
1627 if (uid || gid || euid || egid) {
1628 ERROR(jail->pakfire, "Child process is not running as root\n");
1629 return 126;
1630 }
1631
1632 const char* root = pakfire_get_path(jail->pakfire);
1633 const char* arch = pakfire_get_arch(jail->pakfire);
1634
1635 // Make root a mountpoint in the new mount namespace
1636 r = pakfire_mount_make_mounpoint(jail->pakfire, root);
1637 if (r)
1638 return r;
1639
1640 // Change root (unless root is /)
1641 if (!pakfire_on_root(jail->pakfire)) {
1642 // Mount everything
1643 r = pakfire_jail_mount(jail, ctx);
1644 if (r)
1645 return r;
1646
1647 // chroot()
1648 r = pakfire_jail_switch_root(jail, root);
1649 if (r)
1650 return r;
1651 }
1652
1653 // Set personality
1654 unsigned long persona = pakfire_arch_personality(arch);
1655 if (persona) {
1656 r = personality(persona);
1657 if (r < 0) {
1658 ERROR(jail->pakfire, "Could not set personality (%x)\n", (unsigned int)persona);
1659 return 1;
1660 }
1661 }
1662
1663 // Setup networking
1664 if (!pakfire_jail_exec_has_flag(ctx, PAKFIRE_JAIL_HAS_NETWORKING)) {
1665 r = pakfire_jail_setup_loopback(jail);
1666 if (r)
1667 return 1;
1668 }
1669
1670 // Set nice level
1671 if (jail->nice) {
1672 DEBUG(jail->pakfire, "Setting nice level to %d\n", jail->nice);
1673
1674 r = setpriority(PRIO_PROCESS, pid, jail->nice);
1675 if (r) {
1676 ERROR(jail->pakfire, "Could not set nice level: %m\n");
1677 return 1;
1678 }
1679 }
1680
1681 // Close other end of log pipes
1682 close(ctx->pipes.log_INFO[0]);
1683 close(ctx->pipes.log_ERROR[0]);
1684 #ifdef ENABLE_DEBUG
1685 close(ctx->pipes.log_DEBUG[0]);
1686 #endif /* ENABLE_DEBUG */
1687
1688 // Connect standard input
1689 if (ctx->pipes.stdin[0]) {
1690 r = dup2(ctx->pipes.stdin[0], STDIN_FILENO);
1691 if (r < 0) {
1692 ERROR(jail->pakfire, "Could not connect fd %d to stdin: %m\n",
1693 ctx->pipes.stdin[0]);
1694
1695 return 1;
1696 }
1697 }
1698
1699 // Connect standard output and error
1700 if (ctx->pipes.stdout[1] && ctx->pipes.stderr[1]) {
1701 r = dup2(ctx->pipes.stdout[1], STDOUT_FILENO);
1702 if (r < 0) {
1703 ERROR(jail->pakfire, "Could not connect fd %d to stdout: %m\n",
1704 ctx->pipes.stdout[1]);
1705
1706 return 1;
1707 }
1708
1709 r = dup2(ctx->pipes.stderr[1], STDERR_FILENO);
1710 if (r < 0) {
1711 ERROR(jail->pakfire, "Could not connect fd %d to stderr: %m\n",
1712 ctx->pipes.stderr[1]);
1713
1714 return 1;
1715 }
1716
1717 // Close the pipe (as we have moved the original file descriptors)
1718 pakfire_jail_close_pipe(jail, ctx->pipes.stdin);
1719 pakfire_jail_close_pipe(jail, ctx->pipes.stdout);
1720 pakfire_jail_close_pipe(jail, ctx->pipes.stderr);
1721 }
1722
1723 // Reset open file limit (http://0pointer.net/blog/file-descriptor-limits.html)
1724 r = pakfire_rlimit_reset_nofile(jail->pakfire);
1725 if (r)
1726 return r;
1727
1728 // Don't drop any capabilities on execve()
1729 r = prctl(PR_SET_KEEPCAPS, 1);
1730 if (r) {
1731 ERROR(jail->pakfire, "Could not set PR_SET_KEEPCAPS: %m\n");
1732 return r;
1733 }
1734
1735 // Set capabilities
1736 r = pakfire_jail_set_capabilities(jail);
1737 if (r)
1738 return r;
1739
1740 // Show capabilities
1741 r = pakfire_jail_show_capabilities(jail);
1742 if (r)
1743 return r;
1744
1745 // Filter syscalls
1746 r = pakfire_jail_limit_syscalls(jail);
1747 if (r)
1748 return r;
1749
1750 DEBUG(jail->pakfire, "Child process initialization done\n");
1751 DEBUG(jail->pakfire, "Launching command:\n");
1752
1753 // Log argv
1754 for (unsigned int i = 0; argv[i]; i++)
1755 DEBUG(jail->pakfire, " argv[%d] = %s\n", i, argv[i]);
1756
1757 // exec() command
1758 r = execvpe(argv[0], (char**)argv, jail->env);
1759 if (r < 0) {
1760 // Translate errno into regular exit code
1761 switch (errno) {
1762 case ENOENT:
1763 // Ignore if the command doesn't exist
1764 if (ctx->flags & PAKFIRE_JAIL_NOENT_OK)
1765 r = 0;
1766 else
1767 r = 127;
1768
1769 break;
1770
1771 default:
1772 r = 1;
1773 }
1774
1775 ERROR(jail->pakfire, "Could not execve(%s): %m\n", argv[0]);
1776 }
1777
1778 // We should not get here
1779 return r;
1780 }
1781
1782 // Run a command in the jail
1783 static int __pakfire_jail_exec(struct pakfire_jail* jail, const char* argv[],
1784 const int interactive,
1785 pakfire_jail_communicate_in communicate_in,
1786 pakfire_jail_communicate_out communicate_out,
1787 void* data, int flags) {
1788 int exit = -1;
1789 int r;
1790
1791 // Check if argv is valid
1792 if (!argv || !argv[0]) {
1793 errno = EINVAL;
1794 return -1;
1795 }
1796
1797 // Send any output to the default logger if no callback is set
1798 if (!communicate_out)
1799 communicate_out = pakfire_jail_default_log_callback;
1800
1801 // Initialize context for this call
1802 struct pakfire_jail_exec ctx = {
1803 .flags = flags,
1804
1805 .pipes = {
1806 .stdin = { -1, -1 },
1807 .stdout = { -1, -1 },
1808 .stderr = { -1, -1 },
1809 },
1810
1811 .communicate = {
1812 .in = communicate_in,
1813 .out = communicate_out,
1814 .data = data,
1815 },
1816
1817 .pidfd = -1,
1818 };
1819
1820 DEBUG(jail->pakfire, "Executing jail...\n");
1821
1822 // Enable networking in interactive mode
1823 if (interactive)
1824 ctx.flags |= PAKFIRE_JAIL_HAS_NETWORKING;
1825
1826 /*
1827 Setup a file descriptor which can be used to notify the client that the parent
1828 has completed configuration.
1829 */
1830 ctx.completed_fd = eventfd(0, EFD_CLOEXEC);
1831 if (ctx.completed_fd < 0) {
1832 ERROR(jail->pakfire, "eventfd() failed: %m\n");
1833 return -1;
1834 }
1835
1836 // Create pipes to communicate with child process if we are not running interactively
1837 if (!interactive) {
1838 // stdin (only if callback is set)
1839 if (ctx.communicate.in) {
1840 r = pakfire_jail_setup_pipe(jail, &ctx.pipes.stdin, 0);
1841 if (r)
1842 goto ERROR;
1843 }
1844
1845 // stdout
1846 r = pakfire_jail_setup_pipe(jail, &ctx.pipes.stdout, 0);
1847 if (r)
1848 goto ERROR;
1849
1850 // stderr
1851 r = pakfire_jail_setup_pipe(jail, &ctx.pipes.stderr, 0);
1852 if (r)
1853 goto ERROR;
1854 }
1855
1856 // Setup pipes for logging
1857 // INFO
1858 r = pakfire_jail_setup_pipe(jail, &ctx.pipes.log_INFO, O_CLOEXEC);
1859 if (r)
1860 goto ERROR;
1861
1862 // ERROR
1863 r = pakfire_jail_setup_pipe(jail, &ctx.pipes.log_ERROR, O_CLOEXEC);
1864 if (r)
1865 goto ERROR;
1866
1867 #ifdef ENABLE_DEBUG
1868 // DEBUG
1869 r = pakfire_jail_setup_pipe(jail, &ctx.pipes.log_DEBUG, O_CLOEXEC);
1870 if (r)
1871 goto ERROR;
1872 #endif /* ENABLE_DEBUG */
1873
1874 // Configure child process
1875 struct clone_args args = {
1876 .flags =
1877 CLONE_NEWCGROUP |
1878 CLONE_NEWIPC |
1879 CLONE_NEWNS |
1880 CLONE_NEWPID |
1881 CLONE_NEWUSER |
1882 CLONE_NEWUTS |
1883 CLONE_PIDFD,
1884 .exit_signal = SIGCHLD,
1885 .pidfd = (long long unsigned int)&ctx.pidfd,
1886 };
1887
1888 // Launch the process in a cgroup that is a leaf of the configured cgroup
1889 if (jail->cgroup) {
1890 args.flags |= CLONE_INTO_CGROUP;
1891
1892 // Fetch our UUID
1893 const char* uuid = pakfire_jail_uuid(jail);
1894
1895 // Create a temporary cgroup
1896 r = pakfire_cgroup_child(&ctx.cgroup, jail->cgroup, uuid, 0);
1897 if (r) {
1898 ERROR(jail->pakfire, "Could not create cgroup for jail: %m\n");
1899 goto ERROR;
1900 }
1901
1902 // Clone into this cgroup
1903 args.cgroup = pakfire_cgroup_fd(ctx.cgroup);
1904 }
1905
1906 // Setup networking
1907 if (!pakfire_jail_exec_has_flag(&ctx, PAKFIRE_JAIL_HAS_NETWORKING)) {
1908 args.flags |= CLONE_NEWNET;
1909 }
1910
1911 // Fork this process
1912 ctx.pid = clone3(&args, sizeof(args));
1913 if (ctx.pid < 0) {
1914 ERROR(jail->pakfire, "Could not clone: %m\n");
1915 return -1;
1916
1917 // Child process
1918 } else if (ctx.pid == 0) {
1919 r = pakfire_jail_child(jail, &ctx, argv);
1920 _exit(r);
1921 }
1922
1923 // Parent process
1924 r = pakfire_jail_parent(jail, &ctx);
1925 if (r)
1926 goto ERROR;
1927
1928 DEBUG(jail->pakfire, "Waiting for PID %d to finish its work\n", ctx.pid);
1929
1930 // Read output of the child process
1931 r = pakfire_jail_wait(jail, &ctx);
1932 if (r)
1933 goto ERROR;
1934
1935 // Handle exit status
1936 switch (ctx.status.si_code) {
1937 case CLD_EXITED:
1938 DEBUG(jail->pakfire, "The child process exited with code %d\n",
1939 ctx.status.si_status);
1940
1941 // Pass exit code
1942 exit = ctx.status.si_status;
1943 break;
1944
1945 case CLD_KILLED:
1946 ERROR(jail->pakfire, "The child process was killed\n");
1947 exit = 139;
1948 break;
1949
1950 case CLD_DUMPED:
1951 ERROR(jail->pakfire, "The child process terminated abnormally\n");
1952 break;
1953
1954 // Log anything else
1955 default:
1956 ERROR(jail->pakfire, "Unknown child exit code: %d\n", ctx.status.si_code);
1957 break;
1958 }
1959
1960 ERROR:
1961 // Destroy the temporary cgroup (if any)
1962 if (ctx.cgroup) {
1963 // Read cgroup stats
1964 r = pakfire_cgroup_stat(ctx.cgroup, &ctx.cgroup_stats);
1965 if (r) {
1966 ERROR(jail->pakfire, "Could not read cgroup stats: %m\n");
1967 } else {
1968 pakfire_cgroup_stat_dump(ctx.cgroup, &ctx.cgroup_stats);
1969 }
1970
1971 pakfire_cgroup_destroy(ctx.cgroup);
1972 pakfire_cgroup_unref(ctx.cgroup);
1973 }
1974
1975 // Close any file descriptors
1976 pakfire_jail_close_pipe(jail, ctx.pipes.stdin);
1977 pakfire_jail_close_pipe(jail, ctx.pipes.stdout);
1978 pakfire_jail_close_pipe(jail, ctx.pipes.stderr);
1979 if (ctx.pidfd)
1980 close(ctx.pidfd);
1981 pakfire_jail_close_pipe(jail, ctx.pipes.log_INFO);
1982 pakfire_jail_close_pipe(jail, ctx.pipes.log_ERROR);
1983 pakfire_jail_close_pipe(jail, ctx.pipes.log_DEBUG);
1984
1985 return exit;
1986 }
1987
1988 PAKFIRE_EXPORT int pakfire_jail_exec(
1989 struct pakfire_jail* jail,
1990 const char* argv[],
1991 pakfire_jail_communicate_in callback_in,
1992 pakfire_jail_communicate_out callback_out,
1993 void* data, int flags) {
1994 return __pakfire_jail_exec(jail, argv, 0, callback_in, callback_out, data, flags);
1995 }
1996
/*
	Runs argv in the jail in interactive mode, i.e. without any communication
	callbacks.

	Returns the non-zero result of pakfire_jail_setup_interactive_env() if that
	fails, otherwise the result of __pakfire_jail_exec().
*/
static int pakfire_jail_exec_interactive(
		struct pakfire_jail* jail, const char* argv[], int flags) {
	// Prepare the environment first
	const int r = pakfire_jail_setup_interactive_env(jail);
	if (r)
		return r;

	// Launch without any callbacks
	return __pakfire_jail_exec(jail, argv, 1, NULL, NULL, NULL, flags);
}
2008
2009 int pakfire_jail_exec_script(struct pakfire_jail* jail,
2010 const char* script,
2011 const size_t size,
2012 const char* args[],
2013 pakfire_jail_communicate_in callback_in,
2014 pakfire_jail_communicate_out callback_out,
2015 void* data) {
2016 char path[PATH_MAX];
2017 const char** argv = NULL;
2018 FILE* f = NULL;
2019 int r;
2020
2021 const char* root = pakfire_get_path(jail->pakfire);
2022
2023 // Write the scriptlet to disk
2024 r = pakfire_path_join(path, root, PAKFIRE_TMP_DIR "/pakfire-script.XXXXXX");
2025 if (r)
2026 goto ERROR;
2027
2028 // Create a temporary file
2029 f = pakfire_mktemp(path, 0700);
2030 if (!f) {
2031 ERROR(jail->pakfire, "Could not create temporary file: %m\n");
2032 goto ERROR;
2033 }
2034
2035 DEBUG(jail->pakfire, "Writing script to %s:\n%.*s\n", path, (int)size, script);
2036
2037 // Write data
2038 r = fprintf(f, "%s", script);
2039 if (r < 0) {
2040 ERROR(jail->pakfire, "Could not write script to file %s: %m\n", path);
2041 goto ERROR;
2042 }
2043
2044 // Close file
2045 r = fclose(f);
2046 if (r) {
2047 ERROR(jail->pakfire, "Could not close script file %s: %m\n", path);
2048 goto ERROR;
2049 }
2050
2051 f = NULL;
2052
2053 // Count how many arguments were passed
2054 unsigned int argc = 1;
2055 if (args) {
2056 for (const char** arg = args; *arg; arg++)
2057 argc++;
2058 }
2059
2060 argv = calloc(argc + 1, sizeof(*argv));
2061 if (!argv) {
2062 ERROR(jail->pakfire, "Could not allocate argv: %m\n");
2063 goto ERROR;
2064 }
2065
2066 // Set command
2067 argv[0] = (root) ? pakfire_path_relpath(root, path) : path;
2068
2069 // Copy args
2070 for (unsigned int i = 1; i < argc; i++)
2071 argv[i] = args[i-1];
2072
2073 // Run the script
2074 r = pakfire_jail_exec(jail, argv, callback_in, callback_out, data, 0);
2075
2076 ERROR:
2077 if (argv)
2078 free(argv);
2079 if (f)
2080 fclose(f);
2081
2082 // Remove script from disk
2083 if (*path)
2084 unlink(path);
2085
2086 return r;
2087 }
2088
2089 /*
2090 A convenience function that creates a new jail, runs the given command and destroys
2091 the jail again.
2092 */
2093 int pakfire_jail_run(struct pakfire* pakfire, const char* argv[], int flags, char** output) {
2094 struct pakfire_jail* jail = NULL;
2095 int r;
2096
2097 // Create a new jail
2098 r = pakfire_jail_create(&jail, pakfire);
2099 if (r)
2100 goto ERROR;
2101
2102 // Execute the command
2103 r = pakfire_jail_exec(jail, argv, NULL, pakfire_jail_capture_stdout, output, 0);
2104
2105 ERROR:
2106 if (jail)
2107 pakfire_jail_unref(jail);
2108
2109 return r;
2110 }
2111
/*
	Creates a new jail, runs the given script with argv as its arguments and
	destroys the jail again.

	flags is accepted but not used by this function.

	Returns the non-zero result of pakfire_jail_create() if the jail could not
	be created, otherwise the result of pakfire_jail_exec_script().
*/
int pakfire_jail_run_script(struct pakfire* pakfire,
		const char* script, const size_t length, const char* argv[], int flags) {
	struct pakfire_jail* jail = NULL;

	// Only run the script if the jail could be created
	int r = pakfire_jail_create(&jail, pakfire);
	if (!r)
		r = pakfire_jail_exec_script(jail, script, length, argv, NULL, NULL, NULL);

	// Drop our reference to the jail (if we got one)
	if (jail)
		pakfire_jail_unref(jail);

	return r;
}
2131
/*
	Launches an interactive "/bin/bash --login" in the jail and returns the
	result of pakfire_jail_exec_interactive().
*/
int pakfire_jail_shell(struct pakfire_jail* jail) {
	const char* command[] = {
		"/bin/bash",
		"--login",
		NULL,
	};

	return pakfire_jail_exec_interactive(jail, command, 0);
}
2140
2141 static int pakfire_jail_run_if_possible(struct pakfire* pakfire, const char** argv) {
2142 char path[PATH_MAX];
2143 int r;
2144
2145 r = pakfire_path(pakfire, path, "%s", *argv);
2146 if (r)
2147 return r;
2148
2149 // Check if the file is executable
2150 r = access(path, X_OK);
2151 if (r) {
2152 DEBUG(pakfire, "%s is not executable. Skipping...\n", *argv);
2153 return 0;
2154 }
2155
2156 return pakfire_jail_run(pakfire, argv, 0, NULL);
2157 }
2158
/*
	Runs /sbin/ldconfig in a jail (if it is available) and returns the result
	of pakfire_jail_run_if_possible().
*/
int pakfire_jail_ldconfig(struct pakfire* pakfire) {
	const char* command[] = { "/sbin/ldconfig", NULL };

	return pakfire_jail_run_if_possible(pakfire, command);
}
2167
/*
	Runs "/usr/bin/systemd-tmpfiles --create" in a jail (if it is available)
	and returns the result of pakfire_jail_run_if_possible().
*/
int pakfire_jail_run_systemd_tmpfiles(struct pakfire* pakfire) {
	const char* command[] = { "/usr/bin/systemd-tmpfiles", "--create", NULL };

	return pakfire_jail_run_if_possible(pakfire, command);
}