]> git.ipfire.org Git - people/ms/pakfire.git/blob - src/libpakfire/jail.c
jail: Don't abort if /etc/hosts or /etc/resolv.conf cannot be bind-mounted
[people/ms/pakfire.git] / src / libpakfire / jail.c
1 /*#############################################################################
2 # #
3 # Pakfire - The IPFire package management system #
4 # Copyright (C) 2022 Pakfire development team #
5 # #
6 # This program is free software: you can redistribute it and/or modify #
7 # it under the terms of the GNU General Public License as published by #
8 # the Free Software Foundation, either version 3 of the License, or #
9 # (at your option) any later version. #
10 # #
11 # This program is distributed in the hope that it will be useful, #
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of #
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
14 # GNU General Public License for more details. #
15 # #
16 # You should have received a copy of the GNU General Public License #
17 # along with this program. If not, see <http://www.gnu.org/licenses/>. #
18 # #
19 #############################################################################*/
20
21 #include <errno.h>
22 #include <fcntl.h>
23 #include <linux/capability.h>
24 #include <linux/sched.h>
25 #include <sys/wait.h>
26 #include <linux/wait.h>
27 #include <sched.h>
28 #include <signal.h>
29 #include <stdlib.h>
30 #include <syscall.h>
31 #include <sys/capability.h>
32 #include <sys/epoll.h>
33 #include <sys/eventfd.h>
34 #include <sys/mount.h>
35 #include <sys/personality.h>
36 #include <sys/prctl.h>
37 #include <sys/resource.h>
38 #include <sys/signalfd.h>
39 #include <sys/timerfd.h>
40 #include <sys/types.h>
41 #include <sys/wait.h>
42
43 // libnl3
44 #include <net/if.h>
45 #include <netlink/route/link.h>
46
47 // libseccomp
48 #include <seccomp.h>
49
50 // libuuid
51 #include <uuid.h>
52
53 #include <pakfire/arch.h>
54 #include <pakfire/cgroup.h>
55 #include <pakfire/jail.h>
56 #include <pakfire/logging.h>
57 #include <pakfire/mount.h>
58 #include <pakfire/pakfire.h>
59 #include <pakfire/private.h>
60 #include <pakfire/pwd.h>
61 #include <pakfire/string.h>
62 #include <pakfire/util.h>
63
// Size in bytes of each log buffer. The expression is parenthesised so that
// the macro expands correctly when used inside a larger expression.
#define BUFFER_SIZE      (1024 * 64)

// Number of slots in a jail's environment array
#define ENVIRON_SIZE     128

// Maximum number of events fetched by a single epoll_wait() call
#define EPOLL_MAX_EVENTS 2

// Maximum number of custom bind-mounts that can be registered per jail
#define MAX_MOUNTPOINTS  8
68
// Environment variables that every new jail starts out with.
// The table is terminated by an entry whose key is NULL.
static const struct environ {
	const char* key;
	const char* val;
} ENV[] = {
	{ .key = "HOME", .val = "/root" },
	{ .key = "LANG", .val = "C.utf-8" },
	{ .key = "PATH", .val = "/usr/local/sbin:/usr/sbin:/sbin:/usr/local/bin:/usr/bin:/bin" },
	{ .key = "TERM", .val = "vt100" },

	// Lets processes detect that they are running inside a Pakfire container
	{ .key = "container", .val = "pakfire" },

	// Sentinel
	{ .key = NULL, .val = NULL },
};
83
// One custom bind-mount as registered through pakfire_jail_bind()
struct pakfire_jail_mountpoint {
	// Path that is passed as the source of the bind-mount
	char source[PATH_MAX];

	// Path that is passed as the target of the bind-mount
	char target[PATH_MAX];

	// Mount flags that are passed on unchanged
	int flags;
};
89
90 struct pakfire_jail {
91 struct pakfire* pakfire;
92 int nrefs;
93
94 // A unique ID for each jail
95 uuid_t uuid;
96 char __uuid[UUID_STR_LEN];
97
98 // Resource Limits
99 int nice;
100
101 // Timeout
102 struct itimerspec timeout;
103
104 // CGroup
105 struct pakfire_cgroup* cgroup;
106
107 // Environment
108 char* env[ENVIRON_SIZE];
109
110 // Mountpoints
111 struct pakfire_jail_mountpoint mountpoints[MAX_MOUNTPOINTS];
112 unsigned int num_mountpoints;
113
114 // Callbacks
115 struct pakfire_jail_callbacks {
116 // Log
117 pakfire_jail_log_callback log;
118 void* log_data;
119 } callbacks;
120 };
121
122 struct pakfire_log_buffer {
123 char data[BUFFER_SIZE];
124 size_t used;
125 };
126
127 struct pakfire_jail_exec {
128 int flags;
129
130 // PID (of the child)
131 pid_t pid;
132 int pidfd;
133
134 // Process status (from waitid)
135 siginfo_t status;
136
137 // FD to notify the client that the parent has finished initialization
138 int completed_fd;
139
140 // Log pipes
141 struct pakfire_jail_pipes {
142 int stdin[2];
143 int stdout[2];
144 int stderr[2];
145
146 // Logging
147 int log_INFO[2];
148 int log_ERROR[2];
149 int log_DEBUG[2];
150 } pipes;
151
152 // Communicate
153 struct pakfire_jail_communicate {
154 pakfire_jail_communicate_in in;
155 pakfire_jail_communicate_out out;
156 void* data;
157 } communicate;
158
159 // Log buffers
160 struct pakfire_jail_buffers {
161 struct pakfire_log_buffer stdout;
162 struct pakfire_log_buffer stderr;
163
164 // Logging
165 struct pakfire_log_buffer log_INFO;
166 struct pakfire_log_buffer log_ERROR;
167 struct pakfire_log_buffer log_DEBUG;
168 } buffers;
169
170 struct pakfire_cgroup* cgroup;
171 struct pakfire_cgroup_stats cgroup_stats;
172 };
173
// Invokes the clone3 system call directly through syscall()
static int clone3(struct clone_args* args, size_t size) {
	const long result = syscall(__NR_clone3, args, size);

	return result;
}
177
178 static int pidfd_send_signal(int pidfd, int sig, siginfo_t* info, unsigned int flags) {
179 return syscall(SYS_pidfd_send_signal, pidfd, sig, info, flags);
180 }
181
// Invokes the pivot_root system call directly through syscall()
static int pivot_root(const char* new_root, const char* old_root) {
	const long result = syscall(SYS_pivot_root, new_root, old_root);

	return result;
}
185
186 static int pakfire_jail_exec_has_flag(
187 const struct pakfire_jail_exec* ctx, const enum pakfire_jail_exec_flags flag) {
188 return ctx->flags & flag;
189 }
190
191 static void pakfire_jail_free(struct pakfire_jail* jail) {
192 DEBUG(jail->pakfire, "Freeing jail at %p\n", jail);
193
194 // Free environment
195 for (unsigned int i = 0; jail->env[i]; i++)
196 free(jail->env[i]);
197
198 if (jail->cgroup)
199 pakfire_cgroup_unref(jail->cgroup);
200
201 pakfire_unref(jail->pakfire);
202 free(jail);
203 }
204
/*
	Default log callback: forwards a line to the Pakfire logging macro that
	matches its priority. Lines with any other priority are dropped.

	Always returns zero.
*/
static int pakfire_jail_default_log_callback(struct pakfire* pakfire, void* data,
		int priority, const char* line, size_t length) {
	if (priority == LOG_INFO) {
		INFO(pakfire, "%s", line);

	} else if (priority == LOG_ERR) {
		ERROR(pakfire, "%s", line);
	}
#ifdef ENABLE_DEBUG
	else if (priority == LOG_DEBUG) {
		DEBUG(pakfire, "%s", line);
	}
#endif

	return 0;
}
228
229 static const char* pakfire_jail_uuid(struct pakfire_jail* jail) {
230 if (!*jail->__uuid)
231 uuid_unparse_lower(jail->uuid, jail->__uuid);
232
233 return jail->__uuid;
234 }
235
// Prepares the environment for an interactive session: sets a prompt and
// takes over TERM and LANG from the calling process if they are set.
// Returns the first non-zero result of pakfire_jail_set_env() or zero.
static int pakfire_jail_setup_interactive_env(struct pakfire_jail* jail) {
	// Variables that are copied from the calling environment (in this order)
	static const char* const inherited[] = { "TERM", "LANG", NULL };

	// Set PS1
	int r = pakfire_jail_set_env(jail, "PS1", "pakfire-jail \\w> ");
	if (r)
		return r;

	for (const char* const* name = inherited; *name; name++) {
		const char* value = secure_getenv(*name);

		// Skip anything that is not set
		if (!value)
			continue;

		r = pakfire_jail_set_env(jail, *name, value);
		if (r)
			return r;
	}

	return 0;
}
260
261 PAKFIRE_EXPORT int pakfire_jail_create(struct pakfire_jail** jail, struct pakfire* pakfire) {
262 int r;
263
264 const char* arch = pakfire_get_effective_arch(pakfire);
265
266 // Allocate a new jail
267 struct pakfire_jail* j = calloc(1, sizeof(*j));
268 if (!j)
269 return 1;
270
271 // Reference Pakfire
272 j->pakfire = pakfire_ref(pakfire);
273
274 // Initialize reference counter
275 j->nrefs = 1;
276
277 // Generate a random UUID
278 uuid_generate_random(j->uuid);
279
280 DEBUG(j->pakfire, "Allocated new jail at %p\n", j);
281
282 // Set the default logging callback
283 pakfire_jail_set_log_callback(j, pakfire_jail_default_log_callback, NULL);
284
285 // Set default environment
286 for (const struct environ* e = ENV; e->key; e++) {
287 r = pakfire_jail_set_env(j, e->key, e->val);
288 if (r)
289 goto ERROR;
290 }
291
292 // Enable all CPU features that CPU has to offer
293 if (!pakfire_arch_is_supported_by_host(arch)) {
294 r = pakfire_jail_set_env(j, "QEMU_CPU", "max");
295 if (r)
296 goto ERROR;
297 }
298
299 // Set container UUID
300 r = pakfire_jail_set_env(j, "container_uuid", pakfire_jail_uuid(j));
301 if (r)
302 goto ERROR;
303
304 // Disable systemctl to talk to systemd
305 if (!pakfire_on_root(j->pakfire)) {
306 r = pakfire_jail_set_env(j, "SYSTEMD_OFFLINE", "1");
307 if (r)
308 goto ERROR;
309 }
310
311 // Done
312 *jail = j;
313 return 0;
314
315 ERROR:
316 pakfire_jail_free(j);
317
318 return r;
319 }
320
321 PAKFIRE_EXPORT struct pakfire_jail* pakfire_jail_ref(struct pakfire_jail* jail) {
322 ++jail->nrefs;
323
324 return jail;
325 }
326
327 PAKFIRE_EXPORT struct pakfire_jail* pakfire_jail_unref(struct pakfire_jail* jail) {
328 if (--jail->nrefs > 0)
329 return jail;
330
331 pakfire_jail_free(jail);
332 return NULL;
333 }
334
335 // Logging Callback
336
337 PAKFIRE_EXPORT void pakfire_jail_set_log_callback(struct pakfire_jail* jail,
338 pakfire_jail_log_callback callback, void* data) {
339 jail->callbacks.log = callback;
340 jail->callbacks.log_data = data;
341 }
342
343 // Resource Limits
344
345 PAKFIRE_EXPORT int pakfire_jail_nice(struct pakfire_jail* jail, int nice) {
346 // Check if nice level is in range
347 if (nice < -19 || nice > 20) {
348 errno = EINVAL;
349 return 1;
350 }
351
352 // Store nice level
353 jail->nice = nice;
354
355 return 0;
356 }
357
358 int pakfire_jail_set_cgroup(struct pakfire_jail* jail, struct pakfire_cgroup* cgroup) {
359 // Free any previous cgroup
360 if (jail->cgroup) {
361 pakfire_cgroup_unref(jail->cgroup);
362 jail->cgroup = NULL;
363 }
364
365 // Set any new cgroup
366 if (cgroup) {
367 DEBUG(jail->pakfire, "Setting cgroup %p\n", cgroup);
368
369 jail->cgroup = pakfire_cgroup_ref(cgroup);
370 }
371
372 // Done
373 return 0;
374 }
375
376 // Environment
377
378 // Returns the length of the environment
379 static unsigned int pakfire_jail_env_length(struct pakfire_jail* jail) {
380 unsigned int i = 0;
381
382 // Count everything in the environment
383 for (char** e = jail->env; *e; e++)
384 i++;
385
386 return i;
387 }
388
389 // Finds an existing environment variable and returns its index or -1 if not found
390 static int pakfire_jail_find_env(struct pakfire_jail* jail, const char* key) {
391 if (!key) {
392 errno = EINVAL;
393 return -1;
394 }
395
396 const size_t length = strlen(key);
397
398 for (unsigned int i = 0; jail->env[i]; i++) {
399 if ((pakfire_string_startswith(jail->env[i], key)
400 && *(jail->env[i] + length) == '=')) {
401 return i;
402 }
403 }
404
405 // Nothing found
406 return -1;
407 }
408
409 // Returns the value of an environment variable or NULL
410 PAKFIRE_EXPORT const char* pakfire_jail_get_env(struct pakfire_jail* jail,
411 const char* key) {
412 int i = pakfire_jail_find_env(jail, key);
413 if (i < 0)
414 return NULL;
415
416 return jail->env[i] + strlen(key) + 1;
417 }
418
419 // Sets an environment variable
420 PAKFIRE_EXPORT int pakfire_jail_set_env(struct pakfire_jail* jail,
421 const char* key, const char* value) {
422 // Find the index where to write this value to
423 int i = pakfire_jail_find_env(jail, key);
424 if (i < 0)
425 i = pakfire_jail_env_length(jail);
426
427 // Return -ENOSPC when the environment is full
428 if (i >= ENVIRON_SIZE) {
429 errno = ENOSPC;
430 return -1;
431 }
432
433 // Free any previous value
434 if (jail->env[i])
435 free(jail->env[i]);
436
437 // Format and set environment variable
438 asprintf(&jail->env[i], "%s=%s", key, value);
439
440 DEBUG(jail->pakfire, "Set environment variable: %s\n", jail->env[i]);
441
442 return 0;
443 }
444
445 // Imports an environment
446 PAKFIRE_EXPORT int pakfire_jail_import_env(struct pakfire_jail* jail, const char* env[]) {
447 if (!env)
448 return 0;
449
450 char* key;
451 char* val;
452 int r;
453
454 // Copy environment variables
455 for (unsigned int i = 0; env[i]; i++) {
456 r = pakfire_string_partition(env[i], "=", &key, &val);
457 if (r)
458 continue;
459
460 // Set value
461 r = pakfire_jail_set_env(jail, key, val);
462
463 if (key)
464 free(key);
465 if (val)
466 free(val);
467
468 // Break on error
469 if (r)
470 return r;
471 }
472
473 return 0;
474 }
475
476 // Timeout
477
478 PAKFIRE_EXPORT int pakfire_jail_set_timeout(
479 struct pakfire_jail* jail, unsigned int timeout) {
480 // Store value
481 jail->timeout.it_value.tv_sec = timeout;
482
483 if (timeout > 0)
484 DEBUG(jail->pakfire, "Timeout set to %d second(s)\n", timeout);
485 else
486 DEBUG(jail->pakfire, "Timeout disabled\n");
487
488 return 0;
489 }
490
491 static int pakfire_jail_create_timer(struct pakfire_jail* jail) {
492 int r;
493
494 // Nothing to do if no timeout has been set
495 if (!jail->timeout.it_value.tv_sec)
496 return -1;
497
498 // Create a new timer
499 const int fd = timerfd_create(CLOCK_MONOTONIC, 0);
500 if (fd < 0) {
501 ERROR(jail->pakfire, "Could not create timer: %m\n");
502 goto ERROR;
503 }
504
505 // Arm timer
506 r = timerfd_settime(fd, 0, &jail->timeout, NULL);
507 if (r) {
508 ERROR(jail->pakfire, "Could not arm timer: %m\n");
509 goto ERROR;
510 }
511
512 return fd;
513
514 ERROR:
515 if (fd > 0)
516 close(fd);
517
518 return -1;
519 }
520
521 // Signals
522
523 static int pakfire_jail_handle_signals(struct pakfire_jail* jail) {
524 sigset_t mask;
525 int r;
526
527 sigemptyset(&mask);
528 sigaddset(&mask, SIGINT);
529
530 // Block signals
531 r = sigprocmask(SIG_BLOCK, &mask, NULL);
532 if (r < 0) {
533 ERROR(jail->pakfire, "Failed to block signals: %m\n");
534 return r;
535 }
536
537 // Create a file descriptor
538 r = signalfd(-1, &mask, SFD_NONBLOCK|SFD_CLOEXEC);
539 if (r < 0) {
540 ERROR(jail->pakfire, "Failed to create signalfd: %m\n");
541 return r;
542 }
543
544 return r;
545 }
546
547 /*
548 This function replaces any logging in the child process.
549
550 All log messages will be sent to the parent process through their respective pipes.
551 */
552 static void pakfire_jail_log(void* data, int priority, const char* file,
553 int line, const char* fn, const char* format, va_list args) {
554 struct pakfire_jail_pipes* pipes = (struct pakfire_jail_pipes*)data;
555 int fd;
556
557 switch (priority) {
558 case LOG_INFO:
559 fd = pipes->log_INFO[1];
560 break;
561
562 case LOG_ERR:
563 fd = pipes->log_ERROR[1];
564 break;
565
566 #ifdef ENABLE_DEBUG
567 case LOG_DEBUG:
568 fd = pipes->log_DEBUG[1];
569 break;
570 #endif /* ENABLE_DEBUG */
571
572 // Ignore any messages of an unknown priority
573 default:
574 return;
575 }
576
577 // Send the log message
578 if (fd)
579 vdprintf(fd, format, args);
580 }
581
582 static int pakfire_jail_log_buffer_is_full(const struct pakfire_log_buffer* buffer) {
583 return (sizeof(buffer->data) == buffer->used);
584 }
585
586 /*
587 This function reads as much data as it can from the file descriptor.
588 If it finds a whole line in it, it will send it to the logger and repeat the process.
589 If not newline character is found, it will try to read more data until it finds one.
590 */
591 static int pakfire_jail_handle_log(struct pakfire_jail* jail,
592 struct pakfire_jail_exec* ctx, int priority, int fd,
593 struct pakfire_log_buffer* buffer, pakfire_jail_communicate_out callback, void* data) {
594 char line[BUFFER_SIZE + 1];
595
596 // Fill up buffer from fd
597 if (buffer->used < sizeof(buffer->data)) {
598 ssize_t bytes_read = read(fd, buffer->data + buffer->used,
599 sizeof(buffer->data) - buffer->used);
600
601 // Handle errors
602 if (bytes_read < 0) {
603 ERROR(jail->pakfire, "Could not read from fd %d: %m\n", fd);
604 return -1;
605 }
606
607 // Update buffer size
608 buffer->used += bytes_read;
609 }
610
611 // See if we have any lines that we can write
612 while (buffer->used) {
613 // Search for the end of the first line
614 char* eol = memchr(buffer->data, '\n', buffer->used);
615
616 // No newline found
617 if (!eol) {
618 // If the buffer is full, we send the content to the logger and try again
619 // This should not happen in practise
620 if (pakfire_jail_log_buffer_is_full(buffer)) {
621 DEBUG(jail->pakfire, "Logging buffer is full. Sending all content\n");
622
623 eol = buffer->data + sizeof(buffer->data) - 1;
624
625 // Otherwise we might have only read parts of the output
626 } else
627 break;
628 }
629
630 // Find the length of the string
631 size_t length = eol - buffer->data + 1;
632
633 // Copy the line into the buffer
634 memcpy(line, buffer->data, length);
635
636 // Terminate the string
637 line[length] = '\0';
638
639 // Log the line
640 if (callback) {
641 int r = callback(jail->pakfire, data, priority, line, length);
642 if (r) {
643 ERROR(jail->pakfire, "The logging callback returned an error: %d\n", r);
644 return r;
645 }
646 }
647
648 // Remove line from buffer
649 memmove(buffer->data, buffer->data + length, buffer->used - length);
650 buffer->used -= length;
651 }
652
653 return 0;
654 }
655
656 static int pakfire_jail_stream_stdin(struct pakfire_jail* jail,
657 struct pakfire_jail_exec* ctx, const int fd) {
658 int r;
659
660 // Nothing to do if there is no stdin callback set
661 if (!ctx->communicate.in) {
662 DEBUG(jail->pakfire, "Callback for standard input is not set\n");
663 return 0;
664 }
665
666 // Skip if the writing pipe has already been closed
667 if (!ctx->pipes.stdin[1])
668 return 0;
669
670 DEBUG(jail->pakfire, "Streaming standard input...\n");
671
672 // Calling the callback
673 r = ctx->communicate.in(jail->pakfire, ctx->communicate.data, fd);
674
675 DEBUG(jail->pakfire, "Standard input callback finished: %d\n", r);
676
677 // The callback signaled that it has written everything
678 if (r == EOF) {
679 DEBUG(jail->pakfire, "Closing standard input pipe\n");
680
681 // Close the file-descriptor
682 close(fd);
683
684 // Reset the file-descriptor so it won't be closed again later
685 ctx->pipes.stdin[1] = 0;
686
687 // Report success
688 r = 0;
689 }
690
691 return r;
692 }
693
694 static int pakfire_jail_setup_pipe(struct pakfire_jail* jail, int (*fds)[2], const int flags) {
695 int r = pipe2(*fds, flags);
696 if (r < 0) {
697 ERROR(jail->pakfire, "Could not setup pipe: %m\n");
698 return 1;
699 }
700
701 return 0;
702 }
703
// Closes both ends of a pipe.
//
// Two values mark an end that must not be closed: zero (the pipe was
// never set up) and -1 (the end has already been closed by
// pakfire_jail_get_pipe_to_read() or pakfire_jail_get_pipe_to_write()).
// Only positive file descriptors are passed to close().
static void pakfire_jail_close_pipe(struct pakfire_jail* jail, int fds[2]) {
	for (unsigned int i = 0; i < 2; i++) {
		if (fds[i] > 0)
			close(fds[i]);
	}
}
709
/*
	Returns the read end of a pipe.

	The write end is closed and set to -1 unless it is zero.
*/
static int pakfire_jail_get_pipe_to_read(struct pakfire_jail* jail, int (*fds)[2]) {
	int* pipe_fds = *fds;

	// We are not going to write to this pipe
	if (pipe_fds[1] != 0) {
		close(pipe_fds[1]);
		pipe_fds[1] = -1;
	}

	// Index 0 is the read end
	return pipe_fds[0];
}
728
// Returns the write end of a pipe.
// The read end is closed and set to -1 unless it is zero.
static int pakfire_jail_get_pipe_to_write(struct pakfire_jail* jail, int (*fds)[2]) {
	int* pipe_fds = *fds;

	// We are not going to read from this pipe
	if (pipe_fds[0] != 0) {
		close(pipe_fds[0]);
		pipe_fds[0] = -1;
	}

	// Index 1 is the write end
	return pipe_fds[1];
}
743
744 static int pakfire_jail_wait(struct pakfire_jail* jail, struct pakfire_jail_exec* ctx) {
745 int epollfd = -1;
746 struct epoll_event ev;
747 struct epoll_event events[EPOLL_MAX_EVENTS];
748 struct signalfd_siginfo siginfo;
749 char garbage[8];
750 int r = 0;
751
752 // Fetch file descriptors from context
753 const int stdin = pakfire_jail_get_pipe_to_write(jail, &ctx->pipes.stdin);
754 const int stdout = pakfire_jail_get_pipe_to_read(jail, &ctx->pipes.stdout);
755 const int stderr = pakfire_jail_get_pipe_to_read(jail, &ctx->pipes.stderr);
756 const int pidfd = ctx->pidfd;
757
758 // Timer
759 const int timerfd = pakfire_jail_create_timer(jail);
760
761 // Logging
762 const int log_INFO = pakfire_jail_get_pipe_to_read(jail, &ctx->pipes.log_INFO);
763 const int log_ERROR = pakfire_jail_get_pipe_to_read(jail, &ctx->pipes.log_ERROR);
764 const int log_DEBUG = pakfire_jail_get_pipe_to_read(jail, &ctx->pipes.log_DEBUG);
765
766 // Signals
767 const int signalfd = pakfire_jail_handle_signals(jail);
768
769 // Make a list of all file descriptors we are interested in
770 const int fds[] = {
771 stdin, stdout, stderr, pidfd, timerfd, signalfd, log_INFO, log_ERROR, log_DEBUG,
772 };
773
774 // Setup epoll
775 epollfd = epoll_create1(0);
776 if (epollfd < 0) {
777 ERROR(jail->pakfire, "Could not initialize epoll(): %m\n");
778 r = 1;
779 goto ERROR;
780 }
781
782 // Turn file descriptors into non-blocking mode and add them to epoll()
783 for (unsigned int i = 0; i < sizeof(fds) / sizeof(*fds); i++) {
784 int fd = fds[i];
785
786 // Skip fds which were not initialized
787 if (fd < 0)
788 continue;
789
790 ev.events = EPOLLHUP;
791
792 if (fd == stdin)
793 ev.events |= EPOLLOUT;
794 else
795 ev.events |= EPOLLIN;
796
797 // Read flags
798 int flags = fcntl(fd, F_GETFL, 0);
799
800 // Set modified flags
801 if (fcntl(fd, F_SETFL, flags|O_NONBLOCK) < 0) {
802 ERROR(jail->pakfire,
803 "Could not set file descriptor %d into non-blocking mode: %m\n", fd);
804 r = 1;
805 goto ERROR;
806 }
807
808 ev.data.fd = fd;
809
810 if (epoll_ctl(epollfd, EPOLL_CTL_ADD, fd, &ev) < 0) {
811 ERROR(jail->pakfire, "Could not add file descriptor %d to epoll(): %m\n", fd);
812 r = 1;
813 goto ERROR;
814 }
815 }
816
817 int ended = 0;
818
819 // Loop for as long as the process is alive
820 while (!ended) {
821 int num = epoll_wait(epollfd, events, EPOLL_MAX_EVENTS, -1);
822 if (num < 1) {
823 // Ignore if epoll_wait() has been interrupted
824 if (errno == EINTR)
825 continue;
826
827 ERROR(jail->pakfire, "epoll_wait() failed: %m\n");
828 r = 1;
829
830 goto ERROR;
831 }
832
833 for (int i = 0; i < num; i++) {
834 int e = events[i].events;
835 int fd = events[i].data.fd;
836
837 struct pakfire_log_buffer* buffer = NULL;
838 pakfire_jail_communicate_out callback = NULL;
839 void* data = NULL;
840 int priority;
841
842 // Check if there is any data to be read
843 if (e & EPOLLIN) {
844 // Handle any changes to the PIDFD
845 if (fd == pidfd) {
846 // Call waidid() and store the result
847 r = waitid(P_PIDFD, ctx->pidfd, &ctx->status, WEXITED);
848 if (r) {
849 ERROR(jail->pakfire, "waitid() failed: %m\n");
850 goto ERROR;
851 }
852
853 // Mark that we have ended so that we will process the remaining
854 // events from epoll() now, but won't restart the outer loop.
855 ended = 1;
856 continue;
857
858 // Handle timer events
859 } else if (fd == timerfd) {
860 DEBUG(jail->pakfire, "Timer event received\n");
861
862 // Disarm the timer
863 r = read(timerfd, garbage, sizeof(garbage));
864 if (r < 1) {
865 ERROR(jail->pakfire, "Could not disarm timer: %m\n");
866 r = 1;
867 goto ERROR;
868 }
869
870 // Terminate the process if it hasn't already ended
871 if (!ended) {
872 DEBUG(jail->pakfire, "Terminating process...\n");
873
874 // Send SIGTERM to the process
875 r = pidfd_send_signal(pidfd, SIGKILL, NULL, 0);
876 if (r) {
877 ERROR(jail->pakfire, "Could not kill process: %m\n");
878 goto ERROR;
879 }
880 }
881
882 // There is nothing else to do
883 continue;
884
885 // Handle signals
886 } else if (fd == signalfd) {
887 // Read the signal
888 r = read(signalfd, &siginfo, sizeof(siginfo));
889 if (r < 1) {
890 ERROR(jail->pakfire, "Could not read signal: %m\n");
891 goto ERROR;
892 }
893
894 DEBUG(jail->pakfire, "Received signal %d\n", siginfo.ssi_signo);
895
896 // Handle signals
897 switch (siginfo.ssi_signo) {
898 // Pass SIGINT down to the child process
899 case SIGINT:
900 r = pidfd_send_signal(pidfd, siginfo.ssi_signo, NULL, 0);
901 if (r) {
902 ERROR(jail->pakfire, "Could not send signal to process: %m\n");
903 goto ERROR;
904 }
905 break;
906
907 default:
908 ERROR(jail->pakfire, "Received unhandled signal %d\n",
909 siginfo.ssi_signo);
910 break;
911 }
912
913 // Don't fall through to log processing
914 continue;
915
916 // Handle logging messages
917 } else if (fd == log_INFO) {
918 buffer = &ctx->buffers.log_INFO;
919 priority = LOG_INFO;
920
921 callback = jail->callbacks.log;
922 data = jail->callbacks.log_data;
923
924 } else if (fd == log_ERROR) {
925 buffer = &ctx->buffers.log_ERROR;
926 priority = LOG_ERR;
927
928 callback = jail->callbacks.log;
929 data = jail->callbacks.log_data;
930
931 } else if (fd == log_DEBUG) {
932 buffer = &ctx->buffers.log_DEBUG;
933 priority = LOG_DEBUG;
934
935 callback = jail->callbacks.log;
936 data = jail->callbacks.log_data;
937
938 // Handle anything from the log pipes
939 } else if (fd == stdout) {
940 buffer = &ctx->buffers.stdout;
941 priority = LOG_INFO;
942
943 // Send any output to the default logger if no callback is set
944 if (ctx->communicate.out) {
945 callback = ctx->communicate.out;
946 data = ctx->communicate.data;
947 } else {
948 callback = jail->callbacks.log;
949 data = jail->callbacks.log_data;
950 }
951
952 } else if (fd == stderr) {
953 buffer = &ctx->buffers.stderr;
954 priority = LOG_ERR;
955
956 // Send any output to the default logger if no callback is set
957 if (ctx->communicate.out) {
958 callback = ctx->communicate.out;
959 data = ctx->communicate.data;
960 } else {
961 callback = jail->callbacks.log;
962 data = jail->callbacks.log_data;
963 }
964
965 } else {
966 DEBUG(jail->pakfire, "Received invalid file descriptor %d\n", fd);
967 continue;
968 }
969
970 // Handle log event
971 r = pakfire_jail_handle_log(jail, ctx, priority, fd, buffer, callback, data);
972 if (r)
973 goto ERROR;
974 }
975
976 if (e & EPOLLOUT) {
977 // Handle standard input
978 if (fd == stdin) {
979 r = pakfire_jail_stream_stdin(jail, ctx, fd);
980 if (r) {
981 switch (errno) {
982 // Ignore if we filled up the buffer
983 case EAGAIN:
984 break;
985
986 default:
987 ERROR(jail->pakfire, "Could not write to stdin: %m\n");
988 goto ERROR;
989 }
990 }
991 }
992 }
993
994 // Check if any file descriptors have been closed
995 if (e & EPOLLHUP) {
996 // Remove the file descriptor
997 r = epoll_ctl(epollfd, EPOLL_CTL_DEL, fd, NULL);
998 if (r) {
999 ERROR(jail->pakfire, "Could not remove closed file-descriptor %d: %m\n", fd);
1000 goto ERROR;
1001 }
1002 }
1003 }
1004 }
1005
1006 ERROR:
1007 if (epollfd > 0)
1008 close(epollfd);
1009 if (timerfd > 0)
1010 close(timerfd);
1011 if (signalfd > 0)
1012 close(signalfd);
1013
1014 return r;
1015 }
1016
// Output callback that collects everything with priority LOG_INFO in a
// single string.
//
// data is expected to be a char** which points to either NULL or a string
// that was allocated by a previous call. Each call replaces *data with a
// new allocation holding the old content followed by line, and frees the
// old one (it used to be leaked). If the allocation fails, *data is left
// untouched and 1 is returned.
// NOTE(review): this assumes callers initialise *data to NULL or a
// heap-allocated string - confirm against the callers.
//
// Lines with any other priority (or if data is NULL) are passed on to the
// default log callback.
int pakfire_jail_capture_stdout(struct pakfire* pakfire, void* data,
		int priority, const char* line, size_t length) {
	char** output = (char**)data;

	// Append everything from stdout to a buffer
	if (output && priority == LOG_INFO) {
		char* joined = NULL;

		// Format into a temporary so that the old buffer survives a failure
		if (asprintf(&joined, "%s%s", (*output) ? *output : "", line) < 0)
			return 1;

		// Swap in the new buffer and release the old one
		free(*output);
		*output = joined;

		return 0;
	}

	// Send everything else to the default logger
	return pakfire_jail_default_log_callback(pakfire, NULL, priority, line, length);
}
1033
1034 // Capabilities
1035
1036 // Logs all capabilities of the current process
1037 static int pakfire_jail_show_capabilities(struct pakfire_jail* jail) {
1038 cap_t caps = NULL;
1039 char* name = NULL;
1040 cap_flag_value_t value_e;
1041 cap_flag_value_t value_i;
1042 cap_flag_value_t value_p;
1043 int r;
1044
1045 // Fetch PID
1046 pid_t pid = getpid();
1047
1048 // Fetch all capabilities
1049 caps = cap_get_proc();
1050 if (!caps) {
1051 ERROR(jail->pakfire, "Could not fetch capabilities: %m\n");
1052 r = 1;
1053 goto ERROR;
1054 }
1055
1056 DEBUG(jail->pakfire, "Capabilities of PID %d:\n", pid);
1057
1058 // Iterate over all capabilities
1059 for (unsigned int cap = 0; cap_valid(cap); cap++) {
1060 name = cap_to_name(cap);
1061
1062 // Fetch effective value
1063 r = cap_get_flag(caps, cap, CAP_EFFECTIVE, &value_e);
1064 if (r)
1065 goto ERROR;
1066
1067 // Fetch inheritable value
1068 r = cap_get_flag(caps, cap, CAP_INHERITABLE, &value_i);
1069 if (r)
1070 goto ERROR;
1071
1072 // Fetch permitted value
1073 r = cap_get_flag(caps, cap, CAP_PERMITTED, &value_p);
1074 if (r)
1075 goto ERROR;
1076
1077 DEBUG(jail->pakfire,
1078 " %-24s : %c%c%c\n",
1079 name,
1080 (value_e == CAP_SET) ? 'e' : '-',
1081 (value_i == CAP_SET) ? 'i' : '-',
1082 (value_p == CAP_SET) ? 'p' : '-'
1083 );
1084
1085 // Free name
1086 cap_free(name);
1087 name = NULL;
1088 }
1089
1090 // Success
1091 r = 0;
1092
1093 ERROR:
1094 if (name)
1095 cap_free(name);
1096 if (caps)
1097 cap_free(caps);
1098
1099 return r;
1100 }
1101
1102 static int pakfire_jail_set_capabilities(struct pakfire_jail* jail) {
1103 cap_t caps = NULL;
1104 char* name = NULL;
1105 int r;
1106
1107 // Fetch capabilities
1108 caps = cap_get_proc();
1109 if (!caps) {
1110 ERROR(jail->pakfire, "Could not read capabilities: %m\n");
1111 r = 1;
1112 goto ERROR;
1113 }
1114
1115 // Walk through all capabilities
1116 for (cap_value_t cap = 0; cap_valid(cap); cap++) {
1117 cap_value_t _caps[] = { cap };
1118
1119 // Fetch the name of the capability
1120 name = cap_to_name(cap);
1121
1122 r = cap_set_flag(caps, CAP_EFFECTIVE, 1, _caps, CAP_SET);
1123 if (r) {
1124 ERROR(jail->pakfire, "Could not set %s: %m\n", name);
1125 goto ERROR;
1126 }
1127
1128 r = cap_set_flag(caps, CAP_INHERITABLE, 1, _caps, CAP_SET);
1129 if (r) {
1130 ERROR(jail->pakfire, "Could not set %s: %m\n", name);
1131 goto ERROR;
1132 }
1133
1134 r = cap_set_flag(caps, CAP_PERMITTED, 1, _caps, CAP_SET);
1135 if (r) {
1136 ERROR(jail->pakfire, "Could not set %s: %m\n", name);
1137 goto ERROR;
1138 }
1139
1140 // Free name
1141 cap_free(name);
1142 name = NULL;
1143 }
1144
1145 // Restore all capabilities
1146 r = cap_set_proc(caps);
1147 if (r) {
1148 ERROR(jail->pakfire, "Restoring capabilities failed: %m\n");
1149 goto ERROR;
1150 }
1151
1152 // Add all capabilities to the ambient set
1153 for (unsigned int cap = 0; cap_valid(cap); cap++) {
1154 name = cap_to_name(cap);
1155
1156 // Raise the capability
1157 r = prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_RAISE, cap, 0, 0);
1158 if (r) {
1159 ERROR(jail->pakfire, "Could not set ambient capability %s: %m\n", name);
1160 goto ERROR;
1161 }
1162
1163 // Free name
1164 cap_free(name);
1165 name = NULL;
1166 }
1167
1168 // Success
1169 r = 0;
1170
1171 ERROR:
1172 if (name)
1173 cap_free(name);
1174 if (caps)
1175 cap_free(caps);
1176
1177 return r;
1178 }
1179
1180 // Syscall Filter
1181
1182 static int pakfire_jail_limit_syscalls(struct pakfire_jail* jail) {
1183 const int syscalls[] = {
1184 // The kernel's keyring isn't namespaced
1185 SCMP_SYS(keyctl),
1186 SCMP_SYS(add_key),
1187 SCMP_SYS(request_key),
1188
1189 // Disable userfaultfd
1190 SCMP_SYS(userfaultfd),
1191
1192 // Disable perf which could leak a lot of information about the host
1193 SCMP_SYS(perf_event_open),
1194
1195 0,
1196 };
1197 int r = 1;
1198
1199 DEBUG(jail->pakfire, "Applying syscall filter...\n");
1200
1201 // Setup a syscall filter which allows everything by default
1202 scmp_filter_ctx ctx = seccomp_init(SCMP_ACT_ALLOW);
1203 if (!ctx) {
1204 ERROR(jail->pakfire, "Could not setup seccomp filter: %m\n");
1205 goto ERROR;
1206 }
1207
1208 // All all syscalls
1209 for (const int* syscall = syscalls; *syscall; syscall++) {
1210 r = seccomp_rule_add(ctx, SCMP_ACT_ERRNO(EPERM), *syscall, 0);
1211 if (r) {
1212 ERROR(jail->pakfire, "Could not configure syscall %d: %m\n", *syscall);
1213 goto ERROR;
1214 }
1215 }
1216
1217 // Load syscall filter into the kernel
1218 r = seccomp_load(ctx);
1219 if (r) {
1220 ERROR(jail->pakfire, "Could not load syscall filter into the kernel: %m\n");
1221 goto ERROR;
1222 }
1223
1224 ERROR:
1225 if (ctx)
1226 seccomp_release(ctx);
1227
1228 return r;
1229 }
1230
1231 // Mountpoints
1232
1233 PAKFIRE_EXPORT int pakfire_jail_bind(struct pakfire_jail* jail,
1234 const char* source, const char* target, int flags) {
1235 struct pakfire_jail_mountpoint* mp = NULL;
1236 int r;
1237
1238 // Check if there is any space left
1239 if (jail->num_mountpoints >= MAX_MOUNTPOINTS) {
1240 errno = ENOSPC;
1241 return 1;
1242 }
1243
1244 // Check for valid inputs
1245 if (!source || !target) {
1246 errno = EINVAL;
1247 return 1;
1248 }
1249
1250 // Select the next free slot
1251 mp = &jail->mountpoints[jail->num_mountpoints];
1252
1253 // Copy source
1254 r = pakfire_string_set(mp->source, source);
1255 if (r) {
1256 ERROR(jail->pakfire, "Could not copy source: %m\n");
1257 return r;
1258 }
1259
1260 // Copy target
1261 r = pakfire_string_set(mp->target, target);
1262 if (r) {
1263 ERROR(jail->pakfire, "Could not copy target: %m\n");
1264 return r;
1265 }
1266
1267 // Copy flags
1268 mp->flags = flags;
1269
1270 // Increment counter
1271 jail->num_mountpoints++;
1272
1273 return 0;
1274 }
1275
1276 static int pakfire_jail_mount_networking(struct pakfire_jail* jail) {
1277 int r;
1278
1279 const char* paths[] = {
1280 "/etc/hosts",
1281 "/etc/resolv.conf",
1282 NULL,
1283 };
1284
1285 // Bind-mount all paths read-only
1286 for (const char** path = paths; *path; path++) {
1287 r = pakfire_bind(jail->pakfire, *path, NULL, MS_RDONLY);
1288 if (r) {
1289 switch (errno) {
1290 // Ignore if we don't have permission
1291 case EPERM:
1292 continue;
1293
1294 default:
1295 break;
1296 }
1297 return r;
1298 }
1299 }
1300
1301 return 0;
1302 }
1303
1304 /*
1305 Mounts everything that we require in the new namespace
1306 */
1307 static int pakfire_jail_mount(struct pakfire_jail* jail, struct pakfire_jail_exec* ctx) {
1308 struct pakfire_jail_mountpoint* mp = NULL;
1309 int flags = 0;
1310 int r;
1311
1312 // Enable loop devices
1313 if (pakfire_jail_exec_has_flag(ctx, PAKFIRE_JAIL_HAS_LOOP_DEVICES))
1314 flags |= PAKFIRE_MOUNT_LOOP_DEVICES;
1315
1316 // Mount all default stuff
1317 r = pakfire_mount_all(jail->pakfire, flags);
1318 if (r)
1319 return r;
1320
1321 // Mount networking stuff
1322 if (pakfire_jail_exec_has_flag(ctx, PAKFIRE_JAIL_HAS_NETWORKING)) {
1323 r = pakfire_jail_mount_networking(jail);
1324 if (r)
1325 return r;
1326 }
1327
1328 // Mount all custom stuff
1329 for (unsigned int i = 0; i < jail->num_mountpoints; i++) {
1330 // Fetch mountpoint
1331 mp = &jail->mountpoints[i];
1332
1333 // Mount it
1334 r = pakfire_bind(jail->pakfire, mp->source, mp->target, mp->flags);
1335 if (r)
1336 return r;
1337 }
1338
1339 // Log all mountpoints
1340 pakfire_mount_list(jail->pakfire);
1341
1342 return 0;
1343 }
1344
1345 // Networking
1346
1347 static int pakfire_jail_setup_loopback(struct pakfire_jail* jail) {
1348 struct nl_sock* nl = NULL;
1349 struct nl_cache* cache = NULL;
1350 struct rtnl_link* link = NULL;
1351 struct rtnl_link* change = NULL;
1352 int r;
1353
1354 DEBUG(jail->pakfire, "Setting up loopback...\n");
1355
1356 // Allocate a netlink socket
1357 nl = nl_socket_alloc();
1358 if (!nl) {
1359 ERROR(jail->pakfire, "Could not allocate a netlink socket: %m\n");
1360 r = 1;
1361 goto ERROR;
1362 }
1363
1364 // Connect the socket
1365 r = nl_connect(nl, NETLINK_ROUTE);
1366 if (r) {
1367 ERROR(jail->pakfire, "Could not connect netlink socket: %s\n", nl_geterror(r));
1368 goto ERROR;
1369 }
1370
1371 // Allocate the netlink cache
1372 r = rtnl_link_alloc_cache(nl, AF_UNSPEC, &cache);
1373 if (r < 0) {
1374 ERROR(jail->pakfire, "Unable to allocate netlink cache: %s\n", nl_geterror(r));
1375 goto ERROR;
1376 }
1377
1378 // Fetch loopback interface
1379 link = rtnl_link_get_by_name(cache, "lo");
1380 if (!link) {
1381 ERROR(jail->pakfire, "Could not find lo interface. Ignoring.\n");
1382 r = 0;
1383 goto ERROR;
1384 }
1385
1386 // Allocate a new link
1387 change = rtnl_link_alloc();
1388 if (!change) {
1389 ERROR(jail->pakfire, "Could not allocate change link\n");
1390 r = 1;
1391 goto ERROR;
1392 }
1393
1394 // Set the link to UP
1395 rtnl_link_set_flags(change, IFF_UP);
1396
1397 // Apply any changes
1398 r = rtnl_link_change(nl, link, change, 0);
1399 if (r) {
1400 ERROR(jail->pakfire, "Unable to activate loopback: %s\n", nl_geterror(r));
1401 goto ERROR;
1402 }
1403
1404 // Success
1405 r = 0;
1406
1407 ERROR:
1408 if (nl)
1409 nl_socket_free(nl);
1410
1411 return r;
1412 }
1413
1414 // UID/GID Mapping
1415
1416 static int pakfire_jail_setup_uid_mapping(struct pakfire_jail* jail, pid_t pid) {
1417 char path[PATH_MAX];
1418 int r;
1419
1420 // Skip mapping anything when running on /
1421 if (pakfire_on_root(jail->pakfire))
1422 return 0;
1423
1424 // Make path
1425 r = pakfire_string_format(path, "/proc/%d/uid_map", pid);
1426 if (r)
1427 return r;
1428
1429 // Fetch UID
1430 const uid_t uid = pakfire_uid(jail->pakfire);
1431
1432 // Fetch SUBUID
1433 const struct pakfire_subid* subuid = pakfire_subuid(jail->pakfire);
1434 if (!subuid)
1435 return 1;
1436
1437 /* When running as root, we will map the entire range.
1438
1439 When running as a non-privileged user, we will map the root user inside the jail
1440 to the user's UID outside of the jail, and we will map the rest starting from one.
1441 */
1442
1443 // Running as root
1444 if (uid == 0) {
1445 r = pakfire_file_write(jail->pakfire, path, 0, 0, 0,
1446 "0 %lu %lu\n", subuid->id, subuid->length);
1447 } else {
1448 r = pakfire_file_write(jail->pakfire, path, 0, 0, 0,
1449 "0 %lu 1\n1 %lu %lu\n", uid, subuid->id, subuid->length);
1450 }
1451
1452 if (r) {
1453 ERROR(jail->pakfire, "Could not map UIDs: %m\n");
1454 return r;
1455 }
1456
1457 return r;
1458 }
1459
1460 static int pakfire_jail_setup_gid_mapping(struct pakfire_jail* jail, pid_t pid) {
1461 char path[PATH_MAX];
1462 int r;
1463
1464 // Skip mapping anything when running on /
1465 if (pakfire_on_root(jail->pakfire))
1466 return 0;
1467
1468 // Fetch GID
1469 const gid_t gid = pakfire_gid(jail->pakfire);
1470
1471 // Fetch SUBGID
1472 const struct pakfire_subid* subgid = pakfire_subgid(jail->pakfire);
1473 if (!subgid)
1474 return 1;
1475
1476 // Make path
1477 r = pakfire_string_format(path, "/proc/%d/gid_map", pid);
1478 if (r)
1479 return r;
1480
1481 // Running as root
1482 if (gid == 0) {
1483 r = pakfire_file_write(jail->pakfire, path, 0, 0, 0,
1484 "0 %lu %lu\n", subgid->id, subgid->length);
1485 } else {
1486 r = pakfire_file_write(jail->pakfire, path, 0, 0, 0,
1487 "0 %lu 1\n%1 %lu %lu\n", gid, subgid->id, subgid->length);
1488 }
1489
1490 if (r) {
1491 ERROR(jail->pakfire, "Could not map GIDs: %m\n");
1492 return r;
1493 }
1494
1495 return r;
1496 }
1497
1498 static int pakfire_jail_setgroups(struct pakfire_jail* jail, pid_t pid) {
1499 char path[PATH_MAX];
1500 int r = 1;
1501
1502 // Make path
1503 r = pakfire_string_format(path, "/proc/%d/setgroups", pid);
1504 if (r)
1505 return r;
1506
1507 // Open file for writing
1508 FILE* f = fopen(path, "w");
1509 if (!f) {
1510 ERROR(jail->pakfire, "Could not open %s for writing: %m\n", path);
1511 goto ERROR;
1512 }
1513
1514 // Write content
1515 int bytes_written = fprintf(f, "deny\n");
1516 if (bytes_written <= 0) {
1517 ERROR(jail->pakfire, "Could not write to %s: %m\n", path);
1518 goto ERROR;
1519 }
1520
1521 r = fclose(f);
1522 f = NULL;
1523 if (r) {
1524 ERROR(jail->pakfire, "Could not close %s: %m\n", path);
1525 goto ERROR;
1526 }
1527
1528 ERROR:
1529 if (f)
1530 fclose(f);
1531
1532 return r;
1533 }
1534
1535 static int pakfire_jail_send_signal(struct pakfire_jail* jail, int fd) {
1536 const uint64_t val = 1;
1537 int r = 0;
1538
1539 DEBUG(jail->pakfire, "Sending signal...\n");
1540
1541 // Write to the file descriptor
1542 ssize_t bytes_written = write(fd, &val, sizeof(val));
1543 if (bytes_written < 0 || (size_t)bytes_written < sizeof(val)) {
1544 ERROR(jail->pakfire, "Could not send signal: %m\n");
1545 r = 1;
1546 }
1547
1548 // Close the file descriptor
1549 close(fd);
1550
1551 return r;
1552 }
1553
1554 static int pakfire_jail_wait_for_signal(struct pakfire_jail* jail, int fd) {
1555 uint64_t val = 0;
1556 int r = 0;
1557
1558 DEBUG(jail->pakfire, "Waiting for signal...\n");
1559
1560 ssize_t bytes_read = read(fd, &val, sizeof(val));
1561 if (bytes_read < 0 || (size_t)bytes_read < sizeof(val)) {
1562 ERROR(jail->pakfire, "Error waiting for signal: %m\n");
1563 r = 1;
1564 }
1565
1566 // Close the file descriptor
1567 close(fd);
1568
1569 return r;
1570 }
1571
1572 /*
1573 Performs the initialisation that needs to happen in the parent part
1574 */
1575 static int pakfire_jail_parent(struct pakfire_jail* jail, struct pakfire_jail_exec* ctx) {
1576 int r;
1577
1578 // Setup UID mapping
1579 r = pakfire_jail_setup_uid_mapping(jail, ctx->pid);
1580 if (r)
1581 return r;
1582
1583 // Write "deny" to /proc/PID/setgroups
1584 r = pakfire_jail_setgroups(jail, ctx->pid);
1585 if (r)
1586 return r;
1587
1588 // Setup GID mapping
1589 r = pakfire_jail_setup_gid_mapping(jail, ctx->pid);
1590 if (r)
1591 return r;
1592
1593 // Parent has finished initialisation
1594 DEBUG(jail->pakfire, "Parent has finished initialization\n");
1595
1596 // Send signal to client
1597 r = pakfire_jail_send_signal(jail, ctx->completed_fd);
1598 if (r)
1599 return r;
1600
1601 return 0;
1602 }
1603
1604 static int pakfire_jail_switch_root(struct pakfire_jail* jail, const char* root) {
1605 int r;
1606
1607 // Change to the new root
1608 r = chdir(root);
1609 if (r) {
1610 ERROR(jail->pakfire, "chdir(%s) failed: %m\n", root);
1611 return r;
1612 }
1613
1614 // Switch Root!
1615 r = pivot_root(".", ".");
1616 if (r) {
1617 ERROR(jail->pakfire, "Failed changing into the new root directory %s: %m\n", root);
1618 return r;
1619 }
1620
1621 // Umount the old root
1622 r = umount2(".", MNT_DETACH);
1623 if (r) {
1624 ERROR(jail->pakfire, "Could not umount the old root filesystem: %m\n");
1625 return r;
1626 }
1627
1628 return 0;
1629 }
1630
1631 static int pakfire_jail_child(struct pakfire_jail* jail, struct pakfire_jail_exec* ctx,
1632 const char* argv[]) {
1633 int r;
1634
1635 // Redirect any logging to our log pipe
1636 pakfire_set_log_callback(jail->pakfire, pakfire_jail_log, &ctx->pipes);
1637
1638 // Fetch my own PID
1639 pid_t pid = getpid();
1640
1641 DEBUG(jail->pakfire, "Launched child process in jail with PID %d\n", pid);
1642
1643 // Wait for the parent to finish initialization
1644 r = pakfire_jail_wait_for_signal(jail, ctx->completed_fd);
1645 if (r)
1646 return r;
1647
1648 // Die with parent
1649 r = prctl(PR_SET_PDEATHSIG, SIGKILL, 0, 0, 0);
1650 if (r) {
1651 ERROR(jail->pakfire, "Could not configure to die with parent: %m\n");
1652 return 126;
1653 }
1654
1655 // Make this process dumpable
1656 r = prctl (PR_SET_DUMPABLE, 1, 0, 0, 0);
1657 if (r) {
1658 ERROR(jail->pakfire, "Could not make the process dumpable: %m\n");
1659 return 126;
1660 }
1661
1662 // Don't drop any capabilities on setuid()
1663 r = prctl(PR_SET_KEEPCAPS, 1);
1664 if (r) {
1665 ERROR(jail->pakfire, "Could not set PR_SET_KEEPCAPS: %m\n");
1666 return 126;
1667 }
1668
1669 // Fetch UID/GID
1670 uid_t uid = getuid();
1671 gid_t gid = getgid();
1672
1673 // Fetch EUID/EGID
1674 uid_t euid = geteuid();
1675 gid_t egid = getegid();
1676
1677 DEBUG(jail->pakfire, " UID: %d (effective %d)\n", uid, euid);
1678 DEBUG(jail->pakfire, " GID: %d (effective %d)\n", gid, egid);
1679
1680 // Check if we are (effectively running as root)
1681 if (uid || gid || euid || egid) {
1682 ERROR(jail->pakfire, "Child process is not running as root\n");
1683 return 126;
1684 }
1685
1686 const char* root = pakfire_get_path(jail->pakfire);
1687 const char* arch = pakfire_get_effective_arch(jail->pakfire);
1688
1689 // Change mount propagation to slave to receive anything from the parent namespace
1690 r = pakfire_mount_change_propagation(jail->pakfire, MS_SLAVE, "/");
1691 if (r)
1692 return r;
1693
1694 // Make root a mountpoint in the new mount namespace
1695 r = pakfire_mount_make_mounpoint(jail->pakfire, root);
1696 if (r)
1697 return r;
1698
1699 // Change mount propagation to private
1700 r = pakfire_mount_change_propagation(jail->pakfire, MS_PRIVATE, root);
1701 if (r)
1702 return r;
1703
1704 // Change root (unless root is /)
1705 if (!pakfire_on_root(jail->pakfire)) {
1706 // Mount everything
1707 r = pakfire_jail_mount(jail, ctx);
1708 if (r)
1709 return r;
1710
1711 // chroot()
1712 r = pakfire_jail_switch_root(jail, root);
1713 if (r)
1714 return r;
1715 }
1716
1717 // Set personality
1718 unsigned long persona = pakfire_arch_personality(arch);
1719 if (persona) {
1720 r = personality(persona);
1721 if (r < 0) {
1722 ERROR(jail->pakfire, "Could not set personality (%x)\n", (unsigned int)persona);
1723 return 1;
1724 }
1725 }
1726
1727 // Setup networking
1728 if (!pakfire_jail_exec_has_flag(ctx, PAKFIRE_JAIL_HAS_NETWORKING)) {
1729 r = pakfire_jail_setup_loopback(jail);
1730 if (r)
1731 return 1;
1732 }
1733
1734 // Set nice level
1735 if (jail->nice) {
1736 DEBUG(jail->pakfire, "Setting nice level to %d\n", jail->nice);
1737
1738 r = setpriority(PRIO_PROCESS, pid, jail->nice);
1739 if (r) {
1740 ERROR(jail->pakfire, "Could not set nice level: %m\n");
1741 return 1;
1742 }
1743 }
1744
1745 // Close other end of log pipes
1746 close(ctx->pipes.log_INFO[0]);
1747 close(ctx->pipes.log_ERROR[0]);
1748 #ifdef ENABLE_DEBUG
1749 close(ctx->pipes.log_DEBUG[0]);
1750 #endif /* ENABLE_DEBUG */
1751
1752 // Connect standard input
1753 if (ctx->pipes.stdin[0] >= 0) {
1754 r = dup2(ctx->pipes.stdin[0], STDIN_FILENO);
1755 if (r < 0) {
1756 ERROR(jail->pakfire, "Could not connect fd %d to stdin: %m\n",
1757 ctx->pipes.stdin[0]);
1758
1759 return 1;
1760 }
1761 }
1762
1763 // Connect standard output and error
1764 if (ctx->pipes.stdout[1] >= 0 && ctx->pipes.stderr[1] >= 0) {
1765 r = dup2(ctx->pipes.stdout[1], STDOUT_FILENO);
1766 if (r < 0) {
1767 ERROR(jail->pakfire, "Could not connect fd %d to stdout: %m\n",
1768 ctx->pipes.stdout[1]);
1769
1770 return 1;
1771 }
1772
1773 r = dup2(ctx->pipes.stderr[1], STDERR_FILENO);
1774 if (r < 0) {
1775 ERROR(jail->pakfire, "Could not connect fd %d to stderr: %m\n",
1776 ctx->pipes.stderr[1]);
1777
1778 return 1;
1779 }
1780
1781 // Close the pipe (as we have moved the original file descriptors)
1782 pakfire_jail_close_pipe(jail, ctx->pipes.stdin);
1783 pakfire_jail_close_pipe(jail, ctx->pipes.stdout);
1784 pakfire_jail_close_pipe(jail, ctx->pipes.stderr);
1785 }
1786
1787 // Reset open file limit (http://0pointer.net/blog/file-descriptor-limits.html)
1788 r = pakfire_rlimit_reset_nofile(jail->pakfire);
1789 if (r)
1790 return r;
1791
1792 // Set capabilities
1793 r = pakfire_jail_set_capabilities(jail);
1794 if (r)
1795 return r;
1796
1797 // Show capabilities
1798 r = pakfire_jail_show_capabilities(jail);
1799 if (r)
1800 return r;
1801
1802 // Filter syscalls
1803 r = pakfire_jail_limit_syscalls(jail);
1804 if (r)
1805 return r;
1806
1807 DEBUG(jail->pakfire, "Child process initialization done\n");
1808 DEBUG(jail->pakfire, "Launching command:\n");
1809
1810 // Log argv
1811 for (unsigned int i = 0; argv[i]; i++)
1812 DEBUG(jail->pakfire, " argv[%d] = %s\n", i, argv[i]);
1813
1814 // exec() command
1815 r = execvpe(argv[0], (char**)argv, jail->env);
1816 if (r < 0) {
1817 // Translate errno into regular exit code
1818 switch (errno) {
1819 case ENOENT:
1820 // Ignore if the command doesn't exist
1821 if (ctx->flags & PAKFIRE_JAIL_NOENT_OK)
1822 r = 0;
1823 else
1824 r = 127;
1825
1826 break;
1827
1828 default:
1829 r = 1;
1830 }
1831
1832 ERROR(jail->pakfire, "Could not execve(%s): %m\n", argv[0]);
1833 }
1834
1835 // We should not get here
1836 return r;
1837 }
1838
1839 // Run a command in the jail
1840 static int __pakfire_jail_exec(struct pakfire_jail* jail, const char* argv[],
1841 const int interactive,
1842 pakfire_jail_communicate_in communicate_in,
1843 pakfire_jail_communicate_out communicate_out,
1844 void* data, int flags) {
1845 int exit = -1;
1846 int r;
1847
1848 // Check if argv is valid
1849 if (!argv || !argv[0]) {
1850 errno = EINVAL;
1851 return -1;
1852 }
1853
1854 // Initialize context for this call
1855 struct pakfire_jail_exec ctx = {
1856 .flags = flags,
1857
1858 .pipes = {
1859 .stdin = { -1, -1 },
1860 .stdout = { -1, -1 },
1861 .stderr = { -1, -1 },
1862 },
1863
1864 .communicate = {
1865 .in = communicate_in,
1866 .out = communicate_out,
1867 .data = data,
1868 },
1869
1870 .pidfd = -1,
1871 };
1872
1873 DEBUG(jail->pakfire, "Executing jail...\n");
1874
1875 // Enable networking in interactive mode
1876 if (interactive)
1877 ctx.flags |= PAKFIRE_JAIL_HAS_NETWORKING;
1878
1879 /*
1880 Setup a file descriptor which can be used to notify the client that the parent
1881 has completed configuration.
1882 */
1883 ctx.completed_fd = eventfd(0, EFD_CLOEXEC);
1884 if (ctx.completed_fd < 0) {
1885 ERROR(jail->pakfire, "eventfd() failed: %m\n");
1886 return -1;
1887 }
1888
1889 // Create pipes to communicate with child process if we are not running interactively
1890 if (!interactive) {
1891 // stdin (only if callback is set)
1892 if (ctx.communicate.in) {
1893 r = pakfire_jail_setup_pipe(jail, &ctx.pipes.stdin, 0);
1894 if (r)
1895 goto ERROR;
1896 }
1897
1898 // stdout
1899 r = pakfire_jail_setup_pipe(jail, &ctx.pipes.stdout, 0);
1900 if (r)
1901 goto ERROR;
1902
1903 // stderr
1904 r = pakfire_jail_setup_pipe(jail, &ctx.pipes.stderr, 0);
1905 if (r)
1906 goto ERROR;
1907 }
1908
1909 // Setup pipes for logging
1910 // INFO
1911 r = pakfire_jail_setup_pipe(jail, &ctx.pipes.log_INFO, O_CLOEXEC);
1912 if (r)
1913 goto ERROR;
1914
1915 // ERROR
1916 r = pakfire_jail_setup_pipe(jail, &ctx.pipes.log_ERROR, O_CLOEXEC);
1917 if (r)
1918 goto ERROR;
1919
1920 #ifdef ENABLE_DEBUG
1921 // DEBUG
1922 r = pakfire_jail_setup_pipe(jail, &ctx.pipes.log_DEBUG, O_CLOEXEC);
1923 if (r)
1924 goto ERROR;
1925 #endif /* ENABLE_DEBUG */
1926
1927 // Configure child process
1928 struct clone_args args = {
1929 .flags =
1930 CLONE_NEWCGROUP |
1931 CLONE_NEWIPC |
1932 CLONE_NEWNS |
1933 CLONE_NEWPID |
1934 CLONE_NEWTIME |
1935 CLONE_NEWUSER |
1936 CLONE_NEWUTS |
1937 CLONE_PIDFD,
1938 .exit_signal = SIGCHLD,
1939 .pidfd = (long long unsigned int)&ctx.pidfd,
1940 };
1941
1942 // Launch the process in a cgroup that is a leaf of the configured cgroup
1943 if (jail->cgroup) {
1944 args.flags |= CLONE_INTO_CGROUP;
1945
1946 // Fetch our UUID
1947 const char* uuid = pakfire_jail_uuid(jail);
1948
1949 // Create a temporary cgroup
1950 r = pakfire_cgroup_child(&ctx.cgroup, jail->cgroup, uuid, 0);
1951 if (r) {
1952 ERROR(jail->pakfire, "Could not create cgroup for jail: %m\n");
1953 goto ERROR;
1954 }
1955
1956 // Clone into this cgroup
1957 args.cgroup = pakfire_cgroup_fd(ctx.cgroup);
1958 }
1959
1960 // Setup networking
1961 if (!pakfire_jail_exec_has_flag(&ctx, PAKFIRE_JAIL_HAS_NETWORKING)) {
1962 args.flags |= CLONE_NEWNET;
1963 }
1964
1965 // Fork this process
1966 ctx.pid = clone3(&args, sizeof(args));
1967 if (ctx.pid < 0) {
1968 ERROR(jail->pakfire, "Could not clone: %m\n");
1969 return -1;
1970
1971 // Child process
1972 } else if (ctx.pid == 0) {
1973 r = pakfire_jail_child(jail, &ctx, argv);
1974 _exit(r);
1975 }
1976
1977 // Parent process
1978 r = pakfire_jail_parent(jail, &ctx);
1979 if (r)
1980 goto ERROR;
1981
1982 DEBUG(jail->pakfire, "Waiting for PID %d to finish its work\n", ctx.pid);
1983
1984 // Read output of the child process
1985 r = pakfire_jail_wait(jail, &ctx);
1986 if (r)
1987 goto ERROR;
1988
1989 // Handle exit status
1990 switch (ctx.status.si_code) {
1991 case CLD_EXITED:
1992 DEBUG(jail->pakfire, "The child process exited with code %d\n",
1993 ctx.status.si_status);
1994
1995 // Pass exit code
1996 exit = ctx.status.si_status;
1997 break;
1998
1999 case CLD_KILLED:
2000 ERROR(jail->pakfire, "The child process was killed\n");
2001 exit = 139;
2002 break;
2003
2004 case CLD_DUMPED:
2005 ERROR(jail->pakfire, "The child process terminated abnormally\n");
2006 break;
2007
2008 // Log anything else
2009 default:
2010 ERROR(jail->pakfire, "Unknown child exit code: %d\n", ctx.status.si_code);
2011 break;
2012 }
2013
2014 ERROR:
2015 // Destroy the temporary cgroup (if any)
2016 if (ctx.cgroup) {
2017 // Read cgroup stats
2018 r = pakfire_cgroup_stat(ctx.cgroup, &ctx.cgroup_stats);
2019 if (r) {
2020 ERROR(jail->pakfire, "Could not read cgroup stats: %m\n");
2021 } else {
2022 pakfire_cgroup_stat_dump(ctx.cgroup, &ctx.cgroup_stats);
2023 }
2024
2025 pakfire_cgroup_destroy(ctx.cgroup);
2026 pakfire_cgroup_unref(ctx.cgroup);
2027 }
2028
2029 // Close any file descriptors
2030 pakfire_jail_close_pipe(jail, ctx.pipes.stdin);
2031 pakfire_jail_close_pipe(jail, ctx.pipes.stdout);
2032 pakfire_jail_close_pipe(jail, ctx.pipes.stderr);
2033 if (ctx.pidfd)
2034 close(ctx.pidfd);
2035 pakfire_jail_close_pipe(jail, ctx.pipes.log_INFO);
2036 pakfire_jail_close_pipe(jail, ctx.pipes.log_ERROR);
2037 pakfire_jail_close_pipe(jail, ctx.pipes.log_DEBUG);
2038
2039 return exit;
2040 }
2041
2042 PAKFIRE_EXPORT int pakfire_jail_exec(
2043 struct pakfire_jail* jail,
2044 const char* argv[],
2045 pakfire_jail_communicate_in callback_in,
2046 pakfire_jail_communicate_out callback_out,
2047 void* data, int flags) {
2048 return __pakfire_jail_exec(jail, argv, 0, callback_in, callback_out, data, flags);
2049 }
2050
/*
	Runs the given command in the jail in interactive mode, i.e. without any
	communication callbacks.

	Returns the error of pakfire_jail_setup_interactive_env() if that failed,
	or otherwise whatever __pakfire_jail_exec() returns.
*/
static int pakfire_jail_exec_interactive(
		struct pakfire_jail* jail, const char* argv[], int flags) {
	// Setup interactive stuff
	const int r = pakfire_jail_setup_interactive_env(jail);
	if (r != 0)
		return r;

	// Launch without any callbacks
	return __pakfire_jail_exec(jail, argv, 1, NULL, NULL, NULL, flags);
}
2062
2063 int pakfire_jail_exec_script(struct pakfire_jail* jail,
2064 const char* script,
2065 const size_t size,
2066 const char* args[],
2067 pakfire_jail_communicate_in callback_in,
2068 pakfire_jail_communicate_out callback_out,
2069 void* data) {
2070 char path[PATH_MAX];
2071 const char** argv = NULL;
2072 FILE* f = NULL;
2073 int r;
2074
2075 const char* root = pakfire_get_path(jail->pakfire);
2076
2077 // Write the scriptlet to disk
2078 r = pakfire_path_join(path, root, PAKFIRE_TMP_DIR "/pakfire-script.XXXXXX");
2079 if (r)
2080 goto ERROR;
2081
2082 // Create a temporary file
2083 f = pakfire_mktemp(path, 0700);
2084 if (!f) {
2085 ERROR(jail->pakfire, "Could not create temporary file: %m\n");
2086 goto ERROR;
2087 }
2088
2089 DEBUG(jail->pakfire, "Writing script to %s:\n%.*s\n", path, (int)size, script);
2090
2091 // Write data
2092 r = fprintf(f, "%s", script);
2093 if (r < 0) {
2094 ERROR(jail->pakfire, "Could not write script to file %s: %m\n", path);
2095 goto ERROR;
2096 }
2097
2098 // Close file
2099 r = fclose(f);
2100 if (r) {
2101 ERROR(jail->pakfire, "Could not close script file %s: %m\n", path);
2102 goto ERROR;
2103 }
2104
2105 f = NULL;
2106
2107 // Count how many arguments were passed
2108 unsigned int argc = 1;
2109 if (args) {
2110 for (const char** arg = args; *arg; arg++)
2111 argc++;
2112 }
2113
2114 argv = calloc(argc + 1, sizeof(*argv));
2115 if (!argv) {
2116 ERROR(jail->pakfire, "Could not allocate argv: %m\n");
2117 goto ERROR;
2118 }
2119
2120 // Set command
2121 argv[0] = (root) ? pakfire_path_relpath(root, path) : path;
2122
2123 // Copy args
2124 for (unsigned int i = 1; i < argc; i++)
2125 argv[i] = args[i-1];
2126
2127 // Run the script
2128 r = pakfire_jail_exec(jail, argv, callback_in, callback_out, data, 0);
2129
2130 ERROR:
2131 if (argv)
2132 free(argv);
2133 if (f)
2134 fclose(f);
2135
2136 // Remove script from disk
2137 if (*path)
2138 unlink(path);
2139
2140 return r;
2141 }
2142
2143 /*
2144 A convenience function that creates a new jail, runs the given command and destroys
2145 the jail again.
2146 */
2147 int pakfire_jail_run(struct pakfire* pakfire, const char* argv[], int flags, char** output) {
2148 struct pakfire_jail* jail = NULL;
2149 int r;
2150
2151 // Create a new jail
2152 r = pakfire_jail_create(&jail, pakfire);
2153 if (r)
2154 goto ERROR;
2155
2156 // Execute the command
2157 r = pakfire_jail_exec(jail, argv, NULL, pakfire_jail_capture_stdout, output, 0);
2158
2159 ERROR:
2160 if (jail)
2161 pakfire_jail_unref(jail);
2162
2163 return r;
2164 }
2165
/*
	Creates a new jail, runs the given script in it (without any communication
	callbacks) and destroys the jail again.

	NOTE: flags is accepted, but not used by this function.

	Returns the error code of pakfire_jail_create() if the jail could not be
	created, or otherwise whatever pakfire_jail_exec_script() returns.
*/
int pakfire_jail_run_script(struct pakfire* pakfire,
		const char* script, const size_t length, const char* argv[], int flags) {
	struct pakfire_jail* jail = NULL;

	// Create a new jail and, if that worked, run the script in it
	int r = pakfire_jail_create(&jail, pakfire);
	if (r == 0)
		r = pakfire_jail_exec_script(jail, script, length, argv, NULL, NULL, NULL);

	// Drop the jail again
	if (jail != NULL)
		pakfire_jail_unref(jail);

	return r;
}
2185
/*
	Launches an interactive login shell (/bin/bash) in the jail.

	Negative return codes of pakfire_jail_exec_interactive() are passed on.
	Any other return code (i.e. the one of the shell) is ignored and zero is
	returned instead.
*/
int pakfire_jail_shell(struct pakfire_jail* jail) {
	const char* shell[] = {
		"/bin/bash", "--login", NULL,
	};

	// Execute /bin/bash
	const int status = pakfire_jail_exec_interactive(jail, shell, 0);

	// Raise any errors, but ignore any return codes from the shell
	return (status < 0) ? status : 0;
}
2203
2204 static int pakfire_jail_run_if_possible(struct pakfire* pakfire, const char** argv) {
2205 char path[PATH_MAX];
2206 int r;
2207
2208 r = pakfire_path(pakfire, path, "%s", *argv);
2209 if (r)
2210 return r;
2211
2212 // Check if the file is executable
2213 r = access(path, X_OK);
2214 if (r) {
2215 DEBUG(pakfire, "%s is not executable. Skipping...\n", *argv);
2216 return 0;
2217 }
2218
2219 return pakfire_jail_run(pakfire, argv, 0, NULL);
2220 }
2221
/*
	Runs /sbin/ldconfig in a jail (through pakfire_jail_run_if_possible()).
*/
int pakfire_jail_ldconfig(struct pakfire* pakfire) {
	const char* command[] = { "/sbin/ldconfig", NULL };

	return pakfire_jail_run_if_possible(pakfire, command);
}
2230
/*
	Runs "/usr/bin/systemd-tmpfiles --create" in a jail
	(through pakfire_jail_run_if_possible()).
*/
int pakfire_jail_run_systemd_tmpfiles(struct pakfire* pakfire) {
	const char* command[] = { "/usr/bin/systemd-tmpfiles", "--create", NULL };

	return pakfire_jail_run_if_possible(pakfire, command);
}