]> git.ipfire.org Git - people/ms/pakfire.git/blob - src/libpakfire/jail.c
jail: Handle signals in epoll() loop
[people/ms/pakfire.git] / src / libpakfire / jail.c
1 /*#############################################################################
2 # #
3 # Pakfire - The IPFire package management system #
4 # Copyright (C) 2022 Pakfire development team #
5 # #
6 # This program is free software: you can redistribute it and/or modify #
7 # it under the terms of the GNU General Public License as published by #
8 # the Free Software Foundation, either version 3 of the License, or #
9 # (at your option) any later version. #
10 # #
11 # This program is distributed in the hope that it will be useful, #
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of #
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
14 # GNU General Public License for more details. #
15 # #
16 # You should have received a copy of the GNU General Public License #
17 # along with this program. If not, see <http://www.gnu.org/licenses/>. #
18 # #
19 #############################################################################*/
20
21 #include <errno.h>
22 #include <fcntl.h>
23 #include <linux/capability.h>
24 #include <linux/sched.h>
25 #include <sys/wait.h>
26 #include <linux/wait.h>
27 #include <sched.h>
28 #include <signal.h>
29 #include <stdlib.h>
30 #include <syscall.h>
31 #include <sys/capability.h>
32 #include <sys/epoll.h>
33 #include <sys/eventfd.h>
34 #include <sys/mount.h>
35 #include <sys/personality.h>
36 #include <sys/prctl.h>
37 #include <sys/resource.h>
38 #include <sys/signalfd.h>
39 #include <sys/timerfd.h>
40 #include <sys/types.h>
41 #include <sys/wait.h>
42
43 // libnl3
44 #include <net/if.h>
45 #include <netlink/route/link.h>
46
47 // libseccomp
48 #include <seccomp.h>
49
50 // libuuid
51 #include <uuid.h>
52
53 #include <pakfire/arch.h>
54 #include <pakfire/cgroup.h>
55 #include <pakfire/jail.h>
56 #include <pakfire/logging.h>
57 #include <pakfire/mount.h>
58 #include <pakfire/pakfire.h>
59 #include <pakfire/private.h>
60 #include <pakfire/pwd.h>
61 #include <pakfire/string.h>
62 #include <pakfire/util.h>
63
// Size (in bytes) of each buffer that collects output of the child process.
// The expression is parenthesised so that the macro can be used safely inside
// larger expressions (e.g. as a divisor).
#define BUFFER_SIZE      (1024 * 64)
// Number of slots in the environment array of a jail
#define ENVIRON_SIZE     128
// Maximum number of events fetched by one call to epoll_wait()
#define EPOLL_MAX_EVENTS 2
// Maximum number of custom bind-mounts per jail
#define MAX_MOUNTPOINTS  8
68
// Environment variables that pakfire_jail_create() sets for every new jail.
// The table is terminated by an entry with a NULL key.
static const struct environ {
	const char* key;
	const char* val;
} ENV[] = {
	{ .key = "HOME", .val = "/root" },
	{ .key = "LANG", .val = "C.utf-8" },
	{ .key = "PATH", .val = "/usr/local/sbin:/usr/sbin:/sbin:/usr/local/bin:/usr/bin:/bin" },
	{ .key = "TERM", .val = "vt100" },

	// Let processes know that they are running inside a Pakfire container
	{ .key = "container", .val = "pakfire" },

	// Sentinel
	{ .key = NULL, .val = NULL },
};
83
// One custom bind-mount as registered by pakfire_jail_bind()
struct pakfire_jail_mountpoint {
	// Paths handed to pakfire_bind() as source and target
	char source[PATH_MAX], target[PATH_MAX];

	// Mount flags handed to pakfire_bind()
	int flags;
};
89
90 struct pakfire_jail {
91 struct pakfire* pakfire;
92 int nrefs;
93
94 // A unique ID for each jail
95 uuid_t uuid;
96 char __uuid[UUID_STR_LEN];
97
98 // Resource Limits
99 int nice;
100
101 // Timeout
102 struct itimerspec timeout;
103
104 // CGroup
105 struct pakfire_cgroup* cgroup;
106
107 // Environment
108 char* env[ENVIRON_SIZE];
109
110 // Mountpoints
111 struct pakfire_jail_mountpoint mountpoints[MAX_MOUNTPOINTS];
112 unsigned int num_mountpoints;
113 };
114
115 struct pakfire_log_buffer {
116 char data[BUFFER_SIZE];
117 size_t used;
118 };
119
120 struct pakfire_jail_exec {
121 int flags;
122
123 // PID (of the child)
124 pid_t pid;
125 int pidfd;
126
127 // Process status (from waitid)
128 siginfo_t status;
129
130 // FD to notify the client that the parent has finished initialization
131 int completed_fd;
132
133 // Log pipes
134 struct pakfire_jail_pipes {
135 int stdin[2];
136 int stdout[2];
137 int stderr[2];
138
139 // Logging
140 int log_INFO[2];
141 int log_ERROR[2];
142 int log_DEBUG[2];
143 } pipes;
144
145 // Communicate
146 struct pakfire_jail_communicate {
147 pakfire_jail_communicate_in in;
148 pakfire_jail_communicate_out out;
149 void* data;
150 } communicate;
151
152 // Log buffers
153 struct pakfire_jail_buffers {
154 struct pakfire_log_buffer stdout;
155 struct pakfire_log_buffer stderr;
156
157 // Logging
158 struct pakfire_log_buffer log_INFO;
159 struct pakfire_log_buffer log_ERROR;
160 struct pakfire_log_buffer log_DEBUG;
161 } buffers;
162
163 struct pakfire_cgroup* cgroup;
164 struct pakfire_cgroup_stats cgroup_stats;
165 };
166
// Thin wrapper that invokes the clone3 system call through syscall()
static int clone3(struct clone_args* args, size_t size) {
	const long result = syscall(__NR_clone3, args, size);

	return result;
}
170
171 static int pidfd_send_signal(int pidfd, int sig, siginfo_t* info, unsigned int flags) {
172 return syscall(SYS_pidfd_send_signal, pidfd, sig, info, flags);
173 }
174
175 static int pakfire_jail_exec_has_flag(
176 const struct pakfire_jail_exec* ctx, const enum pakfire_jail_exec_flags flag) {
177 return ctx->flags & flag;
178 }
179
180 static void pakfire_jail_free(struct pakfire_jail* jail) {
181 DEBUG(jail->pakfire, "Freeing jail at %p\n", jail);
182
183 // Free environment
184 for (unsigned int i = 0; jail->env[i]; i++)
185 free(jail->env[i]);
186
187 if (jail->cgroup)
188 pakfire_cgroup_unref(jail->cgroup);
189
190 pakfire_unref(jail->pakfire);
191 free(jail);
192 }
193
/*
	Default output callback: forwards the line to the Pakfire logger
	that matches the given priority.

	Lines of any other priority are dropped. LOG_DEBUG is only handled
	if ENABLE_DEBUG is defined. Always returns zero.
*/
static int pakfire_jail_default_log_callback(struct pakfire* pakfire, void* data,
		int priority, const char* line, size_t length) {
	if (priority == LOG_INFO) {
		INFO(pakfire, "%s", line);

	} else if (priority == LOG_ERR) {
		ERROR(pakfire, "%s", line);

#ifdef ENABLE_DEBUG
	} else if (priority == LOG_DEBUG) {
		DEBUG(pakfire, "%s", line);
#endif
	}

	return 0;
}
217
218 static const char* pakfire_jail_uuid(struct pakfire_jail* jail) {
219 if (!*jail->__uuid)
220 uuid_unparse_lower(jail->uuid, jail->__uuid);
221
222 return jail->__uuid;
223 }
224
// Prepares the environment for interactive use: sets a prompt and copies
// TERM and LANG from the environment of the calling process (if set).
// Returns zero on success or the error of pakfire_jail_set_env().
static int pakfire_jail_setup_interactive_env(struct pakfire_jail* jail) {
	// Variables to copy (in this order)
	const char* inherited[] = { "TERM", "LANG", NULL };

	// Prompt
	int r = pakfire_jail_set_env(jail, "PS1", "pakfire-jail \\w> ");
	if (r)
		return r;

	for (const char** name = inherited; *name; name++) {
		const char* value = secure_getenv(*name);

		// Skip anything that is not set
		if (!value)
			continue;

		r = pakfire_jail_set_env(jail, *name, value);
		if (r)
			return r;
	}

	return 0;
}
249
250 PAKFIRE_EXPORT int pakfire_jail_create(struct pakfire_jail** jail, struct pakfire* pakfire) {
251 int r;
252
253 const char* arch = pakfire_get_arch(pakfire);
254
255 // Allocate a new jail
256 struct pakfire_jail* j = calloc(1, sizeof(*j));
257 if (!j)
258 return 1;
259
260 // Reference Pakfire
261 j->pakfire = pakfire_ref(pakfire);
262
263 // Initialize reference counter
264 j->nrefs = 1;
265
266 // Generate a random UUID
267 uuid_generate_random(j->uuid);
268
269 DEBUG(j->pakfire, "Allocated new jail at %p\n", j);
270
271 // Set default environment
272 for (const struct environ* e = ENV; e->key; e++) {
273 r = pakfire_jail_set_env(j, e->key, e->val);
274 if (r)
275 goto ERROR;
276 }
277
278 // Enable all CPU features that CPU has to offer
279 if (!pakfire_arch_supported_by_host(arch)) {
280 r = pakfire_jail_set_env(j, "QEMU_CPU", "max");
281 if (r)
282 goto ERROR;
283 }
284
285 // Set container UUID
286 r = pakfire_jail_set_env(j, "container_uuid", pakfire_jail_uuid(j));
287 if (r)
288 goto ERROR;
289
290 // Disable systemctl to talk to systemd
291 if (!pakfire_on_root(j->pakfire)) {
292 r = pakfire_jail_set_env(j, "SYSTEMD_OFFLINE", "1");
293 if (r)
294 goto ERROR;
295 }
296
297 // Done
298 *jail = j;
299 return 0;
300
301 ERROR:
302 pakfire_jail_free(j);
303
304 return r;
305 }
306
307 PAKFIRE_EXPORT struct pakfire_jail* pakfire_jail_ref(struct pakfire_jail* jail) {
308 ++jail->nrefs;
309
310 return jail;
311 }
312
313 PAKFIRE_EXPORT struct pakfire_jail* pakfire_jail_unref(struct pakfire_jail* jail) {
314 if (--jail->nrefs > 0)
315 return jail;
316
317 pakfire_jail_free(jail);
318 return NULL;
319 }
320
321 // Resource Limits
322
323 PAKFIRE_EXPORT int pakfire_jail_nice(struct pakfire_jail* jail, int nice) {
324 // Check if nice level is in range
325 if (nice < -19 || nice > 20) {
326 errno = EINVAL;
327 return 1;
328 }
329
330 // Store nice level
331 jail->nice = nice;
332
333 return 0;
334 }
335
336 int pakfire_jail_set_cgroup(struct pakfire_jail* jail, struct pakfire_cgroup* cgroup) {
337 // Free any previous cgroup
338 if (jail->cgroup) {
339 pakfire_cgroup_unref(jail->cgroup);
340 jail->cgroup = NULL;
341 }
342
343 // Set any new cgroup
344 if (cgroup) {
345 DEBUG(jail->pakfire, "Setting cgroup %p\n", cgroup);
346
347 jail->cgroup = pakfire_cgroup_ref(cgroup);
348 }
349
350 // Done
351 return 0;
352 }
353
354 // Environment
355
356 // Returns the length of the environment
357 static unsigned int pakfire_jail_env_length(struct pakfire_jail* jail) {
358 unsigned int i = 0;
359
360 // Count everything in the environment
361 for (char** e = jail->env; *e; e++)
362 i++;
363
364 return i;
365 }
366
367 // Finds an existing environment variable and returns its index or -1 if not found
368 static int pakfire_jail_find_env(struct pakfire_jail* jail, const char* key) {
369 if (!key) {
370 errno = EINVAL;
371 return -1;
372 }
373
374 const size_t length = strlen(key);
375
376 for (unsigned int i = 0; jail->env[i]; i++) {
377 if ((pakfire_string_startswith(jail->env[i], key)
378 && *(jail->env[i] + length) == '=')) {
379 return i;
380 }
381 }
382
383 // Nothing found
384 return -1;
385 }
386
387 // Returns the value of an environment variable or NULL
388 PAKFIRE_EXPORT const char* pakfire_jail_get_env(struct pakfire_jail* jail,
389 const char* key) {
390 int i = pakfire_jail_find_env(jail, key);
391 if (i < 0)
392 return NULL;
393
394 return jail->env[i] + strlen(key) + 1;
395 }
396
397 // Sets an environment variable
398 PAKFIRE_EXPORT int pakfire_jail_set_env(struct pakfire_jail* jail,
399 const char* key, const char* value) {
400 // Find the index where to write this value to
401 int i = pakfire_jail_find_env(jail, key);
402 if (i < 0)
403 i = pakfire_jail_env_length(jail);
404
405 // Return -ENOSPC when the environment is full
406 if (i >= ENVIRON_SIZE) {
407 errno = ENOSPC;
408 return -1;
409 }
410
411 // Free any previous value
412 if (jail->env[i])
413 free(jail->env[i]);
414
415 // Format and set environment variable
416 asprintf(&jail->env[i], "%s=%s", key, value);
417
418 DEBUG(jail->pakfire, "Set environment variable: %s\n", jail->env[i]);
419
420 return 0;
421 }
422
423 // Imports an environment
424 PAKFIRE_EXPORT int pakfire_jail_import_env(struct pakfire_jail* jail, const char* env[]) {
425 if (!env)
426 return 0;
427
428 char* key;
429 char* val;
430 int r;
431
432 // Copy environment variables
433 for (unsigned int i = 0; env[i]; i++) {
434 r = pakfire_string_partition(env[i], "=", &key, &val);
435 if (r)
436 continue;
437
438 // Set value
439 r = pakfire_jail_set_env(jail, key, val);
440
441 if (key)
442 free(key);
443 if (val)
444 free(val);
445
446 // Break on error
447 if (r)
448 return r;
449 }
450
451 return 0;
452 }
453
454 // Timeout
455
456 PAKFIRE_EXPORT int pakfire_jail_set_timeout(
457 struct pakfire_jail* jail, unsigned int timeout) {
458 // Store value
459 jail->timeout.it_value.tv_sec = timeout;
460
461 if (timeout > 0)
462 DEBUG(jail->pakfire, "Timeout set to %d second(s)\n", timeout);
463 else
464 DEBUG(jail->pakfire, "Timeout disabled\n");
465
466 return 0;
467 }
468
469 static int pakfire_jail_create_timer(struct pakfire_jail* jail) {
470 int r;
471
472 // Nothing to do if no timeout has been set
473 if (!jail->timeout.it_value.tv_sec)
474 return -1;
475
476 // Create a new timer
477 const int fd = timerfd_create(CLOCK_MONOTONIC, 0);
478 if (fd < 0) {
479 ERROR(jail->pakfire, "Could not create timer: %m\n");
480 goto ERROR;
481 }
482
483 // Arm timer
484 r = timerfd_settime(fd, 0, &jail->timeout, NULL);
485 if (r) {
486 ERROR(jail->pakfire, "Could not arm timer: %m\n");
487 goto ERROR;
488 }
489
490 return fd;
491
492 ERROR:
493 if (fd > 0)
494 close(fd);
495
496 return -1;
497 }
498
499 // Signals
500
501 static int pakfire_jail_handle_signals(struct pakfire_jail* jail) {
502 sigset_t mask;
503 int r;
504
505 sigemptyset(&mask);
506 sigaddset(&mask, SIGINT);
507
508 // Block signals
509 r = sigprocmask(SIG_BLOCK, &mask, NULL);
510 if (r < 0) {
511 ERROR(jail->pakfire, "Failed to block signals: %m\n");
512 return r;
513 }
514
515 // Create a file descriptor
516 r = signalfd(-1, &mask, SFD_NONBLOCK|SFD_CLOEXEC);
517 if (r < 0) {
518 ERROR(jail->pakfire, "Failed to create signalfd: %m\n");
519 return r;
520 }
521
522 return r;
523 }
524
525 /*
526 This function replaces any logging in the child process.
527
528 All log messages will be sent to the parent process through their respective pipes.
529 */
530 static void pakfire_jail_log(void* data, int priority, const char* file,
531 int line, const char* fn, const char* format, va_list args) {
532 struct pakfire_jail_pipes* pipes = (struct pakfire_jail_pipes*)data;
533 int fd;
534
535 switch (priority) {
536 case LOG_INFO:
537 fd = pipes->log_INFO[1];
538 break;
539
540 case LOG_ERR:
541 fd = pipes->log_ERROR[1];
542 break;
543
544 #ifdef ENABLE_DEBUG
545 case LOG_DEBUG:
546 fd = pipes->log_DEBUG[1];
547 break;
548 #endif /* ENABLE_DEBUG */
549
550 // Ignore any messages of an unknown priority
551 default:
552 return;
553 }
554
555 // Send the log message
556 if (fd)
557 vdprintf(fd, format, args);
558 }
559
560 static int pakfire_jail_log_buffer_is_full(const struct pakfire_log_buffer* buffer) {
561 return (sizeof(buffer->data) == buffer->used);
562 }
563
564 /*
565 This function reads as much data as it can from the file descriptor.
566 If it finds a whole line in it, it will send it to the logger and repeat the process.
567 If not newline character is found, it will try to read more data until it finds one.
568 */
569 static int pakfire_jail_handle_log(struct pakfire_jail* jail,
570 struct pakfire_jail_exec* ctx, int priority, int fd,
571 struct pakfire_log_buffer* buffer, pakfire_jail_communicate_out callback, void* data) {
572 char line[BUFFER_SIZE + 1];
573
574 // Fill up buffer from fd
575 if (buffer->used < sizeof(buffer->data)) {
576 ssize_t bytes_read = read(fd, buffer->data + buffer->used,
577 sizeof(buffer->data) - buffer->used);
578
579 // Handle errors
580 if (bytes_read < 0) {
581 ERROR(jail->pakfire, "Could not read from fd %d: %m\n", fd);
582 return -1;
583 }
584
585 // Update buffer size
586 buffer->used += bytes_read;
587 }
588
589 // See if we have any lines that we can write
590 while (buffer->used) {
591 // Search for the end of the first line
592 char* eol = memchr(buffer->data, '\n', buffer->used);
593
594 // No newline found
595 if (!eol) {
596 // If the buffer is full, we send the content to the logger and try again
597 // This should not happen in practise
598 if (pakfire_jail_log_buffer_is_full(buffer)) {
599 DEBUG(jail->pakfire, "Logging buffer is full. Sending all content\n");
600
601 eol = buffer->data + sizeof(buffer->data) - 1;
602
603 // Otherwise we might have only read parts of the output
604 } else
605 break;
606 }
607
608 // Find the length of the string
609 size_t length = eol - buffer->data + 1;
610
611 // Copy the line into the buffer
612 memcpy(line, buffer->data, length);
613
614 // Terminate the string
615 line[length] = '\0';
616
617 // Log the line
618 if (callback) {
619 int r = callback(jail->pakfire, data, priority, line, length);
620 if (r) {
621 ERROR(jail->pakfire, "The logging callback returned an error: %d\n", r);
622 return r;
623 }
624 }
625
626 // Remove line from buffer
627 memmove(buffer->data, buffer->data + length, buffer->used - length);
628 buffer->used -= length;
629 }
630
631 return 0;
632 }
633
634 static int pakfire_jail_stream_stdin(struct pakfire_jail* jail,
635 struct pakfire_jail_exec* ctx, const int fd) {
636 int r;
637
638 // Nothing to do if there is no stdin callback set
639 if (!ctx->communicate.in) {
640 DEBUG(jail->pakfire, "Callback for standard input is not set\n");
641 return 0;
642 }
643
644 // Skip if the writing pipe has already been closed
645 if (!ctx->pipes.stdin[1])
646 return 0;
647
648 DEBUG(jail->pakfire, "Streaming standard input...\n");
649
650 // Calling the callback
651 r = ctx->communicate.in(jail->pakfire, ctx->communicate.data, fd);
652
653 DEBUG(jail->pakfire, "Standard input callback finished: %d\n", r);
654
655 // The callback signaled that it has written everything
656 if (r == EOF) {
657 DEBUG(jail->pakfire, "Closing standard input pipe\n");
658
659 // Close the file-descriptor
660 close(fd);
661
662 // Reset the file-descriptor so it won't be closed again later
663 ctx->pipes.stdin[1] = 0;
664
665 // Report success
666 r = 0;
667 }
668
669 return r;
670 }
671
672 static int pakfire_jail_setup_pipe(struct pakfire_jail* jail, int (*fds)[2], const int flags) {
673 int r = pipe2(*fds, flags);
674 if (r < 0) {
675 ERROR(jail->pakfire, "Could not setup pipe: %m\n");
676 return 1;
677 }
678
679 return 0;
680 }
681
/*
	Closes both ends of a pipe.

	Ends that are not open are skipped. Other code in this file marks such
	ends with either 0 or -1, therefore only positive values are closed.
*/
static void pakfire_jail_close_pipe(struct pakfire_jail* jail, int fds[2]) {
	for (unsigned int i = 0; i < 2; i++) {
		if (fds[i] > 0)
			close(fds[i]);
	}
}
687
/*
	Turns a pipe into one that is only read from: the write end is closed
	(and marked with -1) and the read end is returned.
*/
static int pakfire_jail_get_pipe_to_read(struct pakfire_jail* jail, int (*fds)[2]) {
	int* ends = *fds;

	// ends[1] is the write end which is no longer needed
	if (ends[1]) {
		close(ends[1]);
		ends[1] = -1;
	}

	// ends[0] is the read end
	return ends[0];
}
706
/*
	Turns a pipe into one that is only written to: the read end is closed
	(and marked with -1) and the write end is returned.
*/
static int pakfire_jail_get_pipe_to_write(struct pakfire_jail* jail, int (*fds)[2]) {
	int* ends = *fds;

	// ends[0] is the read end which is no longer needed
	if (ends[0]) {
		close(ends[0]);
		ends[0] = -1;
	}

	// ends[1] is the write end
	return ends[1];
}
721
722 static int pakfire_jail_wait(struct pakfire_jail* jail, struct pakfire_jail_exec* ctx) {
723 int epollfd = -1;
724 struct epoll_event ev;
725 struct epoll_event events[EPOLL_MAX_EVENTS];
726 struct signalfd_siginfo siginfo;
727 char garbage[8];
728 int r = 0;
729
730 // Fetch file descriptors from context
731 const int stdin = pakfire_jail_get_pipe_to_write(jail, &ctx->pipes.stdin);
732 const int stdout = pakfire_jail_get_pipe_to_read(jail, &ctx->pipes.stdout);
733 const int stderr = pakfire_jail_get_pipe_to_read(jail, &ctx->pipes.stderr);
734 const int pidfd = ctx->pidfd;
735
736 // Timer
737 const int timerfd = pakfire_jail_create_timer(jail);
738
739 // Logging
740 const int log_INFO = pakfire_jail_get_pipe_to_read(jail, &ctx->pipes.log_INFO);
741 const int log_ERROR = pakfire_jail_get_pipe_to_read(jail, &ctx->pipes.log_ERROR);
742 const int log_DEBUG = pakfire_jail_get_pipe_to_read(jail, &ctx->pipes.log_DEBUG);
743
744 // Signals
745 const int signalfd = pakfire_jail_handle_signals(jail);
746
747 // Make a list of all file descriptors we are interested in
748 const int fds[] = {
749 stdin, stdout, stderr, pidfd, timerfd, signalfd, log_INFO, log_ERROR, log_DEBUG,
750 };
751
752 // Setup epoll
753 epollfd = epoll_create1(0);
754 if (epollfd < 0) {
755 ERROR(jail->pakfire, "Could not initialize epoll(): %m\n");
756 r = 1;
757 goto ERROR;
758 }
759
760 // Turn file descriptors into non-blocking mode and add them to epoll()
761 for (unsigned int i = 0; i < sizeof(fds) / sizeof(*fds); i++) {
762 int fd = fds[i];
763
764 // Skip fds which were not initialized
765 if (fd < 0)
766 continue;
767
768 ev.events = EPOLLHUP;
769
770 if (fd == stdin)
771 ev.events |= EPOLLOUT;
772 else
773 ev.events |= EPOLLIN;
774
775 // Read flags
776 int flags = fcntl(fd, F_GETFL, 0);
777
778 // Set modified flags
779 if (fcntl(fd, F_SETFL, flags|O_NONBLOCK) < 0) {
780 ERROR(jail->pakfire,
781 "Could not set file descriptor %d into non-blocking mode: %m\n", fd);
782 r = 1;
783 goto ERROR;
784 }
785
786 ev.data.fd = fd;
787
788 if (epoll_ctl(epollfd, EPOLL_CTL_ADD, fd, &ev) < 0) {
789 ERROR(jail->pakfire, "Could not add file descriptor %d to epoll(): %m\n", fd);
790 r = 1;
791 goto ERROR;
792 }
793 }
794
795 int ended = 0;
796
797 // Loop for as long as the process is alive
798 while (!ended) {
799 int num = epoll_wait(epollfd, events, EPOLL_MAX_EVENTS, -1);
800 if (num < 1) {
801 // Ignore if epoll_wait() has been interrupted
802 if (errno == EINTR)
803 continue;
804
805 ERROR(jail->pakfire, "epoll_wait() failed: %m\n");
806 r = 1;
807
808 goto ERROR;
809 }
810
811 for (int i = 0; i < num; i++) {
812 int e = events[i].events;
813 int fd = events[i].data.fd;
814
815 struct pakfire_log_buffer* buffer = NULL;
816 pakfire_jail_communicate_out callback = NULL;
817 void* data = NULL;
818 int priority;
819
820 // Check if there is any data to be read
821 if (e & EPOLLIN) {
822 // Handle any changes to the PIDFD
823 if (fd == pidfd) {
824 // Call waidid() and store the result
825 r = waitid(P_PIDFD, ctx->pidfd, &ctx->status, WEXITED);
826 if (r) {
827 ERROR(jail->pakfire, "waitid() failed: %m\n");
828 goto ERROR;
829 }
830
831 // Mark that we have ended so that we will process the remaining
832 // events from epoll() now, but won't restart the outer loop.
833 ended = 1;
834 continue;
835
836 // Handle timer events
837 } else if (fd == timerfd) {
838 DEBUG(jail->pakfire, "Timer event received\n");
839
840 // Disarm the timer
841 r = read(timerfd, garbage, sizeof(garbage));
842 if (r < 1) {
843 ERROR(jail->pakfire, "Could not disarm timer: %m\n");
844 r = 1;
845 goto ERROR;
846 }
847
848 // Terminate the process if it hasn't already ended
849 if (!ended) {
850 DEBUG(jail->pakfire, "Terminating process...\n");
851
852 // Send SIGTERM to the process
853 r = pidfd_send_signal(pidfd, SIGKILL, NULL, 0);
854 if (r) {
855 ERROR(jail->pakfire, "Could not kill process: %m\n");
856 goto ERROR;
857 }
858 }
859
860 // There is nothing else to do
861 continue;
862
863 // Handle signals
864 } else if (fd == signalfd) {
865 // Read the signal
866 r = read(signalfd, &siginfo, sizeof(siginfo));
867 if (r < 1) {
868 ERROR(jail->pakfire, "Could not read signal: %m\n");
869 goto ERROR;
870 }
871
872 DEBUG(jail->pakfire, "Received signal %d\n", siginfo.ssi_signo);
873
874 // Handle signals
875 switch (siginfo.ssi_signo) {
876 // Pass SIGINT down to the child process
877 case SIGINT:
878 r = pidfd_send_signal(pidfd, siginfo.ssi_signo, NULL, 0);
879 if (r) {
880 ERROR(jail->pakfire, "Could not send signal to process: %m\n");
881 goto ERROR;
882 }
883 break;
884
885 default:
886 ERROR(jail->pakfire, "Received unhandled signal %d\n",
887 siginfo.ssi_signo);
888 break;
889 }
890
891 // Don't fall through to log processing
892 continue;
893
894 // Handle logging messages
895 } else if (fd == log_INFO) {
896 buffer = &ctx->buffers.log_INFO;
897 priority = LOG_INFO;
898
899 callback = pakfire_jail_default_log_callback;
900
901 } else if (fd == log_ERROR) {
902 buffer = &ctx->buffers.log_ERROR;
903 priority = LOG_ERR;
904
905 callback = pakfire_jail_default_log_callback;
906
907 } else if (fd == log_DEBUG) {
908 buffer = &ctx->buffers.log_DEBUG;
909 priority = LOG_DEBUG;
910
911 callback = pakfire_jail_default_log_callback;
912
913 // Handle anything from the log pipes
914 } else if (fd == stdout) {
915 buffer = &ctx->buffers.stdout;
916 priority = LOG_INFO;
917
918 callback = ctx->communicate.out;
919 data = ctx->communicate.data;
920
921 } else if (fd == stderr) {
922 buffer = &ctx->buffers.stderr;
923 priority = LOG_ERR;
924
925 callback = ctx->communicate.out;
926 data = ctx->communicate.data;
927
928 } else {
929 DEBUG(jail->pakfire, "Received invalid file descriptor %d\n", fd);
930 continue;
931 }
932
933 // Handle log event
934 r = pakfire_jail_handle_log(jail, ctx, priority, fd, buffer, callback, data);
935 if (r)
936 goto ERROR;
937 }
938
939 if (e & EPOLLOUT) {
940 // Handle standard input
941 if (fd == stdin) {
942 r = pakfire_jail_stream_stdin(jail, ctx, fd);
943 if (r) {
944 switch (errno) {
945 // Ignore if we filled up the buffer
946 case EAGAIN:
947 break;
948
949 default:
950 ERROR(jail->pakfire, "Could not write to stdin: %m\n");
951 goto ERROR;
952 }
953 }
954 }
955 }
956
957 // Check if any file descriptors have been closed
958 if (e & EPOLLHUP) {
959 // Remove the file descriptor
960 r = epoll_ctl(epollfd, EPOLL_CTL_DEL, fd, NULL);
961 if (r) {
962 ERROR(jail->pakfire, "Could not remove closed file-descriptor %d: %m\n", fd);
963 goto ERROR;
964 }
965 }
966 }
967 }
968
969 ERROR:
970 if (epollfd > 0)
971 close(epollfd);
972 if (timerfd > 0)
973 close(timerfd);
974 if (signalfd > 0)
975 close(signalfd);
976
977 return r;
978 }
979
/*
	Output callback which collects everything that was written to standard
	output (priority LOG_INFO) in one string.

	data is expected to point to a char* to which every line is appended.
	The string is re-allocated on every call and the previous buffer is
	freed, so *data has to be NULL or heap-allocated (NOTE(review): confirm
	that all callers start with NULL). If allocating fails, 1 is returned
	and the string collected so far is left untouched.

	Lines of any other priority (or all lines if data is NULL) are passed
	to the default log callback.
*/
int pakfire_jail_capture_stdout(struct pakfire* pakfire, void* data,
		int priority, const char* line, size_t length) {
	char** output = (char**)data;
	char* joined = NULL;

	// Append everything from stdout to a buffer
	if (output && priority == LOG_INFO) {
		int r = asprintf(&joined, "%s%s", (*output) ? *output : "", line);
		if (r < 0)
			return 1;

		// Replace the old string which would be leaked otherwise
		free(*output);
		*output = joined;

		return 0;
	}

	// Send everything else to the default logger
	return pakfire_jail_default_log_callback(pakfire, NULL, priority, line, length);
}
996
997 // Capabilities
998
999 // Logs all capabilities of the current process
1000 static int pakfire_jail_show_capabilities(struct pakfire_jail* jail) {
1001 cap_t caps = NULL;
1002 char* name = NULL;
1003 cap_flag_value_t value_e;
1004 cap_flag_value_t value_i;
1005 cap_flag_value_t value_p;
1006 int r;
1007
1008 // Fetch PID
1009 pid_t pid = getpid();
1010
1011 // Fetch all capabilities
1012 caps = cap_get_proc();
1013 if (!caps) {
1014 ERROR(jail->pakfire, "Could not fetch capabilities: %m\n");
1015 r = 1;
1016 goto ERROR;
1017 }
1018
1019 DEBUG(jail->pakfire, "Capabilities of PID %d:\n", pid);
1020
1021 // Iterate over all capabilities
1022 for (unsigned int cap = 0; cap_valid(cap); cap++) {
1023 name = cap_to_name(cap);
1024
1025 // Fetch effective value
1026 r = cap_get_flag(caps, cap, CAP_EFFECTIVE, &value_e);
1027 if (r)
1028 goto ERROR;
1029
1030 // Fetch inheritable value
1031 r = cap_get_flag(caps, cap, CAP_INHERITABLE, &value_i);
1032 if (r)
1033 goto ERROR;
1034
1035 // Fetch permitted value
1036 r = cap_get_flag(caps, cap, CAP_PERMITTED, &value_p);
1037 if (r)
1038 goto ERROR;
1039
1040 DEBUG(jail->pakfire,
1041 " %-24s : %c%c%c\n",
1042 name,
1043 (value_e == CAP_SET) ? 'e' : '-',
1044 (value_i == CAP_SET) ? 'i' : '-',
1045 (value_p == CAP_SET) ? 'p' : '-'
1046 );
1047
1048 // Free name
1049 cap_free(name);
1050 name = NULL;
1051 }
1052
1053 // Success
1054 r = 0;
1055
1056 ERROR:
1057 if (name)
1058 cap_free(name);
1059 if (caps)
1060 cap_free(caps);
1061
1062 return r;
1063 }
1064
1065 static int pakfire_jail_set_capabilities(struct pakfire_jail* jail) {
1066 cap_t caps = NULL;
1067 char* name = NULL;
1068 int r;
1069
1070 // Fetch capabilities
1071 caps = cap_get_proc();
1072 if (!caps) {
1073 ERROR(jail->pakfire, "Could not read capabilities: %m\n");
1074 r = 1;
1075 goto ERROR;
1076 }
1077
1078 // Walk through all capabilities
1079 for (cap_value_t cap = 0; cap_valid(cap); cap++) {
1080 cap_value_t _caps[] = { cap };
1081
1082 // Fetch the name of the capability
1083 name = cap_to_name(cap);
1084
1085 r = cap_set_flag(caps, CAP_EFFECTIVE, 1, _caps, CAP_SET);
1086 if (r) {
1087 ERROR(jail->pakfire, "Could not set %s: %m\n", name);
1088 goto ERROR;
1089 }
1090
1091 r = cap_set_flag(caps, CAP_INHERITABLE, 1, _caps, CAP_SET);
1092 if (r) {
1093 ERROR(jail->pakfire, "Could not set %s: %m\n", name);
1094 goto ERROR;
1095 }
1096
1097 r = cap_set_flag(caps, CAP_PERMITTED, 1, _caps, CAP_SET);
1098 if (r) {
1099 ERROR(jail->pakfire, "Could not set %s: %m\n", name);
1100 goto ERROR;
1101 }
1102
1103 // Free name
1104 cap_free(name);
1105 name = NULL;
1106 }
1107
1108 // Restore all capabilities
1109 r = cap_set_proc(caps);
1110 if (r) {
1111 ERROR(jail->pakfire, "Restoring capabilities failed: %m\n");
1112 goto ERROR;
1113 }
1114
1115 // Add all capabilities to the ambient set
1116 for (unsigned int cap = 0; cap_valid(cap); cap++) {
1117 name = cap_to_name(cap);
1118
1119 // Raise the capability
1120 r = prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_RAISE, cap, 0, 0);
1121 if (r) {
1122 ERROR(jail->pakfire, "Could not set ambient capability %s: %m\n", name);
1123 goto ERROR;
1124 }
1125
1126 // Free name
1127 cap_free(name);
1128 name = NULL;
1129 }
1130
1131 // Success
1132 r = 0;
1133
1134 ERROR:
1135 if (name)
1136 cap_free(name);
1137 if (caps)
1138 cap_free(caps);
1139
1140 return r;
1141 }
1142
1143 // Syscall Filter
1144
1145 static int pakfire_jail_limit_syscalls(struct pakfire_jail* jail) {
1146 const int syscalls[] = {
1147 // The kernel's keyring isn't namespaced
1148 SCMP_SYS(keyctl),
1149 SCMP_SYS(add_key),
1150 SCMP_SYS(request_key),
1151
1152 // Disable userfaultfd
1153 SCMP_SYS(userfaultfd),
1154
1155 // Disable perf which could leak a lot of information about the host
1156 SCMP_SYS(perf_event_open),
1157
1158 0,
1159 };
1160 int r = 1;
1161
1162 DEBUG(jail->pakfire, "Applying syscall filter...\n");
1163
1164 // Setup a syscall filter which allows everything by default
1165 scmp_filter_ctx ctx = seccomp_init(SCMP_ACT_ALLOW);
1166 if (!ctx) {
1167 ERROR(jail->pakfire, "Could not setup seccomp filter: %m\n");
1168 goto ERROR;
1169 }
1170
1171 // All all syscalls
1172 for (const int* syscall = syscalls; *syscall; syscall++) {
1173 r = seccomp_rule_add(ctx, SCMP_ACT_ERRNO(EPERM), *syscall, 0);
1174 if (r) {
1175 ERROR(jail->pakfire, "Could not configure syscall %d: %m\n", *syscall);
1176 goto ERROR;
1177 }
1178 }
1179
1180 // Load syscall filter into the kernel
1181 r = seccomp_load(ctx);
1182 if (r) {
1183 ERROR(jail->pakfire, "Could not load syscall filter into the kernel: %m\n");
1184 goto ERROR;
1185 }
1186
1187 ERROR:
1188 if (ctx)
1189 seccomp_release(ctx);
1190
1191 return r;
1192 }
1193
1194 // Mountpoints
1195
1196 PAKFIRE_EXPORT int pakfire_jail_bind(struct pakfire_jail* jail,
1197 const char* source, const char* target, int flags) {
1198 struct pakfire_jail_mountpoint* mp = NULL;
1199 int r;
1200
1201 // Check if there is any space left
1202 if (jail->num_mountpoints >= MAX_MOUNTPOINTS) {
1203 errno = ENOSPC;
1204 return 1;
1205 }
1206
1207 // Check for valid inputs
1208 if (!source || !target) {
1209 errno = EINVAL;
1210 return 1;
1211 }
1212
1213 // Select the next free slot
1214 mp = &jail->mountpoints[jail->num_mountpoints];
1215
1216 // Copy source
1217 r = pakfire_string_set(mp->source, source);
1218 if (r) {
1219 ERROR(jail->pakfire, "Could not copy source: %m\n");
1220 return r;
1221 }
1222
1223 // Copy target
1224 r = pakfire_string_set(mp->target, target);
1225 if (r) {
1226 ERROR(jail->pakfire, "Could not copy target: %m\n");
1227 return r;
1228 }
1229
1230 // Copy flags
1231 mp->flags = flags;
1232
1233 // Increment counter
1234 jail->num_mountpoints++;
1235
1236 return 0;
1237 }
1238
1239 static int pakfire_jail_mount_networking(struct pakfire_jail* jail) {
1240 int r;
1241
1242 const char* paths[] = {
1243 "/etc/hosts",
1244 "/etc/resolv.conf",
1245 NULL,
1246 };
1247
1248 // Bind-mount all paths read-only
1249 for (const char** path = paths; *path; path++) {
1250 r = pakfire_bind(jail->pakfire, *path, NULL, MS_RDONLY);
1251 if (r)
1252 return r;
1253 }
1254
1255 return 0;
1256 }
1257
1258 /*
1259 Mounts everything that we require in the new namespace
1260 */
1261 static int pakfire_jail_mount(struct pakfire_jail* jail, struct pakfire_jail_exec* ctx) {
1262 struct pakfire_jail_mountpoint* mp = NULL;
1263 int flags = 0;
1264 int r;
1265
1266 // Enable loop devices
1267 if (pakfire_jail_exec_has_flag(ctx, PAKFIRE_JAIL_HAS_LOOP_DEVICES))
1268 flags |= PAKFIRE_MOUNT_LOOP_DEVICES;
1269
1270 // Mount all default stuff
1271 r = pakfire_mount_all(jail->pakfire, flags);
1272 if (r)
1273 return r;
1274
1275 // Mount networking stuff
1276 if (pakfire_jail_exec_has_flag(ctx, PAKFIRE_JAIL_HAS_NETWORKING)) {
1277 r = pakfire_jail_mount_networking(jail);
1278 if (r)
1279 return r;
1280 }
1281
1282 // Mount all custom stuff
1283 for (unsigned int i = 0; i < jail->num_mountpoints; i++) {
1284 // Fetch mountpoint
1285 mp = &jail->mountpoints[i];
1286
1287 // Mount it
1288 r = pakfire_bind(jail->pakfire, mp->source, mp->target, mp->flags);
1289 if (r)
1290 return r;
1291 }
1292
1293 // Log all mountpoints
1294 pakfire_mount_list(jail->pakfire);
1295
1296 return 0;
1297 }
1298
1299 // Networking
1300
1301 static int pakfire_jail_setup_loopback(struct pakfire_jail* jail) {
1302 struct nl_sock* nl = NULL;
1303 struct nl_cache* cache = NULL;
1304 struct rtnl_link* link = NULL;
1305 struct rtnl_link* change = NULL;
1306 int r;
1307
1308 DEBUG(jail->pakfire, "Setting up loopback...\n");
1309
1310 // Allocate a netlink socket
1311 nl = nl_socket_alloc();
1312 if (!nl) {
1313 ERROR(jail->pakfire, "Could not allocate a netlink socket: %m\n");
1314 r = 1;
1315 goto ERROR;
1316 }
1317
1318 // Connect the socket
1319 r = nl_connect(nl, NETLINK_ROUTE);
1320 if (r) {
1321 ERROR(jail->pakfire, "Could not connect netlink socket: %s\n", nl_geterror(r));
1322 goto ERROR;
1323 }
1324
1325 // Allocate the netlink cache
1326 r = rtnl_link_alloc_cache(nl, AF_UNSPEC, &cache);
1327 if (r < 0) {
1328 ERROR(jail->pakfire, "Unable to allocate netlink cache: %s\n", nl_geterror(r));
1329 goto ERROR;
1330 }
1331
1332 // Fetch loopback interface
1333 link = rtnl_link_get_by_name(cache, "lo");
1334 if (!link) {
1335 ERROR(jail->pakfire, "Could not find lo interface. Ignoring.\n");
1336 r = 0;
1337 goto ERROR;
1338 }
1339
1340 // Allocate a new link
1341 change = rtnl_link_alloc();
1342 if (!change) {
1343 ERROR(jail->pakfire, "Could not allocate change link\n");
1344 r = 1;
1345 goto ERROR;
1346 }
1347
1348 // Set the link to UP
1349 rtnl_link_set_flags(change, IFF_UP);
1350
1351 // Apply any changes
1352 r = rtnl_link_change(nl, link, change, 0);
1353 if (r) {
1354 ERROR(jail->pakfire, "Unable to activate loopback: %s\n", nl_geterror(r));
1355 goto ERROR;
1356 }
1357
1358 // Success
1359 r = 0;
1360
1361 ERROR:
1362 if (nl)
1363 nl_socket_free(nl);
1364
1365 return r;
1366 }
1367
1368 // UID/GID Mapping
1369
1370 static int pakfire_jail_setup_uid_mapping(struct pakfire_jail* jail, pid_t pid) {
1371 char path[PATH_MAX];
1372 int r;
1373
1374 // Skip mapping anything when running on /
1375 if (pakfire_on_root(jail->pakfire))
1376 return 0;
1377
1378 // Make path
1379 r = pakfire_string_format(path, "/proc/%d/uid_map", pid);
1380 if (r)
1381 return r;
1382
1383 // Fetch UID
1384 const uid_t uid = pakfire_uid(jail->pakfire);
1385
1386 // Fetch SUBUID
1387 const struct pakfire_subid* subuid = pakfire_subuid(jail->pakfire);
1388 if (!subuid)
1389 return 1;
1390
1391 /* When running as root, we will map the entire range.
1392
1393 When running as a non-privileged user, we will map the root user inside the jail
1394 to the user's UID outside of the jail, and we will map the rest starting from one.
1395 */
1396
1397 // Running as root
1398 if (uid == 0) {
1399 r = pakfire_file_write(jail->pakfire, path, 0, 0, 0,
1400 "0 %lu %lu\n", subuid->id, subuid->length);
1401 } else {
1402 r = pakfire_file_write(jail->pakfire, path, 0, 0, 0,
1403 "0 %lu 1\n1 %lu %lu\n", uid, subuid->id, subuid->length);
1404 }
1405
1406 if (r) {
1407 ERROR(jail->pakfire, "Could not map UIDs: %m\n");
1408 return r;
1409 }
1410
1411 return r;
1412 }
1413
1414 static int pakfire_jail_setup_gid_mapping(struct pakfire_jail* jail, pid_t pid) {
1415 char path[PATH_MAX];
1416 int r;
1417
1418 // Skip mapping anything when running on /
1419 if (pakfire_on_root(jail->pakfire))
1420 return 0;
1421
1422 // Fetch GID
1423 const gid_t gid = pakfire_gid(jail->pakfire);
1424
1425 // Fetch SUBGID
1426 const struct pakfire_subid* subgid = pakfire_subgid(jail->pakfire);
1427 if (!subgid)
1428 return 1;
1429
1430 // Make path
1431 r = pakfire_string_format(path, "/proc/%d/gid_map", pid);
1432 if (r)
1433 return r;
1434
1435 // Running as root
1436 if (gid == 0) {
1437 r = pakfire_file_write(jail->pakfire, path, 0, 0, 0,
1438 "0 %lu %lu\n", subgid->id, subgid->length);
1439 } else {
1440 r = pakfire_file_write(jail->pakfire, path, 0, 0, 0,
1441 "0 %lu 1\n%1 %lu %lu\n", gid, subgid->id, subgid->length);
1442 }
1443
1444 if (r) {
1445 ERROR(jail->pakfire, "Could not map GIDs: %m\n");
1446 return r;
1447 }
1448
1449 return r;
1450 }
1451
1452 static int pakfire_jail_setgroups(struct pakfire_jail* jail, pid_t pid) {
1453 char path[PATH_MAX];
1454 int r = 1;
1455
1456 // Make path
1457 r = pakfire_string_format(path, "/proc/%d/setgroups", pid);
1458 if (r)
1459 return r;
1460
1461 // Open file for writing
1462 FILE* f = fopen(path, "w");
1463 if (!f) {
1464 ERROR(jail->pakfire, "Could not open %s for writing: %m\n", path);
1465 goto ERROR;
1466 }
1467
1468 // Write content
1469 int bytes_written = fprintf(f, "deny\n");
1470 if (bytes_written <= 0) {
1471 ERROR(jail->pakfire, "Could not write to %s: %m\n", path);
1472 goto ERROR;
1473 }
1474
1475 r = fclose(f);
1476 f = NULL;
1477 if (r) {
1478 ERROR(jail->pakfire, "Could not close %s: %m\n", path);
1479 goto ERROR;
1480 }
1481
1482 ERROR:
1483 if (f)
1484 fclose(f);
1485
1486 return r;
1487 }
1488
1489 static int pakfire_jail_send_signal(struct pakfire_jail* jail, int fd) {
1490 const uint64_t val = 1;
1491 int r = 0;
1492
1493 DEBUG(jail->pakfire, "Sending signal...\n");
1494
1495 // Write to the file descriptor
1496 ssize_t bytes_written = write(fd, &val, sizeof(val));
1497 if (bytes_written < 0 || (size_t)bytes_written < sizeof(val)) {
1498 ERROR(jail->pakfire, "Could not send signal: %m\n");
1499 r = 1;
1500 }
1501
1502 // Close the file descriptor
1503 close(fd);
1504
1505 return r;
1506 }
1507
1508 static int pakfire_jail_wait_for_signal(struct pakfire_jail* jail, int fd) {
1509 uint64_t val = 0;
1510 int r = 0;
1511
1512 DEBUG(jail->pakfire, "Waiting for signal...\n");
1513
1514 ssize_t bytes_read = read(fd, &val, sizeof(val));
1515 if (bytes_read < 0 || (size_t)bytes_read < sizeof(val)) {
1516 ERROR(jail->pakfire, "Error waiting for signal: %m\n");
1517 r = 1;
1518 }
1519
1520 // Close the file descriptor
1521 close(fd);
1522
1523 return r;
1524 }
1525
1526 /*
1527 Performs the initialisation that needs to happen in the parent part
1528 */
1529 static int pakfire_jail_parent(struct pakfire_jail* jail, struct pakfire_jail_exec* ctx) {
1530 int r;
1531
1532 // Setup UID mapping
1533 r = pakfire_jail_setup_uid_mapping(jail, ctx->pid);
1534 if (r)
1535 return r;
1536
1537 // Write "deny" to /proc/PID/setgroups
1538 r = pakfire_jail_setgroups(jail, ctx->pid);
1539 if (r)
1540 return r;
1541
1542 // Setup GID mapping
1543 r = pakfire_jail_setup_gid_mapping(jail, ctx->pid);
1544 if (r)
1545 return r;
1546
1547 // Parent has finished initialisation
1548 DEBUG(jail->pakfire, "Parent has finished initialization\n");
1549
1550 // Send signal to client
1551 r = pakfire_jail_send_signal(jail, ctx->completed_fd);
1552 if (r)
1553 return r;
1554
1555 return 0;
1556 }
1557
1558 static int pakfire_jail_child(struct pakfire_jail* jail, struct pakfire_jail_exec* ctx,
1559 const char* argv[]) {
1560 int r;
1561
1562 // Redirect any logging to our log pipe
1563 pakfire_set_log_callback(jail->pakfire, pakfire_jail_log, &ctx->pipes);
1564
1565 // Die with parent
1566 r = prctl(PR_SET_PDEATHSIG, SIGKILL, 0, 0, 0);
1567 if (r) {
1568 ERROR(jail->pakfire, "Could not configure to die with parent: %m\n");
1569 return 126;
1570 }
1571
1572 // Fetch my own PID
1573 pid_t pid = getpid();
1574
1575 DEBUG(jail->pakfire, "Launched child process in jail with PID %d\n", pid);
1576
1577 // Wait for the parent to finish initialization
1578 r = pakfire_jail_wait_for_signal(jail, ctx->completed_fd);
1579 if (r)
1580 return r;
1581
1582 // Perform further initialization
1583
1584 // Fetch UID/GID
1585 uid_t uid = getuid();
1586 gid_t gid = getgid();
1587
1588 // Fetch EUID/EGID
1589 uid_t euid = geteuid();
1590 gid_t egid = getegid();
1591
1592 DEBUG(jail->pakfire, " UID: %d (effective %d)\n", uid, euid);
1593 DEBUG(jail->pakfire, " GID: %d (effective %d)\n", gid, egid);
1594
1595 // Check if we are (effectively running as root)
1596 if (uid || gid || euid || egid) {
1597 ERROR(jail->pakfire, "Child process is not running as root\n");
1598 return 126;
1599 }
1600
1601 const char* root = pakfire_get_path(jail->pakfire);
1602 const char* arch = pakfire_get_arch(jail->pakfire);
1603
1604 // Change root (unless root is /)
1605 if (!pakfire_on_root(jail->pakfire)) {
1606 // Mount everything
1607 r = pakfire_jail_mount(jail, ctx);
1608 if (r)
1609 return r;
1610
1611 // Call chroot()
1612 r = chroot(root);
1613 if (r) {
1614 ERROR(jail->pakfire, "chroot() to %s failed: %m\n", root);
1615 return 1;
1616 }
1617
1618 // Change directory to /
1619 r = chdir("/");
1620 if (r) {
1621 ERROR(jail->pakfire, "chdir() after chroot() failed: %m\n");
1622 return 1;
1623 }
1624 }
1625
1626 // Set personality
1627 unsigned long persona = pakfire_arch_personality(arch);
1628 if (persona) {
1629 r = personality(persona);
1630 if (r < 0) {
1631 ERROR(jail->pakfire, "Could not set personality (%x)\n", (unsigned int)persona);
1632 return 1;
1633 }
1634 }
1635
1636 // Setup networking
1637 if (!pakfire_jail_exec_has_flag(ctx, PAKFIRE_JAIL_HAS_NETWORKING)) {
1638 r = pakfire_jail_setup_loopback(jail);
1639 if (r)
1640 return 1;
1641 }
1642
1643 // Set nice level
1644 if (jail->nice) {
1645 DEBUG(jail->pakfire, "Setting nice level to %d\n", jail->nice);
1646
1647 r = setpriority(PRIO_PROCESS, pid, jail->nice);
1648 if (r) {
1649 ERROR(jail->pakfire, "Could not set nice level: %m\n");
1650 return 1;
1651 }
1652 }
1653
1654 // Close other end of log pipes
1655 close(ctx->pipes.log_INFO[0]);
1656 close(ctx->pipes.log_ERROR[0]);
1657 #ifdef ENABLE_DEBUG
1658 close(ctx->pipes.log_DEBUG[0]);
1659 #endif /* ENABLE_DEBUG */
1660
1661 // Connect standard input
1662 if (ctx->pipes.stdin[0]) {
1663 r = dup2(ctx->pipes.stdin[0], STDIN_FILENO);
1664 if (r < 0) {
1665 ERROR(jail->pakfire, "Could not connect fd %d to stdin: %m\n",
1666 ctx->pipes.stdin[0]);
1667
1668 return 1;
1669 }
1670 }
1671
1672 // Connect standard output and error
1673 if (ctx->pipes.stdout[1] && ctx->pipes.stderr[1]) {
1674 r = dup2(ctx->pipes.stdout[1], STDOUT_FILENO);
1675 if (r < 0) {
1676 ERROR(jail->pakfire, "Could not connect fd %d to stdout: %m\n",
1677 ctx->pipes.stdout[1]);
1678
1679 return 1;
1680 }
1681
1682 r = dup2(ctx->pipes.stderr[1], STDERR_FILENO);
1683 if (r < 0) {
1684 ERROR(jail->pakfire, "Could not connect fd %d to stderr: %m\n",
1685 ctx->pipes.stderr[1]);
1686
1687 return 1;
1688 }
1689
1690 // Close the pipe (as we have moved the original file descriptors)
1691 pakfire_jail_close_pipe(jail, ctx->pipes.stdin);
1692 pakfire_jail_close_pipe(jail, ctx->pipes.stdout);
1693 pakfire_jail_close_pipe(jail, ctx->pipes.stderr);
1694 }
1695
1696 // Reset open file limit (http://0pointer.net/blog/file-descriptor-limits.html)
1697 r = pakfire_rlimit_reset_nofile(jail->pakfire);
1698 if (r)
1699 return r;
1700
1701 // Don't drop any capabilities on execve()
1702 r = prctl(PR_SET_KEEPCAPS, 1);
1703 if (r) {
1704 ERROR(jail->pakfire, "Could not set PR_SET_KEEPCAPS: %m\n");
1705 return r;
1706 }
1707
1708 // Set capabilities
1709 r = pakfire_jail_set_capabilities(jail);
1710 if (r)
1711 return r;
1712
1713 // Show capabilities
1714 r = pakfire_jail_show_capabilities(jail);
1715 if (r)
1716 return r;
1717
1718 // Filter syscalls
1719 r = pakfire_jail_limit_syscalls(jail);
1720 if (r)
1721 return r;
1722
1723 DEBUG(jail->pakfire, "Child process initialization done\n");
1724 DEBUG(jail->pakfire, "Launching command:\n");
1725
1726 // Log argv
1727 for (unsigned int i = 0; argv[i]; i++)
1728 DEBUG(jail->pakfire, " argv[%d] = %s\n", i, argv[i]);
1729
1730 // exec() command
1731 r = execvpe(argv[0], (char**)argv, jail->env);
1732 if (r < 0) {
1733 // Translate errno into regular exit code
1734 switch (errno) {
1735 case ENOENT:
1736 // Ignore if the command doesn't exist
1737 if (ctx->flags & PAKFIRE_JAIL_NOENT_OK)
1738 r = 0;
1739 else
1740 r = 127;
1741
1742 break;
1743
1744 default:
1745 r = 1;
1746 }
1747
1748 ERROR(jail->pakfire, "Could not execve(%s): %m\n", argv[0]);
1749 }
1750
1751 // We should not get here
1752 return r;
1753 }
1754
1755 // Run a command in the jail
1756 static int __pakfire_jail_exec(struct pakfire_jail* jail, const char* argv[],
1757 const int interactive,
1758 pakfire_jail_communicate_in communicate_in,
1759 pakfire_jail_communicate_out communicate_out,
1760 void* data, int flags) {
1761 int exit = -1;
1762 int r;
1763
1764 // Check if argv is valid
1765 if (!argv || !argv[0]) {
1766 errno = EINVAL;
1767 return -1;
1768 }
1769
1770 // Send any output to the default logger if no callback is set
1771 if (!communicate_out)
1772 communicate_out = pakfire_jail_default_log_callback;
1773
1774 // Initialize context for this call
1775 struct pakfire_jail_exec ctx = {
1776 .flags = flags,
1777
1778 .pipes = {
1779 .stdin = { -1, -1 },
1780 .stdout = { -1, -1 },
1781 .stderr = { -1, -1 },
1782 },
1783
1784 .communicate = {
1785 .in = communicate_in,
1786 .out = communicate_out,
1787 .data = data,
1788 },
1789
1790 .pidfd = -1,
1791 };
1792
1793 DEBUG(jail->pakfire, "Executing jail...\n");
1794
1795 // Enable networking in interactive mode
1796 if (interactive)
1797 ctx.flags |= PAKFIRE_JAIL_HAS_NETWORKING;
1798
1799 /*
1800 Setup a file descriptor which can be used to notify the client that the parent
1801 has completed configuration.
1802 */
1803 ctx.completed_fd = eventfd(0, EFD_CLOEXEC);
1804 if (ctx.completed_fd < 0) {
1805 ERROR(jail->pakfire, "eventfd() failed: %m\n");
1806 return -1;
1807 }
1808
1809 // Create pipes to communicate with child process if we are not running interactively
1810 if (!interactive) {
1811 // stdin (only if callback is set)
1812 if (ctx.communicate.in) {
1813 r = pakfire_jail_setup_pipe(jail, &ctx.pipes.stdin, 0);
1814 if (r)
1815 goto ERROR;
1816 }
1817
1818 // stdout
1819 r = pakfire_jail_setup_pipe(jail, &ctx.pipes.stdout, 0);
1820 if (r)
1821 goto ERROR;
1822
1823 // stderr
1824 r = pakfire_jail_setup_pipe(jail, &ctx.pipes.stderr, 0);
1825 if (r)
1826 goto ERROR;
1827 }
1828
1829 // Setup pipes for logging
1830 // INFO
1831 r = pakfire_jail_setup_pipe(jail, &ctx.pipes.log_INFO, O_CLOEXEC);
1832 if (r)
1833 goto ERROR;
1834
1835 // ERROR
1836 r = pakfire_jail_setup_pipe(jail, &ctx.pipes.log_ERROR, O_CLOEXEC);
1837 if (r)
1838 goto ERROR;
1839
1840 #ifdef ENABLE_DEBUG
1841 // DEBUG
1842 r = pakfire_jail_setup_pipe(jail, &ctx.pipes.log_DEBUG, O_CLOEXEC);
1843 if (r)
1844 goto ERROR;
1845 #endif /* ENABLE_DEBUG */
1846
1847 // Configure child process
1848 struct clone_args args = {
1849 .flags =
1850 CLONE_NEWCGROUP |
1851 CLONE_NEWIPC |
1852 CLONE_NEWNS |
1853 CLONE_NEWPID |
1854 CLONE_NEWUSER |
1855 CLONE_NEWUTS |
1856 CLONE_PIDFD,
1857 .exit_signal = SIGCHLD,
1858 .pidfd = (long long unsigned int)&ctx.pidfd,
1859 };
1860
1861 // Launch the process in a cgroup that is a leaf of the configured cgroup
1862 if (jail->cgroup) {
1863 args.flags |= CLONE_INTO_CGROUP;
1864
1865 // Fetch our UUID
1866 const char* uuid = pakfire_jail_uuid(jail);
1867
1868 // Create a temporary cgroup
1869 r = pakfire_cgroup_child(&ctx.cgroup, jail->cgroup, uuid, 0);
1870 if (r) {
1871 ERROR(jail->pakfire, "Could not create cgroup for jail: %m\n");
1872 goto ERROR;
1873 }
1874
1875 // Clone into this cgroup
1876 args.cgroup = pakfire_cgroup_fd(ctx.cgroup);
1877 }
1878
1879 // Setup networking
1880 if (!pakfire_jail_exec_has_flag(&ctx, PAKFIRE_JAIL_HAS_NETWORKING)) {
1881 args.flags |= CLONE_NEWNET;
1882 }
1883
1884 // Fork this process
1885 ctx.pid = clone3(&args, sizeof(args));
1886 if (ctx.pid < 0) {
1887 ERROR(jail->pakfire, "Could not clone: %m\n");
1888 return -1;
1889
1890 // Child process
1891 } else if (ctx.pid == 0) {
1892 r = pakfire_jail_child(jail, &ctx, argv);
1893 _exit(r);
1894 }
1895
1896 // Parent process
1897 r = pakfire_jail_parent(jail, &ctx);
1898 if (r)
1899 goto ERROR;
1900
1901 DEBUG(jail->pakfire, "Waiting for PID %d to finish its work\n", ctx.pid);
1902
1903 // Read output of the child process
1904 r = pakfire_jail_wait(jail, &ctx);
1905 if (r)
1906 goto ERROR;
1907
1908 // Handle exit status
1909 switch (ctx.status.si_code) {
1910 case CLD_EXITED:
1911 DEBUG(jail->pakfire, "The child process exited with code %d\n",
1912 ctx.status.si_status);
1913
1914 // Pass exit code
1915 exit = ctx.status.si_status;
1916 break;
1917
1918 case CLD_KILLED:
1919 ERROR(jail->pakfire, "The child process was killed\n");
1920 exit = 139;
1921 break;
1922
1923 case CLD_DUMPED:
1924 ERROR(jail->pakfire, "The child process terminated abnormally\n");
1925 break;
1926
1927 // Log anything else
1928 default:
1929 ERROR(jail->pakfire, "Unknown child exit code: %d\n", ctx.status.si_code);
1930 break;
1931 }
1932
1933 ERROR:
1934 // Destroy the temporary cgroup (if any)
1935 if (ctx.cgroup) {
1936 // Read cgroup stats
1937 r = pakfire_cgroup_stat(ctx.cgroup, &ctx.cgroup_stats);
1938 if (r) {
1939 ERROR(jail->pakfire, "Could not read cgroup stats: %m\n");
1940 } else {
1941 pakfire_cgroup_stat_dump(ctx.cgroup, &ctx.cgroup_stats);
1942 }
1943
1944 pakfire_cgroup_destroy(ctx.cgroup);
1945 pakfire_cgroup_unref(ctx.cgroup);
1946 }
1947
1948 // Close any file descriptors
1949 pakfire_jail_close_pipe(jail, ctx.pipes.stdin);
1950 pakfire_jail_close_pipe(jail, ctx.pipes.stdout);
1951 pakfire_jail_close_pipe(jail, ctx.pipes.stderr);
1952 if (ctx.pidfd)
1953 close(ctx.pidfd);
1954 pakfire_jail_close_pipe(jail, ctx.pipes.log_INFO);
1955 pakfire_jail_close_pipe(jail, ctx.pipes.log_ERROR);
1956 pakfire_jail_close_pipe(jail, ctx.pipes.log_DEBUG);
1957
1958 return exit;
1959 }
1960
1961 PAKFIRE_EXPORT int pakfire_jail_exec(
1962 struct pakfire_jail* jail,
1963 const char* argv[],
1964 pakfire_jail_communicate_in callback_in,
1965 pakfire_jail_communicate_out callback_out,
1966 void* data, int flags) {
1967 return __pakfire_jail_exec(jail, argv, 0, callback_in, callback_out, data, flags);
1968 }
1969
/*
	Sets up the interactive environment and then executes argv in the jail in
	interactive mode without any communication callbacks.

	Returns the error of pakfire_jail_setup_interactive_env() if that failed
	and the result of __pakfire_jail_exec() otherwise.
*/
static int pakfire_jail_exec_interactive(
		struct pakfire_jail* jail, const char* argv[], int flags) {
	const int interactive = 1;

	// Prepare the environment first
	const int r = pakfire_jail_setup_interactive_env(jail);
	if (r != 0)
		return r;

	return __pakfire_jail_exec(jail, argv, interactive, NULL, NULL, NULL, flags);
}
1981
1982 int pakfire_jail_exec_script(struct pakfire_jail* jail,
1983 const char* script,
1984 const size_t size,
1985 const char* args[],
1986 pakfire_jail_communicate_in callback_in,
1987 pakfire_jail_communicate_out callback_out,
1988 void* data) {
1989 char path[PATH_MAX];
1990 const char** argv = NULL;
1991 FILE* f = NULL;
1992 int r;
1993
1994 const char* root = pakfire_get_path(jail->pakfire);
1995
1996 // Write the scriptlet to disk
1997 r = pakfire_path_join(path, root, PAKFIRE_TMP_DIR "/pakfire-script.XXXXXX");
1998 if (r)
1999 goto ERROR;
2000
2001 // Create a temporary file
2002 f = pakfire_mktemp(path, 0700);
2003 if (!f) {
2004 ERROR(jail->pakfire, "Could not create temporary file: %m\n");
2005 goto ERROR;
2006 }
2007
2008 DEBUG(jail->pakfire, "Writing script to %s:\n%.*s\n", path, (int)size, script);
2009
2010 // Write data
2011 r = fprintf(f, "%s", script);
2012 if (r < 0) {
2013 ERROR(jail->pakfire, "Could not write script to file %s: %m\n", path);
2014 goto ERROR;
2015 }
2016
2017 // Close file
2018 r = fclose(f);
2019 if (r) {
2020 ERROR(jail->pakfire, "Could not close script file %s: %m\n", path);
2021 goto ERROR;
2022 }
2023
2024 f = NULL;
2025
2026 // Count how many arguments were passed
2027 unsigned int argc = 1;
2028 if (args) {
2029 for (const char** arg = args; *arg; arg++)
2030 argc++;
2031 }
2032
2033 argv = calloc(argc + 1, sizeof(*argv));
2034 if (!argv) {
2035 ERROR(jail->pakfire, "Could not allocate argv: %m\n");
2036 goto ERROR;
2037 }
2038
2039 // Set command
2040 argv[0] = (root) ? pakfire_path_relpath(root, path) : path;
2041
2042 // Copy args
2043 for (unsigned int i = 1; i < argc; i++)
2044 argv[i] = args[i-1];
2045
2046 // Run the script
2047 r = pakfire_jail_exec(jail, argv, callback_in, callback_out, data, 0);
2048
2049 ERROR:
2050 if (argv)
2051 free(argv);
2052 if (f)
2053 fclose(f);
2054
2055 // Remove script from disk
2056 if (*path)
2057 unlink(path);
2058
2059 return r;
2060 }
2061
2062 /*
2063 A convenience function that creates a new jail, runs the given command and destroys
2064 the jail again.
2065 */
2066 int pakfire_jail_run(struct pakfire* pakfire, const char* argv[], int flags, char** output) {
2067 struct pakfire_jail* jail = NULL;
2068 int r;
2069
2070 // Create a new jail
2071 r = pakfire_jail_create(&jail, pakfire);
2072 if (r)
2073 goto ERROR;
2074
2075 // Execute the command
2076 r = pakfire_jail_exec(jail, argv, NULL, pakfire_jail_capture_stdout, output, 0);
2077
2078 ERROR:
2079 if (jail)
2080 pakfire_jail_unref(jail);
2081
2082 return r;
2083 }
2084
/*
	Creates a new jail, runs the given script in it without any communication
	callbacks and releases the jail again.

	Returns the error of pakfire_jail_create() if no jail could be created and
	the result of pakfire_jail_exec_script() otherwise.

	NOTE(review): flags is not used, since pakfire_jail_exec_script() does not
	take any flags.
*/
int pakfire_jail_run_script(struct pakfire* pakfire,
		const char* script, const size_t length, const char* argv[], int flags) {
	struct pakfire_jail* jail = NULL;

	// Only run the script if the jail could be created
	int r = pakfire_jail_create(&jail, pakfire);
	if (!r)
		r = pakfire_jail_exec_script(jail, script, length, argv, NULL, NULL, NULL);

	// Release the jail (if any)
	if (jail)
		pakfire_jail_unref(jail);

	return r;
}
2104
/*
	Launches an interactive login shell (/bin/bash --login) in the jail and
	returns the result of pakfire_jail_exec_interactive().
*/
int pakfire_jail_shell(struct pakfire_jail* jail) {
	const int flags = 0;

	// Command line of the shell
	const char* argv[] = {
		"/bin/bash",
		"--login",
		NULL,
	};

	return pakfire_jail_exec_interactive(jail, argv, flags);
}
2113
2114 static int pakfire_jail_run_if_possible(struct pakfire* pakfire, const char** argv) {
2115 char path[PATH_MAX];
2116 int r;
2117
2118 r = pakfire_path(pakfire, path, "%s", *argv);
2119 if (r)
2120 return r;
2121
2122 // Check if the file is executable
2123 r = access(path, X_OK);
2124 if (r) {
2125 DEBUG(pakfire, "%s is not executable. Skipping...\n", *argv);
2126 return 0;
2127 }
2128
2129 return pakfire_jail_run(pakfire, argv, 0, NULL);
2130 }
2131
/*
	Runs /sbin/ldconfig in a new jail if it is executable and returns the
	result of pakfire_jail_run_if_possible().
*/
int pakfire_jail_ldconfig(struct pakfire* pakfire) {
	const char* command[] = { "/sbin/ldconfig", NULL };

	return pakfire_jail_run_if_possible(pakfire, command);
}
2140
/*
	Runs "/usr/bin/systemd-tmpfiles --create" in a new jail if the binary is
	executable and returns the result of pakfire_jail_run_if_possible().
*/
int pakfire_jail_run_systemd_tmpfiles(struct pakfire* pakfire) {
	const char* command[] = { "/usr/bin/systemd-tmpfiles", "--create", NULL };

	return pakfire_jail_run_if_possible(pakfire, command);
}