]> git.ipfire.org Git - people/ms/pakfire.git/blob - src/libpakfire/jail.c
jail: Ignore any return codes from the shell
[people/ms/pakfire.git] / src / libpakfire / jail.c
1 /*#############################################################################
2 # #
3 # Pakfire - The IPFire package management system #
4 # Copyright (C) 2022 Pakfire development team #
5 # #
6 # This program is free software: you can redistribute it and/or modify #
7 # it under the terms of the GNU General Public License as published by #
8 # the Free Software Foundation, either version 3 of the License, or #
9 # (at your option) any later version. #
10 # #
11 # This program is distributed in the hope that it will be useful, #
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of #
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
14 # GNU General Public License for more details. #
15 # #
16 # You should have received a copy of the GNU General Public License #
17 # along with this program. If not, see <http://www.gnu.org/licenses/>. #
18 # #
19 #############################################################################*/
20
21 #include <errno.h>
22 #include <fcntl.h>
23 #include <linux/capability.h>
24 #include <linux/sched.h>
25 #include <sys/wait.h>
26 #include <linux/wait.h>
27 #include <sched.h>
28 #include <signal.h>
29 #include <stdlib.h>
30 #include <syscall.h>
31 #include <sys/capability.h>
32 #include <sys/epoll.h>
33 #include <sys/eventfd.h>
34 #include <sys/mount.h>
35 #include <sys/personality.h>
36 #include <sys/prctl.h>
37 #include <sys/resource.h>
38 #include <sys/signalfd.h>
39 #include <sys/timerfd.h>
40 #include <sys/types.h>
41 #include <sys/wait.h>
42
43 // libnl3
44 #include <net/if.h>
45 #include <netlink/route/link.h>
46
47 // libseccomp
48 #include <seccomp.h>
49
50 // libuuid
51 #include <uuid.h>
52
53 #include <pakfire/arch.h>
54 #include <pakfire/cgroup.h>
55 #include <pakfire/jail.h>
56 #include <pakfire/logging.h>
57 #include <pakfire/mount.h>
58 #include <pakfire/pakfire.h>
59 #include <pakfire/private.h>
60 #include <pakfire/pwd.h>
61 #include <pakfire/string.h>
62 #include <pakfire/util.h>
63
// Size of a log buffer in bytes (64 KiB). The expression is parenthesised
// so that the macro expands safely inside of larger expressions.
#define BUFFER_SIZE (1024 * 64)

// Number of slots in the environment array of a jail
#define ENVIRON_SIZE 128

// Maximum number of events that are fetched by one call of epoll_wait()
#define EPOLL_MAX_EVENTS 2

// Maximum number of custom mountpoints of a jail
#define MAX_MOUNTPOINTS 8
68
// Environment variables that are set for every new jail.
// The table is terminated by an entry whose key is NULL.
static const struct environ {
	const char* key;
	const char* val;
} ENV[] = {
	{ .key = "HOME", .val = "/root" },
	{ .key = "LANG", .val = "C.utf-8" },
	{ .key = "PATH", .val = "/usr/local/sbin:/usr/sbin:/sbin:/usr/local/bin:/usr/bin:/bin" },
	{ .key = "TERM", .val = "vt100" },

	// Tell everything that it is running inside a Pakfire container
	{ .key = "container", .val = "pakfire" },

	// Terminator
	{ .key = NULL, .val = NULL },
};
83
// One custom bind-mount that has been registered with pakfire_jail_bind()
struct pakfire_jail_mountpoint {
	// Path that is being mounted
	char source[PATH_MAX];

	// Path where the source will be mounted
	char target[PATH_MAX];

	// Flags that are passed on when the mountpoint is being mounted
	int flags;
};
89
90 struct pakfire_jail {
91 struct pakfire* pakfire;
92 int nrefs;
93
94 // A unique ID for each jail
95 uuid_t uuid;
96 char __uuid[UUID_STR_LEN];
97
98 // Resource Limits
99 int nice;
100
101 // Timeout
102 struct itimerspec timeout;
103
104 // CGroup
105 struct pakfire_cgroup* cgroup;
106
107 // Environment
108 char* env[ENVIRON_SIZE];
109
110 // Mountpoints
111 struct pakfire_jail_mountpoint mountpoints[MAX_MOUNTPOINTS];
112 unsigned int num_mountpoints;
113 };
114
115 struct pakfire_log_buffer {
116 char data[BUFFER_SIZE];
117 size_t used;
118 };
119
120 struct pakfire_jail_exec {
121 int flags;
122
123 // PID (of the child)
124 pid_t pid;
125 int pidfd;
126
127 // Process status (from waitid)
128 siginfo_t status;
129
130 // FD to notify the client that the parent has finished initialization
131 int completed_fd;
132
133 // Log pipes
134 struct pakfire_jail_pipes {
135 int stdin[2];
136 int stdout[2];
137 int stderr[2];
138
139 // Logging
140 int log_INFO[2];
141 int log_ERROR[2];
142 int log_DEBUG[2];
143 } pipes;
144
145 // Communicate
146 struct pakfire_jail_communicate {
147 pakfire_jail_communicate_in in;
148 pakfire_jail_communicate_out out;
149 void* data;
150 } communicate;
151
152 // Log buffers
153 struct pakfire_jail_buffers {
154 struct pakfire_log_buffer stdout;
155 struct pakfire_log_buffer stderr;
156
157 // Logging
158 struct pakfire_log_buffer log_INFO;
159 struct pakfire_log_buffer log_ERROR;
160 struct pakfire_log_buffer log_DEBUG;
161 } buffers;
162
163 struct pakfire_cgroup* cgroup;
164 struct pakfire_cgroup_stats cgroup_stats;
165 };
166
// Invokes the clone3 system call directly through syscall()
static int clone3(struct clone_args* args, size_t size) {
	const long r = syscall(__NR_clone3, args, size);

	return r;
}
170
171 static int pidfd_send_signal(int pidfd, int sig, siginfo_t* info, unsigned int flags) {
172 return syscall(SYS_pidfd_send_signal, pidfd, sig, info, flags);
173 }
174
// Invokes the pivot_root system call directly through syscall()
static int pivot_root(const char* new_root, const char* old_root) {
	const long r = syscall(SYS_pivot_root, new_root, old_root);

	return r;
}
178
179 static int pakfire_jail_exec_has_flag(
180 const struct pakfire_jail_exec* ctx, const enum pakfire_jail_exec_flags flag) {
181 return ctx->flags & flag;
182 }
183
184 static void pakfire_jail_free(struct pakfire_jail* jail) {
185 DEBUG(jail->pakfire, "Freeing jail at %p\n", jail);
186
187 // Free environment
188 for (unsigned int i = 0; jail->env[i]; i++)
189 free(jail->env[i]);
190
191 if (jail->cgroup)
192 pakfire_cgroup_unref(jail->cgroup);
193
194 pakfire_unref(jail->pakfire);
195 free(jail);
196 }
197
/*
	Forwards a line to the log of the Pakfire instance using the macro
	that matches the priority. Lines of any other priority are dropped.

	Always returns zero.
*/
static int pakfire_jail_default_log_callback(struct pakfire* pakfire, void* data,
		int priority, const char* line, size_t length) {
	switch (priority) {
		case LOG_ERR:
			ERROR(pakfire, "%s", line);
			break;

		case LOG_INFO:
			INFO(pakfire, "%s", line);
			break;

#ifdef ENABLE_DEBUG
		case LOG_DEBUG:
			DEBUG(pakfire, "%s", line);
			break;
#endif

		default:
			break;
	}

	return 0;
}
221
222 static const char* pakfire_jail_uuid(struct pakfire_jail* jail) {
223 if (!*jail->__uuid)
224 uuid_unparse_lower(jail->uuid, jail->__uuid);
225
226 return jail->__uuid;
227 }
228
/*
	Prepares the environment of the jail for an interactive session.

	PS1 is set to a fixed prompt, and TERM and LANG are copied from the
	environment of the calling process if they are set there.

	Returns zero on success or the error of pakfire_jail_set_env().
*/
static int pakfire_jail_setup_interactive_env(struct pakfire_jail* jail) {
	// Variables that are copied from the calling process (in this order)
	static const char* const inherited[] = { "TERM", "LANG", NULL };

	// Set PS1
	int r = pakfire_jail_set_env(jail, "PS1", "pakfire-jail \\w> ");
	if (r)
		return r;

	for (const char* const* name = inherited; *name; name++) {
		const char* value = secure_getenv(*name);

		// Skip anything that is not set
		if (!value)
			continue;

		r = pakfire_jail_set_env(jail, *name, value);
		if (r)
			return r;
	}

	return 0;
}
253
254 PAKFIRE_EXPORT int pakfire_jail_create(struct pakfire_jail** jail, struct pakfire* pakfire) {
255 int r;
256
257 const char* arch = pakfire_get_effective_arch(pakfire);
258
259 // Allocate a new jail
260 struct pakfire_jail* j = calloc(1, sizeof(*j));
261 if (!j)
262 return 1;
263
264 // Reference Pakfire
265 j->pakfire = pakfire_ref(pakfire);
266
267 // Initialize reference counter
268 j->nrefs = 1;
269
270 // Generate a random UUID
271 uuid_generate_random(j->uuid);
272
273 DEBUG(j->pakfire, "Allocated new jail at %p\n", j);
274
275 // Set default environment
276 for (const struct environ* e = ENV; e->key; e++) {
277 r = pakfire_jail_set_env(j, e->key, e->val);
278 if (r)
279 goto ERROR;
280 }
281
282 // Enable all CPU features that CPU has to offer
283 if (!pakfire_arch_is_supported_by_host(arch)) {
284 r = pakfire_jail_set_env(j, "QEMU_CPU", "max");
285 if (r)
286 goto ERROR;
287 }
288
289 // Set container UUID
290 r = pakfire_jail_set_env(j, "container_uuid", pakfire_jail_uuid(j));
291 if (r)
292 goto ERROR;
293
294 // Disable systemctl to talk to systemd
295 if (!pakfire_on_root(j->pakfire)) {
296 r = pakfire_jail_set_env(j, "SYSTEMD_OFFLINE", "1");
297 if (r)
298 goto ERROR;
299 }
300
301 // Done
302 *jail = j;
303 return 0;
304
305 ERROR:
306 pakfire_jail_free(j);
307
308 return r;
309 }
310
311 PAKFIRE_EXPORT struct pakfire_jail* pakfire_jail_ref(struct pakfire_jail* jail) {
312 ++jail->nrefs;
313
314 return jail;
315 }
316
317 PAKFIRE_EXPORT struct pakfire_jail* pakfire_jail_unref(struct pakfire_jail* jail) {
318 if (--jail->nrefs > 0)
319 return jail;
320
321 pakfire_jail_free(jail);
322 return NULL;
323 }
324
325 // Resource Limits
326
327 PAKFIRE_EXPORT int pakfire_jail_nice(struct pakfire_jail* jail, int nice) {
328 // Check if nice level is in range
329 if (nice < -19 || nice > 20) {
330 errno = EINVAL;
331 return 1;
332 }
333
334 // Store nice level
335 jail->nice = nice;
336
337 return 0;
338 }
339
340 int pakfire_jail_set_cgroup(struct pakfire_jail* jail, struct pakfire_cgroup* cgroup) {
341 // Free any previous cgroup
342 if (jail->cgroup) {
343 pakfire_cgroup_unref(jail->cgroup);
344 jail->cgroup = NULL;
345 }
346
347 // Set any new cgroup
348 if (cgroup) {
349 DEBUG(jail->pakfire, "Setting cgroup %p\n", cgroup);
350
351 jail->cgroup = pakfire_cgroup_ref(cgroup);
352 }
353
354 // Done
355 return 0;
356 }
357
358 // Environment
359
360 // Returns the length of the environment
361 static unsigned int pakfire_jail_env_length(struct pakfire_jail* jail) {
362 unsigned int i = 0;
363
364 // Count everything in the environment
365 for (char** e = jail->env; *e; e++)
366 i++;
367
368 return i;
369 }
370
371 // Finds an existing environment variable and returns its index or -1 if not found
372 static int pakfire_jail_find_env(struct pakfire_jail* jail, const char* key) {
373 if (!key) {
374 errno = EINVAL;
375 return -1;
376 }
377
378 const size_t length = strlen(key);
379
380 for (unsigned int i = 0; jail->env[i]; i++) {
381 if ((pakfire_string_startswith(jail->env[i], key)
382 && *(jail->env[i] + length) == '=')) {
383 return i;
384 }
385 }
386
387 // Nothing found
388 return -1;
389 }
390
391 // Returns the value of an environment variable or NULL
392 PAKFIRE_EXPORT const char* pakfire_jail_get_env(struct pakfire_jail* jail,
393 const char* key) {
394 int i = pakfire_jail_find_env(jail, key);
395 if (i < 0)
396 return NULL;
397
398 return jail->env[i] + strlen(key) + 1;
399 }
400
401 // Sets an environment variable
402 PAKFIRE_EXPORT int pakfire_jail_set_env(struct pakfire_jail* jail,
403 const char* key, const char* value) {
404 // Find the index where to write this value to
405 int i = pakfire_jail_find_env(jail, key);
406 if (i < 0)
407 i = pakfire_jail_env_length(jail);
408
409 // Return -ENOSPC when the environment is full
410 if (i >= ENVIRON_SIZE) {
411 errno = ENOSPC;
412 return -1;
413 }
414
415 // Free any previous value
416 if (jail->env[i])
417 free(jail->env[i]);
418
419 // Format and set environment variable
420 asprintf(&jail->env[i], "%s=%s", key, value);
421
422 DEBUG(jail->pakfire, "Set environment variable: %s\n", jail->env[i]);
423
424 return 0;
425 }
426
427 // Imports an environment
428 PAKFIRE_EXPORT int pakfire_jail_import_env(struct pakfire_jail* jail, const char* env[]) {
429 if (!env)
430 return 0;
431
432 char* key;
433 char* val;
434 int r;
435
436 // Copy environment variables
437 for (unsigned int i = 0; env[i]; i++) {
438 r = pakfire_string_partition(env[i], "=", &key, &val);
439 if (r)
440 continue;
441
442 // Set value
443 r = pakfire_jail_set_env(jail, key, val);
444
445 if (key)
446 free(key);
447 if (val)
448 free(val);
449
450 // Break on error
451 if (r)
452 return r;
453 }
454
455 return 0;
456 }
457
458 // Timeout
459
460 PAKFIRE_EXPORT int pakfire_jail_set_timeout(
461 struct pakfire_jail* jail, unsigned int timeout) {
462 // Store value
463 jail->timeout.it_value.tv_sec = timeout;
464
465 if (timeout > 0)
466 DEBUG(jail->pakfire, "Timeout set to %d second(s)\n", timeout);
467 else
468 DEBUG(jail->pakfire, "Timeout disabled\n");
469
470 return 0;
471 }
472
473 static int pakfire_jail_create_timer(struct pakfire_jail* jail) {
474 int r;
475
476 // Nothing to do if no timeout has been set
477 if (!jail->timeout.it_value.tv_sec)
478 return -1;
479
480 // Create a new timer
481 const int fd = timerfd_create(CLOCK_MONOTONIC, 0);
482 if (fd < 0) {
483 ERROR(jail->pakfire, "Could not create timer: %m\n");
484 goto ERROR;
485 }
486
487 // Arm timer
488 r = timerfd_settime(fd, 0, &jail->timeout, NULL);
489 if (r) {
490 ERROR(jail->pakfire, "Could not arm timer: %m\n");
491 goto ERROR;
492 }
493
494 return fd;
495
496 ERROR:
497 if (fd > 0)
498 close(fd);
499
500 return -1;
501 }
502
503 // Signals
504
505 static int pakfire_jail_handle_signals(struct pakfire_jail* jail) {
506 sigset_t mask;
507 int r;
508
509 sigemptyset(&mask);
510 sigaddset(&mask, SIGINT);
511
512 // Block signals
513 r = sigprocmask(SIG_BLOCK, &mask, NULL);
514 if (r < 0) {
515 ERROR(jail->pakfire, "Failed to block signals: %m\n");
516 return r;
517 }
518
519 // Create a file descriptor
520 r = signalfd(-1, &mask, SFD_NONBLOCK|SFD_CLOEXEC);
521 if (r < 0) {
522 ERROR(jail->pakfire, "Failed to create signalfd: %m\n");
523 return r;
524 }
525
526 return r;
527 }
528
529 /*
530 This function replaces any logging in the child process.
531
532 All log messages will be sent to the parent process through their respective pipes.
533 */
534 static void pakfire_jail_log(void* data, int priority, const char* file,
535 int line, const char* fn, const char* format, va_list args) {
536 struct pakfire_jail_pipes* pipes = (struct pakfire_jail_pipes*)data;
537 int fd;
538
539 switch (priority) {
540 case LOG_INFO:
541 fd = pipes->log_INFO[1];
542 break;
543
544 case LOG_ERR:
545 fd = pipes->log_ERROR[1];
546 break;
547
548 #ifdef ENABLE_DEBUG
549 case LOG_DEBUG:
550 fd = pipes->log_DEBUG[1];
551 break;
552 #endif /* ENABLE_DEBUG */
553
554 // Ignore any messages of an unknown priority
555 default:
556 return;
557 }
558
559 // Send the log message
560 if (fd)
561 vdprintf(fd, format, args);
562 }
563
564 static int pakfire_jail_log_buffer_is_full(const struct pakfire_log_buffer* buffer) {
565 return (sizeof(buffer->data) == buffer->used);
566 }
567
568 /*
569 This function reads as much data as it can from the file descriptor.
570 If it finds a whole line in it, it will send it to the logger and repeat the process.
571 If not newline character is found, it will try to read more data until it finds one.
572 */
573 static int pakfire_jail_handle_log(struct pakfire_jail* jail,
574 struct pakfire_jail_exec* ctx, int priority, int fd,
575 struct pakfire_log_buffer* buffer, pakfire_jail_communicate_out callback, void* data) {
576 char line[BUFFER_SIZE + 1];
577
578 // Fill up buffer from fd
579 if (buffer->used < sizeof(buffer->data)) {
580 ssize_t bytes_read = read(fd, buffer->data + buffer->used,
581 sizeof(buffer->data) - buffer->used);
582
583 // Handle errors
584 if (bytes_read < 0) {
585 ERROR(jail->pakfire, "Could not read from fd %d: %m\n", fd);
586 return -1;
587 }
588
589 // Update buffer size
590 buffer->used += bytes_read;
591 }
592
593 // See if we have any lines that we can write
594 while (buffer->used) {
595 // Search for the end of the first line
596 char* eol = memchr(buffer->data, '\n', buffer->used);
597
598 // No newline found
599 if (!eol) {
600 // If the buffer is full, we send the content to the logger and try again
601 // This should not happen in practise
602 if (pakfire_jail_log_buffer_is_full(buffer)) {
603 DEBUG(jail->pakfire, "Logging buffer is full. Sending all content\n");
604
605 eol = buffer->data + sizeof(buffer->data) - 1;
606
607 // Otherwise we might have only read parts of the output
608 } else
609 break;
610 }
611
612 // Find the length of the string
613 size_t length = eol - buffer->data + 1;
614
615 // Copy the line into the buffer
616 memcpy(line, buffer->data, length);
617
618 // Terminate the string
619 line[length] = '\0';
620
621 // Log the line
622 if (callback) {
623 int r = callback(jail->pakfire, data, priority, line, length);
624 if (r) {
625 ERROR(jail->pakfire, "The logging callback returned an error: %d\n", r);
626 return r;
627 }
628 }
629
630 // Remove line from buffer
631 memmove(buffer->data, buffer->data + length, buffer->used - length);
632 buffer->used -= length;
633 }
634
635 return 0;
636 }
637
638 static int pakfire_jail_stream_stdin(struct pakfire_jail* jail,
639 struct pakfire_jail_exec* ctx, const int fd) {
640 int r;
641
642 // Nothing to do if there is no stdin callback set
643 if (!ctx->communicate.in) {
644 DEBUG(jail->pakfire, "Callback for standard input is not set\n");
645 return 0;
646 }
647
648 // Skip if the writing pipe has already been closed
649 if (!ctx->pipes.stdin[1])
650 return 0;
651
652 DEBUG(jail->pakfire, "Streaming standard input...\n");
653
654 // Calling the callback
655 r = ctx->communicate.in(jail->pakfire, ctx->communicate.data, fd);
656
657 DEBUG(jail->pakfire, "Standard input callback finished: %d\n", r);
658
659 // The callback signaled that it has written everything
660 if (r == EOF) {
661 DEBUG(jail->pakfire, "Closing standard input pipe\n");
662
663 // Close the file-descriptor
664 close(fd);
665
666 // Reset the file-descriptor so it won't be closed again later
667 ctx->pipes.stdin[1] = 0;
668
669 // Report success
670 r = 0;
671 }
672
673 return r;
674 }
675
676 static int pakfire_jail_setup_pipe(struct pakfire_jail* jail, int (*fds)[2], const int flags) {
677 int r = pipe2(*fds, flags);
678 if (r < 0) {
679 ERROR(jail->pakfire, "Could not setup pipe: %m\n");
680 return 1;
681 }
682
683 return 0;
684 }
685
/*
	Closes both ends of a pipe.

	Ends that are zero (never set up) or negative (already closed by
	pakfire_jail_get_pipe_to_read/write which mark them as -1) are skipped.
*/
static void pakfire_jail_close_pipe(struct pakfire_jail* jail, int fds[2]) {
	for (unsigned int i = 0; i < 2; i++) {
		if (fds[i] > 0)
			close(fds[i]);
	}
}
691
/*
	This is a convenience function which returns the reading end of a
	pipe. The writing end is closed and marked as -1 unless it is zero.
*/
static int pakfire_jail_get_pipe_to_read(struct pakfire_jail* jail, int (*fds)[2]) {
	enum { READ_END = 0, WRITE_END = 1 };

	int* ends = *fds;

	// Close the write end of the pipe
	if (ends[WRITE_END] != 0) {
		close(ends[WRITE_END]);
		ends[WRITE_END] = -1;
	}

	// Return the read end
	return ends[READ_END];
}
710
/*
	This is a convenience function which returns the writing end of a
	pipe. The reading end is closed and marked as -1 unless it is zero.
*/
static int pakfire_jail_get_pipe_to_write(struct pakfire_jail* jail, int (*fds)[2]) {
	enum { READ_END = 0, WRITE_END = 1 };

	int* ends = *fds;

	// Close the read end of the pipe
	if (ends[READ_END] != 0) {
		close(ends[READ_END]);
		ends[READ_END] = -1;
	}

	// Return the write end
	return ends[WRITE_END];
}
725
726 static int pakfire_jail_wait(struct pakfire_jail* jail, struct pakfire_jail_exec* ctx) {
727 int epollfd = -1;
728 struct epoll_event ev;
729 struct epoll_event events[EPOLL_MAX_EVENTS];
730 struct signalfd_siginfo siginfo;
731 char garbage[8];
732 int r = 0;
733
734 // Fetch file descriptors from context
735 const int stdin = pakfire_jail_get_pipe_to_write(jail, &ctx->pipes.stdin);
736 const int stdout = pakfire_jail_get_pipe_to_read(jail, &ctx->pipes.stdout);
737 const int stderr = pakfire_jail_get_pipe_to_read(jail, &ctx->pipes.stderr);
738 const int pidfd = ctx->pidfd;
739
740 // Timer
741 const int timerfd = pakfire_jail_create_timer(jail);
742
743 // Logging
744 const int log_INFO = pakfire_jail_get_pipe_to_read(jail, &ctx->pipes.log_INFO);
745 const int log_ERROR = pakfire_jail_get_pipe_to_read(jail, &ctx->pipes.log_ERROR);
746 const int log_DEBUG = pakfire_jail_get_pipe_to_read(jail, &ctx->pipes.log_DEBUG);
747
748 // Signals
749 const int signalfd = pakfire_jail_handle_signals(jail);
750
751 // Make a list of all file descriptors we are interested in
752 const int fds[] = {
753 stdin, stdout, stderr, pidfd, timerfd, signalfd, log_INFO, log_ERROR, log_DEBUG,
754 };
755
756 // Setup epoll
757 epollfd = epoll_create1(0);
758 if (epollfd < 0) {
759 ERROR(jail->pakfire, "Could not initialize epoll(): %m\n");
760 r = 1;
761 goto ERROR;
762 }
763
764 // Turn file descriptors into non-blocking mode and add them to epoll()
765 for (unsigned int i = 0; i < sizeof(fds) / sizeof(*fds); i++) {
766 int fd = fds[i];
767
768 // Skip fds which were not initialized
769 if (fd < 0)
770 continue;
771
772 ev.events = EPOLLHUP;
773
774 if (fd == stdin)
775 ev.events |= EPOLLOUT;
776 else
777 ev.events |= EPOLLIN;
778
779 // Read flags
780 int flags = fcntl(fd, F_GETFL, 0);
781
782 // Set modified flags
783 if (fcntl(fd, F_SETFL, flags|O_NONBLOCK) < 0) {
784 ERROR(jail->pakfire,
785 "Could not set file descriptor %d into non-blocking mode: %m\n", fd);
786 r = 1;
787 goto ERROR;
788 }
789
790 ev.data.fd = fd;
791
792 if (epoll_ctl(epollfd, EPOLL_CTL_ADD, fd, &ev) < 0) {
793 ERROR(jail->pakfire, "Could not add file descriptor %d to epoll(): %m\n", fd);
794 r = 1;
795 goto ERROR;
796 }
797 }
798
799 int ended = 0;
800
801 // Loop for as long as the process is alive
802 while (!ended) {
803 int num = epoll_wait(epollfd, events, EPOLL_MAX_EVENTS, -1);
804 if (num < 1) {
805 // Ignore if epoll_wait() has been interrupted
806 if (errno == EINTR)
807 continue;
808
809 ERROR(jail->pakfire, "epoll_wait() failed: %m\n");
810 r = 1;
811
812 goto ERROR;
813 }
814
815 for (int i = 0; i < num; i++) {
816 int e = events[i].events;
817 int fd = events[i].data.fd;
818
819 struct pakfire_log_buffer* buffer = NULL;
820 pakfire_jail_communicate_out callback = NULL;
821 void* data = NULL;
822 int priority;
823
824 // Check if there is any data to be read
825 if (e & EPOLLIN) {
826 // Handle any changes to the PIDFD
827 if (fd == pidfd) {
828 // Call waidid() and store the result
829 r = waitid(P_PIDFD, ctx->pidfd, &ctx->status, WEXITED);
830 if (r) {
831 ERROR(jail->pakfire, "waitid() failed: %m\n");
832 goto ERROR;
833 }
834
835 // Mark that we have ended so that we will process the remaining
836 // events from epoll() now, but won't restart the outer loop.
837 ended = 1;
838 continue;
839
840 // Handle timer events
841 } else if (fd == timerfd) {
842 DEBUG(jail->pakfire, "Timer event received\n");
843
844 // Disarm the timer
845 r = read(timerfd, garbage, sizeof(garbage));
846 if (r < 1) {
847 ERROR(jail->pakfire, "Could not disarm timer: %m\n");
848 r = 1;
849 goto ERROR;
850 }
851
852 // Terminate the process if it hasn't already ended
853 if (!ended) {
854 DEBUG(jail->pakfire, "Terminating process...\n");
855
856 // Send SIGTERM to the process
857 r = pidfd_send_signal(pidfd, SIGKILL, NULL, 0);
858 if (r) {
859 ERROR(jail->pakfire, "Could not kill process: %m\n");
860 goto ERROR;
861 }
862 }
863
864 // There is nothing else to do
865 continue;
866
867 // Handle signals
868 } else if (fd == signalfd) {
869 // Read the signal
870 r = read(signalfd, &siginfo, sizeof(siginfo));
871 if (r < 1) {
872 ERROR(jail->pakfire, "Could not read signal: %m\n");
873 goto ERROR;
874 }
875
876 DEBUG(jail->pakfire, "Received signal %d\n", siginfo.ssi_signo);
877
878 // Handle signals
879 switch (siginfo.ssi_signo) {
880 // Pass SIGINT down to the child process
881 case SIGINT:
882 r = pidfd_send_signal(pidfd, siginfo.ssi_signo, NULL, 0);
883 if (r) {
884 ERROR(jail->pakfire, "Could not send signal to process: %m\n");
885 goto ERROR;
886 }
887 break;
888
889 default:
890 ERROR(jail->pakfire, "Received unhandled signal %d\n",
891 siginfo.ssi_signo);
892 break;
893 }
894
895 // Don't fall through to log processing
896 continue;
897
898 // Handle logging messages
899 } else if (fd == log_INFO) {
900 buffer = &ctx->buffers.log_INFO;
901 priority = LOG_INFO;
902
903 callback = pakfire_jail_default_log_callback;
904
905 } else if (fd == log_ERROR) {
906 buffer = &ctx->buffers.log_ERROR;
907 priority = LOG_ERR;
908
909 callback = pakfire_jail_default_log_callback;
910
911 } else if (fd == log_DEBUG) {
912 buffer = &ctx->buffers.log_DEBUG;
913 priority = LOG_DEBUG;
914
915 callback = pakfire_jail_default_log_callback;
916
917 // Handle anything from the log pipes
918 } else if (fd == stdout) {
919 buffer = &ctx->buffers.stdout;
920 priority = LOG_INFO;
921
922 callback = ctx->communicate.out;
923 data = ctx->communicate.data;
924
925 } else if (fd == stderr) {
926 buffer = &ctx->buffers.stderr;
927 priority = LOG_ERR;
928
929 callback = ctx->communicate.out;
930 data = ctx->communicate.data;
931
932 } else {
933 DEBUG(jail->pakfire, "Received invalid file descriptor %d\n", fd);
934 continue;
935 }
936
937 // Handle log event
938 r = pakfire_jail_handle_log(jail, ctx, priority, fd, buffer, callback, data);
939 if (r)
940 goto ERROR;
941 }
942
943 if (e & EPOLLOUT) {
944 // Handle standard input
945 if (fd == stdin) {
946 r = pakfire_jail_stream_stdin(jail, ctx, fd);
947 if (r) {
948 switch (errno) {
949 // Ignore if we filled up the buffer
950 case EAGAIN:
951 break;
952
953 default:
954 ERROR(jail->pakfire, "Could not write to stdin: %m\n");
955 goto ERROR;
956 }
957 }
958 }
959 }
960
961 // Check if any file descriptors have been closed
962 if (e & EPOLLHUP) {
963 // Remove the file descriptor
964 r = epoll_ctl(epollfd, EPOLL_CTL_DEL, fd, NULL);
965 if (r) {
966 ERROR(jail->pakfire, "Could not remove closed file-descriptor %d: %m\n", fd);
967 goto ERROR;
968 }
969 }
970 }
971 }
972
973 ERROR:
974 if (epollfd > 0)
975 close(epollfd);
976 if (timerfd > 0)
977 close(timerfd);
978 if (signalfd > 0)
979 close(signalfd);
980
981 return r;
982 }
983
/*
	An output callback which collects everything from standard output.

	data must be NULL or point to a char* which is either NULL or a string
	that has been allocated on the heap. Every line of priority LOG_INFO is
	appended to that string and the pointer is updated; the caller has to
	free the string. Everything else is passed on to the default logger.

	Returns zero on success or 1 if no memory could be allocated, in which
	case the string that has been collected so far remains valid.
*/
int pakfire_jail_capture_stdout(struct pakfire* pakfire, void* data,
		int priority, const char* line, size_t length) {
	char** output = (char**)data;
	char* buffer = NULL;

	// Append everything from stdout to a buffer
	if (output && priority == LOG_INFO) {
		// Write into a temporary pointer because the pointer is undefined
		// if asprintf() fails
		int r = asprintf(&buffer, "%s%s", (*output) ? *output : "", line);
		if (r < 0)
			return 1;

		// Release the previous buffer which would otherwise be leaked
		free(*output);
		*output = buffer;

		return 0;
	}

	// Send everything else to the default logger
	return pakfire_jail_default_log_callback(pakfire, NULL, priority, line, length);
}
1000
1001 // Capabilities
1002
1003 // Logs all capabilities of the current process
1004 static int pakfire_jail_show_capabilities(struct pakfire_jail* jail) {
1005 cap_t caps = NULL;
1006 char* name = NULL;
1007 cap_flag_value_t value_e;
1008 cap_flag_value_t value_i;
1009 cap_flag_value_t value_p;
1010 int r;
1011
1012 // Fetch PID
1013 pid_t pid = getpid();
1014
1015 // Fetch all capabilities
1016 caps = cap_get_proc();
1017 if (!caps) {
1018 ERROR(jail->pakfire, "Could not fetch capabilities: %m\n");
1019 r = 1;
1020 goto ERROR;
1021 }
1022
1023 DEBUG(jail->pakfire, "Capabilities of PID %d:\n", pid);
1024
1025 // Iterate over all capabilities
1026 for (unsigned int cap = 0; cap_valid(cap); cap++) {
1027 name = cap_to_name(cap);
1028
1029 // Fetch effective value
1030 r = cap_get_flag(caps, cap, CAP_EFFECTIVE, &value_e);
1031 if (r)
1032 goto ERROR;
1033
1034 // Fetch inheritable value
1035 r = cap_get_flag(caps, cap, CAP_INHERITABLE, &value_i);
1036 if (r)
1037 goto ERROR;
1038
1039 // Fetch permitted value
1040 r = cap_get_flag(caps, cap, CAP_PERMITTED, &value_p);
1041 if (r)
1042 goto ERROR;
1043
1044 DEBUG(jail->pakfire,
1045 " %-24s : %c%c%c\n",
1046 name,
1047 (value_e == CAP_SET) ? 'e' : '-',
1048 (value_i == CAP_SET) ? 'i' : '-',
1049 (value_p == CAP_SET) ? 'p' : '-'
1050 );
1051
1052 // Free name
1053 cap_free(name);
1054 name = NULL;
1055 }
1056
1057 // Success
1058 r = 0;
1059
1060 ERROR:
1061 if (name)
1062 cap_free(name);
1063 if (caps)
1064 cap_free(caps);
1065
1066 return r;
1067 }
1068
1069 static int pakfire_jail_set_capabilities(struct pakfire_jail* jail) {
1070 cap_t caps = NULL;
1071 char* name = NULL;
1072 int r;
1073
1074 // Fetch capabilities
1075 caps = cap_get_proc();
1076 if (!caps) {
1077 ERROR(jail->pakfire, "Could not read capabilities: %m\n");
1078 r = 1;
1079 goto ERROR;
1080 }
1081
1082 // Walk through all capabilities
1083 for (cap_value_t cap = 0; cap_valid(cap); cap++) {
1084 cap_value_t _caps[] = { cap };
1085
1086 // Fetch the name of the capability
1087 name = cap_to_name(cap);
1088
1089 r = cap_set_flag(caps, CAP_EFFECTIVE, 1, _caps, CAP_SET);
1090 if (r) {
1091 ERROR(jail->pakfire, "Could not set %s: %m\n", name);
1092 goto ERROR;
1093 }
1094
1095 r = cap_set_flag(caps, CAP_INHERITABLE, 1, _caps, CAP_SET);
1096 if (r) {
1097 ERROR(jail->pakfire, "Could not set %s: %m\n", name);
1098 goto ERROR;
1099 }
1100
1101 r = cap_set_flag(caps, CAP_PERMITTED, 1, _caps, CAP_SET);
1102 if (r) {
1103 ERROR(jail->pakfire, "Could not set %s: %m\n", name);
1104 goto ERROR;
1105 }
1106
1107 // Free name
1108 cap_free(name);
1109 name = NULL;
1110 }
1111
1112 // Restore all capabilities
1113 r = cap_set_proc(caps);
1114 if (r) {
1115 ERROR(jail->pakfire, "Restoring capabilities failed: %m\n");
1116 goto ERROR;
1117 }
1118
1119 // Add all capabilities to the ambient set
1120 for (unsigned int cap = 0; cap_valid(cap); cap++) {
1121 name = cap_to_name(cap);
1122
1123 // Raise the capability
1124 r = prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_RAISE, cap, 0, 0);
1125 if (r) {
1126 ERROR(jail->pakfire, "Could not set ambient capability %s: %m\n", name);
1127 goto ERROR;
1128 }
1129
1130 // Free name
1131 cap_free(name);
1132 name = NULL;
1133 }
1134
1135 // Success
1136 r = 0;
1137
1138 ERROR:
1139 if (name)
1140 cap_free(name);
1141 if (caps)
1142 cap_free(caps);
1143
1144 return r;
1145 }
1146
1147 // Syscall Filter
1148
1149 static int pakfire_jail_limit_syscalls(struct pakfire_jail* jail) {
1150 const int syscalls[] = {
1151 // The kernel's keyring isn't namespaced
1152 SCMP_SYS(keyctl),
1153 SCMP_SYS(add_key),
1154 SCMP_SYS(request_key),
1155
1156 // Disable userfaultfd
1157 SCMP_SYS(userfaultfd),
1158
1159 // Disable perf which could leak a lot of information about the host
1160 SCMP_SYS(perf_event_open),
1161
1162 0,
1163 };
1164 int r = 1;
1165
1166 DEBUG(jail->pakfire, "Applying syscall filter...\n");
1167
1168 // Setup a syscall filter which allows everything by default
1169 scmp_filter_ctx ctx = seccomp_init(SCMP_ACT_ALLOW);
1170 if (!ctx) {
1171 ERROR(jail->pakfire, "Could not setup seccomp filter: %m\n");
1172 goto ERROR;
1173 }
1174
1175 // All all syscalls
1176 for (const int* syscall = syscalls; *syscall; syscall++) {
1177 r = seccomp_rule_add(ctx, SCMP_ACT_ERRNO(EPERM), *syscall, 0);
1178 if (r) {
1179 ERROR(jail->pakfire, "Could not configure syscall %d: %m\n", *syscall);
1180 goto ERROR;
1181 }
1182 }
1183
1184 // Load syscall filter into the kernel
1185 r = seccomp_load(ctx);
1186 if (r) {
1187 ERROR(jail->pakfire, "Could not load syscall filter into the kernel: %m\n");
1188 goto ERROR;
1189 }
1190
1191 ERROR:
1192 if (ctx)
1193 seccomp_release(ctx);
1194
1195 return r;
1196 }
1197
1198 // Mountpoints
1199
1200 PAKFIRE_EXPORT int pakfire_jail_bind(struct pakfire_jail* jail,
1201 const char* source, const char* target, int flags) {
1202 struct pakfire_jail_mountpoint* mp = NULL;
1203 int r;
1204
1205 // Check if there is any space left
1206 if (jail->num_mountpoints >= MAX_MOUNTPOINTS) {
1207 errno = ENOSPC;
1208 return 1;
1209 }
1210
1211 // Check for valid inputs
1212 if (!source || !target) {
1213 errno = EINVAL;
1214 return 1;
1215 }
1216
1217 // Select the next free slot
1218 mp = &jail->mountpoints[jail->num_mountpoints];
1219
1220 // Copy source
1221 r = pakfire_string_set(mp->source, source);
1222 if (r) {
1223 ERROR(jail->pakfire, "Could not copy source: %m\n");
1224 return r;
1225 }
1226
1227 // Copy target
1228 r = pakfire_string_set(mp->target, target);
1229 if (r) {
1230 ERROR(jail->pakfire, "Could not copy target: %m\n");
1231 return r;
1232 }
1233
1234 // Copy flags
1235 mp->flags = flags;
1236
1237 // Increment counter
1238 jail->num_mountpoints++;
1239
1240 return 0;
1241 }
1242
1243 static int pakfire_jail_mount_networking(struct pakfire_jail* jail) {
1244 int r;
1245
1246 const char* paths[] = {
1247 "/etc/hosts",
1248 "/etc/resolv.conf",
1249 NULL,
1250 };
1251
1252 // Bind-mount all paths read-only
1253 for (const char** path = paths; *path; path++) {
1254 r = pakfire_bind(jail->pakfire, *path, NULL, MS_RDONLY);
1255 if (r)
1256 return r;
1257 }
1258
1259 return 0;
1260 }
1261
1262 /*
1263 Mounts everything that we require in the new namespace
1264 */
1265 static int pakfire_jail_mount(struct pakfire_jail* jail, struct pakfire_jail_exec* ctx) {
1266 struct pakfire_jail_mountpoint* mp = NULL;
1267 int flags = 0;
1268 int r;
1269
1270 // Enable loop devices
1271 if (pakfire_jail_exec_has_flag(ctx, PAKFIRE_JAIL_HAS_LOOP_DEVICES))
1272 flags |= PAKFIRE_MOUNT_LOOP_DEVICES;
1273
1274 // Mount all default stuff
1275 r = pakfire_mount_all(jail->pakfire, flags);
1276 if (r)
1277 return r;
1278
1279 // Mount networking stuff
1280 if (pakfire_jail_exec_has_flag(ctx, PAKFIRE_JAIL_HAS_NETWORKING)) {
1281 r = pakfire_jail_mount_networking(jail);
1282 if (r)
1283 return r;
1284 }
1285
1286 // Mount all custom stuff
1287 for (unsigned int i = 0; i < jail->num_mountpoints; i++) {
1288 // Fetch mountpoint
1289 mp = &jail->mountpoints[i];
1290
1291 // Mount it
1292 r = pakfire_bind(jail->pakfire, mp->source, mp->target, mp->flags);
1293 if (r)
1294 return r;
1295 }
1296
1297 // Log all mountpoints
1298 pakfire_mount_list(jail->pakfire);
1299
1300 return 0;
1301 }
1302
1303 // Networking
1304
1305 static int pakfire_jail_setup_loopback(struct pakfire_jail* jail) {
1306 struct nl_sock* nl = NULL;
1307 struct nl_cache* cache = NULL;
1308 struct rtnl_link* link = NULL;
1309 struct rtnl_link* change = NULL;
1310 int r;
1311
1312 DEBUG(jail->pakfire, "Setting up loopback...\n");
1313
1314 // Allocate a netlink socket
1315 nl = nl_socket_alloc();
1316 if (!nl) {
1317 ERROR(jail->pakfire, "Could not allocate a netlink socket: %m\n");
1318 r = 1;
1319 goto ERROR;
1320 }
1321
1322 // Connect the socket
1323 r = nl_connect(nl, NETLINK_ROUTE);
1324 if (r) {
1325 ERROR(jail->pakfire, "Could not connect netlink socket: %s\n", nl_geterror(r));
1326 goto ERROR;
1327 }
1328
1329 // Allocate the netlink cache
1330 r = rtnl_link_alloc_cache(nl, AF_UNSPEC, &cache);
1331 if (r < 0) {
1332 ERROR(jail->pakfire, "Unable to allocate netlink cache: %s\n", nl_geterror(r));
1333 goto ERROR;
1334 }
1335
1336 // Fetch loopback interface
1337 link = rtnl_link_get_by_name(cache, "lo");
1338 if (!link) {
1339 ERROR(jail->pakfire, "Could not find lo interface. Ignoring.\n");
1340 r = 0;
1341 goto ERROR;
1342 }
1343
1344 // Allocate a new link
1345 change = rtnl_link_alloc();
1346 if (!change) {
1347 ERROR(jail->pakfire, "Could not allocate change link\n");
1348 r = 1;
1349 goto ERROR;
1350 }
1351
1352 // Set the link to UP
1353 rtnl_link_set_flags(change, IFF_UP);
1354
1355 // Apply any changes
1356 r = rtnl_link_change(nl, link, change, 0);
1357 if (r) {
1358 ERROR(jail->pakfire, "Unable to activate loopback: %s\n", nl_geterror(r));
1359 goto ERROR;
1360 }
1361
1362 // Success
1363 r = 0;
1364
1365 ERROR:
1366 if (nl)
1367 nl_socket_free(nl);
1368
1369 return r;
1370 }
1371
1372 // UID/GID Mapping
1373
1374 static int pakfire_jail_setup_uid_mapping(struct pakfire_jail* jail, pid_t pid) {
1375 char path[PATH_MAX];
1376 int r;
1377
1378 // Skip mapping anything when running on /
1379 if (pakfire_on_root(jail->pakfire))
1380 return 0;
1381
1382 // Make path
1383 r = pakfire_string_format(path, "/proc/%d/uid_map", pid);
1384 if (r)
1385 return r;
1386
1387 // Fetch UID
1388 const uid_t uid = pakfire_uid(jail->pakfire);
1389
1390 // Fetch SUBUID
1391 const struct pakfire_subid* subuid = pakfire_subuid(jail->pakfire);
1392 if (!subuid)
1393 return 1;
1394
1395 /* When running as root, we will map the entire range.
1396
1397 When running as a non-privileged user, we will map the root user inside the jail
1398 to the user's UID outside of the jail, and we will map the rest starting from one.
1399 */
1400
1401 // Running as root
1402 if (uid == 0) {
1403 r = pakfire_file_write(jail->pakfire, path, 0, 0, 0,
1404 "0 %lu %lu\n", subuid->id, subuid->length);
1405 } else {
1406 r = pakfire_file_write(jail->pakfire, path, 0, 0, 0,
1407 "0 %lu 1\n1 %lu %lu\n", uid, subuid->id, subuid->length);
1408 }
1409
1410 if (r) {
1411 ERROR(jail->pakfire, "Could not map UIDs: %m\n");
1412 return r;
1413 }
1414
1415 return r;
1416 }
1417
1418 static int pakfire_jail_setup_gid_mapping(struct pakfire_jail* jail, pid_t pid) {
1419 char path[PATH_MAX];
1420 int r;
1421
1422 // Skip mapping anything when running on /
1423 if (pakfire_on_root(jail->pakfire))
1424 return 0;
1425
1426 // Fetch GID
1427 const gid_t gid = pakfire_gid(jail->pakfire);
1428
1429 // Fetch SUBGID
1430 const struct pakfire_subid* subgid = pakfire_subgid(jail->pakfire);
1431 if (!subgid)
1432 return 1;
1433
1434 // Make path
1435 r = pakfire_string_format(path, "/proc/%d/gid_map", pid);
1436 if (r)
1437 return r;
1438
1439 // Running as root
1440 if (gid == 0) {
1441 r = pakfire_file_write(jail->pakfire, path, 0, 0, 0,
1442 "0 %lu %lu\n", subgid->id, subgid->length);
1443 } else {
1444 r = pakfire_file_write(jail->pakfire, path, 0, 0, 0,
1445 "0 %lu 1\n%1 %lu %lu\n", gid, subgid->id, subgid->length);
1446 }
1447
1448 if (r) {
1449 ERROR(jail->pakfire, "Could not map GIDs: %m\n");
1450 return r;
1451 }
1452
1453 return r;
1454 }
1455
1456 static int pakfire_jail_setgroups(struct pakfire_jail* jail, pid_t pid) {
1457 char path[PATH_MAX];
1458 int r = 1;
1459
1460 // Make path
1461 r = pakfire_string_format(path, "/proc/%d/setgroups", pid);
1462 if (r)
1463 return r;
1464
1465 // Open file for writing
1466 FILE* f = fopen(path, "w");
1467 if (!f) {
1468 ERROR(jail->pakfire, "Could not open %s for writing: %m\n", path);
1469 goto ERROR;
1470 }
1471
1472 // Write content
1473 int bytes_written = fprintf(f, "deny\n");
1474 if (bytes_written <= 0) {
1475 ERROR(jail->pakfire, "Could not write to %s: %m\n", path);
1476 goto ERROR;
1477 }
1478
1479 r = fclose(f);
1480 f = NULL;
1481 if (r) {
1482 ERROR(jail->pakfire, "Could not close %s: %m\n", path);
1483 goto ERROR;
1484 }
1485
1486 ERROR:
1487 if (f)
1488 fclose(f);
1489
1490 return r;
1491 }
1492
1493 static int pakfire_jail_send_signal(struct pakfire_jail* jail, int fd) {
1494 const uint64_t val = 1;
1495 int r = 0;
1496
1497 DEBUG(jail->pakfire, "Sending signal...\n");
1498
1499 // Write to the file descriptor
1500 ssize_t bytes_written = write(fd, &val, sizeof(val));
1501 if (bytes_written < 0 || (size_t)bytes_written < sizeof(val)) {
1502 ERROR(jail->pakfire, "Could not send signal: %m\n");
1503 r = 1;
1504 }
1505
1506 // Close the file descriptor
1507 close(fd);
1508
1509 return r;
1510 }
1511
1512 static int pakfire_jail_wait_for_signal(struct pakfire_jail* jail, int fd) {
1513 uint64_t val = 0;
1514 int r = 0;
1515
1516 DEBUG(jail->pakfire, "Waiting for signal...\n");
1517
1518 ssize_t bytes_read = read(fd, &val, sizeof(val));
1519 if (bytes_read < 0 || (size_t)bytes_read < sizeof(val)) {
1520 ERROR(jail->pakfire, "Error waiting for signal: %m\n");
1521 r = 1;
1522 }
1523
1524 // Close the file descriptor
1525 close(fd);
1526
1527 return r;
1528 }
1529
1530 /*
1531 Performs the initialisation that needs to happen in the parent part
1532 */
1533 static int pakfire_jail_parent(struct pakfire_jail* jail, struct pakfire_jail_exec* ctx) {
1534 int r;
1535
1536 // Setup UID mapping
1537 r = pakfire_jail_setup_uid_mapping(jail, ctx->pid);
1538 if (r)
1539 return r;
1540
1541 // Write "deny" to /proc/PID/setgroups
1542 r = pakfire_jail_setgroups(jail, ctx->pid);
1543 if (r)
1544 return r;
1545
1546 // Setup GID mapping
1547 r = pakfire_jail_setup_gid_mapping(jail, ctx->pid);
1548 if (r)
1549 return r;
1550
1551 // Parent has finished initialisation
1552 DEBUG(jail->pakfire, "Parent has finished initialization\n");
1553
1554 // Send signal to client
1555 r = pakfire_jail_send_signal(jail, ctx->completed_fd);
1556 if (r)
1557 return r;
1558
1559 return 0;
1560 }
1561
1562 static int pakfire_jail_switch_root(struct pakfire_jail* jail, const char* root) {
1563 int r;
1564
1565 // Change to the new root
1566 r = chdir(root);
1567 if (r) {
1568 ERROR(jail->pakfire, "chdir(%s) failed: %m\n", root);
1569 return r;
1570 }
1571
1572 // Switch Root!
1573 r = pivot_root(".", ".");
1574 if (r) {
1575 ERROR(jail->pakfire, "Failed changing into the new root directory %s: %m\n", root);
1576 return r;
1577 }
1578
1579 // Umount the old root
1580 r = umount2(".", MNT_DETACH);
1581 if (r) {
1582 ERROR(jail->pakfire, "Could not umount the old root filesystem: %m\n");
1583 return r;
1584 }
1585
1586 return 0;
1587 }
1588
1589 static int pakfire_jail_child(struct pakfire_jail* jail, struct pakfire_jail_exec* ctx,
1590 const char* argv[]) {
1591 int r;
1592
1593 // Redirect any logging to our log pipe
1594 pakfire_set_log_callback(jail->pakfire, pakfire_jail_log, &ctx->pipes);
1595
1596 // Fetch my own PID
1597 pid_t pid = getpid();
1598
1599 DEBUG(jail->pakfire, "Launched child process in jail with PID %d\n", pid);
1600
1601 // Wait for the parent to finish initialization
1602 r = pakfire_jail_wait_for_signal(jail, ctx->completed_fd);
1603 if (r)
1604 return r;
1605
1606 // Die with parent
1607 r = prctl(PR_SET_PDEATHSIG, SIGKILL, 0, 0, 0);
1608 if (r) {
1609 ERROR(jail->pakfire, "Could not configure to die with parent: %m\n");
1610 return 126;
1611 }
1612
1613 // Make this process dumpable
1614 r = prctl (PR_SET_DUMPABLE, 1, 0, 0, 0);
1615 if (r) {
1616 ERROR(jail->pakfire, "Could not make the process dumpable: %m\n");
1617 return 126;
1618 }
1619
1620 // Don't drop any capabilities on setuid()
1621 r = prctl(PR_SET_KEEPCAPS, 1);
1622 if (r) {
1623 ERROR(jail->pakfire, "Could not set PR_SET_KEEPCAPS: %m\n");
1624 return 126;
1625 }
1626
1627 // Fetch UID/GID
1628 uid_t uid = getuid();
1629 gid_t gid = getgid();
1630
1631 // Fetch EUID/EGID
1632 uid_t euid = geteuid();
1633 gid_t egid = getegid();
1634
1635 DEBUG(jail->pakfire, " UID: %d (effective %d)\n", uid, euid);
1636 DEBUG(jail->pakfire, " GID: %d (effective %d)\n", gid, egid);
1637
1638 // Check if we are (effectively running as root)
1639 if (uid || gid || euid || egid) {
1640 ERROR(jail->pakfire, "Child process is not running as root\n");
1641 return 126;
1642 }
1643
1644 const char* root = pakfire_get_path(jail->pakfire);
1645 const char* arch = pakfire_get_effective_arch(jail->pakfire);
1646
1647 // Change mount propagation to slave to receive anything from the parent namespace
1648 r = pakfire_mount_change_propagation(jail->pakfire, MS_SLAVE, "/");
1649 if (r)
1650 return r;
1651
1652 // Make root a mountpoint in the new mount namespace
1653 r = pakfire_mount_make_mounpoint(jail->pakfire, root);
1654 if (r)
1655 return r;
1656
1657 // Change mount propagation to private
1658 r = pakfire_mount_change_propagation(jail->pakfire, MS_PRIVATE, root);
1659 if (r)
1660 return r;
1661
1662 // Change root (unless root is /)
1663 if (!pakfire_on_root(jail->pakfire)) {
1664 // Mount everything
1665 r = pakfire_jail_mount(jail, ctx);
1666 if (r)
1667 return r;
1668
1669 // chroot()
1670 r = pakfire_jail_switch_root(jail, root);
1671 if (r)
1672 return r;
1673 }
1674
1675 // Set personality
1676 unsigned long persona = pakfire_arch_personality(arch);
1677 if (persona) {
1678 r = personality(persona);
1679 if (r < 0) {
1680 ERROR(jail->pakfire, "Could not set personality (%x)\n", (unsigned int)persona);
1681 return 1;
1682 }
1683 }
1684
1685 // Setup networking
1686 if (!pakfire_jail_exec_has_flag(ctx, PAKFIRE_JAIL_HAS_NETWORKING)) {
1687 r = pakfire_jail_setup_loopback(jail);
1688 if (r)
1689 return 1;
1690 }
1691
1692 // Set nice level
1693 if (jail->nice) {
1694 DEBUG(jail->pakfire, "Setting nice level to %d\n", jail->nice);
1695
1696 r = setpriority(PRIO_PROCESS, pid, jail->nice);
1697 if (r) {
1698 ERROR(jail->pakfire, "Could not set nice level: %m\n");
1699 return 1;
1700 }
1701 }
1702
1703 // Close other end of log pipes
1704 close(ctx->pipes.log_INFO[0]);
1705 close(ctx->pipes.log_ERROR[0]);
1706 #ifdef ENABLE_DEBUG
1707 close(ctx->pipes.log_DEBUG[0]);
1708 #endif /* ENABLE_DEBUG */
1709
1710 // Connect standard input
1711 if (ctx->pipes.stdin[0] >= 0) {
1712 r = dup2(ctx->pipes.stdin[0], STDIN_FILENO);
1713 if (r < 0) {
1714 ERROR(jail->pakfire, "Could not connect fd %d to stdin: %m\n",
1715 ctx->pipes.stdin[0]);
1716
1717 return 1;
1718 }
1719 }
1720
1721 // Connect standard output and error
1722 if (ctx->pipes.stdout[1] >= 0 && ctx->pipes.stderr[1] >= 0) {
1723 r = dup2(ctx->pipes.stdout[1], STDOUT_FILENO);
1724 if (r < 0) {
1725 ERROR(jail->pakfire, "Could not connect fd %d to stdout: %m\n",
1726 ctx->pipes.stdout[1]);
1727
1728 return 1;
1729 }
1730
1731 r = dup2(ctx->pipes.stderr[1], STDERR_FILENO);
1732 if (r < 0) {
1733 ERROR(jail->pakfire, "Could not connect fd %d to stderr: %m\n",
1734 ctx->pipes.stderr[1]);
1735
1736 return 1;
1737 }
1738
1739 // Close the pipe (as we have moved the original file descriptors)
1740 pakfire_jail_close_pipe(jail, ctx->pipes.stdin);
1741 pakfire_jail_close_pipe(jail, ctx->pipes.stdout);
1742 pakfire_jail_close_pipe(jail, ctx->pipes.stderr);
1743 }
1744
1745 // Reset open file limit (http://0pointer.net/blog/file-descriptor-limits.html)
1746 r = pakfire_rlimit_reset_nofile(jail->pakfire);
1747 if (r)
1748 return r;
1749
1750 // Set capabilities
1751 r = pakfire_jail_set_capabilities(jail);
1752 if (r)
1753 return r;
1754
1755 // Show capabilities
1756 r = pakfire_jail_show_capabilities(jail);
1757 if (r)
1758 return r;
1759
1760 // Filter syscalls
1761 r = pakfire_jail_limit_syscalls(jail);
1762 if (r)
1763 return r;
1764
1765 DEBUG(jail->pakfire, "Child process initialization done\n");
1766 DEBUG(jail->pakfire, "Launching command:\n");
1767
1768 // Log argv
1769 for (unsigned int i = 0; argv[i]; i++)
1770 DEBUG(jail->pakfire, " argv[%d] = %s\n", i, argv[i]);
1771
1772 // exec() command
1773 r = execvpe(argv[0], (char**)argv, jail->env);
1774 if (r < 0) {
1775 // Translate errno into regular exit code
1776 switch (errno) {
1777 case ENOENT:
1778 // Ignore if the command doesn't exist
1779 if (ctx->flags & PAKFIRE_JAIL_NOENT_OK)
1780 r = 0;
1781 else
1782 r = 127;
1783
1784 break;
1785
1786 default:
1787 r = 1;
1788 }
1789
1790 ERROR(jail->pakfire, "Could not execve(%s): %m\n", argv[0]);
1791 }
1792
1793 // We should not get here
1794 return r;
1795 }
1796
1797 // Run a command in the jail
1798 static int __pakfire_jail_exec(struct pakfire_jail* jail, const char* argv[],
1799 const int interactive,
1800 pakfire_jail_communicate_in communicate_in,
1801 pakfire_jail_communicate_out communicate_out,
1802 void* data, int flags) {
1803 int exit = -1;
1804 int r;
1805
1806 // Check if argv is valid
1807 if (!argv || !argv[0]) {
1808 errno = EINVAL;
1809 return -1;
1810 }
1811
1812 // Send any output to the default logger if no callback is set
1813 if (!communicate_out)
1814 communicate_out = pakfire_jail_default_log_callback;
1815
1816 // Initialize context for this call
1817 struct pakfire_jail_exec ctx = {
1818 .flags = flags,
1819
1820 .pipes = {
1821 .stdin = { -1, -1 },
1822 .stdout = { -1, -1 },
1823 .stderr = { -1, -1 },
1824 },
1825
1826 .communicate = {
1827 .in = communicate_in,
1828 .out = communicate_out,
1829 .data = data,
1830 },
1831
1832 .pidfd = -1,
1833 };
1834
1835 DEBUG(jail->pakfire, "Executing jail...\n");
1836
1837 // Enable networking in interactive mode
1838 if (interactive)
1839 ctx.flags |= PAKFIRE_JAIL_HAS_NETWORKING;
1840
1841 /*
1842 Setup a file descriptor which can be used to notify the client that the parent
1843 has completed configuration.
1844 */
1845 ctx.completed_fd = eventfd(0, EFD_CLOEXEC);
1846 if (ctx.completed_fd < 0) {
1847 ERROR(jail->pakfire, "eventfd() failed: %m\n");
1848 return -1;
1849 }
1850
1851 // Create pipes to communicate with child process if we are not running interactively
1852 if (!interactive) {
1853 // stdin (only if callback is set)
1854 if (ctx.communicate.in) {
1855 r = pakfire_jail_setup_pipe(jail, &ctx.pipes.stdin, 0);
1856 if (r)
1857 goto ERROR;
1858 }
1859
1860 // stdout
1861 r = pakfire_jail_setup_pipe(jail, &ctx.pipes.stdout, 0);
1862 if (r)
1863 goto ERROR;
1864
1865 // stderr
1866 r = pakfire_jail_setup_pipe(jail, &ctx.pipes.stderr, 0);
1867 if (r)
1868 goto ERROR;
1869 }
1870
1871 // Setup pipes for logging
1872 // INFO
1873 r = pakfire_jail_setup_pipe(jail, &ctx.pipes.log_INFO, O_CLOEXEC);
1874 if (r)
1875 goto ERROR;
1876
1877 // ERROR
1878 r = pakfire_jail_setup_pipe(jail, &ctx.pipes.log_ERROR, O_CLOEXEC);
1879 if (r)
1880 goto ERROR;
1881
1882 #ifdef ENABLE_DEBUG
1883 // DEBUG
1884 r = pakfire_jail_setup_pipe(jail, &ctx.pipes.log_DEBUG, O_CLOEXEC);
1885 if (r)
1886 goto ERROR;
1887 #endif /* ENABLE_DEBUG */
1888
1889 // Configure child process
1890 struct clone_args args = {
1891 .flags =
1892 CLONE_NEWCGROUP |
1893 CLONE_NEWIPC |
1894 CLONE_NEWNS |
1895 CLONE_NEWPID |
1896 CLONE_NEWTIME |
1897 CLONE_NEWUSER |
1898 CLONE_NEWUTS |
1899 CLONE_PIDFD,
1900 .exit_signal = SIGCHLD,
1901 .pidfd = (long long unsigned int)&ctx.pidfd,
1902 };
1903
1904 // Launch the process in a cgroup that is a leaf of the configured cgroup
1905 if (jail->cgroup) {
1906 args.flags |= CLONE_INTO_CGROUP;
1907
1908 // Fetch our UUID
1909 const char* uuid = pakfire_jail_uuid(jail);
1910
1911 // Create a temporary cgroup
1912 r = pakfire_cgroup_child(&ctx.cgroup, jail->cgroup, uuid, 0);
1913 if (r) {
1914 ERROR(jail->pakfire, "Could not create cgroup for jail: %m\n");
1915 goto ERROR;
1916 }
1917
1918 // Clone into this cgroup
1919 args.cgroup = pakfire_cgroup_fd(ctx.cgroup);
1920 }
1921
1922 // Setup networking
1923 if (!pakfire_jail_exec_has_flag(&ctx, PAKFIRE_JAIL_HAS_NETWORKING)) {
1924 args.flags |= CLONE_NEWNET;
1925 }
1926
1927 // Fork this process
1928 ctx.pid = clone3(&args, sizeof(args));
1929 if (ctx.pid < 0) {
1930 ERROR(jail->pakfire, "Could not clone: %m\n");
1931 return -1;
1932
1933 // Child process
1934 } else if (ctx.pid == 0) {
1935 r = pakfire_jail_child(jail, &ctx, argv);
1936 _exit(r);
1937 }
1938
1939 // Parent process
1940 r = pakfire_jail_parent(jail, &ctx);
1941 if (r)
1942 goto ERROR;
1943
1944 DEBUG(jail->pakfire, "Waiting for PID %d to finish its work\n", ctx.pid);
1945
1946 // Read output of the child process
1947 r = pakfire_jail_wait(jail, &ctx);
1948 if (r)
1949 goto ERROR;
1950
1951 // Handle exit status
1952 switch (ctx.status.si_code) {
1953 case CLD_EXITED:
1954 DEBUG(jail->pakfire, "The child process exited with code %d\n",
1955 ctx.status.si_status);
1956
1957 // Pass exit code
1958 exit = ctx.status.si_status;
1959 break;
1960
1961 case CLD_KILLED:
1962 ERROR(jail->pakfire, "The child process was killed\n");
1963 exit = 139;
1964 break;
1965
1966 case CLD_DUMPED:
1967 ERROR(jail->pakfire, "The child process terminated abnormally\n");
1968 break;
1969
1970 // Log anything else
1971 default:
1972 ERROR(jail->pakfire, "Unknown child exit code: %d\n", ctx.status.si_code);
1973 break;
1974 }
1975
1976 ERROR:
1977 // Destroy the temporary cgroup (if any)
1978 if (ctx.cgroup) {
1979 // Read cgroup stats
1980 r = pakfire_cgroup_stat(ctx.cgroup, &ctx.cgroup_stats);
1981 if (r) {
1982 ERROR(jail->pakfire, "Could not read cgroup stats: %m\n");
1983 } else {
1984 pakfire_cgroup_stat_dump(ctx.cgroup, &ctx.cgroup_stats);
1985 }
1986
1987 pakfire_cgroup_destroy(ctx.cgroup);
1988 pakfire_cgroup_unref(ctx.cgroup);
1989 }
1990
1991 // Close any file descriptors
1992 pakfire_jail_close_pipe(jail, ctx.pipes.stdin);
1993 pakfire_jail_close_pipe(jail, ctx.pipes.stdout);
1994 pakfire_jail_close_pipe(jail, ctx.pipes.stderr);
1995 if (ctx.pidfd)
1996 close(ctx.pidfd);
1997 pakfire_jail_close_pipe(jail, ctx.pipes.log_INFO);
1998 pakfire_jail_close_pipe(jail, ctx.pipes.log_ERROR);
1999 pakfire_jail_close_pipe(jail, ctx.pipes.log_DEBUG);
2000
2001 return exit;
2002 }
2003
2004 PAKFIRE_EXPORT int pakfire_jail_exec(
2005 struct pakfire_jail* jail,
2006 const char* argv[],
2007 pakfire_jail_communicate_in callback_in,
2008 pakfire_jail_communicate_out callback_out,
2009 void* data, int flags) {
2010 return __pakfire_jail_exec(jail, argv, 0, callback_in, callback_out, data, flags);
2011 }
2012
/*
	Runs the command in argv interactively in the jail, i.e. without any
	callbacks for input or output.

	Returns the error code of pakfire_jail_setup_interactive_env() if that
	fails, and the return value of __pakfire_jail_exec() otherwise.
*/
static int pakfire_jail_exec_interactive(
		struct pakfire_jail* jail, const char* argv[], int flags) {
	// Prepare the environment for interactive use
	const int r = pakfire_jail_setup_interactive_env(jail);
	if (r != 0)
		return r;

	// No callbacks and no callback data in interactive mode
	return __pakfire_jail_exec(jail, argv, 1, NULL, NULL, NULL, flags);
}
2024
2025 int pakfire_jail_exec_script(struct pakfire_jail* jail,
2026 const char* script,
2027 const size_t size,
2028 const char* args[],
2029 pakfire_jail_communicate_in callback_in,
2030 pakfire_jail_communicate_out callback_out,
2031 void* data) {
2032 char path[PATH_MAX];
2033 const char** argv = NULL;
2034 FILE* f = NULL;
2035 int r;
2036
2037 const char* root = pakfire_get_path(jail->pakfire);
2038
2039 // Write the scriptlet to disk
2040 r = pakfire_path_join(path, root, PAKFIRE_TMP_DIR "/pakfire-script.XXXXXX");
2041 if (r)
2042 goto ERROR;
2043
2044 // Create a temporary file
2045 f = pakfire_mktemp(path, 0700);
2046 if (!f) {
2047 ERROR(jail->pakfire, "Could not create temporary file: %m\n");
2048 goto ERROR;
2049 }
2050
2051 DEBUG(jail->pakfire, "Writing script to %s:\n%.*s\n", path, (int)size, script);
2052
2053 // Write data
2054 r = fprintf(f, "%s", script);
2055 if (r < 0) {
2056 ERROR(jail->pakfire, "Could not write script to file %s: %m\n", path);
2057 goto ERROR;
2058 }
2059
2060 // Close file
2061 r = fclose(f);
2062 if (r) {
2063 ERROR(jail->pakfire, "Could not close script file %s: %m\n", path);
2064 goto ERROR;
2065 }
2066
2067 f = NULL;
2068
2069 // Count how many arguments were passed
2070 unsigned int argc = 1;
2071 if (args) {
2072 for (const char** arg = args; *arg; arg++)
2073 argc++;
2074 }
2075
2076 argv = calloc(argc + 1, sizeof(*argv));
2077 if (!argv) {
2078 ERROR(jail->pakfire, "Could not allocate argv: %m\n");
2079 goto ERROR;
2080 }
2081
2082 // Set command
2083 argv[0] = (root) ? pakfire_path_relpath(root, path) : path;
2084
2085 // Copy args
2086 for (unsigned int i = 1; i < argc; i++)
2087 argv[i] = args[i-1];
2088
2089 // Run the script
2090 r = pakfire_jail_exec(jail, argv, callback_in, callback_out, data, 0);
2091
2092 ERROR:
2093 if (argv)
2094 free(argv);
2095 if (f)
2096 fclose(f);
2097
2098 // Remove script from disk
2099 if (*path)
2100 unlink(path);
2101
2102 return r;
2103 }
2104
2105 /*
2106 A convenience function that creates a new jail, runs the given command and destroys
2107 the jail again.
2108 */
2109 int pakfire_jail_run(struct pakfire* pakfire, const char* argv[], int flags, char** output) {
2110 struct pakfire_jail* jail = NULL;
2111 int r;
2112
2113 // Create a new jail
2114 r = pakfire_jail_create(&jail, pakfire);
2115 if (r)
2116 goto ERROR;
2117
2118 // Execute the command
2119 r = pakfire_jail_exec(jail, argv, NULL, pakfire_jail_capture_stdout, output, 0);
2120
2121 ERROR:
2122 if (jail)
2123 pakfire_jail_unref(jail);
2124
2125 return r;
2126 }
2127
/*
	A convenience function that creates a new jail, runs the given script
	with the arguments in argv and releases the jail again.

	NOTE: flags is accepted, but not used.

	Returns the error code of pakfire_jail_create() if no jail could be
	created, and the return value of pakfire_jail_exec_script() otherwise.
*/
int pakfire_jail_run_script(struct pakfire* pakfire,
		const char* script, const size_t length, const char* argv[], int flags) {
	struct pakfire_jail* jail = NULL;

	// Only execute the script if we have a jail
	int r = pakfire_jail_create(&jail, pakfire);
	if (r == 0)
		r = pakfire_jail_exec_script(jail, script, length, argv, NULL, NULL, NULL);

	// Drop our reference to the jail
	if (jail != NULL)
		pakfire_jail_unref(jail);

	return r;
}
2147
/*
	Launches an interactive login shell (/bin/bash) in the jail.

	Returns a negative value if the shell could not be run. Whatever the
	shell itself has returned is ignored and reported as 0.
*/
int pakfire_jail_shell(struct pakfire_jail* jail) {
	const char* command[] = {
		"/bin/bash", "--login", NULL,
	};

	// Run the shell
	const int r = pakfire_jail_exec_interactive(jail, command, 0);

	// Only negative values are errors; exit codes of the shell are ignored
	return (r < 0) ? r : 0;
}
2165
2166 static int pakfire_jail_run_if_possible(struct pakfire* pakfire, const char** argv) {
2167 char path[PATH_MAX];
2168 int r;
2169
2170 r = pakfire_path(pakfire, path, "%s", *argv);
2171 if (r)
2172 return r;
2173
2174 // Check if the file is executable
2175 r = access(path, X_OK);
2176 if (r) {
2177 DEBUG(pakfire, "%s is not executable. Skipping...\n", *argv);
2178 return 0;
2179 }
2180
2181 return pakfire_jail_run(pakfire, argv, 0, NULL);
2182 }
2183
/*
	Runs /sbin/ldconfig in a new jail if it is executable.

	Returns whatever pakfire_jail_run_if_possible() returns.
*/
int pakfire_jail_ldconfig(struct pakfire* pakfire) {
	const char* command[] = { "/sbin/ldconfig", NULL };

	return pakfire_jail_run_if_possible(pakfire, command);
}
2192
/*
	Runs "/usr/bin/systemd-tmpfiles --create" in a new jail if the binary is
	executable.

	Returns whatever pakfire_jail_run_if_possible() returns.
*/
int pakfire_jail_run_systemd_tmpfiles(struct pakfire* pakfire) {
	const char* command[] = { "/usr/bin/systemd-tmpfiles", "--create", NULL };

	return pakfire_jail_run_if_possible(pakfire, command);
}