]> git.ipfire.org Git - people/ms/pakfire.git/blob - src/libpakfire/jail.c
mount: Make changing mount operation not dependent on pakfire
[people/ms/pakfire.git] / src / libpakfire / jail.c
1 /*#############################################################################
2 # #
3 # Pakfire - The IPFire package management system #
4 # Copyright (C) 2022 Pakfire development team #
5 # #
6 # This program is free software: you can redistribute it and/or modify #
7 # it under the terms of the GNU General Public License as published by #
8 # the Free Software Foundation, either version 3 of the License, or #
9 # (at your option) any later version. #
10 # #
11 # This program is distributed in the hope that it will be useful, #
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of #
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
14 # GNU General Public License for more details. #
15 # #
16 # You should have received a copy of the GNU General Public License #
17 # along with this program. If not, see <http://www.gnu.org/licenses/>. #
18 # #
19 #############################################################################*/
20
21 #include <errno.h>
22 #include <fcntl.h>
23 #include <linux/capability.h>
24 #include <linux/sched.h>
25 #include <sys/wait.h>
26 #include <linux/wait.h>
27 #include <sched.h>
28 #include <signal.h>
29 #include <stdlib.h>
30 #include <syscall.h>
31 #include <sys/capability.h>
32 #include <sys/epoll.h>
33 #include <sys/eventfd.h>
34 #include <sys/mount.h>
35 #include <sys/personality.h>
36 #include <sys/prctl.h>
37 #include <sys/resource.h>
38 #include <sys/signalfd.h>
39 #include <sys/timerfd.h>
40 #include <sys/types.h>
41 #include <sys/wait.h>
42
43 // libnl3
44 #include <net/if.h>
45 #include <netlink/route/link.h>
46
47 // libseccomp
48 #include <seccomp.h>
49
50 // libuuid
51 #include <uuid.h>
52
53 #include <pakfire/arch.h>
54 #include <pakfire/cgroup.h>
55 #include <pakfire/jail.h>
56 #include <pakfire/logging.h>
57 #include <pakfire/mount.h>
58 #include <pakfire/pakfire.h>
59 #include <pakfire/path.h>
60 #include <pakfire/private.h>
61 #include <pakfire/pwd.h>
62 #include <pakfire/string.h>
63 #include <pakfire/util.h>
64
// Size of each log buffer in bytes (64 KiB). The expression is parenthesised
// so that the macro expands correctly inside larger expressions
// (e.g. "x / BUFFER_SIZE" or "x % BUFFER_SIZE").
#define BUFFER_SIZE (1024 * 64)

// Number of slots in the environment array of a jail
#define ENVIRON_SIZE 128

// Maximum number of events that are fetched by one call to epoll_wait()
#define EPOLL_MAX_EVENTS 2

// Maximum number of mountpoints that can be registered with a jail
#define MAX_MOUNTPOINTS 8
69
// Environment variables that every newly created jail starts out with.
// The table is terminated by an entry whose key is NULL.
static const struct environ {
	const char* key;
	const char* val;
} ENV[] = {
	{ .key = "HOME", .val = "/root" },
	{ .key = "LANG", .val = "C.utf-8" },
	{ .key = "PATH", .val = "/usr/local/sbin:/usr/sbin:/sbin:/usr/local/bin:/usr/bin:/bin" },
	{ .key = "TERM", .val = "vt100" },

	// Lets processes detect that they are running inside a Pakfire container
	{ .key = "container", .val = "pakfire" },

	// Sentinel
	{ .key = NULL, .val = NULL },
};
84
// One bind mount that has been registered using pakfire_jail_bind()
struct pakfire_jail_mountpoint {
	// Source path as passed to pakfire_jail_bind()
	char source[PATH_MAX];

	// Target path as passed to pakfire_jail_bind()
	char target[PATH_MAX];

	// Flags as passed to pakfire_jail_bind()
	int flags;
};
90
91 struct pakfire_jail {
92 struct pakfire_ctx* ctx;
93 struct pakfire* pakfire;
94 int nrefs;
95
96 // A unique ID for each jail
97 uuid_t uuid;
98 char __uuid[UUID_STR_LEN];
99
100 // Resource Limits
101 int nice;
102
103 // Timeout
104 struct itimerspec timeout;
105
106 // CGroup
107 struct pakfire_cgroup* cgroup;
108
109 // Environment
110 char* env[ENVIRON_SIZE];
111
112 // Mountpoints
113 struct pakfire_jail_mountpoint mountpoints[MAX_MOUNTPOINTS];
114 unsigned int num_mountpoints;
115
116 // Callbacks
117 struct pakfire_jail_callbacks {
118 // Log
119 pakfire_jail_log_callback log;
120 void* log_data;
121 } callbacks;
122 };
123
124 struct pakfire_log_buffer {
125 char data[BUFFER_SIZE];
126 size_t used;
127 };
128
129 struct pakfire_jail_exec {
130 int flags;
131
132 // PID (of the child)
133 pid_t pid;
134 int pidfd;
135
136 // Process status (from waitid)
137 siginfo_t status;
138
139 // FD to notify the client that the parent has finished initialization
140 int completed_fd;
141
142 // Log pipes
143 struct pakfire_jail_pipes {
144 int stdin[2];
145 int stdout[2];
146 int stderr[2];
147
148 // Logging
149 int log_INFO[2];
150 int log_ERROR[2];
151 int log_DEBUG[2];
152 } pipes;
153
154 // Communicate
155 struct pakfire_jail_communicate {
156 pakfire_jail_communicate_in in;
157 pakfire_jail_communicate_out out;
158 void* data;
159 } communicate;
160
161 // Log buffers
162 struct pakfire_jail_buffers {
163 struct pakfire_log_buffer stdout;
164 struct pakfire_log_buffer stderr;
165
166 // Logging
167 struct pakfire_log_buffer log_INFO;
168 struct pakfire_log_buffer log_ERROR;
169 struct pakfire_log_buffer log_DEBUG;
170 } buffers;
171
172 struct pakfire_cgroup* cgroup;
173 struct pakfire_cgroup_stats cgroup_stats;
174 };
175
// Invokes the clone3() system call directly and returns its result
static int clone3(struct clone_args* args, size_t size) {
	const long result = syscall(__NR_clone3, args, size);

	return result;
}
179
180 static int pidfd_send_signal(int pidfd, int sig, siginfo_t* info, unsigned int flags) {
181 return syscall(SYS_pidfd_send_signal, pidfd, sig, info, flags);
182 }
183
// Invokes the pivot_root() system call directly and returns its result
static int pivot_root(const char* new_root, const char* old_root) {
	const long result = syscall(SYS_pivot_root, new_root, old_root);

	return result;
}
187
188 static int pakfire_jail_exec_has_flag(
189 const struct pakfire_jail_exec* ctx, const enum pakfire_jail_exec_flags flag) {
190 return ctx->flags & flag;
191 }
192
193 static void pakfire_jail_free(struct pakfire_jail* jail) {
194 DEBUG(jail->pakfire, "Freeing jail at %p\n", jail);
195
196 // Free environment
197 for (unsigned int i = 0; jail->env[i]; i++)
198 free(jail->env[i]);
199
200 if (jail->cgroup)
201 pakfire_cgroup_unref(jail->cgroup);
202 if (jail->pakfire)
203 pakfire_unref(jail->pakfire);
204 if (jail->ctx)
205 pakfire_ctx_unref(jail->ctx);
206 free(jail);
207 }
208
/*
	Default log callback of a jail.

	Forwards the line to the Pakfire logger that matches the priority.
	Lines of any other priority are dropped (LOG_DEBUG is only handled if
	ENABLE_DEBUG is defined). Always returns zero.
*/
static int pakfire_jail_default_log_callback(struct pakfire* pakfire, void* data,
		int priority, const char* line, size_t length) {
	if (priority == LOG_INFO) {
		INFO(pakfire, "%s", line);

	} else if (priority == LOG_ERR) {
		ERROR(pakfire, "%s", line);

#ifdef ENABLE_DEBUG
	} else if (priority == LOG_DEBUG) {
		DEBUG(pakfire, "%s", line);
#endif
	}

	return 0;
}
232
233 static const char* pakfire_jail_uuid(struct pakfire_jail* jail) {
234 if (!*jail->__uuid)
235 uuid_unparse_lower(jail->uuid, jail->__uuid);
236
237 return jail->__uuid;
238 }
239
/*
	Prepares the environment for an interactive session.

	Sets the prompt and copies TERM and LANG from the calling process if they
	are set there. Returns zero on success or the error of pakfire_jail_set_env().
*/
static int pakfire_jail_setup_interactive_env(struct pakfire_jail* jail) {
	// Variables that are copied from the environment of the calling process
	static const char* const inherited[] = { "TERM", "LANG", NULL };
	int r;

	// Set PS1
	r = pakfire_jail_set_env(jail, "PS1", "pakfire-jail \\w> ");
	if (r)
		return r;

	for (const char* const* name = inherited; *name; name++) {
		const char* value = secure_getenv(*name);

		// Skip anything that is not set
		if (!value)
			continue;

		r = pakfire_jail_set_env(jail, *name, value);
		if (r)
			return r;
	}

	return 0;
}
264
265 PAKFIRE_EXPORT int pakfire_jail_create(struct pakfire_jail** jail, struct pakfire* pakfire) {
266 int r;
267
268 const char* arch = pakfire_get_effective_arch(pakfire);
269
270 // Allocate a new jail
271 struct pakfire_jail* j = calloc(1, sizeof(*j));
272 if (!j)
273 return 1;
274
275 // Reference context
276 j->ctx = pakfire_ctx(pakfire);
277
278 // Reference Pakfire
279 j->pakfire = pakfire_ref(pakfire);
280
281 // Initialize reference counter
282 j->nrefs = 1;
283
284 // Generate a random UUID
285 uuid_generate_random(j->uuid);
286
287 DEBUG(j->pakfire, "Allocated new jail at %p\n", j);
288
289 // Set the default logging callback
290 pakfire_jail_set_log_callback(j, pakfire_jail_default_log_callback, NULL);
291
292 // Set default environment
293 for (const struct environ* e = ENV; e->key; e++) {
294 r = pakfire_jail_set_env(j, e->key, e->val);
295 if (r)
296 goto ERROR;
297 }
298
299 // Enable all CPU features that CPU has to offer
300 if (!pakfire_arch_is_supported_by_host(arch)) {
301 r = pakfire_jail_set_env(j, "QEMU_CPU", "max");
302 if (r)
303 goto ERROR;
304 }
305
306 // Set container UUID
307 r = pakfire_jail_set_env(j, "container_uuid", pakfire_jail_uuid(j));
308 if (r)
309 goto ERROR;
310
311 // Disable systemctl to talk to systemd
312 if (!pakfire_on_root(j->pakfire)) {
313 r = pakfire_jail_set_env(j, "SYSTEMD_OFFLINE", "1");
314 if (r)
315 goto ERROR;
316 }
317
318 // Done
319 *jail = j;
320 return 0;
321
322 ERROR:
323 pakfire_jail_free(j);
324
325 return r;
326 }
327
328 PAKFIRE_EXPORT struct pakfire_jail* pakfire_jail_ref(struct pakfire_jail* jail) {
329 ++jail->nrefs;
330
331 return jail;
332 }
333
334 PAKFIRE_EXPORT struct pakfire_jail* pakfire_jail_unref(struct pakfire_jail* jail) {
335 if (--jail->nrefs > 0)
336 return jail;
337
338 pakfire_jail_free(jail);
339 return NULL;
340 }
341
342 // Logging Callback
343
344 PAKFIRE_EXPORT void pakfire_jail_set_log_callback(struct pakfire_jail* jail,
345 pakfire_jail_log_callback callback, void* data) {
346 jail->callbacks.log = callback;
347 jail->callbacks.log_data = data;
348 }
349
350 // Resource Limits
351
352 PAKFIRE_EXPORT int pakfire_jail_nice(struct pakfire_jail* jail, int nice) {
353 // Check if nice level is in range
354 if (nice < -19 || nice > 20) {
355 errno = EINVAL;
356 return 1;
357 }
358
359 // Store nice level
360 jail->nice = nice;
361
362 return 0;
363 }
364
365 int pakfire_jail_set_cgroup(struct pakfire_jail* jail, struct pakfire_cgroup* cgroup) {
366 // Free any previous cgroup
367 if (jail->cgroup) {
368 pakfire_cgroup_unref(jail->cgroup);
369 jail->cgroup = NULL;
370 }
371
372 // Set any new cgroup
373 if (cgroup) {
374 DEBUG(jail->pakfire, "Setting cgroup %p\n", cgroup);
375
376 jail->cgroup = pakfire_cgroup_ref(cgroup);
377 }
378
379 // Done
380 return 0;
381 }
382
383 // Environment
384
385 // Returns the length of the environment
386 static unsigned int pakfire_jail_env_length(struct pakfire_jail* jail) {
387 unsigned int i = 0;
388
389 // Count everything in the environment
390 for (char** e = jail->env; *e; e++)
391 i++;
392
393 return i;
394 }
395
396 // Finds an existing environment variable and returns its index or -1 if not found
397 static int pakfire_jail_find_env(struct pakfire_jail* jail, const char* key) {
398 if (!key) {
399 errno = EINVAL;
400 return -1;
401 }
402
403 const size_t length = strlen(key);
404
405 for (unsigned int i = 0; jail->env[i]; i++) {
406 if ((pakfire_string_startswith(jail->env[i], key)
407 && *(jail->env[i] + length) == '=')) {
408 return i;
409 }
410 }
411
412 // Nothing found
413 return -1;
414 }
415
416 // Returns the value of an environment variable or NULL
417 PAKFIRE_EXPORT const char* pakfire_jail_get_env(struct pakfire_jail* jail,
418 const char* key) {
419 int i = pakfire_jail_find_env(jail, key);
420 if (i < 0)
421 return NULL;
422
423 return jail->env[i] + strlen(key) + 1;
424 }
425
426 // Sets an environment variable
427 PAKFIRE_EXPORT int pakfire_jail_set_env(struct pakfire_jail* jail,
428 const char* key, const char* value) {
429 // Find the index where to write this value to
430 int i = pakfire_jail_find_env(jail, key);
431 if (i < 0)
432 i = pakfire_jail_env_length(jail);
433
434 // Return -ENOSPC when the environment is full
435 if (i >= ENVIRON_SIZE) {
436 errno = ENOSPC;
437 return -1;
438 }
439
440 // Free any previous value
441 if (jail->env[i])
442 free(jail->env[i]);
443
444 // Format and set environment variable
445 asprintf(&jail->env[i], "%s=%s", key, value);
446
447 DEBUG(jail->pakfire, "Set environment variable: %s\n", jail->env[i]);
448
449 return 0;
450 }
451
452 // Imports an environment
453 PAKFIRE_EXPORT int pakfire_jail_import_env(struct pakfire_jail* jail, const char* env[]) {
454 if (!env)
455 return 0;
456
457 char* key;
458 char* val;
459 int r;
460
461 // Copy environment variables
462 for (unsigned int i = 0; env[i]; i++) {
463 r = pakfire_string_partition(env[i], "=", &key, &val);
464 if (r)
465 continue;
466
467 // Set value
468 r = pakfire_jail_set_env(jail, key, val);
469
470 if (key)
471 free(key);
472 if (val)
473 free(val);
474
475 // Break on error
476 if (r)
477 return r;
478 }
479
480 return 0;
481 }
482
483 // Timeout
484
485 PAKFIRE_EXPORT int pakfire_jail_set_timeout(
486 struct pakfire_jail* jail, unsigned int timeout) {
487 // Store value
488 jail->timeout.it_value.tv_sec = timeout;
489
490 if (timeout > 0)
491 DEBUG(jail->pakfire, "Timeout set to %u second(s)\n", timeout);
492 else
493 DEBUG(jail->pakfire, "Timeout disabled\n");
494
495 return 0;
496 }
497
498 static int pakfire_jail_create_timer(struct pakfire_jail* jail) {
499 int r;
500
501 // Nothing to do if no timeout has been set
502 if (!jail->timeout.it_value.tv_sec)
503 return -1;
504
505 // Create a new timer
506 const int fd = timerfd_create(CLOCK_MONOTONIC, 0);
507 if (fd < 0) {
508 ERROR(jail->pakfire, "Could not create timer: %m\n");
509 goto ERROR;
510 }
511
512 // Arm timer
513 r = timerfd_settime(fd, 0, &jail->timeout, NULL);
514 if (r) {
515 ERROR(jail->pakfire, "Could not arm timer: %m\n");
516 goto ERROR;
517 }
518
519 return fd;
520
521 ERROR:
522 if (fd >= 0)
523 close(fd);
524
525 return -1;
526 }
527
528 // Signals
529
530 static int pakfire_jail_handle_signals(struct pakfire_jail* jail) {
531 sigset_t mask;
532 int r;
533
534 sigemptyset(&mask);
535 sigaddset(&mask, SIGINT);
536
537 // Block signals
538 r = sigprocmask(SIG_BLOCK, &mask, NULL);
539 if (r < 0) {
540 ERROR(jail->pakfire, "Failed to block signals: %m\n");
541 return r;
542 }
543
544 // Create a file descriptor
545 r = signalfd(-1, &mask, SFD_NONBLOCK|SFD_CLOEXEC);
546 if (r < 0) {
547 ERROR(jail->pakfire, "Failed to create signalfd: %m\n");
548 return r;
549 }
550
551 return r;
552 }
553
554 /*
555 This function replaces any logging in the child process.
556
557 All log messages will be sent to the parent process through their respective pipes.
558 */
559 static void pakfire_jail_log_redirect(void* data, int priority, const char* file,
560 int line, const char* fn, const char* format, va_list args) {
561 struct pakfire_jail_pipes* pipes = (struct pakfire_jail_pipes*)data;
562 int fd;
563
564 switch (priority) {
565 case LOG_INFO:
566 fd = pipes->log_INFO[1];
567 break;
568
569 case LOG_ERR:
570 fd = pipes->log_ERROR[1];
571 break;
572
573 #ifdef ENABLE_DEBUG
574 case LOG_DEBUG:
575 fd = pipes->log_DEBUG[1];
576 break;
577 #endif /* ENABLE_DEBUG */
578
579 // Ignore any messages of an unknown priority
580 default:
581 return;
582 }
583
584 // Send the log message
585 if (fd >= 0)
586 vdprintf(fd, format, args);
587 }
588
589 static int pakfire_jail_log_buffer_is_full(const struct pakfire_log_buffer* buffer) {
590 return (sizeof(buffer->data) == buffer->used);
591 }
592
593 /*
594 This function reads as much data as it can from the file descriptor.
595 If it finds a whole line in it, it will send it to the logger and repeat the process.
596 If not newline character is found, it will try to read more data until it finds one.
597 */
598 static int pakfire_jail_handle_log(struct pakfire_jail* jail,
599 struct pakfire_jail_exec* ctx, int priority, int fd,
600 struct pakfire_log_buffer* buffer, pakfire_jail_communicate_out callback, void* data) {
601 char line[BUFFER_SIZE + 1];
602
603 // Fill up buffer from fd
604 if (buffer->used < sizeof(buffer->data)) {
605 ssize_t bytes_read = read(fd, buffer->data + buffer->used,
606 sizeof(buffer->data) - buffer->used);
607
608 // Handle errors
609 if (bytes_read < 0) {
610 ERROR(jail->pakfire, "Could not read from fd %d: %m\n", fd);
611 return -1;
612 }
613
614 // Update buffer size
615 buffer->used += bytes_read;
616 }
617
618 // See if we have any lines that we can write
619 while (buffer->used) {
620 // Search for the end of the first line
621 char* eol = memchr(buffer->data, '\n', buffer->used);
622
623 // No newline found
624 if (!eol) {
625 // If the buffer is full, we send the content to the logger and try again
626 // This should not happen in practise
627 if (pakfire_jail_log_buffer_is_full(buffer)) {
628 DEBUG(jail->pakfire, "Logging buffer is full. Sending all content\n");
629
630 eol = buffer->data + sizeof(buffer->data) - 1;
631
632 // Otherwise we might have only read parts of the output
633 } else
634 break;
635 }
636
637 // Find the length of the string
638 size_t length = eol - buffer->data + 1;
639
640 // Copy the line into the buffer
641 memcpy(line, buffer->data, length);
642
643 // Terminate the string
644 line[length] = '\0';
645
646 // Log the line
647 if (callback) {
648 int r = callback(jail->pakfire, data, priority, line, length);
649 if (r) {
650 ERROR(jail->pakfire, "The logging callback returned an error: %d\n", r);
651 return r;
652 }
653 }
654
655 // Remove line from buffer
656 memmove(buffer->data, buffer->data + length, buffer->used - length);
657 buffer->used -= length;
658 }
659
660 return 0;
661 }
662
663 static int pakfire_jail_stream_stdin(struct pakfire_jail* jail,
664 struct pakfire_jail_exec* ctx, const int fd) {
665 int r;
666
667 // Nothing to do if there is no stdin callback set
668 if (!ctx->communicate.in) {
669 DEBUG(jail->pakfire, "Callback for standard input is not set\n");
670 return 0;
671 }
672
673 // Skip if the writing pipe has already been closed
674 if (!ctx->pipes.stdin[1])
675 return 0;
676
677 DEBUG(jail->pakfire, "Streaming standard input...\n");
678
679 // Calling the callback
680 r = ctx->communicate.in(jail->pakfire, ctx->communicate.data, fd);
681
682 DEBUG(jail->pakfire, "Standard input callback finished: %d\n", r);
683
684 // The callback signaled that it has written everything
685 if (r == EOF) {
686 DEBUG(jail->pakfire, "Closing standard input pipe\n");
687
688 // Close the file-descriptor
689 close(fd);
690
691 // Reset the file-descriptor so it won't be closed again later
692 ctx->pipes.stdin[1] = -1;
693
694 // Report success
695 r = 0;
696 }
697
698 return r;
699 }
700
701 static int pakfire_jail_setup_pipe(struct pakfire_jail* jail, int (*fds)[2], const int flags) {
702 int r = pipe2(*fds, flags);
703 if (r < 0) {
704 ERROR(jail->pakfire, "Could not setup pipe: %m\n");
705 return 1;
706 }
707
708 return 0;
709 }
710
// Closes both ends of a pipe, skipping any end that is not open (negative)
static void pakfire_jail_close_pipe(struct pakfire_jail* jail, int fds[2]) {
	for (int* fd = fds; fd < fds + 2; fd++) {
		if (*fd < 0)
			continue;

		close(*fd);
	}
}
716
/*
	Convenience function for the side that reads from a pipe.

	Closes the write end (and marks it as closed) and returns the read end,
	or -1 if the read end is not open.
*/
static int pakfire_jail_get_pipe_to_read(struct pakfire_jail* jail, int (*fds)[2]) {
	// Index 0 is the read end, index 1 is the write end
	int* pipe_fds = *fds;

	// We won't write, so close that end
	if (pipe_fds[1] >= 0) {
		close(pipe_fds[1]);
		pipe_fds[1] = -1;
	}

	// Return the read end
	return (pipe_fds[0] >= 0) ? pipe_fds[0] : -1;
}
738
/*
	Convenience function for the side that writes to a pipe.

	Closes the read end (and marks it as closed) and returns the write end,
	or -1 if the write end is not open.
*/
static int pakfire_jail_get_pipe_to_write(struct pakfire_jail* jail, int (*fds)[2]) {
	// Index 0 is the read end, index 1 is the write end
	int* pipe_fds = *fds;

	// We won't read, so close that end
	if (pipe_fds[0] >= 0) {
		close(pipe_fds[0]);
		pipe_fds[0] = -1;
	}

	// Return the write end
	return (pipe_fds[1] >= 0) ? pipe_fds[1] : -1;
}
756
// Callback for the log pipes of the child process: passes the first length
// bytes of line on to the logger of the parent. Always returns zero.
static int pakfire_jail_log(struct pakfire* pakfire, void* data, int priority,
		const char* line, const size_t length) {
	// The precision of %.*s has to be an int
	const int precision = (int)length;

	pakfire_log_condition(pakfire, priority, 0, "%.*s", precision, line);

	return 0;
}
764
765 static int pakfire_jail_wait(struct pakfire_jail* jail, struct pakfire_jail_exec* ctx) {
766 int epollfd = -1;
767 struct epoll_event ev;
768 struct epoll_event events[EPOLL_MAX_EVENTS];
769 struct signalfd_siginfo siginfo;
770 char garbage[8];
771 int r = 0;
772
773 // Fetch file descriptors from context
774 const int stdin = pakfire_jail_get_pipe_to_write(jail, &ctx->pipes.stdin);
775 const int stdout = pakfire_jail_get_pipe_to_read(jail, &ctx->pipes.stdout);
776 const int stderr = pakfire_jail_get_pipe_to_read(jail, &ctx->pipes.stderr);
777 const int pidfd = ctx->pidfd;
778
779 // Timer
780 const int timerfd = pakfire_jail_create_timer(jail);
781
782 // Logging
783 const int log_INFO = pakfire_jail_get_pipe_to_read(jail, &ctx->pipes.log_INFO);
784 const int log_ERROR = pakfire_jail_get_pipe_to_read(jail, &ctx->pipes.log_ERROR);
785 const int log_DEBUG = pakfire_jail_get_pipe_to_read(jail, &ctx->pipes.log_DEBUG);
786
787 // Signals
788 const int signalfd = pakfire_jail_handle_signals(jail);
789
790 // Make a list of all file descriptors we are interested in
791 const int fds[] = {
792 stdin, stdout, stderr, pidfd, timerfd, signalfd, log_INFO, log_ERROR, log_DEBUG,
793 };
794
795 // Setup epoll
796 epollfd = epoll_create1(0);
797 if (epollfd < 0) {
798 ERROR(jail->pakfire, "Could not initialize epoll(): %m\n");
799 r = 1;
800 goto ERROR;
801 }
802
803 // Turn file descriptors into non-blocking mode and add them to epoll()
804 for (unsigned int i = 0; i < sizeof(fds) / sizeof(*fds); i++) {
805 int fd = fds[i];
806
807 // Skip fds which were not initialized
808 if (fd < 0)
809 continue;
810
811 ev.events = EPOLLHUP;
812
813 if (fd == stdin)
814 ev.events |= EPOLLOUT;
815 else
816 ev.events |= EPOLLIN;
817
818 // Read flags
819 int flags = fcntl(fd, F_GETFL, 0);
820
821 // Set modified flags
822 if (fcntl(fd, F_SETFL, flags|O_NONBLOCK) < 0) {
823 ERROR(jail->pakfire,
824 "Could not set file descriptor %d into non-blocking mode: %m\n", fd);
825 r = 1;
826 goto ERROR;
827 }
828
829 ev.data.fd = fd;
830
831 if (epoll_ctl(epollfd, EPOLL_CTL_ADD, fd, &ev) < 0) {
832 ERROR(jail->pakfire, "Could not add file descriptor %d to epoll(): %m\n", fd);
833 r = 1;
834 goto ERROR;
835 }
836 }
837
838 int ended = 0;
839
840 // Loop for as long as the process is alive
841 while (!ended) {
842 int num = epoll_wait(epollfd, events, EPOLL_MAX_EVENTS, -1);
843 if (num < 1) {
844 // Ignore if epoll_wait() has been interrupted
845 if (errno == EINTR)
846 continue;
847
848 ERROR(jail->pakfire, "epoll_wait() failed: %m\n");
849 r = 1;
850
851 goto ERROR;
852 }
853
854 for (int i = 0; i < num; i++) {
855 int e = events[i].events;
856 int fd = events[i].data.fd;
857
858 struct pakfire_log_buffer* buffer = NULL;
859 pakfire_jail_communicate_out callback = NULL;
860 void* data = NULL;
861 int priority;
862
863 // Check if there is any data to be read
864 if (e & EPOLLIN) {
865 // Handle any changes to the PIDFD
866 if (fd == pidfd) {
867 // Call waidid() and store the result
868 r = waitid(P_PIDFD, ctx->pidfd, &ctx->status, WEXITED);
869 if (r) {
870 ERROR(jail->pakfire, "waitid() failed: %m\n");
871 goto ERROR;
872 }
873
874 // Mark that we have ended so that we will process the remaining
875 // events from epoll() now, but won't restart the outer loop.
876 ended = 1;
877 continue;
878
879 // Handle timer events
880 } else if (fd == timerfd) {
881 DEBUG(jail->pakfire, "Timer event received\n");
882
883 // Disarm the timer
884 r = read(timerfd, garbage, sizeof(garbage));
885 if (r < 1) {
886 ERROR(jail->pakfire, "Could not disarm timer: %m\n");
887 r = 1;
888 goto ERROR;
889 }
890
891 // Terminate the process if it hasn't already ended
892 if (!ended) {
893 DEBUG(jail->pakfire, "Terminating process...\n");
894
895 // Send SIGTERM to the process
896 r = pidfd_send_signal(pidfd, SIGKILL, NULL, 0);
897 if (r) {
898 ERROR(jail->pakfire, "Could not kill process: %m\n");
899 goto ERROR;
900 }
901 }
902
903 // There is nothing else to do
904 continue;
905
906 // Handle signals
907 } else if (fd == signalfd) {
908 // Read the signal
909 r = read(signalfd, &siginfo, sizeof(siginfo));
910 if (r < 1) {
911 ERROR(jail->pakfire, "Could not read signal: %m\n");
912 goto ERROR;
913 }
914
915 DEBUG(jail->pakfire, "Received signal %u\n", siginfo.ssi_signo);
916
917 // Handle signals
918 switch (siginfo.ssi_signo) {
919 // Pass SIGINT down to the child process
920 case SIGINT:
921 r = pidfd_send_signal(pidfd, siginfo.ssi_signo, NULL, 0);
922 if (r) {
923 ERROR(jail->pakfire, "Could not send signal to process: %m\n");
924 goto ERROR;
925 }
926 break;
927
928 default:
929 ERROR(jail->pakfire, "Received unhandled signal %u\n",
930 siginfo.ssi_signo);
931 break;
932 }
933
934 // Don't fall through to log processing
935 continue;
936
937 // Handle logging messages
938 } else if (fd == log_INFO) {
939 buffer = &ctx->buffers.log_INFO;
940 priority = LOG_INFO;
941
942 callback = pakfire_jail_log;
943
944 } else if (fd == log_ERROR) {
945 buffer = &ctx->buffers.log_ERROR;
946 priority = LOG_ERR;
947
948 callback = pakfire_jail_log;
949
950 } else if (fd == log_DEBUG) {
951 buffer = &ctx->buffers.log_DEBUG;
952 priority = LOG_DEBUG;
953
954 callback = pakfire_jail_log;
955
956 // Handle anything from the log pipes
957 } else if (fd == stdout) {
958 buffer = &ctx->buffers.stdout;
959 priority = LOG_INFO;
960
961 // Send any output to the default logger if no callback is set
962 if (ctx->communicate.out) {
963 callback = ctx->communicate.out;
964 data = ctx->communicate.data;
965 } else {
966 callback = jail->callbacks.log;
967 data = jail->callbacks.log_data;
968 }
969
970 } else if (fd == stderr) {
971 buffer = &ctx->buffers.stderr;
972 priority = LOG_ERR;
973
974 // Send any output to the default logger if no callback is set
975 if (ctx->communicate.out) {
976 callback = ctx->communicate.out;
977 data = ctx->communicate.data;
978 } else {
979 callback = jail->callbacks.log;
980 data = jail->callbacks.log_data;
981 }
982
983 } else {
984 DEBUG(jail->pakfire, "Received invalid file descriptor %d\n", fd);
985 continue;
986 }
987
988 // Handle log event
989 r = pakfire_jail_handle_log(jail, ctx, priority, fd, buffer, callback, data);
990 if (r)
991 goto ERROR;
992 }
993
994 if (e & EPOLLOUT) {
995 // Handle standard input
996 if (fd == stdin) {
997 r = pakfire_jail_stream_stdin(jail, ctx, fd);
998 if (r) {
999 switch (errno) {
1000 // Ignore if we filled up the buffer
1001 case EAGAIN:
1002 break;
1003
1004 default:
1005 ERROR(jail->pakfire, "Could not write to stdin: %m\n");
1006 goto ERROR;
1007 }
1008 }
1009 }
1010 }
1011
1012 // Check if any file descriptors have been closed
1013 if (e & EPOLLHUP) {
1014 // Remove the file descriptor
1015 r = epoll_ctl(epollfd, EPOLL_CTL_DEL, fd, NULL);
1016 if (r) {
1017 ERROR(jail->pakfire, "Could not remove closed file-descriptor %d: %m\n", fd);
1018 goto ERROR;
1019 }
1020 }
1021 }
1022 }
1023
1024 ERROR:
1025 if (epollfd >= 0)
1026 close(epollfd);
1027 if (timerfd >= 0)
1028 close(timerfd);
1029 if (signalfd >= 0)
1030 close(signalfd);
1031
1032 return r;
1033 }
1034
/*
	Output callback that collects everything from standard output in a string.

	data must point to a char* which is either NULL or a heap-allocated
	string. Every line of priority LOG_INFO is appended to that string; the
	caller has to free the result. Everything else is sent to the default
	logger.

	Returns zero on success and 1 if the memory could not be allocated, in
	which case the string that has been collected so far is left untouched.
*/
int pakfire_jail_capture_stdout(struct pakfire* pakfire, void* data,
		int priority, const char* line, size_t length) {
	char** output = (char**)data;
	char* buffer = NULL;
	int r;

	// Append everything from stdout to a buffer
	if (output && priority == LOG_INFO) {
		// Format into a new buffer because asprintf() leaves its output
		// pointer undefined if it fails
		r = asprintf(&buffer, "%s%s", (*output) ? *output : "", line);
		if (r < 0)
			return 1;

		// Release the previous buffer, which would be leaked otherwise
		free(*output);
		*output = buffer;

		return 0;
	}

	// Send everything else to the default logger
	return pakfire_jail_default_log_callback(pakfire, NULL, priority, line, length);
}
1051
1052 // Capabilities
1053
1054 // Logs all capabilities of the current process
1055 static int pakfire_jail_show_capabilities(struct pakfire_jail* jail) {
1056 cap_t caps = NULL;
1057 char* name = NULL;
1058 cap_flag_value_t value_e;
1059 cap_flag_value_t value_i;
1060 cap_flag_value_t value_p;
1061 int r;
1062
1063 // Fetch PID
1064 pid_t pid = getpid();
1065
1066 // Fetch all capabilities
1067 caps = cap_get_proc();
1068 if (!caps) {
1069 ERROR(jail->pakfire, "Could not fetch capabilities: %m\n");
1070 r = 1;
1071 goto ERROR;
1072 }
1073
1074 DEBUG(jail->pakfire, "Capabilities of PID %d:\n", pid);
1075
1076 // Iterate over all capabilities
1077 for (unsigned int cap = 0; cap_valid(cap); cap++) {
1078 name = cap_to_name(cap);
1079
1080 // Fetch effective value
1081 r = cap_get_flag(caps, cap, CAP_EFFECTIVE, &value_e);
1082 if (r)
1083 goto ERROR;
1084
1085 // Fetch inheritable value
1086 r = cap_get_flag(caps, cap, CAP_INHERITABLE, &value_i);
1087 if (r)
1088 goto ERROR;
1089
1090 // Fetch permitted value
1091 r = cap_get_flag(caps, cap, CAP_PERMITTED, &value_p);
1092 if (r)
1093 goto ERROR;
1094
1095 DEBUG(jail->pakfire,
1096 " %-24s : %c%c%c\n",
1097 name,
1098 (value_e == CAP_SET) ? 'e' : '-',
1099 (value_i == CAP_SET) ? 'i' : '-',
1100 (value_p == CAP_SET) ? 'p' : '-'
1101 );
1102
1103 // Free name
1104 cap_free(name);
1105 name = NULL;
1106 }
1107
1108 // Success
1109 r = 0;
1110
1111 ERROR:
1112 if (name)
1113 cap_free(name);
1114 if (caps)
1115 cap_free(caps);
1116
1117 return r;
1118 }
1119
1120 static int pakfire_jail_set_capabilities(struct pakfire_jail* jail) {
1121 cap_t caps = NULL;
1122 char* name = NULL;
1123 int r;
1124
1125 // Fetch capabilities
1126 caps = cap_get_proc();
1127 if (!caps) {
1128 ERROR(jail->pakfire, "Could not read capabilities: %m\n");
1129 r = 1;
1130 goto ERROR;
1131 }
1132
1133 // Walk through all capabilities
1134 for (cap_value_t cap = 0; cap_valid(cap); cap++) {
1135 cap_value_t _caps[] = { cap };
1136
1137 // Fetch the name of the capability
1138 name = cap_to_name(cap);
1139
1140 r = cap_set_flag(caps, CAP_EFFECTIVE, 1, _caps, CAP_SET);
1141 if (r) {
1142 ERROR(jail->pakfire, "Could not set %s: %m\n", name);
1143 goto ERROR;
1144 }
1145
1146 r = cap_set_flag(caps, CAP_INHERITABLE, 1, _caps, CAP_SET);
1147 if (r) {
1148 ERROR(jail->pakfire, "Could not set %s: %m\n", name);
1149 goto ERROR;
1150 }
1151
1152 r = cap_set_flag(caps, CAP_PERMITTED, 1, _caps, CAP_SET);
1153 if (r) {
1154 ERROR(jail->pakfire, "Could not set %s: %m\n", name);
1155 goto ERROR;
1156 }
1157
1158 // Free name
1159 cap_free(name);
1160 name = NULL;
1161 }
1162
1163 // Restore all capabilities
1164 r = cap_set_proc(caps);
1165 if (r) {
1166 ERROR(jail->pakfire, "Restoring capabilities failed: %m\n");
1167 goto ERROR;
1168 }
1169
1170 // Add all capabilities to the ambient set
1171 for (unsigned int cap = 0; cap_valid(cap); cap++) {
1172 name = cap_to_name(cap);
1173
1174 // Raise the capability
1175 r = prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_RAISE, cap, 0, 0);
1176 if (r) {
1177 ERROR(jail->pakfire, "Could not set ambient capability %s: %m\n", name);
1178 goto ERROR;
1179 }
1180
1181 // Free name
1182 cap_free(name);
1183 name = NULL;
1184 }
1185
1186 // Success
1187 r = 0;
1188
1189 ERROR:
1190 if (name)
1191 cap_free(name);
1192 if (caps)
1193 cap_free(caps);
1194
1195 return r;
1196 }
1197
1198 // Syscall Filter
1199
1200 static int pakfire_jail_limit_syscalls(struct pakfire_jail* jail) {
1201 const int syscalls[] = {
1202 // The kernel's keyring isn't namespaced
1203 SCMP_SYS(keyctl),
1204 SCMP_SYS(add_key),
1205 SCMP_SYS(request_key),
1206
1207 // Disable userfaultfd
1208 SCMP_SYS(userfaultfd),
1209
1210 // Disable perf which could leak a lot of information about the host
1211 SCMP_SYS(perf_event_open),
1212
1213 0,
1214 };
1215 int r = 1;
1216
1217 DEBUG(jail->pakfire, "Applying syscall filter...\n");
1218
1219 // Setup a syscall filter which allows everything by default
1220 scmp_filter_ctx ctx = seccomp_init(SCMP_ACT_ALLOW);
1221 if (!ctx) {
1222 ERROR(jail->pakfire, "Could not setup seccomp filter: %m\n");
1223 goto ERROR;
1224 }
1225
1226 // All all syscalls
1227 for (const int* syscall = syscalls; *syscall; syscall++) {
1228 r = seccomp_rule_add(ctx, SCMP_ACT_ERRNO(EPERM), *syscall, 0);
1229 if (r) {
1230 ERROR(jail->pakfire, "Could not configure syscall %d: %m\n", *syscall);
1231 goto ERROR;
1232 }
1233 }
1234
1235 // Load syscall filter into the kernel
1236 r = seccomp_load(ctx);
1237 if (r) {
1238 ERROR(jail->pakfire, "Could not load syscall filter into the kernel: %m\n");
1239 goto ERROR;
1240 }
1241
1242 ERROR:
1243 if (ctx)
1244 seccomp_release(ctx);
1245
1246 return r;
1247 }
1248
1249 // Mountpoints
1250
1251 PAKFIRE_EXPORT int pakfire_jail_bind(struct pakfire_jail* jail,
1252 const char* source, const char* target, int flags) {
1253 struct pakfire_jail_mountpoint* mp = NULL;
1254 int r;
1255
1256 // Check if there is any space left
1257 if (jail->num_mountpoints >= MAX_MOUNTPOINTS) {
1258 errno = ENOSPC;
1259 return 1;
1260 }
1261
1262 // Check for valid inputs
1263 if (!source || !target) {
1264 errno = EINVAL;
1265 return 1;
1266 }
1267
1268 // Select the next free slot
1269 mp = &jail->mountpoints[jail->num_mountpoints];
1270
1271 // Copy source
1272 r = pakfire_string_set(mp->source, source);
1273 if (r) {
1274 ERROR(jail->pakfire, "Could not copy source: %m\n");
1275 return r;
1276 }
1277
1278 // Copy target
1279 r = pakfire_string_set(mp->target, target);
1280 if (r) {
1281 ERROR(jail->pakfire, "Could not copy target: %m\n");
1282 return r;
1283 }
1284
1285 // Copy flags
1286 mp->flags = flags;
1287
1288 // Increment counter
1289 jail->num_mountpoints++;
1290
1291 return 0;
1292 }
1293
1294 static int pakfire_jail_mount_networking(struct pakfire_jail* jail) {
1295 int r;
1296
1297 const char* paths[] = {
1298 "/etc/hosts",
1299 "/etc/resolv.conf",
1300 NULL,
1301 };
1302
1303 // Bind-mount all paths read-only
1304 for (const char** path = paths; *path; path++) {
1305 r = pakfire_bind(jail->pakfire, *path, NULL, MS_RDONLY);
1306 if (r) {
1307 switch (errno) {
1308 // Ignore if we don't have permission
1309 case EPERM:
1310 continue;
1311
1312 default:
1313 break;
1314 }
1315 return r;
1316 }
1317 }
1318
1319 return 0;
1320 }
1321
1322 /*
1323 Mounts everything that we require in the new namespace
1324 */
1325 static int pakfire_jail_mount(struct pakfire_jail* jail, struct pakfire_jail_exec* ctx) {
1326 struct pakfire_jail_mountpoint* mp = NULL;
1327 int flags = 0;
1328 int r;
1329
1330 // Enable loop devices
1331 if (pakfire_jail_exec_has_flag(ctx, PAKFIRE_JAIL_HAS_LOOP_DEVICES))
1332 flags |= PAKFIRE_MOUNT_LOOP_DEVICES;
1333
1334 // Mount all default stuff
1335 r = pakfire_mount_all(jail->pakfire, flags);
1336 if (r)
1337 return r;
1338
1339 // Mount networking stuff
1340 if (pakfire_jail_exec_has_flag(ctx, PAKFIRE_JAIL_HAS_NETWORKING)) {
1341 r = pakfire_jail_mount_networking(jail);
1342 if (r)
1343 return r;
1344 }
1345
1346 // Mount all custom stuff
1347 for (unsigned int i = 0; i < jail->num_mountpoints; i++) {
1348 // Fetch mountpoint
1349 mp = &jail->mountpoints[i];
1350
1351 // Mount it
1352 r = pakfire_bind(jail->pakfire, mp->source, mp->target, mp->flags);
1353 if (r)
1354 return r;
1355 }
1356
1357 // Log all mountpoints
1358 pakfire_mount_list(jail->pakfire);
1359
1360 return 0;
1361 }
1362
1363 // Networking
1364
1365 static int pakfire_jail_setup_loopback(struct pakfire_jail* jail) {
1366 struct nl_sock* nl = NULL;
1367 struct nl_cache* cache = NULL;
1368 struct rtnl_link* link = NULL;
1369 struct rtnl_link* change = NULL;
1370 int r;
1371
1372 DEBUG(jail->pakfire, "Setting up loopback...\n");
1373
1374 // Allocate a netlink socket
1375 nl = nl_socket_alloc();
1376 if (!nl) {
1377 ERROR(jail->pakfire, "Could not allocate a netlink socket: %m\n");
1378 r = 1;
1379 goto ERROR;
1380 }
1381
1382 // Connect the socket
1383 r = nl_connect(nl, NETLINK_ROUTE);
1384 if (r) {
1385 ERROR(jail->pakfire, "Could not connect netlink socket: %s\n", nl_geterror(r));
1386 goto ERROR;
1387 }
1388
1389 // Allocate the netlink cache
1390 r = rtnl_link_alloc_cache(nl, AF_UNSPEC, &cache);
1391 if (r < 0) {
1392 ERROR(jail->pakfire, "Unable to allocate netlink cache: %s\n", nl_geterror(r));
1393 goto ERROR;
1394 }
1395
1396 // Fetch loopback interface
1397 link = rtnl_link_get_by_name(cache, "lo");
1398 if (!link) {
1399 ERROR(jail->pakfire, "Could not find lo interface. Ignoring.\n");
1400 r = 0;
1401 goto ERROR;
1402 }
1403
1404 // Allocate a new link
1405 change = rtnl_link_alloc();
1406 if (!change) {
1407 ERROR(jail->pakfire, "Could not allocate change link\n");
1408 r = 1;
1409 goto ERROR;
1410 }
1411
1412 // Set the link to UP
1413 rtnl_link_set_flags(change, IFF_UP);
1414
1415 // Apply any changes
1416 r = rtnl_link_change(nl, link, change, 0);
1417 if (r) {
1418 ERROR(jail->pakfire, "Unable to activate loopback: %s\n", nl_geterror(r));
1419 goto ERROR;
1420 }
1421
1422 // Success
1423 r = 0;
1424
1425 ERROR:
1426 if (nl)
1427 nl_socket_free(nl);
1428
1429 return r;
1430 }
1431
1432 // UID/GID Mapping
1433
1434 static int pakfire_jail_setup_uid_mapping(struct pakfire_jail* jail, pid_t pid) {
1435 char path[PATH_MAX];
1436 int r;
1437
1438 // Skip mapping anything when running on /
1439 if (pakfire_on_root(jail->pakfire))
1440 return 0;
1441
1442 // Make path
1443 r = pakfire_string_format(path, "/proc/%d/uid_map", pid);
1444 if (r)
1445 return r;
1446
1447 // Fetch UID
1448 const uid_t uid = pakfire_uid(jail->pakfire);
1449
1450 // Fetch SUBUID
1451 const struct pakfire_subid* subuid = pakfire_subuid(jail->pakfire);
1452 if (!subuid)
1453 return 1;
1454
1455 /* When running as root, we will map the entire range.
1456
1457 When running as a non-privileged user, we will map the root user inside the jail
1458 to the user's UID outside of the jail, and we will map the rest starting from one.
1459 */
1460
1461 // Running as root
1462 if (uid == 0) {
1463 r = pakfire_file_write(jail->pakfire, path, 0, 0, 0,
1464 "0 %lu %lu\n", subuid->id, subuid->length);
1465 } else {
1466 r = pakfire_file_write(jail->pakfire, path, 0, 0, 0,
1467 "0 %lu 1\n1 %lu %lu\n", uid, subuid->id, subuid->length);
1468 }
1469
1470 if (r) {
1471 ERROR(jail->pakfire, "Could not map UIDs: %m\n");
1472 return r;
1473 }
1474
1475 return r;
1476 }
1477
1478 static int pakfire_jail_setup_gid_mapping(struct pakfire_jail* jail, pid_t pid) {
1479 char path[PATH_MAX];
1480 int r;
1481
1482 // Skip mapping anything when running on /
1483 if (pakfire_on_root(jail->pakfire))
1484 return 0;
1485
1486 // Fetch GID
1487 const gid_t gid = pakfire_gid(jail->pakfire);
1488
1489 // Fetch SUBGID
1490 const struct pakfire_subid* subgid = pakfire_subgid(jail->pakfire);
1491 if (!subgid)
1492 return 1;
1493
1494 // Make path
1495 r = pakfire_string_format(path, "/proc/%d/gid_map", pid);
1496 if (r)
1497 return r;
1498
1499 // Running as root
1500 if (gid == 0) {
1501 r = pakfire_file_write(jail->pakfire, path, 0, 0, 0,
1502 "0 %lu %lu\n", subgid->id, subgid->length);
1503 } else {
1504 r = pakfire_file_write(jail->pakfire, path, 0, 0, 0,
1505 "0 %lu 1\n%1 %lu %lu\n", gid, subgid->id, subgid->length);
1506 }
1507
1508 if (r) {
1509 ERROR(jail->pakfire, "Could not map GIDs: %m\n");
1510 return r;
1511 }
1512
1513 return r;
1514 }
1515
1516 static int pakfire_jail_setgroups(struct pakfire_jail* jail, pid_t pid) {
1517 char path[PATH_MAX];
1518 int r = 1;
1519
1520 // Make path
1521 r = pakfire_string_format(path, "/proc/%d/setgroups", pid);
1522 if (r)
1523 return r;
1524
1525 // Open file for writing
1526 FILE* f = fopen(path, "w");
1527 if (!f) {
1528 ERROR(jail->pakfire, "Could not open %s for writing: %m\n", path);
1529 goto ERROR;
1530 }
1531
1532 // Write content
1533 int bytes_written = fprintf(f, "deny\n");
1534 if (bytes_written <= 0) {
1535 ERROR(jail->pakfire, "Could not write to %s: %m\n", path);
1536 goto ERROR;
1537 }
1538
1539 r = fclose(f);
1540 f = NULL;
1541 if (r) {
1542 ERROR(jail->pakfire, "Could not close %s: %m\n", path);
1543 goto ERROR;
1544 }
1545
1546 ERROR:
1547 if (f)
1548 fclose(f);
1549
1550 return r;
1551 }
1552
1553 static int pakfire_jail_send_signal(struct pakfire_jail* jail, int fd) {
1554 const uint64_t val = 1;
1555 int r = 0;
1556
1557 DEBUG(jail->pakfire, "Sending signal...\n");
1558
1559 // Write to the file descriptor
1560 ssize_t bytes_written = write(fd, &val, sizeof(val));
1561 if (bytes_written < 0 || (size_t)bytes_written < sizeof(val)) {
1562 ERROR(jail->pakfire, "Could not send signal: %m\n");
1563 r = 1;
1564 }
1565
1566 // Close the file descriptor
1567 close(fd);
1568
1569 return r;
1570 }
1571
1572 static int pakfire_jail_wait_for_signal(struct pakfire_jail* jail, int fd) {
1573 uint64_t val = 0;
1574 int r = 0;
1575
1576 DEBUG(jail->pakfire, "Waiting for signal...\n");
1577
1578 ssize_t bytes_read = read(fd, &val, sizeof(val));
1579 if (bytes_read < 0 || (size_t)bytes_read < sizeof(val)) {
1580 ERROR(jail->pakfire, "Error waiting for signal: %m\n");
1581 r = 1;
1582 }
1583
1584 // Close the file descriptor
1585 close(fd);
1586
1587 return r;
1588 }
1589
1590 /*
1591 Performs the initialisation that needs to happen in the parent part
1592 */
1593 static int pakfire_jail_parent(struct pakfire_jail* jail, struct pakfire_jail_exec* ctx) {
1594 int r;
1595
1596 // Setup UID mapping
1597 r = pakfire_jail_setup_uid_mapping(jail, ctx->pid);
1598 if (r)
1599 return r;
1600
1601 // Write "deny" to /proc/PID/setgroups
1602 r = pakfire_jail_setgroups(jail, ctx->pid);
1603 if (r)
1604 return r;
1605
1606 // Setup GID mapping
1607 r = pakfire_jail_setup_gid_mapping(jail, ctx->pid);
1608 if (r)
1609 return r;
1610
1611 // Parent has finished initialisation
1612 DEBUG(jail->pakfire, "Parent has finished initialization\n");
1613
1614 // Send signal to client
1615 r = pakfire_jail_send_signal(jail, ctx->completed_fd);
1616 if (r)
1617 return r;
1618
1619 return 0;
1620 }
1621
1622 static int pakfire_jail_switch_root(struct pakfire_jail* jail, const char* root) {
1623 int r;
1624
1625 // Change to the new root
1626 r = chdir(root);
1627 if (r) {
1628 ERROR(jail->pakfire, "chdir(%s) failed: %m\n", root);
1629 return r;
1630 }
1631
1632 // Switch Root!
1633 r = pivot_root(".", ".");
1634 if (r) {
1635 ERROR(jail->pakfire, "Failed changing into the new root directory %s: %m\n", root);
1636 return r;
1637 }
1638
1639 // Umount the old root
1640 r = umount2(".", MNT_DETACH);
1641 if (r) {
1642 ERROR(jail->pakfire, "Could not umount the old root filesystem: %m\n");
1643 return r;
1644 }
1645
1646 return 0;
1647 }
1648
/*
	Entry point of the cloned child process.

	Waits until the parent has finished its part of the setup, prepares
	the environment (process attributes, mounts, root directory,
	personality, loopback, nice level, standard file descriptors, open
	file limit, capabilities, syscall filter) and finally replaces itself
	with argv using execvpe().

	The return value is passed to _exit() by the caller and therefore
	becomes the exit code of the child:

	  126  a prctl() call failed or the process is not running as root
	  127  the command does not exist (0 if PAKFIRE_JAIL_NOENT_OK is set)
	    1  execvpe() failed for any other reason, or a setup step failed

	Some setup steps pass the return code of their helper function through
	unmodified.
*/
static int pakfire_jail_child(struct pakfire_jail* jail, struct pakfire_jail_exec* ctx,
		const char* argv[]) {
	int r;

	// Redirect any logging to our log pipe
	pakfire_ctx_set_log_callback(jail->ctx, pakfire_jail_log_redirect, &ctx->pipes);

	// Fetch my own PID
	pid_t pid = getpid();

	DEBUG(jail->pakfire, "Launched child process in jail with PID %d\n", pid);

	// Wait for the parent to finish initialization
	// (it writes the UID/GID mappings before it sends the signal)
	r = pakfire_jail_wait_for_signal(jail, ctx->completed_fd);
	if (r)
		return r;

	// Die with parent
	r = prctl(PR_SET_PDEATHSIG, SIGKILL, 0, 0, 0);
	if (r) {
		ERROR(jail->pakfire, "Could not configure to die with parent: %m\n");
		return 126;
	}

	// Make this process dumpable
	r = prctl (PR_SET_DUMPABLE, 1, 0, 0, 0);
	if (r) {
		ERROR(jail->pakfire, "Could not make the process dumpable: %m\n");
		return 126;
	}

	// Don't drop any capabilities on setuid()
	r = prctl(PR_SET_KEEPCAPS, 1);
	if (r) {
		ERROR(jail->pakfire, "Could not set PR_SET_KEEPCAPS: %m\n");
		return 126;
	}

	// Fetch UID/GID
	uid_t uid = getuid();
	gid_t gid = getgid();

	// Fetch EUID/EGID
	uid_t euid = geteuid();
	gid_t egid = getegid();

	DEBUG(jail->pakfire, " UID: %u (effective %u)\n", uid, euid);
	DEBUG(jail->pakfire, " GID: %u (effective %u)\n", gid, egid);

	// Check if we are (effectively running as root)
	// All four IDs have to be zero
	if (uid || gid || euid || egid) {
		ERROR(jail->pakfire, "Child process is not running as root\n");
		return 126;
	}

	const char* root = pakfire_get_path(jail->pakfire);
	const char* arch = pakfire_get_effective_arch(jail->pakfire);

	// Change mount propagation to slave to receive anything from the parent namespace
	r = pakfire_mount_change_propagation(jail->ctx, "/", MS_SLAVE);
	if (r)
		return r;

	// Make root a mountpoint in the new mount namespace
	r = pakfire_mount_make_mounpoint(jail->pakfire, root);
	if (r)
		return r;

	// Change mount propagation to private
	r = pakfire_mount_change_propagation(jail->ctx, root, MS_PRIVATE);
	if (r)
		return r;

	// Change root (unless root is /)
	if (!pakfire_on_root(jail->pakfire)) {
		// Mount everything
		r = pakfire_jail_mount(jail, ctx);
		if (r)
			return r;

		// Switch into the new root (this uses pivot_root(), not chroot())
		r = pakfire_jail_switch_root(jail, root);
		if (r)
			return r;
	}

	// Set personality
	// (skipped if pakfire_arch_personality() returns zero)
	unsigned long persona = pakfire_arch_personality(arch);
	if (persona) {
		r = personality(persona);
		if (r < 0) {
			ERROR(jail->pakfire, "Could not set personality (%x)\n", (unsigned int)persona);
			return 1;
		}
	}

	// Setup networking
	// Without the networking flag, the process has been cloned with CLONE_NEWNET
	// and the loopback interface of the new namespace has to be brought up
	if (!pakfire_jail_exec_has_flag(ctx, PAKFIRE_JAIL_HAS_NETWORKING)) {
		r = pakfire_jail_setup_loopback(jail);
		if (r)
			return 1;
	}

	// Set nice level
	// (nothing to do if it is zero)
	if (jail->nice) {
		DEBUG(jail->pakfire, "Setting nice level to %d\n", jail->nice);

		r = setpriority(PRIO_PROCESS, pid, jail->nice);
		if (r) {
			ERROR(jail->pakfire, "Could not set nice level: %m\n");
			return 1;
		}
	}

	// Close other end of log pipes
	// (index 0 of each pipe is closed here)
	close(ctx->pipes.log_INFO[0]);
	close(ctx->pipes.log_ERROR[0]);
#ifdef ENABLE_DEBUG
	close(ctx->pipes.log_DEBUG[0]);
#endif /* ENABLE_DEBUG */

	// Connect standard input
	// (the pipe only exists if the caller has set an input callback)
	if (ctx->pipes.stdin[0] >= 0) {
		r = dup2(ctx->pipes.stdin[0], STDIN_FILENO);
		if (r < 0) {
			ERROR(jail->pakfire, "Could not connect fd %d to stdin: %m\n",
				ctx->pipes.stdin[0]);

			return 1;
		}
	}

	// Connect standard output and error
	// (these pipes are not created in interactive mode)
	if (ctx->pipes.stdout[1] >= 0 && ctx->pipes.stderr[1] >= 0) {
		r = dup2(ctx->pipes.stdout[1], STDOUT_FILENO);
		if (r < 0) {
			ERROR(jail->pakfire, "Could not connect fd %d to stdout: %m\n",
				ctx->pipes.stdout[1]);

			return 1;
		}

		r = dup2(ctx->pipes.stderr[1], STDERR_FILENO);
		if (r < 0) {
			ERROR(jail->pakfire, "Could not connect fd %d to stderr: %m\n",
				ctx->pipes.stderr[1]);

			return 1;
		}

		// Close the pipe (as we have moved the original file descriptors)
		pakfire_jail_close_pipe(jail, ctx->pipes.stdin);
		pakfire_jail_close_pipe(jail, ctx->pipes.stdout);
		pakfire_jail_close_pipe(jail, ctx->pipes.stderr);
	}

	// Reset open file limit (http://0pointer.net/blog/file-descriptor-limits.html)
	r = pakfire_rlimit_reset_nofile(jail->pakfire);
	if (r)
		return r;

	// Set capabilities
	r = pakfire_jail_set_capabilities(jail);
	if (r)
		return r;

	// Show capabilities
	r = pakfire_jail_show_capabilities(jail);
	if (r)
		return r;

	// Filter syscalls
	// This is the last step before the command is executed
	r = pakfire_jail_limit_syscalls(jail);
	if (r)
		return r;

	DEBUG(jail->pakfire, "Child process initialization done\n");
	DEBUG(jail->pakfire, "Launching command:\n");

	// Log argv
	for (unsigned int i = 0; argv[i]; i++)
		DEBUG(jail->pakfire, " argv[%u] = %s\n", i, argv[i]);

	// exec() command
	// This only returns if the command could not be executed
	r = execvpe(argv[0], (char**)argv, jail->env);
	if (r < 0) {
		// Translate errno into regular exit code
		switch (errno) {
			case ENOENT:
				// Ignore if the command doesn't exist
				if (ctx->flags & PAKFIRE_JAIL_NOENT_OK)
					r = 0;
				else
					r = 127;

				break;

			default:
				r = 1;
		}

		// NOTE(review): this is logged even if ENOENT has been ignored above
		ERROR(jail->pakfire, "Could not execve(%s): %m\n", argv[0]);
	}

	// We should not get here
	return r;
}
1856
1857 // Run a command in the jail
1858 static int __pakfire_jail_exec(struct pakfire_jail* jail, const char* argv[],
1859 const int interactive,
1860 pakfire_jail_communicate_in communicate_in,
1861 pakfire_jail_communicate_out communicate_out,
1862 void* data, int flags) {
1863 int exit = -1;
1864 int r;
1865
1866 // Check if argv is valid
1867 if (!argv || !argv[0]) {
1868 errno = EINVAL;
1869 return -1;
1870 }
1871
1872 // Initialize context for this call
1873 struct pakfire_jail_exec ctx = {
1874 .flags = flags,
1875
1876 .pipes = {
1877 .stdin = { -1, -1 },
1878 .stdout = { -1, -1 },
1879 .stderr = { -1, -1 },
1880 .log_INFO = { -1, -1 },
1881 .log_ERROR = { -1, -1 },
1882 .log_DEBUG = { -1, -1 },
1883 },
1884
1885 .communicate = {
1886 .in = communicate_in,
1887 .out = communicate_out,
1888 .data = data,
1889 },
1890
1891 .pidfd = -1,
1892 };
1893
1894 DEBUG(jail->pakfire, "Executing jail...\n");
1895
1896 // Enable networking in interactive mode
1897 if (interactive)
1898 ctx.flags |= PAKFIRE_JAIL_HAS_NETWORKING;
1899
1900 /*
1901 Setup a file descriptor which can be used to notify the client that the parent
1902 has completed configuration.
1903 */
1904 ctx.completed_fd = eventfd(0, EFD_CLOEXEC);
1905 if (ctx.completed_fd < 0) {
1906 ERROR(jail->pakfire, "eventfd() failed: %m\n");
1907 return -1;
1908 }
1909
1910 // Create pipes to communicate with child process if we are not running interactively
1911 if (!interactive) {
1912 // stdin (only if callback is set)
1913 if (ctx.communicate.in) {
1914 r = pakfire_jail_setup_pipe(jail, &ctx.pipes.stdin, 0);
1915 if (r)
1916 goto ERROR;
1917 }
1918
1919 // stdout
1920 r = pakfire_jail_setup_pipe(jail, &ctx.pipes.stdout, 0);
1921 if (r)
1922 goto ERROR;
1923
1924 // stderr
1925 r = pakfire_jail_setup_pipe(jail, &ctx.pipes.stderr, 0);
1926 if (r)
1927 goto ERROR;
1928 }
1929
1930 // Setup pipes for logging
1931 // INFO
1932 r = pakfire_jail_setup_pipe(jail, &ctx.pipes.log_INFO, O_CLOEXEC);
1933 if (r)
1934 goto ERROR;
1935
1936 // ERROR
1937 r = pakfire_jail_setup_pipe(jail, &ctx.pipes.log_ERROR, O_CLOEXEC);
1938 if (r)
1939 goto ERROR;
1940
1941 #ifdef ENABLE_DEBUG
1942 // DEBUG
1943 r = pakfire_jail_setup_pipe(jail, &ctx.pipes.log_DEBUG, O_CLOEXEC);
1944 if (r)
1945 goto ERROR;
1946 #endif /* ENABLE_DEBUG */
1947
1948 // Configure child process
1949 struct clone_args args = {
1950 .flags =
1951 CLONE_NEWCGROUP |
1952 CLONE_NEWIPC |
1953 CLONE_NEWNS |
1954 CLONE_NEWPID |
1955 CLONE_NEWTIME |
1956 CLONE_NEWUSER |
1957 CLONE_NEWUTS |
1958 CLONE_PIDFD,
1959 .exit_signal = SIGCHLD,
1960 .pidfd = (long long unsigned int)&ctx.pidfd,
1961 };
1962
1963 // Launch the process in a cgroup that is a leaf of the configured cgroup
1964 if (jail->cgroup) {
1965 args.flags |= CLONE_INTO_CGROUP;
1966
1967 // Fetch our UUID
1968 const char* uuid = pakfire_jail_uuid(jail);
1969
1970 // Create a temporary cgroup
1971 r = pakfire_cgroup_child(&ctx.cgroup, jail->cgroup, uuid, 0);
1972 if (r) {
1973 ERROR(jail->pakfire, "Could not create cgroup for jail: %m\n");
1974 goto ERROR;
1975 }
1976
1977 // Clone into this cgroup
1978 args.cgroup = pakfire_cgroup_fd(ctx.cgroup);
1979 }
1980
1981 // Setup networking
1982 if (!pakfire_jail_exec_has_flag(&ctx, PAKFIRE_JAIL_HAS_NETWORKING)) {
1983 args.flags |= CLONE_NEWNET;
1984 }
1985
1986 // Fork this process
1987 ctx.pid = clone3(&args, sizeof(args));
1988 if (ctx.pid < 0) {
1989 ERROR(jail->pakfire, "Could not clone: %m\n");
1990 return -1;
1991
1992 // Child process
1993 } else if (ctx.pid == 0) {
1994 r = pakfire_jail_child(jail, &ctx, argv);
1995 _exit(r);
1996 }
1997
1998 // Parent process
1999 r = pakfire_jail_parent(jail, &ctx);
2000 if (r)
2001 goto ERROR;
2002
2003 DEBUG(jail->pakfire, "Waiting for PID %d to finish its work\n", ctx.pid);
2004
2005 // Read output of the child process
2006 r = pakfire_jail_wait(jail, &ctx);
2007 if (r)
2008 goto ERROR;
2009
2010 // Handle exit status
2011 switch (ctx.status.si_code) {
2012 case CLD_EXITED:
2013 DEBUG(jail->pakfire, "The child process exited with code %d\n",
2014 ctx.status.si_status);
2015
2016 // Pass exit code
2017 exit = ctx.status.si_status;
2018 break;
2019
2020 case CLD_KILLED:
2021 ERROR(jail->pakfire, "The child process was killed\n");
2022 exit = 139;
2023 break;
2024
2025 case CLD_DUMPED:
2026 ERROR(jail->pakfire, "The child process terminated abnormally\n");
2027 break;
2028
2029 // Log anything else
2030 default:
2031 ERROR(jail->pakfire, "Unknown child exit code: %d\n", ctx.status.si_code);
2032 break;
2033 }
2034
2035 ERROR:
2036 // Destroy the temporary cgroup (if any)
2037 if (ctx.cgroup) {
2038 // Read cgroup stats
2039 r = pakfire_cgroup_stat(ctx.cgroup, &ctx.cgroup_stats);
2040 if (r) {
2041 ERROR(jail->pakfire, "Could not read cgroup stats: %m\n");
2042 } else {
2043 pakfire_cgroup_stat_dump(ctx.cgroup, &ctx.cgroup_stats);
2044 }
2045
2046 pakfire_cgroup_destroy(ctx.cgroup);
2047 pakfire_cgroup_unref(ctx.cgroup);
2048 }
2049
2050 // Close any file descriptors
2051 pakfire_jail_close_pipe(jail, ctx.pipes.stdin);
2052 pakfire_jail_close_pipe(jail, ctx.pipes.stdout);
2053 pakfire_jail_close_pipe(jail, ctx.pipes.stderr);
2054 if (ctx.pidfd >= 0)
2055 close(ctx.pidfd);
2056 pakfire_jail_close_pipe(jail, ctx.pipes.log_INFO);
2057 pakfire_jail_close_pipe(jail, ctx.pipes.log_ERROR);
2058 pakfire_jail_close_pipe(jail, ctx.pipes.log_DEBUG);
2059
2060 return exit;
2061 }
2062
2063 PAKFIRE_EXPORT int pakfire_jail_exec(
2064 struct pakfire_jail* jail,
2065 const char* argv[],
2066 pakfire_jail_communicate_in callback_in,
2067 pakfire_jail_communicate_out callback_out,
2068 void* data, int flags) {
2069 return __pakfire_jail_exec(jail, argv, 0, callback_in, callback_out, data, flags);
2070 }
2071
/*
	Runs argv in the jail in interactive mode, i.e. without any
	communication callbacks.

	Returns the return code of pakfire_jail_setup_interactive_env() if that
	fails, and the return value of __pakfire_jail_exec() otherwise.
*/
static int pakfire_jail_exec_interactive(
		struct pakfire_jail* jail, const char* argv[], int flags) {
	// Prepare the environment first
	int r = pakfire_jail_setup_interactive_env(jail);

	// Only launch the command if that has worked
	if (!r)
		r = __pakfire_jail_exec(jail, argv, 1, NULL, NULL, NULL, flags);

	return r;
}
2083
2084 int pakfire_jail_exec_script(struct pakfire_jail* jail,
2085 const char* script,
2086 const size_t size,
2087 const char* args[],
2088 pakfire_jail_communicate_in callback_in,
2089 pakfire_jail_communicate_out callback_out,
2090 void* data) {
2091 char path[PATH_MAX];
2092 const char** argv = NULL;
2093 FILE* f = NULL;
2094 int r;
2095
2096 const char* root = pakfire_get_path(jail->pakfire);
2097
2098 // Write the scriptlet to disk
2099 r = pakfire_path_append(path, root, PAKFIRE_TMP_DIR "/pakfire-script.XXXXXX");
2100 if (r)
2101 goto ERROR;
2102
2103 // Create a temporary file
2104 f = pakfire_mktemp(path, 0700);
2105 if (!f) {
2106 ERROR(jail->pakfire, "Could not create temporary file: %m\n");
2107 goto ERROR;
2108 }
2109
2110 DEBUG(jail->pakfire, "Writing script to %s:\n%.*s\n", path, (int)size, script);
2111
2112 // Write data
2113 r = fprintf(f, "%s", script);
2114 if (r < 0) {
2115 ERROR(jail->pakfire, "Could not write script to file %s: %m\n", path);
2116 goto ERROR;
2117 }
2118
2119 // Close file
2120 r = fclose(f);
2121 if (r) {
2122 ERROR(jail->pakfire, "Could not close script file %s: %m\n", path);
2123 goto ERROR;
2124 }
2125
2126 f = NULL;
2127
2128 // Count how many arguments were passed
2129 unsigned int argc = 1;
2130 if (args) {
2131 for (const char** arg = args; *arg; arg++)
2132 argc++;
2133 }
2134
2135 argv = calloc(argc + 1, sizeof(*argv));
2136 if (!argv) {
2137 ERROR(jail->pakfire, "Could not allocate argv: %m\n");
2138 goto ERROR;
2139 }
2140
2141 // Set command
2142 argv[0] = (root) ? pakfire_path_relpath(root, path) : path;
2143
2144 // Copy args
2145 for (unsigned int i = 1; i < argc; i++)
2146 argv[i] = args[i-1];
2147
2148 // Run the script
2149 r = pakfire_jail_exec(jail, argv, callback_in, callback_out, data, 0);
2150
2151 ERROR:
2152 if (argv)
2153 free(argv);
2154 if (f)
2155 fclose(f);
2156
2157 // Remove script from disk
2158 if (*path)
2159 unlink(path);
2160
2161 return r;
2162 }
2163
2164 /*
2165 A convenience function that creates a new jail, runs the given command and destroys
2166 the jail again.
2167 */
2168 int pakfire_jail_run(struct pakfire* pakfire, const char* argv[], int flags, char** output) {
2169 struct pakfire_jail* jail = NULL;
2170 int r;
2171
2172 // Create a new jail
2173 r = pakfire_jail_create(&jail, pakfire);
2174 if (r)
2175 goto ERROR;
2176
2177 // Execute the command
2178 r = pakfire_jail_exec(jail, argv, NULL, pakfire_jail_capture_stdout, output, 0);
2179
2180 ERROR:
2181 if (jail)
2182 pakfire_jail_unref(jail);
2183
2184 return r;
2185 }
2186
/*
	Creates a new jail, runs the given script in it using
	pakfire_jail_exec_script() without any callbacks, and drops the jail
	again.

	The flags argument is not used by this function.

	Returns the return code of pakfire_jail_create() if the jail could not
	be created, and the return value of pakfire_jail_exec_script() otherwise.
*/
int pakfire_jail_run_script(struct pakfire* pakfire,
		const char* script, const size_t length, const char* argv[], int flags) {
	struct pakfire_jail* jail = NULL;

	// Only run the script if the jail could be created
	int r = pakfire_jail_create(&jail, pakfire);
	if (!r)
		r = pakfire_jail_exec_script(jail, script, length, argv, NULL, NULL, NULL);

	// Drop our reference to the jail
	if (jail)
		pakfire_jail_unref(jail);

	return r;
}
2206
/*
	Launches an interactive login shell (/bin/bash --login) in the jail.

	Returns a negative value if the shell could not be run, and zero
	otherwise - whatever the shell returned is ignored.
*/
int pakfire_jail_shell(struct pakfire_jail* jail) {
	const char* argv[] = {
		"/bin/bash", "--login", NULL,
	};

	// Run the shell
	const int r = pakfire_jail_exec_interactive(jail, argv, 0);

	// Only negative values are errors; the shell's own return code is dropped
	return (r < 0) ? r : 0;
}
2224
2225 static int pakfire_jail_run_if_possible(struct pakfire* pakfire, const char** argv) {
2226 char path[PATH_MAX];
2227 int r;
2228
2229 r = pakfire_path(pakfire, path, "%s", *argv);
2230 if (r)
2231 return r;
2232
2233 // Check if the file is executable
2234 r = access(path, X_OK);
2235 if (r) {
2236 DEBUG(pakfire, "%s is not executable. Skipping...\n", *argv);
2237 return 0;
2238 }
2239
2240 return pakfire_jail_run(pakfire, argv, 0, NULL);
2241 }
2242
/*
	Runs /sbin/ldconfig in a new jail if it is executable.

	Returns whatever pakfire_jail_run_if_possible() returns.
*/
int pakfire_jail_ldconfig(struct pakfire* pakfire) {
	// NULL-terminated command line
	const char* command[] = { "/sbin/ldconfig", NULL };

	return pakfire_jail_run_if_possible(pakfire, command);
}
2251
/*
	Runs "/usr/bin/systemd-tmpfiles --create" in a new jail if the binary
	is executable.

	Returns whatever pakfire_jail_run_if_possible() returns.
*/
int pakfire_jail_run_systemd_tmpfiles(struct pakfire* pakfire) {
	// NULL-terminated command line
	const char* command[] = { "/usr/bin/systemd-tmpfiles", "--create", NULL };

	return pakfire_jail_run_if_possible(pakfire, command);
}