]> git.ipfire.org Git - people/ms/pakfire.git/blob - src/libpakfire/jail.c
Revert "jail: This changes how we launch sub-processes"
[people/ms/pakfire.git] / src / libpakfire / jail.c
1 /*#############################################################################
2 # #
3 # Pakfire - The IPFire package management system #
4 # Copyright (C) 2022 Pakfire development team #
5 # #
6 # This program is free software: you can redistribute it and/or modify #
7 # it under the terms of the GNU General Public License as published by #
8 # the Free Software Foundation, either version 3 of the License, or #
9 # (at your option) any later version. #
10 # #
11 # This program is distributed in the hope that it will be useful, #
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of #
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
14 # GNU General Public License for more details. #
15 # #
16 # You should have received a copy of the GNU General Public License #
17 # along with this program. If not, see <http://www.gnu.org/licenses/>. #
18 # #
19 #############################################################################*/
20
21 #include <errno.h>
22 #include <fcntl.h>
23 #include <linux/capability.h>
24 #include <linux/sched.h>
25 #include <sys/wait.h>
26 #include <linux/wait.h>
27 #include <sched.h>
28 #include <signal.h>
29 #include <stdlib.h>
30 #include <syscall.h>
31 #include <sys/capability.h>
32 #include <sys/epoll.h>
33 #include <sys/eventfd.h>
34 #include <sys/mount.h>
35 #include <sys/personality.h>
36 #include <sys/prctl.h>
37 #include <sys/resource.h>
38 #include <sys/timerfd.h>
39 #include <sys/types.h>
40 #include <sys/wait.h>
41
42 // libnl3
43 #include <net/if.h>
44 #include <netlink/route/link.h>
45
46 // libseccomp
47 #include <seccomp.h>
48
49 // libuuid
50 #include <uuid.h>
51
52 #include <pakfire/arch.h>
53 #include <pakfire/cgroup.h>
54 #include <pakfire/jail.h>
55 #include <pakfire/logging.h>
56 #include <pakfire/mount.h>
57 #include <pakfire/pakfire.h>
58 #include <pakfire/path.h>
59 #include <pakfire/private.h>
60 #include <pakfire/pwd.h>
61 #include <pakfire/string.h>
62 #include <pakfire/util.h>
63
// Size (in bytes) of each log buffer. The expression is parenthesised so
// that the macro expands correctly when it is used inside a larger
// expression (e.g. x / BUFFER_SIZE or x % BUFFER_SIZE).
#define BUFFER_SIZE (1024 * 64)

// Number of slots in the environment array of a jail
#define ENVIRON_SIZE 128

// Maximum number of events that are fetched by one call to epoll_wait()
#define EPOLL_MAX_EVENTS 2

// Maximum number of custom mountpoints that can be stored in a jail
#define MAX_MOUNTPOINTS 8
68
// Environment variables that are set for every newly created jail.
// The table is terminated by an entry whose key is NULL.
static const struct environ {
	const char* key;
	const char* val;
} ENV[] = {
	{ .key = "HOME", .val = "/root" },
	{ .key = "LANG", .val = "C.utf-8" },
	{ .key = "PATH", .val = "/usr/local/sbin:/usr/sbin:/sbin:/usr/local/bin:/usr/bin:/bin" },
	{ .key = "TERM", .val = "vt100" },

	// Lets processes detect that they are running inside a Pakfire container
	{ .key = "container", .val = "pakfire" },

	// Sentinel
	{ .key = NULL, .val = NULL },
};
83
// One custom bind-mount that has been registered with a jail
struct pakfire_jail_mountpoint {
	// Path that is being mounted
	char source[PATH_MAX];

	// Path where the source is mounted to
	char target[PATH_MAX];

	// Flags that are passed on when the mountpoint is bind-mounted
	int flags;
};
89
90 struct pakfire_jail {
91 struct pakfire_ctx* ctx;
92 struct pakfire* pakfire;
93 int nrefs;
94
95 // A unique ID for each jail
96 uuid_t uuid;
97 char __uuid[UUID_STR_LEN];
98
99 // Resource Limits
100 int nice;
101
102 // Timeout
103 struct itimerspec timeout;
104
105 // CGroup
106 struct pakfire_cgroup* cgroup;
107
108 // Environment
109 char* env[ENVIRON_SIZE];
110
111 // Mountpoints
112 struct pakfire_jail_mountpoint mountpoints[MAX_MOUNTPOINTS];
113 unsigned int num_mountpoints;
114
115 // Callbacks
116 struct pakfire_jail_callbacks {
117 // Log
118 pakfire_jail_log_callback log;
119 void* log_data;
120 } callbacks;
121 };
122
123 struct pakfire_log_buffer {
124 char data[BUFFER_SIZE];
125 size_t used;
126 };
127
128 struct pakfire_jail_exec {
129 int flags;
130
131 // PID (of the child)
132 pid_t pid;
133 int pidfd;
134
135 // Process status (from waitid)
136 siginfo_t status;
137
138 // FD to notify the client that the parent has finished initialization
139 int completed_fd;
140
141 // Log pipes
142 struct pakfire_jail_pipes {
143 int stdin[2];
144 int stdout[2];
145 int stderr[2];
146
147 // Logging
148 int log_INFO[2];
149 int log_ERROR[2];
150 #ifdef ENABLE_DEBUG
151 int log_DEBUG[2];
152 #endif /* ENABLE_DEBUG */
153 } pipes;
154
155 // Communicate
156 struct pakfire_jail_communicate {
157 pakfire_jail_communicate_in in;
158 pakfire_jail_communicate_out out;
159 void* data;
160 } communicate;
161
162 // Log buffers
163 struct pakfire_jail_buffers {
164 struct pakfire_log_buffer stdout;
165 struct pakfire_log_buffer stderr;
166
167 // Logging
168 struct pakfire_log_buffer log_INFO;
169 struct pakfire_log_buffer log_ERROR;
170 #ifdef ENABLE_DEBUG
171 struct pakfire_log_buffer log_DEBUG;
172 #endif /* ENABLE_DEBUG */
173 } buffers;
174
175 struct pakfire_cgroup* cgroup;
176 struct pakfire_cgroup_stats cgroup_stats;
177
178 // Console
179 char console[PATH_MAX];
180 int consolefd;
181 };
182
// Invokes the clone3 system call directly through syscall()
static int clone3(struct clone_args* args, size_t size) {
	const long result = syscall(__NR_clone3, args, size);

	return result;
}
186
187 static int pidfd_send_signal(int pidfd, int sig, siginfo_t* info, unsigned int flags) {
188 return syscall(SYS_pidfd_send_signal, pidfd, sig, info, flags);
189 }
190
// Invokes the pivot_root system call directly through syscall()
static int pivot_root(const char* new_root, const char* old_root) {
	const long result = syscall(SYS_pivot_root, new_root, old_root);

	return result;
}
194
195 static int pakfire_jail_exec_has_flag(
196 const struct pakfire_jail_exec* ctx, const enum pakfire_jail_exec_flags flag) {
197 return ctx->flags & flag;
198 }
199
200 static void pakfire_jail_free(struct pakfire_jail* jail) {
201 DEBUG(jail->pakfire, "Freeing jail at %p\n", jail);
202
203 // Free environment
204 for (unsigned int i = 0; jail->env[i]; i++)
205 free(jail->env[i]);
206
207 if (jail->cgroup)
208 pakfire_cgroup_unref(jail->cgroup);
209 if (jail->pakfire)
210 pakfire_unref(jail->pakfire);
211 if (jail->ctx)
212 pakfire_ctx_unref(jail->ctx);
213 free(jail);
214 }
215
/*
	The default log callback: forwards every line to the Pakfire logger
	using the macro that matches the priority. Lines of any other
	priority are dropped. Always returns zero.
*/
static int pakfire_jail_default_log_callback(struct pakfire* pakfire, void* data,
		int priority, const char* line, size_t length) {
	if (priority == LOG_INFO) {
		INFO(pakfire, "%s", line);

	} else if (priority == LOG_ERR) {
		ERROR(pakfire, "%s", line);

#ifdef ENABLE_DEBUG
	} else if (priority == LOG_DEBUG) {
		DEBUG(pakfire, "%s", line);
#endif
	}

	return 0;
}
239
240 static const char* pakfire_jail_uuid(struct pakfire_jail* jail) {
241 if (!*jail->__uuid)
242 uuid_unparse_lower(jail->uuid, jail->__uuid);
243
244 return jail->__uuid;
245 }
246
/*
	Prepares the environment for an interactive session: sets a prompt
	and copies TERM and LANG from the calling process if they are set.

	Returns zero on success or the error of pakfire_jail_set_env().
*/
static int pakfire_jail_setup_interactive_env(struct pakfire_jail* jail) {
	// Variables that are copied from the calling process (in this order)
	static const char* const copied[] = { "TERM", "LANG", NULL };
	int r;

	// Set PS1
	r = pakfire_jail_set_env(jail, "PS1", "pakfire-jail \\w> ");
	if (r)
		return r;

	for (const char* const* name = copied; *name; name++) {
		const char* value = secure_getenv(*name);

		// Skip anything that is not set
		if (!value)
			continue;

		r = pakfire_jail_set_env(jail, *name, value);
		if (r)
			return r;
	}

	return 0;
}
271
272 PAKFIRE_EXPORT int pakfire_jail_create(struct pakfire_jail** jail, struct pakfire* pakfire) {
273 int r;
274
275 const char* arch = pakfire_get_effective_arch(pakfire);
276
277 // Allocate a new jail
278 struct pakfire_jail* j = calloc(1, sizeof(*j));
279 if (!j)
280 return 1;
281
282 // Reference context
283 j->ctx = pakfire_ctx(pakfire);
284
285 // Reference Pakfire
286 j->pakfire = pakfire_ref(pakfire);
287
288 // Initialize reference counter
289 j->nrefs = 1;
290
291 // Generate a random UUID
292 uuid_generate_random(j->uuid);
293
294 DEBUG(j->pakfire, "Allocated new jail at %p\n", j);
295
296 // Set the default logging callback
297 pakfire_jail_set_log_callback(j, pakfire_jail_default_log_callback, NULL);
298
299 // Set default environment
300 for (const struct environ* e = ENV; e->key; e++) {
301 r = pakfire_jail_set_env(j, e->key, e->val);
302 if (r)
303 goto ERROR;
304 }
305
306 // Enable all CPU features that CPU has to offer
307 if (!pakfire_arch_is_supported_by_host(arch)) {
308 r = pakfire_jail_set_env(j, "QEMU_CPU", "max");
309 if (r)
310 goto ERROR;
311 }
312
313 // Set container UUID
314 r = pakfire_jail_set_env(j, "container_uuid", pakfire_jail_uuid(j));
315 if (r)
316 goto ERROR;
317
318 // Disable systemctl to talk to systemd
319 if (!pakfire_on_root(j->pakfire)) {
320 r = pakfire_jail_set_env(j, "SYSTEMD_OFFLINE", "1");
321 if (r)
322 goto ERROR;
323 }
324
325 // Done
326 *jail = j;
327 return 0;
328
329 ERROR:
330 pakfire_jail_free(j);
331
332 return r;
333 }
334
335 PAKFIRE_EXPORT struct pakfire_jail* pakfire_jail_ref(struct pakfire_jail* jail) {
336 ++jail->nrefs;
337
338 return jail;
339 }
340
341 PAKFIRE_EXPORT struct pakfire_jail* pakfire_jail_unref(struct pakfire_jail* jail) {
342 if (--jail->nrefs > 0)
343 return jail;
344
345 pakfire_jail_free(jail);
346 return NULL;
347 }
348
349 // Logging Callback
350
351 PAKFIRE_EXPORT void pakfire_jail_set_log_callback(struct pakfire_jail* jail,
352 pakfire_jail_log_callback callback, void* data) {
353 jail->callbacks.log = callback;
354 jail->callbacks.log_data = data;
355 }
356
357 // Resource Limits
358
359 PAKFIRE_EXPORT int pakfire_jail_nice(struct pakfire_jail* jail, int nice) {
360 // Check if nice level is in range
361 if (nice < -19 || nice > 20) {
362 errno = EINVAL;
363 return 1;
364 }
365
366 // Store nice level
367 jail->nice = nice;
368
369 return 0;
370 }
371
372 int pakfire_jail_set_cgroup(struct pakfire_jail* jail, struct pakfire_cgroup* cgroup) {
373 // Free any previous cgroup
374 if (jail->cgroup) {
375 pakfire_cgroup_unref(jail->cgroup);
376 jail->cgroup = NULL;
377 }
378
379 // Set any new cgroup
380 if (cgroup) {
381 DEBUG(jail->pakfire, "Setting cgroup %p\n", cgroup);
382
383 jail->cgroup = pakfire_cgroup_ref(cgroup);
384 }
385
386 // Done
387 return 0;
388 }
389
390 // Environment
391
392 // Returns the length of the environment
393 static unsigned int pakfire_jail_env_length(struct pakfire_jail* jail) {
394 unsigned int i = 0;
395
396 // Count everything in the environment
397 for (char** e = jail->env; *e; e++)
398 i++;
399
400 return i;
401 }
402
403 // Finds an existing environment variable and returns its index or -1 if not found
404 static int pakfire_jail_find_env(struct pakfire_jail* jail, const char* key) {
405 if (!key) {
406 errno = EINVAL;
407 return -1;
408 }
409
410 const size_t length = strlen(key);
411
412 for (unsigned int i = 0; jail->env[i]; i++) {
413 if ((pakfire_string_startswith(jail->env[i], key)
414 && *(jail->env[i] + length) == '=')) {
415 return i;
416 }
417 }
418
419 // Nothing found
420 return -1;
421 }
422
423 // Returns the value of an environment variable or NULL
424 PAKFIRE_EXPORT const char* pakfire_jail_get_env(struct pakfire_jail* jail,
425 const char* key) {
426 int i = pakfire_jail_find_env(jail, key);
427 if (i < 0)
428 return NULL;
429
430 return jail->env[i] + strlen(key) + 1;
431 }
432
433 // Sets an environment variable
434 PAKFIRE_EXPORT int pakfire_jail_set_env(struct pakfire_jail* jail,
435 const char* key, const char* value) {
436 // Find the index where to write this value to
437 int i = pakfire_jail_find_env(jail, key);
438 if (i < 0)
439 i = pakfire_jail_env_length(jail);
440
441 // Return -ENOSPC when the environment is full
442 if (i >= ENVIRON_SIZE) {
443 errno = ENOSPC;
444 return -1;
445 }
446
447 // Free any previous value
448 if (jail->env[i])
449 free(jail->env[i]);
450
451 // Format and set environment variable
452 asprintf(&jail->env[i], "%s=%s", key, value);
453
454 DEBUG(jail->pakfire, "Set environment variable: %s\n", jail->env[i]);
455
456 return 0;
457 }
458
459 // Imports an environment
460 PAKFIRE_EXPORT int pakfire_jail_import_env(struct pakfire_jail* jail, const char* env[]) {
461 if (!env)
462 return 0;
463
464 char* key;
465 char* val;
466 int r;
467
468 // Copy environment variables
469 for (unsigned int i = 0; env[i]; i++) {
470 r = pakfire_string_partition(env[i], "=", &key, &val);
471 if (r)
472 continue;
473
474 // Set value
475 r = pakfire_jail_set_env(jail, key, val);
476
477 if (key)
478 free(key);
479 if (val)
480 free(val);
481
482 // Break on error
483 if (r)
484 return r;
485 }
486
487 return 0;
488 }
489
490 // Timeout
491
492 PAKFIRE_EXPORT int pakfire_jail_set_timeout(
493 struct pakfire_jail* jail, unsigned int timeout) {
494 // Store value
495 jail->timeout.it_value.tv_sec = timeout;
496
497 if (timeout > 0)
498 DEBUG(jail->pakfire, "Timeout set to %u second(s)\n", timeout);
499 else
500 DEBUG(jail->pakfire, "Timeout disabled\n");
501
502 return 0;
503 }
504
505 static int pakfire_jail_create_timer(struct pakfire_jail* jail) {
506 int r;
507
508 // Nothing to do if no timeout has been set
509 if (!jail->timeout.it_value.tv_sec)
510 return -1;
511
512 // Create a new timer
513 const int fd = timerfd_create(CLOCK_MONOTONIC, 0);
514 if (fd < 0) {
515 ERROR(jail->pakfire, "Could not create timer: %m\n");
516 goto ERROR;
517 }
518
519 // Arm timer
520 r = timerfd_settime(fd, 0, &jail->timeout, NULL);
521 if (r) {
522 ERROR(jail->pakfire, "Could not arm timer: %m\n");
523 goto ERROR;
524 }
525
526 return fd;
527
528 ERROR:
529 if (fd >= 0)
530 close(fd);
531
532 return -1;
533 }
534
535 /*
536 This function replaces any logging in the child process.
537
538 All log messages will be sent to the parent process through their respective pipes.
539 */
540 static void pakfire_jail_log_redirect(void* data, int priority, const char* file,
541 int line, const char* fn, const char* format, va_list args) {
542 struct pakfire_jail_pipes* pipes = (struct pakfire_jail_pipes*)data;
543 int fd;
544
545 switch (priority) {
546 case LOG_INFO:
547 fd = pipes->log_INFO[1];
548 break;
549
550 case LOG_ERR:
551 fd = pipes->log_ERROR[1];
552 break;
553
554 #ifdef ENABLE_DEBUG
555 case LOG_DEBUG:
556 fd = pipes->log_DEBUG[1];
557 break;
558 #endif /* ENABLE_DEBUG */
559
560 // Ignore any messages of an unknown priority
561 default:
562 return;
563 }
564
565 // Send the log message
566 if (fd >= 0)
567 vdprintf(fd, format, args);
568 }
569
570 static int pakfire_jail_log_buffer_is_full(const struct pakfire_log_buffer* buffer) {
571 return (sizeof(buffer->data) == buffer->used);
572 }
573
574 /*
575 This function reads as much data as it can from the file descriptor.
576 If it finds a whole line in it, it will send it to the logger and repeat the process.
577 If not newline character is found, it will try to read more data until it finds one.
578 */
579 static int pakfire_jail_handle_log(struct pakfire_jail* jail,
580 struct pakfire_jail_exec* ctx, int priority, int fd,
581 struct pakfire_log_buffer* buffer, pakfire_jail_communicate_out callback, void* data) {
582 char line[BUFFER_SIZE + 1];
583
584 // Fill up buffer from fd
585 if (buffer->used < sizeof(buffer->data)) {
586 ssize_t bytes_read = read(fd, buffer->data + buffer->used,
587 sizeof(buffer->data) - buffer->used);
588
589 // Handle errors
590 if (bytes_read < 0) {
591 ERROR(jail->pakfire, "Could not read from fd %d: %m\n", fd);
592 return -1;
593 }
594
595 // Update buffer size
596 buffer->used += bytes_read;
597 }
598
599 // See if we have any lines that we can write
600 while (buffer->used) {
601 // Search for the end of the first line
602 char* eol = memchr(buffer->data, '\n', buffer->used);
603
604 // No newline found
605 if (!eol) {
606 // If the buffer is full, we send the content to the logger and try again
607 // This should not happen in practise
608 if (pakfire_jail_log_buffer_is_full(buffer)) {
609 DEBUG(jail->pakfire, "Logging buffer is full. Sending all content\n");
610
611 eol = buffer->data + sizeof(buffer->data) - 1;
612
613 // Otherwise we might have only read parts of the output
614 } else
615 break;
616 }
617
618 // Find the length of the string
619 size_t length = eol - buffer->data + 1;
620
621 // Copy the line into the buffer
622 memcpy(line, buffer->data, length);
623
624 // Terminate the string
625 line[length] = '\0';
626
627 // Log the line
628 if (callback) {
629 int r = callback(jail->pakfire, data, priority, line, length);
630 if (r) {
631 ERROR(jail->pakfire, "The logging callback returned an error: %d\n", r);
632 return r;
633 }
634 }
635
636 // Remove line from buffer
637 memmove(buffer->data, buffer->data + length, buffer->used - length);
638 buffer->used -= length;
639 }
640
641 return 0;
642 }
643
644 static int pakfire_jail_stream_stdin(struct pakfire_jail* jail,
645 struct pakfire_jail_exec* ctx, const int fd) {
646 int r;
647
648 // Nothing to do if there is no stdin callback set
649 if (!ctx->communicate.in) {
650 DEBUG(jail->pakfire, "Callback for standard input is not set\n");
651 return 0;
652 }
653
654 // Skip if the writing pipe has already been closed
655 if (!ctx->pipes.stdin[1])
656 return 0;
657
658 DEBUG(jail->pakfire, "Streaming standard input...\n");
659
660 // Calling the callback
661 r = ctx->communicate.in(jail->pakfire, ctx->communicate.data, fd);
662
663 DEBUG(jail->pakfire, "Standard input callback finished: %d\n", r);
664
665 // The callback signaled that it has written everything
666 if (r == EOF) {
667 DEBUG(jail->pakfire, "Closing standard input pipe\n");
668
669 // Close the file-descriptor
670 close(fd);
671
672 // Reset the file-descriptor so it won't be closed again later
673 ctx->pipes.stdin[1] = -1;
674
675 // Report success
676 r = 0;
677 }
678
679 return r;
680 }
681
682 static int pakfire_jail_setup_pipe(struct pakfire_jail* jail, int (*fds)[2], const int flags) {
683 int r = pipe2(*fds, flags);
684 if (r < 0) {
685 ERROR(jail->pakfire, "Could not setup pipe: %m\n");
686 return 1;
687 }
688
689 return 0;
690 }
691
// Closes both ends of a pipe, skipping any end that is negative
static void pakfire_jail_close_pipe(struct pakfire_jail* jail, int fds[2]) {
	for (int* fd = fds; fd < fds + 2; fd++) {
		// This end is not open
		if (*fd < 0)
			continue;

		close(*fd);
	}
}
697
/*
	Convenience function which closes the write end of a pipe (marking
	it as closed with -1) and returns the read end.

	Returns -1 if the read end is not open.
*/
static int pakfire_jail_get_pipe_to_read(struct pakfire_jail* jail, int (*fds)[2]) {
	int* const ends = *fds;

	// Close the write end of the pipe
	if (ends[1] >= 0) {
		close(ends[1]);
		ends[1] = -1;
	}

	// Return the read end
	return (ends[0] >= 0) ? ends[0] : -1;
}
719
/*
	Convenience function which closes the read end of a pipe (marking
	it as closed with -1) and returns the write end.

	Returns -1 if the write end is not open.
*/
static int pakfire_jail_get_pipe_to_write(struct pakfire_jail* jail, int (*fds)[2]) {
	int* const ends = *fds;

	// Close the read end of the pipe
	if (ends[0] >= 0) {
		close(ends[0]);
		ends[0] = -1;
	}

	// Return the write end
	return (ends[1] >= 0) ? ends[1] : -1;
}
737
// Callback which passes the first length bytes of line on to the Pakfire
// logger using the given priority. Always returns zero.
static int pakfire_jail_log(struct pakfire* pakfire, void* data, int priority,
		const char* line, const size_t length) {
	// The precision of %.*s has to be passed as an integer
	const int precision = (int)length;

	// Pass everything to the parent logger
	pakfire_log_condition(pakfire, priority, 0, "%.*s", precision, line);

	return 0;
}
745
746 static int pakfire_jail_wait(struct pakfire_jail* jail, struct pakfire_jail_exec* ctx) {
747 int epollfd = -1;
748 struct epoll_event ev;
749 struct epoll_event events[EPOLL_MAX_EVENTS];
750 char garbage[8];
751 int r = 0;
752
753 // Fetch file descriptors from context
754 const int stdin = pakfire_jail_get_pipe_to_write(jail, &ctx->pipes.stdin);
755 const int stdout = pakfire_jail_get_pipe_to_read(jail, &ctx->pipes.stdout);
756 const int stderr = pakfire_jail_get_pipe_to_read(jail, &ctx->pipes.stderr);
757 const int pidfd = ctx->pidfd;
758
759 // Timer
760 const int timerfd = pakfire_jail_create_timer(jail);
761
762 // Logging
763 const int log_INFO = pakfire_jail_get_pipe_to_read(jail, &ctx->pipes.log_INFO);
764 const int log_ERROR = pakfire_jail_get_pipe_to_read(jail, &ctx->pipes.log_ERROR);
765 #ifdef ENABLE_DEBUG
766 const int log_DEBUG = pakfire_jail_get_pipe_to_read(jail, &ctx->pipes.log_DEBUG);
767 #endif /* ENABLE_DEBUG */
768
769 // Make a list of all file descriptors we are interested in
770 const int fds[] = {
771 stdin,
772 stdout,
773 stderr,
774 pidfd,
775 timerfd,
776 log_INFO,
777 log_ERROR,
778 #ifdef ENABLE_DEBUG
779 log_DEBUG,
780 #endif /* ENABLE_DEBUG */
781 };
782
783 // Setup epoll
784 epollfd = epoll_create1(0);
785 if (epollfd < 0) {
786 ERROR(jail->pakfire, "Could not initialize epoll(): %m\n");
787 r = 1;
788 goto ERROR;
789 }
790
791 // Turn file descriptors into non-blocking mode and add them to epoll()
792 for (unsigned int i = 0; i < sizeof(fds) / sizeof(*fds); i++) {
793 int fd = fds[i];
794
795 // Skip fds which were not initialized
796 if (fd < 0)
797 continue;
798
799 ev.events = EPOLLHUP;
800
801 if (fd == stdin)
802 ev.events |= EPOLLOUT;
803 else
804 ev.events |= EPOLLIN;
805
806 // Read flags
807 int flags = fcntl(fd, F_GETFL, 0);
808
809 // Set modified flags
810 if (fcntl(fd, F_SETFL, flags|O_NONBLOCK) < 0) {
811 ERROR(jail->pakfire,
812 "Could not set file descriptor %d into non-blocking mode: %m\n", fd);
813 r = 1;
814 goto ERROR;
815 }
816
817 ev.data.fd = fd;
818
819 if (epoll_ctl(epollfd, EPOLL_CTL_ADD, fd, &ev) < 0) {
820 ERROR(jail->pakfire, "Could not add file descriptor %d to epoll(): %m\n", fd);
821 r = 1;
822 goto ERROR;
823 }
824 }
825
826 int ended = 0;
827
828 // Loop for as long as the process is alive
829 while (!ended) {
830 int num = epoll_wait(epollfd, events, EPOLL_MAX_EVENTS, -1);
831 if (num < 1) {
832 // Ignore if epoll_wait() has been interrupted
833 if (errno == EINTR)
834 continue;
835
836 ERROR(jail->pakfire, "epoll_wait() failed: %m\n");
837 r = 1;
838
839 goto ERROR;
840 }
841
842 for (int i = 0; i < num; i++) {
843 int e = events[i].events;
844 int fd = events[i].data.fd;
845
846 struct pakfire_log_buffer* buffer = NULL;
847 pakfire_jail_communicate_out callback = NULL;
848 void* data = NULL;
849 int priority;
850
851 // Check if there is any data to be read
852 if (e & EPOLLIN) {
853 // Handle any changes to the PIDFD
854 if (fd == pidfd) {
855 // Call waidid() and store the result
856 r = waitid(P_PIDFD, ctx->pidfd, &ctx->status, WEXITED);
857 if (r) {
858 ERROR(jail->pakfire, "waitid() failed: %m\n");
859 goto ERROR;
860 }
861
862 // Mark that we have ended so that we will process the remaining
863 // events from epoll() now, but won't restart the outer loop.
864 ended = 1;
865 continue;
866
867 // Handle timer events
868 } else if (fd == timerfd) {
869 DEBUG(jail->pakfire, "Timer event received\n");
870
871 // Disarm the timer
872 r = read(timerfd, garbage, sizeof(garbage));
873 if (r < 1) {
874 ERROR(jail->pakfire, "Could not disarm timer: %m\n");
875 r = 1;
876 goto ERROR;
877 }
878
879 // Terminate the process if it hasn't already ended
880 if (!ended) {
881 DEBUG(jail->pakfire, "Terminating process...\n");
882
883 // Send SIGTERM to the process
884 r = pidfd_send_signal(pidfd, SIGKILL, NULL, 0);
885 if (r) {
886 ERROR(jail->pakfire, "Could not kill process: %m\n");
887 goto ERROR;
888 }
889 }
890
891 // Don't fall through to log processing
892 continue;
893
894 // Handle logging messages
895 } else if (fd == log_INFO) {
896 buffer = &ctx->buffers.log_INFO;
897 priority = LOG_INFO;
898
899 callback = pakfire_jail_log;
900
901 } else if (fd == log_ERROR) {
902 buffer = &ctx->buffers.log_ERROR;
903 priority = LOG_ERR;
904
905 callback = pakfire_jail_log;
906
907 #ifdef ENABLE_DEBUG
908 } else if (fd == log_DEBUG) {
909 buffer = &ctx->buffers.log_DEBUG;
910 priority = LOG_DEBUG;
911
912 callback = pakfire_jail_log;
913 #endif /* ENABLE_DEBUG */
914
915 // Handle anything from the log pipes
916 } else if (fd == stdout) {
917 buffer = &ctx->buffers.stdout;
918 priority = LOG_INFO;
919
920 // Send any output to the default logger if no callback is set
921 if (ctx->communicate.out) {
922 callback = ctx->communicate.out;
923 data = ctx->communicate.data;
924 } else {
925 callback = jail->callbacks.log;
926 data = jail->callbacks.log_data;
927 }
928
929 } else if (fd == stderr) {
930 buffer = &ctx->buffers.stderr;
931 priority = LOG_ERR;
932
933 // Send any output to the default logger if no callback is set
934 if (ctx->communicate.out) {
935 callback = ctx->communicate.out;
936 data = ctx->communicate.data;
937 } else {
938 callback = jail->callbacks.log;
939 data = jail->callbacks.log_data;
940 }
941
942 } else {
943 DEBUG(jail->pakfire, "Received invalid file descriptor %d\n", fd);
944 continue;
945 }
946
947 // Handle log event
948 r = pakfire_jail_handle_log(jail, ctx, priority, fd, buffer, callback, data);
949 if (r)
950 goto ERROR;
951 }
952
953 if (e & EPOLLOUT) {
954 // Handle standard input
955 if (fd == stdin) {
956 r = pakfire_jail_stream_stdin(jail, ctx, fd);
957 if (r) {
958 switch (errno) {
959 // Ignore if we filled up the buffer
960 case EAGAIN:
961 break;
962
963 default:
964 ERROR(jail->pakfire, "Could not write to stdin: %m\n");
965 goto ERROR;
966 }
967 }
968 }
969 }
970
971 // Check if any file descriptors have been closed
972 if (e & EPOLLHUP) {
973 // Remove the file descriptor
974 r = epoll_ctl(epollfd, EPOLL_CTL_DEL, fd, NULL);
975 if (r) {
976 ERROR(jail->pakfire, "Could not remove closed file-descriptor %d: %m\n", fd);
977 goto ERROR;
978 }
979 }
980 }
981 }
982
983 ERROR:
984 if (epollfd >= 0)
985 close(epollfd);
986 if (timerfd >= 0)
987 close(timerfd);
988
989 return r;
990 }
991
/*
	An output callback which appends everything that has been sent with
	priority LOG_INFO to a string. Everything else is passed on to the
	default log callback.

	data has to point to a char* which is either NULL or points to a
	string that can be released with free() (it is replaced by a newly
	allocated string every time a line is appended). The caller has to
	free the final string.

	Returns zero on success and 1 if memory could not be allocated, in
	which case the string collected so far remains untouched.
*/
int pakfire_jail_capture_stdout(struct pakfire* pakfire, void* data,
		int priority, const char* line, size_t length) {
	char** output = (char**)data;
	char* buffer = NULL;
	int r;

	// Append everything from stdout to a buffer
	if (output && priority == LOG_INFO) {
		// Write into a temporary pointer, because asprintf() overwrites its
		// first argument (which would leak the previous string) and leaves
		// it undefined on failure
		r = asprintf(&buffer, "%s%s", (*output) ? *output : "", line);
		if (r < 0)
			return 1;

		// Replace the previous string
		free(*output);
		*output = buffer;

		return 0;
	}

	// Send everything else to the default logger
	return pakfire_jail_default_log_callback(pakfire, NULL, priority, line, length);
}
1008
1009 // Capabilities
1010
1011 // Logs all capabilities of the current process
1012 static int pakfire_jail_show_capabilities(struct pakfire_jail* jail) {
1013 cap_t caps = NULL;
1014 char* name = NULL;
1015 cap_flag_value_t value_e;
1016 cap_flag_value_t value_i;
1017 cap_flag_value_t value_p;
1018 int r;
1019
1020 // Fetch PID
1021 pid_t pid = getpid();
1022
1023 // Fetch all capabilities
1024 caps = cap_get_proc();
1025 if (!caps) {
1026 ERROR(jail->pakfire, "Could not fetch capabilities: %m\n");
1027 r = 1;
1028 goto ERROR;
1029 }
1030
1031 DEBUG(jail->pakfire, "Capabilities of PID %d:\n", pid);
1032
1033 // Iterate over all capabilities
1034 for (unsigned int cap = 0; cap_valid(cap); cap++) {
1035 name = cap_to_name(cap);
1036
1037 // Fetch effective value
1038 r = cap_get_flag(caps, cap, CAP_EFFECTIVE, &value_e);
1039 if (r)
1040 goto ERROR;
1041
1042 // Fetch inheritable value
1043 r = cap_get_flag(caps, cap, CAP_INHERITABLE, &value_i);
1044 if (r)
1045 goto ERROR;
1046
1047 // Fetch permitted value
1048 r = cap_get_flag(caps, cap, CAP_PERMITTED, &value_p);
1049 if (r)
1050 goto ERROR;
1051
1052 DEBUG(jail->pakfire,
1053 " %-24s : %c%c%c\n",
1054 name,
1055 (value_e == CAP_SET) ? 'e' : '-',
1056 (value_i == CAP_SET) ? 'i' : '-',
1057 (value_p == CAP_SET) ? 'p' : '-'
1058 );
1059
1060 // Free name
1061 cap_free(name);
1062 name = NULL;
1063 }
1064
1065 // Success
1066 r = 0;
1067
1068 ERROR:
1069 if (name)
1070 cap_free(name);
1071 if (caps)
1072 cap_free(caps);
1073
1074 return r;
1075 }
1076
1077 static int pakfire_jail_set_capabilities(struct pakfire_jail* jail) {
1078 cap_t caps = NULL;
1079 char* name = NULL;
1080 int r;
1081
1082 // Fetch capabilities
1083 caps = cap_get_proc();
1084 if (!caps) {
1085 ERROR(jail->pakfire, "Could not read capabilities: %m\n");
1086 r = 1;
1087 goto ERROR;
1088 }
1089
1090 // Walk through all capabilities
1091 for (cap_value_t cap = 0; cap_valid(cap); cap++) {
1092 cap_value_t _caps[] = { cap };
1093
1094 // Fetch the name of the capability
1095 name = cap_to_name(cap);
1096
1097 r = cap_set_flag(caps, CAP_EFFECTIVE, 1, _caps, CAP_SET);
1098 if (r) {
1099 ERROR(jail->pakfire, "Could not set %s: %m\n", name);
1100 goto ERROR;
1101 }
1102
1103 r = cap_set_flag(caps, CAP_INHERITABLE, 1, _caps, CAP_SET);
1104 if (r) {
1105 ERROR(jail->pakfire, "Could not set %s: %m\n", name);
1106 goto ERROR;
1107 }
1108
1109 r = cap_set_flag(caps, CAP_PERMITTED, 1, _caps, CAP_SET);
1110 if (r) {
1111 ERROR(jail->pakfire, "Could not set %s: %m\n", name);
1112 goto ERROR;
1113 }
1114
1115 // Free name
1116 cap_free(name);
1117 name = NULL;
1118 }
1119
1120 // Restore all capabilities
1121 r = cap_set_proc(caps);
1122 if (r) {
1123 ERROR(jail->pakfire, "Restoring capabilities failed: %m\n");
1124 goto ERROR;
1125 }
1126
1127 // Add all capabilities to the ambient set
1128 for (unsigned int cap = 0; cap_valid(cap); cap++) {
1129 name = cap_to_name(cap);
1130
1131 // Raise the capability
1132 r = prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_RAISE, cap, 0, 0);
1133 if (r) {
1134 ERROR(jail->pakfire, "Could not set ambient capability %s: %m\n", name);
1135 goto ERROR;
1136 }
1137
1138 // Free name
1139 cap_free(name);
1140 name = NULL;
1141 }
1142
1143 // Success
1144 r = 0;
1145
1146 ERROR:
1147 if (name)
1148 cap_free(name);
1149 if (caps)
1150 cap_free(caps);
1151
1152 return r;
1153 }
1154
1155 // Syscall Filter
1156
1157 static int pakfire_jail_limit_syscalls(struct pakfire_jail* jail) {
1158 const int syscalls[] = {
1159 // The kernel's keyring isn't namespaced
1160 SCMP_SYS(keyctl),
1161 SCMP_SYS(add_key),
1162 SCMP_SYS(request_key),
1163
1164 // Disable userfaultfd
1165 SCMP_SYS(userfaultfd),
1166
1167 // Disable perf which could leak a lot of information about the host
1168 SCMP_SYS(perf_event_open),
1169
1170 0,
1171 };
1172 int r = 1;
1173
1174 DEBUG(jail->pakfire, "Applying syscall filter...\n");
1175
1176 // Setup a syscall filter which allows everything by default
1177 scmp_filter_ctx ctx = seccomp_init(SCMP_ACT_ALLOW);
1178 if (!ctx) {
1179 ERROR(jail->pakfire, "Could not setup seccomp filter: %m\n");
1180 goto ERROR;
1181 }
1182
1183 // All all syscalls
1184 for (const int* syscall = syscalls; *syscall; syscall++) {
1185 r = seccomp_rule_add(ctx, SCMP_ACT_ERRNO(EPERM), *syscall, 0);
1186 if (r) {
1187 ERROR(jail->pakfire, "Could not configure syscall %d: %m\n", *syscall);
1188 goto ERROR;
1189 }
1190 }
1191
1192 // Load syscall filter into the kernel
1193 r = seccomp_load(ctx);
1194 if (r) {
1195 ERROR(jail->pakfire, "Could not load syscall filter into the kernel: %m\n");
1196 goto ERROR;
1197 }
1198
1199 ERROR:
1200 if (ctx)
1201 seccomp_release(ctx);
1202
1203 return r;
1204 }
1205
1206 // Mountpoints
1207
1208 PAKFIRE_EXPORT int pakfire_jail_bind(struct pakfire_jail* jail,
1209 const char* source, const char* target, int flags) {
1210 struct pakfire_jail_mountpoint* mp = NULL;
1211 int r;
1212
1213 // Check if there is any space left
1214 if (jail->num_mountpoints >= MAX_MOUNTPOINTS) {
1215 errno = ENOSPC;
1216 return 1;
1217 }
1218
1219 // Check for valid inputs
1220 if (!source || !target) {
1221 errno = EINVAL;
1222 return 1;
1223 }
1224
1225 // Select the next free slot
1226 mp = &jail->mountpoints[jail->num_mountpoints];
1227
1228 // Copy source
1229 r = pakfire_string_set(mp->source, source);
1230 if (r) {
1231 ERROR(jail->pakfire, "Could not copy source: %m\n");
1232 return r;
1233 }
1234
1235 // Copy target
1236 r = pakfire_string_set(mp->target, target);
1237 if (r) {
1238 ERROR(jail->pakfire, "Could not copy target: %m\n");
1239 return r;
1240 }
1241
1242 // Copy flags
1243 mp->flags = flags;
1244
1245 // Increment counter
1246 jail->num_mountpoints++;
1247
1248 return 0;
1249 }
1250
1251 static int pakfire_jail_mount_networking(struct pakfire_jail* jail) {
1252 int r;
1253
1254 const char* paths[] = {
1255 "/etc/hosts",
1256 "/etc/resolv.conf",
1257 NULL,
1258 };
1259
1260 // Bind-mount all paths read-only
1261 for (const char** path = paths; *path; path++) {
1262 r = pakfire_bind(jail->pakfire, *path, NULL, MS_RDONLY);
1263 if (r) {
1264 switch (errno) {
1265 // Ignore if we don't have permission
1266 case EPERM:
1267 continue;
1268
1269 default:
1270 break;
1271 }
1272 return r;
1273 }
1274 }
1275
1276 return 0;
1277 }
1278
1279 /*
1280 Mounts everything that we require in the new namespace
1281 */
1282 static int pakfire_jail_mount(struct pakfire_jail* jail, struct pakfire_jail_exec* ctx) {
1283 struct pakfire_jail_mountpoint* mp = NULL;
1284 int flags = 0;
1285 int r;
1286
1287 // Enable loop devices
1288 if (pakfire_jail_exec_has_flag(ctx, PAKFIRE_JAIL_HAS_LOOP_DEVICES))
1289 flags |= PAKFIRE_MOUNT_LOOP_DEVICES;
1290
1291 // Mount all default stuff
1292 r = pakfire_mount_all(jail->pakfire, PAKFIRE_MNTNS_OUTER, flags);
1293 if (r)
1294 return r;
1295
1296 // Populate /dev
1297 r = pakfire_populate_dev(jail->pakfire, flags);
1298 if (r)
1299 return r;
1300
1301 // Mount the interpreter (if needed)
1302 r = pakfire_mount_interpreter(jail->pakfire);
1303 if (r)
1304 return r;
1305
1306 // Mount networking stuff
1307 if (pakfire_jail_exec_has_flag(ctx, PAKFIRE_JAIL_HAS_NETWORKING)) {
1308 r = pakfire_jail_mount_networking(jail);
1309 if (r)
1310 return r;
1311 }
1312
1313 // Mount all custom stuff
1314 for (unsigned int i = 0; i < jail->num_mountpoints; i++) {
1315 // Fetch mountpoint
1316 mp = &jail->mountpoints[i];
1317
1318 // Mount it
1319 r = pakfire_bind(jail->pakfire, mp->source, mp->target, mp->flags);
1320 if (r)
1321 return r;
1322 }
1323
1324 return 0;
1325 }
1326
1327 // Networking
1328
1329 static int pakfire_jail_setup_loopback(struct pakfire_jail* jail) {
1330 struct nl_sock* nl = NULL;
1331 struct nl_cache* cache = NULL;
1332 struct rtnl_link* link = NULL;
1333 struct rtnl_link* change = NULL;
1334 int r;
1335
1336 DEBUG(jail->pakfire, "Setting up loopback...\n");
1337
1338 // Allocate a netlink socket
1339 nl = nl_socket_alloc();
1340 if (!nl) {
1341 ERROR(jail->pakfire, "Could not allocate a netlink socket: %m\n");
1342 r = 1;
1343 goto ERROR;
1344 }
1345
1346 // Connect the socket
1347 r = nl_connect(nl, NETLINK_ROUTE);
1348 if (r) {
1349 ERROR(jail->pakfire, "Could not connect netlink socket: %s\n", nl_geterror(r));
1350 goto ERROR;
1351 }
1352
1353 // Allocate the netlink cache
1354 r = rtnl_link_alloc_cache(nl, AF_UNSPEC, &cache);
1355 if (r < 0) {
1356 ERROR(jail->pakfire, "Unable to allocate netlink cache: %s\n", nl_geterror(r));
1357 goto ERROR;
1358 }
1359
1360 // Fetch loopback interface
1361 link = rtnl_link_get_by_name(cache, "lo");
1362 if (!link) {
1363 ERROR(jail->pakfire, "Could not find lo interface. Ignoring.\n");
1364 r = 0;
1365 goto ERROR;
1366 }
1367
1368 // Allocate a new link
1369 change = rtnl_link_alloc();
1370 if (!change) {
1371 ERROR(jail->pakfire, "Could not allocate change link\n");
1372 r = 1;
1373 goto ERROR;
1374 }
1375
1376 // Set the link to UP
1377 rtnl_link_set_flags(change, IFF_UP);
1378
1379 // Apply any changes
1380 r = rtnl_link_change(nl, link, change, 0);
1381 if (r) {
1382 ERROR(jail->pakfire, "Unable to activate loopback: %s\n", nl_geterror(r));
1383 goto ERROR;
1384 }
1385
1386 // Success
1387 r = 0;
1388
1389 ERROR:
1390 if (nl)
1391 nl_socket_free(nl);
1392
1393 return r;
1394 }
1395
1396 // UID/GID Mapping
1397
1398 static int pakfire_jail_setup_uid_mapping(struct pakfire_jail* jail, pid_t pid) {
1399 char path[PATH_MAX];
1400 int r;
1401
1402 // Skip mapping anything when running on /
1403 if (pakfire_on_root(jail->pakfire))
1404 return 0;
1405
1406 // Make path
1407 r = pakfire_string_format(path, "/proc/%d/uid_map", pid);
1408 if (r)
1409 return r;
1410
1411 // Fetch UID
1412 const uid_t uid = pakfire_uid(jail->pakfire);
1413
1414 // Fetch SUBUID
1415 const struct pakfire_subid* subuid = pakfire_subuid(jail->pakfire);
1416 if (!subuid)
1417 return 1;
1418
1419 /* When running as root, we will map the entire range.
1420
1421 When running as a non-privileged user, we will map the root user inside the jail
1422 to the user's UID outside of the jail, and we will map the rest starting from one.
1423 */
1424
1425 // Running as root
1426 if (uid == 0) {
1427 r = pakfire_file_write(jail->pakfire, path, 0, 0, 0,
1428 "0 %lu %lu\n", subuid->id, subuid->length);
1429 } else {
1430 r = pakfire_file_write(jail->pakfire, path, 0, 0, 0,
1431 "0 %lu 1\n1 %lu %lu\n", uid, subuid->id, subuid->length);
1432 }
1433
1434 if (r) {
1435 ERROR(jail->pakfire, "Could not map UIDs: %m\n");
1436 return r;
1437 }
1438
1439 return r;
1440 }
1441
1442 static int pakfire_jail_setup_gid_mapping(struct pakfire_jail* jail, pid_t pid) {
1443 char path[PATH_MAX];
1444 int r;
1445
1446 // Skip mapping anything when running on /
1447 if (pakfire_on_root(jail->pakfire))
1448 return 0;
1449
1450 // Fetch GID
1451 const gid_t gid = pakfire_gid(jail->pakfire);
1452
1453 // Fetch SUBGID
1454 const struct pakfire_subid* subgid = pakfire_subgid(jail->pakfire);
1455 if (!subgid)
1456 return 1;
1457
1458 // Make path
1459 r = pakfire_string_format(path, "/proc/%d/gid_map", pid);
1460 if (r)
1461 return r;
1462
1463 // Running as root
1464 if (gid == 0) {
1465 r = pakfire_file_write(jail->pakfire, path, 0, 0, 0,
1466 "0 %lu %lu\n", subgid->id, subgid->length);
1467 } else {
1468 r = pakfire_file_write(jail->pakfire, path, 0, 0, 0,
1469 "0 %lu 1\n1 %lu %lu\n", gid, subgid->id, subgid->length);
1470 }
1471
1472 if (r) {
1473 ERROR(jail->pakfire, "Could not map GIDs: %m\n");
1474 return r;
1475 }
1476
1477 return r;
1478 }
1479
1480 static int pakfire_jail_setgroups(struct pakfire_jail* jail, pid_t pid) {
1481 char path[PATH_MAX];
1482 int r;
1483
1484 // Make path
1485 r = pakfire_string_format(path, "/proc/%d/setgroups", pid);
1486 if (r)
1487 return r;
1488
1489 r = pakfire_file_write(jail->pakfire, path, 0, 0, 0, "deny\n");
1490 if (r) {
1491 CTX_ERROR(jail->ctx, "Could not set setgroups to deny: %s\n", strerror(errno));
1492 r = -errno;
1493 }
1494
1495 return r;
1496 }
1497
1498 static int pakfire_jail_send_signal(struct pakfire_jail* jail, int fd) {
1499 const uint64_t val = 1;
1500 int r = 0;
1501
1502 DEBUG(jail->pakfire, "Sending signal...\n");
1503
1504 // Write to the file descriptor
1505 r = eventfd_write(fd, val);
1506 if (r < 0) {
1507 ERROR(jail->pakfire, "Could not send signal: %s\n", strerror(errno));
1508 r = -errno;
1509 }
1510
1511 // Close the file descriptor
1512 close(fd);
1513
1514 return r;
1515 }
1516
1517 static int pakfire_jail_wait_for_signal(struct pakfire_jail* jail, int fd) {
1518 uint64_t val = 0;
1519 int r = 0;
1520
1521 DEBUG(jail->pakfire, "Waiting for signal...\n");
1522
1523 r = eventfd_read(fd, &val);
1524 if (r < 0) {
1525 ERROR(jail->pakfire, "Error waiting for signal: %s\n", strerror(errno));
1526 r = -errno;
1527 }
1528
1529 // Close the file descriptor
1530 close(fd);
1531
1532 return r;
1533 }
1534
1535 /*
1536 Performs the initialisation that needs to happen in the parent part
1537 */
1538 static int pakfire_jail_parent(struct pakfire_jail* jail, struct pakfire_jail_exec* ctx) {
1539 int r;
1540
1541 // Setup UID mapping
1542 r = pakfire_jail_setup_uid_mapping(jail, ctx->pid);
1543 if (r)
1544 return r;
1545
1546 // Write "deny" to /proc/PID/setgroups
1547 r = pakfire_jail_setgroups(jail, ctx->pid);
1548 if (r)
1549 return r;
1550
1551 // Setup GID mapping
1552 r = pakfire_jail_setup_gid_mapping(jail, ctx->pid);
1553 if (r)
1554 return r;
1555
1556 // Parent has finished initialisation
1557 DEBUG(jail->pakfire, "Parent has finished initialization\n");
1558
1559 // Send signal to client
1560 r = pakfire_jail_send_signal(jail, ctx->completed_fd);
1561 if (r)
1562 return r;
1563
1564 return 0;
1565 }
1566
1567 static int pakfire_jail_switch_root(struct pakfire_jail* jail, const char* root) {
1568 int r;
1569
1570 // Change to the new root
1571 r = chdir(root);
1572 if (r) {
1573 ERROR(jail->pakfire, "chdir(%s) failed: %m\n", root);
1574 return r;
1575 }
1576
1577 // Switch Root!
1578 r = pivot_root(".", ".");
1579 if (r) {
1580 ERROR(jail->pakfire, "Failed changing into the new root directory %s: %m\n", root);
1581 return r;
1582 }
1583
1584 // Umount the old root
1585 r = umount2(".", MNT_DETACH);
1586 if (r) {
1587 ERROR(jail->pakfire, "Could not umount the old root filesystem: %m\n");
1588 return r;
1589 }
1590
1591 return 0;
1592 }
1593
#if 0
/*
	Currently compiled out.

	Allocates a new PTY, stores its file descriptor and path in ctx and
	creates a symlink between /dev/console and the PTY.

	Returns 0 on success, a negative error number if the PTY could not be
	allocated, or the return code of pakfire_symlink().
*/
static int pakfire_jail_open_pty(struct pakfire_jail* jail, struct pakfire_jail_exec* ctx) {
	// Allocate a new PTY
	ctx->consolefd = posix_openpt(O_RDWR|O_NONBLOCK|O_NOCTTY|O_CLOEXEC);
	if (ctx->consolefd < 0)
		return -errno;

	// Fetch the path (ptsname_r() returns the error number)
	const int rc = ptsname_r(ctx->consolefd, ctx->console, sizeof(ctx->console));
	if (rc != 0)
		return -rc;

	CTX_DEBUG(jail->ctx, "Allocated console at %s (%d)\n", ctx->console, ctx->consolefd);

	// Create a symlink
	return pakfire_symlink(jail->ctx, "/dev/console", ctx->console);
}
#endif
1618
1619 static int pakfire_jail_child(struct pakfire_jail* jail, struct pakfire_jail_exec* ctx,
1620 const char* argv[]) {
1621 int r;
1622
1623 // Redirect any logging to our log pipe
1624 pakfire_ctx_set_log_callback(jail->ctx, pakfire_jail_log_redirect, &ctx->pipes);
1625
1626 // Fetch my own PID
1627 pid_t pid = getpid();
1628
1629 DEBUG(jail->pakfire, "Launched child process in jail with PID %d\n", pid);
1630
1631 // Wait for the parent to finish initialization
1632 r = pakfire_jail_wait_for_signal(jail, ctx->completed_fd);
1633 if (r)
1634 return r;
1635
1636 // Die with parent
1637 r = prctl(PR_SET_PDEATHSIG, SIGKILL, 0, 0, 0);
1638 if (r) {
1639 ERROR(jail->pakfire, "Could not configure to die with parent: %m\n");
1640 return 126;
1641 }
1642
1643 // Make this process dumpable
1644 r = prctl (PR_SET_DUMPABLE, 1, 0, 0, 0);
1645 if (r) {
1646 ERROR(jail->pakfire, "Could not make the process dumpable: %m\n");
1647 return 126;
1648 }
1649
1650 // Don't drop any capabilities on setuid()
1651 r = prctl(PR_SET_KEEPCAPS, 1);
1652 if (r) {
1653 ERROR(jail->pakfire, "Could not set PR_SET_KEEPCAPS: %m\n");
1654 return 126;
1655 }
1656
1657 // Fetch UID/GID
1658 uid_t uid = getuid();
1659 gid_t gid = getgid();
1660
1661 // Fetch EUID/EGID
1662 uid_t euid = geteuid();
1663 gid_t egid = getegid();
1664
1665 DEBUG(jail->pakfire, " UID: %u (effective %u)\n", uid, euid);
1666 DEBUG(jail->pakfire, " GID: %u (effective %u)\n", gid, egid);
1667
1668 // Log all mountpoints
1669 pakfire_mount_list(jail->ctx);
1670
1671 // Fail if we are not PID 1
1672 if (pid != 1) {
1673 CTX_ERROR(jail->ctx, "Child process is not PID 1\n");
1674 return 126;
1675 }
1676
1677 // Fail if we are not running as root
1678 if (uid || gid || euid || egid) {
1679 ERROR(jail->pakfire, "Child process is not running as root\n");
1680 return 126;
1681 }
1682
1683 // Mount all default stuff
1684 r = pakfire_mount_all(jail->pakfire, PAKFIRE_MNTNS_INNER, 0);
1685 if (r)
1686 return 126;
1687
1688 #if 0
1689 // Create a new session
1690 r = setsid();
1691 if (r < 0) {
1692 CTX_ERROR(jail->ctx, "Could not create a new session: %s\n", strerror(errno));
1693 return 126;
1694 }
1695
1696 // Allocate a new PTY
1697 r = pakfire_jail_open_pty(jail, ctx);
1698 if (r) {
1699 CTX_ERROR(jail->ctx, "Could not allocate a new PTY: %s\n", strerror(-r));
1700 return 126;
1701 }
1702 #endif
1703
1704 const char* root = pakfire_get_path(jail->pakfire);
1705 const char* arch = pakfire_get_effective_arch(jail->pakfire);
1706
1707 // Change mount propagation to slave to receive anything from the parent namespace
1708 r = pakfire_mount_change_propagation(jail->ctx, "/", MS_SLAVE);
1709 if (r)
1710 return r;
1711
1712 // Make root a mountpoint in the new mount namespace
1713 r = pakfire_mount_make_mounpoint(jail->pakfire, root);
1714 if (r)
1715 return r;
1716
1717 // Change mount propagation to private
1718 r = pakfire_mount_change_propagation(jail->ctx, root, MS_PRIVATE);
1719 if (r)
1720 return r;
1721
1722 // Change root (unless root is /)
1723 if (!pakfire_on_root(jail->pakfire)) {
1724 // Mount everything
1725 r = pakfire_jail_mount(jail, ctx);
1726 if (r)
1727 return r;
1728
1729 // chroot()
1730 r = pakfire_jail_switch_root(jail, root);
1731 if (r)
1732 return r;
1733 }
1734
1735 // Set personality
1736 unsigned long persona = pakfire_arch_personality(arch);
1737 if (persona) {
1738 r = personality(persona);
1739 if (r < 0) {
1740 ERROR(jail->pakfire, "Could not set personality (%x)\n", (unsigned int)persona);
1741 return 1;
1742 }
1743 }
1744
1745 // Setup networking
1746 if (!pakfire_jail_exec_has_flag(ctx, PAKFIRE_JAIL_HAS_NETWORKING)) {
1747 r = pakfire_jail_setup_loopback(jail);
1748 if (r)
1749 return 1;
1750 }
1751
1752 // Set nice level
1753 if (jail->nice) {
1754 DEBUG(jail->pakfire, "Setting nice level to %d\n", jail->nice);
1755
1756 r = setpriority(PRIO_PROCESS, pid, jail->nice);
1757 if (r) {
1758 ERROR(jail->pakfire, "Could not set nice level: %m\n");
1759 return 1;
1760 }
1761 }
1762
1763 // Close other end of log pipes
1764 close(ctx->pipes.log_INFO[0]);
1765 close(ctx->pipes.log_ERROR[0]);
1766 #ifdef ENABLE_DEBUG
1767 close(ctx->pipes.log_DEBUG[0]);
1768 #endif /* ENABLE_DEBUG */
1769
1770 // Connect standard input
1771 if (ctx->pipes.stdin[0] >= 0) {
1772 r = dup2(ctx->pipes.stdin[0], STDIN_FILENO);
1773 if (r < 0) {
1774 ERROR(jail->pakfire, "Could not connect fd %d to stdin: %m\n",
1775 ctx->pipes.stdin[0]);
1776
1777 return 1;
1778 }
1779 }
1780
1781 // Connect standard output and error
1782 if (ctx->pipes.stdout[1] >= 0 && ctx->pipes.stderr[1] >= 0) {
1783 r = dup2(ctx->pipes.stdout[1], STDOUT_FILENO);
1784 if (r < 0) {
1785 ERROR(jail->pakfire, "Could not connect fd %d to stdout: %m\n",
1786 ctx->pipes.stdout[1]);
1787
1788 return 1;
1789 }
1790
1791 r = dup2(ctx->pipes.stderr[1], STDERR_FILENO);
1792 if (r < 0) {
1793 ERROR(jail->pakfire, "Could not connect fd %d to stderr: %m\n",
1794 ctx->pipes.stderr[1]);
1795
1796 return 1;
1797 }
1798
1799 // Close the pipe (as we have moved the original file descriptors)
1800 pakfire_jail_close_pipe(jail, ctx->pipes.stdin);
1801 pakfire_jail_close_pipe(jail, ctx->pipes.stdout);
1802 pakfire_jail_close_pipe(jail, ctx->pipes.stderr);
1803 }
1804
1805 // Reset open file limit (http://0pointer.net/blog/file-descriptor-limits.html)
1806 r = pakfire_rlimit_reset_nofile(jail->pakfire);
1807 if (r)
1808 return r;
1809
1810 // Set capabilities
1811 r = pakfire_jail_set_capabilities(jail);
1812 if (r)
1813 return r;
1814
1815 // Show capabilities
1816 r = pakfire_jail_show_capabilities(jail);
1817 if (r)
1818 return r;
1819
1820 // Filter syscalls
1821 r = pakfire_jail_limit_syscalls(jail);
1822 if (r)
1823 return r;
1824
1825 DEBUG(jail->pakfire, "Child process initialization done\n");
1826 DEBUG(jail->pakfire, "Launching command:\n");
1827
1828 // Log argv
1829 for (unsigned int i = 0; argv[i]; i++)
1830 DEBUG(jail->pakfire, " argv[%u] = %s\n", i, argv[i]);
1831
1832 // exec() command
1833 r = execvpe(argv[0], (char**)argv, jail->env);
1834 if (r < 0) {
1835 // Translate errno into regular exit code
1836 switch (errno) {
1837 case ENOENT:
1838 // Ignore if the command doesn't exist
1839 if (ctx->flags & PAKFIRE_JAIL_NOENT_OK)
1840 r = 0;
1841 else
1842 r = 127;
1843
1844 break;
1845
1846 default:
1847 r = 1;
1848 }
1849
1850 ERROR(jail->pakfire, "Could not execve(%s): %m\n", argv[0]);
1851 }
1852
1853 // We should not get here
1854 return r;
1855 }
1856
1857 // Run a command in the jail
1858 static int __pakfire_jail_exec(struct pakfire_jail* jail, const char* argv[],
1859 const int interactive,
1860 pakfire_jail_communicate_in communicate_in,
1861 pakfire_jail_communicate_out communicate_out,
1862 void* data, int flags) {
1863 int exit = -1;
1864 int r;
1865
1866 // Check if argv is valid
1867 if (!argv || !argv[0]) {
1868 errno = EINVAL;
1869 return -1;
1870 }
1871
1872 // Initialize context for this call
1873 struct pakfire_jail_exec ctx = {
1874 .flags = flags,
1875
1876 .pipes = {
1877 .stdin = { -1, -1 },
1878 .stdout = { -1, -1 },
1879 .stderr = { -1, -1 },
1880 .log_INFO = { -1, -1 },
1881 .log_ERROR = { -1, -1 },
1882 #ifdef ENABLE_DEBUG
1883 .log_DEBUG = { -1, -1 },
1884 #endif /* ENABLE_DEBUG */
1885 },
1886
1887 .communicate = {
1888 .in = communicate_in,
1889 .out = communicate_out,
1890 .data = data,
1891 },
1892
1893 .pidfd = -1,
1894 };
1895
1896 DEBUG(jail->pakfire, "Executing jail...\n");
1897
1898 // Enable networking in interactive mode
1899 if (interactive)
1900 ctx.flags |= PAKFIRE_JAIL_HAS_NETWORKING;
1901
1902 /*
1903 Setup a file descriptor which can be used to notify the client that the parent
1904 has completed configuration.
1905 */
1906 ctx.completed_fd = eventfd(0, EFD_CLOEXEC);
1907 if (ctx.completed_fd < 0) {
1908 ERROR(jail->pakfire, "eventfd() failed: %m\n");
1909 return -1;
1910 }
1911
1912 // Create pipes to communicate with child process if we are not running interactively
1913 if (!interactive) {
1914 // stdin (only if callback is set)
1915 if (ctx.communicate.in) {
1916 r = pakfire_jail_setup_pipe(jail, &ctx.pipes.stdin, 0);
1917 if (r)
1918 goto ERROR;
1919 }
1920
1921 // stdout
1922 r = pakfire_jail_setup_pipe(jail, &ctx.pipes.stdout, 0);
1923 if (r)
1924 goto ERROR;
1925
1926 // stderr
1927 r = pakfire_jail_setup_pipe(jail, &ctx.pipes.stderr, 0);
1928 if (r)
1929 goto ERROR;
1930 }
1931
1932 // Setup pipes for logging
1933 // INFO
1934 r = pakfire_jail_setup_pipe(jail, &ctx.pipes.log_INFO, O_CLOEXEC);
1935 if (r)
1936 goto ERROR;
1937
1938 // ERROR
1939 r = pakfire_jail_setup_pipe(jail, &ctx.pipes.log_ERROR, O_CLOEXEC);
1940 if (r)
1941 goto ERROR;
1942
1943 #ifdef ENABLE_DEBUG
1944 // DEBUG
1945 r = pakfire_jail_setup_pipe(jail, &ctx.pipes.log_DEBUG, O_CLOEXEC);
1946 if (r)
1947 goto ERROR;
1948 #endif /* ENABLE_DEBUG */
1949
1950 // Configure child process
1951 struct clone_args args = {
1952 .flags =
1953 CLONE_NEWCGROUP |
1954 CLONE_NEWIPC |
1955 CLONE_NEWNS |
1956 CLONE_NEWPID |
1957 CLONE_NEWTIME |
1958 CLONE_NEWUSER |
1959 CLONE_NEWUTS |
1960 CLONE_PIDFD,
1961 .exit_signal = SIGCHLD,
1962 .pidfd = (long long unsigned int)&ctx.pidfd,
1963 };
1964
1965 // Launch the process in a cgroup that is a leaf of the configured cgroup
1966 if (jail->cgroup) {
1967 args.flags |= CLONE_INTO_CGROUP;
1968
1969 // Fetch our UUID
1970 const char* uuid = pakfire_jail_uuid(jail);
1971
1972 // Create a temporary cgroup
1973 r = pakfire_cgroup_child(&ctx.cgroup, jail->cgroup, uuid, 0);
1974 if (r) {
1975 ERROR(jail->pakfire, "Could not create cgroup for jail: %m\n");
1976 goto ERROR;
1977 }
1978
1979 // Clone into this cgroup
1980 args.cgroup = pakfire_cgroup_fd(ctx.cgroup);
1981 }
1982
1983 // Setup networking
1984 if (!pakfire_jail_exec_has_flag(&ctx, PAKFIRE_JAIL_HAS_NETWORKING)) {
1985 args.flags |= CLONE_NEWNET;
1986 }
1987
1988 // Fork this process
1989 ctx.pid = clone3(&args, sizeof(args));
1990 if (ctx.pid < 0) {
1991 ERROR(jail->pakfire, "Could not clone: %m\n");
1992 return -1;
1993
1994 // Child process
1995 } else if (ctx.pid == 0) {
1996 r = pakfire_jail_child(jail, &ctx, argv);
1997 _exit(r);
1998 }
1999
2000 // Parent process
2001 r = pakfire_jail_parent(jail, &ctx);
2002 if (r)
2003 goto ERROR;
2004
2005 DEBUG(jail->pakfire, "Waiting for PID %d to finish its work\n", ctx.pid);
2006
2007 // Read output of the child process
2008 r = pakfire_jail_wait(jail, &ctx);
2009 if (r)
2010 goto ERROR;
2011
2012 // Handle exit status
2013 switch (ctx.status.si_code) {
2014 case CLD_EXITED:
2015 DEBUG(jail->pakfire, "The child process exited with code %d\n",
2016 ctx.status.si_status);
2017
2018 // Pass exit code
2019 exit = ctx.status.si_status;
2020 break;
2021
2022 case CLD_KILLED:
2023 ERROR(jail->pakfire, "The child process was killed\n");
2024 exit = 139;
2025 break;
2026
2027 case CLD_DUMPED:
2028 ERROR(jail->pakfire, "The child process terminated abnormally\n");
2029 break;
2030
2031 // Log anything else
2032 default:
2033 ERROR(jail->pakfire, "Unknown child exit code: %d\n", ctx.status.si_code);
2034 break;
2035 }
2036
2037 ERROR:
2038 // Destroy the temporary cgroup (if any)
2039 if (ctx.cgroup) {
2040 // Read cgroup stats
2041 pakfire_cgroup_stat(ctx.cgroup, &ctx.cgroup_stats);
2042 pakfire_cgroup_stat_dump(ctx.cgroup, &ctx.cgroup_stats);
2043 pakfire_cgroup_destroy(ctx.cgroup);
2044 pakfire_cgroup_unref(ctx.cgroup);
2045 }
2046
2047 // Close any file descriptors
2048 pakfire_jail_close_pipe(jail, ctx.pipes.stdin);
2049 pakfire_jail_close_pipe(jail, ctx.pipes.stdout);
2050 pakfire_jail_close_pipe(jail, ctx.pipes.stderr);
2051 if (ctx.pidfd >= 0)
2052 close(ctx.pidfd);
2053 pakfire_jail_close_pipe(jail, ctx.pipes.log_INFO);
2054 pakfire_jail_close_pipe(jail, ctx.pipes.log_ERROR);
2055 #ifdef ENABLE_DEBUG
2056 pakfire_jail_close_pipe(jail, ctx.pipes.log_DEBUG);
2057 #endif /* ENABLE_DEBUG */
2058
2059 return exit;
2060 }
2061
2062 PAKFIRE_EXPORT int pakfire_jail_exec(
2063 struct pakfire_jail* jail,
2064 const char* argv[],
2065 pakfire_jail_communicate_in callback_in,
2066 pakfire_jail_communicate_out callback_out,
2067 void* data, int flags) {
2068 return __pakfire_jail_exec(jail, argv, 0, callback_in, callback_out, data, flags);
2069 }
2070
/*
	Runs argv in the jail in interactive mode, without any communication
	callbacks.

	The interactive environment is set up first; if that fails, its return
	code is returned and nothing is executed.
*/
static int pakfire_jail_exec_interactive(
		struct pakfire_jail* jail, const char* argv[], int flags) {
	// Setup interactive stuff
	const int rc = pakfire_jail_setup_interactive_env(jail);
	if (rc != 0)
		return rc;

	const int interactive = 1;

	return __pakfire_jail_exec(jail, argv, interactive, NULL, NULL, NULL, flags);
}
2082
2083 int pakfire_jail_exec_script(struct pakfire_jail* jail,
2084 const char* script,
2085 const size_t size,
2086 const char* args[],
2087 pakfire_jail_communicate_in callback_in,
2088 pakfire_jail_communicate_out callback_out,
2089 void* data) {
2090 char path[PATH_MAX];
2091 const char** argv = NULL;
2092 FILE* f = NULL;
2093 int r;
2094
2095 const char* root = pakfire_get_path(jail->pakfire);
2096
2097 // Write the scriptlet to disk
2098 r = pakfire_path_append(path, root, PAKFIRE_TMP_DIR "/pakfire-script.XXXXXX");
2099 if (r)
2100 goto ERROR;
2101
2102 // Create a temporary file
2103 f = pakfire_mktemp(path, 0700);
2104 if (!f) {
2105 ERROR(jail->pakfire, "Could not create temporary file: %m\n");
2106 goto ERROR;
2107 }
2108
2109 DEBUG(jail->pakfire, "Writing script to %s:\n%.*s\n", path, (int)size, script);
2110
2111 // Write data
2112 r = fprintf(f, "%s", script);
2113 if (r < 0) {
2114 ERROR(jail->pakfire, "Could not write script to file %s: %m\n", path);
2115 goto ERROR;
2116 }
2117
2118 // Close file
2119 r = fclose(f);
2120 if (r) {
2121 ERROR(jail->pakfire, "Could not close script file %s: %m\n", path);
2122 goto ERROR;
2123 }
2124
2125 f = NULL;
2126
2127 // Count how many arguments were passed
2128 unsigned int argc = 1;
2129 if (args) {
2130 for (const char** arg = args; *arg; arg++)
2131 argc++;
2132 }
2133
2134 argv = calloc(argc + 1, sizeof(*argv));
2135 if (!argv) {
2136 ERROR(jail->pakfire, "Could not allocate argv: %m\n");
2137 goto ERROR;
2138 }
2139
2140 // Set command
2141 argv[0] = (root) ? pakfire_path_relpath(root, path) : path;
2142
2143 // Copy args
2144 for (unsigned int i = 1; i < argc; i++)
2145 argv[i] = args[i-1];
2146
2147 // Run the script
2148 r = pakfire_jail_exec(jail, argv, callback_in, callback_out, data, 0);
2149
2150 ERROR:
2151 if (argv)
2152 free(argv);
2153 if (f)
2154 fclose(f);
2155
2156 // Remove script from disk
2157 if (*path)
2158 unlink(path);
2159
2160 return r;
2161 }
2162
2163 /*
2164 A convenience function that creates a new jail, runs the given command and destroys
2165 the jail again.
2166 */
2167 int pakfire_jail_run(struct pakfire* pakfire, const char* argv[], int flags, char** output) {
2168 struct pakfire_jail* jail = NULL;
2169 int r;
2170
2171 // Create a new jail
2172 r = pakfire_jail_create(&jail, pakfire);
2173 if (r)
2174 goto ERROR;
2175
2176 // Execute the command
2177 r = pakfire_jail_exec(jail, argv, NULL, pakfire_jail_capture_stdout, output, 0);
2178
2179 ERROR:
2180 if (jail)
2181 pakfire_jail_unref(jail);
2182
2183 return r;
2184 }
2185
/*
	Creates a new jail, runs the given script in it (without any communication
	callbacks) and destroys the jail again.

	Returns the return code of pakfire_jail_create() if that failed, or the
	result of pakfire_jail_exec_script() otherwise.

	NOTE(review): the flags argument is not used here - confirm whether that
	is intended.
*/
int pakfire_jail_run_script(struct pakfire* pakfire,
		const char* script, const size_t length, const char* argv[], int flags) {
	struct pakfire_jail* jail = NULL;

	// Create a new jail and, if that worked, execute the script
	int rc = pakfire_jail_create(&jail, pakfire);
	if (rc == 0)
		rc = pakfire_jail_exec_script(jail, script, length, argv, NULL, NULL, NULL);

	// Destroy the jail
	if (jail != NULL)
		pakfire_jail_unref(jail);

	return rc;
}
2205
/*
	Launches an interactive login shell (/bin/bash) in the jail.

	Returns a negative value if the shell could not be run. The exit code of
	the shell itself is discarded and 0 is returned instead.
*/
int pakfire_jail_shell(struct pakfire_jail* jail) {
	const char* shell[] = {
		"/bin/bash", "--login", NULL,
	};

	// Execute /bin/bash
	const int rc = pakfire_jail_exec_interactive(jail, shell, 0);

	// Only raise errors, but ignore any return codes from the shell
	return (rc < 0) ? rc : 0;
}
2223
2224 static int pakfire_jail_run_if_possible(struct pakfire* pakfire, const char** argv) {
2225 char path[PATH_MAX];
2226 int r;
2227
2228 r = pakfire_path(pakfire, path, "%s", *argv);
2229 if (r)
2230 return r;
2231
2232 // Check if the file is executable
2233 r = access(path, X_OK);
2234 if (r) {
2235 DEBUG(pakfire, "%s is not executable. Skipping...\n", *argv);
2236 return 0;
2237 }
2238
2239 return pakfire_jail_run(pakfire, argv, 0, NULL);
2240 }
2241
/*
	Runs /sbin/ldconfig in a temporary jail (if it is executable).

	Returns the result of pakfire_jail_run_if_possible().
*/
int pakfire_jail_ldconfig(struct pakfire* pakfire) {
	const char* command[] = { "/sbin/ldconfig", NULL };

	return pakfire_jail_run_if_possible(pakfire, command);
}
2250
/*
	Runs "/usr/bin/systemd-tmpfiles --create" in a temporary jail (if it is
	executable).

	Returns the result of pakfire_jail_run_if_possible().
*/
int pakfire_jail_run_systemd_tmpfiles(struct pakfire* pakfire) {
	const char* command[] = { "/usr/bin/systemd-tmpfiles", "--create", NULL };

	return pakfire_jail_run_if_possible(pakfire, command);
}