]> git.ipfire.org Git - people/ms/pakfire.git/blob - src/libpakfire/jail.c
5cc1dd753d366f1e62688e3705ad74073f375389
[people/ms/pakfire.git] / src / libpakfire / jail.c
1 /*#############################################################################
2 # #
3 # Pakfire - The IPFire package management system #
4 # Copyright (C) 2022 Pakfire development team #
5 # #
6 # This program is free software: you can redistribute it and/or modify #
7 # it under the terms of the GNU General Public License as published by #
8 # the Free Software Foundation, either version 3 of the License, or #
9 # (at your option) any later version. #
10 # #
11 # This program is distributed in the hope that it will be useful, #
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of #
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
14 # GNU General Public License for more details. #
15 # #
16 # You should have received a copy of the GNU General Public License #
17 # along with this program. If not, see <http://www.gnu.org/licenses/>. #
18 # #
19 #############################################################################*/
20
21 #include <errno.h>
22 #include <fcntl.h>
23 #include <linux/capability.h>
24 #include <linux/sched.h>
25 #include <sys/wait.h>
26 #include <linux/wait.h>
27 #include <sched.h>
28 #include <signal.h>
29 #include <stdlib.h>
30 #include <syscall.h>
31 #include <sys/capability.h>
32 #include <sys/epoll.h>
33 #include <sys/eventfd.h>
34 #include <sys/mount.h>
35 #include <sys/personality.h>
36 #include <sys/prctl.h>
37 #include <sys/resource.h>
38 #include <sys/timerfd.h>
39 #include <sys/types.h>
40 #include <sys/wait.h>
41
42 // libnl3
43 #include <net/if.h>
44 #include <netlink/route/link.h>
45
46 // libseccomp
47 #include <seccomp.h>
48
49 // libuuid
50 #include <uuid.h>
51
52 #include <pakfire/arch.h>
53 #include <pakfire/cgroup.h>
54 #include <pakfire/jail.h>
55 #include <pakfire/logging.h>
56 #include <pakfire/mount.h>
57 #include <pakfire/pakfire.h>
58 #include <pakfire/path.h>
59 #include <pakfire/private.h>
60 #include <pakfire/pwd.h>
61 #include <pakfire/string.h>
62 #include <pakfire/util.h>
63
// Size (in bytes) of each buffer that collects output from the jail.
// The expansion is parenthesised so that the macro behaves like a single
// value inside larger expressions (e.g. "x / BUFFER_SIZE").
#define BUFFER_SIZE      (1024 * 64)

// Number of slots in the environment array of a jail
#define ENVIRON_SIZE     128

// Maximum number of events fetched by a single call to epoll_wait()
#define EPOLL_MAX_EVENTS 2

// Maximum number of custom bind-mounts per jail
#define MAX_MOUNTPOINTS  8
68
// Key/value pairs which are exported into the environment of every jail.
// The table is terminated by an entry whose key is NULL.
static const struct environ {
	const char* key;
	const char* val;
} ENV[] = {
	{ .key = "HOME", .val = "/root" },
	{ .key = "LANG", .val = "C.utf-8" },
	{ .key = "PATH", .val = "/usr/local/sbin:/usr/sbin:/sbin:/usr/local/bin:/usr/bin:/bin" },
	{ .key = "TERM", .val = "vt100" },

	// Lets processes detect that they are running inside a Pakfire container
	{ .key = "container", .val = "pakfire" },

	// Sentinel
	{ .key = NULL, .val = NULL },
};
83
// Describes one custom bind-mount that has been registered for a jail
struct pakfire_jail_mountpoint {
	// Path that is being mounted
	char source[PATH_MAX];

	// Path where the source is being mounted to
	char target[PATH_MAX];

	// Mount flags which are passed on when the mountpoint is bound
	int flags;
};
89
90 struct pakfire_jail {
91 struct pakfire_ctx* ctx;
92 struct pakfire* pakfire;
93 int nrefs;
94
95 // A unique ID for each jail
96 uuid_t uuid;
97 char __uuid[UUID_STR_LEN];
98
99 // Resource Limits
100 int nice;
101
102 // Timeout
103 struct itimerspec timeout;
104
105 // CGroup
106 struct pakfire_cgroup* cgroup;
107
108 // Environment
109 char* env[ENVIRON_SIZE];
110
111 // Mountpoints
112 struct pakfire_jail_mountpoint mountpoints[MAX_MOUNTPOINTS];
113 unsigned int num_mountpoints;
114
115 // Callbacks
116 struct pakfire_jail_callbacks {
117 // Log
118 pakfire_jail_log_callback log;
119 void* log_data;
120 } callbacks;
121 };
122
123 struct pakfire_log_buffer {
124 char data[BUFFER_SIZE];
125 size_t used;
126 };
127
128 struct pakfire_jail_exec {
129 int flags;
130
131 // PID (of the child)
132 pid_t pid;
133 int pidfd;
134
135 // Process status (from waitid)
136 siginfo_t status;
137
138 // FD to notify the client that the parent has finished initialization
139 int completed_fd;
140
141 // Log pipes
142 struct pakfire_jail_pipes {
143 int stdin[2];
144 int stdout[2];
145 int stderr[2];
146
147 // Logging
148 int log_INFO[2];
149 int log_ERROR[2];
150 #ifdef ENABLE_DEBUG
151 int log_DEBUG[2];
152 #endif /* ENABLE_DEBUG */
153 } pipes;
154
155 // Communicate
156 struct pakfire_jail_communicate {
157 pakfire_jail_communicate_in in;
158 pakfire_jail_communicate_out out;
159 void* data;
160 } communicate;
161
162 // Log buffers
163 struct pakfire_jail_buffers {
164 struct pakfire_log_buffer stdout;
165 struct pakfire_log_buffer stderr;
166
167 // Logging
168 struct pakfire_log_buffer log_INFO;
169 struct pakfire_log_buffer log_ERROR;
170 #ifdef ENABLE_DEBUG
171 struct pakfire_log_buffer log_DEBUG;
172 #endif /* ENABLE_DEBUG */
173 } buffers;
174
175 struct pakfire_cgroup* cgroup;
176 struct pakfire_cgroup_stats cgroup_stats;
177
178 // Console
179 char console[PATH_MAX];
180 int consolefd;
181 };
182
// Thin wrapper that invokes the clone3 system call through syscall()
static int clone3(struct clone_args* args, size_t size) {
	const long result = syscall(__NR_clone3, args, size);

	return (int)result;
}
186
187 static int pidfd_send_signal(int pidfd, int sig, siginfo_t* info, unsigned int flags) {
188 return syscall(SYS_pidfd_send_signal, pidfd, sig, info, flags);
189 }
190
// Thin wrapper that invokes the pivot_root system call through syscall()
static int pivot_root(const char* new_root, const char* old_root) {
	const long result = syscall(SYS_pivot_root, new_root, old_root);

	return (int)result;
}
194
195 static int pakfire_jail_exec_has_flag(
196 const struct pakfire_jail_exec* ctx, const enum pakfire_jail_exec_flags flag) {
197 return ctx->flags & flag;
198 }
199
200 static void pakfire_jail_free(struct pakfire_jail* jail) {
201 DEBUG(jail->pakfire, "Freeing jail at %p\n", jail);
202
203 // Free environment
204 for (unsigned int i = 0; jail->env[i]; i++)
205 free(jail->env[i]);
206
207 if (jail->cgroup)
208 pakfire_cgroup_unref(jail->cgroup);
209 if (jail->pakfire)
210 pakfire_unref(jail->pakfire);
211 if (jail->ctx)
212 pakfire_ctx_unref(jail->ctx);
213 free(jail);
214 }
215
/*
	The default log callback.

	It forwards the line to the Pakfire logger at the level that matches
	priority. Lines of any other priority than LOG_INFO, LOG_ERR (and
	LOG_DEBUG if debugging has been enabled) are dropped.

	Always returns zero.
*/
static int pakfire_jail_default_log_callback(struct pakfire* pakfire, void* data,
		int priority, const char* line, size_t length) {
	if (priority == LOG_INFO) {
		INFO(pakfire, "%s", line);

	} else if (priority == LOG_ERR) {
		ERROR(pakfire, "%s", line);

#ifdef ENABLE_DEBUG
	} else if (priority == LOG_DEBUG) {
		DEBUG(pakfire, "%s", line);
#endif
	}

	return 0;
}
239
240 static const char* pakfire_jail_uuid(struct pakfire_jail* jail) {
241 if (!*jail->__uuid)
242 uuid_unparse_lower(jail->uuid, jail->__uuid);
243
244 return jail->__uuid;
245 }
246
// Prepares the environment for an interactive session: sets a prompt and
// copies TERM and LANG from the calling process (if they are set there).
// Returns zero on success or the error of pakfire_jail_set_env().
static int pakfire_jail_setup_interactive_env(struct pakfire_jail* jail) {
	// Variables which are inherited from the calling process
	static const char* const inherited[] = { "TERM", "LANG", NULL };

	// Set the prompt
	int r = pakfire_jail_set_env(jail, "PS1", "pakfire-jail \\w> ");
	if (r)
		return r;

	for (const char* const* name = inherited; *name; name++) {
		const char* value = secure_getenv(*name);

		// Skip anything that is not set
		if (!value)
			continue;

		r = pakfire_jail_set_env(jail, *name, value);
		if (r)
			return r;
	}

	return 0;
}
271
272 PAKFIRE_EXPORT int pakfire_jail_create(struct pakfire_jail** jail, struct pakfire* pakfire) {
273 int r;
274
275 const char* arch = pakfire_get_effective_arch(pakfire);
276
277 // Allocate a new jail
278 struct pakfire_jail* j = calloc(1, sizeof(*j));
279 if (!j)
280 return 1;
281
282 // Reference context
283 j->ctx = pakfire_ctx(pakfire);
284
285 // Reference Pakfire
286 j->pakfire = pakfire_ref(pakfire);
287
288 // Initialize reference counter
289 j->nrefs = 1;
290
291 // Generate a random UUID
292 uuid_generate_random(j->uuid);
293
294 DEBUG(j->pakfire, "Allocated new jail at %p\n", j);
295
296 // Set the default logging callback
297 pakfire_jail_set_log_callback(j, pakfire_jail_default_log_callback, NULL);
298
299 // Set default environment
300 for (const struct environ* e = ENV; e->key; e++) {
301 r = pakfire_jail_set_env(j, e->key, e->val);
302 if (r)
303 goto ERROR;
304 }
305
306 // Enable all CPU features that CPU has to offer
307 if (!pakfire_arch_is_supported_by_host(arch)) {
308 r = pakfire_jail_set_env(j, "QEMU_CPU", "max");
309 if (r)
310 goto ERROR;
311 }
312
313 // Set container UUID
314 r = pakfire_jail_set_env(j, "container_uuid", pakfire_jail_uuid(j));
315 if (r)
316 goto ERROR;
317
318 // Disable systemctl to talk to systemd
319 if (!pakfire_on_root(j->pakfire)) {
320 r = pakfire_jail_set_env(j, "SYSTEMD_OFFLINE", "1");
321 if (r)
322 goto ERROR;
323 }
324
325 // Done
326 *jail = j;
327 return 0;
328
329 ERROR:
330 pakfire_jail_free(j);
331
332 return r;
333 }
334
335 PAKFIRE_EXPORT struct pakfire_jail* pakfire_jail_ref(struct pakfire_jail* jail) {
336 ++jail->nrefs;
337
338 return jail;
339 }
340
341 PAKFIRE_EXPORT struct pakfire_jail* pakfire_jail_unref(struct pakfire_jail* jail) {
342 if (--jail->nrefs > 0)
343 return jail;
344
345 pakfire_jail_free(jail);
346 return NULL;
347 }
348
349 // Logging Callback
350
351 PAKFIRE_EXPORT void pakfire_jail_set_log_callback(struct pakfire_jail* jail,
352 pakfire_jail_log_callback callback, void* data) {
353 jail->callbacks.log = callback;
354 jail->callbacks.log_data = data;
355 }
356
357 // Resource Limits
358
359 PAKFIRE_EXPORT int pakfire_jail_nice(struct pakfire_jail* jail, int nice) {
360 // Check if nice level is in range
361 if (nice < -19 || nice > 20) {
362 errno = EINVAL;
363 return 1;
364 }
365
366 // Store nice level
367 jail->nice = nice;
368
369 return 0;
370 }
371
372 int pakfire_jail_set_cgroup(struct pakfire_jail* jail, struct pakfire_cgroup* cgroup) {
373 // Free any previous cgroup
374 if (jail->cgroup) {
375 pakfire_cgroup_unref(jail->cgroup);
376 jail->cgroup = NULL;
377 }
378
379 // Set any new cgroup
380 if (cgroup) {
381 DEBUG(jail->pakfire, "Setting cgroup %p\n", cgroup);
382
383 jail->cgroup = pakfire_cgroup_ref(cgroup);
384 }
385
386 // Done
387 return 0;
388 }
389
390 // Environment
391
392 // Returns the length of the environment
393 static unsigned int pakfire_jail_env_length(struct pakfire_jail* jail) {
394 unsigned int i = 0;
395
396 // Count everything in the environment
397 for (char** e = jail->env; *e; e++)
398 i++;
399
400 return i;
401 }
402
403 // Finds an existing environment variable and returns its index or -1 if not found
404 static int pakfire_jail_find_env(struct pakfire_jail* jail, const char* key) {
405 if (!key) {
406 errno = EINVAL;
407 return -1;
408 }
409
410 const size_t length = strlen(key);
411
412 for (unsigned int i = 0; jail->env[i]; i++) {
413 if ((pakfire_string_startswith(jail->env[i], key)
414 && *(jail->env[i] + length) == '=')) {
415 return i;
416 }
417 }
418
419 // Nothing found
420 return -1;
421 }
422
423 // Returns the value of an environment variable or NULL
424 PAKFIRE_EXPORT const char* pakfire_jail_get_env(struct pakfire_jail* jail,
425 const char* key) {
426 int i = pakfire_jail_find_env(jail, key);
427 if (i < 0)
428 return NULL;
429
430 return jail->env[i] + strlen(key) + 1;
431 }
432
433 // Sets an environment variable
434 PAKFIRE_EXPORT int pakfire_jail_set_env(struct pakfire_jail* jail,
435 const char* key, const char* value) {
436 // Find the index where to write this value to
437 int i = pakfire_jail_find_env(jail, key);
438 if (i < 0)
439 i = pakfire_jail_env_length(jail);
440
441 // Return -ENOSPC when the environment is full
442 if (i >= ENVIRON_SIZE) {
443 errno = ENOSPC;
444 return -1;
445 }
446
447 // Free any previous value
448 if (jail->env[i])
449 free(jail->env[i]);
450
451 // Format and set environment variable
452 asprintf(&jail->env[i], "%s=%s", key, value);
453
454 DEBUG(jail->pakfire, "Set environment variable: %s\n", jail->env[i]);
455
456 return 0;
457 }
458
459 // Imports an environment
460 PAKFIRE_EXPORT int pakfire_jail_import_env(struct pakfire_jail* jail, const char* env[]) {
461 if (!env)
462 return 0;
463
464 char* key;
465 char* val;
466 int r;
467
468 // Copy environment variables
469 for (unsigned int i = 0; env[i]; i++) {
470 r = pakfire_string_partition(env[i], "=", &key, &val);
471 if (r)
472 continue;
473
474 // Set value
475 r = pakfire_jail_set_env(jail, key, val);
476
477 if (key)
478 free(key);
479 if (val)
480 free(val);
481
482 // Break on error
483 if (r)
484 return r;
485 }
486
487 return 0;
488 }
489
490 // Timeout
491
492 PAKFIRE_EXPORT int pakfire_jail_set_timeout(
493 struct pakfire_jail* jail, unsigned int timeout) {
494 // Store value
495 jail->timeout.it_value.tv_sec = timeout;
496
497 if (timeout > 0)
498 DEBUG(jail->pakfire, "Timeout set to %u second(s)\n", timeout);
499 else
500 DEBUG(jail->pakfire, "Timeout disabled\n");
501
502 return 0;
503 }
504
505 static int pakfire_jail_create_timer(struct pakfire_jail* jail) {
506 int r;
507
508 // Nothing to do if no timeout has been set
509 if (!jail->timeout.it_value.tv_sec)
510 return -1;
511
512 // Create a new timer
513 const int fd = timerfd_create(CLOCK_MONOTONIC, 0);
514 if (fd < 0) {
515 ERROR(jail->pakfire, "Could not create timer: %m\n");
516 goto ERROR;
517 }
518
519 // Arm timer
520 r = timerfd_settime(fd, 0, &jail->timeout, NULL);
521 if (r) {
522 ERROR(jail->pakfire, "Could not arm timer: %m\n");
523 goto ERROR;
524 }
525
526 return fd;
527
528 ERROR:
529 if (fd >= 0)
530 close(fd);
531
532 return -1;
533 }
534
535 /*
536 This function replaces any logging in the child process.
537
538 All log messages will be sent to the parent process through their respective pipes.
539 */
540 static void pakfire_jail_log_redirect(void* data, int priority, const char* file,
541 int line, const char* fn, const char* format, va_list args) {
542 struct pakfire_jail_pipes* pipes = (struct pakfire_jail_pipes*)data;
543 int fd;
544
545 switch (priority) {
546 case LOG_INFO:
547 fd = pipes->log_INFO[1];
548 break;
549
550 case LOG_ERR:
551 fd = pipes->log_ERROR[1];
552 break;
553
554 #ifdef ENABLE_DEBUG
555 case LOG_DEBUG:
556 fd = pipes->log_DEBUG[1];
557 break;
558 #endif /* ENABLE_DEBUG */
559
560 // Ignore any messages of an unknown priority
561 default:
562 return;
563 }
564
565 // Send the log message
566 if (fd >= 0)
567 vdprintf(fd, format, args);
568 }
569
570 static int pakfire_jail_log_buffer_is_full(const struct pakfire_log_buffer* buffer) {
571 return (sizeof(buffer->data) == buffer->used);
572 }
573
574 /*
575 This function reads as much data as it can from the file descriptor.
576 If it finds a whole line in it, it will send it to the logger and repeat the process.
577 If not newline character is found, it will try to read more data until it finds one.
578 */
579 static int pakfire_jail_handle_log(struct pakfire_jail* jail,
580 struct pakfire_jail_exec* ctx, int priority, int fd,
581 struct pakfire_log_buffer* buffer, pakfire_jail_communicate_out callback, void* data) {
582 char line[BUFFER_SIZE + 1];
583
584 // Fill up buffer from fd
585 if (buffer->used < sizeof(buffer->data)) {
586 ssize_t bytes_read = read(fd, buffer->data + buffer->used,
587 sizeof(buffer->data) - buffer->used);
588
589 // Handle errors
590 if (bytes_read < 0) {
591 ERROR(jail->pakfire, "Could not read from fd %d: %m\n", fd);
592 return -1;
593 }
594
595 // Update buffer size
596 buffer->used += bytes_read;
597 }
598
599 // See if we have any lines that we can write
600 while (buffer->used) {
601 // Search for the end of the first line
602 char* eol = memchr(buffer->data, '\n', buffer->used);
603
604 // No newline found
605 if (!eol) {
606 // If the buffer is full, we send the content to the logger and try again
607 // This should not happen in practise
608 if (pakfire_jail_log_buffer_is_full(buffer)) {
609 DEBUG(jail->pakfire, "Logging buffer is full. Sending all content\n");
610
611 eol = buffer->data + sizeof(buffer->data) - 1;
612
613 // Otherwise we might have only read parts of the output
614 } else
615 break;
616 }
617
618 // Find the length of the string
619 size_t length = eol - buffer->data + 1;
620
621 // Copy the line into the buffer
622 memcpy(line, buffer->data, length);
623
624 // Terminate the string
625 line[length] = '\0';
626
627 // Log the line
628 if (callback) {
629 int r = callback(jail->pakfire, data, priority, line, length);
630 if (r) {
631 ERROR(jail->pakfire, "The logging callback returned an error: %d\n", r);
632 return r;
633 }
634 }
635
636 // Remove line from buffer
637 memmove(buffer->data, buffer->data + length, buffer->used - length);
638 buffer->used -= length;
639 }
640
641 return 0;
642 }
643
644 static int pakfire_jail_stream_stdin(struct pakfire_jail* jail,
645 struct pakfire_jail_exec* ctx, const int fd) {
646 int r;
647
648 // Nothing to do if there is no stdin callback set
649 if (!ctx->communicate.in) {
650 DEBUG(jail->pakfire, "Callback for standard input is not set\n");
651 return 0;
652 }
653
654 // Skip if the writing pipe has already been closed
655 if (!ctx->pipes.stdin[1])
656 return 0;
657
658 DEBUG(jail->pakfire, "Streaming standard input...\n");
659
660 // Calling the callback
661 r = ctx->communicate.in(jail->pakfire, ctx->communicate.data, fd);
662
663 DEBUG(jail->pakfire, "Standard input callback finished: %d\n", r);
664
665 // The callback signaled that it has written everything
666 if (r == EOF) {
667 DEBUG(jail->pakfire, "Closing standard input pipe\n");
668
669 // Close the file-descriptor
670 close(fd);
671
672 // Reset the file-descriptor so it won't be closed again later
673 ctx->pipes.stdin[1] = -1;
674
675 // Report success
676 r = 0;
677 }
678
679 return r;
680 }
681
682 static int pakfire_jail_setup_pipe(struct pakfire_jail* jail, int (*fds)[2], const int flags) {
683 int r = pipe2(*fds, flags);
684 if (r < 0) {
685 ERROR(jail->pakfire, "Could not setup pipe: %m\n");
686 return 1;
687 }
688
689 return 0;
690 }
691
// Closes both ends of a pipe. Ends that are negative are skipped.
static void pakfire_jail_close_pipe(struct pakfire_jail* jail, int fds[2]) {
	const int* end = fds;

	// Walk through the read end and then the write end
	while (end < fds + 2) {
		if (*end >= 0)
			close(*end);

		end++;
	}
}
697
/*
	Prepares a pipe for reading.

	The write end is closed (and marked with -1) if it is open. Returns the
	read end, or -1 if the read end is not open.
*/
static int pakfire_jail_get_pipe_to_read(struct pakfire_jail* jail, int (*fds)[2]) {
	int* ends = *fds;

	// We won't write to this pipe
	if (ends[1] >= 0) {
		close(ends[1]);
		ends[1] = -1;
	}

	// Hand out the read end
	return (ends[0] >= 0) ? ends[0] : -1;
}
719
/*
	Prepares a pipe for writing.

	The read end is closed (and marked with -1) if it is open. Returns the
	write end, or -1 if the write end is not open.
*/
static int pakfire_jail_get_pipe_to_write(struct pakfire_jail* jail, int (*fds)[2]) {
	int* ends = *fds;

	// We won't read from this pipe
	if (ends[0] >= 0) {
		close(ends[0]);
		ends[0] = -1;
	}

	// Hand out the write end
	return (ends[1] >= 0) ? ends[1] : -1;
}
737
// Callback for lines from the log pipes of the child process: passes
// exactly length bytes of line on to the logger of the parent process at
// the given priority. Always returns zero.
static int pakfire_jail_log(struct pakfire* pakfire, void* data, int priority,
		const char* line, const size_t length) {
	// The precision of %.*s has to be an integer
	const int precision = (int)length;

	pakfire_log_condition(pakfire, priority, 0, "%.*s", precision, line);

	return 0;
}
745
746 static int pakfire_jail_epoll_add_fd(struct pakfire_jail* jail, int epollfd, int fd, int events) {
747 struct epoll_event event = {
748 .events = events|EPOLLHUP,
749 .data = {
750 .fd = fd,
751 },
752 };
753 int r;
754
755 // Read flags
756 int flags = fcntl(fd, F_GETFL, 0);
757
758 // Set modified flags
759 r = fcntl(fd, F_SETFL, flags|O_NONBLOCK);
760 if (r < 0) {
761 CTX_ERROR(jail->ctx, "Could not set file descriptor %d into non-blocking mode: %s\n",
762 fd, strerror(errno));
763 return -errno;
764 }
765
766 // Add the file descriptor to the loop
767 r = epoll_ctl(epollfd, EPOLL_CTL_ADD, fd, &event);
768 if (r < 0) {
769 ERROR(jail->pakfire, "Could not add file descriptor %d to epoll(): %s\n",
770 fd, strerror(errno));
771 return -errno;
772 }
773
774 return 0;
775 }
776
777 static int pakfire_jail_wait(struct pakfire_jail* jail, struct pakfire_jail_exec* ctx) {
778 int epollfd = -1;
779 struct epoll_event events[EPOLL_MAX_EVENTS];
780 char garbage[8];
781 int r = 0;
782
783 // Fetch file descriptors from context
784 const int stdin = pakfire_jail_get_pipe_to_write(jail, &ctx->pipes.stdin);
785 const int stdout = pakfire_jail_get_pipe_to_read(jail, &ctx->pipes.stdout);
786 const int stderr = pakfire_jail_get_pipe_to_read(jail, &ctx->pipes.stderr);
787 const int pidfd = ctx->pidfd;
788
789 // Timer
790 const int timerfd = pakfire_jail_create_timer(jail);
791
792 // Logging
793 const int log_INFO = pakfire_jail_get_pipe_to_read(jail, &ctx->pipes.log_INFO);
794 const int log_ERROR = pakfire_jail_get_pipe_to_read(jail, &ctx->pipes.log_ERROR);
795 #ifdef ENABLE_DEBUG
796 const int log_DEBUG = pakfire_jail_get_pipe_to_read(jail, &ctx->pipes.log_DEBUG);
797 #endif /* ENABLE_DEBUG */
798
799 // Make a list of all file descriptors we are interested in
800 const struct pakfire_wait_fds {
801 const int fd;
802 const int events;
803 } fds[] = {
804 // Standard input/output
805 { stdin, EPOLLOUT },
806 { stdout, EPOLLIN },
807 { stderr, EPOLLIN },
808
809 // Timer
810 { timerfd, EPOLLIN },
811
812 // Child Process
813 { ctx->pidfd, EPOLLIN },
814
815 // Log Pipes
816 { log_INFO, EPOLLIN },
817 { log_ERROR, EPOLLIN },
818 #ifdef ENABLE_DEBUG
819 { log_DEBUG, EPOLLIN },
820 #endif /* ENABLE_DEBUG */
821
822 // Sentinel
823 { -1, 0 },
824 };
825
826 // Setup epoll
827 epollfd = epoll_create1(0);
828 if (epollfd < 0) {
829 ERROR(jail->pakfire, "Could not initialize epoll(): %m\n");
830 r = 1;
831 goto ERROR;
832 }
833
834 // Turn file descriptors into non-blocking mode and add them to epoll()
835 for (const struct pakfire_wait_fds* fd = fds; fd->events; fd++) {
836 // Skip fds which were not initialized
837 if (fd->fd < 0)
838 continue;
839
840 // Add the FD to the event loop
841 r = pakfire_jail_epoll_add_fd(jail, epollfd, fd->fd, fd->events);
842 if (r)
843 goto ERROR;
844 }
845
846 int ended = 0;
847
848 // Loop for as long as the process is alive
849 while (!ended) {
850 int num = epoll_wait(epollfd, events, EPOLL_MAX_EVENTS, -1);
851 if (num < 1) {
852 // Ignore if epoll_wait() has been interrupted
853 if (errno == EINTR)
854 continue;
855
856 ERROR(jail->pakfire, "epoll_wait() failed: %m\n");
857 r = 1;
858
859 goto ERROR;
860 }
861
862 for (int i = 0; i < num; i++) {
863 int e = events[i].events;
864 int fd = events[i].data.fd;
865
866 struct pakfire_log_buffer* buffer = NULL;
867 pakfire_jail_communicate_out callback = NULL;
868 void* data = NULL;
869 int priority;
870
871 // Check if there is any data to be read
872 if (e & EPOLLIN) {
873 // Handle any changes to the PIDFD
874 if (fd == pidfd) {
875 // Call waidid() and store the result
876 r = waitid(P_PIDFD, ctx->pidfd, &ctx->status, WEXITED);
877 if (r) {
878 ERROR(jail->pakfire, "waitid() failed: %m\n");
879 goto ERROR;
880 }
881
882 // Mark that we have ended so that we will process the remaining
883 // events from epoll() now, but won't restart the outer loop.
884 ended = 1;
885 continue;
886
887 // Handle timer events
888 } else if (fd == timerfd) {
889 DEBUG(jail->pakfire, "Timer event received\n");
890
891 // Disarm the timer
892 r = read(timerfd, garbage, sizeof(garbage));
893 if (r < 1) {
894 ERROR(jail->pakfire, "Could not disarm timer: %m\n");
895 r = 1;
896 goto ERROR;
897 }
898
899 // Terminate the process if it hasn't already ended
900 if (!ended) {
901 DEBUG(jail->pakfire, "Terminating process...\n");
902
903 // Send SIGTERM to the process
904 r = pidfd_send_signal(pidfd, SIGKILL, NULL, 0);
905 if (r) {
906 ERROR(jail->pakfire, "Could not kill process: %m\n");
907 goto ERROR;
908 }
909 }
910
911 // Don't fall through to log processing
912 continue;
913
914 // Handle logging messages
915 } else if (fd == log_INFO) {
916 buffer = &ctx->buffers.log_INFO;
917 priority = LOG_INFO;
918
919 callback = pakfire_jail_log;
920
921 } else if (fd == log_ERROR) {
922 buffer = &ctx->buffers.log_ERROR;
923 priority = LOG_ERR;
924
925 callback = pakfire_jail_log;
926
927 #ifdef ENABLE_DEBUG
928 } else if (fd == log_DEBUG) {
929 buffer = &ctx->buffers.log_DEBUG;
930 priority = LOG_DEBUG;
931
932 callback = pakfire_jail_log;
933 #endif /* ENABLE_DEBUG */
934
935 // Handle anything from the log pipes
936 } else if (fd == stdout) {
937 buffer = &ctx->buffers.stdout;
938 priority = LOG_INFO;
939
940 // Send any output to the default logger if no callback is set
941 if (ctx->communicate.out) {
942 callback = ctx->communicate.out;
943 data = ctx->communicate.data;
944 } else {
945 callback = jail->callbacks.log;
946 data = jail->callbacks.log_data;
947 }
948
949 } else if (fd == stderr) {
950 buffer = &ctx->buffers.stderr;
951 priority = LOG_ERR;
952
953 // Send any output to the default logger if no callback is set
954 if (ctx->communicate.out) {
955 callback = ctx->communicate.out;
956 data = ctx->communicate.data;
957 } else {
958 callback = jail->callbacks.log;
959 data = jail->callbacks.log_data;
960 }
961
962 } else {
963 DEBUG(jail->pakfire, "Received invalid file descriptor %d\n", fd);
964 continue;
965 }
966
967 // Handle log event
968 r = pakfire_jail_handle_log(jail, ctx, priority, fd, buffer, callback, data);
969 if (r)
970 goto ERROR;
971 }
972
973 if (e & EPOLLOUT) {
974 // Handle standard input
975 if (fd == stdin) {
976 r = pakfire_jail_stream_stdin(jail, ctx, fd);
977 if (r) {
978 switch (errno) {
979 // Ignore if we filled up the buffer
980 case EAGAIN:
981 break;
982
983 default:
984 ERROR(jail->pakfire, "Could not write to stdin: %m\n");
985 goto ERROR;
986 }
987 }
988 }
989 }
990
991 // Check if any file descriptors have been closed
992 if (e & EPOLLHUP) {
993 // Remove the file descriptor
994 r = epoll_ctl(epollfd, EPOLL_CTL_DEL, fd, NULL);
995 if (r) {
996 ERROR(jail->pakfire, "Could not remove closed file-descriptor %d: %m\n", fd);
997 goto ERROR;
998 }
999 }
1000 }
1001 }
1002
1003 ERROR:
1004 if (epollfd >= 0)
1005 close(epollfd);
1006 if (timerfd >= 0)
1007 close(timerfd);
1008
1009 return r;
1010 }
1011
/*
	Output callback that collects standard output in a string.

	data must either be NULL or point to a char* which is either NULL or a
	heap-allocated string. Lines of priority LOG_INFO are appended to that
	string; the string is reallocated and must be freed by the caller.

	Everything else (and everything if data is NULL) is sent to the default
	log callback.

	Returns zero on success and 1 if the string could not be extended, in
	which case the string collected so far remains untouched.
*/
int pakfire_jail_capture_stdout(struct pakfire* pakfire, void* data,
		int priority, const char* line, size_t length) {
	char** output = (char**)data;
	char* buffer = NULL;
	int r;

	// Append everything from stdout to a buffer
	if (output && priority == LOG_INFO) {
		// Format into a new string, because the previous one is an input
		r = asprintf(&buffer, "%s%s", (*output) ? *output : "", line);
		if (r < 0)
			return 1;

		// Replace the previous string
		free(*output);
		*output = buffer;

		return 0;
	}

	// Send everything else to the default logger
	return pakfire_jail_default_log_callback(pakfire, NULL, priority, line, length);
}
1028
1029 // Capabilities
1030
1031 // Logs all capabilities of the current process
1032 static int pakfire_jail_show_capabilities(struct pakfire_jail* jail) {
1033 cap_t caps = NULL;
1034 char* name = NULL;
1035 cap_flag_value_t value_e;
1036 cap_flag_value_t value_i;
1037 cap_flag_value_t value_p;
1038 int r;
1039
1040 // Fetch PID
1041 pid_t pid = getpid();
1042
1043 // Fetch all capabilities
1044 caps = cap_get_proc();
1045 if (!caps) {
1046 ERROR(jail->pakfire, "Could not fetch capabilities: %m\n");
1047 r = 1;
1048 goto ERROR;
1049 }
1050
1051 DEBUG(jail->pakfire, "Capabilities of PID %d:\n", pid);
1052
1053 // Iterate over all capabilities
1054 for (unsigned int cap = 0; cap_valid(cap); cap++) {
1055 name = cap_to_name(cap);
1056
1057 // Fetch effective value
1058 r = cap_get_flag(caps, cap, CAP_EFFECTIVE, &value_e);
1059 if (r)
1060 goto ERROR;
1061
1062 // Fetch inheritable value
1063 r = cap_get_flag(caps, cap, CAP_INHERITABLE, &value_i);
1064 if (r)
1065 goto ERROR;
1066
1067 // Fetch permitted value
1068 r = cap_get_flag(caps, cap, CAP_PERMITTED, &value_p);
1069 if (r)
1070 goto ERROR;
1071
1072 DEBUG(jail->pakfire,
1073 " %-24s : %c%c%c\n",
1074 name,
1075 (value_e == CAP_SET) ? 'e' : '-',
1076 (value_i == CAP_SET) ? 'i' : '-',
1077 (value_p == CAP_SET) ? 'p' : '-'
1078 );
1079
1080 // Free name
1081 cap_free(name);
1082 name = NULL;
1083 }
1084
1085 // Success
1086 r = 0;
1087
1088 ERROR:
1089 if (name)
1090 cap_free(name);
1091 if (caps)
1092 cap_free(caps);
1093
1094 return r;
1095 }
1096
1097 static int pakfire_jail_set_capabilities(struct pakfire_jail* jail) {
1098 cap_t caps = NULL;
1099 char* name = NULL;
1100 int r;
1101
1102 // Fetch capabilities
1103 caps = cap_get_proc();
1104 if (!caps) {
1105 ERROR(jail->pakfire, "Could not read capabilities: %m\n");
1106 r = 1;
1107 goto ERROR;
1108 }
1109
1110 // Walk through all capabilities
1111 for (cap_value_t cap = 0; cap_valid(cap); cap++) {
1112 cap_value_t _caps[] = { cap };
1113
1114 // Fetch the name of the capability
1115 name = cap_to_name(cap);
1116
1117 r = cap_set_flag(caps, CAP_EFFECTIVE, 1, _caps, CAP_SET);
1118 if (r) {
1119 ERROR(jail->pakfire, "Could not set %s: %m\n", name);
1120 goto ERROR;
1121 }
1122
1123 r = cap_set_flag(caps, CAP_INHERITABLE, 1, _caps, CAP_SET);
1124 if (r) {
1125 ERROR(jail->pakfire, "Could not set %s: %m\n", name);
1126 goto ERROR;
1127 }
1128
1129 r = cap_set_flag(caps, CAP_PERMITTED, 1, _caps, CAP_SET);
1130 if (r) {
1131 ERROR(jail->pakfire, "Could not set %s: %m\n", name);
1132 goto ERROR;
1133 }
1134
1135 // Free name
1136 cap_free(name);
1137 name = NULL;
1138 }
1139
1140 // Restore all capabilities
1141 r = cap_set_proc(caps);
1142 if (r) {
1143 ERROR(jail->pakfire, "Restoring capabilities failed: %m\n");
1144 goto ERROR;
1145 }
1146
1147 // Add all capabilities to the ambient set
1148 for (unsigned int cap = 0; cap_valid(cap); cap++) {
1149 name = cap_to_name(cap);
1150
1151 // Raise the capability
1152 r = prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_RAISE, cap, 0, 0);
1153 if (r) {
1154 ERROR(jail->pakfire, "Could not set ambient capability %s: %m\n", name);
1155 goto ERROR;
1156 }
1157
1158 // Free name
1159 cap_free(name);
1160 name = NULL;
1161 }
1162
1163 // Success
1164 r = 0;
1165
1166 ERROR:
1167 if (name)
1168 cap_free(name);
1169 if (caps)
1170 cap_free(caps);
1171
1172 return r;
1173 }
1174
1175 // Syscall Filter
1176
1177 static int pakfire_jail_limit_syscalls(struct pakfire_jail* jail) {
1178 const int syscalls[] = {
1179 // The kernel's keyring isn't namespaced
1180 SCMP_SYS(keyctl),
1181 SCMP_SYS(add_key),
1182 SCMP_SYS(request_key),
1183
1184 // Disable userfaultfd
1185 SCMP_SYS(userfaultfd),
1186
1187 // Disable perf which could leak a lot of information about the host
1188 SCMP_SYS(perf_event_open),
1189
1190 0,
1191 };
1192 int r = 1;
1193
1194 DEBUG(jail->pakfire, "Applying syscall filter...\n");
1195
1196 // Setup a syscall filter which allows everything by default
1197 scmp_filter_ctx ctx = seccomp_init(SCMP_ACT_ALLOW);
1198 if (!ctx) {
1199 ERROR(jail->pakfire, "Could not setup seccomp filter: %m\n");
1200 goto ERROR;
1201 }
1202
1203 // All all syscalls
1204 for (const int* syscall = syscalls; *syscall; syscall++) {
1205 r = seccomp_rule_add(ctx, SCMP_ACT_ERRNO(EPERM), *syscall, 0);
1206 if (r) {
1207 ERROR(jail->pakfire, "Could not configure syscall %d: %m\n", *syscall);
1208 goto ERROR;
1209 }
1210 }
1211
1212 // Load syscall filter into the kernel
1213 r = seccomp_load(ctx);
1214 if (r) {
1215 ERROR(jail->pakfire, "Could not load syscall filter into the kernel: %m\n");
1216 goto ERROR;
1217 }
1218
1219 ERROR:
1220 if (ctx)
1221 seccomp_release(ctx);
1222
1223 return r;
1224 }
1225
1226 // Mountpoints
1227
1228 PAKFIRE_EXPORT int pakfire_jail_bind(struct pakfire_jail* jail,
1229 const char* source, const char* target, int flags) {
1230 struct pakfire_jail_mountpoint* mp = NULL;
1231 int r;
1232
1233 // Check if there is any space left
1234 if (jail->num_mountpoints >= MAX_MOUNTPOINTS) {
1235 errno = ENOSPC;
1236 return 1;
1237 }
1238
1239 // Check for valid inputs
1240 if (!source || !target) {
1241 errno = EINVAL;
1242 return 1;
1243 }
1244
1245 // Select the next free slot
1246 mp = &jail->mountpoints[jail->num_mountpoints];
1247
1248 // Copy source
1249 r = pakfire_string_set(mp->source, source);
1250 if (r) {
1251 ERROR(jail->pakfire, "Could not copy source: %m\n");
1252 return r;
1253 }
1254
1255 // Copy target
1256 r = pakfire_string_set(mp->target, target);
1257 if (r) {
1258 ERROR(jail->pakfire, "Could not copy target: %m\n");
1259 return r;
1260 }
1261
1262 // Copy flags
1263 mp->flags = flags;
1264
1265 // Increment counter
1266 jail->num_mountpoints++;
1267
1268 return 0;
1269 }
1270
1271 static int pakfire_jail_mount_networking(struct pakfire_jail* jail) {
1272 int r;
1273
1274 const char* paths[] = {
1275 "/etc/hosts",
1276 "/etc/resolv.conf",
1277 NULL,
1278 };
1279
1280 // Bind-mount all paths read-only
1281 for (const char** path = paths; *path; path++) {
1282 r = pakfire_bind(jail->pakfire, *path, NULL, MS_RDONLY);
1283 if (r) {
1284 switch (errno) {
1285 // Ignore if we don't have permission
1286 case EPERM:
1287 continue;
1288
1289 default:
1290 break;
1291 }
1292 return r;
1293 }
1294 }
1295
1296 return 0;
1297 }
1298
1299 /*
1300 Mounts everything that we require in the new namespace
1301 */
1302 static int pakfire_jail_mount(struct pakfire_jail* jail, struct pakfire_jail_exec* ctx) {
1303 struct pakfire_jail_mountpoint* mp = NULL;
1304 int flags = 0;
1305 int r;
1306
1307 // Enable loop devices
1308 if (pakfire_jail_exec_has_flag(ctx, PAKFIRE_JAIL_HAS_LOOP_DEVICES))
1309 flags |= PAKFIRE_MOUNT_LOOP_DEVICES;
1310
1311 // Mount all default stuff
1312 r = pakfire_mount_all(jail->pakfire, PAKFIRE_MNTNS_OUTER, flags);
1313 if (r)
1314 return r;
1315
1316 // Populate /dev
1317 r = pakfire_populate_dev(jail->pakfire, flags);
1318 if (r)
1319 return r;
1320
1321 // Mount the interpreter (if needed)
1322 r = pakfire_mount_interpreter(jail->pakfire);
1323 if (r)
1324 return r;
1325
1326 // Mount networking stuff
1327 if (pakfire_jail_exec_has_flag(ctx, PAKFIRE_JAIL_HAS_NETWORKING)) {
1328 r = pakfire_jail_mount_networking(jail);
1329 if (r)
1330 return r;
1331 }
1332
1333 // Mount all custom stuff
1334 for (unsigned int i = 0; i < jail->num_mountpoints; i++) {
1335 // Fetch mountpoint
1336 mp = &jail->mountpoints[i];
1337
1338 // Mount it
1339 r = pakfire_bind(jail->pakfire, mp->source, mp->target, mp->flags);
1340 if (r)
1341 return r;
1342 }
1343
1344 return 0;
1345 }
1346
1347 // Networking
1348
1349 static int pakfire_jail_setup_loopback(struct pakfire_jail* jail) {
1350 struct nl_sock* nl = NULL;
1351 struct nl_cache* cache = NULL;
1352 struct rtnl_link* link = NULL;
1353 struct rtnl_link* change = NULL;
1354 int r;
1355
1356 DEBUG(jail->pakfire, "Setting up loopback...\n");
1357
1358 // Allocate a netlink socket
1359 nl = nl_socket_alloc();
1360 if (!nl) {
1361 ERROR(jail->pakfire, "Could not allocate a netlink socket: %m\n");
1362 r = 1;
1363 goto ERROR;
1364 }
1365
1366 // Connect the socket
1367 r = nl_connect(nl, NETLINK_ROUTE);
1368 if (r) {
1369 ERROR(jail->pakfire, "Could not connect netlink socket: %s\n", nl_geterror(r));
1370 goto ERROR;
1371 }
1372
1373 // Allocate the netlink cache
1374 r = rtnl_link_alloc_cache(nl, AF_UNSPEC, &cache);
1375 if (r < 0) {
1376 ERROR(jail->pakfire, "Unable to allocate netlink cache: %s\n", nl_geterror(r));
1377 goto ERROR;
1378 }
1379
1380 // Fetch loopback interface
1381 link = rtnl_link_get_by_name(cache, "lo");
1382 if (!link) {
1383 ERROR(jail->pakfire, "Could not find lo interface. Ignoring.\n");
1384 r = 0;
1385 goto ERROR;
1386 }
1387
1388 // Allocate a new link
1389 change = rtnl_link_alloc();
1390 if (!change) {
1391 ERROR(jail->pakfire, "Could not allocate change link\n");
1392 r = 1;
1393 goto ERROR;
1394 }
1395
1396 // Set the link to UP
1397 rtnl_link_set_flags(change, IFF_UP);
1398
1399 // Apply any changes
1400 r = rtnl_link_change(nl, link, change, 0);
1401 if (r) {
1402 ERROR(jail->pakfire, "Unable to activate loopback: %s\n", nl_geterror(r));
1403 goto ERROR;
1404 }
1405
1406 // Success
1407 r = 0;
1408
1409 ERROR:
1410 if (nl)
1411 nl_socket_free(nl);
1412
1413 return r;
1414 }
1415
1416 // UID/GID Mapping
1417
1418 static int pakfire_jail_setup_uid_mapping(struct pakfire_jail* jail, pid_t pid) {
1419 char path[PATH_MAX];
1420 int r;
1421
1422 // Skip mapping anything when running on /
1423 if (pakfire_on_root(jail->pakfire))
1424 return 0;
1425
1426 // Make path
1427 r = pakfire_string_format(path, "/proc/%d/uid_map", pid);
1428 if (r)
1429 return r;
1430
1431 // Fetch UID
1432 const uid_t uid = pakfire_uid(jail->pakfire);
1433
1434 // Fetch SUBUID
1435 const struct pakfire_subid* subuid = pakfire_subuid(jail->pakfire);
1436 if (!subuid)
1437 return 1;
1438
1439 /* When running as root, we will map the entire range.
1440
1441 When running as a non-privileged user, we will map the root user inside the jail
1442 to the user's UID outside of the jail, and we will map the rest starting from one.
1443 */
1444
1445 // Running as root
1446 if (uid == 0) {
1447 r = pakfire_file_write(jail->pakfire, path, 0, 0, 0,
1448 "0 %lu %lu\n", subuid->id, subuid->length);
1449 } else {
1450 r = pakfire_file_write(jail->pakfire, path, 0, 0, 0,
1451 "0 %lu 1\n1 %lu %lu\n", uid, subuid->id, subuid->length);
1452 }
1453
1454 if (r) {
1455 ERROR(jail->pakfire, "Could not map UIDs: %m\n");
1456 return r;
1457 }
1458
1459 return r;
1460 }
1461
1462 static int pakfire_jail_setup_gid_mapping(struct pakfire_jail* jail, pid_t pid) {
1463 char path[PATH_MAX];
1464 int r;
1465
1466 // Skip mapping anything when running on /
1467 if (pakfire_on_root(jail->pakfire))
1468 return 0;
1469
1470 // Fetch GID
1471 const gid_t gid = pakfire_gid(jail->pakfire);
1472
1473 // Fetch SUBGID
1474 const struct pakfire_subid* subgid = pakfire_subgid(jail->pakfire);
1475 if (!subgid)
1476 return 1;
1477
1478 // Make path
1479 r = pakfire_string_format(path, "/proc/%d/gid_map", pid);
1480 if (r)
1481 return r;
1482
1483 // Running as root
1484 if (gid == 0) {
1485 r = pakfire_file_write(jail->pakfire, path, 0, 0, 0,
1486 "0 %lu %lu\n", subgid->id, subgid->length);
1487 } else {
1488 r = pakfire_file_write(jail->pakfire, path, 0, 0, 0,
1489 "0 %lu 1\n1 %lu %lu\n", gid, subgid->id, subgid->length);
1490 }
1491
1492 if (r) {
1493 ERROR(jail->pakfire, "Could not map GIDs: %m\n");
1494 return r;
1495 }
1496
1497 return r;
1498 }
1499
1500 static int pakfire_jail_setgroups(struct pakfire_jail* jail, pid_t pid) {
1501 char path[PATH_MAX];
1502 int r;
1503
1504 // Make path
1505 r = pakfire_string_format(path, "/proc/%d/setgroups", pid);
1506 if (r)
1507 return r;
1508
1509 r = pakfire_file_write(jail->pakfire, path, 0, 0, 0, "deny\n");
1510 if (r) {
1511 CTX_ERROR(jail->ctx, "Could not set setgroups to deny: %s\n", strerror(errno));
1512 r = -errno;
1513 }
1514
1515 return r;
1516 }
1517
1518 static int pakfire_jail_send_signal(struct pakfire_jail* jail, int fd) {
1519 const uint64_t val = 1;
1520 int r = 0;
1521
1522 DEBUG(jail->pakfire, "Sending signal...\n");
1523
1524 // Write to the file descriptor
1525 r = eventfd_write(fd, val);
1526 if (r < 0) {
1527 ERROR(jail->pakfire, "Could not send signal: %s\n", strerror(errno));
1528 r = -errno;
1529 }
1530
1531 // Close the file descriptor
1532 close(fd);
1533
1534 return r;
1535 }
1536
1537 static int pakfire_jail_wait_for_signal(struct pakfire_jail* jail, int fd) {
1538 uint64_t val = 0;
1539 int r = 0;
1540
1541 DEBUG(jail->pakfire, "Waiting for signal...\n");
1542
1543 r = eventfd_read(fd, &val);
1544 if (r < 0) {
1545 ERROR(jail->pakfire, "Error waiting for signal: %s\n", strerror(errno));
1546 r = -errno;
1547 }
1548
1549 // Close the file descriptor
1550 close(fd);
1551
1552 return r;
1553 }
1554
1555 /*
1556 Performs the initialisation that needs to happen in the parent part
1557 */
1558 static int pakfire_jail_parent(struct pakfire_jail* jail, struct pakfire_jail_exec* ctx) {
1559 int r;
1560
1561 // Setup UID mapping
1562 r = pakfire_jail_setup_uid_mapping(jail, ctx->pid);
1563 if (r)
1564 return r;
1565
1566 // Write "deny" to /proc/PID/setgroups
1567 r = pakfire_jail_setgroups(jail, ctx->pid);
1568 if (r)
1569 return r;
1570
1571 // Setup GID mapping
1572 r = pakfire_jail_setup_gid_mapping(jail, ctx->pid);
1573 if (r)
1574 return r;
1575
1576 // Parent has finished initialisation
1577 DEBUG(jail->pakfire, "Parent has finished initialization\n");
1578
1579 // Send signal to client
1580 r = pakfire_jail_send_signal(jail, ctx->completed_fd);
1581 if (r)
1582 return r;
1583
1584 return 0;
1585 }
1586
1587 static int pakfire_jail_switch_root(struct pakfire_jail* jail, const char* root) {
1588 int r;
1589
1590 // Change to the new root
1591 r = chdir(root);
1592 if (r) {
1593 ERROR(jail->pakfire, "chdir(%s) failed: %m\n", root);
1594 return r;
1595 }
1596
1597 // Switch Root!
1598 r = pivot_root(".", ".");
1599 if (r) {
1600 ERROR(jail->pakfire, "Failed changing into the new root directory %s: %m\n", root);
1601 return r;
1602 }
1603
1604 // Umount the old root
1605 r = umount2(".", MNT_DETACH);
1606 if (r) {
1607 ERROR(jail->pakfire, "Could not umount the old root filesystem: %m\n");
1608 return r;
1609 }
1610
1611 return 0;
1612 }
1613
#if 0
/*
	Allocates a new PTY, stores its file descriptor and path in the context
	and symlinks /dev/console to it

	This function is currently compiled out.

	Returns 0 on success, a negative error number if the PTY could not be
	allocated or its path not be determined, or the result of pakfire_symlink().
*/
static int pakfire_jail_open_pty(struct pakfire_jail* jail, struct pakfire_jail_exec* ctx) {
	// Open the master side
	const int fd = posix_openpt(O_RDWR|O_NONBLOCK|O_NOCTTY|O_CLOEXEC);

	ctx->consolefd = fd;
	if (fd < 0)
		return -errno;

	// ptsname_r() returns a positive error number on failure
	const int err = ptsname_r(ctx->consolefd, ctx->console, sizeof(ctx->console));
	if (err)
		return -err;

	CTX_DEBUG(jail->ctx, "Allocated console at %s (%d)\n", ctx->console, ctx->consolefd);

	// Point /dev/console at the PTY
	return pakfire_symlink(jail->ctx, "/dev/console", ctx->console);
}
#endif
1638
/*
	Performs the initialisation of the child process and executes the command

	This is called by __pakfire_jail_exec() in the process that clone3() has
	created and its return value is passed straight to _exit(). On success,
	execvpe() replaces the process and this function never returns.

	The order of the steps matters: nothing happens before the parent has sent
	its signal, the mounts are set up before the root is switched, and the
	capabilities and the syscall filter are applied last - right before the
	command is launched.

	Return values: 126 is used for failed sanity checks and early setup, 127
	if the command does not exist (0 if PAKFIRE_JAIL_NOENT_OK is set), 1 for
	most other setup failures. Some steps pass on the error of the function
	that has failed.
*/
static int pakfire_jail_child(struct pakfire_jail* jail, struct pakfire_jail_exec* ctx,
		const char* argv[]) {
	int r;

	// Redirect any logging to our log pipe
	pakfire_ctx_set_log_callback(jail->ctx, pakfire_jail_log_redirect, &ctx->pipes);

	// Fetch my own PID
	pid_t pid = getpid();

	DEBUG(jail->pakfire, "Launched child process in jail with PID %d\n", pid);

	// Wait for the parent to finish initialization
	// (this also closes our copy of the eventfd)
	r = pakfire_jail_wait_for_signal(jail, ctx->completed_fd);
	if (r)
		return r;

	// Die with parent
	r = prctl(PR_SET_PDEATHSIG, SIGKILL, 0, 0, 0);
	if (r) {
		ERROR(jail->pakfire, "Could not configure to die with parent: %m\n");
		return 126;
	}

	// Make this process dumpable
	r = prctl (PR_SET_DUMPABLE, 1, 0, 0, 0);
	if (r) {
		ERROR(jail->pakfire, "Could not make the process dumpable: %m\n");
		return 126;
	}

	// Don't drop any capabilities on setuid()
	r = prctl(PR_SET_KEEPCAPS, 1);
	if (r) {
		ERROR(jail->pakfire, "Could not set PR_SET_KEEPCAPS: %m\n");
		return 126;
	}

	// Fetch UID/GID
	uid_t uid = getuid();
	gid_t gid = getgid();

	// Fetch EUID/EGID
	uid_t euid = geteuid();
	gid_t egid = getegid();

	DEBUG(jail->pakfire, " UID: %u (effective %u)\n", uid, euid);
	DEBUG(jail->pakfire, " GID: %u (effective %u)\n", gid, egid);

	// Log all mountpoints
	pakfire_mount_list(jail->ctx);

	// Fail if we are not PID 1
	// (the process is always cloned with CLONE_NEWPID)
	if (pid != 1) {
		CTX_ERROR(jail->ctx, "Child process is not PID 1\n");
		return 126;
	}

	// Fail if we are not running as root
	// (all real and effective IDs have to be zero)
	if (uid || gid || euid || egid) {
		ERROR(jail->pakfire, "Child process is not running as root\n");
		return 126;
	}

	// Mount all default stuff
	r = pakfire_mount_all(jail->pakfire, PAKFIRE_MNTNS_INNER, 0);
	if (r)
		return 126;

#if 0
	// Create a new session
	r = setsid();
	if (r < 0) {
		CTX_ERROR(jail->ctx, "Could not create a new session: %s\n", strerror(errno));
		return 126;
	}

	// Allocate a new PTY
	r = pakfire_jail_open_pty(jail, ctx);
	if (r) {
		CTX_ERROR(jail->ctx, "Could not allocate a new PTY: %s\n", strerror(-r));
		return 126;
	}
#endif

	const char* root = pakfire_get_path(jail->pakfire);
	const char* arch = pakfire_get_effective_arch(jail->pakfire);

	// Change mount propagation to slave to receive anything from the parent namespace
	r = pakfire_mount_change_propagation(jail->ctx, "/", MS_SLAVE);
	if (r)
		return r;

	// Make root a mountpoint in the new mount namespace
	r = pakfire_mount_make_mounpoint(jail->pakfire, root);
	if (r)
		return r;

	// Change mount propagation to private
	r = pakfire_mount_change_propagation(jail->ctx, root, MS_PRIVATE);
	if (r)
		return r;

	// Change root (unless root is /)
	if (!pakfire_on_root(jail->pakfire)) {
		// Mount everything
		r = pakfire_jail_mount(jail, ctx);
		if (r)
			return r;

		// Switch into the new root (using pivot_root)
		r = pakfire_jail_switch_root(jail, root);
		if (r)
			return r;
	}

	// Set personality (skipped if none is returned for the architecture)
	unsigned long persona = pakfire_arch_personality(arch);
	if (persona) {
		r = personality(persona);
		if (r < 0) {
			ERROR(jail->pakfire, "Could not set personality (%x)\n", (unsigned int)persona);
			return 1;
		}
	}

	// Setup networking
	// Without PAKFIRE_JAIL_HAS_NETWORKING, the process has been cloned with
	// CLONE_NEWNET and we have to bring up the loopback interface ourselves
	if (!pakfire_jail_exec_has_flag(ctx, PAKFIRE_JAIL_HAS_NETWORKING)) {
		r = pakfire_jail_setup_loopback(jail);
		if (r)
			return 1;
	}

	// Set nice level
	if (jail->nice) {
		DEBUG(jail->pakfire, "Setting nice level to %d\n", jail->nice);

		r = setpriority(PRIO_PROCESS, pid, jail->nice);
		if (r) {
			ERROR(jail->pakfire, "Could not set nice level: %m\n");
			return 1;
		}
	}

	// Close other end of log pipes
	// (index 0 of each pipe; the child logs through the other end)
	close(ctx->pipes.log_INFO[0]);
	close(ctx->pipes.log_ERROR[0]);
#ifdef ENABLE_DEBUG
	close(ctx->pipes.log_DEBUG[0]);
#endif /* ENABLE_DEBUG */

	// Connect standard input
	// (the pipe only exists if an input callback has been set)
	if (ctx->pipes.stdin[0] >= 0) {
		r = dup2(ctx->pipes.stdin[0], STDIN_FILENO);
		if (r < 0) {
			ERROR(jail->pakfire, "Could not connect fd %d to stdin: %m\n",
				ctx->pipes.stdin[0]);

			return 1;
		}
	}

	// Connect standard output and error
	// (both pipes are only set up when not running interactively)
	if (ctx->pipes.stdout[1] >= 0 && ctx->pipes.stderr[1] >= 0) {
		r = dup2(ctx->pipes.stdout[1], STDOUT_FILENO);
		if (r < 0) {
			ERROR(jail->pakfire, "Could not connect fd %d to stdout: %m\n",
				ctx->pipes.stdout[1]);

			return 1;
		}

		r = dup2(ctx->pipes.stderr[1], STDERR_FILENO);
		if (r < 0) {
			ERROR(jail->pakfire, "Could not connect fd %d to stderr: %m\n",
				ctx->pipes.stderr[1]);

			return 1;
		}

		// Close the pipe (as we have moved the original file descriptors)
		// NOTE(review): the stdin pipe is only closed in this branch as well
		pakfire_jail_close_pipe(jail, ctx->pipes.stdin);
		pakfire_jail_close_pipe(jail, ctx->pipes.stdout);
		pakfire_jail_close_pipe(jail, ctx->pipes.stderr);
	}

	// Reset open file limit (http://0pointer.net/blog/file-descriptor-limits.html)
	r = pakfire_rlimit_reset_nofile(jail->pakfire);
	if (r)
		return r;

	// Set capabilities
	r = pakfire_jail_set_capabilities(jail);
	if (r)
		return r;

	// Show capabilities
	r = pakfire_jail_show_capabilities(jail);
	if (r)
		return r;

	// Filter syscalls
	// (this is the final setup step before the command is launched)
	r = pakfire_jail_limit_syscalls(jail);
	if (r)
		return r;

	DEBUG(jail->pakfire, "Child process initialization done\n");
	DEBUG(jail->pakfire, "Launching command:\n");

	// Log argv
	for (unsigned int i = 0; argv[i]; i++)
		DEBUG(jail->pakfire, " argv[%u] = %s\n", i, argv[i]);

	// exec() command
	// (runs with the environment of the jail and only returns on failure)
	r = execvpe(argv[0], (char**)argv, jail->env);
	if (r < 0) {
		// Translate errno into regular exit code
		switch (errno) {
			case ENOENT:
				// Ignore if the command doesn't exist
				if (ctx->flags & PAKFIRE_JAIL_NOENT_OK)
					r = 0;
				else
					r = 127;

				break;

			default:
				r = 1;
		}

		// The error is logged even if the return code has been reset to zero
		ERROR(jail->pakfire, "Could not execve(%s): %m\n", argv[0]);
	}

	// We should not get here
	return r;
}
1876
1877 // Run a command in the jail
1878 static int __pakfire_jail_exec(struct pakfire_jail* jail, const char* argv[],
1879 const int interactive,
1880 pakfire_jail_communicate_in communicate_in,
1881 pakfire_jail_communicate_out communicate_out,
1882 void* data, int flags) {
1883 int exit = -1;
1884 int r;
1885
1886 // Check if argv is valid
1887 if (!argv || !argv[0]) {
1888 errno = EINVAL;
1889 return -1;
1890 }
1891
1892 // Initialize context for this call
1893 struct pakfire_jail_exec ctx = {
1894 .flags = flags,
1895
1896 .pipes = {
1897 .stdin = { -1, -1 },
1898 .stdout = { -1, -1 },
1899 .stderr = { -1, -1 },
1900 .log_INFO = { -1, -1 },
1901 .log_ERROR = { -1, -1 },
1902 #ifdef ENABLE_DEBUG
1903 .log_DEBUG = { -1, -1 },
1904 #endif /* ENABLE_DEBUG */
1905 },
1906
1907 .communicate = {
1908 .in = communicate_in,
1909 .out = communicate_out,
1910 .data = data,
1911 },
1912
1913 .pidfd = -1,
1914 };
1915
1916 DEBUG(jail->pakfire, "Executing jail...\n");
1917
1918 // Enable networking in interactive mode
1919 if (interactive)
1920 ctx.flags |= PAKFIRE_JAIL_HAS_NETWORKING;
1921
1922 /*
1923 Setup a file descriptor which can be used to notify the client that the parent
1924 has completed configuration.
1925 */
1926 ctx.completed_fd = eventfd(0, EFD_CLOEXEC);
1927 if (ctx.completed_fd < 0) {
1928 ERROR(jail->pakfire, "eventfd() failed: %m\n");
1929 return -1;
1930 }
1931
1932 // Create pipes to communicate with child process if we are not running interactively
1933 if (!interactive) {
1934 // stdin (only if callback is set)
1935 if (ctx.communicate.in) {
1936 r = pakfire_jail_setup_pipe(jail, &ctx.pipes.stdin, 0);
1937 if (r)
1938 goto ERROR;
1939 }
1940
1941 // stdout
1942 r = pakfire_jail_setup_pipe(jail, &ctx.pipes.stdout, 0);
1943 if (r)
1944 goto ERROR;
1945
1946 // stderr
1947 r = pakfire_jail_setup_pipe(jail, &ctx.pipes.stderr, 0);
1948 if (r)
1949 goto ERROR;
1950 }
1951
1952 // Setup pipes for logging
1953 // INFO
1954 r = pakfire_jail_setup_pipe(jail, &ctx.pipes.log_INFO, O_CLOEXEC);
1955 if (r)
1956 goto ERROR;
1957
1958 // ERROR
1959 r = pakfire_jail_setup_pipe(jail, &ctx.pipes.log_ERROR, O_CLOEXEC);
1960 if (r)
1961 goto ERROR;
1962
1963 #ifdef ENABLE_DEBUG
1964 // DEBUG
1965 r = pakfire_jail_setup_pipe(jail, &ctx.pipes.log_DEBUG, O_CLOEXEC);
1966 if (r)
1967 goto ERROR;
1968 #endif /* ENABLE_DEBUG */
1969
1970 // Configure child process
1971 struct clone_args args = {
1972 .flags =
1973 CLONE_NEWCGROUP |
1974 CLONE_NEWIPC |
1975 CLONE_NEWNS |
1976 CLONE_NEWPID |
1977 CLONE_NEWTIME |
1978 CLONE_NEWUSER |
1979 CLONE_NEWUTS |
1980 CLONE_PIDFD,
1981 .exit_signal = SIGCHLD,
1982 .pidfd = (long long unsigned int)&ctx.pidfd,
1983 };
1984
1985 // Launch the process in a cgroup that is a leaf of the configured cgroup
1986 if (jail->cgroup) {
1987 args.flags |= CLONE_INTO_CGROUP;
1988
1989 // Fetch our UUID
1990 const char* uuid = pakfire_jail_uuid(jail);
1991
1992 // Create a temporary cgroup
1993 r = pakfire_cgroup_child(&ctx.cgroup, jail->cgroup, uuid, 0);
1994 if (r) {
1995 ERROR(jail->pakfire, "Could not create cgroup for jail: %m\n");
1996 goto ERROR;
1997 }
1998
1999 // Clone into this cgroup
2000 args.cgroup = pakfire_cgroup_fd(ctx.cgroup);
2001 }
2002
2003 // Setup networking
2004 if (!pakfire_jail_exec_has_flag(&ctx, PAKFIRE_JAIL_HAS_NETWORKING)) {
2005 args.flags |= CLONE_NEWNET;
2006 }
2007
2008 // Fork this process
2009 ctx.pid = clone3(&args, sizeof(args));
2010 if (ctx.pid < 0) {
2011 ERROR(jail->pakfire, "Could not clone: %m\n");
2012 return -1;
2013
2014 // Child process
2015 } else if (ctx.pid == 0) {
2016 r = pakfire_jail_child(jail, &ctx, argv);
2017 _exit(r);
2018 }
2019
2020 // Parent process
2021 r = pakfire_jail_parent(jail, &ctx);
2022 if (r)
2023 goto ERROR;
2024
2025 DEBUG(jail->pakfire, "Waiting for PID %d to finish its work\n", ctx.pid);
2026
2027 // Read output of the child process
2028 r = pakfire_jail_wait(jail, &ctx);
2029 if (r)
2030 goto ERROR;
2031
2032 // Handle exit status
2033 switch (ctx.status.si_code) {
2034 case CLD_EXITED:
2035 DEBUG(jail->pakfire, "The child process exited with code %d\n",
2036 ctx.status.si_status);
2037
2038 // Pass exit code
2039 exit = ctx.status.si_status;
2040 break;
2041
2042 case CLD_KILLED:
2043 ERROR(jail->pakfire, "The child process was killed\n");
2044 exit = 139;
2045 break;
2046
2047 case CLD_DUMPED:
2048 ERROR(jail->pakfire, "The child process terminated abnormally\n");
2049 break;
2050
2051 // Log anything else
2052 default:
2053 ERROR(jail->pakfire, "Unknown child exit code: %d\n", ctx.status.si_code);
2054 break;
2055 }
2056
2057 ERROR:
2058 // Destroy the temporary cgroup (if any)
2059 if (ctx.cgroup) {
2060 // Read cgroup stats
2061 pakfire_cgroup_stat(ctx.cgroup, &ctx.cgroup_stats);
2062 pakfire_cgroup_stat_dump(ctx.cgroup, &ctx.cgroup_stats);
2063 pakfire_cgroup_destroy(ctx.cgroup);
2064 pakfire_cgroup_unref(ctx.cgroup);
2065 }
2066
2067 // Close any file descriptors
2068 pakfire_jail_close_pipe(jail, ctx.pipes.stdin);
2069 pakfire_jail_close_pipe(jail, ctx.pipes.stdout);
2070 pakfire_jail_close_pipe(jail, ctx.pipes.stderr);
2071 if (ctx.pidfd >= 0)
2072 close(ctx.pidfd);
2073 pakfire_jail_close_pipe(jail, ctx.pipes.log_INFO);
2074 pakfire_jail_close_pipe(jail, ctx.pipes.log_ERROR);
2075 #ifdef ENABLE_DEBUG
2076 pakfire_jail_close_pipe(jail, ctx.pipes.log_DEBUG);
2077 #endif /* ENABLE_DEBUG */
2078
2079 return exit;
2080 }
2081
2082 PAKFIRE_EXPORT int pakfire_jail_exec(
2083 struct pakfire_jail* jail,
2084 const char* argv[],
2085 pakfire_jail_communicate_in callback_in,
2086 pakfire_jail_communicate_out callback_out,
2087 void* data, int flags) {
2088 return __pakfire_jail_exec(jail, argv, 0, callback_in, callback_out, data, flags);
2089 }
2090
/*
	Runs a command in the jail in interactive mode

	The interactive environment is set up first. If that fails, the error is
	returned and nothing is executed. There are no communication callbacks
	in this mode.
*/
static int pakfire_jail_exec_interactive(
		struct pakfire_jail* jail, const char* argv[], int flags) {
	// Prepare the environment
	const int r = pakfire_jail_setup_interactive_env(jail);

	return (r) ? r : __pakfire_jail_exec(jail, argv, 1, NULL, NULL, NULL, flags);
}
2102
2103 int pakfire_jail_exec_script(struct pakfire_jail* jail,
2104 const char* script,
2105 const size_t size,
2106 const char* args[],
2107 pakfire_jail_communicate_in callback_in,
2108 pakfire_jail_communicate_out callback_out,
2109 void* data) {
2110 char path[PATH_MAX];
2111 const char** argv = NULL;
2112 FILE* f = NULL;
2113 int r;
2114
2115 const char* root = pakfire_get_path(jail->pakfire);
2116
2117 // Write the scriptlet to disk
2118 r = pakfire_path_append(path, root, PAKFIRE_TMP_DIR "/pakfire-script.XXXXXX");
2119 if (r)
2120 goto ERROR;
2121
2122 // Create a temporary file
2123 f = pakfire_mktemp(path, 0700);
2124 if (!f) {
2125 ERROR(jail->pakfire, "Could not create temporary file: %m\n");
2126 goto ERROR;
2127 }
2128
2129 DEBUG(jail->pakfire, "Writing script to %s:\n%.*s\n", path, (int)size, script);
2130
2131 // Write data
2132 r = fprintf(f, "%s", script);
2133 if (r < 0) {
2134 ERROR(jail->pakfire, "Could not write script to file %s: %m\n", path);
2135 goto ERROR;
2136 }
2137
2138 // Close file
2139 r = fclose(f);
2140 if (r) {
2141 ERROR(jail->pakfire, "Could not close script file %s: %m\n", path);
2142 goto ERROR;
2143 }
2144
2145 f = NULL;
2146
2147 // Count how many arguments were passed
2148 unsigned int argc = 1;
2149 if (args) {
2150 for (const char** arg = args; *arg; arg++)
2151 argc++;
2152 }
2153
2154 argv = calloc(argc + 1, sizeof(*argv));
2155 if (!argv) {
2156 ERROR(jail->pakfire, "Could not allocate argv: %m\n");
2157 goto ERROR;
2158 }
2159
2160 // Set command
2161 argv[0] = (root) ? pakfire_path_relpath(root, path) : path;
2162
2163 // Copy args
2164 for (unsigned int i = 1; i < argc; i++)
2165 argv[i] = args[i-1];
2166
2167 // Run the script
2168 r = pakfire_jail_exec(jail, argv, callback_in, callback_out, data, 0);
2169
2170 ERROR:
2171 if (argv)
2172 free(argv);
2173 if (f)
2174 fclose(f);
2175
2176 // Remove script from disk
2177 if (*path)
2178 unlink(path);
2179
2180 return r;
2181 }
2182
2183 /*
2184 A convenience function that creates a new jail, runs the given command and destroys
2185 the jail again.
2186 */
2187 int pakfire_jail_run(struct pakfire* pakfire, const char* argv[], int flags, char** output) {
2188 struct pakfire_jail* jail = NULL;
2189 int r;
2190
2191 // Create a new jail
2192 r = pakfire_jail_create(&jail, pakfire);
2193 if (r)
2194 goto ERROR;
2195
2196 // Execute the command
2197 r = pakfire_jail_exec(jail, argv, NULL, pakfire_jail_capture_stdout, output, 0);
2198
2199 ERROR:
2200 if (jail)
2201 pakfire_jail_unref(jail);
2202
2203 return r;
2204 }
2205
/*
	A convenience function that creates a new jail, runs the given script
	in it and destroys the jail again.

	No communication callbacks are set. Note that flags is not used.

	Returns the error of pakfire_jail_create() if the jail could not be
	created or otherwise the result of pakfire_jail_exec_script().
*/
int pakfire_jail_run_script(struct pakfire* pakfire,
		const char* script, const size_t length, const char* argv[], int flags) {
	struct pakfire_jail* jail = NULL;

	// Set up the jail and launch the script if that has worked
	int r = pakfire_jail_create(&jail, pakfire);
	if (!r)
		r = pakfire_jail_exec_script(jail, script, length, argv, NULL, NULL, NULL);

	// Drop the jail again
	if (jail)
		pakfire_jail_unref(jail);

	return r;
}
2225
/*
	Launches an interactive login shell (/bin/bash) in the jail

	Negative results of the execution are passed on as errors. Whatever
	code the shell has exited with is ignored and zero is returned instead.
*/
int pakfire_jail_shell(struct pakfire_jail* jail) {
	const char* shell[] = {
		"/bin/bash", "--login", NULL,
	};

	// Run the shell
	const int r = pakfire_jail_exec_interactive(jail, shell, 0);

	// Only errors are interesting
	return (r < 0) ? r : 0;
}
2243
2244 static int pakfire_jail_run_if_possible(struct pakfire* pakfire, const char** argv) {
2245 char path[PATH_MAX];
2246 int r;
2247
2248 r = pakfire_path(pakfire, path, "%s", *argv);
2249 if (r)
2250 return r;
2251
2252 // Check if the file is executable
2253 r = access(path, X_OK);
2254 if (r) {
2255 DEBUG(pakfire, "%s is not executable. Skipping...\n", *argv);
2256 return 0;
2257 }
2258
2259 return pakfire_jail_run(pakfire, argv, 0, NULL);
2260 }
2261
/*
	Runs /sbin/ldconfig in a new jail (skipped if it is not executable)
*/
int pakfire_jail_ldconfig(struct pakfire* pakfire) {
	const char* command[] = { "/sbin/ldconfig", NULL };

	return pakfire_jail_run_if_possible(pakfire, command);
}
2270
/*
	Runs "/usr/bin/systemd-tmpfiles --create" in a new jail (skipped if the
	binary is not executable)
*/
int pakfire_jail_run_systemd_tmpfiles(struct pakfire* pakfire) {
	const char* command[] = { "/usr/bin/systemd-tmpfiles", "--create", NULL };

	return pakfire_jail_run_if_possible(pakfire, command);
}