]> git.ipfire.org Git - pakfire.git/blob - src/libpakfire/jail.c
jail: Initialize all file descriptors with -1
[pakfire.git] / src / libpakfire / jail.c
1 /*#############################################################################
2 # #
3 # Pakfire - The IPFire package management system #
4 # Copyright (C) 2022 Pakfire development team #
5 # #
6 # This program is free software: you can redistribute it and/or modify #
7 # it under the terms of the GNU General Public License as published by #
8 # the Free Software Foundation, either version 3 of the License, or #
9 # (at your option) any later version. #
10 # #
11 # This program is distributed in the hope that it will be useful, #
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of #
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
14 # GNU General Public License for more details. #
15 # #
16 # You should have received a copy of the GNU General Public License #
17 # along with this program. If not, see <http://www.gnu.org/licenses/>. #
18 # #
19 #############################################################################*/
20
21 #include <errno.h>
22 #include <fcntl.h>
23 #include <linux/capability.h>
24 #include <linux/sched.h>
25 #include <sys/wait.h>
26 #include <linux/wait.h>
27 #include <sched.h>
28 #include <signal.h>
29 #include <stdlib.h>
30 #include <syscall.h>
31 #include <sys/capability.h>
32 #include <sys/epoll.h>
33 #include <sys/eventfd.h>
34 #include <sys/mount.h>
35 #include <sys/personality.h>
36 #include <sys/prctl.h>
37 #include <sys/resource.h>
38 #include <sys/timerfd.h>
39 #include <sys/types.h>
40 #include <sys/wait.h>
41
42 // libnl3
43 #include <net/if.h>
44 #include <netlink/route/link.h>
45
46 // libseccomp
47 #include <seccomp.h>
48
49 // libuuid
50 #include <uuid.h>
51
52 #include <pakfire/arch.h>
53 #include <pakfire/cgroup.h>
54 #include <pakfire/jail.h>
55 #include <pakfire/logging.h>
56 #include <pakfire/mount.h>
57 #include <pakfire/pakfire.h>
58 #include <pakfire/private.h>
59 #include <pakfire/pwd.h>
60 #include <pakfire/string.h>
61 #include <pakfire/util.h>
62
// Size in bytes of every log buffer. The expression is parenthesised so that
// the macro expands correctly inside larger expressions (e.g. x / BUFFER_SIZE).
#define BUFFER_SIZE (1024 * 64)
// Number of slots in the environment array of a jail
#define ENVIRON_SIZE 128
// Maximum number of events fetched by a single call to epoll_wait()
#define EPOLL_MAX_EVENTS 2
// Maximum number of custom bind-mounts that can be registered with a jail
#define MAX_MOUNTPOINTS 8
67
// Environment variables that every jail starts out with.
// The table is terminated by an entry whose key is NULL.
static const struct environ {
	const char* key;
	const char* val;
} ENV[] = {
	{ .key = "HOME", .val = "/root" },
	{ .key = "LANG", .val = "C.utf-8" },
	{ .key = "PATH", .val = "/usr/local/sbin:/usr/sbin:/sbin:/usr/local/bin:/usr/bin:/bin" },
	{ .key = "TERM", .val = "vt100" },

	// Lets programs detect that they run inside a Pakfire container
	{ .key = "container", .val = "pakfire" },

	// Sentinel
	{ .key = NULL, .val = NULL },
};
82
// One custom bind-mount that has been registered with pakfire_jail_bind()
// and is mounted when the jail's mount namespace is being set up.
struct pakfire_jail_mountpoint {
	// Path that is being bind-mounted
	char source[PATH_MAX];
	// Path the source is mounted to
	char target[PATH_MAX];
	// Flags that are handed on to pakfire_bind()
	int flags;
};
88
// A reference-counted jail: holds the configuration (environment, limits,
// mountpoints, ...) which is applied when something is executed in it.
struct pakfire_jail {
	// Reference to the Pakfire instance this jail belongs to
	struct pakfire* pakfire;
	// Reference counter; the jail is freed when it drops to zero
	int nrefs;

	// A unique ID for each jail
	uuid_t uuid;
	// String representation of the UUID, filled in on first use by pakfire_jail_uuid()
	char __uuid[UUID_STR_LEN];

	// Resource Limits
	int nice;

	// Timeout (only it_value.tv_sec is set by pakfire_jail_set_timeout(); zero disables it)
	struct itimerspec timeout;

	// CGroup (optional; a reference is held while it is set)
	struct pakfire_cgroup* cgroup;

	// Environment ("KEY=VALUE" strings owned by the jail, terminated by a NULL entry)
	char* env[ENVIRON_SIZE];

	// Mountpoints (only the first num_mountpoints entries are in use)
	struct pakfire_jail_mountpoint mountpoints[MAX_MOUNTPOINTS];
	unsigned int num_mountpoints;
};
113
// Buffer that collects output read from a pipe until complete lines
// can be handed to a callback.
struct pakfire_log_buffer {
	// Raw bytes read so far (not NUL-terminated)
	char data[BUFFER_SIZE];
	// Number of bytes in data that are currently in use
	size_t used;
};
118
// State of a single execution inside a jail: the child process,
// the pipes connected to it and the buffers for its output.
struct pakfire_jail_exec {
	// Bitmask of enum pakfire_jail_exec_flags (see pakfire_jail_exec_has_flag())
	int flags;

	// PID (of the child)
	pid_t pid;
	// PID file descriptor of the child; polled and passed to waitid() in pakfire_jail_wait()
	int pidfd;

	// Process status (from waitid)
	siginfo_t status;

	// FD to notify the client that the parent has finished initialization
	int completed_fd;

	// Log pipes (index 0 is the read end, index 1 is the write end)
	struct pakfire_jail_pipes {
		int stdin[2];
		int stdout[2];
		int stderr[2];

		// Logging (one pipe per log priority, written to by pakfire_jail_log())
		int log_INFO[2];
		int log_ERROR[2];
		int log_DEBUG[2];
	} pipes;

	// Communicate
	struct pakfire_jail_communicate {
		// Callback that feeds the standard input of the child
		pakfire_jail_communicate_in in;
		// Callback that receives lines from standard output/error of the child
		pakfire_jail_communicate_out out;
		// Opaque pointer which is passed to both callbacks
		void* data;
	} communicate;

	// Log buffers (one for every pipe that is being read from)
	struct pakfire_jail_buffers {
		struct pakfire_log_buffer stdout;
		struct pakfire_log_buffer stderr;

		// Logging
		struct pakfire_log_buffer log_INFO;
		struct pakfire_log_buffer log_ERROR;
		struct pakfire_log_buffer log_DEBUG;
	} buffers;

	// NOTE(review): presumably the cgroup the child runs in and its
	// statistics - not used in the part of the file visible here
	struct pakfire_cgroup* cgroup;
	struct pakfire_cgroup_stats cgroup_stats;
};
165
/*
	Invokes the clone3 system call directly and returns its result.
*/
static int clone3(struct clone_args* args, size_t size) {
	const long result = syscall(__NR_clone3, args, size);

	return result;
}
169
170 static int pidfd_send_signal(int pidfd, int sig, siginfo_t* info, unsigned int flags) {
171 return syscall(SYS_pidfd_send_signal, pidfd, sig, info, flags);
172 }
173
174 static int pakfire_jail_exec_has_flag(
175 const struct pakfire_jail_exec* ctx, const enum pakfire_jail_exec_flags flag) {
176 return ctx->flags & flag;
177 }
178
179 static void pakfire_jail_free(struct pakfire_jail* jail) {
180 DEBUG(jail->pakfire, "Freeing jail at %p\n", jail);
181
182 // Free environment
183 for (unsigned int i = 0; jail->env[i]; i++)
184 free(jail->env[i]);
185
186 if (jail->cgroup)
187 pakfire_cgroup_unref(jail->cgroup);
188
189 pakfire_unref(jail->pakfire);
190 free(jail);
191 }
192
/*
	Default output callback: forwards the line to the Pakfire logger
	that matches the priority. Lines of any other priority are dropped.

	Always returns zero.
*/
static int pakfire_jail_default_log_callback(struct pakfire* pakfire, void* data,
		int priority, const char* line, size_t length) {
	if (priority == LOG_INFO) {
		INFO(pakfire, "%s", line);
	} else if (priority == LOG_ERR) {
		ERROR(pakfire, "%s", line);
	}
#ifdef ENABLE_DEBUG
	else if (priority == LOG_DEBUG) {
		DEBUG(pakfire, "%s", line);
	}
#endif

	return 0;
}
216
217 static const char* pakfire_jail_uuid(struct pakfire_jail* jail) {
218 if (!*jail->__uuid)
219 uuid_unparse_lower(jail->uuid, jail->__uuid);
220
221 return jail->__uuid;
222 }
223
/*
	Prepares the environment of the jail for interactive use: sets a prompt
	and copies TERM and LANG from the environment of the calling process
	(if they are set).

	Returns zero on success or the error returned by pakfire_jail_set_env().
*/
static int pakfire_jail_setup_interactive_env(struct pakfire_jail* jail) {
	// Variables that are copied over from the calling process
	static const char* const inherited[] = { "TERM", "LANG", NULL };

	// Set the prompt
	int r = pakfire_jail_set_env(jail, "PS1", "pakfire-jail \\w> ");
	if (r)
		return r;

	for (const char* const* name = inherited; *name; name++) {
		const char* value = secure_getenv(*name);

		// Skip anything that is not set
		if (!value)
			continue;

		r = pakfire_jail_set_env(jail, *name, value);
		if (r)
			return r;
	}

	return 0;
}
248
249 PAKFIRE_EXPORT int pakfire_jail_create(struct pakfire_jail** jail, struct pakfire* pakfire) {
250 int r;
251
252 const char* arch = pakfire_get_arch(pakfire);
253
254 // Allocate a new jail
255 struct pakfire_jail* j = calloc(1, sizeof(*j));
256 if (!j)
257 return 1;
258
259 // Reference Pakfire
260 j->pakfire = pakfire_ref(pakfire);
261
262 // Initialize reference counter
263 j->nrefs = 1;
264
265 // Generate a random UUID
266 uuid_generate_random(j->uuid);
267
268 DEBUG(j->pakfire, "Allocated new jail at %p\n", j);
269
270 // Set default environment
271 for (const struct environ* e = ENV; e->key; e++) {
272 r = pakfire_jail_set_env(j, e->key, e->val);
273 if (r)
274 goto ERROR;
275 }
276
277 // Enable all CPU features that CPU has to offer
278 if (!pakfire_arch_supported_by_host(arch)) {
279 r = pakfire_jail_set_env(j, "QEMU_CPU", "max");
280 if (r)
281 goto ERROR;
282 }
283
284 // Set container UUID
285 r = pakfire_jail_set_env(j, "container_uuid", pakfire_jail_uuid(j));
286 if (r)
287 goto ERROR;
288
289 // Disable systemctl to talk to systemd
290 if (!pakfire_on_root(j->pakfire)) {
291 r = pakfire_jail_set_env(j, "SYSTEMD_OFFLINE", "1");
292 if (r)
293 goto ERROR;
294 }
295
296 // Done
297 *jail = j;
298 return 0;
299
300 ERROR:
301 pakfire_jail_free(j);
302
303 return r;
304 }
305
306 PAKFIRE_EXPORT struct pakfire_jail* pakfire_jail_ref(struct pakfire_jail* jail) {
307 ++jail->nrefs;
308
309 return jail;
310 }
311
312 PAKFIRE_EXPORT struct pakfire_jail* pakfire_jail_unref(struct pakfire_jail* jail) {
313 if (--jail->nrefs > 0)
314 return jail;
315
316 pakfire_jail_free(jail);
317 return NULL;
318 }
319
320 // Resource Limits
321
322 PAKFIRE_EXPORT int pakfire_jail_nice(struct pakfire_jail* jail, int nice) {
323 // Check if nice level is in range
324 if (nice < -19 || nice > 20) {
325 errno = EINVAL;
326 return 1;
327 }
328
329 // Store nice level
330 jail->nice = nice;
331
332 return 0;
333 }
334
335 int pakfire_jail_set_cgroup(struct pakfire_jail* jail, struct pakfire_cgroup* cgroup) {
336 // Free any previous cgroup
337 if (jail->cgroup) {
338 pakfire_cgroup_unref(jail->cgroup);
339 jail->cgroup = NULL;
340 }
341
342 // Set any new cgroup
343 if (cgroup) {
344 DEBUG(jail->pakfire, "Setting cgroup %p\n", cgroup);
345
346 jail->cgroup = pakfire_cgroup_ref(cgroup);
347 }
348
349 // Done
350 return 0;
351 }
352
353 // Environment
354
355 // Returns the length of the environment
356 static unsigned int pakfire_jail_env_length(struct pakfire_jail* jail) {
357 unsigned int i = 0;
358
359 // Count everything in the environment
360 for (char** e = jail->env; *e; e++)
361 i++;
362
363 return i;
364 }
365
366 // Finds an existing environment variable and returns its index or -1 if not found
367 static int pakfire_jail_find_env(struct pakfire_jail* jail, const char* key) {
368 if (!key) {
369 errno = EINVAL;
370 return -1;
371 }
372
373 const size_t length = strlen(key);
374
375 for (unsigned int i = 0; jail->env[i]; i++) {
376 if ((pakfire_string_startswith(jail->env[i], key)
377 && *(jail->env[i] + length) == '=')) {
378 return i;
379 }
380 }
381
382 // Nothing found
383 return -1;
384 }
385
386 // Returns the value of an environment variable or NULL
387 PAKFIRE_EXPORT const char* pakfire_jail_get_env(struct pakfire_jail* jail,
388 const char* key) {
389 int i = pakfire_jail_find_env(jail, key);
390 if (i < 0)
391 return NULL;
392
393 return jail->env[i] + strlen(key) + 1;
394 }
395
396 // Sets an environment variable
397 PAKFIRE_EXPORT int pakfire_jail_set_env(struct pakfire_jail* jail,
398 const char* key, const char* value) {
399 // Find the index where to write this value to
400 int i = pakfire_jail_find_env(jail, key);
401 if (i < 0)
402 i = pakfire_jail_env_length(jail);
403
404 // Return -ENOSPC when the environment is full
405 if (i >= ENVIRON_SIZE) {
406 errno = ENOSPC;
407 return -1;
408 }
409
410 // Free any previous value
411 if (jail->env[i])
412 free(jail->env[i]);
413
414 // Format and set environment variable
415 asprintf(&jail->env[i], "%s=%s", key, value);
416
417 DEBUG(jail->pakfire, "Set environment variable: %s\n", jail->env[i]);
418
419 return 0;
420 }
421
422 // Imports an environment
423 PAKFIRE_EXPORT int pakfire_jail_import_env(struct pakfire_jail* jail, const char* env[]) {
424 if (!env)
425 return 0;
426
427 char* key;
428 char* val;
429 int r;
430
431 // Copy environment variables
432 for (unsigned int i = 0; env[i]; i++) {
433 r = pakfire_string_partition(env[i], "=", &key, &val);
434 if (r)
435 continue;
436
437 // Set value
438 r = pakfire_jail_set_env(jail, key, val);
439
440 if (key)
441 free(key);
442 if (val)
443 free(val);
444
445 // Break on error
446 if (r)
447 return r;
448 }
449
450 return 0;
451 }
452
453 // Timeout
454
455 PAKFIRE_EXPORT int pakfire_jail_set_timeout(
456 struct pakfire_jail* jail, unsigned int timeout) {
457 // Store value
458 jail->timeout.it_value.tv_sec = timeout;
459
460 if (timeout > 0)
461 DEBUG(jail->pakfire, "Timeout set to %d second(s)\n", timeout);
462 else
463 DEBUG(jail->pakfire, "Timeout disabled\n");
464
465 return 0;
466 }
467
468 static int pakfire_jail_create_timer(struct pakfire_jail* jail) {
469 int r;
470
471 // Nothing to do if no timeout has been set
472 if (!jail->timeout.it_value.tv_sec)
473 return -1;
474
475 // Create a new timer
476 const int fd = timerfd_create(CLOCK_MONOTONIC, 0);
477 if (fd < 0) {
478 ERROR(jail->pakfire, "Could not create timer: %m\n");
479 goto ERROR;
480 }
481
482 // Arm timer
483 r = timerfd_settime(fd, 0, &jail->timeout, NULL);
484 if (r) {
485 ERROR(jail->pakfire, "Could not arm timer: %m\n");
486 goto ERROR;
487 }
488
489 return fd;
490
491 ERROR:
492 if (fd > 0)
493 close(fd);
494
495 return -1;
496 }
497
498 /*
499 This function replaces any logging in the child process.
500
501 All log messages will be sent to the parent process through their respective pipes.
502 */
503 static void pakfire_jail_log(void* data, int priority, const char* file,
504 int line, const char* fn, const char* format, va_list args) {
505 struct pakfire_jail_pipes* pipes = (struct pakfire_jail_pipes*)data;
506 int fd;
507
508 switch (priority) {
509 case LOG_INFO:
510 fd = pipes->log_INFO[1];
511 break;
512
513 case LOG_ERR:
514 fd = pipes->log_ERROR[1];
515 break;
516
517 #ifdef ENABLE_DEBUG
518 case LOG_DEBUG:
519 fd = pipes->log_DEBUG[1];
520 break;
521 #endif /* ENABLE_DEBUG */
522
523 // Ignore any messages of an unknown priority
524 default:
525 return;
526 }
527
528 // Send the log message
529 if (fd)
530 vdprintf(fd, format, args);
531 }
532
533 static int pakfire_jail_log_buffer_is_full(const struct pakfire_log_buffer* buffer) {
534 return (sizeof(buffer->data) == buffer->used);
535 }
536
537 /*
538 This function reads as much data as it can from the file descriptor.
539 If it finds a whole line in it, it will send it to the logger and repeat the process.
540 If not newline character is found, it will try to read more data until it finds one.
541 */
542 static int pakfire_jail_handle_log(struct pakfire_jail* jail,
543 struct pakfire_jail_exec* ctx, int priority, int fd,
544 struct pakfire_log_buffer* buffer, pakfire_jail_communicate_out callback, void* data) {
545 char line[BUFFER_SIZE + 1];
546
547 // Fill up buffer from fd
548 if (buffer->used < sizeof(buffer->data)) {
549 ssize_t bytes_read = read(fd, buffer->data + buffer->used,
550 sizeof(buffer->data) - buffer->used);
551
552 // Handle errors
553 if (bytes_read < 0) {
554 ERROR(jail->pakfire, "Could not read from fd %d: %m\n", fd);
555 return -1;
556 }
557
558 // Update buffer size
559 buffer->used += bytes_read;
560 }
561
562 // See if we have any lines that we can write
563 while (buffer->used) {
564 // Search for the end of the first line
565 char* eol = memchr(buffer->data, '\n', buffer->used);
566
567 // No newline found
568 if (!eol) {
569 // If the buffer is full, we send the content to the logger and try again
570 // This should not happen in practise
571 if (pakfire_jail_log_buffer_is_full(buffer)) {
572 DEBUG(jail->pakfire, "Logging buffer is full. Sending all content\n");
573
574 eol = buffer->data + sizeof(buffer->data) - 1;
575
576 // Otherwise we might have only read parts of the output
577 } else
578 break;
579 }
580
581 // Find the length of the string
582 size_t length = eol - buffer->data + 1;
583
584 // Copy the line into the buffer
585 memcpy(line, buffer->data, length);
586
587 // Terminate the string
588 line[length] = '\0';
589
590 // Log the line
591 if (callback) {
592 int r = callback(jail->pakfire, data, priority, line, length);
593 if (r) {
594 ERROR(jail->pakfire, "The logging callback returned an error: %d\n", r);
595 return r;
596 }
597 }
598
599 // Remove line from buffer
600 memmove(buffer->data, buffer->data + length, buffer->used - length);
601 buffer->used -= length;
602 }
603
604 return 0;
605 }
606
607 static int pakfire_jail_stream_stdin(struct pakfire_jail* jail,
608 struct pakfire_jail_exec* ctx, const int fd) {
609 int r;
610
611 // Nothing to do if there is no stdin callback set
612 if (!ctx->communicate.in) {
613 DEBUG(jail->pakfire, "Callback for standard input is not set\n");
614 return 0;
615 }
616
617 // Skip if the writing pipe has already been closed
618 if (!ctx->pipes.stdin[1])
619 return 0;
620
621 DEBUG(jail->pakfire, "Streaming standard input...\n");
622
623 // Calling the callback
624 r = ctx->communicate.in(jail->pakfire, ctx->communicate.data, fd);
625
626 DEBUG(jail->pakfire, "Standard input callback finished: %d\n", r);
627
628 // The callback signaled that it has written everything
629 if (r == EOF) {
630 DEBUG(jail->pakfire, "Closing standard input pipe\n");
631
632 // Close the file-descriptor
633 close(fd);
634
635 // Reset the file-descriptor so it won't be closed again later
636 ctx->pipes.stdin[1] = 0;
637
638 // Report success
639 r = 0;
640 }
641
642 return r;
643 }
644
645 static int pakfire_jail_setup_pipe(struct pakfire_jail* jail, int (*fds)[2], const int flags) {
646 int r = pipe2(*fds, flags);
647 if (r < 0) {
648 ERROR(jail->pakfire, "Could not setup pipe: %m\n");
649 return 1;
650 }
651
652 return 0;
653 }
654
/*
	Closes both ends of a pipe (if they are still open) and marks them as closed.

	This file marks closed file descriptors with either 0 or -1. Both are
	skipped here, so close() is never called with an invalid file descriptor
	(which would overwrite errno with EBADF).
*/
static void pakfire_jail_close_pipe(struct pakfire_jail* jail, int fds[2]) {
	for (unsigned int i = 0; i < 2; i++) {
		// Skip anything that is not open
		if (fds[i] <= 0)
			continue;

		close(fds[i]);

		// Make sure that this end will never be closed again
		fds[i] = -1;
	}
}
660
/*
	This is a convenience function that fetches the reading end of a pipe
	and closes the write end.

	The write end is only closed if it is open (closed file descriptors are
	marked with either 0 or -1 in this file) and is set to -1 afterwards.

	Returns the file descriptor of the read end as it is stored in *fds.
*/
static int pakfire_jail_get_pipe_to_read(struct pakfire_jail* jail, int (*fds)[2]) {
	// Give the variables easier names to avoid confusion
	int* fd_read = &(*fds)[0];
	int* fd_write = &(*fds)[1];

	// Close the write end of the pipe
	if (*fd_write > 0) {
		close(*fd_write);
		*fd_write = -1;
	}

	// Return the read end
	return *fd_read;
}
679
/*
	This is a convenience function that fetches the writing end of a pipe
	and closes the read end.

	The read end is only closed if it is open (closed file descriptors are
	marked with either 0 or -1 in this file) and is set to -1 afterwards.

	Returns the file descriptor of the write end as it is stored in *fds.
*/
static int pakfire_jail_get_pipe_to_write(struct pakfire_jail* jail, int (*fds)[2]) {
	// Give the variables easier names to avoid confusion
	int* fd_read = &(*fds)[0];
	int* fd_write = &(*fds)[1];

	// Close the read end of the pipe
	if (*fd_read > 0) {
		close(*fd_read);
		*fd_read = -1;
	}

	// Return the write end
	return *fd_write;
}
694
/*
	Event loop of the parent process: waits for the child process to exit
	while streaming its standard input/output and forwarding its log messages.

	The function polls (using epoll) the write end of the standard input pipe,
	the read ends of the standard output/error and log pipes, the PID file
	descriptor of the child and (if a timeout has been set) a timer. When the
	timer fires before the child has exited, the child is sent SIGKILL.
	The result of waitid() is stored in ctx->status.

	Returns the value of r when the loop is left: zero if the last operation
	was successful and a non-zero value if setting up epoll, reading, writing,
	a callback or waiting for the child failed.

	NOTE(review): the loop is left after the batch of events that contained
	the exit of the child has been processed - it looks like output which is
	still sitting in the pipes at that time is not read; confirm.
*/
static int pakfire_jail_wait(struct pakfire_jail* jail, struct pakfire_jail_exec* ctx) {
	int epollfd = -1;
	struct epoll_event ev;
	struct epoll_event events[EPOLL_MAX_EVENTS];
	// Target for reading the expiration counter of the timer (value is not used)
	char garbage[8];
	int r = 0;

	// Fetch file descriptors from context
	// (this closes the ends of the pipes that belong to the child)
	const int stdin = pakfire_jail_get_pipe_to_write(jail, &ctx->pipes.stdin);
	const int stdout = pakfire_jail_get_pipe_to_read(jail, &ctx->pipes.stdout);
	const int stderr = pakfire_jail_get_pipe_to_read(jail, &ctx->pipes.stderr);
	const int pidfd = ctx->pidfd;

	// Timer (-1 if no timeout has been set or the timer could not be created)
	const int timerfd = pakfire_jail_create_timer(jail);

	// Logging
	const int log_INFO = pakfire_jail_get_pipe_to_read(jail, &ctx->pipes.log_INFO);
	const int log_ERROR = pakfire_jail_get_pipe_to_read(jail, &ctx->pipes.log_ERROR);
	const int log_DEBUG = pakfire_jail_get_pipe_to_read(jail, &ctx->pipes.log_DEBUG);

	// Make a list of all file descriptors we are interested in
	int fds[] = {
		stdin, stdout, stderr, pidfd, timerfd, log_INFO, log_ERROR, log_DEBUG,
	};

	// Setup epoll
	epollfd = epoll_create1(0);
	if (epollfd < 0) {
		ERROR(jail->pakfire, "Could not initialize epoll(): %m\n");
		r = 1;
		goto ERROR;
	}

	// Turn file descriptors into non-blocking mode and add them to epoll()
	for (unsigned int i = 0; i < sizeof(fds) / sizeof(*fds); i++) {
		int fd = fds[i];

		// Skip fds which were not initialized
		if (fd < 0)
			continue;

		ev.events = EPOLLHUP;

		// Standard input is the only file descriptor that we write to
		if (fd == stdin)
			ev.events |= EPOLLOUT;
		else
			ev.events |= EPOLLIN;

		// Read flags
		int flags = fcntl(fd, F_GETFL, 0);

		// Set modified flags
		if (fcntl(fd, F_SETFL, flags|O_NONBLOCK) < 0) {
			ERROR(jail->pakfire,
				"Could not set file descriptor %d into non-blocking mode: %m\n", fd);
			r = 1;
			goto ERROR;
		}

		ev.data.fd = fd;

		if (epoll_ctl(epollfd, EPOLL_CTL_ADD, fd, &ev) < 0) {
			ERROR(jail->pakfire, "Could not add file descriptor %d to epoll(): %m\n", fd);
			r = 1;
			goto ERROR;
		}
	}

	// Set once waitid() has collected the exit status of the child
	int ended = 0;

	// Loop for as long as the process is alive
	while (!ended) {
		int num = epoll_wait(epollfd, events, EPOLL_MAX_EVENTS, -1);
		if (num < 1) {
			// Ignore if epoll_wait() has been interrupted
			if (errno == EINTR)
				continue;

			ERROR(jail->pakfire, "epoll_wait() failed: %m\n");
			r = 1;

			goto ERROR;
		}

		for (int i = 0; i < num; i++) {
			int e = events[i].events;
			int fd = events[i].data.fd;

			// Where to buffer the data and who receives the lines;
			// set below depending on the file descriptor
			struct pakfire_log_buffer* buffer = NULL;
			pakfire_jail_communicate_out callback = NULL;
			void* data = NULL;
			int priority;

			// Check if there is any data to be read
			if (e & EPOLLIN) {
				// Handle any changes to the PIDFD
				if (fd == pidfd) {
					// Call waitid() and store the result
					r = waitid(P_PIDFD, ctx->pidfd, &ctx->status, WEXITED);
					if (r) {
						ERROR(jail->pakfire, "waitid() failed: %m\n");
						goto ERROR;
					}

					// Mark that we have ended so that we will process the remaining
					// events from epoll() now, but won't restart the outer loop.
					ended = 1;
					continue;

				// Handle timer events
				} else if (fd == timerfd) {
					DEBUG(jail->pakfire, "Timer event received\n");

					// Disarm the timer
					r = read(timerfd, garbage, sizeof(garbage));
					if (r < 1) {
						ERROR(jail->pakfire, "Could not disarm timer: %m\n");
						r = 1;
						goto ERROR;
					}

					// Terminate the process if it hasn't already ended
					if (!ended) {
						DEBUG(jail->pakfire, "Terminating process...\n");

						// Send SIGKILL to the process
						r = pidfd_send_signal(pidfd, SIGKILL, NULL, 0);
						if (r) {
							ERROR(jail->pakfire, "Could not kill process: %m\n");
							goto ERROR;
						}
					}

					// There is nothing else to do
					continue;

				// Handle logging messages
				} else if (fd == log_INFO) {
					buffer = &ctx->buffers.log_INFO;
					priority = LOG_INFO;

					callback = pakfire_jail_default_log_callback;

				} else if (fd == log_ERROR) {
					buffer = &ctx->buffers.log_ERROR;
					priority = LOG_ERR;

					callback = pakfire_jail_default_log_callback;

				} else if (fd == log_DEBUG) {
					buffer = &ctx->buffers.log_DEBUG;
					priority = LOG_DEBUG;

					callback = pakfire_jail_default_log_callback;

				// Handle anything from standard output/error of the child
				} else if (fd == stdout) {
					buffer = &ctx->buffers.stdout;
					priority = LOG_INFO;

					callback = ctx->communicate.out;
					data = ctx->communicate.data;

				} else if (fd == stderr) {
					buffer = &ctx->buffers.stderr;
					priority = LOG_ERR;

					callback = ctx->communicate.out;
					data = ctx->communicate.data;

				} else {
					DEBUG(jail->pakfire, "Received invalid file descriptor %d\n", fd);
					continue;
				}

				// Handle log event
				r = pakfire_jail_handle_log(jail, ctx, priority, fd, buffer, callback, data);
				if (r)
					goto ERROR;
			}

			if (e & EPOLLOUT) {
				// Handle standard input
				if (fd == stdin) {
					r = pakfire_jail_stream_stdin(jail, ctx, fd);
					if (r) {
						switch (errno) {
							// Ignore if we filled up the buffer
							case EAGAIN:
								break;

							default:
								ERROR(jail->pakfire, "Could not write to stdin: %m\n");
								goto ERROR;
						}
					}
				}
			}

			// Check if any file descriptors have been closed
			if (e & EPOLLHUP) {
				// Remove the file descriptor
				r = epoll_ctl(epollfd, EPOLL_CTL_DEL, fd, NULL);
				if (r) {
					ERROR(jail->pakfire, "Could not remove closed file-descriptor %d: %m\n", fd);
					goto ERROR;
				}
			}
		}
	}

ERROR:
	// Only epoll and the timer are owned by this function;
	// all other file descriptors belong to the context
	if (epollfd > 0)
		close(epollfd);
	if (timerfd > 0)
		close(timerfd);

	return r;
}
915
/*
	Output callback that collects everything from standard output in a string.

	data has to be a pointer to a char* which holds the output that has been
	collected so far. It must either be NULL or point to memory that has been
	allocated on the heap, because the previous string is freed whenever a
	line is appended. The caller has to free the final string.

	Lines of any other priority (or all lines if data is NULL) are sent to the
	default log callback.

	Returns zero on success and 1 if no memory could be allocated, in which
	case the output that has been collected so far remains untouched.
*/
int pakfire_jail_capture_stdout(struct pakfire* pakfire, void* data,
		int priority, const char* line, size_t length) {
	char** output = (char**)data;
	char* joined = NULL;
	int r;

	// Append everything from stdout to a buffer
	if (output && priority == LOG_INFO) {
		// Format into a new string first so that the collected output
		// survives if the allocation fails
		r = asprintf(&joined, "%s%s", (*output) ? *output : "", line);
		if (r < 0)
			return 1;

		// Replace the previous string instead of leaking it
		free(*output);
		*output = joined;

		return 0;
	}

	// Send everything else to the default logger
	return pakfire_jail_default_log_callback(pakfire, NULL, priority, line, length);
}
932
933 // Capabilities
934
935 // Logs all capabilities of the current process
936 static int pakfire_jail_show_capabilities(struct pakfire_jail* jail) {
937 cap_t caps = NULL;
938 char* name = NULL;
939 cap_flag_value_t value_e;
940 cap_flag_value_t value_i;
941 cap_flag_value_t value_p;
942 int r;
943
944 // Fetch PID
945 pid_t pid = getpid();
946
947 // Fetch all capabilities
948 caps = cap_get_proc();
949 if (!caps) {
950 ERROR(jail->pakfire, "Could not fetch capabilities: %m\n");
951 r = 1;
952 goto ERROR;
953 }
954
955 DEBUG(jail->pakfire, "Capabilities of PID %d:\n", pid);
956
957 // Iterate over all capabilities
958 for (unsigned int cap = 0; cap_valid(cap); cap++) {
959 name = cap_to_name(cap);
960
961 // Fetch effective value
962 r = cap_get_flag(caps, cap, CAP_EFFECTIVE, &value_e);
963 if (r)
964 goto ERROR;
965
966 // Fetch inheritable value
967 r = cap_get_flag(caps, cap, CAP_INHERITABLE, &value_i);
968 if (r)
969 goto ERROR;
970
971 // Fetch permitted value
972 r = cap_get_flag(caps, cap, CAP_PERMITTED, &value_p);
973 if (r)
974 goto ERROR;
975
976 DEBUG(jail->pakfire,
977 " %-24s : %c%c%c\n",
978 name,
979 (value_e == CAP_SET) ? 'e' : '-',
980 (value_i == CAP_SET) ? 'i' : '-',
981 (value_p == CAP_SET) ? 'p' : '-'
982 );
983
984 // Free name
985 cap_free(name);
986 name = NULL;
987 }
988
989 // Success
990 r = 0;
991
992 ERROR:
993 if (name)
994 cap_free(name);
995 if (caps)
996 cap_free(caps);
997
998 return r;
999 }
1000
1001 static int pakfire_jail_set_capabilities(struct pakfire_jail* jail) {
1002 cap_t caps = NULL;
1003 char* name = NULL;
1004 int r;
1005
1006 // Fetch capabilities
1007 caps = cap_get_proc();
1008 if (!caps) {
1009 ERROR(jail->pakfire, "Could not read capabilities: %m\n");
1010 r = 1;
1011 goto ERROR;
1012 }
1013
1014 // Walk through all capabilities
1015 for (cap_value_t cap = 0; cap_valid(cap); cap++) {
1016 cap_value_t _caps[] = { cap };
1017
1018 // Fetch the name of the capability
1019 name = cap_to_name(cap);
1020
1021 r = cap_set_flag(caps, CAP_EFFECTIVE, 1, _caps, CAP_SET);
1022 if (r) {
1023 ERROR(jail->pakfire, "Could not set %s: %m\n", name);
1024 goto ERROR;
1025 }
1026
1027 r = cap_set_flag(caps, CAP_INHERITABLE, 1, _caps, CAP_SET);
1028 if (r) {
1029 ERROR(jail->pakfire, "Could not set %s: %m\n", name);
1030 goto ERROR;
1031 }
1032
1033 r = cap_set_flag(caps, CAP_PERMITTED, 1, _caps, CAP_SET);
1034 if (r) {
1035 ERROR(jail->pakfire, "Could not set %s: %m\n", name);
1036 goto ERROR;
1037 }
1038
1039 // Free name
1040 cap_free(name);
1041 name = NULL;
1042 }
1043
1044 // Restore all capabilities
1045 r = cap_set_proc(caps);
1046 if (r) {
1047 ERROR(jail->pakfire, "Restoring capabilities failed: %m\n");
1048 goto ERROR;
1049 }
1050
1051 // Add all capabilities to the ambient set
1052 for (unsigned int cap = 0; cap_valid(cap); cap++) {
1053 name = cap_to_name(cap);
1054
1055 // Raise the capability
1056 r = prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_RAISE, cap, 0, 0);
1057 if (r) {
1058 ERROR(jail->pakfire, "Could not set ambient capability %s: %m\n", name);
1059 goto ERROR;
1060 }
1061
1062 // Free name
1063 cap_free(name);
1064 name = NULL;
1065 }
1066
1067 // Success
1068 r = 0;
1069
1070 ERROR:
1071 if (name)
1072 cap_free(name);
1073 if (caps)
1074 cap_free(caps);
1075
1076 return r;
1077 }
1078
1079 // Syscall Filter
1080
1081 static int pakfire_jail_limit_syscalls(struct pakfire_jail* jail) {
1082 const int syscalls[] = {
1083 // The kernel's keyring isn't namespaced
1084 SCMP_SYS(keyctl),
1085 SCMP_SYS(add_key),
1086 SCMP_SYS(request_key),
1087
1088 // Disable userfaultfd
1089 SCMP_SYS(userfaultfd),
1090
1091 // Disable perf which could leak a lot of information about the host
1092 SCMP_SYS(perf_event_open),
1093
1094 0,
1095 };
1096 int r = 1;
1097
1098 DEBUG(jail->pakfire, "Applying syscall filter...\n");
1099
1100 // Setup a syscall filter which allows everything by default
1101 scmp_filter_ctx ctx = seccomp_init(SCMP_ACT_ALLOW);
1102 if (!ctx) {
1103 ERROR(jail->pakfire, "Could not setup seccomp filter: %m\n");
1104 goto ERROR;
1105 }
1106
1107 // All all syscalls
1108 for (const int* syscall = syscalls; *syscall; syscall++) {
1109 r = seccomp_rule_add(ctx, SCMP_ACT_ERRNO(EPERM), *syscall, 0);
1110 if (r) {
1111 ERROR(jail->pakfire, "Could not configure syscall %d: %m\n", *syscall);
1112 goto ERROR;
1113 }
1114 }
1115
1116 // Load syscall filter into the kernel
1117 r = seccomp_load(ctx);
1118 if (r) {
1119 ERROR(jail->pakfire, "Could not load syscall filter into the kernel: %m\n");
1120 goto ERROR;
1121 }
1122
1123 ERROR:
1124 if (ctx)
1125 seccomp_release(ctx);
1126
1127 return r;
1128 }
1129
1130 // Mountpoints
1131
1132 PAKFIRE_EXPORT int pakfire_jail_bind(struct pakfire_jail* jail,
1133 const char* source, const char* target, int flags) {
1134 struct pakfire_jail_mountpoint* mp = NULL;
1135 int r;
1136
1137 // Check if there is any space left
1138 if (jail->num_mountpoints >= MAX_MOUNTPOINTS) {
1139 errno = ENOSPC;
1140 return 1;
1141 }
1142
1143 // Check for valid inputs
1144 if (!source || !target) {
1145 errno = EINVAL;
1146 return 1;
1147 }
1148
1149 // Select the next free slot
1150 mp = &jail->mountpoints[jail->num_mountpoints];
1151
1152 // Copy source
1153 r = pakfire_string_set(mp->source, source);
1154 if (r) {
1155 ERROR(jail->pakfire, "Could not copy source: %m\n");
1156 return r;
1157 }
1158
1159 // Copy target
1160 r = pakfire_string_set(mp->target, target);
1161 if (r) {
1162 ERROR(jail->pakfire, "Could not copy target: %m\n");
1163 return r;
1164 }
1165
1166 // Copy flags
1167 mp->flags = flags;
1168
1169 // Increment counter
1170 jail->num_mountpoints++;
1171
1172 return 0;
1173 }
1174
1175 static int pakfire_jail_mount_networking(struct pakfire_jail* jail) {
1176 int r;
1177
1178 const char* paths[] = {
1179 "/etc/hosts",
1180 "/etc/resolv.conf",
1181 NULL,
1182 };
1183
1184 // Bind-mount all paths read-only
1185 for (const char** path = paths; *path; path++) {
1186 r = pakfire_bind(jail->pakfire, *path, NULL, MS_RDONLY);
1187 if (r)
1188 return r;
1189 }
1190
1191 return 0;
1192 }
1193
1194 /*
1195 Mounts everything that we require in the new namespace
1196 */
1197 static int pakfire_jail_mount(struct pakfire_jail* jail, struct pakfire_jail_exec* ctx) {
1198 struct pakfire_jail_mountpoint* mp = NULL;
1199 int flags = 0;
1200 int r;
1201
1202 // Enable loop devices
1203 if (pakfire_jail_exec_has_flag(ctx, PAKFIRE_JAIL_HAS_LOOP_DEVICES))
1204 flags |= PAKFIRE_MOUNT_LOOP_DEVICES;
1205
1206 // Mount all default stuff
1207 r = pakfire_mount_all(jail->pakfire, flags);
1208 if (r)
1209 return r;
1210
1211 // Mount networking stuff
1212 if (pakfire_jail_exec_has_flag(ctx, PAKFIRE_JAIL_HAS_NETWORKING)) {
1213 r = pakfire_jail_mount_networking(jail);
1214 if (r)
1215 return r;
1216 }
1217
1218 // Mount all custom stuff
1219 for (unsigned int i = 0; i < jail->num_mountpoints; i++) {
1220 // Fetch mountpoint
1221 mp = &jail->mountpoints[i];
1222
1223 // Mount it
1224 r = pakfire_bind(jail->pakfire, mp->source, mp->target, mp->flags);
1225 if (r)
1226 return r;
1227 }
1228
1229 // Log all mountpoints
1230 pakfire_mount_list(jail->pakfire);
1231
1232 return 0;
1233 }
1234
1235 // Networking
1236
1237 static int pakfire_jail_setup_loopback(struct pakfire_jail* jail) {
1238 struct nl_sock* nl = NULL;
1239 struct nl_cache* cache = NULL;
1240 struct rtnl_link* link = NULL;
1241 struct rtnl_link* change = NULL;
1242 int r;
1243
1244 DEBUG(jail->pakfire, "Setting up loopback...\n");
1245
1246 // Allocate a netlink socket
1247 nl = nl_socket_alloc();
1248 if (!nl) {
1249 ERROR(jail->pakfire, "Could not allocate a netlink socket: %m\n");
1250 r = 1;
1251 goto ERROR;
1252 }
1253
1254 // Connect the socket
1255 r = nl_connect(nl, NETLINK_ROUTE);
1256 if (r) {
1257 ERROR(jail->pakfire, "Could not connect netlink socket: %s\n", nl_geterror(r));
1258 goto ERROR;
1259 }
1260
1261 // Allocate the netlink cache
1262 r = rtnl_link_alloc_cache(nl, AF_UNSPEC, &cache);
1263 if (r < 0) {
1264 ERROR(jail->pakfire, "Unable to allocate netlink cache: %s\n", nl_geterror(r));
1265 goto ERROR;
1266 }
1267
1268 // Fetch loopback interface
1269 link = rtnl_link_get_by_name(cache, "lo");
1270 if (!link) {
1271 ERROR(jail->pakfire, "Could not find lo interface. Ignoring.\n");
1272 r = 0;
1273 goto ERROR;
1274 }
1275
1276 // Allocate a new link
1277 change = rtnl_link_alloc();
1278 if (!change) {
1279 ERROR(jail->pakfire, "Could not allocate change link\n");
1280 r = 1;
1281 goto ERROR;
1282 }
1283
1284 // Set the link to UP
1285 rtnl_link_set_flags(change, IFF_UP);
1286
1287 // Apply any changes
1288 r = rtnl_link_change(nl, link, change, 0);
1289 if (r) {
1290 ERROR(jail->pakfire, "Unable to activate loopback: %s\n", nl_geterror(r));
1291 goto ERROR;
1292 }
1293
1294 // Success
1295 r = 0;
1296
1297 ERROR:
1298 if (nl)
1299 nl_socket_free(nl);
1300
1301 return r;
1302 }
1303
1304 // UID/GID Mapping
1305
1306 static int pakfire_jail_setup_uid_mapping(struct pakfire_jail* jail, pid_t pid) {
1307 char path[PATH_MAX];
1308 int r;
1309
1310 // Skip mapping anything when running on /
1311 if (pakfire_on_root(jail->pakfire))
1312 return 0;
1313
1314 // Make path
1315 r = pakfire_string_format(path, "/proc/%d/uid_map", pid);
1316 if (r)
1317 return r;
1318
1319 // Fetch UID
1320 const uid_t uid = pakfire_uid(jail->pakfire);
1321
1322 // Fetch SUBUID
1323 const struct pakfire_subid* subuid = pakfire_subuid(jail->pakfire);
1324 if (!subuid)
1325 return 1;
1326
1327 /* When running as root, we will map the entire range.
1328
1329 When running as a non-privileged user, we will map the root user inside the jail
1330 to the user's UID outside of the jail, and we will map the rest starting from one.
1331 */
1332
1333 // Running as root
1334 if (uid == 0) {
1335 r = pakfire_file_write(jail->pakfire, path, 0, 0, 0,
1336 "0 %lu %lu\n", subuid->id, subuid->length);
1337 } else {
1338 r = pakfire_file_write(jail->pakfire, path, 0, 0, 0,
1339 "0 %lu 1\n1 %lu %lu\n", uid, subuid->id, subuid->length);
1340 }
1341
1342 if (r) {
1343 ERROR(jail->pakfire, "Could not map UIDs: %m\n");
1344 return r;
1345 }
1346
1347 return r;
1348 }
1349
1350 static int pakfire_jail_setup_gid_mapping(struct pakfire_jail* jail, pid_t pid) {
1351 char path[PATH_MAX];
1352 int r;
1353
1354 // Skip mapping anything when running on /
1355 if (pakfire_on_root(jail->pakfire))
1356 return 0;
1357
1358 // Fetch GID
1359 const gid_t gid = pakfire_gid(jail->pakfire);
1360
1361 // Fetch SUBGID
1362 const struct pakfire_subid* subgid = pakfire_subgid(jail->pakfire);
1363 if (!subgid)
1364 return 1;
1365
1366 // Make path
1367 r = pakfire_string_format(path, "/proc/%d/gid_map", pid);
1368 if (r)
1369 return r;
1370
1371 // Running as root
1372 if (gid == 0) {
1373 r = pakfire_file_write(jail->pakfire, path, 0, 0, 0,
1374 "0 %lu %lu\n", subgid->id, subgid->length);
1375 } else {
1376 r = pakfire_file_write(jail->pakfire, path, 0, 0, 0,
1377 "0 %lu 1\n%1 %lu %lu\n", gid, subgid->id, subgid->length);
1378 }
1379
1380 if (r) {
1381 ERROR(jail->pakfire, "Could not map GIDs: %m\n");
1382 return r;
1383 }
1384
1385 return r;
1386 }
1387
1388 static int pakfire_jail_setgroups(struct pakfire_jail* jail, pid_t pid) {
1389 char path[PATH_MAX];
1390 int r = 1;
1391
1392 // Make path
1393 r = pakfire_string_format(path, "/proc/%d/setgroups", pid);
1394 if (r)
1395 return r;
1396
1397 // Open file for writing
1398 FILE* f = fopen(path, "w");
1399 if (!f) {
1400 ERROR(jail->pakfire, "Could not open %s for writing: %m\n", path);
1401 goto ERROR;
1402 }
1403
1404 // Write content
1405 int bytes_written = fprintf(f, "deny\n");
1406 if (bytes_written <= 0) {
1407 ERROR(jail->pakfire, "Could not write to %s: %m\n", path);
1408 goto ERROR;
1409 }
1410
1411 r = fclose(f);
1412 f = NULL;
1413 if (r) {
1414 ERROR(jail->pakfire, "Could not close %s: %m\n", path);
1415 goto ERROR;
1416 }
1417
1418 ERROR:
1419 if (f)
1420 fclose(f);
1421
1422 return r;
1423 }
1424
1425 static int pakfire_jail_send_signal(struct pakfire_jail* jail, int fd) {
1426 const uint64_t val = 1;
1427 int r = 0;
1428
1429 DEBUG(jail->pakfire, "Sending signal...\n");
1430
1431 // Write to the file descriptor
1432 ssize_t bytes_written = write(fd, &val, sizeof(val));
1433 if (bytes_written < 0 || (size_t)bytes_written < sizeof(val)) {
1434 ERROR(jail->pakfire, "Could not send signal: %m\n");
1435 r = 1;
1436 }
1437
1438 // Close the file descriptor
1439 close(fd);
1440
1441 return r;
1442 }
1443
1444 static int pakfire_jail_wait_for_signal(struct pakfire_jail* jail, int fd) {
1445 uint64_t val = 0;
1446 int r = 0;
1447
1448 DEBUG(jail->pakfire, "Waiting for signal...\n");
1449
1450 ssize_t bytes_read = read(fd, &val, sizeof(val));
1451 if (bytes_read < 0 || (size_t)bytes_read < sizeof(val)) {
1452 ERROR(jail->pakfire, "Error waiting for signal: %m\n");
1453 r = 1;
1454 }
1455
1456 // Close the file descriptor
1457 close(fd);
1458
1459 return r;
1460 }
1461
1462 /*
1463 Performs the initialisation that needs to happen in the parent part
1464 */
1465 static int pakfire_jail_parent(struct pakfire_jail* jail, struct pakfire_jail_exec* ctx) {
1466 int r;
1467
1468 // Setup UID mapping
1469 r = pakfire_jail_setup_uid_mapping(jail, ctx->pid);
1470 if (r)
1471 return r;
1472
1473 // Write "deny" to /proc/PID/setgroups
1474 r = pakfire_jail_setgroups(jail, ctx->pid);
1475 if (r)
1476 return r;
1477
1478 // Setup GID mapping
1479 r = pakfire_jail_setup_gid_mapping(jail, ctx->pid);
1480 if (r)
1481 return r;
1482
1483 // Parent has finished initialisation
1484 DEBUG(jail->pakfire, "Parent has finished initialization\n");
1485
1486 // Send signal to client
1487 r = pakfire_jail_send_signal(jail, ctx->completed_fd);
1488 if (r)
1489 return r;
1490
1491 return 0;
1492 }
1493
1494 static int pakfire_jail_child(struct pakfire_jail* jail, struct pakfire_jail_exec* ctx,
1495 const char* argv[]) {
1496 int r;
1497
1498 // Redirect any logging to our log pipe
1499 pakfire_set_log_callback(jail->pakfire, pakfire_jail_log, &ctx->pipes);
1500
1501 // Die with parent
1502 r = prctl(PR_SET_PDEATHSIG, SIGKILL, 0, 0, 0);
1503 if (r) {
1504 ERROR(jail->pakfire, "Could not configure to die with parent: %m\n");
1505 return 126;
1506 }
1507
1508 // Fetch my own PID
1509 pid_t pid = getpid();
1510
1511 DEBUG(jail->pakfire, "Launched child process in jail with PID %d\n", pid);
1512
1513 // Wait for the parent to finish initialization
1514 r = pakfire_jail_wait_for_signal(jail, ctx->completed_fd);
1515 if (r)
1516 return r;
1517
1518 // Perform further initialization
1519
1520 // Fetch UID/GID
1521 uid_t uid = getuid();
1522 gid_t gid = getgid();
1523
1524 // Fetch EUID/EGID
1525 uid_t euid = geteuid();
1526 gid_t egid = getegid();
1527
1528 DEBUG(jail->pakfire, " UID: %d (effective %d)\n", uid, euid);
1529 DEBUG(jail->pakfire, " GID: %d (effective %d)\n", gid, egid);
1530
1531 // Check if we are (effectively running as root)
1532 if (uid || gid || euid || egid) {
1533 ERROR(jail->pakfire, "Child process is not running as root\n");
1534 return 126;
1535 }
1536
1537 const char* root = pakfire_get_path(jail->pakfire);
1538 const char* arch = pakfire_get_arch(jail->pakfire);
1539
1540 // Change root (unless root is /)
1541 if (!pakfire_on_root(jail->pakfire)) {
1542 // Mount everything
1543 r = pakfire_jail_mount(jail, ctx);
1544 if (r)
1545 return r;
1546
1547 // Call chroot()
1548 r = chroot(root);
1549 if (r) {
1550 ERROR(jail->pakfire, "chroot() to %s failed: %m\n", root);
1551 return 1;
1552 }
1553
1554 // Change directory to /
1555 r = chdir("/");
1556 if (r) {
1557 ERROR(jail->pakfire, "chdir() after chroot() failed: %m\n");
1558 return 1;
1559 }
1560 }
1561
1562 // Set personality
1563 unsigned long persona = pakfire_arch_personality(arch);
1564 if (persona) {
1565 r = personality(persona);
1566 if (r < 0) {
1567 ERROR(jail->pakfire, "Could not set personality (%x)\n", (unsigned int)persona);
1568 return 1;
1569 }
1570 }
1571
1572 // Setup networking
1573 if (!pakfire_jail_exec_has_flag(ctx, PAKFIRE_JAIL_HAS_NETWORKING)) {
1574 r = pakfire_jail_setup_loopback(jail);
1575 if (r)
1576 return 1;
1577 }
1578
1579 // Set nice level
1580 if (jail->nice) {
1581 DEBUG(jail->pakfire, "Setting nice level to %d\n", jail->nice);
1582
1583 r = setpriority(PRIO_PROCESS, pid, jail->nice);
1584 if (r) {
1585 ERROR(jail->pakfire, "Could not set nice level: %m\n");
1586 return 1;
1587 }
1588 }
1589
1590 // Close other end of log pipes
1591 close(ctx->pipes.log_INFO[0]);
1592 close(ctx->pipes.log_ERROR[0]);
1593 #ifdef ENABLE_DEBUG
1594 close(ctx->pipes.log_DEBUG[0]);
1595 #endif /* ENABLE_DEBUG */
1596
1597 // Connect standard input
1598 if (ctx->pipes.stdin[0]) {
1599 r = dup2(ctx->pipes.stdin[0], STDIN_FILENO);
1600 if (r < 0) {
1601 ERROR(jail->pakfire, "Could not connect fd %d to stdin: %m\n",
1602 ctx->pipes.stdin[0]);
1603
1604 return 1;
1605 }
1606 }
1607
1608 // Connect standard output and error
1609 if (ctx->pipes.stdout[1] && ctx->pipes.stderr[1]) {
1610 r = dup2(ctx->pipes.stdout[1], STDOUT_FILENO);
1611 if (r < 0) {
1612 ERROR(jail->pakfire, "Could not connect fd %d to stdout: %m\n",
1613 ctx->pipes.stdout[1]);
1614
1615 return 1;
1616 }
1617
1618 r = dup2(ctx->pipes.stderr[1], STDERR_FILENO);
1619 if (r < 0) {
1620 ERROR(jail->pakfire, "Could not connect fd %d to stderr: %m\n",
1621 ctx->pipes.stderr[1]);
1622
1623 return 1;
1624 }
1625
1626 // Close the pipe (as we have moved the original file descriptors)
1627 pakfire_jail_close_pipe(jail, ctx->pipes.stdin);
1628 pakfire_jail_close_pipe(jail, ctx->pipes.stdout);
1629 pakfire_jail_close_pipe(jail, ctx->pipes.stderr);
1630 }
1631
1632 // Reset open file limit (http://0pointer.net/blog/file-descriptor-limits.html)
1633 r = pakfire_rlimit_reset_nofile(jail->pakfire);
1634 if (r)
1635 return r;
1636
1637 // Don't drop any capabilities on execve()
1638 r = prctl(PR_SET_KEEPCAPS, 1);
1639 if (r) {
1640 ERROR(jail->pakfire, "Could not set PR_SET_KEEPCAPS: %m\n");
1641 return r;
1642 }
1643
1644 // Set capabilities
1645 r = pakfire_jail_set_capabilities(jail);
1646 if (r)
1647 return r;
1648
1649 // Show capabilities
1650 r = pakfire_jail_show_capabilities(jail);
1651 if (r)
1652 return r;
1653
1654 // Filter syscalls
1655 r = pakfire_jail_limit_syscalls(jail);
1656 if (r)
1657 return r;
1658
1659 DEBUG(jail->pakfire, "Child process initialization done\n");
1660 DEBUG(jail->pakfire, "Launching command:\n");
1661
1662 // Log argv
1663 for (unsigned int i = 0; argv[i]; i++)
1664 DEBUG(jail->pakfire, " argv[%d] = %s\n", i, argv[i]);
1665
1666 // exec() command
1667 r = execvpe(argv[0], (char**)argv, jail->env);
1668 if (r < 0) {
1669 // Translate errno into regular exit code
1670 switch (errno) {
1671 case ENOENT:
1672 // Ignore if the command doesn't exist
1673 if (ctx->flags & PAKFIRE_JAIL_NOENT_OK)
1674 r = 0;
1675 else
1676 r = 127;
1677
1678 break;
1679
1680 default:
1681 r = 1;
1682 }
1683
1684 ERROR(jail->pakfire, "Could not execve(%s): %m\n", argv[0]);
1685 }
1686
1687 // We should not get here
1688 return r;
1689 }
1690
1691 // Run a command in the jail
1692 static int __pakfire_jail_exec(struct pakfire_jail* jail, const char* argv[],
1693 const int interactive,
1694 pakfire_jail_communicate_in communicate_in,
1695 pakfire_jail_communicate_out communicate_out,
1696 void* data, int flags) {
1697 int exit = -1;
1698 int r;
1699
1700 // Check if argv is valid
1701 if (!argv || !argv[0]) {
1702 errno = EINVAL;
1703 return -1;
1704 }
1705
1706 // Send any output to the default logger if no callback is set
1707 if (!communicate_out)
1708 communicate_out = pakfire_jail_default_log_callback;
1709
1710 // Initialize context for this call
1711 struct pakfire_jail_exec ctx = {
1712 .flags = flags,
1713
1714 .pipes = {
1715 .stdin = { -1, -1 },
1716 .stdout = { -1, -1 },
1717 .stderr = { -1, -1 },
1718 },
1719
1720 .communicate = {
1721 .in = communicate_in,
1722 .out = communicate_out,
1723 .data = data,
1724 },
1725
1726 .pidfd = -1,
1727 };
1728
1729 DEBUG(jail->pakfire, "Executing jail...\n");
1730
1731 // Enable networking in interactive mode
1732 if (interactive)
1733 ctx.flags |= PAKFIRE_JAIL_HAS_NETWORKING;
1734
1735 /*
1736 Setup a file descriptor which can be used to notify the client that the parent
1737 has completed configuration.
1738 */
1739 ctx.completed_fd = eventfd(0, EFD_CLOEXEC);
1740 if (ctx.completed_fd < 0) {
1741 ERROR(jail->pakfire, "eventfd() failed: %m\n");
1742 return -1;
1743 }
1744
1745 // Create pipes to communicate with child process if we are not running interactively
1746 if (!interactive) {
1747 // stdin (only if callback is set)
1748 if (ctx.communicate.in) {
1749 r = pakfire_jail_setup_pipe(jail, &ctx.pipes.stdin, 0);
1750 if (r)
1751 goto ERROR;
1752 }
1753
1754 // stdout
1755 r = pakfire_jail_setup_pipe(jail, &ctx.pipes.stdout, 0);
1756 if (r)
1757 goto ERROR;
1758
1759 // stderr
1760 r = pakfire_jail_setup_pipe(jail, &ctx.pipes.stderr, 0);
1761 if (r)
1762 goto ERROR;
1763 }
1764
1765 // Setup pipes for logging
1766 // INFO
1767 r = pakfire_jail_setup_pipe(jail, &ctx.pipes.log_INFO, O_CLOEXEC);
1768 if (r)
1769 goto ERROR;
1770
1771 // ERROR
1772 r = pakfire_jail_setup_pipe(jail, &ctx.pipes.log_ERROR, O_CLOEXEC);
1773 if (r)
1774 goto ERROR;
1775
1776 #ifdef ENABLE_DEBUG
1777 // DEBUG
1778 r = pakfire_jail_setup_pipe(jail, &ctx.pipes.log_DEBUG, O_CLOEXEC);
1779 if (r)
1780 goto ERROR;
1781 #endif /* ENABLE_DEBUG */
1782
1783 // Configure child process
1784 struct clone_args args = {
1785 .flags =
1786 CLONE_NEWCGROUP |
1787 CLONE_NEWIPC |
1788 CLONE_NEWNS |
1789 CLONE_NEWPID |
1790 CLONE_NEWUSER |
1791 CLONE_NEWUTS |
1792 CLONE_PIDFD,
1793 .exit_signal = SIGCHLD,
1794 .pidfd = (long long unsigned int)&ctx.pidfd,
1795 };
1796
1797 // Launch the process in a cgroup that is a leaf of the configured cgroup
1798 if (jail->cgroup) {
1799 args.flags |= CLONE_INTO_CGROUP;
1800
1801 // Fetch our UUID
1802 const char* uuid = pakfire_jail_uuid(jail);
1803
1804 // Create a temporary cgroup
1805 r = pakfire_cgroup_child(&ctx.cgroup, jail->cgroup, uuid, 0);
1806 if (r) {
1807 ERROR(jail->pakfire, "Could not create cgroup for jail: %m\n");
1808 goto ERROR;
1809 }
1810
1811 // Clone into this cgroup
1812 args.cgroup = pakfire_cgroup_fd(ctx.cgroup);
1813 }
1814
1815 // Setup networking
1816 if (!pakfire_jail_exec_has_flag(&ctx, PAKFIRE_JAIL_HAS_NETWORKING)) {
1817 args.flags |= CLONE_NEWNET;
1818 }
1819
1820 // Fork this process
1821 ctx.pid = clone3(&args, sizeof(args));
1822 if (ctx.pid < 0) {
1823 ERROR(jail->pakfire, "Could not clone: %m\n");
1824 return -1;
1825
1826 // Child process
1827 } else if (ctx.pid == 0) {
1828 r = pakfire_jail_child(jail, &ctx, argv);
1829 _exit(r);
1830 }
1831
1832 // Parent process
1833 r = pakfire_jail_parent(jail, &ctx);
1834 if (r)
1835 goto ERROR;
1836
1837 DEBUG(jail->pakfire, "Waiting for PID %d to finish its work\n", ctx.pid);
1838
1839 // Read output of the child process
1840 r = pakfire_jail_wait(jail, &ctx);
1841 if (r)
1842 goto ERROR;
1843
1844 // Handle exit status
1845 switch (ctx.status.si_code) {
1846 case CLD_EXITED:
1847 DEBUG(jail->pakfire, "The child process exited with code %d\n",
1848 ctx.status.si_status);
1849
1850 // Pass exit code
1851 exit = ctx.status.si_status;
1852 break;
1853
1854 case CLD_KILLED:
1855 ERROR(jail->pakfire, "The child process was killed\n");
1856 exit = 139;
1857 break;
1858
1859 case CLD_DUMPED:
1860 ERROR(jail->pakfire, "The child process terminated abnormally\n");
1861 break;
1862
1863 // Log anything else
1864 default:
1865 ERROR(jail->pakfire, "Unknown child exit code: %d\n", ctx.status.si_code);
1866 break;
1867 }
1868
1869 ERROR:
1870 // Destroy the temporary cgroup (if any)
1871 if (ctx.cgroup) {
1872 // Read cgroup stats
1873 r = pakfire_cgroup_stat(ctx.cgroup, &ctx.cgroup_stats);
1874 if (r) {
1875 ERROR(jail->pakfire, "Could not read cgroup stats: %m\n");
1876 } else {
1877 pakfire_cgroup_stat_dump(ctx.cgroup, &ctx.cgroup_stats);
1878 }
1879
1880 pakfire_cgroup_destroy(ctx.cgroup);
1881 pakfire_cgroup_unref(ctx.cgroup);
1882 }
1883
1884 // Close any file descriptors
1885 pakfire_jail_close_pipe(jail, ctx.pipes.stdin);
1886 pakfire_jail_close_pipe(jail, ctx.pipes.stdout);
1887 pakfire_jail_close_pipe(jail, ctx.pipes.stderr);
1888 if (ctx.pidfd)
1889 close(ctx.pidfd);
1890 pakfire_jail_close_pipe(jail, ctx.pipes.log_INFO);
1891 pakfire_jail_close_pipe(jail, ctx.pipes.log_ERROR);
1892 pakfire_jail_close_pipe(jail, ctx.pipes.log_DEBUG);
1893
1894 return exit;
1895 }
1896
1897 PAKFIRE_EXPORT int pakfire_jail_exec(
1898 struct pakfire_jail* jail,
1899 const char* argv[],
1900 pakfire_jail_communicate_in callback_in,
1901 pakfire_jail_communicate_out callback_out,
1902 void* data, int flags) {
1903 return __pakfire_jail_exec(jail, argv, 0, callback_in, callback_out, data, flags);
1904 }
1905
/*
	Executes argv in the jail in interactive mode, without any callbacks.

	Returns the error of pakfire_jail_setup_interactive_env() if that has failed.
*/
static int pakfire_jail_exec_interactive(
		struct pakfire_jail* jail, const char* argv[], int flags) {
	// The interactive environment has to be set up first
	const int r = pakfire_jail_setup_interactive_env(jail);

	return (r) ? r : __pakfire_jail_exec(jail, argv, 1, NULL, NULL, NULL, flags);
}
1917
1918 int pakfire_jail_exec_script(struct pakfire_jail* jail,
1919 const char* script,
1920 const size_t size,
1921 const char* args[],
1922 pakfire_jail_communicate_in callback_in,
1923 pakfire_jail_communicate_out callback_out,
1924 void* data) {
1925 char path[PATH_MAX];
1926 const char** argv = NULL;
1927 FILE* f = NULL;
1928 int r;
1929
1930 const char* root = pakfire_get_path(jail->pakfire);
1931
1932 // Write the scriptlet to disk
1933 r = pakfire_path_join(path, root, PAKFIRE_TMP_DIR "/pakfire-script.XXXXXX");
1934 if (r)
1935 goto ERROR;
1936
1937 // Create a temporary file
1938 f = pakfire_mktemp(path, 0700);
1939 if (!f) {
1940 ERROR(jail->pakfire, "Could not create temporary file: %m\n");
1941 goto ERROR;
1942 }
1943
1944 DEBUG(jail->pakfire, "Writing script to %s:\n%.*s\n", path, (int)size, script);
1945
1946 // Write data
1947 r = fprintf(f, "%s", script);
1948 if (r < 0) {
1949 ERROR(jail->pakfire, "Could not write script to file %s: %m\n", path);
1950 goto ERROR;
1951 }
1952
1953 // Close file
1954 r = fclose(f);
1955 if (r) {
1956 ERROR(jail->pakfire, "Could not close script file %s: %m\n", path);
1957 goto ERROR;
1958 }
1959
1960 f = NULL;
1961
1962 // Count how many arguments were passed
1963 unsigned int argc = 1;
1964 if (args) {
1965 for (const char** arg = args; *arg; arg++)
1966 argc++;
1967 }
1968
1969 argv = calloc(argc + 1, sizeof(*argv));
1970 if (!argv) {
1971 ERROR(jail->pakfire, "Could not allocate argv: %m\n");
1972 goto ERROR;
1973 }
1974
1975 // Set command
1976 argv[0] = (root) ? pakfire_path_relpath(root, path) : path;
1977
1978 // Copy args
1979 for (unsigned int i = 1; i < argc; i++)
1980 argv[i] = args[i-1];
1981
1982 // Run the script
1983 r = pakfire_jail_exec(jail, argv, callback_in, callback_out, data, 0);
1984
1985 ERROR:
1986 if (argv)
1987 free(argv);
1988 if (f)
1989 fclose(f);
1990
1991 // Remove script from disk
1992 if (*path)
1993 unlink(path);
1994
1995 return r;
1996 }
1997
1998 /*
1999 A convenience function that creates a new jail, runs the given command and destroys
2000 the jail again.
2001 */
2002 int pakfire_jail_run(struct pakfire* pakfire, const char* argv[], int flags, char** output) {
2003 struct pakfire_jail* jail = NULL;
2004 int r;
2005
2006 // Create a new jail
2007 r = pakfire_jail_create(&jail, pakfire);
2008 if (r)
2009 goto ERROR;
2010
2011 // Execute the command
2012 r = pakfire_jail_exec(jail, argv, NULL, pakfire_jail_capture_stdout, output, 0);
2013
2014 ERROR:
2015 if (jail)
2016 pakfire_jail_unref(jail);
2017
2018 return r;
2019 }
2020
/*
	Creates a new jail, executes the given script in it without any callbacks
	and drops the reference to the jail again.

	flags is currently not used, as pakfire_jail_exec_script() does not take any.
*/
int pakfire_jail_run_script(struct pakfire* pakfire,
		const char* script, const size_t length, const char* argv[], int flags) {
	struct pakfire_jail* jail = NULL;

	// The script is only executed if the jail could be created
	int r = pakfire_jail_create(&jail, pakfire);
	if (!r)
		r = pakfire_jail_exec_script(jail, script, length, argv, NULL, NULL, NULL);

	// Drop our reference
	if (jail)
		pakfire_jail_unref(jail);

	return r;
}
2040
/*
	Launches /bin/bash as a login shell in an interactive jail session.
*/
int pakfire_jail_shell(struct pakfire_jail* jail) {
	const char* argv[3] = { "/bin/bash", "--login", NULL };

	return pakfire_jail_exec_interactive(jail, argv, 0);
}
2049
2050 static int pakfire_jail_run_if_possible(struct pakfire* pakfire, const char** argv) {
2051 char path[PATH_MAX];
2052 int r;
2053
2054 r = pakfire_path(pakfire, path, "%s", *argv);
2055 if (r)
2056 return r;
2057
2058 // Check if the file is executable
2059 r = access(path, X_OK);
2060 if (r) {
2061 DEBUG(pakfire, "%s is not executable. Skipping...\n", *argv);
2062 return 0;
2063 }
2064
2065 return pakfire_jail_run(pakfire, argv, 0, NULL);
2066 }
2067
/*
	Runs /sbin/ldconfig in a jail if it is executable.
*/
int pakfire_jail_ldconfig(struct pakfire* pakfire) {
	const char* argv[2] = { "/sbin/ldconfig", NULL };

	return pakfire_jail_run_if_possible(pakfire, argv);
}
2076
/*
	Runs "/usr/bin/systemd-tmpfiles --create" in a jail if it is executable.
*/
int pakfire_jail_run_systemd_tmpfiles(struct pakfire* pakfire) {
	const char* argv[3] = { "/usr/bin/systemd-tmpfiles", "--create", NULL };

	return pakfire_jail_run_if_possible(pakfire, argv);
}