]> git.ipfire.org Git - people/ms/pakfire.git/blob - src/libpakfire/jail.c
jail: Allow accessing loop devices
[people/ms/pakfire.git] / src / libpakfire / jail.c
1 /*#############################################################################
2 # #
3 # Pakfire - The IPFire package management system #
4 # Copyright (C) 2022 Pakfire development team #
5 # #
6 # This program is free software: you can redistribute it and/or modify #
7 # it under the terms of the GNU General Public License as published by #
8 # the Free Software Foundation, either version 3 of the License, or #
9 # (at your option) any later version. #
10 # #
11 # This program is distributed in the hope that it will be useful, #
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of #
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
14 # GNU General Public License for more details. #
15 # #
16 # You should have received a copy of the GNU General Public License #
17 # along with this program. If not, see <http://www.gnu.org/licenses/>. #
18 # #
19 #############################################################################*/
20
21 #include <errno.h>
22 #include <fcntl.h>
23 #include <linux/capability.h>
24 #include <linux/sched.h>
25 #include <sys/wait.h>
26 #include <linux/wait.h>
27 #include <sched.h>
28 #include <signal.h>
29 #include <stdlib.h>
30 #include <syscall.h>
31 #include <sys/capability.h>
32 #include <sys/epoll.h>
33 #include <sys/eventfd.h>
34 #include <sys/mount.h>
35 #include <sys/personality.h>
36 #include <sys/prctl.h>
37 #include <sys/resource.h>
38 #include <sys/timerfd.h>
39 #include <sys/types.h>
40 #include <sys/wait.h>
41
42 // libnl3
43 #include <net/if.h>
44 #include <netlink/route/link.h>
45
46 // libseccomp
47 #include <seccomp.h>
48
49 // libuuid
50 #include <uuid.h>
51
52 #include <pakfire/arch.h>
53 #include <pakfire/cgroup.h>
54 #include <pakfire/jail.h>
55 #include <pakfire/logging.h>
56 #include <pakfire/mount.h>
57 #include <pakfire/pakfire.h>
58 #include <pakfire/private.h>
59 #include <pakfire/pwd.h>
60 #include <pakfire/string.h>
61 #include <pakfire/util.h>
62
// Size in bytes of each buffer that collects output (64 KiB).
// The expression is parenthesised so that the macro expands correctly
// when it is used inside a larger expression (e.g. x / BUFFER_SIZE).
#define BUFFER_SIZE (1024 * 64)

// Number of slots in the environment array of a jail
#define ENVIRON_SIZE 128

// Maximum number of events that are fetched by one call to epoll_wait()
#define EPOLL_MAX_EVENTS 2

// Maximum number of custom mountpoints that can be stored in a jail
#define MAX_MOUNTPOINTS 8
67
// Environment variables that are set for every new jail.
// The table ends with an entry whose key is NULL.
static const struct environ {
	const char* key;
	const char* val;
} ENV[] = {
	{ .key = "HOME", .val = "/root" },
	{ .key = "LANG", .val = "C.utf-8" },
	{ .key = "PATH", .val = "/usr/local/sbin:/usr/sbin:/sbin:/usr/local/bin:/usr/bin:/bin" },
	{ .key = "TERM", .val = "vt100" },

	// Lets processes detect that they are running inside a Pakfire container
	{ .key = "container", .val = "pakfire" },

	// Sentinel
	{ .key = NULL, .val = NULL },
};
82
// One custom bind-mount that has been registered with a jail
struct pakfire_jail_mountpoint {
	// Path that is going to be mounted
	char source[PATH_MAX];

	// Path where the source is going to be mounted to
	char target[PATH_MAX];

	// Flags that are passed on when the mount is performed
	int flags;
};
88
89 struct pakfire_jail {
90 struct pakfire* pakfire;
91 int nrefs;
92
93 // A unique ID for each jail
94 uuid_t uuid;
95 char __uuid[UUID_STR_LEN];
96
97 // Resource Limits
98 int nice;
99
100 // Timeout
101 struct itimerspec timeout;
102
103 // CGroup
104 struct pakfire_cgroup* cgroup;
105
106 // Environment
107 char* env[ENVIRON_SIZE];
108
109 // Mountpoints
110 struct pakfire_jail_mountpoint mountpoints[MAX_MOUNTPOINTS];
111 unsigned int num_mountpoints;
112 };
113
114 struct pakfire_log_buffer {
115 char data[BUFFER_SIZE];
116 size_t used;
117 };
118
119 struct pakfire_jail_exec {
120 int flags;
121
122 // PID (of the child)
123 pid_t pid;
124 int pidfd;
125
126 // Process status (from waitid)
127 siginfo_t status;
128
129 // FD to notify the client that the parent has finished initialization
130 int completed_fd;
131
132 // Log pipes
133 struct pakfire_jail_pipes {
134 int stdin[2];
135 int stdout[2];
136 int stderr[2];
137
138 // Logging
139 int log_INFO[2];
140 int log_ERROR[2];
141 int log_DEBUG[2];
142 } pipes;
143
144 // Communicate
145 struct pakfire_jail_communicate {
146 pakfire_jail_communicate_in in;
147 pakfire_jail_communicate_out out;
148 void* data;
149 } communicate;
150
151 // Log buffers
152 struct pakfire_jail_buffers {
153 struct pakfire_log_buffer stdout;
154 struct pakfire_log_buffer stderr;
155
156 // Logging
157 struct pakfire_log_buffer log_INFO;
158 struct pakfire_log_buffer log_ERROR;
159 struct pakfire_log_buffer log_DEBUG;
160 } buffers;
161
162 struct pakfire_cgroup* cgroup;
163 struct pakfire_cgroup_stats cgroup_stats;
164 };
165
// Invokes the clone3 system call through syscall() and returns its result
static int clone3(struct clone_args* args, size_t size) {
	const long result = syscall(__NR_clone3, args, size);

	return result;
}
169
170 static int pidfd_send_signal(int pidfd, int sig, siginfo_t* info, unsigned int flags) {
171 return syscall(SYS_pidfd_send_signal, pidfd, sig, info, flags);
172 }
173
174 static int pakfire_jail_exec_has_flag(
175 const struct pakfire_jail_exec* ctx, const enum pakfire_jail_exec_flags flag) {
176 return ctx->flags & flag;
177 }
178
179 static void pakfire_jail_free(struct pakfire_jail* jail) {
180 DEBUG(jail->pakfire, "Freeing jail at %p\n", jail);
181
182 // Free environment
183 for (unsigned int i = 0; jail->env[i]; i++)
184 free(jail->env[i]);
185
186 if (jail->cgroup)
187 pakfire_cgroup_unref(jail->cgroup);
188
189 pakfire_unref(jail->pakfire);
190 free(jail);
191 }
192
/*
	Default output callback: forwards the line to the Pakfire logger that
	matches the priority. Lines with any other priority are dropped.

	Always returns zero.
*/
static int pakfire_jail_default_log_callback(struct pakfire* pakfire, void* data,
		int priority, const char* line, size_t length) {
	if (priority == LOG_INFO) {
		INFO(pakfire, "%s", line);

	} else if (priority == LOG_ERR) {
		ERROR(pakfire, "%s", line);
	}
#ifdef ENABLE_DEBUG
	// Debug messages are only passed on in debug builds
	else if (priority == LOG_DEBUG) {
		DEBUG(pakfire, "%s", line);
	}
#endif

	return 0;
}
216
217 static const char* pakfire_jail_uuid(struct pakfire_jail* jail) {
218 if (!*jail->__uuid)
219 uuid_unparse_lower(jail->uuid, jail->__uuid);
220
221 return jail->__uuid;
222 }
223
// Prepares the environment for interactive use: sets a prompt and copies
// TERM and LANG from the calling environment if they are set there.
// Returns zero on success or the error returned by pakfire_jail_set_env().
static int pakfire_jail_setup_interactive_env(struct pakfire_jail* jail) {
	// Variables that are copied from the calling environment (in this order)
	static const char* const inherited[] = { "TERM", "LANG", NULL };

	// Set PS1
	int r = pakfire_jail_set_env(jail, "PS1", "pakfire-jail \\w> ");
	if (r)
		return r;

	for (const char* const* name = inherited; *name; name++) {
		const char* value = secure_getenv(*name);

		// Skip anything that is not set
		if (!value)
			continue;

		r = pakfire_jail_set_env(jail, *name, value);
		if (r)
			return r;
	}

	return 0;
}
248
249 PAKFIRE_EXPORT int pakfire_jail_create(struct pakfire_jail** jail, struct pakfire* pakfire) {
250 int r;
251
252 const char* arch = pakfire_get_arch(pakfire);
253
254 // Allocate a new jail
255 struct pakfire_jail* j = calloc(1, sizeof(*j));
256 if (!j)
257 return 1;
258
259 // Reference Pakfire
260 j->pakfire = pakfire_ref(pakfire);
261
262 // Initialize reference counter
263 j->nrefs = 1;
264
265 // Generate a random UUID
266 uuid_generate_random(j->uuid);
267
268 DEBUG(j->pakfire, "Allocated new jail at %p\n", j);
269
270 // Set default environment
271 for (const struct environ* e = ENV; e->key; e++) {
272 r = pakfire_jail_set_env(j, e->key, e->val);
273 if (r)
274 goto ERROR;
275 }
276
277 // Enable all CPU features that CPU has to offer
278 if (!pakfire_arch_supported_by_host(arch)) {
279 r = pakfire_jail_set_env(j, "QEMU_CPU", "max");
280 if (r)
281 goto ERROR;
282 }
283
284 // Set container UUID
285 r = pakfire_jail_set_env(j, "container_uuid", pakfire_jail_uuid(j));
286 if (r)
287 goto ERROR;
288
289 // Disable systemctl to talk to systemd
290 if (!pakfire_on_root(j->pakfire)) {
291 r = pakfire_jail_set_env(j, "SYSTEMD_OFFLINE", "1");
292 if (r)
293 goto ERROR;
294 }
295
296 // Done
297 *jail = j;
298 return 0;
299
300 ERROR:
301 pakfire_jail_free(j);
302
303 return r;
304 }
305
306 PAKFIRE_EXPORT struct pakfire_jail* pakfire_jail_ref(struct pakfire_jail* jail) {
307 ++jail->nrefs;
308
309 return jail;
310 }
311
312 PAKFIRE_EXPORT struct pakfire_jail* pakfire_jail_unref(struct pakfire_jail* jail) {
313 if (--jail->nrefs > 0)
314 return jail;
315
316 pakfire_jail_free(jail);
317 return NULL;
318 }
319
320 // Resource Limits
321
322 PAKFIRE_EXPORT int pakfire_jail_nice(struct pakfire_jail* jail, int nice) {
323 // Check if nice level is in range
324 if (nice < -19 || nice > 20) {
325 errno = EINVAL;
326 return 1;
327 }
328
329 // Store nice level
330 jail->nice = nice;
331
332 return 0;
333 }
334
335 int pakfire_jail_set_cgroup(struct pakfire_jail* jail, struct pakfire_cgroup* cgroup) {
336 // Free any previous cgroup
337 if (jail->cgroup) {
338 pakfire_cgroup_unref(jail->cgroup);
339 jail->cgroup = NULL;
340 }
341
342 // Set any new cgroup
343 if (cgroup) {
344 DEBUG(jail->pakfire, "Setting cgroup %p\n", cgroup);
345
346 jail->cgroup = pakfire_cgroup_ref(cgroup);
347 }
348
349 // Done
350 return 0;
351 }
352
353 // Environment
354
355 // Returns the length of the environment
356 static unsigned int pakfire_jail_env_length(struct pakfire_jail* jail) {
357 unsigned int i = 0;
358
359 // Count everything in the environment
360 for (char** e = jail->env; *e; e++)
361 i++;
362
363 return i;
364 }
365
366 // Finds an existing environment variable and returns its index or -1 if not found
367 static int pakfire_jail_find_env(struct pakfire_jail* jail, const char* key) {
368 if (!key) {
369 errno = EINVAL;
370 return -1;
371 }
372
373 char buffer[strlen(key) + 2];
374 pakfire_string_format(buffer, "%s=", key);
375
376 for (unsigned int i = 0; jail->env[i]; i++) {
377 if (pakfire_string_startswith(jail->env[i], buffer))
378 return i;
379 }
380
381 // Nothing found
382 return -1;
383 }
384
385 // Returns the value of an environment variable or NULL
386 PAKFIRE_EXPORT const char* pakfire_jail_get_env(struct pakfire_jail* jail,
387 const char* key) {
388 int i = pakfire_jail_find_env(jail, key);
389 if (i < 0)
390 return NULL;
391
392 return jail->env[i] + strlen(key) + 1;
393 }
394
395 // Sets an environment variable
396 PAKFIRE_EXPORT int pakfire_jail_set_env(struct pakfire_jail* jail,
397 const char* key, const char* value) {
398 // Find the index where to write this value to
399 int i = pakfire_jail_find_env(jail, key);
400 if (i < 0)
401 i = pakfire_jail_env_length(jail);
402
403 // Return -ENOSPC when the environment is full
404 if (i >= ENVIRON_SIZE) {
405 errno = ENOSPC;
406 return -1;
407 }
408
409 // Free any previous value
410 if (jail->env[i])
411 free(jail->env[i]);
412
413 // Format and set environment variable
414 asprintf(&jail->env[i], "%s=%s", key, value);
415
416 DEBUG(jail->pakfire, "Set environment variable: %s\n", jail->env[i]);
417
418 return 0;
419 }
420
421 // Imports an environment
422 PAKFIRE_EXPORT int pakfire_jail_import_env(struct pakfire_jail* jail, const char* env[]) {
423 if (!env)
424 return 0;
425
426 char* key;
427 char* val;
428 int r;
429
430 // Copy environment variables
431 for (unsigned int i = 0; env[i]; i++) {
432 r = pakfire_string_partition(env[i], "=", &key, &val);
433 if (r)
434 continue;
435
436 // Set value
437 r = pakfire_jail_set_env(jail, key, val);
438
439 if (key)
440 free(key);
441 if (val)
442 free(val);
443
444 // Break on error
445 if (r)
446 return r;
447 }
448
449 return 0;
450 }
451
452 // Timeout
453
454 PAKFIRE_EXPORT int pakfire_jail_set_timeout(
455 struct pakfire_jail* jail, unsigned int timeout) {
456 // Store value
457 jail->timeout.it_value.tv_sec = timeout;
458
459 if (timeout > 0)
460 DEBUG(jail->pakfire, "Timeout set to %d second(s)\n", timeout);
461 else
462 DEBUG(jail->pakfire, "Timeout disabled\n");
463
464 return 0;
465 }
466
467 static int pakfire_jail_create_timer(struct pakfire_jail* jail) {
468 int r;
469
470 // Nothing to do if no timeout has been set
471 if (!jail->timeout.it_value.tv_sec)
472 return -1;
473
474 // Create a new timer
475 const int fd = timerfd_create(CLOCK_MONOTONIC, 0);
476 if (fd < 0) {
477 ERROR(jail->pakfire, "Could not create timer: %m\n");
478 goto ERROR;
479 }
480
481 // Arm timer
482 r = timerfd_settime(fd, 0, &jail->timeout, NULL);
483 if (r) {
484 ERROR(jail->pakfire, "Could not arm timer: %m\n");
485 goto ERROR;
486 }
487
488 return fd;
489
490 ERROR:
491 if (fd > 0)
492 close(fd);
493
494 return -1;
495 }
496
497 /*
498 This function replaces any logging in the child process.
499
500 All log messages will be sent to the parent process through their respective pipes.
501 */
502 static void pakfire_jail_log(void* data, int priority, const char* file,
503 int line, const char* fn, const char* format, va_list args) {
504 struct pakfire_jail_pipes* pipes = (struct pakfire_jail_pipes*)data;
505 int fd;
506
507 switch (priority) {
508 case LOG_INFO:
509 fd = pipes->log_INFO[1];
510 break;
511
512 case LOG_ERR:
513 fd = pipes->log_ERROR[1];
514 break;
515
516 #ifdef ENABLE_DEBUG
517 case LOG_DEBUG:
518 fd = pipes->log_DEBUG[1];
519 break;
520 #endif /* ENABLE_DEBUG */
521
522 // Ignore any messages of an unknown priority
523 default:
524 return;
525 }
526
527 // Send the log message
528 if (fd)
529 vdprintf(fd, format, args);
530 }
531
532 static int pakfire_jail_log_buffer_is_full(const struct pakfire_log_buffer* buffer) {
533 return (sizeof(buffer->data) == buffer->used);
534 }
535
536 /*
537 This function reads as much data as it can from the file descriptor.
538 If it finds a whole line in it, it will send it to the logger and repeat the process.
539 If not newline character is found, it will try to read more data until it finds one.
540 */
541 static int pakfire_jail_handle_log(struct pakfire_jail* jail,
542 struct pakfire_jail_exec* ctx, int priority, int fd,
543 struct pakfire_log_buffer* buffer, pakfire_jail_communicate_out callback, void* data) {
544 char line[BUFFER_SIZE + 1];
545
546 // Fill up buffer from fd
547 if (buffer->used < sizeof(buffer->data)) {
548 ssize_t bytes_read = read(fd, buffer->data + buffer->used,
549 sizeof(buffer->data) - buffer->used);
550
551 // Handle errors
552 if (bytes_read < 0) {
553 ERROR(jail->pakfire, "Could not read from fd %d: %m\n", fd);
554 return -1;
555 }
556
557 // Update buffer size
558 buffer->used += bytes_read;
559 }
560
561 // See if we have any lines that we can write
562 while (buffer->used) {
563 // Search for the end of the first line
564 char* eol = memchr(buffer->data, '\n', buffer->used);
565
566 // No newline found
567 if (!eol) {
568 // If the buffer is full, we send the content to the logger and try again
569 // This should not happen in practise
570 if (pakfire_jail_log_buffer_is_full(buffer)) {
571 DEBUG(jail->pakfire, "Logging buffer is full. Sending all content\n");
572
573 eol = buffer->data + sizeof(buffer->data) - 1;
574
575 // Otherwise we might have only read parts of the output
576 } else
577 break;
578 }
579
580 // Find the length of the string
581 size_t length = eol - buffer->data + 1;
582
583 // Copy the line into the buffer
584 memcpy(line, buffer->data, length);
585
586 // Terminate the string
587 line[length] = '\0';
588
589 // Log the line
590 if (callback) {
591 int r = callback(jail->pakfire, data, priority, line, length);
592 if (r) {
593 ERROR(jail->pakfire, "The logging callback returned an error: %d\n", r);
594 return r;
595 }
596 }
597
598 // Remove line from buffer
599 memmove(buffer->data, buffer->data + length, buffer->used - length);
600 buffer->used -= length;
601 }
602
603 return 0;
604 }
605
606 static int pakfire_jail_stream_stdin(struct pakfire_jail* jail,
607 struct pakfire_jail_exec* ctx, const int fd) {
608 int r;
609
610 // Nothing to do if there is no stdin callback set
611 if (!ctx->communicate.in) {
612 DEBUG(jail->pakfire, "Callback for standard input is not set\n");
613 return 0;
614 }
615
616 // Skip if the writing pipe has already been closed
617 if (!ctx->pipes.stdin[1])
618 return 0;
619
620 DEBUG(jail->pakfire, "Streaming standard input...\n");
621
622 // Calling the callback
623 r = ctx->communicate.in(jail->pakfire, ctx->communicate.data, fd);
624
625 DEBUG(jail->pakfire, "Standard input callback finished: %d\n", r);
626
627 // The callback signaled that it has written everything
628 if (r == EOF) {
629 DEBUG(jail->pakfire, "Closing standard input pipe\n");
630
631 // Close the file-descriptor
632 close(fd);
633
634 // Reset the file-descriptor so it won't be closed again later
635 ctx->pipes.stdin[1] = 0;
636
637 // Report success
638 r = 0;
639 }
640
641 return r;
642 }
643
644 static int pakfire_jail_setup_pipe(struct pakfire_jail* jail, int (*fds)[2], const int flags) {
645 int r = pipe2(*fds, flags);
646 if (r < 0) {
647 ERROR(jail->pakfire, "Could not setup pipe: %m\n");
648 return 1;
649 }
650
651 return 0;
652 }
653
// Closes both ends of a pipe. Ends that are zero are considered not open
// and are skipped.
static void pakfire_jail_close_pipe(struct pakfire_jail* jail, int fds[2]) {
	// Reading end
	if (fds[0])
		close(fds[0]);

	// Writing end
	if (fds[1])
		close(fds[1]);
}
659
/*
	Convenience function for the side that only reads from a pipe:
	closes the writing end (and resets it to zero) and returns the reading end.
*/
static int pakfire_jail_get_pipe_to_read(struct pakfire_jail* jail, int (*fds)[2]) {
	// Index 0 is the reading end, index 1 is the writing end
	int* ends = *fds;

	// Close the write end of the pipe
	if (ends[1]) {
		close(ends[1]);
		ends[1] = 0;
	}

	// Return the read end
	return ends[0];
}
678
// Convenience function for the side that only writes to a pipe:
// closes the reading end (and resets it to zero) and returns the writing end.
static int pakfire_jail_get_pipe_to_write(struct pakfire_jail* jail, int (*fds)[2]) {
	// Index 0 is the reading end, index 1 is the writing end
	int* ends = *fds;

	// Close the read end of the pipe
	if (ends[0]) {
		close(ends[0]);
		ends[0] = 0;
	}

	// Return the write end
	return ends[1];
}
693
694 static int pakfire_jail_wait(struct pakfire_jail* jail, struct pakfire_jail_exec* ctx) {
695 int epollfd = -1;
696 struct epoll_event ev;
697 struct epoll_event events[EPOLL_MAX_EVENTS];
698 char garbage[8];
699 int r = 0;
700
701 // Fetch file descriptors from context
702 const int stdin = pakfire_jail_get_pipe_to_write(jail, &ctx->pipes.stdin);
703 const int stdout = pakfire_jail_get_pipe_to_read(jail, &ctx->pipes.stdout);
704 const int stderr = pakfire_jail_get_pipe_to_read(jail, &ctx->pipes.stderr);
705 const int pidfd = ctx->pidfd;
706
707 // Timer
708 const int timerfd = pakfire_jail_create_timer(jail);
709
710 // Logging
711 const int log_INFO = pakfire_jail_get_pipe_to_read(jail, &ctx->pipes.log_INFO);
712 const int log_ERROR = pakfire_jail_get_pipe_to_read(jail, &ctx->pipes.log_ERROR);
713 const int log_DEBUG = pakfire_jail_get_pipe_to_read(jail, &ctx->pipes.log_DEBUG);
714
715 // Make a list of all file descriptors we are interested in
716 int fds[] = {
717 stdin, stdout, stderr, pidfd, timerfd, log_INFO, log_ERROR, log_DEBUG,
718 };
719
720 // Setup epoll
721 epollfd = epoll_create1(0);
722 if (epollfd < 0) {
723 ERROR(jail->pakfire, "Could not initialize epoll(): %m\n");
724 r = 1;
725 goto ERROR;
726 }
727
728 // Turn file descriptors into non-blocking mode and add them to epoll()
729 for (unsigned int i = 0; i < sizeof(fds) / sizeof(*fds); i++) {
730 int fd = fds[i];
731
732 // Skip fds which were not initialized
733 if (fd <= 0)
734 continue;
735
736 ev.events = EPOLLHUP;
737
738 if (fd == stdin)
739 ev.events |= EPOLLOUT;
740 else
741 ev.events |= EPOLLIN;
742
743 // Read flags
744 int flags = fcntl(fd, F_GETFL, 0);
745
746 // Set modified flags
747 if (fcntl(fd, F_SETFL, flags|O_NONBLOCK) < 0) {
748 ERROR(jail->pakfire,
749 "Could not set file descriptor %d into non-blocking mode: %m\n", fd);
750 r = 1;
751 goto ERROR;
752 }
753
754 ev.data.fd = fd;
755
756 if (epoll_ctl(epollfd, EPOLL_CTL_ADD, fd, &ev) < 0) {
757 ERROR(jail->pakfire, "Could not add file descriptor %d to epoll(): %m\n", fd);
758 r = 1;
759 goto ERROR;
760 }
761 }
762
763 int ended = 0;
764
765 // Loop for as long as the process is alive
766 while (!ended) {
767 int num = epoll_wait(epollfd, events, EPOLL_MAX_EVENTS, -1);
768 if (num < 1) {
769 // Ignore if epoll_wait() has been interrupted
770 if (errno == EINTR)
771 continue;
772
773 ERROR(jail->pakfire, "epoll_wait() failed: %m\n");
774 r = 1;
775
776 goto ERROR;
777 }
778
779 for (int i = 0; i < num; i++) {
780 int e = events[i].events;
781 int fd = events[i].data.fd;
782
783 struct pakfire_log_buffer* buffer = NULL;
784 pakfire_jail_communicate_out callback = NULL;
785 void* data = NULL;
786 int priority;
787
788 // Check if there is any data to be read
789 if (e & EPOLLIN) {
790 // Handle any changes to the PIDFD
791 if (fd == pidfd) {
792 // Call waidid() and store the result
793 r = waitid(P_PIDFD, ctx->pidfd, &ctx->status, WEXITED);
794 if (r) {
795 ERROR(jail->pakfire, "waitid() failed: %m\n");
796 goto ERROR;
797 }
798
799 // Mark that we have ended so that we will process the remaining
800 // events from epoll() now, but won't restart the outer loop.
801 ended = 1;
802 continue;
803
804 // Handle timer events
805 } else if (fd == timerfd) {
806 DEBUG(jail->pakfire, "Timer event received\n");
807
808 // Disarm the timer
809 r = read(timerfd, garbage, sizeof(garbage));
810 if (r < 1) {
811 ERROR(jail->pakfire, "Could not disarm timer: %m\n");
812 r = 1;
813 goto ERROR;
814 }
815
816 // Terminate the process if it hasn't already ended
817 if (!ended) {
818 DEBUG(jail->pakfire, "Terminating process...\n");
819
820 // Send SIGTERM to the process
821 r = pidfd_send_signal(pidfd, SIGKILL, NULL, 0);
822 if (r) {
823 ERROR(jail->pakfire, "Could not kill process: %m\n");
824 goto ERROR;
825 }
826 }
827
828 // There is nothing else to do
829 continue;
830
831 // Handle logging messages
832 } else if (fd == log_INFO) {
833 buffer = &ctx->buffers.log_INFO;
834 priority = LOG_INFO;
835
836 callback = pakfire_jail_default_log_callback;
837
838 } else if (fd == log_ERROR) {
839 buffer = &ctx->buffers.log_ERROR;
840 priority = LOG_ERR;
841
842 callback = pakfire_jail_default_log_callback;
843
844 } else if (fd == log_DEBUG) {
845 buffer = &ctx->buffers.log_DEBUG;
846 priority = LOG_DEBUG;
847
848 callback = pakfire_jail_default_log_callback;
849
850 // Handle anything from the log pipes
851 } else if (fd == stdout) {
852 buffer = &ctx->buffers.stdout;
853 priority = LOG_INFO;
854
855 callback = ctx->communicate.out;
856 data = ctx->communicate.data;
857
858 } else if (fd == stderr) {
859 buffer = &ctx->buffers.stderr;
860 priority = LOG_ERR;
861
862 callback = ctx->communicate.out;
863 data = ctx->communicate.data;
864
865 } else {
866 DEBUG(jail->pakfire, "Received invalid file descriptor %d\n", fd);
867 continue;
868 }
869
870 // Handle log event
871 r = pakfire_jail_handle_log(jail, ctx, priority, fd, buffer, callback, data);
872 if (r)
873 goto ERROR;
874 }
875
876 if (e & EPOLLOUT) {
877 // Handle standard input
878 if (fd == stdin) {
879 r = pakfire_jail_stream_stdin(jail, ctx, fd);
880 if (r) {
881 switch (errno) {
882 // Ignore if we filled up the buffer
883 case EAGAIN:
884 break;
885
886 default:
887 ERROR(jail->pakfire, "Could not write to stdin: %m\n");
888 goto ERROR;
889 }
890 }
891 }
892 }
893
894 // Check if any file descriptors have been closed
895 if (e & EPOLLHUP) {
896 // Remove the file descriptor
897 r = epoll_ctl(epollfd, EPOLL_CTL_DEL, fd, NULL);
898 if (r) {
899 ERROR(jail->pakfire, "Could not remove closed file-descriptor %d: %m\n", fd);
900 goto ERROR;
901 }
902 }
903 }
904 }
905
906 ERROR:
907 if (epollfd > 0)
908 close(epollfd);
909 if (timerfd > 0)
910 close(timerfd);
911
912 return r;
913 }
914
// Output callback that collects everything from standard output in a string.
//
// data must be NULL or point to a char* which is either NULL or a string
// that has been allocated on the heap. Every line with priority LOG_INFO is
// appended to that string, which is reallocated on the way; the caller has
// to free it. Everything else (and everything if data is NULL) is sent to
// the default logger.
//
// Returns zero on success and 1 if memory could not be allocated, in which
// case the string collected so far is left untouched.
int pakfire_jail_capture_stdout(struct pakfire* pakfire, void* data,
		int priority, const char* line, size_t length) {
	char** output = (char**)data;

	// Append everything from stdout to a buffer
	if (output && priority == LOG_INFO) {
		char* joined = NULL;

		// Build the new string separately, because the old one is an input
		int r = asprintf(&joined, "%s%s", *output ? *output : "", line);
		if (r < 0)
			return 1;

		// Release the previous string (free(NULL) is a no-op)
		free(*output);
		*output = joined;

		return 0;
	}

	// Send everything else to the default logger
	return pakfire_jail_default_log_callback(pakfire, NULL, priority, line, length);
}
931
932 // Capabilities
933
934 static int pakfire_jail_drop_capabilities(struct pakfire_jail* jail) {
935 const int capabilities[] = {
936 // Deny access to the kernel's audit system
937 CAP_AUDIT_CONTROL,
938 CAP_AUDIT_READ,
939 CAP_AUDIT_WRITE,
940
941 // Deny suspending block devices
942 CAP_BLOCK_SUSPEND,
943
944 // Deny any stuff with BPF
945 CAP_BPF,
946
947 // Deny checkpoint restore
948 CAP_CHECKPOINT_RESTORE,
949
950 // Deny opening files by inode number (open_by_handle_at)
951 CAP_DAC_READ_SEARCH,
952
953 // Deny setting SUID bits
954 CAP_FSETID,
955
956 // Deny locking more memory
957 CAP_IPC_LOCK,
958
959 // Deny modifying any Apparmor/SELinux/SMACK configuration
960 CAP_MAC_ADMIN,
961 CAP_MAC_OVERRIDE,
962
963 // Deny creating any special devices
964 CAP_MKNOD,
965
966 // Deny reading from syslog
967 CAP_SYSLOG,
968
969 // Deny any admin actions (mount, sethostname, ...)
970 CAP_SYS_ADMIN,
971
972 // Deny rebooting the system
973 CAP_SYS_BOOT,
974
975 // Deny loading kernel modules
976 CAP_SYS_MODULE,
977
978 // Deny setting nice level
979 CAP_SYS_NICE,
980
981 // Deny access to /proc/kcore, /dev/mem, /dev/kmem
982 CAP_SYS_RAWIO,
983
984 // Deny circumventing any resource limits
985 CAP_SYS_RESOURCE,
986
987 // Deny setting the system time
988 CAP_SYS_TIME,
989
990 // Deny playing with suspend
991 CAP_WAKE_ALARM,
992
993 0,
994 };
995
996 DEBUG(jail->pakfire, "Dropping capabilities...\n");
997
998 size_t num_caps = 0;
999 int r;
1000
1001 // Drop any capabilities
1002 for (const int* cap = capabilities; *cap; cap++) {
1003 r = prctl(PR_CAPBSET_DROP, *cap, 0, 0, 0);
1004 if (r) {
1005 ERROR(jail->pakfire, "Could not drop capability %d: %m\n", *cap);
1006 return r;
1007 }
1008
1009 num_caps++;
1010 }
1011
1012 // Fetch any capabilities
1013 cap_t caps = cap_get_proc();
1014 if (!caps) {
1015 ERROR(jail->pakfire, "Could not read capabilities: %m\n");
1016 return 1;
1017 }
1018
1019 /*
1020 Set inheritable capabilities
1021
1022 This ensures that no processes will be able to gain any of the listed
1023 capabilities again.
1024 */
1025 r = cap_set_flag(caps, CAP_INHERITABLE, num_caps, capabilities, CAP_CLEAR);
1026 if (r) {
1027 ERROR(jail->pakfire, "cap_set_flag() failed: %m\n");
1028 goto ERROR;
1029 }
1030
1031 // Restore capabilities
1032 r = cap_set_proc(caps);
1033 if (r) {
1034 ERROR(jail->pakfire, "Could not restore capabilities: %m\n");
1035 goto ERROR;
1036 }
1037
1038 ERROR:
1039 if (caps)
1040 cap_free(caps);
1041
1042 return r;
1043 }
1044
1045 // Syscall Filter
1046
1047 static int pakfire_jail_limit_syscalls(struct pakfire_jail* jail) {
1048 const int syscalls[] = {
1049 // The kernel's keyring isn't namespaced
1050 SCMP_SYS(keyctl),
1051 SCMP_SYS(add_key),
1052 SCMP_SYS(request_key),
1053
1054 // Disable userfaultfd
1055 SCMP_SYS(userfaultfd),
1056
1057 // Disable perf which could leak a lot of information about the host
1058 SCMP_SYS(perf_event_open),
1059
1060 0,
1061 };
1062 int r = 1;
1063
1064 DEBUG(jail->pakfire, "Applying syscall filter...\n");
1065
1066 // Setup a syscall filter which allows everything by default
1067 scmp_filter_ctx ctx = seccomp_init(SCMP_ACT_ALLOW);
1068 if (!ctx) {
1069 ERROR(jail->pakfire, "Could not setup seccomp filter: %m\n");
1070 goto ERROR;
1071 }
1072
1073 // All all syscalls
1074 for (const int* syscall = syscalls; *syscall; syscall++) {
1075 r = seccomp_rule_add(ctx, SCMP_ACT_ERRNO(EPERM), *syscall, 0);
1076 if (r) {
1077 ERROR(jail->pakfire, "Could not configure syscall %d: %m\n", *syscall);
1078 goto ERROR;
1079 }
1080 }
1081
1082 // Load syscall filter into the kernel
1083 r = seccomp_load(ctx);
1084 if (r) {
1085 ERROR(jail->pakfire, "Could not load syscall filter into the kernel: %m\n");
1086 goto ERROR;
1087 }
1088
1089 ERROR:
1090 if (ctx)
1091 seccomp_release(ctx);
1092
1093 return r;
1094 }
1095
1096 // Mountpoints
1097
1098 PAKFIRE_EXPORT int pakfire_jail_bind(struct pakfire_jail* jail,
1099 const char* source, const char* target, int flags) {
1100 struct pakfire_jail_mountpoint* mp = NULL;
1101 int r;
1102
1103 // Check if there is any space left
1104 if (jail->num_mountpoints >= MAX_MOUNTPOINTS) {
1105 errno = ENOSPC;
1106 return 1;
1107 }
1108
1109 // Check for valid inputs
1110 if (!source || !target) {
1111 errno = EINVAL;
1112 return 1;
1113 }
1114
1115 // Select the next free slot
1116 mp = &jail->mountpoints[jail->num_mountpoints];
1117
1118 // Copy source
1119 r = pakfire_string_set(mp->source, source);
1120 if (r) {
1121 ERROR(jail->pakfire, "Could not copy source: %m\n");
1122 return r;
1123 }
1124
1125 // Copy target
1126 r = pakfire_string_set(mp->target, target);
1127 if (r) {
1128 ERROR(jail->pakfire, "Could not copy target: %m\n");
1129 return r;
1130 }
1131
1132 // Copy flags
1133 mp->flags = flags;
1134
1135 // Increment counter
1136 jail->num_mountpoints++;
1137
1138 return 0;
1139 }
1140
1141 static int pakfire_jail_mount_networking(struct pakfire_jail* jail) {
1142 int r;
1143
1144 const char* paths[] = {
1145 "/etc/hosts",
1146 "/etc/resolv.conf",
1147 NULL,
1148 };
1149
1150 // Bind-mount all paths read-only
1151 for (const char** path = paths; *path; path++) {
1152 r = pakfire_bind(jail->pakfire, *path, NULL, MS_RDONLY);
1153 if (r)
1154 return r;
1155 }
1156
1157 return 0;
1158 }
1159
1160 /*
1161 Mounts everything that we require in the new namespace
1162 */
1163 static int pakfire_jail_mount(struct pakfire_jail* jail, struct pakfire_jail_exec* ctx) {
1164 struct pakfire_jail_mountpoint* mp = NULL;
1165 int flags = 0;
1166 int r;
1167
1168 // Enable loop devices
1169 if (pakfire_jail_exec_has_flag(ctx, PAKFIRE_JAIL_HAS_LOOP_DEVICES))
1170 flags |= PAKFIRE_MOUNT_LOOP_DEVICES;
1171
1172 // Mount all default stuff
1173 r = pakfire_mount_all(jail->pakfire, flags);
1174 if (r)
1175 return r;
1176
1177 // Mount networking stuff
1178 if (pakfire_jail_exec_has_flag(ctx, PAKFIRE_JAIL_HAS_NETWORKING)) {
1179 r = pakfire_jail_mount_networking(jail);
1180 if (r)
1181 return r;
1182 }
1183
1184 // Mount all custom stuff
1185 for (unsigned int i = 0; i < jail->num_mountpoints; i++) {
1186 // Fetch mountpoint
1187 mp = &jail->mountpoints[i];
1188
1189 // Mount it
1190 r = pakfire_bind(jail->pakfire, mp->source, mp->target, mp->flags);
1191 if (r)
1192 return r;
1193 }
1194
1195 // Log all mountpoints
1196 pakfire_mount_list(jail->pakfire);
1197
1198 return 0;
1199 }
1200
1201 // Networking
1202
1203 static int pakfire_jail_setup_loopback(struct pakfire_jail* jail) {
1204 struct nl_sock* nl = NULL;
1205 struct nl_cache* cache = NULL;
1206 struct rtnl_link* link = NULL;
1207 struct rtnl_link* change = NULL;
1208 int r;
1209
1210 DEBUG(jail->pakfire, "Setting up loopback...\n");
1211
1212 // Allocate a netlink socket
1213 nl = nl_socket_alloc();
1214 if (!nl) {
1215 ERROR(jail->pakfire, "Could not allocate a netlink socket: %m\n");
1216 r = 1;
1217 goto ERROR;
1218 }
1219
1220 // Connect the socket
1221 r = nl_connect(nl, NETLINK_ROUTE);
1222 if (r) {
1223 ERROR(jail->pakfire, "Could not connect netlink socket: %s\n", nl_geterror(r));
1224 goto ERROR;
1225 }
1226
1227 // Allocate the netlink cache
1228 r = rtnl_link_alloc_cache(nl, AF_UNSPEC, &cache);
1229 if (r < 0) {
1230 ERROR(jail->pakfire, "Unable to allocate netlink cache: %s\n", nl_geterror(r));
1231 goto ERROR;
1232 }
1233
1234 // Fetch loopback interface
1235 link = rtnl_link_get_by_name(cache, "lo");
1236 if (!link) {
1237 ERROR(jail->pakfire, "Could not find lo interface. Ignoring.\n");
1238 r = 0;
1239 goto ERROR;
1240 }
1241
1242 // Allocate a new link
1243 change = rtnl_link_alloc();
1244 if (!change) {
1245 ERROR(jail->pakfire, "Could not allocate change link\n");
1246 r = 1;
1247 goto ERROR;
1248 }
1249
1250 // Set the link to UP
1251 rtnl_link_set_flags(change, IFF_UP);
1252
1253 // Apply any changes
1254 r = rtnl_link_change(nl, link, change, 0);
1255 if (r) {
1256 ERROR(jail->pakfire, "Unable to activate loopback: %s\n", nl_geterror(r));
1257 goto ERROR;
1258 }
1259
1260 // Success
1261 r = 0;
1262
1263 ERROR:
1264 if (nl)
1265 nl_socket_free(nl);
1266
1267 return r;
1268 }
1269
1270 // UID/GID Mapping
1271
1272 static int pakfire_jail_setup_uid_mapping(struct pakfire_jail* jail, pid_t pid) {
1273 char path[PATH_MAX];
1274 int r;
1275
1276 // Skip mapping anything when running on /
1277 if (pakfire_on_root(jail->pakfire))
1278 return 0;
1279
1280 // Make path
1281 r = pakfire_string_format(path, "/proc/%d/uid_map", pid);
1282 if (r)
1283 return r;
1284
1285 // Fetch UID
1286 const uid_t uid = pakfire_uid(jail->pakfire);
1287
1288 // Fetch SUBUID
1289 const struct pakfire_subid* subuid = pakfire_subuid(jail->pakfire);
1290 if (!subuid)
1291 return 1;
1292
1293 /* When running as root, we will map the entire range.
1294
1295 When running as a non-privileged user, we will map the root user inside the jail
1296 to the user's UID outside of the jail, and we will map the rest starting from one.
1297 */
1298
1299 // Running as root
1300 if (uid == 0) {
1301 r = pakfire_file_write(jail->pakfire, path, 0, 0, 0,
1302 "0 %lu %lu\n", subuid->id, subuid->length);
1303 } else {
1304 r = pakfire_file_write(jail->pakfire, path, 0, 0, 0,
1305 "0 %lu 1\n1 %lu %lu\n", uid, subuid->id, subuid->length);
1306 }
1307
1308 if (r) {
1309 ERROR(jail->pakfire, "Could not map UIDs: %m\n");
1310 return r;
1311 }
1312
1313 return r;
1314 }
1315
1316 static int pakfire_jail_setup_gid_mapping(struct pakfire_jail* jail, pid_t pid) {
1317 char path[PATH_MAX];
1318 int r;
1319
1320 // Skip mapping anything when running on /
1321 if (pakfire_on_root(jail->pakfire))
1322 return 0;
1323
1324 // Fetch GID
1325 const gid_t gid = pakfire_gid(jail->pakfire);
1326
1327 // Fetch SUBGID
1328 const struct pakfire_subid* subgid = pakfire_subgid(jail->pakfire);
1329 if (!subgid)
1330 return 1;
1331
1332 // Make path
1333 r = pakfire_string_format(path, "/proc/%d/gid_map", pid);
1334 if (r)
1335 return r;
1336
1337 // Running as root
1338 if (gid == 0) {
1339 r = pakfire_file_write(jail->pakfire, path, 0, 0, 0,
1340 "0 %lu %lu\n", subgid->id, subgid->length);
1341 } else {
1342 r = pakfire_file_write(jail->pakfire, path, 0, 0, 0,
1343 "0 %lu 1\n%1 %lu %lu\n", gid, subgid->id, subgid->length);
1344 }
1345
1346 if (r) {
1347 ERROR(jail->pakfire, "Could not map GIDs: %m\n");
1348 return r;
1349 }
1350
1351 return r;
1352 }
1353
1354 static int pakfire_jail_setgroups(struct pakfire_jail* jail, pid_t pid) {
1355 char path[PATH_MAX];
1356 int r = 1;
1357
1358 // Make path
1359 r = pakfire_string_format(path, "/proc/%d/setgroups", pid);
1360 if (r)
1361 return r;
1362
1363 // Open file for writing
1364 FILE* f = fopen(path, "w");
1365 if (!f) {
1366 ERROR(jail->pakfire, "Could not open %s for writing: %m\n", path);
1367 goto ERROR;
1368 }
1369
1370 // Write content
1371 int bytes_written = fprintf(f, "deny\n");
1372 if (bytes_written <= 0) {
1373 ERROR(jail->pakfire, "Could not write to %s: %m\n", path);
1374 goto ERROR;
1375 }
1376
1377 r = fclose(f);
1378 f = NULL;
1379 if (r) {
1380 ERROR(jail->pakfire, "Could not close %s: %m\n", path);
1381 goto ERROR;
1382 }
1383
1384 ERROR:
1385 if (f)
1386 fclose(f);
1387
1388 return r;
1389 }
1390
1391 static int pakfire_jail_send_signal(struct pakfire_jail* jail, int fd) {
1392 const uint64_t val = 1;
1393 int r = 0;
1394
1395 DEBUG(jail->pakfire, "Sending signal...\n");
1396
1397 // Write to the file descriptor
1398 ssize_t bytes_written = write(fd, &val, sizeof(val));
1399 if (bytes_written < 0 || (size_t)bytes_written < sizeof(val)) {
1400 ERROR(jail->pakfire, "Could not send signal: %m\n");
1401 r = 1;
1402 }
1403
1404 // Close the file descriptor
1405 close(fd);
1406
1407 return r;
1408 }
1409
1410 static int pakfire_jail_wait_for_signal(struct pakfire_jail* jail, int fd) {
1411 uint64_t val = 0;
1412 int r = 0;
1413
1414 DEBUG(jail->pakfire, "Waiting for signal...\n");
1415
1416 ssize_t bytes_read = read(fd, &val, sizeof(val));
1417 if (bytes_read < 0 || (size_t)bytes_read < sizeof(val)) {
1418 ERROR(jail->pakfire, "Error waiting for signal: %m\n");
1419 r = 1;
1420 }
1421
1422 // Close the file descriptor
1423 close(fd);
1424
1425 return r;
1426 }
1427
1428 /*
1429 Performs the initialisation that needs to happen in the parent part
1430 */
1431 static int pakfire_jail_parent(struct pakfire_jail* jail, struct pakfire_jail_exec* ctx) {
1432 int r;
1433
1434 // Setup UID mapping
1435 r = pakfire_jail_setup_uid_mapping(jail, ctx->pid);
1436 if (r)
1437 return r;
1438
1439 // Write "deny" to /proc/PID/setgroups
1440 r = pakfire_jail_setgroups(jail, ctx->pid);
1441 if (r)
1442 return r;
1443
1444 // Setup GID mapping
1445 r = pakfire_jail_setup_gid_mapping(jail, ctx->pid);
1446 if (r)
1447 return r;
1448
1449 // Parent has finished initialisation
1450 DEBUG(jail->pakfire, "Parent has finished initialization\n");
1451
1452 // Send signal to client
1453 r = pakfire_jail_send_signal(jail, ctx->completed_fd);
1454 if (r)
1455 return r;
1456
1457 return 0;
1458 }
1459
1460 static int pakfire_jail_child(struct pakfire_jail* jail, struct pakfire_jail_exec* ctx,
1461 const char* argv[]) {
1462 int r;
1463
1464 // Redirect any logging to our log pipe
1465 pakfire_set_log_callback(jail->pakfire, pakfire_jail_log, &ctx->pipes);
1466
1467 // Die with parent
1468 r = prctl(PR_SET_PDEATHSIG, SIGKILL, 0, 0, 0);
1469 if (r) {
1470 ERROR(jail->pakfire, "Could not configure to die with parent: %m\n");
1471 return 126;
1472 }
1473
1474 // Fetch my own PID
1475 pid_t pid = getpid();
1476
1477 DEBUG(jail->pakfire, "Launched child process in jail with PID %d\n", pid);
1478
1479 // Wait for the parent to finish initialization
1480 r = pakfire_jail_wait_for_signal(jail, ctx->completed_fd);
1481 if (r)
1482 return r;
1483
1484 // Perform further initialization
1485
1486 // Fetch UID/GID
1487 uid_t uid = getuid();
1488 gid_t gid = getgid();
1489
1490 // Fetch EUID/EGID
1491 uid_t euid = geteuid();
1492 gid_t egid = getegid();
1493
1494 DEBUG(jail->pakfire, " UID: %d (effective %d)\n", uid, euid);
1495 DEBUG(jail->pakfire, " GID: %d (effective %d)\n", gid, egid);
1496
1497 // Check if we are (effectively running as root)
1498 if (uid || gid || euid || egid) {
1499 ERROR(jail->pakfire, "Child process is not running as root\n");
1500 return 126;
1501 }
1502
1503 const char* root = pakfire_get_path(jail->pakfire);
1504 const char* arch = pakfire_get_arch(jail->pakfire);
1505
1506 // Change root (unless root is /)
1507 if (!pakfire_on_root(jail->pakfire)) {
1508 // Mount everything
1509 r = pakfire_jail_mount(jail, ctx);
1510 if (r)
1511 return r;
1512
1513 // Call chroot()
1514 r = chroot(root);
1515 if (r) {
1516 ERROR(jail->pakfire, "chroot() to %s failed: %m\n", root);
1517 return 1;
1518 }
1519
1520 // Change directory to /
1521 r = chdir("/");
1522 if (r) {
1523 ERROR(jail->pakfire, "chdir() after chroot() failed: %m\n");
1524 return 1;
1525 }
1526 }
1527
1528 // Set personality
1529 unsigned long persona = pakfire_arch_personality(arch);
1530 if (persona) {
1531 r = personality(persona);
1532 if (r < 0) {
1533 ERROR(jail->pakfire, "Could not set personality (%x)\n", (unsigned int)persona);
1534 return 1;
1535 }
1536 }
1537
1538 // Setup networking
1539 if (!pakfire_jail_exec_has_flag(ctx, PAKFIRE_JAIL_HAS_NETWORKING)) {
1540 r = pakfire_jail_setup_loopback(jail);
1541 if (r)
1542 return 1;
1543 }
1544
1545 // Set nice level
1546 if (jail->nice) {
1547 DEBUG(jail->pakfire, "Setting nice level to %d\n", jail->nice);
1548
1549 r = setpriority(PRIO_PROCESS, pid, jail->nice);
1550 if (r) {
1551 ERROR(jail->pakfire, "Could not set nice level: %m\n");
1552 return 1;
1553 }
1554 }
1555
1556 // Close other end of log pipes
1557 close(ctx->pipes.log_INFO[0]);
1558 close(ctx->pipes.log_ERROR[0]);
1559 #ifdef ENABLE_DEBUG
1560 close(ctx->pipes.log_DEBUG[0]);
1561 #endif /* ENABLE_DEBUG */
1562
1563 // Connect standard input
1564 if (ctx->pipes.stdin[0]) {
1565 r = dup2(ctx->pipes.stdin[0], STDIN_FILENO);
1566 if (r < 0) {
1567 ERROR(jail->pakfire, "Could not connect fd %d to stdin: %m\n",
1568 ctx->pipes.stdin[0]);
1569
1570 return 1;
1571 }
1572 }
1573
1574 // Connect standard output and error
1575 if (ctx->pipes.stdout[1] && ctx->pipes.stderr[1]) {
1576 r = dup2(ctx->pipes.stdout[1], STDOUT_FILENO);
1577 if (r < 0) {
1578 ERROR(jail->pakfire, "Could not connect fd %d to stdout: %m\n",
1579 ctx->pipes.stdout[1]);
1580
1581 return 1;
1582 }
1583
1584 r = dup2(ctx->pipes.stderr[1], STDERR_FILENO);
1585 if (r < 0) {
1586 ERROR(jail->pakfire, "Could not connect fd %d to stderr: %m\n",
1587 ctx->pipes.stderr[1]);
1588
1589 return 1;
1590 }
1591
1592 // Close the pipe (as we have moved the original file descriptors)
1593 pakfire_jail_close_pipe(jail, ctx->pipes.stdin);
1594 pakfire_jail_close_pipe(jail, ctx->pipes.stdout);
1595 pakfire_jail_close_pipe(jail, ctx->pipes.stderr);
1596 }
1597
1598 // Reset open file limit (http://0pointer.net/blog/file-descriptor-limits.html)
1599 r = pakfire_rlimit_reset_nofile(jail->pakfire);
1600 if (r)
1601 return r;
1602
1603 // Drop capabilities
1604 r = pakfire_jail_drop_capabilities(jail);
1605 if (r)
1606 return r;
1607
1608 // Filter syscalls
1609 r = pakfire_jail_limit_syscalls(jail);
1610 if (r)
1611 return r;
1612
1613 DEBUG(jail->pakfire, "Child process initialization done\n");
1614 DEBUG(jail->pakfire, "Launching command:\n");
1615
1616 // Log argv
1617 for (unsigned int i = 0; argv[i]; i++)
1618 DEBUG(jail->pakfire, " argv[%d] = %s\n", i, argv[i]);
1619
1620 // exec() command
1621 r = execvpe(argv[0], (char**)argv, jail->env);
1622 if (r < 0) {
1623 // Translate errno into regular exit code
1624 switch (errno) {
1625 case ENOENT:
1626 // Ignore if the command doesn't exist
1627 if (ctx->flags & PAKFIRE_JAIL_NOENT_OK)
1628 r = 0;
1629 else
1630 r = 127;
1631
1632 break;
1633
1634 default:
1635 r = 1;
1636 }
1637
1638 ERROR(jail->pakfire, "Could not execve(%s): %m\n", argv[0]);
1639 }
1640
1641 // We should not get here
1642 return r;
1643 }
1644
1645 // Run a command in the jail
1646 static int __pakfire_jail_exec(struct pakfire_jail* jail, const char* argv[],
1647 const int interactive,
1648 pakfire_jail_communicate_in communicate_in,
1649 pakfire_jail_communicate_out communicate_out,
1650 void* data, int flags) {
1651 int exit = -1;
1652 int r;
1653
1654 // Check if argv is valid
1655 if (!argv || !argv[0]) {
1656 errno = EINVAL;
1657 return -1;
1658 }
1659
1660 // Send any output to the default logger if no callback is set
1661 if (!communicate_out)
1662 communicate_out = pakfire_jail_default_log_callback;
1663
1664 // Initialize context for this call
1665 struct pakfire_jail_exec ctx = {
1666 .flags = flags,
1667
1668 .pipes = {
1669 .stdin = { 0, 0 },
1670 .stdout = { 0, 0 },
1671 .stderr = { 0, 0 },
1672 },
1673
1674 .communicate = {
1675 .in = communicate_in,
1676 .out = communicate_out,
1677 .data = data,
1678 },
1679 };
1680
1681 DEBUG(jail->pakfire, "Executing jail...\n");
1682
1683 // Enable networking in interactive mode
1684 if (interactive)
1685 ctx.flags |= PAKFIRE_JAIL_HAS_NETWORKING;
1686
1687 /*
1688 Setup a file descriptor which can be used to notify the client that the parent
1689 has completed configuration.
1690 */
1691 ctx.completed_fd = eventfd(0, EFD_CLOEXEC);
1692 if (ctx.completed_fd < 0) {
1693 ERROR(jail->pakfire, "eventfd() failed: %m\n");
1694 return -1;
1695 }
1696
1697 // Create pipes to communicate with child process if we are not running interactively
1698 if (!interactive) {
1699 // stdin (only if callback is set)
1700 if (ctx.communicate.in) {
1701 r = pakfire_jail_setup_pipe(jail, &ctx.pipes.stdin, 0);
1702 if (r)
1703 goto ERROR;
1704 }
1705
1706 // stdout
1707 r = pakfire_jail_setup_pipe(jail, &ctx.pipes.stdout, 0);
1708 if (r)
1709 goto ERROR;
1710
1711 // stderr
1712 r = pakfire_jail_setup_pipe(jail, &ctx.pipes.stderr, 0);
1713 if (r)
1714 goto ERROR;
1715 }
1716
1717 // Setup pipes for logging
1718 // INFO
1719 r = pakfire_jail_setup_pipe(jail, &ctx.pipes.log_INFO, O_CLOEXEC);
1720 if (r)
1721 goto ERROR;
1722
1723 // ERROR
1724 r = pakfire_jail_setup_pipe(jail, &ctx.pipes.log_ERROR, O_CLOEXEC);
1725 if (r)
1726 goto ERROR;
1727
1728 #ifdef ENABLE_DEBUG
1729 // DEBUG
1730 r = pakfire_jail_setup_pipe(jail, &ctx.pipes.log_DEBUG, O_CLOEXEC);
1731 if (r)
1732 goto ERROR;
1733 #endif /* ENABLE_DEBUG */
1734
1735 // Configure child process
1736 struct clone_args args = {
1737 .flags =
1738 CLONE_NEWCGROUP |
1739 CLONE_NEWIPC |
1740 CLONE_NEWNS |
1741 CLONE_NEWPID |
1742 CLONE_NEWUSER |
1743 CLONE_NEWUTS |
1744 CLONE_PIDFD,
1745 .exit_signal = SIGCHLD,
1746 .pidfd = (long long unsigned int)&ctx.pidfd,
1747 };
1748
1749 // Launch the process in a cgroup that is a leaf of the configured cgroup
1750 if (jail->cgroup) {
1751 args.flags |= CLONE_INTO_CGROUP;
1752
1753 // Fetch our UUID
1754 const char* uuid = pakfire_jail_uuid(jail);
1755
1756 // Create a temporary cgroup
1757 r = pakfire_cgroup_child(&ctx.cgroup, jail->cgroup, uuid, 0);
1758 if (r) {
1759 ERROR(jail->pakfire, "Could not create cgroup for jail: %m\n");
1760 goto ERROR;
1761 }
1762
1763 // Clone into this cgroup
1764 args.cgroup = pakfire_cgroup_fd(ctx.cgroup);
1765 }
1766
1767 // Setup networking
1768 if (!pakfire_jail_exec_has_flag(&ctx, PAKFIRE_JAIL_HAS_NETWORKING)) {
1769 args.flags |= CLONE_NEWNET;
1770 }
1771
1772 // Fork this process
1773 ctx.pid = clone3(&args, sizeof(args));
1774 if (ctx.pid < 0) {
1775 ERROR(jail->pakfire, "Could not clone: %m\n");
1776 return -1;
1777
1778 // Child process
1779 } else if (ctx.pid == 0) {
1780 r = pakfire_jail_child(jail, &ctx, argv);
1781 _exit(r);
1782 }
1783
1784 // Parent process
1785 r = pakfire_jail_parent(jail, &ctx);
1786 if (r)
1787 goto ERROR;
1788
1789 DEBUG(jail->pakfire, "Waiting for PID %d to finish its work\n", ctx.pid);
1790
1791 // Read output of the child process
1792 r = pakfire_jail_wait(jail, &ctx);
1793 if (r)
1794 goto ERROR;
1795
1796 // Handle exit status
1797 switch (ctx.status.si_code) {
1798 case CLD_EXITED:
1799 DEBUG(jail->pakfire, "The child process exited with code %d\n",
1800 ctx.status.si_status);
1801
1802 // Pass exit code
1803 exit = ctx.status.si_status;
1804 break;
1805
1806 case CLD_KILLED:
1807 ERROR(jail->pakfire, "The child process was killed\n");
1808 exit = 139;
1809 break;
1810
1811 case CLD_DUMPED:
1812 ERROR(jail->pakfire, "The child process terminated abnormally\n");
1813 break;
1814
1815 // Log anything else
1816 default:
1817 ERROR(jail->pakfire, "Unknown child exit code: %d\n", ctx.status.si_code);
1818 break;
1819 }
1820
1821 ERROR:
1822 // Destroy the temporary cgroup (if any)
1823 if (ctx.cgroup) {
1824 // Read cgroup stats
1825 r = pakfire_cgroup_stat(ctx.cgroup, &ctx.cgroup_stats);
1826 if (r) {
1827 ERROR(jail->pakfire, "Could not read cgroup stats: %m\n");
1828 } else {
1829 pakfire_cgroup_stat_dump(ctx.cgroup, &ctx.cgroup_stats);
1830 }
1831
1832 pakfire_cgroup_destroy(ctx.cgroup);
1833 pakfire_cgroup_unref(ctx.cgroup);
1834 }
1835
1836 // Close any file descriptors
1837 pakfire_jail_close_pipe(jail, ctx.pipes.stdin);
1838 pakfire_jail_close_pipe(jail, ctx.pipes.stdout);
1839 pakfire_jail_close_pipe(jail, ctx.pipes.stderr);
1840 if (ctx.pidfd)
1841 close(ctx.pidfd);
1842 pakfire_jail_close_pipe(jail, ctx.pipes.log_INFO);
1843 pakfire_jail_close_pipe(jail, ctx.pipes.log_ERROR);
1844 pakfire_jail_close_pipe(jail, ctx.pipes.log_DEBUG);
1845
1846 return exit;
1847 }
1848
1849 PAKFIRE_EXPORT int pakfire_jail_exec(
1850 struct pakfire_jail* jail,
1851 const char* argv[],
1852 pakfire_jail_communicate_in callback_in,
1853 pakfire_jail_communicate_out callback_out,
1854 void* data, int flags) {
1855 return __pakfire_jail_exec(jail, argv, 0, callback_in, callback_out, data, flags);
1856 }
1857
/*
	Executes the given command in the jail in interactive mode, i.e. without
	any communication callbacks.

	Returns the result of pakfire_jail_setup_interactive_env() if that
	fails, otherwise whatever __pakfire_jail_exec() returns.
*/
static int pakfire_jail_exec_interactive(
		struct pakfire_jail* jail, const char* argv[], int flags) {
	// Setup interactive stuff
	const int rc = pakfire_jail_setup_interactive_env(jail);

	// Only launch the command if the environment could be set up
	return (rc) ? rc : __pakfire_jail_exec(jail, argv, 1, NULL, NULL, NULL, flags);
}
1869
1870 int pakfire_jail_exec_script(struct pakfire_jail* jail,
1871 const char* script,
1872 const size_t size,
1873 const char* args[],
1874 pakfire_jail_communicate_in callback_in,
1875 pakfire_jail_communicate_out callback_out,
1876 void* data) {
1877 char path[PATH_MAX];
1878 const char** argv = NULL;
1879 FILE* f = NULL;
1880 int r;
1881
1882 const char* root = pakfire_get_path(jail->pakfire);
1883
1884 // Write the scriptlet to disk
1885 r = pakfire_path_join(path, root, PAKFIRE_TMP_DIR "/pakfire-script.XXXXXX");
1886 if (r)
1887 goto ERROR;
1888
1889 // Create a temporary file
1890 f = pakfire_mktemp(path, 0700);
1891 if (!f) {
1892 ERROR(jail->pakfire, "Could not create temporary file: %m\n");
1893 goto ERROR;
1894 }
1895
1896 DEBUG(jail->pakfire, "Writing script to %s:\n%.*s\n", path, (int)size, script);
1897
1898 // Write data
1899 r = fprintf(f, "%s", script);
1900 if (r < 0) {
1901 ERROR(jail->pakfire, "Could not write script to file %s: %m\n", path);
1902 goto ERROR;
1903 }
1904
1905 // Close file
1906 r = fclose(f);
1907 if (r) {
1908 ERROR(jail->pakfire, "Could not close script file %s: %m\n", path);
1909 goto ERROR;
1910 }
1911
1912 f = NULL;
1913
1914 // Count how many arguments were passed
1915 unsigned int argc = 1;
1916 if (args) {
1917 for (const char** arg = args; *arg; arg++)
1918 argc++;
1919 }
1920
1921 argv = calloc(argc + 1, sizeof(*argv));
1922 if (!argv) {
1923 ERROR(jail->pakfire, "Could not allocate argv: %m\n");
1924 goto ERROR;
1925 }
1926
1927 // Set command
1928 argv[0] = (root) ? pakfire_path_relpath(root, path) : path;
1929
1930 // Copy args
1931 for (unsigned int i = 1; i < argc; i++)
1932 argv[i] = args[i-1];
1933
1934 // Run the script
1935 r = pakfire_jail_exec(jail, argv, callback_in, callback_out, data, 0);
1936
1937 ERROR:
1938 if (argv)
1939 free(argv);
1940 if (f)
1941 fclose(f);
1942
1943 // Remove script from disk
1944 if (*path)
1945 unlink(path);
1946
1947 return r;
1948 }
1949
1950 /*
1951 A convenience function that creates a new jail, runs the given command and destroys
1952 the jail again.
1953 */
1954 int pakfire_jail_run(struct pakfire* pakfire, const char* argv[], int flags, char** output) {
1955 struct pakfire_jail* jail = NULL;
1956 int r;
1957
1958 // Create a new jail
1959 r = pakfire_jail_create(&jail, pakfire);
1960 if (r)
1961 goto ERROR;
1962
1963 // Execute the command
1964 r = pakfire_jail_exec(jail, argv, NULL, pakfire_jail_capture_stdout, output, 0);
1965
1966 ERROR:
1967 if (jail)
1968 pakfire_jail_unref(jail);
1969
1970 return r;
1971 }
1972
/*
	A convenience function that creates a new jail, executes the given script
	in it and destroys the jail again.

	NOTE: flags is currently not used.

	Returns the result of pakfire_jail_create() if the jail could not be
	created, otherwise the result of pakfire_jail_exec_script().
*/
int pakfire_jail_run_script(struct pakfire* pakfire,
		const char* script, const size_t length, const char* argv[], int flags) {
	struct pakfire_jail* j = NULL;

	// Create a new jail
	int rc = pakfire_jail_create(&j, pakfire);

	// Execute the script
	if (!rc)
		rc = pakfire_jail_exec_script(j, script, length, argv, NULL, NULL, NULL);

	// Destroy the jail
	if (j)
		pakfire_jail_unref(j);

	return rc;
}
1992
/*
	Launches an interactive login shell (/bin/bash --login) in the jail
	and returns the result of pakfire_jail_exec_interactive().
*/
int pakfire_jail_shell(struct pakfire_jail* jail) {
	const char* shell[] = {
		"/bin/bash",
		"--login",
		NULL,
	};

	// Execute /bin/bash
	return pakfire_jail_exec_interactive(jail, shell, 0);
}
2001
2002 static int pakfire_jail_run_if_possible(struct pakfire* pakfire, const char** argv) {
2003 char path[PATH_MAX];
2004 int r;
2005
2006 r = pakfire_path(pakfire, path, "%s", *argv);
2007 if (r)
2008 return r;
2009
2010 // Check if the file is executable
2011 r = access(path, X_OK);
2012 if (r) {
2013 DEBUG(pakfire, "%s is not executable. Skipping...\n", *argv);
2014 return 0;
2015 }
2016
2017 return pakfire_jail_run(pakfire, argv, 0, NULL);
2018 }
2019
/*
	Runs /sbin/ldconfig in a jail (if it is executable) and returns the
	result of pakfire_jail_run_if_possible().
*/
int pakfire_jail_ldconfig(struct pakfire* pakfire) {
	const char* command[] = { "/sbin/ldconfig", NULL };

	return pakfire_jail_run_if_possible(pakfire, command);
}
2028
/*
	Runs "/usr/bin/systemd-tmpfiles --create" in a jail (if it is executable)
	and returns the result of pakfire_jail_run_if_possible().
*/
int pakfire_jail_run_systemd_tmpfiles(struct pakfire* pakfire) {
	const char* command[] = { "/usr/bin/systemd-tmpfiles", "--create", NULL };

	return pakfire_jail_run_if_possible(pakfire, command);
}