]> git.ipfire.org Git - people/ms/pakfire.git/blob - src/libpakfire/jail.c
cgroup: Add controllers
[people/ms/pakfire.git] / src / libpakfire / jail.c
1 /*#############################################################################
2 # #
3 # Pakfire - The IPFire package management system #
4 # Copyright (C) 2022 Pakfire development team #
5 # #
6 # This program is free software: you can redistribute it and/or modify #
7 # it under the terms of the GNU General Public License as published by #
8 # the Free Software Foundation, either version 3 of the License, or #
9 # (at your option) any later version. #
10 # #
11 # This program is distributed in the hope that it will be useful, #
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of #
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
14 # GNU General Public License for more details. #
15 # #
16 # You should have received a copy of the GNU General Public License #
17 # along with this program. If not, see <http://www.gnu.org/licenses/>. #
18 # #
19 #############################################################################*/
20
21 #include <errno.h>
22 #include <linux/capability.h>
23 #include <linux/fcntl.h>
24 #include <linux/sched.h>
25 #include <linux/wait.h>
26 #include <sched.h>
27 #include <signal.h>
28 #include <stdlib.h>
29 #include <syscall.h>
30 #include <sys/capability.h>
31 #include <sys/epoll.h>
32 #include <sys/eventfd.h>
33 #include <sys/personality.h>
34 #include <sys/prctl.h>
35 #include <sys/resource.h>
36 #include <sys/types.h>
37 #include <sys/wait.h>
38
39 // libseccomp
40 #include <seccomp.h>
41
42 #include <pakfire/arch.h>
43 #include <pakfire/cgroup.h>
44 #include <pakfire/jail.h>
45 #include <pakfire/logging.h>
46 #include <pakfire/mount.h>
47 #include <pakfire/pakfire.h>
48 #include <pakfire/private.h>
49 #include <pakfire/util.h>
50
// Size in bytes of each per-stream log buffer (64 KiB).
// The expression is parenthesised so that the macro expands safely when it
// is used inside a larger expression (e.g. BUFFER_SIZE % n or x / BUFFER_SIZE).
#define BUFFER_SIZE (1024 * 64)

// Number of slots in the environment array of a jail
#define ENVIRON_SIZE 128

// Maximum number of events that are fetched by one call to epoll_wait()
#define EPOLL_MAX_EVENTS 2
54
/*
	Default environment variables which are set for every jail.

	The table is terminated by an entry whose key is NULL.
*/
static const struct environ {
	const char* key;
	const char* val;
} ENV[] = {
	{ .key = "LANG", .val = "en_US.utf-8" },
	{ .key = "TERM", .val = "vt100" },

	// Sentinel
	{ .key = NULL,   .val = NULL },
};
64
65 struct pakfire_jail {
66 struct pakfire* pakfire;
67 int nrefs;
68
69 // Flags
70 int flags;
71
72 // Resource Limits
73 int nice;
74
75 // Environment
76 char* env[ENVIRON_SIZE];
77
78 // Logging
79 pakfire_jail_log_callback log_callback;
80 void* log_data;
81 };
82
83 struct pakfire_log_buffer {
84 char data[BUFFER_SIZE];
85 size_t used;
86 };
87
88 struct pakfire_jail_exec {
89 // PID (of the child)
90 pid_t pid;
91 int pidfd;
92
93 // Process status (from waitid)
94 siginfo_t status;
95
96 // FD to notify the client that the parent has finished initialization
97 int completed_fd;
98
99 // Log pipes
100 struct pakfire_jail_pipes {
101 int stdout[2];
102 int stderr[2];
103
104 // Logging
105 int log_INFO[2];
106 int log_ERROR[2];
107 int log_DEBUG[2];
108 } pipes;
109
110 // Log buffers
111 struct pakfire_jail_buffers {
112 struct pakfire_log_buffer stdout;
113 struct pakfire_log_buffer stderr;
114
115 // Logging
116 struct pakfire_log_buffer log_INFO;
117 struct pakfire_log_buffer log_ERROR;
118 struct pakfire_log_buffer log_DEBUG;
119 } buffers;
120
121 // cgroup
122 struct pakfire_cgroup* cgroup;
123 };
124
/*
	Invokes the clone3 system call through syscall() and
	returns whatever the system call has returned.
*/
static int clone3(struct clone_args* args, size_t size) {
	const long ret = syscall(__NR_clone3, args, size);

	return ret;
}
128
129 static void pakfire_jail_free(struct pakfire_jail* jail) {
130 DEBUG(jail->pakfire, "Freeing jail at %p\n", jail);
131
132 // Free environment
133 for (unsigned int i = 0; jail->env[i]; i++)
134 free(jail->env[i]);
135
136 pakfire_unref(jail->pakfire);
137 free(jail);
138 }
139
/*
	The default log callback which forwards every line to the Pakfire logger
	that matches its priority.

	Lines of any other priority are dropped. DEBUG lines are only forwarded
	if ENABLE_DEBUG is defined. This function always returns zero.
*/
static int pakfire_jail_default_log_callback(struct pakfire* pakfire, void* data,
		int priority, const char* line, size_t length) {
	if (priority == LOG_INFO) {
		INFO(pakfire, "%s", line);

	} else if (priority == LOG_ERR) {
		ERROR(pakfire, "%s", line);

#ifdef ENABLE_DEBUG
	} else if (priority == LOG_DEBUG) {
		DEBUG(pakfire, "%s", line);
#endif
	}

	return 0;
}
163
/*
	Prepares the environment for interactive use: sets a prompt (PS1) and
	copies TERM and LANG from the environment of the calling process if
	they are set there.

	Returns zero on success or the error of pakfire_jail_set_env().
*/
static int pakfire_jail_setup_interactive_env(struct pakfire_jail* jail) {
	// Variables that are inherited from the calling process (in this order)
	static const char* const inherited[] = { "TERM", "LANG", NULL };

	// Set PS1
	int r = pakfire_jail_set_env(jail, "PS1", "pakfire-jail \\w> ");
	if (r)
		return r;

	for (const char* const* name = inherited; *name; name++) {
		const char* value = secure_getenv(*name);

		// Skip anything that is not set
		if (!value)
			continue;

		r = pakfire_jail_set_env(jail, *name, value);
		if (r)
			return r;
	}

	return 0;
}
188
189 PAKFIRE_EXPORT int pakfire_jail_create(struct pakfire_jail** jail,
190 struct pakfire* pakfire, int flags) {
191 int r;
192
193 // Allocate a new jail
194 struct pakfire_jail* j = calloc(1, sizeof(*j));
195 if (!j)
196 return 1;
197
198 // Reference Pakfire
199 j->pakfire = pakfire_ref(pakfire);
200
201 // Initialize reference counter
202 j->nrefs = 1;
203
204 // Store flags
205 j->flags = flags;
206
207 DEBUG(j->pakfire, "Allocated new jail at %p\n", j);
208
209 // Set default log callback
210 r = pakfire_jail_set_log_callback(j, pakfire_jail_default_log_callback, NULL);
211 if (r)
212 goto ERROR;
213
214 // Set default environment
215 for (const struct environ* e = ENV; e->key; e++) {
216 r = pakfire_jail_set_env(j, e->key, e->val);
217 if (r)
218 goto ERROR;
219 }
220
221 // Setup interactive stuff
222 if (j->flags & PAKFIRE_JAIL_INTERACTIVE) {
223 r = pakfire_jail_setup_interactive_env(j);
224 if (r)
225 goto ERROR;
226 }
227
228 // Done
229 *jail = j;
230 return 0;
231
232 ERROR:
233 pakfire_jail_free(j);
234
235 return r;
236 }
237
238 PAKFIRE_EXPORT struct pakfire_jail* pakfire_jail_ref(struct pakfire_jail* jail) {
239 ++jail->nrefs;
240
241 return jail;
242 }
243
244 PAKFIRE_EXPORT struct pakfire_jail* pakfire_jail_unref(struct pakfire_jail* jail) {
245 if (--jail->nrefs > 0)
246 return jail;
247
248 pakfire_jail_free(jail);
249 return NULL;
250 }
251
252 static int pakfire_jail_has_flag(struct pakfire_jail* jail, int flag) {
253 return jail->flags & flag;
254 }
255
256 // Resource Limits
257
258 PAKFIRE_EXPORT int pakfire_jail_nice(struct pakfire_jail* jail, int nice) {
259 // Check if nice level is in range
260 if (nice < -19 || nice > 20) {
261 errno = EINVAL;
262 return 1;
263 }
264
265 // Store nice level
266 jail->nice = nice;
267
268 return 0;
269 }
270
271 // Environment
272
273 // Returns the length of the environment
274 static unsigned int pakfire_jail_env_length(struct pakfire_jail* jail) {
275 unsigned int i = 0;
276
277 // Count everything in the environment
278 for (char** e = jail->env; *e; e++)
279 i++;
280
281 return i;
282 }
283
284 // Finds an existing environment variable and returns its index or -1 if not found
285 static int pakfire_jail_find_env(struct pakfire_jail* jail, const char* key) {
286 if (!key) {
287 errno = EINVAL;
288 return -1;
289 }
290
291 char buffer[strlen(key) + 2];
292 pakfire_string_format(buffer, "%s=", key);
293
294 for (unsigned int i = 0; jail->env[i]; i++) {
295 if (pakfire_string_startswith(jail->env[i], buffer))
296 return i;
297 }
298
299 // Nothing found
300 return -1;
301 }
302
303 // Returns the value of an environment variable or NULL
304 PAKFIRE_EXPORT const char* pakfire_jail_get_env(struct pakfire_jail* jail,
305 const char* key) {
306 int i = pakfire_jail_find_env(jail, key);
307 if (i < 0)
308 return NULL;
309
310 return jail->env[i] + strlen(key) + 1;
311 }
312
313 // Sets an environment variable
314 PAKFIRE_EXPORT int pakfire_jail_set_env(struct pakfire_jail* jail,
315 const char* key, const char* value) {
316 // Find the index where to write this value to
317 int i = pakfire_jail_find_env(jail, key);
318 if (i < 0)
319 i = pakfire_jail_env_length(jail);
320
321 // Return -ENOSPC when the environment is full
322 if (i >= ENVIRON_SIZE) {
323 errno = ENOSPC;
324 return -1;
325 }
326
327 // Free any previous value
328 if (jail->env[i])
329 free(jail->env[i]);
330
331 // Format and set environment variable
332 asprintf(&jail->env[i], "%s=%s", key, value);
333
334 DEBUG(jail->pakfire, "Set environment variable: %s\n", jail->env[i]);
335
336 return 0;
337 }
338
339 // Imports an environment
340 PAKFIRE_EXPORT int pakfire_jail_import_env(struct pakfire_jail* jail, const char* env[]) {
341 if (!env)
342 return 0;
343
344 char* key;
345 char* val;
346 int r;
347
348 // Copy environment variables
349 for (unsigned int i = 0; env[i]; i++) {
350 r = pakfire_string_partition(env[i], "=", &key, &val);
351 if (r)
352 continue;
353
354 // Set value
355 r = pakfire_jail_set_env(jail, key, val);
356
357 if (key)
358 free(key);
359 if (val)
360 free(val);
361
362 // Break on error
363 if (r)
364 return r;
365 }
366
367 return 0;
368 }
369
370 // Logging
371
372 PAKFIRE_EXPORT int pakfire_jail_set_log_callback(struct pakfire_jail* jail,
373 pakfire_jail_log_callback callback, void* data) {
374 jail->log_callback = callback;
375 jail->log_data = data;
376
377 return 0;
378 }
379
380 /*
381 This function replaces any logging in the child process.
382
383 All log messages will be sent to the parent process through their respective pipes.
384 */
385 static void pakfire_jail_log(void* data, int priority, const char* file,
386 int line, const char* fn, const char* format, va_list args) {
387 struct pakfire_jail_pipes* pipes = (struct pakfire_jail_pipes*)data;
388 int fd;
389
390 switch (priority) {
391 case LOG_INFO:
392 fd = pipes->log_INFO[1];
393 break;
394
395 case LOG_ERR:
396 fd = pipes->log_ERROR[1];
397 break;
398
399 #ifdef ENABLE_DEBUG
400 case LOG_DEBUG:
401 fd = pipes->log_DEBUG[1];
402 break;
403 #endif /* ENABLE_DEBUG */
404
405 // Ignore any messages of an unknown priority
406 default:
407 return;
408 }
409
410 // Send the log message
411 if (fd)
412 vdprintf(fd, format, args);
413 }
414
415 static int pakfire_jail_log_buffer_is_full(const struct pakfire_log_buffer* buffer) {
416 return (sizeof(buffer->data) == buffer->used);
417 }
418
419 /*
420 This function reads as much data as it can from the file descriptor.
421 If it finds a whole line in it, it will send it to the logger and repeat the process.
422 If not newline character is found, it will try to read more data until it finds one.
423 */
424 static int pakfire_jail_handle_log(struct pakfire_jail* jail,
425 struct pakfire_jail_exec* ctx, int priority, int fd,
426 struct pakfire_log_buffer* buffer, pakfire_jail_log_callback callback, void* data) {
427 char line[BUFFER_SIZE + 1];
428
429 // Fill up buffer from fd
430 if (buffer->used < sizeof(buffer->data)) {
431 ssize_t bytes_read = read(fd, buffer->data + buffer->used,
432 sizeof(buffer->data) - buffer->used);
433
434 // Handle errors
435 if (bytes_read < 0) {
436 ERROR(jail->pakfire, "Could not read from fd %d: %m\n", fd);
437 return -1;
438 }
439
440 // Update buffer size
441 buffer->used += bytes_read;
442 }
443
444 // See if we have any lines that we can write
445 while (buffer->used) {
446 // Search for the end of the first line
447 char* eol = memchr(buffer->data, '\n', buffer->used);
448
449 // No newline found
450 if (!eol) {
451 // If the buffer is full, we send the content to the logger and try again
452 // This should not happen in practise
453 if (pakfire_jail_log_buffer_is_full(buffer)) {
454 DEBUG(jail->pakfire, "Logging buffer is full. Sending all content\n");
455
456 eol = buffer->data + sizeof(buffer->data) - 1;
457
458 // Otherwise we might have only read parts of the output
459 } else
460 break;
461 }
462
463 // Find the length of the string
464 size_t length = eol - buffer->data + 1;
465
466 // Copy the line into the buffer
467 memcpy(line, buffer->data, length);
468
469 // Terminate the string
470 line[length] = '\0';
471
472 // Log the line
473 if (callback) {
474 int r = callback(jail->pakfire, data, priority, line, length);
475 if (r) {
476 ERROR(jail->pakfire, "The logging callback returned an error: %d\n", r);
477 return r;
478 }
479 }
480
481 // Remove line from buffer
482 memmove(buffer->data, buffer->data + length, buffer->used - length);
483 buffer->used -= length;
484 }
485
486 return 0;
487 }
488
489 static int pakfire_jail_setup_pipe(struct pakfire_jail* jail, int (*fds)[2], const int flags) {
490 int r = pipe2(*fds, flags);
491 if (r < 0) {
492 ERROR(jail->pakfire, "Could not setup pipe: %m\n");
493 return 1;
494 }
495
496 return 0;
497 }
498
/*
	Closes both ends of a pipe.

	An end which is zero is considered unset and is skipped.
	The values in fds are not modified.
*/
static void pakfire_jail_close_pipe(struct pakfire_jail* jail, int fds[2]) {
	const int* const end = fds + 2;

	for (const int* fd = fds; fd < end; fd++) {
		if (*fd)
			close(*fd);
	}
}
504
/*
	This is a convenience function which closes the writing end of a pipe
	(if it is set) and marks it as unset by resetting it to zero.

	Returns the reading end of the pipe.
*/
static int pakfire_jail_get_pipe(struct pakfire_jail* jail, int (*fds)[2]) {
	int* pair = *fds;

	// Close the write end of the pipe
	if (pair[1]) {
		close(pair[1]);
		pair[1] = 0;
	}

	// Return the read end
	return pair[0];
}
523
524 static int pakfire_jail_wait(struct pakfire_jail* jail, struct pakfire_jail_exec* ctx) {
525 int epollfd = -1;
526 struct epoll_event ev;
527 struct epoll_event events[EPOLL_MAX_EVENTS];
528 int r = 0;
529
530 // Fetch file descriptors from context
531 const int stdout = pakfire_jail_get_pipe(jail, &ctx->pipes.stdout);
532 const int stderr = pakfire_jail_get_pipe(jail, &ctx->pipes.stderr);
533 const int pidfd = ctx->pidfd;
534
535 // Logging
536 const int log_INFO = pakfire_jail_get_pipe(jail, &ctx->pipes.log_INFO);
537 const int log_ERROR = pakfire_jail_get_pipe(jail, &ctx->pipes.log_ERROR);
538 const int log_DEBUG = pakfire_jail_get_pipe(jail, &ctx->pipes.log_DEBUG);
539
540 // Make a list of all file descriptors we are interested in
541 int fds[] = {
542 stdout, stderr, pidfd, log_INFO, log_ERROR, log_DEBUG,
543 };
544
545 // Setup epoll
546 epollfd = epoll_create1(0);
547 if (epollfd < 0) {
548 ERROR(jail->pakfire, "Could not initialize epoll(): %m\n");
549 r = 1;
550 goto ERROR;
551 }
552
553 ev.events = EPOLLIN;
554
555 // Turn file descriptors into non-blocking mode and add them to epoll()
556 for (unsigned int i = 0; i < sizeof(fds) / sizeof(*fds); i++) {
557 int fd = fds[i];
558
559 // Skip fds which were not initialized
560 if (fd <= 0)
561 continue;
562
563 ev.data.fd = fd;
564
565 if (epoll_ctl(epollfd, EPOLL_CTL_ADD, fd, &ev) < 0) {
566 ERROR(jail->pakfire, "Could not add file descriptor %d to epoll(): %m\n", fd);
567 r = 1;
568 goto ERROR;
569 }
570 }
571
572 int ended = 0;
573
574 // Loop for as long as the process is alive
575 while (!ended) {
576 int num = epoll_wait(epollfd, events, EPOLL_MAX_EVENTS, -1);
577 if (num < 1) {
578 // Ignore if epoll_wait() has been interrupted
579 if (errno == EINTR)
580 continue;
581
582 ERROR(jail->pakfire, "epoll_wait() failed: %m\n");
583 r = 1;
584
585 goto ERROR;
586 }
587
588 for (int i = 0; i < num; i++) {
589 int fd = events[i].data.fd;
590
591 struct pakfire_log_buffer* buffer = NULL;
592 pakfire_jail_log_callback callback = NULL;
593 void* data = NULL;
594 int priority;
595
596 // Handle any changes to the PIDFD
597 if (fd == pidfd) {
598 // Call waidid() and store the result
599 r = waitid(P_PIDFD, ctx->pidfd, &ctx->status, WEXITED);
600 if (r) {
601 ERROR(jail->pakfire, "waitid() failed: %m\n");
602 goto ERROR;
603 }
604
605 // Mark that we have ended so that we will process the remaining
606 // events from epoll() now, but won't restart the outer loop.
607 ended = 1;
608 continue;
609
610 // Handle logging messages
611 } else if (fd == log_INFO) {
612 buffer = &ctx->buffers.log_INFO;
613 priority = LOG_INFO;
614
615 callback = pakfire_jail_default_log_callback;
616
617 } else if (fd == log_ERROR) {
618 buffer = &ctx->buffers.log_ERROR;
619 priority = LOG_ERR;
620
621 callback = pakfire_jail_default_log_callback;
622
623 } else if (fd == log_DEBUG) {
624 buffer = &ctx->buffers.log_DEBUG;
625 priority = LOG_DEBUG;
626
627 callback = pakfire_jail_default_log_callback;
628
629 // Handle anything from the log pipes
630 } else if (fd == stdout) {
631 buffer = &ctx->buffers.stdout;
632 priority = LOG_INFO;
633
634 callback = jail->log_callback;
635 data = jail->log_data;
636
637 } else if (fd == stderr) {
638 buffer = &ctx->buffers.stderr;
639 priority = LOG_ERR;
640
641 callback = jail->log_callback;
642 data = jail->log_data;
643
644 } else {
645 DEBUG(jail->pakfire, "Received invalid file descriptor %d\n", fd);
646 continue;
647 }
648
649 // Handle log event
650 r = pakfire_jail_handle_log(jail, ctx, priority, fd, buffer, callback, data);
651 if (r)
652 goto ERROR;
653 }
654 }
655
656 ERROR:
657 if (epollfd > 0)
658 close(epollfd);
659
660 return r;
661 }
662
/*
	A log callback which collects all lines from stdout (priority LOG_INFO)
	in a NULL-terminated array of strings. Lines of any other priority are
	passed on to the default log callback.

	data must point to a char** which is either NULL or a NULL-terminated
	array that has been allocated on the heap. Every line is appended as a
	copy which has been allocated on the heap.

	Returns zero on success and 1 if memory could not be allocated. In that
	case, the array that has been collected so far remains valid and untouched.
*/
static int pakfire_jail_capture_stdout(struct pakfire* pakfire, void* data, int priority,
		const char* line, size_t length) {
	char*** array = (char***)data;

	// Send everything but stdout to the default logger
	if (priority != LOG_INFO)
		return pakfire_jail_default_log_callback(pakfire, NULL, priority, line, length);

	// Create a copy of line
	char* message = strdup(line);
	if (!message)
		return 1;

	// Determine the length of the existing array
	size_t count = 0;

	if (*array) {
		for (char** element = *array; *element; element++)
			count++;
	}

	// Allocate space for the new element and the terminating NULL.
	// The result is stored in a temporary variable so that the existing
	// array is not lost if the allocation fails.
	char** grown = reallocarray(*array, count + 2, sizeof(**array));
	if (!grown) {
		free(message);
		return 1;
	}

	// Append message and terminate the array
	grown[count] = message;
	grown[count + 1] = NULL;

	*array = grown;

	return 0;
}
697
698 // Capabilities
699
700 static int pakfire_jail_drop_capabilities(struct pakfire_jail* jail) {
701 const int capabilities[] = {
702 // Deny access to the kernel's audit system
703 CAP_AUDIT_CONTROL,
704 CAP_AUDIT_READ,
705 CAP_AUDIT_WRITE,
706
707 // Deny suspending block devices
708 CAP_BLOCK_SUSPEND,
709
710 // Deny any stuff with BPF
711 CAP_BPF,
712
713 // Deny checkpoint restore
714 CAP_CHECKPOINT_RESTORE,
715
716 // Deny opening files by inode number (open_by_handle_at)
717 CAP_DAC_READ_SEARCH,
718
719 // Deny setting SUID bits
720 CAP_FSETID,
721
722 // Deny locking more memory
723 CAP_IPC_LOCK,
724
725 // Deny modifying any Apparmor/SELinux/SMACK configuration
726 CAP_MAC_ADMIN,
727 CAP_MAC_OVERRIDE,
728
729 // Deny creating any special devices
730 CAP_MKNOD,
731
732 // Deny setting any capabilities
733 CAP_SETFCAP,
734
735 // Deny reading from syslog
736 CAP_SYSLOG,
737
738 // Deny any admin actions (mount, sethostname, ...)
739 CAP_SYS_ADMIN,
740
741 // Deny rebooting the system
742 CAP_SYS_BOOT,
743
744 // Deny loading kernel modules
745 CAP_SYS_MODULE,
746
747 // Deny setting nice level
748 CAP_SYS_NICE,
749
750 // Deny access to /proc/kcore, /dev/mem, /dev/kmem
751 CAP_SYS_RAWIO,
752
753 // Deny circumventing any resource limits
754 CAP_SYS_RESOURCE,
755
756 // Deny setting the system time
757 CAP_SYS_TIME,
758
759 // Deny playing with suspend
760 CAP_WAKE_ALARM,
761
762 0,
763 };
764
765 DEBUG(jail->pakfire, "Dropping capabilities...\n");
766
767 size_t num_caps = 0;
768 int r;
769
770 // Drop any capabilities
771 for (const int* cap = capabilities; *cap; cap++) {
772 r = prctl(PR_CAPBSET_DROP, *cap, 0, 0, 0);
773 if (r) {
774 ERROR(jail->pakfire, "Could not drop capability %d: %m\n", *cap);
775 return r;
776 }
777
778 num_caps++;
779 }
780
781 // Fetch any capabilities
782 cap_t caps = cap_get_proc();
783 if (!caps) {
784 ERROR(jail->pakfire, "Could not read capabilities: %m\n");
785 return 1;
786 }
787
788 /*
789 Set inheritable capabilities
790
791 This ensures that no processes will be able to gain any of the listed
792 capabilities again.
793 */
794 r = cap_set_flag(caps, CAP_INHERITABLE, num_caps, capabilities, CAP_CLEAR);
795 if (r) {
796 ERROR(jail->pakfire, "cap_set_flag() failed: %m\n");
797 goto ERROR;
798 }
799
800 // Restore capabilities
801 r = cap_set_proc(caps);
802 if (r) {
803 ERROR(jail->pakfire, "Could not restore capabilities: %m\n");
804 goto ERROR;
805 }
806
807 ERROR:
808 if (caps)
809 cap_free(caps);
810
811 return r;
812 }
813
814 // Syscall Filter
815
816 static int pakfire_jail_limit_syscalls(struct pakfire_jail* jail) {
817 const int syscalls[] = {
818 // The kernel's keyring isn't namespaced
819 SCMP_SYS(keyctl),
820 SCMP_SYS(add_key),
821 SCMP_SYS(request_key),
822
823 // Disable userfaultfd
824 SCMP_SYS(userfaultfd),
825
826 // Disable perf which could leak a lot of information about the host
827 SCMP_SYS(perf_event_open),
828
829 0,
830 };
831 int r = 1;
832
833 DEBUG(jail->pakfire, "Applying syscall filter...\n");
834
835 // Setup a syscall filter which allows everything by default
836 scmp_filter_ctx ctx = seccomp_init(SCMP_ACT_ALLOW);
837 if (!ctx) {
838 ERROR(jail->pakfire, "Could not setup seccomp filter: %m\n");
839 goto ERROR;
840 }
841
842 // All all syscalls
843 for (const int* syscall = syscalls; *syscall; syscall++) {
844 r = seccomp_rule_add(ctx, SCMP_ACT_ERRNO(EPERM), *syscall, 0);
845 if (r) {
846 ERROR(jail->pakfire, "Could not configure syscall %d: %m\n", *syscall);
847 goto ERROR;
848 }
849 }
850
851 // Load syscall filter into the kernel
852 r = seccomp_load(ctx);
853 if (r) {
854 ERROR(jail->pakfire, "Could not load syscall filter into the kernel: %m\n");
855 goto ERROR;
856 }
857
858 ERROR:
859 if (ctx)
860 seccomp_release(ctx);
861
862 return r;
863 }
864
865 // UID/GID Mapping
866
867 static int pakfire_jail_write_uidgid_mapping(struct pakfire_jail* jail,
868 const char* path, uid_t mapped_id, size_t length) {
869 int r = 1;
870
871 // Open file for writing
872 FILE* f = fopen(path, "w");
873 if (!f) {
874 ERROR(jail->pakfire, "Could not open %s for writing: %m\n", path);
875 goto ERROR;
876 }
877
878 // Write configuration
879 int bytes_written = fprintf(f, "%d %d %ld\n", 0, mapped_id, length);
880 if (bytes_written <= 0) {
881 ERROR(jail->pakfire, "Could not write UID/GID mapping: %m\n");
882 goto ERROR;
883 }
884
885 // Close the file
886 r = fclose(f);
887 f = NULL;
888 if (r) {
889 ERROR(jail->pakfire, "Could not write UID/GID mapping: %m\n");
890
891 goto ERROR;
892 }
893
894 // Success
895 r = 0;
896
897 ERROR:
898 if (f)
899 fclose(f);
900
901 return r;
902 }
903
904 static int pakfire_jail_setup_uid_mapping(struct pakfire_jail* jail, pid_t pid) {
905 char path[PATH_MAX];
906 int r;
907
908 uid_t mapped_uid = 0;
909 const size_t length = 1;
910
911 // Fetch the UID of the calling process
912 uid_t uid = getuid();
913
914 // Have we been called by root?
915 if (uid == 0) {
916 mapped_uid = 0;
917
918 // Have we been called by an unprivileged user?
919 } else {
920 // XXX fetch SUBUID
921 mapped_uid = uid;
922 }
923
924 // Make path
925 r = pakfire_string_format(path, "/proc/%d/uid_map", pid);
926 if (r < 0)
927 return 1;
928
929 DEBUG(jail->pakfire, "Mapping UID range (%u - %lu)\n", mapped_uid, mapped_uid + length);
930
931 return pakfire_jail_write_uidgid_mapping(jail, path, mapped_uid, length);
932 }
933
934 static int pakfire_jail_setup_gid_mapping(struct pakfire_jail* jail, pid_t pid) {
935 char path[PATH_MAX];
936 int r;
937
938 gid_t mapped_gid = 0;
939 const size_t length = 1;
940
941 // Fetch the GID of the calling process
942 gid_t gid = getgid();
943
944 // Have we been called from the root group?
945 if (gid == 0) {
946 mapped_gid = 0;
947
948 // Have we been called by an unprivileged group?
949 } else {
950 // XXX fetch SUBGID
951 mapped_gid = gid;
952 }
953
954 // Make path
955 r = pakfire_string_format(path, "/proc/%d/gid_map", pid);
956 if (r < 0)
957 return 1;
958
959 DEBUG(jail->pakfire, "Mapping GID range (%u - %lu)\n", mapped_gid, mapped_gid + length);
960
961 return pakfire_jail_write_uidgid_mapping(jail, path, mapped_gid, length);
962 }
963
964 static int pakfire_jail_setgroups(struct pakfire_jail* jail, pid_t pid) {
965 char path[PATH_MAX];
966 int r = 1;
967
968 // Make path
969 r = pakfire_string_format(path, "/proc/%d/setgroups", pid);
970 if (r < 0)
971 return 1;
972
973 // Open file for writing
974 FILE* f = fopen(path, "w");
975 if (!f) {
976 ERROR(jail->pakfire, "Could not open %s for writing: %m\n", path);
977 goto ERROR;
978 }
979
980 // Write content
981 int bytes_written = fprintf(f, "deny\n");
982 if (bytes_written <= 0) {
983 ERROR(jail->pakfire, "Could not write to %s: %m\n", path);
984 goto ERROR;
985 }
986
987 r = fclose(f);
988 f = NULL;
989 if (r) {
990 ERROR(jail->pakfire, "Could not close %s: %m\n", path);
991 goto ERROR;
992 }
993
994 ERROR:
995 if (f)
996 fclose(f);
997
998 return r;
999 }
1000
1001 static int pakfire_jail_send_signal(struct pakfire_jail* jail, int fd) {
1002 const uint64_t val = 1;
1003 int r = 0;
1004
1005 DEBUG(jail->pakfire, "Sending signal...\n");
1006
1007 // Write to the file descriptor
1008 ssize_t bytes_written = write(fd, &val, sizeof(val));
1009 if (bytes_written < 0 || (size_t)bytes_written < sizeof(val)) {
1010 ERROR(jail->pakfire, "Could not send signal: %m\n");
1011 r = 1;
1012 }
1013
1014 // Close the file descriptor
1015 close(fd);
1016
1017 return r;
1018 }
1019
1020 static int pakfire_jail_wait_for_signal(struct pakfire_jail* jail, int fd) {
1021 uint64_t val = 0;
1022 int r = 0;
1023
1024 DEBUG(jail->pakfire, "Waiting for signal...\n");
1025
1026 ssize_t bytes_read = read(fd, &val, sizeof(val));
1027 if (bytes_read < 0 || (size_t)bytes_read < sizeof(val)) {
1028 ERROR(jail->pakfire, "Error waiting for signal: %m\n");
1029 r = 1;
1030 }
1031
1032 // Close the file descriptor
1033 close(fd);
1034
1035 return r;
1036 }
1037
1038 /*
1039 Performs the initialisation that needs to happen in the parent part
1040 */
1041 static int pakfire_jail_parent(struct pakfire_jail* jail, struct pakfire_jail_exec* ctx) {
1042 int r;
1043
1044 // Setup UID mapping
1045 r = pakfire_jail_setup_uid_mapping(jail, ctx->pid);
1046 if (r)
1047 return r;
1048
1049 // Write "deny" to /proc/PID/setgroups
1050 r = pakfire_jail_setgroups(jail, ctx->pid);
1051 if (r)
1052 return r;
1053
1054 // Setup GID mapping
1055 r = pakfire_jail_setup_gid_mapping(jail, ctx->pid);
1056 if (r)
1057 return r;
1058
1059 // Parent has finished initialisation
1060 DEBUG(jail->pakfire, "Parent has finished initialization\n");
1061
1062 // Send signal to client
1063 r = pakfire_jail_send_signal(jail, ctx->completed_fd);
1064 if (r)
1065 return r;
1066
1067 return 0;
1068 }
1069
1070 static int pakfire_jail_child(struct pakfire_jail* jail, struct pakfire_jail_exec* ctx,
1071 const char* argv[]) {
1072 int r;
1073
1074 // Redirect any logging to our log pipe
1075 pakfire_set_log_callback(jail->pakfire, pakfire_jail_log, &ctx->pipes);
1076
1077 // Fetch my own PID
1078 pid_t pid = getpid();
1079
1080 DEBUG(jail->pakfire, "Launched child process in jail with PID %d\n", pid);
1081
1082 // Wait for the parent to finish initialization
1083 r = pakfire_jail_wait_for_signal(jail, ctx->completed_fd);
1084 if (r)
1085 return r;
1086
1087 // Perform further initialization
1088
1089 // Fetch UID/GID
1090 uid_t uid = getuid();
1091 gid_t gid = getgid();
1092
1093 // Fetch EUID/EGID
1094 uid_t euid = geteuid();
1095 gid_t egid = getegid();
1096
1097 DEBUG(jail->pakfire, " UID: %d (effective %d)\n", uid, euid);
1098 DEBUG(jail->pakfire, " GID: %d (effective %d)\n", gid, egid);
1099
1100 // Check if we are (effectively running as root)
1101 if (uid != 0 || gid != 0) {
1102 ERROR(jail->pakfire, "Child process is not running as root\n");
1103 return 126;
1104 }
1105
1106 const char* root = pakfire_get_path(jail->pakfire);
1107 const char* arch = pakfire_get_arch(jail->pakfire);
1108
1109 // Change root (unless root is /)
1110 if (!pakfire_on_root(jail->pakfire)) {
1111 // Mount everything
1112 r = pakfire_mount_all(jail->pakfire);
1113 if (r)
1114 return r;
1115
1116 // Log all mountpoints
1117 pakfire_mount_list(jail->pakfire);
1118
1119 // Call chroot()
1120 r = chroot(root);
1121 if (r) {
1122 ERROR(jail->pakfire, "chroot() to %s failed: %m\n", root);
1123 return 1;
1124 }
1125
1126 // Change directory to /
1127 r = chdir("/");
1128 if (r) {
1129 ERROR(jail->pakfire, "chdir() after chroot() failed: %m\n");
1130 return 1;
1131 }
1132 }
1133
1134 // Set personality
1135 unsigned long persona = pakfire_arch_personality(arch);
1136 if (persona) {
1137 r = personality(persona);
1138 if (r < 0) {
1139 ERROR(jail->pakfire, "Could not set personality (%x)\n", (unsigned int)persona);
1140 return 1;
1141 }
1142 }
1143
1144 // Set nice level
1145 if (jail->nice) {
1146 DEBUG(jail->pakfire, "Setting nice level to %d\n", jail->nice);
1147
1148 r = setpriority(PRIO_PROCESS, pid, jail->nice);
1149 if (r) {
1150 ERROR(jail->pakfire, "Could not set nice level: %m\n");
1151 return 1;
1152 }
1153 }
1154
1155 // Close other end of log pipes
1156 close(ctx->pipes.log_INFO[0]);
1157 close(ctx->pipes.log_ERROR[0]);
1158 #ifdef ENABLE_DEBUG
1159 close(ctx->pipes.log_DEBUG[0]);
1160 #endif /* ENABLE_DEBUG */
1161
1162 // Connect standard output and error
1163 if (ctx->pipes.stdout[1] && ctx->pipes.stderr[1]) {
1164 r = dup2(ctx->pipes.stdout[1], STDOUT_FILENO);
1165 if (r < 0) {
1166 ERROR(jail->pakfire, "Could not connect fd %d to stdout: %m\n",
1167 ctx->pipes.stdout[1]);
1168
1169 return 1;
1170 }
1171
1172 r = dup2(ctx->pipes.stderr[1], STDERR_FILENO);
1173 if (r < 0) {
1174 ERROR(jail->pakfire, "Could not connect fd %d to stderr: %m\n",
1175 ctx->pipes.stderr[1]);
1176
1177 return 1;
1178 }
1179
1180 // Close the pipe (as we have moved the original file descriptors)
1181 pakfire_jail_close_pipe(jail, ctx->pipes.stdout);
1182 pakfire_jail_close_pipe(jail, ctx->pipes.stderr);
1183 }
1184
1185 // Reset open file limit (http://0pointer.net/blog/file-descriptor-limits.html)
1186 r = pakfire_rlimit_reset_nofile(jail->pakfire);
1187 if (r)
1188 return r;
1189
1190 // Drop capabilities
1191 r = pakfire_jail_drop_capabilities(jail);
1192 if (r)
1193 return r;
1194
1195 // Filter syscalls
1196 r = pakfire_jail_limit_syscalls(jail);
1197 if (r)
1198 return r;
1199
1200 // exec() command
1201 r = execvpe(argv[0], (char**)argv, jail->env);
1202 if (r < 0)
1203 ERROR(jail->pakfire, "Could not execve(): %m\n");
1204
1205 // Translate errno into regular exit code
1206 switch (errno) {
1207 case ENOENT:
1208 r = 127;
1209 break;
1210
1211 default:
1212 r = 1;
1213 }
1214
1215 // We should not get here
1216 return r;
1217 }
1218
1219 // Run a command in the jail
1220 static int __pakfire_jail_exec(struct pakfire_jail* jail, const char* argv[]) {
1221 int exit = -1;
1222 int r;
1223
1224 // Check if argv is valid
1225 if (!argv || !argv[0]) {
1226 errno = EINVAL;
1227 return -1;
1228 }
1229
1230 // Initialize context for this call
1231 struct pakfire_jail_exec ctx = {
1232 .pipes = {
1233 .stdout = { 0, 0 },
1234 .stderr = { 0, 0 },
1235 },
1236 };
1237
1238 DEBUG(jail->pakfire, "Executing jail...\n");
1239
1240 /*
1241 Setup a file descriptor which can be used to notify the client that the parent
1242 has completed configuration.
1243 */
1244 ctx.completed_fd = eventfd(0, EFD_CLOEXEC);
1245 if (ctx.completed_fd < 0) {
1246 ERROR(jail->pakfire, "eventfd() failed: %m\n");
1247 return -1;
1248 }
1249
1250 // Create pipes to communicate with child process if we are not running interactively
1251 if (!pakfire_jail_has_flag(jail, PAKFIRE_JAIL_INTERACTIVE)) {
1252 // stdout
1253 r = pakfire_jail_setup_pipe(jail, &ctx.pipes.stdout, 0);
1254 if (r)
1255 goto ERROR;
1256
1257 // stderr
1258 r = pakfire_jail_setup_pipe(jail, &ctx.pipes.stderr, 0);
1259 if (r)
1260 goto ERROR;
1261 }
1262
1263 // Setup pipes for logging
1264 // INFO
1265 r = pakfire_jail_setup_pipe(jail, &ctx.pipes.log_INFO, O_CLOEXEC);
1266 if (r)
1267 goto ERROR;
1268
1269 // ERROR
1270 r = pakfire_jail_setup_pipe(jail, &ctx.pipes.log_ERROR, O_CLOEXEC);
1271 if (r)
1272 goto ERROR;
1273
1274 #ifdef ENABLE_DEBUG
1275 // DEBUG
1276 r = pakfire_jail_setup_pipe(jail, &ctx.pipes.log_DEBUG, O_CLOEXEC);
1277 if (r)
1278 goto ERROR;
1279 #endif /* ENABLE_DEBUG */
1280
1281 // Setup a cgroup
1282 r = pakfire_cgroup_open(&ctx.cgroup, jail->pakfire, "jail/test1",
1283 PAKFIRE_CGROUP_ENABLE_ACCOUNTING);
1284 if (r)
1285 goto ERROR;
1286
1287 // Configure child process
1288 struct clone_args args = {
1289 .flags =
1290 CLONE_NEWCGROUP |
1291 CLONE_NEWIPC |
1292 CLONE_NEWNS |
1293 CLONE_NEWPID |
1294 CLONE_NEWUSER |
1295 CLONE_NEWUTS |
1296 CLONE_PIDFD |
1297 CLONE_INTO_CGROUP,
1298 .exit_signal = SIGCHLD,
1299 .pidfd = (long long unsigned int)&ctx.pidfd,
1300
1301 // Clone into the new cgroup
1302 .cgroup = pakfire_cgroup_fd(ctx.cgroup),
1303 };
1304
1305 // Fork this process
1306 ctx.pid = clone3(&args, sizeof(args));
1307 if (ctx.pid < 0) {
1308 ERROR(jail->pakfire, "Could not clone: %m\n");
1309 return -1;
1310
1311 // Child process
1312 } else if (ctx.pid == 0) {
1313 r = pakfire_jail_child(jail, &ctx, argv);
1314 _exit(r);
1315 }
1316
1317 // Parent process
1318 r = pakfire_jail_parent(jail, &ctx);
1319 if (r)
1320 goto ERROR;
1321
1322 DEBUG(jail->pakfire, "Waiting for PID %d to finish its work\n", ctx.pid);
1323
1324 // Read output of the child process
1325 r = pakfire_jail_wait(jail, &ctx);
1326 if (r)
1327 goto ERROR;
1328
1329 // Handle exit status
1330 switch (ctx.status.si_code) {
1331 case CLD_EXITED:
1332 DEBUG(jail->pakfire, "The child process exited with code %d\n",
1333 ctx.status.si_status);
1334
1335 // Pass exit code
1336 exit = ctx.status.si_status;
1337 break;
1338
1339 case CLD_KILLED:
1340 case CLD_DUMPED:
1341 ERROR(jail->pakfire, "The child process was killed\n");
1342 break;
1343
1344 // Log anything else
1345 default:
1346 ERROR(jail->pakfire, "Unknown child exit code: %d\n", ctx.status.si_code);
1347 break;
1348 }
1349
1350 ERROR:
1351 // Close any file descriptors
1352 pakfire_jail_close_pipe(jail, ctx.pipes.stdout);
1353 pakfire_jail_close_pipe(jail, ctx.pipes.stderr);
1354 if (ctx.pidfd)
1355 close(ctx.pidfd);
1356 pakfire_jail_close_pipe(jail, ctx.pipes.log_INFO);
1357 pakfire_jail_close_pipe(jail, ctx.pipes.log_ERROR);
1358 pakfire_jail_close_pipe(jail, ctx.pipes.log_DEBUG);
1359
1360 // Destroy cgroup
1361 if (ctx.cgroup) {
1362 pakfire_cgroup_destroy(ctx.cgroup);
1363 pakfire_cgroup_unref(ctx.cgroup);
1364 }
1365
1366 // Umount everything
1367 if (!pakfire_on_root(jail->pakfire))
1368 pakfire_umount_all(jail->pakfire);
1369
1370 return exit;
1371 }
1372
1373 PAKFIRE_EXPORT int pakfire_jail_exec(struct pakfire_jail* jail,
1374 const char* argv[], char*** output) {
1375 int r;
1376
1377 // Store logging callback
1378 pakfire_jail_log_callback log_callback = jail->log_callback;
1379 void* log_data = jail->log_data;
1380
1381 // Capture output if requested by user
1382 if (output)
1383 pakfire_jail_set_log_callback(jail, pakfire_jail_capture_stdout, output);
1384
1385 // Run exec()
1386 r = __pakfire_jail_exec(jail, argv);
1387
1388 // Restore log callback
1389 pakfire_jail_set_log_callback(jail, log_callback, log_data);
1390
1391 return r;
1392 }
1393
1394 PAKFIRE_EXPORT int pakfire_jail_exec_script(struct pakfire_jail* jail,
1395 const char* script, const size_t size, const char* args[], char*** output) {
1396 char path[PATH_MAX];
1397 const char** argv = NULL;
1398 int r;
1399
1400 const char* root = pakfire_get_path(jail->pakfire);
1401
1402 // Write the scriptlet to disk
1403 r = pakfire_path_join(path, root, "pakfire-script.XXXXXX");
1404 if (r < 0)
1405 goto ERROR;
1406
1407 // Open a temporary file
1408 int fd = mkstemp(path);
1409 if (fd < 0) {
1410 ERROR(jail->pakfire, "Could not open a temporary file: %m\n");
1411 r = 1;
1412 goto ERROR;
1413 }
1414
1415 DEBUG(jail->pakfire, "Writing script to %s:\n%.*s\n", path, (int)size, script);
1416
1417 // Write data
1418 ssize_t bytes_written = write(fd, script, size);
1419 if (bytes_written < (ssize_t)size) {
1420 ERROR(jail->pakfire, "Could not write script to file %s: %m\n", path);
1421 r = 1;
1422 goto ERROR;
1423 }
1424
1425 // Make the script executable
1426 r = fchmod(fd, S_IRUSR|S_IWUSR|S_IXUSR);
1427 if (r) {
1428 ERROR(jail->pakfire, "Could not set executable permissions on %s: %m\n", path);
1429 goto ERROR;
1430 }
1431
1432 // Close file
1433 r = close(fd);
1434 if (r) {
1435 ERROR(jail->pakfire, "Could not close script file %s: %m\n", path);
1436 r = 1;
1437 goto ERROR;
1438 }
1439
1440 // Count how many arguments were passed
1441 unsigned int argc = 1;
1442 if (args) {
1443 for (const char** arg = args; *arg; arg++)
1444 argc++;
1445 }
1446
1447 argv = calloc(argc + 1, sizeof(*argv));
1448 if (!argv) {
1449 ERROR(jail->pakfire, "Could not allocate argv: %m\n");
1450 goto ERROR;
1451 }
1452
1453 // Set command
1454 argv[0] = (root) ? pakfire_path_relpath(root, path) : path;
1455
1456 // Copy args
1457 for (unsigned int i = 1; i < argc; i++)
1458 argv[i] = args[i-1];
1459
1460 // Run the script
1461 r = pakfire_jail_exec(jail, argv, output);
1462
1463 ERROR:
1464 if (argv)
1465 free(argv);
1466
1467 // Remove script from disk
1468 if (*path)
1469 unlink(path);
1470
1471 return r;
1472 }
1473
/*
	Convenience wrapper: creates a temporary jail with the given flags,
	executes argv in it and releases the jail afterwards.

	Returns the error from pakfire_jail_create() if the jail could not be
	created, otherwise the result of pakfire_jail_exec().
*/
int pakfire_jail_run(struct pakfire* pakfire, const char* argv[], int flags, char*** output) {
	struct pakfire_jail* jail = NULL;

	// Only execute the command if the jail could be created
	int r = pakfire_jail_create(&jail, pakfire, flags);
	if (!r)
		r = pakfire_jail_exec(jail, argv, output);

	// Release the jail (if we have one)
	if (jail)
		pakfire_jail_unref(jail);

	return r;
}
1496
/*
	Convenience wrapper: creates a temporary jail with the given flags,
	runs the script in it and releases the jail afterwards.

	Returns the error from pakfire_jail_create() if the jail could not be
	created, otherwise the result of pakfire_jail_exec_script().
*/
int pakfire_jail_run_script(struct pakfire* pakfire,
		const char* script, const size_t length, const char* argv[], int flags, char*** output) {
	struct pakfire_jail* jail = NULL;

	// Only run the script if the jail could be created
	int r = pakfire_jail_create(&jail, pakfire, flags);
	if (!r)
		r = pakfire_jail_exec_script(jail, script, length, argv, output);

	// Release the jail (if we have one)
	if (jail)
		pakfire_jail_unref(jail);

	return r;
}
1516
1517
1518 int pakfire_jail_shell(struct pakfire* pakfire) {
1519 const char* argv[] = {
1520 "/bin/bash", "--login", NULL,
1521 };
1522
1523 // Execute /bin/bash
1524 return pakfire_jail_run(pakfire, argv, PAKFIRE_JAIL_INTERACTIVE, NULL);
1525 }
1526
1527 int pakfire_jail_ldconfig(struct pakfire* pakfire) {
1528 char path[PATH_MAX];
1529
1530 const char* ldconfig = "/sbin/ldconfig";
1531
1532 // Check if ldconfig exists before calling it to avoid overhead
1533 int r = pakfire_make_path(pakfire, path, ldconfig);
1534 if (r < 0)
1535 return 1;
1536
1537 // Check if ldconfig is executable
1538 r = access(path, X_OK);
1539 if (r) {
1540 DEBUG(pakfire, "%s is not executable. Skipping...\n", ldconfig);
1541 return 0;
1542 }
1543
1544 const char* argv[] = {
1545 ldconfig, NULL,
1546 };
1547
1548 // Run ldconfig
1549 return pakfire_jail_run(pakfire, argv, 0, NULL);
1550 }
1551
1552 // Utility functions
1553
1554 PAKFIRE_EXPORT char* pakfire_jail_concat_output(struct pakfire_jail* jail,
1555 const char** input, size_t* length) {
1556 // Return nothing on no input
1557 if (!input)
1558 return NULL;
1559
1560 // XXX Maybe there is a more efficient way to do this
1561
1562 char* output = pakfire_string_join((char**)input, "");
1563 if (!output)
1564 return NULL;
1565
1566 // Store the length of the result
1567 if (length)
1568 *length = strlen(output);
1569
1570 return output;
1571 }