]> git.ipfire.org Git - people/ms/pakfire.git/blob - src/libpakfire/jail.c
jail: Implement better logging for the child process
[people/ms/pakfire.git] / src / libpakfire / jail.c
1 /*#############################################################################
2 # #
3 # Pakfire - The IPFire package management system #
4 # Copyright (C) 2022 Pakfire development team #
5 # #
6 # This program is free software: you can redistribute it and/or modify #
7 # it under the terms of the GNU General Public License as published by #
8 # the Free Software Foundation, either version 3 of the License, or #
9 # (at your option) any later version. #
10 # #
11 # This program is distributed in the hope that it will be useful, #
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of #
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
14 # GNU General Public License for more details. #
15 # #
16 # You should have received a copy of the GNU General Public License #
17 # along with this program. If not, see <http://www.gnu.org/licenses/>. #
18 # #
19 #############################################################################*/
20
21 #include <errno.h>
22 #include <linux/capability.h>
23 #include <linux/fcntl.h>
24 #include <linux/sched.h>
25 #include <linux/wait.h>
26 #include <sched.h>
27 #include <signal.h>
28 #include <stdlib.h>
29 #include <syscall.h>
30 #include <sys/capability.h>
31 #include <sys/epoll.h>
32 #include <sys/eventfd.h>
33 #include <sys/personality.h>
34 #include <sys/prctl.h>
35 #include <sys/resource.h>
36 #include <sys/types.h>
37 #include <sys/wait.h>
38
39 // libseccomp
40 #include <seccomp.h>
41
42 #include <pakfire/arch.h>
43 #include <pakfire/jail.h>
44 #include <pakfire/logging.h>
45 #include <pakfire/mount.h>
46 #include <pakfire/pakfire.h>
47 #include <pakfire/private.h>
48 #include <pakfire/util.h>
49
// Size in bytes of each log buffer (64 KiB). The expression is parenthesised
// so that the macro expands safely inside larger expressions.
#define BUFFER_SIZE      (1024 * 64)

// Number of slots in a jail's environment array
#define ENVIRON_SIZE     128

// Maximum number of events fetched by a single epoll_wait() call
#define EPOLL_MAX_EVENTS 2
53
/*
	Default environment variables which pakfire_jail_create() sets on every
	new jail. The table is terminated by an entry whose key is NULL.
*/
static const struct environ {
	const char* key;
	const char* val;
} ENV[] = {
	{ .key = "LANG", .val = "en_US.utf-8" },
	{ .key = "TERM", .val = "vt100" },
	{ .key = NULL,   .val = NULL },
};
63
64 struct pakfire_jail {
65 struct pakfire* pakfire;
66 int nrefs;
67
68 // Flags
69 int flags;
70
71 // Resource Limits
72 int nice;
73
74 // Environment
75 char* env[ENVIRON_SIZE];
76
77 // Logging
78 pakfire_jail_log_callback log_callback;
79 void* log_data;
80 };
81
82 struct pakfire_log_buffer {
83 char data[BUFFER_SIZE];
84 size_t used;
85 };
86
87 struct pakfire_jail_exec {
88 // PID (of the child)
89 pid_t pid;
90 int pidfd;
91
92 // Process status (from waitid)
93 siginfo_t status;
94
95 // FD to notify the client that the parent has finished initialization
96 int completed_fd;
97
98 // Log pipes
99 struct pakfire_jail_pipes {
100 int stdout[2];
101 int stderr[2];
102
103 // Logging
104 int log_INFO[2];
105 int log_ERROR[2];
106 int log_DEBUG[2];
107 } pipes;
108
109 // Log buffers
110 struct pakfire_jail_buffers {
111 struct pakfire_log_buffer stdout;
112 struct pakfire_log_buffer stderr;
113
114 // Logging
115 struct pakfire_log_buffer log_INFO;
116 struct pakfire_log_buffer log_ERROR;
117 struct pakfire_log_buffer log_DEBUG;
118 } buffers;
119 };
120
/*
	Thin wrapper which invokes the clone3 system call directly through
	syscall(). The result of syscall() is returned as an int.
*/
static int clone3(struct clone_args* args, size_t size) {
	const long result = syscall(__NR_clone3, args, size);

	return result;
}
124
125 static void pakfire_jail_free(struct pakfire_jail* jail) {
126 DEBUG(jail->pakfire, "Freeing jail at %p\n", jail);
127
128 // Free environment
129 for (unsigned int i = 0; jail->env[i]; i++)
130 free(jail->env[i]);
131
132 pakfire_unref(jail->pakfire);
133 free(jail);
134 }
135
/*
	Default log callback: forwards a line to Pakfire's own logging macros.

	LOG_INFO goes to INFO(), LOG_ERR goes to ERROR() and - only if built
	with ENABLE_DEBUG - LOG_DEBUG goes to DEBUG(). Lines of any other
	priority are dropped. data and length are not used.

	Always returns zero.
*/
static int pakfire_jail_default_log_callback(struct pakfire* pakfire, void* data,
		int priority, const char* line, size_t length) {
	if (priority == LOG_INFO) {
		INFO(pakfire, "%s", line);
	} else if (priority == LOG_ERR) {
		ERROR(pakfire, "%s", line);
	}
#ifdef ENABLE_DEBUG
	else if (priority == LOG_DEBUG) {
		DEBUG(pakfire, "%s", line);
	}
#endif

	return 0;
}
159
/*
	Prepares the environment for an interactive session: sets a prompt
	(PS1) and copies TERM and LANG from the calling process if they are set.

	Returns zero on success or whatever pakfire_jail_set_env() returned
	on failure.
*/
static int pakfire_jail_setup_interactive_env(struct pakfire_jail* jail) {
	// Variables which are taken over from the caller's environment
	static const char* const inherited[] = { "TERM", "LANG", NULL };

	// Set PS1
	int r = pakfire_jail_set_env(jail, "PS1", "pakfire-jail \\w> ");
	if (r)
		return r;

	for (const char* const* name = inherited; *name; name++) {
		const char* value = secure_getenv(*name);

		// Skip anything that is not set
		if (!value)
			continue;

		r = pakfire_jail_set_env(jail, *name, value);
		if (r)
			return r;
	}

	return 0;
}
184
185 PAKFIRE_EXPORT int pakfire_jail_create(struct pakfire_jail** jail,
186 struct pakfire* pakfire, int flags) {
187 int r;
188
189 // Allocate a new jail
190 struct pakfire_jail* j = calloc(1, sizeof(*j));
191 if (!j)
192 return 1;
193
194 // Reference Pakfire
195 j->pakfire = pakfire_ref(pakfire);
196
197 // Initialize reference counter
198 j->nrefs = 1;
199
200 // Store flags
201 j->flags = flags;
202
203 DEBUG(j->pakfire, "Allocated new jail at %p\n", j);
204
205 // Set default log callback
206 r = pakfire_jail_set_log_callback(j, pakfire_jail_default_log_callback, NULL);
207 if (r)
208 goto ERROR;
209
210 // Set default environment
211 for (const struct environ* e = ENV; e->key; e++) {
212 r = pakfire_jail_set_env(j, e->key, e->val);
213 if (r)
214 goto ERROR;
215 }
216
217 // Setup interactive stuff
218 if (j->flags & PAKFIRE_JAIL_INTERACTIVE) {
219 r = pakfire_jail_setup_interactive_env(j);
220 if (r)
221 goto ERROR;
222 }
223
224 // Done
225 *jail = j;
226 return 0;
227
228 ERROR:
229 pakfire_jail_free(j);
230
231 return r;
232 }
233
234 PAKFIRE_EXPORT struct pakfire_jail* pakfire_jail_ref(struct pakfire_jail* jail) {
235 ++jail->nrefs;
236
237 return jail;
238 }
239
240 PAKFIRE_EXPORT struct pakfire_jail* pakfire_jail_unref(struct pakfire_jail* jail) {
241 if (--jail->nrefs > 0)
242 return jail;
243
244 pakfire_jail_free(jail);
245 return NULL;
246 }
247
248 static int pakfire_jail_has_flag(struct pakfire_jail* jail, int flag) {
249 return jail->flags & flag;
250 }
251
252 // Resource Limits
253
254 PAKFIRE_EXPORT int pakfire_jail_nice(struct pakfire_jail* jail, int nice) {
255 // Check if nice level is in range
256 if (nice < -19 || nice > 20) {
257 errno = EINVAL;
258 return 1;
259 }
260
261 // Store nice level
262 jail->nice = nice;
263
264 return 0;
265 }
266
267 // Environment
268
269 // Returns the length of the environment
270 static unsigned int pakfire_jail_env_length(struct pakfire_jail* jail) {
271 unsigned int i = 0;
272
273 // Count everything in the environment
274 for (char** e = jail->env; *e; e++)
275 i++;
276
277 return i;
278 }
279
280 // Finds an existing environment variable and returns its index or -1 if not found
281 static int pakfire_jail_find_env(struct pakfire_jail* jail, const char* key) {
282 if (!key) {
283 errno = EINVAL;
284 return -1;
285 }
286
287 char buffer[strlen(key) + 2];
288 pakfire_string_format(buffer, "%s=", key);
289
290 for (unsigned int i = 0; jail->env[i]; i++) {
291 if (pakfire_string_startswith(jail->env[i], buffer))
292 return i;
293 }
294
295 // Nothing found
296 return -1;
297 }
298
299 // Returns the value of an environment variable or NULL
300 PAKFIRE_EXPORT const char* pakfire_jail_get_env(struct pakfire_jail* jail,
301 const char* key) {
302 int i = pakfire_jail_find_env(jail, key);
303 if (i < 0)
304 return NULL;
305
306 return jail->env[i] + strlen(key) + 1;
307 }
308
309 // Sets an environment variable
310 PAKFIRE_EXPORT int pakfire_jail_set_env(struct pakfire_jail* jail,
311 const char* key, const char* value) {
312 // Find the index where to write this value to
313 int i = pakfire_jail_find_env(jail, key);
314 if (i < 0)
315 i = pakfire_jail_env_length(jail);
316
317 // Return -ENOSPC when the environment is full
318 if (i >= ENVIRON_SIZE) {
319 errno = ENOSPC;
320 return -1;
321 }
322
323 // Free any previous value
324 if (jail->env[i])
325 free(jail->env[i]);
326
327 // Format and set environment variable
328 asprintf(&jail->env[i], "%s=%s", key, value);
329
330 DEBUG(jail->pakfire, "Set environment variable: %s\n", jail->env[i]);
331
332 return 0;
333 }
334
335 // Imports an environment
336 PAKFIRE_EXPORT int pakfire_jail_import_env(struct pakfire_jail* jail, const char* env[]) {
337 if (!env)
338 return 0;
339
340 char* key;
341 char* val;
342 int r;
343
344 // Copy environment variables
345 for (unsigned int i = 0; env[i]; i++) {
346 r = pakfire_string_partition(env[i], "=", &key, &val);
347 if (r)
348 continue;
349
350 // Set value
351 r = pakfire_jail_set_env(jail, key, val);
352
353 if (key)
354 free(key);
355 if (val)
356 free(val);
357
358 // Break on error
359 if (r)
360 return r;
361 }
362
363 return 0;
364 }
365
366 // Logging
367
368 PAKFIRE_EXPORT int pakfire_jail_set_log_callback(struct pakfire_jail* jail,
369 pakfire_jail_log_callback callback, void* data) {
370 jail->log_callback = callback;
371 jail->log_data = data;
372
373 return 0;
374 }
375
376 /*
377 This function replaces any logging in the child process.
378
379 All log messages will be sent to the parent process through their respective pipes.
380 */
381 static void pakfire_jail_log(void* data, int priority, const char* file,
382 int line, const char* fn, const char* format, va_list args) {
383 struct pakfire_jail_pipes* pipes = (struct pakfire_jail_pipes*)data;
384 int fd;
385
386 switch (priority) {
387 case LOG_INFO:
388 fd = pipes->log_INFO[1];
389 break;
390
391 case LOG_ERR:
392 fd = pipes->log_ERROR[1];
393 break;
394
395 #ifdef ENABLE_DEBUG
396 case LOG_DEBUG:
397 fd = pipes->log_DEBUG[1];
398 break;
399 #endif /* ENABLE_DEBUG */
400
401 // Ignore any messages of an unknown priority
402 default:
403 return;
404 }
405
406 // Send the log message
407 if (fd)
408 vdprintf(fd, format, args);
409 }
410
411 static int pakfire_jail_log_buffer_is_full(const struct pakfire_log_buffer* buffer) {
412 return (sizeof(buffer->data) == buffer->used);
413 }
414
415 /*
416 This function reads as much data as it can from the file descriptor.
417 If it finds a whole line in it, it will send it to the logger and repeat the process.
418 If not newline character is found, it will try to read more data until it finds one.
419 */
420 static int pakfire_jail_handle_log(struct pakfire_jail* jail,
421 struct pakfire_jail_exec* ctx, int priority, int fd,
422 struct pakfire_log_buffer* buffer, pakfire_jail_log_callback callback, void* data) {
423 char line[BUFFER_SIZE + 1];
424
425 // Fill up buffer from fd
426 if (buffer->used < sizeof(buffer->data)) {
427 ssize_t bytes_read = read(fd, buffer->data + buffer->used,
428 sizeof(buffer->data) - buffer->used);
429
430 // Handle errors
431 if (bytes_read < 0) {
432 ERROR(jail->pakfire, "Could not read from fd %d: %m\n", fd);
433 return -1;
434 }
435
436 // Update buffer size
437 buffer->used += bytes_read;
438 }
439
440 // See if we have any lines that we can write
441 while (buffer->used) {
442 // Search for the end of the first line
443 char* eol = memchr(buffer->data, '\n', buffer->used);
444
445 // No newline found
446 if (!eol) {
447 // If the buffer is full, we send the content to the logger and try again
448 // This should not happen in practise
449 if (pakfire_jail_log_buffer_is_full(buffer)) {
450 DEBUG(jail->pakfire, "Logging buffer is full. Sending all content\n");
451
452 eol = buffer->data + sizeof(buffer->data) - 1;
453
454 // Otherwise we might have only read parts of the output
455 } else
456 break;
457 }
458
459 // Find the length of the string
460 size_t length = eol - buffer->data + 1;
461
462 // Copy the line into the buffer
463 memcpy(line, buffer->data, length);
464
465 // Terminate the string
466 line[length] = '\0';
467
468 // Log the line
469 if (callback) {
470 int r = callback(jail->pakfire, data, priority, line, length);
471 if (r) {
472 ERROR(jail->pakfire, "The logging callback returned an error: %d\n", r);
473 return r;
474 }
475 }
476
477 // Remove line from buffer
478 memmove(buffer->data, buffer->data + length, buffer->used - length);
479 buffer->used -= length;
480 }
481
482 return 0;
483 }
484
/*
	Convenience function for the reading side of a pipe: closes the write
	end (unless it is zero), resets it to zero and returns the read end.
*/
static int pakfire_jail_get_pipe(struct pakfire_jail* jail, int (*fds)[2]) {
	// [0] is the read end, [1] is the write end
	int* const ends = *fds;

	if (ends[1] != 0) {
		close(ends[1]);
		ends[1] = 0;
	}

	return ends[0];
}
503
504 static int pakfire_jail_wait(struct pakfire_jail* jail, struct pakfire_jail_exec* ctx) {
505 int epollfd = -1;
506 struct epoll_event ev;
507 struct epoll_event events[EPOLL_MAX_EVENTS];
508 int r = 0;
509
510 // Fetch file descriptors from context
511 const int stdout = pakfire_jail_get_pipe(jail, &ctx->pipes.stdout);
512 const int stderr = pakfire_jail_get_pipe(jail, &ctx->pipes.stderr);
513 const int pidfd = ctx->pidfd;
514
515 // Logging
516 const int log_INFO = pakfire_jail_get_pipe(jail, &ctx->pipes.log_INFO);
517 const int log_ERROR = pakfire_jail_get_pipe(jail, &ctx->pipes.log_ERROR);
518 const int log_DEBUG = pakfire_jail_get_pipe(jail, &ctx->pipes.log_DEBUG);
519
520 // Make a list of all file descriptors we are interested in
521 int fds[] = {
522 stdout, stderr, pidfd, log_INFO, log_ERROR, log_DEBUG,
523 };
524
525 // Setup epoll
526 epollfd = epoll_create1(0);
527 if (epollfd < 0) {
528 ERROR(jail->pakfire, "Could not initialize epoll(): %m\n");
529 r = 1;
530 goto ERROR;
531 }
532
533 ev.events = EPOLLIN;
534
535 // Turn file descriptors into non-blocking mode and add them to epoll()
536 for (unsigned int i = 0; i < sizeof(fds) / sizeof(*fds); i++) {
537 int fd = fds[i];
538
539 // Skip fds which were not initialized
540 if (fd <= 0)
541 continue;
542
543 ev.data.fd = fd;
544
545 if (epoll_ctl(epollfd, EPOLL_CTL_ADD, fd, &ev) < 0) {
546 ERROR(jail->pakfire, "Could not add file descriptor %d to epoll(): %m\n", fd);
547 r = 1;
548 goto ERROR;
549 }
550 }
551
552 int ended = 0;
553
554 // Loop for as long as the process is alive
555 while (!ended) {
556 int num = epoll_wait(epollfd, events, EPOLL_MAX_EVENTS, -1);
557 if (num < 1) {
558 // Ignore if epoll_wait() has been interrupted
559 if (errno == EINTR)
560 continue;
561
562 ERROR(jail->pakfire, "epoll_wait() failed: %m\n");
563 r = 1;
564
565 goto ERROR;
566 }
567
568 for (int i = 0; i < num; i++) {
569 int fd = events[i].data.fd;
570
571 struct pakfire_log_buffer* buffer = NULL;
572 pakfire_jail_log_callback callback = NULL;
573 void* data = NULL;
574 int priority;
575
576 // Handle any changes to the PIDFD
577 if (fd == pidfd) {
578 // Call waidid() and store the result
579 r = waitid(P_PIDFD, ctx->pidfd, &ctx->status, WEXITED);
580 if (r) {
581 ERROR(jail->pakfire, "waitid() failed: %m\n");
582 goto ERROR;
583 }
584
585 // Mark that we have ended so that we will process the remaining
586 // events from epoll() now, but won't restart the outer loop.
587 ended = 1;
588 continue;
589
590 // Handle logging messages
591 } else if (fd == log_INFO) {
592 buffer = &ctx->buffers.log_INFO;
593 priority = LOG_INFO;
594
595 callback = pakfire_jail_default_log_callback;
596
597 } else if (fd == log_ERROR) {
598 buffer = &ctx->buffers.log_ERROR;
599 priority = LOG_ERR;
600
601 callback = pakfire_jail_default_log_callback;
602
603 } else if (fd == log_DEBUG) {
604 buffer = &ctx->buffers.log_DEBUG;
605 priority = LOG_DEBUG;
606
607 callback = pakfire_jail_default_log_callback;
608
609 // Handle anything from the log pipes
610 } else if (fd == stdout) {
611 buffer = &ctx->buffers.stdout;
612 priority = LOG_INFO;
613
614 callback = jail->log_callback;
615 data = jail->log_data;
616
617 } else if (fd == stderr) {
618 buffer = &ctx->buffers.stderr;
619 priority = LOG_ERR;
620
621 callback = jail->log_callback;
622 data = jail->log_data;
623
624 } else {
625 DEBUG(jail->pakfire, "Received invalid file descriptor %d\n", fd);
626 continue;
627 }
628
629 // Handle log event
630 r = pakfire_jail_handle_log(jail, ctx, priority, fd, buffer, callback, data);
631 if (r)
632 goto ERROR;
633 }
634 }
635
636 ERROR:
637 if (epollfd > 0)
638 close(epollfd);
639
640 return r;
641 }
642
/*
	Log callback which collects everything from standard output.

	data has to point to a char** which is either NULL or a NULL-terminated
	array of strings. Every line of priority LOG_INFO is duplicated and
	appended to that array. Lines of any other priority are passed on to
	the default log callback.

	Returns zero on success and 1 if memory could not be allocated. In that
	case the existing array remains valid and unchanged.
*/
static int pakfire_jail_capture_stdout(struct pakfire* pakfire, void* data, int priority,
		const char* line, size_t length) {
	char*** array = (char***)data;

	// Send everything that is not standard output to the default logger
	if (priority != LOG_INFO)
		return pakfire_jail_default_log_callback(pakfire, NULL, priority, line, length);

	size_t count = 0;

	// Create a copy of line
	char* message = strdup(line);
	if (!message)
		return 1;

	// Determine the length of the existing array
	if (*array) {
		for (char** element = *array; *element; element++)
			count++;
	}

	// Allocate space for the new element and the terminator without
	// losing the old array if this fails
	char** grown = reallocarray(*array, count + 2, sizeof(*grown));
	if (!grown) {
		free(message);
		return 1;
	}

	// Append message and terminate the array
	grown[count] = message;
	grown[count + 1] = NULL;

	*array = grown;

	return 0;
}
677
678 // Capabilities
679
680 static int pakfire_jail_drop_capabilities(struct pakfire_jail* jail) {
681 const int capabilities[] = {
682 // Deny access to the kernel's audit system
683 CAP_AUDIT_CONTROL,
684 CAP_AUDIT_READ,
685 CAP_AUDIT_WRITE,
686
687 // Deny suspending block devices
688 CAP_BLOCK_SUSPEND,
689
690 // Deny any stuff with BPF
691 CAP_BPF,
692
693 // Deny checkpoint restore
694 CAP_CHECKPOINT_RESTORE,
695
696 // Deny opening files by inode number (open_by_handle_at)
697 CAP_DAC_READ_SEARCH,
698
699 // Deny setting SUID bits
700 CAP_FSETID,
701
702 // Deny locking more memory
703 CAP_IPC_LOCK,
704
705 // Deny modifying any Apparmor/SELinux/SMACK configuration
706 CAP_MAC_ADMIN,
707 CAP_MAC_OVERRIDE,
708
709 // Deny creating any special devices
710 CAP_MKNOD,
711
712 // Deny setting any capabilities
713 CAP_SETFCAP,
714
715 // Deny reading from syslog
716 CAP_SYSLOG,
717
718 // Deny any admin actions (mount, sethostname, ...)
719 CAP_SYS_ADMIN,
720
721 // Deny rebooting the system
722 CAP_SYS_BOOT,
723
724 // Deny loading kernel modules
725 CAP_SYS_MODULE,
726
727 // Deny setting nice level
728 CAP_SYS_NICE,
729
730 // Deny access to /proc/kcore, /dev/mem, /dev/kmem
731 CAP_SYS_RAWIO,
732
733 // Deny circumventing any resource limits
734 CAP_SYS_RESOURCE,
735
736 // Deny setting the system time
737 CAP_SYS_TIME,
738
739 // Deny playing with suspend
740 CAP_WAKE_ALARM,
741
742 0,
743 };
744
745 DEBUG(jail->pakfire, "Dropping capabilities...\n");
746
747 size_t num_caps = 0;
748 int r;
749
750 // Drop any capabilities
751 for (const int* cap = capabilities; *cap; cap++) {
752 r = prctl(PR_CAPBSET_DROP, *cap, 0, 0, 0);
753 if (r) {
754 ERROR(jail->pakfire, "Could not drop capability %d: %m\n", *cap);
755 return r;
756 }
757
758 num_caps++;
759 }
760
761 // Fetch any capabilities
762 cap_t caps = cap_get_proc();
763 if (!caps) {
764 ERROR(jail->pakfire, "Could not read capabilities: %m\n");
765 return 1;
766 }
767
768 /*
769 Set inheritable capabilities
770
771 This ensures that no processes will be able to gain any of the listed
772 capabilities again.
773 */
774 r = cap_set_flag(caps, CAP_INHERITABLE, num_caps, capabilities, CAP_CLEAR);
775 if (r) {
776 ERROR(jail->pakfire, "cap_set_flag() failed: %m\n");
777 goto ERROR;
778 }
779
780 // Restore capabilities
781 r = cap_set_proc(caps);
782 if (r) {
783 ERROR(jail->pakfire, "Could not restore capabilities: %m\n");
784 goto ERROR;
785 }
786
787 ERROR:
788 if (caps)
789 cap_free(caps);
790
791 return r;
792 }
793
794 // Syscall Filter
795
796 static int pakfire_jail_limit_syscalls(struct pakfire_jail* jail) {
797 const int syscalls[] = {
798 // The kernel's keyring isn't namespaced
799 SCMP_SYS(keyctl),
800 SCMP_SYS(add_key),
801 SCMP_SYS(request_key),
802
803 // Disable userfaultfd
804 SCMP_SYS(userfaultfd),
805
806 // Disable perf which could leak a lot of information about the host
807 SCMP_SYS(perf_event_open),
808
809 0,
810 };
811 int r = 1;
812
813 DEBUG(jail->pakfire, "Applying syscall filter...\n");
814
815 // Setup a syscall filter which allows everything by default
816 scmp_filter_ctx ctx = seccomp_init(SCMP_ACT_ALLOW);
817 if (!ctx) {
818 ERROR(jail->pakfire, "Could not setup seccomp filter: %m\n");
819 goto ERROR;
820 }
821
822 // All all syscalls
823 for (const int* syscall = syscalls; *syscall; syscall++) {
824 r = seccomp_rule_add(ctx, SCMP_ACT_ERRNO(EPERM), *syscall, 0);
825 if (r) {
826 ERROR(jail->pakfire, "Could not configure syscall %d: %m\n", *syscall);
827 goto ERROR;
828 }
829 }
830
831 // Load syscall filter into the kernel
832 r = seccomp_load(ctx);
833 if (r) {
834 ERROR(jail->pakfire, "Could not load syscall filter into the kernel: %m\n");
835 goto ERROR;
836 }
837
838 ERROR:
839 if (ctx)
840 seccomp_release(ctx);
841
842 return r;
843 }
844
845 // UID/GID Mapping
846
847 static int pakfire_jail_write_uidgid_mapping(struct pakfire_jail* jail,
848 const char* path, uid_t mapped_id, size_t length) {
849 int r = 1;
850
851 // Open file for writing
852 FILE* f = fopen(path, "w");
853 if (!f) {
854 ERROR(jail->pakfire, "Could not open %s for writing: %m\n", path);
855 goto ERROR;
856 }
857
858 // Write configuration
859 int bytes_written = fprintf(f, "%d %d %ld\n", 0, mapped_id, length);
860 if (bytes_written <= 0) {
861 ERROR(jail->pakfire, "Could not write UID/GID mapping: %m\n");
862 goto ERROR;
863 }
864
865 // Close the file
866 r = fclose(f);
867 f = NULL;
868 if (r) {
869 ERROR(jail->pakfire, "Could not write UID/GID mapping: %m\n");
870
871 goto ERROR;
872 }
873
874 // Success
875 r = 0;
876
877 ERROR:
878 if (f)
879 fclose(f);
880
881 return r;
882 }
883
884 static int pakfire_jail_setup_uid_mapping(struct pakfire_jail* jail, pid_t pid) {
885 char path[PATH_MAX];
886 int r;
887
888 uid_t mapped_uid = 0;
889 const size_t length = 1;
890
891 // Fetch the UID of the calling process
892 uid_t uid = getuid();
893
894 // Have we been called by root?
895 if (uid == 0) {
896 mapped_uid = 0;
897
898 // Have we been called by an unprivileged user?
899 } else {
900 // XXX fetch SUBUID
901 mapped_uid = uid;
902 }
903
904 // Make path
905 r = pakfire_string_format(path, "/proc/%d/uid_map", pid);
906 if (r < 0)
907 return 1;
908
909 DEBUG(jail->pakfire, "Mapping UID range (%u - %lu)\n", mapped_uid, mapped_uid + length);
910
911 return pakfire_jail_write_uidgid_mapping(jail, path, mapped_uid, length);
912 }
913
914 static int pakfire_jail_setup_gid_mapping(struct pakfire_jail* jail, pid_t pid) {
915 char path[PATH_MAX];
916 int r;
917
918 gid_t mapped_gid = 0;
919 const size_t length = 1;
920
921 // Fetch the GID of the calling process
922 gid_t gid = getgid();
923
924 // Have we been called from the root group?
925 if (gid == 0) {
926 mapped_gid = 0;
927
928 // Have we been called by an unprivileged group?
929 } else {
930 // XXX fetch SUBGID
931 mapped_gid = gid;
932 }
933
934 // Make path
935 r = pakfire_string_format(path, "/proc/%d/gid_map", pid);
936 if (r < 0)
937 return 1;
938
939 DEBUG(jail->pakfire, "Mapping GID range (%u - %lu)\n", mapped_gid, mapped_gid + length);
940
941 return pakfire_jail_write_uidgid_mapping(jail, path, mapped_gid, length);
942 }
943
944 static int pakfire_jail_setgroups(struct pakfire_jail* jail, pid_t pid) {
945 char path[PATH_MAX];
946 int r = 1;
947
948 // Make path
949 r = pakfire_string_format(path, "/proc/%d/setgroups", pid);
950 if (r < 0)
951 return 1;
952
953 // Open file for writing
954 FILE* f = fopen(path, "w");
955 if (!f) {
956 ERROR(jail->pakfire, "Could not open %s for writing: %m\n", path);
957 goto ERROR;
958 }
959
960 // Write content
961 int bytes_written = fprintf(f, "deny\n");
962 if (bytes_written <= 0) {
963 ERROR(jail->pakfire, "Could not write to %s: %m\n", path);
964 goto ERROR;
965 }
966
967 r = fclose(f);
968 f = NULL;
969 if (r) {
970 ERROR(jail->pakfire, "Could not close %s: %m\n", path);
971 goto ERROR;
972 }
973
974 ERROR:
975 if (f)
976 fclose(f);
977
978 return r;
979 }
980
981 static int pakfire_jail_send_signal(struct pakfire_jail* jail, int fd) {
982 const uint64_t val = 1;
983 int r = 0;
984
985 DEBUG(jail->pakfire, "Sending signal...\n");
986
987 // Write to the file descriptor
988 ssize_t bytes_written = write(fd, &val, sizeof(val));
989 if (bytes_written < 0 || (size_t)bytes_written < sizeof(val)) {
990 ERROR(jail->pakfire, "Could not send signal: %m\n");
991 r = 1;
992 }
993
994 // Close the file descriptor
995 close(fd);
996
997 return r;
998 }
999
1000 static int pakfire_jail_wait_for_signal(struct pakfire_jail* jail, int fd) {
1001 uint64_t val = 0;
1002 int r = 0;
1003
1004 DEBUG(jail->pakfire, "Waiting for signal...\n");
1005
1006 ssize_t bytes_read = read(fd, &val, sizeof(val));
1007 if (bytes_read < 0 || (size_t)bytes_read < sizeof(val)) {
1008 ERROR(jail->pakfire, "Error waiting for signal: %m\n");
1009 r = 1;
1010 }
1011
1012 // Close the file descriptor
1013 close(fd);
1014
1015 return r;
1016 }
1017
1018 /*
1019 Performs the initialisation that needs to happen in the parent part
1020 */
1021 static int pakfire_jail_parent(struct pakfire_jail* jail, struct pakfire_jail_exec* ctx) {
1022 int r;
1023
1024 // Setup UID mapping
1025 r = pakfire_jail_setup_uid_mapping(jail, ctx->pid);
1026 if (r)
1027 return r;
1028
1029 // Write "deny" to /proc/PID/setgroups
1030 r = pakfire_jail_setgroups(jail, ctx->pid);
1031 if (r)
1032 return r;
1033
1034 // Setup GID mapping
1035 r = pakfire_jail_setup_gid_mapping(jail, ctx->pid);
1036 if (r)
1037 return r;
1038
1039 // Parent has finished initialisation
1040 DEBUG(jail->pakfire, "Parent has finished initialization\n");
1041
1042 // Send signal to client
1043 r = pakfire_jail_send_signal(jail, ctx->completed_fd);
1044 if (r)
1045 return r;
1046
1047 return 0;
1048 }
1049
1050 static int pakfire_jail_child(struct pakfire_jail* jail, struct pakfire_jail_exec* ctx,
1051 const char* argv[]) {
1052 int r;
1053
1054 // Redirect any logging to our log pipe
1055 pakfire_set_log_callback(jail->pakfire, pakfire_jail_log, &ctx->pipes);
1056
1057 // Fetch my own PID
1058 pid_t pid = getpid();
1059
1060 DEBUG(jail->pakfire, "Launched child process in jail with PID %d\n", pid);
1061
1062 // Wait for the parent to finish initialization
1063 r = pakfire_jail_wait_for_signal(jail, ctx->completed_fd);
1064 if (r)
1065 return r;
1066
1067 // Perform further initialization
1068
1069 // Fetch UID/GID
1070 uid_t uid = getuid();
1071 gid_t gid = getgid();
1072
1073 // Fetch EUID/EGID
1074 uid_t euid = geteuid();
1075 gid_t egid = getegid();
1076
1077 DEBUG(jail->pakfire, " UID: %d (effective %d)\n", uid, euid);
1078 DEBUG(jail->pakfire, " GID: %d (effective %d)\n", gid, egid);
1079
1080 // Check if we are (effectively running as root)
1081 if (uid != 0 || gid != 0) {
1082 ERROR(jail->pakfire, "Child process is not running as root\n");
1083 return 126;
1084 }
1085
1086 const char* root = pakfire_get_path(jail->pakfire);
1087 const char* arch = pakfire_get_arch(jail->pakfire);
1088
1089 // Change root (unless root is /)
1090 if (!pakfire_on_root(jail->pakfire)) {
1091 // Mount everything
1092 r = pakfire_mount_all(jail->pakfire);
1093 if (r)
1094 return r;
1095
1096 // Log all mountpoints
1097 pakfire_mount_list(jail->pakfire);
1098
1099 // Call chroot()
1100 r = chroot(root);
1101 if (r) {
1102 ERROR(jail->pakfire, "chroot() to %s failed: %m\n", root);
1103 return 1;
1104 }
1105
1106 // Change directory to /
1107 r = chdir("/");
1108 if (r) {
1109 ERROR(jail->pakfire, "chdir() after chroot() failed: %m\n");
1110 return 1;
1111 }
1112 }
1113
1114 // Set personality
1115 unsigned long persona = pakfire_arch_personality(arch);
1116 if (persona) {
1117 r = personality(persona);
1118 if (r < 0) {
1119 ERROR(jail->pakfire, "Could not set personality (%x)\n", (unsigned int)persona);
1120 return 1;
1121 }
1122 }
1123
1124 // Set nice level
1125 if (jail->nice) {
1126 DEBUG(jail->pakfire, "Setting nice level to %d\n", jail->nice);
1127
1128 r = setpriority(PRIO_PROCESS, pid, jail->nice);
1129 if (r) {
1130 ERROR(jail->pakfire, "Could not set nice level: %m\n");
1131 return 1;
1132 }
1133 }
1134
1135 // Close other end of log pipes
1136 close(ctx->pipes.log_INFO[0]);
1137 close(ctx->pipes.log_ERROR[0]);
1138 #ifdef ENABLE_DEBUG
1139 close(ctx->pipes.log_DEBUG[0]);
1140 #endif /* ENABLE_DEBUG */
1141
1142 // Connect standard output and error
1143 if (ctx->pipes.stdout[1] && ctx->pipes.stderr[1]) {
1144 r = dup2(ctx->pipes.stdout[1], STDOUT_FILENO);
1145 if (r < 0) {
1146 ERROR(jail->pakfire, "Could not connect fd %d to stdout: %m\n",
1147 ctx->pipes.stdout[1]);
1148
1149 return 1;
1150 }
1151
1152 r = dup2(ctx->pipes.stderr[1], STDERR_FILENO);
1153 if (r < 0) {
1154 ERROR(jail->pakfire, "Could not connect fd %d to stderr: %m\n",
1155 ctx->pipes.stderr[1]);
1156
1157 return 1;
1158 }
1159
1160 // Close the reading sides of the pipe
1161 close(ctx->pipes.stdout[0]);
1162 close(ctx->pipes.stderr[0]);
1163 }
1164
1165 // Reset open file limit (http://0pointer.net/blog/file-descriptor-limits.html)
1166 r = pakfire_rlimit_reset_nofile(jail->pakfire);
1167 if (r)
1168 return r;
1169
1170 // Drop capabilities
1171 r = pakfire_jail_drop_capabilities(jail);
1172 if (r)
1173 return r;
1174
1175 // Filter syscalls
1176 r = pakfire_jail_limit_syscalls(jail);
1177 if (r)
1178 return r;
1179
1180 // exec() command
1181 r = execvpe(argv[0], (char**)argv, jail->env);
1182 if (r < 0)
1183 ERROR(jail->pakfire, "Could not execve(): %m\n");
1184
1185 // Translate errno into regular exit code
1186 switch (errno) {
1187 case ENOENT:
1188 r = 127;
1189 break;
1190
1191 default:
1192 r = 1;
1193 }
1194
1195 // We should not get here
1196 return r;
1197 }
1198
1199 static int pakfire_jail_setup_pipe(struct pakfire_jail* jail, int (*fds)[2], const int flags) {
1200 int r = pipe2(*fds, flags);
1201 if (r < 0) {
1202 ERROR(jail->pakfire, "Could not setup pipe: %m\n");
1203 return 1;
1204 }
1205
1206 return 0;
1207 }
1208
/*
	Closes both ends of a pipe. Ends which are zero are considered to be
	not open and are skipped.
*/
static void pakfire_jail_close_pipe(struct pakfire_jail* jail, int fds[2]) {
	const int* end = fds;

	while (end < fds + 2) {
		if (*end != 0)
			close(*end);

		end++;
	}
}
1214
1215 // Run a command in the jail
1216 static int __pakfire_jail_exec(struct pakfire_jail* jail, const char* argv[]) {
1217 int exit = -1;
1218 int r;
1219
1220 // Check if argv is valid
1221 if (!argv || !argv[0]) {
1222 errno = EINVAL;
1223 return -1;
1224 }
1225
1226 // Initialize context for this call
1227 struct pakfire_jail_exec ctx = {
1228 .pipes = {
1229 .stdout = { 0, 0 },
1230 .stderr = { 0, 0 },
1231 },
1232 };
1233
1234 DEBUG(jail->pakfire, "Executing jail...\n");
1235
1236 /*
1237 Setup a file descriptor which can be used to notify the client that the parent
1238 has completed configuration.
1239 */
1240 ctx.completed_fd = eventfd(0, EFD_CLOEXEC);
1241 if (ctx.completed_fd < 0) {
1242 ERROR(jail->pakfire, "eventfd() failed: %m\n");
1243 return -1;
1244 }
1245
1246 // Create pipes to communicate with child process if we are not running interactively
1247 if (!pakfire_jail_has_flag(jail, PAKFIRE_JAIL_INTERACTIVE)) {
1248 // stdout
1249 r = pakfire_jail_setup_pipe(jail, &ctx.pipes.stdout, 0);
1250 if (r)
1251 goto ERROR;
1252
1253 // stderr
1254 r = pakfire_jail_setup_pipe(jail, &ctx.pipes.stderr, 0);
1255 if (r)
1256 goto ERROR;
1257 }
1258
1259 // Setup pipes for logging
1260 // INFO
1261 r = pakfire_jail_setup_pipe(jail, &ctx.pipes.log_INFO, O_CLOEXEC);
1262 if (r)
1263 goto ERROR;
1264
1265 // ERROR
1266 r = pakfire_jail_setup_pipe(jail, &ctx.pipes.log_ERROR, O_CLOEXEC);
1267 if (r)
1268 goto ERROR;
1269
1270 #ifdef ENABLE_DEBUG
1271 // DEBUG
1272 r = pakfire_jail_setup_pipe(jail, &ctx.pipes.log_DEBUG, O_CLOEXEC);
1273 if (r)
1274 goto ERROR;
1275 #endif /* ENABLE_DEBUG */
1276
1277 // Configure child process
1278 struct clone_args args = {
1279 .flags =
1280 CLONE_NEWCGROUP |
1281 CLONE_NEWIPC |
1282 CLONE_NEWNS |
1283 CLONE_NEWPID |
1284 CLONE_NEWUSER |
1285 CLONE_NEWUTS |
1286 CLONE_PIDFD,
1287 .exit_signal = SIGCHLD,
1288 .pidfd = (long long unsigned int)&ctx.pidfd,
1289 };
1290
1291 // Fork this process
1292 ctx.pid = clone3(&args, sizeof(args));
1293 if (ctx.pid < 0) {
1294 ERROR(jail->pakfire, "Could not clone: %m\n");
1295 return -1;
1296
1297 // Child process
1298 } else if (ctx.pid == 0) {
1299 r = pakfire_jail_child(jail, &ctx, argv);
1300 _exit(r);
1301 }
1302
1303 // Parent process
1304 r = pakfire_jail_parent(jail, &ctx);
1305 if (r)
1306 goto ERROR;
1307
1308 DEBUG(jail->pakfire, "Waiting for PID %d to finish its work\n", ctx.pid);
1309
1310 // Read output of the child process
1311 r = pakfire_jail_wait(jail, &ctx);
1312 if (r)
1313 goto ERROR;
1314
1315 // Handle exit status
1316 switch (ctx.status.si_code) {
1317 case CLD_EXITED:
1318 DEBUG(jail->pakfire, "The child process exited with code %d\n",
1319 ctx.status.si_status);
1320
1321 // Pass exit code
1322 exit = ctx.status.si_status;
1323 break;
1324
1325 case CLD_KILLED:
1326 case CLD_DUMPED:
1327 ERROR(jail->pakfire, "The child process was killed\n");
1328 break;
1329
1330 // Log anything else
1331 default:
1332 ERROR(jail->pakfire, "Unknown child exit code: %d\n", ctx.status.si_code);
1333 break;
1334 }
1335
1336 ERROR:
1337 // Close any file descriptors
1338 pakfire_jail_close_pipe(jail, ctx.pipes.stdout);
1339 pakfire_jail_close_pipe(jail, ctx.pipes.stderr);
1340 if (ctx.pidfd)
1341 close(ctx.pidfd);
1342 pakfire_jail_close_pipe(jail, ctx.pipes.log_INFO);
1343 pakfire_jail_close_pipe(jail, ctx.pipes.log_ERROR);
1344 pakfire_jail_close_pipe(jail, ctx.pipes.log_DEBUG);
1345
1346 // Umount everything
1347 if (!pakfire_on_root(jail->pakfire))
1348 pakfire_umount_all(jail->pakfire);
1349
1350 return exit;
1351 }
1352
1353 PAKFIRE_EXPORT int pakfire_jail_exec(struct pakfire_jail* jail,
1354 const char* argv[], char*** output) {
1355 int r;
1356
1357 // Store logging callback
1358 pakfire_jail_log_callback log_callback = jail->log_callback;
1359 void* log_data = jail->log_data;
1360
1361 // Capture output if requested by user
1362 if (output)
1363 pakfire_jail_set_log_callback(jail, pakfire_jail_capture_stdout, output);
1364
1365 // Run exec()
1366 r = __pakfire_jail_exec(jail, argv);
1367
1368 // Restore log callback
1369 pakfire_jail_set_log_callback(jail, log_callback, log_data);
1370
1371 return r;
1372 }
1373
1374 PAKFIRE_EXPORT int pakfire_jail_exec_script(struct pakfire_jail* jail,
1375 const char* script, const size_t size, const char* args[], char*** output) {
1376 char path[PATH_MAX];
1377 const char** argv = NULL;
1378 int r;
1379
1380 const char* root = pakfire_get_path(jail->pakfire);
1381
1382 // Write the scriptlet to disk
1383 r = pakfire_path_join(path, root, "pakfire-script.XXXXXX");
1384 if (r < 0)
1385 goto ERROR;
1386
1387 // Open a temporary file
1388 int fd = mkstemp(path);
1389 if (fd < 0) {
1390 ERROR(jail->pakfire, "Could not open a temporary file: %m\n");
1391 r = 1;
1392 goto ERROR;
1393 }
1394
1395 DEBUG(jail->pakfire, "Writing script to %s:\n%.*s\n", path, (int)size, script);
1396
1397 // Write data
1398 ssize_t bytes_written = write(fd, script, size);
1399 if (bytes_written < (ssize_t)size) {
1400 ERROR(jail->pakfire, "Could not write script to file %s: %m\n", path);
1401 r = 1;
1402 goto ERROR;
1403 }
1404
1405 // Make the script executable
1406 r = fchmod(fd, S_IRUSR|S_IWUSR|S_IXUSR);
1407 if (r) {
1408 ERROR(jail->pakfire, "Could not set executable permissions on %s: %m\n", path);
1409 goto ERROR;
1410 }
1411
1412 // Close file
1413 r = close(fd);
1414 if (r) {
1415 ERROR(jail->pakfire, "Could not close script file %s: %m\n", path);
1416 r = 1;
1417 goto ERROR;
1418 }
1419
1420 // Count how many arguments were passed
1421 unsigned int argc = 1;
1422 if (args) {
1423 for (const char** arg = args; *arg; arg++)
1424 argc++;
1425 }
1426
1427 argv = calloc(argc + 1, sizeof(*argv));
1428 if (!argv) {
1429 ERROR(jail->pakfire, "Could not allocate argv: %m\n");
1430 goto ERROR;
1431 }
1432
1433 // Set command
1434 argv[0] = (root) ? pakfire_path_relpath(root, path) : path;
1435
1436 // Copy args
1437 for (unsigned int i = 1; i < argc; i++)
1438 argv[i] = args[i-1];
1439
1440 // Run the script
1441 r = pakfire_jail_exec(jail, argv, output);
1442
1443 ERROR:
1444 if (argv)
1445 free(argv);
1446
1447 // Remove script from disk
1448 if (*path)
1449 unlink(path);
1450
1451 return r;
1452 }
1453
1454 /*
1455 A convenience function that creates a new jail, runs the given command and destroys
1456 the jail again.
1457 */
1458 int pakfire_jail_run(struct pakfire* pakfire, const char* argv[], int flags, char*** output) {
1459 struct pakfire_jail* jail = NULL;
1460 int r;
1461
1462 // Create a new jail
1463 r = pakfire_jail_create(&jail, pakfire, flags);
1464 if (r)
1465 goto ERROR;
1466
1467 // Execute the command
1468 r = pakfire_jail_exec(jail, argv, output);
1469
1470 ERROR:
1471 if (jail)
1472 pakfire_jail_unref(jail);
1473
1474 return r;
1475 }
1476
1477 int pakfire_jail_run_script(struct pakfire* pakfire,
1478 const char* script, const size_t length, const char* argv[], int flags, char*** output) {
1479 struct pakfire_jail* jail = NULL;
1480 int r;
1481
1482 // Create a new jail
1483 r = pakfire_jail_create(&jail, pakfire, flags);
1484 if (r)
1485 goto ERROR;
1486
1487 // Execute the command
1488 r = pakfire_jail_exec_script(jail, script, length, argv, output);
1489
1490 ERROR:
1491 if (jail)
1492 pakfire_jail_unref(jail);
1493
1494 return r;
1495 }
1496
1497
1498 int pakfire_jail_shell(struct pakfire* pakfire) {
1499 const char* argv[] = {
1500 "/bin/bash", "--login", NULL,
1501 };
1502
1503 // Execute /bin/bash
1504 return pakfire_jail_run(pakfire, argv, PAKFIRE_JAIL_INTERACTIVE, NULL);
1505 }
1506
1507 int pakfire_jail_ldconfig(struct pakfire* pakfire) {
1508 char path[PATH_MAX];
1509
1510 const char* ldconfig = "/sbin/ldconfig";
1511
1512 // Check if ldconfig exists before calling it to avoid overhead
1513 int r = pakfire_make_path(pakfire, path, ldconfig);
1514 if (r < 0)
1515 return 1;
1516
1517 // Check if ldconfig is executable
1518 r = access(path, X_OK);
1519 if (r) {
1520 DEBUG(pakfire, "%s is not executable. Skipping...\n", ldconfig);
1521 return 0;
1522 }
1523
1524 const char* argv[] = {
1525 ldconfig, NULL,
1526 };
1527
1528 // Run ldconfig
1529 return pakfire_jail_run(pakfire, argv, 0, NULL);
1530 }
1531
1532 // Utility functions
1533
1534 PAKFIRE_EXPORT char* pakfire_jail_concat_output(struct pakfire_jail* jail,
1535 const char** input, size_t* length) {
1536 // Return nothing on no input
1537 if (!input)
1538 return NULL;
1539
1540 // XXX Maybe there is a more efficient way to do this
1541
1542 char* output = pakfire_string_join((char**)input, "");
1543 if (!output)
1544 return NULL;
1545
1546 // Store the length of the result
1547 if (length)
1548 *length = strlen(output);
1549
1550 return output;
1551 }