]> git.ipfire.org Git - people/ms/pakfire.git/blob - src/libpakfire/jail.c
python: Add switch to return output on execute
[people/ms/pakfire.git] / src / libpakfire / jail.c
1 /*#############################################################################
2 # #
3 # Pakfire - The IPFire package management system #
4 # Copyright (C) 2022 Pakfire development team #
5 # #
6 # This program is free software: you can redistribute it and/or modify #
7 # it under the terms of the GNU General Public License as published by #
8 # the Free Software Foundation, either version 3 of the License, or #
9 # (at your option) any later version. #
10 # #
11 # This program is distributed in the hope that it will be useful, #
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of #
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
14 # GNU General Public License for more details. #
15 # #
16 # You should have received a copy of the GNU General Public License #
17 # along with this program. If not, see <http://www.gnu.org/licenses/>. #
18 # #
19 #############################################################################*/
20
21 #include <errno.h>
22 #include <linux/capability.h>
23 #include <linux/fcntl.h>
24 #include <linux/sched.h>
25 #include <linux/wait.h>
26 #include <sched.h>
27 #include <signal.h>
28 #include <stdlib.h>
29 #include <syscall.h>
30 #include <sys/capability.h>
31 #include <sys/epoll.h>
32 #include <sys/eventfd.h>
33 #include <sys/personality.h>
34 #include <sys/prctl.h>
35 #include <sys/resource.h>
36 #include <sys/types.h>
37 #include <sys/wait.h>
38
39 // libseccomp
40 #include <seccomp.h>
41
42 #include <pakfire/arch.h>
43 #include <pakfire/jail.h>
44 #include <pakfire/logging.h>
45 #include <pakfire/mount.h>
46 #include <pakfire/pakfire.h>
47 #include <pakfire/private.h>
48 #include <pakfire/util.h>
49
50 #define BUFFER_SIZE 1024 * 64
51 #define ENVIRON_SIZE 128
52 #define EPOLL_MAX_EVENTS 2
53
// Environment variables that are set for every command run in a jail.
// The table ends with an entry whose key is NULL.
static const struct environ {
	const char* key;
	const char* val;
} ENV[] = {
	{ .key = "LANG", .val = "en_US.utf-8" },
	{ .key = "TERM", .val = "vt100" },
	{ .key = NULL,   .val = NULL },
};
63
64 struct pakfire_jail {
65 struct pakfire* pakfire;
66 int nrefs;
67
68 // Flags
69 int flags;
70
71 // Resource Limits
72 int nice;
73
74 // Environment
75 char* env[ENVIRON_SIZE];
76
77 // Logging
78 pakfire_jail_log_callback log_callback;
79 void* log_data;
80 };
81
82 struct pakfire_log_buffer {
83 char data[BUFFER_SIZE];
84 size_t used;
85 };
86
87 struct pakfire_jail_exec {
88 // PID (of the child)
89 pid_t pid;
90 int pidfd;
91
92 // Process status (from waitid)
93 siginfo_t status;
94
95 // FD to notify the client that the parent has finished initialization
96 int completed_fd;
97
98 // Log pipes
99 struct {
100 int stdout[2];
101 int stderr[2];
102 } pipes;
103
104 // Log buffers
105 struct {
106 struct pakfire_log_buffer stdout;
107 struct pakfire_log_buffer stderr;
108 } buffers;
109 };
110
// Invokes the clone3 system call directly and returns its result
static int clone3(struct clone_args* args, size_t size) {
	const long result = syscall(__NR_clone3, args, size);

	return result;
}
114
115 static void pakfire_jail_free(struct pakfire_jail* jail) {
116 DEBUG(jail->pakfire, "Freeing jail at %p\n", jail);
117
118 // Free environment
119 for (unsigned int i = 0; jail->env[i]; i++)
120 free(jail->env[i]);
121
122 pakfire_unref(jail->pakfire);
123 free(jail);
124 }
125
/*
	Default log callback: forwards LOG_INFO lines to INFO() and LOG_ERR lines
	to ERROR(). Lines with any other priority are dropped. Always returns 0.
*/
static int pakfire_jail_default_log_callback(struct pakfire* pakfire, void* data,
		int priority, const char* line, size_t length) {
	if (priority == LOG_INFO) {
		INFO(pakfire, "%s", line);

	} else if (priority == LOG_ERR) {
		ERROR(pakfire, "%s", line);
	}

	return 0;
}
140
/*
	Prepares the environment for interactive use: sets a prompt and copies
	TERM and LANG from the calling process if they are set there.

	Returns zero on success or the error returned by pakfire_jail_set_env().
*/
static int pakfire_jail_setup_interactive_env(struct pakfire_jail* jail) {
	// Variables that are inherited from the caller (in this order)
	static const char* const inherited[] = { "TERM", "LANG", NULL };

	// Set PS1
	int r = pakfire_jail_set_env(jail, "PS1", "pakfire-jail \\w> ");
	if (r)
		return r;

	for (const char* const* name = inherited; *name; name++) {
		const char* value = secure_getenv(*name);

		// Skip anything that is not set
		if (!value)
			continue;

		r = pakfire_jail_set_env(jail, *name, value);
		if (r)
			return r;
	}

	return 0;
}
165
166 PAKFIRE_EXPORT int pakfire_jail_create(struct pakfire_jail** jail,
167 struct pakfire* pakfire, int flags) {
168 int r;
169
170 // Allocate a new jail
171 struct pakfire_jail* j = calloc(1, sizeof(*j));
172 if (!j)
173 return 1;
174
175 // Reference Pakfire
176 j->pakfire = pakfire_ref(pakfire);
177
178 // Initialize reference counter
179 j->nrefs = 1;
180
181 // Store flags
182 j->flags = flags;
183
184 DEBUG(j->pakfire, "Allocated new jail at %p\n", j);
185
186 // Set default log callback
187 r = pakfire_jail_set_log_callback(j, pakfire_jail_default_log_callback, NULL);
188 if (r)
189 goto ERROR;
190
191 // Set default environment
192 for (const struct environ* e = ENV; e->key; e++) {
193 r = pakfire_jail_set_env(j, e->key, e->val);
194 if (r)
195 goto ERROR;
196 }
197
198 // Setup interactive stuff
199 if (j->flags & PAKFIRE_JAIL_INTERACTIVE) {
200 r = pakfire_jail_setup_interactive_env(j);
201 if (r)
202 goto ERROR;
203 }
204
205 // Done
206 *jail = j;
207 return 0;
208
209 ERROR:
210 pakfire_jail_free(j);
211
212 return r;
213 }
214
215 PAKFIRE_EXPORT struct pakfire_jail* pakfire_jail_ref(struct pakfire_jail* jail) {
216 ++jail->nrefs;
217
218 return jail;
219 }
220
221 PAKFIRE_EXPORT struct pakfire_jail* pakfire_jail_unref(struct pakfire_jail* jail) {
222 if (--jail->nrefs > 0)
223 return jail;
224
225 pakfire_jail_free(jail);
226 return NULL;
227 }
228
229 static int pakfire_jail_has_flag(struct pakfire_jail* jail, int flag) {
230 return jail->flags & flag;
231 }
232
233 // Resource Limits
234
235 PAKFIRE_EXPORT int pakfire_jail_nice(struct pakfire_jail* jail, int nice) {
236 // Check if nice level is in range
237 if (nice < -19 || nice > 20) {
238 errno = EINVAL;
239 return 1;
240 }
241
242 // Store nice level
243 jail->nice = nice;
244
245 return 0;
246 }
247
248 // Environment
249
250 // Returns the length of the environment
251 static unsigned int pakfire_jail_env_length(struct pakfire_jail* jail) {
252 unsigned int i = 0;
253
254 // Count everything in the environment
255 for (char** e = jail->env; *e; e++)
256 i++;
257
258 return i;
259 }
260
261 // Finds an existing environment variable and returns its index or -1 if not found
262 static int pakfire_jail_find_env(struct pakfire_jail* jail, const char* key) {
263 if (!key) {
264 errno = EINVAL;
265 return -1;
266 }
267
268 char buffer[strlen(key) + 2];
269 pakfire_string_format(buffer, "%s=", key);
270
271 for (unsigned int i = 0; jail->env[i]; i++) {
272 if (pakfire_string_startswith(jail->env[i], buffer))
273 return i;
274 }
275
276 // Nothing found
277 return -1;
278 }
279
280 // Returns the value of an environment variable or NULL
281 PAKFIRE_EXPORT const char* pakfire_jail_get_env(struct pakfire_jail* jail,
282 const char* key) {
283 int i = pakfire_jail_find_env(jail, key);
284 if (i < 0)
285 return NULL;
286
287 return jail->env[i] + strlen(key) + 1;
288 }
289
290 // Sets an environment variable
291 PAKFIRE_EXPORT int pakfire_jail_set_env(struct pakfire_jail* jail,
292 const char* key, const char* value) {
293 // Find the index where to write this value to
294 int i = pakfire_jail_find_env(jail, key);
295 if (i < 0)
296 i = pakfire_jail_env_length(jail);
297
298 // Return -ENOSPC when the environment is full
299 if (i >= ENVIRON_SIZE) {
300 errno = ENOSPC;
301 return -1;
302 }
303
304 // Free any previous value
305 if (jail->env[i])
306 free(jail->env[i]);
307
308 // Format and set environment variable
309 asprintf(&jail->env[i], "%s=%s", key, value);
310
311 DEBUG(jail->pakfire, "Set environment variable: %s\n", jail->env[i]);
312
313 return 0;
314 }
315
316 // Imports an environment
317 PAKFIRE_EXPORT int pakfire_jail_import_env(struct pakfire_jail* jail, const char* env[]) {
318 if (!env)
319 return 0;
320
321 char* key;
322 char* val;
323 int r;
324
325 // Copy environment variables
326 for (unsigned int i = 0; env[i]; i++) {
327 r = pakfire_string_partition(env[i], "=", &key, &val);
328 if (r)
329 continue;
330
331 // Set value
332 r = pakfire_jail_set_env(jail, key, val);
333
334 if (key)
335 free(key);
336 if (val)
337 free(val);
338
339 // Break on error
340 if (r)
341 return r;
342 }
343
344 return 0;
345 }
346
347 // Logging
348
349 PAKFIRE_EXPORT int pakfire_jail_set_log_callback(struct pakfire_jail* jail,
350 pakfire_jail_log_callback callback, void* data) {
351 jail->log_callback = callback;
352 jail->log_data = data;
353
354 return 0;
355 }
356
357 static int pakfire_jail_log_buffer_is_full(const struct pakfire_log_buffer* buffer) {
358 return (sizeof(buffer->data) == buffer->used);
359 }
360
361 /*
362 This function reads as much data as it can from the file descriptor.
363 If it finds a whole line in it, it will send it to the logger and repeat the process.
364 If not newline character is found, it will try to read more data until it finds one.
365 */
366 static int pakfire_jail_handle_log(struct pakfire_jail* jail,
367 struct pakfire_jail_exec* ctx, int priority, int fd, struct pakfire_log_buffer* buffer) {
368 char line[BUFFER_SIZE + 1];
369
370 // Fill up buffer from fd
371 if (buffer->used < sizeof(buffer->data)) {
372 ssize_t bytes_read = read(fd, buffer->data + buffer->used,
373 sizeof(buffer->data) - buffer->used);
374
375 // Handle errors
376 if (bytes_read < 0) {
377 ERROR(jail->pakfire, "Could not read from fd %d: %m\n", fd);
378 return -1;
379 }
380
381 // Update buffer size
382 buffer->used += bytes_read;
383 }
384
385 // See if we have any lines that we can write
386 while (buffer->used) {
387 // Search for the end of the first line
388 char* eol = memchr(buffer->data, '\n', buffer->used);
389
390 // No newline found
391 if (!eol) {
392 // If the buffer is full, we send the content to the logger and try again
393 // This should not happen in practise
394 if (pakfire_jail_log_buffer_is_full(buffer)) {
395 DEBUG(jail->pakfire, "Logging buffer is full. Sending all content\n");
396
397 eol = buffer->data + sizeof(buffer->data) - 1;
398
399 // Otherwise we might have only read parts of the output
400 } else
401 break;
402 }
403
404 // Find the length of the string
405 size_t length = eol - buffer->data + 1;
406
407 // Copy the line into the buffer
408 memcpy(line, buffer->data, length);
409
410 // Terminate the string
411 line[length] = '\0';
412
413 // Log the line
414 if (jail->log_callback) {
415 int r = jail->log_callback(jail->pakfire, jail->log_data, priority, line, length);
416 if (r) {
417 ERROR(jail->pakfire, "The logging callback returned an error: %d\n", r);
418 return r;
419 }
420 }
421
422 // Remove line from buffer
423 memmove(buffer->data, buffer->data + length, buffer->used - length);
424 buffer->used -= length;
425 }
426
427 return 0;
428 }
429
430 static int pakfire_jail_wait(struct pakfire_jail* jail, struct pakfire_jail_exec* ctx) {
431 int epollfd = -1;
432 struct epoll_event ev;
433 struct epoll_event events[EPOLL_MAX_EVENTS];
434 int r = 0;
435
436 // Fetch file descriptors from context
437 const int stdout = ctx->pipes.stdout[0];
438 const int stderr = ctx->pipes.stderr[0];
439 const int pidfd = ctx->pidfd;
440
441 // Close any unused file descriptors
442 if (ctx->pipes.stdout[1]) {
443 close(ctx->pipes.stdout[1]);
444 ctx->pipes.stdout[1] = 0;
445 }
446 if (ctx->pipes.stderr[1]) {
447 close(ctx->pipes.stderr[1]);
448 ctx->pipes.stderr[1] = 0;
449 }
450
451 // Make a list of all file descriptors we are interested in
452 int fds[] = {
453 stdout, stderr, pidfd,
454 };
455
456 // Setup epoll
457 epollfd = epoll_create1(0);
458 if (epollfd < 0) {
459 ERROR(jail->pakfire, "Could not initialize epoll(): %m\n");
460 r = 1;
461 goto ERROR;
462 }
463
464 ev.events = EPOLLIN;
465
466 // Turn file descriptors into non-blocking mode and add them to epoll()
467 for (unsigned int i = 0; i < 3; i++) {
468 int fd = fds[i];
469
470 // Skip fds which were not initialized
471 if (fd <= 0)
472 continue;
473
474 ev.data.fd = fd;
475
476 if (epoll_ctl(epollfd, EPOLL_CTL_ADD, fd, &ev) < 0) {
477 ERROR(jail->pakfire, "Could not add file descriptor %d to epoll(): %m\n", fd);
478 r = 1;
479 goto ERROR;
480 }
481 }
482
483 int ended = 0;
484
485 // Loop for as long as the process is alive
486 while (!ended) {
487 int num = epoll_wait(epollfd, events, EPOLL_MAX_EVENTS, -1);
488 if (num < 1) {
489 // Ignore if epoll_wait() has been interrupted
490 if (errno == EINTR)
491 continue;
492
493 ERROR(jail->pakfire, "epoll_wait() failed: %m\n");
494 r = 1;
495
496 goto ERROR;
497 }
498
499 struct pakfire_log_buffer* buffer;
500 int priority;
501
502 for (int i = 0; i < num; i++) {
503 int fd = events[i].data.fd;
504
505 // Handle any changes to the PIDFD
506 if (fd == pidfd) {
507 // Call waidid() and store the result
508 r = waitid(P_PIDFD, ctx->pidfd, &ctx->status, WEXITED);
509 if (r) {
510 ERROR(jail->pakfire, "waitid() failed: %m\n");
511 goto ERROR;
512 }
513
514 // Mark that we have ended so that we will process the remaining
515 // events from epoll() now, but won't restart the outer loop.
516 ended = 1;
517 continue;
518
519 // Handle anything from the log pipes
520 } else if (fd == stdout) {
521 buffer = &ctx->buffers.stdout;
522 priority = LOG_INFO;
523
524 } else if (fd == stderr) {
525 buffer = &ctx->buffers.stderr;
526 priority = LOG_ERR;
527
528 } else {
529 DEBUG(jail->pakfire, "Received invalid file descriptor %d\n", fd);
530 continue;
531 }
532
533 // Handle log event
534 r = pakfire_jail_handle_log(jail, ctx, priority, fd, buffer);
535 if (r)
536 goto ERROR;
537 }
538 }
539
540 ERROR:
541 if (epollfd > 0)
542 close(epollfd);
543
544 return r;
545 }
546
/*
	Log callback which collects everything that was written to standard output.

	data must point to a char** which is either NULL or a NULL-terminated
	array of strings. Every LOG_INFO line is copied (without any trailing
	newline) and appended to that array. Lines with any other priority are
	passed on to the default log callback.

	Returns zero on success and 1 if memory could not be allocated. On
	failure, the array is left as it was.
*/
static int pakfire_jail_capture_stdout(struct pakfire* pakfire, void* data, int priority,
		const char* line, size_t length) {
	char*** array = (char***)data;

	// Send everything that is not stdout to the default logger
	if (priority != LOG_INFO)
		return pakfire_jail_default_log_callback(pakfire, NULL, priority, line, length);

	// Create a copy of line
	char* message = strdup(line);
	if (!message)
		return 1;

	// Remove any trailing newline
	pakfire_remove_trailing_newline(message);

	// Determine the length of the existing array
	size_t count = 0;
	if (*array) {
		for (char** element = *array; *element; element++)
			count++;
	}

	// Allocate space for the new element and the terminator.
	// The result is stored in a temporary so that the existing array is not
	// lost if the allocation fails.
	char** grown = reallocarray(*array, count + 2, sizeof(**array));
	if (!grown) {
		free(message);
		return 1;
	}

	// Append message and terminate the array
	grown[count] = message;
	grown[count + 1] = NULL;

	*array = grown;

	return 0;
}
584
585 // Capabilities
586
587 static int pakfire_jail_drop_capabilities(struct pakfire_jail* jail) {
588 const int capabilities[] = {
589 // Deny access to the kernel's audit system
590 CAP_AUDIT_CONTROL,
591 CAP_AUDIT_READ,
592 CAP_AUDIT_WRITE,
593
594 // Deny suspending block devices
595 CAP_BLOCK_SUSPEND,
596
597 // Deny any stuff with BPF
598 CAP_BPF,
599
600 // Deny checkpoint restore
601 CAP_CHECKPOINT_RESTORE,
602
603 // Deny opening files by inode number (open_by_handle_at)
604 CAP_DAC_READ_SEARCH,
605
606 // Deny setting SUID bits
607 CAP_FSETID,
608
609 // Deny locking more memory
610 CAP_IPC_LOCK,
611
612 // Deny modifying any Apparmor/SELinux/SMACK configuration
613 CAP_MAC_ADMIN,
614 CAP_MAC_OVERRIDE,
615
616 // Deny creating any special devices
617 CAP_MKNOD,
618
619 // Deny setting any capabilities
620 CAP_SETFCAP,
621
622 // Deny reading from syslog
623 CAP_SYSLOG,
624
625 // Deny any admin actions (mount, sethostname, ...)
626 CAP_SYS_ADMIN,
627
628 // Deny rebooting the system
629 CAP_SYS_BOOT,
630
631 // Deny loading kernel modules
632 CAP_SYS_MODULE,
633
634 // Deny setting nice level
635 CAP_SYS_NICE,
636
637 // Deny access to /proc/kcore, /dev/mem, /dev/kmem
638 CAP_SYS_RAWIO,
639
640 // Deny circumventing any resource limits
641 CAP_SYS_RESOURCE,
642
643 // Deny setting the system time
644 CAP_SYS_TIME,
645
646 // Deny playing with suspend
647 CAP_WAKE_ALARM,
648
649 0,
650 };
651
652 DEBUG(jail->pakfire, "Dropping capabilities...\n");
653
654 size_t num_caps = 0;
655 int r;
656
657 // Drop any capabilities
658 for (const int* cap = capabilities; *cap; cap++) {
659 r = prctl(PR_CAPBSET_DROP, *cap, 0, 0, 0);
660 if (r) {
661 ERROR(jail->pakfire, "Could not drop capability %d: %m\n", *cap);
662 return r;
663 }
664
665 num_caps++;
666 }
667
668 // Fetch any capabilities
669 cap_t caps = cap_get_proc();
670 if (!caps) {
671 ERROR(jail->pakfire, "Could not read capabilities: %m\n");
672 return 1;
673 }
674
675 /*
676 Set inheritable capabilities
677
678 This ensures that no processes will be able to gain any of the listed
679 capabilities again.
680 */
681 r = cap_set_flag(caps, CAP_INHERITABLE, num_caps, capabilities, CAP_CLEAR);
682 if (r) {
683 ERROR(jail->pakfire, "cap_set_flag() failed: %m\n");
684 goto ERROR;
685 }
686
687 // Restore capabilities
688 r = cap_set_proc(caps);
689 if (r) {
690 ERROR(jail->pakfire, "Could not restore capabilities: %m\n");
691 goto ERROR;
692 }
693
694 ERROR:
695 if (caps)
696 cap_free(caps);
697
698 return r;
699 }
700
701 // Syscall Filter
702
703 static int pakfire_jail_limit_syscalls(struct pakfire_jail* jail) {
704 const int syscalls[] = {
705 // The kernel's keyring isn't namespaced
706 SCMP_SYS(keyctl),
707 SCMP_SYS(add_key),
708 SCMP_SYS(request_key),
709
710 // Disable userfaultfd
711 SCMP_SYS(userfaultfd),
712
713 // Disable perf which could leak a lot of information about the host
714 SCMP_SYS(perf_event_open),
715
716 0,
717 };
718 int r = 1;
719
720 DEBUG(jail->pakfire, "Applying syscall filter...\n");
721
722 // Setup a syscall filter which allows everything by default
723 scmp_filter_ctx ctx = seccomp_init(SCMP_ACT_ALLOW);
724 if (!ctx) {
725 ERROR(jail->pakfire, "Could not setup seccomp filter: %m\n");
726 goto ERROR;
727 }
728
729 // All all syscalls
730 for (const int* syscall = syscalls; *syscall; syscall++) {
731 r = seccomp_rule_add(ctx, SCMP_ACT_ERRNO(EPERM), *syscall, 0);
732 if (r) {
733 ERROR(jail->pakfire, "Could not configure syscall %d: %m\n", *syscall);
734 goto ERROR;
735 }
736 }
737
738 // Load syscall filter into the kernel
739 r = seccomp_load(ctx);
740 if (r) {
741 ERROR(jail->pakfire, "Could not load syscall filter into the kernel: %m\n");
742 goto ERROR;
743 }
744
745 ERROR:
746 if (ctx)
747 seccomp_release(ctx);
748
749 return r;
750 }
751
752 // UID/GID Mapping
753
754 static int pakfire_jail_write_uidgid_mapping(struct pakfire_jail* jail,
755 const char* path, uid_t mapped_id, size_t length) {
756 int r = 1;
757
758 // Open file for writing
759 FILE* f = fopen(path, "w");
760 if (!f) {
761 ERROR(jail->pakfire, "Could not open %s for writing: %m\n", path);
762 goto ERROR;
763 }
764
765 // Write configuration
766 int bytes_written = fprintf(f, "%d %d %ld\n", 0, mapped_id, length);
767 if (bytes_written <= 0) {
768 ERROR(jail->pakfire, "Could not write UID/GID mapping: %m\n");
769 goto ERROR;
770 }
771
772 // Close the file
773 r = fclose(f);
774 f = NULL;
775 if (r) {
776 ERROR(jail->pakfire, "Could not write UID/GID mapping: %m\n");
777
778 goto ERROR;
779 }
780
781 // Success
782 r = 0;
783
784 ERROR:
785 if (f)
786 fclose(f);
787
788 return r;
789 }
790
791 static int pakfire_jail_setup_uid_mapping(struct pakfire_jail* jail, pid_t pid) {
792 char path[PATH_MAX];
793 int r;
794
795 uid_t mapped_uid = 0;
796 const size_t length = 1;
797
798 // Fetch the UID of the calling process
799 uid_t uid = getuid();
800
801 // Have we been called by root?
802 if (uid == 0) {
803 mapped_uid = 0;
804
805 // Have we been called by an unprivileged user?
806 } else {
807 // XXX fetch SUBUID
808 mapped_uid = uid;
809 }
810
811 // Make path
812 r = pakfire_string_format(path, "/proc/%d/uid_map", pid);
813 if (r < 0)
814 return 1;
815
816 DEBUG(jail->pakfire, "Mapping UID range (%u - %lu)\n", mapped_uid, mapped_uid + length);
817
818 return pakfire_jail_write_uidgid_mapping(jail, path, mapped_uid, length);
819 }
820
821 static int pakfire_jail_setup_gid_mapping(struct pakfire_jail* jail, pid_t pid) {
822 char path[PATH_MAX];
823 int r;
824
825 gid_t mapped_gid = 0;
826 const size_t length = 1;
827
828 // Fetch the GID of the calling process
829 gid_t gid = getgid();
830
831 // Have we been called from the root group?
832 if (gid == 0) {
833 mapped_gid = 0;
834
835 // Have we been called by an unprivileged group?
836 } else {
837 // XXX fetch SUBGID
838 mapped_gid = gid;
839 }
840
841 // Make path
842 r = pakfire_string_format(path, "/proc/%d/gid_map", pid);
843 if (r < 0)
844 return 1;
845
846 DEBUG(jail->pakfire, "Mapping GID range (%u - %lu)\n", mapped_gid, mapped_gid + length);
847
848 return pakfire_jail_write_uidgid_mapping(jail, path, mapped_gid, length);
849 }
850
851 static int pakfire_jail_setgroups(struct pakfire_jail* jail, pid_t pid) {
852 char path[PATH_MAX];
853 int r = 1;
854
855 // Make path
856 r = pakfire_string_format(path, "/proc/%d/setgroups", pid);
857 if (r < 0)
858 return 1;
859
860 // Open file for writing
861 FILE* f = fopen(path, "w");
862 if (!f) {
863 ERROR(jail->pakfire, "Could not open %s for writing: %m\n", path);
864 goto ERROR;
865 }
866
867 // Write content
868 int bytes_written = fprintf(f, "deny\n");
869 if (bytes_written <= 0) {
870 ERROR(jail->pakfire, "Could not write to %s: %m\n", path);
871 goto ERROR;
872 }
873
874 r = fclose(f);
875 f = NULL;
876 if (r) {
877 ERROR(jail->pakfire, "Could not close %s: %m\n", path);
878 goto ERROR;
879 }
880
881 ERROR:
882 if (f)
883 fclose(f);
884
885 return r;
886 }
887
888 static int pakfire_jail_send_signal(struct pakfire_jail* jail, int fd) {
889 const uint64_t val = 1;
890 int r = 0;
891
892 DEBUG(jail->pakfire, "Sending signal...\n");
893
894 // Write to the file descriptor
895 ssize_t bytes_written = write(fd, &val, sizeof(val));
896 if (bytes_written < 0 || (size_t)bytes_written < sizeof(val)) {
897 ERROR(jail->pakfire, "Could not send signal: %m\n");
898 r = 1;
899 }
900
901 // Close the file descriptor
902 close(fd);
903
904 return r;
905 }
906
907 static int pakfire_jail_wait_for_signal(struct pakfire_jail* jail, int fd) {
908 uint64_t val = 0;
909 int r = 0;
910
911 DEBUG(jail->pakfire, "Waiting for signal...\n");
912
913 ssize_t bytes_read = read(fd, &val, sizeof(val));
914 if (bytes_read < 0 || (size_t)bytes_read < sizeof(val)) {
915 ERROR(jail->pakfire, "Error waiting for signal: %m\n");
916 r = 1;
917 }
918
919 // Close the file descriptor
920 close(fd);
921
922 return r;
923 }
924
925 /*
926 Performs the initialisation that needs to happen in the parent part
927 */
928 static int pakfire_jail_parent(struct pakfire_jail* jail, struct pakfire_jail_exec* ctx) {
929 int r;
930
931 // Setup UID mapping
932 r = pakfire_jail_setup_uid_mapping(jail, ctx->pid);
933 if (r)
934 return r;
935
936 // Write "deny" to /proc/PID/setgroups
937 r = pakfire_jail_setgroups(jail, ctx->pid);
938 if (r)
939 return r;
940
941 // Setup GID mapping
942 r = pakfire_jail_setup_gid_mapping(jail, ctx->pid);
943 if (r)
944 return r;
945
946 // Parent has finished initialisation
947 DEBUG(jail->pakfire, "Parent has finished initialization\n");
948
949 // Send signal to client
950 r = pakfire_jail_send_signal(jail, ctx->completed_fd);
951 if (r)
952 return r;
953
954 return 0;
955 }
956
957 static int pakfire_jail_child(struct pakfire_jail* jail, struct pakfire_jail_exec* ctx,
958 const char* argv[]) {
959 int r;
960
961 // XXX do we have to reconfigure logging here?
962
963 // Fetch my own PID
964 pid_t pid = getpid();
965
966 DEBUG(jail->pakfire, "Launched child process in jail with PID %d\n", pid);
967
968 // Wait for the parent to finish initialization
969 r = pakfire_jail_wait_for_signal(jail, ctx->completed_fd);
970 if (r)
971 return r;
972
973 // Perform further initialization
974
975 // Fetch UID/GID
976 uid_t uid = getuid();
977 gid_t gid = getgid();
978
979 // Fetch EUID/EGID
980 uid_t euid = geteuid();
981 gid_t egid = getegid();
982
983 DEBUG(jail->pakfire, " UID: %d (effective %d)\n", uid, euid);
984 DEBUG(jail->pakfire, " GID: %d (effective %d)\n", gid, egid);
985
986 // Check if we are (effectively running as root)
987 if (uid != 0 || gid != 0) {
988 ERROR(jail->pakfire, "Child process is not running as root\n");
989 return 126;
990 }
991
992 const char* root = pakfire_get_path(jail->pakfire);
993 const char* arch = pakfire_get_arch(jail->pakfire);
994
995 // Change root (unless root is /)
996 if (!pakfire_on_root(jail->pakfire)) {
997 // Mount everything
998 r = pakfire_mount_all(jail->pakfire);
999 if (r)
1000 return r;
1001
1002 // Log all mountpoints
1003 pakfire_mount_list(jail->pakfire);
1004
1005 // Call chroot()
1006 r = chroot(root);
1007 if (r) {
1008 ERROR(jail->pakfire, "chroot() to %s failed: %m\n", root);
1009 return 1;
1010 }
1011
1012 // Change directory to /
1013 r = chdir("/");
1014 if (r) {
1015 ERROR(jail->pakfire, "chdir() after chroot() failed: %m\n");
1016 return 1;
1017 }
1018 }
1019
1020 // Set personality
1021 unsigned long persona = pakfire_arch_personality(arch);
1022 if (persona) {
1023 r = personality(persona);
1024 if (r < 0) {
1025 ERROR(jail->pakfire, "Could not set personality (%x)\n", (unsigned int)persona);
1026 return 1;
1027 }
1028 }
1029
1030 // Set nice level
1031 if (jail->nice) {
1032 DEBUG(jail->pakfire, "Setting nice level to %d\n", jail->nice);
1033
1034 r = setpriority(PRIO_PROCESS, pid, jail->nice);
1035 if (r) {
1036 ERROR(jail->pakfire, "Could not set nice level: %m\n");
1037 return 1;
1038 }
1039 }
1040
1041 // Connect standard output and error
1042 if (ctx->pipes.stdout[1] && ctx->pipes.stderr[1]) {
1043 r = dup2(ctx->pipes.stdout[1], STDOUT_FILENO);
1044 if (r < 0) {
1045 ERROR(jail->pakfire, "Could not connect fd %d to stdout: %m\n",
1046 ctx->pipes.stdout[1]);
1047
1048 return 1;
1049 }
1050
1051 r = dup2(ctx->pipes.stderr[1], STDERR_FILENO);
1052 if (r < 0) {
1053 ERROR(jail->pakfire, "Could not connect fd %d to stderr: %m\n",
1054 ctx->pipes.stderr[1]);
1055
1056 return 1;
1057 }
1058
1059 // Close the reading sides of the pipe
1060 close(ctx->pipes.stdout[0]);
1061 close(ctx->pipes.stderr[0]);
1062 }
1063
1064 // Reset open file limit (http://0pointer.net/blog/file-descriptor-limits.html)
1065 r = pakfire_rlimit_reset_nofile(jail->pakfire);
1066 if (r)
1067 return r;
1068
1069 // Drop capabilities
1070 r = pakfire_jail_drop_capabilities(jail);
1071 if (r)
1072 return r;
1073
1074 // Filter syscalls
1075 r = pakfire_jail_limit_syscalls(jail);
1076 if (r)
1077 return r;
1078
1079 // exec() command
1080 r = execvpe(argv[0], (char**)argv, jail->env);
1081 if (r < 0)
1082 ERROR(jail->pakfire, "Could not execve(): %m\n");
1083
1084 // Translate errno into regular exit code
1085 switch (errno) {
1086 case ENOENT:
1087 r = 127;
1088 break;
1089
1090 default:
1091 r = 1;
1092 }
1093
1094 // We should not get here
1095 return r;
1096 }
1097
1098 // Run a command in the jail
1099 static int __pakfire_jail_exec(struct pakfire_jail* jail, const char* argv[]) {
1100 int exit = -1;
1101 int r;
1102
1103 // Check if argv is valid
1104 if (!argv || !argv[0]) {
1105 errno = EINVAL;
1106 return -1;
1107 }
1108
1109 // Initialize context for this call
1110 struct pakfire_jail_exec ctx = {
1111 .pipes = {
1112 .stdout = { 0, 0, },
1113 .stderr = { 0, 0, },
1114 },
1115 };
1116
1117 DEBUG(jail->pakfire, "Executing jail...\n");
1118
1119 /*
1120 Setup a file descriptor which can be used to notify the client that the parent
1121 has completed configuration.
1122 */
1123 ctx.completed_fd = eventfd(0, EFD_CLOEXEC);
1124 if (ctx.completed_fd < 0) {
1125 ERROR(jail->pakfire, "eventfd() failed: %m\n");
1126 return -1;
1127 }
1128
1129 // Create pipes to communicate with child process if we are not running interactively
1130 if (!pakfire_jail_has_flag(jail, PAKFIRE_JAIL_INTERACTIVE)) {
1131 // stdout
1132 r = pipe2(ctx.pipes.stdout, O_NONBLOCK);
1133 if (r < 0) {
1134 ERROR(jail->pakfire, "Could not create file descriptors for stdout: %m\n");
1135 goto ERROR;
1136 }
1137
1138 // stderr
1139 r = pipe2(ctx.pipes.stderr, O_NONBLOCK);
1140 if (r < 0) {
1141 ERROR(jail->pakfire, "Could not create file descriptors for stderr: %m\n");
1142 goto ERROR;
1143 }
1144 }
1145
1146 // Configure child process
1147 struct clone_args args = {
1148 .flags =
1149 CLONE_NEWCGROUP |
1150 CLONE_NEWIPC |
1151 CLONE_NEWNS |
1152 CLONE_NEWPID |
1153 CLONE_NEWUSER |
1154 CLONE_NEWUTS |
1155 CLONE_PIDFD,
1156 .exit_signal = SIGCHLD,
1157 .pidfd = (long long unsigned int)&ctx.pidfd,
1158 };
1159
1160 // Fork this process
1161 ctx.pid = clone3(&args, sizeof(args));
1162 if (ctx.pid < 0) {
1163 ERROR(jail->pakfire, "Could not clone: %m\n");
1164 return -1;
1165
1166 // Child process
1167 } else if (ctx.pid == 0) {
1168 r = pakfire_jail_child(jail, &ctx, argv);
1169 _exit(r);
1170 }
1171
1172 // Parent process
1173 r = pakfire_jail_parent(jail, &ctx);
1174 if (r)
1175 goto ERROR;
1176
1177 DEBUG(jail->pakfire, "Waiting for PID %d to finish its work\n", ctx.pid);
1178
1179 // Read output of the child process
1180 r = pakfire_jail_wait(jail, &ctx);
1181 if (r)
1182 goto ERROR;
1183
1184 // Handle exit status
1185 switch (ctx.status.si_code) {
1186 case CLD_EXITED:
1187 DEBUG(jail->pakfire, "The child process exited with code %d\n",
1188 ctx.status.si_status);
1189
1190 // Pass exit code
1191 exit = ctx.status.si_status;
1192 break;
1193
1194 case CLD_KILLED:
1195 case CLD_DUMPED:
1196 ERROR(jail->pakfire, "The child process was killed\n");
1197 break;
1198
1199 // Log anything else
1200 default:
1201 ERROR(jail->pakfire, "Unknown child exit code: %d\n", ctx.status.si_code);
1202 break;
1203 }
1204
1205 ERROR:
1206 // Close any file descriptors
1207 if (ctx.pipes.stdout[0])
1208 close(ctx.pipes.stdout[0]);
1209 if (ctx.pipes.stdout[1])
1210 close(ctx.pipes.stdout[1]);
1211 if (ctx.pipes.stderr[0])
1212 close(ctx.pipes.stderr[0]);
1213 if (ctx.pipes.stderr[1])
1214 close(ctx.pipes.stderr[1]);
1215 if (ctx.pidfd)
1216 close(ctx.pidfd);
1217
1218 // Umount everything
1219 if (!pakfire_on_root(jail->pakfire))
1220 pakfire_umount_all(jail->pakfire);
1221
1222 return exit;
1223 }
1224
1225 PAKFIRE_EXPORT int pakfire_jail_exec(struct pakfire_jail* jail,
1226 const char* argv[], char*** output) {
1227 int r;
1228
1229 // Store logging callback
1230 pakfire_jail_log_callback log_callback = jail->log_callback;
1231 void* log_data = jail->log_data;
1232
1233 // Capture output if requested by user
1234 if (output)
1235 pakfire_jail_set_log_callback(jail, pakfire_jail_capture_stdout, output);
1236
1237 // Run exec()
1238 r = __pakfire_jail_exec(jail, argv);
1239
1240 // Restore log callback
1241 pakfire_jail_set_log_callback(jail, log_callback, log_data);
1242
1243 return r;
1244 }
1245
1246 PAKFIRE_EXPORT int pakfire_jail_exec_script(struct pakfire_jail* jail,
1247 const char* script, const size_t size, const char* args[], char*** output) {
1248 char path[PATH_MAX];
1249 const char** argv = NULL;
1250 int r;
1251
1252 const char* root = pakfire_get_path(jail->pakfire);
1253
1254 // Write the scriptlet to disk
1255 r = pakfire_path_join(path, root, "pakfire-script.XXXXXX");
1256 if (r < 0)
1257 goto ERROR;
1258
1259 // Open a temporary file
1260 int fd = mkstemp(path);
1261 if (fd < 0) {
1262 ERROR(jail->pakfire, "Could not open a temporary file: %m\n");
1263 r = 1;
1264 goto ERROR;
1265 }
1266
1267 DEBUG(jail->pakfire, "Writing script to %s:\n%.*s\n", path, (int)size, script);
1268
1269 // Write data
1270 ssize_t bytes_written = write(fd, script, size);
1271 if (bytes_written < (ssize_t)size) {
1272 ERROR(jail->pakfire, "Could not write script to file %s: %m\n", path);
1273 r = 1;
1274 goto ERROR;
1275 }
1276
1277 // Make the script executable
1278 r = fchmod(fd, S_IRUSR|S_IWUSR|S_IXUSR);
1279 if (r) {
1280 ERROR(jail->pakfire, "Could not set executable permissions on %s: %m\n", path);
1281 goto ERROR;
1282 }
1283
1284 // Close file
1285 r = close(fd);
1286 if (r) {
1287 ERROR(jail->pakfire, "Could not close script file %s: %m\n", path);
1288 r = 1;
1289 goto ERROR;
1290 }
1291
1292 // Count how many arguments were passed
1293 unsigned int argc = 1;
1294 if (args) {
1295 for (const char** arg = args; *arg; arg++)
1296 argc++;
1297 }
1298
1299 argv = calloc(argc + 1, sizeof(*argv));
1300 if (!argv) {
1301 ERROR(jail->pakfire, "Could not allocate argv: %m\n");
1302 goto ERROR;
1303 }
1304
1305 // Set command
1306 argv[0] = (root) ? pakfire_path_relpath(root, path) : path;
1307
1308 // Copy args
1309 for (unsigned int i = 1; i < argc; i++)
1310 argv[i] = args[i-1];
1311
1312 // Run the script
1313 r = pakfire_jail_exec(jail, argv, output);
1314
1315 ERROR:
1316 if (argv)
1317 free(argv);
1318
1319 // Remove script from disk
1320 if (*path)
1321 unlink(path);
1322
1323 return r;
1324 }
1325
/*
	Convenience function: creates a jail with the given flags, executes argv
	in it and releases the jail again.

	Returns the error from pakfire_jail_create() if the jail could not be
	created, and the result of pakfire_jail_exec() otherwise.
*/
int pakfire_jail_run(struct pakfire* pakfire, const char* argv[], int flags, char*** output) {
	struct pakfire_jail* jail = NULL;

	// Create a new jail and, if that worked, execute the command
	int r = pakfire_jail_create(&jail, pakfire, flags);
	if (!r)
		r = pakfire_jail_exec(jail, argv, output);

	if (jail)
		pakfire_jail_unref(jail);

	return r;
}
1348
/*
	Convenience function: creates a jail with the given flags, runs the
	script (length bytes) with the arguments in argv and releases the jail
	again.

	Returns the error from pakfire_jail_create() if the jail could not be
	created, and the result of pakfire_jail_exec_script() otherwise.
*/
int pakfire_jail_run_script(struct pakfire* pakfire,
		const char* script, const size_t length, const char* argv[], int flags, char*** output) {
	struct pakfire_jail* jail = NULL;

	// Create a new jail and, if that worked, execute the script
	int r = pakfire_jail_create(&jail, pakfire, flags);
	if (!r)
		r = pakfire_jail_exec_script(jail, script, length, argv, output);

	if (jail)
		pakfire_jail_unref(jail);

	return r;
}
1368
1369
1370 int pakfire_jail_shell(struct pakfire* pakfire) {
1371 const char* argv[] = {
1372 "/bin/bash", "--login", NULL,
1373 };
1374
1375 // Execute /bin/bash
1376 return pakfire_jail_run(pakfire, argv, PAKFIRE_JAIL_INTERACTIVE, NULL);
1377 }
1378
1379 int pakfire_jail_ldconfig(struct pakfire* pakfire) {
1380 char path[PATH_MAX];
1381
1382 const char* ldconfig = "/sbin/ldconfig";
1383
1384 // Check if ldconfig exists before calling it to avoid overhead
1385 int r = pakfire_make_path(pakfire, path, ldconfig);
1386 if (r < 0)
1387 return 1;
1388
1389 // Check if ldconfig is executable
1390 r = access(path, X_OK);
1391 if (r) {
1392 DEBUG(pakfire, "%s is not executable. Skipping...\n", ldconfig);
1393 return 0;
1394 }
1395
1396 const char* argv[] = {
1397 ldconfig, NULL,
1398 };
1399
1400 // Run ldconfig
1401 return pakfire_jail_run(pakfire, argv, 0, NULL);
1402 }
1403
1404 // Utility functions
1405
1406 PAKFIRE_EXPORT char* pakfire_jail_concat_output(struct pakfire_jail* jail,
1407 const char** input, size_t* length) {
1408 // Return nothing on no input
1409 if (!input)
1410 return NULL;
1411
1412 // XXX Maybe there is a more efficient way to do this
1413
1414 char* output = pakfire_string_join((char**)input, "");
1415 if (!output)
1416 return NULL;
1417
1418 // Store the length of the result
1419 if (length)
1420 *length = strlen(output);
1421
1422 return output;
1423 }