]> git.ipfire.org Git - people/ms/pakfire.git/blob - src/libpakfire/jail.c
jail: Export in libpakfire
[people/ms/pakfire.git] / src / libpakfire / jail.c
1 /*#############################################################################
2 # #
3 # Pakfire - The IPFire package management system #
4 # Copyright (C) 2022 Pakfire development team #
5 # #
6 # This program is free software: you can redistribute it and/or modify #
7 # it under the terms of the GNU General Public License as published by #
8 # the Free Software Foundation, either version 3 of the License, or #
9 # (at your option) any later version. #
10 # #
11 # This program is distributed in the hope that it will be useful, #
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of #
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
14 # GNU General Public License for more details. #
15 # #
16 # You should have received a copy of the GNU General Public License #
17 # along with this program. If not, see <http://www.gnu.org/licenses/>. #
18 # #
19 #############################################################################*/
20
21 #include <errno.h>
22 #include <fcntl.h>
23 #include <linux/capability.h>
24 #include <linux/sched.h>
25 #include <sched.h>
26 #include <signal.h>
27 #include <stdlib.h>
28 #include <syscall.h>
29 #include <sys/capability.h>
30 #include <sys/epoll.h>
31 #include <sys/eventfd.h>
32 #include <sys/personality.h>
33 #include <sys/prctl.h>
34 #include <sys/types.h>
35 #include <sys/wait.h>
36
37 // libseccomp
38 #include <seccomp.h>
39
40 #include <pakfire/arch.h>
41 #include <pakfire/jail.h>
42 #include <pakfire/logging.h>
43 #include <pakfire/mount.h>
44 #include <pakfire/pakfire.h>
45 #include <pakfire/private.h>
46 #include <pakfire/util.h>
47
// Size of each log buffer in bytes (parenthesised so the macro is safe in any expression)
#define BUFFER_SIZE (1024 * 64)

// Number of slots in the environment array of a jail
#define ENVIRON_SIZE 128

// Maximum number of events fetched by a single epoll_wait() call
#define EPOLL_MAX_EVENTS 2
51
// The default environment that will be set for every command.
// pakfire_jail_create() walks this table and stores each pair in the new jail.
// The table is terminated by an entry whose key is NULL.
static const struct environ {
	const char* key;
	const char* val;
} ENV[] = {
	{ "LANG", "en_US.utf-8" },
	{ "TERM", "vt100" },
	{ NULL, NULL },
};
61
// A reference-counted jail object which carries everything needed to run commands
struct pakfire_jail {
	// The Pakfire instance this jail belongs to (referenced on creation)
	struct pakfire* pakfire;

	// Reference counter; the jail is freed when it drops to zero
	int nrefs;

	// Flags
	int flags;

	// Environment ("KEY=VALUE" strings, each allocated on the heap)
	char* env[ENVIRON_SIZE];

	// Logging (callback that receives every line and its opaque user data)
	pakfire_jail_log_callback log_callback;
	void* log_data;
};
76
// Collects output until a complete line can be passed on to the log callback
struct pakfire_log_buffer {
	// Raw bytes that have been read but not logged, yet
	char data[BUFFER_SIZE];

	// Number of bytes in data that are currently in use
	size_t used;
};
81
// Per-execution state which is created for every command that is run in a jail
struct pakfire_jail_exec {
	// PID (of the child)
	pid_t pid;

	// Process status (from waitpid)
	int status;

	// FD to notify the client that the parent has finished initialization
	// (an eventfd that the parent writes to and the child reads from)
	int completed_fd;

	// Log pipes (as returned by pipe2(): index 0 is the read end, index 1 the write end)
	struct {
		int stdout[2];
		int stderr[2];
	} pipes;

	// Log buffers (one for each pipe)
	struct {
		struct pakfire_log_buffer stdout;
		struct pakfire_log_buffer stderr;
	} buffers;
};
104
/*
	Invokes the raw clone3() system call with the given arguments and
	returns whatever the kernel returned.
*/
static int clone3(struct clone_args* args, size_t size) {
	const long result = syscall(__NR_clone3, args, size);

	return result;
}
108
109 static void pakfire_jail_free(struct pakfire_jail* jail) {
110 DEBUG(jail->pakfire, "Freeing jail at %p\n", jail);
111
112 // Free environment
113 for (unsigned int i = 0; jail->env[i]; i++)
114 free(jail->env[i]);
115
116 pakfire_unref(jail->pakfire);
117 free(jail);
118 }
119
/*
	The default log callback: forwards LOG_INFO lines to INFO() and LOG_ERR
	lines to ERROR(). Lines of any other priority are dropped.

	data and length are not used. Always returns zero.
*/
static int pakfire_jail_default_log_callback(struct pakfire* pakfire, void* data,
		int priority, const char* line, size_t length) {
	if (priority == LOG_INFO) {
		INFO(pakfire, "%s", line);

	} else if (priority == LOG_ERR) {
		ERROR(pakfire, "%s", line);
	}

	return 0;
}
134
/*
	Prepares the environment for interactive use: sets a prompt and copies
	TERM and LANG from the calling process if they are set.

	Returns zero on success or the error of pakfire_jail_set_env().
*/
static int pakfire_jail_setup_interactive_env(struct pakfire_jail* jail) {
	// Variables that are copied from our own environment (in this order)
	static const char* const inherited[] = {
		"TERM", "LANG", NULL,
	};

	// Set PS1
	int r = pakfire_jail_set_env(jail, "PS1", "pakfire-jail \\w> ");
	if (r)
		return r;

	for (const char* const* name = inherited; *name; name++) {
		const char* current = secure_getenv(*name);

		// Skip anything that is not set
		if (!current)
			continue;

		r = pakfire_jail_set_env(jail, *name, current);
		if (r)
			return r;
	}

	return 0;
}
159
160 PAKFIRE_EXPORT int pakfire_jail_create(struct pakfire_jail** jail,
161 struct pakfire* pakfire, int flags) {
162 int r;
163
164 // Allocate a new jail
165 struct pakfire_jail* j = calloc(1, sizeof(*j));
166 if (!j)
167 return 1;
168
169 // Reference Pakfire
170 j->pakfire = pakfire_ref(pakfire);
171
172 // Initialize reference counter
173 j->nrefs = 1;
174
175 // Store flags
176 j->flags = flags;
177
178 DEBUG(j->pakfire, "Allocated new jail at %p\n", j);
179
180 // Set default log callback
181 r = pakfire_jail_set_log_callback(j, pakfire_jail_default_log_callback, NULL);
182 if (r)
183 goto ERROR;
184
185 // Set default environment
186 for (const struct environ* e = ENV; e->key; e++) {
187 r = pakfire_jail_set_env(j, e->key, e->val);
188 if (r)
189 goto ERROR;
190 }
191
192 // Setup interactive stuff
193 if (j->flags & PAKFIRE_JAIL_INTERACTIVE) {
194 r = pakfire_jail_setup_interactive_env(j);
195 if (r)
196 goto ERROR;
197 }
198
199 // Done
200 *jail = j;
201 return 0;
202
203 ERROR:
204 pakfire_jail_free(j);
205
206 return r;
207 }
208
209 PAKFIRE_EXPORT struct pakfire_jail* pakfire_jail_ref(struct pakfire_jail* jail) {
210 ++jail->nrefs;
211
212 return jail;
213 }
214
215 PAKFIRE_EXPORT struct pakfire_jail* pakfire_jail_unref(struct pakfire_jail* jail) {
216 if (--jail->nrefs > 0)
217 return jail;
218
219 pakfire_jail_free(jail);
220 return NULL;
221 }
222
223 static int pakfire_jail_has_flag(struct pakfire_jail* jail, int flag) {
224 return jail->flags & flag;
225 }
226
227 // Environment
228
229 // Returns the length of the environment
230 static unsigned int pakfire_jail_env_length(struct pakfire_jail* jail) {
231 unsigned int i = 0;
232
233 // Count everything in the environment
234 for (char** e = jail->env; *e; e++)
235 i++;
236
237 return i;
238 }
239
240 // Finds an existing environment variable and returns its index or -1 if not found
241 static int pakfire_jail_find_env(struct pakfire_jail* jail, const char* key) {
242 if (!key) {
243 errno = EINVAL;
244 return -1;
245 }
246
247 char buffer[strlen(key) + 2];
248 pakfire_string_format(buffer, "%s=", key);
249
250 for (unsigned int i = 0; jail->env[i]; i++) {
251 if (pakfire_string_startswith(jail->env[i], buffer))
252 return i;
253 }
254
255 // Nothing found
256 return -1;
257 }
258
259 // Returns the value of an environment variable or NULL
260 PAKFIRE_EXPORT const char* pakfire_jail_get_env(struct pakfire_jail* jail,
261 const char* key) {
262 int i = pakfire_jail_find_env(jail, key);
263 if (i < 0)
264 return NULL;
265
266 return jail->env[i] + strlen(key) + 1;
267 }
268
269 // Sets an environment variable
270 PAKFIRE_EXPORT int pakfire_jail_set_env(struct pakfire_jail* jail,
271 const char* key, const char* value) {
272 // Find the index where to write this value to
273 int i = pakfire_jail_find_env(jail, key);
274 if (i < 0)
275 i = pakfire_jail_env_length(jail);
276
277 // Return -ENOSPC when the environment is full
278 if (i >= ENVIRON_SIZE) {
279 errno = ENOSPC;
280 return -1;
281 }
282
283 // Free any previous value
284 if (jail->env[i])
285 free(jail->env[i]);
286
287 // Format and set environment variable
288 asprintf(&jail->env[i], "%s=%s", key, value);
289
290 DEBUG(jail->pakfire, "Set environment variable: %s\n", jail->env[i]);
291
292 return 0;
293 }
294
295 // Imports an environment
296 PAKFIRE_EXPORT int pakfire_jail_import_env(struct pakfire_jail* jail, const char* env[]) {
297 if (!env)
298 return 0;
299
300 char* key;
301 char* val;
302 int r;
303
304 // Copy environment variables
305 for (unsigned int i = 0; env[i]; i++) {
306 r = pakfire_string_partition(env[i], "=", &key, &val);
307 if (r)
308 continue;
309
310 // Set value
311 r = pakfire_jail_set_env(jail, key, val);
312
313 if (key)
314 free(key);
315 if (val)
316 free(val);
317
318 // Break on error
319 if (r)
320 return r;
321 }
322
323 return 0;
324 }
325
326 // Logging
327
328 PAKFIRE_EXPORT int pakfire_jail_set_log_callback(struct pakfire_jail* jail,
329 pakfire_jail_log_callback callback, void* data) {
330 jail->log_callback = callback;
331 jail->log_data = data;
332
333 return 0;
334 }
335
336 static int pakfire_jail_log_buffer_is_full(const struct pakfire_log_buffer* buffer) {
337 return (sizeof(buffer->data) == buffer->used);
338 }
339
340 /*
341 This function reads as much data as it can from the file descriptor.
342 If it finds a whole line in it, it will send it to the logger and repeat the process.
343 If not newline character is found, it will try to read more data until it finds one.
344 */
345 static int pakfire_jail_handle_log(struct pakfire_jail* jail,
346 struct pakfire_jail_exec* ctx, int priority, int fd, struct pakfire_log_buffer* buffer) {
347 char line[BUFFER_SIZE + 1];
348
349 // Fill up buffer from fd
350 if (buffer->used < sizeof(buffer->data)) {
351 ssize_t bytes_read = read(fd, buffer->data + buffer->used,
352 sizeof(buffer->data) - buffer->used);
353
354 // Handle errors
355 if (bytes_read < 0) {
356 ERROR(jail->pakfire, "Could not read from fd %d: %m\n", fd);
357 return -1;
358 }
359
360 // Update buffer size
361 buffer->used += bytes_read;
362 }
363
364 // See if we have any lines that we can write
365 while (buffer->used) {
366 // Search for the end of the first line
367 char* eol = memchr(buffer->data, '\n', buffer->used);
368
369 // No newline found
370 if (!eol) {
371 // If the buffer is full, we send the content to the logger and try again
372 // This should not happen in practise
373 if (pakfire_jail_log_buffer_is_full(buffer)) {
374 DEBUG(jail->pakfire, "Logging buffer is full. Sending all content\n");
375
376 eol = buffer->data + sizeof(buffer->data) - 1;
377
378 // Otherwise we might have only read parts of the output
379 } else
380 break;
381 }
382
383 // Find the length of the string
384 size_t length = eol - buffer->data + 1;
385
386 // Copy the line into the buffer
387 memcpy(line, buffer->data, length);
388
389 // Terminate the string
390 line[length] = '\0';
391
392 // Log the line
393 if (jail->log_callback) {
394 int r = jail->log_callback(jail->pakfire, jail->log_data, priority, line, length);
395 if (r) {
396 ERROR(jail->pakfire, "The logging callback returned an error: %d\n", r);
397 return r;
398 }
399 }
400
401 // Remove line from buffer
402 memmove(buffer->data, buffer->data + length, buffer->used - length);
403 buffer->used -= length;
404 }
405
406 return 0;
407 }
408
409 static int pakfire_jail_logger(struct pakfire_jail* jail, struct pakfire_jail_exec* ctx) {
410 int epollfd = -1;
411 struct epoll_event ev;
412 struct epoll_event events[EPOLL_MAX_EVENTS];
413 int r = 0;
414
415 // Fetch file descriptors from context
416 const int stdout = ctx->pipes.stdout[1];
417 const int stderr = ctx->pipes.stderr[1];
418
419 int fds[2] = {
420 stdout, stderr,
421 };
422
423 // Setup epoll
424 epollfd = epoll_create1(0);
425 if (epollfd < 0) {
426 ERROR(jail->pakfire, "Could not initialize epoll(): %m\n");
427 r = 1;
428 goto OUT;
429 }
430
431 ev.events = EPOLLIN;
432
433 // Turn file descriptors into non-blocking mode and add them to epoll()
434 for (unsigned int i = 0; i < 2; i++) {
435 int fd = fds[i];
436
437 ev.data.fd = fd;
438
439 if (epoll_ctl(epollfd, EPOLL_CTL_ADD, fd, &ev) < 0) {
440 ERROR(jail->pakfire, "Could not add file descriptor %d to epoll(): %m\n", fd);
441 r = 1;
442 goto OUT;
443 }
444 }
445
446 int ended = 0;
447
448 // Loop for as long as the process is alive
449 while (!ended) {
450 // If waitpid() returns non-zero, the process has ended, but we want to perform
451 // one last iteration over the loop to read any remaining content from the file
452 // descriptor buffers.
453 r = waitpid(ctx->pid, &ctx->status, WNOHANG);
454 if (r)
455 ended = 1;
456
457 int num = epoll_wait(epollfd, events, EPOLL_MAX_EVENTS, -1);
458 if (num < 1) {
459 // Ignore if epoll_wait() has been interrupted
460 if (errno == EINTR)
461 continue;
462
463 ERROR(jail->pakfire, "epoll_wait() failed: %m\n");
464 r = 1;
465
466 goto OUT;
467 }
468
469 struct pakfire_log_buffer* buffer;
470 int priority;
471
472 for (int i = 0; i < num; i++) {
473 int fd = events[i].data.fd;
474
475 if (fd == stdout) {
476 buffer = &ctx->buffers.stdout;
477 priority = LOG_INFO;
478
479 } else if (fd == stderr) {
480 buffer = &ctx->buffers.stderr;
481 priority = LOG_ERR;
482
483 } else {
484 DEBUG(jail->pakfire, "Received invalid file descriptor %d\n", fd);
485 continue;
486 }
487
488 // Handle log event
489 r = pakfire_jail_handle_log(jail, ctx, priority, fd, buffer);
490 if (r)
491 goto OUT;
492 }
493 }
494
495 OUT:
496 if (epollfd > 0)
497 close(epollfd);
498
499 return r;
500 }
501
/*
	A log callback which collects everything from stdout (LOG_INFO) in a
	NULL-terminated array of strings. Lines of any other priority are passed
	on to the default log callback.

	data must point to the array (char***) which may be NULL initially.
	The array and all strings in it are allocated on the heap.

	Returns zero on success and 1 if memory could not be allocated. In that
	case, the array remains unchanged.
*/
static int pakfire_jail_capture_stdout(struct pakfire* pakfire, void* data, int priority,
		const char* line, size_t length) {
	char*** array = (char***)data;

	// Send everything that is not stdout to the default logger
	if (priority != LOG_INFO)
		return pakfire_jail_default_log_callback(pakfire, NULL, priority, line, length);

	// Create a copy of line
	char* message = strdup(line);
	if (!message)
		return 1;

	// Remove any trailing newline
	pakfire_remove_trailing_newline(message);

	// Determine the length of the existing array
	size_t count = 0;
	if (*array) {
		for (char** element = *array; *element; element++)
			count++;
	}

	// Allocate space for the new element and the terminating NULL
	// The result is stored in a temporary variable so that we do not lose
	// the existing array if the allocation fails
	char** grown = reallocarray(*array, count + 2, sizeof(*grown));
	if (!grown) {
		free(message);
		return 1;
	}

	// Append message and terminate the array
	grown[count] = message;
	grown[count + 1] = NULL;

	*array = grown;

	return 0;
}
539
540 // Capabilities
541
542 static int pakfire_jail_drop_capabilities(struct pakfire_jail* jail) {
543 const int capabilities[] = {
544 // Deny access to the kernel's audit system
545 CAP_AUDIT_CONTROL,
546 CAP_AUDIT_READ,
547 CAP_AUDIT_WRITE,
548
549 // Deny suspending block devices
550 CAP_BLOCK_SUSPEND,
551
552 // Deny any stuff with BPF
553 CAP_BPF,
554
555 // Deny checkpoint restore
556 CAP_CHECKPOINT_RESTORE,
557
558 // Deny opening files by inode number (open_by_handle_at)
559 CAP_DAC_READ_SEARCH,
560
561 // Deny setting SUID bits
562 CAP_FSETID,
563
564 // Deny locking more memory
565 CAP_IPC_LOCK,
566
567 // Deny modifying any Apparmor/SELinux/SMACK configuration
568 CAP_MAC_ADMIN,
569 CAP_MAC_OVERRIDE,
570
571 // Deny creating any special devices
572 CAP_MKNOD,
573
574 // Deny setting any capabilities
575 CAP_SETFCAP,
576
577 // Deny reading from syslog
578 CAP_SYSLOG,
579
580 // Deny any admin actions (mount, sethostname, ...)
581 CAP_SYS_ADMIN,
582
583 // Deny rebooting the system
584 CAP_SYS_BOOT,
585
586 // Deny loading kernel modules
587 CAP_SYS_MODULE,
588
589 // Deny setting nice level
590 CAP_SYS_NICE,
591
592 // Deny access to /proc/kcore, /dev/mem, /dev/kmem
593 CAP_SYS_RAWIO,
594
595 // Deny circumventing any resource limits
596 CAP_SYS_RESOURCE,
597
598 // Deny setting the system time
599 CAP_SYS_TIME,
600
601 // Deny playing with suspend
602 CAP_WAKE_ALARM,
603
604 0,
605 };
606
607 DEBUG(jail->pakfire, "Dropping capabilities...\n");
608
609 size_t num_caps = 0;
610 int r;
611
612 // Drop any capabilities
613 for (const int* cap = capabilities; *cap; cap++) {
614 r = prctl(PR_CAPBSET_DROP, *cap, 0, 0, 0);
615 if (r) {
616 ERROR(jail->pakfire, "Could not drop capability %d: %m\n", *cap);
617 return r;
618 }
619
620 num_caps++;
621 }
622
623 // Fetch any capabilities
624 cap_t caps = cap_get_proc();
625 if (!caps) {
626 ERROR(jail->pakfire, "Could not read capabilities: %m\n");
627 return 1;
628 }
629
630 /*
631 Set inheritable capabilities
632
633 This ensures that no processes will be able to gain any of the listed
634 capabilities again.
635 */
636 r = cap_set_flag(caps, CAP_INHERITABLE, num_caps, capabilities, CAP_CLEAR);
637 if (r) {
638 ERROR(jail->pakfire, "cap_set_flag() failed: %m\n");
639 goto ERROR;
640 }
641
642 // Restore capabilities
643 r = cap_set_proc(caps);
644 if (r) {
645 ERROR(jail->pakfire, "Could not restore capabilities: %m\n");
646 goto ERROR;
647 }
648
649 ERROR:
650 if (caps)
651 cap_free(caps);
652
653 return r;
654 }
655
656 // Syscall Filter
657
658 static int pakfire_jail_limit_syscalls(struct pakfire_jail* jail) {
659 const int syscalls[] = {
660 // The kernel's keyring isn't namespaced
661 SCMP_SYS(keyctl),
662 SCMP_SYS(add_key),
663 SCMP_SYS(request_key),
664
665 // Disable userfaultfd
666 SCMP_SYS(userfaultfd),
667
668 // Disable perf which could leak a lot of information about the host
669 SCMP_SYS(perf_event_open),
670
671 0,
672 };
673 int r = 1;
674
675 DEBUG(jail->pakfire, "Applying syscall filter...\n");
676
677 // Setup a syscall filter which allows everything by default
678 scmp_filter_ctx ctx = seccomp_init(SCMP_ACT_ALLOW);
679 if (!ctx) {
680 ERROR(jail->pakfire, "Could not setup seccomp filter: %m\n");
681 goto ERROR;
682 }
683
684 // All all syscalls
685 for (const int* syscall = syscalls; *syscall; syscall++) {
686 r = seccomp_rule_add(ctx, SCMP_ACT_ERRNO(EPERM), *syscall, 0);
687 if (r) {
688 ERROR(jail->pakfire, "Could not configure syscall %d: %m\n", *syscall);
689 goto ERROR;
690 }
691 }
692
693 // Load syscall filter into the kernel
694 r = seccomp_load(ctx);
695 if (r) {
696 ERROR(jail->pakfire, "Could not load syscall filter into the kernel: %m\n");
697 goto ERROR;
698 }
699
700 ERROR:
701 if (ctx)
702 seccomp_release(ctx);
703
704 return r;
705 }
706
707 // UID/GID Mapping
708
709 static int pakfire_jail_write_uidgid_mapping(struct pakfire_jail* jail,
710 const char* path, uid_t mapped_id, size_t length) {
711 int r = 1;
712
713 // Open file for writing
714 FILE* f = fopen(path, "w");
715 if (!f) {
716 ERROR(jail->pakfire, "Could not open %s for writing: %m\n", path);
717 goto ERROR;
718 }
719
720 // Write configuration
721 int bytes_written = fprintf(f, "%d %d %ld\n", 0, mapped_id, length);
722 if (bytes_written <= 0) {
723 ERROR(jail->pakfire, "Could not write UID/GID mapping: %m\n");
724 goto ERROR;
725 }
726
727 // Close the file
728 r = fclose(f);
729 f = NULL;
730 if (r) {
731 ERROR(jail->pakfire, "Could not write UID/GID mapping: %m\n");
732
733 goto ERROR;
734 }
735
736 // Success
737 r = 0;
738
739 ERROR:
740 if (f)
741 fclose(f);
742
743 return r;
744 }
745
746 static int pakfire_jail_setup_uid_mapping(struct pakfire_jail* jail, pid_t pid) {
747 char path[PATH_MAX];
748 int r;
749
750 uid_t mapped_uid = 0;
751 const size_t length = 1;
752
753 // Fetch the UID of the calling process
754 uid_t uid = getuid();
755
756 // Have we been called by root?
757 if (uid == 0) {
758 mapped_uid = 0;
759
760 // Have we been called by an unprivileged user?
761 } else {
762 // XXX fetch SUBUID
763 mapped_uid = uid;
764 }
765
766 // Make path
767 r = pakfire_string_format(path, "/proc/%d/uid_map", pid);
768 if (r < 0)
769 return 1;
770
771 DEBUG(jail->pakfire, "Mapping UID range (%u - %lu)\n", mapped_uid, mapped_uid + length);
772
773 return pakfire_jail_write_uidgid_mapping(jail, path, mapped_uid, length);
774 }
775
776 static int pakfire_jail_setup_gid_mapping(struct pakfire_jail* jail, pid_t pid) {
777 char path[PATH_MAX];
778 int r;
779
780 gid_t mapped_gid = 0;
781 const size_t length = 1;
782
783 // Fetch the GID of the calling process
784 gid_t gid = getgid();
785
786 // Have we been called from the root group?
787 if (gid == 0) {
788 mapped_gid = 0;
789
790 // Have we been called by an unprivileged group?
791 } else {
792 // XXX fetch SUBGID
793 mapped_gid = gid;
794 }
795
796 // Make path
797 r = pakfire_string_format(path, "/proc/%d/gid_map", pid);
798 if (r < 0)
799 return 1;
800
801 DEBUG(jail->pakfire, "Mapping GID range (%u - %lu)\n", mapped_gid, mapped_gid + length);
802
803 return pakfire_jail_write_uidgid_mapping(jail, path, mapped_gid, length);
804 }
805
806 static int pakfire_jail_setgroups(struct pakfire_jail* jail, pid_t pid) {
807 char path[PATH_MAX];
808 int r = 1;
809
810 // Make path
811 r = pakfire_string_format(path, "/proc/%d/setgroups", pid);
812 if (r < 0)
813 return 1;
814
815 // Open file for writing
816 FILE* f = fopen(path, "w");
817 if (!f) {
818 ERROR(jail->pakfire, "Could not open %s for writing: %m\n", path);
819 goto ERROR;
820 }
821
822 // Write content
823 int bytes_written = fprintf(f, "deny\n");
824 if (bytes_written <= 0) {
825 ERROR(jail->pakfire, "Could not write to %s: %m\n", path);
826 goto ERROR;
827 }
828
829 r = fclose(f);
830 f = NULL;
831 if (r) {
832 ERROR(jail->pakfire, "Could not close %s: %m\n", path);
833 goto ERROR;
834 }
835
836 ERROR:
837 if (f)
838 fclose(f);
839
840 return r;
841 }
842
843 static int pakfire_jail_send_signal(struct pakfire_jail* jail, int fd) {
844 const uint64_t val = 1;
845 int r = 0;
846
847 DEBUG(jail->pakfire, "Sending signal...\n");
848
849 // Write to the file descriptor
850 ssize_t bytes_written = write(fd, &val, sizeof(val));
851 if (bytes_written < 0 || (size_t)bytes_written < sizeof(val)) {
852 ERROR(jail->pakfire, "Could not send signal: %m\n");
853 r = 1;
854 }
855
856 // Close the file descriptor
857 close(fd);
858
859 return r;
860 }
861
862 static int pakfire_jail_wait_for_signal(struct pakfire_jail* jail, int fd) {
863 uint64_t val = 0;
864 int r = 0;
865
866 DEBUG(jail->pakfire, "Waiting for signal...\n");
867
868 ssize_t bytes_read = read(fd, &val, sizeof(val));
869 if (bytes_read < 0 || (size_t)bytes_read < sizeof(val)) {
870 ERROR(jail->pakfire, "Error waiting for signal: %m\n");
871 r = 1;
872 }
873
874 // Close the file descriptor
875 close(fd);
876
877 return r;
878 }
879
880 /*
881 Performs the initialisation that needs to happen in the parent part
882 */
883 static int pakfire_jail_parent(struct pakfire_jail* jail, struct pakfire_jail_exec* ctx) {
884 int r;
885
886 // Setup UID mapping
887 r = pakfire_jail_setup_uid_mapping(jail, ctx->pid);
888 if (r)
889 return r;
890
891 // Write "deny" to /proc/PID/setgroups
892 r = pakfire_jail_setgroups(jail, ctx->pid);
893 if (r)
894 return r;
895
896 // Setup GID mapping
897 r = pakfire_jail_setup_gid_mapping(jail, ctx->pid);
898 if (r)
899 return r;
900
901 // Parent has finished initialisation
902 DEBUG(jail->pakfire, "Parent has finished initialization\n");
903
904 // Send signal to client
905 r = pakfire_jail_send_signal(jail, ctx->completed_fd);
906 if (r)
907 return r;
908
909 return 0;
910 }
911
912 static int pakfire_jail_child(struct pakfire_jail* jail, struct pakfire_jail_exec* ctx,
913 const char* argv[]) {
914 int r;
915
916 // XXX do we have to reconfigure logging here?
917
918 DEBUG(jail->pakfire, "Launched child process in jail with PID %d\n", getpid());
919
920 // Wait for the parent to finish initialization
921 r = pakfire_jail_wait_for_signal(jail, ctx->completed_fd);
922 if (r)
923 return r;
924
925 // Perform further initialization
926
927 // Fetch UID/GID
928 uid_t uid = getuid();
929 gid_t gid = getgid();
930
931 // Fetch EUID/EGID
932 uid_t euid = geteuid();
933 gid_t egid = getegid();
934
935 DEBUG(jail->pakfire, " UID: %d (effective %d)\n", uid, euid);
936 DEBUG(jail->pakfire, " GID: %d (effective %d)\n", gid, egid);
937
938 // Check if we are (effectively running as root)
939 if (uid != 0 || gid != 0) {
940 ERROR(jail->pakfire, "Child process is not running as root\n");
941 return 126;
942 }
943
944 const char* root = pakfire_get_path(jail->pakfire);
945 const char* arch = pakfire_get_arch(jail->pakfire);
946
947 // Change root (unless root is /)
948 if (!pakfire_on_root(jail->pakfire)) {
949 // Mount everything
950 r = pakfire_mount_all(jail->pakfire);
951 if (r)
952 return r;
953
954 // Log all mountpoints
955 pakfire_mount_list(jail->pakfire);
956
957 // Call chroot()
958 r = chroot(root);
959 if (r) {
960 ERROR(jail->pakfire, "chroot() to %s failed: %m\n", root);
961 return 1;
962 }
963
964 // Change directory to /
965 r = chdir("/");
966 if (r) {
967 ERROR(jail->pakfire, "chdir() after chroot() failed: %m\n");
968 return 1;
969 }
970 }
971
972 // Set personality
973 unsigned long persona = pakfire_arch_personality(arch);
974 if (persona) {
975 r = personality(persona);
976 if (r < 0) {
977 ERROR(jail->pakfire, "Could not set personality (%x)\n", (unsigned int)persona);
978 return 1;
979 }
980 }
981
982 // Reset open file limit (http://0pointer.net/blog/file-descriptor-limits.html)
983 r = pakfire_rlimit_reset_nofile(jail->pakfire);
984 if (r)
985 return r;
986
987 // Drop capabilities
988 r = pakfire_jail_drop_capabilities(jail);
989 if (r)
990 return r;
991
992 // Filter syscalls
993 r = pakfire_jail_limit_syscalls(jail);
994 if (r)
995 return r;
996
997 // exec() command
998 r = execvpe(argv[0], (char**)argv, jail->env);
999 if (r < 0)
1000 ERROR(jail->pakfire, "Could not execve(): %m\n");
1001
1002 // Translate errno into regular exit code
1003 switch (errno) {
1004 case ENOENT:
1005 r = 127;
1006 break;
1007
1008 default:
1009 r = 1;
1010 }
1011
1012 // We should not get here
1013 return r;
1014 }
1015
1016 // Run a command in the jail
1017 static int __pakfire_jail_exec(struct pakfire_jail* jail, const char* argv[]) {
1018 int exit = -1;
1019 int r;
1020
1021 // Check if argv is valid
1022 if (!argv || !argv[0]) {
1023 errno = EINVAL;
1024 return -1;
1025 }
1026
1027 // Initialize context for this call
1028 struct pakfire_jail_exec ctx = {
1029 .pipes = {
1030 .stdout = { 0, 0, },
1031 .stderr = { 0, 0, },
1032 },
1033 .status = 0,
1034 };
1035
1036 DEBUG(jail->pakfire, "Executing jail...\n");
1037
1038 /*
1039 Setup a file descriptor which can be used to notify the client that the parent
1040 has completed configuration.
1041 */
1042 ctx.completed_fd = eventfd(0, EFD_CLOEXEC);
1043 if (ctx.completed_fd < 0) {
1044 ERROR(jail->pakfire, "eventfd() failed: %m\n");
1045 return -1;
1046 }
1047
1048 // Create pipes to communicate with child process if we are not running interactively
1049 if (!pakfire_jail_has_flag(jail, PAKFIRE_JAIL_INTERACTIVE)) {
1050 // stdout
1051 r = pipe2(ctx.pipes.stdout, O_NONBLOCK);
1052 if (r < 0) {
1053 ERROR(jail->pakfire, "Could not create file descriptors for stdout: %m\n");
1054 goto ERROR;
1055 }
1056
1057 // stderr
1058 r = pipe2(ctx.pipes.stderr, O_NONBLOCK);
1059 if (r < 0) {
1060 ERROR(jail->pakfire, "Could not create file descriptors for stderr: %m\n");
1061 goto ERROR;
1062 }
1063 }
1064
1065 // Configure child process
1066 struct clone_args args = {
1067 .flags =
1068 CLONE_NEWCGROUP |
1069 CLONE_NEWIPC |
1070 CLONE_NEWNS |
1071 CLONE_NEWPID |
1072 CLONE_NEWUSER |
1073 CLONE_NEWUTS,
1074 .exit_signal = SIGCHLD,
1075 };
1076
1077 // Fork this process
1078 ctx.pid = clone3(&args, sizeof(args));
1079 if (ctx.pid < 0) {
1080 ERROR(jail->pakfire, "Could not clone: %m\n");
1081 return -1;
1082
1083 // Child process
1084 } else if (ctx.pid == 0) {
1085 r = pakfire_jail_child(jail, &ctx, argv);
1086 _exit(r);
1087 }
1088
1089 // Parent process
1090 r = pakfire_jail_parent(jail, &ctx);
1091 if (r)
1092 goto ERROR;
1093
1094 DEBUG(jail->pakfire, "Waiting for PID %d to finish its work\n", ctx.pid);
1095
1096 // Read output of the child process
1097 if (!pakfire_jail_has_flag(jail, PAKFIRE_JAIL_INTERACTIVE)) {
1098 r = pakfire_jail_logger(jail, &ctx);
1099 if (r)
1100 ERROR(jail->pakfire, "Log reading aborted: %m\n");
1101 }
1102
1103 if (!ctx.status)
1104 waitpid(ctx.pid, &ctx.status, 0);
1105
1106 if (WIFEXITED(ctx.status)) {
1107 exit = WEXITSTATUS(ctx.status);
1108
1109 DEBUG(jail->pakfire, "Child process exited with code: %d\n", exit);
1110 } else {
1111 ERROR(jail->pakfire, "Could not determine the exit status of process %d\n", ctx.pid);
1112
1113 errno = ESRCH;
1114 exit = -1;
1115 }
1116
1117 ERROR:
1118 // Close any file descriptors
1119 if (ctx.pipes.stdout[0])
1120 close(ctx.pipes.stdout[0]);
1121 if (ctx.pipes.stdout[1])
1122 close(ctx.pipes.stdout[1]);
1123 if (ctx.pipes.stderr[0])
1124 close(ctx.pipes.stderr[0]);
1125 if (ctx.pipes.stderr[1])
1126 close(ctx.pipes.stderr[1]);
1127
1128 // Umount everything
1129 if (!pakfire_on_root(jail->pakfire))
1130 pakfire_umount_all(jail->pakfire);
1131
1132 return exit;
1133 }
1134
1135 PAKFIRE_EXPORT int pakfire_jail_exec(struct pakfire_jail* jail,
1136 const char* argv[], char*** output) {
1137 int r;
1138
1139 // Store logging callback
1140 pakfire_jail_log_callback log_callback = jail->log_callback;
1141 void* log_data = jail->log_data;
1142
1143 // Capture output if requested by user
1144 if (output)
1145 pakfire_jail_set_log_callback(jail, pakfire_jail_capture_stdout, output);
1146
1147 // Run exec()
1148 r = __pakfire_jail_exec(jail, argv);
1149
1150 // Restore log callback
1151 pakfire_jail_set_log_callback(jail, log_callback, log_data);
1152
1153 return r;
1154 }
1155
1156 PAKFIRE_EXPORT int pakfire_jail_exec_script(struct pakfire_jail* jail,
1157 const char* script, const size_t size, const char* args[], char*** output) {
1158 char path[PATH_MAX];
1159 const char** argv = NULL;
1160 int r;
1161
1162 const char* root = pakfire_get_path(jail->pakfire);
1163
1164 // Write the scriptlet to disk
1165 r = pakfire_path_join(path, root, "pakfire-script.XXXXXX");
1166 if (r < 0)
1167 goto ERROR;
1168
1169 // Open a temporary file
1170 int fd = mkstemp(path);
1171 if (fd < 0) {
1172 ERROR(jail->pakfire, "Could not open a temporary file: %m\n");
1173 r = 1;
1174 goto ERROR;
1175 }
1176
1177 DEBUG(jail->pakfire, "Writing script to %s:\n%.*s\n", path, (int)size, script);
1178
1179 // Write data
1180 ssize_t bytes_written = write(fd, script, size);
1181 if (bytes_written < (ssize_t)size) {
1182 ERROR(jail->pakfire, "Could not write script to file %s: %m\n", path);
1183 r = 1;
1184 goto ERROR;
1185 }
1186
1187 // Make the script executable
1188 r = fchmod(fd, S_IRUSR|S_IWUSR|S_IXUSR);
1189 if (r) {
1190 ERROR(jail->pakfire, "Could not set executable permissions on %s: %m\n", path);
1191 goto ERROR;
1192 }
1193
1194 // Close file
1195 r = close(fd);
1196 if (r) {
1197 ERROR(jail->pakfire, "Could not close script file %s: %m\n", path);
1198 r = 1;
1199 goto ERROR;
1200 }
1201
1202 // Count how many arguments were passed
1203 unsigned int argc = 1;
1204 if (args) {
1205 for (const char** arg = args; *arg; arg++)
1206 argc++;
1207 }
1208
1209 argv = calloc(argc + 1, sizeof(*argv));
1210 if (!argv) {
1211 ERROR(jail->pakfire, "Could not allocate argv: %m\n");
1212 goto ERROR;
1213 }
1214
1215 // Set command
1216 argv[0] = (root) ? pakfire_path_relpath(root, path) : path;
1217
1218 // Copy args
1219 for (unsigned int i = 1; i < argc; i++)
1220 argv[i] = args[i-1];
1221
1222 // Run the script
1223 r = pakfire_jail_exec(jail, argv, output);
1224
1225 ERROR:
1226 if (argv)
1227 free(argv);
1228
1229 // Remove script from disk
1230 if (*path)
1231 unlink(path);
1232
1233 return r;
1234 }
1235
/*
	A convenience function that creates a new jail, runs the given command and destroys
	the jail again.

	Returns the error of pakfire_jail_create() if the jail could not be created.
	Otherwise the result of pakfire_jail_exec() is returned.
*/
int pakfire_jail_run(struct pakfire* pakfire, const char* argv[], int flags, char*** output) {
	struct pakfire_jail* jail = NULL;

	// Create a new jail
	int r = pakfire_jail_create(&jail, pakfire, flags);

	// Execute the command
	if (!r)
		r = pakfire_jail_exec(jail, argv, output);

	// Cleanup
	if (jail)
		pakfire_jail_unref(jail);

	return r;
}
1258
/*
	A convenience function that creates a new jail, runs the given script and destroys
	the jail again.

	Returns the error of pakfire_jail_create() if the jail could not be created.
	Otherwise the result of pakfire_jail_exec_script() is returned.
*/
int pakfire_jail_run_script(struct pakfire* pakfire,
		const char* script, const size_t length, const char* argv[], int flags, char*** output) {
	struct pakfire_jail* jail = NULL;

	// Create a new jail
	int r = pakfire_jail_create(&jail, pakfire, flags);

	// Execute the script
	if (!r)
		r = pakfire_jail_exec_script(jail, script, length, argv, output);

	// Cleanup
	if (jail)
		pakfire_jail_unref(jail);

	return r;
}
1278
1279
1280 int pakfire_jail_shell(struct pakfire* pakfire) {
1281 const char* argv[] = {
1282 "/bin/bash", "--login", NULL,
1283 };
1284
1285 // Execute /bin/bash
1286 return pakfire_jail_run(pakfire, argv, PAKFIRE_JAIL_INTERACTIVE, NULL);
1287 }
1288
1289 int pakfire_jail_ldconfig(struct pakfire* pakfire) {
1290 char path[PATH_MAX];
1291
1292 const char* ldconfig = "/sbin/ldconfig";
1293
1294 // Check if ldconfig exists before calling it to avoid overhead
1295 int r = pakfire_make_path(pakfire, path, ldconfig);
1296 if (r < 0)
1297 return 1;
1298
1299 // Check if ldconfig is executable
1300 r = access(path, X_OK);
1301 if (r) {
1302 DEBUG(pakfire, "%s is not executable. Skipping...\n", ldconfig);
1303 return 0;
1304 }
1305
1306 const char* argv[] = {
1307 ldconfig, NULL,
1308 };
1309
1310 // Run ldconfig
1311 return pakfire_jail_run(pakfire, argv, 0, NULL);
1312 }