]> git.ipfire.org Git - pakfire.git/blob - src/libpakfire/jail.c
jail: Return any output as string
[pakfire.git] / src / libpakfire / jail.c
1 /*#############################################################################
2 # #
3 # Pakfire - The IPFire package management system #
4 # Copyright (C) 2022 Pakfire development team #
5 # #
6 # This program is free software: you can redistribute it and/or modify #
7 # it under the terms of the GNU General Public License as published by #
8 # the Free Software Foundation, either version 3 of the License, or #
9 # (at your option) any later version. #
10 # #
11 # This program is distributed in the hope that it will be useful, #
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of #
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
14 # GNU General Public License for more details. #
15 # #
16 # You should have received a copy of the GNU General Public License #
17 # along with this program. If not, see <http://www.gnu.org/licenses/>. #
18 # #
19 #############################################################################*/
20
21 #include <errno.h>
22 #include <linux/capability.h>
23 #include <linux/fcntl.h>
24 #include <linux/sched.h>
25 #include <linux/wait.h>
26 #include <sched.h>
27 #include <signal.h>
28 #include <stdlib.h>
29 #include <syscall.h>
30 #include <sys/capability.h>
31 #include <sys/epoll.h>
32 #include <sys/eventfd.h>
33 #include <sys/personality.h>
34 #include <sys/prctl.h>
35 #include <sys/resource.h>
36 #include <sys/types.h>
37 #include <sys/wait.h>
38
39 // libseccomp
40 #include <seccomp.h>
41
42 #include <pakfire/arch.h>
43 #include <pakfire/cgroup.h>
44 #include <pakfire/jail.h>
45 #include <pakfire/logging.h>
46 #include <pakfire/mount.h>
47 #include <pakfire/pakfire.h>
48 #include <pakfire/private.h>
49 #include <pakfire/util.h>
50
// Size (in bytes) of each buffer which collects output of the child process.
// The expression is parenthesised so that the macro expands correctly when it
// is used inside a larger expression (e.g. x / BUFFER_SIZE).
#define BUFFER_SIZE      (1024 * 64)

// Number of slots in the environment array of a jail
#define ENVIRON_SIZE     128

// Maximum number of events which are fetched by a single epoll_wait() call
#define EPOLL_MAX_EVENTS 2
54
// Environment variables which every jail is initialised with.
// The table is terminated by an entry whose key is NULL.
static const struct environ {
	const char* key;
	const char* val;
} ENV[] = {
	{ .key = "LANG", .val = "en_US.utf-8" },
	{ .key = "TERM", .val = "vt100" },

	// Sentinel
	{ .key = NULL, .val = NULL },
};
64
65 struct pakfire_jail {
66 struct pakfire* pakfire;
67 int nrefs;
68
69 // Flags
70 int flags;
71
72 // Resource Limits
73 int nice;
74
75 // CGroup
76 struct pakfire_cgroup* cgroup;
77
78 // Environment
79 char* env[ENVIRON_SIZE];
80
81 // Logging
82 pakfire_jail_log_callback log_callback;
83 void* log_data;
84 };
85
86 struct pakfire_log_buffer {
87 char data[BUFFER_SIZE];
88 size_t used;
89 };
90
91 struct pakfire_jail_exec {
92 // PID (of the child)
93 pid_t pid;
94 int pidfd;
95
96 // Process status (from waitid)
97 siginfo_t status;
98
99 // FD to notify the client that the parent has finished initialization
100 int completed_fd;
101
102 // Log pipes
103 struct pakfire_jail_pipes {
104 int stdout[2];
105 int stderr[2];
106
107 // Logging
108 int log_INFO[2];
109 int log_ERROR[2];
110 int log_DEBUG[2];
111 } pipes;
112
113 // Log buffers
114 struct pakfire_jail_buffers {
115 struct pakfire_log_buffer stdout;
116 struct pakfire_log_buffer stderr;
117
118 // Logging
119 struct pakfire_log_buffer log_INFO;
120 struct pakfire_log_buffer log_ERROR;
121 struct pakfire_log_buffer log_DEBUG;
122 } buffers;
123 };
124
/*
	Invokes the clone3 system call through syscall() and passes on its result
*/
static int clone3(struct clone_args* args, size_t size) {
	const long result = syscall(__NR_clone3, args, size);

	return result;
}
128
129 static void pakfire_jail_free(struct pakfire_jail* jail) {
130 DEBUG(jail->pakfire, "Freeing jail at %p\n", jail);
131
132 // Free environment
133 for (unsigned int i = 0; jail->env[i]; i++)
134 free(jail->env[i]);
135
136 if (jail->cgroup)
137 pakfire_cgroup_unref(jail->cgroup);
138
139 pakfire_unref(jail->pakfire);
140 free(jail);
141 }
142
/*
	Default log callback which hands the line on to Pakfire's own logging.

	Lines with a priority other than LOG_INFO, LOG_ERR (or LOG_DEBUG if
	debugging has been compiled in) are dropped. Always returns zero.
*/
static int pakfire_jail_default_log_callback(struct pakfire* pakfire, void* data,
		int priority, const char* line, size_t length) {
	if (priority == LOG_INFO) {
		INFO(pakfire, "%s", line);

	} else if (priority == LOG_ERR) {
		ERROR(pakfire, "%s", line);
	}
#ifdef ENABLE_DEBUG
	else if (priority == LOG_DEBUG) {
		DEBUG(pakfire, "%s", line);
	}
#endif

	return 0;
}
166
/*
	Prepares the environment for interactive use: sets a prompt and takes
	over TERM and LANG from the calling process if they are set there.

	Returns zero on success or the error of pakfire_jail_set_env().
*/
static int pakfire_jail_setup_interactive_env(struct pakfire_jail* jail) {
	// Variables which are copied from the environment of the caller
	static const char* const inherited[] = { "TERM", "LANG", NULL };

	// Set PS1
	int r = pakfire_jail_set_env(jail, "PS1", "pakfire-jail \\w> ");
	if (r)
		return r;

	for (const char* const* name = inherited; *name; name++) {
		const char* value = secure_getenv(*name);

		// Skip anything that is not set
		if (!value)
			continue;

		r = pakfire_jail_set_env(jail, *name, value);
		if (r)
			return r;
	}

	return 0;
}
191
192 PAKFIRE_EXPORT int pakfire_jail_create(struct pakfire_jail** jail,
193 struct pakfire* pakfire, int flags) {
194 int r;
195
196 // Allocate a new jail
197 struct pakfire_jail* j = calloc(1, sizeof(*j));
198 if (!j)
199 return 1;
200
201 // Reference Pakfire
202 j->pakfire = pakfire_ref(pakfire);
203
204 // Initialize reference counter
205 j->nrefs = 1;
206
207 // Store flags
208 j->flags = flags;
209
210 DEBUG(j->pakfire, "Allocated new jail at %p\n", j);
211
212 // Set default log callback
213 r = pakfire_jail_set_log_callback(j, pakfire_jail_default_log_callback, NULL);
214 if (r)
215 goto ERROR;
216
217 // Set default environment
218 for (const struct environ* e = ENV; e->key; e++) {
219 r = pakfire_jail_set_env(j, e->key, e->val);
220 if (r)
221 goto ERROR;
222 }
223
224 // Setup interactive stuff
225 if (j->flags & PAKFIRE_JAIL_INTERACTIVE) {
226 r = pakfire_jail_setup_interactive_env(j);
227 if (r)
228 goto ERROR;
229 }
230
231 // Done
232 *jail = j;
233 return 0;
234
235 ERROR:
236 pakfire_jail_free(j);
237
238 return r;
239 }
240
241 PAKFIRE_EXPORT struct pakfire_jail* pakfire_jail_ref(struct pakfire_jail* jail) {
242 ++jail->nrefs;
243
244 return jail;
245 }
246
247 PAKFIRE_EXPORT struct pakfire_jail* pakfire_jail_unref(struct pakfire_jail* jail) {
248 if (--jail->nrefs > 0)
249 return jail;
250
251 pakfire_jail_free(jail);
252 return NULL;
253 }
254
255 static int pakfire_jail_has_flag(struct pakfire_jail* jail, int flag) {
256 return jail->flags & flag;
257 }
258
259 // Resource Limits
260
261 PAKFIRE_EXPORT int pakfire_jail_nice(struct pakfire_jail* jail, int nice) {
262 // Check if nice level is in range
263 if (nice < -19 || nice > 20) {
264 errno = EINVAL;
265 return 1;
266 }
267
268 // Store nice level
269 jail->nice = nice;
270
271 return 0;
272 }
273
274 int pakfire_jail_set_cgroup(struct pakfire_jail* jail, struct pakfire_cgroup* cgroup) {
275 // Free any previous cgroup
276 if (jail->cgroup) {
277 pakfire_cgroup_unref(jail->cgroup);
278 jail->cgroup = NULL;
279 }
280
281 // Set any new cgroup
282 if (cgroup) {
283 DEBUG(jail->pakfire, "Setting cgroup %p\n", cgroup);
284
285 jail->cgroup = pakfire_cgroup_ref(cgroup);
286 }
287
288 // Done
289 return 0;
290 }
291
292 // Environment
293
294 // Returns the length of the environment
295 static unsigned int pakfire_jail_env_length(struct pakfire_jail* jail) {
296 unsigned int i = 0;
297
298 // Count everything in the environment
299 for (char** e = jail->env; *e; e++)
300 i++;
301
302 return i;
303 }
304
305 // Finds an existing environment variable and returns its index or -1 if not found
306 static int pakfire_jail_find_env(struct pakfire_jail* jail, const char* key) {
307 if (!key) {
308 errno = EINVAL;
309 return -1;
310 }
311
312 char buffer[strlen(key) + 2];
313 pakfire_string_format(buffer, "%s=", key);
314
315 for (unsigned int i = 0; jail->env[i]; i++) {
316 if (pakfire_string_startswith(jail->env[i], buffer))
317 return i;
318 }
319
320 // Nothing found
321 return -1;
322 }
323
324 // Returns the value of an environment variable or NULL
325 PAKFIRE_EXPORT const char* pakfire_jail_get_env(struct pakfire_jail* jail,
326 const char* key) {
327 int i = pakfire_jail_find_env(jail, key);
328 if (i < 0)
329 return NULL;
330
331 return jail->env[i] + strlen(key) + 1;
332 }
333
334 // Sets an environment variable
335 PAKFIRE_EXPORT int pakfire_jail_set_env(struct pakfire_jail* jail,
336 const char* key, const char* value) {
337 // Find the index where to write this value to
338 int i = pakfire_jail_find_env(jail, key);
339 if (i < 0)
340 i = pakfire_jail_env_length(jail);
341
342 // Return -ENOSPC when the environment is full
343 if (i >= ENVIRON_SIZE) {
344 errno = ENOSPC;
345 return -1;
346 }
347
348 // Free any previous value
349 if (jail->env[i])
350 free(jail->env[i]);
351
352 // Format and set environment variable
353 asprintf(&jail->env[i], "%s=%s", key, value);
354
355 DEBUG(jail->pakfire, "Set environment variable: %s\n", jail->env[i]);
356
357 return 0;
358 }
359
360 // Imports an environment
361 PAKFIRE_EXPORT int pakfire_jail_import_env(struct pakfire_jail* jail, const char* env[]) {
362 if (!env)
363 return 0;
364
365 char* key;
366 char* val;
367 int r;
368
369 // Copy environment variables
370 for (unsigned int i = 0; env[i]; i++) {
371 r = pakfire_string_partition(env[i], "=", &key, &val);
372 if (r)
373 continue;
374
375 // Set value
376 r = pakfire_jail_set_env(jail, key, val);
377
378 if (key)
379 free(key);
380 if (val)
381 free(val);
382
383 // Break on error
384 if (r)
385 return r;
386 }
387
388 return 0;
389 }
390
391 // Logging
392
393 PAKFIRE_EXPORT int pakfire_jail_set_log_callback(struct pakfire_jail* jail,
394 pakfire_jail_log_callback callback, void* data) {
395 jail->log_callback = callback;
396 jail->log_data = data;
397
398 return 0;
399 }
400
401 /*
402 This function replaces any logging in the child process.
403
404 All log messages will be sent to the parent process through their respective pipes.
405 */
406 static void pakfire_jail_log(void* data, int priority, const char* file,
407 int line, const char* fn, const char* format, va_list args) {
408 struct pakfire_jail_pipes* pipes = (struct pakfire_jail_pipes*)data;
409 int fd;
410
411 switch (priority) {
412 case LOG_INFO:
413 fd = pipes->log_INFO[1];
414 break;
415
416 case LOG_ERR:
417 fd = pipes->log_ERROR[1];
418 break;
419
420 #ifdef ENABLE_DEBUG
421 case LOG_DEBUG:
422 fd = pipes->log_DEBUG[1];
423 break;
424 #endif /* ENABLE_DEBUG */
425
426 // Ignore any messages of an unknown priority
427 default:
428 return;
429 }
430
431 // Send the log message
432 if (fd)
433 vdprintf(fd, format, args);
434 }
435
436 static int pakfire_jail_log_buffer_is_full(const struct pakfire_log_buffer* buffer) {
437 return (sizeof(buffer->data) == buffer->used);
438 }
439
440 /*
441 This function reads as much data as it can from the file descriptor.
442 If it finds a whole line in it, it will send it to the logger and repeat the process.
443 If not newline character is found, it will try to read more data until it finds one.
444 */
445 static int pakfire_jail_handle_log(struct pakfire_jail* jail,
446 struct pakfire_jail_exec* ctx, int priority, int fd,
447 struct pakfire_log_buffer* buffer, pakfire_jail_log_callback callback, void* data) {
448 char line[BUFFER_SIZE + 1];
449
450 // Fill up buffer from fd
451 if (buffer->used < sizeof(buffer->data)) {
452 ssize_t bytes_read = read(fd, buffer->data + buffer->used,
453 sizeof(buffer->data) - buffer->used);
454
455 // Handle errors
456 if (bytes_read < 0) {
457 ERROR(jail->pakfire, "Could not read from fd %d: %m\n", fd);
458 return -1;
459 }
460
461 // Update buffer size
462 buffer->used += bytes_read;
463 }
464
465 // See if we have any lines that we can write
466 while (buffer->used) {
467 // Search for the end of the first line
468 char* eol = memchr(buffer->data, '\n', buffer->used);
469
470 // No newline found
471 if (!eol) {
472 // If the buffer is full, we send the content to the logger and try again
473 // This should not happen in practise
474 if (pakfire_jail_log_buffer_is_full(buffer)) {
475 DEBUG(jail->pakfire, "Logging buffer is full. Sending all content\n");
476
477 eol = buffer->data + sizeof(buffer->data) - 1;
478
479 // Otherwise we might have only read parts of the output
480 } else
481 break;
482 }
483
484 // Find the length of the string
485 size_t length = eol - buffer->data + 1;
486
487 // Copy the line into the buffer
488 memcpy(line, buffer->data, length);
489
490 // Terminate the string
491 line[length] = '\0';
492
493 // Log the line
494 if (callback) {
495 int r = callback(jail->pakfire, data, priority, line, length);
496 if (r) {
497 ERROR(jail->pakfire, "The logging callback returned an error: %d\n", r);
498 return r;
499 }
500 }
501
502 // Remove line from buffer
503 memmove(buffer->data, buffer->data + length, buffer->used - length);
504 buffer->used -= length;
505 }
506
507 return 0;
508 }
509
510 static int pakfire_jail_setup_pipe(struct pakfire_jail* jail, int (*fds)[2], const int flags) {
511 int r = pipe2(*fds, flags);
512 if (r < 0) {
513 ERROR(jail->pakfire, "Could not setup pipe: %m\n");
514 return 1;
515 }
516
517 return 0;
518 }
519
/*
	Closes both ends of a pipe (ends which are zero are skipped)
*/
static void pakfire_jail_close_pipe(struct pakfire_jail* jail, int fds[2]) {
	// Read end
	if (fds[0])
		close(fds[0]);

	// Write end
	if (fds[1])
		close(fds[1]);
}
525
/*
	Closes the write end of the pipe (if it is open), marks it as closed
	and returns the file descriptor of the read end.
*/
static int pakfire_jail_get_pipe(struct pakfire_jail* jail, int (*fds)[2]) {
	int* const ends = *fds;

	// ends[1] is the write end
	if (ends[1]) {
		close(ends[1]);
		ends[1] = 0;
	}

	// ends[0] is the read end
	return ends[0];
}
544
545 static int pakfire_jail_wait(struct pakfire_jail* jail, struct pakfire_jail_exec* ctx) {
546 int epollfd = -1;
547 struct epoll_event ev;
548 struct epoll_event events[EPOLL_MAX_EVENTS];
549 int r = 0;
550
551 // Fetch file descriptors from context
552 const int stdout = pakfire_jail_get_pipe(jail, &ctx->pipes.stdout);
553 const int stderr = pakfire_jail_get_pipe(jail, &ctx->pipes.stderr);
554 const int pidfd = ctx->pidfd;
555
556 // Logging
557 const int log_INFO = pakfire_jail_get_pipe(jail, &ctx->pipes.log_INFO);
558 const int log_ERROR = pakfire_jail_get_pipe(jail, &ctx->pipes.log_ERROR);
559 const int log_DEBUG = pakfire_jail_get_pipe(jail, &ctx->pipes.log_DEBUG);
560
561 // Make a list of all file descriptors we are interested in
562 int fds[] = {
563 stdout, stderr, pidfd, log_INFO, log_ERROR, log_DEBUG,
564 };
565
566 // Setup epoll
567 epollfd = epoll_create1(0);
568 if (epollfd < 0) {
569 ERROR(jail->pakfire, "Could not initialize epoll(): %m\n");
570 r = 1;
571 goto ERROR;
572 }
573
574 ev.events = EPOLLIN;
575
576 // Turn file descriptors into non-blocking mode and add them to epoll()
577 for (unsigned int i = 0; i < sizeof(fds) / sizeof(*fds); i++) {
578 int fd = fds[i];
579
580 // Skip fds which were not initialized
581 if (fd <= 0)
582 continue;
583
584 ev.data.fd = fd;
585
586 if (epoll_ctl(epollfd, EPOLL_CTL_ADD, fd, &ev) < 0) {
587 ERROR(jail->pakfire, "Could not add file descriptor %d to epoll(): %m\n", fd);
588 r = 1;
589 goto ERROR;
590 }
591 }
592
593 int ended = 0;
594
595 // Loop for as long as the process is alive
596 while (!ended) {
597 int num = epoll_wait(epollfd, events, EPOLL_MAX_EVENTS, -1);
598 if (num < 1) {
599 // Ignore if epoll_wait() has been interrupted
600 if (errno == EINTR)
601 continue;
602
603 ERROR(jail->pakfire, "epoll_wait() failed: %m\n");
604 r = 1;
605
606 goto ERROR;
607 }
608
609 for (int i = 0; i < num; i++) {
610 int fd = events[i].data.fd;
611
612 struct pakfire_log_buffer* buffer = NULL;
613 pakfire_jail_log_callback callback = NULL;
614 void* data = NULL;
615 int priority;
616
617 // Handle any changes to the PIDFD
618 if (fd == pidfd) {
619 // Call waidid() and store the result
620 r = waitid(P_PIDFD, ctx->pidfd, &ctx->status, WEXITED);
621 if (r) {
622 ERROR(jail->pakfire, "waitid() failed: %m\n");
623 goto ERROR;
624 }
625
626 // Mark that we have ended so that we will process the remaining
627 // events from epoll() now, but won't restart the outer loop.
628 ended = 1;
629 continue;
630
631 // Handle logging messages
632 } else if (fd == log_INFO) {
633 buffer = &ctx->buffers.log_INFO;
634 priority = LOG_INFO;
635
636 callback = pakfire_jail_default_log_callback;
637
638 } else if (fd == log_ERROR) {
639 buffer = &ctx->buffers.log_ERROR;
640 priority = LOG_ERR;
641
642 callback = pakfire_jail_default_log_callback;
643
644 } else if (fd == log_DEBUG) {
645 buffer = &ctx->buffers.log_DEBUG;
646 priority = LOG_DEBUG;
647
648 callback = pakfire_jail_default_log_callback;
649
650 // Handle anything from the log pipes
651 } else if (fd == stdout) {
652 buffer = &ctx->buffers.stdout;
653 priority = LOG_INFO;
654
655 callback = jail->log_callback;
656 data = jail->log_data;
657
658 } else if (fd == stderr) {
659 buffer = &ctx->buffers.stderr;
660 priority = LOG_ERR;
661
662 callback = jail->log_callback;
663 data = jail->log_data;
664
665 } else {
666 DEBUG(jail->pakfire, "Received invalid file descriptor %d\n", fd);
667 continue;
668 }
669
670 // Handle log event
671 r = pakfire_jail_handle_log(jail, ctx, priority, fd, buffer, callback, data);
672 if (r)
673 goto ERROR;
674 }
675 }
676
677 ERROR:
678 if (epollfd > 0)
679 close(epollfd);
680
681 return r;
682 }
683
/*
	Log callback which collects the standard output of a command in a string.

	data is expected to point to a char* which is either NULL or a string
	that has been allocated on the heap. Every line of priority LOG_INFO is
	appended to that string; the previous string is freed and replaced by
	the new allocation. The caller is responsible for freeing the result.

	Everything else (and everything if data is NULL) is sent to the default
	log callback.

	Returns zero on success and 1 if memory could not be allocated, in which
	case the string collected so far remains untouched.
*/
static int pakfire_jail_capture_stdout(struct pakfire* pakfire, void* data, int priority,
		const char* line, size_t length) {
	char** output = (char**)data;

	// Append everything from stdout to a buffer
	if (output && priority == LOG_INFO) {
		char* previous = *output;
		char* combined = NULL;

		int r = asprintf(&combined, "%s%s", (previous) ? previous : "", line);
		if (r < 0)
			return 1;

		// The previous content has been copied and is no longer needed
		free(previous);
		*output = combined;

		return 0;
	}

	// Send everything else to the default logger
	return pakfire_jail_default_log_callback(pakfire, NULL, priority, line, length);
}
700
701 // Capabilities
702
703 static int pakfire_jail_drop_capabilities(struct pakfire_jail* jail) {
704 const int capabilities[] = {
705 // Deny access to the kernel's audit system
706 CAP_AUDIT_CONTROL,
707 CAP_AUDIT_READ,
708 CAP_AUDIT_WRITE,
709
710 // Deny suspending block devices
711 CAP_BLOCK_SUSPEND,
712
713 // Deny any stuff with BPF
714 CAP_BPF,
715
716 // Deny checkpoint restore
717 CAP_CHECKPOINT_RESTORE,
718
719 // Deny opening files by inode number (open_by_handle_at)
720 CAP_DAC_READ_SEARCH,
721
722 // Deny setting SUID bits
723 CAP_FSETID,
724
725 // Deny locking more memory
726 CAP_IPC_LOCK,
727
728 // Deny modifying any Apparmor/SELinux/SMACK configuration
729 CAP_MAC_ADMIN,
730 CAP_MAC_OVERRIDE,
731
732 // Deny creating any special devices
733 CAP_MKNOD,
734
735 // Deny setting any capabilities
736 CAP_SETFCAP,
737
738 // Deny reading from syslog
739 CAP_SYSLOG,
740
741 // Deny any admin actions (mount, sethostname, ...)
742 CAP_SYS_ADMIN,
743
744 // Deny rebooting the system
745 CAP_SYS_BOOT,
746
747 // Deny loading kernel modules
748 CAP_SYS_MODULE,
749
750 // Deny setting nice level
751 CAP_SYS_NICE,
752
753 // Deny access to /proc/kcore, /dev/mem, /dev/kmem
754 CAP_SYS_RAWIO,
755
756 // Deny circumventing any resource limits
757 CAP_SYS_RESOURCE,
758
759 // Deny setting the system time
760 CAP_SYS_TIME,
761
762 // Deny playing with suspend
763 CAP_WAKE_ALARM,
764
765 0,
766 };
767
768 DEBUG(jail->pakfire, "Dropping capabilities...\n");
769
770 size_t num_caps = 0;
771 int r;
772
773 // Drop any capabilities
774 for (const int* cap = capabilities; *cap; cap++) {
775 r = prctl(PR_CAPBSET_DROP, *cap, 0, 0, 0);
776 if (r) {
777 ERROR(jail->pakfire, "Could not drop capability %d: %m\n", *cap);
778 return r;
779 }
780
781 num_caps++;
782 }
783
784 // Fetch any capabilities
785 cap_t caps = cap_get_proc();
786 if (!caps) {
787 ERROR(jail->pakfire, "Could not read capabilities: %m\n");
788 return 1;
789 }
790
791 /*
792 Set inheritable capabilities
793
794 This ensures that no processes will be able to gain any of the listed
795 capabilities again.
796 */
797 r = cap_set_flag(caps, CAP_INHERITABLE, num_caps, capabilities, CAP_CLEAR);
798 if (r) {
799 ERROR(jail->pakfire, "cap_set_flag() failed: %m\n");
800 goto ERROR;
801 }
802
803 // Restore capabilities
804 r = cap_set_proc(caps);
805 if (r) {
806 ERROR(jail->pakfire, "Could not restore capabilities: %m\n");
807 goto ERROR;
808 }
809
810 ERROR:
811 if (caps)
812 cap_free(caps);
813
814 return r;
815 }
816
817 // Syscall Filter
818
819 static int pakfire_jail_limit_syscalls(struct pakfire_jail* jail) {
820 const int syscalls[] = {
821 // The kernel's keyring isn't namespaced
822 SCMP_SYS(keyctl),
823 SCMP_SYS(add_key),
824 SCMP_SYS(request_key),
825
826 // Disable userfaultfd
827 SCMP_SYS(userfaultfd),
828
829 // Disable perf which could leak a lot of information about the host
830 SCMP_SYS(perf_event_open),
831
832 0,
833 };
834 int r = 1;
835
836 DEBUG(jail->pakfire, "Applying syscall filter...\n");
837
838 // Setup a syscall filter which allows everything by default
839 scmp_filter_ctx ctx = seccomp_init(SCMP_ACT_ALLOW);
840 if (!ctx) {
841 ERROR(jail->pakfire, "Could not setup seccomp filter: %m\n");
842 goto ERROR;
843 }
844
845 // All all syscalls
846 for (const int* syscall = syscalls; *syscall; syscall++) {
847 r = seccomp_rule_add(ctx, SCMP_ACT_ERRNO(EPERM), *syscall, 0);
848 if (r) {
849 ERROR(jail->pakfire, "Could not configure syscall %d: %m\n", *syscall);
850 goto ERROR;
851 }
852 }
853
854 // Load syscall filter into the kernel
855 r = seccomp_load(ctx);
856 if (r) {
857 ERROR(jail->pakfire, "Could not load syscall filter into the kernel: %m\n");
858 goto ERROR;
859 }
860
861 ERROR:
862 if (ctx)
863 seccomp_release(ctx);
864
865 return r;
866 }
867
868 // UID/GID Mapping
869
870 static int pakfire_jail_write_uidgid_mapping(struct pakfire_jail* jail,
871 const char* path, uid_t mapped_id, size_t length) {
872 int r = 1;
873
874 // Open file for writing
875 FILE* f = fopen(path, "w");
876 if (!f) {
877 ERROR(jail->pakfire, "Could not open %s for writing: %m\n", path);
878 goto ERROR;
879 }
880
881 // Write configuration
882 int bytes_written = fprintf(f, "%d %d %ld\n", 0, mapped_id, length);
883 if (bytes_written <= 0) {
884 ERROR(jail->pakfire, "Could not write UID/GID mapping: %m\n");
885 goto ERROR;
886 }
887
888 // Close the file
889 r = fclose(f);
890 f = NULL;
891 if (r) {
892 ERROR(jail->pakfire, "Could not write UID/GID mapping: %m\n");
893
894 goto ERROR;
895 }
896
897 // Success
898 r = 0;
899
900 ERROR:
901 if (f)
902 fclose(f);
903
904 return r;
905 }
906
907 static int pakfire_jail_setup_uid_mapping(struct pakfire_jail* jail, pid_t pid) {
908 char path[PATH_MAX];
909 int r;
910
911 uid_t mapped_uid = 0;
912 const size_t length = 1;
913
914 // Fetch the UID of the calling process
915 uid_t uid = getuid();
916
917 // Have we been called by root?
918 if (uid == 0) {
919 mapped_uid = 0;
920
921 // Have we been called by an unprivileged user?
922 } else {
923 // XXX fetch SUBUID
924 mapped_uid = uid;
925 }
926
927 // Make path
928 r = pakfire_string_format(path, "/proc/%d/uid_map", pid);
929 if (r < 0)
930 return 1;
931
932 DEBUG(jail->pakfire, "Mapping UID range (%u - %lu)\n", mapped_uid, mapped_uid + length);
933
934 return pakfire_jail_write_uidgid_mapping(jail, path, mapped_uid, length);
935 }
936
937 static int pakfire_jail_setup_gid_mapping(struct pakfire_jail* jail, pid_t pid) {
938 char path[PATH_MAX];
939 int r;
940
941 gid_t mapped_gid = 0;
942 const size_t length = 1;
943
944 // Fetch the GID of the calling process
945 gid_t gid = getgid();
946
947 // Have we been called from the root group?
948 if (gid == 0) {
949 mapped_gid = 0;
950
951 // Have we been called by an unprivileged group?
952 } else {
953 // XXX fetch SUBGID
954 mapped_gid = gid;
955 }
956
957 // Make path
958 r = pakfire_string_format(path, "/proc/%d/gid_map", pid);
959 if (r < 0)
960 return 1;
961
962 DEBUG(jail->pakfire, "Mapping GID range (%u - %lu)\n", mapped_gid, mapped_gid + length);
963
964 return pakfire_jail_write_uidgid_mapping(jail, path, mapped_gid, length);
965 }
966
967 static int pakfire_jail_setgroups(struct pakfire_jail* jail, pid_t pid) {
968 char path[PATH_MAX];
969 int r = 1;
970
971 // Make path
972 r = pakfire_string_format(path, "/proc/%d/setgroups", pid);
973 if (r < 0)
974 return 1;
975
976 // Open file for writing
977 FILE* f = fopen(path, "w");
978 if (!f) {
979 ERROR(jail->pakfire, "Could not open %s for writing: %m\n", path);
980 goto ERROR;
981 }
982
983 // Write content
984 int bytes_written = fprintf(f, "deny\n");
985 if (bytes_written <= 0) {
986 ERROR(jail->pakfire, "Could not write to %s: %m\n", path);
987 goto ERROR;
988 }
989
990 r = fclose(f);
991 f = NULL;
992 if (r) {
993 ERROR(jail->pakfire, "Could not close %s: %m\n", path);
994 goto ERROR;
995 }
996
997 ERROR:
998 if (f)
999 fclose(f);
1000
1001 return r;
1002 }
1003
1004 static int pakfire_jail_send_signal(struct pakfire_jail* jail, int fd) {
1005 const uint64_t val = 1;
1006 int r = 0;
1007
1008 DEBUG(jail->pakfire, "Sending signal...\n");
1009
1010 // Write to the file descriptor
1011 ssize_t bytes_written = write(fd, &val, sizeof(val));
1012 if (bytes_written < 0 || (size_t)bytes_written < sizeof(val)) {
1013 ERROR(jail->pakfire, "Could not send signal: %m\n");
1014 r = 1;
1015 }
1016
1017 // Close the file descriptor
1018 close(fd);
1019
1020 return r;
1021 }
1022
1023 static int pakfire_jail_wait_for_signal(struct pakfire_jail* jail, int fd) {
1024 uint64_t val = 0;
1025 int r = 0;
1026
1027 DEBUG(jail->pakfire, "Waiting for signal...\n");
1028
1029 ssize_t bytes_read = read(fd, &val, sizeof(val));
1030 if (bytes_read < 0 || (size_t)bytes_read < sizeof(val)) {
1031 ERROR(jail->pakfire, "Error waiting for signal: %m\n");
1032 r = 1;
1033 }
1034
1035 // Close the file descriptor
1036 close(fd);
1037
1038 return r;
1039 }
1040
1041 /*
1042 Performs the initialisation that needs to happen in the parent part
1043 */
1044 static int pakfire_jail_parent(struct pakfire_jail* jail, struct pakfire_jail_exec* ctx) {
1045 int r;
1046
1047 // Setup UID mapping
1048 r = pakfire_jail_setup_uid_mapping(jail, ctx->pid);
1049 if (r)
1050 return r;
1051
1052 // Write "deny" to /proc/PID/setgroups
1053 r = pakfire_jail_setgroups(jail, ctx->pid);
1054 if (r)
1055 return r;
1056
1057 // Setup GID mapping
1058 r = pakfire_jail_setup_gid_mapping(jail, ctx->pid);
1059 if (r)
1060 return r;
1061
1062 // Parent has finished initialisation
1063 DEBUG(jail->pakfire, "Parent has finished initialization\n");
1064
1065 // Send signal to client
1066 r = pakfire_jail_send_signal(jail, ctx->completed_fd);
1067 if (r)
1068 return r;
1069
1070 return 0;
1071 }
1072
1073 static int pakfire_jail_child(struct pakfire_jail* jail, struct pakfire_jail_exec* ctx,
1074 const char* argv[]) {
1075 int r;
1076
1077 // Redirect any logging to our log pipe
1078 pakfire_set_log_callback(jail->pakfire, pakfire_jail_log, &ctx->pipes);
1079
1080 // Fetch my own PID
1081 pid_t pid = getpid();
1082
1083 DEBUG(jail->pakfire, "Launched child process in jail with PID %d\n", pid);
1084
1085 // Log argv
1086 for (unsigned int i = 0; argv[i]; i++)
1087 DEBUG(jail->pakfire, " argv[%d] = %s\n", i, argv[i]);
1088
1089 // Wait for the parent to finish initialization
1090 r = pakfire_jail_wait_for_signal(jail, ctx->completed_fd);
1091 if (r)
1092 return r;
1093
1094 // Perform further initialization
1095
1096 // Fetch UID/GID
1097 uid_t uid = getuid();
1098 gid_t gid = getgid();
1099
1100 // Fetch EUID/EGID
1101 uid_t euid = geteuid();
1102 gid_t egid = getegid();
1103
1104 DEBUG(jail->pakfire, " UID: %d (effective %d)\n", uid, euid);
1105 DEBUG(jail->pakfire, " GID: %d (effective %d)\n", gid, egid);
1106
1107 // Check if we are (effectively running as root)
1108 if (uid != 0 || gid != 0) {
1109 ERROR(jail->pakfire, "Child process is not running as root\n");
1110 return 126;
1111 }
1112
1113 const char* root = pakfire_get_path(jail->pakfire);
1114 const char* arch = pakfire_get_arch(jail->pakfire);
1115
1116 // Change root (unless root is /)
1117 if (!pakfire_on_root(jail->pakfire)) {
1118 // Mount everything
1119 r = pakfire_mount_all(jail->pakfire);
1120 if (r)
1121 return r;
1122
1123 // Log all mountpoints
1124 pakfire_mount_list(jail->pakfire);
1125
1126 // Call chroot()
1127 r = chroot(root);
1128 if (r) {
1129 ERROR(jail->pakfire, "chroot() to %s failed: %m\n", root);
1130 return 1;
1131 }
1132
1133 // Change directory to /
1134 r = chdir("/");
1135 if (r) {
1136 ERROR(jail->pakfire, "chdir() after chroot() failed: %m\n");
1137 return 1;
1138 }
1139 }
1140
1141 // Set personality
1142 unsigned long persona = pakfire_arch_personality(arch);
1143 if (persona) {
1144 r = personality(persona);
1145 if (r < 0) {
1146 ERROR(jail->pakfire, "Could not set personality (%x)\n", (unsigned int)persona);
1147 return 1;
1148 }
1149 }
1150
1151 // Set nice level
1152 if (jail->nice) {
1153 DEBUG(jail->pakfire, "Setting nice level to %d\n", jail->nice);
1154
1155 r = setpriority(PRIO_PROCESS, pid, jail->nice);
1156 if (r) {
1157 ERROR(jail->pakfire, "Could not set nice level: %m\n");
1158 return 1;
1159 }
1160 }
1161
1162 // Close other end of log pipes
1163 close(ctx->pipes.log_INFO[0]);
1164 close(ctx->pipes.log_ERROR[0]);
1165 #ifdef ENABLE_DEBUG
1166 close(ctx->pipes.log_DEBUG[0]);
1167 #endif /* ENABLE_DEBUG */
1168
1169 // Connect standard output and error
1170 if (ctx->pipes.stdout[1] && ctx->pipes.stderr[1]) {
1171 r = dup2(ctx->pipes.stdout[1], STDOUT_FILENO);
1172 if (r < 0) {
1173 ERROR(jail->pakfire, "Could not connect fd %d to stdout: %m\n",
1174 ctx->pipes.stdout[1]);
1175
1176 return 1;
1177 }
1178
1179 r = dup2(ctx->pipes.stderr[1], STDERR_FILENO);
1180 if (r < 0) {
1181 ERROR(jail->pakfire, "Could not connect fd %d to stderr: %m\n",
1182 ctx->pipes.stderr[1]);
1183
1184 return 1;
1185 }
1186
1187 // Close the pipe (as we have moved the original file descriptors)
1188 pakfire_jail_close_pipe(jail, ctx->pipes.stdout);
1189 pakfire_jail_close_pipe(jail, ctx->pipes.stderr);
1190 }
1191
1192 // Reset open file limit (http://0pointer.net/blog/file-descriptor-limits.html)
1193 r = pakfire_rlimit_reset_nofile(jail->pakfire);
1194 if (r)
1195 return r;
1196
1197 // Drop capabilities
1198 r = pakfire_jail_drop_capabilities(jail);
1199 if (r)
1200 return r;
1201
1202 // Filter syscalls
1203 r = pakfire_jail_limit_syscalls(jail);
1204 if (r)
1205 return r;
1206
1207 // exec() command
1208 r = execvpe(argv[0], (char**)argv, jail->env);
1209 if (r < 0)
1210 ERROR(jail->pakfire, "Could not execve(): %m\n");
1211
1212 // Translate errno into regular exit code
1213 switch (errno) {
1214 case ENOENT:
1215 r = 127;
1216 break;
1217
1218 default:
1219 r = 1;
1220 }
1221
1222 // We should not get here
1223 return r;
1224 }
1225
1226 // Run a command in the jail
1227 static int __pakfire_jail_exec(struct pakfire_jail* jail, const char* argv[]) {
1228 int exit = -1;
1229 int r;
1230
1231 // Check if argv is valid
1232 if (!argv || !argv[0]) {
1233 errno = EINVAL;
1234 return -1;
1235 }
1236
1237 // Initialize context for this call
1238 struct pakfire_jail_exec ctx = {
1239 .pipes = {
1240 .stdout = { 0, 0 },
1241 .stderr = { 0, 0 },
1242 },
1243 };
1244
1245 DEBUG(jail->pakfire, "Executing jail...\n");
1246
1247 /*
1248 Setup a file descriptor which can be used to notify the client that the parent
1249 has completed configuration.
1250 */
1251 ctx.completed_fd = eventfd(0, EFD_CLOEXEC);
1252 if (ctx.completed_fd < 0) {
1253 ERROR(jail->pakfire, "eventfd() failed: %m\n");
1254 return -1;
1255 }
1256
1257 // Create pipes to communicate with child process if we are not running interactively
1258 if (!pakfire_jail_has_flag(jail, PAKFIRE_JAIL_INTERACTIVE)) {
1259 // stdout
1260 r = pakfire_jail_setup_pipe(jail, &ctx.pipes.stdout, 0);
1261 if (r)
1262 goto ERROR;
1263
1264 // stderr
1265 r = pakfire_jail_setup_pipe(jail, &ctx.pipes.stderr, 0);
1266 if (r)
1267 goto ERROR;
1268 }
1269
1270 // Setup pipes for logging
1271 // INFO
1272 r = pakfire_jail_setup_pipe(jail, &ctx.pipes.log_INFO, O_CLOEXEC);
1273 if (r)
1274 goto ERROR;
1275
1276 // ERROR
1277 r = pakfire_jail_setup_pipe(jail, &ctx.pipes.log_ERROR, O_CLOEXEC);
1278 if (r)
1279 goto ERROR;
1280
1281 #ifdef ENABLE_DEBUG
1282 // DEBUG
1283 r = pakfire_jail_setup_pipe(jail, &ctx.pipes.log_DEBUG, O_CLOEXEC);
1284 if (r)
1285 goto ERROR;
1286 #endif /* ENABLE_DEBUG */
1287
1288 // Configure child process
1289 struct clone_args args = {
1290 .flags =
1291 CLONE_NEWCGROUP |
1292 CLONE_NEWIPC |
1293 CLONE_NEWNS |
1294 CLONE_NEWPID |
1295 CLONE_NEWUSER |
1296 CLONE_NEWUTS |
1297 CLONE_PIDFD,
1298 .exit_signal = SIGCHLD,
1299 .pidfd = (long long unsigned int)&ctx.pidfd,
1300 };
1301
1302 // Launch the process in a cgroup (if requested)
1303 if (jail->cgroup) {
1304 args.flags |= CLONE_INTO_CGROUP;
1305
1306 // Clone into this cgroup
1307 args.cgroup = pakfire_cgroup_fd(jail->cgroup);
1308 }
1309
1310 // Fork this process
1311 ctx.pid = clone3(&args, sizeof(args));
1312 if (ctx.pid < 0) {
1313 ERROR(jail->pakfire, "Could not clone: %m\n");
1314 return -1;
1315
1316 // Child process
1317 } else if (ctx.pid == 0) {
1318 r = pakfire_jail_child(jail, &ctx, argv);
1319 _exit(r);
1320 }
1321
1322 // Parent process
1323 r = pakfire_jail_parent(jail, &ctx);
1324 if (r)
1325 goto ERROR;
1326
1327 DEBUG(jail->pakfire, "Waiting for PID %d to finish its work\n", ctx.pid);
1328
1329 // Read output of the child process
1330 r = pakfire_jail_wait(jail, &ctx);
1331 if (r)
1332 goto ERROR;
1333
1334 // Handle exit status
1335 switch (ctx.status.si_code) {
1336 case CLD_EXITED:
1337 DEBUG(jail->pakfire, "The child process exited with code %d\n",
1338 ctx.status.si_status);
1339
1340 // Pass exit code
1341 exit = ctx.status.si_status;
1342 break;
1343
1344 case CLD_KILLED:
1345 case CLD_DUMPED:
1346 ERROR(jail->pakfire, "The child process was killed\n");
1347 break;
1348
1349 // Log anything else
1350 default:
1351 ERROR(jail->pakfire, "Unknown child exit code: %d\n", ctx.status.si_code);
1352 break;
1353 }
1354
1355 ERROR:
1356 // Close any file descriptors
1357 pakfire_jail_close_pipe(jail, ctx.pipes.stdout);
1358 pakfire_jail_close_pipe(jail, ctx.pipes.stderr);
1359 if (ctx.pidfd)
1360 close(ctx.pidfd);
1361 pakfire_jail_close_pipe(jail, ctx.pipes.log_INFO);
1362 pakfire_jail_close_pipe(jail, ctx.pipes.log_ERROR);
1363 pakfire_jail_close_pipe(jail, ctx.pipes.log_DEBUG);
1364
1365 // Umount everything
1366 if (!pakfire_on_root(jail->pakfire))
1367 pakfire_umount_all(jail->pakfire);
1368
1369 return exit;
1370 }
1371
1372 PAKFIRE_EXPORT int pakfire_jail_exec(struct pakfire_jail* jail,
1373 const char* argv[], char** output) {
1374 int r;
1375
1376 // Store logging callback
1377 pakfire_jail_log_callback log_callback = jail->log_callback;
1378 void* log_data = jail->log_data;
1379
1380 // Capture output if requested by user
1381 if (output)
1382 pakfire_jail_set_log_callback(jail, pakfire_jail_capture_stdout, output);
1383
1384 // Run exec()
1385 r = __pakfire_jail_exec(jail, argv);
1386
1387 // Restore log callback
1388 pakfire_jail_set_log_callback(jail, log_callback, log_data);
1389
1390 return r;
1391 }
1392
1393 PAKFIRE_EXPORT int pakfire_jail_exec_script(struct pakfire_jail* jail,
1394 const char* script, const size_t size, const char* args[], char** output) {
1395 char path[PATH_MAX];
1396 const char** argv = NULL;
1397 int r;
1398
1399 const char* root = pakfire_get_path(jail->pakfire);
1400
1401 // Write the scriptlet to disk
1402 r = pakfire_path_join(path, root, "pakfire-script.XXXXXX");
1403 if (r < 0)
1404 goto ERROR;
1405
1406 // Open a temporary file
1407 int fd = mkstemp(path);
1408 if (fd < 0) {
1409 ERROR(jail->pakfire, "Could not open a temporary file: %m\n");
1410 r = 1;
1411 goto ERROR;
1412 }
1413
1414 DEBUG(jail->pakfire, "Writing script to %s:\n%.*s\n", path, (int)size, script);
1415
1416 // Write data
1417 ssize_t bytes_written = write(fd, script, size);
1418 if (bytes_written < (ssize_t)size) {
1419 ERROR(jail->pakfire, "Could not write script to file %s: %m\n", path);
1420 r = 1;
1421 goto ERROR;
1422 }
1423
1424 // Make the script executable
1425 r = fchmod(fd, S_IRUSR|S_IWUSR|S_IXUSR);
1426 if (r) {
1427 ERROR(jail->pakfire, "Could not set executable permissions on %s: %m\n", path);
1428 goto ERROR;
1429 }
1430
1431 // Close file
1432 r = close(fd);
1433 if (r) {
1434 ERROR(jail->pakfire, "Could not close script file %s: %m\n", path);
1435 r = 1;
1436 goto ERROR;
1437 }
1438
1439 // Count how many arguments were passed
1440 unsigned int argc = 1;
1441 if (args) {
1442 for (const char** arg = args; *arg; arg++)
1443 argc++;
1444 }
1445
1446 argv = calloc(argc + 1, sizeof(*argv));
1447 if (!argv) {
1448 ERROR(jail->pakfire, "Could not allocate argv: %m\n");
1449 goto ERROR;
1450 }
1451
1452 // Set command
1453 argv[0] = (root) ? pakfire_path_relpath(root, path) : path;
1454
1455 // Copy args
1456 for (unsigned int i = 1; i < argc; i++)
1457 argv[i] = args[i-1];
1458
1459 // Run the script
1460 r = pakfire_jail_exec(jail, argv, output);
1461
1462 ERROR:
1463 if (argv)
1464 free(argv);
1465
1466 // Remove script from disk
1467 if (*path)
1468 unlink(path);
1469
1470 return r;
1471 }
1472
/*
	Convenience wrapper: sets up a fresh jail with the given flags, executes
	argv in it and releases the jail again.

	Returns the error from pakfire_jail_create() if the jail could not be
	created, and the result of pakfire_jail_exec() otherwise.
*/
int pakfire_jail_run(struct pakfire* pakfire, const char* argv[], int flags, char** output) {
	struct pakfire_jail* jail = NULL;

	// Only execute the command if the jail could be created
	int r = pakfire_jail_create(&jail, pakfire, flags);
	if (r == 0)
		r = pakfire_jail_exec(jail, argv, output);

	// Drop our reference to the jail (if there is one)
	if (jail != NULL)
		pakfire_jail_unref(jail);

	return r;
}
1495
/*
	Convenience wrapper: sets up a fresh jail with the given flags, runs the
	script (of the given length, with argv as its arguments) in it and
	releases the jail again.

	Returns the error from pakfire_jail_create() if the jail could not be
	created, and the result of pakfire_jail_exec_script() otherwise.
*/
int pakfire_jail_run_script(struct pakfire* pakfire,
		const char* script, const size_t length, const char* argv[], int flags, char** output) {
	struct pakfire_jail* jail = NULL;

	// Only run the script if the jail could be created
	int r = pakfire_jail_create(&jail, pakfire, flags);
	if (r == 0)
		r = pakfire_jail_exec_script(jail, script, length, argv, output);

	// Drop our reference to the jail (if there is one)
	if (jail != NULL)
		pakfire_jail_unref(jail);

	return r;
}
1515
1516
1517 int pakfire_jail_shell(struct pakfire* pakfire) {
1518 const char* argv[] = {
1519 "/bin/bash", "--login", NULL,
1520 };
1521
1522 // Execute /bin/bash
1523 return pakfire_jail_run(pakfire, argv, PAKFIRE_JAIL_INTERACTIVE, NULL);
1524 }
1525
1526 int pakfire_jail_ldconfig(struct pakfire* pakfire) {
1527 char path[PATH_MAX];
1528
1529 const char* ldconfig = "/sbin/ldconfig";
1530
1531 // Check if ldconfig exists before calling it to avoid overhead
1532 int r = pakfire_make_path(pakfire, path, ldconfig);
1533 if (r < 0)
1534 return 1;
1535
1536 // Check if ldconfig is executable
1537 r = access(path, X_OK);
1538 if (r) {
1539 DEBUG(pakfire, "%s is not executable. Skipping...\n", ldconfig);
1540 return 0;
1541 }
1542
1543 const char* argv[] = {
1544 ldconfig, NULL,
1545 };
1546
1547 // Run ldconfig
1548 return pakfire_jail_run(pakfire, argv, 0, NULL);
1549 }