]> git.ipfire.org Git - pakfire.git/blob - src/libpakfire/jail.c
jail: Check effective UID/GID for root as well
[pakfire.git] / src / libpakfire / jail.c
1 /*#############################################################################
2 # #
3 # Pakfire - The IPFire package management system #
4 # Copyright (C) 2022 Pakfire development team #
5 # #
6 # This program is free software: you can redistribute it and/or modify #
7 # it under the terms of the GNU General Public License as published by #
8 # the Free Software Foundation, either version 3 of the License, or #
9 # (at your option) any later version. #
10 # #
11 # This program is distributed in the hope that it will be useful, #
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of #
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
14 # GNU General Public License for more details. #
15 # #
16 # You should have received a copy of the GNU General Public License #
17 # along with this program. If not, see <http://www.gnu.org/licenses/>. #
18 # #
19 #############################################################################*/
20
21 #include <errno.h>
22 #include <linux/capability.h>
23 #include <linux/fcntl.h>
24 #include <linux/sched.h>
25 #include <linux/wait.h>
26 #include <sched.h>
27 #include <signal.h>
28 #include <stdlib.h>
29 #include <syscall.h>
30 #include <sys/capability.h>
31 #include <sys/epoll.h>
32 #include <sys/eventfd.h>
33 #include <sys/personality.h>
34 #include <sys/prctl.h>
35 #include <sys/resource.h>
36 #include <sys/types.h>
37 #include <sys/wait.h>
38
39 // libseccomp
40 #include <seccomp.h>
41
42 #include <pakfire/arch.h>
43 #include <pakfire/cgroup.h>
44 #include <pakfire/jail.h>
45 #include <pakfire/logging.h>
46 #include <pakfire/mount.h>
47 #include <pakfire/pakfire.h>
48 #include <pakfire/private.h>
49 #include <pakfire/util.h>
50
// Size of each log buffer in bytes (64 KiB). The expression is parenthesised
// so that the macro expands correctly inside larger expressions.
#define BUFFER_SIZE (1024 * 64)
// Number of slots in a jail's environment array
#define ENVIRON_SIZE 128
// Maximum number of events fetched by a single epoll_wait() call
#define EPOLL_MAX_EVENTS 2
54
// One entry of the default environment
struct environ {
	const char* key;
	const char* val;
};

// Variables that are set for every newly created jail.
// The table is terminated by an entry whose key is NULL.
static const struct environ ENV[] = {
	{ .key = "LANG", .val = "en_US.utf-8" },
	{ .key = "TERM", .val = "vt100" },
	{ .key = NULL,   .val = NULL },
};
64
65 struct pakfire_jail {
66 struct pakfire* pakfire;
67 int nrefs;
68
69 // Flags
70 int flags;
71
72 // Resource Limits
73 int nice;
74
75 // CGroup
76 struct pakfire_cgroup* cgroup;
77
78 // Environment
79 char* env[ENVIRON_SIZE];
80
81 // Logging
82 pakfire_jail_log_callback log_callback;
83 void* log_data;
84 };
85
86 struct pakfire_log_buffer {
87 char data[BUFFER_SIZE];
88 size_t used;
89 };
90
91 struct pakfire_jail_exec {
92 // PID (of the child)
93 pid_t pid;
94 int pidfd;
95
96 // Process status (from waitid)
97 siginfo_t status;
98
99 // FD to notify the client that the parent has finished initialization
100 int completed_fd;
101
102 // Log pipes
103 struct pakfire_jail_pipes {
104 int stdout[2];
105 int stderr[2];
106
107 // Logging
108 int log_INFO[2];
109 int log_ERROR[2];
110 int log_DEBUG[2];
111 } pipes;
112
113 // Log buffers
114 struct pakfire_jail_buffers {
115 struct pakfire_log_buffer stdout;
116 struct pakfire_log_buffer stderr;
117
118 // Logging
119 struct pakfire_log_buffer log_INFO;
120 struct pakfire_log_buffer log_ERROR;
121 struct pakfire_log_buffer log_DEBUG;
122 } buffers;
123
124 struct pakfire_cgroup* cgroup;
125 struct pakfire_cgroup_stats cgroup_stats;
126 };
127
/*
	Thin wrapper around the raw clone3 system call.

	Both arguments are handed straight to syscall() and its result is
	returned converted to int.
*/
static int clone3(struct clone_args* args, size_t size) {
	const long result = syscall(__NR_clone3, args, size);

	return result;
}
131
132 static void pakfire_jail_free(struct pakfire_jail* jail) {
133 DEBUG(jail->pakfire, "Freeing jail at %p\n", jail);
134
135 // Free environment
136 for (unsigned int i = 0; jail->env[i]; i++)
137 free(jail->env[i]);
138
139 if (jail->cgroup)
140 pakfire_cgroup_unref(jail->cgroup);
141
142 pakfire_unref(jail->pakfire);
143 free(jail);
144 }
145
/*
	Default log callback which hands every line to the pakfire logger
	that matches its priority.

	Lines with any other priority are dropped (LOG_DEBUG is only handled
	when ENABLE_DEBUG is defined). Always returns 0.
*/
static int pakfire_jail_default_log_callback(struct pakfire* pakfire, void* data,
		int priority, const char* line, size_t length) {
	if (priority == LOG_INFO) {
		INFO(pakfire, "%s", line);

	} else if (priority == LOG_ERR) {
		ERROR(pakfire, "%s", line);
	}
#ifdef ENABLE_DEBUG
	else if (priority == LOG_DEBUG) {
		DEBUG(pakfire, "%s", line);
	}
#endif

	return 0;
}
169
/*
	Prepares the environment for interactive use: sets a prompt and takes
	TERM and LANG over from the calling process (if they are set).

	Returns 0 on success or the error of pakfire_jail_set_env().
*/
static int pakfire_jail_setup_interactive_env(struct pakfire_jail* jail) {
	// Variables that are copied from our own environment (in this order)
	static const char* const inherited[] = { "TERM", "LANG", NULL };

	// Set PS1
	int r = pakfire_jail_set_env(jail, "PS1", "pakfire-jail \\w> ");
	if (r)
		return r;

	for (const char* const* name = inherited; *name; name++) {
		const char* value = secure_getenv(*name);

		// Skip anything that is not set
		if (!value)
			continue;

		r = pakfire_jail_set_env(jail, *name, value);
		if (r)
			return r;
	}

	return 0;
}
194
195 PAKFIRE_EXPORT int pakfire_jail_create(struct pakfire_jail** jail,
196 struct pakfire* pakfire, int flags) {
197 int r;
198
199 // Allocate a new jail
200 struct pakfire_jail* j = calloc(1, sizeof(*j));
201 if (!j)
202 return 1;
203
204 // Reference Pakfire
205 j->pakfire = pakfire_ref(pakfire);
206
207 // Initialize reference counter
208 j->nrefs = 1;
209
210 // Store flags
211 j->flags = flags;
212
213 DEBUG(j->pakfire, "Allocated new jail at %p\n", j);
214
215 // Set default log callback
216 r = pakfire_jail_set_log_callback(j, pakfire_jail_default_log_callback, NULL);
217 if (r)
218 goto ERROR;
219
220 // Set default environment
221 for (const struct environ* e = ENV; e->key; e++) {
222 r = pakfire_jail_set_env(j, e->key, e->val);
223 if (r)
224 goto ERROR;
225 }
226
227 // Setup interactive stuff
228 if (j->flags & PAKFIRE_JAIL_INTERACTIVE) {
229 r = pakfire_jail_setup_interactive_env(j);
230 if (r)
231 goto ERROR;
232 }
233
234 // Done
235 *jail = j;
236 return 0;
237
238 ERROR:
239 pakfire_jail_free(j);
240
241 return r;
242 }
243
244 PAKFIRE_EXPORT struct pakfire_jail* pakfire_jail_ref(struct pakfire_jail* jail) {
245 ++jail->nrefs;
246
247 return jail;
248 }
249
250 PAKFIRE_EXPORT struct pakfire_jail* pakfire_jail_unref(struct pakfire_jail* jail) {
251 if (--jail->nrefs > 0)
252 return jail;
253
254 pakfire_jail_free(jail);
255 return NULL;
256 }
257
258 static int pakfire_jail_has_flag(struct pakfire_jail* jail, int flag) {
259 return jail->flags & flag;
260 }
261
262 // Resource Limits
263
264 PAKFIRE_EXPORT int pakfire_jail_nice(struct pakfire_jail* jail, int nice) {
265 // Check if nice level is in range
266 if (nice < -19 || nice > 20) {
267 errno = EINVAL;
268 return 1;
269 }
270
271 // Store nice level
272 jail->nice = nice;
273
274 return 0;
275 }
276
277 int pakfire_jail_set_cgroup(struct pakfire_jail* jail, struct pakfire_cgroup* cgroup) {
278 // Free any previous cgroup
279 if (jail->cgroup) {
280 pakfire_cgroup_unref(jail->cgroup);
281 jail->cgroup = NULL;
282 }
283
284 // Set any new cgroup
285 if (cgroup) {
286 DEBUG(jail->pakfire, "Setting cgroup %p\n", cgroup);
287
288 jail->cgroup = pakfire_cgroup_ref(cgroup);
289 }
290
291 // Done
292 return 0;
293 }
294
295 // Environment
296
297 // Returns the length of the environment
298 static unsigned int pakfire_jail_env_length(struct pakfire_jail* jail) {
299 unsigned int i = 0;
300
301 // Count everything in the environment
302 for (char** e = jail->env; *e; e++)
303 i++;
304
305 return i;
306 }
307
308 // Finds an existing environment variable and returns its index or -1 if not found
309 static int pakfire_jail_find_env(struct pakfire_jail* jail, const char* key) {
310 if (!key) {
311 errno = EINVAL;
312 return -1;
313 }
314
315 char buffer[strlen(key) + 2];
316 pakfire_string_format(buffer, "%s=", key);
317
318 for (unsigned int i = 0; jail->env[i]; i++) {
319 if (pakfire_string_startswith(jail->env[i], buffer))
320 return i;
321 }
322
323 // Nothing found
324 return -1;
325 }
326
327 // Returns the value of an environment variable or NULL
328 PAKFIRE_EXPORT const char* pakfire_jail_get_env(struct pakfire_jail* jail,
329 const char* key) {
330 int i = pakfire_jail_find_env(jail, key);
331 if (i < 0)
332 return NULL;
333
334 return jail->env[i] + strlen(key) + 1;
335 }
336
337 // Sets an environment variable
338 PAKFIRE_EXPORT int pakfire_jail_set_env(struct pakfire_jail* jail,
339 const char* key, const char* value) {
340 // Find the index where to write this value to
341 int i = pakfire_jail_find_env(jail, key);
342 if (i < 0)
343 i = pakfire_jail_env_length(jail);
344
345 // Return -ENOSPC when the environment is full
346 if (i >= ENVIRON_SIZE) {
347 errno = ENOSPC;
348 return -1;
349 }
350
351 // Free any previous value
352 if (jail->env[i])
353 free(jail->env[i]);
354
355 // Format and set environment variable
356 asprintf(&jail->env[i], "%s=%s", key, value);
357
358 DEBUG(jail->pakfire, "Set environment variable: %s\n", jail->env[i]);
359
360 return 0;
361 }
362
363 // Imports an environment
364 PAKFIRE_EXPORT int pakfire_jail_import_env(struct pakfire_jail* jail, const char* env[]) {
365 if (!env)
366 return 0;
367
368 char* key;
369 char* val;
370 int r;
371
372 // Copy environment variables
373 for (unsigned int i = 0; env[i]; i++) {
374 r = pakfire_string_partition(env[i], "=", &key, &val);
375 if (r)
376 continue;
377
378 // Set value
379 r = pakfire_jail_set_env(jail, key, val);
380
381 if (key)
382 free(key);
383 if (val)
384 free(val);
385
386 // Break on error
387 if (r)
388 return r;
389 }
390
391 return 0;
392 }
393
394 // Logging
395
396 PAKFIRE_EXPORT int pakfire_jail_set_log_callback(struct pakfire_jail* jail,
397 pakfire_jail_log_callback callback, void* data) {
398 jail->log_callback = callback;
399 jail->log_data = data;
400
401 return 0;
402 }
403
404 /*
405 This function replaces any logging in the child process.
406
407 All log messages will be sent to the parent process through their respective pipes.
408 */
409 static void pakfire_jail_log(void* data, int priority, const char* file,
410 int line, const char* fn, const char* format, va_list args) {
411 struct pakfire_jail_pipes* pipes = (struct pakfire_jail_pipes*)data;
412 int fd;
413
414 switch (priority) {
415 case LOG_INFO:
416 fd = pipes->log_INFO[1];
417 break;
418
419 case LOG_ERR:
420 fd = pipes->log_ERROR[1];
421 break;
422
423 #ifdef ENABLE_DEBUG
424 case LOG_DEBUG:
425 fd = pipes->log_DEBUG[1];
426 break;
427 #endif /* ENABLE_DEBUG */
428
429 // Ignore any messages of an unknown priority
430 default:
431 return;
432 }
433
434 // Send the log message
435 if (fd)
436 vdprintf(fd, format, args);
437 }
438
439 static int pakfire_jail_log_buffer_is_full(const struct pakfire_log_buffer* buffer) {
440 return (sizeof(buffer->data) == buffer->used);
441 }
442
443 /*
444 This function reads as much data as it can from the file descriptor.
445 If it finds a whole line in it, it will send it to the logger and repeat the process.
446 If not newline character is found, it will try to read more data until it finds one.
447 */
448 static int pakfire_jail_handle_log(struct pakfire_jail* jail,
449 struct pakfire_jail_exec* ctx, int priority, int fd,
450 struct pakfire_log_buffer* buffer, pakfire_jail_log_callback callback, void* data) {
451 char line[BUFFER_SIZE + 1];
452
453 // Fill up buffer from fd
454 if (buffer->used < sizeof(buffer->data)) {
455 ssize_t bytes_read = read(fd, buffer->data + buffer->used,
456 sizeof(buffer->data) - buffer->used);
457
458 // Handle errors
459 if (bytes_read < 0) {
460 ERROR(jail->pakfire, "Could not read from fd %d: %m\n", fd);
461 return -1;
462 }
463
464 // Update buffer size
465 buffer->used += bytes_read;
466 }
467
468 // See if we have any lines that we can write
469 while (buffer->used) {
470 // Search for the end of the first line
471 char* eol = memchr(buffer->data, '\n', buffer->used);
472
473 // No newline found
474 if (!eol) {
475 // If the buffer is full, we send the content to the logger and try again
476 // This should not happen in practise
477 if (pakfire_jail_log_buffer_is_full(buffer)) {
478 DEBUG(jail->pakfire, "Logging buffer is full. Sending all content\n");
479
480 eol = buffer->data + sizeof(buffer->data) - 1;
481
482 // Otherwise we might have only read parts of the output
483 } else
484 break;
485 }
486
487 // Find the length of the string
488 size_t length = eol - buffer->data + 1;
489
490 // Copy the line into the buffer
491 memcpy(line, buffer->data, length);
492
493 // Terminate the string
494 line[length] = '\0';
495
496 // Log the line
497 if (callback) {
498 int r = callback(jail->pakfire, data, priority, line, length);
499 if (r) {
500 ERROR(jail->pakfire, "The logging callback returned an error: %d\n", r);
501 return r;
502 }
503 }
504
505 // Remove line from buffer
506 memmove(buffer->data, buffer->data + length, buffer->used - length);
507 buffer->used -= length;
508 }
509
510 return 0;
511 }
512
513 static int pakfire_jail_setup_pipe(struct pakfire_jail* jail, int (*fds)[2], const int flags) {
514 int r = pipe2(*fds, flags);
515 if (r < 0) {
516 ERROR(jail->pakfire, "Could not setup pipe: %m\n");
517 return 1;
518 }
519
520 return 0;
521 }
522
// Closes both ends of a pipe. Ends that are 0 count as "not open" and are skipped.
static void pakfire_jail_close_pipe(struct pakfire_jail* jail, int fds[2]) {
	// Read end
	if (fds[0])
		close(fds[0]);

	// Write end
	if (fds[1])
		close(fds[1]);
}
528
/*
	Convenience function for the reading side of a pipe: closes the write
	end (if it is set), resets it to 0 and returns the read end.
*/
static int pakfire_jail_get_pipe(struct pakfire_jail* jail, int (*fds)[2]) {
	// ends[0] is the read end, ends[1] the write end
	int* ends = *fds;

	// Close the write end of the pipe
	if (ends[1]) {
		close(ends[1]);
		ends[1] = 0;
	}

	// Return the read end
	return ends[0];
}
547
548 static int pakfire_jail_wait(struct pakfire_jail* jail, struct pakfire_jail_exec* ctx) {
549 int epollfd = -1;
550 struct epoll_event ev;
551 struct epoll_event events[EPOLL_MAX_EVENTS];
552 int r = 0;
553
554 // Fetch file descriptors from context
555 const int stdout = pakfire_jail_get_pipe(jail, &ctx->pipes.stdout);
556 const int stderr = pakfire_jail_get_pipe(jail, &ctx->pipes.stderr);
557 const int pidfd = ctx->pidfd;
558
559 // Logging
560 const int log_INFO = pakfire_jail_get_pipe(jail, &ctx->pipes.log_INFO);
561 const int log_ERROR = pakfire_jail_get_pipe(jail, &ctx->pipes.log_ERROR);
562 const int log_DEBUG = pakfire_jail_get_pipe(jail, &ctx->pipes.log_DEBUG);
563
564 // Make a list of all file descriptors we are interested in
565 int fds[] = {
566 stdout, stderr, pidfd, log_INFO, log_ERROR, log_DEBUG,
567 };
568
569 // Setup epoll
570 epollfd = epoll_create1(0);
571 if (epollfd < 0) {
572 ERROR(jail->pakfire, "Could not initialize epoll(): %m\n");
573 r = 1;
574 goto ERROR;
575 }
576
577 ev.events = EPOLLIN;
578
579 // Turn file descriptors into non-blocking mode and add them to epoll()
580 for (unsigned int i = 0; i < sizeof(fds) / sizeof(*fds); i++) {
581 int fd = fds[i];
582
583 // Skip fds which were not initialized
584 if (fd <= 0)
585 continue;
586
587 ev.data.fd = fd;
588
589 if (epoll_ctl(epollfd, EPOLL_CTL_ADD, fd, &ev) < 0) {
590 ERROR(jail->pakfire, "Could not add file descriptor %d to epoll(): %m\n", fd);
591 r = 1;
592 goto ERROR;
593 }
594 }
595
596 int ended = 0;
597
598 // Loop for as long as the process is alive
599 while (!ended) {
600 int num = epoll_wait(epollfd, events, EPOLL_MAX_EVENTS, -1);
601 if (num < 1) {
602 // Ignore if epoll_wait() has been interrupted
603 if (errno == EINTR)
604 continue;
605
606 ERROR(jail->pakfire, "epoll_wait() failed: %m\n");
607 r = 1;
608
609 goto ERROR;
610 }
611
612 for (int i = 0; i < num; i++) {
613 int fd = events[i].data.fd;
614
615 struct pakfire_log_buffer* buffer = NULL;
616 pakfire_jail_log_callback callback = NULL;
617 void* data = NULL;
618 int priority;
619
620 // Handle any changes to the PIDFD
621 if (fd == pidfd) {
622 // Call waidid() and store the result
623 r = waitid(P_PIDFD, ctx->pidfd, &ctx->status, WEXITED);
624 if (r) {
625 ERROR(jail->pakfire, "waitid() failed: %m\n");
626 goto ERROR;
627 }
628
629 // Mark that we have ended so that we will process the remaining
630 // events from epoll() now, but won't restart the outer loop.
631 ended = 1;
632 continue;
633
634 // Handle logging messages
635 } else if (fd == log_INFO) {
636 buffer = &ctx->buffers.log_INFO;
637 priority = LOG_INFO;
638
639 callback = pakfire_jail_default_log_callback;
640
641 } else if (fd == log_ERROR) {
642 buffer = &ctx->buffers.log_ERROR;
643 priority = LOG_ERR;
644
645 callback = pakfire_jail_default_log_callback;
646
647 } else if (fd == log_DEBUG) {
648 buffer = &ctx->buffers.log_DEBUG;
649 priority = LOG_DEBUG;
650
651 callback = pakfire_jail_default_log_callback;
652
653 // Handle anything from the log pipes
654 } else if (fd == stdout) {
655 buffer = &ctx->buffers.stdout;
656 priority = LOG_INFO;
657
658 callback = jail->log_callback;
659 data = jail->log_data;
660
661 } else if (fd == stderr) {
662 buffer = &ctx->buffers.stderr;
663 priority = LOG_ERR;
664
665 callback = jail->log_callback;
666 data = jail->log_data;
667
668 } else {
669 DEBUG(jail->pakfire, "Received invalid file descriptor %d\n", fd);
670 continue;
671 }
672
673 // Handle log event
674 r = pakfire_jail_handle_log(jail, ctx, priority, fd, buffer, callback, data);
675 if (r)
676 goto ERROR;
677 }
678 }
679
680 ERROR:
681 if (epollfd > 0)
682 close(epollfd);
683
684 return r;
685 }
686
/*
	Log callback that collects standard output in a heap-allocated string.

	data must point to a char* which is either NULL or holds the string
	built by earlier calls. Lines with priority LOG_INFO are appended to
	it; everything else is sent to the default logger.

	The previous implementation let asprintf() overwrite *output, which
	leaked the old string on every line and left *output undefined if the
	allocation failed. Now the new string is built first and the old one
	is released afterwards.

	NOTE(review): this frees the previous *output and therefore assumes it
	was allocated by an earlier call of this function - confirm against
	the callers.

	Returns 0 on success and 1 on error (errno is set to EINVAL if data
	is NULL).
*/
static int pakfire_jail_capture_stdout(struct pakfire* pakfire, void* data, int priority,
		const char* line, size_t length) {
	char** output = (char**)data;

	// Send everything but stdout to the default logger
	if (priority != LOG_INFO)
		return pakfire_jail_default_log_callback(pakfire, NULL, priority, line, length);

	// We need somewhere to store the output
	if (!output) {
		errno = EINVAL;
		return 1;
	}

	// Append the line to what we have collected so far
	char* combined = NULL;

	int r = asprintf(&combined, "%s%s", (*output) ? *output : "", line);
	if (r < 0)
		return 1;

	// Replace the old string
	free(*output);
	*output = combined;

	return 0;
}
703
704 // Capabilities
705
706 static int pakfire_jail_drop_capabilities(struct pakfire_jail* jail) {
707 const int capabilities[] = {
708 // Deny access to the kernel's audit system
709 CAP_AUDIT_CONTROL,
710 CAP_AUDIT_READ,
711 CAP_AUDIT_WRITE,
712
713 // Deny suspending block devices
714 CAP_BLOCK_SUSPEND,
715
716 // Deny any stuff with BPF
717 CAP_BPF,
718
719 // Deny checkpoint restore
720 CAP_CHECKPOINT_RESTORE,
721
722 // Deny opening files by inode number (open_by_handle_at)
723 CAP_DAC_READ_SEARCH,
724
725 // Deny setting SUID bits
726 CAP_FSETID,
727
728 // Deny locking more memory
729 CAP_IPC_LOCK,
730
731 // Deny modifying any Apparmor/SELinux/SMACK configuration
732 CAP_MAC_ADMIN,
733 CAP_MAC_OVERRIDE,
734
735 // Deny creating any special devices
736 CAP_MKNOD,
737
738 // Deny setting any capabilities
739 CAP_SETFCAP,
740
741 // Deny reading from syslog
742 CAP_SYSLOG,
743
744 // Deny any admin actions (mount, sethostname, ...)
745 CAP_SYS_ADMIN,
746
747 // Deny rebooting the system
748 CAP_SYS_BOOT,
749
750 // Deny loading kernel modules
751 CAP_SYS_MODULE,
752
753 // Deny setting nice level
754 CAP_SYS_NICE,
755
756 // Deny access to /proc/kcore, /dev/mem, /dev/kmem
757 CAP_SYS_RAWIO,
758
759 // Deny circumventing any resource limits
760 CAP_SYS_RESOURCE,
761
762 // Deny setting the system time
763 CAP_SYS_TIME,
764
765 // Deny playing with suspend
766 CAP_WAKE_ALARM,
767
768 0,
769 };
770
771 DEBUG(jail->pakfire, "Dropping capabilities...\n");
772
773 size_t num_caps = 0;
774 int r;
775
776 // Drop any capabilities
777 for (const int* cap = capabilities; *cap; cap++) {
778 r = prctl(PR_CAPBSET_DROP, *cap, 0, 0, 0);
779 if (r) {
780 ERROR(jail->pakfire, "Could not drop capability %d: %m\n", *cap);
781 return r;
782 }
783
784 num_caps++;
785 }
786
787 // Fetch any capabilities
788 cap_t caps = cap_get_proc();
789 if (!caps) {
790 ERROR(jail->pakfire, "Could not read capabilities: %m\n");
791 return 1;
792 }
793
794 /*
795 Set inheritable capabilities
796
797 This ensures that no processes will be able to gain any of the listed
798 capabilities again.
799 */
800 r = cap_set_flag(caps, CAP_INHERITABLE, num_caps, capabilities, CAP_CLEAR);
801 if (r) {
802 ERROR(jail->pakfire, "cap_set_flag() failed: %m\n");
803 goto ERROR;
804 }
805
806 // Restore capabilities
807 r = cap_set_proc(caps);
808 if (r) {
809 ERROR(jail->pakfire, "Could not restore capabilities: %m\n");
810 goto ERROR;
811 }
812
813 ERROR:
814 if (caps)
815 cap_free(caps);
816
817 return r;
818 }
819
820 // Syscall Filter
821
822 static int pakfire_jail_limit_syscalls(struct pakfire_jail* jail) {
823 const int syscalls[] = {
824 // The kernel's keyring isn't namespaced
825 SCMP_SYS(keyctl),
826 SCMP_SYS(add_key),
827 SCMP_SYS(request_key),
828
829 // Disable userfaultfd
830 SCMP_SYS(userfaultfd),
831
832 // Disable perf which could leak a lot of information about the host
833 SCMP_SYS(perf_event_open),
834
835 0,
836 };
837 int r = 1;
838
839 DEBUG(jail->pakfire, "Applying syscall filter...\n");
840
841 // Setup a syscall filter which allows everything by default
842 scmp_filter_ctx ctx = seccomp_init(SCMP_ACT_ALLOW);
843 if (!ctx) {
844 ERROR(jail->pakfire, "Could not setup seccomp filter: %m\n");
845 goto ERROR;
846 }
847
848 // All all syscalls
849 for (const int* syscall = syscalls; *syscall; syscall++) {
850 r = seccomp_rule_add(ctx, SCMP_ACT_ERRNO(EPERM), *syscall, 0);
851 if (r) {
852 ERROR(jail->pakfire, "Could not configure syscall %d: %m\n", *syscall);
853 goto ERROR;
854 }
855 }
856
857 // Load syscall filter into the kernel
858 r = seccomp_load(ctx);
859 if (r) {
860 ERROR(jail->pakfire, "Could not load syscall filter into the kernel: %m\n");
861 goto ERROR;
862 }
863
864 ERROR:
865 if (ctx)
866 seccomp_release(ctx);
867
868 return r;
869 }
870
871 // UID/GID Mapping
872
873 static int pakfire_jail_write_uidgid_mapping(struct pakfire_jail* jail,
874 const char* path, uid_t mapped_id, size_t length) {
875 int r = 1;
876
877 // Open file for writing
878 FILE* f = fopen(path, "w");
879 if (!f) {
880 ERROR(jail->pakfire, "Could not open %s for writing: %m\n", path);
881 goto ERROR;
882 }
883
884 // Write configuration
885 int bytes_written = fprintf(f, "%d %d %ld\n", 0, mapped_id, length);
886 if (bytes_written <= 0) {
887 ERROR(jail->pakfire, "Could not write UID/GID mapping: %m\n");
888 goto ERROR;
889 }
890
891 // Close the file
892 r = fclose(f);
893 f = NULL;
894 if (r) {
895 ERROR(jail->pakfire, "Could not write UID/GID mapping: %m\n");
896
897 goto ERROR;
898 }
899
900 // Success
901 r = 0;
902
903 ERROR:
904 if (f)
905 fclose(f);
906
907 return r;
908 }
909
910 static int pakfire_jail_setup_uid_mapping(struct pakfire_jail* jail, pid_t pid) {
911 char path[PATH_MAX];
912 int r;
913
914 uid_t mapped_uid = 0;
915 const size_t length = 1;
916
917 // Fetch the UID of the calling process
918 uid_t uid = getuid();
919
920 // Have we been called by root?
921 if (uid == 0) {
922 mapped_uid = 0;
923
924 // Have we been called by an unprivileged user?
925 } else {
926 // XXX fetch SUBUID
927 mapped_uid = uid;
928 }
929
930 // Make path
931 r = pakfire_string_format(path, "/proc/%d/uid_map", pid);
932 if (r < 0)
933 return 1;
934
935 DEBUG(jail->pakfire, "Mapping UID range (%u - %lu)\n", mapped_uid, mapped_uid + length);
936
937 return pakfire_jail_write_uidgid_mapping(jail, path, mapped_uid, length);
938 }
939
940 static int pakfire_jail_setup_gid_mapping(struct pakfire_jail* jail, pid_t pid) {
941 char path[PATH_MAX];
942 int r;
943
944 gid_t mapped_gid = 0;
945 const size_t length = 1;
946
947 // Fetch the GID of the calling process
948 gid_t gid = getgid();
949
950 // Have we been called from the root group?
951 if (gid == 0) {
952 mapped_gid = 0;
953
954 // Have we been called by an unprivileged group?
955 } else {
956 // XXX fetch SUBGID
957 mapped_gid = gid;
958 }
959
960 // Make path
961 r = pakfire_string_format(path, "/proc/%d/gid_map", pid);
962 if (r < 0)
963 return 1;
964
965 DEBUG(jail->pakfire, "Mapping GID range (%u - %lu)\n", mapped_gid, mapped_gid + length);
966
967 return pakfire_jail_write_uidgid_mapping(jail, path, mapped_gid, length);
968 }
969
970 static int pakfire_jail_setgroups(struct pakfire_jail* jail, pid_t pid) {
971 char path[PATH_MAX];
972 int r = 1;
973
974 // Make path
975 r = pakfire_string_format(path, "/proc/%d/setgroups", pid);
976 if (r < 0)
977 return 1;
978
979 // Open file for writing
980 FILE* f = fopen(path, "w");
981 if (!f) {
982 ERROR(jail->pakfire, "Could not open %s for writing: %m\n", path);
983 goto ERROR;
984 }
985
986 // Write content
987 int bytes_written = fprintf(f, "deny\n");
988 if (bytes_written <= 0) {
989 ERROR(jail->pakfire, "Could not write to %s: %m\n", path);
990 goto ERROR;
991 }
992
993 r = fclose(f);
994 f = NULL;
995 if (r) {
996 ERROR(jail->pakfire, "Could not close %s: %m\n", path);
997 goto ERROR;
998 }
999
1000 ERROR:
1001 if (f)
1002 fclose(f);
1003
1004 return r;
1005 }
1006
1007 static int pakfire_jail_send_signal(struct pakfire_jail* jail, int fd) {
1008 const uint64_t val = 1;
1009 int r = 0;
1010
1011 DEBUG(jail->pakfire, "Sending signal...\n");
1012
1013 // Write to the file descriptor
1014 ssize_t bytes_written = write(fd, &val, sizeof(val));
1015 if (bytes_written < 0 || (size_t)bytes_written < sizeof(val)) {
1016 ERROR(jail->pakfire, "Could not send signal: %m\n");
1017 r = 1;
1018 }
1019
1020 // Close the file descriptor
1021 close(fd);
1022
1023 return r;
1024 }
1025
1026 static int pakfire_jail_wait_for_signal(struct pakfire_jail* jail, int fd) {
1027 uint64_t val = 0;
1028 int r = 0;
1029
1030 DEBUG(jail->pakfire, "Waiting for signal...\n");
1031
1032 ssize_t bytes_read = read(fd, &val, sizeof(val));
1033 if (bytes_read < 0 || (size_t)bytes_read < sizeof(val)) {
1034 ERROR(jail->pakfire, "Error waiting for signal: %m\n");
1035 r = 1;
1036 }
1037
1038 // Close the file descriptor
1039 close(fd);
1040
1041 return r;
1042 }
1043
1044 /*
1045 Performs the initialisation that needs to happen in the parent part
1046 */
1047 static int pakfire_jail_parent(struct pakfire_jail* jail, struct pakfire_jail_exec* ctx) {
1048 int r;
1049
1050 // Setup UID mapping
1051 r = pakfire_jail_setup_uid_mapping(jail, ctx->pid);
1052 if (r)
1053 return r;
1054
1055 // Write "deny" to /proc/PID/setgroups
1056 r = pakfire_jail_setgroups(jail, ctx->pid);
1057 if (r)
1058 return r;
1059
1060 // Setup GID mapping
1061 r = pakfire_jail_setup_gid_mapping(jail, ctx->pid);
1062 if (r)
1063 return r;
1064
1065 // Parent has finished initialisation
1066 DEBUG(jail->pakfire, "Parent has finished initialization\n");
1067
1068 // Send signal to client
1069 r = pakfire_jail_send_signal(jail, ctx->completed_fd);
1070 if (r)
1071 return r;
1072
1073 return 0;
1074 }
1075
1076 static int pakfire_jail_child(struct pakfire_jail* jail, struct pakfire_jail_exec* ctx,
1077 const char* argv[]) {
1078 int r;
1079
1080 // Redirect any logging to our log pipe
1081 pakfire_set_log_callback(jail->pakfire, pakfire_jail_log, &ctx->pipes);
1082
1083 // Fetch my own PID
1084 pid_t pid = getpid();
1085
1086 DEBUG(jail->pakfire, "Launched child process in jail with PID %d\n", pid);
1087
1088 // Log argv
1089 for (unsigned int i = 0; argv[i]; i++)
1090 DEBUG(jail->pakfire, " argv[%d] = %s\n", i, argv[i]);
1091
1092 // Wait for the parent to finish initialization
1093 r = pakfire_jail_wait_for_signal(jail, ctx->completed_fd);
1094 if (r)
1095 return r;
1096
1097 // Perform further initialization
1098
1099 // Fetch UID/GID
1100 uid_t uid = getuid();
1101 gid_t gid = getgid();
1102
1103 // Fetch EUID/EGID
1104 uid_t euid = geteuid();
1105 gid_t egid = getegid();
1106
1107 DEBUG(jail->pakfire, " UID: %d (effective %d)\n", uid, euid);
1108 DEBUG(jail->pakfire, " GID: %d (effective %d)\n", gid, egid);
1109
1110 // Check if we are (effectively running as root)
1111 if (uid || gid || euid || egid) {
1112 ERROR(jail->pakfire, "Child process is not running as root\n");
1113 return 126;
1114 }
1115
1116 const char* root = pakfire_get_path(jail->pakfire);
1117 const char* arch = pakfire_get_arch(jail->pakfire);
1118
1119 // Change root (unless root is /)
1120 if (!pakfire_on_root(jail->pakfire)) {
1121 // Mount everything
1122 r = pakfire_mount_all(jail->pakfire);
1123 if (r)
1124 return r;
1125
1126 // Log all mountpoints
1127 pakfire_mount_list(jail->pakfire);
1128
1129 // Call chroot()
1130 r = chroot(root);
1131 if (r) {
1132 ERROR(jail->pakfire, "chroot() to %s failed: %m\n", root);
1133 return 1;
1134 }
1135
1136 // Change directory to /
1137 r = chdir("/");
1138 if (r) {
1139 ERROR(jail->pakfire, "chdir() after chroot() failed: %m\n");
1140 return 1;
1141 }
1142 }
1143
1144 // Set personality
1145 unsigned long persona = pakfire_arch_personality(arch);
1146 if (persona) {
1147 r = personality(persona);
1148 if (r < 0) {
1149 ERROR(jail->pakfire, "Could not set personality (%x)\n", (unsigned int)persona);
1150 return 1;
1151 }
1152 }
1153
1154 // Set nice level
1155 if (jail->nice) {
1156 DEBUG(jail->pakfire, "Setting nice level to %d\n", jail->nice);
1157
1158 r = setpriority(PRIO_PROCESS, pid, jail->nice);
1159 if (r) {
1160 ERROR(jail->pakfire, "Could not set nice level: %m\n");
1161 return 1;
1162 }
1163 }
1164
1165 // Close other end of log pipes
1166 close(ctx->pipes.log_INFO[0]);
1167 close(ctx->pipes.log_ERROR[0]);
1168 #ifdef ENABLE_DEBUG
1169 close(ctx->pipes.log_DEBUG[0]);
1170 #endif /* ENABLE_DEBUG */
1171
1172 // Connect standard output and error
1173 if (ctx->pipes.stdout[1] && ctx->pipes.stderr[1]) {
1174 r = dup2(ctx->pipes.stdout[1], STDOUT_FILENO);
1175 if (r < 0) {
1176 ERROR(jail->pakfire, "Could not connect fd %d to stdout: %m\n",
1177 ctx->pipes.stdout[1]);
1178
1179 return 1;
1180 }
1181
1182 r = dup2(ctx->pipes.stderr[1], STDERR_FILENO);
1183 if (r < 0) {
1184 ERROR(jail->pakfire, "Could not connect fd %d to stderr: %m\n",
1185 ctx->pipes.stderr[1]);
1186
1187 return 1;
1188 }
1189
1190 // Close the pipe (as we have moved the original file descriptors)
1191 pakfire_jail_close_pipe(jail, ctx->pipes.stdout);
1192 pakfire_jail_close_pipe(jail, ctx->pipes.stderr);
1193 }
1194
1195 // Reset open file limit (http://0pointer.net/blog/file-descriptor-limits.html)
1196 r = pakfire_rlimit_reset_nofile(jail->pakfire);
1197 if (r)
1198 return r;
1199
1200 // Drop capabilities
1201 r = pakfire_jail_drop_capabilities(jail);
1202 if (r)
1203 return r;
1204
1205 // Filter syscalls
1206 r = pakfire_jail_limit_syscalls(jail);
1207 if (r)
1208 return r;
1209
1210 // exec() command
1211 r = execvpe(argv[0], (char**)argv, jail->env);
1212 if (r < 0)
1213 ERROR(jail->pakfire, "Could not execve(): %m\n");
1214
1215 // Translate errno into regular exit code
1216 switch (errno) {
1217 case ENOENT:
1218 r = 127;
1219 break;
1220
1221 default:
1222 r = 1;
1223 }
1224
1225 // We should not get here
1226 return r;
1227 }
1228
1229 // Run a command in the jail
1230 static int __pakfire_jail_exec(struct pakfire_jail* jail, const char* argv[]) {
1231 int exit = -1;
1232 int r;
1233
1234 // Check if argv is valid
1235 if (!argv || !argv[0]) {
1236 errno = EINVAL;
1237 return -1;
1238 }
1239
1240 // Initialize context for this call
1241 struct pakfire_jail_exec ctx = {
1242 .pipes = {
1243 .stdout = { 0, 0 },
1244 .stderr = { 0, 0 },
1245 },
1246 };
1247
1248 DEBUG(jail->pakfire, "Executing jail...\n");
1249
1250 /*
1251 Setup a file descriptor which can be used to notify the client that the parent
1252 has completed configuration.
1253 */
1254 ctx.completed_fd = eventfd(0, EFD_CLOEXEC);
1255 if (ctx.completed_fd < 0) {
1256 ERROR(jail->pakfire, "eventfd() failed: %m\n");
1257 return -1;
1258 }
1259
1260 // Create pipes to communicate with child process if we are not running interactively
1261 if (!pakfire_jail_has_flag(jail, PAKFIRE_JAIL_INTERACTIVE)) {
1262 // stdout
1263 r = pakfire_jail_setup_pipe(jail, &ctx.pipes.stdout, 0);
1264 if (r)
1265 goto ERROR;
1266
1267 // stderr
1268 r = pakfire_jail_setup_pipe(jail, &ctx.pipes.stderr, 0);
1269 if (r)
1270 goto ERROR;
1271 }
1272
1273 // Setup pipes for logging
1274 // INFO
1275 r = pakfire_jail_setup_pipe(jail, &ctx.pipes.log_INFO, O_CLOEXEC);
1276 if (r)
1277 goto ERROR;
1278
1279 // ERROR
1280 r = pakfire_jail_setup_pipe(jail, &ctx.pipes.log_ERROR, O_CLOEXEC);
1281 if (r)
1282 goto ERROR;
1283
1284 #ifdef ENABLE_DEBUG
1285 // DEBUG
1286 r = pakfire_jail_setup_pipe(jail, &ctx.pipes.log_DEBUG, O_CLOEXEC);
1287 if (r)
1288 goto ERROR;
1289 #endif /* ENABLE_DEBUG */
1290
1291 // Configure child process
1292 struct clone_args args = {
1293 .flags =
1294 CLONE_NEWCGROUP |
1295 CLONE_NEWIPC |
1296 CLONE_NEWNS |
1297 CLONE_NEWPID |
1298 CLONE_NEWUSER |
1299 CLONE_NEWUTS |
1300 CLONE_PIDFD,
1301 .exit_signal = SIGCHLD,
1302 .pidfd = (long long unsigned int)&ctx.pidfd,
1303 };
1304
1305 // Launch the process in a cgroup that is a leaf of the configured cgroup
1306 if (jail->cgroup) {
1307 args.flags |= CLONE_INTO_CGROUP;
1308
1309 #warning TODO randomize the name
1310
1311 // Create a temporary cgroup
1312 r = pakfire_cgroup_child(&ctx.cgroup, jail->cgroup, "jail", 0);
1313 if (r) {
1314 ERROR(jail->pakfire, "Could not create cgroup for jail: %m\n");
1315 goto ERROR;
1316 }
1317
1318 // Clone into this cgroup
1319 args.cgroup = pakfire_cgroup_fd(ctx.cgroup);
1320 }
1321
1322 // Fork this process
1323 ctx.pid = clone3(&args, sizeof(args));
1324 if (ctx.pid < 0) {
1325 ERROR(jail->pakfire, "Could not clone: %m\n");
1326 return -1;
1327
1328 // Child process
1329 } else if (ctx.pid == 0) {
1330 r = pakfire_jail_child(jail, &ctx, argv);
1331 _exit(r);
1332 }
1333
1334 // Parent process
1335 r = pakfire_jail_parent(jail, &ctx);
1336 if (r)
1337 goto ERROR;
1338
1339 DEBUG(jail->pakfire, "Waiting for PID %d to finish its work\n", ctx.pid);
1340
1341 // Read output of the child process
1342 r = pakfire_jail_wait(jail, &ctx);
1343 if (r)
1344 goto ERROR;
1345
1346 // Handle exit status
1347 switch (ctx.status.si_code) {
1348 case CLD_EXITED:
1349 DEBUG(jail->pakfire, "The child process exited with code %d\n",
1350 ctx.status.si_status);
1351
1352 // Pass exit code
1353 exit = ctx.status.si_status;
1354 break;
1355
1356 case CLD_KILLED:
1357 case CLD_DUMPED:
1358 ERROR(jail->pakfire, "The child process was killed\n");
1359 break;
1360
1361 // Log anything else
1362 default:
1363 ERROR(jail->pakfire, "Unknown child exit code: %d\n", ctx.status.si_code);
1364 break;
1365 }
1366
1367 ERROR:
1368 // Destroy the temporary cgroup (if any)
1369 if (ctx.cgroup) {
1370 // Read cgroup stats
1371 r = pakfire_cgroup_stat(ctx.cgroup, &ctx.cgroup_stats);
1372 if (r) {
1373 ERROR(jail->pakfire, "Could not read cgroup stats: %m\n");
1374 } else {
1375 pakfire_cgroup_stat_dump(ctx.cgroup, &ctx.cgroup_stats);
1376 }
1377
1378 pakfire_cgroup_destroy(ctx.cgroup);
1379 pakfire_cgroup_unref(ctx.cgroup);
1380 }
1381
1382 // Close any file descriptors
1383 pakfire_jail_close_pipe(jail, ctx.pipes.stdout);
1384 pakfire_jail_close_pipe(jail, ctx.pipes.stderr);
1385 if (ctx.pidfd)
1386 close(ctx.pidfd);
1387 pakfire_jail_close_pipe(jail, ctx.pipes.log_INFO);
1388 pakfire_jail_close_pipe(jail, ctx.pipes.log_ERROR);
1389 pakfire_jail_close_pipe(jail, ctx.pipes.log_DEBUG);
1390
1391 #if 0
1392 // Umount everything
1393 if (!pakfire_on_root(jail->pakfire))
1394 pakfire_umount_all(jail->pakfire);
1395 #endif
1396
1397 return exit;
1398 }
1399
1400 PAKFIRE_EXPORT int pakfire_jail_exec(struct pakfire_jail* jail,
1401 const char* argv[], char** output) {
1402 int r;
1403
1404 // Store logging callback
1405 pakfire_jail_log_callback log_callback = jail->log_callback;
1406 void* log_data = jail->log_data;
1407
1408 // Capture output if requested by user
1409 if (output)
1410 pakfire_jail_set_log_callback(jail, pakfire_jail_capture_stdout, output);
1411
1412 // Run exec()
1413 r = __pakfire_jail_exec(jail, argv);
1414
1415 // Restore log callback
1416 pakfire_jail_set_log_callback(jail, log_callback, log_data);
1417
1418 return r;
1419 }
1420
1421 PAKFIRE_EXPORT int pakfire_jail_exec_script(struct pakfire_jail* jail,
1422 const char* script, const size_t size, const char* args[], char** output) {
1423 char path[PATH_MAX];
1424 const char** argv = NULL;
1425 int r;
1426
1427 const char* root = pakfire_get_path(jail->pakfire);
1428
1429 // Write the scriptlet to disk
1430 r = pakfire_path_join(path, root, "pakfire-script.XXXXXX");
1431 if (r < 0)
1432 goto ERROR;
1433
1434 // Open a temporary file
1435 int fd = mkstemp(path);
1436 if (fd < 0) {
1437 ERROR(jail->pakfire, "Could not open a temporary file: %m\n");
1438 r = 1;
1439 goto ERROR;
1440 }
1441
1442 DEBUG(jail->pakfire, "Writing script to %s:\n%.*s\n", path, (int)size, script);
1443
1444 // Write data
1445 ssize_t bytes_written = write(fd, script, size);
1446 if (bytes_written < (ssize_t)size) {
1447 ERROR(jail->pakfire, "Could not write script to file %s: %m\n", path);
1448 r = 1;
1449 goto ERROR;
1450 }
1451
1452 // Make the script executable
1453 r = fchmod(fd, S_IRUSR|S_IWUSR|S_IXUSR);
1454 if (r) {
1455 ERROR(jail->pakfire, "Could not set executable permissions on %s: %m\n", path);
1456 goto ERROR;
1457 }
1458
1459 // Close file
1460 r = close(fd);
1461 if (r) {
1462 ERROR(jail->pakfire, "Could not close script file %s: %m\n", path);
1463 r = 1;
1464 goto ERROR;
1465 }
1466
1467 // Count how many arguments were passed
1468 unsigned int argc = 1;
1469 if (args) {
1470 for (const char** arg = args; *arg; arg++)
1471 argc++;
1472 }
1473
1474 argv = calloc(argc + 1, sizeof(*argv));
1475 if (!argv) {
1476 ERROR(jail->pakfire, "Could not allocate argv: %m\n");
1477 goto ERROR;
1478 }
1479
1480 // Set command
1481 argv[0] = (root) ? pakfire_path_relpath(root, path) : path;
1482
1483 // Copy args
1484 for (unsigned int i = 1; i < argc; i++)
1485 argv[i] = args[i-1];
1486
1487 // Run the script
1488 r = pakfire_jail_exec(jail, argv, output);
1489
1490 ERROR:
1491 if (argv)
1492 free(argv);
1493
1494 // Remove script from disk
1495 if (*path)
1496 unlink(path);
1497
1498 return r;
1499 }
1500
/*
	Convenience wrapper which creates a new jail with the given flags, executes
	argv in it and drops its reference to the jail again.

	Returns the non-zero result of pakfire_jail_create() if the jail could not
	be created, or the result of pakfire_jail_exec() otherwise.
*/
int pakfire_jail_run(struct pakfire* pakfire, const char* argv[], int flags, char** output) {
	struct pakfire_jail* jail = NULL;

	// Create a new jail and, if that succeeded, execute the command in it
	int r = pakfire_jail_create(&jail, pakfire, flags);
	if (r == 0)
		r = pakfire_jail_exec(jail, argv, output);

	// Drop our reference to the jail
	if (jail != NULL)
		pakfire_jail_unref(jail);

	return r;
}
1523
/*
	Convenience wrapper which creates a new jail with the given flags, runs the
	script (length bytes) with the arguments in argv in it, and drops its
	reference to the jail again.

	Returns the non-zero result of pakfire_jail_create() if the jail could not
	be created, or the result of pakfire_jail_exec_script() otherwise.
*/
int pakfire_jail_run_script(struct pakfire* pakfire,
		const char* script, const size_t length, const char* argv[], int flags, char** output) {
	struct pakfire_jail* jail = NULL;

	// Create a new jail and, if that succeeded, execute the script in it
	int r = pakfire_jail_create(&jail, pakfire, flags);
	if (r == 0)
		r = pakfire_jail_exec_script(jail, script, length, argv, output);

	// Drop our reference to the jail
	if (jail != NULL)
		pakfire_jail_unref(jail);

	return r;
}
1543
1544
1545 int pakfire_jail_shell(struct pakfire* pakfire) {
1546 const char* argv[] = {
1547 "/bin/bash", "--login", NULL,
1548 };
1549
1550 // Execute /bin/bash
1551 return pakfire_jail_run(pakfire, argv, PAKFIRE_JAIL_INTERACTIVE, NULL);
1552 }
1553
1554 int pakfire_jail_ldconfig(struct pakfire* pakfire) {
1555 char path[PATH_MAX];
1556
1557 const char* ldconfig = "/sbin/ldconfig";
1558
1559 // Check if ldconfig exists before calling it to avoid overhead
1560 int r = pakfire_make_path(pakfire, path, ldconfig);
1561 if (r < 0)
1562 return 1;
1563
1564 // Check if ldconfig is executable
1565 r = access(path, X_OK);
1566 if (r) {
1567 DEBUG(pakfire, "%s is not executable. Skipping...\n", ldconfig);
1568 return 0;
1569 }
1570
1571 const char* argv[] = {
1572 ldconfig, NULL,
1573 };
1574
1575 // Run ldconfig
1576 return pakfire_jail_run(pakfire, argv, 0, NULL);
1577 }