]> git.ipfire.org Git - people/ms/pakfire.git/blob - src/libpakfire/jail.c
jail: Log executed command line
[people/ms/pakfire.git] / src / libpakfire / jail.c
1 /*#############################################################################
2 # #
3 # Pakfire - The IPFire package management system #
4 # Copyright (C) 2022 Pakfire development team #
5 # #
6 # This program is free software: you can redistribute it and/or modify #
7 # it under the terms of the GNU General Public License as published by #
8 # the Free Software Foundation, either version 3 of the License, or #
9 # (at your option) any later version. #
10 # #
11 # This program is distributed in the hope that it will be useful, #
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of #
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
14 # GNU General Public License for more details. #
15 # #
16 # You should have received a copy of the GNU General Public License #
17 # along with this program. If not, see <http://www.gnu.org/licenses/>. #
18 # #
19 #############################################################################*/
20
21 #include <errno.h>
22 #include <linux/capability.h>
23 #include <linux/fcntl.h>
24 #include <linux/sched.h>
25 #include <linux/wait.h>
26 #include <sched.h>
27 #include <signal.h>
28 #include <stdlib.h>
29 #include <syscall.h>
30 #include <sys/capability.h>
31 #include <sys/epoll.h>
32 #include <sys/eventfd.h>
33 #include <sys/personality.h>
34 #include <sys/prctl.h>
35 #include <sys/resource.h>
36 #include <sys/types.h>
37 #include <sys/wait.h>
38
39 // libseccomp
40 #include <seccomp.h>
41
42 #include <pakfire/arch.h>
43 #include <pakfire/cgroup.h>
44 #include <pakfire/jail.h>
45 #include <pakfire/logging.h>
46 #include <pakfire/mount.h>
47 #include <pakfire/pakfire.h>
48 #include <pakfire/private.h>
49 #include <pakfire/util.h>
50
// Size in bytes of each log buffer (64 KiB).
// The expression is parenthesised so that the macro expands correctly when it
// is used inside a larger expression (e.g. BUFFER_SIZE % n).
#define BUFFER_SIZE      (1024 * 64)

// Number of slots in the environment array of a jail
#define ENVIRON_SIZE     128

// Maximum number of events fetched by one call to epoll_wait()
#define EPOLL_MAX_EVENTS 2
54
/*
	The default environment that will be set for every command.

	The table is terminated by an entry whose key is NULL.
*/
static const struct environ {
	const char* key;
	const char* val;
} ENV[] = {
	{ .key = "LANG", .val = "en_US.utf-8" },
	{ .key = "TERM", .val = "vt100" },
	{ .key = NULL,   .val = NULL },
};
64
65 struct pakfire_jail {
66 struct pakfire* pakfire;
67 int nrefs;
68
69 // Flags
70 int flags;
71
72 // Resource Limits
73 int nice;
74
75 // CGroup
76 struct pakfire_cgroup* cgroup;
77
78 // Environment
79 char* env[ENVIRON_SIZE];
80
81 // Logging
82 pakfire_jail_log_callback log_callback;
83 void* log_data;
84 };
85
86 struct pakfire_log_buffer {
87 char data[BUFFER_SIZE];
88 size_t used;
89 };
90
91 struct pakfire_jail_exec {
92 // PID (of the child)
93 pid_t pid;
94 int pidfd;
95
96 // Process status (from waitid)
97 siginfo_t status;
98
99 // FD to notify the client that the parent has finished initialization
100 int completed_fd;
101
102 // Log pipes
103 struct pakfire_jail_pipes {
104 int stdout[2];
105 int stderr[2];
106
107 // Logging
108 int log_INFO[2];
109 int log_ERROR[2];
110 int log_DEBUG[2];
111 } pipes;
112
113 // Log buffers
114 struct pakfire_jail_buffers {
115 struct pakfire_log_buffer stdout;
116 struct pakfire_log_buffer stderr;
117
118 // Logging
119 struct pakfire_log_buffer log_INFO;
120 struct pakfire_log_buffer log_ERROR;
121 struct pakfire_log_buffer log_DEBUG;
122 } buffers;
123 };
124
/*
	Invokes the clone3 system call through syscall() and
	returns its result as an integer.
*/
static int clone3(struct clone_args* args, size_t size) {
	const long result = syscall(__NR_clone3, args, size);

	return (int)result;
}
128
129 static void pakfire_jail_free(struct pakfire_jail* jail) {
130 DEBUG(jail->pakfire, "Freeing jail at %p\n", jail);
131
132 // Free environment
133 for (unsigned int i = 0; jail->env[i]; i++)
134 free(jail->env[i]);
135
136 if (jail->cgroup)
137 pakfire_cgroup_unref(jail->cgroup);
138
139 pakfire_unref(jail->pakfire);
140 free(jail);
141 }
142
/*
	Passes any log messages on to the default pakfire log callback

	Messages of any priority other than LOG_INFO, LOG_ERR (and LOG_DEBUG if
	debugging is compiled in) are dropped. Always returns zero.
*/
static int pakfire_jail_default_log_callback(struct pakfire* pakfire, void* data,
		int priority, const char* line, size_t length) {
	if (priority == LOG_INFO) {
		INFO(pakfire, "%s", line);

	} else if (priority == LOG_ERR) {
		ERROR(pakfire, "%s", line);

#ifdef ENABLE_DEBUG
	} else if (priority == LOG_DEBUG) {
		DEBUG(pakfire, "%s", line);
#endif /* ENABLE_DEBUG */
	}

	return 0;
}
166
/*
	Prepares the environment for interactive use:

	PS1 is set to a fixed prompt, and TERM and LANG are copied from the
	environment of the calling process if they are set there.

	Returns zero on success or the error of pakfire_jail_set_env().
*/
static int pakfire_jail_setup_interactive_env(struct pakfire_jail* jail) {
	// Variables that are copied from the calling process (in this order)
	static const char* const inherited[] = { "TERM", "LANG", NULL };

	// Set PS1
	int r = pakfire_jail_set_env(jail, "PS1", "pakfire-jail \\w> ");
	if (r)
		return r;

	for (const char* const* name = inherited; *name; name++) {
		const char* value = secure_getenv(*name);

		// Skip anything that is not set
		if (!value)
			continue;

		r = pakfire_jail_set_env(jail, *name, value);
		if (r)
			return r;
	}

	return 0;
}
191
192 PAKFIRE_EXPORT int pakfire_jail_create(struct pakfire_jail** jail,
193 struct pakfire* pakfire, int flags) {
194 int r;
195
196 // Allocate a new jail
197 struct pakfire_jail* j = calloc(1, sizeof(*j));
198 if (!j)
199 return 1;
200
201 // Reference Pakfire
202 j->pakfire = pakfire_ref(pakfire);
203
204 // Initialize reference counter
205 j->nrefs = 1;
206
207 // Store flags
208 j->flags = flags;
209
210 DEBUG(j->pakfire, "Allocated new jail at %p\n", j);
211
212 // Set default log callback
213 r = pakfire_jail_set_log_callback(j, pakfire_jail_default_log_callback, NULL);
214 if (r)
215 goto ERROR;
216
217 // Set default environment
218 for (const struct environ* e = ENV; e->key; e++) {
219 r = pakfire_jail_set_env(j, e->key, e->val);
220 if (r)
221 goto ERROR;
222 }
223
224 // Setup interactive stuff
225 if (j->flags & PAKFIRE_JAIL_INTERACTIVE) {
226 r = pakfire_jail_setup_interactive_env(j);
227 if (r)
228 goto ERROR;
229 }
230
231 // Done
232 *jail = j;
233 return 0;
234
235 ERROR:
236 pakfire_jail_free(j);
237
238 return r;
239 }
240
241 PAKFIRE_EXPORT struct pakfire_jail* pakfire_jail_ref(struct pakfire_jail* jail) {
242 ++jail->nrefs;
243
244 return jail;
245 }
246
247 PAKFIRE_EXPORT struct pakfire_jail* pakfire_jail_unref(struct pakfire_jail* jail) {
248 if (--jail->nrefs > 0)
249 return jail;
250
251 pakfire_jail_free(jail);
252 return NULL;
253 }
254
255 static int pakfire_jail_has_flag(struct pakfire_jail* jail, int flag) {
256 return jail->flags & flag;
257 }
258
259 // Resource Limits
260
261 PAKFIRE_EXPORT int pakfire_jail_nice(struct pakfire_jail* jail, int nice) {
262 // Check if nice level is in range
263 if (nice < -19 || nice > 20) {
264 errno = EINVAL;
265 return 1;
266 }
267
268 // Store nice level
269 jail->nice = nice;
270
271 return 0;
272 }
273
274 int pakfire_jail_set_cgroup(struct pakfire_jail* jail, struct pakfire_cgroup* cgroup) {
275 // Free any previous cgroup
276 if (jail->cgroup) {
277 pakfire_cgroup_unref(jail->cgroup);
278 jail->cgroup = NULL;
279 }
280
281 // Set any new cgroup
282 if (cgroup) {
283 DEBUG(jail->pakfire, "Setting cgroup %p\n", cgroup);
284
285 jail->cgroup = pakfire_cgroup_ref(cgroup);
286 }
287
288 // Done
289 return 0;
290 }
291
292 // Environment
293
294 // Returns the length of the environment
295 static unsigned int pakfire_jail_env_length(struct pakfire_jail* jail) {
296 unsigned int i = 0;
297
298 // Count everything in the environment
299 for (char** e = jail->env; *e; e++)
300 i++;
301
302 return i;
303 }
304
305 // Finds an existing environment variable and returns its index or -1 if not found
306 static int pakfire_jail_find_env(struct pakfire_jail* jail, const char* key) {
307 if (!key) {
308 errno = EINVAL;
309 return -1;
310 }
311
312 char buffer[strlen(key) + 2];
313 pakfire_string_format(buffer, "%s=", key);
314
315 for (unsigned int i = 0; jail->env[i]; i++) {
316 if (pakfire_string_startswith(jail->env[i], buffer))
317 return i;
318 }
319
320 // Nothing found
321 return -1;
322 }
323
324 // Returns the value of an environment variable or NULL
325 PAKFIRE_EXPORT const char* pakfire_jail_get_env(struct pakfire_jail* jail,
326 const char* key) {
327 int i = pakfire_jail_find_env(jail, key);
328 if (i < 0)
329 return NULL;
330
331 return jail->env[i] + strlen(key) + 1;
332 }
333
334 // Sets an environment variable
335 PAKFIRE_EXPORT int pakfire_jail_set_env(struct pakfire_jail* jail,
336 const char* key, const char* value) {
337 // Find the index where to write this value to
338 int i = pakfire_jail_find_env(jail, key);
339 if (i < 0)
340 i = pakfire_jail_env_length(jail);
341
342 // Return -ENOSPC when the environment is full
343 if (i >= ENVIRON_SIZE) {
344 errno = ENOSPC;
345 return -1;
346 }
347
348 // Free any previous value
349 if (jail->env[i])
350 free(jail->env[i]);
351
352 // Format and set environment variable
353 asprintf(&jail->env[i], "%s=%s", key, value);
354
355 DEBUG(jail->pakfire, "Set environment variable: %s\n", jail->env[i]);
356
357 return 0;
358 }
359
360 // Imports an environment
361 PAKFIRE_EXPORT int pakfire_jail_import_env(struct pakfire_jail* jail, const char* env[]) {
362 if (!env)
363 return 0;
364
365 char* key;
366 char* val;
367 int r;
368
369 // Copy environment variables
370 for (unsigned int i = 0; env[i]; i++) {
371 r = pakfire_string_partition(env[i], "=", &key, &val);
372 if (r)
373 continue;
374
375 // Set value
376 r = pakfire_jail_set_env(jail, key, val);
377
378 if (key)
379 free(key);
380 if (val)
381 free(val);
382
383 // Break on error
384 if (r)
385 return r;
386 }
387
388 return 0;
389 }
390
391 // Logging
392
393 PAKFIRE_EXPORT int pakfire_jail_set_log_callback(struct pakfire_jail* jail,
394 pakfire_jail_log_callback callback, void* data) {
395 jail->log_callback = callback;
396 jail->log_data = data;
397
398 return 0;
399 }
400
401 /*
402 This function replaces any logging in the child process.
403
404 All log messages will be sent to the parent process through their respective pipes.
405 */
406 static void pakfire_jail_log(void* data, int priority, const char* file,
407 int line, const char* fn, const char* format, va_list args) {
408 struct pakfire_jail_pipes* pipes = (struct pakfire_jail_pipes*)data;
409 int fd;
410
411 switch (priority) {
412 case LOG_INFO:
413 fd = pipes->log_INFO[1];
414 break;
415
416 case LOG_ERR:
417 fd = pipes->log_ERROR[1];
418 break;
419
420 #ifdef ENABLE_DEBUG
421 case LOG_DEBUG:
422 fd = pipes->log_DEBUG[1];
423 break;
424 #endif /* ENABLE_DEBUG */
425
426 // Ignore any messages of an unknown priority
427 default:
428 return;
429 }
430
431 // Send the log message
432 if (fd)
433 vdprintf(fd, format, args);
434 }
435
436 static int pakfire_jail_log_buffer_is_full(const struct pakfire_log_buffer* buffer) {
437 return (sizeof(buffer->data) == buffer->used);
438 }
439
440 /*
441 This function reads as much data as it can from the file descriptor.
442 If it finds a whole line in it, it will send it to the logger and repeat the process.
443 If not newline character is found, it will try to read more data until it finds one.
444 */
445 static int pakfire_jail_handle_log(struct pakfire_jail* jail,
446 struct pakfire_jail_exec* ctx, int priority, int fd,
447 struct pakfire_log_buffer* buffer, pakfire_jail_log_callback callback, void* data) {
448 char line[BUFFER_SIZE + 1];
449
450 // Fill up buffer from fd
451 if (buffer->used < sizeof(buffer->data)) {
452 ssize_t bytes_read = read(fd, buffer->data + buffer->used,
453 sizeof(buffer->data) - buffer->used);
454
455 // Handle errors
456 if (bytes_read < 0) {
457 ERROR(jail->pakfire, "Could not read from fd %d: %m\n", fd);
458 return -1;
459 }
460
461 // Update buffer size
462 buffer->used += bytes_read;
463 }
464
465 // See if we have any lines that we can write
466 while (buffer->used) {
467 // Search for the end of the first line
468 char* eol = memchr(buffer->data, '\n', buffer->used);
469
470 // No newline found
471 if (!eol) {
472 // If the buffer is full, we send the content to the logger and try again
473 // This should not happen in practise
474 if (pakfire_jail_log_buffer_is_full(buffer)) {
475 DEBUG(jail->pakfire, "Logging buffer is full. Sending all content\n");
476
477 eol = buffer->data + sizeof(buffer->data) - 1;
478
479 // Otherwise we might have only read parts of the output
480 } else
481 break;
482 }
483
484 // Find the length of the string
485 size_t length = eol - buffer->data + 1;
486
487 // Copy the line into the buffer
488 memcpy(line, buffer->data, length);
489
490 // Terminate the string
491 line[length] = '\0';
492
493 // Log the line
494 if (callback) {
495 int r = callback(jail->pakfire, data, priority, line, length);
496 if (r) {
497 ERROR(jail->pakfire, "The logging callback returned an error: %d\n", r);
498 return r;
499 }
500 }
501
502 // Remove line from buffer
503 memmove(buffer->data, buffer->data + length, buffer->used - length);
504 buffer->used -= length;
505 }
506
507 return 0;
508 }
509
510 static int pakfire_jail_setup_pipe(struct pakfire_jail* jail, int (*fds)[2], const int flags) {
511 int r = pipe2(*fds, flags);
512 if (r < 0) {
513 ERROR(jail->pakfire, "Could not setup pipe: %m\n");
514 return 1;
515 }
516
517 return 0;
518 }
519
/*
	Closes both ends of a pipe

	File descriptors that are zero are considered to be unset and are skipped.
*/
static void pakfire_jail_close_pipe(struct pakfire_jail* jail, int fds[2]) {
	for (int* fd = fds; fd < fds + 2; fd++) {
		if (*fd)
			close(*fd);
	}
}
525
/*
	This is a convenience function to fetch the reading end of a pipe and
	closes the write end.

	The write end is set to zero after it has been closed.
*/
static int pakfire_jail_get_pipe(struct pakfire_jail* jail, int (*fds)[2]) {
	// fd[0] is the read end and fd[1] the write end of the pipe
	int* const fd = *fds;

	// Close the write end of the pipe
	if (fd[1]) {
		close(fd[1]);
		fd[1] = 0;
	}

	// Return the read end
	return fd[0];
}
544
545 static int pakfire_jail_wait(struct pakfire_jail* jail, struct pakfire_jail_exec* ctx) {
546 int epollfd = -1;
547 struct epoll_event ev;
548 struct epoll_event events[EPOLL_MAX_EVENTS];
549 int r = 0;
550
551 // Fetch file descriptors from context
552 const int stdout = pakfire_jail_get_pipe(jail, &ctx->pipes.stdout);
553 const int stderr = pakfire_jail_get_pipe(jail, &ctx->pipes.stderr);
554 const int pidfd = ctx->pidfd;
555
556 // Logging
557 const int log_INFO = pakfire_jail_get_pipe(jail, &ctx->pipes.log_INFO);
558 const int log_ERROR = pakfire_jail_get_pipe(jail, &ctx->pipes.log_ERROR);
559 const int log_DEBUG = pakfire_jail_get_pipe(jail, &ctx->pipes.log_DEBUG);
560
561 // Make a list of all file descriptors we are interested in
562 int fds[] = {
563 stdout, stderr, pidfd, log_INFO, log_ERROR, log_DEBUG,
564 };
565
566 // Setup epoll
567 epollfd = epoll_create1(0);
568 if (epollfd < 0) {
569 ERROR(jail->pakfire, "Could not initialize epoll(): %m\n");
570 r = 1;
571 goto ERROR;
572 }
573
574 ev.events = EPOLLIN;
575
576 // Turn file descriptors into non-blocking mode and add them to epoll()
577 for (unsigned int i = 0; i < sizeof(fds) / sizeof(*fds); i++) {
578 int fd = fds[i];
579
580 // Skip fds which were not initialized
581 if (fd <= 0)
582 continue;
583
584 ev.data.fd = fd;
585
586 if (epoll_ctl(epollfd, EPOLL_CTL_ADD, fd, &ev) < 0) {
587 ERROR(jail->pakfire, "Could not add file descriptor %d to epoll(): %m\n", fd);
588 r = 1;
589 goto ERROR;
590 }
591 }
592
593 int ended = 0;
594
595 // Loop for as long as the process is alive
596 while (!ended) {
597 int num = epoll_wait(epollfd, events, EPOLL_MAX_EVENTS, -1);
598 if (num < 1) {
599 // Ignore if epoll_wait() has been interrupted
600 if (errno == EINTR)
601 continue;
602
603 ERROR(jail->pakfire, "epoll_wait() failed: %m\n");
604 r = 1;
605
606 goto ERROR;
607 }
608
609 for (int i = 0; i < num; i++) {
610 int fd = events[i].data.fd;
611
612 struct pakfire_log_buffer* buffer = NULL;
613 pakfire_jail_log_callback callback = NULL;
614 void* data = NULL;
615 int priority;
616
617 // Handle any changes to the PIDFD
618 if (fd == pidfd) {
619 // Call waidid() and store the result
620 r = waitid(P_PIDFD, ctx->pidfd, &ctx->status, WEXITED);
621 if (r) {
622 ERROR(jail->pakfire, "waitid() failed: %m\n");
623 goto ERROR;
624 }
625
626 // Mark that we have ended so that we will process the remaining
627 // events from epoll() now, but won't restart the outer loop.
628 ended = 1;
629 continue;
630
631 // Handle logging messages
632 } else if (fd == log_INFO) {
633 buffer = &ctx->buffers.log_INFO;
634 priority = LOG_INFO;
635
636 callback = pakfire_jail_default_log_callback;
637
638 } else if (fd == log_ERROR) {
639 buffer = &ctx->buffers.log_ERROR;
640 priority = LOG_ERR;
641
642 callback = pakfire_jail_default_log_callback;
643
644 } else if (fd == log_DEBUG) {
645 buffer = &ctx->buffers.log_DEBUG;
646 priority = LOG_DEBUG;
647
648 callback = pakfire_jail_default_log_callback;
649
650 // Handle anything from the log pipes
651 } else if (fd == stdout) {
652 buffer = &ctx->buffers.stdout;
653 priority = LOG_INFO;
654
655 callback = jail->log_callback;
656 data = jail->log_data;
657
658 } else if (fd == stderr) {
659 buffer = &ctx->buffers.stderr;
660 priority = LOG_ERR;
661
662 callback = jail->log_callback;
663 data = jail->log_data;
664
665 } else {
666 DEBUG(jail->pakfire, "Received invalid file descriptor %d\n", fd);
667 continue;
668 }
669
670 // Handle log event
671 r = pakfire_jail_handle_log(jail, ctx, priority, fd, buffer, callback, data);
672 if (r)
673 goto ERROR;
674 }
675 }
676
677 ERROR:
678 if (epollfd > 0)
679 close(epollfd);
680
681 return r;
682 }
683
/*
	A log callback which collects the standard output of a command

	data must point to a (char**) which is either NULL or a NULL-terminated
	array of strings. Every line that is logged with priority LOG_INFO is
	copied and appended to that array (which is resized as needed).

	Lines of any other priority are passed on to the default log callback.

	Returns zero on success and 1 if memory could not be allocated. In that
	case, the array remains as it was before the call.
*/
static int pakfire_jail_capture_stdout(struct pakfire* pakfire, void* data, int priority,
		const char* line, size_t length) {
	char*** array = (char***)data;

	// Send everything that does not come from stdout to the default logger
	if (priority != LOG_INFO)
		return pakfire_jail_default_log_callback(pakfire, NULL, priority, line, length);

	// Create a copy of line
	char* message = strdup(line);
	if (!message)
		return 1;

	// Determine the length of the existing array
	size_t elements = 0;

	if (*array) {
		for (char** element = *array; *element; element++)
			elements++;
	}

	// Allocate space for the new message and the terminating NULL.
	// The result is stored in a temporary variable first, so that the existing
	// array is not lost if the allocation fails.
	char** resized = reallocarray(*array, elements + 2, sizeof(**array));
	if (!resized) {
		free(message);
		return 1;
	}

	*array = resized;

	// Append message and terminate the array
	resized[elements] = message;
	resized[elements + 1] = NULL;

	return 0;
}
718
719 // Capabilities
720
721 static int pakfire_jail_drop_capabilities(struct pakfire_jail* jail) {
722 const int capabilities[] = {
723 // Deny access to the kernel's audit system
724 CAP_AUDIT_CONTROL,
725 CAP_AUDIT_READ,
726 CAP_AUDIT_WRITE,
727
728 // Deny suspending block devices
729 CAP_BLOCK_SUSPEND,
730
731 // Deny any stuff with BPF
732 CAP_BPF,
733
734 // Deny checkpoint restore
735 CAP_CHECKPOINT_RESTORE,
736
737 // Deny opening files by inode number (open_by_handle_at)
738 CAP_DAC_READ_SEARCH,
739
740 // Deny setting SUID bits
741 CAP_FSETID,
742
743 // Deny locking more memory
744 CAP_IPC_LOCK,
745
746 // Deny modifying any Apparmor/SELinux/SMACK configuration
747 CAP_MAC_ADMIN,
748 CAP_MAC_OVERRIDE,
749
750 // Deny creating any special devices
751 CAP_MKNOD,
752
753 // Deny setting any capabilities
754 CAP_SETFCAP,
755
756 // Deny reading from syslog
757 CAP_SYSLOG,
758
759 // Deny any admin actions (mount, sethostname, ...)
760 CAP_SYS_ADMIN,
761
762 // Deny rebooting the system
763 CAP_SYS_BOOT,
764
765 // Deny loading kernel modules
766 CAP_SYS_MODULE,
767
768 // Deny setting nice level
769 CAP_SYS_NICE,
770
771 // Deny access to /proc/kcore, /dev/mem, /dev/kmem
772 CAP_SYS_RAWIO,
773
774 // Deny circumventing any resource limits
775 CAP_SYS_RESOURCE,
776
777 // Deny setting the system time
778 CAP_SYS_TIME,
779
780 // Deny playing with suspend
781 CAP_WAKE_ALARM,
782
783 0,
784 };
785
786 DEBUG(jail->pakfire, "Dropping capabilities...\n");
787
788 size_t num_caps = 0;
789 int r;
790
791 // Drop any capabilities
792 for (const int* cap = capabilities; *cap; cap++) {
793 r = prctl(PR_CAPBSET_DROP, *cap, 0, 0, 0);
794 if (r) {
795 ERROR(jail->pakfire, "Could not drop capability %d: %m\n", *cap);
796 return r;
797 }
798
799 num_caps++;
800 }
801
802 // Fetch any capabilities
803 cap_t caps = cap_get_proc();
804 if (!caps) {
805 ERROR(jail->pakfire, "Could not read capabilities: %m\n");
806 return 1;
807 }
808
809 /*
810 Set inheritable capabilities
811
812 This ensures that no processes will be able to gain any of the listed
813 capabilities again.
814 */
815 r = cap_set_flag(caps, CAP_INHERITABLE, num_caps, capabilities, CAP_CLEAR);
816 if (r) {
817 ERROR(jail->pakfire, "cap_set_flag() failed: %m\n");
818 goto ERROR;
819 }
820
821 // Restore capabilities
822 r = cap_set_proc(caps);
823 if (r) {
824 ERROR(jail->pakfire, "Could not restore capabilities: %m\n");
825 goto ERROR;
826 }
827
828 ERROR:
829 if (caps)
830 cap_free(caps);
831
832 return r;
833 }
834
835 // Syscall Filter
836
837 static int pakfire_jail_limit_syscalls(struct pakfire_jail* jail) {
838 const int syscalls[] = {
839 // The kernel's keyring isn't namespaced
840 SCMP_SYS(keyctl),
841 SCMP_SYS(add_key),
842 SCMP_SYS(request_key),
843
844 // Disable userfaultfd
845 SCMP_SYS(userfaultfd),
846
847 // Disable perf which could leak a lot of information about the host
848 SCMP_SYS(perf_event_open),
849
850 0,
851 };
852 int r = 1;
853
854 DEBUG(jail->pakfire, "Applying syscall filter...\n");
855
856 // Setup a syscall filter which allows everything by default
857 scmp_filter_ctx ctx = seccomp_init(SCMP_ACT_ALLOW);
858 if (!ctx) {
859 ERROR(jail->pakfire, "Could not setup seccomp filter: %m\n");
860 goto ERROR;
861 }
862
863 // All all syscalls
864 for (const int* syscall = syscalls; *syscall; syscall++) {
865 r = seccomp_rule_add(ctx, SCMP_ACT_ERRNO(EPERM), *syscall, 0);
866 if (r) {
867 ERROR(jail->pakfire, "Could not configure syscall %d: %m\n", *syscall);
868 goto ERROR;
869 }
870 }
871
872 // Load syscall filter into the kernel
873 r = seccomp_load(ctx);
874 if (r) {
875 ERROR(jail->pakfire, "Could not load syscall filter into the kernel: %m\n");
876 goto ERROR;
877 }
878
879 ERROR:
880 if (ctx)
881 seccomp_release(ctx);
882
883 return r;
884 }
885
886 // UID/GID Mapping
887
888 static int pakfire_jail_write_uidgid_mapping(struct pakfire_jail* jail,
889 const char* path, uid_t mapped_id, size_t length) {
890 int r = 1;
891
892 // Open file for writing
893 FILE* f = fopen(path, "w");
894 if (!f) {
895 ERROR(jail->pakfire, "Could not open %s for writing: %m\n", path);
896 goto ERROR;
897 }
898
899 // Write configuration
900 int bytes_written = fprintf(f, "%d %d %ld\n", 0, mapped_id, length);
901 if (bytes_written <= 0) {
902 ERROR(jail->pakfire, "Could not write UID/GID mapping: %m\n");
903 goto ERROR;
904 }
905
906 // Close the file
907 r = fclose(f);
908 f = NULL;
909 if (r) {
910 ERROR(jail->pakfire, "Could not write UID/GID mapping: %m\n");
911
912 goto ERROR;
913 }
914
915 // Success
916 r = 0;
917
918 ERROR:
919 if (f)
920 fclose(f);
921
922 return r;
923 }
924
925 static int pakfire_jail_setup_uid_mapping(struct pakfire_jail* jail, pid_t pid) {
926 char path[PATH_MAX];
927 int r;
928
929 uid_t mapped_uid = 0;
930 const size_t length = 1;
931
932 // Fetch the UID of the calling process
933 uid_t uid = getuid();
934
935 // Have we been called by root?
936 if (uid == 0) {
937 mapped_uid = 0;
938
939 // Have we been called by an unprivileged user?
940 } else {
941 // XXX fetch SUBUID
942 mapped_uid = uid;
943 }
944
945 // Make path
946 r = pakfire_string_format(path, "/proc/%d/uid_map", pid);
947 if (r < 0)
948 return 1;
949
950 DEBUG(jail->pakfire, "Mapping UID range (%u - %lu)\n", mapped_uid, mapped_uid + length);
951
952 return pakfire_jail_write_uidgid_mapping(jail, path, mapped_uid, length);
953 }
954
955 static int pakfire_jail_setup_gid_mapping(struct pakfire_jail* jail, pid_t pid) {
956 char path[PATH_MAX];
957 int r;
958
959 gid_t mapped_gid = 0;
960 const size_t length = 1;
961
962 // Fetch the GID of the calling process
963 gid_t gid = getgid();
964
965 // Have we been called from the root group?
966 if (gid == 0) {
967 mapped_gid = 0;
968
969 // Have we been called by an unprivileged group?
970 } else {
971 // XXX fetch SUBGID
972 mapped_gid = gid;
973 }
974
975 // Make path
976 r = pakfire_string_format(path, "/proc/%d/gid_map", pid);
977 if (r < 0)
978 return 1;
979
980 DEBUG(jail->pakfire, "Mapping GID range (%u - %lu)\n", mapped_gid, mapped_gid + length);
981
982 return pakfire_jail_write_uidgid_mapping(jail, path, mapped_gid, length);
983 }
984
985 static int pakfire_jail_setgroups(struct pakfire_jail* jail, pid_t pid) {
986 char path[PATH_MAX];
987 int r = 1;
988
989 // Make path
990 r = pakfire_string_format(path, "/proc/%d/setgroups", pid);
991 if (r < 0)
992 return 1;
993
994 // Open file for writing
995 FILE* f = fopen(path, "w");
996 if (!f) {
997 ERROR(jail->pakfire, "Could not open %s for writing: %m\n", path);
998 goto ERROR;
999 }
1000
1001 // Write content
1002 int bytes_written = fprintf(f, "deny\n");
1003 if (bytes_written <= 0) {
1004 ERROR(jail->pakfire, "Could not write to %s: %m\n", path);
1005 goto ERROR;
1006 }
1007
1008 r = fclose(f);
1009 f = NULL;
1010 if (r) {
1011 ERROR(jail->pakfire, "Could not close %s: %m\n", path);
1012 goto ERROR;
1013 }
1014
1015 ERROR:
1016 if (f)
1017 fclose(f);
1018
1019 return r;
1020 }
1021
1022 static int pakfire_jail_send_signal(struct pakfire_jail* jail, int fd) {
1023 const uint64_t val = 1;
1024 int r = 0;
1025
1026 DEBUG(jail->pakfire, "Sending signal...\n");
1027
1028 // Write to the file descriptor
1029 ssize_t bytes_written = write(fd, &val, sizeof(val));
1030 if (bytes_written < 0 || (size_t)bytes_written < sizeof(val)) {
1031 ERROR(jail->pakfire, "Could not send signal: %m\n");
1032 r = 1;
1033 }
1034
1035 // Close the file descriptor
1036 close(fd);
1037
1038 return r;
1039 }
1040
1041 static int pakfire_jail_wait_for_signal(struct pakfire_jail* jail, int fd) {
1042 uint64_t val = 0;
1043 int r = 0;
1044
1045 DEBUG(jail->pakfire, "Waiting for signal...\n");
1046
1047 ssize_t bytes_read = read(fd, &val, sizeof(val));
1048 if (bytes_read < 0 || (size_t)bytes_read < sizeof(val)) {
1049 ERROR(jail->pakfire, "Error waiting for signal: %m\n");
1050 r = 1;
1051 }
1052
1053 // Close the file descriptor
1054 close(fd);
1055
1056 return r;
1057 }
1058
1059 /*
1060 Performs the initialisation that needs to happen in the parent part
1061 */
1062 static int pakfire_jail_parent(struct pakfire_jail* jail, struct pakfire_jail_exec* ctx) {
1063 int r;
1064
1065 // Setup UID mapping
1066 r = pakfire_jail_setup_uid_mapping(jail, ctx->pid);
1067 if (r)
1068 return r;
1069
1070 // Write "deny" to /proc/PID/setgroups
1071 r = pakfire_jail_setgroups(jail, ctx->pid);
1072 if (r)
1073 return r;
1074
1075 // Setup GID mapping
1076 r = pakfire_jail_setup_gid_mapping(jail, ctx->pid);
1077 if (r)
1078 return r;
1079
1080 // Parent has finished initialisation
1081 DEBUG(jail->pakfire, "Parent has finished initialization\n");
1082
1083 // Send signal to client
1084 r = pakfire_jail_send_signal(jail, ctx->completed_fd);
1085 if (r)
1086 return r;
1087
1088 return 0;
1089 }
1090
1091 static int pakfire_jail_child(struct pakfire_jail* jail, struct pakfire_jail_exec* ctx,
1092 const char* argv[]) {
1093 int r;
1094
1095 // Redirect any logging to our log pipe
1096 pakfire_set_log_callback(jail->pakfire, pakfire_jail_log, &ctx->pipes);
1097
1098 // Fetch my own PID
1099 pid_t pid = getpid();
1100
1101 DEBUG(jail->pakfire, "Launched child process in jail with PID %d\n", pid);
1102
1103 // Log argv
1104 for (unsigned int i = 0; argv[i]; i++)
1105 DEBUG(jail->pakfire, " argv[%d] = %s\n", i, argv[i]);
1106
1107 // Wait for the parent to finish initialization
1108 r = pakfire_jail_wait_for_signal(jail, ctx->completed_fd);
1109 if (r)
1110 return r;
1111
1112 // Perform further initialization
1113
1114 // Fetch UID/GID
1115 uid_t uid = getuid();
1116 gid_t gid = getgid();
1117
1118 // Fetch EUID/EGID
1119 uid_t euid = geteuid();
1120 gid_t egid = getegid();
1121
1122 DEBUG(jail->pakfire, " UID: %d (effective %d)\n", uid, euid);
1123 DEBUG(jail->pakfire, " GID: %d (effective %d)\n", gid, egid);
1124
1125 // Check if we are (effectively running as root)
1126 if (uid != 0 || gid != 0) {
1127 ERROR(jail->pakfire, "Child process is not running as root\n");
1128 return 126;
1129 }
1130
1131 const char* root = pakfire_get_path(jail->pakfire);
1132 const char* arch = pakfire_get_arch(jail->pakfire);
1133
1134 // Change root (unless root is /)
1135 if (!pakfire_on_root(jail->pakfire)) {
1136 // Mount everything
1137 r = pakfire_mount_all(jail->pakfire);
1138 if (r)
1139 return r;
1140
1141 // Log all mountpoints
1142 pakfire_mount_list(jail->pakfire);
1143
1144 // Call chroot()
1145 r = chroot(root);
1146 if (r) {
1147 ERROR(jail->pakfire, "chroot() to %s failed: %m\n", root);
1148 return 1;
1149 }
1150
1151 // Change directory to /
1152 r = chdir("/");
1153 if (r) {
1154 ERROR(jail->pakfire, "chdir() after chroot() failed: %m\n");
1155 return 1;
1156 }
1157 }
1158
1159 // Set personality
1160 unsigned long persona = pakfire_arch_personality(arch);
1161 if (persona) {
1162 r = personality(persona);
1163 if (r < 0) {
1164 ERROR(jail->pakfire, "Could not set personality (%x)\n", (unsigned int)persona);
1165 return 1;
1166 }
1167 }
1168
1169 // Set nice level
1170 if (jail->nice) {
1171 DEBUG(jail->pakfire, "Setting nice level to %d\n", jail->nice);
1172
1173 r = setpriority(PRIO_PROCESS, pid, jail->nice);
1174 if (r) {
1175 ERROR(jail->pakfire, "Could not set nice level: %m\n");
1176 return 1;
1177 }
1178 }
1179
1180 // Close other end of log pipes
1181 close(ctx->pipes.log_INFO[0]);
1182 close(ctx->pipes.log_ERROR[0]);
1183 #ifdef ENABLE_DEBUG
1184 close(ctx->pipes.log_DEBUG[0]);
1185 #endif /* ENABLE_DEBUG */
1186
1187 // Connect standard output and error
1188 if (ctx->pipes.stdout[1] && ctx->pipes.stderr[1]) {
1189 r = dup2(ctx->pipes.stdout[1], STDOUT_FILENO);
1190 if (r < 0) {
1191 ERROR(jail->pakfire, "Could not connect fd %d to stdout: %m\n",
1192 ctx->pipes.stdout[1]);
1193
1194 return 1;
1195 }
1196
1197 r = dup2(ctx->pipes.stderr[1], STDERR_FILENO);
1198 if (r < 0) {
1199 ERROR(jail->pakfire, "Could not connect fd %d to stderr: %m\n",
1200 ctx->pipes.stderr[1]);
1201
1202 return 1;
1203 }
1204
1205 // Close the pipe (as we have moved the original file descriptors)
1206 pakfire_jail_close_pipe(jail, ctx->pipes.stdout);
1207 pakfire_jail_close_pipe(jail, ctx->pipes.stderr);
1208 }
1209
1210 // Reset open file limit (http://0pointer.net/blog/file-descriptor-limits.html)
1211 r = pakfire_rlimit_reset_nofile(jail->pakfire);
1212 if (r)
1213 return r;
1214
1215 // Drop capabilities
1216 r = pakfire_jail_drop_capabilities(jail);
1217 if (r)
1218 return r;
1219
1220 // Filter syscalls
1221 r = pakfire_jail_limit_syscalls(jail);
1222 if (r)
1223 return r;
1224
1225 // exec() command
1226 r = execvpe(argv[0], (char**)argv, jail->env);
1227 if (r < 0)
1228 ERROR(jail->pakfire, "Could not execve(): %m\n");
1229
1230 // Translate errno into regular exit code
1231 switch (errno) {
1232 case ENOENT:
1233 r = 127;
1234 break;
1235
1236 default:
1237 r = 1;
1238 }
1239
1240 // We should not get here
1241 return r;
1242 }
1243
1244 // Run a command in the jail
1245 static int __pakfire_jail_exec(struct pakfire_jail* jail, const char* argv[]) {
1246 int exit = -1;
1247 int r;
1248
1249 // Check if argv is valid
1250 if (!argv || !argv[0]) {
1251 errno = EINVAL;
1252 return -1;
1253 }
1254
1255 // Initialize context for this call
1256 struct pakfire_jail_exec ctx = {
1257 .pipes = {
1258 .stdout = { 0, 0 },
1259 .stderr = { 0, 0 },
1260 },
1261 };
1262
1263 DEBUG(jail->pakfire, "Executing jail...\n");
1264
1265 /*
1266 Setup a file descriptor which can be used to notify the client that the parent
1267 has completed configuration.
1268 */
1269 ctx.completed_fd = eventfd(0, EFD_CLOEXEC);
1270 if (ctx.completed_fd < 0) {
1271 ERROR(jail->pakfire, "eventfd() failed: %m\n");
1272 return -1;
1273 }
1274
1275 // Create pipes to communicate with child process if we are not running interactively
1276 if (!pakfire_jail_has_flag(jail, PAKFIRE_JAIL_INTERACTIVE)) {
1277 // stdout
1278 r = pakfire_jail_setup_pipe(jail, &ctx.pipes.stdout, 0);
1279 if (r)
1280 goto ERROR;
1281
1282 // stderr
1283 r = pakfire_jail_setup_pipe(jail, &ctx.pipes.stderr, 0);
1284 if (r)
1285 goto ERROR;
1286 }
1287
1288 // Setup pipes for logging
1289 // INFO
1290 r = pakfire_jail_setup_pipe(jail, &ctx.pipes.log_INFO, O_CLOEXEC);
1291 if (r)
1292 goto ERROR;
1293
1294 // ERROR
1295 r = pakfire_jail_setup_pipe(jail, &ctx.pipes.log_ERROR, O_CLOEXEC);
1296 if (r)
1297 goto ERROR;
1298
1299 #ifdef ENABLE_DEBUG
1300 // DEBUG
1301 r = pakfire_jail_setup_pipe(jail, &ctx.pipes.log_DEBUG, O_CLOEXEC);
1302 if (r)
1303 goto ERROR;
1304 #endif /* ENABLE_DEBUG */
1305
1306 // Configure child process
1307 struct clone_args args = {
1308 .flags =
1309 CLONE_NEWCGROUP |
1310 CLONE_NEWIPC |
1311 CLONE_NEWNS |
1312 CLONE_NEWPID |
1313 CLONE_NEWUSER |
1314 CLONE_NEWUTS |
1315 CLONE_PIDFD,
1316 .exit_signal = SIGCHLD,
1317 .pidfd = (long long unsigned int)&ctx.pidfd,
1318 };
1319
1320 // Launch the process in a cgroup (if requested)
1321 if (jail->cgroup) {
1322 args.flags |= CLONE_INTO_CGROUP;
1323
1324 // Clone into this cgroup
1325 args.cgroup = pakfire_cgroup_fd(jail->cgroup);
1326 }
1327
1328 // Fork this process
1329 ctx.pid = clone3(&args, sizeof(args));
1330 if (ctx.pid < 0) {
1331 ERROR(jail->pakfire, "Could not clone: %m\n");
1332 return -1;
1333
1334 // Child process
1335 } else if (ctx.pid == 0) {
1336 r = pakfire_jail_child(jail, &ctx, argv);
1337 _exit(r);
1338 }
1339
1340 // Parent process
1341 r = pakfire_jail_parent(jail, &ctx);
1342 if (r)
1343 goto ERROR;
1344
1345 DEBUG(jail->pakfire, "Waiting for PID %d to finish its work\n", ctx.pid);
1346
1347 // Read output of the child process
1348 r = pakfire_jail_wait(jail, &ctx);
1349 if (r)
1350 goto ERROR;
1351
1352 // Handle exit status
1353 switch (ctx.status.si_code) {
1354 case CLD_EXITED:
1355 DEBUG(jail->pakfire, "The child process exited with code %d\n",
1356 ctx.status.si_status);
1357
1358 // Pass exit code
1359 exit = ctx.status.si_status;
1360 break;
1361
1362 case CLD_KILLED:
1363 case CLD_DUMPED:
1364 ERROR(jail->pakfire, "The child process was killed\n");
1365 break;
1366
1367 // Log anything else
1368 default:
1369 ERROR(jail->pakfire, "Unknown child exit code: %d\n", ctx.status.si_code);
1370 break;
1371 }
1372
1373 ERROR:
1374 // Close any file descriptors
1375 pakfire_jail_close_pipe(jail, ctx.pipes.stdout);
1376 pakfire_jail_close_pipe(jail, ctx.pipes.stderr);
1377 if (ctx.pidfd)
1378 close(ctx.pidfd);
1379 pakfire_jail_close_pipe(jail, ctx.pipes.log_INFO);
1380 pakfire_jail_close_pipe(jail, ctx.pipes.log_ERROR);
1381 pakfire_jail_close_pipe(jail, ctx.pipes.log_DEBUG);
1382
1383 // Umount everything
1384 if (!pakfire_on_root(jail->pakfire))
1385 pakfire_umount_all(jail->pakfire);
1386
1387 return exit;
1388 }
1389
1390 PAKFIRE_EXPORT int pakfire_jail_exec(struct pakfire_jail* jail,
1391 const char* argv[], char*** output) {
1392 int r;
1393
1394 // Store logging callback
1395 pakfire_jail_log_callback log_callback = jail->log_callback;
1396 void* log_data = jail->log_data;
1397
1398 // Capture output if requested by user
1399 if (output)
1400 pakfire_jail_set_log_callback(jail, pakfire_jail_capture_stdout, output);
1401
1402 // Run exec()
1403 r = __pakfire_jail_exec(jail, argv);
1404
1405 // Restore log callback
1406 pakfire_jail_set_log_callback(jail, log_callback, log_data);
1407
1408 return r;
1409 }
1410
1411 PAKFIRE_EXPORT int pakfire_jail_exec_script(struct pakfire_jail* jail,
1412 const char* script, const size_t size, const char* args[], char*** output) {
1413 char path[PATH_MAX];
1414 const char** argv = NULL;
1415 int r;
1416
1417 const char* root = pakfire_get_path(jail->pakfire);
1418
1419 // Write the scriptlet to disk
1420 r = pakfire_path_join(path, root, "pakfire-script.XXXXXX");
1421 if (r < 0)
1422 goto ERROR;
1423
1424 // Open a temporary file
1425 int fd = mkstemp(path);
1426 if (fd < 0) {
1427 ERROR(jail->pakfire, "Could not open a temporary file: %m\n");
1428 r = 1;
1429 goto ERROR;
1430 }
1431
1432 DEBUG(jail->pakfire, "Writing script to %s:\n%.*s\n", path, (int)size, script);
1433
1434 // Write data
1435 ssize_t bytes_written = write(fd, script, size);
1436 if (bytes_written < (ssize_t)size) {
1437 ERROR(jail->pakfire, "Could not write script to file %s: %m\n", path);
1438 r = 1;
1439 goto ERROR;
1440 }
1441
1442 // Make the script executable
1443 r = fchmod(fd, S_IRUSR|S_IWUSR|S_IXUSR);
1444 if (r) {
1445 ERROR(jail->pakfire, "Could not set executable permissions on %s: %m\n", path);
1446 goto ERROR;
1447 }
1448
1449 // Close file
1450 r = close(fd);
1451 if (r) {
1452 ERROR(jail->pakfire, "Could not close script file %s: %m\n", path);
1453 r = 1;
1454 goto ERROR;
1455 }
1456
1457 // Count how many arguments were passed
1458 unsigned int argc = 1;
1459 if (args) {
1460 for (const char** arg = args; *arg; arg++)
1461 argc++;
1462 }
1463
1464 argv = calloc(argc + 1, sizeof(*argv));
1465 if (!argv) {
1466 ERROR(jail->pakfire, "Could not allocate argv: %m\n");
1467 goto ERROR;
1468 }
1469
1470 // Set command
1471 argv[0] = (root) ? pakfire_path_relpath(root, path) : path;
1472
1473 // Copy args
1474 for (unsigned int i = 1; i < argc; i++)
1475 argv[i] = args[i-1];
1476
1477 // Run the script
1478 r = pakfire_jail_exec(jail, argv, output);
1479
1480 ERROR:
1481 if (argv)
1482 free(argv);
1483
1484 // Remove script from disk
1485 if (*path)
1486 unlink(path);
1487
1488 return r;
1489 }
1490
/*
	Convenience wrapper: sets up a jail with the given flags, executes argv in it
	through pakfire_jail_exec() and drops the reference to the jail afterwards.

	Returns the non-zero result of pakfire_jail_create() if the jail could not be
	created, the result of pakfire_jail_exec() otherwise.
*/
int pakfire_jail_run(struct pakfire* pakfire, const char* argv[], int flags, char*** output) {
	struct pakfire_jail* jail = NULL;

	// Only execute the command if the jail could be created
	int status = pakfire_jail_create(&jail, pakfire, flags);
	if (status == 0)
		status = pakfire_jail_exec(jail, argv, output);

	// Drop our reference to the jail
	if (jail != NULL)
		pakfire_jail_unref(jail);

	return status;
}
1513
/*
	Convenience wrapper: sets up a jail with the given flags, runs the script in it
	through pakfire_jail_exec_script() and drops the reference to the jail afterwards.

	Returns the non-zero result of pakfire_jail_create() if the jail could not be
	created, the result of pakfire_jail_exec_script() otherwise.
*/
int pakfire_jail_run_script(struct pakfire* pakfire,
		const char* script, const size_t length, const char* argv[], int flags, char*** output) {
	struct pakfire_jail* jail = NULL;

	// Only run the script if the jail could be created
	int status = pakfire_jail_create(&jail, pakfire, flags);
	if (status == 0)
		status = pakfire_jail_exec_script(jail, script, length, argv, output);

	// Drop our reference to the jail
	if (jail != NULL)
		pakfire_jail_unref(jail);

	return status;
}
1533
1534
1535 int pakfire_jail_shell(struct pakfire* pakfire) {
1536 const char* argv[] = {
1537 "/bin/bash", "--login", NULL,
1538 };
1539
1540 // Execute /bin/bash
1541 return pakfire_jail_run(pakfire, argv, PAKFIRE_JAIL_INTERACTIVE, NULL);
1542 }
1543
1544 int pakfire_jail_ldconfig(struct pakfire* pakfire) {
1545 char path[PATH_MAX];
1546
1547 const char* ldconfig = "/sbin/ldconfig";
1548
1549 // Check if ldconfig exists before calling it to avoid overhead
1550 int r = pakfire_make_path(pakfire, path, ldconfig);
1551 if (r < 0)
1552 return 1;
1553
1554 // Check if ldconfig is executable
1555 r = access(path, X_OK);
1556 if (r) {
1557 DEBUG(pakfire, "%s is not executable. Skipping...\n", ldconfig);
1558 return 0;
1559 }
1560
1561 const char* argv[] = {
1562 ldconfig, NULL,
1563 };
1564
1565 // Run ldconfig
1566 return pakfire_jail_run(pakfire, argv, 0, NULL);
1567 }
1568
1569 // Utility functions
1570
1571 PAKFIRE_EXPORT char* pakfire_jail_concat_output(struct pakfire_jail* jail,
1572 const char** input, size_t* length) {
1573 // Return nothing on no input
1574 if (!input)
1575 return NULL;
1576
1577 // XXX Maybe there is a more efficient way to do this
1578
1579 char* output = pakfire_string_join((char**)input, "");
1580 if (!output)
1581 return NULL;
1582
1583 // Store the length of the result
1584 if (length)
1585 *length = strlen(output);
1586
1587 return output;
1588 }