]> git.ipfire.org Git - people/ms/pakfire.git/blame - src/libpakfire/jail.c
file: Mark files as executable
[people/ms/pakfire.git] / src / libpakfire / jail.c
CommitLineData
fd37ccaf
MT
1/*#############################################################################
2# #
3# Pakfire - The IPFire package management system #
4# Copyright (C) 2022 Pakfire development team #
5# #
6# This program is free software: you can redistribute it and/or modify #
7# it under the terms of the GNU General Public License as published by #
8# the Free Software Foundation, either version 3 of the License, or #
9# (at your option) any later version. #
10# #
11# This program is distributed in the hope that it will be useful, #
12# but WITHOUT ANY WARRANTY; without even the implied warranty of #
13# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
14# GNU General Public License for more details. #
15# #
16# You should have received a copy of the GNU General Public License #
17# along with this program. If not, see <http://www.gnu.org/licenses/>. #
18# #
19#############################################################################*/
20
32d5f21d 21#include <errno.h>
bcf09bf5 22#include <fcntl.h>
980b15af 23#include <linux/capability.h>
0bd84dc1 24#include <linux/sched.h>
58ee649f 25#include <sys/wait.h>
4f23b498 26#include <linux/wait.h>
0bd84dc1
MT
27#include <sched.h>
28#include <signal.h>
32d5f21d 29#include <stdlib.h>
0bd84dc1 30#include <syscall.h>
980b15af 31#include <sys/capability.h>
616f1fca 32#include <sys/epoll.h>
43dc0e16 33#include <sys/eventfd.h>
7bdf1d8e 34#include <sys/mount.h>
90d92b5c 35#include <sys/personality.h>
980b15af 36#include <sys/prctl.h>
cf440db8 37#include <sys/resource.h>
335b8a44 38#include <sys/timerfd.h>
0bd84dc1
MT
39#include <sys/types.h>
40#include <sys/wait.h>
32d5f21d 41
739d5b57
MT
42// libseccomp
43#include <seccomp.h>
44
ae5201c5
MT
45// libuuid
46#include <uuid.h>
47
90d92b5c 48#include <pakfire/arch.h>
e3ddb498 49#include <pakfire/cgroup.h>
fd37ccaf 50#include <pakfire/jail.h>
4f59c39b
MT
51#include <pakfire/logging.h>
52#include <pakfire/mount.h>
fd37ccaf 53#include <pakfire/pakfire.h>
6ce56f90 54#include <pakfire/private.h>
4896e62c 55#include <pakfire/pwd.h>
d973a13d 56#include <pakfire/string.h>
32d5f21d
MT
57#include <pakfire/util.h>
58
616f1fca
MT
// Size in bytes of each buffer that collects output from the child process.
// The expression is parenthesised so that the macro expands correctly when it
// is used inside a larger expression (e.g. as a divisor).
#define BUFFER_SIZE (1024 * 64)
// Number of slots in a jail's environment array
#define ENVIRON_SIZE 128
// Maximum number of events fetched by a single epoll_wait() call
#define EPOLL_MAX_EVENTS 2
// Maximum number of custom bind-mounts that can be registered with a jail
#define MAX_MOUNTPOINTS 8
fd37ccaf 63
d5bc8fe0
MT
/*
	Default environment variables which pakfire_jail_create() copies into
	every new jail. The table is terminated by an entry whose key is NULL.
*/
static const struct environ {
	const char* key;
	const char* val;
} ENV[] = {
	{ .key = "HOME", .val = "/root" },
	{ .key = "LANG", .val = "C.utf-8" },
	{ .key = "PATH", .val = "/usr/local/sbin:/usr/sbin:/sbin:/usr/local/bin:/usr/bin:/bin" },
	{ .key = "TERM", .val = "vt100" },

	// Tell everything that it is running inside a Pakfire container
	{ .key = "container", .val = "pakfire" },

	// Sentinel
	{ .key = NULL, .val = NULL },
};
78
cc6e2264
MT
// One custom bind-mount registered with pakfire_jail_bind()
struct pakfire_jail_mountpoint {
	// Path that is passed to pakfire_bind() as the source
	char source[PATH_MAX];
	// Path that is passed to pakfire_bind() as the target
	char target[PATH_MAX];
	// Flags that are passed through to pakfire_bind()
	int flags;
};
84
fd37ccaf
MT
// A reference-counted jail: the configuration with which commands are run
struct pakfire_jail {
	// The Pakfire instance this jail holds a reference to
	struct pakfire* pakfire;
	// Reference counter (see pakfire_jail_ref() and pakfire_jail_unref())
	int nrefs;

	// A unique ID for each jail
	uuid_t uuid;
	// String form of the UUID; filled in lazily by pakfire_jail_uuid()
	char __uuid[UUID_STR_LEN];

	// Flags (as passed to pakfire_jail_create())
	int flags;

	// Resource Limits
	int nice;

	// Timeout (only it_value.tv_sec is set by pakfire_jail_set_timeout())
	struct itimerspec timeout;

	// CGroup
	struct pakfire_cgroup* cgroup;

	// Environment: heap-allocated "KEY=VALUE" strings, terminated by NULL
	char* env[ENVIRON_SIZE];

	// Mountpoints: the first num_mountpoints entries are in use
	struct pakfire_jail_mountpoint mountpoints[MAX_MOUNTPOINTS];
	unsigned int num_mountpoints;
};
112
// Fixed-size buffer which collects output until a whole line is available
struct pakfire_log_buffer {
	// Raw bytes read from a pipe (not NUL-terminated)
	char data[BUFFER_SIZE];
	// Number of bytes of data that are currently in use
	size_t used;
};
117
7bdf1d8e
MT
// Bit flags that are stored in pakfire_jail_exec.flags
enum pakfire_jail_exec_flags {
	// When set, pakfire_jail_mount() also mounts the networking files
	PAKFIRE_JAIL_HAS_NETWORKING = (1 << 0),
};
121
// State of a single execution inside a jail
struct pakfire_jail_exec {
	// Combination of enum pakfire_jail_exec_flags
	int flags;

	// PID (of the child)
	pid_t pid;
	// PID file descriptor of the child; polled and passed to waitid()
	int pidfd;

	// Process status (from waitid)
	siginfo_t status;

	// FD to notify the client that the parent has finished initialization
	int completed_fd;

	// Log pipes
	// Index 0 is the reading end and index 1 the writing end;
	// a value of zero marks an end as closed/unset.
	struct pakfire_jail_pipes {
		int stdin[2];
		int stdout[2];
		int stderr[2];

		// Logging
		int log_INFO[2];
		int log_ERROR[2];
		int log_DEBUG[2];
	} pipes;

	// Communicate
	struct pakfire_jail_communicate {
		// Callback which is called to feed standard input
		pakfire_jail_communicate_in in;
		// Callback which receives lines from standard output/error
		pakfire_jail_communicate_out out;
		// Opaque pointer which is handed to both callbacks
		void* data;
	} communicate;

	// Log buffers
	struct pakfire_jail_buffers {
		struct pakfire_log_buffer stdout;
		struct pakfire_log_buffer stderr;

		// Logging
		struct pakfire_log_buffer log_INFO;
		struct pakfire_log_buffer log_ERROR;
		struct pakfire_log_buffer log_DEBUG;
	} buffers;

	// CGroup and its statistics (not used in the code visible here)
	struct pakfire_cgroup* cgroup;
	struct pakfire_cgroup_stats cgroup_stats;
};
168
0bd84dc1
MT
/*
	Thin wrapper which invokes the clone3 system call directly.

	Returns whatever syscall() returned, converted to int.
*/
static int clone3(struct clone_args* args, size_t size) {
	const long result = syscall(__NR_clone3, args, size);

	return (int)result;
}
172
335b8a44
MT
173static int pidfd_send_signal(int pidfd, int sig, siginfo_t* info, unsigned int flags) {
174 return syscall(SYS_pidfd_send_signal, pidfd, sig, info, flags);
175}
176
7bdf1d8e
MT
177static int pakfire_jail_exec_has_flag(
178 const struct pakfire_jail_exec* ctx, const enum pakfire_jail_exec_flags flag) {
179 return ctx->flags & flag;
180}
181
d5bc8fe0
MT
182static void pakfire_jail_free(struct pakfire_jail* jail) {
183 DEBUG(jail->pakfire, "Freeing jail at %p\n", jail);
184
185 // Free environment
186 for (unsigned int i = 0; jail->env[i]; i++)
187 free(jail->env[i]);
188
d34b1e00
MT
189 if (jail->cgroup)
190 pakfire_cgroup_unref(jail->cgroup);
191
d5bc8fe0
MT
192 pakfire_unref(jail->pakfire);
193 free(jail);
194}
195
e33387d3
MT
/*
	Forwards a line to Pakfire's logging macro that matches the priority.

	Lines with any other priority are dropped. LOG_DEBUG is only forwarded
	when ENABLE_DEBUG is defined. Always returns zero.
*/
static int pakfire_jail_default_log_callback(struct pakfire* pakfire, void* data,
		int priority, const char* line, size_t length) {
	if (priority == LOG_INFO) {
		INFO(pakfire, "%s", line);
	} else if (priority == LOG_ERR) {
		ERROR(pakfire, "%s", line);
	}
#ifdef ENABLE_DEBUG
	else if (priority == LOG_DEBUG) {
		DEBUG(pakfire, "%s", line);
	}
#endif

	return 0;
}
219
3bf01105
MT
220static const char* pakfire_jail_uuid(struct pakfire_jail* jail) {
221 if (!*jail->__uuid)
222 uuid_unparse_lower(jail->uuid, jail->__uuid);
223
224 return jail->__uuid;
225}
226
00ba1d9a
MT
/*
	Prepares the environment for an interactive session: sets PS1 and copies
	TERM and LANG from the calling process (each only if it is set).

	Returns zero on success or the error returned by pakfire_jail_set_env().
*/
static int pakfire_jail_setup_interactive_env(struct pakfire_jail* jail) {
	// Variables which are inherited from the caller, in this order
	static const char* const inherited[] = { "TERM", "LANG", NULL };

	// Set PS1
	int r = pakfire_jail_set_env(jail, "PS1", "pakfire-jail \\w> ");
	if (r)
		return r;

	for (const char* const* name = inherited; *name; name++) {
		const char* value = secure_getenv(*name);

		// Skip anything that is not set
		if (!value)
			continue;

		r = pakfire_jail_set_env(jail, *name, value);
		if (r)
			return r;
	}

	return 0;
}
251
6ce56f90
MT
252PAKFIRE_EXPORT int pakfire_jail_create(struct pakfire_jail** jail,
253 struct pakfire* pakfire, int flags) {
d5bc8fe0
MT
254 int r;
255
256 // Allocate a new jail
fd37ccaf
MT
257 struct pakfire_jail* j = calloc(1, sizeof(*j));
258 if (!j)
259 return 1;
260
261 // Reference Pakfire
262 j->pakfire = pakfire_ref(pakfire);
263
264 // Initialize reference counter
265 j->nrefs = 1;
266
d639929b
MT
267 // Store flags
268 j->flags = flags;
269
ae5201c5
MT
270 // Generate a random UUID
271 uuid_generate_random(j->uuid);
272
84bd7655
MT
273 DEBUG(j->pakfire, "Allocated new jail at %p\n", j);
274
d5bc8fe0
MT
275 // Set default environment
276 for (const struct environ* e = ENV; e->key; e++) {
277 r = pakfire_jail_set_env(j, e->key, e->val);
278 if (r)
279 goto ERROR;
280 }
281
3bf01105
MT
282 // Set container UUID
283 r = pakfire_jail_set_env(j, "container_uuid", pakfire_jail_uuid(j));
284 if (r)
285 goto ERROR;
286
367e708d
MT
287 // Disable systemctl to talk to systemd
288 if (!pakfire_on_root(j->pakfire)) {
289 r = pakfire_jail_set_env(j, "SYSTEMD_OFFLINE", "1");
290 if (r)
291 goto ERROR;
292 }
293
fd37ccaf
MT
294 // Done
295 *jail = j;
296 return 0;
84bd7655 297
d5bc8fe0
MT
298ERROR:
299 pakfire_jail_free(j);
32d5f21d 300
d5bc8fe0 301 return r;
fd37ccaf
MT
302}
303
6ce56f90 304PAKFIRE_EXPORT struct pakfire_jail* pakfire_jail_ref(struct pakfire_jail* jail) {
fd37ccaf
MT
305 ++jail->nrefs;
306
307 return jail;
308}
309
6ce56f90 310PAKFIRE_EXPORT struct pakfire_jail* pakfire_jail_unref(struct pakfire_jail* jail) {
fd37ccaf
MT
311 if (--jail->nrefs > 0)
312 return jail;
313
314 pakfire_jail_free(jail);
315 return NULL;
316}
32d5f21d 317
cf440db8
MT
318// Resource Limits
319
320PAKFIRE_EXPORT int pakfire_jail_nice(struct pakfire_jail* jail, int nice) {
321 // Check if nice level is in range
322 if (nice < -19 || nice > 20) {
323 errno = EINVAL;
324 return 1;
325 }
326
327 // Store nice level
328 jail->nice = nice;
329
330 return 0;
331}
332
15503538
MT
333int pakfire_jail_set_cgroup(struct pakfire_jail* jail, struct pakfire_cgroup* cgroup) {
334 // Free any previous cgroup
335 if (jail->cgroup) {
336 pakfire_cgroup_unref(jail->cgroup);
337 jail->cgroup = NULL;
338 }
339
340 // Set any new cgroup
341 if (cgroup) {
342 DEBUG(jail->pakfire, "Setting cgroup %p\n", cgroup);
343
344 jail->cgroup = pakfire_cgroup_ref(cgroup);
345 }
346
347 // Done
348 return 0;
349}
350
32d5f21d
MT
351// Environment
352
353// Returns the length of the environment
354static unsigned int pakfire_jail_env_length(struct pakfire_jail* jail) {
355 unsigned int i = 0;
356
357 // Count everything in the environment
358 for (char** e = jail->env; *e; e++)
359 i++;
360
361 return i;
362}
363
364// Finds an existing environment variable and returns its index or -1 if not found
365static int pakfire_jail_find_env(struct pakfire_jail* jail, const char* key) {
366 if (!key) {
367 errno = EINVAL;
368 return -1;
369 }
370
371 char buffer[strlen(key) + 2];
372 pakfire_string_format(buffer, "%s=", key);
373
374 for (unsigned int i = 0; jail->env[i]; i++) {
375 if (pakfire_string_startswith(jail->env[i], buffer))
376 return i;
377 }
378
379 // Nothing found
380 return -1;
381}
382
383// Returns the value of an environment variable or NULL
6ce56f90
MT
384PAKFIRE_EXPORT const char* pakfire_jail_get_env(struct pakfire_jail* jail,
385 const char* key) {
32d5f21d
MT
386 int i = pakfire_jail_find_env(jail, key);
387 if (i < 0)
388 return NULL;
389
390 return jail->env[i] + strlen(key) + 1;
391}
392
393// Sets an environment variable
6ce56f90
MT
394PAKFIRE_EXPORT int pakfire_jail_set_env(struct pakfire_jail* jail,
395 const char* key, const char* value) {
32d5f21d
MT
396 // Find the index where to write this value to
397 int i = pakfire_jail_find_env(jail, key);
398 if (i < 0)
399 i = pakfire_jail_env_length(jail);
400
401 // Return -ENOSPC when the environment is full
402 if (i >= ENVIRON_SIZE) {
403 errno = ENOSPC;
404 return -1;
405 }
406
407 // Free any previous value
408 if (jail->env[i])
409 free(jail->env[i]);
410
411 // Format and set environment variable
412 asprintf(&jail->env[i], "%s=%s", key, value);
413
414 DEBUG(jail->pakfire, "Set environment variable: %s\n", jail->env[i]);
415
416 return 0;
417}
9f50bf71 418
939025e7 419// Imports an environment
6ce56f90 420PAKFIRE_EXPORT int pakfire_jail_import_env(struct pakfire_jail* jail, const char* env[]) {
939025e7
MT
421 if (!env)
422 return 0;
423
424 char* key;
425 char* val;
426 int r;
427
428 // Copy environment variables
429 for (unsigned int i = 0; env[i]; i++) {
430 r = pakfire_string_partition(env[i], "=", &key, &val);
431 if (r)
432 continue;
433
434 // Set value
435 r = pakfire_jail_set_env(jail, key, val);
436
437 if (key)
438 free(key);
439 if (val)
440 free(val);
441
442 // Break on error
443 if (r)
444 return r;
445 }
446
447 return 0;
448}
449
335b8a44
MT
450// Timeout
451
452PAKFIRE_EXPORT int pakfire_jail_set_timeout(
453 struct pakfire_jail* jail, unsigned int timeout) {
454 // Store value
455 jail->timeout.it_value.tv_sec = timeout;
456
457 if (timeout > 0)
458 DEBUG(jail->pakfire, "Timeout set to %d second(s)\n", timeout);
459 else
460 DEBUG(jail->pakfire, "Timeout disabled\n");
461
462 return 0;
463}
464
465static int pakfire_jail_create_timer(struct pakfire_jail* jail) {
466 int r;
467
468 // Nothing to do if no timeout has been set
469 if (!jail->timeout.it_value.tv_sec)
470 return -1;
471
472 // Create a new timer
473 const int fd = timerfd_create(CLOCK_MONOTONIC, 0);
474 if (fd < 0) {
475 ERROR(jail->pakfire, "Could not create timer: %m\n");
476 goto ERROR;
477 }
478
479 // Arm timer
480 r = timerfd_settime(fd, 0, &jail->timeout, NULL);
481 if (r) {
482 ERROR(jail->pakfire, "Could not arm timer: %m\n");
483 goto ERROR;
484 }
485
486 return fd;
487
488ERROR:
489 if (fd > 0)
490 close(fd);
491
492 return -1;
493}
494
e33387d3
MT
495/*
496 This function replaces any logging in the child process.
497
498 All log messages will be sent to the parent process through their respective pipes.
499*/
500static void pakfire_jail_log(void* data, int priority, const char* file,
501 int line, const char* fn, const char* format, va_list args) {
502 struct pakfire_jail_pipes* pipes = (struct pakfire_jail_pipes*)data;
503 int fd;
504
505 switch (priority) {
506 case LOG_INFO:
507 fd = pipes->log_INFO[1];
508 break;
509
510 case LOG_ERR:
511 fd = pipes->log_ERROR[1];
512 break;
513
514#ifdef ENABLE_DEBUG
515 case LOG_DEBUG:
516 fd = pipes->log_DEBUG[1];
517 break;
518#endif /* ENABLE_DEBUG */
519
520 // Ignore any messages of an unknown priority
521 default:
522 return;
523 }
524
525 // Send the log message
526 if (fd)
527 vdprintf(fd, format, args);
528}
529
616f1fca
MT
530static int pakfire_jail_log_buffer_is_full(const struct pakfire_log_buffer* buffer) {
531 return (sizeof(buffer->data) == buffer->used);
532}
533
534/*
535 This function reads as much data as it can from the file descriptor.
536 If it finds a whole line in it, it will send it to the logger and repeat the process.
537 If not newline character is found, it will try to read more data until it finds one.
538*/
539static int pakfire_jail_handle_log(struct pakfire_jail* jail,
e33387d3 540 struct pakfire_jail_exec* ctx, int priority, int fd,
2015cb92 541 struct pakfire_log_buffer* buffer, pakfire_jail_communicate_out callback, void* data) {
616f1fca
MT
542 char line[BUFFER_SIZE + 1];
543
544 // Fill up buffer from fd
545 if (buffer->used < sizeof(buffer->data)) {
546 ssize_t bytes_read = read(fd, buffer->data + buffer->used,
547 sizeof(buffer->data) - buffer->used);
548
549 // Handle errors
550 if (bytes_read < 0) {
551 ERROR(jail->pakfire, "Could not read from fd %d: %m\n", fd);
552 return -1;
553 }
554
555 // Update buffer size
556 buffer->used += bytes_read;
557 }
558
559 // See if we have any lines that we can write
560 while (buffer->used) {
561 // Search for the end of the first line
562 char* eol = memchr(buffer->data, '\n', buffer->used);
563
564 // No newline found
565 if (!eol) {
566 // If the buffer is full, we send the content to the logger and try again
567 // This should not happen in practise
568 if (pakfire_jail_log_buffer_is_full(buffer)) {
569 DEBUG(jail->pakfire, "Logging buffer is full. Sending all content\n");
570
571 eol = buffer->data + sizeof(buffer->data) - 1;
572
573 // Otherwise we might have only read parts of the output
574 } else
575 break;
576 }
577
578 // Find the length of the string
579 size_t length = eol - buffer->data + 1;
580
581 // Copy the line into the buffer
582 memcpy(line, buffer->data, length);
583
584 // Terminate the string
585 line[length] = '\0';
586
587 // Log the line
e33387d3
MT
588 if (callback) {
589 int r = callback(jail->pakfire, data, priority, line, length);
616f1fca
MT
590 if (r) {
591 ERROR(jail->pakfire, "The logging callback returned an error: %d\n", r);
592 return r;
593 }
594 }
595
596 // Remove line from buffer
597 memmove(buffer->data, buffer->data + length, buffer->used - length);
598 buffer->used -= length;
599 }
600
601 return 0;
602}
603
06b864ae
MT
604static int pakfire_jail_stream_stdin(struct pakfire_jail* jail,
605 struct pakfire_jail_exec* ctx, const int fd) {
606 int r;
607
608 // Nothing to do if there is no stdin callback set
609 if (!ctx->communicate.in) {
610 DEBUG(jail->pakfire, "Callback for standard input is not set\n");
611 return 0;
612 }
613
f5a70a96
MT
614 // Skip if the writing pipe has already been closed
615 if (!ctx->pipes.stdin[1])
616 return 0;
617
06b864ae
MT
618 DEBUG(jail->pakfire, "Streaming standard input...\n");
619
620 // Calling the callback
621 r = ctx->communicate.in(jail->pakfire, ctx->communicate.data, fd);
622
623 DEBUG(jail->pakfire, "Standard input callback finished: %d\n", r);
624
625 // The callback signaled that it has written everything
626 if (r == EOF) {
627 DEBUG(jail->pakfire, "Closing standard input pipe\n");
628
f5a70a96 629 // Close the file-descriptor
06b864ae 630 close(fd);
f5a70a96
MT
631
632 // Reset the file-descriptor so it won't be closed again later
633 ctx->pipes.stdin[1] = 0;
634
635 // Report success
06b864ae
MT
636 r = 0;
637 }
638
639 return r;
640}
641
195fe455
MT
642static int pakfire_jail_setup_pipe(struct pakfire_jail* jail, int (*fds)[2], const int flags) {
643 int r = pipe2(*fds, flags);
644 if (r < 0) {
645 ERROR(jail->pakfire, "Could not setup pipe: %m\n");
646 return 1;
647 }
648
649 return 0;
650}
651
/*
	Closes both ends of a pipe. Ends which are set to zero are skipped.
*/
static void pakfire_jail_close_pipe(struct pakfire_jail* jail, int fds[2]) {
	const int* const end = fds + 2;

	for (const int* fd = fds; fd < end; fd++) {
		if (*fd != 0)
			close(*fd);
	}
}
657
e33387d3
MT
/*
	Convenience function which closes the writing end of a pipe (unless it
	is already marked as closed with zero) and returns the reading end.
*/
static int pakfire_jail_get_pipe_to_read(struct pakfire_jail* jail, int (*fds)[2]) {
	// Index 0 is the reading end, index 1 is the writing end
	int* const ends = *fds;

	// Close the write end of the pipe and mark it as closed
	if (ends[1] != 0) {
		close(ends[1]);
		ends[1] = 0;
	}

	// Return the read end
	return ends[0];
}
676
06b864ae
MT
/*
	Convenience function which closes the reading end of a pipe (unless it
	is already marked as closed with zero) and returns the writing end.
*/
static int pakfire_jail_get_pipe_to_write(struct pakfire_jail* jail, int (*fds)[2]) {
	// Index 0 is the reading end, index 1 is the writing end
	int* const ends = *fds;

	// Close the read end of the pipe and mark it as closed
	if (ends[0] != 0) {
		close(ends[0]);
		ends[0] = 0;
	}

	// Return the write end
	return ends[1];
}
691
/*
	Runs the event loop of the parent process while the child is running.

	The function polls the standard input/output/error pipes, the logging
	pipes, the PID file descriptor of the child, and (if a timeout has been
	set) a timer. Output is passed through pakfire_jail_handle_log(), standard
	input is fed by pakfire_jail_stream_stdin(), and the child is sent
	SIGKILL when the timer fires.

	The loop ends after waitid() has collected the status of the child into
	ctx->status. Returns the value of r at that point, which is non-zero if
	any step failed.
*/
static int pakfire_jail_wait(struct pakfire_jail* jail, struct pakfire_jail_exec* ctx) {
	int epollfd = -1;
	struct epoll_event ev;
	struct epoll_event events[EPOLL_MAX_EVENTS];
	// Scratch space to read the expiration counter of the timer into
	char garbage[8];
	int r = 0;

	// Fetch file descriptors from context
	// (this closes the ends of the pipes which belong to the child)
	const int stdin = pakfire_jail_get_pipe_to_write(jail, &ctx->pipes.stdin);
	const int stdout = pakfire_jail_get_pipe_to_read(jail, &ctx->pipes.stdout);
	const int stderr = pakfire_jail_get_pipe_to_read(jail, &ctx->pipes.stderr);
	const int pidfd = ctx->pidfd;

	// Timer (-1 if no timeout has been set or the timer could not be created)
	const int timerfd = pakfire_jail_create_timer(jail);

	// Logging
	const int log_INFO = pakfire_jail_get_pipe_to_read(jail, &ctx->pipes.log_INFO);
	const int log_ERROR = pakfire_jail_get_pipe_to_read(jail, &ctx->pipes.log_ERROR);
	const int log_DEBUG = pakfire_jail_get_pipe_to_read(jail, &ctx->pipes.log_DEBUG);

	// Make a list of all file descriptors we are interested in
	int fds[] = {
		stdin, stdout, stderr, pidfd, timerfd, log_INFO, log_ERROR, log_DEBUG,
	};

	// Setup epoll
	epollfd = epoll_create1(0);
	if (epollfd < 0) {
		ERROR(jail->pakfire, "Could not initialize epoll(): %m\n");
		r = 1;
		goto ERROR;
	}

	// Turn file descriptors into non-blocking mode and add them to epoll()
	for (unsigned int i = 0; i < sizeof(fds) / sizeof(*fds); i++) {
		int fd = fds[i];

		// Skip fds which were not initialized
		// (zero marks an unset pipe, -1 a missing timer)
		if (fd <= 0)
			continue;

		ev.events = EPOLLHUP;

		// We write to standard input and read from everything else
		if (fd == stdin)
			ev.events |= EPOLLOUT;
		else
			ev.events |= EPOLLIN;

		// Read flags
		// NOTE(review): the result of F_GETFL is not checked for errors
		int flags = fcntl(fd, F_GETFL, 0);

		// Set modified flags
		if (fcntl(fd, F_SETFL, flags|O_NONBLOCK) < 0) {
			ERROR(jail->pakfire,
				"Could not set file descriptor %d into non-blocking mode: %m\n", fd);
			r = 1;
			goto ERROR;
		}

		ev.data.fd = fd;

		if (epoll_ctl(epollfd, EPOLL_CTL_ADD, fd, &ev) < 0) {
			ERROR(jail->pakfire, "Could not add file descriptor %d to epoll(): %m\n", fd);
			r = 1;
			goto ERROR;
		}
	}

	int ended = 0;

	// Loop for as long as the process is alive
	while (!ended) {
		int num = epoll_wait(epollfd, events, EPOLL_MAX_EVENTS, -1);
		if (num < 1) {
			// Ignore if epoll_wait() has been interrupted
			if (errno == EINTR)
				continue;

			ERROR(jail->pakfire, "epoll_wait() failed: %m\n");
			r = 1;

			goto ERROR;
		}

		for (int i = 0; i < num; i++) {
			int e = events[i].events;
			int fd = events[i].data.fd;

			struct pakfire_log_buffer* buffer = NULL;
			pakfire_jail_communicate_out callback = NULL;
			void* data = NULL;
			// Only read after one of the branches below has set it
			int priority;

			// Check if there is any data to be read
			if (e & EPOLLIN) {
				// Handle any changes to the PIDFD
				if (fd == pidfd) {
					// Call waitid() and store the result
					r = waitid(P_PIDFD, ctx->pidfd, &ctx->status, WEXITED);
					if (r) {
						ERROR(jail->pakfire, "waitid() failed: %m\n");
						goto ERROR;
					}

					// Mark that we have ended so that we will process the remaining
					// events from epoll() now, but won't restart the outer loop.
					ended = 1;
					continue;

				// Handle timer events
				} else if (fd == timerfd) {
					DEBUG(jail->pakfire, "Timer event received\n");

					// Disarm the timer
					r = read(timerfd, garbage, sizeof(garbage));
					if (r < 1) {
						ERROR(jail->pakfire, "Could not disarm timer: %m\n");
						r = 1;
						goto ERROR;
					}

					// Terminate the process if it hasn't already ended
					if (!ended) {
						DEBUG(jail->pakfire, "Terminating process...\n");

						// Send SIGKILL to the process
						r = pidfd_send_signal(pidfd, SIGKILL, NULL, 0);
						if (r) {
							ERROR(jail->pakfire, "Could not kill process: %m\n");
							goto ERROR;
						}
					}

					// There is nothing else to do
					continue;

				// Handle logging messages
				} else if (fd == log_INFO) {
					buffer = &ctx->buffers.log_INFO;
					priority = LOG_INFO;

					callback = pakfire_jail_default_log_callback;

				} else if (fd == log_ERROR) {
					buffer = &ctx->buffers.log_ERROR;
					priority = LOG_ERR;

					callback = pakfire_jail_default_log_callback;

				} else if (fd == log_DEBUG) {
					buffer = &ctx->buffers.log_DEBUG;
					priority = LOG_DEBUG;

					callback = pakfire_jail_default_log_callback;

				// Handle the output of the child process
				// (passed to the communicate callback of the caller)
				} else if (fd == stdout) {
					buffer = &ctx->buffers.stdout;
					priority = LOG_INFO;

					callback = ctx->communicate.out;
					data = ctx->communicate.data;

				} else if (fd == stderr) {
					buffer = &ctx->buffers.stderr;
					priority = LOG_ERR;

					callback = ctx->communicate.out;
					data = ctx->communicate.data;

				} else {
					DEBUG(jail->pakfire, "Received invalid file descriptor %d\n", fd);
					continue;
				}

				// Handle log event
				r = pakfire_jail_handle_log(jail, ctx, priority, fd, buffer, callback, data);
				if (r)
					goto ERROR;
			}

			if (e & EPOLLOUT) {
				// Handle standard input
				if (fd == stdin) {
					r = pakfire_jail_stream_stdin(jail, ctx, fd);
					if (r) {
						// NOTE(review): r remains non-zero in the EAGAIN case and is
						// returned if no later call overwrites it - confirm that
						// this is intended
						switch (errno) {
							// Ignore if we filled up the buffer
							case EAGAIN:
								break;

							default:
								ERROR(jail->pakfire, "Could not write to stdin: %m\n");
								goto ERROR;
						}
					}
				}
			}

			// Check if any file descriptors have been closed
			if (e & EPOLLHUP) {
				// Remove the file descriptor
				r = epoll_ctl(epollfd, EPOLL_CTL_DEL, fd, NULL);
				if (r) {
					ERROR(jail->pakfire, "Could not remove closed file-descriptor %d: %m\n", fd);
					goto ERROR;
				}
			}
		}
	}

	// This is reached on success, too
ERROR:
	// NOTE(review): these checks treat zero as "not open" although zero
	// could be a valid file descriptor
	if (epollfd > 0)
		close(epollfd);
	if (timerfd > 0)
		close(timerfd);

	return r;
}
912
ccdd2e95
MT
/*
	Output callback which collects everything from standard output.

	data must either be NULL or point to a char* which is NULL or holds a
	heap-allocated string. Every line of priority LOG_INFO is appended to that
	string: a new string is allocated, the previous one is freed, and *data is
	updated. The caller has to free the final string.

	Lines of any other priority (and all lines if data is NULL) are passed to
	the default log callback.

	Returns zero on success and 1 if the new string could not be allocated,
	in which case the previously collected output remains untouched.
*/
int pakfire_jail_capture_stdout(struct pakfire* pakfire, void* data,
		int priority, const char* line, size_t length) {
	char** output = (char**)data;

	// Append everything from stdout to a buffer
	if (output && priority == LOG_INFO) {
		char* buffer = NULL;

		// Concatenate into a new string
		int r = asprintf(&buffer, "%s%s", (*output) ? *output : "", line);
		if (r < 0)
			return 1;

		// Release the previous string, which would be leaked otherwise
		free(*output);
		*output = buffer;

		return 0;
	}

	// Send everything else to the default logger
	return pakfire_jail_default_log_callback(pakfire, NULL, priority, line, length);
}
929
980b15af
MT
930// Capabilities
931
932static int pakfire_jail_drop_capabilities(struct pakfire_jail* jail) {
933 const int capabilities[] = {
934 // Deny access to the kernel's audit system
935 CAP_AUDIT_CONTROL,
936 CAP_AUDIT_READ,
937 CAP_AUDIT_WRITE,
938
939 // Deny suspending block devices
940 CAP_BLOCK_SUSPEND,
941
942 // Deny any stuff with BPF
943 CAP_BPF,
944
945 // Deny checkpoint restore
946 CAP_CHECKPOINT_RESTORE,
947
948 // Deny opening files by inode number (open_by_handle_at)
949 CAP_DAC_READ_SEARCH,
950
951 // Deny setting SUID bits
952 CAP_FSETID,
953
954 // Deny locking more memory
955 CAP_IPC_LOCK,
956
957 // Deny modifying any Apparmor/SELinux/SMACK configuration
958 CAP_MAC_ADMIN,
959 CAP_MAC_OVERRIDE,
960
961 // Deny creating any special devices
962 CAP_MKNOD,
963
964 // Deny setting any capabilities
965 CAP_SETFCAP,
966
967 // Deny reading from syslog
968 CAP_SYSLOG,
969
970 // Deny any admin actions (mount, sethostname, ...)
971 CAP_SYS_ADMIN,
972
973 // Deny rebooting the system
974 CAP_SYS_BOOT,
975
976 // Deny loading kernel modules
977 CAP_SYS_MODULE,
978
979 // Deny setting nice level
980 CAP_SYS_NICE,
981
982 // Deny access to /proc/kcore, /dev/mem, /dev/kmem
983 CAP_SYS_RAWIO,
984
985 // Deny circumventing any resource limits
986 CAP_SYS_RESOURCE,
987
988 // Deny setting the system time
989 CAP_SYS_TIME,
990
991 // Deny playing with suspend
992 CAP_WAKE_ALARM,
993
994 0,
995 };
996
997 DEBUG(jail->pakfire, "Dropping capabilities...\n");
998
999 size_t num_caps = 0;
1000 int r;
1001
1002 // Drop any capabilities
1003 for (const int* cap = capabilities; *cap; cap++) {
1004 r = prctl(PR_CAPBSET_DROP, *cap, 0, 0, 0);
1005 if (r) {
1006 ERROR(jail->pakfire, "Could not drop capability %d: %m\n", *cap);
1007 return r;
1008 }
1009
1010 num_caps++;
1011 }
1012
1013 // Fetch any capabilities
1014 cap_t caps = cap_get_proc();
1015 if (!caps) {
1016 ERROR(jail->pakfire, "Could not read capabilities: %m\n");
1017 return 1;
1018 }
1019
1020 /*
1021 Set inheritable capabilities
1022
1023 This ensures that no processes will be able to gain any of the listed
1024 capabilities again.
1025 */
1026 r = cap_set_flag(caps, CAP_INHERITABLE, num_caps, capabilities, CAP_CLEAR);
1027 if (r) {
1028 ERROR(jail->pakfire, "cap_set_flag() failed: %m\n");
1029 goto ERROR;
1030 }
1031
1032 // Restore capabilities
1033 r = cap_set_proc(caps);
1034 if (r) {
1035 ERROR(jail->pakfire, "Could not restore capabilities: %m\n");
1036 goto ERROR;
1037 }
1038
1039ERROR:
1040 if (caps)
1041 cap_free(caps);
1042
1043 return r;
1044}
1045
739d5b57
MT
1046// Syscall Filter
1047
1048static int pakfire_jail_limit_syscalls(struct pakfire_jail* jail) {
1049 const int syscalls[] = {
1050 // The kernel's keyring isn't namespaced
1051 SCMP_SYS(keyctl),
1052 SCMP_SYS(add_key),
1053 SCMP_SYS(request_key),
1054
1055 // Disable userfaultfd
1056 SCMP_SYS(userfaultfd),
1057
1058 // Disable perf which could leak a lot of information about the host
1059 SCMP_SYS(perf_event_open),
1060
1061 0,
1062 };
1063 int r = 1;
1064
1065 DEBUG(jail->pakfire, "Applying syscall filter...\n");
1066
1067 // Setup a syscall filter which allows everything by default
1068 scmp_filter_ctx ctx = seccomp_init(SCMP_ACT_ALLOW);
1069 if (!ctx) {
1070 ERROR(jail->pakfire, "Could not setup seccomp filter: %m\n");
1071 goto ERROR;
1072 }
1073
1074 // All all syscalls
1075 for (const int* syscall = syscalls; *syscall; syscall++) {
1076 r = seccomp_rule_add(ctx, SCMP_ACT_ERRNO(EPERM), *syscall, 0);
1077 if (r) {
1078 ERROR(jail->pakfire, "Could not configure syscall %d: %m\n", *syscall);
1079 goto ERROR;
1080 }
1081 }
1082
1083 // Load syscall filter into the kernel
1084 r = seccomp_load(ctx);
1085 if (r) {
1086 ERROR(jail->pakfire, "Could not load syscall filter into the kernel: %m\n");
1087 goto ERROR;
1088 }
1089
1090ERROR:
1091 if (ctx)
1092 seccomp_release(ctx);
1093
1094 return r;
1095}
1096
cc6e2264
MT
1097// Mountpoints
1098
061223f7 1099PAKFIRE_EXPORT int pakfire_jail_bind(struct pakfire_jail* jail,
cc6e2264
MT
1100 const char* source, const char* target, int flags) {
1101 struct pakfire_jail_mountpoint* mp = NULL;
1102 int r;
1103
1104 // Check if there is any space left
1105 if (jail->num_mountpoints >= MAX_MOUNTPOINTS) {
1106 errno = ENOSPC;
1107 return 1;
1108 }
1109
1110 // Check for valid inputs
1111 if (!source || !target) {
1112 errno = EINVAL;
1113 return 1;
1114 }
1115
1116 // Select the next free slot
1117 mp = &jail->mountpoints[jail->num_mountpoints];
1118
1119 // Copy source
1120 r = pakfire_string_set(mp->source, source);
a60955af 1121 if (r) {
cc6e2264 1122 ERROR(jail->pakfire, "Could not copy source: %m\n");
a60955af 1123 return r;
cc6e2264
MT
1124 }
1125
1126 // Copy target
1127 r = pakfire_string_set(mp->target, target);
a60955af 1128 if (r) {
cc6e2264 1129 ERROR(jail->pakfire, "Could not copy target: %m\n");
a60955af 1130 return r;
cc6e2264
MT
1131 }
1132
1133 // Copy flags
1134 mp->flags = flags;
1135
1136 // Increment counter
1137 jail->num_mountpoints++;
1138
1139 return 0;
1140}
1141
7bdf1d8e
MT
1142static int pakfire_jail_mount_networking(struct pakfire_jail* jail) {
1143 int r;
1144
1145 const char* paths[] = {
1146 "/etc/hosts",
1147 "/etc/resolv.conf",
1148 NULL,
1149 };
1150
1151 // Bind-mount all paths read-only
1152 for (const char** path = paths; *path; path++) {
1153 r = pakfire_bind(jail->pakfire, *path, NULL, MS_RDONLY);
1154 if (r)
1155 return r;
1156 }
1157
1158 return 0;
1159}
1160
cc6e2264
MT
1161/*
1162 Mounts everything that we require in the new namespace
1163*/
7bdf1d8e 1164static int pakfire_jail_mount(struct pakfire_jail* jail, struct pakfire_jail_exec* ctx) {
cc6e2264
MT
1165 struct pakfire_jail_mountpoint* mp = NULL;
1166 int r;
1167
1168 // Mount all default stuff
1169 r = pakfire_mount_all(jail->pakfire);
1170 if (r)
1171 return r;
1172
7bdf1d8e
MT
1173 // Mount networking stuff
1174 if (pakfire_jail_exec_has_flag(ctx, PAKFIRE_JAIL_HAS_NETWORKING)) {
1175 r = pakfire_jail_mount_networking(jail);
1176 if (r)
1177 return r;
1178 }
1179
cc6e2264
MT
1180 // Mount all custom stuff
1181 for (unsigned int i = 0; i < jail->num_mountpoints; i++) {
1182 // Fetch mountpoint
1183 mp = &jail->mountpoints[i];
1184
1185 // Mount it
1186 r = pakfire_bind(jail->pakfire, mp->source, mp->target, mp->flags);
1187 if (r)
1188 return r;
1189 }
1190
1191 // Log all mountpoints
1192 pakfire_mount_list(jail->pakfire);
1193
1194 return 0;
1195}
1196
679ee2fa
MT
1197// UID/GID Mapping
1198
679ee2fa
MT
1199static int pakfire_jail_setup_uid_mapping(struct pakfire_jail* jail, pid_t pid) {
1200 char path[PATH_MAX];
1201 int r;
1202
4896e62c
MT
1203 // Skip mapping anything when running on /
1204 if (pakfire_on_root(jail->pakfire))
1205 return 0;
0f7f068b 1206
abe4ee37
MT
1207 // Make path
1208 r = pakfire_string_format(path, "/proc/%d/uid_map", pid);
1209 if (r)
1210 return r;
1211
1212 // Fetch UID
1213 const uid_t uid = pakfire_uid(jail->pakfire);
1214
4896e62c 1215 // Fetch SUBUID
a1ff2863 1216 const struct pakfire_subid* subuid = pakfire_subuid(jail->pakfire);
4896e62c
MT
1217 if (!subuid)
1218 return 1;
679ee2fa 1219
abe4ee37 1220 /* When running as root, we will map the entire range.
679ee2fa 1221
abe4ee37
MT
1222 When running as a non-privileged user, we will map the root user inside the jail
1223 to the user's UID outside of the jail, and we will map the rest starting from one.
1224 */
679ee2fa 1225
abe4ee37
MT
1226 // Running as root
1227 if (uid == 0) {
1228 r = pakfire_file_write(jail->pakfire, path, 0, 0, 0,
1229 "0 %lu %lu\n", subuid->id, subuid->length);
1230 } else {
1231 r = pakfire_file_write(jail->pakfire, path, 0, 0, 0,
b64888fa 1232 "0 %lu 1\n1 %lu %lu\n", uid, subuid->id, subuid->length);
abe4ee37
MT
1233 }
1234
1235 if (r) {
1236 ERROR(jail->pakfire, "Could not map UIDs: %m\n");
1237 return r;
1238 }
1239
1240 return r;
679ee2fa
MT
1241}
1242
1243static int pakfire_jail_setup_gid_mapping(struct pakfire_jail* jail, pid_t pid) {
1244 char path[PATH_MAX];
1245 int r;
1246
4896e62c
MT
1247 // Skip mapping anything when running on /
1248 if (pakfire_on_root(jail->pakfire))
1249 return 0;
0f7f068b 1250
abe4ee37
MT
1251 // Fetch GID
1252 const gid_t gid = pakfire_gid(jail->pakfire);
1253
4896e62c 1254 // Fetch SUBGID
a1ff2863 1255 const struct pakfire_subid* subgid = pakfire_subgid(jail->pakfire);
4896e62c
MT
1256 if (!subgid)
1257 return 1;
679ee2fa
MT
1258
1259 // Make path
1260 r = pakfire_string_format(path, "/proc/%d/gid_map", pid);
a60955af
MT
1261 if (r)
1262 return r;
679ee2fa 1263
abe4ee37
MT
1264 // Running as root
1265 if (gid == 0) {
1266 r = pakfire_file_write(jail->pakfire, path, 0, 0, 0,
1267 "0 %lu %lu\n", subgid->id, subgid->length);
1268 } else {
1269 r = pakfire_file_write(jail->pakfire, path, 0, 0, 0,
1270 "0 %lu 1\n%1 %lu %lu\n", gid, subgid->id, subgid->length);
1271 }
679ee2fa 1272
abe4ee37
MT
1273 if (r) {
1274 ERROR(jail->pakfire, "Could not map GIDs: %m\n");
1275 return r;
1276 }
1277
1278 return r;
679ee2fa
MT
1279}
1280
78d7488a
MT
1281static int pakfire_jail_setgroups(struct pakfire_jail* jail, pid_t pid) {
1282 char path[PATH_MAX];
1283 int r = 1;
1284
1285 // Make path
1286 r = pakfire_string_format(path, "/proc/%d/setgroups", pid);
a60955af
MT
1287 if (r)
1288 return r;
78d7488a
MT
1289
1290 // Open file for writing
1291 FILE* f = fopen(path, "w");
1292 if (!f) {
1293 ERROR(jail->pakfire, "Could not open %s for writing: %m\n", path);
1294 goto ERROR;
1295 }
1296
1297 // Write content
1298 int bytes_written = fprintf(f, "deny\n");
1299 if (bytes_written <= 0) {
1300 ERROR(jail->pakfire, "Could not write to %s: %m\n", path);
1301 goto ERROR;
1302 }
1303
1304 r = fclose(f);
1305 f = NULL;
1306 if (r) {
1307 ERROR(jail->pakfire, "Could not close %s: %m\n", path);
1308 goto ERROR;
1309 }
1310
1311ERROR:
1312 if (f)
1313 fclose(f);
1314
1315 return r;
1316}
1317
43dc0e16 1318static int pakfire_jail_send_signal(struct pakfire_jail* jail, int fd) {
743f449e
MT
1319 const uint64_t val = 1;
1320 int r = 0;
43dc0e16
MT
1321
1322 DEBUG(jail->pakfire, "Sending signal...\n");
1323
743f449e
MT
1324 // Write to the file descriptor
1325 ssize_t bytes_written = write(fd, &val, sizeof(val));
1326 if (bytes_written < 0 || (size_t)bytes_written < sizeof(val)) {
1327 ERROR(jail->pakfire, "Could not send signal: %m\n");
1328 r = 1;
1329 }
1330
1331 // Close the file descriptor
43dc0e16
MT
1332 close(fd);
1333
743f449e 1334 return r;
43dc0e16
MT
1335}
1336
1337static int pakfire_jail_wait_for_signal(struct pakfire_jail* jail, int fd) {
743f449e
MT
1338 uint64_t val = 0;
1339 int r = 0;
43dc0e16
MT
1340
1341 DEBUG(jail->pakfire, "Waiting for signal...\n");
1342
743f449e
MT
1343 ssize_t bytes_read = read(fd, &val, sizeof(val));
1344 if (bytes_read < 0 || (size_t)bytes_read < sizeof(val)) {
1345 ERROR(jail->pakfire, "Error waiting for signal: %m\n");
1346 r = 1;
1347 }
1348
1349 // Close the file descriptor
43dc0e16
MT
1350 close(fd);
1351
743f449e 1352 return r;
43dc0e16
MT
1353}
1354
679ee2fa
MT
1355/*
1356 Performs the initialisation that needs to happen in the parent part
1357*/
f7d240a7 1358static int pakfire_jail_parent(struct pakfire_jail* jail, struct pakfire_jail_exec* ctx) {
679ee2fa
MT
1359 int r;
1360
abe4ee37
MT
1361 // Setup UID mapping
1362 r = pakfire_jail_setup_uid_mapping(jail, ctx->pid);
679ee2fa
MT
1363 if (r)
1364 return r;
1365
abe4ee37
MT
1366 // Write "deny" to /proc/PID/setgroups
1367 r = pakfire_jail_setgroups(jail, ctx->pid);
78d7488a
MT
1368 if (r)
1369 return r;
1370
679ee2fa 1371 // Setup GID mapping
616f1fca 1372 r = pakfire_jail_setup_gid_mapping(jail, ctx->pid);
679ee2fa
MT
1373 if (r)
1374 return r;
1375
43dc0e16
MT
1376 // Parent has finished initialisation
1377 DEBUG(jail->pakfire, "Parent has finished initialization\n");
1378
1379 // Send signal to client
f7d240a7 1380 r = pakfire_jail_send_signal(jail, ctx->completed_fd);
43dc0e16
MT
1381 if (r)
1382 return r;
1383
679ee2fa
MT
1384 return 0;
1385}
1386
616f1fca 1387static int pakfire_jail_child(struct pakfire_jail* jail, struct pakfire_jail_exec* ctx,
f7d240a7 1388 const char* argv[]) {
43dc0e16
MT
1389 int r;
1390
e33387d3
MT
1391 // Redirect any logging to our log pipe
1392 pakfire_set_log_callback(jail->pakfire, pakfire_jail_log, &ctx->pipes);
0bd84dc1 1393
2a7b5e00
MT
1394 // Die with parent
1395 r = prctl(PR_SET_PDEATHSIG, SIGKILL, 0, 0, 0);
1396 if (r) {
1397 ERROR(jail->pakfire, "Could not configure to die with parent: %m\n");
1398 return 126;
1399 }
1400
cf440db8
MT
1401 // Fetch my own PID
1402 pid_t pid = getpid();
1403
1404 DEBUG(jail->pakfire, "Launched child process in jail with PID %d\n", pid);
0bd84dc1 1405
43dc0e16 1406 // Wait for the parent to finish initialization
f7d240a7 1407 r = pakfire_jail_wait_for_signal(jail, ctx->completed_fd);
43dc0e16
MT
1408 if (r)
1409 return r;
1410
4f59c39b
MT
1411 // Perform further initialization
1412
1413 // Fetch UID/GID
1414 uid_t uid = getuid();
1415 gid_t gid = getgid();
1416
1417 // Fetch EUID/EGID
1418 uid_t euid = geteuid();
1419 gid_t egid = getegid();
1420
1421 DEBUG(jail->pakfire, " UID: %d (effective %d)\n", uid, euid);
1422 DEBUG(jail->pakfire, " GID: %d (effective %d)\n", gid, egid);
1423
1424 // Check if we are (effectively running as root)
4f719e21 1425 if (uid || gid || euid || egid) {
4f59c39b
MT
1426 ERROR(jail->pakfire, "Child process is not running as root\n");
1427 return 126;
1428 }
1429
1430 const char* root = pakfire_get_path(jail->pakfire);
1431 const char* arch = pakfire_get_arch(jail->pakfire);
1432
1433 // Change root (unless root is /)
1434 if (!pakfire_on_root(jail->pakfire)) {
1435 // Mount everything
7bdf1d8e 1436 r = pakfire_jail_mount(jail, ctx);
4f59c39b
MT
1437 if (r)
1438 return r;
1439
4f59c39b
MT
1440 // Call chroot()
1441 r = chroot(root);
1442 if (r) {
1443 ERROR(jail->pakfire, "chroot() to %s failed: %m\n", root);
1444 return 1;
1445 }
1446
1447 // Change directory to /
1448 r = chdir("/");
1449 if (r) {
1450 ERROR(jail->pakfire, "chdir() after chroot() failed: %m\n");
1451 return 1;
1452 }
1453 }
1454
90d92b5c
MT
1455 // Set personality
1456 unsigned long persona = pakfire_arch_personality(arch);
1457 if (persona) {
1458 r = personality(persona);
1459 if (r < 0) {
1460 ERROR(jail->pakfire, "Could not set personality (%x)\n", (unsigned int)persona);
1461 return 1;
1462 }
1463 }
1464
cf440db8
MT
1465 // Set nice level
1466 if (jail->nice) {
1467 DEBUG(jail->pakfire, "Setting nice level to %d\n", jail->nice);
1468
1469 r = setpriority(PRIO_PROCESS, pid, jail->nice);
1470 if (r) {
1471 ERROR(jail->pakfire, "Could not set nice level: %m\n");
1472 return 1;
1473 }
1474 }
1475
e33387d3
MT
1476 // Close other end of log pipes
1477 close(ctx->pipes.log_INFO[0]);
1478 close(ctx->pipes.log_ERROR[0]);
1479#ifdef ENABLE_DEBUG
1480 close(ctx->pipes.log_DEBUG[0]);
1481#endif /* ENABLE_DEBUG */
1482
2015cb92
MT
1483 // Connect standard input
1484 if (ctx->pipes.stdin[0]) {
1485 r = dup2(ctx->pipes.stdin[0], STDIN_FILENO);
1486 if (r < 0) {
1487 ERROR(jail->pakfire, "Could not connect fd %d to stdin: %m\n",
1488 ctx->pipes.stdin[0]);
1489
1490 return 1;
1491 }
1492 }
1493
7ebfb7cb
MT
1494 // Connect standard output and error
1495 if (ctx->pipes.stdout[1] && ctx->pipes.stderr[1]) {
1496 r = dup2(ctx->pipes.stdout[1], STDOUT_FILENO);
1497 if (r < 0) {
1498 ERROR(jail->pakfire, "Could not connect fd %d to stdout: %m\n",
1499 ctx->pipes.stdout[1]);
1500
1501 return 1;
1502 }
1503
1504 r = dup2(ctx->pipes.stderr[1], STDERR_FILENO);
1505 if (r < 0) {
1506 ERROR(jail->pakfire, "Could not connect fd %d to stderr: %m\n",
1507 ctx->pipes.stderr[1]);
1508
1509 return 1;
1510 }
1511
195fe455 1512 // Close the pipe (as we have moved the original file descriptors)
2015cb92 1513 pakfire_jail_close_pipe(jail, ctx->pipes.stdin);
195fe455
MT
1514 pakfire_jail_close_pipe(jail, ctx->pipes.stdout);
1515 pakfire_jail_close_pipe(jail, ctx->pipes.stderr);
7ebfb7cb
MT
1516 }
1517
007bc66c
MT
1518 // Reset open file limit (http://0pointer.net/blog/file-descriptor-limits.html)
1519 r = pakfire_rlimit_reset_nofile(jail->pakfire);
1520 if (r)
1521 return r;
1522
980b15af
MT
1523 // Drop capabilities
1524 r = pakfire_jail_drop_capabilities(jail);
1525 if (r)
1526 return r;
1527
739d5b57
MT
1528 // Filter syscalls
1529 r = pakfire_jail_limit_syscalls(jail);
1530 if (r)
1531 return r;
1532
2015cb92
MT
1533 DEBUG(jail->pakfire, "Child process initialization done\n");
1534 DEBUG(jail->pakfire, "Launching command:\n");
1535
1536 // Log argv
1537 for (unsigned int i = 0; argv[i]; i++)
1538 DEBUG(jail->pakfire, " argv[%d] = %s\n", i, argv[i]);
1539
b3498aeb
MT
1540 // exec() command
1541 r = execvpe(argv[0], (char**)argv, jail->env);
1542 if (r < 0)
1543 ERROR(jail->pakfire, "Could not execve(): %m\n");
1544
1545 // Translate errno into regular exit code
1546 switch (errno) {
1547 case ENOENT:
1548 r = 127;
1549 break;
1550
1551 default:
1552 r = 1;
1553 }
1554
1555 // We should not get here
1556 return r;
0bd84dc1
MT
1557}
1558
9f50bf71 1559// Run a command in the jail
db4f234f 1560static int __pakfire_jail_exec(struct pakfire_jail* jail, const char* argv[],
2015cb92
MT
1561 const int interactive,
1562 pakfire_jail_communicate_in communicate_in,
1563 pakfire_jail_communicate_out communicate_out,
1564 void* data) {
4f59c39b 1565 int exit = -1;
0bd84dc1
MT
1566 int r;
1567
b3498aeb
MT
1568 // Check if argv is valid
1569 if (!argv || !argv[0]) {
1570 errno = EINVAL;
1571 return -1;
1572 }
1573
2015cb92
MT
1574 // Send any output to the default logger if no callback is set
1575 if (!communicate_out)
1576 communicate_out = pakfire_jail_default_log_callback;
1577
616f1fca
MT
1578 // Initialize context for this call
1579 struct pakfire_jail_exec ctx = {
7bdf1d8e
MT
1580 .flags = 0,
1581
616f1fca 1582 .pipes = {
2015cb92
MT
1583 .stdin = { 0, 0 },
1584 .stdout = { 0, 0 },
1585 .stderr = { 0, 0 },
1586 },
1587
1588 .communicate = {
1589 .in = communicate_in,
1590 .out = communicate_out,
1591 .data = data,
616f1fca 1592 },
616f1fca
MT
1593 };
1594
0bd84dc1
MT
1595 DEBUG(jail->pakfire, "Executing jail...\n");
1596
7bdf1d8e
MT
1597 // Enable networking in interactive mode
1598 if (interactive)
1599 ctx.flags |= PAKFIRE_JAIL_HAS_NETWORKING;
1600
43dc0e16
MT
1601 /*
1602 Setup a file descriptor which can be used to notify the client that the parent
1603 has completed configuration.
1604 */
f7d240a7
MT
1605 ctx.completed_fd = eventfd(0, EFD_CLOEXEC);
1606 if (ctx.completed_fd < 0) {
43dc0e16
MT
1607 ERROR(jail->pakfire, "eventfd() failed: %m\n");
1608 return -1;
1609 }
1610
616f1fca 1611 // Create pipes to communicate with child process if we are not running interactively
58963c75 1612 if (!interactive) {
2015cb92
MT
1613 // stdin (only if callback is set)
1614 if (ctx.communicate.in) {
1615 r = pakfire_jail_setup_pipe(jail, &ctx.pipes.stdin, 0);
1616 if (r)
1617 goto ERROR;
1618 }
1619
616f1fca 1620 // stdout
e33387d3
MT
1621 r = pakfire_jail_setup_pipe(jail, &ctx.pipes.stdout, 0);
1622 if (r)
616f1fca 1623 goto ERROR;
616f1fca
MT
1624
1625 // stderr
e33387d3
MT
1626 r = pakfire_jail_setup_pipe(jail, &ctx.pipes.stderr, 0);
1627 if (r)
616f1fca 1628 goto ERROR;
616f1fca
MT
1629 }
1630
e33387d3
MT
1631 // Setup pipes for logging
1632 // INFO
1633 r = pakfire_jail_setup_pipe(jail, &ctx.pipes.log_INFO, O_CLOEXEC);
1634 if (r)
1635 goto ERROR;
1636
1637 // ERROR
1638 r = pakfire_jail_setup_pipe(jail, &ctx.pipes.log_ERROR, O_CLOEXEC);
1639 if (r)
1640 goto ERROR;
1641
1642#ifdef ENABLE_DEBUG
1643 // DEBUG
1644 r = pakfire_jail_setup_pipe(jail, &ctx.pipes.log_DEBUG, O_CLOEXEC);
1645 if (r)
1646 goto ERROR;
1647#endif /* ENABLE_DEBUG */
1648
0bd84dc1
MT
1649 // Configure child process
1650 struct clone_args args = {
1651 .flags =
1652 CLONE_NEWCGROUP |
1653 CLONE_NEWIPC |
1654 CLONE_NEWNS |
1655 CLONE_NEWPID |
1656 CLONE_NEWUSER |
d853213d 1657 CLONE_NEWUTS |
02fd4f8b 1658 CLONE_PIDFD,
0bd84dc1 1659 .exit_signal = SIGCHLD,
d853213d 1660 .pidfd = (long long unsigned int)&ctx.pidfd,
0bd84dc1
MT
1661 };
1662
aca565fc 1663 // Launch the process in a cgroup that is a leaf of the configured cgroup
02fd4f8b
MT
1664 if (jail->cgroup) {
1665 args.flags |= CLONE_INTO_CGROUP;
1666
ae5201c5
MT
1667 // Fetch our UUID
1668 const char* uuid = pakfire_jail_uuid(jail);
aca565fc
MT
1669
1670 // Create a temporary cgroup
ae5201c5 1671 r = pakfire_cgroup_child(&ctx.cgroup, jail->cgroup, uuid, 0);
aca565fc
MT
1672 if (r) {
1673 ERROR(jail->pakfire, "Could not create cgroup for jail: %m\n");
1674 goto ERROR;
1675 }
1676
02fd4f8b 1677 // Clone into this cgroup
aca565fc 1678 args.cgroup = pakfire_cgroup_fd(ctx.cgroup);
02fd4f8b
MT
1679 }
1680
7bdf1d8e
MT
1681 // Setup networking
1682 if (!pakfire_jail_exec_has_flag(&ctx, PAKFIRE_JAIL_HAS_NETWORKING)) {
1683 args.flags |= CLONE_NEWNET;
1684 }
1685
0bd84dc1 1686 // Fork this process
616f1fca
MT
1687 ctx.pid = clone3(&args, sizeof(args));
1688 if (ctx.pid < 0) {
0bd84dc1
MT
1689 ERROR(jail->pakfire, "Could not clone: %m\n");
1690 return -1;
1691
1692 // Child process
616f1fca 1693 } else if (ctx.pid == 0) {
f7d240a7 1694 r = pakfire_jail_child(jail, &ctx, argv);
0bd84dc1
MT
1695 _exit(r);
1696 }
1697
679ee2fa 1698 // Parent process
f7d240a7 1699 r = pakfire_jail_parent(jail, &ctx);
679ee2fa
MT
1700 if (r)
1701 goto ERROR;
1702
616f1fca 1703 DEBUG(jail->pakfire, "Waiting for PID %d to finish its work\n", ctx.pid);
0bd84dc1 1704
616f1fca 1705 // Read output of the child process
d853213d
MT
1706 r = pakfire_jail_wait(jail, &ctx);
1707 if (r)
1708 goto ERROR;
0bd84dc1 1709
d853213d
MT
1710 // Handle exit status
1711 switch (ctx.status.si_code) {
1712 case CLD_EXITED:
1713 DEBUG(jail->pakfire, "The child process exited with code %d\n",
1714 ctx.status.si_status);
616f1fca 1715
d853213d
MT
1716 // Pass exit code
1717 exit = ctx.status.si_status;
1718 break;
0bd84dc1 1719
d853213d 1720 case CLD_KILLED:
d853213d 1721 ERROR(jail->pakfire, "The child process was killed\n");
54f64dc5
MT
1722 exit = 139;
1723 break;
1724
1725 case CLD_DUMPED:
1726 ERROR(jail->pakfire, "The child process terminated abnormally\n");
d853213d 1727 break;
0bd84dc1 1728
d853213d
MT
1729 // Log anything else
1730 default:
1731 ERROR(jail->pakfire, "Unknown child exit code: %d\n", ctx.status.si_code);
1732 break;
0bd84dc1
MT
1733 }
1734
679ee2fa 1735ERROR:
aca565fc
MT
1736 // Destroy the temporary cgroup (if any)
1737 if (ctx.cgroup) {
6b7cf275
MT
1738 // Read cgroup stats
1739 r = pakfire_cgroup_stat(ctx.cgroup, &ctx.cgroup_stats);
1740 if (r) {
1741 ERROR(jail->pakfire, "Could not read cgroup stats: %m\n");
1742 } else {
1743 pakfire_cgroup_stat_dump(ctx.cgroup, &ctx.cgroup_stats);
1744 }
1745
aca565fc
MT
1746 pakfire_cgroup_destroy(ctx.cgroup);
1747 pakfire_cgroup_unref(ctx.cgroup);
1748 }
1749
616f1fca 1750 // Close any file descriptors
2015cb92 1751 pakfire_jail_close_pipe(jail, ctx.pipes.stdin);
e33387d3
MT
1752 pakfire_jail_close_pipe(jail, ctx.pipes.stdout);
1753 pakfire_jail_close_pipe(jail, ctx.pipes.stderr);
d853213d
MT
1754 if (ctx.pidfd)
1755 close(ctx.pidfd);
e33387d3
MT
1756 pakfire_jail_close_pipe(jail, ctx.pipes.log_INFO);
1757 pakfire_jail_close_pipe(jail, ctx.pipes.log_ERROR);
1758 pakfire_jail_close_pipe(jail, ctx.pipes.log_DEBUG);
616f1fca 1759
4f59c39b 1760 return exit;
9f50bf71 1761}
a45ed6b0 1762
ccdd2e95 1763PAKFIRE_EXPORT int pakfire_jail_exec(
2015cb92
MT
1764 struct pakfire_jail* jail,
1765 const char* argv[],
1766 pakfire_jail_communicate_in callback_in,
1767 pakfire_jail_communicate_out callback_out,
1768 void* data) {
1769 return __pakfire_jail_exec(jail, argv, 0, callback_in, callback_out, data);
1770}
1771
db4f234f
MT
/*
	Runs the command given as argv in the jail in interactive mode,
	i.e. without any communication callbacks.

	Returns the non-zero code of pakfire_jail_setup_interactive_env() if that
	failed, or whatever __pakfire_jail_exec() returned.
*/
static int pakfire_jail_exec_interactive(
		struct pakfire_jail* jail, const char* argv[]) {
	// Setup interactive stuff
	int r = pakfire_jail_setup_interactive_env(jail);

	// Only launch the command if the environment could be set up
	if (r == 0)
		r = __pakfire_jail_exec(jail, argv, 1, NULL, NULL, NULL);

	return r;
}
1783
ccdd2e95
MT
1784int pakfire_jail_exec_script(struct pakfire_jail* jail,
1785 const char* script,
1786 const size_t size,
1787 const char* args[],
1788 pakfire_jail_communicate_in callback_in,
1789 pakfire_jail_communicate_out callback_out,
1790 void* data) {
a45ed6b0
MT
1791 char path[PATH_MAX];
1792 const char** argv = NULL;
35291cb7 1793 FILE* f = NULL;
a45ed6b0
MT
1794 int r;
1795
1796 const char* root = pakfire_get_path(jail->pakfire);
1797
1798 // Write the scriptlet to disk
35291cb7 1799 r = pakfire_path_join(path, root, PAKFIRE_TMP_DIR "/pakfire-script.XXXXXX");
56796f84 1800 if (r)
a45ed6b0
MT
1801 goto ERROR;
1802
35291cb7
MT
1803 // Create a temporary file
1804 f = pakfire_mktemp(path, 0700);
1805 if (!f) {
1806 ERROR(jail->pakfire, "Could not create temporary file: %m\n");
a45ed6b0
MT
1807 goto ERROR;
1808 }
1809
1810 DEBUG(jail->pakfire, "Writing script to %s:\n%.*s\n", path, (int)size, script);
1811
1812 // Write data
35291cb7
MT
1813 r = fprintf(f, "%s", script);
1814 if (r < 0) {
a45ed6b0 1815 ERROR(jail->pakfire, "Could not write script to file %s: %m\n", path);
a45ed6b0
MT
1816 goto ERROR;
1817 }
1818
1819 // Close file
35291cb7 1820 r = fclose(f);
a45ed6b0
MT
1821 if (r) {
1822 ERROR(jail->pakfire, "Could not close script file %s: %m\n", path);
a45ed6b0
MT
1823 goto ERROR;
1824 }
1825
35291cb7
MT
1826 f = NULL;
1827
a45ed6b0
MT
1828 // Count how many arguments were passed
1829 unsigned int argc = 1;
1830 if (args) {
1831 for (const char** arg = args; *arg; arg++)
1832 argc++;
1833 }
1834
1835 argv = calloc(argc + 1, sizeof(*argv));
1836 if (!argv) {
1837 ERROR(jail->pakfire, "Could not allocate argv: %m\n");
1838 goto ERROR;
1839 }
1840
1841 // Set command
1842 argv[0] = (root) ? pakfire_path_relpath(root, path) : path;
1843
1844 // Copy args
1845 for (unsigned int i = 1; i < argc; i++)
1846 argv[i] = args[i-1];
1847
1848 // Run the script
ccdd2e95 1849 r = pakfire_jail_exec(jail, argv, callback_in, callback_out, data);
a45ed6b0
MT
1850
1851ERROR:
1852 if (argv)
1853 free(argv);
35291cb7
MT
1854 if (f)
1855 fclose(f);
a45ed6b0
MT
1856
1857 // Remove script from disk
1858 if (*path)
1859 unlink(path);
1860
1861 return r;
1862}
82df3c77
MT
1863
1864/*
1865 A convenience function that creates a new jail, runs the given command and destroys
1866 the jail again.
1867*/
12b9b39f 1868int pakfire_jail_run(struct pakfire* pakfire, const char* argv[], int flags, char** output) {
82df3c77
MT
1869 struct pakfire_jail* jail = NULL;
1870 int r;
1871
1872 // Create a new jail
1873 r = pakfire_jail_create(&jail, pakfire, flags);
1874 if (r)
1875 goto ERROR;
1876
1877 // Execute the command
ccdd2e95 1878 r = pakfire_jail_exec(jail, argv, NULL, pakfire_jail_capture_stdout, output);
82df3c77
MT
1879
1880ERROR:
1881 if (jail)
1882 pakfire_jail_unref(jail);
1883
1884 return r;
1885}
4f688bd8
MT
1886
/*
	A convenience function that creates a new jail, runs the given script
	without any communication callbacks and destroys the jail again.

	Returns the non-zero code of pakfire_jail_create() if the jail could not be
	created, or the result of pakfire_jail_exec_script() otherwise.
*/
int pakfire_jail_run_script(struct pakfire* pakfire,
		const char* script, const size_t length, const char* argv[], int flags) {
	struct pakfire_jail* jail = NULL;

	// Create a new jail
	int r = pakfire_jail_create(&jail, pakfire, flags);

	// Execute the script (only if we have a jail)
	if (!r)
		r = pakfire_jail_exec_script(jail, script, length, argv, NULL, NULL, NULL);

	// Release the jail
	if (jail)
		pakfire_jail_unref(jail);

	return r;
}
e43489f7 1906
/*
	Launches an interactive login shell (/bin/bash --login) in the jail
	and returns the result of pakfire_jail_exec_interactive().
*/
int pakfire_jail_shell(struct pakfire_jail* jail) {
	const char* command[] = {
		"/bin/bash",
		"--login",
		NULL,
	};

	// Execute /bin/bash
	return pakfire_jail_exec_interactive(jail, command);
}
1915
1916int pakfire_jail_ldconfig(struct pakfire* pakfire) {
1917 char path[PATH_MAX];
1918
1919 const char* ldconfig = "/sbin/ldconfig";
1920
1921 // Check if ldconfig exists before calling it to avoid overhead
77e26129
MT
1922 int r = pakfire_path(pakfire, path, "%s", ldconfig);
1923 if (r)
1924 return r;
e43489f7
MT
1925
1926 // Check if ldconfig is executable
1927 r = access(path, X_OK);
1928 if (r) {
1929 DEBUG(pakfire, "%s is not executable. Skipping...\n", ldconfig);
1930 return 0;
1931 }
1932
1933 const char* argv[] = {
1934 ldconfig, NULL,
1935 };
1936
1937 // Run ldconfig
1938 return pakfire_jail_run(pakfire, argv, 0, NULL);
1939}