]> git.ipfire.org Git - people/ms/pakfire.git/blame - src/libpakfire/jail.c
jail: Actually exit if something went wrong during initialization
[people/ms/pakfire.git] / src / libpakfire / jail.c
CommitLineData
fd37ccaf
MT
1/*#############################################################################
2# #
3# Pakfire - The IPFire package management system #
4# Copyright (C) 2022 Pakfire development team #
5# #
6# This program is free software: you can redistribute it and/or modify #
7# it under the terms of the GNU General Public License as published by #
8# the Free Software Foundation, either version 3 of the License, or #
9# (at your option) any later version. #
10# #
11# This program is distributed in the hope that it will be useful, #
12# but WITHOUT ANY WARRANTY; without even the implied warranty of #
13# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
14# GNU General Public License for more details. #
15# #
16# You should have received a copy of the GNU General Public License #
17# along with this program. If not, see <http://www.gnu.org/licenses/>. #
18# #
19#############################################################################*/
20
32d5f21d 21#include <errno.h>
bcf09bf5 22#include <fcntl.h>
980b15af 23#include <linux/capability.h>
0bd84dc1 24#include <linux/sched.h>
58ee649f 25#include <sys/wait.h>
4f23b498 26#include <linux/wait.h>
0bd84dc1
MT
27#include <sched.h>
28#include <signal.h>
32d5f21d 29#include <stdlib.h>
0bd84dc1 30#include <syscall.h>
980b15af 31#include <sys/capability.h>
616f1fca 32#include <sys/epoll.h>
43dc0e16 33#include <sys/eventfd.h>
7bdf1d8e 34#include <sys/mount.h>
90d92b5c 35#include <sys/personality.h>
980b15af 36#include <sys/prctl.h>
cf440db8 37#include <sys/resource.h>
335b8a44 38#include <sys/timerfd.h>
0bd84dc1
MT
39#include <sys/types.h>
40#include <sys/wait.h>
32d5f21d 41
fec79a33
MT
42// libnl3
43#include <net/if.h>
44#include <netlink/route/link.h>
45
739d5b57
MT
46// libseccomp
47#include <seccomp.h>
48
ae5201c5
MT
49// libuuid
50#include <uuid.h>
51
90d92b5c 52#include <pakfire/arch.h>
e3ddb498 53#include <pakfire/cgroup.h>
fd37ccaf 54#include <pakfire/jail.h>
4f59c39b
MT
55#include <pakfire/logging.h>
56#include <pakfire/mount.h>
9b171c6a 57#include <pakfire/os.h>
fd37ccaf 58#include <pakfire/pakfire.h>
729827f7 59#include <pakfire/path.h>
6ce56f90 60#include <pakfire/private.h>
4896e62c 61#include <pakfire/pwd.h>
d973a13d 62#include <pakfire/string.h>
32d5f21d
MT
63#include <pakfire/util.h>
64
616f1fca
MT
// Size in bytes of each per-stream log buffer (64 KiB).
// Parenthesised so that the macro expands safely inside larger expressions.
#define BUFFER_SIZE		(1024 * 64)

// Number of slots in a jail's environment array
#define ENVIRON_SIZE		128

// Maximum number of events fetched by a single epoll_wait() call
#define EPOLL_MAX_EVENTS	2

// Number of slots in a jail's mountpoint array
#define MAX_MOUNTPOINTS		8
fd37ccaf 69
d5bc8fe0
MT
// Environment variables that every newly created jail starts out with.
// The table is terminated by an entry whose key is NULL.
static const struct environ {
	const char* key;
	const char* val;
} ENV[] = {
	{ "HOME", "/root" },
	{ "LANG", "C.utf-8" },
	{ "PATH", "/usr/local/sbin:/usr/sbin:/sbin:/usr/local/bin:/usr/bin:/bin", },
	{ "TERM", "vt100" },

	// Lets processes detect that they run inside a Pakfire container
	{ "container", "pakfire" },

	// Sentinel
	{ NULL, NULL },
};
84
cc6e2264
MT
// Describes one mountpoint stored in a jail: a source path, a target path
// and a set of flags.
// NOTE(review): how the flags are interpreted is not visible in this part of the file.
struct pakfire_jail_mountpoint {
	char source[PATH_MAX];
	char target[PATH_MAX];
	int flags;
};
90
// A jail object. It is reference-counted (see pakfire_jail_ref/unref) and holds
// its own references to the context, the Pakfire instance and, optionally, a cgroup.
struct pakfire_jail {
	struct pakfire_ctx* ctx;
	struct pakfire* pakfire;

	// Reference counter; the jail is freed when it drops to zero
	int nrefs;

	// A unique ID for each jail
	uuid_t uuid;
	// String form of the UUID, filled in lazily by pakfire_jail_uuid()
	char __uuid[UUID_STR_LEN];

	// Resource Limits
	int nice;

	// Timeout (only it_value.tv_sec is set by pakfire_jail_set_timeout())
	struct itimerspec timeout;

	// CGroup
	struct pakfire_cgroup* cgroup;

	// Environment as a NULL-terminated array of "KEY=VALUE" strings
	char* env[ENVIRON_SIZE];

	// Mountpoints
	struct pakfire_jail_mountpoint mountpoints[MAX_MOUNTPOINTS];
	unsigned int num_mountpoints;

	// Callbacks
	struct pakfire_jail_callbacks {
		// Log
		pakfire_jail_log_callback log;
		void* log_data;
	} callbacks;
};
123
// Fixed-size buffer that collects output until a complete line is available
struct pakfire_log_buffer {
	char data[BUFFER_SIZE];

	// Number of bytes of data that are currently in use
	size_t used;
};
128
// Per-execution state: file descriptors, pipes and buffers that belong to
// one command being run inside a jail.
struct pakfire_jail_exec {
	int flags;

	// PIDs (of the children)
	int pidfd1;
	int pidfd2;

	// Socket to pass FDs
	int socket[2];

	// FD to notify the client that the parent has finished initialization
	int completed_fd;

	// Pipes (index 0 is the reading end, index 1 the writing end)
	struct pakfire_jail_pipes {
		int stdin[2];
		int stdout[2];
		int stderr[2];

		// Logging
		int log_INFO[2];
		int log_ERROR[2];
#ifdef ENABLE_DEBUG
		int log_DEBUG[2];
#endif /* ENABLE_DEBUG */
	} pipes;

	// Communicate
	struct pakfire_jail_communicate {
		pakfire_jail_communicate_in in;
		pakfire_jail_communicate_out out;
		void* data;
	} communicate;

	// Log buffers
	struct pakfire_jail_buffers {
		struct pakfire_log_buffer stdout;
		struct pakfire_log_buffer stderr;

		// Logging
		struct pakfire_log_buffer log_INFO;
		struct pakfire_log_buffer log_ERROR;
#ifdef ENABLE_DEBUG
		struct pakfire_log_buffer log_DEBUG;
#endif /* ENABLE_DEBUG */
	} buffers;

	struct pakfire_cgroup* cgroup;
	struct pakfire_cgroup_stats cgroup_stats;
};
179
0bd84dc1
MT
/*
	Invokes the clone3 system call directly through syscall()
	and returns its result.
*/
static int clone3(struct clone_args* args, size_t size) {
	const long r = syscall(__NR_clone3, args, size);

	return r;
}
183
335b8a44
MT
184static int pidfd_send_signal(int pidfd, int sig, siginfo_t* info, unsigned int flags) {
185 return syscall(SYS_pidfd_send_signal, pidfd, sig, info, flags);
186}
187
14df7388
MT
/*
	Invokes the pivot_root system call directly through syscall()
	and returns its result.
*/
static int pivot_root(const char* new_root, const char* old_root) {
	const long r = syscall(SYS_pivot_root, new_root, old_root);

	return r;
}
191
7bdf1d8e
MT
192static int pakfire_jail_exec_has_flag(
193 const struct pakfire_jail_exec* ctx, const enum pakfire_jail_exec_flags flag) {
194 return ctx->flags & flag;
195}
196
d5bc8fe0
MT
197static void pakfire_jail_free(struct pakfire_jail* jail) {
198 DEBUG(jail->pakfire, "Freeing jail at %p\n", jail);
199
200 // Free environment
201 for (unsigned int i = 0; jail->env[i]; i++)
202 free(jail->env[i]);
203
d34b1e00
MT
204 if (jail->cgroup)
205 pakfire_cgroup_unref(jail->cgroup);
a13df023
MT
206 if (jail->pakfire)
207 pakfire_unref(jail->pakfire);
208 if (jail->ctx)
209 pakfire_ctx_unref(jail->ctx);
d5bc8fe0
MT
210 free(jail);
211}
212
e33387d3
MT
/*
	Default log callback: forwards a line to the Pakfire logger that
	matches the given priority. Lines of any other priority are dropped.

	Always returns zero.
*/
static int pakfire_jail_default_log_callback(struct pakfire* pakfire, void* data,
		int priority, const char* line, size_t length) {
	if (priority == LOG_INFO) {
		INFO(pakfire, "%s", line);

	} else if (priority == LOG_ERR) {
		ERROR(pakfire, "%s", line);

#ifdef ENABLE_DEBUG
	} else if (priority == LOG_DEBUG) {
		DEBUG(pakfire, "%s", line);
#endif
	}

	return 0;
}
236
3bf01105
MT
237static const char* pakfire_jail_uuid(struct pakfire_jail* jail) {
238 if (!*jail->__uuid)
239 uuid_unparse_lower(jail->uuid, jail->__uuid);
240
241 return jail->__uuid;
242}
243
00ba1d9a
MT
/*
	Prepares the environment for an interactive session: sets a prompt and
	copies TERM and LANG from the calling process if they are set.

	Returns zero on success or the error returned by pakfire_jail_set_env().
*/
static int pakfire_jail_setup_interactive_env(struct pakfire_jail* jail) {
	// Variables that are taken over from the calling environment
	static const char* const inherited[] = { "TERM", "LANG", NULL };
	int r;

	// Set PS1
	r = pakfire_jail_set_env(jail, "PS1", "pakfire-jail \\w> ");
	if (r)
		return r;

	for (const char* const* name = inherited; *name; name++) {
		const char* value = secure_getenv(*name);

		// Skip anything that is not set
		if (!value)
			continue;

		r = pakfire_jail_set_env(jail, *name, value);
		if (r)
			return r;
	}

	return 0;
}
268
9fa1afb6 269PAKFIRE_EXPORT int pakfire_jail_create(struct pakfire_jail** jail, struct pakfire* pakfire) {
d5bc8fe0
MT
270 int r;
271
652f2a99 272 const char* arch = pakfire_get_effective_arch(pakfire);
aac86bd3 273
d5bc8fe0 274 // Allocate a new jail
fd37ccaf
MT
275 struct pakfire_jail* j = calloc(1, sizeof(*j));
276 if (!j)
277 return 1;
278
a13df023
MT
279 // Reference context
280 j->ctx = pakfire_ctx(pakfire);
281
fd37ccaf
MT
282 // Reference Pakfire
283 j->pakfire = pakfire_ref(pakfire);
284
285 // Initialize reference counter
286 j->nrefs = 1;
287
ae5201c5
MT
288 // Generate a random UUID
289 uuid_generate_random(j->uuid);
290
84bd7655
MT
291 DEBUG(j->pakfire, "Allocated new jail at %p\n", j);
292
2dc104da
MT
293 // Set the default logging callback
294 pakfire_jail_set_log_callback(j, pakfire_jail_default_log_callback, NULL);
295
d5bc8fe0
MT
296 // Set default environment
297 for (const struct environ* e = ENV; e->key; e++) {
298 r = pakfire_jail_set_env(j, e->key, e->val);
299 if (r)
300 goto ERROR;
301 }
302
aac86bd3 303 // Enable all CPU features that CPU has to offer
1f4e66a4 304 if (!pakfire_arch_is_supported_by_host(arch)) {
aac86bd3
MT
305 r = pakfire_jail_set_env(j, "QEMU_CPU", "max");
306 if (r)
307 goto ERROR;
308 }
309
3bf01105
MT
310 // Set container UUID
311 r = pakfire_jail_set_env(j, "container_uuid", pakfire_jail_uuid(j));
312 if (r)
313 goto ERROR;
314
367e708d
MT
315 // Disable systemctl to talk to systemd
316 if (!pakfire_on_root(j->pakfire)) {
317 r = pakfire_jail_set_env(j, "SYSTEMD_OFFLINE", "1");
318 if (r)
319 goto ERROR;
320 }
321
fd37ccaf
MT
322 // Done
323 *jail = j;
324 return 0;
84bd7655 325
d5bc8fe0
MT
326ERROR:
327 pakfire_jail_free(j);
32d5f21d 328
d5bc8fe0 329 return r;
fd37ccaf
MT
330}
331
6ce56f90 332PAKFIRE_EXPORT struct pakfire_jail* pakfire_jail_ref(struct pakfire_jail* jail) {
fd37ccaf
MT
333 ++jail->nrefs;
334
335 return jail;
336}
337
6ce56f90 338PAKFIRE_EXPORT struct pakfire_jail* pakfire_jail_unref(struct pakfire_jail* jail) {
fd37ccaf
MT
339 if (--jail->nrefs > 0)
340 return jail;
341
342 pakfire_jail_free(jail);
343 return NULL;
344}
32d5f21d 345
2dc104da
MT
346// Logging Callback
347
348PAKFIRE_EXPORT void pakfire_jail_set_log_callback(struct pakfire_jail* jail,
349 pakfire_jail_log_callback callback, void* data) {
350 jail->callbacks.log = callback;
351 jail->callbacks.log_data = data;
352}
353
cf440db8
MT
354// Resource Limits
355
356PAKFIRE_EXPORT int pakfire_jail_nice(struct pakfire_jail* jail, int nice) {
357 // Check if nice level is in range
358 if (nice < -19 || nice > 20) {
359 errno = EINVAL;
360 return 1;
361 }
362
363 // Store nice level
364 jail->nice = nice;
365
366 return 0;
367}
368
15503538
MT
369int pakfire_jail_set_cgroup(struct pakfire_jail* jail, struct pakfire_cgroup* cgroup) {
370 // Free any previous cgroup
371 if (jail->cgroup) {
372 pakfire_cgroup_unref(jail->cgroup);
373 jail->cgroup = NULL;
374 }
375
376 // Set any new cgroup
377 if (cgroup) {
378 DEBUG(jail->pakfire, "Setting cgroup %p\n", cgroup);
379
380 jail->cgroup = pakfire_cgroup_ref(cgroup);
381 }
382
383 // Done
384 return 0;
385}
386
32d5f21d
MT
387// Environment
388
389// Returns the length of the environment
390static unsigned int pakfire_jail_env_length(struct pakfire_jail* jail) {
391 unsigned int i = 0;
392
393 // Count everything in the environment
394 for (char** e = jail->env; *e; e++)
395 i++;
396
397 return i;
398}
399
400// Finds an existing environment variable and returns its index or -1 if not found
401static int pakfire_jail_find_env(struct pakfire_jail* jail, const char* key) {
402 if (!key) {
403 errno = EINVAL;
404 return -1;
405 }
406
b88ae806 407 const size_t length = strlen(key);
32d5f21d
MT
408
409 for (unsigned int i = 0; jail->env[i]; i++) {
b88ae806
MT
410 if ((pakfire_string_startswith(jail->env[i], key)
411 && *(jail->env[i] + length) == '=')) {
32d5f21d 412 return i;
b88ae806 413 }
32d5f21d
MT
414 }
415
416 // Nothing found
417 return -1;
418}
419
420// Returns the value of an environment variable or NULL
6ce56f90
MT
421PAKFIRE_EXPORT const char* pakfire_jail_get_env(struct pakfire_jail* jail,
422 const char* key) {
32d5f21d
MT
423 int i = pakfire_jail_find_env(jail, key);
424 if (i < 0)
425 return NULL;
426
427 return jail->env[i] + strlen(key) + 1;
428}
429
430// Sets an environment variable
6ce56f90
MT
431PAKFIRE_EXPORT int pakfire_jail_set_env(struct pakfire_jail* jail,
432 const char* key, const char* value) {
32d5f21d
MT
433 // Find the index where to write this value to
434 int i = pakfire_jail_find_env(jail, key);
435 if (i < 0)
436 i = pakfire_jail_env_length(jail);
437
438 // Return -ENOSPC when the environment is full
439 if (i >= ENVIRON_SIZE) {
440 errno = ENOSPC;
441 return -1;
442 }
443
444 // Free any previous value
445 if (jail->env[i])
446 free(jail->env[i]);
447
448 // Format and set environment variable
449 asprintf(&jail->env[i], "%s=%s", key, value);
450
451 DEBUG(jail->pakfire, "Set environment variable: %s\n", jail->env[i]);
452
453 return 0;
454}
9f50bf71 455
939025e7 456// Imports an environment
6ce56f90 457PAKFIRE_EXPORT int pakfire_jail_import_env(struct pakfire_jail* jail, const char* env[]) {
939025e7
MT
458 if (!env)
459 return 0;
460
461 char* key;
462 char* val;
463 int r;
464
465 // Copy environment variables
466 for (unsigned int i = 0; env[i]; i++) {
467 r = pakfire_string_partition(env[i], "=", &key, &val);
468 if (r)
469 continue;
470
471 // Set value
472 r = pakfire_jail_set_env(jail, key, val);
473
474 if (key)
475 free(key);
476 if (val)
477 free(val);
478
479 // Break on error
480 if (r)
481 return r;
482 }
483
484 return 0;
485}
486
335b8a44
MT
487// Timeout
488
489PAKFIRE_EXPORT int pakfire_jail_set_timeout(
490 struct pakfire_jail* jail, unsigned int timeout) {
491 // Store value
492 jail->timeout.it_value.tv_sec = timeout;
493
494 if (timeout > 0)
a8a41064 495 DEBUG(jail->pakfire, "Timeout set to %u second(s)\n", timeout);
335b8a44
MT
496 else
497 DEBUG(jail->pakfire, "Timeout disabled\n");
498
499 return 0;
500}
501
502static int pakfire_jail_create_timer(struct pakfire_jail* jail) {
503 int r;
504
505 // Nothing to do if no timeout has been set
506 if (!jail->timeout.it_value.tv_sec)
507 return -1;
508
509 // Create a new timer
510 const int fd = timerfd_create(CLOCK_MONOTONIC, 0);
511 if (fd < 0) {
512 ERROR(jail->pakfire, "Could not create timer: %m\n");
513 goto ERROR;
514 }
515
516 // Arm timer
517 r = timerfd_settime(fd, 0, &jail->timeout, NULL);
518 if (r) {
519 ERROR(jail->pakfire, "Could not arm timer: %m\n");
520 goto ERROR;
521 }
522
523 return fd;
524
525ERROR:
dd2d7dbb 526 if (fd >= 0)
335b8a44
MT
527 close(fd);
528
529 return -1;
530}
531
e33387d3
MT
532/*
533 This function replaces any logging in the child process.
534
535 All log messages will be sent to the parent process through their respective pipes.
536*/
19e50d86 537static void pakfire_jail_log_redirect(void* data, int priority, const char* file,
e33387d3
MT
538 int line, const char* fn, const char* format, va_list args) {
539 struct pakfire_jail_pipes* pipes = (struct pakfire_jail_pipes*)data;
540 int fd;
541
542 switch (priority) {
543 case LOG_INFO:
544 fd = pipes->log_INFO[1];
545 break;
546
547 case LOG_ERR:
548 fd = pipes->log_ERROR[1];
549 break;
550
551#ifdef ENABLE_DEBUG
552 case LOG_DEBUG:
553 fd = pipes->log_DEBUG[1];
554 break;
555#endif /* ENABLE_DEBUG */
556
557 // Ignore any messages of an unknown priority
558 default:
559 return;
560 }
561
562 // Send the log message
4c3bab92 563 if (fd >= 0)
e33387d3
MT
564 vdprintf(fd, format, args);
565}
566
616f1fca
MT
567static int pakfire_jail_log_buffer_is_full(const struct pakfire_log_buffer* buffer) {
568 return (sizeof(buffer->data) == buffer->used);
569}
570
571/*
572 This function reads as much data as it can from the file descriptor.
573 If it finds a whole line in it, it will send it to the logger and repeat the process.
574 If not newline character is found, it will try to read more data until it finds one.
575*/
576static int pakfire_jail_handle_log(struct pakfire_jail* jail,
e33387d3 577 struct pakfire_jail_exec* ctx, int priority, int fd,
2015cb92 578 struct pakfire_log_buffer* buffer, pakfire_jail_communicate_out callback, void* data) {
616f1fca
MT
579 char line[BUFFER_SIZE + 1];
580
581 // Fill up buffer from fd
582 if (buffer->used < sizeof(buffer->data)) {
583 ssize_t bytes_read = read(fd, buffer->data + buffer->used,
584 sizeof(buffer->data) - buffer->used);
585
586 // Handle errors
587 if (bytes_read < 0) {
588 ERROR(jail->pakfire, "Could not read from fd %d: %m\n", fd);
589 return -1;
590 }
591
592 // Update buffer size
593 buffer->used += bytes_read;
594 }
595
596 // See if we have any lines that we can write
597 while (buffer->used) {
598 // Search for the end of the first line
599 char* eol = memchr(buffer->data, '\n', buffer->used);
600
601 // No newline found
602 if (!eol) {
603 // If the buffer is full, we send the content to the logger and try again
604 // This should not happen in practise
605 if (pakfire_jail_log_buffer_is_full(buffer)) {
606 DEBUG(jail->pakfire, "Logging buffer is full. Sending all content\n");
607
608 eol = buffer->data + sizeof(buffer->data) - 1;
609
610 // Otherwise we might have only read parts of the output
611 } else
612 break;
613 }
614
615 // Find the length of the string
616 size_t length = eol - buffer->data + 1;
617
618 // Copy the line into the buffer
619 memcpy(line, buffer->data, length);
620
621 // Terminate the string
622 line[length] = '\0';
623
624 // Log the line
e33387d3
MT
625 if (callback) {
626 int r = callback(jail->pakfire, data, priority, line, length);
616f1fca
MT
627 if (r) {
628 ERROR(jail->pakfire, "The logging callback returned an error: %d\n", r);
629 return r;
630 }
631 }
632
633 // Remove line from buffer
634 memmove(buffer->data, buffer->data + length, buffer->used - length);
635 buffer->used -= length;
636 }
637
638 return 0;
639}
640
06b864ae
MT
641static int pakfire_jail_stream_stdin(struct pakfire_jail* jail,
642 struct pakfire_jail_exec* ctx, const int fd) {
643 int r;
644
645 // Nothing to do if there is no stdin callback set
646 if (!ctx->communicate.in) {
647 DEBUG(jail->pakfire, "Callback for standard input is not set\n");
648 return 0;
649 }
650
f5a70a96
MT
651 // Skip if the writing pipe has already been closed
652 if (!ctx->pipes.stdin[1])
653 return 0;
654
06b864ae
MT
655 DEBUG(jail->pakfire, "Streaming standard input...\n");
656
657 // Calling the callback
658 r = ctx->communicate.in(jail->pakfire, ctx->communicate.data, fd);
659
660 DEBUG(jail->pakfire, "Standard input callback finished: %d\n", r);
661
662 // The callback signaled that it has written everything
663 if (r == EOF) {
664 DEBUG(jail->pakfire, "Closing standard input pipe\n");
665
f5a70a96 666 // Close the file-descriptor
06b864ae 667 close(fd);
f5a70a96
MT
668
669 // Reset the file-descriptor so it won't be closed again later
dd2d7dbb 670 ctx->pipes.stdin[1] = -1;
f5a70a96
MT
671
672 // Report success
06b864ae
MT
673 r = 0;
674 }
675
676 return r;
677}
678
195fe455
MT
679static int pakfire_jail_setup_pipe(struct pakfire_jail* jail, int (*fds)[2], const int flags) {
680 int r = pipe2(*fds, flags);
681 if (r < 0) {
682 ERROR(jail->pakfire, "Could not setup pipe: %m\n");
683 return 1;
684 }
685
686 return 0;
687}
688
/*
	Closes both ends of a pipe, skipping any end with a negative
	file descriptor. The array itself is not modified.
*/
static void pakfire_jail_close_pipe(struct pakfire_jail* jail, int fds[2]) {
	// Reading end
	if (fds[0] >= 0)
		close(fds[0]);

	// Writing end
	if (fds[1] >= 0)
		close(fds[1]);
}
694
e33387d3
MT
/*
	Convenience function for the reading side of a pipe: closes the
	writing end (and marks it as closed) and returns the reading end,
	or -1 if the reading end is not open.
*/
static int pakfire_jail_get_pipe_to_read(struct pakfire_jail* jail, int (*fds)[2]) {
	int* const ends = *fds;

	// Close the write end of the pipe
	if (ends[1] >= 0) {
		close(ends[1]);
		ends[1] = -1;
	}

	// Return the read end
	return (ends[0] >= 0) ? ends[0] : -1;
}
716
06b864ae
MT
/*
	Convenience function for the writing side of a pipe: closes the
	reading end (and marks it as closed) and returns the writing end,
	or -1 if the writing end is not open.
*/
static int pakfire_jail_get_pipe_to_write(struct pakfire_jail* jail, int (*fds)[2]) {
	int* const ends = *fds;

	// Close the read end of the pipe
	if (ends[0] >= 0) {
		close(ends[0]);
		ends[0] = -1;
	}

	// Return the write end
	return (ends[1] >= 0) ? ends[1] : -1;
}
734
9b171c6a
MT
735static int pakfire_jail_recv_fd(struct pakfire_jail* jail, int socket, int* fd) {
736 const size_t payload_length = sizeof(fd);
737 char buffer[CMSG_SPACE(payload_length)];
738 int r;
739
740 struct msghdr msg = {
741 .msg_control = buffer,
742 .msg_controllen = sizeof(buffer),
743 };
744
745 // Receive the message
746 r = recvmsg(socket, &msg, 0);
747 if (r) {
748 CTX_ERROR(jail->ctx, "Could not receive file descriptor: %s\n", strerror(errno));
749 return -errno;
750 }
751
752 // Fetch the payload
753 struct cmsghdr* cmsg = CMSG_FIRSTHDR(&msg);
754 if (!cmsg)
755 return -EBADMSG;
756
757 *fd = *((int*)CMSG_DATA(cmsg));
758
759 CTX_DEBUG(jail->ctx, "Received fd %d from socket %d\n", *fd, socket);
760
761 return 0;
762}
763
764static int pakfire_jail_send_fd(struct pakfire_jail* jail, int socket, int fd) {
765 const size_t payload_length = sizeof(fd);
766 char buffer[CMSG_SPACE(payload_length)];
767 int r;
768
769 CTX_DEBUG(jail->ctx, "Sending fd %d to socket %d\n", fd, socket);
770
771 // Header
772 struct msghdr msg = {
773 .msg_control = buffer,
774 .msg_controllen = sizeof(buffer),
775 };
776
777 // Payload
778 struct cmsghdr* cmsg = CMSG_FIRSTHDR(&msg);
779 cmsg->cmsg_level = SOL_SOCKET;
780 cmsg->cmsg_type = SCM_RIGHTS;
781 cmsg->cmsg_len = CMSG_LEN(payload_length);
782
783 // Set payload
784 *((int*)CMSG_DATA(cmsg)) = fd;
785
786 // Send the message
787 r = sendmsg(socket, &msg, 0);
788 if (r) {
789 CTX_ERROR(jail->ctx, "Could not send file descriptor: %s\n", strerror(errno));
790 return -errno;
791 }
792
793 return 0;
794}
795
19e50d86
MT
/*
	Callback that passes a line of the given length on to the logger
	of the parent process. Always returns zero.
*/
static int pakfire_jail_log(struct pakfire* pakfire, void* data, int priority,
		const char* line, const size_t length) {
	// The precision of %.*s has to be an integer
	const int precision = (int)length;

	pakfire_log_condition(pakfire, priority, 0, "%.*s", precision, line);

	return 0;
}
803
9b171c6a
MT
804static int pakfire_jail_epoll_add_fd(struct pakfire_jail* jail, int epollfd, int fd, int events) {
805 struct epoll_event event = {
806 .events = events|EPOLLHUP,
807 .data = {
808 .fd = fd,
809 },
810 };
811 int r;
812
813 // Read flags
814 int flags = fcntl(fd, F_GETFL, 0);
815
816 // Set modified flags
817 r = fcntl(fd, F_SETFL, flags|O_NONBLOCK);
818 if (r < 0) {
819 CTX_ERROR(jail->ctx, "Could not set file descriptor %d into non-blocking mode: %s\n",
820 fd, strerror(errno));
821 return -errno;
822 }
823
824 // Add the file descriptor to the loop
825 r = epoll_ctl(epollfd, EPOLL_CTL_ADD, fd, &event);
826 if (r < 0) {
827 ERROR(jail->pakfire, "Could not add file descriptor %d to epoll(): %s\n",
828 fd, strerror(errno));
829 return -errno;
830 }
831
832 return 0;
833}
834
835static int pakfire_jail_setup_child2(struct pakfire_jail* jail, struct pakfire_jail_exec* ctx);
836
837static int pakfire_jail_wait_on_child(struct pakfire_jail* jail, int pidfd) {
838 siginfo_t status = {};
839 int r;
840
841 // Call waitid() and store the result
842 r = waitid(P_PIDFD, pidfd, &status, WEXITED);
843 if (r) {
844 CTX_ERROR(jail->ctx, "waitid() failed: %s\n", strerror(errno));
845 return -errno;
846 }
847
848 switch (status.si_code) {
849 // If the process exited normally, we return the exit code
850 case CLD_EXITED:
851 CTX_DEBUG(jail->ctx, "The child process exited with code %d\n", status.si_status);
852 return status.si_status;
853
854 case CLD_KILLED:
855 CTX_ERROR(jail->ctx, "The child process was killed\n");
856 return 139;
857
858 case CLD_DUMPED:
859 CTX_ERROR(jail->ctx, "The child process terminated abnormally\n");
860 return 139;
861
862 // Log anything else
863 default:
864 CTX_ERROR(jail->ctx, "Unknown child exit code: %d\n", status.si_code);
865 break;
866 }
867
868 return -EBADMSG;
869}
870
/*
	The main loop of the parent process.

	It waits with epoll() on the UNIX domain socket, the standard
	input/output pipes, the log pipes, the timer and the pidfds of the
	child processes, and dispatches every event until the second child
	process has been collected.

	Returns the value returned by pakfire_jail_wait_on_child() for the
	second child process, or a non-zero error value if anything failed.
*/
static int pakfire_jail_wait(struct pakfire_jail* jail, struct pakfire_jail_exec* ctx) {
	int epollfd = -1;
	struct epoll_event events[EPOLL_MAX_EVENTS];
	char garbage[8];
	int r = 0;

	// Fetch the UNIX domain socket
	const int socket_recv = pakfire_jail_get_pipe_to_read(jail, &ctx->socket);

	// Fetch file descriptors from context
	// (this closes the ends of the pipes that the parent does not use)
	const int stdin = pakfire_jail_get_pipe_to_write(jail, &ctx->pipes.stdin);
	const int stdout = pakfire_jail_get_pipe_to_read(jail, &ctx->pipes.stdout);
	const int stderr = pakfire_jail_get_pipe_to_read(jail, &ctx->pipes.stderr);

	// Timer (-1 if no timeout has been set)
	const int timerfd = pakfire_jail_create_timer(jail);

	// Logging
	const int log_INFO = pakfire_jail_get_pipe_to_read(jail, &ctx->pipes.log_INFO);
	const int log_ERROR = pakfire_jail_get_pipe_to_read(jail, &ctx->pipes.log_ERROR);
#ifdef ENABLE_DEBUG
	const int log_DEBUG = pakfire_jail_get_pipe_to_read(jail, &ctx->pipes.log_DEBUG);
#endif /* ENABLE_DEBUG */

	// Make a list of all file descriptors we are interested in
	const struct pakfire_wait_fds {
		const int fd;
		const int events;
	} fds[] = {
		{ socket_recv, EPOLLIN },

		// Standard input/output
		{ stdin, EPOLLOUT },
		{ stdout, EPOLLIN },
		{ stderr, EPOLLIN },

		// Timer
		{ timerfd, EPOLLIN },

		// Child Processes
		{ ctx->pidfd1, EPOLLIN },

		// Log Pipes
		{ log_INFO, EPOLLIN },
		{ log_ERROR, EPOLLIN },
#ifdef ENABLE_DEBUG
		{ log_DEBUG, EPOLLIN },
#endif /* ENABLE_DEBUG */

		// Sentinel (the loop below stops at the first entry without any events)
		{ -1, 0 },
	};

	// Setup epoll
	epollfd = epoll_create1(0);
	if (epollfd < 0) {
		ERROR(jail->pakfire, "Could not initialize epoll(): %m\n");
		r = 1;
		goto ERROR;
	}

	// Turn file descriptors into non-blocking mode and add them to epoll()
	for (const struct pakfire_wait_fds* fd = fds; fd->events; fd++) {
		// Skip fds which were not initialized
		if (fd->fd < 0)
			continue;

		// Add the FD to the event loop
		r = pakfire_jail_epoll_add_fd(jail, epollfd, fd->fd, fd->events);
		if (r)
			goto ERROR;
	}

	int ended = 0;
	int exit = 0;

	CTX_DEBUG(jail->ctx, "Launching main loop...\n");

	// Loop for as long as the process is alive
	while (!ended) {
		int num = epoll_wait(epollfd, events, EPOLL_MAX_EVENTS, -1);
		if (num < 1) {
			// Ignore if epoll_wait() has been interrupted
			if (errno == EINTR)
				continue;

			ERROR(jail->pakfire, "epoll_wait() failed: %m\n");
			r = 1;

			goto ERROR;
		}

		for (int i = 0; i < num; i++) {
			int e = events[i].events;
			int fd = events[i].data.fd;

			// Set by the branches below that end in log processing
			struct pakfire_log_buffer* buffer = NULL;
			pakfire_jail_communicate_out callback = NULL;
			void* data = NULL;
			int priority;

			// Check if there is any data to be read
			if (e & EPOLLIN) {
				// Monitor the first child process
				if (fd == ctx->pidfd1) {
					r = pakfire_jail_wait_on_child(jail, ctx->pidfd1);
					if (r) {
						CTX_ERROR(jail->ctx, "The first child exited with an error\n");
						goto ERROR;
					}

					close(ctx->pidfd1);
					ctx->pidfd1 = -1;

					continue;

				// Monitor the second child process
				} else if (fd == ctx->pidfd2) {
					exit = pakfire_jail_wait_on_child(jail, ctx->pidfd2);
					if (exit < 0) {
						CTX_ERROR(jail->ctx, "The second child exited with an error\n");
						goto ERROR;
					}

					close(ctx->pidfd2);
					ctx->pidfd2 = -1;

					// Mark that we have ended so that we will process the remaining
					// events from epoll() now, but won't restart the outer loop.
					ended = 1;

					continue;

				// Handle timer events
				} else if (fd == timerfd) {
					DEBUG(jail->pakfire, "Timer event received\n");

					// Disarm the timer
					r = read(timerfd, garbage, sizeof(garbage));
					if (r < 1) {
						ERROR(jail->pakfire, "Could not disarm timer: %m\n");
						r = 1;
						goto ERROR;
					}

					// Terminate the process if it hasn't already ended
					if (!ended) {
						DEBUG(jail->pakfire, "Terminating process...\n");

						// Send SIGKILL to the second child process
						r = pidfd_send_signal(ctx->pidfd2, SIGKILL, NULL, 0);
						if (r) {
							ERROR(jail->pakfire, "Could not kill process: %m\n");
							goto ERROR;
						}
					}

					// There is nothing else to do
					continue;

				// Handle socket messages
				} else if (fd == socket_recv) {
					// Receive the FD of the second child process
					r = pakfire_jail_recv_fd(jail, socket_recv, &ctx->pidfd2);
					if (r)
						goto ERROR;

					// Add it to the event loop
					r = pakfire_jail_epoll_add_fd(jail, epollfd, ctx->pidfd2, EPOLLIN);
					if (r)
						goto ERROR;

					// Setup the child process
					r = pakfire_jail_setup_child2(jail, ctx);
					if (r)
						goto ERROR;

					// Don't fall through to log processing
					continue;

				// Handle logging messages
				} else if (fd == log_INFO) {
					buffer = &ctx->buffers.log_INFO;
					priority = LOG_INFO;

					callback = pakfire_jail_log;

				} else if (fd == log_ERROR) {
					buffer = &ctx->buffers.log_ERROR;
					priority = LOG_ERR;

					callback = pakfire_jail_log;

#ifdef ENABLE_DEBUG
				} else if (fd == log_DEBUG) {
					buffer = &ctx->buffers.log_DEBUG;
					priority = LOG_DEBUG;

					callback = pakfire_jail_log;
#endif /* ENABLE_DEBUG */

				// Handle anything from standard output
				} else if (fd == stdout) {
					buffer = &ctx->buffers.stdout;
					priority = LOG_INFO;

					// Send any output to the default logger if no callback is set
					if (ctx->communicate.out) {
						callback = ctx->communicate.out;
						data = ctx->communicate.data;
					} else {
						callback = jail->callbacks.log;
						data = jail->callbacks.log_data;
					}

				// Handle anything from standard error
				} else if (fd == stderr) {
					buffer = &ctx->buffers.stderr;
					priority = LOG_ERR;

					// Send any output to the default logger if no callback is set
					if (ctx->communicate.out) {
						callback = ctx->communicate.out;
						data = ctx->communicate.data;
					} else {
						callback = jail->callbacks.log;
						data = jail->callbacks.log_data;
					}

				} else {
					DEBUG(jail->pakfire, "Received invalid file descriptor %d\n", fd);
					continue;
				}

				// Handle log event
				r = pakfire_jail_handle_log(jail, ctx, priority, fd, buffer, callback, data);
				if (r)
					goto ERROR;
			}

			// Check if anything can be written
			if (e & EPOLLOUT) {
				// Handle standard input
				if (fd == stdin) {
					r = pakfire_jail_stream_stdin(jail, ctx, fd);
					if (r) {
						switch (errno) {
							// Ignore if we filled up the buffer
							case EAGAIN:
								break;

							default:
								ERROR(jail->pakfire, "Could not write to stdin: %m\n");
								goto ERROR;
						}
					}
				}
			}

			// Check if any file descriptors have been closed
			if (e & EPOLLHUP) {
				// Remove the file descriptor
				r = epoll_ctl(epollfd, EPOLL_CTL_DEL, fd, NULL);
				if (r) {
					ERROR(jail->pakfire, "Could not remove closed file-descriptor %d: %m\n", fd);
					goto ERROR;
				}
			}
		}
	}

	// Return the exit code
	r = exit;

ERROR:
	// This label is also reached on success
	CTX_DEBUG(jail->ctx, "Main loop terminated\n");

	if (epollfd >= 0)
		close(epollfd);
	if (timerfd >= 0)
		close(timerfd);

	return r;
}
1153
ccdd2e95
MT
/*
	Output callback which collects everything that arrives with priority
	LOG_INFO in one growing string.

	data is either NULL or points to a char*. That char* is either NULL or a
	string which has been collected by earlier calls of this function
	(assumes the caller never stores a pointer in there that cannot be passed
	to free() - TODO confirm against callers).

	On success, the char* points to a newly allocated string consisting of the
	previous content followed by line, and the previous string has been freed.
	If memory could not be allocated, 1 is returned and the previously
	collected string is left untouched.

	Lines with any other priority (or all lines if data is NULL) are handed
	to the default log callback and its result is returned.
*/
int pakfire_jail_capture_stdout(struct pakfire* pakfire, void* data,
		int priority, const char* line, size_t length) {
	char** output = (char**)data;
	char* buffer = NULL;
	int r;

	// Send everything else to the default logger
	if (!output || priority != LOG_INFO)
		return pakfire_jail_default_log_callback(pakfire, NULL, priority, line, length);

	// Build the new string in a temporary pointer so that the old content
	// survives if the allocation fails
	r = asprintf(&buffer, "%s%s", (*output) ? *output : "", line);
	if (r < 0)
		return 1;

	// asprintf() does not release what it was given, so we have to
	free(*output);
	*output = buffer;

	return 0;
}
1170
980b15af
MT
1171// Capabilities
1172
e6791c52
MT
1173// Logs all capabilities of the current process
1174static int pakfire_jail_show_capabilities(struct pakfire_jail* jail) {
1175 cap_t caps = NULL;
1176 char* name = NULL;
1177 cap_flag_value_t value_e;
1178 cap_flag_value_t value_i;
1179 cap_flag_value_t value_p;
1180 int r;
980b15af 1181
e6791c52
MT
1182 // Fetch PID
1183 pid_t pid = getpid();
980b15af 1184
e6791c52
MT
1185 // Fetch all capabilities
1186 caps = cap_get_proc();
1187 if (!caps) {
1188 ERROR(jail->pakfire, "Could not fetch capabilities: %m\n");
1189 r = 1;
1190 goto ERROR;
1191 }
980b15af 1192
e6791c52 1193 DEBUG(jail->pakfire, "Capabilities of PID %d:\n", pid);
980b15af 1194
e6791c52
MT
1195 // Iterate over all capabilities
1196 for (unsigned int cap = 0; cap_valid(cap); cap++) {
1197 name = cap_to_name(cap);
980b15af 1198
e6791c52
MT
1199 // Fetch effective value
1200 r = cap_get_flag(caps, cap, CAP_EFFECTIVE, &value_e);
1201 if (r)
1202 goto ERROR;
980b15af 1203
e6791c52
MT
1204 // Fetch inheritable value
1205 r = cap_get_flag(caps, cap, CAP_INHERITABLE, &value_i);
1206 if (r)
1207 goto ERROR;
980b15af 1208
e6791c52
MT
1209 // Fetch permitted value
1210 r = cap_get_flag(caps, cap, CAP_PERMITTED, &value_p);
1211 if (r)
1212 goto ERROR;
980b15af 1213
e6791c52
MT
1214 DEBUG(jail->pakfire,
1215 " %-24s : %c%c%c\n",
1216 name,
1217 (value_e == CAP_SET) ? 'e' : '-',
1218 (value_i == CAP_SET) ? 'i' : '-',
1219 (value_p == CAP_SET) ? 'p' : '-'
1220 );
980b15af 1221
e6791c52
MT
1222 // Free name
1223 cap_free(name);
1224 name = NULL;
1225 }
980b15af 1226
e6791c52
MT
1227 // Success
1228 r = 0;
980b15af 1229
e6791c52
MT
1230ERROR:
1231 if (name)
1232 cap_free(name);
1233 if (caps)
1234 cap_free(caps);
980b15af 1235
e6791c52
MT
1236 return r;
1237}
980b15af 1238
e6791c52
MT
1239static int pakfire_jail_set_capabilities(struct pakfire_jail* jail) {
1240 cap_t caps = NULL;
1241 char* name = NULL;
1242 int r;
980b15af 1243
e6791c52
MT
1244 // Fetch capabilities
1245 caps = cap_get_proc();
1246 if (!caps) {
1247 ERROR(jail->pakfire, "Could not read capabilities: %m\n");
1248 r = 1;
1249 goto ERROR;
1250 }
980b15af 1251
e6791c52
MT
1252 // Walk through all capabilities
1253 for (cap_value_t cap = 0; cap_valid(cap); cap++) {
1254 cap_value_t _caps[] = { cap };
980b15af 1255
e6791c52
MT
1256 // Fetch the name of the capability
1257 name = cap_to_name(cap);
980b15af 1258
e6791c52
MT
1259 r = cap_set_flag(caps, CAP_EFFECTIVE, 1, _caps, CAP_SET);
1260 if (r) {
1261 ERROR(jail->pakfire, "Could not set %s: %m\n", name);
1262 goto ERROR;
1263 }
980b15af 1264
e6791c52 1265 r = cap_set_flag(caps, CAP_INHERITABLE, 1, _caps, CAP_SET);
980b15af 1266 if (r) {
e6791c52
MT
1267 ERROR(jail->pakfire, "Could not set %s: %m\n", name);
1268 goto ERROR;
980b15af
MT
1269 }
1270
e6791c52
MT
1271 r = cap_set_flag(caps, CAP_PERMITTED, 1, _caps, CAP_SET);
1272 if (r) {
1273 ERROR(jail->pakfire, "Could not set %s: %m\n", name);
1274 goto ERROR;
1275 }
980b15af 1276
e6791c52
MT
1277 // Free name
1278 cap_free(name);
1279 name = NULL;
980b15af
MT
1280 }
1281
e6791c52
MT
1282 // Restore all capabilities
1283 r = cap_set_proc(caps);
980b15af 1284 if (r) {
e6791c52 1285 ERROR(jail->pakfire, "Restoring capabilities failed: %m\n");
980b15af
MT
1286 goto ERROR;
1287 }
1288
e6791c52
MT
1289 // Add all capabilities to the ambient set
1290 for (unsigned int cap = 0; cap_valid(cap); cap++) {
1291 name = cap_to_name(cap);
1292
1293 // Raise the capability
1294 r = prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_RAISE, cap, 0, 0);
1295 if (r) {
1296 ERROR(jail->pakfire, "Could not set ambient capability %s: %m\n", name);
1297 goto ERROR;
1298 }
1299
1300 // Free name
1301 cap_free(name);
1302 name = NULL;
980b15af
MT
1303 }
1304
e6791c52
MT
1305 // Success
1306 r = 0;
1307
980b15af 1308ERROR:
e6791c52
MT
1309 if (name)
1310 cap_free(name);
980b15af
MT
1311 if (caps)
1312 cap_free(caps);
1313
1314 return r;
1315}
1316
739d5b57
MT
1317// Syscall Filter
1318
1319static int pakfire_jail_limit_syscalls(struct pakfire_jail* jail) {
1320 const int syscalls[] = {
1321 // The kernel's keyring isn't namespaced
1322 SCMP_SYS(keyctl),
1323 SCMP_SYS(add_key),
1324 SCMP_SYS(request_key),
1325
1326 // Disable userfaultfd
1327 SCMP_SYS(userfaultfd),
1328
1329 // Disable perf which could leak a lot of information about the host
1330 SCMP_SYS(perf_event_open),
1331
1332 0,
1333 };
1334 int r = 1;
1335
1336 DEBUG(jail->pakfire, "Applying syscall filter...\n");
1337
1338 // Setup a syscall filter which allows everything by default
1339 scmp_filter_ctx ctx = seccomp_init(SCMP_ACT_ALLOW);
1340 if (!ctx) {
1341 ERROR(jail->pakfire, "Could not setup seccomp filter: %m\n");
1342 goto ERROR;
1343 }
1344
1345 // All all syscalls
1346 for (const int* syscall = syscalls; *syscall; syscall++) {
1347 r = seccomp_rule_add(ctx, SCMP_ACT_ERRNO(EPERM), *syscall, 0);
1348 if (r) {
1349 ERROR(jail->pakfire, "Could not configure syscall %d: %m\n", *syscall);
1350 goto ERROR;
1351 }
1352 }
1353
1354 // Load syscall filter into the kernel
1355 r = seccomp_load(ctx);
1356 if (r) {
1357 ERROR(jail->pakfire, "Could not load syscall filter into the kernel: %m\n");
1358 goto ERROR;
1359 }
1360
1361ERROR:
1362 if (ctx)
1363 seccomp_release(ctx);
1364
1365 return r;
1366}
1367
cc6e2264
MT
1368// Mountpoints
1369
061223f7 1370PAKFIRE_EXPORT int pakfire_jail_bind(struct pakfire_jail* jail,
cc6e2264
MT
1371 const char* source, const char* target, int flags) {
1372 struct pakfire_jail_mountpoint* mp = NULL;
1373 int r;
1374
1375 // Check if there is any space left
1376 if (jail->num_mountpoints >= MAX_MOUNTPOINTS) {
1377 errno = ENOSPC;
1378 return 1;
1379 }
1380
1381 // Check for valid inputs
1382 if (!source || !target) {
1383 errno = EINVAL;
1384 return 1;
1385 }
1386
1387 // Select the next free slot
1388 mp = &jail->mountpoints[jail->num_mountpoints];
1389
1390 // Copy source
1391 r = pakfire_string_set(mp->source, source);
a60955af 1392 if (r) {
cc6e2264 1393 ERROR(jail->pakfire, "Could not copy source: %m\n");
a60955af 1394 return r;
cc6e2264
MT
1395 }
1396
1397 // Copy target
1398 r = pakfire_string_set(mp->target, target);
a60955af 1399 if (r) {
cc6e2264 1400 ERROR(jail->pakfire, "Could not copy target: %m\n");
a60955af 1401 return r;
cc6e2264
MT
1402 }
1403
1404 // Copy flags
1405 mp->flags = flags;
1406
1407 // Increment counter
1408 jail->num_mountpoints++;
1409
1410 return 0;
1411}
1412
7bdf1d8e
MT
1413static int pakfire_jail_mount_networking(struct pakfire_jail* jail) {
1414 int r;
1415
1416 const char* paths[] = {
1417 "/etc/hosts",
1418 "/etc/resolv.conf",
1419 NULL,
1420 };
1421
1422 // Bind-mount all paths read-only
1423 for (const char** path = paths; *path; path++) {
1424 r = pakfire_bind(jail->pakfire, *path, NULL, MS_RDONLY);
693a979a
MT
1425 if (r) {
1426 switch (errno) {
1427 // Ignore if we don't have permission
1428 case EPERM:
1429 continue;
1430
1431 default:
1432 break;
1433 }
7bdf1d8e 1434 return r;
693a979a 1435 }
7bdf1d8e
MT
1436 }
1437
1438 return 0;
1439}
1440
cc6e2264
MT
1441/*
1442 Mounts everything that we require in the new namespace
1443*/
7bdf1d8e 1444static int pakfire_jail_mount(struct pakfire_jail* jail, struct pakfire_jail_exec* ctx) {
cc6e2264 1445 struct pakfire_jail_mountpoint* mp = NULL;
282b732a 1446 int flags = 0;
cc6e2264
MT
1447 int r;
1448
282b732a
MT
1449 // Enable loop devices
1450 if (pakfire_jail_exec_has_flag(ctx, PAKFIRE_JAIL_HAS_LOOP_DEVICES))
1451 flags |= PAKFIRE_MOUNT_LOOP_DEVICES;
1452
cc6e2264 1453 // Mount all default stuff
282b732a 1454 r = pakfire_mount_all(jail->pakfire, flags);
cc6e2264
MT
1455 if (r)
1456 return r;
1457
7bdf1d8e
MT
1458 // Mount networking stuff
1459 if (pakfire_jail_exec_has_flag(ctx, PAKFIRE_JAIL_HAS_NETWORKING)) {
1460 r = pakfire_jail_mount_networking(jail);
1461 if (r)
1462 return r;
1463 }
1464
cc6e2264
MT
1465 // Mount all custom stuff
1466 for (unsigned int i = 0; i < jail->num_mountpoints; i++) {
1467 // Fetch mountpoint
1468 mp = &jail->mountpoints[i];
1469
1470 // Mount it
1471 r = pakfire_bind(jail->pakfire, mp->source, mp->target, mp->flags);
1472 if (r)
1473 return r;
1474 }
1475
1476 // Log all mountpoints
1477 pakfire_mount_list(jail->pakfire);
1478
1479 return 0;
1480}
1481
fec79a33
MT
1482// Networking
1483
1484static int pakfire_jail_setup_loopback(struct pakfire_jail* jail) {
1485 struct nl_sock* nl = NULL;
1486 struct nl_cache* cache = NULL;
1487 struct rtnl_link* link = NULL;
1488 struct rtnl_link* change = NULL;
1489 int r;
1490
1491 DEBUG(jail->pakfire, "Setting up loopback...\n");
1492
1493 // Allocate a netlink socket
1494 nl = nl_socket_alloc();
1495 if (!nl) {
1496 ERROR(jail->pakfire, "Could not allocate a netlink socket: %m\n");
1497 r = 1;
1498 goto ERROR;
1499 }
1500
1501 // Connect the socket
1502 r = nl_connect(nl, NETLINK_ROUTE);
1503 if (r) {
1504 ERROR(jail->pakfire, "Could not connect netlink socket: %s\n", nl_geterror(r));
1505 goto ERROR;
1506 }
1507
1508 // Allocate the netlink cache
1509 r = rtnl_link_alloc_cache(nl, AF_UNSPEC, &cache);
1510 if (r < 0) {
1511 ERROR(jail->pakfire, "Unable to allocate netlink cache: %s\n", nl_geterror(r));
1512 goto ERROR;
1513 }
1514
1515 // Fetch loopback interface
1516 link = rtnl_link_get_by_name(cache, "lo");
1517 if (!link) {
1518 ERROR(jail->pakfire, "Could not find lo interface. Ignoring.\n");
1519 r = 0;
1520 goto ERROR;
1521 }
1522
1523 // Allocate a new link
1524 change = rtnl_link_alloc();
1525 if (!change) {
1526 ERROR(jail->pakfire, "Could not allocate change link\n");
1527 r = 1;
1528 goto ERROR;
1529 }
1530
1531 // Set the link to UP
1532 rtnl_link_set_flags(change, IFF_UP);
1533
1534 // Apply any changes
1535 r = rtnl_link_change(nl, link, change, 0);
1536 if (r) {
1537 ERROR(jail->pakfire, "Unable to activate loopback: %s\n", nl_geterror(r));
1538 goto ERROR;
1539 }
1540
1541 // Success
1542 r = 0;
1543
1544ERROR:
1545 if (nl)
1546 nl_socket_free(nl);
1547
1548 return r;
1549}
1550
679ee2fa
MT
1551// UID/GID Mapping
1552
679ee2fa
MT
1553static int pakfire_jail_setup_uid_mapping(struct pakfire_jail* jail, pid_t pid) {
1554 char path[PATH_MAX];
1555 int r;
1556
4896e62c
MT
1557 // Skip mapping anything when running on /
1558 if (pakfire_on_root(jail->pakfire))
1559 return 0;
0f7f068b 1560
abe4ee37
MT
1561 // Make path
1562 r = pakfire_string_format(path, "/proc/%d/uid_map", pid);
1563 if (r)
1564 return r;
1565
1566 // Fetch UID
1567 const uid_t uid = pakfire_uid(jail->pakfire);
1568
4896e62c 1569 // Fetch SUBUID
a1ff2863 1570 const struct pakfire_subid* subuid = pakfire_subuid(jail->pakfire);
4896e62c
MT
1571 if (!subuid)
1572 return 1;
679ee2fa 1573
abe4ee37 1574 /* When running as root, we will map the entire range.
679ee2fa 1575
abe4ee37
MT
1576 When running as a non-privileged user, we will map the root user inside the jail
1577 to the user's UID outside of the jail, and we will map the rest starting from one.
1578 */
679ee2fa 1579
abe4ee37
MT
1580 // Running as root
1581 if (uid == 0) {
1582 r = pakfire_file_write(jail->pakfire, path, 0, 0, 0,
1583 "0 %lu %lu\n", subuid->id, subuid->length);
1584 } else {
1585 r = pakfire_file_write(jail->pakfire, path, 0, 0, 0,
b64888fa 1586 "0 %lu 1\n1 %lu %lu\n", uid, subuid->id, subuid->length);
abe4ee37
MT
1587 }
1588
1589 if (r) {
1590 ERROR(jail->pakfire, "Could not map UIDs: %m\n");
1591 return r;
1592 }
1593
1594 return r;
679ee2fa
MT
1595}
1596
1597static int pakfire_jail_setup_gid_mapping(struct pakfire_jail* jail, pid_t pid) {
1598 char path[PATH_MAX];
1599 int r;
1600
4896e62c
MT
1601 // Skip mapping anything when running on /
1602 if (pakfire_on_root(jail->pakfire))
1603 return 0;
0f7f068b 1604
abe4ee37
MT
1605 // Fetch GID
1606 const gid_t gid = pakfire_gid(jail->pakfire);
1607
4896e62c 1608 // Fetch SUBGID
a1ff2863 1609 const struct pakfire_subid* subgid = pakfire_subgid(jail->pakfire);
4896e62c
MT
1610 if (!subgid)
1611 return 1;
679ee2fa
MT
1612
1613 // Make path
1614 r = pakfire_string_format(path, "/proc/%d/gid_map", pid);
a60955af
MT
1615 if (r)
1616 return r;
679ee2fa 1617
abe4ee37
MT
1618 // Running as root
1619 if (gid == 0) {
1620 r = pakfire_file_write(jail->pakfire, path, 0, 0, 0,
1621 "0 %lu %lu\n", subgid->id, subgid->length);
1622 } else {
1623 r = pakfire_file_write(jail->pakfire, path, 0, 0, 0,
b9a1d857 1624 "0 %lu 1\n1 %lu %lu\n", gid, subgid->id, subgid->length);
abe4ee37 1625 }
679ee2fa 1626
abe4ee37
MT
1627 if (r) {
1628 ERROR(jail->pakfire, "Could not map GIDs: %m\n");
1629 return r;
1630 }
1631
1632 return r;
679ee2fa
MT
1633}
1634
78d7488a
MT
1635static int pakfire_jail_setgroups(struct pakfire_jail* jail, pid_t pid) {
1636 char path[PATH_MAX];
1637 int r = 1;
1638
1639 // Make path
1640 r = pakfire_string_format(path, "/proc/%d/setgroups", pid);
a60955af
MT
1641 if (r)
1642 return r;
78d7488a
MT
1643
1644 // Open file for writing
1645 FILE* f = fopen(path, "w");
1646 if (!f) {
1647 ERROR(jail->pakfire, "Could not open %s for writing: %m\n", path);
1648 goto ERROR;
1649 }
1650
1651 // Write content
1652 int bytes_written = fprintf(f, "deny\n");
1653 if (bytes_written <= 0) {
1654 ERROR(jail->pakfire, "Could not write to %s: %m\n", path);
1655 goto ERROR;
1656 }
1657
1658 r = fclose(f);
1659 f = NULL;
1660 if (r) {
1661 ERROR(jail->pakfire, "Could not close %s: %m\n", path);
1662 goto ERROR;
1663 }
1664
1665ERROR:
1666 if (f)
1667 fclose(f);
1668
1669 return r;
1670}
1671
43dc0e16 1672static int pakfire_jail_send_signal(struct pakfire_jail* jail, int fd) {
743f449e
MT
1673 const uint64_t val = 1;
1674 int r = 0;
43dc0e16
MT
1675
1676 DEBUG(jail->pakfire, "Sending signal...\n");
1677
743f449e 1678 // Write to the file descriptor
a87c52e2
MT
1679 r = eventfd_write(fd, val);
1680 if (r < 0) {
1681 ERROR(jail->pakfire, "Could not send signal: %s\n", strerror(errno));
1682 r = -errno;
743f449e
MT
1683 }
1684
1685 // Close the file descriptor
43dc0e16
MT
1686 close(fd);
1687
743f449e 1688 return r;
43dc0e16
MT
1689}
1690
1691static int pakfire_jail_wait_for_signal(struct pakfire_jail* jail, int fd) {
743f449e
MT
1692 uint64_t val = 0;
1693 int r = 0;
43dc0e16
MT
1694
1695 DEBUG(jail->pakfire, "Waiting for signal...\n");
1696
a87c52e2
MT
1697 r = eventfd_read(fd, &val);
1698 if (r < 0) {
1699 ERROR(jail->pakfire, "Error waiting for signal: %s\n", strerror(errno));
1700 r = -errno;
743f449e
MT
1701 }
1702
1703 // Close the file descriptor
43dc0e16
MT
1704 close(fd);
1705
743f449e 1706 return r;
43dc0e16
MT
1707}
1708
14df7388
MT
1709static int pakfire_jail_switch_root(struct pakfire_jail* jail, const char* root) {
1710 int r;
1711
1712 // Change to the new root
1713 r = chdir(root);
1714 if (r) {
1715 ERROR(jail->pakfire, "chdir(%s) failed: %m\n", root);
1716 return r;
1717 }
1718
1719 // Switch Root!
1720 r = pivot_root(".", ".");
1721 if (r) {
1722 ERROR(jail->pakfire, "Failed changing into the new root directory %s: %m\n", root);
1723 return r;
1724 }
1725
1726 // Umount the old root
1727 r = umount2(".", MNT_DETACH);
1728 if (r) {
1729 ERROR(jail->pakfire, "Could not umount the old root filesystem: %m\n");
1730 return r;
1731 }
1732
1733 return 0;
1734}
1735
9b171c6a
MT
1736/*
1737 Called by the parent that sets up the second child process...
1738*/
1739static int pakfire_jail_setup_child2(
1740 struct pakfire_jail* jail, struct pakfire_jail_exec* ctx) {
1741 pid_t pid = -1;
43dc0e16
MT
1742 int r;
1743
9b171c6a
MT
1744 // Fetch the PID
1745 r = pidfd_get_pid(ctx->pidfd2, &pid);
1746 if (r) {
1747 CTX_ERROR(jail->ctx, "Could not fetch PID: %s\n", strerror(-r));
1748 return r;
1749 }
0bd84dc1 1750
9b171c6a
MT
1751 // Setup UID mapping
1752 r = pakfire_jail_setup_uid_mapping(jail, pid);
1753 if (r)
1754 return r;
cf440db8 1755
9b171c6a
MT
1756 // Write "deny" to /proc/PID/setgroups
1757 r = pakfire_jail_setgroups(jail, pid);
1758 if (r)
1759 return r;
0bd84dc1 1760
9b171c6a
MT
1761 // Setup GID mapping
1762 r = pakfire_jail_setup_gid_mapping(jail, pid);
1763 if (r)
1764 return r;
1765
1766 // Parent has finished initialisation
1767 DEBUG(jail->pakfire, "Parent has finished initialization\n");
1768
1769 // Send signal to client
1770 r = pakfire_jail_send_signal(jail, ctx->completed_fd);
43dc0e16
MT
1771 if (r)
1772 return r;
1773
9b171c6a
MT
1774 return 0;
1775}
1776
1777/*
1778 Child 2 is launched in their own user/mount/etc. namespace.
1779*/
1780static int pakfire_jail_child2(struct pakfire_jail* jail,
1781 struct pakfire_jail_exec* ctx, const char* argv[]) {
1782 int r;
1783
1784 // Fetch my own PID
1785 pid_t pid = getpid();
1786
1787 CTX_DEBUG(jail->ctx, "Launched child process in jail with PID %d\n", pid);
1788
90503c53
MT
1789 // Make this process dumpable
1790 r = prctl (PR_SET_DUMPABLE, 1, 0, 0, 0);
1791 if (r) {
9b171c6a 1792 CTX_ERROR(jail->ctx, "Could not make the process dumpable: %m\n");
90503c53
MT
1793 return 126;
1794 }
1795
1796 // Don't drop any capabilities on setuid()
1797 r = prctl(PR_SET_KEEPCAPS, 1);
1798 if (r) {
9b171c6a 1799 CTX_ERROR(jail->ctx, "Could not set PR_SET_KEEPCAPS: %m\n");
90503c53
MT
1800 return 126;
1801 }
4f59c39b 1802
9b171c6a
MT
1803 // Wait for the parent to finish initialization
1804 r = pakfire_jail_wait_for_signal(jail, ctx->completed_fd);
1805 if (r)
1806 return r;
1807
4f59c39b
MT
1808 // Fetch UID/GID
1809 uid_t uid = getuid();
1810 gid_t gid = getgid();
1811
1812 // Fetch EUID/EGID
1813 uid_t euid = geteuid();
1814 gid_t egid = getegid();
1815
a8a41064
MT
1816 DEBUG(jail->pakfire, " UID: %u (effective %u)\n", uid, euid);
1817 DEBUG(jail->pakfire, " GID: %u (effective %u)\n", gid, egid);
4f59c39b 1818
9b171c6a
MT
1819 // Fail if we are not PID 1
1820 if (pid != 1) {
1821 CTX_ERROR(jail->ctx, "Child process is not PID 1\n");
3a9f6931 1822 return 126;
9b171c6a
MT
1823 }
1824
1825 // Fail if we are not running as root
4f719e21 1826 if (uid || gid || euid || egid) {
4f59c39b 1827 ERROR(jail->pakfire, "Child process is not running as root\n");
3a9f6931 1828 return 126;
4f59c39b
MT
1829 }
1830
652f2a99 1831 const char* arch = pakfire_get_effective_arch(jail->pakfire);
4f59c39b 1832
90d92b5c
MT
1833 // Set personality
1834 unsigned long persona = pakfire_arch_personality(arch);
1835 if (persona) {
1836 r = personality(persona);
1837 if (r < 0) {
1838 ERROR(jail->pakfire, "Could not set personality (%x)\n", (unsigned int)persona);
9b171c6a 1839 return 126;
90d92b5c
MT
1840 }
1841 }
1842
fec79a33
MT
1843 // Setup networking
1844 if (!pakfire_jail_exec_has_flag(ctx, PAKFIRE_JAIL_HAS_NETWORKING)) {
1845 r = pakfire_jail_setup_loopback(jail);
1846 if (r)
1847 return 1;
1848 }
1849
cf440db8
MT
1850 // Set nice level
1851 if (jail->nice) {
1852 DEBUG(jail->pakfire, "Setting nice level to %d\n", jail->nice);
1853
1854 r = setpriority(PRIO_PROCESS, pid, jail->nice);
1855 if (r) {
1856 ERROR(jail->pakfire, "Could not set nice level: %m\n");
1857 return 1;
1858 }
1859 }
1860
e33387d3
MT
1861 // Close other end of log pipes
1862 close(ctx->pipes.log_INFO[0]);
1863 close(ctx->pipes.log_ERROR[0]);
1864#ifdef ENABLE_DEBUG
1865 close(ctx->pipes.log_DEBUG[0]);
1866#endif /* ENABLE_DEBUG */
1867
2015cb92 1868 // Connect standard input
bca69274 1869 if (ctx->pipes.stdin[0] >= 0) {
2015cb92
MT
1870 r = dup2(ctx->pipes.stdin[0], STDIN_FILENO);
1871 if (r < 0) {
1872 ERROR(jail->pakfire, "Could not connect fd %d to stdin: %m\n",
1873 ctx->pipes.stdin[0]);
1874
1875 return 1;
1876 }
1877 }
1878
7ebfb7cb 1879 // Connect standard output and error
bca69274 1880 if (ctx->pipes.stdout[1] >= 0 && ctx->pipes.stderr[1] >= 0) {
7ebfb7cb
MT
1881 r = dup2(ctx->pipes.stdout[1], STDOUT_FILENO);
1882 if (r < 0) {
1883 ERROR(jail->pakfire, "Could not connect fd %d to stdout: %m\n",
1884 ctx->pipes.stdout[1]);
1885
1886 return 1;
1887 }
1888
1889 r = dup2(ctx->pipes.stderr[1], STDERR_FILENO);
1890 if (r < 0) {
1891 ERROR(jail->pakfire, "Could not connect fd %d to stderr: %m\n",
1892 ctx->pipes.stderr[1]);
1893
1894 return 1;
1895 }
1896
195fe455 1897 // Close the pipe (as we have moved the original file descriptors)
2015cb92 1898 pakfire_jail_close_pipe(jail, ctx->pipes.stdin);
195fe455
MT
1899 pakfire_jail_close_pipe(jail, ctx->pipes.stdout);
1900 pakfire_jail_close_pipe(jail, ctx->pipes.stderr);
7ebfb7cb
MT
1901 }
1902
007bc66c
MT
1903 // Reset open file limit (http://0pointer.net/blog/file-descriptor-limits.html)
1904 r = pakfire_rlimit_reset_nofile(jail->pakfire);
1905 if (r)
1906 return r;
1907
e6791c52
MT
1908 // Set capabilities
1909 r = pakfire_jail_set_capabilities(jail);
1910 if (r)
1911 return r;
1912
1913 // Show capabilities
1914 r = pakfire_jail_show_capabilities(jail);
980b15af
MT
1915 if (r)
1916 return r;
1917
739d5b57
MT
1918 // Filter syscalls
1919 r = pakfire_jail_limit_syscalls(jail);
1920 if (r)
1921 return r;
1922
9b171c6a
MT
1923 CTX_DEBUG(jail->ctx, "Child process initialization done\n");
1924 CTX_DEBUG(jail->ctx, "Launching command:\n");
2015cb92
MT
1925
1926 // Log argv
1927 for (unsigned int i = 0; argv[i]; i++)
9b171c6a 1928 CTX_DEBUG(jail->ctx, " argv[%u] = %s\n", i, argv[i]);
2015cb92 1929
b3498aeb
MT
1930 // exec() command
1931 r = execvpe(argv[0], (char**)argv, jail->env);
9fa1afb6
MT
1932 if (r < 0) {
1933 // Translate errno into regular exit code
1934 switch (errno) {
1935 case ENOENT:
1936 // Ignore if the command doesn't exist
1937 if (ctx->flags & PAKFIRE_JAIL_NOENT_OK)
1938 r = 0;
1939 else
1940 r = 127;
b3498aeb 1941
9fa1afb6 1942 break;
b3498aeb 1943
9fa1afb6
MT
1944 default:
1945 r = 1;
1946 }
1947
9b171c6a 1948 CTX_ERROR(jail->ctx, "Could not execve(%s): %m\n", argv[0]);
b3498aeb
MT
1949 }
1950
1951 // We should not get here
1952 return r;
0bd84dc1
MT
1953}
1954
9b171c6a
MT
1955/*
1956 Child 1 is launched in a new mount namespace...
1957*/
1958static int pakfire_jail_child1(struct pakfire_jail* jail,
1959 struct pakfire_jail_exec* ctx, const char* argv[]) {
1960 int r;
1961
1962 // Redirect any logging to our log pipe
1963 pakfire_ctx_set_log_callback(jail->ctx, pakfire_jail_log_redirect, &ctx->pipes);
1964
1965 CTX_DEBUG(jail->ctx, "First child process launched\n");
1966
1967 const int socket_send = pakfire_jail_get_pipe_to_write(jail, &ctx->socket);
1968
1969 const char* root = pakfire_get_path(jail->pakfire);
1970
1971 // Die with parent
1972 r = prctl(PR_SET_PDEATHSIG, SIGKILL, 0, 0, 0);
1973 if (r) {
1974 CTX_ERROR(jail->ctx, "Could not configure to die with parent: %s\n", strerror(errno));
1975 goto ERROR;
1976 }
1977
1978 // Change mount propagation so that we will receive, but don't propagate back
1979 r = pakfire_mount_change_propagation(jail->ctx, "/", MS_SLAVE);
1980 if (r) {
1981 CTX_ERROR(jail->ctx, "Could not change mount propagation to SLAVE: %s\n", strerror(r));
1982 goto ERROR;
1983 }
1984
1985 // Make root a mountpoint in the new mount namespace
1986 r = pakfire_mount_make_mounpoint(jail->pakfire, root);
1987 if (r)
1988 goto ERROR;
1989
1990 // Make everything private
1991 r = pakfire_mount_change_propagation(jail->ctx, root, MS_PRIVATE);
1992 if (r) {
1993 CTX_ERROR(jail->ctx, "Could not change mount propagation to PRIVATE: %s\n", strerror(r));
1994 goto ERROR;
1995 }
1996
1997 // Mount everything
1998 r = pakfire_jail_mount(jail, ctx);
1999 if (r)
2000 goto ERROR;
2001
2002 // chroot()
2003 r = pakfire_jail_switch_root(jail, root);
2004 if (r)
2005 goto ERROR;
2006
2007 // Change mount propagation so that we will propagate everything down
2008 r = pakfire_mount_change_propagation(jail->ctx, "/", MS_SHARED);
2009 if (r) {
2010 CTX_ERROR(jail->ctx, "Could not change mount propagation to SHARED: %s\n", strerror(r));
2011 goto ERROR;
2012 }
2013
2014 // Configure child process
2015 struct clone_args args = {
2016 .flags =
2017 CLONE_NEWCGROUP |
2018 CLONE_NEWIPC |
2019 CLONE_NEWNS |
2020 CLONE_NEWPID |
2021 CLONE_NEWTIME |
2022 CLONE_NEWUSER |
2023 CLONE_NEWUTS |
2024 CLONE_PIDFD,
2025 .exit_signal = SIGCHLD,
2026 .pidfd = (long long unsigned int)&ctx->pidfd2,
2027 };
2028
2029 // Launch the process into the configured cgroup
2030 if (ctx->cgroup) {
2031 args.flags |= CLONE_INTO_CGROUP;
2032
2033 // Clone into this cgroup
2034 args.cgroup = pakfire_cgroup_fd(ctx->cgroup);
2035 }
2036
2037 // Setup networking
2038 if (!pakfire_jail_exec_has_flag(ctx, PAKFIRE_JAIL_HAS_NETWORKING))
2039 args.flags |= CLONE_NEWNET;
2040
2041 // Fork the second child process
2042 pid_t pid = clone3(&args, sizeof(args));
2043 if (pid < 0) {
2044 CTX_ERROR(jail->ctx, "Could not fork the first child process: %s\n", strerror(errno));
2045 r = -errno;
2046 goto ERROR;
2047
2048 // Child process
2049 } else if (pid == 0) {
2050 r = pakfire_jail_child2(jail, ctx, argv);
2051 _exit(r);
2052 }
2053
2054 // Send the pidfd of the child to the first parent
2055 r = pakfire_jail_send_fd(jail, socket_send, ctx->pidfd2);
2056 if (r)
2057 goto ERROR;
2058
2059ERROR:
2060 return r;
2061}
2062
9f50bf71 2063// Run a command in the jail
db4f234f 2064static int __pakfire_jail_exec(struct pakfire_jail* jail, const char* argv[],
2015cb92
MT
2065 const int interactive,
2066 pakfire_jail_communicate_in communicate_in,
2067 pakfire_jail_communicate_out communicate_out,
9fa1afb6 2068 void* data, int flags) {
0bd84dc1
MT
2069 int r;
2070
b3498aeb
MT
2071 // Check if argv is valid
2072 if (!argv || !argv[0]) {
2073 errno = EINVAL;
2074 return -1;
2075 }
2076
616f1fca
MT
2077 // Initialize context for this call
2078 struct pakfire_jail_exec ctx = {
9fa1afb6 2079 .flags = flags,
7bdf1d8e 2080
9b171c6a
MT
2081 .socket = { -1, -1 },
2082
616f1fca 2083 .pipes = {
4c3bab92
MT
2084 .stdin = { -1, -1 },
2085 .stdout = { -1, -1 },
2086 .stderr = { -1, -1 },
2087 .log_INFO = { -1, -1 },
2088 .log_ERROR = { -1, -1 },
6ac51607 2089#ifdef ENABLE_DEBUG
4c3bab92 2090 .log_DEBUG = { -1, -1 },
6ac51607 2091#endif /* ENABLE_DEBUG */
2015cb92
MT
2092 },
2093
2094 .communicate = {
2095 .in = communicate_in,
2096 .out = communicate_out,
2097 .data = data,
616f1fca 2098 },
d2eaf8dc 2099
9b171c6a
MT
2100 // PIDs
2101 .pidfd1 = -1,
2102 .pidfd2 = -1,
616f1fca
MT
2103 };
2104
0bd84dc1
MT
2105 DEBUG(jail->pakfire, "Executing jail...\n");
2106
9b171c6a
MT
2107 // Become the subreaper
2108 r = prctl(PR_SET_CHILD_SUBREAPER, 1, 0, 0, 0);
2109 if (r < 0) {
2110 CTX_ERROR(jail->ctx, "Failed to become the sub-reaper: %s\n", strerror(errno));
2111 r = -errno;
2112 goto ERROR;
2113 }
2114
7bdf1d8e
MT
2115 // Enable networking in interactive mode
2116 if (interactive)
2117 ctx.flags |= PAKFIRE_JAIL_HAS_NETWORKING;
2118
9b171c6a
MT
2119 // Create a UNIX domain socket
2120 r = socketpair(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC, 0, ctx.socket);
2121 if (r < 0) {
2122 CTX_ERROR(jail->ctx, "Could not create UNIX socket: %s\n", strerror(errno));
2123 r = -errno;
2124 goto ERROR;
2125 }
2126
43dc0e16
MT
2127 /*
2128 Setup a file descriptor which can be used to notify the client that the parent
2129 has completed configuration.
2130 */
f7d240a7
MT
2131 ctx.completed_fd = eventfd(0, EFD_CLOEXEC);
2132 if (ctx.completed_fd < 0) {
43dc0e16
MT
2133 ERROR(jail->pakfire, "eventfd() failed: %m\n");
2134 return -1;
2135 }
2136
616f1fca 2137 // Create pipes to communicate with child process if we are not running interactively
58963c75 2138 if (!interactive) {
2015cb92
MT
2139 // stdin (only if callback is set)
2140 if (ctx.communicate.in) {
2141 r = pakfire_jail_setup_pipe(jail, &ctx.pipes.stdin, 0);
2142 if (r)
2143 goto ERROR;
2144 }
2145
616f1fca 2146 // stdout
e33387d3
MT
2147 r = pakfire_jail_setup_pipe(jail, &ctx.pipes.stdout, 0);
2148 if (r)
616f1fca 2149 goto ERROR;
616f1fca
MT
2150
2151 // stderr
e33387d3
MT
2152 r = pakfire_jail_setup_pipe(jail, &ctx.pipes.stderr, 0);
2153 if (r)
616f1fca 2154 goto ERROR;
616f1fca
MT
2155 }
2156
e33387d3
MT
2157 // Setup pipes for logging
2158 // INFO
2159 r = pakfire_jail_setup_pipe(jail, &ctx.pipes.log_INFO, O_CLOEXEC);
2160 if (r)
2161 goto ERROR;
2162
2163 // ERROR
2164 r = pakfire_jail_setup_pipe(jail, &ctx.pipes.log_ERROR, O_CLOEXEC);
2165 if (r)
2166 goto ERROR;
2167
2168#ifdef ENABLE_DEBUG
2169 // DEBUG
2170 r = pakfire_jail_setup_pipe(jail, &ctx.pipes.log_DEBUG, O_CLOEXEC);
2171 if (r)
2172 goto ERROR;
2173#endif /* ENABLE_DEBUG */
2174
aca565fc 2175 // Launch the process in a cgroup that is a leaf of the configured cgroup
02fd4f8b 2176 if (jail->cgroup) {
ae5201c5
MT
2177 // Fetch our UUID
2178 const char* uuid = pakfire_jail_uuid(jail);
aca565fc
MT
2179
2180 // Create a temporary cgroup
ae5201c5 2181 r = pakfire_cgroup_child(&ctx.cgroup, jail->cgroup, uuid, 0);
aca565fc
MT
2182 if (r) {
2183 ERROR(jail->pakfire, "Could not create cgroup for jail: %m\n");
2184 goto ERROR;
2185 }
02fd4f8b
MT
2186 }
2187
9b171c6a
MT
2188 /*
2189 Initially, we will set up a new mount namespace and launch a child process in it.
7bdf1d8e 2190
9b171c6a
MT
2191 This process remains in the user/ipc/time/etc. namespace and will set up
2192 the mount namespace.
2193 */
2194
2195 // Configure child process
2196 struct clone_args args = {
2197 .flags =
2198 CLONE_NEWNS |
2199 CLONE_PIDFD |
2200 CLONE_CLEAR_SIGHAND,
2201 .exit_signal = SIGCHLD,
2202 .pidfd = (long long unsigned int)&ctx.pidfd1,
2203 };
2204
2205 // Fork the first child process
2206 pid_t pid = clone3(&args, sizeof(args));
2207 if (pid < 0) {
2208 CTX_ERROR(jail->ctx, "Could not fork the first child process: %s\n", strerror(errno));
2209 r = -errno;
2210 goto ERROR;
0bd84dc1
MT
2211
2212 // Child process
9b171c6a
MT
2213 } else if (pid == 0) {
2214 r = pakfire_jail_child1(jail, &ctx, argv);
0bd84dc1
MT
2215 _exit(r);
2216 }
2217
679ee2fa 2218 // Parent process
d853213d
MT
2219 r = pakfire_jail_wait(jail, &ctx);
2220 if (r)
2221 goto ERROR;
0bd84dc1 2222
679ee2fa 2223ERROR:
aca565fc
MT
2224 // Destroy the temporary cgroup (if any)
2225 if (ctx.cgroup) {
6b7cf275 2226 // Read cgroup stats
ec64b312
MT
2227 pakfire_cgroup_stat(ctx.cgroup, &ctx.cgroup_stats);
2228 pakfire_cgroup_stat_dump(ctx.cgroup, &ctx.cgroup_stats);
aca565fc
MT
2229 pakfire_cgroup_destroy(ctx.cgroup);
2230 pakfire_cgroup_unref(ctx.cgroup);
2231 }
2232
616f1fca 2233 // Close any file descriptors
2015cb92 2234 pakfire_jail_close_pipe(jail, ctx.pipes.stdin);
e33387d3
MT
2235 pakfire_jail_close_pipe(jail, ctx.pipes.stdout);
2236 pakfire_jail_close_pipe(jail, ctx.pipes.stderr);
e33387d3
MT
2237 pakfire_jail_close_pipe(jail, ctx.pipes.log_INFO);
2238 pakfire_jail_close_pipe(jail, ctx.pipes.log_ERROR);
6ac51607 2239#ifdef ENABLE_DEBUG
e33387d3 2240 pakfire_jail_close_pipe(jail, ctx.pipes.log_DEBUG);
6ac51607 2241#endif /* ENABLE_DEBUG */
9b171c6a
MT
2242 if (ctx.pidfd1 >= 0)
2243 close(ctx.pidfd1);
2244 if (ctx.pidfd2 >= 0)
2245 close(ctx.pidfd2);
616f1fca 2246
9b171c6a
MT
2247 // Close sockets
2248 pakfire_jail_close_pipe(jail, ctx.socket);
2249
2250 return r;
9f50bf71 2251}
a45ed6b0 2252
ccdd2e95 2253PAKFIRE_EXPORT int pakfire_jail_exec(
2015cb92
MT
2254 struct pakfire_jail* jail,
2255 const char* argv[],
2256 pakfire_jail_communicate_in callback_in,
2257 pakfire_jail_communicate_out callback_out,
9fa1afb6
MT
2258 void* data, int flags) {
2259 return __pakfire_jail_exec(jail, argv, 0, callback_in, callback_out, data, flags);
2015cb92
MT
2260}
2261
db4f234f 2262static int pakfire_jail_exec_interactive(
9fa1afb6 2263 struct pakfire_jail* jail, const char* argv[], int flags) {
db4f234f
MT
2264 int r;
2265
2266 // Setup interactive stuff
2267 r = pakfire_jail_setup_interactive_env(jail);
2268 if (r)
2269 return r;
2270
9fa1afb6 2271 return __pakfire_jail_exec(jail, argv, 1, NULL, NULL, NULL, flags);
db4f234f
MT
2272}
2273
ccdd2e95
MT
2274int pakfire_jail_exec_script(struct pakfire_jail* jail,
2275 const char* script,
2276 const size_t size,
2277 const char* args[],
2278 pakfire_jail_communicate_in callback_in,
2279 pakfire_jail_communicate_out callback_out,
2280 void* data) {
a45ed6b0
MT
2281 char path[PATH_MAX];
2282 const char** argv = NULL;
35291cb7 2283 FILE* f = NULL;
a45ed6b0
MT
2284 int r;
2285
2286 const char* root = pakfire_get_path(jail->pakfire);
2287
2288 // Write the scriptlet to disk
819232d6 2289 r = pakfire_path_append(path, root, PAKFIRE_TMP_DIR "/pakfire-script.XXXXXX");
56796f84 2290 if (r)
a45ed6b0
MT
2291 goto ERROR;
2292
35291cb7
MT
2293 // Create a temporary file
2294 f = pakfire_mktemp(path, 0700);
2295 if (!f) {
2296 ERROR(jail->pakfire, "Could not create temporary file: %m\n");
a45ed6b0
MT
2297 goto ERROR;
2298 }
2299
2300 DEBUG(jail->pakfire, "Writing script to %s:\n%.*s\n", path, (int)size, script);
2301
2302 // Write data
35291cb7
MT
2303 r = fprintf(f, "%s", script);
2304 if (r < 0) {
a45ed6b0 2305 ERROR(jail->pakfire, "Could not write script to file %s: %m\n", path);
a45ed6b0
MT
2306 goto ERROR;
2307 }
2308
2309 // Close file
35291cb7 2310 r = fclose(f);
a45ed6b0
MT
2311 if (r) {
2312 ERROR(jail->pakfire, "Could not close script file %s: %m\n", path);
a45ed6b0
MT
2313 goto ERROR;
2314 }
2315
35291cb7
MT
2316 f = NULL;
2317
a45ed6b0
MT
2318 // Count how many arguments were passed
2319 unsigned int argc = 1;
2320 if (args) {
2321 for (const char** arg = args; *arg; arg++)
2322 argc++;
2323 }
2324
2325 argv = calloc(argc + 1, sizeof(*argv));
2326 if (!argv) {
2327 ERROR(jail->pakfire, "Could not allocate argv: %m\n");
2328 goto ERROR;
2329 }
2330
2331 // Set command
2332 argv[0] = (root) ? pakfire_path_relpath(root, path) : path;
2333
2334 // Copy args
2335 for (unsigned int i = 1; i < argc; i++)
2336 argv[i] = args[i-1];
2337
2338 // Run the script
9fa1afb6 2339 r = pakfire_jail_exec(jail, argv, callback_in, callback_out, data, 0);
a45ed6b0
MT
2340
2341ERROR:
2342 if (argv)
2343 free(argv);
35291cb7
MT
2344 if (f)
2345 fclose(f);
a45ed6b0
MT
2346
2347 // Remove script from disk
2348 if (*path)
2349 unlink(path);
2350
2351 return r;
2352}
82df3c77
MT
2353
2354/*
2355 A convenience function that creates a new jail, runs the given command and destroys
2356 the jail again.
2357*/
12b9b39f 2358int pakfire_jail_run(struct pakfire* pakfire, const char* argv[], int flags, char** output) {
82df3c77
MT
2359 struct pakfire_jail* jail = NULL;
2360 int r;
2361
2362 // Create a new jail
9fa1afb6 2363 r = pakfire_jail_create(&jail, pakfire);
82df3c77
MT
2364 if (r)
2365 goto ERROR;
2366
2367 // Execute the command
9fa1afb6 2368 r = pakfire_jail_exec(jail, argv, NULL, pakfire_jail_capture_stdout, output, 0);
82df3c77
MT
2369
2370ERROR:
2371 if (jail)
2372 pakfire_jail_unref(jail);
2373
2374 return r;
2375}
4f688bd8
MT
2376
/*
	Convenience helper: creates a temporary jail, runs the given script in it
	using pakfire_jail_exec_script() without any callbacks and drops the
	reference to the jail again.

	Returns the error of pakfire_jail_create() if the jail could not be
	created, otherwise the result of pakfire_jail_exec_script().

	NOTE(review): flags is accepted but not used by this function.
*/
int pakfire_jail_run_script(struct pakfire* pakfire,
		const char* script, const size_t length, const char* argv[], int flags) {
	struct pakfire_jail* jail = NULL;

	// Only execute the script if the jail could be created
	int r = pakfire_jail_create(&jail, pakfire);
	if (r == 0)
		r = pakfire_jail_exec_script(jail, script, length, argv, NULL, NULL, NULL);

	// Release the jail (if any)
	if (jail)
		pakfire_jail_unref(jail);

	return r;
}
e43489f7 2396
/*
	Launches an interactive login shell (/bin/bash --login) inside the jail.

	Negative results of the execution are passed on as errors; any other
	return code of the shell is ignored and reported as 0.
*/
int pakfire_jail_shell(struct pakfire_jail* jail) {
	const char* shell[] = {
		"/bin/bash", "--login", NULL,
	};

	// Execute /bin/bash
	const int status = pakfire_jail_exec_interactive(jail, shell, 0);

	// Only negative values are errors, everything else is the shell's own business
	return (status < 0) ? status : 0;
}
2414
f7ffbb93 2415static int pakfire_jail_run_if_possible(struct pakfire* pakfire, const char** argv) {
e43489f7 2416 char path[PATH_MAX];
f7ffbb93 2417 int r;
e43489f7 2418
f7ffbb93 2419 r = pakfire_path(pakfire, path, "%s", *argv);
77e26129
MT
2420 if (r)
2421 return r;
e43489f7 2422
f7ffbb93 2423 // Check if the file is executable
e43489f7
MT
2424 r = access(path, X_OK);
2425 if (r) {
f7ffbb93 2426 DEBUG(pakfire, "%s is not executable. Skipping...\n", *argv);
e43489f7
MT
2427 return 0;
2428 }
2429
f7ffbb93
MT
2430 return pakfire_jail_run(pakfire, argv, 0, NULL);
2431}
2432
/*
	Runs /sbin/ldconfig through pakfire_jail_run_if_possible(), which skips
	the command if it is not executable.
*/
int pakfire_jail_ldconfig(struct pakfire* pakfire) {
	const char* command[] = { "/sbin/ldconfig", NULL };

	return pakfire_jail_run_if_possible(pakfire, command);
}
2441
/*
	Runs "/usr/bin/systemd-tmpfiles --create" through
	pakfire_jail_run_if_possible(), which skips the command if it is not
	executable.
*/
int pakfire_jail_run_systemd_tmpfiles(struct pakfire* pakfire) {
	const char* command[] = { "/usr/bin/systemd-tmpfiles", "--create", NULL };

	return pakfire_jail_run_if_possible(pakfire, command);
}