]> git.ipfire.org Git - people/ms/pakfire.git/blame - src/libpakfire/jail.c
strip: Apply hack to preserve capabilities
[people/ms/pakfire.git] / src / libpakfire / jail.c
CommitLineData
fd37ccaf
MT
1/*#############################################################################
2# #
3# Pakfire - The IPFire package management system #
4# Copyright (C) 2022 Pakfire development team #
5# #
6# This program is free software: you can redistribute it and/or modify #
7# it under the terms of the GNU General Public License as published by #
8# the Free Software Foundation, either version 3 of the License, or #
9# (at your option) any later version. #
10# #
11# This program is distributed in the hope that it will be useful, #
12# but WITHOUT ANY WARRANTY; without even the implied warranty of #
13# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
14# GNU General Public License for more details. #
15# #
16# You should have received a copy of the GNU General Public License #
17# along with this program. If not, see <http://www.gnu.org/licenses/>. #
18# #
19#############################################################################*/
20
32d5f21d 21#include <errno.h>
bcf09bf5 22#include <fcntl.h>
980b15af 23#include <linux/capability.h>
0bd84dc1 24#include <linux/sched.h>
58ee649f 25#include <sys/wait.h>
4f23b498 26#include <linux/wait.h>
0bd84dc1
MT
27#include <sched.h>
28#include <signal.h>
32d5f21d 29#include <stdlib.h>
0bd84dc1 30#include <syscall.h>
980b15af 31#include <sys/capability.h>
616f1fca 32#include <sys/epoll.h>
43dc0e16 33#include <sys/eventfd.h>
7bdf1d8e 34#include <sys/mount.h>
90d92b5c 35#include <sys/personality.h>
980b15af 36#include <sys/prctl.h>
cf440db8 37#include <sys/resource.h>
335b8a44 38#include <sys/timerfd.h>
0bd84dc1
MT
39#include <sys/types.h>
40#include <sys/wait.h>
32d5f21d 41
739d5b57
MT
42// libseccomp
43#include <seccomp.h>
44
ae5201c5
MT
45// libuuid
46#include <uuid.h>
47
90d92b5c 48#include <pakfire/arch.h>
e3ddb498 49#include <pakfire/cgroup.h>
fd37ccaf 50#include <pakfire/jail.h>
4f59c39b
MT
51#include <pakfire/logging.h>
52#include <pakfire/mount.h>
fd37ccaf 53#include <pakfire/pakfire.h>
6ce56f90 54#include <pakfire/private.h>
4896e62c 55#include <pakfire/pwd.h>
d973a13d 56#include <pakfire/string.h>
32d5f21d
MT
57#include <pakfire/util.h>
58
616f1fca
MT
// Size in bytes of every log buffer. The expression is parenthesised so that
// the macro expands correctly inside larger expressions (e.g. x / BUFFER_SIZE).
#define BUFFER_SIZE      (1024 * 64)
// Number of slots in the environment array of a jail
#define ENVIRON_SIZE     128
// Size of the event array that is passed to epoll_wait()
#define EPOLL_MAX_EVENTS 2
// Number of slots in the mountpoint array of a jail
#define MAX_MOUNTPOINTS  8
fd37ccaf 63
d5bc8fe0
MT
/*
	Default environment variables. pakfire_jail_create() walks this table
	until it reaches the entry with a NULL key and sets every variable.
*/
static const struct environ {
	const char* key;
	const char* val;
} ENV[] = {
	{ .key = "HOME", .val = "/root" },
	{ .key = "LANG", .val = "C.utf-8" },
	{ .key = "PATH", .val = "/usr/local/sbin:/usr/sbin:/sbin:/usr/local/bin:/usr/bin:/bin" },
	{ .key = "TERM", .val = "vt100" },

	// Tell everything that it is running inside a Pakfire container
	{ .key = "container", .val = "pakfire" },

	// Sentinel
	{ .key = NULL, .val = NULL },
};
78
cc6e2264
MT
/*
	One custom bind-mount that has been registered with pakfire_jail_bind()
*/
struct pakfire_jail_mountpoint {
	// Path that is passed as the source to pakfire_bind()
	char source[PATH_MAX];

	// Path that is passed as the target to pakfire_bind()
	char target[PATH_MAX];

	// Flags that are passed through to pakfire_bind()
	int flags;
};
84
fd37ccaf
MT
85struct pakfire_jail {
86 struct pakfire* pakfire;
87 int nrefs;
32d5f21d 88
ae5201c5
MT
89 // A unique ID for each jail
90 uuid_t uuid;
91 char __uuid[UUID_STR_LEN];
92
d639929b
MT
93 // Flags
94 int flags;
95
cf440db8
MT
96 // Resource Limits
97 int nice;
98
335b8a44
MT
99 // Timeout
100 struct itimerspec timeout;
101
15503538
MT
102 // CGroup
103 struct pakfire_cgroup* cgroup;
104
32d5f21d
MT
105 // Environment
106 char* env[ENVIRON_SIZE];
616f1fca 107
cc6e2264
MT
108 // Mountpoints
109 struct pakfire_jail_mountpoint mountpoints[MAX_MOUNTPOINTS];
110 unsigned int num_mountpoints;
616f1fca
MT
111};
112
113struct pakfire_log_buffer {
114 char data[BUFFER_SIZE];
115 size_t used;
116};
117
7bdf1d8e
MT
/*
	Bit flags that are stored in struct pakfire_jail_exec
*/
enum pakfire_jail_exec_flags {
	// When set, pakfire_jail_mount() also mounts the networking files
	PAKFIRE_JAIL_HAS_NETWORKING = 1 << 0,
};
121
616f1fca 122struct pakfire_jail_exec {
7bdf1d8e
MT
123 int flags;
124
616f1fca
MT
125 // PID (of the child)
126 pid_t pid;
d853213d 127 int pidfd;
616f1fca 128
d853213d
MT
129 // Process status (from waitid)
130 siginfo_t status;
616f1fca 131
f7d240a7
MT
132 // FD to notify the client that the parent has finished initialization
133 int completed_fd;
134
616f1fca 135 // Log pipes
e33387d3 136 struct pakfire_jail_pipes {
2015cb92 137 int stdin[2];
616f1fca
MT
138 int stdout[2];
139 int stderr[2];
e33387d3
MT
140
141 // Logging
142 int log_INFO[2];
143 int log_ERROR[2];
144 int log_DEBUG[2];
616f1fca
MT
145 } pipes;
146
2015cb92
MT
147 // Communicate
148 struct pakfire_jail_communicate {
149 pakfire_jail_communicate_in in;
150 pakfire_jail_communicate_out out;
151 void* data;
152 } communicate;
153
616f1fca 154 // Log buffers
e33387d3 155 struct pakfire_jail_buffers {
616f1fca
MT
156 struct pakfire_log_buffer stdout;
157 struct pakfire_log_buffer stderr;
e33387d3
MT
158
159 // Logging
160 struct pakfire_log_buffer log_INFO;
161 struct pakfire_log_buffer log_ERROR;
162 struct pakfire_log_buffer log_DEBUG;
616f1fca 163 } buffers;
aca565fc
MT
164
165 struct pakfire_cgroup* cgroup;
6b7cf275 166 struct pakfire_cgroup_stats cgroup_stats;
fd37ccaf
MT
167};
168
0bd84dc1
MT
/*
	Invokes the clone3 system call through syscall() and returns its result.
*/
static int clone3(struct clone_args* args, size_t size) {
	const long result = syscall(__NR_clone3, args, size);

	return result;
}
172
335b8a44
MT
173static int pidfd_send_signal(int pidfd, int sig, siginfo_t* info, unsigned int flags) {
174 return syscall(SYS_pidfd_send_signal, pidfd, sig, info, flags);
175}
176
7bdf1d8e
MT
177static int pakfire_jail_exec_has_flag(
178 const struct pakfire_jail_exec* ctx, const enum pakfire_jail_exec_flags flag) {
179 return ctx->flags & flag;
180}
181
d5bc8fe0
MT
182static void pakfire_jail_free(struct pakfire_jail* jail) {
183 DEBUG(jail->pakfire, "Freeing jail at %p\n", jail);
184
185 // Free environment
186 for (unsigned int i = 0; jail->env[i]; i++)
187 free(jail->env[i]);
188
d34b1e00
MT
189 if (jail->cgroup)
190 pakfire_cgroup_unref(jail->cgroup);
191
d5bc8fe0
MT
192 pakfire_unref(jail->pakfire);
193 free(jail);
194}
195
e33387d3
MT
/*
	Forwards a line to the INFO/ERROR/DEBUG logging macros depending on its
	priority. Lines of any other priority are dropped. Always returns zero.
*/
static int pakfire_jail_default_log_callback(struct pakfire* pakfire, void* data,
		int priority, const char* line, size_t length) {
	if (priority == LOG_INFO) {
		INFO(pakfire, "%s", line);

	} else if (priority == LOG_ERR) {
		ERROR(pakfire, "%s", line);

#ifdef ENABLE_DEBUG
	} else if (priority == LOG_DEBUG) {
		DEBUG(pakfire, "%s", line);
#endif
	}

	return 0;
}
219
3bf01105
MT
220static const char* pakfire_jail_uuid(struct pakfire_jail* jail) {
221 if (!*jail->__uuid)
222 uuid_unparse_lower(jail->uuid, jail->__uuid);
223
224 return jail->__uuid;
225}
226
00ba1d9a
MT
/*
	Prepares the environment for an interactive session: sets PS1 and copies
	TERM and LANG from the environment of the calling process (if set).

	Returns zero on success or the error returned by pakfire_jail_set_env().
*/
static int pakfire_jail_setup_interactive_env(struct pakfire_jail* jail) {
	// Variables that are copied from the calling process
	static const char* const copied[] = {
		"TERM",
		"LANG",
		NULL,
	};

	// Set PS1
	int r = pakfire_jail_set_env(jail, "PS1", "pakfire-jail \\w> ");
	if (r)
		return r;

	for (const char* const* name = copied; *name; name++) {
		char* value = secure_getenv(*name);

		// Skip anything that is not set
		if (!value)
			continue;

		r = pakfire_jail_set_env(jail, *name, value);
		if (r)
			return r;
	}

	return 0;
}
251
6ce56f90
MT
252PAKFIRE_EXPORT int pakfire_jail_create(struct pakfire_jail** jail,
253 struct pakfire* pakfire, int flags) {
d5bc8fe0
MT
254 int r;
255
aac86bd3
MT
256 const char* arch = pakfire_get_arch(pakfire);
257
d5bc8fe0 258 // Allocate a new jail
fd37ccaf
MT
259 struct pakfire_jail* j = calloc(1, sizeof(*j));
260 if (!j)
261 return 1;
262
263 // Reference Pakfire
264 j->pakfire = pakfire_ref(pakfire);
265
266 // Initialize reference counter
267 j->nrefs = 1;
268
d639929b
MT
269 // Store flags
270 j->flags = flags;
271
ae5201c5
MT
272 // Generate a random UUID
273 uuid_generate_random(j->uuid);
274
84bd7655
MT
275 DEBUG(j->pakfire, "Allocated new jail at %p\n", j);
276
d5bc8fe0
MT
277 // Set default environment
278 for (const struct environ* e = ENV; e->key; e++) {
279 r = pakfire_jail_set_env(j, e->key, e->val);
280 if (r)
281 goto ERROR;
282 }
283
aac86bd3
MT
284 // Enable all CPU features that CPU has to offer
285 if (!pakfire_arch_supported_by_host(arch)) {
286 r = pakfire_jail_set_env(j, "QEMU_CPU", "max");
287 if (r)
288 goto ERROR;
289 }
290
3bf01105
MT
291 // Set container UUID
292 r = pakfire_jail_set_env(j, "container_uuid", pakfire_jail_uuid(j));
293 if (r)
294 goto ERROR;
295
367e708d
MT
296 // Disable systemctl to talk to systemd
297 if (!pakfire_on_root(j->pakfire)) {
298 r = pakfire_jail_set_env(j, "SYSTEMD_OFFLINE", "1");
299 if (r)
300 goto ERROR;
301 }
302
fd37ccaf
MT
303 // Done
304 *jail = j;
305 return 0;
84bd7655 306
d5bc8fe0
MT
307ERROR:
308 pakfire_jail_free(j);
32d5f21d 309
d5bc8fe0 310 return r;
fd37ccaf
MT
311}
312
6ce56f90 313PAKFIRE_EXPORT struct pakfire_jail* pakfire_jail_ref(struct pakfire_jail* jail) {
fd37ccaf
MT
314 ++jail->nrefs;
315
316 return jail;
317}
318
6ce56f90 319PAKFIRE_EXPORT struct pakfire_jail* pakfire_jail_unref(struct pakfire_jail* jail) {
fd37ccaf
MT
320 if (--jail->nrefs > 0)
321 return jail;
322
323 pakfire_jail_free(jail);
324 return NULL;
325}
32d5f21d 326
cf440db8
MT
327// Resource Limits
328
329PAKFIRE_EXPORT int pakfire_jail_nice(struct pakfire_jail* jail, int nice) {
330 // Check if nice level is in range
331 if (nice < -19 || nice > 20) {
332 errno = EINVAL;
333 return 1;
334 }
335
336 // Store nice level
337 jail->nice = nice;
338
339 return 0;
340}
341
15503538
MT
342int pakfire_jail_set_cgroup(struct pakfire_jail* jail, struct pakfire_cgroup* cgroup) {
343 // Free any previous cgroup
344 if (jail->cgroup) {
345 pakfire_cgroup_unref(jail->cgroup);
346 jail->cgroup = NULL;
347 }
348
349 // Set any new cgroup
350 if (cgroup) {
351 DEBUG(jail->pakfire, "Setting cgroup %p\n", cgroup);
352
353 jail->cgroup = pakfire_cgroup_ref(cgroup);
354 }
355
356 // Done
357 return 0;
358}
359
32d5f21d
MT
360// Environment
361
362// Returns the length of the environment
363static unsigned int pakfire_jail_env_length(struct pakfire_jail* jail) {
364 unsigned int i = 0;
365
366 // Count everything in the environment
367 for (char** e = jail->env; *e; e++)
368 i++;
369
370 return i;
371}
372
373// Finds an existing environment variable and returns its index or -1 if not found
374static int pakfire_jail_find_env(struct pakfire_jail* jail, const char* key) {
375 if (!key) {
376 errno = EINVAL;
377 return -1;
378 }
379
380 char buffer[strlen(key) + 2];
381 pakfire_string_format(buffer, "%s=", key);
382
383 for (unsigned int i = 0; jail->env[i]; i++) {
384 if (pakfire_string_startswith(jail->env[i], buffer))
385 return i;
386 }
387
388 // Nothing found
389 return -1;
390}
391
392// Returns the value of an environment variable or NULL
6ce56f90
MT
393PAKFIRE_EXPORT const char* pakfire_jail_get_env(struct pakfire_jail* jail,
394 const char* key) {
32d5f21d
MT
395 int i = pakfire_jail_find_env(jail, key);
396 if (i < 0)
397 return NULL;
398
399 return jail->env[i] + strlen(key) + 1;
400}
401
402// Sets an environment variable
6ce56f90
MT
403PAKFIRE_EXPORT int pakfire_jail_set_env(struct pakfire_jail* jail,
404 const char* key, const char* value) {
32d5f21d
MT
405 // Find the index where to write this value to
406 int i = pakfire_jail_find_env(jail, key);
407 if (i < 0)
408 i = pakfire_jail_env_length(jail);
409
410 // Return -ENOSPC when the environment is full
411 if (i >= ENVIRON_SIZE) {
412 errno = ENOSPC;
413 return -1;
414 }
415
416 // Free any previous value
417 if (jail->env[i])
418 free(jail->env[i]);
419
420 // Format and set environment variable
421 asprintf(&jail->env[i], "%s=%s", key, value);
422
423 DEBUG(jail->pakfire, "Set environment variable: %s\n", jail->env[i]);
424
425 return 0;
426}
9f50bf71 427
939025e7 428// Imports an environment
6ce56f90 429PAKFIRE_EXPORT int pakfire_jail_import_env(struct pakfire_jail* jail, const char* env[]) {
939025e7
MT
430 if (!env)
431 return 0;
432
433 char* key;
434 char* val;
435 int r;
436
437 // Copy environment variables
438 for (unsigned int i = 0; env[i]; i++) {
439 r = pakfire_string_partition(env[i], "=", &key, &val);
440 if (r)
441 continue;
442
443 // Set value
444 r = pakfire_jail_set_env(jail, key, val);
445
446 if (key)
447 free(key);
448 if (val)
449 free(val);
450
451 // Break on error
452 if (r)
453 return r;
454 }
455
456 return 0;
457}
458
335b8a44
MT
459// Timeout
460
461PAKFIRE_EXPORT int pakfire_jail_set_timeout(
462 struct pakfire_jail* jail, unsigned int timeout) {
463 // Store value
464 jail->timeout.it_value.tv_sec = timeout;
465
466 if (timeout > 0)
467 DEBUG(jail->pakfire, "Timeout set to %d second(s)\n", timeout);
468 else
469 DEBUG(jail->pakfire, "Timeout disabled\n");
470
471 return 0;
472}
473
474static int pakfire_jail_create_timer(struct pakfire_jail* jail) {
475 int r;
476
477 // Nothing to do if no timeout has been set
478 if (!jail->timeout.it_value.tv_sec)
479 return -1;
480
481 // Create a new timer
482 const int fd = timerfd_create(CLOCK_MONOTONIC, 0);
483 if (fd < 0) {
484 ERROR(jail->pakfire, "Could not create timer: %m\n");
485 goto ERROR;
486 }
487
488 // Arm timer
489 r = timerfd_settime(fd, 0, &jail->timeout, NULL);
490 if (r) {
491 ERROR(jail->pakfire, "Could not arm timer: %m\n");
492 goto ERROR;
493 }
494
495 return fd;
496
497ERROR:
498 if (fd > 0)
499 close(fd);
500
501 return -1;
502}
503
e33387d3
MT
504/*
505 This function replaces any logging in the child process.
506
507 All log messages will be sent to the parent process through their respective pipes.
508*/
509static void pakfire_jail_log(void* data, int priority, const char* file,
510 int line, const char* fn, const char* format, va_list args) {
511 struct pakfire_jail_pipes* pipes = (struct pakfire_jail_pipes*)data;
512 int fd;
513
514 switch (priority) {
515 case LOG_INFO:
516 fd = pipes->log_INFO[1];
517 break;
518
519 case LOG_ERR:
520 fd = pipes->log_ERROR[1];
521 break;
522
523#ifdef ENABLE_DEBUG
524 case LOG_DEBUG:
525 fd = pipes->log_DEBUG[1];
526 break;
527#endif /* ENABLE_DEBUG */
528
529 // Ignore any messages of an unknown priority
530 default:
531 return;
532 }
533
534 // Send the log message
535 if (fd)
536 vdprintf(fd, format, args);
537}
538
616f1fca
MT
539static int pakfire_jail_log_buffer_is_full(const struct pakfire_log_buffer* buffer) {
540 return (sizeof(buffer->data) == buffer->used);
541}
542
543/*
544 This function reads as much data as it can from the file descriptor.
545 If it finds a whole line in it, it will send it to the logger and repeat the process.
546 If not newline character is found, it will try to read more data until it finds one.
547*/
548static int pakfire_jail_handle_log(struct pakfire_jail* jail,
e33387d3 549 struct pakfire_jail_exec* ctx, int priority, int fd,
2015cb92 550 struct pakfire_log_buffer* buffer, pakfire_jail_communicate_out callback, void* data) {
616f1fca
MT
551 char line[BUFFER_SIZE + 1];
552
553 // Fill up buffer from fd
554 if (buffer->used < sizeof(buffer->data)) {
555 ssize_t bytes_read = read(fd, buffer->data + buffer->used,
556 sizeof(buffer->data) - buffer->used);
557
558 // Handle errors
559 if (bytes_read < 0) {
560 ERROR(jail->pakfire, "Could not read from fd %d: %m\n", fd);
561 return -1;
562 }
563
564 // Update buffer size
565 buffer->used += bytes_read;
566 }
567
568 // See if we have any lines that we can write
569 while (buffer->used) {
570 // Search for the end of the first line
571 char* eol = memchr(buffer->data, '\n', buffer->used);
572
573 // No newline found
574 if (!eol) {
575 // If the buffer is full, we send the content to the logger and try again
576 // This should not happen in practise
577 if (pakfire_jail_log_buffer_is_full(buffer)) {
578 DEBUG(jail->pakfire, "Logging buffer is full. Sending all content\n");
579
580 eol = buffer->data + sizeof(buffer->data) - 1;
581
582 // Otherwise we might have only read parts of the output
583 } else
584 break;
585 }
586
587 // Find the length of the string
588 size_t length = eol - buffer->data + 1;
589
590 // Copy the line into the buffer
591 memcpy(line, buffer->data, length);
592
593 // Terminate the string
594 line[length] = '\0';
595
596 // Log the line
e33387d3
MT
597 if (callback) {
598 int r = callback(jail->pakfire, data, priority, line, length);
616f1fca
MT
599 if (r) {
600 ERROR(jail->pakfire, "The logging callback returned an error: %d\n", r);
601 return r;
602 }
603 }
604
605 // Remove line from buffer
606 memmove(buffer->data, buffer->data + length, buffer->used - length);
607 buffer->used -= length;
608 }
609
610 return 0;
611}
612
06b864ae
MT
613static int pakfire_jail_stream_stdin(struct pakfire_jail* jail,
614 struct pakfire_jail_exec* ctx, const int fd) {
615 int r;
616
617 // Nothing to do if there is no stdin callback set
618 if (!ctx->communicate.in) {
619 DEBUG(jail->pakfire, "Callback for standard input is not set\n");
620 return 0;
621 }
622
f5a70a96
MT
623 // Skip if the writing pipe has already been closed
624 if (!ctx->pipes.stdin[1])
625 return 0;
626
06b864ae
MT
627 DEBUG(jail->pakfire, "Streaming standard input...\n");
628
629 // Calling the callback
630 r = ctx->communicate.in(jail->pakfire, ctx->communicate.data, fd);
631
632 DEBUG(jail->pakfire, "Standard input callback finished: %d\n", r);
633
634 // The callback signaled that it has written everything
635 if (r == EOF) {
636 DEBUG(jail->pakfire, "Closing standard input pipe\n");
637
f5a70a96 638 // Close the file-descriptor
06b864ae 639 close(fd);
f5a70a96
MT
640
641 // Reset the file-descriptor so it won't be closed again later
642 ctx->pipes.stdin[1] = 0;
643
644 // Report success
06b864ae
MT
645 r = 0;
646 }
647
648 return r;
649}
650
195fe455
MT
651static int pakfire_jail_setup_pipe(struct pakfire_jail* jail, int (*fds)[2], const int flags) {
652 int r = pipe2(*fds, flags);
653 if (r < 0) {
654 ERROR(jail->pakfire, "Could not setup pipe: %m\n");
655 return 1;
656 }
657
658 return 0;
659}
660
/*
	Closes both ends of a pipe. Ends which are zero are skipped.
*/
static void pakfire_jail_close_pipe(struct pakfire_jail* jail, int fds[2]) {
	int* const end = fds + 2;

	for (int* fd = fds; fd < end; fd++) {
		if (*fd)
			close(*fd);
	}
}
666
e33387d3
MT
/*
	Closes the write end of the pipe (unless it is zero), resets it to zero
	and returns the read end.
*/
static int pakfire_jail_get_pipe_to_read(struct pakfire_jail* jail, int (*fds)[2]) {
	// Index 0 is the read end, index 1 is the write end
	int* const pipefd = *fds;

	// Close the write end of the pipe
	if (pipefd[1]) {
		close(pipefd[1]);
		pipefd[1] = 0;
	}

	// Return the read end
	return pipefd[0];
}
685
06b864ae
MT
/*
	Closes the read end of the pipe (unless it is zero), resets it to zero
	and returns the write end.
*/
static int pakfire_jail_get_pipe_to_write(struct pakfire_jail* jail, int (*fds)[2]) {
	// Index 0 is the read end, index 1 is the write end
	int* const pipefd = *fds;

	// Close the read end of the pipe
	if (pipefd[0]) {
		close(pipefd[0]);
		pipefd[0] = 0;
	}

	// Return the write end
	return pipefd[1];
}
700
d853213d 701static int pakfire_jail_wait(struct pakfire_jail* jail, struct pakfire_jail_exec* ctx) {
616f1fca
MT
702 int epollfd = -1;
703 struct epoll_event ev;
704 struct epoll_event events[EPOLL_MAX_EVENTS];
335b8a44 705 char garbage[8];
616f1fca
MT
706 int r = 0;
707
708 // Fetch file descriptors from context
06b864ae
MT
709 const int stdin = pakfire_jail_get_pipe_to_write(jail, &ctx->pipes.stdin);
710 const int stdout = pakfire_jail_get_pipe_to_read(jail, &ctx->pipes.stdout);
711 const int stderr = pakfire_jail_get_pipe_to_read(jail, &ctx->pipes.stderr);
d853213d 712 const int pidfd = ctx->pidfd;
616f1fca 713
335b8a44
MT
714 // Timer
715 const int timerfd = pakfire_jail_create_timer(jail);
716
e33387d3 717 // Logging
06b864ae
MT
718 const int log_INFO = pakfire_jail_get_pipe_to_read(jail, &ctx->pipes.log_INFO);
719 const int log_ERROR = pakfire_jail_get_pipe_to_read(jail, &ctx->pipes.log_ERROR);
720 const int log_DEBUG = pakfire_jail_get_pipe_to_read(jail, &ctx->pipes.log_DEBUG);
7ebfb7cb
MT
721
722 // Make a list of all file descriptors we are interested in
d853213d 723 int fds[] = {
335b8a44 724 stdin, stdout, stderr, pidfd, timerfd, log_INFO, log_ERROR, log_DEBUG,
616f1fca
MT
725 };
726
727 // Setup epoll
728 epollfd = epoll_create1(0);
729 if (epollfd < 0) {
730 ERROR(jail->pakfire, "Could not initialize epoll(): %m\n");
731 r = 1;
d853213d 732 goto ERROR;
616f1fca
MT
733 }
734
616f1fca 735 // Turn file descriptors into non-blocking mode and add them to epoll()
3aad8c0d 736 for (unsigned int i = 0; i < sizeof(fds) / sizeof(*fds); i++) {
616f1fca
MT
737 int fd = fds[i];
738
d853213d
MT
739 // Skip fds which were not initialized
740 if (fd <= 0)
741 continue;
742
e48beb24
MT
743 ev.events = EPOLLHUP;
744
745 if (fd == stdin)
746 ev.events |= EPOLLOUT;
747 else
748 ev.events |= EPOLLIN;
749
06b864ae
MT
750 // Read flags
751 int flags = fcntl(fd, F_GETFL, 0);
752
753 // Set modified flags
754 if (fcntl(fd, F_SETFL, flags|O_NONBLOCK) < 0) {
755 ERROR(jail->pakfire,
756 "Could not set file descriptor %d into non-blocking mode: %m\n", fd);
757 r = 1;
758 goto ERROR;
759 }
760
616f1fca
MT
761 ev.data.fd = fd;
762
763 if (epoll_ctl(epollfd, EPOLL_CTL_ADD, fd, &ev) < 0) {
764 ERROR(jail->pakfire, "Could not add file descriptor %d to epoll(): %m\n", fd);
765 r = 1;
d853213d 766 goto ERROR;
616f1fca
MT
767 }
768 }
769
770 int ended = 0;
771
772 // Loop for as long as the process is alive
773 while (!ended) {
616f1fca
MT
774 int num = epoll_wait(epollfd, events, EPOLL_MAX_EVENTS, -1);
775 if (num < 1) {
776 // Ignore if epoll_wait() has been interrupted
777 if (errno == EINTR)
778 continue;
779
780 ERROR(jail->pakfire, "epoll_wait() failed: %m\n");
781 r = 1;
782
d853213d 783 goto ERROR;
616f1fca
MT
784 }
785
616f1fca 786 for (int i = 0; i < num; i++) {
e068b964 787 int e = events[i].events;
616f1fca
MT
788 int fd = events[i].data.fd;
789
e33387d3 790 struct pakfire_log_buffer* buffer = NULL;
2015cb92 791 pakfire_jail_communicate_out callback = NULL;
e33387d3
MT
792 void* data = NULL;
793 int priority;
794
e068b964
MT
795 // Check if there is any data to be read
796 if (e & EPOLLIN) {
797 // Handle any changes to the PIDFD
798 if (fd == pidfd) {
799 // Call waidid() and store the result
800 r = waitid(P_PIDFD, ctx->pidfd, &ctx->status, WEXITED);
801 if (r) {
802 ERROR(jail->pakfire, "waitid() failed: %m\n");
803 goto ERROR;
804 }
d853213d 805
e068b964
MT
806 // Mark that we have ended so that we will process the remaining
807 // events from epoll() now, but won't restart the outer loop.
808 ended = 1;
809 continue;
d853213d 810
335b8a44
MT
811 // Handle timer events
812 } else if (fd == timerfd) {
813 DEBUG(jail->pakfire, "Timer event received\n");
814
815 // Disarm the timer
816 r = read(timerfd, garbage, sizeof(garbage));
817 if (r < 1) {
818 ERROR(jail->pakfire, "Could not disarm timer: %m\n");
819 r = 1;
820 goto ERROR;
821 }
822
823 // Terminate the process if it hasn't already ended
824 if (!ended) {
825 DEBUG(jail->pakfire, "Terminating process...\n");
826
827 // Send SIGTERM to the process
828 r = pidfd_send_signal(pidfd, SIGKILL, NULL, 0);
829 if (r) {
830 ERROR(jail->pakfire, "Could not kill process: %m\n");
831 goto ERROR;
832 }
833 }
834
835 // There is nothing else to do
836 continue;
837
e068b964
MT
838 // Handle logging messages
839 } else if (fd == log_INFO) {
840 buffer = &ctx->buffers.log_INFO;
841 priority = LOG_INFO;
e33387d3 842
e068b964 843 callback = pakfire_jail_default_log_callback;
e33387d3 844
e068b964
MT
845 } else if (fd == log_ERROR) {
846 buffer = &ctx->buffers.log_ERROR;
847 priority = LOG_ERR;
e33387d3 848
e068b964 849 callback = pakfire_jail_default_log_callback;
e33387d3 850
e068b964
MT
851 } else if (fd == log_DEBUG) {
852 buffer = &ctx->buffers.log_DEBUG;
853 priority = LOG_DEBUG;
e33387d3 854
e068b964 855 callback = pakfire_jail_default_log_callback;
e33387d3 856
e068b964
MT
857 // Handle anything from the log pipes
858 } else if (fd == stdout) {
859 buffer = &ctx->buffers.stdout;
860 priority = LOG_INFO;
616f1fca 861
2015cb92
MT
862 callback = ctx->communicate.out;
863 data = ctx->communicate.data;
e33387d3 864
e068b964
MT
865 } else if (fd == stderr) {
866 buffer = &ctx->buffers.stderr;
867 priority = LOG_ERR;
616f1fca 868
2015cb92
MT
869 callback = ctx->communicate.out;
870 data = ctx->communicate.data;
e33387d3 871
e068b964
MT
872 } else {
873 DEBUG(jail->pakfire, "Received invalid file descriptor %d\n", fd);
874 continue;
875 }
876
877 // Handle log event
878 r = pakfire_jail_handle_log(jail, ctx, priority, fd, buffer, callback, data);
879 if (r)
880 goto ERROR;
616f1fca
MT
881 }
882
06b864ae
MT
883 if (e & EPOLLOUT) {
884 // Handle standard input
885 if (fd == stdin) {
886 r = pakfire_jail_stream_stdin(jail, ctx, fd);
887 if (r) {
888 switch (errno) {
889 // Ignore if we filled up the buffer
890 case EAGAIN:
891 break;
892
893 default:
894 ERROR(jail->pakfire, "Could not write to stdin: %m\n");
895 goto ERROR;
896 }
897 }
898 }
899 }
900
e068b964
MT
901 // Check if any file descriptors have been closed
902 if (e & EPOLLHUP) {
903 // Remove the file descriptor
904 r = epoll_ctl(epollfd, EPOLL_CTL_DEL, fd, NULL);
905 if (r) {
906 ERROR(jail->pakfire, "Could not remove closed file-descriptor %d: %m\n", fd);
907 goto ERROR;
908 }
909 }
616f1fca
MT
910 }
911 }
912
d853213d 913ERROR:
616f1fca
MT
914 if (epollfd > 0)
915 close(epollfd);
335b8a44
MT
916 if (timerfd > 0)
917 close(timerfd);
616f1fca
MT
918
919 return r;
920}
921
ccdd2e95
MT
/*
	Output callback which collects everything from standard output in a string.

	data is interpreted as a pointer to a string (char**). Every line of
	priority LOG_INFO is appended to that string. Everything else (or all
	output if data is NULL) is passed on to the default log callback.

	The string is re-allocated on every call and the previous buffer is
	released, so *data has to be NULL or heap-allocated.
	NOTE(review): callers are not visible here - confirm that they
	initialise the string to NULL and free() it afterwards.

	Returns zero on success and 1 if the string could not be allocated, in
	which case the string collected so far is left untouched.
*/
int pakfire_jail_capture_stdout(struct pakfire* pakfire, void* data,
		int priority, const char* line, size_t length) {
	char** output = (char**)data;
	char* buffer = NULL;
	int r;

	// Append everything from stdout to a buffer
	if (output && priority == LOG_INFO) {
		r = asprintf(&buffer, "%s%s", (*output) ? *output : "", line);
		if (r < 0)
			return 1;

		// Release the previous buffer which would otherwise be leaked
		free(*output);
		*output = buffer;

		return 0;
	}

	// Send everything else to the default logger
	return pakfire_jail_default_log_callback(pakfire, NULL, priority, line, length);
}
938
980b15af
MT
939// Capabilities
940
941static int pakfire_jail_drop_capabilities(struct pakfire_jail* jail) {
942 const int capabilities[] = {
943 // Deny access to the kernel's audit system
944 CAP_AUDIT_CONTROL,
945 CAP_AUDIT_READ,
946 CAP_AUDIT_WRITE,
947
948 // Deny suspending block devices
949 CAP_BLOCK_SUSPEND,
950
951 // Deny any stuff with BPF
952 CAP_BPF,
953
954 // Deny checkpoint restore
955 CAP_CHECKPOINT_RESTORE,
956
957 // Deny opening files by inode number (open_by_handle_at)
958 CAP_DAC_READ_SEARCH,
959
960 // Deny setting SUID bits
961 CAP_FSETID,
962
963 // Deny locking more memory
964 CAP_IPC_LOCK,
965
966 // Deny modifying any Apparmor/SELinux/SMACK configuration
967 CAP_MAC_ADMIN,
968 CAP_MAC_OVERRIDE,
969
970 // Deny creating any special devices
971 CAP_MKNOD,
972
980b15af
MT
973 // Deny reading from syslog
974 CAP_SYSLOG,
975
976 // Deny any admin actions (mount, sethostname, ...)
977 CAP_SYS_ADMIN,
978
979 // Deny rebooting the system
980 CAP_SYS_BOOT,
981
982 // Deny loading kernel modules
983 CAP_SYS_MODULE,
984
985 // Deny setting nice level
986 CAP_SYS_NICE,
987
988 // Deny access to /proc/kcore, /dev/mem, /dev/kmem
989 CAP_SYS_RAWIO,
990
991 // Deny circumventing any resource limits
992 CAP_SYS_RESOURCE,
993
994 // Deny setting the system time
995 CAP_SYS_TIME,
996
997 // Deny playing with suspend
998 CAP_WAKE_ALARM,
999
1000 0,
1001 };
1002
1003 DEBUG(jail->pakfire, "Dropping capabilities...\n");
1004
1005 size_t num_caps = 0;
1006 int r;
1007
1008 // Drop any capabilities
1009 for (const int* cap = capabilities; *cap; cap++) {
1010 r = prctl(PR_CAPBSET_DROP, *cap, 0, 0, 0);
1011 if (r) {
1012 ERROR(jail->pakfire, "Could not drop capability %d: %m\n", *cap);
1013 return r;
1014 }
1015
1016 num_caps++;
1017 }
1018
1019 // Fetch any capabilities
1020 cap_t caps = cap_get_proc();
1021 if (!caps) {
1022 ERROR(jail->pakfire, "Could not read capabilities: %m\n");
1023 return 1;
1024 }
1025
1026 /*
1027 Set inheritable capabilities
1028
1029 This ensures that no processes will be able to gain any of the listed
1030 capabilities again.
1031 */
1032 r = cap_set_flag(caps, CAP_INHERITABLE, num_caps, capabilities, CAP_CLEAR);
1033 if (r) {
1034 ERROR(jail->pakfire, "cap_set_flag() failed: %m\n");
1035 goto ERROR;
1036 }
1037
1038 // Restore capabilities
1039 r = cap_set_proc(caps);
1040 if (r) {
1041 ERROR(jail->pakfire, "Could not restore capabilities: %m\n");
1042 goto ERROR;
1043 }
1044
1045ERROR:
1046 if (caps)
1047 cap_free(caps);
1048
1049 return r;
1050}
1051
739d5b57
MT
1052// Syscall Filter
1053
1054static int pakfire_jail_limit_syscalls(struct pakfire_jail* jail) {
1055 const int syscalls[] = {
1056 // The kernel's keyring isn't namespaced
1057 SCMP_SYS(keyctl),
1058 SCMP_SYS(add_key),
1059 SCMP_SYS(request_key),
1060
1061 // Disable userfaultfd
1062 SCMP_SYS(userfaultfd),
1063
1064 // Disable perf which could leak a lot of information about the host
1065 SCMP_SYS(perf_event_open),
1066
1067 0,
1068 };
1069 int r = 1;
1070
1071 DEBUG(jail->pakfire, "Applying syscall filter...\n");
1072
1073 // Setup a syscall filter which allows everything by default
1074 scmp_filter_ctx ctx = seccomp_init(SCMP_ACT_ALLOW);
1075 if (!ctx) {
1076 ERROR(jail->pakfire, "Could not setup seccomp filter: %m\n");
1077 goto ERROR;
1078 }
1079
1080 // All all syscalls
1081 for (const int* syscall = syscalls; *syscall; syscall++) {
1082 r = seccomp_rule_add(ctx, SCMP_ACT_ERRNO(EPERM), *syscall, 0);
1083 if (r) {
1084 ERROR(jail->pakfire, "Could not configure syscall %d: %m\n", *syscall);
1085 goto ERROR;
1086 }
1087 }
1088
1089 // Load syscall filter into the kernel
1090 r = seccomp_load(ctx);
1091 if (r) {
1092 ERROR(jail->pakfire, "Could not load syscall filter into the kernel: %m\n");
1093 goto ERROR;
1094 }
1095
1096ERROR:
1097 if (ctx)
1098 seccomp_release(ctx);
1099
1100 return r;
1101}
1102
cc6e2264
MT
1103// Mountpoints
1104
061223f7 1105PAKFIRE_EXPORT int pakfire_jail_bind(struct pakfire_jail* jail,
cc6e2264
MT
1106 const char* source, const char* target, int flags) {
1107 struct pakfire_jail_mountpoint* mp = NULL;
1108 int r;
1109
1110 // Check if there is any space left
1111 if (jail->num_mountpoints >= MAX_MOUNTPOINTS) {
1112 errno = ENOSPC;
1113 return 1;
1114 }
1115
1116 // Check for valid inputs
1117 if (!source || !target) {
1118 errno = EINVAL;
1119 return 1;
1120 }
1121
1122 // Select the next free slot
1123 mp = &jail->mountpoints[jail->num_mountpoints];
1124
1125 // Copy source
1126 r = pakfire_string_set(mp->source, source);
a60955af 1127 if (r) {
cc6e2264 1128 ERROR(jail->pakfire, "Could not copy source: %m\n");
a60955af 1129 return r;
cc6e2264
MT
1130 }
1131
1132 // Copy target
1133 r = pakfire_string_set(mp->target, target);
a60955af 1134 if (r) {
cc6e2264 1135 ERROR(jail->pakfire, "Could not copy target: %m\n");
a60955af 1136 return r;
cc6e2264
MT
1137 }
1138
1139 // Copy flags
1140 mp->flags = flags;
1141
1142 // Increment counter
1143 jail->num_mountpoints++;
1144
1145 return 0;
1146}
1147
7bdf1d8e
MT
1148static int pakfire_jail_mount_networking(struct pakfire_jail* jail) {
1149 int r;
1150
1151 const char* paths[] = {
1152 "/etc/hosts",
1153 "/etc/resolv.conf",
1154 NULL,
1155 };
1156
1157 // Bind-mount all paths read-only
1158 for (const char** path = paths; *path; path++) {
1159 r = pakfire_bind(jail->pakfire, *path, NULL, MS_RDONLY);
1160 if (r)
1161 return r;
1162 }
1163
1164 return 0;
1165}
1166
cc6e2264
MT
1167/*
1168 Mounts everything that we require in the new namespace
1169*/
7bdf1d8e 1170static int pakfire_jail_mount(struct pakfire_jail* jail, struct pakfire_jail_exec* ctx) {
cc6e2264
MT
1171 struct pakfire_jail_mountpoint* mp = NULL;
1172 int r;
1173
1174 // Mount all default stuff
1175 r = pakfire_mount_all(jail->pakfire);
1176 if (r)
1177 return r;
1178
7bdf1d8e
MT
1179 // Mount networking stuff
1180 if (pakfire_jail_exec_has_flag(ctx, PAKFIRE_JAIL_HAS_NETWORKING)) {
1181 r = pakfire_jail_mount_networking(jail);
1182 if (r)
1183 return r;
1184 }
1185
cc6e2264
MT
1186 // Mount all custom stuff
1187 for (unsigned int i = 0; i < jail->num_mountpoints; i++) {
1188 // Fetch mountpoint
1189 mp = &jail->mountpoints[i];
1190
1191 // Mount it
1192 r = pakfire_bind(jail->pakfire, mp->source, mp->target, mp->flags);
1193 if (r)
1194 return r;
1195 }
1196
1197 // Log all mountpoints
1198 pakfire_mount_list(jail->pakfire);
1199
1200 return 0;
1201}
1202
679ee2fa
MT
1203// UID/GID Mapping
1204
679ee2fa
MT
1205static int pakfire_jail_setup_uid_mapping(struct pakfire_jail* jail, pid_t pid) {
1206 char path[PATH_MAX];
1207 int r;
1208
4896e62c
MT
1209 // Skip mapping anything when running on /
1210 if (pakfire_on_root(jail->pakfire))
1211 return 0;
0f7f068b 1212
abe4ee37
MT
1213 // Make path
1214 r = pakfire_string_format(path, "/proc/%d/uid_map", pid);
1215 if (r)
1216 return r;
1217
1218 // Fetch UID
1219 const uid_t uid = pakfire_uid(jail->pakfire);
1220
4896e62c 1221 // Fetch SUBUID
a1ff2863 1222 const struct pakfire_subid* subuid = pakfire_subuid(jail->pakfire);
4896e62c
MT
1223 if (!subuid)
1224 return 1;
679ee2fa 1225
abe4ee37 1226 /* When running as root, we will map the entire range.
679ee2fa 1227
abe4ee37
MT
1228 When running as a non-privileged user, we will map the root user inside the jail
1229 to the user's UID outside of the jail, and we will map the rest starting from one.
1230 */
679ee2fa 1231
abe4ee37
MT
1232 // Running as root
1233 if (uid == 0) {
1234 r = pakfire_file_write(jail->pakfire, path, 0, 0, 0,
1235 "0 %lu %lu\n", subuid->id, subuid->length);
1236 } else {
1237 r = pakfire_file_write(jail->pakfire, path, 0, 0, 0,
b64888fa 1238 "0 %lu 1\n1 %lu %lu\n", uid, subuid->id, subuid->length);
abe4ee37
MT
1239 }
1240
1241 if (r) {
1242 ERROR(jail->pakfire, "Could not map UIDs: %m\n");
1243 return r;
1244 }
1245
1246 return r;
679ee2fa
MT
1247}
1248
1249static int pakfire_jail_setup_gid_mapping(struct pakfire_jail* jail, pid_t pid) {
1250 char path[PATH_MAX];
1251 int r;
1252
4896e62c
MT
1253 // Skip mapping anything when running on /
1254 if (pakfire_on_root(jail->pakfire))
1255 return 0;
0f7f068b 1256
abe4ee37
MT
1257 // Fetch GID
1258 const gid_t gid = pakfire_gid(jail->pakfire);
1259
4896e62c 1260 // Fetch SUBGID
a1ff2863 1261 const struct pakfire_subid* subgid = pakfire_subgid(jail->pakfire);
4896e62c
MT
1262 if (!subgid)
1263 return 1;
679ee2fa
MT
1264
1265 // Make path
1266 r = pakfire_string_format(path, "/proc/%d/gid_map", pid);
a60955af
MT
1267 if (r)
1268 return r;
679ee2fa 1269
abe4ee37
MT
1270 // Running as root
1271 if (gid == 0) {
1272 r = pakfire_file_write(jail->pakfire, path, 0, 0, 0,
1273 "0 %lu %lu\n", subgid->id, subgid->length);
1274 } else {
1275 r = pakfire_file_write(jail->pakfire, path, 0, 0, 0,
1276 "0 %lu 1\n%1 %lu %lu\n", gid, subgid->id, subgid->length);
1277 }
679ee2fa 1278
abe4ee37
MT
1279 if (r) {
1280 ERROR(jail->pakfire, "Could not map GIDs: %m\n");
1281 return r;
1282 }
1283
1284 return r;
679ee2fa
MT
1285}
1286
78d7488a
MT
1287static int pakfire_jail_setgroups(struct pakfire_jail* jail, pid_t pid) {
1288 char path[PATH_MAX];
1289 int r = 1;
1290
1291 // Make path
1292 r = pakfire_string_format(path, "/proc/%d/setgroups", pid);
a60955af
MT
1293 if (r)
1294 return r;
78d7488a
MT
1295
1296 // Open file for writing
1297 FILE* f = fopen(path, "w");
1298 if (!f) {
1299 ERROR(jail->pakfire, "Could not open %s for writing: %m\n", path);
1300 goto ERROR;
1301 }
1302
1303 // Write content
1304 int bytes_written = fprintf(f, "deny\n");
1305 if (bytes_written <= 0) {
1306 ERROR(jail->pakfire, "Could not write to %s: %m\n", path);
1307 goto ERROR;
1308 }
1309
1310 r = fclose(f);
1311 f = NULL;
1312 if (r) {
1313 ERROR(jail->pakfire, "Could not close %s: %m\n", path);
1314 goto ERROR;
1315 }
1316
1317ERROR:
1318 if (f)
1319 fclose(f);
1320
1321 return r;
1322}
1323
43dc0e16 1324static int pakfire_jail_send_signal(struct pakfire_jail* jail, int fd) {
743f449e
MT
1325 const uint64_t val = 1;
1326 int r = 0;
43dc0e16
MT
1327
1328 DEBUG(jail->pakfire, "Sending signal...\n");
1329
743f449e
MT
1330 // Write to the file descriptor
1331 ssize_t bytes_written = write(fd, &val, sizeof(val));
1332 if (bytes_written < 0 || (size_t)bytes_written < sizeof(val)) {
1333 ERROR(jail->pakfire, "Could not send signal: %m\n");
1334 r = 1;
1335 }
1336
1337 // Close the file descriptor
43dc0e16
MT
1338 close(fd);
1339
743f449e 1340 return r;
43dc0e16
MT
1341}
1342
1343static int pakfire_jail_wait_for_signal(struct pakfire_jail* jail, int fd) {
743f449e
MT
1344 uint64_t val = 0;
1345 int r = 0;
43dc0e16
MT
1346
1347 DEBUG(jail->pakfire, "Waiting for signal...\n");
1348
743f449e
MT
1349 ssize_t bytes_read = read(fd, &val, sizeof(val));
1350 if (bytes_read < 0 || (size_t)bytes_read < sizeof(val)) {
1351 ERROR(jail->pakfire, "Error waiting for signal: %m\n");
1352 r = 1;
1353 }
1354
1355 // Close the file descriptor
43dc0e16
MT
1356 close(fd);
1357
743f449e 1358 return r;
43dc0e16
MT
1359}
1360
679ee2fa
MT
1361/*
1362 Performs the initialisation that needs to happen in the parent part
1363*/
f7d240a7 1364static int pakfire_jail_parent(struct pakfire_jail* jail, struct pakfire_jail_exec* ctx) {
679ee2fa
MT
1365 int r;
1366
abe4ee37
MT
1367 // Setup UID mapping
1368 r = pakfire_jail_setup_uid_mapping(jail, ctx->pid);
679ee2fa
MT
1369 if (r)
1370 return r;
1371
abe4ee37
MT
1372 // Write "deny" to /proc/PID/setgroups
1373 r = pakfire_jail_setgroups(jail, ctx->pid);
78d7488a
MT
1374 if (r)
1375 return r;
1376
679ee2fa 1377 // Setup GID mapping
616f1fca 1378 r = pakfire_jail_setup_gid_mapping(jail, ctx->pid);
679ee2fa
MT
1379 if (r)
1380 return r;
1381
43dc0e16
MT
1382 // Parent has finished initialisation
1383 DEBUG(jail->pakfire, "Parent has finished initialization\n");
1384
1385 // Send signal to client
f7d240a7 1386 r = pakfire_jail_send_signal(jail, ctx->completed_fd);
43dc0e16
MT
1387 if (r)
1388 return r;
1389
679ee2fa
MT
1390 return 0;
1391}
1392
616f1fca 1393static int pakfire_jail_child(struct pakfire_jail* jail, struct pakfire_jail_exec* ctx,
f7d240a7 1394 const char* argv[]) {
43dc0e16
MT
1395 int r;
1396
e33387d3
MT
1397 // Redirect any logging to our log pipe
1398 pakfire_set_log_callback(jail->pakfire, pakfire_jail_log, &ctx->pipes);
0bd84dc1 1399
2a7b5e00
MT
1400 // Die with parent
1401 r = prctl(PR_SET_PDEATHSIG, SIGKILL, 0, 0, 0);
1402 if (r) {
1403 ERROR(jail->pakfire, "Could not configure to die with parent: %m\n");
1404 return 126;
1405 }
1406
cf440db8
MT
1407 // Fetch my own PID
1408 pid_t pid = getpid();
1409
1410 DEBUG(jail->pakfire, "Launched child process in jail with PID %d\n", pid);
0bd84dc1 1411
43dc0e16 1412 // Wait for the parent to finish initialization
f7d240a7 1413 r = pakfire_jail_wait_for_signal(jail, ctx->completed_fd);
43dc0e16
MT
1414 if (r)
1415 return r;
1416
4f59c39b
MT
1417 // Perform further initialization
1418
1419 // Fetch UID/GID
1420 uid_t uid = getuid();
1421 gid_t gid = getgid();
1422
1423 // Fetch EUID/EGID
1424 uid_t euid = geteuid();
1425 gid_t egid = getegid();
1426
1427 DEBUG(jail->pakfire, " UID: %d (effective %d)\n", uid, euid);
1428 DEBUG(jail->pakfire, " GID: %d (effective %d)\n", gid, egid);
1429
1430 // Check if we are (effectively running as root)
4f719e21 1431 if (uid || gid || euid || egid) {
4f59c39b
MT
1432 ERROR(jail->pakfire, "Child process is not running as root\n");
1433 return 126;
1434 }
1435
1436 const char* root = pakfire_get_path(jail->pakfire);
1437 const char* arch = pakfire_get_arch(jail->pakfire);
1438
1439 // Change root (unless root is /)
1440 if (!pakfire_on_root(jail->pakfire)) {
1441 // Mount everything
7bdf1d8e 1442 r = pakfire_jail_mount(jail, ctx);
4f59c39b
MT
1443 if (r)
1444 return r;
1445
4f59c39b
MT
1446 // Call chroot()
1447 r = chroot(root);
1448 if (r) {
1449 ERROR(jail->pakfire, "chroot() to %s failed: %m\n", root);
1450 return 1;
1451 }
1452
1453 // Change directory to /
1454 r = chdir("/");
1455 if (r) {
1456 ERROR(jail->pakfire, "chdir() after chroot() failed: %m\n");
1457 return 1;
1458 }
1459 }
1460
90d92b5c
MT
1461 // Set personality
1462 unsigned long persona = pakfire_arch_personality(arch);
1463 if (persona) {
1464 r = personality(persona);
1465 if (r < 0) {
1466 ERROR(jail->pakfire, "Could not set personality (%x)\n", (unsigned int)persona);
1467 return 1;
1468 }
1469 }
1470
cf440db8
MT
1471 // Set nice level
1472 if (jail->nice) {
1473 DEBUG(jail->pakfire, "Setting nice level to %d\n", jail->nice);
1474
1475 r = setpriority(PRIO_PROCESS, pid, jail->nice);
1476 if (r) {
1477 ERROR(jail->pakfire, "Could not set nice level: %m\n");
1478 return 1;
1479 }
1480 }
1481
e33387d3
MT
1482 // Close other end of log pipes
1483 close(ctx->pipes.log_INFO[0]);
1484 close(ctx->pipes.log_ERROR[0]);
1485#ifdef ENABLE_DEBUG
1486 close(ctx->pipes.log_DEBUG[0]);
1487#endif /* ENABLE_DEBUG */
1488
2015cb92
MT
1489 // Connect standard input
1490 if (ctx->pipes.stdin[0]) {
1491 r = dup2(ctx->pipes.stdin[0], STDIN_FILENO);
1492 if (r < 0) {
1493 ERROR(jail->pakfire, "Could not connect fd %d to stdin: %m\n",
1494 ctx->pipes.stdin[0]);
1495
1496 return 1;
1497 }
1498 }
1499
7ebfb7cb
MT
1500 // Connect standard output and error
1501 if (ctx->pipes.stdout[1] && ctx->pipes.stderr[1]) {
1502 r = dup2(ctx->pipes.stdout[1], STDOUT_FILENO);
1503 if (r < 0) {
1504 ERROR(jail->pakfire, "Could not connect fd %d to stdout: %m\n",
1505 ctx->pipes.stdout[1]);
1506
1507 return 1;
1508 }
1509
1510 r = dup2(ctx->pipes.stderr[1], STDERR_FILENO);
1511 if (r < 0) {
1512 ERROR(jail->pakfire, "Could not connect fd %d to stderr: %m\n",
1513 ctx->pipes.stderr[1]);
1514
1515 return 1;
1516 }
1517
195fe455 1518 // Close the pipe (as we have moved the original file descriptors)
2015cb92 1519 pakfire_jail_close_pipe(jail, ctx->pipes.stdin);
195fe455
MT
1520 pakfire_jail_close_pipe(jail, ctx->pipes.stdout);
1521 pakfire_jail_close_pipe(jail, ctx->pipes.stderr);
7ebfb7cb
MT
1522 }
1523
007bc66c
MT
1524 // Reset open file limit (http://0pointer.net/blog/file-descriptor-limits.html)
1525 r = pakfire_rlimit_reset_nofile(jail->pakfire);
1526 if (r)
1527 return r;
1528
980b15af
MT
1529 // Drop capabilities
1530 r = pakfire_jail_drop_capabilities(jail);
1531 if (r)
1532 return r;
1533
739d5b57
MT
1534 // Filter syscalls
1535 r = pakfire_jail_limit_syscalls(jail);
1536 if (r)
1537 return r;
1538
2015cb92
MT
1539 DEBUG(jail->pakfire, "Child process initialization done\n");
1540 DEBUG(jail->pakfire, "Launching command:\n");
1541
1542 // Log argv
1543 for (unsigned int i = 0; argv[i]; i++)
1544 DEBUG(jail->pakfire, " argv[%d] = %s\n", i, argv[i]);
1545
b3498aeb
MT
1546 // exec() command
1547 r = execvpe(argv[0], (char**)argv, jail->env);
1548 if (r < 0)
0494b00c 1549 ERROR(jail->pakfire, "Could not execve(%s): %m\n", argv[0]);
b3498aeb
MT
1550
1551 // Translate errno into regular exit code
1552 switch (errno) {
1553 case ENOENT:
1554 r = 127;
1555 break;
1556
1557 default:
1558 r = 1;
1559 }
1560
1561 // We should not get here
1562 return r;
0bd84dc1
MT
1563}
1564
9f50bf71 1565// Run a command in the jail
db4f234f 1566static int __pakfire_jail_exec(struct pakfire_jail* jail, const char* argv[],
2015cb92
MT
1567 const int interactive,
1568 pakfire_jail_communicate_in communicate_in,
1569 pakfire_jail_communicate_out communicate_out,
1570 void* data) {
4f59c39b 1571 int exit = -1;
0bd84dc1
MT
1572 int r;
1573
b3498aeb
MT
1574 // Check if argv is valid
1575 if (!argv || !argv[0]) {
1576 errno = EINVAL;
1577 return -1;
1578 }
1579
2015cb92
MT
1580 // Send any output to the default logger if no callback is set
1581 if (!communicate_out)
1582 communicate_out = pakfire_jail_default_log_callback;
1583
616f1fca
MT
1584 // Initialize context for this call
1585 struct pakfire_jail_exec ctx = {
7bdf1d8e
MT
1586 .flags = 0,
1587
616f1fca 1588 .pipes = {
2015cb92
MT
1589 .stdin = { 0, 0 },
1590 .stdout = { 0, 0 },
1591 .stderr = { 0, 0 },
1592 },
1593
1594 .communicate = {
1595 .in = communicate_in,
1596 .out = communicate_out,
1597 .data = data,
616f1fca 1598 },
616f1fca
MT
1599 };
1600
0bd84dc1
MT
1601 DEBUG(jail->pakfire, "Executing jail...\n");
1602
7bdf1d8e
MT
1603 // Enable networking in interactive mode
1604 if (interactive)
1605 ctx.flags |= PAKFIRE_JAIL_HAS_NETWORKING;
1606
43dc0e16
MT
1607 /*
1608 Setup a file descriptor which can be used to notify the client that the parent
1609 has completed configuration.
1610 */
f7d240a7
MT
1611 ctx.completed_fd = eventfd(0, EFD_CLOEXEC);
1612 if (ctx.completed_fd < 0) {
43dc0e16
MT
1613 ERROR(jail->pakfire, "eventfd() failed: %m\n");
1614 return -1;
1615 }
1616
616f1fca 1617 // Create pipes to communicate with child process if we are not running interactively
58963c75 1618 if (!interactive) {
2015cb92
MT
1619 // stdin (only if callback is set)
1620 if (ctx.communicate.in) {
1621 r = pakfire_jail_setup_pipe(jail, &ctx.pipes.stdin, 0);
1622 if (r)
1623 goto ERROR;
1624 }
1625
616f1fca 1626 // stdout
e33387d3
MT
1627 r = pakfire_jail_setup_pipe(jail, &ctx.pipes.stdout, 0);
1628 if (r)
616f1fca 1629 goto ERROR;
616f1fca
MT
1630
1631 // stderr
e33387d3
MT
1632 r = pakfire_jail_setup_pipe(jail, &ctx.pipes.stderr, 0);
1633 if (r)
616f1fca 1634 goto ERROR;
616f1fca
MT
1635 }
1636
e33387d3
MT
1637 // Setup pipes for logging
1638 // INFO
1639 r = pakfire_jail_setup_pipe(jail, &ctx.pipes.log_INFO, O_CLOEXEC);
1640 if (r)
1641 goto ERROR;
1642
1643 // ERROR
1644 r = pakfire_jail_setup_pipe(jail, &ctx.pipes.log_ERROR, O_CLOEXEC);
1645 if (r)
1646 goto ERROR;
1647
1648#ifdef ENABLE_DEBUG
1649 // DEBUG
1650 r = pakfire_jail_setup_pipe(jail, &ctx.pipes.log_DEBUG, O_CLOEXEC);
1651 if (r)
1652 goto ERROR;
1653#endif /* ENABLE_DEBUG */
1654
0bd84dc1
MT
1655 // Configure child process
1656 struct clone_args args = {
1657 .flags =
1658 CLONE_NEWCGROUP |
1659 CLONE_NEWIPC |
1660 CLONE_NEWNS |
1661 CLONE_NEWPID |
1662 CLONE_NEWUSER |
d853213d 1663 CLONE_NEWUTS |
02fd4f8b 1664 CLONE_PIDFD,
0bd84dc1 1665 .exit_signal = SIGCHLD,
d853213d 1666 .pidfd = (long long unsigned int)&ctx.pidfd,
0bd84dc1
MT
1667 };
1668
aca565fc 1669 // Launch the process in a cgroup that is a leaf of the configured cgroup
02fd4f8b
MT
1670 if (jail->cgroup) {
1671 args.flags |= CLONE_INTO_CGROUP;
1672
ae5201c5
MT
1673 // Fetch our UUID
1674 const char* uuid = pakfire_jail_uuid(jail);
aca565fc
MT
1675
1676 // Create a temporary cgroup
ae5201c5 1677 r = pakfire_cgroup_child(&ctx.cgroup, jail->cgroup, uuid, 0);
aca565fc
MT
1678 if (r) {
1679 ERROR(jail->pakfire, "Could not create cgroup for jail: %m\n");
1680 goto ERROR;
1681 }
1682
02fd4f8b 1683 // Clone into this cgroup
aca565fc 1684 args.cgroup = pakfire_cgroup_fd(ctx.cgroup);
02fd4f8b
MT
1685 }
1686
7bdf1d8e
MT
1687 // Setup networking
1688 if (!pakfire_jail_exec_has_flag(&ctx, PAKFIRE_JAIL_HAS_NETWORKING)) {
1689 args.flags |= CLONE_NEWNET;
1690 }
1691
0bd84dc1 1692 // Fork this process
616f1fca
MT
1693 ctx.pid = clone3(&args, sizeof(args));
1694 if (ctx.pid < 0) {
0bd84dc1
MT
1695 ERROR(jail->pakfire, "Could not clone: %m\n");
1696 return -1;
1697
1698 // Child process
616f1fca 1699 } else if (ctx.pid == 0) {
f7d240a7 1700 r = pakfire_jail_child(jail, &ctx, argv);
0bd84dc1
MT
1701 _exit(r);
1702 }
1703
679ee2fa 1704 // Parent process
f7d240a7 1705 r = pakfire_jail_parent(jail, &ctx);
679ee2fa
MT
1706 if (r)
1707 goto ERROR;
1708
616f1fca 1709 DEBUG(jail->pakfire, "Waiting for PID %d to finish its work\n", ctx.pid);
0bd84dc1 1710
616f1fca 1711 // Read output of the child process
d853213d
MT
1712 r = pakfire_jail_wait(jail, &ctx);
1713 if (r)
1714 goto ERROR;
0bd84dc1 1715
d853213d
MT
1716 // Handle exit status
1717 switch (ctx.status.si_code) {
1718 case CLD_EXITED:
1719 DEBUG(jail->pakfire, "The child process exited with code %d\n",
1720 ctx.status.si_status);
616f1fca 1721
d853213d
MT
1722 // Pass exit code
1723 exit = ctx.status.si_status;
1724 break;
0bd84dc1 1725
d853213d 1726 case CLD_KILLED:
d853213d 1727 ERROR(jail->pakfire, "The child process was killed\n");
54f64dc5
MT
1728 exit = 139;
1729 break;
1730
1731 case CLD_DUMPED:
1732 ERROR(jail->pakfire, "The child process terminated abnormally\n");
d853213d 1733 break;
0bd84dc1 1734
d853213d
MT
1735 // Log anything else
1736 default:
1737 ERROR(jail->pakfire, "Unknown child exit code: %d\n", ctx.status.si_code);
1738 break;
0bd84dc1
MT
1739 }
1740
679ee2fa 1741ERROR:
aca565fc
MT
1742 // Destroy the temporary cgroup (if any)
1743 if (ctx.cgroup) {
6b7cf275
MT
1744 // Read cgroup stats
1745 r = pakfire_cgroup_stat(ctx.cgroup, &ctx.cgroup_stats);
1746 if (r) {
1747 ERROR(jail->pakfire, "Could not read cgroup stats: %m\n");
1748 } else {
1749 pakfire_cgroup_stat_dump(ctx.cgroup, &ctx.cgroup_stats);
1750 }
1751
aca565fc
MT
1752 pakfire_cgroup_destroy(ctx.cgroup);
1753 pakfire_cgroup_unref(ctx.cgroup);
1754 }
1755
616f1fca 1756 // Close any file descriptors
2015cb92 1757 pakfire_jail_close_pipe(jail, ctx.pipes.stdin);
e33387d3
MT
1758 pakfire_jail_close_pipe(jail, ctx.pipes.stdout);
1759 pakfire_jail_close_pipe(jail, ctx.pipes.stderr);
d853213d
MT
1760 if (ctx.pidfd)
1761 close(ctx.pidfd);
e33387d3
MT
1762 pakfire_jail_close_pipe(jail, ctx.pipes.log_INFO);
1763 pakfire_jail_close_pipe(jail, ctx.pipes.log_ERROR);
1764 pakfire_jail_close_pipe(jail, ctx.pipes.log_DEBUG);
616f1fca 1765
4f59c39b 1766 return exit;
9f50bf71 1767}
a45ed6b0 1768
ccdd2e95 1769PAKFIRE_EXPORT int pakfire_jail_exec(
2015cb92
MT
1770 struct pakfire_jail* jail,
1771 const char* argv[],
1772 pakfire_jail_communicate_in callback_in,
1773 pakfire_jail_communicate_out callback_out,
1774 void* data) {
1775 return __pakfire_jail_exec(jail, argv, 0, callback_in, callback_out, data);
1776}
1777
db4f234f
MT
/*
	Runs argv in the jail in interactive mode, without any communication
	callbacks, after the interactive environment has been set up.

	Returns the non-zero return code of pakfire_jail_setup_interactive_env() if
	that fails, or whatever __pakfire_jail_exec() returns.
*/
static int pakfire_jail_exec_interactive(
		struct pakfire_jail* jail, const char* argv[]) {
	// Prepare the environment first and bail out if that fails
	const int r = pakfire_jail_setup_interactive_env(jail);

	return (r) ? r : __pakfire_jail_exec(jail, argv, 1, NULL, NULL, NULL);
}
1789
ccdd2e95
MT
1790int pakfire_jail_exec_script(struct pakfire_jail* jail,
1791 const char* script,
1792 const size_t size,
1793 const char* args[],
1794 pakfire_jail_communicate_in callback_in,
1795 pakfire_jail_communicate_out callback_out,
1796 void* data) {
a45ed6b0
MT
1797 char path[PATH_MAX];
1798 const char** argv = NULL;
35291cb7 1799 FILE* f = NULL;
a45ed6b0
MT
1800 int r;
1801
1802 const char* root = pakfire_get_path(jail->pakfire);
1803
1804 // Write the scriptlet to disk
35291cb7 1805 r = pakfire_path_join(path, root, PAKFIRE_TMP_DIR "/pakfire-script.XXXXXX");
56796f84 1806 if (r)
a45ed6b0
MT
1807 goto ERROR;
1808
35291cb7
MT
1809 // Create a temporary file
1810 f = pakfire_mktemp(path, 0700);
1811 if (!f) {
1812 ERROR(jail->pakfire, "Could not create temporary file: %m\n");
a45ed6b0
MT
1813 goto ERROR;
1814 }
1815
1816 DEBUG(jail->pakfire, "Writing script to %s:\n%.*s\n", path, (int)size, script);
1817
1818 // Write data
35291cb7
MT
1819 r = fprintf(f, "%s", script);
1820 if (r < 0) {
a45ed6b0 1821 ERROR(jail->pakfire, "Could not write script to file %s: %m\n", path);
a45ed6b0
MT
1822 goto ERROR;
1823 }
1824
1825 // Close file
35291cb7 1826 r = fclose(f);
a45ed6b0
MT
1827 if (r) {
1828 ERROR(jail->pakfire, "Could not close script file %s: %m\n", path);
a45ed6b0
MT
1829 goto ERROR;
1830 }
1831
35291cb7
MT
1832 f = NULL;
1833
a45ed6b0
MT
1834 // Count how many arguments were passed
1835 unsigned int argc = 1;
1836 if (args) {
1837 for (const char** arg = args; *arg; arg++)
1838 argc++;
1839 }
1840
1841 argv = calloc(argc + 1, sizeof(*argv));
1842 if (!argv) {
1843 ERROR(jail->pakfire, "Could not allocate argv: %m\n");
1844 goto ERROR;
1845 }
1846
1847 // Set command
1848 argv[0] = (root) ? pakfire_path_relpath(root, path) : path;
1849
1850 // Copy args
1851 for (unsigned int i = 1; i < argc; i++)
1852 argv[i] = args[i-1];
1853
1854 // Run the script
ccdd2e95 1855 r = pakfire_jail_exec(jail, argv, callback_in, callback_out, data);
a45ed6b0
MT
1856
1857ERROR:
1858 if (argv)
1859 free(argv);
35291cb7
MT
1860 if (f)
1861 fclose(f);
a45ed6b0
MT
1862
1863 // Remove script from disk
1864 if (*path)
1865 unlink(path);
1866
1867 return r;
1868}
82df3c77
MT
1869
1870/*
1871 A convenience function that creates a new jail, runs the given command and destroys
1872 the jail again.
1873*/
12b9b39f 1874int pakfire_jail_run(struct pakfire* pakfire, const char* argv[], int flags, char** output) {
82df3c77
MT
1875 struct pakfire_jail* jail = NULL;
1876 int r;
1877
1878 // Create a new jail
1879 r = pakfire_jail_create(&jail, pakfire, flags);
1880 if (r)
1881 goto ERROR;
1882
1883 // Execute the command
ccdd2e95 1884 r = pakfire_jail_exec(jail, argv, NULL, pakfire_jail_capture_stdout, output);
82df3c77
MT
1885
1886ERROR:
1887 if (jail)
1888 pakfire_jail_unref(jail);
1889
1890 return r;
1891}
4f688bd8
MT
1892
/*
	Creates a new jail, runs the given script with argv as its arguments
	(without any communication callbacks) and destroys the jail again.

	Returns the non-zero return code of pakfire_jail_create() if the jail could
	not be created, or the result of pakfire_jail_exec_script().
*/
int pakfire_jail_run_script(struct pakfire* pakfire,
		const char* script, const size_t length, const char* argv[], int flags) {
	struct pakfire_jail* jail = NULL;

	// Create a new jail and, if that worked, execute the script in it
	int r = pakfire_jail_create(&jail, pakfire, flags);
	if (!r)
		r = pakfire_jail_exec_script(jail, script, length, argv, NULL, NULL, NULL);

	// Drop our reference to the jail
	if (jail)
		pakfire_jail_unref(jail);

	return r;
}
e43489f7 1912
/*
	Launches "/bin/bash --login" interactively in the jail.

	Returns whatever pakfire_jail_exec_interactive() returns.
*/
int pakfire_jail_shell(struct pakfire_jail* jail) {
	// Command line of the shell
	const char* command[] = { "/bin/bash", "--login", NULL };

	return pakfire_jail_exec_interactive(jail, command);
}
1921
f7ffbb93 1922static int pakfire_jail_run_if_possible(struct pakfire* pakfire, const char** argv) {
e43489f7 1923 char path[PATH_MAX];
f7ffbb93 1924 int r;
e43489f7 1925
f7ffbb93 1926 r = pakfire_path(pakfire, path, "%s", *argv);
77e26129
MT
1927 if (r)
1928 return r;
e43489f7 1929
f7ffbb93 1930 // Check if the file is executable
e43489f7
MT
1931 r = access(path, X_OK);
1932 if (r) {
f7ffbb93 1933 DEBUG(pakfire, "%s is not executable. Skipping...\n", *argv);
e43489f7
MT
1934 return 0;
1935 }
1936
f7ffbb93
MT
1937 return pakfire_jail_run(pakfire, argv, 0, NULL);
1938}
1939
/*
	Runs /sbin/ldconfig in a new jail, if it is executable.

	Returns whatever pakfire_jail_run_if_possible() returns.
*/
int pakfire_jail_ldconfig(struct pakfire* pakfire) {
	const char* command[] = { "/sbin/ldconfig", NULL };

	return pakfire_jail_run_if_possible(pakfire, command);
}
1948
/*
	Runs "/usr/bin/systemd-tmpfiles --create" in a new jail, if it is executable.

	Returns whatever pakfire_jail_run_if_possible() returns.
*/
int pakfire_jail_run_systemd_tmpfiles(struct pakfire* pakfire) {
	const char* command[] = { "/usr/bin/systemd-tmpfiles", "--create", NULL };

	return pakfire_jail_run_if_possible(pakfire, command);
}