src/libpakfire/jail.c

   1 /*#############################################################################
   2 #                                                                             #
   3 # Pakfire - The IPFire package management system                              #
   4 # Copyright (C) 2022 Pakfire development team                                 #
   5 #                                                                             #
   6 # This program is free software: you can redistribute it and/or modify        #
   7 # it under the terms of the GNU General Public License as published by        #
   8 # the Free Software Foundation, either version 3 of the License, or           #
   9 # (at your option) any later version.                                         #
  10 #                                                                             #
  11 # This program is distributed in the hope that it will be useful,             #
  12 # but WITHOUT ANY WARRANTY; without even the implied warranty of              #
  13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the               #
  14 # GNU General Public License for more details.                                #
  15 #                                                                             #
  16 # You should have received a copy of the GNU General Public License           #
  17 # along with this program.  If not, see <http://www.gnu.org/licenses/>.       #
  18 #                                                                             #
  19 #############################################################################*/
  20
  21 #include <errno.h>
  22 #include <fcntl.h>
  23 #include <linux/capability.h>
  24 #include <linux/sched.h>
  25 #include <sys/wait.h>
  26 #include <linux/wait.h>
  27 #include <sched.h>
  28 #include <signal.h>
  29 #include <stdlib.h>
  30 #include <syscall.h>
  31 #include <sys/capability.h>
  32 #include <sys/epoll.h>
  33 #include <sys/eventfd.h>
  34 #include <sys/mount.h>
  35 #include <sys/personality.h>
  36 #include <sys/prctl.h>
  37 #include <sys/resource.h>
  38 #include <sys/timerfd.h>
  39 #include <sys/types.h>
  40 #include <sys/wait.h>
  41
  42 // libnl3
  43 #include <net/if.h>
  44 #include <netlink/route/link.h>
  45
  46 // libseccomp
  47 #include <seccomp.h>
  48
  49 // libuuid
  50 #include <uuid.h>
  51
  52 #include <pakfire/arch.h>
  53 #include <pakfire/cgroup.h>
  54 #include <pakfire/jail.h>
  55 #include <pakfire/logging.h>
  56 #include <pakfire/mount.h>
  57 #include <pakfire/os.h>
  58 #include <pakfire/pakfire.h>
  59 #include <pakfire/path.h>
  60 #include <pakfire/private.h>
  61 #include <pakfire/pwd.h>
  62 #include <pakfire/string.h>
  63 #include <pakfire/util.h>
  64
  65 #define BUFFER_SIZE      1024 * 64
  66 #define ENVIRON_SIZE     128
  67 #define EPOLL_MAX_EVENTS 2
  68 #define MAX_MOUNTPOINTS  8
  69
// The default environment that will be set for every command.
// pakfire_jail_create() copies every entry of this table into a new jail
// by calling pakfire_jail_set_env() on it.
static const struct environ {
	// Name of the variable
	const char* key;
	// Value of the variable
	const char* val;
} ENV[] = {
	{ "HOME", "/root" },
	{ "LANG", "C.utf-8" },
	{ "PATH", "/usr/local/sbin:/usr/sbin:/sbin:/usr/local/bin:/usr/bin:/bin", },
	{ "TERM", "vt100" },

	// Tell everything that it is running inside a Pakfire container
	{ "container", "pakfire" },

	// Sentinel: iteration stops at the first entry whose key is NULL
	{ NULL, NULL },
};
  84
// One entry of the mountpoint table stored in struct pakfire_jail
struct pakfire_jail_mountpoint {
	// NOTE(review): presumably the path that is being mounted - confirm against the mount code
	char source[PATH_MAX];
	// NOTE(review): presumably the path where it appears inside the jail - confirm
	char target[PATH_MAX];
	// NOTE(review): presumably mount flags - confirm against the mount code
	int flags;
};
  90
// State of a jail. Instances are reference-counted (see pakfire_jail_ref()
// and pakfire_jail_unref()) and are released by pakfire_jail_free().
struct pakfire_jail {
	// Context and Pakfire instance; both are unreferenced when the jail is freed
	struct pakfire_ctx* ctx;
	struct pakfire* pakfire;

	// Reference counter (starts at 1 in pakfire_jail_create())
	int nrefs;

	// A unique ID for each jail
	uuid_t uuid;
	// String form of the UUID; filled in on first use by pakfire_jail_uuid()
	char __uuid[UUID_STR_LEN];

	// Resource Limits
	int nice;

	// Timeout
	// (only it_value.tv_sec is set, by pakfire_jail_set_timeout(); zero means no timeout)
	struct itimerspec timeout;

	// CGroup
	// (optional; a reference is held while set, see pakfire_jail_set_cgroup())
	struct pakfire_cgroup* cgroup;

	// Environment
	// (heap-allocated "KEY=VALUE" strings; the first NULL slot ends the list)
	char* env[ENVIRON_SIZE];

	// Mountpoints
	struct pakfire_jail_mountpoint mountpoints[MAX_MOUNTPOINTS];
	unsigned int num_mountpoints;

	// Callbacks
	struct pakfire_jail_callbacks {
		// Log
		pakfire_jail_log_callback log;
		// Opaque pointer stored together with the log callback
		void* log_data;
	} callbacks;
};
 123
// Accumulates output read from a pipe until it can be split into lines
// (see pakfire_jail_handle_log())
struct pakfire_log_buffer {
	// Raw bytes; not NUL-terminated
	char data[BUFFER_SIZE];
	// Number of valid bytes at the start of data
	size_t used;
};
 128
// State of a single execution inside a jail
struct pakfire_jail_exec {
	// Bitmask of enum pakfire_jail_exec_flags (tested by pakfire_jail_exec_has_flag())
	int flags;

	// PIDs (of the children)
	// These are file descriptors; pidfd1 is watched by the epoll loop in pakfire_jail_wait()
	int pidfd1;
	int pidfd2;

	// Socket to pass FDs
	int socket[2];

	// FD to notify the client that the parent has finished initialization
	int completed_fd;

	// Log pipes
	// Each pair is used as { read end, write end }; an end that has been
	// closed is marked with -1 by the pipe helpers in this file
	struct pakfire_jail_pipes {
		int stdin[2];
		int stdout[2];
		int stderr[2];

		// Logging
		int log_INFO[2];
		int log_ERROR[2];
#ifdef ENABLE_DEBUG
		int log_DEBUG[2];
#endif /* ENABLE_DEBUG */
	} pipes;

	// Communicate
	struct pakfire_jail_communicate {
		// Callback that feeds standard input (see pakfire_jail_stream_stdin())
		pakfire_jail_communicate_in  in;
		pakfire_jail_communicate_out out;
		// Opaque pointer that is passed to the input callback
		void* data;
	} communicate;

	// Log buffers
	struct pakfire_jail_buffers {
		struct pakfire_log_buffer stdout;
		struct pakfire_log_buffer stderr;

		// Logging
		struct pakfire_log_buffer log_INFO;
		struct pakfire_log_buffer log_ERROR;
#ifdef ENABLE_DEBUG
		struct pakfire_log_buffer log_DEBUG;
#endif /* ENABLE_DEBUG */
	} buffers;

	struct pakfire_cgroup* cgroup;
	struct pakfire_cgroup_stats cgroup_stats;
};
 179
 180 static int clone3(struct clone_args* args, size_t size) {
 181         return syscall(__NR_clone3, args, size);
 182 }
 183
 184 static int pidfd_send_signal(int pidfd, int sig, siginfo_t* info, unsigned int flags) {
 185         return syscall(SYS_pidfd_send_signal, pidfd, sig, info, flags);
 186 }
 187
 188 static int pivot_root(const char* new_root, const char* old_root) {
 189         return syscall(SYS_pivot_root, new_root, old_root);
 190 }
 191
 192 static int pakfire_jail_exec_has_flag(
 193                 const struct pakfire_jail_exec* ctx, const enum pakfire_jail_exec_flags flag) {
 194         return ctx->flags & flag;
 195 }
 196
 197 static void pakfire_jail_free(struct pakfire_jail* jail) {
 198         DEBUG(jail->pakfire, "Freeing jail at %p\n", jail);
 199
 200         // Free environment
 201         for (unsigned int i = 0; jail->env[i]; i++)
 202                 free(jail->env[i]);
 203
 204         if (jail->cgroup)
 205                 pakfire_cgroup_unref(jail->cgroup);
 206         if (jail->pakfire)
 207                 pakfire_unref(jail->pakfire);
 208         if (jail->ctx)
 209                 pakfire_ctx_unref(jail->ctx);
 210         free(jail);
 211 }
 212
 213 /*
 214         Passes any log messages on to the default pakfire log callback
 215 */
 216 static int pakfire_jail_default_log_callback(struct pakfire* pakfire, void* data,
 217                 int priority, const char* line, size_t length) {
 218         switch (priority) {
 219                 case LOG_INFO:
 220                         INFO(pakfire, "%s", line);
 221                         break;
 222
 223                 case LOG_ERR:
 224                         ERROR(pakfire, "%s", line);
 225                         break;
 226
 227 #ifdef ENABLE_DEBUG
 228                 case LOG_DEBUG:
 229                         DEBUG(pakfire, "%s", line);
 230                         break;
 231 #endif
 232         }
 233
 234         return 0;
 235 }
 236
 237 static const char* pakfire_jail_uuid(struct pakfire_jail* jail) {
 238         if (!*jail->__uuid)
 239                 uuid_unparse_lower(jail->uuid, jail->__uuid);
 240
 241         return jail->__uuid;
 242 }
 243
 244 static int pakfire_jail_setup_interactive_env(struct pakfire_jail* jail) {
 245         // Set PS1
 246         int r = pakfire_jail_set_env(jail, "PS1", "pakfire-jail \\w> ");
 247         if (r)
 248                 return r;
 249
 250         // Copy TERM
 251         char* TERM = secure_getenv("TERM");
 252         if (TERM) {
 253                 r = pakfire_jail_set_env(jail, "TERM", TERM);
 254                 if (r)
 255                         return r;
 256         }
 257
 258         // Copy LANG
 259         char* LANG = secure_getenv("LANG");
 260         if (LANG) {
 261                 r = pakfire_jail_set_env(jail, "LANG", LANG);
 262                 if (r)
 263                         return r;
 264         }
 265
 266         return 0;
 267 }
 268
 269 PAKFIRE_EXPORT int pakfire_jail_create(struct pakfire_jail** jail, struct pakfire* pakfire) {
 270         int r;
 271
 272         const char* arch = pakfire_get_effective_arch(pakfire);
 273
 274         // Allocate a new jail
 275         struct pakfire_jail* j = calloc(1, sizeof(*j));
 276         if (!j)
 277                 return 1;
 278
 279         // Reference context
 280         j->ctx = pakfire_ctx(pakfire);
 281
 282         // Reference Pakfire
 283         j->pakfire = pakfire_ref(pakfire);
 284
 285         // Initialize reference counter
 286         j->nrefs = 1;
 287
 288         // Generate a random UUID
 289         uuid_generate_random(j->uuid);
 290
 291         DEBUG(j->pakfire, "Allocated new jail at %p\n", j);
 292
 293         // Set the default logging callback
 294         pakfire_jail_set_log_callback(j, pakfire_jail_default_log_callback, NULL);
 295
 296         // Set default environment
 297         for (const struct environ* e = ENV; e->key; e++) {
 298                 r = pakfire_jail_set_env(j, e->key, e->val);
 299                 if (r)
 300                         goto ERROR;
 301         }
 302
 303         // Enable all CPU features that CPU has to offer
 304         if (!pakfire_arch_is_supported_by_host(arch)) {
 305                 r = pakfire_jail_set_env(j, "QEMU_CPU", "max");
 306                 if (r)
 307                         goto ERROR;
 308         }
 309
 310         // Set container UUID
 311         r = pakfire_jail_set_env(j, "container_uuid", pakfire_jail_uuid(j));
 312         if (r)
 313                 goto ERROR;
 314
 315         // Disable systemctl to talk to systemd
 316         if (!pakfire_on_root(j->pakfire)) {
 317                 r = pakfire_jail_set_env(j, "SYSTEMD_OFFLINE", "1");
 318                 if (r)
 319                         goto ERROR;
 320         }
 321
 322         // Done
 323         *jail = j;
 324         return 0;
 325
 326 ERROR:
 327         pakfire_jail_free(j);
 328
 329         return r;
 330 }
 331
 332 PAKFIRE_EXPORT struct pakfire_jail* pakfire_jail_ref(struct pakfire_jail* jail) {
 333         ++jail->nrefs;
 334
 335         return jail;
 336 }
 337
 338 PAKFIRE_EXPORT struct pakfire_jail* pakfire_jail_unref(struct pakfire_jail* jail) {
 339         if (--jail->nrefs > 0)
 340                 return jail;
 341
 342         pakfire_jail_free(jail);
 343         return NULL;
 344 }
 345
 346 // Logging Callback
 347
 348 PAKFIRE_EXPORT void pakfire_jail_set_log_callback(struct pakfire_jail* jail,
 349                 pakfire_jail_log_callback callback, void* data) {
 350         jail->callbacks.log = callback;
 351         jail->callbacks.log_data = data;
 352 }
 353
 354 // Resource Limits
 355
 356 PAKFIRE_EXPORT int pakfire_jail_nice(struct pakfire_jail* jail, int nice) {
 357         // Check if nice level is in range
 358         if (nice < -19 || nice > 20) {
 359                 errno = EINVAL;
 360                 return 1;
 361         }
 362
 363         // Store nice level
 364         jail->nice = nice;
 365
 366         return 0;
 367 }
 368
 369 int pakfire_jail_set_cgroup(struct pakfire_jail* jail, struct pakfire_cgroup* cgroup) {
 370         // Free any previous cgroup
 371         if (jail->cgroup) {
 372                 pakfire_cgroup_unref(jail->cgroup);
 373                 jail->cgroup = NULL;
 374         }
 375
 376         // Set any new cgroup
 377         if (cgroup) {
 378                 DEBUG(jail->pakfire, "Setting cgroup %p\n", cgroup);
 379
 380                 jail->cgroup = pakfire_cgroup_ref(cgroup);
 381         }
 382
 383         // Done
 384         return 0;
 385 }
 386
 387 // Environment
 388
 389 // Returns the length of the environment
 390 static unsigned int pakfire_jail_env_length(struct pakfire_jail* jail) {
 391         unsigned int i = 0;
 392
 393         // Count everything in the environment
 394         for (char** e = jail->env; *e; e++)
 395                 i++;
 396
 397         return i;
 398 }
 399
 400 // Finds an existing environment variable and returns its index or -1 if not found
 401 static int pakfire_jail_find_env(struct pakfire_jail* jail, const char* key) {
 402         if (!key) {
 403                 errno = EINVAL;
 404                 return -1;
 405         }
 406
 407         const size_t length = strlen(key);
 408
 409         for (unsigned int i = 0; jail->env[i]; i++) {
 410                 if ((pakfire_string_startswith(jail->env[i], key)
 411                                 && *(jail->env[i] + length) == '=')) {
 412                         return i;
 413                 }
 414         }
 415
 416         // Nothing found
 417         return -1;
 418 }
 419
 420 // Returns the value of an environment variable or NULL
 421 PAKFIRE_EXPORT const char* pakfire_jail_get_env(struct pakfire_jail* jail,
 422                 const char* key) {
 423         int i = pakfire_jail_find_env(jail, key);
 424         if (i < 0)
 425                 return NULL;
 426
 427         return jail->env[i] + strlen(key) + 1;
 428 }
 429
 430 // Sets an environment variable
 431 PAKFIRE_EXPORT int pakfire_jail_set_env(struct pakfire_jail* jail,
 432                 const char* key, const char* value) {
 433         // Find the index where to write this value to
 434         int i = pakfire_jail_find_env(jail, key);
 435         if (i < 0)
 436                 i = pakfire_jail_env_length(jail);
 437
 438         // Return -ENOSPC when the environment is full
 439         if (i >= ENVIRON_SIZE) {
 440                 errno = ENOSPC;
 441                 return -1;
 442         }
 443
 444         // Free any previous value
 445         if (jail->env[i])
 446                 free(jail->env[i]);
 447
 448         // Format and set environment variable
 449         asprintf(&jail->env[i], "%s=%s", key, value);
 450
 451         DEBUG(jail->pakfire, "Set environment variable: %s\n", jail->env[i]);
 452
 453         return 0;
 454 }
 455
 456 // Imports an environment
 457 PAKFIRE_EXPORT int pakfire_jail_import_env(struct pakfire_jail* jail, const char* env[]) {
 458         if (!env)
 459                 return 0;
 460
 461         char* key;
 462         char* val;
 463         int r;
 464
 465         // Copy environment variables
 466         for (unsigned int i = 0; env[i]; i++) {
 467                 r = pakfire_string_partition(env[i], "=", &key, &val);
 468                 if (r)
 469                         continue;
 470
 471                 // Set value
 472                 r = pakfire_jail_set_env(jail, key, val);
 473
 474                 if (key)
 475                         free(key);
 476                 if (val)
 477                         free(val);
 478
 479                 // Break on error
 480                 if (r)
 481                         return r;
 482         }
 483
 484         return 0;
 485 }
 486
 487 // Timeout
 488
 489 PAKFIRE_EXPORT int pakfire_jail_set_timeout(
 490                 struct pakfire_jail* jail, unsigned int timeout) {
 491         // Store value
 492         jail->timeout.it_value.tv_sec = timeout;
 493
 494         if (timeout > 0)
 495                 DEBUG(jail->pakfire, "Timeout set to %u second(s)\n", timeout);
 496         else
 497                 DEBUG(jail->pakfire, "Timeout disabled\n");
 498
 499         return 0;
 500 }
 501
 502 static int pakfire_jail_create_timer(struct pakfire_jail* jail) {
 503         int r;
 504
 505         // Nothing to do if no timeout has been set
 506         if (!jail->timeout.it_value.tv_sec)
 507                 return -1;
 508
 509         // Create a new timer
 510         const int fd = timerfd_create(CLOCK_MONOTONIC, 0);
 511         if (fd < 0) {
 512                 ERROR(jail->pakfire, "Could not create timer: %m\n");
 513                 goto ERROR;
 514         }
 515
 516         // Arm timer
 517         r = timerfd_settime(fd, 0, &jail->timeout, NULL);
 518         if (r) {
 519                 ERROR(jail->pakfire, "Could not arm timer: %m\n");
 520                 goto ERROR;
 521         }
 522
 523         return fd;
 524
 525 ERROR:
 526         if (fd >= 0)
 527                 close(fd);
 528
 529         return -1;
 530 }
 531
 532 /*
 533         This function replaces any logging in the child process.
 534
 535         All log messages will be sent to the parent process through their respective pipes.
 536 */
 537 static void pakfire_jail_log_redirect(void* data, int priority, const char* file,
 538                 int line, const char* fn, const char* format, va_list args) {
 539         struct pakfire_jail_pipes* pipes = (struct pakfire_jail_pipes*)data;
 540         int fd;
 541
 542         switch (priority) {
 543                 case LOG_INFO:
 544                         fd = pipes->log_INFO[1];
 545                         break;
 546
 547                 case LOG_ERR:
 548                         fd = pipes->log_ERROR[1];
 549                         break;
 550
 551 #ifdef ENABLE_DEBUG
 552                 case LOG_DEBUG:
 553                         fd = pipes->log_DEBUG[1];
 554                         break;
 555 #endif /* ENABLE_DEBUG */
 556
 557                 // Ignore any messages of an unknown priority
 558                 default:
 559                         return;
 560         }
 561
 562         // Send the log message
 563         if (fd >= 0)
 564                 vdprintf(fd, format, args);
 565 }
 566
 567 static int pakfire_jail_log_buffer_is_full(const struct pakfire_log_buffer* buffer) {
 568         return (sizeof(buffer->data) == buffer->used);
 569 }
 570
 571 /*
 572         This function reads as much data as it can from the file descriptor.
 573         If it finds a whole line in it, it will send it to the logger and repeat the process.
 574         If not newline character is found, it will try to read more data until it finds one.
 575 */
 576 static int pakfire_jail_handle_log(struct pakfire_jail* jail,
 577                 struct pakfire_jail_exec* ctx, int priority, int fd,
 578                 struct pakfire_log_buffer* buffer, pakfire_jail_communicate_out callback, void* data) {
 579         char line[BUFFER_SIZE + 1];
 580
 581         // Fill up buffer from fd
 582         if (buffer->used < sizeof(buffer->data)) {
 583                 ssize_t bytes_read = read(fd, buffer->data + buffer->used,
 584                                 sizeof(buffer->data) - buffer->used);
 585
 586                 // Handle errors
 587                 if (bytes_read < 0) {
 588                         ERROR(jail->pakfire, "Could not read from fd %d: %m\n", fd);
 589                         return -1;
 590                 }
 591
 592                 // Update buffer size
 593                 buffer->used += bytes_read;
 594         }
 595
 596         // See if we have any lines that we can write
 597         while (buffer->used) {
 598                 // Search for the end of the first line
 599                 char* eol = memchr(buffer->data, '\n', buffer->used);
 600
 601                 // No newline found
 602                 if (!eol) {
 603                         // If the buffer is full, we send the content to the logger and try again
 604                         // This should not happen in practise
 605                         if (pakfire_jail_log_buffer_is_full(buffer)) {
 606                                 DEBUG(jail->pakfire, "Logging buffer is full. Sending all content\n");
 607
 608                                 eol = buffer->data + sizeof(buffer->data) - 1;
 609
 610                         // Otherwise we might have only read parts of the output
 611                         } else
 612                                 break;
 613                 }
 614
 615                 // Find the length of the string
 616                 size_t length = eol - buffer->data + 1;
 617
 618                 // Copy the line into the buffer
 619                 memcpy(line, buffer->data, length);
 620
 621                 // Terminate the string
 622                 line[length] = '\0';
 623
 624                 // Log the line
 625                 if (callback) {
 626                         int r = callback(jail->pakfire, data, priority, line, length);
 627                         if (r) {
 628                                 ERROR(jail->pakfire, "The logging callback returned an error: %d\n", r);
 629                                 return r;
 630                         }
 631                 }
 632
 633                 // Remove line from buffer
 634                 memmove(buffer->data, buffer->data + length, buffer->used - length);
 635                 buffer->used -= length;
 636         }
 637
 638         return 0;
 639 }
 640
 641 static int pakfire_jail_stream_stdin(struct pakfire_jail* jail,
 642                 struct pakfire_jail_exec* ctx, const int fd) {
 643         int r;
 644
 645         // Nothing to do if there is no stdin callback set
 646         if (!ctx->communicate.in) {
 647                 DEBUG(jail->pakfire, "Callback for standard input is not set\n");
 648                 return 0;
 649         }
 650
 651         // Skip if the writing pipe has already been closed
 652         if (!ctx->pipes.stdin[1])
 653                 return 0;
 654
 655         DEBUG(jail->pakfire, "Streaming standard input...\n");
 656
 657         // Calling the callback
 658         r = ctx->communicate.in(jail->pakfire, ctx->communicate.data, fd);
 659
 660         DEBUG(jail->pakfire, "Standard input callback finished: %d\n", r);
 661
 662         // The callback signaled that it has written everything
 663         if (r == EOF) {
 664                 DEBUG(jail->pakfire, "Closing standard input pipe\n");
 665
 666                 // Close the file-descriptor
 667                 close(fd);
 668
 669                 // Reset the file-descriptor so it won't be closed again later
 670                 ctx->pipes.stdin[1] = -1;
 671
 672                 // Report success
 673                 r = 0;
 674         }
 675
 676         return r;
 677 }
 678
 679 static int pakfire_jail_setup_pipe(struct pakfire_jail* jail, int (*fds)[2], const int flags) {
 680         int r = pipe2(*fds, flags);
 681         if (r < 0) {
 682                 ERROR(jail->pakfire, "Could not setup pipe: %m\n");
 683                 return 1;
 684         }
 685
 686         return 0;
 687 }
 688
 689 static void pakfire_jail_close_pipe(struct pakfire_jail* jail, int fds[2]) {
 690         for (unsigned int i = 0; i < 2; i++)
 691                 if (fds[i] >= 0)
 692                         close(fds[i]);
 693 }
 694
 695 /*
 696         This is a convenience function to fetch the reading end of a pipe and
 697         closes the write end.
 698 */
 699 static int pakfire_jail_get_pipe_to_read(struct pakfire_jail* jail, int (*fds)[2]) {
 700         // Give the variables easier names to avoid confusion
 701         int* fd_read  = &(*fds)[0];
 702         int* fd_write = &(*fds)[1];
 703
 704         // Close the write end of the pipe
 705         if (*fd_write >= 0) {
 706                 close(*fd_write);
 707                 *fd_write = -1;
 708         }
 709
 710         // Return the read end
 711         if (*fd_read >= 0)
 712                 return *fd_read;
 713
 714         return -1;
 715 }
 716
 717 static int pakfire_jail_get_pipe_to_write(struct pakfire_jail* jail, int (*fds)[2]) {
 718         // Give the variables easier names to avoid confusion
 719         int* fd_read  = &(*fds)[0];
 720         int* fd_write = &(*fds)[1];
 721
 722         // Close the read end of the pipe
 723         if (*fd_read >= 0) {
 724                 close(*fd_read);
 725                 *fd_read = -1;
 726         }
 727
 728         // Return the write end
 729         if (*fd_write >= 0)
 730                 return *fd_write;
 731
 732         return -1;
 733 }
 734
 735 static int pakfire_jail_recv_fd(struct pakfire_jail* jail, int socket, int* fd) {
 736         const size_t payload_length = sizeof(fd);
 737         char buffer[CMSG_SPACE(payload_length)];
 738         int r;
 739
 740         struct msghdr msg = {
 741                 .msg_control    = buffer,
 742                 .msg_controllen = sizeof(buffer),
 743         };
 744
 745         // Receive the message
 746         r = recvmsg(socket, &msg, 0);
 747         if (r) {
 748                 CTX_ERROR(jail->ctx, "Could not receive file descriptor: %s\n", strerror(errno));
 749                 return -errno;
 750         }
 751
 752         // Fetch the payload
 753         struct cmsghdr* cmsg = CMSG_FIRSTHDR(&msg);
 754         if (!cmsg)
 755                 return -EBADMSG;
 756
 757         *fd = *((int*)CMSG_DATA(cmsg));
 758
 759         CTX_DEBUG(jail->ctx, "Received fd %d from socket %d\n", *fd, socket);
 760
 761         return 0;
 762 }
 763
 764 static int pakfire_jail_send_fd(struct pakfire_jail* jail, int socket, int fd) {
 765         const size_t payload_length = sizeof(fd);
 766         char buffer[CMSG_SPACE(payload_length)];
 767         int r;
 768
 769         CTX_DEBUG(jail->ctx, "Sending fd %d to socket %d\n", fd, socket);
 770
 771         // Header
 772         struct msghdr msg = {
 773                 .msg_control    = buffer,
 774                 .msg_controllen = sizeof(buffer),
 775         };
 776
 777         // Payload
 778         struct cmsghdr* cmsg = CMSG_FIRSTHDR(&msg);
 779         cmsg->cmsg_level = SOL_SOCKET;
 780         cmsg->cmsg_type  = SCM_RIGHTS;
 781         cmsg->cmsg_len   = CMSG_LEN(payload_length);
 782
 783         // Set payload
 784         *((int*)CMSG_DATA(cmsg)) = fd;
 785
 786         // Send the message
 787         r = sendmsg(socket, &msg, 0);
 788         if (r) {
 789                 CTX_ERROR(jail->ctx, "Could not send file descriptor: %s\n", strerror(errno));
 790                 return -errno;
 791         }
 792
 793         return 0;
 794 }
 795
 796 static int pakfire_jail_log(struct pakfire* pakfire, void* data, int priority,
 797                 const char* line, const size_t length) {
 798         // Pass everything to the parent logger
 799         pakfire_log_condition(pakfire, priority, 0, "%.*s", (int)length, line);
 800
 801         return 0;
 802 }
 803
 804 static int pakfire_jail_epoll_add_fd(struct pakfire_jail* jail, int epollfd, int fd, int events) {
 805         struct epoll_event event = {
 806                 .events = events|EPOLLHUP,
 807                 .data   = {
 808                         .fd = fd,
 809                 },
 810         };
 811         int r;
 812
 813         // Read flags
 814         int flags = fcntl(fd, F_GETFL, 0);
 815
 816         // Set modified flags
 817         r  = fcntl(fd, F_SETFL, flags|O_NONBLOCK);
 818         if (r < 0) {
 819                 CTX_ERROR(jail->ctx, "Could not set file descriptor %d into non-blocking mode: %s\n",
 820                         fd, strerror(errno));
 821                 return -errno;
 822         }
 823
 824         // Add the file descriptor to the loop
 825         r = epoll_ctl(epollfd, EPOLL_CTL_ADD, fd, &event);
 826         if (r < 0) {
 827                 ERROR(jail->pakfire, "Could not add file descriptor %d to epoll(): %s\n",
 828                         fd, strerror(errno));
 829                 return -errno;
 830         }
 831
 832         return 0;
 833 }
 834
// Forward declaration (the definition is not part of this section of the file)
static int pakfire_jail_setup_child2(struct pakfire_jail* jail, struct pakfire_jail_exec* ctx);
 836
 837 static int pakfire_jail_wait_on_child(struct pakfire_jail* jail, int pidfd) {
 838         siginfo_t status = {};
 839         int r;
 840
 841         // Call waitid() and store the result
 842         r = waitid(P_PIDFD, pidfd, &status, WEXITED);
 843         if (r) {
 844                 CTX_ERROR(jail->ctx, "waitid() failed: %s\n", strerror(errno));
 845                 return -errno;
 846         }
 847
 848         switch (status.si_code) {
 849                 // If the process exited normally, we return the exit code
 850                 case CLD_EXITED:
 851                         CTX_DEBUG(jail->ctx, "The child process exited with code %d\n", status.si_status);
 852                         return status.si_status;
 853
 854                 case CLD_KILLED:
 855                         CTX_ERROR(jail->ctx, "The child process was killed\n");
 856                         return 139;
 857
 858                 case CLD_DUMPED:
 859                         CTX_ERROR(jail->ctx, "The child process terminated abnormally\n");
 860                         return 139;
 861
 862                 // Log anything else
 863                 default:
 864                         CTX_ERROR(jail->ctx, "Unknown child exit code: %d\n", status.si_code);
 865                         break;
 866         }
 867
 868         return -EBADMSG;
 869 }
 870
 871 static int pakfire_jail_wait(struct pakfire_jail* jail, struct pakfire_jail_exec* ctx) {
 872         int epollfd = -1;
 873         struct epoll_event events[EPOLL_MAX_EVENTS];
 874         char garbage[8];
 875         int r = 0;
 876
 877         // Fetch the UNIX domain socket
 878         const int socket_recv = pakfire_jail_get_pipe_to_read(jail, &ctx->socket);
 879
 880         // Fetch file descriptors from context
 881         const int stdin = pakfire_jail_get_pipe_to_write(jail, &ctx->pipes.stdin);
 882         const int stdout = pakfire_jail_get_pipe_to_read(jail, &ctx->pipes.stdout);
 883         const int stderr = pakfire_jail_get_pipe_to_read(jail, &ctx->pipes.stderr);
 884
 885         // Timer
 886         const int timerfd = pakfire_jail_create_timer(jail);
 887
 888         // Logging
 889         const int log_INFO  = pakfire_jail_get_pipe_to_read(jail, &ctx->pipes.log_INFO);
 890         const int log_ERROR = pakfire_jail_get_pipe_to_read(jail, &ctx->pipes.log_ERROR);
 891 #ifdef ENABLE_DEBUG
 892         const int log_DEBUG = pakfire_jail_get_pipe_to_read(jail, &ctx->pipes.log_DEBUG);
 893 #endif /* ENABLE_DEBUG */
 894
 895         // Make a list of all file descriptors we are interested in
 896         const struct pakfire_wait_fds {
 897                 const int fd;
 898                 const int events;
 899         } fds[] = {
 900                 { socket_recv, EPOLLIN },
 901
 902                 // Standard input/output
 903                 { stdin,  EPOLLOUT },
 904                 { stdout, EPOLLIN },
 905                 { stderr, EPOLLIN },
 906
 907                 // Timer
 908                 { timerfd, EPOLLIN },
 909
 910                 // Child Processes
 911                 { ctx->pidfd1, EPOLLIN },
 912
 913                 // Log Pipes
 914                 { log_INFO, EPOLLIN },
 915                 { log_ERROR, EPOLLIN },
 916 #ifdef ENABLE_DEBUG
 917                 { log_DEBUG, EPOLLIN },
 918 #endif /* ENABLE_DEBUG */
 919
 920                 // Sentinel
 921                 { -1, 0 },
 922         };
 923
 924         // Setup epoll
 925         epollfd = epoll_create1(0);
 926         if (epollfd < 0) {
 927                 ERROR(jail->pakfire, "Could not initialize epoll(): %m\n");
 928                 r = 1;
 929                 goto ERROR;
 930         }
 931
 932         // Turn file descriptors into non-blocking mode and add them to epoll()
 933         for (const struct pakfire_wait_fds* fd = fds; fd->events; fd++) {
 934                 // Skip fds which were not initialized
 935                 if (fd->fd < 0)
 936                         continue;
 937
 938                 // Add the FD to the event loop
 939                 r = pakfire_jail_epoll_add_fd(jail, epollfd, fd->fd, fd->events);
 940                 if (r)
 941                         goto ERROR;
 942         }
 943
 944         int ended = 0;
 945         int exit = 0;
 946
 947         CTX_DEBUG(jail->ctx, "Launching main loop...\n");
 948
 949         // Loop for as long as the process is alive
 950         while (!ended) {
 951                 int num = epoll_wait(epollfd, events, EPOLL_MAX_EVENTS, -1);
 952                 if (num < 1) {
 953                         // Ignore if epoll_wait() has been interrupted
 954                         if (errno == EINTR)
 955                                 continue;
 956
 957                         ERROR(jail->pakfire, "epoll_wait() failed: %m\n");
 958                         r = 1;
 959
 960                         goto ERROR;
 961                 }
 962
 963                 for (int i = 0; i < num; i++) {
 964                         int e  = events[i].events;
 965                         int fd = events[i].data.fd;
 966
 967                         struct pakfire_log_buffer* buffer = NULL;
 968                         pakfire_jail_communicate_out callback = NULL;
 969                         void* data = NULL;
 970                         int priority;
 971
 972                         // Check if there is any data to be read
 973                         if (e & EPOLLIN) {
 974                                 // Monitor the first child process
 975                                 if (fd == ctx->pidfd1) {
 976                                         r = pakfire_jail_wait_on_child(jail, ctx->pidfd1);
 977                                         if (r) {
 978                                                 CTX_ERROR(jail->ctx, "The first child exited with an error\n");
 979                                                 goto ERROR;
 980                                         }
 981
 982                                         close(ctx->pidfd1);
 983                                         ctx->pidfd1 = -1;
 984
 985                                         continue;
 986
 987                                 // Monitor the second child process
 988                                 } else if (fd == ctx->pidfd2) {
 989                                         exit = pakfire_jail_wait_on_child(jail, ctx->pidfd2);
 990                                         if (exit < 0) {
 991                                                 CTX_ERROR(jail->ctx, "The second child exited with an error\n");
 992                                                 goto ERROR;
 993                                         }
 994
 995                                         close(ctx->pidfd2);
 996                                         ctx->pidfd2 = -1;
 997
 998                                         // Mark that we have ended so that we will process the remaining
 999                                         // events from epoll() now, but won't restart the outer loop.
1000                                         ended = 1;
1001
1002                                         continue;
1003
1004                                 // Handle timer events
1005                                 } else if (fd == timerfd) {
1006                                         DEBUG(jail->pakfire, "Timer event received\n");
1007
1008                                         // Disarm the timer
1009                                         r = read(timerfd, garbage, sizeof(garbage));
1010                                         if (r < 1) {
1011                                                 ERROR(jail->pakfire, "Could not disarm timer: %m\n");
1012                                                 r = 1;
1013                                                 goto ERROR;
1014                                         }
1015
1016                                         // Terminate the process if it hasn't already ended
1017                                         if (!ended) {
1018                                                 DEBUG(jail->pakfire, "Terminating process...\n");
1019
1020                                                 // Send SIGTERM to the process
1021                                                 r = pidfd_send_signal(ctx->pidfd2, SIGKILL, NULL, 0);
1022                                                 if (r) {
1023                                                         ERROR(jail->pakfire, "Could not kill process: %m\n");
1024                                                         goto ERROR;
1025                                                 }
1026                                         }
1027
1028                                         // There is nothing else to do
1029                                         continue;
1030
1031                                 // Handle socket messages
1032                                 } else if (fd == socket_recv) {
1033                                         // Receive the FD of the second child process
1034                                         r = pakfire_jail_recv_fd(jail, socket_recv, &ctx->pidfd2);
1035                                         if (r)
1036                                                 goto ERROR;
1037
1038                                         // Add it to the event loop
1039                                         r = pakfire_jail_epoll_add_fd(jail, epollfd, ctx->pidfd2, EPOLLIN);
1040                                         if (r)
1041                                                 goto ERROR;
1042
1043                                         // Setup the child process
1044                                         r = pakfire_jail_setup_child2(jail, ctx);
1045                                         if (r)
1046                                                 goto ERROR;
1047
1048                                         // Don't fall through to log processing
1049                                         continue;
1050
1051                                 // Handle logging messages
1052                                 } else if (fd == log_INFO) {
1053                                         buffer = &ctx->buffers.log_INFO;
1054                                         priority = LOG_INFO;
1055
1056                                         callback = pakfire_jail_log;
1057
1058                                 } else if (fd == log_ERROR) {
1059                                         buffer = &ctx->buffers.log_ERROR;
1060                                         priority = LOG_ERR;
1061
1062                                         callback = pakfire_jail_log;
1063
1064 #ifdef ENABLE_DEBUG
1065                                 } else if (fd == log_DEBUG) {
1066                                         buffer = &ctx->buffers.log_DEBUG;
1067                                         priority = LOG_DEBUG;
1068
1069                                         callback = pakfire_jail_log;
1070 #endif /* ENABLE_DEBUG */
1071
1072                                 // Handle anything from the log pipes
1073                                 } else if (fd == stdout) {
1074                                         buffer = &ctx->buffers.stdout;
1075                                         priority = LOG_INFO;
1076
1077                                         // Send any output to the default logger if no callback is set
1078                                         if (ctx->communicate.out) {
1079                                                 callback = ctx->communicate.out;
1080                                                 data     = ctx->communicate.data;
1081                                         } else {
1082                                                 callback = jail->callbacks.log;
1083                                                 data     = jail->callbacks.log_data;
1084                                         }
1085
1086                                 } else if (fd == stderr) {
1087                                         buffer = &ctx->buffers.stderr;
1088                                         priority = LOG_ERR;
1089
1090                                         // Send any output to the default logger if no callback is set
1091                                         if (ctx->communicate.out) {
1092                                                 callback = ctx->communicate.out;
1093                                                 data     = ctx->communicate.data;
1094                                         } else {
1095                                                 callback = jail->callbacks.log;
1096                                                 data     = jail->callbacks.log_data;
1097                                         }
1098
1099                                 } else {
1100                                         DEBUG(jail->pakfire, "Received invalid file descriptor %d\n", fd);
1101                                         continue;
1102                                 }
1103
1104                                 // Handle log event
1105                                 r = pakfire_jail_handle_log(jail, ctx, priority, fd, buffer, callback, data);
1106                                 if (r)
1107                                         goto ERROR;
1108                         }
1109
1110                         if (e & EPOLLOUT) {
1111                                 // Handle standard input
1112                                 if (fd == stdin) {
1113                                         r = pakfire_jail_stream_stdin(jail, ctx, fd);
1114                                         if (r) {
1115                                                 switch (errno) {
1116                                                         // Ignore if we filled up the buffer
1117                                                         case EAGAIN:
1118                                                                 break;
1119
1120                                                         default:
1121                                                                 ERROR(jail->pakfire, "Could not write to stdin: %m\n");
1122                                                                 goto ERROR;
1123                                                 }
1124                                         }
1125                                 }
1126                         }
1127
1128                         // Check if any file descriptors have been closed
1129                         if (e & EPOLLHUP) {
1130                                 // Remove the file descriptor
1131                                 r = epoll_ctl(epollfd, EPOLL_CTL_DEL, fd, NULL);
1132                                 if (r) {
1133                                         ERROR(jail->pakfire, "Could not remove closed file-descriptor %d: %m\n", fd);
1134                                         goto ERROR;
1135                                 }
1136                         }
1137                 }
1138         }
1139
1140         // Return the exit code
1141         r = exit;
1142
1143 ERROR:
1144         CTX_DEBUG(jail->ctx, "Main loop terminated\n");
1145
1146         if (epollfd >= 0)
1147                 close(epollfd);
1148         if (timerfd >= 0)
1149                 close(timerfd);
1150
1151         return r;
1152 }
1153
/*
	Output callback that collects everything received with priority LOG_INFO
	in a string. Anything else is passed on to the default log callback.

	data is expected to point to a char* which is either NULL or has been
	allocated on the heap (e.g. by a previous call of this function). The
	string is replaced by a newly allocated one that has line appended, and
	the previous one is freed. The caller has to free the final string.

	Returns zero on success and 1 if no memory could be allocated, in which
	case the string that has been collected so far is left untouched.
*/
int pakfire_jail_capture_stdout(struct pakfire* pakfire, void* data,
		int priority, const char* line, size_t length) {
	char** output = (char**)data;
	char* buffer = NULL;
	int r;

	// Append everything from stdout to a buffer
	if (output && priority == LOG_INFO) {
		// Write into a new buffer first so that the old one remains valid
		// if the allocation fails
		r = asprintf(&buffer, "%s%s", (*output) ? *output : "", line);
		if (r < 0)
			return 1;

		// Release the previous buffer which would otherwise be leaked
		free(*output);
		*output = buffer;

		return 0;
	}

	// Send everything else to the default logger
	return pakfire_jail_default_log_callback(pakfire, NULL, priority, line, length);
}
1170
1171 // Capabilities
1172
1173 // Logs all capabilities of the current process
1174 static int pakfire_jail_show_capabilities(struct pakfire_jail* jail) {
1175         cap_t caps = NULL;
1176         char* name = NULL;
1177         cap_flag_value_t value_e;
1178         cap_flag_value_t value_i;
1179         cap_flag_value_t value_p;
1180         int r;
1181
1182         // Fetch PID
1183         pid_t pid = getpid();
1184
1185         // Fetch all capabilities
1186         caps = cap_get_proc();
1187         if (!caps) {
1188                 ERROR(jail->pakfire, "Could not fetch capabilities: %m\n");
1189                 r = 1;
1190                 goto ERROR;
1191         }
1192
1193         DEBUG(jail->pakfire, "Capabilities of PID %d:\n", pid);
1194
1195         // Iterate over all capabilities
1196         for (unsigned int cap = 0; cap_valid(cap); cap++) {
1197                 name = cap_to_name(cap);
1198
1199                 // Fetch effective value
1200                 r = cap_get_flag(caps, cap, CAP_EFFECTIVE, &value_e);
1201                 if (r)
1202                         goto ERROR;
1203
1204                 // Fetch inheritable value
1205                 r = cap_get_flag(caps, cap, CAP_INHERITABLE, &value_i);
1206                 if (r)
1207                         goto ERROR;
1208
1209                 // Fetch permitted value
1210                 r = cap_get_flag(caps, cap, CAP_PERMITTED, &value_p);
1211                 if (r)
1212                         goto ERROR;
1213
1214                 DEBUG(jail->pakfire,
1215                         "  %-24s : %c%c%c\n",
1216                         name,
1217                         (value_e == CAP_SET) ? 'e' : '-',
1218                         (value_i == CAP_SET) ? 'i' : '-',
1219                         (value_p == CAP_SET) ? 'p' : '-'
1220                 );
1221
1222                 // Free name
1223                 cap_free(name);
1224                 name = NULL;
1225         }
1226
1227         // Success
1228         r = 0;
1229
1230 ERROR:
1231         if (name)
1232                 cap_free(name);
1233         if (caps)
1234                 cap_free(caps);
1235
1236         return r;
1237 }
1238
1239 static int pakfire_jail_set_capabilities(struct pakfire_jail* jail) {
1240         cap_t caps = NULL;
1241         char* name = NULL;
1242         int r;
1243
1244         // Fetch capabilities
1245         caps = cap_get_proc();
1246         if (!caps) {
1247                 ERROR(jail->pakfire, "Could not read capabilities: %m\n");
1248                 r = 1;
1249                 goto ERROR;
1250         }
1251
1252         // Walk through all capabilities
1253         for (cap_value_t cap = 0; cap_valid(cap); cap++) {
1254                 cap_value_t _caps[] = { cap };
1255
1256                 // Fetch the name of the capability
1257                 name = cap_to_name(cap);
1258
1259                 r = cap_set_flag(caps, CAP_EFFECTIVE, 1, _caps, CAP_SET);
1260                 if (r) {
1261                         ERROR(jail->pakfire, "Could not set %s: %m\n", name);
1262                         goto ERROR;
1263                 }
1264
1265                 r = cap_set_flag(caps, CAP_INHERITABLE, 1, _caps, CAP_SET);
1266                 if (r) {
1267                         ERROR(jail->pakfire, "Could not set %s: %m\n", name);
1268                         goto ERROR;
1269                 }
1270
1271                 r = cap_set_flag(caps, CAP_PERMITTED, 1, _caps, CAP_SET);
1272                 if (r) {
1273                         ERROR(jail->pakfire, "Could not set %s: %m\n", name);
1274                         goto ERROR;
1275                 }
1276
1277                 // Free name
1278                 cap_free(name);
1279                 name = NULL;
1280         }
1281
1282         // Restore all capabilities
1283         r = cap_set_proc(caps);
1284         if (r) {
1285                 ERROR(jail->pakfire, "Restoring capabilities failed: %m\n");
1286                 goto ERROR;
1287         }
1288
1289         // Add all capabilities to the ambient set
1290         for (unsigned int cap = 0; cap_valid(cap); cap++) {
1291                 name = cap_to_name(cap);
1292
1293                 // Raise the capability
1294                 r = prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_RAISE, cap, 0, 0);
1295                 if (r) {
1296                         ERROR(jail->pakfire, "Could not set ambient capability %s: %m\n", name);
1297                         goto ERROR;
1298                 }
1299
1300                 // Free name
1301                 cap_free(name);
1302                 name = NULL;
1303         }
1304
1305         // Success
1306         r = 0;
1307
1308 ERROR:
1309         if (name)
1310                 cap_free(name);
1311         if (caps)
1312                 cap_free(caps);
1313
1314         return r;
1315 }
1316
1317 // Syscall Filter
1318
1319 static int pakfire_jail_limit_syscalls(struct pakfire_jail* jail) {
1320         const int syscalls[] = {
1321                 // The kernel's keyring isn't namespaced
1322                 SCMP_SYS(keyctl),
1323                 SCMP_SYS(add_key),
1324                 SCMP_SYS(request_key),
1325
1326                 // Disable userfaultfd
1327                 SCMP_SYS(userfaultfd),
1328
1329                 // Disable perf which could leak a lot of information about the host
1330                 SCMP_SYS(perf_event_open),
1331
1332                 0,
1333         };
1334         int r = 1;
1335
1336         DEBUG(jail->pakfire, "Applying syscall filter...\n");
1337
1338         // Setup a syscall filter which allows everything by default
1339         scmp_filter_ctx ctx = seccomp_init(SCMP_ACT_ALLOW);
1340         if (!ctx) {
1341                 ERROR(jail->pakfire, "Could not setup seccomp filter: %m\n");
1342                 goto ERROR;
1343         }
1344
1345         // All all syscalls
1346         for (const int* syscall = syscalls; *syscall; syscall++) {
1347                 r = seccomp_rule_add(ctx, SCMP_ACT_ERRNO(EPERM), *syscall, 0);
1348                 if (r) {
1349                         ERROR(jail->pakfire, "Could not configure syscall %d: %m\n", *syscall);
1350                         goto ERROR;
1351                 }
1352         }
1353
1354         // Load syscall filter into the kernel
1355         r = seccomp_load(ctx);
1356         if (r) {
1357                 ERROR(jail->pakfire, "Could not load syscall filter into the kernel: %m\n");
1358                 goto ERROR;
1359         }
1360
1361 ERROR:
1362         if (ctx)
1363                 seccomp_release(ctx);
1364
1365         return r;
1366 }
1367
1368 // Mountpoints
1369
1370 PAKFIRE_EXPORT int pakfire_jail_bind(struct pakfire_jail* jail,
1371                 const char* source, const char* target, int flags) {
1372         struct pakfire_jail_mountpoint* mp = NULL;
1373         int r;
1374
1375         // Check if there is any space left
1376         if (jail->num_mountpoints >= MAX_MOUNTPOINTS) {
1377                 errno = ENOSPC;
1378                 return 1;
1379         }
1380
1381         // Check for valid inputs
1382         if (!source || !target) {
1383                 errno = EINVAL;
1384                 return 1;
1385         }
1386
1387         // Select the next free slot
1388         mp = &jail->mountpoints[jail->num_mountpoints];
1389
1390         // Copy source
1391         r = pakfire_string_set(mp->source, source);
1392         if (r) {
1393                 ERROR(jail->pakfire, "Could not copy source: %m\n");
1394                 return r;
1395         }
1396
1397         // Copy target
1398         r = pakfire_string_set(mp->target, target);
1399         if (r) {
1400                 ERROR(jail->pakfire, "Could not copy target: %m\n");
1401                 return r;
1402         }
1403
1404         // Copy flags
1405         mp->flags = flags;
1406
1407         // Increment counter
1408         jail->num_mountpoints++;
1409
1410         return 0;
1411 }
1412
1413 static int pakfire_jail_mount_networking(struct pakfire_jail* jail) {
1414         int r;
1415
1416         const char* paths[] = {
1417                 "/etc/hosts",
1418                 "/etc/resolv.conf",
1419                 NULL,
1420         };
1421
1422         // Bind-mount all paths read-only
1423         for (const char** path = paths; *path; path++) {
1424                 r = pakfire_bind(jail->pakfire, *path, NULL, MS_RDONLY);
1425                 if (r) {
1426                         switch (errno) {
1427                                 // Ignore if we don't have permission
1428                                 case EPERM:
1429                                         continue;
1430
1431                                 default:
1432                                         break;
1433                         }
1434                         return r;
1435                 }
1436         }
1437
1438         return 0;
1439 }
1440
1441 /*
1442         Mounts everything that we require in the new namespace
1443 */
1444 static int pakfire_jail_mount(struct pakfire_jail* jail, struct pakfire_jail_exec* ctx) {
1445         struct pakfire_jail_mountpoint* mp = NULL;
1446         int flags = 0;
1447         int r;
1448
1449         // Enable loop devices
1450         if (pakfire_jail_exec_has_flag(ctx, PAKFIRE_JAIL_HAS_LOOP_DEVICES))
1451                 flags |= PAKFIRE_MOUNT_LOOP_DEVICES;
1452
1453         // Mount all default stuff
1454         r = pakfire_mount_all(jail->pakfire, flags);
1455         if (r)
1456                 return r;
1457
1458         // Mount networking stuff
1459         if (pakfire_jail_exec_has_flag(ctx, PAKFIRE_JAIL_HAS_NETWORKING)) {
1460                 r = pakfire_jail_mount_networking(jail);
1461                 if (r)
1462                         return r;
1463         }
1464
1465         // Mount all custom stuff
1466         for (unsigned int i = 0; i < jail->num_mountpoints; i++) {
1467                 // Fetch mountpoint
1468                 mp = &jail->mountpoints[i];
1469
1470                 // Mount it
1471                 r = pakfire_bind(jail->pakfire, mp->source, mp->target, mp->flags);
1472                 if (r)
1473                         return r;
1474         }
1475
1476         // Log all mountpoints
1477         pakfire_mount_list(jail->pakfire);
1478
1479         return 0;
1480 }
1481
1482 // Networking
1483
1484 static int pakfire_jail_setup_loopback(struct pakfire_jail* jail) {
1485         struct nl_sock* nl = NULL;
1486         struct nl_cache* cache = NULL;
1487         struct rtnl_link* link = NULL;
1488         struct rtnl_link* change = NULL;
1489         int r;
1490
1491         DEBUG(jail->pakfire, "Setting up loopback...\n");
1492
1493         // Allocate a netlink socket
1494         nl = nl_socket_alloc();
1495         if (!nl) {
1496                 ERROR(jail->pakfire, "Could not allocate a netlink socket: %m\n");
1497                 r = 1;
1498                 goto ERROR;
1499         }
1500
1501         // Connect the socket
1502         r = nl_connect(nl, NETLINK_ROUTE);
1503         if (r) {
1504                 ERROR(jail->pakfire, "Could not connect netlink socket: %s\n", nl_geterror(r));
1505                 goto ERROR;
1506         }
1507
1508         // Allocate the netlink cache
1509         r = rtnl_link_alloc_cache(nl, AF_UNSPEC, &cache);
1510         if (r < 0) {
1511                 ERROR(jail->pakfire, "Unable to allocate netlink cache: %s\n", nl_geterror(r));
1512                 goto ERROR;
1513         }
1514
1515         // Fetch loopback interface
1516         link = rtnl_link_get_by_name(cache, "lo");
1517         if (!link) {
1518                 ERROR(jail->pakfire, "Could not find lo interface. Ignoring.\n");
1519                 r = 0;
1520                 goto ERROR;
1521         }
1522
1523         // Allocate a new link
1524         change = rtnl_link_alloc();
1525         if (!change) {
1526                 ERROR(jail->pakfire, "Could not allocate change link\n");
1527                 r = 1;
1528                 goto ERROR;
1529         }
1530
1531         // Set the link to UP
1532         rtnl_link_set_flags(change, IFF_UP);
1533
1534         // Apply any changes
1535         r = rtnl_link_change(nl, link, change, 0);
1536         if (r) {
1537                 ERROR(jail->pakfire, "Unable to activate loopback: %s\n", nl_geterror(r));
1538                 goto ERROR;
1539         }
1540
1541         // Success
1542         r = 0;
1543
1544 ERROR:
1545         if (nl)
1546                 nl_socket_free(nl);
1547
1548         return r;
1549 }
1550
1551 // UID/GID Mapping
1552
1553 static int pakfire_jail_setup_uid_mapping(struct pakfire_jail* jail, pid_t pid) {
1554         char path[PATH_MAX];
1555         int r;
1556
1557         // Skip mapping anything when running on /
1558         if (pakfire_on_root(jail->pakfire))
1559                 return 0;
1560
1561         // Make path
1562         r = pakfire_string_format(path, "/proc/%d/uid_map", pid);
1563         if (r)
1564                 return r;
1565
1566         // Fetch UID
1567         const uid_t uid = pakfire_uid(jail->pakfire);
1568
1569         // Fetch SUBUID
1570         const struct pakfire_subid* subuid = pakfire_subuid(jail->pakfire);
1571         if (!subuid)
1572                 return 1;
1573
1574         /* When running as root, we will map the entire range.
1575
1576            When running as a non-privileged user, we will map the root user inside the jail
1577            to the user's UID outside of the jail, and we will map the rest starting from one.
1578         */
1579
1580         // Running as root
1581         if (uid == 0) {
1582                 r = pakfire_file_write(jail->pakfire, path, 0, 0, 0,
1583                         "0 %lu %lu\n", subuid->id, subuid->length);
1584         } else {
1585                 r = pakfire_file_write(jail->pakfire, path, 0, 0, 0,
1586                         "0 %lu 1\n1 %lu %lu\n", uid, subuid->id, subuid->length);
1587         }
1588
1589         if (r) {
1590                 ERROR(jail->pakfire, "Could not map UIDs: %m\n");
1591                 return r;
1592         }
1593
1594         return r;
1595 }
1596
1597 static int pakfire_jail_setup_gid_mapping(struct pakfire_jail* jail, pid_t pid) {
1598         char path[PATH_MAX];
1599         int r;
1600
1601         // Skip mapping anything when running on /
1602         if (pakfire_on_root(jail->pakfire))
1603                 return 0;
1604
1605         // Fetch GID
1606         const gid_t gid = pakfire_gid(jail->pakfire);
1607
1608         // Fetch SUBGID
1609         const struct pakfire_subid* subgid = pakfire_subgid(jail->pakfire);
1610         if (!subgid)
1611                 return 1;
1612
1613         // Make path
1614         r = pakfire_string_format(path, "/proc/%d/gid_map", pid);
1615         if (r)
1616                 return r;
1617
1618         // Running as root
1619         if (gid == 0) {
1620                 r = pakfire_file_write(jail->pakfire, path, 0, 0, 0,
1621                         "0 %lu %lu\n", subgid->id, subgid->length);
1622         } else {
1623                 r = pakfire_file_write(jail->pakfire, path, 0, 0, 0,
1624                         "0 %lu 1\n1 %lu %lu\n", gid, subgid->id, subgid->length);
1625         }
1626
1627         if (r) {
1628                 ERROR(jail->pakfire, "Could not map GIDs: %m\n");
1629                 return r;
1630         }
1631
1632         return r;
1633 }
1634
1635 static int pakfire_jail_setgroups(struct pakfire_jail* jail, pid_t pid) {
1636         char path[PATH_MAX];
1637         int r;
1638
1639         // Make path
1640         r = pakfire_string_format(path, "/proc/%d/setgroups", pid);
1641         if (r)
1642                 return r;
1643
1644         r = pakfire_file_write(jail->pakfire, path, 0, 0, 0, "deny\n");
1645         if (r) {
1646                 CTX_ERROR(jail->ctx, "Could not set setgroups to deny: %s\n", strerror(errno));
1647                 r = -errno;
1648         }
1649
1650         return r;
1651 }
1652
1653 static int pakfire_jail_send_signal(struct pakfire_jail* jail, int fd) {
1654         const uint64_t val = 1;
1655         int r = 0;
1656
1657         DEBUG(jail->pakfire, "Sending signal...\n");
1658
1659         // Write to the file descriptor
1660         r = eventfd_write(fd, val);
1661         if (r < 0) {
1662                 ERROR(jail->pakfire, "Could not send signal: %s\n", strerror(errno));
1663                 r = -errno;
1664         }
1665
1666         // Close the file descriptor
1667         close(fd);
1668
1669         return r;
1670 }
1671
1672 static int pakfire_jail_wait_for_signal(struct pakfire_jail* jail, int fd) {
1673         uint64_t val = 0;
1674         int r = 0;
1675
1676         DEBUG(jail->pakfire, "Waiting for signal...\n");
1677
1678         r = eventfd_read(fd, &val);
1679         if (r < 0) {
1680                 ERROR(jail->pakfire, "Error waiting for signal: %s\n", strerror(errno));
1681                 r = -errno;
1682         }
1683
1684         // Close the file descriptor
1685         close(fd);
1686
1687         return r;
1688 }
1689
1690 static int pakfire_jail_switch_root(struct pakfire_jail* jail, const char* root) {
1691         int r;
1692
1693         // Change to the new root
1694         r = chdir(root);
1695         if (r) {
1696                 ERROR(jail->pakfire, "chdir(%s) failed: %m\n", root);
1697                 return r;
1698         }
1699
1700         // Switch Root!
1701         r = pivot_root(".", ".");
1702         if (r) {
1703                 ERROR(jail->pakfire, "Failed changing into the new root directory %s: %m\n", root);
1704                 return r;
1705         }
1706
1707         // Umount the old root
1708         r = umount2(".", MNT_DETACH);
1709         if (r) {
1710                 ERROR(jail->pakfire, "Could not umount the old root filesystem: %m\n");
1711                 return r;
1712         }
1713
1714         return 0;
1715 }
1716
1717 /*
1718         Called by the parent that sets up the second child process...
1719 */
1720 static int pakfire_jail_setup_child2(
1721                 struct pakfire_jail* jail, struct pakfire_jail_exec* ctx) {
1722         pid_t pid = -1;
1723         int r;
1724
1725         // Fetch the PID
1726         r = pidfd_get_pid(ctx->pidfd2, &pid);
1727         if (r) {
1728                 CTX_ERROR(jail->ctx, "Could not fetch PID: %s\n", strerror(-r));
1729                 return r;
1730         }
1731
1732         // Setup UID mapping
1733         r = pakfire_jail_setup_uid_mapping(jail, pid);
1734         if (r)
1735                 return r;
1736
1737         // Write "deny" to /proc/PID/setgroups
1738         r = pakfire_jail_setgroups(jail, pid);
1739         if (r)
1740                 return r;
1741
1742         // Setup GID mapping
1743         r = pakfire_jail_setup_gid_mapping(jail, pid);
1744         if (r)
1745                 return r;
1746
1747         // Parent has finished initialisation
1748         DEBUG(jail->pakfire, "Parent has finished initialization\n");
1749
1750         // Send signal to client
1751         r = pakfire_jail_send_signal(jail, ctx->completed_fd);
1752         if (r)
1753                 return r;
1754
1755         return 0;
1756 }
1757
1758 /*
1759         Child 2 is launched in their own user/mount/etc. namespace.
1760 */
1761 static int pakfire_jail_child2(struct pakfire_jail* jail,
1762                 struct pakfire_jail_exec* ctx, const char* argv[]) {
1763         int r;
1764
1765         // Fetch my own PID
1766         pid_t pid = getpid();
1767
1768         CTX_DEBUG(jail->ctx, "Launched child process in jail with PID %d\n", pid);
1769
1770         // Make this process dumpable
1771         r = prctl (PR_SET_DUMPABLE, 1, 0, 0, 0);
1772         if (r) {
1773                 CTX_ERROR(jail->ctx, "Could not make the process dumpable: %m\n");
1774                 return 126;
1775         }
1776
1777         // Don't drop any capabilities on setuid()
1778         r = prctl(PR_SET_KEEPCAPS, 1);
1779         if (r) {
1780                 CTX_ERROR(jail->ctx, "Could not set PR_SET_KEEPCAPS: %m\n");
1781                 return 126;
1782         }
1783
1784         // Wait for the parent to finish initialization
1785         r = pakfire_jail_wait_for_signal(jail, ctx->completed_fd);
1786         if (r)
1787                 return r;
1788
1789         // Fetch UID/GID
1790         uid_t uid = getuid();
1791         gid_t gid = getgid();
1792
1793         // Fetch EUID/EGID
1794         uid_t euid = geteuid();
1795         gid_t egid = getegid();
1796
1797         DEBUG(jail->pakfire, "  UID: %u (effective %u)\n", uid, euid);
1798         DEBUG(jail->pakfire, "  GID: %u (effective %u)\n", gid, egid);
1799
1800         // Fail if we are not PID 1
1801         if (pid != 1) {
1802                 CTX_ERROR(jail->ctx, "Child process is not PID 1\n");
1803                 return 126;
1804         }
1805
1806         // Fail if we are not running as root
1807         if (uid || gid || euid || egid) {
1808                 ERROR(jail->pakfire, "Child process is not running as root\n");
1809                 return 126;
1810         }
1811
1812         const char* arch = pakfire_get_effective_arch(jail->pakfire);
1813
1814         // Set personality
1815         unsigned long persona = pakfire_arch_personality(arch);
1816         if (persona) {
1817                 r = personality(persona);
1818                 if (r < 0) {
1819                         ERROR(jail->pakfire, "Could not set personality (%x)\n", (unsigned int)persona);
1820                         return 126;
1821                 }
1822         }
1823
1824         // Setup networking
1825         if (!pakfire_jail_exec_has_flag(ctx, PAKFIRE_JAIL_HAS_NETWORKING)) {
1826                 r = pakfire_jail_setup_loopback(jail);
1827                 if (r)
1828                         return 1;
1829         }
1830
1831         // Set nice level
1832         if (jail->nice) {
1833                 DEBUG(jail->pakfire, "Setting nice level to %d\n", jail->nice);
1834
1835                 r = setpriority(PRIO_PROCESS, pid, jail->nice);
1836                 if (r) {
1837                         ERROR(jail->pakfire, "Could not set nice level: %m\n");
1838                         return 1;
1839                 }
1840         }
1841
1842         // Close other end of log pipes
1843         close(ctx->pipes.log_INFO[0]);
1844         close(ctx->pipes.log_ERROR[0]);
1845 #ifdef ENABLE_DEBUG
1846         close(ctx->pipes.log_DEBUG[0]);
1847 #endif /* ENABLE_DEBUG */
1848
1849         // Connect standard input
1850         if (ctx->pipes.stdin[0] >= 0) {
1851                 r = dup2(ctx->pipes.stdin[0], STDIN_FILENO);
1852                 if (r < 0) {
1853                         ERROR(jail->pakfire, "Could not connect fd %d to stdin: %m\n",
1854                                 ctx->pipes.stdin[0]);
1855
1856                         return 1;
1857                 }
1858         }
1859
1860         // Connect standard output and error
1861         if (ctx->pipes.stdout[1] >= 0 && ctx->pipes.stderr[1] >= 0) {
1862                 r = dup2(ctx->pipes.stdout[1], STDOUT_FILENO);
1863                 if (r < 0) {
1864                         ERROR(jail->pakfire, "Could not connect fd %d to stdout: %m\n",
1865                                 ctx->pipes.stdout[1]);
1866
1867                         return 1;
1868                 }
1869
1870                 r = dup2(ctx->pipes.stderr[1], STDERR_FILENO);
1871                 if (r < 0) {
1872                         ERROR(jail->pakfire, "Could not connect fd %d to stderr: %m\n",
1873                                 ctx->pipes.stderr[1]);
1874
1875                         return 1;
1876                 }
1877
1878                 // Close the pipe (as we have moved the original file descriptors)
1879                 pakfire_jail_close_pipe(jail, ctx->pipes.stdin);
1880                 pakfire_jail_close_pipe(jail, ctx->pipes.stdout);
1881                 pakfire_jail_close_pipe(jail, ctx->pipes.stderr);
1882         }
1883
1884         // Reset open file limit (http://0pointer.net/blog/file-descriptor-limits.html)
1885         r = pakfire_rlimit_reset_nofile(jail->pakfire);
1886         if (r)
1887                 return r;
1888
1889         // Set capabilities
1890         r = pakfire_jail_set_capabilities(jail);
1891         if (r)
1892                 return r;
1893
1894         // Show capabilities
1895         r = pakfire_jail_show_capabilities(jail);
1896         if (r)
1897                 return r;
1898
1899         // Filter syscalls
1900         r = pakfire_jail_limit_syscalls(jail);
1901         if (r)
1902                 return r;
1903
1904         CTX_DEBUG(jail->ctx, "Child process initialization done\n");
1905         CTX_DEBUG(jail->ctx, "Launching command:\n");
1906
1907         // Log argv
1908         for (unsigned int i = 0; argv[i]; i++)
1909                 CTX_DEBUG(jail->ctx, "  argv[%u] = %s\n", i, argv[i]);
1910
1911         // exec() command
1912         r = execvpe(argv[0], (char**)argv, jail->env);
1913         if (r < 0) {
1914                 // Translate errno into regular exit code
1915                 switch (errno) {
1916                         case ENOENT:
1917                                 // Ignore if the command doesn't exist
1918                                 if (ctx->flags & PAKFIRE_JAIL_NOENT_OK)
1919                                         r = 0;
1920                                 else
1921                                         r = 127;
1922
1923                                 break;
1924
1925                         default:
1926                                 r = 1;
1927                 }
1928
1929                 CTX_ERROR(jail->ctx, "Could not execve(%s): %m\n", argv[0]);
1930         }
1931
1932         // We should not get here
1933         return r;
1934 }
1935
1936 /*
1937         Child 1 is launched in a new mount namespace...
1938 */
1939 static int pakfire_jail_child1(struct pakfire_jail* jail,
1940                 struct pakfire_jail_exec* ctx, const char* argv[]) {
1941         int r;
1942
1943         // Redirect any logging to our log pipe
1944         pakfire_ctx_set_log_callback(jail->ctx, pakfire_jail_log_redirect, &ctx->pipes);
1945
1946         CTX_DEBUG(jail->ctx, "First child process launched\n");
1947
1948         const int socket_send = pakfire_jail_get_pipe_to_write(jail, &ctx->socket);
1949
1950         const char* root = pakfire_get_path(jail->pakfire);
1951
1952         // Die with parent
1953         r = prctl(PR_SET_PDEATHSIG, SIGKILL, 0, 0, 0);
1954         if (r) {
1955                 CTX_ERROR(jail->ctx, "Could not configure to die with parent: %s\n", strerror(errno));
1956                 goto ERROR;
1957         }
1958
1959         // Change mount propagation so that we will receive, but don't propagate back
1960         r = pakfire_mount_change_propagation(jail->ctx, "/", MS_SLAVE);
1961         if (r) {
1962                 CTX_ERROR(jail->ctx, "Could not change mount propagation to SLAVE: %s\n", strerror(r));
1963                 goto ERROR;
1964         }
1965
1966         // Make root a mountpoint in the new mount namespace
1967         r = pakfire_mount_make_mounpoint(jail->pakfire, root);
1968         if (r)
1969                 goto ERROR;
1970
1971         // Make everything private
1972         r = pakfire_mount_change_propagation(jail->ctx, root, MS_PRIVATE);
1973         if (r) {
1974                 CTX_ERROR(jail->ctx, "Could not change mount propagation to PRIVATE: %s\n", strerror(r));
1975                 goto ERROR;
1976         }
1977
1978         // Mount everything
1979         r = pakfire_jail_mount(jail, ctx);
1980         if (r)
1981                 goto ERROR;
1982
1983         // chroot()
1984         r = pakfire_jail_switch_root(jail, root);
1985         if (r)
1986                 goto ERROR;
1987
1988         // Change mount propagation so that we will propagate everything down
1989         r = pakfire_mount_change_propagation(jail->ctx, "/", MS_SHARED);
1990         if (r) {
1991                 CTX_ERROR(jail->ctx, "Could not change mount propagation to SHARED: %s\n", strerror(r));
1992                 goto ERROR;
1993         }
1994
1995         // Configure child process
1996         struct clone_args args = {
1997                 .flags =
1998                         CLONE_NEWCGROUP |
1999                         CLONE_NEWIPC |
2000                         CLONE_NEWNS |
2001                         CLONE_NEWPID |
2002                         CLONE_NEWTIME |
2003                         CLONE_NEWUSER |
2004                         CLONE_NEWUTS |
2005                         CLONE_PIDFD,
2006                 .exit_signal = SIGCHLD,
2007                 .pidfd = (long long unsigned int)&ctx->pidfd2,
2008         };
2009
2010         // Launch the process into the configured cgroup
2011         if (ctx->cgroup) {
2012                 args.flags |= CLONE_INTO_CGROUP;
2013
2014                 // Clone into this cgroup
2015                 args.cgroup = pakfire_cgroup_fd(ctx->cgroup);
2016         }
2017
2018         // Setup networking
2019         if (!pakfire_jail_exec_has_flag(ctx, PAKFIRE_JAIL_HAS_NETWORKING))
2020                 args.flags |= CLONE_NEWNET;
2021
2022         // Fork the second child process
2023         pid_t pid = clone3(&args, sizeof(args));
2024         if (pid < 0) {
2025                 CTX_ERROR(jail->ctx, "Could not fork the first child process: %s\n", strerror(errno));
2026                 r = -errno;
2027                 goto ERROR;
2028
2029         // Child process
2030         } else if (pid == 0) {
2031                 r = pakfire_jail_child2(jail, ctx, argv);
2032                 _exit(r);
2033         }
2034
2035         // Send the pidfd of the child to the first parent
2036         r = pakfire_jail_send_fd(jail, socket_send, ctx->pidfd2);
2037         if (r)
2038                 goto ERROR;
2039
2040 ERROR:
2041         return r;
2042 }
2043
2044 // Run a command in the jail
2045 static int __pakfire_jail_exec(struct pakfire_jail* jail, const char* argv[],
2046                 const int interactive,
2047                 pakfire_jail_communicate_in  communicate_in,
2048                 pakfire_jail_communicate_out communicate_out,
2049                 void* data, int flags) {
2050         int r;
2051
2052         // Check if argv is valid
2053         if (!argv || !argv[0]) {
2054                 errno = EINVAL;
2055                 return -1;
2056         }
2057
2058         // Initialize context for this call
2059         struct pakfire_jail_exec ctx = {
2060                 .flags = flags,
2061
2062                 .socket = { -1, -1 },
2063
2064                 .pipes = {
2065                         .stdin     = { -1, -1 },
2066                         .stdout    = { -1, -1 },
2067                         .stderr    = { -1, -1 },
2068                         .log_INFO  = { -1, -1 },
2069                         .log_ERROR = { -1, -1 },
2070 #ifdef ENABLE_DEBUG
2071                         .log_DEBUG = { -1, -1 },
2072 #endif /* ENABLE_DEBUG */
2073                 },
2074
2075                 .communicate = {
2076                         .in   = communicate_in,
2077                         .out  = communicate_out,
2078                         .data = data,
2079                 },
2080
2081                 // PIDs
2082                 .pidfd1 = -1,
2083                 .pidfd2 = -1,
2084         };
2085
2086         DEBUG(jail->pakfire, "Executing jail...\n");
2087
2088         // Become the subreaper
2089         r = prctl(PR_SET_CHILD_SUBREAPER, 1, 0, 0, 0);
2090         if (r < 0) {
2091                 CTX_ERROR(jail->ctx, "Failed to become the sub-reaper: %s\n", strerror(errno));
2092                 r = -errno;
2093                 goto ERROR;
2094         }
2095
2096         // Enable networking in interactive mode
2097         if (interactive)
2098                 ctx.flags |= PAKFIRE_JAIL_HAS_NETWORKING;
2099
2100         // Create a UNIX domain socket
2101         r = socketpair(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC, 0, ctx.socket);
2102         if (r < 0) {
2103                 CTX_ERROR(jail->ctx, "Could not create UNIX socket: %s\n", strerror(errno));
2104                 r = -errno;
2105                 goto ERROR;
2106         }
2107
2108         /*
2109                 Setup a file descriptor which can be used to notify the client that the parent
2110                 has completed configuration.
2111         */
2112         ctx.completed_fd = eventfd(0, EFD_CLOEXEC);
2113         if (ctx.completed_fd < 0) {
2114                 ERROR(jail->pakfire, "eventfd() failed: %m\n");
2115                 return -1;
2116         }
2117
2118         // Create pipes to communicate with child process if we are not running interactively
2119         if (!interactive) {
2120                 // stdin (only if callback is set)
2121                 if (ctx.communicate.in) {
2122                         r = pakfire_jail_setup_pipe(jail, &ctx.pipes.stdin, 0);
2123                         if (r)
2124                                 goto ERROR;
2125                 }
2126
2127                 // stdout
2128                 r = pakfire_jail_setup_pipe(jail, &ctx.pipes.stdout, 0);
2129                 if (r)
2130                         goto ERROR;
2131
2132                 // stderr
2133                 r = pakfire_jail_setup_pipe(jail, &ctx.pipes.stderr, 0);
2134                 if (r)
2135                         goto ERROR;
2136         }
2137
2138         // Setup pipes for logging
2139         // INFO
2140         r = pakfire_jail_setup_pipe(jail, &ctx.pipes.log_INFO, O_CLOEXEC);
2141         if (r)
2142                 goto ERROR;
2143
2144         // ERROR
2145         r = pakfire_jail_setup_pipe(jail, &ctx.pipes.log_ERROR, O_CLOEXEC);
2146         if (r)
2147                 goto ERROR;
2148
2149 #ifdef ENABLE_DEBUG
2150         // DEBUG
2151         r = pakfire_jail_setup_pipe(jail, &ctx.pipes.log_DEBUG, O_CLOEXEC);
2152         if (r)
2153                 goto ERROR;
2154 #endif /* ENABLE_DEBUG */
2155
2156         // Launch the process in a cgroup that is a leaf of the configured cgroup
2157         if (jail->cgroup) {
2158                 // Fetch our UUID
2159                 const char* uuid = pakfire_jail_uuid(jail);
2160
2161                 // Create a temporary cgroup
2162                 r = pakfire_cgroup_child(&ctx.cgroup, jail->cgroup, uuid, 0);
2163                 if (r) {
2164                         ERROR(jail->pakfire, "Could not create cgroup for jail: %m\n");
2165                         goto ERROR;
2166                 }
2167         }
2168
2169         /*
2170                 Initially, we will set up a new mount namespace and launch a child process in it.
2171
2172                 This process remains in the user/ipc/time/etc. namespace and will set up
2173                 the mount namespace.
2174         */
2175
2176         // Configure child process
2177         struct clone_args args = {
2178                 .flags =
2179                         CLONE_NEWNS |
2180                         CLONE_PIDFD |
2181                         CLONE_CLEAR_SIGHAND,
2182                 .exit_signal = SIGCHLD,
2183                 .pidfd = (long long unsigned int)&ctx.pidfd1,
2184         };
2185
2186         // Fork the first child process
2187         pid_t pid = clone3(&args, sizeof(args));
2188         if (pid < 0) {
2189                 CTX_ERROR(jail->ctx, "Could not fork the first child process: %s\n", strerror(errno));
2190                 r = -errno;
2191                 goto ERROR;
2192
2193         // Child process
2194         } else if (pid == 0) {
2195                 r = pakfire_jail_child1(jail, &ctx, argv);
2196                 _exit(r);
2197         }
2198
2199         // Parent process
2200         r = pakfire_jail_wait(jail, &ctx);
2201         if (r)
2202                 goto ERROR;
2203
2204 ERROR:
2205         // Destroy the temporary cgroup (if any)
2206         if (ctx.cgroup) {
2207                 // Read cgroup stats
2208                 pakfire_cgroup_stat(ctx.cgroup, &ctx.cgroup_stats);
2209                 pakfire_cgroup_stat_dump(ctx.cgroup, &ctx.cgroup_stats);
2210                 pakfire_cgroup_destroy(ctx.cgroup);
2211                 pakfire_cgroup_unref(ctx.cgroup);
2212         }
2213
2214         // Close any file descriptors
2215         pakfire_jail_close_pipe(jail, ctx.pipes.stdin);
2216         pakfire_jail_close_pipe(jail, ctx.pipes.stdout);
2217         pakfire_jail_close_pipe(jail, ctx.pipes.stderr);
2218         pakfire_jail_close_pipe(jail, ctx.pipes.log_INFO);
2219         pakfire_jail_close_pipe(jail, ctx.pipes.log_ERROR);
2220 #ifdef ENABLE_DEBUG
2221         pakfire_jail_close_pipe(jail, ctx.pipes.log_DEBUG);
2222 #endif /* ENABLE_DEBUG */
2223         if (ctx.pidfd1 >= 0)
2224                 close(ctx.pidfd1);
2225         if (ctx.pidfd2 >= 0)
2226                 close(ctx.pidfd2);
2227
2228         // Close sockets
2229         pakfire_jail_close_pipe(jail, ctx.socket);
2230
2231         return r;
2232 }
2233
2234 PAKFIRE_EXPORT int pakfire_jail_exec(
2235                 struct pakfire_jail* jail,
2236                 const char* argv[],
2237                 pakfire_jail_communicate_in  callback_in,
2238                 pakfire_jail_communicate_out callback_out,
2239                 void* data, int flags) {
2240         return __pakfire_jail_exec(jail, argv, 0, callback_in, callback_out, data, flags);
2241 }
2242
/*
	Runs argv in the jail in interactive mode (without any communication
	callbacks) after the interactive environment has been set up.

	Returns the error of the environment setup if that has failed, or the
	result of __pakfire_jail_exec() otherwise.
*/
static int pakfire_jail_exec_interactive(
		struct pakfire_jail* jail, const char* argv[], int flags) {
	// Prepare the environment first
	int r = pakfire_jail_setup_interactive_env(jail);

	// Only launch the command if that was successful
	if (!r)
		r = __pakfire_jail_exec(jail, argv, 1, NULL, NULL, NULL, flags);

	return r;
}
2254
2255 int pakfire_jail_exec_script(struct pakfire_jail* jail,
2256                 const char* script,
2257                 const size_t size,
2258                 const char* args[],
2259                 pakfire_jail_communicate_in  callback_in,
2260                 pakfire_jail_communicate_out callback_out,
2261                 void* data) {
2262         char path[PATH_MAX];
2263         const char** argv = NULL;
2264         FILE* f = NULL;
2265         int r;
2266
2267         const char* root = pakfire_get_path(jail->pakfire);
2268
2269         // Write the scriptlet to disk
2270         r = pakfire_path_append(path, root, PAKFIRE_TMP_DIR "/pakfire-script.XXXXXX");
2271         if (r)
2272                 goto ERROR;
2273
2274         // Create a temporary file
2275         f = pakfire_mktemp(path, 0700);
2276         if (!f) {
2277                 ERROR(jail->pakfire, "Could not create temporary file: %m\n");
2278                 goto ERROR;
2279         }
2280
2281         DEBUG(jail->pakfire, "Writing script to %s:\n%.*s\n", path, (int)size, script);
2282
2283         // Write data
2284         r = fprintf(f, "%s", script);
2285         if (r < 0) {
2286                 ERROR(jail->pakfire, "Could not write script to file %s: %m\n", path);
2287                 goto ERROR;
2288         }
2289
2290         // Close file
2291         r = fclose(f);
2292         if (r) {
2293                 ERROR(jail->pakfire, "Could not close script file %s: %m\n", path);
2294                 goto ERROR;
2295         }
2296
2297         f = NULL;
2298
2299         // Count how many arguments were passed
2300         unsigned int argc = 1;
2301         if (args) {
2302                 for (const char** arg = args; *arg; arg++)
2303                         argc++;
2304         }
2305
2306         argv = calloc(argc + 1, sizeof(*argv));
2307         if (!argv) {
2308                 ERROR(jail->pakfire, "Could not allocate argv: %m\n");
2309                 goto ERROR;
2310         }
2311
2312         // Set command
2313         argv[0] = (root) ? pakfire_path_relpath(root, path) : path;
2314
2315         // Copy args
2316         for (unsigned int i = 1; i < argc; i++)
2317                 argv[i] = args[i-1];
2318
2319         // Run the script
2320         r = pakfire_jail_exec(jail, argv, callback_in, callback_out, data, 0);
2321
2322 ERROR:
2323         if (argv)
2324                 free(argv);
2325         if (f)
2326                 fclose(f);
2327
2328         // Remove script from disk
2329         if (*path)
2330                 unlink(path);
2331
2332         return r;
2333 }
2334
2335 /*
2336         A convenience function that creates a new jail, runs the given command and destroys
2337         the jail again.
2338 */
2339 int pakfire_jail_run(struct pakfire* pakfire, const char* argv[], int flags, char** output) {
2340         struct pakfire_jail* jail = NULL;
2341         int r;
2342
2343         // Create a new jail
2344         r = pakfire_jail_create(&jail, pakfire);
2345         if (r)
2346                 goto ERROR;
2347
2348         // Execute the command
2349         r = pakfire_jail_exec(jail, argv, NULL, pakfire_jail_capture_stdout, output, 0);
2350
2351 ERROR:
2352         if (jail)
2353                 pakfire_jail_unref(jail);
2354
2355         return r;
2356 }
2357
/*
	Creates a temporary jail, runs the given script in it using
	pakfire_jail_exec_script() (without any communication callbacks) and
	releases the jail again.

	Returns the error of pakfire_jail_create() if the jail could not be created,
	or the result of pakfire_jail_exec_script() otherwise.

	NOTE(review): flags is not used by this function.
*/
int pakfire_jail_run_script(struct pakfire* pakfire,
		const char* script, const size_t length, const char* argv[], int flags) {
	struct pakfire_jail* jail = NULL;

	// Set up the jail and run the script only if that has worked
	int r = pakfire_jail_create(&jail, pakfire);
	if (!r)
		r = pakfire_jail_exec_script(jail, script, length, argv, NULL, NULL, NULL);

	// Drop our reference to the jail
	if (jail)
		pakfire_jail_unref(jail);

	return r;
}
2377
/*
	Launches an interactive login shell (/bin/bash --login) in the jail.

	Negative results of the execution are returned as they are. Any other
	result (i.e. the return code of the shell) is ignored and zero is returned.
*/
int pakfire_jail_shell(struct pakfire_jail* jail) {
	const char* argv[] = {
		"/bin/bash", "--login", NULL,
	};

	// Run the shell interactively
	const int r = pakfire_jail_exec_interactive(jail, argv, 0);

	// Pass on errors, but swallow the return code of the shell
	return (r < 0) ? r : 0;
}
2395
2396 static int pakfire_jail_run_if_possible(struct pakfire* pakfire, const char** argv) {
2397         char path[PATH_MAX];
2398         int r;
2399
2400         r = pakfire_path(pakfire, path, "%s", *argv);
2401         if (r)
2402                 return r;
2403
2404         // Check if the file is executable
2405         r = access(path, X_OK);
2406         if (r) {
2407                 DEBUG(pakfire, "%s is not executable. Skipping...\n", *argv);
2408                 return 0;
2409         }
2410
2411         return pakfire_jail_run(pakfire, argv, 0, NULL);
2412 }
2413
/*
	Runs /sbin/ldconfig in a temporary jail if it is executable.

	Returns the result of pakfire_jail_run_if_possible().
*/
int pakfire_jail_ldconfig(struct pakfire* pakfire) {
	const char* command[] = { "/sbin/ldconfig", NULL };

	const int r = pakfire_jail_run_if_possible(pakfire, command);

	return r;
}
2422
/*
	Runs "/usr/bin/systemd-tmpfiles --create" in a temporary jail if the
	command is executable.

	Returns the result of pakfire_jail_run_if_possible().
*/
int pakfire_jail_run_systemd_tmpfiles(struct pakfire* pakfire) {
	const char* command[] = { "/usr/bin/systemd-tmpfiles", "--create", NULL };

	const int r = pakfire_jail_run_if_possible(pakfire, command);

	return r;
}