src/libpakfire/jail.c

   1 /*#############################################################################
   2 #                                                                             #
   3 # Pakfire - The IPFire package management system                              #
   4 # Copyright (C) 2022 Pakfire development team                                 #
   5 #                                                                             #
   6 # This program is free software: you can redistribute it and/or modify        #
   7 # it under the terms of the GNU General Public License as published by        #
   8 # the Free Software Foundation, either version 3 of the License, or           #
   9 # (at your option) any later version.                                         #
  10 #                                                                             #
  11 # This program is distributed in the hope that it will be useful,             #
  12 # but WITHOUT ANY WARRANTY; without even the implied warranty of              #
  13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the               #
  14 # GNU General Public License for more details.                                #
  15 #                                                                             #
  16 # You should have received a copy of the GNU General Public License           #
  17 # along with this program.  If not, see <http://www.gnu.org/licenses/>.       #
  18 #                                                                             #
  19 #############################################################################*/
  20
  21 #include <errno.h>
  22 #include <fcntl.h>
  23 #include <linux/capability.h>
  24 #include <linux/sched.h>
  25 #include <sys/wait.h>
  26 #include <linux/wait.h>
  27 #include <sched.h>
  28 #include <signal.h>
  29 #include <stdlib.h>
  30 #include <syscall.h>
  31 #include <sys/capability.h>
  32 #include <sys/epoll.h>
  33 #include <sys/eventfd.h>
  34 #include <sys/mount.h>
  35 #include <sys/personality.h>
  36 #include <sys/prctl.h>
  37 #include <sys/resource.h>
  38 #include <sys/timerfd.h>
  39 #include <sys/types.h>
  40 #include <sys/wait.h>
  41
  42 // libnl3
  43 #include <net/if.h>
  44 #include <netlink/route/link.h>
  45
  46 // libseccomp
  47 #include <seccomp.h>
  48
  49 // libuuid
  50 #include <uuid.h>
  51
  52 #include <pakfire/arch.h>
  53 #include <pakfire/cgroup.h>
  54 #include <pakfire/jail.h>
  55 #include <pakfire/logging.h>
  56 #include <pakfire/mount.h>
  57 #include <pakfire/os.h>
  58 #include <pakfire/pakfire.h>
  59 #include <pakfire/path.h>
  60 #include <pakfire/private.h>
  61 #include <pakfire/pwd.h>
  62 #include <pakfire/string.h>
  63 #include <pakfire/util.h>
  64
  65 #define BUFFER_SIZE      1024 * 64
  66 #define ENVIRON_SIZE     128
  67 #define EPOLL_MAX_EVENTS 2
  68 #define MAX_MOUNTPOINTS  8
  69
  70 // The default environment that will be set for every command
  71 static const struct environ {
  72         const char* key;
  73         const char* val;
  74 } ENV[] = {
  75         { "HOME", "/root" },
  76         { "LANG", "C.utf-8" },
  77         { "PATH", "/usr/local/sbin:/usr/sbin:/sbin:/usr/local/bin:/usr/bin:/bin", },
  78         { "TERM", "vt100" },
  79
  80         // Tell everything that it is running inside a Pakfire container
  81         { "container", "pakfire" },
  82         { NULL, NULL },
  83 };
  84
// Describes one mountpoint that is stored in a jail (see the "mountpoints"
// array of struct pakfire_jail)
struct pakfire_jail_mountpoint {
	// Path of what is being mounted
	char source[PATH_MAX];
	// Path where it is being mounted to
	char target[PATH_MAX];
	// NOTE(review): presumably mount(2) flags - confirm against the code that mounts
	int flags;
};
  90
// The jail object. It is reference-counted (see pakfire_jail_ref() and
// pakfire_jail_unref()) and stores the settings that have been configured
// through the pakfire_jail_* functions.
struct pakfire_jail {
	struct pakfire_ctx* ctx;
	struct pakfire* pakfire;

	// Reference counter; the jail is freed when it drops to zero
	int nrefs;

	// A unique ID for each jail
	uuid_t uuid;
	// String representation of the UUID which is generated on first use
	char __uuid[UUID_STR_LEN];

	// Resource Limits
	int nice;

	// Timeout
	// Only it_value.tv_sec is set by pakfire_jail_set_timeout(); zero means no timeout
	struct itimerspec timeout;

	// CGroup
	struct pakfire_cgroup* cgroup;

	// Environment
	// Each entry is a heap-allocated string formatted as KEY=VALUE
	char* env[ENVIRON_SIZE];

	// Mountpoints
	struct pakfire_jail_mountpoint mountpoints[MAX_MOUNTPOINTS];
	unsigned int num_mountpoints;

	// Callbacks
	struct pakfire_jail_callbacks {
		// Log
		pakfire_jail_log_callback log;
		// Opaque pointer that was passed to pakfire_jail_set_log_callback()
		void* log_data;
	} callbacks;
};
 123
// Buffers output that has been read from a file descriptor until complete
// lines can be passed on (see pakfire_jail_handle_log())
struct pakfire_log_buffer {
	// The buffered bytes (not NUL-terminated)
	char data[BUFFER_SIZE];
	// The number of bytes in data that are currently in use
	size_t used;
};
 128
// Bundles the file descriptors, callbacks and buffers that belong to one
// execution in a jail
// NOTE(review): summary inferred from the members - confirm against the code that fills it
struct pakfire_jail_exec {
	// Tested with pakfire_jail_exec_has_flag()
	int flags;

	// PIDs (of the children)
	int pidfd1;
	int pidfd2;

	// Socket to pass FDs
	int socket[2];

	// FD to notify the client that the parent has finished initialization
	int completed_fd;

	// Log pipes
	// Index 0 is used as the reading end and index 1 as the writing end
	struct pakfire_jail_pipes {
		int stdin[2];
		int stdout[2];
		int stderr[2];

		// Logging
		int log_INFO[2];
		int log_ERROR[2];
#ifdef ENABLE_DEBUG
		int log_DEBUG[2];
#endif /* ENABLE_DEBUG */
	} pipes;

	// Communicate
	struct pakfire_jail_communicate {
		// Called to write to the standard input (see pakfire_jail_stream_stdin())
		pakfire_jail_communicate_in  in;
		pakfire_jail_communicate_out out;
		// Opaque pointer that is passed to the callbacks
		void* data;
	} communicate;

	// Log buffers
	struct pakfire_jail_buffers {
		struct pakfire_log_buffer stdout;
		struct pakfire_log_buffer stderr;

		// Logging
		struct pakfire_log_buffer log_INFO;
		struct pakfire_log_buffer log_ERROR;
#ifdef ENABLE_DEBUG
		struct pakfire_log_buffer log_DEBUG;
#endif /* ENABLE_DEBUG */
	} buffers;

	struct pakfire_cgroup* cgroup;
	struct pakfire_cgroup_stats cgroup_stats;
};
 179
 180 static int clone3(struct clone_args* args, size_t size) {
 181         return syscall(__NR_clone3, args, size);
 182 }
 183
 184 static int pidfd_send_signal(int pidfd, int sig, siginfo_t* info, unsigned int flags) {
 185         return syscall(SYS_pidfd_send_signal, pidfd, sig, info, flags);
 186 }
 187
 188 static int pivot_root(const char* new_root, const char* old_root) {
 189         return syscall(SYS_pivot_root, new_root, old_root);
 190 }
 191
 192 static int pakfire_jail_exec_has_flag(
 193                 const struct pakfire_jail_exec* ctx, const enum pakfire_jail_exec_flags flag) {
 194         return ctx->flags & flag;
 195 }
 196
 197 static void pakfire_jail_free(struct pakfire_jail* jail) {
 198         DEBUG(jail->pakfire, "Freeing jail at %p\n", jail);
 199
 200         // Free environment
 201         for (unsigned int i = 0; jail->env[i]; i++)
 202                 free(jail->env[i]);
 203
 204         if (jail->cgroup)
 205                 pakfire_cgroup_unref(jail->cgroup);
 206         if (jail->pakfire)
 207                 pakfire_unref(jail->pakfire);
 208         if (jail->ctx)
 209                 pakfire_ctx_unref(jail->ctx);
 210         free(jail);
 211 }
 212
 213 /*
 214         Passes any log messages on to the default pakfire log callback
 215 */
 216 static int pakfire_jail_default_log_callback(struct pakfire* pakfire, void* data,
 217                 int priority, const char* line, size_t length) {
 218         switch (priority) {
 219                 case LOG_INFO:
 220                         INFO(pakfire, "%s", line);
 221                         break;
 222
 223                 case LOG_ERR:
 224                         ERROR(pakfire, "%s", line);
 225                         break;
 226
 227 #ifdef ENABLE_DEBUG
 228                 case LOG_DEBUG:
 229                         DEBUG(pakfire, "%s", line);
 230                         break;
 231 #endif
 232         }
 233
 234         return 0;
 235 }
 236
 237 static const char* pakfire_jail_uuid(struct pakfire_jail* jail) {
 238         if (!*jail->__uuid)
 239                 uuid_unparse_lower(jail->uuid, jail->__uuid);
 240
 241         return jail->__uuid;
 242 }
 243
 244 static int pakfire_jail_setup_interactive_env(struct pakfire_jail* jail) {
 245         // Set PS1
 246         int r = pakfire_jail_set_env(jail, "PS1", "pakfire-jail \\w> ");
 247         if (r)
 248                 return r;
 249
 250         // Copy TERM
 251         char* TERM = secure_getenv("TERM");
 252         if (TERM) {
 253                 r = pakfire_jail_set_env(jail, "TERM", TERM);
 254                 if (r)
 255                         return r;
 256         }
 257
 258         // Copy LANG
 259         char* LANG = secure_getenv("LANG");
 260         if (LANG) {
 261                 r = pakfire_jail_set_env(jail, "LANG", LANG);
 262                 if (r)
 263                         return r;
 264         }
 265
 266         return 0;
 267 }
 268
 269 PAKFIRE_EXPORT int pakfire_jail_create(struct pakfire_jail** jail, struct pakfire* pakfire) {
 270         int r;
 271
 272         const char* arch = pakfire_get_effective_arch(pakfire);
 273
 274         // Allocate a new jail
 275         struct pakfire_jail* j = calloc(1, sizeof(*j));
 276         if (!j)
 277                 return 1;
 278
 279         // Reference context
 280         j->ctx = pakfire_ctx(pakfire);
 281
 282         // Reference Pakfire
 283         j->pakfire = pakfire_ref(pakfire);
 284
 285         // Initialize reference counter
 286         j->nrefs = 1;
 287
 288         // Generate a random UUID
 289         uuid_generate_random(j->uuid);
 290
 291         DEBUG(j->pakfire, "Allocated new jail at %p\n", j);
 292
 293         // Set the default logging callback
 294         pakfire_jail_set_log_callback(j, pakfire_jail_default_log_callback, NULL);
 295
 296         // Set default environment
 297         for (const struct environ* e = ENV; e->key; e++) {
 298                 r = pakfire_jail_set_env(j, e->key, e->val);
 299                 if (r)
 300                         goto ERROR;
 301         }
 302
 303         // Enable all CPU features that CPU has to offer
 304         if (!pakfire_arch_is_supported_by_host(arch)) {
 305                 r = pakfire_jail_set_env(j, "QEMU_CPU", "max");
 306                 if (r)
 307                         goto ERROR;
 308         }
 309
 310         // Set container UUID
 311         r = pakfire_jail_set_env(j, "container_uuid", pakfire_jail_uuid(j));
 312         if (r)
 313                 goto ERROR;
 314
 315         // Disable systemctl to talk to systemd
 316         if (!pakfire_on_root(j->pakfire)) {
 317                 r = pakfire_jail_set_env(j, "SYSTEMD_OFFLINE", "1");
 318                 if (r)
 319                         goto ERROR;
 320         }
 321
 322         // Done
 323         *jail = j;
 324         return 0;
 325
 326 ERROR:
 327         pakfire_jail_free(j);
 328
 329         return r;
 330 }
 331
 332 PAKFIRE_EXPORT struct pakfire_jail* pakfire_jail_ref(struct pakfire_jail* jail) {
 333         ++jail->nrefs;
 334
 335         return jail;
 336 }
 337
 338 PAKFIRE_EXPORT struct pakfire_jail* pakfire_jail_unref(struct pakfire_jail* jail) {
 339         if (--jail->nrefs > 0)
 340                 return jail;
 341
 342         pakfire_jail_free(jail);
 343         return NULL;
 344 }
 345
 346 // Logging Callback
 347
 348 PAKFIRE_EXPORT void pakfire_jail_set_log_callback(struct pakfire_jail* jail,
 349                 pakfire_jail_log_callback callback, void* data) {
 350         jail->callbacks.log = callback;
 351         jail->callbacks.log_data = data;
 352 }
 353
 354 // Resource Limits
 355
 356 PAKFIRE_EXPORT int pakfire_jail_nice(struct pakfire_jail* jail, int nice) {
 357         // Check if nice level is in range
 358         if (nice < -19 || nice > 20) {
 359                 errno = EINVAL;
 360                 return 1;
 361         }
 362
 363         // Store nice level
 364         jail->nice = nice;
 365
 366         return 0;
 367 }
 368
 369 int pakfire_jail_set_cgroup(struct pakfire_jail* jail, struct pakfire_cgroup* cgroup) {
 370         // Free any previous cgroup
 371         if (jail->cgroup) {
 372                 pakfire_cgroup_unref(jail->cgroup);
 373                 jail->cgroup = NULL;
 374         }
 375
 376         // Set any new cgroup
 377         if (cgroup) {
 378                 DEBUG(jail->pakfire, "Setting cgroup %p\n", cgroup);
 379
 380                 jail->cgroup = pakfire_cgroup_ref(cgroup);
 381         }
 382
 383         // Done
 384         return 0;
 385 }
 386
 387 // Environment
 388
 389 // Returns the length of the environment
 390 static unsigned int pakfire_jail_env_length(struct pakfire_jail* jail) {
 391         unsigned int i = 0;
 392
 393         // Count everything in the environment
 394         for (char** e = jail->env; *e; e++)
 395                 i++;
 396
 397         return i;
 398 }
 399
 400 // Finds an existing environment variable and returns its index or -1 if not found
 401 static int pakfire_jail_find_env(struct pakfire_jail* jail, const char* key) {
 402         if (!key) {
 403                 errno = EINVAL;
 404                 return -1;
 405         }
 406
 407         const size_t length = strlen(key);
 408
 409         for (unsigned int i = 0; jail->env[i]; i++) {
 410                 if ((pakfire_string_startswith(jail->env[i], key)
 411                                 && *(jail->env[i] + length) == '=')) {
 412                         return i;
 413                 }
 414         }
 415
 416         // Nothing found
 417         return -1;
 418 }
 419
 420 // Returns the value of an environment variable or NULL
 421 PAKFIRE_EXPORT const char* pakfire_jail_get_env(struct pakfire_jail* jail,
 422                 const char* key) {
 423         int i = pakfire_jail_find_env(jail, key);
 424         if (i < 0)
 425                 return NULL;
 426
 427         return jail->env[i] + strlen(key) + 1;
 428 }
 429
 430 // Sets an environment variable
 431 PAKFIRE_EXPORT int pakfire_jail_set_env(struct pakfire_jail* jail,
 432                 const char* key, const char* value) {
 433         // Find the index where to write this value to
 434         int i = pakfire_jail_find_env(jail, key);
 435         if (i < 0)
 436                 i = pakfire_jail_env_length(jail);
 437
 438         // Return -ENOSPC when the environment is full
 439         if (i >= ENVIRON_SIZE) {
 440                 errno = ENOSPC;
 441                 return -1;
 442         }
 443
 444         // Free any previous value
 445         if (jail->env[i])
 446                 free(jail->env[i]);
 447
 448         // Format and set environment variable
 449         asprintf(&jail->env[i], "%s=%s", key, value);
 450
 451         DEBUG(jail->pakfire, "Set environment variable: %s\n", jail->env[i]);
 452
 453         return 0;
 454 }
 455
 456 // Imports an environment
 457 PAKFIRE_EXPORT int pakfire_jail_import_env(struct pakfire_jail* jail, const char* env[]) {
 458         if (!env)
 459                 return 0;
 460
 461         char* key;
 462         char* val;
 463         int r;
 464
 465         // Copy environment variables
 466         for (unsigned int i = 0; env[i]; i++) {
 467                 r = pakfire_string_partition(env[i], "=", &key, &val);
 468                 if (r)
 469                         continue;
 470
 471                 // Set value
 472                 r = pakfire_jail_set_env(jail, key, val);
 473
 474                 if (key)
 475                         free(key);
 476                 if (val)
 477                         free(val);
 478
 479                 // Break on error
 480                 if (r)
 481                         return r;
 482         }
 483
 484         return 0;
 485 }
 486
 487 // Timeout
 488
 489 PAKFIRE_EXPORT int pakfire_jail_set_timeout(
 490                 struct pakfire_jail* jail, unsigned int timeout) {
 491         // Store value
 492         jail->timeout.it_value.tv_sec = timeout;
 493
 494         if (timeout > 0)
 495                 DEBUG(jail->pakfire, "Timeout set to %u second(s)\n", timeout);
 496         else
 497                 DEBUG(jail->pakfire, "Timeout disabled\n");
 498
 499         return 0;
 500 }
 501
 502 static int pakfire_jail_create_timer(struct pakfire_jail* jail) {
 503         int r;
 504
 505         // Nothing to do if no timeout has been set
 506         if (!jail->timeout.it_value.tv_sec)
 507                 return -1;
 508
 509         // Create a new timer
 510         const int fd = timerfd_create(CLOCK_MONOTONIC, 0);
 511         if (fd < 0) {
 512                 ERROR(jail->pakfire, "Could not create timer: %m\n");
 513                 goto ERROR;
 514         }
 515
 516         // Arm timer
 517         r = timerfd_settime(fd, 0, &jail->timeout, NULL);
 518         if (r) {
 519                 ERROR(jail->pakfire, "Could not arm timer: %m\n");
 520                 goto ERROR;
 521         }
 522
 523         return fd;
 524
 525 ERROR:
 526         if (fd >= 0)
 527                 close(fd);
 528
 529         return -1;
 530 }
 531
 532 /*
 533         This function replaces any logging in the child process.
 534
 535         All log messages will be sent to the parent process through their respective pipes.
 536 */
 537 static void pakfire_jail_log_redirect(void* data, int priority, const char* file,
 538                 int line, const char* fn, const char* format, va_list args) {
 539         struct pakfire_jail_pipes* pipes = (struct pakfire_jail_pipes*)data;
 540         int fd;
 541
 542         switch (priority) {
 543                 case LOG_INFO:
 544                         fd = pipes->log_INFO[1];
 545                         break;
 546
 547                 case LOG_ERR:
 548                         fd = pipes->log_ERROR[1];
 549                         break;
 550
 551 #ifdef ENABLE_DEBUG
 552                 case LOG_DEBUG:
 553                         fd = pipes->log_DEBUG[1];
 554                         break;
 555 #endif /* ENABLE_DEBUG */
 556
 557                 // Ignore any messages of an unknown priority
 558                 default:
 559                         return;
 560         }
 561
 562         // Send the log message
 563         if (fd >= 0)
 564                 vdprintf(fd, format, args);
 565 }
 566
 567 static int pakfire_jail_log_buffer_is_full(const struct pakfire_log_buffer* buffer) {
 568         return (sizeof(buffer->data) == buffer->used);
 569 }
 570
 571 /*
 572         This function reads as much data as it can from the file descriptor.
 573         If it finds a whole line in it, it will send it to the logger and repeat the process.
 574         If not newline character is found, it will try to read more data until it finds one.
 575 */
 576 static int pakfire_jail_handle_log(struct pakfire_jail* jail,
 577                 struct pakfire_jail_exec* ctx, int priority, int fd,
 578                 struct pakfire_log_buffer* buffer, pakfire_jail_communicate_out callback, void* data) {
 579         char line[BUFFER_SIZE + 1];
 580
 581         // Fill up buffer from fd
 582         if (buffer->used < sizeof(buffer->data)) {
 583                 ssize_t bytes_read = read(fd, buffer->data + buffer->used,
 584                                 sizeof(buffer->data) - buffer->used);
 585
 586                 // Handle errors
 587                 if (bytes_read < 0) {
 588                         ERROR(jail->pakfire, "Could not read from fd %d: %m\n", fd);
 589                         return -1;
 590                 }
 591
 592                 // Update buffer size
 593                 buffer->used += bytes_read;
 594         }
 595
 596         // See if we have any lines that we can write
 597         while (buffer->used) {
 598                 // Search for the end of the first line
 599                 char* eol = memchr(buffer->data, '\n', buffer->used);
 600
 601                 // No newline found
 602                 if (!eol) {
 603                         // If the buffer is full, we send the content to the logger and try again
 604                         // This should not happen in practise
 605                         if (pakfire_jail_log_buffer_is_full(buffer)) {
 606                                 DEBUG(jail->pakfire, "Logging buffer is full. Sending all content\n");
 607
 608                                 eol = buffer->data + sizeof(buffer->data) - 1;
 609
 610                         // Otherwise we might have only read parts of the output
 611                         } else
 612                                 break;
 613                 }
 614
 615                 // Find the length of the string
 616                 size_t length = eol - buffer->data + 1;
 617
 618                 // Copy the line into the buffer
 619                 memcpy(line, buffer->data, length);
 620
 621                 // Terminate the string
 622                 line[length] = '\0';
 623
 624                 // Log the line
 625                 if (callback) {
 626                         int r = callback(jail->pakfire, data, priority, line, length);
 627                         if (r) {
 628                                 ERROR(jail->pakfire, "The logging callback returned an error: %d\n", r);
 629                                 return r;
 630                         }
 631                 }
 632
 633                 // Remove line from buffer
 634                 memmove(buffer->data, buffer->data + length, buffer->used - length);
 635                 buffer->used -= length;
 636         }
 637
 638         return 0;
 639 }
 640
 641 static int pakfire_jail_stream_stdin(struct pakfire_jail* jail,
 642                 struct pakfire_jail_exec* ctx, const int fd) {
 643         int r;
 644
 645         // Nothing to do if there is no stdin callback set
 646         if (!ctx->communicate.in) {
 647                 DEBUG(jail->pakfire, "Callback for standard input is not set\n");
 648                 return 0;
 649         }
 650
 651         // Skip if the writing pipe has already been closed
 652         if (!ctx->pipes.stdin[1])
 653                 return 0;
 654
 655         DEBUG(jail->pakfire, "Streaming standard input...\n");
 656
 657         // Calling the callback
 658         r = ctx->communicate.in(jail->pakfire, ctx->communicate.data, fd);
 659
 660         DEBUG(jail->pakfire, "Standard input callback finished: %d\n", r);
 661
 662         // The callback signaled that it has written everything
 663         if (r == EOF) {
 664                 DEBUG(jail->pakfire, "Closing standard input pipe\n");
 665
 666                 // Close the file-descriptor
 667                 close(fd);
 668
 669                 // Reset the file-descriptor so it won't be closed again later
 670                 ctx->pipes.stdin[1] = -1;
 671
 672                 // Report success
 673                 r = 0;
 674         }
 675
 676         return r;
 677 }
 678
 679 static int pakfire_jail_setup_pipe(struct pakfire_jail* jail, int (*fds)[2], const int flags) {
 680         int r = pipe2(*fds, flags);
 681         if (r < 0) {
 682                 ERROR(jail->pakfire, "Could not setup pipe: %m\n");
 683                 return 1;
 684         }
 685
 686         return 0;
 687 }
 688
 689 static void pakfire_jail_close_pipe(struct pakfire_jail* jail, int fds[2]) {
 690         for (unsigned int i = 0; i < 2; i++)
 691                 if (fds[i] >= 0)
 692                         close(fds[i]);
 693 }
 694
 695 /*
 696         This is a convenience function to fetch the reading end of a pipe and
 697         closes the write end.
 698 */
 699 static int pakfire_jail_get_pipe_to_read(struct pakfire_jail* jail, int (*fds)[2]) {
 700         // Give the variables easier names to avoid confusion
 701         int* fd_read  = &(*fds)[0];
 702         int* fd_write = &(*fds)[1];
 703
 704         // Close the write end of the pipe
 705         if (*fd_write >= 0) {
 706                 close(*fd_write);
 707                 *fd_write = -1;
 708         }
 709
 710         // Return the read end
 711         if (*fd_read >= 0)
 712                 return *fd_read;
 713
 714         return -1;
 715 }
 716
 717 static int pakfire_jail_get_pipe_to_write(struct pakfire_jail* jail, int (*fds)[2]) {
 718         // Give the variables easier names to avoid confusion
 719         int* fd_read  = &(*fds)[0];
 720         int* fd_write = &(*fds)[1];
 721
 722         // Close the read end of the pipe
 723         if (*fd_read >= 0) {
 724                 close(*fd_read);
 725                 *fd_read = -1;
 726         }
 727
 728         // Return the write end
 729         if (*fd_write >= 0)
 730                 return *fd_write;
 731
 732         return -1;
 733 }
 734
 735 static int pakfire_jail_recv_fd(struct pakfire_jail* jail, int socket, int* fd) {
 736         const size_t payload_length = sizeof(fd);
 737         char buffer[CMSG_SPACE(payload_length)];
 738         int r;
 739
 740         struct msghdr msg = {
 741                 .msg_control    = buffer,
 742                 .msg_controllen = sizeof(buffer),
 743         };
 744
 745         // Receive the message
 746         r = recvmsg(socket, &msg, 0);
 747         if (r) {
 748                 CTX_ERROR(jail->ctx, "Could not receive file descriptor: %s\n", strerror(errno));
 749                 return -errno;
 750         }
 751
 752         // Fetch the payload
 753         struct cmsghdr* cmsg = CMSG_FIRSTHDR(&msg);
 754         if (!cmsg)
 755                 return -EBADMSG;
 756
 757         *fd = *((int*)CMSG_DATA(cmsg));
 758
 759         CTX_DEBUG(jail->ctx, "Received fd %d from socket %d\n", *fd, socket);
 760
 761         return 0;
 762 }
 763
 764 static int pakfire_jail_send_fd(struct pakfire_jail* jail, int socket, int fd) {
 765         const size_t payload_length = sizeof(fd);
 766         char buffer[CMSG_SPACE(payload_length)];
 767         int r;
 768
 769         CTX_DEBUG(jail->ctx, "Sending fd %d to socket %d\n", fd, socket);
 770
 771         // Header
 772         struct msghdr msg = {
 773                 .msg_control    = buffer,
 774                 .msg_controllen = sizeof(buffer),
 775         };
 776
 777         // Payload
 778         struct cmsghdr* cmsg = CMSG_FIRSTHDR(&msg);
 779         cmsg->cmsg_level = SOL_SOCKET;
 780         cmsg->cmsg_type  = SCM_RIGHTS;
 781         cmsg->cmsg_len   = CMSG_LEN(payload_length);
 782
 783         // Set payload
 784         *((int*)CMSG_DATA(cmsg)) = fd;
 785
 786         // Send the message
 787         r = sendmsg(socket, &msg, 0);
 788         if (r) {
 789                 CTX_ERROR(jail->ctx, "Could not send file descriptor: %s\n", strerror(errno));
 790                 return -errno;
 791         }
 792
 793         return 0;
 794 }
 795
 796 static int pakfire_jail_log(struct pakfire* pakfire, void* data, int priority,
 797                 const char* line, const size_t length) {
 798         // Pass everything to the parent logger
 799         pakfire_log_condition(pakfire, priority, 0, "%.*s", (int)length, line);
 800
 801         return 0;
 802 }
 803
 804 static int pakfire_jail_epoll_add_fd(struct pakfire_jail* jail, int epollfd, int fd, int events) {
 805         struct epoll_event event = {
 806                 .events = events|EPOLLHUP,
 807                 .data   = {
 808                         .fd = fd,
 809                 },
 810         };
 811         int r;
 812
 813         // Read flags
 814         int flags = fcntl(fd, F_GETFL, 0);
 815
 816         // Set modified flags
 817         r  = fcntl(fd, F_SETFL, flags|O_NONBLOCK);
 818         if (r < 0) {
 819                 CTX_ERROR(jail->ctx, "Could not set file descriptor %d into non-blocking mode: %s\n",
 820                         fd, strerror(errno));
 821                 return -errno;
 822         }
 823
 824         // Add the file descriptor to the loop
 825         r = epoll_ctl(epollfd, EPOLL_CTL_ADD, fd, &event);
 826         if (r < 0) {
 827                 ERROR(jail->pakfire, "Could not add file descriptor %d to epoll(): %s\n",
 828                         fd, strerror(errno));
 829                 return -errno;
 830         }
 831
 832         return 0;
 833 }
 834
// Forward declaration
// NOTE(review): presumably defined further down in this file - confirm
static int pakfire_jail_setup_child2(struct pakfire_jail* jail, struct pakfire_jail_exec* ctx);
 836
 837 static int pakfire_jail_wait_on_child(struct pakfire_jail* jail, int pidfd) {
 838         siginfo_t status = {};
 839         int r;
 840
 841         // Call waitid() and store the result
 842         r = waitid(P_PIDFD, pidfd, &status, WEXITED);
 843         if (r) {
 844                 CTX_ERROR(jail->ctx, "waitid() failed: %s\n", strerror(errno));
 845                 return -errno;
 846         }
 847
 848         switch (status.si_code) {
 849                 // If the process exited normally, we return the exit code
 850                 case CLD_EXITED:
 851                         CTX_DEBUG(jail->ctx, "The child process exited with code %d\n", status.si_status);
 852                         return status.si_status;
 853
 854                 case CLD_KILLED:
 855                         CTX_ERROR(jail->ctx, "The child process was killed\n");
 856                         return 139;
 857
 858                 case CLD_DUMPED:
 859                         CTX_ERROR(jail->ctx, "The child process terminated abnormally\n");
 860                         return 139;
 861
 862                 // Log anything else
 863                 default:
 864                         CTX_ERROR(jail->ctx, "Unknown child exit code: %d\n", status.si_code);
 865                         break;
 866         }
 867
 868         return -EBADMSG;
 869 }
 870
 871 static int pakfire_jail_wait(struct pakfire_jail* jail, struct pakfire_jail_exec* ctx) {
 872         int epollfd = -1;
 873         struct epoll_event events[EPOLL_MAX_EVENTS];
 874         char garbage[8];
 875         int r = 0;
 876
 877         // Fetch the UNIX domain socket
 878         const int socket_recv = pakfire_jail_get_pipe_to_read(jail, &ctx->socket);
 879
 880         // Fetch file descriptors from context
 881         const int stdin = pakfire_jail_get_pipe_to_write(jail, &ctx->pipes.stdin);
 882         const int stdout = pakfire_jail_get_pipe_to_read(jail, &ctx->pipes.stdout);
 883         const int stderr = pakfire_jail_get_pipe_to_read(jail, &ctx->pipes.stderr);
 884
 885         // Timer
 886         const int timerfd = pakfire_jail_create_timer(jail);
 887
 888         // Logging
 889         const int log_INFO  = pakfire_jail_get_pipe_to_read(jail, &ctx->pipes.log_INFO);
 890         const int log_ERROR = pakfire_jail_get_pipe_to_read(jail, &ctx->pipes.log_ERROR);
 891 #ifdef ENABLE_DEBUG
 892         const int log_DEBUG = pakfire_jail_get_pipe_to_read(jail, &ctx->pipes.log_DEBUG);
 893 #endif /* ENABLE_DEBUG */
 894
 895         // Make a list of all file descriptors we are interested in
 896         const struct pakfire_wait_fds {
 897                 const int fd;
 898                 const int events;
 899         } fds[] = {
 900                 { socket_recv, EPOLLIN },
 901
 902                 // Standard input/output
 903                 { stdin,  EPOLLOUT },
 904                 { stdout, EPOLLIN },
 905                 { stderr, EPOLLIN },
 906
 907                 // Timer
 908                 { timerfd, EPOLLIN },
 909
 910                 // Child Processes
 911                 { ctx->pidfd1, EPOLLIN },
 912
 913                 // Log Pipes
 914                 { log_INFO, EPOLLIN },
 915                 { log_ERROR, EPOLLIN },
 916 #ifdef ENABLE_DEBUG
 917                 { log_DEBUG, EPOLLIN },
 918 #endif /* ENABLE_DEBUG */
 919
 920                 // Sentinel
 921                 { -1, 0 },
 922         };
 923
 924         // Setup epoll
 925         epollfd = epoll_create1(0);
 926         if (epollfd < 0) {
 927                 ERROR(jail->pakfire, "Could not initialize epoll(): %m\n");
 928                 r = 1;
 929                 goto ERROR;
 930         }
 931
 932         // Turn file descriptors into non-blocking mode and add them to epoll()
 933         for (const struct pakfire_wait_fds* fd = fds; fd->events; fd++) {
 934                 // Skip fds which were not initialized
 935                 if (fd->fd < 0)
 936                         continue;
 937
 938                 // Add the FD to the event loop
 939                 r = pakfire_jail_epoll_add_fd(jail, epollfd, fd->fd, fd->events);
 940                 if (r)
 941                         goto ERROR;
 942         }
 943
 944         int ended = 0;
 945         int exit = 0;
 946
 947         CTX_DEBUG(jail->ctx, "Launching main loop...\n");
 948
 949         // Loop for as long as the process is alive
 950         while (!ended) {
 951                 int num = epoll_wait(epollfd, events, EPOLL_MAX_EVENTS, -1);
 952                 if (num < 1) {
 953                         // Ignore if epoll_wait() has been interrupted
 954                         if (errno == EINTR)
 955                                 continue;
 956
 957                         ERROR(jail->pakfire, "epoll_wait() failed: %m\n");
 958                         r = 1;
 959
 960                         goto ERROR;
 961                 }
 962
 963                 for (int i = 0; i < num; i++) {
 964                         int e  = events[i].events;
 965                         int fd = events[i].data.fd;
 966
 967                         struct pakfire_log_buffer* buffer = NULL;
 968                         pakfire_jail_communicate_out callback = NULL;
 969                         void* data = NULL;
 970                         int priority;
 971
 972                         // Check if there is any data to be read
 973                         if (e & EPOLLIN) {
 974                                 // Monitor the first child process
 975                                 if (fd == ctx->pidfd1) {
 976                                         r = pakfire_jail_wait_on_child(jail, ctx->pidfd1);
 977                                         if (r) {
 978                                                 CTX_ERROR(jail->ctx, "The first child exited with an error\n");
 979                                                 goto ERROR;
 980                                         }
 981
 982                                         close(ctx->pidfd1);
 983                                         ctx->pidfd1 = -1;
 984
 985                                         continue;
 986
 987                                 // Monitor the second child process
 988                                 } else if (fd == ctx->pidfd2) {
 989                                         exit = pakfire_jail_wait_on_child(jail, ctx->pidfd2);
 990                                         if (exit < 0) {
 991                                                 CTX_ERROR(jail->ctx, "The second child exited with an error\n");
 992                                                 goto ERROR;
 993                                         }
 994
 995                                         close(ctx->pidfd2);
 996                                         ctx->pidfd2 = -1;
 997
 998                                         // Mark that we have ended so that we will process the remaining
 999                                         // events from epoll() now, but won't restart the outer loop.
1000                                         ended = 1;
1001
1002                                         continue;
1003
1004                                 // Handle timer events
1005                                 } else if (fd == timerfd) {
1006                                         DEBUG(jail->pakfire, "Timer event received\n");
1007
1008                                         // Disarm the timer
1009                                         r = read(timerfd, garbage, sizeof(garbage));
1010                                         if (r < 1) {
1011                                                 ERROR(jail->pakfire, "Could not disarm timer: %m\n");
1012                                                 r = 1;
1013                                                 goto ERROR;
1014                                         }
1015
1016                                         // Terminate the process if it hasn't already ended
1017                                         if (!ended) {
1018                                                 DEBUG(jail->pakfire, "Terminating process...\n");
1019
1020                                                 // Send SIGTERM to the process
1021                                                 r = pidfd_send_signal(ctx->pidfd2, SIGKILL, NULL, 0);
1022                                                 if (r) {
1023                                                         ERROR(jail->pakfire, "Could not kill process: %m\n");
1024                                                         goto ERROR;
1025                                                 }
1026                                         }
1027
1028                                         // There is nothing else to do
1029                                         continue;
1030
1031                                 // Handle socket messages
1032                                 } else if (fd == socket_recv) {
1033                                         // Receive the FD of the second child process
1034                                         r = pakfire_jail_recv_fd(jail, socket_recv, &ctx->pidfd2);
1035                                         if (r)
1036                                                 goto ERROR;
1037
1038                                         // Add it to the event loop
1039                                         r = pakfire_jail_epoll_add_fd(jail, epollfd, ctx->pidfd2, EPOLLIN);
1040                                         if (r)
1041                                                 goto ERROR;
1042
1043                                         // Setup the child process
1044                                         r = pakfire_jail_setup_child2(jail, ctx);
1045                                         if (r)
1046                                                 goto ERROR;
1047
1048                                         // Don't fall through to log processing
1049                                         continue;
1050
1051                                 // Handle logging messages
1052                                 } else if (fd == log_INFO) {
1053                                         buffer = &ctx->buffers.log_INFO;
1054                                         priority = LOG_INFO;
1055
1056                                         callback = pakfire_jail_log;
1057
1058                                 } else if (fd == log_ERROR) {
1059                                         buffer = &ctx->buffers.log_ERROR;
1060                                         priority = LOG_ERR;
1061
1062                                         callback = pakfire_jail_log;
1063
1064 #ifdef ENABLE_DEBUG
1065                                 } else if (fd == log_DEBUG) {
1066                                         buffer = &ctx->buffers.log_DEBUG;
1067                                         priority = LOG_DEBUG;
1068
1069                                         callback = pakfire_jail_log;
1070 #endif /* ENABLE_DEBUG */
1071
1072                                 // Handle anything from the log pipes
1073                                 } else if (fd == stdout) {
1074                                         buffer = &ctx->buffers.stdout;
1075                                         priority = LOG_INFO;
1076
1077                                         // Send any output to the default logger if no callback is set
1078                                         if (ctx->communicate.out) {
1079                                                 callback = ctx->communicate.out;
1080                                                 data     = ctx->communicate.data;
1081                                         } else {
1082                                                 callback = jail->callbacks.log;
1083                                                 data     = jail->callbacks.log_data;
1084                                         }
1085
1086                                 } else if (fd == stderr) {
1087                                         buffer = &ctx->buffers.stderr;
1088                                         priority = LOG_ERR;
1089
1090                                         // Send any output to the default logger if no callback is set
1091                                         if (ctx->communicate.out) {
1092                                                 callback = ctx->communicate.out;
1093                                                 data     = ctx->communicate.data;
1094                                         } else {
1095                                                 callback = jail->callbacks.log;
1096                                                 data     = jail->callbacks.log_data;
1097                                         }
1098
1099                                 } else {
1100                                         DEBUG(jail->pakfire, "Received invalid file descriptor %d\n", fd);
1101                                         continue;
1102                                 }
1103
1104                                 // Handle log event
1105                                 r = pakfire_jail_handle_log(jail, ctx, priority, fd, buffer, callback, data);
1106                                 if (r)
1107                                         goto ERROR;
1108                         }
1109
1110                         if (e & EPOLLOUT) {
1111                                 // Handle standard input
1112                                 if (fd == stdin) {
1113                                         r = pakfire_jail_stream_stdin(jail, ctx, fd);
1114                                         if (r) {
1115                                                 switch (errno) {
1116                                                         // Ignore if we filled up the buffer
1117                                                         case EAGAIN:
1118                                                                 break;
1119
1120                                                         default:
1121                                                                 ERROR(jail->pakfire, "Could not write to stdin: %m\n");
1122                                                                 goto ERROR;
1123                                                 }
1124                                         }
1125                                 }
1126                         }
1127
1128                         // Check if any file descriptors have been closed
1129                         if (e & EPOLLHUP) {
1130                                 // Remove the file descriptor
1131                                 r = epoll_ctl(epollfd, EPOLL_CTL_DEL, fd, NULL);
1132                                 if (r) {
1133                                         ERROR(jail->pakfire, "Could not remove closed file-descriptor %d: %m\n", fd);
1134                                         goto ERROR;
1135                                 }
1136                         }
1137                 }
1138         }
1139
1140         // Return the exit code
1141         r = exit;
1142
1143 ERROR:
1144         CTX_DEBUG(jail->ctx, "Main loop terminated\n");
1145
1146         if (epollfd >= 0)
1147                 close(epollfd);
1148         if (timerfd >= 0)
1149                 close(timerfd);
1150
1151         return r;
1152 }
1153
/*
        Output callback which collects everything that is logged with priority
        LOG_INFO in a string.

        data must either be NULL or point to a char* into which the output is
        collected. Every call replaces *data with a newly allocated string that
        holds the previous content followed by line; the previous string is freed.
        NOTE(review): this assumes that *data initially is NULL (or heap-allocated) -
        confirm against the callers.

        Anything else (or everything if data is NULL) is passed on to the default
        log callback.

        Returns zero on success and 1 if memory could not be allocated. In the
        latter case, the content collected so far is left untouched.
*/
int pakfire_jail_capture_stdout(struct pakfire* pakfire, void* data,
                int priority, const char* line, size_t length) {
        char** output = (char**)data;
        char* buffer = NULL;
        int r;

        // Append everything from stdout to a buffer
        if (output && priority == LOG_INFO) {
                // Format into a temporary pointer. Passing output itself to asprintf()
                // would overwrite (and therefore leak) the previous buffer which is
                // still being read as an argument.
                r = asprintf(&buffer, "%s%s", (*output) ? *output : "", line);
                if (r < 0)
                        return 1;

                // Swap the old buffer for the extended one
                free(*output);
                *output = buffer;

                return 0;
        }

        // Send everything else to the default logger
        return pakfire_jail_default_log_callback(pakfire, NULL, priority, line, length);
}
1170
1171 // Capabilities
1172
1173 // Logs all capabilities of the current process
1174 static int pakfire_jail_show_capabilities(struct pakfire_jail* jail) {
1175         cap_t caps = NULL;
1176         char* name = NULL;
1177         cap_flag_value_t value_e;
1178         cap_flag_value_t value_i;
1179         cap_flag_value_t value_p;
1180         int r;
1181
1182         // Fetch PID
1183         pid_t pid = getpid();
1184
1185         // Fetch all capabilities
1186         caps = cap_get_proc();
1187         if (!caps) {
1188                 ERROR(jail->pakfire, "Could not fetch capabilities: %m\n");
1189                 r = 1;
1190                 goto ERROR;
1191         }
1192
1193         DEBUG(jail->pakfire, "Capabilities of PID %d:\n", pid);
1194
1195         // Iterate over all capabilities
1196         for (unsigned int cap = 0; cap_valid(cap); cap++) {
1197                 name = cap_to_name(cap);
1198
1199                 // Fetch effective value
1200                 r = cap_get_flag(caps, cap, CAP_EFFECTIVE, &value_e);
1201                 if (r)
1202                         goto ERROR;
1203
1204                 // Fetch inheritable value
1205                 r = cap_get_flag(caps, cap, CAP_INHERITABLE, &value_i);
1206                 if (r)
1207                         goto ERROR;
1208
1209                 // Fetch permitted value
1210                 r = cap_get_flag(caps, cap, CAP_PERMITTED, &value_p);
1211                 if (r)
1212                         goto ERROR;
1213
1214                 DEBUG(jail->pakfire,
1215                         "  %-24s : %c%c%c\n",
1216                         name,
1217                         (value_e == CAP_SET) ? 'e' : '-',
1218                         (value_i == CAP_SET) ? 'i' : '-',
1219                         (value_p == CAP_SET) ? 'p' : '-'
1220                 );
1221
1222                 // Free name
1223                 cap_free(name);
1224                 name = NULL;
1225         }
1226
1227         // Success
1228         r = 0;
1229
1230 ERROR:
1231         if (name)
1232                 cap_free(name);
1233         if (caps)
1234                 cap_free(caps);
1235
1236         return r;
1237 }
1238
1239 static int pakfire_jail_set_capabilities(struct pakfire_jail* jail) {
1240         cap_t caps = NULL;
1241         char* name = NULL;
1242         int r;
1243
1244         // Fetch capabilities
1245         caps = cap_get_proc();
1246         if (!caps) {
1247                 ERROR(jail->pakfire, "Could not read capabilities: %m\n");
1248                 r = 1;
1249                 goto ERROR;
1250         }
1251
1252         // Walk through all capabilities
1253         for (cap_value_t cap = 0; cap_valid(cap); cap++) {
1254                 cap_value_t _caps[] = { cap };
1255
1256                 // Fetch the name of the capability
1257                 name = cap_to_name(cap);
1258
1259                 r = cap_set_flag(caps, CAP_EFFECTIVE, 1, _caps, CAP_SET);
1260                 if (r) {
1261                         ERROR(jail->pakfire, "Could not set %s: %m\n", name);
1262                         goto ERROR;
1263                 }
1264
1265                 r = cap_set_flag(caps, CAP_INHERITABLE, 1, _caps, CAP_SET);
1266                 if (r) {
1267                         ERROR(jail->pakfire, "Could not set %s: %m\n", name);
1268                         goto ERROR;
1269                 }
1270
1271                 r = cap_set_flag(caps, CAP_PERMITTED, 1, _caps, CAP_SET);
1272                 if (r) {
1273                         ERROR(jail->pakfire, "Could not set %s: %m\n", name);
1274                         goto ERROR;
1275                 }
1276
1277                 // Free name
1278                 cap_free(name);
1279                 name = NULL;
1280         }
1281
1282         // Restore all capabilities
1283         r = cap_set_proc(caps);
1284         if (r) {
1285                 ERROR(jail->pakfire, "Restoring capabilities failed: %m\n");
1286                 goto ERROR;
1287         }
1288
1289         // Add all capabilities to the ambient set
1290         for (unsigned int cap = 0; cap_valid(cap); cap++) {
1291                 name = cap_to_name(cap);
1292
1293                 // Raise the capability
1294                 r = prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_RAISE, cap, 0, 0);
1295                 if (r) {
1296                         ERROR(jail->pakfire, "Could not set ambient capability %s: %m\n", name);
1297                         goto ERROR;
1298                 }
1299
1300                 // Free name
1301                 cap_free(name);
1302                 name = NULL;
1303         }
1304
1305         // Success
1306         r = 0;
1307
1308 ERROR:
1309         if (name)
1310                 cap_free(name);
1311         if (caps)
1312                 cap_free(caps);
1313
1314         return r;
1315 }
1316
1317 // Syscall Filter
1318
1319 static int pakfire_jail_limit_syscalls(struct pakfire_jail* jail) {
1320         const int syscalls[] = {
1321                 // The kernel's keyring isn't namespaced
1322                 SCMP_SYS(keyctl),
1323                 SCMP_SYS(add_key),
1324                 SCMP_SYS(request_key),
1325
1326                 // Disable userfaultfd
1327                 SCMP_SYS(userfaultfd),
1328
1329                 // Disable perf which could leak a lot of information about the host
1330                 SCMP_SYS(perf_event_open),
1331
1332                 0,
1333         };
1334         int r = 1;
1335
1336         DEBUG(jail->pakfire, "Applying syscall filter...\n");
1337
1338         // Setup a syscall filter which allows everything by default
1339         scmp_filter_ctx ctx = seccomp_init(SCMP_ACT_ALLOW);
1340         if (!ctx) {
1341                 ERROR(jail->pakfire, "Could not setup seccomp filter: %m\n");
1342                 goto ERROR;
1343         }
1344
1345         // All all syscalls
1346         for (const int* syscall = syscalls; *syscall; syscall++) {
1347                 r = seccomp_rule_add(ctx, SCMP_ACT_ERRNO(EPERM), *syscall, 0);
1348                 if (r) {
1349                         ERROR(jail->pakfire, "Could not configure syscall %d: %m\n", *syscall);
1350                         goto ERROR;
1351                 }
1352         }
1353
1354         // Load syscall filter into the kernel
1355         r = seccomp_load(ctx);
1356         if (r) {
1357                 ERROR(jail->pakfire, "Could not load syscall filter into the kernel: %m\n");
1358                 goto ERROR;
1359         }
1360
1361 ERROR:
1362         if (ctx)
1363                 seccomp_release(ctx);
1364
1365         return r;
1366 }
1367
1368 // Mountpoints
1369
1370 PAKFIRE_EXPORT int pakfire_jail_bind(struct pakfire_jail* jail,
1371                 const char* source, const char* target, int flags) {
1372         struct pakfire_jail_mountpoint* mp = NULL;
1373         int r;
1374
1375         // Check if there is any space left
1376         if (jail->num_mountpoints >= MAX_MOUNTPOINTS) {
1377                 errno = ENOSPC;
1378                 return 1;
1379         }
1380
1381         // Check for valid inputs
1382         if (!source || !target) {
1383                 errno = EINVAL;
1384                 return 1;
1385         }
1386
1387         // Select the next free slot
1388         mp = &jail->mountpoints[jail->num_mountpoints];
1389
1390         // Copy source
1391         r = pakfire_string_set(mp->source, source);
1392         if (r) {
1393                 ERROR(jail->pakfire, "Could not copy source: %m\n");
1394                 return r;
1395         }
1396
1397         // Copy target
1398         r = pakfire_string_set(mp->target, target);
1399         if (r) {
1400                 ERROR(jail->pakfire, "Could not copy target: %m\n");
1401                 return r;
1402         }
1403
1404         // Copy flags
1405         mp->flags = flags;
1406
1407         // Increment counter
1408         jail->num_mountpoints++;
1409
1410         return 0;
1411 }
1412
1413 static int pakfire_jail_mount_networking(struct pakfire_jail* jail) {
1414         int r;
1415
1416         const char* paths[] = {
1417                 "/etc/hosts",
1418                 "/etc/resolv.conf",
1419                 NULL,
1420         };
1421
1422         // Bind-mount all paths read-only
1423         for (const char** path = paths; *path; path++) {
1424                 r = pakfire_bind(jail->pakfire, *path, NULL, MS_RDONLY);
1425                 if (r) {
1426                         switch (errno) {
1427                                 // Ignore if we don't have permission
1428                                 case EPERM:
1429                                         continue;
1430
1431                                 default:
1432                                         break;
1433                         }
1434                         return r;
1435                 }
1436         }
1437
1438         return 0;
1439 }
1440
1441 /*
1442         Mounts everything that we require in the new namespace
1443 */
1444 static int pakfire_jail_mount(struct pakfire_jail* jail, struct pakfire_jail_exec* ctx) {
1445         struct pakfire_jail_mountpoint* mp = NULL;
1446         int flags = 0;
1447         int r;
1448
1449         // Enable loop devices
1450         if (pakfire_jail_exec_has_flag(ctx, PAKFIRE_JAIL_HAS_LOOP_DEVICES))
1451                 flags |= PAKFIRE_MOUNT_LOOP_DEVICES;
1452
1453         // Mount all default stuff
1454         r = pakfire_mount_all(jail->pakfire, flags);
1455         if (r)
1456                 return r;
1457
1458         // Mount networking stuff
1459         if (pakfire_jail_exec_has_flag(ctx, PAKFIRE_JAIL_HAS_NETWORKING)) {
1460                 r = pakfire_jail_mount_networking(jail);
1461                 if (r)
1462                         return r;
1463         }
1464
1465         // Mount all custom stuff
1466         for (unsigned int i = 0; i < jail->num_mountpoints; i++) {
1467                 // Fetch mountpoint
1468                 mp = &jail->mountpoints[i];
1469
1470                 // Mount it
1471                 r = pakfire_bind(jail->pakfire, mp->source, mp->target, mp->flags);
1472                 if (r)
1473                         return r;
1474         }
1475
1476         // Log all mountpoints
1477         pakfire_mount_list(jail->pakfire);
1478
1479         return 0;
1480 }
1481
1482 // Networking
1483
1484 static int pakfire_jail_setup_loopback(struct pakfire_jail* jail) {
1485         struct nl_sock* nl = NULL;
1486         struct nl_cache* cache = NULL;
1487         struct rtnl_link* link = NULL;
1488         struct rtnl_link* change = NULL;
1489         int r;
1490
1491         DEBUG(jail->pakfire, "Setting up loopback...\n");
1492
1493         // Allocate a netlink socket
1494         nl = nl_socket_alloc();
1495         if (!nl) {
1496                 ERROR(jail->pakfire, "Could not allocate a netlink socket: %m\n");
1497                 r = 1;
1498                 goto ERROR;
1499         }
1500
1501         // Connect the socket
1502         r = nl_connect(nl, NETLINK_ROUTE);
1503         if (r) {
1504                 ERROR(jail->pakfire, "Could not connect netlink socket: %s\n", nl_geterror(r));
1505                 goto ERROR;
1506         }
1507
1508         // Allocate the netlink cache
1509         r = rtnl_link_alloc_cache(nl, AF_UNSPEC, &cache);
1510         if (r < 0) {
1511                 ERROR(jail->pakfire, "Unable to allocate netlink cache: %s\n", nl_geterror(r));
1512                 goto ERROR;
1513         }
1514
1515         // Fetch loopback interface
1516         link = rtnl_link_get_by_name(cache, "lo");
1517         if (!link) {
1518                 ERROR(jail->pakfire, "Could not find lo interface. Ignoring.\n");
1519                 r = 0;
1520                 goto ERROR;
1521         }
1522
1523         // Allocate a new link
1524         change = rtnl_link_alloc();
1525         if (!change) {
1526                 ERROR(jail->pakfire, "Could not allocate change link\n");
1527                 r = 1;
1528                 goto ERROR;
1529         }
1530
1531         // Set the link to UP
1532         rtnl_link_set_flags(change, IFF_UP);
1533
1534         // Apply any changes
1535         r = rtnl_link_change(nl, link, change, 0);
1536         if (r) {
1537                 ERROR(jail->pakfire, "Unable to activate loopback: %s\n", nl_geterror(r));
1538                 goto ERROR;
1539         }
1540
1541         // Success
1542         r = 0;
1543
1544 ERROR:
1545         if (nl)
1546                 nl_socket_free(nl);
1547
1548         return r;
1549 }
1550
1551 // UID/GID Mapping
1552
1553 static int pakfire_jail_setup_uid_mapping(struct pakfire_jail* jail, pid_t pid) {
1554         char path[PATH_MAX];
1555         int r;
1556
1557         // Skip mapping anything when running on /
1558         if (pakfire_on_root(jail->pakfire))
1559                 return 0;
1560
1561         // Make path
1562         r = pakfire_string_format(path, "/proc/%d/uid_map", pid);
1563         if (r)
1564                 return r;
1565
1566         // Fetch UID
1567         const uid_t uid = pakfire_uid(jail->pakfire);
1568
1569         // Fetch SUBUID
1570         const struct pakfire_subid* subuid = pakfire_subuid(jail->pakfire);
1571         if (!subuid)
1572                 return 1;
1573
1574         /* When running as root, we will map the entire range.
1575
1576            When running as a non-privileged user, we will map the root user inside the jail
1577            to the user's UID outside of the jail, and we will map the rest starting from one.
1578         */
1579
1580         // Running as root
1581         if (uid == 0) {
1582                 r = pakfire_file_write(jail->pakfire, path, 0, 0, 0,
1583                         "0 %lu %lu\n", subuid->id, subuid->length);
1584         } else {
1585                 r = pakfire_file_write(jail->pakfire, path, 0, 0, 0,
1586                         "0 %lu 1\n1 %lu %lu\n", uid, subuid->id, subuid->length);
1587         }
1588
1589         if (r) {
1590                 ERROR(jail->pakfire, "Could not map UIDs: %m\n");
1591                 return r;
1592         }
1593
1594         return r;
1595 }
1596
1597 static int pakfire_jail_setup_gid_mapping(struct pakfire_jail* jail, pid_t pid) {
1598         char path[PATH_MAX];
1599         int r;
1600
1601         // Skip mapping anything when running on /
1602         if (pakfire_on_root(jail->pakfire))
1603                 return 0;
1604
1605         // Fetch GID
1606         const gid_t gid = pakfire_gid(jail->pakfire);
1607
1608         // Fetch SUBGID
1609         const struct pakfire_subid* subgid = pakfire_subgid(jail->pakfire);
1610         if (!subgid)
1611                 return 1;
1612
1613         // Make path
1614         r = pakfire_string_format(path, "/proc/%d/gid_map", pid);
1615         if (r)
1616                 return r;
1617
1618         // Running as root
1619         if (gid == 0) {
1620                 r = pakfire_file_write(jail->pakfire, path, 0, 0, 0,
1621                         "0 %lu %lu\n", subgid->id, subgid->length);
1622         } else {
1623                 r = pakfire_file_write(jail->pakfire, path, 0, 0, 0,
1624                         "0 %lu 1\n1 %lu %lu\n", gid, subgid->id, subgid->length);
1625         }
1626
1627         if (r) {
1628                 ERROR(jail->pakfire, "Could not map GIDs: %m\n");
1629                 return r;
1630         }
1631
1632         return r;
1633 }
1634
1635 static int pakfire_jail_setgroups(struct pakfire_jail* jail, pid_t pid) {
1636         char path[PATH_MAX];
1637         int r = 1;
1638
1639         // Make path
1640         r = pakfire_string_format(path, "/proc/%d/setgroups", pid);
1641         if (r)
1642                 return r;
1643
1644         // Open file for writing
1645         FILE* f = fopen(path, "w");
1646         if (!f) {
1647                 ERROR(jail->pakfire, "Could not open %s for writing: %m\n", path);
1648                 goto ERROR;
1649         }
1650
1651         // Write content
1652         int bytes_written = fprintf(f, "deny\n");
1653         if (bytes_written <= 0) {
1654                 ERROR(jail->pakfire, "Could not write to %s: %m\n", path);
1655                 goto ERROR;
1656         }
1657
1658         r = fclose(f);
1659         f = NULL;
1660         if (r) {
1661                 ERROR(jail->pakfire, "Could not close %s: %m\n", path);
1662                 goto ERROR;
1663         }
1664
1665 ERROR:
1666         if (f)
1667                 fclose(f);
1668
1669         return r;
1670 }
1671
1672 static int pakfire_jail_send_signal(struct pakfire_jail* jail, int fd) {
1673         const uint64_t val = 1;
1674         int r = 0;
1675
1676         DEBUG(jail->pakfire, "Sending signal...\n");
1677
1678         // Write to the file descriptor
1679         r = eventfd_write(fd, val);
1680         if (r < 0) {
1681                 ERROR(jail->pakfire, "Could not send signal: %s\n", strerror(errno));
1682                 r = -errno;
1683         }
1684
1685         // Close the file descriptor
1686         close(fd);
1687
1688         return r;
1689 }
1690
1691 static int pakfire_jail_wait_for_signal(struct pakfire_jail* jail, int fd) {
1692         uint64_t val = 0;
1693         int r = 0;
1694
1695         DEBUG(jail->pakfire, "Waiting for signal...\n");
1696
1697         r = eventfd_read(fd, &val);
1698         if (r < 0) {
1699                 ERROR(jail->pakfire, "Error waiting for signal: %s\n", strerror(errno));
1700                 r = -errno;
1701         }
1702
1703         // Close the file descriptor
1704         close(fd);
1705
1706         return r;
1707 }
1708
1709 static int pakfire_jail_switch_root(struct pakfire_jail* jail, const char* root) {
1710         int r;
1711
1712         // Change to the new root
1713         r = chdir(root);
1714         if (r) {
1715                 ERROR(jail->pakfire, "chdir(%s) failed: %m\n", root);
1716                 return r;
1717         }
1718
1719         // Switch Root!
1720         r = pivot_root(".", ".");
1721         if (r) {
1722                 ERROR(jail->pakfire, "Failed changing into the new root directory %s: %m\n", root);
1723                 return r;
1724         }
1725
1726         // Umount the old root
1727         r = umount2(".", MNT_DETACH);
1728         if (r) {
1729                 ERROR(jail->pakfire, "Could not umount the old root filesystem: %m\n");
1730                 return r;
1731         }
1732
1733         return 0;
1734 }
1735
1736 /*
1737         Called by the parent that sets up the second child process...
1738 */
1739 static int pakfire_jail_setup_child2(
1740                 struct pakfire_jail* jail, struct pakfire_jail_exec* ctx) {
1741         pid_t pid = -1;
1742         int r;
1743
1744         // Fetch the PID
1745         r = pidfd_get_pid(ctx->pidfd2, &pid);
1746         if (r) {
1747                 CTX_ERROR(jail->ctx, "Could not fetch PID: %s\n", strerror(-r));
1748                 return r;
1749         }
1750
1751         // Setup UID mapping
1752         r = pakfire_jail_setup_uid_mapping(jail, pid);
1753         if (r)
1754                 return r;
1755
1756         // Write "deny" to /proc/PID/setgroups
1757         r = pakfire_jail_setgroups(jail, pid);
1758         if (r)
1759                 return r;
1760
1761         // Setup GID mapping
1762         r = pakfire_jail_setup_gid_mapping(jail, pid);
1763         if (r)
1764                 return r;
1765
1766         // Parent has finished initialisation
1767         DEBUG(jail->pakfire, "Parent has finished initialization\n");
1768
1769         // Send signal to client
1770         r = pakfire_jail_send_signal(jail, ctx->completed_fd);
1771         if (r)
1772                 return r;
1773
1774         return 0;
1775 }
1776
1777 /*
1778         Child 2 is launched in their own user/mount/etc. namespace.
1779 */
1780 static int pakfire_jail_child2(struct pakfire_jail* jail,
1781                 struct pakfire_jail_exec* ctx, const char* argv[]) {
1782         int r;
1783
1784         // Fetch my own PID
1785         pid_t pid = getpid();
1786
1787         CTX_DEBUG(jail->ctx, "Launched child process in jail with PID %d\n", pid);
1788
1789         // Die with parent
1790         r = prctl(PR_SET_PDEATHSIG, SIGKILL, 0, 0, 0);
1791         if (r) {
1792                 CTX_ERROR(jail->ctx, "Could not configure to die with parent: %m\n");
1793                 return 126;
1794         }
1795
1796         // Make this process dumpable
1797         r = prctl (PR_SET_DUMPABLE, 1, 0, 0, 0);
1798         if (r) {
1799                 CTX_ERROR(jail->ctx, "Could not make the process dumpable: %m\n");
1800                 return 126;
1801         }
1802
1803         // Don't drop any capabilities on setuid()
1804         r = prctl(PR_SET_KEEPCAPS, 1);
1805         if (r) {
1806                 CTX_ERROR(jail->ctx, "Could not set PR_SET_KEEPCAPS: %m\n");
1807                 return 126;
1808         }
1809
1810         // Wait for the parent to finish initialization
1811         r = pakfire_jail_wait_for_signal(jail, ctx->completed_fd);
1812         if (r)
1813                 return r;
1814
1815         // Fetch UID/GID
1816         uid_t uid = getuid();
1817         gid_t gid = getgid();
1818
1819         // Fetch EUID/EGID
1820         uid_t euid = geteuid();
1821         gid_t egid = getegid();
1822
1823         DEBUG(jail->pakfire, "  UID: %u (effective %u)\n", uid, euid);
1824         DEBUG(jail->pakfire, "  GID: %u (effective %u)\n", gid, egid);
1825
1826         // Fail if we are not PID 1
1827         if (pid != 1) {
1828                 CTX_ERROR(jail->ctx, "Child process is not PID 1\n");
1829                 //return 126;
1830         }
1831
1832         // Fail if we are not running as root
1833         if (uid || gid || euid || egid) {
1834                 ERROR(jail->pakfire, "Child process is not running as root\n");
1835                 //return 126;
1836         }
1837
1838         const char* arch = pakfire_get_effective_arch(jail->pakfire);
1839
1840         // Set personality
1841         unsigned long persona = pakfire_arch_personality(arch);
1842         if (persona) {
1843                 r = personality(persona);
1844                 if (r < 0) {
1845                         ERROR(jail->pakfire, "Could not set personality (%x)\n", (unsigned int)persona);
1846                         return 126;
1847                 }
1848         }
1849
1850         // Setup networking
1851         if (!pakfire_jail_exec_has_flag(ctx, PAKFIRE_JAIL_HAS_NETWORKING)) {
1852                 r = pakfire_jail_setup_loopback(jail);
1853                 if (r)
1854                         return 1;
1855         }
1856
1857         // Set nice level
1858         if (jail->nice) {
1859                 DEBUG(jail->pakfire, "Setting nice level to %d\n", jail->nice);
1860
1861                 r = setpriority(PRIO_PROCESS, pid, jail->nice);
1862                 if (r) {
1863                         ERROR(jail->pakfire, "Could not set nice level: %m\n");
1864                         return 1;
1865                 }
1866         }
1867
1868         // Close other end of log pipes
1869         close(ctx->pipes.log_INFO[0]);
1870         close(ctx->pipes.log_ERROR[0]);
1871 #ifdef ENABLE_DEBUG
1872         close(ctx->pipes.log_DEBUG[0]);
1873 #endif /* ENABLE_DEBUG */
1874
1875         // Connect standard input
1876         if (ctx->pipes.stdin[0] >= 0) {
1877                 r = dup2(ctx->pipes.stdin[0], STDIN_FILENO);
1878                 if (r < 0) {
1879                         ERROR(jail->pakfire, "Could not connect fd %d to stdin: %m\n",
1880                                 ctx->pipes.stdin[0]);
1881
1882                         return 1;
1883                 }
1884         }
1885
1886         // Connect standard output and error
1887         if (ctx->pipes.stdout[1] >= 0 && ctx->pipes.stderr[1] >= 0) {
1888                 r = dup2(ctx->pipes.stdout[1], STDOUT_FILENO);
1889                 if (r < 0) {
1890                         ERROR(jail->pakfire, "Could not connect fd %d to stdout: %m\n",
1891                                 ctx->pipes.stdout[1]);
1892
1893                         return 1;
1894                 }
1895
1896                 r = dup2(ctx->pipes.stderr[1], STDERR_FILENO);
1897                 if (r < 0) {
1898                         ERROR(jail->pakfire, "Could not connect fd %d to stderr: %m\n",
1899                                 ctx->pipes.stderr[1]);
1900
1901                         return 1;
1902                 }
1903
1904                 // Close the pipe (as we have moved the original file descriptors)
1905                 pakfire_jail_close_pipe(jail, ctx->pipes.stdin);
1906                 pakfire_jail_close_pipe(jail, ctx->pipes.stdout);
1907                 pakfire_jail_close_pipe(jail, ctx->pipes.stderr);
1908         }
1909
1910         // Reset open file limit (http://0pointer.net/blog/file-descriptor-limits.html)
1911         r = pakfire_rlimit_reset_nofile(jail->pakfire);
1912         if (r)
1913                 return r;
1914
1915         // Set capabilities
1916         r = pakfire_jail_set_capabilities(jail);
1917         if (r)
1918                 return r;
1919
1920         // Show capabilities
1921         r = pakfire_jail_show_capabilities(jail);
1922         if (r)
1923                 return r;
1924
1925         // Filter syscalls
1926         r = pakfire_jail_limit_syscalls(jail);
1927         if (r)
1928                 return r;
1929
1930         CTX_DEBUG(jail->ctx, "Child process initialization done\n");
1931         CTX_DEBUG(jail->ctx, "Launching command:\n");
1932
1933         // Log argv
1934         for (unsigned int i = 0; argv[i]; i++)
1935                 CTX_DEBUG(jail->ctx, "  argv[%u] = %s\n", i, argv[i]);
1936
1937         // exec() command
1938         r = execvpe(argv[0], (char**)argv, jail->env);
1939         if (r < 0) {
1940                 // Translate errno into regular exit code
1941                 switch (errno) {
1942                         case ENOENT:
1943                                 // Ignore if the command doesn't exist
1944                                 if (ctx->flags & PAKFIRE_JAIL_NOENT_OK)
1945                                         r = 0;
1946                                 else
1947                                         r = 127;
1948
1949                                 break;
1950
1951                         default:
1952                                 r = 1;
1953                 }
1954
1955                 CTX_ERROR(jail->ctx, "Could not execve(%s): %m\n", argv[0]);
1956         }
1957
1958         // We should not get here
1959         return r;
1960 }
1961
1962 /*
1963         Child 1 is launched in a new mount namespace...
1964 */
1965 static int pakfire_jail_child1(struct pakfire_jail* jail,
1966                 struct pakfire_jail_exec* ctx, const char* argv[]) {
1967         int r;
1968
1969         // Redirect any logging to our log pipe
1970         pakfire_ctx_set_log_callback(jail->ctx, pakfire_jail_log_redirect, &ctx->pipes);
1971
1972         CTX_DEBUG(jail->ctx, "First child process launched\n");
1973
1974         const int socket_send = pakfire_jail_get_pipe_to_write(jail, &ctx->socket);
1975
1976         const char* root = pakfire_get_path(jail->pakfire);
1977
1978         // Die with parent
1979         r = prctl(PR_SET_PDEATHSIG, SIGKILL, 0, 0, 0);
1980         if (r) {
1981                 CTX_ERROR(jail->ctx, "Could not configure to die with parent: %s\n", strerror(errno));
1982                 goto ERROR;
1983         }
1984
1985         // Change mount propagation so that we will receive, but don't propagate back
1986         r = pakfire_mount_change_propagation(jail->ctx, "/", MS_SLAVE);
1987         if (r) {
1988                 CTX_ERROR(jail->ctx, "Could not change mount propagation to SLAVE: %s\n", strerror(r));
1989                 goto ERROR;
1990         }
1991
1992         // Make root a mountpoint in the new mount namespace
1993         r = pakfire_mount_make_mounpoint(jail->pakfire, root);
1994         if (r)
1995                 goto ERROR;
1996
1997         // Make everything private
1998         r = pakfire_mount_change_propagation(jail->ctx, root, MS_PRIVATE);
1999         if (r) {
2000                 CTX_ERROR(jail->ctx, "Could not change mount propagation to PRIVATE: %s\n", strerror(r));
2001                 goto ERROR;
2002         }
2003
2004         // Mount everything
2005         r = pakfire_jail_mount(jail, ctx);
2006         if (r)
2007                 goto ERROR;
2008
2009         // chroot()
2010         r = pakfire_jail_switch_root(jail, root);
2011         if (r)
2012                 goto ERROR;
2013
2014         // Change mount propagation so that we will propagate everything down
2015         r = pakfire_mount_change_propagation(jail->ctx, "/", MS_SHARED);
2016         if (r) {
2017                 CTX_ERROR(jail->ctx, "Could not change mount propagation to SHARED: %s\n", strerror(r));
2018                 goto ERROR;
2019         }
2020
2021         // Configure child process
2022         struct clone_args args = {
2023                 .flags =
2024                         CLONE_NEWCGROUP |
2025                         CLONE_NEWIPC |
2026                         CLONE_NEWNS |
2027                         CLONE_NEWPID |
2028                         CLONE_NEWTIME |
2029                         CLONE_NEWUSER |
2030                         CLONE_NEWUTS |
2031                         CLONE_PIDFD,
2032                 .exit_signal = SIGCHLD,
2033                 .pidfd = (long long unsigned int)&ctx->pidfd2,
2034         };
2035
2036         // Launch the process into the configured cgroup
2037         if (ctx->cgroup) {
2038                 args.flags |= CLONE_INTO_CGROUP;
2039
2040                 // Clone into this cgroup
2041                 args.cgroup = pakfire_cgroup_fd(ctx->cgroup);
2042         }
2043
2044         // Setup networking
2045         if (!pakfire_jail_exec_has_flag(ctx, PAKFIRE_JAIL_HAS_NETWORKING))
2046                 args.flags |= CLONE_NEWNET;
2047
2048         // Fork the second child process
2049         pid_t pid = clone3(&args, sizeof(args));
2050         if (pid < 0) {
2051                 CTX_ERROR(jail->ctx, "Could not fork the first child process: %s\n", strerror(errno));
2052                 r = -errno;
2053                 goto ERROR;
2054
2055         // Child process
2056         } else if (pid == 0) {
2057                 r = pakfire_jail_child2(jail, ctx, argv);
2058                 _exit(r);
2059         }
2060
2061         // Send the pidfd of the child to the first parent
2062         r = pakfire_jail_send_fd(jail, socket_send, ctx->pidfd2);
2063         if (r)
2064                 goto ERROR;
2065
2066 ERROR:
2067         return r;
2068 }
2069
2070 // Run a command in the jail
2071 static int __pakfire_jail_exec(struct pakfire_jail* jail, const char* argv[],
2072                 const int interactive,
2073                 pakfire_jail_communicate_in  communicate_in,
2074                 pakfire_jail_communicate_out communicate_out,
2075                 void* data, int flags) {
2076         int r;
2077
2078         // Check if argv is valid
2079         if (!argv || !argv[0]) {
2080                 errno = EINVAL;
2081                 return -1;
2082         }
2083
2084         // Initialize context for this call
2085         struct pakfire_jail_exec ctx = {
2086                 .flags = flags,
2087
2088                 .socket = { -1, -1 },
2089
2090                 .pipes = {
2091                         .stdin     = { -1, -1 },
2092                         .stdout    = { -1, -1 },
2093                         .stderr    = { -1, -1 },
2094                         .log_INFO  = { -1, -1 },
2095                         .log_ERROR = { -1, -1 },
2096 #ifdef ENABLE_DEBUG
2097                         .log_DEBUG = { -1, -1 },
2098 #endif /* ENABLE_DEBUG */
2099                 },
2100
2101                 .communicate = {
2102                         .in   = communicate_in,
2103                         .out  = communicate_out,
2104                         .data = data,
2105                 },
2106
2107                 // PIDs
2108                 .pidfd1 = -1,
2109                 .pidfd2 = -1,
2110         };
2111
2112         DEBUG(jail->pakfire, "Executing jail...\n");
2113
2114         // Become the subreaper
2115         r = prctl(PR_SET_CHILD_SUBREAPER, 1, 0, 0, 0);
2116         if (r < 0) {
2117                 CTX_ERROR(jail->ctx, "Failed to become the sub-reaper: %s\n", strerror(errno));
2118                 r = -errno;
2119                 goto ERROR;
2120         }
2121
2122         // Enable networking in interactive mode
2123         if (interactive)
2124                 ctx.flags |= PAKFIRE_JAIL_HAS_NETWORKING;
2125
2126         // Create a UNIX domain socket
2127         r = socketpair(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC, 0, ctx.socket);
2128         if (r < 0) {
2129                 CTX_ERROR(jail->ctx, "Could not create UNIX socket: %s\n", strerror(errno));
2130                 r = -errno;
2131                 goto ERROR;
2132         }
2133
2134         /*
2135                 Setup a file descriptor which can be used to notify the client that the parent
2136                 has completed configuration.
2137         */
2138         ctx.completed_fd = eventfd(0, EFD_CLOEXEC);
2139         if (ctx.completed_fd < 0) {
2140                 ERROR(jail->pakfire, "eventfd() failed: %m\n");
2141                 return -1;
2142         }
2143
2144         // Create pipes to communicate with child process if we are not running interactively
2145         if (!interactive) {
2146                 // stdin (only if callback is set)
2147                 if (ctx.communicate.in) {
2148                         r = pakfire_jail_setup_pipe(jail, &ctx.pipes.stdin, 0);
2149                         if (r)
2150                                 goto ERROR;
2151                 }
2152
2153                 // stdout
2154                 r = pakfire_jail_setup_pipe(jail, &ctx.pipes.stdout, 0);
2155                 if (r)
2156                         goto ERROR;
2157
2158                 // stderr
2159                 r = pakfire_jail_setup_pipe(jail, &ctx.pipes.stderr, 0);
2160                 if (r)
2161                         goto ERROR;
2162         }
2163
2164         // Setup pipes for logging
2165         // INFO
2166         r = pakfire_jail_setup_pipe(jail, &ctx.pipes.log_INFO, O_CLOEXEC);
2167         if (r)
2168                 goto ERROR;
2169
2170         // ERROR
2171         r = pakfire_jail_setup_pipe(jail, &ctx.pipes.log_ERROR, O_CLOEXEC);
2172         if (r)
2173                 goto ERROR;
2174
2175 #ifdef ENABLE_DEBUG
2176         // DEBUG
2177         r = pakfire_jail_setup_pipe(jail, &ctx.pipes.log_DEBUG, O_CLOEXEC);
2178         if (r)
2179                 goto ERROR;
2180 #endif /* ENABLE_DEBUG */
2181
2182         // Launch the process in a cgroup that is a leaf of the configured cgroup
2183         if (jail->cgroup) {
2184                 // Fetch our UUID
2185                 const char* uuid = pakfire_jail_uuid(jail);
2186
2187                 // Create a temporary cgroup
2188                 r = pakfire_cgroup_child(&ctx.cgroup, jail->cgroup, uuid, 0);
2189                 if (r) {
2190                         ERROR(jail->pakfire, "Could not create cgroup for jail: %m\n");
2191                         goto ERROR;
2192                 }
2193         }
2194
2195         /*
2196                 Initially, we will set up a new mount namespace and launch a child process in it.
2197
2198                 This process remains in the user/ipc/time/etc. namespace and will set up
2199                 the mount namespace.
2200         */
2201
2202         // Configure child process
2203         struct clone_args args = {
2204                 .flags =
2205                         CLONE_NEWNS |
2206                         CLONE_PIDFD |
2207                         CLONE_CLEAR_SIGHAND,
2208                 .exit_signal = SIGCHLD,
2209                 .pidfd = (long long unsigned int)&ctx.pidfd1,
2210         };
2211
2212         // Fork the first child process
2213         pid_t pid = clone3(&args, sizeof(args));
2214         if (pid < 0) {
2215                 CTX_ERROR(jail->ctx, "Could not fork the first child process: %s\n", strerror(errno));
2216                 r = -errno;
2217                 goto ERROR;
2218
2219         // Child process
2220         } else if (pid == 0) {
2221                 r = pakfire_jail_child1(jail, &ctx, argv);
2222                 _exit(r);
2223         }
2224
2225         // Parent process
2226         r = pakfire_jail_wait(jail, &ctx);
2227         if (r)
2228                 goto ERROR;
2229
2230 ERROR:
2231         // Destroy the temporary cgroup (if any)
2232         if (ctx.cgroup) {
2233 #if 0
2234                 // XXX this is currently disabled because it overwrites r
2235                 // Read cgroup stats
2236                 r = pakfire_cgroup_stat(ctx.cgroup, &ctx.cgroup_stats);
2237                 if (r) {
2238                         ERROR(jail->pakfire, "Could not read cgroup stats: %m\n");
2239                 } else {
2240                         pakfire_cgroup_stat_dump(ctx.cgroup, &ctx.cgroup_stats);
2241                 }
2242 #endif
2243
2244                 pakfire_cgroup_destroy(ctx.cgroup);
2245                 pakfire_cgroup_unref(ctx.cgroup);
2246         }
2247
2248         // Close any file descriptors
2249         pakfire_jail_close_pipe(jail, ctx.pipes.stdin);
2250         pakfire_jail_close_pipe(jail, ctx.pipes.stdout);
2251         pakfire_jail_close_pipe(jail, ctx.pipes.stderr);
2252         pakfire_jail_close_pipe(jail, ctx.pipes.log_INFO);
2253         pakfire_jail_close_pipe(jail, ctx.pipes.log_ERROR);
2254 #ifdef ENABLE_DEBUG
2255         pakfire_jail_close_pipe(jail, ctx.pipes.log_DEBUG);
2256 #endif /* ENABLE_DEBUG */
2257         if (ctx.pidfd1 >= 0)
2258                 close(ctx.pidfd1);
2259         if (ctx.pidfd2 >= 0)
2260                 close(ctx.pidfd2);
2261
2262         // Close sockets
2263         pakfire_jail_close_pipe(jail, ctx.socket);
2264
2265         return r;
2266 }
2267
2268 PAKFIRE_EXPORT int pakfire_jail_exec(
2269                 struct pakfire_jail* jail,
2270                 const char* argv[],
2271                 pakfire_jail_communicate_in  callback_in,
2272                 pakfire_jail_communicate_out callback_out,
2273                 void* data, int flags) {
2274         return __pakfire_jail_exec(jail, argv, 0, callback_in, callback_out, data, flags);
2275 }
2276
/*
        Executes argv in the jail in interactive mode, i.e. without any
        communication callbacks.

        The interactive environment is set up first. If that fails, its error
        is returned and nothing is executed.
*/
static int pakfire_jail_exec_interactive(
                struct pakfire_jail* jail, const char* argv[], int flags) {
        // Prepare the environment for interactive use
        const int r = pakfire_jail_setup_interactive_env(jail);
        if (r != 0)
                return r;

        return __pakfire_jail_exec(jail, argv, 1, NULL, NULL, NULL, flags);
}
2288
2289 int pakfire_jail_exec_script(struct pakfire_jail* jail,
2290                 const char* script,
2291                 const size_t size,
2292                 const char* args[],
2293                 pakfire_jail_communicate_in  callback_in,
2294                 pakfire_jail_communicate_out callback_out,
2295                 void* data) {
2296         char path[PATH_MAX];
2297         const char** argv = NULL;
2298         FILE* f = NULL;
2299         int r;
2300
2301         const char* root = pakfire_get_path(jail->pakfire);
2302
2303         // Write the scriptlet to disk
2304         r = pakfire_path_append(path, root, PAKFIRE_TMP_DIR "/pakfire-script.XXXXXX");
2305         if (r)
2306                 goto ERROR;
2307
2308         // Create a temporary file
2309         f = pakfire_mktemp(path, 0700);
2310         if (!f) {
2311                 ERROR(jail->pakfire, "Could not create temporary file: %m\n");
2312                 goto ERROR;
2313         }
2314
2315         DEBUG(jail->pakfire, "Writing script to %s:\n%.*s\n", path, (int)size, script);
2316
2317         // Write data
2318         r = fprintf(f, "%s", script);
2319         if (r < 0) {
2320                 ERROR(jail->pakfire, "Could not write script to file %s: %m\n", path);
2321                 goto ERROR;
2322         }
2323
2324         // Close file
2325         r = fclose(f);
2326         if (r) {
2327                 ERROR(jail->pakfire, "Could not close script file %s: %m\n", path);
2328                 goto ERROR;
2329         }
2330
2331         f = NULL;
2332
2333         // Count how many arguments were passed
2334         unsigned int argc = 1;
2335         if (args) {
2336                 for (const char** arg = args; *arg; arg++)
2337                         argc++;
2338         }
2339
2340         argv = calloc(argc + 1, sizeof(*argv));
2341         if (!argv) {
2342                 ERROR(jail->pakfire, "Could not allocate argv: %m\n");
2343                 goto ERROR;
2344         }
2345
2346         // Set command
2347         argv[0] = (root) ? pakfire_path_relpath(root, path) : path;
2348
2349         // Copy args
2350         for (unsigned int i = 1; i < argc; i++)
2351                 argv[i] = args[i-1];
2352
2353         // Run the script
2354         r = pakfire_jail_exec(jail, argv, callback_in, callback_out, data, 0);
2355
2356 ERROR:
2357         if (argv)
2358                 free(argv);
2359         if (f)
2360                 fclose(f);
2361
2362         // Remove script from disk
2363         if (*path)
2364                 unlink(path);
2365
2366         return r;
2367 }
2368
2369 /*
2370         A convenience function that creates a new jail, runs the given command and destroys
2371         the jail again.
2372 */
2373 int pakfire_jail_run(struct pakfire* pakfire, const char* argv[], int flags, char** output) {
2374         struct pakfire_jail* jail = NULL;
2375         int r;
2376
2377         // Create a new jail
2378         r = pakfire_jail_create(&jail, pakfire);
2379         if (r)
2380                 goto ERROR;
2381
2382         // Execute the command
2383         r = pakfire_jail_exec(jail, argv, NULL, pakfire_jail_capture_stdout, output, 0);
2384
2385 ERROR:
2386         if (jail)
2387                 pakfire_jail_unref(jail);
2388
2389         return r;
2390 }
2391
/*
        Convenience function: creates a jail for pakfire, executes the script of
        the given length with the arguments in argv and drops the jail again.

        No communication callbacks are installed.

        NOTE(review): flags is accepted but not used in here.

        Returns the error of pakfire_jail_create() if no jail could be created
        and the result of pakfire_jail_exec_script() otherwise.
*/
int pakfire_jail_run_script(struct pakfire* pakfire,
                const char* script, const size_t length, const char* argv[], int flags) {
        struct pakfire_jail* jail = NULL;

        // Set up the jail and run the script only if that has worked
        int r = pakfire_jail_create(&jail, pakfire);
        if (r == 0)
                r = pakfire_jail_exec_script(jail, script, length, argv, NULL, NULL, NULL);

        // Drop the jail again
        if (jail)
                pakfire_jail_unref(jail);

        return r;
}
2411
/*
        Launches an interactive login shell (/bin/bash --login) in the jail.

        Negative results of the execution are passed on as errors. Any other
        return code of the shell is ignored and zero is returned.
*/
int pakfire_jail_shell(struct pakfire_jail* jail) {
        const char* argv[] = { "/bin/bash", "--login", NULL };

        // Run the shell
        const int r = pakfire_jail_exec_interactive(jail, argv, 0);

        // Only errors are reported, the exit code of the shell is dropped
        return (r < 0) ? r : 0;
}
2429
2430 static int pakfire_jail_run_if_possible(struct pakfire* pakfire, const char** argv) {
2431         char path[PATH_MAX];
2432         int r;
2433
2434         r = pakfire_path(pakfire, path, "%s", *argv);
2435         if (r)
2436                 return r;
2437
2438         // Check if the file is executable
2439         r = access(path, X_OK);
2440         if (r) {
2441                 DEBUG(pakfire, "%s is not executable. Skipping...\n", *argv);
2442                 return 0;
2443         }
2444
2445         return pakfire_jail_run(pakfire, argv, 0, NULL);
2446 }
2447
/*
        Runs /sbin/ldconfig in a jail if it is executable.

        Returns the result of pakfire_jail_run_if_possible().
*/
int pakfire_jail_ldconfig(struct pakfire* pakfire) {
        const char* command[] = { "/sbin/ldconfig", NULL };

        return pakfire_jail_run_if_possible(pakfire, command);
}
2456
/*
        Runs "/usr/bin/systemd-tmpfiles --create" in a jail if the binary is
        executable.

        Returns the result of pakfire_jail_run_if_possible().
*/
int pakfire_jail_run_systemd_tmpfiles(struct pakfire* pakfire) {
        const char* command[] = { "/usr/bin/systemd-tmpfiles", "--create", NULL };

        return pakfire_jail_run_if_possible(pakfire, command);
}