src/libpakfire/jail.c

   1 /*#############################################################################
   2 #                                                                             #
   3 # Pakfire - The IPFire package management system                              #
   4 # Copyright (C) 2022 Pakfire development team                                 #
   5 #                                                                             #
   6 # This program is free software: you can redistribute it and/or modify        #
   7 # it under the terms of the GNU General Public License as published by        #
   8 # the Free Software Foundation, either version 3 of the License, or           #
   9 # (at your option) any later version.                                         #
  10 #                                                                             #
  11 # This program is distributed in the hope that it will be useful,             #
  12 # but WITHOUT ANY WARRANTY; without even the implied warranty of              #
  13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the               #
  14 # GNU General Public License for more details.                                #
  15 #                                                                             #
  16 # You should have received a copy of the GNU General Public License           #
  17 # along with this program.  If not, see <http://www.gnu.org/licenses/>.       #
  18 #                                                                             #
  19 #############################################################################*/
  20
  21 #include <errno.h>
  22 #include <fcntl.h>
  23 #include <linux/capability.h>
  24 #include <linux/sched.h>
  25 #include <sys/wait.h>
  26 #include <linux/wait.h>
  27 #include <sched.h>
  28 #include <signal.h>
  29 #include <stdlib.h>
  30 #include <syscall.h>
  31 #include <sys/capability.h>
  32 #include <sys/epoll.h>
  33 #include <sys/eventfd.h>
  34 #include <sys/mount.h>
  35 #include <sys/personality.h>
  36 #include <sys/prctl.h>
  37 #include <sys/resource.h>
  38 #include <sys/timerfd.h>
  39 #include <sys/types.h>
  40 #include <sys/wait.h>
  41 #include <termios.h>
  42
  43 // libnl3
  44 #include <net/if.h>
  45 #include <netlink/route/link.h>
  46
  47 // libseccomp
  48 #include <seccomp.h>
  49
  50 // libuuid
  51 #include <uuid.h>
  52
  53 #include <pakfire/arch.h>
  54 #include <pakfire/cgroup.h>
  55 #include <pakfire/jail.h>
  56 #include <pakfire/logging.h>
  57 #include <pakfire/mount.h>
  58 #include <pakfire/pakfire.h>
  59 #include <pakfire/path.h>
  60 #include <pakfire/private.h>
  61 #include <pakfire/pwd.h>
  62 #include <pakfire/string.h>
  63 #include <pakfire/util.h>
  64
  65 #define BUFFER_SIZE      1024 * 64
  66 #define ENVIRON_SIZE     128
  67 #define EPOLL_MAX_EVENTS 2
  68 #define MAX_MOUNTPOINTS  8
  69
  70 // The default environment that will be set for every command
  71 static const struct environ {
  72         const char* key;
  73         const char* val;
  74 } ENV[] = {
  75         { "HOME", "/root" },
  76         { "LANG", "C.utf-8" },
  77         { "PATH", "/usr/local/sbin:/usr/sbin:/sbin:/usr/local/bin:/usr/bin:/bin", },
  78         { "TERM", "vt100" },
  79
  80         // Tell everything that it is running inside a Pakfire container
  81         { "container", "pakfire" },
  82         { NULL, NULL },
  83 };
  84
// One entry of a jail's mountpoint table (see struct pakfire_jail)
struct pakfire_jail_mountpoint {
        // Source path (at most PATH_MAX bytes including the terminator)
        char source[PATH_MAX];
        // Target path (at most PATH_MAX bytes including the terminator)
        char target[PATH_MAX];
        // NOTE(review): presumably mount(2) flags - confirm against the code that consumes this table
        int flags;
};
  90
/*
        State of a jail.

        Objects are reference counted (nrefs) and hold a reference to the
        context and the Pakfire instance they were created for.
*/
struct pakfire_jail {
        struct pakfire_ctx* ctx;
        struct pakfire* pakfire;
        int nrefs;

        // A unique ID for each jail
        uuid_t uuid;
        // String representation of uuid; filled in on first use by pakfire_jail_uuid()
        char __uuid[UUID_STR_LEN];

        // Resource Limits
        // Nice level as stored by pakfire_jail_nice()
        int nice;

        // Timeout
        // it_value.tv_sec holds the timeout in seconds; zero means that no timeout is set
        struct itimerspec timeout;

        // CGroup
        // May be NULL; a reference is held for as long as a cgroup is set
        struct pakfire_cgroup* cgroup;

        // Environment
        // Heap-allocated "KEY=VALUE" strings; unused slots are NULL
        char* env[ENVIRON_SIZE];

        // Mountpoints
        // Only the first num_mountpoints entries are meaningful
        struct pakfire_jail_mountpoint mountpoints[MAX_MOUNTPOINTS];
        unsigned int num_mountpoints;

        // Callbacks
        struct pakfire_jail_callbacks {
                // Log
                // Set by pakfire_jail_set_log_callback(); log_data is stored
                // alongside the callback
                pakfire_jail_log_callback log;
                void* log_data;
        } callbacks;
};
 123
// Fixed-size byte buffer that collects output until it can be passed on
struct pakfire_log_buffer {
        // Storage; the content is not NUL-terminated
        char data[BUFFER_SIZE];
        // Number of valid bytes at the start of data
        size_t used;
};
 128
// NOTE(review): presumably the state of one command execution inside a jail -
// the code that creates and drives this structure is outside of this part
// of the file
struct pakfire_jail_exec {
        // Bitmask of enum pakfire_jail_exec_flags; tested by pakfire_jail_exec_has_flag()
        int flags;

        // PID (of the child)
        pid_t pid;
        int pidfd;

        // Socket to pass FDs
        int socket[2];

        // Process status (from waitid)
        siginfo_t status;

        // FD to notify the client that the parent has finished initialization
        int completed_fd;

        // Log pipes
        // Index 0 is the read end and index 1 the write end of each pipe
        struct pakfire_jail_pipes {
                // Logging
                int log_INFO[2];
                int log_ERROR[2];
#ifdef ENABLE_DEBUG
                int log_DEBUG[2];
#endif /* ENABLE_DEBUG */
        } pipes;

        // Communicate
        // Input and output callbacks and the data pointer that belongs to them
        struct pakfire_jail_communicate {
                pakfire_jail_communicate_in  in;
                pakfire_jail_communicate_out out;
                void* data;
        } communicate;

        // Log buffers
        struct pakfire_jail_buffers {
                struct pakfire_log_buffer stdout;
                struct pakfire_log_buffer stderr;

                // Logging
                // One buffer for each of the log pipes above
                struct pakfire_log_buffer log_INFO;
                struct pakfire_log_buffer log_ERROR;
#ifdef ENABLE_DEBUG
                struct pakfire_log_buffer log_DEBUG;
#endif /* ENABLE_DEBUG */
        } buffers;

        struct pakfire_cgroup* cgroup;
        struct pakfire_cgroup_stats cgroup_stats;

        // PTY
        struct pakfire_jail_pty {
                // The path to the console
                char console[PATH_MAX];

                // The master fd
                struct pakfire_jail_pty_master {
                        int fd;

                        // Bit flags
                        enum pakfire_jail_pty_flags {
                                PAKFIRE_JAIL_PTY_READY_TO_READ  = (1 << 0),
                                PAKFIRE_JAIL_PTY_READY_TO_WRITE = (1 << 1),
                        } flags;
                } master;

                // Standard Input
                struct pakfire_jail_pty_stdio {
                        int fd;
                        struct pakfire_log_buffer buffer;
                        // Terminal attributes as fetched by pakfire_jail_enable_raw_mode()
                        struct termios attrs;
                        // File status flags as fetched with fcntl(F_GETFL)
                        int fdflags;
                        enum pakfire_jail_pty_flags flags;
                } stdin;

                // Standard Output
                struct pakfire_jail_pty_stdio stdout;
        } pty;
};
 206
 207 static int clone3(struct clone_args* args, size_t size) {
 208         return syscall(__NR_clone3, args, size);
 209 }
 210
 211 static int pidfd_send_signal(int pidfd, int sig, siginfo_t* info, unsigned int flags) {
 212         return syscall(SYS_pidfd_send_signal, pidfd, sig, info, flags);
 213 }
 214
 215 static int pivot_root(const char* new_root, const char* old_root) {
 216         return syscall(SYS_pivot_root, new_root, old_root);
 217 }
 218
 219 static int pakfire_jail_exec_has_flag(
 220                 const struct pakfire_jail_exec* ctx, const enum pakfire_jail_exec_flags flag) {
 221         return ctx->flags & flag;
 222 }
 223
 224 static void pakfire_jail_free(struct pakfire_jail* jail) {
 225         DEBUG(jail->pakfire, "Freeing jail at %p\n", jail);
 226
 227         // Free environment
 228         for (unsigned int i = 0; jail->env[i]; i++)
 229                 free(jail->env[i]);
 230
 231         if (jail->cgroup)
 232                 pakfire_cgroup_unref(jail->cgroup);
 233         if (jail->pakfire)
 234                 pakfire_unref(jail->pakfire);
 235         if (jail->ctx)
 236                 pakfire_ctx_unref(jail->ctx);
 237         free(jail);
 238 }
 239
 240 /*
 241         Passes any log messages on to the default pakfire log callback
 242 */
 243 static int pakfire_jail_default_log_callback(struct pakfire* pakfire, void* data,
 244                 int priority, const char* line, size_t length) {
 245         struct pakfire_ctx* ctx = pakfire_ctx(pakfire);
 246
 247         if (pakfire_ctx_get_log_level(ctx) >= priority)
 248                 pakfire_ctx_log(ctx, priority, NULL, 0, NULL, "%.*s", (int)length, line);
 249
 250         pakfire_ctx_unref(ctx);
 251
 252         return 0;
 253 }
 254
 255 static const char* pakfire_jail_uuid(struct pakfire_jail* jail) {
 256         if (!*jail->__uuid)
 257                 uuid_unparse_lower(jail->uuid, jail->__uuid);
 258
 259         return jail->__uuid;
 260 }
 261
 262 static int pakfire_jail_setup_interactive_env(struct pakfire_jail* jail) {
 263         // Set PS1
 264         int r = pakfire_jail_set_env(jail, "PS1", "pakfire-jail \\w> ");
 265         if (r)
 266                 return r;
 267
 268         // Copy TERM
 269         char* TERM = secure_getenv("TERM");
 270         if (TERM) {
 271                 r = pakfire_jail_set_env(jail, "TERM", TERM);
 272                 if (r)
 273                         return r;
 274         }
 275
 276         // Copy LANG
 277         char* LANG = secure_getenv("LANG");
 278         if (LANG) {
 279                 r = pakfire_jail_set_env(jail, "LANG", LANG);
 280                 if (r)
 281                         return r;
 282         }
 283
 284         return 0;
 285 }
 286
 287 PAKFIRE_EXPORT int pakfire_jail_create(struct pakfire_jail** jail, struct pakfire* pakfire) {
 288         int r;
 289
 290         const char* arch = pakfire_get_effective_arch(pakfire);
 291
 292         // Allocate a new jail
 293         struct pakfire_jail* j = calloc(1, sizeof(*j));
 294         if (!j)
 295                 return 1;
 296
 297         // Reference context
 298         j->ctx = pakfire_ctx(pakfire);
 299
 300         // Reference Pakfire
 301         j->pakfire = pakfire_ref(pakfire);
 302
 303         // Initialize reference counter
 304         j->nrefs = 1;
 305
 306         // Generate a random UUID
 307         uuid_generate_random(j->uuid);
 308
 309         DEBUG(j->pakfire, "Allocated new jail at %p\n", j);
 310
 311         // Set the default logging callback
 312         pakfire_jail_set_log_callback(j, pakfire_jail_default_log_callback, NULL);
 313
 314         // Set default environment
 315         for (const struct environ* e = ENV; e->key; e++) {
 316                 r = pakfire_jail_set_env(j, e->key, e->val);
 317                 if (r)
 318                         goto ERROR;
 319         }
 320
 321         // Enable all CPU features that CPU has to offer
 322         if (!pakfire_arch_is_supported_by_host(arch)) {
 323                 r = pakfire_jail_set_env(j, "QEMU_CPU", "max");
 324                 if (r)
 325                         goto ERROR;
 326         }
 327
 328         // Set container UUID
 329         r = pakfire_jail_set_env(j, "container_uuid", pakfire_jail_uuid(j));
 330         if (r)
 331                 goto ERROR;
 332
 333         // Disable systemctl to talk to systemd
 334         if (!pakfire_on_root(j->pakfire)) {
 335                 r = pakfire_jail_set_env(j, "SYSTEMD_OFFLINE", "1");
 336                 if (r)
 337                         goto ERROR;
 338         }
 339
 340         // Done
 341         *jail = j;
 342         return 0;
 343
 344 ERROR:
 345         pakfire_jail_free(j);
 346
 347         return r;
 348 }
 349
 350 PAKFIRE_EXPORT struct pakfire_jail* pakfire_jail_ref(struct pakfire_jail* jail) {
 351         ++jail->nrefs;
 352
 353         return jail;
 354 }
 355
 356 PAKFIRE_EXPORT struct pakfire_jail* pakfire_jail_unref(struct pakfire_jail* jail) {
 357         if (--jail->nrefs > 0)
 358                 return jail;
 359
 360         pakfire_jail_free(jail);
 361         return NULL;
 362 }
 363
 364 // Logging Callback
 365
 366 PAKFIRE_EXPORT void pakfire_jail_set_log_callback(struct pakfire_jail* jail,
 367                 pakfire_jail_log_callback callback, void* data) {
 368         jail->callbacks.log = callback;
 369         jail->callbacks.log_data = data;
 370 }
 371
 372 // Resource Limits
 373
 374 PAKFIRE_EXPORT int pakfire_jail_nice(struct pakfire_jail* jail, int nice) {
 375         // Check if nice level is in range
 376         if (nice < -19 || nice > 20) {
 377                 errno = EINVAL;
 378                 return 1;
 379         }
 380
 381         // Store nice level
 382         jail->nice = nice;
 383
 384         return 0;
 385 }
 386
 387 int pakfire_jail_set_cgroup(struct pakfire_jail* jail, struct pakfire_cgroup* cgroup) {
 388         // Free any previous cgroup
 389         if (jail->cgroup) {
 390                 pakfire_cgroup_unref(jail->cgroup);
 391                 jail->cgroup = NULL;
 392         }
 393
 394         // Set any new cgroup
 395         if (cgroup) {
 396                 DEBUG(jail->pakfire, "Setting cgroup %p\n", cgroup);
 397
 398                 jail->cgroup = pakfire_cgroup_ref(cgroup);
 399         }
 400
 401         // Done
 402         return 0;
 403 }
 404
 405 // Environment
 406
 407 // Returns the length of the environment
 408 static unsigned int pakfire_jail_env_length(struct pakfire_jail* jail) {
 409         unsigned int i = 0;
 410
 411         // Count everything in the environment
 412         for (char** e = jail->env; *e; e++)
 413                 i++;
 414
 415         return i;
 416 }
 417
 418 // Finds an existing environment variable and returns its index or -1 if not found
 419 static int pakfire_jail_find_env(struct pakfire_jail* jail, const char* key) {
 420         if (!key) {
 421                 errno = EINVAL;
 422                 return -1;
 423         }
 424
 425         const size_t length = strlen(key);
 426
 427         for (unsigned int i = 0; jail->env[i]; i++) {
 428                 if ((pakfire_string_startswith(jail->env[i], key)
 429                                 && *(jail->env[i] + length) == '=')) {
 430                         return i;
 431                 }
 432         }
 433
 434         // Nothing found
 435         return -1;
 436 }
 437
 438 // Returns the value of an environment variable or NULL
 439 PAKFIRE_EXPORT const char* pakfire_jail_get_env(struct pakfire_jail* jail,
 440                 const char* key) {
 441         int i = pakfire_jail_find_env(jail, key);
 442         if (i < 0)
 443                 return NULL;
 444
 445         return jail->env[i] + strlen(key) + 1;
 446 }
 447
 448 // Sets an environment variable
 449 PAKFIRE_EXPORT int pakfire_jail_set_env(struct pakfire_jail* jail,
 450                 const char* key, const char* value) {
 451         // Find the index where to write this value to
 452         int i = pakfire_jail_find_env(jail, key);
 453         if (i < 0)
 454                 i = pakfire_jail_env_length(jail);
 455
 456         // Return -ENOSPC when the environment is full
 457         if (i >= ENVIRON_SIZE) {
 458                 errno = ENOSPC;
 459                 return -1;
 460         }
 461
 462         // Free any previous value
 463         if (jail->env[i])
 464                 free(jail->env[i]);
 465
 466         // Format and set environment variable
 467         asprintf(&jail->env[i], "%s=%s", key, value);
 468
 469         DEBUG(jail->pakfire, "Set environment variable: %s\n", jail->env[i]);
 470
 471         return 0;
 472 }
 473
 474 // Imports an environment
 475 PAKFIRE_EXPORT int pakfire_jail_import_env(struct pakfire_jail* jail, const char* env[]) {
 476         if (!env)
 477                 return 0;
 478
 479         char* key;
 480         char* val;
 481         int r;
 482
 483         // Copy environment variables
 484         for (unsigned int i = 0; env[i]; i++) {
 485                 r = pakfire_string_partition(env[i], "=", &key, &val);
 486                 if (r)
 487                         continue;
 488
 489                 // Set value
 490                 r = pakfire_jail_set_env(jail, key, val);
 491
 492                 if (key)
 493                         free(key);
 494                 if (val)
 495                         free(val);
 496
 497                 // Break on error
 498                 if (r)
 499                         return r;
 500         }
 501
 502         return 0;
 503 }
 504
 505 // Timeout
 506
 507 PAKFIRE_EXPORT int pakfire_jail_set_timeout(
 508                 struct pakfire_jail* jail, unsigned int timeout) {
 509         // Store value
 510         jail->timeout.it_value.tv_sec = timeout;
 511
 512         if (timeout > 0)
 513                 DEBUG(jail->pakfire, "Timeout set to %u second(s)\n", timeout);
 514         else
 515                 DEBUG(jail->pakfire, "Timeout disabled\n");
 516
 517         return 0;
 518 }
 519
 520 static int pakfire_jail_create_timer(struct pakfire_jail* jail) {
 521         int r;
 522
 523         // Nothing to do if no timeout has been set
 524         if (!jail->timeout.it_value.tv_sec)
 525                 return -1;
 526
 527         // Create a new timer
 528         const int fd = timerfd_create(CLOCK_MONOTONIC, 0);
 529         if (fd < 0) {
 530                 ERROR(jail->pakfire, "Could not create timer: %m\n");
 531                 goto ERROR;
 532         }
 533
 534         // Arm timer
 535         r = timerfd_settime(fd, 0, &jail->timeout, NULL);
 536         if (r) {
 537                 ERROR(jail->pakfire, "Could not arm timer: %m\n");
 538                 goto ERROR;
 539         }
 540
 541         return fd;
 542
 543 ERROR:
 544         if (fd >= 0)
 545                 close(fd);
 546
 547         return -1;
 548 }
 549
 550 /*
 551         This function replaces any logging in the child process.
 552
 553         All log messages will be sent to the parent process through their respective pipes.
 554 */
 555 static void pakfire_jail_log_redirect(void* data, int priority, const char* file,
 556                 int line, const char* fn, const char* format, va_list args) {
 557         struct pakfire_jail_pipes* pipes = (struct pakfire_jail_pipes*)data;
 558         int fd;
 559
 560         switch (priority) {
 561                 case LOG_INFO:
 562                         fd = pipes->log_INFO[1];
 563                         break;
 564
 565                 case LOG_ERR:
 566                         fd = pipes->log_ERROR[1];
 567                         break;
 568
 569 #ifdef ENABLE_DEBUG
 570                 case LOG_DEBUG:
 571                         fd = pipes->log_DEBUG[1];
 572                         break;
 573 #endif /* ENABLE_DEBUG */
 574
 575                 // Ignore any messages of an unknown priority
 576                 default:
 577                         return;
 578         }
 579
 580         // End if we do not have a file descriptor to write to
 581         if (fd < 0)
 582                 return;
 583
 584         // Optionally log the function name
 585         if (fn)
 586                 dprintf(fd, "%s: ", fn);
 587
 588         // Send the log message
 589         vdprintf(fd, format, args);
 590 }
 591
 592 static int pakfire_jail_log_buffer_is_full(const struct pakfire_log_buffer* buffer) {
 593         return (sizeof(buffer->data) == buffer->used);
 594 }
 595
 596 static int pakfire_jail_fill_buffer(struct pakfire_jail* jail, int fd, struct pakfire_log_buffer* buffer) {
 597         int r;
 598
 599         // Skip this if there is not space left in the buffer
 600         if (buffer->used >= sizeof(buffer->data))
 601                 return 0;
 602
 603         // Fill the buffer
 604         r = read(fd, buffer->data + buffer->used, sizeof(buffer->data) - buffer->used);
 605
 606         // Handle errors
 607         if (r < 0) {
 608                 switch (errno) {
 609                         case EAGAIN:
 610                         case EIO:
 611                                 break;
 612
 613                         default:
 614                                 return -errno;
 615                 }
 616
 617         // EOF
 618         } else if (r == 0) {
 619                 // XXX What to do here?
 620
 621         // Successful read
 622         } else {
 623                 buffer->used += r;
 624         }
 625
 626         return 0;
 627 }
 628
 629 static int pakfire_jail_drain_buffer_with_callback(struct pakfire_jail* jail,
 630                 struct pakfire_log_buffer* buffer, int priority, pakfire_jail_communicate_out callback, void* data) {
 631         const char* eol = NULL;
 632         int r;
 633
 634         while (buffer->used) {
 635                 // Search for the end of the first line
 636                 eol = memchr(buffer->data, '\n', buffer->used);
 637
 638                 // No newline found
 639                 if (!eol) {
 640                         // If the buffer is full, we send the entire content to make space.
 641                         if (pakfire_jail_log_buffer_is_full(buffer)) {
 642                                 CTX_DEBUG(jail->ctx, "Buffer is full. Sending all content\n");
 643
 644                                 eol = buffer->data + buffer->used - 1;
 645
 646                         // Otherwise we might have only read parts of the output...
 647                         } else {
 648                                 break;
 649                         }
 650                 }
 651
 652                 // Find the length of the string
 653                 const size_t length = eol - buffer->data + 1;
 654
 655                 // Call the callback
 656                 r = callback(jail->pakfire, data, priority, buffer->data, length);
 657                 if (r) {
 658                         CTX_ERROR(jail->ctx, "The logging callback returned an error: %d\n", r);
 659                         return r;
 660                 }
 661
 662                 // Remove line from buffer
 663                 memmove(buffer->data, buffer->data + length, buffer->used - length);
 664                 buffer->used -= length;
 665         }
 666
 667         return 0;
 668 }
 669
 670 static int pakfire_jail_drain_buffer(struct pakfire_jail* jail, int fd, struct pakfire_log_buffer* buffer) {
 671         int r;
 672
 673         // Nothing to do if the buffer is empty
 674         if (!buffer->used)
 675                 return 0;
 676
 677         // Do not try to write to an invalid file descriptor
 678         if (fd < 0)
 679                 return 0;
 680
 681         // Drain the buffer
 682         r = write(fd, buffer->data, buffer->used);
 683
 684         // Handle errors
 685         if (r < 0) {
 686                 switch (errno) {
 687                         case EAGAIN:
 688                         case EIO:
 689                                 break;
 690
 691                         default:
 692                                 return -errno;
 693                 }
 694
 695         // Successful write
 696         } else {
 697                 memmove(buffer->data, buffer->data + r, buffer->used - r);
 698
 699                 buffer->used -= r;
 700         }
 701
 702         return 0;
 703 }
 704
 705 /*
 706         This function reads as much data as it can from the file descriptor.
 707         If it finds a whole line in it, it will send it to the logger and repeat the process.
 708         If not newline character is found, it will try to read more data until it finds one.
 709 */
 710 static int pakfire_jail_handle_log(struct pakfire_jail* jail, struct pakfire_jail_exec* ctx,
 711                 int priority, int fd, struct pakfire_log_buffer* buffer,
 712                 pakfire_jail_communicate_out callback, void* data) {
 713         int r;
 714
 715         // Fill up buffer from fd
 716         r = pakfire_jail_fill_buffer(jail, fd, buffer);
 717         if (r)
 718                 return r;
 719
 720         // Drain the buffer
 721         r = pakfire_jail_drain_buffer_with_callback(jail, buffer, priority, callback, data);
 722         if (r)
 723                 return r;
 724
 725         return 0;
 726 }
 727
#if 0
/*
        This function is compiled out.

        It calls the standard input callback with the given file descriptor
        and closes the file descriptor once the callback returns EOF.

        It refers to ctx->pipes.stdin, which struct pakfire_jail_pipes does
        not have, so it would not compile if it was simply enabled again.
*/
static int pakfire_jail_stream_stdin(struct pakfire_jail* jail,
                struct pakfire_jail_exec* ctx, const int fd) {
        int r;

        // Nothing to do if there is no stdin callback set
        if (!ctx->communicate.in) {
                DEBUG(jail->pakfire, "Callback for standard input is not set\n");
                return 0;
        }

        // Skip if the writing pipe has already been closed
        if (ctx->pipes.stdin[1] < 0)
                return 0;

        DEBUG(jail->pakfire, "Streaming standard input...\n");

        // Calling the callback
        r = ctx->communicate.in(jail->pakfire, ctx->communicate.data, fd);

        DEBUG(jail->pakfire, "Standard input callback finished: %d\n", r);

        // The callback signaled that it has written everything
        if (r == EOF) {
                DEBUG(jail->pakfire, "Closing standard input pipe\n");

                // Close the file-descriptor
                close(fd);

                // Reset the file-descriptor so it won't be closed again later
                ctx->pipes.stdin[1] = -1;

                // Report success
                r = 0;
        }

        return r;
}
#endif
 767
 768 static int pakfire_jail_recv_fd(struct pakfire_jail* jail, int socket, int* fd) {
 769         const size_t payload_length = sizeof(fd);
 770         char buffer[CMSG_SPACE(payload_length)];
 771         int r;
 772
 773         struct msghdr msg = {
 774                 .msg_control    = buffer,
 775                 .msg_controllen = sizeof(buffer),
 776         };
 777
 778         // Receive the message
 779         r = recvmsg(socket, &msg, 0);
 780         if (r) {
 781                 CTX_ERROR(jail->ctx, "Could not receive file descriptor: %s\n", strerror(errno));
 782                 return -errno;
 783         }
 784
 785         // Fetch the payload
 786         struct cmsghdr* cmsg = CMSG_FIRSTHDR(&msg);
 787         if (!cmsg)
 788                 return -EBADMSG;
 789
 790         *fd = *((int*)CMSG_DATA(cmsg));
 791
 792         CTX_DEBUG(jail->ctx, "Received fd %d from socket %d\n", *fd, socket);
 793
 794         return 0;
 795 }
 796
 797 static int pakfire_jail_send_fd(struct pakfire_jail* jail, int socket, int fd) {
 798         const size_t payload_length = sizeof(fd);
 799         char buffer[CMSG_SPACE(payload_length)];
 800         int r;
 801
 802         CTX_DEBUG(jail->ctx, "Sending fd %d to socket %d\n", fd, socket);
 803
 804         // Header
 805         struct msghdr msg = {
 806                 .msg_control    = buffer,
 807                 .msg_controllen = sizeof(buffer),
 808         };
 809
 810         // Payload
 811         struct cmsghdr* cmsg = CMSG_FIRSTHDR(&msg);
 812         cmsg->cmsg_level = SOL_SOCKET;
 813         cmsg->cmsg_type  = SCM_RIGHTS;
 814         cmsg->cmsg_len   = CMSG_LEN(payload_length);
 815
 816         // Set payload
 817         *((int*)CMSG_DATA(cmsg)) = fd;
 818
 819         // Send the message
 820         r = sendmsg(socket, &msg, 0);
 821         if (r) {
 822                 CTX_ERROR(jail->ctx, "Could not send file descriptor: %s\n", strerror(errno));
 823                 return -errno;
 824         }
 825
 826         return 0;
 827 }
 828
 829 static int pakfire_jail_setup_pipe(struct pakfire_jail* jail, int (*fds)[2], const int flags) {
 830         int r = pipe2(*fds, flags);
 831         if (r < 0) {
 832                 ERROR(jail->pakfire, "Could not setup pipe: %m\n");
 833                 return 1;
 834         }
 835
 836         return 0;
 837 }
 838
 839 static void pakfire_jail_close_pipe(struct pakfire_jail* jail, int fds[2]) {
 840         for (unsigned int i = 0; i < 2; i++)
 841                 if (fds[i] >= 0)
 842                         close(fds[i]);
 843 }
 844
 845 /*
 846         This is a convenience function to fetch the reading end of a pipe and
 847         closes the write end.
 848 */
 849 static int pakfire_jail_get_pipe_to_read(struct pakfire_jail* jail, int (*fds)[2]) {
 850         // Give the variables easier names to avoid confusion
 851         int* fd_read  = &(*fds)[0];
 852         int* fd_write = &(*fds)[1];
 853
 854         // Close the write end of the pipe
 855         if (*fd_write >= 0) {
 856                 close(*fd_write);
 857                 *fd_write = -1;
 858         }
 859
 860         // Return the read end
 861         if (*fd_read >= 0)
 862                 return *fd_read;
 863
 864         return -1;
 865 }
 866
 867 static int pakfire_jail_get_pipe_to_write(struct pakfire_jail* jail, int (*fds)[2]) {
 868         // Give the variables easier names to avoid confusion
 869         int* fd_read  = &(*fds)[0];
 870         int* fd_write = &(*fds)[1];
 871
 872         // Close the read end of the pipe
 873         if (*fd_read >= 0) {
 874                 close(*fd_read);
 875                 *fd_read = -1;
 876         }
 877
 878         // Return the write end
 879         if (*fd_write >= 0)
 880                 return *fd_write;
 881
 882         return -1;
 883 }
 884
 885 static int pakfire_jail_epoll_add_fd(struct pakfire_jail* jail, int epollfd, int fd, int events) {
 886         struct epoll_event event = {
 887                 .events = events|EPOLLHUP,
 888                 .data   = {
 889                         .fd = fd,
 890                 },
 891         };
 892         int r;
 893
 894         // Read flags
 895         int flags = fcntl(fd, F_GETFL, 0);
 896
 897         // Set modified flags
 898         r  = fcntl(fd, F_SETFL, flags|O_NONBLOCK);
 899         if (r < 0) {
 900                 CTX_ERROR(jail->ctx, "Could not set file descriptor %d into non-blocking mode: %s\n",
 901                         fd, strerror(errno));
 902                 return -errno;
 903         }
 904
 905         // Add the file descriptor to the loop
 906         r = epoll_ctl(epollfd, EPOLL_CTL_ADD, fd, &event);
 907         if (r < 0) {
 908                 ERROR(jail->pakfire, "Could not add file descriptor %d to epoll(): %s\n",
 909                         fd, strerror(errno));
 910                 return -errno;
 911         }
 912
 913         return 0;
 914 }
 915
 916 // PTY Forwarding
 917
 918 static int pakfire_jail_enable_raw_mode(struct pakfire_jail* jail,
 919                 struct pakfire_jail_pty_stdio* stdio) {
 920         struct termios raw_attrs;
 921         int r;
 922
 923         // Skip if we don't know the file descriptor
 924         if (stdio->fd < 0)
 925                 return 0;
 926
 927         // Skip everything if fd is not a TTY
 928         if (!isatty(stdio->fd))
 929                 return 0;
 930
 931         // Store flags
 932         stdio->fdflags = fcntl(stdio->fd, F_GETFL);
 933         if (stdio->fdflags < 0) {
 934                 CTX_ERROR(jail->ctx, "Could not fetch flags from fd %d: %s\n",
 935                         stdio->fd, strerror(errno));
 936                 return -errno;
 937         }
 938
 939         // Fetch all attributes
 940         r = tcgetattr(stdio->fd, &stdio->attrs);
 941         if (r) {
 942                 CTX_ERROR(jail->ctx, "Could not fetch terminal attributes from fd %d: %s\n",
 943                         stdio->fd, strerror(errno));
 944                 return -errno;
 945         }
 946
 947         // Copy all attributes
 948         raw_attrs = stdio->attrs;
 949
 950         // Make it RAW
 951         cfmakeraw(&raw_attrs);
 952
 953         switch (stdio->fd) {
 954                 case STDIN_FILENO:
 955                         raw_attrs.c_oflag = stdio->attrs.c_oflag;
 956                         break;
 957
 958                 case STDOUT_FILENO:
 959                         raw_attrs.c_iflag = stdio->attrs.c_iflag;
 960                         raw_attrs.c_lflag = stdio->attrs.c_lflag;
 961                         break;
 962         }
 963
 964         // Restore the attributes
 965         r = tcsetattr(stdio->fd, TCSANOW, &raw_attrs);
 966         if (r) {
 967                 CTX_ERROR(jail->ctx, "Could not restore terminal attributes for fd %d: %s\n",
 968                         stdio->fd, strerror(errno));
 969                 return -errno;
 970         }
 971
 972         return 0;
 973 }
 974
 975 static int pakfire_jail_restore_attrs(struct pakfire_jail* jail,
 976                 const struct pakfire_jail_pty_stdio* stdio) {
 977         int r;
 978
 979         // Skip if we don't know the file descriptor
 980         if (stdio->fd < 0)
 981                 return 0;
 982
 983         // Skip everything if fd is not a TTY
 984         if (!isatty(stdio->fd))
 985                 return 0;
 986
 987         // Restore the flags
 988         r = fcntl(stdio->fd, F_SETFL, stdio->fdflags);
 989         if (r < 0) {
 990                 CTX_ERROR(jail->ctx, "Could not set flags for file descriptor %d: %s\n",
 991                         stdio->fd, strerror(errno));
 992                 return -errno;
 993         }
 994
 995         // Restore the attributes
 996         r = tcsetattr(stdio->fd, TCSANOW, &stdio->attrs);
 997         if (r) {
 998                 CTX_ERROR(jail->ctx, "Could not restore terminal attributes for %d, ignoring: %s\n",
 999                         stdio->fd, strerror(errno));
1000                 return -errno;
1001         }
1002
1003         return 0;
1004 }
1005
1006 static int pakfire_jail_setup_pty_forwarding(struct pakfire_jail* jail,
1007                 struct pakfire_jail_exec* ctx, const int epollfd, const int fd) {
1008         struct winsize size;
1009         int r;
1010
1011         CTX_DEBUG(jail->ctx, "Setting up PTY forwarding on fd %d\n", fd);
1012
1013         // Store the file descriptor
1014         ctx->pty.master.fd = fd;
1015
1016         // Add the master to the event loop
1017         r = pakfire_jail_epoll_add_fd(jail, epollfd, ctx->pty.master.fd, EPOLLIN|EPOLLOUT|EPOLLET);
1018         if (r)
1019                 return r;
1020
1021         if (ctx->flags & PAKFIRE_JAIL_PTY_FORWARDING) {
1022                 // Configure stdin/stdout
1023                 ctx->pty.stdin.fd  = STDIN_FILENO;
1024                 ctx->pty.stdout.fd = STDOUT_FILENO;
1025
1026                 // Fetch dimensions
1027                 if (isatty(ctx->pty.stdout.fd)) {
1028                         r = ioctl(ctx->pty.stdout.fd, TIOCGWINSZ, &size);
1029                         if (r) {
1030                                 CTX_ERROR(jail->ctx, "Failed to determine terminal dimensions: %s\n", strerror(errno));
1031                                 return -errno;
1032                         }
1033
1034                         // Set dimensions
1035                         r = ioctl(ctx->pty.master.fd, TIOCSWINSZ, &size);
1036                         if (r) {
1037                                 CTX_ERROR(jail->ctx, "Failed setting dimensions: %s\n", strerror(errno));
1038                                 return -errno;
1039                         }
1040                 }
1041
1042                 // Enable RAW mode on standard input
1043                 r = pakfire_jail_enable_raw_mode(jail, &ctx->pty.stdin);
1044                 if (r)
1045                         return r;
1046
1047                 // Enable RAW mode on standard output
1048                 r = pakfire_jail_enable_raw_mode(jail, &ctx->pty.stdout);
1049                 if (r)
1050                         return r;
1051
1052                 // Add standard input to the event loop
1053                 r = pakfire_jail_epoll_add_fd(jail, epollfd, ctx->pty.stdin.fd, EPOLLIN|EPOLLET);
1054                 if (r)
1055                         return r;
1056
1057                 // Add standard output to the event loop
1058                 r = pakfire_jail_epoll_add_fd(jail, epollfd, ctx->pty.stdout.fd, EPOLLOUT|EPOLLET);
1059                 if (r)
1060                         return r;
1061         }
1062
1063         return 0;
1064 }
1065
1066 static int pakfire_jail_forward_pty(struct pakfire_jail* jail, struct pakfire_jail_exec* ctx) {
1067         int r;
1068
1069         // Read from standard input
1070         if (ctx->pty.stdin.flags & PAKFIRE_JAIL_PTY_READY_TO_READ) {
1071                 r = pakfire_jail_fill_buffer(jail, ctx->pty.stdin.fd, &ctx->pty.stdin.buffer);
1072                 if (r) {
1073                         CTX_ERROR(jail->ctx, "Failed reading from standard input: %s\n", strerror(-r));
1074                         return r;
1075                 }
1076
1077                 // We are done reading for now
1078                 ctx->pty.stdin.flags &= ~PAKFIRE_JAIL_PTY_READY_TO_READ;
1079
1080                 // But we may have data to write
1081                 if (ctx->pty.stdin.buffer.used)
1082                         ctx->pty.master.flags |= PAKFIRE_JAIL_PTY_READY_TO_WRITE;
1083         }
1084
1085         // Write to the master
1086         if (ctx->pty.master.flags & PAKFIRE_JAIL_PTY_READY_TO_WRITE) {
1087                 r = pakfire_jail_drain_buffer(jail, ctx->pty.master.fd, &ctx->pty.stdin.buffer);
1088                 if (r) {
1089                         CTX_ERROR(jail->ctx, "Failed writing to the PTY: %s\n", strerror(-r));
1090                         return r;
1091                 }
1092
1093                 // We are done writing for now
1094                 ctx->pty.master.flags &= ~PAKFIRE_JAIL_PTY_READY_TO_WRITE;
1095         }
1096
1097         // Read from the master
1098         if (ctx->pty.master.flags & PAKFIRE_JAIL_PTY_READY_TO_READ) {
1099                 r = pakfire_jail_fill_buffer(jail, ctx->pty.master.fd, &ctx->pty.stdout.buffer);
1100                 if (r) {
1101                         CTX_ERROR(jail->ctx, "Failed reading from the PTY: %s\n", strerror(-r));
1102                         return r;
1103                 }
1104
1105                 // We are done reading for now
1106                 ctx->pty.master.flags &= ~PAKFIRE_JAIL_PTY_READY_TO_READ;
1107
1108                 // But we may have data to write
1109                 if (ctx->pty.stdout.buffer.used)
1110                         ctx->pty.stdout.flags |= PAKFIRE_JAIL_PTY_READY_TO_WRITE;
1111         }
1112
1113         // Write to standard output
1114         if (ctx->pty.stdout.flags & PAKFIRE_JAIL_PTY_READY_TO_WRITE) {
1115                 // If we have a callback, we will send any output to the callback
1116                 if (ctx->communicate.out) {
1117                         r = pakfire_jail_drain_buffer_with_callback(jail, &ctx->pty.stdout.buffer,
1118                                 LOG_INFO, ctx->communicate.out, ctx->communicate.data);
1119                         if (r)
1120                                 return r;
1121
1122                 // If we have a file descriptor, we will forward any output
1123                 } else if (ctx->pty.stdout.fd >= 0) {
1124                         r = pakfire_jail_drain_buffer(jail, ctx->pty.stdout.fd, &ctx->pty.stdout.buffer);
1125                         if (r) {
1126                                 CTX_ERROR(jail->ctx, "Failed writing to standard output: %s\n", strerror(-r));
1127                                 return r;
1128                         }
1129
1130                 // Otherwise we log a message
1131                 } else {
1132                         CTX_ERROR(jail->ctx, "No output configured for the PTY\n");
1133                 }
1134
1135                 // We are done writing for now
1136                 ctx->pty.stdout.flags &= ~PAKFIRE_JAIL_PTY_READY_TO_WRITE;
1137         }
1138
1139         return 0;
1140 }
1141
1142 static int pakfire_jail_wait(struct pakfire_jail* jail, struct pakfire_jail_exec* ctx) {
1143         int epollfd = -1;
1144         struct epoll_event events[EPOLL_MAX_EVENTS];
1145         char garbage[8];
1146         int r = 0;
1147
1148         // Fetch file descriptors from context
1149         const int pidfd  = ctx->pidfd;
1150
1151         // Fetch the UNIX domain socket
1152         const int socket_recv = pakfire_jail_get_pipe_to_read(jail, &ctx->socket);
1153
1154         // Timer
1155         const int timerfd = pakfire_jail_create_timer(jail);
1156
1157         // Logging
1158         const int log_INFO  = pakfire_jail_get_pipe_to_read(jail, &ctx->pipes.log_INFO);
1159         const int log_ERROR = pakfire_jail_get_pipe_to_read(jail, &ctx->pipes.log_ERROR);
1160 #ifdef ENABLE_DEBUG
1161         const int log_DEBUG = pakfire_jail_get_pipe_to_read(jail, &ctx->pipes.log_DEBUG);
1162 #endif /* ENABLE_DEBUG */
1163
1164         // Make a list of all file descriptors we are interested in
1165         const struct pakfire_wait_fds {
1166                 const int fd;
1167                 const int events;
1168         } fds[] = {
1169                 // Timer
1170                 { timerfd, EPOLLIN },
1171
1172                 // Child Process
1173                 { ctx->pidfd, EPOLLIN },
1174
1175                 // Log Pipes
1176                 { log_INFO, EPOLLIN },
1177                 { log_ERROR, EPOLLIN },
1178 #ifdef ENABLE_DEBUG
1179                 { log_DEBUG, EPOLLIN },
1180 #endif /* ENABLE_DEBUG */
1181
1182                 // UNIX Domain Socket
1183                 { socket_recv, EPOLLIN },
1184
1185                 // Sentinel
1186                 { -1, 0 },
1187         };
1188
1189         // Setup epoll
1190         epollfd = epoll_create1(0);
1191         if (epollfd < 0) {
1192                 ERROR(jail->pakfire, "Could not initialize epoll(): %m\n");
1193                 r = 1;
1194                 goto ERROR;
1195         }
1196
1197         // Turn file descriptors into non-blocking mode and add them to epoll()
1198         for (const struct pakfire_wait_fds* fd = fds; fd->events; fd++) {
1199                 // Skip fds which were not initialized
1200                 if (fd->fd < 0)
1201                         continue;
1202
1203                 // Add the FD to the event loop
1204                 r = pakfire_jail_epoll_add_fd(jail, epollfd, fd->fd, fd->events);
1205                 if (r)
1206                         goto ERROR;
1207         }
1208
1209         int ended = 0;
1210
1211         // Loop for as long as the process is alive
1212         while (!ended) {
1213                 int num = epoll_wait(epollfd, events, EPOLL_MAX_EVENTS, -1);
1214                 if (num < 1) {
1215                         // Ignore if epoll_wait() has been interrupted
1216                         if (errno == EINTR)
1217                                 continue;
1218
1219                         ERROR(jail->pakfire, "epoll_wait() failed: %m\n");
1220                         r = 1;
1221
1222                         goto ERROR;
1223                 }
1224
1225                 for (int i = 0; i < num; i++) {
1226                         int e  = events[i].events;
1227                         int fd = events[i].data.fd;
1228
1229                         // Handle PTY forwarding events
1230                         if (ctx->pty.master.fd == fd) {
1231                                 if (e & (EPOLLIN|EPOLLHUP))
1232                                         ctx->pty.master.flags |= PAKFIRE_JAIL_PTY_READY_TO_READ;
1233
1234                                 if (e & (EPOLLOUT|EPOLLHUP))
1235                                         ctx->pty.master.flags |= PAKFIRE_JAIL_PTY_READY_TO_WRITE;
1236
1237                                 // Perform the work
1238                                 r = pakfire_jail_forward_pty(jail, ctx);
1239                                 if (r) {
1240                                         CTX_ERROR(jail->ctx, "Failed forwarding the PTY: %s\n", strerror(-r));
1241                                         goto ERROR;
1242                                 }
1243
1244                         // Handle standard input
1245                         } else if (ctx->pty.stdin.fd == fd) {
1246                                 if (e & (EPOLLIN|EPOLLHUP))
1247                                         ctx->pty.stdin.flags |= PAKFIRE_JAIL_PTY_READY_TO_READ;
1248
1249                                 // Perform the work
1250                                 r = pakfire_jail_forward_pty(jail, ctx);
1251                                 if (r) {
1252                                         CTX_ERROR(jail->ctx, "Failed forwarding the PTY: %s\n", strerror(-r));
1253                                         goto ERROR;
1254                                 }
1255
1256                         // Handle standard output
1257                         } else if (ctx->pty.stdout.fd == fd) {
1258                                 if (e & (EPOLLOUT|EPOLLHUP))
1259                                         ctx->pty.stdout.flags |= PAKFIRE_JAIL_PTY_READY_TO_WRITE;
1260
1261                                 // Perform the work
1262                                 r = pakfire_jail_forward_pty(jail, ctx);
1263                                 if (r) {
1264                                         CTX_ERROR(jail->ctx, "Failed forwarding the PTY: %s\n", strerror(-r));
1265                                         goto ERROR;
1266                                 }
1267
1268                         // Handle any changes to the PIDFD
1269                         } else if (pidfd == fd) {
1270                                 if (e & EPOLLIN) {
1271                                         // Call waidid() and store the result
1272                                         r = waitid(P_PIDFD, ctx->pidfd, &ctx->status, WEXITED);
1273                                         if (r) {
1274                                                 ERROR(jail->pakfire, "waitid() failed: %m\n");
1275                                                 goto ERROR;
1276                                         }
1277
1278                                         // Mark that we have ended so that we will process the remaining
1279                                         // events from epoll() now, but won't restart the outer loop.
1280                                         ended = 1;
1281                                 }
1282
1283                         // Handle timer events
1284                         } else if (timerfd == fd) {
1285                                 if (e & EPOLLIN) {
1286                                         DEBUG(jail->pakfire, "Timer event received\n");
1287
1288                                         // Disarm the timer
1289                                         r = read(timerfd, garbage, sizeof(garbage));
1290                                         if (r < 1) {
1291                                                 ERROR(jail->pakfire, "Could not disarm timer: %m\n");
1292                                                 r = 1;
1293                                                 goto ERROR;
1294                                         }
1295
1296                                         // Terminate the process if it hasn't already ended
1297                                         if (!ended) {
1298                                                 DEBUG(jail->pakfire, "Terminating process...\n");
1299
1300                                                 // Send SIGTERM to the process
1301                                                 r = pidfd_send_signal(pidfd, SIGKILL, NULL, 0);
1302                                                 if (r) {
1303                                                         ERROR(jail->pakfire, "Could not kill process: %m\n");
1304                                                         goto ERROR;
1305                                                 }
1306                                         }
1307                                 }
1308
1309                         // Handle socket messages
1310                         } else if (socket_recv == fd) {
1311                                 if (e & EPOLLIN) {
1312                                         // Receive the passed FD
1313                                         r = pakfire_jail_recv_fd(jail, socket_recv, &fd);
1314                                         if (r)
1315                                                 goto ERROR;
1316
1317                                         // Setup PTY forwarding
1318                                         if (ctx->pty.master.fd < 0) {
1319                                                 r = pakfire_jail_setup_pty_forwarding(jail, ctx, epollfd, fd);
1320                                                 if (r) {
1321                                                         CTX_ERROR(jail->ctx, "Failed setting up PTY forwarding: %s\n", strerror(-r));
1322                                                         goto ERROR;
1323                                                 }
1324                                         }
1325                                 }
1326
1327                         // Handle log INFO messages
1328                         } else if (log_INFO == fd) {
1329                                 if (e & EPOLLIN) {
1330                                         r = pakfire_jail_handle_log(jail, ctx, LOG_INFO, fd,
1331                                                 &ctx->buffers.log_INFO, pakfire_jail_default_log_callback, NULL);
1332                                         if (r)
1333                                                 goto ERROR;
1334                                 }
1335
1336                         // Handle log ERROR messages
1337                         } else if (log_ERROR == fd) {
1338                                 if (e & EPOLLIN) {
1339                                         r = pakfire_jail_handle_log(jail, ctx, LOG_ERR, fd,
1340                                                 &ctx->buffers.log_ERROR, pakfire_jail_default_log_callback, NULL);
1341                                         if (r)
1342                                                 goto ERROR;
1343                                 }
1344
1345 #ifdef ENABLE_DEBUG
1346                         // Handle log DEBUG messages
1347                         } else if (log_DEBUG == fd) {
1348                                 if (e & EPOLLIN) {
1349                                         r = pakfire_jail_handle_log(jail, ctx, LOG_DEBUG, fd,
1350                                                 &ctx->buffers.log_DEBUG, pakfire_jail_default_log_callback, NULL);
1351                                         if (r)
1352                                                 goto ERROR;
1353                                 }
1354 #endif /* ENABLE_DEBUG */
1355
1356                         // Log a message for anything else
1357                         } else {
1358                                 DEBUG(jail->pakfire, "Received invalid file descriptor %d\n", fd);
1359                                 continue;
1360                         }
1361
1362                         // Check if any file descriptors have been closed
1363                         if (e & EPOLLHUP) {
1364                                 // Remove the file descriptor
1365                                 r = epoll_ctl(epollfd, EPOLL_CTL_DEL, fd, NULL);
1366                                 if (r) {
1367                                         ERROR(jail->pakfire, "Could not remove closed file-descriptor %d: %m\n", fd);
1368                                         goto ERROR;
1369                                 }
1370                         }
1371                 }
1372         }
1373
1374 ERROR:
1375         if (epollfd >= 0)
1376                 close(epollfd);
1377         if (timerfd >= 0)
1378                 close(timerfd);
1379
1380         // Restore any changed terminal attributes
1381         if (ctx->pty.stdin.fd >= 0)
1382                 pakfire_jail_restore_attrs(jail, &ctx->pty.stdin);
1383         if (ctx->pty.stdout.fd >= 0)
1384                 pakfire_jail_restore_attrs(jail, &ctx->pty.stdout);
1385
1386         return r;
1387 }
1388
/*
	Log callback which collects everything that has been logged with priority
	LOG_INFO in a string.

	data is expected to point to a char* which is either NULL or a string that
	has been allocated on the heap. Every line is appended to it. The string
	is reallocated on every call, so the pointer stored in *data changes; it
	has to be freed by whoever owns data. If appending fails, the string that
	has been collected so far remains untouched.

	Messages of any other priority (or all messages if data is NULL) are
	passed on to pakfire_jail_default_log_callback().

	Returns 0 on success or a negative errno value if no memory could be
	allocated; otherwise whatever the default log callback returns.
*/
int pakfire_jail_capture_stdout(struct pakfire* pakfire, void* data,
		int priority, const char* line, size_t length) {
	char** output = (char**)data;
	char* buffer = NULL;
	int r;

	// Append everything from stdout to a buffer
	if (output && priority == LOG_INFO) {
		// Format into a new buffer as the old one is an input to asprintf()
		r = asprintf(&buffer, "%s%.*s", (*output) ? *output : "", (int)length, line);
		if (r < 0)
			return (errno) ? -errno : -ENOMEM;

		// Release the previous buffer and replace it with the new one
		free(*output);
		*output = buffer;

		return 0;
	}

	// Send everything else to the default logger
	return pakfire_jail_default_log_callback(pakfire, NULL, priority, line, length);
}
1406
1407 // Capabilities
1408
1409 // Logs all capabilities of the current process
1410 static int pakfire_jail_show_capabilities(struct pakfire_jail* jail) {
1411         cap_t caps = NULL;
1412         char* name = NULL;
1413         cap_flag_value_t value_e;
1414         cap_flag_value_t value_i;
1415         cap_flag_value_t value_p;
1416         int r;
1417
1418         // Fetch PID
1419         pid_t pid = getpid();
1420
1421         // Fetch all capabilities
1422         caps = cap_get_proc();
1423         if (!caps) {
1424                 ERROR(jail->pakfire, "Could not fetch capabilities: %m\n");
1425                 r = 1;
1426                 goto ERROR;
1427         }
1428
1429         DEBUG(jail->pakfire, "Capabilities of PID %d:\n", pid);
1430
1431         // Iterate over all capabilities
1432         for (unsigned int cap = 0; cap_valid(cap); cap++) {
1433                 name = cap_to_name(cap);
1434
1435                 // Fetch effective value
1436                 r = cap_get_flag(caps, cap, CAP_EFFECTIVE, &value_e);
1437                 if (r)
1438                         goto ERROR;
1439
1440                 // Fetch inheritable value
1441                 r = cap_get_flag(caps, cap, CAP_INHERITABLE, &value_i);
1442                 if (r)
1443                         goto ERROR;
1444
1445                 // Fetch permitted value
1446                 r = cap_get_flag(caps, cap, CAP_PERMITTED, &value_p);
1447                 if (r)
1448                         goto ERROR;
1449
1450                 DEBUG(jail->pakfire,
1451                         "  %-24s : %c%c%c\n",
1452                         name,
1453                         (value_e == CAP_SET) ? 'e' : '-',
1454                         (value_i == CAP_SET) ? 'i' : '-',
1455                         (value_p == CAP_SET) ? 'p' : '-'
1456                 );
1457
1458                 // Free name
1459                 cap_free(name);
1460                 name = NULL;
1461         }
1462
1463         // Success
1464         r = 0;
1465
1466 ERROR:
1467         if (name)
1468                 cap_free(name);
1469         if (caps)
1470                 cap_free(caps);
1471
1472         return r;
1473 }
1474
1475 static int pakfire_jail_set_capabilities(struct pakfire_jail* jail) {
1476         cap_t caps = NULL;
1477         char* name = NULL;
1478         int r;
1479
1480         // Fetch capabilities
1481         caps = cap_get_proc();
1482         if (!caps) {
1483                 ERROR(jail->pakfire, "Could not read capabilities: %m\n");
1484                 r = 1;
1485                 goto ERROR;
1486         }
1487
1488         // Walk through all capabilities
1489         for (cap_value_t cap = 0; cap_valid(cap); cap++) {
1490                 cap_value_t _caps[] = { cap };
1491
1492                 // Fetch the name of the capability
1493                 name = cap_to_name(cap);
1494
1495                 r = cap_set_flag(caps, CAP_EFFECTIVE, 1, _caps, CAP_SET);
1496                 if (r) {
1497                         ERROR(jail->pakfire, "Could not set %s: %m\n", name);
1498                         goto ERROR;
1499                 }
1500
1501                 r = cap_set_flag(caps, CAP_INHERITABLE, 1, _caps, CAP_SET);
1502                 if (r) {
1503                         ERROR(jail->pakfire, "Could not set %s: %m\n", name);
1504                         goto ERROR;
1505                 }
1506
1507                 r = cap_set_flag(caps, CAP_PERMITTED, 1, _caps, CAP_SET);
1508                 if (r) {
1509                         ERROR(jail->pakfire, "Could not set %s: %m\n", name);
1510                         goto ERROR;
1511                 }
1512
1513                 // Free name
1514                 cap_free(name);
1515                 name = NULL;
1516         }
1517
1518         // Restore all capabilities
1519         r = cap_set_proc(caps);
1520         if (r) {
1521                 ERROR(jail->pakfire, "Restoring capabilities failed: %m\n");
1522                 goto ERROR;
1523         }
1524
1525         // Add all capabilities to the ambient set
1526         for (unsigned int cap = 0; cap_valid(cap); cap++) {
1527                 name = cap_to_name(cap);
1528
1529                 // Raise the capability
1530                 r = prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_RAISE, cap, 0, 0);
1531                 if (r) {
1532                         ERROR(jail->pakfire, "Could not set ambient capability %s: %m\n", name);
1533                         goto ERROR;
1534                 }
1535
1536                 // Free name
1537                 cap_free(name);
1538                 name = NULL;
1539         }
1540
1541         // Success
1542         r = 0;
1543
1544 ERROR:
1545         if (name)
1546                 cap_free(name);
1547         if (caps)
1548                 cap_free(caps);
1549
1550         return r;
1551 }
1552
1553 // Syscall Filter
1554
1555 static int pakfire_jail_limit_syscalls(struct pakfire_jail* jail) {
1556         const int syscalls[] = {
1557                 // The kernel's keyring isn't namespaced
1558                 SCMP_SYS(keyctl),
1559                 SCMP_SYS(add_key),
1560                 SCMP_SYS(request_key),
1561
1562                 // Disable userfaultfd
1563                 SCMP_SYS(userfaultfd),
1564
1565                 // Disable perf which could leak a lot of information about the host
1566                 SCMP_SYS(perf_event_open),
1567
1568                 0,
1569         };
1570         int r = 1;
1571
1572         DEBUG(jail->pakfire, "Applying syscall filter...\n");
1573
1574         // Setup a syscall filter which allows everything by default
1575         scmp_filter_ctx ctx = seccomp_init(SCMP_ACT_ALLOW);
1576         if (!ctx) {
1577                 ERROR(jail->pakfire, "Could not setup seccomp filter: %m\n");
1578                 goto ERROR;
1579         }
1580
1581         // All all syscalls
1582         for (const int* syscall = syscalls; *syscall; syscall++) {
1583                 r = seccomp_rule_add(ctx, SCMP_ACT_ERRNO(EPERM), *syscall, 0);
1584                 if (r) {
1585                         ERROR(jail->pakfire, "Could not configure syscall %d: %m\n", *syscall);
1586                         goto ERROR;
1587                 }
1588         }
1589
1590         // Load syscall filter into the kernel
1591         r = seccomp_load(ctx);
1592         if (r) {
1593                 ERROR(jail->pakfire, "Could not load syscall filter into the kernel: %m\n");
1594                 goto ERROR;
1595         }
1596
1597 ERROR:
1598         if (ctx)
1599                 seccomp_release(ctx);
1600
1601         return r;
1602 }
1603
1604 // Mountpoints
1605
1606 PAKFIRE_EXPORT int pakfire_jail_bind(struct pakfire_jail* jail,
1607                 const char* source, const char* target, int flags) {
1608         struct pakfire_jail_mountpoint* mp = NULL;
1609         int r;
1610
1611         // Check if there is any space left
1612         if (jail->num_mountpoints >= MAX_MOUNTPOINTS) {
1613                 errno = ENOSPC;
1614                 return 1;
1615         }
1616
1617         // Check for valid inputs
1618         if (!source || !target) {
1619                 errno = EINVAL;
1620                 return 1;
1621         }
1622
1623         // Select the next free slot
1624         mp = &jail->mountpoints[jail->num_mountpoints];
1625
1626         // Copy source
1627         r = pakfire_string_set(mp->source, source);
1628         if (r) {
1629                 ERROR(jail->pakfire, "Could not copy source: %m\n");
1630                 return r;
1631         }
1632
1633         // Copy target
1634         r = pakfire_string_set(mp->target, target);
1635         if (r) {
1636                 ERROR(jail->pakfire, "Could not copy target: %m\n");
1637                 return r;
1638         }
1639
1640         // Copy flags
1641         mp->flags = flags;
1642
1643         // Increment counter
1644         jail->num_mountpoints++;
1645
1646         return 0;
1647 }
1648
1649 static int pakfire_jail_mount_networking(struct pakfire_jail* jail) {
1650         int r;
1651
1652         const char* paths[] = {
1653                 "/etc/hosts",
1654                 "/etc/resolv.conf",
1655                 NULL,
1656         };
1657
1658         // Bind-mount all paths read-only
1659         for (const char** path = paths; *path; path++) {
1660                 r = pakfire_bind(jail->pakfire, *path, NULL, MS_RDONLY);
1661                 if (r) {
1662                         switch (errno) {
1663                                 // Ignore if we don't have permission
1664                                 case EPERM:
1665                                         continue;
1666
1667                                 default:
1668                                         break;
1669                         }
1670                         return r;
1671                 }
1672         }
1673
1674         return 0;
1675 }
1676
1677 /*
1678         Mounts everything that we require in the new namespace
1679 */
1680 static int pakfire_jail_mount(struct pakfire_jail* jail, struct pakfire_jail_exec* ctx) {
1681         struct pakfire_jail_mountpoint* mp = NULL;
1682         int flags = 0;
1683         int r;
1684
1685         // Enable loop devices
1686         if (pakfire_jail_exec_has_flag(ctx, PAKFIRE_JAIL_HAS_LOOP_DEVICES))
1687                 flags |= PAKFIRE_MOUNT_LOOP_DEVICES;
1688
1689         // Mount all default stuff
1690         r = pakfire_mount_all(jail->pakfire, PAKFIRE_MNTNS_OUTER, flags);
1691         if (r)
1692                 return r;
1693
1694         // Populate /dev
1695         r = pakfire_populate_dev(jail->pakfire, flags);
1696         if (r)
1697                 return r;
1698
1699         // Mount the interpreter (if needed)
1700         r = pakfire_mount_interpreter(jail->pakfire);
1701         if (r)
1702                 return r;
1703
1704         // Mount networking stuff
1705         if (pakfire_jail_exec_has_flag(ctx, PAKFIRE_JAIL_HAS_NETWORKING)) {
1706                 r = pakfire_jail_mount_networking(jail);
1707                 if (r)
1708                         return r;
1709         }
1710
1711         // Mount all custom stuff
1712         for (unsigned int i = 0; i < jail->num_mountpoints; i++) {
1713                 // Fetch mountpoint
1714                 mp = &jail->mountpoints[i];
1715
1716                 // Mount it
1717                 r = pakfire_bind(jail->pakfire, mp->source, mp->target, mp->flags);
1718                 if (r)
1719                         return r;
1720         }
1721
1722         return 0;
1723 }
1724
1725 // Networking
1726
1727 static int pakfire_jail_setup_loopback(struct pakfire_jail* jail) {
1728         struct nl_sock* nl = NULL;
1729         struct nl_cache* cache = NULL;
1730         struct rtnl_link* link = NULL;
1731         struct rtnl_link* change = NULL;
1732         int r;
1733
1734         DEBUG(jail->pakfire, "Setting up loopback...\n");
1735
1736         // Allocate a netlink socket
1737         nl = nl_socket_alloc();
1738         if (!nl) {
1739                 ERROR(jail->pakfire, "Could not allocate a netlink socket: %m\n");
1740                 r = 1;
1741                 goto ERROR;
1742         }
1743
1744         // Connect the socket
1745         r = nl_connect(nl, NETLINK_ROUTE);
1746         if (r) {
1747                 ERROR(jail->pakfire, "Could not connect netlink socket: %s\n", nl_geterror(r));
1748                 goto ERROR;
1749         }
1750
1751         // Allocate the netlink cache
1752         r = rtnl_link_alloc_cache(nl, AF_UNSPEC, &cache);
1753         if (r < 0) {
1754                 ERROR(jail->pakfire, "Unable to allocate netlink cache: %s\n", nl_geterror(r));
1755                 goto ERROR;
1756         }
1757
1758         // Fetch loopback interface
1759         link = rtnl_link_get_by_name(cache, "lo");
1760         if (!link) {
1761                 ERROR(jail->pakfire, "Could not find lo interface. Ignoring.\n");
1762                 r = 0;
1763                 goto ERROR;
1764         }
1765
1766         // Allocate a new link
1767         change = rtnl_link_alloc();
1768         if (!change) {
1769                 ERROR(jail->pakfire, "Could not allocate change link\n");
1770                 r = 1;
1771                 goto ERROR;
1772         }
1773
1774         // Set the link to UP
1775         rtnl_link_set_flags(change, IFF_UP);
1776
1777         // Apply any changes
1778         r = rtnl_link_change(nl, link, change, 0);
1779         if (r) {
1780                 ERROR(jail->pakfire, "Unable to activate loopback: %s\n", nl_geterror(r));
1781                 goto ERROR;
1782         }
1783
1784         // Success
1785         r = 0;
1786
1787 ERROR:
1788         if (nl)
1789                 nl_socket_free(nl);
1790
1791         return r;
1792 }
1793
1794 // UID/GID Mapping
1795
1796 static int pakfire_jail_setup_uid_mapping(struct pakfire_jail* jail, pid_t pid) {
1797         char path[PATH_MAX];
1798         int r;
1799
1800         // Skip mapping anything when running on /
1801         if (pakfire_on_root(jail->pakfire))
1802                 return 0;
1803
1804         // Make path
1805         r = pakfire_string_format(path, "/proc/%d/uid_map", pid);
1806         if (r)
1807                 return r;
1808
1809         // Fetch UID
1810         const uid_t uid = pakfire_uid(jail->pakfire);
1811
1812         // Fetch SUBUID
1813         const struct pakfire_subid* subuid = pakfire_subuid(jail->pakfire);
1814         if (!subuid)
1815                 return 1;
1816
1817         /* When running as root, we will map the entire range.
1818
1819            When running as a non-privileged user, we will map the root user inside the jail
1820            to the user's UID outside of the jail, and we will map the rest starting from one.
1821         */
1822
1823         // Running as root
1824         if (uid == 0) {
1825                 r = pakfire_file_write(jail->pakfire, path, 0, 0, 0,
1826                         "0 %lu %lu\n", subuid->id, subuid->length);
1827         } else {
1828                 r = pakfire_file_write(jail->pakfire, path, 0, 0, 0,
1829                         "0 %lu 1\n1 %lu %lu\n", uid, subuid->id, subuid->length);
1830         }
1831
1832         if (r) {
1833                 ERROR(jail->pakfire, "Could not map UIDs: %m\n");
1834                 return r;
1835         }
1836
1837         return r;
1838 }
1839
1840 static int pakfire_jail_setup_gid_mapping(struct pakfire_jail* jail, pid_t pid) {
1841         char path[PATH_MAX];
1842         int r;
1843
1844         // Skip mapping anything when running on /
1845         if (pakfire_on_root(jail->pakfire))
1846                 return 0;
1847
1848         // Fetch GID
1849         const gid_t gid = pakfire_gid(jail->pakfire);
1850
1851         // Fetch SUBGID
1852         const struct pakfire_subid* subgid = pakfire_subgid(jail->pakfire);
1853         if (!subgid)
1854                 return 1;
1855
1856         // Make path
1857         r = pakfire_string_format(path, "/proc/%d/gid_map", pid);
1858         if (r)
1859                 return r;
1860
1861         // Running as root
1862         if (gid == 0) {
1863                 r = pakfire_file_write(jail->pakfire, path, 0, 0, 0,
1864                         "0 %lu %lu\n", subgid->id, subgid->length);
1865         } else {
1866                 r = pakfire_file_write(jail->pakfire, path, 0, 0, 0,
1867                         "0 %lu 1\n1 %lu %lu\n", gid, subgid->id, subgid->length);
1868         }
1869
1870         if (r) {
1871                 ERROR(jail->pakfire, "Could not map GIDs: %m\n");
1872                 return r;
1873         }
1874
1875         return r;
1876 }
1877
1878 static int pakfire_jail_setgroups(struct pakfire_jail* jail, pid_t pid) {
1879         char path[PATH_MAX];
1880         int r;
1881
1882         // Make path
1883         r = pakfire_string_format(path, "/proc/%d/setgroups", pid);
1884         if (r)
1885                 return r;
1886
1887         r = pakfire_file_write(jail->pakfire, path, 0, 0, 0, "deny\n");
1888         if (r) {
1889                 CTX_ERROR(jail->ctx, "Could not set setgroups to deny: %s\n", strerror(errno));
1890                 r = -errno;
1891         }
1892
1893         return r;
1894 }
1895
1896 static int pakfire_jail_send_signal(struct pakfire_jail* jail, int fd) {
1897         const uint64_t val = 1;
1898         int r = 0;
1899
1900         DEBUG(jail->pakfire, "Sending signal...\n");
1901
1902         // Write to the file descriptor
1903         r = eventfd_write(fd, val);
1904         if (r < 0) {
1905                 ERROR(jail->pakfire, "Could not send signal: %s\n", strerror(errno));
1906                 r = -errno;
1907         }
1908
1909         // Close the file descriptor
1910         close(fd);
1911
1912         return r;
1913 }
1914
1915 static int pakfire_jail_wait_for_signal(struct pakfire_jail* jail, int fd) {
1916         uint64_t val = 0;
1917         int r = 0;
1918
1919         DEBUG(jail->pakfire, "Waiting for signal...\n");
1920
1921         r = eventfd_read(fd, &val);
1922         if (r < 0) {
1923                 ERROR(jail->pakfire, "Error waiting for signal: %s\n", strerror(errno));
1924                 r = -errno;
1925         }
1926
1927         // Close the file descriptor
1928         close(fd);
1929
1930         return r;
1931 }
1932
1933 /*
1934         Performs the initialisation that needs to happen in the parent part
1935 */
1936 static int pakfire_jail_parent(struct pakfire_jail* jail, struct pakfire_jail_exec* ctx) {
1937         int r;
1938
1939         // Setup UID mapping
1940         r = pakfire_jail_setup_uid_mapping(jail, ctx->pid);
1941         if (r)
1942                 return r;
1943
1944         // Write "deny" to /proc/PID/setgroups
1945         r = pakfire_jail_setgroups(jail, ctx->pid);
1946         if (r)
1947                 return r;
1948
1949         // Setup GID mapping
1950         r = pakfire_jail_setup_gid_mapping(jail, ctx->pid);
1951         if (r)
1952                 return r;
1953
1954         // Parent has finished initialisation
1955         DEBUG(jail->pakfire, "Parent has finished initialization\n");
1956
1957         // Send signal to client
1958         r = pakfire_jail_send_signal(jail, ctx->completed_fd);
1959         if (r)
1960                 return r;
1961
1962         return 0;
1963 }
1964
1965 static int pakfire_jail_switch_root(struct pakfire_jail* jail, const char* root) {
1966         int r;
1967
1968         // Change to the new root
1969         r = chdir(root);
1970         if (r) {
1971                 ERROR(jail->pakfire, "chdir(%s) failed: %m\n", root);
1972                 return r;
1973         }
1974
1975         // Switch Root!
1976         r = pivot_root(".", ".");
1977         if (r) {
1978                 ERROR(jail->pakfire, "Failed changing into the new root directory %s: %m\n", root);
1979                 return r;
1980         }
1981
1982         // Umount the old root
1983         r = umount2(".", MNT_DETACH);
1984         if (r) {
1985                 ERROR(jail->pakfire, "Could not umount the old root filesystem: %m\n");
1986                 return r;
1987         }
1988
1989         return 0;
1990 }
1991
1992 static int pakfire_jail_open_pty(struct pakfire_jail* jail, struct pakfire_jail_exec* ctx) {
1993         int r;
1994
1995         // Allocate a new PTY
1996         ctx->pty.master.fd = posix_openpt(O_RDWR|O_NONBLOCK|O_NOCTTY|O_CLOEXEC);
1997         if (ctx->pty.master.fd < 0)
1998                 return -errno;
1999
2000         // Fetch the path
2001         r = ptsname_r(ctx->pty.master.fd, ctx->pty.console, sizeof(ctx->pty.console));
2002         if (r)
2003                 return -r;
2004
2005         CTX_DEBUG(jail->ctx, "Allocated console at %s (%d)\n", ctx->pty.console, ctx->pty.master.fd);
2006
2007         // Unlock the master device
2008         r = unlockpt(ctx->pty.master.fd);
2009         if (r) {
2010                 CTX_ERROR(jail->ctx, "Could not unlock the PTY: %s\n", strerror(errno));
2011                 return -errno;
2012         }
2013
2014         // Create a symlink
2015         r = pakfire_symlink(jail->ctx, ctx->pty.console, "/dev/console");
2016         if (r)
2017                 return r;
2018
2019         return r;
2020 }
2021
2022 static int pakfire_jail_setup_terminal(struct pakfire_jail* jail, struct pakfire_jail_exec* ctx) {
2023         int fd;
2024         int r;
2025
2026         // Open a new terminal
2027         fd = open("/dev/console", O_RDWR|O_NOCTTY);
2028         if (fd < 0) {
2029                 CTX_ERROR(jail->ctx, "Failed to open a new terminal: %s\n", strerror(errno));
2030                 return -errno;
2031         }
2032
2033         CTX_DEBUG(jail->ctx, "Opened a new terminal %d\n", fd);
2034
2035         // Connect the new terminal to standard input
2036         r = dup2(fd, STDIN_FILENO);
2037         if (r < 0) {
2038                 CTX_ERROR(jail->ctx, "Failed to open standard input: %s\n", strerror(errno));
2039                 return -errno;
2040         }
2041
2042         // Connect the new terminal to standard output
2043         r = dup2(fd, STDOUT_FILENO);
2044         if (r < 0) {
2045                 CTX_ERROR(jail->ctx, "Failed to open standard output: %s\n", strerror(errno));
2046                 return -errno;
2047         }
2048
2049         // Connect the new terminal to standard error
2050         r = dup2(fd, STDERR_FILENO);
2051         if (r < 0) {
2052                 CTX_ERROR(jail->ctx, "Failed to open standard error: %s\n", strerror(errno));
2053                 return -errno;
2054         }
2055
2056         return 0;
2057 }
2058
2059 static int pakfire_jail_child(struct pakfire_jail* jail, struct pakfire_jail_exec* ctx,
2060                 const char* argv[]) {
2061         int r;
2062
2063         // Redirect any logging to our log pipe
2064         pakfire_ctx_set_log_callback(jail->ctx, pakfire_jail_log_redirect, &ctx->pipes);
2065
2066         // Fetch my own PID
2067         pid_t pid = getpid();
2068
2069         DEBUG(jail->pakfire, "Launched child process in jail with PID %d\n", pid);
2070
2071         // Wait for the parent to finish initialization
2072         r = pakfire_jail_wait_for_signal(jail, ctx->completed_fd);
2073         if (r)
2074                 return r;
2075
2076         // Die with parent
2077         r = prctl(PR_SET_PDEATHSIG, SIGKILL, 0, 0, 0);
2078         if (r) {
2079                 ERROR(jail->pakfire, "Could not configure to die with parent: %m\n");
2080                 return 126;
2081         }
2082
2083         // Make this process dumpable
2084         r = prctl (PR_SET_DUMPABLE, 1, 0, 0, 0);
2085         if (r) {
2086                 ERROR(jail->pakfire, "Could not make the process dumpable: %m\n");
2087                 return 126;
2088         }
2089
2090         // Don't drop any capabilities on setuid()
2091         r = prctl(PR_SET_KEEPCAPS, 1);
2092         if (r) {
2093                 ERROR(jail->pakfire, "Could not set PR_SET_KEEPCAPS: %m\n");
2094                 return 126;
2095         }
2096
2097         // Fetch UID/GID
2098         uid_t uid = getuid();
2099         gid_t gid = getgid();
2100
2101         // Fetch EUID/EGID
2102         uid_t euid = geteuid();
2103         gid_t egid = getegid();
2104
2105         DEBUG(jail->pakfire, "  UID: %u (effective %u)\n", uid, euid);
2106         DEBUG(jail->pakfire, "  GID: %u (effective %u)\n", gid, egid);
2107
2108         // Log all mountpoints
2109         pakfire_mount_list(jail->ctx);
2110
2111         // Fail if we are not PID 1
2112         if (pid != 1) {
2113                 CTX_ERROR(jail->ctx, "Child process is not PID 1\n");
2114                 return 126;
2115         }
2116
2117         // Fail if we are not running as root
2118         if (uid || gid || euid || egid) {
2119                 ERROR(jail->pakfire, "Child process is not running as root\n");
2120                 return 126;
2121         }
2122
2123         const int socket_send = pakfire_jail_get_pipe_to_write(jail, &ctx->socket);
2124
2125         // Mount all default stuff
2126         r = pakfire_mount_all(jail->pakfire, PAKFIRE_MNTNS_INNER, 0);
2127         if (r)
2128                 return 126;
2129
2130         const char* root = pakfire_get_path(jail->pakfire);
2131         const char* arch = pakfire_get_effective_arch(jail->pakfire);
2132
2133         // Change mount propagation to slave to receive anything from the parent namespace
2134         r = pakfire_mount_change_propagation(jail->ctx, "/", MS_SLAVE);
2135         if (r)
2136                 return r;
2137
2138         // Make root a mountpoint in the new mount namespace
2139         r = pakfire_mount_make_mounpoint(jail->pakfire, root);
2140         if (r)
2141                 return r;
2142
2143         // Change mount propagation to private
2144         r = pakfire_mount_change_propagation(jail->ctx, root, MS_PRIVATE);
2145         if (r)
2146                 return r;
2147
2148         // Change root (unless root is /)
2149         if (!pakfire_on_root(jail->pakfire)) {
2150                 // Mount everything
2151                 r = pakfire_jail_mount(jail, ctx);
2152                 if (r)
2153                         return r;
2154
2155                 // chroot()
2156                 r = pakfire_jail_switch_root(jail, root);
2157                 if (r)
2158                         return r;
2159         }
2160
2161         // Set personality
2162         unsigned long persona = pakfire_arch_personality(arch);
2163         if (persona) {
2164                 r = personality(persona);
2165                 if (r < 0) {
2166                         ERROR(jail->pakfire, "Could not set personality (%x)\n", (unsigned int)persona);
2167                         return 1;
2168                 }
2169         }
2170
2171         // Setup networking
2172         if (!pakfire_jail_exec_has_flag(ctx, PAKFIRE_JAIL_HAS_NETWORKING)) {
2173                 r = pakfire_jail_setup_loopback(jail);
2174                 if (r)
2175                         return 1;
2176         }
2177
2178         // Set nice level
2179         if (jail->nice) {
2180                 DEBUG(jail->pakfire, "Setting nice level to %d\n", jail->nice);
2181
2182                 r = setpriority(PRIO_PROCESS, pid, jail->nice);
2183                 if (r) {
2184                         ERROR(jail->pakfire, "Could not set nice level: %m\n");
2185                         return 1;
2186                 }
2187         }
2188
2189         // Create a new session
2190         r = setsid();
2191         if (r < 0) {
2192                 CTX_ERROR(jail->ctx, "Could not create a new session: %s\n", strerror(errno));
2193                 return r;
2194         }
2195
2196         // Allocate a new PTY
2197         r = pakfire_jail_open_pty(jail, ctx);
2198         if (r) {
2199                 CTX_ERROR(jail->ctx, "Could not allocate a new PTY: %s\n", strerror(-r));
2200                 return r;
2201         }
2202
2203         // Send the PTY master to the parent process
2204         r = pakfire_jail_send_fd(jail, socket_send, ctx->pty.master.fd);
2205         if (r) {
2206                 CTX_ERROR(jail->ctx, "Failed sending the PTY master to the parent: %s\n", strerror(-r));
2207                 return r;
2208         }
2209
2210         // Setup the terminal
2211         r = pakfire_jail_setup_terminal(jail, ctx);
2212         if (r)
2213                 return r;
2214
2215         // Close the master of the PTY
2216         close(ctx->pty.master.fd);
2217         ctx->pty.master.fd = -1;
2218
2219         // Close the socket
2220         close(socket_send);
2221
2222         // Close other end of log pipes
2223         close(ctx->pipes.log_INFO[0]);
2224         close(ctx->pipes.log_ERROR[0]);
2225 #ifdef ENABLE_DEBUG
2226         close(ctx->pipes.log_DEBUG[0]);
2227 #endif /* ENABLE_DEBUG */
2228
2229         // Reset open file limit (http://0pointer.net/blog/file-descriptor-limits.html)
2230         r = pakfire_rlimit_reset_nofile(jail->pakfire);
2231         if (r)
2232                 return r;
2233
2234         // Set capabilities
2235         r = pakfire_jail_set_capabilities(jail);
2236         if (r)
2237                 return r;
2238
2239         // Show capabilities
2240         r = pakfire_jail_show_capabilities(jail);
2241         if (r)
2242                 return r;
2243
2244         // Filter syscalls
2245         r = pakfire_jail_limit_syscalls(jail);
2246         if (r)
2247                 return r;
2248
2249         DEBUG(jail->pakfire, "Child process initialization done\n");
2250         DEBUG(jail->pakfire, "Launching command:\n");
2251
2252         // Log argv
2253         for (unsigned int i = 0; argv[i]; i++)
2254                 DEBUG(jail->pakfire, "  argv[%u] = %s\n", i, argv[i]);
2255
2256         // exec() command
2257         r = execvpe(argv[0], (char**)argv, jail->env);
2258         if (r < 0) {
2259                 // Translate errno into regular exit code
2260                 switch (errno) {
2261                         case ENOENT:
2262                                 // Ignore if the command doesn't exist
2263                                 if (ctx->flags & PAKFIRE_JAIL_NOENT_OK)
2264                                         r = 0;
2265                                 else
2266                                         r = 127;
2267
2268                                 break;
2269
2270                         default:
2271                                 r = 1;
2272                 }
2273
2274                 ERROR(jail->pakfire, "Could not execve(%s): %m\n", argv[0]);
2275         }
2276
2277         // We should not get here
2278         return r;
2279 }
2280
2281 // Run a command in the jail
2282 static int __pakfire_jail_exec(struct pakfire_jail* jail, const char* argv[],
2283                 pakfire_jail_communicate_in  communicate_in,
2284                 pakfire_jail_communicate_out communicate_out,
2285                 void* data, int flags) {
2286         int exit = -1;
2287         int r;
2288
2289         // Check if argv is valid
2290         if (!argv || !argv[0]) {
2291                 errno = EINVAL;
2292                 return -1;
2293         }
2294
2295         // Initialize context for this call
2296         struct pakfire_jail_exec ctx = {
2297                 .flags = flags,
2298
2299                 .socket = { -1, -1 },
2300
2301                 .pipes = {
2302                         .log_INFO  = { -1, -1 },
2303                         .log_ERROR = { -1, -1 },
2304 #ifdef ENABLE_DEBUG
2305                         .log_DEBUG = { -1, -1 },
2306 #endif /* ENABLE_DEBUG */
2307                 },
2308
2309                 .communicate = {
2310                         .in   = communicate_in,
2311                         .out  = communicate_out,
2312                         .data = data,
2313                 },
2314
2315                 .pidfd = -1,
2316
2317                 // PTY
2318                 .pty = {
2319                         .master = {
2320                                 .fd = -1,
2321                         },
2322                         .stdin = {
2323                                 .fd = -1,
2324                         },
2325                         .stdout = {
2326                                 .fd = -1,
2327                         },
2328                 },
2329         };
2330
2331         DEBUG(jail->pakfire, "Executing jail...\n");
2332
2333         // Enable networking in interactive mode
2334         if (ctx.flags & PAKFIRE_JAIL_PTY_FORWARDING)
2335                 ctx.flags |= PAKFIRE_JAIL_HAS_NETWORKING;
2336
2337         /*
2338                 Setup a file descriptor which can be used to notify the client that the parent
2339                 has completed configuration.
2340         */
2341         ctx.completed_fd = eventfd(0, EFD_CLOEXEC);
2342         if (ctx.completed_fd < 0) {
2343                 ERROR(jail->pakfire, "eventfd() failed: %m\n");
2344                 return -1;
2345         }
2346
2347         // Create a UNIX domain socket
2348         r = socketpair(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC, 0, ctx.socket);
2349         if (r < 0) {
2350                 CTX_ERROR(jail->ctx, "Could not create UNIX socket: %s\n", strerror(errno));
2351                 r = -errno;
2352                 goto ERROR;
2353         }
2354
2355         // Setup pipes for logging
2356         // INFO
2357         r = pakfire_jail_setup_pipe(jail, &ctx.pipes.log_INFO, O_CLOEXEC);
2358         if (r)
2359                 goto ERROR;
2360
2361         // ERROR
2362         r = pakfire_jail_setup_pipe(jail, &ctx.pipes.log_ERROR, O_CLOEXEC);
2363         if (r)
2364                 goto ERROR;
2365
2366 #ifdef ENABLE_DEBUG
2367         // DEBUG
2368         r = pakfire_jail_setup_pipe(jail, &ctx.pipes.log_DEBUG, O_CLOEXEC);
2369         if (r)
2370                 goto ERROR;
2371 #endif /* ENABLE_DEBUG */
2372
2373         // Configure child process
2374         struct clone_args args = {
2375                 .flags =
2376                         CLONE_NEWCGROUP |
2377                         CLONE_NEWIPC |
2378                         CLONE_NEWNS |
2379                         CLONE_NEWPID |
2380                         CLONE_NEWTIME |
2381                         CLONE_NEWUSER |
2382                         CLONE_NEWUTS |
2383                         CLONE_PIDFD,
2384                 .exit_signal = SIGCHLD,
2385                 .pidfd = (long long unsigned int)&ctx.pidfd,
2386         };
2387
2388         // Launch the process in a cgroup that is a leaf of the configured cgroup
2389         if (jail->cgroup) {
2390                 args.flags |= CLONE_INTO_CGROUP;
2391
2392                 // Fetch our UUID
2393                 const char* uuid = pakfire_jail_uuid(jail);
2394
2395                 // Create a temporary cgroup
2396                 r = pakfire_cgroup_child(&ctx.cgroup, jail->cgroup, uuid, 0);
2397                 if (r) {
2398                         ERROR(jail->pakfire, "Could not create cgroup for jail: %m\n");
2399                         goto ERROR;
2400                 }
2401
2402                 // Clone into this cgroup
2403                 args.cgroup = pakfire_cgroup_fd(ctx.cgroup);
2404         }
2405
2406         // Setup networking
2407         if (!pakfire_jail_exec_has_flag(&ctx, PAKFIRE_JAIL_HAS_NETWORKING)) {
2408                 args.flags |= CLONE_NEWNET;
2409         }
2410
2411         // Fork this process
2412         ctx.pid = clone3(&args, sizeof(args));
2413         if (ctx.pid < 0) {
2414                 ERROR(jail->pakfire, "Could not clone: %m\n");
2415                 return -1;
2416
2417         // Child process
2418         } else if (ctx.pid == 0) {
2419                 r = pakfire_jail_child(jail, &ctx, argv);
2420                 _exit(r);
2421         }
2422
2423         // Parent process
2424         r = pakfire_jail_parent(jail, &ctx);
2425         if (r)
2426                 goto ERROR;
2427
2428         DEBUG(jail->pakfire, "Waiting for PID %d to finish its work\n", ctx.pid);
2429
2430         // Read output of the child process
2431         r = pakfire_jail_wait(jail, &ctx);
2432         if (r)
2433                 goto ERROR;
2434
2435         // Handle exit status
2436         switch (ctx.status.si_code) {
2437                 case CLD_EXITED:
2438                         DEBUG(jail->pakfire, "The child process exited with code %d\n",
2439                                 ctx.status.si_status);
2440
2441                         // Pass exit code
2442                         exit = ctx.status.si_status;
2443                         break;
2444
2445                 case CLD_KILLED:
2446                         ERROR(jail->pakfire, "The child process was killed\n");
2447                         exit = 139;
2448                         break;
2449
2450                 case CLD_DUMPED:
2451                         ERROR(jail->pakfire, "The child process terminated abnormally\n");
2452                         break;
2453
2454                 // Log anything else
2455                 default:
2456                         ERROR(jail->pakfire, "Unknown child exit code: %d\n", ctx.status.si_code);
2457                         break;
2458         }
2459
2460 ERROR:
2461         // Destroy the temporary cgroup (if any)
2462         if (ctx.cgroup) {
2463                 // Read cgroup stats
2464                 pakfire_cgroup_stat(ctx.cgroup, &ctx.cgroup_stats);
2465                 pakfire_cgroup_stat_dump(ctx.cgroup, &ctx.cgroup_stats);
2466                 pakfire_cgroup_destroy(ctx.cgroup);
2467                 pakfire_cgroup_unref(ctx.cgroup);
2468         }
2469
2470         // Close any file descriptors
2471         if (ctx.pidfd >= 0)
2472                 close(ctx.pidfd);
2473         if (ctx.pty.master.fd >= 0)
2474                 close(ctx.pty.master.fd);
2475         pakfire_jail_close_pipe(jail, ctx.pipes.log_INFO);
2476         pakfire_jail_close_pipe(jail, ctx.pipes.log_ERROR);
2477 #ifdef ENABLE_DEBUG
2478         pakfire_jail_close_pipe(jail, ctx.pipes.log_DEBUG);
2479 #endif /* ENABLE_DEBUG */
2480         pakfire_jail_close_pipe(jail, ctx.socket);
2481
2482         return exit;
2483 }
2484
2485 PAKFIRE_EXPORT int pakfire_jail_exec(
2486                 struct pakfire_jail* jail,
2487                 const char* argv[],
2488                 pakfire_jail_communicate_in  callback_in,
2489                 pakfire_jail_communicate_out callback_out,
2490                 void* data, int flags) {
2491         return __pakfire_jail_exec(jail, argv, callback_in, callback_out, data, flags);
2492 }
2493
2494 static int pakfire_jail_exec_interactive(
2495                 struct pakfire_jail* jail, const char* argv[], int flags) {
2496         int r;
2497
2498         flags |= PAKFIRE_JAIL_PTY_FORWARDING;
2499
2500         // Setup interactive stuff
2501         r = pakfire_jail_setup_interactive_env(jail);
2502         if (r)
2503                 return r;
2504
2505         return __pakfire_jail_exec(jail, argv, NULL, NULL, NULL, flags);
2506 }
2507
2508 int pakfire_jail_exec_script(struct pakfire_jail* jail,
2509                 const char* script,
2510                 const size_t size,
2511                 const char* args[],
2512                 pakfire_jail_communicate_in  callback_in,
2513                 pakfire_jail_communicate_out callback_out,
2514                 void* data) {
2515         char path[PATH_MAX];
2516         const char** argv = NULL;
2517         FILE* f = NULL;
2518         int r;
2519
2520         const char* root = pakfire_get_path(jail->pakfire);
2521
2522         // Write the scriptlet to disk
2523         r = pakfire_path_append(path, root, PAKFIRE_TMP_DIR "/pakfire-script.XXXXXX");
2524         if (r)
2525                 goto ERROR;
2526
2527         // Create a temporary file
2528         f = pakfire_mktemp(path, 0700);
2529         if (!f) {
2530                 ERROR(jail->pakfire, "Could not create temporary file: %m\n");
2531                 goto ERROR;
2532         }
2533
2534         DEBUG(jail->pakfire, "Writing script to %s:\n%.*s\n", path, (int)size, script);
2535
2536         // Write data
2537         r = fprintf(f, "%s", script);
2538         if (r < 0) {
2539                 ERROR(jail->pakfire, "Could not write script to file %s: %m\n", path);
2540                 goto ERROR;
2541         }
2542
2543         // Close file
2544         r = fclose(f);
2545         if (r) {
2546                 ERROR(jail->pakfire, "Could not close script file %s: %m\n", path);
2547                 goto ERROR;
2548         }
2549
2550         f = NULL;
2551
2552         // Count how many arguments were passed
2553         unsigned int argc = 1;
2554         if (args) {
2555                 for (const char** arg = args; *arg; arg++)
2556                         argc++;
2557         }
2558
2559         argv = calloc(argc + 1, sizeof(*argv));
2560         if (!argv) {
2561                 ERROR(jail->pakfire, "Could not allocate argv: %m\n");
2562                 goto ERROR;
2563         }
2564
2565         // Set command
2566         argv[0] = (root) ? pakfire_path_relpath(root, path) : path;
2567
2568         // Copy args
2569         for (unsigned int i = 1; i < argc; i++)
2570                 argv[i] = args[i-1];
2571
2572         // Run the script
2573         r = pakfire_jail_exec(jail, argv, callback_in, callback_out, data, 0);
2574
2575 ERROR:
2576         if (argv)
2577                 free(argv);
2578         if (f)
2579                 fclose(f);
2580
2581         // Remove script from disk
2582         if (*path)
2583                 unlink(path);
2584
2585         return r;
2586 }
2587
2588 /*
2589         A convenience function that creates a new jail, runs the given command and destroys
2590         the jail again.
2591 */
2592 int pakfire_jail_run(struct pakfire* pakfire, const char* argv[], int flags, char** output) {
2593         struct pakfire_jail* jail = NULL;
2594         int r;
2595
2596         // Create a new jail
2597         r = pakfire_jail_create(&jail, pakfire);
2598         if (r)
2599                 goto ERROR;
2600
2601         // Execute the command
2602         r = pakfire_jail_exec(jail, argv, NULL, pakfire_jail_capture_stdout, output, 0);
2603
2604 ERROR:
2605         if (jail)
2606                 pakfire_jail_unref(jail);
2607
2608         return r;
2609 }
2610
/*
	Convenience wrapper: creates a jail, executes the given script in it
	without any communication callbacks and releases the jail again.

	Note that the flags argument is not used.

	Returns the result of pakfire_jail_create() if that failed, or else the
	result of pakfire_jail_exec_script().
*/
int pakfire_jail_run_script(struct pakfire* pakfire,
		const char* script, const size_t length, const char* argv[], int flags) {
	struct pakfire_jail* jail = NULL;

	// Only execute the script if the jail could be created
	int status = pakfire_jail_create(&jail, pakfire);
	if (status == 0)
		status = pakfire_jail_exec_script(jail, script, length, argv, NULL, NULL, NULL);

	if (jail)
		pakfire_jail_unref(jail);

	return status;
}
2630
/*
	Launches an interactive login shell (/bin/bash) in the jail.

	Returns a negative value if the shell could not be run. Whatever the
	shell itself returned is ignored and reported as zero.
*/
int pakfire_jail_shell(struct pakfire_jail* jail) {
	const char* argv[] = { "/bin/bash", "--login", NULL };

	const int status = pakfire_jail_exec_interactive(jail, argv, 0);

	// Only negative values are errors; anything else came from the shell
	return (status < 0) ? status : 0;
}
2648
2649 static int pakfire_jail_run_if_possible(struct pakfire* pakfire, const char** argv) {
2650         char path[PATH_MAX];
2651         int r;
2652
2653         r = pakfire_path(pakfire, path, "%s", *argv);
2654         if (r)
2655                 return r;
2656
2657         // Check if the file is executable
2658         r = access(path, X_OK);
2659         if (r) {
2660                 DEBUG(pakfire, "%s is not executable. Skipping...\n", *argv);
2661                 return 0;
2662         }
2663
2664         return pakfire_jail_run(pakfire, argv, 0, NULL);
2665 }
2666
/*
	Runs /sbin/ldconfig in a jail, if it is executable.
*/
int pakfire_jail_ldconfig(struct pakfire* pakfire) {
	const char* command[] = { "/sbin/ldconfig", NULL };

	return pakfire_jail_run_if_possible(pakfire, command);
}
2675
/*
	Runs "/usr/bin/systemd-tmpfiles --create" in a jail, if it is executable.
*/
int pakfire_jail_run_systemd_tmpfiles(struct pakfire* pakfire) {
	const char* command[] = { "/usr/bin/systemd-tmpfiles", "--create", NULL };

	return pakfire_jail_run_if_possible(pakfire, command);
}