src/libpakfire/jail.c

   1 /*#############################################################################
   2 #                                                                             #
   3 # Pakfire - The IPFire package management system                              #
   4 # Copyright (C) 2022 Pakfire development team                                 #
   5 #                                                                             #
   6 # This program is free software: you can redistribute it and/or modify        #
   7 # it under the terms of the GNU General Public License as published by        #
   8 # the Free Software Foundation, either version 3 of the License, or           #
   9 # (at your option) any later version.                                         #
  10 #                                                                             #
  11 # This program is distributed in the hope that it will be useful,             #
  12 # but WITHOUT ANY WARRANTY; without even the implied warranty of              #
  13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the               #
  14 # GNU General Public License for more details.                                #
  15 #                                                                             #
  16 # You should have received a copy of the GNU General Public License           #
  17 # along with this program.  If not, see <http://www.gnu.org/licenses/>.       #
  18 #                                                                             #
  19 #############################################################################*/
  20
  21 #include <errno.h>
  22 #include <fcntl.h>
  23 #include <linux/capability.h>
  24 #include <linux/sched.h>
  25 #include <sys/wait.h>
  26 #include <linux/wait.h>
  27 #include <sched.h>
  28 #include <signal.h>
  29 #include <stdlib.h>
  30 #include <syscall.h>
  31 #include <sys/capability.h>
  32 #include <sys/epoll.h>
  33 #include <sys/eventfd.h>
  34 #include <sys/mount.h>
  35 #include <sys/personality.h>
  36 #include <sys/prctl.h>
  37 #include <sys/resource.h>
  38 #include <sys/timerfd.h>
  39 #include <sys/types.h>
  40 #include <sys/wait.h>
  41 #include <termios.h>
  42
  43 // libnl3
  44 #include <net/if.h>
  45 #include <netlink/route/link.h>
  46
  47 // libseccomp
  48 #include <seccomp.h>
  49
  50 // libuuid
  51 #include <uuid.h>
  52
  53 #include <pakfire/arch.h>
  54 #include <pakfire/cgroup.h>
  55 #include <pakfire/jail.h>
  56 #include <pakfire/logging.h>
  57 #include <pakfire/mount.h>
  58 #include <pakfire/pakfire.h>
  59 #include <pakfire/path.h>
  60 #include <pakfire/private.h>
  61 #include <pakfire/pwd.h>
  62 #include <pakfire/string.h>
  63 #include <pakfire/util.h>
  64
  65 #define BUFFER_SIZE      1024 * 64
  66 #define ENVIRON_SIZE     128
  67 #define EPOLL_MAX_EVENTS 2
  68 #define MAX_MOUNTPOINTS  8
  69
  70 // The default environment that will be set for every command
  71 static const struct environ {
  72         const char* key;
  73         const char* val;
  74 } ENV[] = {
  75         { "HOME", "/root" },
  76         { "LANG", "C.utf-8" },
  77         { "PATH", "/usr/local/sbin:/usr/sbin:/sbin:/usr/local/bin:/usr/bin:/bin", },
  78         { "TERM", "vt100" },
  79
  80         // Tell everything that it is running inside a Pakfire container
  81         { "container", "pakfire" },
  82         { NULL, NULL },
  83 };
  84
// One mountpoint entry as stored in struct pakfire_jail (mountpoints[])
struct pakfire_jail_mountpoint {
	// Path that is mounted (NOTE(review): presumably a path outside of the
	// jail - the code that consumes this is not part of this excerpt)
	char source[PATH_MAX];

	// Path where it is mounted to
	char target[PATH_MAX];

	// Mount flags (NOTE(review): exact set of accepted flags not visible here)
	int flags;
};
  90
// A jail: the reference-counted object that carries all settings
// (environment, limits, callbacks, ...) configured through the
// pakfire_jail_* functions.
struct pakfire_jail {
	// Context; obtained with pakfire_ctx() in pakfire_jail_create() and
	// released in pakfire_jail_free()
	struct pakfire_ctx* ctx;

	// Reference to the Pakfire instance this jail belongs to
	struct pakfire* pakfire;

	// Reference counter; the jail is freed when this drops to zero
	int nrefs;

	// A unique ID for each jail
	uuid_t uuid;

	// String form of the UUID; filled lazily by pakfire_jail_uuid() and empty
	// until then
	char __uuid[UUID_STR_LEN];

	// Resource Limits
	// Nice level as stored by pakfire_jail_nice()
	int nice;

	// Timeout
	// pakfire_jail_set_timeout() stores the number of seconds in
	// it_value.tv_sec; zero means that no timeout is set
	struct itimerspec timeout;

	// CGroup
	// Optional; holds its own reference (see pakfire_jail_set_cgroup())
	struct pakfire_cgroup* cgroup;

	// Environment
	// Heap-allocated "KEY=VALUE" strings; the functions in this file treat the
	// first NULL entry as the end of the list
	char* env[ENVIRON_SIZE];

	// Mountpoints
	struct pakfire_jail_mountpoint mountpoints[MAX_MOUNTPOINTS];
	unsigned int num_mountpoints;

	// Callbacks
	struct pakfire_jail_callbacks {
		// Log
		// Callback and its opaque data pointer as set by
		// pakfire_jail_set_log_callback()
		pakfire_jail_log_callback log;
		void* log_data;
	} callbacks;
};
 123
// Fixed-size byte buffer that collects output until it can be passed on.
// The content is raw bytes and is NOT NUL-terminated.
struct pakfire_log_buffer {
	// Storage
	char data[BUFFER_SIZE];

	// Number of bytes at the beginning of data that are currently in use
	size_t used;
};
 128
// State of one command execution inside a jail: the child process, the
// pipes and buffers used to collect its output and the PTY that is forwarded.
struct pakfire_jail_exec {
	// Bitmask that is tested with pakfire_jail_exec_has_flag()
	int flags;

	// PID (of the child)
	pid_t pid;
	int pidfd;

	// Socket to pass FDs
	int socket[2];

	// Process status (from waitid)
	siginfo_t status;

	// FD to notify the client that the parent has finished initialization
	int completed_fd;

	// Log pipes
	// Index 0 is the read end and index 1 is the write end of each pipe;
	// pakfire_jail_log_redirect() writes to index 1
	struct pakfire_jail_pipes {
		// Logging
		int log_INFO[2];
		int log_ERROR[2];
#ifdef ENABLE_DEBUG
		int log_DEBUG[2];
#endif /* ENABLE_DEBUG */
	} pipes;

	// Communicate
	// Optional callbacks for standard input/output with their opaque data
	struct pakfire_jail_communicate {
		pakfire_jail_communicate_in  in;
		pakfire_jail_communicate_out out;
		void* data;
	} communicate;

	// Log buffers
	struct pakfire_jail_buffers {
		struct pakfire_log_buffer stdout;
		struct pakfire_log_buffer stderr;

		// Logging
		struct pakfire_log_buffer log_INFO;
		struct pakfire_log_buffer log_ERROR;
#ifdef ENABLE_DEBUG
		struct pakfire_log_buffer log_DEBUG;
#endif /* ENABLE_DEBUG */
	} buffers;

	// CGroup of this execution and its statistics (NOTE(review): the code
	// that fills these is outside of this excerpt)
	struct pakfire_cgroup* cgroup;
	struct pakfire_cgroup_stats cgroup_stats;

	// PTY
	struct pakfire_jail_pty {
		// The path to the console
		char console[PATH_MAX];

		// The master fd
		struct pakfire_jail_pty_master {
			int fd;

			// Readiness bits
			enum pakfire_jail_pty_flags {
				PAKFIRE_JAIL_PTY_READY_TO_READ  = (1 << 0),
				PAKFIRE_JAIL_PTY_READY_TO_WRITE = (1 << 1),
			} flags;
		} master;

		// Standard Input
		struct pakfire_jail_pty_stdio {
			int fd;
			struct pakfire_log_buffer buffer;

			// Terminal attributes and file status flags as they were before
			// pakfire_jail_enable_raw_mode() changed them
			struct termios attrs;
			int fdflags;
			enum pakfire_jail_pty_flags flags;
		} stdin;

		// Standard Output
		struct pakfire_jail_pty_stdio stdout;
	} pty;
};
 206
 207 static int clone3(struct clone_args* args, size_t size) {
 208         return syscall(__NR_clone3, args, size);
 209 }
 210
 211 static int pidfd_send_signal(int pidfd, int sig, siginfo_t* info, unsigned int flags) {
 212         return syscall(SYS_pidfd_send_signal, pidfd, sig, info, flags);
 213 }
 214
 215 static int pivot_root(const char* new_root, const char* old_root) {
 216         return syscall(SYS_pivot_root, new_root, old_root);
 217 }
 218
 219 static int pakfire_jail_exec_has_flag(
 220                 const struct pakfire_jail_exec* ctx, const enum pakfire_jail_exec_flags flag) {
 221         return ctx->flags & flag;
 222 }
 223
 224 static void pakfire_jail_free(struct pakfire_jail* jail) {
 225         DEBUG(jail->pakfire, "Freeing jail at %p\n", jail);
 226
 227         // Free environment
 228         for (unsigned int i = 0; jail->env[i]; i++)
 229                 free(jail->env[i]);
 230
 231         if (jail->cgroup)
 232                 pakfire_cgroup_unref(jail->cgroup);
 233         if (jail->pakfire)
 234                 pakfire_unref(jail->pakfire);
 235         if (jail->ctx)
 236                 pakfire_ctx_unref(jail->ctx);
 237         free(jail);
 238 }
 239
 240 /*
 241         Passes any log messages on to the default pakfire log callback
 242 */
 243 static int pakfire_jail_default_log_callback(struct pakfire* pakfire, void* data,
 244                 int priority, const char* line, size_t length) {
 245         switch (priority) {
 246                 case LOG_INFO:
 247                         INFO(pakfire, "%s", line);
 248                         break;
 249
 250                 case LOG_ERR:
 251                         ERROR(pakfire, "%s", line);
 252                         break;
 253
 254 #ifdef ENABLE_DEBUG
 255                 case LOG_DEBUG:
 256                         DEBUG(pakfire, "%s", line);
 257                         break;
 258 #endif
 259         }
 260
 261         return 0;
 262 }
 263
 264 static const char* pakfire_jail_uuid(struct pakfire_jail* jail) {
 265         if (!*jail->__uuid)
 266                 uuid_unparse_lower(jail->uuid, jail->__uuid);
 267
 268         return jail->__uuid;
 269 }
 270
 271 static int pakfire_jail_setup_interactive_env(struct pakfire_jail* jail) {
 272         // Set PS1
 273         int r = pakfire_jail_set_env(jail, "PS1", "pakfire-jail \\w> ");
 274         if (r)
 275                 return r;
 276
 277         // Copy TERM
 278         char* TERM = secure_getenv("TERM");
 279         if (TERM) {
 280                 r = pakfire_jail_set_env(jail, "TERM", TERM);
 281                 if (r)
 282                         return r;
 283         }
 284
 285         // Copy LANG
 286         char* LANG = secure_getenv("LANG");
 287         if (LANG) {
 288                 r = pakfire_jail_set_env(jail, "LANG", LANG);
 289                 if (r)
 290                         return r;
 291         }
 292
 293         return 0;
 294 }
 295
 296 PAKFIRE_EXPORT int pakfire_jail_create(struct pakfire_jail** jail, struct pakfire* pakfire) {
 297         int r;
 298
 299         const char* arch = pakfire_get_effective_arch(pakfire);
 300
 301         // Allocate a new jail
 302         struct pakfire_jail* j = calloc(1, sizeof(*j));
 303         if (!j)
 304                 return 1;
 305
 306         // Reference context
 307         j->ctx = pakfire_ctx(pakfire);
 308
 309         // Reference Pakfire
 310         j->pakfire = pakfire_ref(pakfire);
 311
 312         // Initialize reference counter
 313         j->nrefs = 1;
 314
 315         // Generate a random UUID
 316         uuid_generate_random(j->uuid);
 317
 318         DEBUG(j->pakfire, "Allocated new jail at %p\n", j);
 319
 320         // Set the default logging callback
 321         pakfire_jail_set_log_callback(j, pakfire_jail_default_log_callback, NULL);
 322
 323         // Set default environment
 324         for (const struct environ* e = ENV; e->key; e++) {
 325                 r = pakfire_jail_set_env(j, e->key, e->val);
 326                 if (r)
 327                         goto ERROR;
 328         }
 329
 330         // Enable all CPU features that CPU has to offer
 331         if (!pakfire_arch_is_supported_by_host(arch)) {
 332                 r = pakfire_jail_set_env(j, "QEMU_CPU", "max");
 333                 if (r)
 334                         goto ERROR;
 335         }
 336
 337         // Set container UUID
 338         r = pakfire_jail_set_env(j, "container_uuid", pakfire_jail_uuid(j));
 339         if (r)
 340                 goto ERROR;
 341
 342         // Disable systemctl to talk to systemd
 343         if (!pakfire_on_root(j->pakfire)) {
 344                 r = pakfire_jail_set_env(j, "SYSTEMD_OFFLINE", "1");
 345                 if (r)
 346                         goto ERROR;
 347         }
 348
 349         // Done
 350         *jail = j;
 351         return 0;
 352
 353 ERROR:
 354         pakfire_jail_free(j);
 355
 356         return r;
 357 }
 358
 359 PAKFIRE_EXPORT struct pakfire_jail* pakfire_jail_ref(struct pakfire_jail* jail) {
 360         ++jail->nrefs;
 361
 362         return jail;
 363 }
 364
 365 PAKFIRE_EXPORT struct pakfire_jail* pakfire_jail_unref(struct pakfire_jail* jail) {
 366         if (--jail->nrefs > 0)
 367                 return jail;
 368
 369         pakfire_jail_free(jail);
 370         return NULL;
 371 }
 372
 373 // Logging Callback
 374
 375 PAKFIRE_EXPORT void pakfire_jail_set_log_callback(struct pakfire_jail* jail,
 376                 pakfire_jail_log_callback callback, void* data) {
 377         jail->callbacks.log = callback;
 378         jail->callbacks.log_data = data;
 379 }
 380
 381 // Resource Limits
 382
 383 PAKFIRE_EXPORT int pakfire_jail_nice(struct pakfire_jail* jail, int nice) {
 384         // Check if nice level is in range
 385         if (nice < -19 || nice > 20) {
 386                 errno = EINVAL;
 387                 return 1;
 388         }
 389
 390         // Store nice level
 391         jail->nice = nice;
 392
 393         return 0;
 394 }
 395
 396 int pakfire_jail_set_cgroup(struct pakfire_jail* jail, struct pakfire_cgroup* cgroup) {
 397         // Free any previous cgroup
 398         if (jail->cgroup) {
 399                 pakfire_cgroup_unref(jail->cgroup);
 400                 jail->cgroup = NULL;
 401         }
 402
 403         // Set any new cgroup
 404         if (cgroup) {
 405                 DEBUG(jail->pakfire, "Setting cgroup %p\n", cgroup);
 406
 407                 jail->cgroup = pakfire_cgroup_ref(cgroup);
 408         }
 409
 410         // Done
 411         return 0;
 412 }
 413
 414 // Environment
 415
 416 // Returns the length of the environment
 417 static unsigned int pakfire_jail_env_length(struct pakfire_jail* jail) {
 418         unsigned int i = 0;
 419
 420         // Count everything in the environment
 421         for (char** e = jail->env; *e; e++)
 422                 i++;
 423
 424         return i;
 425 }
 426
 427 // Finds an existing environment variable and returns its index or -1 if not found
 428 static int pakfire_jail_find_env(struct pakfire_jail* jail, const char* key) {
 429         if (!key) {
 430                 errno = EINVAL;
 431                 return -1;
 432         }
 433
 434         const size_t length = strlen(key);
 435
 436         for (unsigned int i = 0; jail->env[i]; i++) {
 437                 if ((pakfire_string_startswith(jail->env[i], key)
 438                                 && *(jail->env[i] + length) == '=')) {
 439                         return i;
 440                 }
 441         }
 442
 443         // Nothing found
 444         return -1;
 445 }
 446
 447 // Returns the value of an environment variable or NULL
 448 PAKFIRE_EXPORT const char* pakfire_jail_get_env(struct pakfire_jail* jail,
 449                 const char* key) {
 450         int i = pakfire_jail_find_env(jail, key);
 451         if (i < 0)
 452                 return NULL;
 453
 454         return jail->env[i] + strlen(key) + 1;
 455 }
 456
 457 // Sets an environment variable
 458 PAKFIRE_EXPORT int pakfire_jail_set_env(struct pakfire_jail* jail,
 459                 const char* key, const char* value) {
 460         // Find the index where to write this value to
 461         int i = pakfire_jail_find_env(jail, key);
 462         if (i < 0)
 463                 i = pakfire_jail_env_length(jail);
 464
 465         // Return -ENOSPC when the environment is full
 466         if (i >= ENVIRON_SIZE) {
 467                 errno = ENOSPC;
 468                 return -1;
 469         }
 470
 471         // Free any previous value
 472         if (jail->env[i])
 473                 free(jail->env[i]);
 474
 475         // Format and set environment variable
 476         asprintf(&jail->env[i], "%s=%s", key, value);
 477
 478         DEBUG(jail->pakfire, "Set environment variable: %s\n", jail->env[i]);
 479
 480         return 0;
 481 }
 482
 483 // Imports an environment
 484 PAKFIRE_EXPORT int pakfire_jail_import_env(struct pakfire_jail* jail, const char* env[]) {
 485         if (!env)
 486                 return 0;
 487
 488         char* key;
 489         char* val;
 490         int r;
 491
 492         // Copy environment variables
 493         for (unsigned int i = 0; env[i]; i++) {
 494                 r = pakfire_string_partition(env[i], "=", &key, &val);
 495                 if (r)
 496                         continue;
 497
 498                 // Set value
 499                 r = pakfire_jail_set_env(jail, key, val);
 500
 501                 if (key)
 502                         free(key);
 503                 if (val)
 504                         free(val);
 505
 506                 // Break on error
 507                 if (r)
 508                         return r;
 509         }
 510
 511         return 0;
 512 }
 513
 514 // Timeout
 515
 516 PAKFIRE_EXPORT int pakfire_jail_set_timeout(
 517                 struct pakfire_jail* jail, unsigned int timeout) {
 518         // Store value
 519         jail->timeout.it_value.tv_sec = timeout;
 520
 521         if (timeout > 0)
 522                 DEBUG(jail->pakfire, "Timeout set to %u second(s)\n", timeout);
 523         else
 524                 DEBUG(jail->pakfire, "Timeout disabled\n");
 525
 526         return 0;
 527 }
 528
 529 static int pakfire_jail_create_timer(struct pakfire_jail* jail) {
 530         int r;
 531
 532         // Nothing to do if no timeout has been set
 533         if (!jail->timeout.it_value.tv_sec)
 534                 return -1;
 535
 536         // Create a new timer
 537         const int fd = timerfd_create(CLOCK_MONOTONIC, 0);
 538         if (fd < 0) {
 539                 ERROR(jail->pakfire, "Could not create timer: %m\n");
 540                 goto ERROR;
 541         }
 542
 543         // Arm timer
 544         r = timerfd_settime(fd, 0, &jail->timeout, NULL);
 545         if (r) {
 546                 ERROR(jail->pakfire, "Could not arm timer: %m\n");
 547                 goto ERROR;
 548         }
 549
 550         return fd;
 551
 552 ERROR:
 553         if (fd >= 0)
 554                 close(fd);
 555
 556         return -1;
 557 }
 558
 559 /*
 560         This function replaces any logging in the child process.
 561
 562         All log messages will be sent to the parent process through their respective pipes.
 563 */
 564 static void pakfire_jail_log_redirect(void* data, int priority, const char* file,
 565                 int line, const char* fn, const char* format, va_list args) {
 566         struct pakfire_jail_pipes* pipes = (struct pakfire_jail_pipes*)data;
 567         int fd;
 568
 569         switch (priority) {
 570                 case LOG_INFO:
 571                         fd = pipes->log_INFO[1];
 572                         break;
 573
 574                 case LOG_ERR:
 575                         fd = pipes->log_ERROR[1];
 576                         break;
 577
 578 #ifdef ENABLE_DEBUG
 579                 case LOG_DEBUG:
 580                         fd = pipes->log_DEBUG[1];
 581                         break;
 582 #endif /* ENABLE_DEBUG */
 583
 584                 // Ignore any messages of an unknown priority
 585                 default:
 586                         return;
 587         }
 588
 589         // Send the log message
 590         if (fd >= 0)
 591                 vdprintf(fd, format, args);
 592 }
 593
 594 static int pakfire_jail_log_buffer_is_full(const struct pakfire_log_buffer* buffer) {
 595         return (sizeof(buffer->data) == buffer->used);
 596 }
 597
 598 static int pakfire_jail_fill_buffer(struct pakfire_jail* jail, int fd, struct pakfire_log_buffer* buffer) {
 599         int r;
 600
 601         // Skip this if there is not space left in the buffer
 602         if (buffer->used >= sizeof(buffer->data))
 603                 return 0;
 604
 605         // Fill the buffer
 606         r = read(fd, buffer->data + buffer->used, sizeof(buffer->data) - buffer->used);
 607
 608         // Handle errors
 609         if (r < 0) {
 610                 switch (errno) {
 611                         case EAGAIN:
 612                         case EIO:
 613                                 break;
 614
 615                         default:
 616                                 return -errno;
 617                 }
 618
 619         // EOF
 620         } else if (r == 0) {
 621                 // XXX What to do here?
 622
 623         // Successful read
 624         } else {
 625                 buffer->used += r;
 626         }
 627
 628         return 0;
 629 }
 630
 631 static int pakfire_jail_drain_buffer_with_callback(struct pakfire_jail* jail,
 632                 struct pakfire_log_buffer* buffer, int priority, pakfire_jail_communicate_out callback, void* data) {
 633         const char* eol = NULL;
 634         int r;
 635
 636         while (buffer->used) {
 637                 // Search for the end of the first line
 638                 eol = memchr(buffer->data, '\n', buffer->used);
 639
 640                 // No newline found
 641                 if (!eol) {
 642                         // If the buffer is full, we send the entire content to make space.
 643                         if (pakfire_jail_log_buffer_is_full(buffer)) {
 644                                 CTX_DEBUG(jail->ctx, "Buffer is full. Sending all content\n");
 645
 646                                 eol = buffer->data + buffer->used - 1;
 647
 648                         // Otherwise we might have only read parts of the output...
 649                         } else {
 650                                 break;
 651                         }
 652                 }
 653
 654                 // Find the length of the string
 655                 const size_t length = eol - buffer->data + 1;
 656
 657                 // Call the callback
 658                 r = callback(jail->pakfire, data, priority, buffer->data, length);
 659                 if (r) {
 660                         CTX_ERROR(jail->ctx, "The logging callback returned an error: %d\n", r);
 661                         return r;
 662                 }
 663
 664                 // Remove line from buffer
 665                 memmove(buffer->data, buffer->data + length, buffer->used - length);
 666                 buffer->used -= length;
 667         }
 668
 669         return 0;
 670 }
 671
 672 static int pakfire_jail_drain_buffer(struct pakfire_jail* jail, int fd, struct pakfire_log_buffer* buffer) {
 673         int r;
 674
 675         // Nothing to do if the buffer is empty
 676         if (!buffer->used)
 677                 return 0;
 678
 679         // Do not try to write to an invalid file descriptor
 680         if (fd < 0)
 681                 return 0;
 682
 683         // Drain the buffer
 684         r = write(fd, buffer->data, buffer->used);
 685
 686         // Handle errors
 687         if (r < 0) {
 688                 switch (errno) {
 689                         case EAGAIN:
 690                         case EIO:
 691                                 break;
 692
 693                         default:
 694                                 return -errno;
 695                 }
 696
 697         // Successful write
 698         } else {
 699                 memmove(buffer->data, buffer->data + r, buffer->used - r);
 700
 701                 buffer->used -= r;
 702         }
 703
 704         return 0;
 705 }
 706
 707 /*
 708         This function reads as much data as it can from the file descriptor.
 709         If it finds a whole line in it, it will send it to the logger and repeat the process.
 710         If not newline character is found, it will try to read more data until it finds one.
 711 */
 712 static int pakfire_jail_handle_log(struct pakfire_jail* jail, struct pakfire_jail_exec* ctx,
 713                 int priority, int fd, struct pakfire_log_buffer* buffer,
 714                 pakfire_jail_communicate_out callback, void* data) {
 715         int r;
 716
 717         // Fill up buffer from fd
 718         r = pakfire_jail_fill_buffer(jail, fd, buffer);
 719         if (r)
 720                 return r;
 721
 722         // Drain the buffer
 723         r = pakfire_jail_drain_buffer_with_callback(jail, buffer, priority, callback, data);
 724         if (r)
 725                 return r;
 726
 727         return 0;
 728 }
 729
// NOTE: Disabled code. Everything up to the matching #endif is excluded from
// compilation. It refers to ctx->pipes.stdin, which is not a member of
// struct pakfire_jail_pipes as declared in this file.
#if 0
/*
	Calls the standard input callback with fd. If the callback returns EOF,
	fd is closed and zero is returned; any other value is passed on.
*/
static int pakfire_jail_stream_stdin(struct pakfire_jail* jail,
		struct pakfire_jail_exec* ctx, const int fd) {
	int r;

	// Nothing to do if there is no stdin callback set
	if (!ctx->communicate.in) {
		DEBUG(jail->pakfire, "Callback for standard input is not set\n");
		return 0;
	}

	// Skip if the writing pipe has already been closed
	if (ctx->pipes.stdin[1] < 0)
		return 0;

	DEBUG(jail->pakfire, "Streaming standard input...\n");

	// Calling the callback
	r = ctx->communicate.in(jail->pakfire, ctx->communicate.data, fd);

	DEBUG(jail->pakfire, "Standard input callback finished: %d\n", r);

	// The callback signaled that it has written everything
	if (r == EOF) {
		DEBUG(jail->pakfire, "Closing standard input pipe\n");

		// Close the file-descriptor
		close(fd);

		// Reset the file-descriptor so it won't be closed again later
		ctx->pipes.stdin[1] = -1;

		// Report success
		r = 0;
	}

	return r;
}
#endif
 769
 770 static int pakfire_jail_recv_fd(struct pakfire_jail* jail, int socket, int* fd) {
 771         const size_t payload_length = sizeof(fd);
 772         char buffer[CMSG_SPACE(payload_length)];
 773         int r;
 774
 775         struct msghdr msg = {
 776                 .msg_control    = buffer,
 777                 .msg_controllen = sizeof(buffer),
 778         };
 779
 780         // Receive the message
 781         r = recvmsg(socket, &msg, 0);
 782         if (r) {
 783                 CTX_ERROR(jail->ctx, "Could not receive file descriptor: %s\n", strerror(errno));
 784                 return -errno;
 785         }
 786
 787         // Fetch the payload
 788         struct cmsghdr* cmsg = CMSG_FIRSTHDR(&msg);
 789         if (!cmsg)
 790                 return -EBADMSG;
 791
 792         *fd = *((int*)CMSG_DATA(cmsg));
 793
 794         CTX_DEBUG(jail->ctx, "Received fd %d from socket %d\n", *fd, socket);
 795
 796         return 0;
 797 }
 798
 799 static int pakfire_jail_send_fd(struct pakfire_jail* jail, int socket, int fd) {
 800         const size_t payload_length = sizeof(fd);
 801         char buffer[CMSG_SPACE(payload_length)];
 802         int r;
 803
 804         CTX_DEBUG(jail->ctx, "Sending fd %d to socket %d\n", fd, socket);
 805
 806         // Header
 807         struct msghdr msg = {
 808                 .msg_control    = buffer,
 809                 .msg_controllen = sizeof(buffer),
 810         };
 811
 812         // Payload
 813         struct cmsghdr* cmsg = CMSG_FIRSTHDR(&msg);
 814         cmsg->cmsg_level = SOL_SOCKET;
 815         cmsg->cmsg_type  = SCM_RIGHTS;
 816         cmsg->cmsg_len   = CMSG_LEN(payload_length);
 817
 818         // Set payload
 819         *((int*)CMSG_DATA(cmsg)) = fd;
 820
 821         // Send the message
 822         r = sendmsg(socket, &msg, 0);
 823         if (r) {
 824                 CTX_ERROR(jail->ctx, "Could not send file descriptor: %s\n", strerror(errno));
 825                 return -errno;
 826         }
 827
 828         return 0;
 829 }
 830
 831 static int pakfire_jail_setup_pipe(struct pakfire_jail* jail, int (*fds)[2], const int flags) {
 832         int r = pipe2(*fds, flags);
 833         if (r < 0) {
 834                 ERROR(jail->pakfire, "Could not setup pipe: %m\n");
 835                 return 1;
 836         }
 837
 838         return 0;
 839 }
 840
 841 static void pakfire_jail_close_pipe(struct pakfire_jail* jail, int fds[2]) {
 842         for (unsigned int i = 0; i < 2; i++)
 843                 if (fds[i] >= 0)
 844                         close(fds[i]);
 845 }
 846
 847 /*
 848         This is a convenience function to fetch the reading end of a pipe and
 849         closes the write end.
 850 */
 851 static int pakfire_jail_get_pipe_to_read(struct pakfire_jail* jail, int (*fds)[2]) {
 852         // Give the variables easier names to avoid confusion
 853         int* fd_read  = &(*fds)[0];
 854         int* fd_write = &(*fds)[1];
 855
 856         // Close the write end of the pipe
 857         if (*fd_write >= 0) {
 858                 close(*fd_write);
 859                 *fd_write = -1;
 860         }
 861
 862         // Return the read end
 863         if (*fd_read >= 0)
 864                 return *fd_read;
 865
 866         return -1;
 867 }
 868
 869 static int pakfire_jail_get_pipe_to_write(struct pakfire_jail* jail, int (*fds)[2]) {
 870         // Give the variables easier names to avoid confusion
 871         int* fd_read  = &(*fds)[0];
 872         int* fd_write = &(*fds)[1];
 873
 874         // Close the read end of the pipe
 875         if (*fd_read >= 0) {
 876                 close(*fd_read);
 877                 *fd_read = -1;
 878         }
 879
 880         // Return the write end
 881         if (*fd_write >= 0)
 882                 return *fd_write;
 883
 884         return -1;
 885 }
 886
 887 static int pakfire_jail_log(struct pakfire* pakfire, void* data, int priority,
 888                 const char* line, const size_t length) {
 889         // Pass everything to the parent logger
 890         pakfire_log_condition(pakfire, priority, 0, "%.*s", (int)length, line);
 891
 892         return 0;
 893 }
 894
 895 static int pakfire_jail_epoll_add_fd(struct pakfire_jail* jail, int epollfd, int fd, int events) {
 896         struct epoll_event event = {
 897                 .events = events|EPOLLHUP,
 898                 .data   = {
 899                         .fd = fd,
 900                 },
 901         };
 902         int r;
 903
 904         // Read flags
 905         int flags = fcntl(fd, F_GETFL, 0);
 906
 907         // Set modified flags
 908         r  = fcntl(fd, F_SETFL, flags|O_NONBLOCK);
 909         if (r < 0) {
 910                 CTX_ERROR(jail->ctx, "Could not set file descriptor %d into non-blocking mode: %s\n",
 911                         fd, strerror(errno));
 912                 return -errno;
 913         }
 914
 915         // Add the file descriptor to the loop
 916         r = epoll_ctl(epollfd, EPOLL_CTL_ADD, fd, &event);
 917         if (r < 0) {
 918                 ERROR(jail->pakfire, "Could not add file descriptor %d to epoll(): %s\n",
 919                         fd, strerror(errno));
 920                 return -errno;
 921         }
 922
 923         return 0;
 924 }
 925
 926 // PTY Forwarding
 927
 928 static int pakfire_jail_enable_raw_mode(struct pakfire_jail* jail,
 929                 struct pakfire_jail_pty_stdio* stdio) {
 930         struct termios raw_attrs;
 931         int r;
 932
 933         // Skip if we don't know the file descriptor
 934         if (stdio->fd < 0)
 935                 return 0;
 936
 937         // Skip everything if fd is not a TTY
 938         if (!isatty(stdio->fd))
 939                 return 0;
 940
 941         // Store flags
 942         stdio->fdflags = fcntl(stdio->fd, F_GETFL);
 943         if (stdio->fdflags < 0) {
 944                 CTX_ERROR(jail->ctx, "Could not fetch flags from fd %d: %s\n",
 945                         stdio->fd, strerror(errno));
 946                 return -errno;
 947         }
 948
 949         // Fetch all attributes
 950         r = tcgetattr(stdio->fd, &stdio->attrs);
 951         if (r) {
 952                 CTX_ERROR(jail->ctx, "Could not fetch terminal attributes from fd %d: %s\n",
 953                         stdio->fd, strerror(errno));
 954                 return -errno;
 955         }
 956
 957         // Copy all attributes
 958         raw_attrs = stdio->attrs;
 959
 960         // Make it RAW
 961         cfmakeraw(&raw_attrs);
 962
 963         switch (stdio->fd) {
 964                 case STDIN_FILENO:
 965                         raw_attrs.c_oflag = stdio->attrs.c_oflag;
 966                         break;
 967
 968                 case STDOUT_FILENO:
 969                         raw_attrs.c_iflag = stdio->attrs.c_iflag;
 970                         raw_attrs.c_lflag = stdio->attrs.c_lflag;
 971                         break;
 972         }
 973
 974         // Restore the attributes
 975         r = tcsetattr(stdio->fd, TCSANOW, &raw_attrs);
 976         if (r) {
 977                 CTX_ERROR(jail->ctx, "Could not restore terminal attributes for fd %d: %s\n",
 978                         stdio->fd, strerror(errno));
 979                 return -errno;
 980         }
 981
 982         return 0;
 983 }
 984
 985 static int pakfire_jail_restore_attrs(struct pakfire_jail* jail,
 986                 const struct pakfire_jail_pty_stdio* stdio) {
 987         int r;
 988
 989         // Skip if we don't know the file descriptor
 990         if (stdio->fd < 0)
 991                 return 0;
 992
 993         // Skip everything if fd is not a TTY
 994         if (!isatty(stdio->fd))
 995                 return 0;
 996
 997         // Restore the flags
 998         r = fcntl(stdio->fd, F_SETFL, stdio->fdflags);
 999         if (r < 0) {
1000                 CTX_ERROR(jail->ctx, "Could not set flags for file descriptor %d: %s\n",
1001                         stdio->fd, strerror(errno));
1002                 return -errno;
1003         }
1004
1005         // Restore the attributes
1006         r = tcsetattr(stdio->fd, TCSANOW, &stdio->attrs);
1007         if (r) {
1008                 CTX_ERROR(jail->ctx, "Could not restore terminal attributes for %d, ignoring: %s\n",
1009                         stdio->fd, strerror(errno));
1010                 return -errno;
1011         }
1012
1013         return 0;
1014 }
1015
1016 static int pakfire_jail_setup_pty_forwarding(struct pakfire_jail* jail,
1017                 struct pakfire_jail_exec* ctx, const int epollfd, const int fd) {
1018         struct winsize size;
1019         int r;
1020
1021         CTX_DEBUG(jail->ctx, "Setting up PTY forwarding on fd %d\n", fd);
1022
1023         // Store the file descriptor
1024         ctx->pty.master.fd = fd;
1025
1026         // Add the master to the event loop
1027         r = pakfire_jail_epoll_add_fd(jail, epollfd, ctx->pty.master.fd, EPOLLIN|EPOLLOUT|EPOLLET);
1028         if (r)
1029                 return r;
1030
1031         if (ctx->flags & PAKFIRE_JAIL_PTY_FORWARDING) {
1032                 // Configure stdin/stdout
1033                 ctx->pty.stdin.fd  = STDIN_FILENO;
1034                 ctx->pty.stdout.fd = STDOUT_FILENO;
1035
1036                 // Fetch dimensions
1037                 if (isatty(ctx->pty.stdout.fd)) {
1038                         r = ioctl(ctx->pty.stdout.fd, TIOCGWINSZ, &size);
1039                         if (r) {
1040                                 CTX_ERROR(jail->ctx, "Failed to determine terminal dimensions: %s\n", strerror(errno));
1041                                 return -errno;
1042                         }
1043
1044                         // Set dimensions
1045                         r = ioctl(ctx->pty.master.fd, TIOCSWINSZ, &size);
1046                         if (r) {
1047                                 CTX_ERROR(jail->ctx, "Failed setting dimensions: %s\n", strerror(errno));
1048                                 return -errno;
1049                         }
1050                 }
1051
1052                 // Enable RAW mode on standard input
1053                 r = pakfire_jail_enable_raw_mode(jail, &ctx->pty.stdin);
1054                 if (r)
1055                         return r;
1056
1057                 // Enable RAW mode on standard output
1058                 r = pakfire_jail_enable_raw_mode(jail, &ctx->pty.stdout);
1059                 if (r)
1060                         return r;
1061
1062                 // Add standard input to the event loop
1063                 r = pakfire_jail_epoll_add_fd(jail, epollfd, ctx->pty.stdin.fd, EPOLLIN|EPOLLET);
1064                 if (r)
1065                         return r;
1066
1067                 // Add standard output to the event loop
1068                 r = pakfire_jail_epoll_add_fd(jail, epollfd, ctx->pty.stdout.fd, EPOLLOUT|EPOLLET);
1069                 if (r)
1070                         return r;
1071         }
1072
1073         return 0;
1074 }
1075
1076 static int pakfire_jail_forward_pty(struct pakfire_jail* jail, struct pakfire_jail_exec* ctx) {
1077         int r;
1078
1079         // Read from standard input
1080         if (ctx->pty.stdin.flags & PAKFIRE_JAIL_PTY_READY_TO_READ) {
1081                 r = pakfire_jail_fill_buffer(jail, ctx->pty.stdin.fd, &ctx->pty.stdin.buffer);
1082                 if (r) {
1083                         CTX_ERROR(jail->ctx, "Failed reading from standard input: %s\n", strerror(-r));
1084                         return r;
1085                 }
1086
1087                 // We are done reading for now
1088                 ctx->pty.stdin.flags &= ~PAKFIRE_JAIL_PTY_READY_TO_READ;
1089
1090                 // But we may have data to write
1091                 if (ctx->pty.stdin.buffer.used)
1092                         ctx->pty.master.flags |= PAKFIRE_JAIL_PTY_READY_TO_WRITE;
1093         }
1094
1095         // Write to the master
1096         if (ctx->pty.master.flags & PAKFIRE_JAIL_PTY_READY_TO_WRITE) {
1097                 r = pakfire_jail_drain_buffer(jail, ctx->pty.master.fd, &ctx->pty.stdin.buffer);
1098                 if (r) {
1099                         CTX_ERROR(jail->ctx, "Failed writing to the PTY: %s\n", strerror(-r));
1100                         return r;
1101                 }
1102
1103                 // We are done writing for now
1104                 ctx->pty.master.flags &= ~PAKFIRE_JAIL_PTY_READY_TO_WRITE;
1105         }
1106
1107         // Read from the master
1108         if (ctx->pty.master.flags & PAKFIRE_JAIL_PTY_READY_TO_READ) {
1109                 r = pakfire_jail_fill_buffer(jail, ctx->pty.master.fd, &ctx->pty.stdout.buffer);
1110                 if (r) {
1111                         CTX_ERROR(jail->ctx, "Failed reading from the PTY: %s\n", strerror(-r));
1112                         return r;
1113                 }
1114
1115                 // We are done reading for now
1116                 ctx->pty.master.flags &= ~PAKFIRE_JAIL_PTY_READY_TO_READ;
1117
1118                 // But we may have data to write
1119                 if (ctx->pty.stdout.buffer.used)
1120                         ctx->pty.stdout.flags |= PAKFIRE_JAIL_PTY_READY_TO_WRITE;
1121         }
1122
1123         // Write to standard output
1124         if (ctx->pty.stdout.flags & PAKFIRE_JAIL_PTY_READY_TO_WRITE) {
1125                 // If we have a callback, we will send any output to the callback
1126                 if (ctx->communicate.out) {
1127                         r = pakfire_jail_drain_buffer_with_callback(jail, &ctx->pty.stdout.buffer,
1128                                 LOG_INFO, ctx->communicate.out, ctx->communicate.data);
1129                         if (r)
1130                                 return r;
1131
1132                 // If we have a file descriptor, we will forward any output
1133                 } else if (ctx->pty.stdout.fd >= 0) {
1134                         r = pakfire_jail_drain_buffer(jail, ctx->pty.stdout.fd, &ctx->pty.stdout.buffer);
1135                         if (r) {
1136                                 CTX_ERROR(jail->ctx, "Failed writing to standard output: %s\n", strerror(-r));
1137                                 return r;
1138                         }
1139
1140                 // Otherwise we log a message
1141                 } else {
1142                         CTX_ERROR(jail->ctx, "No output configured for the PTY\n");
1143                 }
1144
1145                 // We are done writing for now
1146                 ctx->pty.stdout.flags &= ~PAKFIRE_JAIL_PTY_READY_TO_WRITE;
1147         }
1148
1149         return 0;
1150 }
1151
1152 static int pakfire_jail_wait(struct pakfire_jail* jail, struct pakfire_jail_exec* ctx) {
1153         int epollfd = -1;
1154         struct epoll_event events[EPOLL_MAX_EVENTS];
1155         char garbage[8];
1156         int r = 0;
1157
1158         // Fetch file descriptors from context
1159         const int pidfd  = ctx->pidfd;
1160
1161         // Fetch the UNIX domain socket
1162         const int socket_recv = pakfire_jail_get_pipe_to_read(jail, &ctx->socket);
1163
1164         // Timer
1165         const int timerfd = pakfire_jail_create_timer(jail);
1166
1167         // Logging
1168         const int log_INFO  = pakfire_jail_get_pipe_to_read(jail, &ctx->pipes.log_INFO);
1169         const int log_ERROR = pakfire_jail_get_pipe_to_read(jail, &ctx->pipes.log_ERROR);
1170 #ifdef ENABLE_DEBUG
1171         const int log_DEBUG = pakfire_jail_get_pipe_to_read(jail, &ctx->pipes.log_DEBUG);
1172 #endif /* ENABLE_DEBUG */
1173
1174         // Make a list of all file descriptors we are interested in
1175         const struct pakfire_wait_fds {
1176                 const int fd;
1177                 const int events;
1178         } fds[] = {
1179                 // Timer
1180                 { timerfd, EPOLLIN },
1181
1182                 // Child Process
1183                 { ctx->pidfd, EPOLLIN },
1184
1185                 // Log Pipes
1186                 { log_INFO, EPOLLIN },
1187                 { log_ERROR, EPOLLIN },
1188 #ifdef ENABLE_DEBUG
1189                 { log_DEBUG, EPOLLIN },
1190 #endif /* ENABLE_DEBUG */
1191
1192                 // UNIX Domain Socket
1193                 { socket_recv, EPOLLIN },
1194
1195                 // Sentinel
1196                 { -1, 0 },
1197         };
1198
1199         // Setup epoll
1200         epollfd = epoll_create1(0);
1201         if (epollfd < 0) {
1202                 ERROR(jail->pakfire, "Could not initialize epoll(): %m\n");
1203                 r = 1;
1204                 goto ERROR;
1205         }
1206
1207         // Turn file descriptors into non-blocking mode and add them to epoll()
1208         for (const struct pakfire_wait_fds* fd = fds; fd->events; fd++) {
1209                 // Skip fds which were not initialized
1210                 if (fd->fd < 0)
1211                         continue;
1212
1213                 // Add the FD to the event loop
1214                 r = pakfire_jail_epoll_add_fd(jail, epollfd, fd->fd, fd->events);
1215                 if (r)
1216                         goto ERROR;
1217         }
1218
1219         int ended = 0;
1220
1221         // Loop for as long as the process is alive
1222         while (!ended) {
1223                 int num = epoll_wait(epollfd, events, EPOLL_MAX_EVENTS, -1);
1224                 if (num < 1) {
1225                         // Ignore if epoll_wait() has been interrupted
1226                         if (errno == EINTR)
1227                                 continue;
1228
1229                         ERROR(jail->pakfire, "epoll_wait() failed: %m\n");
1230                         r = 1;
1231
1232                         goto ERROR;
1233                 }
1234
1235                 for (int i = 0; i < num; i++) {
1236                         int e  = events[i].events;
1237                         int fd = events[i].data.fd;
1238
1239                         // Handle PTY forwarding events
1240                         if (ctx->pty.master.fd == fd) {
1241                                 if (e & (EPOLLIN|EPOLLHUP))
1242                                         ctx->pty.master.flags |= PAKFIRE_JAIL_PTY_READY_TO_READ;
1243
1244                                 if (e & (EPOLLOUT|EPOLLHUP))
1245                                         ctx->pty.master.flags |= PAKFIRE_JAIL_PTY_READY_TO_WRITE;
1246
1247                                 // Perform the work
1248                                 r = pakfire_jail_forward_pty(jail, ctx);
1249                                 if (r) {
1250                                         CTX_ERROR(jail->ctx, "Failed forwarding the PTY: %s\n", strerror(-r));
1251                                         goto ERROR;
1252                                 }
1253
1254                         // Handle standard input
1255                         } else if (ctx->pty.stdin.fd == fd) {
1256                                 if (e & (EPOLLIN|EPOLLHUP))
1257                                         ctx->pty.stdin.flags |= PAKFIRE_JAIL_PTY_READY_TO_READ;
1258
1259                                 // Perform the work
1260                                 r = pakfire_jail_forward_pty(jail, ctx);
1261                                 if (r) {
1262                                         CTX_ERROR(jail->ctx, "Failed forwarding the PTY: %s\n", strerror(-r));
1263                                         goto ERROR;
1264                                 }
1265
1266                         // Handle standard output
1267                         } else if (ctx->pty.stdout.fd == fd) {
1268                                 if (e & (EPOLLOUT|EPOLLHUP))
1269                                         ctx->pty.stdout.flags |= PAKFIRE_JAIL_PTY_READY_TO_WRITE;
1270
1271                                 // Perform the work
1272                                 r = pakfire_jail_forward_pty(jail, ctx);
1273                                 if (r) {
1274                                         CTX_ERROR(jail->ctx, "Failed forwarding the PTY: %s\n", strerror(-r));
1275                                         goto ERROR;
1276                                 }
1277
1278                         // Handle any changes to the PIDFD
1279                         } else if (pidfd == fd) {
1280                                 if (e & EPOLLIN) {
1281                                         // Call waidid() and store the result
1282                                         r = waitid(P_PIDFD, ctx->pidfd, &ctx->status, WEXITED);
1283                                         if (r) {
1284                                                 ERROR(jail->pakfire, "waitid() failed: %m\n");
1285                                                 goto ERROR;
1286                                         }
1287
1288                                         // Mark that we have ended so that we will process the remaining
1289                                         // events from epoll() now, but won't restart the outer loop.
1290                                         ended = 1;
1291                                 }
1292
1293                         // Handle timer events
1294                         } else if (timerfd == fd) {
1295                                 if (e & EPOLLIN) {
1296                                         DEBUG(jail->pakfire, "Timer event received\n");
1297
1298                                         // Disarm the timer
1299                                         r = read(timerfd, garbage, sizeof(garbage));
1300                                         if (r < 1) {
1301                                                 ERROR(jail->pakfire, "Could not disarm timer: %m\n");
1302                                                 r = 1;
1303                                                 goto ERROR;
1304                                         }
1305
1306                                         // Terminate the process if it hasn't already ended
1307                                         if (!ended) {
1308                                                 DEBUG(jail->pakfire, "Terminating process...\n");
1309
1310                                                 // Send SIGTERM to the process
1311                                                 r = pidfd_send_signal(pidfd, SIGKILL, NULL, 0);
1312                                                 if (r) {
1313                                                         ERROR(jail->pakfire, "Could not kill process: %m\n");
1314                                                         goto ERROR;
1315                                                 }
1316                                         }
1317                                 }
1318
1319                         // Handle socket messages
1320                         } else if (socket_recv == fd) {
1321                                 if (e & EPOLLIN) {
1322                                         // Receive the passed FD
1323                                         r = pakfire_jail_recv_fd(jail, socket_recv, &fd);
1324                                         if (r)
1325                                                 goto ERROR;
1326
1327                                         // Setup PTY forwarding
1328                                         if (ctx->pty.master.fd < 0) {
1329                                                 r = pakfire_jail_setup_pty_forwarding(jail, ctx, epollfd, fd);
1330                                                 if (r) {
1331                                                         CTX_ERROR(jail->ctx, "Failed setting up PTY forwarding: %s\n", strerror(-r));
1332                                                         goto ERROR;
1333                                                 }
1334                                         }
1335                                 }
1336
1337                         // Handle log INFO messages
1338                         } else if (log_INFO == fd) {
1339                                 if (e & EPOLLIN) {
1340                                         r = pakfire_jail_handle_log(jail, ctx, LOG_INFO, fd,
1341                                                 &ctx->buffers.log_INFO, pakfire_jail_log, NULL);
1342                                         if (r)
1343                                                 goto ERROR;
1344                                 }
1345
1346                         // Handle log ERROR messages
1347                         } else if (log_ERROR == fd) {
1348                                 if (e & EPOLLIN) {
1349                                         r = pakfire_jail_handle_log(jail, ctx, LOG_ERR, fd,
1350                                                 &ctx->buffers.log_ERROR, pakfire_jail_log, NULL);
1351                                         if (r)
1352                                                 goto ERROR;
1353                                 }
1354
1355 #ifdef ENABLE_DEBUG
1356                         // Handle log DEBUG messages
1357                         } else if (log_DEBUG == fd) {
1358                                 if (e & EPOLLIN) {
1359                                         r = pakfire_jail_handle_log(jail, ctx, LOG_DEBUG, fd,
1360                                                 &ctx->buffers.log_DEBUG, pakfire_jail_log, NULL);
1361                                         if (r)
1362                                                 goto ERROR;
1363                                 }
1364 #endif /* ENABLE_DEBUG */
1365
1366                         // Log a message for anything else
1367                         } else {
1368                                 DEBUG(jail->pakfire, "Received invalid file descriptor %d\n", fd);
1369                                 continue;
1370                         }
1371
1372                         // Check if any file descriptors have been closed
1373                         if (e & EPOLLHUP) {
1374                                 // Remove the file descriptor
1375                                 r = epoll_ctl(epollfd, EPOLL_CTL_DEL, fd, NULL);
1376                                 if (r) {
1377                                         ERROR(jail->pakfire, "Could not remove closed file-descriptor %d: %m\n", fd);
1378                                         goto ERROR;
1379                                 }
1380                         }
1381                 }
1382         }
1383
1384 ERROR:
1385         if (epollfd >= 0)
1386                 close(epollfd);
1387         if (timerfd >= 0)
1388                 close(timerfd);
1389
1390         // Restore any changed terminal attributes
1391         if (ctx->pty.stdin.fd >= 0)
1392                 pakfire_jail_restore_attrs(jail, &ctx->pty.stdin);
1393         if (ctx->pty.stdout.fd >= 0)
1394                 pakfire_jail_restore_attrs(jail, &ctx->pty.stdout);
1395
1396         return r;
1397 }
1398
/*
        Output callback which collects everything logged as LOG_INFO in a
        heap-allocated string.

        data must be NULL or point to a char* which is either NULL or a string
        that can be passed to free(). Every call replaces that string with a
        newly allocated one which has the line appended. If the allocation
        fails, the existing string is left untouched.

        Anything else is passed on to the default log callback.

        Returns zero on success or a negative errno on failure.
*/
int pakfire_jail_capture_stdout(struct pakfire* pakfire, void* data,
                int priority, const char* line, size_t length) {
        char** output = (char**)data;
        char* buffer = NULL;
        int r;

        // Append everything from stdout to a buffer
        if (output && priority == LOG_INFO) {
                // Format into a temporary pointer, because asprintf() would
                // otherwise overwrite (and leak) the string we are reading from
                r = asprintf(&buffer, "%s%.*s", (*output) ? *output : "", (int)length, line);
                if (r < 0)
                        return -errno;

                // Replace the previous string with the extended one
                free(*output);
                *output = buffer;

                return 0;
        }

        // Send everything else to the default logger
        return pakfire_jail_default_log_callback(pakfire, NULL, priority, line, length);
}
1416
1417 // Capabilities
1418
1419 // Logs all capabilities of the current process
1420 static int pakfire_jail_show_capabilities(struct pakfire_jail* jail) {
1421         cap_t caps = NULL;
1422         char* name = NULL;
1423         cap_flag_value_t value_e;
1424         cap_flag_value_t value_i;
1425         cap_flag_value_t value_p;
1426         int r;
1427
1428         // Fetch PID
1429         pid_t pid = getpid();
1430
1431         // Fetch all capabilities
1432         caps = cap_get_proc();
1433         if (!caps) {
1434                 ERROR(jail->pakfire, "Could not fetch capabilities: %m\n");
1435                 r = 1;
1436                 goto ERROR;
1437         }
1438
1439         DEBUG(jail->pakfire, "Capabilities of PID %d:\n", pid);
1440
1441         // Iterate over all capabilities
1442         for (unsigned int cap = 0; cap_valid(cap); cap++) {
1443                 name = cap_to_name(cap);
1444
1445                 // Fetch effective value
1446                 r = cap_get_flag(caps, cap, CAP_EFFECTIVE, &value_e);
1447                 if (r)
1448                         goto ERROR;
1449
1450                 // Fetch inheritable value
1451                 r = cap_get_flag(caps, cap, CAP_INHERITABLE, &value_i);
1452                 if (r)
1453                         goto ERROR;
1454
1455                 // Fetch permitted value
1456                 r = cap_get_flag(caps, cap, CAP_PERMITTED, &value_p);
1457                 if (r)
1458                         goto ERROR;
1459
1460                 DEBUG(jail->pakfire,
1461                         "  %-24s : %c%c%c\n",
1462                         name,
1463                         (value_e == CAP_SET) ? 'e' : '-',
1464                         (value_i == CAP_SET) ? 'i' : '-',
1465                         (value_p == CAP_SET) ? 'p' : '-'
1466                 );
1467
1468                 // Free name
1469                 cap_free(name);
1470                 name = NULL;
1471         }
1472
1473         // Success
1474         r = 0;
1475
1476 ERROR:
1477         if (name)
1478                 cap_free(name);
1479         if (caps)
1480                 cap_free(caps);
1481
1482         return r;
1483 }
1484
1485 static int pakfire_jail_set_capabilities(struct pakfire_jail* jail) {
1486         cap_t caps = NULL;
1487         char* name = NULL;
1488         int r;
1489
1490         // Fetch capabilities
1491         caps = cap_get_proc();
1492         if (!caps) {
1493                 ERROR(jail->pakfire, "Could not read capabilities: %m\n");
1494                 r = 1;
1495                 goto ERROR;
1496         }
1497
1498         // Walk through all capabilities
1499         for (cap_value_t cap = 0; cap_valid(cap); cap++) {
1500                 cap_value_t _caps[] = { cap };
1501
1502                 // Fetch the name of the capability
1503                 name = cap_to_name(cap);
1504
1505                 r = cap_set_flag(caps, CAP_EFFECTIVE, 1, _caps, CAP_SET);
1506                 if (r) {
1507                         ERROR(jail->pakfire, "Could not set %s: %m\n", name);
1508                         goto ERROR;
1509                 }
1510
1511                 r = cap_set_flag(caps, CAP_INHERITABLE, 1, _caps, CAP_SET);
1512                 if (r) {
1513                         ERROR(jail->pakfire, "Could not set %s: %m\n", name);
1514                         goto ERROR;
1515                 }
1516
1517                 r = cap_set_flag(caps, CAP_PERMITTED, 1, _caps, CAP_SET);
1518                 if (r) {
1519                         ERROR(jail->pakfire, "Could not set %s: %m\n", name);
1520                         goto ERROR;
1521                 }
1522
1523                 // Free name
1524                 cap_free(name);
1525                 name = NULL;
1526         }
1527
1528         // Restore all capabilities
1529         r = cap_set_proc(caps);
1530         if (r) {
1531                 ERROR(jail->pakfire, "Restoring capabilities failed: %m\n");
1532                 goto ERROR;
1533         }
1534
1535         // Add all capabilities to the ambient set
1536         for (unsigned int cap = 0; cap_valid(cap); cap++) {
1537                 name = cap_to_name(cap);
1538
1539                 // Raise the capability
1540                 r = prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_RAISE, cap, 0, 0);
1541                 if (r) {
1542                         ERROR(jail->pakfire, "Could not set ambient capability %s: %m\n", name);
1543                         goto ERROR;
1544                 }
1545
1546                 // Free name
1547                 cap_free(name);
1548                 name = NULL;
1549         }
1550
1551         // Success
1552         r = 0;
1553
1554 ERROR:
1555         if (name)
1556                 cap_free(name);
1557         if (caps)
1558                 cap_free(caps);
1559
1560         return r;
1561 }
1562
1563 // Syscall Filter
1564
1565 static int pakfire_jail_limit_syscalls(struct pakfire_jail* jail) {
1566         const int syscalls[] = {
1567                 // The kernel's keyring isn't namespaced
1568                 SCMP_SYS(keyctl),
1569                 SCMP_SYS(add_key),
1570                 SCMP_SYS(request_key),
1571
1572                 // Disable userfaultfd
1573                 SCMP_SYS(userfaultfd),
1574
1575                 // Disable perf which could leak a lot of information about the host
1576                 SCMP_SYS(perf_event_open),
1577
1578                 0,
1579         };
1580         int r = 1;
1581
1582         DEBUG(jail->pakfire, "Applying syscall filter...\n");
1583
1584         // Setup a syscall filter which allows everything by default
1585         scmp_filter_ctx ctx = seccomp_init(SCMP_ACT_ALLOW);
1586         if (!ctx) {
1587                 ERROR(jail->pakfire, "Could not setup seccomp filter: %m\n");
1588                 goto ERROR;
1589         }
1590
1591         // All all syscalls
1592         for (const int* syscall = syscalls; *syscall; syscall++) {
1593                 r = seccomp_rule_add(ctx, SCMP_ACT_ERRNO(EPERM), *syscall, 0);
1594                 if (r) {
1595                         ERROR(jail->pakfire, "Could not configure syscall %d: %m\n", *syscall);
1596                         goto ERROR;
1597                 }
1598         }
1599
1600         // Load syscall filter into the kernel
1601         r = seccomp_load(ctx);
1602         if (r) {
1603                 ERROR(jail->pakfire, "Could not load syscall filter into the kernel: %m\n");
1604                 goto ERROR;
1605         }
1606
1607 ERROR:
1608         if (ctx)
1609                 seccomp_release(ctx);
1610
1611         return r;
1612 }
1613
1614 // Mountpoints
1615
1616 PAKFIRE_EXPORT int pakfire_jail_bind(struct pakfire_jail* jail,
1617                 const char* source, const char* target, int flags) {
1618         struct pakfire_jail_mountpoint* mp = NULL;
1619         int r;
1620
1621         // Check if there is any space left
1622         if (jail->num_mountpoints >= MAX_MOUNTPOINTS) {
1623                 errno = ENOSPC;
1624                 return 1;
1625         }
1626
1627         // Check for valid inputs
1628         if (!source || !target) {
1629                 errno = EINVAL;
1630                 return 1;
1631         }
1632
1633         // Select the next free slot
1634         mp = &jail->mountpoints[jail->num_mountpoints];
1635
1636         // Copy source
1637         r = pakfire_string_set(mp->source, source);
1638         if (r) {
1639                 ERROR(jail->pakfire, "Could not copy source: %m\n");
1640                 return r;
1641         }
1642
1643         // Copy target
1644         r = pakfire_string_set(mp->target, target);
1645         if (r) {
1646                 ERROR(jail->pakfire, "Could not copy target: %m\n");
1647                 return r;
1648         }
1649
1650         // Copy flags
1651         mp->flags = flags;
1652
1653         // Increment counter
1654         jail->num_mountpoints++;
1655
1656         return 0;
1657 }
1658
1659 static int pakfire_jail_mount_networking(struct pakfire_jail* jail) {
1660         int r;
1661
1662         const char* paths[] = {
1663                 "/etc/hosts",
1664                 "/etc/resolv.conf",
1665                 NULL,
1666         };
1667
1668         // Bind-mount all paths read-only
1669         for (const char** path = paths; *path; path++) {
1670                 r = pakfire_bind(jail->pakfire, *path, NULL, MS_RDONLY);
1671                 if (r) {
1672                         switch (errno) {
1673                                 // Ignore if we don't have permission
1674                                 case EPERM:
1675                                         continue;
1676
1677                                 default:
1678                                         break;
1679                         }
1680                         return r;
1681                 }
1682         }
1683
1684         return 0;
1685 }
1686
1687 /*
1688         Mounts everything that we require in the new namespace
1689 */
1690 static int pakfire_jail_mount(struct pakfire_jail* jail, struct pakfire_jail_exec* ctx) {
1691         struct pakfire_jail_mountpoint* mp = NULL;
1692         int flags = 0;
1693         int r;
1694
1695         // Enable loop devices
1696         if (pakfire_jail_exec_has_flag(ctx, PAKFIRE_JAIL_HAS_LOOP_DEVICES))
1697                 flags |= PAKFIRE_MOUNT_LOOP_DEVICES;
1698
1699         // Mount all default stuff
1700         r = pakfire_mount_all(jail->pakfire, PAKFIRE_MNTNS_OUTER, flags);
1701         if (r)
1702                 return r;
1703
1704         // Populate /dev
1705         r = pakfire_populate_dev(jail->pakfire, flags);
1706         if (r)
1707                 return r;
1708
1709         // Mount the interpreter (if needed)
1710         r = pakfire_mount_interpreter(jail->pakfire);
1711         if (r)
1712                 return r;
1713
1714         // Mount networking stuff
1715         if (pakfire_jail_exec_has_flag(ctx, PAKFIRE_JAIL_HAS_NETWORKING)) {
1716                 r = pakfire_jail_mount_networking(jail);
1717                 if (r)
1718                         return r;
1719         }
1720
1721         // Mount all custom stuff
1722         for (unsigned int i = 0; i < jail->num_mountpoints; i++) {
1723                 // Fetch mountpoint
1724                 mp = &jail->mountpoints[i];
1725
1726                 // Mount it
1727                 r = pakfire_bind(jail->pakfire, mp->source, mp->target, mp->flags);
1728                 if (r)
1729                         return r;
1730         }
1731
1732         return 0;
1733 }
1734
1735 // Networking
1736
1737 static int pakfire_jail_setup_loopback(struct pakfire_jail* jail) {
1738         struct nl_sock* nl = NULL;
1739         struct nl_cache* cache = NULL;
1740         struct rtnl_link* link = NULL;
1741         struct rtnl_link* change = NULL;
1742         int r;
1743
1744         DEBUG(jail->pakfire, "Setting up loopback...\n");
1745
1746         // Allocate a netlink socket
1747         nl = nl_socket_alloc();
1748         if (!nl) {
1749                 ERROR(jail->pakfire, "Could not allocate a netlink socket: %m\n");
1750                 r = 1;
1751                 goto ERROR;
1752         }
1753
1754         // Connect the socket
1755         r = nl_connect(nl, NETLINK_ROUTE);
1756         if (r) {
1757                 ERROR(jail->pakfire, "Could not connect netlink socket: %s\n", nl_geterror(r));
1758                 goto ERROR;
1759         }
1760
1761         // Allocate the netlink cache
1762         r = rtnl_link_alloc_cache(nl, AF_UNSPEC, &cache);
1763         if (r < 0) {
1764                 ERROR(jail->pakfire, "Unable to allocate netlink cache: %s\n", nl_geterror(r));
1765                 goto ERROR;
1766         }
1767
1768         // Fetch loopback interface
1769         link = rtnl_link_get_by_name(cache, "lo");
1770         if (!link) {
1771                 ERROR(jail->pakfire, "Could not find lo interface. Ignoring.\n");
1772                 r = 0;
1773                 goto ERROR;
1774         }
1775
1776         // Allocate a new link
1777         change = rtnl_link_alloc();
1778         if (!change) {
1779                 ERROR(jail->pakfire, "Could not allocate change link\n");
1780                 r = 1;
1781                 goto ERROR;
1782         }
1783
1784         // Set the link to UP
1785         rtnl_link_set_flags(change, IFF_UP);
1786
1787         // Apply any changes
1788         r = rtnl_link_change(nl, link, change, 0);
1789         if (r) {
1790                 ERROR(jail->pakfire, "Unable to activate loopback: %s\n", nl_geterror(r));
1791                 goto ERROR;
1792         }
1793
1794         // Success
1795         r = 0;
1796
1797 ERROR:
1798         if (nl)
1799                 nl_socket_free(nl);
1800
1801         return r;
1802 }
1803
1804 // UID/GID Mapping
1805
1806 static int pakfire_jail_setup_uid_mapping(struct pakfire_jail* jail, pid_t pid) {
1807         char path[PATH_MAX];
1808         int r;
1809
1810         // Skip mapping anything when running on /
1811         if (pakfire_on_root(jail->pakfire))
1812                 return 0;
1813
1814         // Make path
1815         r = pakfire_string_format(path, "/proc/%d/uid_map", pid);
1816         if (r)
1817                 return r;
1818
1819         // Fetch UID
1820         const uid_t uid = pakfire_uid(jail->pakfire);
1821
1822         // Fetch SUBUID
1823         const struct pakfire_subid* subuid = pakfire_subuid(jail->pakfire);
1824         if (!subuid)
1825                 return 1;
1826
1827         /* When running as root, we will map the entire range.
1828
1829            When running as a non-privileged user, we will map the root user inside the jail
1830            to the user's UID outside of the jail, and we will map the rest starting from one.
1831         */
1832
1833         // Running as root
1834         if (uid == 0) {
1835                 r = pakfire_file_write(jail->pakfire, path, 0, 0, 0,
1836                         "0 %lu %lu\n", subuid->id, subuid->length);
1837         } else {
1838                 r = pakfire_file_write(jail->pakfire, path, 0, 0, 0,
1839                         "0 %lu 1\n1 %lu %lu\n", uid, subuid->id, subuid->length);
1840         }
1841
1842         if (r) {
1843                 ERROR(jail->pakfire, "Could not map UIDs: %m\n");
1844                 return r;
1845         }
1846
1847         return r;
1848 }
1849
1850 static int pakfire_jail_setup_gid_mapping(struct pakfire_jail* jail, pid_t pid) {
1851         char path[PATH_MAX];
1852         int r;
1853
1854         // Skip mapping anything when running on /
1855         if (pakfire_on_root(jail->pakfire))
1856                 return 0;
1857
1858         // Fetch GID
1859         const gid_t gid = pakfire_gid(jail->pakfire);
1860
1861         // Fetch SUBGID
1862         const struct pakfire_subid* subgid = pakfire_subgid(jail->pakfire);
1863         if (!subgid)
1864                 return 1;
1865
1866         // Make path
1867         r = pakfire_string_format(path, "/proc/%d/gid_map", pid);
1868         if (r)
1869                 return r;
1870
1871         // Running as root
1872         if (gid == 0) {
1873                 r = pakfire_file_write(jail->pakfire, path, 0, 0, 0,
1874                         "0 %lu %lu\n", subgid->id, subgid->length);
1875         } else {
1876                 r = pakfire_file_write(jail->pakfire, path, 0, 0, 0,
1877                         "0 %lu 1\n1 %lu %lu\n", gid, subgid->id, subgid->length);
1878         }
1879
1880         if (r) {
1881                 ERROR(jail->pakfire, "Could not map GIDs: %m\n");
1882                 return r;
1883         }
1884
1885         return r;
1886 }
1887
1888 static int pakfire_jail_setgroups(struct pakfire_jail* jail, pid_t pid) {
1889         char path[PATH_MAX];
1890         int r;
1891
1892         // Make path
1893         r = pakfire_string_format(path, "/proc/%d/setgroups", pid);
1894         if (r)
1895                 return r;
1896
1897         r = pakfire_file_write(jail->pakfire, path, 0, 0, 0, "deny\n");
1898         if (r) {
1899                 CTX_ERROR(jail->ctx, "Could not set setgroups to deny: %s\n", strerror(errno));
1900                 r = -errno;
1901         }
1902
1903         return r;
1904 }
1905
1906 static int pakfire_jail_send_signal(struct pakfire_jail* jail, int fd) {
1907         const uint64_t val = 1;
1908         int r = 0;
1909
1910         DEBUG(jail->pakfire, "Sending signal...\n");
1911
1912         // Write to the file descriptor
1913         r = eventfd_write(fd, val);
1914         if (r < 0) {
1915                 ERROR(jail->pakfire, "Could not send signal: %s\n", strerror(errno));
1916                 r = -errno;
1917         }
1918
1919         // Close the file descriptor
1920         close(fd);
1921
1922         return r;
1923 }
1924
1925 static int pakfire_jail_wait_for_signal(struct pakfire_jail* jail, int fd) {
1926         uint64_t val = 0;
1927         int r = 0;
1928
1929         DEBUG(jail->pakfire, "Waiting for signal...\n");
1930
1931         r = eventfd_read(fd, &val);
1932         if (r < 0) {
1933                 ERROR(jail->pakfire, "Error waiting for signal: %s\n", strerror(errno));
1934                 r = -errno;
1935         }
1936
1937         // Close the file descriptor
1938         close(fd);
1939
1940         return r;
1941 }
1942
1943 /*
1944         Performs the initialisation that needs to happen in the parent part
1945 */
1946 static int pakfire_jail_parent(struct pakfire_jail* jail, struct pakfire_jail_exec* ctx) {
1947         int r;
1948
1949         // Setup UID mapping
1950         r = pakfire_jail_setup_uid_mapping(jail, ctx->pid);
1951         if (r)
1952                 return r;
1953
1954         // Write "deny" to /proc/PID/setgroups
1955         r = pakfire_jail_setgroups(jail, ctx->pid);
1956         if (r)
1957                 return r;
1958
1959         // Setup GID mapping
1960         r = pakfire_jail_setup_gid_mapping(jail, ctx->pid);
1961         if (r)
1962                 return r;
1963
1964         // Parent has finished initialisation
1965         DEBUG(jail->pakfire, "Parent has finished initialization\n");
1966
1967         // Send signal to client
1968         r = pakfire_jail_send_signal(jail, ctx->completed_fd);
1969         if (r)
1970                 return r;
1971
1972         return 0;
1973 }
1974
1975 static int pakfire_jail_switch_root(struct pakfire_jail* jail, const char* root) {
1976         int r;
1977
1978         // Change to the new root
1979         r = chdir(root);
1980         if (r) {
1981                 ERROR(jail->pakfire, "chdir(%s) failed: %m\n", root);
1982                 return r;
1983         }
1984
1985         // Switch Root!
1986         r = pivot_root(".", ".");
1987         if (r) {
1988                 ERROR(jail->pakfire, "Failed changing into the new root directory %s: %m\n", root);
1989                 return r;
1990         }
1991
1992         // Umount the old root
1993         r = umount2(".", MNT_DETACH);
1994         if (r) {
1995                 ERROR(jail->pakfire, "Could not umount the old root filesystem: %m\n");
1996                 return r;
1997         }
1998
1999         return 0;
2000 }
2001
2002 static int pakfire_jail_open_pty(struct pakfire_jail* jail, struct pakfire_jail_exec* ctx) {
2003         int r;
2004
2005         // Allocate a new PTY
2006         ctx->pty.master.fd = posix_openpt(O_RDWR|O_NONBLOCK|O_NOCTTY|O_CLOEXEC);
2007         if (ctx->pty.master.fd < 0)
2008                 return -errno;
2009
2010         // Fetch the path
2011         r = ptsname_r(ctx->pty.master.fd, ctx->pty.console, sizeof(ctx->pty.console));
2012         if (r)
2013                 return -r;
2014
2015         CTX_DEBUG(jail->ctx, "Allocated console at %s (%d)\n", ctx->pty.console, ctx->pty.master.fd);
2016
2017         // Unlock the master device
2018         r = unlockpt(ctx->pty.master.fd);
2019         if (r) {
2020                 CTX_ERROR(jail->ctx, "Could not unlock the PTY: %s\n", strerror(errno));
2021                 return -errno;
2022         }
2023
2024         // Create a symlink
2025         r = pakfire_symlink(jail->ctx, ctx->pty.console, "/dev/console");
2026         if (r)
2027                 return r;
2028
2029         return r;
2030 }
2031
2032 static int pakfire_jail_setup_terminal(struct pakfire_jail* jail, struct pakfire_jail_exec* ctx) {
2033         int fd;
2034         int r;
2035
2036         // Open a new terminal
2037         fd = open("/dev/console", O_RDWR|O_NOCTTY);
2038         if (fd < 0) {
2039                 CTX_ERROR(jail->ctx, "Failed to open a new terminal: %s\n", strerror(errno));
2040                 return -errno;
2041         }
2042
2043         CTX_DEBUG(jail->ctx, "Opened a new terminal %d\n", fd);
2044
2045         // Connect the new terminal to standard input
2046         r = dup2(fd, STDIN_FILENO);
2047         if (r < 0) {
2048                 CTX_ERROR(jail->ctx, "Failed to open standard input: %s\n", strerror(errno));
2049                 return -errno;
2050         }
2051
2052         // Connect the new terminal to standard output
2053         r = dup2(fd, STDOUT_FILENO);
2054         if (r < 0) {
2055                 CTX_ERROR(jail->ctx, "Failed to open standard output: %s\n", strerror(errno));
2056                 return -errno;
2057         }
2058
2059         // Connect the new terminal to standard error
2060         r = dup2(fd, STDERR_FILENO);
2061         if (r < 0) {
2062                 CTX_ERROR(jail->ctx, "Failed to open standard error: %s\n", strerror(errno));
2063                 return -errno;
2064         }
2065
2066         return 0;
2067 }
2068
/*
        Entry point of the cloned child process.

        Waits until the parent has signalled that it has finished its part of
        the initialisation, then prepares the environment step by step
        (process attributes, mounts, switching root, personality, loopback,
        nice level, new session, PTY, resource limits, capabilities and the
        syscall filter) and finally executes the command given in argv.

        The caller passes the return value of this function to _exit(), so
        whatever is returned here becomes the exit code of the child. The
        function only returns if one of the setup steps or the exec failed.
*/
static int pakfire_jail_child(struct pakfire_jail* jail, struct pakfire_jail_exec* ctx,
                const char* argv[]) {
        int r;

        // Redirect any logging to our log pipe
        pakfire_ctx_set_log_callback(jail->ctx, pakfire_jail_log_redirect, &ctx->pipes);

        // Fetch my own PID
        pid_t pid = getpid();

        DEBUG(jail->pakfire, "Launched child process in jail with PID %d\n", pid);

        // Wait for the parent to finish initialization
        // (this also closes ctx->completed_fd in this process)
        r = pakfire_jail_wait_for_signal(jail, ctx->completed_fd);
        if (r)
                return r;

        // Die with parent
        r = prctl(PR_SET_PDEATHSIG, SIGKILL, 0, 0, 0);
        if (r) {
                ERROR(jail->pakfire, "Could not configure to die with parent: %m\n");
                return 126;
        }

        // Make this process dumpable
        r = prctl (PR_SET_DUMPABLE, 1, 0, 0, 0);
        if (r) {
                ERROR(jail->pakfire, "Could not make the process dumpable: %m\n");
                return 126;
        }

        // Don't drop any capabilities on setuid()
        r = prctl(PR_SET_KEEPCAPS, 1);
        if (r) {
                ERROR(jail->pakfire, "Could not set PR_SET_KEEPCAPS: %m\n");
                return 126;
        }

        // Fetch UID/GID
        uid_t uid = getuid();
        gid_t gid = getgid();

        // Fetch EUID/EGID
        uid_t euid = geteuid();
        gid_t egid = getegid();

        DEBUG(jail->pakfire, "  UID: %u (effective %u)\n", uid, euid);
        DEBUG(jail->pakfire, "  GID: %u (effective %u)\n", gid, egid);

        // Log all mountpoints
        pakfire_mount_list(jail->ctx);

        // Fail if we are not PID 1
        if (pid != 1) {
                CTX_ERROR(jail->ctx, "Child process is not PID 1\n");
                return 126;
        }

        // Fail if we are not running as root
        if (uid || gid || euid || egid) {
                ERROR(jail->pakfire, "Child process is not running as root\n");
                return 126;
        }

        // Fetch the file descriptor which is later used to send the PTY master to the parent
        const int socket_send = pakfire_jail_get_pipe_to_write(jail, &ctx->socket);

        // Mount all default stuff
        r = pakfire_mount_all(jail->pakfire, PAKFIRE_MNTNS_INNER, 0);
        if (r)
                return 126;

        const char* root = pakfire_get_path(jail->pakfire);
        const char* arch = pakfire_get_effective_arch(jail->pakfire);

        // Change mount propagation to slave to receive anything from the parent namespace
        r = pakfire_mount_change_propagation(jail->ctx, "/", MS_SLAVE);
        if (r)
                return r;

        // Make root a mountpoint in the new mount namespace
        r = pakfire_mount_make_mounpoint(jail->pakfire, root);
        if (r)
                return r;

        // Change mount propagation to private
        r = pakfire_mount_change_propagation(jail->ctx, root, MS_PRIVATE);
        if (r)
                return r;

        // Change root (unless root is /)
        if (!pakfire_on_root(jail->pakfire)) {
                // Mount everything
                r = pakfire_jail_mount(jail, ctx);
                if (r)
                        return r;

                // chroot()
                r = pakfire_jail_switch_root(jail, root);
                if (r)
                        return r;
        }

        // Set personality (skipped if there is none for this architecture)
        unsigned long persona = pakfire_arch_personality(arch);
        if (persona) {
                r = personality(persona);
                if (r < 0) {
                        ERROR(jail->pakfire, "Could not set personality (%x)\n", (unsigned int)persona);
                        return 1;
                }
        }

        // Setup networking
        // The loopback interface is only brought up when networking has NOT been
        // requested. In that case, __pakfire_jail_exec() has cloned this process with
        // CLONE_NEWNET - presumably the fresh network namespace starts with "lo" down.
        if (!pakfire_jail_exec_has_flag(ctx, PAKFIRE_JAIL_HAS_NETWORKING)) {
                r = pakfire_jail_setup_loopback(jail);
                if (r)
                        return 1;
        }

        // Set nice level
        if (jail->nice) {
                DEBUG(jail->pakfire, "Setting nice level to %d\n", jail->nice);

                r = setpriority(PRIO_PROCESS, pid, jail->nice);
                if (r) {
                        ERROR(jail->pakfire, "Could not set nice level: %m\n");
                        return 1;
                }
        }

        // Create a new session
        r = setsid();
        if (r < 0) {
                CTX_ERROR(jail->ctx, "Could not create a new session: %s\n", strerror(errno));
                return r;
        }

        // Allocate a new PTY
        r = pakfire_jail_open_pty(jail, ctx);
        if (r) {
                CTX_ERROR(jail->ctx, "Could not allocate a new PTY: %s\n", strerror(-r));
                return r;
        }

        // Send the PTY master to the parent process
        r = pakfire_jail_send_fd(jail, socket_send, ctx->pty.master.fd);
        if (r) {
                CTX_ERROR(jail->ctx, "Failed sending the PTY master to the parent: %s\n", strerror(-r));
                return r;
        }

        // Setup the terminal
        r = pakfire_jail_setup_terminal(jail, ctx);
        if (r)
                return r;

        // Close the master of the PTY
        // (it has been sent to the parent above and is no longer needed in here)
        close(ctx->pty.master.fd);
        ctx->pty.master.fd = -1;

        // Close the socket
        close(socket_send);

        // Close other end of log pipes
        close(ctx->pipes.log_INFO[0]);
        close(ctx->pipes.log_ERROR[0]);
#ifdef ENABLE_DEBUG
        close(ctx->pipes.log_DEBUG[0]);
#endif /* ENABLE_DEBUG */

        // Reset open file limit (http://0pointer.net/blog/file-descriptor-limits.html)
        r = pakfire_rlimit_reset_nofile(jail->pakfire);
        if (r)
                return r;

        // Set capabilities
        r = pakfire_jail_set_capabilities(jail);
        if (r)
                return r;

        // Show capabilities
        r = pakfire_jail_show_capabilities(jail);
        if (r)
                return r;

        // Filter syscalls
        r = pakfire_jail_limit_syscalls(jail);
        if (r)
                return r;

        DEBUG(jail->pakfire, "Child process initialization done\n");
        DEBUG(jail->pakfire, "Launching command:\n");

        // Log argv
        for (unsigned int i = 0; argv[i]; i++)
                DEBUG(jail->pakfire, "  argv[%u] = %s\n", i, argv[i]);

        // exec() command (this only returns if the command could not be executed)
        r = execvpe(argv[0], (char**)argv, jail->env);
        if (r < 0) {
                // Translate errno into regular exit code
                switch (errno) {
                        case ENOENT:
                                // Ignore if the command doesn't exist
                                if (ctx->flags & PAKFIRE_JAIL_NOENT_OK)
                                        r = 0;
                                else
                                        r = 127;

                                break;

                        default:
                                r = 1;
                }

                // errno has not been touched by the switch above, so %m still
                // describes why execvpe() failed
                ERROR(jail->pakfire, "Could not execve(%s): %m\n", argv[0]);
        }

        // We should not get here
        return r;
}
2290
2291 // Run a command in the jail
2292 static int __pakfire_jail_exec(struct pakfire_jail* jail, const char* argv[],
2293                 pakfire_jail_communicate_in  communicate_in,
2294                 pakfire_jail_communicate_out communicate_out,
2295                 void* data, int flags) {
2296         int exit = -1;
2297         int r;
2298
2299         // Check if argv is valid
2300         if (!argv || !argv[0]) {
2301                 errno = EINVAL;
2302                 return -1;
2303         }
2304
2305         // Initialize context for this call
2306         struct pakfire_jail_exec ctx = {
2307                 .flags = flags,
2308
2309                 .socket = { -1, -1 },
2310
2311                 .pipes = {
2312                         .log_INFO  = { -1, -1 },
2313                         .log_ERROR = { -1, -1 },
2314 #ifdef ENABLE_DEBUG
2315                         .log_DEBUG = { -1, -1 },
2316 #endif /* ENABLE_DEBUG */
2317                 },
2318
2319                 .communicate = {
2320                         .in   = communicate_in,
2321                         .out  = communicate_out,
2322                         .data = data,
2323                 },
2324
2325                 .pidfd = -1,
2326
2327                 // PTY
2328                 .pty = {
2329                         .master = {
2330                                 .fd = -1,
2331                         },
2332                         .stdin = {
2333                                 .fd = -1,
2334                         },
2335                         .stdout = {
2336                                 .fd = -1,
2337                         },
2338                 },
2339         };
2340
2341         DEBUG(jail->pakfire, "Executing jail...\n");
2342
2343         // Enable networking in interactive mode
2344         if (ctx.flags & PAKFIRE_JAIL_PTY_FORWARDING)
2345                 ctx.flags |= PAKFIRE_JAIL_HAS_NETWORKING;
2346
2347         /*
2348                 Setup a file descriptor which can be used to notify the client that the parent
2349                 has completed configuration.
2350         */
2351         ctx.completed_fd = eventfd(0, EFD_CLOEXEC);
2352         if (ctx.completed_fd < 0) {
2353                 ERROR(jail->pakfire, "eventfd() failed: %m\n");
2354                 return -1;
2355         }
2356
2357         // Create a UNIX domain socket
2358         r = socketpair(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC, 0, ctx.socket);
2359         if (r < 0) {
2360                 CTX_ERROR(jail->ctx, "Could not create UNIX socket: %s\n", strerror(errno));
2361                 r = -errno;
2362                 goto ERROR;
2363         }
2364
2365         // Setup pipes for logging
2366         // INFO
2367         r = pakfire_jail_setup_pipe(jail, &ctx.pipes.log_INFO, O_CLOEXEC);
2368         if (r)
2369                 goto ERROR;
2370
2371         // ERROR
2372         r = pakfire_jail_setup_pipe(jail, &ctx.pipes.log_ERROR, O_CLOEXEC);
2373         if (r)
2374                 goto ERROR;
2375
2376 #ifdef ENABLE_DEBUG
2377         // DEBUG
2378         r = pakfire_jail_setup_pipe(jail, &ctx.pipes.log_DEBUG, O_CLOEXEC);
2379         if (r)
2380                 goto ERROR;
2381 #endif /* ENABLE_DEBUG */
2382
2383         // Configure child process
2384         struct clone_args args = {
2385                 .flags =
2386                         CLONE_NEWCGROUP |
2387                         CLONE_NEWIPC |
2388                         CLONE_NEWNS |
2389                         CLONE_NEWPID |
2390                         CLONE_NEWTIME |
2391                         CLONE_NEWUSER |
2392                         CLONE_NEWUTS |
2393                         CLONE_PIDFD,
2394                 .exit_signal = SIGCHLD,
2395                 .pidfd = (long long unsigned int)&ctx.pidfd,
2396         };
2397
2398         // Launch the process in a cgroup that is a leaf of the configured cgroup
2399         if (jail->cgroup) {
2400                 args.flags |= CLONE_INTO_CGROUP;
2401
2402                 // Fetch our UUID
2403                 const char* uuid = pakfire_jail_uuid(jail);
2404
2405                 // Create a temporary cgroup
2406                 r = pakfire_cgroup_child(&ctx.cgroup, jail->cgroup, uuid, 0);
2407                 if (r) {
2408                         ERROR(jail->pakfire, "Could not create cgroup for jail: %m\n");
2409                         goto ERROR;
2410                 }
2411
2412                 // Clone into this cgroup
2413                 args.cgroup = pakfire_cgroup_fd(ctx.cgroup);
2414         }
2415
2416         // Setup networking
2417         if (!pakfire_jail_exec_has_flag(&ctx, PAKFIRE_JAIL_HAS_NETWORKING)) {
2418                 args.flags |= CLONE_NEWNET;
2419         }
2420
2421         // Fork this process
2422         ctx.pid = clone3(&args, sizeof(args));
2423         if (ctx.pid < 0) {
2424                 ERROR(jail->pakfire, "Could not clone: %m\n");
2425                 return -1;
2426
2427         // Child process
2428         } else if (ctx.pid == 0) {
2429                 r = pakfire_jail_child(jail, &ctx, argv);
2430                 _exit(r);
2431         }
2432
2433         // Parent process
2434         r = pakfire_jail_parent(jail, &ctx);
2435         if (r)
2436                 goto ERROR;
2437
2438         DEBUG(jail->pakfire, "Waiting for PID %d to finish its work\n", ctx.pid);
2439
2440         // Read output of the child process
2441         r = pakfire_jail_wait(jail, &ctx);
2442         if (r)
2443                 goto ERROR;
2444
2445         // Handle exit status
2446         switch (ctx.status.si_code) {
2447                 case CLD_EXITED:
2448                         DEBUG(jail->pakfire, "The child process exited with code %d\n",
2449                                 ctx.status.si_status);
2450
2451                         // Pass exit code
2452                         exit = ctx.status.si_status;
2453                         break;
2454
2455                 case CLD_KILLED:
2456                         ERROR(jail->pakfire, "The child process was killed\n");
2457                         exit = 139;
2458                         break;
2459
2460                 case CLD_DUMPED:
2461                         ERROR(jail->pakfire, "The child process terminated abnormally\n");
2462                         break;
2463
2464                 // Log anything else
2465                 default:
2466                         ERROR(jail->pakfire, "Unknown child exit code: %d\n", ctx.status.si_code);
2467                         break;
2468         }
2469
2470 ERROR:
2471         // Destroy the temporary cgroup (if any)
2472         if (ctx.cgroup) {
2473                 // Read cgroup stats
2474                 pakfire_cgroup_stat(ctx.cgroup, &ctx.cgroup_stats);
2475                 pakfire_cgroup_stat_dump(ctx.cgroup, &ctx.cgroup_stats);
2476                 pakfire_cgroup_destroy(ctx.cgroup);
2477                 pakfire_cgroup_unref(ctx.cgroup);
2478         }
2479
2480         // Close any file descriptors
2481         if (ctx.pidfd >= 0)
2482                 close(ctx.pidfd);
2483         if (ctx.pty.master.fd >= 0)
2484                 close(ctx.pty.master.fd);
2485         pakfire_jail_close_pipe(jail, ctx.pipes.log_INFO);
2486         pakfire_jail_close_pipe(jail, ctx.pipes.log_ERROR);
2487 #ifdef ENABLE_DEBUG
2488         pakfire_jail_close_pipe(jail, ctx.pipes.log_DEBUG);
2489 #endif /* ENABLE_DEBUG */
2490         pakfire_jail_close_pipe(jail, ctx.socket);
2491
2492         return exit;
2493 }
2494
2495 PAKFIRE_EXPORT int pakfire_jail_exec(
2496                 struct pakfire_jail* jail,
2497                 const char* argv[],
2498                 pakfire_jail_communicate_in  callback_in,
2499                 pakfire_jail_communicate_out callback_out,
2500                 void* data, int flags) {
2501         return __pakfire_jail_exec(jail, argv, callback_in, callback_out, data, flags);
2502 }
2503
2504 static int pakfire_jail_exec_interactive(
2505                 struct pakfire_jail* jail, const char* argv[], int flags) {
2506         int r;
2507
2508         flags |= PAKFIRE_JAIL_PTY_FORWARDING;
2509
2510         // Setup interactive stuff
2511         r = pakfire_jail_setup_interactive_env(jail);
2512         if (r)
2513                 return r;
2514
2515         return __pakfire_jail_exec(jail, argv, NULL, NULL, NULL, flags);
2516 }
2517
2518 int pakfire_jail_exec_script(struct pakfire_jail* jail,
2519                 const char* script,
2520                 const size_t size,
2521                 const char* args[],
2522                 pakfire_jail_communicate_in  callback_in,
2523                 pakfire_jail_communicate_out callback_out,
2524                 void* data) {
2525         char path[PATH_MAX];
2526         const char** argv = NULL;
2527         FILE* f = NULL;
2528         int r;
2529
2530         const char* root = pakfire_get_path(jail->pakfire);
2531
2532         // Write the scriptlet to disk
2533         r = pakfire_path_append(path, root, PAKFIRE_TMP_DIR "/pakfire-script.XXXXXX");
2534         if (r)
2535                 goto ERROR;
2536
2537         // Create a temporary file
2538         f = pakfire_mktemp(path, 0700);
2539         if (!f) {
2540                 ERROR(jail->pakfire, "Could not create temporary file: %m\n");
2541                 goto ERROR;
2542         }
2543
2544         DEBUG(jail->pakfire, "Writing script to %s:\n%.*s\n", path, (int)size, script);
2545
2546         // Write data
2547         r = fprintf(f, "%s", script);
2548         if (r < 0) {
2549                 ERROR(jail->pakfire, "Could not write script to file %s: %m\n", path);
2550                 goto ERROR;
2551         }
2552
2553         // Close file
2554         r = fclose(f);
2555         if (r) {
2556                 ERROR(jail->pakfire, "Could not close script file %s: %m\n", path);
2557                 goto ERROR;
2558         }
2559
2560         f = NULL;
2561
2562         // Count how many arguments were passed
2563         unsigned int argc = 1;
2564         if (args) {
2565                 for (const char** arg = args; *arg; arg++)
2566                         argc++;
2567         }
2568
2569         argv = calloc(argc + 1, sizeof(*argv));
2570         if (!argv) {
2571                 ERROR(jail->pakfire, "Could not allocate argv: %m\n");
2572                 goto ERROR;
2573         }
2574
2575         // Set command
2576         argv[0] = (root) ? pakfire_path_relpath(root, path) : path;
2577
2578         // Copy args
2579         for (unsigned int i = 1; i < argc; i++)
2580                 argv[i] = args[i-1];
2581
2582         // Run the script
2583         r = pakfire_jail_exec(jail, argv, callback_in, callback_out, data, 0);
2584
2585 ERROR:
2586         if (argv)
2587                 free(argv);
2588         if (f)
2589                 fclose(f);
2590
2591         // Remove script from disk
2592         if (*path)
2593                 unlink(path);
2594
2595         return r;
2596 }
2597
2598 /*
2599         A convenience function that creates a new jail, runs the given command and destroys
2600         the jail again.
2601 */
2602 int pakfire_jail_run(struct pakfire* pakfire, const char* argv[], int flags, char** output) {
2603         struct pakfire_jail* jail = NULL;
2604         int r;
2605
2606         // Create a new jail
2607         r = pakfire_jail_create(&jail, pakfire);
2608         if (r)
2609                 goto ERROR;
2610
2611         // Execute the command
2612         r = pakfire_jail_exec(jail, argv, NULL, pakfire_jail_capture_stdout, output, 0);
2613
2614 ERROR:
2615         if (jail)
2616                 pakfire_jail_unref(jail);
2617
2618         return r;
2619 }
2620
/*
        Convenience wrapper: creates a jail, runs the given script in it
        without any communication callbacks and drops the jail again.

        Returns the result of pakfire_jail_create() if that failed, or
        otherwise the result of pakfire_jail_exec_script().

        NOTE(review): flags is currently not used.
*/
int pakfire_jail_run_script(struct pakfire* pakfire,
                const char* script, const size_t length, const char* argv[], int flags) {
        struct pakfire_jail* jail = NULL;

        // Set up the jail and only run the script if that worked
        int rc = pakfire_jail_create(&jail, pakfire);
        if (rc == 0)
                rc = pakfire_jail_exec_script(jail, script, length, argv, NULL, NULL, NULL);

        // Drop the jail again
        if (jail)
                pakfire_jail_unref(jail);

        return rc;
}
2640
/*
        Launches an interactive login shell (/bin/bash --login) in the jail.

        Negative results are passed on as errors. Any other return code of
        the shell is ignored and zero is returned.
*/
int pakfire_jail_shell(struct pakfire_jail* jail) {
        const char* argv[] = {
                "/bin/bash", "--login", NULL,
        };

        // Run the shell interactively
        const int rc = pakfire_jail_exec_interactive(jail, argv, 0);

        // Only negative values are errors; the shell's own exit code does not matter
        return (rc < 0) ? rc : 0;
}
2658
2659 static int pakfire_jail_run_if_possible(struct pakfire* pakfire, const char** argv) {
2660         char path[PATH_MAX];
2661         int r;
2662
2663         r = pakfire_path(pakfire, path, "%s", *argv);
2664         if (r)
2665                 return r;
2666
2667         // Check if the file is executable
2668         r = access(path, X_OK);
2669         if (r) {
2670                 DEBUG(pakfire, "%s is not executable. Skipping...\n", *argv);
2671                 return 0;
2672         }
2673
2674         return pakfire_jail_run(pakfire, argv, 0, NULL);
2675 }
2676
/*
        Runs /sbin/ldconfig in a jail if it is available.

        Returns the result of pakfire_jail_run_if_possible().
*/
int pakfire_jail_ldconfig(struct pakfire* pakfire) {
        const char* command[] = { "/sbin/ldconfig", NULL };

        const int rc = pakfire_jail_run_if_possible(pakfire, command);

        return rc;
}
2685
/*
        Runs "/usr/bin/systemd-tmpfiles --create" in a jail if it is available.

        Returns the result of pakfire_jail_run_if_possible().
*/
int pakfire_jail_run_systemd_tmpfiles(struct pakfire* pakfire) {
        const char* command[] = { "/usr/bin/systemd-tmpfiles", "--create", NULL };

        const int rc = pakfire_jail_run_if_possible(pakfire, command);

        return rc;
}