src/libpakfire/jail.c

   1 /*#############################################################################
   2 #                                                                             #
   3 # Pakfire - The IPFire package management system                              #
   4 # Copyright (C) 2022 Pakfire development team                                 #
   5 #                                                                             #
   6 # This program is free software: you can redistribute it and/or modify        #
   7 # it under the terms of the GNU General Public License as published by        #
   8 # the Free Software Foundation, either version 3 of the License, or           #
   9 # (at your option) any later version.                                         #
  10 #                                                                             #
  11 # This program is distributed in the hope that it will be useful,             #
  12 # but WITHOUT ANY WARRANTY; without even the implied warranty of              #
  13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the               #
  14 # GNU General Public License for more details.                                #
  15 #                                                                             #
  16 # You should have received a copy of the GNU General Public License           #
  17 # along with this program.  If not, see <http://www.gnu.org/licenses/>.       #
  18 #                                                                             #
  19 #############################################################################*/
  20
  21 #include <errno.h>
  22 #include <fcntl.h>
  23 #include <linux/capability.h>
  24 #include <linux/sched.h>
  25 #include <sys/wait.h>
  26 #include <linux/wait.h>
  27 #include <sched.h>
  28 #include <signal.h>
  29 #include <stdlib.h>
  30 #include <syscall.h>
  31 #include <sys/capability.h>
  32 #include <sys/epoll.h>
  33 #include <sys/eventfd.h>
  34 #include <sys/mount.h>
  35 #include <sys/personality.h>
  36 #include <sys/prctl.h>
  37 #include <sys/resource.h>
  38 #include <sys/signalfd.h>
  39 #include <sys/timerfd.h>
  40 #include <sys/types.h>
  41 #include <sys/wait.h>
  42
  43 // libnl3
  44 #include <net/if.h>
  45 #include <netlink/route/link.h>
  46
  47 // libseccomp
  48 #include <seccomp.h>
  49
  50 // libuuid
  51 #include <uuid.h>
  52
  53 #include <pakfire/arch.h>
  54 #include <pakfire/cgroup.h>
  55 #include <pakfire/jail.h>
  56 #include <pakfire/logging.h>
  57 #include <pakfire/mount.h>
  58 #include <pakfire/pakfire.h>
  59 #include <pakfire/private.h>
  60 #include <pakfire/pwd.h>
  61 #include <pakfire/string.h>
  62 #include <pakfire/util.h>
  63
  64 #define BUFFER_SIZE      1024 * 64
  65 #define ENVIRON_SIZE     128
  66 #define EPOLL_MAX_EVENTS 2
  67 #define MAX_MOUNTPOINTS  8
  68
  69 // The default environment that will be set for every command
  70 static const struct environ {
  71         const char* key;
  72         const char* val;
  73 } ENV[] = {
  74         { "HOME", "/root" },
  75         { "LANG", "C.utf-8" },
  76         { "PATH", "/usr/local/sbin:/usr/sbin:/sbin:/usr/local/bin:/usr/bin:/bin", },
  77         { "TERM", "vt100" },
  78
  79         // Tell everything that it is running inside a Pakfire container
  80         { "container", "pakfire" },
  81         { NULL, NULL },
  82 };
  83
// One entry of a jail's fixed-size mountpoint table
struct pakfire_jail_mountpoint {
        // Path that is mounted from
        char source[PATH_MAX];
        // Path that is mounted to
        char target[PATH_MAX];
        // NOTE(review): presumably mount(2) flags - confirm against the code that mounts
        int flags;
};
  89
// A reference-counted jail and its configuration
struct pakfire_jail {
        // Reference to the owning Pakfire instance (released in pakfire_jail_free())
        struct pakfire* pakfire;
        // Reference counter; the jail is freed when it drops to zero
        int nrefs;

        // A unique ID for each jail
        uuid_t uuid;
        // String form of the UUID; filled lazily by pakfire_jail_uuid()
        char __uuid[UUID_STR_LEN];

        // Resource Limits
        // Nice level as stored by pakfire_jail_nice()
        int nice;

        // Timeout
        // Only it_value.tv_sec is set by pakfire_jail_set_timeout(); zero disables the timer
        struct itimerspec timeout;

        // CGroup
        // Optional; a reference is held while set (see pakfire_jail_set_cgroup())
        struct pakfire_cgroup* cgroup;

        // Environment
        // Heap-allocated "KEY=VALUE" strings; unused slots are NULL
        char* env[ENVIRON_SIZE];

        // Mountpoints
        struct pakfire_jail_mountpoint mountpoints[MAX_MOUNTPOINTS];
        // NOTE(review): presumably the number of used entries in mountpoints - confirm
        unsigned int num_mountpoints;
};
 114
// Accumulates output read from a file descriptor until whole lines are available
struct pakfire_log_buffer {
        // Raw bytes; not NUL-terminated
        char data[BUFFER_SIZE];
        // Number of valid bytes at the start of data
        size_t used;
};
 119
// State of a single command execution inside a jail
struct pakfire_jail_exec {
        // Bitmask tested by pakfire_jail_exec_has_flag()
        int flags;

        // PID (of the child)
        pid_t pid;
        // File descriptor referring to the child; polled and passed to waitid(P_PIDFD, ...)
        int pidfd;

        // Process status (from waitid)
        siginfo_t status;

        // FD to notify the client that the parent has finished initialization
        int completed_fd;

        // Log pipes
        // Every member is a pipe pair: [0] is the read end, [1] is the write end
        struct pakfire_jail_pipes {
                int stdin[2];
                int stdout[2];
                int stderr[2];

                // Logging
                // One pipe per log priority; the child writes into them in pakfire_jail_log()
                int log_INFO[2];
                int log_ERROR[2];
                int log_DEBUG[2];
        } pipes;

        // Communicate
        struct pakfire_jail_communicate {
                // Called by pakfire_jail_stream_stdin() to feed the child's standard input
                pakfire_jail_communicate_in  in;
                // NOTE(review): presumably receives the child's output line by line - confirm
                pakfire_jail_communicate_out out;
                // Opaque pointer that is handed to the callbacks
                void* data;
        } communicate;

        // Log buffers
        // One line buffer per readable pipe
        struct pakfire_jail_buffers {
                struct pakfire_log_buffer stdout;
                struct pakfire_log_buffer stderr;

                // Logging
                struct pakfire_log_buffer log_INFO;
                struct pakfire_log_buffer log_ERROR;
                struct pakfire_log_buffer log_DEBUG;
        } buffers;

        // NOTE(review): presumably the cgroup the child runs in and its statistics - confirm
        struct pakfire_cgroup* cgroup;
        struct pakfire_cgroup_stats cgroup_stats;
};
 166
 167 static int clone3(struct clone_args* args, size_t size) {
 168         return syscall(__NR_clone3, args, size);
 169 }
 170
 171 static int pidfd_send_signal(int pidfd, int sig, siginfo_t* info, unsigned int flags) {
 172         return syscall(SYS_pidfd_send_signal, pidfd, sig, info, flags);
 173 }
 174
 175 static int pakfire_jail_exec_has_flag(
 176                 const struct pakfire_jail_exec* ctx, const enum pakfire_jail_exec_flags flag) {
 177         return ctx->flags & flag;
 178 }
 179
 180 static void pakfire_jail_free(struct pakfire_jail* jail) {
 181         DEBUG(jail->pakfire, "Freeing jail at %p\n", jail);
 182
 183         // Free environment
 184         for (unsigned int i = 0; jail->env[i]; i++)
 185                 free(jail->env[i]);
 186
 187         if (jail->cgroup)
 188                 pakfire_cgroup_unref(jail->cgroup);
 189
 190         pakfire_unref(jail->pakfire);
 191         free(jail);
 192 }
 193
 194 /*
 195         Passes any log messages on to the default pakfire log callback
 196 */
 197 static int pakfire_jail_default_log_callback(struct pakfire* pakfire, void* data,
 198                 int priority, const char* line, size_t length) {
 199         switch (priority) {
 200                 case LOG_INFO:
 201                         INFO(pakfire, "%s", line);
 202                         break;
 203
 204                 case LOG_ERR:
 205                         ERROR(pakfire, "%s", line);
 206                         break;
 207
 208 #ifdef ENABLE_DEBUG
 209                 case LOG_DEBUG:
 210                         DEBUG(pakfire, "%s", line);
 211                         break;
 212 #endif
 213         }
 214
 215         return 0;
 216 }
 217
 218 static const char* pakfire_jail_uuid(struct pakfire_jail* jail) {
 219         if (!*jail->__uuid)
 220                 uuid_unparse_lower(jail->uuid, jail->__uuid);
 221
 222         return jail->__uuid;
 223 }
 224
 225 static int pakfire_jail_setup_interactive_env(struct pakfire_jail* jail) {
 226         // Set PS1
 227         int r = pakfire_jail_set_env(jail, "PS1", "pakfire-jail \\w> ");
 228         if (r)
 229                 return r;
 230
 231         // Copy TERM
 232         char* TERM = secure_getenv("TERM");
 233         if (TERM) {
 234                 r = pakfire_jail_set_env(jail, "TERM", TERM);
 235                 if (r)
 236                         return r;
 237         }
 238
 239         // Copy LANG
 240         char* LANG = secure_getenv("LANG");
 241         if (LANG) {
 242                 r = pakfire_jail_set_env(jail, "LANG", LANG);
 243                 if (r)
 244                         return r;
 245         }
 246
 247         return 0;
 248 }
 249
 250 PAKFIRE_EXPORT int pakfire_jail_create(struct pakfire_jail** jail, struct pakfire* pakfire) {
 251         int r;
 252
 253         const char* arch = pakfire_get_arch(pakfire);
 254
 255         // Allocate a new jail
 256         struct pakfire_jail* j = calloc(1, sizeof(*j));
 257         if (!j)
 258                 return 1;
 259
 260         // Reference Pakfire
 261         j->pakfire = pakfire_ref(pakfire);
 262
 263         // Initialize reference counter
 264         j->nrefs = 1;
 265
 266         // Generate a random UUID
 267         uuid_generate_random(j->uuid);
 268
 269         DEBUG(j->pakfire, "Allocated new jail at %p\n", j);
 270
 271         // Set default environment
 272         for (const struct environ* e = ENV; e->key; e++) {
 273                 r = pakfire_jail_set_env(j, e->key, e->val);
 274                 if (r)
 275                         goto ERROR;
 276         }
 277
 278         // Enable all CPU features that CPU has to offer
 279         if (!pakfire_arch_supported_by_host(arch)) {
 280                 r = pakfire_jail_set_env(j, "QEMU_CPU", "max");
 281                 if (r)
 282                         goto ERROR;
 283         }
 284
 285         // Set container UUID
 286         r = pakfire_jail_set_env(j, "container_uuid", pakfire_jail_uuid(j));
 287         if (r)
 288                 goto ERROR;
 289
 290         // Disable systemctl to talk to systemd
 291         if (!pakfire_on_root(j->pakfire)) {
 292                 r = pakfire_jail_set_env(j, "SYSTEMD_OFFLINE", "1");
 293                 if (r)
 294                         goto ERROR;
 295         }
 296
 297         // Done
 298         *jail = j;
 299         return 0;
 300
 301 ERROR:
 302         pakfire_jail_free(j);
 303
 304         return r;
 305 }
 306
 307 PAKFIRE_EXPORT struct pakfire_jail* pakfire_jail_ref(struct pakfire_jail* jail) {
 308         ++jail->nrefs;
 309
 310         return jail;
 311 }
 312
 313 PAKFIRE_EXPORT struct pakfire_jail* pakfire_jail_unref(struct pakfire_jail* jail) {
 314         if (--jail->nrefs > 0)
 315                 return jail;
 316
 317         pakfire_jail_free(jail);
 318         return NULL;
 319 }
 320
 321 // Resource Limits
 322
 323 PAKFIRE_EXPORT int pakfire_jail_nice(struct pakfire_jail* jail, int nice) {
 324         // Check if nice level is in range
 325         if (nice < -19 || nice > 20) {
 326                 errno = EINVAL;
 327                 return 1;
 328         }
 329
 330         // Store nice level
 331         jail->nice = nice;
 332
 333         return 0;
 334 }
 335
 336 int pakfire_jail_set_cgroup(struct pakfire_jail* jail, struct pakfire_cgroup* cgroup) {
 337         // Free any previous cgroup
 338         if (jail->cgroup) {
 339                 pakfire_cgroup_unref(jail->cgroup);
 340                 jail->cgroup = NULL;
 341         }
 342
 343         // Set any new cgroup
 344         if (cgroup) {
 345                 DEBUG(jail->pakfire, "Setting cgroup %p\n", cgroup);
 346
 347                 jail->cgroup = pakfire_cgroup_ref(cgroup);
 348         }
 349
 350         // Done
 351         return 0;
 352 }
 353
 354 // Environment
 355
 356 // Returns the length of the environment
 357 static unsigned int pakfire_jail_env_length(struct pakfire_jail* jail) {
 358         unsigned int i = 0;
 359
 360         // Count everything in the environment
 361         for (char** e = jail->env; *e; e++)
 362                 i++;
 363
 364         return i;
 365 }
 366
 367 // Finds an existing environment variable and returns its index or -1 if not found
 368 static int pakfire_jail_find_env(struct pakfire_jail* jail, const char* key) {
 369         if (!key) {
 370                 errno = EINVAL;
 371                 return -1;
 372         }
 373
 374         const size_t length = strlen(key);
 375
 376         for (unsigned int i = 0; jail->env[i]; i++) {
 377                 if ((pakfire_string_startswith(jail->env[i], key)
 378                                 && *(jail->env[i] + length) == '=')) {
 379                         return i;
 380                 }
 381         }
 382
 383         // Nothing found
 384         return -1;
 385 }
 386
 387 // Returns the value of an environment variable or NULL
 388 PAKFIRE_EXPORT const char* pakfire_jail_get_env(struct pakfire_jail* jail,
 389                 const char* key) {
 390         int i = pakfire_jail_find_env(jail, key);
 391         if (i < 0)
 392                 return NULL;
 393
 394         return jail->env[i] + strlen(key) + 1;
 395 }
 396
 397 // Sets an environment variable
 398 PAKFIRE_EXPORT int pakfire_jail_set_env(struct pakfire_jail* jail,
 399                 const char* key, const char* value) {
 400         // Find the index where to write this value to
 401         int i = pakfire_jail_find_env(jail, key);
 402         if (i < 0)
 403                 i = pakfire_jail_env_length(jail);
 404
 405         // Return -ENOSPC when the environment is full
 406         if (i >= ENVIRON_SIZE) {
 407                 errno = ENOSPC;
 408                 return -1;
 409         }
 410
 411         // Free any previous value
 412         if (jail->env[i])
 413                 free(jail->env[i]);
 414
 415         // Format and set environment variable
 416         asprintf(&jail->env[i], "%s=%s", key, value);
 417
 418         DEBUG(jail->pakfire, "Set environment variable: %s\n", jail->env[i]);
 419
 420         return 0;
 421 }
 422
 423 // Imports an environment
 424 PAKFIRE_EXPORT int pakfire_jail_import_env(struct pakfire_jail* jail, const char* env[]) {
 425         if (!env)
 426                 return 0;
 427
 428         char* key;
 429         char* val;
 430         int r;
 431
 432         // Copy environment variables
 433         for (unsigned int i = 0; env[i]; i++) {
 434                 r = pakfire_string_partition(env[i], "=", &key, &val);
 435                 if (r)
 436                         continue;
 437
 438                 // Set value
 439                 r = pakfire_jail_set_env(jail, key, val);
 440
 441                 if (key)
 442                         free(key);
 443                 if (val)
 444                         free(val);
 445
 446                 // Break on error
 447                 if (r)
 448                         return r;
 449         }
 450
 451         return 0;
 452 }
 453
 454 // Timeout
 455
 456 PAKFIRE_EXPORT int pakfire_jail_set_timeout(
 457                 struct pakfire_jail* jail, unsigned int timeout) {
 458         // Store value
 459         jail->timeout.it_value.tv_sec = timeout;
 460
 461         if (timeout > 0)
 462                 DEBUG(jail->pakfire, "Timeout set to %d second(s)\n", timeout);
 463         else
 464                 DEBUG(jail->pakfire, "Timeout disabled\n");
 465
 466         return 0;
 467 }
 468
 469 static int pakfire_jail_create_timer(struct pakfire_jail* jail) {
 470         int r;
 471
 472         // Nothing to do if no timeout has been set
 473         if (!jail->timeout.it_value.tv_sec)
 474                 return -1;
 475
 476         // Create a new timer
 477         const int fd = timerfd_create(CLOCK_MONOTONIC, 0);
 478         if (fd < 0) {
 479                 ERROR(jail->pakfire, "Could not create timer: %m\n");
 480                 goto ERROR;
 481         }
 482
 483         // Arm timer
 484         r = timerfd_settime(fd, 0, &jail->timeout, NULL);
 485         if (r) {
 486                 ERROR(jail->pakfire, "Could not arm timer: %m\n");
 487                 goto ERROR;
 488         }
 489
 490         return fd;
 491
 492 ERROR:
 493         if (fd > 0)
 494                 close(fd);
 495
 496         return -1;
 497 }
 498
 499 // Signals
 500
 501 static int pakfire_jail_handle_signals(struct pakfire_jail* jail) {
 502         sigset_t mask;
 503         int r;
 504
 505         sigemptyset(&mask);
 506         sigaddset(&mask, SIGINT);
 507
 508         // Block signals
 509         r = sigprocmask(SIG_BLOCK, &mask, NULL);
 510         if (r < 0) {
 511                 ERROR(jail->pakfire, "Failed to block signals: %m\n");
 512                 return r;
 513         }
 514
 515         // Create a file descriptor
 516         r = signalfd(-1, &mask, SFD_NONBLOCK|SFD_CLOEXEC);
 517         if (r < 0) {
 518                 ERROR(jail->pakfire, "Failed to create signalfd: %m\n");
 519                 return r;
 520         }
 521
 522         return r;
 523 }
 524
 525 /*
 526         This function replaces any logging in the child process.
 527
 528         All log messages will be sent to the parent process through their respective pipes.
 529 */
 530 static void pakfire_jail_log(void* data, int priority, const char* file,
 531                 int line, const char* fn, const char* format, va_list args) {
 532         struct pakfire_jail_pipes* pipes = (struct pakfire_jail_pipes*)data;
 533         int fd;
 534
 535         switch (priority) {
 536                 case LOG_INFO:
 537                         fd = pipes->log_INFO[1];
 538                         break;
 539
 540                 case LOG_ERR:
 541                         fd = pipes->log_ERROR[1];
 542                         break;
 543
 544 #ifdef ENABLE_DEBUG
 545                 case LOG_DEBUG:
 546                         fd = pipes->log_DEBUG[1];
 547                         break;
 548 #endif /* ENABLE_DEBUG */
 549
 550                 // Ignore any messages of an unknown priority
 551                 default:
 552                         return;
 553         }
 554
 555         // Send the log message
 556         if (fd)
 557                 vdprintf(fd, format, args);
 558 }
 559
 560 static int pakfire_jail_log_buffer_is_full(const struct pakfire_log_buffer* buffer) {
 561         return (sizeof(buffer->data) == buffer->used);
 562 }
 563
 564 /*
 565         This function reads as much data as it can from the file descriptor.
 566         If it finds a whole line in it, it will send it to the logger and repeat the process.
 567         If not newline character is found, it will try to read more data until it finds one.
 568 */
 569 static int pakfire_jail_handle_log(struct pakfire_jail* jail,
 570                 struct pakfire_jail_exec* ctx, int priority, int fd,
 571                 struct pakfire_log_buffer* buffer, pakfire_jail_communicate_out callback, void* data) {
 572         char line[BUFFER_SIZE + 1];
 573
 574         // Fill up buffer from fd
 575         if (buffer->used < sizeof(buffer->data)) {
 576                 ssize_t bytes_read = read(fd, buffer->data + buffer->used,
 577                                 sizeof(buffer->data) - buffer->used);
 578
 579                 // Handle errors
 580                 if (bytes_read < 0) {
 581                         ERROR(jail->pakfire, "Could not read from fd %d: %m\n", fd);
 582                         return -1;
 583                 }
 584
 585                 // Update buffer size
 586                 buffer->used += bytes_read;
 587         }
 588
 589         // See if we have any lines that we can write
 590         while (buffer->used) {
 591                 // Search for the end of the first line
 592                 char* eol = memchr(buffer->data, '\n', buffer->used);
 593
 594                 // No newline found
 595                 if (!eol) {
 596                         // If the buffer is full, we send the content to the logger and try again
 597                         // This should not happen in practise
 598                         if (pakfire_jail_log_buffer_is_full(buffer)) {
 599                                 DEBUG(jail->pakfire, "Logging buffer is full. Sending all content\n");
 600
 601                                 eol = buffer->data + sizeof(buffer->data) - 1;
 602
 603                         // Otherwise we might have only read parts of the output
 604                         } else
 605                                 break;
 606                 }
 607
 608                 // Find the length of the string
 609                 size_t length = eol - buffer->data + 1;
 610
 611                 // Copy the line into the buffer
 612                 memcpy(line, buffer->data, length);
 613
 614                 // Terminate the string
 615                 line[length] = '\0';
 616
 617                 // Log the line
 618                 if (callback) {
 619                         int r = callback(jail->pakfire, data, priority, line, length);
 620                         if (r) {
 621                                 ERROR(jail->pakfire, "The logging callback returned an error: %d\n", r);
 622                                 return r;
 623                         }
 624                 }
 625
 626                 // Remove line from buffer
 627                 memmove(buffer->data, buffer->data + length, buffer->used - length);
 628                 buffer->used -= length;
 629         }
 630
 631         return 0;
 632 }
 633
 634 static int pakfire_jail_stream_stdin(struct pakfire_jail* jail,
 635                 struct pakfire_jail_exec* ctx, const int fd) {
 636         int r;
 637
 638         // Nothing to do if there is no stdin callback set
 639         if (!ctx->communicate.in) {
 640                 DEBUG(jail->pakfire, "Callback for standard input is not set\n");
 641                 return 0;
 642         }
 643
 644         // Skip if the writing pipe has already been closed
 645         if (!ctx->pipes.stdin[1])
 646                 return 0;
 647
 648         DEBUG(jail->pakfire, "Streaming standard input...\n");
 649
 650         // Calling the callback
 651         r = ctx->communicate.in(jail->pakfire, ctx->communicate.data, fd);
 652
 653         DEBUG(jail->pakfire, "Standard input callback finished: %d\n", r);
 654
 655         // The callback signaled that it has written everything
 656         if (r == EOF) {
 657                 DEBUG(jail->pakfire, "Closing standard input pipe\n");
 658
 659                 // Close the file-descriptor
 660                 close(fd);
 661
 662                 // Reset the file-descriptor so it won't be closed again later
 663                 ctx->pipes.stdin[1] = 0;
 664
 665                 // Report success
 666                 r = 0;
 667         }
 668
 669         return r;
 670 }
 671
 672 static int pakfire_jail_setup_pipe(struct pakfire_jail* jail, int (*fds)[2], const int flags) {
 673         int r = pipe2(*fds, flags);
 674         if (r < 0) {
 675                 ERROR(jail->pakfire, "Could not setup pipe: %m\n");
 676                 return 1;
 677         }
 678
 679         return 0;
 680 }
 681
 682 static void pakfire_jail_close_pipe(struct pakfire_jail* jail, int fds[2]) {
 683         for (unsigned int i = 0; i < 2; i++)
 684                 if (fds[i])
 685                         close(fds[i]);
 686 }
 687
 688 /*
 689         This is a convenience function to fetch the reading end of a pipe and
 690         closes the write end.
 691 */
 692 static int pakfire_jail_get_pipe_to_read(struct pakfire_jail* jail, int (*fds)[2]) {
 693         // Give the variables easier names to avoid confusion
 694         int* fd_read  = &(*fds)[0];
 695         int* fd_write = &(*fds)[1];
 696
 697         // Close the write end of the pipe
 698         if (*fd_write) {
 699                 close(*fd_write);
 700                 *fd_write = -1;
 701         }
 702
 703         // Return the read end
 704         return *fd_read;
 705 }
 706
 707 static int pakfire_jail_get_pipe_to_write(struct pakfire_jail* jail, int (*fds)[2]) {
 708         // Give the variables easier names to avoid confusion
 709         int* fd_read  = &(*fds)[0];
 710         int* fd_write = &(*fds)[1];
 711
 712         // Close the read end of the pipe
 713         if (*fd_read) {
 714                 close(*fd_read);
 715                 *fd_read = -1;
 716         }
 717
 718         // Return the write end
 719         return *fd_write;
 720 }
 721
 722 static int pakfire_jail_wait(struct pakfire_jail* jail, struct pakfire_jail_exec* ctx) {
 723         int epollfd = -1;
 724         struct epoll_event ev;
 725         struct epoll_event events[EPOLL_MAX_EVENTS];
 726         struct signalfd_siginfo siginfo;
 727         char garbage[8];
 728         int r = 0;
 729
 730         // Fetch file descriptors from context
 731         const int stdin = pakfire_jail_get_pipe_to_write(jail, &ctx->pipes.stdin);
 732         const int stdout = pakfire_jail_get_pipe_to_read(jail, &ctx->pipes.stdout);
 733         const int stderr = pakfire_jail_get_pipe_to_read(jail, &ctx->pipes.stderr);
 734         const int pidfd  = ctx->pidfd;
 735
 736         // Timer
 737         const int timerfd = pakfire_jail_create_timer(jail);
 738
 739         // Logging
 740         const int log_INFO  = pakfire_jail_get_pipe_to_read(jail, &ctx->pipes.log_INFO);
 741         const int log_ERROR = pakfire_jail_get_pipe_to_read(jail, &ctx->pipes.log_ERROR);
 742         const int log_DEBUG = pakfire_jail_get_pipe_to_read(jail, &ctx->pipes.log_DEBUG);
 743
 744         // Signals
 745         const int signalfd = pakfire_jail_handle_signals(jail);
 746
 747         // Make a list of all file descriptors we are interested in
 748         const int fds[] = {
 749                 stdin, stdout, stderr, pidfd, timerfd, signalfd, log_INFO, log_ERROR, log_DEBUG,
 750         };
 751
 752         // Setup epoll
 753         epollfd = epoll_create1(0);
 754         if (epollfd < 0) {
 755                 ERROR(jail->pakfire, "Could not initialize epoll(): %m\n");
 756                 r = 1;
 757                 goto ERROR;
 758         }
 759
 760         // Turn file descriptors into non-blocking mode and add them to epoll()
 761         for (unsigned int i = 0; i < sizeof(fds) / sizeof(*fds); i++) {
 762                 int fd = fds[i];
 763
 764                 // Skip fds which were not initialized
 765                 if (fd < 0)
 766                         continue;
 767
 768                 ev.events = EPOLLHUP;
 769
 770                 if (fd == stdin)
 771                         ev.events |= EPOLLOUT;
 772                 else
 773                         ev.events |= EPOLLIN;
 774
 775                 // Read flags
 776                 int flags = fcntl(fd, F_GETFL, 0);
 777
 778                 // Set modified flags
 779                 if (fcntl(fd, F_SETFL, flags|O_NONBLOCK) < 0) {
 780                         ERROR(jail->pakfire,
 781                                 "Could not set file descriptor %d into non-blocking mode: %m\n", fd);
 782                         r = 1;
 783                         goto ERROR;
 784                 }
 785
 786                 ev.data.fd = fd;
 787
 788                 if (epoll_ctl(epollfd, EPOLL_CTL_ADD, fd, &ev) < 0) {
 789                         ERROR(jail->pakfire, "Could not add file descriptor %d to epoll(): %m\n", fd);
 790                         r = 1;
 791                         goto ERROR;
 792                 }
 793         }
 794
 795         int ended = 0;
 796
 797         // Loop for as long as the process is alive
 798         while (!ended) {
 799                 int num = epoll_wait(epollfd, events, EPOLL_MAX_EVENTS, -1);
 800                 if (num < 1) {
 801                         // Ignore if epoll_wait() has been interrupted
 802                         if (errno == EINTR)
 803                                 continue;
 804
 805                         ERROR(jail->pakfire, "epoll_wait() failed: %m\n");
 806                         r = 1;
 807
 808                         goto ERROR;
 809                 }
 810
 811                 for (int i = 0; i < num; i++) {
 812                         int e  = events[i].events;
 813                         int fd = events[i].data.fd;
 814
 815                         struct pakfire_log_buffer* buffer = NULL;
 816                         pakfire_jail_communicate_out callback = NULL;
 817                         void* data = NULL;
 818                         int priority;
 819
 820                         // Check if there is any data to be read
 821                         if (e & EPOLLIN) {
 822                                 // Handle any changes to the PIDFD
 823                                 if (fd == pidfd) {
 824                                         // Call waidid() and store the result
 825                                         r = waitid(P_PIDFD, ctx->pidfd, &ctx->status, WEXITED);
 826                                         if (r) {
 827                                                 ERROR(jail->pakfire, "waitid() failed: %m\n");
 828                                                 goto ERROR;
 829                                         }
 830
 831                                         // Mark that we have ended so that we will process the remaining
 832                                         // events from epoll() now, but won't restart the outer loop.
 833                                         ended = 1;
 834                                         continue;
 835
 836                                 // Handle timer events
 837                                 } else if (fd == timerfd) {
 838                                         DEBUG(jail->pakfire, "Timer event received\n");
 839
 840                                         // Disarm the timer
 841                                         r = read(timerfd, garbage, sizeof(garbage));
 842                                         if (r < 1) {
 843                                                 ERROR(jail->pakfire, "Could not disarm timer: %m\n");
 844                                                 r = 1;
 845                                                 goto ERROR;
 846                                         }
 847
 848                                         // Terminate the process if it hasn't already ended
 849                                         if (!ended) {
 850                                                 DEBUG(jail->pakfire, "Terminating process...\n");
 851
 852                                                 // Send SIGTERM to the process
 853                                                 r = pidfd_send_signal(pidfd, SIGKILL, NULL, 0);
 854                                                 if (r) {
 855                                                         ERROR(jail->pakfire, "Could not kill process: %m\n");
 856                                                         goto ERROR;
 857                                                 }
 858                                         }
 859
 860                                         // There is nothing else to do
 861                                         continue;
 862
 863                                 // Handle signals
 864                                 } else if (fd == signalfd) {
 865                                         // Read the signal
 866                                         r = read(signalfd, &siginfo, sizeof(siginfo));
 867                                         if (r < 1) {
 868                                                 ERROR(jail->pakfire, "Could not read signal: %m\n");
 869                                                 goto ERROR;
 870                                         }
 871
 872                                         DEBUG(jail->pakfire, "Received signal %d\n", siginfo.ssi_signo);
 873
 874                                         // Handle signals
 875                                         switch (siginfo.ssi_signo) {
 876                                                 // Pass SIGINT down to the child process
 877                                                 case SIGINT:
 878                                                         r = pidfd_send_signal(pidfd, siginfo.ssi_signo, NULL, 0);
 879                                                         if (r) {
 880                                                                 ERROR(jail->pakfire, "Could not send signal to process: %m\n");
 881                                                                 goto ERROR;
 882                                                         }
 883                                                         break;
 884
 885                                                 default:
 886                                                         ERROR(jail->pakfire, "Received unhandled signal %d\n",
 887                                                                 siginfo.ssi_signo);
 888                                                         break;
 889                                         }
 890
 891                                         // Don't fall through to log processing
 892                                         continue;
 893
 894                                 // Handle logging messages
 895                                 } else if (fd == log_INFO) {
 896                                         buffer = &ctx->buffers.log_INFO;
 897                                         priority = LOG_INFO;
 898
 899                                         callback = pakfire_jail_default_log_callback;
 900
 901                                 } else if (fd == log_ERROR) {
 902                                         buffer = &ctx->buffers.log_ERROR;
 903                                         priority = LOG_ERR;
 904
 905                                         callback = pakfire_jail_default_log_callback;
 906
 907                                 } else if (fd == log_DEBUG) {
 908                                         buffer = &ctx->buffers.log_DEBUG;
 909                                         priority = LOG_DEBUG;
 910
 911                                         callback = pakfire_jail_default_log_callback;
 912
 913                                 // Handle anything from the log pipes
 914                                 } else if (fd == stdout) {
 915                                         buffer = &ctx->buffers.stdout;
 916                                         priority = LOG_INFO;
 917
 918                                         callback = ctx->communicate.out;
 919                                         data     = ctx->communicate.data;
 920
 921                                 } else if (fd == stderr) {
 922                                         buffer = &ctx->buffers.stderr;
 923                                         priority = LOG_ERR;
 924
 925                                         callback = ctx->communicate.out;
 926                                         data     = ctx->communicate.data;
 927
 928                                 } else {
 929                                         DEBUG(jail->pakfire, "Received invalid file descriptor %d\n", fd);
 930                                         continue;
 931                                 }
 932
 933                                 // Handle log event
 934                                 r = pakfire_jail_handle_log(jail, ctx, priority, fd, buffer, callback, data);
 935                                 if (r)
 936                                         goto ERROR;
 937                         }
 938
 939                         if (e & EPOLLOUT) {
 940                                 // Handle standard input
 941                                 if (fd == stdin) {
 942                                         r = pakfire_jail_stream_stdin(jail, ctx, fd);
 943                                         if (r) {
 944                                                 switch (errno) {
 945                                                         // Ignore if we filled up the buffer
 946                                                         case EAGAIN:
 947                                                                 break;
 948
 949                                                         default:
 950                                                                 ERROR(jail->pakfire, "Could not write to stdin: %m\n");
 951                                                                 goto ERROR;
 952                                                 }
 953                                         }
 954                                 }
 955                         }
 956
 957                         // Check if any file descriptors have been closed
 958                         if (e & EPOLLHUP) {
 959                                 // Remove the file descriptor
 960                                 r = epoll_ctl(epollfd, EPOLL_CTL_DEL, fd, NULL);
 961                                 if (r) {
 962                                         ERROR(jail->pakfire, "Could not remove closed file-descriptor %d: %m\n", fd);
 963                                         goto ERROR;
 964                                 }
 965                         }
 966                 }
 967         }
 968
 969 ERROR:
 970         if (epollfd > 0)
 971                 close(epollfd);
 972         if (timerfd > 0)
 973                 close(timerfd);
 974         if (signalfd > 0)
 975                 close(signalfd);
 976
 977         return r;
 978 }
 979
 980 int pakfire_jail_capture_stdout(struct pakfire* pakfire, void* data,
 981                 int priority, const char* line, size_t length) {
 982         char** output = (char**)data;
 983         int r;
 984
 985         // Append everything from stdout to a buffer
 986         if (output && priority == LOG_INFO) {
 987                 r = asprintf(output, "%s%s", (output && *output) ? *output : "", line);
 988                 if (r < 0)
 989                         return 1;
 990                 return 0;
 991         }
 992
 993         // Send everything else to the default logger
 994         return pakfire_jail_default_log_callback(pakfire, NULL, priority, line, length);
 995 }
 996
 997 // Capabilities
 998
 999 // Logs all capabilities of the current process
1000 static int pakfire_jail_show_capabilities(struct pakfire_jail* jail) {
1001         cap_t caps = NULL;
1002         char* name = NULL;
1003         cap_flag_value_t value_e;
1004         cap_flag_value_t value_i;
1005         cap_flag_value_t value_p;
1006         int r;
1007
1008         // Fetch PID
1009         pid_t pid = getpid();
1010
1011         // Fetch all capabilities
1012         caps = cap_get_proc();
1013         if (!caps) {
1014                 ERROR(jail->pakfire, "Could not fetch capabilities: %m\n");
1015                 r = 1;
1016                 goto ERROR;
1017         }
1018
1019         DEBUG(jail->pakfire, "Capabilities of PID %d:\n", pid);
1020
1021         // Iterate over all capabilities
1022         for (unsigned int cap = 0; cap_valid(cap); cap++) {
1023                 name = cap_to_name(cap);
1024
1025                 // Fetch effective value
1026                 r = cap_get_flag(caps, cap, CAP_EFFECTIVE, &value_e);
1027                 if (r)
1028                         goto ERROR;
1029
1030                 // Fetch inheritable value
1031                 r = cap_get_flag(caps, cap, CAP_INHERITABLE, &value_i);
1032                 if (r)
1033                         goto ERROR;
1034
1035                 // Fetch permitted value
1036                 r = cap_get_flag(caps, cap, CAP_PERMITTED, &value_p);
1037                 if (r)
1038                         goto ERROR;
1039
1040                 DEBUG(jail->pakfire,
1041                         "  %-24s : %c%c%c\n",
1042                         name,
1043                         (value_e == CAP_SET) ? 'e' : '-',
1044                         (value_i == CAP_SET) ? 'i' : '-',
1045                         (value_p == CAP_SET) ? 'p' : '-'
1046                 );
1047
1048                 // Free name
1049                 cap_free(name);
1050                 name = NULL;
1051         }
1052
1053         // Success
1054         r = 0;
1055
1056 ERROR:
1057         if (name)
1058                 cap_free(name);
1059         if (caps)
1060                 cap_free(caps);
1061
1062         return r;
1063 }
1064
1065 static int pakfire_jail_set_capabilities(struct pakfire_jail* jail) {
1066         cap_t caps = NULL;
1067         char* name = NULL;
1068         int r;
1069
1070         // Fetch capabilities
1071         caps = cap_get_proc();
1072         if (!caps) {
1073                 ERROR(jail->pakfire, "Could not read capabilities: %m\n");
1074                 r = 1;
1075                 goto ERROR;
1076         }
1077
1078         // Walk through all capabilities
1079         for (cap_value_t cap = 0; cap_valid(cap); cap++) {
1080                 cap_value_t _caps[] = { cap };
1081
1082                 // Fetch the name of the capability
1083                 name = cap_to_name(cap);
1084
1085                 r = cap_set_flag(caps, CAP_EFFECTIVE, 1, _caps, CAP_SET);
1086                 if (r) {
1087                         ERROR(jail->pakfire, "Could not set %s: %m\n", name);
1088                         goto ERROR;
1089                 }
1090
1091                 r = cap_set_flag(caps, CAP_INHERITABLE, 1, _caps, CAP_SET);
1092                 if (r) {
1093                         ERROR(jail->pakfire, "Could not set %s: %m\n", name);
1094                         goto ERROR;
1095                 }
1096
1097                 r = cap_set_flag(caps, CAP_PERMITTED, 1, _caps, CAP_SET);
1098                 if (r) {
1099                         ERROR(jail->pakfire, "Could not set %s: %m\n", name);
1100                         goto ERROR;
1101                 }
1102
1103                 // Free name
1104                 cap_free(name);
1105                 name = NULL;
1106         }
1107
1108         // Restore all capabilities
1109         r = cap_set_proc(caps);
1110         if (r) {
1111                 ERROR(jail->pakfire, "Restoring capabilities failed: %m\n");
1112                 goto ERROR;
1113         }
1114
1115         // Add all capabilities to the ambient set
1116         for (unsigned int cap = 0; cap_valid(cap); cap++) {
1117                 name = cap_to_name(cap);
1118
1119                 // Raise the capability
1120                 r = prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_RAISE, cap, 0, 0);
1121                 if (r) {
1122                         ERROR(jail->pakfire, "Could not set ambient capability %s: %m\n", name);
1123                         goto ERROR;
1124                 }
1125
1126                 // Free name
1127                 cap_free(name);
1128                 name = NULL;
1129         }
1130
1131         // Success
1132         r = 0;
1133
1134 ERROR:
1135         if (name)
1136                 cap_free(name);
1137         if (caps)
1138                 cap_free(caps);
1139
1140         return r;
1141 }
1142
1143 // Syscall Filter
1144
1145 static int pakfire_jail_limit_syscalls(struct pakfire_jail* jail) {
1146         const int syscalls[] = {
1147                 // The kernel's keyring isn't namespaced
1148                 SCMP_SYS(keyctl),
1149                 SCMP_SYS(add_key),
1150                 SCMP_SYS(request_key),
1151
1152                 // Disable userfaultfd
1153                 SCMP_SYS(userfaultfd),
1154
1155                 // Disable perf which could leak a lot of information about the host
1156                 SCMP_SYS(perf_event_open),
1157
1158                 0,
1159         };
1160         int r = 1;
1161
1162         DEBUG(jail->pakfire, "Applying syscall filter...\n");
1163
1164         // Setup a syscall filter which allows everything by default
1165         scmp_filter_ctx ctx = seccomp_init(SCMP_ACT_ALLOW);
1166         if (!ctx) {
1167                 ERROR(jail->pakfire, "Could not setup seccomp filter: %m\n");
1168                 goto ERROR;
1169         }
1170
1171         // All all syscalls
1172         for (const int* syscall = syscalls; *syscall; syscall++) {
1173                 r = seccomp_rule_add(ctx, SCMP_ACT_ERRNO(EPERM), *syscall, 0);
1174                 if (r) {
1175                         ERROR(jail->pakfire, "Could not configure syscall %d: %m\n", *syscall);
1176                         goto ERROR;
1177                 }
1178         }
1179
1180         // Load syscall filter into the kernel
1181         r = seccomp_load(ctx);
1182         if (r) {
1183                 ERROR(jail->pakfire, "Could not load syscall filter into the kernel: %m\n");
1184                 goto ERROR;
1185         }
1186
1187 ERROR:
1188         if (ctx)
1189                 seccomp_release(ctx);
1190
1191         return r;
1192 }
1193
1194 // Mountpoints
1195
1196 PAKFIRE_EXPORT int pakfire_jail_bind(struct pakfire_jail* jail,
1197                 const char* source, const char* target, int flags) {
1198         struct pakfire_jail_mountpoint* mp = NULL;
1199         int r;
1200
1201         // Check if there is any space left
1202         if (jail->num_mountpoints >= MAX_MOUNTPOINTS) {
1203                 errno = ENOSPC;
1204                 return 1;
1205         }
1206
1207         // Check for valid inputs
1208         if (!source || !target) {
1209                 errno = EINVAL;
1210                 return 1;
1211         }
1212
1213         // Select the next free slot
1214         mp = &jail->mountpoints[jail->num_mountpoints];
1215
1216         // Copy source
1217         r = pakfire_string_set(mp->source, source);
1218         if (r) {
1219                 ERROR(jail->pakfire, "Could not copy source: %m\n");
1220                 return r;
1221         }
1222
1223         // Copy target
1224         r = pakfire_string_set(mp->target, target);
1225         if (r) {
1226                 ERROR(jail->pakfire, "Could not copy target: %m\n");
1227                 return r;
1228         }
1229
1230         // Copy flags
1231         mp->flags = flags;
1232
1233         // Increment counter
1234         jail->num_mountpoints++;
1235
1236         return 0;
1237 }
1238
1239 static int pakfire_jail_mount_networking(struct pakfire_jail* jail) {
1240         int r;
1241
1242         const char* paths[] = {
1243                 "/etc/hosts",
1244                 "/etc/resolv.conf",
1245                 NULL,
1246         };
1247
1248         // Bind-mount all paths read-only
1249         for (const char** path = paths; *path; path++) {
1250                 r = pakfire_bind(jail->pakfire, *path, NULL, MS_RDONLY);
1251                 if (r)
1252                         return r;
1253         }
1254
1255         return 0;
1256 }
1257
1258 /*
1259         Mounts everything that we require in the new namespace
1260 */
1261 static int pakfire_jail_mount(struct pakfire_jail* jail, struct pakfire_jail_exec* ctx) {
1262         struct pakfire_jail_mountpoint* mp = NULL;
1263         int flags = 0;
1264         int r;
1265
1266         // Enable loop devices
1267         if (pakfire_jail_exec_has_flag(ctx, PAKFIRE_JAIL_HAS_LOOP_DEVICES))
1268                 flags |= PAKFIRE_MOUNT_LOOP_DEVICES;
1269
1270         // Mount all default stuff
1271         r = pakfire_mount_all(jail->pakfire, flags);
1272         if (r)
1273                 return r;
1274
1275         // Mount networking stuff
1276         if (pakfire_jail_exec_has_flag(ctx, PAKFIRE_JAIL_HAS_NETWORKING)) {
1277                 r = pakfire_jail_mount_networking(jail);
1278                 if (r)
1279                         return r;
1280         }
1281
1282         // Mount all custom stuff
1283         for (unsigned int i = 0; i < jail->num_mountpoints; i++) {
1284                 // Fetch mountpoint
1285                 mp = &jail->mountpoints[i];
1286
1287                 // Mount it
1288                 r = pakfire_bind(jail->pakfire, mp->source, mp->target, mp->flags);
1289                 if (r)
1290                         return r;
1291         }
1292
1293         // Log all mountpoints
1294         pakfire_mount_list(jail->pakfire);
1295
1296         return 0;
1297 }
1298
1299 // Networking
1300
1301 static int pakfire_jail_setup_loopback(struct pakfire_jail* jail) {
1302         struct nl_sock* nl = NULL;
1303         struct nl_cache* cache = NULL;
1304         struct rtnl_link* link = NULL;
1305         struct rtnl_link* change = NULL;
1306         int r;
1307
1308         DEBUG(jail->pakfire, "Setting up loopback...\n");
1309
1310         // Allocate a netlink socket
1311         nl = nl_socket_alloc();
1312         if (!nl) {
1313                 ERROR(jail->pakfire, "Could not allocate a netlink socket: %m\n");
1314                 r = 1;
1315                 goto ERROR;
1316         }
1317
1318         // Connect the socket
1319         r = nl_connect(nl, NETLINK_ROUTE);
1320         if (r) {
1321                 ERROR(jail->pakfire, "Could not connect netlink socket: %s\n", nl_geterror(r));
1322                 goto ERROR;
1323         }
1324
1325         // Allocate the netlink cache
1326         r = rtnl_link_alloc_cache(nl, AF_UNSPEC, &cache);
1327         if (r < 0) {
1328                 ERROR(jail->pakfire, "Unable to allocate netlink cache: %s\n", nl_geterror(r));
1329                 goto ERROR;
1330         }
1331
1332         // Fetch loopback interface
1333         link = rtnl_link_get_by_name(cache, "lo");
1334         if (!link) {
1335                 ERROR(jail->pakfire, "Could not find lo interface. Ignoring.\n");
1336                 r = 0;
1337                 goto ERROR;
1338         }
1339
1340         // Allocate a new link
1341         change = rtnl_link_alloc();
1342         if (!change) {
1343                 ERROR(jail->pakfire, "Could not allocate change link\n");
1344                 r = 1;
1345                 goto ERROR;
1346         }
1347
1348         // Set the link to UP
1349         rtnl_link_set_flags(change, IFF_UP);
1350
1351         // Apply any changes
1352         r = rtnl_link_change(nl, link, change, 0);
1353         if (r) {
1354                 ERROR(jail->pakfire, "Unable to activate loopback: %s\n", nl_geterror(r));
1355                 goto ERROR;
1356         }
1357
1358         // Success
1359         r = 0;
1360
1361 ERROR:
1362         if (nl)
1363                 nl_socket_free(nl);
1364
1365         return r;
1366 }
1367
1368 // UID/GID Mapping
1369
1370 static int pakfire_jail_setup_uid_mapping(struct pakfire_jail* jail, pid_t pid) {
1371         char path[PATH_MAX];
1372         int r;
1373
1374         // Skip mapping anything when running on /
1375         if (pakfire_on_root(jail->pakfire))
1376                 return 0;
1377
1378         // Make path
1379         r = pakfire_string_format(path, "/proc/%d/uid_map", pid);
1380         if (r)
1381                 return r;
1382
1383         // Fetch UID
1384         const uid_t uid = pakfire_uid(jail->pakfire);
1385
1386         // Fetch SUBUID
1387         const struct pakfire_subid* subuid = pakfire_subuid(jail->pakfire);
1388         if (!subuid)
1389                 return 1;
1390
1391         /* When running as root, we will map the entire range.
1392
1393            When running as a non-privileged user, we will map the root user inside the jail
1394            to the user's UID outside of the jail, and we will map the rest starting from one.
1395         */
1396
1397         // Running as root
1398         if (uid == 0) {
1399                 r = pakfire_file_write(jail->pakfire, path, 0, 0, 0,
1400                         "0 %lu %lu\n", subuid->id, subuid->length);
1401         } else {
1402                 r = pakfire_file_write(jail->pakfire, path, 0, 0, 0,
1403                         "0 %lu 1\n1 %lu %lu\n", uid, subuid->id, subuid->length);
1404         }
1405
1406         if (r) {
1407                 ERROR(jail->pakfire, "Could not map UIDs: %m\n");
1408                 return r;
1409         }
1410
1411         return r;
1412 }
1413
1414 static int pakfire_jail_setup_gid_mapping(struct pakfire_jail* jail, pid_t pid) {
1415         char path[PATH_MAX];
1416         int r;
1417
1418         // Skip mapping anything when running on /
1419         if (pakfire_on_root(jail->pakfire))
1420                 return 0;
1421
1422         // Fetch GID
1423         const gid_t gid = pakfire_gid(jail->pakfire);
1424
1425         // Fetch SUBGID
1426         const struct pakfire_subid* subgid = pakfire_subgid(jail->pakfire);
1427         if (!subgid)
1428                 return 1;
1429
1430         // Make path
1431         r = pakfire_string_format(path, "/proc/%d/gid_map", pid);
1432         if (r)
1433                 return r;
1434
1435         // Running as root
1436         if (gid == 0) {
1437                 r = pakfire_file_write(jail->pakfire, path, 0, 0, 0,
1438                         "0 %lu %lu\n", subgid->id, subgid->length);
1439         } else {
1440                 r = pakfire_file_write(jail->pakfire, path, 0, 0, 0,
1441                         "0 %lu 1\n%1 %lu %lu\n", gid, subgid->id, subgid->length);
1442         }
1443
1444         if (r) {
1445                 ERROR(jail->pakfire, "Could not map GIDs: %m\n");
1446                 return r;
1447         }
1448
1449         return r;
1450 }
1451
1452 static int pakfire_jail_setgroups(struct pakfire_jail* jail, pid_t pid) {
1453         char path[PATH_MAX];
1454         int r = 1;
1455
1456         // Make path
1457         r = pakfire_string_format(path, "/proc/%d/setgroups", pid);
1458         if (r)
1459                 return r;
1460
1461         // Open file for writing
1462         FILE* f = fopen(path, "w");
1463         if (!f) {
1464                 ERROR(jail->pakfire, "Could not open %s for writing: %m\n", path);
1465                 goto ERROR;
1466         }
1467
1468         // Write content
1469         int bytes_written = fprintf(f, "deny\n");
1470         if (bytes_written <= 0) {
1471                 ERROR(jail->pakfire, "Could not write to %s: %m\n", path);
1472                 goto ERROR;
1473         }
1474
1475         r = fclose(f);
1476         f = NULL;
1477         if (r) {
1478                 ERROR(jail->pakfire, "Could not close %s: %m\n", path);
1479                 goto ERROR;
1480         }
1481
1482 ERROR:
1483         if (f)
1484                 fclose(f);
1485
1486         return r;
1487 }
1488
1489 static int pakfire_jail_send_signal(struct pakfire_jail* jail, int fd) {
1490         const uint64_t val = 1;
1491         int r = 0;
1492
1493         DEBUG(jail->pakfire, "Sending signal...\n");
1494
1495         // Write to the file descriptor
1496         ssize_t bytes_written = write(fd, &val, sizeof(val));
1497         if (bytes_written < 0 || (size_t)bytes_written < sizeof(val)) {
1498                 ERROR(jail->pakfire, "Could not send signal: %m\n");
1499                 r = 1;
1500         }
1501
1502         // Close the file descriptor
1503         close(fd);
1504
1505         return r;
1506 }
1507
1508 static int pakfire_jail_wait_for_signal(struct pakfire_jail* jail, int fd) {
1509         uint64_t val = 0;
1510         int r = 0;
1511
1512         DEBUG(jail->pakfire, "Waiting for signal...\n");
1513
1514         ssize_t bytes_read = read(fd, &val, sizeof(val));
1515         if (bytes_read < 0 || (size_t)bytes_read < sizeof(val)) {
1516                 ERROR(jail->pakfire, "Error waiting for signal: %m\n");
1517                 r = 1;
1518         }
1519
1520         // Close the file descriptor
1521         close(fd);
1522
1523         return r;
1524 }
1525
1526 /*
1527         Performs the initialisation that needs to happen in the parent part
1528 */
1529 static int pakfire_jail_parent(struct pakfire_jail* jail, struct pakfire_jail_exec* ctx) {
1530         int r;
1531
1532         // Setup UID mapping
1533         r = pakfire_jail_setup_uid_mapping(jail, ctx->pid);
1534         if (r)
1535                 return r;
1536
1537         // Write "deny" to /proc/PID/setgroups
1538         r = pakfire_jail_setgroups(jail, ctx->pid);
1539         if (r)
1540                 return r;
1541
1542         // Setup GID mapping
1543         r = pakfire_jail_setup_gid_mapping(jail, ctx->pid);
1544         if (r)
1545                 return r;
1546
1547         // Parent has finished initialisation
1548         DEBUG(jail->pakfire, "Parent has finished initialization\n");
1549
1550         // Send signal to client
1551         r = pakfire_jail_send_signal(jail, ctx->completed_fd);
1552         if (r)
1553                 return r;
1554
1555         return 0;
1556 }
1557
1558 static int pakfire_jail_child(struct pakfire_jail* jail, struct pakfire_jail_exec* ctx,
1559                 const char* argv[]) {
1560         int r;
1561
1562         // Redirect any logging to our log pipe
1563         pakfire_set_log_callback(jail->pakfire, pakfire_jail_log, &ctx->pipes);
1564
1565         // Die with parent
1566         r = prctl(PR_SET_PDEATHSIG, SIGKILL, 0, 0, 0);
1567         if (r) {
1568                 ERROR(jail->pakfire, "Could not configure to die with parent: %m\n");
1569                 return 126;
1570         }
1571
1572         // Fetch my own PID
1573         pid_t pid = getpid();
1574
1575         DEBUG(jail->pakfire, "Launched child process in jail with PID %d\n", pid);
1576
1577         // Wait for the parent to finish initialization
1578         r = pakfire_jail_wait_for_signal(jail, ctx->completed_fd);
1579         if (r)
1580                 return r;
1581
1582         // Perform further initialization
1583
1584         // Fetch UID/GID
1585         uid_t uid = getuid();
1586         gid_t gid = getgid();
1587
1588         // Fetch EUID/EGID
1589         uid_t euid = geteuid();
1590         gid_t egid = getegid();
1591
1592         DEBUG(jail->pakfire, "  UID: %d (effective %d)\n", uid, euid);
1593         DEBUG(jail->pakfire, "  GID: %d (effective %d)\n", gid, egid);
1594
1595         // Check if we are (effectively running as root)
1596         if (uid || gid || euid || egid) {
1597                 ERROR(jail->pakfire, "Child process is not running as root\n");
1598                 return 126;
1599         }
1600
1601         const char* root = pakfire_get_path(jail->pakfire);
1602         const char* arch = pakfire_get_arch(jail->pakfire);
1603
1604         // Change root (unless root is /)
1605         if (!pakfire_on_root(jail->pakfire)) {
1606                 // Mount everything
1607                 r = pakfire_jail_mount(jail, ctx);
1608                 if (r)
1609                         return r;
1610
1611                 // Call chroot()
1612                 r = chroot(root);
1613                 if (r) {
1614                         ERROR(jail->pakfire, "chroot() to %s failed: %m\n", root);
1615                         return 1;
1616                 }
1617
1618                 // Change directory to /
1619                 r = chdir("/");
1620                 if (r) {
1621                         ERROR(jail->pakfire, "chdir() after chroot() failed: %m\n");
1622                         return 1;
1623                 }
1624         }
1625
1626         // Set personality
1627         unsigned long persona = pakfire_arch_personality(arch);
1628         if (persona) {
1629                 r = personality(persona);
1630                 if (r < 0) {
1631                         ERROR(jail->pakfire, "Could not set personality (%x)\n", (unsigned int)persona);
1632                         return 1;
1633                 }
1634         }
1635
1636         // Setup networking
1637         if (!pakfire_jail_exec_has_flag(ctx, PAKFIRE_JAIL_HAS_NETWORKING)) {
1638                 r = pakfire_jail_setup_loopback(jail);
1639                 if (r)
1640                         return 1;
1641         }
1642
1643         // Set nice level
1644         if (jail->nice) {
1645                 DEBUG(jail->pakfire, "Setting nice level to %d\n", jail->nice);
1646
1647                 r = setpriority(PRIO_PROCESS, pid, jail->nice);
1648                 if (r) {
1649                         ERROR(jail->pakfire, "Could not set nice level: %m\n");
1650                         return 1;
1651                 }
1652         }
1653
1654         // Close other end of log pipes
1655         close(ctx->pipes.log_INFO[0]);
1656         close(ctx->pipes.log_ERROR[0]);
1657 #ifdef ENABLE_DEBUG
1658         close(ctx->pipes.log_DEBUG[0]);
1659 #endif /* ENABLE_DEBUG */
1660
1661         // Connect standard input
1662         if (ctx->pipes.stdin[0]) {
1663                 r = dup2(ctx->pipes.stdin[0], STDIN_FILENO);
1664                 if (r < 0) {
1665                         ERROR(jail->pakfire, "Could not connect fd %d to stdin: %m\n",
1666                                 ctx->pipes.stdin[0]);
1667
1668                         return 1;
1669                 }
1670         }
1671
1672         // Connect standard output and error
1673         if (ctx->pipes.stdout[1] && ctx->pipes.stderr[1]) {
1674                 r = dup2(ctx->pipes.stdout[1], STDOUT_FILENO);
1675                 if (r < 0) {
1676                         ERROR(jail->pakfire, "Could not connect fd %d to stdout: %m\n",
1677                                 ctx->pipes.stdout[1]);
1678
1679                         return 1;
1680                 }
1681
1682                 r = dup2(ctx->pipes.stderr[1], STDERR_FILENO);
1683                 if (r < 0) {
1684                         ERROR(jail->pakfire, "Could not connect fd %d to stderr: %m\n",
1685                                 ctx->pipes.stderr[1]);
1686
1687                         return 1;
1688                 }
1689
1690                 // Close the pipe (as we have moved the original file descriptors)
1691                 pakfire_jail_close_pipe(jail, ctx->pipes.stdin);
1692                 pakfire_jail_close_pipe(jail, ctx->pipes.stdout);
1693                 pakfire_jail_close_pipe(jail, ctx->pipes.stderr);
1694         }
1695
1696         // Reset open file limit (http://0pointer.net/blog/file-descriptor-limits.html)
1697         r = pakfire_rlimit_reset_nofile(jail->pakfire);
1698         if (r)
1699                 return r;
1700
1701         // Don't drop any capabilities on execve()
1702         r = prctl(PR_SET_KEEPCAPS, 1);
1703         if (r) {
1704                 ERROR(jail->pakfire, "Could not set PR_SET_KEEPCAPS: %m\n");
1705                 return r;
1706         }
1707
1708         // Set capabilities
1709         r = pakfire_jail_set_capabilities(jail);
1710         if (r)
1711                 return r;
1712
1713         // Show capabilities
1714         r = pakfire_jail_show_capabilities(jail);
1715         if (r)
1716                 return r;
1717
1718         // Filter syscalls
1719         r = pakfire_jail_limit_syscalls(jail);
1720         if (r)
1721                 return r;
1722
1723         DEBUG(jail->pakfire, "Child process initialization done\n");
1724         DEBUG(jail->pakfire, "Launching command:\n");
1725
1726         // Log argv
1727         for (unsigned int i = 0; argv[i]; i++)
1728                 DEBUG(jail->pakfire, "  argv[%d] = %s\n", i, argv[i]);
1729
1730         // exec() command
1731         r = execvpe(argv[0], (char**)argv, jail->env);
1732         if (r < 0) {
1733                 // Translate errno into regular exit code
1734                 switch (errno) {
1735                         case ENOENT:
1736                                 // Ignore if the command doesn't exist
1737                                 if (ctx->flags & PAKFIRE_JAIL_NOENT_OK)
1738                                         r = 0;
1739                                 else
1740                                         r = 127;
1741
1742                                 break;
1743
1744                         default:
1745                                 r = 1;
1746                 }
1747
1748                 ERROR(jail->pakfire, "Could not execve(%s): %m\n", argv[0]);
1749         }
1750
1751         // We should not get here
1752         return r;
1753 }
1754
1755 // Run a command in the jail
1756 static int __pakfire_jail_exec(struct pakfire_jail* jail, const char* argv[],
1757                 const int interactive,
1758                 pakfire_jail_communicate_in  communicate_in,
1759                 pakfire_jail_communicate_out communicate_out,
1760                 void* data, int flags) {
1761         int exit = -1;
1762         int r;
1763
1764         // Check if argv is valid
1765         if (!argv || !argv[0]) {
1766                 errno = EINVAL;
1767                 return -1;
1768         }
1769
1770         // Send any output to the default logger if no callback is set
1771         if (!communicate_out)
1772                 communicate_out = pakfire_jail_default_log_callback;
1773
1774         // Initialize context for this call
1775         struct pakfire_jail_exec ctx = {
1776                 .flags = flags,
1777
1778                 .pipes = {
1779                         .stdin  = { -1, -1 },
1780                         .stdout = { -1, -1 },
1781                         .stderr = { -1, -1 },
1782                 },
1783
1784                 .communicate = {
1785                         .in   = communicate_in,
1786                         .out  = communicate_out,
1787                         .data = data,
1788                 },
1789
1790                 .pidfd = -1,
1791         };
1792
1793         DEBUG(jail->pakfire, "Executing jail...\n");
1794
1795         // Enable networking in interactive mode
1796         if (interactive)
1797                 ctx.flags |= PAKFIRE_JAIL_HAS_NETWORKING;
1798
1799         /*
1800                 Setup a file descriptor which can be used to notify the client that the parent
1801                 has completed configuration.
1802         */
1803         ctx.completed_fd = eventfd(0, EFD_CLOEXEC);
1804         if (ctx.completed_fd < 0) {
1805                 ERROR(jail->pakfire, "eventfd() failed: %m\n");
1806                 return -1;
1807         }
1808
1809         // Create pipes to communicate with child process if we are not running interactively
1810         if (!interactive) {
1811                 // stdin (only if callback is set)
1812                 if (ctx.communicate.in) {
1813                         r = pakfire_jail_setup_pipe(jail, &ctx.pipes.stdin, 0);
1814                         if (r)
1815                                 goto ERROR;
1816                 }
1817
1818                 // stdout
1819                 r = pakfire_jail_setup_pipe(jail, &ctx.pipes.stdout, 0);
1820                 if (r)
1821                         goto ERROR;
1822
1823                 // stderr
1824                 r = pakfire_jail_setup_pipe(jail, &ctx.pipes.stderr, 0);
1825                 if (r)
1826                         goto ERROR;
1827         }
1828
1829         // Setup pipes for logging
1830         // INFO
1831         r = pakfire_jail_setup_pipe(jail, &ctx.pipes.log_INFO, O_CLOEXEC);
1832         if (r)
1833                 goto ERROR;
1834
1835         // ERROR
1836         r = pakfire_jail_setup_pipe(jail, &ctx.pipes.log_ERROR, O_CLOEXEC);
1837         if (r)
1838                 goto ERROR;
1839
1840 #ifdef ENABLE_DEBUG
1841         // DEBUG
1842         r = pakfire_jail_setup_pipe(jail, &ctx.pipes.log_DEBUG, O_CLOEXEC);
1843         if (r)
1844                 goto ERROR;
1845 #endif /* ENABLE_DEBUG */
1846
1847         // Configure child process
1848         struct clone_args args = {
1849                 .flags =
1850                         CLONE_NEWCGROUP |
1851                         CLONE_NEWIPC |
1852                         CLONE_NEWNS |
1853                         CLONE_NEWPID |
1854                         CLONE_NEWUSER |
1855                         CLONE_NEWUTS |
1856                         CLONE_PIDFD,
1857                 .exit_signal = SIGCHLD,
1858                 .pidfd = (long long unsigned int)&ctx.pidfd,
1859         };
1860
1861         // Launch the process in a cgroup that is a leaf of the configured cgroup
1862         if (jail->cgroup) {
1863                 args.flags |= CLONE_INTO_CGROUP;
1864
1865                 // Fetch our UUID
1866                 const char* uuid = pakfire_jail_uuid(jail);
1867
1868                 // Create a temporary cgroup
1869                 r = pakfire_cgroup_child(&ctx.cgroup, jail->cgroup, uuid, 0);
1870                 if (r) {
1871                         ERROR(jail->pakfire, "Could not create cgroup for jail: %m\n");
1872                         goto ERROR;
1873                 }
1874
1875                 // Clone into this cgroup
1876                 args.cgroup = pakfire_cgroup_fd(ctx.cgroup);
1877         }
1878
1879         // Setup networking
1880         if (!pakfire_jail_exec_has_flag(&ctx, PAKFIRE_JAIL_HAS_NETWORKING)) {
1881                 args.flags |= CLONE_NEWNET;
1882         }
1883
1884         // Fork this process
1885         ctx.pid = clone3(&args, sizeof(args));
1886         if (ctx.pid < 0) {
1887                 ERROR(jail->pakfire, "Could not clone: %m\n");
1888                 return -1;
1889
1890         // Child process
1891         } else if (ctx.pid == 0) {
1892                 r = pakfire_jail_child(jail, &ctx, argv);
1893                 _exit(r);
1894         }
1895
1896         // Parent process
1897         r = pakfire_jail_parent(jail, &ctx);
1898         if (r)
1899                 goto ERROR;
1900
1901         DEBUG(jail->pakfire, "Waiting for PID %d to finish its work\n", ctx.pid);
1902
1903         // Read output of the child process
1904         r = pakfire_jail_wait(jail, &ctx);
1905         if (r)
1906                 goto ERROR;
1907
1908         // Handle exit status
1909         switch (ctx.status.si_code) {
1910                 case CLD_EXITED:
1911                         DEBUG(jail->pakfire, "The child process exited with code %d\n",
1912                                 ctx.status.si_status);
1913
1914                         // Pass exit code
1915                         exit = ctx.status.si_status;
1916                         break;
1917
1918                 case CLD_KILLED:
1919                         ERROR(jail->pakfire, "The child process was killed\n");
1920                         exit = 139;
1921                         break;
1922
1923                 case CLD_DUMPED:
1924                         ERROR(jail->pakfire, "The child process terminated abnormally\n");
1925                         break;
1926
1927                 // Log anything else
1928                 default:
1929                         ERROR(jail->pakfire, "Unknown child exit code: %d\n", ctx.status.si_code);
1930                         break;
1931         }
1932
1933 ERROR:
1934         // Destroy the temporary cgroup (if any)
1935         if (ctx.cgroup) {
1936                 // Read cgroup stats
1937                 r = pakfire_cgroup_stat(ctx.cgroup, &ctx.cgroup_stats);
1938                 if (r) {
1939                         ERROR(jail->pakfire, "Could not read cgroup stats: %m\n");
1940                 } else {
1941                         pakfire_cgroup_stat_dump(ctx.cgroup, &ctx.cgroup_stats);
1942                 }
1943
1944                 pakfire_cgroup_destroy(ctx.cgroup);
1945                 pakfire_cgroup_unref(ctx.cgroup);
1946         }
1947
1948         // Close any file descriptors
1949         pakfire_jail_close_pipe(jail, ctx.pipes.stdin);
1950         pakfire_jail_close_pipe(jail, ctx.pipes.stdout);
1951         pakfire_jail_close_pipe(jail, ctx.pipes.stderr);
1952         if (ctx.pidfd)
1953                 close(ctx.pidfd);
1954         pakfire_jail_close_pipe(jail, ctx.pipes.log_INFO);
1955         pakfire_jail_close_pipe(jail, ctx.pipes.log_ERROR);
1956         pakfire_jail_close_pipe(jail, ctx.pipes.log_DEBUG);
1957
1958         return exit;
1959 }
1960
1961 PAKFIRE_EXPORT int pakfire_jail_exec(
1962                 struct pakfire_jail* jail,
1963                 const char* argv[],
1964                 pakfire_jail_communicate_in  callback_in,
1965                 pakfire_jail_communicate_out callback_out,
1966                 void* data, int flags) {
1967         return __pakfire_jail_exec(jail, argv, 0, callback_in, callback_out, data, flags);
1968 }
1969
/*
        Runs a command in the jail in interactive mode, i.e. without any
        communication callbacks.

        Returns the error of pakfire_jail_setup_interactive_env() if that fails,
        otherwise whatever __pakfire_jail_exec() returns.
*/
static int pakfire_jail_exec_interactive(
                struct pakfire_jail* jail, const char* argv[], int flags) {
        // Prepare the environment first and bail out if that does not work
        const int err = pakfire_jail_setup_interactive_env(jail);
        if (err)
                return err;

        // Launch interactively: no stdin/stdout callbacks and no callback data
        return __pakfire_jail_exec(jail, argv, 1, NULL, NULL, NULL, flags);
}
1981
1982 int pakfire_jail_exec_script(struct pakfire_jail* jail,
1983                 const char* script,
1984                 const size_t size,
1985                 const char* args[],
1986                 pakfire_jail_communicate_in  callback_in,
1987                 pakfire_jail_communicate_out callback_out,
1988                 void* data) {
1989         char path[PATH_MAX];
1990         const char** argv = NULL;
1991         FILE* f = NULL;
1992         int r;
1993
1994         const char* root = pakfire_get_path(jail->pakfire);
1995
1996         // Write the scriptlet to disk
1997         r = pakfire_path_join(path, root, PAKFIRE_TMP_DIR "/pakfire-script.XXXXXX");
1998         if (r)
1999                 goto ERROR;
2000
2001         // Create a temporary file
2002         f = pakfire_mktemp(path, 0700);
2003         if (!f) {
2004                 ERROR(jail->pakfire, "Could not create temporary file: %m\n");
2005                 goto ERROR;
2006         }
2007
2008         DEBUG(jail->pakfire, "Writing script to %s:\n%.*s\n", path, (int)size, script);
2009
2010         // Write data
2011         r = fprintf(f, "%s", script);
2012         if (r < 0) {
2013                 ERROR(jail->pakfire, "Could not write script to file %s: %m\n", path);
2014                 goto ERROR;
2015         }
2016
2017         // Close file
2018         r = fclose(f);
2019         if (r) {
2020                 ERROR(jail->pakfire, "Could not close script file %s: %m\n", path);
2021                 goto ERROR;
2022         }
2023
2024         f = NULL;
2025
2026         // Count how many arguments were passed
2027         unsigned int argc = 1;
2028         if (args) {
2029                 for (const char** arg = args; *arg; arg++)
2030                         argc++;
2031         }
2032
2033         argv = calloc(argc + 1, sizeof(*argv));
2034         if (!argv) {
2035                 ERROR(jail->pakfire, "Could not allocate argv: %m\n");
2036                 goto ERROR;
2037         }
2038
2039         // Set command
2040         argv[0] = (root) ? pakfire_path_relpath(root, path) : path;
2041
2042         // Copy args
2043         for (unsigned int i = 1; i < argc; i++)
2044                 argv[i] = args[i-1];
2045
2046         // Run the script
2047         r = pakfire_jail_exec(jail, argv, callback_in, callback_out, data, 0);
2048
2049 ERROR:
2050         if (argv)
2051                 free(argv);
2052         if (f)
2053                 fclose(f);
2054
2055         // Remove script from disk
2056         if (*path)
2057                 unlink(path);
2058
2059         return r;
2060 }
2061
2062 /*
2063         A convenience function that creates a new jail, runs the given command and destroys
2064         the jail again.
2065 */
2066 int pakfire_jail_run(struct pakfire* pakfire, const char* argv[], int flags, char** output) {
2067         struct pakfire_jail* jail = NULL;
2068         int r;
2069
2070         // Create a new jail
2071         r = pakfire_jail_create(&jail, pakfire);
2072         if (r)
2073                 goto ERROR;
2074
2075         // Execute the command
2076         r = pakfire_jail_exec(jail, argv, NULL, pakfire_jail_capture_stdout, output, 0);
2077
2078 ERROR:
2079         if (jail)
2080                 pakfire_jail_unref(jail);
2081
2082         return r;
2083 }
2084
/*
        Convenience helper: sets up a fresh jail, runs the given script with argv as
        its arguments using pakfire_jail_exec_script() (without any callbacks) and
        releases the jail again.

        Returns the error of pakfire_jail_create() if the jail could not be created,
        otherwise the result of pakfire_jail_exec_script().

        NOTE(review): flags is accepted, but not used by this function.
*/
int pakfire_jail_run_script(struct pakfire* pakfire,
                const char* script, const size_t length, const char* argv[], int flags) {
        struct pakfire_jail* jail = NULL;

        // Only execute the script if the jail could be created
        int r = pakfire_jail_create(&jail, pakfire);
        if (r == 0)
                r = pakfire_jail_exec_script(jail, script, length, argv, NULL, NULL, NULL);

        // Drop our reference to the jail (if we have one)
        if (jail)
                pakfire_jail_unref(jail);

        return r;
}
2104
/*
        Launches an interactive login shell (/bin/bash --login) in the jail and
        returns the result of pakfire_jail_exec_interactive().
*/
int pakfire_jail_shell(struct pakfire_jail* jail) {
        // The command line of the shell
        const char* shell[] = {
                "/bin/bash",
                "--login",
                NULL,
        };

        // No extra flags are passed
        return pakfire_jail_exec_interactive(jail, shell, 0);
}
2113
2114 static int pakfire_jail_run_if_possible(struct pakfire* pakfire, const char** argv) {
2115         char path[PATH_MAX];
2116         int r;
2117
2118         r = pakfire_path(pakfire, path, "%s", *argv);
2119         if (r)
2120                 return r;
2121
2122         // Check if the file is executable
2123         r = access(path, X_OK);
2124         if (r) {
2125                 DEBUG(pakfire, "%s is not executable. Skipping...\n", *argv);
2126                 return 0;
2127         }
2128
2129         return pakfire_jail_run(pakfire, argv, 0, NULL);
2130 }
2131
/*
        Runs /sbin/ldconfig in a temporary jail if it is executable and returns the
        result of pakfire_jail_run_if_possible().
*/
int pakfire_jail_ldconfig(struct pakfire* pakfire) {
        // ldconfig is called without any arguments
        const char* command[] = { "/sbin/ldconfig", NULL };

        return pakfire_jail_run_if_possible(pakfire, command);
}
2140
/*
        Runs "systemd-tmpfiles --create" in a temporary jail if the binary is
        executable and returns the result of pakfire_jail_run_if_possible().
*/
int pakfire_jail_run_systemd_tmpfiles(struct pakfire* pakfire) {
        // The command line that is going to be executed
        const char* command[] = { "/usr/bin/systemd-tmpfiles", "--create", NULL };

        return pakfire_jail_run_if_possible(pakfire, command);
}