src/core/execute.c

   1 /* SPDX-License-Identifier: LGPL-2.1+ */
   2 /***
   3   This file is part of systemd.
   4
   5   Copyright 2010 Lennart Poettering
   6
   7   systemd is free software; you can redistribute it and/or modify it
   8   under the terms of the GNU Lesser General Public License as published by
   9   the Free Software Foundation; either version 2.1 of the License, or
  10   (at your option) any later version.
  11
  12   systemd is distributed in the hope that it will be useful, but
  13   WITHOUT ANY WARRANTY; without even the implied warranty of
  14   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  15   Lesser General Public License for more details.
  16
  17   You should have received a copy of the GNU Lesser General Public License
  18   along with systemd; If not, see <http://www.gnu.org/licenses/>.
  19 ***/
  20
  21 #include <errno.h>
  22 #include <fcntl.h>
  23 #include <glob.h>
  24 #include <grp.h>
  25 #include <poll.h>
  26 #include <signal.h>
  27 #include <string.h>
  28 #include <sys/capability.h>
  29 #include <sys/eventfd.h>
  30 #include <sys/mman.h>
  31 #include <sys/personality.h>
  32 #include <sys/prctl.h>
  33 #include <sys/shm.h>
  34 #include <sys/socket.h>
  35 #include <sys/stat.h>
  36 #include <sys/types.h>
  37 #include <sys/un.h>
  38 #include <unistd.h>
  39 #include <utmpx.h>
  40
  41 #if HAVE_PAM
  42 #include <security/pam_appl.h>
  43 #endif
  44
  45 #if HAVE_SELINUX
  46 #include <selinux/selinux.h>
  47 #endif
  48
  49 #if HAVE_SECCOMP
  50 #include <seccomp.h>
  51 #endif
  52
  53 #if HAVE_APPARMOR
  54 #include <sys/apparmor.h>
  55 #endif
  56
  57 #include "sd-messages.h"
  58
  59 #include "af-list.h"
  60 #include "alloc-util.h"
  61 #if HAVE_APPARMOR
  62 #include "apparmor-util.h"
  63 #endif
  64 #include "async.h"
  65 #include "barrier.h"
  66 #include "cap-list.h"
  67 #include "capability-util.h"
  68 #include "chown-recursive.h"
  69 #include "def.h"
  70 #include "env-util.h"
  71 #include "errno-list.h"
  72 #include "execute.h"
  73 #include "exit-status.h"
  74 #include "fd-util.h"
  75 #include "fileio.h"
  76 #include "format-util.h"
  77 #include "fs-util.h"
  78 #include "glob-util.h"
  79 #include "io-util.h"
  80 #include "ioprio.h"
  81 #include "label.h"
  82 #include "log.h"
  83 #include "macro.h"
  84 #include "missing.h"
  85 #include "mkdir.h"
  86 #include "namespace.h"
  87 #include "parse-util.h"
  88 #include "path-util.h"
  89 #include "process-util.h"
  90 #include "rlimit-util.h"
  91 #include "rm-rf.h"
  92 #if HAVE_SECCOMP
  93 #include "seccomp-util.h"
  94 #endif
  95 #include "securebits.h"
  96 #include "securebits-util.h"
  97 #include "selinux-util.h"
  98 #include "signal-util.h"
  99 #include "smack-util.h"
 100 #include "special.h"
 101 #include "string-table.h"
 102 #include "string-util.h"
 103 #include "strv.h"
 104 #include "syslog-util.h"
 105 #include "terminal-util.h"
 106 #include "unit.h"
 107 #include "user-util.h"
 108 #include "util.h"
 109 #include "utmp-wtmp.h"
 110
/* Idle timeouts, expressed as multiples of USEC_PER_SEC (5 and 1 respectively).
 * NOTE(review): the code consuming these is outside this part of the file — confirm their exact role there. */
#define IDLE_TIMEOUT_USEC (5*USEC_PER_SEC)
#define IDLE_TIMEOUT2_USEC (1*USEC_PER_SEC)

/* Access mode chown_terminal() applies to a TTY and afterwards expects to find on it.
 * This assumes there is a 'tty' group */
#define TTY_MODE 0620

/* Size handed to fd_inc_sndbuf() for the journal stream socket in connect_logger_as() */
#define SNDBUF_SIZE (8*1024*1024)
 118
 119 static int shift_fds(int fds[], unsigned n_fds) {
 120         int start, restart_from;
 121
 122         if (n_fds <= 0)
 123                 return 0;
 124
 125         /* Modifies the fds array! (sorts it) */
 126
 127         assert(fds);
 128
 129         start = 0;
 130         for (;;) {
 131                 int i;
 132
 133                 restart_from = -1;
 134
 135                 for (i = start; i < (int) n_fds; i++) {
 136                         int nfd;
 137
 138                         /* Already at right index? */
 139                         if (fds[i] == i+3)
 140                                 continue;
 141
 142                         nfd = fcntl(fds[i], F_DUPFD, i + 3);
 143                         if (nfd < 0)
 144                                 return -errno;
 145
 146                         safe_close(fds[i]);
 147                         fds[i] = nfd;
 148
 149                         /* Hmm, the fd we wanted isn't free? Then
 150                          * let's remember that and try again from here */
 151                         if (nfd != i+3 && restart_from < 0)
 152                                 restart_from = i;
 153                 }
 154
 155                 if (restart_from < 0)
 156                         break;
 157
 158                 start = restart_from;
 159         }
 160
 161         return 0;
 162 }
 163
 164 static int flags_fds(const int fds[], unsigned n_storage_fds, unsigned n_socket_fds, bool nonblock) {
 165         unsigned i, n_fds;
 166         int r;
 167
 168         n_fds = n_storage_fds + n_socket_fds;
 169         if (n_fds <= 0)
 170                 return 0;
 171
 172         assert(fds);
 173
 174         /* Drops/Sets O_NONBLOCK and FD_CLOEXEC from the file flags.
 175          * O_NONBLOCK only applies to socket activation though. */
 176
 177         for (i = 0; i < n_fds; i++) {
 178
 179                 if (i < n_socket_fds) {
 180                         r = fd_nonblock(fds[i], nonblock);
 181                         if (r < 0)
 182                                 return r;
 183                 }
 184
 185                 /* We unconditionally drop FD_CLOEXEC from the fds,
 186                  * since after all we want to pass these fds to our
 187                  * children */
 188
 189                 r = fd_cloexec(fds[i], false);
 190                 if (r < 0)
 191                         return r;
 192         }
 193
 194         return 0;
 195 }
 196
 197 static const char *exec_context_tty_path(const ExecContext *context) {
 198         assert(context);
 199
 200         if (context->stdio_as_fds)
 201                 return NULL;
 202
 203         if (context->tty_path)
 204                 return context->tty_path;
 205
 206         return "/dev/console";
 207 }
 208
 209 static void exec_context_tty_reset(const ExecContext *context, const ExecParameters *p) {
 210         const char *path;
 211
 212         assert(context);
 213
 214         path = exec_context_tty_path(context);
 215
 216         if (context->tty_vhangup) {
 217                 if (p && p->stdin_fd >= 0)
 218                         (void) terminal_vhangup_fd(p->stdin_fd);
 219                 else if (path)
 220                         (void) terminal_vhangup(path);
 221         }
 222
 223         if (context->tty_reset) {
 224                 if (p && p->stdin_fd >= 0)
 225                         (void) reset_terminal_fd(p->stdin_fd, true);
 226                 else if (path)
 227                         (void) reset_terminal(path);
 228         }
 229
 230         if (context->tty_vt_disallocate && path)
 231                 (void) vt_disallocate(path);
 232 }
 233
 234 static bool is_terminal_input(ExecInput i) {
 235         return IN_SET(i,
 236                       EXEC_INPUT_TTY,
 237                       EXEC_INPUT_TTY_FORCE,
 238                       EXEC_INPUT_TTY_FAIL);
 239 }
 240
 241 static bool is_terminal_output(ExecOutput o) {
 242         return IN_SET(o,
 243                       EXEC_OUTPUT_TTY,
 244                       EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
 245                       EXEC_OUTPUT_KMSG_AND_CONSOLE,
 246                       EXEC_OUTPUT_JOURNAL_AND_CONSOLE);
 247 }
 248
 249 static bool is_syslog_output(ExecOutput o) {
 250         return IN_SET(o,
 251                       EXEC_OUTPUT_SYSLOG,
 252                       EXEC_OUTPUT_SYSLOG_AND_CONSOLE);
 253 }
 254
 255 static bool is_kmsg_output(ExecOutput o) {
 256         return IN_SET(o,
 257                       EXEC_OUTPUT_KMSG,
 258                       EXEC_OUTPUT_KMSG_AND_CONSOLE);
 259 }
 260
 261 static bool exec_context_needs_term(const ExecContext *c) {
 262         assert(c);
 263
 264         /* Return true if the execution context suggests we should set $TERM to something useful. */
 265
 266         if (is_terminal_input(c->std_input))
 267                 return true;
 268
 269         if (is_terminal_output(c->std_output))
 270                 return true;
 271
 272         if (is_terminal_output(c->std_error))
 273                 return true;
 274
 275         return !!c->tty_path;
 276 }
 277
 278 static int open_null_as(int flags, int nfd) {
 279         int fd, r;
 280
 281         assert(nfd >= 0);
 282
 283         fd = open("/dev/null", flags|O_NOCTTY);
 284         if (fd < 0)
 285                 return -errno;
 286
 287         if (fd != nfd) {
 288                 r = dup2(fd, nfd) < 0 ? -errno : nfd;
 289                 safe_close(fd);
 290         } else
 291                 r = nfd;
 292
 293         return r;
 294 }
 295
 296 static int connect_journal_socket(int fd, uid_t uid, gid_t gid) {
 297         static const union sockaddr_union sa = {
 298                 .un.sun_family = AF_UNIX,
 299                 .un.sun_path = "/run/systemd/journal/stdout",
 300         };
 301         uid_t olduid = UID_INVALID;
 302         gid_t oldgid = GID_INVALID;
 303         int r;
 304
 305         if (gid_is_valid(gid)) {
 306                 oldgid = getgid();
 307
 308                 if (setegid(gid) < 0)
 309                         return -errno;
 310         }
 311
 312         if (uid_is_valid(uid)) {
 313                 olduid = getuid();
 314
 315                 if (seteuid(uid) < 0) {
 316                         r = -errno;
 317                         goto restore_gid;
 318                 }
 319         }
 320
 321         r = connect(fd, &sa.sa, SOCKADDR_UN_LEN(sa.un)) < 0 ? -errno : 0;
 322
 323         /* If we fail to restore the uid or gid, things will likely
 324            fail later on. This should only happen if an LSM interferes. */
 325
 326         if (uid_is_valid(uid))
 327                 (void) seteuid(olduid);
 328
 329  restore_gid:
 330         if (gid_is_valid(gid))
 331                 (void) setegid(oldgid);
 332
 333         return r;
 334 }
 335
 336 static int connect_logger_as(
 337                 Unit *unit,
 338                 const ExecContext *context,
 339                 const ExecParameters *params,
 340                 ExecOutput output,
 341                 const char *ident,
 342                 int nfd,
 343                 uid_t uid,
 344                 gid_t gid) {
 345
 346         int fd, r;
 347
 348         assert(context);
 349         assert(params);
 350         assert(output < _EXEC_OUTPUT_MAX);
 351         assert(ident);
 352         assert(nfd >= 0);
 353
 354         fd = socket(AF_UNIX, SOCK_STREAM, 0);
 355         if (fd < 0)
 356                 return -errno;
 357
 358         r = connect_journal_socket(fd, uid, gid);
 359         if (r < 0)
 360                 return r;
 361
 362         if (shutdown(fd, SHUT_RD) < 0) {
 363                 safe_close(fd);
 364                 return -errno;
 365         }
 366
 367         (void) fd_inc_sndbuf(fd, SNDBUF_SIZE);
 368
 369         dprintf(fd,
 370                 "%s\n"
 371                 "%s\n"
 372                 "%i\n"
 373                 "%i\n"
 374                 "%i\n"
 375                 "%i\n"
 376                 "%i\n",
 377                 context->syslog_identifier ?: ident,
 378                 params->flags & EXEC_PASS_LOG_UNIT ? unit->id : "",
 379                 context->syslog_priority,
 380                 !!context->syslog_level_prefix,
 381                 is_syslog_output(output),
 382                 is_kmsg_output(output),
 383                 is_terminal_output(output));
 384
 385         if (fd == nfd)
 386                 return nfd;
 387
 388         r = dup2(fd, nfd) < 0 ? -errno : nfd;
 389         safe_close(fd);
 390
 391         return r;
 392 }
 393 static int open_terminal_as(const char *path, mode_t mode, int nfd) {
 394         int fd, r;
 395
 396         assert(path);
 397         assert(nfd >= 0);
 398
 399         fd = open_terminal(path, mode | O_NOCTTY);
 400         if (fd < 0)
 401                 return fd;
 402
 403         if (fd != nfd) {
 404                 r = dup2(fd, nfd) < 0 ? -errno : nfd;
 405                 safe_close(fd);
 406         } else
 407                 r = nfd;
 408
 409         return r;
 410 }
 411
 412 static int fixup_input(ExecInput std_input, int socket_fd, bool apply_tty_stdin) {
 413
 414         if (is_terminal_input(std_input) && !apply_tty_stdin)
 415                 return EXEC_INPUT_NULL;
 416
 417         if (std_input == EXEC_INPUT_SOCKET && socket_fd < 0)
 418                 return EXEC_INPUT_NULL;
 419
 420         return std_input;
 421 }
 422
 423 static int fixup_output(ExecOutput std_output, int socket_fd) {
 424
 425         if (std_output == EXEC_OUTPUT_SOCKET && socket_fd < 0)
 426                 return EXEC_OUTPUT_INHERIT;
 427
 428         return std_output;
 429 }
 430
 431 static int setup_input(
 432                 const ExecContext *context,
 433                 const ExecParameters *params,
 434                 int socket_fd,
 435                 int named_iofds[3]) {
 436
 437         ExecInput i;
 438
 439         assert(context);
 440         assert(params);
 441
 442         if (params->stdin_fd >= 0) {
 443                 if (dup2(params->stdin_fd, STDIN_FILENO) < 0)
 444                         return -errno;
 445
 446                 /* Try to make this the controlling tty, if it is a tty, and reset it */
 447                 (void) ioctl(STDIN_FILENO, TIOCSCTTY, context->std_input == EXEC_INPUT_TTY_FORCE);
 448                 (void) reset_terminal_fd(STDIN_FILENO, true);
 449
 450                 return STDIN_FILENO;
 451         }
 452
 453         i = fixup_input(context->std_input, socket_fd, params->flags & EXEC_APPLY_TTY_STDIN);
 454
 455         switch (i) {
 456
 457         case EXEC_INPUT_NULL:
 458                 return open_null_as(O_RDONLY, STDIN_FILENO);
 459
 460         case EXEC_INPUT_TTY:
 461         case EXEC_INPUT_TTY_FORCE:
 462         case EXEC_INPUT_TTY_FAIL: {
 463                 int fd, r;
 464
 465                 fd = acquire_terminal(exec_context_tty_path(context),
 466                                       i == EXEC_INPUT_TTY_FAIL,
 467                                       i == EXEC_INPUT_TTY_FORCE,
 468                                       false,
 469                                       USEC_INFINITY);
 470                 if (fd < 0)
 471                         return fd;
 472
 473                 if (fd != STDIN_FILENO) {
 474                         r = dup2(fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
 475                         safe_close(fd);
 476                 } else
 477                         r = STDIN_FILENO;
 478
 479                 return r;
 480         }
 481
 482         case EXEC_INPUT_SOCKET:
 483                 return dup2(socket_fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
 484
 485         case EXEC_INPUT_NAMED_FD:
 486                 (void) fd_nonblock(named_iofds[STDIN_FILENO], false);
 487                 return dup2(named_iofds[STDIN_FILENO], STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
 488
 489         default:
 490                 assert_not_reached("Unknown input type");
 491         }
 492 }
 493
/* Connects the output file descriptor 'fileno' (STDOUT_FILENO or STDERR_FILENO) according to the
 * execution context.
 *
 * Explicitly passed fds (params->stdout_fd, params->stderr_fd) take precedence over the configured
 * output type. Otherwise the configured type, after fixup_output(), selects between inheriting,
 * /dev/null, the TTY, a journal stream, the passed socket_fd or the named fd in named_iofds[fileno].
 * If connecting to the journal fails a warning is logged and /dev/null is used instead.
 *
 * When the fd got connected to a journal stream, *journal_stream_dev and *journal_stream_ino are
 * updated with the device and inode number of the resulting fd (see the comment further down for
 * the precedence between stdout and stderr).
 *
 * Returns 'fileno' on success, a negative errno-style error on failure. */
static int setup_output(
                Unit *unit,
                const ExecContext *context,
                const ExecParameters *params,
                int fileno,
                int socket_fd,
                int named_iofds[3],
                const char *ident,
                uid_t uid,
                gid_t gid,
                dev_t *journal_stream_dev,
                ino_t *journal_stream_ino) {

        ExecOutput o;
        ExecInput i;
        int r;

        assert(unit);
        assert(context);
        assert(params);
        assert(ident);
        assert(journal_stream_dev);
        assert(journal_stream_ino);

        /* Explicitly passed fds always win over the configured output type */
        if (fileno == STDOUT_FILENO && params->stdout_fd >= 0) {

                if (dup2(params->stdout_fd, STDOUT_FILENO) < 0)
                        return -errno;

                return STDOUT_FILENO;
        }

        if (fileno == STDERR_FILENO && params->stderr_fd >= 0) {
                if (dup2(params->stderr_fd, STDERR_FILENO) < 0)
                        return -errno;

                return STDERR_FILENO;
        }

        /* The effective input type is needed too, as "inherit" and "tty" outputs depend on it */
        i = fixup_input(context->std_input, socket_fd, params->flags & EXEC_APPLY_TTY_STDIN);
        o = fixup_output(context->std_output, socket_fd);

        if (fileno == STDERR_FILENO) {
                ExecOutput e;
                e = fixup_output(context->std_error, socket_fd);

                /* This expects the input and output are already set up */

                /* Don't change the stderr file descriptor if we inherit all
                 * the way and are not on a tty */
                if (e == EXEC_OUTPUT_INHERIT &&
                    o == EXEC_OUTPUT_INHERIT &&
                    i == EXEC_INPUT_NULL &&
                    !is_terminal_input(context->std_input) &&
                    getppid () != 1)
                        return fileno;

                /* Duplicate from stdout if possible */
                if ((e == o && e != EXEC_OUTPUT_NAMED_FD) || e == EXEC_OUTPUT_INHERIT)
                        return dup2(STDOUT_FILENO, fileno) < 0 ? -errno : fileno;

                /* From here on the stderr setting is what the switch below dispatches on */
                o = e;

        } else if (o == EXEC_OUTPUT_INHERIT) {
                /* If input got downgraded, inherit the original value */
                if (i == EXEC_INPUT_NULL && is_terminal_input(context->std_input))
                        return open_terminal_as(exec_context_tty_path(context), O_WRONLY, fileno);

                /* If the input is connected to anything that's not a /dev/null, inherit that... */
                if (i != EXEC_INPUT_NULL)
                        return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;

                /* If we are not started from PID 1 we just inherit STDOUT from our parent process. */
                if (getppid() != 1)
                        return fileno;

                /* We need to open /dev/null here anew, to get the right access mode. */
                return open_null_as(O_WRONLY, fileno);
        }

        switch (o) {

        case EXEC_OUTPUT_NULL:
                return open_null_as(O_WRONLY, fileno);

        case EXEC_OUTPUT_TTY:
                /* If stdin is a terminal already, simply share it */
                if (is_terminal_input(i))
                        return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;

                /* We don't reset the terminal if this is just about output */
                return open_terminal_as(exec_context_tty_path(context), O_WRONLY, fileno);

        case EXEC_OUTPUT_SYSLOG:
        case EXEC_OUTPUT_SYSLOG_AND_CONSOLE:
        case EXEC_OUTPUT_KMSG:
        case EXEC_OUTPUT_KMSG_AND_CONSOLE:
        case EXEC_OUTPUT_JOURNAL:
        case EXEC_OUTPUT_JOURNAL_AND_CONSOLE:
                r = connect_logger_as(unit, context, params, o, ident, fileno, uid, gid);
                if (r < 0) {
                        /* Not fatal: warn and fall back to /dev/null */
                        log_unit_warning_errno(unit, r, "Failed to connect %s to the journal socket, ignoring: %m", fileno == STDOUT_FILENO ? "stdout" : "stderr");
                        r = open_null_as(O_WRONLY, fileno);
                } else {
                        struct stat st;

                        /* If we connected this fd to the journal via a stream, patch the device/inode into the passed
                         * parameters, but only then. This is useful so that we can set $JOURNAL_STREAM that permits
                         * services to detect whether they are connected to the journal or not.
                         *
                         * If both stdout and stderr are connected to a stream then let's make sure to store the data
                         * about STDERR as that's usually the best way to do logging. */

                        if (fstat(fileno, &st) >= 0 &&
                            (*journal_stream_ino == 0 || fileno == STDERR_FILENO)) {
                                *journal_stream_dev = st.st_dev;
                                *journal_stream_ino = st.st_ino;
                        }
                }
                return r;

        case EXEC_OUTPUT_SOCKET:
                /* fixup_output() turns socket output into "inherit" if there is no socket fd */
                assert(socket_fd >= 0);
                return dup2(socket_fd, fileno) < 0 ? -errno : fileno;

        case EXEC_OUTPUT_NAMED_FD:
                (void) fd_nonblock(named_iofds[fileno], false);
                return dup2(named_iofds[fileno], fileno) < 0 ? -errno : fileno;

        default:
                assert_not_reached("Unknown error type");
        }
}
 626
 627 static int chown_terminal(int fd, uid_t uid) {
 628         struct stat st;
 629
 630         assert(fd >= 0);
 631
 632         /* Before we chown/chmod the TTY, let's ensure this is actually a tty */
 633         if (isatty(fd) < 1)
 634                 return 0;
 635
 636         /* This might fail. What matters are the results. */
 637         (void) fchown(fd, uid, -1);
 638         (void) fchmod(fd, TTY_MODE);
 639
 640         if (fstat(fd, &st) < 0)
 641                 return -errno;
 642
 643         if (st.st_uid != uid || (st.st_mode & 0777) != TTY_MODE)
 644                 return -EPERM;
 645
 646         return 0;
 647 }
 648
 649 static int setup_confirm_stdio(const char *vc, int *_saved_stdin, int *_saved_stdout) {
 650         _cleanup_close_ int fd = -1, saved_stdin = -1, saved_stdout = -1;
 651         int r;
 652
 653         assert(_saved_stdin);
 654         assert(_saved_stdout);
 655
 656         saved_stdin = fcntl(STDIN_FILENO, F_DUPFD, 3);
 657         if (saved_stdin < 0)
 658                 return -errno;
 659
 660         saved_stdout = fcntl(STDOUT_FILENO, F_DUPFD, 3);
 661         if (saved_stdout < 0)
 662                 return -errno;
 663
 664         fd = acquire_terminal(vc, false, false, false, DEFAULT_CONFIRM_USEC);
 665         if (fd < 0)
 666                 return fd;
 667
 668         r = chown_terminal(fd, getuid());
 669         if (r < 0)
 670                 return r;
 671
 672         r = reset_terminal_fd(fd, true);
 673         if (r < 0)
 674                 return r;
 675
 676         if (dup2(fd, STDIN_FILENO) < 0)
 677                 return -errno;
 678
 679         if (dup2(fd, STDOUT_FILENO) < 0)
 680                 return -errno;
 681
 682         if (fd >= 2)
 683                 safe_close(fd);
 684         fd = -1;
 685
 686         *_saved_stdin = saved_stdin;
 687         *_saved_stdout = saved_stdout;
 688
 689         saved_stdin = saved_stdout = -1;
 690
 691         return 0;
 692 }
 693
 694 static void write_confirm_error_fd(int err, int fd, const Unit *u) {
 695         assert(err < 0);
 696
 697         if (err == -ETIMEDOUT)
 698                 dprintf(fd, "Confirmation question timed out for %s, assuming positive response.\n", u->id);
 699         else {
 700                 errno = -err;
 701                 dprintf(fd, "Couldn't ask confirmation for %s: %m, assuming positive response.\n", u->id);
 702         }
 703 }
 704
 705 static void write_confirm_error(int err, const char *vc, const Unit *u) {
 706         _cleanup_close_ int fd = -1;
 707
 708         assert(vc);
 709
 710         fd = open_terminal(vc, O_WRONLY|O_NOCTTY|O_CLOEXEC);
 711         if (fd < 0)
 712                 return;
 713
 714         write_confirm_error_fd(err, fd, u);
 715 }
 716
 717 static int restore_confirm_stdio(int *saved_stdin, int *saved_stdout) {
 718         int r = 0;
 719
 720         assert(saved_stdin);
 721         assert(saved_stdout);
 722
 723         release_terminal();
 724
 725         if (*saved_stdin >= 0)
 726                 if (dup2(*saved_stdin, STDIN_FILENO) < 0)
 727                         r = -errno;
 728
 729         if (*saved_stdout >= 0)
 730                 if (dup2(*saved_stdout, STDOUT_FILENO) < 0)
 731                         r = -errno;
 732
 733         *saved_stdin = safe_close(*saved_stdin);
 734         *saved_stdout = safe_close(*saved_stdout);
 735
 736         return r;
 737 }
 738
/* Possible results of ask_for_confirmation() */
enum {
        CONFIRM_PRETEND_FAILURE = -1, /* 'f': don't execute the command and pretend it failed */
        CONFIRM_PRETEND_SUCCESS =  0, /* 's': don't execute the command and pretend it succeeded */
        CONFIRM_EXECUTE = 1,          /* 'y': execute the command; also used on internal errors */
};
 744
 745 static int ask_for_confirmation(const char *vc, Unit *u, const char *cmdline) {
 746         int saved_stdout = -1, saved_stdin = -1, r;
 747         _cleanup_free_ char *e = NULL;
 748         char c;
 749
 750         /* For any internal errors, assume a positive response. */
 751         r = setup_confirm_stdio(vc, &saved_stdin, &saved_stdout);
 752         if (r < 0) {
 753                 write_confirm_error(r, vc, u);
 754                 return CONFIRM_EXECUTE;
 755         }
 756
 757         /* confirm_spawn might have been disabled while we were sleeping. */
 758         if (manager_is_confirm_spawn_disabled(u->manager)) {
 759                 r = 1;
 760                 goto restore_stdio;
 761         }
 762
 763         e = ellipsize(cmdline, 60, 100);
 764         if (!e) {
 765                 log_oom();
 766                 r = CONFIRM_EXECUTE;
 767                 goto restore_stdio;
 768         }
 769
 770         for (;;) {
 771                 r = ask_char(&c, "yfshiDjcn", "Execute %s? [y, f, s – h for help] ", e);
 772                 if (r < 0) {
 773                         write_confirm_error_fd(r, STDOUT_FILENO, u);
 774                         r = CONFIRM_EXECUTE;
 775                         goto restore_stdio;
 776                 }
 777
 778                 switch (c) {
 779                 case 'c':
 780                         printf("Resuming normal execution.\n");
 781                         manager_disable_confirm_spawn();
 782                         r = 1;
 783                         break;
 784                 case 'D':
 785                         unit_dump(u, stdout, "  ");
 786                         continue; /* ask again */
 787                 case 'f':
 788                         printf("Failing execution.\n");
 789                         r = CONFIRM_PRETEND_FAILURE;
 790                         break;
 791                 case 'h':
 792                         printf("  c - continue, proceed without asking anymore\n"
 793                                "  D - dump, show the state of the unit\n"
 794                                "  f - fail, don't execute the command and pretend it failed\n"
 795                                "  h - help\n"
 796                                "  i - info, show a short summary of the unit\n"
 797                                "  j - jobs, show jobs that are in progress\n"
 798                                "  s - skip, don't execute the command and pretend it succeeded\n"
 799                                "  y - yes, execute the command\n");
 800                         continue; /* ask again */
 801                 case 'i':
 802                         printf("  Description: %s\n"
 803                                "  Unit:        %s\n"
 804                                "  Command:     %s\n",
 805                                u->id, u->description, cmdline);
 806                         continue; /* ask again */
 807                 case 'j':
 808                         manager_dump_jobs(u->manager, stdout, "  ");
 809                         continue; /* ask again */
 810                 case 'n':
 811                         /* 'n' was removed in favor of 'f'. */
 812                         printf("Didn't understand 'n', did you mean 'f'?\n");
 813                         continue; /* ask again */
 814                 case 's':
 815                         printf("Skipping execution.\n");
 816                         r = CONFIRM_PRETEND_SUCCESS;
 817                         break;
 818                 case 'y':
 819                         r = CONFIRM_EXECUTE;
 820                         break;
 821                 default:
 822                         assert_not_reached("Unhandled choice");
 823                 }
 824                 break;
 825         }
 826
 827 restore_stdio:
 828         restore_confirm_stdio(&saved_stdin, &saved_stdout);
 829         return r;
 830 }
 831
 832 static int get_fixed_user(const ExecContext *c, const char **user,
 833                           uid_t *uid, gid_t *gid,
 834                           const char **home, const char **shell) {
 835         int r;
 836         const char *name;
 837
 838         assert(c);
 839
 840         if (!c->user)
 841                 return 0;
 842
 843         /* Note that we don't set $HOME or $SHELL if they are not particularly enlightening anyway
 844          * (i.e. are "/" or "/bin/nologin"). */
 845
 846         name = c->user;
 847         r = get_user_creds_clean(&name, uid, gid, home, shell);
 848         if (r < 0)
 849                 return r;
 850
 851         *user = name;
 852         return 0;
 853 }
 854
 855 static int get_fixed_group(const ExecContext *c, const char **group, gid_t *gid) {
 856         int r;
 857         const char *name;
 858
 859         assert(c);
 860
 861         if (!c->group)
 862                 return 0;
 863
 864         name = c->group;
 865         r = get_group_creds(&name, gid);
 866         if (r < 0)
 867                 return r;
 868
 869         *group = name;
 870         return 0;
 871 }
 872
 873 static int get_supplementary_groups(const ExecContext *c, const char *user,
 874                                     const char *group, gid_t gid,
 875                                     gid_t **supplementary_gids, int *ngids) {
 876         char **i;
 877         int r, k = 0;
 878         int ngroups_max;
 879         bool keep_groups = false;
 880         gid_t *groups = NULL;
 881         _cleanup_free_ gid_t *l_gids = NULL;
 882
 883         assert(c);
 884
 885         /*
 886          * If user is given, then lookup GID and supplementary groups list.
 887          * We avoid NSS lookups for gid=0. Also we have to initialize groups
 888          * here and as early as possible so we keep the list of supplementary
 889          * groups of the caller.
 890          */
 891         if (user && gid_is_valid(gid) && gid != 0) {
 892                 /* First step, initialize groups from /etc/groups */
 893                 if (initgroups(user, gid) < 0)
 894                         return -errno;
 895
 896                 keep_groups = true;
 897         }
 898
 899         if (strv_isempty(c->supplementary_groups))
 900                 return 0;
 901
 902         /*
 903          * If SupplementaryGroups= was passed then NGROUPS_MAX has to
 904          * be positive, otherwise fail.
 905          */
 906         errno = 0;
 907         ngroups_max = (int) sysconf(_SC_NGROUPS_MAX);
 908         if (ngroups_max <= 0) {
 909                 if (errno > 0)
 910                         return -errno;
 911                 else
 912                         return -EOPNOTSUPP; /* For all other values */
 913         }
 914
 915         l_gids = new(gid_t, ngroups_max);
 916         if (!l_gids)
 917                 return -ENOMEM;
 918
 919         if (keep_groups) {
 920                 /*
 921                  * Lookup the list of groups that the user belongs to, we
 922                  * avoid NSS lookups here too for gid=0.
 923                  */
 924                 k = ngroups_max;
 925                 if (getgrouplist(user, gid, l_gids, &k) < 0)
 926                         return -EINVAL;
 927         } else
 928                 k = 0;
 929
 930         STRV_FOREACH(i, c->supplementary_groups) {
 931                 const char *g;
 932
 933                 if (k >= ngroups_max)
 934                         return -E2BIG;
 935
 936                 g = *i;
 937                 r = get_group_creds(&g, l_gids+k);
 938                 if (r < 0)
 939                         return r;
 940
 941                 k++;
 942         }
 943
 944         /*
 945          * Sets ngids to zero to drop all supplementary groups, happens
 946          * when we are under root and SupplementaryGroups= is empty.
 947          */
 948         if (k == 0) {
 949                 *ngids = 0;
 950                 return 0;
 951         }
 952
 953         /* Otherwise get the final list of supplementary groups */
 954         groups = memdup(l_gids, sizeof(gid_t) * k);
 955         if (!groups)
 956                 return -ENOMEM;
 957
 958         *supplementary_gids = groups;
 959         *ngids = k;
 960
 961         groups = NULL;
 962
 963         return 0;
 964 }
 965
 966 static int enforce_groups(gid_t gid, gid_t *supplementary_gids, int ngids) {
 967         int r;
 968
 969         /* Handle SupplementaryGroups= if it is not empty */
 970         if (ngids > 0) {
 971                 r = maybe_setgroups(ngids, supplementary_gids);
 972                 if (r < 0)
 973                         return r;
 974         }
 975
 976         if (gid_is_valid(gid)) {
 977                 /* Then set our gids */
 978                 if (setresgid(gid, gid, gid) < 0)
 979                         return -errno;
 980         }
 981
 982         return 0;
 983 }
 984
 985 static int enforce_user(const ExecContext *context, uid_t uid) {
 986         assert(context);
 987
 988         if (!uid_is_valid(uid))
 989                 return 0;
 990
 991         /* Sets (but doesn't look up) the uid and make sure we keep the
 992          * capabilities while doing so. */
 993
 994         if (context->capability_ambient_set != 0) {
 995
 996                 /* First step: If we need to keep capabilities but
 997                  * drop privileges we need to make sure we keep our
 998                  * caps, while we drop privileges. */
 999                 if (uid != 0) {
1000                         int sb = context->secure_bits | 1<<SECURE_KEEP_CAPS;
1001
1002                         if (prctl(PR_GET_SECUREBITS) != sb)
1003                                 if (prctl(PR_SET_SECUREBITS, sb) < 0)
1004                                         return -errno;
1005                 }
1006         }
1007
1008         /* Second step: actually set the uids */
1009         if (setresuid(uid, uid, uid) < 0)
1010                 return -errno;
1011
1012         /* At this point we should have all necessary capabilities but
1013            are otherwise a normal user. However, the caps might got
1014            corrupted due to the setresuid() so we need clean them up
1015            later. This is done outside of this call. */
1016
1017         return 0;
1018 }
1019
1020 #if HAVE_PAM
1021
1022 static int null_conv(
1023                 int num_msg,
1024                 const struct pam_message **msg,
1025                 struct pam_response **resp,
1026                 void *appdata_ptr) {
1027
1028         /* We don't support conversations */
1029
1030         return PAM_CONV_ERR;
1031 }
1032
1033 #endif
1034
/* Opens a PAM session for the given service name and user, and forks off a helper process that closes the
 * session again once the calling process has died.
 *
 *   name      PAM service name, passed to pam_start()
 *   user      user name, passed to pam_start()
 *   uid, gid  credentials the helper process drops to
 *   tty       if non-NULL, set as PAM_TTY item
 *   env       in: environment exported to PAM; out (on success): replaced by the environment list
 *             returned by PAM, the previous list is freed
 *   fds       file descriptors that are closed in the helper process
 *   n_fds     number of entries in fds
 *
 * Returns 0 on success, and a negative errno-style value on failure (-EPERM for PAM-level errors). When
 * built without PAM support this is a NOP returning 0. */
static int setup_pam(
                const char *name,
                const char *user,
                uid_t uid,
                gid_t gid,
                const char *tty,
                char ***env,
                int fds[], unsigned n_fds) {

#if HAVE_PAM

        static const struct pam_conv conv = {
                .conv = null_conv,
                .appdata_ptr = NULL
        };

        _cleanup_(barrier_destroy) Barrier barrier = BARRIER_NULL;
        pam_handle_t *handle = NULL;
        sigset_t old_ss;
        int pam_code = PAM_SUCCESS, r;
        char **nv, **e = NULL;
        bool close_session = false;
        pid_t pam_pid = 0, parent_pid;
        int flags = 0;

        assert(name);
        assert(user);
        assert(env);

        /* We set up PAM in the parent process, then fork. The child
         * will then stay around until killed via PR_SET_PDEATHSIG or
         * systemd via the cgroup logic. It will then remove the PAM
         * session again. The parent process will exec() the actual
         * daemon. We do things this way to ensure that the main PID
         * of the daemon is the one we initially fork()ed. */

        r = barrier_create(&barrier);
        if (r < 0)
                goto fail;

        if (log_get_max_level() < LOG_DEBUG)
                flags |= PAM_SILENT;

        pam_code = pam_start(name, user, &conv, &handle);
        if (pam_code != PAM_SUCCESS) {
                handle = NULL;
                goto fail;
        }

        if (tty) {
                pam_code = pam_set_item(handle, PAM_TTY, tty);
                if (pam_code != PAM_SUCCESS)
                        goto fail;
        }

        /* Export our environment to the PAM modules */
        STRV_FOREACH(nv, *env) {
                pam_code = pam_putenv(handle, *nv);
                if (pam_code != PAM_SUCCESS)
                        goto fail;
        }

        pam_code = pam_acct_mgmt(handle, flags);
        if (pam_code != PAM_SUCCESS)
                goto fail;

        pam_code = pam_open_session(handle, flags);
        if (pam_code != PAM_SUCCESS)
                goto fail;

        /* From here on the failure path has to close the session again */
        close_session = true;

        e = pam_getenvlist(handle);
        if (!e) {
                pam_code = PAM_BUF_ERR;
                goto fail;
        }

        /* Block SIGTERM, so that we know that it won't get lost in
         * the child */

        assert_se(sigprocmask_many(SIG_BLOCK, &old_ss, SIGTERM, -1) >= 0);

        parent_pid = getpid_cached();

        pam_pid = fork();
        if (pam_pid < 0) {
                r = -errno;
                goto fail;
        }

        if (pam_pid == 0) {
                int sig, ret = EXIT_PAM;

                /* The child's job is to reset the PAM session on
                 * termination */
                barrier_set_role(&barrier, BARRIER_CHILD);

                /* This string must fit in 10 chars (i.e. the length
                 * of "/sbin/init"), to look pretty in /bin/ps */
                rename_process("(sd-pam)");

                /* Make sure we don't keep open the passed fds in this
                child. We assume that otherwise only those fds are
                open here that have been opened by PAM. */
                close_many(fds, n_fds);

                /* Drop privileges - we don't need any to pam_close_session
                 * and this will make PR_SET_PDEATHSIG work in most cases.
                 * If this fails, ignore the error - but expect sd-pam threads
                 * to fail to exit normally */

                r = maybe_setgroups(0, NULL);
                if (r < 0)
                        log_warning_errno(r, "Failed to setgroups() in sd-pam: %m");
                if (setresgid(gid, gid, gid) < 0)
                        log_warning_errno(errno, "Failed to setresgid() in sd-pam: %m");
                if (setresuid(uid, uid, uid) < 0)
                        log_warning_errno(errno, "Failed to setresuid() in sd-pam: %m");

                (void) ignore_signals(SIGPIPE, -1);

                /* Wait until our parent died. This will only work if
                 * the above setresuid() succeeds, otherwise the kernel
                 * will not allow unprivileged parents kill their privileged
                 * children this way. We rely on the control groups kill logic
                 * to do the rest for us. */
                if (prctl(PR_SET_PDEATHSIG, SIGTERM) < 0)
                        goto child_finish;

                /* Tell the parent that our setup is done. This is especially
                 * important regarding dropping privileges. Otherwise, unit
                 * setup might race against our setresuid(2) call.
                 *
                 * If the parent aborted, we'll detect this below, hence ignore
                 * return failure here. */
                (void) barrier_place(&barrier);

                /* Check if our parent process might already have died? */
                if (getppid() == parent_pid) {
                        sigset_t ss;

                        assert_se(sigemptyset(&ss) >= 0);
                        assert_se(sigaddset(&ss, SIGTERM) >= 0);

                        for (;;) {
                                /* Note that sigwait() reports failure by returning a positive error
                                 * number; it neither returns a negative value nor sets errno. On
                                 * failure "sig" is not written, hence don't look at it then. */
                                r = sigwait(&ss, &sig);
                                if (r == EINTR)
                                        continue;
                                if (r != 0)
                                        goto child_finish;

                                assert(sig == SIGTERM);
                                break;
                        }
                }

                /* If our parent died we'll end the session */
                if (getppid() != parent_pid) {
                        pam_code = pam_close_session(handle, flags);
                        if (pam_code != PAM_SUCCESS)
                                goto child_finish;
                }

                ret = 0;

        child_finish:
                pam_end(handle, pam_code | flags);
                _exit(ret);
        }

        barrier_set_role(&barrier, BARRIER_PARENT);

        /* If the child was forked off successfully it will do all the
         * cleanups, so forget about the handle here. */
        handle = NULL;

        /* Unblock SIGTERM again in the parent */
        assert_se(sigprocmask(SIG_SETMASK, &old_ss, NULL) >= 0);

        /* We close the log explicitly here, since the PAM modules
         * might have opened it, but we don't want this fd around. */
        closelog();

        /* Synchronously wait for the child to initialize. We don't care for
         * errors as we cannot recover. However, warn loudly if it happens. */
        if (!barrier_place_and_sync(&barrier))
                log_error("PAM initialization failed");

        /* Replace the caller's environment by the one PAM handed us */
        strv_free(*env);
        *env = e;

        return 0;

fail:
        if (pam_code != PAM_SUCCESS) {
                log_error("PAM failed: %s", pam_strerror(handle, pam_code));
                r = -EPERM;  /* PAM errors do not map to errno */
        } else
                log_error_errno(r, "PAM failed: %m");

        if (handle) {
                if (close_session)
                        pam_code = pam_close_session(handle, flags);

                pam_end(handle, pam_code | flags);
        }

        strv_free(e);
        closelog();

        return r;
#else
        return 0;
#endif
}
1251
static void rename_process_from_path(const char *path) {
        char buf[11];
        const char *base;
        size_t n;

        /* Renames the process to the last path component of "path", wrapped in parentheses. The
         * result has to fit in 10 chars (i.e. the length of "/sbin/init") to look pretty in /bin/ps,
         * hence at most 8 characters of the name are used. */

        base = basename(path);
        if (isempty(base)) {
                rename_process("(...)");
                return;
        }

        n = strlen(base);
        if (n > 8) {
                /* Keep the tail rather than the head: the end of the name is usually the more
                 * interesting part, as the beginning might just be "systemd-". */
                base += n - 8;
                n = 8;
        }

        /* Assemble "(" + name + ")" + NUL */
        buf[0] = '(';
        memcpy(buf + 1, base, n);
        buf[n + 1] = ')';
        buf[n + 2] = 0;

        rename_process(buf);
}
1282
1283 static bool context_has_address_families(const ExecContext *c) {
1284         assert(c);
1285
1286         return c->address_families_whitelist ||
1287                 !set_isempty(c->address_families);
1288 }
1289
1290 static bool context_has_syscall_filters(const ExecContext *c) {
1291         assert(c);
1292
1293         return c->syscall_whitelist ||
1294                 !hashmap_isempty(c->syscall_filter);
1295 }
1296
static bool context_has_no_new_privileges(const ExecContext *c) {
        assert(c);

        /* Decides whether the no-new-privileges flag is needed for this context. */

        /* Explicitly requested */
        if (c->no_new_privileges)
                return true;

        /* If we are privileged, we don't need NNP */
        if (have_effective_cap(CAP_SYS_ADMIN))
                return false;

        /* Unprivileged: NNP is needed as soon as any seccomp-based setting is in use */
        if (context_has_address_families(c))
                return true;
        if (c->memory_deny_write_execute)
                return true;
        if (c->restrict_realtime)
                return true;
        if (exec_context_restrict_namespaces_set(c))
                return true;
        if (c->protect_kernel_tunables)
                return true;
        if (c->protect_kernel_modules)
                return true;
        if (c->private_devices)
                return true;
        if (context_has_syscall_filters(c))
                return true;
        if (!set_isempty(c->syscall_archs))
                return true;

        return c->lock_personality;
}
1318
1319 #if HAVE_SECCOMP
1320
1321 static bool skip_seccomp_unavailable(const Unit* u, const char* msg) {
1322
1323         if (is_seccomp_available())
1324                 return false;
1325
1326         log_unit_debug(u, "SECCOMP features not detected in the kernel, skipping %s", msg);
1327         return true;
1328 }
1329
1330 static int apply_syscall_filter(const Unit* u, const ExecContext *c, bool needs_ambient_hack) {
1331         uint32_t negative_action, default_action, action;
1332         int r;
1333
1334         assert(u);
1335         assert(c);
1336
1337         if (!context_has_syscall_filters(c))
1338                 return 0;
1339
1340         if (skip_seccomp_unavailable(u, "SystemCallFilter="))
1341                 return 0;
1342
1343         negative_action = c->syscall_errno == 0 ? SCMP_ACT_KILL : SCMP_ACT_ERRNO(c->syscall_errno);
1344
1345         if (c->syscall_whitelist) {
1346                 default_action = negative_action;
1347                 action = SCMP_ACT_ALLOW;
1348         } else {
1349                 default_action = SCMP_ACT_ALLOW;
1350                 action = negative_action;
1351         }
1352
1353         if (needs_ambient_hack) {
1354                 r = seccomp_filter_set_add(c->syscall_filter, c->syscall_whitelist, syscall_filter_sets + SYSCALL_FILTER_SET_SETUID);
1355                 if (r < 0)
1356                         return r;
1357         }
1358
1359         return seccomp_load_syscall_filter_set_raw(default_action, c->syscall_filter, action);
1360 }
1361
1362 static int apply_syscall_archs(const Unit *u, const ExecContext *c) {
1363         assert(u);
1364         assert(c);
1365
1366         if (set_isempty(c->syscall_archs))
1367                 return 0;
1368
1369         if (skip_seccomp_unavailable(u, "SystemCallArchitectures="))
1370                 return 0;
1371
1372         return seccomp_restrict_archs(c->syscall_archs);
1373 }
1374
1375 static int apply_address_families(const Unit* u, const ExecContext *c) {
1376         assert(u);
1377         assert(c);
1378
1379         if (!context_has_address_families(c))
1380                 return 0;
1381
1382         if (skip_seccomp_unavailable(u, "RestrictAddressFamilies="))
1383                 return 0;
1384
1385         return seccomp_restrict_address_families(c->address_families, c->address_families_whitelist);
1386 }
1387
1388 static int apply_memory_deny_write_execute(const Unit* u, const ExecContext *c) {
1389         assert(u);
1390         assert(c);
1391
1392         if (!c->memory_deny_write_execute)
1393                 return 0;
1394
1395         if (skip_seccomp_unavailable(u, "MemoryDenyWriteExecute="))
1396                 return 0;
1397
1398         return seccomp_memory_deny_write_execute();
1399 }
1400
1401 static int apply_restrict_realtime(const Unit* u, const ExecContext *c) {
1402         assert(u);
1403         assert(c);
1404
1405         if (!c->restrict_realtime)
1406                 return 0;
1407
1408         if (skip_seccomp_unavailable(u, "RestrictRealtime="))
1409                 return 0;
1410
1411         return seccomp_restrict_realtime();
1412 }
1413
1414 static int apply_protect_sysctl(const Unit *u, const ExecContext *c) {
1415         assert(u);
1416         assert(c);
1417
1418         /* Turn off the legacy sysctl() system call. Many distributions turn this off while building the kernel, but
1419          * let's protect even those systems where this is left on in the kernel. */
1420
1421         if (!c->protect_kernel_tunables)
1422                 return 0;
1423
1424         if (skip_seccomp_unavailable(u, "ProtectKernelTunables="))
1425                 return 0;
1426
1427         return seccomp_protect_sysctl();
1428 }
1429
1430 static int apply_protect_kernel_modules(const Unit *u, const ExecContext *c) {
1431         assert(u);
1432         assert(c);
1433
1434         /* Turn off module syscalls on ProtectKernelModules=yes */
1435
1436         if (!c->protect_kernel_modules)
1437                 return 0;
1438
1439         if (skip_seccomp_unavailable(u, "ProtectKernelModules="))
1440                 return 0;
1441
1442         return seccomp_load_syscall_filter_set(SCMP_ACT_ALLOW, syscall_filter_sets + SYSCALL_FILTER_SET_MODULE, SCMP_ACT_ERRNO(EPERM));
1443 }
1444
1445 static int apply_private_devices(const Unit *u, const ExecContext *c) {
1446         assert(u);
1447         assert(c);
1448
1449         /* If PrivateDevices= is set, also turn off iopl and all @raw-io syscalls. */
1450
1451         if (!c->private_devices)
1452                 return 0;
1453
1454         if (skip_seccomp_unavailable(u, "PrivateDevices="))
1455                 return 0;
1456
1457         return seccomp_load_syscall_filter_set(SCMP_ACT_ALLOW, syscall_filter_sets + SYSCALL_FILTER_SET_RAW_IO, SCMP_ACT_ERRNO(EPERM));
1458 }
1459
1460 static int apply_restrict_namespaces(Unit *u, const ExecContext *c) {
1461         assert(u);
1462         assert(c);
1463
1464         if (!exec_context_restrict_namespaces_set(c))
1465                 return 0;
1466
1467         if (skip_seccomp_unavailable(u, "RestrictNamespaces="))
1468                 return 0;
1469
1470         return seccomp_restrict_namespaces(c->restrict_namespaces);
1471 }
1472
1473 static int apply_lock_personality(const Unit* u, const ExecContext *c) {
1474         unsigned long personality;
1475         int r;
1476
1477         assert(u);
1478         assert(c);
1479
1480         if (!c->lock_personality)
1481                 return 0;
1482
1483         if (skip_seccomp_unavailable(u, "LockPersonality="))
1484                 return 0;
1485
1486         personality = c->personality;
1487
1488         /* If personality is not specified, use either PER_LINUX or PER_LINUX32 depending on what is currently set. */
1489         if (personality == PERSONALITY_INVALID) {
1490
1491                 r = opinionated_personality(&personality);
1492                 if (r < 0)
1493                         return r;
1494         }
1495
1496         return seccomp_lock_personality(personality);
1497 }
1498
1499 #endif
1500
1501 static void do_idle_pipe_dance(int idle_pipe[4]) {
1502         assert(idle_pipe);
1503
1504         idle_pipe[1] = safe_close(idle_pipe[1]);
1505         idle_pipe[2] = safe_close(idle_pipe[2]);
1506
1507         if (idle_pipe[0] >= 0) {
1508                 int r;
1509
1510                 r = fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT_USEC);
1511
1512                 if (idle_pipe[3] >= 0 && r == 0 /* timeout */) {
1513                         ssize_t n;
1514
1515                         /* Signal systemd that we are bored and want to continue. */
1516                         n = write(idle_pipe[3], "x", 1);
1517                         if (n > 0)
1518                                 /* Wait for systemd to react to the signal above. */
1519                                 fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT2_USEC);
1520                 }
1521
1522                 idle_pipe[0] = safe_close(idle_pipe[0]);
1523
1524         }
1525
1526         idle_pipe[3] = safe_close(idle_pipe[3]);
1527 }
1528
1529 static int build_environment(
1530                 Unit *u,
1531                 const ExecContext *c,
1532                 const ExecParameters *p,
1533                 unsigned n_fds,
1534                 const char *home,
1535                 const char *username,
1536                 const char *shell,
1537                 dev_t journal_stream_dev,
1538                 ino_t journal_stream_ino,
1539                 char ***ret) {
1540
1541         _cleanup_strv_free_ char **our_env = NULL;
1542         unsigned n_env = 0;
1543         char *x;
1544
1545         assert(u);
1546         assert(c);
1547         assert(ret);
1548
1549         our_env = new0(char*, 14);
1550         if (!our_env)
1551                 return -ENOMEM;
1552
1553         if (n_fds > 0) {
1554                 _cleanup_free_ char *joined = NULL;
1555
1556                 if (asprintf(&x, "LISTEN_PID="PID_FMT, getpid_cached()) < 0)
1557                         return -ENOMEM;
1558                 our_env[n_env++] = x;
1559
1560                 if (asprintf(&x, "LISTEN_FDS=%u", n_fds) < 0)
1561                         return -ENOMEM;
1562                 our_env[n_env++] = x;
1563
1564                 joined = strv_join(p->fd_names, ":");
1565                 if (!joined)
1566                         return -ENOMEM;
1567
1568                 x = strjoin("LISTEN_FDNAMES=", joined);
1569                 if (!x)
1570                         return -ENOMEM;
1571                 our_env[n_env++] = x;
1572         }
1573
1574         if ((p->flags & EXEC_SET_WATCHDOG) && p->watchdog_usec > 0) {
1575                 if (asprintf(&x, "WATCHDOG_PID="PID_FMT, getpid_cached()) < 0)
1576                         return -ENOMEM;
1577                 our_env[n_env++] = x;
1578
1579                 if (asprintf(&x, "WATCHDOG_USEC="USEC_FMT, p->watchdog_usec) < 0)
1580                         return -ENOMEM;
1581                 our_env[n_env++] = x;
1582         }
1583
1584         /* If this is D-Bus, tell the nss-systemd module, since it relies on being able to use D-Bus look up dynamic
1585          * users via PID 1, possibly dead-locking the dbus daemon. This way it will not use D-Bus to resolve names, but
1586          * check the database directly. */
1587         if (p->flags & EXEC_NSS_BYPASS_BUS) {
1588                 x = strdup("SYSTEMD_NSS_BYPASS_BUS=1");
1589                 if (!x)
1590                         return -ENOMEM;
1591                 our_env[n_env++] = x;
1592         }
1593
1594         if (home) {
1595                 x = strappend("HOME=", home);
1596                 if (!x)
1597                         return -ENOMEM;
1598                 our_env[n_env++] = x;
1599         }
1600
1601         if (username) {
1602                 x = strappend("LOGNAME=", username);
1603                 if (!x)
1604                         return -ENOMEM;
1605                 our_env[n_env++] = x;
1606
1607                 x = strappend("USER=", username);
1608                 if (!x)
1609                         return -ENOMEM;
1610                 our_env[n_env++] = x;
1611         }
1612
1613         if (shell) {
1614                 x = strappend("SHELL=", shell);
1615                 if (!x)
1616                         return -ENOMEM;
1617                 our_env[n_env++] = x;
1618         }
1619
1620         if (!sd_id128_is_null(u->invocation_id)) {
1621                 if (asprintf(&x, "INVOCATION_ID=" SD_ID128_FORMAT_STR, SD_ID128_FORMAT_VAL(u->invocation_id)) < 0)
1622                         return -ENOMEM;
1623
1624                 our_env[n_env++] = x;
1625         }
1626
1627         if (exec_context_needs_term(c)) {
1628                 const char *tty_path, *term = NULL;
1629
1630                 tty_path = exec_context_tty_path(c);
1631
1632                 /* If we are forked off PID 1 and we are supposed to operate on /dev/console, then let's try to inherit
1633                  * the $TERM set for PID 1. This is useful for containers so that the $TERM the container manager
1634                  * passes to PID 1 ends up all the way in the console login shown. */
1635
1636                 if (path_equal(tty_path, "/dev/console") && getppid() == 1)
1637                         term = getenv("TERM");
1638                 if (!term)
1639                         term = default_term_for_tty(tty_path);
1640
1641                 x = strappend("TERM=", term);
1642                 if (!x)
1643                         return -ENOMEM;
1644                 our_env[n_env++] = x;
1645         }
1646
1647         if (journal_stream_dev != 0 && journal_stream_ino != 0) {
1648                 if (asprintf(&x, "JOURNAL_STREAM=" DEV_FMT ":" INO_FMT, journal_stream_dev, journal_stream_ino) < 0)
1649                         return -ENOMEM;
1650
1651                 our_env[n_env++] = x;
1652         }
1653
1654         our_env[n_env++] = NULL;
1655         assert(n_env <= 12);
1656
1657         *ret = our_env;
1658         our_env = NULL;
1659
1660         return 0;
1661 }
1662
1663 static int build_pass_environment(const ExecContext *c, char ***ret) {
1664         _cleanup_strv_free_ char **pass_env = NULL;
1665         size_t n_env = 0, n_bufsize = 0;
1666         char **i;
1667
1668         STRV_FOREACH(i, c->pass_environment) {
1669                 _cleanup_free_ char *x = NULL;
1670                 char *v;
1671
1672                 v = getenv(*i);
1673                 if (!v)
1674                         continue;
1675                 x = strjoin(*i, "=", v);
1676                 if (!x)
1677                         return -ENOMEM;
1678
1679                 if (!GREEDY_REALLOC(pass_env, n_bufsize, n_env + 2))
1680                         return -ENOMEM;
1681
1682                 pass_env[n_env++] = x;
1683                 pass_env[n_env] = NULL;
1684                 x = NULL;
1685         }
1686
1687         *ret = pass_env;
1688         pass_env = NULL;
1689
1690         return 0;
1691 }
1692
1693 static bool exec_needs_mount_namespace(
1694                 const ExecContext *context,
1695                 const ExecParameters *params,
1696                 ExecRuntime *runtime) {
1697
1698         assert(context);
1699         assert(params);
1700
1701         if (context->root_image)
1702                 return true;
1703
1704         if (!strv_isempty(context->read_write_paths) ||
1705             !strv_isempty(context->read_only_paths) ||
1706             !strv_isempty(context->inaccessible_paths))
1707                 return true;
1708
1709         if (context->n_bind_mounts > 0 ||
1710             !strv_isempty(context->directories[EXEC_DIRECTORY_RUNTIME].paths) ||
1711             !strv_isempty(context->directories[EXEC_DIRECTORY_STATE].paths) ||
1712             !strv_isempty(context->directories[EXEC_DIRECTORY_CACHE].paths) ||
1713             !strv_isempty(context->directories[EXEC_DIRECTORY_LOGS].paths) ||
1714             !strv_isempty(context->directories[EXEC_DIRECTORY_CONFIGURATION].paths))
1715                 return true;
1716
1717         if (context->mount_flags != 0)
1718                 return true;
1719
1720         if (context->private_tmp && runtime && (runtime->tmp_dir || runtime->var_tmp_dir))
1721                 return true;
1722
1723         if (context->private_devices ||
1724             context->protect_system != PROTECT_SYSTEM_NO ||
1725             context->protect_home != PROTECT_HOME_NO ||
1726             context->protect_kernel_tunables ||
1727             context->protect_kernel_modules ||
1728             context->protect_control_groups)
1729                 return true;
1730
1731         if (context->mount_apivfs && (context->root_image || context->root_directory))
1732                 return true;
1733
1734         return false;
1735 }
1736
1737 static int setup_private_users(uid_t uid, gid_t gid) {
1738         _cleanup_free_ char *uid_map = NULL, *gid_map = NULL;
1739         _cleanup_close_pair_ int errno_pipe[2] = { -1, -1 };
1740         _cleanup_close_ int unshare_ready_fd = -1;
1741         _cleanup_(sigkill_waitp) pid_t pid = 0;
1742         uint64_t c = 1;
1743         siginfo_t si;
1744         ssize_t n;
1745         int r;
1746
1747         /* Set up a user namespace and map root to root, the selected UID/GID to itself, and everything else to
1748          * nobody. In order to be able to write this mapping we need CAP_SETUID in the original user namespace, which
1749          * we however lack after opening the user namespace. To work around this we fork() a temporary child process,
1750          * which waits for the parent to create the new user namespace while staying in the original namespace. The
1751          * child then writes the UID mapping, under full privileges. The parent waits for the child to finish and
1752          * continues execution normally. */
1753
1754         if (uid != 0 && uid_is_valid(uid)) {
1755                 r = asprintf(&uid_map,
1756                              "0 0 1\n"                      /* Map root → root */
1757                              UID_FMT " " UID_FMT " 1\n",    /* Map $UID → $UID */
1758                              uid, uid);
1759                 if (r < 0)
1760                         return -ENOMEM;
1761         } else {
1762                 uid_map = strdup("0 0 1\n");            /* The case where the above is the same */
1763                 if (!uid_map)
1764                         return -ENOMEM;
1765         }
1766
1767         if (gid != 0 && gid_is_valid(gid)) {
1768                 r = asprintf(&gid_map,
1769                              "0 0 1\n"                      /* Map root → root */
1770                              GID_FMT " " GID_FMT " 1\n",    /* Map $GID → $GID */
1771                              gid, gid);
1772                 if (r < 0)
1773                         return -ENOMEM;
1774         } else {
1775                 gid_map = strdup("0 0 1\n");            /* The case where the above is the same */
1776                 if (!gid_map)
1777                         return -ENOMEM;
1778         }
1779
1780         /* Create a communication channel so that the parent can tell the child when it finished creating the user
1781          * namespace. */
1782         unshare_ready_fd = eventfd(0, EFD_CLOEXEC);
1783         if (unshare_ready_fd < 0)
1784                 return -errno;
1785
1786         /* Create a communication channel so that the child can tell the parent a proper error code in case it
1787          * failed. */
1788         if (pipe2(errno_pipe, O_CLOEXEC) < 0)
1789                 return -errno;
1790
1791         pid = fork();
1792         if (pid < 0)
1793                 return -errno;
1794
1795         if (pid == 0) {
1796                 _cleanup_close_ int fd = -1;
1797                 const char *a;
1798                 pid_t ppid;
1799
1800                 /* Child process, running in the original user namespace. Let's update the parent's UID/GID map from
1801                  * here, after the parent opened its own user namespace. */
1802
1803                 ppid = getppid();
1804                 errno_pipe[0] = safe_close(errno_pipe[0]);
1805
1806                 /* Wait until the parent unshared the user namespace */
1807                 if (read(unshare_ready_fd, &c, sizeof(c)) < 0) {
1808                         r = -errno;
1809                         goto child_fail;
1810                 }
1811
1812                 /* Disable the setgroups() system call in the child user namespace, for good. */
1813                 a = procfs_file_alloca(ppid, "setgroups");
1814                 fd = open(a, O_WRONLY|O_CLOEXEC);
1815                 if (fd < 0) {
1816                         if (errno != ENOENT) {
1817                                 r = -errno;
1818                                 goto child_fail;
1819                         }
1820
1821                         /* If the file is missing the kernel is too old, let's continue anyway. */
1822                 } else {
1823                         if (write(fd, "deny\n", 5) < 0) {
1824                                 r = -errno;
1825                                 goto child_fail;
1826                         }
1827
1828                         fd = safe_close(fd);
1829                 }
1830
1831                 /* First write the GID map */
1832                 a = procfs_file_alloca(ppid, "gid_map");
1833                 fd = open(a, O_WRONLY|O_CLOEXEC);
1834                 if (fd < 0) {
1835                         r = -errno;
1836                         goto child_fail;
1837                 }
1838                 if (write(fd, gid_map, strlen(gid_map)) < 0) {
1839                         r = -errno;
1840                         goto child_fail;
1841                 }
1842                 fd = safe_close(fd);
1843
1844                 /* The write the UID map */
1845                 a = procfs_file_alloca(ppid, "uid_map");
1846                 fd = open(a, O_WRONLY|O_CLOEXEC);
1847                 if (fd < 0) {
1848                         r = -errno;
1849                         goto child_fail;
1850                 }
1851                 if (write(fd, uid_map, strlen(uid_map)) < 0) {
1852                         r = -errno;
1853                         goto child_fail;
1854                 }
1855
1856                 _exit(EXIT_SUCCESS);
1857
1858         child_fail:
1859                 (void) write(errno_pipe[1], &r, sizeof(r));
1860                 _exit(EXIT_FAILURE);
1861         }
1862
1863         errno_pipe[1] = safe_close(errno_pipe[1]);
1864
1865         if (unshare(CLONE_NEWUSER) < 0)
1866                 return -errno;
1867
1868         /* Let the child know that the namespace is ready now */
1869         if (write(unshare_ready_fd, &c, sizeof(c)) < 0)
1870                 return -errno;
1871
1872         /* Try to read an error code from the child */
1873         n = read(errno_pipe[0], &r, sizeof(r));
1874         if (n < 0)
1875                 return -errno;
1876         if (n == sizeof(r)) { /* an error code was sent to us */
1877                 if (r < 0)
1878                         return r;
1879                 return -EIO;
1880         }
1881         if (n != 0) /* on success we should have read 0 bytes */
1882                 return -EIO;
1883
1884         r = wait_for_terminate(pid, &si);
1885         if (r < 0)
1886                 return r;
1887         pid = 0;
1888
1889         /* If something strange happened with the child, let's consider this fatal, too */
1890         if (si.si_code != CLD_EXITED || si.si_status != 0)
1891                 return -EIO;
1892
1893         return 0;
1894 }
1895
1896 static int setup_exec_directory(
1897                 const ExecContext *context,
1898                 const ExecParameters *params,
1899                 uid_t uid,
1900                 gid_t gid,
1901                 ExecDirectoryType type,
1902                 int *exit_status) {
1903
1904         static const int exit_status_table[_EXEC_DIRECTORY_TYPE_MAX] = {
1905                 [EXEC_DIRECTORY_RUNTIME] = EXIT_RUNTIME_DIRECTORY,
1906                 [EXEC_DIRECTORY_STATE] = EXIT_STATE_DIRECTORY,
1907                 [EXEC_DIRECTORY_CACHE] = EXIT_CACHE_DIRECTORY,
1908                 [EXEC_DIRECTORY_LOGS] = EXIT_LOGS_DIRECTORY,
1909                 [EXEC_DIRECTORY_CONFIGURATION] = EXIT_CONFIGURATION_DIRECTORY,
1910         };
1911         char **rt;
1912         int r;
1913
1914         assert(context);
1915         assert(params);
1916         assert(type >= 0 && type < _EXEC_DIRECTORY_TYPE_MAX);
1917         assert(exit_status);
1918
1919         if (!params->prefix[type])
1920                 return 0;
1921
1922         if (params->flags & EXEC_CHOWN_DIRECTORIES) {
1923                 if (!uid_is_valid(uid))
1924                         uid = 0;
1925                 if (!gid_is_valid(gid))
1926                         gid = 0;
1927         }
1928
1929         STRV_FOREACH(rt, context->directories[type].paths) {
1930                 _cleanup_free_ char *p = NULL, *pp = NULL;
1931                 const char *effective;
1932
1933                 p = strjoin(params->prefix[type], "/", *rt);
1934                 if (!p) {
1935                         r = -ENOMEM;
1936                         goto fail;
1937                 }
1938
1939                 r = mkdir_parents_label(p, 0755);
1940                 if (r < 0)
1941                         goto fail;
1942
1943                 if (context->dynamic_user &&
1944                     !IN_SET(type, EXEC_DIRECTORY_RUNTIME, EXEC_DIRECTORY_CONFIGURATION)) {
1945                         _cleanup_free_ char *private_root = NULL, *relative = NULL, *parent = NULL;
1946
1947                         /* So, here's one extra complication when dealing with DynamicUser=1 units. In that case we
1948                          * want to avoid leaving a directory around fully accessible that is owned by a dynamic user
1949                          * whose UID is later on reused. To lock this down we use the same trick used by container
1950                          * managers to prohibit host users to get access to files of the same UID in containers: we
1951                          * place everything inside a directory that has an access mode of 0700 and is owned root:root,
1952                          * so that it acts as security boundary for unprivileged host code. We then use fs namespacing
1953                          * to make this directory permeable for the service itself.
1954                          *
1955                          * Specifically: for a service which wants a special directory "foo/" we first create a
1956                          * directory "private/" with access mode 0700 owned by root:root. Then we place "foo" inside of
1957                          * that directory (i.e. "private/foo/"), and make "foo" a symlink to "private/foo". This way,
1958                          * privileged host users can access "foo/" as usual, but unprivileged host users can't look
1959                          * into it. Inside of the namespaceof the container "private/" is replaced by a more liberally
1960                          * accessible tmpfs, into which the host's "private/foo/" is mounted under the same name, thus
1961                          * disabling the access boundary for the service and making sure it only gets access to the
1962                          * dirs it needs but no others. Tricky? Yes, absolutely, but it works!
1963                          *
1964                          * Note that we don't do this for EXEC_DIRECTORY_CONFIGURATION as that's assumed not to be
1965                          * owned by the service itself.
1966                          * Also, note that we don't do this for EXEC_DIRECTORY_RUNTIME as that's often used for sharing
1967                          * files or sockets with other services. */
1968
1969                         private_root = strjoin(params->prefix[type], "/private");
1970                         if (!private_root) {
1971                                 r = -ENOMEM;
1972                                 goto fail;
1973                         }
1974
1975                         /* First set up private root if it doesn't exist yet, with access mode 0700 and owned by root:root */
1976                         r = mkdir_safe_label(private_root, 0700, 0, 0, false);
1977                         if (r < 0)
1978                                 goto fail;
1979
1980                         pp = strjoin(private_root, "/", *rt);
1981                         if (!pp) {
1982                                 r = -ENOMEM;
1983                                 goto fail;
1984                         }
1985
1986                         /* Create all directories between the configured directory and this private root, and mark them 0755 */
1987                         r = mkdir_parents_label(pp, 0755);
1988                         if (r < 0)
1989                                 goto fail;
1990
1991                         /* Finally, create the actual directory for the service */
1992                         r = mkdir_label(pp, context->directories[type].mode);
1993                         if (r < 0 && r != -EEXIST)
1994                                 goto fail;
1995
1996                         parent = dirname_malloc(p);
1997                         if (!parent) {
1998                                 r = -ENOMEM;
1999                                 goto fail;
2000                         }
2001
2002                         r = path_make_relative(parent, pp, &relative);
2003                         if (r < 0)
2004                                 goto fail;
2005
2006                         /* And link it up from the original place */
2007                         r = symlink_idempotent(relative, p);
2008                         if (r < 0)
2009                                 goto fail;
2010
2011                         effective = pp;
2012
2013                 } else {
2014                         r = mkdir_label(p, context->directories[type].mode);
2015                         if (r < 0 && r != -EEXIST)
2016                                 goto fail;
2017
2018                         effective = p;
2019                 }
2020
2021                 /* First lock down the access mode */
2022                 if (chmod(effective, context->directories[type].mode) < 0) {
2023                         r = -errno;
2024                         goto fail;
2025                 }
2026
2027                 /* Don't change the owner of the configuration directory, as in the common case it is not written to by
2028                  * a service, and shall not be writable. */
2029                 if (type == EXEC_DIRECTORY_CONFIGURATION)
2030                         continue;
2031
2032                 /* Then, change the ownership of the whole tree, if necessary */
2033                 r = path_chown_recursive(effective, uid, gid);
2034                 if (r < 0)
2035                         goto fail;
2036         }
2037
2038         return 0;
2039
2040 fail:
2041         *exit_status = exit_status_table[type];
2042         return r;
2043 }
2044
2045 static int setup_smack(
2046                 const ExecContext *context,
2047                 const ExecCommand *command) {
2048
2049         int r;
2050
2051         assert(context);
2052         assert(command);
2053
2054         if (context->smack_process_label) {
2055                 r = mac_smack_apply_pid(0, context->smack_process_label);
2056                 if (r < 0)
2057                         return r;
2058         }
2059 #ifdef SMACK_DEFAULT_PROCESS_LABEL
2060         else {
2061                 _cleanup_free_ char *exec_label = NULL;
2062
2063                 r = mac_smack_read(command->path, SMACK_ATTR_EXEC, &exec_label);
2064                 if (r < 0 && !IN_SET(r, -ENODATA, -EOPNOTSUPP))
2065                         return r;
2066
2067                 r = mac_smack_apply_pid(0, exec_label ? : SMACK_DEFAULT_PROCESS_LABEL);
2068                 if (r < 0)
2069                         return r;
2070         }
2071 #endif
2072
2073         return 0;
2074 }
2075
2076 static int compile_bind_mounts(
2077                 const ExecContext *context,
2078                 const ExecParameters *params,
2079                 BindMount **ret_bind_mounts,
2080                 unsigned *ret_n_bind_mounts,
2081                 char ***ret_empty_directories) {
2082
2083         _cleanup_strv_free_ char **empty_directories = NULL;
2084         BindMount *bind_mounts;
2085         unsigned n, h = 0, i;
2086         ExecDirectoryType t;
2087         int r;
2088
2089         assert(context);
2090         assert(params);
2091         assert(ret_bind_mounts);
2092         assert(ret_n_bind_mounts);
2093         assert(ret_empty_directories);
2094
2095         n = context->n_bind_mounts;
2096         for (t = 0; t < _EXEC_DIRECTORY_TYPE_MAX; t++) {
2097                 if (!params->prefix[t])
2098                         continue;
2099
2100                 n += strv_length(context->directories[t].paths);
2101         }
2102
2103         if (n <= 0) {
2104                 *ret_bind_mounts = NULL;
2105                 *ret_n_bind_mounts = 0;
2106                 *ret_empty_directories = NULL;
2107                 return 0;
2108         }
2109
2110         bind_mounts = new(BindMount, n);
2111         if (!bind_mounts)
2112                 return -ENOMEM;
2113
2114         for (i = 0; i < context->n_bind_mounts; i++) {
2115                 BindMount *item = context->bind_mounts + i;
2116                 char *s, *d;
2117
2118                 s = strdup(item->source);
2119                 if (!s) {
2120                         r = -ENOMEM;
2121                         goto finish;
2122                 }
2123
2124                 d = strdup(item->destination);
2125                 if (!d) {
2126                         free(s);
2127                         r = -ENOMEM;
2128                         goto finish;
2129                 }
2130
2131                 bind_mounts[h++] = (BindMount) {
2132                         .source = s,
2133                         .destination = d,
2134                         .read_only = item->read_only,
2135                         .recursive = item->recursive,
2136                         .ignore_enoent = item->ignore_enoent,
2137                 };
2138         }
2139
2140         for (t = 0; t < _EXEC_DIRECTORY_TYPE_MAX; t++) {
2141                 char **suffix;
2142
2143                 if (!params->prefix[t])
2144                         continue;
2145
2146                 if (strv_isempty(context->directories[t].paths))
2147                         continue;
2148
2149                 if (context->dynamic_user &&
2150                     !IN_SET(t, EXEC_DIRECTORY_RUNTIME, EXEC_DIRECTORY_CONFIGURATION)) {
2151                         char *private_root;
2152
2153                         /* So this is for a dynamic user, and we need to make sure the process can access its own
2154                          * directory. For that we overmount the usually inaccessible "private" subdirectory with a
2155                          * tmpfs that makes it accessible and is empty except for the submounts we do this for. */
2156
2157                         private_root = strjoin(params->prefix[t], "/private");
2158                         if (!private_root) {
2159                                 r = -ENOMEM;
2160                                 goto finish;
2161                         }
2162
2163                         r = strv_consume(&empty_directories, private_root);
2164                         if (r < 0) {
2165                                 r = -ENOMEM;
2166                                 goto finish;
2167                         }
2168                 }
2169
2170                 STRV_FOREACH(suffix, context->directories[t].paths) {
2171                         char *s, *d;
2172
2173                         if (context->dynamic_user &&
2174                             !IN_SET(t, EXEC_DIRECTORY_RUNTIME, EXEC_DIRECTORY_CONFIGURATION))
2175                                 s = strjoin(params->prefix[t], "/private/", *suffix);
2176                         else
2177                                 s = strjoin(params->prefix[t], "/", *suffix);
2178                         if (!s) {
2179                                 r = -ENOMEM;
2180                                 goto finish;
2181                         }
2182
2183                         d = strdup(s);
2184                         if (!d) {
2185                                 free(s);
2186                                 r = -ENOMEM;
2187                                 goto finish;
2188                         }
2189
2190                         bind_mounts[h++] = (BindMount) {
2191                                 .source = s,
2192                                 .destination = d,
2193                                 .read_only = false,
2194                                 .recursive = true,
2195                                 .ignore_enoent = false,
2196                         };
2197                 }
2198         }
2199
2200         assert(h == n);
2201
2202         *ret_bind_mounts = bind_mounts;
2203         *ret_n_bind_mounts = n;
2204         *ret_empty_directories = empty_directories;
2205
2206         empty_directories = NULL;
2207
2208         return (int) n;
2209
2210 finish:
2211         bind_mount_free_many(bind_mounts, h);
2212         return r;
2213 }
2214
2215 static int apply_mount_namespace(
2216                 Unit *u,
2217                 ExecCommand *command,
2218                 const ExecContext *context,
2219                 const ExecParameters *params,
2220                 ExecRuntime *runtime) {
2221
2222         _cleanup_strv_free_ char **empty_directories = NULL;
2223         char *tmp = NULL, *var = NULL;
2224         const char *root_dir = NULL, *root_image = NULL;
2225         NamespaceInfo ns_info = {
2226                 .ignore_protect_paths = false,
2227                 .private_dev = context->private_devices,
2228                 .protect_control_groups = context->protect_control_groups,
2229                 .protect_kernel_tunables = context->protect_kernel_tunables,
2230                 .protect_kernel_modules = context->protect_kernel_modules,
2231                 .mount_apivfs = context->mount_apivfs,
2232         };
2233         bool needs_sandboxing;
2234         BindMount *bind_mounts = NULL;
2235         unsigned n_bind_mounts = 0;
2236         int r;
2237
2238         assert(context);
2239
2240         /* The runtime struct only contains the parent of the private /tmp,
2241          * which is non-accessible to world users. Inside of it there's a /tmp
2242          * that is sticky, and that's the one we want to use here. */
2243
2244         if (context->private_tmp && runtime) {
2245                 if (runtime->tmp_dir)
2246                         tmp = strjoina(runtime->tmp_dir, "/tmp");
2247                 if (runtime->var_tmp_dir)
2248                         var = strjoina(runtime->var_tmp_dir, "/tmp");
2249         }
2250
2251         if (params->flags & EXEC_APPLY_CHROOT) {
2252                 root_image = context->root_image;
2253
2254                 if (!root_image)
2255                         root_dir = context->root_directory;
2256         }
2257
2258         r = compile_bind_mounts(context, params, &bind_mounts, &n_bind_mounts, &empty_directories);
2259         if (r < 0)
2260                 return r;
2261
2262         /*
2263          * If DynamicUser=no and RootDirectory= is set then lets pass a relaxed
2264          * sandbox info, otherwise enforce it, don't ignore protected paths and
2265          * fail if we are enable to apply the sandbox inside the mount namespace.
2266          */
2267         if (!context->dynamic_user && root_dir)
2268                 ns_info.ignore_protect_paths = true;
2269
2270         needs_sandboxing = (params->flags & EXEC_APPLY_SANDBOXING) && !(command->flags & EXEC_COMMAND_FULLY_PRIVILEGED);
2271
2272         r = setup_namespace(root_dir, root_image,
2273                             &ns_info, context->read_write_paths,
2274                             needs_sandboxing ? context->read_only_paths : NULL,
2275                             needs_sandboxing ? context->inaccessible_paths : NULL,
2276                             empty_directories,
2277                             bind_mounts,
2278                             n_bind_mounts,
2279                             tmp,
2280                             var,
2281                             needs_sandboxing ? context->protect_home : PROTECT_HOME_NO,
2282                             needs_sandboxing ? context->protect_system : PROTECT_SYSTEM_NO,
2283                             context->mount_flags,
2284                             DISSECT_IMAGE_DISCARD_ON_LOOP);
2285
2286         bind_mount_free_many(bind_mounts, n_bind_mounts);
2287
2288         /* If we couldn't set up the namespace this is probably due to a
2289          * missing capability. In this case, silently proceeed. */
2290         if (IN_SET(r, -EPERM, -EACCES)) {
2291                 log_unit_debug_errno(u, r, "Failed to set up namespace, assuming containerized execution, ignoring: %m");
2292                 return 0;
2293         }
2294
2295         return r;
2296 }
2297
2298 static int apply_working_directory(
2299                 const ExecContext *context,
2300                 const ExecParameters *params,
2301                 const char *home,
2302                 const bool needs_mount_ns,
2303                 int *exit_status) {
2304
2305         const char *d, *wd;
2306
2307         assert(context);
2308         assert(exit_status);
2309
2310         if (context->working_directory_home) {
2311
2312                 if (!home) {
2313                         *exit_status = EXIT_CHDIR;
2314                         return -ENXIO;
2315                 }
2316
2317                 wd = home;
2318
2319         } else if (context->working_directory)
2320                 wd = context->working_directory;
2321         else
2322                 wd = "/";
2323
2324         if (params->flags & EXEC_APPLY_CHROOT) {
2325                 if (!needs_mount_ns && context->root_directory)
2326                         if (chroot(context->root_directory) < 0) {
2327                                 *exit_status = EXIT_CHROOT;
2328                                 return -errno;
2329                         }
2330
2331                 d = wd;
2332         } else
2333                 d = prefix_roota(context->root_directory, wd);
2334
2335         if (chdir(d) < 0 && !context->working_directory_missing_ok) {
2336                 *exit_status = EXIT_CHDIR;
2337                 return -errno;
2338         }
2339
2340         return 0;
2341 }
2342
2343 static int setup_keyring(
2344                 Unit *u,
2345                 const ExecContext *context,
2346                 const ExecParameters *p,
2347                 uid_t uid, gid_t gid) {
2348
2349         key_serial_t keyring;
2350         int r;
2351
2352         assert(u);
2353         assert(context);
2354         assert(p);
2355
2356         /* Let's set up a new per-service "session" kernel keyring for each system service. This has the benefit that
2357          * each service runs with its own keyring shared among all processes of the service, but with no hook-up beyond
2358          * that scope, and in particular no link to the per-UID keyring. If we don't do this the keyring will be
2359          * automatically created on-demand and then linked to the per-UID keyring, by the kernel. The kernel's built-in
2360          * on-demand behaviour is very appropriate for login users, but probably not so much for system services, where
2361          * UIDs are not necessarily specific to a service but reused (at least in the case of UID 0). */
2362
2363         if (!(p->flags & EXEC_NEW_KEYRING))
2364                 return 0;
2365
2366         if (context->keyring_mode == EXEC_KEYRING_INHERIT)
2367                 return 0;
2368
2369         keyring = keyctl(KEYCTL_JOIN_SESSION_KEYRING, 0, 0, 0, 0);
2370         if (keyring == -1) {
2371                 if (errno == ENOSYS)
2372                         log_unit_debug_errno(u, errno, "Kernel keyring not supported, ignoring.");
2373                 else if (IN_SET(errno, EACCES, EPERM))
2374                         log_unit_debug_errno(u, errno, "Kernel keyring access prohibited, ignoring.");
2375                 else if (errno == EDQUOT)
2376                         log_unit_debug_errno(u, errno, "Out of kernel keyrings to allocate, ignoring.");
2377                 else
2378                         return log_unit_error_errno(u, errno, "Setting up kernel keyring failed: %m");
2379
2380                 return 0;
2381         }
2382
2383         /* Populate they keyring with the invocation ID by default. */
2384         if (!sd_id128_is_null(u->invocation_id)) {
2385                 key_serial_t key;
2386
2387                 key = add_key("user", "invocation_id", &u->invocation_id, sizeof(u->invocation_id), KEY_SPEC_SESSION_KEYRING);
2388                 if (key == -1)
2389                         log_unit_debug_errno(u, errno, "Failed to add invocation ID to keyring, ignoring: %m");
2390                 else {
2391                         if (keyctl(KEYCTL_SETPERM, key,
2392                                    KEY_POS_VIEW|KEY_POS_READ|KEY_POS_SEARCH|
2393                                    KEY_USR_VIEW|KEY_USR_READ|KEY_USR_SEARCH, 0, 0) < 0)
2394                                 return log_unit_error_errno(u, errno, "Failed to restrict invocation ID permission: %m");
2395                 }
2396         }
2397
2398         /* And now, make the keyring owned by the service's user */
2399         if (uid_is_valid(uid) || gid_is_valid(gid))
2400                 if (keyctl(KEYCTL_CHOWN, keyring, uid, gid, 0) < 0)
2401                         return log_unit_error_errno(u, errno, "Failed to change ownership of session keyring: %m");
2402
2403         /* When requested link the user keyring into the session keyring. */
2404         if (context->keyring_mode == EXEC_KEYRING_SHARED) {
2405                 uid_t saved_uid;
2406                 gid_t saved_gid;
2407
2408                 /* Acquiring a reference to the user keyring is nasty. We briefly change identity in order to get things
2409                  * set up properly by the kernel. If we don't do that then we can't create it atomically, and that
2410                  * sucks for parallel execution. This mimics what pam_keyinit does, too.*/
2411
2412                 saved_uid = getuid();
2413                 saved_gid = getgid();
2414
2415                 if (gid_is_valid(gid) && gid != saved_gid) {
2416                         if (setregid(gid, -1) < 0)
2417                                 return log_unit_error_errno(u, errno, "Failed to change GID for user keyring: %m");
2418                 }
2419
2420                 if (uid_is_valid(uid) && uid != saved_uid) {
2421                         if (setreuid(uid, -1) < 0) {
2422                                 (void) setregid(saved_gid, -1);
2423                                 return log_unit_error_errno(u, errno, "Failed to change UID for user keyring: %m");
2424                         }
2425                 }
2426
2427                 if (keyctl(KEYCTL_LINK,
2428                            KEY_SPEC_USER_KEYRING,
2429                            KEY_SPEC_SESSION_KEYRING, 0, 0) < 0) {
2430
2431                         r = -errno;
2432
2433                         (void) setreuid(saved_uid, -1);
2434                         (void) setregid(saved_gid, -1);
2435
2436                         return log_unit_error_errno(u, r, "Failed to link user keyring into session keyring: %m");
2437                 }
2438
2439                 if (uid_is_valid(uid) && uid != saved_uid) {
2440                         if (setreuid(saved_uid, -1) < 0) {
2441                                 (void) setregid(saved_gid, -1);
2442                                 return log_unit_error_errno(u, errno, "Failed to change UID back for user keyring: %m");
2443                         }
2444                 }
2445
2446                 if (gid_is_valid(gid) && gid != saved_gid) {
2447                         if (setregid(saved_gid, -1) < 0)
2448                                 return log_unit_error_errno(u, errno, "Failed to change GID back for user keyring: %m");
2449                 }
2450         }
2451
2452         return 0;
2453 }
2454
/* Appends the valid (non-negative) file descriptors of a socket pair to array[], advancing *n by one for
 * each descriptor added. A NULL pair is silently ignored. The caller has to make sure the array has room
 * for up to two more entries. */
static void append_socket_pair(int *array, unsigned *n, int pair[2]) {
        unsigned k;

        assert(array);
        assert(n);

        if (pair)
                for (k = 0; k < 2; k++)
                        if (pair[k] >= 0)
                                array[(*n)++] = pair[k];
}
2467
2468 static int close_remaining_fds(
2469                 const ExecParameters *params,
2470                 ExecRuntime *runtime,
2471                 DynamicCreds *dcreds,
2472                 int user_lookup_fd,
2473                 int socket_fd,
2474                 int *fds, unsigned n_fds) {
2475
2476         unsigned n_dont_close = 0;
2477         int dont_close[n_fds + 12];
2478
2479         assert(params);
2480
2481         if (params->stdin_fd >= 0)
2482                 dont_close[n_dont_close++] = params->stdin_fd;
2483         if (params->stdout_fd >= 0)
2484                 dont_close[n_dont_close++] = params->stdout_fd;
2485         if (params->stderr_fd >= 0)
2486                 dont_close[n_dont_close++] = params->stderr_fd;
2487
2488         if (socket_fd >= 0)
2489                 dont_close[n_dont_close++] = socket_fd;
2490         if (n_fds > 0) {
2491                 memcpy(dont_close + n_dont_close, fds, sizeof(int) * n_fds);
2492                 n_dont_close += n_fds;
2493         }
2494
2495         if (runtime)
2496                 append_socket_pair(dont_close, &n_dont_close, runtime->netns_storage_socket);
2497
2498         if (dcreds) {
2499                 if (dcreds->user)
2500                         append_socket_pair(dont_close, &n_dont_close, dcreds->user->storage_socket);
2501                 if (dcreds->group)
2502                         append_socket_pair(dont_close, &n_dont_close, dcreds->group->storage_socket);
2503         }
2504
2505         if (user_lookup_fd >= 0)
2506                 dont_close[n_dont_close++] = user_lookup_fd;
2507
2508         return close_all_fds(dont_close, n_dont_close);
2509 }
2510
2511 static int send_user_lookup(
2512                 Unit *unit,
2513                 int user_lookup_fd,
2514                 uid_t uid,
2515                 gid_t gid) {
2516
2517         assert(unit);
2518
2519         /* Send the resolved UID/GID to PID 1 after we learnt it. We send a single datagram, containing the UID/GID
2520          * data as well as the unit name. Note that we suppress sending this if no user/group to resolve was
2521          * specified. */
2522
2523         if (user_lookup_fd < 0)
2524                 return 0;
2525
2526         if (!uid_is_valid(uid) && !gid_is_valid(gid))
2527                 return 0;
2528
2529         if (writev(user_lookup_fd,
2530                (struct iovec[]) {
2531                            IOVEC_INIT(&uid, sizeof(uid)),
2532                            IOVEC_INIT(&gid, sizeof(gid)),
2533                            IOVEC_INIT_STRING(unit->id) }, 3) < 0)
2534                 return -errno;
2535
2536         return 0;
2537 }
2538
2539 static int acquire_home(const ExecContext *c, uid_t uid, const char** home, char **buf) {
2540         int r;
2541
2542         assert(c);
2543         assert(home);
2544         assert(buf);
2545
2546         /* If WorkingDirectory=~ is set, try to acquire a usable home directory. */
2547
2548         if (*home)
2549                 return 0;
2550
2551         if (!c->working_directory_home)
2552                 return 0;
2553
2554         if (uid == 0) {
2555                 /* Hardcode /root as home directory for UID 0 */
2556                 *home = "/root";
2557                 return 1;
2558         }
2559
2560         r = get_home_dir(buf);
2561         if (r < 0)
2562                 return r;
2563
2564         *home = *buf;
2565         return 1;
2566 }
2567
2568 static int compile_suggested_paths(const ExecContext *c, const ExecParameters *p, char ***ret) {
2569         _cleanup_strv_free_ char ** list = NULL;
2570         ExecDirectoryType t;
2571         int r;
2572
2573         assert(c);
2574         assert(p);
2575         assert(ret);
2576
2577         assert(c->dynamic_user);
2578
2579         /* Compile a list of paths that it might make sense to read the owning UID from to use as initial candidate for
2580          * dynamic UID allocation, in order to save us from doing costly recursive chown()s of the special
2581          * directories. */
2582
2583         for (t = 0; t < _EXEC_DIRECTORY_TYPE_MAX; t++) {
2584                 char **i;
2585
2586                 if (t == EXEC_DIRECTORY_CONFIGURATION)
2587                         continue;
2588
2589                 if (!p->prefix[t])
2590                         continue;
2591
2592                 STRV_FOREACH(i, c->directories[t].paths) {
2593                         char *e;
2594
2595                         if (t == EXEC_DIRECTORY_RUNTIME)
2596                                 e = strjoin(p->prefix[t], "/", *i);
2597                         else
2598                                 e = strjoin(p->prefix[t], "/private/", *i);
2599                         if (!e)
2600                                 return -ENOMEM;
2601
2602                         r = strv_consume(&list, e);
2603                         if (r < 0)
2604                                 return r;
2605                 }
2606         }
2607
2608         *ret = list;
2609         list = NULL;
2610
2611         return 0;
2612 }
2613
2614 static int exec_child(
2615                 Unit *unit,
2616                 ExecCommand *command,
2617                 const ExecContext *context,
2618                 const ExecParameters *params,
2619                 ExecRuntime *runtime,
2620                 DynamicCreds *dcreds,
2621                 char **argv,
2622                 int socket_fd,
2623                 int named_iofds[3],
2624                 int *fds,
2625                 unsigned n_storage_fds,
2626                 unsigned n_socket_fds,
2627                 char **files_env,
2628                 int user_lookup_fd,
2629                 int *exit_status) {
2630
2631         _cleanup_strv_free_ char **our_env = NULL, **pass_env = NULL, **accum_env = NULL, **final_argv = NULL;
2632         _cleanup_free_ char *mac_selinux_context_net = NULL, *home_buffer = NULL;
2633         _cleanup_free_ gid_t *supplementary_gids = NULL;
2634         const char *username = NULL, *groupname = NULL;
2635         const char *home = NULL, *shell = NULL;
2636         dev_t journal_stream_dev = 0;
2637         ino_t journal_stream_ino = 0;
2638         bool needs_sandboxing,          /* Do we need to set up full sandboxing? (i.e. all namespacing, all MAC stuff, caps, yadda yadda */
2639                 needs_setuid,           /* Do we need to do the actual setresuid()/setresgid() calls? */
2640                 needs_mount_namespace,  /* Do we need to set up a mount namespace for this kernel? */
2641                 needs_ambient_hack;     /* Do we need to apply the ambient capabilities hack? */
2642 #if HAVE_SELINUX
2643         bool use_selinux = false;
2644 #endif
2645 #if ENABLE_SMACK
2646         bool use_smack = false;
2647 #endif
2648 #if HAVE_APPARMOR
2649         bool use_apparmor = false;
2650 #endif
2651         uid_t uid = UID_INVALID;
2652         gid_t gid = GID_INVALID;
2653         int i, r, ngids = 0;
2654         unsigned n_fds;
2655         ExecDirectoryType dt;
2656         int secure_bits;
2657
2658         assert(unit);
2659         assert(command);
2660         assert(context);
2661         assert(params);
2662         assert(exit_status);
2663
2664         rename_process_from_path(command->path);
2665
2666         /* We reset exactly these signals, since they are the
2667          * only ones we set to SIG_IGN in the main daemon. All
2668          * others we leave untouched because we set them to
2669          * SIG_DFL or a valid handler initially, both of which
2670          * will be demoted to SIG_DFL. */
2671         (void) default_signals(SIGNALS_CRASH_HANDLER,
2672                                SIGNALS_IGNORE, -1);
2673
2674         if (context->ignore_sigpipe)
2675                 (void) ignore_signals(SIGPIPE, -1);
2676
2677         r = reset_signal_mask();
2678         if (r < 0) {
2679                 *exit_status = EXIT_SIGNAL_MASK;
2680                 return log_unit_error_errno(unit, r, "Failed to set process signal mask: %m");
2681         }
2682
2683         if (params->idle_pipe)
2684                 do_idle_pipe_dance(params->idle_pipe);
2685
2686         /* Close fds we don't need very early to make sure we don't block init reexecution because it cannot bind its
2687          * sockets. Among the fds we close are the logging fds, and we want to keep them closed, so that we don't have
2688          * any fds open we don't really want open during the transition. In order to make logging work, we switch the
2689          * log subsystem into open_when_needed mode, so that it reopens the logs on every single log call. */
2690
2691         log_forget_fds();
2692         log_set_open_when_needed(true);
2693
2694         /* In case anything used libc syslog(), close this here, too */
2695         closelog();
2696
2697         n_fds = n_storage_fds + n_socket_fds;
2698         r = close_remaining_fds(params, runtime, dcreds, user_lookup_fd, socket_fd, fds, n_fds);
2699         if (r < 0) {
2700                 *exit_status = EXIT_FDS;
2701                 return log_unit_error_errno(unit, r, "Failed to close unwanted file descriptors: %m");
2702         }
2703
2704         if (!context->same_pgrp)
2705                 if (setsid() < 0) {
2706                         *exit_status = EXIT_SETSID;
2707                         return log_unit_error_errno(unit, errno, "Failed to create new process session: %m");
2708                 }
2709
2710         exec_context_tty_reset(context, params);
2711
2712         if (unit_shall_confirm_spawn(unit)) {
2713                 const char *vc = params->confirm_spawn;
2714                 _cleanup_free_ char *cmdline = NULL;
2715
2716                 cmdline = exec_command_line(argv);
2717                 if (!cmdline) {
2718                         *exit_status = EXIT_MEMORY;
2719                         return log_oom();
2720                 }
2721
2722                 r = ask_for_confirmation(vc, unit, cmdline);
2723                 if (r != CONFIRM_EXECUTE) {
2724                         if (r == CONFIRM_PRETEND_SUCCESS) {
2725                                 *exit_status = EXIT_SUCCESS;
2726                                 return 0;
2727                         }
2728                         *exit_status = EXIT_CONFIRM;
2729                         log_unit_error(unit, "Execution cancelled by the user");
2730                         return -ECANCELED;
2731                 }
2732         }
2733
2734         if (context->dynamic_user && dcreds) {
2735                 _cleanup_strv_free_ char **suggested_paths = NULL;
2736
2737                 /* Make sure we bypass our own NSS module for any NSS checks */
2738                 if (putenv((char*) "SYSTEMD_NSS_DYNAMIC_BYPASS=1") != 0) {
2739                         *exit_status = EXIT_USER;
2740                         return log_unit_error_errno(unit, errno, "Failed to update environment: %m");
2741                 }
2742
2743                 r = compile_suggested_paths(context, params, &suggested_paths);
2744                 if (r < 0) {
2745                         *exit_status = EXIT_MEMORY;
2746                         return log_oom();
2747                 }
2748
2749                 r = dynamic_creds_realize(dcreds, suggested_paths, &uid, &gid);
2750                 if (r < 0) {
2751                         *exit_status = EXIT_USER;
2752                         if (r == -EILSEQ) {
2753                                 log_unit_error(unit, "Failed to update dynamic user credentials: User or group with specified name already exists.");
2754                                 return -EOPNOTSUPP;
2755                         }
2756                         return log_unit_error_errno(unit, r, "Failed to update dynamic user credentials: %m");
2757                 }
2758
2759                 if (!uid_is_valid(uid)) {
2760                         *exit_status = EXIT_USER;
2761                         log_unit_error(unit, "UID validation failed for \""UID_FMT"\"", uid);
2762                         return -ESRCH;
2763                 }
2764
2765                 if (!gid_is_valid(gid)) {
2766                         *exit_status = EXIT_USER;
2767                         log_unit_error(unit, "GID validation failed for \""GID_FMT"\"", gid);
2768                         return -ESRCH;
2769                 }
2770
2771                 if (dcreds->user)
2772                         username = dcreds->user->name;
2773
2774         } else {
2775                 r = get_fixed_user(context, &username, &uid, &gid, &home, &shell);
2776                 if (r < 0) {
2777                         *exit_status = EXIT_USER;
2778                         return log_unit_error_errno(unit, r, "Failed to determine user credentials: %m");
2779                 }
2780
2781                 r = get_fixed_group(context, &groupname, &gid);
2782                 if (r < 0) {
2783                         *exit_status = EXIT_GROUP;
2784                         return log_unit_error_errno(unit, r, "Failed to determine group credentials: %m");
2785                 }
2786         }
2787
2788         /* Initialize user supplementary groups and get SupplementaryGroups= ones */
2789         r = get_supplementary_groups(context, username, groupname, gid,
2790                                      &supplementary_gids, &ngids);
2791         if (r < 0) {
2792                 *exit_status = EXIT_GROUP;
2793                 return log_unit_error_errno(unit, r, "Failed to determine supplementary groups: %m");
2794         }
2795
2796         r = send_user_lookup(unit, user_lookup_fd, uid, gid);
2797         if (r < 0) {
2798                 *exit_status = EXIT_USER;
2799                 return log_unit_error_errno(unit, r, "Failed to send user credentials to PID1: %m");
2800         }
2801
2802         user_lookup_fd = safe_close(user_lookup_fd);
2803
2804         r = acquire_home(context, uid, &home, &home_buffer);
2805         if (r < 0) {
2806                 *exit_status = EXIT_CHDIR;
2807                 return log_unit_error_errno(unit, r, "Failed to determine $HOME for user: %m");
2808         }
2809
        /* If a socket is connected to STDIN/STDOUT/STDERR, we
         * must be sure to drop O_NONBLOCK */
2812         if (socket_fd >= 0)
2813                 (void) fd_nonblock(socket_fd, false);
2814
2815         r = setup_input(context, params, socket_fd, named_iofds);
2816         if (r < 0) {
2817                 *exit_status = EXIT_STDIN;
2818                 return log_unit_error_errno(unit, r, "Failed to set up standard input: %m");
2819         }
2820
2821         r = setup_output(unit, context, params, STDOUT_FILENO, socket_fd, named_iofds, basename(command->path), uid, gid, &journal_stream_dev, &journal_stream_ino);
2822         if (r < 0) {
2823                 *exit_status = EXIT_STDOUT;
2824                 return log_unit_error_errno(unit, r, "Failed to set up standard output: %m");
2825         }
2826
2827         r = setup_output(unit, context, params, STDERR_FILENO, socket_fd, named_iofds, basename(command->path), uid, gid, &journal_stream_dev, &journal_stream_ino);
2828         if (r < 0) {
2829                 *exit_status = EXIT_STDERR;
2830                 return log_unit_error_errno(unit, r, "Failed to set up standard error output: %m");
2831         }
2832
2833         if (params->cgroup_path) {
2834                 r = cg_attach_everywhere(params->cgroup_supported, params->cgroup_path, 0, NULL, NULL);
2835                 if (r < 0) {
2836                         *exit_status = EXIT_CGROUP;
2837                         return log_unit_error_errno(unit, r, "Failed to attach to cgroup %s: %m", params->cgroup_path);
2838                 }
2839         }
2840
2841         if (context->oom_score_adjust_set) {
2842                 char t[DECIMAL_STR_MAX(context->oom_score_adjust)];
2843
2844                 /* When we can't make this change due to EPERM, then
2845                  * let's silently skip over it. User namespaces
2846                  * prohibit write access to this file, and we
2847                  * shouldn't trip up over that. */
2848
2849                 sprintf(t, "%i", context->oom_score_adjust);
2850                 r = write_string_file("/proc/self/oom_score_adj", t, 0);
2851                 if (IN_SET(r, -EPERM, -EACCES))
2852                         log_unit_debug_errno(unit, r, "Failed to adjust OOM setting, assuming containerized execution, ignoring: %m");
2853                 else if (r < 0) {
2854                         *exit_status = EXIT_OOM_ADJUST;
2855                         return log_unit_error_errno(unit, r, "Failed to adjust OOM setting: %m");
2856                 }
2857         }
2858
2859         if (context->nice_set)
2860                 if (setpriority(PRIO_PROCESS, 0, context->nice) < 0) {
2861                         *exit_status = EXIT_NICE;
2862                         return log_unit_error_errno(unit, errno, "Failed to set up process scheduling priority (nice level): %m");
2863                 }
2864
2865         if (context->cpu_sched_set) {
2866                 struct sched_param param = {
2867                         .sched_priority = context->cpu_sched_priority,
2868                 };
2869
2870                 r = sched_setscheduler(0,
2871                                        context->cpu_sched_policy |
2872                                        (context->cpu_sched_reset_on_fork ?
2873                                         SCHED_RESET_ON_FORK : 0),
2874                                        &param);
2875                 if (r < 0) {
2876                         *exit_status = EXIT_SETSCHEDULER;
2877                         return log_unit_error_errno(unit, errno, "Failed to set up CPU scheduling: %m");
2878                 }
2879         }
2880
2881         if (context->cpuset)
2882                 if (sched_setaffinity(0, CPU_ALLOC_SIZE(context->cpuset_ncpus), context->cpuset) < 0) {
2883                         *exit_status = EXIT_CPUAFFINITY;
2884                         return log_unit_error_errno(unit, errno, "Failed to set up CPU affinity: %m");
2885                 }
2886
2887         if (context->ioprio_set)
2888                 if (ioprio_set(IOPRIO_WHO_PROCESS, 0, context->ioprio) < 0) {
2889                         *exit_status = EXIT_IOPRIO;
2890                         return log_unit_error_errno(unit, errno, "Failed to set up IO scheduling priority: %m");
2891                 }
2892
2893         if (context->timer_slack_nsec != NSEC_INFINITY)
2894                 if (prctl(PR_SET_TIMERSLACK, context->timer_slack_nsec) < 0) {
2895                         *exit_status = EXIT_TIMERSLACK;
2896                         return log_unit_error_errno(unit, errno, "Failed to set up timer slack: %m");
2897                 }
2898
2899         if (context->personality != PERSONALITY_INVALID) {
2900                 r = safe_personality(context->personality);
2901                 if (r < 0) {
2902                         *exit_status = EXIT_PERSONALITY;
2903                         return log_unit_error_errno(unit, r, "Failed to set up execution domain (personality): %m");
2904                 }
2905         }
2906
2907         if (context->utmp_id)
2908                 utmp_put_init_process(context->utmp_id, getpid_cached(), getsid(0),
2909                                       context->tty_path,
2910                                       context->utmp_mode == EXEC_UTMP_INIT  ? INIT_PROCESS :
2911                                       context->utmp_mode == EXEC_UTMP_LOGIN ? LOGIN_PROCESS :
2912                                       USER_PROCESS,
2913                                       username);
2914
2915         if (context->user) {
2916                 r = chown_terminal(STDIN_FILENO, uid);
2917                 if (r < 0) {
2918                         *exit_status = EXIT_STDIN;
2919                         return log_unit_error_errno(unit, r, "Failed to change ownership of terminal: %m");
2920                 }
2921         }
2922
2923         /* If delegation is enabled we'll pass ownership of the cgroup
2924          * (but only in systemd's own controller hierarchy!) to the
2925          * user of the new process. */
2926         if (params->cgroup_path && context->user && (params->flags & EXEC_CGROUP_DELEGATE)) {
2927                 r = cg_set_task_access(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, 0644, uid, gid);
2928                 if (r < 0) {
2929                         *exit_status = EXIT_CGROUP;
2930                         return log_unit_error_errno(unit, r, "Failed to adjust control group access: %m");
2931                 }
2932
2933                 r = cg_set_group_access(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, 0755, uid, gid);
2934                 if (r < 0) {
2935                         *exit_status = EXIT_CGROUP;
2936                         return log_unit_error_errno(unit, r, "Failed to adjust control group access: %m");
2937                 }
2938         }
2939
2940         for (dt = 0; dt < _EXEC_DIRECTORY_TYPE_MAX; dt++) {
2941                 r = setup_exec_directory(context, params, uid, gid, dt, exit_status);
2942                 if (r < 0)
2943                         return log_unit_error_errno(unit, r, "Failed to set up special execution directory in %s: %m", params->prefix[dt]);
2944         }
2945
2946         r = build_environment(
2947                         unit,
2948                         context,
2949                         params,
2950                         n_fds,
2951                         home,
2952                         username,
2953                         shell,
2954                         journal_stream_dev,
2955                         journal_stream_ino,
2956                         &our_env);
2957         if (r < 0) {
2958                 *exit_status = EXIT_MEMORY;
2959                 return log_oom();
2960         }
2961
2962         r = build_pass_environment(context, &pass_env);
2963         if (r < 0) {
2964                 *exit_status = EXIT_MEMORY;
2965                 return log_oom();
2966         }
2967
2968         accum_env = strv_env_merge(5,
2969                                    params->environment,
2970                                    our_env,
2971                                    pass_env,
2972                                    context->environment,
2973                                    files_env,
2974                                    NULL);
2975         if (!accum_env) {
2976                 *exit_status = EXIT_MEMORY;
2977                 return log_oom();
2978         }
2979         accum_env = strv_env_clean(accum_env);
2980
2981         (void) umask(context->umask);
2982
2983         r = setup_keyring(unit, context, params, uid, gid);
2984         if (r < 0) {
2985                 *exit_status = EXIT_KEYRING;
2986                 return log_unit_error_errno(unit, r, "Failed to set up kernel keyring: %m");
2987         }
2988
2989         /* We need sandboxing if the caller asked us to apply it and the command isn't explicitly excepted from it */
2990         needs_sandboxing = (params->flags & EXEC_APPLY_SANDBOXING) && !(command->flags & EXEC_COMMAND_FULLY_PRIVILEGED);
2991
2992         /* We need the ambient capability hack, if the caller asked us to apply it and the command is marked for it, and the kernel doesn't actually support ambient caps */
2993         needs_ambient_hack = (params->flags & EXEC_APPLY_SANDBOXING) && (command->flags & EXEC_COMMAND_AMBIENT_MAGIC) && !ambient_capabilities_supported();
2994
2995         /* We need setresuid() if the caller asked us to apply sandboxing and the command isn't explicitly excepted from either whole sandboxing or just setresuid() itself, and the ambient hack is not desired */
2996         if (needs_ambient_hack)
2997                 needs_setuid = false;
2998         else
2999                 needs_setuid = (params->flags & EXEC_APPLY_SANDBOXING) && !(command->flags & (EXEC_COMMAND_FULLY_PRIVILEGED|EXEC_COMMAND_NO_SETUID));
3000
3001         if (needs_sandboxing) {
3002                 /* MAC enablement checks need to be done before a new mount ns is created, as they rely on /sys being
3003                  * present. The actual MAC context application will happen later, as late as possible, to avoid
3004                  * impacting our own code paths. */
3005
3006 #if HAVE_SELINUX
3007                 use_selinux = mac_selinux_use();
3008 #endif
3009 #if ENABLE_SMACK
3010                 use_smack = mac_smack_use();
3011 #endif
3012 #if HAVE_APPARMOR
3013                 use_apparmor = mac_apparmor_use();
3014 #endif
3015         }
3016
3017         if (needs_setuid) {
3018                 if (context->pam_name && username) {
3019                         r = setup_pam(context->pam_name, username, uid, gid, context->tty_path, &accum_env, fds, n_fds);
3020                         if (r < 0) {
3021                                 *exit_status = EXIT_PAM;
3022                                 return log_unit_error_errno(unit, r, "Failed to set up PAM session: %m");
3023                         }
3024                 }
3025         }
3026
3027         if (context->private_network && runtime && runtime->netns_storage_socket[0] >= 0) {
3028                 if (ns_type_supported(NAMESPACE_NET)) {
3029                         r = setup_netns(runtime->netns_storage_socket);
3030                         if (r < 0) {
3031                                 *exit_status = EXIT_NETWORK;
3032                                 return log_unit_error_errno(unit, r, "Failed to set up network namespacing: %m");
3033                         }
3034                 } else
3035                         log_unit_warning(unit, "PrivateNetwork=yes is configured, but the kernel does not support network namespaces, ignoring.");
3036         }
3037
3038         needs_mount_namespace = exec_needs_mount_namespace(context, params, runtime);
3039         if (needs_mount_namespace) {
3040                 r = apply_mount_namespace(unit, command, context, params, runtime);
3041                 if (r < 0) {
3042                         *exit_status = EXIT_NAMESPACE;
3043                         return log_unit_error_errno(unit, r, "Failed to set up mount namespacing: %m");
3044                 }
3045         }
3046
3047         /* Apply just after mount namespace setup */
3048         r = apply_working_directory(context, params, home, needs_mount_namespace, exit_status);
3049         if (r < 0)
3050                 return log_unit_error_errno(unit, r, "Changing to the requested working directory failed: %m");
3051
        /* Drop groups as early as possible */
3053         if (needs_setuid) {
3054                 r = enforce_groups(gid, supplementary_gids, ngids);
3055                 if (r < 0) {
3056                         *exit_status = EXIT_GROUP;
3057                         return log_unit_error_errno(unit, r, "Changing group credentials failed: %m");
3058                 }
3059         }
3060
3061         if (needs_sandboxing) {
3062 #if HAVE_SELINUX
3063                 if (use_selinux && params->selinux_context_net && socket_fd >= 0) {
3064                         r = mac_selinux_get_child_mls_label(socket_fd, command->path, context->selinux_context, &mac_selinux_context_net);
3065                         if (r < 0) {
3066                                 *exit_status = EXIT_SELINUX_CONTEXT;
3067                                 return log_unit_error_errno(unit, r, "Failed to determine SELinux context: %m");
3068                         }
3069                 }
3070 #endif
3071
3072                 if (context->private_users) {
3073                         r = setup_private_users(uid, gid);
3074                         if (r < 0) {
3075                                 *exit_status = EXIT_USER;
3076                                 return log_unit_error_errno(unit, r, "Failed to set up user namespacing: %m");
3077                         }
3078                 }
3079         }
3080
3081         /* We repeat the fd closing here, to make sure that nothing is leaked from the PAM modules. Note that we are
3082          * more aggressive this time since socket_fd and the netns fds we don't need anymore. The custom endpoint fd
3083          * was needed to upload the policy and can now be closed as well. */
3084         r = close_all_fds(fds, n_fds);
3085         if (r >= 0)
3086                 r = shift_fds(fds, n_fds);
3087         if (r >= 0)
3088                 r = flags_fds(fds, n_storage_fds, n_socket_fds, context->non_blocking);
3089         if (r < 0) {
3090                 *exit_status = EXIT_FDS;
3091                 return log_unit_error_errno(unit, r, "Failed to adjust passed file descriptors: %m");
3092         }
3093
3094         secure_bits = context->secure_bits;
3095
3096         if (needs_sandboxing) {
3097                 uint64_t bset;
3098
3099                 for (i = 0; i < _RLIMIT_MAX; i++) {
3100
3101                         if (!context->rlimit[i])
3102                                 continue;
3103
3104                         r = setrlimit_closest(i, context->rlimit[i]);
3105                         if (r < 0) {
3106                                 *exit_status = EXIT_LIMITS;
3107                                 return log_unit_error_errno(unit, r, "Failed to adjust resource limit %s: %m", rlimit_to_string(i));
3108                         }
3109                 }
3110
3111                 /* Set the RTPRIO resource limit to 0, but only if nothing else was explicitly requested. */
3112                 if (context->restrict_realtime && !context->rlimit[RLIMIT_RTPRIO]) {
3113                         if (setrlimit(RLIMIT_RTPRIO, &RLIMIT_MAKE_CONST(0)) < 0) {
3114                                 *exit_status = EXIT_LIMITS;
3115                                 return log_unit_error_errno(unit, errno, "Failed to adjust RLIMIT_RTPRIO resource limit: %m");
3116                         }
3117                 }
3118
3119                 bset = context->capability_bounding_set;
3120                 /* If the ambient caps hack is enabled (which means the kernel can't do them, and the user asked for
3121                  * our magic fallback), then let's add some extra caps, so that the service can drop privs of its own,
3122                  * instead of us doing that */
3123                 if (needs_ambient_hack)
3124                         bset |= (UINT64_C(1) << CAP_SETPCAP) |
3125                                 (UINT64_C(1) << CAP_SETUID) |
3126                                 (UINT64_C(1) << CAP_SETGID);
3127
3128                 if (!cap_test_all(bset)) {
3129                         r = capability_bounding_set_drop(bset, false);
3130                         if (r < 0) {
3131                                 *exit_status = EXIT_CAPABILITIES;
3132                                 return log_unit_error_errno(unit, r, "Failed to drop capabilities: %m");
3133                         }
3134                 }
3135
3136                 /* This is done before enforce_user, but ambient set
3137                  * does not survive over setresuid() if keep_caps is not set. */
3138                 if (!needs_ambient_hack &&
3139                     context->capability_ambient_set != 0) {
3140                         r = capability_ambient_set_apply(context->capability_ambient_set, true);
3141                         if (r < 0) {
3142                                 *exit_status = EXIT_CAPABILITIES;
3143                                 return log_unit_error_errno(unit, r, "Failed to apply ambient capabilities (before UID change): %m");
3144                         }
3145                 }
3146         }
3147
3148         if (needs_setuid) {
3149                 if (context->user) {
3150                         r = enforce_user(context, uid);
3151                         if (r < 0) {
3152                                 *exit_status = EXIT_USER;
3153                                 return log_unit_error_errno(unit, r, "Failed to change UID to " UID_FMT ": %m", uid);
3154                         }
3155
3156                         if (!needs_ambient_hack &&
3157                             context->capability_ambient_set != 0) {
3158
3159                                 /* Fix the ambient capabilities after user change. */
3160                                 r = capability_ambient_set_apply(context->capability_ambient_set, false);
3161                                 if (r < 0) {
3162                                         *exit_status = EXIT_CAPABILITIES;
3163                                         return log_unit_error_errno(unit, r, "Failed to apply ambient capabilities (after UID change): %m");
3164                                 }
3165
3166                                 /* If we were asked to change user and ambient capabilities
3167                                  * were requested, we had to add keep-caps to the securebits
3168                                  * so that we would maintain the inherited capability set
3169                                  * through the setresuid(). Make sure that the bit is added
3170                                  * also to the context secure_bits so that we don't try to
3171                                  * drop the bit away next. */
3172
3173                                 secure_bits |= 1<<SECURE_KEEP_CAPS;
3174                         }
3175                 }
3176         }
3177
3178         if (needs_sandboxing) {
3179                 /* Apply the MAC contexts late, but before seccomp syscall filtering, as those should really be last to
3180                  * influence our own codepaths as little as possible. Moreover, applying MAC contexts usually requires
3181                  * syscalls that are subject to seccomp filtering, hence should probably be applied before the syscalls
3182                  * are restricted. */
3183
3184 #if HAVE_SELINUX
3185                 if (use_selinux) {
3186                         char *exec_context = mac_selinux_context_net ?: context->selinux_context;
3187
3188                         if (exec_context) {
3189                                 r = setexeccon(exec_context);
3190                                 if (r < 0) {
3191                                         *exit_status = EXIT_SELINUX_CONTEXT;
3192                                         return log_unit_error_errno(unit, r, "Failed to change SELinux context to %s: %m", exec_context);
3193                                 }
3194                         }
3195                 }
3196 #endif
3197
3198 #if ENABLE_SMACK
3199                 if (use_smack) {
3200                         r = setup_smack(context, command);
3201                         if (r < 0) {
3202                                 *exit_status = EXIT_SMACK_PROCESS_LABEL;
3203                                 return log_unit_error_errno(unit, r, "Failed to set SMACK process label: %m");
3204                         }
3205                 }
3206 #endif
3207
3208 #if HAVE_APPARMOR
3209                 if (use_apparmor && context->apparmor_profile) {
3210                         r = aa_change_onexec(context->apparmor_profile);
3211                         if (r < 0 && !context->apparmor_profile_ignore) {
3212                                 *exit_status = EXIT_APPARMOR_PROFILE;
3213                                 return log_unit_error_errno(unit, errno, "Failed to prepare AppArmor profile change to %s: %m", context->apparmor_profile);
3214                         }
3215                 }
3216 #endif
3217
3218                 /* PR_GET_SECUREBITS is not privileged, while PR_SET_SECUREBITS is. So to suppress potential EPERMs
3219                  * we'll try not to call PR_SET_SECUREBITS unless necessary. */
3220                 if (prctl(PR_GET_SECUREBITS) != secure_bits)
3221                         if (prctl(PR_SET_SECUREBITS, secure_bits) < 0) {
3222                                 *exit_status = EXIT_SECUREBITS;
3223                                 return log_unit_error_errno(unit, errno, "Failed to set process secure bits: %m");
3224                         }
3225
3226                 if (context_has_no_new_privileges(context))
3227                         if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) < 0) {
3228                                 *exit_status = EXIT_NO_NEW_PRIVILEGES;
3229                                 return log_unit_error_errno(unit, errno, "Failed to disable new privileges: %m");
3230                         }
3231
3232 #if HAVE_SECCOMP
3233                 r = apply_address_families(unit, context);
3234                 if (r < 0) {
3235                         *exit_status = EXIT_ADDRESS_FAMILIES;
3236                         return log_unit_error_errno(unit, r, "Failed to restrict address families: %m");
3237                 }
3238
3239                 r = apply_memory_deny_write_execute(unit, context);
3240                 if (r < 0) {
3241                         *exit_status = EXIT_SECCOMP;
3242                         return log_unit_error_errno(unit, r, "Failed to disable writing to executable memory: %m");
3243                 }
3244
3245                 r = apply_restrict_realtime(unit, context);
3246                 if (r < 0) {
3247                         *exit_status = EXIT_SECCOMP;
3248                         return log_unit_error_errno(unit, r, "Failed to apply realtime restrictions: %m");
3249                 }
3250
3251                 r = apply_restrict_namespaces(unit, context);
3252                 if (r < 0) {
3253                         *exit_status = EXIT_SECCOMP;
3254                         return log_unit_error_errno(unit, r, "Failed to apply namespace restrictions: %m");
3255                 }
3256
3257                 r = apply_protect_sysctl(unit, context);
3258                 if (r < 0) {
3259                         *exit_status = EXIT_SECCOMP;
3260                         return log_unit_error_errno(unit, r, "Failed to apply sysctl restrictions: %m");
3261                 }
3262
3263                 r = apply_protect_kernel_modules(unit, context);
3264                 if (r < 0) {
3265                         *exit_status = EXIT_SECCOMP;
3266                         return log_unit_error_errno(unit, r, "Failed to apply module loading restrictions: %m");
3267                 }
3268
3269                 r = apply_private_devices(unit, context);
3270                 if (r < 0) {
3271                         *exit_status = EXIT_SECCOMP;
3272                         return log_unit_error_errno(unit, r, "Failed to set up private devices: %m");
3273                 }
3274
3275                 r = apply_syscall_archs(unit, context);
3276                 if (r < 0) {
3277                         *exit_status = EXIT_SECCOMP;
3278                         return log_unit_error_errno(unit, r, "Failed to apply syscall architecture restrictions: %m");
3279                 }
3280
3281                 r = apply_lock_personality(unit, context);
3282                 if (r < 0) {
3283                         *exit_status = EXIT_SECCOMP;
3284                         return log_unit_error_errno(unit, r, "Failed to lock personalities: %m");
3285                 }
3286
3287                 /* This really should remain the last step before the execve(), to make sure our own code is unaffected
3288                  * by the filter as little as possible. */
3289                 r = apply_syscall_filter(unit, context, needs_ambient_hack);
3290                 if (r < 0) {
3291                         *exit_status = EXIT_SECCOMP;
3292                         return log_unit_error_errno(unit, r, "Failed to apply system call filters: %m");
3293                 }
3294 #endif
3295         }
3296
3297         if (!strv_isempty(context->unset_environment)) {
3298                 char **ee = NULL;
3299
3300                 ee = strv_env_delete(accum_env, 1, context->unset_environment);
3301                 if (!ee) {
3302                         *exit_status = EXIT_MEMORY;
3303                         return log_oom();
3304                 }
3305
3306                 strv_free(accum_env);
3307                 accum_env = ee;
3308         }
3309
3310         final_argv = replace_env_argv(argv, accum_env);
3311         if (!final_argv) {
3312                 *exit_status = EXIT_MEMORY;
3313                 return log_oom();
3314         }
3315
3316         if (_unlikely_(log_get_max_level() >= LOG_DEBUG)) {
3317                 _cleanup_free_ char *line;
3318
3319                 line = exec_command_line(final_argv);
3320                 if (line) {
3321                         log_struct(LOG_DEBUG,
3322                                    "EXECUTABLE=%s", command->path,
3323                                    LOG_UNIT_MESSAGE(unit, "Executing: %s", line),
3324                                    LOG_UNIT_ID(unit),
3325                                    LOG_UNIT_INVOCATION_ID(unit),
3326                                    NULL);
3327                 }
3328         }
3329
3330         execve(command->path, final_argv, accum_env);
3331
3332         if (errno == ENOENT && (command->flags & EXEC_COMMAND_IGNORE_FAILURE)) {
3333
3334                 log_struct_errno(LOG_INFO, errno,
3335                                  "MESSAGE_ID=" SD_MESSAGE_SPAWN_FAILED_STR,
3336                                  LOG_UNIT_ID(unit),
3337                                  LOG_UNIT_INVOCATION_ID(unit),
3338                                  LOG_UNIT_MESSAGE(unit, "Executable %s missing, skipping: %m",
3339                                                   command->path),
3340                                  "EXECUTABLE=%s", command->path,
3341                                  NULL);
3342
3343                 return 0;
3344         }
3345
3346         *exit_status = EXIT_EXEC;
3347         return log_unit_error_errno(unit, errno, "Failed to execute command: %m");
3348 }
3349
3350 int exec_spawn(Unit *unit,
3351                ExecCommand *command,
3352                const ExecContext *context,
3353                const ExecParameters *params,
3354                ExecRuntime *runtime,
3355                DynamicCreds *dcreds,
3356                pid_t *ret) {
3357
3358         _cleanup_strv_free_ char **files_env = NULL;
3359         int *fds = NULL;
3360         unsigned n_storage_fds = 0, n_socket_fds = 0;
3361         _cleanup_free_ char *line = NULL;
3362         int socket_fd, r;
3363         int named_iofds[3] = { -1, -1, -1 };
3364         char **argv;
3365         pid_t pid;
3366
3367         assert(unit);
3368         assert(command);
3369         assert(context);
3370         assert(ret);
3371         assert(params);
3372         assert(params->fds || (params->n_storage_fds + params->n_socket_fds <= 0));
3373
3374         if (context->std_input == EXEC_INPUT_SOCKET ||
3375             context->std_output == EXEC_OUTPUT_SOCKET ||
3376             context->std_error == EXEC_OUTPUT_SOCKET) {
3377
3378                 if (params->n_socket_fds > 1) {
3379                         log_unit_error(unit, "Got more than one socket.");
3380                         return -EINVAL;
3381                 }
3382
3383                 if (params->n_socket_fds == 0) {
3384                         log_unit_error(unit, "Got no socket.");
3385                         return -EINVAL;
3386                 }
3387
3388                 socket_fd = params->fds[0];
3389         } else {
3390                 socket_fd = -1;
3391                 fds = params->fds;
3392                 n_storage_fds = params->n_storage_fds;
3393                 n_socket_fds = params->n_socket_fds;
3394         }
3395
3396         r = exec_context_named_iofds(unit, context, params, named_iofds);
3397         if (r < 0)
3398                 return log_unit_error_errno(unit, r, "Failed to load a named file descriptor: %m");
3399
3400         r = exec_context_load_environment(unit, context, &files_env);
3401         if (r < 0)
3402                 return log_unit_error_errno(unit, r, "Failed to load environment files: %m");
3403
3404         argv = params->argv ?: command->argv;
3405         line = exec_command_line(argv);
3406         if (!line)
3407                 return log_oom();
3408
3409         log_struct(LOG_DEBUG,
3410                    LOG_UNIT_MESSAGE(unit, "About to execute: %s", line),
3411                    "EXECUTABLE=%s", command->path,
3412                    LOG_UNIT_ID(unit),
3413                    LOG_UNIT_INVOCATION_ID(unit),
3414                    NULL);
3415
3416         pid = fork();
3417         if (pid < 0)
3418                 return log_unit_error_errno(unit, errno, "Failed to fork: %m");
3419
3420         if (pid == 0) {
3421                 int exit_status = EXIT_SUCCESS;
3422
3423                 r = exec_child(unit,
3424                                command,
3425                                context,
3426                                params,
3427                                runtime,
3428                                dcreds,
3429                                argv,
3430                                socket_fd,
3431                                named_iofds,
3432                                fds,
3433                                n_storage_fds,
3434                                n_socket_fds,
3435                                files_env,
3436                                unit->manager->user_lookup_fds[1],
3437                                &exit_status);
3438
3439                 if (r < 0) {
3440                         log_struct_errno(LOG_ERR, r,
3441                                          "MESSAGE_ID=" SD_MESSAGE_SPAWN_FAILED_STR,
3442                                          LOG_UNIT_ID(unit),
3443                                          LOG_UNIT_INVOCATION_ID(unit),
3444                                          LOG_UNIT_MESSAGE(unit, "Failed at step %s spawning %s: %m",
3445                                                           exit_status_to_string(exit_status, EXIT_STATUS_SYSTEMD),
3446                                                           command->path),
3447                                          "EXECUTABLE=%s", command->path,
3448                                          NULL);
3449                 }
3450
3451                 _exit(exit_status);
3452         }
3453
3454         log_unit_debug(unit, "Forked %s as "PID_FMT, command->path, pid);
3455
3456         /* We add the new process to the cgroup both in the child (so
3457          * that we can be sure that no user code is ever executed
3458          * outside of the cgroup) and in the parent (so that we can be
3459          * sure that when we kill the cgroup the process will be
3460          * killed too). */
3461         if (params->cgroup_path)
3462                 (void) cg_attach(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, pid);
3463
3464         exec_status_start(&command->exec_status, pid);
3465
3466         *ret = pid;
3467         return 0;
3468 }
3469
3470 void exec_context_init(ExecContext *c) {
3471         ExecDirectoryType i;
3472
3473         assert(c);
3474
3475         c->umask = 0022;
3476         c->ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 0);
3477         c->cpu_sched_policy = SCHED_OTHER;
3478         c->syslog_priority = LOG_DAEMON|LOG_INFO;
3479         c->syslog_level_prefix = true;
3480         c->ignore_sigpipe = true;
3481         c->timer_slack_nsec = NSEC_INFINITY;
3482         c->personality = PERSONALITY_INVALID;
3483         for (i = 0; i < _EXEC_DIRECTORY_TYPE_MAX; i++)
3484                 c->directories[i].mode = 0755;
3485         c->capability_bounding_set = CAP_ALL;
3486         c->restrict_namespaces = NAMESPACE_FLAGS_ALL;
3487         c->log_level_max = -1;
3488 }
3489
3490 void exec_context_done(ExecContext *c) {
3491         ExecDirectoryType i;
3492         size_t l;
3493
3494         assert(c);
3495
3496         c->environment = strv_free(c->environment);
3497         c->environment_files = strv_free(c->environment_files);
3498         c->pass_environment = strv_free(c->pass_environment);
3499         c->unset_environment = strv_free(c->unset_environment);
3500
3501         for (l = 0; l < ELEMENTSOF(c->rlimit); l++)
3502                 c->rlimit[l] = mfree(c->rlimit[l]);
3503
3504         for (l = 0; l < 3; l++)
3505                 c->stdio_fdname[l] = mfree(c->stdio_fdname[l]);
3506
3507         c->working_directory = mfree(c->working_directory);
3508         c->root_directory = mfree(c->root_directory);
3509         c->root_image = mfree(c->root_image);
3510         c->tty_path = mfree(c->tty_path);
3511         c->syslog_identifier = mfree(c->syslog_identifier);
3512         c->user = mfree(c->user);
3513         c->group = mfree(c->group);
3514
3515         c->supplementary_groups = strv_free(c->supplementary_groups);
3516
3517         c->pam_name = mfree(c->pam_name);
3518
3519         c->read_only_paths = strv_free(c->read_only_paths);
3520         c->read_write_paths = strv_free(c->read_write_paths);
3521         c->inaccessible_paths = strv_free(c->inaccessible_paths);
3522
3523         bind_mount_free_many(c->bind_mounts, c->n_bind_mounts);
3524
3525         if (c->cpuset)
3526                 CPU_FREE(c->cpuset);
3527
3528         c->utmp_id = mfree(c->utmp_id);
3529         c->selinux_context = mfree(c->selinux_context);
3530         c->apparmor_profile = mfree(c->apparmor_profile);
3531         c->smack_process_label = mfree(c->smack_process_label);
3532
3533         c->syscall_filter = hashmap_free(c->syscall_filter);
3534         c->syscall_archs = set_free(c->syscall_archs);
3535         c->address_families = set_free(c->address_families);
3536
3537         for (i = 0; i < _EXEC_DIRECTORY_TYPE_MAX; i++)
3538                 c->directories[i].paths = strv_free(c->directories[i].paths);
3539
3540         c->log_level_max = -1;
3541
3542         exec_context_free_log_extra_fields(c);
3543 }
3544
3545 int exec_context_destroy_runtime_directory(ExecContext *c, const char *runtime_prefix) {
3546         char **i;
3547
3548         assert(c);
3549
3550         if (!runtime_prefix)
3551                 return 0;
3552
3553         STRV_FOREACH(i, c->directories[EXEC_DIRECTORY_RUNTIME].paths) {
3554                 _cleanup_free_ char *p;
3555
3556                 p = strjoin(runtime_prefix, "/", *i);
3557                 if (!p)
3558                         return -ENOMEM;
3559
3560                 /* We execute this synchronously, since we need to be sure this is gone when we start the service
3561                  * next. */
3562                 (void) rm_rf(p, REMOVE_ROOT);
3563         }
3564
3565         return 0;
3566 }
3567
3568 void exec_command_done(ExecCommand *c) {
3569         assert(c);
3570
3571         c->path = mfree(c->path);
3572
3573         c->argv = strv_free(c->argv);
3574 }
3575
3576 void exec_command_done_array(ExecCommand *c, unsigned n) {
3577         unsigned i;
3578
3579         for (i = 0; i < n; i++)
3580                 exec_command_done(c+i);
3581 }
3582
3583 ExecCommand* exec_command_free_list(ExecCommand *c) {
3584         ExecCommand *i;
3585
3586         while ((i = c)) {
3587                 LIST_REMOVE(command, c, i);
3588                 exec_command_done(i);
3589                 free(i);
3590         }
3591
3592         return NULL;
3593 }
3594
3595 void exec_command_free_array(ExecCommand **c, unsigned n) {
3596         unsigned i;
3597
3598         for (i = 0; i < n; i++)
3599                 c[i] = exec_command_free_list(c[i]);
3600 }
3601
3602 typedef struct InvalidEnvInfo {
3603         Unit *unit;
3604         const char *path;
3605 } InvalidEnvInfo;
3606
3607 static void invalid_env(const char *p, void *userdata) {
3608         InvalidEnvInfo *info = userdata;
3609
3610         log_unit_error(info->unit, "Ignoring invalid environment assignment '%s': %s", p, info->path);
3611 }
3612
3613 const char* exec_context_fdname(const ExecContext *c, int fd_index) {
3614         assert(c);
3615
3616         switch (fd_index) {
3617         case STDIN_FILENO:
3618                 if (c->std_input != EXEC_INPUT_NAMED_FD)
3619                         return NULL;
3620                 return c->stdio_fdname[STDIN_FILENO] ?: "stdin";
3621         case STDOUT_FILENO:
3622                 if (c->std_output != EXEC_OUTPUT_NAMED_FD)
3623                         return NULL;
3624                 return c->stdio_fdname[STDOUT_FILENO] ?: "stdout";
3625         case STDERR_FILENO:
3626                 if (c->std_error != EXEC_OUTPUT_NAMED_FD)
3627                         return NULL;
3628                 return c->stdio_fdname[STDERR_FILENO] ?: "stderr";
3629         default:
3630                 return NULL;
3631         }
3632 }
3633
3634 int exec_context_named_iofds(Unit *unit, const ExecContext *c, const ExecParameters *p, int named_iofds[3]) {
3635         unsigned i, targets;
3636         const char* stdio_fdname[3];
3637         unsigned n_fds;
3638
3639         assert(c);
3640         assert(p);
3641
3642         targets = (c->std_input == EXEC_INPUT_NAMED_FD) +
3643                   (c->std_output == EXEC_OUTPUT_NAMED_FD) +
3644                   (c->std_error == EXEC_OUTPUT_NAMED_FD);
3645
3646         for (i = 0; i < 3; i++)
3647                 stdio_fdname[i] = exec_context_fdname(c, i);
3648
3649         n_fds = p->n_storage_fds + p->n_socket_fds;
3650
3651         for (i = 0; i < n_fds  && targets > 0; i++)
3652                 if (named_iofds[STDIN_FILENO] < 0 &&
3653                     c->std_input == EXEC_INPUT_NAMED_FD &&
3654                     stdio_fdname[STDIN_FILENO] &&
3655                     streq(p->fd_names[i], stdio_fdname[STDIN_FILENO])) {
3656
3657                         named_iofds[STDIN_FILENO] = p->fds[i];
3658                         targets--;
3659
3660                 } else if (named_iofds[STDOUT_FILENO] < 0 &&
3661                            c->std_output == EXEC_OUTPUT_NAMED_FD &&
3662                            stdio_fdname[STDOUT_FILENO] &&
3663                            streq(p->fd_names[i], stdio_fdname[STDOUT_FILENO])) {
3664
3665                         named_iofds[STDOUT_FILENO] = p->fds[i];
3666                         targets--;
3667
3668                 } else if (named_iofds[STDERR_FILENO] < 0 &&
3669                            c->std_error == EXEC_OUTPUT_NAMED_FD &&
3670                            stdio_fdname[STDERR_FILENO] &&
3671                            streq(p->fd_names[i], stdio_fdname[STDERR_FILENO])) {
3672
3673                         named_iofds[STDERR_FILENO] = p->fds[i];
3674                         targets--;
3675                 }
3676
3677         return targets == 0 ? 0 : -ENOENT;
3678 }
3679
3680 int exec_context_load_environment(Unit *unit, const ExecContext *c, char ***l) {
3681         char **i, **r = NULL;
3682
3683         assert(c);
3684         assert(l);
3685
3686         STRV_FOREACH(i, c->environment_files) {
3687                 char *fn;
3688                 int k;
3689                 unsigned n;
3690                 bool ignore = false;
3691                 char **p;
3692                 _cleanup_globfree_ glob_t pglob = {};
3693
3694                 fn = *i;
3695
3696                 if (fn[0] == '-') {
3697                         ignore = true;
3698                         fn++;
3699                 }
3700
3701                 if (!path_is_absolute(fn)) {
3702                         if (ignore)
3703                                 continue;
3704
3705                         strv_free(r);
3706                         return -EINVAL;
3707                 }
3708
3709                 /* Filename supports globbing, take all matching files */
3710                 k = safe_glob(fn, 0, &pglob);
3711                 if (k < 0) {
3712                         if (ignore)
3713                                 continue;
3714
3715                         strv_free(r);
3716                         return k;
3717                 }
3718
3719                 /* When we don't match anything, -ENOENT should be returned */
3720                 assert(pglob.gl_pathc > 0);
3721
3722                 for (n = 0; n < pglob.gl_pathc; n++) {
3723                         k = load_env_file(NULL, pglob.gl_pathv[n], NULL, &p);
3724                         if (k < 0) {
3725                                 if (ignore)
3726                                         continue;
3727
3728                                 strv_free(r);
3729                                 return k;
3730                         }
3731                         /* Log invalid environment variables with filename */
3732                         if (p) {
3733                                 InvalidEnvInfo info = {
3734                                         .unit = unit,
3735                                         .path = pglob.gl_pathv[n]
3736                                 };
3737
3738                                 p = strv_env_clean_with_callback(p, invalid_env, &info);
3739                         }
3740
3741                         if (r == NULL)
3742                                 r = p;
3743                         else {
3744                                 char **m;
3745
3746                                 m = strv_env_merge(2, r, p);
3747                                 strv_free(r);
3748                                 strv_free(p);
3749                                 if (!m)
3750                                         return -ENOMEM;
3751
3752                                 r = m;
3753                         }
3754                 }
3755         }
3756
3757         *l = r;
3758
3759         return 0;
3760 }
3761
3762 static bool tty_may_match_dev_console(const char *tty) {
3763         _cleanup_free_ char *active = NULL;
3764         char *console;
3765
3766         if (!tty)
3767                 return true;
3768
3769         tty = skip_dev_prefix(tty);
3770
3771         /* trivial identity? */
3772         if (streq(tty, "console"))
3773                 return true;
3774
3775         console = resolve_dev_console(&active);
3776         /* if we could not resolve, assume it may */
3777         if (!console)
3778                 return true;
3779
3780         /* "tty0" means the active VC, so it may be the same sometimes */
3781         return streq(console, tty) || (streq(console, "tty0") && tty_is_vc(tty));
3782 }
3783
3784 bool exec_context_may_touch_console(ExecContext *ec) {
3785
3786         return (ec->tty_reset ||
3787                 ec->tty_vhangup ||
3788                 ec->tty_vt_disallocate ||
3789                 is_terminal_input(ec->std_input) ||
3790                 is_terminal_output(ec->std_output) ||
3791                 is_terminal_output(ec->std_error)) &&
3792                tty_may_match_dev_console(exec_context_tty_path(ec));
3793 }
3794
/* Writes every string of the NULL-terminated string array 'l' to 'f', each one preceded by a single space.
 * A NULL array is treated like an empty one. */
static void strv_fprintf(FILE *f, char **l) {
        char **item;

        assert(f);

        if (!l)
                return;

        for (item = l; *item; item++)
                fprintf(f, " %s", *item);
}
3803
3804 void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) {
3805         ExecDirectoryType dt;
3806         char **e, **d;
3807         unsigned i;
3808         int r;
3809
3810         assert(c);
3811         assert(f);
3812
3813         prefix = strempty(prefix);
3814
3815         fprintf(f,
3816                 "%sUMask: %04o\n"
3817                 "%sWorkingDirectory: %s\n"
3818                 "%sRootDirectory: %s\n"
3819                 "%sNonBlocking: %s\n"
3820                 "%sPrivateTmp: %s\n"
3821                 "%sPrivateDevices: %s\n"
3822                 "%sProtectKernelTunables: %s\n"
3823                 "%sProtectKernelModules: %s\n"
3824                 "%sProtectControlGroups: %s\n"
3825                 "%sPrivateNetwork: %s\n"
3826                 "%sPrivateUsers: %s\n"
3827                 "%sProtectHome: %s\n"
3828                 "%sProtectSystem: %s\n"
3829                 "%sMountAPIVFS: %s\n"
3830                 "%sIgnoreSIGPIPE: %s\n"
3831                 "%sMemoryDenyWriteExecute: %s\n"
3832                 "%sRestrictRealtime: %s\n"
3833                 "%sKeyringMode: %s\n",
3834                 prefix, c->umask,
3835                 prefix, c->working_directory ? c->working_directory : "/",
3836                 prefix, c->root_directory ? c->root_directory : "/",
3837                 prefix, yes_no(c->non_blocking),
3838                 prefix, yes_no(c->private_tmp),
3839                 prefix, yes_no(c->private_devices),
3840                 prefix, yes_no(c->protect_kernel_tunables),
3841                 prefix, yes_no(c->protect_kernel_modules),
3842                 prefix, yes_no(c->protect_control_groups),
3843                 prefix, yes_no(c->private_network),
3844                 prefix, yes_no(c->private_users),
3845                 prefix, protect_home_to_string(c->protect_home),
3846                 prefix, protect_system_to_string(c->protect_system),
3847                 prefix, yes_no(c->mount_apivfs),
3848                 prefix, yes_no(c->ignore_sigpipe),
3849                 prefix, yes_no(c->memory_deny_write_execute),
3850                 prefix, yes_no(c->restrict_realtime),
3851                 prefix, exec_keyring_mode_to_string(c->keyring_mode));
3852
3853         if (c->root_image)
3854                 fprintf(f, "%sRootImage: %s\n", prefix, c->root_image);
3855
3856         STRV_FOREACH(e, c->environment)
3857                 fprintf(f, "%sEnvironment: %s\n", prefix, *e);
3858
3859         STRV_FOREACH(e, c->environment_files)
3860                 fprintf(f, "%sEnvironmentFile: %s\n", prefix, *e);
3861
3862         STRV_FOREACH(e, c->pass_environment)
3863                 fprintf(f, "%sPassEnvironment: %s\n", prefix, *e);
3864
3865         STRV_FOREACH(e, c->unset_environment)
3866                 fprintf(f, "%sUnsetEnvironment: %s\n", prefix, *e);
3867
3868         fprintf(f, "%sRuntimeDirectoryPreserve: %s\n", prefix, exec_preserve_mode_to_string(c->runtime_directory_preserve_mode));
3869
3870         for (dt = 0; dt < _EXEC_DIRECTORY_TYPE_MAX; dt++) {
3871                 fprintf(f, "%s%sMode: %04o\n", prefix, exec_directory_type_to_string(dt), c->directories[dt].mode);
3872
3873                 STRV_FOREACH(d, c->directories[dt].paths)
3874                         fprintf(f, "%s%s: %s\n", prefix, exec_directory_type_to_string(dt), *d);
3875         }
3876
3877         if (c->nice_set)
3878                 fprintf(f,
3879                         "%sNice: %i\n",
3880                         prefix, c->nice);
3881
3882         if (c->oom_score_adjust_set)
3883                 fprintf(f,
3884                         "%sOOMScoreAdjust: %i\n",
3885                         prefix, c->oom_score_adjust);
3886
3887         for (i = 0; i < RLIM_NLIMITS; i++)
3888                 if (c->rlimit[i]) {
3889                         fprintf(f, "%s%s: " RLIM_FMT "\n",
3890                                 prefix, rlimit_to_string(i), c->rlimit[i]->rlim_max);
3891                         fprintf(f, "%s%sSoft: " RLIM_FMT "\n",
3892                                 prefix, rlimit_to_string(i), c->rlimit[i]->rlim_cur);
3893                 }
3894
3895         if (c->ioprio_set) {
3896                 _cleanup_free_ char *class_str = NULL;
3897
3898                 r = ioprio_class_to_string_alloc(IOPRIO_PRIO_CLASS(c->ioprio), &class_str);
3899                 if (r >= 0)
3900                         fprintf(f, "%sIOSchedulingClass: %s\n", prefix, class_str);
3901
3902                 fprintf(f, "%sIOPriority: %lu\n", prefix, IOPRIO_PRIO_DATA(c->ioprio));
3903         }
3904
3905         if (c->cpu_sched_set) {
3906                 _cleanup_free_ char *policy_str = NULL;
3907
3908                 r = sched_policy_to_string_alloc(c->cpu_sched_policy, &policy_str);
3909                 if (r >= 0)
3910                         fprintf(f, "%sCPUSchedulingPolicy: %s\n", prefix, policy_str);
3911
3912                 fprintf(f,
3913                         "%sCPUSchedulingPriority: %i\n"
3914                         "%sCPUSchedulingResetOnFork: %s\n",
3915                         prefix, c->cpu_sched_priority,
3916                         prefix, yes_no(c->cpu_sched_reset_on_fork));
3917         }
3918
3919         if (c->cpuset) {
3920                 fprintf(f, "%sCPUAffinity:", prefix);
3921                 for (i = 0; i < c->cpuset_ncpus; i++)
3922                         if (CPU_ISSET_S(i, CPU_ALLOC_SIZE(c->cpuset_ncpus), c->cpuset))
3923                                 fprintf(f, " %u", i);
3924                 fputs("\n", f);
3925         }
3926
3927         if (c->timer_slack_nsec != NSEC_INFINITY)
3928                 fprintf(f, "%sTimerSlackNSec: "NSEC_FMT "\n", prefix, c->timer_slack_nsec);
3929
3930         fprintf(f,
3931                 "%sStandardInput: %s\n"
3932                 "%sStandardOutput: %s\n"
3933                 "%sStandardError: %s\n",
3934                 prefix, exec_input_to_string(c->std_input),
3935                 prefix, exec_output_to_string(c->std_output),
3936                 prefix, exec_output_to_string(c->std_error));
3937
3938         if (c->tty_path)
3939                 fprintf(f,
3940                         "%sTTYPath: %s\n"
3941                         "%sTTYReset: %s\n"
3942                         "%sTTYVHangup: %s\n"
3943                         "%sTTYVTDisallocate: %s\n",
3944                         prefix, c->tty_path,
3945                         prefix, yes_no(c->tty_reset),
3946                         prefix, yes_no(c->tty_vhangup),
3947                         prefix, yes_no(c->tty_vt_disallocate));
3948
3949         if (IN_SET(c->std_output,
3950                    EXEC_OUTPUT_SYSLOG,
3951                    EXEC_OUTPUT_KMSG,
3952                    EXEC_OUTPUT_JOURNAL,
3953                    EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
3954                    EXEC_OUTPUT_KMSG_AND_CONSOLE,
3955                    EXEC_OUTPUT_JOURNAL_AND_CONSOLE) ||
3956             IN_SET(c->std_error,
3957                    EXEC_OUTPUT_SYSLOG,
3958                    EXEC_OUTPUT_KMSG,
3959                    EXEC_OUTPUT_JOURNAL,
3960                    EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
3961                    EXEC_OUTPUT_KMSG_AND_CONSOLE,
3962                    EXEC_OUTPUT_JOURNAL_AND_CONSOLE)) {
3963
3964                 _cleanup_free_ char *fac_str = NULL, *lvl_str = NULL;
3965
3966                 r = log_facility_unshifted_to_string_alloc(c->syslog_priority >> 3, &fac_str);
3967                 if (r >= 0)
3968                         fprintf(f, "%sSyslogFacility: %s\n", prefix, fac_str);
3969
3970                 r = log_level_to_string_alloc(LOG_PRI(c->syslog_priority), &lvl_str);
3971                 if (r >= 0)
3972                         fprintf(f, "%sSyslogLevel: %s\n", prefix, lvl_str);
3973         }
3974
3975         if (c->log_level_max >= 0) {
3976                 _cleanup_free_ char *t = NULL;
3977
3978                 (void) log_level_to_string_alloc(c->log_level_max, &t);
3979
3980                 fprintf(f, "%sLogLevelMax: %s\n", prefix, strna(t));
3981         }
3982
3983         if (c->n_log_extra_fields > 0) {
3984                 size_t j;
3985
3986                 for (j = 0; j < c->n_log_extra_fields; j++) {
3987                         fprintf(f, "%sLogExtraFields: ", prefix);
3988                         fwrite(c->log_extra_fields[j].iov_base,
3989                                1, c->log_extra_fields[j].iov_len,
3990                                f);
3991                         fputc('\n', f);
3992                 }
3993         }
3994
3995         if (c->secure_bits) {
3996                 _cleanup_free_ char *str = NULL;
3997
3998                 r = secure_bits_to_string_alloc(c->secure_bits, &str);
3999                 if (r >= 0)
4000                         fprintf(f, "%sSecure Bits: %s\n", prefix, str);
4001         }
4002
4003         if (c->capability_bounding_set != CAP_ALL) {
4004                 _cleanup_free_ char *str = NULL;
4005
4006                 r = capability_set_to_string_alloc(c->capability_bounding_set, &str);
4007                 if (r >= 0)
4008                         fprintf(f, "%sCapabilityBoundingSet: %s\n", prefix, str);
4009         }
4010
4011         if (c->capability_ambient_set != 0) {
4012                 _cleanup_free_ char *str = NULL;
4013
4014                 r = capability_set_to_string_alloc(c->capability_ambient_set, &str);
4015                 if (r >= 0)
4016                         fprintf(f, "%sAmbientCapabilities: %s\n", prefix, str);
4017         }
4018
4019         if (c->user)
4020                 fprintf(f, "%sUser: %s\n", prefix, c->user);
4021         if (c->group)
4022                 fprintf(f, "%sGroup: %s\n", prefix, c->group);
4023
4024         fprintf(f, "%sDynamicUser: %s\n", prefix, yes_no(c->dynamic_user));
4025
4026         if (!strv_isempty(c->supplementary_groups)) {
4027                 fprintf(f, "%sSupplementaryGroups:", prefix);
4028                 strv_fprintf(f, c->supplementary_groups);
4029                 fputs("\n", f);
4030         }
4031
4032         if (c->pam_name)
4033                 fprintf(f, "%sPAMName: %s\n", prefix, c->pam_name);
4034
4035         if (strv_length(c->read_write_paths) > 0) {
4036                 fprintf(f, "%sReadWritePaths:", prefix);
4037                 strv_fprintf(f, c->read_write_paths);
4038                 fputs("\n", f);
4039         }
4040
4041         if (strv_length(c->read_only_paths) > 0) {
4042                 fprintf(f, "%sReadOnlyPaths:", prefix);
4043                 strv_fprintf(f, c->read_only_paths);
4044                 fputs("\n", f);
4045         }
4046
4047         if (strv_length(c->inaccessible_paths) > 0) {
4048                 fprintf(f, "%sInaccessiblePaths:", prefix);
4049                 strv_fprintf(f, c->inaccessible_paths);
4050                 fputs("\n", f);
4051         }
4052
4053         if (c->n_bind_mounts > 0)
4054                 for (i = 0; i < c->n_bind_mounts; i++) {
4055                         fprintf(f, "%s%s: %s:%s:%s\n", prefix,
4056                                 c->bind_mounts[i].read_only ? "BindReadOnlyPaths" : "BindPaths",
4057                                 c->bind_mounts[i].source,
4058                                 c->bind_mounts[i].destination,
4059                                 c->bind_mounts[i].recursive ? "rbind" : "norbind");
4060                 }
4061
4062         if (c->utmp_id)
4063                 fprintf(f,
4064                         "%sUtmpIdentifier: %s\n",
4065                         prefix, c->utmp_id);
4066
4067         if (c->selinux_context)
4068                 fprintf(f,
4069                         "%sSELinuxContext: %s%s\n",
4070                         prefix, c->selinux_context_ignore ? "-" : "", c->selinux_context);
4071
4072         if (c->apparmor_profile)
4073                 fprintf(f,
4074                         "%sAppArmorProfile: %s%s\n",
4075                         prefix, c->apparmor_profile_ignore ? "-" : "", c->apparmor_profile);
4076
4077         if (c->smack_process_label)
4078                 fprintf(f,
4079                         "%sSmackProcessLabel: %s%s\n",
4080                         prefix, c->smack_process_label_ignore ? "-" : "", c->smack_process_label);
4081
4082         if (c->personality != PERSONALITY_INVALID)
4083                 fprintf(f,
4084                         "%sPersonality: %s\n",
4085                         prefix, strna(personality_to_string(c->personality)));
4086
4087         fprintf(f,
4088                 "%sLockPersonality: %s\n",
4089                 prefix, yes_no(c->lock_personality));
4090
4091         if (c->syscall_filter) {
4092 #if HAVE_SECCOMP
4093                 Iterator j;
4094                 void *id, *val;
4095                 bool first = true;
4096 #endif
4097
4098                 fprintf(f,
4099                         "%sSystemCallFilter: ",
4100                         prefix);
4101
4102                 if (!c->syscall_whitelist)
4103                         fputc('~', f);
4104
4105 #if HAVE_SECCOMP
4106                 HASHMAP_FOREACH_KEY(val, id, c->syscall_filter, j) {
4107                         _cleanup_free_ char *name = NULL;
4108                         const char *errno_name = NULL;
4109                         int num = PTR_TO_INT(val);
4110
4111                         if (first)
4112                                 first = false;
4113                         else
4114                                 fputc(' ', f);
4115
4116                         name = seccomp_syscall_resolve_num_arch(SCMP_ARCH_NATIVE, PTR_TO_INT(id) - 1);
4117                         fputs(strna(name), f);
4118
4119                         if (num >= 0) {
4120                                 errno_name = errno_to_name(num);
4121                                 if (errno_name)
4122                                         fprintf(f, ":%s", errno_name);
4123                                 else
4124                                         fprintf(f, ":%d", num);
4125                         }
4126                 }
4127 #endif
4128
4129                 fputc('\n', f);
4130         }
4131
4132         if (c->syscall_archs) {
4133 #if HAVE_SECCOMP
4134                 Iterator j;
4135                 void *id;
4136 #endif
4137
4138                 fprintf(f,
4139                         "%sSystemCallArchitectures:",
4140                         prefix);
4141
4142 #if HAVE_SECCOMP
4143                 SET_FOREACH(id, c->syscall_archs, j)
4144                         fprintf(f, " %s", strna(seccomp_arch_to_string(PTR_TO_UINT32(id) - 1)));
4145 #endif
4146                 fputc('\n', f);
4147         }
4148
4149         if (exec_context_restrict_namespaces_set(c)) {
4150                 _cleanup_free_ char *s = NULL;
4151
4152                 r = namespace_flag_to_string_many(c->restrict_namespaces, &s);
4153                 if (r >= 0)
4154                         fprintf(f, "%sRestrictNamespaces: %s\n",
4155                                 prefix, s);
4156         }
4157
4158         if (c->syscall_errno > 0) {
4159                 const char *errno_name;
4160
4161                 fprintf(f, "%sSystemCallErrorNumber: ", prefix);
4162
4163                 errno_name = errno_to_name(c->syscall_errno);
4164                 if (errno_name)
4165                         fprintf(f, "%s\n", errno_name);
4166                 else
4167                         fprintf(f, "%d\n", c->syscall_errno);
4168         }
4169
4170         if (c->apparmor_profile)
4171                 fprintf(f,
4172                         "%sAppArmorProfile: %s%s\n",
4173                         prefix, c->apparmor_profile_ignore ? "-" : "", c->apparmor_profile);
4174 }
4175
4176 bool exec_context_maintains_privileges(ExecContext *c) {
4177         assert(c);
4178
4179         /* Returns true if the process forked off would run under
4180          * an unchanged UID or as root. */
4181
4182         if (!c->user)
4183                 return true;
4184
4185         if (streq(c->user, "root") || streq(c->user, "0"))
4186                 return true;
4187
4188         return false;
4189 }
4190
4191 int exec_context_get_effective_ioprio(ExecContext *c) {
4192         int p;
4193
4194         assert(c);
4195
4196         if (c->ioprio_set)
4197                 return c->ioprio;
4198
4199         p = ioprio_get(IOPRIO_WHO_PROCESS, 0);
4200         if (p < 0)
4201                 return IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 4);
4202
4203         return p;
4204 }
4205
4206 void exec_context_free_log_extra_fields(ExecContext *c) {
4207         size_t l;
4208
4209         assert(c);
4210
4211         for (l = 0; l < c->n_log_extra_fields; l++)
4212                 free(c->log_extra_fields[l].iov_base);
4213         c->log_extra_fields = mfree(c->log_extra_fields);
4214         c->n_log_extra_fields = 0;
4215 }
4216
4217 void exec_status_start(ExecStatus *s, pid_t pid) {
4218         assert(s);
4219
4220         zero(*s);
4221         s->pid = pid;
4222         dual_timestamp_get(&s->start_timestamp);
4223 }
4224
4225 void exec_status_exit(ExecStatus *s, ExecContext *context, pid_t pid, int code, int status) {
4226         assert(s);
4227
4228         if (s->pid && s->pid != pid)
4229                 zero(*s);
4230
4231         s->pid = pid;
4232         dual_timestamp_get(&s->exit_timestamp);
4233
4234         s->code = code;
4235         s->status = status;
4236
4237         if (context) {
4238                 if (context->utmp_id)
4239                         utmp_put_dead_process(context->utmp_id, pid, code, status);
4240
4241                 exec_context_tty_reset(context, NULL);
4242         }
4243 }
4244
4245 void exec_status_dump(ExecStatus *s, FILE *f, const char *prefix) {
4246         char buf[FORMAT_TIMESTAMP_MAX];
4247
4248         assert(s);
4249         assert(f);
4250
4251         if (s->pid <= 0)
4252                 return;
4253
4254         prefix = strempty(prefix);
4255
4256         fprintf(f,
4257                 "%sPID: "PID_FMT"\n",
4258                 prefix, s->pid);
4259
4260         if (dual_timestamp_is_set(&s->start_timestamp))
4261                 fprintf(f,
4262                         "%sStart Timestamp: %s\n",
4263                         prefix, format_timestamp(buf, sizeof(buf), s->start_timestamp.realtime));
4264
4265         if (dual_timestamp_is_set(&s->exit_timestamp))
4266                 fprintf(f,
4267                         "%sExit Timestamp: %s\n"
4268                         "%sExit Code: %s\n"
4269                         "%sExit Status: %i\n",
4270                         prefix, format_timestamp(buf, sizeof(buf), s->exit_timestamp.realtime),
4271                         prefix, sigchld_code_to_string(s->code),
4272                         prefix, s->status);
4273 }
4274
4275 char *exec_command_line(char **argv) {
4276         size_t k;
4277         char *n, *p, **a;
4278         bool first = true;
4279
4280         assert(argv);
4281
4282         k = 1;
4283         STRV_FOREACH(a, argv)
4284                 k += strlen(*a)+3;
4285
4286         n = new(char, k);
4287         if (!n)
4288                 return NULL;
4289
4290         p = n;
4291         STRV_FOREACH(a, argv) {
4292
4293                 if (!first)
4294                         *(p++) = ' ';
4295                 else
4296                         first = false;
4297
4298                 if (strpbrk(*a, WHITESPACE)) {
4299                         *(p++) = '\'';
4300                         p = stpcpy(p, *a);
4301                         *(p++) = '\'';
4302                 } else
4303                         p = stpcpy(p, *a);
4304
4305         }
4306
4307         *p = 0;
4308
4309         /* FIXME: this doesn't really handle arguments that have
4310          * spaces and ticks in them */
4311
4312         return n;
4313 }
4314
4315 void exec_command_dump(ExecCommand *c, FILE *f, const char *prefix) {
4316         _cleanup_free_ char *cmd = NULL;
4317         const char *prefix2;
4318
4319         assert(c);
4320         assert(f);
4321
4322         prefix = strempty(prefix);
4323         prefix2 = strjoina(prefix, "\t");
4324
4325         cmd = exec_command_line(c->argv);
4326         fprintf(f,
4327                 "%sCommand Line: %s\n",
4328                 prefix, cmd ? cmd : strerror(ENOMEM));
4329
4330         exec_status_dump(&c->exec_status, f, prefix2);
4331 }
4332
4333 void exec_command_dump_list(ExecCommand *c, FILE *f, const char *prefix) {
4334         assert(f);
4335
4336         prefix = strempty(prefix);
4337
4338         LIST_FOREACH(command, c, c)
4339                 exec_command_dump(c, f, prefix);
4340 }
4341
4342 void exec_command_append_list(ExecCommand **l, ExecCommand *e) {
4343         ExecCommand *end;
4344
4345         assert(l);
4346         assert(e);
4347
4348         if (*l) {
4349                 /* It's kind of important, that we keep the order here */
4350                 LIST_FIND_TAIL(command, *l, end);
4351                 LIST_INSERT_AFTER(command, *l, end, e);
4352         } else
4353               *l = e;
4354 }
4355
4356 int exec_command_set(ExecCommand *c, const char *path, ...) {
4357         va_list ap;
4358         char **l, *p;
4359
4360         assert(c);
4361         assert(path);
4362
4363         va_start(ap, path);
4364         l = strv_new_ap(path, ap);
4365         va_end(ap);
4366
4367         if (!l)
4368                 return -ENOMEM;
4369
4370         p = strdup(path);
4371         if (!p) {
4372                 strv_free(l);
4373                 return -ENOMEM;
4374         }
4375
4376         free(c->path);
4377         c->path = p;
4378
4379         strv_free(c->argv);
4380         c->argv = l;
4381
4382         return 0;
4383 }
4384
4385 int exec_command_append(ExecCommand *c, const char *path, ...) {
4386         _cleanup_strv_free_ char **l = NULL;
4387         va_list ap;
4388         int r;
4389
4390         assert(c);
4391         assert(path);
4392
4393         va_start(ap, path);
4394         l = strv_new_ap(path, ap);
4395         va_end(ap);
4396
4397         if (!l)
4398                 return -ENOMEM;
4399
4400         r = strv_extend_strv(&c->argv, l, false);
4401         if (r < 0)
4402                 return r;
4403
4404         return 0;
4405 }
4406
4407
4408 static int exec_runtime_allocate(ExecRuntime **rt) {
4409
4410         if (*rt)
4411                 return 0;
4412
4413         *rt = new0(ExecRuntime, 1);
4414         if (!*rt)
4415                 return -ENOMEM;
4416
4417         (*rt)->n_ref = 1;
4418         (*rt)->netns_storage_socket[0] = (*rt)->netns_storage_socket[1] = -1;
4419
4420         return 0;
4421 }
4422
4423 int exec_runtime_make(ExecRuntime **rt, ExecContext *c, const char *id) {
4424         int r;
4425
4426         assert(rt);
4427         assert(c);
4428         assert(id);
4429
4430         if (*rt)
4431                 return 1;
4432
4433         if (!c->private_network && !c->private_tmp)
4434                 return 0;
4435
4436         r = exec_runtime_allocate(rt);
4437         if (r < 0)
4438                 return r;
4439
4440         if (c->private_network && (*rt)->netns_storage_socket[0] < 0) {
4441                 if (socketpair(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC, 0, (*rt)->netns_storage_socket) < 0)
4442                         return -errno;
4443         }
4444
4445         if (c->private_tmp && !(*rt)->tmp_dir) {
4446                 r = setup_tmp_dirs(id, &(*rt)->tmp_dir, &(*rt)->var_tmp_dir);
4447                 if (r < 0)
4448                         return r;
4449         }
4450
4451         return 1;
4452 }
4453
4454 ExecRuntime *exec_runtime_ref(ExecRuntime *r) {
4455         assert(r);
4456         assert(r->n_ref > 0);
4457
4458         r->n_ref++;
4459         return r;
4460 }
4461
4462 ExecRuntime *exec_runtime_unref(ExecRuntime *r) {
4463
4464         if (!r)
4465                 return NULL;
4466
4467         assert(r->n_ref > 0);
4468
4469         r->n_ref--;
4470         if (r->n_ref > 0)
4471                 return NULL;
4472
4473         free(r->tmp_dir);
4474         free(r->var_tmp_dir);
4475         safe_close_pair(r->netns_storage_socket);
4476         return mfree(r);
4477 }
4478
4479 int exec_runtime_serialize(Unit *u, ExecRuntime *rt, FILE *f, FDSet *fds) {
4480         assert(u);
4481         assert(f);
4482         assert(fds);
4483
4484         if (!rt)
4485                 return 0;
4486
4487         if (rt->tmp_dir)
4488                 unit_serialize_item(u, f, "tmp-dir", rt->tmp_dir);
4489
4490         if (rt->var_tmp_dir)
4491                 unit_serialize_item(u, f, "var-tmp-dir", rt->var_tmp_dir);
4492
4493         if (rt->netns_storage_socket[0] >= 0) {
4494                 int copy;
4495
4496                 copy = fdset_put_dup(fds, rt->netns_storage_socket[0]);
4497                 if (copy < 0)
4498                         return copy;
4499
4500                 unit_serialize_item_format(u, f, "netns-socket-0", "%i", copy);
4501         }
4502
4503         if (rt->netns_storage_socket[1] >= 0) {
4504                 int copy;
4505
4506                 copy = fdset_put_dup(fds, rt->netns_storage_socket[1]);
4507                 if (copy < 0)
4508                         return copy;
4509
4510                 unit_serialize_item_format(u, f, "netns-socket-1", "%i", copy);
4511         }
4512
4513         return 0;
4514 }
4515
4516 int exec_runtime_deserialize_item(Unit *u, ExecRuntime **rt, const char *key, const char *value, FDSet *fds) {
4517         int r;
4518
4519         assert(rt);
4520         assert(key);
4521         assert(value);
4522
4523         if (streq(key, "tmp-dir")) {
4524                 char *copy;
4525
4526                 r = exec_runtime_allocate(rt);
4527                 if (r < 0)
4528                         return log_oom();
4529
4530                 copy = strdup(value);
4531                 if (!copy)
4532                         return log_oom();
4533
4534                 free((*rt)->tmp_dir);
4535                 (*rt)->tmp_dir = copy;
4536
4537         } else if (streq(key, "var-tmp-dir")) {
4538                 char *copy;
4539
4540                 r = exec_runtime_allocate(rt);
4541                 if (r < 0)
4542                         return log_oom();
4543
4544                 copy = strdup(value);
4545                 if (!copy)
4546                         return log_oom();
4547
4548                 free((*rt)->var_tmp_dir);
4549                 (*rt)->var_tmp_dir = copy;
4550
4551         } else if (streq(key, "netns-socket-0")) {
4552                 int fd;
4553
4554                 r = exec_runtime_allocate(rt);
4555                 if (r < 0)
4556                         return log_oom();
4557
4558                 if (safe_atoi(value, &fd) < 0 || !fdset_contains(fds, fd))
4559                         log_unit_debug(u, "Failed to parse netns socket value: %s", value);
4560                 else {
4561                         safe_close((*rt)->netns_storage_socket[0]);
4562                         (*rt)->netns_storage_socket[0] = fdset_remove(fds, fd);
4563                 }
4564         } else if (streq(key, "netns-socket-1")) {
4565                 int fd;
4566
4567                 r = exec_runtime_allocate(rt);
4568                 if (r < 0)
4569                         return log_oom();
4570
4571                 if (safe_atoi(value, &fd) < 0 || !fdset_contains(fds, fd))
4572                         log_unit_debug(u, "Failed to parse netns socket value: %s", value);
4573                 else {
4574                         safe_close((*rt)->netns_storage_socket[1]);
4575                         (*rt)->netns_storage_socket[1] = fdset_remove(fds, fd);
4576                 }
4577         } else
4578                 return 0;
4579
4580         return 1;
4581 }
4582
4583 static void *remove_tmpdir_thread(void *p) {
4584         _cleanup_free_ char *path = p;
4585
4586         (void) rm_rf(path, REMOVE_ROOT|REMOVE_PHYSICAL);
4587         return NULL;
4588 }
4589
4590 void exec_runtime_destroy(ExecRuntime *rt) {
4591         int r;
4592
4593         if (!rt)
4594                 return;
4595
4596         /* If there are multiple users of this, let's leave the stuff around */
4597         if (rt->n_ref > 1)
4598                 return;
4599
4600         if (rt->tmp_dir) {
4601                 log_debug("Spawning thread to nuke %s", rt->tmp_dir);
4602
4603                 r = asynchronous_job(remove_tmpdir_thread, rt->tmp_dir);
4604                 if (r < 0) {
4605                         log_warning_errno(r, "Failed to nuke %s: %m", rt->tmp_dir);
4606                         free(rt->tmp_dir);
4607                 }
4608
4609                 rt->tmp_dir = NULL;
4610         }
4611
4612         if (rt->var_tmp_dir) {
4613                 log_debug("Spawning thread to nuke %s", rt->var_tmp_dir);
4614
4615                 r = asynchronous_job(remove_tmpdir_thread, rt->var_tmp_dir);
4616                 if (r < 0) {
4617                         log_warning_errno(r, "Failed to nuke %s: %m", rt->var_tmp_dir);
4618                         free(rt->var_tmp_dir);
4619                 }
4620
4621                 rt->var_tmp_dir = NULL;
4622         }
4623
4624         safe_close_pair(rt->netns_storage_socket);
4625 }
4626
4627 static const char* const exec_input_table[_EXEC_INPUT_MAX] = {
4628         [EXEC_INPUT_NULL] = "null",
4629         [EXEC_INPUT_TTY] = "tty",
4630         [EXEC_INPUT_TTY_FORCE] = "tty-force",
4631         [EXEC_INPUT_TTY_FAIL] = "tty-fail",
4632         [EXEC_INPUT_SOCKET] = "socket",
4633         [EXEC_INPUT_NAMED_FD] = "fd",
4634 };
4635
4636 DEFINE_STRING_TABLE_LOOKUP(exec_input, ExecInput);
4637
4638 static const char* const exec_output_table[_EXEC_OUTPUT_MAX] = {
4639         [EXEC_OUTPUT_INHERIT] = "inherit",
4640         [EXEC_OUTPUT_NULL] = "null",
4641         [EXEC_OUTPUT_TTY] = "tty",
4642         [EXEC_OUTPUT_SYSLOG] = "syslog",
4643         [EXEC_OUTPUT_SYSLOG_AND_CONSOLE] = "syslog+console",
4644         [EXEC_OUTPUT_KMSG] = "kmsg",
4645         [EXEC_OUTPUT_KMSG_AND_CONSOLE] = "kmsg+console",
4646         [EXEC_OUTPUT_JOURNAL] = "journal",
4647         [EXEC_OUTPUT_JOURNAL_AND_CONSOLE] = "journal+console",
4648         [EXEC_OUTPUT_SOCKET] = "socket",
4649         [EXEC_OUTPUT_NAMED_FD] = "fd",
4650 };
4651
4652 DEFINE_STRING_TABLE_LOOKUP(exec_output, ExecOutput);
4653
4654 static const char* const exec_utmp_mode_table[_EXEC_UTMP_MODE_MAX] = {
4655         [EXEC_UTMP_INIT] = "init",
4656         [EXEC_UTMP_LOGIN] = "login",
4657         [EXEC_UTMP_USER] = "user",
4658 };
4659
4660 DEFINE_STRING_TABLE_LOOKUP(exec_utmp_mode, ExecUtmpMode);
4661
4662 static const char* const exec_preserve_mode_table[_EXEC_PRESERVE_MODE_MAX] = {
4663         [EXEC_PRESERVE_NO] = "no",
4664         [EXEC_PRESERVE_YES] = "yes",
4665         [EXEC_PRESERVE_RESTART] = "restart",
4666 };
4667
4668 DEFINE_STRING_TABLE_LOOKUP_WITH_BOOLEAN(exec_preserve_mode, ExecPreserveMode, EXEC_PRESERVE_YES);
4669
4670 static const char* const exec_directory_type_table[_EXEC_DIRECTORY_TYPE_MAX] = {
4671         [EXEC_DIRECTORY_RUNTIME] = "RuntimeDirectory",
4672         [EXEC_DIRECTORY_STATE] = "StateDirectory",
4673         [EXEC_DIRECTORY_CACHE] = "CacheDirectory",
4674         [EXEC_DIRECTORY_LOGS] = "LogsDirectory",
4675         [EXEC_DIRECTORY_CONFIGURATION] = "ConfigurationDirectory",
4676 };
4677
4678 DEFINE_STRING_TABLE_LOOKUP(exec_directory_type, ExecDirectoryType);
4679
4680 static const char* const exec_keyring_mode_table[_EXEC_KEYRING_MODE_MAX] = {
4681         [EXEC_KEYRING_INHERIT] = "inherit",
4682         [EXEC_KEYRING_PRIVATE] = "private",
4683         [EXEC_KEYRING_SHARED] = "shared",
4684 };
4685
4686 DEFINE_STRING_TABLE_LOOKUP(exec_keyring_mode, ExecKeyringMode);