src/core/execute.c

   1 /***
   2   This file is part of systemd.
   3
   4   Copyright 2010 Lennart Poettering
   5
   6   systemd is free software; you can redistribute it and/or modify it
   7   under the terms of the GNU Lesser General Public License as published by
   8   the Free Software Foundation; either version 2.1 of the License, or
   9   (at your option) any later version.
  10
  11   systemd is distributed in the hope that it will be useful, but
  12   WITHOUT ANY WARRANTY; without even the implied warranty of
  13   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  14   Lesser General Public License for more details.
  15
  16   You should have received a copy of the GNU Lesser General Public License
  17   along with systemd; If not, see <http://www.gnu.org/licenses/>.
  18 ***/
  19
  20 #include <errno.h>
  21 #include <fcntl.h>
  22 #include <glob.h>
  23 #include <grp.h>
  24 #include <poll.h>
  25 #include <signal.h>
  26 #include <string.h>
  27 #include <sys/capability.h>
  28 #include <sys/eventfd.h>
  29 #include <sys/mman.h>
  30 #include <sys/personality.h>
  31 #include <sys/prctl.h>
  32 #include <sys/shm.h>
  33 #include <sys/socket.h>
  34 #include <sys/stat.h>
  35 #include <sys/types.h>
  36 #include <sys/un.h>
  37 #include <unistd.h>
  38 #include <utmpx.h>
  39
  40 #if HAVE_PAM
  41 #include <security/pam_appl.h>
  42 #endif
  43
  44 #if HAVE_SELINUX
  45 #include <selinux/selinux.h>
  46 #endif
  47
  48 #if HAVE_SECCOMP
  49 #include <seccomp.h>
  50 #endif
  51
  52 #if HAVE_APPARMOR
  53 #include <sys/apparmor.h>
  54 #endif
  55
  56 #include "sd-messages.h"
  57
  58 #include "af-list.h"
  59 #include "alloc-util.h"
  60 #if HAVE_APPARMOR
  61 #include "apparmor-util.h"
  62 #endif
  63 #include "async.h"
  64 #include "barrier.h"
  65 #include "cap-list.h"
  66 #include "capability-util.h"
  67 #include "chown-recursive.h"
  68 #include "def.h"
  69 #include "env-util.h"
  70 #include "errno-list.h"
  71 #include "execute.h"
  72 #include "exit-status.h"
  73 #include "fd-util.h"
  74 #include "fileio.h"
  75 #include "format-util.h"
  76 #include "fs-util.h"
  77 #include "glob-util.h"
  78 #include "io-util.h"
  79 #include "ioprio.h"
  80 #include "label.h"
  81 #include "log.h"
  82 #include "macro.h"
  83 #include "missing.h"
  84 #include "mkdir.h"
  85 #include "namespace.h"
  86 #include "parse-util.h"
  87 #include "path-util.h"
  88 #include "process-util.h"
  89 #include "rlimit-util.h"
  90 #include "rm-rf.h"
  91 #if HAVE_SECCOMP
  92 #include "seccomp-util.h"
  93 #endif
  94 #include "securebits.h"
  95 #include "securebits-util.h"
  96 #include "selinux-util.h"
  97 #include "signal-util.h"
  98 #include "smack-util.h"
  99 #include "special.h"
 100 #include "string-table.h"
 101 #include "string-util.h"
 102 #include "strv.h"
 103 #include "syslog-util.h"
 104 #include "terminal-util.h"
 105 #include "unit.h"
 106 #include "user-util.h"
 107 #include "util.h"
 108 #include "utmp-wtmp.h"
 109
 110 #define IDLE_TIMEOUT_USEC (5*USEC_PER_SEC)
 111 #define IDLE_TIMEOUT2_USEC (1*USEC_PER_SEC)
 112
 113 /* This assumes there is a 'tty' group */
 114 #define TTY_MODE 0620
 115
 116 #define SNDBUF_SIZE (8*1024*1024)
 117
 118 static int shift_fds(int fds[], unsigned n_fds) {
 119         int start, restart_from;
 120
 121         if (n_fds <= 0)
 122                 return 0;
 123
 124         /* Modifies the fds array! (sorts it) */
 125
 126         assert(fds);
 127
 128         start = 0;
 129         for (;;) {
 130                 int i;
 131
 132                 restart_from = -1;
 133
 134                 for (i = start; i < (int) n_fds; i++) {
 135                         int nfd;
 136
 137                         /* Already at right index? */
 138                         if (fds[i] == i+3)
 139                                 continue;
 140
 141                         nfd = fcntl(fds[i], F_DUPFD, i + 3);
 142                         if (nfd < 0)
 143                                 return -errno;
 144
 145                         safe_close(fds[i]);
 146                         fds[i] = nfd;
 147
 148                         /* Hmm, the fd we wanted isn't free? Then
 149                          * let's remember that and try again from here */
 150                         if (nfd != i+3 && restart_from < 0)
 151                                 restart_from = i;
 152                 }
 153
 154                 if (restart_from < 0)
 155                         break;
 156
 157                 start = restart_from;
 158         }
 159
 160         return 0;
 161 }
 162
 163 static int flags_fds(const int fds[], unsigned n_storage_fds, unsigned n_socket_fds, bool nonblock) {
 164         unsigned i, n_fds;
 165         int r;
 166
 167         n_fds = n_storage_fds + n_socket_fds;
 168         if (n_fds <= 0)
 169                 return 0;
 170
 171         assert(fds);
 172
 173         /* Drops/Sets O_NONBLOCK and FD_CLOEXEC from the file flags.
 174          * O_NONBLOCK only applies to socket activation though. */
 175
 176         for (i = 0; i < n_fds; i++) {
 177
 178                 if (i < n_socket_fds) {
 179                         r = fd_nonblock(fds[i], nonblock);
 180                         if (r < 0)
 181                                 return r;
 182                 }
 183
 184                 /* We unconditionally drop FD_CLOEXEC from the fds,
 185                  * since after all we want to pass these fds to our
 186                  * children */
 187
 188                 r = fd_cloexec(fds[i], false);
 189                 if (r < 0)
 190                         return r;
 191         }
 192
 193         return 0;
 194 }
 195
 196 static const char *exec_context_tty_path(const ExecContext *context) {
 197         assert(context);
 198
 199         if (context->stdio_as_fds)
 200                 return NULL;
 201
 202         if (context->tty_path)
 203                 return context->tty_path;
 204
 205         return "/dev/console";
 206 }
 207
 208 static void exec_context_tty_reset(const ExecContext *context, const ExecParameters *p) {
 209         const char *path;
 210
 211         assert(context);
 212
 213         path = exec_context_tty_path(context);
 214
 215         if (context->tty_vhangup) {
 216                 if (p && p->stdin_fd >= 0)
 217                         (void) terminal_vhangup_fd(p->stdin_fd);
 218                 else if (path)
 219                         (void) terminal_vhangup(path);
 220         }
 221
 222         if (context->tty_reset) {
 223                 if (p && p->stdin_fd >= 0)
 224                         (void) reset_terminal_fd(p->stdin_fd, true);
 225                 else if (path)
 226                         (void) reset_terminal(path);
 227         }
 228
 229         if (context->tty_vt_disallocate && path)
 230                 (void) vt_disallocate(path);
 231 }
 232
 233 static bool is_terminal_input(ExecInput i) {
 234         return IN_SET(i,
 235                       EXEC_INPUT_TTY,
 236                       EXEC_INPUT_TTY_FORCE,
 237                       EXEC_INPUT_TTY_FAIL);
 238 }
 239
 240 static bool is_terminal_output(ExecOutput o) {
 241         return IN_SET(o,
 242                       EXEC_OUTPUT_TTY,
 243                       EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
 244                       EXEC_OUTPUT_KMSG_AND_CONSOLE,
 245                       EXEC_OUTPUT_JOURNAL_AND_CONSOLE);
 246 }
 247
 248 static bool is_syslog_output(ExecOutput o) {
 249         return IN_SET(o,
 250                       EXEC_OUTPUT_SYSLOG,
 251                       EXEC_OUTPUT_SYSLOG_AND_CONSOLE);
 252 }
 253
 254 static bool is_kmsg_output(ExecOutput o) {
 255         return IN_SET(o,
 256                       EXEC_OUTPUT_KMSG,
 257                       EXEC_OUTPUT_KMSG_AND_CONSOLE);
 258 }
 259
 260 static bool exec_context_needs_term(const ExecContext *c) {
 261         assert(c);
 262
 263         /* Return true if the execution context suggests we should set $TERM to something useful. */
 264
 265         if (is_terminal_input(c->std_input))
 266                 return true;
 267
 268         if (is_terminal_output(c->std_output))
 269                 return true;
 270
 271         if (is_terminal_output(c->std_error))
 272                 return true;
 273
 274         return !!c->tty_path;
 275 }
 276
 277 static int open_null_as(int flags, int nfd) {
 278         int fd, r;
 279
 280         assert(nfd >= 0);
 281
 282         fd = open("/dev/null", flags|O_NOCTTY);
 283         if (fd < 0)
 284                 return -errno;
 285
 286         if (fd != nfd) {
 287                 r = dup2(fd, nfd) < 0 ? -errno : nfd;
 288                 safe_close(fd);
 289         } else
 290                 r = nfd;
 291
 292         return r;
 293 }
 294
 295 static int connect_journal_socket(int fd, uid_t uid, gid_t gid) {
 296         static const union sockaddr_union sa = {
 297                 .un.sun_family = AF_UNIX,
 298                 .un.sun_path = "/run/systemd/journal/stdout",
 299         };
 300         uid_t olduid = UID_INVALID;
 301         gid_t oldgid = GID_INVALID;
 302         int r;
 303
 304         if (gid_is_valid(gid)) {
 305                 oldgid = getgid();
 306
 307                 if (setegid(gid) < 0)
 308                         return -errno;
 309         }
 310
 311         if (uid_is_valid(uid)) {
 312                 olduid = getuid();
 313
 314                 if (seteuid(uid) < 0) {
 315                         r = -errno;
 316                         goto restore_gid;
 317                 }
 318         }
 319
 320         r = connect(fd, &sa.sa, SOCKADDR_UN_LEN(sa.un)) < 0 ? -errno : 0;
 321
 322         /* If we fail to restore the uid or gid, things will likely
 323            fail later on. This should only happen if an LSM interferes. */
 324
 325         if (uid_is_valid(uid))
 326                 (void) seteuid(olduid);
 327
 328  restore_gid:
 329         if (gid_is_valid(gid))
 330                 (void) setegid(oldgid);
 331
 332         return r;
 333 }
 334
 335 static int connect_logger_as(
 336                 Unit *unit,
 337                 const ExecContext *context,
 338                 const ExecParameters *params,
 339                 ExecOutput output,
 340                 const char *ident,
 341                 int nfd,
 342                 uid_t uid,
 343                 gid_t gid) {
 344
 345         int fd, r;
 346
 347         assert(context);
 348         assert(params);
 349         assert(output < _EXEC_OUTPUT_MAX);
 350         assert(ident);
 351         assert(nfd >= 0);
 352
 353         fd = socket(AF_UNIX, SOCK_STREAM, 0);
 354         if (fd < 0)
 355                 return -errno;
 356
 357         r = connect_journal_socket(fd, uid, gid);
 358         if (r < 0)
 359                 return r;
 360
 361         if (shutdown(fd, SHUT_RD) < 0) {
 362                 safe_close(fd);
 363                 return -errno;
 364         }
 365
 366         (void) fd_inc_sndbuf(fd, SNDBUF_SIZE);
 367
 368         dprintf(fd,
 369                 "%s\n"
 370                 "%s\n"
 371                 "%i\n"
 372                 "%i\n"
 373                 "%i\n"
 374                 "%i\n"
 375                 "%i\n",
 376                 context->syslog_identifier ?: ident,
 377                 params->flags & EXEC_PASS_LOG_UNIT ? unit->id : "",
 378                 context->syslog_priority,
 379                 !!context->syslog_level_prefix,
 380                 is_syslog_output(output),
 381                 is_kmsg_output(output),
 382                 is_terminal_output(output));
 383
 384         if (fd == nfd)
 385                 return nfd;
 386
 387         r = dup2(fd, nfd) < 0 ? -errno : nfd;
 388         safe_close(fd);
 389
 390         return r;
 391 }
 392 static int open_terminal_as(const char *path, mode_t mode, int nfd) {
 393         int fd, r;
 394
 395         assert(path);
 396         assert(nfd >= 0);
 397
 398         fd = open_terminal(path, mode | O_NOCTTY);
 399         if (fd < 0)
 400                 return fd;
 401
 402         if (fd != nfd) {
 403                 r = dup2(fd, nfd) < 0 ? -errno : nfd;
 404                 safe_close(fd);
 405         } else
 406                 r = nfd;
 407
 408         return r;
 409 }
 410
 411 static int fixup_input(ExecInput std_input, int socket_fd, bool apply_tty_stdin) {
 412
 413         if (is_terminal_input(std_input) && !apply_tty_stdin)
 414                 return EXEC_INPUT_NULL;
 415
 416         if (std_input == EXEC_INPUT_SOCKET && socket_fd < 0)
 417                 return EXEC_INPUT_NULL;
 418
 419         return std_input;
 420 }
 421
 422 static int fixup_output(ExecOutput std_output, int socket_fd) {
 423
 424         if (std_output == EXEC_OUTPUT_SOCKET && socket_fd < 0)
 425                 return EXEC_OUTPUT_INHERIT;
 426
 427         return std_output;
 428 }
 429
 430 static int setup_input(
 431                 const ExecContext *context,
 432                 const ExecParameters *params,
 433                 int socket_fd,
 434                 int named_iofds[3]) {
 435
 436         ExecInput i;
 437
 438         assert(context);
 439         assert(params);
 440
 441         if (params->stdin_fd >= 0) {
 442                 if (dup2(params->stdin_fd, STDIN_FILENO) < 0)
 443                         return -errno;
 444
 445                 /* Try to make this the controlling tty, if it is a tty, and reset it */
 446                 (void) ioctl(STDIN_FILENO, TIOCSCTTY, context->std_input == EXEC_INPUT_TTY_FORCE);
 447                 (void) reset_terminal_fd(STDIN_FILENO, true);
 448
 449                 return STDIN_FILENO;
 450         }
 451
 452         i = fixup_input(context->std_input, socket_fd, params->flags & EXEC_APPLY_TTY_STDIN);
 453
 454         switch (i) {
 455
 456         case EXEC_INPUT_NULL:
 457                 return open_null_as(O_RDONLY, STDIN_FILENO);
 458
 459         case EXEC_INPUT_TTY:
 460         case EXEC_INPUT_TTY_FORCE:
 461         case EXEC_INPUT_TTY_FAIL: {
 462                 int fd, r;
 463
 464                 fd = acquire_terminal(exec_context_tty_path(context),
 465                                       i == EXEC_INPUT_TTY_FAIL,
 466                                       i == EXEC_INPUT_TTY_FORCE,
 467                                       false,
 468                                       USEC_INFINITY);
 469                 if (fd < 0)
 470                         return fd;
 471
 472                 if (fd != STDIN_FILENO) {
 473                         r = dup2(fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
 474                         safe_close(fd);
 475                 } else
 476                         r = STDIN_FILENO;
 477
 478                 return r;
 479         }
 480
 481         case EXEC_INPUT_SOCKET:
 482                 return dup2(socket_fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
 483
 484         case EXEC_INPUT_NAMED_FD:
 485                 (void) fd_nonblock(named_iofds[STDIN_FILENO], false);
 486                 return dup2(named_iofds[STDIN_FILENO], STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
 487
 488         default:
 489                 assert_not_reached("Unknown input type");
 490         }
 491 }
 492
 493 static int setup_output(
 494                 Unit *unit,
 495                 const ExecContext *context,
 496                 const ExecParameters *params,
 497                 int fileno,
 498                 int socket_fd,
 499                 int named_iofds[3],
 500                 const char *ident,
 501                 uid_t uid,
 502                 gid_t gid,
 503                 dev_t *journal_stream_dev,
 504                 ino_t *journal_stream_ino) {
 505
 506         ExecOutput o;
 507         ExecInput i;
 508         int r;
 509
 510         assert(unit);
 511         assert(context);
 512         assert(params);
 513         assert(ident);
 514         assert(journal_stream_dev);
 515         assert(journal_stream_ino);
 516
 517         if (fileno == STDOUT_FILENO && params->stdout_fd >= 0) {
 518
 519                 if (dup2(params->stdout_fd, STDOUT_FILENO) < 0)
 520                         return -errno;
 521
 522                 return STDOUT_FILENO;
 523         }
 524
 525         if (fileno == STDERR_FILENO && params->stderr_fd >= 0) {
 526                 if (dup2(params->stderr_fd, STDERR_FILENO) < 0)
 527                         return -errno;
 528
 529                 return STDERR_FILENO;
 530         }
 531
 532         i = fixup_input(context->std_input, socket_fd, params->flags & EXEC_APPLY_TTY_STDIN);
 533         o = fixup_output(context->std_output, socket_fd);
 534
 535         if (fileno == STDERR_FILENO) {
 536                 ExecOutput e;
 537                 e = fixup_output(context->std_error, socket_fd);
 538
 539                 /* This expects the input and output are already set up */
 540
 541                 /* Don't change the stderr file descriptor if we inherit all
 542                  * the way and are not on a tty */
 543                 if (e == EXEC_OUTPUT_INHERIT &&
 544                     o == EXEC_OUTPUT_INHERIT &&
 545                     i == EXEC_INPUT_NULL &&
 546                     !is_terminal_input(context->std_input) &&
 547                     getppid () != 1)
 548                         return fileno;
 549
 550                 /* Duplicate from stdout if possible */
 551                 if ((e == o && e != EXEC_OUTPUT_NAMED_FD) || e == EXEC_OUTPUT_INHERIT)
 552                         return dup2(STDOUT_FILENO, fileno) < 0 ? -errno : fileno;
 553
 554                 o = e;
 555
 556         } else if (o == EXEC_OUTPUT_INHERIT) {
 557                 /* If input got downgraded, inherit the original value */
 558                 if (i == EXEC_INPUT_NULL && is_terminal_input(context->std_input))
 559                         return open_terminal_as(exec_context_tty_path(context), O_WRONLY, fileno);
 560
 561                 /* If the input is connected to anything that's not a /dev/null, inherit that... */
 562                 if (i != EXEC_INPUT_NULL)
 563                         return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
 564
 565                 /* If we are not started from PID 1 we just inherit STDOUT from our parent process. */
 566                 if (getppid() != 1)
 567                         return fileno;
 568
 569                 /* We need to open /dev/null here anew, to get the right access mode. */
 570                 return open_null_as(O_WRONLY, fileno);
 571         }
 572
 573         switch (o) {
 574
 575         case EXEC_OUTPUT_NULL:
 576                 return open_null_as(O_WRONLY, fileno);
 577
 578         case EXEC_OUTPUT_TTY:
 579                 if (is_terminal_input(i))
 580                         return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
 581
 582                 /* We don't reset the terminal if this is just about output */
 583                 return open_terminal_as(exec_context_tty_path(context), O_WRONLY, fileno);
 584
 585         case EXEC_OUTPUT_SYSLOG:
 586         case EXEC_OUTPUT_SYSLOG_AND_CONSOLE:
 587         case EXEC_OUTPUT_KMSG:
 588         case EXEC_OUTPUT_KMSG_AND_CONSOLE:
 589         case EXEC_OUTPUT_JOURNAL:
 590         case EXEC_OUTPUT_JOURNAL_AND_CONSOLE:
 591                 r = connect_logger_as(unit, context, params, o, ident, fileno, uid, gid);
 592                 if (r < 0) {
 593                         log_unit_warning_errno(unit, r, "Failed to connect %s to the journal socket, ignoring: %m", fileno == STDOUT_FILENO ? "stdout" : "stderr");
 594                         r = open_null_as(O_WRONLY, fileno);
 595                 } else {
 596                         struct stat st;
 597
 598                         /* If we connected this fd to the journal via a stream, patch the device/inode into the passed
 599                          * parameters, but only then. This is useful so that we can set $JOURNAL_STREAM that permits
 600                          * services to detect whether they are connected to the journal or not.
 601                          *
 602                          * If both stdout and stderr are connected to a stream then let's make sure to store the data
 603                          * about STDERR as that's usually the best way to do logging. */
 604
 605                         if (fstat(fileno, &st) >= 0 &&
 606                             (*journal_stream_ino == 0 || fileno == STDERR_FILENO)) {
 607                                 *journal_stream_dev = st.st_dev;
 608                                 *journal_stream_ino = st.st_ino;
 609                         }
 610                 }
 611                 return r;
 612
 613         case EXEC_OUTPUT_SOCKET:
 614                 assert(socket_fd >= 0);
 615                 return dup2(socket_fd, fileno) < 0 ? -errno : fileno;
 616
 617         case EXEC_OUTPUT_NAMED_FD:
 618                 (void) fd_nonblock(named_iofds[fileno], false);
 619                 return dup2(named_iofds[fileno], fileno) < 0 ? -errno : fileno;
 620
 621         default:
 622                 assert_not_reached("Unknown error type");
 623         }
 624 }
 625
 626 static int chown_terminal(int fd, uid_t uid) {
 627         struct stat st;
 628
 629         assert(fd >= 0);
 630
 631         /* Before we chown/chmod the TTY, let's ensure this is actually a tty */
 632         if (isatty(fd) < 1)
 633                 return 0;
 634
 635         /* This might fail. What matters are the results. */
 636         (void) fchown(fd, uid, -1);
 637         (void) fchmod(fd, TTY_MODE);
 638
 639         if (fstat(fd, &st) < 0)
 640                 return -errno;
 641
 642         if (st.st_uid != uid || (st.st_mode & 0777) != TTY_MODE)
 643                 return -EPERM;
 644
 645         return 0;
 646 }
 647
 648 static int setup_confirm_stdio(const char *vc, int *_saved_stdin, int *_saved_stdout) {
 649         _cleanup_close_ int fd = -1, saved_stdin = -1, saved_stdout = -1;
 650         int r;
 651
 652         assert(_saved_stdin);
 653         assert(_saved_stdout);
 654
 655         saved_stdin = fcntl(STDIN_FILENO, F_DUPFD, 3);
 656         if (saved_stdin < 0)
 657                 return -errno;
 658
 659         saved_stdout = fcntl(STDOUT_FILENO, F_DUPFD, 3);
 660         if (saved_stdout < 0)
 661                 return -errno;
 662
 663         fd = acquire_terminal(vc, false, false, false, DEFAULT_CONFIRM_USEC);
 664         if (fd < 0)
 665                 return fd;
 666
 667         r = chown_terminal(fd, getuid());
 668         if (r < 0)
 669                 return r;
 670
 671         r = reset_terminal_fd(fd, true);
 672         if (r < 0)
 673                 return r;
 674
 675         if (dup2(fd, STDIN_FILENO) < 0)
 676                 return -errno;
 677
 678         if (dup2(fd, STDOUT_FILENO) < 0)
 679                 return -errno;
 680
 681         if (fd >= 2)
 682                 safe_close(fd);
 683         fd = -1;
 684
 685         *_saved_stdin = saved_stdin;
 686         *_saved_stdout = saved_stdout;
 687
 688         saved_stdin = saved_stdout = -1;
 689
 690         return 0;
 691 }
 692
 693 static void write_confirm_error_fd(int err, int fd, const Unit *u) {
 694         assert(err < 0);
 695
 696         if (err == -ETIMEDOUT)
 697                 dprintf(fd, "Confirmation question timed out for %s, assuming positive response.\n", u->id);
 698         else {
 699                 errno = -err;
 700                 dprintf(fd, "Couldn't ask confirmation for %s: %m, assuming positive response.\n", u->id);
 701         }
 702 }
 703
 704 static void write_confirm_error(int err, const char *vc, const Unit *u) {
 705         _cleanup_close_ int fd = -1;
 706
 707         assert(vc);
 708
 709         fd = open_terminal(vc, O_WRONLY|O_NOCTTY|O_CLOEXEC);
 710         if (fd < 0)
 711                 return;
 712
 713         write_confirm_error_fd(err, fd, u);
 714 }
 715
 716 static int restore_confirm_stdio(int *saved_stdin, int *saved_stdout) {
 717         int r = 0;
 718
 719         assert(saved_stdin);
 720         assert(saved_stdout);
 721
 722         release_terminal();
 723
 724         if (*saved_stdin >= 0)
 725                 if (dup2(*saved_stdin, STDIN_FILENO) < 0)
 726                         r = -errno;
 727
 728         if (*saved_stdout >= 0)
 729                 if (dup2(*saved_stdout, STDOUT_FILENO) < 0)
 730                         r = -errno;
 731
 732         *saved_stdin = safe_close(*saved_stdin);
 733         *saved_stdout = safe_close(*saved_stdout);
 734
 735         return r;
 736 }
 737
 738 enum {
 739         CONFIRM_PRETEND_FAILURE = -1,
 740         CONFIRM_PRETEND_SUCCESS =  0,
 741         CONFIRM_EXECUTE = 1,
 742 };
 743
 744 static int ask_for_confirmation(const char *vc, Unit *u, const char *cmdline) {
 745         int saved_stdout = -1, saved_stdin = -1, r;
 746         _cleanup_free_ char *e = NULL;
 747         char c;
 748
 749         /* For any internal errors, assume a positive response. */
 750         r = setup_confirm_stdio(vc, &saved_stdin, &saved_stdout);
 751         if (r < 0) {
 752                 write_confirm_error(r, vc, u);
 753                 return CONFIRM_EXECUTE;
 754         }
 755
 756         /* confirm_spawn might have been disabled while we were sleeping. */
 757         if (manager_is_confirm_spawn_disabled(u->manager)) {
 758                 r = 1;
 759                 goto restore_stdio;
 760         }
 761
 762         e = ellipsize(cmdline, 60, 100);
 763         if (!e) {
 764                 log_oom();
 765                 r = CONFIRM_EXECUTE;
 766                 goto restore_stdio;
 767         }
 768
 769         for (;;) {
 770                 r = ask_char(&c, "yfshiDjcn", "Execute %s? [y, f, s – h for help] ", e);
 771                 if (r < 0) {
 772                         write_confirm_error_fd(r, STDOUT_FILENO, u);
 773                         r = CONFIRM_EXECUTE;
 774                         goto restore_stdio;
 775                 }
 776
 777                 switch (c) {
 778                 case 'c':
 779                         printf("Resuming normal execution.\n");
 780                         manager_disable_confirm_spawn();
 781                         r = 1;
 782                         break;
 783                 case 'D':
 784                         unit_dump(u, stdout, "  ");
 785                         continue; /* ask again */
 786                 case 'f':
 787                         printf("Failing execution.\n");
 788                         r = CONFIRM_PRETEND_FAILURE;
 789                         break;
 790                 case 'h':
 791                         printf("  c - continue, proceed without asking anymore\n"
 792                                "  D - dump, show the state of the unit\n"
 793                                "  f - fail, don't execute the command and pretend it failed\n"
 794                                "  h - help\n"
 795                                "  i - info, show a short summary of the unit\n"
 796                                "  j - jobs, show jobs that are in progress\n"
 797                                "  s - skip, don't execute the command and pretend it succeeded\n"
 798                                "  y - yes, execute the command\n");
 799                         continue; /* ask again */
 800                 case 'i':
 801                         printf("  Description: %s\n"
 802                                "  Unit:        %s\n"
 803                                "  Command:     %s\n",
 804                                u->id, u->description, cmdline);
 805                         continue; /* ask again */
 806                 case 'j':
 807                         manager_dump_jobs(u->manager, stdout, "  ");
 808                         continue; /* ask again */
 809                 case 'n':
 810                         /* 'n' was removed in favor of 'f'. */
 811                         printf("Didn't understand 'n', did you mean 'f'?\n");
 812                         continue; /* ask again */
 813                 case 's':
 814                         printf("Skipping execution.\n");
 815                         r = CONFIRM_PRETEND_SUCCESS;
 816                         break;
 817                 case 'y':
 818                         r = CONFIRM_EXECUTE;
 819                         break;
 820                 default:
 821                         assert_not_reached("Unhandled choice");
 822                 }
 823                 break;
 824         }
 825
 826 restore_stdio:
 827         restore_confirm_stdio(&saved_stdin, &saved_stdout);
 828         return r;
 829 }
 830
 831 static int get_fixed_user(const ExecContext *c, const char **user,
 832                           uid_t *uid, gid_t *gid,
 833                           const char **home, const char **shell) {
 834         int r;
 835         const char *name;
 836
 837         assert(c);
 838
 839         if (!c->user)
 840                 return 0;
 841
 842         /* Note that we don't set $HOME or $SHELL if they are not particularly enlightening anyway
 843          * (i.e. are "/" or "/bin/nologin"). */
 844
 845         name = c->user;
 846         r = get_user_creds_clean(&name, uid, gid, home, shell);
 847         if (r < 0)
 848                 return r;
 849
 850         *user = name;
 851         return 0;
 852 }
 853
 854 static int get_fixed_group(const ExecContext *c, const char **group, gid_t *gid) {
 855         int r;
 856         const char *name;
 857
 858         assert(c);
 859
 860         if (!c->group)
 861                 return 0;
 862
 863         name = c->group;
 864         r = get_group_creds(&name, gid);
 865         if (r < 0)
 866                 return r;
 867
 868         *group = name;
 869         return 0;
 870 }
 871
 872 static int get_supplementary_groups(const ExecContext *c, const char *user,
 873                                     const char *group, gid_t gid,
 874                                     gid_t **supplementary_gids, int *ngids) {
 875         char **i;
 876         int r, k = 0;
 877         int ngroups_max;
 878         bool keep_groups = false;
 879         gid_t *groups = NULL;
 880         _cleanup_free_ gid_t *l_gids = NULL;
 881
 882         assert(c);
 883
 884         /*
 885          * If user is given, then lookup GID and supplementary groups list.
 886          * We avoid NSS lookups for gid=0. Also we have to initialize groups
 887          * here and as early as possible so we keep the list of supplementary
 888          * groups of the caller.
 889          */
 890         if (user && gid_is_valid(gid) && gid != 0) {
 891                 /* First step, initialize groups from /etc/groups */
 892                 if (initgroups(user, gid) < 0)
 893                         return -errno;
 894
 895                 keep_groups = true;
 896         }
 897
 898         if (!c->supplementary_groups)
 899                 return 0;
 900
 901         /*
 902          * If SupplementaryGroups= was passed then NGROUPS_MAX has to
 903          * be positive, otherwise fail.
 904          */
 905         errno = 0;
 906         ngroups_max = (int) sysconf(_SC_NGROUPS_MAX);
 907         if (ngroups_max <= 0) {
 908                 if (errno > 0)
 909                         return -errno;
 910                 else
 911                         return -EOPNOTSUPP; /* For all other values */
 912         }
 913
 914         l_gids = new(gid_t, ngroups_max);
 915         if (!l_gids)
 916                 return -ENOMEM;
 917
 918         if (keep_groups) {
 919                 /*
 920                  * Lookup the list of groups that the user belongs to, we
 921                  * avoid NSS lookups here too for gid=0.
 922                  */
 923                 k = ngroups_max;
 924                 if (getgrouplist(user, gid, l_gids, &k) < 0)
 925                         return -EINVAL;
 926         } else
 927                 k = 0;
 928
 929         STRV_FOREACH(i, c->supplementary_groups) {
 930                 const char *g;
 931
 932                 if (k >= ngroups_max)
 933                         return -E2BIG;
 934
 935                 g = *i;
 936                 r = get_group_creds(&g, l_gids+k);
 937                 if (r < 0)
 938                         return r;
 939
 940                 k++;
 941         }
 942
 943         /*
 944          * Sets ngids to zero to drop all supplementary groups, happens
 945          * when we are under root and SupplementaryGroups= is empty.
 946          */
 947         if (k == 0) {
 948                 *ngids = 0;
 949                 return 0;
 950         }
 951
 952         /* Otherwise get the final list of supplementary groups */
 953         groups = memdup(l_gids, sizeof(gid_t) * k);
 954         if (!groups)
 955                 return -ENOMEM;
 956
 957         *supplementary_gids = groups;
 958         *ngids = k;
 959
 960         groups = NULL;
 961
 962         return 0;
 963 }
 964
 965 static int enforce_groups(const ExecContext *context, gid_t gid,
 966                           gid_t *supplementary_gids, int ngids) {
 967         int r;
 968
 969         assert(context);
 970
 971         /* Handle SupplementaryGroups= even if it is empty */
 972         if (context->supplementary_groups) {
 973                 r = maybe_setgroups(ngids, supplementary_gids);
 974                 if (r < 0)
 975                         return r;
 976         }
 977
 978         if (gid_is_valid(gid)) {
 979                 /* Then set our gids */
 980                 if (setresgid(gid, gid, gid) < 0)
 981                         return -errno;
 982         }
 983
 984         return 0;
 985 }
 986
 987 static int enforce_user(const ExecContext *context, uid_t uid) {
 988         assert(context);
 989
 990         if (!uid_is_valid(uid))
 991                 return 0;
 992
 993         /* Sets (but doesn't look up) the uid and make sure we keep the
 994          * capabilities while doing so. */
 995
 996         if (context->capability_ambient_set != 0) {
 997
 998                 /* First step: If we need to keep capabilities but
 999                  * drop privileges we need to make sure we keep our
1000                  * caps, while we drop privileges. */
1001                 if (uid != 0) {
1002                         int sb = context->secure_bits | 1<<SECURE_KEEP_CAPS;
1003
1004                         if (prctl(PR_GET_SECUREBITS) != sb)
1005                                 if (prctl(PR_SET_SECUREBITS, sb) < 0)
1006                                         return -errno;
1007                 }
1008         }
1009
1010         /* Second step: actually set the uids */
1011         if (setresuid(uid, uid, uid) < 0)
1012                 return -errno;
1013
1014         /* At this point we should have all necessary capabilities but
1015            are otherwise a normal user. However, the caps might got
1016            corrupted due to the setresuid() so we need clean them up
1017            later. This is done outside of this call. */
1018
1019         return 0;
1020 }
1021
1022 #if HAVE_PAM
1023
1024 static int null_conv(
1025                 int num_msg,
1026                 const struct pam_message **msg,
1027                 struct pam_response **resp,
1028                 void *appdata_ptr) {
1029
1030         /* We don't support conversations */
1031
1032         return PAM_CONV_ERR;
1033 }
1034
1035 #endif
1036
/* Opens a PAM session for service "name" and user "user", and forks off a helper process ("(sd-pam)") whose
 * only job is to close that session again when the calling process goes away.
 *
 * name    PAM service name passed to pam_start()
 * user    user name passed to pam_start()
 * uid/gid credentials the helper process drops to (best effort, failures are only logged)
 * tty     optional TTY path, set as PAM_TTY if non-NULL
 * env     in: environment exported to PAM via pam_putenv(); out: on success the old list is freed and replaced
 *         by the list returned by pam_getenvlist()
 * fds     file descriptors that are closed in the helper process
 * n_fds   number of entries in fds
 *
 * Returns 0 on success. On failure returns a negative errno-style value; PAM failures are reported as -EPERM
 * since PAM error codes do not map to errno. When built without PAM support this is a NOP returning 0. */
static int setup_pam(
                const char *name,
                const char *user,
                uid_t uid,
                gid_t gid,
                const char *tty,
                char ***env,
                int fds[], unsigned n_fds) {

#if HAVE_PAM

        static const struct pam_conv conv = {
                .conv = null_conv,
                .appdata_ptr = NULL
        };

        _cleanup_(barrier_destroy) Barrier barrier = BARRIER_NULL;
        pam_handle_t *handle = NULL;
        sigset_t old_ss;
        int pam_code = PAM_SUCCESS, r;
        char **nv, **e = NULL;
        bool close_session = false;
        pid_t pam_pid = 0, parent_pid;
        int flags = 0;

        assert(name);
        assert(user);
        assert(env);

        /* We set up PAM in the parent process, then fork. The child
         * will then stay around until killed via PR_SET_PDEATHSIG or
         * systemd via the cgroup logic. It will then remove the PAM
         * session again. The parent process will exec() the actual
         * daemon. We do things this way to ensure that the main PID
         * of the daemon is the one we initially fork()ed. */

        r = barrier_create(&barrier);
        if (r < 0)
                goto fail;

        /* Keep PAM modules quiet unless we are logging at debug level */
        if (log_get_max_level() < LOG_DEBUG)
                flags |= PAM_SILENT;

        pam_code = pam_start(name, user, &conv, &handle);
        if (pam_code != PAM_SUCCESS) {
                handle = NULL;
                goto fail;
        }

        if (tty) {
                pam_code = pam_set_item(handle, PAM_TTY, tty);
                if (pam_code != PAM_SUCCESS)
                        goto fail;
        }

        /* Export our environment to the PAM modules */
        STRV_FOREACH(nv, *env) {
                pam_code = pam_putenv(handle, *nv);
                if (pam_code != PAM_SUCCESS)
                        goto fail;
        }

        pam_code = pam_acct_mgmt(handle, flags);
        if (pam_code != PAM_SUCCESS)
                goto fail;

        pam_code = pam_open_session(handle, flags);
        if (pam_code != PAM_SUCCESS)
                goto fail;

        /* From here on the failure path has to close the session again */
        close_session = true;

        e = pam_getenvlist(handle);
        if (!e) {
                pam_code = PAM_BUF_ERR;
                goto fail;
        }

        /* Block SIGTERM, so that we know that it won't get lost in
         * the child */

        assert_se(sigprocmask_many(SIG_BLOCK, &old_ss, SIGTERM, -1) >= 0);

        parent_pid = getpid_cached();

        pam_pid = fork();
        if (pam_pid < 0) {
                r = -errno;
                goto fail;
        }

        if (pam_pid == 0) {
                int sig, ret = EXIT_PAM;

                /* The child's job is to reset the PAM session on
                 * termination */
                barrier_set_role(&barrier, BARRIER_CHILD);

                /* This string must fit in 10 chars (i.e. the length
                 * of "/sbin/init"), to look pretty in /bin/ps */
                rename_process("(sd-pam)");

                /* Make sure we don't keep open the passed fds in this
                child. We assume that otherwise only those fds are
                open here that have been opened by PAM. */
                close_many(fds, n_fds);

                /* Drop privileges - we don't need any to pam_close_session
                 * and this will make PR_SET_PDEATHSIG work in most cases.
                 * If this fails, ignore the error - but expect sd-pam threads
                 * to fail to exit normally */

                r = maybe_setgroups(0, NULL);
                if (r < 0)
                        log_warning_errno(r, "Failed to setgroups() in sd-pam: %m");
                if (setresgid(gid, gid, gid) < 0)
                        log_warning_errno(errno, "Failed to setresgid() in sd-pam: %m");
                if (setresuid(uid, uid, uid) < 0)
                        log_warning_errno(errno, "Failed to setresuid() in sd-pam: %m");

                (void) ignore_signals(SIGPIPE, -1);

                /* Wait until our parent died. This will only work if
                 * the above setresuid() succeeds, otherwise the kernel
                 * will not allow unprivileged parents kill their privileged
                 * children this way. We rely on the control groups kill logic
                 * to do the rest for us. */
                if (prctl(PR_SET_PDEATHSIG, SIGTERM) < 0)
                        goto child_finish;

                /* Tell the parent that our setup is done. This is especially
                 * important regarding dropping privileges. Otherwise, unit
                 * setup might race against our setresuid(2) call.
                 *
                 * If the parent aborted, we'll detect this below, hence ignore
                 * return failure here. */
                (void) barrier_place(&barrier);

                /* Check if our parent process might already have died? */
                if (getppid() == parent_pid) {
                        sigset_t ss;

                        assert_se(sigemptyset(&ss) >= 0);
                        assert_se(sigaddset(&ss, SIGTERM) >= 0);

                        for (;;) {
                                int k;

                                /* Note that sigwait() reports failures by returning a positive error
                                 * number; it neither returns a negative value nor sets errno. */
                                k = sigwait(&ss, &sig);
                                if (k == EINTR)
                                        continue;
                                if (k != 0)
                                        goto child_finish;

                                assert(sig == SIGTERM);
                                break;
                        }
                }

                /* If our parent died we'll end the session */
                if (getppid() != parent_pid) {
                        pam_code = pam_close_session(handle, flags);
                        if (pam_code != PAM_SUCCESS)
                                goto child_finish;
                }

                ret = 0;

        child_finish:
                pam_end(handle, pam_code | flags);
                _exit(ret);
        }

        barrier_set_role(&barrier, BARRIER_PARENT);

        /* If the child was forked off successfully it will do all the
         * cleanups, so forget about the handle here. */
        handle = NULL;

        /* Unblock SIGTERM again in the parent */
        assert_se(sigprocmask(SIG_SETMASK, &old_ss, NULL) >= 0);

        /* We close the log explicitly here, since the PAM modules
         * might have opened it, but we don't want this fd around. */
        closelog();

        /* Synchronously wait for the child to initialize. We don't care for
         * errors as we cannot recover. However, warn loudly if it happens. */
        if (!barrier_place_and_sync(&barrier))
                log_error("PAM initialization failed");

        /* Replace the caller's environment with the one PAM handed back */
        strv_free(*env);
        *env = e;

        return 0;

fail:
        if (pam_code != PAM_SUCCESS) {
                log_error("PAM failed: %s", pam_strerror(handle, pam_code));
                r = -EPERM;  /* PAM errors do not map to errno */
        } else
                log_error_errno(r, "PAM failed: %m");

        if (handle) {
                if (close_session)
                        pam_code = pam_close_session(handle, flags);

                pam_end(handle, pam_code | flags);
        }

        strv_free(e);
        closelog();

        return r;
#else
        return 0;
#endif
}
1253
/* Renames the current process to "(<name>)", where <name> is derived from the last component of "path". At
 * most the final 8 characters of that component are used, so that the result never exceeds 10 characters. If
 * the component is empty the process is renamed to "(...)". */
static void rename_process_from_path(const char *path) {
        char process_name[11];  /* '(' + up to 8 characters + ')' + NUL */
        const char *base, *tail;
        size_t n;

        /* The resulting string must fit in 10 chars (i.e. the length of "/sbin/init") to look pretty in
         * /bin/ps */

        base = basename(path);
        if (isempty(base)) {
                rename_process("(...)");
                return;
        }

        n = strlen(base);
        tail = base;
        if (n > 8) {
                /* Prefer the end of the name: it is usually the more interesting part, since the beginning
                 * might just be "systemd-" */
                tail += n - 8;
                n = 8;
        }

        process_name[0] = '(';
        memcpy(process_name + 1, tail, n);
        process_name[n + 1] = ')';
        process_name[n + 2] = '\0';

        rename_process(process_name);
}
1284
1285 static bool context_has_address_families(const ExecContext *c) {
1286         assert(c);
1287
1288         return c->address_families_whitelist ||
1289                 !set_isempty(c->address_families);
1290 }
1291
1292 static bool context_has_syscall_filters(const ExecContext *c) {
1293         assert(c);
1294
1295         return c->syscall_whitelist ||
1296                 !set_isempty(c->syscall_filter);
1297 }
1298
/* Decides whether the no-new-privileges flag needs to be in effect for this context: either because it was
 * requested explicitly, or because we lack CAP_SYS_ADMIN and one of the seccomp-backed settings is in use. */
static bool context_has_no_new_privileges(const ExecContext *c) {
        assert(c);

        /* Explicitly requested */
        if (c->no_new_privileges)
                return true;

        /* If we are privileged, we don't need NNP */
        if (have_effective_cap(CAP_SYS_ADMIN))
                return false;

        /* We are unprivileged: any form of seccomp requires NNP */
        if (context_has_address_families(c) ||
            c->memory_deny_write_execute ||
            c->restrict_realtime)
                return true;

        if (exec_context_restrict_namespaces_set(c))
                return true;

        if (c->protect_kernel_tunables ||
            c->protect_kernel_modules ||
            c->private_devices)
                return true;

        if (context_has_syscall_filters(c))
                return true;

        return !set_isempty(c->syscall_archs) ||
                c->lock_personality;
}
1320
1321 #if HAVE_SECCOMP
1322
1323 static bool skip_seccomp_unavailable(const Unit* u, const char* msg) {
1324
1325         if (is_seccomp_available())
1326                 return false;
1327
1328         log_unit_debug(u, "SECCOMP features not detected in the kernel, skipping %s", msg);
1329         return true;
1330 }
1331
1332 static int apply_syscall_filter(const Unit* u, const ExecContext *c, bool needs_ambient_hack) {
1333         uint32_t negative_action, default_action, action;
1334         int r;
1335
1336         assert(u);
1337         assert(c);
1338
1339         if (!context_has_syscall_filters(c))
1340                 return 0;
1341
1342         if (skip_seccomp_unavailable(u, "SystemCallFilter="))
1343                 return 0;
1344
1345         negative_action = c->syscall_errno == 0 ? SCMP_ACT_KILL : SCMP_ACT_ERRNO(c->syscall_errno);
1346
1347         if (c->syscall_whitelist) {
1348                 default_action = negative_action;
1349                 action = SCMP_ACT_ALLOW;
1350         } else {
1351                 default_action = SCMP_ACT_ALLOW;
1352                 action = negative_action;
1353         }
1354
1355         if (needs_ambient_hack) {
1356                 r = seccomp_filter_set_add(c->syscall_filter, c->syscall_whitelist, syscall_filter_sets + SYSCALL_FILTER_SET_SETUID);
1357                 if (r < 0)
1358                         return r;
1359         }
1360
1361         return seccomp_load_syscall_filter_set_raw(default_action, c->syscall_filter, action);
1362 }
1363
1364 static int apply_syscall_archs(const Unit *u, const ExecContext *c) {
1365         assert(u);
1366         assert(c);
1367
1368         if (set_isempty(c->syscall_archs))
1369                 return 0;
1370
1371         if (skip_seccomp_unavailable(u, "SystemCallArchitectures="))
1372                 return 0;
1373
1374         return seccomp_restrict_archs(c->syscall_archs);
1375 }
1376
1377 static int apply_address_families(const Unit* u, const ExecContext *c) {
1378         assert(u);
1379         assert(c);
1380
1381         if (!context_has_address_families(c))
1382                 return 0;
1383
1384         if (skip_seccomp_unavailable(u, "RestrictAddressFamilies="))
1385                 return 0;
1386
1387         return seccomp_restrict_address_families(c->address_families, c->address_families_whitelist);
1388 }
1389
1390 static int apply_memory_deny_write_execute(const Unit* u, const ExecContext *c) {
1391         assert(u);
1392         assert(c);
1393
1394         if (!c->memory_deny_write_execute)
1395                 return 0;
1396
1397         if (skip_seccomp_unavailable(u, "MemoryDenyWriteExecute="))
1398                 return 0;
1399
1400         return seccomp_memory_deny_write_execute();
1401 }
1402
1403 static int apply_restrict_realtime(const Unit* u, const ExecContext *c) {
1404         assert(u);
1405         assert(c);
1406
1407         if (!c->restrict_realtime)
1408                 return 0;
1409
1410         if (skip_seccomp_unavailable(u, "RestrictRealtime="))
1411                 return 0;
1412
1413         return seccomp_restrict_realtime();
1414 }
1415
1416 static int apply_protect_sysctl(const Unit *u, const ExecContext *c) {
1417         assert(u);
1418         assert(c);
1419
1420         /* Turn off the legacy sysctl() system call. Many distributions turn this off while building the kernel, but
1421          * let's protect even those systems where this is left on in the kernel. */
1422
1423         if (!c->protect_kernel_tunables)
1424                 return 0;
1425
1426         if (skip_seccomp_unavailable(u, "ProtectKernelTunables="))
1427                 return 0;
1428
1429         return seccomp_protect_sysctl();
1430 }
1431
1432 static int apply_protect_kernel_modules(const Unit *u, const ExecContext *c) {
1433         assert(u);
1434         assert(c);
1435
1436         /* Turn off module syscalls on ProtectKernelModules=yes */
1437
1438         if (!c->protect_kernel_modules)
1439                 return 0;
1440
1441         if (skip_seccomp_unavailable(u, "ProtectKernelModules="))
1442                 return 0;
1443
1444         return seccomp_load_syscall_filter_set(SCMP_ACT_ALLOW, syscall_filter_sets + SYSCALL_FILTER_SET_MODULE, SCMP_ACT_ERRNO(EPERM));
1445 }
1446
1447 static int apply_private_devices(const Unit *u, const ExecContext *c) {
1448         assert(u);
1449         assert(c);
1450
1451         /* If PrivateDevices= is set, also turn off iopl and all @raw-io syscalls. */
1452
1453         if (!c->private_devices)
1454                 return 0;
1455
1456         if (skip_seccomp_unavailable(u, "PrivateDevices="))
1457                 return 0;
1458
1459         return seccomp_load_syscall_filter_set(SCMP_ACT_ALLOW, syscall_filter_sets + SYSCALL_FILTER_SET_RAW_IO, SCMP_ACT_ERRNO(EPERM));
1460 }
1461
1462 static int apply_restrict_namespaces(Unit *u, const ExecContext *c) {
1463         assert(u);
1464         assert(c);
1465
1466         if (!exec_context_restrict_namespaces_set(c))
1467                 return 0;
1468
1469         if (skip_seccomp_unavailable(u, "RestrictNamespaces="))
1470                 return 0;
1471
1472         return seccomp_restrict_namespaces(c->restrict_namespaces);
1473 }
1474
1475 static int apply_lock_personality(const Unit* u, const ExecContext *c) {
1476         unsigned long personality;
1477         int r;
1478
1479         assert(u);
1480         assert(c);
1481
1482         if (!c->lock_personality)
1483                 return 0;
1484
1485         if (skip_seccomp_unavailable(u, "LockPersonality="))
1486                 return 0;
1487
1488         personality = c->personality;
1489
1490         /* If personality is not specified, use either PER_LINUX or PER_LINUX32 depending on what is currently set. */
1491         if (personality == PERSONALITY_INVALID) {
1492
1493                 r = opinionated_personality(&personality);
1494                 if (r < 0)
1495                         return r;
1496         }
1497
1498         return seccomp_lock_personality(personality);
1499 }
1500
1501 #endif
1502
1503 static void do_idle_pipe_dance(int idle_pipe[4]) {
1504         assert(idle_pipe);
1505
1506         idle_pipe[1] = safe_close(idle_pipe[1]);
1507         idle_pipe[2] = safe_close(idle_pipe[2]);
1508
1509         if (idle_pipe[0] >= 0) {
1510                 int r;
1511
1512                 r = fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT_USEC);
1513
1514                 if (idle_pipe[3] >= 0 && r == 0 /* timeout */) {
1515                         ssize_t n;
1516
1517                         /* Signal systemd that we are bored and want to continue. */
1518                         n = write(idle_pipe[3], "x", 1);
1519                         if (n > 0)
1520                                 /* Wait for systemd to react to the signal above. */
1521                                 fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT2_USEC);
1522                 }
1523
1524                 idle_pipe[0] = safe_close(idle_pipe[0]);
1525
1526         }
1527
1528         idle_pipe[3] = safe_close(idle_pipe[3]);
1529 }
1530
1531 static int build_environment(
1532                 Unit *u,
1533                 const ExecContext *c,
1534                 const ExecParameters *p,
1535                 unsigned n_fds,
1536                 const char *home,
1537                 const char *username,
1538                 const char *shell,
1539                 dev_t journal_stream_dev,
1540                 ino_t journal_stream_ino,
1541                 char ***ret) {
1542
1543         _cleanup_strv_free_ char **our_env = NULL;
1544         unsigned n_env = 0;
1545         char *x;
1546
1547         assert(u);
1548         assert(c);
1549         assert(ret);
1550
1551         our_env = new0(char*, 14);
1552         if (!our_env)
1553                 return -ENOMEM;
1554
1555         if (n_fds > 0) {
1556                 _cleanup_free_ char *joined = NULL;
1557
1558                 if (asprintf(&x, "LISTEN_PID="PID_FMT, getpid_cached()) < 0)
1559                         return -ENOMEM;
1560                 our_env[n_env++] = x;
1561
1562                 if (asprintf(&x, "LISTEN_FDS=%u", n_fds) < 0)
1563                         return -ENOMEM;
1564                 our_env[n_env++] = x;
1565
1566                 joined = strv_join(p->fd_names, ":");
1567                 if (!joined)
1568                         return -ENOMEM;
1569
1570                 x = strjoin("LISTEN_FDNAMES=", joined);
1571                 if (!x)
1572                         return -ENOMEM;
1573                 our_env[n_env++] = x;
1574         }
1575
1576         if ((p->flags & EXEC_SET_WATCHDOG) && p->watchdog_usec > 0) {
1577                 if (asprintf(&x, "WATCHDOG_PID="PID_FMT, getpid_cached()) < 0)
1578                         return -ENOMEM;
1579                 our_env[n_env++] = x;
1580
1581                 if (asprintf(&x, "WATCHDOG_USEC="USEC_FMT, p->watchdog_usec) < 0)
1582                         return -ENOMEM;
1583                 our_env[n_env++] = x;
1584         }
1585
1586         /* If this is D-Bus, tell the nss-systemd module, since it relies on being able to use D-Bus look up dynamic
1587          * users via PID 1, possibly dead-locking the dbus daemon. This way it will not use D-Bus to resolve names, but
1588          * check the database directly. */
1589         if (p->flags & EXEC_NSS_BYPASS_BUS) {
1590                 x = strdup("SYSTEMD_NSS_BYPASS_BUS=1");
1591                 if (!x)
1592                         return -ENOMEM;
1593                 our_env[n_env++] = x;
1594         }
1595
1596         if (home) {
1597                 x = strappend("HOME=", home);
1598                 if (!x)
1599                         return -ENOMEM;
1600                 our_env[n_env++] = x;
1601         }
1602
1603         if (username) {
1604                 x = strappend("LOGNAME=", username);
1605                 if (!x)
1606                         return -ENOMEM;
1607                 our_env[n_env++] = x;
1608
1609                 x = strappend("USER=", username);
1610                 if (!x)
1611                         return -ENOMEM;
1612                 our_env[n_env++] = x;
1613         }
1614
1615         if (shell) {
1616                 x = strappend("SHELL=", shell);
1617                 if (!x)
1618                         return -ENOMEM;
1619                 our_env[n_env++] = x;
1620         }
1621
1622         if (!sd_id128_is_null(u->invocation_id)) {
1623                 if (asprintf(&x, "INVOCATION_ID=" SD_ID128_FORMAT_STR, SD_ID128_FORMAT_VAL(u->invocation_id)) < 0)
1624                         return -ENOMEM;
1625
1626                 our_env[n_env++] = x;
1627         }
1628
1629         if (exec_context_needs_term(c)) {
1630                 const char *tty_path, *term = NULL;
1631
1632                 tty_path = exec_context_tty_path(c);
1633
1634                 /* If we are forked off PID 1 and we are supposed to operate on /dev/console, then let's try to inherit
1635                  * the $TERM set for PID 1. This is useful for containers so that the $TERM the container manager
1636                  * passes to PID 1 ends up all the way in the console login shown. */
1637
1638                 if (path_equal(tty_path, "/dev/console") && getppid() == 1)
1639                         term = getenv("TERM");
1640                 if (!term)
1641                         term = default_term_for_tty(tty_path);
1642
1643                 x = strappend("TERM=", term);
1644                 if (!x)
1645                         return -ENOMEM;
1646                 our_env[n_env++] = x;
1647         }
1648
1649         if (journal_stream_dev != 0 && journal_stream_ino != 0) {
1650                 if (asprintf(&x, "JOURNAL_STREAM=" DEV_FMT ":" INO_FMT, journal_stream_dev, journal_stream_ino) < 0)
1651                         return -ENOMEM;
1652
1653                 our_env[n_env++] = x;
1654         }
1655
1656         our_env[n_env++] = NULL;
1657         assert(n_env <= 12);
1658
1659         *ret = our_env;
1660         our_env = NULL;
1661
1662         return 0;
1663 }
1664
1665 static int build_pass_environment(const ExecContext *c, char ***ret) {
1666         _cleanup_strv_free_ char **pass_env = NULL;
1667         size_t n_env = 0, n_bufsize = 0;
1668         char **i;
1669
1670         STRV_FOREACH(i, c->pass_environment) {
1671                 _cleanup_free_ char *x = NULL;
1672                 char *v;
1673
1674                 v = getenv(*i);
1675                 if (!v)
1676                         continue;
1677                 x = strjoin(*i, "=", v);
1678                 if (!x)
1679                         return -ENOMEM;
1680
1681                 if (!GREEDY_REALLOC(pass_env, n_bufsize, n_env + 2))
1682                         return -ENOMEM;
1683
1684                 pass_env[n_env++] = x;
1685                 pass_env[n_env] = NULL;
1686                 x = NULL;
1687         }
1688
1689         *ret = pass_env;
1690         pass_env = NULL;
1691
1692         return 0;
1693 }
1694
1695 static bool exec_needs_mount_namespace(
1696                 const ExecContext *context,
1697                 const ExecParameters *params,
1698                 ExecRuntime *runtime) {
1699
1700         assert(context);
1701         assert(params);
1702
1703         if (context->root_image)
1704                 return true;
1705
1706         if (!strv_isempty(context->read_write_paths) ||
1707             !strv_isempty(context->read_only_paths) ||
1708             !strv_isempty(context->inaccessible_paths))
1709                 return true;
1710
1711         if (context->n_bind_mounts > 0)
1712                 return true;
1713
1714         if (context->mount_flags != 0)
1715                 return true;
1716
1717         if (context->private_tmp && runtime && (runtime->tmp_dir || runtime->var_tmp_dir))
1718                 return true;
1719
1720         if (context->private_devices ||
1721             context->protect_system != PROTECT_SYSTEM_NO ||
1722             context->protect_home != PROTECT_HOME_NO ||
1723             context->protect_kernel_tunables ||
1724             context->protect_kernel_modules ||
1725             context->protect_control_groups)
1726                 return true;
1727
1728         if (context->mount_apivfs && (context->root_image || context->root_directory))
1729                 return true;
1730
1731         if (context->dynamic_user &&
1732             (!strv_isempty(context->directories[EXEC_DIRECTORY_RUNTIME].paths) ||
1733              !strv_isempty(context->directories[EXEC_DIRECTORY_STATE].paths) ||
1734              !strv_isempty(context->directories[EXEC_DIRECTORY_CACHE].paths) ||
1735              !strv_isempty(context->directories[EXEC_DIRECTORY_LOGS].paths)))
1736                 return true;
1737
1738         return false;
1739 }
1740
1741 static int setup_private_users(uid_t uid, gid_t gid) {
1742         _cleanup_free_ char *uid_map = NULL, *gid_map = NULL;
1743         _cleanup_close_pair_ int errno_pipe[2] = { -1, -1 };
1744         _cleanup_close_ int unshare_ready_fd = -1;
1745         _cleanup_(sigkill_waitp) pid_t pid = 0;
1746         uint64_t c = 1;
1747         siginfo_t si;
1748         ssize_t n;
1749         int r;
1750
1751         /* Set up a user namespace and map root to root, the selected UID/GID to itself, and everything else to
1752          * nobody. In order to be able to write this mapping we need CAP_SETUID in the original user namespace, which
1753          * we however lack after opening the user namespace. To work around this we fork() a temporary child process,
1754          * which waits for the parent to create the new user namespace while staying in the original namespace. The
1755          * child then writes the UID mapping, under full privileges. The parent waits for the child to finish and
1756          * continues execution normally. */
1757
1758         if (uid != 0 && uid_is_valid(uid)) {
1759                 r = asprintf(&uid_map,
1760                              "0 0 1\n"                      /* Map root → root */
1761                              UID_FMT " " UID_FMT " 1\n",    /* Map $UID → $UID */
1762                              uid, uid);
1763                 if (r < 0)
1764                         return -ENOMEM;
1765         } else {
1766                 uid_map = strdup("0 0 1\n");            /* The case where the above is the same */
1767                 if (!uid_map)
1768                         return -ENOMEM;
1769         }
1770
1771         if (gid != 0 && gid_is_valid(gid)) {
1772                 r = asprintf(&gid_map,
1773                              "0 0 1\n"                      /* Map root → root */
1774                              GID_FMT " " GID_FMT " 1\n",    /* Map $GID → $GID */
1775                              gid, gid);
1776                 if (r < 0)
1777                         return -ENOMEM;
1778         } else {
1779                 gid_map = strdup("0 0 1\n");            /* The case where the above is the same */
1780                 if (!gid_map)
1781                         return -ENOMEM;
1782         }
1783
1784         /* Create a communication channel so that the parent can tell the child when it finished creating the user
1785          * namespace. */
1786         unshare_ready_fd = eventfd(0, EFD_CLOEXEC);
1787         if (unshare_ready_fd < 0)
1788                 return -errno;
1789
1790         /* Create a communication channel so that the child can tell the parent a proper error code in case it
1791          * failed. */
1792         if (pipe2(errno_pipe, O_CLOEXEC) < 0)
1793                 return -errno;
1794
1795         pid = fork();
1796         if (pid < 0)
1797                 return -errno;
1798
1799         if (pid == 0) {
1800                 _cleanup_close_ int fd = -1;
1801                 const char *a;
1802                 pid_t ppid;
1803
1804                 /* Child process, running in the original user namespace. Let's update the parent's UID/GID map from
1805                  * here, after the parent opened its own user namespace. */
1806
1807                 ppid = getppid();
1808                 errno_pipe[0] = safe_close(errno_pipe[0]);
1809
1810                 /* Wait until the parent unshared the user namespace */
1811                 if (read(unshare_ready_fd, &c, sizeof(c)) < 0) {
1812                         r = -errno;
1813                         goto child_fail;
1814                 }
1815
1816                 /* Disable the setgroups() system call in the child user namespace, for good. */
1817                 a = procfs_file_alloca(ppid, "setgroups");
1818                 fd = open(a, O_WRONLY|O_CLOEXEC);
1819                 if (fd < 0) {
1820                         if (errno != ENOENT) {
1821                                 r = -errno;
1822                                 goto child_fail;
1823                         }
1824
1825                         /* If the file is missing the kernel is too old, let's continue anyway. */
1826                 } else {
1827                         if (write(fd, "deny\n", 5) < 0) {
1828                                 r = -errno;
1829                                 goto child_fail;
1830                         }
1831
1832                         fd = safe_close(fd);
1833                 }
1834
1835                 /* First write the GID map */
1836                 a = procfs_file_alloca(ppid, "gid_map");
1837                 fd = open(a, O_WRONLY|O_CLOEXEC);
1838                 if (fd < 0) {
1839                         r = -errno;
1840                         goto child_fail;
1841                 }
1842                 if (write(fd, gid_map, strlen(gid_map)) < 0) {
1843                         r = -errno;
1844                         goto child_fail;
1845                 }
1846                 fd = safe_close(fd);
1847
1848                 /* The write the UID map */
1849                 a = procfs_file_alloca(ppid, "uid_map");
1850                 fd = open(a, O_WRONLY|O_CLOEXEC);
1851                 if (fd < 0) {
1852                         r = -errno;
1853                         goto child_fail;
1854                 }
1855                 if (write(fd, uid_map, strlen(uid_map)) < 0) {
1856                         r = -errno;
1857                         goto child_fail;
1858                 }
1859
1860                 _exit(EXIT_SUCCESS);
1861
1862         child_fail:
1863                 (void) write(errno_pipe[1], &r, sizeof(r));
1864                 _exit(EXIT_FAILURE);
1865         }
1866
1867         errno_pipe[1] = safe_close(errno_pipe[1]);
1868
1869         if (unshare(CLONE_NEWUSER) < 0)
1870                 return -errno;
1871
1872         /* Let the child know that the namespace is ready now */
1873         if (write(unshare_ready_fd, &c, sizeof(c)) < 0)
1874                 return -errno;
1875
1876         /* Try to read an error code from the child */
1877         n = read(errno_pipe[0], &r, sizeof(r));
1878         if (n < 0)
1879                 return -errno;
1880         if (n == sizeof(r)) { /* an error code was sent to us */
1881                 if (r < 0)
1882                         return r;
1883                 return -EIO;
1884         }
1885         if (n != 0) /* on success we should have read 0 bytes */
1886                 return -EIO;
1887
1888         r = wait_for_terminate(pid, &si);
1889         if (r < 0)
1890                 return r;
1891         pid = 0;
1892
1893         /* If something strange happened with the child, let's consider this fatal, too */
1894         if (si.si_code != CLD_EXITED || si.si_status != 0)
1895                 return -EIO;
1896
1897         return 0;
1898 }
1899
1900 static int setup_exec_directory(
1901                 const ExecContext *context,
1902                 const ExecParameters *params,
1903                 uid_t uid,
1904                 gid_t gid,
1905                 ExecDirectoryType type,
1906                 int *exit_status) {
1907
1908         static const int exit_status_table[_EXEC_DIRECTORY_TYPE_MAX] = {
1909                 [EXEC_DIRECTORY_RUNTIME] = EXIT_RUNTIME_DIRECTORY,
1910                 [EXEC_DIRECTORY_STATE] = EXIT_STATE_DIRECTORY,
1911                 [EXEC_DIRECTORY_CACHE] = EXIT_CACHE_DIRECTORY,
1912                 [EXEC_DIRECTORY_LOGS] = EXIT_LOGS_DIRECTORY,
1913                 [EXEC_DIRECTORY_CONFIGURATION] = EXIT_CONFIGURATION_DIRECTORY,
1914         };
1915         char **rt;
1916         int r;
1917
1918         assert(context);
1919         assert(params);
1920         assert(type >= 0 && type < _EXEC_DIRECTORY_TYPE_MAX);
1921         assert(exit_status);
1922
1923         if (!params->prefix[type])
1924                 return 0;
1925
1926         if (params->flags & EXEC_CHOWN_DIRECTORIES) {
1927                 if (!uid_is_valid(uid))
1928                         uid = 0;
1929                 if (!gid_is_valid(gid))
1930                         gid = 0;
1931         }
1932
1933         STRV_FOREACH(rt, context->directories[type].paths) {
1934                 _cleanup_free_ char *p = NULL, *pp = NULL;
1935                 const char *effective;
1936
1937                 p = strjoin(params->prefix[type], "/", *rt);
1938                 if (!p) {
1939                         r = -ENOMEM;
1940                         goto fail;
1941                 }
1942
1943                 r = mkdir_parents_label(p, 0755);
1944                 if (r < 0)
1945                         goto fail;
1946
1947                 if (context->dynamic_user && type != EXEC_DIRECTORY_CONFIGURATION) {
1948                         _cleanup_free_ char *private_root = NULL, *relative = NULL, *parent = NULL;
1949
1950                         /* So, here's one extra complication when dealing with DynamicUser=1 units. In that case we
1951                          * want to avoid leaving a directory around fully accessible that is owned by a dynamic user
1952                          * whose UID is later on reused. To lock this down we use the same trick used by container
1953                          * managers to prohibit host users to get access to files of the same UID in containers: we
1954                          * place everything inside a directory that has an access mode of 0700 and is owned root:root,
1955                          * so that it acts as security boundary for unprivileged host code. We then use fs namespacing
1956                          * to make this directory permeable for the service itself.
1957                          *
1958                          * Specifically: for a service which wants a special directory "foo/" we first create a
1959                          * directory "private/" with access mode 0700 owned by root:root. Then we place "foo" inside of
1960                          * that directory (i.e. "private/foo/"), and make "foo" a symlink to "private/foo". This way,
1961                          * privileged host users can access "foo/" as usual, but unprivileged host users can't look
1962                          * into it. Inside of the namespaceof the container "private/" is replaced by a more liberally
1963                          * accessible tmpfs, into which the host's "private/foo/" is mounted under the same name, thus
1964                          * disabling the access boundary for the service and making sure it only gets access to the
1965                          * dirs it needs but no others. Tricky? Yes, absolutely, but it works!
1966                          *
1967                          * Note that we don't do this for EXEC_DIRECTORY_CONFIGURATION as that's assumed not to be
1968                          * owned by the service itself. */
1969
1970                         private_root = strjoin(params->prefix[type], "/private");
1971                         if (!private_root) {
1972                                 r = -ENOMEM;
1973                                 goto fail;
1974                         }
1975
1976                         /* First set up private root if it doesn't exist yet, with access mode 0700 and owned by root:root */
1977                         r = mkdir_safe_label(private_root, 0700, 0, 0);
1978                         if (r < 0)
1979                                 goto fail;
1980
1981                         pp = strjoin(private_root, "/", *rt);
1982                         if (!pp) {
1983                                 r = -ENOMEM;
1984                                 goto fail;
1985                         }
1986
1987                         /* Create all directories between the configured directory and this private root, and mark them 0755 */
1988                         r = mkdir_parents_label(pp, 0755);
1989                         if (r < 0)
1990                                 goto fail;
1991
1992                         /* Finally, create the actual directory for the service */
1993                         r = mkdir_label(pp, context->directories[type].mode);
1994                         if (r < 0 && r != -EEXIST)
1995                                 goto fail;
1996
1997                         parent = dirname_malloc(p);
1998                         if (!parent) {
1999                                 r = -ENOMEM;
2000                                 goto fail;
2001                         }
2002
2003                         r = path_make_relative(parent, pp, &relative);
2004                         if (r < 0)
2005                                 goto fail;
2006
2007                         /* And link it up from the original place */
2008                         r = symlink_idempotent(relative, p);
2009                         if (r < 0)
2010                                 goto fail;
2011
2012                         effective = pp;
2013
2014                 } else {
2015                         r = mkdir_label(p, context->directories[type].mode);
2016                         if (r < 0 && r != -EEXIST)
2017                                 goto fail;
2018
2019                         effective = p;
2020                 }
2021
2022                 /* First lock down the access mode */
2023                 if (chmod(effective, context->directories[type].mode) < 0) {
2024                         r = -errno;
2025                         goto fail;
2026                 }
2027
2028                 /* Don't change the owner of the configuration directory, as in the common case it is not written to by
2029                  * a service, and shall not be writable. */
2030                 if (type == EXEC_DIRECTORY_CONFIGURATION)
2031                         continue;
2032
2033                 /* Then, change the ownership of the whole tree, if necessary */
2034                 r = path_chown_recursive(effective, uid, gid);
2035                 if (r < 0)
2036                         goto fail;
2037         }
2038
2039         return 0;
2040
2041 fail:
2042         *exit_status = exit_status_table[type];
2043         return r;
2044 }
2045
2046 static int setup_smack(
2047                 const ExecContext *context,
2048                 const ExecCommand *command) {
2049
2050         int r;
2051
2052         assert(context);
2053         assert(command);
2054
2055         if (context->smack_process_label) {
2056                 r = mac_smack_apply_pid(0, context->smack_process_label);
2057                 if (r < 0)
2058                         return r;
2059         }
2060 #ifdef SMACK_DEFAULT_PROCESS_LABEL
2061         else {
2062                 _cleanup_free_ char *exec_label = NULL;
2063
2064                 r = mac_smack_read(command->path, SMACK_ATTR_EXEC, &exec_label);
2065                 if (r < 0 && r != -ENODATA && r != -EOPNOTSUPP)
2066                         return r;
2067
2068                 r = mac_smack_apply_pid(0, exec_label ? : SMACK_DEFAULT_PROCESS_LABEL);
2069                 if (r < 0)
2070                         return r;
2071         }
2072 #endif
2073
2074         return 0;
2075 }
2076
2077 static int compile_read_write_paths(
2078                 const ExecContext *context,
2079                 const ExecParameters *params,
2080                 char ***ret) {
2081
2082         _cleanup_strv_free_ char **l = NULL;
2083         char **rt;
2084         ExecDirectoryType i;
2085
2086         /* Compile the list of writable paths. This is the combination of
2087          * the explicitly configured paths, plus all runtime directories. */
2088
2089         if (strv_isempty(context->read_write_paths)) {
2090                 for (i = 0; i < _EXEC_DIRECTORY_TYPE_MAX; i++)
2091                         if (!strv_isempty(context->directories[i].paths))
2092                                 break;
2093
2094                 if (i == _EXEC_DIRECTORY_TYPE_MAX) {
2095                         *ret = NULL; /* NOP if neither is set */
2096                         return 0;
2097                 }
2098         }
2099
2100         l = strv_copy(context->read_write_paths);
2101         if (!l)
2102                 return -ENOMEM;
2103
2104         for (i = 0; i < _EXEC_DIRECTORY_TYPE_MAX; i++) {
2105                 if (!params->prefix[i])
2106                         continue;
2107
2108                 STRV_FOREACH(rt, context->directories[i].paths) {
2109                         char *s;
2110
2111                         s = strjoin(params->prefix[i], "/", *rt);
2112                         if (!s)
2113                                 return -ENOMEM;
2114
2115                         if (strv_consume(&l, s) < 0)
2116                                 return -ENOMEM;
2117                 }
2118         }
2119
2120         *ret = l;
2121         l = NULL;
2122
2123         return 0;
2124 }
2125
2126 static int compile_bind_mounts(
2127                 const ExecContext *context,
2128                 const ExecParameters *params,
2129                 BindMount **ret_bind_mounts,
2130                 unsigned *ret_n_bind_mounts,
2131                 char ***ret_empty_directories) {
2132
2133         _cleanup_strv_free_ char **empty_directories = NULL;
2134         BindMount *bind_mounts;
2135         unsigned n, h = 0, i;
2136         ExecDirectoryType t;
2137         int r;
2138
2139         assert(context);
2140         assert(params);
2141         assert(ret_bind_mounts);
2142         assert(ret_n_bind_mounts);
2143         assert(ret_empty_directories);
2144
2145         n = context->n_bind_mounts;
2146         for (t = 0; t < _EXEC_DIRECTORY_TYPE_MAX; t++) {
2147                 if (!params->prefix[t])
2148                         continue;
2149
2150                 n += strv_length(context->directories[t].paths);
2151         }
2152
2153         if (n <= 0) {
2154                 *ret_bind_mounts = NULL;
2155                 *ret_n_bind_mounts = 0;
2156                 *ret_empty_directories = NULL;
2157                 return 0;
2158         }
2159
2160         bind_mounts = new(BindMount, n);
2161         if (!bind_mounts)
2162                 return -ENOMEM;
2163
2164         for (i = 0; context->n_bind_mounts; i++) {
2165                 BindMount *item = context->bind_mounts + i;
2166                 char *s, *d;
2167
2168                 s = strdup(item->source);
2169                 if (!s) {
2170                         r = -ENOMEM;
2171                         goto finish;
2172                 }
2173
2174                 d = strdup(item->destination);
2175                 if (!d) {
2176                         free(s);
2177                         r = -ENOMEM;
2178                         goto finish;
2179                 }
2180
2181                 bind_mounts[h++] = (BindMount) {
2182                         .source = s,
2183                         .destination = d,
2184                         .read_only = item->read_only,
2185                         .recursive = item->recursive,
2186                         .ignore_enoent = item->ignore_enoent,
2187                 };
2188         }
2189
2190         for (t = 0; t < _EXEC_DIRECTORY_TYPE_MAX; t++) {
2191                 char **suffix;
2192
2193                 if (!params->prefix[t])
2194                         continue;
2195
2196                 if (strv_isempty(context->directories[t].paths))
2197                         continue;
2198
2199                 if (context->dynamic_user && t != EXEC_DIRECTORY_CONFIGURATION) {
2200                         char *private_root;
2201
2202                         /* So this is for a dynamic user, and we need to make sure the process can access its own
2203                          * directory. For that we overmount the usually inaccessible "private" subdirectory with a
2204                          * tmpfs that makes it accessible and is empty except for the submounts we do this for. */
2205
2206                         private_root = strjoin(params->prefix[t], "/private");
2207                         if (!private_root) {
2208                                 r = -ENOMEM;
2209                                 goto finish;
2210                         }
2211
2212                         r = strv_consume(&empty_directories, private_root);
2213                         if (r < 0) {
2214                                 r = -ENOMEM;
2215                                 goto finish;
2216                         }
2217                 }
2218
2219                 STRV_FOREACH(suffix, context->directories[t].paths) {
2220                         char *s, *d;
2221
2222                         if (context->dynamic_user && t != EXEC_DIRECTORY_CONFIGURATION)
2223                                 s = strjoin(params->prefix[t], "/private/", *suffix);
2224                         else
2225                                 s = strjoin(params->prefix[t], "/", *suffix);
2226                         if (!s) {
2227                                 r = -ENOMEM;
2228                                 goto finish;
2229                         }
2230
2231                         d = strdup(s);
2232                         if (!d) {
2233                                 free(s);
2234                                 r = -ENOMEM;
2235                                 goto finish;
2236                         }
2237
2238                         bind_mounts[h++] = (BindMount) {
2239                                 .source = s,
2240                                 .destination = d,
2241                                 .read_only = false,
2242                                 .recursive = true,
2243                                 .ignore_enoent = false,
2244                         };
2245                 }
2246         }
2247
2248         assert(h == n);
2249
2250         *ret_bind_mounts = bind_mounts;
2251         *ret_n_bind_mounts = n;
2252         *ret_empty_directories = empty_directories;
2253
2254         empty_directories = NULL;
2255
2256         return (int) n;
2257
2258 finish:
2259         bind_mount_free_many(bind_mounts, h);
2260         return r;
2261 }
2262
2263 static int apply_mount_namespace(
2264                 Unit *u,
2265                 ExecCommand *command,
2266                 const ExecContext *context,
2267                 const ExecParameters *params,
2268                 ExecRuntime *runtime) {
2269
2270         _cleanup_strv_free_ char **rw = NULL, **empty_directories = NULL;
2271         char *tmp = NULL, *var = NULL;
2272         const char *root_dir = NULL, *root_image = NULL;
2273         NameSpaceInfo ns_info = {
2274                 .ignore_protect_paths = false,
2275                 .private_dev = context->private_devices,
2276                 .protect_control_groups = context->protect_control_groups,
2277                 .protect_kernel_tunables = context->protect_kernel_tunables,
2278                 .protect_kernel_modules = context->protect_kernel_modules,
2279                 .mount_apivfs = context->mount_apivfs,
2280         };
2281         bool needs_sandboxing;
2282         BindMount *bind_mounts = NULL;
2283         unsigned n_bind_mounts = 0;
2284         int r;
2285
2286         assert(context);
2287
2288         /* The runtime struct only contains the parent of the private /tmp,
2289          * which is non-accessible to world users. Inside of it there's a /tmp
2290          * that is sticky, and that's the one we want to use here. */
2291
2292         if (context->private_tmp && runtime) {
2293                 if (runtime->tmp_dir)
2294                         tmp = strjoina(runtime->tmp_dir, "/tmp");
2295                 if (runtime->var_tmp_dir)
2296                         var = strjoina(runtime->var_tmp_dir, "/tmp");
2297         }
2298
2299         r = compile_read_write_paths(context, params, &rw);
2300         if (r < 0)
2301                 return r;
2302
2303         if (params->flags & EXEC_APPLY_CHROOT) {
2304                 root_image = context->root_image;
2305
2306                 if (!root_image)
2307                         root_dir = context->root_directory;
2308         }
2309
2310         r = compile_bind_mounts(context, params, &bind_mounts, &n_bind_mounts, &empty_directories);
2311         if (r < 0)
2312                 return r;
2313
2314         /*
2315          * If DynamicUser=no and RootDirectory= is set then lets pass a relaxed
2316          * sandbox info, otherwise enforce it, don't ignore protected paths and
2317          * fail if we are enable to apply the sandbox inside the mount namespace.
2318          */
2319         if (!context->dynamic_user && root_dir)
2320                 ns_info.ignore_protect_paths = true;
2321
2322         needs_sandboxing = (params->flags & EXEC_APPLY_SANDBOXING) && !(command->flags & EXEC_COMMAND_FULLY_PRIVILEGED);
2323
2324         r = setup_namespace(root_dir, root_image,
2325                             &ns_info, rw,
2326                             needs_sandboxing ? context->read_only_paths : NULL,
2327                             needs_sandboxing ? context->inaccessible_paths : NULL,
2328                             empty_directories,
2329                             bind_mounts,
2330                             n_bind_mounts,
2331                             tmp,
2332                             var,
2333                             needs_sandboxing ? context->protect_home : PROTECT_HOME_NO,
2334                             needs_sandboxing ? context->protect_system : PROTECT_SYSTEM_NO,
2335                             context->mount_flags,
2336                             DISSECT_IMAGE_DISCARD_ON_LOOP);
2337
2338         bind_mount_free_many(bind_mounts, n_bind_mounts);
2339
2340         /* If we couldn't set up the namespace this is probably due to a
2341          * missing capability. In this case, silently proceeed. */
2342         if (IN_SET(r, -EPERM, -EACCES)) {
2343                 log_unit_debug_errno(u, r, "Failed to set up namespace, assuming containerized execution, ignoring: %m");
2344                 return 0;
2345         }
2346
2347         return r;
2348 }
2349
2350 static int apply_working_directory(
2351                 const ExecContext *context,
2352                 const ExecParameters *params,
2353                 const char *home,
2354                 const bool needs_mount_ns,
2355                 int *exit_status) {
2356
2357         const char *d, *wd;
2358
2359         assert(context);
2360         assert(exit_status);
2361
2362         if (context->working_directory_home) {
2363
2364                 if (!home) {
2365                         *exit_status = EXIT_CHDIR;
2366                         return -ENXIO;
2367                 }
2368
2369                 wd = home;
2370
2371         } else if (context->working_directory)
2372                 wd = context->working_directory;
2373         else
2374                 wd = "/";
2375
2376         if (params->flags & EXEC_APPLY_CHROOT) {
2377                 if (!needs_mount_ns && context->root_directory)
2378                         if (chroot(context->root_directory) < 0) {
2379                                 *exit_status = EXIT_CHROOT;
2380                                 return -errno;
2381                         }
2382
2383                 d = wd;
2384         } else
2385                 d = prefix_roota(context->root_directory, wd);
2386
2387         if (chdir(d) < 0 && !context->working_directory_missing_ok) {
2388                 *exit_status = EXIT_CHDIR;
2389                 return -errno;
2390         }
2391
2392         return 0;
2393 }
2394
2395 static int setup_keyring(
2396                 Unit *u,
2397                 const ExecContext *context,
2398                 const ExecParameters *p,
2399                 uid_t uid, gid_t gid) {
2400
2401         key_serial_t keyring;
2402         int r;
2403
2404         assert(u);
2405         assert(context);
2406         assert(p);
2407
2408         /* Let's set up a new per-service "session" kernel keyring for each system service. This has the benefit that
2409          * each service runs with its own keyring shared among all processes of the service, but with no hook-up beyond
2410          * that scope, and in particular no link to the per-UID keyring. If we don't do this the keyring will be
2411          * automatically created on-demand and then linked to the per-UID keyring, by the kernel. The kernel's built-in
2412          * on-demand behaviour is very appropriate for login users, but probably not so much for system services, where
2413          * UIDs are not necessarily specific to a service but reused (at least in the case of UID 0). */
2414
2415         if (!(p->flags & EXEC_NEW_KEYRING))
2416                 return 0;
2417
2418         if (context->keyring_mode == EXEC_KEYRING_INHERIT)
2419                 return 0;
2420
2421         keyring = keyctl(KEYCTL_JOIN_SESSION_KEYRING, 0, 0, 0, 0);
2422         if (keyring == -1) {
2423                 if (errno == ENOSYS)
2424                         log_unit_debug_errno(u, errno, "Kernel keyring not supported, ignoring.");
2425                 else if (IN_SET(errno, EACCES, EPERM))
2426                         log_unit_debug_errno(u, errno, "Kernel keyring access prohibited, ignoring.");
2427                 else if (errno == EDQUOT)
2428                         log_unit_debug_errno(u, errno, "Out of kernel keyrings to allocate, ignoring.");
2429                 else
2430                         return log_unit_error_errno(u, errno, "Setting up kernel keyring failed: %m");
2431
2432                 return 0;
2433         }
2434
2435         /* Populate they keyring with the invocation ID by default. */
2436         if (!sd_id128_is_null(u->invocation_id)) {
2437                 key_serial_t key;
2438
2439                 key = add_key("user", "invocation_id", &u->invocation_id, sizeof(u->invocation_id), KEY_SPEC_SESSION_KEYRING);
2440                 if (key == -1)
2441                         log_unit_debug_errno(u, errno, "Failed to add invocation ID to keyring, ignoring: %m");
2442                 else {
2443                         if (keyctl(KEYCTL_SETPERM, key,
2444                                    KEY_POS_VIEW|KEY_POS_READ|KEY_POS_SEARCH|
2445                                    KEY_USR_VIEW|KEY_USR_READ|KEY_USR_SEARCH, 0, 0) < 0)
2446                                 return log_unit_error_errno(u, errno, "Failed to restrict invocation ID permission: %m");
2447                 }
2448         }
2449
2450         /* And now, make the keyring owned by the service's user */
2451         if (uid_is_valid(uid) || gid_is_valid(gid))
2452                 if (keyctl(KEYCTL_CHOWN, keyring, uid, gid, 0) < 0)
2453                         return log_unit_error_errno(u, errno, "Failed to change ownership of session keyring: %m");
2454
2455         /* When requested link the user keyring into the session keyring. */
2456         if (context->keyring_mode == EXEC_KEYRING_SHARED) {
2457                 uid_t saved_uid;
2458                 gid_t saved_gid;
2459
2460                 /* Acquiring a reference to the user keyring is nasty. We briefly change identity in order to get things
2461                  * set up properly by the kernel. If we don't do that then we can't create it atomically, and that
2462                  * sucks for parallel execution. This mimics what pam_keyinit does, too.*/
2463
2464                 saved_uid = getuid();
2465                 saved_gid = getgid();
2466
2467                 if (gid_is_valid(gid) && gid != saved_gid) {
2468                         if (setregid(gid, -1) < 0)
2469                                 return log_unit_error_errno(u, errno, "Failed to change GID for user keyring: %m");
2470                 }
2471
2472                 if (uid_is_valid(uid) && uid != saved_uid) {
2473                         if (setreuid(uid, -1) < 0) {
2474                                 (void) setregid(saved_gid, -1);
2475                                 return log_unit_error_errno(u, errno, "Failed to change UID for user keyring: %m");
2476                         }
2477                 }
2478
2479                 if (keyctl(KEYCTL_LINK,
2480                            KEY_SPEC_USER_KEYRING,
2481                            KEY_SPEC_SESSION_KEYRING, 0, 0) < 0) {
2482
2483                         r = -errno;
2484
2485                         (void) setreuid(saved_uid, -1);
2486                         (void) setregid(saved_gid, -1);
2487
2488                         return log_unit_error_errno(u, r, "Failed to link user keyring into session keyring: %m");
2489                 }
2490
2491                 if (uid_is_valid(uid) && uid != saved_uid) {
2492                         if (setreuid(saved_uid, -1) < 0) {
2493                                 (void) setregid(saved_gid, -1);
2494                                 return log_unit_error_errno(u, errno, "Failed to change UID back for user keyring: %m");
2495                         }
2496                 }
2497
2498                 if (gid_is_valid(gid) && gid != saved_gid) {
2499                         if (setregid(saved_gid, -1) < 0)
2500                                 return log_unit_error_errno(u, errno, "Failed to change GID back for user keyring: %m");
2501                 }
2502         }
2503
2504         return 0;
2505 }
2506
/* Appends the valid (i.e. non-negative) file descriptors of the socket pair to the array, incrementing *n
 * for each descriptor added. This is a NOP if pair is NULL. The caller must make sure the array has room for
 * up to two more entries. */
static void append_socket_pair(int *array, unsigned *n, int pair[2]) {
        unsigned k;

        assert(array);
        assert(n);

        if (!pair)
                return;

        for (k = 0; k < 2; k++) {
                if (pair[k] < 0)
                        continue;

                array[*n] = pair[k];
                (*n)++;
        }
}
2519
2520 static int close_remaining_fds(
2521                 const ExecParameters *params,
2522                 ExecRuntime *runtime,
2523                 DynamicCreds *dcreds,
2524                 int user_lookup_fd,
2525                 int socket_fd,
2526                 int *fds, unsigned n_fds) {
2527
2528         unsigned n_dont_close = 0;
2529         int dont_close[n_fds + 12];
2530
2531         assert(params);
2532
2533         if (params->stdin_fd >= 0)
2534                 dont_close[n_dont_close++] = params->stdin_fd;
2535         if (params->stdout_fd >= 0)
2536                 dont_close[n_dont_close++] = params->stdout_fd;
2537         if (params->stderr_fd >= 0)
2538                 dont_close[n_dont_close++] = params->stderr_fd;
2539
2540         if (socket_fd >= 0)
2541                 dont_close[n_dont_close++] = socket_fd;
2542         if (n_fds > 0) {
2543                 memcpy(dont_close + n_dont_close, fds, sizeof(int) * n_fds);
2544                 n_dont_close += n_fds;
2545         }
2546
2547         if (runtime)
2548                 append_socket_pair(dont_close, &n_dont_close, runtime->netns_storage_socket);
2549
2550         if (dcreds) {
2551                 if (dcreds->user)
2552                         append_socket_pair(dont_close, &n_dont_close, dcreds->user->storage_socket);
2553                 if (dcreds->group)
2554                         append_socket_pair(dont_close, &n_dont_close, dcreds->group->storage_socket);
2555         }
2556
2557         if (user_lookup_fd >= 0)
2558                 dont_close[n_dont_close++] = user_lookup_fd;
2559
2560         return close_all_fds(dont_close, n_dont_close);
2561 }
2562
2563 static int send_user_lookup(
2564                 Unit *unit,
2565                 int user_lookup_fd,
2566                 uid_t uid,
2567                 gid_t gid) {
2568
2569         assert(unit);
2570
2571         /* Send the resolved UID/GID to PID 1 after we learnt it. We send a single datagram, containing the UID/GID
2572          * data as well as the unit name. Note that we suppress sending this if no user/group to resolve was
2573          * specified. */
2574
2575         if (user_lookup_fd < 0)
2576                 return 0;
2577
2578         if (!uid_is_valid(uid) && !gid_is_valid(gid))
2579                 return 0;
2580
2581         if (writev(user_lookup_fd,
2582                (struct iovec[]) {
2583                            IOVEC_INIT(&uid, sizeof(uid)),
2584                            IOVEC_INIT(&gid, sizeof(gid)),
2585                            IOVEC_INIT_STRING(unit->id) }, 3) < 0)
2586                 return -errno;
2587
2588         return 0;
2589 }
2590
2591 static int acquire_home(const ExecContext *c, uid_t uid, const char** home, char **buf) {
2592         int r;
2593
2594         assert(c);
2595         assert(home);
2596         assert(buf);
2597
2598         /* If WorkingDirectory=~ is set, try to acquire a usable home directory. */
2599
2600         if (*home)
2601                 return 0;
2602
2603         if (!c->working_directory_home)
2604                 return 0;
2605
2606         if (uid == 0) {
2607                 /* Hardcode /root as home directory for UID 0 */
2608                 *home = "/root";
2609                 return 1;
2610         }
2611
2612         r = get_home_dir(buf);
2613         if (r < 0)
2614                 return r;
2615
2616         *home = *buf;
2617         return 1;
2618 }
2619
2620 static int compile_suggested_paths(const ExecContext *c, const ExecParameters *p, char ***ret) {
2621         _cleanup_strv_free_ char ** list = NULL;
2622         ExecDirectoryType t;
2623         int r;
2624
2625         assert(c);
2626         assert(p);
2627         assert(ret);
2628
2629         assert(c->dynamic_user);
2630
2631         /* Compile a list of paths that it might make sense to read the owning UID from to use as initial candidate for
2632          * dynamic UID allocation, in order to save us from doing costly recursive chown()s of the special
2633          * directories. */
2634
2635         for (t = 0; t < _EXEC_DIRECTORY_TYPE_MAX; t++) {
2636                 char **i;
2637
2638                 if (t == EXEC_DIRECTORY_CONFIGURATION)
2639                         continue;
2640
2641                 if (!p->prefix[t])
2642                         continue;
2643
2644                 STRV_FOREACH(i, c->directories[t].paths) {
2645                         char *e;
2646
2647                         e = strjoin(p->prefix[t], "/private/", *i);
2648                         if (!e)
2649                                 return -ENOMEM;
2650
2651                         r = strv_consume(&list, e);
2652                         if (r < 0)
2653                                 return r;
2654                 }
2655         }
2656
2657         *ret = list;
2658         list = NULL;
2659
2660         return 0;
2661 }
2662
2663 static int exec_child(
2664                 Unit *unit,
2665                 ExecCommand *command,
2666                 const ExecContext *context,
2667                 const ExecParameters *params,
2668                 ExecRuntime *runtime,
2669                 DynamicCreds *dcreds,
2670                 char **argv,
2671                 int socket_fd,
2672                 int named_iofds[3],
2673                 int *fds,
2674                 unsigned n_storage_fds,
2675                 unsigned n_socket_fds,
2676                 char **files_env,
2677                 int user_lookup_fd,
2678                 int *exit_status) {
2679
2680         _cleanup_strv_free_ char **our_env = NULL, **pass_env = NULL, **accum_env = NULL, **final_argv = NULL;
2681         _cleanup_free_ char *mac_selinux_context_net = NULL, *home_buffer = NULL;
2682         _cleanup_free_ gid_t *supplementary_gids = NULL;
2683         const char *username = NULL, *groupname = NULL;
2684         const char *home = NULL, *shell = NULL;
2685         dev_t journal_stream_dev = 0;
2686         ino_t journal_stream_ino = 0;
2687         bool needs_sandboxing,          /* Do we need to set up full sandboxing? (i.e. all namespacing, all MAC stuff, caps, yadda yadda */
2688                 needs_setuid,           /* Do we need to do the actual setresuid()/setresgid() calls? */
2689                 needs_mount_namespace,  /* Do we need to set up a mount namespace for this kernel? */
2690                 needs_ambient_hack;     /* Do we need to apply the ambient capabilities hack? */
2691 #if HAVE_SELINUX
2692         bool use_selinux = false;
2693 #endif
2694 #if HAVE_SMACK
2695         bool use_smack = false;
2696 #endif
2697 #if HAVE_APPARMOR
2698         bool use_apparmor = false;
2699 #endif
2700         uid_t uid = UID_INVALID;
2701         gid_t gid = GID_INVALID;
2702         int i, r, ngids = 0;
2703         unsigned n_fds;
2704         ExecDirectoryType dt;
2705         int secure_bits;
2706
2707         assert(unit);
2708         assert(command);
2709         assert(context);
2710         assert(params);
2711         assert(exit_status);
2712
2713         rename_process_from_path(command->path);
2714
2715         /* We reset exactly these signals, since they are the
2716          * only ones we set to SIG_IGN in the main daemon. All
2717          * others we leave untouched because we set them to
2718          * SIG_DFL or a valid handler initially, both of which
2719          * will be demoted to SIG_DFL. */
2720         (void) default_signals(SIGNALS_CRASH_HANDLER,
2721                                SIGNALS_IGNORE, -1);
2722
2723         if (context->ignore_sigpipe)
2724                 (void) ignore_signals(SIGPIPE, -1);
2725
2726         r = reset_signal_mask();
2727         if (r < 0) {
2728                 *exit_status = EXIT_SIGNAL_MASK;
2729                 return log_unit_error_errno(unit, r, "Failed to set process signal mask: %m");
2730         }
2731
2732         if (params->idle_pipe)
2733                 do_idle_pipe_dance(params->idle_pipe);
2734
2735         /* Close fds we don't need very early to make sure we don't block init reexecution because it cannot bind its
2736          * sockets. Among the fds we close are the logging fds, and we want to keep them closed, so that we don't have
2737          * any fds open we don't really want open during the transition. In order to make logging work, we switch the
2738          * log subsystem into open_when_needed mode, so that it reopens the logs on every single log call. */
2739
2740         log_forget_fds();
2741         log_set_open_when_needed(true);
2742
2743         /* In case anything used libc syslog(), close this here, too */
2744         closelog();
2745
2746         n_fds = n_storage_fds + n_socket_fds;
2747         r = close_remaining_fds(params, runtime, dcreds, user_lookup_fd, socket_fd, fds, n_fds);
2748         if (r < 0) {
2749                 *exit_status = EXIT_FDS;
2750                 return log_unit_error_errno(unit, r, "Failed to close unwanted file descriptors: %m");
2751         }
2752
2753         if (!context->same_pgrp)
2754                 if (setsid() < 0) {
2755                         *exit_status = EXIT_SETSID;
2756                         return log_unit_error_errno(unit, errno, "Failed to create new process session: %m");
2757                 }
2758
2759         exec_context_tty_reset(context, params);
2760
2761         if (unit_shall_confirm_spawn(unit)) {
2762                 const char *vc = params->confirm_spawn;
2763                 _cleanup_free_ char *cmdline = NULL;
2764
2765                 cmdline = exec_command_line(argv);
2766                 if (!cmdline) {
2767                         *exit_status = EXIT_MEMORY;
2768                         return log_oom();
2769                 }
2770
2771                 r = ask_for_confirmation(vc, unit, cmdline);
2772                 if (r != CONFIRM_EXECUTE) {
2773                         if (r == CONFIRM_PRETEND_SUCCESS) {
2774                                 *exit_status = EXIT_SUCCESS;
2775                                 return 0;
2776                         }
2777                         *exit_status = EXIT_CONFIRM;
2778                         log_unit_error(unit, "Execution cancelled by the user");
2779                         return -ECANCELED;
2780                 }
2781         }
2782
2783         if (context->dynamic_user && dcreds) {
2784                 _cleanup_strv_free_ char **suggested_paths = NULL;
2785
2786                 /* Make sure we bypass our own NSS module for any NSS checks */
2787                 if (putenv((char*) "SYSTEMD_NSS_DYNAMIC_BYPASS=1") != 0) {
2788                         *exit_status = EXIT_USER;
2789                         return log_unit_error_errno(unit, errno, "Failed to update environment: %m");
2790                 }
2791
2792                 r = compile_suggested_paths(context, params, &suggested_paths);
2793                 if (r < 0) {
2794                         *exit_status = EXIT_MEMORY;
2795                         return log_oom();
2796                 }
2797
2798                 r = dynamic_creds_realize(dcreds, suggested_paths, &uid, &gid);
2799                 if (r < 0) {
2800                         *exit_status = EXIT_USER;
2801                         return log_unit_error_errno(unit, r, "Failed to update dynamic user credentials: %m");
2802                 }
2803
2804                 if (!uid_is_valid(uid)) {
2805                         *exit_status = EXIT_USER;
2806                         log_unit_error(unit, "UID validation failed for \""UID_FMT"\"", uid);
2807                         return -ESRCH;
2808                 }
2809
2810                 if (!gid_is_valid(gid)) {
2811                         *exit_status = EXIT_USER;
2812                         log_unit_error(unit, "GID validation failed for \""GID_FMT"\"", gid);
2813                         return -ESRCH;
2814                 }
2815
2816                 if (dcreds->user)
2817                         username = dcreds->user->name;
2818
2819         } else {
2820                 r = get_fixed_user(context, &username, &uid, &gid, &home, &shell);
2821                 if (r < 0) {
2822                         *exit_status = EXIT_USER;
2823                         return log_unit_error_errno(unit, r, "Failed to determine user credentials: %m");
2824                 }
2825
2826                 r = get_fixed_group(context, &groupname, &gid);
2827                 if (r < 0) {
2828                         *exit_status = EXIT_GROUP;
2829                         return log_unit_error_errno(unit, r, "Failed to determine group credentials: %m");
2830                 }
2831         }
2832
2833         /* Initialize user supplementary groups and get SupplementaryGroups= ones */
2834         r = get_supplementary_groups(context, username, groupname, gid,
2835                                      &supplementary_gids, &ngids);
2836         if (r < 0) {
2837                 *exit_status = EXIT_GROUP;
2838                 return log_unit_error_errno(unit, r, "Failed to determine supplementary groups: %m");
2839         }
2840
2841         r = send_user_lookup(unit, user_lookup_fd, uid, gid);
2842         if (r < 0) {
2843                 *exit_status = EXIT_USER;
2844                 return log_unit_error_errno(unit, r, "Failed to send user credentials to PID1: %m");
2845         }
2846
2847         user_lookup_fd = safe_close(user_lookup_fd);
2848
2849         r = acquire_home(context, uid, &home, &home_buffer);
2850         if (r < 0) {
2851                 *exit_status = EXIT_CHDIR;
2852                 return log_unit_error_errno(unit, r, "Failed to determine $HOME for user: %m");
2853         }
2854
2855         /* If a socket is connected to STDIN/STDOUT/STDERR, we
2856          * must make sure to drop O_NONBLOCK */
2857         if (socket_fd >= 0)
2858                 (void) fd_nonblock(socket_fd, false);
2859
2860         r = setup_input(context, params, socket_fd, named_iofds);
2861         if (r < 0) {
2862                 *exit_status = EXIT_STDIN;
2863                 return log_unit_error_errno(unit, r, "Failed to set up standard input: %m");
2864         }
2865
2866         r = setup_output(unit, context, params, STDOUT_FILENO, socket_fd, named_iofds, basename(command->path), uid, gid, &journal_stream_dev, &journal_stream_ino);
2867         if (r < 0) {
2868                 *exit_status = EXIT_STDOUT;
2869                 return log_unit_error_errno(unit, r, "Failed to set up standard output: %m");
2870         }
2871
2872         r = setup_output(unit, context, params, STDERR_FILENO, socket_fd, named_iofds, basename(command->path), uid, gid, &journal_stream_dev, &journal_stream_ino);
2873         if (r < 0) {
2874                 *exit_status = EXIT_STDERR;
2875                 return log_unit_error_errno(unit, r, "Failed to set up standard error output: %m");
2876         }
2877
2878         if (params->cgroup_path) {
2879                 r = cg_attach_everywhere(params->cgroup_supported, params->cgroup_path, 0, NULL, NULL);
2880                 if (r < 0) {
2881                         *exit_status = EXIT_CGROUP;
2882                         return log_unit_error_errno(unit, r, "Failed to attach to cgroup %s: %m", params->cgroup_path);
2883                 }
2884         }
2885
2886         if (context->oom_score_adjust_set) {
2887                 char t[DECIMAL_STR_MAX(context->oom_score_adjust)];
2888
2889                 /* When we can't make this change due to EPERM, then
2890                  * let's silently skip over it. User namespaces
2891                  * prohibit write access to this file, and we
2892                  * shouldn't trip up over that. */
2893
2894                 sprintf(t, "%i", context->oom_score_adjust);
2895                 r = write_string_file("/proc/self/oom_score_adj", t, 0);
2896                 if (IN_SET(r, -EPERM, -EACCES))
2897                         log_unit_debug_errno(unit, r, "Failed to adjust OOM setting, assuming containerized execution, ignoring: %m");
2898                 else if (r < 0) {
2899                         *exit_status = EXIT_OOM_ADJUST;
2900                         return log_unit_error_errno(unit, r, "Failed to adjust OOM setting: %m");
2901                 }
2902         }
2903
2904         if (context->nice_set)
2905                 if (setpriority(PRIO_PROCESS, 0, context->nice) < 0) {
2906                         *exit_status = EXIT_NICE;
2907                         return log_unit_error_errno(unit, errno, "Failed to set up process scheduling priority (nice level): %m");
2908                 }
2909
2910         if (context->cpu_sched_set) {
2911                 struct sched_param param = {
2912                         .sched_priority = context->cpu_sched_priority,
2913                 };
2914
2915                 r = sched_setscheduler(0,
2916                                        context->cpu_sched_policy |
2917                                        (context->cpu_sched_reset_on_fork ?
2918                                         SCHED_RESET_ON_FORK : 0),
2919                                        &param);
2920                 if (r < 0) {
2921                         *exit_status = EXIT_SETSCHEDULER;
2922                         return log_unit_error_errno(unit, errno, "Failed to set up CPU scheduling: %m");
2923                 }
2924         }
2925
2926         if (context->cpuset)
2927                 if (sched_setaffinity(0, CPU_ALLOC_SIZE(context->cpuset_ncpus), context->cpuset) < 0) {
2928                         *exit_status = EXIT_CPUAFFINITY;
2929                         return log_unit_error_errno(unit, errno, "Failed to set up CPU affinity: %m");
2930                 }
2931
2932         if (context->ioprio_set)
2933                 if (ioprio_set(IOPRIO_WHO_PROCESS, 0, context->ioprio) < 0) {
2934                         *exit_status = EXIT_IOPRIO;
2935                         return log_unit_error_errno(unit, errno, "Failed to set up IO scheduling priority: %m");
2936                 }
2937
2938         if (context->timer_slack_nsec != NSEC_INFINITY)
2939                 if (prctl(PR_SET_TIMERSLACK, context->timer_slack_nsec) < 0) {
2940                         *exit_status = EXIT_TIMERSLACK;
2941                         return log_unit_error_errno(unit, errno, "Failed to set up timer slack: %m");
2942                 }
2943
2944         if (context->personality != PERSONALITY_INVALID) {
2945                 r = safe_personality(context->personality);
2946                 if (r < 0) {
2947                         *exit_status = EXIT_PERSONALITY;
2948                         return log_unit_error_errno(unit, r, "Failed to set up execution domain (personality): %m");
2949                 }
2950         }
2951
2952         if (context->utmp_id)
2953                 utmp_put_init_process(context->utmp_id, getpid_cached(), getsid(0),
2954                                       context->tty_path,
2955                                       context->utmp_mode == EXEC_UTMP_INIT  ? INIT_PROCESS :
2956                                       context->utmp_mode == EXEC_UTMP_LOGIN ? LOGIN_PROCESS :
2957                                       USER_PROCESS,
2958                                       username);
2959
2960         if (context->user) {
2961                 r = chown_terminal(STDIN_FILENO, uid);
2962                 if (r < 0) {
2963                         *exit_status = EXIT_STDIN;
2964                         return log_unit_error_errno(unit, r, "Failed to change ownership of terminal: %m");
2965                 }
2966         }
2967
2968         /* If delegation is enabled we'll pass ownership of the cgroup
2969          * (but only in systemd's own controller hierarchy!) to the
2970          * user of the new process. */
2971         if (params->cgroup_path && context->user && (params->flags & EXEC_CGROUP_DELEGATE)) {
2972                 r = cg_set_task_access(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, 0644, uid, gid);
2973                 if (r < 0) {
2974                         *exit_status = EXIT_CGROUP;
2975                         return log_unit_error_errno(unit, r, "Failed to adjust control group access: %m");
2976                 }
2977
2978                 r = cg_set_group_access(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, 0755, uid, gid);
2979                 if (r < 0) {
2980                         *exit_status = EXIT_CGROUP;
2981                         return log_unit_error_errno(unit, r, "Failed to adjust control group access: %m");
2982                 }
2983         }
2984
2985         for (dt = 0; dt < _EXEC_DIRECTORY_TYPE_MAX; dt++) {
2986                 r = setup_exec_directory(context, params, uid, gid, dt, exit_status);
2987                 if (r < 0)
2988                         return log_unit_error_errno(unit, r, "Failed to set up special execution directory in %s: %m", params->prefix[dt]);
2989         }
2990
2991         r = build_environment(
2992                         unit,
2993                         context,
2994                         params,
2995                         n_fds,
2996                         home,
2997                         username,
2998                         shell,
2999                         journal_stream_dev,
3000                         journal_stream_ino,
3001                         &our_env);
3002         if (r < 0) {
3003                 *exit_status = EXIT_MEMORY;
3004                 return log_oom();
3005         }
3006
3007         r = build_pass_environment(context, &pass_env);
3008         if (r < 0) {
3009                 *exit_status = EXIT_MEMORY;
3010                 return log_oom();
3011         }
3012
3013         accum_env = strv_env_merge(5,
3014                                    params->environment,
3015                                    our_env,
3016                                    pass_env,
3017                                    context->environment,
3018                                    files_env,
3019                                    NULL);
3020         if (!accum_env) {
3021                 *exit_status = EXIT_MEMORY;
3022                 return log_oom();
3023         }
3024         accum_env = strv_env_clean(accum_env);
3025
3026         (void) umask(context->umask);
3027
3028         r = setup_keyring(unit, context, params, uid, gid);
3029         if (r < 0) {
3030                 *exit_status = EXIT_KEYRING;
3031                 return log_unit_error_errno(unit, r, "Failed to set up kernel keyring: %m");
3032         }
3033
3034         /* We need sandboxing if the caller asked us to apply it and the command isn't explicitly excepted from it */
3035         needs_sandboxing = (params->flags & EXEC_APPLY_SANDBOXING) && !(command->flags & EXEC_COMMAND_FULLY_PRIVILEGED);
3036
3037         /* We need the ambient capability hack, if the caller asked us to apply it and the command is marked for it, and the kernel doesn't actually support ambient caps */
3038         needs_ambient_hack = (params->flags & EXEC_APPLY_SANDBOXING) && (command->flags & EXEC_COMMAND_AMBIENT_MAGIC) && !ambient_capabilities_supported();
3039
3040         /* We need setresuid() if the caller asked us to apply sandboxing and the command isn't explicitly excepted from either whole sandboxing or just setresuid() itself, and the ambient hack is not desired */
3041         if (needs_ambient_hack)
3042                 needs_setuid = false;
3043         else
3044                 needs_setuid = (params->flags & EXEC_APPLY_SANDBOXING) && !(command->flags & (EXEC_COMMAND_FULLY_PRIVILEGED|EXEC_COMMAND_NO_SETUID));
3045
3046         if (needs_sandboxing) {
3047                 /* MAC enablement checks need to be done before a new mount ns is created, as they rely on /sys being
3048                  * present. The actual MAC context application will happen later, as late as possible, to avoid
3049                  * impacting our own code paths. */
3050
3051 #if HAVE_SELINUX
3052                 use_selinux = mac_selinux_use();
3053 #endif
3054 #if HAVE_SMACK
3055                 use_smack = mac_smack_use();
3056 #endif
3057 #if HAVE_APPARMOR
3058                 use_apparmor = mac_apparmor_use();
3059 #endif
3060         }
3061
3062         if (needs_setuid) {
3063                 if (context->pam_name && username) {
3064                         r = setup_pam(context->pam_name, username, uid, gid, context->tty_path, &accum_env, fds, n_fds);
3065                         if (r < 0) {
3066                                 *exit_status = EXIT_PAM;
3067                                 return log_unit_error_errno(unit, r, "Failed to set up PAM session: %m");
3068                         }
3069                 }
3070         }
3071
3072         if (context->private_network && runtime && runtime->netns_storage_socket[0] >= 0) {
3073                 r = setup_netns(runtime->netns_storage_socket);
3074                 if (r < 0) {
3075                         *exit_status = EXIT_NETWORK;
3076                         return log_unit_error_errno(unit, r, "Failed to set up network namespacing: %m");
3077                 }
3078         }
3079
3080         needs_mount_namespace = exec_needs_mount_namespace(context, params, runtime);
3081         if (needs_mount_namespace) {
3082                 r = apply_mount_namespace(unit, command, context, params, runtime);
3083                 if (r < 0) {
3084                         *exit_status = EXIT_NAMESPACE;
3085                         return log_unit_error_errno(unit, r, "Failed to set up mount namespacing: %m");
3086                 }
3087         }
3088
3089         /* Apply just after mount namespace setup */
3090         r = apply_working_directory(context, params, home, needs_mount_namespace, exit_status);
3091         if (r < 0)
3092                 return log_unit_error_errno(unit, r, "Changing to the requested working directory failed: %m");
3093
3094         /* Drop groups as early as possible */
3095         if (needs_setuid) {
3096                 r = enforce_groups(context, gid, supplementary_gids, ngids);
3097                 if (r < 0) {
3098                         *exit_status = EXIT_GROUP;
3099                         return log_unit_error_errno(unit, r, "Changing group credentials failed: %m");
3100                 }
3101         }
3102
3103         if (needs_sandboxing) {
3104 #if HAVE_SELINUX
3105                 if (use_selinux && params->selinux_context_net && socket_fd >= 0) {
3106                         r = mac_selinux_get_child_mls_label(socket_fd, command->path, context->selinux_context, &mac_selinux_context_net);
3107                         if (r < 0) {
3108                                 *exit_status = EXIT_SELINUX_CONTEXT;
3109                                 return log_unit_error_errno(unit, r, "Failed to determine SELinux context: %m");
3110                         }
3111                 }
3112 #endif
3113
3114                 if (context->private_users) {
3115                         r = setup_private_users(uid, gid);
3116                         if (r < 0) {
3117                                 *exit_status = EXIT_USER;
3118                                 return log_unit_error_errno(unit, r, "Failed to set up user namespacing: %m");
3119                         }
3120                 }
3121         }
3122
3123         /* We repeat the fd closing here, to make sure that nothing is leaked from the PAM modules. Note that we are
3124          * more aggressive this time since socket_fd and the netns fds we don't need anymore. The custom endpoint fd
3125          * was needed to upload the policy and can now be closed as well. */
3126         r = close_all_fds(fds, n_fds);
3127         if (r >= 0)
3128                 r = shift_fds(fds, n_fds);
3129         if (r >= 0)
3130                 r = flags_fds(fds, n_storage_fds, n_socket_fds, context->non_blocking);
3131         if (r < 0) {
3132                 *exit_status = EXIT_FDS;
3133                 return log_unit_error_errno(unit, r, "Failed to adjust passed file descriptors: %m");
3134         }
3135
3136         secure_bits = context->secure_bits;
3137
3138         if (needs_sandboxing) {
3139                 uint64_t bset;
3140
3141                 for (i = 0; i < _RLIMIT_MAX; i++) {
3142
3143                         if (!context->rlimit[i])
3144                                 continue;
3145
3146                         r = setrlimit_closest(i, context->rlimit[i]);
3147                         if (r < 0) {
3148                                 *exit_status = EXIT_LIMITS;
3149                                 return log_unit_error_errno(unit, r, "Failed to adjust resource limit %s: %m", rlimit_to_string(i));
3150                         }
3151                 }
3152
3153                 /* Set the RTPRIO resource limit to 0, but only if nothing else was explicitly requested. */
3154                 if (context->restrict_realtime && !context->rlimit[RLIMIT_RTPRIO]) {
3155                         if (setrlimit(RLIMIT_RTPRIO, &RLIMIT_MAKE_CONST(0)) < 0) {
3156                                 *exit_status = EXIT_LIMITS;
3157                                 return log_unit_error_errno(unit, errno, "Failed to adjust RLIMIT_RTPRIO resource limit: %m");
3158                         }
3159                 }
3160
3161                 bset = context->capability_bounding_set;
3162                 /* If the ambient caps hack is enabled (which means the kernel can't do them, and the user asked for
3163                  * our magic fallback), then let's add some extra caps, so that the service can drop privs of its own,
3164                  * instead of us doing that */
3165                 if (needs_ambient_hack)
3166                         bset |= (UINT64_C(1) << CAP_SETPCAP) |
3167                                 (UINT64_C(1) << CAP_SETUID) |
3168                                 (UINT64_C(1) << CAP_SETGID);
3169
3170                 if (!cap_test_all(bset)) {
3171                         r = capability_bounding_set_drop(bset, false);
3172                         if (r < 0) {
3173                                 *exit_status = EXIT_CAPABILITIES;
3174                                 return log_unit_error_errno(unit, r, "Failed to drop capabilities: %m");
3175                         }
3176                 }
3177
3178                 /* This is done before enforce_user, but ambient set
3179                  * does not survive over setresuid() if keep_caps is not set. */
3180                 if (!needs_ambient_hack &&
3181                     context->capability_ambient_set != 0) {
3182                         r = capability_ambient_set_apply(context->capability_ambient_set, true);
3183                         if (r < 0) {
3184                                 *exit_status = EXIT_CAPABILITIES;
3185                                 return log_unit_error_errno(unit, r, "Failed to apply ambient capabilities (before UID change): %m");
3186                         }
3187                 }
3188         }
3189
3190         if (needs_setuid) {
3191                 if (context->user) {
3192                         r = enforce_user(context, uid);
3193                         if (r < 0) {
3194                                 *exit_status = EXIT_USER;
3195                                 return log_unit_error_errno(unit, r, "Failed to change UID to " UID_FMT ": %m", uid);
3196                         }
3197
3198                         if (!needs_ambient_hack &&
3199                             context->capability_ambient_set != 0) {
3200
3201                                 /* Fix the ambient capabilities after user change. */
3202                                 r = capability_ambient_set_apply(context->capability_ambient_set, false);
3203                                 if (r < 0) {
3204                                         *exit_status = EXIT_CAPABILITIES;
3205                                         return log_unit_error_errno(unit, r, "Failed to apply ambient capabilities (after UID change): %m");
3206                                 }
3207
3208                                 /* If we were asked to change user and ambient capabilities
3209                                  * were requested, we had to add keep-caps to the securebits
3210                                  * so that we would maintain the inherited capability set
3211                                  * through the setresuid(). Make sure that the bit is added
3212                                  * also to the context secure_bits so that we don't try to
3213                                  * drop the bit away next. */
3214
3215                                 secure_bits |= 1<<SECURE_KEEP_CAPS;
3216                         }
3217                 }
3218         }
3219
3220         if (needs_sandboxing) {
3221                 /* Apply the MAC contexts late, but before seccomp syscall filtering, as those should really be last to
3222                  * influence our own codepaths as little as possible. Moreover, applying MAC contexts usually requires
3223                  * syscalls that are subject to seccomp filtering, hence should probably be applied before the syscalls
3224                  * are restricted. */
3225
3226 #if HAVE_SELINUX
3227                 if (use_selinux) {
3228                         char *exec_context = mac_selinux_context_net ?: context->selinux_context;
3229
3230                         if (exec_context) {
3231                                 r = setexeccon(exec_context);
3232                                 if (r < 0) {
3233                                         *exit_status = EXIT_SELINUX_CONTEXT;
3234                                         return log_unit_error_errno(unit, r, "Failed to change SELinux context to %s: %m", exec_context);
3235                                 }
3236                         }
3237                 }
3238 #endif
3239
3240 #if HAVE_SMACK
3241                 if (use_smack) {
3242                         r = setup_smack(context, command);
3243                         if (r < 0) {
3244                                 *exit_status = EXIT_SMACK_PROCESS_LABEL;
3245                                 return log_unit_error_errno(unit, r, "Failed to set SMACK process label: %m");
3246                         }
3247                 }
3248 #endif
3249
3250 #if HAVE_APPARMOR
3251                 if (use_apparmor && context->apparmor_profile) {
3252                         r = aa_change_onexec(context->apparmor_profile);
3253                         if (r < 0 && !context->apparmor_profile_ignore) {
3254                                 *exit_status = EXIT_APPARMOR_PROFILE;
3255                                 return log_unit_error_errno(unit, errno, "Failed to prepare AppArmor profile change to %s: %m", context->apparmor_profile);
3256                         }
3257                 }
3258 #endif
3259
3260                 /* PR_GET_SECUREBITS is not privileged, while PR_SET_SECUREBITS is. So to suppress potential EPERMs
3261                  * we'll try not to call PR_SET_SECUREBITS unless necessary. */
3262                 if (prctl(PR_GET_SECUREBITS) != secure_bits)
3263                         if (prctl(PR_SET_SECUREBITS, secure_bits) < 0) {
3264                                 *exit_status = EXIT_SECUREBITS;
3265                                 return log_unit_error_errno(unit, errno, "Failed to set process secure bits: %m");
3266                         }
3267
3268                 if (context_has_no_new_privileges(context))
3269                         if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) < 0) {
3270                                 *exit_status = EXIT_NO_NEW_PRIVILEGES;
3271                                 return log_unit_error_errno(unit, errno, "Failed to disable new privileges: %m");
3272                         }
3273
3274 #if HAVE_SECCOMP
3275                 r = apply_address_families(unit, context);
3276                 if (r < 0) {
3277                         *exit_status = EXIT_ADDRESS_FAMILIES;
3278                         return log_unit_error_errno(unit, r, "Failed to restrict address families: %m");
3279                 }
3280
3281                 r = apply_memory_deny_write_execute(unit, context);
3282                 if (r < 0) {
3283                         *exit_status = EXIT_SECCOMP;
3284                         return log_unit_error_errno(unit, r, "Failed to disable writing to executable memory: %m");
3285                 }
3286
3287                 r = apply_restrict_realtime(unit, context);
3288                 if (r < 0) {
3289                         *exit_status = EXIT_SECCOMP;
3290                         return log_unit_error_errno(unit, r, "Failed to apply realtime restrictions: %m");
3291                 }
3292
3293                 r = apply_restrict_namespaces(unit, context);
3294                 if (r < 0) {
3295                         *exit_status = EXIT_SECCOMP;
3296                         return log_unit_error_errno(unit, r, "Failed to apply namespace restrictions: %m");
3297                 }
3298
3299                 r = apply_protect_sysctl(unit, context);
3300                 if (r < 0) {
3301                         *exit_status = EXIT_SECCOMP;
3302                         return log_unit_error_errno(unit, r, "Failed to apply sysctl restrictions: %m");
3303                 }
3304
3305                 r = apply_protect_kernel_modules(unit, context);
3306                 if (r < 0) {
3307                         *exit_status = EXIT_SECCOMP;
3308                         return log_unit_error_errno(unit, r, "Failed to apply module loading restrictions: %m");
3309                 }
3310
3311                 r = apply_private_devices(unit, context);
3312                 if (r < 0) {
3313                         *exit_status = EXIT_SECCOMP;
3314                         return log_unit_error_errno(unit, r, "Failed to set up private devices: %m");
3315                 }
3316
3317                 r = apply_syscall_archs(unit, context);
3318                 if (r < 0) {
3319                         *exit_status = EXIT_SECCOMP;
3320                         return log_unit_error_errno(unit, r, "Failed to apply syscall architecture restrictions: %m");
3321                 }
3322
3323                 r = apply_lock_personality(unit, context);
3324                 if (r < 0) {
3325                         *exit_status = EXIT_SECCOMP;
3326                         return log_unit_error_errno(unit, r, "Failed to lock personalities: %m");
3327                 }
3328
3329                 /* This really should remain the last step before the execve(), to make sure our own code is unaffected
3330                  * by the filter as little as possible. */
3331                 r = apply_syscall_filter(unit, context, needs_ambient_hack);
3332                 if (r < 0) {
3333                         *exit_status = EXIT_SECCOMP;
3334                         return log_unit_error_errno(unit, r, "Failed to apply system call filters: %m");
3335                 }
3336 #endif
3337         }
3338
3339         if (!strv_isempty(context->unset_environment)) {
3340                 char **ee = NULL;
3341
3342                 ee = strv_env_delete(accum_env, 1, context->unset_environment);
3343                 if (!ee) {
3344                         *exit_status = EXIT_MEMORY;
3345                         return log_oom();
3346                 }
3347
3348                 strv_free(accum_env);
3349                 accum_env = ee;
3350         }
3351
3352         final_argv = replace_env_argv(argv, accum_env);
3353         if (!final_argv) {
3354                 *exit_status = EXIT_MEMORY;
3355                 return log_oom();
3356         }
3357
3358         if (_unlikely_(log_get_max_level() >= LOG_DEBUG)) {
3359                 _cleanup_free_ char *line;
3360
3361                 line = exec_command_line(final_argv);
3362                 if (line) {
3363                         log_struct(LOG_DEBUG,
3364                                    "EXECUTABLE=%s", command->path,
3365                                    LOG_UNIT_MESSAGE(unit, "Executing: %s", line),
3366                                    LOG_UNIT_ID(unit),
3367                                    LOG_UNIT_INVOCATION_ID(unit),
3368                                    NULL);
3369                 }
3370         }
3371
3372         execve(command->path, final_argv, accum_env);
3373
3374         if (errno == ENOENT && (command->flags & EXEC_COMMAND_IGNORE_FAILURE)) {
3375
3376                 log_struct_errno(LOG_INFO, errno,
3377                                  "MESSAGE_ID=" SD_MESSAGE_SPAWN_FAILED_STR,
3378                                  LOG_UNIT_ID(unit),
3379                                  LOG_UNIT_INVOCATION_ID(unit),
3380                                  LOG_UNIT_MESSAGE(unit, "Executable %s missing, skipping: %m",
3381                                                   command->path),
3382                                  "EXECUTABLE=%s", command->path,
3383                                  NULL);
3384
3385                 return 0;
3386         }
3387
3388         *exit_status = EXIT_EXEC;
3389         return log_unit_error_errno(unit, errno, "Failed to execute command: %m");
3390 }
3391
3392 int exec_spawn(Unit *unit,
3393                ExecCommand *command,
3394                const ExecContext *context,
3395                const ExecParameters *params,
3396                ExecRuntime *runtime,
3397                DynamicCreds *dcreds,
3398                pid_t *ret) {
3399
3400         _cleanup_strv_free_ char **files_env = NULL;
3401         int *fds = NULL;
3402         unsigned n_storage_fds = 0, n_socket_fds = 0;
3403         _cleanup_free_ char *line = NULL;
3404         int socket_fd, r;
3405         int named_iofds[3] = { -1, -1, -1 };
3406         char **argv;
3407         pid_t pid;
3408
3409         assert(unit);
3410         assert(command);
3411         assert(context);
3412         assert(ret);
3413         assert(params);
3414         assert(params->fds || (params->n_storage_fds + params->n_socket_fds <= 0));
3415
3416         if (context->std_input == EXEC_INPUT_SOCKET ||
3417             context->std_output == EXEC_OUTPUT_SOCKET ||
3418             context->std_error == EXEC_OUTPUT_SOCKET) {
3419
3420                 if (params->n_socket_fds > 1) {
3421                         log_unit_error(unit, "Got more than one socket.");
3422                         return -EINVAL;
3423                 }
3424
3425                 if (params->n_socket_fds == 0) {
3426                         log_unit_error(unit, "Got no socket.");
3427                         return -EINVAL;
3428                 }
3429
3430                 socket_fd = params->fds[0];
3431         } else {
3432                 socket_fd = -1;
3433                 fds = params->fds;
3434                 n_storage_fds = params->n_storage_fds;
3435                 n_socket_fds = params->n_socket_fds;
3436         }
3437
3438         r = exec_context_named_iofds(unit, context, params, named_iofds);
3439         if (r < 0)
3440                 return log_unit_error_errno(unit, r, "Failed to load a named file descriptor: %m");
3441
3442         r = exec_context_load_environment(unit, context, &files_env);
3443         if (r < 0)
3444                 return log_unit_error_errno(unit, r, "Failed to load environment files: %m");
3445
3446         argv = params->argv ?: command->argv;
3447         line = exec_command_line(argv);
3448         if (!line)
3449                 return log_oom();
3450
3451         log_struct(LOG_DEBUG,
3452                    LOG_UNIT_MESSAGE(unit, "About to execute: %s", line),
3453                    "EXECUTABLE=%s", command->path,
3454                    LOG_UNIT_ID(unit),
3455                    LOG_UNIT_INVOCATION_ID(unit),
3456                    NULL);
3457
3458         pid = fork();
3459         if (pid < 0)
3460                 return log_unit_error_errno(unit, errno, "Failed to fork: %m");
3461
3462         if (pid == 0) {
3463                 int exit_status = EXIT_SUCCESS;
3464
3465                 r = exec_child(unit,
3466                                command,
3467                                context,
3468                                params,
3469                                runtime,
3470                                dcreds,
3471                                argv,
3472                                socket_fd,
3473                                named_iofds,
3474                                fds,
3475                                n_storage_fds,
3476                                n_socket_fds,
3477                                files_env,
3478                                unit->manager->user_lookup_fds[1],
3479                                &exit_status);
3480
3481                 if (r < 0) {
3482                         log_struct_errno(LOG_ERR, r,
3483                                          "MESSAGE_ID=" SD_MESSAGE_SPAWN_FAILED_STR,
3484                                          LOG_UNIT_ID(unit),
3485                                          LOG_UNIT_INVOCATION_ID(unit),
3486                                          LOG_UNIT_MESSAGE(unit, "Failed at step %s spawning %s: %m",
3487                                                           exit_status_to_string(exit_status, EXIT_STATUS_SYSTEMD),
3488                                                           command->path),
3489                                          "EXECUTABLE=%s", command->path,
3490                                          NULL);
3491                 }
3492
3493                 _exit(exit_status);
3494         }
3495
3496         log_unit_debug(unit, "Forked %s as "PID_FMT, command->path, pid);
3497
3498         /* We add the new process to the cgroup both in the child (so
3499          * that we can be sure that no user code is ever executed
3500          * outside of the cgroup) and in the parent (so that we can be
3501          * sure that when we kill the cgroup the process will be
3502          * killed too). */
3503         if (params->cgroup_path)
3504                 (void) cg_attach(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, pid);
3505
3506         exec_status_start(&command->exec_status, pid);
3507
3508         *ret = pid;
3509         return 0;
3510 }
3511
3512 void exec_context_init(ExecContext *c) {
3513         ExecDirectoryType i;
3514
3515         assert(c);
3516
3517         c->umask = 0022;
3518         c->ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 0);
3519         c->cpu_sched_policy = SCHED_OTHER;
3520         c->syslog_priority = LOG_DAEMON|LOG_INFO;
3521         c->syslog_level_prefix = true;
3522         c->ignore_sigpipe = true;
3523         c->timer_slack_nsec = NSEC_INFINITY;
3524         c->personality = PERSONALITY_INVALID;
3525         for (i = 0; i < _EXEC_DIRECTORY_TYPE_MAX; i++)
3526                 c->directories[i].mode = 0755;
3527         c->capability_bounding_set = CAP_ALL;
3528         c->restrict_namespaces = NAMESPACE_FLAGS_ALL;
3529 }
3530
3531 void exec_context_done(ExecContext *c) {
3532         unsigned l;
3533         ExecDirectoryType i;
3534
3535         assert(c);
3536
3537         c->environment = strv_free(c->environment);
3538         c->environment_files = strv_free(c->environment_files);
3539         c->pass_environment = strv_free(c->pass_environment);
3540         c->unset_environment = strv_free(c->unset_environment);
3541
3542         for (l = 0; l < ELEMENTSOF(c->rlimit); l++)
3543                 c->rlimit[l] = mfree(c->rlimit[l]);
3544
3545         for (l = 0; l < 3; l++)
3546                 c->stdio_fdname[l] = mfree(c->stdio_fdname[l]);
3547
3548         c->working_directory = mfree(c->working_directory);
3549         c->root_directory = mfree(c->root_directory);
3550         c->root_image = mfree(c->root_image);
3551         c->tty_path = mfree(c->tty_path);
3552         c->syslog_identifier = mfree(c->syslog_identifier);
3553         c->user = mfree(c->user);
3554         c->group = mfree(c->group);
3555
3556         c->supplementary_groups = strv_free(c->supplementary_groups);
3557
3558         c->pam_name = mfree(c->pam_name);
3559
3560         c->read_only_paths = strv_free(c->read_only_paths);
3561         c->read_write_paths = strv_free(c->read_write_paths);
3562         c->inaccessible_paths = strv_free(c->inaccessible_paths);
3563
3564         bind_mount_free_many(c->bind_mounts, c->n_bind_mounts);
3565
3566         if (c->cpuset)
3567                 CPU_FREE(c->cpuset);
3568
3569         c->utmp_id = mfree(c->utmp_id);
3570         c->selinux_context = mfree(c->selinux_context);
3571         c->apparmor_profile = mfree(c->apparmor_profile);
3572         c->smack_process_label = mfree(c->smack_process_label);
3573
3574         c->syscall_filter = set_free(c->syscall_filter);
3575         c->syscall_archs = set_free(c->syscall_archs);
3576         c->address_families = set_free(c->address_families);
3577
3578         for (i = 0; i < _EXEC_DIRECTORY_TYPE_MAX; i++)
3579                 c->directories[i].paths = strv_free(c->directories[i].paths);
3580 }
3581
3582 int exec_context_destroy_runtime_directory(ExecContext *c, const char *runtime_prefix) {
3583         char **i;
3584
3585         assert(c);
3586
3587         if (!runtime_prefix)
3588                 return 0;
3589
3590         STRV_FOREACH(i, c->directories[EXEC_DIRECTORY_RUNTIME].paths) {
3591                 _cleanup_free_ char *p;
3592
3593                 p = strjoin(runtime_prefix, "/", *i);
3594                 if (!p)
3595                         return -ENOMEM;
3596
3597                 /* We execute this synchronously, since we need to be sure this is gone when we start the service
3598                  * next. */
3599                 (void) rm_rf(p, REMOVE_ROOT);
3600
3601                 /* Also destroy any matching subdirectory below /private/. This is done to support DynamicUser=1
3602                  * setups. Note that we don't conditionalize here on that though, as the namespace is same way, and it
3603                  * makes us a bit more robust towards changing unit settings. Or to say this differently: in the worst
3604                  * case this is a NOP. */
3605
3606                 free(p);
3607                 p = strjoin(runtime_prefix, "/private/", *i);
3608                 if (!p)
3609                         return -ENOMEM;
3610
3611                 (void) rm_rf(p, REMOVE_ROOT);
3612         }
3613
3614         return 0;
3615 }
3616
3617 void exec_command_done(ExecCommand *c) {
3618         assert(c);
3619
3620         c->path = mfree(c->path);
3621
3622         c->argv = strv_free(c->argv);
3623 }
3624
3625 void exec_command_done_array(ExecCommand *c, unsigned n) {
3626         unsigned i;
3627
3628         for (i = 0; i < n; i++)
3629                 exec_command_done(c+i);
3630 }
3631
3632 ExecCommand* exec_command_free_list(ExecCommand *c) {
3633         ExecCommand *i;
3634
3635         while ((i = c)) {
3636                 LIST_REMOVE(command, c, i);
3637                 exec_command_done(i);
3638                 free(i);
3639         }
3640
3641         return NULL;
3642 }
3643
3644 void exec_command_free_array(ExecCommand **c, unsigned n) {
3645         unsigned i;
3646
3647         for (i = 0; i < n; i++)
3648                 c[i] = exec_command_free_list(c[i]);
3649 }
3650
3651 typedef struct InvalidEnvInfo {
3652         Unit *unit;
3653         const char *path;
3654 } InvalidEnvInfo;
3655
3656 static void invalid_env(const char *p, void *userdata) {
3657         InvalidEnvInfo *info = userdata;
3658
3659         log_unit_error(info->unit, "Ignoring invalid environment assignment '%s': %s", p, info->path);
3660 }
3661
3662 const char* exec_context_fdname(const ExecContext *c, int fd_index) {
3663         assert(c);
3664
3665         switch (fd_index) {
3666         case STDIN_FILENO:
3667                 if (c->std_input != EXEC_INPUT_NAMED_FD)
3668                         return NULL;
3669                 return c->stdio_fdname[STDIN_FILENO] ?: "stdin";
3670         case STDOUT_FILENO:
3671                 if (c->std_output != EXEC_OUTPUT_NAMED_FD)
3672                         return NULL;
3673                 return c->stdio_fdname[STDOUT_FILENO] ?: "stdout";
3674         case STDERR_FILENO:
3675                 if (c->std_error != EXEC_OUTPUT_NAMED_FD)
3676                         return NULL;
3677                 return c->stdio_fdname[STDERR_FILENO] ?: "stderr";
3678         default:
3679                 return NULL;
3680         }
3681 }
3682
3683 int exec_context_named_iofds(Unit *unit, const ExecContext *c, const ExecParameters *p, int named_iofds[3]) {
3684         unsigned i, targets;
3685         const char* stdio_fdname[3];
3686         unsigned n_fds;
3687
3688         assert(c);
3689         assert(p);
3690
3691         targets = (c->std_input == EXEC_INPUT_NAMED_FD) +
3692                   (c->std_output == EXEC_OUTPUT_NAMED_FD) +
3693                   (c->std_error == EXEC_OUTPUT_NAMED_FD);
3694
3695         for (i = 0; i < 3; i++)
3696                 stdio_fdname[i] = exec_context_fdname(c, i);
3697
3698         n_fds = p->n_storage_fds + p->n_socket_fds;
3699
3700         for (i = 0; i < n_fds  && targets > 0; i++)
3701                 if (named_iofds[STDIN_FILENO] < 0 &&
3702                     c->std_input == EXEC_INPUT_NAMED_FD &&
3703                     stdio_fdname[STDIN_FILENO] &&
3704                     streq(p->fd_names[i], stdio_fdname[STDIN_FILENO])) {
3705
3706                         named_iofds[STDIN_FILENO] = p->fds[i];
3707                         targets--;
3708
3709                 } else if (named_iofds[STDOUT_FILENO] < 0 &&
3710                            c->std_output == EXEC_OUTPUT_NAMED_FD &&
3711                            stdio_fdname[STDOUT_FILENO] &&
3712                            streq(p->fd_names[i], stdio_fdname[STDOUT_FILENO])) {
3713
3714                         named_iofds[STDOUT_FILENO] = p->fds[i];
3715                         targets--;
3716
3717                 } else if (named_iofds[STDERR_FILENO] < 0 &&
3718                            c->std_error == EXEC_OUTPUT_NAMED_FD &&
3719                            stdio_fdname[STDERR_FILENO] &&
3720                            streq(p->fd_names[i], stdio_fdname[STDERR_FILENO])) {
3721
3722                         named_iofds[STDERR_FILENO] = p->fds[i];
3723                         targets--;
3724                 }
3725
3726         return targets == 0 ? 0 : -ENOENT;
3727 }
3728
3729 int exec_context_load_environment(Unit *unit, const ExecContext *c, char ***l) {
3730         char **i, **r = NULL;
3731
3732         assert(c);
3733         assert(l);
3734
3735         STRV_FOREACH(i, c->environment_files) {
3736                 char *fn;
3737                 int k;
3738                 unsigned n;
3739                 bool ignore = false;
3740                 char **p;
3741                 _cleanup_globfree_ glob_t pglob = {};
3742
3743                 fn = *i;
3744
3745                 if (fn[0] == '-') {
3746                         ignore = true;
3747                         fn++;
3748                 }
3749
3750                 if (!path_is_absolute(fn)) {
3751                         if (ignore)
3752                                 continue;
3753
3754                         strv_free(r);
3755                         return -EINVAL;
3756                 }
3757
3758                 /* Filename supports globbing, take all matching files */
3759                 k = safe_glob(fn, 0, &pglob);
3760                 if (k < 0) {
3761                         if (ignore)
3762                                 continue;
3763
3764                         strv_free(r);
3765                         return k;
3766                 }
3767
3768                 /* When we don't match anything, -ENOENT should be returned */
3769                 assert(pglob.gl_pathc > 0);
3770
3771                 for (n = 0; n < pglob.gl_pathc; n++) {
3772                         k = load_env_file(NULL, pglob.gl_pathv[n], NULL, &p);
3773                         if (k < 0) {
3774                                 if (ignore)
3775                                         continue;
3776
3777                                 strv_free(r);
3778                                 return k;
3779                         }
3780                         /* Log invalid environment variables with filename */
3781                         if (p) {
3782                                 InvalidEnvInfo info = {
3783                                         .unit = unit,
3784                                         .path = pglob.gl_pathv[n]
3785                                 };
3786
3787                                 p = strv_env_clean_with_callback(p, invalid_env, &info);
3788                         }
3789
3790                         if (r == NULL)
3791                                 r = p;
3792                         else {
3793                                 char **m;
3794
3795                                 m = strv_env_merge(2, r, p);
3796                                 strv_free(r);
3797                                 strv_free(p);
3798                                 if (!m)
3799                                         return -ENOMEM;
3800
3801                                 r = m;
3802                         }
3803                 }
3804         }
3805
3806         *l = r;
3807
3808         return 0;
3809 }
3810
3811 static bool tty_may_match_dev_console(const char *tty) {
3812         _cleanup_free_ char *active = NULL;
3813         char *console;
3814
3815         if (!tty)
3816                 return true;
3817
3818         tty = skip_dev_prefix(tty);
3819
3820         /* trivial identity? */
3821         if (streq(tty, "console"))
3822                 return true;
3823
3824         console = resolve_dev_console(&active);
3825         /* if we could not resolve, assume it may */
3826         if (!console)
3827                 return true;
3828
3829         /* "tty0" means the active VC, so it may be the same sometimes */
3830         return streq(console, tty) || (streq(console, "tty0") && tty_is_vc(tty));
3831 }
3832
3833 bool exec_context_may_touch_console(ExecContext *ec) {
3834
3835         return (ec->tty_reset ||
3836                 ec->tty_vhangup ||
3837                 ec->tty_vt_disallocate ||
3838                 is_terminal_input(ec->std_input) ||
3839                 is_terminal_output(ec->std_output) ||
3840                 is_terminal_output(ec->std_error)) &&
3841                tty_may_match_dev_console(exec_context_tty_path(ec));
3842 }
3843
3844 static void strv_fprintf(FILE *f, char **l) {
3845         char **g;
3846
3847         assert(f);
3848
3849         STRV_FOREACH(g, l)
3850                 fprintf(f, " %s", *g);
3851 }
3852
3853 void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) {
3854         char **e, **d;
3855         unsigned i;
3856         ExecDirectoryType dt;
3857         int r;
3858
3859         assert(c);
3860         assert(f);
3861
3862         prefix = strempty(prefix);
3863
3864         fprintf(f,
3865                 "%sUMask: %04o\n"
3866                 "%sWorkingDirectory: %s\n"
3867                 "%sRootDirectory: %s\n"
3868                 "%sNonBlocking: %s\n"
3869                 "%sPrivateTmp: %s\n"
3870                 "%sPrivateDevices: %s\n"
3871                 "%sProtectKernelTunables: %s\n"
3872                 "%sProtectKernelModules: %s\n"
3873                 "%sProtectControlGroups: %s\n"
3874                 "%sPrivateNetwork: %s\n"
3875                 "%sPrivateUsers: %s\n"
3876                 "%sProtectHome: %s\n"
3877                 "%sProtectSystem: %s\n"
3878                 "%sMountAPIVFS: %s\n"
3879                 "%sIgnoreSIGPIPE: %s\n"
3880                 "%sMemoryDenyWriteExecute: %s\n"
3881                 "%sRestrictRealtime: %s\n"
3882                 "%sKeyringMode: %s\n",
3883                 prefix, c->umask,
3884                 prefix, c->working_directory ? c->working_directory : "/",
3885                 prefix, c->root_directory ? c->root_directory : "/",
3886                 prefix, yes_no(c->non_blocking),
3887                 prefix, yes_no(c->private_tmp),
3888                 prefix, yes_no(c->private_devices),
3889                 prefix, yes_no(c->protect_kernel_tunables),
3890                 prefix, yes_no(c->protect_kernel_modules),
3891                 prefix, yes_no(c->protect_control_groups),
3892                 prefix, yes_no(c->private_network),
3893                 prefix, yes_no(c->private_users),
3894                 prefix, protect_home_to_string(c->protect_home),
3895                 prefix, protect_system_to_string(c->protect_system),
3896                 prefix, yes_no(c->mount_apivfs),
3897                 prefix, yes_no(c->ignore_sigpipe),
3898                 prefix, yes_no(c->memory_deny_write_execute),
3899                 prefix, yes_no(c->restrict_realtime),
3900                 prefix, exec_keyring_mode_to_string(c->keyring_mode));
3901
3902         if (c->root_image)
3903                 fprintf(f, "%sRootImage: %s\n", prefix, c->root_image);
3904
3905         STRV_FOREACH(e, c->environment)
3906                 fprintf(f, "%sEnvironment: %s\n", prefix, *e);
3907
3908         STRV_FOREACH(e, c->environment_files)
3909                 fprintf(f, "%sEnvironmentFile: %s\n", prefix, *e);
3910
3911         STRV_FOREACH(e, c->pass_environment)
3912                 fprintf(f, "%sPassEnvironment: %s\n", prefix, *e);
3913
3914         STRV_FOREACH(e, c->unset_environment)
3915                 fprintf(f, "%sUnsetEnvironment: %s\n", prefix, *e);
3916
3917         fprintf(f, "%sRuntimeDirectoryPreserve: %s\n", prefix, exec_preserve_mode_to_string(c->runtime_directory_preserve_mode));
3918
3919         for (dt = 0; dt < _EXEC_DIRECTORY_TYPE_MAX; dt++) {
3920                 fprintf(f, "%s%sMode: %04o\n", prefix, exec_directory_type_to_string(dt), c->directories[dt].mode);
3921
3922                 STRV_FOREACH(d, c->directories[dt].paths)
3923                         fprintf(f, "%s%s: %s\n", prefix, exec_directory_type_to_string(dt), *d);
3924         }
3925
3926         if (c->nice_set)
3927                 fprintf(f,
3928                         "%sNice: %i\n",
3929                         prefix, c->nice);
3930
3931         if (c->oom_score_adjust_set)
3932                 fprintf(f,
3933                         "%sOOMScoreAdjust: %i\n",
3934                         prefix, c->oom_score_adjust);
3935
3936         for (i = 0; i < RLIM_NLIMITS; i++)
3937                 if (c->rlimit[i]) {
3938                         fprintf(f, "%s%s: " RLIM_FMT "\n",
3939                                 prefix, rlimit_to_string(i), c->rlimit[i]->rlim_max);
3940                         fprintf(f, "%s%sSoft: " RLIM_FMT "\n",
3941                                 prefix, rlimit_to_string(i), c->rlimit[i]->rlim_cur);
3942                 }
3943
3944         if (c->ioprio_set) {
3945                 _cleanup_free_ char *class_str = NULL;
3946
3947                 r = ioprio_class_to_string_alloc(IOPRIO_PRIO_CLASS(c->ioprio), &class_str);
3948                 if (r >= 0)
3949                         fprintf(f, "%sIOSchedulingClass: %s\n", prefix, class_str);
3950
3951                 fprintf(f, "%sIOPriority: %lu\n", prefix, IOPRIO_PRIO_DATA(c->ioprio));
3952         }
3953
3954         if (c->cpu_sched_set) {
3955                 _cleanup_free_ char *policy_str = NULL;
3956
3957                 r = sched_policy_to_string_alloc(c->cpu_sched_policy, &policy_str);
3958                 if (r >= 0)
3959                         fprintf(f, "%sCPUSchedulingPolicy: %s\n", prefix, policy_str);
3960
3961                 fprintf(f,
3962                         "%sCPUSchedulingPriority: %i\n"
3963                         "%sCPUSchedulingResetOnFork: %s\n",
3964                         prefix, c->cpu_sched_priority,
3965                         prefix, yes_no(c->cpu_sched_reset_on_fork));
3966         }
3967
3968         if (c->cpuset) {
3969                 fprintf(f, "%sCPUAffinity:", prefix);
3970                 for (i = 0; i < c->cpuset_ncpus; i++)
3971                         if (CPU_ISSET_S(i, CPU_ALLOC_SIZE(c->cpuset_ncpus), c->cpuset))
3972                                 fprintf(f, " %u", i);
3973                 fputs("\n", f);
3974         }
3975
3976         if (c->timer_slack_nsec != NSEC_INFINITY)
3977                 fprintf(f, "%sTimerSlackNSec: "NSEC_FMT "\n", prefix, c->timer_slack_nsec);
3978
3979         fprintf(f,
3980                 "%sStandardInput: %s\n"
3981                 "%sStandardOutput: %s\n"
3982                 "%sStandardError: %s\n",
3983                 prefix, exec_input_to_string(c->std_input),
3984                 prefix, exec_output_to_string(c->std_output),
3985                 prefix, exec_output_to_string(c->std_error));
3986
3987         if (c->tty_path)
3988                 fprintf(f,
3989                         "%sTTYPath: %s\n"
3990                         "%sTTYReset: %s\n"
3991                         "%sTTYVHangup: %s\n"
3992                         "%sTTYVTDisallocate: %s\n",
3993                         prefix, c->tty_path,
3994                         prefix, yes_no(c->tty_reset),
3995                         prefix, yes_no(c->tty_vhangup),
3996                         prefix, yes_no(c->tty_vt_disallocate));
3997
3998         if (IN_SET(c->std_output,
3999                    EXEC_OUTPUT_SYSLOG,
4000                    EXEC_OUTPUT_KMSG,
4001                    EXEC_OUTPUT_JOURNAL,
4002                    EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
4003                    EXEC_OUTPUT_KMSG_AND_CONSOLE,
4004                    EXEC_OUTPUT_JOURNAL_AND_CONSOLE) ||
4005             IN_SET(c->std_error,
4006                    EXEC_OUTPUT_SYSLOG,
4007                    EXEC_OUTPUT_KMSG,
4008                    EXEC_OUTPUT_JOURNAL,
4009                    EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
4010                    EXEC_OUTPUT_KMSG_AND_CONSOLE,
4011                    EXEC_OUTPUT_JOURNAL_AND_CONSOLE)) {
4012
4013                 _cleanup_free_ char *fac_str = NULL, *lvl_str = NULL;
4014
4015                 r = log_facility_unshifted_to_string_alloc(c->syslog_priority >> 3, &fac_str);
4016                 if (r >= 0)
4017                         fprintf(f, "%sSyslogFacility: %s\n", prefix, fac_str);
4018
4019                 r = log_level_to_string_alloc(LOG_PRI(c->syslog_priority), &lvl_str);
4020                 if (r >= 0)
4021                         fprintf(f, "%sSyslogLevel: %s\n", prefix, lvl_str);
4022         }
4023
4024         if (c->secure_bits) {
4025                 _cleanup_free_ char *str = NULL;
4026
4027                 r = secure_bits_to_string_alloc(c->secure_bits, &str);
4028                 if (r >= 0)
4029                         fprintf(f, "%sSecure Bits: %s\n", prefix, str);
4030         }
4031
4032         if (c->capability_bounding_set != CAP_ALL) {
4033                 _cleanup_free_ char *str = NULL;
4034
4035                 r = capability_set_to_string_alloc(c->capability_bounding_set, &str);
4036                 if (r >= 0)
4037                         fprintf(f, "%sCapabilityBoundingSet: %s\n", prefix, str);
4038         }
4039
4040         if (c->capability_ambient_set != 0) {
4041                 _cleanup_free_ char *str = NULL;
4042
4043                 r = capability_set_to_string_alloc(c->capability_ambient_set, &str);
4044                 if (r >= 0)
4045                         fprintf(f, "%sAmbientCapabilities: %s\n", prefix, str);
4046         }
4047
4048         if (c->user)
4049                 fprintf(f, "%sUser: %s\n", prefix, c->user);
4050         if (c->group)
4051                 fprintf(f, "%sGroup: %s\n", prefix, c->group);
4052
4053         fprintf(f, "%sDynamicUser: %s\n", prefix, yes_no(c->dynamic_user));
4054
4055         if (strv_length(c->supplementary_groups) > 0) {
4056                 fprintf(f, "%sSupplementaryGroups:", prefix);
4057                 strv_fprintf(f, c->supplementary_groups);
4058                 fputs("\n", f);
4059         }
4060
4061         if (c->pam_name)
4062                 fprintf(f, "%sPAMName: %s\n", prefix, c->pam_name);
4063
4064         if (strv_length(c->read_write_paths) > 0) {
4065                 fprintf(f, "%sReadWritePaths:", prefix);
4066                 strv_fprintf(f, c->read_write_paths);
4067                 fputs("\n", f);
4068         }
4069
4070         if (strv_length(c->read_only_paths) > 0) {
4071                 fprintf(f, "%sReadOnlyPaths:", prefix);
4072                 strv_fprintf(f, c->read_only_paths);
4073                 fputs("\n", f);
4074         }
4075
4076         if (strv_length(c->inaccessible_paths) > 0) {
4077                 fprintf(f, "%sInaccessiblePaths:", prefix);
4078                 strv_fprintf(f, c->inaccessible_paths);
4079                 fputs("\n", f);
4080         }
4081
4082         if (c->n_bind_mounts > 0)
4083                 for (i = 0; i < c->n_bind_mounts; i++) {
4084                         fprintf(f, "%s%s: %s:%s:%s\n", prefix,
4085                                 c->bind_mounts[i].read_only ? "BindReadOnlyPaths" : "BindPaths",
4086                                 c->bind_mounts[i].source,
4087                                 c->bind_mounts[i].destination,
4088                                 c->bind_mounts[i].recursive ? "rbind" : "norbind");
4089                 }
4090
4091         if (c->utmp_id)
4092                 fprintf(f,
4093                         "%sUtmpIdentifier: %s\n",
4094                         prefix, c->utmp_id);
4095
4096         if (c->selinux_context)
4097                 fprintf(f,
4098                         "%sSELinuxContext: %s%s\n",
4099                         prefix, c->selinux_context_ignore ? "-" : "", c->selinux_context);
4100
4101         if (c->apparmor_profile)
4102                 fprintf(f,
4103                         "%sAppArmorProfile: %s%s\n",
4104                         prefix, c->apparmor_profile_ignore ? "-" : "", c->apparmor_profile);
4105
4106         if (c->smack_process_label)
4107                 fprintf(f,
4108                         "%sSmackProcessLabel: %s%s\n",
4109                         prefix, c->smack_process_label_ignore ? "-" : "", c->smack_process_label);
4110
4111         if (c->personality != PERSONALITY_INVALID)
4112                 fprintf(f,
4113                         "%sPersonality: %s\n",
4114                         prefix, strna(personality_to_string(c->personality)));
4115
4116         fprintf(f,
4117                 "%sLockPersonality: %s\n",
4118                 prefix, yes_no(c->lock_personality));
4119
4120         if (c->syscall_filter) {
4121 #if HAVE_SECCOMP
4122                 Iterator j;
4123                 void *id;
4124                 bool first = true;
4125 #endif
4126
4127                 fprintf(f,
4128                         "%sSystemCallFilter: ",
4129                         prefix);
4130
4131                 if (!c->syscall_whitelist)
4132                         fputc('~', f);
4133
4134 #if HAVE_SECCOMP
4135                 SET_FOREACH(id, c->syscall_filter, j) {
4136                         _cleanup_free_ char *name = NULL;
4137
4138                         if (first)
4139                                 first = false;
4140                         else
4141                                 fputc(' ', f);
4142
4143                         name = seccomp_syscall_resolve_num_arch(SCMP_ARCH_NATIVE, PTR_TO_INT(id) - 1);
4144                         fputs(strna(name), f);
4145                 }
4146 #endif
4147
4148                 fputc('\n', f);
4149         }
4150
4151         if (c->syscall_archs) {
4152 #if HAVE_SECCOMP
4153                 Iterator j;
4154                 void *id;
4155 #endif
4156
4157                 fprintf(f,
4158                         "%sSystemCallArchitectures:",
4159                         prefix);
4160
4161 #if HAVE_SECCOMP
4162                 SET_FOREACH(id, c->syscall_archs, j)
4163                         fprintf(f, " %s", strna(seccomp_arch_to_string(PTR_TO_UINT32(id) - 1)));
4164 #endif
4165                 fputc('\n', f);
4166         }
4167
4168         if (exec_context_restrict_namespaces_set(c)) {
4169                 _cleanup_free_ char *s = NULL;
4170
4171                 r = namespace_flag_to_string_many(c->restrict_namespaces, &s);
4172                 if (r >= 0)
4173                         fprintf(f, "%sRestrictNamespaces: %s\n",
4174                                 prefix, s);
4175         }
4176
4177         if (c->syscall_errno > 0)
4178                 fprintf(f,
4179                         "%sSystemCallErrorNumber: %s\n",
4180                         prefix, strna(errno_to_name(c->syscall_errno)));
4181
4182         if (c->apparmor_profile)
4183                 fprintf(f,
4184                         "%sAppArmorProfile: %s%s\n",
4185                         prefix, c->apparmor_profile_ignore ? "-" : "", c->apparmor_profile);
4186 }
4187
4188 bool exec_context_maintains_privileges(ExecContext *c) {
4189         assert(c);
4190
4191         /* Returns true if the process forked off would run under
4192          * an unchanged UID or as root. */
4193
4194         if (!c->user)
4195                 return true;
4196
4197         if (streq(c->user, "root") || streq(c->user, "0"))
4198                 return true;
4199
4200         return false;
4201 }
4202
4203 int exec_context_get_effective_ioprio(ExecContext *c) {
4204         int p;
4205
4206         assert(c);
4207
4208         if (c->ioprio_set)
4209                 return c->ioprio;
4210
4211         p = ioprio_get(IOPRIO_WHO_PROCESS, 0);
4212         if (p < 0)
4213                 return IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 4);
4214
4215         return p;
4216 }
4217
4218 void exec_status_start(ExecStatus *s, pid_t pid) {
4219         assert(s);
4220
4221         zero(*s);
4222         s->pid = pid;
4223         dual_timestamp_get(&s->start_timestamp);
4224 }
4225
4226 void exec_status_exit(ExecStatus *s, ExecContext *context, pid_t pid, int code, int status) {
4227         assert(s);
4228
4229         if (s->pid && s->pid != pid)
4230                 zero(*s);
4231
4232         s->pid = pid;
4233         dual_timestamp_get(&s->exit_timestamp);
4234
4235         s->code = code;
4236         s->status = status;
4237
4238         if (context) {
4239                 if (context->utmp_id)
4240                         utmp_put_dead_process(context->utmp_id, pid, code, status);
4241
4242                 exec_context_tty_reset(context, NULL);
4243         }
4244 }
4245
4246 void exec_status_dump(ExecStatus *s, FILE *f, const char *prefix) {
4247         char buf[FORMAT_TIMESTAMP_MAX];
4248
4249         assert(s);
4250         assert(f);
4251
4252         if (s->pid <= 0)
4253                 return;
4254
4255         prefix = strempty(prefix);
4256
4257         fprintf(f,
4258                 "%sPID: "PID_FMT"\n",
4259                 prefix, s->pid);
4260
4261         if (dual_timestamp_is_set(&s->start_timestamp))
4262                 fprintf(f,
4263                         "%sStart Timestamp: %s\n",
4264                         prefix, format_timestamp(buf, sizeof(buf), s->start_timestamp.realtime));
4265
4266         if (dual_timestamp_is_set(&s->exit_timestamp))
4267                 fprintf(f,
4268                         "%sExit Timestamp: %s\n"
4269                         "%sExit Code: %s\n"
4270                         "%sExit Status: %i\n",
4271                         prefix, format_timestamp(buf, sizeof(buf), s->exit_timestamp.realtime),
4272                         prefix, sigchld_code_to_string(s->code),
4273                         prefix, s->status);
4274 }
4275
4276 char *exec_command_line(char **argv) {
4277         size_t k;
4278         char *n, *p, **a;
4279         bool first = true;
4280
4281         assert(argv);
4282
4283         k = 1;
4284         STRV_FOREACH(a, argv)
4285                 k += strlen(*a)+3;
4286
4287         n = new(char, k);
4288         if (!n)
4289                 return NULL;
4290
4291         p = n;
4292         STRV_FOREACH(a, argv) {
4293
4294                 if (!first)
4295                         *(p++) = ' ';
4296                 else
4297                         first = false;
4298
4299                 if (strpbrk(*a, WHITESPACE)) {
4300                         *(p++) = '\'';
4301                         p = stpcpy(p, *a);
4302                         *(p++) = '\'';
4303                 } else
4304                         p = stpcpy(p, *a);
4305
4306         }
4307
4308         *p = 0;
4309
4310         /* FIXME: this doesn't really handle arguments that have
4311          * spaces and ticks in them */
4312
4313         return n;
4314 }
4315
4316 void exec_command_dump(ExecCommand *c, FILE *f, const char *prefix) {
4317         _cleanup_free_ char *cmd = NULL;
4318         const char *prefix2;
4319
4320         assert(c);
4321         assert(f);
4322
4323         prefix = strempty(prefix);
4324         prefix2 = strjoina(prefix, "\t");
4325
4326         cmd = exec_command_line(c->argv);
4327         fprintf(f,
4328                 "%sCommand Line: %s\n",
4329                 prefix, cmd ? cmd : strerror(ENOMEM));
4330
4331         exec_status_dump(&c->exec_status, f, prefix2);
4332 }
4333
4334 void exec_command_dump_list(ExecCommand *c, FILE *f, const char *prefix) {
4335         assert(f);
4336
4337         prefix = strempty(prefix);
4338
4339         LIST_FOREACH(command, c, c)
4340                 exec_command_dump(c, f, prefix);
4341 }
4342
4343 void exec_command_append_list(ExecCommand **l, ExecCommand *e) {
4344         ExecCommand *end;
4345
4346         assert(l);
4347         assert(e);
4348
4349         if (*l) {
4350                 /* It's kind of important, that we keep the order here */
4351                 LIST_FIND_TAIL(command, *l, end);
4352                 LIST_INSERT_AFTER(command, *l, end, e);
4353         } else
4354               *l = e;
4355 }
4356
4357 int exec_command_set(ExecCommand *c, const char *path, ...) {
4358         va_list ap;
4359         char **l, *p;
4360
4361         assert(c);
4362         assert(path);
4363
4364         va_start(ap, path);
4365         l = strv_new_ap(path, ap);
4366         va_end(ap);
4367
4368         if (!l)
4369                 return -ENOMEM;
4370
4371         p = strdup(path);
4372         if (!p) {
4373                 strv_free(l);
4374                 return -ENOMEM;
4375         }
4376
4377         free(c->path);
4378         c->path = p;
4379
4380         strv_free(c->argv);
4381         c->argv = l;
4382
4383         return 0;
4384 }
4385
4386 int exec_command_append(ExecCommand *c, const char *path, ...) {
4387         _cleanup_strv_free_ char **l = NULL;
4388         va_list ap;
4389         int r;
4390
4391         assert(c);
4392         assert(path);
4393
4394         va_start(ap, path);
4395         l = strv_new_ap(path, ap);
4396         va_end(ap);
4397
4398         if (!l)
4399                 return -ENOMEM;
4400
4401         r = strv_extend_strv(&c->argv, l, false);
4402         if (r < 0)
4403                 return r;
4404
4405         return 0;
4406 }
4407
4408
4409 static int exec_runtime_allocate(ExecRuntime **rt) {
4410
4411         if (*rt)
4412                 return 0;
4413
4414         *rt = new0(ExecRuntime, 1);
4415         if (!*rt)
4416                 return -ENOMEM;
4417
4418         (*rt)->n_ref = 1;
4419         (*rt)->netns_storage_socket[0] = (*rt)->netns_storage_socket[1] = -1;
4420
4421         return 0;
4422 }
4423
4424 int exec_runtime_make(ExecRuntime **rt, ExecContext *c, const char *id) {
4425         int r;
4426
4427         assert(rt);
4428         assert(c);
4429         assert(id);
4430
4431         if (*rt)
4432                 return 1;
4433
4434         if (!c->private_network && !c->private_tmp)
4435                 return 0;
4436
4437         r = exec_runtime_allocate(rt);
4438         if (r < 0)
4439                 return r;
4440
4441         if (c->private_network && (*rt)->netns_storage_socket[0] < 0) {
4442                 if (socketpair(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC, 0, (*rt)->netns_storage_socket) < 0)
4443                         return -errno;
4444         }
4445
4446         if (c->private_tmp && !(*rt)->tmp_dir) {
4447                 r = setup_tmp_dirs(id, &(*rt)->tmp_dir, &(*rt)->var_tmp_dir);
4448                 if (r < 0)
4449                         return r;
4450         }
4451
4452         return 1;
4453 }
4454
4455 ExecRuntime *exec_runtime_ref(ExecRuntime *r) {
4456         assert(r);
4457         assert(r->n_ref > 0);
4458
4459         r->n_ref++;
4460         return r;
4461 }
4462
4463 ExecRuntime *exec_runtime_unref(ExecRuntime *r) {
4464
4465         if (!r)
4466                 return NULL;
4467
4468         assert(r->n_ref > 0);
4469
4470         r->n_ref--;
4471         if (r->n_ref > 0)
4472                 return NULL;
4473
4474         free(r->tmp_dir);
4475         free(r->var_tmp_dir);
4476         safe_close_pair(r->netns_storage_socket);
4477         return mfree(r);
4478 }
4479
4480 int exec_runtime_serialize(Unit *u, ExecRuntime *rt, FILE *f, FDSet *fds) {
4481         assert(u);
4482         assert(f);
4483         assert(fds);
4484
4485         if (!rt)
4486                 return 0;
4487
4488         if (rt->tmp_dir)
4489                 unit_serialize_item(u, f, "tmp-dir", rt->tmp_dir);
4490
4491         if (rt->var_tmp_dir)
4492                 unit_serialize_item(u, f, "var-tmp-dir", rt->var_tmp_dir);
4493
4494         if (rt->netns_storage_socket[0] >= 0) {
4495                 int copy;
4496
4497                 copy = fdset_put_dup(fds, rt->netns_storage_socket[0]);
4498                 if (copy < 0)
4499                         return copy;
4500
4501                 unit_serialize_item_format(u, f, "netns-socket-0", "%i", copy);
4502         }
4503
4504         if (rt->netns_storage_socket[1] >= 0) {
4505                 int copy;
4506
4507                 copy = fdset_put_dup(fds, rt->netns_storage_socket[1]);
4508                 if (copy < 0)
4509                         return copy;
4510
4511                 unit_serialize_item_format(u, f, "netns-socket-1", "%i", copy);
4512         }
4513
4514         return 0;
4515 }
4516
4517 int exec_runtime_deserialize_item(Unit *u, ExecRuntime **rt, const char *key, const char *value, FDSet *fds) {
4518         int r;
4519
4520         assert(rt);
4521         assert(key);
4522         assert(value);
4523
4524         if (streq(key, "tmp-dir")) {
4525                 char *copy;
4526
4527                 r = exec_runtime_allocate(rt);
4528                 if (r < 0)
4529                         return log_oom();
4530
4531                 copy = strdup(value);
4532                 if (!copy)
4533                         return log_oom();
4534
4535                 free((*rt)->tmp_dir);
4536                 (*rt)->tmp_dir = copy;
4537
4538         } else if (streq(key, "var-tmp-dir")) {
4539                 char *copy;
4540
4541                 r = exec_runtime_allocate(rt);
4542                 if (r < 0)
4543                         return log_oom();
4544
4545                 copy = strdup(value);
4546                 if (!copy)
4547                         return log_oom();
4548
4549                 free((*rt)->var_tmp_dir);
4550                 (*rt)->var_tmp_dir = copy;
4551
4552         } else if (streq(key, "netns-socket-0")) {
4553                 int fd;
4554
4555                 r = exec_runtime_allocate(rt);
4556                 if (r < 0)
4557                         return log_oom();
4558
4559                 if (safe_atoi(value, &fd) < 0 || !fdset_contains(fds, fd))
4560                         log_unit_debug(u, "Failed to parse netns socket value: %s", value);
4561                 else {
4562                         safe_close((*rt)->netns_storage_socket[0]);
4563                         (*rt)->netns_storage_socket[0] = fdset_remove(fds, fd);
4564                 }
4565         } else if (streq(key, "netns-socket-1")) {
4566                 int fd;
4567
4568                 r = exec_runtime_allocate(rt);
4569                 if (r < 0)
4570                         return log_oom();
4571
4572                 if (safe_atoi(value, &fd) < 0 || !fdset_contains(fds, fd))
4573                         log_unit_debug(u, "Failed to parse netns socket value: %s", value);
4574                 else {
4575                         safe_close((*rt)->netns_storage_socket[1]);
4576                         (*rt)->netns_storage_socket[1] = fdset_remove(fds, fd);
4577                 }
4578         } else
4579                 return 0;
4580
4581         return 1;
4582 }
4583
4584 static void *remove_tmpdir_thread(void *p) {
4585         _cleanup_free_ char *path = p;
4586
4587         (void) rm_rf(path, REMOVE_ROOT|REMOVE_PHYSICAL);
4588         return NULL;
4589 }
4590
4591 void exec_runtime_destroy(ExecRuntime *rt) {
4592         int r;
4593
4594         if (!rt)
4595                 return;
4596
4597         /* If there are multiple users of this, let's leave the stuff around */
4598         if (rt->n_ref > 1)
4599                 return;
4600
4601         if (rt->tmp_dir) {
4602                 log_debug("Spawning thread to nuke %s", rt->tmp_dir);
4603
4604                 r = asynchronous_job(remove_tmpdir_thread, rt->tmp_dir);
4605                 if (r < 0) {
4606                         log_warning_errno(r, "Failed to nuke %s: %m", rt->tmp_dir);
4607                         free(rt->tmp_dir);
4608                 }
4609
4610                 rt->tmp_dir = NULL;
4611         }
4612
4613         if (rt->var_tmp_dir) {
4614                 log_debug("Spawning thread to nuke %s", rt->var_tmp_dir);
4615
4616                 r = asynchronous_job(remove_tmpdir_thread, rt->var_tmp_dir);
4617                 if (r < 0) {
4618                         log_warning_errno(r, "Failed to nuke %s: %m", rt->var_tmp_dir);
4619                         free(rt->var_tmp_dir);
4620                 }
4621
4622                 rt->var_tmp_dir = NULL;
4623         }
4624
4625         safe_close_pair(rt->netns_storage_socket);
4626 }
4627
4628 static const char* const exec_input_table[_EXEC_INPUT_MAX] = {
4629         [EXEC_INPUT_NULL] = "null",
4630         [EXEC_INPUT_TTY] = "tty",
4631         [EXEC_INPUT_TTY_FORCE] = "tty-force",
4632         [EXEC_INPUT_TTY_FAIL] = "tty-fail",
4633         [EXEC_INPUT_SOCKET] = "socket",
4634         [EXEC_INPUT_NAMED_FD] = "fd",
4635 };
4636
4637 DEFINE_STRING_TABLE_LOOKUP(exec_input, ExecInput);
4638
4639 static const char* const exec_output_table[_EXEC_OUTPUT_MAX] = {
4640         [EXEC_OUTPUT_INHERIT] = "inherit",
4641         [EXEC_OUTPUT_NULL] = "null",
4642         [EXEC_OUTPUT_TTY] = "tty",
4643         [EXEC_OUTPUT_SYSLOG] = "syslog",
4644         [EXEC_OUTPUT_SYSLOG_AND_CONSOLE] = "syslog+console",
4645         [EXEC_OUTPUT_KMSG] = "kmsg",
4646         [EXEC_OUTPUT_KMSG_AND_CONSOLE] = "kmsg+console",
4647         [EXEC_OUTPUT_JOURNAL] = "journal",
4648         [EXEC_OUTPUT_JOURNAL_AND_CONSOLE] = "journal+console",
4649         [EXEC_OUTPUT_SOCKET] = "socket",
4650         [EXEC_OUTPUT_NAMED_FD] = "fd",
4651 };
4652
4653 DEFINE_STRING_TABLE_LOOKUP(exec_output, ExecOutput);
4654
4655 static const char* const exec_utmp_mode_table[_EXEC_UTMP_MODE_MAX] = {
4656         [EXEC_UTMP_INIT] = "init",
4657         [EXEC_UTMP_LOGIN] = "login",
4658         [EXEC_UTMP_USER] = "user",
4659 };
4660
4661 DEFINE_STRING_TABLE_LOOKUP(exec_utmp_mode, ExecUtmpMode);
4662
4663 static const char* const exec_preserve_mode_table[_EXEC_PRESERVE_MODE_MAX] = {
4664         [EXEC_PRESERVE_NO] = "no",
4665         [EXEC_PRESERVE_YES] = "yes",
4666         [EXEC_PRESERVE_RESTART] = "restart",
4667 };
4668
4669 DEFINE_STRING_TABLE_LOOKUP_WITH_BOOLEAN(exec_preserve_mode, ExecPreserveMode, EXEC_PRESERVE_YES);
4670
4671 static const char* const exec_directory_type_table[_EXEC_DIRECTORY_TYPE_MAX] = {
4672         [EXEC_DIRECTORY_RUNTIME] = "RuntimeDirectory",
4673         [EXEC_DIRECTORY_STATE] = "StateDirectory",
4674         [EXEC_DIRECTORY_CACHE] = "CacheDirectory",
4675         [EXEC_DIRECTORY_LOGS] = "LogsDirectory",
4676         [EXEC_DIRECTORY_CONFIGURATION] = "ConfigurationDirectory",
4677 };
4678
4679 DEFINE_STRING_TABLE_LOOKUP(exec_directory_type, ExecDirectoryType);
4680
4681 static const char* const exec_keyring_mode_table[_EXEC_KEYRING_MODE_MAX] = {
4682         [EXEC_KEYRING_INHERIT] = "inherit",
4683         [EXEC_KEYRING_PRIVATE] = "private",
4684         [EXEC_KEYRING_SHARED] = "shared",
4685 };
4686
4687 DEFINE_STRING_TABLE_LOOKUP(exec_keyring_mode, ExecKeyringMode);