src/core/execute.c

   1 /***
   2   This file is part of systemd.
   3
   4   Copyright 2010 Lennart Poettering
   5
   6   systemd is free software; you can redistribute it and/or modify it
   7   under the terms of the GNU Lesser General Public License as published by
   8   the Free Software Foundation; either version 2.1 of the License, or
   9   (at your option) any later version.
  10
  11   systemd is distributed in the hope that it will be useful, but
  12   WITHOUT ANY WARRANTY; without even the implied warranty of
  13   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  14   Lesser General Public License for more details.
  15
  16   You should have received a copy of the GNU Lesser General Public License
  17   along with systemd; If not, see <http://www.gnu.org/licenses/>.
  18 ***/
  19
  20 #include <errno.h>
  21 #include <fcntl.h>
  22 #include <glob.h>
  23 #include <grp.h>
  24 #include <poll.h>
  25 #include <signal.h>
  26 #include <string.h>
  27 #include <sys/capability.h>
  28 #include <sys/eventfd.h>
  29 #include <sys/mman.h>
  30 #include <sys/personality.h>
  31 #include <sys/prctl.h>
  32 #include <sys/shm.h>
  33 #include <sys/socket.h>
  34 #include <sys/stat.h>
  35 #include <sys/types.h>
  36 #include <sys/un.h>
  37 #include <unistd.h>
  38 #include <utmpx.h>
  39
  40 #if HAVE_PAM
  41 #include <security/pam_appl.h>
  42 #endif
  43
  44 #if HAVE_SELINUX
  45 #include <selinux/selinux.h>
  46 #endif
  47
  48 #if HAVE_SECCOMP
  49 #include <seccomp.h>
  50 #endif
  51
  52 #if HAVE_APPARMOR
  53 #include <sys/apparmor.h>
  54 #endif
  55
  56 #include "sd-messages.h"
  57
  58 #include "af-list.h"
  59 #include "alloc-util.h"
  60 #if HAVE_APPARMOR
  61 #include "apparmor-util.h"
  62 #endif
  63 #include "async.h"
  64 #include "barrier.h"
  65 #include "cap-list.h"
  66 #include "capability-util.h"
  67 #include "chown-recursive.h"
  68 #include "def.h"
  69 #include "env-util.h"
  70 #include "errno-list.h"
  71 #include "execute.h"
  72 #include "exit-status.h"
  73 #include "fd-util.h"
  74 #include "fileio.h"
  75 #include "format-util.h"
  76 #include "fs-util.h"
  77 #include "glob-util.h"
  78 #include "io-util.h"
  79 #include "ioprio.h"
  80 #include "label.h"
  81 #include "log.h"
  82 #include "macro.h"
  83 #include "missing.h"
  84 #include "mkdir.h"
  85 #include "namespace.h"
  86 #include "parse-util.h"
  87 #include "path-util.h"
  88 #include "process-util.h"
  89 #include "rlimit-util.h"
  90 #include "rm-rf.h"
  91 #if HAVE_SECCOMP
  92 #include "seccomp-util.h"
  93 #endif
  94 #include "securebits.h"
  95 #include "securebits-util.h"
  96 #include "selinux-util.h"
  97 #include "signal-util.h"
  98 #include "smack-util.h"
  99 #include "special.h"
 100 #include "string-table.h"
 101 #include "string-util.h"
 102 #include "strv.h"
 103 #include "syslog-util.h"
 104 #include "terminal-util.h"
 105 #include "unit.h"
 106 #include "user-util.h"
 107 #include "util.h"
 108 #include "utmp-wtmp.h"
 109
 110 #define IDLE_TIMEOUT_USEC (5*USEC_PER_SEC)
 111 #define IDLE_TIMEOUT2_USEC (1*USEC_PER_SEC)
 112
 113 /* This assumes there is a 'tty' group */
 114 #define TTY_MODE 0620
 115
 116 #define SNDBUF_SIZE (8*1024*1024)
 117
 118 static int shift_fds(int fds[], unsigned n_fds) {
 119         int start, restart_from;
 120
 121         if (n_fds <= 0)
 122                 return 0;
 123
 124         /* Modifies the fds array! (sorts it) */
 125
 126         assert(fds);
 127
 128         start = 0;
 129         for (;;) {
 130                 int i;
 131
 132                 restart_from = -1;
 133
 134                 for (i = start; i < (int) n_fds; i++) {
 135                         int nfd;
 136
 137                         /* Already at right index? */
 138                         if (fds[i] == i+3)
 139                                 continue;
 140
 141                         nfd = fcntl(fds[i], F_DUPFD, i + 3);
 142                         if (nfd < 0)
 143                                 return -errno;
 144
 145                         safe_close(fds[i]);
 146                         fds[i] = nfd;
 147
 148                         /* Hmm, the fd we wanted isn't free? Then
 149                          * let's remember that and try again from here */
 150                         if (nfd != i+3 && restart_from < 0)
 151                                 restart_from = i;
 152                 }
 153
 154                 if (restart_from < 0)
 155                         break;
 156
 157                 start = restart_from;
 158         }
 159
 160         return 0;
 161 }
 162
 163 static int flags_fds(const int fds[], unsigned n_storage_fds, unsigned n_socket_fds, bool nonblock) {
 164         unsigned i, n_fds;
 165         int r;
 166
 167         n_fds = n_storage_fds + n_socket_fds;
 168         if (n_fds <= 0)
 169                 return 0;
 170
 171         assert(fds);
 172
 173         /* Drops/Sets O_NONBLOCK and FD_CLOEXEC from the file flags.
 174          * O_NONBLOCK only applies to socket activation though. */
 175
 176         for (i = 0; i < n_fds; i++) {
 177
 178                 if (i < n_socket_fds) {
 179                         r = fd_nonblock(fds[i], nonblock);
 180                         if (r < 0)
 181                                 return r;
 182                 }
 183
 184                 /* We unconditionally drop FD_CLOEXEC from the fds,
 185                  * since after all we want to pass these fds to our
 186                  * children */
 187
 188                 r = fd_cloexec(fds[i], false);
 189                 if (r < 0)
 190                         return r;
 191         }
 192
 193         return 0;
 194 }
 195
 196 static const char *exec_context_tty_path(const ExecContext *context) {
 197         assert(context);
 198
 199         if (context->stdio_as_fds)
 200                 return NULL;
 201
 202         if (context->tty_path)
 203                 return context->tty_path;
 204
 205         return "/dev/console";
 206 }
 207
 208 static void exec_context_tty_reset(const ExecContext *context, const ExecParameters *p) {
 209         const char *path;
 210
 211         assert(context);
 212
 213         path = exec_context_tty_path(context);
 214
 215         if (context->tty_vhangup) {
 216                 if (p && p->stdin_fd >= 0)
 217                         (void) terminal_vhangup_fd(p->stdin_fd);
 218                 else if (path)
 219                         (void) terminal_vhangup(path);
 220         }
 221
 222         if (context->tty_reset) {
 223                 if (p && p->stdin_fd >= 0)
 224                         (void) reset_terminal_fd(p->stdin_fd, true);
 225                 else if (path)
 226                         (void) reset_terminal(path);
 227         }
 228
 229         if (context->tty_vt_disallocate && path)
 230                 (void) vt_disallocate(path);
 231 }
 232
 233 static bool is_terminal_input(ExecInput i) {
 234         return IN_SET(i,
 235                       EXEC_INPUT_TTY,
 236                       EXEC_INPUT_TTY_FORCE,
 237                       EXEC_INPUT_TTY_FAIL);
 238 }
 239
 240 static bool is_terminal_output(ExecOutput o) {
 241         return IN_SET(o,
 242                       EXEC_OUTPUT_TTY,
 243                       EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
 244                       EXEC_OUTPUT_KMSG_AND_CONSOLE,
 245                       EXEC_OUTPUT_JOURNAL_AND_CONSOLE);
 246 }
 247
 248 static bool is_syslog_output(ExecOutput o) {
 249         return IN_SET(o,
 250                       EXEC_OUTPUT_SYSLOG,
 251                       EXEC_OUTPUT_SYSLOG_AND_CONSOLE);
 252 }
 253
 254 static bool is_kmsg_output(ExecOutput o) {
 255         return IN_SET(o,
 256                       EXEC_OUTPUT_KMSG,
 257                       EXEC_OUTPUT_KMSG_AND_CONSOLE);
 258 }
 259
 260 static bool exec_context_needs_term(const ExecContext *c) {
 261         assert(c);
 262
 263         /* Return true if the execution context suggests we should set $TERM to something useful. */
 264
 265         if (is_terminal_input(c->std_input))
 266                 return true;
 267
 268         if (is_terminal_output(c->std_output))
 269                 return true;
 270
 271         if (is_terminal_output(c->std_error))
 272                 return true;
 273
 274         return !!c->tty_path;
 275 }
 276
 277 static int open_null_as(int flags, int nfd) {
 278         int fd;
 279
 280         assert(nfd >= 0);
 281
 282         fd = open("/dev/null", flags|O_NOCTTY);
 283         if (fd < 0)
 284                 return -errno;
 285
 286         return move_fd(fd, nfd, false);
 287 }
 288
 289 static int connect_journal_socket(int fd, uid_t uid, gid_t gid) {
 290         static const union sockaddr_union sa = {
 291                 .un.sun_family = AF_UNIX,
 292                 .un.sun_path = "/run/systemd/journal/stdout",
 293         };
 294         uid_t olduid = UID_INVALID;
 295         gid_t oldgid = GID_INVALID;
 296         int r;
 297
 298         if (gid_is_valid(gid)) {
 299                 oldgid = getgid();
 300
 301                 if (setegid(gid) < 0)
 302                         return -errno;
 303         }
 304
 305         if (uid_is_valid(uid)) {
 306                 olduid = getuid();
 307
 308                 if (seteuid(uid) < 0) {
 309                         r = -errno;
 310                         goto restore_gid;
 311                 }
 312         }
 313
 314         r = connect(fd, &sa.sa, SOCKADDR_UN_LEN(sa.un)) < 0 ? -errno : 0;
 315
 316         /* If we fail to restore the uid or gid, things will likely
 317            fail later on. This should only happen if an LSM interferes. */
 318
 319         if (uid_is_valid(uid))
 320                 (void) seteuid(olduid);
 321
 322  restore_gid:
 323         if (gid_is_valid(gid))
 324                 (void) setegid(oldgid);
 325
 326         return r;
 327 }
 328
 329 static int connect_logger_as(
 330                 Unit *unit,
 331                 const ExecContext *context,
 332                 const ExecParameters *params,
 333                 ExecOutput output,
 334                 const char *ident,
 335                 int nfd,
 336                 uid_t uid,
 337                 gid_t gid) {
 338
 339         int fd, r;
 340
 341         assert(context);
 342         assert(params);
 343         assert(output < _EXEC_OUTPUT_MAX);
 344         assert(ident);
 345         assert(nfd >= 0);
 346
 347         fd = socket(AF_UNIX, SOCK_STREAM, 0);
 348         if (fd < 0)
 349                 return -errno;
 350
 351         r = connect_journal_socket(fd, uid, gid);
 352         if (r < 0)
 353                 return r;
 354
 355         if (shutdown(fd, SHUT_RD) < 0) {
 356                 safe_close(fd);
 357                 return -errno;
 358         }
 359
 360         (void) fd_inc_sndbuf(fd, SNDBUF_SIZE);
 361
 362         dprintf(fd,
 363                 "%s\n"
 364                 "%s\n"
 365                 "%i\n"
 366                 "%i\n"
 367                 "%i\n"
 368                 "%i\n"
 369                 "%i\n",
 370                 context->syslog_identifier ?: ident,
 371                 params->flags & EXEC_PASS_LOG_UNIT ? unit->id : "",
 372                 context->syslog_priority,
 373                 !!context->syslog_level_prefix,
 374                 is_syslog_output(output),
 375                 is_kmsg_output(output),
 376                 is_terminal_output(output));
 377
 378         return move_fd(fd, nfd, false);
 379 }
 380 static int open_terminal_as(const char *path, int flags, int nfd) {
 381         int fd;
 382
 383         assert(path);
 384         assert(nfd >= 0);
 385
 386         fd = open_terminal(path, flags | O_NOCTTY);
 387         if (fd < 0)
 388                 return fd;
 389
 390         return move_fd(fd, nfd, false);
 391 }
 392
 393 static int fixup_input(
 394                 const ExecContext *context,
 395                 int socket_fd,
 396                 bool apply_tty_stdin) {
 397
 398         ExecInput std_input;
 399
 400         assert(context);
 401
 402         std_input = context->std_input;
 403
 404         if (is_terminal_input(std_input) && !apply_tty_stdin)
 405                 return EXEC_INPUT_NULL;
 406
 407         if (std_input == EXEC_INPUT_SOCKET && socket_fd < 0)
 408                 return EXEC_INPUT_NULL;
 409
 410         if (std_input == EXEC_INPUT_DATA && context->stdin_data_size == 0)
 411                 return EXEC_INPUT_NULL;
 412
 413         return std_input;
 414 }
 415
 416 static int fixup_output(ExecOutput std_output, int socket_fd) {
 417
 418         if (std_output == EXEC_OUTPUT_SOCKET && socket_fd < 0)
 419                 return EXEC_OUTPUT_INHERIT;
 420
 421         return std_output;
 422 }
 423
 424 static int setup_input(
 425                 const ExecContext *context,
 426                 const ExecParameters *params,
 427                 int socket_fd,
 428                 int named_iofds[3]) {
 429
 430         ExecInput i;
 431
 432         assert(context);
 433         assert(params);
 434
 435         if (params->stdin_fd >= 0) {
 436                 if (dup2(params->stdin_fd, STDIN_FILENO) < 0)
 437                         return -errno;
 438
 439                 /* Try to make this the controlling tty, if it is a tty, and reset it */
 440                 if (isatty(STDIN_FILENO)) {
 441                         (void) ioctl(STDIN_FILENO, TIOCSCTTY, context->std_input == EXEC_INPUT_TTY_FORCE);
 442                         (void) reset_terminal_fd(STDIN_FILENO, true);
 443                 }
 444
 445                 return STDIN_FILENO;
 446         }
 447
 448         i = fixup_input(context, socket_fd, params->flags & EXEC_APPLY_TTY_STDIN);
 449
 450         switch (i) {
 451
 452         case EXEC_INPUT_NULL:
 453                 return open_null_as(O_RDONLY, STDIN_FILENO);
 454
 455         case EXEC_INPUT_TTY:
 456         case EXEC_INPUT_TTY_FORCE:
 457         case EXEC_INPUT_TTY_FAIL: {
 458                 int fd;
 459
 460                 fd = acquire_terminal(exec_context_tty_path(context),
 461                                       i == EXEC_INPUT_TTY_FAIL,
 462                                       i == EXEC_INPUT_TTY_FORCE,
 463                                       false,
 464                                       USEC_INFINITY);
 465                 if (fd < 0)
 466                         return fd;
 467
 468                 return move_fd(fd, STDIN_FILENO, false);
 469         }
 470
 471         case EXEC_INPUT_SOCKET:
 472                 assert(socket_fd >= 0);
 473
 474                 return dup2(socket_fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
 475
 476         case EXEC_INPUT_NAMED_FD:
 477                 assert(named_iofds[STDIN_FILENO] >= 0);
 478
 479                 (void) fd_nonblock(named_iofds[STDIN_FILENO], false);
 480                 return dup2(named_iofds[STDIN_FILENO], STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
 481
 482         case EXEC_INPUT_DATA: {
 483                 int fd;
 484
 485                 fd = acquire_data_fd(context->stdin_data, context->stdin_data_size, 0);
 486                 if (fd < 0)
 487                         return fd;
 488
 489                 return move_fd(fd, STDIN_FILENO, false);
 490         }
 491
 492         default:
 493                 assert_not_reached("Unknown input type");
 494         }
 495 }
 496
 497 static int setup_output(
 498                 Unit *unit,
 499                 const ExecContext *context,
 500                 const ExecParameters *params,
 501                 int fileno,
 502                 int socket_fd,
 503                 int named_iofds[3],
 504                 const char *ident,
 505                 uid_t uid,
 506                 gid_t gid,
 507                 dev_t *journal_stream_dev,
 508                 ino_t *journal_stream_ino) {
 509
 510         ExecOutput o;
 511         ExecInput i;
 512         int r;
 513
 514         assert(unit);
 515         assert(context);
 516         assert(params);
 517         assert(ident);
 518         assert(journal_stream_dev);
 519         assert(journal_stream_ino);
 520
 521         if (fileno == STDOUT_FILENO && params->stdout_fd >= 0) {
 522
 523                 if (dup2(params->stdout_fd, STDOUT_FILENO) < 0)
 524                         return -errno;
 525
 526                 return STDOUT_FILENO;
 527         }
 528
 529         if (fileno == STDERR_FILENO && params->stderr_fd >= 0) {
 530                 if (dup2(params->stderr_fd, STDERR_FILENO) < 0)
 531                         return -errno;
 532
 533                 return STDERR_FILENO;
 534         }
 535
 536         i = fixup_input(context, socket_fd, params->flags & EXEC_APPLY_TTY_STDIN);
 537         o = fixup_output(context->std_output, socket_fd);
 538
 539         if (fileno == STDERR_FILENO) {
 540                 ExecOutput e;
 541                 e = fixup_output(context->std_error, socket_fd);
 542
 543                 /* This expects the input and output are already set up */
 544
 545                 /* Don't change the stderr file descriptor if we inherit all
 546                  * the way and are not on a tty */
 547                 if (e == EXEC_OUTPUT_INHERIT &&
 548                     o == EXEC_OUTPUT_INHERIT &&
 549                     i == EXEC_INPUT_NULL &&
 550                     !is_terminal_input(context->std_input) &&
 551                     getppid () != 1)
 552                         return fileno;
 553
 554                 /* Duplicate from stdout if possible */
 555                 if ((e == o && e != EXEC_OUTPUT_NAMED_FD) || e == EXEC_OUTPUT_INHERIT)
 556                         return dup2(STDOUT_FILENO, fileno) < 0 ? -errno : fileno;
 557
 558                 o = e;
 559
 560         } else if (o == EXEC_OUTPUT_INHERIT) {
 561                 /* If input got downgraded, inherit the original value */
 562                 if (i == EXEC_INPUT_NULL && is_terminal_input(context->std_input))
 563                         return open_terminal_as(exec_context_tty_path(context), O_WRONLY, fileno);
 564
 565                 /* If the input is connected to anything that's not a /dev/null or a data fd, inherit that... */
 566                 if (!IN_SET(i, EXEC_INPUT_NULL, EXEC_INPUT_DATA))
 567                         return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
 568
 569                 /* If we are not started from PID 1 we just inherit STDOUT from our parent process. */
 570                 if (getppid() != 1)
 571                         return fileno;
 572
 573                 /* We need to open /dev/null here anew, to get the right access mode. */
 574                 return open_null_as(O_WRONLY, fileno);
 575         }
 576
 577         switch (o) {
 578
 579         case EXEC_OUTPUT_NULL:
 580                 return open_null_as(O_WRONLY, fileno);
 581
 582         case EXEC_OUTPUT_TTY:
 583                 if (is_terminal_input(i))
 584                         return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
 585
 586                 /* We don't reset the terminal if this is just about output */
 587                 return open_terminal_as(exec_context_tty_path(context), O_WRONLY, fileno);
 588
 589         case EXEC_OUTPUT_SYSLOG:
 590         case EXEC_OUTPUT_SYSLOG_AND_CONSOLE:
 591         case EXEC_OUTPUT_KMSG:
 592         case EXEC_OUTPUT_KMSG_AND_CONSOLE:
 593         case EXEC_OUTPUT_JOURNAL:
 594         case EXEC_OUTPUT_JOURNAL_AND_CONSOLE:
 595                 r = connect_logger_as(unit, context, params, o, ident, fileno, uid, gid);
 596                 if (r < 0) {
 597                         log_unit_warning_errno(unit, r, "Failed to connect %s to the journal socket, ignoring: %m", fileno == STDOUT_FILENO ? "stdout" : "stderr");
 598                         r = open_null_as(O_WRONLY, fileno);
 599                 } else {
 600                         struct stat st;
 601
 602                         /* If we connected this fd to the journal via a stream, patch the device/inode into the passed
 603                          * parameters, but only then. This is useful so that we can set $JOURNAL_STREAM that permits
 604                          * services to detect whether they are connected to the journal or not.
 605                          *
 606                          * If both stdout and stderr are connected to a stream then let's make sure to store the data
 607                          * about STDERR as that's usually the best way to do logging. */
 608
 609                         if (fstat(fileno, &st) >= 0 &&
 610                             (*journal_stream_ino == 0 || fileno == STDERR_FILENO)) {
 611                                 *journal_stream_dev = st.st_dev;
 612                                 *journal_stream_ino = st.st_ino;
 613                         }
 614                 }
 615                 return r;
 616
 617         case EXEC_OUTPUT_SOCKET:
 618                 assert(socket_fd >= 0);
 619
 620                 return dup2(socket_fd, fileno) < 0 ? -errno : fileno;
 621
 622         case EXEC_OUTPUT_NAMED_FD:
 623                 assert(named_iofds[fileno] >= 0);
 624
 625                 (void) fd_nonblock(named_iofds[fileno], false);
 626                 return dup2(named_iofds[fileno], fileno) < 0 ? -errno : fileno;
 627
 628         default:
 629                 assert_not_reached("Unknown error type");
 630         }
 631 }
 632
 633 static int chown_terminal(int fd, uid_t uid) {
 634         struct stat st;
 635
 636         assert(fd >= 0);
 637
 638         /* Before we chown/chmod the TTY, let's ensure this is actually a tty */
 639         if (isatty(fd) < 1)
 640                 return 0;
 641
 642         /* This might fail. What matters are the results. */
 643         (void) fchown(fd, uid, -1);
 644         (void) fchmod(fd, TTY_MODE);
 645
 646         if (fstat(fd, &st) < 0)
 647                 return -errno;
 648
 649         if (st.st_uid != uid || (st.st_mode & 0777) != TTY_MODE)
 650                 return -EPERM;
 651
 652         return 0;
 653 }
 654
 655 static int setup_confirm_stdio(const char *vc, int *_saved_stdin, int *_saved_stdout) {
 656         _cleanup_close_ int fd = -1, saved_stdin = -1, saved_stdout = -1;
 657         int r;
 658
 659         assert(_saved_stdin);
 660         assert(_saved_stdout);
 661
 662         saved_stdin = fcntl(STDIN_FILENO, F_DUPFD, 3);
 663         if (saved_stdin < 0)
 664                 return -errno;
 665
 666         saved_stdout = fcntl(STDOUT_FILENO, F_DUPFD, 3);
 667         if (saved_stdout < 0)
 668                 return -errno;
 669
 670         fd = acquire_terminal(vc, false, false, false, DEFAULT_CONFIRM_USEC);
 671         if (fd < 0)
 672                 return fd;
 673
 674         r = chown_terminal(fd, getuid());
 675         if (r < 0)
 676                 return r;
 677
 678         r = reset_terminal_fd(fd, true);
 679         if (r < 0)
 680                 return r;
 681
 682         if (dup2(fd, STDIN_FILENO) < 0)
 683                 return -errno;
 684
 685         if (dup2(fd, STDOUT_FILENO) < 0)
 686                 return -errno;
 687
 688         if (fd >= 2)
 689                 safe_close(fd);
 690         fd = -1;
 691
 692         *_saved_stdin = saved_stdin;
 693         *_saved_stdout = saved_stdout;
 694
 695         saved_stdin = saved_stdout = -1;
 696
 697         return 0;
 698 }
 699
 700 static void write_confirm_error_fd(int err, int fd, const Unit *u) {
 701         assert(err < 0);
 702
 703         if (err == -ETIMEDOUT)
 704                 dprintf(fd, "Confirmation question timed out for %s, assuming positive response.\n", u->id);
 705         else {
 706                 errno = -err;
 707                 dprintf(fd, "Couldn't ask confirmation for %s: %m, assuming positive response.\n", u->id);
 708         }
 709 }
 710
 711 static void write_confirm_error(int err, const char *vc, const Unit *u) {
 712         _cleanup_close_ int fd = -1;
 713
 714         assert(vc);
 715
 716         fd = open_terminal(vc, O_WRONLY|O_NOCTTY|O_CLOEXEC);
 717         if (fd < 0)
 718                 return;
 719
 720         write_confirm_error_fd(err, fd, u);
 721 }
 722
 723 static int restore_confirm_stdio(int *saved_stdin, int *saved_stdout) {
 724         int r = 0;
 725
 726         assert(saved_stdin);
 727         assert(saved_stdout);
 728
 729         release_terminal();
 730
 731         if (*saved_stdin >= 0)
 732                 if (dup2(*saved_stdin, STDIN_FILENO) < 0)
 733                         r = -errno;
 734
 735         if (*saved_stdout >= 0)
 736                 if (dup2(*saved_stdout, STDOUT_FILENO) < 0)
 737                         r = -errno;
 738
 739         *saved_stdin = safe_close(*saved_stdin);
 740         *saved_stdout = safe_close(*saved_stdout);
 741
 742         return r;
 743 }
 744
 745 enum {
 746         CONFIRM_PRETEND_FAILURE = -1,
 747         CONFIRM_PRETEND_SUCCESS =  0,
 748         CONFIRM_EXECUTE = 1,
 749 };
 750
 751 static int ask_for_confirmation(const char *vc, Unit *u, const char *cmdline) {
 752         int saved_stdout = -1, saved_stdin = -1, r;
 753         _cleanup_free_ char *e = NULL;
 754         char c;
 755
 756         /* For any internal errors, assume a positive response. */
 757         r = setup_confirm_stdio(vc, &saved_stdin, &saved_stdout);
 758         if (r < 0) {
 759                 write_confirm_error(r, vc, u);
 760                 return CONFIRM_EXECUTE;
 761         }
 762
 763         /* confirm_spawn might have been disabled while we were sleeping. */
 764         if (manager_is_confirm_spawn_disabled(u->manager)) {
 765                 r = 1;
 766                 goto restore_stdio;
 767         }
 768
 769         e = ellipsize(cmdline, 60, 100);
 770         if (!e) {
 771                 log_oom();
 772                 r = CONFIRM_EXECUTE;
 773                 goto restore_stdio;
 774         }
 775
 776         for (;;) {
 777                 r = ask_char(&c, "yfshiDjcn", "Execute %s? [y, f, s – h for help] ", e);
 778                 if (r < 0) {
 779                         write_confirm_error_fd(r, STDOUT_FILENO, u);
 780                         r = CONFIRM_EXECUTE;
 781                         goto restore_stdio;
 782                 }
 783
 784                 switch (c) {
 785                 case 'c':
 786                         printf("Resuming normal execution.\n");
 787                         manager_disable_confirm_spawn();
 788                         r = 1;
 789                         break;
 790                 case 'D':
 791                         unit_dump(u, stdout, "  ");
 792                         continue; /* ask again */
 793                 case 'f':
 794                         printf("Failing execution.\n");
 795                         r = CONFIRM_PRETEND_FAILURE;
 796                         break;
 797                 case 'h':
 798                         printf("  c - continue, proceed without asking anymore\n"
 799                                "  D - dump, show the state of the unit\n"
 800                                "  f - fail, don't execute the command and pretend it failed\n"
 801                                "  h - help\n"
 802                                "  i - info, show a short summary of the unit\n"
 803                                "  j - jobs, show jobs that are in progress\n"
 804                                "  s - skip, don't execute the command and pretend it succeeded\n"
 805                                "  y - yes, execute the command\n");
 806                         continue; /* ask again */
 807                 case 'i':
 808                         printf("  Description: %s\n"
 809                                "  Unit:        %s\n"
 810                                "  Command:     %s\n",
 811                                u->id, u->description, cmdline);
 812                         continue; /* ask again */
 813                 case 'j':
 814                         manager_dump_jobs(u->manager, stdout, "  ");
 815                         continue; /* ask again */
 816                 case 'n':
 817                         /* 'n' was removed in favor of 'f'. */
 818                         printf("Didn't understand 'n', did you mean 'f'?\n");
 819                         continue; /* ask again */
 820                 case 's':
 821                         printf("Skipping execution.\n");
 822                         r = CONFIRM_PRETEND_SUCCESS;
 823                         break;
 824                 case 'y':
 825                         r = CONFIRM_EXECUTE;
 826                         break;
 827                 default:
 828                         assert_not_reached("Unhandled choice");
 829                 }
 830                 break;
 831         }
 832
 833 restore_stdio:
 834         restore_confirm_stdio(&saved_stdin, &saved_stdout);
 835         return r;
 836 }
 837
 838 static int get_fixed_user(const ExecContext *c, const char **user,
 839                           uid_t *uid, gid_t *gid,
 840                           const char **home, const char **shell) {
 841         int r;
 842         const char *name;
 843
 844         assert(c);
 845
 846         if (!c->user)
 847                 return 0;
 848
 849         /* Note that we don't set $HOME or $SHELL if they are not particularly enlightening anyway
 850          * (i.e. are "/" or "/bin/nologin"). */
 851
 852         name = c->user;
 853         r = get_user_creds_clean(&name, uid, gid, home, shell);
 854         if (r < 0)
 855                 return r;
 856
 857         *user = name;
 858         return 0;
 859 }
 860
 861 static int get_fixed_group(const ExecContext *c, const char **group, gid_t *gid) {
 862         int r;
 863         const char *name;
 864
 865         assert(c);
 866
 867         if (!c->group)
 868                 return 0;
 869
 870         name = c->group;
 871         r = get_group_creds(&name, gid);
 872         if (r < 0)
 873                 return r;
 874
 875         *group = name;
 876         return 0;
 877 }
 878
 879 static int get_supplementary_groups(const ExecContext *c, const char *user,
 880                                     const char *group, gid_t gid,
 881                                     gid_t **supplementary_gids, int *ngids) {
 882         char **i;
 883         int r, k = 0;
 884         int ngroups_max;
 885         bool keep_groups = false;
 886         gid_t *groups = NULL;
 887         _cleanup_free_ gid_t *l_gids = NULL;
 888
 889         assert(c);
 890
 891         /*
 892          * If user is given, then lookup GID and supplementary groups list.
 893          * We avoid NSS lookups for gid=0. Also we have to initialize groups
 894          * here and as early as possible so we keep the list of supplementary
 895          * groups of the caller.
 896          */
 897         if (user && gid_is_valid(gid) && gid != 0) {
 898                 /* First step, initialize groups from /etc/groups */
 899                 if (initgroups(user, gid) < 0)
 900                         return -errno;
 901
 902                 keep_groups = true;
 903         }
 904
 905         if (strv_isempty(c->supplementary_groups))
 906                 return 0;
 907
 908         /*
 909          * If SupplementaryGroups= was passed then NGROUPS_MAX has to
 910          * be positive, otherwise fail.
 911          */
 912         errno = 0;
 913         ngroups_max = (int) sysconf(_SC_NGROUPS_MAX);
 914         if (ngroups_max <= 0) {
 915                 if (errno > 0)
 916                         return -errno;
 917                 else
 918                         return -EOPNOTSUPP; /* For all other values */
 919         }
 920
 921         l_gids = new(gid_t, ngroups_max);
 922         if (!l_gids)
 923                 return -ENOMEM;
 924
 925         if (keep_groups) {
 926                 /*
 927                  * Lookup the list of groups that the user belongs to, we
 928                  * avoid NSS lookups here too for gid=0.
 929                  */
 930                 k = ngroups_max;
 931                 if (getgrouplist(user, gid, l_gids, &k) < 0)
 932                         return -EINVAL;
 933         } else
 934                 k = 0;
 935
 936         STRV_FOREACH(i, c->supplementary_groups) {
 937                 const char *g;
 938
 939                 if (k >= ngroups_max)
 940                         return -E2BIG;
 941
 942                 g = *i;
 943                 r = get_group_creds(&g, l_gids+k);
 944                 if (r < 0)
 945                         return r;
 946
 947                 k++;
 948         }
 949
 950         /*
 951          * Sets ngids to zero to drop all supplementary groups, happens
 952          * when we are under root and SupplementaryGroups= is empty.
 953          */
 954         if (k == 0) {
 955                 *ngids = 0;
 956                 return 0;
 957         }
 958
 959         /* Otherwise get the final list of supplementary groups */
 960         groups = memdup(l_gids, sizeof(gid_t) * k);
 961         if (!groups)
 962                 return -ENOMEM;
 963
 964         *supplementary_gids = groups;
 965         *ngids = k;
 966
 967         groups = NULL;
 968
 969         return 0;
 970 }
 971
 972 static int enforce_groups(gid_t gid, gid_t *supplementary_gids, int ngids) {
 973         int r;
 974
 975         /* Handle SupplementaryGroups= if it is not empty */
 976         if (ngids > 0) {
 977                 r = maybe_setgroups(ngids, supplementary_gids);
 978                 if (r < 0)
 979                         return r;
 980         }
 981
 982         if (gid_is_valid(gid)) {
 983                 /* Then set our gids */
 984                 if (setresgid(gid, gid, gid) < 0)
 985                         return -errno;
 986         }
 987
 988         return 0;
 989 }
 990
 991 static int enforce_user(const ExecContext *context, uid_t uid) {
 992         assert(context);
 993
 994         if (!uid_is_valid(uid))
 995                 return 0;
 996
 997         /* Sets (but doesn't look up) the uid and make sure we keep the
 998          * capabilities while doing so. */
 999
1000         if (context->capability_ambient_set != 0) {
1001
1002                 /* First step: If we need to keep capabilities but
1003                  * drop privileges we need to make sure we keep our
1004                  * caps, while we drop privileges. */
1005                 if (uid != 0) {
1006                         int sb = context->secure_bits | 1<<SECURE_KEEP_CAPS;
1007
1008                         if (prctl(PR_GET_SECUREBITS) != sb)
1009                                 if (prctl(PR_SET_SECUREBITS, sb) < 0)
1010                                         return -errno;
1011                 }
1012         }
1013
1014         /* Second step: actually set the uids */
1015         if (setresuid(uid, uid, uid) < 0)
1016                 return -errno;
1017
1018         /* At this point we should have all necessary capabilities but
1019            are otherwise a normal user. However, the caps might got
1020            corrupted due to the setresuid() so we need clean them up
1021            later. This is done outside of this call. */
1022
1023         return 0;
1024 }
1025
1026 #if HAVE_PAM
1027
1028 static int null_conv(
1029                 int num_msg,
1030                 const struct pam_message **msg,
1031                 struct pam_response **resp,
1032                 void *appdata_ptr) {
1033
1034         /* We don't support conversations */
1035
1036         return PAM_CONV_ERR;
1037 }
1038
1039 #endif
1040
/* Opens a PAM session and forks off a helper process that closes the session again later on.
 *
 * name      – PAM service name passed to pam_start()
 * user      – user name passed to pam_start()
 * uid, gid  – credentials the forked helper switches to (failures to do so are only logged)
 * tty       – if non-NULL, set as PAM_TTY on the PAM handle
 * env       – in: variables exported into the PAM environment; out (on success): replaced by the list
 *             returned by pam_getenvlist(), the previous list is freed
 * fds/n_fds – file descriptors that are closed in the helper process
 *
 * Returns 0 on success (and always if built without PAM support), a negative errno-style value
 * otherwise. PAM failures are logged and reported as -EPERM. */
static int setup_pam(
                const char *name,
                const char *user,
                uid_t uid,
                gid_t gid,
                const char *tty,
                char ***env,
                int fds[], unsigned n_fds) {

#if HAVE_PAM

        static const struct pam_conv conv = {
                .conv = null_conv,
                .appdata_ptr = NULL
        };

        _cleanup_(barrier_destroy) Barrier barrier = BARRIER_NULL;
        pam_handle_t *handle = NULL;
        sigset_t old_ss;
        int pam_code = PAM_SUCCESS, r;
        char **nv, **e = NULL;
        bool close_session = false;
        pid_t pam_pid = 0, parent_pid;
        int flags = 0;

        assert(name);
        assert(user);
        assert(env);

        /* We set up PAM in the parent process, then fork. The child
         * will then stay around until killed via PR_SET_PDEATHSIG or
         * systemd via the cgroup logic. It will then remove the PAM
         * session again. The parent process will exec() the actual
         * daemon. We do things this way to ensure that the main PID
         * of the daemon is the one we initially fork()ed. */

        r = barrier_create(&barrier);
        if (r < 0)
                goto fail;

        if (log_get_max_level() < LOG_DEBUG)
                flags |= PAM_SILENT;

        pam_code = pam_start(name, user, &conv, &handle);
        if (pam_code != PAM_SUCCESS) {
                handle = NULL;
                goto fail;
        }

        if (tty) {
                pam_code = pam_set_item(handle, PAM_TTY, tty);
                if (pam_code != PAM_SUCCESS)
                        goto fail;
        }

        STRV_FOREACH(nv, *env) {
                pam_code = pam_putenv(handle, *nv);
                if (pam_code != PAM_SUCCESS)
                        goto fail;
        }

        pam_code = pam_acct_mgmt(handle, flags);
        if (pam_code != PAM_SUCCESS)
                goto fail;

        pam_code = pam_open_session(handle, flags);
        if (pam_code != PAM_SUCCESS)
                goto fail;

        close_session = true;

        e = pam_getenvlist(handle);
        if (!e) {
                pam_code = PAM_BUF_ERR;
                goto fail;
        }

        /* Block SIGTERM, so that we know that it won't get lost in
         * the child */

        assert_se(sigprocmask_many(SIG_BLOCK, &old_ss, SIGTERM, -1) >= 0);

        parent_pid = getpid_cached();

        pam_pid = fork();
        if (pam_pid < 0) {
                r = -errno;
                goto fail;
        }

        if (pam_pid == 0) {
                int sig, ret = EXIT_PAM;

                /* The child's job is to reset the PAM session on
                 * termination */
                barrier_set_role(&barrier, BARRIER_CHILD);

                /* This string must fit in 10 chars (i.e. the length
                 * of "/sbin/init"), to look pretty in /bin/ps */
                rename_process("(sd-pam)");

                /* Make sure we don't keep open the passed fds in this
                child. We assume that otherwise only those fds are
                open here that have been opened by PAM. */
                close_many(fds, n_fds);

                /* Drop privileges - we don't need any to pam_close_session
                 * and this will make PR_SET_PDEATHSIG work in most cases.
                 * If this fails, ignore the error - but expect sd-pam threads
                 * to fail to exit normally */

                r = maybe_setgroups(0, NULL);
                if (r < 0)
                        log_warning_errno(r, "Failed to setgroups() in sd-pam: %m");
                if (setresgid(gid, gid, gid) < 0)
                        log_warning_errno(errno, "Failed to setresgid() in sd-pam: %m");
                if (setresuid(uid, uid, uid) < 0)
                        log_warning_errno(errno, "Failed to setresuid() in sd-pam: %m");

                (void) ignore_signals(SIGPIPE, -1);

                /* Wait until our parent died. This will only work if
                 * the above setresuid() succeeds, otherwise the kernel
                 * will not allow unprivileged parents kill their privileged
                 * children this way. We rely on the control groups kill logic
                 * to do the rest for us. */
                if (prctl(PR_SET_PDEATHSIG, SIGTERM) < 0)
                        goto child_finish;

                /* Tell the parent that our setup is done. This is especially
                 * important regarding dropping privileges. Otherwise, unit
                 * setup might race against our setresuid(2) call.
                 *
                 * If the parent aborted, we'll detect this below, hence ignore
                 * return failure here. */
                (void) barrier_place(&barrier);

                /* Check if our parent process might already have died? */
                if (getppid() == parent_pid) {
                        sigset_t ss;

                        assert_se(sigemptyset(&ss) >= 0);
                        assert_se(sigaddset(&ss, SIGTERM) >= 0);

                        for (;;) {
                                int k;

                                /* Note that sigwait() reports failure by returning a positive
                                 * error number. It neither returns a negative value nor sets
                                 * errno, and leaves 'sig' unset in that case. */
                                k = sigwait(&ss, &sig);
                                if (k == EINTR)
                                        continue;
                                if (k != 0)
                                        goto child_finish;

                                assert(sig == SIGTERM);
                                break;
                        }
                }

                /* If our parent died we'll end the session */
                if (getppid() != parent_pid) {
                        pam_code = pam_close_session(handle, flags);
                        if (pam_code != PAM_SUCCESS)
                                goto child_finish;
                }

                ret = 0;

        child_finish:
                pam_end(handle, pam_code | flags);
                _exit(ret);
        }

        barrier_set_role(&barrier, BARRIER_PARENT);

        /* If the child was forked off successfully it will do all the
         * cleanups, so forget about the handle here. */
        handle = NULL;

        /* Unblock SIGTERM again in the parent */
        assert_se(sigprocmask(SIG_SETMASK, &old_ss, NULL) >= 0);

        /* We close the log explicitly here, since the PAM modules
         * might have opened it, but we don't want this fd around. */
        closelog();

        /* Synchronously wait for the child to initialize. We don't care for
         * errors as we cannot recover. However, warn loudly if it happens. */
        if (!barrier_place_and_sync(&barrier))
                log_error("PAM initialization failed");

        strv_free(*env);
        *env = e;

        return 0;

fail:
        if (pam_code != PAM_SUCCESS) {
                log_error("PAM failed: %s", pam_strerror(handle, pam_code));
                r = -EPERM;  /* PAM errors do not map to errno */
        } else
                log_error_errno(r, "PAM failed: %m");

        if (handle) {
                if (close_session)
                        pam_code = pam_close_session(handle, flags);

                pam_end(handle, pam_code | flags);
        }

        strv_free(e);
        closelog();

        return r;
#else
        return 0;
#endif
}
1257
/* Renames the calling process after the last component of the specified path, enclosed in
 * parentheses. Only the final 8 characters of the component are used; if the component is empty the
 * name "(...)" is used instead. */
static void rename_process_from_path(const char *path) {
        char buf[11]; /* '(' + up to 8 characters + ')' + NUL */
        const char *base;
        char *w = buf;
        size_t n;

        /* The resulting name shall not exceed 10 characters (i.e. the length of "/sbin/init"), so
         * that it looks pretty in /bin/ps */

        base = basename(path);
        if (isempty(base)) {
                rename_process("(...)");
                return;
        }

        n = strlen(base);
        if (n > 8) {
                /* Keep the tail rather than the head: the beginning is frequently just a common
                 * prefix such as "systemd-" and thus less interesting. */
                base += n - 8;
                n = 8;
        }

        *w++ = '(';
        memcpy(w, base, n);
        w += n;
        *w++ = ')';
        *w = 0;

        rename_process(buf);
}
1288
1289 static bool context_has_address_families(const ExecContext *c) {
1290         assert(c);
1291
1292         return c->address_families_whitelist ||
1293                 !set_isempty(c->address_families);
1294 }
1295
1296 static bool context_has_syscall_filters(const ExecContext *c) {
1297         assert(c);
1298
1299         return c->syscall_whitelist ||
1300                 !hashmap_isempty(c->syscall_filter);
1301 }
1302
1303 static bool context_has_no_new_privileges(const ExecContext *c) {
1304         assert(c);
1305
1306         if (c->no_new_privileges)
1307                 return true;
1308
1309         if (have_effective_cap(CAP_SYS_ADMIN)) /* if we are privileged, we don't need NNP */
1310                 return false;
1311
1312         /* We need NNP if we have any form of seccomp and are unprivileged */
1313         return context_has_address_families(c) ||
1314                 c->memory_deny_write_execute ||
1315                 c->restrict_realtime ||
1316                 exec_context_restrict_namespaces_set(c) ||
1317                 c->protect_kernel_tunables ||
1318                 c->protect_kernel_modules ||
1319                 c->private_devices ||
1320                 context_has_syscall_filters(c) ||
1321                 !set_isempty(c->syscall_archs) ||
1322                 c->lock_personality;
1323 }
1324
1325 #if HAVE_SECCOMP
1326
1327 static bool skip_seccomp_unavailable(const Unit* u, const char* msg) {
1328
1329         if (is_seccomp_available())
1330                 return false;
1331
1332         log_unit_debug(u, "SECCOMP features not detected in the kernel, skipping %s", msg);
1333         return true;
1334 }
1335
1336 static int apply_syscall_filter(const Unit* u, const ExecContext *c, bool needs_ambient_hack) {
1337         uint32_t negative_action, default_action, action;
1338         int r;
1339
1340         assert(u);
1341         assert(c);
1342
1343         if (!context_has_syscall_filters(c))
1344                 return 0;
1345
1346         if (skip_seccomp_unavailable(u, "SystemCallFilter="))
1347                 return 0;
1348
1349         negative_action = c->syscall_errno == 0 ? SCMP_ACT_KILL : SCMP_ACT_ERRNO(c->syscall_errno);
1350
1351         if (c->syscall_whitelist) {
1352                 default_action = negative_action;
1353                 action = SCMP_ACT_ALLOW;
1354         } else {
1355                 default_action = SCMP_ACT_ALLOW;
1356                 action = negative_action;
1357         }
1358
1359         if (needs_ambient_hack) {
1360                 r = seccomp_filter_set_add(c->syscall_filter, c->syscall_whitelist, syscall_filter_sets + SYSCALL_FILTER_SET_SETUID);
1361                 if (r < 0)
1362                         return r;
1363         }
1364
1365         return seccomp_load_syscall_filter_set_raw(default_action, c->syscall_filter, action);
1366 }
1367
1368 static int apply_syscall_archs(const Unit *u, const ExecContext *c) {
1369         assert(u);
1370         assert(c);
1371
1372         if (set_isempty(c->syscall_archs))
1373                 return 0;
1374
1375         if (skip_seccomp_unavailable(u, "SystemCallArchitectures="))
1376                 return 0;
1377
1378         return seccomp_restrict_archs(c->syscall_archs);
1379 }
1380
1381 static int apply_address_families(const Unit* u, const ExecContext *c) {
1382         assert(u);
1383         assert(c);
1384
1385         if (!context_has_address_families(c))
1386                 return 0;
1387
1388         if (skip_seccomp_unavailable(u, "RestrictAddressFamilies="))
1389                 return 0;
1390
1391         return seccomp_restrict_address_families(c->address_families, c->address_families_whitelist);
1392 }
1393
1394 static int apply_memory_deny_write_execute(const Unit* u, const ExecContext *c) {
1395         assert(u);
1396         assert(c);
1397
1398         if (!c->memory_deny_write_execute)
1399                 return 0;
1400
1401         if (skip_seccomp_unavailable(u, "MemoryDenyWriteExecute="))
1402                 return 0;
1403
1404         return seccomp_memory_deny_write_execute();
1405 }
1406
1407 static int apply_restrict_realtime(const Unit* u, const ExecContext *c) {
1408         assert(u);
1409         assert(c);
1410
1411         if (!c->restrict_realtime)
1412                 return 0;
1413
1414         if (skip_seccomp_unavailable(u, "RestrictRealtime="))
1415                 return 0;
1416
1417         return seccomp_restrict_realtime();
1418 }
1419
1420 static int apply_protect_sysctl(const Unit *u, const ExecContext *c) {
1421         assert(u);
1422         assert(c);
1423
1424         /* Turn off the legacy sysctl() system call. Many distributions turn this off while building the kernel, but
1425          * let's protect even those systems where this is left on in the kernel. */
1426
1427         if (!c->protect_kernel_tunables)
1428                 return 0;
1429
1430         if (skip_seccomp_unavailable(u, "ProtectKernelTunables="))
1431                 return 0;
1432
1433         return seccomp_protect_sysctl();
1434 }
1435
1436 static int apply_protect_kernel_modules(const Unit *u, const ExecContext *c) {
1437         assert(u);
1438         assert(c);
1439
1440         /* Turn off module syscalls on ProtectKernelModules=yes */
1441
1442         if (!c->protect_kernel_modules)
1443                 return 0;
1444
1445         if (skip_seccomp_unavailable(u, "ProtectKernelModules="))
1446                 return 0;
1447
1448         return seccomp_load_syscall_filter_set(SCMP_ACT_ALLOW, syscall_filter_sets + SYSCALL_FILTER_SET_MODULE, SCMP_ACT_ERRNO(EPERM));
1449 }
1450
1451 static int apply_private_devices(const Unit *u, const ExecContext *c) {
1452         assert(u);
1453         assert(c);
1454
1455         /* If PrivateDevices= is set, also turn off iopl and all @raw-io syscalls. */
1456
1457         if (!c->private_devices)
1458                 return 0;
1459
1460         if (skip_seccomp_unavailable(u, "PrivateDevices="))
1461                 return 0;
1462
1463         return seccomp_load_syscall_filter_set(SCMP_ACT_ALLOW, syscall_filter_sets + SYSCALL_FILTER_SET_RAW_IO, SCMP_ACT_ERRNO(EPERM));
1464 }
1465
1466 static int apply_restrict_namespaces(Unit *u, const ExecContext *c) {
1467         assert(u);
1468         assert(c);
1469
1470         if (!exec_context_restrict_namespaces_set(c))
1471                 return 0;
1472
1473         if (skip_seccomp_unavailable(u, "RestrictNamespaces="))
1474                 return 0;
1475
1476         return seccomp_restrict_namespaces(c->restrict_namespaces);
1477 }
1478
1479 static int apply_lock_personality(const Unit* u, const ExecContext *c) {
1480         unsigned long personality;
1481         int r;
1482
1483         assert(u);
1484         assert(c);
1485
1486         if (!c->lock_personality)
1487                 return 0;
1488
1489         if (skip_seccomp_unavailable(u, "LockPersonality="))
1490                 return 0;
1491
1492         personality = c->personality;
1493
1494         /* If personality is not specified, use either PER_LINUX or PER_LINUX32 depending on what is currently set. */
1495         if (personality == PERSONALITY_INVALID) {
1496
1497                 r = opinionated_personality(&personality);
1498                 if (r < 0)
1499                         return r;
1500         }
1501
1502         return seccomp_lock_personality(personality);
1503 }
1504
1505 #endif
1506
1507 static void do_idle_pipe_dance(int idle_pipe[4]) {
1508         assert(idle_pipe);
1509
1510         idle_pipe[1] = safe_close(idle_pipe[1]);
1511         idle_pipe[2] = safe_close(idle_pipe[2]);
1512
1513         if (idle_pipe[0] >= 0) {
1514                 int r;
1515
1516                 r = fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT_USEC);
1517
1518                 if (idle_pipe[3] >= 0 && r == 0 /* timeout */) {
1519                         ssize_t n;
1520
1521                         /* Signal systemd that we are bored and want to continue. */
1522                         n = write(idle_pipe[3], "x", 1);
1523                         if (n > 0)
1524                                 /* Wait for systemd to react to the signal above. */
1525                                 fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT2_USEC);
1526                 }
1527
1528                 idle_pipe[0] = safe_close(idle_pipe[0]);
1529
1530         }
1531
1532         idle_pipe[3] = safe_close(idle_pipe[3]);
1533 }
1534
1535 static int build_environment(
1536                 Unit *u,
1537                 const ExecContext *c,
1538                 const ExecParameters *p,
1539                 unsigned n_fds,
1540                 const char *home,
1541                 const char *username,
1542                 const char *shell,
1543                 dev_t journal_stream_dev,
1544                 ino_t journal_stream_ino,
1545                 char ***ret) {
1546
1547         _cleanup_strv_free_ char **our_env = NULL;
1548         unsigned n_env = 0;
1549         char *x;
1550
1551         assert(u);
1552         assert(c);
1553         assert(ret);
1554
1555         our_env = new0(char*, 14);
1556         if (!our_env)
1557                 return -ENOMEM;
1558
1559         if (n_fds > 0) {
1560                 _cleanup_free_ char *joined = NULL;
1561
1562                 if (asprintf(&x, "LISTEN_PID="PID_FMT, getpid_cached()) < 0)
1563                         return -ENOMEM;
1564                 our_env[n_env++] = x;
1565
1566                 if (asprintf(&x, "LISTEN_FDS=%u", n_fds) < 0)
1567                         return -ENOMEM;
1568                 our_env[n_env++] = x;
1569
1570                 joined = strv_join(p->fd_names, ":");
1571                 if (!joined)
1572                         return -ENOMEM;
1573
1574                 x = strjoin("LISTEN_FDNAMES=", joined);
1575                 if (!x)
1576                         return -ENOMEM;
1577                 our_env[n_env++] = x;
1578         }
1579
1580         if ((p->flags & EXEC_SET_WATCHDOG) && p->watchdog_usec > 0) {
1581                 if (asprintf(&x, "WATCHDOG_PID="PID_FMT, getpid_cached()) < 0)
1582                         return -ENOMEM;
1583                 our_env[n_env++] = x;
1584
1585                 if (asprintf(&x, "WATCHDOG_USEC="USEC_FMT, p->watchdog_usec) < 0)
1586                         return -ENOMEM;
1587                 our_env[n_env++] = x;
1588         }
1589
1590         /* If this is D-Bus, tell the nss-systemd module, since it relies on being able to use D-Bus look up dynamic
1591          * users via PID 1, possibly dead-locking the dbus daemon. This way it will not use D-Bus to resolve names, but
1592          * check the database directly. */
1593         if (p->flags & EXEC_NSS_BYPASS_BUS) {
1594                 x = strdup("SYSTEMD_NSS_BYPASS_BUS=1");
1595                 if (!x)
1596                         return -ENOMEM;
1597                 our_env[n_env++] = x;
1598         }
1599
1600         if (home) {
1601                 x = strappend("HOME=", home);
1602                 if (!x)
1603                         return -ENOMEM;
1604                 our_env[n_env++] = x;
1605         }
1606
1607         if (username) {
1608                 x = strappend("LOGNAME=", username);
1609                 if (!x)
1610                         return -ENOMEM;
1611                 our_env[n_env++] = x;
1612
1613                 x = strappend("USER=", username);
1614                 if (!x)
1615                         return -ENOMEM;
1616                 our_env[n_env++] = x;
1617         }
1618
1619         if (shell) {
1620                 x = strappend("SHELL=", shell);
1621                 if (!x)
1622                         return -ENOMEM;
1623                 our_env[n_env++] = x;
1624         }
1625
1626         if (!sd_id128_is_null(u->invocation_id)) {
1627                 if (asprintf(&x, "INVOCATION_ID=" SD_ID128_FORMAT_STR, SD_ID128_FORMAT_VAL(u->invocation_id)) < 0)
1628                         return -ENOMEM;
1629
1630                 our_env[n_env++] = x;
1631         }
1632
1633         if (exec_context_needs_term(c)) {
1634                 const char *tty_path, *term = NULL;
1635
1636                 tty_path = exec_context_tty_path(c);
1637
1638                 /* If we are forked off PID 1 and we are supposed to operate on /dev/console, then let's try to inherit
1639                  * the $TERM set for PID 1. This is useful for containers so that the $TERM the container manager
1640                  * passes to PID 1 ends up all the way in the console login shown. */
1641
1642                 if (path_equal(tty_path, "/dev/console") && getppid() == 1)
1643                         term = getenv("TERM");
1644                 if (!term)
1645                         term = default_term_for_tty(tty_path);
1646
1647                 x = strappend("TERM=", term);
1648                 if (!x)
1649                         return -ENOMEM;
1650                 our_env[n_env++] = x;
1651         }
1652
1653         if (journal_stream_dev != 0 && journal_stream_ino != 0) {
1654                 if (asprintf(&x, "JOURNAL_STREAM=" DEV_FMT ":" INO_FMT, journal_stream_dev, journal_stream_ino) < 0)
1655                         return -ENOMEM;
1656
1657                 our_env[n_env++] = x;
1658         }
1659
1660         our_env[n_env++] = NULL;
1661         assert(n_env <= 12);
1662
1663         *ret = our_env;
1664         our_env = NULL;
1665
1666         return 0;
1667 }
1668
1669 static int build_pass_environment(const ExecContext *c, char ***ret) {
1670         _cleanup_strv_free_ char **pass_env = NULL;
1671         size_t n_env = 0, n_bufsize = 0;
1672         char **i;
1673
1674         STRV_FOREACH(i, c->pass_environment) {
1675                 _cleanup_free_ char *x = NULL;
1676                 char *v;
1677
1678                 v = getenv(*i);
1679                 if (!v)
1680                         continue;
1681                 x = strjoin(*i, "=", v);
1682                 if (!x)
1683                         return -ENOMEM;
1684
1685                 if (!GREEDY_REALLOC(pass_env, n_bufsize, n_env + 2))
1686                         return -ENOMEM;
1687
1688                 pass_env[n_env++] = x;
1689                 pass_env[n_env] = NULL;
1690                 x = NULL;
1691         }
1692
1693         *ret = pass_env;
1694         pass_env = NULL;
1695
1696         return 0;
1697 }
1698
1699 static bool exec_needs_mount_namespace(
1700                 const ExecContext *context,
1701                 const ExecParameters *params,
1702                 ExecRuntime *runtime) {
1703
1704         assert(context);
1705         assert(params);
1706
1707         if (context->root_image)
1708                 return true;
1709
1710         if (!strv_isempty(context->read_write_paths) ||
1711             !strv_isempty(context->read_only_paths) ||
1712             !strv_isempty(context->inaccessible_paths))
1713                 return true;
1714
1715         if (context->n_bind_mounts > 0 ||
1716             !strv_isempty(context->directories[EXEC_DIRECTORY_RUNTIME].paths) ||
1717             !strv_isempty(context->directories[EXEC_DIRECTORY_STATE].paths) ||
1718             !strv_isempty(context->directories[EXEC_DIRECTORY_CACHE].paths) ||
1719             !strv_isempty(context->directories[EXEC_DIRECTORY_LOGS].paths) ||
1720             !strv_isempty(context->directories[EXEC_DIRECTORY_CONFIGURATION].paths))
1721                 return true;
1722
1723         if (context->mount_flags != 0)
1724                 return true;
1725
1726         if (context->private_tmp && runtime && (runtime->tmp_dir || runtime->var_tmp_dir))
1727                 return true;
1728
1729         if (context->private_devices ||
1730             context->protect_system != PROTECT_SYSTEM_NO ||
1731             context->protect_home != PROTECT_HOME_NO ||
1732             context->protect_kernel_tunables ||
1733             context->protect_kernel_modules ||
1734             context->protect_control_groups)
1735                 return true;
1736
1737         if (context->mount_apivfs && (context->root_image || context->root_directory))
1738                 return true;
1739
1740         return false;
1741 }
1742
1743 static int setup_private_users(uid_t uid, gid_t gid) {
1744         _cleanup_free_ char *uid_map = NULL, *gid_map = NULL;
1745         _cleanup_close_pair_ int errno_pipe[2] = { -1, -1 };
1746         _cleanup_close_ int unshare_ready_fd = -1;
1747         _cleanup_(sigkill_waitp) pid_t pid = 0;
1748         uint64_t c = 1;
1749         siginfo_t si;
1750         ssize_t n;
1751         int r;
1752
1753         /* Set up a user namespace and map root to root, the selected UID/GID to itself, and everything else to
1754          * nobody. In order to be able to write this mapping we need CAP_SETUID in the original user namespace, which
1755          * we however lack after opening the user namespace. To work around this we fork() a temporary child process,
1756          * which waits for the parent to create the new user namespace while staying in the original namespace. The
1757          * child then writes the UID mapping, under full privileges. The parent waits for the child to finish and
1758          * continues execution normally. */
1759
1760         if (uid != 0 && uid_is_valid(uid)) {
1761                 r = asprintf(&uid_map,
1762                              "0 0 1\n"                      /* Map root → root */
1763                              UID_FMT " " UID_FMT " 1\n",    /* Map $UID → $UID */
1764                              uid, uid);
1765                 if (r < 0)
1766                         return -ENOMEM;
1767         } else {
1768                 uid_map = strdup("0 0 1\n");            /* The case where the above is the same */
1769                 if (!uid_map)
1770                         return -ENOMEM;
1771         }
1772
1773         if (gid != 0 && gid_is_valid(gid)) {
1774                 r = asprintf(&gid_map,
1775                              "0 0 1\n"                      /* Map root → root */
1776                              GID_FMT " " GID_FMT " 1\n",    /* Map $GID → $GID */
1777                              gid, gid);
1778                 if (r < 0)
1779                         return -ENOMEM;
1780         } else {
1781                 gid_map = strdup("0 0 1\n");            /* The case where the above is the same */
1782                 if (!gid_map)
1783                         return -ENOMEM;
1784         }
1785
1786         /* Create a communication channel so that the parent can tell the child when it finished creating the user
1787          * namespace. */
1788         unshare_ready_fd = eventfd(0, EFD_CLOEXEC);
1789         if (unshare_ready_fd < 0)
1790                 return -errno;
1791
1792         /* Create a communication channel so that the child can tell the parent a proper error code in case it
1793          * failed. */
1794         if (pipe2(errno_pipe, O_CLOEXEC) < 0)
1795                 return -errno;
1796
1797         pid = fork();
1798         if (pid < 0)
1799                 return -errno;
1800
1801         if (pid == 0) {
1802                 _cleanup_close_ int fd = -1;
1803                 const char *a;
1804                 pid_t ppid;
1805
1806                 /* Child process, running in the original user namespace. Let's update the parent's UID/GID map from
1807                  * here, after the parent opened its own user namespace. */
1808
1809                 ppid = getppid();
1810                 errno_pipe[0] = safe_close(errno_pipe[0]);
1811
1812                 /* Wait until the parent unshared the user namespace */
1813                 if (read(unshare_ready_fd, &c, sizeof(c)) < 0) {
1814                         r = -errno;
1815                         goto child_fail;
1816                 }
1817
1818                 /* Disable the setgroups() system call in the child user namespace, for good. */
1819                 a = procfs_file_alloca(ppid, "setgroups");
1820                 fd = open(a, O_WRONLY|O_CLOEXEC);
1821                 if (fd < 0) {
1822                         if (errno != ENOENT) {
1823                                 r = -errno;
1824                                 goto child_fail;
1825                         }
1826
1827                         /* If the file is missing the kernel is too old, let's continue anyway. */
1828                 } else {
1829                         if (write(fd, "deny\n", 5) < 0) {
1830                                 r = -errno;
1831                                 goto child_fail;
1832                         }
1833
1834                         fd = safe_close(fd);
1835                 }
1836
1837                 /* First write the GID map */
1838                 a = procfs_file_alloca(ppid, "gid_map");
1839                 fd = open(a, O_WRONLY|O_CLOEXEC);
1840                 if (fd < 0) {
1841                         r = -errno;
1842                         goto child_fail;
1843                 }
1844                 if (write(fd, gid_map, strlen(gid_map)) < 0) {
1845                         r = -errno;
1846                         goto child_fail;
1847                 }
1848                 fd = safe_close(fd);
1849
1850                 /* The write the UID map */
1851                 a = procfs_file_alloca(ppid, "uid_map");
1852                 fd = open(a, O_WRONLY|O_CLOEXEC);
1853                 if (fd < 0) {
1854                         r = -errno;
1855                         goto child_fail;
1856                 }
1857                 if (write(fd, uid_map, strlen(uid_map)) < 0) {
1858                         r = -errno;
1859                         goto child_fail;
1860                 }
1861
1862                 _exit(EXIT_SUCCESS);
1863
1864         child_fail:
1865                 (void) write(errno_pipe[1], &r, sizeof(r));
1866                 _exit(EXIT_FAILURE);
1867         }
1868
1869         errno_pipe[1] = safe_close(errno_pipe[1]);
1870
1871         if (unshare(CLONE_NEWUSER) < 0)
1872                 return -errno;
1873
1874         /* Let the child know that the namespace is ready now */
1875         if (write(unshare_ready_fd, &c, sizeof(c)) < 0)
1876                 return -errno;
1877
1878         /* Try to read an error code from the child */
1879         n = read(errno_pipe[0], &r, sizeof(r));
1880         if (n < 0)
1881                 return -errno;
1882         if (n == sizeof(r)) { /* an error code was sent to us */
1883                 if (r < 0)
1884                         return r;
1885                 return -EIO;
1886         }
1887         if (n != 0) /* on success we should have read 0 bytes */
1888                 return -EIO;
1889
1890         r = wait_for_terminate(pid, &si);
1891         if (r < 0)
1892                 return r;
1893         pid = 0;
1894
1895         /* If something strange happened with the child, let's consider this fatal, too */
1896         if (si.si_code != CLD_EXITED || si.si_status != 0)
1897                 return -EIO;
1898
1899         return 0;
1900 }
1901
1902 static int setup_exec_directory(
1903                 const ExecContext *context,
1904                 const ExecParameters *params,
1905                 uid_t uid,
1906                 gid_t gid,
1907                 ExecDirectoryType type,
1908                 int *exit_status) {
1909
1910         static const int exit_status_table[_EXEC_DIRECTORY_TYPE_MAX] = {
1911                 [EXEC_DIRECTORY_RUNTIME] = EXIT_RUNTIME_DIRECTORY,
1912                 [EXEC_DIRECTORY_STATE] = EXIT_STATE_DIRECTORY,
1913                 [EXEC_DIRECTORY_CACHE] = EXIT_CACHE_DIRECTORY,
1914                 [EXEC_DIRECTORY_LOGS] = EXIT_LOGS_DIRECTORY,
1915                 [EXEC_DIRECTORY_CONFIGURATION] = EXIT_CONFIGURATION_DIRECTORY,
1916         };
1917         char **rt;
1918         int r;
1919
1920         assert(context);
1921         assert(params);
1922         assert(type >= 0 && type < _EXEC_DIRECTORY_TYPE_MAX);
1923         assert(exit_status);
1924
1925         if (!params->prefix[type])
1926                 return 0;
1927
1928         if (params->flags & EXEC_CHOWN_DIRECTORIES) {
1929                 if (!uid_is_valid(uid))
1930                         uid = 0;
1931                 if (!gid_is_valid(gid))
1932                         gid = 0;
1933         }
1934
1935         STRV_FOREACH(rt, context->directories[type].paths) {
1936                 _cleanup_free_ char *p = NULL, *pp = NULL;
1937                 const char *effective;
1938
1939                 p = strjoin(params->prefix[type], "/", *rt);
1940                 if (!p) {
1941                         r = -ENOMEM;
1942                         goto fail;
1943                 }
1944
1945                 r = mkdir_parents_label(p, 0755);
1946                 if (r < 0)
1947                         goto fail;
1948
1949                 if (context->dynamic_user &&
1950                     !IN_SET(type, EXEC_DIRECTORY_RUNTIME, EXEC_DIRECTORY_CONFIGURATION)) {
1951                         _cleanup_free_ char *private_root = NULL, *relative = NULL, *parent = NULL;
1952
1953                         /* So, here's one extra complication when dealing with DynamicUser=1 units. In that case we
1954                          * want to avoid leaving a directory around fully accessible that is owned by a dynamic user
1955                          * whose UID is later on reused. To lock this down we use the same trick used by container
1956                          * managers to prohibit host users to get access to files of the same UID in containers: we
1957                          * place everything inside a directory that has an access mode of 0700 and is owned root:root,
1958                          * so that it acts as security boundary for unprivileged host code. We then use fs namespacing
1959                          * to make this directory permeable for the service itself.
1960                          *
1961                          * Specifically: for a service which wants a special directory "foo/" we first create a
1962                          * directory "private/" with access mode 0700 owned by root:root. Then we place "foo" inside of
1963                          * that directory (i.e. "private/foo/"), and make "foo" a symlink to "private/foo". This way,
1964                          * privileged host users can access "foo/" as usual, but unprivileged host users can't look
1965                          * into it. Inside of the namespaceof the container "private/" is replaced by a more liberally
1966                          * accessible tmpfs, into which the host's "private/foo/" is mounted under the same name, thus
1967                          * disabling the access boundary for the service and making sure it only gets access to the
1968                          * dirs it needs but no others. Tricky? Yes, absolutely, but it works!
1969                          *
1970                          * Note that we don't do this for EXEC_DIRECTORY_CONFIGURATION as that's assumed not to be
1971                          * owned by the service itself.
1972                          * Also, note that we don't do this for EXEC_DIRECTORY_RUNTIME as that's often used for sharing
1973                          * files or sockets with other services. */
1974
1975                         private_root = strjoin(params->prefix[type], "/private");
1976                         if (!private_root) {
1977                                 r = -ENOMEM;
1978                                 goto fail;
1979                         }
1980
1981                         /* First set up private root if it doesn't exist yet, with access mode 0700 and owned by root:root */
1982                         r = mkdir_safe_label(private_root, 0700, 0, 0, false);
1983                         if (r < 0)
1984                                 goto fail;
1985
1986                         pp = strjoin(private_root, "/", *rt);
1987                         if (!pp) {
1988                                 r = -ENOMEM;
1989                                 goto fail;
1990                         }
1991
1992                         /* Create all directories between the configured directory and this private root, and mark them 0755 */
1993                         r = mkdir_parents_label(pp, 0755);
1994                         if (r < 0)
1995                                 goto fail;
1996
1997                         /* Finally, create the actual directory for the service */
1998                         r = mkdir_label(pp, context->directories[type].mode);
1999                         if (r < 0 && r != -EEXIST)
2000                                 goto fail;
2001
2002                         parent = dirname_malloc(p);
2003                         if (!parent) {
2004                                 r = -ENOMEM;
2005                                 goto fail;
2006                         }
2007
2008                         r = path_make_relative(parent, pp, &relative);
2009                         if (r < 0)
2010                                 goto fail;
2011
2012                         /* And link it up from the original place */
2013                         r = symlink_idempotent(relative, p);
2014                         if (r < 0)
2015                                 goto fail;
2016
2017                         effective = pp;
2018
2019                 } else {
2020                         r = mkdir_label(p, context->directories[type].mode);
2021                         if (r < 0 && r != -EEXIST)
2022                                 goto fail;
2023
2024                         effective = p;
2025                 }
2026
2027                 /* First lock down the access mode */
2028                 if (chmod(effective, context->directories[type].mode) < 0) {
2029                         r = -errno;
2030                         goto fail;
2031                 }
2032
2033                 /* Don't change the owner of the configuration directory, as in the common case it is not written to by
2034                  * a service, and shall not be writable. */
2035                 if (type == EXEC_DIRECTORY_CONFIGURATION)
2036                         continue;
2037
2038                 /* Then, change the ownership of the whole tree, if necessary */
2039                 r = path_chown_recursive(effective, uid, gid);
2040                 if (r < 0)
2041                         goto fail;
2042         }
2043
2044         return 0;
2045
2046 fail:
2047         *exit_status = exit_status_table[type];
2048         return r;
2049 }
2050
2051 static int setup_smack(
2052                 const ExecContext *context,
2053                 const ExecCommand *command) {
2054
2055         int r;
2056
2057         assert(context);
2058         assert(command);
2059
2060         if (context->smack_process_label) {
2061                 r = mac_smack_apply_pid(0, context->smack_process_label);
2062                 if (r < 0)
2063                         return r;
2064         }
2065 #ifdef SMACK_DEFAULT_PROCESS_LABEL
2066         else {
2067                 _cleanup_free_ char *exec_label = NULL;
2068
2069                 r = mac_smack_read(command->path, SMACK_ATTR_EXEC, &exec_label);
2070                 if (r < 0 && !IN_SET(r, -ENODATA, -EOPNOTSUPP))
2071                         return r;
2072
2073                 r = mac_smack_apply_pid(0, exec_label ? : SMACK_DEFAULT_PROCESS_LABEL);
2074                 if (r < 0)
2075                         return r;
2076         }
2077 #endif
2078
2079         return 0;
2080 }
2081
2082 static int compile_bind_mounts(
2083                 const ExecContext *context,
2084                 const ExecParameters *params,
2085                 BindMount **ret_bind_mounts,
2086                 unsigned *ret_n_bind_mounts,
2087                 char ***ret_empty_directories) {
2088
2089         _cleanup_strv_free_ char **empty_directories = NULL;
2090         BindMount *bind_mounts;
2091         unsigned n, h = 0, i;
2092         ExecDirectoryType t;
2093         int r;
2094
2095         assert(context);
2096         assert(params);
2097         assert(ret_bind_mounts);
2098         assert(ret_n_bind_mounts);
2099         assert(ret_empty_directories);
2100
2101         n = context->n_bind_mounts;
2102         for (t = 0; t < _EXEC_DIRECTORY_TYPE_MAX; t++) {
2103                 if (!params->prefix[t])
2104                         continue;
2105
2106                 n += strv_length(context->directories[t].paths);
2107         }
2108
2109         if (n <= 0) {
2110                 *ret_bind_mounts = NULL;
2111                 *ret_n_bind_mounts = 0;
2112                 *ret_empty_directories = NULL;
2113                 return 0;
2114         }
2115
2116         bind_mounts = new(BindMount, n);
2117         if (!bind_mounts)
2118                 return -ENOMEM;
2119
2120         for (i = 0; i < context->n_bind_mounts; i++) {
2121                 BindMount *item = context->bind_mounts + i;
2122                 char *s, *d;
2123
2124                 s = strdup(item->source);
2125                 if (!s) {
2126                         r = -ENOMEM;
2127                         goto finish;
2128                 }
2129
2130                 d = strdup(item->destination);
2131                 if (!d) {
2132                         free(s);
2133                         r = -ENOMEM;
2134                         goto finish;
2135                 }
2136
2137                 bind_mounts[h++] = (BindMount) {
2138                         .source = s,
2139                         .destination = d,
2140                         .read_only = item->read_only,
2141                         .recursive = item->recursive,
2142                         .ignore_enoent = item->ignore_enoent,
2143                 };
2144         }
2145
2146         for (t = 0; t < _EXEC_DIRECTORY_TYPE_MAX; t++) {
2147                 char **suffix;
2148
2149                 if (!params->prefix[t])
2150                         continue;
2151
2152                 if (strv_isempty(context->directories[t].paths))
2153                         continue;
2154
2155                 if (context->dynamic_user &&
2156                     !IN_SET(t, EXEC_DIRECTORY_RUNTIME, EXEC_DIRECTORY_CONFIGURATION)) {
2157                         char *private_root;
2158
2159                         /* So this is for a dynamic user, and we need to make sure the process can access its own
2160                          * directory. For that we overmount the usually inaccessible "private" subdirectory with a
2161                          * tmpfs that makes it accessible and is empty except for the submounts we do this for. */
2162
2163                         private_root = strjoin(params->prefix[t], "/private");
2164                         if (!private_root) {
2165                                 r = -ENOMEM;
2166                                 goto finish;
2167                         }
2168
2169                         r = strv_consume(&empty_directories, private_root);
2170                         if (r < 0) {
2171                                 r = -ENOMEM;
2172                                 goto finish;
2173                         }
2174                 }
2175
2176                 STRV_FOREACH(suffix, context->directories[t].paths) {
2177                         char *s, *d;
2178
2179                         if (context->dynamic_user &&
2180                             !IN_SET(t, EXEC_DIRECTORY_RUNTIME, EXEC_DIRECTORY_CONFIGURATION))
2181                                 s = strjoin(params->prefix[t], "/private/", *suffix);
2182                         else
2183                                 s = strjoin(params->prefix[t], "/", *suffix);
2184                         if (!s) {
2185                                 r = -ENOMEM;
2186                                 goto finish;
2187                         }
2188
2189                         d = strdup(s);
2190                         if (!d) {
2191                                 free(s);
2192                                 r = -ENOMEM;
2193                                 goto finish;
2194                         }
2195
2196                         bind_mounts[h++] = (BindMount) {
2197                                 .source = s,
2198                                 .destination = d,
2199                                 .read_only = false,
2200                                 .recursive = true,
2201                                 .ignore_enoent = false,
2202                         };
2203                 }
2204         }
2205
2206         assert(h == n);
2207
2208         *ret_bind_mounts = bind_mounts;
2209         *ret_n_bind_mounts = n;
2210         *ret_empty_directories = empty_directories;
2211
2212         empty_directories = NULL;
2213
2214         return (int) n;
2215
2216 finish:
2217         bind_mount_free_many(bind_mounts, h);
2218         return r;
2219 }
2220
2221 static int apply_mount_namespace(
2222                 Unit *u,
2223                 ExecCommand *command,
2224                 const ExecContext *context,
2225                 const ExecParameters *params,
2226                 ExecRuntime *runtime) {
2227
2228         _cleanup_strv_free_ char **empty_directories = NULL;
2229         char *tmp = NULL, *var = NULL;
2230         const char *root_dir = NULL, *root_image = NULL;
2231         NamespaceInfo ns_info = {
2232                 .ignore_protect_paths = false,
2233                 .private_dev = context->private_devices,
2234                 .protect_control_groups = context->protect_control_groups,
2235                 .protect_kernel_tunables = context->protect_kernel_tunables,
2236                 .protect_kernel_modules = context->protect_kernel_modules,
2237                 .mount_apivfs = context->mount_apivfs,
2238         };
2239         bool needs_sandboxing;
2240         BindMount *bind_mounts = NULL;
2241         unsigned n_bind_mounts = 0;
2242         int r;
2243
2244         assert(context);
2245
2246         /* The runtime struct only contains the parent of the private /tmp,
2247          * which is non-accessible to world users. Inside of it there's a /tmp
2248          * that is sticky, and that's the one we want to use here. */
2249
2250         if (context->private_tmp && runtime) {
2251                 if (runtime->tmp_dir)
2252                         tmp = strjoina(runtime->tmp_dir, "/tmp");
2253                 if (runtime->var_tmp_dir)
2254                         var = strjoina(runtime->var_tmp_dir, "/tmp");
2255         }
2256
2257         if (params->flags & EXEC_APPLY_CHROOT) {
2258                 root_image = context->root_image;
2259
2260                 if (!root_image)
2261                         root_dir = context->root_directory;
2262         }
2263
2264         r = compile_bind_mounts(context, params, &bind_mounts, &n_bind_mounts, &empty_directories);
2265         if (r < 0)
2266                 return r;
2267
2268         /*
2269          * If DynamicUser=no and RootDirectory= is set then lets pass a relaxed
2270          * sandbox info, otherwise enforce it, don't ignore protected paths and
2271          * fail if we are enable to apply the sandbox inside the mount namespace.
2272          */
2273         if (!context->dynamic_user && root_dir)
2274                 ns_info.ignore_protect_paths = true;
2275
2276         needs_sandboxing = (params->flags & EXEC_APPLY_SANDBOXING) && !(command->flags & EXEC_COMMAND_FULLY_PRIVILEGED);
2277
2278         r = setup_namespace(root_dir, root_image,
2279                             &ns_info, context->read_write_paths,
2280                             needs_sandboxing ? context->read_only_paths : NULL,
2281                             needs_sandboxing ? context->inaccessible_paths : NULL,
2282                             empty_directories,
2283                             bind_mounts,
2284                             n_bind_mounts,
2285                             tmp,
2286                             var,
2287                             needs_sandboxing ? context->protect_home : PROTECT_HOME_NO,
2288                             needs_sandboxing ? context->protect_system : PROTECT_SYSTEM_NO,
2289                             context->mount_flags,
2290                             DISSECT_IMAGE_DISCARD_ON_LOOP);
2291
2292         bind_mount_free_many(bind_mounts, n_bind_mounts);
2293
2294         /* If we couldn't set up the namespace this is probably due to a
2295          * missing capability. In this case, silently proceeed. */
2296         if (IN_SET(r, -EPERM, -EACCES)) {
2297                 log_unit_debug_errno(u, r, "Failed to set up namespace, assuming containerized execution, ignoring: %m");
2298                 return 0;
2299         }
2300
2301         return r;
2302 }
2303
2304 static int apply_working_directory(
2305                 const ExecContext *context,
2306                 const ExecParameters *params,
2307                 const char *home,
2308                 const bool needs_mount_ns,
2309                 int *exit_status) {
2310
2311         const char *d, *wd;
2312
2313         assert(context);
2314         assert(exit_status);
2315
2316         if (context->working_directory_home) {
2317
2318                 if (!home) {
2319                         *exit_status = EXIT_CHDIR;
2320                         return -ENXIO;
2321                 }
2322
2323                 wd = home;
2324
2325         } else if (context->working_directory)
2326                 wd = context->working_directory;
2327         else
2328                 wd = "/";
2329
2330         if (params->flags & EXEC_APPLY_CHROOT) {
2331                 if (!needs_mount_ns && context->root_directory)
2332                         if (chroot(context->root_directory) < 0) {
2333                                 *exit_status = EXIT_CHROOT;
2334                                 return -errno;
2335                         }
2336
2337                 d = wd;
2338         } else
2339                 d = prefix_roota(context->root_directory, wd);
2340
2341         if (chdir(d) < 0 && !context->working_directory_missing_ok) {
2342                 *exit_status = EXIT_CHDIR;
2343                 return -errno;
2344         }
2345
2346         return 0;
2347 }
2348
2349 static int setup_keyring(
2350                 Unit *u,
2351                 const ExecContext *context,
2352                 const ExecParameters *p,
2353                 uid_t uid, gid_t gid) {
2354
2355         key_serial_t keyring;
2356         int r;
2357
2358         assert(u);
2359         assert(context);
2360         assert(p);
2361
2362         /* Let's set up a new per-service "session" kernel keyring for each system service. This has the benefit that
2363          * each service runs with its own keyring shared among all processes of the service, but with no hook-up beyond
2364          * that scope, and in particular no link to the per-UID keyring. If we don't do this the keyring will be
2365          * automatically created on-demand and then linked to the per-UID keyring, by the kernel. The kernel's built-in
2366          * on-demand behaviour is very appropriate for login users, but probably not so much for system services, where
2367          * UIDs are not necessarily specific to a service but reused (at least in the case of UID 0). */
2368
2369         if (!(p->flags & EXEC_NEW_KEYRING))
2370                 return 0;
2371
2372         if (context->keyring_mode == EXEC_KEYRING_INHERIT)
2373                 return 0;
2374
2375         keyring = keyctl(KEYCTL_JOIN_SESSION_KEYRING, 0, 0, 0, 0);
2376         if (keyring == -1) {
2377                 if (errno == ENOSYS)
2378                         log_unit_debug_errno(u, errno, "Kernel keyring not supported, ignoring.");
2379                 else if (IN_SET(errno, EACCES, EPERM))
2380                         log_unit_debug_errno(u, errno, "Kernel keyring access prohibited, ignoring.");
2381                 else if (errno == EDQUOT)
2382                         log_unit_debug_errno(u, errno, "Out of kernel keyrings to allocate, ignoring.");
2383                 else
2384                         return log_unit_error_errno(u, errno, "Setting up kernel keyring failed: %m");
2385
2386                 return 0;
2387         }
2388
2389         /* Populate they keyring with the invocation ID by default. */
2390         if (!sd_id128_is_null(u->invocation_id)) {
2391                 key_serial_t key;
2392
2393                 key = add_key("user", "invocation_id", &u->invocation_id, sizeof(u->invocation_id), KEY_SPEC_SESSION_KEYRING);
2394                 if (key == -1)
2395                         log_unit_debug_errno(u, errno, "Failed to add invocation ID to keyring, ignoring: %m");
2396                 else {
2397                         if (keyctl(KEYCTL_SETPERM, key,
2398                                    KEY_POS_VIEW|KEY_POS_READ|KEY_POS_SEARCH|
2399                                    KEY_USR_VIEW|KEY_USR_READ|KEY_USR_SEARCH, 0, 0) < 0)
2400                                 return log_unit_error_errno(u, errno, "Failed to restrict invocation ID permission: %m");
2401                 }
2402         }
2403
2404         /* And now, make the keyring owned by the service's user */
2405         if (uid_is_valid(uid) || gid_is_valid(gid))
2406                 if (keyctl(KEYCTL_CHOWN, keyring, uid, gid, 0) < 0)
2407                         return log_unit_error_errno(u, errno, "Failed to change ownership of session keyring: %m");
2408
2409         /* When requested link the user keyring into the session keyring. */
2410         if (context->keyring_mode == EXEC_KEYRING_SHARED) {
2411                 uid_t saved_uid;
2412                 gid_t saved_gid;
2413
2414                 /* Acquiring a reference to the user keyring is nasty. We briefly change identity in order to get things
2415                  * set up properly by the kernel. If we don't do that then we can't create it atomically, and that
2416                  * sucks for parallel execution. This mimics what pam_keyinit does, too.*/
2417
2418                 saved_uid = getuid();
2419                 saved_gid = getgid();
2420
2421                 if (gid_is_valid(gid) && gid != saved_gid) {
2422                         if (setregid(gid, -1) < 0)
2423                                 return log_unit_error_errno(u, errno, "Failed to change GID for user keyring: %m");
2424                 }
2425
2426                 if (uid_is_valid(uid) && uid != saved_uid) {
2427                         if (setreuid(uid, -1) < 0) {
2428                                 (void) setregid(saved_gid, -1);
2429                                 return log_unit_error_errno(u, errno, "Failed to change UID for user keyring: %m");
2430                         }
2431                 }
2432
2433                 if (keyctl(KEYCTL_LINK,
2434                            KEY_SPEC_USER_KEYRING,
2435                            KEY_SPEC_SESSION_KEYRING, 0, 0) < 0) {
2436
2437                         r = -errno;
2438
2439                         (void) setreuid(saved_uid, -1);
2440                         (void) setregid(saved_gid, -1);
2441
2442                         return log_unit_error_errno(u, r, "Failed to link user keyring into session keyring: %m");
2443                 }
2444
2445                 if (uid_is_valid(uid) && uid != saved_uid) {
2446                         if (setreuid(saved_uid, -1) < 0) {
2447                                 (void) setregid(saved_gid, -1);
2448                                 return log_unit_error_errno(u, errno, "Failed to change UID back for user keyring: %m");
2449                         }
2450                 }
2451
2452                 if (gid_is_valid(gid) && gid != saved_gid) {
2453                         if (setregid(saved_gid, -1) < 0)
2454                                 return log_unit_error_errno(u, errno, "Failed to change GID back for user keyring: %m");
2455                 }
2456         }
2457
2458         return 0;
2459 }
2460
/* Appends the valid (i.e. non-negative) file descriptors of a socket pair to array[], advancing *n for each
 * one added. A NULL pair is ignored. The caller must make sure the array has room for up to two more entries. */
static void append_socket_pair(int *array, unsigned *n, int pair[2]) {
        unsigned k;

        assert(array);
        assert(n);

        if (!pair)
                return;

        for (k = 0; k < 2; k++) {
                if (pair[k] < 0)
                        continue;

                array[*n] = pair[k];
                (*n)++;
        }
}
2473
2474 static int close_remaining_fds(
2475                 const ExecParameters *params,
2476                 ExecRuntime *runtime,
2477                 DynamicCreds *dcreds,
2478                 int user_lookup_fd,
2479                 int socket_fd,
2480                 int *fds, unsigned n_fds) {
2481
2482         unsigned n_dont_close = 0;
2483         int dont_close[n_fds + 12];
2484
2485         assert(params);
2486
2487         if (params->stdin_fd >= 0)
2488                 dont_close[n_dont_close++] = params->stdin_fd;
2489         if (params->stdout_fd >= 0)
2490                 dont_close[n_dont_close++] = params->stdout_fd;
2491         if (params->stderr_fd >= 0)
2492                 dont_close[n_dont_close++] = params->stderr_fd;
2493
2494         if (socket_fd >= 0)
2495                 dont_close[n_dont_close++] = socket_fd;
2496         if (n_fds > 0) {
2497                 memcpy(dont_close + n_dont_close, fds, sizeof(int) * n_fds);
2498                 n_dont_close += n_fds;
2499         }
2500
2501         if (runtime)
2502                 append_socket_pair(dont_close, &n_dont_close, runtime->netns_storage_socket);
2503
2504         if (dcreds) {
2505                 if (dcreds->user)
2506                         append_socket_pair(dont_close, &n_dont_close, dcreds->user->storage_socket);
2507                 if (dcreds->group)
2508                         append_socket_pair(dont_close, &n_dont_close, dcreds->group->storage_socket);
2509         }
2510
2511         if (user_lookup_fd >= 0)
2512                 dont_close[n_dont_close++] = user_lookup_fd;
2513
2514         return close_all_fds(dont_close, n_dont_close);
2515 }
2516
2517 static int send_user_lookup(
2518                 Unit *unit,
2519                 int user_lookup_fd,
2520                 uid_t uid,
2521                 gid_t gid) {
2522
2523         assert(unit);
2524
2525         /* Send the resolved UID/GID to PID 1 after we learnt it. We send a single datagram, containing the UID/GID
2526          * data as well as the unit name. Note that we suppress sending this if no user/group to resolve was
2527          * specified. */
2528
2529         if (user_lookup_fd < 0)
2530                 return 0;
2531
2532         if (!uid_is_valid(uid) && !gid_is_valid(gid))
2533                 return 0;
2534
2535         if (writev(user_lookup_fd,
2536                (struct iovec[]) {
2537                            IOVEC_INIT(&uid, sizeof(uid)),
2538                            IOVEC_INIT(&gid, sizeof(gid)),
2539                            IOVEC_INIT_STRING(unit->id) }, 3) < 0)
2540                 return -errno;
2541
2542         return 0;
2543 }
2544
2545 static int acquire_home(const ExecContext *c, uid_t uid, const char** home, char **buf) {
2546         int r;
2547
2548         assert(c);
2549         assert(home);
2550         assert(buf);
2551
2552         /* If WorkingDirectory=~ is set, try to acquire a usable home directory. */
2553
2554         if (*home)
2555                 return 0;
2556
2557         if (!c->working_directory_home)
2558                 return 0;
2559
2560         if (uid == 0) {
2561                 /* Hardcode /root as home directory for UID 0 */
2562                 *home = "/root";
2563                 return 1;
2564         }
2565
2566         r = get_home_dir(buf);
2567         if (r < 0)
2568                 return r;
2569
2570         *home = *buf;
2571         return 1;
2572 }
2573
2574 static int compile_suggested_paths(const ExecContext *c, const ExecParameters *p, char ***ret) {
2575         _cleanup_strv_free_ char ** list = NULL;
2576         ExecDirectoryType t;
2577         int r;
2578
2579         assert(c);
2580         assert(p);
2581         assert(ret);
2582
2583         assert(c->dynamic_user);
2584
2585         /* Compile a list of paths that it might make sense to read the owning UID from to use as initial candidate for
2586          * dynamic UID allocation, in order to save us from doing costly recursive chown()s of the special
2587          * directories. */
2588
2589         for (t = 0; t < _EXEC_DIRECTORY_TYPE_MAX; t++) {
2590                 char **i;
2591
2592                 if (t == EXEC_DIRECTORY_CONFIGURATION)
2593                         continue;
2594
2595                 if (!p->prefix[t])
2596                         continue;
2597
2598                 STRV_FOREACH(i, c->directories[t].paths) {
2599                         char *e;
2600
2601                         if (t == EXEC_DIRECTORY_RUNTIME)
2602                                 e = strjoin(p->prefix[t], "/", *i);
2603                         else
2604                                 e = strjoin(p->prefix[t], "/private/", *i);
2605                         if (!e)
2606                                 return -ENOMEM;
2607
2608                         r = strv_consume(&list, e);
2609                         if (r < 0)
2610                                 return r;
2611                 }
2612         }
2613
2614         *ret = list;
2615         list = NULL;
2616
2617         return 0;
2618 }
2619
2620 static int exec_child(
2621                 Unit *unit,
2622                 ExecCommand *command,
2623                 const ExecContext *context,
2624                 const ExecParameters *params,
2625                 ExecRuntime *runtime,
2626                 DynamicCreds *dcreds,
2627                 char **argv,
2628                 int socket_fd,
2629                 int named_iofds[3],
2630                 int *fds,
2631                 unsigned n_storage_fds,
2632                 unsigned n_socket_fds,
2633                 char **files_env,
2634                 int user_lookup_fd,
2635                 int *exit_status) {
2636
2637         _cleanup_strv_free_ char **our_env = NULL, **pass_env = NULL, **accum_env = NULL, **final_argv = NULL;
2638         _cleanup_free_ char *mac_selinux_context_net = NULL, *home_buffer = NULL;
2639         _cleanup_free_ gid_t *supplementary_gids = NULL;
2640         const char *username = NULL, *groupname = NULL;
2641         const char *home = NULL, *shell = NULL;
2642         dev_t journal_stream_dev = 0;
2643         ino_t journal_stream_ino = 0;
2644         bool needs_sandboxing,          /* Do we need to set up full sandboxing? (i.e. all namespacing, all MAC stuff, caps, yadda yadda */
2645                 needs_setuid,           /* Do we need to do the actual setresuid()/setresgid() calls? */
2646                 needs_mount_namespace,  /* Do we need to set up a mount namespace for this kernel? */
2647                 needs_ambient_hack;     /* Do we need to apply the ambient capabilities hack? */
2648 #if HAVE_SELINUX
2649         bool use_selinux = false;
2650 #endif
2651 #if ENABLE_SMACK
2652         bool use_smack = false;
2653 #endif
2654 #if HAVE_APPARMOR
2655         bool use_apparmor = false;
2656 #endif
2657         uid_t uid = UID_INVALID;
2658         gid_t gid = GID_INVALID;
2659         int i, r, ngids = 0;
2660         unsigned n_fds;
2661         ExecDirectoryType dt;
2662         int secure_bits;
2663
2664         assert(unit);
2665         assert(command);
2666         assert(context);
2667         assert(params);
2668         assert(exit_status);
2669
2670         rename_process_from_path(command->path);
2671
2672         /* We reset exactly these signals, since they are the
2673          * only ones we set to SIG_IGN in the main daemon. All
2674          * others we leave untouched because we set them to
2675          * SIG_DFL or a valid handler initially, both of which
2676          * will be demoted to SIG_DFL. */
2677         (void) default_signals(SIGNALS_CRASH_HANDLER,
2678                                SIGNALS_IGNORE, -1);
2679
2680         if (context->ignore_sigpipe)
2681                 (void) ignore_signals(SIGPIPE, -1);
2682
2683         r = reset_signal_mask();
2684         if (r < 0) {
2685                 *exit_status = EXIT_SIGNAL_MASK;
2686                 return log_unit_error_errno(unit, r, "Failed to set process signal mask: %m");
2687         }
2688
2689         if (params->idle_pipe)
2690                 do_idle_pipe_dance(params->idle_pipe);
2691
2692         /* Close fds we don't need very early to make sure we don't block init reexecution because it cannot bind its
2693          * sockets. Among the fds we close are the logging fds, and we want to keep them closed, so that we don't have
2694          * any fds open we don't really want open during the transition. In order to make logging work, we switch the
2695          * log subsystem into open_when_needed mode, so that it reopens the logs on every single log call. */
2696
2697         log_forget_fds();
2698         log_set_open_when_needed(true);
2699
2700         /* In case anything used libc syslog(), close this here, too */
2701         closelog();
2702
2703         n_fds = n_storage_fds + n_socket_fds;
2704         r = close_remaining_fds(params, runtime, dcreds, user_lookup_fd, socket_fd, fds, n_fds);
2705         if (r < 0) {
2706                 *exit_status = EXIT_FDS;
2707                 return log_unit_error_errno(unit, r, "Failed to close unwanted file descriptors: %m");
2708         }
2709
2710         if (!context->same_pgrp)
2711                 if (setsid() < 0) {
2712                         *exit_status = EXIT_SETSID;
2713                         return log_unit_error_errno(unit, errno, "Failed to create new process session: %m");
2714                 }
2715
2716         exec_context_tty_reset(context, params);
2717
2718         if (unit_shall_confirm_spawn(unit)) {
2719                 const char *vc = params->confirm_spawn;
2720                 _cleanup_free_ char *cmdline = NULL;
2721
2722                 cmdline = exec_command_line(argv);
2723                 if (!cmdline) {
2724                         *exit_status = EXIT_MEMORY;
2725                         return log_oom();
2726                 }
2727
2728                 r = ask_for_confirmation(vc, unit, cmdline);
2729                 if (r != CONFIRM_EXECUTE) {
2730                         if (r == CONFIRM_PRETEND_SUCCESS) {
2731                                 *exit_status = EXIT_SUCCESS;
2732                                 return 0;
2733                         }
2734                         *exit_status = EXIT_CONFIRM;
2735                         log_unit_error(unit, "Execution cancelled by the user");
2736                         return -ECANCELED;
2737                 }
2738         }
2739
2740         if (context->dynamic_user && dcreds) {
2741                 _cleanup_strv_free_ char **suggested_paths = NULL;
2742
2743                 /* Make sure we bypass our own NSS module for any NSS checks */
2744                 if (putenv((char*) "SYSTEMD_NSS_DYNAMIC_BYPASS=1") != 0) {
2745                         *exit_status = EXIT_USER;
2746                         return log_unit_error_errno(unit, errno, "Failed to update environment: %m");
2747                 }
2748
2749                 r = compile_suggested_paths(context, params, &suggested_paths);
2750                 if (r < 0) {
2751                         *exit_status = EXIT_MEMORY;
2752                         return log_oom();
2753                 }
2754
2755                 r = dynamic_creds_realize(dcreds, suggested_paths, &uid, &gid);
2756                 if (r < 0) {
2757                         *exit_status = EXIT_USER;
2758                         if (r == -EILSEQ) {
2759                                 log_unit_error(unit, "Failed to update dynamic user credentials: User or group with specified name already exists.");
2760                                 return -EOPNOTSUPP;
2761                         }
2762                         return log_unit_error_errno(unit, r, "Failed to update dynamic user credentials: %m");
2763                 }
2764
2765                 if (!uid_is_valid(uid)) {
2766                         *exit_status = EXIT_USER;
2767                         log_unit_error(unit, "UID validation failed for \""UID_FMT"\"", uid);
2768                         return -ESRCH;
2769                 }
2770
2771                 if (!gid_is_valid(gid)) {
2772                         *exit_status = EXIT_USER;
2773                         log_unit_error(unit, "GID validation failed for \""GID_FMT"\"", gid);
2774                         return -ESRCH;
2775                 }
2776
2777                 if (dcreds->user)
2778                         username = dcreds->user->name;
2779
2780         } else {
2781                 r = get_fixed_user(context, &username, &uid, &gid, &home, &shell);
2782                 if (r < 0) {
2783                         *exit_status = EXIT_USER;
2784                         return log_unit_error_errno(unit, r, "Failed to determine user credentials: %m");
2785                 }
2786
2787                 r = get_fixed_group(context, &groupname, &gid);
2788                 if (r < 0) {
2789                         *exit_status = EXIT_GROUP;
2790                         return log_unit_error_errno(unit, r, "Failed to determine group credentials: %m");
2791                 }
2792         }
2793
2794         /* Initialize user supplementary groups and get SupplementaryGroups= ones */
2795         r = get_supplementary_groups(context, username, groupname, gid,
2796                                      &supplementary_gids, &ngids);
2797         if (r < 0) {
2798                 *exit_status = EXIT_GROUP;
2799                 return log_unit_error_errno(unit, r, "Failed to determine supplementary groups: %m");
2800         }
2801
2802         r = send_user_lookup(unit, user_lookup_fd, uid, gid);
2803         if (r < 0) {
2804                 *exit_status = EXIT_USER;
2805                 return log_unit_error_errno(unit, r, "Failed to send user credentials to PID1: %m");
2806         }
2807
2808         user_lookup_fd = safe_close(user_lookup_fd);
2809
2810         r = acquire_home(context, uid, &home, &home_buffer);
2811         if (r < 0) {
2812                 *exit_status = EXIT_CHDIR;
2813                 return log_unit_error_errno(unit, r, "Failed to determine $HOME for user: %m");
2814         }
2815
2816         /* If a socket is connected to STDIN/STDOUT/STDERR, we
2817          * must sure to drop O_NONBLOCK */
2818         if (socket_fd >= 0)
2819                 (void) fd_nonblock(socket_fd, false);
2820
2821         r = setup_input(context, params, socket_fd, named_iofds);
2822         if (r < 0) {
2823                 *exit_status = EXIT_STDIN;
2824                 return log_unit_error_errno(unit, r, "Failed to set up standard input: %m");
2825         }
2826
2827         r = setup_output(unit, context, params, STDOUT_FILENO, socket_fd, named_iofds, basename(command->path), uid, gid, &journal_stream_dev, &journal_stream_ino);
2828         if (r < 0) {
2829                 *exit_status = EXIT_STDOUT;
2830                 return log_unit_error_errno(unit, r, "Failed to set up standard output: %m");
2831         }
2832
2833         r = setup_output(unit, context, params, STDERR_FILENO, socket_fd, named_iofds, basename(command->path), uid, gid, &journal_stream_dev, &journal_stream_ino);
2834         if (r < 0) {
2835                 *exit_status = EXIT_STDERR;
2836                 return log_unit_error_errno(unit, r, "Failed to set up standard error output: %m");
2837         }
2838
2839         if (params->cgroup_path) {
2840                 r = cg_attach_everywhere(params->cgroup_supported, params->cgroup_path, 0, NULL, NULL);
2841                 if (r < 0) {
2842                         *exit_status = EXIT_CGROUP;
2843                         return log_unit_error_errno(unit, r, "Failed to attach to cgroup %s: %m", params->cgroup_path);
2844                 }
2845         }
2846
2847         if (context->oom_score_adjust_set) {
2848                 char t[DECIMAL_STR_MAX(context->oom_score_adjust)];
2849
2850                 /* When we can't make this change due to EPERM, then
2851                  * let's silently skip over it. User namespaces
2852                  * prohibit write access to this file, and we
2853                  * shouldn't trip up over that. */
2854
2855                 sprintf(t, "%i", context->oom_score_adjust);
2856                 r = write_string_file("/proc/self/oom_score_adj", t, 0);
2857                 if (IN_SET(r, -EPERM, -EACCES))
2858                         log_unit_debug_errno(unit, r, "Failed to adjust OOM setting, assuming containerized execution, ignoring: %m");
2859                 else if (r < 0) {
2860                         *exit_status = EXIT_OOM_ADJUST;
2861                         return log_unit_error_errno(unit, r, "Failed to adjust OOM setting: %m");
2862                 }
2863         }
2864
2865         if (context->nice_set)
2866                 if (setpriority(PRIO_PROCESS, 0, context->nice) < 0) {
2867                         *exit_status = EXIT_NICE;
2868                         return log_unit_error_errno(unit, errno, "Failed to set up process scheduling priority (nice level): %m");
2869                 }
2870
2871         if (context->cpu_sched_set) {
2872                 struct sched_param param = {
2873                         .sched_priority = context->cpu_sched_priority,
2874                 };
2875
2876                 r = sched_setscheduler(0,
2877                                        context->cpu_sched_policy |
2878                                        (context->cpu_sched_reset_on_fork ?
2879                                         SCHED_RESET_ON_FORK : 0),
2880                                        &param);
2881                 if (r < 0) {
2882                         *exit_status = EXIT_SETSCHEDULER;
2883                         return log_unit_error_errno(unit, errno, "Failed to set up CPU scheduling: %m");
2884                 }
2885         }
2886
2887         if (context->cpuset)
2888                 if (sched_setaffinity(0, CPU_ALLOC_SIZE(context->cpuset_ncpus), context->cpuset) < 0) {
2889                         *exit_status = EXIT_CPUAFFINITY;
2890                         return log_unit_error_errno(unit, errno, "Failed to set up CPU affinity: %m");
2891                 }
2892
2893         if (context->ioprio_set)
2894                 if (ioprio_set(IOPRIO_WHO_PROCESS, 0, context->ioprio) < 0) {
2895                         *exit_status = EXIT_IOPRIO;
2896                         return log_unit_error_errno(unit, errno, "Failed to set up IO scheduling priority: %m");
2897                 }
2898
2899         if (context->timer_slack_nsec != NSEC_INFINITY)
2900                 if (prctl(PR_SET_TIMERSLACK, context->timer_slack_nsec) < 0) {
2901                         *exit_status = EXIT_TIMERSLACK;
2902                         return log_unit_error_errno(unit, errno, "Failed to set up timer slack: %m");
2903                 }
2904
2905         if (context->personality != PERSONALITY_INVALID) {
2906                 r = safe_personality(context->personality);
2907                 if (r < 0) {
2908                         *exit_status = EXIT_PERSONALITY;
2909                         return log_unit_error_errno(unit, r, "Failed to set up execution domain (personality): %m");
2910                 }
2911         }
2912
2913         if (context->utmp_id)
2914                 utmp_put_init_process(context->utmp_id, getpid_cached(), getsid(0),
2915                                       context->tty_path,
2916                                       context->utmp_mode == EXEC_UTMP_INIT  ? INIT_PROCESS :
2917                                       context->utmp_mode == EXEC_UTMP_LOGIN ? LOGIN_PROCESS :
2918                                       USER_PROCESS,
2919                                       username);
2920
2921         if (context->user) {
2922                 r = chown_terminal(STDIN_FILENO, uid);
2923                 if (r < 0) {
2924                         *exit_status = EXIT_STDIN;
2925                         return log_unit_error_errno(unit, r, "Failed to change ownership of terminal: %m");
2926                 }
2927         }
2928
2929         /* If delegation is enabled we'll pass ownership of the cgroup
2930          * (but only in systemd's own controller hierarchy!) to the
2931          * user of the new process. */
2932         if (params->cgroup_path && context->user && (params->flags & EXEC_CGROUP_DELEGATE)) {
2933                 r = cg_set_task_access(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, 0644, uid, gid);
2934                 if (r < 0) {
2935                         *exit_status = EXIT_CGROUP;
2936                         return log_unit_error_errno(unit, r, "Failed to adjust control group access: %m");
2937                 }
2938
2939                 r = cg_set_group_access(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, 0755, uid, gid);
2940                 if (r < 0) {
2941                         *exit_status = EXIT_CGROUP;
2942                         return log_unit_error_errno(unit, r, "Failed to adjust control group access: %m");
2943                 }
2944         }
2945
2946         for (dt = 0; dt < _EXEC_DIRECTORY_TYPE_MAX; dt++) {
2947                 r = setup_exec_directory(context, params, uid, gid, dt, exit_status);
2948                 if (r < 0)
2949                         return log_unit_error_errno(unit, r, "Failed to set up special execution directory in %s: %m", params->prefix[dt]);
2950         }
2951
2952         r = build_environment(
2953                         unit,
2954                         context,
2955                         params,
2956                         n_fds,
2957                         home,
2958                         username,
2959                         shell,
2960                         journal_stream_dev,
2961                         journal_stream_ino,
2962                         &our_env);
2963         if (r < 0) {
2964                 *exit_status = EXIT_MEMORY;
2965                 return log_oom();
2966         }
2967
2968         r = build_pass_environment(context, &pass_env);
2969         if (r < 0) {
2970                 *exit_status = EXIT_MEMORY;
2971                 return log_oom();
2972         }
2973
2974         accum_env = strv_env_merge(5,
2975                                    params->environment,
2976                                    our_env,
2977                                    pass_env,
2978                                    context->environment,
2979                                    files_env,
2980                                    NULL);
2981         if (!accum_env) {
2982                 *exit_status = EXIT_MEMORY;
2983                 return log_oom();
2984         }
2985         accum_env = strv_env_clean(accum_env);
2986
2987         (void) umask(context->umask);
2988
2989         r = setup_keyring(unit, context, params, uid, gid);
2990         if (r < 0) {
2991                 *exit_status = EXIT_KEYRING;
2992                 return log_unit_error_errno(unit, r, "Failed to set up kernel keyring: %m");
2993         }
2994
2995         /* We need sandboxing if the caller asked us to apply it and the command isn't explicitly excepted from it */
2996         needs_sandboxing = (params->flags & EXEC_APPLY_SANDBOXING) && !(command->flags & EXEC_COMMAND_FULLY_PRIVILEGED);
2997
2998         /* We need the ambient capability hack, if the caller asked us to apply it and the command is marked for it, and the kernel doesn't actually support ambient caps */
2999         needs_ambient_hack = (params->flags & EXEC_APPLY_SANDBOXING) && (command->flags & EXEC_COMMAND_AMBIENT_MAGIC) && !ambient_capabilities_supported();
3000
3001         /* We need setresuid() if the caller asked us to apply sandboxing and the command isn't explicitly excepted from either whole sandboxing or just setresuid() itself, and the ambient hack is not desired */
3002         if (needs_ambient_hack)
3003                 needs_setuid = false;
3004         else
3005                 needs_setuid = (params->flags & EXEC_APPLY_SANDBOXING) && !(command->flags & (EXEC_COMMAND_FULLY_PRIVILEGED|EXEC_COMMAND_NO_SETUID));
3006
3007         if (needs_sandboxing) {
3008                 /* MAC enablement checks need to be done before a new mount ns is created, as they rely on /sys being
3009                  * present. The actual MAC context application will happen later, as late as possible, to avoid
3010                  * impacting our own code paths. */
3011
3012 #if HAVE_SELINUX
3013                 use_selinux = mac_selinux_use();
3014 #endif
3015 #if ENABLE_SMACK
3016                 use_smack = mac_smack_use();
3017 #endif
3018 #if HAVE_APPARMOR
3019                 use_apparmor = mac_apparmor_use();
3020 #endif
3021         }
3022
3023         if (needs_setuid) {
3024                 if (context->pam_name && username) {
3025                         r = setup_pam(context->pam_name, username, uid, gid, context->tty_path, &accum_env, fds, n_fds);
3026                         if (r < 0) {
3027                                 *exit_status = EXIT_PAM;
3028                                 return log_unit_error_errno(unit, r, "Failed to set up PAM session: %m");
3029                         }
3030                 }
3031         }
3032
3033         if (context->private_network && runtime && runtime->netns_storage_socket[0] >= 0) {
3034                 if (ns_type_supported(NAMESPACE_NET)) {
3035                         r = setup_netns(runtime->netns_storage_socket);
3036                         if (r < 0) {
3037                                 *exit_status = EXIT_NETWORK;
3038                                 return log_unit_error_errno(unit, r, "Failed to set up network namespacing: %m");
3039                         }
3040                 } else
3041                         log_unit_warning(unit, "PrivateNetwork=yes is configured, but the kernel does not support network namespaces, ignoring.");
3042         }
3043
3044         needs_mount_namespace = exec_needs_mount_namespace(context, params, runtime);
3045         if (needs_mount_namespace) {
3046                 r = apply_mount_namespace(unit, command, context, params, runtime);
3047                 if (r < 0) {
3048                         *exit_status = EXIT_NAMESPACE;
3049                         return log_unit_error_errno(unit, r, "Failed to set up mount namespacing: %m");
3050                 }
3051         }
3052
3053         /* Apply just after mount namespace setup */
3054         r = apply_working_directory(context, params, home, needs_mount_namespace, exit_status);
3055         if (r < 0)
3056                 return log_unit_error_errno(unit, r, "Changing to the requested working directory failed: %m");
3057
3058         /* Drop groups as early as possbile */
3059         if (needs_setuid) {
3060                 r = enforce_groups(gid, supplementary_gids, ngids);
3061                 if (r < 0) {
3062                         *exit_status = EXIT_GROUP;
3063                         return log_unit_error_errno(unit, r, "Changing group credentials failed: %m");
3064                 }
3065         }
3066
3067         if (needs_sandboxing) {
3068 #if HAVE_SELINUX
3069                 if (use_selinux && params->selinux_context_net && socket_fd >= 0) {
3070                         r = mac_selinux_get_child_mls_label(socket_fd, command->path, context->selinux_context, &mac_selinux_context_net);
3071                         if (r < 0) {
3072                                 *exit_status = EXIT_SELINUX_CONTEXT;
3073                                 return log_unit_error_errno(unit, r, "Failed to determine SELinux context: %m");
3074                         }
3075                 }
3076 #endif
3077
3078                 if (context->private_users) {
3079                         r = setup_private_users(uid, gid);
3080                         if (r < 0) {
3081                                 *exit_status = EXIT_USER;
3082                                 return log_unit_error_errno(unit, r, "Failed to set up user namespacing: %m");
3083                         }
3084                 }
3085         }
3086
3087         /* We repeat the fd closing here, to make sure that nothing is leaked from the PAM modules. Note that we are
3088          * more aggressive this time since socket_fd and the netns fds we don't need anymore. The custom endpoint fd
3089          * was needed to upload the policy and can now be closed as well. */
3090         r = close_all_fds(fds, n_fds);
3091         if (r >= 0)
3092                 r = shift_fds(fds, n_fds);
3093         if (r >= 0)
3094                 r = flags_fds(fds, n_storage_fds, n_socket_fds, context->non_blocking);
3095         if (r < 0) {
3096                 *exit_status = EXIT_FDS;
3097                 return log_unit_error_errno(unit, r, "Failed to adjust passed file descriptors: %m");
3098         }
3099
3100         secure_bits = context->secure_bits;
3101
3102         if (needs_sandboxing) {
3103                 uint64_t bset;
3104
3105                 for (i = 0; i < _RLIMIT_MAX; i++) {
3106
3107                         if (!context->rlimit[i])
3108                                 continue;
3109
3110                         r = setrlimit_closest(i, context->rlimit[i]);
3111                         if (r < 0) {
3112                                 *exit_status = EXIT_LIMITS;
3113                                 return log_unit_error_errno(unit, r, "Failed to adjust resource limit %s: %m", rlimit_to_string(i));
3114                         }
3115                 }
3116
3117                 /* Set the RTPRIO resource limit to 0, but only if nothing else was explicitly requested. */
3118                 if (context->restrict_realtime && !context->rlimit[RLIMIT_RTPRIO]) {
3119                         if (setrlimit(RLIMIT_RTPRIO, &RLIMIT_MAKE_CONST(0)) < 0) {
3120                                 *exit_status = EXIT_LIMITS;
3121                                 return log_unit_error_errno(unit, errno, "Failed to adjust RLIMIT_RTPRIO resource limit: %m");
3122                         }
3123                 }
3124
3125                 bset = context->capability_bounding_set;
3126                 /* If the ambient caps hack is enabled (which means the kernel can't do them, and the user asked for
3127                  * our magic fallback), then let's add some extra caps, so that the service can drop privs of its own,
3128                  * instead of us doing that */
3129                 if (needs_ambient_hack)
3130                         bset |= (UINT64_C(1) << CAP_SETPCAP) |
3131                                 (UINT64_C(1) << CAP_SETUID) |
3132                                 (UINT64_C(1) << CAP_SETGID);
3133
3134                 if (!cap_test_all(bset)) {
3135                         r = capability_bounding_set_drop(bset, false);
3136                         if (r < 0) {
3137                                 *exit_status = EXIT_CAPABILITIES;
3138                                 return log_unit_error_errno(unit, r, "Failed to drop capabilities: %m");
3139                         }
3140                 }
3141
3142                 /* This is done before enforce_user, but ambient set
3143                  * does not survive over setresuid() if keep_caps is not set. */
3144                 if (!needs_ambient_hack &&
3145                     context->capability_ambient_set != 0) {
3146                         r = capability_ambient_set_apply(context->capability_ambient_set, true);
3147                         if (r < 0) {
3148                                 *exit_status = EXIT_CAPABILITIES;
3149                                 return log_unit_error_errno(unit, r, "Failed to apply ambient capabilities (before UID change): %m");
3150                         }
3151                 }
3152         }
3153
3154         if (needs_setuid) {
3155                 if (context->user) {
3156                         r = enforce_user(context, uid);
3157                         if (r < 0) {
3158                                 *exit_status = EXIT_USER;
3159                                 return log_unit_error_errno(unit, r, "Failed to change UID to " UID_FMT ": %m", uid);
3160                         }
3161
3162                         if (!needs_ambient_hack &&
3163                             context->capability_ambient_set != 0) {
3164
3165                                 /* Fix the ambient capabilities after user change. */
3166                                 r = capability_ambient_set_apply(context->capability_ambient_set, false);
3167                                 if (r < 0) {
3168                                         *exit_status = EXIT_CAPABILITIES;
3169                                         return log_unit_error_errno(unit, r, "Failed to apply ambient capabilities (after UID change): %m");
3170                                 }
3171
3172                                 /* If we were asked to change user and ambient capabilities
3173                                  * were requested, we had to add keep-caps to the securebits
3174                                  * so that we would maintain the inherited capability set
3175                                  * through the setresuid(). Make sure that the bit is added
3176                                  * also to the context secure_bits so that we don't try to
3177                                  * drop the bit away next. */
3178
3179                                 secure_bits |= 1<<SECURE_KEEP_CAPS;
3180                         }
3181                 }
3182         }
3183
3184         if (needs_sandboxing) {
3185                 /* Apply the MAC contexts late, but before seccomp syscall filtering, as those should really be last to
3186                  * influence our own codepaths as little as possible. Moreover, applying MAC contexts usually requires
3187                  * syscalls that are subject to seccomp filtering, hence should probably be applied before the syscalls
3188                  * are restricted. */
3189
3190 #if HAVE_SELINUX
3191                 if (use_selinux) {
3192                         char *exec_context = mac_selinux_context_net ?: context->selinux_context;
3193
3194                         if (exec_context) {
3195                                 r = setexeccon(exec_context);
3196                                 if (r < 0) {
3197                                         *exit_status = EXIT_SELINUX_CONTEXT;
3198                                         return log_unit_error_errno(unit, r, "Failed to change SELinux context to %s: %m", exec_context);
3199                                 }
3200                         }
3201                 }
3202 #endif
3203
3204 #if ENABLE_SMACK
3205                 if (use_smack) {
3206                         r = setup_smack(context, command);
3207                         if (r < 0) {
3208                                 *exit_status = EXIT_SMACK_PROCESS_LABEL;
3209                                 return log_unit_error_errno(unit, r, "Failed to set SMACK process label: %m");
3210                         }
3211                 }
3212 #endif
3213
3214 #if HAVE_APPARMOR
3215                 if (use_apparmor && context->apparmor_profile) {
3216                         r = aa_change_onexec(context->apparmor_profile);
3217                         if (r < 0 && !context->apparmor_profile_ignore) {
3218                                 *exit_status = EXIT_APPARMOR_PROFILE;
3219                                 return log_unit_error_errno(unit, errno, "Failed to prepare AppArmor profile change to %s: %m", context->apparmor_profile);
3220                         }
3221                 }
3222 #endif
3223
3224                 /* PR_GET_SECUREBITS is not privileged, while PR_SET_SECUREBITS is. So to suppress potential EPERMs
3225                  * we'll try not to call PR_SET_SECUREBITS unless necessary. */
3226                 if (prctl(PR_GET_SECUREBITS) != secure_bits)
3227                         if (prctl(PR_SET_SECUREBITS, secure_bits) < 0) {
3228                                 *exit_status = EXIT_SECUREBITS;
3229                                 return log_unit_error_errno(unit, errno, "Failed to set process secure bits: %m");
3230                         }
3231
3232                 if (context_has_no_new_privileges(context))
3233                         if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) < 0) {
3234                                 *exit_status = EXIT_NO_NEW_PRIVILEGES;
3235                                 return log_unit_error_errno(unit, errno, "Failed to disable new privileges: %m");
3236                         }
3237
3238 #if HAVE_SECCOMP
3239                 r = apply_address_families(unit, context);
3240                 if (r < 0) {
3241                         *exit_status = EXIT_ADDRESS_FAMILIES;
3242                         return log_unit_error_errno(unit, r, "Failed to restrict address families: %m");
3243                 }
3244
3245                 r = apply_memory_deny_write_execute(unit, context);
3246                 if (r < 0) {
3247                         *exit_status = EXIT_SECCOMP;
3248                         return log_unit_error_errno(unit, r, "Failed to disable writing to executable memory: %m");
3249                 }
3250
3251                 r = apply_restrict_realtime(unit, context);
3252                 if (r < 0) {
3253                         *exit_status = EXIT_SECCOMP;
3254                         return log_unit_error_errno(unit, r, "Failed to apply realtime restrictions: %m");
3255                 }
3256
3257                 r = apply_restrict_namespaces(unit, context);
3258                 if (r < 0) {
3259                         *exit_status = EXIT_SECCOMP;
3260                         return log_unit_error_errno(unit, r, "Failed to apply namespace restrictions: %m");
3261                 }
3262
3263                 r = apply_protect_sysctl(unit, context);
3264                 if (r < 0) {
3265                         *exit_status = EXIT_SECCOMP;
3266                         return log_unit_error_errno(unit, r, "Failed to apply sysctl restrictions: %m");
3267                 }
3268
3269                 r = apply_protect_kernel_modules(unit, context);
3270                 if (r < 0) {
3271                         *exit_status = EXIT_SECCOMP;
3272                         return log_unit_error_errno(unit, r, "Failed to apply module loading restrictions: %m");
3273                 }
3274
3275                 r = apply_private_devices(unit, context);
3276                 if (r < 0) {
3277                         *exit_status = EXIT_SECCOMP;
3278                         return log_unit_error_errno(unit, r, "Failed to set up private devices: %m");
3279                 }
3280
3281                 r = apply_syscall_archs(unit, context);
3282                 if (r < 0) {
3283                         *exit_status = EXIT_SECCOMP;
3284                         return log_unit_error_errno(unit, r, "Failed to apply syscall architecture restrictions: %m");
3285                 }
3286
3287                 r = apply_lock_personality(unit, context);
3288                 if (r < 0) {
3289                         *exit_status = EXIT_SECCOMP;
3290                         return log_unit_error_errno(unit, r, "Failed to lock personalities: %m");
3291                 }
3292
3293                 /* This really should remain the last step before the execve(), to make sure our own code is unaffected
3294                  * by the filter as little as possible. */
3295                 r = apply_syscall_filter(unit, context, needs_ambient_hack);
3296                 if (r < 0) {
3297                         *exit_status = EXIT_SECCOMP;
3298                         return log_unit_error_errno(unit, r, "Failed to apply system call filters: %m");
3299                 }
3300 #endif
3301         }
3302
3303         if (!strv_isempty(context->unset_environment)) {
3304                 char **ee = NULL;
3305
3306                 ee = strv_env_delete(accum_env, 1, context->unset_environment);
3307                 if (!ee) {
3308                         *exit_status = EXIT_MEMORY;
3309                         return log_oom();
3310                 }
3311
3312                 strv_free(accum_env);
3313                 accum_env = ee;
3314         }
3315
3316         final_argv = replace_env_argv(argv, accum_env);
3317         if (!final_argv) {
3318                 *exit_status = EXIT_MEMORY;
3319                 return log_oom();
3320         }
3321
3322         if (_unlikely_(log_get_max_level() >= LOG_DEBUG)) {
3323                 _cleanup_free_ char *line;
3324
3325                 line = exec_command_line(final_argv);
3326                 if (line) {
3327                         log_struct(LOG_DEBUG,
3328                                    "EXECUTABLE=%s", command->path,
3329                                    LOG_UNIT_MESSAGE(unit, "Executing: %s", line),
3330                                    LOG_UNIT_ID(unit),
3331                                    LOG_UNIT_INVOCATION_ID(unit),
3332                                    NULL);
3333                 }
3334         }
3335
3336         execve(command->path, final_argv, accum_env);
3337
3338         if (errno == ENOENT && (command->flags & EXEC_COMMAND_IGNORE_FAILURE)) {
3339
3340                 log_struct_errno(LOG_INFO, errno,
3341                                  "MESSAGE_ID=" SD_MESSAGE_SPAWN_FAILED_STR,
3342                                  LOG_UNIT_ID(unit),
3343                                  LOG_UNIT_INVOCATION_ID(unit),
3344                                  LOG_UNIT_MESSAGE(unit, "Executable %s missing, skipping: %m",
3345                                                   command->path),
3346                                  "EXECUTABLE=%s", command->path,
3347                                  NULL);
3348
3349                 return 0;
3350         }
3351
3352         *exit_status = EXIT_EXEC;
3353         return log_unit_error_errno(unit, errno, "Failed to execute command: %m");
3354 }
3355
3356 int exec_spawn(Unit *unit,
3357                ExecCommand *command,
3358                const ExecContext *context,
3359                const ExecParameters *params,
3360                ExecRuntime *runtime,
3361                DynamicCreds *dcreds,
3362                pid_t *ret) {
3363
3364         _cleanup_strv_free_ char **files_env = NULL;
3365         int *fds = NULL;
3366         unsigned n_storage_fds = 0, n_socket_fds = 0;
3367         _cleanup_free_ char *line = NULL;
3368         int socket_fd, r;
3369         int named_iofds[3] = { -1, -1, -1 };
3370         char **argv;
3371         pid_t pid;
3372
3373         assert(unit);
3374         assert(command);
3375         assert(context);
3376         assert(ret);
3377         assert(params);
3378         assert(params->fds || (params->n_storage_fds + params->n_socket_fds <= 0));
3379
3380         if (context->std_input == EXEC_INPUT_SOCKET ||
3381             context->std_output == EXEC_OUTPUT_SOCKET ||
3382             context->std_error == EXEC_OUTPUT_SOCKET) {
3383
3384                 if (params->n_socket_fds > 1) {
3385                         log_unit_error(unit, "Got more than one socket.");
3386                         return -EINVAL;
3387                 }
3388
3389                 if (params->n_socket_fds == 0) {
3390                         log_unit_error(unit, "Got no socket.");
3391                         return -EINVAL;
3392                 }
3393
3394                 socket_fd = params->fds[0];
3395         } else {
3396                 socket_fd = -1;
3397                 fds = params->fds;
3398                 n_storage_fds = params->n_storage_fds;
3399                 n_socket_fds = params->n_socket_fds;
3400         }
3401
3402         r = exec_context_named_iofds(unit, context, params, named_iofds);
3403         if (r < 0)
3404                 return log_unit_error_errno(unit, r, "Failed to load a named file descriptor: %m");
3405
3406         r = exec_context_load_environment(unit, context, &files_env);
3407         if (r < 0)
3408                 return log_unit_error_errno(unit, r, "Failed to load environment files: %m");
3409
3410         argv = params->argv ?: command->argv;
3411         line = exec_command_line(argv);
3412         if (!line)
3413                 return log_oom();
3414
3415         log_struct(LOG_DEBUG,
3416                    LOG_UNIT_MESSAGE(unit, "About to execute: %s", line),
3417                    "EXECUTABLE=%s", command->path,
3418                    LOG_UNIT_ID(unit),
3419                    LOG_UNIT_INVOCATION_ID(unit),
3420                    NULL);
3421
3422         pid = fork();
3423         if (pid < 0)
3424                 return log_unit_error_errno(unit, errno, "Failed to fork: %m");
3425
3426         if (pid == 0) {
3427                 int exit_status = EXIT_SUCCESS;
3428
3429                 r = exec_child(unit,
3430                                command,
3431                                context,
3432                                params,
3433                                runtime,
3434                                dcreds,
3435                                argv,
3436                                socket_fd,
3437                                named_iofds,
3438                                fds,
3439                                n_storage_fds,
3440                                n_socket_fds,
3441                                files_env,
3442                                unit->manager->user_lookup_fds[1],
3443                                &exit_status);
3444
3445                 if (r < 0) {
3446                         log_struct_errno(LOG_ERR, r,
3447                                          "MESSAGE_ID=" SD_MESSAGE_SPAWN_FAILED_STR,
3448                                          LOG_UNIT_ID(unit),
3449                                          LOG_UNIT_INVOCATION_ID(unit),
3450                                          LOG_UNIT_MESSAGE(unit, "Failed at step %s spawning %s: %m",
3451                                                           exit_status_to_string(exit_status, EXIT_STATUS_SYSTEMD),
3452                                                           command->path),
3453                                          "EXECUTABLE=%s", command->path,
3454                                          NULL);
3455                 }
3456
3457                 _exit(exit_status);
3458         }
3459
3460         log_unit_debug(unit, "Forked %s as "PID_FMT, command->path, pid);
3461
3462         /* We add the new process to the cgroup both in the child (so
3463          * that we can be sure that no user code is ever executed
3464          * outside of the cgroup) and in the parent (so that we can be
3465          * sure that when we kill the cgroup the process will be
3466          * killed too). */
3467         if (params->cgroup_path)
3468                 (void) cg_attach(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, pid);
3469
3470         exec_status_start(&command->exec_status, pid);
3471
3472         *ret = pid;
3473         return 0;
3474 }
3475
3476 void exec_context_init(ExecContext *c) {
3477         ExecDirectoryType i;
3478
3479         assert(c);
3480
3481         c->umask = 0022;
3482         c->ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 0);
3483         c->cpu_sched_policy = SCHED_OTHER;
3484         c->syslog_priority = LOG_DAEMON|LOG_INFO;
3485         c->syslog_level_prefix = true;
3486         c->ignore_sigpipe = true;
3487         c->timer_slack_nsec = NSEC_INFINITY;
3488         c->personality = PERSONALITY_INVALID;
3489         for (i = 0; i < _EXEC_DIRECTORY_TYPE_MAX; i++)
3490                 c->directories[i].mode = 0755;
3491         c->capability_bounding_set = CAP_ALL;
3492         c->restrict_namespaces = NAMESPACE_FLAGS_ALL;
3493         c->log_level_max = -1;
3494 }
3495
3496 void exec_context_done(ExecContext *c) {
3497         ExecDirectoryType i;
3498         size_t l;
3499
3500         assert(c);
3501
3502         c->environment = strv_free(c->environment);
3503         c->environment_files = strv_free(c->environment_files);
3504         c->pass_environment = strv_free(c->pass_environment);
3505         c->unset_environment = strv_free(c->unset_environment);
3506
3507         for (l = 0; l < ELEMENTSOF(c->rlimit); l++)
3508                 c->rlimit[l] = mfree(c->rlimit[l]);
3509
3510         for (l = 0; l < 3; l++)
3511                 c->stdio_fdname[l] = mfree(c->stdio_fdname[l]);
3512
3513         c->working_directory = mfree(c->working_directory);
3514         c->root_directory = mfree(c->root_directory);
3515         c->root_image = mfree(c->root_image);
3516         c->tty_path = mfree(c->tty_path);
3517         c->syslog_identifier = mfree(c->syslog_identifier);
3518         c->user = mfree(c->user);
3519         c->group = mfree(c->group);
3520
3521         c->supplementary_groups = strv_free(c->supplementary_groups);
3522
3523         c->pam_name = mfree(c->pam_name);
3524
3525         c->read_only_paths = strv_free(c->read_only_paths);
3526         c->read_write_paths = strv_free(c->read_write_paths);
3527         c->inaccessible_paths = strv_free(c->inaccessible_paths);
3528
3529         bind_mount_free_many(c->bind_mounts, c->n_bind_mounts);
3530
3531         if (c->cpuset)
3532                 CPU_FREE(c->cpuset);
3533
3534         c->utmp_id = mfree(c->utmp_id);
3535         c->selinux_context = mfree(c->selinux_context);
3536         c->apparmor_profile = mfree(c->apparmor_profile);
3537         c->smack_process_label = mfree(c->smack_process_label);
3538
3539         c->syscall_filter = hashmap_free(c->syscall_filter);
3540         c->syscall_archs = set_free(c->syscall_archs);
3541         c->address_families = set_free(c->address_families);
3542
3543         for (i = 0; i < _EXEC_DIRECTORY_TYPE_MAX; i++)
3544                 c->directories[i].paths = strv_free(c->directories[i].paths);
3545
3546         c->log_level_max = -1;
3547
3548         exec_context_free_log_extra_fields(c);
3549
3550         c->stdin_data = mfree(c->stdin_data);
3551         c->stdin_data_size = 0;
3552 }
3553
3554 int exec_context_destroy_runtime_directory(ExecContext *c, const char *runtime_prefix) {
3555         char **i;
3556
3557         assert(c);
3558
3559         if (!runtime_prefix)
3560                 return 0;
3561
3562         STRV_FOREACH(i, c->directories[EXEC_DIRECTORY_RUNTIME].paths) {
3563                 _cleanup_free_ char *p;
3564
3565                 p = strjoin(runtime_prefix, "/", *i);
3566                 if (!p)
3567                         return -ENOMEM;
3568
3569                 /* We execute this synchronously, since we need to be sure this is gone when we start the service
3570                  * next. */
3571                 (void) rm_rf(p, REMOVE_ROOT);
3572         }
3573
3574         return 0;
3575 }
3576
3577 void exec_command_done(ExecCommand *c) {
3578         assert(c);
3579
3580         c->path = mfree(c->path);
3581
3582         c->argv = strv_free(c->argv);
3583 }
3584
3585 void exec_command_done_array(ExecCommand *c, unsigned n) {
3586         unsigned i;
3587
3588         for (i = 0; i < n; i++)
3589                 exec_command_done(c+i);
3590 }
3591
3592 ExecCommand* exec_command_free_list(ExecCommand *c) {
3593         ExecCommand *i;
3594
3595         while ((i = c)) {
3596                 LIST_REMOVE(command, c, i);
3597                 exec_command_done(i);
3598                 free(i);
3599         }
3600
3601         return NULL;
3602 }
3603
3604 void exec_command_free_array(ExecCommand **c, unsigned n) {
3605         unsigned i;
3606
3607         for (i = 0; i < n; i++)
3608                 c[i] = exec_command_free_list(c[i]);
3609 }
3610
/* Context passed as userdata to invalid_env() while an environment file is being cleaned up. */
typedef struct InvalidEnvInfo {
        Unit *unit;       /* unit the message is logged for */
        const char *path; /* path of the environment file the assignment was read from */
} InvalidEnvInfo;
3615
3616 static void invalid_env(const char *p, void *userdata) {
3617         InvalidEnvInfo *info = userdata;
3618
3619         log_unit_error(info->unit, "Ignoring invalid environment assignment '%s': %s", p, info->path);
3620 }
3621
3622 const char* exec_context_fdname(const ExecContext *c, int fd_index) {
3623         assert(c);
3624
3625         switch (fd_index) {
3626
3627         case STDIN_FILENO:
3628                 if (c->std_input != EXEC_INPUT_NAMED_FD)
3629                         return NULL;
3630
3631                 return c->stdio_fdname[STDIN_FILENO] ?: "stdin";
3632
3633         case STDOUT_FILENO:
3634                 if (c->std_output != EXEC_OUTPUT_NAMED_FD)
3635                         return NULL;
3636
3637                 return c->stdio_fdname[STDOUT_FILENO] ?: "stdout";
3638
3639         case STDERR_FILENO:
3640                 if (c->std_error != EXEC_OUTPUT_NAMED_FD)
3641                         return NULL;
3642
3643                 return c->stdio_fdname[STDERR_FILENO] ?: "stderr";
3644
3645         default:
3646                 return NULL;
3647         }
3648 }
3649
3650 int exec_context_named_iofds(Unit *unit, const ExecContext *c, const ExecParameters *p, int named_iofds[3]) {
3651         unsigned i, targets;
3652         const char* stdio_fdname[3];
3653         unsigned n_fds;
3654
3655         assert(c);
3656         assert(p);
3657
3658         targets = (c->std_input == EXEC_INPUT_NAMED_FD) +
3659                   (c->std_output == EXEC_OUTPUT_NAMED_FD) +
3660                   (c->std_error == EXEC_OUTPUT_NAMED_FD);
3661
3662         for (i = 0; i < 3; i++)
3663                 stdio_fdname[i] = exec_context_fdname(c, i);
3664
3665         n_fds = p->n_storage_fds + p->n_socket_fds;
3666
3667         for (i = 0; i < n_fds  && targets > 0; i++)
3668                 if (named_iofds[STDIN_FILENO] < 0 &&
3669                     c->std_input == EXEC_INPUT_NAMED_FD &&
3670                     stdio_fdname[STDIN_FILENO] &&
3671                     streq(p->fd_names[i], stdio_fdname[STDIN_FILENO])) {
3672
3673                         named_iofds[STDIN_FILENO] = p->fds[i];
3674                         targets--;
3675
3676                 } else if (named_iofds[STDOUT_FILENO] < 0 &&
3677                            c->std_output == EXEC_OUTPUT_NAMED_FD &&
3678                            stdio_fdname[STDOUT_FILENO] &&
3679                            streq(p->fd_names[i], stdio_fdname[STDOUT_FILENO])) {
3680
3681                         named_iofds[STDOUT_FILENO] = p->fds[i];
3682                         targets--;
3683
3684                 } else if (named_iofds[STDERR_FILENO] < 0 &&
3685                            c->std_error == EXEC_OUTPUT_NAMED_FD &&
3686                            stdio_fdname[STDERR_FILENO] &&
3687                            streq(p->fd_names[i], stdio_fdname[STDERR_FILENO])) {
3688
3689                         named_iofds[STDERR_FILENO] = p->fds[i];
3690                         targets--;
3691                 }
3692
3693         return targets == 0 ? 0 : -ENOENT;
3694 }
3695
3696 int exec_context_load_environment(Unit *unit, const ExecContext *c, char ***l) {
3697         char **i, **r = NULL;
3698
3699         assert(c);
3700         assert(l);
3701
3702         STRV_FOREACH(i, c->environment_files) {
3703                 char *fn;
3704                 int k;
3705                 unsigned n;
3706                 bool ignore = false;
3707                 char **p;
3708                 _cleanup_globfree_ glob_t pglob = {};
3709
3710                 fn = *i;
3711
3712                 if (fn[0] == '-') {
3713                         ignore = true;
3714                         fn++;
3715                 }
3716
3717                 if (!path_is_absolute(fn)) {
3718                         if (ignore)
3719                                 continue;
3720
3721                         strv_free(r);
3722                         return -EINVAL;
3723                 }
3724
3725                 /* Filename supports globbing, take all matching files */
3726                 k = safe_glob(fn, 0, &pglob);
3727                 if (k < 0) {
3728                         if (ignore)
3729                                 continue;
3730
3731                         strv_free(r);
3732                         return k;
3733                 }
3734
3735                 /* When we don't match anything, -ENOENT should be returned */
3736                 assert(pglob.gl_pathc > 0);
3737
3738                 for (n = 0; n < pglob.gl_pathc; n++) {
3739                         k = load_env_file(NULL, pglob.gl_pathv[n], NULL, &p);
3740                         if (k < 0) {
3741                                 if (ignore)
3742                                         continue;
3743
3744                                 strv_free(r);
3745                                 return k;
3746                         }
3747                         /* Log invalid environment variables with filename */
3748                         if (p) {
3749                                 InvalidEnvInfo info = {
3750                                         .unit = unit,
3751                                         .path = pglob.gl_pathv[n]
3752                                 };
3753
3754                                 p = strv_env_clean_with_callback(p, invalid_env, &info);
3755                         }
3756
3757                         if (r == NULL)
3758                                 r = p;
3759                         else {
3760                                 char **m;
3761
3762                                 m = strv_env_merge(2, r, p);
3763                                 strv_free(r);
3764                                 strv_free(p);
3765                                 if (!m)
3766                                         return -ENOMEM;
3767
3768                                 r = m;
3769                         }
3770                 }
3771         }
3772
3773         *l = r;
3774
3775         return 0;
3776 }
3777
3778 static bool tty_may_match_dev_console(const char *tty) {
3779         _cleanup_free_ char *active = NULL;
3780         char *console;
3781
3782         if (!tty)
3783                 return true;
3784
3785         tty = skip_dev_prefix(tty);
3786
3787         /* trivial identity? */
3788         if (streq(tty, "console"))
3789                 return true;
3790
3791         console = resolve_dev_console(&active);
3792         /* if we could not resolve, assume it may */
3793         if (!console)
3794                 return true;
3795
3796         /* "tty0" means the active VC, so it may be the same sometimes */
3797         return streq(console, tty) || (streq(console, "tty0") && tty_is_vc(tty));
3798 }
3799
3800 bool exec_context_may_touch_console(ExecContext *ec) {
3801
3802         return (ec->tty_reset ||
3803                 ec->tty_vhangup ||
3804                 ec->tty_vt_disallocate ||
3805                 is_terminal_input(ec->std_input) ||
3806                 is_terminal_output(ec->std_output) ||
3807                 is_terminal_output(ec->std_error)) &&
3808                tty_may_match_dev_console(exec_context_tty_path(ec));
3809 }
3810
3811 static void strv_fprintf(FILE *f, char **l) {
3812         char **g;
3813
3814         assert(f);
3815
3816         STRV_FOREACH(g, l)
3817                 fprintf(f, " %s", *g);
3818 }
3819
3820 void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) {
3821         ExecDirectoryType dt;
3822         char **e, **d;
3823         unsigned i;
3824         int r;
3825
3826         assert(c);
3827         assert(f);
3828
3829         prefix = strempty(prefix);
3830
3831         fprintf(f,
3832                 "%sUMask: %04o\n"
3833                 "%sWorkingDirectory: %s\n"
3834                 "%sRootDirectory: %s\n"
3835                 "%sNonBlocking: %s\n"
3836                 "%sPrivateTmp: %s\n"
3837                 "%sPrivateDevices: %s\n"
3838                 "%sProtectKernelTunables: %s\n"
3839                 "%sProtectKernelModules: %s\n"
3840                 "%sProtectControlGroups: %s\n"
3841                 "%sPrivateNetwork: %s\n"
3842                 "%sPrivateUsers: %s\n"
3843                 "%sProtectHome: %s\n"
3844                 "%sProtectSystem: %s\n"
3845                 "%sMountAPIVFS: %s\n"
3846                 "%sIgnoreSIGPIPE: %s\n"
3847                 "%sMemoryDenyWriteExecute: %s\n"
3848                 "%sRestrictRealtime: %s\n"
3849                 "%sKeyringMode: %s\n",
3850                 prefix, c->umask,
3851                 prefix, c->working_directory ? c->working_directory : "/",
3852                 prefix, c->root_directory ? c->root_directory : "/",
3853                 prefix, yes_no(c->non_blocking),
3854                 prefix, yes_no(c->private_tmp),
3855                 prefix, yes_no(c->private_devices),
3856                 prefix, yes_no(c->protect_kernel_tunables),
3857                 prefix, yes_no(c->protect_kernel_modules),
3858                 prefix, yes_no(c->protect_control_groups),
3859                 prefix, yes_no(c->private_network),
3860                 prefix, yes_no(c->private_users),
3861                 prefix, protect_home_to_string(c->protect_home),
3862                 prefix, protect_system_to_string(c->protect_system),
3863                 prefix, yes_no(c->mount_apivfs),
3864                 prefix, yes_no(c->ignore_sigpipe),
3865                 prefix, yes_no(c->memory_deny_write_execute),
3866                 prefix, yes_no(c->restrict_realtime),
3867                 prefix, exec_keyring_mode_to_string(c->keyring_mode));
3868
3869         if (c->root_image)
3870                 fprintf(f, "%sRootImage: %s\n", prefix, c->root_image);
3871
3872         STRV_FOREACH(e, c->environment)
3873                 fprintf(f, "%sEnvironment: %s\n", prefix, *e);
3874
3875         STRV_FOREACH(e, c->environment_files)
3876                 fprintf(f, "%sEnvironmentFile: %s\n", prefix, *e);
3877
3878         STRV_FOREACH(e, c->pass_environment)
3879                 fprintf(f, "%sPassEnvironment: %s\n", prefix, *e);
3880
3881         STRV_FOREACH(e, c->unset_environment)
3882                 fprintf(f, "%sUnsetEnvironment: %s\n", prefix, *e);
3883
3884         fprintf(f, "%sRuntimeDirectoryPreserve: %s\n", prefix, exec_preserve_mode_to_string(c->runtime_directory_preserve_mode));
3885
3886         for (dt = 0; dt < _EXEC_DIRECTORY_TYPE_MAX; dt++) {
3887                 fprintf(f, "%s%sMode: %04o\n", prefix, exec_directory_type_to_string(dt), c->directories[dt].mode);
3888
3889                 STRV_FOREACH(d, c->directories[dt].paths)
3890                         fprintf(f, "%s%s: %s\n", prefix, exec_directory_type_to_string(dt), *d);
3891         }
3892
3893         if (c->nice_set)
3894                 fprintf(f,
3895                         "%sNice: %i\n",
3896                         prefix, c->nice);
3897
3898         if (c->oom_score_adjust_set)
3899                 fprintf(f,
3900                         "%sOOMScoreAdjust: %i\n",
3901                         prefix, c->oom_score_adjust);
3902
3903         for (i = 0; i < RLIM_NLIMITS; i++)
3904                 if (c->rlimit[i]) {
3905                         fprintf(f, "%s%s: " RLIM_FMT "\n",
3906                                 prefix, rlimit_to_string(i), c->rlimit[i]->rlim_max);
3907                         fprintf(f, "%s%sSoft: " RLIM_FMT "\n",
3908                                 prefix, rlimit_to_string(i), c->rlimit[i]->rlim_cur);
3909                 }
3910
3911         if (c->ioprio_set) {
3912                 _cleanup_free_ char *class_str = NULL;
3913
3914                 r = ioprio_class_to_string_alloc(IOPRIO_PRIO_CLASS(c->ioprio), &class_str);
3915                 if (r >= 0)
3916                         fprintf(f, "%sIOSchedulingClass: %s\n", prefix, class_str);
3917
3918                 fprintf(f, "%sIOPriority: %lu\n", prefix, IOPRIO_PRIO_DATA(c->ioprio));
3919         }
3920
3921         if (c->cpu_sched_set) {
3922                 _cleanup_free_ char *policy_str = NULL;
3923
3924                 r = sched_policy_to_string_alloc(c->cpu_sched_policy, &policy_str);
3925                 if (r >= 0)
3926                         fprintf(f, "%sCPUSchedulingPolicy: %s\n", prefix, policy_str);
3927
3928                 fprintf(f,
3929                         "%sCPUSchedulingPriority: %i\n"
3930                         "%sCPUSchedulingResetOnFork: %s\n",
3931                         prefix, c->cpu_sched_priority,
3932                         prefix, yes_no(c->cpu_sched_reset_on_fork));
3933         }
3934
3935         if (c->cpuset) {
3936                 fprintf(f, "%sCPUAffinity:", prefix);
3937                 for (i = 0; i < c->cpuset_ncpus; i++)
3938                         if (CPU_ISSET_S(i, CPU_ALLOC_SIZE(c->cpuset_ncpus), c->cpuset))
3939                                 fprintf(f, " %u", i);
3940                 fputs("\n", f);
3941         }
3942
3943         if (c->timer_slack_nsec != NSEC_INFINITY)
3944                 fprintf(f, "%sTimerSlackNSec: "NSEC_FMT "\n", prefix, c->timer_slack_nsec);
3945
3946         fprintf(f,
3947                 "%sStandardInput: %s\n"
3948                 "%sStandardOutput: %s\n"
3949                 "%sStandardError: %s\n",
3950                 prefix, exec_input_to_string(c->std_input),
3951                 prefix, exec_output_to_string(c->std_output),
3952                 prefix, exec_output_to_string(c->std_error));
3953
3954         if (c->tty_path)
3955                 fprintf(f,
3956                         "%sTTYPath: %s\n"
3957                         "%sTTYReset: %s\n"
3958                         "%sTTYVHangup: %s\n"
3959                         "%sTTYVTDisallocate: %s\n",
3960                         prefix, c->tty_path,
3961                         prefix, yes_no(c->tty_reset),
3962                         prefix, yes_no(c->tty_vhangup),
3963                         prefix, yes_no(c->tty_vt_disallocate));
3964
3965         if (IN_SET(c->std_output,
3966                    EXEC_OUTPUT_SYSLOG,
3967                    EXEC_OUTPUT_KMSG,
3968                    EXEC_OUTPUT_JOURNAL,
3969                    EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
3970                    EXEC_OUTPUT_KMSG_AND_CONSOLE,
3971                    EXEC_OUTPUT_JOURNAL_AND_CONSOLE) ||
3972             IN_SET(c->std_error,
3973                    EXEC_OUTPUT_SYSLOG,
3974                    EXEC_OUTPUT_KMSG,
3975                    EXEC_OUTPUT_JOURNAL,
3976                    EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
3977                    EXEC_OUTPUT_KMSG_AND_CONSOLE,
3978                    EXEC_OUTPUT_JOURNAL_AND_CONSOLE)) {
3979
3980                 _cleanup_free_ char *fac_str = NULL, *lvl_str = NULL;
3981
3982                 r = log_facility_unshifted_to_string_alloc(c->syslog_priority >> 3, &fac_str);
3983                 if (r >= 0)
3984                         fprintf(f, "%sSyslogFacility: %s\n", prefix, fac_str);
3985
3986                 r = log_level_to_string_alloc(LOG_PRI(c->syslog_priority), &lvl_str);
3987                 if (r >= 0)
3988                         fprintf(f, "%sSyslogLevel: %s\n", prefix, lvl_str);
3989         }
3990
3991         if (c->log_level_max >= 0) {
3992                 _cleanup_free_ char *t = NULL;
3993
3994                 (void) log_level_to_string_alloc(c->log_level_max, &t);
3995
3996                 fprintf(f, "%sLogLevelMax: %s\n", prefix, strna(t));
3997         }
3998
3999         if (c->n_log_extra_fields > 0) {
4000                 size_t j;
4001
4002                 for (j = 0; j < c->n_log_extra_fields; j++) {
4003                         fprintf(f, "%sLogExtraFields: ", prefix);
4004                         fwrite(c->log_extra_fields[j].iov_base,
4005                                1, c->log_extra_fields[j].iov_len,
4006                                f);
4007                         fputc('\n', f);
4008                 }
4009         }
4010
4011         if (c->secure_bits) {
4012                 _cleanup_free_ char *str = NULL;
4013
4014                 r = secure_bits_to_string_alloc(c->secure_bits, &str);
4015                 if (r >= 0)
4016                         fprintf(f, "%sSecure Bits: %s\n", prefix, str);
4017         }
4018
4019         if (c->capability_bounding_set != CAP_ALL) {
4020                 _cleanup_free_ char *str = NULL;
4021
4022                 r = capability_set_to_string_alloc(c->capability_bounding_set, &str);
4023                 if (r >= 0)
4024                         fprintf(f, "%sCapabilityBoundingSet: %s\n", prefix, str);
4025         }
4026
4027         if (c->capability_ambient_set != 0) {
4028                 _cleanup_free_ char *str = NULL;
4029
4030                 r = capability_set_to_string_alloc(c->capability_ambient_set, &str);
4031                 if (r >= 0)
4032                         fprintf(f, "%sAmbientCapabilities: %s\n", prefix, str);
4033         }
4034
4035         if (c->user)
4036                 fprintf(f, "%sUser: %s\n", prefix, c->user);
4037         if (c->group)
4038                 fprintf(f, "%sGroup: %s\n", prefix, c->group);
4039
4040         fprintf(f, "%sDynamicUser: %s\n", prefix, yes_no(c->dynamic_user));
4041
4042         if (!strv_isempty(c->supplementary_groups)) {
4043                 fprintf(f, "%sSupplementaryGroups:", prefix);
4044                 strv_fprintf(f, c->supplementary_groups);
4045                 fputs("\n", f);
4046         }
4047
4048         if (c->pam_name)
4049                 fprintf(f, "%sPAMName: %s\n", prefix, c->pam_name);
4050
4051         if (strv_length(c->read_write_paths) > 0) {
4052                 fprintf(f, "%sReadWritePaths:", prefix);
4053                 strv_fprintf(f, c->read_write_paths);
4054                 fputs("\n", f);
4055         }
4056
4057         if (strv_length(c->read_only_paths) > 0) {
4058                 fprintf(f, "%sReadOnlyPaths:", prefix);
4059                 strv_fprintf(f, c->read_only_paths);
4060                 fputs("\n", f);
4061         }
4062
4063         if (strv_length(c->inaccessible_paths) > 0) {
4064                 fprintf(f, "%sInaccessiblePaths:", prefix);
4065                 strv_fprintf(f, c->inaccessible_paths);
4066                 fputs("\n", f);
4067         }
4068
4069         if (c->n_bind_mounts > 0)
4070                 for (i = 0; i < c->n_bind_mounts; i++) {
4071                         fprintf(f, "%s%s: %s:%s:%s\n", prefix,
4072                                 c->bind_mounts[i].read_only ? "BindReadOnlyPaths" : "BindPaths",
4073                                 c->bind_mounts[i].source,
4074                                 c->bind_mounts[i].destination,
4075                                 c->bind_mounts[i].recursive ? "rbind" : "norbind");
4076                 }
4077
4078         if (c->utmp_id)
4079                 fprintf(f,
4080                         "%sUtmpIdentifier: %s\n",
4081                         prefix, c->utmp_id);
4082
4083         if (c->selinux_context)
4084                 fprintf(f,
4085                         "%sSELinuxContext: %s%s\n",
4086                         prefix, c->selinux_context_ignore ? "-" : "", c->selinux_context);
4087
4088         if (c->apparmor_profile)
4089                 fprintf(f,
4090                         "%sAppArmorProfile: %s%s\n",
4091                         prefix, c->apparmor_profile_ignore ? "-" : "", c->apparmor_profile);
4092
4093         if (c->smack_process_label)
4094                 fprintf(f,
4095                         "%sSmackProcessLabel: %s%s\n",
4096                         prefix, c->smack_process_label_ignore ? "-" : "", c->smack_process_label);
4097
4098         if (c->personality != PERSONALITY_INVALID)
4099                 fprintf(f,
4100                         "%sPersonality: %s\n",
4101                         prefix, strna(personality_to_string(c->personality)));
4102
4103         fprintf(f,
4104                 "%sLockPersonality: %s\n",
4105                 prefix, yes_no(c->lock_personality));
4106
4107         if (c->syscall_filter) {
4108 #if HAVE_SECCOMP
4109                 Iterator j;
4110                 void *id, *val;
4111                 bool first = true;
4112 #endif
4113
4114                 fprintf(f,
4115                         "%sSystemCallFilter: ",
4116                         prefix);
4117
4118                 if (!c->syscall_whitelist)
4119                         fputc('~', f);
4120
4121 #if HAVE_SECCOMP
4122                 HASHMAP_FOREACH_KEY(val, id, c->syscall_filter, j) {
4123                         _cleanup_free_ char *name = NULL;
4124                         const char *errno_name = NULL;
4125                         int num = PTR_TO_INT(val);
4126
4127                         if (first)
4128                                 first = false;
4129                         else
4130                                 fputc(' ', f);
4131
4132                         name = seccomp_syscall_resolve_num_arch(SCMP_ARCH_NATIVE, PTR_TO_INT(id) - 1);
4133                         fputs(strna(name), f);
4134
4135                         if (num >= 0) {
4136                                 errno_name = errno_to_name(num);
4137                                 if (errno_name)
4138                                         fprintf(f, ":%s", errno_name);
4139                                 else
4140                                         fprintf(f, ":%d", num);
4141                         }
4142                 }
4143 #endif
4144
4145                 fputc('\n', f);
4146         }
4147
4148         if (c->syscall_archs) {
4149 #if HAVE_SECCOMP
4150                 Iterator j;
4151                 void *id;
4152 #endif
4153
4154                 fprintf(f,
4155                         "%sSystemCallArchitectures:",
4156                         prefix);
4157
4158 #if HAVE_SECCOMP
4159                 SET_FOREACH(id, c->syscall_archs, j)
4160                         fprintf(f, " %s", strna(seccomp_arch_to_string(PTR_TO_UINT32(id) - 1)));
4161 #endif
4162                 fputc('\n', f);
4163         }
4164
4165         if (exec_context_restrict_namespaces_set(c)) {
4166                 _cleanup_free_ char *s = NULL;
4167
4168                 r = namespace_flag_to_string_many(c->restrict_namespaces, &s);
4169                 if (r >= 0)
4170                         fprintf(f, "%sRestrictNamespaces: %s\n",
4171                                 prefix, s);
4172         }
4173
4174         if (c->syscall_errno > 0) {
4175                 const char *errno_name;
4176
4177                 fprintf(f, "%sSystemCallErrorNumber: ", prefix);
4178
4179                 errno_name = errno_to_name(c->syscall_errno);
4180                 if (errno_name)
4181                         fprintf(f, "%s\n", errno_name);
4182                 else
4183                         fprintf(f, "%d\n", c->syscall_errno);
4184         }
4185
4186         if (c->apparmor_profile)
4187                 fprintf(f,
4188                         "%sAppArmorProfile: %s%s\n",
4189                         prefix, c->apparmor_profile_ignore ? "-" : "", c->apparmor_profile);
4190 }
4191
4192 bool exec_context_maintains_privileges(ExecContext *c) {
4193         assert(c);
4194
4195         /* Returns true if the process forked off would run under
4196          * an unchanged UID or as root. */
4197
4198         if (!c->user)
4199                 return true;
4200
4201         if (streq(c->user, "root") || streq(c->user, "0"))
4202                 return true;
4203
4204         return false;
4205 }
4206
4207 int exec_context_get_effective_ioprio(ExecContext *c) {
4208         int p;
4209
4210         assert(c);
4211
4212         if (c->ioprio_set)
4213                 return c->ioprio;
4214
4215         p = ioprio_get(IOPRIO_WHO_PROCESS, 0);
4216         if (p < 0)
4217                 return IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 4);
4218
4219         return p;
4220 }
4221
4222 void exec_context_free_log_extra_fields(ExecContext *c) {
4223         size_t l;
4224
4225         assert(c);
4226
4227         for (l = 0; l < c->n_log_extra_fields; l++)
4228                 free(c->log_extra_fields[l].iov_base);
4229         c->log_extra_fields = mfree(c->log_extra_fields);
4230         c->n_log_extra_fields = 0;
4231 }
4232
4233 void exec_status_start(ExecStatus *s, pid_t pid) {
4234         assert(s);
4235
4236         zero(*s);
4237         s->pid = pid;
4238         dual_timestamp_get(&s->start_timestamp);
4239 }
4240
4241 void exec_status_exit(ExecStatus *s, ExecContext *context, pid_t pid, int code, int status) {
4242         assert(s);
4243
4244         if (s->pid && s->pid != pid)
4245                 zero(*s);
4246
4247         s->pid = pid;
4248         dual_timestamp_get(&s->exit_timestamp);
4249
4250         s->code = code;
4251         s->status = status;
4252
4253         if (context) {
4254                 if (context->utmp_id)
4255                         utmp_put_dead_process(context->utmp_id, pid, code, status);
4256
4257                 exec_context_tty_reset(context, NULL);
4258         }
4259 }
4260
4261 void exec_status_dump(ExecStatus *s, FILE *f, const char *prefix) {
4262         char buf[FORMAT_TIMESTAMP_MAX];
4263
4264         assert(s);
4265         assert(f);
4266
4267         if (s->pid <= 0)
4268                 return;
4269
4270         prefix = strempty(prefix);
4271
4272         fprintf(f,
4273                 "%sPID: "PID_FMT"\n",
4274                 prefix, s->pid);
4275
4276         if (dual_timestamp_is_set(&s->start_timestamp))
4277                 fprintf(f,
4278                         "%sStart Timestamp: %s\n",
4279                         prefix, format_timestamp(buf, sizeof(buf), s->start_timestamp.realtime));
4280
4281         if (dual_timestamp_is_set(&s->exit_timestamp))
4282                 fprintf(f,
4283                         "%sExit Timestamp: %s\n"
4284                         "%sExit Code: %s\n"
4285                         "%sExit Status: %i\n",
4286                         prefix, format_timestamp(buf, sizeof(buf), s->exit_timestamp.realtime),
4287                         prefix, sigchld_code_to_string(s->code),
4288                         prefix, s->status);
4289 }
4290
4291 char *exec_command_line(char **argv) {
4292         size_t k;
4293         char *n, *p, **a;
4294         bool first = true;
4295
4296         assert(argv);
4297
4298         k = 1;
4299         STRV_FOREACH(a, argv)
4300                 k += strlen(*a)+3;
4301
4302         n = new(char, k);
4303         if (!n)
4304                 return NULL;
4305
4306         p = n;
4307         STRV_FOREACH(a, argv) {
4308
4309                 if (!first)
4310                         *(p++) = ' ';
4311                 else
4312                         first = false;
4313
4314                 if (strpbrk(*a, WHITESPACE)) {
4315                         *(p++) = '\'';
4316                         p = stpcpy(p, *a);
4317                         *(p++) = '\'';
4318                 } else
4319                         p = stpcpy(p, *a);
4320
4321         }
4322
4323         *p = 0;
4324
4325         /* FIXME: this doesn't really handle arguments that have
4326          * spaces and ticks in them */
4327
4328         return n;
4329 }
4330
4331 void exec_command_dump(ExecCommand *c, FILE *f, const char *prefix) {
4332         _cleanup_free_ char *cmd = NULL;
4333         const char *prefix2;
4334
4335         assert(c);
4336         assert(f);
4337
4338         prefix = strempty(prefix);
4339         prefix2 = strjoina(prefix, "\t");
4340
4341         cmd = exec_command_line(c->argv);
4342         fprintf(f,
4343                 "%sCommand Line: %s\n",
4344                 prefix, cmd ? cmd : strerror(ENOMEM));
4345
4346         exec_status_dump(&c->exec_status, f, prefix2);
4347 }
4348
4349 void exec_command_dump_list(ExecCommand *c, FILE *f, const char *prefix) {
4350         assert(f);
4351
4352         prefix = strempty(prefix);
4353
4354         LIST_FOREACH(command, c, c)
4355                 exec_command_dump(c, f, prefix);
4356 }
4357
4358 void exec_command_append_list(ExecCommand **l, ExecCommand *e) {
4359         ExecCommand *end;
4360
4361         assert(l);
4362         assert(e);
4363
4364         if (*l) {
4365                 /* It's kind of important, that we keep the order here */
4366                 LIST_FIND_TAIL(command, *l, end);
4367                 LIST_INSERT_AFTER(command, *l, end, e);
4368         } else
4369               *l = e;
4370 }
4371
4372 int exec_command_set(ExecCommand *c, const char *path, ...) {
4373         va_list ap;
4374         char **l, *p;
4375
4376         assert(c);
4377         assert(path);
4378
4379         va_start(ap, path);
4380         l = strv_new_ap(path, ap);
4381         va_end(ap);
4382
4383         if (!l)
4384                 return -ENOMEM;
4385
4386         p = strdup(path);
4387         if (!p) {
4388                 strv_free(l);
4389                 return -ENOMEM;
4390         }
4391
4392         free(c->path);
4393         c->path = p;
4394
4395         strv_free(c->argv);
4396         c->argv = l;
4397
4398         return 0;
4399 }
4400
4401 int exec_command_append(ExecCommand *c, const char *path, ...) {
4402         _cleanup_strv_free_ char **l = NULL;
4403         va_list ap;
4404         int r;
4405
4406         assert(c);
4407         assert(path);
4408
4409         va_start(ap, path);
4410         l = strv_new_ap(path, ap);
4411         va_end(ap);
4412
4413         if (!l)
4414                 return -ENOMEM;
4415
4416         r = strv_extend_strv(&c->argv, l, false);
4417         if (r < 0)
4418                 return r;
4419
4420         return 0;
4421 }
4422
4423
4424 static int exec_runtime_allocate(ExecRuntime **rt) {
4425
4426         if (*rt)
4427                 return 0;
4428
4429         *rt = new0(ExecRuntime, 1);
4430         if (!*rt)
4431                 return -ENOMEM;
4432
4433         (*rt)->n_ref = 1;
4434         (*rt)->netns_storage_socket[0] = (*rt)->netns_storage_socket[1] = -1;
4435
4436         return 0;
4437 }
4438
4439 int exec_runtime_make(ExecRuntime **rt, ExecContext *c, const char *id) {
4440         int r;
4441
4442         assert(rt);
4443         assert(c);
4444         assert(id);
4445
4446         if (*rt)
4447                 return 1;
4448
4449         if (!c->private_network && !c->private_tmp)
4450                 return 0;
4451
4452         r = exec_runtime_allocate(rt);
4453         if (r < 0)
4454                 return r;
4455
4456         if (c->private_network && (*rt)->netns_storage_socket[0] < 0) {
4457                 if (socketpair(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC, 0, (*rt)->netns_storage_socket) < 0)
4458                         return -errno;
4459         }
4460
4461         if (c->private_tmp && !(*rt)->tmp_dir) {
4462                 r = setup_tmp_dirs(id, &(*rt)->tmp_dir, &(*rt)->var_tmp_dir);
4463                 if (r < 0)
4464                         return r;
4465         }
4466
4467         return 1;
4468 }
4469
4470 ExecRuntime *exec_runtime_ref(ExecRuntime *r) {
4471         assert(r);
4472         assert(r->n_ref > 0);
4473
4474         r->n_ref++;
4475         return r;
4476 }
4477
4478 ExecRuntime *exec_runtime_unref(ExecRuntime *r) {
4479
4480         if (!r)
4481                 return NULL;
4482
4483         assert(r->n_ref > 0);
4484
4485         r->n_ref--;
4486         if (r->n_ref > 0)
4487                 return NULL;
4488
4489         free(r->tmp_dir);
4490         free(r->var_tmp_dir);
4491         safe_close_pair(r->netns_storage_socket);
4492         return mfree(r);
4493 }
4494
4495 int exec_runtime_serialize(Unit *u, ExecRuntime *rt, FILE *f, FDSet *fds) {
4496         assert(u);
4497         assert(f);
4498         assert(fds);
4499
4500         if (!rt)
4501                 return 0;
4502
4503         if (rt->tmp_dir)
4504                 unit_serialize_item(u, f, "tmp-dir", rt->tmp_dir);
4505
4506         if (rt->var_tmp_dir)
4507                 unit_serialize_item(u, f, "var-tmp-dir", rt->var_tmp_dir);
4508
4509         if (rt->netns_storage_socket[0] >= 0) {
4510                 int copy;
4511
4512                 copy = fdset_put_dup(fds, rt->netns_storage_socket[0]);
4513                 if (copy < 0)
4514                         return copy;
4515
4516                 unit_serialize_item_format(u, f, "netns-socket-0", "%i", copy);
4517         }
4518
4519         if (rt->netns_storage_socket[1] >= 0) {
4520                 int copy;
4521
4522                 copy = fdset_put_dup(fds, rt->netns_storage_socket[1]);
4523                 if (copy < 0)
4524                         return copy;
4525
4526                 unit_serialize_item_format(u, f, "netns-socket-1", "%i", copy);
4527         }
4528
4529         return 0;
4530 }
4531
4532 int exec_runtime_deserialize_item(Unit *u, ExecRuntime **rt, const char *key, const char *value, FDSet *fds) {
4533         int r;
4534
4535         assert(rt);
4536         assert(key);
4537         assert(value);
4538
4539         if (streq(key, "tmp-dir")) {
4540                 char *copy;
4541
4542                 r = exec_runtime_allocate(rt);
4543                 if (r < 0)
4544                         return log_oom();
4545
4546                 copy = strdup(value);
4547                 if (!copy)
4548                         return log_oom();
4549
4550                 free((*rt)->tmp_dir);
4551                 (*rt)->tmp_dir = copy;
4552
4553         } else if (streq(key, "var-tmp-dir")) {
4554                 char *copy;
4555
4556                 r = exec_runtime_allocate(rt);
4557                 if (r < 0)
4558                         return log_oom();
4559
4560                 copy = strdup(value);
4561                 if (!copy)
4562                         return log_oom();
4563
4564                 free((*rt)->var_tmp_dir);
4565                 (*rt)->var_tmp_dir = copy;
4566
4567         } else if (streq(key, "netns-socket-0")) {
4568                 int fd;
4569
4570                 r = exec_runtime_allocate(rt);
4571                 if (r < 0)
4572                         return log_oom();
4573
4574                 if (safe_atoi(value, &fd) < 0 || !fdset_contains(fds, fd))
4575                         log_unit_debug(u, "Failed to parse netns socket value: %s", value);
4576                 else {
4577                         safe_close((*rt)->netns_storage_socket[0]);
4578                         (*rt)->netns_storage_socket[0] = fdset_remove(fds, fd);
4579                 }
4580         } else if (streq(key, "netns-socket-1")) {
4581                 int fd;
4582
4583                 r = exec_runtime_allocate(rt);
4584                 if (r < 0)
4585                         return log_oom();
4586
4587                 if (safe_atoi(value, &fd) < 0 || !fdset_contains(fds, fd))
4588                         log_unit_debug(u, "Failed to parse netns socket value: %s", value);
4589                 else {
4590                         safe_close((*rt)->netns_storage_socket[1]);
4591                         (*rt)->netns_storage_socket[1] = fdset_remove(fds, fd);
4592                 }
4593         } else
4594                 return 0;
4595
4596         return 1;
4597 }
4598
4599 static void *remove_tmpdir_thread(void *p) {
4600         _cleanup_free_ char *path = p;
4601
4602         (void) rm_rf(path, REMOVE_ROOT|REMOVE_PHYSICAL);
4603         return NULL;
4604 }
4605
4606 void exec_runtime_destroy(ExecRuntime *rt) {
4607         int r;
4608
4609         if (!rt)
4610                 return;
4611
4612         /* If there are multiple users of this, let's leave the stuff around */
4613         if (rt->n_ref > 1)
4614                 return;
4615
4616         if (rt->tmp_dir) {
4617                 log_debug("Spawning thread to nuke %s", rt->tmp_dir);
4618
4619                 r = asynchronous_job(remove_tmpdir_thread, rt->tmp_dir);
4620                 if (r < 0) {
4621                         log_warning_errno(r, "Failed to nuke %s: %m", rt->tmp_dir);
4622                         free(rt->tmp_dir);
4623                 }
4624
4625                 rt->tmp_dir = NULL;
4626         }
4627
4628         if (rt->var_tmp_dir) {
4629                 log_debug("Spawning thread to nuke %s", rt->var_tmp_dir);
4630
4631                 r = asynchronous_job(remove_tmpdir_thread, rt->var_tmp_dir);
4632                 if (r < 0) {
4633                         log_warning_errno(r, "Failed to nuke %s: %m", rt->var_tmp_dir);
4634                         free(rt->var_tmp_dir);
4635                 }
4636
4637                 rt->var_tmp_dir = NULL;
4638         }
4639
4640         safe_close_pair(rt->netns_storage_socket);
4641 }
4642
4643 static const char* const exec_input_table[_EXEC_INPUT_MAX] = {
4644         [EXEC_INPUT_NULL] = "null",
4645         [EXEC_INPUT_TTY] = "tty",
4646         [EXEC_INPUT_TTY_FORCE] = "tty-force",
4647         [EXEC_INPUT_TTY_FAIL] = "tty-fail",
4648         [EXEC_INPUT_SOCKET] = "socket",
4649         [EXEC_INPUT_NAMED_FD] = "fd",
4650         [EXEC_INPUT_DATA] = "data",
4651 };
4652
4653 DEFINE_STRING_TABLE_LOOKUP(exec_input, ExecInput);
4654
4655 static const char* const exec_output_table[_EXEC_OUTPUT_MAX] = {
4656         [EXEC_OUTPUT_INHERIT] = "inherit",
4657         [EXEC_OUTPUT_NULL] = "null",
4658         [EXEC_OUTPUT_TTY] = "tty",
4659         [EXEC_OUTPUT_SYSLOG] = "syslog",
4660         [EXEC_OUTPUT_SYSLOG_AND_CONSOLE] = "syslog+console",
4661         [EXEC_OUTPUT_KMSG] = "kmsg",
4662         [EXEC_OUTPUT_KMSG_AND_CONSOLE] = "kmsg+console",
4663         [EXEC_OUTPUT_JOURNAL] = "journal",
4664         [EXEC_OUTPUT_JOURNAL_AND_CONSOLE] = "journal+console",
4665         [EXEC_OUTPUT_SOCKET] = "socket",
4666         [EXEC_OUTPUT_NAMED_FD] = "fd",
4667 };
4668
4669 DEFINE_STRING_TABLE_LOOKUP(exec_output, ExecOutput);
4670
4671 static const char* const exec_utmp_mode_table[_EXEC_UTMP_MODE_MAX] = {
4672         [EXEC_UTMP_INIT] = "init",
4673         [EXEC_UTMP_LOGIN] = "login",
4674         [EXEC_UTMP_USER] = "user",
4675 };
4676
4677 DEFINE_STRING_TABLE_LOOKUP(exec_utmp_mode, ExecUtmpMode);
4678
4679 static const char* const exec_preserve_mode_table[_EXEC_PRESERVE_MODE_MAX] = {
4680         [EXEC_PRESERVE_NO] = "no",
4681         [EXEC_PRESERVE_YES] = "yes",
4682         [EXEC_PRESERVE_RESTART] = "restart",
4683 };
4684
4685 DEFINE_STRING_TABLE_LOOKUP_WITH_BOOLEAN(exec_preserve_mode, ExecPreserveMode, EXEC_PRESERVE_YES);
4686
4687 static const char* const exec_directory_type_table[_EXEC_DIRECTORY_TYPE_MAX] = {
4688         [EXEC_DIRECTORY_RUNTIME] = "RuntimeDirectory",
4689         [EXEC_DIRECTORY_STATE] = "StateDirectory",
4690         [EXEC_DIRECTORY_CACHE] = "CacheDirectory",
4691         [EXEC_DIRECTORY_LOGS] = "LogsDirectory",
4692         [EXEC_DIRECTORY_CONFIGURATION] = "ConfigurationDirectory",
4693 };
4694
4695 DEFINE_STRING_TABLE_LOOKUP(exec_directory_type, ExecDirectoryType);
4696
4697 static const char* const exec_keyring_mode_table[_EXEC_KEYRING_MODE_MAX] = {
4698         [EXEC_KEYRING_INHERIT] = "inherit",
4699         [EXEC_KEYRING_PRIVATE] = "private",
4700         [EXEC_KEYRING_SHARED] = "shared",
4701 };
4702
4703 DEFINE_STRING_TABLE_LOOKUP(exec_keyring_mode, ExecKeyringMode);