]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/core/execute.c
bash-completion: move shell-completion for log-level or friends to systemctl
[thirdparty/systemd.git] / src / core / execute.c
CommitLineData
53e1b683 1/* SPDX-License-Identifier: LGPL-2.1+ */
a7334b09 2
034c6ed7
LP
3#include <errno.h>
4#include <fcntl.h>
8dd4c05b 5#include <poll.h>
d251207d 6#include <sys/eventfd.h>
f5947a5e 7#include <sys/ioctl.h>
f3e43635 8#include <sys/mman.h>
8dd4c05b 9#include <sys/personality.h>
94f04347 10#include <sys/prctl.h>
d2ffa389 11#include <sys/shm.h>
d2ffa389 12#include <sys/types.h>
8dd4c05b
LP
13#include <sys/un.h>
14#include <unistd.h>
023a4f67 15#include <utmpx.h>
5cb5a6ff 16
349cc4a5 17#if HAVE_PAM
5b6319dc
LP
18#include <security/pam_appl.h>
19#endif
20
349cc4a5 21#if HAVE_SELINUX
7b52a628
MS
22#include <selinux/selinux.h>
23#endif
24
349cc4a5 25#if HAVE_SECCOMP
17df7223
LP
26#include <seccomp.h>
27#endif
28
349cc4a5 29#if HAVE_APPARMOR
eef65bf3
MS
30#include <sys/apparmor.h>
31#endif
32
24882e06 33#include "sd-messages.h"
8dd4c05b
LP
34
35#include "af-list.h"
b5efdb8a 36#include "alloc-util.h"
349cc4a5 37#if HAVE_APPARMOR
3ffd4af2
LP
38#include "apparmor-util.h"
39#endif
8dd4c05b
LP
40#include "async.h"
41#include "barrier.h"
8dd4c05b 42#include "cap-list.h"
430f0182 43#include "capability-util.h"
a1164ae3 44#include "chown-recursive.h"
fdb3deca 45#include "cgroup-setup.h"
da681e1b 46#include "cpu-set-util.h"
f6a6225e 47#include "def.h"
686d13b9 48#include "env-file.h"
4d1a6904 49#include "env-util.h"
17df7223 50#include "errno-list.h"
3ffd4af2 51#include "execute.h"
8dd4c05b 52#include "exit-status.h"
3ffd4af2 53#include "fd-util.h"
f97b34a6 54#include "format-util.h"
f4f15635 55#include "fs-util.h"
7d50b32a 56#include "glob-util.h"
c004493c 57#include "io-util.h"
8dd4c05b 58#include "ioprio.h"
a1164ae3 59#include "label.h"
8dd4c05b
LP
60#include "log.h"
61#include "macro.h"
e8a565cb 62#include "manager.h"
0a970718 63#include "memory-util.h"
f5947a5e 64#include "missing_fs.h"
8dd4c05b
LP
65#include "mkdir.h"
66#include "namespace.h"
6bedfcbb 67#include "parse-util.h"
8dd4c05b 68#include "path-util.h"
0b452006 69#include "process-util.h"
78f22b97 70#include "rlimit-util.h"
8dd4c05b 71#include "rm-rf.h"
349cc4a5 72#if HAVE_SECCOMP
3ffd4af2
LP
73#include "seccomp-util.h"
74#endif
07d46372 75#include "securebits-util.h"
8dd4c05b 76#include "selinux-util.h"
24882e06 77#include "signal-util.h"
8dd4c05b 78#include "smack-util.h"
57b7a260 79#include "socket-util.h"
fd63e712 80#include "special.h"
949befd3 81#include "stat-util.h"
8b43440b 82#include "string-table.h"
07630cea 83#include "string-util.h"
8dd4c05b 84#include "strv.h"
7ccbd1ae 85#include "syslog-util.h"
8dd4c05b 86#include "terminal-util.h"
566b7d23 87#include "umask-util.h"
8dd4c05b 88#include "unit.h"
b1d4f8e1 89#include "user-util.h"
8dd4c05b 90#include "utmp-wtmp.h"
5cb5a6ff 91
e056b01d 92#define IDLE_TIMEOUT_USEC (5*USEC_PER_SEC)
31a7eb86 93#define IDLE_TIMEOUT2_USEC (1*USEC_PER_SEC)
e6a26745 94
531dca78
LP
95#define SNDBUF_SIZE (8*1024*1024)
96
/* Moves the passed fds so that fds[i] ends up as fd number i+3, i.e. the array occupies the
 * contiguous range 3…3+n_fds-1. Modifies the fds array in place!
 *
 * Returns 0 on success, or -errno if duplicating a descriptor fails. */
static int shift_fds(int fds[], size_t n_fds) {
        int first = 0, retry_at;

        if (n_fds <= 0)
                return 0;

        assert(fds);

        do {
                int i;

                retry_at = -1;

                for (i = first; i < (int) n_fds; i++) {
                        int wanted = i + 3, dup_fd;

                        /* Nothing to do if the fd already sits in its slot */
                        if (fds[i] == wanted)
                                continue;

                        dup_fd = fcntl(fds[i], F_DUPFD, wanted);
                        if (dup_fd < 0)
                                return -errno;

                        safe_close(fds[i]);
                        fds[i] = dup_fd;

                        /* F_DUPFD hands out the lowest free fd >= wanted. If the slot was still
                         * occupied we got something else; remember the first such index so that
                         * we can retry from there on the next pass. */
                        if (retry_at < 0 && dup_fd != wanted)
                                retry_at = i;
                }

                first = retry_at;
        } while (retry_at >= 0);

        return 0;
}
141
/* Adjusts the file flags of the fds we are about to pass to a child. The array holds the socket
 * fds first, followed by the storage fds.
 *
 * O_NONBLOCK is set/dropped (according to 'nonblock') on the socket fds only, since it only
 * matters for socket activation. FD_CLOEXEC is dropped on all of them.
 *
 * Returns 0 on success, or the negative error reported by fd_nonblock()/fd_cloexec(). */
static int flags_fds(const int fds[], size_t n_socket_fds, size_t n_storage_fds, bool nonblock) {
        size_t total, idx;
        int r;

        total = n_socket_fds + n_storage_fds;
        if (total <= 0)
                return 0;

        assert(fds);

        for (idx = 0; idx < total; idx++) {
                bool is_socket = idx < n_socket_fds;

                if (is_socket) {
                        r = fd_nonblock(fds[idx], nonblock);
                        if (r < 0)
                                return r;
                }

                /* FD_CLOEXEC always has to go: the whole point is to hand these fds over to
                 * our children. */
                r = fd_cloexec(fds[idx], false);
                if (r < 0)
                        return r;
        }

        return 0;
}
174
1e22b5cd 175static const char *exec_context_tty_path(const ExecContext *context) {
80876c20
LP
176 assert(context);
177
1e22b5cd
LP
178 if (context->stdio_as_fds)
179 return NULL;
180
80876c20
LP
181 if (context->tty_path)
182 return context->tty_path;
183
184 return "/dev/console";
185}
186
1e22b5cd
LP
187static void exec_context_tty_reset(const ExecContext *context, const ExecParameters *p) {
188 const char *path;
189
6ea832a2
LP
190 assert(context);
191
1e22b5cd 192 path = exec_context_tty_path(context);
6ea832a2 193
1e22b5cd
LP
194 if (context->tty_vhangup) {
195 if (p && p->stdin_fd >= 0)
196 (void) terminal_vhangup_fd(p->stdin_fd);
197 else if (path)
198 (void) terminal_vhangup(path);
199 }
6ea832a2 200
1e22b5cd
LP
201 if (context->tty_reset) {
202 if (p && p->stdin_fd >= 0)
203 (void) reset_terminal_fd(p->stdin_fd, true);
204 else if (path)
205 (void) reset_terminal(path);
206 }
207
208 if (context->tty_vt_disallocate && path)
209 (void) vt_disallocate(path);
6ea832a2
LP
210}
211
6af760f3
LP
212static bool is_terminal_input(ExecInput i) {
213 return IN_SET(i,
214 EXEC_INPUT_TTY,
215 EXEC_INPUT_TTY_FORCE,
216 EXEC_INPUT_TTY_FAIL);
217}
218
3a1286b6 219static bool is_terminal_output(ExecOutput o) {
6af760f3
LP
220 return IN_SET(o,
221 EXEC_OUTPUT_TTY,
222 EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
223 EXEC_OUTPUT_KMSG_AND_CONSOLE,
224 EXEC_OUTPUT_JOURNAL_AND_CONSOLE);
225}
226
aac8c0c3
LP
227static bool is_syslog_output(ExecOutput o) {
228 return IN_SET(o,
229 EXEC_OUTPUT_SYSLOG,
230 EXEC_OUTPUT_SYSLOG_AND_CONSOLE);
231}
232
233static bool is_kmsg_output(ExecOutput o) {
234 return IN_SET(o,
235 EXEC_OUTPUT_KMSG,
236 EXEC_OUTPUT_KMSG_AND_CONSOLE);
237}
238
6af760f3
LP
239static bool exec_context_needs_term(const ExecContext *c) {
240 assert(c);
241
242 /* Return true if the execution context suggests we should set $TERM to something useful. */
243
244 if (is_terminal_input(c->std_input))
245 return true;
246
247 if (is_terminal_output(c->std_output))
248 return true;
249
250 if (is_terminal_output(c->std_error))
251 return true;
252
253 return !!c->tty_path;
3a1286b6
MS
254}
255
/* Opens /dev/null with the specified access flags and installs it as fd number 'nfd' via
 * move_fd(). Returns -errno if the open fails, otherwise whatever move_fd() returns. */
static int open_null_as(int flags, int nfd) {
        int null_fd;

        assert(nfd >= 0);

        null_fd = open("/dev/null", flags|O_NOCTTY);

        return null_fd < 0 ? -errno : move_fd(null_fd, nfd, false);
}
267
524daa8c 268static int connect_journal_socket(int fd, uid_t uid, gid_t gid) {
92a17af9 269 static const union sockaddr_union sa = {
b92bea5d
ZJS
270 .un.sun_family = AF_UNIX,
271 .un.sun_path = "/run/systemd/journal/stdout",
272 };
524daa8c
ZJS
273 uid_t olduid = UID_INVALID;
274 gid_t oldgid = GID_INVALID;
275 int r;
276
cad93f29 277 if (gid_is_valid(gid)) {
524daa8c
ZJS
278 oldgid = getgid();
279
92a17af9 280 if (setegid(gid) < 0)
524daa8c
ZJS
281 return -errno;
282 }
283
cad93f29 284 if (uid_is_valid(uid)) {
524daa8c
ZJS
285 olduid = getuid();
286
92a17af9 287 if (seteuid(uid) < 0) {
524daa8c
ZJS
288 r = -errno;
289 goto restore_gid;
290 }
291 }
292
92a17af9 293 r = connect(fd, &sa.sa, SOCKADDR_UN_LEN(sa.un)) < 0 ? -errno : 0;
524daa8c
ZJS
294
295 /* If we fail to restore the uid or gid, things will likely
296 fail later on. This should only happen if an LSM interferes. */
297
cad93f29 298 if (uid_is_valid(uid))
524daa8c
ZJS
299 (void) seteuid(olduid);
300
301 restore_gid:
cad93f29 302 if (gid_is_valid(gid))
524daa8c
ZJS
303 (void) setegid(oldgid);
304
305 return r;
306}
307
fd1f9c89 308static int connect_logger_as(
34cf6c43 309 const Unit *unit,
fd1f9c89 310 const ExecContext *context,
af635cf3 311 const ExecParameters *params,
fd1f9c89
LP
312 ExecOutput output,
313 const char *ident,
fd1f9c89
LP
314 int nfd,
315 uid_t uid,
316 gid_t gid) {
317
2ac1ff68
EV
318 _cleanup_close_ int fd = -1;
319 int r;
071830ff
LP
320
321 assert(context);
af635cf3 322 assert(params);
80876c20
LP
323 assert(output < _EXEC_OUTPUT_MAX);
324 assert(ident);
325 assert(nfd >= 0);
071830ff 326
54fe0cdb
LP
327 fd = socket(AF_UNIX, SOCK_STREAM, 0);
328 if (fd < 0)
80876c20 329 return -errno;
071830ff 330
524daa8c
ZJS
331 r = connect_journal_socket(fd, uid, gid);
332 if (r < 0)
333 return r;
071830ff 334
2ac1ff68 335 if (shutdown(fd, SHUT_RD) < 0)
80876c20 336 return -errno;
071830ff 337
fd1f9c89 338 (void) fd_inc_sndbuf(fd, SNDBUF_SIZE);
531dca78 339
2ac1ff68 340 if (dprintf(fd,
62bca2c6 341 "%s\n"
80876c20
LP
342 "%s\n"
343 "%i\n"
54fe0cdb
LP
344 "%i\n"
345 "%i\n"
346 "%i\n"
4f4a1dbf 347 "%i\n",
c867611e 348 context->syslog_identifier ?: ident,
af635cf3 349 params->flags & EXEC_PASS_LOG_UNIT ? unit->id : "",
54fe0cdb
LP
350 context->syslog_priority,
351 !!context->syslog_level_prefix,
aac8c0c3
LP
352 is_syslog_output(output),
353 is_kmsg_output(output),
2ac1ff68
EV
354 is_terminal_output(output)) < 0)
355 return -errno;
80876c20 356
2ac1ff68 357 return move_fd(TAKE_FD(fd), nfd, false);
80876c20 358}
2ac1ff68 359
/* Opens the terminal at 'path' (never as controlling TTY) and installs it as fd number 'nfd'.
 * Returns the error of open_terminal() on failure, otherwise whatever move_fd() returns. */
static int open_terminal_as(const char *path, int flags, int nfd) {
        int tty_fd;

        assert(path);
        assert(nfd >= 0);

        tty_fd = open_terminal(path, flags | O_NOCTTY);

        return tty_fd < 0 ? tty_fd : move_fd(tty_fd, nfd, false);
}
071830ff 372
2038c3f5 373static int acquire_path(const char *path, int flags, mode_t mode) {
15a3e96f
LP
374 union sockaddr_union sa = {};
375 _cleanup_close_ int fd = -1;
376 int r, salen;
071830ff 377
80876c20 378 assert(path);
071830ff 379
2038c3f5
LP
380 if (IN_SET(flags & O_ACCMODE, O_WRONLY, O_RDWR))
381 flags |= O_CREAT;
382
383 fd = open(path, flags|O_NOCTTY, mode);
384 if (fd >= 0)
15a3e96f 385 return TAKE_FD(fd);
071830ff 386
2038c3f5
LP
387 if (errno != ENXIO) /* ENXIO is returned when we try to open() an AF_UNIX file system socket on Linux */
388 return -errno;
15a3e96f 389 if (strlen(path) >= sizeof(sa.un.sun_path)) /* Too long, can't be a UNIX socket */
2038c3f5
LP
390 return -ENXIO;
391
392 /* So, it appears the specified path could be an AF_UNIX socket. Let's see if we can connect to it. */
393
394 fd = socket(AF_UNIX, SOCK_STREAM, 0);
395 if (fd < 0)
396 return -errno;
397
15a3e96f
LP
398 salen = sockaddr_un_set_path(&sa.un, path);
399 if (salen < 0)
400 return salen;
401
402 if (connect(fd, &sa.sa, salen) < 0)
2038c3f5
LP
403 return errno == EINVAL ? -ENXIO : -errno; /* Propagate initial error if we get EINVAL, i.e. we have
404 * indication that his wasn't an AF_UNIX socket after all */
071830ff 405
2038c3f5
LP
406 if ((flags & O_ACCMODE) == O_RDONLY)
407 r = shutdown(fd, SHUT_WR);
408 else if ((flags & O_ACCMODE) == O_WRONLY)
409 r = shutdown(fd, SHUT_RD);
410 else
15a3e96f
LP
411 return TAKE_FD(fd);
412 if (r < 0)
2038c3f5 413 return -errno;
2038c3f5 414
15a3e96f 415 return TAKE_FD(fd);
80876c20 416}
071830ff 417
08f3be7a
LP
418static int fixup_input(
419 const ExecContext *context,
420 int socket_fd,
421 bool apply_tty_stdin) {
422
423 ExecInput std_input;
424
425 assert(context);
426
427 std_input = context->std_input;
1e3ad081
LP
428
429 if (is_terminal_input(std_input) && !apply_tty_stdin)
430 return EXEC_INPUT_NULL;
071830ff 431
03fd9c49 432 if (std_input == EXEC_INPUT_SOCKET && socket_fd < 0)
4f2d528d
LP
433 return EXEC_INPUT_NULL;
434
08f3be7a
LP
435 if (std_input == EXEC_INPUT_DATA && context->stdin_data_size == 0)
436 return EXEC_INPUT_NULL;
437
03fd9c49 438 return std_input;
4f2d528d
LP
439}
440
03fd9c49 441static int fixup_output(ExecOutput std_output, int socket_fd) {
4f2d528d 442
03fd9c49 443 if (std_output == EXEC_OUTPUT_SOCKET && socket_fd < 0)
4f2d528d
LP
444 return EXEC_OUTPUT_INHERIT;
445
03fd9c49 446 return std_output;
4f2d528d
LP
447}
448
a34ceba6
LP
449static int setup_input(
450 const ExecContext *context,
451 const ExecParameters *params,
52c239d7 452 int socket_fd,
2caa38e9 453 const int named_iofds[static 3]) {
a34ceba6 454
4f2d528d
LP
455 ExecInput i;
456
457 assert(context);
a34ceba6 458 assert(params);
2caa38e9 459 assert(named_iofds);
a34ceba6
LP
460
461 if (params->stdin_fd >= 0) {
462 if (dup2(params->stdin_fd, STDIN_FILENO) < 0)
463 return -errno;
464
465 /* Try to make this the controlling tty, if it is a tty, and reset it */
1fb0682e
LP
466 if (isatty(STDIN_FILENO)) {
467 (void) ioctl(STDIN_FILENO, TIOCSCTTY, context->std_input == EXEC_INPUT_TTY_FORCE);
468 (void) reset_terminal_fd(STDIN_FILENO, true);
469 }
a34ceba6
LP
470
471 return STDIN_FILENO;
472 }
4f2d528d 473
08f3be7a 474 i = fixup_input(context, socket_fd, params->flags & EXEC_APPLY_TTY_STDIN);
4f2d528d
LP
475
476 switch (i) {
071830ff 477
80876c20
LP
478 case EXEC_INPUT_NULL:
479 return open_null_as(O_RDONLY, STDIN_FILENO);
480
481 case EXEC_INPUT_TTY:
482 case EXEC_INPUT_TTY_FORCE:
483 case EXEC_INPUT_TTY_FAIL: {
046a82c1 484 int fd;
071830ff 485
1e22b5cd 486 fd = acquire_terminal(exec_context_tty_path(context),
8854d795
LP
487 i == EXEC_INPUT_TTY_FAIL ? ACQUIRE_TERMINAL_TRY :
488 i == EXEC_INPUT_TTY_FORCE ? ACQUIRE_TERMINAL_FORCE :
489 ACQUIRE_TERMINAL_WAIT,
3a43da28 490 USEC_INFINITY);
970edce6 491 if (fd < 0)
80876c20
LP
492 return fd;
493
046a82c1 494 return move_fd(fd, STDIN_FILENO, false);
80876c20
LP
495 }
496
4f2d528d 497 case EXEC_INPUT_SOCKET:
e75a9ed1
LP
498 assert(socket_fd >= 0);
499
4f2d528d
LP
500 return dup2(socket_fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
501
52c239d7 502 case EXEC_INPUT_NAMED_FD:
e75a9ed1
LP
503 assert(named_iofds[STDIN_FILENO] >= 0);
504
52c239d7
LB
505 (void) fd_nonblock(named_iofds[STDIN_FILENO], false);
506 return dup2(named_iofds[STDIN_FILENO], STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
507
08f3be7a
LP
508 case EXEC_INPUT_DATA: {
509 int fd;
510
511 fd = acquire_data_fd(context->stdin_data, context->stdin_data_size, 0);
512 if (fd < 0)
513 return fd;
514
515 return move_fd(fd, STDIN_FILENO, false);
516 }
517
2038c3f5
LP
518 case EXEC_INPUT_FILE: {
519 bool rw;
520 int fd;
521
522 assert(context->stdio_file[STDIN_FILENO]);
523
524 rw = (context->std_output == EXEC_OUTPUT_FILE && streq_ptr(context->stdio_file[STDIN_FILENO], context->stdio_file[STDOUT_FILENO])) ||
525 (context->std_error == EXEC_OUTPUT_FILE && streq_ptr(context->stdio_file[STDIN_FILENO], context->stdio_file[STDERR_FILENO]));
526
527 fd = acquire_path(context->stdio_file[STDIN_FILENO], rw ? O_RDWR : O_RDONLY, 0666 & ~context->umask);
528 if (fd < 0)
529 return fd;
530
531 return move_fd(fd, STDIN_FILENO, false);
532 }
533
80876c20
LP
534 default:
535 assert_not_reached("Unknown input type");
536 }
537}
538
41fc585a
LP
539static bool can_inherit_stderr_from_stdout(
540 const ExecContext *context,
541 ExecOutput o,
542 ExecOutput e) {
543
544 assert(context);
545
546 /* Returns true, if given the specified STDERR and STDOUT output we can directly dup() the stdout fd to the
547 * stderr fd */
548
549 if (e == EXEC_OUTPUT_INHERIT)
550 return true;
551 if (e != o)
552 return false;
553
554 if (e == EXEC_OUTPUT_NAMED_FD)
555 return streq_ptr(context->stdio_fdname[STDOUT_FILENO], context->stdio_fdname[STDERR_FILENO]);
556
557 if (IN_SET(e, EXEC_OUTPUT_FILE, EXEC_OUTPUT_FILE_APPEND))
558 return streq_ptr(context->stdio_file[STDOUT_FILENO], context->stdio_file[STDERR_FILENO]);
559
560 return true;
561}
562
a34ceba6 563static int setup_output(
34cf6c43 564 const Unit *unit,
a34ceba6
LP
565 const ExecContext *context,
566 const ExecParameters *params,
567 int fileno,
568 int socket_fd,
2caa38e9 569 const int named_iofds[static 3],
a34ceba6 570 const char *ident,
7bce046b
LP
571 uid_t uid,
572 gid_t gid,
573 dev_t *journal_stream_dev,
574 ino_t *journal_stream_ino) {
a34ceba6 575
4f2d528d
LP
576 ExecOutput o;
577 ExecInput i;
47c1d80d 578 int r;
4f2d528d 579
f2341e0a 580 assert(unit);
80876c20 581 assert(context);
a34ceba6 582 assert(params);
80876c20 583 assert(ident);
7bce046b
LP
584 assert(journal_stream_dev);
585 assert(journal_stream_ino);
80876c20 586
a34ceba6
LP
587 if (fileno == STDOUT_FILENO && params->stdout_fd >= 0) {
588
589 if (dup2(params->stdout_fd, STDOUT_FILENO) < 0)
590 return -errno;
591
592 return STDOUT_FILENO;
593 }
594
595 if (fileno == STDERR_FILENO && params->stderr_fd >= 0) {
596 if (dup2(params->stderr_fd, STDERR_FILENO) < 0)
597 return -errno;
598
599 return STDERR_FILENO;
600 }
601
08f3be7a 602 i = fixup_input(context, socket_fd, params->flags & EXEC_APPLY_TTY_STDIN);
03fd9c49 603 o = fixup_output(context->std_output, socket_fd);
4f2d528d 604
eb17e935
MS
605 if (fileno == STDERR_FILENO) {
606 ExecOutput e;
607 e = fixup_output(context->std_error, socket_fd);
80876c20 608
eb17e935
MS
609 /* This expects the input and output are already set up */
610
611 /* Don't change the stderr file descriptor if we inherit all
612 * the way and are not on a tty */
613 if (e == EXEC_OUTPUT_INHERIT &&
614 o == EXEC_OUTPUT_INHERIT &&
615 i == EXEC_INPUT_NULL &&
616 !is_terminal_input(context->std_input) &&
617 getppid () != 1)
618 return fileno;
619
620 /* Duplicate from stdout if possible */
41fc585a 621 if (can_inherit_stderr_from_stdout(context, o, e))
eb17e935 622 return dup2(STDOUT_FILENO, fileno) < 0 ? -errno : fileno;
071830ff 623
eb17e935 624 o = e;
80876c20 625
eb17e935 626 } else if (o == EXEC_OUTPUT_INHERIT) {
21d21ea4
LP
627 /* If input got downgraded, inherit the original value */
628 if (i == EXEC_INPUT_NULL && is_terminal_input(context->std_input))
1e22b5cd 629 return open_terminal_as(exec_context_tty_path(context), O_WRONLY, fileno);
21d21ea4 630
08f3be7a
LP
631 /* If the input is connected to anything that's not a /dev/null or a data fd, inherit that... */
632 if (!IN_SET(i, EXEC_INPUT_NULL, EXEC_INPUT_DATA))
eb17e935 633 return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
071830ff 634
acb591e4
LP
635 /* If we are not started from PID 1 we just inherit STDOUT from our parent process. */
636 if (getppid() != 1)
eb17e935 637 return fileno;
94f04347 638
eb17e935
MS
639 /* We need to open /dev/null here anew, to get the right access mode. */
640 return open_null_as(O_WRONLY, fileno);
071830ff 641 }
94f04347 642
eb17e935 643 switch (o) {
80876c20
LP
644
645 case EXEC_OUTPUT_NULL:
eb17e935 646 return open_null_as(O_WRONLY, fileno);
80876c20
LP
647
648 case EXEC_OUTPUT_TTY:
4f2d528d 649 if (is_terminal_input(i))
eb17e935 650 return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
80876c20
LP
651
652 /* We don't reset the terminal if this is just about output */
1e22b5cd 653 return open_terminal_as(exec_context_tty_path(context), O_WRONLY, fileno);
80876c20
LP
654
655 case EXEC_OUTPUT_SYSLOG:
28dbc1e8 656 case EXEC_OUTPUT_SYSLOG_AND_CONSOLE:
9a6bca7a 657 case EXEC_OUTPUT_KMSG:
28dbc1e8 658 case EXEC_OUTPUT_KMSG_AND_CONSOLE:
706343f4
LP
659 case EXEC_OUTPUT_JOURNAL:
660 case EXEC_OUTPUT_JOURNAL_AND_CONSOLE:
af635cf3 661 r = connect_logger_as(unit, context, params, o, ident, fileno, uid, gid);
47c1d80d 662 if (r < 0) {
82677ae4 663 log_unit_warning_errno(unit, r, "Failed to connect %s to the journal socket, ignoring: %m", fileno == STDOUT_FILENO ? "stdout" : "stderr");
eb17e935 664 r = open_null_as(O_WRONLY, fileno);
7bce046b
LP
665 } else {
666 struct stat st;
667
668 /* If we connected this fd to the journal via a stream, patch the device/inode into the passed
669 * parameters, but only then. This is useful so that we can set $JOURNAL_STREAM that permits
ab2116b1
LP
670 * services to detect whether they are connected to the journal or not.
671 *
672 * If both stdout and stderr are connected to a stream then let's make sure to store the data
673 * about STDERR as that's usually the best way to do logging. */
7bce046b 674
ab2116b1
LP
675 if (fstat(fileno, &st) >= 0 &&
676 (*journal_stream_ino == 0 || fileno == STDERR_FILENO)) {
7bce046b
LP
677 *journal_stream_dev = st.st_dev;
678 *journal_stream_ino = st.st_ino;
679 }
47c1d80d
MS
680 }
681 return r;
4f2d528d
LP
682
683 case EXEC_OUTPUT_SOCKET:
684 assert(socket_fd >= 0);
e75a9ed1 685
eb17e935 686 return dup2(socket_fd, fileno) < 0 ? -errno : fileno;
94f04347 687
52c239d7 688 case EXEC_OUTPUT_NAMED_FD:
e75a9ed1
LP
689 assert(named_iofds[fileno] >= 0);
690
52c239d7
LB
691 (void) fd_nonblock(named_iofds[fileno], false);
692 return dup2(named_iofds[fileno], fileno) < 0 ? -errno : fileno;
693
566b7d23
ZD
694 case EXEC_OUTPUT_FILE:
695 case EXEC_OUTPUT_FILE_APPEND: {
2038c3f5 696 bool rw;
566b7d23 697 int fd, flags;
2038c3f5
LP
698
699 assert(context->stdio_file[fileno]);
700
701 rw = context->std_input == EXEC_INPUT_FILE &&
702 streq_ptr(context->stdio_file[fileno], context->stdio_file[STDIN_FILENO]);
703
704 if (rw)
705 return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
706
566b7d23
ZD
707 flags = O_WRONLY;
708 if (o == EXEC_OUTPUT_FILE_APPEND)
709 flags |= O_APPEND;
710
711 fd = acquire_path(context->stdio_file[fileno], flags, 0666 & ~context->umask);
2038c3f5
LP
712 if (fd < 0)
713 return fd;
714
566b7d23 715 return move_fd(fd, fileno, 0);
2038c3f5
LP
716 }
717
94f04347 718 default:
80876c20 719 assert_not_reached("Unknown error type");
94f04347 720 }
071830ff
LP
721}
722
02a51aba 723static int chown_terminal(int fd, uid_t uid) {
4b3b5bc7 724 int r;
02a51aba
LP
725
726 assert(fd >= 0);
02a51aba 727
1ff74fb6 728 /* Before we chown/chmod the TTY, let's ensure this is actually a tty */
4b3b5bc7
LP
729 if (isatty(fd) < 1) {
730 if (IN_SET(errno, EINVAL, ENOTTY))
731 return 0; /* not a tty */
1ff74fb6 732
02a51aba 733 return -errno;
4b3b5bc7 734 }
02a51aba 735
4b3b5bc7
LP
736 /* This might fail. What matters are the results. */
737 r = fchmod_and_chown(fd, TTY_MODE, uid, -1);
738 if (r < 0)
739 return r;
02a51aba 740
4b3b5bc7 741 return 1;
02a51aba
LP
742}
743
7d5ceb64 744static int setup_confirm_stdio(const char *vc, int *_saved_stdin, int *_saved_stdout) {
3d18b167
LP
745 _cleanup_close_ int fd = -1, saved_stdin = -1, saved_stdout = -1;
746 int r;
80876c20 747
80876c20
LP
748 assert(_saved_stdin);
749 assert(_saved_stdout);
750
af6da548
LP
751 saved_stdin = fcntl(STDIN_FILENO, F_DUPFD, 3);
752 if (saved_stdin < 0)
753 return -errno;
80876c20 754
af6da548 755 saved_stdout = fcntl(STDOUT_FILENO, F_DUPFD, 3);
3d18b167
LP
756 if (saved_stdout < 0)
757 return -errno;
80876c20 758
8854d795 759 fd = acquire_terminal(vc, ACQUIRE_TERMINAL_WAIT, DEFAULT_CONFIRM_USEC);
3d18b167
LP
760 if (fd < 0)
761 return fd;
80876c20 762
af6da548
LP
763 r = chown_terminal(fd, getuid());
764 if (r < 0)
3d18b167 765 return r;
02a51aba 766
3d18b167
LP
767 r = reset_terminal_fd(fd, true);
768 if (r < 0)
769 return r;
80876c20 770
2b33ab09 771 r = rearrange_stdio(fd, fd, STDERR_FILENO);
3d18b167 772 fd = -1;
2b33ab09
LP
773 if (r < 0)
774 return r;
80876c20
LP
775
776 *_saved_stdin = saved_stdin;
777 *_saved_stdout = saved_stdout;
778
3d18b167 779 saved_stdin = saved_stdout = -1;
80876c20 780
3d18b167 781 return 0;
80876c20
LP
782}
783
63d77c92 784static void write_confirm_error_fd(int err, int fd, const Unit *u) {
3b20f877
FB
785 assert(err < 0);
786
787 if (err == -ETIMEDOUT)
63d77c92 788 dprintf(fd, "Confirmation question timed out for %s, assuming positive response.\n", u->id);
3b20f877
FB
789 else {
790 errno = -err;
63d77c92 791 dprintf(fd, "Couldn't ask confirmation for %s: %m, assuming positive response.\n", u->id);
3b20f877
FB
792 }
793}
794
63d77c92 795static void write_confirm_error(int err, const char *vc, const Unit *u) {
03e334a1 796 _cleanup_close_ int fd = -1;
80876c20 797
3b20f877 798 assert(vc);
80876c20 799
7d5ceb64 800 fd = open_terminal(vc, O_WRONLY|O_NOCTTY|O_CLOEXEC);
af6da548 801 if (fd < 0)
3b20f877 802 return;
80876c20 803
63d77c92 804 write_confirm_error_fd(err, fd, u);
af6da548 805}
80876c20 806
/* Undoes setup_confirm_stdio(): releases the terminal, puts the saved fds back in place as
 * stdin/stdout and closes them. Both restores are attempted even if the first one fails.
 *
 * Returns 0 on success, or -errno of the last dup2() that failed. */
static int restore_confirm_stdio(int *saved_stdin, int *saved_stdout) {
        int ret = 0;

        assert(saved_stdin);
        assert(saved_stdout);

        release_terminal();

        if (*saved_stdin >= 0 && dup2(*saved_stdin, STDIN_FILENO) < 0)
                ret = -errno;

        if (*saved_stdout >= 0 && dup2(*saved_stdout, STDOUT_FILENO) < 0)
                ret = -errno;

        /* The result of safe_close() is stored back so that the caller's variables don't keep
         * referring to the closed fds. */
        *saved_stdin = safe_close(*saved_stdin);
        *saved_stdout = safe_close(*saved_stdout);

        return ret;
}
828
3b20f877
FB
/* Possible results of ask_for_confirmation() */
enum {
        CONFIRM_PRETEND_FAILURE = -1,  /* 'f': don't execute the command, pretend it failed */
        CONFIRM_PRETEND_SUCCESS =  0,  /* 's': don't execute the command, pretend it succeeded */
        CONFIRM_EXECUTE         =  1,  /* 'y': execute the command */
};
834
eedf223a 835static int ask_for_confirmation(const char *vc, Unit *u, const char *cmdline) {
af6da548 836 int saved_stdout = -1, saved_stdin = -1, r;
2bcd3c26 837 _cleanup_free_ char *e = NULL;
3b20f877 838 char c;
af6da548 839
3b20f877 840 /* For any internal errors, assume a positive response. */
7d5ceb64 841 r = setup_confirm_stdio(vc, &saved_stdin, &saved_stdout);
3b20f877 842 if (r < 0) {
63d77c92 843 write_confirm_error(r, vc, u);
3b20f877
FB
844 return CONFIRM_EXECUTE;
845 }
af6da548 846
b0eb2944
FB
847 /* confirm_spawn might have been disabled while we were sleeping. */
848 if (manager_is_confirm_spawn_disabled(u->manager)) {
849 r = 1;
850 goto restore_stdio;
851 }
af6da548 852
2bcd3c26
FB
853 e = ellipsize(cmdline, 60, 100);
854 if (!e) {
855 log_oom();
856 r = CONFIRM_EXECUTE;
857 goto restore_stdio;
858 }
af6da548 859
d172b175 860 for (;;) {
539622bd 861 r = ask_char(&c, "yfshiDjcn", "Execute %s? [y, f, s – h for help] ", e);
d172b175 862 if (r < 0) {
63d77c92 863 write_confirm_error_fd(r, STDOUT_FILENO, u);
d172b175
FB
864 r = CONFIRM_EXECUTE;
865 goto restore_stdio;
866 }
af6da548 867
d172b175 868 switch (c) {
b0eb2944
FB
869 case 'c':
870 printf("Resuming normal execution.\n");
871 manager_disable_confirm_spawn();
872 r = 1;
873 break;
dd6f9ac0
FB
874 case 'D':
875 unit_dump(u, stdout, " ");
876 continue; /* ask again */
d172b175
FB
877 case 'f':
878 printf("Failing execution.\n");
879 r = CONFIRM_PRETEND_FAILURE;
880 break;
881 case 'h':
b0eb2944
FB
882 printf(" c - continue, proceed without asking anymore\n"
883 " D - dump, show the state of the unit\n"
dd6f9ac0 884 " f - fail, don't execute the command and pretend it failed\n"
d172b175 885 " h - help\n"
eedf223a 886 " i - info, show a short summary of the unit\n"
56fde33a 887 " j - jobs, show jobs that are in progress\n"
d172b175
FB
888 " s - skip, don't execute the command and pretend it succeeded\n"
889 " y - yes, execute the command\n");
dd6f9ac0 890 continue; /* ask again */
eedf223a
FB
891 case 'i':
892 printf(" Description: %s\n"
893 " Unit: %s\n"
894 " Command: %s\n",
895 u->id, u->description, cmdline);
896 continue; /* ask again */
56fde33a
FB
897 case 'j':
898 manager_dump_jobs(u->manager, stdout, " ");
899 continue; /* ask again */
539622bd
FB
900 case 'n':
901 /* 'n' was removed in favor of 'f'. */
902 printf("Didn't understand 'n', did you mean 'f'?\n");
903 continue; /* ask again */
d172b175
FB
904 case 's':
905 printf("Skipping execution.\n");
906 r = CONFIRM_PRETEND_SUCCESS;
907 break;
908 case 'y':
909 r = CONFIRM_EXECUTE;
910 break;
911 default:
912 assert_not_reached("Unhandled choice");
913 }
3b20f877 914 break;
3b20f877 915 }
af6da548 916
3b20f877 917restore_stdio:
af6da548 918 restore_confirm_stdio(&saved_stdin, &saved_stdout);
af6da548 919 return r;
80876c20
LP
920}
921
4d885bd3
DH
922static int get_fixed_user(const ExecContext *c, const char **user,
923 uid_t *uid, gid_t *gid,
924 const char **home, const char **shell) {
81a2b7ce 925 int r;
4d885bd3 926 const char *name;
81a2b7ce 927
4d885bd3 928 assert(c);
81a2b7ce 929
23deef88
LP
930 if (!c->user)
931 return 0;
932
4d885bd3
DH
933 /* Note that we don't set $HOME or $SHELL if they are not particularly enlightening anyway
934 * (i.e. are "/" or "/bin/nologin"). */
81a2b7ce 935
23deef88 936 name = c->user;
fafff8f1 937 r = get_user_creds(&name, uid, gid, home, shell, USER_CREDS_CLEAN);
4d885bd3
DH
938 if (r < 0)
939 return r;
81a2b7ce 940
4d885bd3
DH
941 *user = name;
942 return 0;
943}
944
945static int get_fixed_group(const ExecContext *c, const char **group, gid_t *gid) {
946 int r;
947 const char *name;
948
949 assert(c);
950
951 if (!c->group)
952 return 0;
953
954 name = c->group;
fafff8f1 955 r = get_group_creds(&name, gid, 0);
4d885bd3
DH
956 if (r < 0)
957 return r;
958
959 *group = name;
960 return 0;
961}
962
cdc5d5c5
DH
963static int get_supplementary_groups(const ExecContext *c, const char *user,
964 const char *group, gid_t gid,
965 gid_t **supplementary_gids, int *ngids) {
4d885bd3
DH
966 char **i;
967 int r, k = 0;
968 int ngroups_max;
969 bool keep_groups = false;
970 gid_t *groups = NULL;
971 _cleanup_free_ gid_t *l_gids = NULL;
972
973 assert(c);
974
bbeea271
DH
975 /*
976 * If user is given, then lookup GID and supplementary groups list.
977 * We avoid NSS lookups for gid=0. Also we have to initialize groups
cdc5d5c5
DH
978 * here and as early as possible so we keep the list of supplementary
979 * groups of the caller.
bbeea271
DH
980 */
981 if (user && gid_is_valid(gid) && gid != 0) {
982 /* First step, initialize groups from /etc/groups */
983 if (initgroups(user, gid) < 0)
984 return -errno;
985
986 keep_groups = true;
987 }
988
ac6e8be6 989 if (strv_isempty(c->supplementary_groups))
4d885bd3
DH
990 return 0;
991
366ddd25
DH
992 /*
993 * If SupplementaryGroups= was passed then NGROUPS_MAX has to
994 * be positive, otherwise fail.
995 */
996 errno = 0;
997 ngroups_max = (int) sysconf(_SC_NGROUPS_MAX);
66855de7
LP
998 if (ngroups_max <= 0)
999 return errno_or_else(EOPNOTSUPP);
366ddd25 1000
4d885bd3
DH
1001 l_gids = new(gid_t, ngroups_max);
1002 if (!l_gids)
1003 return -ENOMEM;
81a2b7ce 1004
4d885bd3
DH
1005 if (keep_groups) {
1006 /*
1007 * Lookup the list of groups that the user belongs to, we
1008 * avoid NSS lookups here too for gid=0.
1009 */
1010 k = ngroups_max;
1011 if (getgrouplist(user, gid, l_gids, &k) < 0)
1012 return -EINVAL;
1013 } else
1014 k = 0;
81a2b7ce 1015
4d885bd3
DH
1016 STRV_FOREACH(i, c->supplementary_groups) {
1017 const char *g;
81a2b7ce 1018
4d885bd3
DH
1019 if (k >= ngroups_max)
1020 return -E2BIG;
81a2b7ce 1021
4d885bd3 1022 g = *i;
fafff8f1 1023 r = get_group_creds(&g, l_gids+k, 0);
4d885bd3
DH
1024 if (r < 0)
1025 return r;
81a2b7ce 1026
4d885bd3
DH
1027 k++;
1028 }
81a2b7ce 1029
4d885bd3
DH
1030 /*
1031 * Sets ngids to zero to drop all supplementary groups, happens
1032 * when we are under root and SupplementaryGroups= is empty.
1033 */
1034 if (k == 0) {
1035 *ngids = 0;
1036 return 0;
1037 }
81a2b7ce 1038
4d885bd3
DH
1039 /* Otherwise get the final list of supplementary groups */
1040 groups = memdup(l_gids, sizeof(gid_t) * k);
1041 if (!groups)
1042 return -ENOMEM;
1043
1044 *supplementary_gids = groups;
1045 *ngids = k;
1046
1047 groups = NULL;
1048
1049 return 0;
1050}
1051
/* Applies the group credentials: first the supplementary groups (only if there are any), then
 * the real, effective and saved GID (only if 'gid' is valid).
 *
 * Returns 0 on success, a negative errno-style error otherwise. */
static int enforce_groups(gid_t gid, const gid_t *supplementary_gids, int ngids) {

        /* Handle SupplementaryGroups= if it is not empty */
        if (ngids > 0) {
                int r = maybe_setgroups(ngids, supplementary_gids);
                if (r < 0)
                        return r;
        }

        /* Then set our gids */
        if (gid_is_valid(gid) && setresgid(gid, gid, gid) < 0)
                return -errno;

        return 0;
}
1070
1071static int enforce_user(const ExecContext *context, uid_t uid) {
81a2b7ce
LP
1072 assert(context);
1073
4d885bd3
DH
1074 if (!uid_is_valid(uid))
1075 return 0;
1076
479050b3 1077 /* Sets (but doesn't look up) the uid and make sure we keep the
81a2b7ce
LP
1078 * capabilities while doing so. */
1079
479050b3 1080 if (context->capability_ambient_set != 0) {
81a2b7ce
LP
1081
1082 /* First step: If we need to keep capabilities but
1083 * drop privileges we need to make sure we keep our
cbb21cca 1084 * caps, while we drop privileges. */
693ced48 1085 if (uid != 0) {
cbb21cca 1086 int sb = context->secure_bits | 1<<SECURE_KEEP_CAPS;
693ced48
LP
1087
1088 if (prctl(PR_GET_SECUREBITS) != sb)
1089 if (prctl(PR_SET_SECUREBITS, sb) < 0)
1090 return -errno;
1091 }
81a2b7ce
LP
1092 }
1093
479050b3 1094 /* Second step: actually set the uids */
81a2b7ce
LP
1095 if (setresuid(uid, uid, uid) < 0)
1096 return -errno;
1097
1098 /* At this point we should have all necessary capabilities but
1099 are otherwise a normal user. However, the caps might got
1100 corrupted due to the setresuid() so we need clean them up
1101 later. This is done outside of this call. */
1102
1103 return 0;
1104}
1105
349cc4a5 1106#if HAVE_PAM
5b6319dc
LP
1107
1108static int null_conv(
1109 int num_msg,
1110 const struct pam_message **msg,
1111 struct pam_response **resp,
1112 void *appdata_ptr) {
1113
1114 /* We don't support conversations */
1115
1116 return PAM_CONV_ERR;
1117}
1118
cefc33ae
LP
1119#endif
1120
5b6319dc
LP
/* Opens a PAM session on behalf of the process that is about to be executed.
 *
 *   name       PAM service name passed to pam_start()
 *   user       user name passed to pam_start()
 *   uid, gid   credentials the forked-off "(sd-pam)" helper switches to
 *   tty        value for PAM_TTY; if NULL, the TTY name of stdin is used when it can be determined
 *   env        in: variables exported to PAM with pam_putenv(); out: replaced by pam_getenvlist()
 *   fds/n_fds  file descriptors that are closed inside the "(sd-pam)" helper
 *
 * On success the environment is swapped in with strv_free_and_replace() and its result is returned.
 * On failure a negative value is returned (-EPERM for PAM-level errors, since those do not map to
 * errno). When built without PAM support this is a no-op returning 0. */
static int setup_pam(
                const char *name,
                const char *user,
                uid_t uid,
                gid_t gid,
                const char *tty,
                char ***env,
                int fds[], size_t n_fds) {

#if HAVE_PAM

        static const struct pam_conv conv = {
                .conv = null_conv,
                .appdata_ptr = NULL
        };

        _cleanup_(barrier_destroy) Barrier barrier = BARRIER_NULL;
        pam_handle_t *handle = NULL;
        sigset_t old_ss;
        int pam_code = PAM_SUCCESS, r;
        char **nv, **e = NULL;
        bool close_session = false;
        pid_t pam_pid = 0, parent_pid;
        int flags = 0;

        assert(name);
        assert(user);
        assert(env);

        /* We set up PAM in the parent process, then fork. The child
         * will then stay around until killed via PR_SET_PDEATHSIG or
         * systemd via the cgroup logic. It will then remove the PAM
         * session again. The parent process will exec() the actual
         * daemon. We do things this way to ensure that the main PID
         * of the daemon is the one we initially fork()ed. */

        r = barrier_create(&barrier);
        if (r < 0)
                goto fail;

        /* Keep PAM quiet unless we are logging at debug level */
        if (log_get_max_level() < LOG_DEBUG)
                flags |= PAM_SILENT;

        pam_code = pam_start(name, user, &conv, &handle);
        if (pam_code != PAM_SUCCESS) {
                handle = NULL;
                goto fail;
        }

        if (!tty) {
                _cleanup_free_ char *q = NULL;

                /* Hmm, so no TTY was explicitly passed, but an fd passed to us directly might be a TTY. Let's figure
                 * out if that's the case, and read the TTY off it. */

                if (getttyname_malloc(STDIN_FILENO, &q) >= 0)
                        tty = strjoina("/dev/", q);
        }

        if (tty) {
                pam_code = pam_set_item(handle, PAM_TTY, tty);
                if (pam_code != PAM_SUCCESS)
                        goto fail;
        }

        /* Export the environment collected so far to the PAM modules */
        STRV_FOREACH(nv, *env) {
                pam_code = pam_putenv(handle, *nv);
                if (pam_code != PAM_SUCCESS)
                        goto fail;
        }

        pam_code = pam_acct_mgmt(handle, flags);
        if (pam_code != PAM_SUCCESS)
                goto fail;

        pam_code = pam_open_session(handle, flags);
        if (pam_code != PAM_SUCCESS)
                goto fail;

        /* From here on the failure path has to close the session again */
        close_session = true;

        e = pam_getenvlist(handle);
        if (!e) {
                pam_code = PAM_BUF_ERR;
                goto fail;
        }

        /* Block SIGTERM, so that we know that it won't get lost in
         * the child */

        assert_se(sigprocmask_many(SIG_BLOCK, &old_ss, SIGTERM, -1) >= 0);

        parent_pid = getpid_cached();

        r = safe_fork("(sd-pam)", 0, &pam_pid);
        if (r < 0) {
                /* There is no child that could wait for SIGTERM, hence undo the blocking done above
                 * instead of leaving the signal mask modified on this error path. */
                (void) sigprocmask(SIG_SETMASK, &old_ss, NULL);
                goto fail;
        }
        if (r == 0) {
                int sig, ret = EXIT_PAM;

                /* The child's job is to reset the PAM session on
                 * termination */
                barrier_set_role(&barrier, BARRIER_CHILD);

                /* Make sure we don't keep open the passed fds in this child. We assume that otherwise only those fds
                 * are open here that have been opened by PAM. */
                (void) close_many(fds, n_fds);

                /* Drop privileges - we don't need any to pam_close_session
                 * and this will make PR_SET_PDEATHSIG work in most cases.
                 * If this fails, ignore the error - but expect sd-pam threads
                 * to fail to exit normally */

                r = maybe_setgroups(0, NULL);
                if (r < 0)
                        log_warning_errno(r, "Failed to setgroups() in sd-pam: %m");
                if (setresgid(gid, gid, gid) < 0)
                        log_warning_errno(errno, "Failed to setresgid() in sd-pam: %m");
                if (setresuid(uid, uid, uid) < 0)
                        log_warning_errno(errno, "Failed to setresuid() in sd-pam: %m");

                (void) ignore_signals(SIGPIPE, -1);

                /* Wait until our parent died. This will only work if
                 * the above setresuid() succeeds, otherwise the kernel
                 * will not allow unprivileged parents kill their privileged
                 * children this way. We rely on the control groups kill logic
                 * to do the rest for us. */
                if (prctl(PR_SET_PDEATHSIG, SIGTERM) < 0)
                        goto child_finish;

                /* Tell the parent that our setup is done. This is especially
                 * important regarding dropping privileges. Otherwise, unit
                 * setup might race against our setresuid(2) call.
                 *
                 * If the parent aborted, we'll detect this below, hence ignore
                 * return failure here. */
                (void) barrier_place(&barrier);

                /* Check if our parent process might already have died? */
                if (getppid() == parent_pid) {
                        sigset_t ss;

                        assert_se(sigemptyset(&ss) >= 0);
                        assert_se(sigaddset(&ss, SIGTERM) >= 0);

                        for (;;) {
                                if (sigwait(&ss, &sig) < 0) {
                                        if (errno == EINTR)
                                                continue;

                                        goto child_finish;
                                }

                                assert(sig == SIGTERM);
                                break;
                        }
                }

                /* If our parent died we'll end the session */
                if (getppid() != parent_pid) {
                        pam_code = pam_close_session(handle, flags);
                        if (pam_code != PAM_SUCCESS)
                                goto child_finish;
                }

                ret = 0;

        child_finish:
                pam_end(handle, pam_code | flags);
                _exit(ret);
        }

        barrier_set_role(&barrier, BARRIER_PARENT);

        /* If the child was forked off successfully it will do all the
         * cleanups, so forget about the handle here. */
        handle = NULL;

        /* Unblock SIGTERM again in the parent */
        assert_se(sigprocmask(SIG_SETMASK, &old_ss, NULL) >= 0);

        /* We close the log explicitly here, since the PAM modules
         * might have opened it, but we don't want this fd around. */
        closelog();

        /* Synchronously wait for the child to initialize. We don't care for
         * errors as we cannot recover. However, warn loudly if it happens. */
        if (!barrier_place_and_sync(&barrier))
                log_error("PAM initialization failed");

        return strv_free_and_replace(*env, e);

fail:
        if (pam_code != PAM_SUCCESS) {
                log_error("PAM failed: %s", pam_strerror(handle, pam_code));
                r = -EPERM;  /* PAM errors do not map to errno */
        } else
                log_error_errno(r, "PAM failed: %m");

        if (handle) {
                if (close_session)
                        pam_code = pam_close_session(handle, flags);

                pam_end(handle, pam_code | flags);
        }

        strv_free(e);
        closelog();

        return r;
#else
        return 0;
#endif
}
5b6319dc 1336
5d6b1584
LP
/* Renames the calling process to "(<name>)", where <name> is derived from the last component of
 * 'path'. If that component is empty the placeholder "(...)" is used instead. */
static void rename_process_from_path(const char *path) {
        char process_name[11];
        const char *tail;
        size_t n;

        /* The resulting string must fit in 10 chars (i.e. the length of "/sbin/init") to look pretty
         * in /bin/ps: two parentheses plus at most eight characters of the name. */

        tail = basename(path);
        if (isempty(tail)) {
                rename_process("(...)");
                return;
        }

        n = strlen(tail);
        if (n > 8) {
                /* The end of the process name is usually more interesting, since the first bit might
                 * just be "systemd-", hence keep the last eight characters. */
                tail += n - 8;
                n = 8;
        }

        process_name[0] = '(';
        memcpy(process_name + 1, tail, n);
        process_name[n + 1] = ')';
        process_name[n + 2] = 0;

        rename_process(process_name);
}
1367
469830d1
LP
1368static bool context_has_address_families(const ExecContext *c) {
1369 assert(c);
1370
1371 return c->address_families_whitelist ||
1372 !set_isempty(c->address_families);
1373}
1374
1375static bool context_has_syscall_filters(const ExecContext *c) {
1376 assert(c);
1377
1378 return c->syscall_whitelist ||
8cfa775f 1379 !hashmap_isempty(c->syscall_filter);
469830d1
LP
1380}
1381
1382static bool context_has_no_new_privileges(const ExecContext *c) {
1383 assert(c);
1384
1385 if (c->no_new_privileges)
1386 return true;
1387
1388 if (have_effective_cap(CAP_SYS_ADMIN)) /* if we are privileged, we don't need NNP */
1389 return false;
1390
1391 /* We need NNP if we have any form of seccomp and are unprivileged */
1392 return context_has_address_families(c) ||
1393 c->memory_deny_write_execute ||
1394 c->restrict_realtime ||
f69567cb 1395 c->restrict_suid_sgid ||
469830d1
LP
1396 exec_context_restrict_namespaces_set(c) ||
1397 c->protect_kernel_tunables ||
1398 c->protect_kernel_modules ||
84703040 1399 c->protect_kernel_logs ||
469830d1
LP
1400 c->private_devices ||
1401 context_has_syscall_filters(c) ||
78e864e5 1402 !set_isempty(c->syscall_archs) ||
aecd5ac6
TM
1403 c->lock_personality ||
1404 c->protect_hostname;
469830d1
LP
1405}
1406
349cc4a5 1407#if HAVE_SECCOMP
17df7223 1408
83f12b27 1409static bool skip_seccomp_unavailable(const Unit* u, const char* msg) {
f673b62d
LP
1410
1411 if (is_seccomp_available())
1412 return false;
1413
f673b62d 1414 log_unit_debug(u, "SECCOMP features not detected in the kernel, skipping %s", msg);
f673b62d 1415 return true;
83f12b27
FS
1416}
1417
165a31c0 1418static int apply_syscall_filter(const Unit* u, const ExecContext *c, bool needs_ambient_hack) {
469830d1 1419 uint32_t negative_action, default_action, action;
165a31c0 1420 int r;
8351ceae 1421
469830d1 1422 assert(u);
c0467cf3 1423 assert(c);
8351ceae 1424
469830d1 1425 if (!context_has_syscall_filters(c))
83f12b27
FS
1426 return 0;
1427
469830d1
LP
1428 if (skip_seccomp_unavailable(u, "SystemCallFilter="))
1429 return 0;
e9642be2 1430
ccc16c78 1431 negative_action = c->syscall_errno == 0 ? scmp_act_kill_process() : SCMP_ACT_ERRNO(c->syscall_errno);
e9642be2 1432
469830d1
LP
1433 if (c->syscall_whitelist) {
1434 default_action = negative_action;
1435 action = SCMP_ACT_ALLOW;
7c66bae2 1436 } else {
469830d1
LP
1437 default_action = SCMP_ACT_ALLOW;
1438 action = negative_action;
57183d11 1439 }
8351ceae 1440
165a31c0
LP
1441 if (needs_ambient_hack) {
1442 r = seccomp_filter_set_add(c->syscall_filter, c->syscall_whitelist, syscall_filter_sets + SYSCALL_FILTER_SET_SETUID);
1443 if (r < 0)
1444 return r;
1445 }
1446
b54f36c6 1447 return seccomp_load_syscall_filter_set_raw(default_action, c->syscall_filter, action, false);
4298d0b5
LP
1448}
1449
469830d1
LP
1450static int apply_syscall_archs(const Unit *u, const ExecContext *c) {
1451 assert(u);
4298d0b5
LP
1452 assert(c);
1453
469830d1 1454 if (set_isempty(c->syscall_archs))
83f12b27
FS
1455 return 0;
1456
469830d1
LP
1457 if (skip_seccomp_unavailable(u, "SystemCallArchitectures="))
1458 return 0;
4298d0b5 1459
469830d1
LP
1460 return seccomp_restrict_archs(c->syscall_archs);
1461}
4298d0b5 1462
469830d1
LP
1463static int apply_address_families(const Unit* u, const ExecContext *c) {
1464 assert(u);
1465 assert(c);
4298d0b5 1466
469830d1
LP
1467 if (!context_has_address_families(c))
1468 return 0;
4298d0b5 1469
469830d1
LP
1470 if (skip_seccomp_unavailable(u, "RestrictAddressFamilies="))
1471 return 0;
4298d0b5 1472
469830d1 1473 return seccomp_restrict_address_families(c->address_families, c->address_families_whitelist);
8351ceae 1474}
4298d0b5 1475
83f12b27 1476static int apply_memory_deny_write_execute(const Unit* u, const ExecContext *c) {
469830d1 1477 assert(u);
f3e43635
TM
1478 assert(c);
1479
469830d1 1480 if (!c->memory_deny_write_execute)
83f12b27
FS
1481 return 0;
1482
469830d1
LP
1483 if (skip_seccomp_unavailable(u, "MemoryDenyWriteExecute="))
1484 return 0;
f3e43635 1485
469830d1 1486 return seccomp_memory_deny_write_execute();
f3e43635
TM
1487}
1488
83f12b27 1489static int apply_restrict_realtime(const Unit* u, const ExecContext *c) {
469830d1 1490 assert(u);
f4170c67
LP
1491 assert(c);
1492
469830d1 1493 if (!c->restrict_realtime)
83f12b27
FS
1494 return 0;
1495
469830d1
LP
1496 if (skip_seccomp_unavailable(u, "RestrictRealtime="))
1497 return 0;
f4170c67 1498
469830d1 1499 return seccomp_restrict_realtime();
f4170c67
LP
1500}
1501
f69567cb
LP
1502static int apply_restrict_suid_sgid(const Unit* u, const ExecContext *c) {
1503 assert(u);
1504 assert(c);
1505
1506 if (!c->restrict_suid_sgid)
1507 return 0;
1508
1509 if (skip_seccomp_unavailable(u, "RestrictSUIDSGID="))
1510 return 0;
1511
1512 return seccomp_restrict_suid_sgid();
1513}
1514
59e856c7 1515static int apply_protect_sysctl(const Unit *u, const ExecContext *c) {
469830d1 1516 assert(u);
59eeb84b
LP
1517 assert(c);
1518
1519 /* Turn off the legacy sysctl() system call. Many distributions turn this off while building the kernel, but
1520 * let's protect even those systems where this is left on in the kernel. */
1521
469830d1 1522 if (!c->protect_kernel_tunables)
59eeb84b
LP
1523 return 0;
1524
469830d1
LP
1525 if (skip_seccomp_unavailable(u, "ProtectKernelTunables="))
1526 return 0;
59eeb84b 1527
469830d1 1528 return seccomp_protect_sysctl();
59eeb84b
LP
1529}
1530
59e856c7 1531static int apply_protect_kernel_modules(const Unit *u, const ExecContext *c) {
469830d1 1532 assert(u);
502d704e
DH
1533 assert(c);
1534
25a8d8a0 1535 /* Turn off module syscalls on ProtectKernelModules=yes */
502d704e 1536
469830d1
LP
1537 if (!c->protect_kernel_modules)
1538 return 0;
1539
502d704e
DH
1540 if (skip_seccomp_unavailable(u, "ProtectKernelModules="))
1541 return 0;
1542
b54f36c6 1543 return seccomp_load_syscall_filter_set(SCMP_ACT_ALLOW, syscall_filter_sets + SYSCALL_FILTER_SET_MODULE, SCMP_ACT_ERRNO(EPERM), false);
502d704e
DH
1544}
1545
84703040
KK
1546static int apply_protect_kernel_logs(const Unit *u, const ExecContext *c) {
1547 assert(u);
1548 assert(c);
1549
1550 if (!c->protect_kernel_logs)
1551 return 0;
1552
1553 if (skip_seccomp_unavailable(u, "ProtectKernelLogs="))
1554 return 0;
1555
1556 return seccomp_protect_syslog();
1557}
1558
59e856c7 1559static int apply_private_devices(const Unit *u, const ExecContext *c) {
469830d1 1560 assert(u);
ba128bb8
LP
1561 assert(c);
1562
8f81a5f6 1563 /* If PrivateDevices= is set, also turn off iopl and all @raw-io syscalls. */
ba128bb8 1564
469830d1
LP
1565 if (!c->private_devices)
1566 return 0;
1567
ba128bb8
LP
1568 if (skip_seccomp_unavailable(u, "PrivateDevices="))
1569 return 0;
1570
b54f36c6 1571 return seccomp_load_syscall_filter_set(SCMP_ACT_ALLOW, syscall_filter_sets + SYSCALL_FILTER_SET_RAW_IO, SCMP_ACT_ERRNO(EPERM), false);
ba128bb8
LP
1572}
1573
34cf6c43 1574static int apply_restrict_namespaces(const Unit *u, const ExecContext *c) {
469830d1 1575 assert(u);
add00535
LP
1576 assert(c);
1577
1578 if (!exec_context_restrict_namespaces_set(c))
1579 return 0;
1580
1581 if (skip_seccomp_unavailable(u, "RestrictNamespaces="))
1582 return 0;
1583
1584 return seccomp_restrict_namespaces(c->restrict_namespaces);
1585}
1586
78e864e5 1587static int apply_lock_personality(const Unit* u, const ExecContext *c) {
e8132d63
LP
1588 unsigned long personality;
1589 int r;
78e864e5
TM
1590
1591 assert(u);
1592 assert(c);
1593
1594 if (!c->lock_personality)
1595 return 0;
1596
1597 if (skip_seccomp_unavailable(u, "LockPersonality="))
1598 return 0;
1599
e8132d63
LP
1600 personality = c->personality;
1601
1602 /* If personality is not specified, use either PER_LINUX or PER_LINUX32 depending on what is currently set. */
1603 if (personality == PERSONALITY_INVALID) {
1604
1605 r = opinionated_personality(&personality);
1606 if (r < 0)
1607 return r;
1608 }
78e864e5
TM
1609
1610 return seccomp_lock_personality(personality);
1611}
1612
c0467cf3 1613#endif
8351ceae 1614
3042bbeb 1615static void do_idle_pipe_dance(int idle_pipe[static 4]) {
31a7eb86
ZJS
1616 assert(idle_pipe);
1617
54eb2300
LP
1618 idle_pipe[1] = safe_close(idle_pipe[1]);
1619 idle_pipe[2] = safe_close(idle_pipe[2]);
31a7eb86
ZJS
1620
1621 if (idle_pipe[0] >= 0) {
1622 int r;
1623
1624 r = fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT_USEC);
1625
1626 if (idle_pipe[3] >= 0 && r == 0 /* timeout */) {
c7cc737f
LP
1627 ssize_t n;
1628
31a7eb86 1629 /* Signal systemd that we are bored and want to continue. */
c7cc737f
LP
1630 n = write(idle_pipe[3], "x", 1);
1631 if (n > 0)
cd972d69 1632 /* Wait for systemd to react to the signal above. */
54756dce 1633 (void) fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT2_USEC);
31a7eb86
ZJS
1634 }
1635
54eb2300 1636 idle_pipe[0] = safe_close(idle_pipe[0]);
31a7eb86
ZJS
1637
1638 }
1639
54eb2300 1640 idle_pipe[3] = safe_close(idle_pipe[3]);
31a7eb86
ZJS
1641}
1642
fb2042dd
YW
1643static const char *exec_directory_env_name_to_string(ExecDirectoryType t);
1644
7cae38c4 1645static int build_environment(
34cf6c43 1646 const Unit *u,
9fa95f85 1647 const ExecContext *c,
1e22b5cd 1648 const ExecParameters *p,
da6053d0 1649 size_t n_fds,
7cae38c4
LP
1650 const char *home,
1651 const char *username,
1652 const char *shell,
7bce046b
LP
1653 dev_t journal_stream_dev,
1654 ino_t journal_stream_ino,
7cae38c4
LP
1655 char ***ret) {
1656
1657 _cleanup_strv_free_ char **our_env = NULL;
fb2042dd 1658 ExecDirectoryType t;
da6053d0 1659 size_t n_env = 0;
7cae38c4
LP
1660 char *x;
1661
4b58153d 1662 assert(u);
7cae38c4 1663 assert(c);
7c1cb6f1 1664 assert(p);
7cae38c4
LP
1665 assert(ret);
1666
fb2042dd 1667 our_env = new0(char*, 14 + _EXEC_DIRECTORY_TYPE_MAX);
7cae38c4
LP
1668 if (!our_env)
1669 return -ENOMEM;
1670
1671 if (n_fds > 0) {
8dd4c05b
LP
1672 _cleanup_free_ char *joined = NULL;
1673
df0ff127 1674 if (asprintf(&x, "LISTEN_PID="PID_FMT, getpid_cached()) < 0)
7cae38c4
LP
1675 return -ENOMEM;
1676 our_env[n_env++] = x;
1677
da6053d0 1678 if (asprintf(&x, "LISTEN_FDS=%zu", n_fds) < 0)
7cae38c4
LP
1679 return -ENOMEM;
1680 our_env[n_env++] = x;
8dd4c05b 1681
1e22b5cd 1682 joined = strv_join(p->fd_names, ":");
8dd4c05b
LP
1683 if (!joined)
1684 return -ENOMEM;
1685
605405c6 1686 x = strjoin("LISTEN_FDNAMES=", joined);
8dd4c05b
LP
1687 if (!x)
1688 return -ENOMEM;
1689 our_env[n_env++] = x;
7cae38c4
LP
1690 }
1691
b08af3b1 1692 if ((p->flags & EXEC_SET_WATCHDOG) && p->watchdog_usec > 0) {
df0ff127 1693 if (asprintf(&x, "WATCHDOG_PID="PID_FMT, getpid_cached()) < 0)
09812eb7
LP
1694 return -ENOMEM;
1695 our_env[n_env++] = x;
1696
1e22b5cd 1697 if (asprintf(&x, "WATCHDOG_USEC="USEC_FMT, p->watchdog_usec) < 0)
09812eb7
LP
1698 return -ENOMEM;
1699 our_env[n_env++] = x;
1700 }
1701
fd63e712
LP
1702 /* If this is D-Bus, tell the nss-systemd module, since it relies on being able to use D-Bus look up dynamic
1703 * users via PID 1, possibly dead-locking the dbus daemon. This way it will not use D-Bus to resolve names, but
1704 * check the database directly. */
ac647978 1705 if (p->flags & EXEC_NSS_BYPASS_BUS) {
fd63e712
LP
1706 x = strdup("SYSTEMD_NSS_BYPASS_BUS=1");
1707 if (!x)
1708 return -ENOMEM;
1709 our_env[n_env++] = x;
1710 }
1711
7cae38c4 1712 if (home) {
b910cc72 1713 x = strjoin("HOME=", home);
7cae38c4
LP
1714 if (!x)
1715 return -ENOMEM;
7bbead1d
LP
1716
1717 path_simplify(x + 5, true);
7cae38c4
LP
1718 our_env[n_env++] = x;
1719 }
1720
1721 if (username) {
b910cc72 1722 x = strjoin("LOGNAME=", username);
7cae38c4
LP
1723 if (!x)
1724 return -ENOMEM;
1725 our_env[n_env++] = x;
1726
b910cc72 1727 x = strjoin("USER=", username);
7cae38c4
LP
1728 if (!x)
1729 return -ENOMEM;
1730 our_env[n_env++] = x;
1731 }
1732
1733 if (shell) {
b910cc72 1734 x = strjoin("SHELL=", shell);
7cae38c4
LP
1735 if (!x)
1736 return -ENOMEM;
7bbead1d
LP
1737
1738 path_simplify(x + 6, true);
7cae38c4
LP
1739 our_env[n_env++] = x;
1740 }
1741
4b58153d
LP
1742 if (!sd_id128_is_null(u->invocation_id)) {
1743 if (asprintf(&x, "INVOCATION_ID=" SD_ID128_FORMAT_STR, SD_ID128_FORMAT_VAL(u->invocation_id)) < 0)
1744 return -ENOMEM;
1745
1746 our_env[n_env++] = x;
1747 }
1748
6af760f3
LP
1749 if (exec_context_needs_term(c)) {
1750 const char *tty_path, *term = NULL;
1751
1752 tty_path = exec_context_tty_path(c);
1753
1754 /* If we are forked off PID 1 and we are supposed to operate on /dev/console, then let's try to inherit
1755 * the $TERM set for PID 1. This is useful for containers so that the $TERM the container manager
1756 * passes to PID 1 ends up all the way in the console login shown. */
1757
1758 if (path_equal(tty_path, "/dev/console") && getppid() == 1)
1759 term = getenv("TERM");
1760 if (!term)
1761 term = default_term_for_tty(tty_path);
7cae38c4 1762
b910cc72 1763 x = strjoin("TERM=", term);
7cae38c4
LP
1764 if (!x)
1765 return -ENOMEM;
1766 our_env[n_env++] = x;
1767 }
1768
7bce046b
LP
1769 if (journal_stream_dev != 0 && journal_stream_ino != 0) {
1770 if (asprintf(&x, "JOURNAL_STREAM=" DEV_FMT ":" INO_FMT, journal_stream_dev, journal_stream_ino) < 0)
1771 return -ENOMEM;
1772
1773 our_env[n_env++] = x;
1774 }
1775
fb2042dd
YW
1776 for (t = 0; t < _EXEC_DIRECTORY_TYPE_MAX; t++) {
1777 _cleanup_free_ char *pre = NULL, *joined = NULL;
1778 const char *n;
1779
1780 if (!p->prefix[t])
1781 continue;
1782
1783 if (strv_isempty(c->directories[t].paths))
1784 continue;
1785
1786 n = exec_directory_env_name_to_string(t);
1787 if (!n)
1788 continue;
1789
1790 pre = strjoin(p->prefix[t], "/");
1791 if (!pre)
1792 return -ENOMEM;
1793
1794 joined = strv_join_prefix(c->directories[t].paths, ":", pre);
1795 if (!joined)
1796 return -ENOMEM;
1797
1798 x = strjoin(n, "=", joined);
1799 if (!x)
1800 return -ENOMEM;
1801
1802 our_env[n_env++] = x;
1803 }
1804
7cae38c4 1805 our_env[n_env++] = NULL;
fb2042dd 1806 assert(n_env <= 14 + _EXEC_DIRECTORY_TYPE_MAX);
7cae38c4 1807
ae2a15bc 1808 *ret = TAKE_PTR(our_env);
7cae38c4
LP
1809
1810 return 0;
1811}
1812
b4c14404
FB
1813static int build_pass_environment(const ExecContext *c, char ***ret) {
1814 _cleanup_strv_free_ char **pass_env = NULL;
1815 size_t n_env = 0, n_bufsize = 0;
1816 char **i;
1817
1818 STRV_FOREACH(i, c->pass_environment) {
1819 _cleanup_free_ char *x = NULL;
1820 char *v;
1821
1822 v = getenv(*i);
1823 if (!v)
1824 continue;
605405c6 1825 x = strjoin(*i, "=", v);
b4c14404
FB
1826 if (!x)
1827 return -ENOMEM;
00819cc1 1828
b4c14404
FB
1829 if (!GREEDY_REALLOC(pass_env, n_bufsize, n_env + 2))
1830 return -ENOMEM;
00819cc1 1831
1cc6c93a 1832 pass_env[n_env++] = TAKE_PTR(x);
b4c14404 1833 pass_env[n_env] = NULL;
b4c14404
FB
1834 }
1835
ae2a15bc 1836 *ret = TAKE_PTR(pass_env);
b4c14404
FB
1837
1838 return 0;
1839}
1840
8b44a3d2
LP
1841static bool exec_needs_mount_namespace(
1842 const ExecContext *context,
1843 const ExecParameters *params,
4657abb5 1844 const ExecRuntime *runtime) {
8b44a3d2
LP
1845
1846 assert(context);
1847 assert(params);
1848
915e6d16
LP
1849 if (context->root_image)
1850 return true;
1851
2a624c36
AP
1852 if (!strv_isempty(context->read_write_paths) ||
1853 !strv_isempty(context->read_only_paths) ||
1854 !strv_isempty(context->inaccessible_paths))
8b44a3d2
LP
1855 return true;
1856
42b1d8e0 1857 if (context->n_bind_mounts > 0)
d2d6c096
LP
1858 return true;
1859
2abd4e38
YW
1860 if (context->n_temporary_filesystems > 0)
1861 return true;
1862
37ed15d7 1863 if (!IN_SET(context->mount_flags, 0, MS_SHARED))
8b44a3d2
LP
1864 return true;
1865
1866 if (context->private_tmp && runtime && (runtime->tmp_dir || runtime->var_tmp_dir))
1867 return true;
1868
8b44a3d2 1869 if (context->private_devices ||
228af36f 1870 context->private_mounts ||
8b44a3d2 1871 context->protect_system != PROTECT_SYSTEM_NO ||
59eeb84b
LP
1872 context->protect_home != PROTECT_HOME_NO ||
1873 context->protect_kernel_tunables ||
c575770b 1874 context->protect_kernel_modules ||
94a7b275 1875 context->protect_kernel_logs ||
59eeb84b 1876 context->protect_control_groups)
8b44a3d2
LP
1877 return true;
1878
37c56f89
YW
1879 if (context->root_directory) {
1880 ExecDirectoryType t;
1881
1882 if (context->mount_apivfs)
1883 return true;
1884
1885 for (t = 0; t < _EXEC_DIRECTORY_TYPE_MAX; t++) {
1886 if (!params->prefix[t])
1887 continue;
1888
1889 if (!strv_isempty(context->directories[t].paths))
1890 return true;
1891 }
1892 }
5d997827 1893
42b1d8e0 1894 if (context->dynamic_user &&
b43ee82f 1895 (!strv_isempty(context->directories[EXEC_DIRECTORY_STATE].paths) ||
42b1d8e0
YW
1896 !strv_isempty(context->directories[EXEC_DIRECTORY_CACHE].paths) ||
1897 !strv_isempty(context->directories[EXEC_DIRECTORY_LOGS].paths)))
1898 return true;
1899
8b44a3d2
LP
1900 return false;
1901}
1902
5749f855 1903static int setup_private_users(uid_t ouid, gid_t ogid, uid_t uid, gid_t gid) {
d251207d
LP
1904 _cleanup_free_ char *uid_map = NULL, *gid_map = NULL;
1905 _cleanup_close_pair_ int errno_pipe[2] = { -1, -1 };
1906 _cleanup_close_ int unshare_ready_fd = -1;
1907 _cleanup_(sigkill_waitp) pid_t pid = 0;
1908 uint64_t c = 1;
d251207d
LP
1909 ssize_t n;
1910 int r;
1911
5749f855
AZ
1912 /* Set up a user namespace and map the original UID/GID (IDs from before any user or group changes, i.e.
1913 * the IDs from the user or system manager(s)) to itself, the selected UID/GID to itself, and everything else to
d251207d
LP
1914 * nobody. In order to be able to write this mapping we need CAP_SETUID in the original user namespace, which
1915 * we however lack after opening the user namespace. To work around this we fork() a temporary child process,
1916 * which waits for the parent to create the new user namespace while staying in the original namespace. The
1917 * child then writes the UID mapping, under full privileges. The parent waits for the child to finish and
5749f855
AZ
1918 * continues execution normally.
1919 * For unprivileged users (i.e. without capabilities), the root to root mapping is excluded. As such, it
1920 * does not need CAP_SETUID to write the single line mapping to itself. */
d251207d 1921
5749f855
AZ
1922 /* Can only set up multiple mappings with CAP_SETUID. */
1923 if (have_effective_cap(CAP_SETUID) && uid != ouid && uid_is_valid(uid))
587ab01b 1924 r = asprintf(&uid_map,
5749f855 1925 UID_FMT " " UID_FMT " 1\n" /* Map $OUID → $OUID */
587ab01b 1926 UID_FMT " " UID_FMT " 1\n", /* Map $UID → $UID */
5749f855
AZ
1927 ouid, ouid, uid, uid);
1928 else
1929 r = asprintf(&uid_map,
1930 UID_FMT " " UID_FMT " 1\n", /* Map $OUID → $OUID */
1931 ouid, ouid);
d251207d 1932
5749f855
AZ
1933 if (r < 0)
1934 return -ENOMEM;
1935
1936 /* Can only set up multiple mappings with CAP_SETGID. */
1937 if (have_effective_cap(CAP_SETGID) && gid != ogid && gid_is_valid(gid))
587ab01b 1938 r = asprintf(&gid_map,
5749f855 1939 GID_FMT " " GID_FMT " 1\n" /* Map $OGID → $OGID */
587ab01b 1940 GID_FMT " " GID_FMT " 1\n", /* Map $GID → $GID */
5749f855
AZ
1941 ogid, ogid, gid, gid);
1942 else
1943 r = asprintf(&gid_map,
1944 GID_FMT " " GID_FMT " 1\n", /* Map $OGID -> $OGID */
1945 ogid, ogid);
1946
1947 if (r < 0)
1948 return -ENOMEM;
d251207d
LP
1949
1950 /* Create a communication channel so that the parent can tell the child when it finished creating the user
1951 * namespace. */
1952 unshare_ready_fd = eventfd(0, EFD_CLOEXEC);
1953 if (unshare_ready_fd < 0)
1954 return -errno;
1955
1956 /* Create a communication channel so that the child can tell the parent a proper error code in case it
1957 * failed. */
1958 if (pipe2(errno_pipe, O_CLOEXEC) < 0)
1959 return -errno;
1960
4c253ed1
LP
1961 r = safe_fork("(sd-userns)", FORK_RESET_SIGNALS|FORK_DEATHSIG, &pid);
1962 if (r < 0)
1963 return r;
1964 if (r == 0) {
d251207d
LP
1965 _cleanup_close_ int fd = -1;
1966 const char *a;
1967 pid_t ppid;
1968
1969 /* Child process, running in the original user namespace. Let's update the parent's UID/GID map from
1970 * here, after the parent opened its own user namespace. */
1971
1972 ppid = getppid();
1973 errno_pipe[0] = safe_close(errno_pipe[0]);
1974
1975 /* Wait until the parent unshared the user namespace */
1976 if (read(unshare_ready_fd, &c, sizeof(c)) < 0) {
1977 r = -errno;
1978 goto child_fail;
1979 }
1980
1981 /* Disable the setgroups() system call in the child user namespace, for good. */
1982 a = procfs_file_alloca(ppid, "setgroups");
1983 fd = open(a, O_WRONLY|O_CLOEXEC);
1984 if (fd < 0) {
1985 if (errno != ENOENT) {
1986 r = -errno;
1987 goto child_fail;
1988 }
1989
1990 /* If the file is missing the kernel is too old, let's continue anyway. */
1991 } else {
1992 if (write(fd, "deny\n", 5) < 0) {
1993 r = -errno;
1994 goto child_fail;
1995 }
1996
1997 fd = safe_close(fd);
1998 }
1999
2000 /* First write the GID map */
2001 a = procfs_file_alloca(ppid, "gid_map");
2002 fd = open(a, O_WRONLY|O_CLOEXEC);
2003 if (fd < 0) {
2004 r = -errno;
2005 goto child_fail;
2006 }
2007 if (write(fd, gid_map, strlen(gid_map)) < 0) {
2008 r = -errno;
2009 goto child_fail;
2010 }
2011 fd = safe_close(fd);
2012
2013 /* The write the UID map */
2014 a = procfs_file_alloca(ppid, "uid_map");
2015 fd = open(a, O_WRONLY|O_CLOEXEC);
2016 if (fd < 0) {
2017 r = -errno;
2018 goto child_fail;
2019 }
2020 if (write(fd, uid_map, strlen(uid_map)) < 0) {
2021 r = -errno;
2022 goto child_fail;
2023 }
2024
2025 _exit(EXIT_SUCCESS);
2026
2027 child_fail:
2028 (void) write(errno_pipe[1], &r, sizeof(r));
2029 _exit(EXIT_FAILURE);
2030 }
2031
2032 errno_pipe[1] = safe_close(errno_pipe[1]);
2033
2034 if (unshare(CLONE_NEWUSER) < 0)
2035 return -errno;
2036
2037 /* Let the child know that the namespace is ready now */
2038 if (write(unshare_ready_fd, &c, sizeof(c)) < 0)
2039 return -errno;
2040
2041 /* Try to read an error code from the child */
2042 n = read(errno_pipe[0], &r, sizeof(r));
2043 if (n < 0)
2044 return -errno;
2045 if (n == sizeof(r)) { /* an error code was sent to us */
2046 if (r < 0)
2047 return r;
2048 return -EIO;
2049 }
2050 if (n != 0) /* on success we should have read 0 bytes */
2051 return -EIO;
2052
2e87a1fd
LP
2053 r = wait_for_terminate_and_check("(sd-userns)", pid, 0);
2054 pid = 0;
d251207d
LP
2055 if (r < 0)
2056 return r;
2e87a1fd 2057 if (r != EXIT_SUCCESS) /* If something strange happened with the child, let's consider this fatal, too */
d251207d
LP
2058 return -EIO;
2059
2060 return 0;
2061}
2062
494d0247
YW
2063static bool exec_directory_is_private(const ExecContext *context, ExecDirectoryType type) {
2064 if (!context->dynamic_user)
2065 return false;
2066
2067 if (type == EXEC_DIRECTORY_CONFIGURATION)
2068 return false;
2069
2070 if (type == EXEC_DIRECTORY_RUNTIME && context->runtime_directory_preserve_mode == EXEC_PRESERVE_NO)
2071 return false;
2072
2073 return true;
2074}
2075
3536f49e 2076static int setup_exec_directory(
07689d5d
LP
2077 const ExecContext *context,
2078 const ExecParameters *params,
2079 uid_t uid,
3536f49e 2080 gid_t gid,
3536f49e
YW
2081 ExecDirectoryType type,
2082 int *exit_status) {
07689d5d 2083
72fd1768 2084 static const int exit_status_table[_EXEC_DIRECTORY_TYPE_MAX] = {
3536f49e
YW
2085 [EXEC_DIRECTORY_RUNTIME] = EXIT_RUNTIME_DIRECTORY,
2086 [EXEC_DIRECTORY_STATE] = EXIT_STATE_DIRECTORY,
2087 [EXEC_DIRECTORY_CACHE] = EXIT_CACHE_DIRECTORY,
2088 [EXEC_DIRECTORY_LOGS] = EXIT_LOGS_DIRECTORY,
2089 [EXEC_DIRECTORY_CONFIGURATION] = EXIT_CONFIGURATION_DIRECTORY,
2090 };
07689d5d
LP
2091 char **rt;
2092 int r;
2093
2094 assert(context);
2095 assert(params);
72fd1768 2096 assert(type >= 0 && type < _EXEC_DIRECTORY_TYPE_MAX);
3536f49e 2097 assert(exit_status);
07689d5d 2098
3536f49e
YW
2099 if (!params->prefix[type])
2100 return 0;
2101
8679efde 2102 if (params->flags & EXEC_CHOWN_DIRECTORIES) {
3536f49e
YW
2103 if (!uid_is_valid(uid))
2104 uid = 0;
2105 if (!gid_is_valid(gid))
2106 gid = 0;
2107 }
2108
2109 STRV_FOREACH(rt, context->directories[type].paths) {
6c47cd7d 2110 _cleanup_free_ char *p = NULL, *pp = NULL;
07689d5d 2111
edbfeb12 2112 p = path_join(params->prefix[type], *rt);
3536f49e
YW
2113 if (!p) {
2114 r = -ENOMEM;
2115 goto fail;
2116 }
07689d5d 2117
23a7448e
YW
2118 r = mkdir_parents_label(p, 0755);
2119 if (r < 0)
3536f49e 2120 goto fail;
23a7448e 2121
494d0247 2122 if (exec_directory_is_private(context, type)) {
6c9c51e5 2123 _cleanup_free_ char *private_root = NULL;
6c47cd7d 2124
3f5b1508
LP
2125 /* So, here's one extra complication when dealing with DynamicUser=1 units. In that
2126 * case we want to avoid leaving a directory around fully accessible that is owned by
2127 * a dynamic user whose UID is later on reused. To lock this down we use the same
2128 * trick used by container managers to prohibit host users to get access to files of
2129 * the same UID in containers: we place everything inside a directory that has an
2130 * access mode of 0700 and is owned root:root, so that it acts as security boundary
2131 * for unprivileged host code. We then use fs namespacing to make this directory
2132 * permeable for the service itself.
6c47cd7d 2133 *
3f5b1508
LP
2134 * Specifically: for a service which wants a special directory "foo/" we first create
2135 * a directory "private/" with access mode 0700 owned by root:root. Then we place
2136 * "foo" inside of that directory (i.e. "private/foo/"), and make "foo" a symlink to
2137 * "private/foo". This way, privileged host users can access "foo/" as usual, but
2138 * unprivileged host users can't look into it. Inside of the namespace of the unit
2139 * "private/" is replaced by a more liberally accessible tmpfs, into which the host's
2140 * "private/foo/" is mounted under the same name, thus disabling the access boundary
2141 * for the service and making sure it only gets access to the dirs it needs but no
2142 * others. Tricky? Yes, absolutely, but it works!
6c47cd7d 2143 *
3f5b1508
LP
2144 * Note that we don't do this for EXEC_DIRECTORY_CONFIGURATION as that's assumed not
2145 * to be owned by the service itself.
2146 *
2147 * Also, note that we don't do this for EXEC_DIRECTORY_RUNTIME as that's often used
2148 * for sharing files or sockets with other services. */
6c47cd7d 2149
edbfeb12 2150 private_root = path_join(params->prefix[type], "private");
6c47cd7d
LP
2151 if (!private_root) {
2152 r = -ENOMEM;
2153 goto fail;
2154 }
2155
2156 /* First set up private root if it doesn't exist yet, with access mode 0700 and owned by root:root */
37c1d5e9 2157 r = mkdir_safe_label(private_root, 0700, 0, 0, MKDIR_WARN_MODE);
6c47cd7d
LP
2158 if (r < 0)
2159 goto fail;
2160
edbfeb12 2161 pp = path_join(private_root, *rt);
6c47cd7d
LP
2162 if (!pp) {
2163 r = -ENOMEM;
2164 goto fail;
2165 }
2166
2167 /* Create all directories between the configured directory and this private root, and mark them 0755 */
2168 r = mkdir_parents_label(pp, 0755);
2169 if (r < 0)
2170 goto fail;
2171
949befd3
LP
2172 if (is_dir(p, false) > 0 &&
2173 (laccess(pp, F_OK) < 0 && errno == ENOENT)) {
2174
2175 /* Hmm, the private directory doesn't exist yet, but the normal one exists? If so, move
2176 * it over. Most likely the service has been upgraded from one that didn't use
2177 * DynamicUser=1, to one that does. */
2178
cf52c45d
LP
2179 log_info("Found pre-existing public %s= directory %s, migrating to %s.\n"
2180 "Apparently, service previously had DynamicUser= turned off, and has now turned it on.",
2181 exec_directory_type_to_string(type), p, pp);
2182
949befd3
LP
2183 if (rename(p, pp) < 0) {
2184 r = -errno;
2185 goto fail;
2186 }
2187 } else {
2188 /* Otherwise, create the actual directory for the service */
2189
2190 r = mkdir_label(pp, context->directories[type].mode);
2191 if (r < 0 && r != -EEXIST)
2192 goto fail;
2193 }
6c47cd7d 2194
6c47cd7d 2195 /* And link it up from the original place */
6c9c51e5 2196 r = symlink_idempotent(pp, p, true);
6c47cd7d
LP
2197 if (r < 0)
2198 goto fail;
2199
6c47cd7d 2200 } else {
5c6d40d1
LP
2201 _cleanup_free_ char *target = NULL;
2202
2203 if (type != EXEC_DIRECTORY_CONFIGURATION &&
2204 readlink_and_make_absolute(p, &target) >= 0) {
2205 _cleanup_free_ char *q = NULL;
2206
2207 /* This already exists and is a symlink? Interesting. Maybe it's one created
2193f17c
LP
2208 * by DynamicUser=1 (see above)?
2209 *
2210 * We do this for all directory types except for ConfigurationDirectory=,
2211 * since they all support the private/ symlink logic at least in some
2212 * configurations, see above. */
5c6d40d1
LP
2213
2214 q = path_join(params->prefix[type], "private", *rt);
2215 if (!q) {
2216 r = -ENOMEM;
2217 goto fail;
2218 }
2219
2220 if (path_equal(q, target)) {
2221
2222 /* Hmm, apparently DynamicUser= was once turned on for this service,
2223 * but is no longer. Let's move the directory back up. */
2224
cf52c45d
LP
2225 log_info("Found pre-existing private %s= directory %s, migrating to %s.\n"
2226 "Apparently, service previously had DynamicUser= turned on, and has now turned it off.",
2227 exec_directory_type_to_string(type), q, p);
2228
5c6d40d1
LP
2229 if (unlink(p) < 0) {
2230 r = -errno;
2231 goto fail;
2232 }
2233
2234 if (rename(q, p) < 0) {
2235 r = -errno;
2236 goto fail;
2237 }
2238 }
2239 }
2240
6c47cd7d 2241 r = mkdir_label(p, context->directories[type].mode);
d484580c 2242 if (r < 0) {
d484580c
LP
2243 if (r != -EEXIST)
2244 goto fail;
2245
206e9864
LP
2246 if (type == EXEC_DIRECTORY_CONFIGURATION) {
2247 struct stat st;
2248
2249 /* Don't change the owner/access mode of the configuration directory,
2250 * as in the common case it is not written to by a service, and shall
2251 * not be writable. */
2252
2253 if (stat(p, &st) < 0) {
2254 r = -errno;
2255 goto fail;
2256 }
2257
2258 /* Still complain if the access mode doesn't match */
2259 if (((st.st_mode ^ context->directories[type].mode) & 07777) != 0)
2260 log_warning("%s \'%s\' already exists but the mode is different. "
2261 "(File system: %o %sMode: %o)",
2262 exec_directory_type_to_string(type), *rt,
2263 st.st_mode & 07777, exec_directory_type_to_string(type), context->directories[type].mode & 07777);
2264
6cff72eb 2265 continue;
206e9864 2266 }
6cff72eb 2267 }
a1164ae3 2268 }
07689d5d 2269
206e9864 2270 /* Lock down the access mode (we use chmod_and_chown() to make this idempotent. We don't
5238e957 2271 * specify UID/GID here, so that path_chown_recursive() can optimize things depending on the
206e9864
LP
2272 * current UID/GID ownership.) */
2273 r = chmod_and_chown(pp ?: p, context->directories[type].mode, UID_INVALID, GID_INVALID);
2274 if (r < 0)
2275 goto fail;
c71b2eb7 2276
607b358e
LP
2277 /* Then, change the ownership of the whole tree, if necessary. When dynamic users are used we
2278 * drop the suid/sgid bits, since we really don't want SUID/SGID files for dynamic UID/GID
2279 * assignments to exist.*/
2280 r = path_chown_recursive(pp ?: p, uid, gid, context->dynamic_user ? 01777 : 07777);
07689d5d 2281 if (r < 0)
3536f49e 2282 goto fail;
07689d5d
LP
2283 }
2284
2285 return 0;
3536f49e
YW
2286
2287fail:
2288 *exit_status = exit_status_table[type];
3536f49e 2289 return r;
07689d5d
LP
2290}
2291
#if ENABLE_SMACK
/* Applies the SMACK process label to the calling process (PID 0 is passed to mac_smack_apply_pid()).
 * An explicitly configured label always wins. Otherwise, and only if SMACK_DEFAULT_PROCESS_LABEL is
 * defined at build time, the exec label read from the command's binary is used, falling back to that
 * default. Returns 0 on success, a negative error otherwise. */
static int setup_smack(
                const ExecContext *context,
                const ExecCommand *command) {

        int r;

        assert(context);
        assert(command);

        if (context->smack_process_label) {
                r = mac_smack_apply_pid(0, context->smack_process_label);
                if (r < 0)
                        return r;

                return 0;
        }

#ifdef SMACK_DEFAULT_PROCESS_LABEL
        {
                _cleanup_free_ char *exec_label = NULL;

                /* A missing attribute or missing SMACK support is not an error, we then simply use the
                 * default label below */
                r = mac_smack_read(command->path, SMACK_ATTR_EXEC, &exec_label);
                if (r < 0 && !IN_SET(r, -ENODATA, -EOPNOTSUPP))
                        return r;

                r = mac_smack_apply_pid(0, exec_label ? : SMACK_DEFAULT_PROCESS_LABEL);
                if (r < 0)
                        return r;
        }
#endif

        return 0;
}
#endif
cefc33ae 2324
6c47cd7d
LP
2325static int compile_bind_mounts(
2326 const ExecContext *context,
2327 const ExecParameters *params,
2328 BindMount **ret_bind_mounts,
da6053d0 2329 size_t *ret_n_bind_mounts,
6c47cd7d
LP
2330 char ***ret_empty_directories) {
2331
2332 _cleanup_strv_free_ char **empty_directories = NULL;
2333 BindMount *bind_mounts;
da6053d0 2334 size_t n, h = 0, i;
6c47cd7d
LP
2335 ExecDirectoryType t;
2336 int r;
2337
2338 assert(context);
2339 assert(params);
2340 assert(ret_bind_mounts);
2341 assert(ret_n_bind_mounts);
2342 assert(ret_empty_directories);
2343
2344 n = context->n_bind_mounts;
2345 for (t = 0; t < _EXEC_DIRECTORY_TYPE_MAX; t++) {
2346 if (!params->prefix[t])
2347 continue;
2348
2349 n += strv_length(context->directories[t].paths);
2350 }
2351
2352 if (n <= 0) {
2353 *ret_bind_mounts = NULL;
2354 *ret_n_bind_mounts = 0;
2355 *ret_empty_directories = NULL;
2356 return 0;
2357 }
2358
2359 bind_mounts = new(BindMount, n);
2360 if (!bind_mounts)
2361 return -ENOMEM;
2362
a8cabc61 2363 for (i = 0; i < context->n_bind_mounts; i++) {
6c47cd7d
LP
2364 BindMount *item = context->bind_mounts + i;
2365 char *s, *d;
2366
2367 s = strdup(item->source);
2368 if (!s) {
2369 r = -ENOMEM;
2370 goto finish;
2371 }
2372
2373 d = strdup(item->destination);
2374 if (!d) {
2375 free(s);
2376 r = -ENOMEM;
2377 goto finish;
2378 }
2379
2380 bind_mounts[h++] = (BindMount) {
2381 .source = s,
2382 .destination = d,
2383 .read_only = item->read_only,
2384 .recursive = item->recursive,
2385 .ignore_enoent = item->ignore_enoent,
2386 };
2387 }
2388
2389 for (t = 0; t < _EXEC_DIRECTORY_TYPE_MAX; t++) {
2390 char **suffix;
2391
2392 if (!params->prefix[t])
2393 continue;
2394
2395 if (strv_isempty(context->directories[t].paths))
2396 continue;
2397
494d0247 2398 if (exec_directory_is_private(context, t) &&
5609f688 2399 !(context->root_directory || context->root_image)) {
6c47cd7d
LP
2400 char *private_root;
2401
2402 /* So this is for a dynamic user, and we need to make sure the process can access its own
2403 * directory. For that we overmount the usually inaccessible "private" subdirectory with a
2404 * tmpfs that makes it accessible and is empty except for the submounts we do this for. */
2405
657ee2d8 2406 private_root = path_join(params->prefix[t], "private");
6c47cd7d
LP
2407 if (!private_root) {
2408 r = -ENOMEM;
2409 goto finish;
2410 }
2411
2412 r = strv_consume(&empty_directories, private_root);
a635a7ae 2413 if (r < 0)
6c47cd7d 2414 goto finish;
6c47cd7d
LP
2415 }
2416
2417 STRV_FOREACH(suffix, context->directories[t].paths) {
2418 char *s, *d;
2419
494d0247 2420 if (exec_directory_is_private(context, t))
657ee2d8 2421 s = path_join(params->prefix[t], "private", *suffix);
6c47cd7d 2422 else
657ee2d8 2423 s = path_join(params->prefix[t], *suffix);
6c47cd7d
LP
2424 if (!s) {
2425 r = -ENOMEM;
2426 goto finish;
2427 }
2428
494d0247 2429 if (exec_directory_is_private(context, t) &&
5609f688
YW
2430 (context->root_directory || context->root_image))
2431 /* When RootDirectory= or RootImage= are set, then the symbolic link to the private
2432 * directory is not created on the root directory. So, let's bind-mount the directory
2433 * on the 'non-private' place. */
657ee2d8 2434 d = path_join(params->prefix[t], *suffix);
5609f688
YW
2435 else
2436 d = strdup(s);
6c47cd7d
LP
2437 if (!d) {
2438 free(s);
2439 r = -ENOMEM;
2440 goto finish;
2441 }
2442
2443 bind_mounts[h++] = (BindMount) {
2444 .source = s,
2445 .destination = d,
2446 .read_only = false,
9ce4e4b0 2447 .nosuid = context->dynamic_user, /* don't allow suid/sgid when DynamicUser= is on */
6c47cd7d
LP
2448 .recursive = true,
2449 .ignore_enoent = false,
2450 };
2451 }
2452 }
2453
2454 assert(h == n);
2455
2456 *ret_bind_mounts = bind_mounts;
2457 *ret_n_bind_mounts = n;
ae2a15bc 2458 *ret_empty_directories = TAKE_PTR(empty_directories);
6c47cd7d
LP
2459
2460 return (int) n;
2461
2462finish:
2463 bind_mount_free_many(bind_mounts, h);
2464 return r;
2465}
2466
4e677599
LP
2467static bool insist_on_sandboxing(
2468 const ExecContext *context,
2469 const char *root_dir,
2470 const char *root_image,
2471 const BindMount *bind_mounts,
2472 size_t n_bind_mounts) {
2473
2474 size_t i;
2475
2476 assert(context);
2477 assert(n_bind_mounts == 0 || bind_mounts);
2478
2479 /* Checks whether we need to insist on fs namespacing. i.e. whether we have settings configured that
2480 * would alter the view on the file system beyond making things read-only or invisble, i.e. would
2481 * rearrange stuff in a way we cannot ignore gracefully. */
2482
2483 if (context->n_temporary_filesystems > 0)
2484 return true;
2485
2486 if (root_dir || root_image)
2487 return true;
2488
2489 if (context->dynamic_user)
2490 return true;
2491
2492 /* If there are any bind mounts set that don't map back onto themselves, fs namespacing becomes
2493 * essential. */
2494 for (i = 0; i < n_bind_mounts; i++)
2495 if (!path_equal(bind_mounts[i].source, bind_mounts[i].destination))
2496 return true;
2497
2498 return false;
2499}
2500
6818c54c 2501static int apply_mount_namespace(
34cf6c43
YW
2502 const Unit *u,
2503 const ExecCommand *command,
6818c54c
LP
2504 const ExecContext *context,
2505 const ExecParameters *params,
7cc5ef5f
ZJS
2506 const ExecRuntime *runtime,
2507 char **error_path) {
6818c54c 2508
7bcef4ef 2509 _cleanup_strv_free_ char **empty_directories = NULL;
93c6bb51 2510 char *tmp = NULL, *var = NULL;
915e6d16 2511 const char *root_dir = NULL, *root_image = NULL;
228af36f 2512 NamespaceInfo ns_info;
165a31c0 2513 bool needs_sandboxing;
6c47cd7d 2514 BindMount *bind_mounts = NULL;
da6053d0 2515 size_t n_bind_mounts = 0;
6818c54c 2516 int r;
93c6bb51 2517
2b3c1b9e
DH
2518 assert(context);
2519
93c6bb51
DH
2520 /* The runtime struct only contains the parent of the private /tmp,
2521 * which is non-accessible to world users. Inside of it there's a /tmp
2522 * that is sticky, and that's the one we want to use here. */
2523
2524 if (context->private_tmp && runtime) {
2525 if (runtime->tmp_dir)
2526 tmp = strjoina(runtime->tmp_dir, "/tmp");
2527 if (runtime->var_tmp_dir)
2528 var = strjoina(runtime->var_tmp_dir, "/tmp");
2529 }
2530
915e6d16
LP
2531 if (params->flags & EXEC_APPLY_CHROOT) {
2532 root_image = context->root_image;
2533
2534 if (!root_image)
2535 root_dir = context->root_directory;
2536 }
93c6bb51 2537
6c47cd7d
LP
2538 r = compile_bind_mounts(context, params, &bind_mounts, &n_bind_mounts, &empty_directories);
2539 if (r < 0)
2540 return r;
2541
165a31c0 2542 needs_sandboxing = (params->flags & EXEC_APPLY_SANDBOXING) && !(command->flags & EXEC_COMMAND_FULLY_PRIVILEGED);
b5a33299
YW
2543 if (needs_sandboxing)
2544 ns_info = (NamespaceInfo) {
2545 .ignore_protect_paths = false,
2546 .private_dev = context->private_devices,
2547 .protect_control_groups = context->protect_control_groups,
2548 .protect_kernel_tunables = context->protect_kernel_tunables,
2549 .protect_kernel_modules = context->protect_kernel_modules,
94a7b275 2550 .protect_kernel_logs = context->protect_kernel_logs,
aecd5ac6 2551 .protect_hostname = context->protect_hostname,
b5a33299 2552 .mount_apivfs = context->mount_apivfs,
228af36f 2553 .private_mounts = context->private_mounts,
b5a33299 2554 };
228af36f
LP
2555 else if (!context->dynamic_user && root_dir)
2556 /*
2557 * If DynamicUser=no and RootDirectory= is set then lets pass a relaxed
2558 * sandbox info, otherwise enforce it, don't ignore protected paths and
2559 * fail if we are enable to apply the sandbox inside the mount namespace.
2560 */
2561 ns_info = (NamespaceInfo) {
2562 .ignore_protect_paths = true,
2563 };
2564 else
2565 ns_info = (NamespaceInfo) {};
b5a33299 2566
37ed15d7
FB
2567 if (context->mount_flags == MS_SHARED)
2568 log_unit_debug(u, "shared mount propagation hidden by other fs namespacing unit settings: ignoring");
2569
915e6d16 2570 r = setup_namespace(root_dir, root_image,
7bcef4ef 2571 &ns_info, context->read_write_paths,
165a31c0
LP
2572 needs_sandboxing ? context->read_only_paths : NULL,
2573 needs_sandboxing ? context->inaccessible_paths : NULL,
6c47cd7d
LP
2574 empty_directories,
2575 bind_mounts,
2576 n_bind_mounts,
2abd4e38
YW
2577 context->temporary_filesystems,
2578 context->n_temporary_filesystems,
93c6bb51
DH
2579 tmp,
2580 var,
165a31c0
LP
2581 needs_sandboxing ? context->protect_home : PROTECT_HOME_NO,
2582 needs_sandboxing ? context->protect_system : PROTECT_SYSTEM_NO,
915e6d16 2583 context->mount_flags,
7cc5ef5f
ZJS
2584 DISSECT_IMAGE_DISCARD_ON_LOOP,
2585 error_path);
93c6bb51 2586
1beab8b0 2587 /* If we couldn't set up the namespace this is probably due to a missing capability. setup_namespace() reports
5238e957 2588 * that with a special, recognizable error ENOANO. In this case, silently proceed, but only if exclusively
1beab8b0
LP
2589 * sandboxing options were used, i.e. nothing such as RootDirectory= or BindMount= that would result in a
2590 * completely different execution environment. */
aca835ed 2591 if (r == -ENOANO) {
4e677599
LP
2592 if (insist_on_sandboxing(
2593 context,
2594 root_dir, root_image,
2595 bind_mounts,
2596 n_bind_mounts)) {
2597 log_unit_debug(u, "Failed to set up namespace, and refusing to continue since the selected namespacing options alter mount environment non-trivially.\n"
2598 "Bind mounts: %zu, temporary filesystems: %zu, root directory: %s, root image: %s, dynamic user: %s",
2599 n_bind_mounts, context->n_temporary_filesystems, yes_no(root_dir), yes_no(root_image), yes_no(context->dynamic_user));
2600
2601 r = -EOPNOTSUPP;
2602 } else {
aca835ed 2603 log_unit_debug(u, "Failed to set up namespace, assuming containerized execution and ignoring.");
4e677599 2604 r = 0;
aca835ed 2605 }
93c6bb51
DH
2606 }
2607
4e677599 2608 bind_mount_free_many(bind_mounts, n_bind_mounts);
93c6bb51
DH
2609 return r;
2610}
2611
915e6d16
LP
2612static int apply_working_directory(
2613 const ExecContext *context,
2614 const ExecParameters *params,
2615 const char *home,
376fecf6 2616 int *exit_status) {
915e6d16 2617
6732edab 2618 const char *d, *wd;
2b3c1b9e
DH
2619
2620 assert(context);
376fecf6 2621 assert(exit_status);
2b3c1b9e 2622
6732edab
LP
2623 if (context->working_directory_home) {
2624
376fecf6
LP
2625 if (!home) {
2626 *exit_status = EXIT_CHDIR;
6732edab 2627 return -ENXIO;
376fecf6 2628 }
6732edab 2629
2b3c1b9e 2630 wd = home;
6732edab
LP
2631
2632 } else if (context->working_directory)
2b3c1b9e
DH
2633 wd = context->working_directory;
2634 else
2635 wd = "/";
e7f1e7c6 2636
fa97f630 2637 if (params->flags & EXEC_APPLY_CHROOT)
2b3c1b9e 2638 d = wd;
fa97f630 2639 else
3b0e5bb5 2640 d = prefix_roota(context->root_directory, wd);
e7f1e7c6 2641
376fecf6
LP
2642 if (chdir(d) < 0 && !context->working_directory_missing_ok) {
2643 *exit_status = EXIT_CHDIR;
2b3c1b9e 2644 return -errno;
376fecf6 2645 }
e7f1e7c6
DH
2646
2647 return 0;
2648}
2649
fa97f630
JB
2650static int apply_root_directory(
2651 const ExecContext *context,
2652 const ExecParameters *params,
2653 const bool needs_mount_ns,
2654 int *exit_status) {
2655
2656 assert(context);
2657 assert(exit_status);
2658
2659 if (params->flags & EXEC_APPLY_CHROOT) {
2660 if (!needs_mount_ns && context->root_directory)
2661 if (chroot(context->root_directory) < 0) {
2662 *exit_status = EXIT_CHROOT;
2663 return -errno;
2664 }
2665 }
2666
2667 return 0;
2668}
2669
b1edf445 2670static int setup_keyring(
34cf6c43 2671 const Unit *u,
b1edf445
LP
2672 const ExecContext *context,
2673 const ExecParameters *p,
2674 uid_t uid, gid_t gid) {
2675
74dd6b51 2676 key_serial_t keyring;
e64c2d0b
DJL
2677 int r = 0;
2678 uid_t saved_uid;
2679 gid_t saved_gid;
74dd6b51
LP
2680
2681 assert(u);
b1edf445 2682 assert(context);
74dd6b51
LP
2683 assert(p);
2684
2685 /* Let's set up a new per-service "session" kernel keyring for each system service. This has the benefit that
2686 * each service runs with its own keyring shared among all processes of the service, but with no hook-up beyond
2687 * that scope, and in particular no link to the per-UID keyring. If we don't do this the keyring will be
2688 * automatically created on-demand and then linked to the per-UID keyring, by the kernel. The kernel's built-in
2689 * on-demand behaviour is very appropriate for login users, but probably not so much for system services, where
2690 * UIDs are not necessarily specific to a service but reused (at least in the case of UID 0). */
2691
b1edf445
LP
2692 if (context->keyring_mode == EXEC_KEYRING_INHERIT)
2693 return 0;
2694
e64c2d0b
DJL
2695 /* Acquiring a reference to the user keyring is nasty. We briefly change identity in order to get things set up
2696 * properly by the kernel. If we don't do that then we can't create it atomically, and that sucks for parallel
2697 * execution. This mimics what pam_keyinit does, too. Setting up session keyring, to be owned by the right user
2698 * & group is just as nasty as acquiring a reference to the user keyring. */
2699
2700 saved_uid = getuid();
2701 saved_gid = getgid();
2702
2703 if (gid_is_valid(gid) && gid != saved_gid) {
2704 if (setregid(gid, -1) < 0)
2705 return log_unit_error_errno(u, errno, "Failed to change GID for user keyring: %m");
2706 }
2707
2708 if (uid_is_valid(uid) && uid != saved_uid) {
2709 if (setreuid(uid, -1) < 0) {
2710 r = log_unit_error_errno(u, errno, "Failed to change UID for user keyring: %m");
2711 goto out;
2712 }
2713 }
2714
74dd6b51
LP
2715 keyring = keyctl(KEYCTL_JOIN_SESSION_KEYRING, 0, 0, 0, 0);
2716 if (keyring == -1) {
2717 if (errno == ENOSYS)
8002fb97 2718 log_unit_debug_errno(u, errno, "Kernel keyring not supported, ignoring.");
74dd6b51 2719 else if (IN_SET(errno, EACCES, EPERM))
8002fb97 2720 log_unit_debug_errno(u, errno, "Kernel keyring access prohibited, ignoring.");
74dd6b51 2721 else if (errno == EDQUOT)
8002fb97 2722 log_unit_debug_errno(u, errno, "Out of kernel keyrings to allocate, ignoring.");
74dd6b51 2723 else
e64c2d0b 2724 r = log_unit_error_errno(u, errno, "Setting up kernel keyring failed: %m");
74dd6b51 2725
e64c2d0b 2726 goto out;
74dd6b51
LP
2727 }
2728
e64c2d0b
DJL
2729 /* When requested link the user keyring into the session keyring. */
2730 if (context->keyring_mode == EXEC_KEYRING_SHARED) {
2731
2732 if (keyctl(KEYCTL_LINK,
2733 KEY_SPEC_USER_KEYRING,
2734 KEY_SPEC_SESSION_KEYRING, 0, 0) < 0) {
2735 r = log_unit_error_errno(u, errno, "Failed to link user keyring into session keyring: %m");
2736 goto out;
2737 }
2738 }
2739
2740 /* Restore uid/gid back */
2741 if (uid_is_valid(uid) && uid != saved_uid) {
2742 if (setreuid(saved_uid, -1) < 0) {
2743 r = log_unit_error_errno(u, errno, "Failed to change UID back for user keyring: %m");
2744 goto out;
2745 }
2746 }
2747
2748 if (gid_is_valid(gid) && gid != saved_gid) {
2749 if (setregid(saved_gid, -1) < 0)
2750 return log_unit_error_errno(u, errno, "Failed to change GID back for user keyring: %m");
2751 }
2752
2753 /* Populate they keyring with the invocation ID by default, as original saved_uid. */
b3415f5d
LP
2754 if (!sd_id128_is_null(u->invocation_id)) {
2755 key_serial_t key;
2756
2757 key = add_key("user", "invocation_id", &u->invocation_id, sizeof(u->invocation_id), KEY_SPEC_SESSION_KEYRING);
2758 if (key == -1)
8002fb97 2759 log_unit_debug_errno(u, errno, "Failed to add invocation ID to keyring, ignoring: %m");
b3415f5d
LP
2760 else {
2761 if (keyctl(KEYCTL_SETPERM, key,
2762 KEY_POS_VIEW|KEY_POS_READ|KEY_POS_SEARCH|
2763 KEY_USR_VIEW|KEY_USR_READ|KEY_USR_SEARCH, 0, 0) < 0)
e64c2d0b 2764 r = log_unit_error_errno(u, errno, "Failed to restrict invocation ID permission: %m");
b3415f5d
LP
2765 }
2766 }
2767
e64c2d0b
DJL
2768out:
2769 /* Revert back uid & gid for the the last time, and exit */
2770 /* no extra logging, as only the first already reported error matters */
2771 if (getuid() != saved_uid)
2772 (void) setreuid(saved_uid, -1);
b1edf445 2773
e64c2d0b
DJL
2774 if (getgid() != saved_gid)
2775 (void) setregid(saved_gid, -1);
b1edf445 2776
e64c2d0b 2777 return r;
74dd6b51
LP
2778}
2779
3042bbeb 2780static void append_socket_pair(int *array, size_t *n, const int pair[static 2]) {
29206d46
LP
2781 assert(array);
2782 assert(n);
2caa38e9 2783 assert(pair);
29206d46
LP
2784
2785 if (pair[0] >= 0)
2786 array[(*n)++] = pair[0];
2787 if (pair[1] >= 0)
2788 array[(*n)++] = pair[1];
2789}
2790
a34ceba6
LP
2791static int close_remaining_fds(
2792 const ExecParameters *params,
34cf6c43
YW
2793 const ExecRuntime *runtime,
2794 const DynamicCreds *dcreds,
00d9ef85 2795 int user_lookup_fd,
a34ceba6 2796 int socket_fd,
5686391b 2797 int exec_fd,
da6053d0 2798 int *fds, size_t n_fds) {
a34ceba6 2799
da6053d0 2800 size_t n_dont_close = 0;
00d9ef85 2801 int dont_close[n_fds + 12];
a34ceba6
LP
2802
2803 assert(params);
2804
2805 if (params->stdin_fd >= 0)
2806 dont_close[n_dont_close++] = params->stdin_fd;
2807 if (params->stdout_fd >= 0)
2808 dont_close[n_dont_close++] = params->stdout_fd;
2809 if (params->stderr_fd >= 0)
2810 dont_close[n_dont_close++] = params->stderr_fd;
2811
2812 if (socket_fd >= 0)
2813 dont_close[n_dont_close++] = socket_fd;
5686391b
LP
2814 if (exec_fd >= 0)
2815 dont_close[n_dont_close++] = exec_fd;
a34ceba6
LP
2816 if (n_fds > 0) {
2817 memcpy(dont_close + n_dont_close, fds, sizeof(int) * n_fds);
2818 n_dont_close += n_fds;
2819 }
2820
29206d46
LP
2821 if (runtime)
2822 append_socket_pair(dont_close, &n_dont_close, runtime->netns_storage_socket);
2823
2824 if (dcreds) {
2825 if (dcreds->user)
2826 append_socket_pair(dont_close, &n_dont_close, dcreds->user->storage_socket);
2827 if (dcreds->group)
2828 append_socket_pair(dont_close, &n_dont_close, dcreds->group->storage_socket);
a34ceba6
LP
2829 }
2830
00d9ef85
LP
2831 if (user_lookup_fd >= 0)
2832 dont_close[n_dont_close++] = user_lookup_fd;
2833
a34ceba6
LP
2834 return close_all_fds(dont_close, n_dont_close);
2835}
2836
00d9ef85
LP
2837static int send_user_lookup(
2838 Unit *unit,
2839 int user_lookup_fd,
2840 uid_t uid,
2841 gid_t gid) {
2842
2843 assert(unit);
2844
2845 /* Send the resolved UID/GID to PID 1 after we learnt it. We send a single datagram, containing the UID/GID
2846 * data as well as the unit name. Note that we suppress sending this if no user/group to resolve was
2847 * specified. */
2848
2849 if (user_lookup_fd < 0)
2850 return 0;
2851
2852 if (!uid_is_valid(uid) && !gid_is_valid(gid))
2853 return 0;
2854
2855 if (writev(user_lookup_fd,
2856 (struct iovec[]) {
e6a7ec4b
LP
2857 IOVEC_INIT(&uid, sizeof(uid)),
2858 IOVEC_INIT(&gid, sizeof(gid)),
2859 IOVEC_INIT_STRING(unit->id) }, 3) < 0)
00d9ef85
LP
2860 return -errno;
2861
2862 return 0;
2863}
2864
6732edab
LP
2865static int acquire_home(const ExecContext *c, uid_t uid, const char** home, char **buf) {
2866 int r;
2867
2868 assert(c);
2869 assert(home);
2870 assert(buf);
2871
2872 /* If WorkingDirectory=~ is set, try to acquire a usable home directory. */
2873
2874 if (*home)
2875 return 0;
2876
2877 if (!c->working_directory_home)
2878 return 0;
2879
6732edab
LP
2880 r = get_home_dir(buf);
2881 if (r < 0)
2882 return r;
2883
2884 *home = *buf;
2885 return 1;
2886}
2887
da50b85a
LP
2888static int compile_suggested_paths(const ExecContext *c, const ExecParameters *p, char ***ret) {
2889 _cleanup_strv_free_ char ** list = NULL;
2890 ExecDirectoryType t;
2891 int r;
2892
2893 assert(c);
2894 assert(p);
2895 assert(ret);
2896
2897 assert(c->dynamic_user);
2898
2899 /* Compile a list of paths that it might make sense to read the owning UID from to use as initial candidate for
2900 * dynamic UID allocation, in order to save us from doing costly recursive chown()s of the special
2901 * directories. */
2902
2903 for (t = 0; t < _EXEC_DIRECTORY_TYPE_MAX; t++) {
2904 char **i;
2905
2906 if (t == EXEC_DIRECTORY_CONFIGURATION)
2907 continue;
2908
2909 if (!p->prefix[t])
2910 continue;
2911
2912 STRV_FOREACH(i, c->directories[t].paths) {
2913 char *e;
2914
494d0247 2915 if (exec_directory_is_private(c, t))
657ee2d8 2916 e = path_join(p->prefix[t], "private", *i);
494d0247
YW
2917 else
2918 e = path_join(p->prefix[t], *i);
da50b85a
LP
2919 if (!e)
2920 return -ENOMEM;
2921
2922 r = strv_consume(&list, e);
2923 if (r < 0)
2924 return r;
2925 }
2926 }
2927
ae2a15bc 2928 *ret = TAKE_PTR(list);
da50b85a
LP
2929
2930 return 0;
2931}
2932
34cf6c43
YW
2933static char *exec_command_line(char **argv);
2934
78f93209
LP
2935static int exec_parameters_get_cgroup_path(const ExecParameters *params, char **ret) {
2936 bool using_subcgroup;
2937 char *p;
2938
2939 assert(params);
2940 assert(ret);
2941
2942 if (!params->cgroup_path)
2943 return -EINVAL;
2944
2945 /* If we are called for a unit where cgroup delegation is on, and the payload created its own populated
2946 * subcgroup (which we expect it to do, after all it asked for delegation), then we cannot place the control
2947 * processes started after the main unit's process in the unit's main cgroup because it is now an inner one,
2948 * and inner cgroups may not contain processes. Hence, if delegation is on, and this is a control process,
2949 * let's use ".control" as subcgroup instead. Note that we do so only for ExecStartPost=, ExecReload=,
2950 * ExecStop=, ExecStopPost=, i.e. for the commands where the main process is already forked. For ExecStartPre=
2951 * this is not necessary, the cgroup is still empty. We distinguish these cases with the EXEC_CONTROL_CGROUP
2952 * flag, which is only passed for the former statements, not for the latter. */
2953
2954 using_subcgroup = FLAGS_SET(params->flags, EXEC_CONTROL_CGROUP|EXEC_CGROUP_DELEGATE|EXEC_IS_CONTROL);
2955 if (using_subcgroup)
657ee2d8 2956 p = path_join(params->cgroup_path, ".control");
78f93209
LP
2957 else
2958 p = strdup(params->cgroup_path);
2959 if (!p)
2960 return -ENOMEM;
2961
2962 *ret = p;
2963 return using_subcgroup;
2964}
2965
ff0af2a1 2966static int exec_child(
f2341e0a 2967 Unit *unit,
34cf6c43 2968 const ExecCommand *command,
ff0af2a1
LP
2969 const ExecContext *context,
2970 const ExecParameters *params,
2971 ExecRuntime *runtime,
29206d46 2972 DynamicCreds *dcreds,
ff0af2a1 2973 int socket_fd,
2caa38e9 2974 const int named_iofds[static 3],
4c47affc 2975 int *fds,
da6053d0 2976 size_t n_socket_fds,
25b583d7 2977 size_t n_storage_fds,
ff0af2a1 2978 char **files_env,
00d9ef85 2979 int user_lookup_fd,
12145637 2980 int *exit_status) {
d35fbf6b 2981
7ca69792 2982 _cleanup_strv_free_ char **our_env = NULL, **pass_env = NULL, **accum_env = NULL, **replaced_argv = NULL;
5686391b 2983 int *fds_with_exec_fd, n_fds_with_exec_fd, r, ngids = 0, exec_fd = -1;
4d885bd3
DH
2984 _cleanup_free_ gid_t *supplementary_gids = NULL;
2985 const char *username = NULL, *groupname = NULL;
5686391b 2986 _cleanup_free_ char *home_buffer = NULL;
2b3c1b9e 2987 const char *home = NULL, *shell = NULL;
7ca69792 2988 char **final_argv = NULL;
7bce046b
LP
2989 dev_t journal_stream_dev = 0;
2990 ino_t journal_stream_ino = 0;
5749f855 2991 bool userns_set_up = false;
165a31c0
LP
2992 bool needs_sandboxing, /* Do we need to set up full sandboxing? (i.e. all namespacing, all MAC stuff, caps, yadda yadda */
2993 needs_setuid, /* Do we need to do the actual setresuid()/setresgid() calls? */
2994 needs_mount_namespace, /* Do we need to set up a mount namespace for this kernel? */
2995 needs_ambient_hack; /* Do we need to apply the ambient capabilities hack? */
349cc4a5 2996#if HAVE_SELINUX
7f59dd35 2997 _cleanup_free_ char *mac_selinux_context_net = NULL;
43b1f709 2998 bool use_selinux = false;
ecfbc84f 2999#endif
f9fa32f0 3000#if ENABLE_SMACK
43b1f709 3001 bool use_smack = false;
ecfbc84f 3002#endif
349cc4a5 3003#if HAVE_APPARMOR
43b1f709 3004 bool use_apparmor = false;
ecfbc84f 3005#endif
5749f855
AZ
3006 uid_t saved_uid = getuid();
3007 gid_t saved_gid = getgid();
fed1e721
LP
3008 uid_t uid = UID_INVALID;
3009 gid_t gid = GID_INVALID;
da6053d0 3010 size_t n_fds;
3536f49e 3011 ExecDirectoryType dt;
165a31c0 3012 int secure_bits;
034c6ed7 3013
f2341e0a 3014 assert(unit);
5cb5a6ff
LP
3015 assert(command);
3016 assert(context);
d35fbf6b 3017 assert(params);
ff0af2a1 3018 assert(exit_status);
d35fbf6b
DM
3019
3020 rename_process_from_path(command->path);
3021
3022 /* We reset exactly these signals, since they are the
3023 * only ones we set to SIG_IGN in the main daemon. All
3024 * others we leave untouched because we set them to
3025 * SIG_DFL or a valid handler initially, both of which
3026 * will be demoted to SIG_DFL. */
ce30c8dc
LP
3027 (void) default_signals(SIGNALS_CRASH_HANDLER,
3028 SIGNALS_IGNORE, -1);
d35fbf6b
DM
3029
3030 if (context->ignore_sigpipe)
ce30c8dc 3031 (void) ignore_signals(SIGPIPE, -1);
d35fbf6b 3032
ff0af2a1
LP
3033 r = reset_signal_mask();
3034 if (r < 0) {
3035 *exit_status = EXIT_SIGNAL_MASK;
12145637 3036 return log_unit_error_errno(unit, r, "Failed to set process signal mask: %m");
d35fbf6b 3037 }
034c6ed7 3038
d35fbf6b
DM
3039 if (params->idle_pipe)
3040 do_idle_pipe_dance(params->idle_pipe);
4f2d528d 3041
2c027c62
LP
3042 /* Close fds we don't need very early to make sure we don't block init reexecution because it cannot bind its
3043 * sockets. Among the fds we close are the logging fds, and we want to keep them closed, so that we don't have
3044 * any fds open we don't really want open during the transition. In order to make logging work, we switch the
3045 * log subsystem into open_when_needed mode, so that it reopens the logs on every single log call. */
ff0af2a1 3046
d35fbf6b 3047 log_forget_fds();
2c027c62 3048 log_set_open_when_needed(true);
4f2d528d 3049
40a80078
LP
3050 /* In case anything used libc syslog(), close this here, too */
3051 closelog();
3052
5686391b
LP
3053 n_fds = n_socket_fds + n_storage_fds;
3054 r = close_remaining_fds(params, runtime, dcreds, user_lookup_fd, socket_fd, params->exec_fd, fds, n_fds);
ff0af2a1
LP
3055 if (r < 0) {
3056 *exit_status = EXIT_FDS;
12145637 3057 return log_unit_error_errno(unit, r, "Failed to close unwanted file descriptors: %m");
8c7be95e
LP
3058 }
3059
d35fbf6b
DM
3060 if (!context->same_pgrp)
3061 if (setsid() < 0) {
ff0af2a1 3062 *exit_status = EXIT_SETSID;
12145637 3063 return log_unit_error_errno(unit, errno, "Failed to create new process session: %m");
d35fbf6b 3064 }
9e2f7c11 3065
1e22b5cd 3066 exec_context_tty_reset(context, params);
d35fbf6b 3067
c891efaf 3068 if (unit_shall_confirm_spawn(unit)) {
7d5ceb64 3069 const char *vc = params->confirm_spawn;
3b20f877
FB
3070 _cleanup_free_ char *cmdline = NULL;
3071
ee39ca20 3072 cmdline = exec_command_line(command->argv);
3b20f877 3073 if (!cmdline) {
0460aa5c 3074 *exit_status = EXIT_MEMORY;
12145637 3075 return log_oom();
3b20f877 3076 }
d35fbf6b 3077
eedf223a 3078 r = ask_for_confirmation(vc, unit, cmdline);
3b20f877
FB
3079 if (r != CONFIRM_EXECUTE) {
3080 if (r == CONFIRM_PRETEND_SUCCESS) {
3081 *exit_status = EXIT_SUCCESS;
3082 return 0;
3083 }
ff0af2a1 3084 *exit_status = EXIT_CONFIRM;
12145637 3085 log_unit_error(unit, "Execution cancelled by the user");
d35fbf6b 3086 return -ECANCELED;
d35fbf6b
DM
3087 }
3088 }
1a63a750 3089
d521916d
LP
3090 /* We are about to invoke NSS and PAM modules. Let's tell them what we are doing here, maybe they care. This is
3091 * used by nss-resolve to disable itself when we are about to start systemd-resolved, to avoid deadlocks. Note
3092 * that these env vars do not survive the execve(), which means they really only apply to the PAM and NSS
3093 * invocations themselves. Also note that while we'll only invoke NSS modules involved in user management they
3094 * might internally call into other NSS modules that are involved in hostname resolution, we never know. */
3095 if (setenv("SYSTEMD_ACTIVATION_UNIT", unit->id, true) != 0 ||
3096 setenv("SYSTEMD_ACTIVATION_SCOPE", MANAGER_IS_SYSTEM(unit->manager) ? "system" : "user", true) != 0) {
3097 *exit_status = EXIT_MEMORY;
3098 return log_unit_error_errno(unit, errno, "Failed to update environment: %m");
3099 }
3100
29206d46 3101 if (context->dynamic_user && dcreds) {
da50b85a 3102 _cleanup_strv_free_ char **suggested_paths = NULL;
29206d46 3103
d521916d
LP
3104 /* On top of that, make sure we bypass our own NSS module nss-systemd comprehensively for any NSS
3105 * checks, if DynamicUser=1 is used, as we shouldn't create a feedback loop with ourselves here.*/
409093fe
LP
3106 if (putenv((char*) "SYSTEMD_NSS_DYNAMIC_BYPASS=1") != 0) {
3107 *exit_status = EXIT_USER;
12145637 3108 return log_unit_error_errno(unit, errno, "Failed to update environment: %m");
409093fe
LP
3109 }
3110
da50b85a
LP
3111 r = compile_suggested_paths(context, params, &suggested_paths);
3112 if (r < 0) {
3113 *exit_status = EXIT_MEMORY;
3114 return log_oom();
3115 }
3116
3117 r = dynamic_creds_realize(dcreds, suggested_paths, &uid, &gid);
ff0af2a1
LP
3118 if (r < 0) {
3119 *exit_status = EXIT_USER;
e2b0cc34
YW
3120 if (r == -EILSEQ) {
3121 log_unit_error(unit, "Failed to update dynamic user credentials: User or group with specified name already exists.");
3122 return -EOPNOTSUPP;
3123 }
12145637 3124 return log_unit_error_errno(unit, r, "Failed to update dynamic user credentials: %m");
524daa8c 3125 }
524daa8c 3126
70dd455c 3127 if (!uid_is_valid(uid)) {
29206d46 3128 *exit_status = EXIT_USER;
12145637 3129 log_unit_error(unit, "UID validation failed for \""UID_FMT"\"", uid);
70dd455c
ZJS
3130 return -ESRCH;
3131 }
3132
3133 if (!gid_is_valid(gid)) {
3134 *exit_status = EXIT_USER;
12145637 3135 log_unit_error(unit, "GID validation failed for \""GID_FMT"\"", gid);
29206d46
LP
3136 return -ESRCH;
3137 }
5bc7452b 3138
29206d46
LP
3139 if (dcreds->user)
3140 username = dcreds->user->name;
3141
3142 } else {
4d885bd3
DH
3143 r = get_fixed_user(context, &username, &uid, &gid, &home, &shell);
3144 if (r < 0) {
3145 *exit_status = EXIT_USER;
12145637 3146 return log_unit_error_errno(unit, r, "Failed to determine user credentials: %m");
5bc7452b 3147 }
5bc7452b 3148
4d885bd3
DH
3149 r = get_fixed_group(context, &groupname, &gid);
3150 if (r < 0) {
3151 *exit_status = EXIT_GROUP;
12145637 3152 return log_unit_error_errno(unit, r, "Failed to determine group credentials: %m");
4d885bd3 3153 }
cdc5d5c5 3154 }
29206d46 3155
cdc5d5c5
DH
3156 /* Initialize user supplementary groups and get SupplementaryGroups= ones */
3157 r = get_supplementary_groups(context, username, groupname, gid,
3158 &supplementary_gids, &ngids);
3159 if (r < 0) {
3160 *exit_status = EXIT_GROUP;
12145637 3161 return log_unit_error_errno(unit, r, "Failed to determine supplementary groups: %m");
29206d46 3162 }
5bc7452b 3163
00d9ef85
LP
3164 r = send_user_lookup(unit, user_lookup_fd, uid, gid);
3165 if (r < 0) {
3166 *exit_status = EXIT_USER;
12145637 3167 return log_unit_error_errno(unit, r, "Failed to send user credentials to PID1: %m");
00d9ef85
LP
3168 }
3169
3170 user_lookup_fd = safe_close(user_lookup_fd);
3171
6732edab
LP
3172 r = acquire_home(context, uid, &home, &home_buffer);
3173 if (r < 0) {
3174 *exit_status = EXIT_CHDIR;
12145637 3175 return log_unit_error_errno(unit, r, "Failed to determine $HOME for user: %m");
6732edab
LP
3176 }
3177
d35fbf6b
DM
3178 /* If a socket is connected to STDIN/STDOUT/STDERR, we
3179 * must be sure to drop O_NONBLOCK */
3180 if (socket_fd >= 0)
a34ceba6 3181 (void) fd_nonblock(socket_fd, false);
acbb0225 3182
4c70a4a7
MS
3183 /* Journald will try to look-up our cgroup in order to populate _SYSTEMD_CGROUP and _SYSTEMD_UNIT fields.
3184 * Hence we need to migrate to the target cgroup from init.scope before connecting to journald */
3185 if (params->cgroup_path) {
3186 _cleanup_free_ char *p = NULL;
3187
3188 r = exec_parameters_get_cgroup_path(params, &p);
3189 if (r < 0) {
3190 *exit_status = EXIT_CGROUP;
3191 return log_unit_error_errno(unit, r, "Failed to acquire cgroup path: %m");
3192 }
3193
3194 r = cg_attach_everywhere(params->cgroup_supported, p, 0, NULL, NULL);
3195 if (r < 0) {
3196 *exit_status = EXIT_CGROUP;
3197 return log_unit_error_errno(unit, r, "Failed to attach to cgroup %s: %m", p);
3198 }
3199 }
3200
a8d08f39
LP
3201 if (context->network_namespace_path && runtime && runtime->netns_storage_socket[0] >= 0) {
3202 r = open_netns_path(runtime->netns_storage_socket, context->network_namespace_path);
3203 if (r < 0) {
3204 *exit_status = EXIT_NETWORK;
3205 return log_unit_error_errno(unit, r, "Failed to open network namespace path %s: %m", context->network_namespace_path);
3206 }
3207 }
3208
52c239d7 3209 r = setup_input(context, params, socket_fd, named_iofds);
ff0af2a1
LP
3210 if (r < 0) {
3211 *exit_status = EXIT_STDIN;
12145637 3212 return log_unit_error_errno(unit, r, "Failed to set up standard input: %m");
d35fbf6b 3213 }
034c6ed7 3214
52c239d7 3215 r = setup_output(unit, context, params, STDOUT_FILENO, socket_fd, named_iofds, basename(command->path), uid, gid, &journal_stream_dev, &journal_stream_ino);
ff0af2a1
LP
3216 if (r < 0) {
3217 *exit_status = EXIT_STDOUT;
12145637 3218 return log_unit_error_errno(unit, r, "Failed to set up standard output: %m");
d35fbf6b
DM
3219 }
3220
52c239d7 3221 r = setup_output(unit, context, params, STDERR_FILENO, socket_fd, named_iofds, basename(command->path), uid, gid, &journal_stream_dev, &journal_stream_ino);
ff0af2a1
LP
3222 if (r < 0) {
3223 *exit_status = EXIT_STDERR;
12145637 3224 return log_unit_error_errno(unit, r, "Failed to set up standard error output: %m");
d35fbf6b
DM
3225 }
3226
d35fbf6b 3227 if (context->oom_score_adjust_set) {
9f8168eb
LP
3228 /* When we can't make this change due to EPERM, then let's silently skip over it. User namespaces
3229 * prohibit write access to this file, and we shouldn't trip up over that. */
3230 r = set_oom_score_adjust(context->oom_score_adjust);
12145637 3231 if (IN_SET(r, -EPERM, -EACCES))
f2341e0a 3232 log_unit_debug_errno(unit, r, "Failed to adjust OOM setting, assuming containerized execution, ignoring: %m");
12145637 3233 else if (r < 0) {
ff0af2a1 3234 *exit_status = EXIT_OOM_ADJUST;
12145637 3235 return log_unit_error_errno(unit, r, "Failed to adjust OOM setting: %m");
613b411c 3236 }
d35fbf6b
DM
3237 }
3238
3239 if (context->nice_set)
3240 if (setpriority(PRIO_PROCESS, 0, context->nice) < 0) {
ff0af2a1 3241 *exit_status = EXIT_NICE;
12145637 3242 return log_unit_error_errno(unit, errno, "Failed to set up process scheduling priority (nice level): %m");
613b411c
LP
3243 }
3244
d35fbf6b
DM
3245 if (context->cpu_sched_set) {
3246 struct sched_param param = {
3247 .sched_priority = context->cpu_sched_priority,
3248 };
3249
ff0af2a1
LP
3250 r = sched_setscheduler(0,
3251 context->cpu_sched_policy |
3252 (context->cpu_sched_reset_on_fork ?
3253 SCHED_RESET_ON_FORK : 0),
3254 &param);
3255 if (r < 0) {
3256 *exit_status = EXIT_SETSCHEDULER;
12145637 3257 return log_unit_error_errno(unit, errno, "Failed to set up CPU scheduling: %m");
fc9b2a84 3258 }
d35fbf6b 3259 }
fc9b2a84 3260
0985c7c4
ZJS
3261 if (context->cpu_set.set)
3262 if (sched_setaffinity(0, context->cpu_set.allocated, context->cpu_set.set) < 0) {
ff0af2a1 3263 *exit_status = EXIT_CPUAFFINITY;
12145637 3264 return log_unit_error_errno(unit, errno, "Failed to set up CPU affinity: %m");
034c6ed7
LP
3265 }
3266
b070c7c0
MS
3267 if (mpol_is_valid(numa_policy_get_type(&context->numa_policy))) {
3268 r = apply_numa_policy(&context->numa_policy);
3269 if (r == -EOPNOTSUPP)
33fe9e3f 3270 log_unit_debug_errno(unit, r, "NUMA support not available, ignoring.");
b070c7c0
MS
3271 else if (r < 0) {
3272 *exit_status = EXIT_NUMA_POLICY;
3273 return log_unit_error_errno(unit, r, "Failed to set NUMA memory policy: %m");
3274 }
3275 }
3276
d35fbf6b
DM
3277 if (context->ioprio_set)
3278 if (ioprio_set(IOPRIO_WHO_PROCESS, 0, context->ioprio) < 0) {
ff0af2a1 3279 *exit_status = EXIT_IOPRIO;
12145637 3280 return log_unit_error_errno(unit, errno, "Failed to set up IO scheduling priority: %m");
d35fbf6b 3281 }
da726a4d 3282
d35fbf6b
DM
3283 if (context->timer_slack_nsec != NSEC_INFINITY)
3284 if (prctl(PR_SET_TIMERSLACK, context->timer_slack_nsec) < 0) {
ff0af2a1 3285 *exit_status = EXIT_TIMERSLACK;
12145637 3286 return log_unit_error_errno(unit, errno, "Failed to set up timer slack: %m");
4c2630eb 3287 }
9eba9da4 3288
21022b9d
LP
3289 if (context->personality != PERSONALITY_INVALID) {
3290 r = safe_personality(context->personality);
3291 if (r < 0) {
ff0af2a1 3292 *exit_status = EXIT_PERSONALITY;
12145637 3293 return log_unit_error_errno(unit, r, "Failed to set up execution domain (personality): %m");
4c2630eb 3294 }
21022b9d 3295 }
94f04347 3296
d35fbf6b 3297 if (context->utmp_id)
df0ff127 3298 utmp_put_init_process(context->utmp_id, getpid_cached(), getsid(0),
6a93917d 3299 context->tty_path,
023a4f67
LP
3300 context->utmp_mode == EXEC_UTMP_INIT ? INIT_PROCESS :
3301 context->utmp_mode == EXEC_UTMP_LOGIN ? LOGIN_PROCESS :
3302 USER_PROCESS,
6a93917d 3303 username);
d35fbf6b 3304
08f67696 3305 if (uid_is_valid(uid)) {
ff0af2a1
LP
3306 r = chown_terminal(STDIN_FILENO, uid);
3307 if (r < 0) {
3308 *exit_status = EXIT_STDIN;
12145637 3309 return log_unit_error_errno(unit, r, "Failed to change ownership of terminal: %m");
071830ff 3310 }
d35fbf6b 3311 }
8e274523 3312
4e1dfa45 3313 /* If delegation is enabled we'll pass ownership of the cgroup to the user of the new process. On cgroup v1
62b9bb26 3314 * this is only about systemd's own hierarchy, i.e. not the controller hierarchies, simply because that's not
4e1dfa45 3315 * safe. On cgroup v2 there's only one hierarchy anyway, and delegation is safe there, hence in that case only
62b9bb26 3316 * touch a single hierarchy too. */
584b8688 3317 if (params->cgroup_path && context->user && (params->flags & EXEC_CGROUP_DELEGATE)) {
62b9bb26 3318 r = cg_set_access(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, uid, gid);
ff0af2a1
LP
3319 if (r < 0) {
3320 *exit_status = EXIT_CGROUP;
12145637 3321 return log_unit_error_errno(unit, r, "Failed to adjust control group access: %m");
034c6ed7 3322 }
d35fbf6b 3323 }
034c6ed7 3324
72fd1768 3325 for (dt = 0; dt < _EXEC_DIRECTORY_TYPE_MAX; dt++) {
8679efde 3326 r = setup_exec_directory(context, params, uid, gid, dt, exit_status);
12145637
LP
3327 if (r < 0)
3328 return log_unit_error_errno(unit, r, "Failed to set up special execution directory in %s: %m", params->prefix[dt]);
d35fbf6b 3329 }
94f04347 3330
7bce046b 3331 r = build_environment(
fd63e712 3332 unit,
7bce046b
LP
3333 context,
3334 params,
3335 n_fds,
3336 home,
3337 username,
3338 shell,
3339 journal_stream_dev,
3340 journal_stream_ino,
3341 &our_env);
2065ca69
JW
3342 if (r < 0) {
3343 *exit_status = EXIT_MEMORY;
12145637 3344 return log_oom();
2065ca69
JW
3345 }
3346
3347 r = build_pass_environment(context, &pass_env);
3348 if (r < 0) {
3349 *exit_status = EXIT_MEMORY;
12145637 3350 return log_oom();
2065ca69
JW
3351 }
3352
3353 accum_env = strv_env_merge(5,
3354 params->environment,
3355 our_env,
3356 pass_env,
3357 context->environment,
3358 files_env,
3359 NULL);
3360 if (!accum_env) {
3361 *exit_status = EXIT_MEMORY;
12145637 3362 return log_oom();
2065ca69 3363 }
1280503b 3364 accum_env = strv_env_clean(accum_env);
2065ca69 3365
096424d1 3366 (void) umask(context->umask);
b213e1c1 3367
b1edf445 3368 r = setup_keyring(unit, context, params, uid, gid);
74dd6b51
LP
3369 if (r < 0) {
3370 *exit_status = EXIT_KEYRING;
12145637 3371 return log_unit_error_errno(unit, r, "Failed to set up kernel keyring: %m");
74dd6b51
LP
3372 }
3373
165a31c0 3374 /* We need sandboxing if the caller asked us to apply it and the command isn't explicitly excepted from it */
1703fa41 3375 needs_sandboxing = (params->flags & EXEC_APPLY_SANDBOXING) && !(command->flags & EXEC_COMMAND_FULLY_PRIVILEGED);
7f18ef0a 3376
165a31c0
LP
3377 /* We need the ambient capability hack, if the caller asked us to apply it and the command is marked for it, and the kernel doesn't actually support ambient caps */
3378 needs_ambient_hack = (params->flags & EXEC_APPLY_SANDBOXING) && (command->flags & EXEC_COMMAND_AMBIENT_MAGIC) && !ambient_capabilities_supported();
7f18ef0a 3379
165a31c0
LP
3380 /* We need setresuid() if the caller asked us to apply sandboxing and the command isn't explicitly excepted from either whole sandboxing or just setresuid() itself, and the ambient hack is not desired */
3381 if (needs_ambient_hack)
3382 needs_setuid = false;
3383 else
3384 needs_setuid = (params->flags & EXEC_APPLY_SANDBOXING) && !(command->flags & (EXEC_COMMAND_FULLY_PRIVILEGED|EXEC_COMMAND_NO_SETUID));
3385
3386 if (needs_sandboxing) {
7f18ef0a
FK
3387 /* MAC enablement checks need to be done before a new mount ns is created, as they rely on /sys being
3388 * present. The actual MAC context application will happen later, as late as possible, to avoid
3389 * impacting our own code paths. */
3390
349cc4a5 3391#if HAVE_SELINUX
43b1f709 3392 use_selinux = mac_selinux_use();
7f18ef0a 3393#endif
f9fa32f0 3394#if ENABLE_SMACK
43b1f709 3395 use_smack = mac_smack_use();
7f18ef0a 3396#endif
349cc4a5 3397#if HAVE_APPARMOR
43b1f709 3398 use_apparmor = mac_apparmor_use();
7f18ef0a 3399#endif
165a31c0 3400 }
7f18ef0a 3401
ce932d2d
LP
3402 if (needs_sandboxing) {
3403 int which_failed;
3404
3405 /* Let's set the resource limits before we call into PAM, so that pam_limits wins over what
3406 * is set here. (See below.) */
3407
3408 r = setrlimit_closest_all((const struct rlimit* const *) context->rlimit, &which_failed);
3409 if (r < 0) {
3410 *exit_status = EXIT_LIMITS;
3411 return log_unit_error_errno(unit, r, "Failed to adjust resource limit RLIMIT_%s: %m", rlimit_to_string(which_failed));
3412 }
3413 }
3414
165a31c0 3415 if (needs_setuid) {
ce932d2d
LP
3416
3417 /* Let's call into PAM after we set up our own idea of resource limits so that pam_limits
3418 * wins here. (See above.) */
3419
165a31c0
LP
3420 if (context->pam_name && username) {
3421 r = setup_pam(context->pam_name, username, uid, gid, context->tty_path, &accum_env, fds, n_fds);
3422 if (r < 0) {
3423 *exit_status = EXIT_PAM;
12145637 3424 return log_unit_error_errno(unit, r, "Failed to set up PAM session: %m");
165a31c0
LP
3425 }
3426 }
b213e1c1 3427 }
ac45f971 3428
5749f855
AZ
3429 if (needs_sandboxing) {
3430#if HAVE_SELINUX
3431 if (use_selinux && params->selinux_context_net && socket_fd >= 0) {
3432 r = mac_selinux_get_child_mls_label(socket_fd, command->path, context->selinux_context, &mac_selinux_context_net);
3433 if (r < 0) {
3434 *exit_status = EXIT_SELINUX_CONTEXT;
3435 return log_unit_error_errno(unit, r, "Failed to determine SELinux context: %m");
3436 }
3437 }
3438#endif
3439
3440 /* If we're unprivileged, set up the user namespace first to enable use of the other namespaces.
3441 * Users with CAP_SYS_ADMIN can set up user namespaces last because they will be able to
3442 * set up all of the other namespaces (i.e. network, mount, UTS) without a user namespace. */
3443 if (context->private_users && !have_effective_cap(CAP_SYS_ADMIN)) {
3444 userns_set_up = true;
3445 r = setup_private_users(saved_uid, saved_gid, uid, gid);
3446 if (r < 0) {
3447 *exit_status = EXIT_USER;
3448 return log_unit_error_errno(unit, r, "Failed to set up user namespacing for unprivileged user: %m");
3449 }
3450 }
3451 }
3452
a8d08f39
LP
3453 if ((context->private_network || context->network_namespace_path) && runtime && runtime->netns_storage_socket[0] >= 0) {
3454
6e2d7c4f
MS
3455 if (ns_type_supported(NAMESPACE_NET)) {
3456 r = setup_netns(runtime->netns_storage_socket);
3457 if (r < 0) {
3458 *exit_status = EXIT_NETWORK;
3459 return log_unit_error_errno(unit, r, "Failed to set up network namespacing: %m");
3460 }
a8d08f39
LP
3461 } else if (context->network_namespace_path) {
3462 *exit_status = EXIT_NETWORK;
3463 return log_unit_error_errno(unit, SYNTHETIC_ERRNO(EOPNOTSUPP), "NetworkNamespacePath= is not supported, refusing.");
6e2d7c4f
MS
3464 } else
3465 log_unit_warning(unit, "PrivateNetwork=yes is configured, but the kernel does not support network namespaces, ignoring.");
d35fbf6b 3466 }
169c1bda 3467
ee818b89 3468 needs_mount_namespace = exec_needs_mount_namespace(context, params, runtime);
ee818b89 3469 if (needs_mount_namespace) {
7cc5ef5f
ZJS
3470 _cleanup_free_ char *error_path = NULL;
3471
3472 r = apply_mount_namespace(unit, command, context, params, runtime, &error_path);
3fbe8dbe
LP
3473 if (r < 0) {
3474 *exit_status = EXIT_NAMESPACE;
7cc5ef5f
ZJS
3475 return log_unit_error_errno(unit, r, "Failed to set up mount namespacing%s%s: %m",
3476 error_path ? ": " : "", strempty(error_path));
3fbe8dbe 3477 }
d35fbf6b 3478 }
81a2b7ce 3479
aecd5ac6
TM
3480 if (context->protect_hostname) {
3481 if (ns_type_supported(NAMESPACE_UTS)) {
3482 if (unshare(CLONE_NEWUTS) < 0) {
6d19b718
LP
3483 if (!ERRNO_IS_NOT_SUPPORTED(errno) && !ERRNO_IS_PRIVILEGE(errno)) {
3484 *exit_status = EXIT_NAMESPACE;
3485 return log_unit_error_errno(unit, errno, "Failed to set up UTS namespacing: %m");
3486 }
3487
3488 log_unit_warning(unit, "ProtectHostname=yes is configured, but UTS namespace setup is prohibited (container manager?), ignoring namespace setup.");
aecd5ac6
TM
3489 }
3490 } else
3491 log_unit_warning(unit, "ProtectHostname=yes is configured, but the kernel does not support UTS namespaces, ignoring namespace setup.");
3492#if HAVE_SECCOMP
3493 r = seccomp_protect_hostname();
3494 if (r < 0) {
3495 *exit_status = EXIT_SECCOMP;
3496 return log_unit_error_errno(unit, r, "Failed to apply hostname restrictions: %m");
3497 }
3498#endif
3499 }
3500
5749f855
AZ
3501 /* Drop groups as early as possible.
3502 * This needs to be done after PrivateDevices=y setup as device nodes should be owned by the host's root.
3503 * For non-root in a userns, devices will be owned by the user/group before the group change, and nobody. */
165a31c0 3504 if (needs_setuid) {
709dbeac 3505 r = enforce_groups(gid, supplementary_gids, ngids);
096424d1
LP
3506 if (r < 0) {
3507 *exit_status = EXIT_GROUP;
12145637 3508 return log_unit_error_errno(unit, r, "Changing group credentials failed: %m");
096424d1 3509 }
165a31c0 3510 }
096424d1 3511
5749f855
AZ
3512 /* If the user namespace was not set up above, try to do it now.
3513 * It's preferred to set up the user namespace later (after all other namespaces) so as not to be
3514 * restricted by rules pertaining to combining user namespaces with other namespaces (e.g. in the
3515 * case of mount namespaces being less privileged when the mount point list is copied from a
3516 * different user namespace). */
9008e1ac 3517
5749f855
AZ
3518 if (needs_sandboxing && context->private_users && !userns_set_up) {
3519 r = setup_private_users(saved_uid, saved_gid, uid, gid);
3520 if (r < 0) {
3521 *exit_status = EXIT_USER;
3522 return log_unit_error_errno(unit, r, "Failed to set up user namespacing: %m");
d251207d
LP
3523 }
3524 }
3525
165a31c0 3526 /* We repeat the fd closing here, to make sure that nothing is leaked from the PAM modules. Note that we are
5686391b
LP
3527 * more aggressive this time since socket_fd and the netns fds we don't need anymore. We do keep the exec_fd
3528 * however if we have it as we want to keep it open until the final execve(). */
3529
3530 if (params->exec_fd >= 0) {
3531 exec_fd = params->exec_fd;
3532
3533 if (exec_fd < 3 + (int) n_fds) {
3534 int moved_fd;
3535
3536 /* Let's move the exec fd far up, so that it's outside of the fd range we want to pass to the
3537 * process we are about to execute. */
3538
3539 moved_fd = fcntl(exec_fd, F_DUPFD_CLOEXEC, 3 + (int) n_fds);
3540 if (moved_fd < 0) {
3541 *exit_status = EXIT_FDS;
3542 return log_unit_error_errno(unit, errno, "Couldn't move exec fd up: %m");
3543 }
3544
3545 safe_close(exec_fd);
3546 exec_fd = moved_fd;
3547 } else {
3548 /* This fd should be FD_CLOEXEC already, but let's make sure. */
3549 r = fd_cloexec(exec_fd, true);
3550 if (r < 0) {
3551 *exit_status = EXIT_FDS;
3552 return log_unit_error_errno(unit, r, "Failed to make exec fd FD_CLOEXEC: %m");
3553 }
3554 }
3555
3556 fds_with_exec_fd = newa(int, n_fds + 1);
7e8d494b 3557 memcpy_safe(fds_with_exec_fd, fds, n_fds * sizeof(int));
5686391b
LP
3558 fds_with_exec_fd[n_fds] = exec_fd;
3559 n_fds_with_exec_fd = n_fds + 1;
3560 } else {
3561 fds_with_exec_fd = fds;
3562 n_fds_with_exec_fd = n_fds;
3563 }
3564
3565 r = close_all_fds(fds_with_exec_fd, n_fds_with_exec_fd);
ff0af2a1
LP
3566 if (r >= 0)
3567 r = shift_fds(fds, n_fds);
3568 if (r >= 0)
25b583d7 3569 r = flags_fds(fds, n_socket_fds, n_storage_fds, context->non_blocking);
ff0af2a1
LP
3570 if (r < 0) {
3571 *exit_status = EXIT_FDS;
12145637 3572 return log_unit_error_errno(unit, r, "Failed to adjust passed file descriptors: %m");
d35fbf6b 3573 }
e66cf1a3 3574
5686391b
LP
3575 /* At this point, the fds we want to pass to the program are all ready and set up, with O_CLOEXEC turned off
3576 * and at the right fd numbers. There are no other fds open, with one exception: the exec_fd if it is defined,
3577 * and it has O_CLOEXEC set, after all we want it to be closed by the execve(), so that our parent knows we
3578 * came this far. */
3579
165a31c0 3580 secure_bits = context->secure_bits;
e66cf1a3 3581
165a31c0
LP
3582 if (needs_sandboxing) {
3583 uint64_t bset;
e66cf1a3 3584
ce932d2d
LP
3585 /* Set the RTPRIO resource limit to 0, but only if nothing else was explicitly
3586 * requested. (Note this is placed after the general resource limit initialization, see
3587 * above, in order to take precedence.) */
f4170c67
LP
3588 if (context->restrict_realtime && !context->rlimit[RLIMIT_RTPRIO]) {
3589 if (setrlimit(RLIMIT_RTPRIO, &RLIMIT_MAKE_CONST(0)) < 0) {
3590 *exit_status = EXIT_LIMITS;
12145637 3591 return log_unit_error_errno(unit, errno, "Failed to adjust RLIMIT_RTPRIO resource limit: %m");
f4170c67
LP
3592 }
3593 }
3594
37ac2744
JB
3595#if ENABLE_SMACK
3596 /* LSM Smack needs the capability CAP_MAC_ADMIN to change the current execution security context of the
3597 * process. This is the latest place before dropping capabilities. Other MAC context are set later. */
3598 if (use_smack) {
3599 r = setup_smack(context, command);
3600 if (r < 0) {
3601 *exit_status = EXIT_SMACK_PROCESS_LABEL;
3602 return log_unit_error_errno(unit, r, "Failed to set SMACK process label: %m");
3603 }
3604 }
3605#endif
3606
165a31c0
LP
3607 bset = context->capability_bounding_set;
3608 /* If the ambient caps hack is enabled (which means the kernel can't do them, and the user asked for
3609 * our magic fallback), then let's add some extra caps, so that the service can drop privs of its own,
3610 * instead of us doing that */
3611 if (needs_ambient_hack)
3612 bset |= (UINT64_C(1) << CAP_SETPCAP) |
3613 (UINT64_C(1) << CAP_SETUID) |
3614 (UINT64_C(1) << CAP_SETGID);
3615
3616 if (!cap_test_all(bset)) {
3617 r = capability_bounding_set_drop(bset, false);
ff0af2a1
LP
3618 if (r < 0) {
3619 *exit_status = EXIT_CAPABILITIES;
12145637 3620 return log_unit_error_errno(unit, r, "Failed to drop capabilities: %m");
3b8bddde 3621 }
4c2630eb 3622 }
3b8bddde 3623
755d4b67
IP
3624 /* This is done before enforce_user, but ambient set
3625 * does not survive over setresuid() if keep_caps is not set. */
943800f4 3626 if (!needs_ambient_hack) {
755d4b67
IP
3627 r = capability_ambient_set_apply(context->capability_ambient_set, true);
3628 if (r < 0) {
3629 *exit_status = EXIT_CAPABILITIES;
12145637 3630 return log_unit_error_errno(unit, r, "Failed to apply ambient capabilities (before UID change): %m");
755d4b67 3631 }
755d4b67 3632 }
165a31c0 3633 }
755d4b67 3634
fa97f630
JB
3635 /* chroot to root directory first, before we lose the ability to chroot */
3636 r = apply_root_directory(context, params, needs_mount_namespace, exit_status);
3637 if (r < 0)
3638 return log_unit_error_errno(unit, r, "Chrooting to the requested root directory failed: %m");
3639
165a31c0 3640 if (needs_setuid) {
08f67696 3641 if (uid_is_valid(uid)) {
ff0af2a1
LP
3642 r = enforce_user(context, uid);
3643 if (r < 0) {
3644 *exit_status = EXIT_USER;
12145637 3645 return log_unit_error_errno(unit, r, "Failed to change UID to " UID_FMT ": %m", uid);
5b6319dc 3646 }
165a31c0
LP
3647
3648 if (!needs_ambient_hack &&
3649 context->capability_ambient_set != 0) {
755d4b67
IP
3650
3651 /* Fix the ambient capabilities after user change. */
3652 r = capability_ambient_set_apply(context->capability_ambient_set, false);
3653 if (r < 0) {
3654 *exit_status = EXIT_CAPABILITIES;
12145637 3655 return log_unit_error_errno(unit, r, "Failed to apply ambient capabilities (after UID change): %m");
755d4b67
IP
3656 }
3657
3658 /* If we were asked to change user and ambient capabilities
3659 * were requested, we had to add keep-caps to the securebits
3660 * so that we would maintain the inherited capability set
3661 * through the setresuid(). Make sure that the bit is added
3662 * also to the context secure_bits so that we don't try to
3663 * drop the bit away next. */
3664
7f508f2c 3665 secure_bits |= 1<<SECURE_KEEP_CAPS;
755d4b67 3666 }
5b6319dc 3667 }
165a31c0 3668 }
d35fbf6b 3669
56ef8db9
JB
3670 /* Apply working directory here, because the working directory might be on NFS and only the user running
3671 * this service might have the correct privilege to change to the working directory */
fa97f630 3672 r = apply_working_directory(context, params, home, exit_status);
56ef8db9
JB
3673 if (r < 0)
3674 return log_unit_error_errno(unit, r, "Changing to the requested working directory failed: %m");
3675
165a31c0 3676 if (needs_sandboxing) {
37ac2744 3677 /* Apply other MAC contexts late, but before seccomp syscall filtering, as those should really be last to
5cd9cd35
LP
3678 * influence our own codepaths as little as possible. Moreover, applying MAC contexts usually requires
3679 * syscalls that are subject to seccomp filtering, hence should probably be applied before the syscalls
3680 * are restricted. */
3681
349cc4a5 3682#if HAVE_SELINUX
43b1f709 3683 if (use_selinux) {
5cd9cd35
LP
3684 char *exec_context = mac_selinux_context_net ?: context->selinux_context;
3685
3686 if (exec_context) {
3687 r = setexeccon(exec_context);
3688 if (r < 0) {
3689 *exit_status = EXIT_SELINUX_CONTEXT;
12145637 3690 return log_unit_error_errno(unit, r, "Failed to change SELinux context to %s: %m", exec_context);
5cd9cd35
LP
3691 }
3692 }
3693 }
3694#endif
3695
349cc4a5 3696#if HAVE_APPARMOR
43b1f709 3697 if (use_apparmor && context->apparmor_profile) {
5cd9cd35
LP
3698 r = aa_change_onexec(context->apparmor_profile);
3699 if (r < 0 && !context->apparmor_profile_ignore) {
3700 *exit_status = EXIT_APPARMOR_PROFILE;
12145637 3701 return log_unit_error_errno(unit, errno, "Failed to prepare AppArmor profile change to %s: %m", context->apparmor_profile);
5cd9cd35
LP
3702 }
3703 }
3704#endif
3705
165a31c0
LP
3706 /* PR_GET_SECUREBITS is not privileged, while PR_SET_SECUREBITS is. So to suppress potential EPERMs
3707 * we'll try not to call PR_SET_SECUREBITS unless necessary. */
755d4b67
IP
3708 if (prctl(PR_GET_SECUREBITS) != secure_bits)
3709 if (prctl(PR_SET_SECUREBITS, secure_bits) < 0) {
ff0af2a1 3710 *exit_status = EXIT_SECUREBITS;
12145637 3711 return log_unit_error_errno(unit, errno, "Failed to set process secure bits: %m");
ff01d048 3712 }
5b6319dc 3713
59eeb84b 3714 if (context_has_no_new_privileges(context))
d35fbf6b 3715 if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) < 0) {
ff0af2a1 3716 *exit_status = EXIT_NO_NEW_PRIVILEGES;
12145637 3717 return log_unit_error_errno(unit, errno, "Failed to disable new privileges: %m");
d35fbf6b
DM
3718 }
3719
349cc4a5 3720#if HAVE_SECCOMP
469830d1
LP
3721 r = apply_address_families(unit, context);
3722 if (r < 0) {
3723 *exit_status = EXIT_ADDRESS_FAMILIES;
12145637 3724 return log_unit_error_errno(unit, r, "Failed to restrict address families: %m");
4c2630eb 3725 }
04aa0cb9 3726
469830d1
LP
3727 r = apply_memory_deny_write_execute(unit, context);
3728 if (r < 0) {
3729 *exit_status = EXIT_SECCOMP;
12145637 3730 return log_unit_error_errno(unit, r, "Failed to disable writing to executable memory: %m");
f3e43635 3731 }
f4170c67 3732
469830d1
LP
3733 r = apply_restrict_realtime(unit, context);
3734 if (r < 0) {
3735 *exit_status = EXIT_SECCOMP;
12145637 3736 return log_unit_error_errno(unit, r, "Failed to apply realtime restrictions: %m");
f4170c67
LP
3737 }
3738
f69567cb
LP
3739 r = apply_restrict_suid_sgid(unit, context);
3740 if (r < 0) {
3741 *exit_status = EXIT_SECCOMP;
3742 return log_unit_error_errno(unit, r, "Failed to apply SUID/SGID restrictions: %m");
3743 }
3744
add00535
LP
3745 r = apply_restrict_namespaces(unit, context);
3746 if (r < 0) {
3747 *exit_status = EXIT_SECCOMP;
12145637 3748 return log_unit_error_errno(unit, r, "Failed to apply namespace restrictions: %m");
add00535
LP
3749 }
3750
469830d1
LP
3751 r = apply_protect_sysctl(unit, context);
3752 if (r < 0) {
3753 *exit_status = EXIT_SECCOMP;
12145637 3754 return log_unit_error_errno(unit, r, "Failed to apply sysctl restrictions: %m");
502d704e
DH
3755 }
3756
469830d1
LP
3757 r = apply_protect_kernel_modules(unit, context);
3758 if (r < 0) {
3759 *exit_status = EXIT_SECCOMP;
12145637 3760 return log_unit_error_errno(unit, r, "Failed to apply module loading restrictions: %m");
59eeb84b
LP
3761 }
3762
84703040
KK
3763 r = apply_protect_kernel_logs(unit, context);
3764 if (r < 0) {
3765 *exit_status = EXIT_SECCOMP;
3766 return log_unit_error_errno(unit, r, "Failed to apply kernel log restrictions: %m");
3767 }
3768
469830d1
LP
3769 r = apply_private_devices(unit, context);
3770 if (r < 0) {
3771 *exit_status = EXIT_SECCOMP;
12145637 3772 return log_unit_error_errno(unit, r, "Failed to set up private devices: %m");
469830d1
LP
3773 }
3774
3775 r = apply_syscall_archs(unit, context);
3776 if (r < 0) {
3777 *exit_status = EXIT_SECCOMP;
12145637 3778 return log_unit_error_errno(unit, r, "Failed to apply syscall architecture restrictions: %m");
ba128bb8
LP
3779 }
3780
78e864e5
TM
3781 r = apply_lock_personality(unit, context);
3782 if (r < 0) {
3783 *exit_status = EXIT_SECCOMP;
12145637 3784 return log_unit_error_errno(unit, r, "Failed to lock personalities: %m");
78e864e5
TM
3785 }
3786
5cd9cd35
LP
3787 /* This really should remain the last step before the execve(), to make sure our own code is unaffected
3788 * by the filter as little as possible. */
165a31c0 3789 r = apply_syscall_filter(unit, context, needs_ambient_hack);
469830d1
LP
3790 if (r < 0) {
3791 *exit_status = EXIT_SECCOMP;
12145637 3792 return log_unit_error_errno(unit, r, "Failed to apply system call filters: %m");
d35fbf6b
DM
3793 }
3794#endif
d35fbf6b 3795 }
034c6ed7 3796
00819cc1
LP
3797 if (!strv_isempty(context->unset_environment)) {
3798 char **ee = NULL;
3799
3800 ee = strv_env_delete(accum_env, 1, context->unset_environment);
3801 if (!ee) {
3802 *exit_status = EXIT_MEMORY;
12145637 3803 return log_oom();
00819cc1
LP
3804 }
3805
130d3d22 3806 strv_free_and_replace(accum_env, ee);
00819cc1
LP
3807 }
3808
7ca69792
AZ
3809 if (!FLAGS_SET(command->flags, EXEC_COMMAND_NO_ENV_EXPAND)) {
3810 replaced_argv = replace_env_argv(command->argv, accum_env);
3811 if (!replaced_argv) {
3812 *exit_status = EXIT_MEMORY;
3813 return log_oom();
3814 }
3815 final_argv = replaced_argv;
3816 } else
3817 final_argv = command->argv;
034c6ed7 3818
f1d34068 3819 if (DEBUG_LOGGING) {
d35fbf6b 3820 _cleanup_free_ char *line;
81a2b7ce 3821
d35fbf6b 3822 line = exec_command_line(final_argv);
a1230ff9 3823 if (line)
f2341e0a 3824 log_struct(LOG_DEBUG,
f2341e0a
LP
3825 "EXECUTABLE=%s", command->path,
3826 LOG_UNIT_MESSAGE(unit, "Executing: %s", line),
ba360bb0 3827 LOG_UNIT_ID(unit),
a1230ff9 3828 LOG_UNIT_INVOCATION_ID(unit));
d35fbf6b 3829 }
dd305ec9 3830
5686391b
LP
3831 if (exec_fd >= 0) {
3832 uint8_t hot = 1;
3833
3834 /* We have finished with all our initializations. Let's now let the manager know that. From this point
3835 * on, if the manager sees POLLHUP on the exec_fd, then execve() was successful. */
3836
3837 if (write(exec_fd, &hot, sizeof(hot)) < 0) {
3838 *exit_status = EXIT_EXEC;
3839 return log_unit_error_errno(unit, errno, "Failed to enable exec_fd: %m");
3840 }
3841 }
3842
2065ca69 3843 execve(command->path, final_argv, accum_env);
5686391b
LP
3844 r = -errno;
3845
3846 if (exec_fd >= 0) {
3847 uint8_t hot = 0;
3848
3849 /* The execve() failed. This means the exec_fd is still open. Which means we need to tell the manager
3850 * that POLLHUP on it no longer means execve() succeeded. */
3851
3852 if (write(exec_fd, &hot, sizeof(hot)) < 0) {
3853 *exit_status = EXIT_EXEC;
3854 return log_unit_error_errno(unit, errno, "Failed to disable exec_fd: %m");
3855 }
3856 }
12145637 3857
5686391b
LP
3858 if (r == -ENOENT && (command->flags & EXEC_COMMAND_IGNORE_FAILURE)) {
3859 log_struct_errno(LOG_INFO, r,
12145637
LP
3860 "MESSAGE_ID=" SD_MESSAGE_SPAWN_FAILED_STR,
3861 LOG_UNIT_ID(unit),
3862 LOG_UNIT_INVOCATION_ID(unit),
3863 LOG_UNIT_MESSAGE(unit, "Executable %s missing, skipping: %m",
3864 command->path),
a1230ff9 3865 "EXECUTABLE=%s", command->path);
12145637
LP
3866 return 0;
3867 }
3868
ff0af2a1 3869 *exit_status = EXIT_EXEC;
5686391b 3870 return log_unit_error_errno(unit, r, "Failed to execute command: %m");
d35fbf6b 3871}
81a2b7ce 3872
34cf6c43 3873static int exec_context_load_environment(const Unit *unit, const ExecContext *c, char ***l);
2caa38e9 3874static int exec_context_named_iofds(const ExecContext *c, const ExecParameters *p, int named_iofds[static 3]);
34cf6c43 3875
f2341e0a
LP
3876int exec_spawn(Unit *unit,
3877 ExecCommand *command,
d35fbf6b
DM
3878 const ExecContext *context,
3879 const ExecParameters *params,
3880 ExecRuntime *runtime,
29206d46 3881 DynamicCreds *dcreds,
d35fbf6b 3882 pid_t *ret) {
8351ceae 3883
ee39ca20 3884 int socket_fd, r, named_iofds[3] = { -1, -1, -1 }, *fds = NULL;
78f93209 3885 _cleanup_free_ char *subcgroup_path = NULL;
d35fbf6b 3886 _cleanup_strv_free_ char **files_env = NULL;
da6053d0 3887 size_t n_storage_fds = 0, n_socket_fds = 0;
ff0af2a1 3888 _cleanup_free_ char *line = NULL;
d35fbf6b 3889 pid_t pid;
8351ceae 3890
f2341e0a 3891 assert(unit);
d35fbf6b
DM
3892 assert(command);
3893 assert(context);
3894 assert(ret);
3895 assert(params);
25b583d7 3896 assert(params->fds || (params->n_socket_fds + params->n_storage_fds <= 0));
4298d0b5 3897
d35fbf6b
DM
3898 if (context->std_input == EXEC_INPUT_SOCKET ||
3899 context->std_output == EXEC_OUTPUT_SOCKET ||
3900 context->std_error == EXEC_OUTPUT_SOCKET) {
17df7223 3901
4c47affc 3902 if (params->n_socket_fds > 1) {
f2341e0a 3903 log_unit_error(unit, "Got more than one socket.");
d35fbf6b 3904 return -EINVAL;
ff0af2a1 3905 }
eef65bf3 3906
4c47affc 3907 if (params->n_socket_fds == 0) {
488ab41c
AA
3908 log_unit_error(unit, "Got no socket.");
3909 return -EINVAL;
3910 }
3911
d35fbf6b
DM
3912 socket_fd = params->fds[0];
3913 } else {
3914 socket_fd = -1;
3915 fds = params->fds;
9b141911 3916 n_socket_fds = params->n_socket_fds;
25b583d7 3917 n_storage_fds = params->n_storage_fds;
d35fbf6b 3918 }
94f04347 3919
34cf6c43 3920 r = exec_context_named_iofds(context, params, named_iofds);
52c239d7
LB
3921 if (r < 0)
3922 return log_unit_error_errno(unit, r, "Failed to load a named file descriptor: %m");
3923
f2341e0a 3924 r = exec_context_load_environment(unit, context, &files_env);
ff0af2a1 3925 if (r < 0)
f2341e0a 3926 return log_unit_error_errno(unit, r, "Failed to load environment files: %m");
034c6ed7 3927
ee39ca20 3928 line = exec_command_line(command->argv);
d35fbf6b
DM
3929 if (!line)
3930 return log_oom();
fab56fc5 3931
f2341e0a 3932 log_struct(LOG_DEBUG,
f2341e0a
LP
3933 LOG_UNIT_MESSAGE(unit, "About to execute: %s", line),
3934 "EXECUTABLE=%s", command->path,
ba360bb0 3935 LOG_UNIT_ID(unit),
a1230ff9 3936 LOG_UNIT_INVOCATION_ID(unit));
12145637 3937
78f93209
LP
3938 if (params->cgroup_path) {
3939 r = exec_parameters_get_cgroup_path(params, &subcgroup_path);
3940 if (r < 0)
3941 return log_unit_error_errno(unit, r, "Failed to acquire subcgroup path: %m");
3942 if (r > 0) { /* We are using a child cgroup */
3943 r = cg_create(SYSTEMD_CGROUP_CONTROLLER, subcgroup_path);
3944 if (r < 0)
3945 return log_unit_error_errno(unit, r, "Failed to create control group '%s': %m", subcgroup_path);
3946 }
3947 }
3948
d35fbf6b
DM
3949 pid = fork();
3950 if (pid < 0)
74129a12 3951 return log_unit_error_errno(unit, errno, "Failed to fork: %m");
d35fbf6b
DM
3952
3953 if (pid == 0) {
12145637 3954 int exit_status = EXIT_SUCCESS;
ff0af2a1 3955
f2341e0a
LP
3956 r = exec_child(unit,
3957 command,
ff0af2a1
LP
3958 context,
3959 params,
3960 runtime,
29206d46 3961 dcreds,
ff0af2a1 3962 socket_fd,
52c239d7 3963 named_iofds,
4c47affc 3964 fds,
9b141911 3965 n_socket_fds,
25b583d7 3966 n_storage_fds,
ff0af2a1 3967 files_env,
00d9ef85 3968 unit->manager->user_lookup_fds[1],
12145637
LP
3969 &exit_status);
3970
e1714f02
ZJS
3971 if (r < 0) {
3972 const char *status =
3973 exit_status_to_string(exit_status,
e04ed6db 3974 EXIT_STATUS_LIBC | EXIT_STATUS_SYSTEMD);
e1714f02 3975
12145637
LP
3976 log_struct_errno(LOG_ERR, r,
3977 "MESSAGE_ID=" SD_MESSAGE_SPAWN_FAILED_STR,
3978 LOG_UNIT_ID(unit),
3979 LOG_UNIT_INVOCATION_ID(unit),
3980 LOG_UNIT_MESSAGE(unit, "Failed at step %s spawning %s: %m",
e1714f02 3981 status, command->path),
a1230ff9 3982 "EXECUTABLE=%s", command->path);
e1714f02 3983 }
4c2630eb 3984
ff0af2a1 3985 _exit(exit_status);
034c6ed7
LP
3986 }
3987
f2341e0a 3988 log_unit_debug(unit, "Forked %s as "PID_FMT, command->path, pid);
23635a85 3989
78f93209
LP
3990 /* We add the new process to the cgroup both in the child (so that we can be sure that no user code is ever
3991 * executed outside of the cgroup) and in the parent (so that we can be sure that when we kill the cgroup the
3992 * process will be killed too). */
3993 if (subcgroup_path)
3994 (void) cg_attach(SYSTEMD_CGROUP_CONTROLLER, subcgroup_path, pid);
2da3263a 3995
b58b4116 3996 exec_status_start(&command->exec_status, pid);
9fb86720 3997
034c6ed7 3998 *ret = pid;
5cb5a6ff
LP
3999 return 0;
4000}
4001
034c6ed7 4002void exec_context_init(ExecContext *c) {
3536f49e
YW
4003 ExecDirectoryType i;
4004
034c6ed7
LP
4005 assert(c);
4006
4c12626c 4007 c->umask = 0022;
9eba9da4 4008 c->ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 0);
94f04347 4009 c->cpu_sched_policy = SCHED_OTHER;
071830ff 4010 c->syslog_priority = LOG_DAEMON|LOG_INFO;
74922904 4011 c->syslog_level_prefix = true;
353e12c2 4012 c->ignore_sigpipe = true;
3a43da28 4013 c->timer_slack_nsec = NSEC_INFINITY;
050f7277 4014 c->personality = PERSONALITY_INVALID;
72fd1768 4015 for (i = 0; i < _EXEC_DIRECTORY_TYPE_MAX; i++)
3536f49e 4016 c->directories[i].mode = 0755;
12213aed 4017 c->timeout_clean_usec = USEC_INFINITY;
a103496c 4018 c->capability_bounding_set = CAP_ALL;
aa9d574d
YW
4019 assert_cc(NAMESPACE_FLAGS_INITIAL != NAMESPACE_FLAGS_ALL);
4020 c->restrict_namespaces = NAMESPACE_FLAGS_INITIAL;
d3070fbd 4021 c->log_level_max = -1;
b070c7c0 4022 numa_policy_reset(&c->numa_policy);
034c6ed7
LP
4023}
4024
613b411c 4025void exec_context_done(ExecContext *c) {
3536f49e 4026 ExecDirectoryType i;
d3070fbd 4027 size_t l;
5cb5a6ff
LP
4028
4029 assert(c);
4030
6796073e
LP
4031 c->environment = strv_free(c->environment);
4032 c->environment_files = strv_free(c->environment_files);
b4c14404 4033 c->pass_environment = strv_free(c->pass_environment);
00819cc1 4034 c->unset_environment = strv_free(c->unset_environment);
8c7be95e 4035
31ce987c 4036 rlimit_free_all(c->rlimit);
034c6ed7 4037
2038c3f5 4038 for (l = 0; l < 3; l++) {
52c239d7 4039 c->stdio_fdname[l] = mfree(c->stdio_fdname[l]);
2038c3f5
LP
4040 c->stdio_file[l] = mfree(c->stdio_file[l]);
4041 }
52c239d7 4042
a1e58e8e
LP
4043 c->working_directory = mfree(c->working_directory);
4044 c->root_directory = mfree(c->root_directory);
915e6d16 4045 c->root_image = mfree(c->root_image);
a1e58e8e
LP
4046 c->tty_path = mfree(c->tty_path);
4047 c->syslog_identifier = mfree(c->syslog_identifier);
4048 c->user = mfree(c->user);
4049 c->group = mfree(c->group);
034c6ed7 4050
6796073e 4051 c->supplementary_groups = strv_free(c->supplementary_groups);
94f04347 4052
a1e58e8e 4053 c->pam_name = mfree(c->pam_name);
5b6319dc 4054
2a624c36
AP
4055 c->read_only_paths = strv_free(c->read_only_paths);
4056 c->read_write_paths = strv_free(c->read_write_paths);
4057 c->inaccessible_paths = strv_free(c->inaccessible_paths);
82c121a4 4058
d2d6c096 4059 bind_mount_free_many(c->bind_mounts, c->n_bind_mounts);
8e06d57c
YW
4060 c->bind_mounts = NULL;
4061 c->n_bind_mounts = 0;
2abd4e38
YW
4062 temporary_filesystem_free_many(c->temporary_filesystems, c->n_temporary_filesystems);
4063 c->temporary_filesystems = NULL;
4064 c->n_temporary_filesystems = 0;
d2d6c096 4065
0985c7c4 4066 cpu_set_reset(&c->cpu_set);
b070c7c0 4067 numa_policy_reset(&c->numa_policy);
86a3475b 4068
a1e58e8e
LP
4069 c->utmp_id = mfree(c->utmp_id);
4070 c->selinux_context = mfree(c->selinux_context);
4071 c->apparmor_profile = mfree(c->apparmor_profile);
5b8e1b77 4072 c->smack_process_label = mfree(c->smack_process_label);
eef65bf3 4073
8cfa775f 4074 c->syscall_filter = hashmap_free(c->syscall_filter);
525d3cc7
LP
4075 c->syscall_archs = set_free(c->syscall_archs);
4076 c->address_families = set_free(c->address_families);
e66cf1a3 4077
72fd1768 4078 for (i = 0; i < _EXEC_DIRECTORY_TYPE_MAX; i++)
3536f49e 4079 c->directories[i].paths = strv_free(c->directories[i].paths);
d3070fbd
LP
4080
4081 c->log_level_max = -1;
4082
4083 exec_context_free_log_extra_fields(c);
08f3be7a 4084
5ac1530e
ZJS
4085 c->log_ratelimit_interval_usec = 0;
4086 c->log_ratelimit_burst = 0;
90fc172e 4087
08f3be7a
LP
4088 c->stdin_data = mfree(c->stdin_data);
4089 c->stdin_data_size = 0;
a8d08f39
LP
4090
4091 c->network_namespace_path = mfree(c->network_namespace_path);
e66cf1a3
LP
4092}
4093
34cf6c43 4094int exec_context_destroy_runtime_directory(const ExecContext *c, const char *runtime_prefix) {
e66cf1a3
LP
4095 char **i;
4096
4097 assert(c);
4098
4099 if (!runtime_prefix)
4100 return 0;
4101
3536f49e 4102 STRV_FOREACH(i, c->directories[EXEC_DIRECTORY_RUNTIME].paths) {
e66cf1a3
LP
4103 _cleanup_free_ char *p;
4104
494d0247
YW
4105 if (exec_directory_is_private(c, EXEC_DIRECTORY_RUNTIME))
4106 p = path_join(runtime_prefix, "private", *i);
4107 else
4108 p = path_join(runtime_prefix, *i);
e66cf1a3
LP
4109 if (!p)
4110 return -ENOMEM;
4111
7bc4bf4a
LP
4112 /* We execute this synchronously, since we need to be sure this is gone when we start the
4113 * service next. */
c6878637 4114 (void) rm_rf(p, REMOVE_ROOT);
e66cf1a3
LP
4115 }
4116
4117 return 0;
5cb5a6ff
LP
4118}
4119
34cf6c43 4120static void exec_command_done(ExecCommand *c) {
43d0fcbd
LP
4121 assert(c);
4122
a1e58e8e 4123 c->path = mfree(c->path);
6796073e 4124 c->argv = strv_free(c->argv);
43d0fcbd
LP
4125}
4126
da6053d0
LP
4127void exec_command_done_array(ExecCommand *c, size_t n) {
4128 size_t i;
43d0fcbd
LP
4129
4130 for (i = 0; i < n; i++)
4131 exec_command_done(c+i);
4132}
4133
f1acf85a 4134ExecCommand* exec_command_free_list(ExecCommand *c) {
5cb5a6ff
LP
4135 ExecCommand *i;
4136
4137 while ((i = c)) {
71fda00f 4138 LIST_REMOVE(command, c, i);
43d0fcbd 4139 exec_command_done(i);
5cb5a6ff
LP
4140 free(i);
4141 }
f1acf85a
ZJS
4142
4143 return NULL;
5cb5a6ff
LP
4144}
4145
da6053d0
LP
4146void exec_command_free_array(ExecCommand **c, size_t n) {
4147 size_t i;
034c6ed7 4148
f1acf85a
ZJS
4149 for (i = 0; i < n; i++)
4150 c[i] = exec_command_free_list(c[i]);
034c6ed7
LP
4151}
4152
6a1d4d9f
LP
4153void exec_command_reset_status_array(ExecCommand *c, size_t n) {
4154 size_t i;
4155
4156 for (i = 0; i < n; i++)
4157 exec_status_reset(&c[i].exec_status);
4158}
4159
4160void exec_command_reset_status_list_array(ExecCommand **c, size_t n) {
4161 size_t i;
4162
4163 for (i = 0; i < n; i++) {
4164 ExecCommand *z;
4165
4166 LIST_FOREACH(command, z, c[i])
4167 exec_status_reset(&z->exec_status);
4168 }
4169}
4170
039f0e70 4171typedef struct InvalidEnvInfo {
34cf6c43 4172 const Unit *unit;
039f0e70
LP
4173 const char *path;
4174} InvalidEnvInfo;
4175
4176static void invalid_env(const char *p, void *userdata) {
4177 InvalidEnvInfo *info = userdata;
4178
f2341e0a 4179 log_unit_error(info->unit, "Ignoring invalid environment assignment '%s': %s", p, info->path);
039f0e70
LP
4180}
4181
52c239d7
LB
4182const char* exec_context_fdname(const ExecContext *c, int fd_index) {
4183 assert(c);
4184
4185 switch (fd_index) {
5073ff6b 4186
52c239d7
LB
4187 case STDIN_FILENO:
4188 if (c->std_input != EXEC_INPUT_NAMED_FD)
4189 return NULL;
5073ff6b 4190
52c239d7 4191 return c->stdio_fdname[STDIN_FILENO] ?: "stdin";
5073ff6b 4192
52c239d7
LB
4193 case STDOUT_FILENO:
4194 if (c->std_output != EXEC_OUTPUT_NAMED_FD)
4195 return NULL;
5073ff6b 4196
52c239d7 4197 return c->stdio_fdname[STDOUT_FILENO] ?: "stdout";
5073ff6b 4198
52c239d7
LB
4199 case STDERR_FILENO:
4200 if (c->std_error != EXEC_OUTPUT_NAMED_FD)
4201 return NULL;
5073ff6b 4202
52c239d7 4203 return c->stdio_fdname[STDERR_FILENO] ?: "stderr";
5073ff6b 4204
52c239d7
LB
4205 default:
4206 return NULL;
4207 }
4208}
4209
2caa38e9
LP
4210static int exec_context_named_iofds(
4211 const ExecContext *c,
4212 const ExecParameters *p,
4213 int named_iofds[static 3]) {
4214
da6053d0 4215 size_t i, targets;
56fbd561 4216 const char* stdio_fdname[3];
da6053d0 4217 size_t n_fds;
52c239d7
LB
4218
4219 assert(c);
4220 assert(p);
2caa38e9 4221 assert(named_iofds);
52c239d7
LB
4222
4223 targets = (c->std_input == EXEC_INPUT_NAMED_FD) +
4224 (c->std_output == EXEC_OUTPUT_NAMED_FD) +
4225 (c->std_error == EXEC_OUTPUT_NAMED_FD);
4226
4227 for (i = 0; i < 3; i++)
4228 stdio_fdname[i] = exec_context_fdname(c, i);
4229
4c47affc
FB
4230 n_fds = p->n_storage_fds + p->n_socket_fds;
4231
4232 for (i = 0; i < n_fds && targets > 0; i++)
56fbd561
ZJS
4233 if (named_iofds[STDIN_FILENO] < 0 &&
4234 c->std_input == EXEC_INPUT_NAMED_FD &&
4235 stdio_fdname[STDIN_FILENO] &&
4236 streq(p->fd_names[i], stdio_fdname[STDIN_FILENO])) {
4237
52c239d7
LB
4238 named_iofds[STDIN_FILENO] = p->fds[i];
4239 targets--;
56fbd561
ZJS
4240
4241 } else if (named_iofds[STDOUT_FILENO] < 0 &&
4242 c->std_output == EXEC_OUTPUT_NAMED_FD &&
4243 stdio_fdname[STDOUT_FILENO] &&
4244 streq(p->fd_names[i], stdio_fdname[STDOUT_FILENO])) {
4245
52c239d7
LB
4246 named_iofds[STDOUT_FILENO] = p->fds[i];
4247 targets--;
56fbd561
ZJS
4248
4249 } else if (named_iofds[STDERR_FILENO] < 0 &&
4250 c->std_error == EXEC_OUTPUT_NAMED_FD &&
4251 stdio_fdname[STDERR_FILENO] &&
4252 streq(p->fd_names[i], stdio_fdname[STDERR_FILENO])) {
4253
52c239d7
LB
4254 named_iofds[STDERR_FILENO] = p->fds[i];
4255 targets--;
4256 }
4257
56fbd561 4258 return targets == 0 ? 0 : -ENOENT;
52c239d7
LB
4259}
4260
34cf6c43 4261static int exec_context_load_environment(const Unit *unit, const ExecContext *c, char ***l) {
8c7be95e
LP
4262 char **i, **r = NULL;
4263
4264 assert(c);
4265 assert(l);
4266
4267 STRV_FOREACH(i, c->environment_files) {
4268 char *fn;
52511fae
ZJS
4269 int k;
4270 unsigned n;
8c7be95e
LP
4271 bool ignore = false;
4272 char **p;
7fd1b19b 4273 _cleanup_globfree_ glob_t pglob = {};
8c7be95e
LP
4274
4275 fn = *i;
4276
4277 if (fn[0] == '-') {
4278 ignore = true;
313cefa1 4279 fn++;
8c7be95e
LP
4280 }
4281
4282 if (!path_is_absolute(fn)) {
8c7be95e
LP
4283 if (ignore)
4284 continue;
4285
4286 strv_free(r);
4287 return -EINVAL;
4288 }
4289
2bef10ab 4290 /* Filename supports globbing, take all matching files */
d8c92e8b
ZJS
4291 k = safe_glob(fn, 0, &pglob);
4292 if (k < 0) {
2bef10ab
PL
4293 if (ignore)
4294 continue;
8c7be95e 4295
2bef10ab 4296 strv_free(r);
d8c92e8b 4297 return k;
2bef10ab 4298 }
8c7be95e 4299
d8c92e8b
ZJS
4300 /* When we don't match anything, -ENOENT should be returned */
4301 assert(pglob.gl_pathc > 0);
4302
4303 for (n = 0; n < pglob.gl_pathc; n++) {
aa8fbc74 4304 k = load_env_file(NULL, pglob.gl_pathv[n], &p);
2bef10ab
PL
4305 if (k < 0) {
4306 if (ignore)
4307 continue;
8c7be95e 4308
2bef10ab 4309 strv_free(r);
2bef10ab 4310 return k;
e9c1ea9d 4311 }
ebc05a09 4312 /* Log invalid environment variables with filename */
039f0e70
LP
4313 if (p) {
4314 InvalidEnvInfo info = {
f2341e0a 4315 .unit = unit,
039f0e70
LP
4316 .path = pglob.gl_pathv[n]
4317 };
4318
4319 p = strv_env_clean_with_callback(p, invalid_env, &info);
4320 }
8c7be95e 4321
234519ae 4322 if (!r)
2bef10ab
PL
4323 r = p;
4324 else {
4325 char **m;
8c7be95e 4326
2bef10ab
PL
4327 m = strv_env_merge(2, r, p);
4328 strv_free(r);
4329 strv_free(p);
c84a9488 4330 if (!m)
2bef10ab 4331 return -ENOMEM;
2bef10ab
PL
4332
4333 r = m;
4334 }
8c7be95e
LP
4335 }
4336 }
4337
4338 *l = r;
4339
4340 return 0;
4341}
4342
6ac8fdc9 4343static bool tty_may_match_dev_console(const char *tty) {
7b912648 4344 _cleanup_free_ char *resolved = NULL;
6ac8fdc9 4345
1e22b5cd
LP
4346 if (!tty)
4347 return true;
4348
a119ec7c 4349 tty = skip_dev_prefix(tty);
6ac8fdc9
MS
4350
4351 /* trivial identity? */
4352 if (streq(tty, "console"))
4353 return true;
4354
7b912648
LP
4355 if (resolve_dev_console(&resolved) < 0)
4356 return true; /* if we could not resolve, assume it may */
6ac8fdc9
MS
4357
4358 /* "tty0" means the active VC, so it may be the same sometimes */
955f1c85 4359 return path_equal(resolved, tty) || (streq(resolved, "tty0") && tty_is_vc(tty));
6ac8fdc9
MS
4360}
4361
6c0ae739
LP
4362static bool exec_context_may_touch_tty(const ExecContext *ec) {
4363 assert(ec);
1e22b5cd 4364
6c0ae739 4365 return ec->tty_reset ||
1e22b5cd
LP
4366 ec->tty_vhangup ||
4367 ec->tty_vt_disallocate ||
6ac8fdc9
MS
4368 is_terminal_input(ec->std_input) ||
4369 is_terminal_output(ec->std_output) ||
6c0ae739
LP
4370 is_terminal_output(ec->std_error);
4371}
4372
4373bool exec_context_may_touch_console(const ExecContext *ec) {
4374
4375 return exec_context_may_touch_tty(ec) &&
1e22b5cd 4376 tty_may_match_dev_console(exec_context_tty_path(ec));
6ac8fdc9
MS
4377}
4378
15ae422b
LP
4379static void strv_fprintf(FILE *f, char **l) {
4380 char **g;
4381
4382 assert(f);
4383
4384 STRV_FOREACH(g, l)
4385 fprintf(f, " %s", *g);
4386}
4387
34cf6c43 4388void exec_context_dump(const ExecContext *c, FILE* f, const char *prefix) {
12213aed 4389 char **e, **d, buf_clean[FORMAT_TIMESPAN_MAX];
d3070fbd 4390 ExecDirectoryType dt;
94f04347 4391 unsigned i;
add00535 4392 int r;
9eba9da4 4393
5cb5a6ff
LP
4394 assert(c);
4395 assert(f);
4396
4ad49000 4397 prefix = strempty(prefix);
5cb5a6ff
LP
4398
4399 fprintf(f,
94f04347
LP
4400 "%sUMask: %04o\n"
4401 "%sWorkingDirectory: %s\n"
451a074f 4402 "%sRootDirectory: %s\n"
15ae422b 4403 "%sNonBlocking: %s\n"
64747e2d 4404 "%sPrivateTmp: %s\n"
7f112f50 4405 "%sPrivateDevices: %s\n"
59eeb84b 4406 "%sProtectKernelTunables: %s\n"
e66a2f65 4407 "%sProtectKernelModules: %s\n"
84703040 4408 "%sProtectKernelLogs: %s\n"
59eeb84b 4409 "%sProtectControlGroups: %s\n"
d251207d
LP
4410 "%sPrivateNetwork: %s\n"
4411 "%sPrivateUsers: %s\n"
1b8689f9
LP
4412 "%sProtectHome: %s\n"
4413 "%sProtectSystem: %s\n"
5d997827 4414 "%sMountAPIVFS: %s\n"
f3e43635 4415 "%sIgnoreSIGPIPE: %s\n"
f4170c67 4416 "%sMemoryDenyWriteExecute: %s\n"
b1edf445 4417 "%sRestrictRealtime: %s\n"
f69567cb 4418 "%sRestrictSUIDSGID: %s\n"
aecd5ac6
TM
4419 "%sKeyringMode: %s\n"
4420 "%sProtectHostname: %s\n",
5cb5a6ff 4421 prefix, c->umask,
9eba9da4 4422 prefix, c->working_directory ? c->working_directory : "/",
451a074f 4423 prefix, c->root_directory ? c->root_directory : "/",
15ae422b 4424 prefix, yes_no(c->non_blocking),
64747e2d 4425 prefix, yes_no(c->private_tmp),
7f112f50 4426 prefix, yes_no(c->private_devices),
59eeb84b 4427 prefix, yes_no(c->protect_kernel_tunables),
e66a2f65 4428 prefix, yes_no(c->protect_kernel_modules),
84703040 4429 prefix, yes_no(c->protect_kernel_logs),
59eeb84b 4430 prefix, yes_no(c->protect_control_groups),
d251207d
LP
4431 prefix, yes_no(c->private_network),
4432 prefix, yes_no(c->private_users),
1b8689f9
LP
4433 prefix, protect_home_to_string(c->protect_home),
4434 prefix, protect_system_to_string(c->protect_system),
5d997827 4435 prefix, yes_no(c->mount_apivfs),
f3e43635 4436 prefix, yes_no(c->ignore_sigpipe),
f4170c67 4437 prefix, yes_no(c->memory_deny_write_execute),
b1edf445 4438 prefix, yes_no(c->restrict_realtime),
f69567cb 4439 prefix, yes_no(c->restrict_suid_sgid),
aecd5ac6
TM
4440 prefix, exec_keyring_mode_to_string(c->keyring_mode),
4441 prefix, yes_no(c->protect_hostname));
fb33a393 4442
915e6d16
LP
4443 if (c->root_image)
4444 fprintf(f, "%sRootImage: %s\n", prefix, c->root_image);
4445
8c7be95e
LP
4446 STRV_FOREACH(e, c->environment)
4447 fprintf(f, "%sEnvironment: %s\n", prefix, *e);
4448
4449 STRV_FOREACH(e, c->environment_files)
4450 fprintf(f, "%sEnvironmentFile: %s\n", prefix, *e);
94f04347 4451
b4c14404
FB
4452 STRV_FOREACH(e, c->pass_environment)
4453 fprintf(f, "%sPassEnvironment: %s\n", prefix, *e);
4454
00819cc1
LP
4455 STRV_FOREACH(e, c->unset_environment)
4456 fprintf(f, "%sUnsetEnvironment: %s\n", prefix, *e);
4457
53f47dfc
YW
4458 fprintf(f, "%sRuntimeDirectoryPreserve: %s\n", prefix, exec_preserve_mode_to_string(c->runtime_directory_preserve_mode));
4459
72fd1768 4460 for (dt = 0; dt < _EXEC_DIRECTORY_TYPE_MAX; dt++) {
3536f49e
YW
4461 fprintf(f, "%s%sMode: %04o\n", prefix, exec_directory_type_to_string(dt), c->directories[dt].mode);
4462
4463 STRV_FOREACH(d, c->directories[dt].paths)
4464 fprintf(f, "%s%s: %s\n", prefix, exec_directory_type_to_string(dt), *d);
4465 }
c2bbd90b 4466
12213aed
YW
4467 fprintf(f,
4468 "%sTimeoutCleanSec: %s\n",
4469 prefix, format_timespan(buf_clean, sizeof(buf_clean), c->timeout_clean_usec, USEC_PER_SEC));
4470
fb33a393
LP
4471 if (c->nice_set)
4472 fprintf(f,
4473 "%sNice: %i\n",
4474 prefix, c->nice);
4475
dd6c17b1 4476 if (c->oom_score_adjust_set)
fb33a393 4477 fprintf(f,
dd6c17b1
LP
4478 "%sOOMScoreAdjust: %i\n",
4479 prefix, c->oom_score_adjust);
9eba9da4 4480
94f04347 4481 for (i = 0; i < RLIM_NLIMITS; i++)
3c11da9d 4482 if (c->rlimit[i]) {
4c3a2b84 4483 fprintf(f, "%sLimit%s: " RLIM_FMT "\n",
3c11da9d 4484 prefix, rlimit_to_string(i), c->rlimit[i]->rlim_max);
4c3a2b84 4485 fprintf(f, "%sLimit%sSoft: " RLIM_FMT "\n",
3c11da9d
EV
4486 prefix, rlimit_to_string(i), c->rlimit[i]->rlim_cur);
4487 }
94f04347 4488
f8b69d1d 4489 if (c->ioprio_set) {
1756a011 4490 _cleanup_free_ char *class_str = NULL;
f8b69d1d 4491
837df140
YW
4492 r = ioprio_class_to_string_alloc(IOPRIO_PRIO_CLASS(c->ioprio), &class_str);
4493 if (r >= 0)
4494 fprintf(f, "%sIOSchedulingClass: %s\n", prefix, class_str);
4495
4496 fprintf(f, "%sIOPriority: %lu\n", prefix, IOPRIO_PRIO_DATA(c->ioprio));
f8b69d1d 4497 }
94f04347 4498
f8b69d1d 4499 if (c->cpu_sched_set) {
1756a011 4500 _cleanup_free_ char *policy_str = NULL;
f8b69d1d 4501
837df140
YW
4502 r = sched_policy_to_string_alloc(c->cpu_sched_policy, &policy_str);
4503 if (r >= 0)
4504 fprintf(f, "%sCPUSchedulingPolicy: %s\n", prefix, policy_str);
4505
94f04347 4506 fprintf(f,
38b48754
LP
4507 "%sCPUSchedulingPriority: %i\n"
4508 "%sCPUSchedulingResetOnFork: %s\n",
38b48754
LP
4509 prefix, c->cpu_sched_priority,
4510 prefix, yes_no(c->cpu_sched_reset_on_fork));
b929bf04 4511 }
94f04347 4512
0985c7c4 4513 if (c->cpu_set.set) {
e7fca352
MS
4514 _cleanup_free_ char *affinity = NULL;
4515
4516 affinity = cpu_set_to_range_string(&c->cpu_set);
4517 fprintf(f, "%sCPUAffinity: %s\n", prefix, affinity);
94f04347
LP
4518 }
4519
b070c7c0
MS
4520 if (mpol_is_valid(numa_policy_get_type(&c->numa_policy))) {
4521 _cleanup_free_ char *nodes = NULL;
4522
4523 nodes = cpu_set_to_range_string(&c->numa_policy.nodes);
4524 fprintf(f, "%sNUMAPolicy: %s\n", prefix, mpol_to_string(numa_policy_get_type(&c->numa_policy)));
4525 fprintf(f, "%sNUMAMask: %s\n", prefix, strnull(nodes));
4526 }
4527
3a43da28 4528 if (c->timer_slack_nsec != NSEC_INFINITY)
ccd06097 4529 fprintf(f, "%sTimerSlackNSec: "NSEC_FMT "\n", prefix, c->timer_slack_nsec);
94f04347
LP
4530
4531 fprintf(f,
80876c20
LP
4532 "%sStandardInput: %s\n"
4533 "%sStandardOutput: %s\n"
4534 "%sStandardError: %s\n",
4535 prefix, exec_input_to_string(c->std_input),
4536 prefix, exec_output_to_string(c->std_output),
4537 prefix, exec_output_to_string(c->std_error));
4538
befc4a80
LP
4539 if (c->std_input == EXEC_INPUT_NAMED_FD)
4540 fprintf(f, "%sStandardInputFileDescriptorName: %s\n", prefix, c->stdio_fdname[STDIN_FILENO]);
4541 if (c->std_output == EXEC_OUTPUT_NAMED_FD)
4542 fprintf(f, "%sStandardOutputFileDescriptorName: %s\n", prefix, c->stdio_fdname[STDOUT_FILENO]);
4543 if (c->std_error == EXEC_OUTPUT_NAMED_FD)
4544 fprintf(f, "%sStandardErrorFileDescriptorName: %s\n", prefix, c->stdio_fdname[STDERR_FILENO]);
4545
4546 if (c->std_input == EXEC_INPUT_FILE)
4547 fprintf(f, "%sStandardInputFile: %s\n", prefix, c->stdio_file[STDIN_FILENO]);
4548 if (c->std_output == EXEC_OUTPUT_FILE)
4549 fprintf(f, "%sStandardOutputFile: %s\n", prefix, c->stdio_file[STDOUT_FILENO]);
566b7d23
ZD
4550 if (c->std_output == EXEC_OUTPUT_FILE_APPEND)
4551 fprintf(f, "%sStandardOutputFileToAppend: %s\n", prefix, c->stdio_file[STDOUT_FILENO]);
befc4a80
LP
4552 if (c->std_error == EXEC_OUTPUT_FILE)
4553 fprintf(f, "%sStandardErrorFile: %s\n", prefix, c->stdio_file[STDERR_FILENO]);
566b7d23
ZD
4554 if (c->std_error == EXEC_OUTPUT_FILE_APPEND)
4555 fprintf(f, "%sStandardErrorFileToAppend: %s\n", prefix, c->stdio_file[STDERR_FILENO]);
befc4a80 4556
80876c20
LP
4557 if (c->tty_path)
4558 fprintf(f,
6ea832a2
LP
4559 "%sTTYPath: %s\n"
4560 "%sTTYReset: %s\n"
4561 "%sTTYVHangup: %s\n"
4562 "%sTTYVTDisallocate: %s\n",
4563 prefix, c->tty_path,
4564 prefix, yes_no(c->tty_reset),
4565 prefix, yes_no(c->tty_vhangup),
4566 prefix, yes_no(c->tty_vt_disallocate));
94f04347 4567
9f6444eb
LP
4568 if (IN_SET(c->std_output,
4569 EXEC_OUTPUT_SYSLOG,
4570 EXEC_OUTPUT_KMSG,
4571 EXEC_OUTPUT_JOURNAL,
4572 EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
4573 EXEC_OUTPUT_KMSG_AND_CONSOLE,
4574 EXEC_OUTPUT_JOURNAL_AND_CONSOLE) ||
4575 IN_SET(c->std_error,
4576 EXEC_OUTPUT_SYSLOG,
4577 EXEC_OUTPUT_KMSG,
4578 EXEC_OUTPUT_JOURNAL,
4579 EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
4580 EXEC_OUTPUT_KMSG_AND_CONSOLE,
4581 EXEC_OUTPUT_JOURNAL_AND_CONSOLE)) {
f8b69d1d 4582
5ce70e5b 4583 _cleanup_free_ char *fac_str = NULL, *lvl_str = NULL;
f8b69d1d 4584
837df140
YW
4585 r = log_facility_unshifted_to_string_alloc(c->syslog_priority >> 3, &fac_str);
4586 if (r >= 0)
4587 fprintf(f, "%sSyslogFacility: %s\n", prefix, fac_str);
f8b69d1d 4588
837df140
YW
4589 r = log_level_to_string_alloc(LOG_PRI(c->syslog_priority), &lvl_str);
4590 if (r >= 0)
4591 fprintf(f, "%sSyslogLevel: %s\n", prefix, lvl_str);
f8b69d1d 4592 }
94f04347 4593
d3070fbd
LP
4594 if (c->log_level_max >= 0) {
4595 _cleanup_free_ char *t = NULL;
4596
4597 (void) log_level_to_string_alloc(c->log_level_max, &t);
4598
4599 fprintf(f, "%sLogLevelMax: %s\n", prefix, strna(t));
4600 }
4601
5ac1530e 4602 if (c->log_ratelimit_interval_usec > 0) {
90fc172e
AZ
4603 char buf_timespan[FORMAT_TIMESPAN_MAX];
4604
4605 fprintf(f,
4606 "%sLogRateLimitIntervalSec: %s\n",
5ac1530e 4607 prefix, format_timespan(buf_timespan, sizeof(buf_timespan), c->log_ratelimit_interval_usec, USEC_PER_SEC));
90fc172e
AZ
4608 }
4609
5ac1530e
ZJS
4610 if (c->log_ratelimit_burst > 0)
4611 fprintf(f, "%sLogRateLimitBurst: %u\n", prefix, c->log_ratelimit_burst);
90fc172e 4612
d3070fbd
LP
4613 if (c->n_log_extra_fields > 0) {
4614 size_t j;
4615
4616 for (j = 0; j < c->n_log_extra_fields; j++) {
4617 fprintf(f, "%sLogExtraFields: ", prefix);
4618 fwrite(c->log_extra_fields[j].iov_base,
4619 1, c->log_extra_fields[j].iov_len,
4620 f);
4621 fputc('\n', f);
4622 }
4623 }
4624
07d46372
YW
4625 if (c->secure_bits) {
4626 _cleanup_free_ char *str = NULL;
4627
4628 r = secure_bits_to_string_alloc(c->secure_bits, &str);
4629 if (r >= 0)
4630 fprintf(f, "%sSecure Bits: %s\n", prefix, str);
4631 }
94f04347 4632
a103496c 4633 if (c->capability_bounding_set != CAP_ALL) {
dd1f5bd0 4634 _cleanup_free_ char *str = NULL;
94f04347 4635
dd1f5bd0
YW
4636 r = capability_set_to_string_alloc(c->capability_bounding_set, &str);
4637 if (r >= 0)
4638 fprintf(f, "%sCapabilityBoundingSet: %s\n", prefix, str);
755d4b67
IP
4639 }
4640
4641 if (c->capability_ambient_set != 0) {
dd1f5bd0 4642 _cleanup_free_ char *str = NULL;
755d4b67 4643
dd1f5bd0
YW
4644 r = capability_set_to_string_alloc(c->capability_ambient_set, &str);
4645 if (r >= 0)
4646 fprintf(f, "%sAmbientCapabilities: %s\n", prefix, str);
94f04347
LP
4647 }
4648
4649 if (c->user)
f2d3769a 4650 fprintf(f, "%sUser: %s\n", prefix, c->user);
94f04347 4651 if (c->group)
f2d3769a 4652 fprintf(f, "%sGroup: %s\n", prefix, c->group);
94f04347 4653
29206d46
LP
4654 fprintf(f, "%sDynamicUser: %s\n", prefix, yes_no(c->dynamic_user));
4655
ac6e8be6 4656 if (!strv_isempty(c->supplementary_groups)) {
94f04347 4657 fprintf(f, "%sSupplementaryGroups:", prefix);
15ae422b
LP
4658 strv_fprintf(f, c->supplementary_groups);
4659 fputs("\n", f);
4660 }
94f04347 4661
5b6319dc 4662 if (c->pam_name)
f2d3769a 4663 fprintf(f, "%sPAMName: %s\n", prefix, c->pam_name);
5b6319dc 4664
58629001 4665 if (!strv_isempty(c->read_write_paths)) {
2a624c36
AP
4666 fprintf(f, "%sReadWritePaths:", prefix);
4667 strv_fprintf(f, c->read_write_paths);
15ae422b
LP
4668 fputs("\n", f);
4669 }
4670
58629001 4671 if (!strv_isempty(c->read_only_paths)) {
2a624c36
AP
4672 fprintf(f, "%sReadOnlyPaths:", prefix);
4673 strv_fprintf(f, c->read_only_paths);
15ae422b
LP
4674 fputs("\n", f);
4675 }
94f04347 4676
58629001 4677 if (!strv_isempty(c->inaccessible_paths)) {
2a624c36
AP
4678 fprintf(f, "%sInaccessiblePaths:", prefix);
4679 strv_fprintf(f, c->inaccessible_paths);
94f04347
LP
4680 fputs("\n", f);
4681 }
2e22afe9 4682
d2d6c096 4683 if (c->n_bind_mounts > 0)
4ca763a9
YW
4684 for (i = 0; i < c->n_bind_mounts; i++)
4685 fprintf(f, "%s%s: %s%s:%s:%s\n", prefix,
d2d6c096 4686 c->bind_mounts[i].read_only ? "BindReadOnlyPaths" : "BindPaths",
4ca763a9 4687 c->bind_mounts[i].ignore_enoent ? "-": "",
d2d6c096
LP
4688 c->bind_mounts[i].source,
4689 c->bind_mounts[i].destination,
4690 c->bind_mounts[i].recursive ? "rbind" : "norbind");
d2d6c096 4691
2abd4e38
YW
4692 if (c->n_temporary_filesystems > 0)
4693 for (i = 0; i < c->n_temporary_filesystems; i++) {
4694 TemporaryFileSystem *t = c->temporary_filesystems + i;
4695
4696 fprintf(f, "%sTemporaryFileSystem: %s%s%s\n", prefix,
4697 t->path,
4698 isempty(t->options) ? "" : ":",
4699 strempty(t->options));
4700 }
4701
169c1bda
LP
4702 if (c->utmp_id)
4703 fprintf(f,
4704 "%sUtmpIdentifier: %s\n",
4705 prefix, c->utmp_id);
7b52a628
MS
4706
4707 if (c->selinux_context)
4708 fprintf(f,
5f8640fb
LP
4709 "%sSELinuxContext: %s%s\n",
4710 prefix, c->selinux_context_ignore ? "-" : "", c->selinux_context);
17df7223 4711
80c21aea
WC
4712 if (c->apparmor_profile)
4713 fprintf(f,
4714 "%sAppArmorProfile: %s%s\n",
4715 prefix, c->apparmor_profile_ignore ? "-" : "", c->apparmor_profile);
4716
4717 if (c->smack_process_label)
4718 fprintf(f,
4719 "%sSmackProcessLabel: %s%s\n",
4720 prefix, c->smack_process_label_ignore ? "-" : "", c->smack_process_label);
4721
050f7277 4722 if (c->personality != PERSONALITY_INVALID)
ac45f971
LP
4723 fprintf(f,
4724 "%sPersonality: %s\n",
4725 prefix, strna(personality_to_string(c->personality)));
4726
78e864e5
TM
4727 fprintf(f,
4728 "%sLockPersonality: %s\n",
4729 prefix, yes_no(c->lock_personality));
4730
17df7223 4731 if (c->syscall_filter) {
349cc4a5 4732#if HAVE_SECCOMP
17df7223 4733 Iterator j;
8cfa775f 4734 void *id, *val;
17df7223 4735 bool first = true;
351a19b1 4736#endif
17df7223
LP
4737
4738 fprintf(f,
57183d11 4739 "%sSystemCallFilter: ",
17df7223
LP
4740 prefix);
4741
4742 if (!c->syscall_whitelist)
4743 fputc('~', f);
4744
349cc4a5 4745#if HAVE_SECCOMP
8cfa775f 4746 HASHMAP_FOREACH_KEY(val, id, c->syscall_filter, j) {
17df7223 4747 _cleanup_free_ char *name = NULL;
8cfa775f
YW
4748 const char *errno_name = NULL;
4749 int num = PTR_TO_INT(val);
17df7223
LP
4750
4751 if (first)
4752 first = false;
4753 else
4754 fputc(' ', f);
4755
57183d11 4756 name = seccomp_syscall_resolve_num_arch(SCMP_ARCH_NATIVE, PTR_TO_INT(id) - 1);
17df7223 4757 fputs(strna(name), f);
8cfa775f
YW
4758
4759 if (num >= 0) {
4760 errno_name = errno_to_name(num);
4761 if (errno_name)
4762 fprintf(f, ":%s", errno_name);
4763 else
4764 fprintf(f, ":%d", num);
4765 }
17df7223 4766 }
351a19b1 4767#endif
17df7223
LP
4768
4769 fputc('\n', f);
4770 }
4771
57183d11 4772 if (c->syscall_archs) {
349cc4a5 4773#if HAVE_SECCOMP
57183d11
LP
4774 Iterator j;
4775 void *id;
4776#endif
4777
4778 fprintf(f,
4779 "%sSystemCallArchitectures:",
4780 prefix);
4781
349cc4a5 4782#if HAVE_SECCOMP
57183d11
LP
4783 SET_FOREACH(id, c->syscall_archs, j)
4784 fprintf(f, " %s", strna(seccomp_arch_to_string(PTR_TO_UINT32(id) - 1)));
4785#endif
4786 fputc('\n', f);
4787 }
4788
add00535
LP
4789 if (exec_context_restrict_namespaces_set(c)) {
4790 _cleanup_free_ char *s = NULL;
4791
86c2a9f1 4792 r = namespace_flags_to_string(c->restrict_namespaces, &s);
add00535
LP
4793 if (r >= 0)
4794 fprintf(f, "%sRestrictNamespaces: %s\n",
4795 prefix, s);
4796 }
4797
a8d08f39
LP
4798 if (c->network_namespace_path)
4799 fprintf(f,
4800 "%sNetworkNamespacePath: %s\n",
4801 prefix, c->network_namespace_path);
4802
3df90f24
YW
4803 if (c->syscall_errno > 0) {
4804 const char *errno_name;
4805
4806 fprintf(f, "%sSystemCallErrorNumber: ", prefix);
4807
4808 errno_name = errno_to_name(c->syscall_errno);
4809 if (errno_name)
4810 fprintf(f, "%s\n", errno_name);
4811 else
4812 fprintf(f, "%d\n", c->syscall_errno);
4813 }
5cb5a6ff
LP
4814}
4815
34cf6c43 4816bool exec_context_maintains_privileges(const ExecContext *c) {
a931ad47
LP
4817 assert(c);
4818
61233823 4819 /* Returns true if the process forked off would run under
a931ad47
LP
4820 * an unchanged UID or as root. */
4821
4822 if (!c->user)
4823 return true;
4824
4825 if (streq(c->user, "root") || streq(c->user, "0"))
4826 return true;
4827
4828 return false;
4829}
4830
34cf6c43 4831int exec_context_get_effective_ioprio(const ExecContext *c) {
7f452159
LP
4832 int p;
4833
4834 assert(c);
4835
4836 if (c->ioprio_set)
4837 return c->ioprio;
4838
4839 p = ioprio_get(IOPRIO_WHO_PROCESS, 0);
4840 if (p < 0)
4841 return IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 4);
4842
4843 return p;
4844}
4845
d3070fbd
LP
4846void exec_context_free_log_extra_fields(ExecContext *c) {
4847 size_t l;
4848
4849 assert(c);
4850
4851 for (l = 0; l < c->n_log_extra_fields; l++)
4852 free(c->log_extra_fields[l].iov_base);
4853 c->log_extra_fields = mfree(c->log_extra_fields);
4854 c->n_log_extra_fields = 0;
4855}
4856
6f765baf
LP
4857void exec_context_revert_tty(ExecContext *c) {
4858 int r;
4859
4860 assert(c);
4861
4862 /* First, reset the TTY (possibly kicking everybody else from the TTY) */
4863 exec_context_tty_reset(c, NULL);
4864
4865 /* And then undo what chown_terminal() did earlier. Note that we only do this if we have a path
4866 * configured. If the TTY was passed to us as file descriptor we assume the TTY is opened and managed
4867 * by whoever passed it to us and thus knows better when and how to chmod()/chown() it back. */
4868
4869 if (exec_context_may_touch_tty(c)) {
4870 const char *path;
4871
4872 path = exec_context_tty_path(c);
4873 if (path) {
4874 r = chmod_and_chown(path, TTY_MODE, 0, TTY_GID);
4875 if (r < 0 && r != -ENOENT)
4876 log_warning_errno(r, "Failed to reset TTY ownership/access mode of %s, ignoring: %m", path);
4877 }
4878 }
4879}
4880
4c2f5842
LP
4881int exec_context_get_clean_directories(
4882 ExecContext *c,
4883 char **prefix,
4884 ExecCleanMask mask,
4885 char ***ret) {
4886
4887 _cleanup_strv_free_ char **l = NULL;
4888 ExecDirectoryType t;
4889 int r;
4890
4891 assert(c);
4892 assert(prefix);
4893 assert(ret);
4894
4895 for (t = 0; t < _EXEC_DIRECTORY_TYPE_MAX; t++) {
4896 char **i;
4897
4898 if (!FLAGS_SET(mask, 1U << t))
4899 continue;
4900
4901 if (!prefix[t])
4902 continue;
4903
4904 STRV_FOREACH(i, c->directories[t].paths) {
4905 char *j;
4906
4907 j = path_join(prefix[t], *i);
4908 if (!j)
4909 return -ENOMEM;
4910
4911 r = strv_consume(&l, j);
4912 if (r < 0)
4913 return r;
7f622a19
YW
4914
4915 /* Also remove private directories unconditionally. */
4916 if (t != EXEC_DIRECTORY_CONFIGURATION) {
4917 j = path_join(prefix[t], "private", *i);
4918 if (!j)
4919 return -ENOMEM;
4920
4921 r = strv_consume(&l, j);
4922 if (r < 0)
4923 return r;
4924 }
4c2f5842
LP
4925 }
4926 }
4927
4928 *ret = TAKE_PTR(l);
4929 return 0;
4930}
4931
4932int exec_context_get_clean_mask(ExecContext *c, ExecCleanMask *ret) {
4933 ExecCleanMask mask = 0;
4934
4935 assert(c);
4936 assert(ret);
4937
4938 for (ExecDirectoryType t = 0; t < _EXEC_DIRECTORY_TYPE_MAX; t++)
4939 if (!strv_isempty(c->directories[t].paths))
4940 mask |= 1U << t;
4941
4942 *ret = mask;
4943 return 0;
4944}
4945
b58b4116 4946void exec_status_start(ExecStatus *s, pid_t pid) {
034c6ed7 4947 assert(s);
5cb5a6ff 4948
2ed26ed0
LP
4949 *s = (ExecStatus) {
4950 .pid = pid,
4951 };
4952
b58b4116
LP
4953 dual_timestamp_get(&s->start_timestamp);
4954}
4955
34cf6c43 4956void exec_status_exit(ExecStatus *s, const ExecContext *context, pid_t pid, int code, int status) {
b58b4116
LP
4957 assert(s);
4958
2ed26ed0
LP
4959 if (s->pid != pid) {
4960 *s = (ExecStatus) {
4961 .pid = pid,
4962 };
4963 }
b58b4116 4964
63983207 4965 dual_timestamp_get(&s->exit_timestamp);
9fb86720 4966
034c6ed7
LP
4967 s->code = code;
4968 s->status = status;
169c1bda 4969
6f765baf
LP
4970 if (context && context->utmp_id)
4971 (void) utmp_put_dead_process(context->utmp_id, pid, code, status);
9fb86720
LP
4972}
4973
6a1d4d9f
LP
4974void exec_status_reset(ExecStatus *s) {
4975 assert(s);
4976
4977 *s = (ExecStatus) {};
4978}
4979
34cf6c43 4980void exec_status_dump(const ExecStatus *s, FILE *f, const char *prefix) {
9fb86720
LP
4981 char buf[FORMAT_TIMESTAMP_MAX];
4982
4983 assert(s);
4984 assert(f);
4985
9fb86720
LP
4986 if (s->pid <= 0)
4987 return;
4988
4c940960
LP
4989 prefix = strempty(prefix);
4990
9fb86720 4991 fprintf(f,
ccd06097
ZJS
4992 "%sPID: "PID_FMT"\n",
4993 prefix, s->pid);
9fb86720 4994
af9d16e1 4995 if (dual_timestamp_is_set(&s->start_timestamp))
9fb86720
LP
4996 fprintf(f,
4997 "%sStart Timestamp: %s\n",
63983207 4998 prefix, format_timestamp(buf, sizeof(buf), s->start_timestamp.realtime));
9fb86720 4999
af9d16e1 5000 if (dual_timestamp_is_set(&s->exit_timestamp))
9fb86720
LP
5001 fprintf(f,
5002 "%sExit Timestamp: %s\n"
5003 "%sExit Code: %s\n"
5004 "%sExit Status: %i\n",
63983207 5005 prefix, format_timestamp(buf, sizeof(buf), s->exit_timestamp.realtime),
9fb86720
LP
5006 prefix, sigchld_code_to_string(s->code),
5007 prefix, s->status);
5cb5a6ff 5008}
44d8db9e 5009
34cf6c43 5010static char *exec_command_line(char **argv) {
44d8db9e
LP
5011 size_t k;
5012 char *n, *p, **a;
5013 bool first = true;
5014
9e2f7c11 5015 assert(argv);
44d8db9e 5016
9164977d 5017 k = 1;
9e2f7c11 5018 STRV_FOREACH(a, argv)
44d8db9e
LP
5019 k += strlen(*a)+3;
5020
5cd9cd35
LP
5021 n = new(char, k);
5022 if (!n)
44d8db9e
LP
5023 return NULL;
5024
5025 p = n;
9e2f7c11 5026 STRV_FOREACH(a, argv) {
44d8db9e
LP
5027
5028 if (!first)
5029 *(p++) = ' ';
5030 else
5031 first = false;
5032
5033 if (strpbrk(*a, WHITESPACE)) {
5034 *(p++) = '\'';
5035 p = stpcpy(p, *a);
5036 *(p++) = '\'';
5037 } else
5038 p = stpcpy(p, *a);
5039
5040 }
5041
9164977d
LP
5042 *p = 0;
5043
44d8db9e
LP
5044 /* FIXME: this doesn't really handle arguments that have
5045 * spaces and ticks in them */
5046
5047 return n;
5048}
5049
34cf6c43 5050static void exec_command_dump(ExecCommand *c, FILE *f, const char *prefix) {
e1d75803 5051 _cleanup_free_ char *cmd = NULL;
4c940960 5052 const char *prefix2;
44d8db9e
LP
5053
5054 assert(c);
5055 assert(f);
5056
4c940960 5057 prefix = strempty(prefix);
63c372cb 5058 prefix2 = strjoina(prefix, "\t");
44d8db9e 5059
9e2f7c11 5060 cmd = exec_command_line(c->argv);
44d8db9e
LP
5061 fprintf(f,
5062 "%sCommand Line: %s\n",
4bbccb02 5063 prefix, cmd ? cmd : strerror_safe(ENOMEM));
44d8db9e 5064
9fb86720 5065 exec_status_dump(&c->exec_status, f, prefix2);
44d8db9e
LP
5066}
5067
5068void exec_command_dump_list(ExecCommand *c, FILE *f, const char *prefix) {
5069 assert(f);
5070
4c940960 5071 prefix = strempty(prefix);
44d8db9e
LP
5072
5073 LIST_FOREACH(command, c, c)
5074 exec_command_dump(c, f, prefix);
5075}
94f04347 5076
a6a80b4f
LP
5077void exec_command_append_list(ExecCommand **l, ExecCommand *e) {
5078 ExecCommand *end;
5079
5080 assert(l);
5081 assert(e);
5082
5083 if (*l) {
35b8ca3a 5084 /* It's kind of important, that we keep the order here */
71fda00f
LP
5085 LIST_FIND_TAIL(command, *l, end);
5086 LIST_INSERT_AFTER(command, *l, end, e);
a6a80b4f
LP
5087 } else
5088 *l = e;
5089}
5090
26fd040d
LP
5091int exec_command_set(ExecCommand *c, const char *path, ...) {
5092 va_list ap;
5093 char **l, *p;
5094
5095 assert(c);
5096 assert(path);
5097
5098 va_start(ap, path);
5099 l = strv_new_ap(path, ap);
5100 va_end(ap);
5101
5102 if (!l)
5103 return -ENOMEM;
5104
250a918d
LP
5105 p = strdup(path);
5106 if (!p) {
26fd040d
LP
5107 strv_free(l);
5108 return -ENOMEM;
5109 }
5110
6897dfe8 5111 free_and_replace(c->path, p);
26fd040d 5112
130d3d22 5113 return strv_free_and_replace(c->argv, l);
26fd040d
LP
5114}
5115
86b23b07 5116int exec_command_append(ExecCommand *c, const char *path, ...) {
e63ff941 5117 _cleanup_strv_free_ char **l = NULL;
86b23b07 5118 va_list ap;
86b23b07
JS
5119 int r;
5120
5121 assert(c);
5122 assert(path);
5123
5124 va_start(ap, path);
5125 l = strv_new_ap(path, ap);
5126 va_end(ap);
5127
5128 if (!l)
5129 return -ENOMEM;
5130
e287086b 5131 r = strv_extend_strv(&c->argv, l, false);
e63ff941 5132 if (r < 0)
86b23b07 5133 return r;
86b23b07
JS
5134
5135 return 0;
5136}
5137
e8a565cb
YW
5138static void *remove_tmpdir_thread(void *p) {
5139 _cleanup_free_ char *path = p;
86b23b07 5140
e8a565cb
YW
5141 (void) rm_rf(path, REMOVE_ROOT|REMOVE_PHYSICAL);
5142 return NULL;
5143}
5144
5145static ExecRuntime* exec_runtime_free(ExecRuntime *rt, bool destroy) {
5146 int r;
5147
5148 if (!rt)
5149 return NULL;
5150
5151 if (rt->manager)
5152 (void) hashmap_remove(rt->manager->exec_runtime_by_id, rt->id);
5153
5154 /* When destroy is true, then rm_rf tmp_dir and var_tmp_dir. */
5155 if (destroy && rt->tmp_dir) {
5156 log_debug("Spawning thread to nuke %s", rt->tmp_dir);
5157
5158 r = asynchronous_job(remove_tmpdir_thread, rt->tmp_dir);
5159 if (r < 0) {
5160 log_warning_errno(r, "Failed to nuke %s: %m", rt->tmp_dir);
5161 free(rt->tmp_dir);
5162 }
5163
5164 rt->tmp_dir = NULL;
5165 }
613b411c 5166
e8a565cb
YW
5167 if (destroy && rt->var_tmp_dir) {
5168 log_debug("Spawning thread to nuke %s", rt->var_tmp_dir);
5169
5170 r = asynchronous_job(remove_tmpdir_thread, rt->var_tmp_dir);
5171 if (r < 0) {
5172 log_warning_errno(r, "Failed to nuke %s: %m", rt->var_tmp_dir);
5173 free(rt->var_tmp_dir);
5174 }
5175
5176 rt->var_tmp_dir = NULL;
5177 }
5178
5179 rt->id = mfree(rt->id);
5180 rt->tmp_dir = mfree(rt->tmp_dir);
5181 rt->var_tmp_dir = mfree(rt->var_tmp_dir);
5182 safe_close_pair(rt->netns_storage_socket);
5183 return mfree(rt);
5184}
5185
5186static void exec_runtime_freep(ExecRuntime **rt) {
da6bc6ed 5187 (void) exec_runtime_free(*rt, false);
e8a565cb
YW
5188}
5189
8e8009dc
LP
5190static int exec_runtime_allocate(ExecRuntime **ret) {
5191 ExecRuntime *n;
613b411c 5192
8e8009dc 5193 assert(ret);
613b411c 5194
8e8009dc
LP
5195 n = new(ExecRuntime, 1);
5196 if (!n)
613b411c
LP
5197 return -ENOMEM;
5198
8e8009dc
LP
5199 *n = (ExecRuntime) {
5200 .netns_storage_socket = { -1, -1 },
5201 };
5202
5203 *ret = n;
613b411c
LP
5204 return 0;
5205}
5206
e8a565cb
YW
5207static int exec_runtime_add(
5208 Manager *m,
5209 const char *id,
5210 const char *tmp_dir,
5211 const char *var_tmp_dir,
5212 const int netns_storage_socket[2],
5213 ExecRuntime **ret) {
5214
5215 _cleanup_(exec_runtime_freep) ExecRuntime *rt = NULL;
613b411c
LP
5216 int r;
5217
e8a565cb 5218 assert(m);
613b411c
LP
5219 assert(id);
5220
e8a565cb
YW
5221 r = hashmap_ensure_allocated(&m->exec_runtime_by_id, &string_hash_ops);
5222 if (r < 0)
5223 return r;
613b411c 5224
e8a565cb 5225 r = exec_runtime_allocate(&rt);
613b411c
LP
5226 if (r < 0)
5227 return r;
5228
e8a565cb
YW
5229 rt->id = strdup(id);
5230 if (!rt->id)
5231 return -ENOMEM;
5232
5233 if (tmp_dir) {
5234 rt->tmp_dir = strdup(tmp_dir);
5235 if (!rt->tmp_dir)
5236 return -ENOMEM;
5237
5238 /* When tmp_dir is set, then we require var_tmp_dir is also set. */
5239 assert(var_tmp_dir);
5240 rt->var_tmp_dir = strdup(var_tmp_dir);
5241 if (!rt->var_tmp_dir)
5242 return -ENOMEM;
5243 }
5244
5245 if (netns_storage_socket) {
5246 rt->netns_storage_socket[0] = netns_storage_socket[0];
5247 rt->netns_storage_socket[1] = netns_storage_socket[1];
613b411c
LP
5248 }
5249
e8a565cb
YW
5250 r = hashmap_put(m->exec_runtime_by_id, rt->id, rt);
5251 if (r < 0)
5252 return r;
5253
5254 rt->manager = m;
5255
5256 if (ret)
5257 *ret = rt;
5258
5259 /* do not remove created ExecRuntime object when the operation succeeds. */
5260 rt = NULL;
5261 return 0;
5262}
5263
5264static int exec_runtime_make(Manager *m, const ExecContext *c, const char *id, ExecRuntime **ret) {
5265 _cleanup_free_ char *tmp_dir = NULL, *var_tmp_dir = NULL;
2fa3742d 5266 _cleanup_close_pair_ int netns_storage_socket[2] = { -1, -1 };
e8a565cb
YW
5267 int r;
5268
5269 assert(m);
5270 assert(c);
5271 assert(id);
5272
5273 /* It is not necessary to create ExecRuntime object. */
a8d08f39 5274 if (!c->private_network && !c->private_tmp && !c->network_namespace_path)
e8a565cb
YW
5275 return 0;
5276
5277 if (c->private_tmp) {
5278 r = setup_tmp_dirs(id, &tmp_dir, &var_tmp_dir);
613b411c
LP
5279 if (r < 0)
5280 return r;
5281 }
5282
a8d08f39 5283 if (c->private_network || c->network_namespace_path) {
e8a565cb
YW
5284 if (socketpair(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC, 0, netns_storage_socket) < 0)
5285 return -errno;
5286 }
5287
5288 r = exec_runtime_add(m, id, tmp_dir, var_tmp_dir, netns_storage_socket, ret);
5289 if (r < 0)
5290 return r;
5291
5292 /* Avoid cleanup */
2fa3742d 5293 netns_storage_socket[0] = netns_storage_socket[1] = -1;
613b411c
LP
5294 return 1;
5295}
5296
e8a565cb
YW
5297int exec_runtime_acquire(Manager *m, const ExecContext *c, const char *id, bool create, ExecRuntime **ret) {
5298 ExecRuntime *rt;
5299 int r;
613b411c 5300
e8a565cb
YW
5301 assert(m);
5302 assert(id);
5303 assert(ret);
5304
5305 rt = hashmap_get(m->exec_runtime_by_id, id);
5306 if (rt)
5307 /* We already have a ExecRuntime object, let's increase the ref count and reuse it */
5308 goto ref;
5309
5310 if (!create)
5311 return 0;
5312
5313 /* If not found, then create a new object. */
5314 r = exec_runtime_make(m, c, id, &rt);
5315 if (r <= 0)
5316 /* When r == 0, it is not necessary to create ExecRuntime object. */
5317 return r;
613b411c 5318
e8a565cb
YW
5319ref:
5320 /* increment reference counter. */
5321 rt->n_ref++;
5322 *ret = rt;
5323 return 1;
5324}
613b411c 5325
e8a565cb
YW
5326ExecRuntime *exec_runtime_unref(ExecRuntime *rt, bool destroy) {
5327 if (!rt)
613b411c
LP
5328 return NULL;
5329
e8a565cb 5330 assert(rt->n_ref > 0);
613b411c 5331
e8a565cb
YW
5332 rt->n_ref--;
5333 if (rt->n_ref > 0)
f2341e0a
LP
5334 return NULL;
5335
e8a565cb 5336 return exec_runtime_free(rt, destroy);
613b411c
LP
5337}
5338
e8a565cb
YW
5339int exec_runtime_serialize(const Manager *m, FILE *f, FDSet *fds) {
5340 ExecRuntime *rt;
5341 Iterator i;
5342
5343 assert(m);
613b411c
LP
5344 assert(f);
5345 assert(fds);
5346
e8a565cb
YW
5347 HASHMAP_FOREACH(rt, m->exec_runtime_by_id, i) {
5348 fprintf(f, "exec-runtime=%s", rt->id);
613b411c 5349
e8a565cb
YW
5350 if (rt->tmp_dir)
5351 fprintf(f, " tmp-dir=%s", rt->tmp_dir);
613b411c 5352
e8a565cb
YW
5353 if (rt->var_tmp_dir)
5354 fprintf(f, " var-tmp-dir=%s", rt->var_tmp_dir);
613b411c 5355
e8a565cb
YW
5356 if (rt->netns_storage_socket[0] >= 0) {
5357 int copy;
613b411c 5358
e8a565cb
YW
5359 copy = fdset_put_dup(fds, rt->netns_storage_socket[0]);
5360 if (copy < 0)
5361 return copy;
613b411c 5362
e8a565cb
YW
5363 fprintf(f, " netns-socket-0=%i", copy);
5364 }
613b411c 5365
e8a565cb
YW
5366 if (rt->netns_storage_socket[1] >= 0) {
5367 int copy;
613b411c 5368
e8a565cb
YW
5369 copy = fdset_put_dup(fds, rt->netns_storage_socket[1]);
5370 if (copy < 0)
5371 return copy;
613b411c 5372
e8a565cb
YW
5373 fprintf(f, " netns-socket-1=%i", copy);
5374 }
5375
5376 fputc('\n', f);
613b411c
LP
5377 }
5378
5379 return 0;
5380}
5381
e8a565cb
YW
5382int exec_runtime_deserialize_compat(Unit *u, const char *key, const char *value, FDSet *fds) {
5383 _cleanup_(exec_runtime_freep) ExecRuntime *rt_create = NULL;
5384 ExecRuntime *rt;
613b411c
LP
5385 int r;
5386
e8a565cb
YW
5387 /* This is for the migration from old (v237 or earlier) deserialization text.
5388 * Due to the bug #7790, this may not work with the units that use JoinsNamespaceOf=.
5389 * Even if the ExecRuntime object originally created by the other unit, we cannot judge
5390 * so or not from the serialized text, then we always creates a new object owned by this. */
5391
5392 assert(u);
613b411c
LP
5393 assert(key);
5394 assert(value);
5395
e8a565cb
YW
5396 /* Manager manages ExecRuntime objects by the unit id.
5397 * So, we omit the serialized text when the unit does not have id (yet?)... */
5398 if (isempty(u->id)) {
5399 log_unit_debug(u, "Invocation ID not found. Dropping runtime parameter.");
5400 return 0;
5401 }
613b411c 5402
e8a565cb
YW
5403 r = hashmap_ensure_allocated(&u->manager->exec_runtime_by_id, &string_hash_ops);
5404 if (r < 0) {
5405 log_unit_debug_errno(u, r, "Failed to allocate storage for runtime parameter: %m");
5406 return 0;
5407 }
5408
5409 rt = hashmap_get(u->manager->exec_runtime_by_id, u->id);
5410 if (!rt) {
5411 r = exec_runtime_allocate(&rt_create);
613b411c 5412 if (r < 0)
f2341e0a 5413 return log_oom();
613b411c 5414
e8a565cb
YW
5415 rt_create->id = strdup(u->id);
5416 if (!rt_create->id)
5417 return log_oom();
5418
5419 rt = rt_create;
5420 }
5421
5422 if (streq(key, "tmp-dir")) {
5423 char *copy;
5424
613b411c
LP
5425 copy = strdup(value);
5426 if (!copy)
5427 return log_oom();
5428
e8a565cb 5429 free_and_replace(rt->tmp_dir, copy);
613b411c
LP
5430
5431 } else if (streq(key, "var-tmp-dir")) {
5432 char *copy;
5433
613b411c
LP
5434 copy = strdup(value);
5435 if (!copy)
5436 return log_oom();
5437
e8a565cb 5438 free_and_replace(rt->var_tmp_dir, copy);
613b411c
LP
5439
5440 } else if (streq(key, "netns-socket-0")) {
5441 int fd;
5442
e8a565cb 5443 if (safe_atoi(value, &fd) < 0 || !fdset_contains(fds, fd)) {
f2341e0a 5444 log_unit_debug(u, "Failed to parse netns socket value: %s", value);
e8a565cb 5445 return 0;
613b411c 5446 }
e8a565cb
YW
5447
5448 safe_close(rt->netns_storage_socket[0]);
5449 rt->netns_storage_socket[0] = fdset_remove(fds, fd);
5450
613b411c
LP
5451 } else if (streq(key, "netns-socket-1")) {
5452 int fd;
5453
e8a565cb 5454 if (safe_atoi(value, &fd) < 0 || !fdset_contains(fds, fd)) {
f2341e0a 5455 log_unit_debug(u, "Failed to parse netns socket value: %s", value);
e8a565cb 5456 return 0;
613b411c 5457 }
e8a565cb
YW
5458
5459 safe_close(rt->netns_storage_socket[1]);
5460 rt->netns_storage_socket[1] = fdset_remove(fds, fd);
613b411c
LP
5461 } else
5462 return 0;
5463
e8a565cb
YW
5464 /* If the object is newly created, then put it to the hashmap which manages ExecRuntime objects. */
5465 if (rt_create) {
5466 r = hashmap_put(u->manager->exec_runtime_by_id, rt_create->id, rt_create);
5467 if (r < 0) {
3fe91079 5468 log_unit_debug_errno(u, r, "Failed to put runtime parameter to manager's storage: %m");
e8a565cb
YW
5469 return 0;
5470 }
613b411c 5471
e8a565cb 5472 rt_create->manager = u->manager;
613b411c 5473
e8a565cb
YW
5474 /* Avoid cleanup */
5475 rt_create = NULL;
5476 }
98b47d54 5477
e8a565cb
YW
5478 return 1;
5479}
613b411c 5480
e8a565cb
YW
5481void exec_runtime_deserialize_one(Manager *m, const char *value, FDSet *fds) {
5482 char *id = NULL, *tmp_dir = NULL, *var_tmp_dir = NULL;
5483 int r, fd0 = -1, fd1 = -1;
5484 const char *p, *v = value;
5485 size_t n;
613b411c 5486
e8a565cb
YW
5487 assert(m);
5488 assert(value);
5489 assert(fds);
98b47d54 5490
e8a565cb
YW
5491 n = strcspn(v, " ");
5492 id = strndupa(v, n);
5493 if (v[n] != ' ')
5494 goto finalize;
5495 p = v + n + 1;
5496
5497 v = startswith(p, "tmp-dir=");
5498 if (v) {
5499 n = strcspn(v, " ");
5500 tmp_dir = strndupa(v, n);
5501 if (v[n] != ' ')
5502 goto finalize;
5503 p = v + n + 1;
5504 }
5505
5506 v = startswith(p, "var-tmp-dir=");
5507 if (v) {
5508 n = strcspn(v, " ");
5509 var_tmp_dir = strndupa(v, n);
5510 if (v[n] != ' ')
5511 goto finalize;
5512 p = v + n + 1;
5513 }
5514
5515 v = startswith(p, "netns-socket-0=");
5516 if (v) {
5517 char *buf;
5518
5519 n = strcspn(v, " ");
5520 buf = strndupa(v, n);
5521 if (safe_atoi(buf, &fd0) < 0 || !fdset_contains(fds, fd0)) {
5522 log_debug("Unable to process exec-runtime netns fd specification.");
5523 return;
98b47d54 5524 }
e8a565cb
YW
5525 fd0 = fdset_remove(fds, fd0);
5526 if (v[n] != ' ')
5527 goto finalize;
5528 p = v + n + 1;
613b411c
LP
5529 }
5530
e8a565cb
YW
5531 v = startswith(p, "netns-socket-1=");
5532 if (v) {
5533 char *buf;
98b47d54 5534
e8a565cb
YW
5535 n = strcspn(v, " ");
5536 buf = strndupa(v, n);
5537 if (safe_atoi(buf, &fd1) < 0 || !fdset_contains(fds, fd1)) {
5538 log_debug("Unable to process exec-runtime netns fd specification.");
5539 return;
98b47d54 5540 }
e8a565cb
YW
5541 fd1 = fdset_remove(fds, fd1);
5542 }
98b47d54 5543
e8a565cb
YW
5544finalize:
5545
5546 r = exec_runtime_add(m, id, tmp_dir, var_tmp_dir, (int[]) { fd0, fd1 }, NULL);
7d853ca6 5547 if (r < 0)
e8a565cb 5548 log_debug_errno(r, "Failed to add exec-runtime: %m");
e8a565cb 5549}
613b411c 5550
e8a565cb
YW
5551void exec_runtime_vacuum(Manager *m) {
5552 ExecRuntime *rt;
5553 Iterator i;
5554
5555 assert(m);
5556
5557 /* Free unreferenced ExecRuntime objects. This is used after manager deserialization process. */
5558
5559 HASHMAP_FOREACH(rt, m->exec_runtime_by_id, i) {
5560 if (rt->n_ref > 0)
5561 continue;
5562
5563 (void) exec_runtime_free(rt, false);
5564 }
613b411c
LP
5565}
5566
b9c04eaf
YW
5567void exec_params_clear(ExecParameters *p) {
5568 if (!p)
5569 return;
5570
5571 strv_free(p->environment);
5572}
5573
80876c20
LP
5574static const char* const exec_input_table[_EXEC_INPUT_MAX] = {
5575 [EXEC_INPUT_NULL] = "null",
5576 [EXEC_INPUT_TTY] = "tty",
5577 [EXEC_INPUT_TTY_FORCE] = "tty-force",
4f2d528d 5578 [EXEC_INPUT_TTY_FAIL] = "tty-fail",
52c239d7
LB
5579 [EXEC_INPUT_SOCKET] = "socket",
5580 [EXEC_INPUT_NAMED_FD] = "fd",
08f3be7a 5581 [EXEC_INPUT_DATA] = "data",
2038c3f5 5582 [EXEC_INPUT_FILE] = "file",
80876c20
LP
5583};
5584
8a0867d6
LP
5585DEFINE_STRING_TABLE_LOOKUP(exec_input, ExecInput);
5586
94f04347 5587static const char* const exec_output_table[_EXEC_OUTPUT_MAX] = {
80876c20 5588 [EXEC_OUTPUT_INHERIT] = "inherit",
94f04347 5589 [EXEC_OUTPUT_NULL] = "null",
80876c20 5590 [EXEC_OUTPUT_TTY] = "tty",
94f04347 5591 [EXEC_OUTPUT_SYSLOG] = "syslog",
28dbc1e8 5592 [EXEC_OUTPUT_SYSLOG_AND_CONSOLE] = "syslog+console",
9a6bca7a 5593 [EXEC_OUTPUT_KMSG] = "kmsg",
28dbc1e8 5594 [EXEC_OUTPUT_KMSG_AND_CONSOLE] = "kmsg+console",
706343f4
LP
5595 [EXEC_OUTPUT_JOURNAL] = "journal",
5596 [EXEC_OUTPUT_JOURNAL_AND_CONSOLE] = "journal+console",
52c239d7
LB
5597 [EXEC_OUTPUT_SOCKET] = "socket",
5598 [EXEC_OUTPUT_NAMED_FD] = "fd",
2038c3f5 5599 [EXEC_OUTPUT_FILE] = "file",
566b7d23 5600 [EXEC_OUTPUT_FILE_APPEND] = "append",
94f04347
LP
5601};
5602
5603DEFINE_STRING_TABLE_LOOKUP(exec_output, ExecOutput);
023a4f67
LP
5604
5605static const char* const exec_utmp_mode_table[_EXEC_UTMP_MODE_MAX] = {
5606 [EXEC_UTMP_INIT] = "init",
5607 [EXEC_UTMP_LOGIN] = "login",
5608 [EXEC_UTMP_USER] = "user",
5609};
5610
5611DEFINE_STRING_TABLE_LOOKUP(exec_utmp_mode, ExecUtmpMode);
53f47dfc
YW
5612
5613static const char* const exec_preserve_mode_table[_EXEC_PRESERVE_MODE_MAX] = {
5614 [EXEC_PRESERVE_NO] = "no",
5615 [EXEC_PRESERVE_YES] = "yes",
5616 [EXEC_PRESERVE_RESTART] = "restart",
5617};
5618
5619DEFINE_STRING_TABLE_LOOKUP_WITH_BOOLEAN(exec_preserve_mode, ExecPreserveMode, EXEC_PRESERVE_YES);
3536f49e 5620
6b7b2ed9 5621/* This table maps ExecDirectoryType to the setting it is configured with in the unit */
72fd1768 5622static const char* const exec_directory_type_table[_EXEC_DIRECTORY_TYPE_MAX] = {
3536f49e
YW
5623 [EXEC_DIRECTORY_RUNTIME] = "RuntimeDirectory",
5624 [EXEC_DIRECTORY_STATE] = "StateDirectory",
5625 [EXEC_DIRECTORY_CACHE] = "CacheDirectory",
5626 [EXEC_DIRECTORY_LOGS] = "LogsDirectory",
5627 [EXEC_DIRECTORY_CONFIGURATION] = "ConfigurationDirectory",
5628};
5629
5630DEFINE_STRING_TABLE_LOOKUP(exec_directory_type, ExecDirectoryType);
b1edf445 5631
6b7b2ed9
LP
5632/* And this table maps ExecDirectoryType too, but to a generic term identifying the type of resource. This
5633 * one is supposed to be generic enough to be used for unit types that don't use ExecContext and per-unit
5634 * directories, specifically .timer units with their timestamp touch file. */
5635static const char* const exec_resource_type_table[_EXEC_DIRECTORY_TYPE_MAX] = {
5636 [EXEC_DIRECTORY_RUNTIME] = "runtime",
5637 [EXEC_DIRECTORY_STATE] = "state",
5638 [EXEC_DIRECTORY_CACHE] = "cache",
5639 [EXEC_DIRECTORY_LOGS] = "logs",
5640 [EXEC_DIRECTORY_CONFIGURATION] = "configuration",
5641};
5642
5643DEFINE_STRING_TABLE_LOOKUP(exec_resource_type, ExecDirectoryType);
5644
5645/* And this table also maps ExecDirectoryType, to the environment variable we pass the selected directory to
5646 * the service payload in. */
fb2042dd
YW
5647static const char* const exec_directory_env_name_table[_EXEC_DIRECTORY_TYPE_MAX] = {
5648 [EXEC_DIRECTORY_RUNTIME] = "RUNTIME_DIRECTORY",
5649 [EXEC_DIRECTORY_STATE] = "STATE_DIRECTORY",
5650 [EXEC_DIRECTORY_CACHE] = "CACHE_DIRECTORY",
5651 [EXEC_DIRECTORY_LOGS] = "LOGS_DIRECTORY",
5652 [EXEC_DIRECTORY_CONFIGURATION] = "CONFIGURATION_DIRECTORY",
5653};
5654
5655DEFINE_PRIVATE_STRING_TABLE_LOOKUP_TO_STRING(exec_directory_env_name, ExecDirectoryType);
5656
b1edf445
LP
5657static const char* const exec_keyring_mode_table[_EXEC_KEYRING_MODE_MAX] = {
5658 [EXEC_KEYRING_INHERIT] = "inherit",
5659 [EXEC_KEYRING_PRIVATE] = "private",
5660 [EXEC_KEYRING_SHARED] = "shared",
5661};
5662
5663DEFINE_STRING_TABLE_LOOKUP(exec_keyring_mode, ExecKeyringMode);