]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/core/execute.c
units: remove unused busnames.target
[thirdparty/systemd.git] / src / core / execute.c
CommitLineData
53e1b683 1/* SPDX-License-Identifier: LGPL-2.1+ */
a7334b09 2
034c6ed7
LP
3#include <errno.h>
4#include <fcntl.h>
8dd4c05b
LP
5#include <glob.h>
6#include <grp.h>
7#include <poll.h>
309bff19 8#include <signal.h>
8dd4c05b 9#include <string.h>
19c0b0b9 10#include <sys/capability.h>
d251207d 11#include <sys/eventfd.h>
f3e43635 12#include <sys/mman.h>
8dd4c05b 13#include <sys/personality.h>
94f04347 14#include <sys/prctl.h>
d2ffa389 15#include <sys/shm.h>
8dd4c05b 16#include <sys/socket.h>
451a074f 17#include <sys/stat.h>
d2ffa389 18#include <sys/types.h>
8dd4c05b
LP
19#include <sys/un.h>
20#include <unistd.h>
023a4f67 21#include <utmpx.h>
5cb5a6ff 22
349cc4a5 23#if HAVE_PAM
5b6319dc
LP
24#include <security/pam_appl.h>
25#endif
26
349cc4a5 27#if HAVE_SELINUX
7b52a628
MS
28#include <selinux/selinux.h>
29#endif
30
349cc4a5 31#if HAVE_SECCOMP
17df7223
LP
32#include <seccomp.h>
33#endif
34
349cc4a5 35#if HAVE_APPARMOR
eef65bf3
MS
36#include <sys/apparmor.h>
37#endif
38
24882e06 39#include "sd-messages.h"
8dd4c05b
LP
40
41#include "af-list.h"
b5efdb8a 42#include "alloc-util.h"
349cc4a5 43#if HAVE_APPARMOR
3ffd4af2
LP
44#include "apparmor-util.h"
45#endif
8dd4c05b
LP
46#include "async.h"
47#include "barrier.h"
8dd4c05b 48#include "cap-list.h"
430f0182 49#include "capability-util.h"
a1164ae3 50#include "chown-recursive.h"
da681e1b 51#include "cpu-set-util.h"
f6a6225e 52#include "def.h"
4d1a6904 53#include "env-util.h"
17df7223 54#include "errno-list.h"
3ffd4af2 55#include "execute.h"
8dd4c05b 56#include "exit-status.h"
3ffd4af2 57#include "fd-util.h"
8dd4c05b 58#include "fileio.h"
f97b34a6 59#include "format-util.h"
f4f15635 60#include "fs-util.h"
7d50b32a 61#include "glob-util.h"
c004493c 62#include "io-util.h"
8dd4c05b 63#include "ioprio.h"
a1164ae3 64#include "label.h"
8dd4c05b
LP
65#include "log.h"
66#include "macro.h"
e8a565cb 67#include "manager.h"
8dd4c05b
LP
68#include "missing.h"
69#include "mkdir.h"
70#include "namespace.h"
6bedfcbb 71#include "parse-util.h"
8dd4c05b 72#include "path-util.h"
0b452006 73#include "process-util.h"
78f22b97 74#include "rlimit-util.h"
8dd4c05b 75#include "rm-rf.h"
349cc4a5 76#if HAVE_SECCOMP
3ffd4af2
LP
77#include "seccomp-util.h"
78#endif
8dd4c05b 79#include "securebits.h"
07d46372 80#include "securebits-util.h"
8dd4c05b 81#include "selinux-util.h"
24882e06 82#include "signal-util.h"
8dd4c05b 83#include "smack-util.h"
57b7a260 84#include "socket-util.h"
fd63e712 85#include "special.h"
949befd3 86#include "stat-util.h"
8b43440b 87#include "string-table.h"
07630cea 88#include "string-util.h"
8dd4c05b 89#include "strv.h"
7ccbd1ae 90#include "syslog-util.h"
8dd4c05b 91#include "terminal-util.h"
566b7d23 92#include "umask-util.h"
8dd4c05b 93#include "unit.h"
b1d4f8e1 94#include "user-util.h"
8dd4c05b
LP
95#include "util.h"
96#include "utmp-wtmp.h"
5cb5a6ff 97
e056b01d 98#define IDLE_TIMEOUT_USEC (5*USEC_PER_SEC)
31a7eb86 99#define IDLE_TIMEOUT2_USEC (1*USEC_PER_SEC)
e6a26745 100
02a51aba
LP
101/* This assumes there is a 'tty' group */
102#define TTY_MODE 0620
103
531dca78
LP
104#define SNDBUF_SIZE (8*1024*1024)
105
/* Relocates the passed file descriptors so that fds[idx] ends up being fd number idx+3, i.e. the set
 * is placed right behind stdin/stdout/stderr. The array is modified in place: every entry that had
 * to be moved is replaced by the number of its duplicate, and the old fd is closed.
 *
 * Returns 0 on success and a negative errno value if duplicating an fd failed. */
static int shift_fds(int fds[], size_t n_fds) {
        int first = 0;

        if (n_fds <= 0)
                return 0;

        assert(fds);

        for (;;) {
                int retry_at = -1, idx;

                for (idx = first; idx < (int) n_fds; idx++) {
                        int target = idx + 3, moved;

                        /* Nothing to do if this one already sits in its slot */
                        if (fds[idx] == target)
                                continue;

                        /* F_DUPFD hands out the lowest free fd >= target, which is not
                         * necessarily target itself */
                        moved = fcntl(fds[idx], F_DUPFD, target);
                        if (moved < 0)
                                return -errno;

                        safe_close(fds[idx]);
                        fds[idx] = moved;

                        /* The slot we were after was still occupied: remember the first index
                         * where this happened, so that another pass starts from there */
                        if (moved != target && retry_at < 0)
                                retry_at = idx;
                }

                if (retry_at < 0)
                        break;

                first = retry_at;
        }

        return 0;
}
150
/* Adjusts the file descriptor flags of the fds that are about to be passed to a child. The array is
 * expected to contain n_socket_fds socket fds followed by n_storage_fds further fds.
 *
 * For the socket fds the non-blocking state is set according to 'nonblock'; the remaining fds are
 * left alone in that regard. The close-on-exec flag is turned off for every fd, since they are
 * supposed to survive into the child.
 *
 * Returns 0 on success, or the negative error of the first helper that failed. */
static int flags_fds(const int fds[], size_t n_socket_fds, size_t n_storage_fds, bool nonblock) {
        size_t total = n_socket_fds + n_storage_fds, idx;

        if (total <= 0)
                return 0;

        assert(fds);

        for (idx = 0; idx < total; idx++) {
                int r;

                /* Only the leading socket fds get their O_NONBLOCK state adjusted */
                if (idx < n_socket_fds) {
                        r = fd_nonblock(fds[idx], nonblock);
                        if (r < 0)
                                return r;
                }

                r = fd_cloexec(fds[idx], false);
                if (r < 0)
                        return r;
        }

        return 0;
}
183
1e22b5cd 184static const char *exec_context_tty_path(const ExecContext *context) {
80876c20
LP
185 assert(context);
186
1e22b5cd
LP
187 if (context->stdio_as_fds)
188 return NULL;
189
80876c20
LP
190 if (context->tty_path)
191 return context->tty_path;
192
193 return "/dev/console";
194}
195
1e22b5cd
LP
196static void exec_context_tty_reset(const ExecContext *context, const ExecParameters *p) {
197 const char *path;
198
6ea832a2
LP
199 assert(context);
200
1e22b5cd 201 path = exec_context_tty_path(context);
6ea832a2 202
1e22b5cd
LP
203 if (context->tty_vhangup) {
204 if (p && p->stdin_fd >= 0)
205 (void) terminal_vhangup_fd(p->stdin_fd);
206 else if (path)
207 (void) terminal_vhangup(path);
208 }
6ea832a2 209
1e22b5cd
LP
210 if (context->tty_reset) {
211 if (p && p->stdin_fd >= 0)
212 (void) reset_terminal_fd(p->stdin_fd, true);
213 else if (path)
214 (void) reset_terminal(path);
215 }
216
217 if (context->tty_vt_disallocate && path)
218 (void) vt_disallocate(path);
6ea832a2
LP
219}
220
6af760f3
LP
221static bool is_terminal_input(ExecInput i) {
222 return IN_SET(i,
223 EXEC_INPUT_TTY,
224 EXEC_INPUT_TTY_FORCE,
225 EXEC_INPUT_TTY_FAIL);
226}
227
3a1286b6 228static bool is_terminal_output(ExecOutput o) {
6af760f3
LP
229 return IN_SET(o,
230 EXEC_OUTPUT_TTY,
231 EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
232 EXEC_OUTPUT_KMSG_AND_CONSOLE,
233 EXEC_OUTPUT_JOURNAL_AND_CONSOLE);
234}
235
aac8c0c3
LP
236static bool is_syslog_output(ExecOutput o) {
237 return IN_SET(o,
238 EXEC_OUTPUT_SYSLOG,
239 EXEC_OUTPUT_SYSLOG_AND_CONSOLE);
240}
241
242static bool is_kmsg_output(ExecOutput o) {
243 return IN_SET(o,
244 EXEC_OUTPUT_KMSG,
245 EXEC_OUTPUT_KMSG_AND_CONSOLE);
246}
247
6af760f3
LP
248static bool exec_context_needs_term(const ExecContext *c) {
249 assert(c);
250
251 /* Return true if the execution context suggests we should set $TERM to something useful. */
252
253 if (is_terminal_input(c->std_input))
254 return true;
255
256 if (is_terminal_output(c->std_output))
257 return true;
258
259 if (is_terminal_output(c->std_error))
260 return true;
261
262 return !!c->tty_path;
3a1286b6
MS
263}
264
/* Opens /dev/null with the given open() flags (plus O_NOCTTY) and installs it as fd number 'nfd'
 * via move_fd(). Returns a negative errno value if the open fails, otherwise whatever move_fd()
 * returns. */
static int open_null_as(int flags, int nfd) {
        int null_fd;

        assert(nfd >= 0);

        null_fd = open("/dev/null", flags|O_NOCTTY);

        return null_fd < 0 ? -errno : move_fd(null_fd, nfd, false);
}
276
524daa8c 277static int connect_journal_socket(int fd, uid_t uid, gid_t gid) {
92a17af9 278 static const union sockaddr_union sa = {
b92bea5d
ZJS
279 .un.sun_family = AF_UNIX,
280 .un.sun_path = "/run/systemd/journal/stdout",
281 };
524daa8c
ZJS
282 uid_t olduid = UID_INVALID;
283 gid_t oldgid = GID_INVALID;
284 int r;
285
cad93f29 286 if (gid_is_valid(gid)) {
524daa8c
ZJS
287 oldgid = getgid();
288
92a17af9 289 if (setegid(gid) < 0)
524daa8c
ZJS
290 return -errno;
291 }
292
cad93f29 293 if (uid_is_valid(uid)) {
524daa8c
ZJS
294 olduid = getuid();
295
92a17af9 296 if (seteuid(uid) < 0) {
524daa8c
ZJS
297 r = -errno;
298 goto restore_gid;
299 }
300 }
301
92a17af9 302 r = connect(fd, &sa.sa, SOCKADDR_UN_LEN(sa.un)) < 0 ? -errno : 0;
524daa8c
ZJS
303
304 /* If we fail to restore the uid or gid, things will likely
305 fail later on. This should only happen if an LSM interferes. */
306
cad93f29 307 if (uid_is_valid(uid))
524daa8c
ZJS
308 (void) seteuid(olduid);
309
310 restore_gid:
cad93f29 311 if (gid_is_valid(gid))
524daa8c
ZJS
312 (void) setegid(oldgid);
313
314 return r;
315}
316
fd1f9c89 317static int connect_logger_as(
34cf6c43 318 const Unit *unit,
fd1f9c89 319 const ExecContext *context,
af635cf3 320 const ExecParameters *params,
fd1f9c89
LP
321 ExecOutput output,
322 const char *ident,
fd1f9c89
LP
323 int nfd,
324 uid_t uid,
325 gid_t gid) {
326
2ac1ff68
EV
327 _cleanup_close_ int fd = -1;
328 int r;
071830ff
LP
329
330 assert(context);
af635cf3 331 assert(params);
80876c20
LP
332 assert(output < _EXEC_OUTPUT_MAX);
333 assert(ident);
334 assert(nfd >= 0);
071830ff 335
54fe0cdb
LP
336 fd = socket(AF_UNIX, SOCK_STREAM, 0);
337 if (fd < 0)
80876c20 338 return -errno;
071830ff 339
524daa8c
ZJS
340 r = connect_journal_socket(fd, uid, gid);
341 if (r < 0)
342 return r;
071830ff 343
2ac1ff68 344 if (shutdown(fd, SHUT_RD) < 0)
80876c20 345 return -errno;
071830ff 346
fd1f9c89 347 (void) fd_inc_sndbuf(fd, SNDBUF_SIZE);
531dca78 348
2ac1ff68 349 if (dprintf(fd,
62bca2c6 350 "%s\n"
80876c20
LP
351 "%s\n"
352 "%i\n"
54fe0cdb
LP
353 "%i\n"
354 "%i\n"
355 "%i\n"
4f4a1dbf 356 "%i\n",
c867611e 357 context->syslog_identifier ?: ident,
af635cf3 358 params->flags & EXEC_PASS_LOG_UNIT ? unit->id : "",
54fe0cdb
LP
359 context->syslog_priority,
360 !!context->syslog_level_prefix,
aac8c0c3
LP
361 is_syslog_output(output),
362 is_kmsg_output(output),
2ac1ff68
EV
363 is_terminal_output(output)) < 0)
364 return -errno;
80876c20 365
2ac1ff68 366 return move_fd(TAKE_FD(fd), nfd, false);
80876c20 367}
2ac1ff68 368
/* Opens the terminal at 'path' with the given flags (plus O_NOCTTY) and installs it as fd number
 * 'nfd' via move_fd(). Returns the negative error of open_terminal() if that fails, otherwise
 * whatever move_fd() returns. */
static int open_terminal_as(const char *path, int flags, int nfd) {
        int tty_fd;

        assert(path);
        assert(nfd >= 0);

        tty_fd = open_terminal(path, flags | O_NOCTTY);

        return tty_fd < 0 ? tty_fd : move_fd(tty_fd, nfd, false);
}
071830ff 381
2038c3f5 382static int acquire_path(const char *path, int flags, mode_t mode) {
15a3e96f
LP
383 union sockaddr_union sa = {};
384 _cleanup_close_ int fd = -1;
385 int r, salen;
071830ff 386
80876c20 387 assert(path);
071830ff 388
2038c3f5
LP
389 if (IN_SET(flags & O_ACCMODE, O_WRONLY, O_RDWR))
390 flags |= O_CREAT;
391
392 fd = open(path, flags|O_NOCTTY, mode);
393 if (fd >= 0)
15a3e96f 394 return TAKE_FD(fd);
071830ff 395
2038c3f5
LP
396 if (errno != ENXIO) /* ENXIO is returned when we try to open() an AF_UNIX file system socket on Linux */
397 return -errno;
15a3e96f 398 if (strlen(path) >= sizeof(sa.un.sun_path)) /* Too long, can't be a UNIX socket */
2038c3f5
LP
399 return -ENXIO;
400
401 /* So, it appears the specified path could be an AF_UNIX socket. Let's see if we can connect to it. */
402
403 fd = socket(AF_UNIX, SOCK_STREAM, 0);
404 if (fd < 0)
405 return -errno;
406
15a3e96f
LP
407 salen = sockaddr_un_set_path(&sa.un, path);
408 if (salen < 0)
409 return salen;
410
411 if (connect(fd, &sa.sa, salen) < 0)
2038c3f5
LP
412 return errno == EINVAL ? -ENXIO : -errno; /* Propagate initial error if we get EINVAL, i.e. we have
413 * indication that his wasn't an AF_UNIX socket after all */
071830ff 414
2038c3f5
LP
415 if ((flags & O_ACCMODE) == O_RDONLY)
416 r = shutdown(fd, SHUT_WR);
417 else if ((flags & O_ACCMODE) == O_WRONLY)
418 r = shutdown(fd, SHUT_RD);
419 else
15a3e96f
LP
420 return TAKE_FD(fd);
421 if (r < 0)
2038c3f5 422 return -errno;
2038c3f5 423
15a3e96f 424 return TAKE_FD(fd);
80876c20 425}
071830ff 426
08f3be7a
LP
427static int fixup_input(
428 const ExecContext *context,
429 int socket_fd,
430 bool apply_tty_stdin) {
431
432 ExecInput std_input;
433
434 assert(context);
435
436 std_input = context->std_input;
1e3ad081
LP
437
438 if (is_terminal_input(std_input) && !apply_tty_stdin)
439 return EXEC_INPUT_NULL;
071830ff 440
03fd9c49 441 if (std_input == EXEC_INPUT_SOCKET && socket_fd < 0)
4f2d528d
LP
442 return EXEC_INPUT_NULL;
443
08f3be7a
LP
444 if (std_input == EXEC_INPUT_DATA && context->stdin_data_size == 0)
445 return EXEC_INPUT_NULL;
446
03fd9c49 447 return std_input;
4f2d528d
LP
448}
449
03fd9c49 450static int fixup_output(ExecOutput std_output, int socket_fd) {
4f2d528d 451
03fd9c49 452 if (std_output == EXEC_OUTPUT_SOCKET && socket_fd < 0)
4f2d528d
LP
453 return EXEC_OUTPUT_INHERIT;
454
03fd9c49 455 return std_output;
4f2d528d
LP
456}
457
a34ceba6
LP
458static int setup_input(
459 const ExecContext *context,
460 const ExecParameters *params,
52c239d7
LB
461 int socket_fd,
462 int named_iofds[3]) {
a34ceba6 463
4f2d528d
LP
464 ExecInput i;
465
466 assert(context);
a34ceba6
LP
467 assert(params);
468
469 if (params->stdin_fd >= 0) {
470 if (dup2(params->stdin_fd, STDIN_FILENO) < 0)
471 return -errno;
472
473 /* Try to make this the controlling tty, if it is a tty, and reset it */
1fb0682e
LP
474 if (isatty(STDIN_FILENO)) {
475 (void) ioctl(STDIN_FILENO, TIOCSCTTY, context->std_input == EXEC_INPUT_TTY_FORCE);
476 (void) reset_terminal_fd(STDIN_FILENO, true);
477 }
a34ceba6
LP
478
479 return STDIN_FILENO;
480 }
4f2d528d 481
08f3be7a 482 i = fixup_input(context, socket_fd, params->flags & EXEC_APPLY_TTY_STDIN);
4f2d528d
LP
483
484 switch (i) {
071830ff 485
80876c20
LP
486 case EXEC_INPUT_NULL:
487 return open_null_as(O_RDONLY, STDIN_FILENO);
488
489 case EXEC_INPUT_TTY:
490 case EXEC_INPUT_TTY_FORCE:
491 case EXEC_INPUT_TTY_FAIL: {
046a82c1 492 int fd;
071830ff 493
1e22b5cd 494 fd = acquire_terminal(exec_context_tty_path(context),
8854d795
LP
495 i == EXEC_INPUT_TTY_FAIL ? ACQUIRE_TERMINAL_TRY :
496 i == EXEC_INPUT_TTY_FORCE ? ACQUIRE_TERMINAL_FORCE :
497 ACQUIRE_TERMINAL_WAIT,
3a43da28 498 USEC_INFINITY);
970edce6 499 if (fd < 0)
80876c20
LP
500 return fd;
501
046a82c1 502 return move_fd(fd, STDIN_FILENO, false);
80876c20
LP
503 }
504
4f2d528d 505 case EXEC_INPUT_SOCKET:
e75a9ed1
LP
506 assert(socket_fd >= 0);
507
4f2d528d
LP
508 return dup2(socket_fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
509
52c239d7 510 case EXEC_INPUT_NAMED_FD:
e75a9ed1
LP
511 assert(named_iofds[STDIN_FILENO] >= 0);
512
52c239d7
LB
513 (void) fd_nonblock(named_iofds[STDIN_FILENO], false);
514 return dup2(named_iofds[STDIN_FILENO], STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
515
08f3be7a
LP
516 case EXEC_INPUT_DATA: {
517 int fd;
518
519 fd = acquire_data_fd(context->stdin_data, context->stdin_data_size, 0);
520 if (fd < 0)
521 return fd;
522
523 return move_fd(fd, STDIN_FILENO, false);
524 }
525
2038c3f5
LP
526 case EXEC_INPUT_FILE: {
527 bool rw;
528 int fd;
529
530 assert(context->stdio_file[STDIN_FILENO]);
531
532 rw = (context->std_output == EXEC_OUTPUT_FILE && streq_ptr(context->stdio_file[STDIN_FILENO], context->stdio_file[STDOUT_FILENO])) ||
533 (context->std_error == EXEC_OUTPUT_FILE && streq_ptr(context->stdio_file[STDIN_FILENO], context->stdio_file[STDERR_FILENO]));
534
535 fd = acquire_path(context->stdio_file[STDIN_FILENO], rw ? O_RDWR : O_RDONLY, 0666 & ~context->umask);
536 if (fd < 0)
537 return fd;
538
539 return move_fd(fd, STDIN_FILENO, false);
540 }
541
80876c20
LP
542 default:
543 assert_not_reached("Unknown input type");
544 }
545}
546
a34ceba6 547static int setup_output(
34cf6c43 548 const Unit *unit,
a34ceba6
LP
549 const ExecContext *context,
550 const ExecParameters *params,
551 int fileno,
552 int socket_fd,
52c239d7 553 int named_iofds[3],
a34ceba6 554 const char *ident,
7bce046b
LP
555 uid_t uid,
556 gid_t gid,
557 dev_t *journal_stream_dev,
558 ino_t *journal_stream_ino) {
a34ceba6 559
4f2d528d
LP
560 ExecOutput o;
561 ExecInput i;
47c1d80d 562 int r;
4f2d528d 563
f2341e0a 564 assert(unit);
80876c20 565 assert(context);
a34ceba6 566 assert(params);
80876c20 567 assert(ident);
7bce046b
LP
568 assert(journal_stream_dev);
569 assert(journal_stream_ino);
80876c20 570
a34ceba6
LP
571 if (fileno == STDOUT_FILENO && params->stdout_fd >= 0) {
572
573 if (dup2(params->stdout_fd, STDOUT_FILENO) < 0)
574 return -errno;
575
576 return STDOUT_FILENO;
577 }
578
579 if (fileno == STDERR_FILENO && params->stderr_fd >= 0) {
580 if (dup2(params->stderr_fd, STDERR_FILENO) < 0)
581 return -errno;
582
583 return STDERR_FILENO;
584 }
585
08f3be7a 586 i = fixup_input(context, socket_fd, params->flags & EXEC_APPLY_TTY_STDIN);
03fd9c49 587 o = fixup_output(context->std_output, socket_fd);
4f2d528d 588
eb17e935
MS
589 if (fileno == STDERR_FILENO) {
590 ExecOutput e;
591 e = fixup_output(context->std_error, socket_fd);
80876c20 592
eb17e935
MS
593 /* This expects the input and output are already set up */
594
595 /* Don't change the stderr file descriptor if we inherit all
596 * the way and are not on a tty */
597 if (e == EXEC_OUTPUT_INHERIT &&
598 o == EXEC_OUTPUT_INHERIT &&
599 i == EXEC_INPUT_NULL &&
600 !is_terminal_input(context->std_input) &&
601 getppid () != 1)
602 return fileno;
603
604 /* Duplicate from stdout if possible */
52c239d7 605 if ((e == o && e != EXEC_OUTPUT_NAMED_FD) || e == EXEC_OUTPUT_INHERIT)
eb17e935 606 return dup2(STDOUT_FILENO, fileno) < 0 ? -errno : fileno;
071830ff 607
eb17e935 608 o = e;
80876c20 609
eb17e935 610 } else if (o == EXEC_OUTPUT_INHERIT) {
21d21ea4
LP
611 /* If input got downgraded, inherit the original value */
612 if (i == EXEC_INPUT_NULL && is_terminal_input(context->std_input))
1e22b5cd 613 return open_terminal_as(exec_context_tty_path(context), O_WRONLY, fileno);
21d21ea4 614
08f3be7a
LP
615 /* If the input is connected to anything that's not a /dev/null or a data fd, inherit that... */
616 if (!IN_SET(i, EXEC_INPUT_NULL, EXEC_INPUT_DATA))
eb17e935 617 return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
071830ff 618
acb591e4
LP
619 /* If we are not started from PID 1 we just inherit STDOUT from our parent process. */
620 if (getppid() != 1)
eb17e935 621 return fileno;
94f04347 622
eb17e935
MS
623 /* We need to open /dev/null here anew, to get the right access mode. */
624 return open_null_as(O_WRONLY, fileno);
071830ff 625 }
94f04347 626
eb17e935 627 switch (o) {
80876c20
LP
628
629 case EXEC_OUTPUT_NULL:
eb17e935 630 return open_null_as(O_WRONLY, fileno);
80876c20
LP
631
632 case EXEC_OUTPUT_TTY:
4f2d528d 633 if (is_terminal_input(i))
eb17e935 634 return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
80876c20
LP
635
636 /* We don't reset the terminal if this is just about output */
1e22b5cd 637 return open_terminal_as(exec_context_tty_path(context), O_WRONLY, fileno);
80876c20
LP
638
639 case EXEC_OUTPUT_SYSLOG:
28dbc1e8 640 case EXEC_OUTPUT_SYSLOG_AND_CONSOLE:
9a6bca7a 641 case EXEC_OUTPUT_KMSG:
28dbc1e8 642 case EXEC_OUTPUT_KMSG_AND_CONSOLE:
706343f4
LP
643 case EXEC_OUTPUT_JOURNAL:
644 case EXEC_OUTPUT_JOURNAL_AND_CONSOLE:
af635cf3 645 r = connect_logger_as(unit, context, params, o, ident, fileno, uid, gid);
47c1d80d 646 if (r < 0) {
82677ae4 647 log_unit_warning_errno(unit, r, "Failed to connect %s to the journal socket, ignoring: %m", fileno == STDOUT_FILENO ? "stdout" : "stderr");
eb17e935 648 r = open_null_as(O_WRONLY, fileno);
7bce046b
LP
649 } else {
650 struct stat st;
651
652 /* If we connected this fd to the journal via a stream, patch the device/inode into the passed
653 * parameters, but only then. This is useful so that we can set $JOURNAL_STREAM that permits
ab2116b1
LP
654 * services to detect whether they are connected to the journal or not.
655 *
656 * If both stdout and stderr are connected to a stream then let's make sure to store the data
657 * about STDERR as that's usually the best way to do logging. */
7bce046b 658
ab2116b1
LP
659 if (fstat(fileno, &st) >= 0 &&
660 (*journal_stream_ino == 0 || fileno == STDERR_FILENO)) {
7bce046b
LP
661 *journal_stream_dev = st.st_dev;
662 *journal_stream_ino = st.st_ino;
663 }
47c1d80d
MS
664 }
665 return r;
4f2d528d
LP
666
667 case EXEC_OUTPUT_SOCKET:
668 assert(socket_fd >= 0);
e75a9ed1 669
eb17e935 670 return dup2(socket_fd, fileno) < 0 ? -errno : fileno;
94f04347 671
52c239d7 672 case EXEC_OUTPUT_NAMED_FD:
e75a9ed1
LP
673 assert(named_iofds[fileno] >= 0);
674
52c239d7
LB
675 (void) fd_nonblock(named_iofds[fileno], false);
676 return dup2(named_iofds[fileno], fileno) < 0 ? -errno : fileno;
677
566b7d23
ZD
678 case EXEC_OUTPUT_FILE:
679 case EXEC_OUTPUT_FILE_APPEND: {
2038c3f5 680 bool rw;
566b7d23 681 int fd, flags;
2038c3f5
LP
682
683 assert(context->stdio_file[fileno]);
684
685 rw = context->std_input == EXEC_INPUT_FILE &&
686 streq_ptr(context->stdio_file[fileno], context->stdio_file[STDIN_FILENO]);
687
688 if (rw)
689 return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
690
566b7d23
ZD
691 flags = O_WRONLY;
692 if (o == EXEC_OUTPUT_FILE_APPEND)
693 flags |= O_APPEND;
694
695 fd = acquire_path(context->stdio_file[fileno], flags, 0666 & ~context->umask);
696
2038c3f5
LP
697 if (fd < 0)
698 return fd;
699
566b7d23 700 return move_fd(fd, fileno, 0);
2038c3f5
LP
701 }
702
94f04347 703 default:
80876c20 704 assert_not_reached("Unknown error type");
94f04347 705 }
071830ff
LP
706}
707
02a51aba
LP
708static int chown_terminal(int fd, uid_t uid) {
709 struct stat st;
710
711 assert(fd >= 0);
02a51aba 712
1ff74fb6
LP
713 /* Before we chown/chmod the TTY, let's ensure this is actually a tty */
714 if (isatty(fd) < 1)
715 return 0;
716
02a51aba 717 /* This might fail. What matters are the results. */
bab45044
LP
718 (void) fchown(fd, uid, -1);
719 (void) fchmod(fd, TTY_MODE);
02a51aba
LP
720
721 if (fstat(fd, &st) < 0)
722 return -errno;
723
d8b4e2e9 724 if (st.st_uid != uid || (st.st_mode & 0777) != TTY_MODE)
02a51aba
LP
725 return -EPERM;
726
727 return 0;
728}
729
7d5ceb64 730static int setup_confirm_stdio(const char *vc, int *_saved_stdin, int *_saved_stdout) {
3d18b167
LP
731 _cleanup_close_ int fd = -1, saved_stdin = -1, saved_stdout = -1;
732 int r;
80876c20 733
80876c20
LP
734 assert(_saved_stdin);
735 assert(_saved_stdout);
736
af6da548
LP
737 saved_stdin = fcntl(STDIN_FILENO, F_DUPFD, 3);
738 if (saved_stdin < 0)
739 return -errno;
80876c20 740
af6da548 741 saved_stdout = fcntl(STDOUT_FILENO, F_DUPFD, 3);
3d18b167
LP
742 if (saved_stdout < 0)
743 return -errno;
80876c20 744
8854d795 745 fd = acquire_terminal(vc, ACQUIRE_TERMINAL_WAIT, DEFAULT_CONFIRM_USEC);
3d18b167
LP
746 if (fd < 0)
747 return fd;
80876c20 748
af6da548
LP
749 r = chown_terminal(fd, getuid());
750 if (r < 0)
3d18b167 751 return r;
02a51aba 752
3d18b167
LP
753 r = reset_terminal_fd(fd, true);
754 if (r < 0)
755 return r;
80876c20 756
2b33ab09 757 r = rearrange_stdio(fd, fd, STDERR_FILENO);
3d18b167 758 fd = -1;
2b33ab09
LP
759 if (r < 0)
760 return r;
80876c20
LP
761
762 *_saved_stdin = saved_stdin;
763 *_saved_stdout = saved_stdout;
764
3d18b167 765 saved_stdin = saved_stdout = -1;
80876c20 766
3d18b167 767 return 0;
80876c20
LP
768}
769
63d77c92 770static void write_confirm_error_fd(int err, int fd, const Unit *u) {
3b20f877
FB
771 assert(err < 0);
772
773 if (err == -ETIMEDOUT)
63d77c92 774 dprintf(fd, "Confirmation question timed out for %s, assuming positive response.\n", u->id);
3b20f877
FB
775 else {
776 errno = -err;
63d77c92 777 dprintf(fd, "Couldn't ask confirmation for %s: %m, assuming positive response.\n", u->id);
3b20f877
FB
778 }
779}
780
63d77c92 781static void write_confirm_error(int err, const char *vc, const Unit *u) {
03e334a1 782 _cleanup_close_ int fd = -1;
80876c20 783
3b20f877 784 assert(vc);
80876c20 785
7d5ceb64 786 fd = open_terminal(vc, O_WRONLY|O_NOCTTY|O_CLOEXEC);
af6da548 787 if (fd < 0)
3b20f877 788 return;
80876c20 789
63d77c92 790 write_confirm_error_fd(err, fd, u);
af6da548 791}
80876c20 792
/* Undoes setup_confirm_stdio(): releases the terminal, puts the saved fds back in place as stdin
 * and stdout, and closes the saved copies (storing the return value of safe_close() in their
 * place).
 *
 * Returns 0 on success, otherwise the negative errno value of the last dup2() that failed. Both
 * fds are processed even if restoring the first one fails. */
static int restore_confirm_stdio(int *saved_stdin, int *saved_stdout) {
        int ret = 0;

        assert(saved_stdin);
        assert(saved_stdout);

        release_terminal();

        if (*saved_stdin >= 0 && dup2(*saved_stdin, STDIN_FILENO) < 0)
                ret = -errno;

        if (*saved_stdout >= 0 && dup2(*saved_stdout, STDOUT_FILENO) < 0)
                ret = -errno;

        *saved_stdin = safe_close(*saved_stdin);
        *saved_stdout = safe_close(*saved_stdout);

        return ret;
}
814
3b20f877
FB
/* Possible outcomes of ask_for_confirmation() */
enum {
        CONFIRM_PRETEND_FAILURE = -1,   /* Don't execute the command, and act as if it failed */
        CONFIRM_PRETEND_SUCCESS =  0,   /* Don't execute the command, and act as if it succeeded */
        CONFIRM_EXECUTE         =  1,   /* Go ahead and execute the command */
};
820
eedf223a 821static int ask_for_confirmation(const char *vc, Unit *u, const char *cmdline) {
af6da548 822 int saved_stdout = -1, saved_stdin = -1, r;
2bcd3c26 823 _cleanup_free_ char *e = NULL;
3b20f877 824 char c;
af6da548 825
3b20f877 826 /* For any internal errors, assume a positive response. */
7d5ceb64 827 r = setup_confirm_stdio(vc, &saved_stdin, &saved_stdout);
3b20f877 828 if (r < 0) {
63d77c92 829 write_confirm_error(r, vc, u);
3b20f877
FB
830 return CONFIRM_EXECUTE;
831 }
af6da548 832
b0eb2944
FB
833 /* confirm_spawn might have been disabled while we were sleeping. */
834 if (manager_is_confirm_spawn_disabled(u->manager)) {
835 r = 1;
836 goto restore_stdio;
837 }
af6da548 838
2bcd3c26
FB
839 e = ellipsize(cmdline, 60, 100);
840 if (!e) {
841 log_oom();
842 r = CONFIRM_EXECUTE;
843 goto restore_stdio;
844 }
af6da548 845
d172b175 846 for (;;) {
539622bd 847 r = ask_char(&c, "yfshiDjcn", "Execute %s? [y, f, s – h for help] ", e);
d172b175 848 if (r < 0) {
63d77c92 849 write_confirm_error_fd(r, STDOUT_FILENO, u);
d172b175
FB
850 r = CONFIRM_EXECUTE;
851 goto restore_stdio;
852 }
af6da548 853
d172b175 854 switch (c) {
b0eb2944
FB
855 case 'c':
856 printf("Resuming normal execution.\n");
857 manager_disable_confirm_spawn();
858 r = 1;
859 break;
dd6f9ac0
FB
860 case 'D':
861 unit_dump(u, stdout, " ");
862 continue; /* ask again */
d172b175
FB
863 case 'f':
864 printf("Failing execution.\n");
865 r = CONFIRM_PRETEND_FAILURE;
866 break;
867 case 'h':
b0eb2944
FB
868 printf(" c - continue, proceed without asking anymore\n"
869 " D - dump, show the state of the unit\n"
dd6f9ac0 870 " f - fail, don't execute the command and pretend it failed\n"
d172b175 871 " h - help\n"
eedf223a 872 " i - info, show a short summary of the unit\n"
56fde33a 873 " j - jobs, show jobs that are in progress\n"
d172b175
FB
874 " s - skip, don't execute the command and pretend it succeeded\n"
875 " y - yes, execute the command\n");
dd6f9ac0 876 continue; /* ask again */
eedf223a
FB
877 case 'i':
878 printf(" Description: %s\n"
879 " Unit: %s\n"
880 " Command: %s\n",
881 u->id, u->description, cmdline);
882 continue; /* ask again */
56fde33a
FB
883 case 'j':
884 manager_dump_jobs(u->manager, stdout, " ");
885 continue; /* ask again */
539622bd
FB
886 case 'n':
887 /* 'n' was removed in favor of 'f'. */
888 printf("Didn't understand 'n', did you mean 'f'?\n");
889 continue; /* ask again */
d172b175
FB
890 case 's':
891 printf("Skipping execution.\n");
892 r = CONFIRM_PRETEND_SUCCESS;
893 break;
894 case 'y':
895 r = CONFIRM_EXECUTE;
896 break;
897 default:
898 assert_not_reached("Unhandled choice");
899 }
3b20f877 900 break;
3b20f877 901 }
af6da548 902
3b20f877 903restore_stdio:
af6da548 904 restore_confirm_stdio(&saved_stdin, &saved_stdout);
af6da548 905 return r;
80876c20
LP
906}
907
4d885bd3
DH
908static int get_fixed_user(const ExecContext *c, const char **user,
909 uid_t *uid, gid_t *gid,
910 const char **home, const char **shell) {
81a2b7ce 911 int r;
4d885bd3 912 const char *name;
81a2b7ce 913
4d885bd3 914 assert(c);
81a2b7ce 915
23deef88
LP
916 if (!c->user)
917 return 0;
918
4d885bd3
DH
919 /* Note that we don't set $HOME or $SHELL if they are not particularly enlightening anyway
920 * (i.e. are "/" or "/bin/nologin"). */
81a2b7ce 921
23deef88 922 name = c->user;
fafff8f1 923 r = get_user_creds(&name, uid, gid, home, shell, USER_CREDS_CLEAN);
4d885bd3
DH
924 if (r < 0)
925 return r;
81a2b7ce 926
4d885bd3
DH
927 *user = name;
928 return 0;
929}
930
931static int get_fixed_group(const ExecContext *c, const char **group, gid_t *gid) {
932 int r;
933 const char *name;
934
935 assert(c);
936
937 if (!c->group)
938 return 0;
939
940 name = c->group;
fafff8f1 941 r = get_group_creds(&name, gid, 0);
4d885bd3
DH
942 if (r < 0)
943 return r;
944
945 *group = name;
946 return 0;
947}
948
cdc5d5c5
DH
949static int get_supplementary_groups(const ExecContext *c, const char *user,
950 const char *group, gid_t gid,
951 gid_t **supplementary_gids, int *ngids) {
4d885bd3
DH
952 char **i;
953 int r, k = 0;
954 int ngroups_max;
955 bool keep_groups = false;
956 gid_t *groups = NULL;
957 _cleanup_free_ gid_t *l_gids = NULL;
958
959 assert(c);
960
bbeea271
DH
961 /*
962 * If user is given, then lookup GID and supplementary groups list.
963 * We avoid NSS lookups for gid=0. Also we have to initialize groups
cdc5d5c5
DH
964 * here and as early as possible so we keep the list of supplementary
965 * groups of the caller.
bbeea271
DH
966 */
967 if (user && gid_is_valid(gid) && gid != 0) {
968 /* First step, initialize groups from /etc/groups */
969 if (initgroups(user, gid) < 0)
970 return -errno;
971
972 keep_groups = true;
973 }
974
ac6e8be6 975 if (strv_isempty(c->supplementary_groups))
4d885bd3
DH
976 return 0;
977
366ddd25
DH
978 /*
979 * If SupplementaryGroups= was passed then NGROUPS_MAX has to
980 * be positive, otherwise fail.
981 */
982 errno = 0;
983 ngroups_max = (int) sysconf(_SC_NGROUPS_MAX);
984 if (ngroups_max <= 0) {
985 if (errno > 0)
986 return -errno;
987 else
988 return -EOPNOTSUPP; /* For all other values */
989 }
990
4d885bd3
DH
991 l_gids = new(gid_t, ngroups_max);
992 if (!l_gids)
993 return -ENOMEM;
81a2b7ce 994
4d885bd3
DH
995 if (keep_groups) {
996 /*
997 * Lookup the list of groups that the user belongs to, we
998 * avoid NSS lookups here too for gid=0.
999 */
1000 k = ngroups_max;
1001 if (getgrouplist(user, gid, l_gids, &k) < 0)
1002 return -EINVAL;
1003 } else
1004 k = 0;
81a2b7ce 1005
4d885bd3
DH
1006 STRV_FOREACH(i, c->supplementary_groups) {
1007 const char *g;
81a2b7ce 1008
4d885bd3
DH
1009 if (k >= ngroups_max)
1010 return -E2BIG;
81a2b7ce 1011
4d885bd3 1012 g = *i;
fafff8f1 1013 r = get_group_creds(&g, l_gids+k, 0);
4d885bd3
DH
1014 if (r < 0)
1015 return r;
81a2b7ce 1016
4d885bd3
DH
1017 k++;
1018 }
81a2b7ce 1019
4d885bd3
DH
1020 /*
1021 * Sets ngids to zero to drop all supplementary groups, happens
1022 * when we are under root and SupplementaryGroups= is empty.
1023 */
1024 if (k == 0) {
1025 *ngids = 0;
1026 return 0;
1027 }
81a2b7ce 1028
4d885bd3
DH
1029 /* Otherwise get the final list of supplementary groups */
1030 groups = memdup(l_gids, sizeof(gid_t) * k);
1031 if (!groups)
1032 return -ENOMEM;
1033
1034 *supplementary_gids = groups;
1035 *ngids = k;
1036
1037 groups = NULL;
1038
1039 return 0;
1040}
1041
/* Applies group credentials to the calling process: first the supplementary groups (only if the
 * list is non-empty), then real, effective and saved GID (only if 'gid' is valid).
 *
 * Returns 0 on success, a negative errno-style value otherwise. */
static int enforce_groups(gid_t gid, const gid_t *supplementary_gids, int ngids) {

        if (ngids > 0) {
                int r;

                r = maybe_setgroups(ngids, supplementary_gids);
                if (r < 0)
                        return r;
        }

        if (gid_is_valid(gid) && setresgid(gid, gid, gid) < 0)
                return -errno;

        return 0;
}
1060
1061static int enforce_user(const ExecContext *context, uid_t uid) {
81a2b7ce
LP
1062 assert(context);
1063
4d885bd3
DH
1064 if (!uid_is_valid(uid))
1065 return 0;
1066
479050b3 1067 /* Sets (but doesn't look up) the uid and make sure we keep the
81a2b7ce
LP
1068 * capabilities while doing so. */
1069
479050b3 1070 if (context->capability_ambient_set != 0) {
81a2b7ce
LP
1071
1072 /* First step: If we need to keep capabilities but
1073 * drop privileges we need to make sure we keep our
cbb21cca 1074 * caps, while we drop privileges. */
693ced48 1075 if (uid != 0) {
cbb21cca 1076 int sb = context->secure_bits | 1<<SECURE_KEEP_CAPS;
693ced48
LP
1077
1078 if (prctl(PR_GET_SECUREBITS) != sb)
1079 if (prctl(PR_SET_SECUREBITS, sb) < 0)
1080 return -errno;
1081 }
81a2b7ce
LP
1082 }
1083
479050b3 1084 /* Second step: actually set the uids */
81a2b7ce
LP
1085 if (setresuid(uid, uid, uid) < 0)
1086 return -errno;
1087
1088 /* At this point we should have all necessary capabilities but
1089 are otherwise a normal user. However, the caps might got
1090 corrupted due to the setresuid() so we need clean them up
1091 later. This is done outside of this call. */
1092
1093 return 0;
1094}
1095
349cc4a5 1096#if HAVE_PAM
5b6319dc
LP
1097
1098static int null_conv(
1099 int num_msg,
1100 const struct pam_message **msg,
1101 struct pam_response **resp,
1102 void *appdata_ptr) {
1103
1104 /* We don't support conversations */
1105
1106 return PAM_CONV_ERR;
1107}
1108
cefc33ae
LP
1109#endif
1110
5b6319dc
LP
/* Opens a PAM session for the PAM service 'name' and the user 'user', and forks off a helper
 * process "(sd-pam)" whose job it is to close that session again later.
 *
 *   name      PAM service name passed to pam_start()
 *   user      user name passed to pam_start()
 *   uid, gid  credentials the helper child switches to via setresuid()/setresgid()
 *   tty       TTY to register as PAM_TTY; if NULL we try to derive one from stdin
 *   env       in: environment entries fed to PAM via pam_putenv(); out: on success replaced by
 *             the list returned by pam_getenvlist()
 *   fds/n_fds file descriptors that are closed in the helper child
 *
 * On success the result of strv_free_and_replace() is returned. On failure a negative errno-style
 * value is returned: -EPERM for any PAM-level error (PAM codes do not map to errno), otherwise the
 * error of the failing non-PAM step. When compiled without PAM support this is a NOP returning 0. */
static int setup_pam(
                const char *name,
                const char *user,
                uid_t uid,
                gid_t gid,
                const char *tty,
                char ***env,
                int fds[], size_t n_fds) {

#if HAVE_PAM

        static const struct pam_conv conv = {
                .conv = null_conv,
                .appdata_ptr = NULL
        };

        _cleanup_(barrier_destroy) Barrier barrier = BARRIER_NULL;
        pam_handle_t *handle = NULL;
        sigset_t old_ss;
        int pam_code = PAM_SUCCESS, r;
        char **nv, **e = NULL;
        bool close_session = false;
        pid_t pam_pid = 0, parent_pid;
        int flags = 0;

        assert(name);
        assert(user);
        assert(env);

        /* We set up PAM in the parent process, then fork. The child
         * will then stay around until killed via PR_SET_PDEATHSIG or
         * systemd via the cgroup logic. It will then remove the PAM
         * session again. The parent process will exec() the actual
         * daemon. We do things this way to ensure that the main PID
         * of the daemon is the one we initially fork()ed. */

        r = barrier_create(&barrier);
        if (r < 0)
                goto fail;

        /* Keep PAM quiet unless debug logging is enabled */
        if (log_get_max_level() < LOG_DEBUG)
                flags |= PAM_SILENT;

        pam_code = pam_start(name, user, &conv, &handle);
        if (pam_code != PAM_SUCCESS) {
                /* Make sure the failure path below neither closes a session on nor ends a handle
                 * that was never properly started */
                handle = NULL;
                goto fail;
        }

        if (!tty) {
                _cleanup_free_ char *q = NULL;

                /* Hmm, so no TTY was explicitly passed, but an fd passed to us directly might be a TTY. Let's figure
                 * out if that's the case, and read the TTY off it. */

                if (getttyname_malloc(STDIN_FILENO, &q) >= 0)
                        tty = strjoina("/dev/", q);
        }

        if (tty) {
                pam_code = pam_set_item(handle, PAM_TTY, tty);
                if (pam_code != PAM_SUCCESS)
                        goto fail;
        }

        /* Pass each entry of the caller's environment on to PAM */
        STRV_FOREACH(nv, *env) {
                pam_code = pam_putenv(handle, *nv);
                if (pam_code != PAM_SUCCESS)
                        goto fail;
        }

        pam_code = pam_acct_mgmt(handle, flags);
        if (pam_code != PAM_SUCCESS)
                goto fail;

        pam_code = pam_open_session(handle, flags);
        if (pam_code != PAM_SUCCESS)
                goto fail;

        /* From here on the failure path has to close the session again */
        close_session = true;

        e = pam_getenvlist(handle);
        if (!e) {
                pam_code = PAM_BUF_ERR;
                goto fail;
        }

        /* Block SIGTERM, so that we know that it won't get lost in
         * the child */

        assert_se(sigprocmask_many(SIG_BLOCK, &old_ss, SIGTERM, -1) >= 0);

        /* Remember our PID, so that the child can tell whether we are still its parent */
        parent_pid = getpid_cached();

        r = safe_fork("(sd-pam)", 0, &pam_pid);
        if (r < 0)
                goto fail;
        if (r == 0) {
                int sig, ret = EXIT_PAM;

                /* The child's job is to reset the PAM session on
                 * termination */
                barrier_set_role(&barrier, BARRIER_CHILD);

                /* Make sure we don't keep open the passed fds in this child. We assume that otherwise only those fds
                 * are open here that have been opened by PAM. */
                (void) close_many(fds, n_fds);

                /* Drop privileges - we don't need any to pam_close_session
                 * and this will make PR_SET_PDEATHSIG work in most cases.
                 * If this fails, ignore the error - but expect sd-pam threads
                 * to fail to exit normally */

                r = maybe_setgroups(0, NULL);
                if (r < 0)
                        log_warning_errno(r, "Failed to setgroups() in sd-pam: %m");
                if (setresgid(gid, gid, gid) < 0)
                        log_warning_errno(errno, "Failed to setresgid() in sd-pam: %m");
                if (setresuid(uid, uid, uid) < 0)
                        log_warning_errno(errno, "Failed to setresuid() in sd-pam: %m");

                (void) ignore_signals(SIGPIPE, -1);

                /* Wait until our parent died. This will only work if
                 * the above setresuid() succeeds, otherwise the kernel
                 * will not allow unprivileged parents kill their privileged
                 * children this way. We rely on the control groups kill logic
                 * to do the rest for us. */
                if (prctl(PR_SET_PDEATHSIG, SIGTERM) < 0)
                        goto child_finish;

                /* Tell the parent that our setup is done. This is especially
                 * important regarding dropping privileges. Otherwise, unit
                 * setup might race against our setresuid(2) call.
                 *
                 * If the parent aborted, we'll detect this below, hence ignore
                 * return failure here. */
                (void) barrier_place(&barrier);

                /* Check if our parent process might already have died? */
                if (getppid() == parent_pid) {
                        sigset_t ss;

                        assert_se(sigemptyset(&ss) >= 0);
                        assert_se(sigaddset(&ss, SIGTERM) >= 0);

                        /* NOTE(review): POSIX specifies that sigwait() returns a positive error
                         * number on failure rather than -1/errno, so the error branch below
                         * presumably never triggers - confirm against the platform's libc. */
                        for (;;) {
                                if (sigwait(&ss, &sig) < 0) {
                                        if (errno == EINTR)
                                                continue;

                                        goto child_finish;
                                }

                                assert(sig == SIGTERM);
                                break;
                        }
                }

                /* If our parent died we'll end the session */
                if (getppid() != parent_pid) {
                        pam_code = pam_close_session(handle, flags);
                        if (pam_code != PAM_SUCCESS)
                                goto child_finish;
                }

                ret = 0;

        child_finish:
                pam_end(handle, pam_code | flags);
                _exit(ret);
        }

        barrier_set_role(&barrier, BARRIER_PARENT);

        /* If the child was forked off successfully it will do all the
         * cleanups, so forget about the handle here. */
        handle = NULL;

        /* Unblock SIGTERM again in the parent */
        assert_se(sigprocmask(SIG_SETMASK, &old_ss, NULL) >= 0);

        /* We close the log explicitly here, since the PAM modules
         * might have opened it, but we don't want this fd around. */
        closelog();

        /* Synchronously wait for the child to initialize. We don't care for
         * errors as we cannot recover. However, warn loudly if it happens. */
        if (!barrier_place_and_sync(&barrier))
                log_error("PAM initialization failed");

        /* Hand the environment list PAM gave us back to the caller, replacing the old one */
        return strv_free_and_replace(*env, e);

fail:
        /* Two kinds of failures end up here: PAM-level ones (pam_code is set) and everything else
         * (r carries a negative errno-style value) */
        if (pam_code != PAM_SUCCESS) {
                log_error("PAM failed: %s", pam_strerror(handle, pam_code));
                r = -EPERM; /* PAM errors do not map to errno */
        } else
                log_error_errno(r, "PAM failed: %m");

        if (handle) {
                if (close_session)
                        pam_code = pam_close_session(handle, flags);

                pam_end(handle, pam_code | flags);
        }

        strv_free(e);
        closelog();

        return r;
#else
        return 0;
#endif
}
5b6319dc 1326
5d6b1584
LP
/* Renames the calling process to "(<name>)", where <name> is derived from the last component of
 * 'path'. If that component is empty the fixed name "(...)" is used. Names longer than 8
 * characters are shortened to their last 8 characters. */
static void rename_process_from_path(const char *path) {
        char buf[11]; /* '(' + up to 8 characters + ')' + NUL */
        const char *tail;
        size_t n;

        /* The resulting string must fit in 10 chars (i.e. the length of "/sbin/init") to look
         * pretty in /bin/ps */

        tail = basename(path);
        if (isempty(tail)) {
                rename_process("(...)");
                return;
        }

        n = strlen(tail);
        if (n > 8) {
                /* Keep the end of the name rather than the beginning: it is usually the more
                 * interesting part, since the first bit might just be "systemd-" */
                tail += n - 8;
                n = 8;
        }

        buf[0] = '(';
        memcpy(buf + 1, tail, n);
        buf[n + 1] = ')';
        buf[n + 2] = 0;

        rename_process(buf);
}
1357
469830d1
LP
1358static bool context_has_address_families(const ExecContext *c) {
1359 assert(c);
1360
1361 return c->address_families_whitelist ||
1362 !set_isempty(c->address_families);
1363}
1364
1365static bool context_has_syscall_filters(const ExecContext *c) {
1366 assert(c);
1367
1368 return c->syscall_whitelist ||
8cfa775f 1369 !hashmap_isempty(c->syscall_filter);
469830d1
LP
1370}
1371
/* Decides whether the no-new-privileges flag is needed for this context: either because it was
 * requested explicitly, or because we lack CAP_SYS_ADMIN and at least one of the seccomp-backed
 * settings is in use. */
static bool context_has_no_new_privileges(const ExecContext *c) {
        assert(c);

        /* Explicitly requested? */
        if (c->no_new_privileges)
                return true;

        /* If we are privileged, we don't need NNP */
        if (have_effective_cap(CAP_SYS_ADMIN))
                return false;

        /* We are unprivileged: NNP is needed as soon as any form of seccomp is used */
        if (context_has_address_families(c) ||
            c->memory_deny_write_execute ||
            c->restrict_realtime ||
            exec_context_restrict_namespaces_set(c))
                return true;

        if (c->protect_kernel_tunables ||
            c->protect_kernel_modules ||
            c->private_devices)
                return true;

        if (context_has_syscall_filters(c) ||
            !set_isempty(c->syscall_archs))
                return true;

        return c->lock_personality;
}
1393
349cc4a5 1394#if HAVE_SECCOMP
17df7223 1395
83f12b27 1396static bool skip_seccomp_unavailable(const Unit* u, const char* msg) {
f673b62d
LP
1397
1398 if (is_seccomp_available())
1399 return false;
1400
f673b62d 1401 log_unit_debug(u, "SECCOMP features not detected in the kernel, skipping %s", msg);
f673b62d 1402 return true;
83f12b27
FS
1403}
1404
165a31c0 1405static int apply_syscall_filter(const Unit* u, const ExecContext *c, bool needs_ambient_hack) {
469830d1 1406 uint32_t negative_action, default_action, action;
165a31c0 1407 int r;
8351ceae 1408
469830d1 1409 assert(u);
c0467cf3 1410 assert(c);
8351ceae 1411
469830d1 1412 if (!context_has_syscall_filters(c))
83f12b27
FS
1413 return 0;
1414
469830d1
LP
1415 if (skip_seccomp_unavailable(u, "SystemCallFilter="))
1416 return 0;
e9642be2 1417
469830d1 1418 negative_action = c->syscall_errno == 0 ? SCMP_ACT_KILL : SCMP_ACT_ERRNO(c->syscall_errno);
e9642be2 1419
469830d1
LP
1420 if (c->syscall_whitelist) {
1421 default_action = negative_action;
1422 action = SCMP_ACT_ALLOW;
7c66bae2 1423 } else {
469830d1
LP
1424 default_action = SCMP_ACT_ALLOW;
1425 action = negative_action;
57183d11 1426 }
8351ceae 1427
165a31c0
LP
1428 if (needs_ambient_hack) {
1429 r = seccomp_filter_set_add(c->syscall_filter, c->syscall_whitelist, syscall_filter_sets + SYSCALL_FILTER_SET_SETUID);
1430 if (r < 0)
1431 return r;
1432 }
1433
b54f36c6 1434 return seccomp_load_syscall_filter_set_raw(default_action, c->syscall_filter, action, false);
4298d0b5
LP
1435}
1436
469830d1
LP
1437static int apply_syscall_archs(const Unit *u, const ExecContext *c) {
1438 assert(u);
4298d0b5
LP
1439 assert(c);
1440
469830d1 1441 if (set_isempty(c->syscall_archs))
83f12b27
FS
1442 return 0;
1443
469830d1
LP
1444 if (skip_seccomp_unavailable(u, "SystemCallArchitectures="))
1445 return 0;
4298d0b5 1446
469830d1
LP
1447 return seccomp_restrict_archs(c->syscall_archs);
1448}
4298d0b5 1449
469830d1
LP
1450static int apply_address_families(const Unit* u, const ExecContext *c) {
1451 assert(u);
1452 assert(c);
4298d0b5 1453
469830d1
LP
1454 if (!context_has_address_families(c))
1455 return 0;
4298d0b5 1456
469830d1
LP
1457 if (skip_seccomp_unavailable(u, "RestrictAddressFamilies="))
1458 return 0;
4298d0b5 1459
469830d1 1460 return seccomp_restrict_address_families(c->address_families, c->address_families_whitelist);
8351ceae 1461}
4298d0b5 1462
83f12b27 1463static int apply_memory_deny_write_execute(const Unit* u, const ExecContext *c) {
469830d1 1464 assert(u);
f3e43635
TM
1465 assert(c);
1466
469830d1 1467 if (!c->memory_deny_write_execute)
83f12b27
FS
1468 return 0;
1469
469830d1
LP
1470 if (skip_seccomp_unavailable(u, "MemoryDenyWriteExecute="))
1471 return 0;
f3e43635 1472
469830d1 1473 return seccomp_memory_deny_write_execute();
f3e43635
TM
1474}
1475
83f12b27 1476static int apply_restrict_realtime(const Unit* u, const ExecContext *c) {
469830d1 1477 assert(u);
f4170c67
LP
1478 assert(c);
1479
469830d1 1480 if (!c->restrict_realtime)
83f12b27
FS
1481 return 0;
1482
469830d1
LP
1483 if (skip_seccomp_unavailable(u, "RestrictRealtime="))
1484 return 0;
f4170c67 1485
469830d1 1486 return seccomp_restrict_realtime();
f4170c67
LP
1487}
1488
59e856c7 1489static int apply_protect_sysctl(const Unit *u, const ExecContext *c) {
469830d1 1490 assert(u);
59eeb84b
LP
1491 assert(c);
1492
1493 /* Turn off the legacy sysctl() system call. Many distributions turn this off while building the kernel, but
1494 * let's protect even those systems where this is left on in the kernel. */
1495
469830d1 1496 if (!c->protect_kernel_tunables)
59eeb84b
LP
1497 return 0;
1498
469830d1
LP
1499 if (skip_seccomp_unavailable(u, "ProtectKernelTunables="))
1500 return 0;
59eeb84b 1501
469830d1 1502 return seccomp_protect_sysctl();
59eeb84b
LP
1503}
1504
59e856c7 1505static int apply_protect_kernel_modules(const Unit *u, const ExecContext *c) {
469830d1 1506 assert(u);
502d704e
DH
1507 assert(c);
1508
25a8d8a0 1509 /* Turn off module syscalls on ProtectKernelModules=yes */
502d704e 1510
469830d1
LP
1511 if (!c->protect_kernel_modules)
1512 return 0;
1513
502d704e
DH
1514 if (skip_seccomp_unavailable(u, "ProtectKernelModules="))
1515 return 0;
1516
b54f36c6 1517 return seccomp_load_syscall_filter_set(SCMP_ACT_ALLOW, syscall_filter_sets + SYSCALL_FILTER_SET_MODULE, SCMP_ACT_ERRNO(EPERM), false);
502d704e
DH
1518}
1519
59e856c7 1520static int apply_private_devices(const Unit *u, const ExecContext *c) {
469830d1 1521 assert(u);
ba128bb8
LP
1522 assert(c);
1523
8f81a5f6 1524 /* If PrivateDevices= is set, also turn off iopl and all @raw-io syscalls. */
ba128bb8 1525
469830d1
LP
1526 if (!c->private_devices)
1527 return 0;
1528
ba128bb8
LP
1529 if (skip_seccomp_unavailable(u, "PrivateDevices="))
1530 return 0;
1531
b54f36c6 1532 return seccomp_load_syscall_filter_set(SCMP_ACT_ALLOW, syscall_filter_sets + SYSCALL_FILTER_SET_RAW_IO, SCMP_ACT_ERRNO(EPERM), false);
ba128bb8
LP
1533}
1534
34cf6c43 1535static int apply_restrict_namespaces(const Unit *u, const ExecContext *c) {
469830d1 1536 assert(u);
add00535
LP
1537 assert(c);
1538
1539 if (!exec_context_restrict_namespaces_set(c))
1540 return 0;
1541
1542 if (skip_seccomp_unavailable(u, "RestrictNamespaces="))
1543 return 0;
1544
1545 return seccomp_restrict_namespaces(c->restrict_namespaces);
1546}
1547
78e864e5 1548static int apply_lock_personality(const Unit* u, const ExecContext *c) {
e8132d63
LP
1549 unsigned long personality;
1550 int r;
78e864e5
TM
1551
1552 assert(u);
1553 assert(c);
1554
1555 if (!c->lock_personality)
1556 return 0;
1557
1558 if (skip_seccomp_unavailable(u, "LockPersonality="))
1559 return 0;
1560
e8132d63
LP
1561 personality = c->personality;
1562
1563 /* If personality is not specified, use either PER_LINUX or PER_LINUX32 depending on what is currently set. */
1564 if (personality == PERSONALITY_INVALID) {
1565
1566 r = opinionated_personality(&personality);
1567 if (r < 0)
1568 return r;
1569 }
78e864e5
TM
1570
1571 return seccomp_lock_personality(personality);
1572}
1573
c0467cf3 1574#endif
8351ceae 1575
31a7eb86
ZJS
1576static void do_idle_pipe_dance(int idle_pipe[4]) {
1577 assert(idle_pipe);
1578
54eb2300
LP
1579 idle_pipe[1] = safe_close(idle_pipe[1]);
1580 idle_pipe[2] = safe_close(idle_pipe[2]);
31a7eb86
ZJS
1581
1582 if (idle_pipe[0] >= 0) {
1583 int r;
1584
1585 r = fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT_USEC);
1586
1587 if (idle_pipe[3] >= 0 && r == 0 /* timeout */) {
c7cc737f
LP
1588 ssize_t n;
1589
31a7eb86 1590 /* Signal systemd that we are bored and want to continue. */
c7cc737f
LP
1591 n = write(idle_pipe[3], "x", 1);
1592 if (n > 0)
cd972d69
ZJS
1593 /* Wait for systemd to react to the signal above. */
1594 fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT2_USEC);
31a7eb86
ZJS
1595 }
1596
54eb2300 1597 idle_pipe[0] = safe_close(idle_pipe[0]);
31a7eb86
ZJS
1598
1599 }
1600
54eb2300 1601 idle_pipe[3] = safe_close(idle_pipe[3]);
31a7eb86
ZJS
1602}
1603
fb2042dd
YW
1604static const char *exec_directory_env_name_to_string(ExecDirectoryType t);
1605
7cae38c4 1606static int build_environment(
34cf6c43 1607 const Unit *u,
9fa95f85 1608 const ExecContext *c,
1e22b5cd 1609 const ExecParameters *p,
da6053d0 1610 size_t n_fds,
7cae38c4
LP
1611 const char *home,
1612 const char *username,
1613 const char *shell,
7bce046b
LP
1614 dev_t journal_stream_dev,
1615 ino_t journal_stream_ino,
7cae38c4
LP
1616 char ***ret) {
1617
1618 _cleanup_strv_free_ char **our_env = NULL;
fb2042dd 1619 ExecDirectoryType t;
da6053d0 1620 size_t n_env = 0;
7cae38c4
LP
1621 char *x;
1622
4b58153d 1623 assert(u);
7cae38c4 1624 assert(c);
7c1cb6f1 1625 assert(p);
7cae38c4
LP
1626 assert(ret);
1627
fb2042dd 1628 our_env = new0(char*, 14 + _EXEC_DIRECTORY_TYPE_MAX);
7cae38c4
LP
1629 if (!our_env)
1630 return -ENOMEM;
1631
1632 if (n_fds > 0) {
8dd4c05b
LP
1633 _cleanup_free_ char *joined = NULL;
1634
df0ff127 1635 if (asprintf(&x, "LISTEN_PID="PID_FMT, getpid_cached()) < 0)
7cae38c4
LP
1636 return -ENOMEM;
1637 our_env[n_env++] = x;
1638
da6053d0 1639 if (asprintf(&x, "LISTEN_FDS=%zu", n_fds) < 0)
7cae38c4
LP
1640 return -ENOMEM;
1641 our_env[n_env++] = x;
8dd4c05b 1642
1e22b5cd 1643 joined = strv_join(p->fd_names, ":");
8dd4c05b
LP
1644 if (!joined)
1645 return -ENOMEM;
1646
605405c6 1647 x = strjoin("LISTEN_FDNAMES=", joined);
8dd4c05b
LP
1648 if (!x)
1649 return -ENOMEM;
1650 our_env[n_env++] = x;
7cae38c4
LP
1651 }
1652
b08af3b1 1653 if ((p->flags & EXEC_SET_WATCHDOG) && p->watchdog_usec > 0) {
df0ff127 1654 if (asprintf(&x, "WATCHDOG_PID="PID_FMT, getpid_cached()) < 0)
09812eb7
LP
1655 return -ENOMEM;
1656 our_env[n_env++] = x;
1657
1e22b5cd 1658 if (asprintf(&x, "WATCHDOG_USEC="USEC_FMT, p->watchdog_usec) < 0)
09812eb7
LP
1659 return -ENOMEM;
1660 our_env[n_env++] = x;
1661 }
1662
fd63e712
LP
1663 /* If this is D-Bus, tell the nss-systemd module, since it relies on being able to use D-Bus look up dynamic
1664 * users via PID 1, possibly dead-locking the dbus daemon. This way it will not use D-Bus to resolve names, but
1665 * check the database directly. */
ac647978 1666 if (p->flags & EXEC_NSS_BYPASS_BUS) {
fd63e712
LP
1667 x = strdup("SYSTEMD_NSS_BYPASS_BUS=1");
1668 if (!x)
1669 return -ENOMEM;
1670 our_env[n_env++] = x;
1671 }
1672
7cae38c4
LP
1673 if (home) {
1674 x = strappend("HOME=", home);
1675 if (!x)
1676 return -ENOMEM;
1677 our_env[n_env++] = x;
1678 }
1679
1680 if (username) {
1681 x = strappend("LOGNAME=", username);
1682 if (!x)
1683 return -ENOMEM;
1684 our_env[n_env++] = x;
1685
1686 x = strappend("USER=", username);
1687 if (!x)
1688 return -ENOMEM;
1689 our_env[n_env++] = x;
1690 }
1691
1692 if (shell) {
1693 x = strappend("SHELL=", shell);
1694 if (!x)
1695 return -ENOMEM;
1696 our_env[n_env++] = x;
1697 }
1698
4b58153d
LP
1699 if (!sd_id128_is_null(u->invocation_id)) {
1700 if (asprintf(&x, "INVOCATION_ID=" SD_ID128_FORMAT_STR, SD_ID128_FORMAT_VAL(u->invocation_id)) < 0)
1701 return -ENOMEM;
1702
1703 our_env[n_env++] = x;
1704 }
1705
6af760f3
LP
1706 if (exec_context_needs_term(c)) {
1707 const char *tty_path, *term = NULL;
1708
1709 tty_path = exec_context_tty_path(c);
1710
1711 /* If we are forked off PID 1 and we are supposed to operate on /dev/console, then let's try to inherit
1712 * the $TERM set for PID 1. This is useful for containers so that the $TERM the container manager
1713 * passes to PID 1 ends up all the way in the console login shown. */
1714
1715 if (path_equal(tty_path, "/dev/console") && getppid() == 1)
1716 term = getenv("TERM");
1717 if (!term)
1718 term = default_term_for_tty(tty_path);
7cae38c4 1719
6af760f3 1720 x = strappend("TERM=", term);
7cae38c4
LP
1721 if (!x)
1722 return -ENOMEM;
1723 our_env[n_env++] = x;
1724 }
1725
7bce046b
LP
1726 if (journal_stream_dev != 0 && journal_stream_ino != 0) {
1727 if (asprintf(&x, "JOURNAL_STREAM=" DEV_FMT ":" INO_FMT, journal_stream_dev, journal_stream_ino) < 0)
1728 return -ENOMEM;
1729
1730 our_env[n_env++] = x;
1731 }
1732
fb2042dd
YW
1733 for (t = 0; t < _EXEC_DIRECTORY_TYPE_MAX; t++) {
1734 _cleanup_free_ char *pre = NULL, *joined = NULL;
1735 const char *n;
1736
1737 if (!p->prefix[t])
1738 continue;
1739
1740 if (strv_isempty(c->directories[t].paths))
1741 continue;
1742
1743 n = exec_directory_env_name_to_string(t);
1744 if (!n)
1745 continue;
1746
1747 pre = strjoin(p->prefix[t], "/");
1748 if (!pre)
1749 return -ENOMEM;
1750
1751 joined = strv_join_prefix(c->directories[t].paths, ":", pre);
1752 if (!joined)
1753 return -ENOMEM;
1754
1755 x = strjoin(n, "=", joined);
1756 if (!x)
1757 return -ENOMEM;
1758
1759 our_env[n_env++] = x;
1760 }
1761
7cae38c4 1762 our_env[n_env++] = NULL;
fb2042dd 1763 assert(n_env <= 14 + _EXEC_DIRECTORY_TYPE_MAX);
7cae38c4 1764
ae2a15bc 1765 *ret = TAKE_PTR(our_env);
7cae38c4
LP
1766
1767 return 0;
1768}
1769
b4c14404
FB
1770static int build_pass_environment(const ExecContext *c, char ***ret) {
1771 _cleanup_strv_free_ char **pass_env = NULL;
1772 size_t n_env = 0, n_bufsize = 0;
1773 char **i;
1774
1775 STRV_FOREACH(i, c->pass_environment) {
1776 _cleanup_free_ char *x = NULL;
1777 char *v;
1778
1779 v = getenv(*i);
1780 if (!v)
1781 continue;
605405c6 1782 x = strjoin(*i, "=", v);
b4c14404
FB
1783 if (!x)
1784 return -ENOMEM;
00819cc1 1785
b4c14404
FB
1786 if (!GREEDY_REALLOC(pass_env, n_bufsize, n_env + 2))
1787 return -ENOMEM;
00819cc1 1788
1cc6c93a 1789 pass_env[n_env++] = TAKE_PTR(x);
b4c14404 1790 pass_env[n_env] = NULL;
b4c14404
FB
1791 }
1792
ae2a15bc 1793 *ret = TAKE_PTR(pass_env);
b4c14404
FB
1794
1795 return 0;
1796}
1797
8b44a3d2
LP
1798static bool exec_needs_mount_namespace(
1799 const ExecContext *context,
1800 const ExecParameters *params,
4657abb5 1801 const ExecRuntime *runtime) {
8b44a3d2
LP
1802
1803 assert(context);
1804 assert(params);
1805
915e6d16
LP
1806 if (context->root_image)
1807 return true;
1808
2a624c36
AP
1809 if (!strv_isempty(context->read_write_paths) ||
1810 !strv_isempty(context->read_only_paths) ||
1811 !strv_isempty(context->inaccessible_paths))
8b44a3d2
LP
1812 return true;
1813
42b1d8e0 1814 if (context->n_bind_mounts > 0)
d2d6c096
LP
1815 return true;
1816
2abd4e38
YW
1817 if (context->n_temporary_filesystems > 0)
1818 return true;
1819
8b44a3d2
LP
1820 if (context->mount_flags != 0)
1821 return true;
1822
1823 if (context->private_tmp && runtime && (runtime->tmp_dir || runtime->var_tmp_dir))
1824 return true;
1825
8b44a3d2 1826 if (context->private_devices ||
228af36f 1827 context->private_mounts ||
8b44a3d2 1828 context->protect_system != PROTECT_SYSTEM_NO ||
59eeb84b
LP
1829 context->protect_home != PROTECT_HOME_NO ||
1830 context->protect_kernel_tunables ||
c575770b 1831 context->protect_kernel_modules ||
59eeb84b 1832 context->protect_control_groups)
8b44a3d2
LP
1833 return true;
1834
37c56f89
YW
1835 if (context->root_directory) {
1836 ExecDirectoryType t;
1837
1838 if (context->mount_apivfs)
1839 return true;
1840
1841 for (t = 0; t < _EXEC_DIRECTORY_TYPE_MAX; t++) {
1842 if (!params->prefix[t])
1843 continue;
1844
1845 if (!strv_isempty(context->directories[t].paths))
1846 return true;
1847 }
1848 }
5d997827 1849
42b1d8e0 1850 if (context->dynamic_user &&
b43ee82f 1851 (!strv_isempty(context->directories[EXEC_DIRECTORY_STATE].paths) ||
42b1d8e0
YW
1852 !strv_isempty(context->directories[EXEC_DIRECTORY_CACHE].paths) ||
1853 !strv_isempty(context->directories[EXEC_DIRECTORY_LOGS].paths)))
1854 return true;
1855
8b44a3d2
LP
1856 return false;
1857}
1858
d251207d
LP
/* Moves the calling process into a new user namespace via unshare(CLONE_NEWUSER) and has a
 * temporary helper child "(sd-userns)" write the setgroups, gid_map and uid_map files of the
 * calling process.
 *
 *   uid, gid  IDs to map onto themselves in addition to root; if 0 or invalid only root is mapped
 *
 * Returns 0 on success. On failure a negative errno-style value is returned: the error reported by
 * the child through the error pipe if there is one, -EIO if the child's report or exit status is
 * not what we expect, or the error of the local step that failed. */
static int setup_private_users(uid_t uid, gid_t gid) {
        _cleanup_free_ char *uid_map = NULL, *gid_map = NULL;
        _cleanup_close_pair_ int errno_pipe[2] = { -1, -1 };
        _cleanup_close_ int unshare_ready_fd = -1;
        _cleanup_(sigkill_waitp) pid_t pid = 0;
        uint64_t c = 1; /* the value written to (and read back from) the eventfd below */
        ssize_t n;
        int r;

        /* Set up a user namespace and map root to root, the selected UID/GID to itself, and everything else to
         * nobody. In order to be able to write this mapping we need CAP_SETUID in the original user namespace, which
         * we however lack after opening the user namespace. To work around this we fork() a temporary child process,
         * which waits for the parent to create the new user namespace while staying in the original namespace. The
         * child then writes the UID mapping, under full privileges. The parent waits for the child to finish and
         * continues execution normally. */

        if (uid != 0 && uid_is_valid(uid)) {
                r = asprintf(&uid_map,
                             "0 0 1\n"                      /* Map root → root */
                             UID_FMT " " UID_FMT " 1\n",    /* Map $UID → $UID */
                             uid, uid);
                if (r < 0)
                        return -ENOMEM;
        } else {
                uid_map = strdup("0 0 1\n");            /* The case where the above is the same */
                if (!uid_map)
                        return -ENOMEM;
        }

        if (gid != 0 && gid_is_valid(gid)) {
                r = asprintf(&gid_map,
                             "0 0 1\n"                      /* Map root → root */
                             GID_FMT " " GID_FMT " 1\n",    /* Map $GID → $GID */
                             gid, gid);
                if (r < 0)
                        return -ENOMEM;
        } else {
                gid_map = strdup("0 0 1\n");            /* The case where the above is the same */
                if (!gid_map)
                        return -ENOMEM;
        }

        /* Create a communication channel so that the parent can tell the child when it finished creating the user
         * namespace. */
        unshare_ready_fd = eventfd(0, EFD_CLOEXEC);
        if (unshare_ready_fd < 0)
                return -errno;

        /* Create a communication channel so that the child can tell the parent a proper error code in case it
         * failed. */
        if (pipe2(errno_pipe, O_CLOEXEC) < 0)
                return -errno;

        r = safe_fork("(sd-userns)", FORK_RESET_SIGNALS|FORK_DEATHSIG, &pid);
        if (r < 0)
                return r;
        if (r == 0) {
                _cleanup_close_ int fd = -1;
                const char *a;
                pid_t ppid;

                /* Child process, running in the original user namespace. Let's update the parent's UID/GID map from
                 * here, after the parent opened its own user namespace. */

                ppid = getppid();
                /* The child only ever writes to the error pipe, hence close the read end */
                errno_pipe[0] = safe_close(errno_pipe[0]);

                /* Wait until the parent unshared the user namespace */
                if (read(unshare_ready_fd, &c, sizeof(c)) < 0) {
                        r = -errno;
                        goto child_fail;
                }

                /* Disable the setgroups() system call in the child user namespace, for good. */
                a = procfs_file_alloca(ppid, "setgroups");
                fd = open(a, O_WRONLY|O_CLOEXEC);
                if (fd < 0) {
                        if (errno != ENOENT) {
                                r = -errno;
                                goto child_fail;
                        }

                        /* If the file is missing the kernel is too old, let's continue anyway. */
                } else {
                        if (write(fd, "deny\n", 5) < 0) {
                                r = -errno;
                                goto child_fail;
                        }

                        fd = safe_close(fd);
                }

                /* First write the GID map */
                a = procfs_file_alloca(ppid, "gid_map");
                fd = open(a, O_WRONLY|O_CLOEXEC);
                if (fd < 0) {
                        r = -errno;
                        goto child_fail;
                }
                if (write(fd, gid_map, strlen(gid_map)) < 0) {
                        r = -errno;
                        goto child_fail;
                }
                fd = safe_close(fd);

                /* Then write the UID map */
                a = procfs_file_alloca(ppid, "uid_map");
                fd = open(a, O_WRONLY|O_CLOEXEC);
                if (fd < 0) {
                        r = -errno;
                        goto child_fail;
                }
                if (write(fd, uid_map, strlen(uid_map)) < 0) {
                        r = -errno;
                        goto child_fail;
                }

                _exit(EXIT_SUCCESS);

        child_fail:
                /* Report the negative errno-style code to the parent, which reads it off the pipe */
                (void) write(errno_pipe[1], &r, sizeof(r));
                _exit(EXIT_FAILURE);
        }

        /* The parent only ever reads from the error pipe, hence close the write end */
        errno_pipe[1] = safe_close(errno_pipe[1]);

        if (unshare(CLONE_NEWUSER) < 0)
                return -errno;

        /* Let the child know that the namespace is ready now */
        if (write(unshare_ready_fd, &c, sizeof(c)) < 0)
                return -errno;

        /* Try to read an error code from the child */
        n = read(errno_pipe[0], &r, sizeof(r));
        if (n < 0)
                return -errno;
        if (n == sizeof(r)) { /* an error code was sent to us */
                if (r < 0)
                        return r;
                return -EIO;
        }
        if (n != 0) /* on success we should have read 0 bytes */
                return -EIO;

        r = wait_for_terminate_and_check("(sd-userns)", pid, 0);
        /* The child has been waited for, hence reset the PID so that the cleanup handler leaves it alone */
        pid = 0;
        if (r < 0)
                return r;
        if (r != EXIT_SUCCESS) /* If something strange happened with the child, let's consider this fatal, too */
                return -EIO;

        return 0;
}
2013
3536f49e 2014static int setup_exec_directory(
07689d5d
LP
2015 const ExecContext *context,
2016 const ExecParameters *params,
2017 uid_t uid,
3536f49e 2018 gid_t gid,
3536f49e
YW
2019 ExecDirectoryType type,
2020 int *exit_status) {
07689d5d 2021
72fd1768 2022 static const int exit_status_table[_EXEC_DIRECTORY_TYPE_MAX] = {
3536f49e
YW
2023 [EXEC_DIRECTORY_RUNTIME] = EXIT_RUNTIME_DIRECTORY,
2024 [EXEC_DIRECTORY_STATE] = EXIT_STATE_DIRECTORY,
2025 [EXEC_DIRECTORY_CACHE] = EXIT_CACHE_DIRECTORY,
2026 [EXEC_DIRECTORY_LOGS] = EXIT_LOGS_DIRECTORY,
2027 [EXEC_DIRECTORY_CONFIGURATION] = EXIT_CONFIGURATION_DIRECTORY,
2028 };
07689d5d
LP
2029 char **rt;
2030 int r;
2031
2032 assert(context);
2033 assert(params);
72fd1768 2034 assert(type >= 0 && type < _EXEC_DIRECTORY_TYPE_MAX);
3536f49e 2035 assert(exit_status);
07689d5d 2036
3536f49e
YW
2037 if (!params->prefix[type])
2038 return 0;
2039
8679efde 2040 if (params->flags & EXEC_CHOWN_DIRECTORIES) {
3536f49e
YW
2041 if (!uid_is_valid(uid))
2042 uid = 0;
2043 if (!gid_is_valid(gid))
2044 gid = 0;
2045 }
2046
2047 STRV_FOREACH(rt, context->directories[type].paths) {
6c47cd7d 2048 _cleanup_free_ char *p = NULL, *pp = NULL;
07689d5d 2049
3536f49e
YW
2050 p = strjoin(params->prefix[type], "/", *rt);
2051 if (!p) {
2052 r = -ENOMEM;
2053 goto fail;
2054 }
07689d5d 2055
23a7448e
YW
2056 r = mkdir_parents_label(p, 0755);
2057 if (r < 0)
3536f49e 2058 goto fail;
23a7448e 2059
8092a48c
YW
2060 if (context->dynamic_user &&
2061 !IN_SET(type, EXEC_DIRECTORY_RUNTIME, EXEC_DIRECTORY_CONFIGURATION)) {
6c9c51e5 2062 _cleanup_free_ char *private_root = NULL;
6c47cd7d
LP
2063
2064 /* So, here's one extra complication when dealing with DynamicUser=1 units. In that case we
2065 * want to avoid leaving a directory around fully accessible that is owned by a dynamic user
2066 * whose UID is later on reused. To lock this down we use the same trick used by container
2067 * managers to prohibit host users to get access to files of the same UID in containers: we
2068 * place everything inside a directory that has an access mode of 0700 and is owned root:root,
2069 * so that it acts as security boundary for unprivileged host code. We then use fs namespacing
2070 * to make this directory permeable for the service itself.
2071 *
2072 * Specifically: for a service which wants a special directory "foo/" we first create a
2073 * directory "private/" with access mode 0700 owned by root:root. Then we place "foo" inside of
2074 * that directory (i.e. "private/foo/"), and make "foo" a symlink to "private/foo". This way,
2075 * privileged host users can access "foo/" as usual, but unprivileged host users can't look
2076 * into it. Inside of the namespace of the container "private/" is replaced by a more liberally
2077 * accessible tmpfs, into which the host's "private/foo/" is mounted under the same name, thus
2078 * disabling the access boundary for the service and making sure it only gets access to the
2079 * dirs it needs but no others. Tricky? Yes, absolutely, but it works!
2080 *
2081 * Note that we don't do this for EXEC_DIRECTORY_CONFIGURATION as that's assumed not to be
8092a48c
YW
2082 * owned by the service itself.
2083 * Also, note that we don't do this for EXEC_DIRECTORY_RUNTIME as that's often used for sharing
2084 * files or sockets with other services. */
6c47cd7d
LP
2085
2086 private_root = strjoin(params->prefix[type], "/private");
2087 if (!private_root) {
2088 r = -ENOMEM;
2089 goto fail;
2090 }
2091
2092 /* First set up private root if it doesn't exist yet, with access mode 0700 and owned by root:root */
37c1d5e9 2093 r = mkdir_safe_label(private_root, 0700, 0, 0, MKDIR_WARN_MODE);
6c47cd7d
LP
2094 if (r < 0)
2095 goto fail;
2096
2097 pp = strjoin(private_root, "/", *rt);
2098 if (!pp) {
2099 r = -ENOMEM;
2100 goto fail;
2101 }
2102
2103 /* Create all directories between the configured directory and this private root, and mark them 0755 */
2104 r = mkdir_parents_label(pp, 0755);
2105 if (r < 0)
2106 goto fail;
2107
949befd3
LP
2108 if (is_dir(p, false) > 0 &&
2109 (laccess(pp, F_OK) < 0 && errno == ENOENT)) {
2110
2111 /* Hmm, the private directory doesn't exist yet, but the normal one exists? If so, move
2112 * it over. Most likely the service has been upgraded from one that didn't use
2113 * DynamicUser=1, to one that does. */
2114
2115 if (rename(p, pp) < 0) {
2116 r = -errno;
2117 goto fail;
2118 }
2119 } else {
2120 /* Otherwise, create the actual directory for the service */
2121
2122 r = mkdir_label(pp, context->directories[type].mode);
2123 if (r < 0 && r != -EEXIST)
2124 goto fail;
2125 }
6c47cd7d 2126
6c47cd7d 2127 /* And link it up from the original place */
6c9c51e5 2128 r = symlink_idempotent(pp, p, true);
6c47cd7d
LP
2129 if (r < 0)
2130 goto fail;
2131
30c81ce2
ZJS
2132 /* Lock down the access mode */
2133 if (chmod(pp, context->directories[type].mode) < 0) {
2134 r = -errno;
2135 goto fail;
2136 }
6c47cd7d
LP
2137 } else {
2138 r = mkdir_label(p, context->directories[type].mode);
fdff1da2 2139 if (r < 0 && r != -EEXIST)
6c47cd7d 2140 goto fail;
fdff1da2
YW
2141 if (r == -EEXIST && !context->dynamic_user)
2142 continue;
a1164ae3 2143 }
07689d5d 2144
c71b2eb7
LP
2145 /* Don't change the owner of the configuration directory, as in the common case it is not written to by
2146 * a service, and shall not be writable. */
2147 if (type == EXEC_DIRECTORY_CONFIGURATION)
2148 continue;
2149
a1164ae3 2150 /* Then, change the ownership of the whole tree, if necessary */
30c81ce2 2151 r = path_chown_recursive(pp ?: p, uid, gid);
07689d5d 2152 if (r < 0)
3536f49e 2153 goto fail;
07689d5d
LP
2154 }
2155
2156 return 0;
3536f49e
YW
2157
2158fail:
2159 *exit_status = exit_status_table[type];
3536f49e 2160 return r;
07689d5d
LP
2161}
2162
#if ENABLE_SMACK
/* Applies the SMACK label for the process we are about to execute (PID 0, i.e. ourselves).
 *
 * If the context carries an explicit process label, that one is applied. Otherwise, and only if a
 * build-time default label is defined, the exec label attribute is read from the command's binary and
 * applied, falling back to the default label if the binary carries none.
 *
 * Returns 0 on success, or the negative error reported by the SMACK helpers. */
static int setup_smack(
                const ExecContext *context,
                const ExecCommand *command) {

        assert(context);
        assert(command);

        if (context->smack_process_label) {
                int q;

                q = mac_smack_apply_pid(0, context->smack_process_label);
                return q < 0 ? q : 0;
        }

#ifdef SMACK_DEFAULT_PROCESS_LABEL
        {
                _cleanup_free_ char *label = NULL;
                int q;

                /* A missing attribute or a file system without xattr support is fine, we use the default
                 * label in that case. Everything else is a real error. */
                q = mac_smack_read(command->path, SMACK_ATTR_EXEC, &label);
                if (q < 0 && q != -ENODATA && q != -EOPNOTSUPP)
                        return q;

                q = mac_smack_apply_pid(0, label ?: SMACK_DEFAULT_PROCESS_LABEL);
                if (q < 0)
                        return q;
        }
#endif

        return 0;
}
#endif
cefc33ae 2195
6c47cd7d
LP
2196static int compile_bind_mounts(
2197 const ExecContext *context,
2198 const ExecParameters *params,
2199 BindMount **ret_bind_mounts,
da6053d0 2200 size_t *ret_n_bind_mounts,
6c47cd7d
LP
2201 char ***ret_empty_directories) {
2202
2203 _cleanup_strv_free_ char **empty_directories = NULL;
2204 BindMount *bind_mounts;
da6053d0 2205 size_t n, h = 0, i;
6c47cd7d
LP
2206 ExecDirectoryType t;
2207 int r;
2208
2209 assert(context);
2210 assert(params);
2211 assert(ret_bind_mounts);
2212 assert(ret_n_bind_mounts);
2213 assert(ret_empty_directories);
2214
2215 n = context->n_bind_mounts;
2216 for (t = 0; t < _EXEC_DIRECTORY_TYPE_MAX; t++) {
2217 if (!params->prefix[t])
2218 continue;
2219
2220 n += strv_length(context->directories[t].paths);
2221 }
2222
2223 if (n <= 0) {
2224 *ret_bind_mounts = NULL;
2225 *ret_n_bind_mounts = 0;
2226 *ret_empty_directories = NULL;
2227 return 0;
2228 }
2229
2230 bind_mounts = new(BindMount, n);
2231 if (!bind_mounts)
2232 return -ENOMEM;
2233
a8cabc61 2234 for (i = 0; i < context->n_bind_mounts; i++) {
6c47cd7d
LP
2235 BindMount *item = context->bind_mounts + i;
2236 char *s, *d;
2237
2238 s = strdup(item->source);
2239 if (!s) {
2240 r = -ENOMEM;
2241 goto finish;
2242 }
2243
2244 d = strdup(item->destination);
2245 if (!d) {
2246 free(s);
2247 r = -ENOMEM;
2248 goto finish;
2249 }
2250
2251 bind_mounts[h++] = (BindMount) {
2252 .source = s,
2253 .destination = d,
2254 .read_only = item->read_only,
2255 .recursive = item->recursive,
2256 .ignore_enoent = item->ignore_enoent,
2257 };
2258 }
2259
2260 for (t = 0; t < _EXEC_DIRECTORY_TYPE_MAX; t++) {
2261 char **suffix;
2262
2263 if (!params->prefix[t])
2264 continue;
2265
2266 if (strv_isempty(context->directories[t].paths))
2267 continue;
2268
8092a48c 2269 if (context->dynamic_user &&
5609f688
YW
2270 !IN_SET(t, EXEC_DIRECTORY_RUNTIME, EXEC_DIRECTORY_CONFIGURATION) &&
2271 !(context->root_directory || context->root_image)) {
6c47cd7d
LP
2272 char *private_root;
2273
2274 /* So this is for a dynamic user, and we need to make sure the process can access its own
2275 * directory. For that we overmount the usually inaccessible "private" subdirectory with a
2276 * tmpfs that makes it accessible and is empty except for the submounts we do this for. */
2277
2278 private_root = strjoin(params->prefix[t], "/private");
2279 if (!private_root) {
2280 r = -ENOMEM;
2281 goto finish;
2282 }
2283
2284 r = strv_consume(&empty_directories, private_root);
a635a7ae 2285 if (r < 0)
6c47cd7d 2286 goto finish;
6c47cd7d
LP
2287 }
2288
2289 STRV_FOREACH(suffix, context->directories[t].paths) {
2290 char *s, *d;
2291
8092a48c
YW
2292 if (context->dynamic_user &&
2293 !IN_SET(t, EXEC_DIRECTORY_RUNTIME, EXEC_DIRECTORY_CONFIGURATION))
6c47cd7d
LP
2294 s = strjoin(params->prefix[t], "/private/", *suffix);
2295 else
2296 s = strjoin(params->prefix[t], "/", *suffix);
2297 if (!s) {
2298 r = -ENOMEM;
2299 goto finish;
2300 }
2301
5609f688
YW
2302 if (context->dynamic_user &&
2303 !IN_SET(t, EXEC_DIRECTORY_RUNTIME, EXEC_DIRECTORY_CONFIGURATION) &&
2304 (context->root_directory || context->root_image))
2305 /* When RootDirectory= or RootImage= are set, then the symbolic link to the private
2306 * directory is not created on the root directory. So, let's bind-mount the directory
2307 * on the 'non-private' place. */
2308 d = strjoin(params->prefix[t], "/", *suffix);
2309 else
2310 d = strdup(s);
6c47cd7d
LP
2311 if (!d) {
2312 free(s);
2313 r = -ENOMEM;
2314 goto finish;
2315 }
2316
2317 bind_mounts[h++] = (BindMount) {
2318 .source = s,
2319 .destination = d,
2320 .read_only = false,
2321 .recursive = true,
2322 .ignore_enoent = false,
2323 };
2324 }
2325 }
2326
2327 assert(h == n);
2328
2329 *ret_bind_mounts = bind_mounts;
2330 *ret_n_bind_mounts = n;
ae2a15bc 2331 *ret_empty_directories = TAKE_PTR(empty_directories);
6c47cd7d
LP
2332
2333 return (int) n;
2334
2335finish:
2336 bind_mount_free_many(bind_mounts, h);
2337 return r;
2338}
2339
6818c54c 2340static int apply_mount_namespace(
34cf6c43
YW
2341 const Unit *u,
2342 const ExecCommand *command,
6818c54c
LP
2343 const ExecContext *context,
2344 const ExecParameters *params,
34cf6c43 2345 const ExecRuntime *runtime) {
6818c54c 2346
7bcef4ef 2347 _cleanup_strv_free_ char **empty_directories = NULL;
93c6bb51 2348 char *tmp = NULL, *var = NULL;
915e6d16 2349 const char *root_dir = NULL, *root_image = NULL;
228af36f 2350 NamespaceInfo ns_info;
165a31c0 2351 bool needs_sandboxing;
6c47cd7d 2352 BindMount *bind_mounts = NULL;
da6053d0 2353 size_t n_bind_mounts = 0;
6818c54c 2354 int r;
93c6bb51 2355
2b3c1b9e
DH
2356 assert(context);
2357
93c6bb51
DH
2358 /* The runtime struct only contains the parent of the private /tmp,
2359 * which is non-accessible to world users. Inside of it there's a /tmp
2360 * that is sticky, and that's the one we want to use here. */
2361
2362 if (context->private_tmp && runtime) {
2363 if (runtime->tmp_dir)
2364 tmp = strjoina(runtime->tmp_dir, "/tmp");
2365 if (runtime->var_tmp_dir)
2366 var = strjoina(runtime->var_tmp_dir, "/tmp");
2367 }
2368
915e6d16
LP
2369 if (params->flags & EXEC_APPLY_CHROOT) {
2370 root_image = context->root_image;
2371
2372 if (!root_image)
2373 root_dir = context->root_directory;
2374 }
93c6bb51 2375
6c47cd7d
LP
2376 r = compile_bind_mounts(context, params, &bind_mounts, &n_bind_mounts, &empty_directories);
2377 if (r < 0)
2378 return r;
2379
165a31c0 2380 needs_sandboxing = (params->flags & EXEC_APPLY_SANDBOXING) && !(command->flags & EXEC_COMMAND_FULLY_PRIVILEGED);
b5a33299
YW
2381 if (needs_sandboxing)
2382 ns_info = (NamespaceInfo) {
2383 .ignore_protect_paths = false,
2384 .private_dev = context->private_devices,
2385 .protect_control_groups = context->protect_control_groups,
2386 .protect_kernel_tunables = context->protect_kernel_tunables,
2387 .protect_kernel_modules = context->protect_kernel_modules,
2388 .mount_apivfs = context->mount_apivfs,
228af36f 2389 .private_mounts = context->private_mounts,
b5a33299 2390 };
228af36f
LP
2391 else if (!context->dynamic_user && root_dir)
2392 /*
2393 * If DynamicUser=no and RootDirectory= is set then lets pass a relaxed
2394 * sandbox info, otherwise enforce it, don't ignore protected paths and
2395 * fail if we are enable to apply the sandbox inside the mount namespace.
2396 */
2397 ns_info = (NamespaceInfo) {
2398 .ignore_protect_paths = true,
2399 };
2400 else
2401 ns_info = (NamespaceInfo) {};
b5a33299 2402
915e6d16 2403 r = setup_namespace(root_dir, root_image,
7bcef4ef 2404 &ns_info, context->read_write_paths,
165a31c0
LP
2405 needs_sandboxing ? context->read_only_paths : NULL,
2406 needs_sandboxing ? context->inaccessible_paths : NULL,
6c47cd7d
LP
2407 empty_directories,
2408 bind_mounts,
2409 n_bind_mounts,
2abd4e38
YW
2410 context->temporary_filesystems,
2411 context->n_temporary_filesystems,
93c6bb51
DH
2412 tmp,
2413 var,
165a31c0
LP
2414 needs_sandboxing ? context->protect_home : PROTECT_HOME_NO,
2415 needs_sandboxing ? context->protect_system : PROTECT_SYSTEM_NO,
915e6d16
LP
2416 context->mount_flags,
2417 DISSECT_IMAGE_DISCARD_ON_LOOP);
93c6bb51 2418
6c47cd7d
LP
2419 bind_mount_free_many(bind_mounts, n_bind_mounts);
2420
1beab8b0
LP
2421 /* If we couldn't set up the namespace this is probably due to a missing capability. setup_namespace() reports
2422 * that with a special, recognizable error ENOANO. In this case, silently proceeed, but only if exclusively
2423 * sandboxing options were used, i.e. nothing such as RootDirectory= or BindMount= that would result in a
2424 * completely different execution environment. */
aca835ed
YW
2425 if (r == -ENOANO) {
2426 if (n_bind_mounts == 0 &&
2427 context->n_temporary_filesystems == 0 &&
2428 !root_dir && !root_image &&
2429 !context->dynamic_user) {
2430 log_unit_debug(u, "Failed to set up namespace, assuming containerized execution and ignoring.");
2431 return 0;
2432 }
2433
2194547e
LP
2434 log_unit_debug(u, "Failed to set up namespace, and refusing to continue since the selected namespacing options alter mount environment non-trivially.\n"
2435 "Bind mounts: %zu, temporary filesystems: %zu, root directory: %s, root image: %s, dynamic user: %s",
2436 n_bind_mounts, context->n_temporary_filesystems, yes_no(root_dir), yes_no(root_image), yes_no(context->dynamic_user));
2437
aca835ed 2438 return -EOPNOTSUPP;
93c6bb51
DH
2439 }
2440
2441 return r;
2442}
2443
915e6d16
LP
2444static int apply_working_directory(
2445 const ExecContext *context,
2446 const ExecParameters *params,
2447 const char *home,
376fecf6
LP
2448 const bool needs_mount_ns,
2449 int *exit_status) {
915e6d16 2450
6732edab 2451 const char *d, *wd;
2b3c1b9e
DH
2452
2453 assert(context);
376fecf6 2454 assert(exit_status);
2b3c1b9e 2455
6732edab
LP
2456 if (context->working_directory_home) {
2457
376fecf6
LP
2458 if (!home) {
2459 *exit_status = EXIT_CHDIR;
6732edab 2460 return -ENXIO;
376fecf6 2461 }
6732edab 2462
2b3c1b9e 2463 wd = home;
6732edab
LP
2464
2465 } else if (context->working_directory)
2b3c1b9e
DH
2466 wd = context->working_directory;
2467 else
2468 wd = "/";
e7f1e7c6
DH
2469
2470 if (params->flags & EXEC_APPLY_CHROOT) {
2471 if (!needs_mount_ns && context->root_directory)
376fecf6
LP
2472 if (chroot(context->root_directory) < 0) {
2473 *exit_status = EXIT_CHROOT;
e7f1e7c6 2474 return -errno;
376fecf6 2475 }
e7f1e7c6 2476
2b3c1b9e
DH
2477 d = wd;
2478 } else
3b0e5bb5 2479 d = prefix_roota(context->root_directory, wd);
e7f1e7c6 2480
376fecf6
LP
2481 if (chdir(d) < 0 && !context->working_directory_missing_ok) {
2482 *exit_status = EXIT_CHDIR;
2b3c1b9e 2483 return -errno;
376fecf6 2484 }
e7f1e7c6
DH
2485
2486 return 0;
2487}
2488
b1edf445 2489static int setup_keyring(
34cf6c43 2490 const Unit *u,
b1edf445
LP
2491 const ExecContext *context,
2492 const ExecParameters *p,
2493 uid_t uid, gid_t gid) {
2494
74dd6b51 2495 key_serial_t keyring;
e64c2d0b
DJL
2496 int r = 0;
2497 uid_t saved_uid;
2498 gid_t saved_gid;
74dd6b51
LP
2499
2500 assert(u);
b1edf445 2501 assert(context);
74dd6b51
LP
2502 assert(p);
2503
2504 /* Let's set up a new per-service "session" kernel keyring for each system service. This has the benefit that
2505 * each service runs with its own keyring shared among all processes of the service, but with no hook-up beyond
2506 * that scope, and in particular no link to the per-UID keyring. If we don't do this the keyring will be
2507 * automatically created on-demand and then linked to the per-UID keyring, by the kernel. The kernel's built-in
2508 * on-demand behaviour is very appropriate for login users, but probably not so much for system services, where
2509 * UIDs are not necessarily specific to a service but reused (at least in the case of UID 0). */
2510
2511 if (!(p->flags & EXEC_NEW_KEYRING))
2512 return 0;
2513
b1edf445
LP
2514 if (context->keyring_mode == EXEC_KEYRING_INHERIT)
2515 return 0;
2516
e64c2d0b
DJL
2517 /* Acquiring a reference to the user keyring is nasty. We briefly change identity in order to get things set up
2518 * properly by the kernel. If we don't do that then we can't create it atomically, and that sucks for parallel
2519 * execution. This mimics what pam_keyinit does, too. Setting up session keyring, to be owned by the right user
2520 * & group is just as nasty as acquiring a reference to the user keyring. */
2521
2522 saved_uid = getuid();
2523 saved_gid = getgid();
2524
2525 if (gid_is_valid(gid) && gid != saved_gid) {
2526 if (setregid(gid, -1) < 0)
2527 return log_unit_error_errno(u, errno, "Failed to change GID for user keyring: %m");
2528 }
2529
2530 if (uid_is_valid(uid) && uid != saved_uid) {
2531 if (setreuid(uid, -1) < 0) {
2532 r = log_unit_error_errno(u, errno, "Failed to change UID for user keyring: %m");
2533 goto out;
2534 }
2535 }
2536
74dd6b51
LP
2537 keyring = keyctl(KEYCTL_JOIN_SESSION_KEYRING, 0, 0, 0, 0);
2538 if (keyring == -1) {
2539 if (errno == ENOSYS)
8002fb97 2540 log_unit_debug_errno(u, errno, "Kernel keyring not supported, ignoring.");
74dd6b51 2541 else if (IN_SET(errno, EACCES, EPERM))
8002fb97 2542 log_unit_debug_errno(u, errno, "Kernel keyring access prohibited, ignoring.");
74dd6b51 2543 else if (errno == EDQUOT)
8002fb97 2544 log_unit_debug_errno(u, errno, "Out of kernel keyrings to allocate, ignoring.");
74dd6b51 2545 else
e64c2d0b 2546 r = log_unit_error_errno(u, errno, "Setting up kernel keyring failed: %m");
74dd6b51 2547
e64c2d0b 2548 goto out;
74dd6b51
LP
2549 }
2550
e64c2d0b
DJL
2551 /* When requested link the user keyring into the session keyring. */
2552 if (context->keyring_mode == EXEC_KEYRING_SHARED) {
2553
2554 if (keyctl(KEYCTL_LINK,
2555 KEY_SPEC_USER_KEYRING,
2556 KEY_SPEC_SESSION_KEYRING, 0, 0) < 0) {
2557 r = log_unit_error_errno(u, errno, "Failed to link user keyring into session keyring: %m");
2558 goto out;
2559 }
2560 }
2561
2562 /* Restore uid/gid back */
2563 if (uid_is_valid(uid) && uid != saved_uid) {
2564 if (setreuid(saved_uid, -1) < 0) {
2565 r = log_unit_error_errno(u, errno, "Failed to change UID back for user keyring: %m");
2566 goto out;
2567 }
2568 }
2569
2570 if (gid_is_valid(gid) && gid != saved_gid) {
2571 if (setregid(saved_gid, -1) < 0)
2572 return log_unit_error_errno(u, errno, "Failed to change GID back for user keyring: %m");
2573 }
2574
2575 /* Populate they keyring with the invocation ID by default, as original saved_uid. */
b3415f5d
LP
2576 if (!sd_id128_is_null(u->invocation_id)) {
2577 key_serial_t key;
2578
2579 key = add_key("user", "invocation_id", &u->invocation_id, sizeof(u->invocation_id), KEY_SPEC_SESSION_KEYRING);
2580 if (key == -1)
8002fb97 2581 log_unit_debug_errno(u, errno, "Failed to add invocation ID to keyring, ignoring: %m");
b3415f5d
LP
2582 else {
2583 if (keyctl(KEYCTL_SETPERM, key,
2584 KEY_POS_VIEW|KEY_POS_READ|KEY_POS_SEARCH|
2585 KEY_USR_VIEW|KEY_USR_READ|KEY_USR_SEARCH, 0, 0) < 0)
e64c2d0b 2586 r = log_unit_error_errno(u, errno, "Failed to restrict invocation ID permission: %m");
b3415f5d
LP
2587 }
2588 }
2589
e64c2d0b
DJL
2590out:
2591 /* Revert back uid & gid for the the last time, and exit */
2592 /* no extra logging, as only the first already reported error matters */
2593 if (getuid() != saved_uid)
2594 (void) setreuid(saved_uid, -1);
b1edf445 2595
e64c2d0b
DJL
2596 if (getgid() != saved_gid)
2597 (void) setregid(saved_gid, -1);
b1edf445 2598
e64c2d0b 2599 return r;
74dd6b51
LP
2600}
2601
/* Appends the valid (i.e. non-negative) file descriptors of the specified pair to the array, and
 * increases the counter *n by the number of entries added. A NULL pair is silently ignored. The caller
 * must make sure the array has room for up to two more entries. */
static void append_socket_pair(int *array, size_t *n, const int pair[2]) {
        size_t k;

        assert(array);
        assert(n);

        if (!pair)
                return;

        for (k = 0; k < 2; k++) {
                if (pair[k] < 0)
                        continue;

                array[*n] = pair[k];
                (*n)++;
        }
}
2614
a34ceba6
LP
2615static int close_remaining_fds(
2616 const ExecParameters *params,
34cf6c43
YW
2617 const ExecRuntime *runtime,
2618 const DynamicCreds *dcreds,
00d9ef85 2619 int user_lookup_fd,
a34ceba6 2620 int socket_fd,
5686391b 2621 int exec_fd,
da6053d0 2622 int *fds, size_t n_fds) {
a34ceba6 2623
da6053d0 2624 size_t n_dont_close = 0;
00d9ef85 2625 int dont_close[n_fds + 12];
a34ceba6
LP
2626
2627 assert(params);
2628
2629 if (params->stdin_fd >= 0)
2630 dont_close[n_dont_close++] = params->stdin_fd;
2631 if (params->stdout_fd >= 0)
2632 dont_close[n_dont_close++] = params->stdout_fd;
2633 if (params->stderr_fd >= 0)
2634 dont_close[n_dont_close++] = params->stderr_fd;
2635
2636 if (socket_fd >= 0)
2637 dont_close[n_dont_close++] = socket_fd;
5686391b
LP
2638 if (exec_fd >= 0)
2639 dont_close[n_dont_close++] = exec_fd;
a34ceba6
LP
2640 if (n_fds > 0) {
2641 memcpy(dont_close + n_dont_close, fds, sizeof(int) * n_fds);
2642 n_dont_close += n_fds;
2643 }
2644
29206d46
LP
2645 if (runtime)
2646 append_socket_pair(dont_close, &n_dont_close, runtime->netns_storage_socket);
2647
2648 if (dcreds) {
2649 if (dcreds->user)
2650 append_socket_pair(dont_close, &n_dont_close, dcreds->user->storage_socket);
2651 if (dcreds->group)
2652 append_socket_pair(dont_close, &n_dont_close, dcreds->group->storage_socket);
a34ceba6
LP
2653 }
2654
00d9ef85
LP
2655 if (user_lookup_fd >= 0)
2656 dont_close[n_dont_close++] = user_lookup_fd;
2657
a34ceba6
LP
2658 return close_all_fds(dont_close, n_dont_close);
2659}
2660
00d9ef85
LP
2661static int send_user_lookup(
2662 Unit *unit,
2663 int user_lookup_fd,
2664 uid_t uid,
2665 gid_t gid) {
2666
2667 assert(unit);
2668
2669 /* Send the resolved UID/GID to PID 1 after we learnt it. We send a single datagram, containing the UID/GID
2670 * data as well as the unit name. Note that we suppress sending this if no user/group to resolve was
2671 * specified. */
2672
2673 if (user_lookup_fd < 0)
2674 return 0;
2675
2676 if (!uid_is_valid(uid) && !gid_is_valid(gid))
2677 return 0;
2678
2679 if (writev(user_lookup_fd,
2680 (struct iovec[]) {
e6a7ec4b
LP
2681 IOVEC_INIT(&uid, sizeof(uid)),
2682 IOVEC_INIT(&gid, sizeof(gid)),
2683 IOVEC_INIT_STRING(unit->id) }, 3) < 0)
00d9ef85
LP
2684 return -errno;
2685
2686 return 0;
2687}
2688
6732edab
LP
2689static int acquire_home(const ExecContext *c, uid_t uid, const char** home, char **buf) {
2690 int r;
2691
2692 assert(c);
2693 assert(home);
2694 assert(buf);
2695
2696 /* If WorkingDirectory=~ is set, try to acquire a usable home directory. */
2697
2698 if (*home)
2699 return 0;
2700
2701 if (!c->working_directory_home)
2702 return 0;
2703
2704 if (uid == 0) {
2705 /* Hardcode /root as home directory for UID 0 */
2706 *home = "/root";
2707 return 1;
2708 }
2709
2710 r = get_home_dir(buf);
2711 if (r < 0)
2712 return r;
2713
2714 *home = *buf;
2715 return 1;
2716}
2717
da50b85a
LP
2718static int compile_suggested_paths(const ExecContext *c, const ExecParameters *p, char ***ret) {
2719 _cleanup_strv_free_ char ** list = NULL;
2720 ExecDirectoryType t;
2721 int r;
2722
2723 assert(c);
2724 assert(p);
2725 assert(ret);
2726
2727 assert(c->dynamic_user);
2728
2729 /* Compile a list of paths that it might make sense to read the owning UID from to use as initial candidate for
2730 * dynamic UID allocation, in order to save us from doing costly recursive chown()s of the special
2731 * directories. */
2732
2733 for (t = 0; t < _EXEC_DIRECTORY_TYPE_MAX; t++) {
2734 char **i;
2735
2736 if (t == EXEC_DIRECTORY_CONFIGURATION)
2737 continue;
2738
2739 if (!p->prefix[t])
2740 continue;
2741
2742 STRV_FOREACH(i, c->directories[t].paths) {
2743 char *e;
2744
8092a48c
YW
2745 if (t == EXEC_DIRECTORY_RUNTIME)
2746 e = strjoin(p->prefix[t], "/", *i);
2747 else
2748 e = strjoin(p->prefix[t], "/private/", *i);
da50b85a
LP
2749 if (!e)
2750 return -ENOMEM;
2751
2752 r = strv_consume(&list, e);
2753 if (r < 0)
2754 return r;
2755 }
2756 }
2757
ae2a15bc 2758 *ret = TAKE_PTR(list);
da50b85a
LP
2759
2760 return 0;
2761}
2762
34cf6c43
YW
2763static char *exec_command_line(char **argv);
2764
ff0af2a1 2765static int exec_child(
f2341e0a 2766 Unit *unit,
34cf6c43 2767 const ExecCommand *command,
ff0af2a1
LP
2768 const ExecContext *context,
2769 const ExecParameters *params,
2770 ExecRuntime *runtime,
29206d46 2771 DynamicCreds *dcreds,
ff0af2a1 2772 int socket_fd,
52c239d7 2773 int named_iofds[3],
4c47affc 2774 int *fds,
da6053d0 2775 size_t n_socket_fds,
25b583d7 2776 size_t n_storage_fds,
ff0af2a1 2777 char **files_env,
00d9ef85 2778 int user_lookup_fd,
12145637 2779 int *exit_status) {
d35fbf6b 2780
2065ca69 2781 _cleanup_strv_free_ char **our_env = NULL, **pass_env = NULL, **accum_env = NULL, **final_argv = NULL;
5686391b 2782 int *fds_with_exec_fd, n_fds_with_exec_fd, r, ngids = 0, exec_fd = -1;
4d885bd3
DH
2783 _cleanup_free_ gid_t *supplementary_gids = NULL;
2784 const char *username = NULL, *groupname = NULL;
5686391b 2785 _cleanup_free_ char *home_buffer = NULL;
2b3c1b9e 2786 const char *home = NULL, *shell = NULL;
7bce046b
LP
2787 dev_t journal_stream_dev = 0;
2788 ino_t journal_stream_ino = 0;
165a31c0
LP
2789 bool needs_sandboxing, /* Do we need to set up full sandboxing? (i.e. all namespacing, all MAC stuff, caps, yadda yadda */
2790 needs_setuid, /* Do we need to do the actual setresuid()/setresgid() calls? */
2791 needs_mount_namespace, /* Do we need to set up a mount namespace for this kernel? */
2792 needs_ambient_hack; /* Do we need to apply the ambient capabilities hack? */
349cc4a5 2793#if HAVE_SELINUX
7f59dd35 2794 _cleanup_free_ char *mac_selinux_context_net = NULL;
43b1f709 2795 bool use_selinux = false;
ecfbc84f 2796#endif
f9fa32f0 2797#if ENABLE_SMACK
43b1f709 2798 bool use_smack = false;
ecfbc84f 2799#endif
349cc4a5 2800#if HAVE_APPARMOR
43b1f709 2801 bool use_apparmor = false;
ecfbc84f 2802#endif
fed1e721
LP
2803 uid_t uid = UID_INVALID;
2804 gid_t gid = GID_INVALID;
da6053d0 2805 size_t n_fds;
3536f49e 2806 ExecDirectoryType dt;
165a31c0 2807 int secure_bits;
034c6ed7 2808
f2341e0a 2809 assert(unit);
5cb5a6ff
LP
2810 assert(command);
2811 assert(context);
d35fbf6b 2812 assert(params);
ff0af2a1 2813 assert(exit_status);
d35fbf6b
DM
2814
2815 rename_process_from_path(command->path);
2816
2817 /* We reset exactly these signals, since they are the
2818 * only ones we set to SIG_IGN in the main daemon. All
2819 * others we leave untouched because we set them to
2820 * SIG_DFL or a valid handler initially, both of which
2821 * will be demoted to SIG_DFL. */
ce30c8dc
LP
2822 (void) default_signals(SIGNALS_CRASH_HANDLER,
2823 SIGNALS_IGNORE, -1);
d35fbf6b
DM
2824
2825 if (context->ignore_sigpipe)
ce30c8dc 2826 (void) ignore_signals(SIGPIPE, -1);
d35fbf6b 2827
ff0af2a1
LP
2828 r = reset_signal_mask();
2829 if (r < 0) {
2830 *exit_status = EXIT_SIGNAL_MASK;
12145637 2831 return log_unit_error_errno(unit, r, "Failed to set process signal mask: %m");
d35fbf6b 2832 }
034c6ed7 2833
d35fbf6b
DM
2834 if (params->idle_pipe)
2835 do_idle_pipe_dance(params->idle_pipe);
4f2d528d 2836
2c027c62
LP
2837 /* Close fds we don't need very early to make sure we don't block init reexecution because it cannot bind its
2838 * sockets. Among the fds we close are the logging fds, and we want to keep them closed, so that we don't have
2839 * any fds open we don't really want open during the transition. In order to make logging work, we switch the
2840 * log subsystem into open_when_needed mode, so that it reopens the logs on every single log call. */
ff0af2a1 2841
d35fbf6b 2842 log_forget_fds();
2c027c62 2843 log_set_open_when_needed(true);
4f2d528d 2844
40a80078
LP
2845 /* In case anything used libc syslog(), close this here, too */
2846 closelog();
2847
5686391b
LP
2848 n_fds = n_socket_fds + n_storage_fds;
2849 r = close_remaining_fds(params, runtime, dcreds, user_lookup_fd, socket_fd, params->exec_fd, fds, n_fds);
ff0af2a1
LP
2850 if (r < 0) {
2851 *exit_status = EXIT_FDS;
12145637 2852 return log_unit_error_errno(unit, r, "Failed to close unwanted file descriptors: %m");
8c7be95e
LP
2853 }
2854
d35fbf6b
DM
2855 if (!context->same_pgrp)
2856 if (setsid() < 0) {
ff0af2a1 2857 *exit_status = EXIT_SETSID;
12145637 2858 return log_unit_error_errno(unit, errno, "Failed to create new process session: %m");
d35fbf6b 2859 }
9e2f7c11 2860
1e22b5cd 2861 exec_context_tty_reset(context, params);
d35fbf6b 2862
c891efaf 2863 if (unit_shall_confirm_spawn(unit)) {
7d5ceb64 2864 const char *vc = params->confirm_spawn;
3b20f877
FB
2865 _cleanup_free_ char *cmdline = NULL;
2866
ee39ca20 2867 cmdline = exec_command_line(command->argv);
3b20f877 2868 if (!cmdline) {
0460aa5c 2869 *exit_status = EXIT_MEMORY;
12145637 2870 return log_oom();
3b20f877 2871 }
d35fbf6b 2872
eedf223a 2873 r = ask_for_confirmation(vc, unit, cmdline);
3b20f877
FB
2874 if (r != CONFIRM_EXECUTE) {
2875 if (r == CONFIRM_PRETEND_SUCCESS) {
2876 *exit_status = EXIT_SUCCESS;
2877 return 0;
2878 }
ff0af2a1 2879 *exit_status = EXIT_CONFIRM;
12145637 2880 log_unit_error(unit, "Execution cancelled by the user");
d35fbf6b 2881 return -ECANCELED;
d35fbf6b
DM
2882 }
2883 }
1a63a750 2884
d521916d
LP
2885 /* We are about to invoke NSS and PAM modules. Let's tell them what we are doing here, maybe they care. This is
2886 * used by nss-resolve to disable itself when we are about to start systemd-resolved, to avoid deadlocks. Note
2887 * that these env vars do not survive the execve(), which means they really only apply to the PAM and NSS
2888 * invocations themselves. Also note that while we'll only invoke NSS modules involved in user management they
2889 * might internally call into other NSS modules that are involved in hostname resolution, we never know. */
2890 if (setenv("SYSTEMD_ACTIVATION_UNIT", unit->id, true) != 0 ||
2891 setenv("SYSTEMD_ACTIVATION_SCOPE", MANAGER_IS_SYSTEM(unit->manager) ? "system" : "user", true) != 0) {
2892 *exit_status = EXIT_MEMORY;
2893 return log_unit_error_errno(unit, errno, "Failed to update environment: %m");
2894 }
2895
29206d46 2896 if (context->dynamic_user && dcreds) {
da50b85a 2897 _cleanup_strv_free_ char **suggested_paths = NULL;
29206d46 2898
d521916d
LP
2899 /* On top of that, make sure we bypass our own NSS module nss-systemd comprehensively for any NSS
2900 * checks, if DynamicUser=1 is used, as we shouldn't create a feedback loop with ourselves here.*/
409093fe
LP
2901 if (putenv((char*) "SYSTEMD_NSS_DYNAMIC_BYPASS=1") != 0) {
2902 *exit_status = EXIT_USER;
12145637 2903 return log_unit_error_errno(unit, errno, "Failed to update environment: %m");
409093fe
LP
2904 }
2905
da50b85a
LP
2906 r = compile_suggested_paths(context, params, &suggested_paths);
2907 if (r < 0) {
2908 *exit_status = EXIT_MEMORY;
2909 return log_oom();
2910 }
2911
2912 r = dynamic_creds_realize(dcreds, suggested_paths, &uid, &gid);
ff0af2a1
LP
2913 if (r < 0) {
2914 *exit_status = EXIT_USER;
e2b0cc34
YW
2915 if (r == -EILSEQ) {
2916 log_unit_error(unit, "Failed to update dynamic user credentials: User or group with specified name already exists.");
2917 return -EOPNOTSUPP;
2918 }
12145637 2919 return log_unit_error_errno(unit, r, "Failed to update dynamic user credentials: %m");
524daa8c 2920 }
524daa8c 2921
70dd455c 2922 if (!uid_is_valid(uid)) {
29206d46 2923 *exit_status = EXIT_USER;
12145637 2924 log_unit_error(unit, "UID validation failed for \""UID_FMT"\"", uid);
70dd455c
ZJS
2925 return -ESRCH;
2926 }
2927
2928 if (!gid_is_valid(gid)) {
2929 *exit_status = EXIT_USER;
12145637 2930 log_unit_error(unit, "GID validation failed for \""GID_FMT"\"", gid);
29206d46
LP
2931 return -ESRCH;
2932 }
5bc7452b 2933
29206d46
LP
2934 if (dcreds->user)
2935 username = dcreds->user->name;
2936
2937 } else {
4d885bd3
DH
2938 r = get_fixed_user(context, &username, &uid, &gid, &home, &shell);
2939 if (r < 0) {
2940 *exit_status = EXIT_USER;
12145637 2941 return log_unit_error_errno(unit, r, "Failed to determine user credentials: %m");
5bc7452b 2942 }
5bc7452b 2943
4d885bd3
DH
2944 r = get_fixed_group(context, &groupname, &gid);
2945 if (r < 0) {
2946 *exit_status = EXIT_GROUP;
12145637 2947 return log_unit_error_errno(unit, r, "Failed to determine group credentials: %m");
4d885bd3 2948 }
cdc5d5c5 2949 }
29206d46 2950
cdc5d5c5
DH
2951 /* Initialize user supplementary groups and get SupplementaryGroups= ones */
2952 r = get_supplementary_groups(context, username, groupname, gid,
2953 &supplementary_gids, &ngids);
2954 if (r < 0) {
2955 *exit_status = EXIT_GROUP;
12145637 2956 return log_unit_error_errno(unit, r, "Failed to determine supplementary groups: %m");
29206d46 2957 }
5bc7452b 2958
00d9ef85
LP
2959 r = send_user_lookup(unit, user_lookup_fd, uid, gid);
2960 if (r < 0) {
2961 *exit_status = EXIT_USER;
12145637 2962 return log_unit_error_errno(unit, r, "Failed to send user credentials to PID1: %m");
00d9ef85
LP
2963 }
2964
2965 user_lookup_fd = safe_close(user_lookup_fd);
2966
6732edab
LP
2967 r = acquire_home(context, uid, &home, &home_buffer);
2968 if (r < 0) {
2969 *exit_status = EXIT_CHDIR;
12145637 2970 return log_unit_error_errno(unit, r, "Failed to determine $HOME for user: %m");
6732edab
LP
2971 }
2972
d35fbf6b
DM
2973 /* If a socket is connected to STDIN/STDOUT/STDERR, we
2974 * must be sure to drop O_NONBLOCK */
2975 if (socket_fd >= 0)
a34ceba6 2976 (void) fd_nonblock(socket_fd, false);
acbb0225 2977
52c239d7 2978 r = setup_input(context, params, socket_fd, named_iofds);
ff0af2a1
LP
2979 if (r < 0) {
2980 *exit_status = EXIT_STDIN;
12145637 2981 return log_unit_error_errno(unit, r, "Failed to set up standard input: %m");
d35fbf6b 2982 }
034c6ed7 2983
52c239d7 2984 r = setup_output(unit, context, params, STDOUT_FILENO, socket_fd, named_iofds, basename(command->path), uid, gid, &journal_stream_dev, &journal_stream_ino);
ff0af2a1
LP
2985 if (r < 0) {
2986 *exit_status = EXIT_STDOUT;
12145637 2987 return log_unit_error_errno(unit, r, "Failed to set up standard output: %m");
d35fbf6b
DM
2988 }
2989
52c239d7 2990 r = setup_output(unit, context, params, STDERR_FILENO, socket_fd, named_iofds, basename(command->path), uid, gid, &journal_stream_dev, &journal_stream_ino);
ff0af2a1
LP
2991 if (r < 0) {
2992 *exit_status = EXIT_STDERR;
12145637 2993 return log_unit_error_errno(unit, r, "Failed to set up standard error output: %m");
d35fbf6b
DM
2994 }
2995
2996 if (params->cgroup_path) {
ff0af2a1
LP
2997 r = cg_attach_everywhere(params->cgroup_supported, params->cgroup_path, 0, NULL, NULL);
2998 if (r < 0) {
2999 *exit_status = EXIT_CGROUP;
12145637 3000 return log_unit_error_errno(unit, r, "Failed to attach to cgroup %s: %m", params->cgroup_path);
309bff19 3001 }
d35fbf6b 3002 }
309bff19 3003
d35fbf6b 3004 if (context->oom_score_adjust_set) {
9f8168eb
LP
3005 /* When we can't make this change due to EPERM, then let's silently skip over it. User namespaces
3006 * prohibit write access to this file, and we shouldn't trip up over that. */
3007 r = set_oom_score_adjust(context->oom_score_adjust);
12145637 3008 if (IN_SET(r, -EPERM, -EACCES))
f2341e0a 3009 log_unit_debug_errno(unit, r, "Failed to adjust OOM setting, assuming containerized execution, ignoring: %m");
12145637 3010 else if (r < 0) {
ff0af2a1 3011 *exit_status = EXIT_OOM_ADJUST;
12145637 3012 return log_unit_error_errno(unit, r, "Failed to adjust OOM setting: %m");
613b411c 3013 }
d35fbf6b
DM
3014 }
3015
3016 if (context->nice_set)
3017 if (setpriority(PRIO_PROCESS, 0, context->nice) < 0) {
ff0af2a1 3018 *exit_status = EXIT_NICE;
12145637 3019 return log_unit_error_errno(unit, errno, "Failed to set up process scheduling priority (nice level): %m");
613b411c
LP
3020 }
3021
d35fbf6b
DM
3022 if (context->cpu_sched_set) {
3023 struct sched_param param = {
3024 .sched_priority = context->cpu_sched_priority,
3025 };
3026
ff0af2a1
LP
3027 r = sched_setscheduler(0,
3028 context->cpu_sched_policy |
3029 (context->cpu_sched_reset_on_fork ?
3030 SCHED_RESET_ON_FORK : 0),
3031 &param);
3032 if (r < 0) {
3033 *exit_status = EXIT_SETSCHEDULER;
12145637 3034 return log_unit_error_errno(unit, errno, "Failed to set up CPU scheduling: %m");
fc9b2a84 3035 }
d35fbf6b 3036 }
fc9b2a84 3037
d35fbf6b
DM
3038 if (context->cpuset)
3039 if (sched_setaffinity(0, CPU_ALLOC_SIZE(context->cpuset_ncpus), context->cpuset) < 0) {
ff0af2a1 3040 *exit_status = EXIT_CPUAFFINITY;
12145637 3041 return log_unit_error_errno(unit, errno, "Failed to set up CPU affinity: %m");
034c6ed7
LP
3042 }
3043
d35fbf6b
DM
3044 if (context->ioprio_set)
3045 if (ioprio_set(IOPRIO_WHO_PROCESS, 0, context->ioprio) < 0) {
ff0af2a1 3046 *exit_status = EXIT_IOPRIO;
12145637 3047 return log_unit_error_errno(unit, errno, "Failed to set up IO scheduling priority: %m");
d35fbf6b 3048 }
da726a4d 3049
d35fbf6b
DM
3050 if (context->timer_slack_nsec != NSEC_INFINITY)
3051 if (prctl(PR_SET_TIMERSLACK, context->timer_slack_nsec) < 0) {
ff0af2a1 3052 *exit_status = EXIT_TIMERSLACK;
12145637 3053 return log_unit_error_errno(unit, errno, "Failed to set up timer slack: %m");
4c2630eb 3054 }
9eba9da4 3055
21022b9d
LP
3056 if (context->personality != PERSONALITY_INVALID) {
3057 r = safe_personality(context->personality);
3058 if (r < 0) {
ff0af2a1 3059 *exit_status = EXIT_PERSONALITY;
12145637 3060 return log_unit_error_errno(unit, r, "Failed to set up execution domain (personality): %m");
4c2630eb 3061 }
21022b9d 3062 }
94f04347 3063
d35fbf6b 3064 if (context->utmp_id)
df0ff127 3065 utmp_put_init_process(context->utmp_id, getpid_cached(), getsid(0),
6a93917d 3066 context->tty_path,
023a4f67
LP
3067 context->utmp_mode == EXEC_UTMP_INIT ? INIT_PROCESS :
3068 context->utmp_mode == EXEC_UTMP_LOGIN ? LOGIN_PROCESS :
3069 USER_PROCESS,
6a93917d 3070 username);
d35fbf6b 3071
e0d2adfd 3072 if (context->user) {
ff0af2a1
LP
3073 r = chown_terminal(STDIN_FILENO, uid);
3074 if (r < 0) {
3075 *exit_status = EXIT_STDIN;
12145637 3076 return log_unit_error_errno(unit, r, "Failed to change ownership of terminal: %m");
071830ff 3077 }
d35fbf6b 3078 }
8e274523 3079
62b9bb26
LP
3080 /* If delegation is enabled we'll pass ownership of the cgroup to the user of the new process. On cgroupsv1
3081 * this is only about systemd's own hierarchy, i.e. not the controller hierarchies, simply because that's not
3082 * safe. On cgroupsv2 there's only one hierarchy anyway, and delegation is safe there, hence in that case only
3083 * touch a single hierarchy too. */
584b8688 3084 if (params->cgroup_path && context->user && (params->flags & EXEC_CGROUP_DELEGATE)) {
62b9bb26 3085 r = cg_set_access(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, uid, gid);
ff0af2a1
LP
3086 if (r < 0) {
3087 *exit_status = EXIT_CGROUP;
12145637 3088 return log_unit_error_errno(unit, r, "Failed to adjust control group access: %m");
034c6ed7 3089 }
d35fbf6b 3090 }
034c6ed7 3091
72fd1768 3092 for (dt = 0; dt < _EXEC_DIRECTORY_TYPE_MAX; dt++) {
8679efde 3093 r = setup_exec_directory(context, params, uid, gid, dt, exit_status);
12145637
LP
3094 if (r < 0)
3095 return log_unit_error_errno(unit, r, "Failed to set up special execution directory in %s: %m", params->prefix[dt]);
d35fbf6b 3096 }
94f04347 3097
7bce046b 3098 r = build_environment(
fd63e712 3099 unit,
7bce046b
LP
3100 context,
3101 params,
3102 n_fds,
3103 home,
3104 username,
3105 shell,
3106 journal_stream_dev,
3107 journal_stream_ino,
3108 &our_env);
2065ca69
JW
3109 if (r < 0) {
3110 *exit_status = EXIT_MEMORY;
12145637 3111 return log_oom();
2065ca69
JW
3112 }
3113
3114 r = build_pass_environment(context, &pass_env);
3115 if (r < 0) {
3116 *exit_status = EXIT_MEMORY;
12145637 3117 return log_oom();
2065ca69
JW
3118 }
3119
3120 accum_env = strv_env_merge(5,
3121 params->environment,
3122 our_env,
3123 pass_env,
3124 context->environment,
3125 files_env,
3126 NULL);
3127 if (!accum_env) {
3128 *exit_status = EXIT_MEMORY;
12145637 3129 return log_oom();
2065ca69 3130 }
1280503b 3131 accum_env = strv_env_clean(accum_env);
2065ca69 3132
096424d1 3133 (void) umask(context->umask);
b213e1c1 3134
b1edf445 3135 r = setup_keyring(unit, context, params, uid, gid);
74dd6b51
LP
3136 if (r < 0) {
3137 *exit_status = EXIT_KEYRING;
12145637 3138 return log_unit_error_errno(unit, r, "Failed to set up kernel keyring: %m");
74dd6b51
LP
3139 }
3140
165a31c0 3141 /* We need sandboxing if the caller asked us to apply it and the command isn't explicitly excepted from it */
1703fa41 3142 needs_sandboxing = (params->flags & EXEC_APPLY_SANDBOXING) && !(command->flags & EXEC_COMMAND_FULLY_PRIVILEGED);
7f18ef0a 3143
165a31c0
LP
3144 /* We need the ambient capability hack, if the caller asked us to apply it and the command is marked for it, and the kernel doesn't actually support ambient caps */
3145 needs_ambient_hack = (params->flags & EXEC_APPLY_SANDBOXING) && (command->flags & EXEC_COMMAND_AMBIENT_MAGIC) && !ambient_capabilities_supported();
7f18ef0a 3146
165a31c0
LP
3147 /* We need setresuid() if the caller asked us to apply sandboxing and the command isn't explicitly excepted from either whole sandboxing or just setresuid() itself, and the ambient hack is not desired */
3148 if (needs_ambient_hack)
3149 needs_setuid = false;
3150 else
3151 needs_setuid = (params->flags & EXEC_APPLY_SANDBOXING) && !(command->flags & (EXEC_COMMAND_FULLY_PRIVILEGED|EXEC_COMMAND_NO_SETUID));
3152
3153 if (needs_sandboxing) {
7f18ef0a
FK
3154 /* MAC enablement checks need to be done before a new mount ns is created, as they rely on /sys being
3155 * present. The actual MAC context application will happen later, as late as possible, to avoid
3156 * impacting our own code paths. */
3157
349cc4a5 3158#if HAVE_SELINUX
43b1f709 3159 use_selinux = mac_selinux_use();
7f18ef0a 3160#endif
f9fa32f0 3161#if ENABLE_SMACK
43b1f709 3162 use_smack = mac_smack_use();
7f18ef0a 3163#endif
349cc4a5 3164#if HAVE_APPARMOR
43b1f709 3165 use_apparmor = mac_apparmor_use();
7f18ef0a 3166#endif
165a31c0 3167 }
7f18ef0a 3168
165a31c0
LP
3169 if (needs_setuid) {
3170 if (context->pam_name && username) {
3171 r = setup_pam(context->pam_name, username, uid, gid, context->tty_path, &accum_env, fds, n_fds);
3172 if (r < 0) {
3173 *exit_status = EXIT_PAM;
12145637 3174 return log_unit_error_errno(unit, r, "Failed to set up PAM session: %m");
165a31c0
LP
3175 }
3176 }
b213e1c1 3177 }
ac45f971 3178
d35fbf6b 3179 if (context->private_network && runtime && runtime->netns_storage_socket[0] >= 0) {
6e2d7c4f
MS
3180 if (ns_type_supported(NAMESPACE_NET)) {
3181 r = setup_netns(runtime->netns_storage_socket);
3182 if (r < 0) {
3183 *exit_status = EXIT_NETWORK;
3184 return log_unit_error_errno(unit, r, "Failed to set up network namespacing: %m");
3185 }
3186 } else
3187 log_unit_warning(unit, "PrivateNetwork=yes is configured, but the kernel does not support network namespaces, ignoring.");
d35fbf6b 3188 }
169c1bda 3189
ee818b89 3190 needs_mount_namespace = exec_needs_mount_namespace(context, params, runtime);
ee818b89 3191 if (needs_mount_namespace) {
6818c54c 3192 r = apply_mount_namespace(unit, command, context, params, runtime);
3fbe8dbe
LP
3193 if (r < 0) {
3194 *exit_status = EXIT_NAMESPACE;
12145637 3195 return log_unit_error_errno(unit, r, "Failed to set up mount namespacing: %m");
3fbe8dbe 3196 }
d35fbf6b 3197 }
81a2b7ce 3198
50b3dfb9 3199 /* Apply just after mount namespace setup */
376fecf6 3200 r = apply_working_directory(context, params, home, needs_mount_namespace, exit_status);
12145637
LP
3201 if (r < 0)
3202 return log_unit_error_errno(unit, r, "Changing to the requested working directory failed: %m");
50b3dfb9 3203
bbeea271 3204 /* Drop groups as early as possible */
165a31c0 3205 if (needs_setuid) {
709dbeac 3206 r = enforce_groups(gid, supplementary_gids, ngids);
096424d1
LP
3207 if (r < 0) {
3208 *exit_status = EXIT_GROUP;
12145637 3209 return log_unit_error_errno(unit, r, "Changing group credentials failed: %m");
096424d1 3210 }
165a31c0 3211 }
096424d1 3212
165a31c0 3213 if (needs_sandboxing) {
349cc4a5 3214#if HAVE_SELINUX
43b1f709 3215 if (use_selinux && params->selinux_context_net && socket_fd >= 0) {
937ccce9
LP
3216 r = mac_selinux_get_child_mls_label(socket_fd, command->path, context->selinux_context, &mac_selinux_context_net);
3217 if (r < 0) {
3218 *exit_status = EXIT_SELINUX_CONTEXT;
12145637 3219 return log_unit_error_errno(unit, r, "Failed to determine SELinux context: %m");
937ccce9 3220 }
9008e1ac 3221 }
9008e1ac
MS
3222#endif
3223
937ccce9
LP
3224 if (context->private_users) {
3225 r = setup_private_users(uid, gid);
3226 if (r < 0) {
3227 *exit_status = EXIT_USER;
12145637 3228 return log_unit_error_errno(unit, r, "Failed to set up user namespacing: %m");
937ccce9 3229 }
d251207d
LP
3230 }
3231 }
3232
165a31c0 3233 /* We repeat the fd closing here, to make sure that nothing is leaked from the PAM modules. Note that we are
5686391b
LP
3234 * more aggressive this time since socket_fd and the netns fds we don't need anymore. We do keep the exec_fd
3235 * however if we have it as we want to keep it open until the final execve(). */
3236
3237 if (params->exec_fd >= 0) {
3238 exec_fd = params->exec_fd;
3239
3240 if (exec_fd < 3 + (int) n_fds) {
3241 int moved_fd;
3242
3243 /* Let's move the exec fd far up, so that it's outside of the fd range we want to pass to the
3244 * process we are about to execute. */
3245
3246 moved_fd = fcntl(exec_fd, F_DUPFD_CLOEXEC, 3 + (int) n_fds);
3247 if (moved_fd < 0) {
3248 *exit_status = EXIT_FDS;
3249 return log_unit_error_errno(unit, errno, "Couldn't move exec fd up: %m");
3250 }
3251
3252 safe_close(exec_fd);
3253 exec_fd = moved_fd;
3254 } else {
3255 /* This fd should be FD_CLOEXEC already, but let's make sure. */
3256 r = fd_cloexec(exec_fd, true);
3257 if (r < 0) {
3258 *exit_status = EXIT_FDS;
3259 return log_unit_error_errno(unit, r, "Failed to make exec fd FD_CLOEXEC: %m");
3260 }
3261 }
3262
3263 fds_with_exec_fd = newa(int, n_fds + 1);
7e8d494b 3264 memcpy_safe(fds_with_exec_fd, fds, n_fds * sizeof(int));
5686391b
LP
3265 fds_with_exec_fd[n_fds] = exec_fd;
3266 n_fds_with_exec_fd = n_fds + 1;
3267 } else {
3268 fds_with_exec_fd = fds;
3269 n_fds_with_exec_fd = n_fds;
3270 }
3271
3272 r = close_all_fds(fds_with_exec_fd, n_fds_with_exec_fd);
ff0af2a1
LP
3273 if (r >= 0)
3274 r = shift_fds(fds, n_fds);
3275 if (r >= 0)
25b583d7 3276 r = flags_fds(fds, n_socket_fds, n_storage_fds, context->non_blocking);
ff0af2a1
LP
3277 if (r < 0) {
3278 *exit_status = EXIT_FDS;
12145637 3279 return log_unit_error_errno(unit, r, "Failed to adjust passed file descriptors: %m");
d35fbf6b 3280 }
e66cf1a3 3281
5686391b
LP
3282 /* At this point, the fds we want to pass to the program are all ready and set up, with O_CLOEXEC turned off
3283 * and at the right fd numbers. There are no other fds open, with one exception: the exec_fd if it is defined,
3284 * and it has O_CLOEXEC set, after all we want it to be closed by the execve(), so that our parent knows we
3285 * came this far. */
3286
165a31c0 3287 secure_bits = context->secure_bits;
e66cf1a3 3288
165a31c0
LP
3289 if (needs_sandboxing) {
3290 uint64_t bset;
34a5df58 3291 int which_failed;
755d4b67 3292
34a5df58
LP
3293 r = setrlimit_closest_all((const struct rlimit* const *) context->rlimit, &which_failed);
3294 if (r < 0) {
3295 *exit_status = EXIT_LIMITS;
3296 return log_unit_error_errno(unit, r, "Failed to adjust resource limit RLIMIT_%s: %m", rlimit_to_string(which_failed));
e66cf1a3
LP
3297 }
3298
f4170c67
LP
3299 /* Set the RTPRIO resource limit to 0, but only if nothing else was explicitly requested. */
3300 if (context->restrict_realtime && !context->rlimit[RLIMIT_RTPRIO]) {
3301 if (setrlimit(RLIMIT_RTPRIO, &RLIMIT_MAKE_CONST(0)) < 0) {
3302 *exit_status = EXIT_LIMITS;
12145637 3303 return log_unit_error_errno(unit, errno, "Failed to adjust RLIMIT_RTPRIO resource limit: %m");
f4170c67
LP
3304 }
3305 }
3306
37ac2744
JB
3307#if ENABLE_SMACK
3308 /* LSM Smack needs the capability CAP_MAC_ADMIN to change the current execution security context of the
3309 * process. This is the latest place before dropping capabilities. Other MAC context are set later. */
3310 if (use_smack) {
3311 r = setup_smack(context, command);
3312 if (r < 0) {
3313 *exit_status = EXIT_SMACK_PROCESS_LABEL;
3314 return log_unit_error_errno(unit, r, "Failed to set SMACK process label: %m");
3315 }
3316 }
3317#endif
3318
165a31c0
LP
3319 bset = context->capability_bounding_set;
3320 /* If the ambient caps hack is enabled (which means the kernel can't do them, and the user asked for
3321 * our magic fallback), then let's add some extra caps, so that the service can drop privs of its own,
3322 * instead of us doing that */
3323 if (needs_ambient_hack)
3324 bset |= (UINT64_C(1) << CAP_SETPCAP) |
3325 (UINT64_C(1) << CAP_SETUID) |
3326 (UINT64_C(1) << CAP_SETGID);
3327
3328 if (!cap_test_all(bset)) {
3329 r = capability_bounding_set_drop(bset, false);
ff0af2a1
LP
3330 if (r < 0) {
3331 *exit_status = EXIT_CAPABILITIES;
12145637 3332 return log_unit_error_errno(unit, r, "Failed to drop capabilities: %m");
3b8bddde 3333 }
4c2630eb 3334 }
3b8bddde 3335
755d4b67
IP
3336 /* This is done before enforce_user, but ambient set
3337 * does not survive over setresuid() if keep_caps is not set. */
165a31c0
LP
3338 if (!needs_ambient_hack &&
3339 context->capability_ambient_set != 0) {
755d4b67
IP
3340 r = capability_ambient_set_apply(context->capability_ambient_set, true);
3341 if (r < 0) {
3342 *exit_status = EXIT_CAPABILITIES;
12145637 3343 return log_unit_error_errno(unit, r, "Failed to apply ambient capabilities (before UID change): %m");
755d4b67 3344 }
755d4b67 3345 }
165a31c0 3346 }
755d4b67 3347
165a31c0 3348 if (needs_setuid) {
d35fbf6b 3349 if (context->user) {
ff0af2a1
LP
3350 r = enforce_user(context, uid);
3351 if (r < 0) {
3352 *exit_status = EXIT_USER;
12145637 3353 return log_unit_error_errno(unit, r, "Failed to change UID to " UID_FMT ": %m", uid);
5b6319dc 3354 }
165a31c0
LP
3355
3356 if (!needs_ambient_hack &&
3357 context->capability_ambient_set != 0) {
755d4b67
IP
3358
3359 /* Fix the ambient capabilities after user change. */
3360 r = capability_ambient_set_apply(context->capability_ambient_set, false);
3361 if (r < 0) {
3362 *exit_status = EXIT_CAPABILITIES;
12145637 3363 return log_unit_error_errno(unit, r, "Failed to apply ambient capabilities (after UID change): %m");
755d4b67
IP
3364 }
3365
3366 /* If we were asked to change user and ambient capabilities
3367 * were requested, we had to add keep-caps to the securebits
3368 * so that we would maintain the inherited capability set
3369 * through the setresuid(). Make sure that the bit is added
3370 * also to the context secure_bits so that we don't try to
3371 * drop the bit away next. */
3372
7f508f2c 3373 secure_bits |= 1<<SECURE_KEEP_CAPS;
755d4b67 3374 }
5b6319dc 3375 }
165a31c0 3376 }
d35fbf6b 3377
165a31c0 3378 if (needs_sandboxing) {
37ac2744 3379 /* Apply other MAC contexts late, but before seccomp syscall filtering, as those should really be last to
5cd9cd35
LP
3380 * influence our own codepaths as little as possible. Moreover, applying MAC contexts usually requires
3381 * syscalls that are subject to seccomp filtering, hence should probably be applied before the syscalls
3382 * are restricted. */
3383
349cc4a5 3384#if HAVE_SELINUX
43b1f709 3385 if (use_selinux) {
5cd9cd35
LP
3386 char *exec_context = mac_selinux_context_net ?: context->selinux_context;
3387
3388 if (exec_context) {
3389 r = setexeccon(exec_context);
3390 if (r < 0) {
3391 *exit_status = EXIT_SELINUX_CONTEXT;
12145637 3392 return log_unit_error_errno(unit, r, "Failed to change SELinux context to %s: %m", exec_context);
5cd9cd35
LP
3393 }
3394 }
3395 }
3396#endif
3397
349cc4a5 3398#if HAVE_APPARMOR
43b1f709 3399 if (use_apparmor && context->apparmor_profile) {
5cd9cd35
LP
3400 r = aa_change_onexec(context->apparmor_profile);
3401 if (r < 0 && !context->apparmor_profile_ignore) {
3402 *exit_status = EXIT_APPARMOR_PROFILE;
12145637 3403 return log_unit_error_errno(unit, errno, "Failed to prepare AppArmor profile change to %s: %m", context->apparmor_profile);
5cd9cd35
LP
3404 }
3405 }
3406#endif
3407
165a31c0
LP
3408 /* PR_GET_SECUREBITS is not privileged, while PR_SET_SECUREBITS is. So to suppress potential EPERMs
3409 * we'll try not to call PR_SET_SECUREBITS unless necessary. */
755d4b67
IP
3410 if (prctl(PR_GET_SECUREBITS) != secure_bits)
3411 if (prctl(PR_SET_SECUREBITS, secure_bits) < 0) {
ff0af2a1 3412 *exit_status = EXIT_SECUREBITS;
12145637 3413 return log_unit_error_errno(unit, errno, "Failed to set process secure bits: %m");
ff01d048 3414 }
5b6319dc 3415
59eeb84b 3416 if (context_has_no_new_privileges(context))
d35fbf6b 3417 if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) < 0) {
ff0af2a1 3418 *exit_status = EXIT_NO_NEW_PRIVILEGES;
12145637 3419 return log_unit_error_errno(unit, errno, "Failed to disable new privileges: %m");
d35fbf6b
DM
3420 }
3421
349cc4a5 3422#if HAVE_SECCOMP
469830d1
LP
3423 r = apply_address_families(unit, context);
3424 if (r < 0) {
3425 *exit_status = EXIT_ADDRESS_FAMILIES;
12145637 3426 return log_unit_error_errno(unit, r, "Failed to restrict address families: %m");
4c2630eb 3427 }
04aa0cb9 3428
469830d1
LP
3429 r = apply_memory_deny_write_execute(unit, context);
3430 if (r < 0) {
3431 *exit_status = EXIT_SECCOMP;
12145637 3432 return log_unit_error_errno(unit, r, "Failed to disable writing to executable memory: %m");
f3e43635 3433 }
f4170c67 3434
469830d1
LP
3435 r = apply_restrict_realtime(unit, context);
3436 if (r < 0) {
3437 *exit_status = EXIT_SECCOMP;
12145637 3438 return log_unit_error_errno(unit, r, "Failed to apply realtime restrictions: %m");
f4170c67
LP
3439 }
3440
add00535
LP
3441 r = apply_restrict_namespaces(unit, context);
3442 if (r < 0) {
3443 *exit_status = EXIT_SECCOMP;
12145637 3444 return log_unit_error_errno(unit, r, "Failed to apply namespace restrictions: %m");
add00535
LP
3445 }
3446
469830d1
LP
3447 r = apply_protect_sysctl(unit, context);
3448 if (r < 0) {
3449 *exit_status = EXIT_SECCOMP;
12145637 3450 return log_unit_error_errno(unit, r, "Failed to apply sysctl restrictions: %m");
502d704e
DH
3451 }
3452
469830d1
LP
3453 r = apply_protect_kernel_modules(unit, context);
3454 if (r < 0) {
3455 *exit_status = EXIT_SECCOMP;
12145637 3456 return log_unit_error_errno(unit, r, "Failed to apply module loading restrictions: %m");
59eeb84b
LP
3457 }
3458
469830d1
LP
3459 r = apply_private_devices(unit, context);
3460 if (r < 0) {
3461 *exit_status = EXIT_SECCOMP;
12145637 3462 return log_unit_error_errno(unit, r, "Failed to set up private devices: %m");
469830d1
LP
3463 }
3464
3465 r = apply_syscall_archs(unit, context);
3466 if (r < 0) {
3467 *exit_status = EXIT_SECCOMP;
12145637 3468 return log_unit_error_errno(unit, r, "Failed to apply syscall architecture restrictions: %m");
ba128bb8
LP
3469 }
3470
78e864e5
TM
3471 r = apply_lock_personality(unit, context);
3472 if (r < 0) {
3473 *exit_status = EXIT_SECCOMP;
12145637 3474 return log_unit_error_errno(unit, r, "Failed to lock personalities: %m");
78e864e5
TM
3475 }
3476
5cd9cd35
LP
3477 /* This really should remain the last step before the execve(), to make sure our own code is unaffected
3478 * by the filter as little as possible. */
165a31c0 3479 r = apply_syscall_filter(unit, context, needs_ambient_hack);
469830d1
LP
3480 if (r < 0) {
3481 *exit_status = EXIT_SECCOMP;
12145637 3482 return log_unit_error_errno(unit, r, "Failed to apply system call filters: %m");
d35fbf6b
DM
3483 }
3484#endif
d35fbf6b 3485 }
034c6ed7 3486
00819cc1
LP
3487 if (!strv_isempty(context->unset_environment)) {
3488 char **ee = NULL;
3489
3490 ee = strv_env_delete(accum_env, 1, context->unset_environment);
3491 if (!ee) {
3492 *exit_status = EXIT_MEMORY;
12145637 3493 return log_oom();
00819cc1
LP
3494 }
3495
130d3d22 3496 strv_free_and_replace(accum_env, ee);
00819cc1
LP
3497 }
3498
ee39ca20 3499 final_argv = replace_env_argv(command->argv, accum_env);
d35fbf6b 3500 if (!final_argv) {
ff0af2a1 3501 *exit_status = EXIT_MEMORY;
12145637 3502 return log_oom();
d35fbf6b 3503 }
034c6ed7 3504
f1d34068 3505 if (DEBUG_LOGGING) {
d35fbf6b 3506 _cleanup_free_ char *line;
81a2b7ce 3507
d35fbf6b 3508 line = exec_command_line(final_argv);
a1230ff9 3509 if (line)
f2341e0a 3510 log_struct(LOG_DEBUG,
f2341e0a
LP
3511 "EXECUTABLE=%s", command->path,
3512 LOG_UNIT_MESSAGE(unit, "Executing: %s", line),
ba360bb0 3513 LOG_UNIT_ID(unit),
a1230ff9 3514 LOG_UNIT_INVOCATION_ID(unit));
d35fbf6b 3515 }
dd305ec9 3516
5686391b
LP
3517 if (exec_fd >= 0) {
3518 uint8_t hot = 1;
3519
3520 /* We have finished with all our initializations. Let's now let the manager know that. From this point
3521 * on, if the manager sees POLLHUP on the exec_fd, then execve() was successful. */
3522
3523 if (write(exec_fd, &hot, sizeof(hot)) < 0) {
3524 *exit_status = EXIT_EXEC;
3525 return log_unit_error_errno(unit, errno, "Failed to enable exec_fd: %m");
3526 }
3527 }
3528
2065ca69 3529 execve(command->path, final_argv, accum_env);
5686391b
LP
3530 r = -errno;
3531
3532 if (exec_fd >= 0) {
3533 uint8_t hot = 0;
3534
3535 /* The execve() failed. This means the exec_fd is still open. Which means we need to tell the manager
3536 * that POLLHUP on it no longer means execve() succeeded. */
3537
3538 if (write(exec_fd, &hot, sizeof(hot)) < 0) {
3539 *exit_status = EXIT_EXEC;
3540 return log_unit_error_errno(unit, errno, "Failed to disable exec_fd: %m");
3541 }
3542 }
12145637 3543
5686391b
LP
3544 if (r == -ENOENT && (command->flags & EXEC_COMMAND_IGNORE_FAILURE)) {
3545 log_struct_errno(LOG_INFO, r,
12145637
LP
3546 "MESSAGE_ID=" SD_MESSAGE_SPAWN_FAILED_STR,
3547 LOG_UNIT_ID(unit),
3548 LOG_UNIT_INVOCATION_ID(unit),
3549 LOG_UNIT_MESSAGE(unit, "Executable %s missing, skipping: %m",
3550 command->path),
a1230ff9 3551 "EXECUTABLE=%s", command->path);
12145637
LP
3552 return 0;
3553 }
3554
ff0af2a1 3555 *exit_status = EXIT_EXEC;
5686391b 3556 return log_unit_error_errno(unit, r, "Failed to execute command: %m");
d35fbf6b 3557}
81a2b7ce 3558
34cf6c43
YW
3559static int exec_context_load_environment(const Unit *unit, const ExecContext *c, char ***l);
3560static int exec_context_named_iofds(const ExecContext *c, const ExecParameters *p, int named_iofds[3]);
3561
f2341e0a
LP
3562int exec_spawn(Unit *unit,
3563 ExecCommand *command,
d35fbf6b
DM
3564 const ExecContext *context,
3565 const ExecParameters *params,
3566 ExecRuntime *runtime,
29206d46 3567 DynamicCreds *dcreds,
d35fbf6b 3568 pid_t *ret) {
8351ceae 3569
ee39ca20 3570 int socket_fd, r, named_iofds[3] = { -1, -1, -1 }, *fds = NULL;
d35fbf6b 3571 _cleanup_strv_free_ char **files_env = NULL;
da6053d0 3572 size_t n_storage_fds = 0, n_socket_fds = 0;
ff0af2a1 3573 _cleanup_free_ char *line = NULL;
d35fbf6b 3574 pid_t pid;
8351ceae 3575
f2341e0a 3576 assert(unit);
d35fbf6b
DM
3577 assert(command);
3578 assert(context);
3579 assert(ret);
3580 assert(params);
25b583d7 3581 assert(params->fds || (params->n_socket_fds + params->n_storage_fds <= 0));
4298d0b5 3582
d35fbf6b
DM
3583 if (context->std_input == EXEC_INPUT_SOCKET ||
3584 context->std_output == EXEC_OUTPUT_SOCKET ||
3585 context->std_error == EXEC_OUTPUT_SOCKET) {
17df7223 3586
4c47affc 3587 if (params->n_socket_fds > 1) {
f2341e0a 3588 log_unit_error(unit, "Got more than one socket.");
d35fbf6b 3589 return -EINVAL;
ff0af2a1 3590 }
eef65bf3 3591
4c47affc 3592 if (params->n_socket_fds == 0) {
488ab41c
AA
3593 log_unit_error(unit, "Got no socket.");
3594 return -EINVAL;
3595 }
3596
d35fbf6b
DM
3597 socket_fd = params->fds[0];
3598 } else {
3599 socket_fd = -1;
3600 fds = params->fds;
9b141911 3601 n_socket_fds = params->n_socket_fds;
25b583d7 3602 n_storage_fds = params->n_storage_fds;
d35fbf6b 3603 }
94f04347 3604
34cf6c43 3605 r = exec_context_named_iofds(context, params, named_iofds);
52c239d7
LB
3606 if (r < 0)
3607 return log_unit_error_errno(unit, r, "Failed to load a named file descriptor: %m");
3608
f2341e0a 3609 r = exec_context_load_environment(unit, context, &files_env);
ff0af2a1 3610 if (r < 0)
f2341e0a 3611 return log_unit_error_errno(unit, r, "Failed to load environment files: %m");
034c6ed7 3612
ee39ca20 3613 line = exec_command_line(command->argv);
d35fbf6b
DM
3614 if (!line)
3615 return log_oom();
fab56fc5 3616
f2341e0a 3617 log_struct(LOG_DEBUG,
f2341e0a
LP
3618 LOG_UNIT_MESSAGE(unit, "About to execute: %s", line),
3619 "EXECUTABLE=%s", command->path,
ba360bb0 3620 LOG_UNIT_ID(unit),
a1230ff9 3621 LOG_UNIT_INVOCATION_ID(unit));
12145637 3622
d35fbf6b
DM
3623 pid = fork();
3624 if (pid < 0)
74129a12 3625 return log_unit_error_errno(unit, errno, "Failed to fork: %m");
d35fbf6b
DM
3626
3627 if (pid == 0) {
12145637 3628 int exit_status = EXIT_SUCCESS;
ff0af2a1 3629
f2341e0a
LP
3630 r = exec_child(unit,
3631 command,
ff0af2a1
LP
3632 context,
3633 params,
3634 runtime,
29206d46 3635 dcreds,
ff0af2a1 3636 socket_fd,
52c239d7 3637 named_iofds,
4c47affc 3638 fds,
9b141911 3639 n_socket_fds,
25b583d7 3640 n_storage_fds,
ff0af2a1 3641 files_env,
00d9ef85 3642 unit->manager->user_lookup_fds[1],
12145637
LP
3643 &exit_status);
3644
a1230ff9 3645 if (r < 0)
12145637
LP
3646 log_struct_errno(LOG_ERR, r,
3647 "MESSAGE_ID=" SD_MESSAGE_SPAWN_FAILED_STR,
3648 LOG_UNIT_ID(unit),
3649 LOG_UNIT_INVOCATION_ID(unit),
3650 LOG_UNIT_MESSAGE(unit, "Failed at step %s spawning %s: %m",
3651 exit_status_to_string(exit_status, EXIT_STATUS_SYSTEMD),
3652 command->path),
a1230ff9 3653 "EXECUTABLE=%s", command->path);
4c2630eb 3654
ff0af2a1 3655 _exit(exit_status);
034c6ed7
LP
3656 }
3657
f2341e0a 3658 log_unit_debug(unit, "Forked %s as "PID_FMT, command->path, pid);
23635a85 3659
80876c20
LP
3660 /* We add the new process to the cgroup both in the child (so
3661 * that we can be sure that no user code is ever executed
3662 * outside of the cgroup) and in the parent (so that we can be
3663 * sure that when we kill the cgroup the process will be
3664 * killed too). */
d35fbf6b 3665 if (params->cgroup_path)
dd305ec9 3666 (void) cg_attach(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, pid);
2da3263a 3667
b58b4116 3668 exec_status_start(&command->exec_status, pid);
9fb86720 3669
034c6ed7 3670 *ret = pid;
5cb5a6ff
LP
3671 return 0;
3672}
3673
034c6ed7 3674void exec_context_init(ExecContext *c) {
3536f49e
YW
3675 ExecDirectoryType i;
3676
034c6ed7
LP
3677 assert(c);
3678
4c12626c 3679 c->umask = 0022;
9eba9da4 3680 c->ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 0);
94f04347 3681 c->cpu_sched_policy = SCHED_OTHER;
071830ff 3682 c->syslog_priority = LOG_DAEMON|LOG_INFO;
74922904 3683 c->syslog_level_prefix = true;
353e12c2 3684 c->ignore_sigpipe = true;
3a43da28 3685 c->timer_slack_nsec = NSEC_INFINITY;
050f7277 3686 c->personality = PERSONALITY_INVALID;
72fd1768 3687 for (i = 0; i < _EXEC_DIRECTORY_TYPE_MAX; i++)
3536f49e 3688 c->directories[i].mode = 0755;
a103496c 3689 c->capability_bounding_set = CAP_ALL;
aa9d574d
YW
3690 assert_cc(NAMESPACE_FLAGS_INITIAL != NAMESPACE_FLAGS_ALL);
3691 c->restrict_namespaces = NAMESPACE_FLAGS_INITIAL;
d3070fbd 3692 c->log_level_max = -1;
034c6ed7
LP
3693}
3694
613b411c 3695void exec_context_done(ExecContext *c) {
3536f49e 3696 ExecDirectoryType i;
d3070fbd 3697 size_t l;
5cb5a6ff
LP
3698
3699 assert(c);
3700
6796073e
LP
3701 c->environment = strv_free(c->environment);
3702 c->environment_files = strv_free(c->environment_files);
b4c14404 3703 c->pass_environment = strv_free(c->pass_environment);
00819cc1 3704 c->unset_environment = strv_free(c->unset_environment);
8c7be95e 3705
31ce987c 3706 rlimit_free_all(c->rlimit);
034c6ed7 3707
2038c3f5 3708 for (l = 0; l < 3; l++) {
52c239d7 3709 c->stdio_fdname[l] = mfree(c->stdio_fdname[l]);
2038c3f5
LP
3710 c->stdio_file[l] = mfree(c->stdio_file[l]);
3711 }
52c239d7 3712
a1e58e8e
LP
3713 c->working_directory = mfree(c->working_directory);
3714 c->root_directory = mfree(c->root_directory);
915e6d16 3715 c->root_image = mfree(c->root_image);
a1e58e8e
LP
3716 c->tty_path = mfree(c->tty_path);
3717 c->syslog_identifier = mfree(c->syslog_identifier);
3718 c->user = mfree(c->user);
3719 c->group = mfree(c->group);
034c6ed7 3720
6796073e 3721 c->supplementary_groups = strv_free(c->supplementary_groups);
94f04347 3722
a1e58e8e 3723 c->pam_name = mfree(c->pam_name);
5b6319dc 3724
2a624c36
AP
3725 c->read_only_paths = strv_free(c->read_only_paths);
3726 c->read_write_paths = strv_free(c->read_write_paths);
3727 c->inaccessible_paths = strv_free(c->inaccessible_paths);
82c121a4 3728
d2d6c096 3729 bind_mount_free_many(c->bind_mounts, c->n_bind_mounts);
8e06d57c
YW
3730 c->bind_mounts = NULL;
3731 c->n_bind_mounts = 0;
2abd4e38
YW
3732 temporary_filesystem_free_many(c->temporary_filesystems, c->n_temporary_filesystems);
3733 c->temporary_filesystems = NULL;
3734 c->n_temporary_filesystems = 0;
d2d6c096 3735
da681e1b 3736 c->cpuset = cpu_set_mfree(c->cpuset);
86a3475b 3737
a1e58e8e
LP
3738 c->utmp_id = mfree(c->utmp_id);
3739 c->selinux_context = mfree(c->selinux_context);
3740 c->apparmor_profile = mfree(c->apparmor_profile);
5b8e1b77 3741 c->smack_process_label = mfree(c->smack_process_label);
eef65bf3 3742
8cfa775f 3743 c->syscall_filter = hashmap_free(c->syscall_filter);
525d3cc7
LP
3744 c->syscall_archs = set_free(c->syscall_archs);
3745 c->address_families = set_free(c->address_families);
e66cf1a3 3746
72fd1768 3747 for (i = 0; i < _EXEC_DIRECTORY_TYPE_MAX; i++)
3536f49e 3748 c->directories[i].paths = strv_free(c->directories[i].paths);
d3070fbd
LP
3749
3750 c->log_level_max = -1;
3751
3752 exec_context_free_log_extra_fields(c);
08f3be7a 3753
90fc172e
AZ
3754 c->log_rate_limit_interval_usec = 0;
3755 c->log_rate_limit_burst = 0;
3756
08f3be7a
LP
3757 c->stdin_data = mfree(c->stdin_data);
3758 c->stdin_data_size = 0;
e66cf1a3
LP
3759}
3760
34cf6c43 3761int exec_context_destroy_runtime_directory(const ExecContext *c, const char *runtime_prefix) {
e66cf1a3
LP
3762 char **i;
3763
3764 assert(c);
3765
3766 if (!runtime_prefix)
3767 return 0;
3768
3536f49e 3769 STRV_FOREACH(i, c->directories[EXEC_DIRECTORY_RUNTIME].paths) {
e66cf1a3
LP
3770 _cleanup_free_ char *p;
3771
605405c6 3772 p = strjoin(runtime_prefix, "/", *i);
e66cf1a3
LP
3773 if (!p)
3774 return -ENOMEM;
3775
6c47cd7d 3776 /* We execute this synchronously, since we need to be sure this is gone when we start the service
e66cf1a3 3777 * next. */
c6878637 3778 (void) rm_rf(p, REMOVE_ROOT);
e66cf1a3
LP
3779 }
3780
3781 return 0;
5cb5a6ff
LP
3782}
3783
34cf6c43 3784static void exec_command_done(ExecCommand *c) {
43d0fcbd
LP
3785 assert(c);
3786
a1e58e8e 3787 c->path = mfree(c->path);
6796073e 3788 c->argv = strv_free(c->argv);
43d0fcbd
LP
3789}
3790
da6053d0
LP
3791void exec_command_done_array(ExecCommand *c, size_t n) {
3792 size_t i;
43d0fcbd
LP
3793
3794 for (i = 0; i < n; i++)
3795 exec_command_done(c+i);
3796}
3797
f1acf85a 3798ExecCommand* exec_command_free_list(ExecCommand *c) {
5cb5a6ff
LP
3799 ExecCommand *i;
3800
3801 while ((i = c)) {
71fda00f 3802 LIST_REMOVE(command, c, i);
43d0fcbd 3803 exec_command_done(i);
5cb5a6ff
LP
3804 free(i);
3805 }
f1acf85a
ZJS
3806
3807 return NULL;
5cb5a6ff
LP
3808}
3809
da6053d0
LP
3810void exec_command_free_array(ExecCommand **c, size_t n) {
3811 size_t i;
034c6ed7 3812
f1acf85a
ZJS
3813 for (i = 0; i < n; i++)
3814 c[i] = exec_command_free_list(c[i]);
034c6ed7
LP
3815}
3816
6a1d4d9f
LP
3817void exec_command_reset_status_array(ExecCommand *c, size_t n) {
3818 size_t i;
3819
3820 for (i = 0; i < n; i++)
3821 exec_status_reset(&c[i].exec_status);
3822}
3823
3824void exec_command_reset_status_list_array(ExecCommand **c, size_t n) {
3825 size_t i;
3826
3827 for (i = 0; i < n; i++) {
3828 ExecCommand *z;
3829
3830 LIST_FOREACH(command, z, c[i])
3831 exec_status_reset(&z->exec_status);
3832 }
3833}
3834
039f0e70 3835typedef struct InvalidEnvInfo {
34cf6c43 3836 const Unit *unit;
039f0e70
LP
3837 const char *path;
3838} InvalidEnvInfo;
3839
3840static void invalid_env(const char *p, void *userdata) {
3841 InvalidEnvInfo *info = userdata;
3842
f2341e0a 3843 log_unit_error(info->unit, "Ignoring invalid environment assignment '%s': %s", p, info->path);
039f0e70
LP
3844}
3845
52c239d7
LB
3846const char* exec_context_fdname(const ExecContext *c, int fd_index) {
3847 assert(c);
3848
3849 switch (fd_index) {
5073ff6b 3850
52c239d7
LB
3851 case STDIN_FILENO:
3852 if (c->std_input != EXEC_INPUT_NAMED_FD)
3853 return NULL;
5073ff6b 3854
52c239d7 3855 return c->stdio_fdname[STDIN_FILENO] ?: "stdin";
5073ff6b 3856
52c239d7
LB
3857 case STDOUT_FILENO:
3858 if (c->std_output != EXEC_OUTPUT_NAMED_FD)
3859 return NULL;
5073ff6b 3860
52c239d7 3861 return c->stdio_fdname[STDOUT_FILENO] ?: "stdout";
5073ff6b 3862
52c239d7
LB
3863 case STDERR_FILENO:
3864 if (c->std_error != EXEC_OUTPUT_NAMED_FD)
3865 return NULL;
5073ff6b 3866
52c239d7 3867 return c->stdio_fdname[STDERR_FILENO] ?: "stderr";
5073ff6b 3868
52c239d7
LB
3869 default:
3870 return NULL;
3871 }
3872}
3873
34cf6c43 3874static int exec_context_named_iofds(const ExecContext *c, const ExecParameters *p, int named_iofds[3]) {
da6053d0 3875 size_t i, targets;
56fbd561 3876 const char* stdio_fdname[3];
da6053d0 3877 size_t n_fds;
52c239d7
LB
3878
3879 assert(c);
3880 assert(p);
3881
3882 targets = (c->std_input == EXEC_INPUT_NAMED_FD) +
3883 (c->std_output == EXEC_OUTPUT_NAMED_FD) +
3884 (c->std_error == EXEC_OUTPUT_NAMED_FD);
3885
3886 for (i = 0; i < 3; i++)
3887 stdio_fdname[i] = exec_context_fdname(c, i);
3888
4c47affc
FB
3889 n_fds = p->n_storage_fds + p->n_socket_fds;
3890
3891 for (i = 0; i < n_fds && targets > 0; i++)
56fbd561
ZJS
3892 if (named_iofds[STDIN_FILENO] < 0 &&
3893 c->std_input == EXEC_INPUT_NAMED_FD &&
3894 stdio_fdname[STDIN_FILENO] &&
3895 streq(p->fd_names[i], stdio_fdname[STDIN_FILENO])) {
3896
52c239d7
LB
3897 named_iofds[STDIN_FILENO] = p->fds[i];
3898 targets--;
56fbd561
ZJS
3899
3900 } else if (named_iofds[STDOUT_FILENO] < 0 &&
3901 c->std_output == EXEC_OUTPUT_NAMED_FD &&
3902 stdio_fdname[STDOUT_FILENO] &&
3903 streq(p->fd_names[i], stdio_fdname[STDOUT_FILENO])) {
3904
52c239d7
LB
3905 named_iofds[STDOUT_FILENO] = p->fds[i];
3906 targets--;
56fbd561
ZJS
3907
3908 } else if (named_iofds[STDERR_FILENO] < 0 &&
3909 c->std_error == EXEC_OUTPUT_NAMED_FD &&
3910 stdio_fdname[STDERR_FILENO] &&
3911 streq(p->fd_names[i], stdio_fdname[STDERR_FILENO])) {
3912
52c239d7
LB
3913 named_iofds[STDERR_FILENO] = p->fds[i];
3914 targets--;
3915 }
3916
56fbd561 3917 return targets == 0 ? 0 : -ENOENT;
52c239d7
LB
3918}
3919
34cf6c43 3920static int exec_context_load_environment(const Unit *unit, const ExecContext *c, char ***l) {
8c7be95e
LP
3921 char **i, **r = NULL;
3922
3923 assert(c);
3924 assert(l);
3925
3926 STRV_FOREACH(i, c->environment_files) {
3927 char *fn;
52511fae
ZJS
3928 int k;
3929 unsigned n;
8c7be95e
LP
3930 bool ignore = false;
3931 char **p;
7fd1b19b 3932 _cleanup_globfree_ glob_t pglob = {};
8c7be95e
LP
3933
3934 fn = *i;
3935
3936 if (fn[0] == '-') {
3937 ignore = true;
313cefa1 3938 fn++;
8c7be95e
LP
3939 }
3940
3941 if (!path_is_absolute(fn)) {
8c7be95e
LP
3942 if (ignore)
3943 continue;
3944
3945 strv_free(r);
3946 return -EINVAL;
3947 }
3948
2bef10ab 3949 /* Filename supports globbing, take all matching files */
d8c92e8b
ZJS
3950 k = safe_glob(fn, 0, &pglob);
3951 if (k < 0) {
2bef10ab
PL
3952 if (ignore)
3953 continue;
8c7be95e 3954
2bef10ab 3955 strv_free(r);
d8c92e8b 3956 return k;
2bef10ab 3957 }
8c7be95e 3958
d8c92e8b
ZJS
3959 /* When we don't match anything, -ENOENT should be returned */
3960 assert(pglob.gl_pathc > 0);
3961
3962 for (n = 0; n < pglob.gl_pathc; n++) {
717603e3 3963 k = load_env_file(NULL, pglob.gl_pathv[n], NULL, &p);
2bef10ab
PL
3964 if (k < 0) {
3965 if (ignore)
3966 continue;
8c7be95e 3967
2bef10ab 3968 strv_free(r);
2bef10ab 3969 return k;
e9c1ea9d 3970 }
ebc05a09 3971 /* Log invalid environment variables with filename */
039f0e70
LP
3972 if (p) {
3973 InvalidEnvInfo info = {
f2341e0a 3974 .unit = unit,
039f0e70
LP
3975 .path = pglob.gl_pathv[n]
3976 };
3977
3978 p = strv_env_clean_with_callback(p, invalid_env, &info);
3979 }
8c7be95e 3980
234519ae 3981 if (!r)
2bef10ab
PL
3982 r = p;
3983 else {
3984 char **m;
8c7be95e 3985
2bef10ab
PL
3986 m = strv_env_merge(2, r, p);
3987 strv_free(r);
3988 strv_free(p);
c84a9488 3989 if (!m)
2bef10ab 3990 return -ENOMEM;
2bef10ab
PL
3991
3992 r = m;
3993 }
8c7be95e
LP
3994 }
3995 }
3996
3997 *l = r;
3998
3999 return 0;
4000}
4001
6ac8fdc9 4002static bool tty_may_match_dev_console(const char *tty) {
7b912648 4003 _cleanup_free_ char *resolved = NULL;
6ac8fdc9 4004
1e22b5cd
LP
4005 if (!tty)
4006 return true;
4007
a119ec7c 4008 tty = skip_dev_prefix(tty);
6ac8fdc9
MS
4009
4010 /* trivial identity? */
4011 if (streq(tty, "console"))
4012 return true;
4013
7b912648
LP
4014 if (resolve_dev_console(&resolved) < 0)
4015 return true; /* if we could not resolve, assume it may */
6ac8fdc9
MS
4016
4017 /* "tty0" means the active VC, so it may be the same sometimes */
7b912648 4018 return streq(resolved, tty) || (streq(resolved, "tty0") && tty_is_vc(tty));
6ac8fdc9
MS
4019}
4020
34cf6c43 4021bool exec_context_may_touch_console(const ExecContext *ec) {
1e22b5cd
LP
4022
4023 return (ec->tty_reset ||
4024 ec->tty_vhangup ||
4025 ec->tty_vt_disallocate ||
6ac8fdc9
MS
4026 is_terminal_input(ec->std_input) ||
4027 is_terminal_output(ec->std_output) ||
4028 is_terminal_output(ec->std_error)) &&
1e22b5cd 4029 tty_may_match_dev_console(exec_context_tty_path(ec));
6ac8fdc9
MS
4030}
4031
15ae422b
LP
4032static void strv_fprintf(FILE *f, char **l) {
4033 char **g;
4034
4035 assert(f);
4036
4037 STRV_FOREACH(g, l)
4038 fprintf(f, " %s", *g);
4039}
4040
34cf6c43 4041void exec_context_dump(const ExecContext *c, FILE* f, const char *prefix) {
d3070fbd 4042 ExecDirectoryType dt;
c2bbd90b 4043 char **e, **d;
94f04347 4044 unsigned i;
add00535 4045 int r;
9eba9da4 4046
5cb5a6ff
LP
4047 assert(c);
4048 assert(f);
4049
4ad49000 4050 prefix = strempty(prefix);
5cb5a6ff
LP
4051
4052 fprintf(f,
94f04347
LP
4053 "%sUMask: %04o\n"
4054 "%sWorkingDirectory: %s\n"
451a074f 4055 "%sRootDirectory: %s\n"
15ae422b 4056 "%sNonBlocking: %s\n"
64747e2d 4057 "%sPrivateTmp: %s\n"
7f112f50 4058 "%sPrivateDevices: %s\n"
59eeb84b 4059 "%sProtectKernelTunables: %s\n"
e66a2f65 4060 "%sProtectKernelModules: %s\n"
59eeb84b 4061 "%sProtectControlGroups: %s\n"
d251207d
LP
4062 "%sPrivateNetwork: %s\n"
4063 "%sPrivateUsers: %s\n"
1b8689f9
LP
4064 "%sProtectHome: %s\n"
4065 "%sProtectSystem: %s\n"
5d997827 4066 "%sMountAPIVFS: %s\n"
f3e43635 4067 "%sIgnoreSIGPIPE: %s\n"
f4170c67 4068 "%sMemoryDenyWriteExecute: %s\n"
b1edf445
LP
4069 "%sRestrictRealtime: %s\n"
4070 "%sKeyringMode: %s\n",
5cb5a6ff 4071 prefix, c->umask,
9eba9da4 4072 prefix, c->working_directory ? c->working_directory : "/",
451a074f 4073 prefix, c->root_directory ? c->root_directory : "/",
15ae422b 4074 prefix, yes_no(c->non_blocking),
64747e2d 4075 prefix, yes_no(c->private_tmp),
7f112f50 4076 prefix, yes_no(c->private_devices),
59eeb84b 4077 prefix, yes_no(c->protect_kernel_tunables),
e66a2f65 4078 prefix, yes_no(c->protect_kernel_modules),
59eeb84b 4079 prefix, yes_no(c->protect_control_groups),
d251207d
LP
4080 prefix, yes_no(c->private_network),
4081 prefix, yes_no(c->private_users),
1b8689f9
LP
4082 prefix, protect_home_to_string(c->protect_home),
4083 prefix, protect_system_to_string(c->protect_system),
5d997827 4084 prefix, yes_no(c->mount_apivfs),
f3e43635 4085 prefix, yes_no(c->ignore_sigpipe),
f4170c67 4086 prefix, yes_no(c->memory_deny_write_execute),
b1edf445
LP
4087 prefix, yes_no(c->restrict_realtime),
4088 prefix, exec_keyring_mode_to_string(c->keyring_mode));
fb33a393 4089
915e6d16
LP
4090 if (c->root_image)
4091 fprintf(f, "%sRootImage: %s\n", prefix, c->root_image);
4092
8c7be95e
LP
4093 STRV_FOREACH(e, c->environment)
4094 fprintf(f, "%sEnvironment: %s\n", prefix, *e);
4095
4096 STRV_FOREACH(e, c->environment_files)
4097 fprintf(f, "%sEnvironmentFile: %s\n", prefix, *e);
94f04347 4098
b4c14404
FB
4099 STRV_FOREACH(e, c->pass_environment)
4100 fprintf(f, "%sPassEnvironment: %s\n", prefix, *e);
4101
00819cc1
LP
4102 STRV_FOREACH(e, c->unset_environment)
4103 fprintf(f, "%sUnsetEnvironment: %s\n", prefix, *e);
4104
53f47dfc
YW
4105 fprintf(f, "%sRuntimeDirectoryPreserve: %s\n", prefix, exec_preserve_mode_to_string(c->runtime_directory_preserve_mode));
4106
72fd1768 4107 for (dt = 0; dt < _EXEC_DIRECTORY_TYPE_MAX; dt++) {
3536f49e
YW
4108 fprintf(f, "%s%sMode: %04o\n", prefix, exec_directory_type_to_string(dt), c->directories[dt].mode);
4109
4110 STRV_FOREACH(d, c->directories[dt].paths)
4111 fprintf(f, "%s%s: %s\n", prefix, exec_directory_type_to_string(dt), *d);
4112 }
c2bbd90b 4113
fb33a393
LP
4114 if (c->nice_set)
4115 fprintf(f,
4116 "%sNice: %i\n",
4117 prefix, c->nice);
4118
dd6c17b1 4119 if (c->oom_score_adjust_set)
fb33a393 4120 fprintf(f,
dd6c17b1
LP
4121 "%sOOMScoreAdjust: %i\n",
4122 prefix, c->oom_score_adjust);
9eba9da4 4123
94f04347 4124 for (i = 0; i < RLIM_NLIMITS; i++)
3c11da9d 4125 if (c->rlimit[i]) {
4c3a2b84 4126 fprintf(f, "%sLimit%s: " RLIM_FMT "\n",
3c11da9d 4127 prefix, rlimit_to_string(i), c->rlimit[i]->rlim_max);
4c3a2b84 4128 fprintf(f, "%sLimit%sSoft: " RLIM_FMT "\n",
3c11da9d
EV
4129 prefix, rlimit_to_string(i), c->rlimit[i]->rlim_cur);
4130 }
94f04347 4131
f8b69d1d 4132 if (c->ioprio_set) {
1756a011 4133 _cleanup_free_ char *class_str = NULL;
f8b69d1d 4134
837df140
YW
4135 r = ioprio_class_to_string_alloc(IOPRIO_PRIO_CLASS(c->ioprio), &class_str);
4136 if (r >= 0)
4137 fprintf(f, "%sIOSchedulingClass: %s\n", prefix, class_str);
4138
4139 fprintf(f, "%sIOPriority: %lu\n", prefix, IOPRIO_PRIO_DATA(c->ioprio));
f8b69d1d 4140 }
94f04347 4141
f8b69d1d 4142 if (c->cpu_sched_set) {
1756a011 4143 _cleanup_free_ char *policy_str = NULL;
f8b69d1d 4144
837df140
YW
4145 r = sched_policy_to_string_alloc(c->cpu_sched_policy, &policy_str);
4146 if (r >= 0)
4147 fprintf(f, "%sCPUSchedulingPolicy: %s\n", prefix, policy_str);
4148
94f04347 4149 fprintf(f,
38b48754
LP
4150 "%sCPUSchedulingPriority: %i\n"
4151 "%sCPUSchedulingResetOnFork: %s\n",
38b48754
LP
4152 prefix, c->cpu_sched_priority,
4153 prefix, yes_no(c->cpu_sched_reset_on_fork));
b929bf04 4154 }
94f04347 4155
82c121a4 4156 if (c->cpuset) {
94f04347 4157 fprintf(f, "%sCPUAffinity:", prefix);
82c121a4
LP
4158 for (i = 0; i < c->cpuset_ncpus; i++)
4159 if (CPU_ISSET_S(i, CPU_ALLOC_SIZE(c->cpuset_ncpus), c->cpuset))
43a99a7a 4160 fprintf(f, " %u", i);
94f04347
LP
4161 fputs("\n", f);
4162 }
4163
3a43da28 4164 if (c->timer_slack_nsec != NSEC_INFINITY)
ccd06097 4165 fprintf(f, "%sTimerSlackNSec: "NSEC_FMT "\n", prefix, c->timer_slack_nsec);
94f04347
LP
4166
4167 fprintf(f,
80876c20
LP
4168 "%sStandardInput: %s\n"
4169 "%sStandardOutput: %s\n"
4170 "%sStandardError: %s\n",
4171 prefix, exec_input_to_string(c->std_input),
4172 prefix, exec_output_to_string(c->std_output),
4173 prefix, exec_output_to_string(c->std_error));
4174
befc4a80
LP
4175 if (c->std_input == EXEC_INPUT_NAMED_FD)
4176 fprintf(f, "%sStandardInputFileDescriptorName: %s\n", prefix, c->stdio_fdname[STDIN_FILENO]);
4177 if (c->std_output == EXEC_OUTPUT_NAMED_FD)
4178 fprintf(f, "%sStandardOutputFileDescriptorName: %s\n", prefix, c->stdio_fdname[STDOUT_FILENO]);
4179 if (c->std_error == EXEC_OUTPUT_NAMED_FD)
4180 fprintf(f, "%sStandardErrorFileDescriptorName: %s\n", prefix, c->stdio_fdname[STDERR_FILENO]);
4181
4182 if (c->std_input == EXEC_INPUT_FILE)
4183 fprintf(f, "%sStandardInputFile: %s\n", prefix, c->stdio_file[STDIN_FILENO]);
4184 if (c->std_output == EXEC_OUTPUT_FILE)
4185 fprintf(f, "%sStandardOutputFile: %s\n", prefix, c->stdio_file[STDOUT_FILENO]);
566b7d23
ZD
4186 if (c->std_output == EXEC_OUTPUT_FILE_APPEND)
4187 fprintf(f, "%sStandardOutputFileToAppend: %s\n", prefix, c->stdio_file[STDOUT_FILENO]);
befc4a80
LP
4188 if (c->std_error == EXEC_OUTPUT_FILE)
4189 fprintf(f, "%sStandardErrorFile: %s\n", prefix, c->stdio_file[STDERR_FILENO]);
566b7d23
ZD
4190 if (c->std_error == EXEC_OUTPUT_FILE_APPEND)
4191 fprintf(f, "%sStandardErrorFileToAppend: %s\n", prefix, c->stdio_file[STDERR_FILENO]);
befc4a80 4192
80876c20
LP
4193 if (c->tty_path)
4194 fprintf(f,
6ea832a2
LP
4195 "%sTTYPath: %s\n"
4196 "%sTTYReset: %s\n"
4197 "%sTTYVHangup: %s\n"
4198 "%sTTYVTDisallocate: %s\n",
4199 prefix, c->tty_path,
4200 prefix, yes_no(c->tty_reset),
4201 prefix, yes_no(c->tty_vhangup),
4202 prefix, yes_no(c->tty_vt_disallocate));
94f04347 4203
9f6444eb
LP
4204 if (IN_SET(c->std_output,
4205 EXEC_OUTPUT_SYSLOG,
4206 EXEC_OUTPUT_KMSG,
4207 EXEC_OUTPUT_JOURNAL,
4208 EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
4209 EXEC_OUTPUT_KMSG_AND_CONSOLE,
4210 EXEC_OUTPUT_JOURNAL_AND_CONSOLE) ||
4211 IN_SET(c->std_error,
4212 EXEC_OUTPUT_SYSLOG,
4213 EXEC_OUTPUT_KMSG,
4214 EXEC_OUTPUT_JOURNAL,
4215 EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
4216 EXEC_OUTPUT_KMSG_AND_CONSOLE,
4217 EXEC_OUTPUT_JOURNAL_AND_CONSOLE)) {
f8b69d1d 4218
5ce70e5b 4219 _cleanup_free_ char *fac_str = NULL, *lvl_str = NULL;
f8b69d1d 4220
837df140
YW
4221 r = log_facility_unshifted_to_string_alloc(c->syslog_priority >> 3, &fac_str);
4222 if (r >= 0)
4223 fprintf(f, "%sSyslogFacility: %s\n", prefix, fac_str);
f8b69d1d 4224
837df140
YW
4225 r = log_level_to_string_alloc(LOG_PRI(c->syslog_priority), &lvl_str);
4226 if (r >= 0)
4227 fprintf(f, "%sSyslogLevel: %s\n", prefix, lvl_str);
f8b69d1d 4228 }
94f04347 4229
d3070fbd
LP
4230 if (c->log_level_max >= 0) {
4231 _cleanup_free_ char *t = NULL;
4232
4233 (void) log_level_to_string_alloc(c->log_level_max, &t);
4234
4235 fprintf(f, "%sLogLevelMax: %s\n", prefix, strna(t));
4236 }
4237
90fc172e
AZ
4238 if (c->log_rate_limit_interval_usec > 0) {
4239 char buf_timespan[FORMAT_TIMESPAN_MAX];
4240
4241 fprintf(f,
4242 "%sLogRateLimitIntervalSec: %s\n",
4243 prefix, format_timespan(buf_timespan, sizeof(buf_timespan), c->log_rate_limit_interval_usec, USEC_PER_SEC));
4244 }
4245
4246 if (c->log_rate_limit_burst > 0)
4247 fprintf(f, "%sLogRateLimitBurst: %u\n", prefix, c->log_rate_limit_burst);
4248
d3070fbd
LP
4249 if (c->n_log_extra_fields > 0) {
4250 size_t j;
4251
4252 for (j = 0; j < c->n_log_extra_fields; j++) {
4253 fprintf(f, "%sLogExtraFields: ", prefix);
4254 fwrite(c->log_extra_fields[j].iov_base,
4255 1, c->log_extra_fields[j].iov_len,
4256 f);
4257 fputc('\n', f);
4258 }
4259 }
4260
07d46372
YW
4261 if (c->secure_bits) {
4262 _cleanup_free_ char *str = NULL;
4263
4264 r = secure_bits_to_string_alloc(c->secure_bits, &str);
4265 if (r >= 0)
4266 fprintf(f, "%sSecure Bits: %s\n", prefix, str);
4267 }
94f04347 4268
a103496c 4269 if (c->capability_bounding_set != CAP_ALL) {
dd1f5bd0 4270 _cleanup_free_ char *str = NULL;
94f04347 4271
dd1f5bd0
YW
4272 r = capability_set_to_string_alloc(c->capability_bounding_set, &str);
4273 if (r >= 0)
4274 fprintf(f, "%sCapabilityBoundingSet: %s\n", prefix, str);
755d4b67
IP
4275 }
4276
4277 if (c->capability_ambient_set != 0) {
dd1f5bd0 4278 _cleanup_free_ char *str = NULL;
755d4b67 4279
dd1f5bd0
YW
4280 r = capability_set_to_string_alloc(c->capability_ambient_set, &str);
4281 if (r >= 0)
4282 fprintf(f, "%sAmbientCapabilities: %s\n", prefix, str);
94f04347
LP
4283 }
4284
4285 if (c->user)
f2d3769a 4286 fprintf(f, "%sUser: %s\n", prefix, c->user);
94f04347 4287 if (c->group)
f2d3769a 4288 fprintf(f, "%sGroup: %s\n", prefix, c->group);
94f04347 4289
29206d46
LP
4290 fprintf(f, "%sDynamicUser: %s\n", prefix, yes_no(c->dynamic_user));
4291
ac6e8be6 4292 if (!strv_isempty(c->supplementary_groups)) {
94f04347 4293 fprintf(f, "%sSupplementaryGroups:", prefix);
15ae422b
LP
4294 strv_fprintf(f, c->supplementary_groups);
4295 fputs("\n", f);
4296 }
94f04347 4297
5b6319dc 4298 if (c->pam_name)
f2d3769a 4299 fprintf(f, "%sPAMName: %s\n", prefix, c->pam_name);
5b6319dc 4300
58629001 4301 if (!strv_isempty(c->read_write_paths)) {
2a624c36
AP
4302 fprintf(f, "%sReadWritePaths:", prefix);
4303 strv_fprintf(f, c->read_write_paths);
15ae422b
LP
4304 fputs("\n", f);
4305 }
4306
58629001 4307 if (!strv_isempty(c->read_only_paths)) {
2a624c36
AP
4308 fprintf(f, "%sReadOnlyPaths:", prefix);
4309 strv_fprintf(f, c->read_only_paths);
15ae422b
LP
4310 fputs("\n", f);
4311 }
94f04347 4312
58629001 4313 if (!strv_isempty(c->inaccessible_paths)) {
2a624c36
AP
4314 fprintf(f, "%sInaccessiblePaths:", prefix);
4315 strv_fprintf(f, c->inaccessible_paths);
94f04347
LP
4316 fputs("\n", f);
4317 }
2e22afe9 4318
d2d6c096 4319 if (c->n_bind_mounts > 0)
4ca763a9
YW
4320 for (i = 0; i < c->n_bind_mounts; i++)
4321 fprintf(f, "%s%s: %s%s:%s:%s\n", prefix,
d2d6c096 4322 c->bind_mounts[i].read_only ? "BindReadOnlyPaths" : "BindPaths",
4ca763a9 4323 c->bind_mounts[i].ignore_enoent ? "-": "",
d2d6c096
LP
4324 c->bind_mounts[i].source,
4325 c->bind_mounts[i].destination,
4326 c->bind_mounts[i].recursive ? "rbind" : "norbind");
d2d6c096 4327
2abd4e38
YW
4328 if (c->n_temporary_filesystems > 0)
4329 for (i = 0; i < c->n_temporary_filesystems; i++) {
4330 TemporaryFileSystem *t = c->temporary_filesystems + i;
4331
4332 fprintf(f, "%sTemporaryFileSystem: %s%s%s\n", prefix,
4333 t->path,
4334 isempty(t->options) ? "" : ":",
4335 strempty(t->options));
4336 }
4337
169c1bda
LP
4338 if (c->utmp_id)
4339 fprintf(f,
4340 "%sUtmpIdentifier: %s\n",
4341 prefix, c->utmp_id);
7b52a628
MS
4342
4343 if (c->selinux_context)
4344 fprintf(f,
5f8640fb
LP
4345 "%sSELinuxContext: %s%s\n",
4346 prefix, c->selinux_context_ignore ? "-" : "", c->selinux_context);
17df7223 4347
80c21aea
WC
4348 if (c->apparmor_profile)
4349 fprintf(f,
4350 "%sAppArmorProfile: %s%s\n",
4351 prefix, c->apparmor_profile_ignore ? "-" : "", c->apparmor_profile);
4352
4353 if (c->smack_process_label)
4354 fprintf(f,
4355 "%sSmackProcessLabel: %s%s\n",
4356 prefix, c->smack_process_label_ignore ? "-" : "", c->smack_process_label);
4357
050f7277 4358 if (c->personality != PERSONALITY_INVALID)
ac45f971
LP
4359 fprintf(f,
4360 "%sPersonality: %s\n",
4361 prefix, strna(personality_to_string(c->personality)));
4362
78e864e5
TM
4363 fprintf(f,
4364 "%sLockPersonality: %s\n",
4365 prefix, yes_no(c->lock_personality));
4366
17df7223 4367 if (c->syscall_filter) {
349cc4a5 4368#if HAVE_SECCOMP
17df7223 4369 Iterator j;
8cfa775f 4370 void *id, *val;
17df7223 4371 bool first = true;
351a19b1 4372#endif
17df7223
LP
4373
4374 fprintf(f,
57183d11 4375 "%sSystemCallFilter: ",
17df7223
LP
4376 prefix);
4377
4378 if (!c->syscall_whitelist)
4379 fputc('~', f);
4380
349cc4a5 4381#if HAVE_SECCOMP
8cfa775f 4382 HASHMAP_FOREACH_KEY(val, id, c->syscall_filter, j) {
17df7223 4383 _cleanup_free_ char *name = NULL;
8cfa775f
YW
4384 const char *errno_name = NULL;
4385 int num = PTR_TO_INT(val);
17df7223
LP
4386
4387 if (first)
4388 first = false;
4389 else
4390 fputc(' ', f);
4391
57183d11 4392 name = seccomp_syscall_resolve_num_arch(SCMP_ARCH_NATIVE, PTR_TO_INT(id) - 1);
17df7223 4393 fputs(strna(name), f);
8cfa775f
YW
4394
4395 if (num >= 0) {
4396 errno_name = errno_to_name(num);
4397 if (errno_name)
4398 fprintf(f, ":%s", errno_name);
4399 else
4400 fprintf(f, ":%d", num);
4401 }
17df7223 4402 }
351a19b1 4403#endif
17df7223
LP
4404
4405 fputc('\n', f);
4406 }
4407
57183d11 4408 if (c->syscall_archs) {
349cc4a5 4409#if HAVE_SECCOMP
57183d11
LP
4410 Iterator j;
4411 void *id;
4412#endif
4413
4414 fprintf(f,
4415 "%sSystemCallArchitectures:",
4416 prefix);
4417
349cc4a5 4418#if HAVE_SECCOMP
57183d11
LP
4419 SET_FOREACH(id, c->syscall_archs, j)
4420 fprintf(f, " %s", strna(seccomp_arch_to_string(PTR_TO_UINT32(id) - 1)));
4421#endif
4422 fputc('\n', f);
4423 }
4424
add00535
LP
4425 if (exec_context_restrict_namespaces_set(c)) {
4426 _cleanup_free_ char *s = NULL;
4427
86c2a9f1 4428 r = namespace_flags_to_string(c->restrict_namespaces, &s);
add00535
LP
4429 if (r >= 0)
4430 fprintf(f, "%sRestrictNamespaces: %s\n",
4431 prefix, s);
4432 }
4433
3df90f24
YW
4434 if (c->syscall_errno > 0) {
4435 const char *errno_name;
4436
4437 fprintf(f, "%sSystemCallErrorNumber: ", prefix);
4438
4439 errno_name = errno_to_name(c->syscall_errno);
4440 if (errno_name)
4441 fprintf(f, "%s\n", errno_name);
4442 else
4443 fprintf(f, "%d\n", c->syscall_errno);
4444 }
eef65bf3
MS
4445
4446 if (c->apparmor_profile)
4447 fprintf(f,
4448 "%sAppArmorProfile: %s%s\n",
4449 prefix, c->apparmor_profile_ignore ? "-" : "", c->apparmor_profile);
5cb5a6ff
LP
4450}
4451
34cf6c43 4452bool exec_context_maintains_privileges(const ExecContext *c) {
a931ad47
LP
4453 assert(c);
4454
61233823 4455 /* Returns true if the process forked off would run under
a931ad47
LP
4456 * an unchanged UID or as root. */
4457
4458 if (!c->user)
4459 return true;
4460
4461 if (streq(c->user, "root") || streq(c->user, "0"))
4462 return true;
4463
4464 return false;
4465}
4466
34cf6c43 4467int exec_context_get_effective_ioprio(const ExecContext *c) {
7f452159
LP
4468 int p;
4469
4470 assert(c);
4471
4472 if (c->ioprio_set)
4473 return c->ioprio;
4474
4475 p = ioprio_get(IOPRIO_WHO_PROCESS, 0);
4476 if (p < 0)
4477 return IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 4);
4478
4479 return p;
4480}
4481
d3070fbd
LP
4482void exec_context_free_log_extra_fields(ExecContext *c) {
4483 size_t l;
4484
4485 assert(c);
4486
4487 for (l = 0; l < c->n_log_extra_fields; l++)
4488 free(c->log_extra_fields[l].iov_base);
4489 c->log_extra_fields = mfree(c->log_extra_fields);
4490 c->n_log_extra_fields = 0;
4491}
4492
b58b4116 4493void exec_status_start(ExecStatus *s, pid_t pid) {
034c6ed7 4494 assert(s);
5cb5a6ff 4495
2ed26ed0
LP
4496 *s = (ExecStatus) {
4497 .pid = pid,
4498 };
4499
b58b4116
LP
4500 dual_timestamp_get(&s->start_timestamp);
4501}
4502
34cf6c43 4503void exec_status_exit(ExecStatus *s, const ExecContext *context, pid_t pid, int code, int status) {
b58b4116
LP
4504 assert(s);
4505
2ed26ed0
LP
4506 if (s->pid != pid) {
4507 *s = (ExecStatus) {
4508 .pid = pid,
4509 };
4510 }
b58b4116 4511
63983207 4512 dual_timestamp_get(&s->exit_timestamp);
9fb86720 4513
034c6ed7
LP
4514 s->code = code;
4515 s->status = status;
169c1bda 4516
6ea832a2
LP
4517 if (context) {
4518 if (context->utmp_id)
2ed26ed0 4519 (void) utmp_put_dead_process(context->utmp_id, pid, code, status);
6ea832a2 4520
1e22b5cd 4521 exec_context_tty_reset(context, NULL);
6ea832a2 4522 }
9fb86720
LP
4523}
4524
6a1d4d9f
LP
4525void exec_status_reset(ExecStatus *s) {
4526 assert(s);
4527
4528 *s = (ExecStatus) {};
4529}
4530
34cf6c43 4531void exec_status_dump(const ExecStatus *s, FILE *f, const char *prefix) {
9fb86720
LP
4532 char buf[FORMAT_TIMESTAMP_MAX];
4533
4534 assert(s);
4535 assert(f);
4536
9fb86720
LP
4537 if (s->pid <= 0)
4538 return;
4539
4c940960
LP
4540 prefix = strempty(prefix);
4541
9fb86720 4542 fprintf(f,
ccd06097
ZJS
4543 "%sPID: "PID_FMT"\n",
4544 prefix, s->pid);
9fb86720 4545
af9d16e1 4546 if (dual_timestamp_is_set(&s->start_timestamp))
9fb86720
LP
4547 fprintf(f,
4548 "%sStart Timestamp: %s\n",
63983207 4549 prefix, format_timestamp(buf, sizeof(buf), s->start_timestamp.realtime));
9fb86720 4550
af9d16e1 4551 if (dual_timestamp_is_set(&s->exit_timestamp))
9fb86720
LP
4552 fprintf(f,
4553 "%sExit Timestamp: %s\n"
4554 "%sExit Code: %s\n"
4555 "%sExit Status: %i\n",
63983207 4556 prefix, format_timestamp(buf, sizeof(buf), s->exit_timestamp.realtime),
9fb86720
LP
4557 prefix, sigchld_code_to_string(s->code),
4558 prefix, s->status);
5cb5a6ff 4559}
44d8db9e 4560
34cf6c43 4561static char *exec_command_line(char **argv) {
44d8db9e
LP
4562 size_t k;
4563 char *n, *p, **a;
4564 bool first = true;
4565
9e2f7c11 4566 assert(argv);
44d8db9e 4567
9164977d 4568 k = 1;
9e2f7c11 4569 STRV_FOREACH(a, argv)
44d8db9e
LP
4570 k += strlen(*a)+3;
4571
5cd9cd35
LP
4572 n = new(char, k);
4573 if (!n)
44d8db9e
LP
4574 return NULL;
4575
4576 p = n;
9e2f7c11 4577 STRV_FOREACH(a, argv) {
44d8db9e
LP
4578
4579 if (!first)
4580 *(p++) = ' ';
4581 else
4582 first = false;
4583
4584 if (strpbrk(*a, WHITESPACE)) {
4585 *(p++) = '\'';
4586 p = stpcpy(p, *a);
4587 *(p++) = '\'';
4588 } else
4589 p = stpcpy(p, *a);
4590
4591 }
4592
9164977d
LP
4593 *p = 0;
4594
44d8db9e
LP
4595 /* FIXME: this doesn't really handle arguments that have
4596 * spaces and ticks in them */
4597
4598 return n;
4599}
4600
34cf6c43 4601static void exec_command_dump(ExecCommand *c, FILE *f, const char *prefix) {
e1d75803 4602 _cleanup_free_ char *cmd = NULL;
4c940960 4603 const char *prefix2;
44d8db9e
LP
4604
4605 assert(c);
4606 assert(f);
4607
4c940960 4608 prefix = strempty(prefix);
63c372cb 4609 prefix2 = strjoina(prefix, "\t");
44d8db9e 4610
9e2f7c11 4611 cmd = exec_command_line(c->argv);
44d8db9e
LP
4612 fprintf(f,
4613 "%sCommand Line: %s\n",
4614 prefix, cmd ? cmd : strerror(ENOMEM));
4615
9fb86720 4616 exec_status_dump(&c->exec_status, f, prefix2);
44d8db9e
LP
4617}
4618
4619void exec_command_dump_list(ExecCommand *c, FILE *f, const char *prefix) {
4620 assert(f);
4621
4c940960 4622 prefix = strempty(prefix);
44d8db9e
LP
4623
4624 LIST_FOREACH(command, c, c)
4625 exec_command_dump(c, f, prefix);
4626}
94f04347 4627
a6a80b4f
LP
4628void exec_command_append_list(ExecCommand **l, ExecCommand *e) {
4629 ExecCommand *end;
4630
4631 assert(l);
4632 assert(e);
4633
4634 if (*l) {
35b8ca3a 4635 /* It's kind of important, that we keep the order here */
71fda00f
LP
4636 LIST_FIND_TAIL(command, *l, end);
4637 LIST_INSERT_AFTER(command, *l, end, e);
a6a80b4f
LP
4638 } else
4639 *l = e;
4640}
4641
26fd040d
LP
4642int exec_command_set(ExecCommand *c, const char *path, ...) {
4643 va_list ap;
4644 char **l, *p;
4645
4646 assert(c);
4647 assert(path);
4648
4649 va_start(ap, path);
4650 l = strv_new_ap(path, ap);
4651 va_end(ap);
4652
4653 if (!l)
4654 return -ENOMEM;
4655
250a918d
LP
4656 p = strdup(path);
4657 if (!p) {
26fd040d
LP
4658 strv_free(l);
4659 return -ENOMEM;
4660 }
4661
6897dfe8 4662 free_and_replace(c->path, p);
26fd040d 4663
130d3d22 4664 return strv_free_and_replace(c->argv, l);
26fd040d
LP
4665}
4666
86b23b07 4667int exec_command_append(ExecCommand *c, const char *path, ...) {
e63ff941 4668 _cleanup_strv_free_ char **l = NULL;
86b23b07 4669 va_list ap;
86b23b07
JS
4670 int r;
4671
4672 assert(c);
4673 assert(path);
4674
4675 va_start(ap, path);
4676 l = strv_new_ap(path, ap);
4677 va_end(ap);
4678
4679 if (!l)
4680 return -ENOMEM;
4681
e287086b 4682 r = strv_extend_strv(&c->argv, l, false);
e63ff941 4683 if (r < 0)
86b23b07 4684 return r;
86b23b07
JS
4685
4686 return 0;
4687}
4688
e8a565cb
YW
4689static void *remove_tmpdir_thread(void *p) {
4690 _cleanup_free_ char *path = p;
86b23b07 4691
e8a565cb
YW
4692 (void) rm_rf(path, REMOVE_ROOT|REMOVE_PHYSICAL);
4693 return NULL;
4694}
4695
4696static ExecRuntime* exec_runtime_free(ExecRuntime *rt, bool destroy) {
4697 int r;
4698
4699 if (!rt)
4700 return NULL;
4701
4702 if (rt->manager)
4703 (void) hashmap_remove(rt->manager->exec_runtime_by_id, rt->id);
4704
4705 /* When destroy is true, then rm_rf tmp_dir and var_tmp_dir. */
4706 if (destroy && rt->tmp_dir) {
4707 log_debug("Spawning thread to nuke %s", rt->tmp_dir);
4708
4709 r = asynchronous_job(remove_tmpdir_thread, rt->tmp_dir);
4710 if (r < 0) {
4711 log_warning_errno(r, "Failed to nuke %s: %m", rt->tmp_dir);
4712 free(rt->tmp_dir);
4713 }
4714
4715 rt->tmp_dir = NULL;
4716 }
613b411c 4717
e8a565cb
YW
4718 if (destroy && rt->var_tmp_dir) {
4719 log_debug("Spawning thread to nuke %s", rt->var_tmp_dir);
4720
4721 r = asynchronous_job(remove_tmpdir_thread, rt->var_tmp_dir);
4722 if (r < 0) {
4723 log_warning_errno(r, "Failed to nuke %s: %m", rt->var_tmp_dir);
4724 free(rt->var_tmp_dir);
4725 }
4726
4727 rt->var_tmp_dir = NULL;
4728 }
4729
4730 rt->id = mfree(rt->id);
4731 rt->tmp_dir = mfree(rt->tmp_dir);
4732 rt->var_tmp_dir = mfree(rt->var_tmp_dir);
4733 safe_close_pair(rt->netns_storage_socket);
4734 return mfree(rt);
4735}
4736
4737static void exec_runtime_freep(ExecRuntime **rt) {
613b411c 4738 if (*rt)
e8a565cb
YW
4739 (void) exec_runtime_free(*rt, false);
4740}
4741
4742static int exec_runtime_allocate(ExecRuntime **rt) {
4743 assert(rt);
613b411c
LP
4744
4745 *rt = new0(ExecRuntime, 1);
f146f5e1 4746 if (!*rt)
613b411c
LP
4747 return -ENOMEM;
4748
613b411c 4749 (*rt)->netns_storage_socket[0] = (*rt)->netns_storage_socket[1] = -1;
613b411c
LP
4750 return 0;
4751}
4752
e8a565cb
YW
4753static int exec_runtime_add(
4754 Manager *m,
4755 const char *id,
4756 const char *tmp_dir,
4757 const char *var_tmp_dir,
4758 const int netns_storage_socket[2],
4759 ExecRuntime **ret) {
4760
4761 _cleanup_(exec_runtime_freep) ExecRuntime *rt = NULL;
613b411c
LP
4762 int r;
4763
e8a565cb 4764 assert(m);
613b411c
LP
4765 assert(id);
4766
e8a565cb
YW
4767 r = hashmap_ensure_allocated(&m->exec_runtime_by_id, &string_hash_ops);
4768 if (r < 0)
4769 return r;
613b411c 4770
e8a565cb 4771 r = exec_runtime_allocate(&rt);
613b411c
LP
4772 if (r < 0)
4773 return r;
4774
e8a565cb
YW
4775 rt->id = strdup(id);
4776 if (!rt->id)
4777 return -ENOMEM;
4778
4779 if (tmp_dir) {
4780 rt->tmp_dir = strdup(tmp_dir);
4781 if (!rt->tmp_dir)
4782 return -ENOMEM;
4783
4784 /* When tmp_dir is set, then we require var_tmp_dir is also set. */
4785 assert(var_tmp_dir);
4786 rt->var_tmp_dir = strdup(var_tmp_dir);
4787 if (!rt->var_tmp_dir)
4788 return -ENOMEM;
4789 }
4790
4791 if (netns_storage_socket) {
4792 rt->netns_storage_socket[0] = netns_storage_socket[0];
4793 rt->netns_storage_socket[1] = netns_storage_socket[1];
613b411c
LP
4794 }
4795
e8a565cb
YW
4796 r = hashmap_put(m->exec_runtime_by_id, rt->id, rt);
4797 if (r < 0)
4798 return r;
4799
4800 rt->manager = m;
4801
4802 if (ret)
4803 *ret = rt;
4804
4805 /* do not remove created ExecRuntime object when the operation succeeds. */
4806 rt = NULL;
4807 return 0;
4808}
4809
4810static int exec_runtime_make(Manager *m, const ExecContext *c, const char *id, ExecRuntime **ret) {
4811 _cleanup_free_ char *tmp_dir = NULL, *var_tmp_dir = NULL;
4812 _cleanup_close_pair_ int netns_storage_socket[2] = {-1, -1};
4813 int r;
4814
4815 assert(m);
4816 assert(c);
4817 assert(id);
4818
4819 /* It is not necessary to create ExecRuntime object. */
4820 if (!c->private_network && !c->private_tmp)
4821 return 0;
4822
4823 if (c->private_tmp) {
4824 r = setup_tmp_dirs(id, &tmp_dir, &var_tmp_dir);
613b411c
LP
4825 if (r < 0)
4826 return r;
4827 }
4828
e8a565cb
YW
4829 if (c->private_network) {
4830 if (socketpair(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC, 0, netns_storage_socket) < 0)
4831 return -errno;
4832 }
4833
4834 r = exec_runtime_add(m, id, tmp_dir, var_tmp_dir, netns_storage_socket, ret);
4835 if (r < 0)
4836 return r;
4837
4838 /* Avoid cleanup */
4839 netns_storage_socket[0] = -1;
4840 netns_storage_socket[1] = -1;
613b411c
LP
4841 return 1;
4842}
4843
e8a565cb
YW
4844int exec_runtime_acquire(Manager *m, const ExecContext *c, const char *id, bool create, ExecRuntime **ret) {
4845 ExecRuntime *rt;
4846 int r;
613b411c 4847
e8a565cb
YW
4848 assert(m);
4849 assert(id);
4850 assert(ret);
4851
4852 rt = hashmap_get(m->exec_runtime_by_id, id);
4853 if (rt)
4854 /* We already have a ExecRuntime object, let's increase the ref count and reuse it */
4855 goto ref;
4856
4857 if (!create)
4858 return 0;
4859
4860 /* If not found, then create a new object. */
4861 r = exec_runtime_make(m, c, id, &rt);
4862 if (r <= 0)
4863 /* When r == 0, it is not necessary to create ExecRuntime object. */
4864 return r;
613b411c 4865
e8a565cb
YW
4866ref:
4867 /* increment reference counter. */
4868 rt->n_ref++;
4869 *ret = rt;
4870 return 1;
4871}
613b411c 4872
e8a565cb
YW
4873ExecRuntime *exec_runtime_unref(ExecRuntime *rt, bool destroy) {
4874 if (!rt)
613b411c
LP
4875 return NULL;
4876
e8a565cb 4877 assert(rt->n_ref > 0);
613b411c 4878
e8a565cb
YW
4879 rt->n_ref--;
4880 if (rt->n_ref > 0)
f2341e0a
LP
4881 return NULL;
4882
e8a565cb 4883 return exec_runtime_free(rt, destroy);
613b411c
LP
4884}
4885
e8a565cb
YW
4886int exec_runtime_serialize(const Manager *m, FILE *f, FDSet *fds) {
4887 ExecRuntime *rt;
4888 Iterator i;
4889
4890 assert(m);
613b411c
LP
4891 assert(f);
4892 assert(fds);
4893
e8a565cb
YW
4894 HASHMAP_FOREACH(rt, m->exec_runtime_by_id, i) {
4895 fprintf(f, "exec-runtime=%s", rt->id);
613b411c 4896
e8a565cb
YW
4897 if (rt->tmp_dir)
4898 fprintf(f, " tmp-dir=%s", rt->tmp_dir);
613b411c 4899
e8a565cb
YW
4900 if (rt->var_tmp_dir)
4901 fprintf(f, " var-tmp-dir=%s", rt->var_tmp_dir);
613b411c 4902
e8a565cb
YW
4903 if (rt->netns_storage_socket[0] >= 0) {
4904 int copy;
613b411c 4905
e8a565cb
YW
4906 copy = fdset_put_dup(fds, rt->netns_storage_socket[0]);
4907 if (copy < 0)
4908 return copy;
613b411c 4909
e8a565cb
YW
4910 fprintf(f, " netns-socket-0=%i", copy);
4911 }
613b411c 4912
e8a565cb
YW
4913 if (rt->netns_storage_socket[1] >= 0) {
4914 int copy;
613b411c 4915
e8a565cb
YW
4916 copy = fdset_put_dup(fds, rt->netns_storage_socket[1]);
4917 if (copy < 0)
4918 return copy;
613b411c 4919
e8a565cb
YW
4920 fprintf(f, " netns-socket-1=%i", copy);
4921 }
4922
4923 fputc('\n', f);
613b411c
LP
4924 }
4925
4926 return 0;
4927}
4928
e8a565cb
YW
4929int exec_runtime_deserialize_compat(Unit *u, const char *key, const char *value, FDSet *fds) {
4930 _cleanup_(exec_runtime_freep) ExecRuntime *rt_create = NULL;
4931 ExecRuntime *rt;
613b411c
LP
4932 int r;
4933
e8a565cb
YW
4934 /* This is for the migration from old (v237 or earlier) deserialization text.
4935 * Due to the bug #7790, this may not work with the units that use JoinsNamespaceOf=.
4936 * Even if the ExecRuntime object originally created by the other unit, we cannot judge
4937 * so or not from the serialized text, then we always creates a new object owned by this. */
4938
4939 assert(u);
613b411c
LP
4940 assert(key);
4941 assert(value);
4942
e8a565cb
YW
4943 /* Manager manages ExecRuntime objects by the unit id.
4944 * So, we omit the serialized text when the unit does not have id (yet?)... */
4945 if (isempty(u->id)) {
4946 log_unit_debug(u, "Invocation ID not found. Dropping runtime parameter.");
4947 return 0;
4948 }
613b411c 4949
e8a565cb
YW
4950 r = hashmap_ensure_allocated(&u->manager->exec_runtime_by_id, &string_hash_ops);
4951 if (r < 0) {
4952 log_unit_debug_errno(u, r, "Failed to allocate storage for runtime parameter: %m");
4953 return 0;
4954 }
4955
4956 rt = hashmap_get(u->manager->exec_runtime_by_id, u->id);
4957 if (!rt) {
4958 r = exec_runtime_allocate(&rt_create);
613b411c 4959 if (r < 0)
f2341e0a 4960 return log_oom();
613b411c 4961
e8a565cb
YW
4962 rt_create->id = strdup(u->id);
4963 if (!rt_create->id)
4964 return log_oom();
4965
4966 rt = rt_create;
4967 }
4968
4969 if (streq(key, "tmp-dir")) {
4970 char *copy;
4971
613b411c
LP
4972 copy = strdup(value);
4973 if (!copy)
4974 return log_oom();
4975
e8a565cb 4976 free_and_replace(rt->tmp_dir, copy);
613b411c
LP
4977
4978 } else if (streq(key, "var-tmp-dir")) {
4979 char *copy;
4980
613b411c
LP
4981 copy = strdup(value);
4982 if (!copy)
4983 return log_oom();
4984
e8a565cb 4985 free_and_replace(rt->var_tmp_dir, copy);
613b411c
LP
4986
4987 } else if (streq(key, "netns-socket-0")) {
4988 int fd;
4989
e8a565cb 4990 if (safe_atoi(value, &fd) < 0 || !fdset_contains(fds, fd)) {
f2341e0a 4991 log_unit_debug(u, "Failed to parse netns socket value: %s", value);
e8a565cb 4992 return 0;
613b411c 4993 }
e8a565cb
YW
4994
4995 safe_close(rt->netns_storage_socket[0]);
4996 rt->netns_storage_socket[0] = fdset_remove(fds, fd);
4997
613b411c
LP
4998 } else if (streq(key, "netns-socket-1")) {
4999 int fd;
5000
e8a565cb 5001 if (safe_atoi(value, &fd) < 0 || !fdset_contains(fds, fd)) {
f2341e0a 5002 log_unit_debug(u, "Failed to parse netns socket value: %s", value);
e8a565cb 5003 return 0;
613b411c 5004 }
e8a565cb
YW
5005
5006 safe_close(rt->netns_storage_socket[1]);
5007 rt->netns_storage_socket[1] = fdset_remove(fds, fd);
613b411c
LP
5008 } else
5009 return 0;
5010
e8a565cb
YW
5011 /* If the object is newly created, then put it to the hashmap which manages ExecRuntime objects. */
5012 if (rt_create) {
5013 r = hashmap_put(u->manager->exec_runtime_by_id, rt_create->id, rt_create);
5014 if (r < 0) {
3fe91079 5015 log_unit_debug_errno(u, r, "Failed to put runtime parameter to manager's storage: %m");
e8a565cb
YW
5016 return 0;
5017 }
613b411c 5018
e8a565cb 5019 rt_create->manager = u->manager;
613b411c 5020
e8a565cb
YW
5021 /* Avoid cleanup */
5022 rt_create = NULL;
5023 }
98b47d54 5024
e8a565cb
YW
5025 return 1;
5026}
613b411c 5027
e8a565cb
YW
5028void exec_runtime_deserialize_one(Manager *m, const char *value, FDSet *fds) {
5029 char *id = NULL, *tmp_dir = NULL, *var_tmp_dir = NULL;
5030 int r, fd0 = -1, fd1 = -1;
5031 const char *p, *v = value;
5032 size_t n;
613b411c 5033
e8a565cb
YW
5034 assert(m);
5035 assert(value);
5036 assert(fds);
98b47d54 5037
e8a565cb
YW
5038 n = strcspn(v, " ");
5039 id = strndupa(v, n);
5040 if (v[n] != ' ')
5041 goto finalize;
5042 p = v + n + 1;
5043
5044 v = startswith(p, "tmp-dir=");
5045 if (v) {
5046 n = strcspn(v, " ");
5047 tmp_dir = strndupa(v, n);
5048 if (v[n] != ' ')
5049 goto finalize;
5050 p = v + n + 1;
5051 }
5052
5053 v = startswith(p, "var-tmp-dir=");
5054 if (v) {
5055 n = strcspn(v, " ");
5056 var_tmp_dir = strndupa(v, n);
5057 if (v[n] != ' ')
5058 goto finalize;
5059 p = v + n + 1;
5060 }
5061
5062 v = startswith(p, "netns-socket-0=");
5063 if (v) {
5064 char *buf;
5065
5066 n = strcspn(v, " ");
5067 buf = strndupa(v, n);
5068 if (safe_atoi(buf, &fd0) < 0 || !fdset_contains(fds, fd0)) {
5069 log_debug("Unable to process exec-runtime netns fd specification.");
5070 return;
98b47d54 5071 }
e8a565cb
YW
5072 fd0 = fdset_remove(fds, fd0);
5073 if (v[n] != ' ')
5074 goto finalize;
5075 p = v + n + 1;
613b411c
LP
5076 }
5077
e8a565cb
YW
5078 v = startswith(p, "netns-socket-1=");
5079 if (v) {
5080 char *buf;
98b47d54 5081
e8a565cb
YW
5082 n = strcspn(v, " ");
5083 buf = strndupa(v, n);
5084 if (safe_atoi(buf, &fd1) < 0 || !fdset_contains(fds, fd1)) {
5085 log_debug("Unable to process exec-runtime netns fd specification.");
5086 return;
98b47d54 5087 }
e8a565cb
YW
5088 fd1 = fdset_remove(fds, fd1);
5089 }
98b47d54 5090
e8a565cb
YW
5091finalize:
5092
5093 r = exec_runtime_add(m, id, tmp_dir, var_tmp_dir, (int[]) { fd0, fd1 }, NULL);
7d853ca6 5094 if (r < 0)
e8a565cb 5095 log_debug_errno(r, "Failed to add exec-runtime: %m");
e8a565cb 5096}
613b411c 5097
e8a565cb
YW
5098void exec_runtime_vacuum(Manager *m) {
5099 ExecRuntime *rt;
5100 Iterator i;
5101
5102 assert(m);
5103
5104 /* Free unreferenced ExecRuntime objects. This is used after manager deserialization process. */
5105
5106 HASHMAP_FOREACH(rt, m->exec_runtime_by_id, i) {
5107 if (rt->n_ref > 0)
5108 continue;
5109
5110 (void) exec_runtime_free(rt, false);
5111 }
613b411c
LP
5112}
5113
80876c20
LP
5114static const char* const exec_input_table[_EXEC_INPUT_MAX] = {
5115 [EXEC_INPUT_NULL] = "null",
5116 [EXEC_INPUT_TTY] = "tty",
5117 [EXEC_INPUT_TTY_FORCE] = "tty-force",
4f2d528d 5118 [EXEC_INPUT_TTY_FAIL] = "tty-fail",
52c239d7
LB
5119 [EXEC_INPUT_SOCKET] = "socket",
5120 [EXEC_INPUT_NAMED_FD] = "fd",
08f3be7a 5121 [EXEC_INPUT_DATA] = "data",
2038c3f5 5122 [EXEC_INPUT_FILE] = "file",
80876c20
LP
5123};
5124
8a0867d6
LP
5125DEFINE_STRING_TABLE_LOOKUP(exec_input, ExecInput);
5126
94f04347 5127static const char* const exec_output_table[_EXEC_OUTPUT_MAX] = {
80876c20 5128 [EXEC_OUTPUT_INHERIT] = "inherit",
94f04347 5129 [EXEC_OUTPUT_NULL] = "null",
80876c20 5130 [EXEC_OUTPUT_TTY] = "tty",
94f04347 5131 [EXEC_OUTPUT_SYSLOG] = "syslog",
28dbc1e8 5132 [EXEC_OUTPUT_SYSLOG_AND_CONSOLE] = "syslog+console",
9a6bca7a 5133 [EXEC_OUTPUT_KMSG] = "kmsg",
28dbc1e8 5134 [EXEC_OUTPUT_KMSG_AND_CONSOLE] = "kmsg+console",
706343f4
LP
5135 [EXEC_OUTPUT_JOURNAL] = "journal",
5136 [EXEC_OUTPUT_JOURNAL_AND_CONSOLE] = "journal+console",
52c239d7
LB
5137 [EXEC_OUTPUT_SOCKET] = "socket",
5138 [EXEC_OUTPUT_NAMED_FD] = "fd",
2038c3f5 5139 [EXEC_OUTPUT_FILE] = "file",
566b7d23 5140 [EXEC_OUTPUT_FILE_APPEND] = "append",
94f04347
LP
5141};
5142
5143DEFINE_STRING_TABLE_LOOKUP(exec_output, ExecOutput);
023a4f67
LP
5144
5145static const char* const exec_utmp_mode_table[_EXEC_UTMP_MODE_MAX] = {
5146 [EXEC_UTMP_INIT] = "init",
5147 [EXEC_UTMP_LOGIN] = "login",
5148 [EXEC_UTMP_USER] = "user",
5149};
5150
5151DEFINE_STRING_TABLE_LOOKUP(exec_utmp_mode, ExecUtmpMode);
53f47dfc
YW
5152
5153static const char* const exec_preserve_mode_table[_EXEC_PRESERVE_MODE_MAX] = {
5154 [EXEC_PRESERVE_NO] = "no",
5155 [EXEC_PRESERVE_YES] = "yes",
5156 [EXEC_PRESERVE_RESTART] = "restart",
5157};
5158
5159DEFINE_STRING_TABLE_LOOKUP_WITH_BOOLEAN(exec_preserve_mode, ExecPreserveMode, EXEC_PRESERVE_YES);
3536f49e 5160
72fd1768 5161static const char* const exec_directory_type_table[_EXEC_DIRECTORY_TYPE_MAX] = {
3536f49e
YW
5162 [EXEC_DIRECTORY_RUNTIME] = "RuntimeDirectory",
5163 [EXEC_DIRECTORY_STATE] = "StateDirectory",
5164 [EXEC_DIRECTORY_CACHE] = "CacheDirectory",
5165 [EXEC_DIRECTORY_LOGS] = "LogsDirectory",
5166 [EXEC_DIRECTORY_CONFIGURATION] = "ConfigurationDirectory",
5167};
5168
5169DEFINE_STRING_TABLE_LOOKUP(exec_directory_type, ExecDirectoryType);
b1edf445 5170
fb2042dd
YW
5171static const char* const exec_directory_env_name_table[_EXEC_DIRECTORY_TYPE_MAX] = {
5172 [EXEC_DIRECTORY_RUNTIME] = "RUNTIME_DIRECTORY",
5173 [EXEC_DIRECTORY_STATE] = "STATE_DIRECTORY",
5174 [EXEC_DIRECTORY_CACHE] = "CACHE_DIRECTORY",
5175 [EXEC_DIRECTORY_LOGS] = "LOGS_DIRECTORY",
5176 [EXEC_DIRECTORY_CONFIGURATION] = "CONFIGURATION_DIRECTORY",
5177};
5178
5179DEFINE_PRIVATE_STRING_TABLE_LOOKUP_TO_STRING(exec_directory_env_name, ExecDirectoryType);
5180
b1edf445
LP
5181static const char* const exec_keyring_mode_table[_EXEC_KEYRING_MODE_MAX] = {
5182 [EXEC_KEYRING_INHERIT] = "inherit",
5183 [EXEC_KEYRING_PRIVATE] = "private",
5184 [EXEC_KEYRING_SHARED] = "shared",
5185};
5186
5187DEFINE_STRING_TABLE_LOOKUP(exec_keyring_mode, ExecKeyringMode);