]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/core/execute.c
Merge pull request #10919 from yuwata/sd-device-monitor-fixes
[thirdparty/systemd.git] / src / core / execute.c
CommitLineData
53e1b683 1/* SPDX-License-Identifier: LGPL-2.1+ */
a7334b09 2
034c6ed7
LP
3#include <errno.h>
4#include <fcntl.h>
8dd4c05b
LP
5#include <glob.h>
6#include <grp.h>
7#include <poll.h>
309bff19 8#include <signal.h>
8dd4c05b 9#include <string.h>
19c0b0b9 10#include <sys/capability.h>
d251207d 11#include <sys/eventfd.h>
f3e43635 12#include <sys/mman.h>
8dd4c05b 13#include <sys/personality.h>
94f04347 14#include <sys/prctl.h>
d2ffa389 15#include <sys/shm.h>
8dd4c05b 16#include <sys/socket.h>
451a074f 17#include <sys/stat.h>
d2ffa389 18#include <sys/types.h>
8dd4c05b
LP
19#include <sys/un.h>
20#include <unistd.h>
023a4f67 21#include <utmpx.h>
5cb5a6ff 22
349cc4a5 23#if HAVE_PAM
5b6319dc
LP
24#include <security/pam_appl.h>
25#endif
26
349cc4a5 27#if HAVE_SELINUX
7b52a628
MS
28#include <selinux/selinux.h>
29#endif
30
349cc4a5 31#if HAVE_SECCOMP
17df7223
LP
32#include <seccomp.h>
33#endif
34
349cc4a5 35#if HAVE_APPARMOR
eef65bf3
MS
36#include <sys/apparmor.h>
37#endif
38
24882e06 39#include "sd-messages.h"
8dd4c05b
LP
40
41#include "af-list.h"
b5efdb8a 42#include "alloc-util.h"
349cc4a5 43#if HAVE_APPARMOR
3ffd4af2
LP
44#include "apparmor-util.h"
45#endif
8dd4c05b
LP
46#include "async.h"
47#include "barrier.h"
8dd4c05b 48#include "cap-list.h"
430f0182 49#include "capability-util.h"
a1164ae3 50#include "chown-recursive.h"
da681e1b 51#include "cpu-set-util.h"
f6a6225e 52#include "def.h"
4d1a6904 53#include "env-util.h"
17df7223 54#include "errno-list.h"
3ffd4af2 55#include "execute.h"
8dd4c05b 56#include "exit-status.h"
3ffd4af2 57#include "fd-util.h"
8dd4c05b 58#include "fileio.h"
f97b34a6 59#include "format-util.h"
f4f15635 60#include "fs-util.h"
7d50b32a 61#include "glob-util.h"
c004493c 62#include "io-util.h"
8dd4c05b 63#include "ioprio.h"
a1164ae3 64#include "label.h"
8dd4c05b
LP
65#include "log.h"
66#include "macro.h"
e8a565cb 67#include "manager.h"
8dd4c05b
LP
68#include "missing.h"
69#include "mkdir.h"
70#include "namespace.h"
6bedfcbb 71#include "parse-util.h"
8dd4c05b 72#include "path-util.h"
0b452006 73#include "process-util.h"
78f22b97 74#include "rlimit-util.h"
8dd4c05b 75#include "rm-rf.h"
349cc4a5 76#if HAVE_SECCOMP
3ffd4af2
LP
77#include "seccomp-util.h"
78#endif
8dd4c05b 79#include "securebits.h"
07d46372 80#include "securebits-util.h"
8dd4c05b 81#include "selinux-util.h"
24882e06 82#include "signal-util.h"
8dd4c05b 83#include "smack-util.h"
57b7a260 84#include "socket-util.h"
fd63e712 85#include "special.h"
949befd3 86#include "stat-util.h"
8b43440b 87#include "string-table.h"
07630cea 88#include "string-util.h"
8dd4c05b 89#include "strv.h"
7ccbd1ae 90#include "syslog-util.h"
8dd4c05b 91#include "terminal-util.h"
566b7d23 92#include "umask-util.h"
8dd4c05b 93#include "unit.h"
b1d4f8e1 94#include "user-util.h"
8dd4c05b
LP
95#include "util.h"
96#include "utmp-wtmp.h"
5cb5a6ff 97
e056b01d 98#define IDLE_TIMEOUT_USEC (5*USEC_PER_SEC)
31a7eb86 99#define IDLE_TIMEOUT2_USEC (1*USEC_PER_SEC)
e6a26745 100
02a51aba
LP
101/* This assumes there is a 'tty' group */
102#define TTY_MODE 0620
103
531dca78
LP
104#define SNDBUF_SIZE (8*1024*1024)
105
/* Renumbers the passed file descriptors so that fds[i] ends up being fd number i + 3, i.e. they are packed
 * right after stdin/stdout/stderr. The array is updated in place to reflect the new numbers.
 *
 * Returns 0 on success, or a negative errno-style value if duplicating an fd fails. */
static int shift_fds(int fds[], size_t n_fds) {
        int first = 0;

        if (n_fds == 0)
                return 0;

        assert(fds);

        for (;;) {
                int retry_at = -1, idx;

                for (idx = first; idx < (int) n_fds; idx++) {
                        int wanted = idx + 3, dup_fd;

                        /* Nothing to do if the fd already sits in its final slot */
                        if (fds[idx] == wanted)
                                continue;

                        /* Ask for the lowest free fd that is >= the slot we want */
                        dup_fd = fcntl(fds[idx], F_DUPFD, wanted);
                        if (dup_fd < 0)
                                return -errno;

                        safe_close(fds[idx]);
                        fds[idx] = dup_fd;

                        /* The slot we wanted was still occupied (we got a higher number), so remember
                         * the first such index and do another pass starting from there. */
                        if (dup_fd != wanted && retry_at < 0)
                                retry_at = idx;
                }

                /* A pass without any misplaced fd means we are done */
                if (retry_at < 0)
                        return 0;

                first = retry_at;
        }
}
150
/* Adjusts the file flags of the fds we are about to pass on. The array holds n_socket_fds socket fds
 * followed by n_storage_fds further fds. O_NONBLOCK is set or cleared (according to 'nonblock') on the
 * socket fds only, while FD_CLOEXEC is dropped from every fd.
 *
 * Returns 0 on success, or the negative error of the first helper call that failed. */
static int flags_fds(const int fds[], size_t n_socket_fds, size_t n_storage_fds, bool nonblock) {
        size_t total = n_socket_fds + n_storage_fds, idx;
        int r;

        if (total == 0)
                return 0;

        assert(fds);

        for (idx = 0; idx < total; idx++) {
                bool is_socket = idx < n_socket_fds;

                /* O_NONBLOCK only matters for the socket activation fds */
                if (is_socket) {
                        r = fd_nonblock(fds[idx], nonblock);
                        if (r < 0)
                                return r;
                }

                /* These fds are meant to be inherited by our children, hence FD_CLOEXEC always goes */
                r = fd_cloexec(fds[idx], false);
                if (r < 0)
                        return r;
        }

        return 0;
}
183
1e22b5cd 184static const char *exec_context_tty_path(const ExecContext *context) {
80876c20
LP
185 assert(context);
186
1e22b5cd
LP
187 if (context->stdio_as_fds)
188 return NULL;
189
80876c20
LP
190 if (context->tty_path)
191 return context->tty_path;
192
193 return "/dev/console";
194}
195
1e22b5cd
LP
196static void exec_context_tty_reset(const ExecContext *context, const ExecParameters *p) {
197 const char *path;
198
6ea832a2
LP
199 assert(context);
200
1e22b5cd 201 path = exec_context_tty_path(context);
6ea832a2 202
1e22b5cd
LP
203 if (context->tty_vhangup) {
204 if (p && p->stdin_fd >= 0)
205 (void) terminal_vhangup_fd(p->stdin_fd);
206 else if (path)
207 (void) terminal_vhangup(path);
208 }
6ea832a2 209
1e22b5cd
LP
210 if (context->tty_reset) {
211 if (p && p->stdin_fd >= 0)
212 (void) reset_terminal_fd(p->stdin_fd, true);
213 else if (path)
214 (void) reset_terminal(path);
215 }
216
217 if (context->tty_vt_disallocate && path)
218 (void) vt_disallocate(path);
6ea832a2
LP
219}
220
6af760f3
LP
221static bool is_terminal_input(ExecInput i) {
222 return IN_SET(i,
223 EXEC_INPUT_TTY,
224 EXEC_INPUT_TTY_FORCE,
225 EXEC_INPUT_TTY_FAIL);
226}
227
3a1286b6 228static bool is_terminal_output(ExecOutput o) {
6af760f3
LP
229 return IN_SET(o,
230 EXEC_OUTPUT_TTY,
231 EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
232 EXEC_OUTPUT_KMSG_AND_CONSOLE,
233 EXEC_OUTPUT_JOURNAL_AND_CONSOLE);
234}
235
aac8c0c3
LP
236static bool is_syslog_output(ExecOutput o) {
237 return IN_SET(o,
238 EXEC_OUTPUT_SYSLOG,
239 EXEC_OUTPUT_SYSLOG_AND_CONSOLE);
240}
241
242static bool is_kmsg_output(ExecOutput o) {
243 return IN_SET(o,
244 EXEC_OUTPUT_KMSG,
245 EXEC_OUTPUT_KMSG_AND_CONSOLE);
246}
247
6af760f3
LP
248static bool exec_context_needs_term(const ExecContext *c) {
249 assert(c);
250
251 /* Return true if the execution context suggests we should set $TERM to something useful. */
252
253 if (is_terminal_input(c->std_input))
254 return true;
255
256 if (is_terminal_output(c->std_output))
257 return true;
258
259 if (is_terminal_output(c->std_error))
260 return true;
261
262 return !!c->tty_path;
3a1286b6
MS
263}
264
/* Opens /dev/null with the given open() flags (plus O_NOCTTY) and hands the result to move_fd() so that
 * it ends up as fd number 'nfd'. Returns -errno if the open fails, otherwise whatever move_fd() returns. */
static int open_null_as(int flags, int nfd) {
        int null_fd;

        assert(nfd >= 0);

        null_fd = open("/dev/null", flags|O_NOCTTY);

        return null_fd < 0 ? -errno : move_fd(null_fd, nfd, false);
}
276
524daa8c 277static int connect_journal_socket(int fd, uid_t uid, gid_t gid) {
92a17af9 278 static const union sockaddr_union sa = {
b92bea5d
ZJS
279 .un.sun_family = AF_UNIX,
280 .un.sun_path = "/run/systemd/journal/stdout",
281 };
524daa8c
ZJS
282 uid_t olduid = UID_INVALID;
283 gid_t oldgid = GID_INVALID;
284 int r;
285
cad93f29 286 if (gid_is_valid(gid)) {
524daa8c
ZJS
287 oldgid = getgid();
288
92a17af9 289 if (setegid(gid) < 0)
524daa8c
ZJS
290 return -errno;
291 }
292
cad93f29 293 if (uid_is_valid(uid)) {
524daa8c
ZJS
294 olduid = getuid();
295
92a17af9 296 if (seteuid(uid) < 0) {
524daa8c
ZJS
297 r = -errno;
298 goto restore_gid;
299 }
300 }
301
92a17af9 302 r = connect(fd, &sa.sa, SOCKADDR_UN_LEN(sa.un)) < 0 ? -errno : 0;
524daa8c
ZJS
303
304 /* If we fail to restore the uid or gid, things will likely
305 fail later on. This should only happen if an LSM interferes. */
306
cad93f29 307 if (uid_is_valid(uid))
524daa8c
ZJS
308 (void) seteuid(olduid);
309
310 restore_gid:
cad93f29 311 if (gid_is_valid(gid))
524daa8c
ZJS
312 (void) setegid(oldgid);
313
314 return r;
315}
316
fd1f9c89 317static int connect_logger_as(
34cf6c43 318 const Unit *unit,
fd1f9c89 319 const ExecContext *context,
af635cf3 320 const ExecParameters *params,
fd1f9c89
LP
321 ExecOutput output,
322 const char *ident,
fd1f9c89
LP
323 int nfd,
324 uid_t uid,
325 gid_t gid) {
326
2ac1ff68
EV
327 _cleanup_close_ int fd = -1;
328 int r;
071830ff
LP
329
330 assert(context);
af635cf3 331 assert(params);
80876c20
LP
332 assert(output < _EXEC_OUTPUT_MAX);
333 assert(ident);
334 assert(nfd >= 0);
071830ff 335
54fe0cdb
LP
336 fd = socket(AF_UNIX, SOCK_STREAM, 0);
337 if (fd < 0)
80876c20 338 return -errno;
071830ff 339
524daa8c
ZJS
340 r = connect_journal_socket(fd, uid, gid);
341 if (r < 0)
342 return r;
071830ff 343
2ac1ff68 344 if (shutdown(fd, SHUT_RD) < 0)
80876c20 345 return -errno;
071830ff 346
fd1f9c89 347 (void) fd_inc_sndbuf(fd, SNDBUF_SIZE);
531dca78 348
2ac1ff68 349 if (dprintf(fd,
62bca2c6 350 "%s\n"
80876c20
LP
351 "%s\n"
352 "%i\n"
54fe0cdb
LP
353 "%i\n"
354 "%i\n"
355 "%i\n"
4f4a1dbf 356 "%i\n",
c867611e 357 context->syslog_identifier ?: ident,
af635cf3 358 params->flags & EXEC_PASS_LOG_UNIT ? unit->id : "",
54fe0cdb
LP
359 context->syslog_priority,
360 !!context->syslog_level_prefix,
aac8c0c3
LP
361 is_syslog_output(output),
362 is_kmsg_output(output),
2ac1ff68
EV
363 is_terminal_output(output)) < 0)
364 return -errno;
80876c20 365
2ac1ff68 366 return move_fd(TAKE_FD(fd), nfd, false);
80876c20 367}
2ac1ff68 368
/* Opens the terminal at 'path' with the given flags (plus O_NOCTTY) and hands the result to move_fd() so
 * that it ends up as fd number 'nfd'. Returns the negative error of open_terminal() on failure. */
static int open_terminal_as(const char *path, int flags, int nfd) {
        int tty_fd;

        assert(path);
        assert(nfd >= 0);

        tty_fd = open_terminal(path, flags | O_NOCTTY);

        return tty_fd < 0 ? tty_fd : move_fd(tty_fd, nfd, false);
}
071830ff 381
2038c3f5 382static int acquire_path(const char *path, int flags, mode_t mode) {
15a3e96f
LP
383 union sockaddr_union sa = {};
384 _cleanup_close_ int fd = -1;
385 int r, salen;
071830ff 386
80876c20 387 assert(path);
071830ff 388
2038c3f5
LP
389 if (IN_SET(flags & O_ACCMODE, O_WRONLY, O_RDWR))
390 flags |= O_CREAT;
391
392 fd = open(path, flags|O_NOCTTY, mode);
393 if (fd >= 0)
15a3e96f 394 return TAKE_FD(fd);
071830ff 395
2038c3f5
LP
396 if (errno != ENXIO) /* ENXIO is returned when we try to open() an AF_UNIX file system socket on Linux */
397 return -errno;
15a3e96f 398 if (strlen(path) >= sizeof(sa.un.sun_path)) /* Too long, can't be a UNIX socket */
2038c3f5
LP
399 return -ENXIO;
400
401 /* So, it appears the specified path could be an AF_UNIX socket. Let's see if we can connect to it. */
402
403 fd = socket(AF_UNIX, SOCK_STREAM, 0);
404 if (fd < 0)
405 return -errno;
406
15a3e96f
LP
407 salen = sockaddr_un_set_path(&sa.un, path);
408 if (salen < 0)
409 return salen;
410
411 if (connect(fd, &sa.sa, salen) < 0)
2038c3f5
LP
412 return errno == EINVAL ? -ENXIO : -errno; /* Propagate initial error if we get EINVAL, i.e. we have
413 * indication that his wasn't an AF_UNIX socket after all */
071830ff 414
2038c3f5
LP
415 if ((flags & O_ACCMODE) == O_RDONLY)
416 r = shutdown(fd, SHUT_WR);
417 else if ((flags & O_ACCMODE) == O_WRONLY)
418 r = shutdown(fd, SHUT_RD);
419 else
15a3e96f
LP
420 return TAKE_FD(fd);
421 if (r < 0)
2038c3f5 422 return -errno;
2038c3f5 423
15a3e96f 424 return TAKE_FD(fd);
80876c20 425}
071830ff 426
08f3be7a
LP
427static int fixup_input(
428 const ExecContext *context,
429 int socket_fd,
430 bool apply_tty_stdin) {
431
432 ExecInput std_input;
433
434 assert(context);
435
436 std_input = context->std_input;
1e3ad081
LP
437
438 if (is_terminal_input(std_input) && !apply_tty_stdin)
439 return EXEC_INPUT_NULL;
071830ff 440
03fd9c49 441 if (std_input == EXEC_INPUT_SOCKET && socket_fd < 0)
4f2d528d
LP
442 return EXEC_INPUT_NULL;
443
08f3be7a
LP
444 if (std_input == EXEC_INPUT_DATA && context->stdin_data_size == 0)
445 return EXEC_INPUT_NULL;
446
03fd9c49 447 return std_input;
4f2d528d
LP
448}
449
03fd9c49 450static int fixup_output(ExecOutput std_output, int socket_fd) {
4f2d528d 451
03fd9c49 452 if (std_output == EXEC_OUTPUT_SOCKET && socket_fd < 0)
4f2d528d
LP
453 return EXEC_OUTPUT_INHERIT;
454
03fd9c49 455 return std_output;
4f2d528d
LP
456}
457
a34ceba6
LP
458static int setup_input(
459 const ExecContext *context,
460 const ExecParameters *params,
52c239d7
LB
461 int socket_fd,
462 int named_iofds[3]) {
a34ceba6 463
4f2d528d
LP
464 ExecInput i;
465
466 assert(context);
a34ceba6
LP
467 assert(params);
468
469 if (params->stdin_fd >= 0) {
470 if (dup2(params->stdin_fd, STDIN_FILENO) < 0)
471 return -errno;
472
473 /* Try to make this the controlling tty, if it is a tty, and reset it */
1fb0682e
LP
474 if (isatty(STDIN_FILENO)) {
475 (void) ioctl(STDIN_FILENO, TIOCSCTTY, context->std_input == EXEC_INPUT_TTY_FORCE);
476 (void) reset_terminal_fd(STDIN_FILENO, true);
477 }
a34ceba6
LP
478
479 return STDIN_FILENO;
480 }
4f2d528d 481
08f3be7a 482 i = fixup_input(context, socket_fd, params->flags & EXEC_APPLY_TTY_STDIN);
4f2d528d
LP
483
484 switch (i) {
071830ff 485
80876c20
LP
486 case EXEC_INPUT_NULL:
487 return open_null_as(O_RDONLY, STDIN_FILENO);
488
489 case EXEC_INPUT_TTY:
490 case EXEC_INPUT_TTY_FORCE:
491 case EXEC_INPUT_TTY_FAIL: {
046a82c1 492 int fd;
071830ff 493
1e22b5cd 494 fd = acquire_terminal(exec_context_tty_path(context),
8854d795
LP
495 i == EXEC_INPUT_TTY_FAIL ? ACQUIRE_TERMINAL_TRY :
496 i == EXEC_INPUT_TTY_FORCE ? ACQUIRE_TERMINAL_FORCE :
497 ACQUIRE_TERMINAL_WAIT,
3a43da28 498 USEC_INFINITY);
970edce6 499 if (fd < 0)
80876c20
LP
500 return fd;
501
046a82c1 502 return move_fd(fd, STDIN_FILENO, false);
80876c20
LP
503 }
504
4f2d528d 505 case EXEC_INPUT_SOCKET:
e75a9ed1
LP
506 assert(socket_fd >= 0);
507
4f2d528d
LP
508 return dup2(socket_fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
509
52c239d7 510 case EXEC_INPUT_NAMED_FD:
e75a9ed1
LP
511 assert(named_iofds[STDIN_FILENO] >= 0);
512
52c239d7
LB
513 (void) fd_nonblock(named_iofds[STDIN_FILENO], false);
514 return dup2(named_iofds[STDIN_FILENO], STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
515
08f3be7a
LP
516 case EXEC_INPUT_DATA: {
517 int fd;
518
519 fd = acquire_data_fd(context->stdin_data, context->stdin_data_size, 0);
520 if (fd < 0)
521 return fd;
522
523 return move_fd(fd, STDIN_FILENO, false);
524 }
525
2038c3f5
LP
526 case EXEC_INPUT_FILE: {
527 bool rw;
528 int fd;
529
530 assert(context->stdio_file[STDIN_FILENO]);
531
532 rw = (context->std_output == EXEC_OUTPUT_FILE && streq_ptr(context->stdio_file[STDIN_FILENO], context->stdio_file[STDOUT_FILENO])) ||
533 (context->std_error == EXEC_OUTPUT_FILE && streq_ptr(context->stdio_file[STDIN_FILENO], context->stdio_file[STDERR_FILENO]));
534
535 fd = acquire_path(context->stdio_file[STDIN_FILENO], rw ? O_RDWR : O_RDONLY, 0666 & ~context->umask);
536 if (fd < 0)
537 return fd;
538
539 return move_fd(fd, STDIN_FILENO, false);
540 }
541
80876c20
LP
542 default:
543 assert_not_reached("Unknown input type");
544 }
545}
546
a34ceba6 547static int setup_output(
34cf6c43 548 const Unit *unit,
a34ceba6
LP
549 const ExecContext *context,
550 const ExecParameters *params,
551 int fileno,
552 int socket_fd,
52c239d7 553 int named_iofds[3],
a34ceba6 554 const char *ident,
7bce046b
LP
555 uid_t uid,
556 gid_t gid,
557 dev_t *journal_stream_dev,
558 ino_t *journal_stream_ino) {
a34ceba6 559
4f2d528d
LP
560 ExecOutput o;
561 ExecInput i;
47c1d80d 562 int r;
4f2d528d 563
f2341e0a 564 assert(unit);
80876c20 565 assert(context);
a34ceba6 566 assert(params);
80876c20 567 assert(ident);
7bce046b
LP
568 assert(journal_stream_dev);
569 assert(journal_stream_ino);
80876c20 570
a34ceba6
LP
571 if (fileno == STDOUT_FILENO && params->stdout_fd >= 0) {
572
573 if (dup2(params->stdout_fd, STDOUT_FILENO) < 0)
574 return -errno;
575
576 return STDOUT_FILENO;
577 }
578
579 if (fileno == STDERR_FILENO && params->stderr_fd >= 0) {
580 if (dup2(params->stderr_fd, STDERR_FILENO) < 0)
581 return -errno;
582
583 return STDERR_FILENO;
584 }
585
08f3be7a 586 i = fixup_input(context, socket_fd, params->flags & EXEC_APPLY_TTY_STDIN);
03fd9c49 587 o = fixup_output(context->std_output, socket_fd);
4f2d528d 588
eb17e935
MS
589 if (fileno == STDERR_FILENO) {
590 ExecOutput e;
591 e = fixup_output(context->std_error, socket_fd);
80876c20 592
eb17e935
MS
593 /* This expects the input and output are already set up */
594
595 /* Don't change the stderr file descriptor if we inherit all
596 * the way and are not on a tty */
597 if (e == EXEC_OUTPUT_INHERIT &&
598 o == EXEC_OUTPUT_INHERIT &&
599 i == EXEC_INPUT_NULL &&
600 !is_terminal_input(context->std_input) &&
601 getppid () != 1)
602 return fileno;
603
604 /* Duplicate from stdout if possible */
52c239d7 605 if ((e == o && e != EXEC_OUTPUT_NAMED_FD) || e == EXEC_OUTPUT_INHERIT)
eb17e935 606 return dup2(STDOUT_FILENO, fileno) < 0 ? -errno : fileno;
071830ff 607
eb17e935 608 o = e;
80876c20 609
eb17e935 610 } else if (o == EXEC_OUTPUT_INHERIT) {
21d21ea4
LP
611 /* If input got downgraded, inherit the original value */
612 if (i == EXEC_INPUT_NULL && is_terminal_input(context->std_input))
1e22b5cd 613 return open_terminal_as(exec_context_tty_path(context), O_WRONLY, fileno);
21d21ea4 614
08f3be7a
LP
615 /* If the input is connected to anything that's not a /dev/null or a data fd, inherit that... */
616 if (!IN_SET(i, EXEC_INPUT_NULL, EXEC_INPUT_DATA))
eb17e935 617 return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
071830ff 618
acb591e4
LP
619 /* If we are not started from PID 1 we just inherit STDOUT from our parent process. */
620 if (getppid() != 1)
eb17e935 621 return fileno;
94f04347 622
eb17e935
MS
623 /* We need to open /dev/null here anew, to get the right access mode. */
624 return open_null_as(O_WRONLY, fileno);
071830ff 625 }
94f04347 626
eb17e935 627 switch (o) {
80876c20
LP
628
629 case EXEC_OUTPUT_NULL:
eb17e935 630 return open_null_as(O_WRONLY, fileno);
80876c20
LP
631
632 case EXEC_OUTPUT_TTY:
4f2d528d 633 if (is_terminal_input(i))
eb17e935 634 return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
80876c20
LP
635
636 /* We don't reset the terminal if this is just about output */
1e22b5cd 637 return open_terminal_as(exec_context_tty_path(context), O_WRONLY, fileno);
80876c20
LP
638
639 case EXEC_OUTPUT_SYSLOG:
28dbc1e8 640 case EXEC_OUTPUT_SYSLOG_AND_CONSOLE:
9a6bca7a 641 case EXEC_OUTPUT_KMSG:
28dbc1e8 642 case EXEC_OUTPUT_KMSG_AND_CONSOLE:
706343f4
LP
643 case EXEC_OUTPUT_JOURNAL:
644 case EXEC_OUTPUT_JOURNAL_AND_CONSOLE:
af635cf3 645 r = connect_logger_as(unit, context, params, o, ident, fileno, uid, gid);
47c1d80d 646 if (r < 0) {
82677ae4 647 log_unit_warning_errno(unit, r, "Failed to connect %s to the journal socket, ignoring: %m", fileno == STDOUT_FILENO ? "stdout" : "stderr");
eb17e935 648 r = open_null_as(O_WRONLY, fileno);
7bce046b
LP
649 } else {
650 struct stat st;
651
652 /* If we connected this fd to the journal via a stream, patch the device/inode into the passed
653 * parameters, but only then. This is useful so that we can set $JOURNAL_STREAM that permits
ab2116b1
LP
654 * services to detect whether they are connected to the journal or not.
655 *
656 * If both stdout and stderr are connected to a stream then let's make sure to store the data
657 * about STDERR as that's usually the best way to do logging. */
7bce046b 658
ab2116b1
LP
659 if (fstat(fileno, &st) >= 0 &&
660 (*journal_stream_ino == 0 || fileno == STDERR_FILENO)) {
7bce046b
LP
661 *journal_stream_dev = st.st_dev;
662 *journal_stream_ino = st.st_ino;
663 }
47c1d80d
MS
664 }
665 return r;
4f2d528d
LP
666
667 case EXEC_OUTPUT_SOCKET:
668 assert(socket_fd >= 0);
e75a9ed1 669
eb17e935 670 return dup2(socket_fd, fileno) < 0 ? -errno : fileno;
94f04347 671
52c239d7 672 case EXEC_OUTPUT_NAMED_FD:
e75a9ed1
LP
673 assert(named_iofds[fileno] >= 0);
674
52c239d7
LB
675 (void) fd_nonblock(named_iofds[fileno], false);
676 return dup2(named_iofds[fileno], fileno) < 0 ? -errno : fileno;
677
566b7d23
ZD
678 case EXEC_OUTPUT_FILE:
679 case EXEC_OUTPUT_FILE_APPEND: {
2038c3f5 680 bool rw;
566b7d23 681 int fd, flags;
2038c3f5
LP
682
683 assert(context->stdio_file[fileno]);
684
685 rw = context->std_input == EXEC_INPUT_FILE &&
686 streq_ptr(context->stdio_file[fileno], context->stdio_file[STDIN_FILENO]);
687
688 if (rw)
689 return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
690
566b7d23
ZD
691 flags = O_WRONLY;
692 if (o == EXEC_OUTPUT_FILE_APPEND)
693 flags |= O_APPEND;
694
695 fd = acquire_path(context->stdio_file[fileno], flags, 0666 & ~context->umask);
696
2038c3f5
LP
697 if (fd < 0)
698 return fd;
699
566b7d23 700 return move_fd(fd, fileno, 0);
2038c3f5
LP
701 }
702
94f04347 703 default:
80876c20 704 assert_not_reached("Unknown error type");
94f04347 705 }
071830ff
LP
706}
707
02a51aba
LP
708static int chown_terminal(int fd, uid_t uid) {
709 struct stat st;
710
711 assert(fd >= 0);
02a51aba 712
1ff74fb6
LP
713 /* Before we chown/chmod the TTY, let's ensure this is actually a tty */
714 if (isatty(fd) < 1)
715 return 0;
716
02a51aba 717 /* This might fail. What matters are the results. */
bab45044
LP
718 (void) fchown(fd, uid, -1);
719 (void) fchmod(fd, TTY_MODE);
02a51aba
LP
720
721 if (fstat(fd, &st) < 0)
722 return -errno;
723
d8b4e2e9 724 if (st.st_uid != uid || (st.st_mode & 0777) != TTY_MODE)
02a51aba
LP
725 return -EPERM;
726
727 return 0;
728}
729
7d5ceb64 730static int setup_confirm_stdio(const char *vc, int *_saved_stdin, int *_saved_stdout) {
3d18b167
LP
731 _cleanup_close_ int fd = -1, saved_stdin = -1, saved_stdout = -1;
732 int r;
80876c20 733
80876c20
LP
734 assert(_saved_stdin);
735 assert(_saved_stdout);
736
af6da548
LP
737 saved_stdin = fcntl(STDIN_FILENO, F_DUPFD, 3);
738 if (saved_stdin < 0)
739 return -errno;
80876c20 740
af6da548 741 saved_stdout = fcntl(STDOUT_FILENO, F_DUPFD, 3);
3d18b167
LP
742 if (saved_stdout < 0)
743 return -errno;
80876c20 744
8854d795 745 fd = acquire_terminal(vc, ACQUIRE_TERMINAL_WAIT, DEFAULT_CONFIRM_USEC);
3d18b167
LP
746 if (fd < 0)
747 return fd;
80876c20 748
af6da548
LP
749 r = chown_terminal(fd, getuid());
750 if (r < 0)
3d18b167 751 return r;
02a51aba 752
3d18b167
LP
753 r = reset_terminal_fd(fd, true);
754 if (r < 0)
755 return r;
80876c20 756
2b33ab09 757 r = rearrange_stdio(fd, fd, STDERR_FILENO);
3d18b167 758 fd = -1;
2b33ab09
LP
759 if (r < 0)
760 return r;
80876c20
LP
761
762 *_saved_stdin = saved_stdin;
763 *_saved_stdout = saved_stdout;
764
3d18b167 765 saved_stdin = saved_stdout = -1;
80876c20 766
3d18b167 767 return 0;
80876c20
LP
768}
769
63d77c92 770static void write_confirm_error_fd(int err, int fd, const Unit *u) {
3b20f877
FB
771 assert(err < 0);
772
773 if (err == -ETIMEDOUT)
63d77c92 774 dprintf(fd, "Confirmation question timed out for %s, assuming positive response.\n", u->id);
3b20f877
FB
775 else {
776 errno = -err;
63d77c92 777 dprintf(fd, "Couldn't ask confirmation for %s: %m, assuming positive response.\n", u->id);
3b20f877
FB
778 }
779}
780
63d77c92 781static void write_confirm_error(int err, const char *vc, const Unit *u) {
03e334a1 782 _cleanup_close_ int fd = -1;
80876c20 783
3b20f877 784 assert(vc);
80876c20 785
7d5ceb64 786 fd = open_terminal(vc, O_WRONLY|O_NOCTTY|O_CLOEXEC);
af6da548 787 if (fd < 0)
3b20f877 788 return;
80876c20 789
63d77c92 790 write_confirm_error_fd(err, fd, u);
af6da548 791}
80876c20 792
/* Counterpart of setup_confirm_stdio(): releases the terminal, re-installs the stashed stdin/stdout
 * (where valid) and closes the stashed copies.
 *
 * Returns 0 on success, or the negative errno of the last dup2() that failed. The stashed fds are closed
 * and invalidated in either case. */
static int restore_confirm_stdio(int *saved_stdin, int *saved_stdout) {
        int ret = 0;

        assert(saved_stdin);
        assert(saved_stdout);

        release_terminal();

        if (*saved_stdin >= 0 && dup2(*saved_stdin, STDIN_FILENO) < 0)
                ret = -errno;

        if (*saved_stdout >= 0 && dup2(*saved_stdout, STDOUT_FILENO) < 0)
                ret = -errno;

        *saved_stdin = safe_close(*saved_stdin);
        *saved_stdout = safe_close(*saved_stdout);

        return ret;
}
814
3b20f877
FB
/* Possible outcomes of ask_for_confirmation() */
enum {
        CONFIRM_PRETEND_FAILURE = -1,  /* don't execute the command, pretend it failed */
        CONFIRM_PRETEND_SUCCESS =  0,  /* don't execute the command, pretend it succeeded */
        CONFIRM_EXECUTE         =  1,  /* execute the command */
};
820
eedf223a 821static int ask_for_confirmation(const char *vc, Unit *u, const char *cmdline) {
af6da548 822 int saved_stdout = -1, saved_stdin = -1, r;
2bcd3c26 823 _cleanup_free_ char *e = NULL;
3b20f877 824 char c;
af6da548 825
3b20f877 826 /* For any internal errors, assume a positive response. */
7d5ceb64 827 r = setup_confirm_stdio(vc, &saved_stdin, &saved_stdout);
3b20f877 828 if (r < 0) {
63d77c92 829 write_confirm_error(r, vc, u);
3b20f877
FB
830 return CONFIRM_EXECUTE;
831 }
af6da548 832
b0eb2944
FB
833 /* confirm_spawn might have been disabled while we were sleeping. */
834 if (manager_is_confirm_spawn_disabled(u->manager)) {
835 r = 1;
836 goto restore_stdio;
837 }
af6da548 838
2bcd3c26
FB
839 e = ellipsize(cmdline, 60, 100);
840 if (!e) {
841 log_oom();
842 r = CONFIRM_EXECUTE;
843 goto restore_stdio;
844 }
af6da548 845
d172b175 846 for (;;) {
539622bd 847 r = ask_char(&c, "yfshiDjcn", "Execute %s? [y, f, s – h for help] ", e);
d172b175 848 if (r < 0) {
63d77c92 849 write_confirm_error_fd(r, STDOUT_FILENO, u);
d172b175
FB
850 r = CONFIRM_EXECUTE;
851 goto restore_stdio;
852 }
af6da548 853
d172b175 854 switch (c) {
b0eb2944
FB
855 case 'c':
856 printf("Resuming normal execution.\n");
857 manager_disable_confirm_spawn();
858 r = 1;
859 break;
dd6f9ac0
FB
860 case 'D':
861 unit_dump(u, stdout, " ");
862 continue; /* ask again */
d172b175
FB
863 case 'f':
864 printf("Failing execution.\n");
865 r = CONFIRM_PRETEND_FAILURE;
866 break;
867 case 'h':
b0eb2944
FB
868 printf(" c - continue, proceed without asking anymore\n"
869 " D - dump, show the state of the unit\n"
dd6f9ac0 870 " f - fail, don't execute the command and pretend it failed\n"
d172b175 871 " h - help\n"
eedf223a 872 " i - info, show a short summary of the unit\n"
56fde33a 873 " j - jobs, show jobs that are in progress\n"
d172b175
FB
874 " s - skip, don't execute the command and pretend it succeeded\n"
875 " y - yes, execute the command\n");
dd6f9ac0 876 continue; /* ask again */
eedf223a
FB
877 case 'i':
878 printf(" Description: %s\n"
879 " Unit: %s\n"
880 " Command: %s\n",
881 u->id, u->description, cmdline);
882 continue; /* ask again */
56fde33a
FB
883 case 'j':
884 manager_dump_jobs(u->manager, stdout, " ");
885 continue; /* ask again */
539622bd
FB
886 case 'n':
887 /* 'n' was removed in favor of 'f'. */
888 printf("Didn't understand 'n', did you mean 'f'?\n");
889 continue; /* ask again */
d172b175
FB
890 case 's':
891 printf("Skipping execution.\n");
892 r = CONFIRM_PRETEND_SUCCESS;
893 break;
894 case 'y':
895 r = CONFIRM_EXECUTE;
896 break;
897 default:
898 assert_not_reached("Unhandled choice");
899 }
3b20f877 900 break;
3b20f877 901 }
af6da548 902
3b20f877 903restore_stdio:
af6da548 904 restore_confirm_stdio(&saved_stdin, &saved_stdout);
af6da548 905 return r;
80876c20
LP
906}
907
4d885bd3
DH
908static int get_fixed_user(const ExecContext *c, const char **user,
909 uid_t *uid, gid_t *gid,
910 const char **home, const char **shell) {
81a2b7ce 911 int r;
4d885bd3 912 const char *name;
81a2b7ce 913
4d885bd3 914 assert(c);
81a2b7ce 915
23deef88
LP
916 if (!c->user)
917 return 0;
918
4d885bd3
DH
919 /* Note that we don't set $HOME or $SHELL if they are not particularly enlightening anyway
920 * (i.e. are "/" or "/bin/nologin"). */
81a2b7ce 921
23deef88 922 name = c->user;
fafff8f1 923 r = get_user_creds(&name, uid, gid, home, shell, USER_CREDS_CLEAN);
4d885bd3
DH
924 if (r < 0)
925 return r;
81a2b7ce 926
4d885bd3
DH
927 *user = name;
928 return 0;
929}
930
931static int get_fixed_group(const ExecContext *c, const char **group, gid_t *gid) {
932 int r;
933 const char *name;
934
935 assert(c);
936
937 if (!c->group)
938 return 0;
939
940 name = c->group;
fafff8f1 941 r = get_group_creds(&name, gid, 0);
4d885bd3
DH
942 if (r < 0)
943 return r;
944
945 *group = name;
946 return 0;
947}
948
cdc5d5c5
DH
949static int get_supplementary_groups(const ExecContext *c, const char *user,
950 const char *group, gid_t gid,
951 gid_t **supplementary_gids, int *ngids) {
4d885bd3
DH
952 char **i;
953 int r, k = 0;
954 int ngroups_max;
955 bool keep_groups = false;
956 gid_t *groups = NULL;
957 _cleanup_free_ gid_t *l_gids = NULL;
958
959 assert(c);
960
bbeea271
DH
961 /*
962 * If user is given, then lookup GID and supplementary groups list.
963 * We avoid NSS lookups for gid=0. Also we have to initialize groups
cdc5d5c5
DH
964 * here and as early as possible so we keep the list of supplementary
965 * groups of the caller.
bbeea271
DH
966 */
967 if (user && gid_is_valid(gid) && gid != 0) {
968 /* First step, initialize groups from /etc/groups */
969 if (initgroups(user, gid) < 0)
970 return -errno;
971
972 keep_groups = true;
973 }
974
ac6e8be6 975 if (strv_isempty(c->supplementary_groups))
4d885bd3
DH
976 return 0;
977
366ddd25
DH
978 /*
979 * If SupplementaryGroups= was passed then NGROUPS_MAX has to
980 * be positive, otherwise fail.
981 */
982 errno = 0;
983 ngroups_max = (int) sysconf(_SC_NGROUPS_MAX);
984 if (ngroups_max <= 0) {
985 if (errno > 0)
986 return -errno;
987 else
988 return -EOPNOTSUPP; /* For all other values */
989 }
990
4d885bd3
DH
991 l_gids = new(gid_t, ngroups_max);
992 if (!l_gids)
993 return -ENOMEM;
81a2b7ce 994
4d885bd3
DH
995 if (keep_groups) {
996 /*
997 * Lookup the list of groups that the user belongs to, we
998 * avoid NSS lookups here too for gid=0.
999 */
1000 k = ngroups_max;
1001 if (getgrouplist(user, gid, l_gids, &k) < 0)
1002 return -EINVAL;
1003 } else
1004 k = 0;
81a2b7ce 1005
4d885bd3
DH
1006 STRV_FOREACH(i, c->supplementary_groups) {
1007 const char *g;
81a2b7ce 1008
4d885bd3
DH
1009 if (k >= ngroups_max)
1010 return -E2BIG;
81a2b7ce 1011
4d885bd3 1012 g = *i;
fafff8f1 1013 r = get_group_creds(&g, l_gids+k, 0);
4d885bd3
DH
1014 if (r < 0)
1015 return r;
81a2b7ce 1016
4d885bd3
DH
1017 k++;
1018 }
81a2b7ce 1019
4d885bd3
DH
1020 /*
1021 * Sets ngids to zero to drop all supplementary groups, happens
1022 * when we are under root and SupplementaryGroups= is empty.
1023 */
1024 if (k == 0) {
1025 *ngids = 0;
1026 return 0;
1027 }
81a2b7ce 1028
4d885bd3
DH
1029 /* Otherwise get the final list of supplementary groups */
1030 groups = memdup(l_gids, sizeof(gid_t) * k);
1031 if (!groups)
1032 return -ENOMEM;
1033
1034 *supplementary_gids = groups;
1035 *ngids = k;
1036
1037 groups = NULL;
1038
1039 return 0;
1040}
1041
/* Applies the group credentials to the current process: first the supplementary groups (if any were
 * passed), then the real, effective and saved GID (if 'gid' is valid).
 *
 * Returns 0 on success, a negative errno-style value on failure. */
static int enforce_groups(gid_t gid, const gid_t *supplementary_gids, int ngids) {

        /* Handle SupplementaryGroups= if it is not empty */
        if (ngids > 0) {
                int r;

                r = maybe_setgroups(ngids, supplementary_gids);
                if (r < 0)
                        return r;
        }

        /* Then set our gids */
        if (gid_is_valid(gid) && setresgid(gid, gid, gid) < 0)
                return -errno;

        return 0;
}
1060
1061static int enforce_user(const ExecContext *context, uid_t uid) {
81a2b7ce
LP
1062 assert(context);
1063
4d885bd3
DH
1064 if (!uid_is_valid(uid))
1065 return 0;
1066
479050b3 1067 /* Sets (but doesn't look up) the uid and make sure we keep the
81a2b7ce
LP
1068 * capabilities while doing so. */
1069
479050b3 1070 if (context->capability_ambient_set != 0) {
81a2b7ce
LP
1071
1072 /* First step: If we need to keep capabilities but
1073 * drop privileges we need to make sure we keep our
cbb21cca 1074 * caps, while we drop privileges. */
693ced48 1075 if (uid != 0) {
cbb21cca 1076 int sb = context->secure_bits | 1<<SECURE_KEEP_CAPS;
693ced48
LP
1077
1078 if (prctl(PR_GET_SECUREBITS) != sb)
1079 if (prctl(PR_SET_SECUREBITS, sb) < 0)
1080 return -errno;
1081 }
81a2b7ce
LP
1082 }
1083
479050b3 1084 /* Second step: actually set the uids */
81a2b7ce
LP
1085 if (setresuid(uid, uid, uid) < 0)
1086 return -errno;
1087
1088 /* At this point we should have all necessary capabilities but
1089 are otherwise a normal user. However, the caps might got
1090 corrupted due to the setresuid() so we need clean them up
1091 later. This is done outside of this call. */
1092
1093 return 0;
1094}
1095
349cc4a5 1096#if HAVE_PAM
5b6319dc
LP
1097
1098static int null_conv(
1099 int num_msg,
1100 const struct pam_message **msg,
1101 struct pam_response **resp,
1102 void *appdata_ptr) {
1103
1104 /* We don't support conversations */
1105
1106 return PAM_CONV_ERR;
1107}
1108
cefc33ae
LP
1109#endif
1110
5b6319dc
LP
static int setup_pam(
                const char *name,
                const char *user,
                uid_t uid,
                gid_t gid,
                const char *tty,
                char ***env,
                int fds[], size_t n_fds) {

#if HAVE_PAM

        static const struct pam_conv conv = {
                .conv = null_conv,
                .appdata_ptr = NULL
        };

        _cleanup_(barrier_destroy) Barrier barrier = BARRIER_NULL;
        pam_handle_t *handle = NULL;
        sigset_t saved_mask;
        int pam_code = PAM_SUCCESS, r;
        char **entry, **pam_env = NULL;
        bool session_opened = false;
        pid_t pam_pid = 0, parent_pid;
        int flags = 0;

        assert(name);
        assert(user);
        assert(env);

        /* Opens a PAM session for service 'name' and user 'user', then forks off a helper process that
         * closes the session again later on.
         *
         * The PAM setup happens in the calling process, which forks afterwards. The forked child sticks
         * around until it is killed, either through PR_SET_PDEATHSIG or by systemd's cgroup logic, and
         * then tears down the PAM session. The calling process is the one that goes on to exec() the
         * actual daemon, so that the daemon's main PID is the PID that was originally fork()ed.
         *
         * On success *env is replaced by the environment list PAM hands back. PAM failures are reported
         * as -EPERM, other failures as the negative errno-style code of the failing call. Without PAM
         * support compiled in this is a no-op returning 0. */

        r = barrier_create(&barrier);
        if (r < 0)
                goto fail;

        /* Keep PAM quiet unless debug logging is enabled */
        if (log_get_max_level() < LOG_DEBUG)
                flags |= PAM_SILENT;

        pam_code = pam_start(name, user, &conv, &handle);
        if (pam_code != PAM_SUCCESS) {
                handle = NULL;
                goto fail;
        }

        if (!tty) {
                _cleanup_free_ char *stdin_tty = NULL;

                /* No TTY was passed explicitly, but stdin might be connected to one. If so, use its name. */

                if (getttyname_malloc(STDIN_FILENO, &stdin_tty) >= 0)
                        tty = strjoina("/dev/", stdin_tty);
        }

        if (tty) {
                pam_code = pam_set_item(handle, PAM_TTY, tty);
                if (pam_code != PAM_SUCCESS)
                        goto fail;
        }

        /* Export the caller's environment block into the PAM environment */
        STRV_FOREACH(entry, *env) {
                pam_code = pam_putenv(handle, *entry);
                if (pam_code != PAM_SUCCESS)
                        goto fail;
        }

        pam_code = pam_acct_mgmt(handle, flags);
        if (pam_code != PAM_SUCCESS)
                goto fail;

        pam_code = pam_open_session(handle, flags);
        if (pam_code != PAM_SUCCESS)
                goto fail;

        /* From now on the failure path has to close the session, too */
        session_opened = true;

        pam_env = pam_getenvlist(handle);
        if (!pam_env) {
                pam_code = PAM_BUF_ERR;
                goto fail;
        }

        /* Block SIGTERM before forking, so that the signal cannot get lost in the child */

        assert_se(sigprocmask_many(SIG_BLOCK, &saved_mask, SIGTERM, -1) >= 0);

        parent_pid = getpid_cached();

        r = safe_fork("(sd-pam)", 0, &pam_pid);
        if (r < 0)
                goto fail;
        if (r == 0) {
                int sig, ret = EXIT_PAM;

                /* Child: all we are here for is closing the PAM session once we are told to terminate */
                barrier_set_role(&barrier, BARRIER_CHILD);

                /* Do not keep the passed fds open in this helper. The assumption is that whatever else is
                 * open at this point was opened by PAM. */
                (void) close_many(fds, n_fds);

                /* Drop privileges: none are needed for pam_close_session(), and doing so makes
                 * PR_SET_PDEATHSIG work in most cases. Failures are only logged, but then sd-pam should be
                 * expected not to exit normally. */

                r = maybe_setgroups(0, NULL);
                if (r < 0)
                        log_warning_errno(r, "Failed to setgroups() in sd-pam: %m");
                if (setresgid(gid, gid, gid) < 0)
                        log_warning_errno(errno, "Failed to setresgid() in sd-pam: %m");
                if (setresuid(uid, uid, uid) < 0)
                        log_warning_errno(errno, "Failed to setresuid() in sd-pam: %m");

                (void) ignore_signals(SIGPIPE, -1);

                /* Ask for SIGTERM when our parent dies. This only works if the setresuid() above went
                 * through, as the kernel does not let unprivileged parents kill privileged children this
                 * way. The control group kill logic takes care of the remaining cases. */
                if (prctl(PR_SET_PDEATHSIG, SIGTERM) < 0)
                        goto child_finish;

                /* Report to the parent that we are set up. This matters in particular for the privilege
                 * dropping: without it, unit setup could race against our setresuid(2) call.
                 *
                 * A failure here means the parent aborted, which is noticed below, hence it is ignored. */
                (void) barrier_place(&barrier);

                /* Only wait for the signal if the parent is still the process that forked us */
                if (getppid() == parent_pid) {
                        sigset_t term_set;

                        assert_se(sigemptyset(&term_set) >= 0);
                        assert_se(sigaddset(&term_set, SIGTERM) >= 0);

                        for (;;) {
                                if (sigwait(&term_set, &sig) < 0) {
                                        if (errno == EINTR)
                                                continue;

                                        goto child_finish;
                                }

                                assert(sig == SIGTERM);
                                break;
                        }
                }

                /* Close the session only if the parent is really gone */
                if (getppid() != parent_pid) {
                        pam_code = pam_close_session(handle, flags);
                        if (pam_code != PAM_SUCCESS)
                                goto child_finish;
                }

                ret = 0;

        child_finish:
                pam_end(handle, pam_code | flags);
                _exit(ret);
        }

        barrier_set_role(&barrier, BARRIER_PARENT);

        /* The child was forked off successfully and is now in charge of all PAM cleanup, hence drop our
         * reference to the handle. */
        handle = NULL;

        /* Restore the original signal mask in the parent, i.e. unblock SIGTERM again */
        assert_se(sigprocmask(SIG_SETMASK, &saved_mask, NULL) >= 0);

        /* PAM modules may have opened the log; close it explicitly, as we do not want that fd to stay
         * around. */
        closelog();

        /* Wait synchronously until the child finished its initialization. Errors cannot be recovered
         * from at this point, but at least complain loudly about them. */
        if (!barrier_place_and_sync(&barrier))
                log_error("PAM initialization failed");

        return strv_free_and_replace(*env, pam_env);

fail:
        if (pam_code != PAM_SUCCESS) {
                log_error("PAM failed: %s", pam_strerror(handle, pam_code));
                r = -EPERM; /* PAM errors do not map to errno */
        } else
                log_error_errno(r, "PAM failed: %m");

        if (handle) {
                if (session_opened)
                        pam_code = pam_close_session(handle, flags);

                pam_end(handle, pam_code | flags);
        }

        strv_free(pam_env);
        closelog();

        return r;
#else
        return 0;
#endif
}
5b6319dc 1326
5d6b1584
LP
static void rename_process_from_path(const char *path) {
        char process_name[11];
        const char *base, *tail;
        size_t len, keep;

        /* Renames the process to "(<name>)", where <name> is derived from the last component of 'path'.
         * The result has to fit into 10 characters (the length of "/sbin/init") so that it looks pretty
         * in /bin/ps, which leaves 8 characters for the name itself. */

        base = basename(path);
        if (isempty(base)) {
                rename_process("(...)");
                return;
        }

        /* If the name is too long keep its tail: the end is usually the more interesting part, as the
         * beginning might just be "systemd-". */
        len = strlen(base);
        keep = len > 8 ? 8 : len;
        tail = base + len - keep;

        process_name[0] = '(';
        memcpy(process_name + 1, tail, keep);
        process_name[keep + 1] = ')';
        process_name[keep + 2] = 0;

        rename_process(process_name);
}
1357
469830d1
LP
1358static bool context_has_address_families(const ExecContext *c) {
1359 assert(c);
1360
1361 return c->address_families_whitelist ||
1362 !set_isempty(c->address_families);
1363}
1364
1365static bool context_has_syscall_filters(const ExecContext *c) {
1366 assert(c);
1367
1368 return c->syscall_whitelist ||
8cfa775f 1369 !hashmap_isempty(c->syscall_filter);
469830d1
LP
1370}
1371
1372static bool context_has_no_new_privileges(const ExecContext *c) {
1373 assert(c);
1374
1375 if (c->no_new_privileges)
1376 return true;
1377
1378 if (have_effective_cap(CAP_SYS_ADMIN)) /* if we are privileged, we don't need NNP */
1379 return false;
1380
1381 /* We need NNP if we have any form of seccomp and are unprivileged */
1382 return context_has_address_families(c) ||
1383 c->memory_deny_write_execute ||
1384 c->restrict_realtime ||
1385 exec_context_restrict_namespaces_set(c) ||
1386 c->protect_kernel_tunables ||
1387 c->protect_kernel_modules ||
1388 c->private_devices ||
1389 context_has_syscall_filters(c) ||
78e864e5
TM
1390 !set_isempty(c->syscall_archs) ||
1391 c->lock_personality;
469830d1
LP
1392}
1393
349cc4a5 1394#if HAVE_SECCOMP
17df7223 1395
83f12b27 1396static bool skip_seccomp_unavailable(const Unit* u, const char* msg) {
f673b62d
LP
1397
1398 if (is_seccomp_available())
1399 return false;
1400
f673b62d 1401 log_unit_debug(u, "SECCOMP features not detected in the kernel, skipping %s", msg);
f673b62d 1402 return true;
83f12b27
FS
1403}
1404
165a31c0 1405static int apply_syscall_filter(const Unit* u, const ExecContext *c, bool needs_ambient_hack) {
469830d1 1406 uint32_t negative_action, default_action, action;
165a31c0 1407 int r;
8351ceae 1408
469830d1 1409 assert(u);
c0467cf3 1410 assert(c);
8351ceae 1411
469830d1 1412 if (!context_has_syscall_filters(c))
83f12b27
FS
1413 return 0;
1414
469830d1
LP
1415 if (skip_seccomp_unavailable(u, "SystemCallFilter="))
1416 return 0;
e9642be2 1417
469830d1 1418 negative_action = c->syscall_errno == 0 ? SCMP_ACT_KILL : SCMP_ACT_ERRNO(c->syscall_errno);
e9642be2 1419
469830d1
LP
1420 if (c->syscall_whitelist) {
1421 default_action = negative_action;
1422 action = SCMP_ACT_ALLOW;
7c66bae2 1423 } else {
469830d1
LP
1424 default_action = SCMP_ACT_ALLOW;
1425 action = negative_action;
57183d11 1426 }
8351ceae 1427
165a31c0
LP
1428 if (needs_ambient_hack) {
1429 r = seccomp_filter_set_add(c->syscall_filter, c->syscall_whitelist, syscall_filter_sets + SYSCALL_FILTER_SET_SETUID);
1430 if (r < 0)
1431 return r;
1432 }
1433
b54f36c6 1434 return seccomp_load_syscall_filter_set_raw(default_action, c->syscall_filter, action, false);
4298d0b5
LP
1435}
1436
469830d1
LP
1437static int apply_syscall_archs(const Unit *u, const ExecContext *c) {
1438 assert(u);
4298d0b5
LP
1439 assert(c);
1440
469830d1 1441 if (set_isempty(c->syscall_archs))
83f12b27
FS
1442 return 0;
1443
469830d1
LP
1444 if (skip_seccomp_unavailable(u, "SystemCallArchitectures="))
1445 return 0;
4298d0b5 1446
469830d1
LP
1447 return seccomp_restrict_archs(c->syscall_archs);
1448}
4298d0b5 1449
469830d1
LP
1450static int apply_address_families(const Unit* u, const ExecContext *c) {
1451 assert(u);
1452 assert(c);
4298d0b5 1453
469830d1
LP
1454 if (!context_has_address_families(c))
1455 return 0;
4298d0b5 1456
469830d1
LP
1457 if (skip_seccomp_unavailable(u, "RestrictAddressFamilies="))
1458 return 0;
4298d0b5 1459
469830d1 1460 return seccomp_restrict_address_families(c->address_families, c->address_families_whitelist);
8351ceae 1461}
4298d0b5 1462
83f12b27 1463static int apply_memory_deny_write_execute(const Unit* u, const ExecContext *c) {
469830d1 1464 assert(u);
f3e43635
TM
1465 assert(c);
1466
469830d1 1467 if (!c->memory_deny_write_execute)
83f12b27
FS
1468 return 0;
1469
469830d1
LP
1470 if (skip_seccomp_unavailable(u, "MemoryDenyWriteExecute="))
1471 return 0;
f3e43635 1472
469830d1 1473 return seccomp_memory_deny_write_execute();
f3e43635
TM
1474}
1475
83f12b27 1476static int apply_restrict_realtime(const Unit* u, const ExecContext *c) {
469830d1 1477 assert(u);
f4170c67
LP
1478 assert(c);
1479
469830d1 1480 if (!c->restrict_realtime)
83f12b27
FS
1481 return 0;
1482
469830d1
LP
1483 if (skip_seccomp_unavailable(u, "RestrictRealtime="))
1484 return 0;
f4170c67 1485
469830d1 1486 return seccomp_restrict_realtime();
f4170c67
LP
1487}
1488
59e856c7 1489static int apply_protect_sysctl(const Unit *u, const ExecContext *c) {
469830d1 1490 assert(u);
59eeb84b
LP
1491 assert(c);
1492
1493 /* Turn off the legacy sysctl() system call. Many distributions turn this off while building the kernel, but
1494 * let's protect even those systems where this is left on in the kernel. */
1495
469830d1 1496 if (!c->protect_kernel_tunables)
59eeb84b
LP
1497 return 0;
1498
469830d1
LP
1499 if (skip_seccomp_unavailable(u, "ProtectKernelTunables="))
1500 return 0;
59eeb84b 1501
469830d1 1502 return seccomp_protect_sysctl();
59eeb84b
LP
1503}
1504
59e856c7 1505static int apply_protect_kernel_modules(const Unit *u, const ExecContext *c) {
469830d1 1506 assert(u);
502d704e
DH
1507 assert(c);
1508
25a8d8a0 1509 /* Turn off module syscalls on ProtectKernelModules=yes */
502d704e 1510
469830d1
LP
1511 if (!c->protect_kernel_modules)
1512 return 0;
1513
502d704e
DH
1514 if (skip_seccomp_unavailable(u, "ProtectKernelModules="))
1515 return 0;
1516
b54f36c6 1517 return seccomp_load_syscall_filter_set(SCMP_ACT_ALLOW, syscall_filter_sets + SYSCALL_FILTER_SET_MODULE, SCMP_ACT_ERRNO(EPERM), false);
502d704e
DH
1518}
1519
59e856c7 1520static int apply_private_devices(const Unit *u, const ExecContext *c) {
469830d1 1521 assert(u);
ba128bb8
LP
1522 assert(c);
1523
8f81a5f6 1524 /* If PrivateDevices= is set, also turn off iopl and all @raw-io syscalls. */
ba128bb8 1525
469830d1
LP
1526 if (!c->private_devices)
1527 return 0;
1528
ba128bb8
LP
1529 if (skip_seccomp_unavailable(u, "PrivateDevices="))
1530 return 0;
1531
b54f36c6 1532 return seccomp_load_syscall_filter_set(SCMP_ACT_ALLOW, syscall_filter_sets + SYSCALL_FILTER_SET_RAW_IO, SCMP_ACT_ERRNO(EPERM), false);
ba128bb8
LP
1533}
1534
34cf6c43 1535static int apply_restrict_namespaces(const Unit *u, const ExecContext *c) {
469830d1 1536 assert(u);
add00535
LP
1537 assert(c);
1538
1539 if (!exec_context_restrict_namespaces_set(c))
1540 return 0;
1541
1542 if (skip_seccomp_unavailable(u, "RestrictNamespaces="))
1543 return 0;
1544
1545 return seccomp_restrict_namespaces(c->restrict_namespaces);
1546}
1547
78e864e5 1548static int apply_lock_personality(const Unit* u, const ExecContext *c) {
e8132d63
LP
1549 unsigned long personality;
1550 int r;
78e864e5
TM
1551
1552 assert(u);
1553 assert(c);
1554
1555 if (!c->lock_personality)
1556 return 0;
1557
1558 if (skip_seccomp_unavailable(u, "LockPersonality="))
1559 return 0;
1560
e8132d63
LP
1561 personality = c->personality;
1562
1563 /* If personality is not specified, use either PER_LINUX or PER_LINUX32 depending on what is currently set. */
1564 if (personality == PERSONALITY_INVALID) {
1565
1566 r = opinionated_personality(&personality);
1567 if (r < 0)
1568 return r;
1569 }
78e864e5
TM
1570
1571 return seccomp_lock_personality(personality);
1572}
1573
c0467cf3 1574#endif
8351ceae 1575
31a7eb86
ZJS
1576static void do_idle_pipe_dance(int idle_pipe[4]) {
1577 assert(idle_pipe);
1578
54eb2300
LP
1579 idle_pipe[1] = safe_close(idle_pipe[1]);
1580 idle_pipe[2] = safe_close(idle_pipe[2]);
31a7eb86
ZJS
1581
1582 if (idle_pipe[0] >= 0) {
1583 int r;
1584
1585 r = fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT_USEC);
1586
1587 if (idle_pipe[3] >= 0 && r == 0 /* timeout */) {
c7cc737f
LP
1588 ssize_t n;
1589
31a7eb86 1590 /* Signal systemd that we are bored and want to continue. */
c7cc737f
LP
1591 n = write(idle_pipe[3], "x", 1);
1592 if (n > 0)
cd972d69
ZJS
1593 /* Wait for systemd to react to the signal above. */
1594 fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT2_USEC);
31a7eb86
ZJS
1595 }
1596
54eb2300 1597 idle_pipe[0] = safe_close(idle_pipe[0]);
31a7eb86
ZJS
1598
1599 }
1600
54eb2300 1601 idle_pipe[3] = safe_close(idle_pipe[3]);
31a7eb86
ZJS
1602}
1603
fb2042dd
YW
1604static const char *exec_directory_env_name_to_string(ExecDirectoryType t);
1605
7cae38c4 1606static int build_environment(
34cf6c43 1607 const Unit *u,
9fa95f85 1608 const ExecContext *c,
1e22b5cd 1609 const ExecParameters *p,
da6053d0 1610 size_t n_fds,
7cae38c4
LP
1611 const char *home,
1612 const char *username,
1613 const char *shell,
7bce046b
LP
1614 dev_t journal_stream_dev,
1615 ino_t journal_stream_ino,
7cae38c4
LP
1616 char ***ret) {
1617
1618 _cleanup_strv_free_ char **our_env = NULL;
fb2042dd 1619 ExecDirectoryType t;
da6053d0 1620 size_t n_env = 0;
7cae38c4
LP
1621 char *x;
1622
4b58153d 1623 assert(u);
7cae38c4 1624 assert(c);
7c1cb6f1 1625 assert(p);
7cae38c4
LP
1626 assert(ret);
1627
fb2042dd 1628 our_env = new0(char*, 14 + _EXEC_DIRECTORY_TYPE_MAX);
7cae38c4
LP
1629 if (!our_env)
1630 return -ENOMEM;
1631
1632 if (n_fds > 0) {
8dd4c05b
LP
1633 _cleanup_free_ char *joined = NULL;
1634
df0ff127 1635 if (asprintf(&x, "LISTEN_PID="PID_FMT, getpid_cached()) < 0)
7cae38c4
LP
1636 return -ENOMEM;
1637 our_env[n_env++] = x;
1638
da6053d0 1639 if (asprintf(&x, "LISTEN_FDS=%zu", n_fds) < 0)
7cae38c4
LP
1640 return -ENOMEM;
1641 our_env[n_env++] = x;
8dd4c05b 1642
1e22b5cd 1643 joined = strv_join(p->fd_names, ":");
8dd4c05b
LP
1644 if (!joined)
1645 return -ENOMEM;
1646
605405c6 1647 x = strjoin("LISTEN_FDNAMES=", joined);
8dd4c05b
LP
1648 if (!x)
1649 return -ENOMEM;
1650 our_env[n_env++] = x;
7cae38c4
LP
1651 }
1652
b08af3b1 1653 if ((p->flags & EXEC_SET_WATCHDOG) && p->watchdog_usec > 0) {
df0ff127 1654 if (asprintf(&x, "WATCHDOG_PID="PID_FMT, getpid_cached()) < 0)
09812eb7
LP
1655 return -ENOMEM;
1656 our_env[n_env++] = x;
1657
1e22b5cd 1658 if (asprintf(&x, "WATCHDOG_USEC="USEC_FMT, p->watchdog_usec) < 0)
09812eb7
LP
1659 return -ENOMEM;
1660 our_env[n_env++] = x;
1661 }
1662
fd63e712
LP
1663 /* If this is D-Bus, tell the nss-systemd module, since it relies on being able to use D-Bus look up dynamic
1664 * users via PID 1, possibly dead-locking the dbus daemon. This way it will not use D-Bus to resolve names, but
1665 * check the database directly. */
ac647978 1666 if (p->flags & EXEC_NSS_BYPASS_BUS) {
fd63e712
LP
1667 x = strdup("SYSTEMD_NSS_BYPASS_BUS=1");
1668 if (!x)
1669 return -ENOMEM;
1670 our_env[n_env++] = x;
1671 }
1672
7cae38c4
LP
1673 if (home) {
1674 x = strappend("HOME=", home);
1675 if (!x)
1676 return -ENOMEM;
1677 our_env[n_env++] = x;
1678 }
1679
1680 if (username) {
1681 x = strappend("LOGNAME=", username);
1682 if (!x)
1683 return -ENOMEM;
1684 our_env[n_env++] = x;
1685
1686 x = strappend("USER=", username);
1687 if (!x)
1688 return -ENOMEM;
1689 our_env[n_env++] = x;
1690 }
1691
1692 if (shell) {
1693 x = strappend("SHELL=", shell);
1694 if (!x)
1695 return -ENOMEM;
1696 our_env[n_env++] = x;
1697 }
1698
4b58153d
LP
1699 if (!sd_id128_is_null(u->invocation_id)) {
1700 if (asprintf(&x, "INVOCATION_ID=" SD_ID128_FORMAT_STR, SD_ID128_FORMAT_VAL(u->invocation_id)) < 0)
1701 return -ENOMEM;
1702
1703 our_env[n_env++] = x;
1704 }
1705
6af760f3
LP
1706 if (exec_context_needs_term(c)) {
1707 const char *tty_path, *term = NULL;
1708
1709 tty_path = exec_context_tty_path(c);
1710
1711 /* If we are forked off PID 1 and we are supposed to operate on /dev/console, then let's try to inherit
1712 * the $TERM set for PID 1. This is useful for containers so that the $TERM the container manager
1713 * passes to PID 1 ends up all the way in the console login shown. */
1714
1715 if (path_equal(tty_path, "/dev/console") && getppid() == 1)
1716 term = getenv("TERM");
1717 if (!term)
1718 term = default_term_for_tty(tty_path);
7cae38c4 1719
6af760f3 1720 x = strappend("TERM=", term);
7cae38c4
LP
1721 if (!x)
1722 return -ENOMEM;
1723 our_env[n_env++] = x;
1724 }
1725
7bce046b
LP
1726 if (journal_stream_dev != 0 && journal_stream_ino != 0) {
1727 if (asprintf(&x, "JOURNAL_STREAM=" DEV_FMT ":" INO_FMT, journal_stream_dev, journal_stream_ino) < 0)
1728 return -ENOMEM;
1729
1730 our_env[n_env++] = x;
1731 }
1732
fb2042dd
YW
1733 for (t = 0; t < _EXEC_DIRECTORY_TYPE_MAX; t++) {
1734 _cleanup_free_ char *pre = NULL, *joined = NULL;
1735 const char *n;
1736
1737 if (!p->prefix[t])
1738 continue;
1739
1740 if (strv_isempty(c->directories[t].paths))
1741 continue;
1742
1743 n = exec_directory_env_name_to_string(t);
1744 if (!n)
1745 continue;
1746
1747 pre = strjoin(p->prefix[t], "/");
1748 if (!pre)
1749 return -ENOMEM;
1750
1751 joined = strv_join_prefix(c->directories[t].paths, ":", pre);
1752 if (!joined)
1753 return -ENOMEM;
1754
1755 x = strjoin(n, "=", joined);
1756 if (!x)
1757 return -ENOMEM;
1758
1759 our_env[n_env++] = x;
1760 }
1761
7cae38c4 1762 our_env[n_env++] = NULL;
fb2042dd 1763 assert(n_env <= 14 + _EXEC_DIRECTORY_TYPE_MAX);
7cae38c4 1764
ae2a15bc 1765 *ret = TAKE_PTR(our_env);
7cae38c4
LP
1766
1767 return 0;
1768}
1769
b4c14404
FB
1770static int build_pass_environment(const ExecContext *c, char ***ret) {
1771 _cleanup_strv_free_ char **pass_env = NULL;
1772 size_t n_env = 0, n_bufsize = 0;
1773 char **i;
1774
1775 STRV_FOREACH(i, c->pass_environment) {
1776 _cleanup_free_ char *x = NULL;
1777 char *v;
1778
1779 v = getenv(*i);
1780 if (!v)
1781 continue;
605405c6 1782 x = strjoin(*i, "=", v);
b4c14404
FB
1783 if (!x)
1784 return -ENOMEM;
00819cc1 1785
b4c14404
FB
1786 if (!GREEDY_REALLOC(pass_env, n_bufsize, n_env + 2))
1787 return -ENOMEM;
00819cc1 1788
1cc6c93a 1789 pass_env[n_env++] = TAKE_PTR(x);
b4c14404 1790 pass_env[n_env] = NULL;
b4c14404
FB
1791 }
1792
ae2a15bc 1793 *ret = TAKE_PTR(pass_env);
b4c14404
FB
1794
1795 return 0;
1796}
1797
8b44a3d2
LP
1798static bool exec_needs_mount_namespace(
1799 const ExecContext *context,
1800 const ExecParameters *params,
4657abb5 1801 const ExecRuntime *runtime) {
8b44a3d2
LP
1802
1803 assert(context);
1804 assert(params);
1805
915e6d16
LP
1806 if (context->root_image)
1807 return true;
1808
2a624c36
AP
1809 if (!strv_isempty(context->read_write_paths) ||
1810 !strv_isempty(context->read_only_paths) ||
1811 !strv_isempty(context->inaccessible_paths))
8b44a3d2
LP
1812 return true;
1813
42b1d8e0 1814 if (context->n_bind_mounts > 0)
d2d6c096
LP
1815 return true;
1816
2abd4e38
YW
1817 if (context->n_temporary_filesystems > 0)
1818 return true;
1819
8b44a3d2
LP
1820 if (context->mount_flags != 0)
1821 return true;
1822
1823 if (context->private_tmp && runtime && (runtime->tmp_dir || runtime->var_tmp_dir))
1824 return true;
1825
8b44a3d2 1826 if (context->private_devices ||
228af36f 1827 context->private_mounts ||
8b44a3d2 1828 context->protect_system != PROTECT_SYSTEM_NO ||
59eeb84b
LP
1829 context->protect_home != PROTECT_HOME_NO ||
1830 context->protect_kernel_tunables ||
c575770b 1831 context->protect_kernel_modules ||
59eeb84b 1832 context->protect_control_groups)
8b44a3d2
LP
1833 return true;
1834
37c56f89
YW
1835 if (context->root_directory) {
1836 ExecDirectoryType t;
1837
1838 if (context->mount_apivfs)
1839 return true;
1840
1841 for (t = 0; t < _EXEC_DIRECTORY_TYPE_MAX; t++) {
1842 if (!params->prefix[t])
1843 continue;
1844
1845 if (!strv_isempty(context->directories[t].paths))
1846 return true;
1847 }
1848 }
5d997827 1849
42b1d8e0 1850 if (context->dynamic_user &&
b43ee82f 1851 (!strv_isempty(context->directories[EXEC_DIRECTORY_STATE].paths) ||
42b1d8e0
YW
1852 !strv_isempty(context->directories[EXEC_DIRECTORY_CACHE].paths) ||
1853 !strv_isempty(context->directories[EXEC_DIRECTORY_LOGS].paths)))
1854 return true;
1855
8b44a3d2
LP
1856 return false;
1857}
1858
d251207d
LP
1859static int setup_private_users(uid_t uid, gid_t gid) {
1860 _cleanup_free_ char *uid_map = NULL, *gid_map = NULL;
1861 _cleanup_close_pair_ int errno_pipe[2] = { -1, -1 };
1862 _cleanup_close_ int unshare_ready_fd = -1;
1863 _cleanup_(sigkill_waitp) pid_t pid = 0;
1864 uint64_t c = 1;
d251207d
LP
1865 ssize_t n;
1866 int r;
1867
1868 /* Set up a user namespace and map root to root, the selected UID/GID to itself, and everything else to
1869 * nobody. In order to be able to write this mapping we need CAP_SETUID in the original user namespace, which
1870 * we however lack after opening the user namespace. To work around this we fork() a temporary child process,
1871 * which waits for the parent to create the new user namespace while staying in the original namespace. The
1872 * child then writes the UID mapping, under full privileges. The parent waits for the child to finish and
1873 * continues execution normally. */
1874
587ab01b
ZJS
1875 if (uid != 0 && uid_is_valid(uid)) {
1876 r = asprintf(&uid_map,
1877 "0 0 1\n" /* Map root → root */
1878 UID_FMT " " UID_FMT " 1\n", /* Map $UID → $UID */
1879 uid, uid);
1880 if (r < 0)
1881 return -ENOMEM;
1882 } else {
e0f3720e 1883 uid_map = strdup("0 0 1\n"); /* The case where the above is the same */
587ab01b
ZJS
1884 if (!uid_map)
1885 return -ENOMEM;
1886 }
d251207d 1887
587ab01b
ZJS
1888 if (gid != 0 && gid_is_valid(gid)) {
1889 r = asprintf(&gid_map,
1890 "0 0 1\n" /* Map root → root */
1891 GID_FMT " " GID_FMT " 1\n", /* Map $GID → $GID */
1892 gid, gid);
1893 if (r < 0)
1894 return -ENOMEM;
1895 } else {
d251207d 1896 gid_map = strdup("0 0 1\n"); /* The case where the above is the same */
587ab01b
ZJS
1897 if (!gid_map)
1898 return -ENOMEM;
1899 }
d251207d
LP
1900
1901 /* Create a communication channel so that the parent can tell the child when it finished creating the user
1902 * namespace. */
1903 unshare_ready_fd = eventfd(0, EFD_CLOEXEC);
1904 if (unshare_ready_fd < 0)
1905 return -errno;
1906
1907 /* Create a communication channel so that the child can tell the parent a proper error code in case it
1908 * failed. */
1909 if (pipe2(errno_pipe, O_CLOEXEC) < 0)
1910 return -errno;
1911
4c253ed1
LP
1912 r = safe_fork("(sd-userns)", FORK_RESET_SIGNALS|FORK_DEATHSIG, &pid);
1913 if (r < 0)
1914 return r;
1915 if (r == 0) {
d251207d
LP
1916 _cleanup_close_ int fd = -1;
1917 const char *a;
1918 pid_t ppid;
1919
1920 /* Child process, running in the original user namespace. Let's update the parent's UID/GID map from
1921 * here, after the parent opened its own user namespace. */
1922
1923 ppid = getppid();
1924 errno_pipe[0] = safe_close(errno_pipe[0]);
1925
1926 /* Wait until the parent unshared the user namespace */
1927 if (read(unshare_ready_fd, &c, sizeof(c)) < 0) {
1928 r = -errno;
1929 goto child_fail;
1930 }
1931
1932 /* Disable the setgroups() system call in the child user namespace, for good. */
1933 a = procfs_file_alloca(ppid, "setgroups");
1934 fd = open(a, O_WRONLY|O_CLOEXEC);
1935 if (fd < 0) {
1936 if (errno != ENOENT) {
1937 r = -errno;
1938 goto child_fail;
1939 }
1940
1941 /* If the file is missing the kernel is too old, let's continue anyway. */
1942 } else {
1943 if (write(fd, "deny\n", 5) < 0) {
1944 r = -errno;
1945 goto child_fail;
1946 }
1947
1948 fd = safe_close(fd);
1949 }
1950
1951 /* First write the GID map */
1952 a = procfs_file_alloca(ppid, "gid_map");
1953 fd = open(a, O_WRONLY|O_CLOEXEC);
1954 if (fd < 0) {
1955 r = -errno;
1956 goto child_fail;
1957 }
1958 if (write(fd, gid_map, strlen(gid_map)) < 0) {
1959 r = -errno;
1960 goto child_fail;
1961 }
1962 fd = safe_close(fd);
1963
1964 /* The write the UID map */
1965 a = procfs_file_alloca(ppid, "uid_map");
1966 fd = open(a, O_WRONLY|O_CLOEXEC);
1967 if (fd < 0) {
1968 r = -errno;
1969 goto child_fail;
1970 }
1971 if (write(fd, uid_map, strlen(uid_map)) < 0) {
1972 r = -errno;
1973 goto child_fail;
1974 }
1975
1976 _exit(EXIT_SUCCESS);
1977
1978 child_fail:
1979 (void) write(errno_pipe[1], &r, sizeof(r));
1980 _exit(EXIT_FAILURE);
1981 }
1982
1983 errno_pipe[1] = safe_close(errno_pipe[1]);
1984
1985 if (unshare(CLONE_NEWUSER) < 0)
1986 return -errno;
1987
1988 /* Let the child know that the namespace is ready now */
1989 if (write(unshare_ready_fd, &c, sizeof(c)) < 0)
1990 return -errno;
1991
1992 /* Try to read an error code from the child */
1993 n = read(errno_pipe[0], &r, sizeof(r));
1994 if (n < 0)
1995 return -errno;
1996 if (n == sizeof(r)) { /* an error code was sent to us */
1997 if (r < 0)
1998 return r;
1999 return -EIO;
2000 }
2001 if (n != 0) /* on success we should have read 0 bytes */
2002 return -EIO;
2003
2e87a1fd
LP
2004 r = wait_for_terminate_and_check("(sd-userns)", pid, 0);
2005 pid = 0;
d251207d
LP
2006 if (r < 0)
2007 return r;
2e87a1fd 2008 if (r != EXIT_SUCCESS) /* If something strange happened with the child, let's consider this fatal, too */
d251207d
LP
2009 return -EIO;
2010
2011 return 0;
2012}
2013
3536f49e 2014static int setup_exec_directory(
07689d5d
LP
2015 const ExecContext *context,
2016 const ExecParameters *params,
2017 uid_t uid,
3536f49e 2018 gid_t gid,
3536f49e
YW
2019 ExecDirectoryType type,
2020 int *exit_status) {
07689d5d 2021
72fd1768 2022 static const int exit_status_table[_EXEC_DIRECTORY_TYPE_MAX] = {
3536f49e
YW
2023 [EXEC_DIRECTORY_RUNTIME] = EXIT_RUNTIME_DIRECTORY,
2024 [EXEC_DIRECTORY_STATE] = EXIT_STATE_DIRECTORY,
2025 [EXEC_DIRECTORY_CACHE] = EXIT_CACHE_DIRECTORY,
2026 [EXEC_DIRECTORY_LOGS] = EXIT_LOGS_DIRECTORY,
2027 [EXEC_DIRECTORY_CONFIGURATION] = EXIT_CONFIGURATION_DIRECTORY,
2028 };
07689d5d
LP
2029 char **rt;
2030 int r;
2031
2032 assert(context);
2033 assert(params);
72fd1768 2034 assert(type >= 0 && type < _EXEC_DIRECTORY_TYPE_MAX);
3536f49e 2035 assert(exit_status);
07689d5d 2036
3536f49e
YW
2037 if (!params->prefix[type])
2038 return 0;
2039
8679efde 2040 if (params->flags & EXEC_CHOWN_DIRECTORIES) {
3536f49e
YW
2041 if (!uid_is_valid(uid))
2042 uid = 0;
2043 if (!gid_is_valid(gid))
2044 gid = 0;
2045 }
2046
2047 STRV_FOREACH(rt, context->directories[type].paths) {
6c47cd7d 2048 _cleanup_free_ char *p = NULL, *pp = NULL;
07689d5d 2049
3536f49e
YW
2050 p = strjoin(params->prefix[type], "/", *rt);
2051 if (!p) {
2052 r = -ENOMEM;
2053 goto fail;
2054 }
07689d5d 2055
23a7448e
YW
2056 r = mkdir_parents_label(p, 0755);
2057 if (r < 0)
3536f49e 2058 goto fail;
23a7448e 2059
8092a48c
YW
2060 if (context->dynamic_user &&
2061 !IN_SET(type, EXEC_DIRECTORY_RUNTIME, EXEC_DIRECTORY_CONFIGURATION)) {
6c9c51e5 2062 _cleanup_free_ char *private_root = NULL;
6c47cd7d
LP
2063
2064 /* So, here's one extra complication when dealing with DynamicUser=1 units. In that case we
2065 * want to avoid leaving a directory around fully accessible that is owned by a dynamic user
2066 * whose UID is later on reused. To lock this down we use the same trick used by container
2067 * managers to prohibit host users to get access to files of the same UID in containers: we
2068 * place everything inside a directory that has an access mode of 0700 and is owned root:root,
2069 * so that it acts as security boundary for unprivileged host code. We then use fs namespacing
2070 * to make this directory permeable for the service itself.
2071 *
2072 * Specifically: for a service which wants a special directory "foo/" we first create a
2073 * directory "private/" with access mode 0700 owned by root:root. Then we place "foo" inside of
2074 * that directory (i.e. "private/foo/"), and make "foo" a symlink to "private/foo". This way,
2075 * privileged host users can access "foo/" as usual, but unprivileged host users can't look
2076 * into it. Inside of the namespaceof the container "private/" is replaced by a more liberally
2077 * accessible tmpfs, into which the host's "private/foo/" is mounted under the same name, thus
2078 * disabling the access boundary for the service and making sure it only gets access to the
2079 * dirs it needs but no others. Tricky? Yes, absolutely, but it works!
2080 *
2081 * Note that we don't do this for EXEC_DIRECTORY_CONFIGURATION as that's assumed not to be
8092a48c
YW
2082 * owned by the service itself.
2083 * Also, note that we don't do this for EXEC_DIRECTORY_RUNTIME as that's often used for sharing
2084 * files or sockets with other services. */
6c47cd7d
LP
2085
2086 private_root = strjoin(params->prefix[type], "/private");
2087 if (!private_root) {
2088 r = -ENOMEM;
2089 goto fail;
2090 }
2091
2092 /* First set up private root if it doesn't exist yet, with access mode 0700 and owned by root:root */
37c1d5e9 2093 r = mkdir_safe_label(private_root, 0700, 0, 0, MKDIR_WARN_MODE);
6c47cd7d
LP
2094 if (r < 0)
2095 goto fail;
2096
2097 pp = strjoin(private_root, "/", *rt);
2098 if (!pp) {
2099 r = -ENOMEM;
2100 goto fail;
2101 }
2102
2103 /* Create all directories between the configured directory and this private root, and mark them 0755 */
2104 r = mkdir_parents_label(pp, 0755);
2105 if (r < 0)
2106 goto fail;
2107
949befd3
LP
2108 if (is_dir(p, false) > 0 &&
2109 (laccess(pp, F_OK) < 0 && errno == ENOENT)) {
2110
2111 /* Hmm, the private directory doesn't exist yet, but the normal one exists? If so, move
2112 * it over. Most likely the service has been upgraded from one that didn't use
2113 * DynamicUser=1, to one that does. */
2114
2115 if (rename(p, pp) < 0) {
2116 r = -errno;
2117 goto fail;
2118 }
2119 } else {
2120 /* Otherwise, create the actual directory for the service */
2121
2122 r = mkdir_label(pp, context->directories[type].mode);
2123 if (r < 0 && r != -EEXIST)
2124 goto fail;
2125 }
6c47cd7d 2126
6c47cd7d 2127 /* And link it up from the original place */
6c9c51e5 2128 r = symlink_idempotent(pp, p, true);
6c47cd7d
LP
2129 if (r < 0)
2130 goto fail;
2131
30c81ce2
ZJS
2132 /* Lock down the access mode */
2133 if (chmod(pp, context->directories[type].mode) < 0) {
2134 r = -errno;
2135 goto fail;
2136 }
6c47cd7d
LP
2137 } else {
2138 r = mkdir_label(p, context->directories[type].mode);
fdff1da2 2139 if (r < 0 && r != -EEXIST)
6c47cd7d 2140 goto fail;
fdff1da2
YW
2141 if (r == -EEXIST && !context->dynamic_user)
2142 continue;
a1164ae3 2143 }
07689d5d 2144
c71b2eb7
LP
2145 /* Don't change the owner of the configuration directory, as in the common case it is not written to by
2146 * a service, and shall not be writable. */
2147 if (type == EXEC_DIRECTORY_CONFIGURATION)
2148 continue;
2149
a1164ae3 2150 /* Then, change the ownership of the whole tree, if necessary */
30c81ce2 2151 r = path_chown_recursive(pp ?: p, uid, gid);
07689d5d 2152 if (r < 0)
3536f49e 2153 goto fail;
07689d5d
LP
2154 }
2155
2156 return 0;
3536f49e
YW
2157
2158fail:
2159 *exit_status = exit_status_table[type];
3536f49e 2160 return r;
07689d5d
LP
2161}
2162
#if ENABLE_SMACK
/* Applies a SMACK process label via mac_smack_apply_pid(), passing 0 as PID.
 *
 * If context->smack_process_label is set, that label is applied. Otherwise, and only when
 * SMACK_DEFAULT_PROCESS_LABEL is defined at build time, the SMACK_ATTR_EXEC label is read from command->path
 * and applied, with SMACK_DEFAULT_PROCESS_LABEL as fallback when no label was read.
 *
 * Returns 0 on success (or when there is nothing to do), a negative value propagated from the mac_smack_*()
 * helpers on failure. */
static int setup_smack(
                const ExecContext *context,
                const ExecCommand *command) {

        int r;

        assert(context);
        assert(command);

        if (context->smack_process_label) {
                r = mac_smack_apply_pid(0, context->smack_process_label);
                return r < 0 ? r : 0;
        }

#ifdef SMACK_DEFAULT_PROCESS_LABEL
        {
                _cleanup_free_ char *label = NULL;

                /* -ENODATA and -EOPNOTSUPP from the read are tolerated; the default label is used then. */
                r = mac_smack_read(command->path, SMACK_ATTR_EXEC, &label);
                if (r < 0 && r != -ENODATA && r != -EOPNOTSUPP)
                        return r;

                r = mac_smack_apply_pid(0, label ?: SMACK_DEFAULT_PROCESS_LABEL);
                if (r < 0)
                        return r;
        }
#endif

        return 0;
}
#endif
cefc33ae 2195
6c47cd7d
LP
2196static int compile_bind_mounts(
2197 const ExecContext *context,
2198 const ExecParameters *params,
2199 BindMount **ret_bind_mounts,
da6053d0 2200 size_t *ret_n_bind_mounts,
6c47cd7d
LP
2201 char ***ret_empty_directories) {
2202
2203 _cleanup_strv_free_ char **empty_directories = NULL;
2204 BindMount *bind_mounts;
da6053d0 2205 size_t n, h = 0, i;
6c47cd7d
LP
2206 ExecDirectoryType t;
2207 int r;
2208
2209 assert(context);
2210 assert(params);
2211 assert(ret_bind_mounts);
2212 assert(ret_n_bind_mounts);
2213 assert(ret_empty_directories);
2214
2215 n = context->n_bind_mounts;
2216 for (t = 0; t < _EXEC_DIRECTORY_TYPE_MAX; t++) {
2217 if (!params->prefix[t])
2218 continue;
2219
2220 n += strv_length(context->directories[t].paths);
2221 }
2222
2223 if (n <= 0) {
2224 *ret_bind_mounts = NULL;
2225 *ret_n_bind_mounts = 0;
2226 *ret_empty_directories = NULL;
2227 return 0;
2228 }
2229
2230 bind_mounts = new(BindMount, n);
2231 if (!bind_mounts)
2232 return -ENOMEM;
2233
a8cabc61 2234 for (i = 0; i < context->n_bind_mounts; i++) {
6c47cd7d
LP
2235 BindMount *item = context->bind_mounts + i;
2236 char *s, *d;
2237
2238 s = strdup(item->source);
2239 if (!s) {
2240 r = -ENOMEM;
2241 goto finish;
2242 }
2243
2244 d = strdup(item->destination);
2245 if (!d) {
2246 free(s);
2247 r = -ENOMEM;
2248 goto finish;
2249 }
2250
2251 bind_mounts[h++] = (BindMount) {
2252 .source = s,
2253 .destination = d,
2254 .read_only = item->read_only,
2255 .recursive = item->recursive,
2256 .ignore_enoent = item->ignore_enoent,
2257 };
2258 }
2259
2260 for (t = 0; t < _EXEC_DIRECTORY_TYPE_MAX; t++) {
2261 char **suffix;
2262
2263 if (!params->prefix[t])
2264 continue;
2265
2266 if (strv_isempty(context->directories[t].paths))
2267 continue;
2268
8092a48c 2269 if (context->dynamic_user &&
5609f688
YW
2270 !IN_SET(t, EXEC_DIRECTORY_RUNTIME, EXEC_DIRECTORY_CONFIGURATION) &&
2271 !(context->root_directory || context->root_image)) {
6c47cd7d
LP
2272 char *private_root;
2273
2274 /* So this is for a dynamic user, and we need to make sure the process can access its own
2275 * directory. For that we overmount the usually inaccessible "private" subdirectory with a
2276 * tmpfs that makes it accessible and is empty except for the submounts we do this for. */
2277
2278 private_root = strjoin(params->prefix[t], "/private");
2279 if (!private_root) {
2280 r = -ENOMEM;
2281 goto finish;
2282 }
2283
2284 r = strv_consume(&empty_directories, private_root);
a635a7ae 2285 if (r < 0)
6c47cd7d 2286 goto finish;
6c47cd7d
LP
2287 }
2288
2289 STRV_FOREACH(suffix, context->directories[t].paths) {
2290 char *s, *d;
2291
8092a48c
YW
2292 if (context->dynamic_user &&
2293 !IN_SET(t, EXEC_DIRECTORY_RUNTIME, EXEC_DIRECTORY_CONFIGURATION))
6c47cd7d
LP
2294 s = strjoin(params->prefix[t], "/private/", *suffix);
2295 else
2296 s = strjoin(params->prefix[t], "/", *suffix);
2297 if (!s) {
2298 r = -ENOMEM;
2299 goto finish;
2300 }
2301
5609f688
YW
2302 if (context->dynamic_user &&
2303 !IN_SET(t, EXEC_DIRECTORY_RUNTIME, EXEC_DIRECTORY_CONFIGURATION) &&
2304 (context->root_directory || context->root_image))
2305 /* When RootDirectory= or RootImage= are set, then the symbolic link to the private
2306 * directory is not created on the root directory. So, let's bind-mount the directory
2307 * on the 'non-private' place. */
2308 d = strjoin(params->prefix[t], "/", *suffix);
2309 else
2310 d = strdup(s);
6c47cd7d
LP
2311 if (!d) {
2312 free(s);
2313 r = -ENOMEM;
2314 goto finish;
2315 }
2316
2317 bind_mounts[h++] = (BindMount) {
2318 .source = s,
2319 .destination = d,
2320 .read_only = false,
2321 .recursive = true,
2322 .ignore_enoent = false,
2323 };
2324 }
2325 }
2326
2327 assert(h == n);
2328
2329 *ret_bind_mounts = bind_mounts;
2330 *ret_n_bind_mounts = n;
ae2a15bc 2331 *ret_empty_directories = TAKE_PTR(empty_directories);
6c47cd7d
LP
2332
2333 return (int) n;
2334
2335finish:
2336 bind_mount_free_many(bind_mounts, h);
2337 return r;
2338}
2339
6818c54c 2340static int apply_mount_namespace(
34cf6c43
YW
2341 const Unit *u,
2342 const ExecCommand *command,
6818c54c
LP
2343 const ExecContext *context,
2344 const ExecParameters *params,
34cf6c43 2345 const ExecRuntime *runtime) {
6818c54c 2346
7bcef4ef 2347 _cleanup_strv_free_ char **empty_directories = NULL;
93c6bb51 2348 char *tmp = NULL, *var = NULL;
915e6d16 2349 const char *root_dir = NULL, *root_image = NULL;
228af36f 2350 NamespaceInfo ns_info;
165a31c0 2351 bool needs_sandboxing;
6c47cd7d 2352 BindMount *bind_mounts = NULL;
da6053d0 2353 size_t n_bind_mounts = 0;
6818c54c 2354 int r;
93c6bb51 2355
2b3c1b9e
DH
2356 assert(context);
2357
93c6bb51
DH
2358 /* The runtime struct only contains the parent of the private /tmp,
2359 * which is non-accessible to world users. Inside of it there's a /tmp
2360 * that is sticky, and that's the one we want to use here. */
2361
2362 if (context->private_tmp && runtime) {
2363 if (runtime->tmp_dir)
2364 tmp = strjoina(runtime->tmp_dir, "/tmp");
2365 if (runtime->var_tmp_dir)
2366 var = strjoina(runtime->var_tmp_dir, "/tmp");
2367 }
2368
915e6d16
LP
2369 if (params->flags & EXEC_APPLY_CHROOT) {
2370 root_image = context->root_image;
2371
2372 if (!root_image)
2373 root_dir = context->root_directory;
2374 }
93c6bb51 2375
6c47cd7d
LP
2376 r = compile_bind_mounts(context, params, &bind_mounts, &n_bind_mounts, &empty_directories);
2377 if (r < 0)
2378 return r;
2379
165a31c0 2380 needs_sandboxing = (params->flags & EXEC_APPLY_SANDBOXING) && !(command->flags & EXEC_COMMAND_FULLY_PRIVILEGED);
b5a33299
YW
2381 if (needs_sandboxing)
2382 ns_info = (NamespaceInfo) {
2383 .ignore_protect_paths = false,
2384 .private_dev = context->private_devices,
2385 .protect_control_groups = context->protect_control_groups,
2386 .protect_kernel_tunables = context->protect_kernel_tunables,
2387 .protect_kernel_modules = context->protect_kernel_modules,
2388 .mount_apivfs = context->mount_apivfs,
228af36f 2389 .private_mounts = context->private_mounts,
b5a33299 2390 };
228af36f
LP
2391 else if (!context->dynamic_user && root_dir)
2392 /*
2393 * If DynamicUser=no and RootDirectory= is set then lets pass a relaxed
2394 * sandbox info, otherwise enforce it, don't ignore protected paths and
2395 * fail if we are enable to apply the sandbox inside the mount namespace.
2396 */
2397 ns_info = (NamespaceInfo) {
2398 .ignore_protect_paths = true,
2399 };
2400 else
2401 ns_info = (NamespaceInfo) {};
b5a33299 2402
915e6d16 2403 r = setup_namespace(root_dir, root_image,
7bcef4ef 2404 &ns_info, context->read_write_paths,
165a31c0
LP
2405 needs_sandboxing ? context->read_only_paths : NULL,
2406 needs_sandboxing ? context->inaccessible_paths : NULL,
6c47cd7d
LP
2407 empty_directories,
2408 bind_mounts,
2409 n_bind_mounts,
2abd4e38
YW
2410 context->temporary_filesystems,
2411 context->n_temporary_filesystems,
93c6bb51
DH
2412 tmp,
2413 var,
165a31c0
LP
2414 needs_sandboxing ? context->protect_home : PROTECT_HOME_NO,
2415 needs_sandboxing ? context->protect_system : PROTECT_SYSTEM_NO,
915e6d16
LP
2416 context->mount_flags,
2417 DISSECT_IMAGE_DISCARD_ON_LOOP);
93c6bb51 2418
6c47cd7d
LP
2419 bind_mount_free_many(bind_mounts, n_bind_mounts);
2420
1beab8b0
LP
2421 /* If we couldn't set up the namespace this is probably due to a missing capability. setup_namespace() reports
2422 * that with a special, recognizable error ENOANO. In this case, silently proceeed, but only if exclusively
2423 * sandboxing options were used, i.e. nothing such as RootDirectory= or BindMount= that would result in a
2424 * completely different execution environment. */
aca835ed
YW
2425 if (r == -ENOANO) {
2426 if (n_bind_mounts == 0 &&
2427 context->n_temporary_filesystems == 0 &&
2428 !root_dir && !root_image &&
2429 !context->dynamic_user) {
2430 log_unit_debug(u, "Failed to set up namespace, assuming containerized execution and ignoring.");
2431 return 0;
2432 }
2433
2194547e
LP
2434 log_unit_debug(u, "Failed to set up namespace, and refusing to continue since the selected namespacing options alter mount environment non-trivially.\n"
2435 "Bind mounts: %zu, temporary filesystems: %zu, root directory: %s, root image: %s, dynamic user: %s",
2436 n_bind_mounts, context->n_temporary_filesystems, yes_no(root_dir), yes_no(root_image), yes_no(context->dynamic_user));
2437
aca835ed 2438 return -EOPNOTSUPP;
93c6bb51
DH
2439 }
2440
2441 return r;
2442}
2443
915e6d16
LP
2444static int apply_working_directory(
2445 const ExecContext *context,
2446 const ExecParameters *params,
2447 const char *home,
376fecf6
LP
2448 const bool needs_mount_ns,
2449 int *exit_status) {
915e6d16 2450
6732edab 2451 const char *d, *wd;
2b3c1b9e
DH
2452
2453 assert(context);
376fecf6 2454 assert(exit_status);
2b3c1b9e 2455
6732edab
LP
2456 if (context->working_directory_home) {
2457
376fecf6
LP
2458 if (!home) {
2459 *exit_status = EXIT_CHDIR;
6732edab 2460 return -ENXIO;
376fecf6 2461 }
6732edab 2462
2b3c1b9e 2463 wd = home;
6732edab
LP
2464
2465 } else if (context->working_directory)
2b3c1b9e
DH
2466 wd = context->working_directory;
2467 else
2468 wd = "/";
e7f1e7c6
DH
2469
2470 if (params->flags & EXEC_APPLY_CHROOT) {
2471 if (!needs_mount_ns && context->root_directory)
376fecf6
LP
2472 if (chroot(context->root_directory) < 0) {
2473 *exit_status = EXIT_CHROOT;
e7f1e7c6 2474 return -errno;
376fecf6 2475 }
e7f1e7c6 2476
2b3c1b9e
DH
2477 d = wd;
2478 } else
3b0e5bb5 2479 d = prefix_roota(context->root_directory, wd);
e7f1e7c6 2480
376fecf6
LP
2481 if (chdir(d) < 0 && !context->working_directory_missing_ok) {
2482 *exit_status = EXIT_CHDIR;
2b3c1b9e 2483 return -errno;
376fecf6 2484 }
e7f1e7c6
DH
2485
2486 return 0;
2487}
2488
b1edf445 2489static int setup_keyring(
34cf6c43 2490 const Unit *u,
b1edf445
LP
2491 const ExecContext *context,
2492 const ExecParameters *p,
2493 uid_t uid, gid_t gid) {
2494
74dd6b51 2495 key_serial_t keyring;
e64c2d0b
DJL
2496 int r = 0;
2497 uid_t saved_uid;
2498 gid_t saved_gid;
74dd6b51
LP
2499
2500 assert(u);
b1edf445 2501 assert(context);
74dd6b51
LP
2502 assert(p);
2503
2504 /* Let's set up a new per-service "session" kernel keyring for each system service. This has the benefit that
2505 * each service runs with its own keyring shared among all processes of the service, but with no hook-up beyond
2506 * that scope, and in particular no link to the per-UID keyring. If we don't do this the keyring will be
2507 * automatically created on-demand and then linked to the per-UID keyring, by the kernel. The kernel's built-in
2508 * on-demand behaviour is very appropriate for login users, but probably not so much for system services, where
2509 * UIDs are not necessarily specific to a service but reused (at least in the case of UID 0). */
2510
2511 if (!(p->flags & EXEC_NEW_KEYRING))
2512 return 0;
2513
b1edf445
LP
2514 if (context->keyring_mode == EXEC_KEYRING_INHERIT)
2515 return 0;
2516
e64c2d0b
DJL
2517 /* Acquiring a reference to the user keyring is nasty. We briefly change identity in order to get things set up
2518 * properly by the kernel. If we don't do that then we can't create it atomically, and that sucks for parallel
2519 * execution. This mimics what pam_keyinit does, too. Setting up session keyring, to be owned by the right user
2520 * & group is just as nasty as acquiring a reference to the user keyring. */
2521
2522 saved_uid = getuid();
2523 saved_gid = getgid();
2524
2525 if (gid_is_valid(gid) && gid != saved_gid) {
2526 if (setregid(gid, -1) < 0)
2527 return log_unit_error_errno(u, errno, "Failed to change GID for user keyring: %m");
2528 }
2529
2530 if (uid_is_valid(uid) && uid != saved_uid) {
2531 if (setreuid(uid, -1) < 0) {
2532 r = log_unit_error_errno(u, errno, "Failed to change UID for user keyring: %m");
2533 goto out;
2534 }
2535 }
2536
74dd6b51
LP
2537 keyring = keyctl(KEYCTL_JOIN_SESSION_KEYRING, 0, 0, 0, 0);
2538 if (keyring == -1) {
2539 if (errno == ENOSYS)
8002fb97 2540 log_unit_debug_errno(u, errno, "Kernel keyring not supported, ignoring.");
74dd6b51 2541 else if (IN_SET(errno, EACCES, EPERM))
8002fb97 2542 log_unit_debug_errno(u, errno, "Kernel keyring access prohibited, ignoring.");
74dd6b51 2543 else if (errno == EDQUOT)
8002fb97 2544 log_unit_debug_errno(u, errno, "Out of kernel keyrings to allocate, ignoring.");
74dd6b51 2545 else
e64c2d0b 2546 r = log_unit_error_errno(u, errno, "Setting up kernel keyring failed: %m");
74dd6b51 2547
e64c2d0b 2548 goto out;
74dd6b51
LP
2549 }
2550
e64c2d0b
DJL
2551 /* When requested link the user keyring into the session keyring. */
2552 if (context->keyring_mode == EXEC_KEYRING_SHARED) {
2553
2554 if (keyctl(KEYCTL_LINK,
2555 KEY_SPEC_USER_KEYRING,
2556 KEY_SPEC_SESSION_KEYRING, 0, 0) < 0) {
2557 r = log_unit_error_errno(u, errno, "Failed to link user keyring into session keyring: %m");
2558 goto out;
2559 }
2560 }
2561
2562 /* Restore uid/gid back */
2563 if (uid_is_valid(uid) && uid != saved_uid) {
2564 if (setreuid(saved_uid, -1) < 0) {
2565 r = log_unit_error_errno(u, errno, "Failed to change UID back for user keyring: %m");
2566 goto out;
2567 }
2568 }
2569
2570 if (gid_is_valid(gid) && gid != saved_gid) {
2571 if (setregid(saved_gid, -1) < 0)
2572 return log_unit_error_errno(u, errno, "Failed to change GID back for user keyring: %m");
2573 }
2574
2575 /* Populate they keyring with the invocation ID by default, as original saved_uid. */
b3415f5d
LP
2576 if (!sd_id128_is_null(u->invocation_id)) {
2577 key_serial_t key;
2578
2579 key = add_key("user", "invocation_id", &u->invocation_id, sizeof(u->invocation_id), KEY_SPEC_SESSION_KEYRING);
2580 if (key == -1)
8002fb97 2581 log_unit_debug_errno(u, errno, "Failed to add invocation ID to keyring, ignoring: %m");
b3415f5d
LP
2582 else {
2583 if (keyctl(KEYCTL_SETPERM, key,
2584 KEY_POS_VIEW|KEY_POS_READ|KEY_POS_SEARCH|
2585 KEY_USR_VIEW|KEY_USR_READ|KEY_USR_SEARCH, 0, 0) < 0)
e64c2d0b 2586 r = log_unit_error_errno(u, errno, "Failed to restrict invocation ID permission: %m");
b3415f5d
LP
2587 }
2588 }
2589
e64c2d0b
DJL
2590out:
2591 /* Revert back uid & gid for the the last time, and exit */
2592 /* no extra logging, as only the first already reported error matters */
2593 if (getuid() != saved_uid)
2594 (void) setreuid(saved_uid, -1);
b1edf445 2595
e64c2d0b
DJL
2596 if (getgid() != saved_gid)
2597 (void) setregid(saved_gid, -1);
b1edf445 2598
e64c2d0b 2599 return r;
74dd6b51
LP
2600}
2601
/* Appends the non-negative entries of 'pair' to 'array', in order, advancing the element counter *n for each
 * entry added. A NULL 'pair' is a no-op. The caller must make sure 'array' has room for up to two more
 * entries. */
static void append_socket_pair(int *array, size_t *n, const int pair[2]) {
        size_t k;

        assert(array);
        assert(n);

        if (!pair)
                return;

        for (k = 0; k < 2; k++)
                if (pair[k] >= 0)
                        array[(*n)++] = pair[k];
}
2614
a34ceba6
LP
2615static int close_remaining_fds(
2616 const ExecParameters *params,
34cf6c43
YW
2617 const ExecRuntime *runtime,
2618 const DynamicCreds *dcreds,
00d9ef85 2619 int user_lookup_fd,
a34ceba6 2620 int socket_fd,
5686391b 2621 int exec_fd,
da6053d0 2622 int *fds, size_t n_fds) {
a34ceba6 2623
da6053d0 2624 size_t n_dont_close = 0;
00d9ef85 2625 int dont_close[n_fds + 12];
a34ceba6
LP
2626
2627 assert(params);
2628
2629 if (params->stdin_fd >= 0)
2630 dont_close[n_dont_close++] = params->stdin_fd;
2631 if (params->stdout_fd >= 0)
2632 dont_close[n_dont_close++] = params->stdout_fd;
2633 if (params->stderr_fd >= 0)
2634 dont_close[n_dont_close++] = params->stderr_fd;
2635
2636 if (socket_fd >= 0)
2637 dont_close[n_dont_close++] = socket_fd;
5686391b
LP
2638 if (exec_fd >= 0)
2639 dont_close[n_dont_close++] = exec_fd;
a34ceba6
LP
2640 if (n_fds > 0) {
2641 memcpy(dont_close + n_dont_close, fds, sizeof(int) * n_fds);
2642 n_dont_close += n_fds;
2643 }
2644
29206d46
LP
2645 if (runtime)
2646 append_socket_pair(dont_close, &n_dont_close, runtime->netns_storage_socket);
2647
2648 if (dcreds) {
2649 if (dcreds->user)
2650 append_socket_pair(dont_close, &n_dont_close, dcreds->user->storage_socket);
2651 if (dcreds->group)
2652 append_socket_pair(dont_close, &n_dont_close, dcreds->group->storage_socket);
a34ceba6
LP
2653 }
2654
00d9ef85
LP
2655 if (user_lookup_fd >= 0)
2656 dont_close[n_dont_close++] = user_lookup_fd;
2657
a34ceba6
LP
2658 return close_all_fds(dont_close, n_dont_close);
2659}
2660
00d9ef85
LP
2661static int send_user_lookup(
2662 Unit *unit,
2663 int user_lookup_fd,
2664 uid_t uid,
2665 gid_t gid) {
2666
2667 assert(unit);
2668
2669 /* Send the resolved UID/GID to PID 1 after we learnt it. We send a single datagram, containing the UID/GID
2670 * data as well as the unit name. Note that we suppress sending this if no user/group to resolve was
2671 * specified. */
2672
2673 if (user_lookup_fd < 0)
2674 return 0;
2675
2676 if (!uid_is_valid(uid) && !gid_is_valid(gid))
2677 return 0;
2678
2679 if (writev(user_lookup_fd,
2680 (struct iovec[]) {
e6a7ec4b
LP
2681 IOVEC_INIT(&uid, sizeof(uid)),
2682 IOVEC_INIT(&gid, sizeof(gid)),
2683 IOVEC_INIT_STRING(unit->id) }, 3) < 0)
00d9ef85
LP
2684 return -errno;
2685
2686 return 0;
2687}
2688
6732edab
LP
2689static int acquire_home(const ExecContext *c, uid_t uid, const char** home, char **buf) {
2690 int r;
2691
2692 assert(c);
2693 assert(home);
2694 assert(buf);
2695
2696 /* If WorkingDirectory=~ is set, try to acquire a usable home directory. */
2697
2698 if (*home)
2699 return 0;
2700
2701 if (!c->working_directory_home)
2702 return 0;
2703
2704 if (uid == 0) {
2705 /* Hardcode /root as home directory for UID 0 */
2706 *home = "/root";
2707 return 1;
2708 }
2709
2710 r = get_home_dir(buf);
2711 if (r < 0)
2712 return r;
2713
2714 *home = *buf;
2715 return 1;
2716}
2717
da50b85a
LP
2718static int compile_suggested_paths(const ExecContext *c, const ExecParameters *p, char ***ret) {
2719 _cleanup_strv_free_ char ** list = NULL;
2720 ExecDirectoryType t;
2721 int r;
2722
2723 assert(c);
2724 assert(p);
2725 assert(ret);
2726
2727 assert(c->dynamic_user);
2728
2729 /* Compile a list of paths that it might make sense to read the owning UID from to use as initial candidate for
2730 * dynamic UID allocation, in order to save us from doing costly recursive chown()s of the special
2731 * directories. */
2732
2733 for (t = 0; t < _EXEC_DIRECTORY_TYPE_MAX; t++) {
2734 char **i;
2735
2736 if (t == EXEC_DIRECTORY_CONFIGURATION)
2737 continue;
2738
2739 if (!p->prefix[t])
2740 continue;
2741
2742 STRV_FOREACH(i, c->directories[t].paths) {
2743 char *e;
2744
8092a48c
YW
2745 if (t == EXEC_DIRECTORY_RUNTIME)
2746 e = strjoin(p->prefix[t], "/", *i);
2747 else
2748 e = strjoin(p->prefix[t], "/private/", *i);
da50b85a
LP
2749 if (!e)
2750 return -ENOMEM;
2751
2752 r = strv_consume(&list, e);
2753 if (r < 0)
2754 return r;
2755 }
2756 }
2757
ae2a15bc 2758 *ret = TAKE_PTR(list);
da50b85a
LP
2759
2760 return 0;
2761}
2762
34cf6c43
YW
2763static char *exec_command_line(char **argv);
2764
ff0af2a1 2765static int exec_child(
f2341e0a 2766 Unit *unit,
34cf6c43 2767 const ExecCommand *command,
ff0af2a1
LP
2768 const ExecContext *context,
2769 const ExecParameters *params,
2770 ExecRuntime *runtime,
29206d46 2771 DynamicCreds *dcreds,
ff0af2a1 2772 int socket_fd,
52c239d7 2773 int named_iofds[3],
4c47affc 2774 int *fds,
da6053d0 2775 size_t n_socket_fds,
25b583d7 2776 size_t n_storage_fds,
ff0af2a1 2777 char **files_env,
00d9ef85 2778 int user_lookup_fd,
12145637 2779 int *exit_status) {
d35fbf6b 2780
2065ca69 2781 _cleanup_strv_free_ char **our_env = NULL, **pass_env = NULL, **accum_env = NULL, **final_argv = NULL;
5686391b 2782 int *fds_with_exec_fd, n_fds_with_exec_fd, r, ngids = 0, exec_fd = -1;
4d885bd3
DH
2783 _cleanup_free_ gid_t *supplementary_gids = NULL;
2784 const char *username = NULL, *groupname = NULL;
5686391b 2785 _cleanup_free_ char *home_buffer = NULL;
2b3c1b9e 2786 const char *home = NULL, *shell = NULL;
7bce046b
LP
2787 dev_t journal_stream_dev = 0;
2788 ino_t journal_stream_ino = 0;
165a31c0
LP
2789 bool needs_sandboxing, /* Do we need to set up full sandboxing? (i.e. all namespacing, all MAC stuff, caps, yadda yadda */
2790 needs_setuid, /* Do we need to do the actual setresuid()/setresgid() calls? */
2791 needs_mount_namespace, /* Do we need to set up a mount namespace for this kernel? */
2792 needs_ambient_hack; /* Do we need to apply the ambient capabilities hack? */
349cc4a5 2793#if HAVE_SELINUX
7f59dd35 2794 _cleanup_free_ char *mac_selinux_context_net = NULL;
43b1f709 2795 bool use_selinux = false;
ecfbc84f 2796#endif
f9fa32f0 2797#if ENABLE_SMACK
43b1f709 2798 bool use_smack = false;
ecfbc84f 2799#endif
349cc4a5 2800#if HAVE_APPARMOR
43b1f709 2801 bool use_apparmor = false;
ecfbc84f 2802#endif
fed1e721
LP
2803 uid_t uid = UID_INVALID;
2804 gid_t gid = GID_INVALID;
da6053d0 2805 size_t n_fds;
3536f49e 2806 ExecDirectoryType dt;
165a31c0 2807 int secure_bits;
034c6ed7 2808
f2341e0a 2809 assert(unit);
5cb5a6ff
LP
2810 assert(command);
2811 assert(context);
d35fbf6b 2812 assert(params);
ff0af2a1 2813 assert(exit_status);
d35fbf6b
DM
2814
2815 rename_process_from_path(command->path);
2816
2817 /* We reset exactly these signals, since they are the
2818 * only ones we set to SIG_IGN in the main daemon. All
2819 * others we leave untouched because we set them to
2820 * SIG_DFL or a valid handler initially, both of which
2821 * will be demoted to SIG_DFL. */
ce30c8dc
LP
2822 (void) default_signals(SIGNALS_CRASH_HANDLER,
2823 SIGNALS_IGNORE, -1);
d35fbf6b
DM
2824
2825 if (context->ignore_sigpipe)
ce30c8dc 2826 (void) ignore_signals(SIGPIPE, -1);
d35fbf6b 2827
ff0af2a1
LP
2828 r = reset_signal_mask();
2829 if (r < 0) {
2830 *exit_status = EXIT_SIGNAL_MASK;
12145637 2831 return log_unit_error_errno(unit, r, "Failed to set process signal mask: %m");
d35fbf6b 2832 }
034c6ed7 2833
d35fbf6b
DM
2834 if (params->idle_pipe)
2835 do_idle_pipe_dance(params->idle_pipe);
4f2d528d 2836
2c027c62
LP
2837 /* Close fds we don't need very early to make sure we don't block init reexecution because it cannot bind its
2838 * sockets. Among the fds we close are the logging fds, and we want to keep them closed, so that we don't have
2839 * any fds open we don't really want open during the transition. In order to make logging work, we switch the
2840 * log subsystem into open_when_needed mode, so that it reopens the logs on every single log call. */
ff0af2a1 2841
d35fbf6b 2842 log_forget_fds();
2c027c62 2843 log_set_open_when_needed(true);
4f2d528d 2844
40a80078
LP
2845 /* In case anything used libc syslog(), close this here, too */
2846 closelog();
2847
5686391b
LP
2848 n_fds = n_socket_fds + n_storage_fds;
2849 r = close_remaining_fds(params, runtime, dcreds, user_lookup_fd, socket_fd, params->exec_fd, fds, n_fds);
ff0af2a1
LP
2850 if (r < 0) {
2851 *exit_status = EXIT_FDS;
12145637 2852 return log_unit_error_errno(unit, r, "Failed to close unwanted file descriptors: %m");
8c7be95e
LP
2853 }
2854
d35fbf6b
DM
2855 if (!context->same_pgrp)
2856 if (setsid() < 0) {
ff0af2a1 2857 *exit_status = EXIT_SETSID;
12145637 2858 return log_unit_error_errno(unit, errno, "Failed to create new process session: %m");
d35fbf6b 2859 }
9e2f7c11 2860
1e22b5cd 2861 exec_context_tty_reset(context, params);
d35fbf6b 2862
c891efaf 2863 if (unit_shall_confirm_spawn(unit)) {
7d5ceb64 2864 const char *vc = params->confirm_spawn;
3b20f877
FB
2865 _cleanup_free_ char *cmdline = NULL;
2866
ee39ca20 2867 cmdline = exec_command_line(command->argv);
3b20f877 2868 if (!cmdline) {
0460aa5c 2869 *exit_status = EXIT_MEMORY;
12145637 2870 return log_oom();
3b20f877 2871 }
d35fbf6b 2872
eedf223a 2873 r = ask_for_confirmation(vc, unit, cmdline);
3b20f877
FB
2874 if (r != CONFIRM_EXECUTE) {
2875 if (r == CONFIRM_PRETEND_SUCCESS) {
2876 *exit_status = EXIT_SUCCESS;
2877 return 0;
2878 }
ff0af2a1 2879 *exit_status = EXIT_CONFIRM;
12145637 2880 log_unit_error(unit, "Execution cancelled by the user");
d35fbf6b 2881 return -ECANCELED;
d35fbf6b
DM
2882 }
2883 }
1a63a750 2884
d521916d
LP
2885 /* We are about to invoke NSS and PAM modules. Let's tell them what we are doing here, maybe they care. This is
2886 * used by nss-resolve to disable itself when we are about to start systemd-resolved, to avoid deadlocks. Note
2887 * that these env vars do not survive the execve(), which means they really only apply to the PAM and NSS
2888 * invocations themselves. Also note that while we'll only invoke NSS modules involved in user management they
2889 * might internally call into other NSS modules that are involved in hostname resolution, we never know. */
2890 if (setenv("SYSTEMD_ACTIVATION_UNIT", unit->id, true) != 0 ||
2891 setenv("SYSTEMD_ACTIVATION_SCOPE", MANAGER_IS_SYSTEM(unit->manager) ? "system" : "user", true) != 0) {
2892 *exit_status = EXIT_MEMORY;
2893 return log_unit_error_errno(unit, errno, "Failed to update environment: %m");
2894 }
2895
29206d46 2896 if (context->dynamic_user && dcreds) {
da50b85a 2897 _cleanup_strv_free_ char **suggested_paths = NULL;
29206d46 2898
d521916d
LP
2899 /* On top of that, make sure we bypass our own NSS module nss-systemd comprehensively for any NSS
2900 * checks, if DynamicUser=1 is used, as we shouldn't create a feedback loop with ourselves here.*/
409093fe
LP
2901 if (putenv((char*) "SYSTEMD_NSS_DYNAMIC_BYPASS=1") != 0) {
2902 *exit_status = EXIT_USER;
12145637 2903 return log_unit_error_errno(unit, errno, "Failed to update environment: %m");
409093fe
LP
2904 }
2905
da50b85a
LP
2906 r = compile_suggested_paths(context, params, &suggested_paths);
2907 if (r < 0) {
2908 *exit_status = EXIT_MEMORY;
2909 return log_oom();
2910 }
2911
2912 r = dynamic_creds_realize(dcreds, suggested_paths, &uid, &gid);
ff0af2a1
LP
2913 if (r < 0) {
2914 *exit_status = EXIT_USER;
e2b0cc34
YW
2915 if (r == -EILSEQ) {
2916 log_unit_error(unit, "Failed to update dynamic user credentials: User or group with specified name already exists.");
2917 return -EOPNOTSUPP;
2918 }
12145637 2919 return log_unit_error_errno(unit, r, "Failed to update dynamic user credentials: %m");
524daa8c 2920 }
524daa8c 2921
70dd455c 2922 if (!uid_is_valid(uid)) {
29206d46 2923 *exit_status = EXIT_USER;
12145637 2924 log_unit_error(unit, "UID validation failed for \""UID_FMT"\"", uid);
70dd455c
ZJS
2925 return -ESRCH;
2926 }
2927
2928 if (!gid_is_valid(gid)) {
2929 *exit_status = EXIT_USER;
12145637 2930 log_unit_error(unit, "GID validation failed for \""GID_FMT"\"", gid);
29206d46
LP
2931 return -ESRCH;
2932 }
5bc7452b 2933
29206d46
LP
2934 if (dcreds->user)
2935 username = dcreds->user->name;
2936
2937 } else {
4d885bd3
DH
2938 r = get_fixed_user(context, &username, &uid, &gid, &home, &shell);
2939 if (r < 0) {
2940 *exit_status = EXIT_USER;
12145637 2941 return log_unit_error_errno(unit, r, "Failed to determine user credentials: %m");
5bc7452b 2942 }
5bc7452b 2943
4d885bd3
DH
2944 r = get_fixed_group(context, &groupname, &gid);
2945 if (r < 0) {
2946 *exit_status = EXIT_GROUP;
12145637 2947 return log_unit_error_errno(unit, r, "Failed to determine group credentials: %m");
4d885bd3 2948 }
cdc5d5c5 2949 }
29206d46 2950
cdc5d5c5
DH
2951 /* Initialize user supplementary groups and get SupplementaryGroups= ones */
2952 r = get_supplementary_groups(context, username, groupname, gid,
2953 &supplementary_gids, &ngids);
2954 if (r < 0) {
2955 *exit_status = EXIT_GROUP;
12145637 2956 return log_unit_error_errno(unit, r, "Failed to determine supplementary groups: %m");
29206d46 2957 }
5bc7452b 2958
00d9ef85
LP
2959 r = send_user_lookup(unit, user_lookup_fd, uid, gid);
2960 if (r < 0) {
2961 *exit_status = EXIT_USER;
12145637 2962 return log_unit_error_errno(unit, r, "Failed to send user credentials to PID1: %m");
00d9ef85
LP
2963 }
2964
2965 user_lookup_fd = safe_close(user_lookup_fd);
2966
6732edab
LP
2967 r = acquire_home(context, uid, &home, &home_buffer);
2968 if (r < 0) {
2969 *exit_status = EXIT_CHDIR;
12145637 2970 return log_unit_error_errno(unit, r, "Failed to determine $HOME for user: %m");
6732edab
LP
2971 }
2972
d35fbf6b
DM
2973 /* If a socket is connected to STDIN/STDOUT/STDERR, we
2974 * must sure to drop O_NONBLOCK */
2975 if (socket_fd >= 0)
a34ceba6 2976 (void) fd_nonblock(socket_fd, false);
acbb0225 2977
52c239d7 2978 r = setup_input(context, params, socket_fd, named_iofds);
ff0af2a1
LP
2979 if (r < 0) {
2980 *exit_status = EXIT_STDIN;
12145637 2981 return log_unit_error_errno(unit, r, "Failed to set up standard input: %m");
d35fbf6b 2982 }
034c6ed7 2983
52c239d7 2984 r = setup_output(unit, context, params, STDOUT_FILENO, socket_fd, named_iofds, basename(command->path), uid, gid, &journal_stream_dev, &journal_stream_ino);
ff0af2a1
LP
2985 if (r < 0) {
2986 *exit_status = EXIT_STDOUT;
12145637 2987 return log_unit_error_errno(unit, r, "Failed to set up standard output: %m");
d35fbf6b
DM
2988 }
2989
52c239d7 2990 r = setup_output(unit, context, params, STDERR_FILENO, socket_fd, named_iofds, basename(command->path), uid, gid, &journal_stream_dev, &journal_stream_ino);
ff0af2a1
LP
2991 if (r < 0) {
2992 *exit_status = EXIT_STDERR;
12145637 2993 return log_unit_error_errno(unit, r, "Failed to set up standard error output: %m");
d35fbf6b
DM
2994 }
2995
2996 if (params->cgroup_path) {
ff0af2a1
LP
2997 r = cg_attach_everywhere(params->cgroup_supported, params->cgroup_path, 0, NULL, NULL);
2998 if (r < 0) {
2999 *exit_status = EXIT_CGROUP;
12145637 3000 return log_unit_error_errno(unit, r, "Failed to attach to cgroup %s: %m", params->cgroup_path);
309bff19 3001 }
d35fbf6b 3002 }
309bff19 3003
d35fbf6b 3004 if (context->oom_score_adjust_set) {
9f8168eb
LP
3005 /* When we can't make this change due to EPERM, then let's silently skip over it. User namespaces
3006 * prohibit write access to this file, and we shouldn't trip up over that. */
3007 r = set_oom_score_adjust(context->oom_score_adjust);
12145637 3008 if (IN_SET(r, -EPERM, -EACCES))
f2341e0a 3009 log_unit_debug_errno(unit, r, "Failed to adjust OOM setting, assuming containerized execution, ignoring: %m");
12145637 3010 else if (r < 0) {
ff0af2a1 3011 *exit_status = EXIT_OOM_ADJUST;
12145637 3012 return log_unit_error_errno(unit, r, "Failed to adjust OOM setting: %m");
613b411c 3013 }
d35fbf6b
DM
3014 }
3015
3016 if (context->nice_set)
3017 if (setpriority(PRIO_PROCESS, 0, context->nice) < 0) {
ff0af2a1 3018 *exit_status = EXIT_NICE;
12145637 3019 return log_unit_error_errno(unit, errno, "Failed to set up process scheduling priority (nice level): %m");
613b411c
LP
3020 }
3021
d35fbf6b
DM
3022 if (context->cpu_sched_set) {
3023 struct sched_param param = {
3024 .sched_priority = context->cpu_sched_priority,
3025 };
3026
ff0af2a1
LP
3027 r = sched_setscheduler(0,
3028 context->cpu_sched_policy |
3029 (context->cpu_sched_reset_on_fork ?
3030 SCHED_RESET_ON_FORK : 0),
3031 &param);
3032 if (r < 0) {
3033 *exit_status = EXIT_SETSCHEDULER;
12145637 3034 return log_unit_error_errno(unit, errno, "Failed to set up CPU scheduling: %m");
fc9b2a84 3035 }
d35fbf6b 3036 }
fc9b2a84 3037
d35fbf6b
DM
3038 if (context->cpuset)
3039 if (sched_setaffinity(0, CPU_ALLOC_SIZE(context->cpuset_ncpus), context->cpuset) < 0) {
ff0af2a1 3040 *exit_status = EXIT_CPUAFFINITY;
12145637 3041 return log_unit_error_errno(unit, errno, "Failed to set up CPU affinity: %m");
034c6ed7
LP
3042 }
3043
d35fbf6b
DM
3044 if (context->ioprio_set)
3045 if (ioprio_set(IOPRIO_WHO_PROCESS, 0, context->ioprio) < 0) {
ff0af2a1 3046 *exit_status = EXIT_IOPRIO;
12145637 3047 return log_unit_error_errno(unit, errno, "Failed to set up IO scheduling priority: %m");
d35fbf6b 3048 }
da726a4d 3049
d35fbf6b
DM
3050 if (context->timer_slack_nsec != NSEC_INFINITY)
3051 if (prctl(PR_SET_TIMERSLACK, context->timer_slack_nsec) < 0) {
ff0af2a1 3052 *exit_status = EXIT_TIMERSLACK;
12145637 3053 return log_unit_error_errno(unit, errno, "Failed to set up timer slack: %m");
4c2630eb 3054 }
9eba9da4 3055
21022b9d
LP
3056 if (context->personality != PERSONALITY_INVALID) {
3057 r = safe_personality(context->personality);
3058 if (r < 0) {
ff0af2a1 3059 *exit_status = EXIT_PERSONALITY;
12145637 3060 return log_unit_error_errno(unit, r, "Failed to set up execution domain (personality): %m");
4c2630eb 3061 }
21022b9d 3062 }
94f04347 3063
d35fbf6b 3064 if (context->utmp_id)
df0ff127 3065 utmp_put_init_process(context->utmp_id, getpid_cached(), getsid(0),
6a93917d 3066 context->tty_path,
023a4f67
LP
3067 context->utmp_mode == EXEC_UTMP_INIT ? INIT_PROCESS :
3068 context->utmp_mode == EXEC_UTMP_LOGIN ? LOGIN_PROCESS :
3069 USER_PROCESS,
6a93917d 3070 username);
d35fbf6b 3071
e0d2adfd 3072 if (context->user) {
ff0af2a1
LP
3073 r = chown_terminal(STDIN_FILENO, uid);
3074 if (r < 0) {
3075 *exit_status = EXIT_STDIN;
12145637 3076 return log_unit_error_errno(unit, r, "Failed to change ownership of terminal: %m");
071830ff 3077 }
d35fbf6b 3078 }
8e274523 3079
62b9bb26
LP
3080 /* If delegation is enabled we'll pass ownership of the cgroup to the user of the new process. On cgroupsv1
3081 * this is only about systemd's own hierarchy, i.e. not the controller hierarchies, simply because that's not
3082 * safe. On cgroupsv2 there's only one hierarchy anyway, and delegation is safe there, hence in that case only
3083 * touch a single hierarchy too. */
584b8688 3084 if (params->cgroup_path && context->user && (params->flags & EXEC_CGROUP_DELEGATE)) {
62b9bb26 3085 r = cg_set_access(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, uid, gid);
ff0af2a1
LP
3086 if (r < 0) {
3087 *exit_status = EXIT_CGROUP;
12145637 3088 return log_unit_error_errno(unit, r, "Failed to adjust control group access: %m");
034c6ed7 3089 }
d35fbf6b 3090 }
034c6ed7 3091
72fd1768 3092 for (dt = 0; dt < _EXEC_DIRECTORY_TYPE_MAX; dt++) {
8679efde 3093 r = setup_exec_directory(context, params, uid, gid, dt, exit_status);
12145637
LP
3094 if (r < 0)
3095 return log_unit_error_errno(unit, r, "Failed to set up special execution directory in %s: %m", params->prefix[dt]);
d35fbf6b 3096 }
94f04347 3097
7bce046b 3098 r = build_environment(
fd63e712 3099 unit,
7bce046b
LP
3100 context,
3101 params,
3102 n_fds,
3103 home,
3104 username,
3105 shell,
3106 journal_stream_dev,
3107 journal_stream_ino,
3108 &our_env);
2065ca69
JW
3109 if (r < 0) {
3110 *exit_status = EXIT_MEMORY;
12145637 3111 return log_oom();
2065ca69
JW
3112 }
3113
3114 r = build_pass_environment(context, &pass_env);
3115 if (r < 0) {
3116 *exit_status = EXIT_MEMORY;
12145637 3117 return log_oom();
2065ca69
JW
3118 }
3119
3120 accum_env = strv_env_merge(5,
3121 params->environment,
3122 our_env,
3123 pass_env,
3124 context->environment,
3125 files_env,
3126 NULL);
3127 if (!accum_env) {
3128 *exit_status = EXIT_MEMORY;
12145637 3129 return log_oom();
2065ca69 3130 }
1280503b 3131 accum_env = strv_env_clean(accum_env);
2065ca69 3132
096424d1 3133 (void) umask(context->umask);
b213e1c1 3134
b1edf445 3135 r = setup_keyring(unit, context, params, uid, gid);
74dd6b51
LP
3136 if (r < 0) {
3137 *exit_status = EXIT_KEYRING;
12145637 3138 return log_unit_error_errno(unit, r, "Failed to set up kernel keyring: %m");
74dd6b51
LP
3139 }
3140
165a31c0 3141 /* We need sandboxing if the caller asked us to apply it and the command isn't explicitly excepted from it */
1703fa41 3142 needs_sandboxing = (params->flags & EXEC_APPLY_SANDBOXING) && !(command->flags & EXEC_COMMAND_FULLY_PRIVILEGED);
7f18ef0a 3143
165a31c0
LP
3144 /* We need the ambient capability hack, if the caller asked us to apply it and the command is marked for it, and the kernel doesn't actually support ambient caps */
3145 needs_ambient_hack = (params->flags & EXEC_APPLY_SANDBOXING) && (command->flags & EXEC_COMMAND_AMBIENT_MAGIC) && !ambient_capabilities_supported();
7f18ef0a 3146
165a31c0
LP
3147 /* We need setresuid() if the caller asked us to apply sandboxing and the command isn't explicitly excepted from either whole sandboxing or just setresuid() itself, and the ambient hack is not desired */
3148 if (needs_ambient_hack)
3149 needs_setuid = false;
3150 else
3151 needs_setuid = (params->flags & EXEC_APPLY_SANDBOXING) && !(command->flags & (EXEC_COMMAND_FULLY_PRIVILEGED|EXEC_COMMAND_NO_SETUID));
3152
3153 if (needs_sandboxing) {
7f18ef0a
FK
3154 /* MAC enablement checks need to be done before a new mount ns is created, as they rely on /sys being
3155 * present. The actual MAC context application will happen later, as late as possible, to avoid
3156 * impacting our own code paths. */
3157
349cc4a5 3158#if HAVE_SELINUX
43b1f709 3159 use_selinux = mac_selinux_use();
7f18ef0a 3160#endif
f9fa32f0 3161#if ENABLE_SMACK
43b1f709 3162 use_smack = mac_smack_use();
7f18ef0a 3163#endif
349cc4a5 3164#if HAVE_APPARMOR
43b1f709 3165 use_apparmor = mac_apparmor_use();
7f18ef0a 3166#endif
165a31c0 3167 }
7f18ef0a 3168
165a31c0
LP
3169 if (needs_setuid) {
3170 if (context->pam_name && username) {
3171 r = setup_pam(context->pam_name, username, uid, gid, context->tty_path, &accum_env, fds, n_fds);
3172 if (r < 0) {
3173 *exit_status = EXIT_PAM;
12145637 3174 return log_unit_error_errno(unit, r, "Failed to set up PAM session: %m");
165a31c0
LP
3175 }
3176 }
b213e1c1 3177 }
ac45f971 3178
d35fbf6b 3179 if (context->private_network && runtime && runtime->netns_storage_socket[0] >= 0) {
6e2d7c4f
MS
3180 if (ns_type_supported(NAMESPACE_NET)) {
3181 r = setup_netns(runtime->netns_storage_socket);
3182 if (r < 0) {
3183 *exit_status = EXIT_NETWORK;
3184 return log_unit_error_errno(unit, r, "Failed to set up network namespacing: %m");
3185 }
3186 } else
3187 log_unit_warning(unit, "PrivateNetwork=yes is configured, but the kernel does not support network namespaces, ignoring.");
d35fbf6b 3188 }
169c1bda 3189
ee818b89 3190 needs_mount_namespace = exec_needs_mount_namespace(context, params, runtime);
ee818b89 3191 if (needs_mount_namespace) {
6818c54c 3192 r = apply_mount_namespace(unit, command, context, params, runtime);
3fbe8dbe
LP
3193 if (r < 0) {
3194 *exit_status = EXIT_NAMESPACE;
12145637 3195 return log_unit_error_errno(unit, r, "Failed to set up mount namespacing: %m");
3fbe8dbe 3196 }
d35fbf6b 3197 }
81a2b7ce 3198
bbeea271 3199 /* Drop groups as early as possbile */
165a31c0 3200 if (needs_setuid) {
709dbeac 3201 r = enforce_groups(gid, supplementary_gids, ngids);
096424d1
LP
3202 if (r < 0) {
3203 *exit_status = EXIT_GROUP;
12145637 3204 return log_unit_error_errno(unit, r, "Changing group credentials failed: %m");
096424d1 3205 }
165a31c0 3206 }
096424d1 3207
165a31c0 3208 if (needs_sandboxing) {
349cc4a5 3209#if HAVE_SELINUX
43b1f709 3210 if (use_selinux && params->selinux_context_net && socket_fd >= 0) {
937ccce9
LP
3211 r = mac_selinux_get_child_mls_label(socket_fd, command->path, context->selinux_context, &mac_selinux_context_net);
3212 if (r < 0) {
3213 *exit_status = EXIT_SELINUX_CONTEXT;
12145637 3214 return log_unit_error_errno(unit, r, "Failed to determine SELinux context: %m");
937ccce9 3215 }
9008e1ac 3216 }
9008e1ac
MS
3217#endif
3218
937ccce9
LP
3219 if (context->private_users) {
3220 r = setup_private_users(uid, gid);
3221 if (r < 0) {
3222 *exit_status = EXIT_USER;
12145637 3223 return log_unit_error_errno(unit, r, "Failed to set up user namespacing: %m");
937ccce9 3224 }
d251207d
LP
3225 }
3226 }
3227
165a31c0 3228 /* We repeat the fd closing here, to make sure that nothing is leaked from the PAM modules. Note that we are
5686391b
LP
3229 * more aggressive this time since socket_fd and the netns fds we don't need anymore. We do keep the exec_fd
3230 * however if we have it as we want to keep it open until the final execve(). */
3231
3232 if (params->exec_fd >= 0) {
3233 exec_fd = params->exec_fd;
3234
3235 if (exec_fd < 3 + (int) n_fds) {
3236 int moved_fd;
3237
3238 /* Let's move the exec fd far up, so that it's outside of the fd range we want to pass to the
3239 * process we are about to execute. */
3240
3241 moved_fd = fcntl(exec_fd, F_DUPFD_CLOEXEC, 3 + (int) n_fds);
3242 if (moved_fd < 0) {
3243 *exit_status = EXIT_FDS;
3244 return log_unit_error_errno(unit, errno, "Couldn't move exec fd up: %m");
3245 }
3246
3247 safe_close(exec_fd);
3248 exec_fd = moved_fd;
3249 } else {
3250 /* This fd should be FD_CLOEXEC already, but let's make sure. */
3251 r = fd_cloexec(exec_fd, true);
3252 if (r < 0) {
3253 *exit_status = EXIT_FDS;
3254 return log_unit_error_errno(unit, r, "Failed to make exec fd FD_CLOEXEC: %m");
3255 }
3256 }
3257
3258 fds_with_exec_fd = newa(int, n_fds + 1);
7e8d494b 3259 memcpy_safe(fds_with_exec_fd, fds, n_fds * sizeof(int));
5686391b
LP
3260 fds_with_exec_fd[n_fds] = exec_fd;
3261 n_fds_with_exec_fd = n_fds + 1;
3262 } else {
3263 fds_with_exec_fd = fds;
3264 n_fds_with_exec_fd = n_fds;
3265 }
3266
3267 r = close_all_fds(fds_with_exec_fd, n_fds_with_exec_fd);
ff0af2a1
LP
3268 if (r >= 0)
3269 r = shift_fds(fds, n_fds);
3270 if (r >= 0)
25b583d7 3271 r = flags_fds(fds, n_socket_fds, n_storage_fds, context->non_blocking);
ff0af2a1
LP
3272 if (r < 0) {
3273 *exit_status = EXIT_FDS;
12145637 3274 return log_unit_error_errno(unit, r, "Failed to adjust passed file descriptors: %m");
d35fbf6b 3275 }
e66cf1a3 3276
5686391b
LP
3277 /* At this point, the fds we want to pass to the program are all ready and set up, with O_CLOEXEC turned off
3278 * and at the right fd numbers. The are no other fds open, with one exception: the exec_fd if it is defined,
3279 * and it has O_CLOEXEC set, after all we want it to be closed by the execve(), so that our parent knows we
3280 * came this far. */
3281
165a31c0 3282 secure_bits = context->secure_bits;
e66cf1a3 3283
165a31c0
LP
3284 if (needs_sandboxing) {
3285 uint64_t bset;
34a5df58 3286 int which_failed;
755d4b67 3287
34a5df58
LP
3288 r = setrlimit_closest_all((const struct rlimit* const *) context->rlimit, &which_failed);
3289 if (r < 0) {
3290 *exit_status = EXIT_LIMITS;
3291 return log_unit_error_errno(unit, r, "Failed to adjust resource limit RLIMIT_%s: %m", rlimit_to_string(which_failed));
e66cf1a3
LP
3292 }
3293
f4170c67
LP
3294 /* Set the RTPRIO resource limit to 0, but only if nothing else was explicitly requested. */
3295 if (context->restrict_realtime && !context->rlimit[RLIMIT_RTPRIO]) {
3296 if (setrlimit(RLIMIT_RTPRIO, &RLIMIT_MAKE_CONST(0)) < 0) {
3297 *exit_status = EXIT_LIMITS;
12145637 3298 return log_unit_error_errno(unit, errno, "Failed to adjust RLIMIT_RTPRIO resource limit: %m");
f4170c67
LP
3299 }
3300 }
3301
37ac2744
JB
3302#if ENABLE_SMACK
3303 /* LSM Smack needs the capability CAP_MAC_ADMIN to change the current execution security context of the
3304 * process. This is the latest place before dropping capabilities. Other MAC context are set later. */
3305 if (use_smack) {
3306 r = setup_smack(context, command);
3307 if (r < 0) {
3308 *exit_status = EXIT_SMACK_PROCESS_LABEL;
3309 return log_unit_error_errno(unit, r, "Failed to set SMACK process label: %m");
3310 }
3311 }
3312#endif
3313
165a31c0
LP
3314 bset = context->capability_bounding_set;
3315 /* If the ambient caps hack is enabled (which means the kernel can't do them, and the user asked for
3316 * our magic fallback), then let's add some extra caps, so that the service can drop privs of its own,
3317 * instead of us doing that */
3318 if (needs_ambient_hack)
3319 bset |= (UINT64_C(1) << CAP_SETPCAP) |
3320 (UINT64_C(1) << CAP_SETUID) |
3321 (UINT64_C(1) << CAP_SETGID);
3322
3323 if (!cap_test_all(bset)) {
3324 r = capability_bounding_set_drop(bset, false);
ff0af2a1
LP
3325 if (r < 0) {
3326 *exit_status = EXIT_CAPABILITIES;
12145637 3327 return log_unit_error_errno(unit, r, "Failed to drop capabilities: %m");
3b8bddde 3328 }
4c2630eb 3329 }
3b8bddde 3330
755d4b67
IP
3331 /* This is done before enforce_user, but ambient set
3332 * does not survive over setresuid() if keep_caps is not set. */
165a31c0
LP
3333 if (!needs_ambient_hack &&
3334 context->capability_ambient_set != 0) {
755d4b67
IP
3335 r = capability_ambient_set_apply(context->capability_ambient_set, true);
3336 if (r < 0) {
3337 *exit_status = EXIT_CAPABILITIES;
12145637 3338 return log_unit_error_errno(unit, r, "Failed to apply ambient capabilities (before UID change): %m");
755d4b67 3339 }
755d4b67 3340 }
165a31c0 3341 }
755d4b67 3342
165a31c0 3343 if (needs_setuid) {
d35fbf6b 3344 if (context->user) {
ff0af2a1
LP
3345 r = enforce_user(context, uid);
3346 if (r < 0) {
3347 *exit_status = EXIT_USER;
12145637 3348 return log_unit_error_errno(unit, r, "Failed to change UID to " UID_FMT ": %m", uid);
5b6319dc 3349 }
165a31c0
LP
3350
3351 if (!needs_ambient_hack &&
3352 context->capability_ambient_set != 0) {
755d4b67
IP
3353
3354 /* Fix the ambient capabilities after user change. */
3355 r = capability_ambient_set_apply(context->capability_ambient_set, false);
3356 if (r < 0) {
3357 *exit_status = EXIT_CAPABILITIES;
12145637 3358 return log_unit_error_errno(unit, r, "Failed to apply ambient capabilities (after UID change): %m");
755d4b67
IP
3359 }
3360
3361 /* If we were asked to change user and ambient capabilities
3362 * were requested, we had to add keep-caps to the securebits
3363 * so that we would maintain the inherited capability set
3364 * through the setresuid(). Make sure that the bit is added
3365 * also to the context secure_bits so that we don't try to
3366 * drop the bit away next. */
3367
7f508f2c 3368 secure_bits |= 1<<SECURE_KEEP_CAPS;
755d4b67 3369 }
5b6319dc 3370 }
165a31c0 3371 }
d35fbf6b 3372
56ef8db9
JB
3373 /* Apply working directory here, because the working directory might be on NFS and only the user running
3374 * this service might have the correct privilege to change to the working directory */
3375 r = apply_working_directory(context, params, home, needs_mount_namespace, exit_status);
3376 if (r < 0)
3377 return log_unit_error_errno(unit, r, "Changing to the requested working directory failed: %m");
3378
165a31c0 3379 if (needs_sandboxing) {
37ac2744 3380 /* Apply other MAC contexts late, but before seccomp syscall filtering, as those should really be last to
5cd9cd35
LP
3381 * influence our own codepaths as little as possible. Moreover, applying MAC contexts usually requires
3382 * syscalls that are subject to seccomp filtering, hence should probably be applied before the syscalls
3383 * are restricted. */
3384
349cc4a5 3385#if HAVE_SELINUX
43b1f709 3386 if (use_selinux) {
5cd9cd35
LP
3387 char *exec_context = mac_selinux_context_net ?: context->selinux_context;
3388
3389 if (exec_context) {
3390 r = setexeccon(exec_context);
3391 if (r < 0) {
3392 *exit_status = EXIT_SELINUX_CONTEXT;
12145637 3393 return log_unit_error_errno(unit, r, "Failed to change SELinux context to %s: %m", exec_context);
5cd9cd35
LP
3394 }
3395 }
3396 }
3397#endif
3398
349cc4a5 3399#if HAVE_APPARMOR
43b1f709 3400 if (use_apparmor && context->apparmor_profile) {
5cd9cd35
LP
3401 r = aa_change_onexec(context->apparmor_profile);
3402 if (r < 0 && !context->apparmor_profile_ignore) {
3403 *exit_status = EXIT_APPARMOR_PROFILE;
12145637 3404 return log_unit_error_errno(unit, errno, "Failed to prepare AppArmor profile change to %s: %m", context->apparmor_profile);
5cd9cd35
LP
3405 }
3406 }
3407#endif
3408
165a31c0
LP
3409 /* PR_GET_SECUREBITS is not privileged, while PR_SET_SECUREBITS is. So to suppress potential EPERMs
3410 * we'll try not to call PR_SET_SECUREBITS unless necessary. */
755d4b67
IP
3411 if (prctl(PR_GET_SECUREBITS) != secure_bits)
3412 if (prctl(PR_SET_SECUREBITS, secure_bits) < 0) {
ff0af2a1 3413 *exit_status = EXIT_SECUREBITS;
12145637 3414 return log_unit_error_errno(unit, errno, "Failed to set process secure bits: %m");
ff01d048 3415 }
5b6319dc 3416
59eeb84b 3417 if (context_has_no_new_privileges(context))
d35fbf6b 3418 if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) < 0) {
ff0af2a1 3419 *exit_status = EXIT_NO_NEW_PRIVILEGES;
12145637 3420 return log_unit_error_errno(unit, errno, "Failed to disable new privileges: %m");
d35fbf6b
DM
3421 }
3422
349cc4a5 3423#if HAVE_SECCOMP
469830d1
LP
3424 r = apply_address_families(unit, context);
3425 if (r < 0) {
3426 *exit_status = EXIT_ADDRESS_FAMILIES;
12145637 3427 return log_unit_error_errno(unit, r, "Failed to restrict address families: %m");
4c2630eb 3428 }
04aa0cb9 3429
469830d1
LP
3430 r = apply_memory_deny_write_execute(unit, context);
3431 if (r < 0) {
3432 *exit_status = EXIT_SECCOMP;
12145637 3433 return log_unit_error_errno(unit, r, "Failed to disable writing to executable memory: %m");
f3e43635 3434 }
f4170c67 3435
469830d1
LP
3436 r = apply_restrict_realtime(unit, context);
3437 if (r < 0) {
3438 *exit_status = EXIT_SECCOMP;
12145637 3439 return log_unit_error_errno(unit, r, "Failed to apply realtime restrictions: %m");
f4170c67
LP
3440 }
3441
add00535
LP
3442 r = apply_restrict_namespaces(unit, context);
3443 if (r < 0) {
3444 *exit_status = EXIT_SECCOMP;
12145637 3445 return log_unit_error_errno(unit, r, "Failed to apply namespace restrictions: %m");
add00535
LP
3446 }
3447
469830d1
LP
3448 r = apply_protect_sysctl(unit, context);
3449 if (r < 0) {
3450 *exit_status = EXIT_SECCOMP;
12145637 3451 return log_unit_error_errno(unit, r, "Failed to apply sysctl restrictions: %m");
502d704e
DH
3452 }
3453
469830d1
LP
3454 r = apply_protect_kernel_modules(unit, context);
3455 if (r < 0) {
3456 *exit_status = EXIT_SECCOMP;
12145637 3457 return log_unit_error_errno(unit, r, "Failed to apply module loading restrictions: %m");
59eeb84b
LP
3458 }
3459
469830d1
LP
3460 r = apply_private_devices(unit, context);
3461 if (r < 0) {
3462 *exit_status = EXIT_SECCOMP;
12145637 3463 return log_unit_error_errno(unit, r, "Failed to set up private devices: %m");
469830d1
LP
3464 }
3465
3466 r = apply_syscall_archs(unit, context);
3467 if (r < 0) {
3468 *exit_status = EXIT_SECCOMP;
12145637 3469 return log_unit_error_errno(unit, r, "Failed to apply syscall architecture restrictions: %m");
ba128bb8
LP
3470 }
3471
78e864e5
TM
3472 r = apply_lock_personality(unit, context);
3473 if (r < 0) {
3474 *exit_status = EXIT_SECCOMP;
12145637 3475 return log_unit_error_errno(unit, r, "Failed to lock personalities: %m");
78e864e5
TM
3476 }
3477
5cd9cd35
LP
3478 /* This really should remain the last step before the execve(), to make sure our own code is unaffected
3479 * by the filter as little as possible. */
165a31c0 3480 r = apply_syscall_filter(unit, context, needs_ambient_hack);
469830d1
LP
3481 if (r < 0) {
3482 *exit_status = EXIT_SECCOMP;
12145637 3483 return log_unit_error_errno(unit, r, "Failed to apply system call filters: %m");
d35fbf6b
DM
3484 }
3485#endif
d35fbf6b 3486 }
034c6ed7 3487
00819cc1
LP
3488 if (!strv_isempty(context->unset_environment)) {
3489 char **ee = NULL;
3490
3491 ee = strv_env_delete(accum_env, 1, context->unset_environment);
3492 if (!ee) {
3493 *exit_status = EXIT_MEMORY;
12145637 3494 return log_oom();
00819cc1
LP
3495 }
3496
130d3d22 3497 strv_free_and_replace(accum_env, ee);
00819cc1
LP
3498 }
3499
ee39ca20 3500 final_argv = replace_env_argv(command->argv, accum_env);
d35fbf6b 3501 if (!final_argv) {
ff0af2a1 3502 *exit_status = EXIT_MEMORY;
12145637 3503 return log_oom();
d35fbf6b 3504 }
034c6ed7 3505
f1d34068 3506 if (DEBUG_LOGGING) {
d35fbf6b 3507 _cleanup_free_ char *line;
81a2b7ce 3508
d35fbf6b 3509 line = exec_command_line(final_argv);
a1230ff9 3510 if (line)
f2341e0a 3511 log_struct(LOG_DEBUG,
f2341e0a
LP
3512 "EXECUTABLE=%s", command->path,
3513 LOG_UNIT_MESSAGE(unit, "Executing: %s", line),
ba360bb0 3514 LOG_UNIT_ID(unit),
a1230ff9 3515 LOG_UNIT_INVOCATION_ID(unit));
d35fbf6b 3516 }
dd305ec9 3517
5686391b
LP
3518 if (exec_fd >= 0) {
3519 uint8_t hot = 1;
3520
3521 /* We have finished with all our initializations. Let's now let the manager know that. From this point
3522 * on, if the manager sees POLLHUP on the exec_fd, then execve() was successful. */
3523
3524 if (write(exec_fd, &hot, sizeof(hot)) < 0) {
3525 *exit_status = EXIT_EXEC;
3526 return log_unit_error_errno(unit, errno, "Failed to enable exec_fd: %m");
3527 }
3528 }
3529
2065ca69 3530 execve(command->path, final_argv, accum_env);
5686391b
LP
3531 r = -errno;
3532
3533 if (exec_fd >= 0) {
3534 uint8_t hot = 0;
3535
3536 /* The execve() failed. This means the exec_fd is still open. Which means we need to tell the manager
3537 * that POLLHUP on it no longer means execve() succeeded. */
3538
3539 if (write(exec_fd, &hot, sizeof(hot)) < 0) {
3540 *exit_status = EXIT_EXEC;
3541 return log_unit_error_errno(unit, errno, "Failed to disable exec_fd: %m");
3542 }
3543 }
12145637 3544
5686391b
LP
3545 if (r == -ENOENT && (command->flags & EXEC_COMMAND_IGNORE_FAILURE)) {
3546 log_struct_errno(LOG_INFO, r,
12145637
LP
3547 "MESSAGE_ID=" SD_MESSAGE_SPAWN_FAILED_STR,
3548 LOG_UNIT_ID(unit),
3549 LOG_UNIT_INVOCATION_ID(unit),
3550 LOG_UNIT_MESSAGE(unit, "Executable %s missing, skipping: %m",
3551 command->path),
a1230ff9 3552 "EXECUTABLE=%s", command->path);
12145637
LP
3553 return 0;
3554 }
3555
ff0af2a1 3556 *exit_status = EXIT_EXEC;
5686391b 3557 return log_unit_error_errno(unit, r, "Failed to execute command: %m");
d35fbf6b 3558}
81a2b7ce 3559
34cf6c43
YW
3560static int exec_context_load_environment(const Unit *unit, const ExecContext *c, char ***l);
3561static int exec_context_named_iofds(const ExecContext *c, const ExecParameters *p, int named_iofds[3]);
3562
f2341e0a
LP
3563int exec_spawn(Unit *unit,
3564 ExecCommand *command,
d35fbf6b
DM
3565 const ExecContext *context,
3566 const ExecParameters *params,
3567 ExecRuntime *runtime,
29206d46 3568 DynamicCreds *dcreds,
d35fbf6b 3569 pid_t *ret) {
8351ceae 3570
ee39ca20 3571 int socket_fd, r, named_iofds[3] = { -1, -1, -1 }, *fds = NULL;
d35fbf6b 3572 _cleanup_strv_free_ char **files_env = NULL;
da6053d0 3573 size_t n_storage_fds = 0, n_socket_fds = 0;
ff0af2a1 3574 _cleanup_free_ char *line = NULL;
d35fbf6b 3575 pid_t pid;
8351ceae 3576
f2341e0a 3577 assert(unit);
d35fbf6b
DM
3578 assert(command);
3579 assert(context);
3580 assert(ret);
3581 assert(params);
25b583d7 3582 assert(params->fds || (params->n_socket_fds + params->n_storage_fds <= 0));
4298d0b5 3583
d35fbf6b
DM
3584 if (context->std_input == EXEC_INPUT_SOCKET ||
3585 context->std_output == EXEC_OUTPUT_SOCKET ||
3586 context->std_error == EXEC_OUTPUT_SOCKET) {
17df7223 3587
4c47affc 3588 if (params->n_socket_fds > 1) {
f2341e0a 3589 log_unit_error(unit, "Got more than one socket.");
d35fbf6b 3590 return -EINVAL;
ff0af2a1 3591 }
eef65bf3 3592
4c47affc 3593 if (params->n_socket_fds == 0) {
488ab41c
AA
3594 log_unit_error(unit, "Got no socket.");
3595 return -EINVAL;
3596 }
3597
d35fbf6b
DM
3598 socket_fd = params->fds[0];
3599 } else {
3600 socket_fd = -1;
3601 fds = params->fds;
9b141911 3602 n_socket_fds = params->n_socket_fds;
25b583d7 3603 n_storage_fds = params->n_storage_fds;
d35fbf6b 3604 }
94f04347 3605
34cf6c43 3606 r = exec_context_named_iofds(context, params, named_iofds);
52c239d7
LB
3607 if (r < 0)
3608 return log_unit_error_errno(unit, r, "Failed to load a named file descriptor: %m");
3609
f2341e0a 3610 r = exec_context_load_environment(unit, context, &files_env);
ff0af2a1 3611 if (r < 0)
f2341e0a 3612 return log_unit_error_errno(unit, r, "Failed to load environment files: %m");
034c6ed7 3613
ee39ca20 3614 line = exec_command_line(command->argv);
d35fbf6b
DM
3615 if (!line)
3616 return log_oom();
fab56fc5 3617
f2341e0a 3618 log_struct(LOG_DEBUG,
f2341e0a
LP
3619 LOG_UNIT_MESSAGE(unit, "About to execute: %s", line),
3620 "EXECUTABLE=%s", command->path,
ba360bb0 3621 LOG_UNIT_ID(unit),
a1230ff9 3622 LOG_UNIT_INVOCATION_ID(unit));
12145637 3623
d35fbf6b
DM
3624 pid = fork();
3625 if (pid < 0)
74129a12 3626 return log_unit_error_errno(unit, errno, "Failed to fork: %m");
d35fbf6b
DM
3627
3628 if (pid == 0) {
12145637 3629 int exit_status = EXIT_SUCCESS;
ff0af2a1 3630
f2341e0a
LP
3631 r = exec_child(unit,
3632 command,
ff0af2a1
LP
3633 context,
3634 params,
3635 runtime,
29206d46 3636 dcreds,
ff0af2a1 3637 socket_fd,
52c239d7 3638 named_iofds,
4c47affc 3639 fds,
9b141911 3640 n_socket_fds,
25b583d7 3641 n_storage_fds,
ff0af2a1 3642 files_env,
00d9ef85 3643 unit->manager->user_lookup_fds[1],
12145637
LP
3644 &exit_status);
3645
a1230ff9 3646 if (r < 0)
12145637
LP
3647 log_struct_errno(LOG_ERR, r,
3648 "MESSAGE_ID=" SD_MESSAGE_SPAWN_FAILED_STR,
3649 LOG_UNIT_ID(unit),
3650 LOG_UNIT_INVOCATION_ID(unit),
3651 LOG_UNIT_MESSAGE(unit, "Failed at step %s spawning %s: %m",
3652 exit_status_to_string(exit_status, EXIT_STATUS_SYSTEMD),
3653 command->path),
a1230ff9 3654 "EXECUTABLE=%s", command->path);
4c2630eb 3655
ff0af2a1 3656 _exit(exit_status);
034c6ed7
LP
3657 }
3658
f2341e0a 3659 log_unit_debug(unit, "Forked %s as "PID_FMT, command->path, pid);
23635a85 3660
80876c20
LP
3661 /* We add the new process to the cgroup both in the child (so
3662 * that we can be sure that no user code is ever executed
3663 * outside of the cgroup) and in the parent (so that we can be
3664 * sure that when we kill the cgroup the process will be
3665 * killed too). */
d35fbf6b 3666 if (params->cgroup_path)
dd305ec9 3667 (void) cg_attach(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, pid);
2da3263a 3668
b58b4116 3669 exec_status_start(&command->exec_status, pid);
9fb86720 3670
034c6ed7 3671 *ret = pid;
5cb5a6ff
LP
3672 return 0;
3673}
3674
034c6ed7 3675void exec_context_init(ExecContext *c) {
3536f49e
YW
3676 ExecDirectoryType i;
3677
034c6ed7
LP
3678 assert(c);
3679
4c12626c 3680 c->umask = 0022;
9eba9da4 3681 c->ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 0);
94f04347 3682 c->cpu_sched_policy = SCHED_OTHER;
071830ff 3683 c->syslog_priority = LOG_DAEMON|LOG_INFO;
74922904 3684 c->syslog_level_prefix = true;
353e12c2 3685 c->ignore_sigpipe = true;
3a43da28 3686 c->timer_slack_nsec = NSEC_INFINITY;
050f7277 3687 c->personality = PERSONALITY_INVALID;
72fd1768 3688 for (i = 0; i < _EXEC_DIRECTORY_TYPE_MAX; i++)
3536f49e 3689 c->directories[i].mode = 0755;
a103496c 3690 c->capability_bounding_set = CAP_ALL;
aa9d574d
YW
3691 assert_cc(NAMESPACE_FLAGS_INITIAL != NAMESPACE_FLAGS_ALL);
3692 c->restrict_namespaces = NAMESPACE_FLAGS_INITIAL;
d3070fbd 3693 c->log_level_max = -1;
034c6ed7
LP
3694}
3695
613b411c 3696void exec_context_done(ExecContext *c) {
3536f49e 3697 ExecDirectoryType i;
d3070fbd 3698 size_t l;
5cb5a6ff
LP
3699
3700 assert(c);
3701
6796073e
LP
3702 c->environment = strv_free(c->environment);
3703 c->environment_files = strv_free(c->environment_files);
b4c14404 3704 c->pass_environment = strv_free(c->pass_environment);
00819cc1 3705 c->unset_environment = strv_free(c->unset_environment);
8c7be95e 3706
31ce987c 3707 rlimit_free_all(c->rlimit);
034c6ed7 3708
2038c3f5 3709 for (l = 0; l < 3; l++) {
52c239d7 3710 c->stdio_fdname[l] = mfree(c->stdio_fdname[l]);
2038c3f5
LP
3711 c->stdio_file[l] = mfree(c->stdio_file[l]);
3712 }
52c239d7 3713
a1e58e8e
LP
3714 c->working_directory = mfree(c->working_directory);
3715 c->root_directory = mfree(c->root_directory);
915e6d16 3716 c->root_image = mfree(c->root_image);
a1e58e8e
LP
3717 c->tty_path = mfree(c->tty_path);
3718 c->syslog_identifier = mfree(c->syslog_identifier);
3719 c->user = mfree(c->user);
3720 c->group = mfree(c->group);
034c6ed7 3721
6796073e 3722 c->supplementary_groups = strv_free(c->supplementary_groups);
94f04347 3723
a1e58e8e 3724 c->pam_name = mfree(c->pam_name);
5b6319dc 3725
2a624c36
AP
3726 c->read_only_paths = strv_free(c->read_only_paths);
3727 c->read_write_paths = strv_free(c->read_write_paths);
3728 c->inaccessible_paths = strv_free(c->inaccessible_paths);
82c121a4 3729
d2d6c096 3730 bind_mount_free_many(c->bind_mounts, c->n_bind_mounts);
8e06d57c
YW
3731 c->bind_mounts = NULL;
3732 c->n_bind_mounts = 0;
2abd4e38
YW
3733 temporary_filesystem_free_many(c->temporary_filesystems, c->n_temporary_filesystems);
3734 c->temporary_filesystems = NULL;
3735 c->n_temporary_filesystems = 0;
d2d6c096 3736
da681e1b 3737 c->cpuset = cpu_set_mfree(c->cpuset);
86a3475b 3738
a1e58e8e
LP
3739 c->utmp_id = mfree(c->utmp_id);
3740 c->selinux_context = mfree(c->selinux_context);
3741 c->apparmor_profile = mfree(c->apparmor_profile);
5b8e1b77 3742 c->smack_process_label = mfree(c->smack_process_label);
eef65bf3 3743
8cfa775f 3744 c->syscall_filter = hashmap_free(c->syscall_filter);
525d3cc7
LP
3745 c->syscall_archs = set_free(c->syscall_archs);
3746 c->address_families = set_free(c->address_families);
e66cf1a3 3747
72fd1768 3748 for (i = 0; i < _EXEC_DIRECTORY_TYPE_MAX; i++)
3536f49e 3749 c->directories[i].paths = strv_free(c->directories[i].paths);
d3070fbd
LP
3750
3751 c->log_level_max = -1;
3752
3753 exec_context_free_log_extra_fields(c);
08f3be7a 3754
90fc172e
AZ
3755 c->log_rate_limit_interval_usec = 0;
3756 c->log_rate_limit_burst = 0;
3757
08f3be7a
LP
3758 c->stdin_data = mfree(c->stdin_data);
3759 c->stdin_data_size = 0;
e66cf1a3
LP
3760}
3761
34cf6c43 3762int exec_context_destroy_runtime_directory(const ExecContext *c, const char *runtime_prefix) {
e66cf1a3
LP
3763 char **i;
3764
3765 assert(c);
3766
3767 if (!runtime_prefix)
3768 return 0;
3769
3536f49e 3770 STRV_FOREACH(i, c->directories[EXEC_DIRECTORY_RUNTIME].paths) {
e66cf1a3
LP
3771 _cleanup_free_ char *p;
3772
605405c6 3773 p = strjoin(runtime_prefix, "/", *i);
e66cf1a3
LP
3774 if (!p)
3775 return -ENOMEM;
3776
6c47cd7d 3777 /* We execute this synchronously, since we need to be sure this is gone when we start the service
e66cf1a3 3778 * next. */
c6878637 3779 (void) rm_rf(p, REMOVE_ROOT);
e66cf1a3
LP
3780 }
3781
3782 return 0;
5cb5a6ff
LP
3783}
3784
34cf6c43 3785static void exec_command_done(ExecCommand *c) {
43d0fcbd
LP
3786 assert(c);
3787
a1e58e8e 3788 c->path = mfree(c->path);
6796073e 3789 c->argv = strv_free(c->argv);
43d0fcbd
LP
3790}
3791
da6053d0
LP
3792void exec_command_done_array(ExecCommand *c, size_t n) {
3793 size_t i;
43d0fcbd
LP
3794
3795 for (i = 0; i < n; i++)
3796 exec_command_done(c+i);
3797}
3798
f1acf85a 3799ExecCommand* exec_command_free_list(ExecCommand *c) {
5cb5a6ff
LP
3800 ExecCommand *i;
3801
3802 while ((i = c)) {
71fda00f 3803 LIST_REMOVE(command, c, i);
43d0fcbd 3804 exec_command_done(i);
5cb5a6ff
LP
3805 free(i);
3806 }
f1acf85a
ZJS
3807
3808 return NULL;
5cb5a6ff
LP
3809}
3810
da6053d0
LP
3811void exec_command_free_array(ExecCommand **c, size_t n) {
3812 size_t i;
034c6ed7 3813
f1acf85a
ZJS
3814 for (i = 0; i < n; i++)
3815 c[i] = exec_command_free_list(c[i]);
034c6ed7
LP
3816}
3817
6a1d4d9f
LP
3818void exec_command_reset_status_array(ExecCommand *c, size_t n) {
3819 size_t i;
3820
3821 for (i = 0; i < n; i++)
3822 exec_status_reset(&c[i].exec_status);
3823}
3824
3825void exec_command_reset_status_list_array(ExecCommand **c, size_t n) {
3826 size_t i;
3827
3828 for (i = 0; i < n; i++) {
3829 ExecCommand *z;
3830
3831 LIST_FOREACH(command, z, c[i])
3832 exec_status_reset(&z->exec_status);
3833 }
3834}
3835
039f0e70 3836typedef struct InvalidEnvInfo {
34cf6c43 3837 const Unit *unit;
039f0e70
LP
3838 const char *path;
3839} InvalidEnvInfo;
3840
3841static void invalid_env(const char *p, void *userdata) {
3842 InvalidEnvInfo *info = userdata;
3843
f2341e0a 3844 log_unit_error(info->unit, "Ignoring invalid environment assignment '%s': %s", p, info->path);
039f0e70
LP
3845}
3846
52c239d7
LB
3847const char* exec_context_fdname(const ExecContext *c, int fd_index) {
3848 assert(c);
3849
3850 switch (fd_index) {
5073ff6b 3851
52c239d7
LB
3852 case STDIN_FILENO:
3853 if (c->std_input != EXEC_INPUT_NAMED_FD)
3854 return NULL;
5073ff6b 3855
52c239d7 3856 return c->stdio_fdname[STDIN_FILENO] ?: "stdin";
5073ff6b 3857
52c239d7
LB
3858 case STDOUT_FILENO:
3859 if (c->std_output != EXEC_OUTPUT_NAMED_FD)
3860 return NULL;
5073ff6b 3861
52c239d7 3862 return c->stdio_fdname[STDOUT_FILENO] ?: "stdout";
5073ff6b 3863
52c239d7
LB
3864 case STDERR_FILENO:
3865 if (c->std_error != EXEC_OUTPUT_NAMED_FD)
3866 return NULL;
5073ff6b 3867
52c239d7 3868 return c->stdio_fdname[STDERR_FILENO] ?: "stderr";
5073ff6b 3869
52c239d7
LB
3870 default:
3871 return NULL;
3872 }
3873}
3874
34cf6c43 3875static int exec_context_named_iofds(const ExecContext *c, const ExecParameters *p, int named_iofds[3]) {
da6053d0 3876 size_t i, targets;
56fbd561 3877 const char* stdio_fdname[3];
da6053d0 3878 size_t n_fds;
52c239d7
LB
3879
3880 assert(c);
3881 assert(p);
3882
3883 targets = (c->std_input == EXEC_INPUT_NAMED_FD) +
3884 (c->std_output == EXEC_OUTPUT_NAMED_FD) +
3885 (c->std_error == EXEC_OUTPUT_NAMED_FD);
3886
3887 for (i = 0; i < 3; i++)
3888 stdio_fdname[i] = exec_context_fdname(c, i);
3889
4c47affc
FB
3890 n_fds = p->n_storage_fds + p->n_socket_fds;
3891
3892 for (i = 0; i < n_fds && targets > 0; i++)
56fbd561
ZJS
3893 if (named_iofds[STDIN_FILENO] < 0 &&
3894 c->std_input == EXEC_INPUT_NAMED_FD &&
3895 stdio_fdname[STDIN_FILENO] &&
3896 streq(p->fd_names[i], stdio_fdname[STDIN_FILENO])) {
3897
52c239d7
LB
3898 named_iofds[STDIN_FILENO] = p->fds[i];
3899 targets--;
56fbd561
ZJS
3900
3901 } else if (named_iofds[STDOUT_FILENO] < 0 &&
3902 c->std_output == EXEC_OUTPUT_NAMED_FD &&
3903 stdio_fdname[STDOUT_FILENO] &&
3904 streq(p->fd_names[i], stdio_fdname[STDOUT_FILENO])) {
3905
52c239d7
LB
3906 named_iofds[STDOUT_FILENO] = p->fds[i];
3907 targets--;
56fbd561
ZJS
3908
3909 } else if (named_iofds[STDERR_FILENO] < 0 &&
3910 c->std_error == EXEC_OUTPUT_NAMED_FD &&
3911 stdio_fdname[STDERR_FILENO] &&
3912 streq(p->fd_names[i], stdio_fdname[STDERR_FILENO])) {
3913
52c239d7
LB
3914 named_iofds[STDERR_FILENO] = p->fds[i];
3915 targets--;
3916 }
3917
56fbd561 3918 return targets == 0 ? 0 : -ENOENT;
52c239d7
LB
3919}
3920
34cf6c43 3921static int exec_context_load_environment(const Unit *unit, const ExecContext *c, char ***l) {
8c7be95e
LP
3922 char **i, **r = NULL;
3923
3924 assert(c);
3925 assert(l);
3926
3927 STRV_FOREACH(i, c->environment_files) {
3928 char *fn;
52511fae
ZJS
3929 int k;
3930 unsigned n;
8c7be95e
LP
3931 bool ignore = false;
3932 char **p;
7fd1b19b 3933 _cleanup_globfree_ glob_t pglob = {};
8c7be95e
LP
3934
3935 fn = *i;
3936
3937 if (fn[0] == '-') {
3938 ignore = true;
313cefa1 3939 fn++;
8c7be95e
LP
3940 }
3941
3942 if (!path_is_absolute(fn)) {
8c7be95e
LP
3943 if (ignore)
3944 continue;
3945
3946 strv_free(r);
3947 return -EINVAL;
3948 }
3949
2bef10ab 3950 /* Filename supports globbing, take all matching files */
d8c92e8b
ZJS
3951 k = safe_glob(fn, 0, &pglob);
3952 if (k < 0) {
2bef10ab
PL
3953 if (ignore)
3954 continue;
8c7be95e 3955
2bef10ab 3956 strv_free(r);
d8c92e8b 3957 return k;
2bef10ab 3958 }
8c7be95e 3959
d8c92e8b
ZJS
3960 /* When we don't match anything, -ENOENT should be returned */
3961 assert(pglob.gl_pathc > 0);
3962
3963 for (n = 0; n < pglob.gl_pathc; n++) {
aa8fbc74 3964 k = load_env_file(NULL, pglob.gl_pathv[n], &p);
2bef10ab
PL
3965 if (k < 0) {
3966 if (ignore)
3967 continue;
8c7be95e 3968
2bef10ab 3969 strv_free(r);
2bef10ab 3970 return k;
e9c1ea9d 3971 }
ebc05a09 3972 /* Log invalid environment variables with filename */
039f0e70
LP
3973 if (p) {
3974 InvalidEnvInfo info = {
f2341e0a 3975 .unit = unit,
039f0e70
LP
3976 .path = pglob.gl_pathv[n]
3977 };
3978
3979 p = strv_env_clean_with_callback(p, invalid_env, &info);
3980 }
8c7be95e 3981
234519ae 3982 if (!r)
2bef10ab
PL
3983 r = p;
3984 else {
3985 char **m;
8c7be95e 3986
2bef10ab
PL
3987 m = strv_env_merge(2, r, p);
3988 strv_free(r);
3989 strv_free(p);
c84a9488 3990 if (!m)
2bef10ab 3991 return -ENOMEM;
2bef10ab
PL
3992
3993 r = m;
3994 }
8c7be95e
LP
3995 }
3996 }
3997
3998 *l = r;
3999
4000 return 0;
4001}
4002
6ac8fdc9 4003static bool tty_may_match_dev_console(const char *tty) {
7b912648 4004 _cleanup_free_ char *resolved = NULL;
6ac8fdc9 4005
1e22b5cd
LP
4006 if (!tty)
4007 return true;
4008
a119ec7c 4009 tty = skip_dev_prefix(tty);
6ac8fdc9
MS
4010
4011 /* trivial identity? */
4012 if (streq(tty, "console"))
4013 return true;
4014
7b912648
LP
4015 if (resolve_dev_console(&resolved) < 0)
4016 return true; /* if we could not resolve, assume it may */
6ac8fdc9
MS
4017
4018 /* "tty0" means the active VC, so it may be the same sometimes */
7b912648 4019 return streq(resolved, tty) || (streq(resolved, "tty0") && tty_is_vc(tty));
6ac8fdc9
MS
4020}
4021
34cf6c43 4022bool exec_context_may_touch_console(const ExecContext *ec) {
1e22b5cd
LP
4023
4024 return (ec->tty_reset ||
4025 ec->tty_vhangup ||
4026 ec->tty_vt_disallocate ||
6ac8fdc9
MS
4027 is_terminal_input(ec->std_input) ||
4028 is_terminal_output(ec->std_output) ||
4029 is_terminal_output(ec->std_error)) &&
1e22b5cd 4030 tty_may_match_dev_console(exec_context_tty_path(ec));
6ac8fdc9
MS
4031}
4032
/* Writes every string of the NULL-terminated string vector l to f, each preceded by a single
 * space. A NULL vector results in no output. */
static void strv_fprintf(FILE *f, char **l) {
        char **item;

        assert(f);

        if (!l)
                return;

        for (item = l; *item; item++)
                fprintf(f, " %s", *item);
}
4041
34cf6c43 4042void exec_context_dump(const ExecContext *c, FILE* f, const char *prefix) {
d3070fbd 4043 ExecDirectoryType dt;
c2bbd90b 4044 char **e, **d;
94f04347 4045 unsigned i;
add00535 4046 int r;
9eba9da4 4047
5cb5a6ff
LP
4048 assert(c);
4049 assert(f);
4050
4ad49000 4051 prefix = strempty(prefix);
5cb5a6ff
LP
4052
4053 fprintf(f,
94f04347
LP
4054 "%sUMask: %04o\n"
4055 "%sWorkingDirectory: %s\n"
451a074f 4056 "%sRootDirectory: %s\n"
15ae422b 4057 "%sNonBlocking: %s\n"
64747e2d 4058 "%sPrivateTmp: %s\n"
7f112f50 4059 "%sPrivateDevices: %s\n"
59eeb84b 4060 "%sProtectKernelTunables: %s\n"
e66a2f65 4061 "%sProtectKernelModules: %s\n"
59eeb84b 4062 "%sProtectControlGroups: %s\n"
d251207d
LP
4063 "%sPrivateNetwork: %s\n"
4064 "%sPrivateUsers: %s\n"
1b8689f9
LP
4065 "%sProtectHome: %s\n"
4066 "%sProtectSystem: %s\n"
5d997827 4067 "%sMountAPIVFS: %s\n"
f3e43635 4068 "%sIgnoreSIGPIPE: %s\n"
f4170c67 4069 "%sMemoryDenyWriteExecute: %s\n"
b1edf445
LP
4070 "%sRestrictRealtime: %s\n"
4071 "%sKeyringMode: %s\n",
5cb5a6ff 4072 prefix, c->umask,
9eba9da4 4073 prefix, c->working_directory ? c->working_directory : "/",
451a074f 4074 prefix, c->root_directory ? c->root_directory : "/",
15ae422b 4075 prefix, yes_no(c->non_blocking),
64747e2d 4076 prefix, yes_no(c->private_tmp),
7f112f50 4077 prefix, yes_no(c->private_devices),
59eeb84b 4078 prefix, yes_no(c->protect_kernel_tunables),
e66a2f65 4079 prefix, yes_no(c->protect_kernel_modules),
59eeb84b 4080 prefix, yes_no(c->protect_control_groups),
d251207d
LP
4081 prefix, yes_no(c->private_network),
4082 prefix, yes_no(c->private_users),
1b8689f9
LP
4083 prefix, protect_home_to_string(c->protect_home),
4084 prefix, protect_system_to_string(c->protect_system),
5d997827 4085 prefix, yes_no(c->mount_apivfs),
f3e43635 4086 prefix, yes_no(c->ignore_sigpipe),
f4170c67 4087 prefix, yes_no(c->memory_deny_write_execute),
b1edf445
LP
4088 prefix, yes_no(c->restrict_realtime),
4089 prefix, exec_keyring_mode_to_string(c->keyring_mode));
fb33a393 4090
915e6d16
LP
4091 if (c->root_image)
4092 fprintf(f, "%sRootImage: %s\n", prefix, c->root_image);
4093
8c7be95e
LP
4094 STRV_FOREACH(e, c->environment)
4095 fprintf(f, "%sEnvironment: %s\n", prefix, *e);
4096
4097 STRV_FOREACH(e, c->environment_files)
4098 fprintf(f, "%sEnvironmentFile: %s\n", prefix, *e);
94f04347 4099
b4c14404
FB
4100 STRV_FOREACH(e, c->pass_environment)
4101 fprintf(f, "%sPassEnvironment: %s\n", prefix, *e);
4102
00819cc1
LP
4103 STRV_FOREACH(e, c->unset_environment)
4104 fprintf(f, "%sUnsetEnvironment: %s\n", prefix, *e);
4105
53f47dfc
YW
4106 fprintf(f, "%sRuntimeDirectoryPreserve: %s\n", prefix, exec_preserve_mode_to_string(c->runtime_directory_preserve_mode));
4107
72fd1768 4108 for (dt = 0; dt < _EXEC_DIRECTORY_TYPE_MAX; dt++) {
3536f49e
YW
4109 fprintf(f, "%s%sMode: %04o\n", prefix, exec_directory_type_to_string(dt), c->directories[dt].mode);
4110
4111 STRV_FOREACH(d, c->directories[dt].paths)
4112 fprintf(f, "%s%s: %s\n", prefix, exec_directory_type_to_string(dt), *d);
4113 }
c2bbd90b 4114
fb33a393
LP
4115 if (c->nice_set)
4116 fprintf(f,
4117 "%sNice: %i\n",
4118 prefix, c->nice);
4119
dd6c17b1 4120 if (c->oom_score_adjust_set)
fb33a393 4121 fprintf(f,
dd6c17b1
LP
4122 "%sOOMScoreAdjust: %i\n",
4123 prefix, c->oom_score_adjust);
9eba9da4 4124
94f04347 4125 for (i = 0; i < RLIM_NLIMITS; i++)
3c11da9d 4126 if (c->rlimit[i]) {
4c3a2b84 4127 fprintf(f, "%sLimit%s: " RLIM_FMT "\n",
3c11da9d 4128 prefix, rlimit_to_string(i), c->rlimit[i]->rlim_max);
4c3a2b84 4129 fprintf(f, "%sLimit%sSoft: " RLIM_FMT "\n",
3c11da9d
EV
4130 prefix, rlimit_to_string(i), c->rlimit[i]->rlim_cur);
4131 }
94f04347 4132
f8b69d1d 4133 if (c->ioprio_set) {
1756a011 4134 _cleanup_free_ char *class_str = NULL;
f8b69d1d 4135
837df140
YW
4136 r = ioprio_class_to_string_alloc(IOPRIO_PRIO_CLASS(c->ioprio), &class_str);
4137 if (r >= 0)
4138 fprintf(f, "%sIOSchedulingClass: %s\n", prefix, class_str);
4139
4140 fprintf(f, "%sIOPriority: %lu\n", prefix, IOPRIO_PRIO_DATA(c->ioprio));
f8b69d1d 4141 }
94f04347 4142
f8b69d1d 4143 if (c->cpu_sched_set) {
1756a011 4144 _cleanup_free_ char *policy_str = NULL;
f8b69d1d 4145
837df140
YW
4146 r = sched_policy_to_string_alloc(c->cpu_sched_policy, &policy_str);
4147 if (r >= 0)
4148 fprintf(f, "%sCPUSchedulingPolicy: %s\n", prefix, policy_str);
4149
94f04347 4150 fprintf(f,
38b48754
LP
4151 "%sCPUSchedulingPriority: %i\n"
4152 "%sCPUSchedulingResetOnFork: %s\n",
38b48754
LP
4153 prefix, c->cpu_sched_priority,
4154 prefix, yes_no(c->cpu_sched_reset_on_fork));
b929bf04 4155 }
94f04347 4156
82c121a4 4157 if (c->cpuset) {
94f04347 4158 fprintf(f, "%sCPUAffinity:", prefix);
82c121a4
LP
4159 for (i = 0; i < c->cpuset_ncpus; i++)
4160 if (CPU_ISSET_S(i, CPU_ALLOC_SIZE(c->cpuset_ncpus), c->cpuset))
43a99a7a 4161 fprintf(f, " %u", i);
94f04347
LP
4162 fputs("\n", f);
4163 }
4164
3a43da28 4165 if (c->timer_slack_nsec != NSEC_INFINITY)
ccd06097 4166 fprintf(f, "%sTimerSlackNSec: "NSEC_FMT "\n", prefix, c->timer_slack_nsec);
94f04347
LP
4167
4168 fprintf(f,
80876c20
LP
4169 "%sStandardInput: %s\n"
4170 "%sStandardOutput: %s\n"
4171 "%sStandardError: %s\n",
4172 prefix, exec_input_to_string(c->std_input),
4173 prefix, exec_output_to_string(c->std_output),
4174 prefix, exec_output_to_string(c->std_error));
4175
befc4a80
LP
4176 if (c->std_input == EXEC_INPUT_NAMED_FD)
4177 fprintf(f, "%sStandardInputFileDescriptorName: %s\n", prefix, c->stdio_fdname[STDIN_FILENO]);
4178 if (c->std_output == EXEC_OUTPUT_NAMED_FD)
4179 fprintf(f, "%sStandardOutputFileDescriptorName: %s\n", prefix, c->stdio_fdname[STDOUT_FILENO]);
4180 if (c->std_error == EXEC_OUTPUT_NAMED_FD)
4181 fprintf(f, "%sStandardErrorFileDescriptorName: %s\n", prefix, c->stdio_fdname[STDERR_FILENO]);
4182
4183 if (c->std_input == EXEC_INPUT_FILE)
4184 fprintf(f, "%sStandardInputFile: %s\n", prefix, c->stdio_file[STDIN_FILENO]);
4185 if (c->std_output == EXEC_OUTPUT_FILE)
4186 fprintf(f, "%sStandardOutputFile: %s\n", prefix, c->stdio_file[STDOUT_FILENO]);
566b7d23
ZD
4187 if (c->std_output == EXEC_OUTPUT_FILE_APPEND)
4188 fprintf(f, "%sStandardOutputFileToAppend: %s\n", prefix, c->stdio_file[STDOUT_FILENO]);
befc4a80
LP
4189 if (c->std_error == EXEC_OUTPUT_FILE)
4190 fprintf(f, "%sStandardErrorFile: %s\n", prefix, c->stdio_file[STDERR_FILENO]);
566b7d23
ZD
4191 if (c->std_error == EXEC_OUTPUT_FILE_APPEND)
4192 fprintf(f, "%sStandardErrorFileToAppend: %s\n", prefix, c->stdio_file[STDERR_FILENO]);
befc4a80 4193
80876c20
LP
4194 if (c->tty_path)
4195 fprintf(f,
6ea832a2
LP
4196 "%sTTYPath: %s\n"
4197 "%sTTYReset: %s\n"
4198 "%sTTYVHangup: %s\n"
4199 "%sTTYVTDisallocate: %s\n",
4200 prefix, c->tty_path,
4201 prefix, yes_no(c->tty_reset),
4202 prefix, yes_no(c->tty_vhangup),
4203 prefix, yes_no(c->tty_vt_disallocate));
94f04347 4204
9f6444eb
LP
4205 if (IN_SET(c->std_output,
4206 EXEC_OUTPUT_SYSLOG,
4207 EXEC_OUTPUT_KMSG,
4208 EXEC_OUTPUT_JOURNAL,
4209 EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
4210 EXEC_OUTPUT_KMSG_AND_CONSOLE,
4211 EXEC_OUTPUT_JOURNAL_AND_CONSOLE) ||
4212 IN_SET(c->std_error,
4213 EXEC_OUTPUT_SYSLOG,
4214 EXEC_OUTPUT_KMSG,
4215 EXEC_OUTPUT_JOURNAL,
4216 EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
4217 EXEC_OUTPUT_KMSG_AND_CONSOLE,
4218 EXEC_OUTPUT_JOURNAL_AND_CONSOLE)) {
f8b69d1d 4219
5ce70e5b 4220 _cleanup_free_ char *fac_str = NULL, *lvl_str = NULL;
f8b69d1d 4221
837df140
YW
4222 r = log_facility_unshifted_to_string_alloc(c->syslog_priority >> 3, &fac_str);
4223 if (r >= 0)
4224 fprintf(f, "%sSyslogFacility: %s\n", prefix, fac_str);
f8b69d1d 4225
837df140
YW
4226 r = log_level_to_string_alloc(LOG_PRI(c->syslog_priority), &lvl_str);
4227 if (r >= 0)
4228 fprintf(f, "%sSyslogLevel: %s\n", prefix, lvl_str);
f8b69d1d 4229 }
94f04347 4230
d3070fbd
LP
4231 if (c->log_level_max >= 0) {
4232 _cleanup_free_ char *t = NULL;
4233
4234 (void) log_level_to_string_alloc(c->log_level_max, &t);
4235
4236 fprintf(f, "%sLogLevelMax: %s\n", prefix, strna(t));
4237 }
4238
90fc172e
AZ
4239 if (c->log_rate_limit_interval_usec > 0) {
4240 char buf_timespan[FORMAT_TIMESPAN_MAX];
4241
4242 fprintf(f,
4243 "%sLogRateLimitIntervalSec: %s\n",
4244 prefix, format_timespan(buf_timespan, sizeof(buf_timespan), c->log_rate_limit_interval_usec, USEC_PER_SEC));
4245 }
4246
4247 if (c->log_rate_limit_burst > 0)
4248 fprintf(f, "%sLogRateLimitBurst: %u\n", prefix, c->log_rate_limit_burst);
4249
d3070fbd
LP
4250 if (c->n_log_extra_fields > 0) {
4251 size_t j;
4252
4253 for (j = 0; j < c->n_log_extra_fields; j++) {
4254 fprintf(f, "%sLogExtraFields: ", prefix);
4255 fwrite(c->log_extra_fields[j].iov_base,
4256 1, c->log_extra_fields[j].iov_len,
4257 f);
4258 fputc('\n', f);
4259 }
4260 }
4261
07d46372
YW
4262 if (c->secure_bits) {
4263 _cleanup_free_ char *str = NULL;
4264
4265 r = secure_bits_to_string_alloc(c->secure_bits, &str);
4266 if (r >= 0)
4267 fprintf(f, "%sSecure Bits: %s\n", prefix, str);
4268 }
94f04347 4269
a103496c 4270 if (c->capability_bounding_set != CAP_ALL) {
dd1f5bd0 4271 _cleanup_free_ char *str = NULL;
94f04347 4272
dd1f5bd0
YW
4273 r = capability_set_to_string_alloc(c->capability_bounding_set, &str);
4274 if (r >= 0)
4275 fprintf(f, "%sCapabilityBoundingSet: %s\n", prefix, str);
755d4b67
IP
4276 }
4277
4278 if (c->capability_ambient_set != 0) {
dd1f5bd0 4279 _cleanup_free_ char *str = NULL;
755d4b67 4280
dd1f5bd0
YW
4281 r = capability_set_to_string_alloc(c->capability_ambient_set, &str);
4282 if (r >= 0)
4283 fprintf(f, "%sAmbientCapabilities: %s\n", prefix, str);
94f04347
LP
4284 }
4285
4286 if (c->user)
f2d3769a 4287 fprintf(f, "%sUser: %s\n", prefix, c->user);
94f04347 4288 if (c->group)
f2d3769a 4289 fprintf(f, "%sGroup: %s\n", prefix, c->group);
94f04347 4290
29206d46
LP
4291 fprintf(f, "%sDynamicUser: %s\n", prefix, yes_no(c->dynamic_user));
4292
ac6e8be6 4293 if (!strv_isempty(c->supplementary_groups)) {
94f04347 4294 fprintf(f, "%sSupplementaryGroups:", prefix);
15ae422b
LP
4295 strv_fprintf(f, c->supplementary_groups);
4296 fputs("\n", f);
4297 }
94f04347 4298
5b6319dc 4299 if (c->pam_name)
f2d3769a 4300 fprintf(f, "%sPAMName: %s\n", prefix, c->pam_name);
5b6319dc 4301
58629001 4302 if (!strv_isempty(c->read_write_paths)) {
2a624c36
AP
4303 fprintf(f, "%sReadWritePaths:", prefix);
4304 strv_fprintf(f, c->read_write_paths);
15ae422b
LP
4305 fputs("\n", f);
4306 }
4307
58629001 4308 if (!strv_isempty(c->read_only_paths)) {
2a624c36
AP
4309 fprintf(f, "%sReadOnlyPaths:", prefix);
4310 strv_fprintf(f, c->read_only_paths);
15ae422b
LP
4311 fputs("\n", f);
4312 }
94f04347 4313
58629001 4314 if (!strv_isempty(c->inaccessible_paths)) {
2a624c36
AP
4315 fprintf(f, "%sInaccessiblePaths:", prefix);
4316 strv_fprintf(f, c->inaccessible_paths);
94f04347
LP
4317 fputs("\n", f);
4318 }
2e22afe9 4319
d2d6c096 4320 if (c->n_bind_mounts > 0)
4ca763a9
YW
4321 for (i = 0; i < c->n_bind_mounts; i++)
4322 fprintf(f, "%s%s: %s%s:%s:%s\n", prefix,
d2d6c096 4323 c->bind_mounts[i].read_only ? "BindReadOnlyPaths" : "BindPaths",
4ca763a9 4324 c->bind_mounts[i].ignore_enoent ? "-": "",
d2d6c096
LP
4325 c->bind_mounts[i].source,
4326 c->bind_mounts[i].destination,
4327 c->bind_mounts[i].recursive ? "rbind" : "norbind");
d2d6c096 4328
2abd4e38
YW
4329 if (c->n_temporary_filesystems > 0)
4330 for (i = 0; i < c->n_temporary_filesystems; i++) {
4331 TemporaryFileSystem *t = c->temporary_filesystems + i;
4332
4333 fprintf(f, "%sTemporaryFileSystem: %s%s%s\n", prefix,
4334 t->path,
4335 isempty(t->options) ? "" : ":",
4336 strempty(t->options));
4337 }
4338
169c1bda
LP
4339 if (c->utmp_id)
4340 fprintf(f,
4341 "%sUtmpIdentifier: %s\n",
4342 prefix, c->utmp_id);
7b52a628
MS
4343
4344 if (c->selinux_context)
4345 fprintf(f,
5f8640fb
LP
4346 "%sSELinuxContext: %s%s\n",
4347 prefix, c->selinux_context_ignore ? "-" : "", c->selinux_context);
17df7223 4348
80c21aea
WC
4349 if (c->apparmor_profile)
4350 fprintf(f,
4351 "%sAppArmorProfile: %s%s\n",
4352 prefix, c->apparmor_profile_ignore ? "-" : "", c->apparmor_profile);
4353
4354 if (c->smack_process_label)
4355 fprintf(f,
4356 "%sSmackProcessLabel: %s%s\n",
4357 prefix, c->smack_process_label_ignore ? "-" : "", c->smack_process_label);
4358
050f7277 4359 if (c->personality != PERSONALITY_INVALID)
ac45f971
LP
4360 fprintf(f,
4361 "%sPersonality: %s\n",
4362 prefix, strna(personality_to_string(c->personality)));
4363
78e864e5
TM
4364 fprintf(f,
4365 "%sLockPersonality: %s\n",
4366 prefix, yes_no(c->lock_personality));
4367
17df7223 4368 if (c->syscall_filter) {
349cc4a5 4369#if HAVE_SECCOMP
17df7223 4370 Iterator j;
8cfa775f 4371 void *id, *val;
17df7223 4372 bool first = true;
351a19b1 4373#endif
17df7223
LP
4374
4375 fprintf(f,
57183d11 4376 "%sSystemCallFilter: ",
17df7223
LP
4377 prefix);
4378
4379 if (!c->syscall_whitelist)
4380 fputc('~', f);
4381
349cc4a5 4382#if HAVE_SECCOMP
8cfa775f 4383 HASHMAP_FOREACH_KEY(val, id, c->syscall_filter, j) {
17df7223 4384 _cleanup_free_ char *name = NULL;
8cfa775f
YW
4385 const char *errno_name = NULL;
4386 int num = PTR_TO_INT(val);
17df7223
LP
4387
4388 if (first)
4389 first = false;
4390 else
4391 fputc(' ', f);
4392
57183d11 4393 name = seccomp_syscall_resolve_num_arch(SCMP_ARCH_NATIVE, PTR_TO_INT(id) - 1);
17df7223 4394 fputs(strna(name), f);
8cfa775f
YW
4395
4396 if (num >= 0) {
4397 errno_name = errno_to_name(num);
4398 if (errno_name)
4399 fprintf(f, ":%s", errno_name);
4400 else
4401 fprintf(f, ":%d", num);
4402 }
17df7223 4403 }
351a19b1 4404#endif
17df7223
LP
4405
4406 fputc('\n', f);
4407 }
4408
57183d11 4409 if (c->syscall_archs) {
349cc4a5 4410#if HAVE_SECCOMP
57183d11
LP
4411 Iterator j;
4412 void *id;
4413#endif
4414
4415 fprintf(f,
4416 "%sSystemCallArchitectures:",
4417 prefix);
4418
349cc4a5 4419#if HAVE_SECCOMP
57183d11
LP
4420 SET_FOREACH(id, c->syscall_archs, j)
4421 fprintf(f, " %s", strna(seccomp_arch_to_string(PTR_TO_UINT32(id) - 1)));
4422#endif
4423 fputc('\n', f);
4424 }
4425
add00535
LP
4426 if (exec_context_restrict_namespaces_set(c)) {
4427 _cleanup_free_ char *s = NULL;
4428
86c2a9f1 4429 r = namespace_flags_to_string(c->restrict_namespaces, &s);
add00535
LP
4430 if (r >= 0)
4431 fprintf(f, "%sRestrictNamespaces: %s\n",
4432 prefix, s);
4433 }
4434
3df90f24
YW
4435 if (c->syscall_errno > 0) {
4436 const char *errno_name;
4437
4438 fprintf(f, "%sSystemCallErrorNumber: ", prefix);
4439
4440 errno_name = errno_to_name(c->syscall_errno);
4441 if (errno_name)
4442 fprintf(f, "%s\n", errno_name);
4443 else
4444 fprintf(f, "%d\n", c->syscall_errno);
4445 }
eef65bf3
MS
4446
4447 if (c->apparmor_profile)
4448 fprintf(f,
4449 "%sAppArmorProfile: %s%s\n",
4450 prefix, c->apparmor_profile_ignore ? "-" : "", c->apparmor_profile);
5cb5a6ff
LP
4451}
4452
34cf6c43 4453bool exec_context_maintains_privileges(const ExecContext *c) {
a931ad47
LP
4454 assert(c);
4455
61233823 4456 /* Returns true if the process forked off would run under
a931ad47
LP
4457 * an unchanged UID or as root. */
4458
4459 if (!c->user)
4460 return true;
4461
4462 if (streq(c->user, "root") || streq(c->user, "0"))
4463 return true;
4464
4465 return false;
4466}
4467
34cf6c43 4468int exec_context_get_effective_ioprio(const ExecContext *c) {
7f452159
LP
4469 int p;
4470
4471 assert(c);
4472
4473 if (c->ioprio_set)
4474 return c->ioprio;
4475
4476 p = ioprio_get(IOPRIO_WHO_PROCESS, 0);
4477 if (p < 0)
4478 return IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 4);
4479
4480 return p;
4481}
4482
d3070fbd
LP
4483void exec_context_free_log_extra_fields(ExecContext *c) {
4484 size_t l;
4485
4486 assert(c);
4487
4488 for (l = 0; l < c->n_log_extra_fields; l++)
4489 free(c->log_extra_fields[l].iov_base);
4490 c->log_extra_fields = mfree(c->log_extra_fields);
4491 c->n_log_extra_fields = 0;
4492}
4493
b58b4116 4494void exec_status_start(ExecStatus *s, pid_t pid) {
034c6ed7 4495 assert(s);
5cb5a6ff 4496
2ed26ed0
LP
4497 *s = (ExecStatus) {
4498 .pid = pid,
4499 };
4500
b58b4116
LP
4501 dual_timestamp_get(&s->start_timestamp);
4502}
4503
34cf6c43 4504void exec_status_exit(ExecStatus *s, const ExecContext *context, pid_t pid, int code, int status) {
b58b4116
LP
4505 assert(s);
4506
2ed26ed0
LP
4507 if (s->pid != pid) {
4508 *s = (ExecStatus) {
4509 .pid = pid,
4510 };
4511 }
b58b4116 4512
63983207 4513 dual_timestamp_get(&s->exit_timestamp);
9fb86720 4514
034c6ed7
LP
4515 s->code = code;
4516 s->status = status;
169c1bda 4517
6ea832a2
LP
4518 if (context) {
4519 if (context->utmp_id)
2ed26ed0 4520 (void) utmp_put_dead_process(context->utmp_id, pid, code, status);
6ea832a2 4521
1e22b5cd 4522 exec_context_tty_reset(context, NULL);
6ea832a2 4523 }
9fb86720
LP
4524}
4525
6a1d4d9f
LP
4526void exec_status_reset(ExecStatus *s) {
4527 assert(s);
4528
4529 *s = (ExecStatus) {};
4530}
4531
34cf6c43 4532void exec_status_dump(const ExecStatus *s, FILE *f, const char *prefix) {
9fb86720
LP
4533 char buf[FORMAT_TIMESTAMP_MAX];
4534
4535 assert(s);
4536 assert(f);
4537
9fb86720
LP
4538 if (s->pid <= 0)
4539 return;
4540
4c940960
LP
4541 prefix = strempty(prefix);
4542
9fb86720 4543 fprintf(f,
ccd06097
ZJS
4544 "%sPID: "PID_FMT"\n",
4545 prefix, s->pid);
9fb86720 4546
af9d16e1 4547 if (dual_timestamp_is_set(&s->start_timestamp))
9fb86720
LP
4548 fprintf(f,
4549 "%sStart Timestamp: %s\n",
63983207 4550 prefix, format_timestamp(buf, sizeof(buf), s->start_timestamp.realtime));
9fb86720 4551
af9d16e1 4552 if (dual_timestamp_is_set(&s->exit_timestamp))
9fb86720
LP
4553 fprintf(f,
4554 "%sExit Timestamp: %s\n"
4555 "%sExit Code: %s\n"
4556 "%sExit Status: %i\n",
63983207 4557 prefix, format_timestamp(buf, sizeof(buf), s->exit_timestamp.realtime),
9fb86720
LP
4558 prefix, sigchld_code_to_string(s->code),
4559 prefix, s->status);
5cb5a6ff 4560}
44d8db9e 4561
34cf6c43 4562static char *exec_command_line(char **argv) {
44d8db9e
LP
4563 size_t k;
4564 char *n, *p, **a;
4565 bool first = true;
4566
9e2f7c11 4567 assert(argv);
44d8db9e 4568
9164977d 4569 k = 1;
9e2f7c11 4570 STRV_FOREACH(a, argv)
44d8db9e
LP
4571 k += strlen(*a)+3;
4572
5cd9cd35
LP
4573 n = new(char, k);
4574 if (!n)
44d8db9e
LP
4575 return NULL;
4576
4577 p = n;
9e2f7c11 4578 STRV_FOREACH(a, argv) {
44d8db9e
LP
4579
4580 if (!first)
4581 *(p++) = ' ';
4582 else
4583 first = false;
4584
4585 if (strpbrk(*a, WHITESPACE)) {
4586 *(p++) = '\'';
4587 p = stpcpy(p, *a);
4588 *(p++) = '\'';
4589 } else
4590 p = stpcpy(p, *a);
4591
4592 }
4593
9164977d
LP
4594 *p = 0;
4595
44d8db9e
LP
4596 /* FIXME: this doesn't really handle arguments that have
4597 * spaces and ticks in them */
4598
4599 return n;
4600}
4601
34cf6c43 4602static void exec_command_dump(ExecCommand *c, FILE *f, const char *prefix) {
e1d75803 4603 _cleanup_free_ char *cmd = NULL;
4c940960 4604 const char *prefix2;
44d8db9e
LP
4605
4606 assert(c);
4607 assert(f);
4608
4c940960 4609 prefix = strempty(prefix);
63c372cb 4610 prefix2 = strjoina(prefix, "\t");
44d8db9e 4611
9e2f7c11 4612 cmd = exec_command_line(c->argv);
44d8db9e
LP
4613 fprintf(f,
4614 "%sCommand Line: %s\n",
4615 prefix, cmd ? cmd : strerror(ENOMEM));
4616
9fb86720 4617 exec_status_dump(&c->exec_status, f, prefix2);
44d8db9e
LP
4618}
4619
4620void exec_command_dump_list(ExecCommand *c, FILE *f, const char *prefix) {
4621 assert(f);
4622
4c940960 4623 prefix = strempty(prefix);
44d8db9e
LP
4624
4625 LIST_FOREACH(command, c, c)
4626 exec_command_dump(c, f, prefix);
4627}
94f04347 4628
a6a80b4f
LP
4629void exec_command_append_list(ExecCommand **l, ExecCommand *e) {
4630 ExecCommand *end;
4631
4632 assert(l);
4633 assert(e);
4634
4635 if (*l) {
35b8ca3a 4636 /* It's kind of important, that we keep the order here */
71fda00f
LP
4637 LIST_FIND_TAIL(command, *l, end);
4638 LIST_INSERT_AFTER(command, *l, end, e);
a6a80b4f
LP
4639 } else
4640 *l = e;
4641}
4642
26fd040d
LP
4643int exec_command_set(ExecCommand *c, const char *path, ...) {
4644 va_list ap;
4645 char **l, *p;
4646
4647 assert(c);
4648 assert(path);
4649
4650 va_start(ap, path);
4651 l = strv_new_ap(path, ap);
4652 va_end(ap);
4653
4654 if (!l)
4655 return -ENOMEM;
4656
250a918d
LP
4657 p = strdup(path);
4658 if (!p) {
26fd040d
LP
4659 strv_free(l);
4660 return -ENOMEM;
4661 }
4662
6897dfe8 4663 free_and_replace(c->path, p);
26fd040d 4664
130d3d22 4665 return strv_free_and_replace(c->argv, l);
26fd040d
LP
4666}
4667
86b23b07 4668int exec_command_append(ExecCommand *c, const char *path, ...) {
e63ff941 4669 _cleanup_strv_free_ char **l = NULL;
86b23b07 4670 va_list ap;
86b23b07
JS
4671 int r;
4672
4673 assert(c);
4674 assert(path);
4675
4676 va_start(ap, path);
4677 l = strv_new_ap(path, ap);
4678 va_end(ap);
4679
4680 if (!l)
4681 return -ENOMEM;
4682
e287086b 4683 r = strv_extend_strv(&c->argv, l, false);
e63ff941 4684 if (r < 0)
86b23b07 4685 return r;
86b23b07
JS
4686
4687 return 0;
4688}
4689
e8a565cb
YW
4690static void *remove_tmpdir_thread(void *p) {
4691 _cleanup_free_ char *path = p;
86b23b07 4692
e8a565cb
YW
4693 (void) rm_rf(path, REMOVE_ROOT|REMOVE_PHYSICAL);
4694 return NULL;
4695}
4696
4697static ExecRuntime* exec_runtime_free(ExecRuntime *rt, bool destroy) {
4698 int r;
4699
4700 if (!rt)
4701 return NULL;
4702
4703 if (rt->manager)
4704 (void) hashmap_remove(rt->manager->exec_runtime_by_id, rt->id);
4705
4706 /* When destroy is true, then rm_rf tmp_dir and var_tmp_dir. */
4707 if (destroy && rt->tmp_dir) {
4708 log_debug("Spawning thread to nuke %s", rt->tmp_dir);
4709
4710 r = asynchronous_job(remove_tmpdir_thread, rt->tmp_dir);
4711 if (r < 0) {
4712 log_warning_errno(r, "Failed to nuke %s: %m", rt->tmp_dir);
4713 free(rt->tmp_dir);
4714 }
4715
4716 rt->tmp_dir = NULL;
4717 }
613b411c 4718
e8a565cb
YW
4719 if (destroy && rt->var_tmp_dir) {
4720 log_debug("Spawning thread to nuke %s", rt->var_tmp_dir);
4721
4722 r = asynchronous_job(remove_tmpdir_thread, rt->var_tmp_dir);
4723 if (r < 0) {
4724 log_warning_errno(r, "Failed to nuke %s: %m", rt->var_tmp_dir);
4725 free(rt->var_tmp_dir);
4726 }
4727
4728 rt->var_tmp_dir = NULL;
4729 }
4730
4731 rt->id = mfree(rt->id);
4732 rt->tmp_dir = mfree(rt->tmp_dir);
4733 rt->var_tmp_dir = mfree(rt->var_tmp_dir);
4734 safe_close_pair(rt->netns_storage_socket);
4735 return mfree(rt);
4736}
4737
4738static void exec_runtime_freep(ExecRuntime **rt) {
613b411c 4739 if (*rt)
e8a565cb
YW
4740 (void) exec_runtime_free(*rt, false);
4741}
4742
4743static int exec_runtime_allocate(ExecRuntime **rt) {
4744 assert(rt);
613b411c
LP
4745
4746 *rt = new0(ExecRuntime, 1);
f146f5e1 4747 if (!*rt)
613b411c
LP
4748 return -ENOMEM;
4749
613b411c 4750 (*rt)->netns_storage_socket[0] = (*rt)->netns_storage_socket[1] = -1;
613b411c
LP
4751 return 0;
4752}
4753
e8a565cb
YW
4754static int exec_runtime_add(
4755 Manager *m,
4756 const char *id,
4757 const char *tmp_dir,
4758 const char *var_tmp_dir,
4759 const int netns_storage_socket[2],
4760 ExecRuntime **ret) {
4761
4762 _cleanup_(exec_runtime_freep) ExecRuntime *rt = NULL;
613b411c
LP
4763 int r;
4764
e8a565cb 4765 assert(m);
613b411c
LP
4766 assert(id);
4767
e8a565cb
YW
4768 r = hashmap_ensure_allocated(&m->exec_runtime_by_id, &string_hash_ops);
4769 if (r < 0)
4770 return r;
613b411c 4771
e8a565cb 4772 r = exec_runtime_allocate(&rt);
613b411c
LP
4773 if (r < 0)
4774 return r;
4775
e8a565cb
YW
4776 rt->id = strdup(id);
4777 if (!rt->id)
4778 return -ENOMEM;
4779
4780 if (tmp_dir) {
4781 rt->tmp_dir = strdup(tmp_dir);
4782 if (!rt->tmp_dir)
4783 return -ENOMEM;
4784
4785 /* When tmp_dir is set, then we require var_tmp_dir is also set. */
4786 assert(var_tmp_dir);
4787 rt->var_tmp_dir = strdup(var_tmp_dir);
4788 if (!rt->var_tmp_dir)
4789 return -ENOMEM;
4790 }
4791
4792 if (netns_storage_socket) {
4793 rt->netns_storage_socket[0] = netns_storage_socket[0];
4794 rt->netns_storage_socket[1] = netns_storage_socket[1];
613b411c
LP
4795 }
4796
e8a565cb
YW
4797 r = hashmap_put(m->exec_runtime_by_id, rt->id, rt);
4798 if (r < 0)
4799 return r;
4800
4801 rt->manager = m;
4802
4803 if (ret)
4804 *ret = rt;
4805
4806 /* do not remove created ExecRuntime object when the operation succeeds. */
4807 rt = NULL;
4808 return 0;
4809}
4810
4811static int exec_runtime_make(Manager *m, const ExecContext *c, const char *id, ExecRuntime **ret) {
4812 _cleanup_free_ char *tmp_dir = NULL, *var_tmp_dir = NULL;
4813 _cleanup_close_pair_ int netns_storage_socket[2] = {-1, -1};
4814 int r;
4815
4816 assert(m);
4817 assert(c);
4818 assert(id);
4819
4820 /* It is not necessary to create ExecRuntime object. */
4821 if (!c->private_network && !c->private_tmp)
4822 return 0;
4823
4824 if (c->private_tmp) {
4825 r = setup_tmp_dirs(id, &tmp_dir, &var_tmp_dir);
613b411c
LP
4826 if (r < 0)
4827 return r;
4828 }
4829
e8a565cb
YW
4830 if (c->private_network) {
4831 if (socketpair(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC, 0, netns_storage_socket) < 0)
4832 return -errno;
4833 }
4834
4835 r = exec_runtime_add(m, id, tmp_dir, var_tmp_dir, netns_storage_socket, ret);
4836 if (r < 0)
4837 return r;
4838
4839 /* Avoid cleanup */
4840 netns_storage_socket[0] = -1;
4841 netns_storage_socket[1] = -1;
613b411c
LP
4842 return 1;
4843}
4844
e8a565cb
YW
4845int exec_runtime_acquire(Manager *m, const ExecContext *c, const char *id, bool create, ExecRuntime **ret) {
4846 ExecRuntime *rt;
4847 int r;
613b411c 4848
e8a565cb
YW
4849 assert(m);
4850 assert(id);
4851 assert(ret);
4852
4853 rt = hashmap_get(m->exec_runtime_by_id, id);
4854 if (rt)
4855 /* We already have a ExecRuntime object, let's increase the ref count and reuse it */
4856 goto ref;
4857
4858 if (!create)
4859 return 0;
4860
4861 /* If not found, then create a new object. */
4862 r = exec_runtime_make(m, c, id, &rt);
4863 if (r <= 0)
4864 /* When r == 0, it is not necessary to create ExecRuntime object. */
4865 return r;
613b411c 4866
e8a565cb
YW
4867ref:
4868 /* increment reference counter. */
4869 rt->n_ref++;
4870 *ret = rt;
4871 return 1;
4872}
613b411c 4873
e8a565cb
YW
4874ExecRuntime *exec_runtime_unref(ExecRuntime *rt, bool destroy) {
4875 if (!rt)
613b411c
LP
4876 return NULL;
4877
e8a565cb 4878 assert(rt->n_ref > 0);
613b411c 4879
e8a565cb
YW
4880 rt->n_ref--;
4881 if (rt->n_ref > 0)
f2341e0a
LP
4882 return NULL;
4883
e8a565cb 4884 return exec_runtime_free(rt, destroy);
613b411c
LP
4885}
4886
e8a565cb
YW
4887int exec_runtime_serialize(const Manager *m, FILE *f, FDSet *fds) {
4888 ExecRuntime *rt;
4889 Iterator i;
4890
4891 assert(m);
613b411c
LP
4892 assert(f);
4893 assert(fds);
4894
e8a565cb
YW
4895 HASHMAP_FOREACH(rt, m->exec_runtime_by_id, i) {
4896 fprintf(f, "exec-runtime=%s", rt->id);
613b411c 4897
e8a565cb
YW
4898 if (rt->tmp_dir)
4899 fprintf(f, " tmp-dir=%s", rt->tmp_dir);
613b411c 4900
e8a565cb
YW
4901 if (rt->var_tmp_dir)
4902 fprintf(f, " var-tmp-dir=%s", rt->var_tmp_dir);
613b411c 4903
e8a565cb
YW
4904 if (rt->netns_storage_socket[0] >= 0) {
4905 int copy;
613b411c 4906
e8a565cb
YW
4907 copy = fdset_put_dup(fds, rt->netns_storage_socket[0]);
4908 if (copy < 0)
4909 return copy;
613b411c 4910
e8a565cb
YW
4911 fprintf(f, " netns-socket-0=%i", copy);
4912 }
613b411c 4913
e8a565cb
YW
4914 if (rt->netns_storage_socket[1] >= 0) {
4915 int copy;
613b411c 4916
e8a565cb
YW
4917 copy = fdset_put_dup(fds, rt->netns_storage_socket[1]);
4918 if (copy < 0)
4919 return copy;
613b411c 4920
e8a565cb
YW
4921 fprintf(f, " netns-socket-1=%i", copy);
4922 }
4923
4924 fputc('\n', f);
613b411c
LP
4925 }
4926
4927 return 0;
4928}
4929
e8a565cb
YW
4930int exec_runtime_deserialize_compat(Unit *u, const char *key, const char *value, FDSet *fds) {
4931 _cleanup_(exec_runtime_freep) ExecRuntime *rt_create = NULL;
4932 ExecRuntime *rt;
613b411c
LP
4933 int r;
4934
e8a565cb
YW
4935 /* This is for the migration from old (v237 or earlier) deserialization text.
4936 * Due to the bug #7790, this may not work with the units that use JoinsNamespaceOf=.
4937 * Even if the ExecRuntime object originally created by the other unit, we cannot judge
4938 * so or not from the serialized text, then we always creates a new object owned by this. */
4939
4940 assert(u);
613b411c
LP
4941 assert(key);
4942 assert(value);
4943
e8a565cb
YW
4944 /* Manager manages ExecRuntime objects by the unit id.
4945 * So, we omit the serialized text when the unit does not have id (yet?)... */
4946 if (isempty(u->id)) {
4947 log_unit_debug(u, "Invocation ID not found. Dropping runtime parameter.");
4948 return 0;
4949 }
613b411c 4950
e8a565cb
YW
4951 r = hashmap_ensure_allocated(&u->manager->exec_runtime_by_id, &string_hash_ops);
4952 if (r < 0) {
4953 log_unit_debug_errno(u, r, "Failed to allocate storage for runtime parameter: %m");
4954 return 0;
4955 }
4956
4957 rt = hashmap_get(u->manager->exec_runtime_by_id, u->id);
4958 if (!rt) {
4959 r = exec_runtime_allocate(&rt_create);
613b411c 4960 if (r < 0)
f2341e0a 4961 return log_oom();
613b411c 4962
e8a565cb
YW
4963 rt_create->id = strdup(u->id);
4964 if (!rt_create->id)
4965 return log_oom();
4966
4967 rt = rt_create;
4968 }
4969
4970 if (streq(key, "tmp-dir")) {
4971 char *copy;
4972
613b411c
LP
4973 copy = strdup(value);
4974 if (!copy)
4975 return log_oom();
4976
e8a565cb 4977 free_and_replace(rt->tmp_dir, copy);
613b411c
LP
4978
4979 } else if (streq(key, "var-tmp-dir")) {
4980 char *copy;
4981
613b411c
LP
4982 copy = strdup(value);
4983 if (!copy)
4984 return log_oom();
4985
e8a565cb 4986 free_and_replace(rt->var_tmp_dir, copy);
613b411c
LP
4987
4988 } else if (streq(key, "netns-socket-0")) {
4989 int fd;
4990
e8a565cb 4991 if (safe_atoi(value, &fd) < 0 || !fdset_contains(fds, fd)) {
f2341e0a 4992 log_unit_debug(u, "Failed to parse netns socket value: %s", value);
e8a565cb 4993 return 0;
613b411c 4994 }
e8a565cb
YW
4995
4996 safe_close(rt->netns_storage_socket[0]);
4997 rt->netns_storage_socket[0] = fdset_remove(fds, fd);
4998
613b411c
LP
4999 } else if (streq(key, "netns-socket-1")) {
5000 int fd;
5001
e8a565cb 5002 if (safe_atoi(value, &fd) < 0 || !fdset_contains(fds, fd)) {
f2341e0a 5003 log_unit_debug(u, "Failed to parse netns socket value: %s", value);
e8a565cb 5004 return 0;
613b411c 5005 }
e8a565cb
YW
5006
5007 safe_close(rt->netns_storage_socket[1]);
5008 rt->netns_storage_socket[1] = fdset_remove(fds, fd);
613b411c
LP
5009 } else
5010 return 0;
5011
e8a565cb
YW
5012 /* If the object is newly created, then put it to the hashmap which manages ExecRuntime objects. */
5013 if (rt_create) {
5014 r = hashmap_put(u->manager->exec_runtime_by_id, rt_create->id, rt_create);
5015 if (r < 0) {
3fe91079 5016 log_unit_debug_errno(u, r, "Failed to put runtime parameter to manager's storage: %m");
e8a565cb
YW
5017 return 0;
5018 }
613b411c 5019
e8a565cb 5020 rt_create->manager = u->manager;
613b411c 5021
e8a565cb
YW
5022 /* Avoid cleanup */
5023 rt_create = NULL;
5024 }
98b47d54 5025
e8a565cb
YW
5026 return 1;
5027}
613b411c 5028
e8a565cb
YW
5029void exec_runtime_deserialize_one(Manager *m, const char *value, FDSet *fds) {
5030 char *id = NULL, *tmp_dir = NULL, *var_tmp_dir = NULL;
5031 int r, fd0 = -1, fd1 = -1;
5032 const char *p, *v = value;
5033 size_t n;
613b411c 5034
e8a565cb
YW
5035 assert(m);
5036 assert(value);
5037 assert(fds);
98b47d54 5038
e8a565cb
YW
5039 n = strcspn(v, " ");
5040 id = strndupa(v, n);
5041 if (v[n] != ' ')
5042 goto finalize;
5043 p = v + n + 1;
5044
5045 v = startswith(p, "tmp-dir=");
5046 if (v) {
5047 n = strcspn(v, " ");
5048 tmp_dir = strndupa(v, n);
5049 if (v[n] != ' ')
5050 goto finalize;
5051 p = v + n + 1;
5052 }
5053
5054 v = startswith(p, "var-tmp-dir=");
5055 if (v) {
5056 n = strcspn(v, " ");
5057 var_tmp_dir = strndupa(v, n);
5058 if (v[n] != ' ')
5059 goto finalize;
5060 p = v + n + 1;
5061 }
5062
5063 v = startswith(p, "netns-socket-0=");
5064 if (v) {
5065 char *buf;
5066
5067 n = strcspn(v, " ");
5068 buf = strndupa(v, n);
5069 if (safe_atoi(buf, &fd0) < 0 || !fdset_contains(fds, fd0)) {
5070 log_debug("Unable to process exec-runtime netns fd specification.");
5071 return;
98b47d54 5072 }
e8a565cb
YW
5073 fd0 = fdset_remove(fds, fd0);
5074 if (v[n] != ' ')
5075 goto finalize;
5076 p = v + n + 1;
613b411c
LP
5077 }
5078
e8a565cb
YW
5079 v = startswith(p, "netns-socket-1=");
5080 if (v) {
5081 char *buf;
98b47d54 5082
e8a565cb
YW
5083 n = strcspn(v, " ");
5084 buf = strndupa(v, n);
5085 if (safe_atoi(buf, &fd1) < 0 || !fdset_contains(fds, fd1)) {
5086 log_debug("Unable to process exec-runtime netns fd specification.");
5087 return;
98b47d54 5088 }
e8a565cb
YW
5089 fd1 = fdset_remove(fds, fd1);
5090 }
98b47d54 5091
e8a565cb
YW
5092finalize:
5093
5094 r = exec_runtime_add(m, id, tmp_dir, var_tmp_dir, (int[]) { fd0, fd1 }, NULL);
7d853ca6 5095 if (r < 0)
e8a565cb 5096 log_debug_errno(r, "Failed to add exec-runtime: %m");
e8a565cb 5097}
613b411c 5098
e8a565cb
YW
5099void exec_runtime_vacuum(Manager *m) {
5100 ExecRuntime *rt;
5101 Iterator i;
5102
5103 assert(m);
5104
5105 /* Free unreferenced ExecRuntime objects. This is used after manager deserialization process. */
5106
5107 HASHMAP_FOREACH(rt, m->exec_runtime_by_id, i) {
5108 if (rt->n_ref > 0)
5109 continue;
5110
5111 (void) exec_runtime_free(rt, false);
5112 }
613b411c
LP
5113}
5114
b9c04eaf
YW
5115void exec_params_clear(ExecParameters *p) {
5116 if (!p)
5117 return;
5118
5119 strv_free(p->environment);
5120}
5121
80876c20
LP
5122static const char* const exec_input_table[_EXEC_INPUT_MAX] = {
5123 [EXEC_INPUT_NULL] = "null",
5124 [EXEC_INPUT_TTY] = "tty",
5125 [EXEC_INPUT_TTY_FORCE] = "tty-force",
4f2d528d 5126 [EXEC_INPUT_TTY_FAIL] = "tty-fail",
52c239d7
LB
5127 [EXEC_INPUT_SOCKET] = "socket",
5128 [EXEC_INPUT_NAMED_FD] = "fd",
08f3be7a 5129 [EXEC_INPUT_DATA] = "data",
2038c3f5 5130 [EXEC_INPUT_FILE] = "file",
80876c20
LP
5131};
5132
8a0867d6
LP
5133DEFINE_STRING_TABLE_LOOKUP(exec_input, ExecInput);
5134
94f04347 5135static const char* const exec_output_table[_EXEC_OUTPUT_MAX] = {
80876c20 5136 [EXEC_OUTPUT_INHERIT] = "inherit",
94f04347 5137 [EXEC_OUTPUT_NULL] = "null",
80876c20 5138 [EXEC_OUTPUT_TTY] = "tty",
94f04347 5139 [EXEC_OUTPUT_SYSLOG] = "syslog",
28dbc1e8 5140 [EXEC_OUTPUT_SYSLOG_AND_CONSOLE] = "syslog+console",
9a6bca7a 5141 [EXEC_OUTPUT_KMSG] = "kmsg",
28dbc1e8 5142 [EXEC_OUTPUT_KMSG_AND_CONSOLE] = "kmsg+console",
706343f4
LP
5143 [EXEC_OUTPUT_JOURNAL] = "journal",
5144 [EXEC_OUTPUT_JOURNAL_AND_CONSOLE] = "journal+console",
52c239d7
LB
5145 [EXEC_OUTPUT_SOCKET] = "socket",
5146 [EXEC_OUTPUT_NAMED_FD] = "fd",
2038c3f5 5147 [EXEC_OUTPUT_FILE] = "file",
566b7d23 5148 [EXEC_OUTPUT_FILE_APPEND] = "append",
94f04347
LP
5149};
5150
5151DEFINE_STRING_TABLE_LOOKUP(exec_output, ExecOutput);
023a4f67
LP
5152
5153static const char* const exec_utmp_mode_table[_EXEC_UTMP_MODE_MAX] = {
5154 [EXEC_UTMP_INIT] = "init",
5155 [EXEC_UTMP_LOGIN] = "login",
5156 [EXEC_UTMP_USER] = "user",
5157};
5158
5159DEFINE_STRING_TABLE_LOOKUP(exec_utmp_mode, ExecUtmpMode);
53f47dfc
YW
5160
5161static const char* const exec_preserve_mode_table[_EXEC_PRESERVE_MODE_MAX] = {
5162 [EXEC_PRESERVE_NO] = "no",
5163 [EXEC_PRESERVE_YES] = "yes",
5164 [EXEC_PRESERVE_RESTART] = "restart",
5165};
5166
5167DEFINE_STRING_TABLE_LOOKUP_WITH_BOOLEAN(exec_preserve_mode, ExecPreserveMode, EXEC_PRESERVE_YES);
3536f49e 5168
72fd1768 5169static const char* const exec_directory_type_table[_EXEC_DIRECTORY_TYPE_MAX] = {
3536f49e
YW
5170 [EXEC_DIRECTORY_RUNTIME] = "RuntimeDirectory",
5171 [EXEC_DIRECTORY_STATE] = "StateDirectory",
5172 [EXEC_DIRECTORY_CACHE] = "CacheDirectory",
5173 [EXEC_DIRECTORY_LOGS] = "LogsDirectory",
5174 [EXEC_DIRECTORY_CONFIGURATION] = "ConfigurationDirectory",
5175};
5176
5177DEFINE_STRING_TABLE_LOOKUP(exec_directory_type, ExecDirectoryType);
b1edf445 5178
fb2042dd
YW
5179static const char* const exec_directory_env_name_table[_EXEC_DIRECTORY_TYPE_MAX] = {
5180 [EXEC_DIRECTORY_RUNTIME] = "RUNTIME_DIRECTORY",
5181 [EXEC_DIRECTORY_STATE] = "STATE_DIRECTORY",
5182 [EXEC_DIRECTORY_CACHE] = "CACHE_DIRECTORY",
5183 [EXEC_DIRECTORY_LOGS] = "LOGS_DIRECTORY",
5184 [EXEC_DIRECTORY_CONFIGURATION] = "CONFIGURATION_DIRECTORY",
5185};
5186
5187DEFINE_PRIVATE_STRING_TABLE_LOOKUP_TO_STRING(exec_directory_env_name, ExecDirectoryType);
5188
b1edf445
LP
5189static const char* const exec_keyring_mode_table[_EXEC_KEYRING_MODE_MAX] = {
5190 [EXEC_KEYRING_INHERIT] = "inherit",
5191 [EXEC_KEYRING_PRIVATE] = "private",
5192 [EXEC_KEYRING_SHARED] = "shared",
5193};
5194
5195DEFINE_STRING_TABLE_LOOKUP(exec_keyring_mode, ExecKeyringMode);