]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/core/execute.c
core: automatically update StandardOuput=syslog to =journal (and similar for Standard...
[thirdparty/systemd.git] / src / core / execute.c
CommitLineData
53e1b683 1/* SPDX-License-Identifier: LGPL-2.1+ */
a7334b09 2
034c6ed7
LP
3#include <errno.h>
4#include <fcntl.h>
8dd4c05b 5#include <poll.h>
d251207d 6#include <sys/eventfd.h>
f5947a5e 7#include <sys/ioctl.h>
f3e43635 8#include <sys/mman.h>
8dd4c05b 9#include <sys/personality.h>
94f04347 10#include <sys/prctl.h>
d2ffa389 11#include <sys/shm.h>
d2ffa389 12#include <sys/types.h>
8dd4c05b
LP
13#include <sys/un.h>
14#include <unistd.h>
023a4f67 15#include <utmpx.h>
5cb5a6ff 16
349cc4a5 17#if HAVE_PAM
5b6319dc
LP
18#include <security/pam_appl.h>
19#endif
20
349cc4a5 21#if HAVE_SELINUX
7b52a628
MS
22#include <selinux/selinux.h>
23#endif
24
349cc4a5 25#if HAVE_SECCOMP
17df7223
LP
26#include <seccomp.h>
27#endif
28
349cc4a5 29#if HAVE_APPARMOR
eef65bf3
MS
30#include <sys/apparmor.h>
31#endif
32
24882e06 33#include "sd-messages.h"
8dd4c05b
LP
34
35#include "af-list.h"
b5efdb8a 36#include "alloc-util.h"
349cc4a5 37#if HAVE_APPARMOR
3ffd4af2
LP
38#include "apparmor-util.h"
39#endif
8dd4c05b
LP
40#include "async.h"
41#include "barrier.h"
8dd4c05b 42#include "cap-list.h"
430f0182 43#include "capability-util.h"
a1164ae3 44#include "chown-recursive.h"
fdb3deca 45#include "cgroup-setup.h"
da681e1b 46#include "cpu-set-util.h"
f6a6225e 47#include "def.h"
686d13b9 48#include "env-file.h"
4d1a6904 49#include "env-util.h"
17df7223 50#include "errno-list.h"
3ffd4af2 51#include "execute.h"
8dd4c05b 52#include "exit-status.h"
3ffd4af2 53#include "fd-util.h"
f97b34a6 54#include "format-util.h"
f4f15635 55#include "fs-util.h"
7d50b32a 56#include "glob-util.h"
c004493c 57#include "io-util.h"
8dd4c05b 58#include "ioprio.h"
a1164ae3 59#include "label.h"
8dd4c05b
LP
60#include "log.h"
61#include "macro.h"
e8a565cb 62#include "manager.h"
0a970718 63#include "memory-util.h"
f5947a5e 64#include "missing_fs.h"
8dd4c05b
LP
65#include "mkdir.h"
66#include "namespace.h"
6bedfcbb 67#include "parse-util.h"
8dd4c05b 68#include "path-util.h"
0b452006 69#include "process-util.h"
78f22b97 70#include "rlimit-util.h"
8dd4c05b 71#include "rm-rf.h"
349cc4a5 72#if HAVE_SECCOMP
3ffd4af2
LP
73#include "seccomp-util.h"
74#endif
07d46372 75#include "securebits-util.h"
8dd4c05b 76#include "selinux-util.h"
24882e06 77#include "signal-util.h"
8dd4c05b 78#include "smack-util.h"
57b7a260 79#include "socket-util.h"
fd63e712 80#include "special.h"
949befd3 81#include "stat-util.h"
8b43440b 82#include "string-table.h"
07630cea 83#include "string-util.h"
8dd4c05b 84#include "strv.h"
7ccbd1ae 85#include "syslog-util.h"
8dd4c05b 86#include "terminal-util.h"
566b7d23 87#include "umask-util.h"
8dd4c05b 88#include "unit.h"
b1d4f8e1 89#include "user-util.h"
8dd4c05b 90#include "utmp-wtmp.h"
5cb5a6ff 91
e056b01d 92#define IDLE_TIMEOUT_USEC (5*USEC_PER_SEC)
31a7eb86 93#define IDLE_TIMEOUT2_USEC (1*USEC_PER_SEC)
e6a26745 94
531dca78
LP
95#define SNDBUF_SIZE (8*1024*1024)
96
/* Moves the passed file descriptors so that fds[k] ends up being fd number k+3, i.e. the descriptors
 * form a contiguous range starting right after stdin/stdout/stderr.
 *
 * The array is modified in place: every entry that is not yet at its target number is duplicated with
 * F_DUPFD, the old descriptor is closed and the entry is updated to the new number.
 *
 * Returns 0 on success, or a negative errno-style error if fcntl() fails. */
static int shift_fds(int fds[], size_t n_fds) {
        int first = 0;

        if (n_fds == 0)
                return 0;

        assert(fds);

        do {
                int retry_at = -1, idx;

                for (idx = first; idx < (int) n_fds; idx++) {
                        int target = idx + 3, dup_fd;

                        /* Nothing to do if this one already sits at the right number */
                        if (fds[idx] == target)
                                continue;

                        dup_fd = fcntl(fds[idx], F_DUPFD, target);
                        if (dup_fd < 0)
                                return -errno;

                        safe_close(fds[idx]);
                        fds[idx] = dup_fd;

                        /* F_DUPFD hands out the lowest free fd >= target. If the target itself was still
                         * occupied, remember the first such index so that we make another pass from
                         * there. */
                        if (dup_fd != target && retry_at < 0)
                                retry_at = idx;
                }

                first = retry_at;
        } while (first >= 0);

        return 0;
}
141
/* Adjusts the file descriptor flags of the fds we are about to pass on.
 *
 * The array holds n_socket_fds socket descriptors followed by n_storage_fds further descriptors.
 * O_NONBLOCK is set or cleared (according to 'nonblock') on the socket descriptors only, while
 * FD_CLOEXEC is dropped from every descriptor.
 *
 * Returns 0 on success, or the negative error of the first helper call that failed. */
static int flags_fds(const int fds[], size_t n_socket_fds, size_t n_storage_fds, bool nonblock) {
        size_t total = n_socket_fds + n_storage_fds, k;
        int r;

        if (total == 0)
                return 0;

        assert(fds);

        for (k = 0; k < total; k++) {

                /* O_NONBLOCK only matters for the leading socket activation fds */
                if (k < n_socket_fds) {
                        r = fd_nonblock(fds[k], nonblock);
                        if (r < 0)
                                return r;
                }

                /* FD_CLOEXEC is always removed, after all these fds are supposed to be passed to our
                 * children */
                r = fd_cloexec(fds[k], false);
                if (r < 0)
                        return r;
        }

        return 0;
}
174
1e22b5cd 175static const char *exec_context_tty_path(const ExecContext *context) {
80876c20
LP
176 assert(context);
177
1e22b5cd
LP
178 if (context->stdio_as_fds)
179 return NULL;
180
80876c20
LP
181 if (context->tty_path)
182 return context->tty_path;
183
184 return "/dev/console";
185}
186
1e22b5cd
LP
187static void exec_context_tty_reset(const ExecContext *context, const ExecParameters *p) {
188 const char *path;
189
6ea832a2
LP
190 assert(context);
191
1e22b5cd 192 path = exec_context_tty_path(context);
6ea832a2 193
1e22b5cd
LP
194 if (context->tty_vhangup) {
195 if (p && p->stdin_fd >= 0)
196 (void) terminal_vhangup_fd(p->stdin_fd);
197 else if (path)
198 (void) terminal_vhangup(path);
199 }
6ea832a2 200
1e22b5cd
LP
201 if (context->tty_reset) {
202 if (p && p->stdin_fd >= 0)
203 (void) reset_terminal_fd(p->stdin_fd, true);
204 else if (path)
205 (void) reset_terminal(path);
206 }
207
208 if (context->tty_vt_disallocate && path)
209 (void) vt_disallocate(path);
6ea832a2
LP
210}
211
6af760f3
LP
212static bool is_terminal_input(ExecInput i) {
213 return IN_SET(i,
214 EXEC_INPUT_TTY,
215 EXEC_INPUT_TTY_FORCE,
216 EXEC_INPUT_TTY_FAIL);
217}
218
3a1286b6 219static bool is_terminal_output(ExecOutput o) {
6af760f3
LP
220 return IN_SET(o,
221 EXEC_OUTPUT_TTY,
6af760f3
LP
222 EXEC_OUTPUT_KMSG_AND_CONSOLE,
223 EXEC_OUTPUT_JOURNAL_AND_CONSOLE);
224}
225
aac8c0c3
LP
226static bool is_kmsg_output(ExecOutput o) {
227 return IN_SET(o,
228 EXEC_OUTPUT_KMSG,
229 EXEC_OUTPUT_KMSG_AND_CONSOLE);
230}
231
6af760f3
LP
232static bool exec_context_needs_term(const ExecContext *c) {
233 assert(c);
234
235 /* Return true if the execution context suggests we should set $TERM to something useful. */
236
237 if (is_terminal_input(c->std_input))
238 return true;
239
240 if (is_terminal_output(c->std_output))
241 return true;
242
243 if (is_terminal_output(c->std_error))
244 return true;
245
246 return !!c->tty_path;
3a1286b6
MS
247}
248
/* Opens /dev/null with the given access flags (plus O_NOCTTY) and moves the resulting descriptor to fd
 * number 'nfd'. Returns -errno if the open fails, otherwise whatever move_fd() returns. */
static int open_null_as(int flags, int nfd) {
        int null_fd;

        assert(nfd >= 0);

        null_fd = open("/dev/null", flags|O_NOCTTY);
        if (null_fd < 0)
                return -errno;

        return move_fd(null_fd, nfd, false);
}
260
91dd5f7c
LP
261static int connect_journal_socket(
262 int fd,
263 const char *log_namespace,
264 uid_t uid,
265 gid_t gid) {
266
f36a9d59
ZJS
267 union sockaddr_union sa;
268 socklen_t sa_len;
524daa8c
ZJS
269 uid_t olduid = UID_INVALID;
270 gid_t oldgid = GID_INVALID;
91dd5f7c 271 const char *j;
524daa8c
ZJS
272 int r;
273
91dd5f7c
LP
274 j = log_namespace ?
275 strjoina("/run/systemd/journal.", log_namespace, "/stdout") :
276 "/run/systemd/journal/stdout";
277 r = sockaddr_un_set_path(&sa.un, j);
278 if (r < 0)
279 return r;
f36a9d59 280 sa_len = r;
91dd5f7c 281
cad93f29 282 if (gid_is_valid(gid)) {
524daa8c
ZJS
283 oldgid = getgid();
284
92a17af9 285 if (setegid(gid) < 0)
524daa8c
ZJS
286 return -errno;
287 }
288
cad93f29 289 if (uid_is_valid(uid)) {
524daa8c
ZJS
290 olduid = getuid();
291
92a17af9 292 if (seteuid(uid) < 0) {
524daa8c
ZJS
293 r = -errno;
294 goto restore_gid;
295 }
296 }
297
f36a9d59 298 r = connect(fd, &sa.sa, sa_len) < 0 ? -errno : 0;
524daa8c
ZJS
299
300 /* If we fail to restore the uid or gid, things will likely
301 fail later on. This should only happen if an LSM interferes. */
302
cad93f29 303 if (uid_is_valid(uid))
524daa8c
ZJS
304 (void) seteuid(olduid);
305
306 restore_gid:
cad93f29 307 if (gid_is_valid(gid))
524daa8c
ZJS
308 (void) setegid(oldgid);
309
310 return r;
311}
312
fd1f9c89 313static int connect_logger_as(
34cf6c43 314 const Unit *unit,
fd1f9c89 315 const ExecContext *context,
af635cf3 316 const ExecParameters *params,
fd1f9c89
LP
317 ExecOutput output,
318 const char *ident,
fd1f9c89
LP
319 int nfd,
320 uid_t uid,
321 gid_t gid) {
322
2ac1ff68
EV
323 _cleanup_close_ int fd = -1;
324 int r;
071830ff
LP
325
326 assert(context);
af635cf3 327 assert(params);
80876c20
LP
328 assert(output < _EXEC_OUTPUT_MAX);
329 assert(ident);
330 assert(nfd >= 0);
071830ff 331
54fe0cdb
LP
332 fd = socket(AF_UNIX, SOCK_STREAM, 0);
333 if (fd < 0)
80876c20 334 return -errno;
071830ff 335
91dd5f7c 336 r = connect_journal_socket(fd, context->log_namespace, uid, gid);
524daa8c
ZJS
337 if (r < 0)
338 return r;
071830ff 339
2ac1ff68 340 if (shutdown(fd, SHUT_RD) < 0)
80876c20 341 return -errno;
071830ff 342
fd1f9c89 343 (void) fd_inc_sndbuf(fd, SNDBUF_SIZE);
531dca78 344
2ac1ff68 345 if (dprintf(fd,
62bca2c6 346 "%s\n"
80876c20
LP
347 "%s\n"
348 "%i\n"
54fe0cdb
LP
349 "%i\n"
350 "%i\n"
351 "%i\n"
4f4a1dbf 352 "%i\n",
c867611e 353 context->syslog_identifier ?: ident,
af635cf3 354 params->flags & EXEC_PASS_LOG_UNIT ? unit->id : "",
54fe0cdb
LP
355 context->syslog_priority,
356 !!context->syslog_level_prefix,
f3dc6af2 357 false,
aac8c0c3 358 is_kmsg_output(output),
2ac1ff68
EV
359 is_terminal_output(output)) < 0)
360 return -errno;
80876c20 361
2ac1ff68 362 return move_fd(TAKE_FD(fd), nfd, false);
80876c20 363}
2ac1ff68 364
/* Opens the terminal at 'path' with the given flags (plus O_NOCTTY) and moves the descriptor to fd
 * number 'nfd'. Returns the error of open_terminal() if that fails, otherwise whatever move_fd()
 * returns. */
static int open_terminal_as(const char *path, int flags, int nfd) {
        int tty_fd;

        assert(path);
        assert(nfd >= 0);

        tty_fd = open_terminal(path, flags | O_NOCTTY);
        if (tty_fd < 0)
                return tty_fd;

        return move_fd(tty_fd, nfd, false);
}
071830ff 377
2038c3f5 378static int acquire_path(const char *path, int flags, mode_t mode) {
86fca584
ZJS
379 union sockaddr_union sa;
380 socklen_t sa_len;
15a3e96f 381 _cleanup_close_ int fd = -1;
86fca584 382 int r;
071830ff 383
80876c20 384 assert(path);
071830ff 385
2038c3f5
LP
386 if (IN_SET(flags & O_ACCMODE, O_WRONLY, O_RDWR))
387 flags |= O_CREAT;
388
389 fd = open(path, flags|O_NOCTTY, mode);
390 if (fd >= 0)
15a3e96f 391 return TAKE_FD(fd);
071830ff 392
2038c3f5
LP
393 if (errno != ENXIO) /* ENXIO is returned when we try to open() an AF_UNIX file system socket on Linux */
394 return -errno;
2038c3f5
LP
395
396 /* So, it appears the specified path could be an AF_UNIX socket. Let's see if we can connect to it. */
397
86fca584
ZJS
398 r = sockaddr_un_set_path(&sa.un, path);
399 if (r < 0)
400 return r == -EINVAL ? -ENXIO : r;
401 sa_len = r;
402
2038c3f5
LP
403 fd = socket(AF_UNIX, SOCK_STREAM, 0);
404 if (fd < 0)
405 return -errno;
406
86fca584 407 if (connect(fd, &sa.sa, sa_len) < 0)
2038c3f5
LP
408 return errno == EINVAL ? -ENXIO : -errno; /* Propagate initial error if we get EINVAL, i.e. we have
409 * indication that his wasn't an AF_UNIX socket after all */
071830ff 410
2038c3f5
LP
411 if ((flags & O_ACCMODE) == O_RDONLY)
412 r = shutdown(fd, SHUT_WR);
413 else if ((flags & O_ACCMODE) == O_WRONLY)
414 r = shutdown(fd, SHUT_RD);
415 else
86fca584 416 r = 0;
15a3e96f 417 if (r < 0)
2038c3f5 418 return -errno;
2038c3f5 419
15a3e96f 420 return TAKE_FD(fd);
80876c20 421}
071830ff 422
08f3be7a
LP
423static int fixup_input(
424 const ExecContext *context,
425 int socket_fd,
426 bool apply_tty_stdin) {
427
428 ExecInput std_input;
429
430 assert(context);
431
432 std_input = context->std_input;
1e3ad081
LP
433
434 if (is_terminal_input(std_input) && !apply_tty_stdin)
435 return EXEC_INPUT_NULL;
071830ff 436
03fd9c49 437 if (std_input == EXEC_INPUT_SOCKET && socket_fd < 0)
4f2d528d
LP
438 return EXEC_INPUT_NULL;
439
08f3be7a
LP
440 if (std_input == EXEC_INPUT_DATA && context->stdin_data_size == 0)
441 return EXEC_INPUT_NULL;
442
03fd9c49 443 return std_input;
4f2d528d
LP
444}
445
03fd9c49 446static int fixup_output(ExecOutput std_output, int socket_fd) {
4f2d528d 447
03fd9c49 448 if (std_output == EXEC_OUTPUT_SOCKET && socket_fd < 0)
4f2d528d
LP
449 return EXEC_OUTPUT_INHERIT;
450
03fd9c49 451 return std_output;
4f2d528d
LP
452}
453
a34ceba6
LP
454static int setup_input(
455 const ExecContext *context,
456 const ExecParameters *params,
52c239d7 457 int socket_fd,
2caa38e9 458 const int named_iofds[static 3]) {
a34ceba6 459
4f2d528d
LP
460 ExecInput i;
461
462 assert(context);
a34ceba6 463 assert(params);
2caa38e9 464 assert(named_iofds);
a34ceba6
LP
465
466 if (params->stdin_fd >= 0) {
467 if (dup2(params->stdin_fd, STDIN_FILENO) < 0)
468 return -errno;
469
470 /* Try to make this the controlling tty, if it is a tty, and reset it */
1fb0682e
LP
471 if (isatty(STDIN_FILENO)) {
472 (void) ioctl(STDIN_FILENO, TIOCSCTTY, context->std_input == EXEC_INPUT_TTY_FORCE);
473 (void) reset_terminal_fd(STDIN_FILENO, true);
474 }
a34ceba6
LP
475
476 return STDIN_FILENO;
477 }
4f2d528d 478
08f3be7a 479 i = fixup_input(context, socket_fd, params->flags & EXEC_APPLY_TTY_STDIN);
4f2d528d
LP
480
481 switch (i) {
071830ff 482
80876c20
LP
483 case EXEC_INPUT_NULL:
484 return open_null_as(O_RDONLY, STDIN_FILENO);
485
486 case EXEC_INPUT_TTY:
487 case EXEC_INPUT_TTY_FORCE:
488 case EXEC_INPUT_TTY_FAIL: {
046a82c1 489 int fd;
071830ff 490
1e22b5cd 491 fd = acquire_terminal(exec_context_tty_path(context),
8854d795
LP
492 i == EXEC_INPUT_TTY_FAIL ? ACQUIRE_TERMINAL_TRY :
493 i == EXEC_INPUT_TTY_FORCE ? ACQUIRE_TERMINAL_FORCE :
494 ACQUIRE_TERMINAL_WAIT,
3a43da28 495 USEC_INFINITY);
970edce6 496 if (fd < 0)
80876c20
LP
497 return fd;
498
046a82c1 499 return move_fd(fd, STDIN_FILENO, false);
80876c20
LP
500 }
501
4f2d528d 502 case EXEC_INPUT_SOCKET:
e75a9ed1
LP
503 assert(socket_fd >= 0);
504
4f2d528d
LP
505 return dup2(socket_fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
506
52c239d7 507 case EXEC_INPUT_NAMED_FD:
e75a9ed1
LP
508 assert(named_iofds[STDIN_FILENO] >= 0);
509
52c239d7
LB
510 (void) fd_nonblock(named_iofds[STDIN_FILENO], false);
511 return dup2(named_iofds[STDIN_FILENO], STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
512
08f3be7a
LP
513 case EXEC_INPUT_DATA: {
514 int fd;
515
516 fd = acquire_data_fd(context->stdin_data, context->stdin_data_size, 0);
517 if (fd < 0)
518 return fd;
519
520 return move_fd(fd, STDIN_FILENO, false);
521 }
522
2038c3f5
LP
523 case EXEC_INPUT_FILE: {
524 bool rw;
525 int fd;
526
527 assert(context->stdio_file[STDIN_FILENO]);
528
529 rw = (context->std_output == EXEC_OUTPUT_FILE && streq_ptr(context->stdio_file[STDIN_FILENO], context->stdio_file[STDOUT_FILENO])) ||
530 (context->std_error == EXEC_OUTPUT_FILE && streq_ptr(context->stdio_file[STDIN_FILENO], context->stdio_file[STDERR_FILENO]));
531
532 fd = acquire_path(context->stdio_file[STDIN_FILENO], rw ? O_RDWR : O_RDONLY, 0666 & ~context->umask);
533 if (fd < 0)
534 return fd;
535
536 return move_fd(fd, STDIN_FILENO, false);
537 }
538
80876c20
LP
539 default:
540 assert_not_reached("Unknown input type");
541 }
542}
543
41fc585a
LP
544static bool can_inherit_stderr_from_stdout(
545 const ExecContext *context,
546 ExecOutput o,
547 ExecOutput e) {
548
549 assert(context);
550
551 /* Returns true, if given the specified STDERR and STDOUT output we can directly dup() the stdout fd to the
552 * stderr fd */
553
554 if (e == EXEC_OUTPUT_INHERIT)
555 return true;
556 if (e != o)
557 return false;
558
559 if (e == EXEC_OUTPUT_NAMED_FD)
560 return streq_ptr(context->stdio_fdname[STDOUT_FILENO], context->stdio_fdname[STDERR_FILENO]);
561
562 if (IN_SET(e, EXEC_OUTPUT_FILE, EXEC_OUTPUT_FILE_APPEND))
563 return streq_ptr(context->stdio_file[STDOUT_FILENO], context->stdio_file[STDERR_FILENO]);
564
565 return true;
566}
567
a34ceba6 568static int setup_output(
34cf6c43 569 const Unit *unit,
a34ceba6
LP
570 const ExecContext *context,
571 const ExecParameters *params,
572 int fileno,
573 int socket_fd,
2caa38e9 574 const int named_iofds[static 3],
a34ceba6 575 const char *ident,
7bce046b
LP
576 uid_t uid,
577 gid_t gid,
578 dev_t *journal_stream_dev,
579 ino_t *journal_stream_ino) {
a34ceba6 580
4f2d528d
LP
581 ExecOutput o;
582 ExecInput i;
47c1d80d 583 int r;
4f2d528d 584
f2341e0a 585 assert(unit);
80876c20 586 assert(context);
a34ceba6 587 assert(params);
80876c20 588 assert(ident);
7bce046b
LP
589 assert(journal_stream_dev);
590 assert(journal_stream_ino);
80876c20 591
a34ceba6
LP
592 if (fileno == STDOUT_FILENO && params->stdout_fd >= 0) {
593
594 if (dup2(params->stdout_fd, STDOUT_FILENO) < 0)
595 return -errno;
596
597 return STDOUT_FILENO;
598 }
599
600 if (fileno == STDERR_FILENO && params->stderr_fd >= 0) {
601 if (dup2(params->stderr_fd, STDERR_FILENO) < 0)
602 return -errno;
603
604 return STDERR_FILENO;
605 }
606
08f3be7a 607 i = fixup_input(context, socket_fd, params->flags & EXEC_APPLY_TTY_STDIN);
03fd9c49 608 o = fixup_output(context->std_output, socket_fd);
4f2d528d 609
eb17e935
MS
610 if (fileno == STDERR_FILENO) {
611 ExecOutput e;
612 e = fixup_output(context->std_error, socket_fd);
80876c20 613
eb17e935
MS
614 /* This expects the input and output are already set up */
615
616 /* Don't change the stderr file descriptor if we inherit all
617 * the way and are not on a tty */
618 if (e == EXEC_OUTPUT_INHERIT &&
619 o == EXEC_OUTPUT_INHERIT &&
620 i == EXEC_INPUT_NULL &&
621 !is_terminal_input(context->std_input) &&
622 getppid () != 1)
623 return fileno;
624
625 /* Duplicate from stdout if possible */
41fc585a 626 if (can_inherit_stderr_from_stdout(context, o, e))
eb17e935 627 return dup2(STDOUT_FILENO, fileno) < 0 ? -errno : fileno;
071830ff 628
eb17e935 629 o = e;
80876c20 630
eb17e935 631 } else if (o == EXEC_OUTPUT_INHERIT) {
21d21ea4
LP
632 /* If input got downgraded, inherit the original value */
633 if (i == EXEC_INPUT_NULL && is_terminal_input(context->std_input))
1e22b5cd 634 return open_terminal_as(exec_context_tty_path(context), O_WRONLY, fileno);
21d21ea4 635
08f3be7a
LP
636 /* If the input is connected to anything that's not a /dev/null or a data fd, inherit that... */
637 if (!IN_SET(i, EXEC_INPUT_NULL, EXEC_INPUT_DATA))
eb17e935 638 return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
071830ff 639
acb591e4
LP
640 /* If we are not started from PID 1 we just inherit STDOUT from our parent process. */
641 if (getppid() != 1)
eb17e935 642 return fileno;
94f04347 643
eb17e935
MS
644 /* We need to open /dev/null here anew, to get the right access mode. */
645 return open_null_as(O_WRONLY, fileno);
071830ff 646 }
94f04347 647
eb17e935 648 switch (o) {
80876c20
LP
649
650 case EXEC_OUTPUT_NULL:
eb17e935 651 return open_null_as(O_WRONLY, fileno);
80876c20
LP
652
653 case EXEC_OUTPUT_TTY:
4f2d528d 654 if (is_terminal_input(i))
eb17e935 655 return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
80876c20
LP
656
657 /* We don't reset the terminal if this is just about output */
1e22b5cd 658 return open_terminal_as(exec_context_tty_path(context), O_WRONLY, fileno);
80876c20 659
9a6bca7a 660 case EXEC_OUTPUT_KMSG:
28dbc1e8 661 case EXEC_OUTPUT_KMSG_AND_CONSOLE:
706343f4
LP
662 case EXEC_OUTPUT_JOURNAL:
663 case EXEC_OUTPUT_JOURNAL_AND_CONSOLE:
af635cf3 664 r = connect_logger_as(unit, context, params, o, ident, fileno, uid, gid);
47c1d80d 665 if (r < 0) {
82677ae4 666 log_unit_warning_errno(unit, r, "Failed to connect %s to the journal socket, ignoring: %m", fileno == STDOUT_FILENO ? "stdout" : "stderr");
eb17e935 667 r = open_null_as(O_WRONLY, fileno);
7bce046b
LP
668 } else {
669 struct stat st;
670
671 /* If we connected this fd to the journal via a stream, patch the device/inode into the passed
672 * parameters, but only then. This is useful so that we can set $JOURNAL_STREAM that permits
ab2116b1
LP
673 * services to detect whether they are connected to the journal or not.
674 *
675 * If both stdout and stderr are connected to a stream then let's make sure to store the data
676 * about STDERR as that's usually the best way to do logging. */
7bce046b 677
ab2116b1
LP
678 if (fstat(fileno, &st) >= 0 &&
679 (*journal_stream_ino == 0 || fileno == STDERR_FILENO)) {
7bce046b
LP
680 *journal_stream_dev = st.st_dev;
681 *journal_stream_ino = st.st_ino;
682 }
47c1d80d
MS
683 }
684 return r;
4f2d528d
LP
685
686 case EXEC_OUTPUT_SOCKET:
687 assert(socket_fd >= 0);
e75a9ed1 688
eb17e935 689 return dup2(socket_fd, fileno) < 0 ? -errno : fileno;
94f04347 690
52c239d7 691 case EXEC_OUTPUT_NAMED_FD:
e75a9ed1
LP
692 assert(named_iofds[fileno] >= 0);
693
52c239d7
LB
694 (void) fd_nonblock(named_iofds[fileno], false);
695 return dup2(named_iofds[fileno], fileno) < 0 ? -errno : fileno;
696
566b7d23
ZD
697 case EXEC_OUTPUT_FILE:
698 case EXEC_OUTPUT_FILE_APPEND: {
2038c3f5 699 bool rw;
566b7d23 700 int fd, flags;
2038c3f5
LP
701
702 assert(context->stdio_file[fileno]);
703
704 rw = context->std_input == EXEC_INPUT_FILE &&
705 streq_ptr(context->stdio_file[fileno], context->stdio_file[STDIN_FILENO]);
706
707 if (rw)
708 return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
709
566b7d23
ZD
710 flags = O_WRONLY;
711 if (o == EXEC_OUTPUT_FILE_APPEND)
712 flags |= O_APPEND;
713
714 fd = acquire_path(context->stdio_file[fileno], flags, 0666 & ~context->umask);
2038c3f5
LP
715 if (fd < 0)
716 return fd;
717
566b7d23 718 return move_fd(fd, fileno, 0);
2038c3f5
LP
719 }
720
94f04347 721 default:
80876c20 722 assert_not_reached("Unknown error type");
94f04347 723 }
071830ff
LP
724}
725
02a51aba 726static int chown_terminal(int fd, uid_t uid) {
4b3b5bc7 727 int r;
02a51aba
LP
728
729 assert(fd >= 0);
02a51aba 730
1ff74fb6 731 /* Before we chown/chmod the TTY, let's ensure this is actually a tty */
4b3b5bc7
LP
732 if (isatty(fd) < 1) {
733 if (IN_SET(errno, EINVAL, ENOTTY))
734 return 0; /* not a tty */
1ff74fb6 735
02a51aba 736 return -errno;
4b3b5bc7 737 }
02a51aba 738
4b3b5bc7
LP
739 /* This might fail. What matters are the results. */
740 r = fchmod_and_chown(fd, TTY_MODE, uid, -1);
741 if (r < 0)
742 return r;
02a51aba 743
4b3b5bc7 744 return 1;
02a51aba
LP
745}
746
7d5ceb64 747static int setup_confirm_stdio(const char *vc, int *_saved_stdin, int *_saved_stdout) {
3d18b167
LP
748 _cleanup_close_ int fd = -1, saved_stdin = -1, saved_stdout = -1;
749 int r;
80876c20 750
80876c20
LP
751 assert(_saved_stdin);
752 assert(_saved_stdout);
753
af6da548
LP
754 saved_stdin = fcntl(STDIN_FILENO, F_DUPFD, 3);
755 if (saved_stdin < 0)
756 return -errno;
80876c20 757
af6da548 758 saved_stdout = fcntl(STDOUT_FILENO, F_DUPFD, 3);
3d18b167
LP
759 if (saved_stdout < 0)
760 return -errno;
80876c20 761
8854d795 762 fd = acquire_terminal(vc, ACQUIRE_TERMINAL_WAIT, DEFAULT_CONFIRM_USEC);
3d18b167
LP
763 if (fd < 0)
764 return fd;
80876c20 765
af6da548
LP
766 r = chown_terminal(fd, getuid());
767 if (r < 0)
3d18b167 768 return r;
02a51aba 769
3d18b167
LP
770 r = reset_terminal_fd(fd, true);
771 if (r < 0)
772 return r;
80876c20 773
2b33ab09 774 r = rearrange_stdio(fd, fd, STDERR_FILENO);
3d18b167 775 fd = -1;
2b33ab09
LP
776 if (r < 0)
777 return r;
80876c20
LP
778
779 *_saved_stdin = saved_stdin;
780 *_saved_stdout = saved_stdout;
781
3d18b167 782 saved_stdin = saved_stdout = -1;
80876c20 783
3d18b167 784 return 0;
80876c20
LP
785}
786
63d77c92 787static void write_confirm_error_fd(int err, int fd, const Unit *u) {
3b20f877
FB
788 assert(err < 0);
789
790 if (err == -ETIMEDOUT)
63d77c92 791 dprintf(fd, "Confirmation question timed out for %s, assuming positive response.\n", u->id);
3b20f877
FB
792 else {
793 errno = -err;
63d77c92 794 dprintf(fd, "Couldn't ask confirmation for %s: %m, assuming positive response.\n", u->id);
3b20f877
FB
795 }
796}
797
63d77c92 798static void write_confirm_error(int err, const char *vc, const Unit *u) {
03e334a1 799 _cleanup_close_ int fd = -1;
80876c20 800
3b20f877 801 assert(vc);
80876c20 802
7d5ceb64 803 fd = open_terminal(vc, O_WRONLY|O_NOCTTY|O_CLOEXEC);
af6da548 804 if (fd < 0)
3b20f877 805 return;
80876c20 806
63d77c92 807 write_confirm_error_fd(err, fd, u);
af6da548 808}
80876c20 809
/* Releases the terminal again and puts the saved stdin/stdout descriptors back in place. The saved
 * descriptors are closed and invalidated in any case.
 *
 * Returns 0 on success. If restoring a descriptor fails the negative errno of the last failing dup2()
 * is returned. */
static int restore_confirm_stdio(int *saved_stdin, int *saved_stdout) {
        int ret = 0;

        assert(saved_stdin);
        assert(saved_stdout);

        release_terminal();

        if (*saved_stdin >= 0 && dup2(*saved_stdin, STDIN_FILENO) < 0)
                ret = -errno;

        if (*saved_stdout >= 0 && dup2(*saved_stdout, STDOUT_FILENO) < 0)
                ret = -errno;

        *saved_stdin = safe_close(*saved_stdin);
        *saved_stdout = safe_close(*saved_stdout);

        return ret;
}
831
3b20f877
FB
/* The possible results of asking the user for confirmation before executing a command */
enum {
        CONFIRM_PRETEND_FAILURE = -1, /* don't execute the command, pretend it failed */
        CONFIRM_PRETEND_SUCCESS =  0, /* don't execute the command, pretend it succeeded */
        CONFIRM_EXECUTE         =  1, /* go ahead and execute the command */
};
837
eedf223a 838static int ask_for_confirmation(const char *vc, Unit *u, const char *cmdline) {
af6da548 839 int saved_stdout = -1, saved_stdin = -1, r;
2bcd3c26 840 _cleanup_free_ char *e = NULL;
3b20f877 841 char c;
af6da548 842
3b20f877 843 /* For any internal errors, assume a positive response. */
7d5ceb64 844 r = setup_confirm_stdio(vc, &saved_stdin, &saved_stdout);
3b20f877 845 if (r < 0) {
63d77c92 846 write_confirm_error(r, vc, u);
3b20f877
FB
847 return CONFIRM_EXECUTE;
848 }
af6da548 849
b0eb2944
FB
850 /* confirm_spawn might have been disabled while we were sleeping. */
851 if (manager_is_confirm_spawn_disabled(u->manager)) {
852 r = 1;
853 goto restore_stdio;
854 }
af6da548 855
2bcd3c26
FB
856 e = ellipsize(cmdline, 60, 100);
857 if (!e) {
858 log_oom();
859 r = CONFIRM_EXECUTE;
860 goto restore_stdio;
861 }
af6da548 862
d172b175 863 for (;;) {
539622bd 864 r = ask_char(&c, "yfshiDjcn", "Execute %s? [y, f, s – h for help] ", e);
d172b175 865 if (r < 0) {
63d77c92 866 write_confirm_error_fd(r, STDOUT_FILENO, u);
d172b175
FB
867 r = CONFIRM_EXECUTE;
868 goto restore_stdio;
869 }
af6da548 870
d172b175 871 switch (c) {
b0eb2944
FB
872 case 'c':
873 printf("Resuming normal execution.\n");
874 manager_disable_confirm_spawn();
875 r = 1;
876 break;
dd6f9ac0
FB
877 case 'D':
878 unit_dump(u, stdout, " ");
879 continue; /* ask again */
d172b175
FB
880 case 'f':
881 printf("Failing execution.\n");
882 r = CONFIRM_PRETEND_FAILURE;
883 break;
884 case 'h':
b0eb2944
FB
885 printf(" c - continue, proceed without asking anymore\n"
886 " D - dump, show the state of the unit\n"
dd6f9ac0 887 " f - fail, don't execute the command and pretend it failed\n"
d172b175 888 " h - help\n"
eedf223a 889 " i - info, show a short summary of the unit\n"
56fde33a 890 " j - jobs, show jobs that are in progress\n"
d172b175
FB
891 " s - skip, don't execute the command and pretend it succeeded\n"
892 " y - yes, execute the command\n");
dd6f9ac0 893 continue; /* ask again */
eedf223a
FB
894 case 'i':
895 printf(" Description: %s\n"
896 " Unit: %s\n"
897 " Command: %s\n",
898 u->id, u->description, cmdline);
899 continue; /* ask again */
56fde33a
FB
900 case 'j':
901 manager_dump_jobs(u->manager, stdout, " ");
902 continue; /* ask again */
539622bd
FB
903 case 'n':
904 /* 'n' was removed in favor of 'f'. */
905 printf("Didn't understand 'n', did you mean 'f'?\n");
906 continue; /* ask again */
d172b175
FB
907 case 's':
908 printf("Skipping execution.\n");
909 r = CONFIRM_PRETEND_SUCCESS;
910 break;
911 case 'y':
912 r = CONFIRM_EXECUTE;
913 break;
914 default:
915 assert_not_reached("Unhandled choice");
916 }
3b20f877 917 break;
3b20f877 918 }
af6da548 919
3b20f877 920restore_stdio:
af6da548 921 restore_confirm_stdio(&saved_stdin, &saved_stdout);
af6da548 922 return r;
80876c20
LP
923}
924
4d885bd3
DH
925static int get_fixed_user(const ExecContext *c, const char **user,
926 uid_t *uid, gid_t *gid,
927 const char **home, const char **shell) {
81a2b7ce 928 int r;
4d885bd3 929 const char *name;
81a2b7ce 930
4d885bd3 931 assert(c);
81a2b7ce 932
23deef88
LP
933 if (!c->user)
934 return 0;
935
4d885bd3
DH
936 /* Note that we don't set $HOME or $SHELL if they are not particularly enlightening anyway
937 * (i.e. are "/" or "/bin/nologin"). */
81a2b7ce 938
23deef88 939 name = c->user;
fafff8f1 940 r = get_user_creds(&name, uid, gid, home, shell, USER_CREDS_CLEAN);
4d885bd3
DH
941 if (r < 0)
942 return r;
81a2b7ce 943
4d885bd3
DH
944 *user = name;
945 return 0;
946}
947
948static int get_fixed_group(const ExecContext *c, const char **group, gid_t *gid) {
949 int r;
950 const char *name;
951
952 assert(c);
953
954 if (!c->group)
955 return 0;
956
957 name = c->group;
fafff8f1 958 r = get_group_creds(&name, gid, 0);
4d885bd3
DH
959 if (r < 0)
960 return r;
961
962 *group = name;
963 return 0;
964}
965
cdc5d5c5
DH
966static int get_supplementary_groups(const ExecContext *c, const char *user,
967 const char *group, gid_t gid,
968 gid_t **supplementary_gids, int *ngids) {
4d885bd3
DH
969 char **i;
970 int r, k = 0;
971 int ngroups_max;
972 bool keep_groups = false;
973 gid_t *groups = NULL;
974 _cleanup_free_ gid_t *l_gids = NULL;
975
976 assert(c);
977
bbeea271
DH
978 /*
979 * If user is given, then lookup GID and supplementary groups list.
980 * We avoid NSS lookups for gid=0. Also we have to initialize groups
cdc5d5c5
DH
981 * here and as early as possible so we keep the list of supplementary
982 * groups of the caller.
bbeea271
DH
983 */
984 if (user && gid_is_valid(gid) && gid != 0) {
985 /* First step, initialize groups from /etc/groups */
986 if (initgroups(user, gid) < 0)
987 return -errno;
988
989 keep_groups = true;
990 }
991
ac6e8be6 992 if (strv_isempty(c->supplementary_groups))
4d885bd3
DH
993 return 0;
994
366ddd25
DH
995 /*
996 * If SupplementaryGroups= was passed then NGROUPS_MAX has to
997 * be positive, otherwise fail.
998 */
999 errno = 0;
1000 ngroups_max = (int) sysconf(_SC_NGROUPS_MAX);
66855de7
LP
1001 if (ngroups_max <= 0)
1002 return errno_or_else(EOPNOTSUPP);
366ddd25 1003
4d885bd3
DH
1004 l_gids = new(gid_t, ngroups_max);
1005 if (!l_gids)
1006 return -ENOMEM;
81a2b7ce 1007
4d885bd3
DH
1008 if (keep_groups) {
1009 /*
1010 * Lookup the list of groups that the user belongs to, we
1011 * avoid NSS lookups here too for gid=0.
1012 */
1013 k = ngroups_max;
1014 if (getgrouplist(user, gid, l_gids, &k) < 0)
1015 return -EINVAL;
1016 } else
1017 k = 0;
81a2b7ce 1018
4d885bd3
DH
1019 STRV_FOREACH(i, c->supplementary_groups) {
1020 const char *g;
81a2b7ce 1021
4d885bd3
DH
1022 if (k >= ngroups_max)
1023 return -E2BIG;
81a2b7ce 1024
4d885bd3 1025 g = *i;
fafff8f1 1026 r = get_group_creds(&g, l_gids+k, 0);
4d885bd3
DH
1027 if (r < 0)
1028 return r;
81a2b7ce 1029
4d885bd3
DH
1030 k++;
1031 }
81a2b7ce 1032
4d885bd3
DH
1033 /*
1034 * Sets ngids to zero to drop all supplementary groups, happens
1035 * when we are under root and SupplementaryGroups= is empty.
1036 */
1037 if (k == 0) {
1038 *ngids = 0;
1039 return 0;
1040 }
81a2b7ce 1041
4d885bd3
DH
1042 /* Otherwise get the final list of supplementary groups */
1043 groups = memdup(l_gids, sizeof(gid_t) * k);
1044 if (!groups)
1045 return -ENOMEM;
1046
1047 *supplementary_gids = groups;
1048 *ngids = k;
1049
1050 groups = NULL;
1051
1052 return 0;
1053}
1054
/* Applies the supplementary groups (if there are any) and then the GID (if valid) to the calling
 * process. Returns 0 on success, a negative errno-style error otherwise. */
static int enforce_groups(gid_t gid, const gid_t *supplementary_gids, int ngids) {
        int r;

        /* Only touch the supplementary groups if SupplementaryGroups= is not empty */
        if (ngids > 0) {
                r = maybe_setgroups(ngids, supplementary_gids);
                if (r < 0)
                        return r;
        }

        /* Then set our real, effective and saved gids */
        if (gid_is_valid(gid) && setresgid(gid, gid, gid) < 0)
                return -errno;

        return 0;
}
1073
1074static int enforce_user(const ExecContext *context, uid_t uid) {
81a2b7ce
LP
1075 assert(context);
1076
4d885bd3
DH
1077 if (!uid_is_valid(uid))
1078 return 0;
1079
479050b3 1080 /* Sets (but doesn't look up) the uid and make sure we keep the
81a2b7ce
LP
1081 * capabilities while doing so. */
1082
479050b3 1083 if (context->capability_ambient_set != 0) {
81a2b7ce
LP
1084
1085 /* First step: If we need to keep capabilities but
1086 * drop privileges we need to make sure we keep our
cbb21cca 1087 * caps, while we drop privileges. */
693ced48 1088 if (uid != 0) {
cbb21cca 1089 int sb = context->secure_bits | 1<<SECURE_KEEP_CAPS;
693ced48
LP
1090
1091 if (prctl(PR_GET_SECUREBITS) != sb)
1092 if (prctl(PR_SET_SECUREBITS, sb) < 0)
1093 return -errno;
1094 }
81a2b7ce
LP
1095 }
1096
479050b3 1097 /* Second step: actually set the uids */
81a2b7ce
LP
1098 if (setresuid(uid, uid, uid) < 0)
1099 return -errno;
1100
1101 /* At this point we should have all necessary capabilities but
1102 are otherwise a normal user. However, the caps might got
1103 corrupted due to the setresuid() so we need clean them up
1104 later. This is done outside of this call. */
1105
1106 return 0;
1107}
1108
349cc4a5 1109#if HAVE_PAM
5b6319dc
LP
1110
1111static int null_conv(
1112 int num_msg,
1113 const struct pam_message **msg,
1114 struct pam_response **resp,
1115 void *appdata_ptr) {
1116
1117 /* We don't support conversations */
1118
1119 return PAM_CONV_ERR;
1120}
1121
cefc33ae
LP
1122#endif
1123
5b6319dc
LP
/* Opens a PAM session for service 'name' and user 'user', and forks off a helper process "(sd-pam)" that
 * waits for SIGTERM and then tears the PAM state down again.
 *
 *   name      PAM service name, passed to pam_start()
 *   user      user name, passed to pam_start()
 *   uid, gid  credentials the helper process drops to (best effort)
 *   tty       TTY path to register as PAM_TTY; if NULL, the TTY name of stdin is used if there is one
 *   env       in: environment handed to PAM via pam_putenv(); out: replaced by pam_getenvlist() on success
 *   fds       file descriptors that are closed in the helper process
 *   n_fds     number of entries in 'fds'
 *
 * On success returns the result of strv_free_and_replace(). On failure returns a negative errno-style
 * code; PAM failures are reported as -EPERM. If built without PAM support this is a no-op returning 0. */
static int setup_pam(
                const char *name,
                const char *user,
                uid_t uid,
                gid_t gid,
                const char *tty,
                char ***env,
                const int fds[], size_t n_fds) {

#if HAVE_PAM

        static const struct pam_conv conv = {
                .conv = null_conv,
                .appdata_ptr = NULL
        };

        _cleanup_(barrier_destroy) Barrier barrier = BARRIER_NULL;
        pam_handle_t *handle = NULL;
        sigset_t old_ss;
        int pam_code = PAM_SUCCESS, r;
        char **nv, **e = NULL;
        bool close_session = false;
        pid_t pam_pid = 0, parent_pid;
        int flags = 0;

        assert(name);
        assert(user);
        assert(env);

        /* We set up PAM in the parent process, then fork. The child
         * will then stay around until killed via PR_SET_PDEATHSIG or
         * systemd via the cgroup logic. It will then remove the PAM
         * session again. The parent process will exec() the actual
         * daemon. We do things this way to ensure that the main PID
         * of the daemon is the one we initially fork()ed. */

        r = barrier_create(&barrier);
        if (r < 0)
                goto fail;

        /* Keep PAM quiet unless debug logging is enabled */
        if (log_get_max_level() < LOG_DEBUG)
                flags |= PAM_SILENT;

        pam_code = pam_start(name, user, &conv, &handle);
        if (pam_code != PAM_SUCCESS) {
                handle = NULL;
                goto fail;
        }

        if (!tty) {
                _cleanup_free_ char *q = NULL;

                /* Hmm, so no TTY was explicitly passed, but an fd passed to us directly might be a TTY. Let's figure
                 * out if that's the case, and read the TTY off it. */

                if (getttyname_malloc(STDIN_FILENO, &q) >= 0)
                        tty = strjoina("/dev/", q);
        }

        if (tty) {
                pam_code = pam_set_item(handle, PAM_TTY, tty);
                if (pam_code != PAM_SUCCESS)
                        goto fail;
        }

        /* Pass the environment we were given on to PAM */
        STRV_FOREACH(nv, *env) {
                pam_code = pam_putenv(handle, *nv);
                if (pam_code != PAM_SUCCESS)
                        goto fail;
        }

        pam_code = pam_acct_mgmt(handle, flags);
        if (pam_code != PAM_SUCCESS)
                goto fail;

        /* Failure to establish credentials is not fatal, it is only logged */
        pam_code = pam_setcred(handle, PAM_ESTABLISH_CRED | flags);
        if (pam_code != PAM_SUCCESS)
                log_debug("pam_setcred() failed, ignoring: %s", pam_strerror(handle, pam_code));

        pam_code = pam_open_session(handle, flags);
        if (pam_code != PAM_SUCCESS)
                goto fail;

        close_session = true;

        e = pam_getenvlist(handle);
        if (!e) {
                pam_code = PAM_BUF_ERR;
                goto fail;
        }

        /* Block SIGTERM, so that we know that it won't get lost in
         * the child */

        assert_se(sigprocmask_many(SIG_BLOCK, &old_ss, SIGTERM, -1) >= 0);

        parent_pid = getpid_cached();

        r = safe_fork("(sd-pam)", 0, &pam_pid);
        if (r < 0)
                goto fail;
        if (r == 0) {
                int sig, ret = EXIT_PAM;

                /* The child's job is to reset the PAM session on
                 * termination */
                barrier_set_role(&barrier, BARRIER_CHILD);

                /* Make sure we don't keep open the passed fds in this child. We assume that otherwise only those fds
                 * are open here that have been opened by PAM. */
                (void) close_many(fds, n_fds);

                /* Drop privileges - we don't need any to pam_close_session
                 * and this will make PR_SET_PDEATHSIG work in most cases.
                 * If this fails, ignore the error - but expect sd-pam threads
                 * to fail to exit normally */

                r = maybe_setgroups(0, NULL);
                if (r < 0)
                        log_warning_errno(r, "Failed to setgroups() in sd-pam: %m");
                if (setresgid(gid, gid, gid) < 0)
                        log_warning_errno(errno, "Failed to setresgid() in sd-pam: %m");
                if (setresuid(uid, uid, uid) < 0)
                        log_warning_errno(errno, "Failed to setresuid() in sd-pam: %m");

                (void) ignore_signals(SIGPIPE, -1);

                /* Wait until our parent died. This will only work if
                 * the above setresuid() succeeds, otherwise the kernel
                 * will not allow unprivileged parents kill their privileged
                 * children this way. We rely on the control groups kill logic
                 * to do the rest for us. */
                if (prctl(PR_SET_PDEATHSIG, SIGTERM) < 0)
                        goto child_finish;

                /* Tell the parent that our setup is done. This is especially
                 * important regarding dropping privileges. Otherwise, unit
                 * setup might race against our setresuid(2) call.
                 *
                 * If the parent aborted, we'll detect this below, hence ignore
                 * return failure here. */
                (void) barrier_place(&barrier);

                /* Check if our parent process might already have died? */
                if (getppid() == parent_pid) {
                        sigset_t ss;

                        assert_se(sigemptyset(&ss) >= 0);
                        assert_se(sigaddset(&ss, SIGTERM) >= 0);

                        for (;;) {
                                /* Note that sigwait() reports failure by returning a positive error
                                 * number. It never returns a negative value and does not set errno,
                                 * and 'sig' is only valid if it returned zero. */
                                r = sigwait(&ss, &sig);
                                if (r == EINTR)
                                        continue;
                                if (r != 0)
                                        goto child_finish;

                                assert(sig == SIGTERM);
                                break;
                        }
                }

                pam_code = pam_setcred(handle, PAM_DELETE_CRED | flags);
                if (pam_code != PAM_SUCCESS)
                        goto child_finish;

                /* If our parent died we'll end the session */
                if (getppid() != parent_pid) {
                        pam_code = pam_close_session(handle, flags);
                        if (pam_code != PAM_SUCCESS)
                                goto child_finish;
                }

                ret = 0;

        child_finish:
                pam_end(handle, pam_code | flags);
                _exit(ret);
        }

        barrier_set_role(&barrier, BARRIER_PARENT);

        /* If the child was forked off successfully it will do all the
         * cleanups, so forget about the handle here. */
        handle = NULL;

        /* Unblock SIGTERM again in the parent */
        assert_se(sigprocmask(SIG_SETMASK, &old_ss, NULL) >= 0);

        /* We close the log explicitly here, since the PAM modules
         * might have opened it, but we don't want this fd around. */
        closelog();

        /* Synchronously wait for the child to initialize. We don't care for
         * errors as we cannot recover. However, warn loudly if it happens. */
        if (!barrier_place_and_sync(&barrier))
                log_error("PAM initialization failed");

        return strv_free_and_replace(*env, e);

fail:
        if (pam_code != PAM_SUCCESS) {
                log_error("PAM failed: %s", pam_strerror(handle, pam_code));
                r = -EPERM;  /* PAM errors do not map to errno */
        } else
                log_error_errno(r, "PAM failed: %m");

        if (handle) {
                if (close_session)
                        pam_code = pam_close_session(handle, flags);

                pam_end(handle, pam_code | flags);
        }

        strv_free(e);
        closelog();

        return r;
#else
        return 0;
#endif
}
5b6319dc 1347
5d6b1584
LP
/* Renames the calling process after the file name component of 'path', wrapped in parentheses. Only the
 * last 8 characters of the file name are used, so that the result fits in 10 characters (i.e. the length
 * of "/sbin/init") and looks pretty in /bin/ps. If the file name is empty, "(...)" is used instead. */
static void rename_process_from_path(const char *path) {
        char buf[11]; /* '(' + at most 8 characters + ')' + NUL */
        const char *tail;
        size_t n;

        tail = basename(path);
        if (isempty(tail)) {
                rename_process("(...)");
                return;
        }

        n = strlen(tail);
        if (n > 8) {
                /* Keep the end of the name, it is usually the more interesting part, since the
                 * beginning might just be "systemd-" */
                tail += n - 8;
                n = 8;
        }

        buf[0] = '(';
        memcpy(buf + 1, tail, n);
        buf[n + 1] = ')';
        buf[n + 2] = 0;

        rename_process(buf);
}
1378
469830d1
LP
1379static bool context_has_address_families(const ExecContext *c) {
1380 assert(c);
1381
1382 return c->address_families_whitelist ||
1383 !set_isempty(c->address_families);
1384}
1385
1386static bool context_has_syscall_filters(const ExecContext *c) {
1387 assert(c);
1388
1389 return c->syscall_whitelist ||
8cfa775f 1390 !hashmap_isempty(c->syscall_filter);
469830d1
LP
1391}
1392
1393static bool context_has_no_new_privileges(const ExecContext *c) {
1394 assert(c);
1395
1396 if (c->no_new_privileges)
1397 return true;
1398
1399 if (have_effective_cap(CAP_SYS_ADMIN)) /* if we are privileged, we don't need NNP */
1400 return false;
1401
1402 /* We need NNP if we have any form of seccomp and are unprivileged */
1403 return context_has_address_families(c) ||
1404 c->memory_deny_write_execute ||
1405 c->restrict_realtime ||
f69567cb 1406 c->restrict_suid_sgid ||
469830d1 1407 exec_context_restrict_namespaces_set(c) ||
fc64760d 1408 c->protect_clock ||
469830d1
LP
1409 c->protect_kernel_tunables ||
1410 c->protect_kernel_modules ||
84703040 1411 c->protect_kernel_logs ||
469830d1
LP
1412 c->private_devices ||
1413 context_has_syscall_filters(c) ||
78e864e5 1414 !set_isempty(c->syscall_archs) ||
aecd5ac6
TM
1415 c->lock_personality ||
1416 c->protect_hostname;
469830d1
LP
1417}
1418
349cc4a5 1419#if HAVE_SECCOMP
17df7223 1420
83f12b27 1421static bool skip_seccomp_unavailable(const Unit* u, const char* msg) {
f673b62d
LP
1422
1423 if (is_seccomp_available())
1424 return false;
1425
f673b62d 1426 log_unit_debug(u, "SECCOMP features not detected in the kernel, skipping %s", msg);
f673b62d 1427 return true;
83f12b27
FS
1428}
1429
165a31c0 1430static int apply_syscall_filter(const Unit* u, const ExecContext *c, bool needs_ambient_hack) {
469830d1 1431 uint32_t negative_action, default_action, action;
165a31c0 1432 int r;
8351ceae 1433
469830d1 1434 assert(u);
c0467cf3 1435 assert(c);
8351ceae 1436
469830d1 1437 if (!context_has_syscall_filters(c))
83f12b27
FS
1438 return 0;
1439
469830d1
LP
1440 if (skip_seccomp_unavailable(u, "SystemCallFilter="))
1441 return 0;
e9642be2 1442
ccc16c78 1443 negative_action = c->syscall_errno == 0 ? scmp_act_kill_process() : SCMP_ACT_ERRNO(c->syscall_errno);
e9642be2 1444
469830d1
LP
1445 if (c->syscall_whitelist) {
1446 default_action = negative_action;
1447 action = SCMP_ACT_ALLOW;
7c66bae2 1448 } else {
469830d1
LP
1449 default_action = SCMP_ACT_ALLOW;
1450 action = negative_action;
57183d11 1451 }
8351ceae 1452
165a31c0
LP
1453 if (needs_ambient_hack) {
1454 r = seccomp_filter_set_add(c->syscall_filter, c->syscall_whitelist, syscall_filter_sets + SYSCALL_FILTER_SET_SETUID);
1455 if (r < 0)
1456 return r;
1457 }
1458
b54f36c6 1459 return seccomp_load_syscall_filter_set_raw(default_action, c->syscall_filter, action, false);
4298d0b5
LP
1460}
1461
469830d1
LP
1462static int apply_syscall_archs(const Unit *u, const ExecContext *c) {
1463 assert(u);
4298d0b5
LP
1464 assert(c);
1465
469830d1 1466 if (set_isempty(c->syscall_archs))
83f12b27
FS
1467 return 0;
1468
469830d1
LP
1469 if (skip_seccomp_unavailable(u, "SystemCallArchitectures="))
1470 return 0;
4298d0b5 1471
469830d1
LP
1472 return seccomp_restrict_archs(c->syscall_archs);
1473}
4298d0b5 1474
469830d1
LP
1475static int apply_address_families(const Unit* u, const ExecContext *c) {
1476 assert(u);
1477 assert(c);
4298d0b5 1478
469830d1
LP
1479 if (!context_has_address_families(c))
1480 return 0;
4298d0b5 1481
469830d1
LP
1482 if (skip_seccomp_unavailable(u, "RestrictAddressFamilies="))
1483 return 0;
4298d0b5 1484
469830d1 1485 return seccomp_restrict_address_families(c->address_families, c->address_families_whitelist);
8351ceae 1486}
4298d0b5 1487
83f12b27 1488static int apply_memory_deny_write_execute(const Unit* u, const ExecContext *c) {
469830d1 1489 assert(u);
f3e43635
TM
1490 assert(c);
1491
469830d1 1492 if (!c->memory_deny_write_execute)
83f12b27
FS
1493 return 0;
1494
469830d1
LP
1495 if (skip_seccomp_unavailable(u, "MemoryDenyWriteExecute="))
1496 return 0;
f3e43635 1497
469830d1 1498 return seccomp_memory_deny_write_execute();
f3e43635
TM
1499}
1500
83f12b27 1501static int apply_restrict_realtime(const Unit* u, const ExecContext *c) {
469830d1 1502 assert(u);
f4170c67
LP
1503 assert(c);
1504
469830d1 1505 if (!c->restrict_realtime)
83f12b27
FS
1506 return 0;
1507
469830d1
LP
1508 if (skip_seccomp_unavailable(u, "RestrictRealtime="))
1509 return 0;
f4170c67 1510
469830d1 1511 return seccomp_restrict_realtime();
f4170c67
LP
1512}
1513
f69567cb
LP
1514static int apply_restrict_suid_sgid(const Unit* u, const ExecContext *c) {
1515 assert(u);
1516 assert(c);
1517
1518 if (!c->restrict_suid_sgid)
1519 return 0;
1520
1521 if (skip_seccomp_unavailable(u, "RestrictSUIDSGID="))
1522 return 0;
1523
1524 return seccomp_restrict_suid_sgid();
1525}
1526
59e856c7 1527static int apply_protect_sysctl(const Unit *u, const ExecContext *c) {
469830d1 1528 assert(u);
59eeb84b
LP
1529 assert(c);
1530
1531 /* Turn off the legacy sysctl() system call. Many distributions turn this off while building the kernel, but
1532 * let's protect even those systems where this is left on in the kernel. */
1533
469830d1 1534 if (!c->protect_kernel_tunables)
59eeb84b
LP
1535 return 0;
1536
469830d1
LP
1537 if (skip_seccomp_unavailable(u, "ProtectKernelTunables="))
1538 return 0;
59eeb84b 1539
469830d1 1540 return seccomp_protect_sysctl();
59eeb84b
LP
1541}
1542
59e856c7 1543static int apply_protect_kernel_modules(const Unit *u, const ExecContext *c) {
469830d1 1544 assert(u);
502d704e
DH
1545 assert(c);
1546
25a8d8a0 1547 /* Turn off module syscalls on ProtectKernelModules=yes */
502d704e 1548
469830d1
LP
1549 if (!c->protect_kernel_modules)
1550 return 0;
1551
502d704e
DH
1552 if (skip_seccomp_unavailable(u, "ProtectKernelModules="))
1553 return 0;
1554
b54f36c6 1555 return seccomp_load_syscall_filter_set(SCMP_ACT_ALLOW, syscall_filter_sets + SYSCALL_FILTER_SET_MODULE, SCMP_ACT_ERRNO(EPERM), false);
502d704e
DH
1556}
1557
84703040
KK
1558static int apply_protect_kernel_logs(const Unit *u, const ExecContext *c) {
1559 assert(u);
1560 assert(c);
1561
1562 if (!c->protect_kernel_logs)
1563 return 0;
1564
1565 if (skip_seccomp_unavailable(u, "ProtectKernelLogs="))
1566 return 0;
1567
1568 return seccomp_protect_syslog();
1569}
1570
daf8f72b 1571static int apply_protect_clock(const Unit *u, const ExecContext *c) {
fc64760d
KK
1572 assert(u);
1573 assert(c);
1574
1575 if (!c->protect_clock)
1576 return 0;
1577
1578 if (skip_seccomp_unavailable(u, "ProtectClock="))
1579 return 0;
1580
1581 return seccomp_load_syscall_filter_set(SCMP_ACT_ALLOW, syscall_filter_sets + SYSCALL_FILTER_SET_CLOCK, SCMP_ACT_ERRNO(EPERM), false);
1582}
1583
59e856c7 1584static int apply_private_devices(const Unit *u, const ExecContext *c) {
469830d1 1585 assert(u);
ba128bb8
LP
1586 assert(c);
1587
8f81a5f6 1588 /* If PrivateDevices= is set, also turn off iopl and all @raw-io syscalls. */
ba128bb8 1589
469830d1
LP
1590 if (!c->private_devices)
1591 return 0;
1592
ba128bb8
LP
1593 if (skip_seccomp_unavailable(u, "PrivateDevices="))
1594 return 0;
1595
b54f36c6 1596 return seccomp_load_syscall_filter_set(SCMP_ACT_ALLOW, syscall_filter_sets + SYSCALL_FILTER_SET_RAW_IO, SCMP_ACT_ERRNO(EPERM), false);
ba128bb8
LP
1597}
1598
34cf6c43 1599static int apply_restrict_namespaces(const Unit *u, const ExecContext *c) {
469830d1 1600 assert(u);
add00535
LP
1601 assert(c);
1602
1603 if (!exec_context_restrict_namespaces_set(c))
1604 return 0;
1605
1606 if (skip_seccomp_unavailable(u, "RestrictNamespaces="))
1607 return 0;
1608
1609 return seccomp_restrict_namespaces(c->restrict_namespaces);
1610}
1611
78e864e5 1612static int apply_lock_personality(const Unit* u, const ExecContext *c) {
e8132d63
LP
1613 unsigned long personality;
1614 int r;
78e864e5
TM
1615
1616 assert(u);
1617 assert(c);
1618
1619 if (!c->lock_personality)
1620 return 0;
1621
1622 if (skip_seccomp_unavailable(u, "LockPersonality="))
1623 return 0;
1624
e8132d63
LP
1625 personality = c->personality;
1626
1627 /* If personality is not specified, use either PER_LINUX or PER_LINUX32 depending on what is currently set. */
1628 if (personality == PERSONALITY_INVALID) {
1629
1630 r = opinionated_personality(&personality);
1631 if (r < 0)
1632 return r;
1633 }
78e864e5
TM
1634
1635 return seccomp_lock_personality(personality);
1636}
1637
c0467cf3 1638#endif
8351ceae 1639
daf8f72b 1640static int apply_protect_hostname(const Unit *u, const ExecContext *c, int *ret_exit_status) {
daf8f72b
LP
1641 assert(u);
1642 assert(c);
1643
1644 if (!c->protect_hostname)
1645 return 0;
1646
1647 if (ns_type_supported(NAMESPACE_UTS)) {
1648 if (unshare(CLONE_NEWUTS) < 0) {
1649 if (!ERRNO_IS_NOT_SUPPORTED(errno) && !ERRNO_IS_PRIVILEGE(errno)) {
1650 *ret_exit_status = EXIT_NAMESPACE;
1651 return log_unit_error_errno(u, errno, "Failed to set up UTS namespacing: %m");
1652 }
1653
1654 log_unit_warning(u, "ProtectHostname=yes is configured, but UTS namespace setup is prohibited (container manager?), ignoring namespace setup.");
1655 }
1656 } else
1657 log_unit_warning(u, "ProtectHostname=yes is configured, but the kernel does not support UTS namespaces, ignoring namespace setup.");
1658
1659#if HAVE_SECCOMP
8f3e342f
ZJS
1660 int r;
1661
daf8f72b
LP
1662 if (skip_seccomp_unavailable(u, "ProtectHostname="))
1663 return 0;
1664
1665 r = seccomp_protect_hostname();
1666 if (r < 0) {
1667 *ret_exit_status = EXIT_SECCOMP;
1668 return log_unit_error_errno(u, r, "Failed to apply hostname restrictions: %m");
1669 }
1670#endif
1671
1672 return 0;
1673}
1674
3042bbeb 1675static void do_idle_pipe_dance(int idle_pipe[static 4]) {
31a7eb86
ZJS
1676 assert(idle_pipe);
1677
54eb2300
LP
1678 idle_pipe[1] = safe_close(idle_pipe[1]);
1679 idle_pipe[2] = safe_close(idle_pipe[2]);
31a7eb86
ZJS
1680
1681 if (idle_pipe[0] >= 0) {
1682 int r;
1683
1684 r = fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT_USEC);
1685
1686 if (idle_pipe[3] >= 0 && r == 0 /* timeout */) {
c7cc737f
LP
1687 ssize_t n;
1688
31a7eb86 1689 /* Signal systemd that we are bored and want to continue. */
c7cc737f
LP
1690 n = write(idle_pipe[3], "x", 1);
1691 if (n > 0)
cd972d69 1692 /* Wait for systemd to react to the signal above. */
54756dce 1693 (void) fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT2_USEC);
31a7eb86
ZJS
1694 }
1695
54eb2300 1696 idle_pipe[0] = safe_close(idle_pipe[0]);
31a7eb86
ZJS
1697
1698 }
1699
54eb2300 1700 idle_pipe[3] = safe_close(idle_pipe[3]);
31a7eb86
ZJS
1701}
1702
fb2042dd
YW
1703static const char *exec_directory_env_name_to_string(ExecDirectoryType t);
1704
7cae38c4 1705static int build_environment(
34cf6c43 1706 const Unit *u,
9fa95f85 1707 const ExecContext *c,
1e22b5cd 1708 const ExecParameters *p,
da6053d0 1709 size_t n_fds,
7cae38c4
LP
1710 const char *home,
1711 const char *username,
1712 const char *shell,
7bce046b
LP
1713 dev_t journal_stream_dev,
1714 ino_t journal_stream_ino,
7cae38c4
LP
1715 char ***ret) {
1716
1717 _cleanup_strv_free_ char **our_env = NULL;
fb2042dd 1718 ExecDirectoryType t;
da6053d0 1719 size_t n_env = 0;
7cae38c4
LP
1720 char *x;
1721
4b58153d 1722 assert(u);
7cae38c4 1723 assert(c);
7c1cb6f1 1724 assert(p);
7cae38c4
LP
1725 assert(ret);
1726
91dd5f7c 1727 our_env = new0(char*, 15 + _EXEC_DIRECTORY_TYPE_MAX);
7cae38c4
LP
1728 if (!our_env)
1729 return -ENOMEM;
1730
1731 if (n_fds > 0) {
8dd4c05b
LP
1732 _cleanup_free_ char *joined = NULL;
1733
df0ff127 1734 if (asprintf(&x, "LISTEN_PID="PID_FMT, getpid_cached()) < 0)
7cae38c4
LP
1735 return -ENOMEM;
1736 our_env[n_env++] = x;
1737
da6053d0 1738 if (asprintf(&x, "LISTEN_FDS=%zu", n_fds) < 0)
7cae38c4
LP
1739 return -ENOMEM;
1740 our_env[n_env++] = x;
8dd4c05b 1741
1e22b5cd 1742 joined = strv_join(p->fd_names, ":");
8dd4c05b
LP
1743 if (!joined)
1744 return -ENOMEM;
1745
605405c6 1746 x = strjoin("LISTEN_FDNAMES=", joined);
8dd4c05b
LP
1747 if (!x)
1748 return -ENOMEM;
1749 our_env[n_env++] = x;
7cae38c4
LP
1750 }
1751
b08af3b1 1752 if ((p->flags & EXEC_SET_WATCHDOG) && p->watchdog_usec > 0) {
df0ff127 1753 if (asprintf(&x, "WATCHDOG_PID="PID_FMT, getpid_cached()) < 0)
09812eb7
LP
1754 return -ENOMEM;
1755 our_env[n_env++] = x;
1756
1e22b5cd 1757 if (asprintf(&x, "WATCHDOG_USEC="USEC_FMT, p->watchdog_usec) < 0)
09812eb7
LP
1758 return -ENOMEM;
1759 our_env[n_env++] = x;
1760 }
1761
fd63e712
LP
1762 /* If this is D-Bus, tell the nss-systemd module, since it relies on being able to use D-Bus look up dynamic
1763 * users via PID 1, possibly dead-locking the dbus daemon. This way it will not use D-Bus to resolve names, but
1764 * check the database directly. */
ac647978 1765 if (p->flags & EXEC_NSS_BYPASS_BUS) {
fd63e712
LP
1766 x = strdup("SYSTEMD_NSS_BYPASS_BUS=1");
1767 if (!x)
1768 return -ENOMEM;
1769 our_env[n_env++] = x;
1770 }
1771
7cae38c4 1772 if (home) {
b910cc72 1773 x = strjoin("HOME=", home);
7cae38c4
LP
1774 if (!x)
1775 return -ENOMEM;
7bbead1d
LP
1776
1777 path_simplify(x + 5, true);
7cae38c4
LP
1778 our_env[n_env++] = x;
1779 }
1780
1781 if (username) {
b910cc72 1782 x = strjoin("LOGNAME=", username);
7cae38c4
LP
1783 if (!x)
1784 return -ENOMEM;
1785 our_env[n_env++] = x;
1786
b910cc72 1787 x = strjoin("USER=", username);
7cae38c4
LP
1788 if (!x)
1789 return -ENOMEM;
1790 our_env[n_env++] = x;
1791 }
1792
1793 if (shell) {
b910cc72 1794 x = strjoin("SHELL=", shell);
7cae38c4
LP
1795 if (!x)
1796 return -ENOMEM;
7bbead1d
LP
1797
1798 path_simplify(x + 6, true);
7cae38c4
LP
1799 our_env[n_env++] = x;
1800 }
1801
4b58153d
LP
1802 if (!sd_id128_is_null(u->invocation_id)) {
1803 if (asprintf(&x, "INVOCATION_ID=" SD_ID128_FORMAT_STR, SD_ID128_FORMAT_VAL(u->invocation_id)) < 0)
1804 return -ENOMEM;
1805
1806 our_env[n_env++] = x;
1807 }
1808
6af760f3
LP
1809 if (exec_context_needs_term(c)) {
1810 const char *tty_path, *term = NULL;
1811
1812 tty_path = exec_context_tty_path(c);
1813
e8cf09b2
LP
1814 /* If we are forked off PID 1 and we are supposed to operate on /dev/console, then let's try
1815 * to inherit the $TERM set for PID 1. This is useful for containers so that the $TERM the
1816 * container manager passes to PID 1 ends up all the way in the console login shown. */
6af760f3 1817
e8cf09b2 1818 if (path_equal_ptr(tty_path, "/dev/console") && getppid() == 1)
6af760f3 1819 term = getenv("TERM");
e8cf09b2 1820
6af760f3
LP
1821 if (!term)
1822 term = default_term_for_tty(tty_path);
7cae38c4 1823
b910cc72 1824 x = strjoin("TERM=", term);
7cae38c4
LP
1825 if (!x)
1826 return -ENOMEM;
1827 our_env[n_env++] = x;
1828 }
1829
7bce046b
LP
1830 if (journal_stream_dev != 0 && journal_stream_ino != 0) {
1831 if (asprintf(&x, "JOURNAL_STREAM=" DEV_FMT ":" INO_FMT, journal_stream_dev, journal_stream_ino) < 0)
1832 return -ENOMEM;
1833
1834 our_env[n_env++] = x;
1835 }
1836
91dd5f7c
LP
1837 if (c->log_namespace) {
1838 x = strjoin("LOG_NAMESPACE=", c->log_namespace);
1839 if (!x)
1840 return -ENOMEM;
1841
1842 our_env[n_env++] = x;
1843 }
1844
fb2042dd
YW
1845 for (t = 0; t < _EXEC_DIRECTORY_TYPE_MAX; t++) {
1846 _cleanup_free_ char *pre = NULL, *joined = NULL;
1847 const char *n;
1848
1849 if (!p->prefix[t])
1850 continue;
1851
1852 if (strv_isempty(c->directories[t].paths))
1853 continue;
1854
1855 n = exec_directory_env_name_to_string(t);
1856 if (!n)
1857 continue;
1858
1859 pre = strjoin(p->prefix[t], "/");
1860 if (!pre)
1861 return -ENOMEM;
1862
1863 joined = strv_join_prefix(c->directories[t].paths, ":", pre);
1864 if (!joined)
1865 return -ENOMEM;
1866
1867 x = strjoin(n, "=", joined);
1868 if (!x)
1869 return -ENOMEM;
1870
1871 our_env[n_env++] = x;
1872 }
1873
7cae38c4 1874 our_env[n_env++] = NULL;
fb2042dd 1875 assert(n_env <= 14 + _EXEC_DIRECTORY_TYPE_MAX);
7cae38c4 1876
ae2a15bc 1877 *ret = TAKE_PTR(our_env);
7cae38c4
LP
1878
1879 return 0;
1880}
1881
b4c14404
FB
1882static int build_pass_environment(const ExecContext *c, char ***ret) {
1883 _cleanup_strv_free_ char **pass_env = NULL;
1884 size_t n_env = 0, n_bufsize = 0;
1885 char **i;
1886
1887 STRV_FOREACH(i, c->pass_environment) {
1888 _cleanup_free_ char *x = NULL;
1889 char *v;
1890
1891 v = getenv(*i);
1892 if (!v)
1893 continue;
605405c6 1894 x = strjoin(*i, "=", v);
b4c14404
FB
1895 if (!x)
1896 return -ENOMEM;
00819cc1 1897
b4c14404
FB
1898 if (!GREEDY_REALLOC(pass_env, n_bufsize, n_env + 2))
1899 return -ENOMEM;
00819cc1 1900
1cc6c93a 1901 pass_env[n_env++] = TAKE_PTR(x);
b4c14404 1902 pass_env[n_env] = NULL;
b4c14404
FB
1903 }
1904
ae2a15bc 1905 *ret = TAKE_PTR(pass_env);
b4c14404
FB
1906
1907 return 0;
1908}
1909
8b44a3d2
LP
1910static bool exec_needs_mount_namespace(
1911 const ExecContext *context,
1912 const ExecParameters *params,
4657abb5 1913 const ExecRuntime *runtime) {
8b44a3d2
LP
1914
1915 assert(context);
1916 assert(params);
1917
915e6d16
LP
1918 if (context->root_image)
1919 return true;
1920
2a624c36
AP
1921 if (!strv_isempty(context->read_write_paths) ||
1922 !strv_isempty(context->read_only_paths) ||
1923 !strv_isempty(context->inaccessible_paths))
8b44a3d2
LP
1924 return true;
1925
42b1d8e0 1926 if (context->n_bind_mounts > 0)
d2d6c096
LP
1927 return true;
1928
2abd4e38
YW
1929 if (context->n_temporary_filesystems > 0)
1930 return true;
1931
37ed15d7 1932 if (!IN_SET(context->mount_flags, 0, MS_SHARED))
8b44a3d2
LP
1933 return true;
1934
1935 if (context->private_tmp && runtime && (runtime->tmp_dir || runtime->var_tmp_dir))
1936 return true;
1937
8b44a3d2 1938 if (context->private_devices ||
228af36f 1939 context->private_mounts ||
8b44a3d2 1940 context->protect_system != PROTECT_SYSTEM_NO ||
59eeb84b
LP
1941 context->protect_home != PROTECT_HOME_NO ||
1942 context->protect_kernel_tunables ||
c575770b 1943 context->protect_kernel_modules ||
94a7b275 1944 context->protect_kernel_logs ||
59eeb84b 1945 context->protect_control_groups)
8b44a3d2
LP
1946 return true;
1947
37c56f89
YW
1948 if (context->root_directory) {
1949 ExecDirectoryType t;
1950
1951 if (context->mount_apivfs)
1952 return true;
1953
1954 for (t = 0; t < _EXEC_DIRECTORY_TYPE_MAX; t++) {
1955 if (!params->prefix[t])
1956 continue;
1957
1958 if (!strv_isempty(context->directories[t].paths))
1959 return true;
1960 }
1961 }
5d997827 1962
42b1d8e0 1963 if (context->dynamic_user &&
b43ee82f 1964 (!strv_isempty(context->directories[EXEC_DIRECTORY_STATE].paths) ||
42b1d8e0
YW
1965 !strv_isempty(context->directories[EXEC_DIRECTORY_CACHE].paths) ||
1966 !strv_isempty(context->directories[EXEC_DIRECTORY_LOGS].paths)))
1967 return true;
1968
91dd5f7c
LP
1969 if (context->log_namespace)
1970 return true;
1971
8b44a3d2
LP
1972 return false;
1973}
1974
5749f855 1975static int setup_private_users(uid_t ouid, gid_t ogid, uid_t uid, gid_t gid) {
d251207d
LP
1976 _cleanup_free_ char *uid_map = NULL, *gid_map = NULL;
1977 _cleanup_close_pair_ int errno_pipe[2] = { -1, -1 };
1978 _cleanup_close_ int unshare_ready_fd = -1;
1979 _cleanup_(sigkill_waitp) pid_t pid = 0;
1980 uint64_t c = 1;
d251207d
LP
1981 ssize_t n;
1982 int r;
1983
5749f855
AZ
1984 /* Set up a user namespace and map the original UID/GID (IDs from before any user or group changes, i.e.
1985 * the IDs from the user or system manager(s)) to itself, the selected UID/GID to itself, and everything else to
d251207d
LP
1986 * nobody. In order to be able to write this mapping we need CAP_SETUID in the original user namespace, which
1987 * we however lack after opening the user namespace. To work around this we fork() a temporary child process,
1988 * which waits for the parent to create the new user namespace while staying in the original namespace. The
1989 * child then writes the UID mapping, under full privileges. The parent waits for the child to finish and
5749f855
AZ
1990 * continues execution normally.
1991 * For unprivileged users (i.e. without capabilities), the root to root mapping is excluded. As such, it
1992 * does not need CAP_SETUID to write the single line mapping to itself. */
d251207d 1993
5749f855
AZ
1994 /* Can only set up multiple mappings with CAP_SETUID. */
1995 if (have_effective_cap(CAP_SETUID) && uid != ouid && uid_is_valid(uid))
587ab01b 1996 r = asprintf(&uid_map,
5749f855 1997 UID_FMT " " UID_FMT " 1\n" /* Map $OUID → $OUID */
587ab01b 1998 UID_FMT " " UID_FMT " 1\n", /* Map $UID → $UID */
5749f855
AZ
1999 ouid, ouid, uid, uid);
2000 else
2001 r = asprintf(&uid_map,
2002 UID_FMT " " UID_FMT " 1\n", /* Map $OUID → $OUID */
2003 ouid, ouid);
d251207d 2004
5749f855
AZ
2005 if (r < 0)
2006 return -ENOMEM;
2007
2008 /* Can only set up multiple mappings with CAP_SETGID. */
2009 if (have_effective_cap(CAP_SETGID) && gid != ogid && gid_is_valid(gid))
587ab01b 2010 r = asprintf(&gid_map,
5749f855 2011 GID_FMT " " GID_FMT " 1\n" /* Map $OGID → $OGID */
587ab01b 2012 GID_FMT " " GID_FMT " 1\n", /* Map $GID → $GID */
5749f855
AZ
2013 ogid, ogid, gid, gid);
2014 else
2015 r = asprintf(&gid_map,
2016 GID_FMT " " GID_FMT " 1\n", /* Map $OGID -> $OGID */
2017 ogid, ogid);
2018
2019 if (r < 0)
2020 return -ENOMEM;
d251207d
LP
2021
2022 /* Create a communication channel so that the parent can tell the child when it finished creating the user
2023 * namespace. */
2024 unshare_ready_fd = eventfd(0, EFD_CLOEXEC);
2025 if (unshare_ready_fd < 0)
2026 return -errno;
2027
2028 /* Create a communication channel so that the child can tell the parent a proper error code in case it
2029 * failed. */
2030 if (pipe2(errno_pipe, O_CLOEXEC) < 0)
2031 return -errno;
2032
4c253ed1
LP
2033 r = safe_fork("(sd-userns)", FORK_RESET_SIGNALS|FORK_DEATHSIG, &pid);
2034 if (r < 0)
2035 return r;
2036 if (r == 0) {
d251207d
LP
2037 _cleanup_close_ int fd = -1;
2038 const char *a;
2039 pid_t ppid;
2040
2041 /* Child process, running in the original user namespace. Let's update the parent's UID/GID map from
2042 * here, after the parent opened its own user namespace. */
2043
2044 ppid = getppid();
2045 errno_pipe[0] = safe_close(errno_pipe[0]);
2046
2047 /* Wait until the parent unshared the user namespace */
2048 if (read(unshare_ready_fd, &c, sizeof(c)) < 0) {
2049 r = -errno;
2050 goto child_fail;
2051 }
2052
2053 /* Disable the setgroups() system call in the child user namespace, for good. */
2054 a = procfs_file_alloca(ppid, "setgroups");
2055 fd = open(a, O_WRONLY|O_CLOEXEC);
2056 if (fd < 0) {
2057 if (errno != ENOENT) {
2058 r = -errno;
2059 goto child_fail;
2060 }
2061
2062 /* If the file is missing the kernel is too old, let's continue anyway. */
2063 } else {
2064 if (write(fd, "deny\n", 5) < 0) {
2065 r = -errno;
2066 goto child_fail;
2067 }
2068
2069 fd = safe_close(fd);
2070 }
2071
2072 /* First write the GID map */
2073 a = procfs_file_alloca(ppid, "gid_map");
2074 fd = open(a, O_WRONLY|O_CLOEXEC);
2075 if (fd < 0) {
2076 r = -errno;
2077 goto child_fail;
2078 }
2079 if (write(fd, gid_map, strlen(gid_map)) < 0) {
2080 r = -errno;
2081 goto child_fail;
2082 }
2083 fd = safe_close(fd);
2084
2085 /* The write the UID map */
2086 a = procfs_file_alloca(ppid, "uid_map");
2087 fd = open(a, O_WRONLY|O_CLOEXEC);
2088 if (fd < 0) {
2089 r = -errno;
2090 goto child_fail;
2091 }
2092 if (write(fd, uid_map, strlen(uid_map)) < 0) {
2093 r = -errno;
2094 goto child_fail;
2095 }
2096
2097 _exit(EXIT_SUCCESS);
2098
2099 child_fail:
2100 (void) write(errno_pipe[1], &r, sizeof(r));
2101 _exit(EXIT_FAILURE);
2102 }
2103
2104 errno_pipe[1] = safe_close(errno_pipe[1]);
2105
2106 if (unshare(CLONE_NEWUSER) < 0)
2107 return -errno;
2108
2109 /* Let the child know that the namespace is ready now */
2110 if (write(unshare_ready_fd, &c, sizeof(c)) < 0)
2111 return -errno;
2112
2113 /* Try to read an error code from the child */
2114 n = read(errno_pipe[0], &r, sizeof(r));
2115 if (n < 0)
2116 return -errno;
2117 if (n == sizeof(r)) { /* an error code was sent to us */
2118 if (r < 0)
2119 return r;
2120 return -EIO;
2121 }
2122 if (n != 0) /* on success we should have read 0 bytes */
2123 return -EIO;
2124
2e87a1fd
LP
2125 r = wait_for_terminate_and_check("(sd-userns)", pid, 0);
2126 pid = 0;
d251207d
LP
2127 if (r < 0)
2128 return r;
2e87a1fd 2129 if (r != EXIT_SUCCESS) /* If something strange happened with the child, let's consider this fatal, too */
d251207d
LP
2130 return -EIO;
2131
2132 return 0;
2133}
2134
494d0247
YW
2135static bool exec_directory_is_private(const ExecContext *context, ExecDirectoryType type) {
2136 if (!context->dynamic_user)
2137 return false;
2138
2139 if (type == EXEC_DIRECTORY_CONFIGURATION)
2140 return false;
2141
2142 if (type == EXEC_DIRECTORY_RUNTIME && context->runtime_directory_preserve_mode == EXEC_PRESERVE_NO)
2143 return false;
2144
2145 return true;
2146}
2147
3536f49e 2148static int setup_exec_directory(
07689d5d
LP
2149 const ExecContext *context,
2150 const ExecParameters *params,
2151 uid_t uid,
3536f49e 2152 gid_t gid,
3536f49e
YW
2153 ExecDirectoryType type,
2154 int *exit_status) {
07689d5d 2155
72fd1768 2156 static const int exit_status_table[_EXEC_DIRECTORY_TYPE_MAX] = {
3536f49e
YW
2157 [EXEC_DIRECTORY_RUNTIME] = EXIT_RUNTIME_DIRECTORY,
2158 [EXEC_DIRECTORY_STATE] = EXIT_STATE_DIRECTORY,
2159 [EXEC_DIRECTORY_CACHE] = EXIT_CACHE_DIRECTORY,
2160 [EXEC_DIRECTORY_LOGS] = EXIT_LOGS_DIRECTORY,
2161 [EXEC_DIRECTORY_CONFIGURATION] = EXIT_CONFIGURATION_DIRECTORY,
2162 };
07689d5d
LP
2163 char **rt;
2164 int r;
2165
2166 assert(context);
2167 assert(params);
72fd1768 2168 assert(type >= 0 && type < _EXEC_DIRECTORY_TYPE_MAX);
3536f49e 2169 assert(exit_status);
07689d5d 2170
3536f49e
YW
2171 if (!params->prefix[type])
2172 return 0;
2173
8679efde 2174 if (params->flags & EXEC_CHOWN_DIRECTORIES) {
3536f49e
YW
2175 if (!uid_is_valid(uid))
2176 uid = 0;
2177 if (!gid_is_valid(gid))
2178 gid = 0;
2179 }
2180
2181 STRV_FOREACH(rt, context->directories[type].paths) {
6c47cd7d 2182 _cleanup_free_ char *p = NULL, *pp = NULL;
07689d5d 2183
edbfeb12 2184 p = path_join(params->prefix[type], *rt);
3536f49e
YW
2185 if (!p) {
2186 r = -ENOMEM;
2187 goto fail;
2188 }
07689d5d 2189
23a7448e
YW
2190 r = mkdir_parents_label(p, 0755);
2191 if (r < 0)
3536f49e 2192 goto fail;
23a7448e 2193
494d0247 2194 if (exec_directory_is_private(context, type)) {
6c9c51e5 2195 _cleanup_free_ char *private_root = NULL;
6c47cd7d 2196
3f5b1508
LP
2197 /* So, here's one extra complication when dealing with DynamicUser=1 units. In that
2198 * case we want to avoid leaving a directory around fully accessible that is owned by
2199 * a dynamic user whose UID is later on reused. To lock this down we use the same
2200 * trick used by container managers to prohibit host users to get access to files of
2201 * the same UID in containers: we place everything inside a directory that has an
2202 * access mode of 0700 and is owned root:root, so that it acts as security boundary
2203 * for unprivileged host code. We then use fs namespacing to make this directory
2204 * permeable for the service itself.
6c47cd7d 2205 *
3f5b1508
LP
2206 * Specifically: for a service which wants a special directory "foo/" we first create
2207 * a directory "private/" with access mode 0700 owned by root:root. Then we place
2208 * "foo" inside of that directory (i.e. "private/foo/"), and make "foo" a symlink to
2209 * "private/foo". This way, privileged host users can access "foo/" as usual, but
2210 * unprivileged host users can't look into it. Inside of the namespace of the unit
2211 * "private/" is replaced by a more liberally accessible tmpfs, into which the host's
2212 * "private/foo/" is mounted under the same name, thus disabling the access boundary
2213 * for the service and making sure it only gets access to the dirs it needs but no
2214 * others. Tricky? Yes, absolutely, but it works!
6c47cd7d 2215 *
3f5b1508
LP
2216 * Note that we don't do this for EXEC_DIRECTORY_CONFIGURATION as that's assumed not
2217 * to be owned by the service itself.
2218 *
2219 * Also, note that we don't do this for EXEC_DIRECTORY_RUNTIME as that's often used
2220 * for sharing files or sockets with other services. */
6c47cd7d 2221
edbfeb12 2222 private_root = path_join(params->prefix[type], "private");
6c47cd7d
LP
2223 if (!private_root) {
2224 r = -ENOMEM;
2225 goto fail;
2226 }
2227
2228 /* First set up private root if it doesn't exist yet, with access mode 0700 and owned by root:root */
37c1d5e9 2229 r = mkdir_safe_label(private_root, 0700, 0, 0, MKDIR_WARN_MODE);
6c47cd7d
LP
2230 if (r < 0)
2231 goto fail;
2232
edbfeb12 2233 pp = path_join(private_root, *rt);
6c47cd7d
LP
2234 if (!pp) {
2235 r = -ENOMEM;
2236 goto fail;
2237 }
2238
2239 /* Create all directories between the configured directory and this private root, and mark them 0755 */
2240 r = mkdir_parents_label(pp, 0755);
2241 if (r < 0)
2242 goto fail;
2243
949befd3
LP
2244 if (is_dir(p, false) > 0 &&
2245 (laccess(pp, F_OK) < 0 && errno == ENOENT)) {
2246
2247 /* Hmm, the private directory doesn't exist yet, but the normal one exists? If so, move
2248 * it over. Most likely the service has been upgraded from one that didn't use
2249 * DynamicUser=1, to one that does. */
2250
cf52c45d
LP
2251 log_info("Found pre-existing public %s= directory %s, migrating to %s.\n"
2252 "Apparently, service previously had DynamicUser= turned off, and has now turned it on.",
2253 exec_directory_type_to_string(type), p, pp);
2254
949befd3
LP
2255 if (rename(p, pp) < 0) {
2256 r = -errno;
2257 goto fail;
2258 }
2259 } else {
2260 /* Otherwise, create the actual directory for the service */
2261
2262 r = mkdir_label(pp, context->directories[type].mode);
2263 if (r < 0 && r != -EEXIST)
2264 goto fail;
2265 }
6c47cd7d 2266
6c47cd7d 2267 /* And link it up from the original place */
6c9c51e5 2268 r = symlink_idempotent(pp, p, true);
6c47cd7d
LP
2269 if (r < 0)
2270 goto fail;
2271
6c47cd7d 2272 } else {
5c6d40d1
LP
2273 _cleanup_free_ char *target = NULL;
2274
2275 if (type != EXEC_DIRECTORY_CONFIGURATION &&
2276 readlink_and_make_absolute(p, &target) >= 0) {
578dc69f 2277 _cleanup_free_ char *q = NULL, *q_resolved = NULL, *target_resolved = NULL;
5c6d40d1
LP
2278
2279 /* This already exists and is a symlink? Interesting. Maybe it's one created
2193f17c
LP
2280 * by DynamicUser=1 (see above)?
2281 *
2282 * We do this for all directory types except for ConfigurationDirectory=,
2283 * since they all support the private/ symlink logic at least in some
2284 * configurations, see above. */
5c6d40d1 2285
578dc69f
YW
2286 r = chase_symlinks(target, NULL, 0, &target_resolved, NULL);
2287 if (r < 0)
2288 goto fail;
2289
5c6d40d1
LP
2290 q = path_join(params->prefix[type], "private", *rt);
2291 if (!q) {
2292 r = -ENOMEM;
2293 goto fail;
2294 }
2295
578dc69f
YW
2296 /* /var/lib or friends may be symlinks. So, let's chase them also. */
2297 r = chase_symlinks(q, NULL, CHASE_NONEXISTENT, &q_resolved, NULL);
2298 if (r < 0)
2299 goto fail;
2300
2301 if (path_equal(q_resolved, target_resolved)) {
5c6d40d1
LP
2302
2303 /* Hmm, apparently DynamicUser= was once turned on for this service,
2304 * but is no longer. Let's move the directory back up. */
2305
cf52c45d
LP
2306 log_info("Found pre-existing private %s= directory %s, migrating to %s.\n"
2307 "Apparently, service previously had DynamicUser= turned on, and has now turned it off.",
2308 exec_directory_type_to_string(type), q, p);
2309
5c6d40d1
LP
2310 if (unlink(p) < 0) {
2311 r = -errno;
2312 goto fail;
2313 }
2314
2315 if (rename(q, p) < 0) {
2316 r = -errno;
2317 goto fail;
2318 }
2319 }
2320 }
2321
6c47cd7d 2322 r = mkdir_label(p, context->directories[type].mode);
d484580c 2323 if (r < 0) {
d484580c
LP
2324 if (r != -EEXIST)
2325 goto fail;
2326
206e9864
LP
2327 if (type == EXEC_DIRECTORY_CONFIGURATION) {
2328 struct stat st;
2329
2330 /* Don't change the owner/access mode of the configuration directory,
2331 * as in the common case it is not written to by a service, and shall
2332 * not be writable. */
2333
2334 if (stat(p, &st) < 0) {
2335 r = -errno;
2336 goto fail;
2337 }
2338
2339 /* Still complain if the access mode doesn't match */
2340 if (((st.st_mode ^ context->directories[type].mode) & 07777) != 0)
2341 log_warning("%s \'%s\' already exists but the mode is different. "
2342 "(File system: %o %sMode: %o)",
2343 exec_directory_type_to_string(type), *rt,
2344 st.st_mode & 07777, exec_directory_type_to_string(type), context->directories[type].mode & 07777);
2345
6cff72eb 2346 continue;
206e9864 2347 }
6cff72eb 2348 }
a1164ae3 2349 }
07689d5d 2350
206e9864 2351 /* Lock down the access mode (we use chmod_and_chown() to make this idempotent. We don't
5238e957 2352 * specify UID/GID here, so that path_chown_recursive() can optimize things depending on the
206e9864
LP
2353 * current UID/GID ownership.) */
2354 r = chmod_and_chown(pp ?: p, context->directories[type].mode, UID_INVALID, GID_INVALID);
2355 if (r < 0)
2356 goto fail;
c71b2eb7 2357
607b358e
LP
2358 /* Then, change the ownership of the whole tree, if necessary. When dynamic users are used we
2359 * drop the suid/sgid bits, since we really don't want SUID/SGID files for dynamic UID/GID
2360 * assignments to exist.*/
2361 r = path_chown_recursive(pp ?: p, uid, gid, context->dynamic_user ? 01777 : 07777);
07689d5d 2362 if (r < 0)
3536f49e 2363 goto fail;
07689d5d
LP
2364 }
2365
2366 return 0;
3536f49e
YW
2367
2368fail:
2369 *exit_status = exit_status_table[type];
3536f49e 2370 return r;
07689d5d
LP
2371}
2372
#if ENABLE_SMACK
static int setup_smack(
                const ExecContext *context,
                const ExecCommand *command) {

        int r;

        assert(context);
        assert(command);

        /* Applies a SMACK label to PID 0. An explicitly configured process label always wins. */
        if (context->smack_process_label) {
                r = mac_smack_apply_pid(0, context->smack_process_label);
                return r < 0 ? r : 0;
        }

#ifdef SMACK_DEFAULT_PROCESS_LABEL
        {
                _cleanup_free_ char *exec_label = NULL;

                /* No label configured: use the exec label read from the command's path, and fall back to the
                 * compile-time default if none could be read. -ENODATA and -EOPNOTSUPP are tolerated. */
                r = mac_smack_read(command->path, SMACK_ATTR_EXEC, &exec_label);
                if (r < 0 && !IN_SET(r, -ENODATA, -EOPNOTSUPP))
                        return r;

                r = mac_smack_apply_pid(0, exec_label ?: SMACK_DEFAULT_PROCESS_LABEL);
                if (r < 0)
                        return r;
        }
#endif

        return 0;
}
#endif
cefc33ae 2405
6c47cd7d
LP
2406static int compile_bind_mounts(
2407 const ExecContext *context,
2408 const ExecParameters *params,
2409 BindMount **ret_bind_mounts,
da6053d0 2410 size_t *ret_n_bind_mounts,
6c47cd7d
LP
2411 char ***ret_empty_directories) {
2412
2413 _cleanup_strv_free_ char **empty_directories = NULL;
2414 BindMount *bind_mounts;
da6053d0 2415 size_t n, h = 0, i;
6c47cd7d
LP
2416 ExecDirectoryType t;
2417 int r;
2418
2419 assert(context);
2420 assert(params);
2421 assert(ret_bind_mounts);
2422 assert(ret_n_bind_mounts);
2423 assert(ret_empty_directories);
2424
2425 n = context->n_bind_mounts;
2426 for (t = 0; t < _EXEC_DIRECTORY_TYPE_MAX; t++) {
2427 if (!params->prefix[t])
2428 continue;
2429
2430 n += strv_length(context->directories[t].paths);
2431 }
2432
2433 if (n <= 0) {
2434 *ret_bind_mounts = NULL;
2435 *ret_n_bind_mounts = 0;
2436 *ret_empty_directories = NULL;
2437 return 0;
2438 }
2439
2440 bind_mounts = new(BindMount, n);
2441 if (!bind_mounts)
2442 return -ENOMEM;
2443
a8cabc61 2444 for (i = 0; i < context->n_bind_mounts; i++) {
6c47cd7d
LP
2445 BindMount *item = context->bind_mounts + i;
2446 char *s, *d;
2447
2448 s = strdup(item->source);
2449 if (!s) {
2450 r = -ENOMEM;
2451 goto finish;
2452 }
2453
2454 d = strdup(item->destination);
2455 if (!d) {
2456 free(s);
2457 r = -ENOMEM;
2458 goto finish;
2459 }
2460
2461 bind_mounts[h++] = (BindMount) {
2462 .source = s,
2463 .destination = d,
2464 .read_only = item->read_only,
2465 .recursive = item->recursive,
2466 .ignore_enoent = item->ignore_enoent,
2467 };
2468 }
2469
2470 for (t = 0; t < _EXEC_DIRECTORY_TYPE_MAX; t++) {
2471 char **suffix;
2472
2473 if (!params->prefix[t])
2474 continue;
2475
2476 if (strv_isempty(context->directories[t].paths))
2477 continue;
2478
494d0247 2479 if (exec_directory_is_private(context, t) &&
5609f688 2480 !(context->root_directory || context->root_image)) {
6c47cd7d
LP
2481 char *private_root;
2482
2483 /* So this is for a dynamic user, and we need to make sure the process can access its own
2484 * directory. For that we overmount the usually inaccessible "private" subdirectory with a
2485 * tmpfs that makes it accessible and is empty except for the submounts we do this for. */
2486
657ee2d8 2487 private_root = path_join(params->prefix[t], "private");
6c47cd7d
LP
2488 if (!private_root) {
2489 r = -ENOMEM;
2490 goto finish;
2491 }
2492
2493 r = strv_consume(&empty_directories, private_root);
a635a7ae 2494 if (r < 0)
6c47cd7d 2495 goto finish;
6c47cd7d
LP
2496 }
2497
2498 STRV_FOREACH(suffix, context->directories[t].paths) {
2499 char *s, *d;
2500
494d0247 2501 if (exec_directory_is_private(context, t))
657ee2d8 2502 s = path_join(params->prefix[t], "private", *suffix);
6c47cd7d 2503 else
657ee2d8 2504 s = path_join(params->prefix[t], *suffix);
6c47cd7d
LP
2505 if (!s) {
2506 r = -ENOMEM;
2507 goto finish;
2508 }
2509
494d0247 2510 if (exec_directory_is_private(context, t) &&
5609f688
YW
2511 (context->root_directory || context->root_image))
2512 /* When RootDirectory= or RootImage= are set, then the symbolic link to the private
2513 * directory is not created on the root directory. So, let's bind-mount the directory
2514 * on the 'non-private' place. */
657ee2d8 2515 d = path_join(params->prefix[t], *suffix);
5609f688
YW
2516 else
2517 d = strdup(s);
6c47cd7d
LP
2518 if (!d) {
2519 free(s);
2520 r = -ENOMEM;
2521 goto finish;
2522 }
2523
2524 bind_mounts[h++] = (BindMount) {
2525 .source = s,
2526 .destination = d,
2527 .read_only = false,
9ce4e4b0 2528 .nosuid = context->dynamic_user, /* don't allow suid/sgid when DynamicUser= is on */
6c47cd7d
LP
2529 .recursive = true,
2530 .ignore_enoent = false,
2531 };
2532 }
2533 }
2534
2535 assert(h == n);
2536
2537 *ret_bind_mounts = bind_mounts;
2538 *ret_n_bind_mounts = n;
ae2a15bc 2539 *ret_empty_directories = TAKE_PTR(empty_directories);
6c47cd7d
LP
2540
2541 return (int) n;
2542
2543finish:
2544 bind_mount_free_many(bind_mounts, h);
2545 return r;
2546}
2547
4e677599
LP
2548static bool insist_on_sandboxing(
2549 const ExecContext *context,
2550 const char *root_dir,
2551 const char *root_image,
2552 const BindMount *bind_mounts,
2553 size_t n_bind_mounts) {
2554
2555 size_t i;
2556
2557 assert(context);
2558 assert(n_bind_mounts == 0 || bind_mounts);
2559
2560 /* Checks whether we need to insist on fs namespacing. i.e. whether we have settings configured that
86b52a39 2561 * would alter the view on the file system beyond making things read-only or invisible, i.e. would
4e677599
LP
2562 * rearrange stuff in a way we cannot ignore gracefully. */
2563
2564 if (context->n_temporary_filesystems > 0)
2565 return true;
2566
2567 if (root_dir || root_image)
2568 return true;
2569
2570 if (context->dynamic_user)
2571 return true;
2572
2573 /* If there are any bind mounts set that don't map back onto themselves, fs namespacing becomes
2574 * essential. */
2575 for (i = 0; i < n_bind_mounts; i++)
2576 if (!path_equal(bind_mounts[i].source, bind_mounts[i].destination))
2577 return true;
2578
91dd5f7c
LP
2579 if (context->log_namespace)
2580 return true;
2581
4e677599
LP
2582 return false;
2583}
2584
6818c54c 2585static int apply_mount_namespace(
34cf6c43
YW
2586 const Unit *u,
2587 const ExecCommand *command,
6818c54c
LP
2588 const ExecContext *context,
2589 const ExecParameters *params,
7cc5ef5f
ZJS
2590 const ExecRuntime *runtime,
2591 char **error_path) {
6818c54c 2592
7bcef4ef 2593 _cleanup_strv_free_ char **empty_directories = NULL;
93c6bb51 2594 char *tmp = NULL, *var = NULL;
915e6d16 2595 const char *root_dir = NULL, *root_image = NULL;
228af36f 2596 NamespaceInfo ns_info;
165a31c0 2597 bool needs_sandboxing;
6c47cd7d 2598 BindMount *bind_mounts = NULL;
da6053d0 2599 size_t n_bind_mounts = 0;
6818c54c 2600 int r;
93c6bb51 2601
2b3c1b9e
DH
2602 assert(context);
2603
915e6d16
LP
2604 if (params->flags & EXEC_APPLY_CHROOT) {
2605 root_image = context->root_image;
2606
2607 if (!root_image)
2608 root_dir = context->root_directory;
2609 }
93c6bb51 2610
6c47cd7d
LP
2611 r = compile_bind_mounts(context, params, &bind_mounts, &n_bind_mounts, &empty_directories);
2612 if (r < 0)
2613 return r;
2614
165a31c0 2615 needs_sandboxing = (params->flags & EXEC_APPLY_SANDBOXING) && !(command->flags & EXEC_COMMAND_FULLY_PRIVILEGED);
ecf63c91
NJ
2616 if (needs_sandboxing) {
2617 /* The runtime struct only contains the parent of the private /tmp,
2618 * which is non-accessible to world users. Inside of it there's a /tmp
2619 * that is sticky, and that's the one we want to use here. */
2620
2621 if (context->private_tmp && runtime) {
2622 if (runtime->tmp_dir)
2623 tmp = strjoina(runtime->tmp_dir, "/tmp");
2624 if (runtime->var_tmp_dir)
2625 var = strjoina(runtime->var_tmp_dir, "/tmp");
2626 }
2627
b5a33299
YW
2628 ns_info = (NamespaceInfo) {
2629 .ignore_protect_paths = false,
2630 .private_dev = context->private_devices,
2631 .protect_control_groups = context->protect_control_groups,
2632 .protect_kernel_tunables = context->protect_kernel_tunables,
2633 .protect_kernel_modules = context->protect_kernel_modules,
94a7b275 2634 .protect_kernel_logs = context->protect_kernel_logs,
aecd5ac6 2635 .protect_hostname = context->protect_hostname,
b5a33299 2636 .mount_apivfs = context->mount_apivfs,
228af36f 2637 .private_mounts = context->private_mounts,
b5a33299 2638 };
ecf63c91 2639 } else if (!context->dynamic_user && root_dir)
228af36f
LP
2640 /*
2641 * If DynamicUser=no and RootDirectory= is set then lets pass a relaxed
2642 * sandbox info, otherwise enforce it, don't ignore protected paths and
2643 * fail if we are enable to apply the sandbox inside the mount namespace.
2644 */
2645 ns_info = (NamespaceInfo) {
2646 .ignore_protect_paths = true,
2647 };
2648 else
2649 ns_info = (NamespaceInfo) {};
b5a33299 2650
37ed15d7
FB
2651 if (context->mount_flags == MS_SHARED)
2652 log_unit_debug(u, "shared mount propagation hidden by other fs namespacing unit settings: ignoring");
2653
915e6d16 2654 r = setup_namespace(root_dir, root_image,
7bcef4ef 2655 &ns_info, context->read_write_paths,
165a31c0
LP
2656 needs_sandboxing ? context->read_only_paths : NULL,
2657 needs_sandboxing ? context->inaccessible_paths : NULL,
6c47cd7d
LP
2658 empty_directories,
2659 bind_mounts,
2660 n_bind_mounts,
2abd4e38
YW
2661 context->temporary_filesystems,
2662 context->n_temporary_filesystems,
93c6bb51
DH
2663 tmp,
2664 var,
91dd5f7c 2665 context->log_namespace,
165a31c0
LP
2666 needs_sandboxing ? context->protect_home : PROTECT_HOME_NO,
2667 needs_sandboxing ? context->protect_system : PROTECT_SYSTEM_NO,
915e6d16 2668 context->mount_flags,
8d251485 2669 DISSECT_IMAGE_DISCARD_ON_LOOP|DISSECT_IMAGE_RELAX_VAR_CHECK|DISSECT_IMAGE_FSCK,
7cc5ef5f 2670 error_path);
93c6bb51 2671
1beab8b0 2672 /* If we couldn't set up the namespace this is probably due to a missing capability. setup_namespace() reports
5238e957 2673 * that with a special, recognizable error ENOANO. In this case, silently proceed, but only if exclusively
1beab8b0
LP
2674 * sandboxing options were used, i.e. nothing such as RootDirectory= or BindMount= that would result in a
2675 * completely different execution environment. */
aca835ed 2676 if (r == -ENOANO) {
4e677599
LP
2677 if (insist_on_sandboxing(
2678 context,
2679 root_dir, root_image,
2680 bind_mounts,
2681 n_bind_mounts)) {
2682 log_unit_debug(u, "Failed to set up namespace, and refusing to continue since the selected namespacing options alter mount environment non-trivially.\n"
2683 "Bind mounts: %zu, temporary filesystems: %zu, root directory: %s, root image: %s, dynamic user: %s",
2684 n_bind_mounts, context->n_temporary_filesystems, yes_no(root_dir), yes_no(root_image), yes_no(context->dynamic_user));
2685
2686 r = -EOPNOTSUPP;
2687 } else {
aca835ed 2688 log_unit_debug(u, "Failed to set up namespace, assuming containerized execution and ignoring.");
4e677599 2689 r = 0;
aca835ed 2690 }
93c6bb51
DH
2691 }
2692
4e677599 2693 bind_mount_free_many(bind_mounts, n_bind_mounts);
93c6bb51
DH
2694 return r;
2695}
2696
915e6d16
LP
2697static int apply_working_directory(
2698 const ExecContext *context,
2699 const ExecParameters *params,
2700 const char *home,
376fecf6 2701 int *exit_status) {
915e6d16 2702
6732edab 2703 const char *d, *wd;
2b3c1b9e
DH
2704
2705 assert(context);
376fecf6 2706 assert(exit_status);
2b3c1b9e 2707
6732edab
LP
2708 if (context->working_directory_home) {
2709
376fecf6
LP
2710 if (!home) {
2711 *exit_status = EXIT_CHDIR;
6732edab 2712 return -ENXIO;
376fecf6 2713 }
6732edab 2714
2b3c1b9e 2715 wd = home;
6732edab
LP
2716
2717 } else if (context->working_directory)
2b3c1b9e
DH
2718 wd = context->working_directory;
2719 else
2720 wd = "/";
e7f1e7c6 2721
fa97f630 2722 if (params->flags & EXEC_APPLY_CHROOT)
2b3c1b9e 2723 d = wd;
fa97f630 2724 else
3b0e5bb5 2725 d = prefix_roota(context->root_directory, wd);
e7f1e7c6 2726
376fecf6
LP
2727 if (chdir(d) < 0 && !context->working_directory_missing_ok) {
2728 *exit_status = EXIT_CHDIR;
2b3c1b9e 2729 return -errno;
376fecf6 2730 }
e7f1e7c6
DH
2731
2732 return 0;
2733}
2734
fa97f630
JB
2735static int apply_root_directory(
2736 const ExecContext *context,
2737 const ExecParameters *params,
2738 const bool needs_mount_ns,
2739 int *exit_status) {
2740
2741 assert(context);
2742 assert(exit_status);
2743
2744 if (params->flags & EXEC_APPLY_CHROOT) {
2745 if (!needs_mount_ns && context->root_directory)
2746 if (chroot(context->root_directory) < 0) {
2747 *exit_status = EXIT_CHROOT;
2748 return -errno;
2749 }
2750 }
2751
2752 return 0;
2753}
2754
b1edf445 2755static int setup_keyring(
34cf6c43 2756 const Unit *u,
b1edf445
LP
2757 const ExecContext *context,
2758 const ExecParameters *p,
2759 uid_t uid, gid_t gid) {
2760
74dd6b51 2761 key_serial_t keyring;
e64c2d0b
DJL
2762 int r = 0;
2763 uid_t saved_uid;
2764 gid_t saved_gid;
74dd6b51
LP
2765
2766 assert(u);
b1edf445 2767 assert(context);
74dd6b51
LP
2768 assert(p);
2769
2770 /* Let's set up a new per-service "session" kernel keyring for each system service. This has the benefit that
2771 * each service runs with its own keyring shared among all processes of the service, but with no hook-up beyond
2772 * that scope, and in particular no link to the per-UID keyring. If we don't do this the keyring will be
2773 * automatically created on-demand and then linked to the per-UID keyring, by the kernel. The kernel's built-in
2774 * on-demand behaviour is very appropriate for login users, but probably not so much for system services, where
2775 * UIDs are not necessarily specific to a service but reused (at least in the case of UID 0). */
2776
b1edf445
LP
2777 if (context->keyring_mode == EXEC_KEYRING_INHERIT)
2778 return 0;
2779
e64c2d0b
DJL
2780 /* Acquiring a reference to the user keyring is nasty. We briefly change identity in order to get things set up
2781 * properly by the kernel. If we don't do that then we can't create it atomically, and that sucks for parallel
2782 * execution. This mimics what pam_keyinit does, too. Setting up session keyring, to be owned by the right user
2783 * & group is just as nasty as acquiring a reference to the user keyring. */
2784
2785 saved_uid = getuid();
2786 saved_gid = getgid();
2787
2788 if (gid_is_valid(gid) && gid != saved_gid) {
2789 if (setregid(gid, -1) < 0)
2790 return log_unit_error_errno(u, errno, "Failed to change GID for user keyring: %m");
2791 }
2792
2793 if (uid_is_valid(uid) && uid != saved_uid) {
2794 if (setreuid(uid, -1) < 0) {
2795 r = log_unit_error_errno(u, errno, "Failed to change UID for user keyring: %m");
2796 goto out;
2797 }
2798 }
2799
74dd6b51
LP
2800 keyring = keyctl(KEYCTL_JOIN_SESSION_KEYRING, 0, 0, 0, 0);
2801 if (keyring == -1) {
2802 if (errno == ENOSYS)
8002fb97 2803 log_unit_debug_errno(u, errno, "Kernel keyring not supported, ignoring.");
74dd6b51 2804 else if (IN_SET(errno, EACCES, EPERM))
8002fb97 2805 log_unit_debug_errno(u, errno, "Kernel keyring access prohibited, ignoring.");
74dd6b51 2806 else if (errno == EDQUOT)
8002fb97 2807 log_unit_debug_errno(u, errno, "Out of kernel keyrings to allocate, ignoring.");
74dd6b51 2808 else
e64c2d0b 2809 r = log_unit_error_errno(u, errno, "Setting up kernel keyring failed: %m");
74dd6b51 2810
e64c2d0b 2811 goto out;
74dd6b51
LP
2812 }
2813
e64c2d0b
DJL
2814 /* When requested link the user keyring into the session keyring. */
2815 if (context->keyring_mode == EXEC_KEYRING_SHARED) {
2816
2817 if (keyctl(KEYCTL_LINK,
2818 KEY_SPEC_USER_KEYRING,
2819 KEY_SPEC_SESSION_KEYRING, 0, 0) < 0) {
2820 r = log_unit_error_errno(u, errno, "Failed to link user keyring into session keyring: %m");
2821 goto out;
2822 }
2823 }
2824
2825 /* Restore uid/gid back */
2826 if (uid_is_valid(uid) && uid != saved_uid) {
2827 if (setreuid(saved_uid, -1) < 0) {
2828 r = log_unit_error_errno(u, errno, "Failed to change UID back for user keyring: %m");
2829 goto out;
2830 }
2831 }
2832
2833 if (gid_is_valid(gid) && gid != saved_gid) {
2834 if (setregid(saved_gid, -1) < 0)
2835 return log_unit_error_errno(u, errno, "Failed to change GID back for user keyring: %m");
2836 }
2837
2838 /* Populate they keyring with the invocation ID by default, as original saved_uid. */
b3415f5d
LP
2839 if (!sd_id128_is_null(u->invocation_id)) {
2840 key_serial_t key;
2841
2842 key = add_key("user", "invocation_id", &u->invocation_id, sizeof(u->invocation_id), KEY_SPEC_SESSION_KEYRING);
2843 if (key == -1)
8002fb97 2844 log_unit_debug_errno(u, errno, "Failed to add invocation ID to keyring, ignoring: %m");
b3415f5d
LP
2845 else {
2846 if (keyctl(KEYCTL_SETPERM, key,
2847 KEY_POS_VIEW|KEY_POS_READ|KEY_POS_SEARCH|
2848 KEY_USR_VIEW|KEY_USR_READ|KEY_USR_SEARCH, 0, 0) < 0)
e64c2d0b 2849 r = log_unit_error_errno(u, errno, "Failed to restrict invocation ID permission: %m");
b3415f5d
LP
2850 }
2851 }
2852
e64c2d0b
DJL
2853out:
2854 /* Revert back uid & gid for the the last time, and exit */
2855 /* no extra logging, as only the first already reported error matters */
2856 if (getuid() != saved_uid)
2857 (void) setreuid(saved_uid, -1);
b1edf445 2858
e64c2d0b
DJL
2859 if (getgid() != saved_gid)
2860 (void) setregid(saved_gid, -1);
b1edf445 2861
e64c2d0b 2862 return r;
74dd6b51
LP
2863}
2864
static void append_socket_pair(int *array, size_t *n, const int pair[static 2]) {
        size_t k;

        assert(array);
        assert(n);
        assert(pair);

        /* Appends each non-negative fd of the pair to array, in order, advancing *n for every fd added */
        for (k = 0; k < 2; k++)
                if (pair[k] >= 0)
                        array[(*n)++] = pair[k];
}
2875
a34ceba6
LP
2876static int close_remaining_fds(
2877 const ExecParameters *params,
34cf6c43
YW
2878 const ExecRuntime *runtime,
2879 const DynamicCreds *dcreds,
00d9ef85 2880 int user_lookup_fd,
a34ceba6 2881 int socket_fd,
5686391b 2882 int exec_fd,
5b8d1f6b 2883 const int *fds, size_t n_fds) {
a34ceba6 2884
da6053d0 2885 size_t n_dont_close = 0;
00d9ef85 2886 int dont_close[n_fds + 12];
a34ceba6
LP
2887
2888 assert(params);
2889
2890 if (params->stdin_fd >= 0)
2891 dont_close[n_dont_close++] = params->stdin_fd;
2892 if (params->stdout_fd >= 0)
2893 dont_close[n_dont_close++] = params->stdout_fd;
2894 if (params->stderr_fd >= 0)
2895 dont_close[n_dont_close++] = params->stderr_fd;
2896
2897 if (socket_fd >= 0)
2898 dont_close[n_dont_close++] = socket_fd;
5686391b
LP
2899 if (exec_fd >= 0)
2900 dont_close[n_dont_close++] = exec_fd;
a34ceba6
LP
2901 if (n_fds > 0) {
2902 memcpy(dont_close + n_dont_close, fds, sizeof(int) * n_fds);
2903 n_dont_close += n_fds;
2904 }
2905
29206d46
LP
2906 if (runtime)
2907 append_socket_pair(dont_close, &n_dont_close, runtime->netns_storage_socket);
2908
2909 if (dcreds) {
2910 if (dcreds->user)
2911 append_socket_pair(dont_close, &n_dont_close, dcreds->user->storage_socket);
2912 if (dcreds->group)
2913 append_socket_pair(dont_close, &n_dont_close, dcreds->group->storage_socket);
a34ceba6
LP
2914 }
2915
00d9ef85
LP
2916 if (user_lookup_fd >= 0)
2917 dont_close[n_dont_close++] = user_lookup_fd;
2918
a34ceba6
LP
2919 return close_all_fds(dont_close, n_dont_close);
2920}
2921
00d9ef85
LP
2922static int send_user_lookup(
2923 Unit *unit,
2924 int user_lookup_fd,
2925 uid_t uid,
2926 gid_t gid) {
2927
2928 assert(unit);
2929
2930 /* Send the resolved UID/GID to PID 1 after we learnt it. We send a single datagram, containing the UID/GID
2931 * data as well as the unit name. Note that we suppress sending this if no user/group to resolve was
2932 * specified. */
2933
2934 if (user_lookup_fd < 0)
2935 return 0;
2936
2937 if (!uid_is_valid(uid) && !gid_is_valid(gid))
2938 return 0;
2939
2940 if (writev(user_lookup_fd,
2941 (struct iovec[]) {
e6a7ec4b
LP
2942 IOVEC_INIT(&uid, sizeof(uid)),
2943 IOVEC_INIT(&gid, sizeof(gid)),
2944 IOVEC_INIT_STRING(unit->id) }, 3) < 0)
00d9ef85
LP
2945 return -errno;
2946
2947 return 0;
2948}
2949
6732edab
LP
2950static int acquire_home(const ExecContext *c, uid_t uid, const char** home, char **buf) {
2951 int r;
2952
2953 assert(c);
2954 assert(home);
2955 assert(buf);
2956
2957 /* If WorkingDirectory=~ is set, try to acquire a usable home directory. */
2958
2959 if (*home)
2960 return 0;
2961
2962 if (!c->working_directory_home)
2963 return 0;
2964
6732edab
LP
2965 r = get_home_dir(buf);
2966 if (r < 0)
2967 return r;
2968
2969 *home = *buf;
2970 return 1;
2971}
2972
da50b85a
LP
2973static int compile_suggested_paths(const ExecContext *c, const ExecParameters *p, char ***ret) {
2974 _cleanup_strv_free_ char ** list = NULL;
2975 ExecDirectoryType t;
2976 int r;
2977
2978 assert(c);
2979 assert(p);
2980 assert(ret);
2981
2982 assert(c->dynamic_user);
2983
2984 /* Compile a list of paths that it might make sense to read the owning UID from to use as initial candidate for
2985 * dynamic UID allocation, in order to save us from doing costly recursive chown()s of the special
2986 * directories. */
2987
2988 for (t = 0; t < _EXEC_DIRECTORY_TYPE_MAX; t++) {
2989 char **i;
2990
2991 if (t == EXEC_DIRECTORY_CONFIGURATION)
2992 continue;
2993
2994 if (!p->prefix[t])
2995 continue;
2996
2997 STRV_FOREACH(i, c->directories[t].paths) {
2998 char *e;
2999
494d0247 3000 if (exec_directory_is_private(c, t))
657ee2d8 3001 e = path_join(p->prefix[t], "private", *i);
494d0247
YW
3002 else
3003 e = path_join(p->prefix[t], *i);
da50b85a
LP
3004 if (!e)
3005 return -ENOMEM;
3006
3007 r = strv_consume(&list, e);
3008 if (r < 0)
3009 return r;
3010 }
3011 }
3012
ae2a15bc 3013 *ret = TAKE_PTR(list);
da50b85a
LP
3014
3015 return 0;
3016}
3017
34cf6c43
YW
3018static char *exec_command_line(char **argv);
3019
78f93209
LP
3020static int exec_parameters_get_cgroup_path(const ExecParameters *params, char **ret) {
3021 bool using_subcgroup;
3022 char *p;
3023
3024 assert(params);
3025 assert(ret);
3026
3027 if (!params->cgroup_path)
3028 return -EINVAL;
3029
3030 /* If we are called for a unit where cgroup delegation is on, and the payload created its own populated
3031 * subcgroup (which we expect it to do, after all it asked for delegation), then we cannot place the control
3032 * processes started after the main unit's process in the unit's main cgroup because it is now an inner one,
3033 * and inner cgroups may not contain processes. Hence, if delegation is on, and this is a control process,
3034 * let's use ".control" as subcgroup instead. Note that we do so only for ExecStartPost=, ExecReload=,
3035 * ExecStop=, ExecStopPost=, i.e. for the commands where the main process is already forked. For ExecStartPre=
3036 * this is not necessary, the cgroup is still empty. We distinguish these cases with the EXEC_CONTROL_CGROUP
3037 * flag, which is only passed for the former statements, not for the latter. */
3038
3039 using_subcgroup = FLAGS_SET(params->flags, EXEC_CONTROL_CGROUP|EXEC_CGROUP_DELEGATE|EXEC_IS_CONTROL);
3040 if (using_subcgroup)
657ee2d8 3041 p = path_join(params->cgroup_path, ".control");
78f93209
LP
3042 else
3043 p = strdup(params->cgroup_path);
3044 if (!p)
3045 return -ENOMEM;
3046
3047 *ret = p;
3048 return using_subcgroup;
3049}
3050
e2b2fb7f
MS
3051static int exec_context_cpu_affinity_from_numa(const ExecContext *c, CPUSet *ret) {
3052 _cleanup_(cpu_set_reset) CPUSet s = {};
3053 int r;
3054
3055 assert(c);
3056 assert(ret);
3057
3058 if (!c->numa_policy.nodes.set) {
3059 log_debug("Can't derive CPU affinity mask from NUMA mask because NUMA mask is not set, ignoring");
3060 return 0;
3061 }
3062
3063 r = numa_to_cpu_set(&c->numa_policy, &s);
3064 if (r < 0)
3065 return r;
3066
3067 cpu_set_reset(ret);
3068
3069 return cpu_set_add_all(ret, &s);
3070}
3071
3072bool exec_context_get_cpu_affinity_from_numa(const ExecContext *c) {
3073 assert(c);
3074
3075 return c->cpu_affinity_from_numa;
3076}
3077
ff0af2a1 3078static int exec_child(
f2341e0a 3079 Unit *unit,
34cf6c43 3080 const ExecCommand *command,
ff0af2a1
LP
3081 const ExecContext *context,
3082 const ExecParameters *params,
3083 ExecRuntime *runtime,
29206d46 3084 DynamicCreds *dcreds,
ff0af2a1 3085 int socket_fd,
2caa38e9 3086 const int named_iofds[static 3],
4c47affc 3087 int *fds,
da6053d0 3088 size_t n_socket_fds,
25b583d7 3089 size_t n_storage_fds,
ff0af2a1 3090 char **files_env,
00d9ef85 3091 int user_lookup_fd,
12145637 3092 int *exit_status) {
d35fbf6b 3093
7ca69792 3094 _cleanup_strv_free_ char **our_env = NULL, **pass_env = NULL, **accum_env = NULL, **replaced_argv = NULL;
5686391b 3095 int *fds_with_exec_fd, n_fds_with_exec_fd, r, ngids = 0, exec_fd = -1;
4d885bd3
DH
3096 _cleanup_free_ gid_t *supplementary_gids = NULL;
3097 const char *username = NULL, *groupname = NULL;
5686391b 3098 _cleanup_free_ char *home_buffer = NULL;
2b3c1b9e 3099 const char *home = NULL, *shell = NULL;
7ca69792 3100 char **final_argv = NULL;
7bce046b
LP
3101 dev_t journal_stream_dev = 0;
3102 ino_t journal_stream_ino = 0;
5749f855 3103 bool userns_set_up = false;
165a31c0
LP
3104 bool needs_sandboxing, /* Do we need to set up full sandboxing? (i.e. all namespacing, all MAC stuff, caps, yadda yadda */
3105 needs_setuid, /* Do we need to do the actual setresuid()/setresgid() calls? */
3106 needs_mount_namespace, /* Do we need to set up a mount namespace for this kernel? */
3107 needs_ambient_hack; /* Do we need to apply the ambient capabilities hack? */
349cc4a5 3108#if HAVE_SELINUX
7f59dd35 3109 _cleanup_free_ char *mac_selinux_context_net = NULL;
43b1f709 3110 bool use_selinux = false;
ecfbc84f 3111#endif
f9fa32f0 3112#if ENABLE_SMACK
43b1f709 3113 bool use_smack = false;
ecfbc84f 3114#endif
349cc4a5 3115#if HAVE_APPARMOR
43b1f709 3116 bool use_apparmor = false;
ecfbc84f 3117#endif
5749f855
AZ
3118 uid_t saved_uid = getuid();
3119 gid_t saved_gid = getgid();
fed1e721
LP
3120 uid_t uid = UID_INVALID;
3121 gid_t gid = GID_INVALID;
da6053d0 3122 size_t n_fds;
3536f49e 3123 ExecDirectoryType dt;
165a31c0 3124 int secure_bits;
afb11bf1
DG
3125 _cleanup_free_ gid_t *gids_after_pam = NULL;
3126 int ngids_after_pam = 0;
034c6ed7 3127
f2341e0a 3128 assert(unit);
5cb5a6ff
LP
3129 assert(command);
3130 assert(context);
d35fbf6b 3131 assert(params);
ff0af2a1 3132 assert(exit_status);
d35fbf6b
DM
3133
3134 rename_process_from_path(command->path);
3135
3136 /* We reset exactly these signals, since they are the
3137 * only ones we set to SIG_IGN in the main daemon. All
3138 * others we leave untouched because we set them to
3139 * SIG_DFL or a valid handler initially, both of which
3140 * will be demoted to SIG_DFL. */
ce30c8dc
LP
3141 (void) default_signals(SIGNALS_CRASH_HANDLER,
3142 SIGNALS_IGNORE, -1);
d35fbf6b
DM
3143
3144 if (context->ignore_sigpipe)
ce30c8dc 3145 (void) ignore_signals(SIGPIPE, -1);
d35fbf6b 3146
ff0af2a1
LP
3147 r = reset_signal_mask();
3148 if (r < 0) {
3149 *exit_status = EXIT_SIGNAL_MASK;
12145637 3150 return log_unit_error_errno(unit, r, "Failed to set process signal mask: %m");
d35fbf6b 3151 }
034c6ed7 3152
d35fbf6b
DM
3153 if (params->idle_pipe)
3154 do_idle_pipe_dance(params->idle_pipe);
4f2d528d 3155
2c027c62
LP
3156 /* Close fds we don't need very early to make sure we don't block init reexecution because it cannot bind its
3157 * sockets. Among the fds we close are the logging fds, and we want to keep them closed, so that we don't have
3158 * any fds open we don't really want open during the transition. In order to make logging work, we switch the
3159 * log subsystem into open_when_needed mode, so that it reopens the logs on every single log call. */
ff0af2a1 3160
d35fbf6b 3161 log_forget_fds();
2c027c62 3162 log_set_open_when_needed(true);
4f2d528d 3163
40a80078
LP
3164 /* In case anything used libc syslog(), close this here, too */
3165 closelog();
3166
5686391b
LP
3167 n_fds = n_socket_fds + n_storage_fds;
3168 r = close_remaining_fds(params, runtime, dcreds, user_lookup_fd, socket_fd, params->exec_fd, fds, n_fds);
ff0af2a1
LP
3169 if (r < 0) {
3170 *exit_status = EXIT_FDS;
12145637 3171 return log_unit_error_errno(unit, r, "Failed to close unwanted file descriptors: %m");
8c7be95e
LP
3172 }
3173
d35fbf6b
DM
3174 if (!context->same_pgrp)
3175 if (setsid() < 0) {
ff0af2a1 3176 *exit_status = EXIT_SETSID;
12145637 3177 return log_unit_error_errno(unit, errno, "Failed to create new process session: %m");
d35fbf6b 3178 }
9e2f7c11 3179
1e22b5cd 3180 exec_context_tty_reset(context, params);
d35fbf6b 3181
c891efaf 3182 if (unit_shall_confirm_spawn(unit)) {
7d5ceb64 3183 const char *vc = params->confirm_spawn;
3b20f877
FB
3184 _cleanup_free_ char *cmdline = NULL;
3185
ee39ca20 3186 cmdline = exec_command_line(command->argv);
3b20f877 3187 if (!cmdline) {
0460aa5c 3188 *exit_status = EXIT_MEMORY;
12145637 3189 return log_oom();
3b20f877 3190 }
d35fbf6b 3191
eedf223a 3192 r = ask_for_confirmation(vc, unit, cmdline);
3b20f877
FB
3193 if (r != CONFIRM_EXECUTE) {
3194 if (r == CONFIRM_PRETEND_SUCCESS) {
3195 *exit_status = EXIT_SUCCESS;
3196 return 0;
3197 }
ff0af2a1 3198 *exit_status = EXIT_CONFIRM;
12145637 3199 log_unit_error(unit, "Execution cancelled by the user");
d35fbf6b 3200 return -ECANCELED;
d35fbf6b
DM
3201 }
3202 }
1a63a750 3203
d521916d
LP
3204 /* We are about to invoke NSS and PAM modules. Let's tell them what we are doing here, maybe they care. This is
3205 * used by nss-resolve to disable itself when we are about to start systemd-resolved, to avoid deadlocks. Note
3206 * that these env vars do not survive the execve(), which means they really only apply to the PAM and NSS
3207 * invocations themselves. Also note that while we'll only invoke NSS modules involved in user management they
3208 * might internally call into other NSS modules that are involved in hostname resolution, we never know. */
3209 if (setenv("SYSTEMD_ACTIVATION_UNIT", unit->id, true) != 0 ||
3210 setenv("SYSTEMD_ACTIVATION_SCOPE", MANAGER_IS_SYSTEM(unit->manager) ? "system" : "user", true) != 0) {
3211 *exit_status = EXIT_MEMORY;
3212 return log_unit_error_errno(unit, errno, "Failed to update environment: %m");
3213 }
3214
29206d46 3215 if (context->dynamic_user && dcreds) {
da50b85a 3216 _cleanup_strv_free_ char **suggested_paths = NULL;
29206d46 3217
d521916d
LP
3218 /* On top of that, make sure we bypass our own NSS module nss-systemd comprehensively for any NSS
3219 * checks, if DynamicUser=1 is used, as we shouldn't create a feedback loop with ourselves here.*/
409093fe
LP
3220 if (putenv((char*) "SYSTEMD_NSS_DYNAMIC_BYPASS=1") != 0) {
3221 *exit_status = EXIT_USER;
12145637 3222 return log_unit_error_errno(unit, errno, "Failed to update environment: %m");
409093fe
LP
3223 }
3224
da50b85a
LP
3225 r = compile_suggested_paths(context, params, &suggested_paths);
3226 if (r < 0) {
3227 *exit_status = EXIT_MEMORY;
3228 return log_oom();
3229 }
3230
3231 r = dynamic_creds_realize(dcreds, suggested_paths, &uid, &gid);
ff0af2a1
LP
3232 if (r < 0) {
3233 *exit_status = EXIT_USER;
e2b0cc34
YW
3234 if (r == -EILSEQ) {
3235 log_unit_error(unit, "Failed to update dynamic user credentials: User or group with specified name already exists.");
3236 return -EOPNOTSUPP;
3237 }
12145637 3238 return log_unit_error_errno(unit, r, "Failed to update dynamic user credentials: %m");
524daa8c 3239 }
524daa8c 3240
70dd455c 3241 if (!uid_is_valid(uid)) {
29206d46 3242 *exit_status = EXIT_USER;
12145637 3243 log_unit_error(unit, "UID validation failed for \""UID_FMT"\"", uid);
70dd455c
ZJS
3244 return -ESRCH;
3245 }
3246
3247 if (!gid_is_valid(gid)) {
3248 *exit_status = EXIT_USER;
12145637 3249 log_unit_error(unit, "GID validation failed for \""GID_FMT"\"", gid);
29206d46
LP
3250 return -ESRCH;
3251 }
5bc7452b 3252
29206d46
LP
3253 if (dcreds->user)
3254 username = dcreds->user->name;
3255
3256 } else {
4d885bd3
DH
3257 r = get_fixed_user(context, &username, &uid, &gid, &home, &shell);
3258 if (r < 0) {
3259 *exit_status = EXIT_USER;
12145637 3260 return log_unit_error_errno(unit, r, "Failed to determine user credentials: %m");
5bc7452b 3261 }
5bc7452b 3262
4d885bd3
DH
3263 r = get_fixed_group(context, &groupname, &gid);
3264 if (r < 0) {
3265 *exit_status = EXIT_GROUP;
12145637 3266 return log_unit_error_errno(unit, r, "Failed to determine group credentials: %m");
4d885bd3 3267 }
cdc5d5c5 3268 }
29206d46 3269
cdc5d5c5
DH
3270 /* Initialize user supplementary groups and get SupplementaryGroups= ones */
3271 r = get_supplementary_groups(context, username, groupname, gid,
3272 &supplementary_gids, &ngids);
3273 if (r < 0) {
3274 *exit_status = EXIT_GROUP;
12145637 3275 return log_unit_error_errno(unit, r, "Failed to determine supplementary groups: %m");
29206d46 3276 }
5bc7452b 3277
00d9ef85
LP
3278 r = send_user_lookup(unit, user_lookup_fd, uid, gid);
3279 if (r < 0) {
3280 *exit_status = EXIT_USER;
12145637 3281 return log_unit_error_errno(unit, r, "Failed to send user credentials to PID1: %m");
00d9ef85
LP
3282 }
3283
3284 user_lookup_fd = safe_close(user_lookup_fd);
3285
6732edab
LP
3286 r = acquire_home(context, uid, &home, &home_buffer);
3287 if (r < 0) {
3288 *exit_status = EXIT_CHDIR;
12145637 3289 return log_unit_error_errno(unit, r, "Failed to determine $HOME for user: %m");
6732edab
LP
3290 }
3291
d35fbf6b
DM
3292 /* If a socket is connected to STDIN/STDOUT/STDERR, we
3293 * must sure to drop O_NONBLOCK */
3294 if (socket_fd >= 0)
a34ceba6 3295 (void) fd_nonblock(socket_fd, false);
acbb0225 3296
4c70a4a7
MS
3297 /* Journald will try to look-up our cgroup in order to populate _SYSTEMD_CGROUP and _SYSTEMD_UNIT fields.
3298 * Hence we need to migrate to the target cgroup from init.scope before connecting to journald */
3299 if (params->cgroup_path) {
3300 _cleanup_free_ char *p = NULL;
3301
3302 r = exec_parameters_get_cgroup_path(params, &p);
3303 if (r < 0) {
3304 *exit_status = EXIT_CGROUP;
3305 return log_unit_error_errno(unit, r, "Failed to acquire cgroup path: %m");
3306 }
3307
3308 r = cg_attach_everywhere(params->cgroup_supported, p, 0, NULL, NULL);
3309 if (r < 0) {
3310 *exit_status = EXIT_CGROUP;
3311 return log_unit_error_errno(unit, r, "Failed to attach to cgroup %s: %m", p);
3312 }
3313 }
3314
a8d08f39
LP
3315 if (context->network_namespace_path && runtime && runtime->netns_storage_socket[0] >= 0) {
3316 r = open_netns_path(runtime->netns_storage_socket, context->network_namespace_path);
3317 if (r < 0) {
3318 *exit_status = EXIT_NETWORK;
3319 return log_unit_error_errno(unit, r, "Failed to open network namespace path %s: %m", context->network_namespace_path);
3320 }
3321 }
3322
52c239d7 3323 r = setup_input(context, params, socket_fd, named_iofds);
ff0af2a1
LP
3324 if (r < 0) {
3325 *exit_status = EXIT_STDIN;
12145637 3326 return log_unit_error_errno(unit, r, "Failed to set up standard input: %m");
d35fbf6b 3327 }
034c6ed7 3328
52c239d7 3329 r = setup_output(unit, context, params, STDOUT_FILENO, socket_fd, named_iofds, basename(command->path), uid, gid, &journal_stream_dev, &journal_stream_ino);
ff0af2a1
LP
3330 if (r < 0) {
3331 *exit_status = EXIT_STDOUT;
12145637 3332 return log_unit_error_errno(unit, r, "Failed to set up standard output: %m");
d35fbf6b
DM
3333 }
3334
52c239d7 3335 r = setup_output(unit, context, params, STDERR_FILENO, socket_fd, named_iofds, basename(command->path), uid, gid, &journal_stream_dev, &journal_stream_ino);
ff0af2a1
LP
3336 if (r < 0) {
3337 *exit_status = EXIT_STDERR;
12145637 3338 return log_unit_error_errno(unit, r, "Failed to set up standard error output: %m");
d35fbf6b
DM
3339 }
3340
d35fbf6b 3341 if (context->oom_score_adjust_set) {
9f8168eb
LP
3342 /* When we can't make this change due to EPERM, then let's silently skip over it. User namespaces
3343 * prohibit write access to this file, and we shouldn't trip up over that. */
3344 r = set_oom_score_adjust(context->oom_score_adjust);
12145637 3345 if (IN_SET(r, -EPERM, -EACCES))
f2341e0a 3346 log_unit_debug_errno(unit, r, "Failed to adjust OOM setting, assuming containerized execution, ignoring: %m");
12145637 3347 else if (r < 0) {
ff0af2a1 3348 *exit_status = EXIT_OOM_ADJUST;
12145637 3349 return log_unit_error_errno(unit, r, "Failed to adjust OOM setting: %m");
613b411c 3350 }
d35fbf6b
DM
3351 }
3352
ad21e542
ZJS
3353 if (context->coredump_filter_set) {
3354 r = set_coredump_filter(context->coredump_filter);
3355 if (ERRNO_IS_PRIVILEGE(r))
3356 log_unit_debug_errno(unit, r, "Failed to adjust coredump_filter, ignoring: %m");
3357 else if (r < 0)
3358 return log_unit_error_errno(unit, r, "Failed to adjust coredump_filter: %m");
3359 }
3360
39090201
DJL
3361 if (context->nice_set) {
3362 r = setpriority_closest(context->nice);
3363 if (r < 0)
3364 return log_unit_error_errno(unit, r, "Failed to set up process scheduling priority (nice level): %m");
3365 }
613b411c 3366
d35fbf6b
DM
3367 if (context->cpu_sched_set) {
3368 struct sched_param param = {
3369 .sched_priority = context->cpu_sched_priority,
3370 };
3371
ff0af2a1
LP
3372 r = sched_setscheduler(0,
3373 context->cpu_sched_policy |
3374 (context->cpu_sched_reset_on_fork ?
3375 SCHED_RESET_ON_FORK : 0),
3376 &param);
3377 if (r < 0) {
3378 *exit_status = EXIT_SETSCHEDULER;
12145637 3379 return log_unit_error_errno(unit, errno, "Failed to set up CPU scheduling: %m");
fc9b2a84 3380 }
d35fbf6b 3381 }
fc9b2a84 3382
e2b2fb7f
MS
3383 if (context->cpu_affinity_from_numa || context->cpu_set.set) {
3384 _cleanup_(cpu_set_reset) CPUSet converted_cpu_set = {};
3385 const CPUSet *cpu_set;
3386
3387 if (context->cpu_affinity_from_numa) {
3388 r = exec_context_cpu_affinity_from_numa(context, &converted_cpu_set);
3389 if (r < 0) {
3390 *exit_status = EXIT_CPUAFFINITY;
3391 return log_unit_error_errno(unit, r, "Failed to derive CPU affinity mask from NUMA mask: %m");
3392 }
3393
3394 cpu_set = &converted_cpu_set;
3395 } else
3396 cpu_set = &context->cpu_set;
3397
3398 if (sched_setaffinity(0, cpu_set->allocated, cpu_set->set) < 0) {
ff0af2a1 3399 *exit_status = EXIT_CPUAFFINITY;
12145637 3400 return log_unit_error_errno(unit, errno, "Failed to set up CPU affinity: %m");
034c6ed7 3401 }
e2b2fb7f 3402 }
034c6ed7 3403
b070c7c0
MS
3404 if (mpol_is_valid(numa_policy_get_type(&context->numa_policy))) {
3405 r = apply_numa_policy(&context->numa_policy);
3406 if (r == -EOPNOTSUPP)
33fe9e3f 3407 log_unit_debug_errno(unit, r, "NUMA support not available, ignoring.");
b070c7c0
MS
3408 else if (r < 0) {
3409 *exit_status = EXIT_NUMA_POLICY;
3410 return log_unit_error_errno(unit, r, "Failed to set NUMA memory policy: %m");
3411 }
3412 }
3413
d35fbf6b
DM
3414 if (context->ioprio_set)
3415 if (ioprio_set(IOPRIO_WHO_PROCESS, 0, context->ioprio) < 0) {
ff0af2a1 3416 *exit_status = EXIT_IOPRIO;
12145637 3417 return log_unit_error_errno(unit, errno, "Failed to set up IO scheduling priority: %m");
d35fbf6b 3418 }
da726a4d 3419
d35fbf6b
DM
3420 if (context->timer_slack_nsec != NSEC_INFINITY)
3421 if (prctl(PR_SET_TIMERSLACK, context->timer_slack_nsec) < 0) {
ff0af2a1 3422 *exit_status = EXIT_TIMERSLACK;
12145637 3423 return log_unit_error_errno(unit, errno, "Failed to set up timer slack: %m");
4c2630eb 3424 }
9eba9da4 3425
21022b9d
LP
3426 if (context->personality != PERSONALITY_INVALID) {
3427 r = safe_personality(context->personality);
3428 if (r < 0) {
ff0af2a1 3429 *exit_status = EXIT_PERSONALITY;
12145637 3430 return log_unit_error_errno(unit, r, "Failed to set up execution domain (personality): %m");
4c2630eb 3431 }
21022b9d 3432 }
94f04347 3433
d35fbf6b 3434 if (context->utmp_id)
df0ff127 3435 utmp_put_init_process(context->utmp_id, getpid_cached(), getsid(0),
6a93917d 3436 context->tty_path,
023a4f67
LP
3437 context->utmp_mode == EXEC_UTMP_INIT ? INIT_PROCESS :
3438 context->utmp_mode == EXEC_UTMP_LOGIN ? LOGIN_PROCESS :
3439 USER_PROCESS,
6a93917d 3440 username);
d35fbf6b 3441
08f67696 3442 if (uid_is_valid(uid)) {
ff0af2a1
LP
3443 r = chown_terminal(STDIN_FILENO, uid);
3444 if (r < 0) {
3445 *exit_status = EXIT_STDIN;
12145637 3446 return log_unit_error_errno(unit, r, "Failed to change ownership of terminal: %m");
071830ff 3447 }
d35fbf6b 3448 }
8e274523 3449
4e1dfa45 3450 /* If delegation is enabled we'll pass ownership of the cgroup to the user of the new process. On cgroup v1
62b9bb26 3451 * this is only about systemd's own hierarchy, i.e. not the controller hierarchies, simply because that's not
4e1dfa45 3452 * safe. On cgroup v2 there's only one hierarchy anyway, and delegation is safe there, hence in that case only
62b9bb26 3453 * touch a single hierarchy too. */
584b8688 3454 if (params->cgroup_path && context->user && (params->flags & EXEC_CGROUP_DELEGATE)) {
62b9bb26 3455 r = cg_set_access(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, uid, gid);
ff0af2a1
LP
3456 if (r < 0) {
3457 *exit_status = EXIT_CGROUP;
12145637 3458 return log_unit_error_errno(unit, r, "Failed to adjust control group access: %m");
034c6ed7 3459 }
d35fbf6b 3460 }
034c6ed7 3461
72fd1768 3462 for (dt = 0; dt < _EXEC_DIRECTORY_TYPE_MAX; dt++) {
8679efde 3463 r = setup_exec_directory(context, params, uid, gid, dt, exit_status);
12145637
LP
3464 if (r < 0)
3465 return log_unit_error_errno(unit, r, "Failed to set up special execution directory in %s: %m", params->prefix[dt]);
d35fbf6b 3466 }
94f04347 3467
7bce046b 3468 r = build_environment(
fd63e712 3469 unit,
7bce046b
LP
3470 context,
3471 params,
3472 n_fds,
3473 home,
3474 username,
3475 shell,
3476 journal_stream_dev,
3477 journal_stream_ino,
3478 &our_env);
2065ca69
JW
3479 if (r < 0) {
3480 *exit_status = EXIT_MEMORY;
12145637 3481 return log_oom();
2065ca69
JW
3482 }
3483
3484 r = build_pass_environment(context, &pass_env);
3485 if (r < 0) {
3486 *exit_status = EXIT_MEMORY;
12145637 3487 return log_oom();
2065ca69
JW
3488 }
3489
3490 accum_env = strv_env_merge(5,
3491 params->environment,
3492 our_env,
3493 pass_env,
3494 context->environment,
44e5d006 3495 files_env);
2065ca69
JW
3496 if (!accum_env) {
3497 *exit_status = EXIT_MEMORY;
12145637 3498 return log_oom();
2065ca69 3499 }
1280503b 3500 accum_env = strv_env_clean(accum_env);
2065ca69 3501
096424d1 3502 (void) umask(context->umask);
b213e1c1 3503
b1edf445 3504 r = setup_keyring(unit, context, params, uid, gid);
74dd6b51
LP
3505 if (r < 0) {
3506 *exit_status = EXIT_KEYRING;
12145637 3507 return log_unit_error_errno(unit, r, "Failed to set up kernel keyring: %m");
74dd6b51
LP
3508 }
3509
165a31c0 3510 /* We need sandboxing if the caller asked us to apply it and the command isn't explicitly excepted from it */
1703fa41 3511 needs_sandboxing = (params->flags & EXEC_APPLY_SANDBOXING) && !(command->flags & EXEC_COMMAND_FULLY_PRIVILEGED);
7f18ef0a 3512
165a31c0
LP
3513 /* We need the ambient capability hack, if the caller asked us to apply it and the command is marked for it, and the kernel doesn't actually support ambient caps */
3514 needs_ambient_hack = (params->flags & EXEC_APPLY_SANDBOXING) && (command->flags & EXEC_COMMAND_AMBIENT_MAGIC) && !ambient_capabilities_supported();
7f18ef0a 3515
165a31c0
LP
3516 /* We need setresuid() if the caller asked us to apply sandboxing and the command isn't explicitly excepted from either whole sandboxing or just setresuid() itself, and the ambient hack is not desired */
3517 if (needs_ambient_hack)
3518 needs_setuid = false;
3519 else
3520 needs_setuid = (params->flags & EXEC_APPLY_SANDBOXING) && !(command->flags & (EXEC_COMMAND_FULLY_PRIVILEGED|EXEC_COMMAND_NO_SETUID));
3521
3522 if (needs_sandboxing) {
7f18ef0a
FK
3523 /* MAC enablement checks need to be done before a new mount ns is created, as they rely on /sys being
3524 * present. The actual MAC context application will happen later, as late as possible, to avoid
3525 * impacting our own code paths. */
3526
349cc4a5 3527#if HAVE_SELINUX
43b1f709 3528 use_selinux = mac_selinux_use();
7f18ef0a 3529#endif
f9fa32f0 3530#if ENABLE_SMACK
43b1f709 3531 use_smack = mac_smack_use();
7f18ef0a 3532#endif
349cc4a5 3533#if HAVE_APPARMOR
43b1f709 3534 use_apparmor = mac_apparmor_use();
7f18ef0a 3535#endif
165a31c0 3536 }
7f18ef0a 3537
ce932d2d
LP
3538 if (needs_sandboxing) {
3539 int which_failed;
3540
3541 /* Let's set the resource limits before we call into PAM, so that pam_limits wins over what
3542 * is set here. (See below.) */
3543
3544 r = setrlimit_closest_all((const struct rlimit* const *) context->rlimit, &which_failed);
3545 if (r < 0) {
3546 *exit_status = EXIT_LIMITS;
3547 return log_unit_error_errno(unit, r, "Failed to adjust resource limit RLIMIT_%s: %m", rlimit_to_string(which_failed));
3548 }
3549 }
3550
165a31c0 3551 if (needs_setuid) {
ce932d2d
LP
3552
3553 /* Let's call into PAM after we set up our own idea of resource limits to that pam_limits
3554 * wins here. (See above.) */
3555
165a31c0
LP
3556 if (context->pam_name && username) {
3557 r = setup_pam(context->pam_name, username, uid, gid, context->tty_path, &accum_env, fds, n_fds);
3558 if (r < 0) {
3559 *exit_status = EXIT_PAM;
12145637 3560 return log_unit_error_errno(unit, r, "Failed to set up PAM session: %m");
165a31c0 3561 }
afb11bf1
DG
3562
3563 ngids_after_pam = getgroups_alloc(&gids_after_pam);
3564 if (ngids_after_pam < 0) {
3565 *exit_status = EXIT_MEMORY;
3566 return log_unit_error_errno(unit, ngids_after_pam, "Failed to obtain groups after setting up PAM: %m");
3567 }
165a31c0 3568 }
b213e1c1 3569 }
ac45f971 3570
5749f855
AZ
3571 if (needs_sandboxing) {
3572#if HAVE_SELINUX
3573 if (use_selinux && params->selinux_context_net && socket_fd >= 0) {
3574 r = mac_selinux_get_child_mls_label(socket_fd, command->path, context->selinux_context, &mac_selinux_context_net);
3575 if (r < 0) {
3576 *exit_status = EXIT_SELINUX_CONTEXT;
3577 return log_unit_error_errno(unit, r, "Failed to determine SELinux context: %m");
3578 }
3579 }
3580#endif
3581
3582 /* If we're unprivileged, set up the user namespace first to enable use of the other namespaces.
3583 * Users with CAP_SYS_ADMIN can set up user namespaces last because they will be able to
3584 * set up the all of the other namespaces (i.e. network, mount, UTS) without a user namespace. */
3585 if (context->private_users && !have_effective_cap(CAP_SYS_ADMIN)) {
3586 userns_set_up = true;
3587 r = setup_private_users(saved_uid, saved_gid, uid, gid);
3588 if (r < 0) {
3589 *exit_status = EXIT_USER;
3590 return log_unit_error_errno(unit, r, "Failed to set up user namespacing for unprivileged user: %m");
3591 }
3592 }
3593 }
3594
a8d08f39
LP
3595 if ((context->private_network || context->network_namespace_path) && runtime && runtime->netns_storage_socket[0] >= 0) {
3596
6e2d7c4f
MS
3597 if (ns_type_supported(NAMESPACE_NET)) {
3598 r = setup_netns(runtime->netns_storage_socket);
ee00d1e9
ZJS
3599 if (r == -EPERM)
3600 log_unit_warning_errno(unit, r,
3601 "PrivateNetwork=yes is configured, but network namespace setup failed, ignoring: %m");
3602 else if (r < 0) {
6e2d7c4f
MS
3603 *exit_status = EXIT_NETWORK;
3604 return log_unit_error_errno(unit, r, "Failed to set up network namespacing: %m");
3605 }
a8d08f39
LP
3606 } else if (context->network_namespace_path) {
3607 *exit_status = EXIT_NETWORK;
ee00d1e9
ZJS
3608 return log_unit_error_errno(unit, SYNTHETIC_ERRNO(EOPNOTSUPP),
3609 "NetworkNamespacePath= is not supported, refusing.");
6e2d7c4f
MS
3610 } else
3611 log_unit_warning(unit, "PrivateNetwork=yes is configured, but the kernel does not support network namespaces, ignoring.");
d35fbf6b 3612 }
169c1bda 3613
ee818b89 3614 needs_mount_namespace = exec_needs_mount_namespace(context, params, runtime);
ee818b89 3615 if (needs_mount_namespace) {
7cc5ef5f
ZJS
3616 _cleanup_free_ char *error_path = NULL;
3617
3618 r = apply_mount_namespace(unit, command, context, params, runtime, &error_path);
3fbe8dbe
LP
3619 if (r < 0) {
3620 *exit_status = EXIT_NAMESPACE;
7cc5ef5f
ZJS
3621 return log_unit_error_errno(unit, r, "Failed to set up mount namespacing%s%s: %m",
3622 error_path ? ": " : "", strempty(error_path));
3fbe8dbe 3623 }
d35fbf6b 3624 }
81a2b7ce 3625
daf8f72b
LP
3626 if (needs_sandboxing) {
3627 r = apply_protect_hostname(unit, context, exit_status);
3628 if (r < 0)
3629 return r;
aecd5ac6
TM
3630 }
3631
5749f855
AZ
3632 /* Drop groups as early as possible.
3633 * This needs to be done after PrivateDevices=y setup as device nodes should be owned by the host's root.
3634 * For non-root in a userns, devices will be owned by the user/group before the group change, and nobody. */
165a31c0 3635 if (needs_setuid) {
afb11bf1
DG
3636 _cleanup_free_ gid_t *gids_to_enforce = NULL;
3637 int ngids_to_enforce = 0;
3638
3639 ngids_to_enforce = merge_gid_lists(supplementary_gids,
3640 ngids,
3641 gids_after_pam,
3642 ngids_after_pam,
3643 &gids_to_enforce);
3644 if (ngids_to_enforce < 0) {
3645 *exit_status = EXIT_MEMORY;
3646 return log_unit_error_errno(unit,
3647 ngids_to_enforce,
3648 "Failed to merge group lists. Group membership might be incorrect: %m");
3649 }
3650
3651 r = enforce_groups(gid, gids_to_enforce, ngids_to_enforce);
096424d1
LP
3652 if (r < 0) {
3653 *exit_status = EXIT_GROUP;
12145637 3654 return log_unit_error_errno(unit, r, "Changing group credentials failed: %m");
096424d1 3655 }
165a31c0 3656 }
096424d1 3657
5749f855
AZ
3658 /* If the user namespace was not set up above, try to do it now.
3659 * It's preferred to set up the user namespace later (after all other namespaces) so as not to be
3660 * restricted by rules pertaining to combining user namspaces with other namespaces (e.g. in the
3661 * case of mount namespaces being less privileged when the mount point list is copied from a
3662 * different user namespace). */
9008e1ac 3663
5749f855
AZ
3664 if (needs_sandboxing && context->private_users && !userns_set_up) {
3665 r = setup_private_users(saved_uid, saved_gid, uid, gid);
3666 if (r < 0) {
3667 *exit_status = EXIT_USER;
3668 return log_unit_error_errno(unit, r, "Failed to set up user namespacing: %m");
d251207d
LP
3669 }
3670 }
3671
165a31c0 3672 /* We repeat the fd closing here, to make sure that nothing is leaked from the PAM modules. Note that we are
5686391b
LP
3673 * more aggressive this time since socket_fd and the netns fds we don't need anymore. We do keep the exec_fd
3674 * however if we have it as we want to keep it open until the final execve(). */
3675
3676 if (params->exec_fd >= 0) {
3677 exec_fd = params->exec_fd;
3678
3679 if (exec_fd < 3 + (int) n_fds) {
3680 int moved_fd;
3681
3682 /* Let's move the exec fd far up, so that it's outside of the fd range we want to pass to the
3683 * process we are about to execute. */
3684
3685 moved_fd = fcntl(exec_fd, F_DUPFD_CLOEXEC, 3 + (int) n_fds);
3686 if (moved_fd < 0) {
3687 *exit_status = EXIT_FDS;
3688 return log_unit_error_errno(unit, errno, "Couldn't move exec fd up: %m");
3689 }
3690
3691 safe_close(exec_fd);
3692 exec_fd = moved_fd;
3693 } else {
3694 /* This fd should be FD_CLOEXEC already, but let's make sure. */
3695 r = fd_cloexec(exec_fd, true);
3696 if (r < 0) {
3697 *exit_status = EXIT_FDS;
3698 return log_unit_error_errno(unit, r, "Failed to make exec fd FD_CLOEXEC: %m");
3699 }
3700 }
3701
3702 fds_with_exec_fd = newa(int, n_fds + 1);
7e8d494b 3703 memcpy_safe(fds_with_exec_fd, fds, n_fds * sizeof(int));
5686391b
LP
3704 fds_with_exec_fd[n_fds] = exec_fd;
3705 n_fds_with_exec_fd = n_fds + 1;
3706 } else {
3707 fds_with_exec_fd = fds;
3708 n_fds_with_exec_fd = n_fds;
3709 }
3710
3711 r = close_all_fds(fds_with_exec_fd, n_fds_with_exec_fd);
ff0af2a1
LP
3712 if (r >= 0)
3713 r = shift_fds(fds, n_fds);
3714 if (r >= 0)
25b583d7 3715 r = flags_fds(fds, n_socket_fds, n_storage_fds, context->non_blocking);
ff0af2a1
LP
3716 if (r < 0) {
3717 *exit_status = EXIT_FDS;
12145637 3718 return log_unit_error_errno(unit, r, "Failed to adjust passed file descriptors: %m");
d35fbf6b 3719 }
e66cf1a3 3720
5686391b
LP
3721 /* At this point, the fds we want to pass to the program are all ready and set up, with O_CLOEXEC turned off
3722 * and at the right fd numbers. The are no other fds open, with one exception: the exec_fd if it is defined,
3723 * and it has O_CLOEXEC set, after all we want it to be closed by the execve(), so that our parent knows we
3724 * came this far. */
3725
165a31c0 3726 secure_bits = context->secure_bits;
e66cf1a3 3727
165a31c0
LP
3728 if (needs_sandboxing) {
3729 uint64_t bset;
e66cf1a3 3730
ce932d2d
LP
3731 /* Set the RTPRIO resource limit to 0, but only if nothing else was explicitly
3732 * requested. (Note this is placed after the general resource limit initialization, see
3733 * above, in order to take precedence.) */
f4170c67
LP
3734 if (context->restrict_realtime && !context->rlimit[RLIMIT_RTPRIO]) {
3735 if (setrlimit(RLIMIT_RTPRIO, &RLIMIT_MAKE_CONST(0)) < 0) {
3736 *exit_status = EXIT_LIMITS;
12145637 3737 return log_unit_error_errno(unit, errno, "Failed to adjust RLIMIT_RTPRIO resource limit: %m");
f4170c67
LP
3738 }
3739 }
3740
37ac2744
JB
3741#if ENABLE_SMACK
3742 /* LSM Smack needs the capability CAP_MAC_ADMIN to change the current execution security context of the
3743 * process. This is the latest place before dropping capabilities. Other MAC context are set later. */
3744 if (use_smack) {
3745 r = setup_smack(context, command);
3746 if (r < 0) {
3747 *exit_status = EXIT_SMACK_PROCESS_LABEL;
3748 return log_unit_error_errno(unit, r, "Failed to set SMACK process label: %m");
3749 }
3750 }
3751#endif
3752
165a31c0
LP
3753 bset = context->capability_bounding_set;
3754 /* If the ambient caps hack is enabled (which means the kernel can't do them, and the user asked for
3755 * our magic fallback), then let's add some extra caps, so that the service can drop privs of its own,
3756 * instead of us doing that */
3757 if (needs_ambient_hack)
3758 bset |= (UINT64_C(1) << CAP_SETPCAP) |
3759 (UINT64_C(1) << CAP_SETUID) |
3760 (UINT64_C(1) << CAP_SETGID);
3761
3762 if (!cap_test_all(bset)) {
3763 r = capability_bounding_set_drop(bset, false);
ff0af2a1
LP
3764 if (r < 0) {
3765 *exit_status = EXIT_CAPABILITIES;
12145637 3766 return log_unit_error_errno(unit, r, "Failed to drop capabilities: %m");
3b8bddde 3767 }
4c2630eb 3768 }
3b8bddde 3769
755d4b67
IP
3770 /* This is done before enforce_user, but ambient set
3771 * does not survive over setresuid() if keep_caps is not set. */
943800f4 3772 if (!needs_ambient_hack) {
755d4b67
IP
3773 r = capability_ambient_set_apply(context->capability_ambient_set, true);
3774 if (r < 0) {
3775 *exit_status = EXIT_CAPABILITIES;
12145637 3776 return log_unit_error_errno(unit, r, "Failed to apply ambient capabilities (before UID change): %m");
755d4b67 3777 }
755d4b67 3778 }
165a31c0 3779 }
755d4b67 3780
fa97f630
JB
3781 /* chroot to root directory first, before we lose the ability to chroot */
3782 r = apply_root_directory(context, params, needs_mount_namespace, exit_status);
3783 if (r < 0)
3784 return log_unit_error_errno(unit, r, "Chrooting to the requested root directory failed: %m");
3785
165a31c0 3786 if (needs_setuid) {
08f67696 3787 if (uid_is_valid(uid)) {
ff0af2a1
LP
3788 r = enforce_user(context, uid);
3789 if (r < 0) {
3790 *exit_status = EXIT_USER;
12145637 3791 return log_unit_error_errno(unit, r, "Failed to change UID to " UID_FMT ": %m", uid);
5b6319dc 3792 }
165a31c0
LP
3793
3794 if (!needs_ambient_hack &&
3795 context->capability_ambient_set != 0) {
755d4b67
IP
3796
3797 /* Fix the ambient capabilities after user change. */
3798 r = capability_ambient_set_apply(context->capability_ambient_set, false);
3799 if (r < 0) {
3800 *exit_status = EXIT_CAPABILITIES;
12145637 3801 return log_unit_error_errno(unit, r, "Failed to apply ambient capabilities (after UID change): %m");
755d4b67
IP
3802 }
3803
3804 /* If we were asked to change user and ambient capabilities
3805 * were requested, we had to add keep-caps to the securebits
3806 * so that we would maintain the inherited capability set
3807 * through the setresuid(). Make sure that the bit is added
3808 * also to the context secure_bits so that we don't try to
3809 * drop the bit away next. */
3810
7f508f2c 3811 secure_bits |= 1<<SECURE_KEEP_CAPS;
755d4b67 3812 }
5b6319dc 3813 }
165a31c0 3814 }
d35fbf6b 3815
56ef8db9
JB
3816 /* Apply working directory here, because the working directory might be on NFS and only the user running
3817 * this service might have the correct privilege to change to the working directory */
fa97f630 3818 r = apply_working_directory(context, params, home, exit_status);
56ef8db9
JB
3819 if (r < 0)
3820 return log_unit_error_errno(unit, r, "Changing to the requested working directory failed: %m");
3821
165a31c0 3822 if (needs_sandboxing) {
37ac2744 3823 /* Apply other MAC contexts late, but before seccomp syscall filtering, as those should really be last to
5cd9cd35
LP
3824 * influence our own codepaths as little as possible. Moreover, applying MAC contexts usually requires
3825 * syscalls that are subject to seccomp filtering, hence should probably be applied before the syscalls
3826 * are restricted. */
3827
349cc4a5 3828#if HAVE_SELINUX
43b1f709 3829 if (use_selinux) {
5cd9cd35
LP
3830 char *exec_context = mac_selinux_context_net ?: context->selinux_context;
3831
3832 if (exec_context) {
3833 r = setexeccon(exec_context);
3834 if (r < 0) {
3835 *exit_status = EXIT_SELINUX_CONTEXT;
12145637 3836 return log_unit_error_errno(unit, r, "Failed to change SELinux context to %s: %m", exec_context);
5cd9cd35
LP
3837 }
3838 }
3839 }
3840#endif
3841
349cc4a5 3842#if HAVE_APPARMOR
43b1f709 3843 if (use_apparmor && context->apparmor_profile) {
5cd9cd35
LP
3844 r = aa_change_onexec(context->apparmor_profile);
3845 if (r < 0 && !context->apparmor_profile_ignore) {
3846 *exit_status = EXIT_APPARMOR_PROFILE;
12145637 3847 return log_unit_error_errno(unit, errno, "Failed to prepare AppArmor profile change to %s: %m", context->apparmor_profile);
5cd9cd35
LP
3848 }
3849 }
3850#endif
3851
165a31c0
LP
3852 /* PR_GET_SECUREBITS is not privileged, while PR_SET_SECUREBITS is. So to suppress potential EPERMs
3853 * we'll try not to call PR_SET_SECUREBITS unless necessary. */
755d4b67
IP
3854 if (prctl(PR_GET_SECUREBITS) != secure_bits)
3855 if (prctl(PR_SET_SECUREBITS, secure_bits) < 0) {
ff0af2a1 3856 *exit_status = EXIT_SECUREBITS;
12145637 3857 return log_unit_error_errno(unit, errno, "Failed to set process secure bits: %m");
ff01d048 3858 }
5b6319dc 3859
59eeb84b 3860 if (context_has_no_new_privileges(context))
d35fbf6b 3861 if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) < 0) {
ff0af2a1 3862 *exit_status = EXIT_NO_NEW_PRIVILEGES;
12145637 3863 return log_unit_error_errno(unit, errno, "Failed to disable new privileges: %m");
d35fbf6b
DM
3864 }
3865
349cc4a5 3866#if HAVE_SECCOMP
469830d1
LP
3867 r = apply_address_families(unit, context);
3868 if (r < 0) {
3869 *exit_status = EXIT_ADDRESS_FAMILIES;
12145637 3870 return log_unit_error_errno(unit, r, "Failed to restrict address families: %m");
4c2630eb 3871 }
04aa0cb9 3872
469830d1
LP
3873 r = apply_memory_deny_write_execute(unit, context);
3874 if (r < 0) {
3875 *exit_status = EXIT_SECCOMP;
12145637 3876 return log_unit_error_errno(unit, r, "Failed to disable writing to executable memory: %m");
f3e43635 3877 }
f4170c67 3878
469830d1
LP
3879 r = apply_restrict_realtime(unit, context);
3880 if (r < 0) {
3881 *exit_status = EXIT_SECCOMP;
12145637 3882 return log_unit_error_errno(unit, r, "Failed to apply realtime restrictions: %m");
f4170c67
LP
3883 }
3884
f69567cb
LP
3885 r = apply_restrict_suid_sgid(unit, context);
3886 if (r < 0) {
3887 *exit_status = EXIT_SECCOMP;
3888 return log_unit_error_errno(unit, r, "Failed to apply SUID/SGID restrictions: %m");
3889 }
3890
add00535
LP
3891 r = apply_restrict_namespaces(unit, context);
3892 if (r < 0) {
3893 *exit_status = EXIT_SECCOMP;
12145637 3894 return log_unit_error_errno(unit, r, "Failed to apply namespace restrictions: %m");
add00535
LP
3895 }
3896
469830d1
LP
3897 r = apply_protect_sysctl(unit, context);
3898 if (r < 0) {
3899 *exit_status = EXIT_SECCOMP;
12145637 3900 return log_unit_error_errno(unit, r, "Failed to apply sysctl restrictions: %m");
502d704e
DH
3901 }
3902
469830d1
LP
3903 r = apply_protect_kernel_modules(unit, context);
3904 if (r < 0) {
3905 *exit_status = EXIT_SECCOMP;
12145637 3906 return log_unit_error_errno(unit, r, "Failed to apply module loading restrictions: %m");
59eeb84b
LP
3907 }
3908
84703040
KK
3909 r = apply_protect_kernel_logs(unit, context);
3910 if (r < 0) {
3911 *exit_status = EXIT_SECCOMP;
3912 return log_unit_error_errno(unit, r, "Failed to apply kernel log restrictions: %m");
3913 }
3914
fc64760d
KK
3915 r = apply_protect_clock(unit, context);
3916 if (r < 0) {
3917 *exit_status = EXIT_SECCOMP;
3918 return log_unit_error_errno(unit, r, "Failed to apply clock restrictions: %m");
3919 }
3920
469830d1
LP
3921 r = apply_private_devices(unit, context);
3922 if (r < 0) {
3923 *exit_status = EXIT_SECCOMP;
12145637 3924 return log_unit_error_errno(unit, r, "Failed to set up private devices: %m");
469830d1
LP
3925 }
3926
3927 r = apply_syscall_archs(unit, context);
3928 if (r < 0) {
3929 *exit_status = EXIT_SECCOMP;
12145637 3930 return log_unit_error_errno(unit, r, "Failed to apply syscall architecture restrictions: %m");
ba128bb8
LP
3931 }
3932
78e864e5
TM
3933 r = apply_lock_personality(unit, context);
3934 if (r < 0) {
3935 *exit_status = EXIT_SECCOMP;
12145637 3936 return log_unit_error_errno(unit, r, "Failed to lock personalities: %m");
78e864e5
TM
3937 }
3938
5cd9cd35
LP
3939 /* This really should remain the last step before the execve(), to make sure our own code is affected
3940 * by the filter as little as possible. */
165a31c0 3941 r = apply_syscall_filter(unit, context, needs_ambient_hack);
469830d1
LP
3942 if (r < 0) {
3943 *exit_status = EXIT_SECCOMP;
12145637 3944 return log_unit_error_errno(unit, r, "Failed to apply system call filters: %m");
d35fbf6b
DM
3945 }
3946#endif
d35fbf6b 3947 }
034c6ed7 3948
00819cc1
LP
3949 if (!strv_isempty(context->unset_environment)) {
3950 char **ee = NULL;
3951
3952 ee = strv_env_delete(accum_env, 1, context->unset_environment);
3953 if (!ee) {
3954 *exit_status = EXIT_MEMORY;
12145637 3955 return log_oom();
00819cc1
LP
3956 }
3957
130d3d22 3958 strv_free_and_replace(accum_env, ee);
00819cc1
LP
3959 }
3960
7ca69792
AZ
3961 if (!FLAGS_SET(command->flags, EXEC_COMMAND_NO_ENV_EXPAND)) {
3962 replaced_argv = replace_env_argv(command->argv, accum_env);
3963 if (!replaced_argv) {
3964 *exit_status = EXIT_MEMORY;
3965 return log_oom();
3966 }
3967 final_argv = replaced_argv;
3968 } else
3969 final_argv = command->argv;
034c6ed7 3970
f1d34068 3971 if (DEBUG_LOGGING) {
d35fbf6b 3972 _cleanup_free_ char *line;
81a2b7ce 3973
d35fbf6b 3974 line = exec_command_line(final_argv);
a1230ff9 3975 if (line)
f2341e0a 3976 log_struct(LOG_DEBUG,
f2341e0a
LP
3977 "EXECUTABLE=%s", command->path,
3978 LOG_UNIT_MESSAGE(unit, "Executing: %s", line),
ba360bb0 3979 LOG_UNIT_ID(unit),
a1230ff9 3980 LOG_UNIT_INVOCATION_ID(unit));
d35fbf6b 3981 }
dd305ec9 3982
5686391b
LP
3983 if (exec_fd >= 0) {
3984 uint8_t hot = 1;
3985
3986 /* We have finished with all our initializations. Let's now let the manager know that. From this point
3987 * on, if the manager sees POLLHUP on the exec_fd, then execve() was successful. */
3988
3989 if (write(exec_fd, &hot, sizeof(hot)) < 0) {
3990 *exit_status = EXIT_EXEC;
3991 return log_unit_error_errno(unit, errno, "Failed to enable exec_fd: %m");
3992 }
3993 }
3994
2065ca69 3995 execve(command->path, final_argv, accum_env);
5686391b
LP
3996 r = -errno;
3997
3998 if (exec_fd >= 0) {
3999 uint8_t hot = 0;
4000
4001 /* The execve() failed. This means the exec_fd is still open. Which means we need to tell the manager
4002 * that POLLHUP on it no longer means execve() succeeded. */
4003
4004 if (write(exec_fd, &hot, sizeof(hot)) < 0) {
4005 *exit_status = EXIT_EXEC;
4006 return log_unit_error_errno(unit, errno, "Failed to disable exec_fd: %m");
4007 }
4008 }
12145637 4009
5686391b
LP
4010 if (r == -ENOENT && (command->flags & EXEC_COMMAND_IGNORE_FAILURE)) {
4011 log_struct_errno(LOG_INFO, r,
12145637
LP
4012 "MESSAGE_ID=" SD_MESSAGE_SPAWN_FAILED_STR,
4013 LOG_UNIT_ID(unit),
4014 LOG_UNIT_INVOCATION_ID(unit),
4015 LOG_UNIT_MESSAGE(unit, "Executable %s missing, skipping: %m",
4016 command->path),
a1230ff9 4017 "EXECUTABLE=%s", command->path);
12145637
LP
4018 return 0;
4019 }
4020
ff0af2a1 4021 *exit_status = EXIT_EXEC;
5686391b 4022 return log_unit_error_errno(unit, r, "Failed to execute command: %m");
d35fbf6b 4023}
81a2b7ce 4024
34cf6c43 4025static int exec_context_load_environment(const Unit *unit, const ExecContext *c, char ***l);
2caa38e9 4026static int exec_context_named_iofds(const ExecContext *c, const ExecParameters *p, int named_iofds[static 3]);
34cf6c43 4027
f2341e0a
LP
4028int exec_spawn(Unit *unit,
4029 ExecCommand *command,
d35fbf6b
DM
4030 const ExecContext *context,
4031 const ExecParameters *params,
4032 ExecRuntime *runtime,
29206d46 4033 DynamicCreds *dcreds,
d35fbf6b 4034 pid_t *ret) {
8351ceae 4035
ee39ca20 4036 int socket_fd, r, named_iofds[3] = { -1, -1, -1 }, *fds = NULL;
78f93209 4037 _cleanup_free_ char *subcgroup_path = NULL;
d35fbf6b 4038 _cleanup_strv_free_ char **files_env = NULL;
da6053d0 4039 size_t n_storage_fds = 0, n_socket_fds = 0;
ff0af2a1 4040 _cleanup_free_ char *line = NULL;
d35fbf6b 4041 pid_t pid;
8351ceae 4042
f2341e0a 4043 assert(unit);
d35fbf6b
DM
4044 assert(command);
4045 assert(context);
4046 assert(ret);
4047 assert(params);
25b583d7 4048 assert(params->fds || (params->n_socket_fds + params->n_storage_fds <= 0));
4298d0b5 4049
d35fbf6b
DM
4050 if (context->std_input == EXEC_INPUT_SOCKET ||
4051 context->std_output == EXEC_OUTPUT_SOCKET ||
4052 context->std_error == EXEC_OUTPUT_SOCKET) {
17df7223 4053
4c47affc 4054 if (params->n_socket_fds > 1) {
f2341e0a 4055 log_unit_error(unit, "Got more than one socket.");
d35fbf6b 4056 return -EINVAL;
ff0af2a1 4057 }
eef65bf3 4058
4c47affc 4059 if (params->n_socket_fds == 0) {
488ab41c
AA
4060 log_unit_error(unit, "Got no socket.");
4061 return -EINVAL;
4062 }
4063
d35fbf6b
DM
4064 socket_fd = params->fds[0];
4065 } else {
4066 socket_fd = -1;
4067 fds = params->fds;
9b141911 4068 n_socket_fds = params->n_socket_fds;
25b583d7 4069 n_storage_fds = params->n_storage_fds;
d35fbf6b 4070 }
94f04347 4071
34cf6c43 4072 r = exec_context_named_iofds(context, params, named_iofds);
52c239d7
LB
4073 if (r < 0)
4074 return log_unit_error_errno(unit, r, "Failed to load a named file descriptor: %m");
4075
f2341e0a 4076 r = exec_context_load_environment(unit, context, &files_env);
ff0af2a1 4077 if (r < 0)
f2341e0a 4078 return log_unit_error_errno(unit, r, "Failed to load environment files: %m");
034c6ed7 4079
ee39ca20 4080 line = exec_command_line(command->argv);
d35fbf6b
DM
4081 if (!line)
4082 return log_oom();
fab56fc5 4083
f2341e0a 4084 log_struct(LOG_DEBUG,
f2341e0a
LP
4085 LOG_UNIT_MESSAGE(unit, "About to execute: %s", line),
4086 "EXECUTABLE=%s", command->path,
ba360bb0 4087 LOG_UNIT_ID(unit),
a1230ff9 4088 LOG_UNIT_INVOCATION_ID(unit));
12145637 4089
78f93209
LP
4090 if (params->cgroup_path) {
4091 r = exec_parameters_get_cgroup_path(params, &subcgroup_path);
4092 if (r < 0)
4093 return log_unit_error_errno(unit, r, "Failed to acquire subcgroup path: %m");
4094 if (r > 0) { /* We are using a child cgroup */
4095 r = cg_create(SYSTEMD_CGROUP_CONTROLLER, subcgroup_path);
4096 if (r < 0)
4097 return log_unit_error_errno(unit, r, "Failed to create control group '%s': %m", subcgroup_path);
4098 }
4099 }
4100
d35fbf6b
DM
4101 pid = fork();
4102 if (pid < 0)
74129a12 4103 return log_unit_error_errno(unit, errno, "Failed to fork: %m");
d35fbf6b
DM
4104
4105 if (pid == 0) {
12145637 4106 int exit_status = EXIT_SUCCESS;
ff0af2a1 4107
f2341e0a
LP
4108 r = exec_child(unit,
4109 command,
ff0af2a1
LP
4110 context,
4111 params,
4112 runtime,
29206d46 4113 dcreds,
ff0af2a1 4114 socket_fd,
52c239d7 4115 named_iofds,
4c47affc 4116 fds,
9b141911 4117 n_socket_fds,
25b583d7 4118 n_storage_fds,
ff0af2a1 4119 files_env,
00d9ef85 4120 unit->manager->user_lookup_fds[1],
12145637
LP
4121 &exit_status);
4122
e1714f02
ZJS
4123 if (r < 0) {
4124 const char *status =
4125 exit_status_to_string(exit_status,
e04ed6db 4126 EXIT_STATUS_LIBC | EXIT_STATUS_SYSTEMD);
e1714f02 4127
12145637
LP
4128 log_struct_errno(LOG_ERR, r,
4129 "MESSAGE_ID=" SD_MESSAGE_SPAWN_FAILED_STR,
4130 LOG_UNIT_ID(unit),
4131 LOG_UNIT_INVOCATION_ID(unit),
4132 LOG_UNIT_MESSAGE(unit, "Failed at step %s spawning %s: %m",
e1714f02 4133 status, command->path),
a1230ff9 4134 "EXECUTABLE=%s", command->path);
e1714f02 4135 }
4c2630eb 4136
ff0af2a1 4137 _exit(exit_status);
034c6ed7
LP
4138 }
4139
f2341e0a 4140 log_unit_debug(unit, "Forked %s as "PID_FMT, command->path, pid);
23635a85 4141
78f93209
LP
4142 /* We add the new process to the cgroup both in the child (so that we can be sure that no user code is ever
4143 * executed outside of the cgroup) and in the parent (so that we can be sure that when we kill the cgroup the
4144 * process will be killed too). */
4145 if (subcgroup_path)
4146 (void) cg_attach(SYSTEMD_CGROUP_CONTROLLER, subcgroup_path, pid);
2da3263a 4147
b58b4116 4148 exec_status_start(&command->exec_status, pid);
9fb86720 4149
034c6ed7 4150 *ret = pid;
5cb5a6ff
LP
4151 return 0;
4152}
4153
034c6ed7 4154void exec_context_init(ExecContext *c) {
3536f49e
YW
4155 ExecDirectoryType i;
4156
034c6ed7
LP
4157 assert(c);
4158
4c12626c 4159 c->umask = 0022;
9eba9da4 4160 c->ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 0);
94f04347 4161 c->cpu_sched_policy = SCHED_OTHER;
071830ff 4162 c->syslog_priority = LOG_DAEMON|LOG_INFO;
74922904 4163 c->syslog_level_prefix = true;
353e12c2 4164 c->ignore_sigpipe = true;
3a43da28 4165 c->timer_slack_nsec = NSEC_INFINITY;
050f7277 4166 c->personality = PERSONALITY_INVALID;
72fd1768 4167 for (i = 0; i < _EXEC_DIRECTORY_TYPE_MAX; i++)
3536f49e 4168 c->directories[i].mode = 0755;
12213aed 4169 c->timeout_clean_usec = USEC_INFINITY;
a103496c 4170 c->capability_bounding_set = CAP_ALL;
aa9d574d
YW
4171 assert_cc(NAMESPACE_FLAGS_INITIAL != NAMESPACE_FLAGS_ALL);
4172 c->restrict_namespaces = NAMESPACE_FLAGS_INITIAL;
d3070fbd 4173 c->log_level_max = -1;
b070c7c0 4174 numa_policy_reset(&c->numa_policy);
034c6ed7
LP
4175}
4176
613b411c 4177void exec_context_done(ExecContext *c) {
3536f49e 4178 ExecDirectoryType i;
d3070fbd 4179 size_t l;
5cb5a6ff
LP
4180
4181 assert(c);
4182
6796073e
LP
4183 c->environment = strv_free(c->environment);
4184 c->environment_files = strv_free(c->environment_files);
b4c14404 4185 c->pass_environment = strv_free(c->pass_environment);
00819cc1 4186 c->unset_environment = strv_free(c->unset_environment);
8c7be95e 4187
31ce987c 4188 rlimit_free_all(c->rlimit);
034c6ed7 4189
2038c3f5 4190 for (l = 0; l < 3; l++) {
52c239d7 4191 c->stdio_fdname[l] = mfree(c->stdio_fdname[l]);
2038c3f5
LP
4192 c->stdio_file[l] = mfree(c->stdio_file[l]);
4193 }
52c239d7 4194
a1e58e8e
LP
4195 c->working_directory = mfree(c->working_directory);
4196 c->root_directory = mfree(c->root_directory);
915e6d16 4197 c->root_image = mfree(c->root_image);
a1e58e8e
LP
4198 c->tty_path = mfree(c->tty_path);
4199 c->syslog_identifier = mfree(c->syslog_identifier);
4200 c->user = mfree(c->user);
4201 c->group = mfree(c->group);
034c6ed7 4202
6796073e 4203 c->supplementary_groups = strv_free(c->supplementary_groups);
94f04347 4204
a1e58e8e 4205 c->pam_name = mfree(c->pam_name);
5b6319dc 4206
2a624c36
AP
4207 c->read_only_paths = strv_free(c->read_only_paths);
4208 c->read_write_paths = strv_free(c->read_write_paths);
4209 c->inaccessible_paths = strv_free(c->inaccessible_paths);
82c121a4 4210
d2d6c096 4211 bind_mount_free_many(c->bind_mounts, c->n_bind_mounts);
8e06d57c
YW
4212 c->bind_mounts = NULL;
4213 c->n_bind_mounts = 0;
2abd4e38
YW
4214 temporary_filesystem_free_many(c->temporary_filesystems, c->n_temporary_filesystems);
4215 c->temporary_filesystems = NULL;
4216 c->n_temporary_filesystems = 0;
d2d6c096 4217
0985c7c4 4218 cpu_set_reset(&c->cpu_set);
b070c7c0 4219 numa_policy_reset(&c->numa_policy);
86a3475b 4220
a1e58e8e
LP
4221 c->utmp_id = mfree(c->utmp_id);
4222 c->selinux_context = mfree(c->selinux_context);
4223 c->apparmor_profile = mfree(c->apparmor_profile);
5b8e1b77 4224 c->smack_process_label = mfree(c->smack_process_label);
eef65bf3 4225
8cfa775f 4226 c->syscall_filter = hashmap_free(c->syscall_filter);
525d3cc7
LP
4227 c->syscall_archs = set_free(c->syscall_archs);
4228 c->address_families = set_free(c->address_families);
e66cf1a3 4229
72fd1768 4230 for (i = 0; i < _EXEC_DIRECTORY_TYPE_MAX; i++)
3536f49e 4231 c->directories[i].paths = strv_free(c->directories[i].paths);
d3070fbd
LP
4232
4233 c->log_level_max = -1;
4234
4235 exec_context_free_log_extra_fields(c);
08f3be7a 4236
5ac1530e
ZJS
4237 c->log_ratelimit_interval_usec = 0;
4238 c->log_ratelimit_burst = 0;
90fc172e 4239
08f3be7a
LP
4240 c->stdin_data = mfree(c->stdin_data);
4241 c->stdin_data_size = 0;
a8d08f39
LP
4242
4243 c->network_namespace_path = mfree(c->network_namespace_path);
91dd5f7c
LP
4244
4245 c->log_namespace = mfree(c->log_namespace);
e66cf1a3
LP
4246}
4247
34cf6c43 4248int exec_context_destroy_runtime_directory(const ExecContext *c, const char *runtime_prefix) {
e66cf1a3
LP
4249 char **i;
4250
4251 assert(c);
4252
4253 if (!runtime_prefix)
4254 return 0;
4255
3536f49e 4256 STRV_FOREACH(i, c->directories[EXEC_DIRECTORY_RUNTIME].paths) {
e66cf1a3
LP
4257 _cleanup_free_ char *p;
4258
494d0247
YW
4259 if (exec_directory_is_private(c, EXEC_DIRECTORY_RUNTIME))
4260 p = path_join(runtime_prefix, "private", *i);
4261 else
4262 p = path_join(runtime_prefix, *i);
e66cf1a3
LP
4263 if (!p)
4264 return -ENOMEM;
4265
7bc4bf4a
LP
4266 /* We execute this synchronously, since we need to be sure this is gone when we start the
4267 * service next. */
c6878637 4268 (void) rm_rf(p, REMOVE_ROOT);
e66cf1a3
LP
4269 }
4270
4271 return 0;
5cb5a6ff
LP
4272}
4273
34cf6c43 4274static void exec_command_done(ExecCommand *c) {
43d0fcbd
LP
4275 assert(c);
4276
a1e58e8e 4277 c->path = mfree(c->path);
6796073e 4278 c->argv = strv_free(c->argv);
43d0fcbd
LP
4279}
4280
da6053d0
LP
4281void exec_command_done_array(ExecCommand *c, size_t n) {
4282 size_t i;
43d0fcbd
LP
4283
4284 for (i = 0; i < n; i++)
4285 exec_command_done(c+i);
4286}
4287
f1acf85a 4288ExecCommand* exec_command_free_list(ExecCommand *c) {
5cb5a6ff
LP
4289 ExecCommand *i;
4290
4291 while ((i = c)) {
71fda00f 4292 LIST_REMOVE(command, c, i);
43d0fcbd 4293 exec_command_done(i);
5cb5a6ff
LP
4294 free(i);
4295 }
f1acf85a
ZJS
4296
4297 return NULL;
5cb5a6ff
LP
4298}
4299
da6053d0
LP
4300void exec_command_free_array(ExecCommand **c, size_t n) {
4301 size_t i;
034c6ed7 4302
f1acf85a
ZJS
4303 for (i = 0; i < n; i++)
4304 c[i] = exec_command_free_list(c[i]);
034c6ed7
LP
4305}
4306
6a1d4d9f
LP
4307void exec_command_reset_status_array(ExecCommand *c, size_t n) {
4308 size_t i;
4309
4310 for (i = 0; i < n; i++)
4311 exec_status_reset(&c[i].exec_status);
4312}
4313
4314void exec_command_reset_status_list_array(ExecCommand **c, size_t n) {
4315 size_t i;
4316
4317 for (i = 0; i < n; i++) {
4318 ExecCommand *z;
4319
4320 LIST_FOREACH(command, z, c[i])
4321 exec_status_reset(&z->exec_status);
4322 }
4323}
4324
039f0e70 4325typedef struct InvalidEnvInfo {
34cf6c43 4326 const Unit *unit;
039f0e70
LP
4327 const char *path;
4328} InvalidEnvInfo;
4329
4330static void invalid_env(const char *p, void *userdata) {
4331 InvalidEnvInfo *info = userdata;
4332
f2341e0a 4333 log_unit_error(info->unit, "Ignoring invalid environment assignment '%s': %s", p, info->path);
039f0e70
LP
4334}
4335
52c239d7
LB
4336const char* exec_context_fdname(const ExecContext *c, int fd_index) {
4337 assert(c);
4338
4339 switch (fd_index) {
5073ff6b 4340
52c239d7
LB
4341 case STDIN_FILENO:
4342 if (c->std_input != EXEC_INPUT_NAMED_FD)
4343 return NULL;
5073ff6b 4344
52c239d7 4345 return c->stdio_fdname[STDIN_FILENO] ?: "stdin";
5073ff6b 4346
52c239d7
LB
4347 case STDOUT_FILENO:
4348 if (c->std_output != EXEC_OUTPUT_NAMED_FD)
4349 return NULL;
5073ff6b 4350
52c239d7 4351 return c->stdio_fdname[STDOUT_FILENO] ?: "stdout";
5073ff6b 4352
52c239d7
LB
4353 case STDERR_FILENO:
4354 if (c->std_error != EXEC_OUTPUT_NAMED_FD)
4355 return NULL;
5073ff6b 4356
52c239d7 4357 return c->stdio_fdname[STDERR_FILENO] ?: "stderr";
5073ff6b 4358
52c239d7
LB
4359 default:
4360 return NULL;
4361 }
4362}
4363
2caa38e9
LP
4364static int exec_context_named_iofds(
4365 const ExecContext *c,
4366 const ExecParameters *p,
4367 int named_iofds[static 3]) {
4368
da6053d0 4369 size_t i, targets;
56fbd561 4370 const char* stdio_fdname[3];
da6053d0 4371 size_t n_fds;
52c239d7
LB
4372
4373 assert(c);
4374 assert(p);
2caa38e9 4375 assert(named_iofds);
52c239d7
LB
4376
4377 targets = (c->std_input == EXEC_INPUT_NAMED_FD) +
4378 (c->std_output == EXEC_OUTPUT_NAMED_FD) +
4379 (c->std_error == EXEC_OUTPUT_NAMED_FD);
4380
4381 for (i = 0; i < 3; i++)
4382 stdio_fdname[i] = exec_context_fdname(c, i);
4383
4c47affc
FB
4384 n_fds = p->n_storage_fds + p->n_socket_fds;
4385
4386 for (i = 0; i < n_fds && targets > 0; i++)
56fbd561
ZJS
4387 if (named_iofds[STDIN_FILENO] < 0 &&
4388 c->std_input == EXEC_INPUT_NAMED_FD &&
4389 stdio_fdname[STDIN_FILENO] &&
4390 streq(p->fd_names[i], stdio_fdname[STDIN_FILENO])) {
4391
52c239d7
LB
4392 named_iofds[STDIN_FILENO] = p->fds[i];
4393 targets--;
56fbd561
ZJS
4394
4395 } else if (named_iofds[STDOUT_FILENO] < 0 &&
4396 c->std_output == EXEC_OUTPUT_NAMED_FD &&
4397 stdio_fdname[STDOUT_FILENO] &&
4398 streq(p->fd_names[i], stdio_fdname[STDOUT_FILENO])) {
4399
52c239d7
LB
4400 named_iofds[STDOUT_FILENO] = p->fds[i];
4401 targets--;
56fbd561
ZJS
4402
4403 } else if (named_iofds[STDERR_FILENO] < 0 &&
4404 c->std_error == EXEC_OUTPUT_NAMED_FD &&
4405 stdio_fdname[STDERR_FILENO] &&
4406 streq(p->fd_names[i], stdio_fdname[STDERR_FILENO])) {
4407
52c239d7
LB
4408 named_iofds[STDERR_FILENO] = p->fds[i];
4409 targets--;
4410 }
4411
56fbd561 4412 return targets == 0 ? 0 : -ENOENT;
52c239d7
LB
4413}
4414
34cf6c43 4415static int exec_context_load_environment(const Unit *unit, const ExecContext *c, char ***l) {
8c7be95e
LP
4416 char **i, **r = NULL;
4417
4418 assert(c);
4419 assert(l);
4420
4421 STRV_FOREACH(i, c->environment_files) {
4422 char *fn;
52511fae
ZJS
4423 int k;
4424 unsigned n;
8c7be95e
LP
4425 bool ignore = false;
4426 char **p;
7fd1b19b 4427 _cleanup_globfree_ glob_t pglob = {};
8c7be95e
LP
4428
4429 fn = *i;
4430
4431 if (fn[0] == '-') {
4432 ignore = true;
313cefa1 4433 fn++;
8c7be95e
LP
4434 }
4435
4436 if (!path_is_absolute(fn)) {
8c7be95e
LP
4437 if (ignore)
4438 continue;
4439
4440 strv_free(r);
4441 return -EINVAL;
4442 }
4443
2bef10ab 4444 /* Filename supports globbing, take all matching files */
d8c92e8b
ZJS
4445 k = safe_glob(fn, 0, &pglob);
4446 if (k < 0) {
2bef10ab
PL
4447 if (ignore)
4448 continue;
8c7be95e 4449
2bef10ab 4450 strv_free(r);
d8c92e8b 4451 return k;
2bef10ab 4452 }
8c7be95e 4453
d8c92e8b
ZJS
4454 /* When we don't match anything, -ENOENT should be returned */
4455 assert(pglob.gl_pathc > 0);
4456
4457 for (n = 0; n < pglob.gl_pathc; n++) {
aa8fbc74 4458 k = load_env_file(NULL, pglob.gl_pathv[n], &p);
2bef10ab
PL
4459 if (k < 0) {
4460 if (ignore)
4461 continue;
8c7be95e 4462
2bef10ab 4463 strv_free(r);
2bef10ab 4464 return k;
e9c1ea9d 4465 }
ebc05a09 4466 /* Log invalid environment variables with filename */
039f0e70
LP
4467 if (p) {
4468 InvalidEnvInfo info = {
f2341e0a 4469 .unit = unit,
039f0e70
LP
4470 .path = pglob.gl_pathv[n]
4471 };
4472
4473 p = strv_env_clean_with_callback(p, invalid_env, &info);
4474 }
8c7be95e 4475
234519ae 4476 if (!r)
2bef10ab
PL
4477 r = p;
4478 else {
4479 char **m;
8c7be95e 4480
2bef10ab
PL
4481 m = strv_env_merge(2, r, p);
4482 strv_free(r);
4483 strv_free(p);
c84a9488 4484 if (!m)
2bef10ab 4485 return -ENOMEM;
2bef10ab
PL
4486
4487 r = m;
4488 }
8c7be95e
LP
4489 }
4490 }
4491
4492 *l = r;
4493
4494 return 0;
4495}
4496
6ac8fdc9 4497static bool tty_may_match_dev_console(const char *tty) {
7b912648 4498 _cleanup_free_ char *resolved = NULL;
6ac8fdc9 4499
1e22b5cd
LP
4500 if (!tty)
4501 return true;
4502
a119ec7c 4503 tty = skip_dev_prefix(tty);
6ac8fdc9
MS
4504
4505 /* trivial identity? */
4506 if (streq(tty, "console"))
4507 return true;
4508
7b912648
LP
4509 if (resolve_dev_console(&resolved) < 0)
4510 return true; /* if we could not resolve, assume it may */
6ac8fdc9
MS
4511
4512 /* "tty0" means the active VC, so it may be the same sometimes */
955f1c85 4513 return path_equal(resolved, tty) || (streq(resolved, "tty0") && tty_is_vc(tty));
6ac8fdc9
MS
4514}
4515
6c0ae739
LP
4516static bool exec_context_may_touch_tty(const ExecContext *ec) {
4517 assert(ec);
1e22b5cd 4518
6c0ae739 4519 return ec->tty_reset ||
1e22b5cd
LP
4520 ec->tty_vhangup ||
4521 ec->tty_vt_disallocate ||
6ac8fdc9
MS
4522 is_terminal_input(ec->std_input) ||
4523 is_terminal_output(ec->std_output) ||
6c0ae739
LP
4524 is_terminal_output(ec->std_error);
4525}
4526
4527bool exec_context_may_touch_console(const ExecContext *ec) {
4528
4529 return exec_context_may_touch_tty(ec) &&
1e22b5cd 4530 tty_may_match_dev_console(exec_context_tty_path(ec));
6ac8fdc9
MS
4531}
4532
15ae422b
LP
4533static void strv_fprintf(FILE *f, char **l) {
4534 char **g;
4535
4536 assert(f);
4537
4538 STRV_FOREACH(g, l)
4539 fprintf(f, " %s", *g);
4540}
4541
34cf6c43 4542void exec_context_dump(const ExecContext *c, FILE* f, const char *prefix) {
12213aed 4543 char **e, **d, buf_clean[FORMAT_TIMESPAN_MAX];
d3070fbd 4544 ExecDirectoryType dt;
94f04347 4545 unsigned i;
add00535 4546 int r;
9eba9da4 4547
5cb5a6ff
LP
4548 assert(c);
4549 assert(f);
4550
4ad49000 4551 prefix = strempty(prefix);
5cb5a6ff
LP
4552
4553 fprintf(f,
94f04347
LP
4554 "%sUMask: %04o\n"
4555 "%sWorkingDirectory: %s\n"
451a074f 4556 "%sRootDirectory: %s\n"
15ae422b 4557 "%sNonBlocking: %s\n"
64747e2d 4558 "%sPrivateTmp: %s\n"
7f112f50 4559 "%sPrivateDevices: %s\n"
59eeb84b 4560 "%sProtectKernelTunables: %s\n"
e66a2f65 4561 "%sProtectKernelModules: %s\n"
84703040 4562 "%sProtectKernelLogs: %s\n"
fc64760d 4563 "%sProtectClock: %s\n"
59eeb84b 4564 "%sProtectControlGroups: %s\n"
d251207d
LP
4565 "%sPrivateNetwork: %s\n"
4566 "%sPrivateUsers: %s\n"
1b8689f9
LP
4567 "%sProtectHome: %s\n"
4568 "%sProtectSystem: %s\n"
5d997827 4569 "%sMountAPIVFS: %s\n"
f3e43635 4570 "%sIgnoreSIGPIPE: %s\n"
f4170c67 4571 "%sMemoryDenyWriteExecute: %s\n"
b1edf445 4572 "%sRestrictRealtime: %s\n"
f69567cb 4573 "%sRestrictSUIDSGID: %s\n"
aecd5ac6
TM
4574 "%sKeyringMode: %s\n"
4575 "%sProtectHostname: %s\n",
5cb5a6ff 4576 prefix, c->umask,
9eba9da4 4577 prefix, c->working_directory ? c->working_directory : "/",
451a074f 4578 prefix, c->root_directory ? c->root_directory : "/",
15ae422b 4579 prefix, yes_no(c->non_blocking),
64747e2d 4580 prefix, yes_no(c->private_tmp),
7f112f50 4581 prefix, yes_no(c->private_devices),
59eeb84b 4582 prefix, yes_no(c->protect_kernel_tunables),
e66a2f65 4583 prefix, yes_no(c->protect_kernel_modules),
84703040 4584 prefix, yes_no(c->protect_kernel_logs),
fc64760d 4585 prefix, yes_no(c->protect_clock),
59eeb84b 4586 prefix, yes_no(c->protect_control_groups),
d251207d
LP
4587 prefix, yes_no(c->private_network),
4588 prefix, yes_no(c->private_users),
1b8689f9
LP
4589 prefix, protect_home_to_string(c->protect_home),
4590 prefix, protect_system_to_string(c->protect_system),
5d997827 4591 prefix, yes_no(c->mount_apivfs),
f3e43635 4592 prefix, yes_no(c->ignore_sigpipe),
f4170c67 4593 prefix, yes_no(c->memory_deny_write_execute),
b1edf445 4594 prefix, yes_no(c->restrict_realtime),
f69567cb 4595 prefix, yes_no(c->restrict_suid_sgid),
aecd5ac6
TM
4596 prefix, exec_keyring_mode_to_string(c->keyring_mode),
4597 prefix, yes_no(c->protect_hostname));
fb33a393 4598
915e6d16
LP
4599 if (c->root_image)
4600 fprintf(f, "%sRootImage: %s\n", prefix, c->root_image);
4601
8c7be95e
LP
4602 STRV_FOREACH(e, c->environment)
4603 fprintf(f, "%sEnvironment: %s\n", prefix, *e);
4604
4605 STRV_FOREACH(e, c->environment_files)
4606 fprintf(f, "%sEnvironmentFile: %s\n", prefix, *e);
94f04347 4607
b4c14404
FB
4608 STRV_FOREACH(e, c->pass_environment)
4609 fprintf(f, "%sPassEnvironment: %s\n", prefix, *e);
4610
00819cc1
LP
4611 STRV_FOREACH(e, c->unset_environment)
4612 fprintf(f, "%sUnsetEnvironment: %s\n", prefix, *e);
4613
53f47dfc
YW
4614 fprintf(f, "%sRuntimeDirectoryPreserve: %s\n", prefix, exec_preserve_mode_to_string(c->runtime_directory_preserve_mode));
4615
72fd1768 4616 for (dt = 0; dt < _EXEC_DIRECTORY_TYPE_MAX; dt++) {
3536f49e
YW
4617 fprintf(f, "%s%sMode: %04o\n", prefix, exec_directory_type_to_string(dt), c->directories[dt].mode);
4618
4619 STRV_FOREACH(d, c->directories[dt].paths)
4620 fprintf(f, "%s%s: %s\n", prefix, exec_directory_type_to_string(dt), *d);
4621 }
c2bbd90b 4622
12213aed
YW
4623 fprintf(f,
4624 "%sTimeoutCleanSec: %s\n",
4625 prefix, format_timespan(buf_clean, sizeof(buf_clean), c->timeout_clean_usec, USEC_PER_SEC));
4626
fb33a393
LP
4627 if (c->nice_set)
4628 fprintf(f,
4629 "%sNice: %i\n",
4630 prefix, c->nice);
4631
dd6c17b1 4632 if (c->oom_score_adjust_set)
fb33a393 4633 fprintf(f,
dd6c17b1
LP
4634 "%sOOMScoreAdjust: %i\n",
4635 prefix, c->oom_score_adjust);
9eba9da4 4636
ad21e542
ZJS
4637 if (c->coredump_filter_set)
4638 fprintf(f,
4639 "%sCoredumpFilter: 0x%"PRIx64"\n",
4640 prefix, c->coredump_filter);
4641
94f04347 4642 for (i = 0; i < RLIM_NLIMITS; i++)
3c11da9d 4643 if (c->rlimit[i]) {
4c3a2b84 4644 fprintf(f, "%sLimit%s: " RLIM_FMT "\n",
3c11da9d 4645 prefix, rlimit_to_string(i), c->rlimit[i]->rlim_max);
4c3a2b84 4646 fprintf(f, "%sLimit%sSoft: " RLIM_FMT "\n",
3c11da9d
EV
4647 prefix, rlimit_to_string(i), c->rlimit[i]->rlim_cur);
4648 }
94f04347 4649
f8b69d1d 4650 if (c->ioprio_set) {
1756a011 4651 _cleanup_free_ char *class_str = NULL;
f8b69d1d 4652
837df140
YW
4653 r = ioprio_class_to_string_alloc(IOPRIO_PRIO_CLASS(c->ioprio), &class_str);
4654 if (r >= 0)
4655 fprintf(f, "%sIOSchedulingClass: %s\n", prefix, class_str);
4656
4657 fprintf(f, "%sIOPriority: %lu\n", prefix, IOPRIO_PRIO_DATA(c->ioprio));
f8b69d1d 4658 }
94f04347 4659
f8b69d1d 4660 if (c->cpu_sched_set) {
1756a011 4661 _cleanup_free_ char *policy_str = NULL;
f8b69d1d 4662
837df140
YW
4663 r = sched_policy_to_string_alloc(c->cpu_sched_policy, &policy_str);
4664 if (r >= 0)
4665 fprintf(f, "%sCPUSchedulingPolicy: %s\n", prefix, policy_str);
4666
94f04347 4667 fprintf(f,
38b48754
LP
4668 "%sCPUSchedulingPriority: %i\n"
4669 "%sCPUSchedulingResetOnFork: %s\n",
38b48754
LP
4670 prefix, c->cpu_sched_priority,
4671 prefix, yes_no(c->cpu_sched_reset_on_fork));
b929bf04 4672 }
94f04347 4673
0985c7c4 4674 if (c->cpu_set.set) {
e7fca352
MS
4675 _cleanup_free_ char *affinity = NULL;
4676
4677 affinity = cpu_set_to_range_string(&c->cpu_set);
4678 fprintf(f, "%sCPUAffinity: %s\n", prefix, affinity);
94f04347
LP
4679 }
4680
b070c7c0
MS
4681 if (mpol_is_valid(numa_policy_get_type(&c->numa_policy))) {
4682 _cleanup_free_ char *nodes = NULL;
4683
4684 nodes = cpu_set_to_range_string(&c->numa_policy.nodes);
4685 fprintf(f, "%sNUMAPolicy: %s\n", prefix, mpol_to_string(numa_policy_get_type(&c->numa_policy)));
4686 fprintf(f, "%sNUMAMask: %s\n", prefix, strnull(nodes));
4687 }
4688
3a43da28 4689 if (c->timer_slack_nsec != NSEC_INFINITY)
ccd06097 4690 fprintf(f, "%sTimerSlackNSec: "NSEC_FMT "\n", prefix, c->timer_slack_nsec);
94f04347
LP
4691
4692 fprintf(f,
80876c20
LP
4693 "%sStandardInput: %s\n"
4694 "%sStandardOutput: %s\n"
4695 "%sStandardError: %s\n",
4696 prefix, exec_input_to_string(c->std_input),
4697 prefix, exec_output_to_string(c->std_output),
4698 prefix, exec_output_to_string(c->std_error));
4699
befc4a80
LP
4700 if (c->std_input == EXEC_INPUT_NAMED_FD)
4701 fprintf(f, "%sStandardInputFileDescriptorName: %s\n", prefix, c->stdio_fdname[STDIN_FILENO]);
4702 if (c->std_output == EXEC_OUTPUT_NAMED_FD)
4703 fprintf(f, "%sStandardOutputFileDescriptorName: %s\n", prefix, c->stdio_fdname[STDOUT_FILENO]);
4704 if (c->std_error == EXEC_OUTPUT_NAMED_FD)
4705 fprintf(f, "%sStandardErrorFileDescriptorName: %s\n", prefix, c->stdio_fdname[STDERR_FILENO]);
4706
4707 if (c->std_input == EXEC_INPUT_FILE)
4708 fprintf(f, "%sStandardInputFile: %s\n", prefix, c->stdio_file[STDIN_FILENO]);
4709 if (c->std_output == EXEC_OUTPUT_FILE)
4710 fprintf(f, "%sStandardOutputFile: %s\n", prefix, c->stdio_file[STDOUT_FILENO]);
566b7d23
ZD
4711 if (c->std_output == EXEC_OUTPUT_FILE_APPEND)
4712 fprintf(f, "%sStandardOutputFileToAppend: %s\n", prefix, c->stdio_file[STDOUT_FILENO]);
befc4a80
LP
4713 if (c->std_error == EXEC_OUTPUT_FILE)
4714 fprintf(f, "%sStandardErrorFile: %s\n", prefix, c->stdio_file[STDERR_FILENO]);
566b7d23
ZD
4715 if (c->std_error == EXEC_OUTPUT_FILE_APPEND)
4716 fprintf(f, "%sStandardErrorFileToAppend: %s\n", prefix, c->stdio_file[STDERR_FILENO]);
befc4a80 4717
80876c20
LP
4718 if (c->tty_path)
4719 fprintf(f,
6ea832a2
LP
4720 "%sTTYPath: %s\n"
4721 "%sTTYReset: %s\n"
4722 "%sTTYVHangup: %s\n"
4723 "%sTTYVTDisallocate: %s\n",
4724 prefix, c->tty_path,
4725 prefix, yes_no(c->tty_reset),
4726 prefix, yes_no(c->tty_vhangup),
4727 prefix, yes_no(c->tty_vt_disallocate));
94f04347 4728
9f6444eb 4729 if (IN_SET(c->std_output,
9f6444eb
LP
4730 EXEC_OUTPUT_KMSG,
4731 EXEC_OUTPUT_JOURNAL,
9f6444eb
LP
4732 EXEC_OUTPUT_KMSG_AND_CONSOLE,
4733 EXEC_OUTPUT_JOURNAL_AND_CONSOLE) ||
4734 IN_SET(c->std_error,
9f6444eb
LP
4735 EXEC_OUTPUT_KMSG,
4736 EXEC_OUTPUT_JOURNAL,
9f6444eb
LP
4737 EXEC_OUTPUT_KMSG_AND_CONSOLE,
4738 EXEC_OUTPUT_JOURNAL_AND_CONSOLE)) {
f8b69d1d 4739
5ce70e5b 4740 _cleanup_free_ char *fac_str = NULL, *lvl_str = NULL;
f8b69d1d 4741
837df140
YW
4742 r = log_facility_unshifted_to_string_alloc(c->syslog_priority >> 3, &fac_str);
4743 if (r >= 0)
4744 fprintf(f, "%sSyslogFacility: %s\n", prefix, fac_str);
f8b69d1d 4745
837df140
YW
4746 r = log_level_to_string_alloc(LOG_PRI(c->syslog_priority), &lvl_str);
4747 if (r >= 0)
4748 fprintf(f, "%sSyslogLevel: %s\n", prefix, lvl_str);
f8b69d1d 4749 }
94f04347 4750
d3070fbd
LP
4751 if (c->log_level_max >= 0) {
4752 _cleanup_free_ char *t = NULL;
4753
4754 (void) log_level_to_string_alloc(c->log_level_max, &t);
4755
4756 fprintf(f, "%sLogLevelMax: %s\n", prefix, strna(t));
4757 }
4758
5ac1530e 4759 if (c->log_ratelimit_interval_usec > 0) {
90fc172e
AZ
4760 char buf_timespan[FORMAT_TIMESPAN_MAX];
4761
4762 fprintf(f,
4763 "%sLogRateLimitIntervalSec: %s\n",
5ac1530e 4764 prefix, format_timespan(buf_timespan, sizeof(buf_timespan), c->log_ratelimit_interval_usec, USEC_PER_SEC));
90fc172e
AZ
4765 }
4766
5ac1530e
ZJS
4767 if (c->log_ratelimit_burst > 0)
4768 fprintf(f, "%sLogRateLimitBurst: %u\n", prefix, c->log_ratelimit_burst);
90fc172e 4769
d3070fbd
LP
4770 if (c->n_log_extra_fields > 0) {
4771 size_t j;
4772
4773 for (j = 0; j < c->n_log_extra_fields; j++) {
4774 fprintf(f, "%sLogExtraFields: ", prefix);
4775 fwrite(c->log_extra_fields[j].iov_base,
4776 1, c->log_extra_fields[j].iov_len,
4777 f);
4778 fputc('\n', f);
4779 }
4780 }
4781
91dd5f7c
LP
4782 if (c->log_namespace)
4783 fprintf(f, "%sLogNamespace: %s\n", prefix, c->log_namespace);
4784
07d46372
YW
4785 if (c->secure_bits) {
4786 _cleanup_free_ char *str = NULL;
4787
4788 r = secure_bits_to_string_alloc(c->secure_bits, &str);
4789 if (r >= 0)
4790 fprintf(f, "%sSecure Bits: %s\n", prefix, str);
4791 }
94f04347 4792
a103496c 4793 if (c->capability_bounding_set != CAP_ALL) {
dd1f5bd0 4794 _cleanup_free_ char *str = NULL;
94f04347 4795
dd1f5bd0
YW
4796 r = capability_set_to_string_alloc(c->capability_bounding_set, &str);
4797 if (r >= 0)
4798 fprintf(f, "%sCapabilityBoundingSet: %s\n", prefix, str);
755d4b67
IP
4799 }
4800
4801 if (c->capability_ambient_set != 0) {
dd1f5bd0 4802 _cleanup_free_ char *str = NULL;
755d4b67 4803
dd1f5bd0
YW
4804 r = capability_set_to_string_alloc(c->capability_ambient_set, &str);
4805 if (r >= 0)
4806 fprintf(f, "%sAmbientCapabilities: %s\n", prefix, str);
94f04347
LP
4807 }
4808
4809 if (c->user)
f2d3769a 4810 fprintf(f, "%sUser: %s\n", prefix, c->user);
94f04347 4811 if (c->group)
f2d3769a 4812 fprintf(f, "%sGroup: %s\n", prefix, c->group);
94f04347 4813
29206d46
LP
4814 fprintf(f, "%sDynamicUser: %s\n", prefix, yes_no(c->dynamic_user));
4815
ac6e8be6 4816 if (!strv_isempty(c->supplementary_groups)) {
94f04347 4817 fprintf(f, "%sSupplementaryGroups:", prefix);
15ae422b
LP
4818 strv_fprintf(f, c->supplementary_groups);
4819 fputs("\n", f);
4820 }
94f04347 4821
5b6319dc 4822 if (c->pam_name)
f2d3769a 4823 fprintf(f, "%sPAMName: %s\n", prefix, c->pam_name);
5b6319dc 4824
58629001 4825 if (!strv_isempty(c->read_write_paths)) {
2a624c36
AP
4826 fprintf(f, "%sReadWritePaths:", prefix);
4827 strv_fprintf(f, c->read_write_paths);
15ae422b
LP
4828 fputs("\n", f);
4829 }
4830
58629001 4831 if (!strv_isempty(c->read_only_paths)) {
2a624c36
AP
4832 fprintf(f, "%sReadOnlyPaths:", prefix);
4833 strv_fprintf(f, c->read_only_paths);
15ae422b
LP
4834 fputs("\n", f);
4835 }
94f04347 4836
58629001 4837 if (!strv_isempty(c->inaccessible_paths)) {
2a624c36
AP
4838 fprintf(f, "%sInaccessiblePaths:", prefix);
4839 strv_fprintf(f, c->inaccessible_paths);
94f04347
LP
4840 fputs("\n", f);
4841 }
2e22afe9 4842
d2d6c096 4843 if (c->n_bind_mounts > 0)
4ca763a9
YW
4844 for (i = 0; i < c->n_bind_mounts; i++)
4845 fprintf(f, "%s%s: %s%s:%s:%s\n", prefix,
d2d6c096 4846 c->bind_mounts[i].read_only ? "BindReadOnlyPaths" : "BindPaths",
4ca763a9 4847 c->bind_mounts[i].ignore_enoent ? "-": "",
d2d6c096
LP
4848 c->bind_mounts[i].source,
4849 c->bind_mounts[i].destination,
4850 c->bind_mounts[i].recursive ? "rbind" : "norbind");
d2d6c096 4851
2abd4e38
YW
4852 if (c->n_temporary_filesystems > 0)
4853 for (i = 0; i < c->n_temporary_filesystems; i++) {
4854 TemporaryFileSystem *t = c->temporary_filesystems + i;
4855
4856 fprintf(f, "%sTemporaryFileSystem: %s%s%s\n", prefix,
4857 t->path,
4858 isempty(t->options) ? "" : ":",
4859 strempty(t->options));
4860 }
4861
169c1bda
LP
4862 if (c->utmp_id)
4863 fprintf(f,
4864 "%sUtmpIdentifier: %s\n",
4865 prefix, c->utmp_id);
7b52a628
MS
4866
4867 if (c->selinux_context)
4868 fprintf(f,
5f8640fb
LP
4869 "%sSELinuxContext: %s%s\n",
4870 prefix, c->selinux_context_ignore ? "-" : "", c->selinux_context);
17df7223 4871
80c21aea
WC
4872 if (c->apparmor_profile)
4873 fprintf(f,
4874 "%sAppArmorProfile: %s%s\n",
4875 prefix, c->apparmor_profile_ignore ? "-" : "", c->apparmor_profile);
4876
4877 if (c->smack_process_label)
4878 fprintf(f,
4879 "%sSmackProcessLabel: %s%s\n",
4880 prefix, c->smack_process_label_ignore ? "-" : "", c->smack_process_label);
4881
050f7277 4882 if (c->personality != PERSONALITY_INVALID)
ac45f971
LP
4883 fprintf(f,
4884 "%sPersonality: %s\n",
4885 prefix, strna(personality_to_string(c->personality)));
4886
78e864e5
TM
4887 fprintf(f,
4888 "%sLockPersonality: %s\n",
4889 prefix, yes_no(c->lock_personality));
4890
17df7223 4891 if (c->syscall_filter) {
349cc4a5 4892#if HAVE_SECCOMP
17df7223 4893 Iterator j;
8cfa775f 4894 void *id, *val;
17df7223 4895 bool first = true;
351a19b1 4896#endif
17df7223
LP
4897
4898 fprintf(f,
57183d11 4899 "%sSystemCallFilter: ",
17df7223
LP
4900 prefix);
4901
4902 if (!c->syscall_whitelist)
4903 fputc('~', f);
4904
349cc4a5 4905#if HAVE_SECCOMP
8cfa775f 4906 HASHMAP_FOREACH_KEY(val, id, c->syscall_filter, j) {
17df7223 4907 _cleanup_free_ char *name = NULL;
8cfa775f
YW
4908 const char *errno_name = NULL;
4909 int num = PTR_TO_INT(val);
17df7223
LP
4910
4911 if (first)
4912 first = false;
4913 else
4914 fputc(' ', f);
4915
57183d11 4916 name = seccomp_syscall_resolve_num_arch(SCMP_ARCH_NATIVE, PTR_TO_INT(id) - 1);
17df7223 4917 fputs(strna(name), f);
8cfa775f
YW
4918
4919 if (num >= 0) {
4920 errno_name = errno_to_name(num);
4921 if (errno_name)
4922 fprintf(f, ":%s", errno_name);
4923 else
4924 fprintf(f, ":%d", num);
4925 }
17df7223 4926 }
351a19b1 4927#endif
17df7223
LP
4928
4929 fputc('\n', f);
4930 }
4931
57183d11 4932 if (c->syscall_archs) {
349cc4a5 4933#if HAVE_SECCOMP
57183d11
LP
4934 Iterator j;
4935 void *id;
4936#endif
4937
4938 fprintf(f,
4939 "%sSystemCallArchitectures:",
4940 prefix);
4941
349cc4a5 4942#if HAVE_SECCOMP
57183d11
LP
4943 SET_FOREACH(id, c->syscall_archs, j)
4944 fprintf(f, " %s", strna(seccomp_arch_to_string(PTR_TO_UINT32(id) - 1)));
4945#endif
4946 fputc('\n', f);
4947 }
4948
add00535
LP
4949 if (exec_context_restrict_namespaces_set(c)) {
4950 _cleanup_free_ char *s = NULL;
4951
86c2a9f1 4952 r = namespace_flags_to_string(c->restrict_namespaces, &s);
add00535
LP
4953 if (r >= 0)
4954 fprintf(f, "%sRestrictNamespaces: %s\n",
dd0395b5 4955 prefix, strna(s));
add00535
LP
4956 }
4957
a8d08f39
LP
4958 if (c->network_namespace_path)
4959 fprintf(f,
4960 "%sNetworkNamespacePath: %s\n",
4961 prefix, c->network_namespace_path);
4962
3df90f24
YW
4963 if (c->syscall_errno > 0) {
4964 const char *errno_name;
4965
4966 fprintf(f, "%sSystemCallErrorNumber: ", prefix);
4967
4968 errno_name = errno_to_name(c->syscall_errno);
4969 if (errno_name)
4970 fprintf(f, "%s\n", errno_name);
4971 else
4972 fprintf(f, "%d\n", c->syscall_errno);
4973 }
5cb5a6ff
LP
4974}
4975
34cf6c43 4976bool exec_context_maintains_privileges(const ExecContext *c) {
a931ad47
LP
4977 assert(c);
4978
61233823 4979 /* Returns true if the process forked off would run under
a931ad47
LP
4980 * an unchanged UID or as root. */
4981
4982 if (!c->user)
4983 return true;
4984
4985 if (streq(c->user, "root") || streq(c->user, "0"))
4986 return true;
4987
4988 return false;
4989}
4990
34cf6c43 4991int exec_context_get_effective_ioprio(const ExecContext *c) {
7f452159
LP
4992 int p;
4993
4994 assert(c);
4995
4996 if (c->ioprio_set)
4997 return c->ioprio;
4998
4999 p = ioprio_get(IOPRIO_WHO_PROCESS, 0);
5000 if (p < 0)
5001 return IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 4);
5002
5003 return p;
5004}
5005
d3070fbd
LP
5006void exec_context_free_log_extra_fields(ExecContext *c) {
5007 size_t l;
5008
5009 assert(c);
5010
5011 for (l = 0; l < c->n_log_extra_fields; l++)
5012 free(c->log_extra_fields[l].iov_base);
5013 c->log_extra_fields = mfree(c->log_extra_fields);
5014 c->n_log_extra_fields = 0;
5015}
5016
6f765baf
LP
5017void exec_context_revert_tty(ExecContext *c) {
5018 int r;
5019
5020 assert(c);
5021
5022 /* First, reset the TTY (possibly kicking everybody else from the TTY) */
5023 exec_context_tty_reset(c, NULL);
5024
5025 /* And then undo what chown_terminal() did earlier. Note that we only do this if we have a path
5026 * configured. If the TTY was passed to us as file descriptor we assume the TTY is opened and managed
5027 * by whoever passed it to us and thus knows better when and how to chmod()/chown() it back. */
5028
5029 if (exec_context_may_touch_tty(c)) {
5030 const char *path;
5031
5032 path = exec_context_tty_path(c);
5033 if (path) {
5034 r = chmod_and_chown(path, TTY_MODE, 0, TTY_GID);
5035 if (r < 0 && r != -ENOENT)
5036 log_warning_errno(r, "Failed to reset TTY ownership/access mode of %s, ignoring: %m", path);
5037 }
5038 }
5039}
5040
4c2f5842
LP
5041int exec_context_get_clean_directories(
5042 ExecContext *c,
5043 char **prefix,
5044 ExecCleanMask mask,
5045 char ***ret) {
5046
5047 _cleanup_strv_free_ char **l = NULL;
5048 ExecDirectoryType t;
5049 int r;
5050
5051 assert(c);
5052 assert(prefix);
5053 assert(ret);
5054
5055 for (t = 0; t < _EXEC_DIRECTORY_TYPE_MAX; t++) {
5056 char **i;
5057
5058 if (!FLAGS_SET(mask, 1U << t))
5059 continue;
5060
5061 if (!prefix[t])
5062 continue;
5063
5064 STRV_FOREACH(i, c->directories[t].paths) {
5065 char *j;
5066
5067 j = path_join(prefix[t], *i);
5068 if (!j)
5069 return -ENOMEM;
5070
5071 r = strv_consume(&l, j);
5072 if (r < 0)
5073 return r;
7f622a19
YW
5074
5075 /* Also remove private directories unconditionally. */
5076 if (t != EXEC_DIRECTORY_CONFIGURATION) {
5077 j = path_join(prefix[t], "private", *i);
5078 if (!j)
5079 return -ENOMEM;
5080
5081 r = strv_consume(&l, j);
5082 if (r < 0)
5083 return r;
5084 }
4c2f5842
LP
5085 }
5086 }
5087
5088 *ret = TAKE_PTR(l);
5089 return 0;
5090}
5091
5092int exec_context_get_clean_mask(ExecContext *c, ExecCleanMask *ret) {
5093 ExecCleanMask mask = 0;
5094
5095 assert(c);
5096 assert(ret);
5097
5098 for (ExecDirectoryType t = 0; t < _EXEC_DIRECTORY_TYPE_MAX; t++)
5099 if (!strv_isempty(c->directories[t].paths))
5100 mask |= 1U << t;
5101
5102 *ret = mask;
5103 return 0;
5104}
5105
b58b4116 5106void exec_status_start(ExecStatus *s, pid_t pid) {
034c6ed7 5107 assert(s);
5cb5a6ff 5108
2ed26ed0
LP
5109 *s = (ExecStatus) {
5110 .pid = pid,
5111 };
5112
b58b4116
LP
5113 dual_timestamp_get(&s->start_timestamp);
5114}
5115
34cf6c43 5116void exec_status_exit(ExecStatus *s, const ExecContext *context, pid_t pid, int code, int status) {
b58b4116
LP
5117 assert(s);
5118
2ed26ed0
LP
5119 if (s->pid != pid) {
5120 *s = (ExecStatus) {
5121 .pid = pid,
5122 };
5123 }
b58b4116 5124
63983207 5125 dual_timestamp_get(&s->exit_timestamp);
9fb86720 5126
034c6ed7
LP
5127 s->code = code;
5128 s->status = status;
169c1bda 5129
6f765baf
LP
5130 if (context && context->utmp_id)
5131 (void) utmp_put_dead_process(context->utmp_id, pid, code, status);
9fb86720
LP
5132}
5133
6a1d4d9f
LP
5134void exec_status_reset(ExecStatus *s) {
5135 assert(s);
5136
5137 *s = (ExecStatus) {};
5138}
5139
34cf6c43 5140void exec_status_dump(const ExecStatus *s, FILE *f, const char *prefix) {
9fb86720
LP
5141 char buf[FORMAT_TIMESTAMP_MAX];
5142
5143 assert(s);
5144 assert(f);
5145
9fb86720
LP
5146 if (s->pid <= 0)
5147 return;
5148
4c940960
LP
5149 prefix = strempty(prefix);
5150
9fb86720 5151 fprintf(f,
ccd06097
ZJS
5152 "%sPID: "PID_FMT"\n",
5153 prefix, s->pid);
9fb86720 5154
af9d16e1 5155 if (dual_timestamp_is_set(&s->start_timestamp))
9fb86720
LP
5156 fprintf(f,
5157 "%sStart Timestamp: %s\n",
63983207 5158 prefix, format_timestamp(buf, sizeof(buf), s->start_timestamp.realtime));
9fb86720 5159
af9d16e1 5160 if (dual_timestamp_is_set(&s->exit_timestamp))
9fb86720
LP
5161 fprintf(f,
5162 "%sExit Timestamp: %s\n"
5163 "%sExit Code: %s\n"
5164 "%sExit Status: %i\n",
63983207 5165 prefix, format_timestamp(buf, sizeof(buf), s->exit_timestamp.realtime),
9fb86720
LP
5166 prefix, sigchld_code_to_string(s->code),
5167 prefix, s->status);
5cb5a6ff 5168}
44d8db9e 5169
34cf6c43 5170static char *exec_command_line(char **argv) {
44d8db9e
LP
5171 size_t k;
5172 char *n, *p, **a;
5173 bool first = true;
5174
9e2f7c11 5175 assert(argv);
44d8db9e 5176
9164977d 5177 k = 1;
9e2f7c11 5178 STRV_FOREACH(a, argv)
44d8db9e
LP
5179 k += strlen(*a)+3;
5180
5cd9cd35
LP
5181 n = new(char, k);
5182 if (!n)
44d8db9e
LP
5183 return NULL;
5184
5185 p = n;
9e2f7c11 5186 STRV_FOREACH(a, argv) {
44d8db9e
LP
5187
5188 if (!first)
5189 *(p++) = ' ';
5190 else
5191 first = false;
5192
5193 if (strpbrk(*a, WHITESPACE)) {
5194 *(p++) = '\'';
5195 p = stpcpy(p, *a);
5196 *(p++) = '\'';
5197 } else
5198 p = stpcpy(p, *a);
5199
5200 }
5201
9164977d
LP
5202 *p = 0;
5203
44d8db9e
LP
5204 /* FIXME: this doesn't really handle arguments that have
5205 * spaces and ticks in them */
5206
5207 return n;
5208}
5209
34cf6c43 5210static void exec_command_dump(ExecCommand *c, FILE *f, const char *prefix) {
e1d75803 5211 _cleanup_free_ char *cmd = NULL;
4c940960 5212 const char *prefix2;
44d8db9e
LP
5213
5214 assert(c);
5215 assert(f);
5216
4c940960 5217 prefix = strempty(prefix);
63c372cb 5218 prefix2 = strjoina(prefix, "\t");
44d8db9e 5219
9e2f7c11 5220 cmd = exec_command_line(c->argv);
44d8db9e
LP
5221 fprintf(f,
5222 "%sCommand Line: %s\n",
4bbccb02 5223 prefix, cmd ? cmd : strerror_safe(ENOMEM));
44d8db9e 5224
9fb86720 5225 exec_status_dump(&c->exec_status, f, prefix2);
44d8db9e
LP
5226}
5227
5228void exec_command_dump_list(ExecCommand *c, FILE *f, const char *prefix) {
5229 assert(f);
5230
4c940960 5231 prefix = strempty(prefix);
44d8db9e
LP
5232
5233 LIST_FOREACH(command, c, c)
5234 exec_command_dump(c, f, prefix);
5235}
94f04347 5236
a6a80b4f
LP
5237void exec_command_append_list(ExecCommand **l, ExecCommand *e) {
5238 ExecCommand *end;
5239
5240 assert(l);
5241 assert(e);
5242
5243 if (*l) {
35b8ca3a 5244 /* It's kind of important, that we keep the order here */
71fda00f
LP
5245 LIST_FIND_TAIL(command, *l, end);
5246 LIST_INSERT_AFTER(command, *l, end, e);
a6a80b4f
LP
5247 } else
5248 *l = e;
5249}
5250
26fd040d
LP
5251int exec_command_set(ExecCommand *c, const char *path, ...) {
5252 va_list ap;
5253 char **l, *p;
5254
5255 assert(c);
5256 assert(path);
5257
5258 va_start(ap, path);
5259 l = strv_new_ap(path, ap);
5260 va_end(ap);
5261
5262 if (!l)
5263 return -ENOMEM;
5264
250a918d
LP
5265 p = strdup(path);
5266 if (!p) {
26fd040d
LP
5267 strv_free(l);
5268 return -ENOMEM;
5269 }
5270
6897dfe8 5271 free_and_replace(c->path, p);
26fd040d 5272
130d3d22 5273 return strv_free_and_replace(c->argv, l);
26fd040d
LP
5274}
5275
86b23b07 5276int exec_command_append(ExecCommand *c, const char *path, ...) {
e63ff941 5277 _cleanup_strv_free_ char **l = NULL;
86b23b07 5278 va_list ap;
86b23b07
JS
5279 int r;
5280
5281 assert(c);
5282 assert(path);
5283
5284 va_start(ap, path);
5285 l = strv_new_ap(path, ap);
5286 va_end(ap);
5287
5288 if (!l)
5289 return -ENOMEM;
5290
e287086b 5291 r = strv_extend_strv(&c->argv, l, false);
e63ff941 5292 if (r < 0)
86b23b07 5293 return r;
86b23b07
JS
5294
5295 return 0;
5296}
5297
e8a565cb
YW
5298static void *remove_tmpdir_thread(void *p) {
5299 _cleanup_free_ char *path = p;
86b23b07 5300
e8a565cb
YW
5301 (void) rm_rf(path, REMOVE_ROOT|REMOVE_PHYSICAL);
5302 return NULL;
5303}
5304
5305static ExecRuntime* exec_runtime_free(ExecRuntime *rt, bool destroy) {
5306 int r;
5307
5308 if (!rt)
5309 return NULL;
5310
5311 if (rt->manager)
5312 (void) hashmap_remove(rt->manager->exec_runtime_by_id, rt->id);
5313
5314 /* When destroy is true, then rm_rf tmp_dir and var_tmp_dir. */
5315 if (destroy && rt->tmp_dir) {
5316 log_debug("Spawning thread to nuke %s", rt->tmp_dir);
5317
5318 r = asynchronous_job(remove_tmpdir_thread, rt->tmp_dir);
5319 if (r < 0) {
5320 log_warning_errno(r, "Failed to nuke %s: %m", rt->tmp_dir);
5321 free(rt->tmp_dir);
5322 }
5323
5324 rt->tmp_dir = NULL;
5325 }
613b411c 5326
e8a565cb
YW
5327 if (destroy && rt->var_tmp_dir) {
5328 log_debug("Spawning thread to nuke %s", rt->var_tmp_dir);
5329
5330 r = asynchronous_job(remove_tmpdir_thread, rt->var_tmp_dir);
5331 if (r < 0) {
5332 log_warning_errno(r, "Failed to nuke %s: %m", rt->var_tmp_dir);
5333 free(rt->var_tmp_dir);
5334 }
5335
5336 rt->var_tmp_dir = NULL;
5337 }
5338
5339 rt->id = mfree(rt->id);
5340 rt->tmp_dir = mfree(rt->tmp_dir);
5341 rt->var_tmp_dir = mfree(rt->var_tmp_dir);
5342 safe_close_pair(rt->netns_storage_socket);
5343 return mfree(rt);
5344}
5345
5346static void exec_runtime_freep(ExecRuntime **rt) {
da6bc6ed 5347 (void) exec_runtime_free(*rt, false);
e8a565cb
YW
5348}
5349
8e8009dc
LP
5350static int exec_runtime_allocate(ExecRuntime **ret) {
5351 ExecRuntime *n;
613b411c 5352
8e8009dc 5353 assert(ret);
613b411c 5354
8e8009dc
LP
5355 n = new(ExecRuntime, 1);
5356 if (!n)
613b411c
LP
5357 return -ENOMEM;
5358
8e8009dc
LP
5359 *n = (ExecRuntime) {
5360 .netns_storage_socket = { -1, -1 },
5361 };
5362
5363 *ret = n;
613b411c
LP
5364 return 0;
5365}
5366
e8a565cb
YW
5367static int exec_runtime_add(
5368 Manager *m,
5369 const char *id,
5370 const char *tmp_dir,
5371 const char *var_tmp_dir,
5372 const int netns_storage_socket[2],
5373 ExecRuntime **ret) {
5374
5375 _cleanup_(exec_runtime_freep) ExecRuntime *rt = NULL;
613b411c
LP
5376 int r;
5377
e8a565cb 5378 assert(m);
613b411c
LP
5379 assert(id);
5380
e8a565cb
YW
5381 r = hashmap_ensure_allocated(&m->exec_runtime_by_id, &string_hash_ops);
5382 if (r < 0)
5383 return r;
613b411c 5384
e8a565cb 5385 r = exec_runtime_allocate(&rt);
613b411c
LP
5386 if (r < 0)
5387 return r;
5388
e8a565cb
YW
5389 rt->id = strdup(id);
5390 if (!rt->id)
5391 return -ENOMEM;
5392
5393 if (tmp_dir) {
5394 rt->tmp_dir = strdup(tmp_dir);
5395 if (!rt->tmp_dir)
5396 return -ENOMEM;
5397
5398 /* When tmp_dir is set, then we require var_tmp_dir is also set. */
5399 assert(var_tmp_dir);
5400 rt->var_tmp_dir = strdup(var_tmp_dir);
5401 if (!rt->var_tmp_dir)
5402 return -ENOMEM;
5403 }
5404
5405 if (netns_storage_socket) {
5406 rt->netns_storage_socket[0] = netns_storage_socket[0];
5407 rt->netns_storage_socket[1] = netns_storage_socket[1];
613b411c
LP
5408 }
5409
e8a565cb
YW
5410 r = hashmap_put(m->exec_runtime_by_id, rt->id, rt);
5411 if (r < 0)
5412 return r;
5413
5414 rt->manager = m;
5415
5416 if (ret)
5417 *ret = rt;
5418
5419 /* do not remove created ExecRuntime object when the operation succeeds. */
5420 rt = NULL;
5421 return 0;
5422}
5423
5424static int exec_runtime_make(Manager *m, const ExecContext *c, const char *id, ExecRuntime **ret) {
5425 _cleanup_free_ char *tmp_dir = NULL, *var_tmp_dir = NULL;
2fa3742d 5426 _cleanup_close_pair_ int netns_storage_socket[2] = { -1, -1 };
e8a565cb
YW
5427 int r;
5428
5429 assert(m);
5430 assert(c);
5431 assert(id);
5432
5433 /* It is not necessary to create ExecRuntime object. */
a8d08f39 5434 if (!c->private_network && !c->private_tmp && !c->network_namespace_path)
e8a565cb
YW
5435 return 0;
5436
efa2f3a1
TM
5437 if (c->private_tmp &&
5438 !(prefixed_path_strv_contains(c->inaccessible_paths, "/tmp") &&
5439 (prefixed_path_strv_contains(c->inaccessible_paths, "/var/tmp") ||
5440 prefixed_path_strv_contains(c->inaccessible_paths, "/var")))) {
e8a565cb 5441 r = setup_tmp_dirs(id, &tmp_dir, &var_tmp_dir);
613b411c
LP
5442 if (r < 0)
5443 return r;
5444 }
5445
a8d08f39 5446 if (c->private_network || c->network_namespace_path) {
e8a565cb
YW
5447 if (socketpair(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC, 0, netns_storage_socket) < 0)
5448 return -errno;
5449 }
5450
5451 r = exec_runtime_add(m, id, tmp_dir, var_tmp_dir, netns_storage_socket, ret);
5452 if (r < 0)
5453 return r;
5454
5455 /* Avoid cleanup */
2fa3742d 5456 netns_storage_socket[0] = netns_storage_socket[1] = -1;
613b411c
LP
5457 return 1;
5458}
5459
e8a565cb
YW
5460int exec_runtime_acquire(Manager *m, const ExecContext *c, const char *id, bool create, ExecRuntime **ret) {
5461 ExecRuntime *rt;
5462 int r;
613b411c 5463
e8a565cb
YW
5464 assert(m);
5465 assert(id);
5466 assert(ret);
5467
5468 rt = hashmap_get(m->exec_runtime_by_id, id);
5469 if (rt)
5470 /* We already have a ExecRuntime object, let's increase the ref count and reuse it */
5471 goto ref;
5472
5473 if (!create)
5474 return 0;
5475
5476 /* If not found, then create a new object. */
5477 r = exec_runtime_make(m, c, id, &rt);
5478 if (r <= 0)
5479 /* When r == 0, it is not necessary to create ExecRuntime object. */
5480 return r;
613b411c 5481
e8a565cb
YW
5482ref:
5483 /* increment reference counter. */
5484 rt->n_ref++;
5485 *ret = rt;
5486 return 1;
5487}
613b411c 5488
e8a565cb
YW
5489ExecRuntime *exec_runtime_unref(ExecRuntime *rt, bool destroy) {
5490 if (!rt)
613b411c
LP
5491 return NULL;
5492
e8a565cb 5493 assert(rt->n_ref > 0);
613b411c 5494
e8a565cb
YW
5495 rt->n_ref--;
5496 if (rt->n_ref > 0)
f2341e0a
LP
5497 return NULL;
5498
e8a565cb 5499 return exec_runtime_free(rt, destroy);
613b411c
LP
5500}
5501
e8a565cb
YW
5502int exec_runtime_serialize(const Manager *m, FILE *f, FDSet *fds) {
5503 ExecRuntime *rt;
5504 Iterator i;
5505
5506 assert(m);
613b411c
LP
5507 assert(f);
5508 assert(fds);
5509
e8a565cb
YW
5510 HASHMAP_FOREACH(rt, m->exec_runtime_by_id, i) {
5511 fprintf(f, "exec-runtime=%s", rt->id);
613b411c 5512
e8a565cb
YW
5513 if (rt->tmp_dir)
5514 fprintf(f, " tmp-dir=%s", rt->tmp_dir);
613b411c 5515
e8a565cb
YW
5516 if (rt->var_tmp_dir)
5517 fprintf(f, " var-tmp-dir=%s", rt->var_tmp_dir);
613b411c 5518
e8a565cb
YW
5519 if (rt->netns_storage_socket[0] >= 0) {
5520 int copy;
613b411c 5521
e8a565cb
YW
5522 copy = fdset_put_dup(fds, rt->netns_storage_socket[0]);
5523 if (copy < 0)
5524 return copy;
613b411c 5525
e8a565cb
YW
5526 fprintf(f, " netns-socket-0=%i", copy);
5527 }
613b411c 5528
e8a565cb
YW
5529 if (rt->netns_storage_socket[1] >= 0) {
5530 int copy;
613b411c 5531
e8a565cb
YW
5532 copy = fdset_put_dup(fds, rt->netns_storage_socket[1]);
5533 if (copy < 0)
5534 return copy;
613b411c 5535
e8a565cb
YW
5536 fprintf(f, " netns-socket-1=%i", copy);
5537 }
5538
5539 fputc('\n', f);
613b411c
LP
5540 }
5541
5542 return 0;
5543}
5544
e8a565cb
YW
5545int exec_runtime_deserialize_compat(Unit *u, const char *key, const char *value, FDSet *fds) {
5546 _cleanup_(exec_runtime_freep) ExecRuntime *rt_create = NULL;
5547 ExecRuntime *rt;
613b411c
LP
5548 int r;
5549
e8a565cb
YW
5550 /* This is for the migration from old (v237 or earlier) deserialization text.
5551 * Due to the bug #7790, this may not work with the units that use JoinsNamespaceOf=.
5552 * Even if the ExecRuntime object originally created by the other unit, we cannot judge
5553 * so or not from the serialized text, then we always creates a new object owned by this. */
5554
5555 assert(u);
613b411c
LP
5556 assert(key);
5557 assert(value);
5558
e8a565cb
YW
5559 /* Manager manages ExecRuntime objects by the unit id.
5560 * So, we omit the serialized text when the unit does not have id (yet?)... */
5561 if (isempty(u->id)) {
5562 log_unit_debug(u, "Invocation ID not found. Dropping runtime parameter.");
5563 return 0;
5564 }
613b411c 5565
e8a565cb
YW
5566 r = hashmap_ensure_allocated(&u->manager->exec_runtime_by_id, &string_hash_ops);
5567 if (r < 0) {
5568 log_unit_debug_errno(u, r, "Failed to allocate storage for runtime parameter: %m");
5569 return 0;
5570 }
5571
5572 rt = hashmap_get(u->manager->exec_runtime_by_id, u->id);
5573 if (!rt) {
5574 r = exec_runtime_allocate(&rt_create);
613b411c 5575 if (r < 0)
f2341e0a 5576 return log_oom();
613b411c 5577
e8a565cb
YW
5578 rt_create->id = strdup(u->id);
5579 if (!rt_create->id)
5580 return log_oom();
5581
5582 rt = rt_create;
5583 }
5584
5585 if (streq(key, "tmp-dir")) {
5586 char *copy;
5587
613b411c
LP
5588 copy = strdup(value);
5589 if (!copy)
5590 return log_oom();
5591
e8a565cb 5592 free_and_replace(rt->tmp_dir, copy);
613b411c
LP
5593
5594 } else if (streq(key, "var-tmp-dir")) {
5595 char *copy;
5596
613b411c
LP
5597 copy = strdup(value);
5598 if (!copy)
5599 return log_oom();
5600
e8a565cb 5601 free_and_replace(rt->var_tmp_dir, copy);
613b411c
LP
5602
5603 } else if (streq(key, "netns-socket-0")) {
5604 int fd;
5605
e8a565cb 5606 if (safe_atoi(value, &fd) < 0 || !fdset_contains(fds, fd)) {
f2341e0a 5607 log_unit_debug(u, "Failed to parse netns socket value: %s", value);
e8a565cb 5608 return 0;
613b411c 5609 }
e8a565cb
YW
5610
5611 safe_close(rt->netns_storage_socket[0]);
5612 rt->netns_storage_socket[0] = fdset_remove(fds, fd);
5613
613b411c
LP
5614 } else if (streq(key, "netns-socket-1")) {
5615 int fd;
5616
e8a565cb 5617 if (safe_atoi(value, &fd) < 0 || !fdset_contains(fds, fd)) {
f2341e0a 5618 log_unit_debug(u, "Failed to parse netns socket value: %s", value);
e8a565cb 5619 return 0;
613b411c 5620 }
e8a565cb
YW
5621
5622 safe_close(rt->netns_storage_socket[1]);
5623 rt->netns_storage_socket[1] = fdset_remove(fds, fd);
613b411c
LP
5624 } else
5625 return 0;
5626
e8a565cb
YW
5627 /* If the object is newly created, then put it to the hashmap which manages ExecRuntime objects. */
5628 if (rt_create) {
5629 r = hashmap_put(u->manager->exec_runtime_by_id, rt_create->id, rt_create);
5630 if (r < 0) {
3fe91079 5631 log_unit_debug_errno(u, r, "Failed to put runtime parameter to manager's storage: %m");
e8a565cb
YW
5632 return 0;
5633 }
613b411c 5634
e8a565cb 5635 rt_create->manager = u->manager;
613b411c 5636
e8a565cb
YW
5637 /* Avoid cleanup */
5638 rt_create = NULL;
5639 }
98b47d54 5640
e8a565cb
YW
5641 return 1;
5642}
613b411c 5643
e8a565cb
YW
5644void exec_runtime_deserialize_one(Manager *m, const char *value, FDSet *fds) {
5645 char *id = NULL, *tmp_dir = NULL, *var_tmp_dir = NULL;
5646 int r, fd0 = -1, fd1 = -1;
5647 const char *p, *v = value;
5648 size_t n;
613b411c 5649
e8a565cb
YW
5650 assert(m);
5651 assert(value);
5652 assert(fds);
98b47d54 5653
e8a565cb
YW
5654 n = strcspn(v, " ");
5655 id = strndupa(v, n);
5656 if (v[n] != ' ')
5657 goto finalize;
5658 p = v + n + 1;
5659
5660 v = startswith(p, "tmp-dir=");
5661 if (v) {
5662 n = strcspn(v, " ");
5663 tmp_dir = strndupa(v, n);
5664 if (v[n] != ' ')
5665 goto finalize;
5666 p = v + n + 1;
5667 }
5668
5669 v = startswith(p, "var-tmp-dir=");
5670 if (v) {
5671 n = strcspn(v, " ");
5672 var_tmp_dir = strndupa(v, n);
5673 if (v[n] != ' ')
5674 goto finalize;
5675 p = v + n + 1;
5676 }
5677
5678 v = startswith(p, "netns-socket-0=");
5679 if (v) {
5680 char *buf;
5681
5682 n = strcspn(v, " ");
5683 buf = strndupa(v, n);
5684 if (safe_atoi(buf, &fd0) < 0 || !fdset_contains(fds, fd0)) {
5685 log_debug("Unable to process exec-runtime netns fd specification.");
5686 return;
98b47d54 5687 }
e8a565cb
YW
5688 fd0 = fdset_remove(fds, fd0);
5689 if (v[n] != ' ')
5690 goto finalize;
5691 p = v + n + 1;
613b411c
LP
5692 }
5693
e8a565cb
YW
5694 v = startswith(p, "netns-socket-1=");
5695 if (v) {
5696 char *buf;
98b47d54 5697
e8a565cb
YW
5698 n = strcspn(v, " ");
5699 buf = strndupa(v, n);
5700 if (safe_atoi(buf, &fd1) < 0 || !fdset_contains(fds, fd1)) {
5701 log_debug("Unable to process exec-runtime netns fd specification.");
5702 return;
98b47d54 5703 }
e8a565cb
YW
5704 fd1 = fdset_remove(fds, fd1);
5705 }
98b47d54 5706
e8a565cb
YW
5707finalize:
5708
5709 r = exec_runtime_add(m, id, tmp_dir, var_tmp_dir, (int[]) { fd0, fd1 }, NULL);
7d853ca6 5710 if (r < 0)
e8a565cb 5711 log_debug_errno(r, "Failed to add exec-runtime: %m");
e8a565cb 5712}
613b411c 5713
e8a565cb
YW
5714void exec_runtime_vacuum(Manager *m) {
5715 ExecRuntime *rt;
5716 Iterator i;
5717
5718 assert(m);
5719
5720 /* Free unreferenced ExecRuntime objects. This is used after manager deserialization process. */
5721
5722 HASHMAP_FOREACH(rt, m->exec_runtime_by_id, i) {
5723 if (rt->n_ref > 0)
5724 continue;
5725
5726 (void) exec_runtime_free(rt, false);
5727 }
613b411c
LP
5728}
5729
b9c04eaf
YW
5730void exec_params_clear(ExecParameters *p) {
5731 if (!p)
5732 return;
5733
5734 strv_free(p->environment);
5735}
5736
80876c20
LP
5737static const char* const exec_input_table[_EXEC_INPUT_MAX] = {
5738 [EXEC_INPUT_NULL] = "null",
5739 [EXEC_INPUT_TTY] = "tty",
5740 [EXEC_INPUT_TTY_FORCE] = "tty-force",
4f2d528d 5741 [EXEC_INPUT_TTY_FAIL] = "tty-fail",
52c239d7
LB
5742 [EXEC_INPUT_SOCKET] = "socket",
5743 [EXEC_INPUT_NAMED_FD] = "fd",
08f3be7a 5744 [EXEC_INPUT_DATA] = "data",
2038c3f5 5745 [EXEC_INPUT_FILE] = "file",
80876c20
LP
5746};
5747
8a0867d6
LP
5748DEFINE_STRING_TABLE_LOOKUP(exec_input, ExecInput);
5749
94f04347 5750static const char* const exec_output_table[_EXEC_OUTPUT_MAX] = {
80876c20 5751 [EXEC_OUTPUT_INHERIT] = "inherit",
94f04347 5752 [EXEC_OUTPUT_NULL] = "null",
80876c20 5753 [EXEC_OUTPUT_TTY] = "tty",
9a6bca7a 5754 [EXEC_OUTPUT_KMSG] = "kmsg",
28dbc1e8 5755 [EXEC_OUTPUT_KMSG_AND_CONSOLE] = "kmsg+console",
706343f4
LP
5756 [EXEC_OUTPUT_JOURNAL] = "journal",
5757 [EXEC_OUTPUT_JOURNAL_AND_CONSOLE] = "journal+console",
52c239d7
LB
5758 [EXEC_OUTPUT_SOCKET] = "socket",
5759 [EXEC_OUTPUT_NAMED_FD] = "fd",
2038c3f5 5760 [EXEC_OUTPUT_FILE] = "file",
566b7d23 5761 [EXEC_OUTPUT_FILE_APPEND] = "append",
94f04347
LP
5762};
5763
5764DEFINE_STRING_TABLE_LOOKUP(exec_output, ExecOutput);
023a4f67
LP
5765
5766static const char* const exec_utmp_mode_table[_EXEC_UTMP_MODE_MAX] = {
5767 [EXEC_UTMP_INIT] = "init",
5768 [EXEC_UTMP_LOGIN] = "login",
5769 [EXEC_UTMP_USER] = "user",
5770};
5771
5772DEFINE_STRING_TABLE_LOOKUP(exec_utmp_mode, ExecUtmpMode);
53f47dfc
YW
5773
5774static const char* const exec_preserve_mode_table[_EXEC_PRESERVE_MODE_MAX] = {
5775 [EXEC_PRESERVE_NO] = "no",
5776 [EXEC_PRESERVE_YES] = "yes",
5777 [EXEC_PRESERVE_RESTART] = "restart",
5778};
5779
5780DEFINE_STRING_TABLE_LOOKUP_WITH_BOOLEAN(exec_preserve_mode, ExecPreserveMode, EXEC_PRESERVE_YES);
3536f49e 5781
6b7b2ed9 5782/* This table maps ExecDirectoryType to the setting it is configured with in the unit */
72fd1768 5783static const char* const exec_directory_type_table[_EXEC_DIRECTORY_TYPE_MAX] = {
3536f49e
YW
5784 [EXEC_DIRECTORY_RUNTIME] = "RuntimeDirectory",
5785 [EXEC_DIRECTORY_STATE] = "StateDirectory",
5786 [EXEC_DIRECTORY_CACHE] = "CacheDirectory",
5787 [EXEC_DIRECTORY_LOGS] = "LogsDirectory",
5788 [EXEC_DIRECTORY_CONFIGURATION] = "ConfigurationDirectory",
5789};
5790
5791DEFINE_STRING_TABLE_LOOKUP(exec_directory_type, ExecDirectoryType);
b1edf445 5792
6b7b2ed9
LP
5793/* And this table maps ExecDirectoryType too, but to a generic term identifying the type of resource. This
5794 * one is supposed to be generic enough to be used for unit types that don't use ExecContext and per-unit
5795 * directories, specifically .timer units with their timestamp touch file. */
5796static const char* const exec_resource_type_table[_EXEC_DIRECTORY_TYPE_MAX] = {
5797 [EXEC_DIRECTORY_RUNTIME] = "runtime",
5798 [EXEC_DIRECTORY_STATE] = "state",
5799 [EXEC_DIRECTORY_CACHE] = "cache",
5800 [EXEC_DIRECTORY_LOGS] = "logs",
5801 [EXEC_DIRECTORY_CONFIGURATION] = "configuration",
5802};
5803
5804DEFINE_STRING_TABLE_LOOKUP(exec_resource_type, ExecDirectoryType);
5805
5806/* And this table also maps ExecDirectoryType, to the environment variable we pass the selected directory to
5807 * the service payload in. */
fb2042dd
YW
5808static const char* const exec_directory_env_name_table[_EXEC_DIRECTORY_TYPE_MAX] = {
5809 [EXEC_DIRECTORY_RUNTIME] = "RUNTIME_DIRECTORY",
5810 [EXEC_DIRECTORY_STATE] = "STATE_DIRECTORY",
5811 [EXEC_DIRECTORY_CACHE] = "CACHE_DIRECTORY",
5812 [EXEC_DIRECTORY_LOGS] = "LOGS_DIRECTORY",
5813 [EXEC_DIRECTORY_CONFIGURATION] = "CONFIGURATION_DIRECTORY",
5814};
5815
5816DEFINE_PRIVATE_STRING_TABLE_LOOKUP_TO_STRING(exec_directory_env_name, ExecDirectoryType);
5817
b1edf445
LP
5818static const char* const exec_keyring_mode_table[_EXEC_KEYRING_MODE_MAX] = {
5819 [EXEC_KEYRING_INHERIT] = "inherit",
5820 [EXEC_KEYRING_PRIVATE] = "private",
5821 [EXEC_KEYRING_SHARED] = "shared",
5822};
5823
5824DEFINE_STRING_TABLE_LOOKUP(exec_keyring_mode, ExecKeyringMode);