]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/core/execute.c
execute: add const to array parameters, where possible
[thirdparty/systemd.git] / src / core / execute.c
CommitLineData
53e1b683 1/* SPDX-License-Identifier: LGPL-2.1+ */
a7334b09 2
034c6ed7
LP
3#include <errno.h>
4#include <fcntl.h>
8dd4c05b 5#include <poll.h>
d251207d 6#include <sys/eventfd.h>
f5947a5e 7#include <sys/ioctl.h>
f3e43635 8#include <sys/mman.h>
8dd4c05b 9#include <sys/personality.h>
94f04347 10#include <sys/prctl.h>
d2ffa389 11#include <sys/shm.h>
d2ffa389 12#include <sys/types.h>
8dd4c05b
LP
13#include <sys/un.h>
14#include <unistd.h>
023a4f67 15#include <utmpx.h>
5cb5a6ff 16
349cc4a5 17#if HAVE_PAM
5b6319dc
LP
18#include <security/pam_appl.h>
19#endif
20
349cc4a5 21#if HAVE_SELINUX
7b52a628
MS
22#include <selinux/selinux.h>
23#endif
24
349cc4a5 25#if HAVE_SECCOMP
17df7223
LP
26#include <seccomp.h>
27#endif
28
349cc4a5 29#if HAVE_APPARMOR
eef65bf3
MS
30#include <sys/apparmor.h>
31#endif
32
24882e06 33#include "sd-messages.h"
8dd4c05b
LP
34
35#include "af-list.h"
b5efdb8a 36#include "alloc-util.h"
349cc4a5 37#if HAVE_APPARMOR
3ffd4af2
LP
38#include "apparmor-util.h"
39#endif
8dd4c05b
LP
40#include "async.h"
41#include "barrier.h"
8dd4c05b 42#include "cap-list.h"
430f0182 43#include "capability-util.h"
a1164ae3 44#include "chown-recursive.h"
fdb3deca 45#include "cgroup-setup.h"
da681e1b 46#include "cpu-set-util.h"
f6a6225e 47#include "def.h"
686d13b9 48#include "env-file.h"
4d1a6904 49#include "env-util.h"
17df7223 50#include "errno-list.h"
3ffd4af2 51#include "execute.h"
8dd4c05b 52#include "exit-status.h"
3ffd4af2 53#include "fd-util.h"
f97b34a6 54#include "format-util.h"
f4f15635 55#include "fs-util.h"
7d50b32a 56#include "glob-util.h"
c004493c 57#include "io-util.h"
8dd4c05b 58#include "ioprio.h"
a1164ae3 59#include "label.h"
8dd4c05b
LP
60#include "log.h"
61#include "macro.h"
e8a565cb 62#include "manager.h"
0a970718 63#include "memory-util.h"
f5947a5e 64#include "missing_fs.h"
8dd4c05b
LP
65#include "mkdir.h"
66#include "namespace.h"
6bedfcbb 67#include "parse-util.h"
8dd4c05b 68#include "path-util.h"
0b452006 69#include "process-util.h"
78f22b97 70#include "rlimit-util.h"
8dd4c05b 71#include "rm-rf.h"
349cc4a5 72#if HAVE_SECCOMP
3ffd4af2
LP
73#include "seccomp-util.h"
74#endif
07d46372 75#include "securebits-util.h"
8dd4c05b 76#include "selinux-util.h"
24882e06 77#include "signal-util.h"
8dd4c05b 78#include "smack-util.h"
57b7a260 79#include "socket-util.h"
fd63e712 80#include "special.h"
949befd3 81#include "stat-util.h"
8b43440b 82#include "string-table.h"
07630cea 83#include "string-util.h"
8dd4c05b 84#include "strv.h"
7ccbd1ae 85#include "syslog-util.h"
8dd4c05b 86#include "terminal-util.h"
566b7d23 87#include "umask-util.h"
8dd4c05b 88#include "unit.h"
b1d4f8e1 89#include "user-util.h"
8dd4c05b 90#include "utmp-wtmp.h"
5cb5a6ff 91
/* Timeouts expressed in microseconds. NOTE(review): their users are not visible in this part of the
 * file — presumably the two stages of an idle wait; confirm against the callers. */
#define IDLE_TIMEOUT_USEC (5 * USEC_PER_SEC)
#define IDLE_TIMEOUT2_USEC (1 * USEC_PER_SEC)

/* Send buffer size (8 MiB) requested for the logging stream socket in connect_logger_as(). */
#define SNDBUF_SIZE (8 * 1024 * 1024)
96
/* Moves the n_fds file descriptors in fds[] so that entry i ends up as fd number i+3, i.e. the array
 * occupies the contiguous range 3…3+n_fds-1. The array is modified in place. Returns 0 on success or a
 * negative errno if duplicating a descriptor fails. */
static int shift_fds(int fds[], size_t n_fds) {
        int count, first = 0;

        if (n_fds == 0)
                return 0;

        /* Modifies the fds array! */

        assert(fds);

        count = (int) n_fds;

        /* Keep sweeping until one full pass places every fd at its target number. */
        while (first >= 0) {
                int retry = -1, slot;

                for (slot = first; slot < count; slot++) {
                        int want = slot + 3, got;

                        /* Nothing to do if this one already sits where it belongs */
                        if (fds[slot] == want)
                                continue;

                        got = fcntl(fds[slot], F_DUPFD, want);
                        if (got < 0)
                                return -errno;

                        safe_close(fds[slot]);
                        fds[slot] = got;

                        /* The wanted number was still taken, so we got something higher. Remember the
                         * first such slot and retry from there on the next pass. */
                        if (got != want && retry < 0)
                                retry = slot;
                }

                first = retry;
        }

        return 0;
}
141
/* Adjusts the file flags of the fds that are to be passed on: the first n_socket_fds entries get
 * O_NONBLOCK set or cleared according to 'nonblock', and FD_CLOEXEC is dropped from all
 * n_socket_fds + n_storage_fds entries. Returns 0 on success, or the first negative error returned by
 * fd_nonblock()/fd_cloexec(). */
static int flags_fds(const int fds[], size_t n_socket_fds, size_t n_storage_fds, bool nonblock) {
        size_t total = n_socket_fds + n_storage_fds, idx;

        if (total == 0)
                return 0;

        assert(fds);

        for (idx = 0; idx < total; idx++) {
                int k;

                /* O_NONBLOCK only matters for the socket activation fds, which come first */
                if (idx < n_socket_fds) {
                        k = fd_nonblock(fds[idx], nonblock);
                        if (k < 0)
                                return k;
                }

                /* FD_CLOEXEC is always removed: the whole point is to hand these fds to the child */
                k = fd_cloexec(fds[idx], false);
                if (k < 0)
                        return k;
        }

        return 0;
}
174
1e22b5cd 175static const char *exec_context_tty_path(const ExecContext *context) {
80876c20
LP
176 assert(context);
177
1e22b5cd
LP
178 if (context->stdio_as_fds)
179 return NULL;
180
80876c20
LP
181 if (context->tty_path)
182 return context->tty_path;
183
184 return "/dev/console";
185}
186
1e22b5cd
LP
187static void exec_context_tty_reset(const ExecContext *context, const ExecParameters *p) {
188 const char *path;
189
6ea832a2
LP
190 assert(context);
191
1e22b5cd 192 path = exec_context_tty_path(context);
6ea832a2 193
1e22b5cd
LP
194 if (context->tty_vhangup) {
195 if (p && p->stdin_fd >= 0)
196 (void) terminal_vhangup_fd(p->stdin_fd);
197 else if (path)
198 (void) terminal_vhangup(path);
199 }
6ea832a2 200
1e22b5cd
LP
201 if (context->tty_reset) {
202 if (p && p->stdin_fd >= 0)
203 (void) reset_terminal_fd(p->stdin_fd, true);
204 else if (path)
205 (void) reset_terminal(path);
206 }
207
208 if (context->tty_vt_disallocate && path)
209 (void) vt_disallocate(path);
6ea832a2
LP
210}
211
6af760f3
LP
212static bool is_terminal_input(ExecInput i) {
213 return IN_SET(i,
214 EXEC_INPUT_TTY,
215 EXEC_INPUT_TTY_FORCE,
216 EXEC_INPUT_TTY_FAIL);
217}
218
3a1286b6 219static bool is_terminal_output(ExecOutput o) {
6af760f3
LP
220 return IN_SET(o,
221 EXEC_OUTPUT_TTY,
222 EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
223 EXEC_OUTPUT_KMSG_AND_CONSOLE,
224 EXEC_OUTPUT_JOURNAL_AND_CONSOLE);
225}
226
aac8c0c3
LP
227static bool is_syslog_output(ExecOutput o) {
228 return IN_SET(o,
229 EXEC_OUTPUT_SYSLOG,
230 EXEC_OUTPUT_SYSLOG_AND_CONSOLE);
231}
232
233static bool is_kmsg_output(ExecOutput o) {
234 return IN_SET(o,
235 EXEC_OUTPUT_KMSG,
236 EXEC_OUTPUT_KMSG_AND_CONSOLE);
237}
238
6af760f3
LP
239static bool exec_context_needs_term(const ExecContext *c) {
240 assert(c);
241
242 /* Return true if the execution context suggests we should set $TERM to something useful. */
243
244 if (is_terminal_input(c->std_input))
245 return true;
246
247 if (is_terminal_output(c->std_output))
248 return true;
249
250 if (is_terminal_output(c->std_error))
251 return true;
252
253 return !!c->tty_path;
3a1286b6
MS
254}
255
/* Opens /dev/null with the given access flags and moves the resulting fd to the number nfd. Returns the
 * result of move_fd(), or a negative errno if opening fails. */
static int open_null_as(int flags, int nfd) {
        int null_fd;

        assert(nfd >= 0);

        null_fd = open("/dev/null", flags|O_NOCTTY);

        return null_fd < 0 ? -errno : move_fd(null_fd, nfd, false);
}
267
524daa8c 268static int connect_journal_socket(int fd, uid_t uid, gid_t gid) {
92a17af9 269 static const union sockaddr_union sa = {
b92bea5d
ZJS
270 .un.sun_family = AF_UNIX,
271 .un.sun_path = "/run/systemd/journal/stdout",
272 };
524daa8c
ZJS
273 uid_t olduid = UID_INVALID;
274 gid_t oldgid = GID_INVALID;
275 int r;
276
cad93f29 277 if (gid_is_valid(gid)) {
524daa8c
ZJS
278 oldgid = getgid();
279
92a17af9 280 if (setegid(gid) < 0)
524daa8c
ZJS
281 return -errno;
282 }
283
cad93f29 284 if (uid_is_valid(uid)) {
524daa8c
ZJS
285 olduid = getuid();
286
92a17af9 287 if (seteuid(uid) < 0) {
524daa8c
ZJS
288 r = -errno;
289 goto restore_gid;
290 }
291 }
292
92a17af9 293 r = connect(fd, &sa.sa, SOCKADDR_UN_LEN(sa.un)) < 0 ? -errno : 0;
524daa8c
ZJS
294
295 /* If we fail to restore the uid or gid, things will likely
296 fail later on. This should only happen if an LSM interferes. */
297
cad93f29 298 if (uid_is_valid(uid))
524daa8c
ZJS
299 (void) seteuid(olduid);
300
301 restore_gid:
cad93f29 302 if (gid_is_valid(gid))
524daa8c
ZJS
303 (void) setegid(oldgid);
304
305 return r;
306}
307
fd1f9c89 308static int connect_logger_as(
34cf6c43 309 const Unit *unit,
fd1f9c89 310 const ExecContext *context,
af635cf3 311 const ExecParameters *params,
fd1f9c89
LP
312 ExecOutput output,
313 const char *ident,
fd1f9c89
LP
314 int nfd,
315 uid_t uid,
316 gid_t gid) {
317
2ac1ff68
EV
318 _cleanup_close_ int fd = -1;
319 int r;
071830ff
LP
320
321 assert(context);
af635cf3 322 assert(params);
80876c20
LP
323 assert(output < _EXEC_OUTPUT_MAX);
324 assert(ident);
325 assert(nfd >= 0);
071830ff 326
54fe0cdb
LP
327 fd = socket(AF_UNIX, SOCK_STREAM, 0);
328 if (fd < 0)
80876c20 329 return -errno;
071830ff 330
524daa8c
ZJS
331 r = connect_journal_socket(fd, uid, gid);
332 if (r < 0)
333 return r;
071830ff 334
2ac1ff68 335 if (shutdown(fd, SHUT_RD) < 0)
80876c20 336 return -errno;
071830ff 337
fd1f9c89 338 (void) fd_inc_sndbuf(fd, SNDBUF_SIZE);
531dca78 339
2ac1ff68 340 if (dprintf(fd,
62bca2c6 341 "%s\n"
80876c20
LP
342 "%s\n"
343 "%i\n"
54fe0cdb
LP
344 "%i\n"
345 "%i\n"
346 "%i\n"
4f4a1dbf 347 "%i\n",
c867611e 348 context->syslog_identifier ?: ident,
af635cf3 349 params->flags & EXEC_PASS_LOG_UNIT ? unit->id : "",
54fe0cdb
LP
350 context->syslog_priority,
351 !!context->syslog_level_prefix,
aac8c0c3
LP
352 is_syslog_output(output),
353 is_kmsg_output(output),
2ac1ff68
EV
354 is_terminal_output(output)) < 0)
355 return -errno;
80876c20 356
2ac1ff68 357 return move_fd(TAKE_FD(fd), nfd, false);
80876c20 358}
2ac1ff68 359
/* Opens the terminal at 'path' (never as controlling TTY) and moves the fd to the number nfd. Returns
 * the result of move_fd(), or the negative error from open_terminal(). */
static int open_terminal_as(const char *path, int flags, int nfd) {
        int tty_fd;

        assert(path);
        assert(nfd >= 0);

        tty_fd = open_terminal(path, flags | O_NOCTTY);

        return tty_fd < 0 ? tty_fd : move_fd(tty_fd, nfd, false);
}
071830ff 372
2038c3f5 373static int acquire_path(const char *path, int flags, mode_t mode) {
15a3e96f
LP
374 union sockaddr_union sa = {};
375 _cleanup_close_ int fd = -1;
376 int r, salen;
071830ff 377
80876c20 378 assert(path);
071830ff 379
2038c3f5
LP
380 if (IN_SET(flags & O_ACCMODE, O_WRONLY, O_RDWR))
381 flags |= O_CREAT;
382
383 fd = open(path, flags|O_NOCTTY, mode);
384 if (fd >= 0)
15a3e96f 385 return TAKE_FD(fd);
071830ff 386
2038c3f5
LP
387 if (errno != ENXIO) /* ENXIO is returned when we try to open() an AF_UNIX file system socket on Linux */
388 return -errno;
15a3e96f 389 if (strlen(path) >= sizeof(sa.un.sun_path)) /* Too long, can't be a UNIX socket */
2038c3f5
LP
390 return -ENXIO;
391
392 /* So, it appears the specified path could be an AF_UNIX socket. Let's see if we can connect to it. */
393
394 fd = socket(AF_UNIX, SOCK_STREAM, 0);
395 if (fd < 0)
396 return -errno;
397
15a3e96f
LP
398 salen = sockaddr_un_set_path(&sa.un, path);
399 if (salen < 0)
400 return salen;
401
402 if (connect(fd, &sa.sa, salen) < 0)
2038c3f5
LP
403 return errno == EINVAL ? -ENXIO : -errno; /* Propagate initial error if we get EINVAL, i.e. we have
404 * indication that his wasn't an AF_UNIX socket after all */
071830ff 405
2038c3f5
LP
406 if ((flags & O_ACCMODE) == O_RDONLY)
407 r = shutdown(fd, SHUT_WR);
408 else if ((flags & O_ACCMODE) == O_WRONLY)
409 r = shutdown(fd, SHUT_RD);
410 else
15a3e96f
LP
411 return TAKE_FD(fd);
412 if (r < 0)
2038c3f5 413 return -errno;
2038c3f5 414
15a3e96f 415 return TAKE_FD(fd);
80876c20 416}
071830ff 417
08f3be7a
LP
418static int fixup_input(
419 const ExecContext *context,
420 int socket_fd,
421 bool apply_tty_stdin) {
422
423 ExecInput std_input;
424
425 assert(context);
426
427 std_input = context->std_input;
1e3ad081
LP
428
429 if (is_terminal_input(std_input) && !apply_tty_stdin)
430 return EXEC_INPUT_NULL;
071830ff 431
03fd9c49 432 if (std_input == EXEC_INPUT_SOCKET && socket_fd < 0)
4f2d528d
LP
433 return EXEC_INPUT_NULL;
434
08f3be7a
LP
435 if (std_input == EXEC_INPUT_DATA && context->stdin_data_size == 0)
436 return EXEC_INPUT_NULL;
437
03fd9c49 438 return std_input;
4f2d528d
LP
439}
440
03fd9c49 441static int fixup_output(ExecOutput std_output, int socket_fd) {
4f2d528d 442
03fd9c49 443 if (std_output == EXEC_OUTPUT_SOCKET && socket_fd < 0)
4f2d528d
LP
444 return EXEC_OUTPUT_INHERIT;
445
03fd9c49 446 return std_output;
4f2d528d
LP
447}
448
a34ceba6
LP
449static int setup_input(
450 const ExecContext *context,
451 const ExecParameters *params,
52c239d7 452 int socket_fd,
2caa38e9 453 const int named_iofds[static 3]) {
a34ceba6 454
4f2d528d
LP
455 ExecInput i;
456
457 assert(context);
a34ceba6 458 assert(params);
2caa38e9 459 assert(named_iofds);
a34ceba6
LP
460
461 if (params->stdin_fd >= 0) {
462 if (dup2(params->stdin_fd, STDIN_FILENO) < 0)
463 return -errno;
464
465 /* Try to make this the controlling tty, if it is a tty, and reset it */
1fb0682e
LP
466 if (isatty(STDIN_FILENO)) {
467 (void) ioctl(STDIN_FILENO, TIOCSCTTY, context->std_input == EXEC_INPUT_TTY_FORCE);
468 (void) reset_terminal_fd(STDIN_FILENO, true);
469 }
a34ceba6
LP
470
471 return STDIN_FILENO;
472 }
4f2d528d 473
08f3be7a 474 i = fixup_input(context, socket_fd, params->flags & EXEC_APPLY_TTY_STDIN);
4f2d528d
LP
475
476 switch (i) {
071830ff 477
80876c20
LP
478 case EXEC_INPUT_NULL:
479 return open_null_as(O_RDONLY, STDIN_FILENO);
480
481 case EXEC_INPUT_TTY:
482 case EXEC_INPUT_TTY_FORCE:
483 case EXEC_INPUT_TTY_FAIL: {
046a82c1 484 int fd;
071830ff 485
1e22b5cd 486 fd = acquire_terminal(exec_context_tty_path(context),
8854d795
LP
487 i == EXEC_INPUT_TTY_FAIL ? ACQUIRE_TERMINAL_TRY :
488 i == EXEC_INPUT_TTY_FORCE ? ACQUIRE_TERMINAL_FORCE :
489 ACQUIRE_TERMINAL_WAIT,
3a43da28 490 USEC_INFINITY);
970edce6 491 if (fd < 0)
80876c20
LP
492 return fd;
493
046a82c1 494 return move_fd(fd, STDIN_FILENO, false);
80876c20
LP
495 }
496
4f2d528d 497 case EXEC_INPUT_SOCKET:
e75a9ed1
LP
498 assert(socket_fd >= 0);
499
4f2d528d
LP
500 return dup2(socket_fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
501
52c239d7 502 case EXEC_INPUT_NAMED_FD:
e75a9ed1
LP
503 assert(named_iofds[STDIN_FILENO] >= 0);
504
52c239d7
LB
505 (void) fd_nonblock(named_iofds[STDIN_FILENO], false);
506 return dup2(named_iofds[STDIN_FILENO], STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
507
08f3be7a
LP
508 case EXEC_INPUT_DATA: {
509 int fd;
510
511 fd = acquire_data_fd(context->stdin_data, context->stdin_data_size, 0);
512 if (fd < 0)
513 return fd;
514
515 return move_fd(fd, STDIN_FILENO, false);
516 }
517
2038c3f5
LP
518 case EXEC_INPUT_FILE: {
519 bool rw;
520 int fd;
521
522 assert(context->stdio_file[STDIN_FILENO]);
523
524 rw = (context->std_output == EXEC_OUTPUT_FILE && streq_ptr(context->stdio_file[STDIN_FILENO], context->stdio_file[STDOUT_FILENO])) ||
525 (context->std_error == EXEC_OUTPUT_FILE && streq_ptr(context->stdio_file[STDIN_FILENO], context->stdio_file[STDERR_FILENO]));
526
527 fd = acquire_path(context->stdio_file[STDIN_FILENO], rw ? O_RDWR : O_RDONLY, 0666 & ~context->umask);
528 if (fd < 0)
529 return fd;
530
531 return move_fd(fd, STDIN_FILENO, false);
532 }
533
80876c20
LP
534 default:
535 assert_not_reached("Unknown input type");
536 }
537}
538
41fc585a
LP
539static bool can_inherit_stderr_from_stdout(
540 const ExecContext *context,
541 ExecOutput o,
542 ExecOutput e) {
543
544 assert(context);
545
546 /* Returns true, if given the specified STDERR and STDOUT output we can directly dup() the stdout fd to the
547 * stderr fd */
548
549 if (e == EXEC_OUTPUT_INHERIT)
550 return true;
551 if (e != o)
552 return false;
553
554 if (e == EXEC_OUTPUT_NAMED_FD)
555 return streq_ptr(context->stdio_fdname[STDOUT_FILENO], context->stdio_fdname[STDERR_FILENO]);
556
557 if (IN_SET(e, EXEC_OUTPUT_FILE, EXEC_OUTPUT_FILE_APPEND))
558 return streq_ptr(context->stdio_file[STDOUT_FILENO], context->stdio_file[STDERR_FILENO]);
559
560 return true;
561}
562
a34ceba6 563static int setup_output(
34cf6c43 564 const Unit *unit,
a34ceba6
LP
565 const ExecContext *context,
566 const ExecParameters *params,
567 int fileno,
568 int socket_fd,
2caa38e9 569 const int named_iofds[static 3],
a34ceba6 570 const char *ident,
7bce046b
LP
571 uid_t uid,
572 gid_t gid,
573 dev_t *journal_stream_dev,
574 ino_t *journal_stream_ino) {
a34ceba6 575
4f2d528d
LP
576 ExecOutput o;
577 ExecInput i;
47c1d80d 578 int r;
4f2d528d 579
f2341e0a 580 assert(unit);
80876c20 581 assert(context);
a34ceba6 582 assert(params);
80876c20 583 assert(ident);
7bce046b
LP
584 assert(journal_stream_dev);
585 assert(journal_stream_ino);
80876c20 586
a34ceba6
LP
587 if (fileno == STDOUT_FILENO && params->stdout_fd >= 0) {
588
589 if (dup2(params->stdout_fd, STDOUT_FILENO) < 0)
590 return -errno;
591
592 return STDOUT_FILENO;
593 }
594
595 if (fileno == STDERR_FILENO && params->stderr_fd >= 0) {
596 if (dup2(params->stderr_fd, STDERR_FILENO) < 0)
597 return -errno;
598
599 return STDERR_FILENO;
600 }
601
08f3be7a 602 i = fixup_input(context, socket_fd, params->flags & EXEC_APPLY_TTY_STDIN);
03fd9c49 603 o = fixup_output(context->std_output, socket_fd);
4f2d528d 604
eb17e935
MS
605 if (fileno == STDERR_FILENO) {
606 ExecOutput e;
607 e = fixup_output(context->std_error, socket_fd);
80876c20 608
eb17e935
MS
609 /* This expects the input and output are already set up */
610
611 /* Don't change the stderr file descriptor if we inherit all
612 * the way and are not on a tty */
613 if (e == EXEC_OUTPUT_INHERIT &&
614 o == EXEC_OUTPUT_INHERIT &&
615 i == EXEC_INPUT_NULL &&
616 !is_terminal_input(context->std_input) &&
617 getppid () != 1)
618 return fileno;
619
620 /* Duplicate from stdout if possible */
41fc585a 621 if (can_inherit_stderr_from_stdout(context, o, e))
eb17e935 622 return dup2(STDOUT_FILENO, fileno) < 0 ? -errno : fileno;
071830ff 623
eb17e935 624 o = e;
80876c20 625
eb17e935 626 } else if (o == EXEC_OUTPUT_INHERIT) {
21d21ea4
LP
627 /* If input got downgraded, inherit the original value */
628 if (i == EXEC_INPUT_NULL && is_terminal_input(context->std_input))
1e22b5cd 629 return open_terminal_as(exec_context_tty_path(context), O_WRONLY, fileno);
21d21ea4 630
08f3be7a
LP
631 /* If the input is connected to anything that's not a /dev/null or a data fd, inherit that... */
632 if (!IN_SET(i, EXEC_INPUT_NULL, EXEC_INPUT_DATA))
eb17e935 633 return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
071830ff 634
acb591e4
LP
635 /* If we are not started from PID 1 we just inherit STDOUT from our parent process. */
636 if (getppid() != 1)
eb17e935 637 return fileno;
94f04347 638
eb17e935
MS
639 /* We need to open /dev/null here anew, to get the right access mode. */
640 return open_null_as(O_WRONLY, fileno);
071830ff 641 }
94f04347 642
eb17e935 643 switch (o) {
80876c20
LP
644
645 case EXEC_OUTPUT_NULL:
eb17e935 646 return open_null_as(O_WRONLY, fileno);
80876c20
LP
647
648 case EXEC_OUTPUT_TTY:
4f2d528d 649 if (is_terminal_input(i))
eb17e935 650 return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
80876c20
LP
651
652 /* We don't reset the terminal if this is just about output */
1e22b5cd 653 return open_terminal_as(exec_context_tty_path(context), O_WRONLY, fileno);
80876c20
LP
654
655 case EXEC_OUTPUT_SYSLOG:
28dbc1e8 656 case EXEC_OUTPUT_SYSLOG_AND_CONSOLE:
9a6bca7a 657 case EXEC_OUTPUT_KMSG:
28dbc1e8 658 case EXEC_OUTPUT_KMSG_AND_CONSOLE:
706343f4
LP
659 case EXEC_OUTPUT_JOURNAL:
660 case EXEC_OUTPUT_JOURNAL_AND_CONSOLE:
af635cf3 661 r = connect_logger_as(unit, context, params, o, ident, fileno, uid, gid);
47c1d80d 662 if (r < 0) {
82677ae4 663 log_unit_warning_errno(unit, r, "Failed to connect %s to the journal socket, ignoring: %m", fileno == STDOUT_FILENO ? "stdout" : "stderr");
eb17e935 664 r = open_null_as(O_WRONLY, fileno);
7bce046b
LP
665 } else {
666 struct stat st;
667
668 /* If we connected this fd to the journal via a stream, patch the device/inode into the passed
669 * parameters, but only then. This is useful so that we can set $JOURNAL_STREAM that permits
ab2116b1
LP
670 * services to detect whether they are connected to the journal or not.
671 *
672 * If both stdout and stderr are connected to a stream then let's make sure to store the data
673 * about STDERR as that's usually the best way to do logging. */
7bce046b 674
ab2116b1
LP
675 if (fstat(fileno, &st) >= 0 &&
676 (*journal_stream_ino == 0 || fileno == STDERR_FILENO)) {
7bce046b
LP
677 *journal_stream_dev = st.st_dev;
678 *journal_stream_ino = st.st_ino;
679 }
47c1d80d
MS
680 }
681 return r;
4f2d528d
LP
682
683 case EXEC_OUTPUT_SOCKET:
684 assert(socket_fd >= 0);
e75a9ed1 685
eb17e935 686 return dup2(socket_fd, fileno) < 0 ? -errno : fileno;
94f04347 687
52c239d7 688 case EXEC_OUTPUT_NAMED_FD:
e75a9ed1
LP
689 assert(named_iofds[fileno] >= 0);
690
52c239d7
LB
691 (void) fd_nonblock(named_iofds[fileno], false);
692 return dup2(named_iofds[fileno], fileno) < 0 ? -errno : fileno;
693
566b7d23
ZD
694 case EXEC_OUTPUT_FILE:
695 case EXEC_OUTPUT_FILE_APPEND: {
2038c3f5 696 bool rw;
566b7d23 697 int fd, flags;
2038c3f5
LP
698
699 assert(context->stdio_file[fileno]);
700
701 rw = context->std_input == EXEC_INPUT_FILE &&
702 streq_ptr(context->stdio_file[fileno], context->stdio_file[STDIN_FILENO]);
703
704 if (rw)
705 return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
706
566b7d23
ZD
707 flags = O_WRONLY;
708 if (o == EXEC_OUTPUT_FILE_APPEND)
709 flags |= O_APPEND;
710
711 fd = acquire_path(context->stdio_file[fileno], flags, 0666 & ~context->umask);
2038c3f5
LP
712 if (fd < 0)
713 return fd;
714
566b7d23 715 return move_fd(fd, fileno, 0);
2038c3f5
LP
716 }
717
94f04347 718 default:
80876c20 719 assert_not_reached("Unknown error type");
94f04347 720 }
071830ff
LP
721}
722
02a51aba 723static int chown_terminal(int fd, uid_t uid) {
4b3b5bc7 724 int r;
02a51aba
LP
725
726 assert(fd >= 0);
02a51aba 727
1ff74fb6 728 /* Before we chown/chmod the TTY, let's ensure this is actually a tty */
4b3b5bc7
LP
729 if (isatty(fd) < 1) {
730 if (IN_SET(errno, EINVAL, ENOTTY))
731 return 0; /* not a tty */
1ff74fb6 732
02a51aba 733 return -errno;
4b3b5bc7 734 }
02a51aba 735
4b3b5bc7
LP
736 /* This might fail. What matters are the results. */
737 r = fchmod_and_chown(fd, TTY_MODE, uid, -1);
738 if (r < 0)
739 return r;
02a51aba 740
4b3b5bc7 741 return 1;
02a51aba
LP
742}
743
7d5ceb64 744static int setup_confirm_stdio(const char *vc, int *_saved_stdin, int *_saved_stdout) {
3d18b167
LP
745 _cleanup_close_ int fd = -1, saved_stdin = -1, saved_stdout = -1;
746 int r;
80876c20 747
80876c20
LP
748 assert(_saved_stdin);
749 assert(_saved_stdout);
750
af6da548
LP
751 saved_stdin = fcntl(STDIN_FILENO, F_DUPFD, 3);
752 if (saved_stdin < 0)
753 return -errno;
80876c20 754
af6da548 755 saved_stdout = fcntl(STDOUT_FILENO, F_DUPFD, 3);
3d18b167
LP
756 if (saved_stdout < 0)
757 return -errno;
80876c20 758
8854d795 759 fd = acquire_terminal(vc, ACQUIRE_TERMINAL_WAIT, DEFAULT_CONFIRM_USEC);
3d18b167
LP
760 if (fd < 0)
761 return fd;
80876c20 762
af6da548
LP
763 r = chown_terminal(fd, getuid());
764 if (r < 0)
3d18b167 765 return r;
02a51aba 766
3d18b167
LP
767 r = reset_terminal_fd(fd, true);
768 if (r < 0)
769 return r;
80876c20 770
2b33ab09 771 r = rearrange_stdio(fd, fd, STDERR_FILENO);
3d18b167 772 fd = -1;
2b33ab09
LP
773 if (r < 0)
774 return r;
80876c20
LP
775
776 *_saved_stdin = saved_stdin;
777 *_saved_stdout = saved_stdout;
778
3d18b167 779 saved_stdin = saved_stdout = -1;
80876c20 780
3d18b167 781 return 0;
80876c20
LP
782}
783
63d77c92 784static void write_confirm_error_fd(int err, int fd, const Unit *u) {
3b20f877
FB
785 assert(err < 0);
786
787 if (err == -ETIMEDOUT)
63d77c92 788 dprintf(fd, "Confirmation question timed out for %s, assuming positive response.\n", u->id);
3b20f877
FB
789 else {
790 errno = -err;
63d77c92 791 dprintf(fd, "Couldn't ask confirmation for %s: %m, assuming positive response.\n", u->id);
3b20f877
FB
792 }
793}
794
63d77c92 795static void write_confirm_error(int err, const char *vc, const Unit *u) {
03e334a1 796 _cleanup_close_ int fd = -1;
80876c20 797
3b20f877 798 assert(vc);
80876c20 799
7d5ceb64 800 fd = open_terminal(vc, O_WRONLY|O_NOCTTY|O_CLOEXEC);
af6da548 801 if (fd < 0)
3b20f877 802 return;
80876c20 803
63d77c92 804 write_confirm_error_fd(err, fd, u);
af6da548 805}
80876c20 806
/* Counterpart of setup_confirm_stdio(): releases the terminal and puts the saved stdin/stdout fds back
 * in place. The saved fds are closed and invalidated in any case. Returns 0 on success, or the
 * negative errno of the last dup2() that failed. */
static int restore_confirm_stdio(int *saved_stdin, int *saved_stdout) {
        int ret = 0;

        assert(saved_stdin);
        assert(saved_stdout);

        release_terminal();

        if (*saved_stdin >= 0 && dup2(*saved_stdin, STDIN_FILENO) < 0)
                ret = -errno;

        if (*saved_stdout >= 0 && dup2(*saved_stdout, STDOUT_FILENO) < 0)
                ret = -errno;

        *saved_stdin = safe_close(*saved_stdin);
        *saved_stdout = safe_close(*saved_stdout);

        return ret;
}
828
3b20f877
FB
/* Possible outcomes of ask_for_confirmation() */
enum {
        CONFIRM_PRETEND_FAILURE = -1, /* don't execute the command, pretend it failed */
        CONFIRM_PRETEND_SUCCESS =  0, /* don't execute the command, pretend it succeeded */
        CONFIRM_EXECUTE         =  1, /* execute the command */
};
834
eedf223a 835static int ask_for_confirmation(const char *vc, Unit *u, const char *cmdline) {
af6da548 836 int saved_stdout = -1, saved_stdin = -1, r;
2bcd3c26 837 _cleanup_free_ char *e = NULL;
3b20f877 838 char c;
af6da548 839
3b20f877 840 /* For any internal errors, assume a positive response. */
7d5ceb64 841 r = setup_confirm_stdio(vc, &saved_stdin, &saved_stdout);
3b20f877 842 if (r < 0) {
63d77c92 843 write_confirm_error(r, vc, u);
3b20f877
FB
844 return CONFIRM_EXECUTE;
845 }
af6da548 846
b0eb2944
FB
847 /* confirm_spawn might have been disabled while we were sleeping. */
848 if (manager_is_confirm_spawn_disabled(u->manager)) {
849 r = 1;
850 goto restore_stdio;
851 }
af6da548 852
2bcd3c26
FB
853 e = ellipsize(cmdline, 60, 100);
854 if (!e) {
855 log_oom();
856 r = CONFIRM_EXECUTE;
857 goto restore_stdio;
858 }
af6da548 859
d172b175 860 for (;;) {
539622bd 861 r = ask_char(&c, "yfshiDjcn", "Execute %s? [y, f, s – h for help] ", e);
d172b175 862 if (r < 0) {
63d77c92 863 write_confirm_error_fd(r, STDOUT_FILENO, u);
d172b175
FB
864 r = CONFIRM_EXECUTE;
865 goto restore_stdio;
866 }
af6da548 867
d172b175 868 switch (c) {
b0eb2944
FB
869 case 'c':
870 printf("Resuming normal execution.\n");
871 manager_disable_confirm_spawn();
872 r = 1;
873 break;
dd6f9ac0
FB
874 case 'D':
875 unit_dump(u, stdout, " ");
876 continue; /* ask again */
d172b175
FB
877 case 'f':
878 printf("Failing execution.\n");
879 r = CONFIRM_PRETEND_FAILURE;
880 break;
881 case 'h':
b0eb2944
FB
882 printf(" c - continue, proceed without asking anymore\n"
883 " D - dump, show the state of the unit\n"
dd6f9ac0 884 " f - fail, don't execute the command and pretend it failed\n"
d172b175 885 " h - help\n"
eedf223a 886 " i - info, show a short summary of the unit\n"
56fde33a 887 " j - jobs, show jobs that are in progress\n"
d172b175
FB
888 " s - skip, don't execute the command and pretend it succeeded\n"
889 " y - yes, execute the command\n");
dd6f9ac0 890 continue; /* ask again */
eedf223a
FB
891 case 'i':
892 printf(" Description: %s\n"
893 " Unit: %s\n"
894 " Command: %s\n",
895 u->id, u->description, cmdline);
896 continue; /* ask again */
56fde33a
FB
897 case 'j':
898 manager_dump_jobs(u->manager, stdout, " ");
899 continue; /* ask again */
539622bd
FB
900 case 'n':
901 /* 'n' was removed in favor of 'f'. */
902 printf("Didn't understand 'n', did you mean 'f'?\n");
903 continue; /* ask again */
d172b175
FB
904 case 's':
905 printf("Skipping execution.\n");
906 r = CONFIRM_PRETEND_SUCCESS;
907 break;
908 case 'y':
909 r = CONFIRM_EXECUTE;
910 break;
911 default:
912 assert_not_reached("Unhandled choice");
913 }
3b20f877 914 break;
3b20f877 915 }
af6da548 916
3b20f877 917restore_stdio:
af6da548 918 restore_confirm_stdio(&saved_stdin, &saved_stdout);
af6da548 919 return r;
80876c20
LP
920}
921
4d885bd3
DH
922static int get_fixed_user(const ExecContext *c, const char **user,
923 uid_t *uid, gid_t *gid,
924 const char **home, const char **shell) {
81a2b7ce 925 int r;
4d885bd3 926 const char *name;
81a2b7ce 927
4d885bd3 928 assert(c);
81a2b7ce 929
23deef88
LP
930 if (!c->user)
931 return 0;
932
4d885bd3
DH
933 /* Note that we don't set $HOME or $SHELL if they are not particularly enlightening anyway
934 * (i.e. are "/" or "/bin/nologin"). */
81a2b7ce 935
23deef88 936 name = c->user;
fafff8f1 937 r = get_user_creds(&name, uid, gid, home, shell, USER_CREDS_CLEAN);
4d885bd3
DH
938 if (r < 0)
939 return r;
81a2b7ce 940
4d885bd3
DH
941 *user = name;
942 return 0;
943}
944
945static int get_fixed_group(const ExecContext *c, const char **group, gid_t *gid) {
946 int r;
947 const char *name;
948
949 assert(c);
950
951 if (!c->group)
952 return 0;
953
954 name = c->group;
fafff8f1 955 r = get_group_creds(&name, gid, 0);
4d885bd3
DH
956 if (r < 0)
957 return r;
958
959 *group = name;
960 return 0;
961}
962
cdc5d5c5
DH
963static int get_supplementary_groups(const ExecContext *c, const char *user,
964 const char *group, gid_t gid,
965 gid_t **supplementary_gids, int *ngids) {
4d885bd3
DH
966 char **i;
967 int r, k = 0;
968 int ngroups_max;
969 bool keep_groups = false;
970 gid_t *groups = NULL;
971 _cleanup_free_ gid_t *l_gids = NULL;
972
973 assert(c);
974
bbeea271
DH
975 /*
976 * If user is given, then lookup GID and supplementary groups list.
977 * We avoid NSS lookups for gid=0. Also we have to initialize groups
cdc5d5c5
DH
978 * here and as early as possible so we keep the list of supplementary
979 * groups of the caller.
bbeea271
DH
980 */
981 if (user && gid_is_valid(gid) && gid != 0) {
982 /* First step, initialize groups from /etc/groups */
983 if (initgroups(user, gid) < 0)
984 return -errno;
985
986 keep_groups = true;
987 }
988
ac6e8be6 989 if (strv_isempty(c->supplementary_groups))
4d885bd3
DH
990 return 0;
991
366ddd25
DH
992 /*
993 * If SupplementaryGroups= was passed then NGROUPS_MAX has to
994 * be positive, otherwise fail.
995 */
996 errno = 0;
997 ngroups_max = (int) sysconf(_SC_NGROUPS_MAX);
66855de7
LP
998 if (ngroups_max <= 0)
999 return errno_or_else(EOPNOTSUPP);
366ddd25 1000
4d885bd3
DH
1001 l_gids = new(gid_t, ngroups_max);
1002 if (!l_gids)
1003 return -ENOMEM;
81a2b7ce 1004
4d885bd3
DH
1005 if (keep_groups) {
1006 /*
1007 * Lookup the list of groups that the user belongs to, we
1008 * avoid NSS lookups here too for gid=0.
1009 */
1010 k = ngroups_max;
1011 if (getgrouplist(user, gid, l_gids, &k) < 0)
1012 return -EINVAL;
1013 } else
1014 k = 0;
81a2b7ce 1015
4d885bd3
DH
1016 STRV_FOREACH(i, c->supplementary_groups) {
1017 const char *g;
81a2b7ce 1018
4d885bd3
DH
1019 if (k >= ngroups_max)
1020 return -E2BIG;
81a2b7ce 1021
4d885bd3 1022 g = *i;
fafff8f1 1023 r = get_group_creds(&g, l_gids+k, 0);
4d885bd3
DH
1024 if (r < 0)
1025 return r;
81a2b7ce 1026
4d885bd3
DH
1027 k++;
1028 }
81a2b7ce 1029
4d885bd3
DH
1030 /*
1031 * Sets ngids to zero to drop all supplementary groups, happens
1032 * when we are under root and SupplementaryGroups= is empty.
1033 */
1034 if (k == 0) {
1035 *ngids = 0;
1036 return 0;
1037 }
81a2b7ce 1038
4d885bd3
DH
1039 /* Otherwise get the final list of supplementary groups */
1040 groups = memdup(l_gids, sizeof(gid_t) * k);
1041 if (!groups)
1042 return -ENOMEM;
1043
1044 *supplementary_gids = groups;
1045 *ngids = k;
1046
1047 groups = NULL;
1048
1049 return 0;
1050}
1051
/* Applies the group credentials: installs the supplementary group list if it is non-empty, then sets
 * real, effective and saved GID to gid if that is valid. Returns 0 on success, negative errno-style
 * error on failure. */
static int enforce_groups(gid_t gid, const gid_t *supplementary_gids, int ngids) {

        /* Handle SupplementaryGroups= if it is not empty */
        if (ngids > 0) {
                int r;

                r = maybe_setgroups(ngids, supplementary_gids);
                if (r < 0)
                        return r;
        }

        /* Then set our gids */
        if (gid_is_valid(gid) && setresgid(gid, gid, gid) < 0)
                return -errno;

        return 0;
}
1070
1071static int enforce_user(const ExecContext *context, uid_t uid) {
81a2b7ce
LP
1072 assert(context);
1073
4d885bd3
DH
1074 if (!uid_is_valid(uid))
1075 return 0;
1076
479050b3 1077 /* Sets (but doesn't look up) the uid and make sure we keep the
81a2b7ce
LP
1078 * capabilities while doing so. */
1079
479050b3 1080 if (context->capability_ambient_set != 0) {
81a2b7ce
LP
1081
1082 /* First step: If we need to keep capabilities but
1083 * drop privileges we need to make sure we keep our
cbb21cca 1084 * caps, while we drop privileges. */
693ced48 1085 if (uid != 0) {
cbb21cca 1086 int sb = context->secure_bits | 1<<SECURE_KEEP_CAPS;
693ced48
LP
1087
1088 if (prctl(PR_GET_SECUREBITS) != sb)
1089 if (prctl(PR_SET_SECUREBITS, sb) < 0)
1090 return -errno;
1091 }
81a2b7ce
LP
1092 }
1093
479050b3 1094 /* Second step: actually set the uids */
81a2b7ce
LP
1095 if (setresuid(uid, uid, uid) < 0)
1096 return -errno;
1097
1098 /* At this point we should have all necessary capabilities but
1099 are otherwise a normal user. However, the caps might got
1100 corrupted due to the setresuid() so we need clean them up
1101 later. This is done outside of this call. */
1102
1103 return 0;
1104}
1105
349cc4a5 1106#if HAVE_PAM
5b6319dc
LP
1107
1108static int null_conv(
1109 int num_msg,
1110 const struct pam_message **msg,
1111 struct pam_response **resp,
1112 void *appdata_ptr) {
1113
1114 /* We don't support conversations */
1115
1116 return PAM_CONV_ERR;
1117}
1118
cefc33ae
LP
1119#endif
1120
5b6319dc
LP
/* Opens a PAM session for service 'name' on behalf of 'user' and forks off a helper process
 * ("(sd-pam)") that closes the session again later on.
 *
 * name, user: passed to pam_start()
 * uid, gid:   credentials the helper process drops to
 * tty:        TTY to report to PAM; if NULL, the TTY behind stdin is used when there is one
 * env:        environment fed into PAM; on success the list PAM hands back is swapped in via
 *             strv_free_and_replace(), whose result is returned
 * fds, n_fds: file descriptors that get closed in the helper process
 *
 * On failure a negative errno-style value is returned (-EPERM for PAM-level failures). When built
 * without HAVE_PAM this does nothing and returns 0. */
static int setup_pam(
                const char *name,
                const char *user,
                uid_t uid,
                gid_t gid,
                const char *tty,
                char ***env,
                const int fds[], size_t n_fds) {

#if HAVE_PAM

        static const struct pam_conv conv = {
                .conv = null_conv,
                .appdata_ptr = NULL
        };

        _cleanup_(barrier_destroy) Barrier barrier = BARRIER_NULL;
        pam_handle_t *handle = NULL;
        sigset_t saved_mask;
        int pam_code = PAM_SUCCESS, r;
        char **entry, **pam_env = NULL;
        bool session_opened = false;
        pid_t pam_pid = 0, parent_pid;
        int pam_flags = 0;

        assert(name);
        assert(user);
        assert(env);

        /* PAM is set up right here, and then we fork. The forked-off helper sticks around until it is
         * terminated, either through the parent-death signal it requests with PR_SET_PDEATHSIG or by
         * systemd via the cgroup logic, and then removes the PAM session again. This process carries
         * on and exec()s the actual daemon. Doing it this way around guarantees that the main PID of
         * the daemon is the one that was fork()ed initially. */

        r = barrier_create(&barrier);
        if (r < 0)
                goto fail;

        /* Keep PAM quiet unless we log at debug level */
        if (log_get_max_level() < LOG_DEBUG)
                pam_flags |= PAM_SILENT;

        pam_code = pam_start(name, user, &conv, &handle);
        if (pam_code != PAM_SUCCESS) {
                handle = NULL;
                goto fail;
        }

        if (!tty) {
                _cleanup_free_ char *stdin_tty = NULL;

                /* No TTY was named explicitly, but the fd we got as stdin might still be one. If so,
                 * read the TTY name off it. */

                if (getttyname_malloc(STDIN_FILENO, &stdin_tty) >= 0)
                        tty = strjoina("/dev/", stdin_tty);
        }

        if (tty) {
                pam_code = pam_set_item(handle, PAM_TTY, tty);
                if (pam_code != PAM_SUCCESS)
                        goto fail;
        }

        /* Hand our environment over to PAM, one assignment at a time */
        STRV_FOREACH(entry, *env) {
                pam_code = pam_putenv(handle, *entry);
                if (pam_code != PAM_SUCCESS)
                        goto fail;
        }

        pam_code = pam_acct_mgmt(handle, pam_flags);
        if (pam_code != PAM_SUCCESS)
                goto fail;

        pam_code = pam_setcred(handle, PAM_ESTABLISH_CRED | pam_flags);
        if (pam_code != PAM_SUCCESS)
                goto fail;

        pam_code = pam_open_session(handle, pam_flags);
        if (pam_code != PAM_SUCCESS)
                goto fail;

        /* From now on the failure path has to close the session, too */
        session_opened = true;

        pam_env = pam_getenvlist(handle);
        if (!pam_env) {
                pam_code = PAM_BUF_ERR;
                goto fail;
        }

        /* Block SIGTERM before forking, so that it cannot get lost in the child */

        assert_se(sigprocmask_many(SIG_BLOCK, &saved_mask, SIGTERM, -1) >= 0);

        parent_pid = getpid_cached();

        r = safe_fork("(sd-pam)", 0, &pam_pid);
        if (r < 0)
                goto fail;
        if (r == 0) {
                int received, exit_code = EXIT_PAM;

                /* This is the helper: its task is to reset the PAM session on termination */
                barrier_set_role(&barrier, BARRIER_CHILD);

                /* Don't keep the passed fds open in here. The assumption is that apart from those,
                 * only fds opened by PAM are open at this point. */
                (void) close_many(fds, n_fds);

                /* Drop privileges: none are needed for pam_close_session(), and doing so makes
                 * PR_SET_PDEATHSIG work in most cases. Failures are only logged, but sd-pam threads
                 * should then be expected not to exit normally. */

                r = maybe_setgroups(0, NULL);
                if (r < 0)
                        log_warning_errno(r, "Failed to setgroups() in sd-pam: %m");
                if (setresgid(gid, gid, gid) < 0)
                        log_warning_errno(errno, "Failed to setresgid() in sd-pam: %m");
                if (setresuid(uid, uid, uid) < 0)
                        log_warning_errno(errno, "Failed to setresuid() in sd-pam: %m");

                (void) ignore_signals(SIGPIPE, -1);

                /* Ask for SIGTERM once the parent dies. This only works if the setresuid() above
                 * succeeded, since the kernel does not let unprivileged parents kill their privileged
                 * children this way. The control group kill logic covers the remaining cases. */
                if (prctl(PR_SET_PDEATHSIG, SIGTERM) < 0)
                        goto child_finish;

                /* Report to the parent that setup is complete. This matters in particular for the
                 * privilege dropping above, as unit setup could otherwise race against our
                 * setresuid(2) call.
                 *
                 * Should the parent have aborted we notice that below, hence the result is ignored. */
                (void) barrier_place(&barrier);

                /* Only wait for the signal if the parent is still our parent, i.e. has not died yet */
                if (getppid() == parent_pid) {
                        sigset_t term_only;

                        assert_se(sigemptyset(&term_only) >= 0);
                        assert_se(sigaddset(&term_only, SIGTERM) >= 0);

                        for (;;) {
                                if (sigwait(&term_only, &received) >= 0) {
                                        assert(received == SIGTERM);
                                        break;
                                }

                                if (errno != EINTR)
                                        goto child_finish;
                        }
                }

                pam_code = pam_setcred(handle, PAM_DELETE_CRED | pam_flags);
                if (pam_code != PAM_SUCCESS)
                        goto child_finish;

                /* If our parent died we'll end the session */
                if (getppid() != parent_pid) {
                        pam_code = pam_close_session(handle, pam_flags);
                        if (pam_code != PAM_SUCCESS)
                                goto child_finish;
                }

                exit_code = 0;

        child_finish:
                pam_end(handle, pam_code | pam_flags);
                _exit(exit_code);
        }

        barrier_set_role(&barrier, BARRIER_PARENT);

        /* The helper was forked off successfully and does all the cleanup, so drop our reference to
         * the handle here. */
        handle = NULL;

        /* Restore the signal mask we had before blocking SIGTERM */
        assert_se(sigprocmask(SIG_SETMASK, &saved_mask, NULL) >= 0);

        /* Close the log explicitly: PAM modules might have opened it, and we do not want that fd to
         * stay around. */
        closelog();

        /* Wait synchronously until the helper is initialized. There is no way to recover from an error
         * here, so it is not treated as fatal; it is logged loudly, though. */
        if (!barrier_place_and_sync(&barrier))
                log_error("PAM initialization failed");

        return strv_free_and_replace(*env, pam_env);

fail:
        if (pam_code == PAM_SUCCESS)
                log_error_errno(r, "PAM failed: %m");
        else {
                log_error("PAM failed: %s", pam_strerror(handle, pam_code));
                r = -EPERM; /* PAM errors do not map to errno */
        }

        if (handle) {
                if (session_opened)
                        pam_code = pam_close_session(handle, pam_flags);

                pam_end(handle, pam_code | pam_flags);
        }

        strv_free(pam_env);
        closelog();

        return r;
#else
        return 0;
#endif
}
5b6319dc 1344
5d6b1584
LP
/* Renames the calling process to the last path component of 'path' wrapped in parentheses. Only the
 * final 8 characters of that component are used; if the component is empty the name "(...)" is used. */
static void rename_process_from_path(const char *path) {
        char process_name[11], *w = process_name;
        const char *base;
        size_t n;

        /* The result has to fit into 10 characters (i.e. the length of "/sbin/init") so that it looks
         * pretty in /bin/ps: two parentheses plus at most 8 characters of the name. */

        base = basename(path);
        if (isempty(base)) {
                rename_process("(...)");
                return;
        }

        n = strlen(base);
        if (n > 8) {
                /* Keep the tail rather than the head: the end of the name is usually the more
                 * interesting part, as the beginning might just be "systemd-" */
                base += n - 8;
                n = 8;
        }

        *w++ = '(';
        memcpy(w, base, n);
        w += n;
        *w++ = ')';
        *w = 0;

        rename_process(process_name);
}
1375
469830d1
LP
1376static bool context_has_address_families(const ExecContext *c) {
1377 assert(c);
1378
1379 return c->address_families_whitelist ||
1380 !set_isempty(c->address_families);
1381}
1382
1383static bool context_has_syscall_filters(const ExecContext *c) {
1384 assert(c);
1385
1386 return c->syscall_whitelist ||
8cfa775f 1387 !hashmap_isempty(c->syscall_filter);
469830d1
LP
1388}
1389
1390static bool context_has_no_new_privileges(const ExecContext *c) {
1391 assert(c);
1392
1393 if (c->no_new_privileges)
1394 return true;
1395
1396 if (have_effective_cap(CAP_SYS_ADMIN)) /* if we are privileged, we don't need NNP */
1397 return false;
1398
1399 /* We need NNP if we have any form of seccomp and are unprivileged */
1400 return context_has_address_families(c) ||
1401 c->memory_deny_write_execute ||
1402 c->restrict_realtime ||
f69567cb 1403 c->restrict_suid_sgid ||
469830d1
LP
1404 exec_context_restrict_namespaces_set(c) ||
1405 c->protect_kernel_tunables ||
1406 c->protect_kernel_modules ||
84703040 1407 c->protect_kernel_logs ||
469830d1
LP
1408 c->private_devices ||
1409 context_has_syscall_filters(c) ||
78e864e5 1410 !set_isempty(c->syscall_archs) ||
aecd5ac6
TM
1411 c->lock_personality ||
1412 c->protect_hostname;
469830d1
LP
1413}
1414
349cc4a5 1415#if HAVE_SECCOMP
17df7223 1416
83f12b27 1417static bool skip_seccomp_unavailable(const Unit* u, const char* msg) {
f673b62d
LP
1418
1419 if (is_seccomp_available())
1420 return false;
1421
f673b62d 1422 log_unit_debug(u, "SECCOMP features not detected in the kernel, skipping %s", msg);
f673b62d 1423 return true;
83f12b27
FS
1424}
1425
165a31c0 1426static int apply_syscall_filter(const Unit* u, const ExecContext *c, bool needs_ambient_hack) {
469830d1 1427 uint32_t negative_action, default_action, action;
165a31c0 1428 int r;
8351ceae 1429
469830d1 1430 assert(u);
c0467cf3 1431 assert(c);
8351ceae 1432
469830d1 1433 if (!context_has_syscall_filters(c))
83f12b27
FS
1434 return 0;
1435
469830d1
LP
1436 if (skip_seccomp_unavailable(u, "SystemCallFilter="))
1437 return 0;
e9642be2 1438
ccc16c78 1439 negative_action = c->syscall_errno == 0 ? scmp_act_kill_process() : SCMP_ACT_ERRNO(c->syscall_errno);
e9642be2 1440
469830d1
LP
1441 if (c->syscall_whitelist) {
1442 default_action = negative_action;
1443 action = SCMP_ACT_ALLOW;
7c66bae2 1444 } else {
469830d1
LP
1445 default_action = SCMP_ACT_ALLOW;
1446 action = negative_action;
57183d11 1447 }
8351ceae 1448
165a31c0
LP
1449 if (needs_ambient_hack) {
1450 r = seccomp_filter_set_add(c->syscall_filter, c->syscall_whitelist, syscall_filter_sets + SYSCALL_FILTER_SET_SETUID);
1451 if (r < 0)
1452 return r;
1453 }
1454
b54f36c6 1455 return seccomp_load_syscall_filter_set_raw(default_action, c->syscall_filter, action, false);
4298d0b5
LP
1456}
1457
469830d1
LP
1458static int apply_syscall_archs(const Unit *u, const ExecContext *c) {
1459 assert(u);
4298d0b5
LP
1460 assert(c);
1461
469830d1 1462 if (set_isempty(c->syscall_archs))
83f12b27
FS
1463 return 0;
1464
469830d1
LP
1465 if (skip_seccomp_unavailable(u, "SystemCallArchitectures="))
1466 return 0;
4298d0b5 1467
469830d1
LP
1468 return seccomp_restrict_archs(c->syscall_archs);
1469}
4298d0b5 1470
469830d1
LP
1471static int apply_address_families(const Unit* u, const ExecContext *c) {
1472 assert(u);
1473 assert(c);
4298d0b5 1474
469830d1
LP
1475 if (!context_has_address_families(c))
1476 return 0;
4298d0b5 1477
469830d1
LP
1478 if (skip_seccomp_unavailable(u, "RestrictAddressFamilies="))
1479 return 0;
4298d0b5 1480
469830d1 1481 return seccomp_restrict_address_families(c->address_families, c->address_families_whitelist);
8351ceae 1482}
4298d0b5 1483
83f12b27 1484static int apply_memory_deny_write_execute(const Unit* u, const ExecContext *c) {
469830d1 1485 assert(u);
f3e43635
TM
1486 assert(c);
1487
469830d1 1488 if (!c->memory_deny_write_execute)
83f12b27
FS
1489 return 0;
1490
469830d1
LP
1491 if (skip_seccomp_unavailable(u, "MemoryDenyWriteExecute="))
1492 return 0;
f3e43635 1493
469830d1 1494 return seccomp_memory_deny_write_execute();
f3e43635
TM
1495}
1496
83f12b27 1497static int apply_restrict_realtime(const Unit* u, const ExecContext *c) {
469830d1 1498 assert(u);
f4170c67
LP
1499 assert(c);
1500
469830d1 1501 if (!c->restrict_realtime)
83f12b27
FS
1502 return 0;
1503
469830d1
LP
1504 if (skip_seccomp_unavailable(u, "RestrictRealtime="))
1505 return 0;
f4170c67 1506
469830d1 1507 return seccomp_restrict_realtime();
f4170c67
LP
1508}
1509
f69567cb
LP
1510static int apply_restrict_suid_sgid(const Unit* u, const ExecContext *c) {
1511 assert(u);
1512 assert(c);
1513
1514 if (!c->restrict_suid_sgid)
1515 return 0;
1516
1517 if (skip_seccomp_unavailable(u, "RestrictSUIDSGID="))
1518 return 0;
1519
1520 return seccomp_restrict_suid_sgid();
1521}
1522
59e856c7 1523static int apply_protect_sysctl(const Unit *u, const ExecContext *c) {
469830d1 1524 assert(u);
59eeb84b
LP
1525 assert(c);
1526
1527 /* Turn off the legacy sysctl() system call. Many distributions turn this off while building the kernel, but
1528 * let's protect even those systems where this is left on in the kernel. */
1529
469830d1 1530 if (!c->protect_kernel_tunables)
59eeb84b
LP
1531 return 0;
1532
469830d1
LP
1533 if (skip_seccomp_unavailable(u, "ProtectKernelTunables="))
1534 return 0;
59eeb84b 1535
469830d1 1536 return seccomp_protect_sysctl();
59eeb84b
LP
1537}
1538
59e856c7 1539static int apply_protect_kernel_modules(const Unit *u, const ExecContext *c) {
469830d1 1540 assert(u);
502d704e
DH
1541 assert(c);
1542
25a8d8a0 1543 /* Turn off module syscalls on ProtectKernelModules=yes */
502d704e 1544
469830d1
LP
1545 if (!c->protect_kernel_modules)
1546 return 0;
1547
502d704e
DH
1548 if (skip_seccomp_unavailable(u, "ProtectKernelModules="))
1549 return 0;
1550
b54f36c6 1551 return seccomp_load_syscall_filter_set(SCMP_ACT_ALLOW, syscall_filter_sets + SYSCALL_FILTER_SET_MODULE, SCMP_ACT_ERRNO(EPERM), false);
502d704e
DH
1552}
1553
84703040
KK
1554static int apply_protect_kernel_logs(const Unit *u, const ExecContext *c) {
1555 assert(u);
1556 assert(c);
1557
1558 if (!c->protect_kernel_logs)
1559 return 0;
1560
1561 if (skip_seccomp_unavailable(u, "ProtectKernelLogs="))
1562 return 0;
1563
1564 return seccomp_protect_syslog();
1565}
1566
59e856c7 1567static int apply_private_devices(const Unit *u, const ExecContext *c) {
469830d1 1568 assert(u);
ba128bb8
LP
1569 assert(c);
1570
8f81a5f6 1571 /* If PrivateDevices= is set, also turn off iopl and all @raw-io syscalls. */
ba128bb8 1572
469830d1
LP
1573 if (!c->private_devices)
1574 return 0;
1575
ba128bb8
LP
1576 if (skip_seccomp_unavailable(u, "PrivateDevices="))
1577 return 0;
1578
b54f36c6 1579 return seccomp_load_syscall_filter_set(SCMP_ACT_ALLOW, syscall_filter_sets + SYSCALL_FILTER_SET_RAW_IO, SCMP_ACT_ERRNO(EPERM), false);
ba128bb8
LP
1580}
1581
34cf6c43 1582static int apply_restrict_namespaces(const Unit *u, const ExecContext *c) {
469830d1 1583 assert(u);
add00535
LP
1584 assert(c);
1585
1586 if (!exec_context_restrict_namespaces_set(c))
1587 return 0;
1588
1589 if (skip_seccomp_unavailable(u, "RestrictNamespaces="))
1590 return 0;
1591
1592 return seccomp_restrict_namespaces(c->restrict_namespaces);
1593}
1594
78e864e5 1595static int apply_lock_personality(const Unit* u, const ExecContext *c) {
e8132d63
LP
1596 unsigned long personality;
1597 int r;
78e864e5
TM
1598
1599 assert(u);
1600 assert(c);
1601
1602 if (!c->lock_personality)
1603 return 0;
1604
1605 if (skip_seccomp_unavailable(u, "LockPersonality="))
1606 return 0;
1607
e8132d63
LP
1608 personality = c->personality;
1609
1610 /* If personality is not specified, use either PER_LINUX or PER_LINUX32 depending on what is currently set. */
1611 if (personality == PERSONALITY_INVALID) {
1612
1613 r = opinionated_personality(&personality);
1614 if (r < 0)
1615 return r;
1616 }
78e864e5
TM
1617
1618 return seccomp_lock_personality(personality);
1619}
1620
c0467cf3 1621#endif
8351ceae 1622
3042bbeb 1623static void do_idle_pipe_dance(int idle_pipe[static 4]) {
31a7eb86
ZJS
1624 assert(idle_pipe);
1625
54eb2300
LP
1626 idle_pipe[1] = safe_close(idle_pipe[1]);
1627 idle_pipe[2] = safe_close(idle_pipe[2]);
31a7eb86
ZJS
1628
1629 if (idle_pipe[0] >= 0) {
1630 int r;
1631
1632 r = fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT_USEC);
1633
1634 if (idle_pipe[3] >= 0 && r == 0 /* timeout */) {
c7cc737f
LP
1635 ssize_t n;
1636
31a7eb86 1637 /* Signal systemd that we are bored and want to continue. */
c7cc737f
LP
1638 n = write(idle_pipe[3], "x", 1);
1639 if (n > 0)
cd972d69 1640 /* Wait for systemd to react to the signal above. */
54756dce 1641 (void) fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT2_USEC);
31a7eb86
ZJS
1642 }
1643
54eb2300 1644 idle_pipe[0] = safe_close(idle_pipe[0]);
31a7eb86
ZJS
1645
1646 }
1647
54eb2300 1648 idle_pipe[3] = safe_close(idle_pipe[3]);
31a7eb86
ZJS
1649}
1650
fb2042dd
YW
1651static const char *exec_directory_env_name_to_string(ExecDirectoryType t);
1652
7cae38c4 1653static int build_environment(
34cf6c43 1654 const Unit *u,
9fa95f85 1655 const ExecContext *c,
1e22b5cd 1656 const ExecParameters *p,
da6053d0 1657 size_t n_fds,
7cae38c4
LP
1658 const char *home,
1659 const char *username,
1660 const char *shell,
7bce046b
LP
1661 dev_t journal_stream_dev,
1662 ino_t journal_stream_ino,
7cae38c4
LP
1663 char ***ret) {
1664
1665 _cleanup_strv_free_ char **our_env = NULL;
fb2042dd 1666 ExecDirectoryType t;
da6053d0 1667 size_t n_env = 0;
7cae38c4
LP
1668 char *x;
1669
4b58153d 1670 assert(u);
7cae38c4 1671 assert(c);
7c1cb6f1 1672 assert(p);
7cae38c4
LP
1673 assert(ret);
1674
fb2042dd 1675 our_env = new0(char*, 14 + _EXEC_DIRECTORY_TYPE_MAX);
7cae38c4
LP
1676 if (!our_env)
1677 return -ENOMEM;
1678
1679 if (n_fds > 0) {
8dd4c05b
LP
1680 _cleanup_free_ char *joined = NULL;
1681
df0ff127 1682 if (asprintf(&x, "LISTEN_PID="PID_FMT, getpid_cached()) < 0)
7cae38c4
LP
1683 return -ENOMEM;
1684 our_env[n_env++] = x;
1685
da6053d0 1686 if (asprintf(&x, "LISTEN_FDS=%zu", n_fds) < 0)
7cae38c4
LP
1687 return -ENOMEM;
1688 our_env[n_env++] = x;
8dd4c05b 1689
1e22b5cd 1690 joined = strv_join(p->fd_names, ":");
8dd4c05b
LP
1691 if (!joined)
1692 return -ENOMEM;
1693
605405c6 1694 x = strjoin("LISTEN_FDNAMES=", joined);
8dd4c05b
LP
1695 if (!x)
1696 return -ENOMEM;
1697 our_env[n_env++] = x;
7cae38c4
LP
1698 }
1699
b08af3b1 1700 if ((p->flags & EXEC_SET_WATCHDOG) && p->watchdog_usec > 0) {
df0ff127 1701 if (asprintf(&x, "WATCHDOG_PID="PID_FMT, getpid_cached()) < 0)
09812eb7
LP
1702 return -ENOMEM;
1703 our_env[n_env++] = x;
1704
1e22b5cd 1705 if (asprintf(&x, "WATCHDOG_USEC="USEC_FMT, p->watchdog_usec) < 0)
09812eb7
LP
1706 return -ENOMEM;
1707 our_env[n_env++] = x;
1708 }
1709
fd63e712
LP
1710 /* If this is D-Bus, tell the nss-systemd module, since it relies on being able to use D-Bus look up dynamic
1711 * users via PID 1, possibly dead-locking the dbus daemon. This way it will not use D-Bus to resolve names, but
1712 * check the database directly. */
ac647978 1713 if (p->flags & EXEC_NSS_BYPASS_BUS) {
fd63e712
LP
1714 x = strdup("SYSTEMD_NSS_BYPASS_BUS=1");
1715 if (!x)
1716 return -ENOMEM;
1717 our_env[n_env++] = x;
1718 }
1719
7cae38c4 1720 if (home) {
b910cc72 1721 x = strjoin("HOME=", home);
7cae38c4
LP
1722 if (!x)
1723 return -ENOMEM;
7bbead1d
LP
1724
1725 path_simplify(x + 5, true);
7cae38c4
LP
1726 our_env[n_env++] = x;
1727 }
1728
1729 if (username) {
b910cc72 1730 x = strjoin("LOGNAME=", username);
7cae38c4
LP
1731 if (!x)
1732 return -ENOMEM;
1733 our_env[n_env++] = x;
1734
b910cc72 1735 x = strjoin("USER=", username);
7cae38c4
LP
1736 if (!x)
1737 return -ENOMEM;
1738 our_env[n_env++] = x;
1739 }
1740
1741 if (shell) {
b910cc72 1742 x = strjoin("SHELL=", shell);
7cae38c4
LP
1743 if (!x)
1744 return -ENOMEM;
7bbead1d
LP
1745
1746 path_simplify(x + 6, true);
7cae38c4
LP
1747 our_env[n_env++] = x;
1748 }
1749
4b58153d
LP
1750 if (!sd_id128_is_null(u->invocation_id)) {
1751 if (asprintf(&x, "INVOCATION_ID=" SD_ID128_FORMAT_STR, SD_ID128_FORMAT_VAL(u->invocation_id)) < 0)
1752 return -ENOMEM;
1753
1754 our_env[n_env++] = x;
1755 }
1756
6af760f3
LP
1757 if (exec_context_needs_term(c)) {
1758 const char *tty_path, *term = NULL;
1759
1760 tty_path = exec_context_tty_path(c);
1761
1762 /* If we are forked off PID 1 and we are supposed to operate on /dev/console, then let's try to inherit
1763 * the $TERM set for PID 1. This is useful for containers so that the $TERM the container manager
1764 * passes to PID 1 ends up all the way in the console login shown. */
1765
1766 if (path_equal(tty_path, "/dev/console") && getppid() == 1)
1767 term = getenv("TERM");
1768 if (!term)
1769 term = default_term_for_tty(tty_path);
7cae38c4 1770
b910cc72 1771 x = strjoin("TERM=", term);
7cae38c4
LP
1772 if (!x)
1773 return -ENOMEM;
1774 our_env[n_env++] = x;
1775 }
1776
7bce046b
LP
1777 if (journal_stream_dev != 0 && journal_stream_ino != 0) {
1778 if (asprintf(&x, "JOURNAL_STREAM=" DEV_FMT ":" INO_FMT, journal_stream_dev, journal_stream_ino) < 0)
1779 return -ENOMEM;
1780
1781 our_env[n_env++] = x;
1782 }
1783
fb2042dd
YW
1784 for (t = 0; t < _EXEC_DIRECTORY_TYPE_MAX; t++) {
1785 _cleanup_free_ char *pre = NULL, *joined = NULL;
1786 const char *n;
1787
1788 if (!p->prefix[t])
1789 continue;
1790
1791 if (strv_isempty(c->directories[t].paths))
1792 continue;
1793
1794 n = exec_directory_env_name_to_string(t);
1795 if (!n)
1796 continue;
1797
1798 pre = strjoin(p->prefix[t], "/");
1799 if (!pre)
1800 return -ENOMEM;
1801
1802 joined = strv_join_prefix(c->directories[t].paths, ":", pre);
1803 if (!joined)
1804 return -ENOMEM;
1805
1806 x = strjoin(n, "=", joined);
1807 if (!x)
1808 return -ENOMEM;
1809
1810 our_env[n_env++] = x;
1811 }
1812
7cae38c4 1813 our_env[n_env++] = NULL;
fb2042dd 1814 assert(n_env <= 14 + _EXEC_DIRECTORY_TYPE_MAX);
7cae38c4 1815
ae2a15bc 1816 *ret = TAKE_PTR(our_env);
7cae38c4
LP
1817
1818 return 0;
1819}
1820
b4c14404
FB
1821static int build_pass_environment(const ExecContext *c, char ***ret) {
1822 _cleanup_strv_free_ char **pass_env = NULL;
1823 size_t n_env = 0, n_bufsize = 0;
1824 char **i;
1825
1826 STRV_FOREACH(i, c->pass_environment) {
1827 _cleanup_free_ char *x = NULL;
1828 char *v;
1829
1830 v = getenv(*i);
1831 if (!v)
1832 continue;
605405c6 1833 x = strjoin(*i, "=", v);
b4c14404
FB
1834 if (!x)
1835 return -ENOMEM;
00819cc1 1836
b4c14404
FB
1837 if (!GREEDY_REALLOC(pass_env, n_bufsize, n_env + 2))
1838 return -ENOMEM;
00819cc1 1839
1cc6c93a 1840 pass_env[n_env++] = TAKE_PTR(x);
b4c14404 1841 pass_env[n_env] = NULL;
b4c14404
FB
1842 }
1843
ae2a15bc 1844 *ret = TAKE_PTR(pass_env);
b4c14404
FB
1845
1846 return 0;
1847}
1848
8b44a3d2
LP
1849static bool exec_needs_mount_namespace(
1850 const ExecContext *context,
1851 const ExecParameters *params,
4657abb5 1852 const ExecRuntime *runtime) {
8b44a3d2
LP
1853
1854 assert(context);
1855 assert(params);
1856
915e6d16
LP
1857 if (context->root_image)
1858 return true;
1859
2a624c36
AP
1860 if (!strv_isempty(context->read_write_paths) ||
1861 !strv_isempty(context->read_only_paths) ||
1862 !strv_isempty(context->inaccessible_paths))
8b44a3d2
LP
1863 return true;
1864
42b1d8e0 1865 if (context->n_bind_mounts > 0)
d2d6c096
LP
1866 return true;
1867
2abd4e38
YW
1868 if (context->n_temporary_filesystems > 0)
1869 return true;
1870
37ed15d7 1871 if (!IN_SET(context->mount_flags, 0, MS_SHARED))
8b44a3d2
LP
1872 return true;
1873
1874 if (context->private_tmp && runtime && (runtime->tmp_dir || runtime->var_tmp_dir))
1875 return true;
1876
8b44a3d2 1877 if (context->private_devices ||
228af36f 1878 context->private_mounts ||
8b44a3d2 1879 context->protect_system != PROTECT_SYSTEM_NO ||
59eeb84b
LP
1880 context->protect_home != PROTECT_HOME_NO ||
1881 context->protect_kernel_tunables ||
c575770b 1882 context->protect_kernel_modules ||
94a7b275 1883 context->protect_kernel_logs ||
59eeb84b 1884 context->protect_control_groups)
8b44a3d2
LP
1885 return true;
1886
37c56f89
YW
1887 if (context->root_directory) {
1888 ExecDirectoryType t;
1889
1890 if (context->mount_apivfs)
1891 return true;
1892
1893 for (t = 0; t < _EXEC_DIRECTORY_TYPE_MAX; t++) {
1894 if (!params->prefix[t])
1895 continue;
1896
1897 if (!strv_isempty(context->directories[t].paths))
1898 return true;
1899 }
1900 }
5d997827 1901
42b1d8e0 1902 if (context->dynamic_user &&
b43ee82f 1903 (!strv_isempty(context->directories[EXEC_DIRECTORY_STATE].paths) ||
42b1d8e0
YW
1904 !strv_isempty(context->directories[EXEC_DIRECTORY_CACHE].paths) ||
1905 !strv_isempty(context->directories[EXEC_DIRECTORY_LOGS].paths)))
1906 return true;
1907
8b44a3d2
LP
1908 return false;
1909}
1910
5749f855 1911static int setup_private_users(uid_t ouid, gid_t ogid, uid_t uid, gid_t gid) {
d251207d
LP
1912 _cleanup_free_ char *uid_map = NULL, *gid_map = NULL;
1913 _cleanup_close_pair_ int errno_pipe[2] = { -1, -1 };
1914 _cleanup_close_ int unshare_ready_fd = -1;
1915 _cleanup_(sigkill_waitp) pid_t pid = 0;
1916 uint64_t c = 1;
d251207d
LP
1917 ssize_t n;
1918 int r;
1919
5749f855
AZ
1920 /* Set up a user namespace and map the original UID/GID (IDs from before any user or group changes, i.e.
1921 * the IDs from the user or system manager(s)) to itself, the selected UID/GID to itself, and everything else to
d251207d
LP
1922 * nobody. In order to be able to write this mapping we need CAP_SETUID in the original user namespace, which
1923 * we however lack after opening the user namespace. To work around this we fork() a temporary child process,
1924 * which waits for the parent to create the new user namespace while staying in the original namespace. The
1925 * child then writes the UID mapping, under full privileges. The parent waits for the child to finish and
5749f855
AZ
1926 * continues execution normally.
1927 * For unprivileged users (i.e. without capabilities), the root to root mapping is excluded. As such, it
1928 * does not need CAP_SETUID to write the single line mapping to itself. */
d251207d 1929
5749f855
AZ
1930 /* Can only set up multiple mappings with CAP_SETUID. */
1931 if (have_effective_cap(CAP_SETUID) && uid != ouid && uid_is_valid(uid))
587ab01b 1932 r = asprintf(&uid_map,
5749f855 1933 UID_FMT " " UID_FMT " 1\n" /* Map $OUID → $OUID */
587ab01b 1934 UID_FMT " " UID_FMT " 1\n", /* Map $UID → $UID */
5749f855
AZ
1935 ouid, ouid, uid, uid);
1936 else
1937 r = asprintf(&uid_map,
1938 UID_FMT " " UID_FMT " 1\n", /* Map $OUID → $OUID */
1939 ouid, ouid);
d251207d 1940
5749f855
AZ
1941 if (r < 0)
1942 return -ENOMEM;
1943
1944 /* Can only set up multiple mappings with CAP_SETGID. */
1945 if (have_effective_cap(CAP_SETGID) && gid != ogid && gid_is_valid(gid))
587ab01b 1946 r = asprintf(&gid_map,
5749f855 1947 GID_FMT " " GID_FMT " 1\n" /* Map $OGID → $OGID */
587ab01b 1948 GID_FMT " " GID_FMT " 1\n", /* Map $GID → $GID */
5749f855
AZ
1949 ogid, ogid, gid, gid);
1950 else
1951 r = asprintf(&gid_map,
1952 GID_FMT " " GID_FMT " 1\n", /* Map $OGID -> $OGID */
1953 ogid, ogid);
1954
1955 if (r < 0)
1956 return -ENOMEM;
d251207d
LP
1957
1958 /* Create a communication channel so that the parent can tell the child when it finished creating the user
1959 * namespace. */
1960 unshare_ready_fd = eventfd(0, EFD_CLOEXEC);
1961 if (unshare_ready_fd < 0)
1962 return -errno;
1963
1964 /* Create a communication channel so that the child can tell the parent a proper error code in case it
1965 * failed. */
1966 if (pipe2(errno_pipe, O_CLOEXEC) < 0)
1967 return -errno;
1968
4c253ed1
LP
1969 r = safe_fork("(sd-userns)", FORK_RESET_SIGNALS|FORK_DEATHSIG, &pid);
1970 if (r < 0)
1971 return r;
1972 if (r == 0) {
d251207d
LP
1973 _cleanup_close_ int fd = -1;
1974 const char *a;
1975 pid_t ppid;
1976
1977 /* Child process, running in the original user namespace. Let's update the parent's UID/GID map from
1978 * here, after the parent opened its own user namespace. */
1979
1980 ppid = getppid();
1981 errno_pipe[0] = safe_close(errno_pipe[0]);
1982
1983 /* Wait until the parent unshared the user namespace */
1984 if (read(unshare_ready_fd, &c, sizeof(c)) < 0) {
1985 r = -errno;
1986 goto child_fail;
1987 }
1988
1989 /* Disable the setgroups() system call in the child user namespace, for good. */
1990 a = procfs_file_alloca(ppid, "setgroups");
1991 fd = open(a, O_WRONLY|O_CLOEXEC);
1992 if (fd < 0) {
1993 if (errno != ENOENT) {
1994 r = -errno;
1995 goto child_fail;
1996 }
1997
1998 /* If the file is missing the kernel is too old, let's continue anyway. */
1999 } else {
2000 if (write(fd, "deny\n", 5) < 0) {
2001 r = -errno;
2002 goto child_fail;
2003 }
2004
2005 fd = safe_close(fd);
2006 }
2007
2008 /* First write the GID map */
2009 a = procfs_file_alloca(ppid, "gid_map");
2010 fd = open(a, O_WRONLY|O_CLOEXEC);
2011 if (fd < 0) {
2012 r = -errno;
2013 goto child_fail;
2014 }
2015 if (write(fd, gid_map, strlen(gid_map)) < 0) {
2016 r = -errno;
2017 goto child_fail;
2018 }
2019 fd = safe_close(fd);
2020
2021 /* The write the UID map */
2022 a = procfs_file_alloca(ppid, "uid_map");
2023 fd = open(a, O_WRONLY|O_CLOEXEC);
2024 if (fd < 0) {
2025 r = -errno;
2026 goto child_fail;
2027 }
2028 if (write(fd, uid_map, strlen(uid_map)) < 0) {
2029 r = -errno;
2030 goto child_fail;
2031 }
2032
2033 _exit(EXIT_SUCCESS);
2034
2035 child_fail:
2036 (void) write(errno_pipe[1], &r, sizeof(r));
2037 _exit(EXIT_FAILURE);
2038 }
2039
2040 errno_pipe[1] = safe_close(errno_pipe[1]);
2041
2042 if (unshare(CLONE_NEWUSER) < 0)
2043 return -errno;
2044
2045 /* Let the child know that the namespace is ready now */
2046 if (write(unshare_ready_fd, &c, sizeof(c)) < 0)
2047 return -errno;
2048
2049 /* Try to read an error code from the child */
2050 n = read(errno_pipe[0], &r, sizeof(r));
2051 if (n < 0)
2052 return -errno;
2053 if (n == sizeof(r)) { /* an error code was sent to us */
2054 if (r < 0)
2055 return r;
2056 return -EIO;
2057 }
2058 if (n != 0) /* on success we should have read 0 bytes */
2059 return -EIO;
2060
2e87a1fd
LP
2061 r = wait_for_terminate_and_check("(sd-userns)", pid, 0);
2062 pid = 0;
d251207d
LP
2063 if (r < 0)
2064 return r;
2e87a1fd 2065 if (r != EXIT_SUCCESS) /* If something strange happened with the child, let's consider this fatal, too */
d251207d
LP
2066 return -EIO;
2067
2068 return 0;
2069}
2070
494d0247
YW
2071static bool exec_directory_is_private(const ExecContext *context, ExecDirectoryType type) {
2072 if (!context->dynamic_user)
2073 return false;
2074
2075 if (type == EXEC_DIRECTORY_CONFIGURATION)
2076 return false;
2077
2078 if (type == EXEC_DIRECTORY_RUNTIME && context->runtime_directory_preserve_mode == EXEC_PRESERVE_NO)
2079 return false;
2080
2081 return true;
2082}
2083
3536f49e 2084static int setup_exec_directory(
07689d5d
LP
2085 const ExecContext *context,
2086 const ExecParameters *params,
2087 uid_t uid,
3536f49e 2088 gid_t gid,
3536f49e
YW
2089 ExecDirectoryType type,
2090 int *exit_status) {
07689d5d 2091
72fd1768 2092 static const int exit_status_table[_EXEC_DIRECTORY_TYPE_MAX] = {
3536f49e
YW
2093 [EXEC_DIRECTORY_RUNTIME] = EXIT_RUNTIME_DIRECTORY,
2094 [EXEC_DIRECTORY_STATE] = EXIT_STATE_DIRECTORY,
2095 [EXEC_DIRECTORY_CACHE] = EXIT_CACHE_DIRECTORY,
2096 [EXEC_DIRECTORY_LOGS] = EXIT_LOGS_DIRECTORY,
2097 [EXEC_DIRECTORY_CONFIGURATION] = EXIT_CONFIGURATION_DIRECTORY,
2098 };
07689d5d
LP
2099 char **rt;
2100 int r;
2101
2102 assert(context);
2103 assert(params);
72fd1768 2104 assert(type >= 0 && type < _EXEC_DIRECTORY_TYPE_MAX);
3536f49e 2105 assert(exit_status);
07689d5d 2106
3536f49e
YW
2107 if (!params->prefix[type])
2108 return 0;
2109
8679efde 2110 if (params->flags & EXEC_CHOWN_DIRECTORIES) {
3536f49e
YW
2111 if (!uid_is_valid(uid))
2112 uid = 0;
2113 if (!gid_is_valid(gid))
2114 gid = 0;
2115 }
2116
2117 STRV_FOREACH(rt, context->directories[type].paths) {
6c47cd7d 2118 _cleanup_free_ char *p = NULL, *pp = NULL;
07689d5d 2119
edbfeb12 2120 p = path_join(params->prefix[type], *rt);
3536f49e
YW
2121 if (!p) {
2122 r = -ENOMEM;
2123 goto fail;
2124 }
07689d5d 2125
23a7448e
YW
2126 r = mkdir_parents_label(p, 0755);
2127 if (r < 0)
3536f49e 2128 goto fail;
23a7448e 2129
494d0247 2130 if (exec_directory_is_private(context, type)) {
6c9c51e5 2131 _cleanup_free_ char *private_root = NULL;
6c47cd7d 2132
3f5b1508
LP
2133 /* So, here's one extra complication when dealing with DynamicUser=1 units. In that
2134 * case we want to avoid leaving a directory around fully accessible that is owned by
2135 * a dynamic user whose UID is later on reused. To lock this down we use the same
2136 * trick used by container managers to prohibit host users to get access to files of
2137 * the same UID in containers: we place everything inside a directory that has an
2138 * access mode of 0700 and is owned root:root, so that it acts as security boundary
2139 * for unprivileged host code. We then use fs namespacing to make this directory
2140 * permeable for the service itself.
6c47cd7d 2141 *
3f5b1508
LP
2142 * Specifically: for a service which wants a special directory "foo/" we first create
2143 * a directory "private/" with access mode 0700 owned by root:root. Then we place
2144 * "foo" inside of that directory (i.e. "private/foo/"), and make "foo" a symlink to
2145 * "private/foo". This way, privileged host users can access "foo/" as usual, but
2146 * unprivileged host users can't look into it. Inside of the namespace of the unit
2147 * "private/" is replaced by a more liberally accessible tmpfs, into which the host's
2148 * "private/foo/" is mounted under the same name, thus disabling the access boundary
2149 * for the service and making sure it only gets access to the dirs it needs but no
2150 * others. Tricky? Yes, absolutely, but it works!
6c47cd7d 2151 *
3f5b1508
LP
2152 * Note that we don't do this for EXEC_DIRECTORY_CONFIGURATION as that's assumed not
2153 * to be owned by the service itself.
2154 *
2155 * Also, note that we don't do this for EXEC_DIRECTORY_RUNTIME as that's often used
2156 * for sharing files or sockets with other services. */
6c47cd7d 2157
edbfeb12 2158 private_root = path_join(params->prefix[type], "private");
6c47cd7d
LP
2159 if (!private_root) {
2160 r = -ENOMEM;
2161 goto fail;
2162 }
2163
2164 /* First set up private root if it doesn't exist yet, with access mode 0700 and owned by root:root */
37c1d5e9 2165 r = mkdir_safe_label(private_root, 0700, 0, 0, MKDIR_WARN_MODE);
6c47cd7d
LP
2166 if (r < 0)
2167 goto fail;
2168
edbfeb12 2169 pp = path_join(private_root, *rt);
6c47cd7d
LP
2170 if (!pp) {
2171 r = -ENOMEM;
2172 goto fail;
2173 }
2174
2175 /* Create all directories between the configured directory and this private root, and mark them 0755 */
2176 r = mkdir_parents_label(pp, 0755);
2177 if (r < 0)
2178 goto fail;
2179
949befd3
LP
2180 if (is_dir(p, false) > 0 &&
2181 (laccess(pp, F_OK) < 0 && errno == ENOENT)) {
2182
2183 /* Hmm, the private directory doesn't exist yet, but the normal one exists? If so, move
2184 * it over. Most likely the service has been upgraded from one that didn't use
2185 * DynamicUser=1, to one that does. */
2186
cf52c45d
LP
2187 log_info("Found pre-existing public %s= directory %s, migrating to %s.\n"
2188 "Apparently, service previously had DynamicUser= turned off, and has now turned it on.",
2189 exec_directory_type_to_string(type), p, pp);
2190
949befd3
LP
2191 if (rename(p, pp) < 0) {
2192 r = -errno;
2193 goto fail;
2194 }
2195 } else {
2196 /* Otherwise, create the actual directory for the service */
2197
2198 r = mkdir_label(pp, context->directories[type].mode);
2199 if (r < 0 && r != -EEXIST)
2200 goto fail;
2201 }
6c47cd7d 2202
6c47cd7d 2203 /* And link it up from the original place */
6c9c51e5 2204 r = symlink_idempotent(pp, p, true);
6c47cd7d
LP
2205 if (r < 0)
2206 goto fail;
2207
6c47cd7d 2208 } else {
5c6d40d1
LP
2209 _cleanup_free_ char *target = NULL;
2210
2211 if (type != EXEC_DIRECTORY_CONFIGURATION &&
2212 readlink_and_make_absolute(p, &target) >= 0) {
2213 _cleanup_free_ char *q = NULL;
2214
2215 /* This already exists and is a symlink? Interesting. Maybe it's one created
2193f17c
LP
2216 * by DynamicUser=1 (see above)?
2217 *
2218 * We do this for all directory types except for ConfigurationDirectory=,
2219 * since they all support the private/ symlink logic at least in some
2220 * configurations, see above. */
5c6d40d1
LP
2221
2222 q = path_join(params->prefix[type], "private", *rt);
2223 if (!q) {
2224 r = -ENOMEM;
2225 goto fail;
2226 }
2227
2228 if (path_equal(q, target)) {
2229
2230 /* Hmm, apparently DynamicUser= was once turned on for this service,
2231 * but is no longer. Let's move the directory back up. */
2232
cf52c45d
LP
2233 log_info("Found pre-existing private %s= directory %s, migrating to %s.\n"
2234 "Apparently, service previously had DynamicUser= turned on, and has now turned it off.",
2235 exec_directory_type_to_string(type), q, p);
2236
5c6d40d1
LP
2237 if (unlink(p) < 0) {
2238 r = -errno;
2239 goto fail;
2240 }
2241
2242 if (rename(q, p) < 0) {
2243 r = -errno;
2244 goto fail;
2245 }
2246 }
2247 }
2248
6c47cd7d 2249 r = mkdir_label(p, context->directories[type].mode);
d484580c 2250 if (r < 0) {
d484580c
LP
2251 if (r != -EEXIST)
2252 goto fail;
2253
206e9864
LP
2254 if (type == EXEC_DIRECTORY_CONFIGURATION) {
2255 struct stat st;
2256
2257 /* Don't change the owner/access mode of the configuration directory,
2258 * as in the common case it is not written to by a service, and shall
2259 * not be writable. */
2260
2261 if (stat(p, &st) < 0) {
2262 r = -errno;
2263 goto fail;
2264 }
2265
2266 /* Still complain if the access mode doesn't match */
2267 if (((st.st_mode ^ context->directories[type].mode) & 07777) != 0)
2268 log_warning("%s \'%s\' already exists but the mode is different. "
2269 "(File system: %o %sMode: %o)",
2270 exec_directory_type_to_string(type), *rt,
2271 st.st_mode & 07777, exec_directory_type_to_string(type), context->directories[type].mode & 07777);
2272
6cff72eb 2273 continue;
206e9864 2274 }
6cff72eb 2275 }
a1164ae3 2276 }
07689d5d 2277
206e9864 2278 /* Lock down the access mode (we use chmod_and_chown() to make this idempotent. We don't
5238e957 2279 * specify UID/GID here, so that path_chown_recursive() can optimize things depending on the
206e9864
LP
2280 * current UID/GID ownership.) */
2281 r = chmod_and_chown(pp ?: p, context->directories[type].mode, UID_INVALID, GID_INVALID);
2282 if (r < 0)
2283 goto fail;
c71b2eb7 2284
607b358e
LP
2285 /* Then, change the ownership of the whole tree, if necessary. When dynamic users are used we
2286 * drop the suid/sgid bits, since we really don't want SUID/SGID files for dynamic UID/GID
2287 * assignments to exist.*/
2288 r = path_chown_recursive(pp ?: p, uid, gid, context->dynamic_user ? 01777 : 07777);
07689d5d 2289 if (r < 0)
3536f49e 2290 goto fail;
07689d5d
LP
2291 }
2292
2293 return 0;
3536f49e
YW
2294
2295fail:
2296 *exit_status = exit_status_table[type];
3536f49e 2297 return r;
07689d5d
LP
2298}
2299
92b423b9 2300#if ENABLE_SMACK
cefc33ae
LP
2301static int setup_smack(
2302 const ExecContext *context,
2303 const ExecCommand *command) {
2304
cefc33ae
LP
2305 int r;
2306
2307 assert(context);
2308 assert(command);
2309
cefc33ae
LP
2310 if (context->smack_process_label) {
2311 r = mac_smack_apply_pid(0, context->smack_process_label);
2312 if (r < 0)
2313 return r;
2314 }
2315#ifdef SMACK_DEFAULT_PROCESS_LABEL
2316 else {
2317 _cleanup_free_ char *exec_label = NULL;
2318
2319 r = mac_smack_read(command->path, SMACK_ATTR_EXEC, &exec_label);
4c701096 2320 if (r < 0 && !IN_SET(r, -ENODATA, -EOPNOTSUPP))
cefc33ae
LP
2321 return r;
2322
2323 r = mac_smack_apply_pid(0, exec_label ? : SMACK_DEFAULT_PROCESS_LABEL);
2324 if (r < 0)
2325 return r;
2326 }
cefc33ae
LP
2327#endif
2328
2329 return 0;
2330}
92b423b9 2331#endif
cefc33ae 2332
6c47cd7d
LP
2333static int compile_bind_mounts(
2334 const ExecContext *context,
2335 const ExecParameters *params,
2336 BindMount **ret_bind_mounts,
da6053d0 2337 size_t *ret_n_bind_mounts,
6c47cd7d
LP
2338 char ***ret_empty_directories) {
2339
2340 _cleanup_strv_free_ char **empty_directories = NULL;
2341 BindMount *bind_mounts;
da6053d0 2342 size_t n, h = 0, i;
6c47cd7d
LP
2343 ExecDirectoryType t;
2344 int r;
2345
2346 assert(context);
2347 assert(params);
2348 assert(ret_bind_mounts);
2349 assert(ret_n_bind_mounts);
2350 assert(ret_empty_directories);
2351
2352 n = context->n_bind_mounts;
2353 for (t = 0; t < _EXEC_DIRECTORY_TYPE_MAX; t++) {
2354 if (!params->prefix[t])
2355 continue;
2356
2357 n += strv_length(context->directories[t].paths);
2358 }
2359
2360 if (n <= 0) {
2361 *ret_bind_mounts = NULL;
2362 *ret_n_bind_mounts = 0;
2363 *ret_empty_directories = NULL;
2364 return 0;
2365 }
2366
2367 bind_mounts = new(BindMount, n);
2368 if (!bind_mounts)
2369 return -ENOMEM;
2370
a8cabc61 2371 for (i = 0; i < context->n_bind_mounts; i++) {
6c47cd7d
LP
2372 BindMount *item = context->bind_mounts + i;
2373 char *s, *d;
2374
2375 s = strdup(item->source);
2376 if (!s) {
2377 r = -ENOMEM;
2378 goto finish;
2379 }
2380
2381 d = strdup(item->destination);
2382 if (!d) {
2383 free(s);
2384 r = -ENOMEM;
2385 goto finish;
2386 }
2387
2388 bind_mounts[h++] = (BindMount) {
2389 .source = s,
2390 .destination = d,
2391 .read_only = item->read_only,
2392 .recursive = item->recursive,
2393 .ignore_enoent = item->ignore_enoent,
2394 };
2395 }
2396
2397 for (t = 0; t < _EXEC_DIRECTORY_TYPE_MAX; t++) {
2398 char **suffix;
2399
2400 if (!params->prefix[t])
2401 continue;
2402
2403 if (strv_isempty(context->directories[t].paths))
2404 continue;
2405
494d0247 2406 if (exec_directory_is_private(context, t) &&
5609f688 2407 !(context->root_directory || context->root_image)) {
6c47cd7d
LP
2408 char *private_root;
2409
2410 /* So this is for a dynamic user, and we need to make sure the process can access its own
2411 * directory. For that we overmount the usually inaccessible "private" subdirectory with a
2412 * tmpfs that makes it accessible and is empty except for the submounts we do this for. */
2413
657ee2d8 2414 private_root = path_join(params->prefix[t], "private");
6c47cd7d
LP
2415 if (!private_root) {
2416 r = -ENOMEM;
2417 goto finish;
2418 }
2419
2420 r = strv_consume(&empty_directories, private_root);
a635a7ae 2421 if (r < 0)
6c47cd7d 2422 goto finish;
6c47cd7d
LP
2423 }
2424
2425 STRV_FOREACH(suffix, context->directories[t].paths) {
2426 char *s, *d;
2427
494d0247 2428 if (exec_directory_is_private(context, t))
657ee2d8 2429 s = path_join(params->prefix[t], "private", *suffix);
6c47cd7d 2430 else
657ee2d8 2431 s = path_join(params->prefix[t], *suffix);
6c47cd7d
LP
2432 if (!s) {
2433 r = -ENOMEM;
2434 goto finish;
2435 }
2436
494d0247 2437 if (exec_directory_is_private(context, t) &&
5609f688
YW
2438 (context->root_directory || context->root_image))
2439 /* When RootDirectory= or RootImage= are set, then the symbolic link to the private
2440 * directory is not created on the root directory. So, let's bind-mount the directory
2441 * on the 'non-private' place. */
657ee2d8 2442 d = path_join(params->prefix[t], *suffix);
5609f688
YW
2443 else
2444 d = strdup(s);
6c47cd7d
LP
2445 if (!d) {
2446 free(s);
2447 r = -ENOMEM;
2448 goto finish;
2449 }
2450
2451 bind_mounts[h++] = (BindMount) {
2452 .source = s,
2453 .destination = d,
2454 .read_only = false,
9ce4e4b0 2455 .nosuid = context->dynamic_user, /* don't allow suid/sgid when DynamicUser= is on */
6c47cd7d
LP
2456 .recursive = true,
2457 .ignore_enoent = false,
2458 };
2459 }
2460 }
2461
2462 assert(h == n);
2463
2464 *ret_bind_mounts = bind_mounts;
2465 *ret_n_bind_mounts = n;
ae2a15bc 2466 *ret_empty_directories = TAKE_PTR(empty_directories);
6c47cd7d
LP
2467
2468 return (int) n;
2469
2470finish:
2471 bind_mount_free_many(bind_mounts, h);
2472 return r;
2473}
2474
4e677599
LP
2475static bool insist_on_sandboxing(
2476 const ExecContext *context,
2477 const char *root_dir,
2478 const char *root_image,
2479 const BindMount *bind_mounts,
2480 size_t n_bind_mounts) {
2481
2482 size_t i;
2483
2484 assert(context);
2485 assert(n_bind_mounts == 0 || bind_mounts);
2486
2487 /* Checks whether we need to insist on fs namespacing. i.e. whether we have settings configured that
2488 * would alter the view on the file system beyond making things read-only or invisble, i.e. would
2489 * rearrange stuff in a way we cannot ignore gracefully. */
2490
2491 if (context->n_temporary_filesystems > 0)
2492 return true;
2493
2494 if (root_dir || root_image)
2495 return true;
2496
2497 if (context->dynamic_user)
2498 return true;
2499
2500 /* If there are any bind mounts set that don't map back onto themselves, fs namespacing becomes
2501 * essential. */
2502 for (i = 0; i < n_bind_mounts; i++)
2503 if (!path_equal(bind_mounts[i].source, bind_mounts[i].destination))
2504 return true;
2505
2506 return false;
2507}
2508
6818c54c 2509static int apply_mount_namespace(
34cf6c43
YW
2510 const Unit *u,
2511 const ExecCommand *command,
6818c54c
LP
2512 const ExecContext *context,
2513 const ExecParameters *params,
7cc5ef5f
ZJS
2514 const ExecRuntime *runtime,
2515 char **error_path) {
6818c54c 2516
7bcef4ef 2517 _cleanup_strv_free_ char **empty_directories = NULL;
93c6bb51 2518 char *tmp = NULL, *var = NULL;
915e6d16 2519 const char *root_dir = NULL, *root_image = NULL;
228af36f 2520 NamespaceInfo ns_info;
165a31c0 2521 bool needs_sandboxing;
6c47cd7d 2522 BindMount *bind_mounts = NULL;
da6053d0 2523 size_t n_bind_mounts = 0;
6818c54c 2524 int r;
93c6bb51 2525
2b3c1b9e
DH
2526 assert(context);
2527
93c6bb51
DH
2528 /* The runtime struct only contains the parent of the private /tmp,
2529 * which is non-accessible to world users. Inside of it there's a /tmp
2530 * that is sticky, and that's the one we want to use here. */
2531
2532 if (context->private_tmp && runtime) {
2533 if (runtime->tmp_dir)
2534 tmp = strjoina(runtime->tmp_dir, "/tmp");
2535 if (runtime->var_tmp_dir)
2536 var = strjoina(runtime->var_tmp_dir, "/tmp");
2537 }
2538
915e6d16
LP
2539 if (params->flags & EXEC_APPLY_CHROOT) {
2540 root_image = context->root_image;
2541
2542 if (!root_image)
2543 root_dir = context->root_directory;
2544 }
93c6bb51 2545
6c47cd7d
LP
2546 r = compile_bind_mounts(context, params, &bind_mounts, &n_bind_mounts, &empty_directories);
2547 if (r < 0)
2548 return r;
2549
165a31c0 2550 needs_sandboxing = (params->flags & EXEC_APPLY_SANDBOXING) && !(command->flags & EXEC_COMMAND_FULLY_PRIVILEGED);
b5a33299
YW
2551 if (needs_sandboxing)
2552 ns_info = (NamespaceInfo) {
2553 .ignore_protect_paths = false,
2554 .private_dev = context->private_devices,
2555 .protect_control_groups = context->protect_control_groups,
2556 .protect_kernel_tunables = context->protect_kernel_tunables,
2557 .protect_kernel_modules = context->protect_kernel_modules,
94a7b275 2558 .protect_kernel_logs = context->protect_kernel_logs,
aecd5ac6 2559 .protect_hostname = context->protect_hostname,
b5a33299 2560 .mount_apivfs = context->mount_apivfs,
228af36f 2561 .private_mounts = context->private_mounts,
b5a33299 2562 };
228af36f
LP
2563 else if (!context->dynamic_user && root_dir)
2564 /*
2565 * If DynamicUser=no and RootDirectory= is set then lets pass a relaxed
2566 * sandbox info, otherwise enforce it, don't ignore protected paths and
2567 * fail if we are enable to apply the sandbox inside the mount namespace.
2568 */
2569 ns_info = (NamespaceInfo) {
2570 .ignore_protect_paths = true,
2571 };
2572 else
2573 ns_info = (NamespaceInfo) {};
b5a33299 2574
37ed15d7
FB
2575 if (context->mount_flags == MS_SHARED)
2576 log_unit_debug(u, "shared mount propagation hidden by other fs namespacing unit settings: ignoring");
2577
915e6d16 2578 r = setup_namespace(root_dir, root_image,
7bcef4ef 2579 &ns_info, context->read_write_paths,
165a31c0
LP
2580 needs_sandboxing ? context->read_only_paths : NULL,
2581 needs_sandboxing ? context->inaccessible_paths : NULL,
6c47cd7d
LP
2582 empty_directories,
2583 bind_mounts,
2584 n_bind_mounts,
2abd4e38
YW
2585 context->temporary_filesystems,
2586 context->n_temporary_filesystems,
93c6bb51
DH
2587 tmp,
2588 var,
165a31c0
LP
2589 needs_sandboxing ? context->protect_home : PROTECT_HOME_NO,
2590 needs_sandboxing ? context->protect_system : PROTECT_SYSTEM_NO,
915e6d16 2591 context->mount_flags,
d4dffb85 2592 DISSECT_IMAGE_DISCARD_ON_LOOP|DISSECT_IMAGE_RELAX_VAR_CHECK,
7cc5ef5f 2593 error_path);
93c6bb51 2594
1beab8b0 2595 /* If we couldn't set up the namespace this is probably due to a missing capability. setup_namespace() reports
5238e957 2596 * that with a special, recognizable error ENOANO. In this case, silently proceed, but only if exclusively
1beab8b0
LP
2597 * sandboxing options were used, i.e. nothing such as RootDirectory= or BindMount= that would result in a
2598 * completely different execution environment. */
aca835ed 2599 if (r == -ENOANO) {
4e677599
LP
2600 if (insist_on_sandboxing(
2601 context,
2602 root_dir, root_image,
2603 bind_mounts,
2604 n_bind_mounts)) {
2605 log_unit_debug(u, "Failed to set up namespace, and refusing to continue since the selected namespacing options alter mount environment non-trivially.\n"
2606 "Bind mounts: %zu, temporary filesystems: %zu, root directory: %s, root image: %s, dynamic user: %s",
2607 n_bind_mounts, context->n_temporary_filesystems, yes_no(root_dir), yes_no(root_image), yes_no(context->dynamic_user));
2608
2609 r = -EOPNOTSUPP;
2610 } else {
aca835ed 2611 log_unit_debug(u, "Failed to set up namespace, assuming containerized execution and ignoring.");
4e677599 2612 r = 0;
aca835ed 2613 }
93c6bb51
DH
2614 }
2615
4e677599 2616 bind_mount_free_many(bind_mounts, n_bind_mounts);
93c6bb51
DH
2617 return r;
2618}
2619
915e6d16
LP
2620static int apply_working_directory(
2621 const ExecContext *context,
2622 const ExecParameters *params,
2623 const char *home,
376fecf6 2624 int *exit_status) {
915e6d16 2625
6732edab 2626 const char *d, *wd;
2b3c1b9e
DH
2627
2628 assert(context);
376fecf6 2629 assert(exit_status);
2b3c1b9e 2630
6732edab
LP
2631 if (context->working_directory_home) {
2632
376fecf6
LP
2633 if (!home) {
2634 *exit_status = EXIT_CHDIR;
6732edab 2635 return -ENXIO;
376fecf6 2636 }
6732edab 2637
2b3c1b9e 2638 wd = home;
6732edab
LP
2639
2640 } else if (context->working_directory)
2b3c1b9e
DH
2641 wd = context->working_directory;
2642 else
2643 wd = "/";
e7f1e7c6 2644
fa97f630 2645 if (params->flags & EXEC_APPLY_CHROOT)
2b3c1b9e 2646 d = wd;
fa97f630 2647 else
3b0e5bb5 2648 d = prefix_roota(context->root_directory, wd);
e7f1e7c6 2649
376fecf6
LP
2650 if (chdir(d) < 0 && !context->working_directory_missing_ok) {
2651 *exit_status = EXIT_CHDIR;
2b3c1b9e 2652 return -errno;
376fecf6 2653 }
e7f1e7c6
DH
2654
2655 return 0;
2656}
2657
fa97f630
JB
2658static int apply_root_directory(
2659 const ExecContext *context,
2660 const ExecParameters *params,
2661 const bool needs_mount_ns,
2662 int *exit_status) {
2663
2664 assert(context);
2665 assert(exit_status);
2666
2667 if (params->flags & EXEC_APPLY_CHROOT) {
2668 if (!needs_mount_ns && context->root_directory)
2669 if (chroot(context->root_directory) < 0) {
2670 *exit_status = EXIT_CHROOT;
2671 return -errno;
2672 }
2673 }
2674
2675 return 0;
2676}
2677
b1edf445 2678static int setup_keyring(
34cf6c43 2679 const Unit *u,
b1edf445
LP
2680 const ExecContext *context,
2681 const ExecParameters *p,
2682 uid_t uid, gid_t gid) {
2683
74dd6b51 2684 key_serial_t keyring;
e64c2d0b
DJL
2685 int r = 0;
2686 uid_t saved_uid;
2687 gid_t saved_gid;
74dd6b51
LP
2688
2689 assert(u);
b1edf445 2690 assert(context);
74dd6b51
LP
2691 assert(p);
2692
2693 /* Let's set up a new per-service "session" kernel keyring for each system service. This has the benefit that
2694 * each service runs with its own keyring shared among all processes of the service, but with no hook-up beyond
2695 * that scope, and in particular no link to the per-UID keyring. If we don't do this the keyring will be
2696 * automatically created on-demand and then linked to the per-UID keyring, by the kernel. The kernel's built-in
2697 * on-demand behaviour is very appropriate for login users, but probably not so much for system services, where
2698 * UIDs are not necessarily specific to a service but reused (at least in the case of UID 0). */
2699
b1edf445
LP
2700 if (context->keyring_mode == EXEC_KEYRING_INHERIT)
2701 return 0;
2702
e64c2d0b
DJL
2703 /* Acquiring a reference to the user keyring is nasty. We briefly change identity in order to get things set up
2704 * properly by the kernel. If we don't do that then we can't create it atomically, and that sucks for parallel
2705 * execution. This mimics what pam_keyinit does, too. Setting up session keyring, to be owned by the right user
2706 * & group is just as nasty as acquiring a reference to the user keyring. */
2707
2708 saved_uid = getuid();
2709 saved_gid = getgid();
2710
2711 if (gid_is_valid(gid) && gid != saved_gid) {
2712 if (setregid(gid, -1) < 0)
2713 return log_unit_error_errno(u, errno, "Failed to change GID for user keyring: %m");
2714 }
2715
2716 if (uid_is_valid(uid) && uid != saved_uid) {
2717 if (setreuid(uid, -1) < 0) {
2718 r = log_unit_error_errno(u, errno, "Failed to change UID for user keyring: %m");
2719 goto out;
2720 }
2721 }
2722
74dd6b51
LP
2723 keyring = keyctl(KEYCTL_JOIN_SESSION_KEYRING, 0, 0, 0, 0);
2724 if (keyring == -1) {
2725 if (errno == ENOSYS)
8002fb97 2726 log_unit_debug_errno(u, errno, "Kernel keyring not supported, ignoring.");
74dd6b51 2727 else if (IN_SET(errno, EACCES, EPERM))
8002fb97 2728 log_unit_debug_errno(u, errno, "Kernel keyring access prohibited, ignoring.");
74dd6b51 2729 else if (errno == EDQUOT)
8002fb97 2730 log_unit_debug_errno(u, errno, "Out of kernel keyrings to allocate, ignoring.");
74dd6b51 2731 else
e64c2d0b 2732 r = log_unit_error_errno(u, errno, "Setting up kernel keyring failed: %m");
74dd6b51 2733
e64c2d0b 2734 goto out;
74dd6b51
LP
2735 }
2736
e64c2d0b
DJL
2737 /* When requested link the user keyring into the session keyring. */
2738 if (context->keyring_mode == EXEC_KEYRING_SHARED) {
2739
2740 if (keyctl(KEYCTL_LINK,
2741 KEY_SPEC_USER_KEYRING,
2742 KEY_SPEC_SESSION_KEYRING, 0, 0) < 0) {
2743 r = log_unit_error_errno(u, errno, "Failed to link user keyring into session keyring: %m");
2744 goto out;
2745 }
2746 }
2747
2748 /* Restore uid/gid back */
2749 if (uid_is_valid(uid) && uid != saved_uid) {
2750 if (setreuid(saved_uid, -1) < 0) {
2751 r = log_unit_error_errno(u, errno, "Failed to change UID back for user keyring: %m");
2752 goto out;
2753 }
2754 }
2755
2756 if (gid_is_valid(gid) && gid != saved_gid) {
2757 if (setregid(saved_gid, -1) < 0)
2758 return log_unit_error_errno(u, errno, "Failed to change GID back for user keyring: %m");
2759 }
2760
2761 /* Populate they keyring with the invocation ID by default, as original saved_uid. */
b3415f5d
LP
2762 if (!sd_id128_is_null(u->invocation_id)) {
2763 key_serial_t key;
2764
2765 key = add_key("user", "invocation_id", &u->invocation_id, sizeof(u->invocation_id), KEY_SPEC_SESSION_KEYRING);
2766 if (key == -1)
8002fb97 2767 log_unit_debug_errno(u, errno, "Failed to add invocation ID to keyring, ignoring: %m");
b3415f5d
LP
2768 else {
2769 if (keyctl(KEYCTL_SETPERM, key,
2770 KEY_POS_VIEW|KEY_POS_READ|KEY_POS_SEARCH|
2771 KEY_USR_VIEW|KEY_USR_READ|KEY_USR_SEARCH, 0, 0) < 0)
e64c2d0b 2772 r = log_unit_error_errno(u, errno, "Failed to restrict invocation ID permission: %m");
b3415f5d
LP
2773 }
2774 }
2775
e64c2d0b
DJL
2776out:
2777 /* Revert back uid & gid for the the last time, and exit */
2778 /* no extra logging, as only the first already reported error matters */
2779 if (getuid() != saved_uid)
2780 (void) setreuid(saved_uid, -1);
b1edf445 2781
e64c2d0b
DJL
2782 if (getgid() != saved_gid)
2783 (void) setregid(saved_gid, -1);
b1edf445 2784
e64c2d0b 2785 return r;
74dd6b51
LP
2786}
2787
/* Appends those of the two file descriptors in 'pair' that are non-negative to 'array', in order,
 * advancing the element counter *n for each one added. The caller has to make sure the array has
 * room for up to two more entries. */
static void append_socket_pair(int *array, size_t *n, const int pair[static 2]) {
        size_t k;

        assert(array);
        assert(n);
        assert(pair);

        for (k = 0; k < 2; k++) {
                if (pair[k] < 0)
                        continue;

                array[*n] = pair[k];
                (*n)++;
        }
}
2798
a34ceba6
LP
2799static int close_remaining_fds(
2800 const ExecParameters *params,
34cf6c43
YW
2801 const ExecRuntime *runtime,
2802 const DynamicCreds *dcreds,
00d9ef85 2803 int user_lookup_fd,
a34ceba6 2804 int socket_fd,
5686391b 2805 int exec_fd,
5b8d1f6b 2806 const int *fds, size_t n_fds) {
a34ceba6 2807
da6053d0 2808 size_t n_dont_close = 0;
00d9ef85 2809 int dont_close[n_fds + 12];
a34ceba6
LP
2810
2811 assert(params);
2812
2813 if (params->stdin_fd >= 0)
2814 dont_close[n_dont_close++] = params->stdin_fd;
2815 if (params->stdout_fd >= 0)
2816 dont_close[n_dont_close++] = params->stdout_fd;
2817 if (params->stderr_fd >= 0)
2818 dont_close[n_dont_close++] = params->stderr_fd;
2819
2820 if (socket_fd >= 0)
2821 dont_close[n_dont_close++] = socket_fd;
5686391b
LP
2822 if (exec_fd >= 0)
2823 dont_close[n_dont_close++] = exec_fd;
a34ceba6
LP
2824 if (n_fds > 0) {
2825 memcpy(dont_close + n_dont_close, fds, sizeof(int) * n_fds);
2826 n_dont_close += n_fds;
2827 }
2828
29206d46
LP
2829 if (runtime)
2830 append_socket_pair(dont_close, &n_dont_close, runtime->netns_storage_socket);
2831
2832 if (dcreds) {
2833 if (dcreds->user)
2834 append_socket_pair(dont_close, &n_dont_close, dcreds->user->storage_socket);
2835 if (dcreds->group)
2836 append_socket_pair(dont_close, &n_dont_close, dcreds->group->storage_socket);
a34ceba6
LP
2837 }
2838
00d9ef85
LP
2839 if (user_lookup_fd >= 0)
2840 dont_close[n_dont_close++] = user_lookup_fd;
2841
a34ceba6
LP
2842 return close_all_fds(dont_close, n_dont_close);
2843}
2844
00d9ef85
LP
2845static int send_user_lookup(
2846 Unit *unit,
2847 int user_lookup_fd,
2848 uid_t uid,
2849 gid_t gid) {
2850
2851 assert(unit);
2852
2853 /* Send the resolved UID/GID to PID 1 after we learnt it. We send a single datagram, containing the UID/GID
2854 * data as well as the unit name. Note that we suppress sending this if no user/group to resolve was
2855 * specified. */
2856
2857 if (user_lookup_fd < 0)
2858 return 0;
2859
2860 if (!uid_is_valid(uid) && !gid_is_valid(gid))
2861 return 0;
2862
2863 if (writev(user_lookup_fd,
2864 (struct iovec[]) {
e6a7ec4b
LP
2865 IOVEC_INIT(&uid, sizeof(uid)),
2866 IOVEC_INIT(&gid, sizeof(gid)),
2867 IOVEC_INIT_STRING(unit->id) }, 3) < 0)
00d9ef85
LP
2868 return -errno;
2869
2870 return 0;
2871}
2872
6732edab
LP
2873static int acquire_home(const ExecContext *c, uid_t uid, const char** home, char **buf) {
2874 int r;
2875
2876 assert(c);
2877 assert(home);
2878 assert(buf);
2879
2880 /* If WorkingDirectory=~ is set, try to acquire a usable home directory. */
2881
2882 if (*home)
2883 return 0;
2884
2885 if (!c->working_directory_home)
2886 return 0;
2887
6732edab
LP
2888 r = get_home_dir(buf);
2889 if (r < 0)
2890 return r;
2891
2892 *home = *buf;
2893 return 1;
2894}
2895
da50b85a
LP
2896static int compile_suggested_paths(const ExecContext *c, const ExecParameters *p, char ***ret) {
2897 _cleanup_strv_free_ char ** list = NULL;
2898 ExecDirectoryType t;
2899 int r;
2900
2901 assert(c);
2902 assert(p);
2903 assert(ret);
2904
2905 assert(c->dynamic_user);
2906
2907 /* Compile a list of paths that it might make sense to read the owning UID from to use as initial candidate for
2908 * dynamic UID allocation, in order to save us from doing costly recursive chown()s of the special
2909 * directories. */
2910
2911 for (t = 0; t < _EXEC_DIRECTORY_TYPE_MAX; t++) {
2912 char **i;
2913
2914 if (t == EXEC_DIRECTORY_CONFIGURATION)
2915 continue;
2916
2917 if (!p->prefix[t])
2918 continue;
2919
2920 STRV_FOREACH(i, c->directories[t].paths) {
2921 char *e;
2922
494d0247 2923 if (exec_directory_is_private(c, t))
657ee2d8 2924 e = path_join(p->prefix[t], "private", *i);
494d0247
YW
2925 else
2926 e = path_join(p->prefix[t], *i);
da50b85a
LP
2927 if (!e)
2928 return -ENOMEM;
2929
2930 r = strv_consume(&list, e);
2931 if (r < 0)
2932 return r;
2933 }
2934 }
2935
ae2a15bc 2936 *ret = TAKE_PTR(list);
da50b85a
LP
2937
2938 return 0;
2939}
2940
34cf6c43
YW
2941static char *exec_command_line(char **argv);
2942
78f93209
LP
2943static int exec_parameters_get_cgroup_path(const ExecParameters *params, char **ret) {
2944 bool using_subcgroup;
2945 char *p;
2946
2947 assert(params);
2948 assert(ret);
2949
2950 if (!params->cgroup_path)
2951 return -EINVAL;
2952
2953 /* If we are called for a unit where cgroup delegation is on, and the payload created its own populated
2954 * subcgroup (which we expect it to do, after all it asked for delegation), then we cannot place the control
2955 * processes started after the main unit's process in the unit's main cgroup because it is now an inner one,
2956 * and inner cgroups may not contain processes. Hence, if delegation is on, and this is a control process,
2957 * let's use ".control" as subcgroup instead. Note that we do so only for ExecStartPost=, ExecReload=,
2958 * ExecStop=, ExecStopPost=, i.e. for the commands where the main process is already forked. For ExecStartPre=
2959 * this is not necessary, the cgroup is still empty. We distinguish these cases with the EXEC_CONTROL_CGROUP
2960 * flag, which is only passed for the former statements, not for the latter. */
2961
2962 using_subcgroup = FLAGS_SET(params->flags, EXEC_CONTROL_CGROUP|EXEC_CGROUP_DELEGATE|EXEC_IS_CONTROL);
2963 if (using_subcgroup)
657ee2d8 2964 p = path_join(params->cgroup_path, ".control");
78f93209
LP
2965 else
2966 p = strdup(params->cgroup_path);
2967 if (!p)
2968 return -ENOMEM;
2969
2970 *ret = p;
2971 return using_subcgroup;
2972}
2973
ff0af2a1 2974static int exec_child(
f2341e0a 2975 Unit *unit,
34cf6c43 2976 const ExecCommand *command,
ff0af2a1
LP
2977 const ExecContext *context,
2978 const ExecParameters *params,
2979 ExecRuntime *runtime,
29206d46 2980 DynamicCreds *dcreds,
ff0af2a1 2981 int socket_fd,
2caa38e9 2982 const int named_iofds[static 3],
4c47affc 2983 int *fds,
da6053d0 2984 size_t n_socket_fds,
25b583d7 2985 size_t n_storage_fds,
ff0af2a1 2986 char **files_env,
00d9ef85 2987 int user_lookup_fd,
12145637 2988 int *exit_status) {
d35fbf6b 2989
7ca69792 2990 _cleanup_strv_free_ char **our_env = NULL, **pass_env = NULL, **accum_env = NULL, **replaced_argv = NULL;
5686391b 2991 int *fds_with_exec_fd, n_fds_with_exec_fd, r, ngids = 0, exec_fd = -1;
4d885bd3
DH
2992 _cleanup_free_ gid_t *supplementary_gids = NULL;
2993 const char *username = NULL, *groupname = NULL;
5686391b 2994 _cleanup_free_ char *home_buffer = NULL;
2b3c1b9e 2995 const char *home = NULL, *shell = NULL;
7ca69792 2996 char **final_argv = NULL;
7bce046b
LP
2997 dev_t journal_stream_dev = 0;
2998 ino_t journal_stream_ino = 0;
5749f855 2999 bool userns_set_up = false;
165a31c0
LP
3000 bool needs_sandboxing, /* Do we need to set up full sandboxing? (i.e. all namespacing, all MAC stuff, caps, yadda yadda */
3001 needs_setuid, /* Do we need to do the actual setresuid()/setresgid() calls? */
3002 needs_mount_namespace, /* Do we need to set up a mount namespace for this kernel? */
3003 needs_ambient_hack; /* Do we need to apply the ambient capabilities hack? */
349cc4a5 3004#if HAVE_SELINUX
7f59dd35 3005 _cleanup_free_ char *mac_selinux_context_net = NULL;
43b1f709 3006 bool use_selinux = false;
ecfbc84f 3007#endif
f9fa32f0 3008#if ENABLE_SMACK
43b1f709 3009 bool use_smack = false;
ecfbc84f 3010#endif
349cc4a5 3011#if HAVE_APPARMOR
43b1f709 3012 bool use_apparmor = false;
ecfbc84f 3013#endif
5749f855
AZ
3014 uid_t saved_uid = getuid();
3015 gid_t saved_gid = getgid();
fed1e721
LP
3016 uid_t uid = UID_INVALID;
3017 gid_t gid = GID_INVALID;
da6053d0 3018 size_t n_fds;
3536f49e 3019 ExecDirectoryType dt;
165a31c0 3020 int secure_bits;
afb11bf1
DG
3021 _cleanup_free_ gid_t *gids_after_pam = NULL;
3022 int ngids_after_pam = 0;
034c6ed7 3023
f2341e0a 3024 assert(unit);
5cb5a6ff
LP
3025 assert(command);
3026 assert(context);
d35fbf6b 3027 assert(params);
ff0af2a1 3028 assert(exit_status);
d35fbf6b
DM
3029
3030 rename_process_from_path(command->path);
3031
3032 /* We reset exactly these signals, since they are the
3033 * only ones we set to SIG_IGN in the main daemon. All
3034 * others we leave untouched because we set them to
3035 * SIG_DFL or a valid handler initially, both of which
3036 * will be demoted to SIG_DFL. */
ce30c8dc
LP
3037 (void) default_signals(SIGNALS_CRASH_HANDLER,
3038 SIGNALS_IGNORE, -1);
d35fbf6b
DM
3039
3040 if (context->ignore_sigpipe)
ce30c8dc 3041 (void) ignore_signals(SIGPIPE, -1);
d35fbf6b 3042
ff0af2a1
LP
3043 r = reset_signal_mask();
3044 if (r < 0) {
3045 *exit_status = EXIT_SIGNAL_MASK;
12145637 3046 return log_unit_error_errno(unit, r, "Failed to set process signal mask: %m");
d35fbf6b 3047 }
034c6ed7 3048
d35fbf6b
DM
3049 if (params->idle_pipe)
3050 do_idle_pipe_dance(params->idle_pipe);
4f2d528d 3051
2c027c62
LP
3052 /* Close fds we don't need very early to make sure we don't block init reexecution because it cannot bind its
3053 * sockets. Among the fds we close are the logging fds, and we want to keep them closed, so that we don't have
3054 * any fds open we don't really want open during the transition. In order to make logging work, we switch the
3055 * log subsystem into open_when_needed mode, so that it reopens the logs on every single log call. */
ff0af2a1 3056
d35fbf6b 3057 log_forget_fds();
2c027c62 3058 log_set_open_when_needed(true);
4f2d528d 3059
40a80078
LP
3060 /* In case anything used libc syslog(), close this here, too */
3061 closelog();
3062
5686391b
LP
3063 n_fds = n_socket_fds + n_storage_fds;
3064 r = close_remaining_fds(params, runtime, dcreds, user_lookup_fd, socket_fd, params->exec_fd, fds, n_fds);
ff0af2a1
LP
3065 if (r < 0) {
3066 *exit_status = EXIT_FDS;
12145637 3067 return log_unit_error_errno(unit, r, "Failed to close unwanted file descriptors: %m");
8c7be95e
LP
3068 }
3069
d35fbf6b
DM
3070 if (!context->same_pgrp)
3071 if (setsid() < 0) {
ff0af2a1 3072 *exit_status = EXIT_SETSID;
12145637 3073 return log_unit_error_errno(unit, errno, "Failed to create new process session: %m");
d35fbf6b 3074 }
9e2f7c11 3075
1e22b5cd 3076 exec_context_tty_reset(context, params);
d35fbf6b 3077
c891efaf 3078 if (unit_shall_confirm_spawn(unit)) {
7d5ceb64 3079 const char *vc = params->confirm_spawn;
3b20f877
FB
3080 _cleanup_free_ char *cmdline = NULL;
3081
ee39ca20 3082 cmdline = exec_command_line(command->argv);
3b20f877 3083 if (!cmdline) {
0460aa5c 3084 *exit_status = EXIT_MEMORY;
12145637 3085 return log_oom();
3b20f877 3086 }
d35fbf6b 3087
eedf223a 3088 r = ask_for_confirmation(vc, unit, cmdline);
3b20f877
FB
3089 if (r != CONFIRM_EXECUTE) {
3090 if (r == CONFIRM_PRETEND_SUCCESS) {
3091 *exit_status = EXIT_SUCCESS;
3092 return 0;
3093 }
ff0af2a1 3094 *exit_status = EXIT_CONFIRM;
12145637 3095 log_unit_error(unit, "Execution cancelled by the user");
d35fbf6b 3096 return -ECANCELED;
d35fbf6b
DM
3097 }
3098 }
1a63a750 3099
d521916d
LP
3100 /* We are about to invoke NSS and PAM modules. Let's tell them what we are doing here, maybe they care. This is
3101 * used by nss-resolve to disable itself when we are about to start systemd-resolved, to avoid deadlocks. Note
3102 * that these env vars do not survive the execve(), which means they really only apply to the PAM and NSS
3103 * invocations themselves. Also note that while we'll only invoke NSS modules involved in user management they
3104 * might internally call into other NSS modules that are involved in hostname resolution, we never know. */
3105 if (setenv("SYSTEMD_ACTIVATION_UNIT", unit->id, true) != 0 ||
3106 setenv("SYSTEMD_ACTIVATION_SCOPE", MANAGER_IS_SYSTEM(unit->manager) ? "system" : "user", true) != 0) {
3107 *exit_status = EXIT_MEMORY;
3108 return log_unit_error_errno(unit, errno, "Failed to update environment: %m");
3109 }
3110
29206d46 3111 if (context->dynamic_user && dcreds) {
da50b85a 3112 _cleanup_strv_free_ char **suggested_paths = NULL;
29206d46 3113
d521916d
LP
3114 /* On top of that, make sure we bypass our own NSS module nss-systemd comprehensively for any NSS
3115 * checks, if DynamicUser=1 is used, as we shouldn't create a feedback loop with ourselves here.*/
409093fe
LP
3116 if (putenv((char*) "SYSTEMD_NSS_DYNAMIC_BYPASS=1") != 0) {
3117 *exit_status = EXIT_USER;
12145637 3118 return log_unit_error_errno(unit, errno, "Failed to update environment: %m");
409093fe
LP
3119 }
3120
da50b85a
LP
3121 r = compile_suggested_paths(context, params, &suggested_paths);
3122 if (r < 0) {
3123 *exit_status = EXIT_MEMORY;
3124 return log_oom();
3125 }
3126
3127 r = dynamic_creds_realize(dcreds, suggested_paths, &uid, &gid);
ff0af2a1
LP
3128 if (r < 0) {
3129 *exit_status = EXIT_USER;
e2b0cc34
YW
3130 if (r == -EILSEQ) {
3131 log_unit_error(unit, "Failed to update dynamic user credentials: User or group with specified name already exists.");
3132 return -EOPNOTSUPP;
3133 }
12145637 3134 return log_unit_error_errno(unit, r, "Failed to update dynamic user credentials: %m");
524daa8c 3135 }
524daa8c 3136
70dd455c 3137 if (!uid_is_valid(uid)) {
29206d46 3138 *exit_status = EXIT_USER;
12145637 3139 log_unit_error(unit, "UID validation failed for \""UID_FMT"\"", uid);
70dd455c
ZJS
3140 return -ESRCH;
3141 }
3142
3143 if (!gid_is_valid(gid)) {
3144 *exit_status = EXIT_USER;
12145637 3145 log_unit_error(unit, "GID validation failed for \""GID_FMT"\"", gid);
29206d46
LP
3146 return -ESRCH;
3147 }
5bc7452b 3148
29206d46
LP
3149 if (dcreds->user)
3150 username = dcreds->user->name;
3151
3152 } else {
4d885bd3
DH
3153 r = get_fixed_user(context, &username, &uid, &gid, &home, &shell);
3154 if (r < 0) {
3155 *exit_status = EXIT_USER;
12145637 3156 return log_unit_error_errno(unit, r, "Failed to determine user credentials: %m");
5bc7452b 3157 }
5bc7452b 3158
4d885bd3
DH
3159 r = get_fixed_group(context, &groupname, &gid);
3160 if (r < 0) {
3161 *exit_status = EXIT_GROUP;
12145637 3162 return log_unit_error_errno(unit, r, "Failed to determine group credentials: %m");
4d885bd3 3163 }
cdc5d5c5 3164 }
29206d46 3165
cdc5d5c5
DH
3166 /* Initialize user supplementary groups and get SupplementaryGroups= ones */
3167 r = get_supplementary_groups(context, username, groupname, gid,
3168 &supplementary_gids, &ngids);
3169 if (r < 0) {
3170 *exit_status = EXIT_GROUP;
12145637 3171 return log_unit_error_errno(unit, r, "Failed to determine supplementary groups: %m");
29206d46 3172 }
5bc7452b 3173
00d9ef85
LP
3174 r = send_user_lookup(unit, user_lookup_fd, uid, gid);
3175 if (r < 0) {
3176 *exit_status = EXIT_USER;
12145637 3177 return log_unit_error_errno(unit, r, "Failed to send user credentials to PID1: %m");
00d9ef85
LP
3178 }
3179
3180 user_lookup_fd = safe_close(user_lookup_fd);
3181
6732edab
LP
3182 r = acquire_home(context, uid, &home, &home_buffer);
3183 if (r < 0) {
3184 *exit_status = EXIT_CHDIR;
12145637 3185 return log_unit_error_errno(unit, r, "Failed to determine $HOME for user: %m");
6732edab
LP
3186 }
3187
d35fbf6b
DM
3188 /* If a socket is connected to STDIN/STDOUT/STDERR, we
3189 * must make sure to drop O_NONBLOCK */
3190 if (socket_fd >= 0)
a34ceba6 3191 (void) fd_nonblock(socket_fd, false);
acbb0225 3192
4c70a4a7
MS
3193 /* Journald will try to look-up our cgroup in order to populate _SYSTEMD_CGROUP and _SYSTEMD_UNIT fields.
3194 * Hence we need to migrate to the target cgroup from init.scope before connecting to journald */
3195 if (params->cgroup_path) {
3196 _cleanup_free_ char *p = NULL;
3197
3198 r = exec_parameters_get_cgroup_path(params, &p);
3199 if (r < 0) {
3200 *exit_status = EXIT_CGROUP;
3201 return log_unit_error_errno(unit, r, "Failed to acquire cgroup path: %m");
3202 }
3203
3204 r = cg_attach_everywhere(params->cgroup_supported, p, 0, NULL, NULL);
3205 if (r < 0) {
3206 *exit_status = EXIT_CGROUP;
3207 return log_unit_error_errno(unit, r, "Failed to attach to cgroup %s: %m", p);
3208 }
3209 }
3210
a8d08f39
LP
3211 if (context->network_namespace_path && runtime && runtime->netns_storage_socket[0] >= 0) {
3212 r = open_netns_path(runtime->netns_storage_socket, context->network_namespace_path);
3213 if (r < 0) {
3214 *exit_status = EXIT_NETWORK;
3215 return log_unit_error_errno(unit, r, "Failed to open network namespace path %s: %m", context->network_namespace_path);
3216 }
3217 }
3218
52c239d7 3219 r = setup_input(context, params, socket_fd, named_iofds);
ff0af2a1
LP
3220 if (r < 0) {
3221 *exit_status = EXIT_STDIN;
12145637 3222 return log_unit_error_errno(unit, r, "Failed to set up standard input: %m");
d35fbf6b 3223 }
034c6ed7 3224
52c239d7 3225 r = setup_output(unit, context, params, STDOUT_FILENO, socket_fd, named_iofds, basename(command->path), uid, gid, &journal_stream_dev, &journal_stream_ino);
ff0af2a1
LP
3226 if (r < 0) {
3227 *exit_status = EXIT_STDOUT;
12145637 3228 return log_unit_error_errno(unit, r, "Failed to set up standard output: %m");
d35fbf6b
DM
3229 }
3230
52c239d7 3231 r = setup_output(unit, context, params, STDERR_FILENO, socket_fd, named_iofds, basename(command->path), uid, gid, &journal_stream_dev, &journal_stream_ino);
ff0af2a1
LP
3232 if (r < 0) {
3233 *exit_status = EXIT_STDERR;
12145637 3234 return log_unit_error_errno(unit, r, "Failed to set up standard error output: %m");
d35fbf6b
DM
3235 }
3236
d35fbf6b 3237 if (context->oom_score_adjust_set) {
9f8168eb
LP
3238 /* When we can't make this change due to EPERM, then let's silently skip over it. User namespaces
3239 * prohibit write access to this file, and we shouldn't trip up over that. */
3240 r = set_oom_score_adjust(context->oom_score_adjust);
12145637 3241 if (IN_SET(r, -EPERM, -EACCES))
f2341e0a 3242 log_unit_debug_errno(unit, r, "Failed to adjust OOM setting, assuming containerized execution, ignoring: %m");
12145637 3243 else if (r < 0) {
ff0af2a1 3244 *exit_status = EXIT_OOM_ADJUST;
12145637 3245 return log_unit_error_errno(unit, r, "Failed to adjust OOM setting: %m");
613b411c 3246 }
d35fbf6b
DM
3247 }
3248
39090201
DJL
3249 if (context->nice_set) {
3250 r = setpriority_closest(context->nice);
3251 if (r < 0)
3252 return log_unit_error_errno(unit, r, "Failed to set up process scheduling priority (nice level): %m");
3253 }
613b411c 3254
d35fbf6b
DM
3255 if (context->cpu_sched_set) {
3256 struct sched_param param = {
3257 .sched_priority = context->cpu_sched_priority,
3258 };
3259
ff0af2a1
LP
3260 r = sched_setscheduler(0,
3261 context->cpu_sched_policy |
3262 (context->cpu_sched_reset_on_fork ?
3263 SCHED_RESET_ON_FORK : 0),
3264 &param);
3265 if (r < 0) {
3266 *exit_status = EXIT_SETSCHEDULER;
12145637 3267 return log_unit_error_errno(unit, errno, "Failed to set up CPU scheduling: %m");
fc9b2a84 3268 }
d35fbf6b 3269 }
fc9b2a84 3270
0985c7c4
ZJS
3271 if (context->cpu_set.set)
3272 if (sched_setaffinity(0, context->cpu_set.allocated, context->cpu_set.set) < 0) {
ff0af2a1 3273 *exit_status = EXIT_CPUAFFINITY;
12145637 3274 return log_unit_error_errno(unit, errno, "Failed to set up CPU affinity: %m");
034c6ed7
LP
3275 }
3276
b070c7c0
MS
3277 if (mpol_is_valid(numa_policy_get_type(&context->numa_policy))) {
3278 r = apply_numa_policy(&context->numa_policy);
3279 if (r == -EOPNOTSUPP)
33fe9e3f 3280 log_unit_debug_errno(unit, r, "NUMA support not available, ignoring.");
b070c7c0
MS
3281 else if (r < 0) {
3282 *exit_status = EXIT_NUMA_POLICY;
3283 return log_unit_error_errno(unit, r, "Failed to set NUMA memory policy: %m");
3284 }
3285 }
3286
d35fbf6b
DM
3287 if (context->ioprio_set)
3288 if (ioprio_set(IOPRIO_WHO_PROCESS, 0, context->ioprio) < 0) {
ff0af2a1 3289 *exit_status = EXIT_IOPRIO;
12145637 3290 return log_unit_error_errno(unit, errno, "Failed to set up IO scheduling priority: %m");
d35fbf6b 3291 }
da726a4d 3292
d35fbf6b
DM
3293 if (context->timer_slack_nsec != NSEC_INFINITY)
3294 if (prctl(PR_SET_TIMERSLACK, context->timer_slack_nsec) < 0) {
ff0af2a1 3295 *exit_status = EXIT_TIMERSLACK;
12145637 3296 return log_unit_error_errno(unit, errno, "Failed to set up timer slack: %m");
4c2630eb 3297 }
9eba9da4 3298
21022b9d
LP
3299 if (context->personality != PERSONALITY_INVALID) {
3300 r = safe_personality(context->personality);
3301 if (r < 0) {
ff0af2a1 3302 *exit_status = EXIT_PERSONALITY;
12145637 3303 return log_unit_error_errno(unit, r, "Failed to set up execution domain (personality): %m");
4c2630eb 3304 }
21022b9d 3305 }
94f04347 3306
d35fbf6b 3307 if (context->utmp_id)
df0ff127 3308 utmp_put_init_process(context->utmp_id, getpid_cached(), getsid(0),
6a93917d 3309 context->tty_path,
023a4f67
LP
3310 context->utmp_mode == EXEC_UTMP_INIT ? INIT_PROCESS :
3311 context->utmp_mode == EXEC_UTMP_LOGIN ? LOGIN_PROCESS :
3312 USER_PROCESS,
6a93917d 3313 username);
d35fbf6b 3314
08f67696 3315 if (uid_is_valid(uid)) {
ff0af2a1
LP
3316 r = chown_terminal(STDIN_FILENO, uid);
3317 if (r < 0) {
3318 *exit_status = EXIT_STDIN;
12145637 3319 return log_unit_error_errno(unit, r, "Failed to change ownership of terminal: %m");
071830ff 3320 }
d35fbf6b 3321 }
8e274523 3322
4e1dfa45 3323 /* If delegation is enabled we'll pass ownership of the cgroup to the user of the new process. On cgroup v1
62b9bb26 3324 * this is only about systemd's own hierarchy, i.e. not the controller hierarchies, simply because that's not
4e1dfa45 3325 * safe. On cgroup v2 there's only one hierarchy anyway, and delegation is safe there, hence in that case only
62b9bb26 3326 * touch a single hierarchy too. */
584b8688 3327 if (params->cgroup_path && context->user && (params->flags & EXEC_CGROUP_DELEGATE)) {
62b9bb26 3328 r = cg_set_access(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, uid, gid);
ff0af2a1
LP
3329 if (r < 0) {
3330 *exit_status = EXIT_CGROUP;
12145637 3331 return log_unit_error_errno(unit, r, "Failed to adjust control group access: %m");
034c6ed7 3332 }
d35fbf6b 3333 }
034c6ed7 3334
72fd1768 3335 for (dt = 0; dt < _EXEC_DIRECTORY_TYPE_MAX; dt++) {
8679efde 3336 r = setup_exec_directory(context, params, uid, gid, dt, exit_status);
12145637
LP
3337 if (r < 0)
3338 return log_unit_error_errno(unit, r, "Failed to set up special execution directory in %s: %m", params->prefix[dt]);
d35fbf6b 3339 }
94f04347 3340
7bce046b 3341 r = build_environment(
fd63e712 3342 unit,
7bce046b
LP
3343 context,
3344 params,
3345 n_fds,
3346 home,
3347 username,
3348 shell,
3349 journal_stream_dev,
3350 journal_stream_ino,
3351 &our_env);
2065ca69
JW
3352 if (r < 0) {
3353 *exit_status = EXIT_MEMORY;
12145637 3354 return log_oom();
2065ca69
JW
3355 }
3356
3357 r = build_pass_environment(context, &pass_env);
3358 if (r < 0) {
3359 *exit_status = EXIT_MEMORY;
12145637 3360 return log_oom();
2065ca69
JW
3361 }
3362
3363 accum_env = strv_env_merge(5,
3364 params->environment,
3365 our_env,
3366 pass_env,
3367 context->environment,
3368 files_env,
3369 NULL);
3370 if (!accum_env) {
3371 *exit_status = EXIT_MEMORY;
12145637 3372 return log_oom();
2065ca69 3373 }
1280503b 3374 accum_env = strv_env_clean(accum_env);
2065ca69 3375
096424d1 3376 (void) umask(context->umask);
b213e1c1 3377
b1edf445 3378 r = setup_keyring(unit, context, params, uid, gid);
74dd6b51
LP
3379 if (r < 0) {
3380 *exit_status = EXIT_KEYRING;
12145637 3381 return log_unit_error_errno(unit, r, "Failed to set up kernel keyring: %m");
74dd6b51
LP
3382 }
3383
165a31c0 3384 /* We need sandboxing if the caller asked us to apply it and the command isn't explicitly excepted from it */
1703fa41 3385 needs_sandboxing = (params->flags & EXEC_APPLY_SANDBOXING) && !(command->flags & EXEC_COMMAND_FULLY_PRIVILEGED);
7f18ef0a 3386
165a31c0
LP
3387 /* We need the ambient capability hack, if the caller asked us to apply it and the command is marked for it, and the kernel doesn't actually support ambient caps */
3388 needs_ambient_hack = (params->flags & EXEC_APPLY_SANDBOXING) && (command->flags & EXEC_COMMAND_AMBIENT_MAGIC) && !ambient_capabilities_supported();
7f18ef0a 3389
165a31c0
LP
3390 /* We need setresuid() if the caller asked us to apply sandboxing and the command isn't explicitly excepted from either whole sandboxing or just setresuid() itself, and the ambient hack is not desired */
3391 if (needs_ambient_hack)
3392 needs_setuid = false;
3393 else
3394 needs_setuid = (params->flags & EXEC_APPLY_SANDBOXING) && !(command->flags & (EXEC_COMMAND_FULLY_PRIVILEGED|EXEC_COMMAND_NO_SETUID));
3395
3396 if (needs_sandboxing) {
7f18ef0a
FK
3397 /* MAC enablement checks need to be done before a new mount ns is created, as they rely on /sys being
3398 * present. The actual MAC context application will happen later, as late as possible, to avoid
3399 * impacting our own code paths. */
3400
349cc4a5 3401#if HAVE_SELINUX
43b1f709 3402 use_selinux = mac_selinux_use();
7f18ef0a 3403#endif
f9fa32f0 3404#if ENABLE_SMACK
43b1f709 3405 use_smack = mac_smack_use();
7f18ef0a 3406#endif
349cc4a5 3407#if HAVE_APPARMOR
43b1f709 3408 use_apparmor = mac_apparmor_use();
7f18ef0a 3409#endif
165a31c0 3410 }
7f18ef0a 3411
ce932d2d
LP
3412 if (needs_sandboxing) {
3413 int which_failed;
3414
3415 /* Let's set the resource limits before we call into PAM, so that pam_limits wins over what
3416 * is set here. (See below.) */
3417
3418 r = setrlimit_closest_all((const struct rlimit* const *) context->rlimit, &which_failed);
3419 if (r < 0) {
3420 *exit_status = EXIT_LIMITS;
3421 return log_unit_error_errno(unit, r, "Failed to adjust resource limit RLIMIT_%s: %m", rlimit_to_string(which_failed));
3422 }
3423 }
3424
165a31c0 3425 if (needs_setuid) {
ce932d2d
LP
3426
3427 /* Let's call into PAM after we set up our own idea of resource limits so that pam_limits
3428 * wins here. (See above.) */
3429
165a31c0
LP
3430 if (context->pam_name && username) {
3431 r = setup_pam(context->pam_name, username, uid, gid, context->tty_path, &accum_env, fds, n_fds);
3432 if (r < 0) {
3433 *exit_status = EXIT_PAM;
12145637 3434 return log_unit_error_errno(unit, r, "Failed to set up PAM session: %m");
165a31c0 3435 }
afb11bf1
DG
3436
3437 ngids_after_pam = getgroups_alloc(&gids_after_pam);
3438 if (ngids_after_pam < 0) {
3439 *exit_status = EXIT_MEMORY;
3440 return log_unit_error_errno(unit, ngids_after_pam, "Failed to obtain groups after setting up PAM: %m");
3441 }
165a31c0 3442 }
b213e1c1 3443 }
ac45f971 3444
5749f855
AZ
3445 if (needs_sandboxing) {
3446#if HAVE_SELINUX
3447 if (use_selinux && params->selinux_context_net && socket_fd >= 0) {
3448 r = mac_selinux_get_child_mls_label(socket_fd, command->path, context->selinux_context, &mac_selinux_context_net);
3449 if (r < 0) {
3450 *exit_status = EXIT_SELINUX_CONTEXT;
3451 return log_unit_error_errno(unit, r, "Failed to determine SELinux context: %m");
3452 }
3453 }
3454#endif
3455
3456 /* If we're unprivileged, set up the user namespace first to enable use of the other namespaces.
3457 * Users with CAP_SYS_ADMIN can set up user namespaces last because they will be able to
3458 * set up all of the other namespaces (i.e. network, mount, UTS) without a user namespace. */
3459 if (context->private_users && !have_effective_cap(CAP_SYS_ADMIN)) {
3460 userns_set_up = true;
3461 r = setup_private_users(saved_uid, saved_gid, uid, gid);
3462 if (r < 0) {
3463 *exit_status = EXIT_USER;
3464 return log_unit_error_errno(unit, r, "Failed to set up user namespacing for unprivileged user: %m");
3465 }
3466 }
3467 }
3468
a8d08f39
LP
3469 if ((context->private_network || context->network_namespace_path) && runtime && runtime->netns_storage_socket[0] >= 0) {
3470
6e2d7c4f
MS
3471 if (ns_type_supported(NAMESPACE_NET)) {
3472 r = setup_netns(runtime->netns_storage_socket);
3473 if (r < 0) {
3474 *exit_status = EXIT_NETWORK;
3475 return log_unit_error_errno(unit, r, "Failed to set up network namespacing: %m");
3476 }
a8d08f39
LP
3477 } else if (context->network_namespace_path) {
3478 *exit_status = EXIT_NETWORK;
3479 return log_unit_error_errno(unit, SYNTHETIC_ERRNO(EOPNOTSUPP), "NetworkNamespacePath= is not supported, refusing.");
6e2d7c4f
MS
3480 } else
3481 log_unit_warning(unit, "PrivateNetwork=yes is configured, but the kernel does not support network namespaces, ignoring.");
d35fbf6b 3482 }
169c1bda 3483
ee818b89 3484 needs_mount_namespace = exec_needs_mount_namespace(context, params, runtime);
ee818b89 3485 if (needs_mount_namespace) {
7cc5ef5f
ZJS
3486 _cleanup_free_ char *error_path = NULL;
3487
3488 r = apply_mount_namespace(unit, command, context, params, runtime, &error_path);
3fbe8dbe
LP
3489 if (r < 0) {
3490 *exit_status = EXIT_NAMESPACE;
7cc5ef5f
ZJS
3491 return log_unit_error_errno(unit, r, "Failed to set up mount namespacing%s%s: %m",
3492 error_path ? ": " : "", strempty(error_path));
3fbe8dbe 3493 }
d35fbf6b 3494 }
81a2b7ce 3495
aecd5ac6
TM
3496 if (context->protect_hostname) {
3497 if (ns_type_supported(NAMESPACE_UTS)) {
3498 if (unshare(CLONE_NEWUTS) < 0) {
6d19b718
LP
3499 if (!ERRNO_IS_NOT_SUPPORTED(errno) && !ERRNO_IS_PRIVILEGE(errno)) {
3500 *exit_status = EXIT_NAMESPACE;
3501 return log_unit_error_errno(unit, errno, "Failed to set up UTS namespacing: %m");
3502 }
3503
3504 log_unit_warning(unit, "ProtectHostname=yes is configured, but UTS namespace setup is prohibited (container manager?), ignoring namespace setup.");
aecd5ac6
TM
3505 }
3506 } else
3507 log_unit_warning(unit, "ProtectHostname=yes is configured, but the kernel does not support UTS namespaces, ignoring namespace setup.");
3508#if HAVE_SECCOMP
3509 r = seccomp_protect_hostname();
3510 if (r < 0) {
3511 *exit_status = EXIT_SECCOMP;
3512 return log_unit_error_errno(unit, r, "Failed to apply hostname restrictions: %m");
3513 }
3514#endif
3515 }
3516
5749f855
AZ
3517 /* Drop groups as early as possible.
3518 * This needs to be done after PrivateDevices=y setup as device nodes should be owned by the host's root.
3519 * For non-root in a userns, devices will be owned by the user/group before the group change, and nobody. */
165a31c0 3520 if (needs_setuid) {
afb11bf1
DG
3521 _cleanup_free_ gid_t *gids_to_enforce = NULL;
3522 int ngids_to_enforce = 0;
3523
3524 ngids_to_enforce = merge_gid_lists(supplementary_gids,
3525 ngids,
3526 gids_after_pam,
3527 ngids_after_pam,
3528 &gids_to_enforce);
3529 if (ngids_to_enforce < 0) {
3530 *exit_status = EXIT_MEMORY;
3531 return log_unit_error_errno(unit,
3532 ngids_to_enforce,
3533 "Failed to merge group lists. Group membership might be incorrect: %m");
3534 }
3535
3536 r = enforce_groups(gid, gids_to_enforce, ngids_to_enforce);
096424d1
LP
3537 if (r < 0) {
3538 *exit_status = EXIT_GROUP;
12145637 3539 return log_unit_error_errno(unit, r, "Changing group credentials failed: %m");
096424d1 3540 }
165a31c0 3541 }
096424d1 3542
5749f855
AZ
3543 /* If the user namespace was not set up above, try to do it now.
3544 * It's preferred to set up the user namespace later (after all other namespaces) so as not to be
3545 * restricted by rules pertaining to combining user namespaces with other namespaces (e.g. in the
3546 * case of mount namespaces being less privileged when the mount point list is copied from a
3547 * different user namespace). */
9008e1ac 3548
5749f855
AZ
3549 if (needs_sandboxing && context->private_users && !userns_set_up) {
3550 r = setup_private_users(saved_uid, saved_gid, uid, gid);
3551 if (r < 0) {
3552 *exit_status = EXIT_USER;
3553 return log_unit_error_errno(unit, r, "Failed to set up user namespacing: %m");
d251207d
LP
3554 }
3555 }
3556
165a31c0 3557 /* We repeat the fd closing here, to make sure that nothing is leaked from the PAM modules. Note that we are
5686391b
LP
3558 * more aggressive this time since socket_fd and the netns fds we don't need anymore. We do keep the exec_fd
3559 * however if we have it as we want to keep it open until the final execve(). */
3560
3561 if (params->exec_fd >= 0) {
3562 exec_fd = params->exec_fd;
3563
3564 if (exec_fd < 3 + (int) n_fds) {
3565 int moved_fd;
3566
3567 /* Let's move the exec fd far up, so that it's outside of the fd range we want to pass to the
3568 * process we are about to execute. */
3569
3570 moved_fd = fcntl(exec_fd, F_DUPFD_CLOEXEC, 3 + (int) n_fds);
3571 if (moved_fd < 0) {
3572 *exit_status = EXIT_FDS;
3573 return log_unit_error_errno(unit, errno, "Couldn't move exec fd up: %m");
3574 }
3575
3576 safe_close(exec_fd);
3577 exec_fd = moved_fd;
3578 } else {
3579 /* This fd should be FD_CLOEXEC already, but let's make sure. */
3580 r = fd_cloexec(exec_fd, true);
3581 if (r < 0) {
3582 *exit_status = EXIT_FDS;
3583 return log_unit_error_errno(unit, r, "Failed to make exec fd FD_CLOEXEC: %m");
3584 }
3585 }
3586
3587 fds_with_exec_fd = newa(int, n_fds + 1);
7e8d494b 3588 memcpy_safe(fds_with_exec_fd, fds, n_fds * sizeof(int));
5686391b
LP
3589 fds_with_exec_fd[n_fds] = exec_fd;
3590 n_fds_with_exec_fd = n_fds + 1;
3591 } else {
3592 fds_with_exec_fd = fds;
3593 n_fds_with_exec_fd = n_fds;
3594 }
3595
3596 r = close_all_fds(fds_with_exec_fd, n_fds_with_exec_fd);
ff0af2a1
LP
3597 if (r >= 0)
3598 r = shift_fds(fds, n_fds);
3599 if (r >= 0)
25b583d7 3600 r = flags_fds(fds, n_socket_fds, n_storage_fds, context->non_blocking);
ff0af2a1
LP
3601 if (r < 0) {
3602 *exit_status = EXIT_FDS;
12145637 3603 return log_unit_error_errno(unit, r, "Failed to adjust passed file descriptors: %m");
d35fbf6b 3604 }
e66cf1a3 3605
5686391b
LP
3606 /* At this point, the fds we want to pass to the program are all ready and set up, with O_CLOEXEC turned off
3607 * and at the right fd numbers. There are no other fds open, with one exception: the exec_fd if it is defined,
3608 * and it has O_CLOEXEC set, after all we want it to be closed by the execve(), so that our parent knows we
3609 * came this far. */
3610
165a31c0 3611 secure_bits = context->secure_bits;
e66cf1a3 3612
165a31c0
LP
3613 if (needs_sandboxing) {
3614 uint64_t bset;
e66cf1a3 3615
ce932d2d
LP
3616 /* Set the RTPRIO resource limit to 0, but only if nothing else was explicitly
3617 * requested. (Note this is placed after the general resource limit initialization, see
3618 * above, in order to take precedence.) */
f4170c67
LP
3619 if (context->restrict_realtime && !context->rlimit[RLIMIT_RTPRIO]) {
3620 if (setrlimit(RLIMIT_RTPRIO, &RLIMIT_MAKE_CONST(0)) < 0) {
3621 *exit_status = EXIT_LIMITS;
12145637 3622 return log_unit_error_errno(unit, errno, "Failed to adjust RLIMIT_RTPRIO resource limit: %m");
f4170c67
LP
3623 }
3624 }
3625
37ac2744
JB
3626#if ENABLE_SMACK
3627 /* LSM Smack needs the capability CAP_MAC_ADMIN to change the current execution security context of the
3628 * process. This is the latest place before dropping capabilities. Other MAC context are set later. */
3629 if (use_smack) {
3630 r = setup_smack(context, command);
3631 if (r < 0) {
3632 *exit_status = EXIT_SMACK_PROCESS_LABEL;
3633 return log_unit_error_errno(unit, r, "Failed to set SMACK process label: %m");
3634 }
3635 }
3636#endif
3637
165a31c0
LP
3638 bset = context->capability_bounding_set;
3639 /* If the ambient caps hack is enabled (which means the kernel can't do them, and the user asked for
3640 * our magic fallback), then let's add some extra caps, so that the service can drop privs of its own,
3641 * instead of us doing that */
3642 if (needs_ambient_hack)
3643 bset |= (UINT64_C(1) << CAP_SETPCAP) |
3644 (UINT64_C(1) << CAP_SETUID) |
3645 (UINT64_C(1) << CAP_SETGID);
3646
3647 if (!cap_test_all(bset)) {
3648 r = capability_bounding_set_drop(bset, false);
ff0af2a1
LP
3649 if (r < 0) {
3650 *exit_status = EXIT_CAPABILITIES;
12145637 3651 return log_unit_error_errno(unit, r, "Failed to drop capabilities: %m");
3b8bddde 3652 }
4c2630eb 3653 }
3b8bddde 3654
755d4b67
IP
3655 /* This is done before enforce_user, but ambient set
3656 * does not survive over setresuid() if keep_caps is not set. */
943800f4 3657 if (!needs_ambient_hack) {
755d4b67
IP
3658 r = capability_ambient_set_apply(context->capability_ambient_set, true);
3659 if (r < 0) {
3660 *exit_status = EXIT_CAPABILITIES;
12145637 3661 return log_unit_error_errno(unit, r, "Failed to apply ambient capabilities (before UID change): %m");
755d4b67 3662 }
755d4b67 3663 }
165a31c0 3664 }
755d4b67 3665
fa97f630
JB
3666 /* chroot to root directory first, before we lose the ability to chroot */
3667 r = apply_root_directory(context, params, needs_mount_namespace, exit_status);
3668 if (r < 0)
3669 return log_unit_error_errno(unit, r, "Chrooting to the requested root directory failed: %m");
3670
165a31c0 3671 if (needs_setuid) {
08f67696 3672 if (uid_is_valid(uid)) {
ff0af2a1
LP
3673 r = enforce_user(context, uid);
3674 if (r < 0) {
3675 *exit_status = EXIT_USER;
12145637 3676 return log_unit_error_errno(unit, r, "Failed to change UID to " UID_FMT ": %m", uid);
5b6319dc 3677 }
165a31c0
LP
3678
3679 if (!needs_ambient_hack &&
3680 context->capability_ambient_set != 0) {
755d4b67
IP
3681
3682 /* Fix the ambient capabilities after user change. */
3683 r = capability_ambient_set_apply(context->capability_ambient_set, false);
3684 if (r < 0) {
3685 *exit_status = EXIT_CAPABILITIES;
12145637 3686 return log_unit_error_errno(unit, r, "Failed to apply ambient capabilities (after UID change): %m");
755d4b67
IP
3687 }
3688
3689 /* If we were asked to change user and ambient capabilities
3690 * were requested, we had to add keep-caps to the securebits
3691 * so that we would maintain the inherited capability set
3692 * through the setresuid(). Make sure that the bit is added
3693 * also to the context secure_bits so that we don't try to
3694 * drop the bit away next. */
3695
7f508f2c 3696 secure_bits |= 1<<SECURE_KEEP_CAPS;
755d4b67 3697 }
5b6319dc 3698 }
165a31c0 3699 }
d35fbf6b 3700
56ef8db9
JB
3701 /* Apply working directory here, because the working directory might be on NFS and only the user running
3702 * this service might have the correct privilege to change to the working directory */
fa97f630 3703 r = apply_working_directory(context, params, home, exit_status);
56ef8db9
JB
3704 if (r < 0)
3705 return log_unit_error_errno(unit, r, "Changing to the requested working directory failed: %m");
3706
165a31c0 3707 if (needs_sandboxing) {
37ac2744 3708 /* Apply other MAC contexts late, but before seccomp syscall filtering, as those should really be last to
5cd9cd35
LP
3709 * influence our own codepaths as little as possible. Moreover, applying MAC contexts usually requires
3710 * syscalls that are subject to seccomp filtering, hence should probably be applied before the syscalls
3711 * are restricted. */
3712
349cc4a5 3713#if HAVE_SELINUX
43b1f709 3714 if (use_selinux) {
5cd9cd35
LP
3715 char *exec_context = mac_selinux_context_net ?: context->selinux_context;
3716
3717 if (exec_context) {
3718 r = setexeccon(exec_context);
3719 if (r < 0) {
3720 *exit_status = EXIT_SELINUX_CONTEXT;
12145637 3721 return log_unit_error_errno(unit, r, "Failed to change SELinux context to %s: %m", exec_context);
5cd9cd35
LP
3722 }
3723 }
3724 }
3725#endif
3726
349cc4a5 3727#if HAVE_APPARMOR
43b1f709 3728 if (use_apparmor && context->apparmor_profile) {
5cd9cd35
LP
3729 r = aa_change_onexec(context->apparmor_profile);
3730 if (r < 0 && !context->apparmor_profile_ignore) {
3731 *exit_status = EXIT_APPARMOR_PROFILE;
12145637 3732 return log_unit_error_errno(unit, errno, "Failed to prepare AppArmor profile change to %s: %m", context->apparmor_profile);
5cd9cd35
LP
3733 }
3734 }
3735#endif
3736
165a31c0
LP
3737 /* PR_GET_SECUREBITS is not privileged, while PR_SET_SECUREBITS is. So to suppress potential EPERMs
3738 * we'll try not to call PR_SET_SECUREBITS unless necessary. */
755d4b67
IP
3739 if (prctl(PR_GET_SECUREBITS) != secure_bits)
3740 if (prctl(PR_SET_SECUREBITS, secure_bits) < 0) {
ff0af2a1 3741 *exit_status = EXIT_SECUREBITS;
12145637 3742 return log_unit_error_errno(unit, errno, "Failed to set process secure bits: %m");
ff01d048 3743 }
5b6319dc 3744
59eeb84b 3745 if (context_has_no_new_privileges(context))
d35fbf6b 3746 if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) < 0) {
ff0af2a1 3747 *exit_status = EXIT_NO_NEW_PRIVILEGES;
12145637 3748 return log_unit_error_errno(unit, errno, "Failed to disable new privileges: %m");
d35fbf6b
DM
3749 }
3750
349cc4a5 3751#if HAVE_SECCOMP
469830d1
LP
3752 r = apply_address_families(unit, context);
3753 if (r < 0) {
3754 *exit_status = EXIT_ADDRESS_FAMILIES;
12145637 3755 return log_unit_error_errno(unit, r, "Failed to restrict address families: %m");
4c2630eb 3756 }
04aa0cb9 3757
469830d1
LP
3758 r = apply_memory_deny_write_execute(unit, context);
3759 if (r < 0) {
3760 *exit_status = EXIT_SECCOMP;
12145637 3761 return log_unit_error_errno(unit, r, "Failed to disable writing to executable memory: %m");
f3e43635 3762 }
f4170c67 3763
469830d1
LP
3764 r = apply_restrict_realtime(unit, context);
3765 if (r < 0) {
3766 *exit_status = EXIT_SECCOMP;
12145637 3767 return log_unit_error_errno(unit, r, "Failed to apply realtime restrictions: %m");
f4170c67
LP
3768 }
3769
f69567cb
LP
3770 r = apply_restrict_suid_sgid(unit, context);
3771 if (r < 0) {
3772 *exit_status = EXIT_SECCOMP;
3773 return log_unit_error_errno(unit, r, "Failed to apply SUID/SGID restrictions: %m");
3774 }
3775
add00535
LP
3776 r = apply_restrict_namespaces(unit, context);
3777 if (r < 0) {
3778 *exit_status = EXIT_SECCOMP;
12145637 3779 return log_unit_error_errno(unit, r, "Failed to apply namespace restrictions: %m");
add00535
LP
3780 }
3781
469830d1
LP
3782 r = apply_protect_sysctl(unit, context);
3783 if (r < 0) {
3784 *exit_status = EXIT_SECCOMP;
12145637 3785 return log_unit_error_errno(unit, r, "Failed to apply sysctl restrictions: %m");
502d704e
DH
3786 }
3787
469830d1
LP
3788 r = apply_protect_kernel_modules(unit, context);
3789 if (r < 0) {
3790 *exit_status = EXIT_SECCOMP;
12145637 3791 return log_unit_error_errno(unit, r, "Failed to apply module loading restrictions: %m");
59eeb84b
LP
3792 }
3793
84703040
KK
3794 r = apply_protect_kernel_logs(unit, context);
3795 if (r < 0) {
3796 *exit_status = EXIT_SECCOMP;
3797 return log_unit_error_errno(unit, r, "Failed to apply kernel log restrictions: %m");
3798 }
3799
469830d1
LP
3800 r = apply_private_devices(unit, context);
3801 if (r < 0) {
3802 *exit_status = EXIT_SECCOMP;
12145637 3803 return log_unit_error_errno(unit, r, "Failed to set up private devices: %m");
469830d1
LP
3804 }
3805
3806 r = apply_syscall_archs(unit, context);
3807 if (r < 0) {
3808 *exit_status = EXIT_SECCOMP;
12145637 3809 return log_unit_error_errno(unit, r, "Failed to apply syscall architecture restrictions: %m");
ba128bb8
LP
3810 }
3811
78e864e5
TM
3812 r = apply_lock_personality(unit, context);
3813 if (r < 0) {
3814 *exit_status = EXIT_SECCOMP;
12145637 3815 return log_unit_error_errno(unit, r, "Failed to lock personalities: %m");
78e864e5
TM
3816 }
3817
5cd9cd35
LP
3818 /* This really should remain the last step before the execve(), to make sure our own code is unaffected
3819 * by the filter as little as possible. */
165a31c0 3820 r = apply_syscall_filter(unit, context, needs_ambient_hack);
469830d1
LP
3821 if (r < 0) {
3822 *exit_status = EXIT_SECCOMP;
12145637 3823 return log_unit_error_errno(unit, r, "Failed to apply system call filters: %m");
d35fbf6b
DM
3824 }
3825#endif
d35fbf6b 3826 }
034c6ed7 3827
00819cc1
LP
3828 if (!strv_isempty(context->unset_environment)) {
3829 char **ee = NULL;
3830
3831 ee = strv_env_delete(accum_env, 1, context->unset_environment);
3832 if (!ee) {
3833 *exit_status = EXIT_MEMORY;
12145637 3834 return log_oom();
00819cc1
LP
3835 }
3836
130d3d22 3837 strv_free_and_replace(accum_env, ee);
00819cc1
LP
3838 }
3839
7ca69792
AZ
3840 if (!FLAGS_SET(command->flags, EXEC_COMMAND_NO_ENV_EXPAND)) {
3841 replaced_argv = replace_env_argv(command->argv, accum_env);
3842 if (!replaced_argv) {
3843 *exit_status = EXIT_MEMORY;
3844 return log_oom();
3845 }
3846 final_argv = replaced_argv;
3847 } else
3848 final_argv = command->argv;
034c6ed7 3849
f1d34068 3850 if (DEBUG_LOGGING) {
d35fbf6b 3851 _cleanup_free_ char *line;
81a2b7ce 3852
d35fbf6b 3853 line = exec_command_line(final_argv);
a1230ff9 3854 if (line)
f2341e0a 3855 log_struct(LOG_DEBUG,
f2341e0a
LP
3856 "EXECUTABLE=%s", command->path,
3857 LOG_UNIT_MESSAGE(unit, "Executing: %s", line),
ba360bb0 3858 LOG_UNIT_ID(unit),
a1230ff9 3859 LOG_UNIT_INVOCATION_ID(unit));
d35fbf6b 3860 }
dd305ec9 3861
5686391b
LP
3862 if (exec_fd >= 0) {
3863 uint8_t hot = 1;
3864
3865 /* We have finished with all our initializations. Let's now let the manager know that. From this point
3866 * on, if the manager sees POLLHUP on the exec_fd, then execve() was successful. */
3867
3868 if (write(exec_fd, &hot, sizeof(hot)) < 0) {
3869 *exit_status = EXIT_EXEC;
3870 return log_unit_error_errno(unit, errno, "Failed to enable exec_fd: %m");
3871 }
3872 }
3873
2065ca69 3874 execve(command->path, final_argv, accum_env);
5686391b
LP
3875 r = -errno;
3876
3877 if (exec_fd >= 0) {
3878 uint8_t hot = 0;
3879
3880 /* The execve() failed. This means the exec_fd is still open. Which means we need to tell the manager
3881 * that POLLHUP on it no longer means execve() succeeded. */
3882
3883 if (write(exec_fd, &hot, sizeof(hot)) < 0) {
3884 *exit_status = EXIT_EXEC;
3885 return log_unit_error_errno(unit, errno, "Failed to disable exec_fd: %m");
3886 }
3887 }
12145637 3888
5686391b
LP
3889 if (r == -ENOENT && (command->flags & EXEC_COMMAND_IGNORE_FAILURE)) {
3890 log_struct_errno(LOG_INFO, r,
12145637
LP
3891 "MESSAGE_ID=" SD_MESSAGE_SPAWN_FAILED_STR,
3892 LOG_UNIT_ID(unit),
3893 LOG_UNIT_INVOCATION_ID(unit),
3894 LOG_UNIT_MESSAGE(unit, "Executable %s missing, skipping: %m",
3895 command->path),
a1230ff9 3896 "EXECUTABLE=%s", command->path);
12145637
LP
3897 return 0;
3898 }
3899
ff0af2a1 3900 *exit_status = EXIT_EXEC;
5686391b 3901 return log_unit_error_errno(unit, r, "Failed to execute command: %m");
d35fbf6b 3902}
81a2b7ce 3903
34cf6c43 3904static int exec_context_load_environment(const Unit *unit, const ExecContext *c, char ***l);
2caa38e9 3905static int exec_context_named_iofds(const ExecContext *c, const ExecParameters *p, int named_iofds[static 3]);
34cf6c43 3906
f2341e0a
LP
3907int exec_spawn(Unit *unit,
3908 ExecCommand *command,
d35fbf6b
DM
3909 const ExecContext *context,
3910 const ExecParameters *params,
3911 ExecRuntime *runtime,
29206d46 3912 DynamicCreds *dcreds,
d35fbf6b 3913 pid_t *ret) {
8351ceae 3914
ee39ca20 3915 int socket_fd, r, named_iofds[3] = { -1, -1, -1 }, *fds = NULL;
78f93209 3916 _cleanup_free_ char *subcgroup_path = NULL;
d35fbf6b 3917 _cleanup_strv_free_ char **files_env = NULL;
da6053d0 3918 size_t n_storage_fds = 0, n_socket_fds = 0;
ff0af2a1 3919 _cleanup_free_ char *line = NULL;
d35fbf6b 3920 pid_t pid;
8351ceae 3921
f2341e0a 3922 assert(unit);
d35fbf6b
DM
3923 assert(command);
3924 assert(context);
3925 assert(ret);
3926 assert(params);
25b583d7 3927 assert(params->fds || (params->n_socket_fds + params->n_storage_fds <= 0));
4298d0b5 3928
d35fbf6b
DM
3929 if (context->std_input == EXEC_INPUT_SOCKET ||
3930 context->std_output == EXEC_OUTPUT_SOCKET ||
3931 context->std_error == EXEC_OUTPUT_SOCKET) {
17df7223 3932
4c47affc 3933 if (params->n_socket_fds > 1) {
f2341e0a 3934 log_unit_error(unit, "Got more than one socket.");
d35fbf6b 3935 return -EINVAL;
ff0af2a1 3936 }
eef65bf3 3937
4c47affc 3938 if (params->n_socket_fds == 0) {
488ab41c
AA
3939 log_unit_error(unit, "Got no socket.");
3940 return -EINVAL;
3941 }
3942
d35fbf6b
DM
3943 socket_fd = params->fds[0];
3944 } else {
3945 socket_fd = -1;
3946 fds = params->fds;
9b141911 3947 n_socket_fds = params->n_socket_fds;
25b583d7 3948 n_storage_fds = params->n_storage_fds;
d35fbf6b 3949 }
94f04347 3950
34cf6c43 3951 r = exec_context_named_iofds(context, params, named_iofds);
52c239d7
LB
3952 if (r < 0)
3953 return log_unit_error_errno(unit, r, "Failed to load a named file descriptor: %m");
3954
f2341e0a 3955 r = exec_context_load_environment(unit, context, &files_env);
ff0af2a1 3956 if (r < 0)
f2341e0a 3957 return log_unit_error_errno(unit, r, "Failed to load environment files: %m");
034c6ed7 3958
ee39ca20 3959 line = exec_command_line(command->argv);
d35fbf6b
DM
3960 if (!line)
3961 return log_oom();
fab56fc5 3962
f2341e0a 3963 log_struct(LOG_DEBUG,
f2341e0a
LP
3964 LOG_UNIT_MESSAGE(unit, "About to execute: %s", line),
3965 "EXECUTABLE=%s", command->path,
ba360bb0 3966 LOG_UNIT_ID(unit),
a1230ff9 3967 LOG_UNIT_INVOCATION_ID(unit));
12145637 3968
78f93209
LP
3969 if (params->cgroup_path) {
3970 r = exec_parameters_get_cgroup_path(params, &subcgroup_path);
3971 if (r < 0)
3972 return log_unit_error_errno(unit, r, "Failed to acquire subcgroup path: %m");
3973 if (r > 0) { /* We are using a child cgroup */
3974 r = cg_create(SYSTEMD_CGROUP_CONTROLLER, subcgroup_path);
3975 if (r < 0)
3976 return log_unit_error_errno(unit, r, "Failed to create control group '%s': %m", subcgroup_path);
3977 }
3978 }
3979
d35fbf6b
DM
3980 pid = fork();
3981 if (pid < 0)
74129a12 3982 return log_unit_error_errno(unit, errno, "Failed to fork: %m");
d35fbf6b
DM
3983
3984 if (pid == 0) {
12145637 3985 int exit_status = EXIT_SUCCESS;
ff0af2a1 3986
f2341e0a
LP
3987 r = exec_child(unit,
3988 command,
ff0af2a1
LP
3989 context,
3990 params,
3991 runtime,
29206d46 3992 dcreds,
ff0af2a1 3993 socket_fd,
52c239d7 3994 named_iofds,
4c47affc 3995 fds,
9b141911 3996 n_socket_fds,
25b583d7 3997 n_storage_fds,
ff0af2a1 3998 files_env,
00d9ef85 3999 unit->manager->user_lookup_fds[1],
12145637
LP
4000 &exit_status);
4001
e1714f02
ZJS
4002 if (r < 0) {
4003 const char *status =
4004 exit_status_to_string(exit_status,
e04ed6db 4005 EXIT_STATUS_LIBC | EXIT_STATUS_SYSTEMD);
e1714f02 4006
12145637
LP
4007 log_struct_errno(LOG_ERR, r,
4008 "MESSAGE_ID=" SD_MESSAGE_SPAWN_FAILED_STR,
4009 LOG_UNIT_ID(unit),
4010 LOG_UNIT_INVOCATION_ID(unit),
4011 LOG_UNIT_MESSAGE(unit, "Failed at step %s spawning %s: %m",
e1714f02 4012 status, command->path),
a1230ff9 4013 "EXECUTABLE=%s", command->path);
e1714f02 4014 }
4c2630eb 4015
ff0af2a1 4016 _exit(exit_status);
034c6ed7
LP
4017 }
4018
f2341e0a 4019 log_unit_debug(unit, "Forked %s as "PID_FMT, command->path, pid);
23635a85 4020
78f93209
LP
4021 /* We add the new process to the cgroup both in the child (so that we can be sure that no user code is ever
4022 * executed outside of the cgroup) and in the parent (so that we can be sure that when we kill the cgroup the
4023 * process will be killed too). */
4024 if (subcgroup_path)
4025 (void) cg_attach(SYSTEMD_CGROUP_CONTROLLER, subcgroup_path, pid);
2da3263a 4026
b58b4116 4027 exec_status_start(&command->exec_status, pid);
9fb86720 4028
034c6ed7 4029 *ret = pid;
5cb5a6ff
LP
4030 return 0;
4031}
4032
034c6ed7 4033void exec_context_init(ExecContext *c) {
3536f49e
YW
4034 ExecDirectoryType i;
4035
034c6ed7
LP
4036 assert(c);
4037
4c12626c 4038 c->umask = 0022;
9eba9da4 4039 c->ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 0);
94f04347 4040 c->cpu_sched_policy = SCHED_OTHER;
071830ff 4041 c->syslog_priority = LOG_DAEMON|LOG_INFO;
74922904 4042 c->syslog_level_prefix = true;
353e12c2 4043 c->ignore_sigpipe = true;
3a43da28 4044 c->timer_slack_nsec = NSEC_INFINITY;
050f7277 4045 c->personality = PERSONALITY_INVALID;
72fd1768 4046 for (i = 0; i < _EXEC_DIRECTORY_TYPE_MAX; i++)
3536f49e 4047 c->directories[i].mode = 0755;
12213aed 4048 c->timeout_clean_usec = USEC_INFINITY;
a103496c 4049 c->capability_bounding_set = CAP_ALL;
aa9d574d
YW
4050 assert_cc(NAMESPACE_FLAGS_INITIAL != NAMESPACE_FLAGS_ALL);
4051 c->restrict_namespaces = NAMESPACE_FLAGS_INITIAL;
d3070fbd 4052 c->log_level_max = -1;
b070c7c0 4053 numa_policy_reset(&c->numa_policy);
034c6ed7
LP
4054}
4055
613b411c 4056void exec_context_done(ExecContext *c) {
3536f49e 4057 ExecDirectoryType i;
d3070fbd 4058 size_t l;
5cb5a6ff
LP
4059
4060 assert(c);
4061
6796073e
LP
4062 c->environment = strv_free(c->environment);
4063 c->environment_files = strv_free(c->environment_files);
b4c14404 4064 c->pass_environment = strv_free(c->pass_environment);
00819cc1 4065 c->unset_environment = strv_free(c->unset_environment);
8c7be95e 4066
31ce987c 4067 rlimit_free_all(c->rlimit);
034c6ed7 4068
2038c3f5 4069 for (l = 0; l < 3; l++) {
52c239d7 4070 c->stdio_fdname[l] = mfree(c->stdio_fdname[l]);
2038c3f5
LP
4071 c->stdio_file[l] = mfree(c->stdio_file[l]);
4072 }
52c239d7 4073
a1e58e8e
LP
4074 c->working_directory = mfree(c->working_directory);
4075 c->root_directory = mfree(c->root_directory);
915e6d16 4076 c->root_image = mfree(c->root_image);
a1e58e8e
LP
4077 c->tty_path = mfree(c->tty_path);
4078 c->syslog_identifier = mfree(c->syslog_identifier);
4079 c->user = mfree(c->user);
4080 c->group = mfree(c->group);
034c6ed7 4081
6796073e 4082 c->supplementary_groups = strv_free(c->supplementary_groups);
94f04347 4083
a1e58e8e 4084 c->pam_name = mfree(c->pam_name);
5b6319dc 4085
2a624c36
AP
4086 c->read_only_paths = strv_free(c->read_only_paths);
4087 c->read_write_paths = strv_free(c->read_write_paths);
4088 c->inaccessible_paths = strv_free(c->inaccessible_paths);
82c121a4 4089
d2d6c096 4090 bind_mount_free_many(c->bind_mounts, c->n_bind_mounts);
8e06d57c
YW
4091 c->bind_mounts = NULL;
4092 c->n_bind_mounts = 0;
2abd4e38
YW
4093 temporary_filesystem_free_many(c->temporary_filesystems, c->n_temporary_filesystems);
4094 c->temporary_filesystems = NULL;
4095 c->n_temporary_filesystems = 0;
d2d6c096 4096
0985c7c4 4097 cpu_set_reset(&c->cpu_set);
b070c7c0 4098 numa_policy_reset(&c->numa_policy);
86a3475b 4099
a1e58e8e
LP
4100 c->utmp_id = mfree(c->utmp_id);
4101 c->selinux_context = mfree(c->selinux_context);
4102 c->apparmor_profile = mfree(c->apparmor_profile);
5b8e1b77 4103 c->smack_process_label = mfree(c->smack_process_label);
eef65bf3 4104
8cfa775f 4105 c->syscall_filter = hashmap_free(c->syscall_filter);
525d3cc7
LP
4106 c->syscall_archs = set_free(c->syscall_archs);
4107 c->address_families = set_free(c->address_families);
e66cf1a3 4108
72fd1768 4109 for (i = 0; i < _EXEC_DIRECTORY_TYPE_MAX; i++)
3536f49e 4110 c->directories[i].paths = strv_free(c->directories[i].paths);
d3070fbd
LP
4111
4112 c->log_level_max = -1;
4113
4114 exec_context_free_log_extra_fields(c);
08f3be7a 4115
5ac1530e
ZJS
4116 c->log_ratelimit_interval_usec = 0;
4117 c->log_ratelimit_burst = 0;
90fc172e 4118
08f3be7a
LP
4119 c->stdin_data = mfree(c->stdin_data);
4120 c->stdin_data_size = 0;
a8d08f39
LP
4121
4122 c->network_namespace_path = mfree(c->network_namespace_path);
e66cf1a3
LP
4123}
4124
34cf6c43 4125int exec_context_destroy_runtime_directory(const ExecContext *c, const char *runtime_prefix) {
e66cf1a3
LP
4126 char **i;
4127
4128 assert(c);
4129
4130 if (!runtime_prefix)
4131 return 0;
4132
3536f49e 4133 STRV_FOREACH(i, c->directories[EXEC_DIRECTORY_RUNTIME].paths) {
e66cf1a3
LP
4134 _cleanup_free_ char *p;
4135
494d0247
YW
4136 if (exec_directory_is_private(c, EXEC_DIRECTORY_RUNTIME))
4137 p = path_join(runtime_prefix, "private", *i);
4138 else
4139 p = path_join(runtime_prefix, *i);
e66cf1a3
LP
4140 if (!p)
4141 return -ENOMEM;
4142
7bc4bf4a
LP
4143 /* We execute this synchronously, since we need to be sure this is gone when we start the
4144 * service next. */
c6878637 4145 (void) rm_rf(p, REMOVE_ROOT);
e66cf1a3
LP
4146 }
4147
4148 return 0;
5cb5a6ff
LP
4149}
4150
34cf6c43 4151static void exec_command_done(ExecCommand *c) {
43d0fcbd
LP
4152 assert(c);
4153
a1e58e8e 4154 c->path = mfree(c->path);
6796073e 4155 c->argv = strv_free(c->argv);
43d0fcbd
LP
4156}
4157
da6053d0
LP
4158void exec_command_done_array(ExecCommand *c, size_t n) {
4159 size_t i;
43d0fcbd
LP
4160
4161 for (i = 0; i < n; i++)
4162 exec_command_done(c+i);
4163}
4164
f1acf85a 4165ExecCommand* exec_command_free_list(ExecCommand *c) {
5cb5a6ff
LP
4166 ExecCommand *i;
4167
4168 while ((i = c)) {
71fda00f 4169 LIST_REMOVE(command, c, i);
43d0fcbd 4170 exec_command_done(i);
5cb5a6ff
LP
4171 free(i);
4172 }
f1acf85a
ZJS
4173
4174 return NULL;
5cb5a6ff
LP
4175}
4176
da6053d0
LP
4177void exec_command_free_array(ExecCommand **c, size_t n) {
4178 size_t i;
034c6ed7 4179
f1acf85a
ZJS
4180 for (i = 0; i < n; i++)
4181 c[i] = exec_command_free_list(c[i]);
034c6ed7
LP
4182}
4183
6a1d4d9f
LP
4184void exec_command_reset_status_array(ExecCommand *c, size_t n) {
4185 size_t i;
4186
4187 for (i = 0; i < n; i++)
4188 exec_status_reset(&c[i].exec_status);
4189}
4190
4191void exec_command_reset_status_list_array(ExecCommand **c, size_t n) {
4192 size_t i;
4193
4194 for (i = 0; i < n; i++) {
4195 ExecCommand *z;
4196
4197 LIST_FOREACH(command, z, c[i])
4198 exec_status_reset(&z->exec_status);
4199 }
4200}
4201
039f0e70 4202typedef struct InvalidEnvInfo {
34cf6c43 4203 const Unit *unit;
039f0e70
LP
4204 const char *path;
4205} InvalidEnvInfo;
4206
4207static void invalid_env(const char *p, void *userdata) {
4208 InvalidEnvInfo *info = userdata;
4209
f2341e0a 4210 log_unit_error(info->unit, "Ignoring invalid environment assignment '%s': %s", p, info->path);
039f0e70
LP
4211}
4212
52c239d7
LB
4213const char* exec_context_fdname(const ExecContext *c, int fd_index) {
4214 assert(c);
4215
4216 switch (fd_index) {
5073ff6b 4217
52c239d7
LB
4218 case STDIN_FILENO:
4219 if (c->std_input != EXEC_INPUT_NAMED_FD)
4220 return NULL;
5073ff6b 4221
52c239d7 4222 return c->stdio_fdname[STDIN_FILENO] ?: "stdin";
5073ff6b 4223
52c239d7
LB
4224 case STDOUT_FILENO:
4225 if (c->std_output != EXEC_OUTPUT_NAMED_FD)
4226 return NULL;
5073ff6b 4227
52c239d7 4228 return c->stdio_fdname[STDOUT_FILENO] ?: "stdout";
5073ff6b 4229
52c239d7
LB
4230 case STDERR_FILENO:
4231 if (c->std_error != EXEC_OUTPUT_NAMED_FD)
4232 return NULL;
5073ff6b 4233
52c239d7 4234 return c->stdio_fdname[STDERR_FILENO] ?: "stderr";
5073ff6b 4235
52c239d7
LB
4236 default:
4237 return NULL;
4238 }
4239}
4240
2caa38e9
LP
4241static int exec_context_named_iofds(
4242 const ExecContext *c,
4243 const ExecParameters *p,
4244 int named_iofds[static 3]) {
4245
da6053d0 4246 size_t i, targets;
56fbd561 4247 const char* stdio_fdname[3];
da6053d0 4248 size_t n_fds;
52c239d7
LB
4249
4250 assert(c);
4251 assert(p);
2caa38e9 4252 assert(named_iofds);
52c239d7
LB
4253
4254 targets = (c->std_input == EXEC_INPUT_NAMED_FD) +
4255 (c->std_output == EXEC_OUTPUT_NAMED_FD) +
4256 (c->std_error == EXEC_OUTPUT_NAMED_FD);
4257
4258 for (i = 0; i < 3; i++)
4259 stdio_fdname[i] = exec_context_fdname(c, i);
4260
4c47affc
FB
4261 n_fds = p->n_storage_fds + p->n_socket_fds;
4262
4263 for (i = 0; i < n_fds && targets > 0; i++)
56fbd561
ZJS
4264 if (named_iofds[STDIN_FILENO] < 0 &&
4265 c->std_input == EXEC_INPUT_NAMED_FD &&
4266 stdio_fdname[STDIN_FILENO] &&
4267 streq(p->fd_names[i], stdio_fdname[STDIN_FILENO])) {
4268
52c239d7
LB
4269 named_iofds[STDIN_FILENO] = p->fds[i];
4270 targets--;
56fbd561
ZJS
4271
4272 } else if (named_iofds[STDOUT_FILENO] < 0 &&
4273 c->std_output == EXEC_OUTPUT_NAMED_FD &&
4274 stdio_fdname[STDOUT_FILENO] &&
4275 streq(p->fd_names[i], stdio_fdname[STDOUT_FILENO])) {
4276
52c239d7
LB
4277 named_iofds[STDOUT_FILENO] = p->fds[i];
4278 targets--;
56fbd561
ZJS
4279
4280 } else if (named_iofds[STDERR_FILENO] < 0 &&
4281 c->std_error == EXEC_OUTPUT_NAMED_FD &&
4282 stdio_fdname[STDERR_FILENO] &&
4283 streq(p->fd_names[i], stdio_fdname[STDERR_FILENO])) {
4284
52c239d7
LB
4285 named_iofds[STDERR_FILENO] = p->fds[i];
4286 targets--;
4287 }
4288
56fbd561 4289 return targets == 0 ? 0 : -ENOENT;
52c239d7
LB
4290}
4291
34cf6c43 4292static int exec_context_load_environment(const Unit *unit, const ExecContext *c, char ***l) {
8c7be95e
LP
4293 char **i, **r = NULL;
4294
4295 assert(c);
4296 assert(l);
4297
4298 STRV_FOREACH(i, c->environment_files) {
4299 char *fn;
52511fae
ZJS
4300 int k;
4301 unsigned n;
8c7be95e
LP
4302 bool ignore = false;
4303 char **p;
7fd1b19b 4304 _cleanup_globfree_ glob_t pglob = {};
8c7be95e
LP
4305
4306 fn = *i;
4307
4308 if (fn[0] == '-') {
4309 ignore = true;
313cefa1 4310 fn++;
8c7be95e
LP
4311 }
4312
4313 if (!path_is_absolute(fn)) {
8c7be95e
LP
4314 if (ignore)
4315 continue;
4316
4317 strv_free(r);
4318 return -EINVAL;
4319 }
4320
2bef10ab 4321 /* Filename supports globbing, take all matching files */
d8c92e8b
ZJS
4322 k = safe_glob(fn, 0, &pglob);
4323 if (k < 0) {
2bef10ab
PL
4324 if (ignore)
4325 continue;
8c7be95e 4326
2bef10ab 4327 strv_free(r);
d8c92e8b 4328 return k;
2bef10ab 4329 }
8c7be95e 4330
d8c92e8b
ZJS
4331 /* When we don't match anything, -ENOENT should be returned */
4332 assert(pglob.gl_pathc > 0);
4333
4334 for (n = 0; n < pglob.gl_pathc; n++) {
aa8fbc74 4335 k = load_env_file(NULL, pglob.gl_pathv[n], &p);
2bef10ab
PL
4336 if (k < 0) {
4337 if (ignore)
4338 continue;
8c7be95e 4339
2bef10ab 4340 strv_free(r);
2bef10ab 4341 return k;
e9c1ea9d 4342 }
ebc05a09 4343 /* Log invalid environment variables with filename */
039f0e70
LP
4344 if (p) {
4345 InvalidEnvInfo info = {
f2341e0a 4346 .unit = unit,
039f0e70
LP
4347 .path = pglob.gl_pathv[n]
4348 };
4349
4350 p = strv_env_clean_with_callback(p, invalid_env, &info);
4351 }
8c7be95e 4352
234519ae 4353 if (!r)
2bef10ab
PL
4354 r = p;
4355 else {
4356 char **m;
8c7be95e 4357
2bef10ab
PL
4358 m = strv_env_merge(2, r, p);
4359 strv_free(r);
4360 strv_free(p);
c84a9488 4361 if (!m)
2bef10ab 4362 return -ENOMEM;
2bef10ab
PL
4363
4364 r = m;
4365 }
8c7be95e
LP
4366 }
4367 }
4368
4369 *l = r;
4370
4371 return 0;
4372}
4373
6ac8fdc9 4374static bool tty_may_match_dev_console(const char *tty) {
7b912648 4375 _cleanup_free_ char *resolved = NULL;
6ac8fdc9 4376
1e22b5cd
LP
4377 if (!tty)
4378 return true;
4379
a119ec7c 4380 tty = skip_dev_prefix(tty);
6ac8fdc9
MS
4381
4382 /* trivial identity? */
4383 if (streq(tty, "console"))
4384 return true;
4385
7b912648
LP
4386 if (resolve_dev_console(&resolved) < 0)
4387 return true; /* if we could not resolve, assume it may */
6ac8fdc9
MS
4388
4389 /* "tty0" means the active VC, so it may be the same sometimes */
955f1c85 4390 return path_equal(resolved, tty) || (streq(resolved, "tty0") && tty_is_vc(tty));
6ac8fdc9
MS
4391}
4392
6c0ae739
LP
4393static bool exec_context_may_touch_tty(const ExecContext *ec) {
4394 assert(ec);
1e22b5cd 4395
6c0ae739 4396 return ec->tty_reset ||
1e22b5cd
LP
4397 ec->tty_vhangup ||
4398 ec->tty_vt_disallocate ||
6ac8fdc9
MS
4399 is_terminal_input(ec->std_input) ||
4400 is_terminal_output(ec->std_output) ||
6c0ae739
LP
4401 is_terminal_output(ec->std_error);
4402}
4403
4404bool exec_context_may_touch_console(const ExecContext *ec) {
4405
4406 return exec_context_may_touch_tty(ec) &&
1e22b5cd 4407 tty_may_match_dev_console(exec_context_tty_path(ec));
6ac8fdc9
MS
4408}
4409
15ae422b
LP
4410static void strv_fprintf(FILE *f, char **l) {
4411 char **g;
4412
4413 assert(f);
4414
4415 STRV_FOREACH(g, l)
4416 fprintf(f, " %s", *g);
4417}
4418
34cf6c43 4419void exec_context_dump(const ExecContext *c, FILE* f, const char *prefix) {
12213aed 4420 char **e, **d, buf_clean[FORMAT_TIMESPAN_MAX];
d3070fbd 4421 ExecDirectoryType dt;
94f04347 4422 unsigned i;
add00535 4423 int r;
9eba9da4 4424
5cb5a6ff
LP
4425 assert(c);
4426 assert(f);
4427
4ad49000 4428 prefix = strempty(prefix);
5cb5a6ff
LP
4429
4430 fprintf(f,
94f04347
LP
4431 "%sUMask: %04o\n"
4432 "%sWorkingDirectory: %s\n"
451a074f 4433 "%sRootDirectory: %s\n"
15ae422b 4434 "%sNonBlocking: %s\n"
64747e2d 4435 "%sPrivateTmp: %s\n"
7f112f50 4436 "%sPrivateDevices: %s\n"
59eeb84b 4437 "%sProtectKernelTunables: %s\n"
e66a2f65 4438 "%sProtectKernelModules: %s\n"
84703040 4439 "%sProtectKernelLogs: %s\n"
59eeb84b 4440 "%sProtectControlGroups: %s\n"
d251207d
LP
4441 "%sPrivateNetwork: %s\n"
4442 "%sPrivateUsers: %s\n"
1b8689f9
LP
4443 "%sProtectHome: %s\n"
4444 "%sProtectSystem: %s\n"
5d997827 4445 "%sMountAPIVFS: %s\n"
f3e43635 4446 "%sIgnoreSIGPIPE: %s\n"
f4170c67 4447 "%sMemoryDenyWriteExecute: %s\n"
b1edf445 4448 "%sRestrictRealtime: %s\n"
f69567cb 4449 "%sRestrictSUIDSGID: %s\n"
aecd5ac6
TM
4450 "%sKeyringMode: %s\n"
4451 "%sProtectHostname: %s\n",
5cb5a6ff 4452 prefix, c->umask,
9eba9da4 4453 prefix, c->working_directory ? c->working_directory : "/",
451a074f 4454 prefix, c->root_directory ? c->root_directory : "/",
15ae422b 4455 prefix, yes_no(c->non_blocking),
64747e2d 4456 prefix, yes_no(c->private_tmp),
7f112f50 4457 prefix, yes_no(c->private_devices),
59eeb84b 4458 prefix, yes_no(c->protect_kernel_tunables),
e66a2f65 4459 prefix, yes_no(c->protect_kernel_modules),
84703040 4460 prefix, yes_no(c->protect_kernel_logs),
59eeb84b 4461 prefix, yes_no(c->protect_control_groups),
d251207d
LP
4462 prefix, yes_no(c->private_network),
4463 prefix, yes_no(c->private_users),
1b8689f9
LP
4464 prefix, protect_home_to_string(c->protect_home),
4465 prefix, protect_system_to_string(c->protect_system),
5d997827 4466 prefix, yes_no(c->mount_apivfs),
f3e43635 4467 prefix, yes_no(c->ignore_sigpipe),
f4170c67 4468 prefix, yes_no(c->memory_deny_write_execute),
b1edf445 4469 prefix, yes_no(c->restrict_realtime),
f69567cb 4470 prefix, yes_no(c->restrict_suid_sgid),
aecd5ac6
TM
4471 prefix, exec_keyring_mode_to_string(c->keyring_mode),
4472 prefix, yes_no(c->protect_hostname));
fb33a393 4473
915e6d16
LP
4474 if (c->root_image)
4475 fprintf(f, "%sRootImage: %s\n", prefix, c->root_image);
4476
8c7be95e
LP
4477 STRV_FOREACH(e, c->environment)
4478 fprintf(f, "%sEnvironment: %s\n", prefix, *e);
4479
4480 STRV_FOREACH(e, c->environment_files)
4481 fprintf(f, "%sEnvironmentFile: %s\n", prefix, *e);
94f04347 4482
b4c14404
FB
4483 STRV_FOREACH(e, c->pass_environment)
4484 fprintf(f, "%sPassEnvironment: %s\n", prefix, *e);
4485
00819cc1
LP
4486 STRV_FOREACH(e, c->unset_environment)
4487 fprintf(f, "%sUnsetEnvironment: %s\n", prefix, *e);
4488
53f47dfc
YW
4489 fprintf(f, "%sRuntimeDirectoryPreserve: %s\n", prefix, exec_preserve_mode_to_string(c->runtime_directory_preserve_mode));
4490
72fd1768 4491 for (dt = 0; dt < _EXEC_DIRECTORY_TYPE_MAX; dt++) {
3536f49e
YW
4492 fprintf(f, "%s%sMode: %04o\n", prefix, exec_directory_type_to_string(dt), c->directories[dt].mode);
4493
4494 STRV_FOREACH(d, c->directories[dt].paths)
4495 fprintf(f, "%s%s: %s\n", prefix, exec_directory_type_to_string(dt), *d);
4496 }
c2bbd90b 4497
12213aed
YW
4498 fprintf(f,
4499 "%sTimeoutCleanSec: %s\n",
4500 prefix, format_timespan(buf_clean, sizeof(buf_clean), c->timeout_clean_usec, USEC_PER_SEC));
4501
fb33a393
LP
4502 if (c->nice_set)
4503 fprintf(f,
4504 "%sNice: %i\n",
4505 prefix, c->nice);
4506
dd6c17b1 4507 if (c->oom_score_adjust_set)
fb33a393 4508 fprintf(f,
dd6c17b1
LP
4509 "%sOOMScoreAdjust: %i\n",
4510 prefix, c->oom_score_adjust);
9eba9da4 4511
94f04347 4512 for (i = 0; i < RLIM_NLIMITS; i++)
3c11da9d 4513 if (c->rlimit[i]) {
4c3a2b84 4514 fprintf(f, "%sLimit%s: " RLIM_FMT "\n",
3c11da9d 4515 prefix, rlimit_to_string(i), c->rlimit[i]->rlim_max);
4c3a2b84 4516 fprintf(f, "%sLimit%sSoft: " RLIM_FMT "\n",
3c11da9d
EV
4517 prefix, rlimit_to_string(i), c->rlimit[i]->rlim_cur);
4518 }
94f04347 4519
f8b69d1d 4520 if (c->ioprio_set) {
1756a011 4521 _cleanup_free_ char *class_str = NULL;
f8b69d1d 4522
837df140
YW
4523 r = ioprio_class_to_string_alloc(IOPRIO_PRIO_CLASS(c->ioprio), &class_str);
4524 if (r >= 0)
4525 fprintf(f, "%sIOSchedulingClass: %s\n", prefix, class_str);
4526
4527 fprintf(f, "%sIOPriority: %lu\n", prefix, IOPRIO_PRIO_DATA(c->ioprio));
f8b69d1d 4528 }
94f04347 4529
f8b69d1d 4530 if (c->cpu_sched_set) {
1756a011 4531 _cleanup_free_ char *policy_str = NULL;
f8b69d1d 4532
837df140
YW
4533 r = sched_policy_to_string_alloc(c->cpu_sched_policy, &policy_str);
4534 if (r >= 0)
4535 fprintf(f, "%sCPUSchedulingPolicy: %s\n", prefix, policy_str);
4536
94f04347 4537 fprintf(f,
38b48754
LP
4538 "%sCPUSchedulingPriority: %i\n"
4539 "%sCPUSchedulingResetOnFork: %s\n",
38b48754
LP
4540 prefix, c->cpu_sched_priority,
4541 prefix, yes_no(c->cpu_sched_reset_on_fork));
b929bf04 4542 }
94f04347 4543
0985c7c4 4544 if (c->cpu_set.set) {
e7fca352
MS
4545 _cleanup_free_ char *affinity = NULL;
4546
4547 affinity = cpu_set_to_range_string(&c->cpu_set);
4548 fprintf(f, "%sCPUAffinity: %s\n", prefix, affinity);
94f04347
LP
4549 }
4550
b070c7c0
MS
4551 if (mpol_is_valid(numa_policy_get_type(&c->numa_policy))) {
4552 _cleanup_free_ char *nodes = NULL;
4553
4554 nodes = cpu_set_to_range_string(&c->numa_policy.nodes);
4555 fprintf(f, "%sNUMAPolicy: %s\n", prefix, mpol_to_string(numa_policy_get_type(&c->numa_policy)));
4556 fprintf(f, "%sNUMAMask: %s\n", prefix, strnull(nodes));
4557 }
4558
3a43da28 4559 if (c->timer_slack_nsec != NSEC_INFINITY)
ccd06097 4560 fprintf(f, "%sTimerSlackNSec: "NSEC_FMT "\n", prefix, c->timer_slack_nsec);
94f04347
LP
4561
4562 fprintf(f,
80876c20
LP
4563 "%sStandardInput: %s\n"
4564 "%sStandardOutput: %s\n"
4565 "%sStandardError: %s\n",
4566 prefix, exec_input_to_string(c->std_input),
4567 prefix, exec_output_to_string(c->std_output),
4568 prefix, exec_output_to_string(c->std_error));
4569
befc4a80
LP
4570 if (c->std_input == EXEC_INPUT_NAMED_FD)
4571 fprintf(f, "%sStandardInputFileDescriptorName: %s\n", prefix, c->stdio_fdname[STDIN_FILENO]);
4572 if (c->std_output == EXEC_OUTPUT_NAMED_FD)
4573 fprintf(f, "%sStandardOutputFileDescriptorName: %s\n", prefix, c->stdio_fdname[STDOUT_FILENO]);
4574 if (c->std_error == EXEC_OUTPUT_NAMED_FD)
4575 fprintf(f, "%sStandardErrorFileDescriptorName: %s\n", prefix, c->stdio_fdname[STDERR_FILENO]);
4576
4577 if (c->std_input == EXEC_INPUT_FILE)
4578 fprintf(f, "%sStandardInputFile: %s\n", prefix, c->stdio_file[STDIN_FILENO]);
4579 if (c->std_output == EXEC_OUTPUT_FILE)
4580 fprintf(f, "%sStandardOutputFile: %s\n", prefix, c->stdio_file[STDOUT_FILENO]);
566b7d23
ZD
4581 if (c->std_output == EXEC_OUTPUT_FILE_APPEND)
4582 fprintf(f, "%sStandardOutputFileToAppend: %s\n", prefix, c->stdio_file[STDOUT_FILENO]);
befc4a80
LP
4583 if (c->std_error == EXEC_OUTPUT_FILE)
4584 fprintf(f, "%sStandardErrorFile: %s\n", prefix, c->stdio_file[STDERR_FILENO]);
566b7d23
ZD
4585 if (c->std_error == EXEC_OUTPUT_FILE_APPEND)
4586 fprintf(f, "%sStandardErrorFileToAppend: %s\n", prefix, c->stdio_file[STDERR_FILENO]);
befc4a80 4587
80876c20
LP
4588 if (c->tty_path)
4589 fprintf(f,
6ea832a2
LP
4590 "%sTTYPath: %s\n"
4591 "%sTTYReset: %s\n"
4592 "%sTTYVHangup: %s\n"
4593 "%sTTYVTDisallocate: %s\n",
4594 prefix, c->tty_path,
4595 prefix, yes_no(c->tty_reset),
4596 prefix, yes_no(c->tty_vhangup),
4597 prefix, yes_no(c->tty_vt_disallocate));
94f04347 4598
9f6444eb
LP
4599 if (IN_SET(c->std_output,
4600 EXEC_OUTPUT_SYSLOG,
4601 EXEC_OUTPUT_KMSG,
4602 EXEC_OUTPUT_JOURNAL,
4603 EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
4604 EXEC_OUTPUT_KMSG_AND_CONSOLE,
4605 EXEC_OUTPUT_JOURNAL_AND_CONSOLE) ||
4606 IN_SET(c->std_error,
4607 EXEC_OUTPUT_SYSLOG,
4608 EXEC_OUTPUT_KMSG,
4609 EXEC_OUTPUT_JOURNAL,
4610 EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
4611 EXEC_OUTPUT_KMSG_AND_CONSOLE,
4612 EXEC_OUTPUT_JOURNAL_AND_CONSOLE)) {
f8b69d1d 4613
5ce70e5b 4614 _cleanup_free_ char *fac_str = NULL, *lvl_str = NULL;
f8b69d1d 4615
837df140
YW
4616 r = log_facility_unshifted_to_string_alloc(c->syslog_priority >> 3, &fac_str);
4617 if (r >= 0)
4618 fprintf(f, "%sSyslogFacility: %s\n", prefix, fac_str);
f8b69d1d 4619
837df140
YW
4620 r = log_level_to_string_alloc(LOG_PRI(c->syslog_priority), &lvl_str);
4621 if (r >= 0)
4622 fprintf(f, "%sSyslogLevel: %s\n", prefix, lvl_str);
f8b69d1d 4623 }
94f04347 4624
d3070fbd
LP
4625 if (c->log_level_max >= 0) {
4626 _cleanup_free_ char *t = NULL;
4627
4628 (void) log_level_to_string_alloc(c->log_level_max, &t);
4629
4630 fprintf(f, "%sLogLevelMax: %s\n", prefix, strna(t));
4631 }
4632
5ac1530e 4633 if (c->log_ratelimit_interval_usec > 0) {
90fc172e
AZ
4634 char buf_timespan[FORMAT_TIMESPAN_MAX];
4635
4636 fprintf(f,
4637 "%sLogRateLimitIntervalSec: %s\n",
5ac1530e 4638 prefix, format_timespan(buf_timespan, sizeof(buf_timespan), c->log_ratelimit_interval_usec, USEC_PER_SEC));
90fc172e
AZ
4639 }
4640
5ac1530e
ZJS
4641 if (c->log_ratelimit_burst > 0)
4642 fprintf(f, "%sLogRateLimitBurst: %u\n", prefix, c->log_ratelimit_burst);
90fc172e 4643
d3070fbd
LP
4644 if (c->n_log_extra_fields > 0) {
4645 size_t j;
4646
4647 for (j = 0; j < c->n_log_extra_fields; j++) {
4648 fprintf(f, "%sLogExtraFields: ", prefix);
4649 fwrite(c->log_extra_fields[j].iov_base,
4650 1, c->log_extra_fields[j].iov_len,
4651 f);
4652 fputc('\n', f);
4653 }
4654 }
4655
07d46372
YW
4656 if (c->secure_bits) {
4657 _cleanup_free_ char *str = NULL;
4658
4659 r = secure_bits_to_string_alloc(c->secure_bits, &str);
4660 if (r >= 0)
4661 fprintf(f, "%sSecure Bits: %s\n", prefix, str);
4662 }
94f04347 4663
a103496c 4664 if (c->capability_bounding_set != CAP_ALL) {
dd1f5bd0 4665 _cleanup_free_ char *str = NULL;
94f04347 4666
dd1f5bd0
YW
4667 r = capability_set_to_string_alloc(c->capability_bounding_set, &str);
4668 if (r >= 0)
4669 fprintf(f, "%sCapabilityBoundingSet: %s\n", prefix, str);
755d4b67
IP
4670 }
4671
4672 if (c->capability_ambient_set != 0) {
dd1f5bd0 4673 _cleanup_free_ char *str = NULL;
755d4b67 4674
dd1f5bd0
YW
4675 r = capability_set_to_string_alloc(c->capability_ambient_set, &str);
4676 if (r >= 0)
4677 fprintf(f, "%sAmbientCapabilities: %s\n", prefix, str);
94f04347
LP
4678 }
4679
4680 if (c->user)
f2d3769a 4681 fprintf(f, "%sUser: %s\n", prefix, c->user);
94f04347 4682 if (c->group)
f2d3769a 4683 fprintf(f, "%sGroup: %s\n", prefix, c->group);
94f04347 4684
29206d46
LP
4685 fprintf(f, "%sDynamicUser: %s\n", prefix, yes_no(c->dynamic_user));
4686
ac6e8be6 4687 if (!strv_isempty(c->supplementary_groups)) {
94f04347 4688 fprintf(f, "%sSupplementaryGroups:", prefix);
15ae422b
LP
4689 strv_fprintf(f, c->supplementary_groups);
4690 fputs("\n", f);
4691 }
94f04347 4692
5b6319dc 4693 if (c->pam_name)
f2d3769a 4694 fprintf(f, "%sPAMName: %s\n", prefix, c->pam_name);
5b6319dc 4695
58629001 4696 if (!strv_isempty(c->read_write_paths)) {
2a624c36
AP
4697 fprintf(f, "%sReadWritePaths:", prefix);
4698 strv_fprintf(f, c->read_write_paths);
15ae422b
LP
4699 fputs("\n", f);
4700 }
4701
58629001 4702 if (!strv_isempty(c->read_only_paths)) {
2a624c36
AP
4703 fprintf(f, "%sReadOnlyPaths:", prefix);
4704 strv_fprintf(f, c->read_only_paths);
15ae422b
LP
4705 fputs("\n", f);
4706 }
94f04347 4707
58629001 4708 if (!strv_isempty(c->inaccessible_paths)) {
2a624c36
AP
4709 fprintf(f, "%sInaccessiblePaths:", prefix);
4710 strv_fprintf(f, c->inaccessible_paths);
94f04347
LP
4711 fputs("\n", f);
4712 }
2e22afe9 4713
d2d6c096 4714 if (c->n_bind_mounts > 0)
4ca763a9
YW
4715 for (i = 0; i < c->n_bind_mounts; i++)
4716 fprintf(f, "%s%s: %s%s:%s:%s\n", prefix,
d2d6c096 4717 c->bind_mounts[i].read_only ? "BindReadOnlyPaths" : "BindPaths",
4ca763a9 4718 c->bind_mounts[i].ignore_enoent ? "-": "",
d2d6c096
LP
4719 c->bind_mounts[i].source,
4720 c->bind_mounts[i].destination,
4721 c->bind_mounts[i].recursive ? "rbind" : "norbind");
d2d6c096 4722
2abd4e38
YW
4723 if (c->n_temporary_filesystems > 0)
4724 for (i = 0; i < c->n_temporary_filesystems; i++) {
4725 TemporaryFileSystem *t = c->temporary_filesystems + i;
4726
4727 fprintf(f, "%sTemporaryFileSystem: %s%s%s\n", prefix,
4728 t->path,
4729 isempty(t->options) ? "" : ":",
4730 strempty(t->options));
4731 }
4732
169c1bda
LP
4733 if (c->utmp_id)
4734 fprintf(f,
4735 "%sUtmpIdentifier: %s\n",
4736 prefix, c->utmp_id);
7b52a628
MS
4737
4738 if (c->selinux_context)
4739 fprintf(f,
5f8640fb
LP
4740 "%sSELinuxContext: %s%s\n",
4741 prefix, c->selinux_context_ignore ? "-" : "", c->selinux_context);
17df7223 4742
80c21aea
WC
4743 if (c->apparmor_profile)
4744 fprintf(f,
4745 "%sAppArmorProfile: %s%s\n",
4746 prefix, c->apparmor_profile_ignore ? "-" : "", c->apparmor_profile);
4747
4748 if (c->smack_process_label)
4749 fprintf(f,
4750 "%sSmackProcessLabel: %s%s\n",
4751 prefix, c->smack_process_label_ignore ? "-" : "", c->smack_process_label);
4752
050f7277 4753 if (c->personality != PERSONALITY_INVALID)
ac45f971
LP
4754 fprintf(f,
4755 "%sPersonality: %s\n",
4756 prefix, strna(personality_to_string(c->personality)));
4757
78e864e5
TM
4758 fprintf(f,
4759 "%sLockPersonality: %s\n",
4760 prefix, yes_no(c->lock_personality));
4761
17df7223 4762 if (c->syscall_filter) {
349cc4a5 4763#if HAVE_SECCOMP
17df7223 4764 Iterator j;
8cfa775f 4765 void *id, *val;
17df7223 4766 bool first = true;
351a19b1 4767#endif
17df7223
LP
4768
4769 fprintf(f,
57183d11 4770 "%sSystemCallFilter: ",
17df7223
LP
4771 prefix);
4772
4773 if (!c->syscall_whitelist)
4774 fputc('~', f);
4775
349cc4a5 4776#if HAVE_SECCOMP
8cfa775f 4777 HASHMAP_FOREACH_KEY(val, id, c->syscall_filter, j) {
17df7223 4778 _cleanup_free_ char *name = NULL;
8cfa775f
YW
4779 const char *errno_name = NULL;
4780 int num = PTR_TO_INT(val);
17df7223
LP
4781
4782 if (first)
4783 first = false;
4784 else
4785 fputc(' ', f);
4786
57183d11 4787 name = seccomp_syscall_resolve_num_arch(SCMP_ARCH_NATIVE, PTR_TO_INT(id) - 1);
17df7223 4788 fputs(strna(name), f);
8cfa775f
YW
4789
4790 if (num >= 0) {
4791 errno_name = errno_to_name(num);
4792 if (errno_name)
4793 fprintf(f, ":%s", errno_name);
4794 else
4795 fprintf(f, ":%d", num);
4796 }
17df7223 4797 }
351a19b1 4798#endif
17df7223
LP
4799
4800 fputc('\n', f);
4801 }
4802
57183d11 4803 if (c->syscall_archs) {
349cc4a5 4804#if HAVE_SECCOMP
57183d11
LP
4805 Iterator j;
4806 void *id;
4807#endif
4808
4809 fprintf(f,
4810 "%sSystemCallArchitectures:",
4811 prefix);
4812
349cc4a5 4813#if HAVE_SECCOMP
57183d11
LP
4814 SET_FOREACH(id, c->syscall_archs, j)
4815 fprintf(f, " %s", strna(seccomp_arch_to_string(PTR_TO_UINT32(id) - 1)));
4816#endif
4817 fputc('\n', f);
4818 }
4819
add00535
LP
4820 if (exec_context_restrict_namespaces_set(c)) {
4821 _cleanup_free_ char *s = NULL;
4822
86c2a9f1 4823 r = namespace_flags_to_string(c->restrict_namespaces, &s);
add00535
LP
4824 if (r >= 0)
4825 fprintf(f, "%sRestrictNamespaces: %s\n",
4826 prefix, s);
4827 }
4828
a8d08f39
LP
4829 if (c->network_namespace_path)
4830 fprintf(f,
4831 "%sNetworkNamespacePath: %s\n",
4832 prefix, c->network_namespace_path);
4833
3df90f24
YW
4834 if (c->syscall_errno > 0) {
4835 const char *errno_name;
4836
4837 fprintf(f, "%sSystemCallErrorNumber: ", prefix);
4838
4839 errno_name = errno_to_name(c->syscall_errno);
4840 if (errno_name)
4841 fprintf(f, "%s\n", errno_name);
4842 else
4843 fprintf(f, "%d\n", c->syscall_errno);
4844 }
5cb5a6ff
LP
4845}
4846
34cf6c43 4847bool exec_context_maintains_privileges(const ExecContext *c) {
a931ad47
LP
4848 assert(c);
4849
61233823 4850 /* Returns true if the process forked off would run under
a931ad47
LP
4851 * an unchanged UID or as root. */
4852
4853 if (!c->user)
4854 return true;
4855
4856 if (streq(c->user, "root") || streq(c->user, "0"))
4857 return true;
4858
4859 return false;
4860}
4861
34cf6c43 4862int exec_context_get_effective_ioprio(const ExecContext *c) {
7f452159
LP
4863 int p;
4864
4865 assert(c);
4866
4867 if (c->ioprio_set)
4868 return c->ioprio;
4869
4870 p = ioprio_get(IOPRIO_WHO_PROCESS, 0);
4871 if (p < 0)
4872 return IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 4);
4873
4874 return p;
4875}
4876
d3070fbd
LP
4877void exec_context_free_log_extra_fields(ExecContext *c) {
4878 size_t l;
4879
4880 assert(c);
4881
4882 for (l = 0; l < c->n_log_extra_fields; l++)
4883 free(c->log_extra_fields[l].iov_base);
4884 c->log_extra_fields = mfree(c->log_extra_fields);
4885 c->n_log_extra_fields = 0;
4886}
4887
6f765baf
LP
4888void exec_context_revert_tty(ExecContext *c) {
4889 int r;
4890
4891 assert(c);
4892
4893 /* First, reset the TTY (possibly kicking everybody else from the TTY) */
4894 exec_context_tty_reset(c, NULL);
4895
4896 /* And then undo what chown_terminal() did earlier. Note that we only do this if we have a path
4897 * configured. If the TTY was passed to us as file descriptor we assume the TTY is opened and managed
4898 * by whoever passed it to us and thus knows better when and how to chmod()/chown() it back. */
4899
4900 if (exec_context_may_touch_tty(c)) {
4901 const char *path;
4902
4903 path = exec_context_tty_path(c);
4904 if (path) {
4905 r = chmod_and_chown(path, TTY_MODE, 0, TTY_GID);
4906 if (r < 0 && r != -ENOENT)
4907 log_warning_errno(r, "Failed to reset TTY ownership/access mode of %s, ignoring: %m", path);
4908 }
4909 }
4910}
4911
4c2f5842
LP
4912int exec_context_get_clean_directories(
4913 ExecContext *c,
4914 char **prefix,
4915 ExecCleanMask mask,
4916 char ***ret) {
4917
4918 _cleanup_strv_free_ char **l = NULL;
4919 ExecDirectoryType t;
4920 int r;
4921
4922 assert(c);
4923 assert(prefix);
4924 assert(ret);
4925
4926 for (t = 0; t < _EXEC_DIRECTORY_TYPE_MAX; t++) {
4927 char **i;
4928
4929 if (!FLAGS_SET(mask, 1U << t))
4930 continue;
4931
4932 if (!prefix[t])
4933 continue;
4934
4935 STRV_FOREACH(i, c->directories[t].paths) {
4936 char *j;
4937
4938 j = path_join(prefix[t], *i);
4939 if (!j)
4940 return -ENOMEM;
4941
4942 r = strv_consume(&l, j);
4943 if (r < 0)
4944 return r;
7f622a19
YW
4945
4946 /* Also remove private directories unconditionally. */
4947 if (t != EXEC_DIRECTORY_CONFIGURATION) {
4948 j = path_join(prefix[t], "private", *i);
4949 if (!j)
4950 return -ENOMEM;
4951
4952 r = strv_consume(&l, j);
4953 if (r < 0)
4954 return r;
4955 }
4c2f5842
LP
4956 }
4957 }
4958
4959 *ret = TAKE_PTR(l);
4960 return 0;
4961}
4962
4963int exec_context_get_clean_mask(ExecContext *c, ExecCleanMask *ret) {
4964 ExecCleanMask mask = 0;
4965
4966 assert(c);
4967 assert(ret);
4968
4969 for (ExecDirectoryType t = 0; t < _EXEC_DIRECTORY_TYPE_MAX; t++)
4970 if (!strv_isempty(c->directories[t].paths))
4971 mask |= 1U << t;
4972
4973 *ret = mask;
4974 return 0;
4975}
4976
b58b4116 4977void exec_status_start(ExecStatus *s, pid_t pid) {
034c6ed7 4978 assert(s);
5cb5a6ff 4979
2ed26ed0
LP
4980 *s = (ExecStatus) {
4981 .pid = pid,
4982 };
4983
b58b4116
LP
4984 dual_timestamp_get(&s->start_timestamp);
4985}
4986
34cf6c43 4987void exec_status_exit(ExecStatus *s, const ExecContext *context, pid_t pid, int code, int status) {
b58b4116
LP
4988 assert(s);
4989
2ed26ed0
LP
4990 if (s->pid != pid) {
4991 *s = (ExecStatus) {
4992 .pid = pid,
4993 };
4994 }
b58b4116 4995
63983207 4996 dual_timestamp_get(&s->exit_timestamp);
9fb86720 4997
034c6ed7
LP
4998 s->code = code;
4999 s->status = status;
169c1bda 5000
6f765baf
LP
5001 if (context && context->utmp_id)
5002 (void) utmp_put_dead_process(context->utmp_id, pid, code, status);
9fb86720
LP
5003}
5004
6a1d4d9f
LP
5005void exec_status_reset(ExecStatus *s) {
5006 assert(s);
5007
5008 *s = (ExecStatus) {};
5009}
5010
34cf6c43 5011void exec_status_dump(const ExecStatus *s, FILE *f, const char *prefix) {
9fb86720
LP
5012 char buf[FORMAT_TIMESTAMP_MAX];
5013
5014 assert(s);
5015 assert(f);
5016
9fb86720
LP
5017 if (s->pid <= 0)
5018 return;
5019
4c940960
LP
5020 prefix = strempty(prefix);
5021
9fb86720 5022 fprintf(f,
ccd06097
ZJS
5023 "%sPID: "PID_FMT"\n",
5024 prefix, s->pid);
9fb86720 5025
af9d16e1 5026 if (dual_timestamp_is_set(&s->start_timestamp))
9fb86720
LP
5027 fprintf(f,
5028 "%sStart Timestamp: %s\n",
63983207 5029 prefix, format_timestamp(buf, sizeof(buf), s->start_timestamp.realtime));
9fb86720 5030
af9d16e1 5031 if (dual_timestamp_is_set(&s->exit_timestamp))
9fb86720
LP
5032 fprintf(f,
5033 "%sExit Timestamp: %s\n"
5034 "%sExit Code: %s\n"
5035 "%sExit Status: %i\n",
63983207 5036 prefix, format_timestamp(buf, sizeof(buf), s->exit_timestamp.realtime),
9fb86720
LP
5037 prefix, sigchld_code_to_string(s->code),
5038 prefix, s->status);
5cb5a6ff 5039}
44d8db9e 5040
34cf6c43 5041static char *exec_command_line(char **argv) {
44d8db9e
LP
5042 size_t k;
5043 char *n, *p, **a;
5044 bool first = true;
5045
9e2f7c11 5046 assert(argv);
44d8db9e 5047
9164977d 5048 k = 1;
9e2f7c11 5049 STRV_FOREACH(a, argv)
44d8db9e
LP
5050 k += strlen(*a)+3;
5051
5cd9cd35
LP
5052 n = new(char, k);
5053 if (!n)
44d8db9e
LP
5054 return NULL;
5055
5056 p = n;
9e2f7c11 5057 STRV_FOREACH(a, argv) {
44d8db9e
LP
5058
5059 if (!first)
5060 *(p++) = ' ';
5061 else
5062 first = false;
5063
5064 if (strpbrk(*a, WHITESPACE)) {
5065 *(p++) = '\'';
5066 p = stpcpy(p, *a);
5067 *(p++) = '\'';
5068 } else
5069 p = stpcpy(p, *a);
5070
5071 }
5072
9164977d
LP
5073 *p = 0;
5074
44d8db9e
LP
5075 /* FIXME: this doesn't really handle arguments that have
5076 * spaces and ticks in them */
5077
5078 return n;
5079}
5080
34cf6c43 5081static void exec_command_dump(ExecCommand *c, FILE *f, const char *prefix) {
e1d75803 5082 _cleanup_free_ char *cmd = NULL;
4c940960 5083 const char *prefix2;
44d8db9e
LP
5084
5085 assert(c);
5086 assert(f);
5087
4c940960 5088 prefix = strempty(prefix);
63c372cb 5089 prefix2 = strjoina(prefix, "\t");
44d8db9e 5090
9e2f7c11 5091 cmd = exec_command_line(c->argv);
44d8db9e
LP
5092 fprintf(f,
5093 "%sCommand Line: %s\n",
4bbccb02 5094 prefix, cmd ? cmd : strerror_safe(ENOMEM));
44d8db9e 5095
9fb86720 5096 exec_status_dump(&c->exec_status, f, prefix2);
44d8db9e
LP
5097}
5098
5099void exec_command_dump_list(ExecCommand *c, FILE *f, const char *prefix) {
5100 assert(f);
5101
4c940960 5102 prefix = strempty(prefix);
44d8db9e
LP
5103
5104 LIST_FOREACH(command, c, c)
5105 exec_command_dump(c, f, prefix);
5106}
94f04347 5107
a6a80b4f
LP
5108void exec_command_append_list(ExecCommand **l, ExecCommand *e) {
5109 ExecCommand *end;
5110
5111 assert(l);
5112 assert(e);
5113
5114 if (*l) {
35b8ca3a 5115 /* It's kind of important, that we keep the order here */
71fda00f
LP
5116 LIST_FIND_TAIL(command, *l, end);
5117 LIST_INSERT_AFTER(command, *l, end, e);
a6a80b4f
LP
5118 } else
5119 *l = e;
5120}
5121
26fd040d
LP
5122int exec_command_set(ExecCommand *c, const char *path, ...) {
5123 va_list ap;
5124 char **l, *p;
5125
5126 assert(c);
5127 assert(path);
5128
5129 va_start(ap, path);
5130 l = strv_new_ap(path, ap);
5131 va_end(ap);
5132
5133 if (!l)
5134 return -ENOMEM;
5135
250a918d
LP
5136 p = strdup(path);
5137 if (!p) {
26fd040d
LP
5138 strv_free(l);
5139 return -ENOMEM;
5140 }
5141
6897dfe8 5142 free_and_replace(c->path, p);
26fd040d 5143
130d3d22 5144 return strv_free_and_replace(c->argv, l);
26fd040d
LP
5145}
5146
86b23b07 5147int exec_command_append(ExecCommand *c, const char *path, ...) {
e63ff941 5148 _cleanup_strv_free_ char **l = NULL;
86b23b07 5149 va_list ap;
86b23b07
JS
5150 int r;
5151
5152 assert(c);
5153 assert(path);
5154
5155 va_start(ap, path);
5156 l = strv_new_ap(path, ap);
5157 va_end(ap);
5158
5159 if (!l)
5160 return -ENOMEM;
5161
e287086b 5162 r = strv_extend_strv(&c->argv, l, false);
e63ff941 5163 if (r < 0)
86b23b07 5164 return r;
86b23b07
JS
5165
5166 return 0;
5167}
5168
e8a565cb
YW
5169static void *remove_tmpdir_thread(void *p) {
5170 _cleanup_free_ char *path = p;
86b23b07 5171
e8a565cb
YW
5172 (void) rm_rf(path, REMOVE_ROOT|REMOVE_PHYSICAL);
5173 return NULL;
5174}
5175
5176static ExecRuntime* exec_runtime_free(ExecRuntime *rt, bool destroy) {
5177 int r;
5178
5179 if (!rt)
5180 return NULL;
5181
5182 if (rt->manager)
5183 (void) hashmap_remove(rt->manager->exec_runtime_by_id, rt->id);
5184
5185 /* When destroy is true, then rm_rf tmp_dir and var_tmp_dir. */
5186 if (destroy && rt->tmp_dir) {
5187 log_debug("Spawning thread to nuke %s", rt->tmp_dir);
5188
5189 r = asynchronous_job(remove_tmpdir_thread, rt->tmp_dir);
5190 if (r < 0) {
5191 log_warning_errno(r, "Failed to nuke %s: %m", rt->tmp_dir);
5192 free(rt->tmp_dir);
5193 }
5194
5195 rt->tmp_dir = NULL;
5196 }
613b411c 5197
e8a565cb
YW
5198 if (destroy && rt->var_tmp_dir) {
5199 log_debug("Spawning thread to nuke %s", rt->var_tmp_dir);
5200
5201 r = asynchronous_job(remove_tmpdir_thread, rt->var_tmp_dir);
5202 if (r < 0) {
5203 log_warning_errno(r, "Failed to nuke %s: %m", rt->var_tmp_dir);
5204 free(rt->var_tmp_dir);
5205 }
5206
5207 rt->var_tmp_dir = NULL;
5208 }
5209
5210 rt->id = mfree(rt->id);
5211 rt->tmp_dir = mfree(rt->tmp_dir);
5212 rt->var_tmp_dir = mfree(rt->var_tmp_dir);
5213 safe_close_pair(rt->netns_storage_socket);
5214 return mfree(rt);
5215}
5216
5217static void exec_runtime_freep(ExecRuntime **rt) {
da6bc6ed 5218 (void) exec_runtime_free(*rt, false);
e8a565cb
YW
5219}
5220
8e8009dc
LP
5221static int exec_runtime_allocate(ExecRuntime **ret) {
5222 ExecRuntime *n;
613b411c 5223
8e8009dc 5224 assert(ret);
613b411c 5225
8e8009dc
LP
5226 n = new(ExecRuntime, 1);
5227 if (!n)
613b411c
LP
5228 return -ENOMEM;
5229
8e8009dc
LP
5230 *n = (ExecRuntime) {
5231 .netns_storage_socket = { -1, -1 },
5232 };
5233
5234 *ret = n;
613b411c
LP
5235 return 0;
5236}
5237
e8a565cb
YW
5238static int exec_runtime_add(
5239 Manager *m,
5240 const char *id,
5241 const char *tmp_dir,
5242 const char *var_tmp_dir,
5243 const int netns_storage_socket[2],
5244 ExecRuntime **ret) {
5245
5246 _cleanup_(exec_runtime_freep) ExecRuntime *rt = NULL;
613b411c
LP
5247 int r;
5248
e8a565cb 5249 assert(m);
613b411c
LP
5250 assert(id);
5251
e8a565cb
YW
5252 r = hashmap_ensure_allocated(&m->exec_runtime_by_id, &string_hash_ops);
5253 if (r < 0)
5254 return r;
613b411c 5255
e8a565cb 5256 r = exec_runtime_allocate(&rt);
613b411c
LP
5257 if (r < 0)
5258 return r;
5259
e8a565cb
YW
5260 rt->id = strdup(id);
5261 if (!rt->id)
5262 return -ENOMEM;
5263
5264 if (tmp_dir) {
5265 rt->tmp_dir = strdup(tmp_dir);
5266 if (!rt->tmp_dir)
5267 return -ENOMEM;
5268
5269 /* When tmp_dir is set, then we require var_tmp_dir is also set. */
5270 assert(var_tmp_dir);
5271 rt->var_tmp_dir = strdup(var_tmp_dir);
5272 if (!rt->var_tmp_dir)
5273 return -ENOMEM;
5274 }
5275
5276 if (netns_storage_socket) {
5277 rt->netns_storage_socket[0] = netns_storage_socket[0];
5278 rt->netns_storage_socket[1] = netns_storage_socket[1];
613b411c
LP
5279 }
5280
e8a565cb
YW
5281 r = hashmap_put(m->exec_runtime_by_id, rt->id, rt);
5282 if (r < 0)
5283 return r;
5284
5285 rt->manager = m;
5286
5287 if (ret)
5288 *ret = rt;
5289
5290 /* do not remove created ExecRuntime object when the operation succeeds. */
5291 rt = NULL;
5292 return 0;
5293}
5294
5295static int exec_runtime_make(Manager *m, const ExecContext *c, const char *id, ExecRuntime **ret) {
5296 _cleanup_free_ char *tmp_dir = NULL, *var_tmp_dir = NULL;
2fa3742d 5297 _cleanup_close_pair_ int netns_storage_socket[2] = { -1, -1 };
e8a565cb
YW
5298 int r;
5299
5300 assert(m);
5301 assert(c);
5302 assert(id);
5303
5304 /* It is not necessary to create ExecRuntime object. */
a8d08f39 5305 if (!c->private_network && !c->private_tmp && !c->network_namespace_path)
e8a565cb
YW
5306 return 0;
5307
5308 if (c->private_tmp) {
5309 r = setup_tmp_dirs(id, &tmp_dir, &var_tmp_dir);
613b411c
LP
5310 if (r < 0)
5311 return r;
5312 }
5313
a8d08f39 5314 if (c->private_network || c->network_namespace_path) {
e8a565cb
YW
5315 if (socketpair(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC, 0, netns_storage_socket) < 0)
5316 return -errno;
5317 }
5318
5319 r = exec_runtime_add(m, id, tmp_dir, var_tmp_dir, netns_storage_socket, ret);
5320 if (r < 0)
5321 return r;
5322
5323 /* Avoid cleanup */
2fa3742d 5324 netns_storage_socket[0] = netns_storage_socket[1] = -1;
613b411c
LP
5325 return 1;
5326}
5327
e8a565cb
YW
5328int exec_runtime_acquire(Manager *m, const ExecContext *c, const char *id, bool create, ExecRuntime **ret) {
5329 ExecRuntime *rt;
5330 int r;
613b411c 5331
e8a565cb
YW
5332 assert(m);
5333 assert(id);
5334 assert(ret);
5335
5336 rt = hashmap_get(m->exec_runtime_by_id, id);
5337 if (rt)
5338 /* We already have a ExecRuntime object, let's increase the ref count and reuse it */
5339 goto ref;
5340
5341 if (!create)
5342 return 0;
5343
5344 /* If not found, then create a new object. */
5345 r = exec_runtime_make(m, c, id, &rt);
5346 if (r <= 0)
5347 /* When r == 0, it is not necessary to create ExecRuntime object. */
5348 return r;
613b411c 5349
e8a565cb
YW
5350ref:
5351 /* increment reference counter. */
5352 rt->n_ref++;
5353 *ret = rt;
5354 return 1;
5355}
613b411c 5356
e8a565cb
YW
5357ExecRuntime *exec_runtime_unref(ExecRuntime *rt, bool destroy) {
5358 if (!rt)
613b411c
LP
5359 return NULL;
5360
e8a565cb 5361 assert(rt->n_ref > 0);
613b411c 5362
e8a565cb
YW
5363 rt->n_ref--;
5364 if (rt->n_ref > 0)
f2341e0a
LP
5365 return NULL;
5366
e8a565cb 5367 return exec_runtime_free(rt, destroy);
613b411c
LP
5368}
5369
e8a565cb
YW
5370int exec_runtime_serialize(const Manager *m, FILE *f, FDSet *fds) {
5371 ExecRuntime *rt;
5372 Iterator i;
5373
5374 assert(m);
613b411c
LP
5375 assert(f);
5376 assert(fds);
5377
e8a565cb
YW
5378 HASHMAP_FOREACH(rt, m->exec_runtime_by_id, i) {
5379 fprintf(f, "exec-runtime=%s", rt->id);
613b411c 5380
e8a565cb
YW
5381 if (rt->tmp_dir)
5382 fprintf(f, " tmp-dir=%s", rt->tmp_dir);
613b411c 5383
e8a565cb
YW
5384 if (rt->var_tmp_dir)
5385 fprintf(f, " var-tmp-dir=%s", rt->var_tmp_dir);
613b411c 5386
e8a565cb
YW
5387 if (rt->netns_storage_socket[0] >= 0) {
5388 int copy;
613b411c 5389
e8a565cb
YW
5390 copy = fdset_put_dup(fds, rt->netns_storage_socket[0]);
5391 if (copy < 0)
5392 return copy;
613b411c 5393
e8a565cb
YW
5394 fprintf(f, " netns-socket-0=%i", copy);
5395 }
613b411c 5396
e8a565cb
YW
5397 if (rt->netns_storage_socket[1] >= 0) {
5398 int copy;
613b411c 5399
e8a565cb
YW
5400 copy = fdset_put_dup(fds, rt->netns_storage_socket[1]);
5401 if (copy < 0)
5402 return copy;
613b411c 5403
e8a565cb
YW
5404 fprintf(f, " netns-socket-1=%i", copy);
5405 }
5406
5407 fputc('\n', f);
613b411c
LP
5408 }
5409
5410 return 0;
5411}
5412
e8a565cb
YW
5413int exec_runtime_deserialize_compat(Unit *u, const char *key, const char *value, FDSet *fds) {
5414 _cleanup_(exec_runtime_freep) ExecRuntime *rt_create = NULL;
5415 ExecRuntime *rt;
613b411c
LP
5416 int r;
5417
e8a565cb
YW
5418 /* This is for the migration from old (v237 or earlier) deserialization text.
5419 * Due to the bug #7790, this may not work with the units that use JoinsNamespaceOf=.
5420 * Even if the ExecRuntime object originally created by the other unit, we cannot judge
5421 * so or not from the serialized text, then we always creates a new object owned by this. */
5422
5423 assert(u);
613b411c
LP
5424 assert(key);
5425 assert(value);
5426
e8a565cb
YW
5427 /* Manager manages ExecRuntime objects by the unit id.
5428 * So, we omit the serialized text when the unit does not have id (yet?)... */
5429 if (isempty(u->id)) {
5430 log_unit_debug(u, "Invocation ID not found. Dropping runtime parameter.");
5431 return 0;
5432 }
613b411c 5433
e8a565cb
YW
5434 r = hashmap_ensure_allocated(&u->manager->exec_runtime_by_id, &string_hash_ops);
5435 if (r < 0) {
5436 log_unit_debug_errno(u, r, "Failed to allocate storage for runtime parameter: %m");
5437 return 0;
5438 }
5439
5440 rt = hashmap_get(u->manager->exec_runtime_by_id, u->id);
5441 if (!rt) {
5442 r = exec_runtime_allocate(&rt_create);
613b411c 5443 if (r < 0)
f2341e0a 5444 return log_oom();
613b411c 5445
e8a565cb
YW
5446 rt_create->id = strdup(u->id);
5447 if (!rt_create->id)
5448 return log_oom();
5449
5450 rt = rt_create;
5451 }
5452
5453 if (streq(key, "tmp-dir")) {
5454 char *copy;
5455
613b411c
LP
5456 copy = strdup(value);
5457 if (!copy)
5458 return log_oom();
5459
e8a565cb 5460 free_and_replace(rt->tmp_dir, copy);
613b411c
LP
5461
5462 } else if (streq(key, "var-tmp-dir")) {
5463 char *copy;
5464
613b411c
LP
5465 copy = strdup(value);
5466 if (!copy)
5467 return log_oom();
5468
e8a565cb 5469 free_and_replace(rt->var_tmp_dir, copy);
613b411c
LP
5470
5471 } else if (streq(key, "netns-socket-0")) {
5472 int fd;
5473
e8a565cb 5474 if (safe_atoi(value, &fd) < 0 || !fdset_contains(fds, fd)) {
f2341e0a 5475 log_unit_debug(u, "Failed to parse netns socket value: %s", value);
e8a565cb 5476 return 0;
613b411c 5477 }
e8a565cb
YW
5478
5479 safe_close(rt->netns_storage_socket[0]);
5480 rt->netns_storage_socket[0] = fdset_remove(fds, fd);
5481
613b411c
LP
5482 } else if (streq(key, "netns-socket-1")) {
5483 int fd;
5484
e8a565cb 5485 if (safe_atoi(value, &fd) < 0 || !fdset_contains(fds, fd)) {
f2341e0a 5486 log_unit_debug(u, "Failed to parse netns socket value: %s", value);
e8a565cb 5487 return 0;
613b411c 5488 }
e8a565cb
YW
5489
5490 safe_close(rt->netns_storage_socket[1]);
5491 rt->netns_storage_socket[1] = fdset_remove(fds, fd);
613b411c
LP
5492 } else
5493 return 0;
5494
e8a565cb
YW
5495 /* If the object is newly created, then put it to the hashmap which manages ExecRuntime objects. */
5496 if (rt_create) {
5497 r = hashmap_put(u->manager->exec_runtime_by_id, rt_create->id, rt_create);
5498 if (r < 0) {
3fe91079 5499 log_unit_debug_errno(u, r, "Failed to put runtime parameter to manager's storage: %m");
e8a565cb
YW
5500 return 0;
5501 }
613b411c 5502
e8a565cb 5503 rt_create->manager = u->manager;
613b411c 5504
e8a565cb
YW
5505 /* Avoid cleanup */
5506 rt_create = NULL;
5507 }
98b47d54 5508
e8a565cb
YW
5509 return 1;
5510}
613b411c 5511
e8a565cb
YW
5512void exec_runtime_deserialize_one(Manager *m, const char *value, FDSet *fds) {
5513 char *id = NULL, *tmp_dir = NULL, *var_tmp_dir = NULL;
5514 int r, fd0 = -1, fd1 = -1;
5515 const char *p, *v = value;
5516 size_t n;
613b411c 5517
e8a565cb
YW
5518 assert(m);
5519 assert(value);
5520 assert(fds);
98b47d54 5521
e8a565cb
YW
5522 n = strcspn(v, " ");
5523 id = strndupa(v, n);
5524 if (v[n] != ' ')
5525 goto finalize;
5526 p = v + n + 1;
5527
5528 v = startswith(p, "tmp-dir=");
5529 if (v) {
5530 n = strcspn(v, " ");
5531 tmp_dir = strndupa(v, n);
5532 if (v[n] != ' ')
5533 goto finalize;
5534 p = v + n + 1;
5535 }
5536
5537 v = startswith(p, "var-tmp-dir=");
5538 if (v) {
5539 n = strcspn(v, " ");
5540 var_tmp_dir = strndupa(v, n);
5541 if (v[n] != ' ')
5542 goto finalize;
5543 p = v + n + 1;
5544 }
5545
5546 v = startswith(p, "netns-socket-0=");
5547 if (v) {
5548 char *buf;
5549
5550 n = strcspn(v, " ");
5551 buf = strndupa(v, n);
5552 if (safe_atoi(buf, &fd0) < 0 || !fdset_contains(fds, fd0)) {
5553 log_debug("Unable to process exec-runtime netns fd specification.");
5554 return;
98b47d54 5555 }
e8a565cb
YW
5556 fd0 = fdset_remove(fds, fd0);
5557 if (v[n] != ' ')
5558 goto finalize;
5559 p = v + n + 1;
613b411c
LP
5560 }
5561
e8a565cb
YW
5562 v = startswith(p, "netns-socket-1=");
5563 if (v) {
5564 char *buf;
98b47d54 5565
e8a565cb
YW
5566 n = strcspn(v, " ");
5567 buf = strndupa(v, n);
5568 if (safe_atoi(buf, &fd1) < 0 || !fdset_contains(fds, fd1)) {
5569 log_debug("Unable to process exec-runtime netns fd specification.");
5570 return;
98b47d54 5571 }
e8a565cb
YW
5572 fd1 = fdset_remove(fds, fd1);
5573 }
98b47d54 5574
e8a565cb
YW
5575finalize:
5576
5577 r = exec_runtime_add(m, id, tmp_dir, var_tmp_dir, (int[]) { fd0, fd1 }, NULL);
7d853ca6 5578 if (r < 0)
e8a565cb 5579 log_debug_errno(r, "Failed to add exec-runtime: %m");
e8a565cb 5580}
613b411c 5581
e8a565cb
YW
5582void exec_runtime_vacuum(Manager *m) {
5583 ExecRuntime *rt;
5584 Iterator i;
5585
5586 assert(m);
5587
5588 /* Free unreferenced ExecRuntime objects. This is used after manager deserialization process. */
5589
5590 HASHMAP_FOREACH(rt, m->exec_runtime_by_id, i) {
5591 if (rt->n_ref > 0)
5592 continue;
5593
5594 (void) exec_runtime_free(rt, false);
5595 }
613b411c
LP
5596}
5597
b9c04eaf
YW
5598void exec_params_clear(ExecParameters *p) {
5599 if (!p)
5600 return;
5601
5602 strv_free(p->environment);
5603}
5604
80876c20
LP
5605static const char* const exec_input_table[_EXEC_INPUT_MAX] = {
5606 [EXEC_INPUT_NULL] = "null",
5607 [EXEC_INPUT_TTY] = "tty",
5608 [EXEC_INPUT_TTY_FORCE] = "tty-force",
4f2d528d 5609 [EXEC_INPUT_TTY_FAIL] = "tty-fail",
52c239d7
LB
5610 [EXEC_INPUT_SOCKET] = "socket",
5611 [EXEC_INPUT_NAMED_FD] = "fd",
08f3be7a 5612 [EXEC_INPUT_DATA] = "data",
2038c3f5 5613 [EXEC_INPUT_FILE] = "file",
80876c20
LP
5614};
5615
8a0867d6
LP
5616DEFINE_STRING_TABLE_LOOKUP(exec_input, ExecInput);
5617
94f04347 5618static const char* const exec_output_table[_EXEC_OUTPUT_MAX] = {
80876c20 5619 [EXEC_OUTPUT_INHERIT] = "inherit",
94f04347 5620 [EXEC_OUTPUT_NULL] = "null",
80876c20 5621 [EXEC_OUTPUT_TTY] = "tty",
94f04347 5622 [EXEC_OUTPUT_SYSLOG] = "syslog",
28dbc1e8 5623 [EXEC_OUTPUT_SYSLOG_AND_CONSOLE] = "syslog+console",
9a6bca7a 5624 [EXEC_OUTPUT_KMSG] = "kmsg",
28dbc1e8 5625 [EXEC_OUTPUT_KMSG_AND_CONSOLE] = "kmsg+console",
706343f4
LP
5626 [EXEC_OUTPUT_JOURNAL] = "journal",
5627 [EXEC_OUTPUT_JOURNAL_AND_CONSOLE] = "journal+console",
52c239d7
LB
5628 [EXEC_OUTPUT_SOCKET] = "socket",
5629 [EXEC_OUTPUT_NAMED_FD] = "fd",
2038c3f5 5630 [EXEC_OUTPUT_FILE] = "file",
566b7d23 5631 [EXEC_OUTPUT_FILE_APPEND] = "append",
94f04347
LP
5632};
5633
5634DEFINE_STRING_TABLE_LOOKUP(exec_output, ExecOutput);
023a4f67
LP
5635
5636static const char* const exec_utmp_mode_table[_EXEC_UTMP_MODE_MAX] = {
5637 [EXEC_UTMP_INIT] = "init",
5638 [EXEC_UTMP_LOGIN] = "login",
5639 [EXEC_UTMP_USER] = "user",
5640};
5641
5642DEFINE_STRING_TABLE_LOOKUP(exec_utmp_mode, ExecUtmpMode);
53f47dfc
YW
5643
5644static const char* const exec_preserve_mode_table[_EXEC_PRESERVE_MODE_MAX] = {
5645 [EXEC_PRESERVE_NO] = "no",
5646 [EXEC_PRESERVE_YES] = "yes",
5647 [EXEC_PRESERVE_RESTART] = "restart",
5648};
5649
5650DEFINE_STRING_TABLE_LOOKUP_WITH_BOOLEAN(exec_preserve_mode, ExecPreserveMode, EXEC_PRESERVE_YES);
3536f49e 5651
6b7b2ed9 5652/* This table maps ExecDirectoryType to the setting it is configured with in the unit */
72fd1768 5653static const char* const exec_directory_type_table[_EXEC_DIRECTORY_TYPE_MAX] = {
3536f49e
YW
5654 [EXEC_DIRECTORY_RUNTIME] = "RuntimeDirectory",
5655 [EXEC_DIRECTORY_STATE] = "StateDirectory",
5656 [EXEC_DIRECTORY_CACHE] = "CacheDirectory",
5657 [EXEC_DIRECTORY_LOGS] = "LogsDirectory",
5658 [EXEC_DIRECTORY_CONFIGURATION] = "ConfigurationDirectory",
5659};
5660
5661DEFINE_STRING_TABLE_LOOKUP(exec_directory_type, ExecDirectoryType);
b1edf445 5662
6b7b2ed9
LP
5663/* And this table maps ExecDirectoryType too, but to a generic term identifying the type of resource. This
5664 * one is supposed to be generic enough to be used for unit types that don't use ExecContext and per-unit
5665 * directories, specifically .timer units with their timestamp touch file. */
5666static const char* const exec_resource_type_table[_EXEC_DIRECTORY_TYPE_MAX] = {
5667 [EXEC_DIRECTORY_RUNTIME] = "runtime",
5668 [EXEC_DIRECTORY_STATE] = "state",
5669 [EXEC_DIRECTORY_CACHE] = "cache",
5670 [EXEC_DIRECTORY_LOGS] = "logs",
5671 [EXEC_DIRECTORY_CONFIGURATION] = "configuration",
5672};
5673
5674DEFINE_STRING_TABLE_LOOKUP(exec_resource_type, ExecDirectoryType);
5675
5676/* And this table also maps ExecDirectoryType, to the environment variable we pass the selected directory to
5677 * the service payload in. */
fb2042dd
YW
5678static const char* const exec_directory_env_name_table[_EXEC_DIRECTORY_TYPE_MAX] = {
5679 [EXEC_DIRECTORY_RUNTIME] = "RUNTIME_DIRECTORY",
5680 [EXEC_DIRECTORY_STATE] = "STATE_DIRECTORY",
5681 [EXEC_DIRECTORY_CACHE] = "CACHE_DIRECTORY",
5682 [EXEC_DIRECTORY_LOGS] = "LOGS_DIRECTORY",
5683 [EXEC_DIRECTORY_CONFIGURATION] = "CONFIGURATION_DIRECTORY",
5684};
5685
5686DEFINE_PRIVATE_STRING_TABLE_LOOKUP_TO_STRING(exec_directory_env_name, ExecDirectoryType);
5687
b1edf445
LP
5688static const char* const exec_keyring_mode_table[_EXEC_KEYRING_MODE_MAX] = {
5689 [EXEC_KEYRING_INHERIT] = "inherit",
5690 [EXEC_KEYRING_PRIVATE] = "private",
5691 [EXEC_KEYRING_SHARED] = "shared",
5692};
5693
5694DEFINE_STRING_TABLE_LOOKUP(exec_keyring_mode, ExecKeyringMode);