]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/core/execute.c
tree-wide: drop socket.h when socket-util.h is included
[thirdparty/systemd.git] / src / core / execute.c
CommitLineData
53e1b683 1/* SPDX-License-Identifier: LGPL-2.1+ */
a7334b09 2
034c6ed7
LP
3#include <errno.h>
4#include <fcntl.h>
8dd4c05b 5#include <poll.h>
309bff19 6#include <signal.h>
d251207d 7#include <sys/eventfd.h>
f5947a5e 8#include <sys/ioctl.h>
f3e43635 9#include <sys/mman.h>
8dd4c05b 10#include <sys/personality.h>
94f04347 11#include <sys/prctl.h>
d2ffa389 12#include <sys/shm.h>
451a074f 13#include <sys/stat.h>
d2ffa389 14#include <sys/types.h>
8dd4c05b
LP
15#include <sys/un.h>
16#include <unistd.h>
023a4f67 17#include <utmpx.h>
5cb5a6ff 18
349cc4a5 19#if HAVE_PAM
5b6319dc
LP
20#include <security/pam_appl.h>
21#endif
22
349cc4a5 23#if HAVE_SELINUX
7b52a628
MS
24#include <selinux/selinux.h>
25#endif
26
349cc4a5 27#if HAVE_SECCOMP
17df7223
LP
28#include <seccomp.h>
29#endif
30
349cc4a5 31#if HAVE_APPARMOR
eef65bf3
MS
32#include <sys/apparmor.h>
33#endif
34
24882e06 35#include "sd-messages.h"
8dd4c05b
LP
36
37#include "af-list.h"
b5efdb8a 38#include "alloc-util.h"
349cc4a5 39#if HAVE_APPARMOR
3ffd4af2
LP
40#include "apparmor-util.h"
41#endif
8dd4c05b
LP
42#include "async.h"
43#include "barrier.h"
8dd4c05b 44#include "cap-list.h"
430f0182 45#include "capability-util.h"
a1164ae3 46#include "chown-recursive.h"
fdb3deca 47#include "cgroup-setup.h"
da681e1b 48#include "cpu-set-util.h"
f6a6225e 49#include "def.h"
686d13b9 50#include "env-file.h"
4d1a6904 51#include "env-util.h"
17df7223 52#include "errno-list.h"
3ffd4af2 53#include "execute.h"
8dd4c05b 54#include "exit-status.h"
3ffd4af2 55#include "fd-util.h"
f97b34a6 56#include "format-util.h"
f4f15635 57#include "fs-util.h"
7d50b32a 58#include "glob-util.h"
c004493c 59#include "io-util.h"
8dd4c05b 60#include "ioprio.h"
a1164ae3 61#include "label.h"
8dd4c05b
LP
62#include "log.h"
63#include "macro.h"
e8a565cb 64#include "manager.h"
0a970718 65#include "memory-util.h"
f5947a5e 66#include "missing_fs.h"
8dd4c05b
LP
67#include "mkdir.h"
68#include "namespace.h"
6bedfcbb 69#include "parse-util.h"
8dd4c05b 70#include "path-util.h"
0b452006 71#include "process-util.h"
78f22b97 72#include "rlimit-util.h"
8dd4c05b 73#include "rm-rf.h"
349cc4a5 74#if HAVE_SECCOMP
3ffd4af2
LP
75#include "seccomp-util.h"
76#endif
07d46372 77#include "securebits-util.h"
8dd4c05b 78#include "selinux-util.h"
24882e06 79#include "signal-util.h"
8dd4c05b 80#include "smack-util.h"
57b7a260 81#include "socket-util.h"
fd63e712 82#include "special.h"
949befd3 83#include "stat-util.h"
8b43440b 84#include "string-table.h"
07630cea 85#include "string-util.h"
8dd4c05b 86#include "strv.h"
7ccbd1ae 87#include "syslog-util.h"
8dd4c05b 88#include "terminal-util.h"
566b7d23 89#include "umask-util.h"
8dd4c05b 90#include "unit.h"
b1d4f8e1 91#include "user-util.h"
8dd4c05b 92#include "utmp-wtmp.h"
5cb5a6ff 93
e056b01d 94#define IDLE_TIMEOUT_USEC (5*USEC_PER_SEC)
31a7eb86 95#define IDLE_TIMEOUT2_USEC (1*USEC_PER_SEC)
e6a26745 96
531dca78
LP
97#define SNDBUF_SIZE (8*1024*1024)
98
/* Moves the passed file descriptors so that fds[i] ends up being fd number i+3, i.e. the array occupies the
 * contiguous range starting right after stdin/stdout/stderr. The array is updated in place. Returns 0 on
 * success or a negative errno-style error if duplicating an fd fails. */
static int shift_fds(int fds[], size_t n_fds) {
        int first = 0, retry_at, idx;

        if (n_fds == 0)
                return 0;

        /* Note: the contents of fds[] are rewritten below. */
        assert(fds);

        do {
                retry_at = -1;

                for (idx = first; idx < (int) n_fds; idx++) {
                        int want = idx + 3, got;

                        /* Nothing to do if this one already sits in its slot. */
                        if (fds[idx] == want)
                                continue;

                        /* F_DUPFD hands out the lowest free fd >= want, which is not necessarily want itself. */
                        got = fcntl(fds[idx], F_DUPFD, want);
                        if (got < 0)
                                return -errno;

                        safe_close(fds[idx]);
                        fds[idx] = got;

                        /* The slot we were after was still taken: remember the first such index, so that
                         * the next pass starts from there. */
                        if (got != want && retry_at < 0)
                                retry_at = idx;
                }

                first = retry_at;
        } while (retry_at >= 0);

        return 0;
}
143
/* Adjusts the file descriptor flags of the fds we are about to pass on: the first n_socket_fds entries get
 * O_NONBLOCK set or cleared according to 'nonblock', and FD_CLOEXEC is dropped from all entries (socket fds
 * followed by n_storage_fds storage fds). Returns 0 on success, or the first negative error encountered. */
static int flags_fds(const int fds[], size_t n_socket_fds, size_t n_storage_fds, bool nonblock) {
        size_t total = n_socket_fds + n_storage_fds, k;
        int r;

        if (total == 0)
                return 0;

        assert(fds);

        for (k = 0; k < total; k++) {
                bool is_socket = k < n_socket_fds;

                /* O_NONBLOCK is only touched for the socket activation fds. */
                if (is_socket) {
                        r = fd_nonblock(fds[k], nonblock);
                        if (r < 0)
                                return r;
                }

                /* These fds are meant to be inherited by the child, hence FD_CLOEXEC is always turned
                 * off. */
                r = fd_cloexec(fds[k], false);
                if (r < 0)
                        return r;
        }

        return 0;
}
176
1e22b5cd 177static const char *exec_context_tty_path(const ExecContext *context) {
80876c20
LP
178 assert(context);
179
1e22b5cd
LP
180 if (context->stdio_as_fds)
181 return NULL;
182
80876c20
LP
183 if (context->tty_path)
184 return context->tty_path;
185
186 return "/dev/console";
187}
188
1e22b5cd
LP
189static void exec_context_tty_reset(const ExecContext *context, const ExecParameters *p) {
190 const char *path;
191
6ea832a2
LP
192 assert(context);
193
1e22b5cd 194 path = exec_context_tty_path(context);
6ea832a2 195
1e22b5cd
LP
196 if (context->tty_vhangup) {
197 if (p && p->stdin_fd >= 0)
198 (void) terminal_vhangup_fd(p->stdin_fd);
199 else if (path)
200 (void) terminal_vhangup(path);
201 }
6ea832a2 202
1e22b5cd
LP
203 if (context->tty_reset) {
204 if (p && p->stdin_fd >= 0)
205 (void) reset_terminal_fd(p->stdin_fd, true);
206 else if (path)
207 (void) reset_terminal(path);
208 }
209
210 if (context->tty_vt_disallocate && path)
211 (void) vt_disallocate(path);
6ea832a2
LP
212}
213
6af760f3
LP
214static bool is_terminal_input(ExecInput i) {
215 return IN_SET(i,
216 EXEC_INPUT_TTY,
217 EXEC_INPUT_TTY_FORCE,
218 EXEC_INPUT_TTY_FAIL);
219}
220
3a1286b6 221static bool is_terminal_output(ExecOutput o) {
6af760f3
LP
222 return IN_SET(o,
223 EXEC_OUTPUT_TTY,
224 EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
225 EXEC_OUTPUT_KMSG_AND_CONSOLE,
226 EXEC_OUTPUT_JOURNAL_AND_CONSOLE);
227}
228
aac8c0c3
LP
229static bool is_syslog_output(ExecOutput o) {
230 return IN_SET(o,
231 EXEC_OUTPUT_SYSLOG,
232 EXEC_OUTPUT_SYSLOG_AND_CONSOLE);
233}
234
235static bool is_kmsg_output(ExecOutput o) {
236 return IN_SET(o,
237 EXEC_OUTPUT_KMSG,
238 EXEC_OUTPUT_KMSG_AND_CONSOLE);
239}
240
6af760f3
LP
241static bool exec_context_needs_term(const ExecContext *c) {
242 assert(c);
243
244 /* Return true if the execution context suggests we should set $TERM to something useful. */
245
246 if (is_terminal_input(c->std_input))
247 return true;
248
249 if (is_terminal_output(c->std_output))
250 return true;
251
252 if (is_terminal_output(c->std_error))
253 return true;
254
255 return !!c->tty_path;
3a1286b6
MS
256}
257
/* Opens /dev/null with the specified access flags (plus O_NOCTTY) and moves the resulting fd to fd number
 * 'nfd'. Returns whatever move_fd() returns, or a negative errno if opening fails. */
static int open_null_as(int flags, int nfd) {
        int null_fd;

        assert(nfd >= 0);

        null_fd = open("/dev/null", flags|O_NOCTTY);
        if (null_fd >= 0)
                return move_fd(null_fd, nfd, false);

        return -errno;
}
269
524daa8c 270static int connect_journal_socket(int fd, uid_t uid, gid_t gid) {
92a17af9 271 static const union sockaddr_union sa = {
b92bea5d
ZJS
272 .un.sun_family = AF_UNIX,
273 .un.sun_path = "/run/systemd/journal/stdout",
274 };
524daa8c
ZJS
275 uid_t olduid = UID_INVALID;
276 gid_t oldgid = GID_INVALID;
277 int r;
278
cad93f29 279 if (gid_is_valid(gid)) {
524daa8c
ZJS
280 oldgid = getgid();
281
92a17af9 282 if (setegid(gid) < 0)
524daa8c
ZJS
283 return -errno;
284 }
285
cad93f29 286 if (uid_is_valid(uid)) {
524daa8c
ZJS
287 olduid = getuid();
288
92a17af9 289 if (seteuid(uid) < 0) {
524daa8c
ZJS
290 r = -errno;
291 goto restore_gid;
292 }
293 }
294
92a17af9 295 r = connect(fd, &sa.sa, SOCKADDR_UN_LEN(sa.un)) < 0 ? -errno : 0;
524daa8c
ZJS
296
297 /* If we fail to restore the uid or gid, things will likely
298 fail later on. This should only happen if an LSM interferes. */
299
cad93f29 300 if (uid_is_valid(uid))
524daa8c
ZJS
301 (void) seteuid(olduid);
302
303 restore_gid:
cad93f29 304 if (gid_is_valid(gid))
524daa8c
ZJS
305 (void) setegid(oldgid);
306
307 return r;
308}
309
fd1f9c89 310static int connect_logger_as(
34cf6c43 311 const Unit *unit,
fd1f9c89 312 const ExecContext *context,
af635cf3 313 const ExecParameters *params,
fd1f9c89
LP
314 ExecOutput output,
315 const char *ident,
fd1f9c89
LP
316 int nfd,
317 uid_t uid,
318 gid_t gid) {
319
2ac1ff68
EV
320 _cleanup_close_ int fd = -1;
321 int r;
071830ff
LP
322
323 assert(context);
af635cf3 324 assert(params);
80876c20
LP
325 assert(output < _EXEC_OUTPUT_MAX);
326 assert(ident);
327 assert(nfd >= 0);
071830ff 328
54fe0cdb
LP
329 fd = socket(AF_UNIX, SOCK_STREAM, 0);
330 if (fd < 0)
80876c20 331 return -errno;
071830ff 332
524daa8c
ZJS
333 r = connect_journal_socket(fd, uid, gid);
334 if (r < 0)
335 return r;
071830ff 336
2ac1ff68 337 if (shutdown(fd, SHUT_RD) < 0)
80876c20 338 return -errno;
071830ff 339
fd1f9c89 340 (void) fd_inc_sndbuf(fd, SNDBUF_SIZE);
531dca78 341
2ac1ff68 342 if (dprintf(fd,
62bca2c6 343 "%s\n"
80876c20
LP
344 "%s\n"
345 "%i\n"
54fe0cdb
LP
346 "%i\n"
347 "%i\n"
348 "%i\n"
4f4a1dbf 349 "%i\n",
c867611e 350 context->syslog_identifier ?: ident,
af635cf3 351 params->flags & EXEC_PASS_LOG_UNIT ? unit->id : "",
54fe0cdb
LP
352 context->syslog_priority,
353 !!context->syslog_level_prefix,
aac8c0c3
LP
354 is_syslog_output(output),
355 is_kmsg_output(output),
2ac1ff68
EV
356 is_terminal_output(output)) < 0)
357 return -errno;
80876c20 358
2ac1ff68 359 return move_fd(TAKE_FD(fd), nfd, false);
80876c20 360}
2ac1ff68 361
/* Opens the terminal at 'path' with the specified flags (plus O_NOCTTY) and moves the resulting fd to fd
 * number 'nfd'. Errors from open_terminal() are passed through as-is. */
static int open_terminal_as(const char *path, int flags, int nfd) {
        int tty_fd;

        assert(path);
        assert(nfd >= 0);

        tty_fd = open_terminal(path, flags | O_NOCTTY);
        if (tty_fd >= 0)
                return move_fd(tty_fd, nfd, false);

        return tty_fd;
}
071830ff 374
2038c3f5 375static int acquire_path(const char *path, int flags, mode_t mode) {
15a3e96f
LP
376 union sockaddr_union sa = {};
377 _cleanup_close_ int fd = -1;
378 int r, salen;
071830ff 379
80876c20 380 assert(path);
071830ff 381
2038c3f5
LP
382 if (IN_SET(flags & O_ACCMODE, O_WRONLY, O_RDWR))
383 flags |= O_CREAT;
384
385 fd = open(path, flags|O_NOCTTY, mode);
386 if (fd >= 0)
15a3e96f 387 return TAKE_FD(fd);
071830ff 388
2038c3f5
LP
389 if (errno != ENXIO) /* ENXIO is returned when we try to open() an AF_UNIX file system socket on Linux */
390 return -errno;
15a3e96f 391 if (strlen(path) >= sizeof(sa.un.sun_path)) /* Too long, can't be a UNIX socket */
2038c3f5
LP
392 return -ENXIO;
393
394 /* So, it appears the specified path could be an AF_UNIX socket. Let's see if we can connect to it. */
395
396 fd = socket(AF_UNIX, SOCK_STREAM, 0);
397 if (fd < 0)
398 return -errno;
399
15a3e96f
LP
400 salen = sockaddr_un_set_path(&sa.un, path);
401 if (salen < 0)
402 return salen;
403
404 if (connect(fd, &sa.sa, salen) < 0)
2038c3f5
LP
405 return errno == EINVAL ? -ENXIO : -errno; /* Propagate initial error if we get EINVAL, i.e. we have
406 * indication that his wasn't an AF_UNIX socket after all */
071830ff 407
2038c3f5
LP
408 if ((flags & O_ACCMODE) == O_RDONLY)
409 r = shutdown(fd, SHUT_WR);
410 else if ((flags & O_ACCMODE) == O_WRONLY)
411 r = shutdown(fd, SHUT_RD);
412 else
15a3e96f
LP
413 return TAKE_FD(fd);
414 if (r < 0)
2038c3f5 415 return -errno;
2038c3f5 416
15a3e96f 417 return TAKE_FD(fd);
80876c20 418}
071830ff 419
08f3be7a
LP
420static int fixup_input(
421 const ExecContext *context,
422 int socket_fd,
423 bool apply_tty_stdin) {
424
425 ExecInput std_input;
426
427 assert(context);
428
429 std_input = context->std_input;
1e3ad081
LP
430
431 if (is_terminal_input(std_input) && !apply_tty_stdin)
432 return EXEC_INPUT_NULL;
071830ff 433
03fd9c49 434 if (std_input == EXEC_INPUT_SOCKET && socket_fd < 0)
4f2d528d
LP
435 return EXEC_INPUT_NULL;
436
08f3be7a
LP
437 if (std_input == EXEC_INPUT_DATA && context->stdin_data_size == 0)
438 return EXEC_INPUT_NULL;
439
03fd9c49 440 return std_input;
4f2d528d
LP
441}
442
03fd9c49 443static int fixup_output(ExecOutput std_output, int socket_fd) {
4f2d528d 444
03fd9c49 445 if (std_output == EXEC_OUTPUT_SOCKET && socket_fd < 0)
4f2d528d
LP
446 return EXEC_OUTPUT_INHERIT;
447
03fd9c49 448 return std_output;
4f2d528d
LP
449}
450
a34ceba6
LP
451static int setup_input(
452 const ExecContext *context,
453 const ExecParameters *params,
52c239d7 454 int socket_fd,
2caa38e9 455 const int named_iofds[static 3]) {
a34ceba6 456
4f2d528d
LP
457 ExecInput i;
458
459 assert(context);
a34ceba6 460 assert(params);
2caa38e9 461 assert(named_iofds);
a34ceba6
LP
462
463 if (params->stdin_fd >= 0) {
464 if (dup2(params->stdin_fd, STDIN_FILENO) < 0)
465 return -errno;
466
467 /* Try to make this the controlling tty, if it is a tty, and reset it */
1fb0682e
LP
468 if (isatty(STDIN_FILENO)) {
469 (void) ioctl(STDIN_FILENO, TIOCSCTTY, context->std_input == EXEC_INPUT_TTY_FORCE);
470 (void) reset_terminal_fd(STDIN_FILENO, true);
471 }
a34ceba6
LP
472
473 return STDIN_FILENO;
474 }
4f2d528d 475
08f3be7a 476 i = fixup_input(context, socket_fd, params->flags & EXEC_APPLY_TTY_STDIN);
4f2d528d
LP
477
478 switch (i) {
071830ff 479
80876c20
LP
480 case EXEC_INPUT_NULL:
481 return open_null_as(O_RDONLY, STDIN_FILENO);
482
483 case EXEC_INPUT_TTY:
484 case EXEC_INPUT_TTY_FORCE:
485 case EXEC_INPUT_TTY_FAIL: {
046a82c1 486 int fd;
071830ff 487
1e22b5cd 488 fd = acquire_terminal(exec_context_tty_path(context),
8854d795
LP
489 i == EXEC_INPUT_TTY_FAIL ? ACQUIRE_TERMINAL_TRY :
490 i == EXEC_INPUT_TTY_FORCE ? ACQUIRE_TERMINAL_FORCE :
491 ACQUIRE_TERMINAL_WAIT,
3a43da28 492 USEC_INFINITY);
970edce6 493 if (fd < 0)
80876c20
LP
494 return fd;
495
046a82c1 496 return move_fd(fd, STDIN_FILENO, false);
80876c20
LP
497 }
498
4f2d528d 499 case EXEC_INPUT_SOCKET:
e75a9ed1
LP
500 assert(socket_fd >= 0);
501
4f2d528d
LP
502 return dup2(socket_fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
503
52c239d7 504 case EXEC_INPUT_NAMED_FD:
e75a9ed1
LP
505 assert(named_iofds[STDIN_FILENO] >= 0);
506
52c239d7
LB
507 (void) fd_nonblock(named_iofds[STDIN_FILENO], false);
508 return dup2(named_iofds[STDIN_FILENO], STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
509
08f3be7a
LP
510 case EXEC_INPUT_DATA: {
511 int fd;
512
513 fd = acquire_data_fd(context->stdin_data, context->stdin_data_size, 0);
514 if (fd < 0)
515 return fd;
516
517 return move_fd(fd, STDIN_FILENO, false);
518 }
519
2038c3f5
LP
520 case EXEC_INPUT_FILE: {
521 bool rw;
522 int fd;
523
524 assert(context->stdio_file[STDIN_FILENO]);
525
526 rw = (context->std_output == EXEC_OUTPUT_FILE && streq_ptr(context->stdio_file[STDIN_FILENO], context->stdio_file[STDOUT_FILENO])) ||
527 (context->std_error == EXEC_OUTPUT_FILE && streq_ptr(context->stdio_file[STDIN_FILENO], context->stdio_file[STDERR_FILENO]));
528
529 fd = acquire_path(context->stdio_file[STDIN_FILENO], rw ? O_RDWR : O_RDONLY, 0666 & ~context->umask);
530 if (fd < 0)
531 return fd;
532
533 return move_fd(fd, STDIN_FILENO, false);
534 }
535
80876c20
LP
536 default:
537 assert_not_reached("Unknown input type");
538 }
539}
540
41fc585a
LP
541static bool can_inherit_stderr_from_stdout(
542 const ExecContext *context,
543 ExecOutput o,
544 ExecOutput e) {
545
546 assert(context);
547
548 /* Returns true, if given the specified STDERR and STDOUT output we can directly dup() the stdout fd to the
549 * stderr fd */
550
551 if (e == EXEC_OUTPUT_INHERIT)
552 return true;
553 if (e != o)
554 return false;
555
556 if (e == EXEC_OUTPUT_NAMED_FD)
557 return streq_ptr(context->stdio_fdname[STDOUT_FILENO], context->stdio_fdname[STDERR_FILENO]);
558
559 if (IN_SET(e, EXEC_OUTPUT_FILE, EXEC_OUTPUT_FILE_APPEND))
560 return streq_ptr(context->stdio_file[STDOUT_FILENO], context->stdio_file[STDERR_FILENO]);
561
562 return true;
563}
564
a34ceba6 565static int setup_output(
34cf6c43 566 const Unit *unit,
a34ceba6
LP
567 const ExecContext *context,
568 const ExecParameters *params,
569 int fileno,
570 int socket_fd,
2caa38e9 571 const int named_iofds[static 3],
a34ceba6 572 const char *ident,
7bce046b
LP
573 uid_t uid,
574 gid_t gid,
575 dev_t *journal_stream_dev,
576 ino_t *journal_stream_ino) {
a34ceba6 577
4f2d528d
LP
578 ExecOutput o;
579 ExecInput i;
47c1d80d 580 int r;
4f2d528d 581
f2341e0a 582 assert(unit);
80876c20 583 assert(context);
a34ceba6 584 assert(params);
80876c20 585 assert(ident);
7bce046b
LP
586 assert(journal_stream_dev);
587 assert(journal_stream_ino);
80876c20 588
a34ceba6
LP
589 if (fileno == STDOUT_FILENO && params->stdout_fd >= 0) {
590
591 if (dup2(params->stdout_fd, STDOUT_FILENO) < 0)
592 return -errno;
593
594 return STDOUT_FILENO;
595 }
596
597 if (fileno == STDERR_FILENO && params->stderr_fd >= 0) {
598 if (dup2(params->stderr_fd, STDERR_FILENO) < 0)
599 return -errno;
600
601 return STDERR_FILENO;
602 }
603
08f3be7a 604 i = fixup_input(context, socket_fd, params->flags & EXEC_APPLY_TTY_STDIN);
03fd9c49 605 o = fixup_output(context->std_output, socket_fd);
4f2d528d 606
eb17e935
MS
607 if (fileno == STDERR_FILENO) {
608 ExecOutput e;
609 e = fixup_output(context->std_error, socket_fd);
80876c20 610
eb17e935
MS
611 /* This expects the input and output are already set up */
612
613 /* Don't change the stderr file descriptor if we inherit all
614 * the way and are not on a tty */
615 if (e == EXEC_OUTPUT_INHERIT &&
616 o == EXEC_OUTPUT_INHERIT &&
617 i == EXEC_INPUT_NULL &&
618 !is_terminal_input(context->std_input) &&
619 getppid () != 1)
620 return fileno;
621
622 /* Duplicate from stdout if possible */
41fc585a 623 if (can_inherit_stderr_from_stdout(context, o, e))
eb17e935 624 return dup2(STDOUT_FILENO, fileno) < 0 ? -errno : fileno;
071830ff 625
eb17e935 626 o = e;
80876c20 627
eb17e935 628 } else if (o == EXEC_OUTPUT_INHERIT) {
21d21ea4
LP
629 /* If input got downgraded, inherit the original value */
630 if (i == EXEC_INPUT_NULL && is_terminal_input(context->std_input))
1e22b5cd 631 return open_terminal_as(exec_context_tty_path(context), O_WRONLY, fileno);
21d21ea4 632
08f3be7a
LP
633 /* If the input is connected to anything that's not a /dev/null or a data fd, inherit that... */
634 if (!IN_SET(i, EXEC_INPUT_NULL, EXEC_INPUT_DATA))
eb17e935 635 return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
071830ff 636
acb591e4
LP
637 /* If we are not started from PID 1 we just inherit STDOUT from our parent process. */
638 if (getppid() != 1)
eb17e935 639 return fileno;
94f04347 640
eb17e935
MS
641 /* We need to open /dev/null here anew, to get the right access mode. */
642 return open_null_as(O_WRONLY, fileno);
071830ff 643 }
94f04347 644
eb17e935 645 switch (o) {
80876c20
LP
646
647 case EXEC_OUTPUT_NULL:
eb17e935 648 return open_null_as(O_WRONLY, fileno);
80876c20
LP
649
650 case EXEC_OUTPUT_TTY:
4f2d528d 651 if (is_terminal_input(i))
eb17e935 652 return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
80876c20
LP
653
654 /* We don't reset the terminal if this is just about output */
1e22b5cd 655 return open_terminal_as(exec_context_tty_path(context), O_WRONLY, fileno);
80876c20
LP
656
657 case EXEC_OUTPUT_SYSLOG:
28dbc1e8 658 case EXEC_OUTPUT_SYSLOG_AND_CONSOLE:
9a6bca7a 659 case EXEC_OUTPUT_KMSG:
28dbc1e8 660 case EXEC_OUTPUT_KMSG_AND_CONSOLE:
706343f4
LP
661 case EXEC_OUTPUT_JOURNAL:
662 case EXEC_OUTPUT_JOURNAL_AND_CONSOLE:
af635cf3 663 r = connect_logger_as(unit, context, params, o, ident, fileno, uid, gid);
47c1d80d 664 if (r < 0) {
82677ae4 665 log_unit_warning_errno(unit, r, "Failed to connect %s to the journal socket, ignoring: %m", fileno == STDOUT_FILENO ? "stdout" : "stderr");
eb17e935 666 r = open_null_as(O_WRONLY, fileno);
7bce046b
LP
667 } else {
668 struct stat st;
669
670 /* If we connected this fd to the journal via a stream, patch the device/inode into the passed
671 * parameters, but only then. This is useful so that we can set $JOURNAL_STREAM that permits
ab2116b1
LP
672 * services to detect whether they are connected to the journal or not.
673 *
674 * If both stdout and stderr are connected to a stream then let's make sure to store the data
675 * about STDERR as that's usually the best way to do logging. */
7bce046b 676
ab2116b1
LP
677 if (fstat(fileno, &st) >= 0 &&
678 (*journal_stream_ino == 0 || fileno == STDERR_FILENO)) {
7bce046b
LP
679 *journal_stream_dev = st.st_dev;
680 *journal_stream_ino = st.st_ino;
681 }
47c1d80d
MS
682 }
683 return r;
4f2d528d
LP
684
685 case EXEC_OUTPUT_SOCKET:
686 assert(socket_fd >= 0);
e75a9ed1 687
eb17e935 688 return dup2(socket_fd, fileno) < 0 ? -errno : fileno;
94f04347 689
52c239d7 690 case EXEC_OUTPUT_NAMED_FD:
e75a9ed1
LP
691 assert(named_iofds[fileno] >= 0);
692
52c239d7
LB
693 (void) fd_nonblock(named_iofds[fileno], false);
694 return dup2(named_iofds[fileno], fileno) < 0 ? -errno : fileno;
695
566b7d23
ZD
696 case EXEC_OUTPUT_FILE:
697 case EXEC_OUTPUT_FILE_APPEND: {
2038c3f5 698 bool rw;
566b7d23 699 int fd, flags;
2038c3f5
LP
700
701 assert(context->stdio_file[fileno]);
702
703 rw = context->std_input == EXEC_INPUT_FILE &&
704 streq_ptr(context->stdio_file[fileno], context->stdio_file[STDIN_FILENO]);
705
706 if (rw)
707 return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
708
566b7d23
ZD
709 flags = O_WRONLY;
710 if (o == EXEC_OUTPUT_FILE_APPEND)
711 flags |= O_APPEND;
712
713 fd = acquire_path(context->stdio_file[fileno], flags, 0666 & ~context->umask);
2038c3f5
LP
714 if (fd < 0)
715 return fd;
716
566b7d23 717 return move_fd(fd, fileno, 0);
2038c3f5
LP
718 }
719
94f04347 720 default:
80876c20 721 assert_not_reached("Unknown error type");
94f04347 722 }
071830ff
LP
723}
724
02a51aba 725static int chown_terminal(int fd, uid_t uid) {
4b3b5bc7 726 int r;
02a51aba
LP
727
728 assert(fd >= 0);
02a51aba 729
1ff74fb6 730 /* Before we chown/chmod the TTY, let's ensure this is actually a tty */
4b3b5bc7
LP
731 if (isatty(fd) < 1) {
732 if (IN_SET(errno, EINVAL, ENOTTY))
733 return 0; /* not a tty */
1ff74fb6 734
02a51aba 735 return -errno;
4b3b5bc7 736 }
02a51aba 737
4b3b5bc7
LP
738 /* This might fail. What matters are the results. */
739 r = fchmod_and_chown(fd, TTY_MODE, uid, -1);
740 if (r < 0)
741 return r;
02a51aba 742
4b3b5bc7 743 return 1;
02a51aba
LP
744}
745
7d5ceb64 746static int setup_confirm_stdio(const char *vc, int *_saved_stdin, int *_saved_stdout) {
3d18b167
LP
747 _cleanup_close_ int fd = -1, saved_stdin = -1, saved_stdout = -1;
748 int r;
80876c20 749
80876c20
LP
750 assert(_saved_stdin);
751 assert(_saved_stdout);
752
af6da548
LP
753 saved_stdin = fcntl(STDIN_FILENO, F_DUPFD, 3);
754 if (saved_stdin < 0)
755 return -errno;
80876c20 756
af6da548 757 saved_stdout = fcntl(STDOUT_FILENO, F_DUPFD, 3);
3d18b167
LP
758 if (saved_stdout < 0)
759 return -errno;
80876c20 760
8854d795 761 fd = acquire_terminal(vc, ACQUIRE_TERMINAL_WAIT, DEFAULT_CONFIRM_USEC);
3d18b167
LP
762 if (fd < 0)
763 return fd;
80876c20 764
af6da548
LP
765 r = chown_terminal(fd, getuid());
766 if (r < 0)
3d18b167 767 return r;
02a51aba 768
3d18b167
LP
769 r = reset_terminal_fd(fd, true);
770 if (r < 0)
771 return r;
80876c20 772
2b33ab09 773 r = rearrange_stdio(fd, fd, STDERR_FILENO);
3d18b167 774 fd = -1;
2b33ab09
LP
775 if (r < 0)
776 return r;
80876c20
LP
777
778 *_saved_stdin = saved_stdin;
779 *_saved_stdout = saved_stdout;
780
3d18b167 781 saved_stdin = saved_stdout = -1;
80876c20 782
3d18b167 783 return 0;
80876c20
LP
784}
785
63d77c92 786static void write_confirm_error_fd(int err, int fd, const Unit *u) {
3b20f877
FB
787 assert(err < 0);
788
789 if (err == -ETIMEDOUT)
63d77c92 790 dprintf(fd, "Confirmation question timed out for %s, assuming positive response.\n", u->id);
3b20f877
FB
791 else {
792 errno = -err;
63d77c92 793 dprintf(fd, "Couldn't ask confirmation for %s: %m, assuming positive response.\n", u->id);
3b20f877
FB
794 }
795}
796
63d77c92 797static void write_confirm_error(int err, const char *vc, const Unit *u) {
03e334a1 798 _cleanup_close_ int fd = -1;
80876c20 799
3b20f877 800 assert(vc);
80876c20 801
7d5ceb64 802 fd = open_terminal(vc, O_WRONLY|O_NOCTTY|O_CLOEXEC);
af6da548 803 if (fd < 0)
3b20f877 804 return;
80876c20 805
63d77c92 806 write_confirm_error_fd(err, fd, u);
af6da548 807}
80876c20 808
/* Undoes setup_confirm_stdio(): releases the terminal, puts the stashed fds (if any) back in place as
 * stdin and stdout, and closes the stashed copies. Returns 0 on success, or the negative errno of the last
 * dup2() that failed. */
static int restore_confirm_stdio(int *saved_stdin, int *saved_stdout) {
        int ret = 0;

        assert(saved_stdin);
        assert(saved_stdout);

        release_terminal();

        if (*saved_stdin >= 0 && dup2(*saved_stdin, STDIN_FILENO) < 0)
                ret = -errno;

        if (*saved_stdout >= 0 && dup2(*saved_stdout, STDOUT_FILENO) < 0)
                ret = -errno;

        /* The stashed fds are closed in any case; the caller's variables are overwritten with whatever
         * safe_close() returns. */
        *saved_stdin = safe_close(*saved_stdin);
        *saved_stdout = safe_close(*saved_stdout);

        return ret;
}
830
3b20f877
FB
/* Possible outcomes of asking the user for confirmation before a command is executed. */
enum {
        CONFIRM_PRETEND_FAILURE = -1, /* do not run the command, act as if it had failed */
        CONFIRM_PRETEND_SUCCESS =  0, /* do not run the command, act as if it had succeeded */
        CONFIRM_EXECUTE         =  1, /* run the command */
};
836
eedf223a 837static int ask_for_confirmation(const char *vc, Unit *u, const char *cmdline) {
af6da548 838 int saved_stdout = -1, saved_stdin = -1, r;
2bcd3c26 839 _cleanup_free_ char *e = NULL;
3b20f877 840 char c;
af6da548 841
3b20f877 842 /* For any internal errors, assume a positive response. */
7d5ceb64 843 r = setup_confirm_stdio(vc, &saved_stdin, &saved_stdout);
3b20f877 844 if (r < 0) {
63d77c92 845 write_confirm_error(r, vc, u);
3b20f877
FB
846 return CONFIRM_EXECUTE;
847 }
af6da548 848
b0eb2944
FB
849 /* confirm_spawn might have been disabled while we were sleeping. */
850 if (manager_is_confirm_spawn_disabled(u->manager)) {
851 r = 1;
852 goto restore_stdio;
853 }
af6da548 854
2bcd3c26
FB
855 e = ellipsize(cmdline, 60, 100);
856 if (!e) {
857 log_oom();
858 r = CONFIRM_EXECUTE;
859 goto restore_stdio;
860 }
af6da548 861
d172b175 862 for (;;) {
539622bd 863 r = ask_char(&c, "yfshiDjcn", "Execute %s? [y, f, s – h for help] ", e);
d172b175 864 if (r < 0) {
63d77c92 865 write_confirm_error_fd(r, STDOUT_FILENO, u);
d172b175
FB
866 r = CONFIRM_EXECUTE;
867 goto restore_stdio;
868 }
af6da548 869
d172b175 870 switch (c) {
b0eb2944
FB
871 case 'c':
872 printf("Resuming normal execution.\n");
873 manager_disable_confirm_spawn();
874 r = 1;
875 break;
dd6f9ac0
FB
876 case 'D':
877 unit_dump(u, stdout, " ");
878 continue; /* ask again */
d172b175
FB
879 case 'f':
880 printf("Failing execution.\n");
881 r = CONFIRM_PRETEND_FAILURE;
882 break;
883 case 'h':
b0eb2944
FB
884 printf(" c - continue, proceed without asking anymore\n"
885 " D - dump, show the state of the unit\n"
dd6f9ac0 886 " f - fail, don't execute the command and pretend it failed\n"
d172b175 887 " h - help\n"
eedf223a 888 " i - info, show a short summary of the unit\n"
56fde33a 889 " j - jobs, show jobs that are in progress\n"
d172b175
FB
890 " s - skip, don't execute the command and pretend it succeeded\n"
891 " y - yes, execute the command\n");
dd6f9ac0 892 continue; /* ask again */
eedf223a
FB
893 case 'i':
894 printf(" Description: %s\n"
895 " Unit: %s\n"
896 " Command: %s\n",
897 u->id, u->description, cmdline);
898 continue; /* ask again */
56fde33a
FB
899 case 'j':
900 manager_dump_jobs(u->manager, stdout, " ");
901 continue; /* ask again */
539622bd
FB
902 case 'n':
903 /* 'n' was removed in favor of 'f'. */
904 printf("Didn't understand 'n', did you mean 'f'?\n");
905 continue; /* ask again */
d172b175
FB
906 case 's':
907 printf("Skipping execution.\n");
908 r = CONFIRM_PRETEND_SUCCESS;
909 break;
910 case 'y':
911 r = CONFIRM_EXECUTE;
912 break;
913 default:
914 assert_not_reached("Unhandled choice");
915 }
3b20f877 916 break;
3b20f877 917 }
af6da548 918
3b20f877 919restore_stdio:
af6da548 920 restore_confirm_stdio(&saved_stdin, &saved_stdout);
af6da548 921 return r;
80876c20
LP
922}
923
4d885bd3
DH
924static int get_fixed_user(const ExecContext *c, const char **user,
925 uid_t *uid, gid_t *gid,
926 const char **home, const char **shell) {
81a2b7ce 927 int r;
4d885bd3 928 const char *name;
81a2b7ce 929
4d885bd3 930 assert(c);
81a2b7ce 931
23deef88
LP
932 if (!c->user)
933 return 0;
934
4d885bd3
DH
935 /* Note that we don't set $HOME or $SHELL if they are not particularly enlightening anyway
936 * (i.e. are "/" or "/bin/nologin"). */
81a2b7ce 937
23deef88 938 name = c->user;
fafff8f1 939 r = get_user_creds(&name, uid, gid, home, shell, USER_CREDS_CLEAN);
4d885bd3
DH
940 if (r < 0)
941 return r;
81a2b7ce 942
4d885bd3
DH
943 *user = name;
944 return 0;
945}
946
947static int get_fixed_group(const ExecContext *c, const char **group, gid_t *gid) {
948 int r;
949 const char *name;
950
951 assert(c);
952
953 if (!c->group)
954 return 0;
955
956 name = c->group;
fafff8f1 957 r = get_group_creds(&name, gid, 0);
4d885bd3
DH
958 if (r < 0)
959 return r;
960
961 *group = name;
962 return 0;
963}
964
cdc5d5c5
DH
965static int get_supplementary_groups(const ExecContext *c, const char *user,
966 const char *group, gid_t gid,
967 gid_t **supplementary_gids, int *ngids) {
4d885bd3
DH
968 char **i;
969 int r, k = 0;
970 int ngroups_max;
971 bool keep_groups = false;
972 gid_t *groups = NULL;
973 _cleanup_free_ gid_t *l_gids = NULL;
974
975 assert(c);
976
bbeea271
DH
977 /*
978 * If user is given, then lookup GID and supplementary groups list.
979 * We avoid NSS lookups for gid=0. Also we have to initialize groups
cdc5d5c5
DH
980 * here and as early as possible so we keep the list of supplementary
981 * groups of the caller.
bbeea271
DH
982 */
983 if (user && gid_is_valid(gid) && gid != 0) {
984 /* First step, initialize groups from /etc/groups */
985 if (initgroups(user, gid) < 0)
986 return -errno;
987
988 keep_groups = true;
989 }
990
ac6e8be6 991 if (strv_isempty(c->supplementary_groups))
4d885bd3
DH
992 return 0;
993
366ddd25
DH
994 /*
995 * If SupplementaryGroups= was passed then NGROUPS_MAX has to
996 * be positive, otherwise fail.
997 */
998 errno = 0;
999 ngroups_max = (int) sysconf(_SC_NGROUPS_MAX);
66855de7
LP
1000 if (ngroups_max <= 0)
1001 return errno_or_else(EOPNOTSUPP);
366ddd25 1002
4d885bd3
DH
1003 l_gids = new(gid_t, ngroups_max);
1004 if (!l_gids)
1005 return -ENOMEM;
81a2b7ce 1006
4d885bd3
DH
1007 if (keep_groups) {
1008 /*
1009 * Lookup the list of groups that the user belongs to, we
1010 * avoid NSS lookups here too for gid=0.
1011 */
1012 k = ngroups_max;
1013 if (getgrouplist(user, gid, l_gids, &k) < 0)
1014 return -EINVAL;
1015 } else
1016 k = 0;
81a2b7ce 1017
4d885bd3
DH
1018 STRV_FOREACH(i, c->supplementary_groups) {
1019 const char *g;
81a2b7ce 1020
4d885bd3
DH
1021 if (k >= ngroups_max)
1022 return -E2BIG;
81a2b7ce 1023
4d885bd3 1024 g = *i;
fafff8f1 1025 r = get_group_creds(&g, l_gids+k, 0);
4d885bd3
DH
1026 if (r < 0)
1027 return r;
81a2b7ce 1028
4d885bd3
DH
1029 k++;
1030 }
81a2b7ce 1031
4d885bd3
DH
1032 /*
1033 * Sets ngids to zero to drop all supplementary groups, happens
1034 * when we are under root and SupplementaryGroups= is empty.
1035 */
1036 if (k == 0) {
1037 *ngids = 0;
1038 return 0;
1039 }
81a2b7ce 1040
4d885bd3
DH
1041 /* Otherwise get the final list of supplementary groups */
1042 groups = memdup(l_gids, sizeof(gid_t) * k);
1043 if (!groups)
1044 return -ENOMEM;
1045
1046 *supplementary_gids = groups;
1047 *ngids = k;
1048
1049 groups = NULL;
1050
1051 return 0;
1052}
1053
/* Applies the group credentials: first the supplementary groups (if there are any), then the real,
 * effective and saved GID (if 'gid' is valid). Returns 0 on success, negative errno-style error on
 * failure. */
static int enforce_groups(gid_t gid, const gid_t *supplementary_gids, int ngids) {
        int r;

        /* Handle SupplementaryGroups= if it is not empty */
        if (ngids > 0) {
                r = maybe_setgroups(ngids, supplementary_gids);
                if (r < 0)
                        return r;
        }

        /* Then set our gids */
        if (gid_is_valid(gid) && setresgid(gid, gid, gid) < 0)
                return -errno;

        return 0;
}
1072
1073static int enforce_user(const ExecContext *context, uid_t uid) {
81a2b7ce
LP
1074 assert(context);
1075
4d885bd3
DH
1076 if (!uid_is_valid(uid))
1077 return 0;
1078
479050b3 1079 /* Sets (but doesn't look up) the uid and make sure we keep the
81a2b7ce
LP
1080 * capabilities while doing so. */
1081
479050b3 1082 if (context->capability_ambient_set != 0) {
81a2b7ce
LP
1083
1084 /* First step: If we need to keep capabilities but
1085 * drop privileges we need to make sure we keep our
cbb21cca 1086 * caps, while we drop privileges. */
693ced48 1087 if (uid != 0) {
cbb21cca 1088 int sb = context->secure_bits | 1<<SECURE_KEEP_CAPS;
693ced48
LP
1089
1090 if (prctl(PR_GET_SECUREBITS) != sb)
1091 if (prctl(PR_SET_SECUREBITS, sb) < 0)
1092 return -errno;
1093 }
81a2b7ce
LP
1094 }
1095
479050b3 1096 /* Second step: actually set the uids */
81a2b7ce
LP
1097 if (setresuid(uid, uid, uid) < 0)
1098 return -errno;
1099
1100 /* At this point we should have all necessary capabilities but
1101 are otherwise a normal user. However, the caps might got
1102 corrupted due to the setresuid() so we need clean them up
1103 later. This is done outside of this call. */
1104
1105 return 0;
1106}
1107
349cc4a5 1108#if HAVE_PAM
5b6319dc
LP
1109
1110static int null_conv(
1111 int num_msg,
1112 const struct pam_message **msg,
1113 struct pam_response **resp,
1114 void *appdata_ptr) {
1115
1116 /* We don't support conversations */
1117
1118 return PAM_CONV_ERR;
1119}
1120
cefc33ae
LP
1121#endif
1122
5b6319dc
LP
/* Opens a PAM session for service 'name' and user 'user', and forks off a helper process ("(sd-pam)")
 * that closes the session again once it is told to terminate.
 *
 *   name, user  PAM service name and user name, passed to pam_start().
 *   uid, gid    Credentials the helper process switches to (failures to do so are only logged).
 *   tty         TTY to set as PAM_TTY; if NULL, the TTY name of stdin is used when one can be determined.
 *   env         In: variables exported into the PAM environment. Out: on success replaced by the
 *               environment list returned by pam_getenvlist().
 *   fds, n_fds  File descriptors that are closed in the helper process.
 *
 * Returns the result of strv_free_and_replace() on success, -EPERM if a PAM call failed, or another
 * negative errno-style value if barrier creation or fork failed. When built without PAM support this
 * is a no-op that returns 0. */
static int setup_pam(
                const char *name,
                const char *user,
                uid_t uid,
                gid_t gid,
                const char *tty,
                char ***env,
                int fds[], size_t n_fds) {

#if HAVE_PAM

        static const struct pam_conv conv = {
                .conv = null_conv,
                .appdata_ptr = NULL
        };

        _cleanup_(barrier_destroy) Barrier barrier = BARRIER_NULL;
        pam_handle_t *handle = NULL;
        sigset_t old_ss;
        int pam_code = PAM_SUCCESS, r;
        char **nv, **e = NULL;
        bool close_session = false;
        pid_t pam_pid = 0, parent_pid;
        int flags = 0;

        assert(name);
        assert(user);
        assert(env);

        /* We set up PAM in the parent process, then fork. The child will then stay around until killed via
         * the parent-death signal (PR_SET_PDEATHSIG) or by systemd via the cgroup logic. It will then remove
         * the PAM session again. The parent process will exec() the actual daemon. We do things this way to
         * ensure that the main PID of the daemon is the one we initially fork()ed. */

        r = barrier_create(&barrier);
        if (r < 0)
                goto fail;

        /* Keep PAM quiet unless debug logging is enabled */
        if (log_get_max_level() < LOG_DEBUG)
                flags |= PAM_SILENT;

        pam_code = pam_start(name, user, &conv, &handle);
        if (pam_code != PAM_SUCCESS) {
                handle = NULL;
                goto fail;
        }

        if (!tty) {
                _cleanup_free_ char *q = NULL;

                /* Hmm, so no TTY was explicitly passed, but an fd passed to us directly might be a TTY. Let's
                 * figure out if that's the case, and read the TTY off it. */

                if (getttyname_malloc(STDIN_FILENO, &q) >= 0)
                        tty = strjoina("/dev/", q);
        }

        if (tty) {
                pam_code = pam_set_item(handle, PAM_TTY, tty);
                if (pam_code != PAM_SUCCESS)
                        goto fail;
        }

        STRV_FOREACH(nv, *env) {
                pam_code = pam_putenv(handle, *nv);
                if (pam_code != PAM_SUCCESS)
                        goto fail;
        }

        pam_code = pam_acct_mgmt(handle, flags);
        if (pam_code != PAM_SUCCESS)
                goto fail;

        pam_code = pam_open_session(handle, flags);
        if (pam_code != PAM_SUCCESS)
                goto fail;

        /* From here on the failure path has to close the session again */
        close_session = true;

        e = pam_getenvlist(handle);
        if (!e) {
                pam_code = PAM_BUF_ERR;
                goto fail;
        }

        /* Block SIGTERM, so that we know that it won't get lost in the child */

        assert_se(sigprocmask_many(SIG_BLOCK, &old_ss, SIGTERM, -1) >= 0);

        parent_pid = getpid_cached();

        r = safe_fork("(sd-pam)", 0, &pam_pid);
        if (r < 0)
                goto fail;
        if (r == 0) {
                int sig, ret = EXIT_PAM;

                /* The child's job is to reset the PAM session on termination */
                barrier_set_role(&barrier, BARRIER_CHILD);

                /* Make sure we don't keep open the passed fds in this child. We assume that otherwise only
                 * those fds are open here that have been opened by PAM. */
                (void) close_many(fds, n_fds);

                /* Drop privileges - we don't need any to pam_close_session and this will make
                 * PR_SET_PDEATHSIG work in most cases. If this fails, ignore the error - but expect sd-pam
                 * threads to fail to exit normally */

                r = maybe_setgroups(0, NULL);
                if (r < 0)
                        log_warning_errno(r, "Failed to setgroups() in sd-pam: %m");
                if (setresgid(gid, gid, gid) < 0)
                        log_warning_errno(errno, "Failed to setresgid() in sd-pam: %m");
                if (setresuid(uid, uid, uid) < 0)
                        log_warning_errno(errno, "Failed to setresuid() in sd-pam: %m");

                (void) ignore_signals(SIGPIPE, -1);

                /* Wait until our parent died. This will only work if the above setresuid() succeeds,
                 * otherwise the kernel will not allow unprivileged parents kill their privileged children
                 * this way. We rely on the control groups kill logic to do the rest for us. */
                if (prctl(PR_SET_PDEATHSIG, SIGTERM) < 0)
                        goto child_finish;

                /* Tell the parent that our setup is done. This is especially important regarding dropping
                 * privileges. Otherwise, unit setup might race against our setresuid(2) call.
                 *
                 * If the parent aborted, we'll detect this below, hence ignore return failure here. */
                (void) barrier_place(&barrier);

                /* Check if our parent process might already have died? */
                if (getppid() == parent_pid) {
                        sigset_t ss;

                        assert_se(sigemptyset(&ss) >= 0);
                        assert_se(sigaddset(&ss, SIGTERM) >= 0);

                        for (;;) {
                                int k;

                                /* sigwait() reports failure by returning a positive error number; it
                                 * neither returns a negative value nor sets errno. Hence check the return
                                 * value itself, and only look at 'sig' if the call succeeded. */
                                k = sigwait(&ss, &sig);
                                if (k == 0) {
                                        assert(sig == SIGTERM);
                                        break;
                                }

                                if (k != EINTR)
                                        goto child_finish;
                        }
                }

                /* If our parent died we'll end the session */
                if (getppid() != parent_pid) {
                        pam_code = pam_close_session(handle, flags);
                        if (pam_code != PAM_SUCCESS)
                                goto child_finish;
                }

                ret = 0;

        child_finish:
                pam_end(handle, pam_code | flags);
                _exit(ret);
        }

        barrier_set_role(&barrier, BARRIER_PARENT);

        /* If the child was forked off successfully it will do all the cleanups, so forget about the handle
         * here. */
        handle = NULL;

        /* Unblock SIGTERM again in the parent */
        assert_se(sigprocmask(SIG_SETMASK, &old_ss, NULL) >= 0);

        /* We close the log explicitly here, since the PAM modules might have opened it, but we don't want
         * this fd around. */
        closelog();

        /* Synchronously wait for the child to initialize. We don't care for errors as we cannot recover.
         * However, warn loudly if it happens. */
        if (!barrier_place_and_sync(&barrier))
                log_error("PAM initialization failed");

        return strv_free_and_replace(*env, e);

fail:
        if (pam_code != PAM_SUCCESS) {
                log_error("PAM failed: %s", pam_strerror(handle, pam_code));
                r = -EPERM;  /* PAM errors do not map to errno */
        } else
                log_error_errno(r, "PAM failed: %m");

        if (handle) {
                if (close_session)
                        pam_code = pam_close_session(handle, flags);

                pam_end(handle, pam_code | flags);
        }

        strv_free(e);
        closelog();

        return r;
#else
        return 0;
#endif
}
5b6319dc 1338
5d6b1584
LP
/* Renames the calling process to "(<name>)", where <name> is taken from the last component of 'path',
 * shortened to its final 8 characters if it is longer than that. If the last component is empty the
 * process is renamed to "(...)". */
static void rename_process_from_path(const char *path) {
        char buf[11]; /* '(' + up to 8 characters + ')' + NUL */
        const char *base;
        size_t n;

        /* The resulting string must fit in 10 chars (i.e. the length of "/sbin/init") to look pretty in
         * /bin/ps */

        base = basename(path);
        if (isempty(base)) {
                rename_process("(...)");
                return;
        }

        n = strlen(base);
        if (n > 8) {
                /* The end of the process name is usually more interesting, since the first bit might just
                 * be "systemd-" */
                base += n - 8;
                n = 8;
        }

        buf[0] = '(';
        memcpy(buf + 1, base, n);
        buf[n + 1] = ')';
        buf[n + 2] = 0;

        rename_process(buf);
}
1369
469830d1
LP
1370static bool context_has_address_families(const ExecContext *c) {
1371 assert(c);
1372
1373 return c->address_families_whitelist ||
1374 !set_isempty(c->address_families);
1375}
1376
1377static bool context_has_syscall_filters(const ExecContext *c) {
1378 assert(c);
1379
1380 return c->syscall_whitelist ||
8cfa775f 1381 !hashmap_isempty(c->syscall_filter);
469830d1
LP
1382}
1383
1384static bool context_has_no_new_privileges(const ExecContext *c) {
1385 assert(c);
1386
1387 if (c->no_new_privileges)
1388 return true;
1389
1390 if (have_effective_cap(CAP_SYS_ADMIN)) /* if we are privileged, we don't need NNP */
1391 return false;
1392
1393 /* We need NNP if we have any form of seccomp and are unprivileged */
1394 return context_has_address_families(c) ||
1395 c->memory_deny_write_execute ||
1396 c->restrict_realtime ||
f69567cb 1397 c->restrict_suid_sgid ||
469830d1
LP
1398 exec_context_restrict_namespaces_set(c) ||
1399 c->protect_kernel_tunables ||
1400 c->protect_kernel_modules ||
1401 c->private_devices ||
1402 context_has_syscall_filters(c) ||
78e864e5 1403 !set_isempty(c->syscall_archs) ||
aecd5ac6
TM
1404 c->lock_personality ||
1405 c->protect_hostname;
469830d1
LP
1406}
1407
349cc4a5 1408#if HAVE_SECCOMP
17df7223 1409
83f12b27 1410static bool skip_seccomp_unavailable(const Unit* u, const char* msg) {
f673b62d
LP
1411
1412 if (is_seccomp_available())
1413 return false;
1414
f673b62d 1415 log_unit_debug(u, "SECCOMP features not detected in the kernel, skipping %s", msg);
f673b62d 1416 return true;
83f12b27
FS
1417}
1418
165a31c0 1419static int apply_syscall_filter(const Unit* u, const ExecContext *c, bool needs_ambient_hack) {
469830d1 1420 uint32_t negative_action, default_action, action;
165a31c0 1421 int r;
8351ceae 1422
469830d1 1423 assert(u);
c0467cf3 1424 assert(c);
8351ceae 1425
469830d1 1426 if (!context_has_syscall_filters(c))
83f12b27
FS
1427 return 0;
1428
469830d1
LP
1429 if (skip_seccomp_unavailable(u, "SystemCallFilter="))
1430 return 0;
e9642be2 1431
ccc16c78 1432 negative_action = c->syscall_errno == 0 ? scmp_act_kill_process() : SCMP_ACT_ERRNO(c->syscall_errno);
e9642be2 1433
469830d1
LP
1434 if (c->syscall_whitelist) {
1435 default_action = negative_action;
1436 action = SCMP_ACT_ALLOW;
7c66bae2 1437 } else {
469830d1
LP
1438 default_action = SCMP_ACT_ALLOW;
1439 action = negative_action;
57183d11 1440 }
8351ceae 1441
165a31c0
LP
1442 if (needs_ambient_hack) {
1443 r = seccomp_filter_set_add(c->syscall_filter, c->syscall_whitelist, syscall_filter_sets + SYSCALL_FILTER_SET_SETUID);
1444 if (r < 0)
1445 return r;
1446 }
1447
b54f36c6 1448 return seccomp_load_syscall_filter_set_raw(default_action, c->syscall_filter, action, false);
4298d0b5
LP
1449}
1450
469830d1
LP
1451static int apply_syscall_archs(const Unit *u, const ExecContext *c) {
1452 assert(u);
4298d0b5
LP
1453 assert(c);
1454
469830d1 1455 if (set_isempty(c->syscall_archs))
83f12b27
FS
1456 return 0;
1457
469830d1
LP
1458 if (skip_seccomp_unavailable(u, "SystemCallArchitectures="))
1459 return 0;
4298d0b5 1460
469830d1
LP
1461 return seccomp_restrict_archs(c->syscall_archs);
1462}
4298d0b5 1463
469830d1
LP
1464static int apply_address_families(const Unit* u, const ExecContext *c) {
1465 assert(u);
1466 assert(c);
4298d0b5 1467
469830d1
LP
1468 if (!context_has_address_families(c))
1469 return 0;
4298d0b5 1470
469830d1
LP
1471 if (skip_seccomp_unavailable(u, "RestrictAddressFamilies="))
1472 return 0;
4298d0b5 1473
469830d1 1474 return seccomp_restrict_address_families(c->address_families, c->address_families_whitelist);
8351ceae 1475}
4298d0b5 1476
83f12b27 1477static int apply_memory_deny_write_execute(const Unit* u, const ExecContext *c) {
469830d1 1478 assert(u);
f3e43635
TM
1479 assert(c);
1480
469830d1 1481 if (!c->memory_deny_write_execute)
83f12b27
FS
1482 return 0;
1483
469830d1
LP
1484 if (skip_seccomp_unavailable(u, "MemoryDenyWriteExecute="))
1485 return 0;
f3e43635 1486
469830d1 1487 return seccomp_memory_deny_write_execute();
f3e43635
TM
1488}
1489
83f12b27 1490static int apply_restrict_realtime(const Unit* u, const ExecContext *c) {
469830d1 1491 assert(u);
f4170c67
LP
1492 assert(c);
1493
469830d1 1494 if (!c->restrict_realtime)
83f12b27
FS
1495 return 0;
1496
469830d1
LP
1497 if (skip_seccomp_unavailable(u, "RestrictRealtime="))
1498 return 0;
f4170c67 1499
469830d1 1500 return seccomp_restrict_realtime();
f4170c67
LP
1501}
1502
f69567cb
LP
1503static int apply_restrict_suid_sgid(const Unit* u, const ExecContext *c) {
1504 assert(u);
1505 assert(c);
1506
1507 if (!c->restrict_suid_sgid)
1508 return 0;
1509
1510 if (skip_seccomp_unavailable(u, "RestrictSUIDSGID="))
1511 return 0;
1512
1513 return seccomp_restrict_suid_sgid();
1514}
1515
59e856c7 1516static int apply_protect_sysctl(const Unit *u, const ExecContext *c) {
469830d1 1517 assert(u);
59eeb84b
LP
1518 assert(c);
1519
1520 /* Turn off the legacy sysctl() system call. Many distributions turn this off while building the kernel, but
1521 * let's protect even those systems where this is left on in the kernel. */
1522
469830d1 1523 if (!c->protect_kernel_tunables)
59eeb84b
LP
1524 return 0;
1525
469830d1
LP
1526 if (skip_seccomp_unavailable(u, "ProtectKernelTunables="))
1527 return 0;
59eeb84b 1528
469830d1 1529 return seccomp_protect_sysctl();
59eeb84b
LP
1530}
1531
59e856c7 1532static int apply_protect_kernel_modules(const Unit *u, const ExecContext *c) {
469830d1 1533 assert(u);
502d704e
DH
1534 assert(c);
1535
25a8d8a0 1536 /* Turn off module syscalls on ProtectKernelModules=yes */
502d704e 1537
469830d1
LP
1538 if (!c->protect_kernel_modules)
1539 return 0;
1540
502d704e
DH
1541 if (skip_seccomp_unavailable(u, "ProtectKernelModules="))
1542 return 0;
1543
b54f36c6 1544 return seccomp_load_syscall_filter_set(SCMP_ACT_ALLOW, syscall_filter_sets + SYSCALL_FILTER_SET_MODULE, SCMP_ACT_ERRNO(EPERM), false);
502d704e
DH
1545}
1546
59e856c7 1547static int apply_private_devices(const Unit *u, const ExecContext *c) {
469830d1 1548 assert(u);
ba128bb8
LP
1549 assert(c);
1550
8f81a5f6 1551 /* If PrivateDevices= is set, also turn off iopl and all @raw-io syscalls. */
ba128bb8 1552
469830d1
LP
1553 if (!c->private_devices)
1554 return 0;
1555
ba128bb8
LP
1556 if (skip_seccomp_unavailable(u, "PrivateDevices="))
1557 return 0;
1558
b54f36c6 1559 return seccomp_load_syscall_filter_set(SCMP_ACT_ALLOW, syscall_filter_sets + SYSCALL_FILTER_SET_RAW_IO, SCMP_ACT_ERRNO(EPERM), false);
ba128bb8
LP
1560}
1561
34cf6c43 1562static int apply_restrict_namespaces(const Unit *u, const ExecContext *c) {
469830d1 1563 assert(u);
add00535
LP
1564 assert(c);
1565
1566 if (!exec_context_restrict_namespaces_set(c))
1567 return 0;
1568
1569 if (skip_seccomp_unavailable(u, "RestrictNamespaces="))
1570 return 0;
1571
1572 return seccomp_restrict_namespaces(c->restrict_namespaces);
1573}
1574
78e864e5 1575static int apply_lock_personality(const Unit* u, const ExecContext *c) {
e8132d63
LP
1576 unsigned long personality;
1577 int r;
78e864e5
TM
1578
1579 assert(u);
1580 assert(c);
1581
1582 if (!c->lock_personality)
1583 return 0;
1584
1585 if (skip_seccomp_unavailable(u, "LockPersonality="))
1586 return 0;
1587
e8132d63
LP
1588 personality = c->personality;
1589
1590 /* If personality is not specified, use either PER_LINUX or PER_LINUX32 depending on what is currently set. */
1591 if (personality == PERSONALITY_INVALID) {
1592
1593 r = opinionated_personality(&personality);
1594 if (r < 0)
1595 return r;
1596 }
78e864e5
TM
1597
1598 return seccomp_lock_personality(personality);
1599}
1600
c0467cf3 1601#endif
8351ceae 1602
3042bbeb 1603static void do_idle_pipe_dance(int idle_pipe[static 4]) {
31a7eb86
ZJS
1604 assert(idle_pipe);
1605
54eb2300
LP
1606 idle_pipe[1] = safe_close(idle_pipe[1]);
1607 idle_pipe[2] = safe_close(idle_pipe[2]);
31a7eb86
ZJS
1608
1609 if (idle_pipe[0] >= 0) {
1610 int r;
1611
1612 r = fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT_USEC);
1613
1614 if (idle_pipe[3] >= 0 && r == 0 /* timeout */) {
c7cc737f
LP
1615 ssize_t n;
1616
31a7eb86 1617 /* Signal systemd that we are bored and want to continue. */
c7cc737f
LP
1618 n = write(idle_pipe[3], "x", 1);
1619 if (n > 0)
cd972d69 1620 /* Wait for systemd to react to the signal above. */
54756dce 1621 (void) fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT2_USEC);
31a7eb86
ZJS
1622 }
1623
54eb2300 1624 idle_pipe[0] = safe_close(idle_pipe[0]);
31a7eb86
ZJS
1625
1626 }
1627
54eb2300 1628 idle_pipe[3] = safe_close(idle_pipe[3]);
31a7eb86
ZJS
1629}
1630
fb2042dd
YW
1631static const char *exec_directory_env_name_to_string(ExecDirectoryType t);
1632
7cae38c4 1633static int build_environment(
34cf6c43 1634 const Unit *u,
9fa95f85 1635 const ExecContext *c,
1e22b5cd 1636 const ExecParameters *p,
da6053d0 1637 size_t n_fds,
7cae38c4
LP
1638 const char *home,
1639 const char *username,
1640 const char *shell,
7bce046b
LP
1641 dev_t journal_stream_dev,
1642 ino_t journal_stream_ino,
7cae38c4
LP
1643 char ***ret) {
1644
1645 _cleanup_strv_free_ char **our_env = NULL;
fb2042dd 1646 ExecDirectoryType t;
da6053d0 1647 size_t n_env = 0;
7cae38c4
LP
1648 char *x;
1649
4b58153d 1650 assert(u);
7cae38c4 1651 assert(c);
7c1cb6f1 1652 assert(p);
7cae38c4
LP
1653 assert(ret);
1654
fb2042dd 1655 our_env = new0(char*, 14 + _EXEC_DIRECTORY_TYPE_MAX);
7cae38c4
LP
1656 if (!our_env)
1657 return -ENOMEM;
1658
1659 if (n_fds > 0) {
8dd4c05b
LP
1660 _cleanup_free_ char *joined = NULL;
1661
df0ff127 1662 if (asprintf(&x, "LISTEN_PID="PID_FMT, getpid_cached()) < 0)
7cae38c4
LP
1663 return -ENOMEM;
1664 our_env[n_env++] = x;
1665
da6053d0 1666 if (asprintf(&x, "LISTEN_FDS=%zu", n_fds) < 0)
7cae38c4
LP
1667 return -ENOMEM;
1668 our_env[n_env++] = x;
8dd4c05b 1669
1e22b5cd 1670 joined = strv_join(p->fd_names, ":");
8dd4c05b
LP
1671 if (!joined)
1672 return -ENOMEM;
1673
605405c6 1674 x = strjoin("LISTEN_FDNAMES=", joined);
8dd4c05b
LP
1675 if (!x)
1676 return -ENOMEM;
1677 our_env[n_env++] = x;
7cae38c4
LP
1678 }
1679
b08af3b1 1680 if ((p->flags & EXEC_SET_WATCHDOG) && p->watchdog_usec > 0) {
df0ff127 1681 if (asprintf(&x, "WATCHDOG_PID="PID_FMT, getpid_cached()) < 0)
09812eb7
LP
1682 return -ENOMEM;
1683 our_env[n_env++] = x;
1684
1e22b5cd 1685 if (asprintf(&x, "WATCHDOG_USEC="USEC_FMT, p->watchdog_usec) < 0)
09812eb7
LP
1686 return -ENOMEM;
1687 our_env[n_env++] = x;
1688 }
1689
fd63e712
LP
1690 /* If this is D-Bus, tell the nss-systemd module, since it relies on being able to use D-Bus look up dynamic
1691 * users via PID 1, possibly dead-locking the dbus daemon. This way it will not use D-Bus to resolve names, but
1692 * check the database directly. */
ac647978 1693 if (p->flags & EXEC_NSS_BYPASS_BUS) {
fd63e712
LP
1694 x = strdup("SYSTEMD_NSS_BYPASS_BUS=1");
1695 if (!x)
1696 return -ENOMEM;
1697 our_env[n_env++] = x;
1698 }
1699
7cae38c4 1700 if (home) {
b910cc72 1701 x = strjoin("HOME=", home);
7cae38c4
LP
1702 if (!x)
1703 return -ENOMEM;
7bbead1d
LP
1704
1705 path_simplify(x + 5, true);
7cae38c4
LP
1706 our_env[n_env++] = x;
1707 }
1708
1709 if (username) {
b910cc72 1710 x = strjoin("LOGNAME=", username);
7cae38c4
LP
1711 if (!x)
1712 return -ENOMEM;
1713 our_env[n_env++] = x;
1714
b910cc72 1715 x = strjoin("USER=", username);
7cae38c4
LP
1716 if (!x)
1717 return -ENOMEM;
1718 our_env[n_env++] = x;
1719 }
1720
1721 if (shell) {
b910cc72 1722 x = strjoin("SHELL=", shell);
7cae38c4
LP
1723 if (!x)
1724 return -ENOMEM;
7bbead1d
LP
1725
1726 path_simplify(x + 6, true);
7cae38c4
LP
1727 our_env[n_env++] = x;
1728 }
1729
4b58153d
LP
1730 if (!sd_id128_is_null(u->invocation_id)) {
1731 if (asprintf(&x, "INVOCATION_ID=" SD_ID128_FORMAT_STR, SD_ID128_FORMAT_VAL(u->invocation_id)) < 0)
1732 return -ENOMEM;
1733
1734 our_env[n_env++] = x;
1735 }
1736
6af760f3
LP
1737 if (exec_context_needs_term(c)) {
1738 const char *tty_path, *term = NULL;
1739
1740 tty_path = exec_context_tty_path(c);
1741
1742 /* If we are forked off PID 1 and we are supposed to operate on /dev/console, then let's try to inherit
1743 * the $TERM set for PID 1. This is useful for containers so that the $TERM the container manager
1744 * passes to PID 1 ends up all the way in the console login shown. */
1745
1746 if (path_equal(tty_path, "/dev/console") && getppid() == 1)
1747 term = getenv("TERM");
1748 if (!term)
1749 term = default_term_for_tty(tty_path);
7cae38c4 1750
b910cc72 1751 x = strjoin("TERM=", term);
7cae38c4
LP
1752 if (!x)
1753 return -ENOMEM;
1754 our_env[n_env++] = x;
1755 }
1756
7bce046b
LP
1757 if (journal_stream_dev != 0 && journal_stream_ino != 0) {
1758 if (asprintf(&x, "JOURNAL_STREAM=" DEV_FMT ":" INO_FMT, journal_stream_dev, journal_stream_ino) < 0)
1759 return -ENOMEM;
1760
1761 our_env[n_env++] = x;
1762 }
1763
fb2042dd
YW
1764 for (t = 0; t < _EXEC_DIRECTORY_TYPE_MAX; t++) {
1765 _cleanup_free_ char *pre = NULL, *joined = NULL;
1766 const char *n;
1767
1768 if (!p->prefix[t])
1769 continue;
1770
1771 if (strv_isempty(c->directories[t].paths))
1772 continue;
1773
1774 n = exec_directory_env_name_to_string(t);
1775 if (!n)
1776 continue;
1777
1778 pre = strjoin(p->prefix[t], "/");
1779 if (!pre)
1780 return -ENOMEM;
1781
1782 joined = strv_join_prefix(c->directories[t].paths, ":", pre);
1783 if (!joined)
1784 return -ENOMEM;
1785
1786 x = strjoin(n, "=", joined);
1787 if (!x)
1788 return -ENOMEM;
1789
1790 our_env[n_env++] = x;
1791 }
1792
7cae38c4 1793 our_env[n_env++] = NULL;
fb2042dd 1794 assert(n_env <= 14 + _EXEC_DIRECTORY_TYPE_MAX);
7cae38c4 1795
ae2a15bc 1796 *ret = TAKE_PTR(our_env);
7cae38c4
LP
1797
1798 return 0;
1799}
1800
b4c14404
FB
1801static int build_pass_environment(const ExecContext *c, char ***ret) {
1802 _cleanup_strv_free_ char **pass_env = NULL;
1803 size_t n_env = 0, n_bufsize = 0;
1804 char **i;
1805
1806 STRV_FOREACH(i, c->pass_environment) {
1807 _cleanup_free_ char *x = NULL;
1808 char *v;
1809
1810 v = getenv(*i);
1811 if (!v)
1812 continue;
605405c6 1813 x = strjoin(*i, "=", v);
b4c14404
FB
1814 if (!x)
1815 return -ENOMEM;
00819cc1 1816
b4c14404
FB
1817 if (!GREEDY_REALLOC(pass_env, n_bufsize, n_env + 2))
1818 return -ENOMEM;
00819cc1 1819
1cc6c93a 1820 pass_env[n_env++] = TAKE_PTR(x);
b4c14404 1821 pass_env[n_env] = NULL;
b4c14404
FB
1822 }
1823
ae2a15bc 1824 *ret = TAKE_PTR(pass_env);
b4c14404
FB
1825
1826 return 0;
1827}
1828
8b44a3d2
LP
1829static bool exec_needs_mount_namespace(
1830 const ExecContext *context,
1831 const ExecParameters *params,
4657abb5 1832 const ExecRuntime *runtime) {
8b44a3d2
LP
1833
1834 assert(context);
1835 assert(params);
1836
915e6d16
LP
1837 if (context->root_image)
1838 return true;
1839
2a624c36
AP
1840 if (!strv_isempty(context->read_write_paths) ||
1841 !strv_isempty(context->read_only_paths) ||
1842 !strv_isempty(context->inaccessible_paths))
8b44a3d2
LP
1843 return true;
1844
42b1d8e0 1845 if (context->n_bind_mounts > 0)
d2d6c096
LP
1846 return true;
1847
2abd4e38
YW
1848 if (context->n_temporary_filesystems > 0)
1849 return true;
1850
37ed15d7 1851 if (!IN_SET(context->mount_flags, 0, MS_SHARED))
8b44a3d2
LP
1852 return true;
1853
1854 if (context->private_tmp && runtime && (runtime->tmp_dir || runtime->var_tmp_dir))
1855 return true;
1856
8b44a3d2 1857 if (context->private_devices ||
228af36f 1858 context->private_mounts ||
8b44a3d2 1859 context->protect_system != PROTECT_SYSTEM_NO ||
59eeb84b
LP
1860 context->protect_home != PROTECT_HOME_NO ||
1861 context->protect_kernel_tunables ||
c575770b 1862 context->protect_kernel_modules ||
59eeb84b 1863 context->protect_control_groups)
8b44a3d2
LP
1864 return true;
1865
37c56f89
YW
1866 if (context->root_directory) {
1867 ExecDirectoryType t;
1868
1869 if (context->mount_apivfs)
1870 return true;
1871
1872 for (t = 0; t < _EXEC_DIRECTORY_TYPE_MAX; t++) {
1873 if (!params->prefix[t])
1874 continue;
1875
1876 if (!strv_isempty(context->directories[t].paths))
1877 return true;
1878 }
1879 }
5d997827 1880
42b1d8e0 1881 if (context->dynamic_user &&
b43ee82f 1882 (!strv_isempty(context->directories[EXEC_DIRECTORY_STATE].paths) ||
42b1d8e0
YW
1883 !strv_isempty(context->directories[EXEC_DIRECTORY_CACHE].paths) ||
1884 !strv_isempty(context->directories[EXEC_DIRECTORY_LOGS].paths)))
1885 return true;
1886
8b44a3d2
LP
1887 return false;
1888}
1889
d251207d
LP
1890static int setup_private_users(uid_t uid, gid_t gid) {
1891 _cleanup_free_ char *uid_map = NULL, *gid_map = NULL;
1892 _cleanup_close_pair_ int errno_pipe[2] = { -1, -1 };
1893 _cleanup_close_ int unshare_ready_fd = -1;
1894 _cleanup_(sigkill_waitp) pid_t pid = 0;
1895 uint64_t c = 1;
d251207d
LP
1896 ssize_t n;
1897 int r;
1898
1899 /* Set up a user namespace and map root to root, the selected UID/GID to itself, and everything else to
1900 * nobody. In order to be able to write this mapping we need CAP_SETUID in the original user namespace, which
1901 * we however lack after opening the user namespace. To work around this we fork() a temporary child process,
1902 * which waits for the parent to create the new user namespace while staying in the original namespace. The
1903 * child then writes the UID mapping, under full privileges. The parent waits for the child to finish and
1904 * continues execution normally. */
1905
587ab01b
ZJS
1906 if (uid != 0 && uid_is_valid(uid)) {
1907 r = asprintf(&uid_map,
1908 "0 0 1\n" /* Map root → root */
1909 UID_FMT " " UID_FMT " 1\n", /* Map $UID → $UID */
1910 uid, uid);
1911 if (r < 0)
1912 return -ENOMEM;
1913 } else {
e0f3720e 1914 uid_map = strdup("0 0 1\n"); /* The case where the above is the same */
587ab01b
ZJS
1915 if (!uid_map)
1916 return -ENOMEM;
1917 }
d251207d 1918
587ab01b
ZJS
1919 if (gid != 0 && gid_is_valid(gid)) {
1920 r = asprintf(&gid_map,
1921 "0 0 1\n" /* Map root → root */
1922 GID_FMT " " GID_FMT " 1\n", /* Map $GID → $GID */
1923 gid, gid);
1924 if (r < 0)
1925 return -ENOMEM;
1926 } else {
d251207d 1927 gid_map = strdup("0 0 1\n"); /* The case where the above is the same */
587ab01b
ZJS
1928 if (!gid_map)
1929 return -ENOMEM;
1930 }
d251207d
LP
1931
1932 /* Create a communication channel so that the parent can tell the child when it finished creating the user
1933 * namespace. */
1934 unshare_ready_fd = eventfd(0, EFD_CLOEXEC);
1935 if (unshare_ready_fd < 0)
1936 return -errno;
1937
1938 /* Create a communication channel so that the child can tell the parent a proper error code in case it
1939 * failed. */
1940 if (pipe2(errno_pipe, O_CLOEXEC) < 0)
1941 return -errno;
1942
4c253ed1
LP
1943 r = safe_fork("(sd-userns)", FORK_RESET_SIGNALS|FORK_DEATHSIG, &pid);
1944 if (r < 0)
1945 return r;
1946 if (r == 0) {
d251207d
LP
1947 _cleanup_close_ int fd = -1;
1948 const char *a;
1949 pid_t ppid;
1950
1951 /* Child process, running in the original user namespace. Let's update the parent's UID/GID map from
1952 * here, after the parent opened its own user namespace. */
1953
1954 ppid = getppid();
1955 errno_pipe[0] = safe_close(errno_pipe[0]);
1956
1957 /* Wait until the parent unshared the user namespace */
1958 if (read(unshare_ready_fd, &c, sizeof(c)) < 0) {
1959 r = -errno;
1960 goto child_fail;
1961 }
1962
1963 /* Disable the setgroups() system call in the child user namespace, for good. */
1964 a = procfs_file_alloca(ppid, "setgroups");
1965 fd = open(a, O_WRONLY|O_CLOEXEC);
1966 if (fd < 0) {
1967 if (errno != ENOENT) {
1968 r = -errno;
1969 goto child_fail;
1970 }
1971
1972 /* If the file is missing the kernel is too old, let's continue anyway. */
1973 } else {
1974 if (write(fd, "deny\n", 5) < 0) {
1975 r = -errno;
1976 goto child_fail;
1977 }
1978
1979 fd = safe_close(fd);
1980 }
1981
1982 /* First write the GID map */
1983 a = procfs_file_alloca(ppid, "gid_map");
1984 fd = open(a, O_WRONLY|O_CLOEXEC);
1985 if (fd < 0) {
1986 r = -errno;
1987 goto child_fail;
1988 }
1989 if (write(fd, gid_map, strlen(gid_map)) < 0) {
1990 r = -errno;
1991 goto child_fail;
1992 }
1993 fd = safe_close(fd);
1994
1995 /* The write the UID map */
1996 a = procfs_file_alloca(ppid, "uid_map");
1997 fd = open(a, O_WRONLY|O_CLOEXEC);
1998 if (fd < 0) {
1999 r = -errno;
2000 goto child_fail;
2001 }
2002 if (write(fd, uid_map, strlen(uid_map)) < 0) {
2003 r = -errno;
2004 goto child_fail;
2005 }
2006
2007 _exit(EXIT_SUCCESS);
2008
2009 child_fail:
2010 (void) write(errno_pipe[1], &r, sizeof(r));
2011 _exit(EXIT_FAILURE);
2012 }
2013
2014 errno_pipe[1] = safe_close(errno_pipe[1]);
2015
2016 if (unshare(CLONE_NEWUSER) < 0)
2017 return -errno;
2018
2019 /* Let the child know that the namespace is ready now */
2020 if (write(unshare_ready_fd, &c, sizeof(c)) < 0)
2021 return -errno;
2022
2023 /* Try to read an error code from the child */
2024 n = read(errno_pipe[0], &r, sizeof(r));
2025 if (n < 0)
2026 return -errno;
2027 if (n == sizeof(r)) { /* an error code was sent to us */
2028 if (r < 0)
2029 return r;
2030 return -EIO;
2031 }
2032 if (n != 0) /* on success we should have read 0 bytes */
2033 return -EIO;
2034
2e87a1fd
LP
2035 r = wait_for_terminate_and_check("(sd-userns)", pid, 0);
2036 pid = 0;
d251207d
LP
2037 if (r < 0)
2038 return r;
2e87a1fd 2039 if (r != EXIT_SUCCESS) /* If something strange happened with the child, let's consider this fatal, too */
d251207d
LP
2040 return -EIO;
2041
2042 return 0;
2043}
2044
494d0247
YW
2045static bool exec_directory_is_private(const ExecContext *context, ExecDirectoryType type) {
2046 if (!context->dynamic_user)
2047 return false;
2048
2049 if (type == EXEC_DIRECTORY_CONFIGURATION)
2050 return false;
2051
2052 if (type == EXEC_DIRECTORY_RUNTIME && context->runtime_directory_preserve_mode == EXEC_PRESERVE_NO)
2053 return false;
2054
2055 return true;
2056}
2057
3536f49e 2058static int setup_exec_directory(
07689d5d
LP
2059 const ExecContext *context,
2060 const ExecParameters *params,
2061 uid_t uid,
3536f49e 2062 gid_t gid,
3536f49e
YW
2063 ExecDirectoryType type,
2064 int *exit_status) {
07689d5d 2065
72fd1768 2066 static const int exit_status_table[_EXEC_DIRECTORY_TYPE_MAX] = {
3536f49e
YW
2067 [EXEC_DIRECTORY_RUNTIME] = EXIT_RUNTIME_DIRECTORY,
2068 [EXEC_DIRECTORY_STATE] = EXIT_STATE_DIRECTORY,
2069 [EXEC_DIRECTORY_CACHE] = EXIT_CACHE_DIRECTORY,
2070 [EXEC_DIRECTORY_LOGS] = EXIT_LOGS_DIRECTORY,
2071 [EXEC_DIRECTORY_CONFIGURATION] = EXIT_CONFIGURATION_DIRECTORY,
2072 };
07689d5d
LP
2073 char **rt;
2074 int r;
2075
2076 assert(context);
2077 assert(params);
72fd1768 2078 assert(type >= 0 && type < _EXEC_DIRECTORY_TYPE_MAX);
3536f49e 2079 assert(exit_status);
07689d5d 2080
3536f49e
YW
2081 if (!params->prefix[type])
2082 return 0;
2083
8679efde 2084 if (params->flags & EXEC_CHOWN_DIRECTORIES) {
3536f49e
YW
2085 if (!uid_is_valid(uid))
2086 uid = 0;
2087 if (!gid_is_valid(gid))
2088 gid = 0;
2089 }
2090
2091 STRV_FOREACH(rt, context->directories[type].paths) {
6c47cd7d 2092 _cleanup_free_ char *p = NULL, *pp = NULL;
07689d5d 2093
edbfeb12 2094 p = path_join(params->prefix[type], *rt);
3536f49e
YW
2095 if (!p) {
2096 r = -ENOMEM;
2097 goto fail;
2098 }
07689d5d 2099
23a7448e
YW
2100 r = mkdir_parents_label(p, 0755);
2101 if (r < 0)
3536f49e 2102 goto fail;
23a7448e 2103
494d0247 2104 if (exec_directory_is_private(context, type)) {
6c9c51e5 2105 _cleanup_free_ char *private_root = NULL;
6c47cd7d 2106
3f5b1508
LP
2107 /* So, here's one extra complication when dealing with DynamicUser=1 units. In that
2108 * case we want to avoid leaving a directory around fully accessible that is owned by
2109 * a dynamic user whose UID is later on reused. To lock this down we use the same
2110 * trick used by container managers to prohibit host users to get access to files of
2111 * the same UID in containers: we place everything inside a directory that has an
2112 * access mode of 0700 and is owned root:root, so that it acts as security boundary
2113 * for unprivileged host code. We then use fs namespacing to make this directory
2114 * permeable for the service itself.
6c47cd7d 2115 *
3f5b1508
LP
2116 * Specifically: for a service which wants a special directory "foo/" we first create
2117 * a directory "private/" with access mode 0700 owned by root:root. Then we place
2118 * "foo" inside of that directory (i.e. "private/foo/"), and make "foo" a symlink to
2119 * "private/foo". This way, privileged host users can access "foo/" as usual, but
2120 * unprivileged host users can't look into it. Inside of the namespace of the unit
2121 * "private/" is replaced by a more liberally accessible tmpfs, into which the host's
2122 * "private/foo/" is mounted under the same name, thus disabling the access boundary
2123 * for the service and making sure it only gets access to the dirs it needs but no
2124 * others. Tricky? Yes, absolutely, but it works!
6c47cd7d 2125 *
3f5b1508
LP
2126 * Note that we don't do this for EXEC_DIRECTORY_CONFIGURATION as that's assumed not
2127 * to be owned by the service itself.
2128 *
2129 * Also, note that we don't do this for EXEC_DIRECTORY_RUNTIME as that's often used
2130 * for sharing files or sockets with other services. */
6c47cd7d 2131
edbfeb12 2132 private_root = path_join(params->prefix[type], "private");
6c47cd7d
LP
2133 if (!private_root) {
2134 r = -ENOMEM;
2135 goto fail;
2136 }
2137
2138 /* First set up private root if it doesn't exist yet, with access mode 0700 and owned by root:root */
37c1d5e9 2139 r = mkdir_safe_label(private_root, 0700, 0, 0, MKDIR_WARN_MODE);
6c47cd7d
LP
2140 if (r < 0)
2141 goto fail;
2142
edbfeb12 2143 pp = path_join(private_root, *rt);
6c47cd7d
LP
2144 if (!pp) {
2145 r = -ENOMEM;
2146 goto fail;
2147 }
2148
2149 /* Create all directories between the configured directory and this private root, and mark them 0755 */
2150 r = mkdir_parents_label(pp, 0755);
2151 if (r < 0)
2152 goto fail;
2153
949befd3
LP
2154 if (is_dir(p, false) > 0 &&
2155 (laccess(pp, F_OK) < 0 && errno == ENOENT)) {
2156
2157 /* Hmm, the private directory doesn't exist yet, but the normal one exists? If so, move
2158 * it over. Most likely the service has been upgraded from one that didn't use
2159 * DynamicUser=1, to one that does. */
2160
cf52c45d
LP
2161 log_info("Found pre-existing public %s= directory %s, migrating to %s.\n"
2162 "Apparently, service previously had DynamicUser= turned off, and has now turned it on.",
2163 exec_directory_type_to_string(type), p, pp);
2164
949befd3
LP
2165 if (rename(p, pp) < 0) {
2166 r = -errno;
2167 goto fail;
2168 }
2169 } else {
2170 /* Otherwise, create the actual directory for the service */
2171
2172 r = mkdir_label(pp, context->directories[type].mode);
2173 if (r < 0 && r != -EEXIST)
2174 goto fail;
2175 }
6c47cd7d 2176
6c47cd7d 2177 /* And link it up from the original place */
6c9c51e5 2178 r = symlink_idempotent(pp, p, true);
6c47cd7d
LP
2179 if (r < 0)
2180 goto fail;
2181
6c47cd7d 2182 } else {
5c6d40d1
LP
2183 _cleanup_free_ char *target = NULL;
2184
2185 if (type != EXEC_DIRECTORY_CONFIGURATION &&
2186 readlink_and_make_absolute(p, &target) >= 0) {
2187 _cleanup_free_ char *q = NULL;
2188
2189 /* This already exists and is a symlink? Interesting. Maybe it's one created
2193f17c
LP
2190 * by DynamicUser=1 (see above)?
2191 *
2192 * We do this for all directory types except for ConfigurationDirectory=,
2193 * since they all support the private/ symlink logic at least in some
2194 * configurations, see above. */
5c6d40d1
LP
2195
2196 q = path_join(params->prefix[type], "private", *rt);
2197 if (!q) {
2198 r = -ENOMEM;
2199 goto fail;
2200 }
2201
2202 if (path_equal(q, target)) {
2203
2204 /* Hmm, apparently DynamicUser= was once turned on for this service,
2205 * but is no longer. Let's move the directory back up. */
2206
cf52c45d
LP
2207 log_info("Found pre-existing private %s= directory %s, migrating to %s.\n"
2208 "Apparently, service previously had DynamicUser= turned on, and has now turned it off.",
2209 exec_directory_type_to_string(type), q, p);
2210
5c6d40d1
LP
2211 if (unlink(p) < 0) {
2212 r = -errno;
2213 goto fail;
2214 }
2215
2216 if (rename(q, p) < 0) {
2217 r = -errno;
2218 goto fail;
2219 }
2220 }
2221 }
2222
6c47cd7d 2223 r = mkdir_label(p, context->directories[type].mode);
d484580c 2224 if (r < 0) {
d484580c
LP
2225 if (r != -EEXIST)
2226 goto fail;
2227
206e9864
LP
2228 if (type == EXEC_DIRECTORY_CONFIGURATION) {
2229 struct stat st;
2230
2231 /* Don't change the owner/access mode of the configuration directory,
2232 * as in the common case it is not written to by a service, and shall
2233 * not be writable. */
2234
2235 if (stat(p, &st) < 0) {
2236 r = -errno;
2237 goto fail;
2238 }
2239
2240 /* Still complain if the access mode doesn't match */
2241 if (((st.st_mode ^ context->directories[type].mode) & 07777) != 0)
2242 log_warning("%s \'%s\' already exists but the mode is different. "
2243 "(File system: %o %sMode: %o)",
2244 exec_directory_type_to_string(type), *rt,
2245 st.st_mode & 07777, exec_directory_type_to_string(type), context->directories[type].mode & 07777);
2246
6cff72eb 2247 continue;
206e9864 2248 }
6cff72eb 2249 }
a1164ae3 2250 }
07689d5d 2251
206e9864 2252 /* Lock down the access mode (we use chmod_and_chown() to make this idempotent. We don't
5238e957 2253 * specify UID/GID here, so that path_chown_recursive() can optimize things depending on the
206e9864
LP
2254 * current UID/GID ownership.) */
2255 r = chmod_and_chown(pp ?: p, context->directories[type].mode, UID_INVALID, GID_INVALID);
2256 if (r < 0)
2257 goto fail;
c71b2eb7 2258
607b358e
LP
2259 /* Then, change the ownership of the whole tree, if necessary. When dynamic users are used we
2260 * drop the suid/sgid bits, since we really don't want SUID/SGID files for dynamic UID/GID
2261 * assignments to exist.*/
2262 r = path_chown_recursive(pp ?: p, uid, gid, context->dynamic_user ? 01777 : 07777);
07689d5d 2263 if (r < 0)
3536f49e 2264 goto fail;
07689d5d
LP
2265 }
2266
2267 return 0;
3536f49e
YW
2268
2269fail:
2270 *exit_status = exit_status_table[type];
3536f49e 2271 return r;
07689d5d
LP
2272}
2273
#if ENABLE_SMACK
/* Applies the SMACK process label to the calling process (PID 0 is passed to mac_smack_apply_pid()).
 * The label configured in the context wins. Otherwise, if the build defines
 * SMACK_DEFAULT_PROCESS_LABEL, the exec label read from the command's binary is used, falling back to
 * that default. Returns 0 on success, a negative errno-style value on failure. */
static int setup_smack(
                const ExecContext *context,
                const ExecCommand *command) {

        int r;

        assert(context);
        assert(command);

        if (context->smack_process_label) {
                r = mac_smack_apply_pid(0, context->smack_process_label);
                return r < 0 ? r : 0;
        }

#ifdef SMACK_DEFAULT_PROCESS_LABEL
        {
                _cleanup_free_ char *exec_label = NULL;

                /* A missing attribute or lack of support is not an error, we simply use the default */
                r = mac_smack_read(command->path, SMACK_ATTR_EXEC, &exec_label);
                if (r < 0 && r != -ENODATA && r != -EOPNOTSUPP)
                        return r;

                r = mac_smack_apply_pid(0, exec_label ? : SMACK_DEFAULT_PROCESS_LABEL);
                if (r < 0)
                        return r;
        }
#endif

        return 0;
}
#endif
cefc33ae 2306
6c47cd7d
LP
2307static int compile_bind_mounts(
2308 const ExecContext *context,
2309 const ExecParameters *params,
2310 BindMount **ret_bind_mounts,
da6053d0 2311 size_t *ret_n_bind_mounts,
6c47cd7d
LP
2312 char ***ret_empty_directories) {
2313
2314 _cleanup_strv_free_ char **empty_directories = NULL;
2315 BindMount *bind_mounts;
da6053d0 2316 size_t n, h = 0, i;
6c47cd7d
LP
2317 ExecDirectoryType t;
2318 int r;
2319
2320 assert(context);
2321 assert(params);
2322 assert(ret_bind_mounts);
2323 assert(ret_n_bind_mounts);
2324 assert(ret_empty_directories);
2325
2326 n = context->n_bind_mounts;
2327 for (t = 0; t < _EXEC_DIRECTORY_TYPE_MAX; t++) {
2328 if (!params->prefix[t])
2329 continue;
2330
2331 n += strv_length(context->directories[t].paths);
2332 }
2333
2334 if (n <= 0) {
2335 *ret_bind_mounts = NULL;
2336 *ret_n_bind_mounts = 0;
2337 *ret_empty_directories = NULL;
2338 return 0;
2339 }
2340
2341 bind_mounts = new(BindMount, n);
2342 if (!bind_mounts)
2343 return -ENOMEM;
2344
a8cabc61 2345 for (i = 0; i < context->n_bind_mounts; i++) {
6c47cd7d
LP
2346 BindMount *item = context->bind_mounts + i;
2347 char *s, *d;
2348
2349 s = strdup(item->source);
2350 if (!s) {
2351 r = -ENOMEM;
2352 goto finish;
2353 }
2354
2355 d = strdup(item->destination);
2356 if (!d) {
2357 free(s);
2358 r = -ENOMEM;
2359 goto finish;
2360 }
2361
2362 bind_mounts[h++] = (BindMount) {
2363 .source = s,
2364 .destination = d,
2365 .read_only = item->read_only,
2366 .recursive = item->recursive,
2367 .ignore_enoent = item->ignore_enoent,
2368 };
2369 }
2370
2371 for (t = 0; t < _EXEC_DIRECTORY_TYPE_MAX; t++) {
2372 char **suffix;
2373
2374 if (!params->prefix[t])
2375 continue;
2376
2377 if (strv_isempty(context->directories[t].paths))
2378 continue;
2379
494d0247 2380 if (exec_directory_is_private(context, t) &&
5609f688 2381 !(context->root_directory || context->root_image)) {
6c47cd7d
LP
2382 char *private_root;
2383
2384 /* So this is for a dynamic user, and we need to make sure the process can access its own
2385 * directory. For that we overmount the usually inaccessible "private" subdirectory with a
2386 * tmpfs that makes it accessible and is empty except for the submounts we do this for. */
2387
657ee2d8 2388 private_root = path_join(params->prefix[t], "private");
6c47cd7d
LP
2389 if (!private_root) {
2390 r = -ENOMEM;
2391 goto finish;
2392 }
2393
2394 r = strv_consume(&empty_directories, private_root);
a635a7ae 2395 if (r < 0)
6c47cd7d 2396 goto finish;
6c47cd7d
LP
2397 }
2398
2399 STRV_FOREACH(suffix, context->directories[t].paths) {
2400 char *s, *d;
2401
494d0247 2402 if (exec_directory_is_private(context, t))
657ee2d8 2403 s = path_join(params->prefix[t], "private", *suffix);
6c47cd7d 2404 else
657ee2d8 2405 s = path_join(params->prefix[t], *suffix);
6c47cd7d
LP
2406 if (!s) {
2407 r = -ENOMEM;
2408 goto finish;
2409 }
2410
494d0247 2411 if (exec_directory_is_private(context, t) &&
5609f688
YW
2412 (context->root_directory || context->root_image))
2413 /* When RootDirectory= or RootImage= are set, then the symbolic link to the private
2414 * directory is not created on the root directory. So, let's bind-mount the directory
2415 * on the 'non-private' place. */
657ee2d8 2416 d = path_join(params->prefix[t], *suffix);
5609f688
YW
2417 else
2418 d = strdup(s);
6c47cd7d
LP
2419 if (!d) {
2420 free(s);
2421 r = -ENOMEM;
2422 goto finish;
2423 }
2424
2425 bind_mounts[h++] = (BindMount) {
2426 .source = s,
2427 .destination = d,
2428 .read_only = false,
9ce4e4b0 2429 .nosuid = context->dynamic_user, /* don't allow suid/sgid when DynamicUser= is on */
6c47cd7d
LP
2430 .recursive = true,
2431 .ignore_enoent = false,
2432 };
2433 }
2434 }
2435
2436 assert(h == n);
2437
2438 *ret_bind_mounts = bind_mounts;
2439 *ret_n_bind_mounts = n;
ae2a15bc 2440 *ret_empty_directories = TAKE_PTR(empty_directories);
6c47cd7d
LP
2441
2442 return (int) n;
2443
2444finish:
2445 bind_mount_free_many(bind_mounts, h);
2446 return r;
2447}
2448
6818c54c 2449static int apply_mount_namespace(
34cf6c43
YW
2450 const Unit *u,
2451 const ExecCommand *command,
6818c54c
LP
2452 const ExecContext *context,
2453 const ExecParameters *params,
7cc5ef5f
ZJS
2454 const ExecRuntime *runtime,
2455 char **error_path) {
6818c54c 2456
7bcef4ef 2457 _cleanup_strv_free_ char **empty_directories = NULL;
93c6bb51 2458 char *tmp = NULL, *var = NULL;
915e6d16 2459 const char *root_dir = NULL, *root_image = NULL;
228af36f 2460 NamespaceInfo ns_info;
165a31c0 2461 bool needs_sandboxing;
6c47cd7d 2462 BindMount *bind_mounts = NULL;
da6053d0 2463 size_t n_bind_mounts = 0;
6818c54c 2464 int r;
93c6bb51 2465
2b3c1b9e
DH
2466 assert(context);
2467
93c6bb51
DH
2468 /* The runtime struct only contains the parent of the private /tmp,
2469 * which is non-accessible to world users. Inside of it there's a /tmp
2470 * that is sticky, and that's the one we want to use here. */
2471
2472 if (context->private_tmp && runtime) {
2473 if (runtime->tmp_dir)
2474 tmp = strjoina(runtime->tmp_dir, "/tmp");
2475 if (runtime->var_tmp_dir)
2476 var = strjoina(runtime->var_tmp_dir, "/tmp");
2477 }
2478
915e6d16
LP
2479 if (params->flags & EXEC_APPLY_CHROOT) {
2480 root_image = context->root_image;
2481
2482 if (!root_image)
2483 root_dir = context->root_directory;
2484 }
93c6bb51 2485
6c47cd7d
LP
2486 r = compile_bind_mounts(context, params, &bind_mounts, &n_bind_mounts, &empty_directories);
2487 if (r < 0)
2488 return r;
2489
165a31c0 2490 needs_sandboxing = (params->flags & EXEC_APPLY_SANDBOXING) && !(command->flags & EXEC_COMMAND_FULLY_PRIVILEGED);
b5a33299
YW
2491 if (needs_sandboxing)
2492 ns_info = (NamespaceInfo) {
2493 .ignore_protect_paths = false,
2494 .private_dev = context->private_devices,
2495 .protect_control_groups = context->protect_control_groups,
2496 .protect_kernel_tunables = context->protect_kernel_tunables,
2497 .protect_kernel_modules = context->protect_kernel_modules,
aecd5ac6 2498 .protect_hostname = context->protect_hostname,
b5a33299 2499 .mount_apivfs = context->mount_apivfs,
228af36f 2500 .private_mounts = context->private_mounts,
b5a33299 2501 };
228af36f
LP
2502 else if (!context->dynamic_user && root_dir)
2503 /*
2504 * If DynamicUser=no and RootDirectory= is set then lets pass a relaxed
2505 * sandbox info, otherwise enforce it, don't ignore protected paths and
2506 * fail if we are enable to apply the sandbox inside the mount namespace.
2507 */
2508 ns_info = (NamespaceInfo) {
2509 .ignore_protect_paths = true,
2510 };
2511 else
2512 ns_info = (NamespaceInfo) {};
b5a33299 2513
37ed15d7
FB
2514 if (context->mount_flags == MS_SHARED)
2515 log_unit_debug(u, "shared mount propagation hidden by other fs namespacing unit settings: ignoring");
2516
915e6d16 2517 r = setup_namespace(root_dir, root_image,
7bcef4ef 2518 &ns_info, context->read_write_paths,
165a31c0
LP
2519 needs_sandboxing ? context->read_only_paths : NULL,
2520 needs_sandboxing ? context->inaccessible_paths : NULL,
6c47cd7d
LP
2521 empty_directories,
2522 bind_mounts,
2523 n_bind_mounts,
2abd4e38
YW
2524 context->temporary_filesystems,
2525 context->n_temporary_filesystems,
93c6bb51
DH
2526 tmp,
2527 var,
165a31c0
LP
2528 needs_sandboxing ? context->protect_home : PROTECT_HOME_NO,
2529 needs_sandboxing ? context->protect_system : PROTECT_SYSTEM_NO,
915e6d16 2530 context->mount_flags,
7cc5ef5f
ZJS
2531 DISSECT_IMAGE_DISCARD_ON_LOOP,
2532 error_path);
93c6bb51 2533
6c47cd7d
LP
2534 bind_mount_free_many(bind_mounts, n_bind_mounts);
2535
1beab8b0 2536 /* If we couldn't set up the namespace this is probably due to a missing capability. setup_namespace() reports
5238e957 2537 * that with a special, recognizable error ENOANO. In this case, silently proceed, but only if exclusively
1beab8b0
LP
2538 * sandboxing options were used, i.e. nothing such as RootDirectory= or BindMount= that would result in a
2539 * completely different execution environment. */
aca835ed
YW
2540 if (r == -ENOANO) {
2541 if (n_bind_mounts == 0 &&
2542 context->n_temporary_filesystems == 0 &&
2543 !root_dir && !root_image &&
2544 !context->dynamic_user) {
2545 log_unit_debug(u, "Failed to set up namespace, assuming containerized execution and ignoring.");
2546 return 0;
2547 }
2548
2194547e
LP
2549 log_unit_debug(u, "Failed to set up namespace, and refusing to continue since the selected namespacing options alter mount environment non-trivially.\n"
2550 "Bind mounts: %zu, temporary filesystems: %zu, root directory: %s, root image: %s, dynamic user: %s",
2551 n_bind_mounts, context->n_temporary_filesystems, yes_no(root_dir), yes_no(root_image), yes_no(context->dynamic_user));
2552
aca835ed 2553 return -EOPNOTSUPP;
93c6bb51
DH
2554 }
2555
2556 return r;
2557}
2558
915e6d16
LP
2559static int apply_working_directory(
2560 const ExecContext *context,
2561 const ExecParameters *params,
2562 const char *home,
376fecf6 2563 int *exit_status) {
915e6d16 2564
6732edab 2565 const char *d, *wd;
2b3c1b9e
DH
2566
2567 assert(context);
376fecf6 2568 assert(exit_status);
2b3c1b9e 2569
6732edab
LP
2570 if (context->working_directory_home) {
2571
376fecf6
LP
2572 if (!home) {
2573 *exit_status = EXIT_CHDIR;
6732edab 2574 return -ENXIO;
376fecf6 2575 }
6732edab 2576
2b3c1b9e 2577 wd = home;
6732edab
LP
2578
2579 } else if (context->working_directory)
2b3c1b9e
DH
2580 wd = context->working_directory;
2581 else
2582 wd = "/";
e7f1e7c6 2583
fa97f630 2584 if (params->flags & EXEC_APPLY_CHROOT)
2b3c1b9e 2585 d = wd;
fa97f630 2586 else
3b0e5bb5 2587 d = prefix_roota(context->root_directory, wd);
e7f1e7c6 2588
376fecf6
LP
2589 if (chdir(d) < 0 && !context->working_directory_missing_ok) {
2590 *exit_status = EXIT_CHDIR;
2b3c1b9e 2591 return -errno;
376fecf6 2592 }
e7f1e7c6
DH
2593
2594 return 0;
2595}
2596
fa97f630
JB
2597static int apply_root_directory(
2598 const ExecContext *context,
2599 const ExecParameters *params,
2600 const bool needs_mount_ns,
2601 int *exit_status) {
2602
2603 assert(context);
2604 assert(exit_status);
2605
2606 if (params->flags & EXEC_APPLY_CHROOT) {
2607 if (!needs_mount_ns && context->root_directory)
2608 if (chroot(context->root_directory) < 0) {
2609 *exit_status = EXIT_CHROOT;
2610 return -errno;
2611 }
2612 }
2613
2614 return 0;
2615}
2616
b1edf445 2617static int setup_keyring(
34cf6c43 2618 const Unit *u,
b1edf445
LP
2619 const ExecContext *context,
2620 const ExecParameters *p,
2621 uid_t uid, gid_t gid) {
2622
74dd6b51 2623 key_serial_t keyring;
e64c2d0b
DJL
2624 int r = 0;
2625 uid_t saved_uid;
2626 gid_t saved_gid;
74dd6b51
LP
2627
2628 assert(u);
b1edf445 2629 assert(context);
74dd6b51
LP
2630 assert(p);
2631
2632 /* Let's set up a new per-service "session" kernel keyring for each system service. This has the benefit that
2633 * each service runs with its own keyring shared among all processes of the service, but with no hook-up beyond
2634 * that scope, and in particular no link to the per-UID keyring. If we don't do this the keyring will be
2635 * automatically created on-demand and then linked to the per-UID keyring, by the kernel. The kernel's built-in
2636 * on-demand behaviour is very appropriate for login users, but probably not so much for system services, where
2637 * UIDs are not necessarily specific to a service but reused (at least in the case of UID 0). */
2638
b1edf445
LP
2639 if (context->keyring_mode == EXEC_KEYRING_INHERIT)
2640 return 0;
2641
e64c2d0b
DJL
2642 /* Acquiring a reference to the user keyring is nasty. We briefly change identity in order to get things set up
2643 * properly by the kernel. If we don't do that then we can't create it atomically, and that sucks for parallel
2644 * execution. This mimics what pam_keyinit does, too. Setting up session keyring, to be owned by the right user
2645 * & group is just as nasty as acquiring a reference to the user keyring. */
2646
2647 saved_uid = getuid();
2648 saved_gid = getgid();
2649
2650 if (gid_is_valid(gid) && gid != saved_gid) {
2651 if (setregid(gid, -1) < 0)
2652 return log_unit_error_errno(u, errno, "Failed to change GID for user keyring: %m");
2653 }
2654
2655 if (uid_is_valid(uid) && uid != saved_uid) {
2656 if (setreuid(uid, -1) < 0) {
2657 r = log_unit_error_errno(u, errno, "Failed to change UID for user keyring: %m");
2658 goto out;
2659 }
2660 }
2661
74dd6b51
LP
2662 keyring = keyctl(KEYCTL_JOIN_SESSION_KEYRING, 0, 0, 0, 0);
2663 if (keyring == -1) {
2664 if (errno == ENOSYS)
8002fb97 2665 log_unit_debug_errno(u, errno, "Kernel keyring not supported, ignoring.");
74dd6b51 2666 else if (IN_SET(errno, EACCES, EPERM))
8002fb97 2667 log_unit_debug_errno(u, errno, "Kernel keyring access prohibited, ignoring.");
74dd6b51 2668 else if (errno == EDQUOT)
8002fb97 2669 log_unit_debug_errno(u, errno, "Out of kernel keyrings to allocate, ignoring.");
74dd6b51 2670 else
e64c2d0b 2671 r = log_unit_error_errno(u, errno, "Setting up kernel keyring failed: %m");
74dd6b51 2672
e64c2d0b 2673 goto out;
74dd6b51
LP
2674 }
2675
e64c2d0b
DJL
2676 /* When requested link the user keyring into the session keyring. */
2677 if (context->keyring_mode == EXEC_KEYRING_SHARED) {
2678
2679 if (keyctl(KEYCTL_LINK,
2680 KEY_SPEC_USER_KEYRING,
2681 KEY_SPEC_SESSION_KEYRING, 0, 0) < 0) {
2682 r = log_unit_error_errno(u, errno, "Failed to link user keyring into session keyring: %m");
2683 goto out;
2684 }
2685 }
2686
2687 /* Restore uid/gid back */
2688 if (uid_is_valid(uid) && uid != saved_uid) {
2689 if (setreuid(saved_uid, -1) < 0) {
2690 r = log_unit_error_errno(u, errno, "Failed to change UID back for user keyring: %m");
2691 goto out;
2692 }
2693 }
2694
2695 if (gid_is_valid(gid) && gid != saved_gid) {
2696 if (setregid(saved_gid, -1) < 0)
2697 return log_unit_error_errno(u, errno, "Failed to change GID back for user keyring: %m");
2698 }
2699
2700 /* Populate they keyring with the invocation ID by default, as original saved_uid. */
b3415f5d
LP
2701 if (!sd_id128_is_null(u->invocation_id)) {
2702 key_serial_t key;
2703
2704 key = add_key("user", "invocation_id", &u->invocation_id, sizeof(u->invocation_id), KEY_SPEC_SESSION_KEYRING);
2705 if (key == -1)
8002fb97 2706 log_unit_debug_errno(u, errno, "Failed to add invocation ID to keyring, ignoring: %m");
b3415f5d
LP
2707 else {
2708 if (keyctl(KEYCTL_SETPERM, key,
2709 KEY_POS_VIEW|KEY_POS_READ|KEY_POS_SEARCH|
2710 KEY_USR_VIEW|KEY_USR_READ|KEY_USR_SEARCH, 0, 0) < 0)
e64c2d0b 2711 r = log_unit_error_errno(u, errno, "Failed to restrict invocation ID permission: %m");
b3415f5d
LP
2712 }
2713 }
2714
e64c2d0b
DJL
2715out:
2716 /* Revert back uid & gid for the the last time, and exit */
2717 /* no extra logging, as only the first already reported error matters */
2718 if (getuid() != saved_uid)
2719 (void) setreuid(saved_uid, -1);
b1edf445 2720
e64c2d0b
DJL
2721 if (getgid() != saved_gid)
2722 (void) setregid(saved_gid, -1);
b1edf445 2723
e64c2d0b 2724 return r;
74dd6b51
LP
2725}
2726
/* Appends those of the two file descriptors in 'pair' that are valid (i.e. not negative) to 'array',
 * advancing the element counter *n for each one added. The caller has to make sure 'array' has room
 * for up to two more entries. */
static void append_socket_pair(int *array, size_t *n, const int pair[static 2]) {
        size_t i;

        assert(array);
        assert(n);
        assert(pair);

        for (i = 0; i < 2; i++)
                if (pair[i] >= 0)
                        array[(*n)++] = pair[i];
}
2737
a34ceba6
LP
2738static int close_remaining_fds(
2739 const ExecParameters *params,
34cf6c43
YW
2740 const ExecRuntime *runtime,
2741 const DynamicCreds *dcreds,
00d9ef85 2742 int user_lookup_fd,
a34ceba6 2743 int socket_fd,
5686391b 2744 int exec_fd,
da6053d0 2745 int *fds, size_t n_fds) {
a34ceba6 2746
da6053d0 2747 size_t n_dont_close = 0;
00d9ef85 2748 int dont_close[n_fds + 12];
a34ceba6
LP
2749
2750 assert(params);
2751
2752 if (params->stdin_fd >= 0)
2753 dont_close[n_dont_close++] = params->stdin_fd;
2754 if (params->stdout_fd >= 0)
2755 dont_close[n_dont_close++] = params->stdout_fd;
2756 if (params->stderr_fd >= 0)
2757 dont_close[n_dont_close++] = params->stderr_fd;
2758
2759 if (socket_fd >= 0)
2760 dont_close[n_dont_close++] = socket_fd;
5686391b
LP
2761 if (exec_fd >= 0)
2762 dont_close[n_dont_close++] = exec_fd;
a34ceba6
LP
2763 if (n_fds > 0) {
2764 memcpy(dont_close + n_dont_close, fds, sizeof(int) * n_fds);
2765 n_dont_close += n_fds;
2766 }
2767
29206d46
LP
2768 if (runtime)
2769 append_socket_pair(dont_close, &n_dont_close, runtime->netns_storage_socket);
2770
2771 if (dcreds) {
2772 if (dcreds->user)
2773 append_socket_pair(dont_close, &n_dont_close, dcreds->user->storage_socket);
2774 if (dcreds->group)
2775 append_socket_pair(dont_close, &n_dont_close, dcreds->group->storage_socket);
a34ceba6
LP
2776 }
2777
00d9ef85
LP
2778 if (user_lookup_fd >= 0)
2779 dont_close[n_dont_close++] = user_lookup_fd;
2780
a34ceba6
LP
2781 return close_all_fds(dont_close, n_dont_close);
2782}
2783
00d9ef85
LP
2784static int send_user_lookup(
2785 Unit *unit,
2786 int user_lookup_fd,
2787 uid_t uid,
2788 gid_t gid) {
2789
2790 assert(unit);
2791
2792 /* Send the resolved UID/GID to PID 1 after we learnt it. We send a single datagram, containing the UID/GID
2793 * data as well as the unit name. Note that we suppress sending this if no user/group to resolve was
2794 * specified. */
2795
2796 if (user_lookup_fd < 0)
2797 return 0;
2798
2799 if (!uid_is_valid(uid) && !gid_is_valid(gid))
2800 return 0;
2801
2802 if (writev(user_lookup_fd,
2803 (struct iovec[]) {
e6a7ec4b
LP
2804 IOVEC_INIT(&uid, sizeof(uid)),
2805 IOVEC_INIT(&gid, sizeof(gid)),
2806 IOVEC_INIT_STRING(unit->id) }, 3) < 0)
00d9ef85
LP
2807 return -errno;
2808
2809 return 0;
2810}
2811
6732edab
LP
2812static int acquire_home(const ExecContext *c, uid_t uid, const char** home, char **buf) {
2813 int r;
2814
2815 assert(c);
2816 assert(home);
2817 assert(buf);
2818
2819 /* If WorkingDirectory=~ is set, try to acquire a usable home directory. */
2820
2821 if (*home)
2822 return 0;
2823
2824 if (!c->working_directory_home)
2825 return 0;
2826
6732edab
LP
2827 r = get_home_dir(buf);
2828 if (r < 0)
2829 return r;
2830
2831 *home = *buf;
2832 return 1;
2833}
2834
da50b85a
LP
2835static int compile_suggested_paths(const ExecContext *c, const ExecParameters *p, char ***ret) {
2836 _cleanup_strv_free_ char ** list = NULL;
2837 ExecDirectoryType t;
2838 int r;
2839
2840 assert(c);
2841 assert(p);
2842 assert(ret);
2843
2844 assert(c->dynamic_user);
2845
2846 /* Compile a list of paths that it might make sense to read the owning UID from to use as initial candidate for
2847 * dynamic UID allocation, in order to save us from doing costly recursive chown()s of the special
2848 * directories. */
2849
2850 for (t = 0; t < _EXEC_DIRECTORY_TYPE_MAX; t++) {
2851 char **i;
2852
2853 if (t == EXEC_DIRECTORY_CONFIGURATION)
2854 continue;
2855
2856 if (!p->prefix[t])
2857 continue;
2858
2859 STRV_FOREACH(i, c->directories[t].paths) {
2860 char *e;
2861
494d0247 2862 if (exec_directory_is_private(c, t))
657ee2d8 2863 e = path_join(p->prefix[t], "private", *i);
494d0247
YW
2864 else
2865 e = path_join(p->prefix[t], *i);
da50b85a
LP
2866 if (!e)
2867 return -ENOMEM;
2868
2869 r = strv_consume(&list, e);
2870 if (r < 0)
2871 return r;
2872 }
2873 }
2874
ae2a15bc 2875 *ret = TAKE_PTR(list);
da50b85a
LP
2876
2877 return 0;
2878}
2879
34cf6c43
YW
2880static char *exec_command_line(char **argv);
2881
78f93209
LP
2882static int exec_parameters_get_cgroup_path(const ExecParameters *params, char **ret) {
2883 bool using_subcgroup;
2884 char *p;
2885
2886 assert(params);
2887 assert(ret);
2888
2889 if (!params->cgroup_path)
2890 return -EINVAL;
2891
2892 /* If we are called for a unit where cgroup delegation is on, and the payload created its own populated
2893 * subcgroup (which we expect it to do, after all it asked for delegation), then we cannot place the control
2894 * processes started after the main unit's process in the unit's main cgroup because it is now an inner one,
2895 * and inner cgroups may not contain processes. Hence, if delegation is on, and this is a control process,
2896 * let's use ".control" as subcgroup instead. Note that we do so only for ExecStartPost=, ExecReload=,
2897 * ExecStop=, ExecStopPost=, i.e. for the commands where the main process is already forked. For ExecStartPre=
2898 * this is not necessary, the cgroup is still empty. We distinguish these cases with the EXEC_CONTROL_CGROUP
2899 * flag, which is only passed for the former statements, not for the latter. */
2900
2901 using_subcgroup = FLAGS_SET(params->flags, EXEC_CONTROL_CGROUP|EXEC_CGROUP_DELEGATE|EXEC_IS_CONTROL);
2902 if (using_subcgroup)
657ee2d8 2903 p = path_join(params->cgroup_path, ".control");
78f93209
LP
2904 else
2905 p = strdup(params->cgroup_path);
2906 if (!p)
2907 return -ENOMEM;
2908
2909 *ret = p;
2910 return using_subcgroup;
2911}
2912
ff0af2a1 2913static int exec_child(
f2341e0a 2914 Unit *unit,
34cf6c43 2915 const ExecCommand *command,
ff0af2a1
LP
2916 const ExecContext *context,
2917 const ExecParameters *params,
2918 ExecRuntime *runtime,
29206d46 2919 DynamicCreds *dcreds,
ff0af2a1 2920 int socket_fd,
2caa38e9 2921 const int named_iofds[static 3],
4c47affc 2922 int *fds,
da6053d0 2923 size_t n_socket_fds,
25b583d7 2924 size_t n_storage_fds,
ff0af2a1 2925 char **files_env,
00d9ef85 2926 int user_lookup_fd,
12145637 2927 int *exit_status) {
d35fbf6b 2928
7ca69792 2929 _cleanup_strv_free_ char **our_env = NULL, **pass_env = NULL, **accum_env = NULL, **replaced_argv = NULL;
5686391b 2930 int *fds_with_exec_fd, n_fds_with_exec_fd, r, ngids = 0, exec_fd = -1;
4d885bd3
DH
2931 _cleanup_free_ gid_t *supplementary_gids = NULL;
2932 const char *username = NULL, *groupname = NULL;
5686391b 2933 _cleanup_free_ char *home_buffer = NULL;
2b3c1b9e 2934 const char *home = NULL, *shell = NULL;
7ca69792 2935 char **final_argv = NULL;
7bce046b
LP
2936 dev_t journal_stream_dev = 0;
2937 ino_t journal_stream_ino = 0;
165a31c0
LP
2938 bool needs_sandboxing, /* Do we need to set up full sandboxing? (i.e. all namespacing, all MAC stuff, caps, yadda yadda */
2939 needs_setuid, /* Do we need to do the actual setresuid()/setresgid() calls? */
2940 needs_mount_namespace, /* Do we need to set up a mount namespace for this kernel? */
2941 needs_ambient_hack; /* Do we need to apply the ambient capabilities hack? */
349cc4a5 2942#if HAVE_SELINUX
7f59dd35 2943 _cleanup_free_ char *mac_selinux_context_net = NULL;
43b1f709 2944 bool use_selinux = false;
ecfbc84f 2945#endif
f9fa32f0 2946#if ENABLE_SMACK
43b1f709 2947 bool use_smack = false;
ecfbc84f 2948#endif
349cc4a5 2949#if HAVE_APPARMOR
43b1f709 2950 bool use_apparmor = false;
ecfbc84f 2951#endif
fed1e721
LP
2952 uid_t uid = UID_INVALID;
2953 gid_t gid = GID_INVALID;
da6053d0 2954 size_t n_fds;
3536f49e 2955 ExecDirectoryType dt;
165a31c0 2956 int secure_bits;
034c6ed7 2957
f2341e0a 2958 assert(unit);
5cb5a6ff
LP
2959 assert(command);
2960 assert(context);
d35fbf6b 2961 assert(params);
ff0af2a1 2962 assert(exit_status);
d35fbf6b
DM
2963
2964 rename_process_from_path(command->path);
2965
2966 /* We reset exactly these signals, since they are the
2967 * only ones we set to SIG_IGN in the main daemon. All
2968 * others we leave untouched because we set them to
2969 * SIG_DFL or a valid handler initially, both of which
2970 * will be demoted to SIG_DFL. */
ce30c8dc
LP
2971 (void) default_signals(SIGNALS_CRASH_HANDLER,
2972 SIGNALS_IGNORE, -1);
d35fbf6b
DM
2973
2974 if (context->ignore_sigpipe)
ce30c8dc 2975 (void) ignore_signals(SIGPIPE, -1);
d35fbf6b 2976
ff0af2a1
LP
2977 r = reset_signal_mask();
2978 if (r < 0) {
2979 *exit_status = EXIT_SIGNAL_MASK;
12145637 2980 return log_unit_error_errno(unit, r, "Failed to set process signal mask: %m");
d35fbf6b 2981 }
034c6ed7 2982
d35fbf6b
DM
2983 if (params->idle_pipe)
2984 do_idle_pipe_dance(params->idle_pipe);
4f2d528d 2985
2c027c62
LP
2986 /* Close fds we don't need very early to make sure we don't block init reexecution because it cannot bind its
2987 * sockets. Among the fds we close are the logging fds, and we want to keep them closed, so that we don't have
2988 * any fds open we don't really want open during the transition. In order to make logging work, we switch the
2989 * log subsystem into open_when_needed mode, so that it reopens the logs on every single log call. */
ff0af2a1 2990
d35fbf6b 2991 log_forget_fds();
2c027c62 2992 log_set_open_when_needed(true);
4f2d528d 2993
40a80078
LP
2994 /* In case anything used libc syslog(), close this here, too */
2995 closelog();
2996
5686391b
LP
2997 n_fds = n_socket_fds + n_storage_fds;
2998 r = close_remaining_fds(params, runtime, dcreds, user_lookup_fd, socket_fd, params->exec_fd, fds, n_fds);
ff0af2a1
LP
2999 if (r < 0) {
3000 *exit_status = EXIT_FDS;
12145637 3001 return log_unit_error_errno(unit, r, "Failed to close unwanted file descriptors: %m");
8c7be95e
LP
3002 }
3003
d35fbf6b
DM
3004 if (!context->same_pgrp)
3005 if (setsid() < 0) {
ff0af2a1 3006 *exit_status = EXIT_SETSID;
12145637 3007 return log_unit_error_errno(unit, errno, "Failed to create new process session: %m");
d35fbf6b 3008 }
9e2f7c11 3009
1e22b5cd 3010 exec_context_tty_reset(context, params);
d35fbf6b 3011
c891efaf 3012 if (unit_shall_confirm_spawn(unit)) {
7d5ceb64 3013 const char *vc = params->confirm_spawn;
3b20f877
FB
3014 _cleanup_free_ char *cmdline = NULL;
3015
ee39ca20 3016 cmdline = exec_command_line(command->argv);
3b20f877 3017 if (!cmdline) {
0460aa5c 3018 *exit_status = EXIT_MEMORY;
12145637 3019 return log_oom();
3b20f877 3020 }
d35fbf6b 3021
eedf223a 3022 r = ask_for_confirmation(vc, unit, cmdline);
3b20f877
FB
3023 if (r != CONFIRM_EXECUTE) {
3024 if (r == CONFIRM_PRETEND_SUCCESS) {
3025 *exit_status = EXIT_SUCCESS;
3026 return 0;
3027 }
ff0af2a1 3028 *exit_status = EXIT_CONFIRM;
12145637 3029 log_unit_error(unit, "Execution cancelled by the user");
d35fbf6b 3030 return -ECANCELED;
d35fbf6b
DM
3031 }
3032 }
1a63a750 3033
d521916d
LP
3034 /* We are about to invoke NSS and PAM modules. Let's tell them what we are doing here, maybe they care. This is
3035 * used by nss-resolve to disable itself when we are about to start systemd-resolved, to avoid deadlocks. Note
3036 * that these env vars do not survive the execve(), which means they really only apply to the PAM and NSS
3037 * invocations themselves. Also note that while we'll only invoke NSS modules involved in user management they
3038 * might internally call into other NSS modules that are involved in hostname resolution, we never know. */
3039 if (setenv("SYSTEMD_ACTIVATION_UNIT", unit->id, true) != 0 ||
3040 setenv("SYSTEMD_ACTIVATION_SCOPE", MANAGER_IS_SYSTEM(unit->manager) ? "system" : "user", true) != 0) {
3041 *exit_status = EXIT_MEMORY;
3042 return log_unit_error_errno(unit, errno, "Failed to update environment: %m");
3043 }
3044
29206d46 3045 if (context->dynamic_user && dcreds) {
da50b85a 3046 _cleanup_strv_free_ char **suggested_paths = NULL;
29206d46 3047
d521916d
LP
3048 /* On top of that, make sure we bypass our own NSS module nss-systemd comprehensively for any NSS
3049 * checks, if DynamicUser=1 is used, as we shouldn't create a feedback loop with ourselves here.*/
409093fe
LP
3050 if (putenv((char*) "SYSTEMD_NSS_DYNAMIC_BYPASS=1") != 0) {
3051 *exit_status = EXIT_USER;
12145637 3052 return log_unit_error_errno(unit, errno, "Failed to update environment: %m");
409093fe
LP
3053 }
3054
da50b85a
LP
3055 r = compile_suggested_paths(context, params, &suggested_paths);
3056 if (r < 0) {
3057 *exit_status = EXIT_MEMORY;
3058 return log_oom();
3059 }
3060
3061 r = dynamic_creds_realize(dcreds, suggested_paths, &uid, &gid);
ff0af2a1
LP
3062 if (r < 0) {
3063 *exit_status = EXIT_USER;
e2b0cc34
YW
3064 if (r == -EILSEQ) {
3065 log_unit_error(unit, "Failed to update dynamic user credentials: User or group with specified name already exists.");
3066 return -EOPNOTSUPP;
3067 }
12145637 3068 return log_unit_error_errno(unit, r, "Failed to update dynamic user credentials: %m");
524daa8c 3069 }
524daa8c 3070
70dd455c 3071 if (!uid_is_valid(uid)) {
29206d46 3072 *exit_status = EXIT_USER;
12145637 3073 log_unit_error(unit, "UID validation failed for \""UID_FMT"\"", uid);
70dd455c
ZJS
3074 return -ESRCH;
3075 }
3076
3077 if (!gid_is_valid(gid)) {
3078 *exit_status = EXIT_USER;
12145637 3079 log_unit_error(unit, "GID validation failed for \""GID_FMT"\"", gid);
29206d46
LP
3080 return -ESRCH;
3081 }
5bc7452b 3082
29206d46
LP
3083 if (dcreds->user)
3084 username = dcreds->user->name;
3085
3086 } else {
4d885bd3
DH
3087 r = get_fixed_user(context, &username, &uid, &gid, &home, &shell);
3088 if (r < 0) {
3089 *exit_status = EXIT_USER;
12145637 3090 return log_unit_error_errno(unit, r, "Failed to determine user credentials: %m");
5bc7452b 3091 }
5bc7452b 3092
4d885bd3
DH
3093 r = get_fixed_group(context, &groupname, &gid);
3094 if (r < 0) {
3095 *exit_status = EXIT_GROUP;
12145637 3096 return log_unit_error_errno(unit, r, "Failed to determine group credentials: %m");
4d885bd3 3097 }
cdc5d5c5 3098 }
29206d46 3099
cdc5d5c5
DH
3100 /* Initialize user supplementary groups and get SupplementaryGroups= ones */
3101 r = get_supplementary_groups(context, username, groupname, gid,
3102 &supplementary_gids, &ngids);
3103 if (r < 0) {
3104 *exit_status = EXIT_GROUP;
12145637 3105 return log_unit_error_errno(unit, r, "Failed to determine supplementary groups: %m");
29206d46 3106 }
5bc7452b 3107
00d9ef85
LP
3108 r = send_user_lookup(unit, user_lookup_fd, uid, gid);
3109 if (r < 0) {
3110 *exit_status = EXIT_USER;
12145637 3111 return log_unit_error_errno(unit, r, "Failed to send user credentials to PID1: %m");
00d9ef85
LP
3112 }
3113
3114 user_lookup_fd = safe_close(user_lookup_fd);
3115
6732edab
LP
3116 r = acquire_home(context, uid, &home, &home_buffer);
3117 if (r < 0) {
3118 *exit_status = EXIT_CHDIR;
12145637 3119 return log_unit_error_errno(unit, r, "Failed to determine $HOME for user: %m");
6732edab
LP
3120 }
3121
d35fbf6b
DM
3122 /* If a socket is connected to STDIN/STDOUT/STDERR, we
3123 * must make sure to drop O_NONBLOCK */
3124 if (socket_fd >= 0)
a34ceba6 3125 (void) fd_nonblock(socket_fd, false);
acbb0225 3126
4c70a4a7
MS
3127 /* Journald will try to look-up our cgroup in order to populate _SYSTEMD_CGROUP and _SYSTEMD_UNIT fields.
3128 * Hence we need to migrate to the target cgroup from init.scope before connecting to journald */
3129 if (params->cgroup_path) {
3130 _cleanup_free_ char *p = NULL;
3131
3132 r = exec_parameters_get_cgroup_path(params, &p);
3133 if (r < 0) {
3134 *exit_status = EXIT_CGROUP;
3135 return log_unit_error_errno(unit, r, "Failed to acquire cgroup path: %m");
3136 }
3137
3138 r = cg_attach_everywhere(params->cgroup_supported, p, 0, NULL, NULL);
3139 if (r < 0) {
3140 *exit_status = EXIT_CGROUP;
3141 return log_unit_error_errno(unit, r, "Failed to attach to cgroup %s: %m", p);
3142 }
3143 }
3144
a8d08f39
LP
3145 if (context->network_namespace_path && runtime && runtime->netns_storage_socket[0] >= 0) {
3146 r = open_netns_path(runtime->netns_storage_socket, context->network_namespace_path);
3147 if (r < 0) {
3148 *exit_status = EXIT_NETWORK;
3149 return log_unit_error_errno(unit, r, "Failed to open network namespace path %s: %m", context->network_namespace_path);
3150 }
3151 }
3152
52c239d7 3153 r = setup_input(context, params, socket_fd, named_iofds);
ff0af2a1
LP
3154 if (r < 0) {
3155 *exit_status = EXIT_STDIN;
12145637 3156 return log_unit_error_errno(unit, r, "Failed to set up standard input: %m");
d35fbf6b 3157 }
034c6ed7 3158
52c239d7 3159 r = setup_output(unit, context, params, STDOUT_FILENO, socket_fd, named_iofds, basename(command->path), uid, gid, &journal_stream_dev, &journal_stream_ino);
ff0af2a1
LP
3160 if (r < 0) {
3161 *exit_status = EXIT_STDOUT;
12145637 3162 return log_unit_error_errno(unit, r, "Failed to set up standard output: %m");
d35fbf6b
DM
3163 }
3164
52c239d7 3165 r = setup_output(unit, context, params, STDERR_FILENO, socket_fd, named_iofds, basename(command->path), uid, gid, &journal_stream_dev, &journal_stream_ino);
ff0af2a1
LP
3166 if (r < 0) {
3167 *exit_status = EXIT_STDERR;
12145637 3168 return log_unit_error_errno(unit, r, "Failed to set up standard error output: %m");
d35fbf6b
DM
3169 }
3170
d35fbf6b 3171 if (context->oom_score_adjust_set) {
9f8168eb
LP
3172 /* When we can't make this change due to EPERM, then let's silently skip over it. User namespaces
3173 * prohibit write access to this file, and we shouldn't trip up over that. */
3174 r = set_oom_score_adjust(context->oom_score_adjust);
12145637 3175 if (IN_SET(r, -EPERM, -EACCES))
f2341e0a 3176 log_unit_debug_errno(unit, r, "Failed to adjust OOM setting, assuming containerized execution, ignoring: %m");
12145637 3177 else if (r < 0) {
ff0af2a1 3178 *exit_status = EXIT_OOM_ADJUST;
12145637 3179 return log_unit_error_errno(unit, r, "Failed to adjust OOM setting: %m");
613b411c 3180 }
d35fbf6b
DM
3181 }
3182
3183 if (context->nice_set)
3184 if (setpriority(PRIO_PROCESS, 0, context->nice) < 0) {
ff0af2a1 3185 *exit_status = EXIT_NICE;
12145637 3186 return log_unit_error_errno(unit, errno, "Failed to set up process scheduling priority (nice level): %m");
613b411c
LP
3187 }
3188
d35fbf6b
DM
3189 if (context->cpu_sched_set) {
3190 struct sched_param param = {
3191 .sched_priority = context->cpu_sched_priority,
3192 };
3193
ff0af2a1
LP
3194 r = sched_setscheduler(0,
3195 context->cpu_sched_policy |
3196 (context->cpu_sched_reset_on_fork ?
3197 SCHED_RESET_ON_FORK : 0),
3198 &param);
3199 if (r < 0) {
3200 *exit_status = EXIT_SETSCHEDULER;
12145637 3201 return log_unit_error_errno(unit, errno, "Failed to set up CPU scheduling: %m");
fc9b2a84 3202 }
d35fbf6b 3203 }
fc9b2a84 3204
0985c7c4
ZJS
3205 if (context->cpu_set.set)
3206 if (sched_setaffinity(0, context->cpu_set.allocated, context->cpu_set.set) < 0) {
ff0af2a1 3207 *exit_status = EXIT_CPUAFFINITY;
12145637 3208 return log_unit_error_errno(unit, errno, "Failed to set up CPU affinity: %m");
034c6ed7
LP
3209 }
3210
b070c7c0
MS
3211 if (mpol_is_valid(numa_policy_get_type(&context->numa_policy))) {
3212 r = apply_numa_policy(&context->numa_policy);
3213 if (r == -EOPNOTSUPP)
33fe9e3f 3214 log_unit_debug_errno(unit, r, "NUMA support not available, ignoring.");
b070c7c0
MS
3215 else if (r < 0) {
3216 *exit_status = EXIT_NUMA_POLICY;
3217 return log_unit_error_errno(unit, r, "Failed to set NUMA memory policy: %m");
3218 }
3219 }
3220
d35fbf6b
DM
3221 if (context->ioprio_set)
3222 if (ioprio_set(IOPRIO_WHO_PROCESS, 0, context->ioprio) < 0) {
ff0af2a1 3223 *exit_status = EXIT_IOPRIO;
12145637 3224 return log_unit_error_errno(unit, errno, "Failed to set up IO scheduling priority: %m");
d35fbf6b 3225 }
da726a4d 3226
d35fbf6b
DM
3227 if (context->timer_slack_nsec != NSEC_INFINITY)
3228 if (prctl(PR_SET_TIMERSLACK, context->timer_slack_nsec) < 0) {
ff0af2a1 3229 *exit_status = EXIT_TIMERSLACK;
12145637 3230 return log_unit_error_errno(unit, errno, "Failed to set up timer slack: %m");
4c2630eb 3231 }
9eba9da4 3232
21022b9d
LP
3233 if (context->personality != PERSONALITY_INVALID) {
3234 r = safe_personality(context->personality);
3235 if (r < 0) {
ff0af2a1 3236 *exit_status = EXIT_PERSONALITY;
12145637 3237 return log_unit_error_errno(unit, r, "Failed to set up execution domain (personality): %m");
4c2630eb 3238 }
21022b9d 3239 }
94f04347 3240
d35fbf6b 3241 if (context->utmp_id)
df0ff127 3242 utmp_put_init_process(context->utmp_id, getpid_cached(), getsid(0),
6a93917d 3243 context->tty_path,
023a4f67
LP
3244 context->utmp_mode == EXEC_UTMP_INIT ? INIT_PROCESS :
3245 context->utmp_mode == EXEC_UTMP_LOGIN ? LOGIN_PROCESS :
3246 USER_PROCESS,
6a93917d 3247 username);
d35fbf6b 3248
08f67696 3249 if (uid_is_valid(uid)) {
ff0af2a1
LP
3250 r = chown_terminal(STDIN_FILENO, uid);
3251 if (r < 0) {
3252 *exit_status = EXIT_STDIN;
12145637 3253 return log_unit_error_errno(unit, r, "Failed to change ownership of terminal: %m");
071830ff 3254 }
d35fbf6b 3255 }
8e274523 3256
4e1dfa45 3257 /* If delegation is enabled we'll pass ownership of the cgroup to the user of the new process. On cgroup v1
62b9bb26 3258 * this is only about systemd's own hierarchy, i.e. not the controller hierarchies, simply because that's not
4e1dfa45 3259 * safe. On cgroup v2 there's only one hierarchy anyway, and delegation is safe there, hence in that case only
62b9bb26 3260 * touch a single hierarchy too. */
584b8688 3261 if (params->cgroup_path && context->user && (params->flags & EXEC_CGROUP_DELEGATE)) {
62b9bb26 3262 r = cg_set_access(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, uid, gid);
ff0af2a1
LP
3263 if (r < 0) {
3264 *exit_status = EXIT_CGROUP;
12145637 3265 return log_unit_error_errno(unit, r, "Failed to adjust control group access: %m");
034c6ed7 3266 }
d35fbf6b 3267 }
034c6ed7 3268
72fd1768 3269 for (dt = 0; dt < _EXEC_DIRECTORY_TYPE_MAX; dt++) {
8679efde 3270 r = setup_exec_directory(context, params, uid, gid, dt, exit_status);
12145637
LP
3271 if (r < 0)
3272 return log_unit_error_errno(unit, r, "Failed to set up special execution directory in %s: %m", params->prefix[dt]);
d35fbf6b 3273 }
94f04347 3274
7bce046b 3275 r = build_environment(
fd63e712 3276 unit,
7bce046b
LP
3277 context,
3278 params,
3279 n_fds,
3280 home,
3281 username,
3282 shell,
3283 journal_stream_dev,
3284 journal_stream_ino,
3285 &our_env);
2065ca69
JW
3286 if (r < 0) {
3287 *exit_status = EXIT_MEMORY;
12145637 3288 return log_oom();
2065ca69
JW
3289 }
3290
3291 r = build_pass_environment(context, &pass_env);
3292 if (r < 0) {
3293 *exit_status = EXIT_MEMORY;
12145637 3294 return log_oom();
2065ca69
JW
3295 }
3296
3297 accum_env = strv_env_merge(5,
3298 params->environment,
3299 our_env,
3300 pass_env,
3301 context->environment,
3302 files_env,
3303 NULL);
3304 if (!accum_env) {
3305 *exit_status = EXIT_MEMORY;
12145637 3306 return log_oom();
2065ca69 3307 }
1280503b 3308 accum_env = strv_env_clean(accum_env);
2065ca69 3309
096424d1 3310 (void) umask(context->umask);
b213e1c1 3311
b1edf445 3312 r = setup_keyring(unit, context, params, uid, gid);
74dd6b51
LP
3313 if (r < 0) {
3314 *exit_status = EXIT_KEYRING;
12145637 3315 return log_unit_error_errno(unit, r, "Failed to set up kernel keyring: %m");
74dd6b51
LP
3316 }
3317
165a31c0 3318 /* We need sandboxing if the caller asked us to apply it and the command isn't explicitly excepted from it */
1703fa41 3319 needs_sandboxing = (params->flags & EXEC_APPLY_SANDBOXING) && !(command->flags & EXEC_COMMAND_FULLY_PRIVILEGED);
7f18ef0a 3320
165a31c0
LP
3321 /* We need the ambient capability hack, if the caller asked us to apply it and the command is marked for it, and the kernel doesn't actually support ambient caps */
3322 needs_ambient_hack = (params->flags & EXEC_APPLY_SANDBOXING) && (command->flags & EXEC_COMMAND_AMBIENT_MAGIC) && !ambient_capabilities_supported();
7f18ef0a 3323
165a31c0
LP
3324 /* We need setresuid() if the caller asked us to apply sandboxing and the command isn't explicitly excepted from either whole sandboxing or just setresuid() itself, and the ambient hack is not desired */
3325 if (needs_ambient_hack)
3326 needs_setuid = false;
3327 else
3328 needs_setuid = (params->flags & EXEC_APPLY_SANDBOXING) && !(command->flags & (EXEC_COMMAND_FULLY_PRIVILEGED|EXEC_COMMAND_NO_SETUID));
3329
3330 if (needs_sandboxing) {
7f18ef0a
FK
3331 /* MAC enablement checks need to be done before a new mount ns is created, as they rely on /sys being
3332 * present. The actual MAC context application will happen later, as late as possible, to avoid
3333 * impacting our own code paths. */
3334
349cc4a5 3335#if HAVE_SELINUX
43b1f709 3336 use_selinux = mac_selinux_use();
7f18ef0a 3337#endif
f9fa32f0 3338#if ENABLE_SMACK
43b1f709 3339 use_smack = mac_smack_use();
7f18ef0a 3340#endif
349cc4a5 3341#if HAVE_APPARMOR
43b1f709 3342 use_apparmor = mac_apparmor_use();
7f18ef0a 3343#endif
165a31c0 3344 }
7f18ef0a 3345
ce932d2d
LP
3346 if (needs_sandboxing) {
3347 int which_failed;
3348
3349 /* Let's set the resource limits before we call into PAM, so that pam_limits wins over what
3350 * is set here. (See below.) */
3351
3352 r = setrlimit_closest_all((const struct rlimit* const *) context->rlimit, &which_failed);
3353 if (r < 0) {
3354 *exit_status = EXIT_LIMITS;
3355 return log_unit_error_errno(unit, r, "Failed to adjust resource limit RLIMIT_%s: %m", rlimit_to_string(which_failed));
3356 }
3357 }
3358
165a31c0 3359 if (needs_setuid) {
ce932d2d
LP
3360
3361 /* Let's call into PAM after we set up our own idea of resource limits so that pam_limits
3362 * wins here. (See above.) */
3363
165a31c0
LP
3364 if (context->pam_name && username) {
3365 r = setup_pam(context->pam_name, username, uid, gid, context->tty_path, &accum_env, fds, n_fds);
3366 if (r < 0) {
3367 *exit_status = EXIT_PAM;
12145637 3368 return log_unit_error_errno(unit, r, "Failed to set up PAM session: %m");
165a31c0
LP
3369 }
3370 }
b213e1c1 3371 }
ac45f971 3372
a8d08f39
LP
3373 if ((context->private_network || context->network_namespace_path) && runtime && runtime->netns_storage_socket[0] >= 0) {
3374
6e2d7c4f
MS
3375 if (ns_type_supported(NAMESPACE_NET)) {
3376 r = setup_netns(runtime->netns_storage_socket);
3377 if (r < 0) {
3378 *exit_status = EXIT_NETWORK;
3379 return log_unit_error_errno(unit, r, "Failed to set up network namespacing: %m");
3380 }
a8d08f39
LP
3381 } else if (context->network_namespace_path) {
3382 *exit_status = EXIT_NETWORK;
3383 return log_unit_error_errno(unit, SYNTHETIC_ERRNO(EOPNOTSUPP), "NetworkNamespacePath= is not supported, refusing.");
6e2d7c4f
MS
3384 } else
3385 log_unit_warning(unit, "PrivateNetwork=yes is configured, but the kernel does not support network namespaces, ignoring.");
d35fbf6b 3386 }
169c1bda 3387
ee818b89 3388 needs_mount_namespace = exec_needs_mount_namespace(context, params, runtime);
ee818b89 3389 if (needs_mount_namespace) {
7cc5ef5f
ZJS
3390 _cleanup_free_ char *error_path = NULL;
3391
3392 r = apply_mount_namespace(unit, command, context, params, runtime, &error_path);
3fbe8dbe
LP
3393 if (r < 0) {
3394 *exit_status = EXIT_NAMESPACE;
7cc5ef5f
ZJS
3395 return log_unit_error_errno(unit, r, "Failed to set up mount namespacing%s%s: %m",
3396 error_path ? ": " : "", strempty(error_path));
3fbe8dbe 3397 }
d35fbf6b 3398 }
81a2b7ce 3399
aecd5ac6
TM
3400 if (context->protect_hostname) {
3401 if (ns_type_supported(NAMESPACE_UTS)) {
3402 if (unshare(CLONE_NEWUTS) < 0) {
3403 *exit_status = EXIT_NAMESPACE;
3404 return log_unit_error_errno(unit, errno, "Failed to set up UTS namespacing: %m");
3405 }
3406 } else
3407 log_unit_warning(unit, "ProtectHostname=yes is configured, but the kernel does not support UTS namespaces, ignoring namespace setup.");
3408#if HAVE_SECCOMP
3409 r = seccomp_protect_hostname();
3410 if (r < 0) {
3411 *exit_status = EXIT_SECCOMP;
3412 return log_unit_error_errno(unit, r, "Failed to apply hostname restrictions: %m");
3413 }
3414#endif
3415 }
3416
bbeea271 3417 /* Drop groups as early as possible */
165a31c0 3418 if (needs_setuid) {
709dbeac 3419 r = enforce_groups(gid, supplementary_gids, ngids);
096424d1
LP
3420 if (r < 0) {
3421 *exit_status = EXIT_GROUP;
12145637 3422 return log_unit_error_errno(unit, r, "Changing group credentials failed: %m");
096424d1 3423 }
165a31c0 3424 }
096424d1 3425
165a31c0 3426 if (needs_sandboxing) {
349cc4a5 3427#if HAVE_SELINUX
43b1f709 3428 if (use_selinux && params->selinux_context_net && socket_fd >= 0) {
937ccce9
LP
3429 r = mac_selinux_get_child_mls_label(socket_fd, command->path, context->selinux_context, &mac_selinux_context_net);
3430 if (r < 0) {
3431 *exit_status = EXIT_SELINUX_CONTEXT;
12145637 3432 return log_unit_error_errno(unit, r, "Failed to determine SELinux context: %m");
937ccce9 3433 }
9008e1ac 3434 }
9008e1ac
MS
3435#endif
3436
937ccce9
LP
3437 if (context->private_users) {
3438 r = setup_private_users(uid, gid);
3439 if (r < 0) {
3440 *exit_status = EXIT_USER;
12145637 3441 return log_unit_error_errno(unit, r, "Failed to set up user namespacing: %m");
937ccce9 3442 }
d251207d
LP
3443 }
3444 }
3445
165a31c0 3446 /* We repeat the fd closing here, to make sure that nothing is leaked from the PAM modules. Note that we are
5686391b
LP
3447 * more aggressive this time since socket_fd and the netns fds we don't need anymore. We do keep the exec_fd
3448 * however if we have it as we want to keep it open until the final execve(). */
3449
3450 if (params->exec_fd >= 0) {
3451 exec_fd = params->exec_fd;
3452
3453 if (exec_fd < 3 + (int) n_fds) {
3454 int moved_fd;
3455
3456 /* Let's move the exec fd far up, so that it's outside of the fd range we want to pass to the
3457 * process we are about to execute. */
3458
3459 moved_fd = fcntl(exec_fd, F_DUPFD_CLOEXEC, 3 + (int) n_fds);
3460 if (moved_fd < 0) {
3461 *exit_status = EXIT_FDS;
3462 return log_unit_error_errno(unit, errno, "Couldn't move exec fd up: %m");
3463 }
3464
3465 safe_close(exec_fd);
3466 exec_fd = moved_fd;
3467 } else {
3468 /* This fd should be FD_CLOEXEC already, but let's make sure. */
3469 r = fd_cloexec(exec_fd, true);
3470 if (r < 0) {
3471 *exit_status = EXIT_FDS;
3472 return log_unit_error_errno(unit, r, "Failed to make exec fd FD_CLOEXEC: %m");
3473 }
3474 }
3475
3476 fds_with_exec_fd = newa(int, n_fds + 1);
7e8d494b 3477 memcpy_safe(fds_with_exec_fd, fds, n_fds * sizeof(int));
5686391b
LP
3478 fds_with_exec_fd[n_fds] = exec_fd;
3479 n_fds_with_exec_fd = n_fds + 1;
3480 } else {
3481 fds_with_exec_fd = fds;
3482 n_fds_with_exec_fd = n_fds;
3483 }
3484
3485 r = close_all_fds(fds_with_exec_fd, n_fds_with_exec_fd);
ff0af2a1
LP
3486 if (r >= 0)
3487 r = shift_fds(fds, n_fds);
3488 if (r >= 0)
25b583d7 3489 r = flags_fds(fds, n_socket_fds, n_storage_fds, context->non_blocking);
ff0af2a1
LP
3490 if (r < 0) {
3491 *exit_status = EXIT_FDS;
12145637 3492 return log_unit_error_errno(unit, r, "Failed to adjust passed file descriptors: %m");
d35fbf6b 3493 }
e66cf1a3 3494
5686391b
LP
3495 /* At this point, the fds we want to pass to the program are all ready and set up, with O_CLOEXEC turned off
3496 * and at the right fd numbers. There are no other fds open, with one exception: the exec_fd if it is defined,
3497 * and it has O_CLOEXEC set, after all we want it to be closed by the execve(), so that our parent knows we
3498 * came this far. */
3499
165a31c0 3500 secure_bits = context->secure_bits;
e66cf1a3 3501
165a31c0
LP
3502 if (needs_sandboxing) {
3503 uint64_t bset;
e66cf1a3 3504
ce932d2d
LP
3505 /* Set the RTPRIO resource limit to 0, but only if nothing else was explicitly
3506 * requested. (Note this is placed after the general resource limit initialization, see
3507 * above, in order to take precedence.) */
f4170c67
LP
3508 if (context->restrict_realtime && !context->rlimit[RLIMIT_RTPRIO]) {
3509 if (setrlimit(RLIMIT_RTPRIO, &RLIMIT_MAKE_CONST(0)) < 0) {
3510 *exit_status = EXIT_LIMITS;
12145637 3511 return log_unit_error_errno(unit, errno, "Failed to adjust RLIMIT_RTPRIO resource limit: %m");
f4170c67
LP
3512 }
3513 }
3514
37ac2744
JB
3515#if ENABLE_SMACK
3516 /* LSM Smack needs the capability CAP_MAC_ADMIN to change the current execution security context of the
3517 * process. This is the latest place before dropping capabilities. Other MAC context are set later. */
3518 if (use_smack) {
3519 r = setup_smack(context, command);
3520 if (r < 0) {
3521 *exit_status = EXIT_SMACK_PROCESS_LABEL;
3522 return log_unit_error_errno(unit, r, "Failed to set SMACK process label: %m");
3523 }
3524 }
3525#endif
3526
165a31c0
LP
3527 bset = context->capability_bounding_set;
3528 /* If the ambient caps hack is enabled (which means the kernel can't do them, and the user asked for
3529 * our magic fallback), then let's add some extra caps, so that the service can drop privs of its own,
3530 * instead of us doing that */
3531 if (needs_ambient_hack)
3532 bset |= (UINT64_C(1) << CAP_SETPCAP) |
3533 (UINT64_C(1) << CAP_SETUID) |
3534 (UINT64_C(1) << CAP_SETGID);
3535
3536 if (!cap_test_all(bset)) {
3537 r = capability_bounding_set_drop(bset, false);
ff0af2a1
LP
3538 if (r < 0) {
3539 *exit_status = EXIT_CAPABILITIES;
12145637 3540 return log_unit_error_errno(unit, r, "Failed to drop capabilities: %m");
3b8bddde 3541 }
4c2630eb 3542 }
3b8bddde 3543
755d4b67
IP
3544 /* This is done before enforce_user, but ambient set
3545 * does not survive over setresuid() if keep_caps is not set. */
165a31c0
LP
3546 if (!needs_ambient_hack &&
3547 context->capability_ambient_set != 0) {
755d4b67
IP
3548 r = capability_ambient_set_apply(context->capability_ambient_set, true);
3549 if (r < 0) {
3550 *exit_status = EXIT_CAPABILITIES;
12145637 3551 return log_unit_error_errno(unit, r, "Failed to apply ambient capabilities (before UID change): %m");
755d4b67 3552 }
755d4b67 3553 }
165a31c0 3554 }
755d4b67 3555
fa97f630
JB
3556 /* chroot to root directory first, before we lose the ability to chroot */
3557 r = apply_root_directory(context, params, needs_mount_namespace, exit_status);
3558 if (r < 0)
3559 return log_unit_error_errno(unit, r, "Chrooting to the requested root directory failed: %m");
3560
165a31c0 3561 if (needs_setuid) {
08f67696 3562 if (uid_is_valid(uid)) {
ff0af2a1
LP
3563 r = enforce_user(context, uid);
3564 if (r < 0) {
3565 *exit_status = EXIT_USER;
12145637 3566 return log_unit_error_errno(unit, r, "Failed to change UID to " UID_FMT ": %m", uid);
5b6319dc 3567 }
165a31c0
LP
3568
3569 if (!needs_ambient_hack &&
3570 context->capability_ambient_set != 0) {
755d4b67
IP
3571
3572 /* Fix the ambient capabilities after user change. */
3573 r = capability_ambient_set_apply(context->capability_ambient_set, false);
3574 if (r < 0) {
3575 *exit_status = EXIT_CAPABILITIES;
12145637 3576 return log_unit_error_errno(unit, r, "Failed to apply ambient capabilities (after UID change): %m");
755d4b67
IP
3577 }
3578
3579 /* If we were asked to change user and ambient capabilities
3580 * were requested, we had to add keep-caps to the securebits
3581 * so that we would maintain the inherited capability set
3582 * through the setresuid(). Make sure that the bit is added
3583 * also to the context secure_bits so that we don't try to
3584 * drop the bit away next. */
3585
7f508f2c 3586 secure_bits |= 1<<SECURE_KEEP_CAPS;
755d4b67 3587 }
5b6319dc 3588 }
165a31c0 3589 }
d35fbf6b 3590
56ef8db9
JB
3591 /* Apply working directory here, because the working directory might be on NFS and only the user running
3592 * this service might have the correct privilege to change to the working directory */
fa97f630 3593 r = apply_working_directory(context, params, home, exit_status);
56ef8db9
JB
3594 if (r < 0)
3595 return log_unit_error_errno(unit, r, "Changing to the requested working directory failed: %m");
3596
165a31c0 3597 if (needs_sandboxing) {
37ac2744 3598 /* Apply other MAC contexts late, but before seccomp syscall filtering, as those should really be last to
5cd9cd35
LP
3599 * influence our own codepaths as little as possible. Moreover, applying MAC contexts usually requires
3600 * syscalls that are subject to seccomp filtering, hence should probably be applied before the syscalls
3601 * are restricted. */
3602
349cc4a5 3603#if HAVE_SELINUX
43b1f709 3604 if (use_selinux) {
5cd9cd35
LP
3605 char *exec_context = mac_selinux_context_net ?: context->selinux_context;
3606
3607 if (exec_context) {
3608 r = setexeccon(exec_context);
3609 if (r < 0) {
3610 *exit_status = EXIT_SELINUX_CONTEXT;
12145637 3611 return log_unit_error_errno(unit, r, "Failed to change SELinux context to %s: %m", exec_context);
5cd9cd35
LP
3612 }
3613 }
3614 }
3615#endif
3616
349cc4a5 3617#if HAVE_APPARMOR
43b1f709 3618 if (use_apparmor && context->apparmor_profile) {
5cd9cd35
LP
3619 r = aa_change_onexec(context->apparmor_profile);
3620 if (r < 0 && !context->apparmor_profile_ignore) {
3621 *exit_status = EXIT_APPARMOR_PROFILE;
12145637 3622 return log_unit_error_errno(unit, errno, "Failed to prepare AppArmor profile change to %s: %m", context->apparmor_profile);
5cd9cd35
LP
3623 }
3624 }
3625#endif
3626
165a31c0
LP
3627 /* PR_GET_SECUREBITS is not privileged, while PR_SET_SECUREBITS is. So to suppress potential EPERMs
3628 * we'll try not to call PR_SET_SECUREBITS unless necessary. */
755d4b67
IP
3629 if (prctl(PR_GET_SECUREBITS) != secure_bits)
3630 if (prctl(PR_SET_SECUREBITS, secure_bits) < 0) {
ff0af2a1 3631 *exit_status = EXIT_SECUREBITS;
12145637 3632 return log_unit_error_errno(unit, errno, "Failed to set process secure bits: %m");
ff01d048 3633 }
5b6319dc 3634
59eeb84b 3635 if (context_has_no_new_privileges(context))
d35fbf6b 3636 if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) < 0) {
ff0af2a1 3637 *exit_status = EXIT_NO_NEW_PRIVILEGES;
12145637 3638 return log_unit_error_errno(unit, errno, "Failed to disable new privileges: %m");
d35fbf6b
DM
3639 }
3640
349cc4a5 3641#if HAVE_SECCOMP
469830d1
LP
3642 r = apply_address_families(unit, context);
3643 if (r < 0) {
3644 *exit_status = EXIT_ADDRESS_FAMILIES;
12145637 3645 return log_unit_error_errno(unit, r, "Failed to restrict address families: %m");
4c2630eb 3646 }
04aa0cb9 3647
469830d1
LP
3648 r = apply_memory_deny_write_execute(unit, context);
3649 if (r < 0) {
3650 *exit_status = EXIT_SECCOMP;
12145637 3651 return log_unit_error_errno(unit, r, "Failed to disable writing to executable memory: %m");
f3e43635 3652 }
f4170c67 3653
469830d1
LP
3654 r = apply_restrict_realtime(unit, context);
3655 if (r < 0) {
3656 *exit_status = EXIT_SECCOMP;
12145637 3657 return log_unit_error_errno(unit, r, "Failed to apply realtime restrictions: %m");
f4170c67
LP
3658 }
3659
f69567cb
LP
3660 r = apply_restrict_suid_sgid(unit, context);
3661 if (r < 0) {
3662 *exit_status = EXIT_SECCOMP;
3663 return log_unit_error_errno(unit, r, "Failed to apply SUID/SGID restrictions: %m");
3664 }
3665
add00535
LP
3666 r = apply_restrict_namespaces(unit, context);
3667 if (r < 0) {
3668 *exit_status = EXIT_SECCOMP;
12145637 3669 return log_unit_error_errno(unit, r, "Failed to apply namespace restrictions: %m");
add00535
LP
3670 }
3671
469830d1
LP
3672 r = apply_protect_sysctl(unit, context);
3673 if (r < 0) {
3674 *exit_status = EXIT_SECCOMP;
12145637 3675 return log_unit_error_errno(unit, r, "Failed to apply sysctl restrictions: %m");
502d704e
DH
3676 }
3677
469830d1
LP
3678 r = apply_protect_kernel_modules(unit, context);
3679 if (r < 0) {
3680 *exit_status = EXIT_SECCOMP;
12145637 3681 return log_unit_error_errno(unit, r, "Failed to apply module loading restrictions: %m");
59eeb84b
LP
3682 }
3683
469830d1
LP
3684 r = apply_private_devices(unit, context);
3685 if (r < 0) {
3686 *exit_status = EXIT_SECCOMP;
12145637 3687 return log_unit_error_errno(unit, r, "Failed to set up private devices: %m");
469830d1
LP
3688 }
3689
3690 r = apply_syscall_archs(unit, context);
3691 if (r < 0) {
3692 *exit_status = EXIT_SECCOMP;
12145637 3693 return log_unit_error_errno(unit, r, "Failed to apply syscall architecture restrictions: %m");
ba128bb8
LP
3694 }
3695
78e864e5
TM
3696 r = apply_lock_personality(unit, context);
3697 if (r < 0) {
3698 *exit_status = EXIT_SECCOMP;
12145637 3699 return log_unit_error_errno(unit, r, "Failed to lock personalities: %m");
78e864e5
TM
3700 }
3701
5cd9cd35
LP
3702 /* This really should remain the last step before the execve(), to make sure our own code is unaffected
3703 * by the filter as little as possible. */
165a31c0 3704 r = apply_syscall_filter(unit, context, needs_ambient_hack);
469830d1
LP
3705 if (r < 0) {
3706 *exit_status = EXIT_SECCOMP;
12145637 3707 return log_unit_error_errno(unit, r, "Failed to apply system call filters: %m");
d35fbf6b
DM
3708 }
3709#endif
d35fbf6b 3710 }
034c6ed7 3711
00819cc1
LP
3712 if (!strv_isempty(context->unset_environment)) {
3713 char **ee = NULL;
3714
3715 ee = strv_env_delete(accum_env, 1, context->unset_environment);
3716 if (!ee) {
3717 *exit_status = EXIT_MEMORY;
12145637 3718 return log_oom();
00819cc1
LP
3719 }
3720
130d3d22 3721 strv_free_and_replace(accum_env, ee);
00819cc1
LP
3722 }
3723
7ca69792
AZ
3724 if (!FLAGS_SET(command->flags, EXEC_COMMAND_NO_ENV_EXPAND)) {
3725 replaced_argv = replace_env_argv(command->argv, accum_env);
3726 if (!replaced_argv) {
3727 *exit_status = EXIT_MEMORY;
3728 return log_oom();
3729 }
3730 final_argv = replaced_argv;
3731 } else
3732 final_argv = command->argv;
034c6ed7 3733
f1d34068 3734 if (DEBUG_LOGGING) {
d35fbf6b 3735 _cleanup_free_ char *line;
81a2b7ce 3736
d35fbf6b 3737 line = exec_command_line(final_argv);
a1230ff9 3738 if (line)
f2341e0a 3739 log_struct(LOG_DEBUG,
f2341e0a
LP
3740 "EXECUTABLE=%s", command->path,
3741 LOG_UNIT_MESSAGE(unit, "Executing: %s", line),
ba360bb0 3742 LOG_UNIT_ID(unit),
a1230ff9 3743 LOG_UNIT_INVOCATION_ID(unit));
d35fbf6b 3744 }
dd305ec9 3745
5686391b
LP
3746 if (exec_fd >= 0) {
3747 uint8_t hot = 1;
3748
3749 /* We have finished with all our initializations. Let's now let the manager know that. From this point
3750 * on, if the manager sees POLLHUP on the exec_fd, then execve() was successful. */
3751
3752 if (write(exec_fd, &hot, sizeof(hot)) < 0) {
3753 *exit_status = EXIT_EXEC;
3754 return log_unit_error_errno(unit, errno, "Failed to enable exec_fd: %m");
3755 }
3756 }
3757
2065ca69 3758 execve(command->path, final_argv, accum_env);
5686391b
LP
3759 r = -errno;
3760
3761 if (exec_fd >= 0) {
3762 uint8_t hot = 0;
3763
3764 /* The execve() failed. This means the exec_fd is still open. Which means we need to tell the manager
3765 * that POLLHUP on it no longer means execve() succeeded. */
3766
3767 if (write(exec_fd, &hot, sizeof(hot)) < 0) {
3768 *exit_status = EXIT_EXEC;
3769 return log_unit_error_errno(unit, errno, "Failed to disable exec_fd: %m");
3770 }
3771 }
12145637 3772
5686391b
LP
3773 if (r == -ENOENT && (command->flags & EXEC_COMMAND_IGNORE_FAILURE)) {
3774 log_struct_errno(LOG_INFO, r,
12145637
LP
3775 "MESSAGE_ID=" SD_MESSAGE_SPAWN_FAILED_STR,
3776 LOG_UNIT_ID(unit),
3777 LOG_UNIT_INVOCATION_ID(unit),
3778 LOG_UNIT_MESSAGE(unit, "Executable %s missing, skipping: %m",
3779 command->path),
a1230ff9 3780 "EXECUTABLE=%s", command->path);
12145637
LP
3781 return 0;
3782 }
3783
ff0af2a1 3784 *exit_status = EXIT_EXEC;
5686391b 3785 return log_unit_error_errno(unit, r, "Failed to execute command: %m");
d35fbf6b 3786}
81a2b7ce 3787
34cf6c43 3788static int exec_context_load_environment(const Unit *unit, const ExecContext *c, char ***l);
2caa38e9 3789static int exec_context_named_iofds(const ExecContext *c, const ExecParameters *p, int named_iofds[static 3]);
34cf6c43 3790
f2341e0a
LP
3791int exec_spawn(Unit *unit,
3792 ExecCommand *command,
d35fbf6b
DM
3793 const ExecContext *context,
3794 const ExecParameters *params,
3795 ExecRuntime *runtime,
29206d46 3796 DynamicCreds *dcreds,
d35fbf6b 3797 pid_t *ret) {
8351ceae 3798
ee39ca20 3799 int socket_fd, r, named_iofds[3] = { -1, -1, -1 }, *fds = NULL;
78f93209 3800 _cleanup_free_ char *subcgroup_path = NULL;
d35fbf6b 3801 _cleanup_strv_free_ char **files_env = NULL;
da6053d0 3802 size_t n_storage_fds = 0, n_socket_fds = 0;
ff0af2a1 3803 _cleanup_free_ char *line = NULL;
d35fbf6b 3804 pid_t pid;
8351ceae 3805
f2341e0a 3806 assert(unit);
d35fbf6b
DM
3807 assert(command);
3808 assert(context);
3809 assert(ret);
3810 assert(params);
25b583d7 3811 assert(params->fds || (params->n_socket_fds + params->n_storage_fds <= 0));
4298d0b5 3812
d35fbf6b
DM
3813 if (context->std_input == EXEC_INPUT_SOCKET ||
3814 context->std_output == EXEC_OUTPUT_SOCKET ||
3815 context->std_error == EXEC_OUTPUT_SOCKET) {
17df7223 3816
4c47affc 3817 if (params->n_socket_fds > 1) {
f2341e0a 3818 log_unit_error(unit, "Got more than one socket.");
d35fbf6b 3819 return -EINVAL;
ff0af2a1 3820 }
eef65bf3 3821
4c47affc 3822 if (params->n_socket_fds == 0) {
488ab41c
AA
3823 log_unit_error(unit, "Got no socket.");
3824 return -EINVAL;
3825 }
3826
d35fbf6b
DM
3827 socket_fd = params->fds[0];
3828 } else {
3829 socket_fd = -1;
3830 fds = params->fds;
9b141911 3831 n_socket_fds = params->n_socket_fds;
25b583d7 3832 n_storage_fds = params->n_storage_fds;
d35fbf6b 3833 }
94f04347 3834
34cf6c43 3835 r = exec_context_named_iofds(context, params, named_iofds);
52c239d7
LB
3836 if (r < 0)
3837 return log_unit_error_errno(unit, r, "Failed to load a named file descriptor: %m");
3838
f2341e0a 3839 r = exec_context_load_environment(unit, context, &files_env);
ff0af2a1 3840 if (r < 0)
f2341e0a 3841 return log_unit_error_errno(unit, r, "Failed to load environment files: %m");
034c6ed7 3842
ee39ca20 3843 line = exec_command_line(command->argv);
d35fbf6b
DM
3844 if (!line)
3845 return log_oom();
fab56fc5 3846
f2341e0a 3847 log_struct(LOG_DEBUG,
f2341e0a
LP
3848 LOG_UNIT_MESSAGE(unit, "About to execute: %s", line),
3849 "EXECUTABLE=%s", command->path,
ba360bb0 3850 LOG_UNIT_ID(unit),
a1230ff9 3851 LOG_UNIT_INVOCATION_ID(unit));
12145637 3852
78f93209
LP
3853 if (params->cgroup_path) {
3854 r = exec_parameters_get_cgroup_path(params, &subcgroup_path);
3855 if (r < 0)
3856 return log_unit_error_errno(unit, r, "Failed to acquire subcgroup path: %m");
3857 if (r > 0) { /* We are using a child cgroup */
3858 r = cg_create(SYSTEMD_CGROUP_CONTROLLER, subcgroup_path);
3859 if (r < 0)
3860 return log_unit_error_errno(unit, r, "Failed to create control group '%s': %m", subcgroup_path);
3861 }
3862 }
3863
d35fbf6b
DM
3864 pid = fork();
3865 if (pid < 0)
74129a12 3866 return log_unit_error_errno(unit, errno, "Failed to fork: %m");
d35fbf6b
DM
3867
3868 if (pid == 0) {
12145637 3869 int exit_status = EXIT_SUCCESS;
ff0af2a1 3870
f2341e0a
LP
3871 r = exec_child(unit,
3872 command,
ff0af2a1
LP
3873 context,
3874 params,
3875 runtime,
29206d46 3876 dcreds,
ff0af2a1 3877 socket_fd,
52c239d7 3878 named_iofds,
4c47affc 3879 fds,
9b141911 3880 n_socket_fds,
25b583d7 3881 n_storage_fds,
ff0af2a1 3882 files_env,
00d9ef85 3883 unit->manager->user_lookup_fds[1],
12145637
LP
3884 &exit_status);
3885
e1714f02
ZJS
3886 if (r < 0) {
3887 const char *status =
3888 exit_status_to_string(exit_status,
e04ed6db 3889 EXIT_STATUS_LIBC | EXIT_STATUS_SYSTEMD);
e1714f02 3890
12145637
LP
3891 log_struct_errno(LOG_ERR, r,
3892 "MESSAGE_ID=" SD_MESSAGE_SPAWN_FAILED_STR,
3893 LOG_UNIT_ID(unit),
3894 LOG_UNIT_INVOCATION_ID(unit),
3895 LOG_UNIT_MESSAGE(unit, "Failed at step %s spawning %s: %m",
e1714f02 3896 status, command->path),
a1230ff9 3897 "EXECUTABLE=%s", command->path);
e1714f02 3898 }
4c2630eb 3899
ff0af2a1 3900 _exit(exit_status);
034c6ed7
LP
3901 }
3902
f2341e0a 3903 log_unit_debug(unit, "Forked %s as "PID_FMT, command->path, pid);
23635a85 3904
78f93209
LP
3905 /* We add the new process to the cgroup both in the child (so that we can be sure that no user code is ever
3906 * executed outside of the cgroup) and in the parent (so that we can be sure that when we kill the cgroup the
3907 * process will be killed too). */
3908 if (subcgroup_path)
3909 (void) cg_attach(SYSTEMD_CGROUP_CONTROLLER, subcgroup_path, pid);
2da3263a 3910
b58b4116 3911 exec_status_start(&command->exec_status, pid);
9fb86720 3912
034c6ed7 3913 *ret = pid;
5cb5a6ff
LP
3914 return 0;
3915}
3916
034c6ed7 3917void exec_context_init(ExecContext *c) {
3536f49e
YW
3918 ExecDirectoryType i;
3919
034c6ed7
LP
3920 assert(c);
3921
4c12626c 3922 c->umask = 0022;
9eba9da4 3923 c->ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 0);
94f04347 3924 c->cpu_sched_policy = SCHED_OTHER;
071830ff 3925 c->syslog_priority = LOG_DAEMON|LOG_INFO;
74922904 3926 c->syslog_level_prefix = true;
353e12c2 3927 c->ignore_sigpipe = true;
3a43da28 3928 c->timer_slack_nsec = NSEC_INFINITY;
050f7277 3929 c->personality = PERSONALITY_INVALID;
72fd1768 3930 for (i = 0; i < _EXEC_DIRECTORY_TYPE_MAX; i++)
3536f49e 3931 c->directories[i].mode = 0755;
12213aed 3932 c->timeout_clean_usec = USEC_INFINITY;
a103496c 3933 c->capability_bounding_set = CAP_ALL;
aa9d574d
YW
3934 assert_cc(NAMESPACE_FLAGS_INITIAL != NAMESPACE_FLAGS_ALL);
3935 c->restrict_namespaces = NAMESPACE_FLAGS_INITIAL;
d3070fbd 3936 c->log_level_max = -1;
b070c7c0 3937 numa_policy_reset(&c->numa_policy);
034c6ed7
LP
3938}
3939
613b411c 3940void exec_context_done(ExecContext *c) {
3536f49e 3941 ExecDirectoryType i;
d3070fbd 3942 size_t l;
5cb5a6ff
LP
3943
3944 assert(c);
3945
6796073e
LP
3946 c->environment = strv_free(c->environment);
3947 c->environment_files = strv_free(c->environment_files);
b4c14404 3948 c->pass_environment = strv_free(c->pass_environment);
00819cc1 3949 c->unset_environment = strv_free(c->unset_environment);
8c7be95e 3950
31ce987c 3951 rlimit_free_all(c->rlimit);
034c6ed7 3952
2038c3f5 3953 for (l = 0; l < 3; l++) {
52c239d7 3954 c->stdio_fdname[l] = mfree(c->stdio_fdname[l]);
2038c3f5
LP
3955 c->stdio_file[l] = mfree(c->stdio_file[l]);
3956 }
52c239d7 3957
a1e58e8e
LP
3958 c->working_directory = mfree(c->working_directory);
3959 c->root_directory = mfree(c->root_directory);
915e6d16 3960 c->root_image = mfree(c->root_image);
a1e58e8e
LP
3961 c->tty_path = mfree(c->tty_path);
3962 c->syslog_identifier = mfree(c->syslog_identifier);
3963 c->user = mfree(c->user);
3964 c->group = mfree(c->group);
034c6ed7 3965
6796073e 3966 c->supplementary_groups = strv_free(c->supplementary_groups);
94f04347 3967
a1e58e8e 3968 c->pam_name = mfree(c->pam_name);
5b6319dc 3969
2a624c36
AP
3970 c->read_only_paths = strv_free(c->read_only_paths);
3971 c->read_write_paths = strv_free(c->read_write_paths);
3972 c->inaccessible_paths = strv_free(c->inaccessible_paths);
82c121a4 3973
d2d6c096 3974 bind_mount_free_many(c->bind_mounts, c->n_bind_mounts);
8e06d57c
YW
3975 c->bind_mounts = NULL;
3976 c->n_bind_mounts = 0;
2abd4e38
YW
3977 temporary_filesystem_free_many(c->temporary_filesystems, c->n_temporary_filesystems);
3978 c->temporary_filesystems = NULL;
3979 c->n_temporary_filesystems = 0;
d2d6c096 3980
0985c7c4 3981 cpu_set_reset(&c->cpu_set);
b070c7c0 3982 numa_policy_reset(&c->numa_policy);
86a3475b 3983
a1e58e8e
LP
3984 c->utmp_id = mfree(c->utmp_id);
3985 c->selinux_context = mfree(c->selinux_context);
3986 c->apparmor_profile = mfree(c->apparmor_profile);
5b8e1b77 3987 c->smack_process_label = mfree(c->smack_process_label);
eef65bf3 3988
8cfa775f 3989 c->syscall_filter = hashmap_free(c->syscall_filter);
525d3cc7
LP
3990 c->syscall_archs = set_free(c->syscall_archs);
3991 c->address_families = set_free(c->address_families);
e66cf1a3 3992
72fd1768 3993 for (i = 0; i < _EXEC_DIRECTORY_TYPE_MAX; i++)
3536f49e 3994 c->directories[i].paths = strv_free(c->directories[i].paths);
d3070fbd
LP
3995
3996 c->log_level_max = -1;
3997
3998 exec_context_free_log_extra_fields(c);
08f3be7a 3999
5ac1530e
ZJS
4000 c->log_ratelimit_interval_usec = 0;
4001 c->log_ratelimit_burst = 0;
90fc172e 4002
08f3be7a
LP
4003 c->stdin_data = mfree(c->stdin_data);
4004 c->stdin_data_size = 0;
a8d08f39
LP
4005
4006 c->network_namespace_path = mfree(c->network_namespace_path);
e66cf1a3
LP
4007}
4008
34cf6c43 4009int exec_context_destroy_runtime_directory(const ExecContext *c, const char *runtime_prefix) {
e66cf1a3
LP
4010 char **i;
4011
4012 assert(c);
4013
4014 if (!runtime_prefix)
4015 return 0;
4016
3536f49e 4017 STRV_FOREACH(i, c->directories[EXEC_DIRECTORY_RUNTIME].paths) {
e66cf1a3
LP
4018 _cleanup_free_ char *p;
4019
494d0247
YW
4020 if (exec_directory_is_private(c, EXEC_DIRECTORY_RUNTIME))
4021 p = path_join(runtime_prefix, "private", *i);
4022 else
4023 p = path_join(runtime_prefix, *i);
e66cf1a3
LP
4024 if (!p)
4025 return -ENOMEM;
4026
7bc4bf4a
LP
4027 /* We execute this synchronously, since we need to be sure this is gone when we start the
4028 * service next. */
c6878637 4029 (void) rm_rf(p, REMOVE_ROOT);
e66cf1a3
LP
4030 }
4031
4032 return 0;
5cb5a6ff
LP
4033}
4034
34cf6c43 4035static void exec_command_done(ExecCommand *c) {
43d0fcbd
LP
4036 assert(c);
4037
a1e58e8e 4038 c->path = mfree(c->path);
6796073e 4039 c->argv = strv_free(c->argv);
43d0fcbd
LP
4040}
4041
da6053d0
LP
4042void exec_command_done_array(ExecCommand *c, size_t n) {
4043 size_t i;
43d0fcbd
LP
4044
4045 for (i = 0; i < n; i++)
4046 exec_command_done(c+i);
4047}
4048
f1acf85a 4049ExecCommand* exec_command_free_list(ExecCommand *c) {
5cb5a6ff
LP
4050 ExecCommand *i;
4051
4052 while ((i = c)) {
71fda00f 4053 LIST_REMOVE(command, c, i);
43d0fcbd 4054 exec_command_done(i);
5cb5a6ff
LP
4055 free(i);
4056 }
f1acf85a
ZJS
4057
4058 return NULL;
5cb5a6ff
LP
4059}
4060
da6053d0
LP
4061void exec_command_free_array(ExecCommand **c, size_t n) {
4062 size_t i;
034c6ed7 4063
f1acf85a
ZJS
4064 for (i = 0; i < n; i++)
4065 c[i] = exec_command_free_list(c[i]);
034c6ed7
LP
4066}
4067
6a1d4d9f
LP
4068void exec_command_reset_status_array(ExecCommand *c, size_t n) {
4069 size_t i;
4070
4071 for (i = 0; i < n; i++)
4072 exec_status_reset(&c[i].exec_status);
4073}
4074
4075void exec_command_reset_status_list_array(ExecCommand **c, size_t n) {
4076 size_t i;
4077
4078 for (i = 0; i < n; i++) {
4079 ExecCommand *z;
4080
4081 LIST_FOREACH(command, z, c[i])
4082 exec_status_reset(&z->exec_status);
4083 }
4084}
4085
039f0e70 4086typedef struct InvalidEnvInfo {
34cf6c43 4087 const Unit *unit;
039f0e70
LP
4088 const char *path;
4089} InvalidEnvInfo;
4090
4091static void invalid_env(const char *p, void *userdata) {
4092 InvalidEnvInfo *info = userdata;
4093
f2341e0a 4094 log_unit_error(info->unit, "Ignoring invalid environment assignment '%s': %s", p, info->path);
039f0e70
LP
4095}
4096
52c239d7
LB
4097const char* exec_context_fdname(const ExecContext *c, int fd_index) {
4098 assert(c);
4099
4100 switch (fd_index) {
5073ff6b 4101
52c239d7
LB
4102 case STDIN_FILENO:
4103 if (c->std_input != EXEC_INPUT_NAMED_FD)
4104 return NULL;
5073ff6b 4105
52c239d7 4106 return c->stdio_fdname[STDIN_FILENO] ?: "stdin";
5073ff6b 4107
52c239d7
LB
4108 case STDOUT_FILENO:
4109 if (c->std_output != EXEC_OUTPUT_NAMED_FD)
4110 return NULL;
5073ff6b 4111
52c239d7 4112 return c->stdio_fdname[STDOUT_FILENO] ?: "stdout";
5073ff6b 4113
52c239d7
LB
4114 case STDERR_FILENO:
4115 if (c->std_error != EXEC_OUTPUT_NAMED_FD)
4116 return NULL;
5073ff6b 4117
52c239d7 4118 return c->stdio_fdname[STDERR_FILENO] ?: "stderr";
5073ff6b 4119
52c239d7
LB
4120 default:
4121 return NULL;
4122 }
4123}
4124
2caa38e9
LP
4125static int exec_context_named_iofds(
4126 const ExecContext *c,
4127 const ExecParameters *p,
4128 int named_iofds[static 3]) {
4129
da6053d0 4130 size_t i, targets;
56fbd561 4131 const char* stdio_fdname[3];
da6053d0 4132 size_t n_fds;
52c239d7
LB
4133
4134 assert(c);
4135 assert(p);
2caa38e9 4136 assert(named_iofds);
52c239d7
LB
4137
4138 targets = (c->std_input == EXEC_INPUT_NAMED_FD) +
4139 (c->std_output == EXEC_OUTPUT_NAMED_FD) +
4140 (c->std_error == EXEC_OUTPUT_NAMED_FD);
4141
4142 for (i = 0; i < 3; i++)
4143 stdio_fdname[i] = exec_context_fdname(c, i);
4144
4c47affc
FB
4145 n_fds = p->n_storage_fds + p->n_socket_fds;
4146
4147 for (i = 0; i < n_fds && targets > 0; i++)
56fbd561
ZJS
4148 if (named_iofds[STDIN_FILENO] < 0 &&
4149 c->std_input == EXEC_INPUT_NAMED_FD &&
4150 stdio_fdname[STDIN_FILENO] &&
4151 streq(p->fd_names[i], stdio_fdname[STDIN_FILENO])) {
4152
52c239d7
LB
4153 named_iofds[STDIN_FILENO] = p->fds[i];
4154 targets--;
56fbd561
ZJS
4155
4156 } else if (named_iofds[STDOUT_FILENO] < 0 &&
4157 c->std_output == EXEC_OUTPUT_NAMED_FD &&
4158 stdio_fdname[STDOUT_FILENO] &&
4159 streq(p->fd_names[i], stdio_fdname[STDOUT_FILENO])) {
4160
52c239d7
LB
4161 named_iofds[STDOUT_FILENO] = p->fds[i];
4162 targets--;
56fbd561
ZJS
4163
4164 } else if (named_iofds[STDERR_FILENO] < 0 &&
4165 c->std_error == EXEC_OUTPUT_NAMED_FD &&
4166 stdio_fdname[STDERR_FILENO] &&
4167 streq(p->fd_names[i], stdio_fdname[STDERR_FILENO])) {
4168
52c239d7
LB
4169 named_iofds[STDERR_FILENO] = p->fds[i];
4170 targets--;
4171 }
4172
56fbd561 4173 return targets == 0 ? 0 : -ENOENT;
52c239d7
LB
4174}
4175
34cf6c43 4176static int exec_context_load_environment(const Unit *unit, const ExecContext *c, char ***l) {
8c7be95e
LP
4177 char **i, **r = NULL;
4178
4179 assert(c);
4180 assert(l);
4181
4182 STRV_FOREACH(i, c->environment_files) {
4183 char *fn;
52511fae
ZJS
4184 int k;
4185 unsigned n;
8c7be95e
LP
4186 bool ignore = false;
4187 char **p;
7fd1b19b 4188 _cleanup_globfree_ glob_t pglob = {};
8c7be95e
LP
4189
4190 fn = *i;
4191
4192 if (fn[0] == '-') {
4193 ignore = true;
313cefa1 4194 fn++;
8c7be95e
LP
4195 }
4196
4197 if (!path_is_absolute(fn)) {
8c7be95e
LP
4198 if (ignore)
4199 continue;
4200
4201 strv_free(r);
4202 return -EINVAL;
4203 }
4204
2bef10ab 4205 /* Filename supports globbing, take all matching files */
d8c92e8b
ZJS
4206 k = safe_glob(fn, 0, &pglob);
4207 if (k < 0) {
2bef10ab
PL
4208 if (ignore)
4209 continue;
8c7be95e 4210
2bef10ab 4211 strv_free(r);
d8c92e8b 4212 return k;
2bef10ab 4213 }
8c7be95e 4214
d8c92e8b
ZJS
4215 /* When we don't match anything, -ENOENT should be returned */
4216 assert(pglob.gl_pathc > 0);
4217
4218 for (n = 0; n < pglob.gl_pathc; n++) {
aa8fbc74 4219 k = load_env_file(NULL, pglob.gl_pathv[n], &p);
2bef10ab
PL
4220 if (k < 0) {
4221 if (ignore)
4222 continue;
8c7be95e 4223
2bef10ab 4224 strv_free(r);
2bef10ab 4225 return k;
e9c1ea9d 4226 }
ebc05a09 4227 /* Log invalid environment variables with filename */
039f0e70
LP
4228 if (p) {
4229 InvalidEnvInfo info = {
f2341e0a 4230 .unit = unit,
039f0e70
LP
4231 .path = pglob.gl_pathv[n]
4232 };
4233
4234 p = strv_env_clean_with_callback(p, invalid_env, &info);
4235 }
8c7be95e 4236
234519ae 4237 if (!r)
2bef10ab
PL
4238 r = p;
4239 else {
4240 char **m;
8c7be95e 4241
2bef10ab
PL
4242 m = strv_env_merge(2, r, p);
4243 strv_free(r);
4244 strv_free(p);
c84a9488 4245 if (!m)
2bef10ab 4246 return -ENOMEM;
2bef10ab
PL
4247
4248 r = m;
4249 }
8c7be95e
LP
4250 }
4251 }
4252
4253 *l = r;
4254
4255 return 0;
4256}
4257
6ac8fdc9 4258static bool tty_may_match_dev_console(const char *tty) {
7b912648 4259 _cleanup_free_ char *resolved = NULL;
6ac8fdc9 4260
1e22b5cd
LP
4261 if (!tty)
4262 return true;
4263
a119ec7c 4264 tty = skip_dev_prefix(tty);
6ac8fdc9
MS
4265
4266 /* trivial identity? */
4267 if (streq(tty, "console"))
4268 return true;
4269
7b912648
LP
4270 if (resolve_dev_console(&resolved) < 0)
4271 return true; /* if we could not resolve, assume it may */
6ac8fdc9
MS
4272
4273 /* "tty0" means the active VC, so it may be the same sometimes */
955f1c85 4274 return path_equal(resolved, tty) || (streq(resolved, "tty0") && tty_is_vc(tty));
6ac8fdc9
MS
4275}
4276
6c0ae739
LP
4277static bool exec_context_may_touch_tty(const ExecContext *ec) {
4278 assert(ec);
1e22b5cd 4279
6c0ae739 4280 return ec->tty_reset ||
1e22b5cd
LP
4281 ec->tty_vhangup ||
4282 ec->tty_vt_disallocate ||
6ac8fdc9
MS
4283 is_terminal_input(ec->std_input) ||
4284 is_terminal_output(ec->std_output) ||
6c0ae739
LP
4285 is_terminal_output(ec->std_error);
4286}
4287
4288bool exec_context_may_touch_console(const ExecContext *ec) {
4289
4290 return exec_context_may_touch_tty(ec) &&
1e22b5cd 4291 tty_may_match_dev_console(exec_context_tty_path(ec));
6ac8fdc9
MS
4292}
4293
15ae422b
LP
4294static void strv_fprintf(FILE *f, char **l) {
4295 char **g;
4296
4297 assert(f);
4298
4299 STRV_FOREACH(g, l)
4300 fprintf(f, " %s", *g);
4301}
4302
34cf6c43 4303void exec_context_dump(const ExecContext *c, FILE* f, const char *prefix) {
12213aed 4304 char **e, **d, buf_clean[FORMAT_TIMESPAN_MAX];
d3070fbd 4305 ExecDirectoryType dt;
94f04347 4306 unsigned i;
add00535 4307 int r;
9eba9da4 4308
5cb5a6ff
LP
4309 assert(c);
4310 assert(f);
4311
4ad49000 4312 prefix = strempty(prefix);
5cb5a6ff
LP
4313
4314 fprintf(f,
94f04347
LP
4315 "%sUMask: %04o\n"
4316 "%sWorkingDirectory: %s\n"
451a074f 4317 "%sRootDirectory: %s\n"
15ae422b 4318 "%sNonBlocking: %s\n"
64747e2d 4319 "%sPrivateTmp: %s\n"
7f112f50 4320 "%sPrivateDevices: %s\n"
59eeb84b 4321 "%sProtectKernelTunables: %s\n"
e66a2f65 4322 "%sProtectKernelModules: %s\n"
59eeb84b 4323 "%sProtectControlGroups: %s\n"
d251207d
LP
4324 "%sPrivateNetwork: %s\n"
4325 "%sPrivateUsers: %s\n"
1b8689f9
LP
4326 "%sProtectHome: %s\n"
4327 "%sProtectSystem: %s\n"
5d997827 4328 "%sMountAPIVFS: %s\n"
f3e43635 4329 "%sIgnoreSIGPIPE: %s\n"
f4170c67 4330 "%sMemoryDenyWriteExecute: %s\n"
b1edf445 4331 "%sRestrictRealtime: %s\n"
f69567cb 4332 "%sRestrictSUIDSGID: %s\n"
aecd5ac6
TM
4333 "%sKeyringMode: %s\n"
4334 "%sProtectHostname: %s\n",
5cb5a6ff 4335 prefix, c->umask,
9eba9da4 4336 prefix, c->working_directory ? c->working_directory : "/",
451a074f 4337 prefix, c->root_directory ? c->root_directory : "/",
15ae422b 4338 prefix, yes_no(c->non_blocking),
64747e2d 4339 prefix, yes_no(c->private_tmp),
7f112f50 4340 prefix, yes_no(c->private_devices),
59eeb84b 4341 prefix, yes_no(c->protect_kernel_tunables),
e66a2f65 4342 prefix, yes_no(c->protect_kernel_modules),
59eeb84b 4343 prefix, yes_no(c->protect_control_groups),
d251207d
LP
4344 prefix, yes_no(c->private_network),
4345 prefix, yes_no(c->private_users),
1b8689f9
LP
4346 prefix, protect_home_to_string(c->protect_home),
4347 prefix, protect_system_to_string(c->protect_system),
5d997827 4348 prefix, yes_no(c->mount_apivfs),
f3e43635 4349 prefix, yes_no(c->ignore_sigpipe),
f4170c67 4350 prefix, yes_no(c->memory_deny_write_execute),
b1edf445 4351 prefix, yes_no(c->restrict_realtime),
f69567cb 4352 prefix, yes_no(c->restrict_suid_sgid),
aecd5ac6
TM
4353 prefix, exec_keyring_mode_to_string(c->keyring_mode),
4354 prefix, yes_no(c->protect_hostname));
fb33a393 4355
915e6d16
LP
4356 if (c->root_image)
4357 fprintf(f, "%sRootImage: %s\n", prefix, c->root_image);
4358
8c7be95e
LP
4359 STRV_FOREACH(e, c->environment)
4360 fprintf(f, "%sEnvironment: %s\n", prefix, *e);
4361
4362 STRV_FOREACH(e, c->environment_files)
4363 fprintf(f, "%sEnvironmentFile: %s\n", prefix, *e);
94f04347 4364
b4c14404
FB
4365 STRV_FOREACH(e, c->pass_environment)
4366 fprintf(f, "%sPassEnvironment: %s\n", prefix, *e);
4367
00819cc1
LP
4368 STRV_FOREACH(e, c->unset_environment)
4369 fprintf(f, "%sUnsetEnvironment: %s\n", prefix, *e);
4370
53f47dfc
YW
4371 fprintf(f, "%sRuntimeDirectoryPreserve: %s\n", prefix, exec_preserve_mode_to_string(c->runtime_directory_preserve_mode));
4372
72fd1768 4373 for (dt = 0; dt < _EXEC_DIRECTORY_TYPE_MAX; dt++) {
3536f49e
YW
4374 fprintf(f, "%s%sMode: %04o\n", prefix, exec_directory_type_to_string(dt), c->directories[dt].mode);
4375
4376 STRV_FOREACH(d, c->directories[dt].paths)
4377 fprintf(f, "%s%s: %s\n", prefix, exec_directory_type_to_string(dt), *d);
4378 }
c2bbd90b 4379
12213aed
YW
4380 fprintf(f,
4381 "%sTimeoutCleanSec: %s\n",
4382 prefix, format_timespan(buf_clean, sizeof(buf_clean), c->timeout_clean_usec, USEC_PER_SEC));
4383
fb33a393
LP
4384 if (c->nice_set)
4385 fprintf(f,
4386 "%sNice: %i\n",
4387 prefix, c->nice);
4388
dd6c17b1 4389 if (c->oom_score_adjust_set)
fb33a393 4390 fprintf(f,
dd6c17b1
LP
4391 "%sOOMScoreAdjust: %i\n",
4392 prefix, c->oom_score_adjust);
9eba9da4 4393
94f04347 4394 for (i = 0; i < RLIM_NLIMITS; i++)
3c11da9d 4395 if (c->rlimit[i]) {
4c3a2b84 4396 fprintf(f, "%sLimit%s: " RLIM_FMT "\n",
3c11da9d 4397 prefix, rlimit_to_string(i), c->rlimit[i]->rlim_max);
4c3a2b84 4398 fprintf(f, "%sLimit%sSoft: " RLIM_FMT "\n",
3c11da9d
EV
4399 prefix, rlimit_to_string(i), c->rlimit[i]->rlim_cur);
4400 }
94f04347 4401
f8b69d1d 4402 if (c->ioprio_set) {
1756a011 4403 _cleanup_free_ char *class_str = NULL;
f8b69d1d 4404
837df140
YW
4405 r = ioprio_class_to_string_alloc(IOPRIO_PRIO_CLASS(c->ioprio), &class_str);
4406 if (r >= 0)
4407 fprintf(f, "%sIOSchedulingClass: %s\n", prefix, class_str);
4408
4409 fprintf(f, "%sIOPriority: %lu\n", prefix, IOPRIO_PRIO_DATA(c->ioprio));
f8b69d1d 4410 }
94f04347 4411
f8b69d1d 4412 if (c->cpu_sched_set) {
1756a011 4413 _cleanup_free_ char *policy_str = NULL;
f8b69d1d 4414
837df140
YW
4415 r = sched_policy_to_string_alloc(c->cpu_sched_policy, &policy_str);
4416 if (r >= 0)
4417 fprintf(f, "%sCPUSchedulingPolicy: %s\n", prefix, policy_str);
4418
94f04347 4419 fprintf(f,
38b48754
LP
4420 "%sCPUSchedulingPriority: %i\n"
4421 "%sCPUSchedulingResetOnFork: %s\n",
38b48754
LP
4422 prefix, c->cpu_sched_priority,
4423 prefix, yes_no(c->cpu_sched_reset_on_fork));
b929bf04 4424 }
94f04347 4425
0985c7c4 4426 if (c->cpu_set.set) {
e7fca352
MS
4427 _cleanup_free_ char *affinity = NULL;
4428
4429 affinity = cpu_set_to_range_string(&c->cpu_set);
4430 fprintf(f, "%sCPUAffinity: %s\n", prefix, affinity);
94f04347
LP
4431 }
4432
b070c7c0
MS
4433 if (mpol_is_valid(numa_policy_get_type(&c->numa_policy))) {
4434 _cleanup_free_ char *nodes = NULL;
4435
4436 nodes = cpu_set_to_range_string(&c->numa_policy.nodes);
4437 fprintf(f, "%sNUMAPolicy: %s\n", prefix, mpol_to_string(numa_policy_get_type(&c->numa_policy)));
4438 fprintf(f, "%sNUMAMask: %s\n", prefix, strnull(nodes));
4439 }
4440
3a43da28 4441 if (c->timer_slack_nsec != NSEC_INFINITY)
ccd06097 4442 fprintf(f, "%sTimerSlackNSec: "NSEC_FMT "\n", prefix, c->timer_slack_nsec);
94f04347
LP
4443
4444 fprintf(f,
80876c20
LP
4445 "%sStandardInput: %s\n"
4446 "%sStandardOutput: %s\n"
4447 "%sStandardError: %s\n",
4448 prefix, exec_input_to_string(c->std_input),
4449 prefix, exec_output_to_string(c->std_output),
4450 prefix, exec_output_to_string(c->std_error));
4451
befc4a80
LP
4452 if (c->std_input == EXEC_INPUT_NAMED_FD)
4453 fprintf(f, "%sStandardInputFileDescriptorName: %s\n", prefix, c->stdio_fdname[STDIN_FILENO]);
4454 if (c->std_output == EXEC_OUTPUT_NAMED_FD)
4455 fprintf(f, "%sStandardOutputFileDescriptorName: %s\n", prefix, c->stdio_fdname[STDOUT_FILENO]);
4456 if (c->std_error == EXEC_OUTPUT_NAMED_FD)
4457 fprintf(f, "%sStandardErrorFileDescriptorName: %s\n", prefix, c->stdio_fdname[STDERR_FILENO]);
4458
4459 if (c->std_input == EXEC_INPUT_FILE)
4460 fprintf(f, "%sStandardInputFile: %s\n", prefix, c->stdio_file[STDIN_FILENO]);
4461 if (c->std_output == EXEC_OUTPUT_FILE)
4462 fprintf(f, "%sStandardOutputFile: %s\n", prefix, c->stdio_file[STDOUT_FILENO]);
566b7d23
ZD
4463 if (c->std_output == EXEC_OUTPUT_FILE_APPEND)
4464 fprintf(f, "%sStandardOutputFileToAppend: %s\n", prefix, c->stdio_file[STDOUT_FILENO]);
befc4a80
LP
4465 if (c->std_error == EXEC_OUTPUT_FILE)
4466 fprintf(f, "%sStandardErrorFile: %s\n", prefix, c->stdio_file[STDERR_FILENO]);
566b7d23
ZD
4467 if (c->std_error == EXEC_OUTPUT_FILE_APPEND)
4468 fprintf(f, "%sStandardErrorFileToAppend: %s\n", prefix, c->stdio_file[STDERR_FILENO]);
befc4a80 4469
80876c20
LP
4470 if (c->tty_path)
4471 fprintf(f,
6ea832a2
LP
4472 "%sTTYPath: %s\n"
4473 "%sTTYReset: %s\n"
4474 "%sTTYVHangup: %s\n"
4475 "%sTTYVTDisallocate: %s\n",
4476 prefix, c->tty_path,
4477 prefix, yes_no(c->tty_reset),
4478 prefix, yes_no(c->tty_vhangup),
4479 prefix, yes_no(c->tty_vt_disallocate));
94f04347 4480
9f6444eb
LP
4481 if (IN_SET(c->std_output,
4482 EXEC_OUTPUT_SYSLOG,
4483 EXEC_OUTPUT_KMSG,
4484 EXEC_OUTPUT_JOURNAL,
4485 EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
4486 EXEC_OUTPUT_KMSG_AND_CONSOLE,
4487 EXEC_OUTPUT_JOURNAL_AND_CONSOLE) ||
4488 IN_SET(c->std_error,
4489 EXEC_OUTPUT_SYSLOG,
4490 EXEC_OUTPUT_KMSG,
4491 EXEC_OUTPUT_JOURNAL,
4492 EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
4493 EXEC_OUTPUT_KMSG_AND_CONSOLE,
4494 EXEC_OUTPUT_JOURNAL_AND_CONSOLE)) {
f8b69d1d 4495
5ce70e5b 4496 _cleanup_free_ char *fac_str = NULL, *lvl_str = NULL;
f8b69d1d 4497
837df140
YW
4498 r = log_facility_unshifted_to_string_alloc(c->syslog_priority >> 3, &fac_str);
4499 if (r >= 0)
4500 fprintf(f, "%sSyslogFacility: %s\n", prefix, fac_str);
f8b69d1d 4501
837df140
YW
4502 r = log_level_to_string_alloc(LOG_PRI(c->syslog_priority), &lvl_str);
4503 if (r >= 0)
4504 fprintf(f, "%sSyslogLevel: %s\n", prefix, lvl_str);
f8b69d1d 4505 }
94f04347 4506
d3070fbd
LP
4507 if (c->log_level_max >= 0) {
4508 _cleanup_free_ char *t = NULL;
4509
4510 (void) log_level_to_string_alloc(c->log_level_max, &t);
4511
4512 fprintf(f, "%sLogLevelMax: %s\n", prefix, strna(t));
4513 }
4514
5ac1530e 4515 if (c->log_ratelimit_interval_usec > 0) {
90fc172e
AZ
4516 char buf_timespan[FORMAT_TIMESPAN_MAX];
4517
4518 fprintf(f,
4519 "%sLogRateLimitIntervalSec: %s\n",
5ac1530e 4520 prefix, format_timespan(buf_timespan, sizeof(buf_timespan), c->log_ratelimit_interval_usec, USEC_PER_SEC));
90fc172e
AZ
4521 }
4522
5ac1530e
ZJS
4523 if (c->log_ratelimit_burst > 0)
4524 fprintf(f, "%sLogRateLimitBurst: %u\n", prefix, c->log_ratelimit_burst);
90fc172e 4525
d3070fbd
LP
4526 if (c->n_log_extra_fields > 0) {
4527 size_t j;
4528
4529 for (j = 0; j < c->n_log_extra_fields; j++) {
4530 fprintf(f, "%sLogExtraFields: ", prefix);
4531 fwrite(c->log_extra_fields[j].iov_base,
4532 1, c->log_extra_fields[j].iov_len,
4533 f);
4534 fputc('\n', f);
4535 }
4536 }
4537
07d46372
YW
4538 if (c->secure_bits) {
4539 _cleanup_free_ char *str = NULL;
4540
4541 r = secure_bits_to_string_alloc(c->secure_bits, &str);
4542 if (r >= 0)
4543 fprintf(f, "%sSecure Bits: %s\n", prefix, str);
4544 }
94f04347 4545
a103496c 4546 if (c->capability_bounding_set != CAP_ALL) {
dd1f5bd0 4547 _cleanup_free_ char *str = NULL;
94f04347 4548
dd1f5bd0
YW
4549 r = capability_set_to_string_alloc(c->capability_bounding_set, &str);
4550 if (r >= 0)
4551 fprintf(f, "%sCapabilityBoundingSet: %s\n", prefix, str);
755d4b67
IP
4552 }
4553
4554 if (c->capability_ambient_set != 0) {
dd1f5bd0 4555 _cleanup_free_ char *str = NULL;
755d4b67 4556
dd1f5bd0
YW
4557 r = capability_set_to_string_alloc(c->capability_ambient_set, &str);
4558 if (r >= 0)
4559 fprintf(f, "%sAmbientCapabilities: %s\n", prefix, str);
94f04347
LP
4560 }
4561
4562 if (c->user)
f2d3769a 4563 fprintf(f, "%sUser: %s\n", prefix, c->user);
94f04347 4564 if (c->group)
f2d3769a 4565 fprintf(f, "%sGroup: %s\n", prefix, c->group);
94f04347 4566
29206d46
LP
4567 fprintf(f, "%sDynamicUser: %s\n", prefix, yes_no(c->dynamic_user));
4568
ac6e8be6 4569 if (!strv_isempty(c->supplementary_groups)) {
94f04347 4570 fprintf(f, "%sSupplementaryGroups:", prefix);
15ae422b
LP
4571 strv_fprintf(f, c->supplementary_groups);
4572 fputs("\n", f);
4573 }
94f04347 4574
5b6319dc 4575 if (c->pam_name)
f2d3769a 4576 fprintf(f, "%sPAMName: %s\n", prefix, c->pam_name);
5b6319dc 4577
58629001 4578 if (!strv_isempty(c->read_write_paths)) {
2a624c36
AP
4579 fprintf(f, "%sReadWritePaths:", prefix);
4580 strv_fprintf(f, c->read_write_paths);
15ae422b
LP
4581 fputs("\n", f);
4582 }
4583
58629001 4584 if (!strv_isempty(c->read_only_paths)) {
2a624c36
AP
4585 fprintf(f, "%sReadOnlyPaths:", prefix);
4586 strv_fprintf(f, c->read_only_paths);
15ae422b
LP
4587 fputs("\n", f);
4588 }
94f04347 4589
58629001 4590 if (!strv_isempty(c->inaccessible_paths)) {
2a624c36
AP
4591 fprintf(f, "%sInaccessiblePaths:", prefix);
4592 strv_fprintf(f, c->inaccessible_paths);
94f04347
LP
4593 fputs("\n", f);
4594 }
2e22afe9 4595
d2d6c096 4596 if (c->n_bind_mounts > 0)
4ca763a9
YW
4597 for (i = 0; i < c->n_bind_mounts; i++)
4598 fprintf(f, "%s%s: %s%s:%s:%s\n", prefix,
d2d6c096 4599 c->bind_mounts[i].read_only ? "BindReadOnlyPaths" : "BindPaths",
4ca763a9 4600 c->bind_mounts[i].ignore_enoent ? "-": "",
d2d6c096
LP
4601 c->bind_mounts[i].source,
4602 c->bind_mounts[i].destination,
4603 c->bind_mounts[i].recursive ? "rbind" : "norbind");
d2d6c096 4604
2abd4e38
YW
4605 if (c->n_temporary_filesystems > 0)
4606 for (i = 0; i < c->n_temporary_filesystems; i++) {
4607 TemporaryFileSystem *t = c->temporary_filesystems + i;
4608
4609 fprintf(f, "%sTemporaryFileSystem: %s%s%s\n", prefix,
4610 t->path,
4611 isempty(t->options) ? "" : ":",
4612 strempty(t->options));
4613 }
4614
169c1bda
LP
4615 if (c->utmp_id)
4616 fprintf(f,
4617 "%sUtmpIdentifier: %s\n",
4618 prefix, c->utmp_id);
7b52a628
MS
4619
4620 if (c->selinux_context)
4621 fprintf(f,
5f8640fb
LP
4622 "%sSELinuxContext: %s%s\n",
4623 prefix, c->selinux_context_ignore ? "-" : "", c->selinux_context);
17df7223 4624
80c21aea
WC
4625 if (c->apparmor_profile)
4626 fprintf(f,
4627 "%sAppArmorProfile: %s%s\n",
4628 prefix, c->apparmor_profile_ignore ? "-" : "", c->apparmor_profile);
4629
4630 if (c->smack_process_label)
4631 fprintf(f,
4632 "%sSmackProcessLabel: %s%s\n",
4633 prefix, c->smack_process_label_ignore ? "-" : "", c->smack_process_label);
4634
050f7277 4635 if (c->personality != PERSONALITY_INVALID)
ac45f971
LP
4636 fprintf(f,
4637 "%sPersonality: %s\n",
4638 prefix, strna(personality_to_string(c->personality)));
4639
78e864e5
TM
4640 fprintf(f,
4641 "%sLockPersonality: %s\n",
4642 prefix, yes_no(c->lock_personality));
4643
17df7223 4644 if (c->syscall_filter) {
349cc4a5 4645#if HAVE_SECCOMP
17df7223 4646 Iterator j;
8cfa775f 4647 void *id, *val;
17df7223 4648 bool first = true;
351a19b1 4649#endif
17df7223
LP
4650
4651 fprintf(f,
57183d11 4652 "%sSystemCallFilter: ",
17df7223
LP
4653 prefix);
4654
4655 if (!c->syscall_whitelist)
4656 fputc('~', f);
4657
349cc4a5 4658#if HAVE_SECCOMP
8cfa775f 4659 HASHMAP_FOREACH_KEY(val, id, c->syscall_filter, j) {
17df7223 4660 _cleanup_free_ char *name = NULL;
8cfa775f
YW
4661 const char *errno_name = NULL;
4662 int num = PTR_TO_INT(val);
17df7223
LP
4663
4664 if (first)
4665 first = false;
4666 else
4667 fputc(' ', f);
4668
57183d11 4669 name = seccomp_syscall_resolve_num_arch(SCMP_ARCH_NATIVE, PTR_TO_INT(id) - 1);
17df7223 4670 fputs(strna(name), f);
8cfa775f
YW
4671
4672 if (num >= 0) {
4673 errno_name = errno_to_name(num);
4674 if (errno_name)
4675 fprintf(f, ":%s", errno_name);
4676 else
4677 fprintf(f, ":%d", num);
4678 }
17df7223 4679 }
351a19b1 4680#endif
17df7223
LP
4681
4682 fputc('\n', f);
4683 }
4684
57183d11 4685 if (c->syscall_archs) {
349cc4a5 4686#if HAVE_SECCOMP
57183d11
LP
4687 Iterator j;
4688 void *id;
4689#endif
4690
4691 fprintf(f,
4692 "%sSystemCallArchitectures:",
4693 prefix);
4694
349cc4a5 4695#if HAVE_SECCOMP
57183d11
LP
4696 SET_FOREACH(id, c->syscall_archs, j)
4697 fprintf(f, " %s", strna(seccomp_arch_to_string(PTR_TO_UINT32(id) - 1)));
4698#endif
4699 fputc('\n', f);
4700 }
4701
add00535
LP
4702 if (exec_context_restrict_namespaces_set(c)) {
4703 _cleanup_free_ char *s = NULL;
4704
86c2a9f1 4705 r = namespace_flags_to_string(c->restrict_namespaces, &s);
add00535
LP
4706 if (r >= 0)
4707 fprintf(f, "%sRestrictNamespaces: %s\n",
4708 prefix, s);
4709 }
4710
a8d08f39
LP
4711 if (c->network_namespace_path)
4712 fprintf(f,
4713 "%sNetworkNamespacePath: %s\n",
4714 prefix, c->network_namespace_path);
4715
3df90f24
YW
4716 if (c->syscall_errno > 0) {
4717 const char *errno_name;
4718
4719 fprintf(f, "%sSystemCallErrorNumber: ", prefix);
4720
4721 errno_name = errno_to_name(c->syscall_errno);
4722 if (errno_name)
4723 fprintf(f, "%s\n", errno_name);
4724 else
4725 fprintf(f, "%d\n", c->syscall_errno);
4726 }
5cb5a6ff
LP
4727}
4728
34cf6c43 4729bool exec_context_maintains_privileges(const ExecContext *c) {
a931ad47
LP
4730 assert(c);
4731
61233823 4732 /* Returns true if the process forked off would run under
a931ad47
LP
4733 * an unchanged UID or as root. */
4734
4735 if (!c->user)
4736 return true;
4737
4738 if (streq(c->user, "root") || streq(c->user, "0"))
4739 return true;
4740
4741 return false;
4742}
4743
34cf6c43 4744int exec_context_get_effective_ioprio(const ExecContext *c) {
7f452159
LP
4745 int p;
4746
4747 assert(c);
4748
4749 if (c->ioprio_set)
4750 return c->ioprio;
4751
4752 p = ioprio_get(IOPRIO_WHO_PROCESS, 0);
4753 if (p < 0)
4754 return IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 4);
4755
4756 return p;
4757}
4758
d3070fbd
LP
4759void exec_context_free_log_extra_fields(ExecContext *c) {
4760 size_t l;
4761
4762 assert(c);
4763
4764 for (l = 0; l < c->n_log_extra_fields; l++)
4765 free(c->log_extra_fields[l].iov_base);
4766 c->log_extra_fields = mfree(c->log_extra_fields);
4767 c->n_log_extra_fields = 0;
4768}
4769
6f765baf
LP
4770void exec_context_revert_tty(ExecContext *c) {
4771 int r;
4772
4773 assert(c);
4774
4775 /* First, reset the TTY (possibly kicking everybody else from the TTY) */
4776 exec_context_tty_reset(c, NULL);
4777
4778 /* And then undo what chown_terminal() did earlier. Note that we only do this if we have a path
4779 * configured. If the TTY was passed to us as file descriptor we assume the TTY is opened and managed
4780 * by whoever passed it to us and thus knows better when and how to chmod()/chown() it back. */
4781
4782 if (exec_context_may_touch_tty(c)) {
4783 const char *path;
4784
4785 path = exec_context_tty_path(c);
4786 if (path) {
4787 r = chmod_and_chown(path, TTY_MODE, 0, TTY_GID);
4788 if (r < 0 && r != -ENOENT)
4789 log_warning_errno(r, "Failed to reset TTY ownership/access mode of %s, ignoring: %m", path);
4790 }
4791 }
4792}
4793
4c2f5842
LP
4794int exec_context_get_clean_directories(
4795 ExecContext *c,
4796 char **prefix,
4797 ExecCleanMask mask,
4798 char ***ret) {
4799
4800 _cleanup_strv_free_ char **l = NULL;
4801 ExecDirectoryType t;
4802 int r;
4803
4804 assert(c);
4805 assert(prefix);
4806 assert(ret);
4807
4808 for (t = 0; t < _EXEC_DIRECTORY_TYPE_MAX; t++) {
4809 char **i;
4810
4811 if (!FLAGS_SET(mask, 1U << t))
4812 continue;
4813
4814 if (!prefix[t])
4815 continue;
4816
4817 STRV_FOREACH(i, c->directories[t].paths) {
4818 char *j;
4819
4820 j = path_join(prefix[t], *i);
4821 if (!j)
4822 return -ENOMEM;
4823
4824 r = strv_consume(&l, j);
4825 if (r < 0)
4826 return r;
7f622a19
YW
4827
4828 /* Also remove private directories unconditionally. */
4829 if (t != EXEC_DIRECTORY_CONFIGURATION) {
4830 j = path_join(prefix[t], "private", *i);
4831 if (!j)
4832 return -ENOMEM;
4833
4834 r = strv_consume(&l, j);
4835 if (r < 0)
4836 return r;
4837 }
4c2f5842
LP
4838 }
4839 }
4840
4841 *ret = TAKE_PTR(l);
4842 return 0;
4843}
4844
4845int exec_context_get_clean_mask(ExecContext *c, ExecCleanMask *ret) {
4846 ExecCleanMask mask = 0;
4847
4848 assert(c);
4849 assert(ret);
4850
4851 for (ExecDirectoryType t = 0; t < _EXEC_DIRECTORY_TYPE_MAX; t++)
4852 if (!strv_isempty(c->directories[t].paths))
4853 mask |= 1U << t;
4854
4855 *ret = mask;
4856 return 0;
4857}
4858
b58b4116 4859void exec_status_start(ExecStatus *s, pid_t pid) {
034c6ed7 4860 assert(s);
5cb5a6ff 4861
2ed26ed0
LP
4862 *s = (ExecStatus) {
4863 .pid = pid,
4864 };
4865
b58b4116
LP
4866 dual_timestamp_get(&s->start_timestamp);
4867}
4868
34cf6c43 4869void exec_status_exit(ExecStatus *s, const ExecContext *context, pid_t pid, int code, int status) {
b58b4116
LP
4870 assert(s);
4871
2ed26ed0
LP
4872 if (s->pid != pid) {
4873 *s = (ExecStatus) {
4874 .pid = pid,
4875 };
4876 }
b58b4116 4877
63983207 4878 dual_timestamp_get(&s->exit_timestamp);
9fb86720 4879
034c6ed7
LP
4880 s->code = code;
4881 s->status = status;
169c1bda 4882
6f765baf
LP
4883 if (context && context->utmp_id)
4884 (void) utmp_put_dead_process(context->utmp_id, pid, code, status);
9fb86720
LP
4885}
4886
6a1d4d9f
LP
4887void exec_status_reset(ExecStatus *s) {
4888 assert(s);
4889
4890 *s = (ExecStatus) {};
4891}
4892
34cf6c43 4893void exec_status_dump(const ExecStatus *s, FILE *f, const char *prefix) {
9fb86720
LP
4894 char buf[FORMAT_TIMESTAMP_MAX];
4895
4896 assert(s);
4897 assert(f);
4898
9fb86720
LP
4899 if (s->pid <= 0)
4900 return;
4901
4c940960
LP
4902 prefix = strempty(prefix);
4903
9fb86720 4904 fprintf(f,
ccd06097
ZJS
4905 "%sPID: "PID_FMT"\n",
4906 prefix, s->pid);
9fb86720 4907
af9d16e1 4908 if (dual_timestamp_is_set(&s->start_timestamp))
9fb86720
LP
4909 fprintf(f,
4910 "%sStart Timestamp: %s\n",
63983207 4911 prefix, format_timestamp(buf, sizeof(buf), s->start_timestamp.realtime));
9fb86720 4912
af9d16e1 4913 if (dual_timestamp_is_set(&s->exit_timestamp))
9fb86720
LP
4914 fprintf(f,
4915 "%sExit Timestamp: %s\n"
4916 "%sExit Code: %s\n"
4917 "%sExit Status: %i\n",
63983207 4918 prefix, format_timestamp(buf, sizeof(buf), s->exit_timestamp.realtime),
9fb86720
LP
4919 prefix, sigchld_code_to_string(s->code),
4920 prefix, s->status);
5cb5a6ff 4921}
44d8db9e 4922
34cf6c43 4923static char *exec_command_line(char **argv) {
44d8db9e
LP
4924 size_t k;
4925 char *n, *p, **a;
4926 bool first = true;
4927
9e2f7c11 4928 assert(argv);
44d8db9e 4929
9164977d 4930 k = 1;
9e2f7c11 4931 STRV_FOREACH(a, argv)
44d8db9e
LP
4932 k += strlen(*a)+3;
4933
5cd9cd35
LP
4934 n = new(char, k);
4935 if (!n)
44d8db9e
LP
4936 return NULL;
4937
4938 p = n;
9e2f7c11 4939 STRV_FOREACH(a, argv) {
44d8db9e
LP
4940
4941 if (!first)
4942 *(p++) = ' ';
4943 else
4944 first = false;
4945
4946 if (strpbrk(*a, WHITESPACE)) {
4947 *(p++) = '\'';
4948 p = stpcpy(p, *a);
4949 *(p++) = '\'';
4950 } else
4951 p = stpcpy(p, *a);
4952
4953 }
4954
9164977d
LP
4955 *p = 0;
4956
44d8db9e
LP
4957 /* FIXME: this doesn't really handle arguments that have
4958 * spaces and ticks in them */
4959
4960 return n;
4961}
4962
34cf6c43 4963static void exec_command_dump(ExecCommand *c, FILE *f, const char *prefix) {
e1d75803 4964 _cleanup_free_ char *cmd = NULL;
4c940960 4965 const char *prefix2;
44d8db9e
LP
4966
4967 assert(c);
4968 assert(f);
4969
4c940960 4970 prefix = strempty(prefix);
63c372cb 4971 prefix2 = strjoina(prefix, "\t");
44d8db9e 4972
9e2f7c11 4973 cmd = exec_command_line(c->argv);
44d8db9e
LP
4974 fprintf(f,
4975 "%sCommand Line: %s\n",
4bbccb02 4976 prefix, cmd ? cmd : strerror_safe(ENOMEM));
44d8db9e 4977
9fb86720 4978 exec_status_dump(&c->exec_status, f, prefix2);
44d8db9e
LP
4979}
4980
4981void exec_command_dump_list(ExecCommand *c, FILE *f, const char *prefix) {
4982 assert(f);
4983
4c940960 4984 prefix = strempty(prefix);
44d8db9e
LP
4985
4986 LIST_FOREACH(command, c, c)
4987 exec_command_dump(c, f, prefix);
4988}
94f04347 4989
a6a80b4f
LP
4990void exec_command_append_list(ExecCommand **l, ExecCommand *e) {
4991 ExecCommand *end;
4992
4993 assert(l);
4994 assert(e);
4995
4996 if (*l) {
35b8ca3a 4997 /* It's kind of important, that we keep the order here */
71fda00f
LP
4998 LIST_FIND_TAIL(command, *l, end);
4999 LIST_INSERT_AFTER(command, *l, end, e);
a6a80b4f
LP
5000 } else
5001 *l = e;
5002}
5003
26fd040d
LP
5004int exec_command_set(ExecCommand *c, const char *path, ...) {
5005 va_list ap;
5006 char **l, *p;
5007
5008 assert(c);
5009 assert(path);
5010
5011 va_start(ap, path);
5012 l = strv_new_ap(path, ap);
5013 va_end(ap);
5014
5015 if (!l)
5016 return -ENOMEM;
5017
250a918d
LP
5018 p = strdup(path);
5019 if (!p) {
26fd040d
LP
5020 strv_free(l);
5021 return -ENOMEM;
5022 }
5023
6897dfe8 5024 free_and_replace(c->path, p);
26fd040d 5025
130d3d22 5026 return strv_free_and_replace(c->argv, l);
26fd040d
LP
5027}
5028
86b23b07 5029int exec_command_append(ExecCommand *c, const char *path, ...) {
e63ff941 5030 _cleanup_strv_free_ char **l = NULL;
86b23b07 5031 va_list ap;
86b23b07
JS
5032 int r;
5033
5034 assert(c);
5035 assert(path);
5036
5037 va_start(ap, path);
5038 l = strv_new_ap(path, ap);
5039 va_end(ap);
5040
5041 if (!l)
5042 return -ENOMEM;
5043
e287086b 5044 r = strv_extend_strv(&c->argv, l, false);
e63ff941 5045 if (r < 0)
86b23b07 5046 return r;
86b23b07
JS
5047
5048 return 0;
5049}
5050
e8a565cb
YW
5051static void *remove_tmpdir_thread(void *p) {
5052 _cleanup_free_ char *path = p;
86b23b07 5053
e8a565cb
YW
5054 (void) rm_rf(path, REMOVE_ROOT|REMOVE_PHYSICAL);
5055 return NULL;
5056}
5057
5058static ExecRuntime* exec_runtime_free(ExecRuntime *rt, bool destroy) {
5059 int r;
5060
5061 if (!rt)
5062 return NULL;
5063
5064 if (rt->manager)
5065 (void) hashmap_remove(rt->manager->exec_runtime_by_id, rt->id);
5066
5067 /* When destroy is true, then rm_rf tmp_dir and var_tmp_dir. */
5068 if (destroy && rt->tmp_dir) {
5069 log_debug("Spawning thread to nuke %s", rt->tmp_dir);
5070
5071 r = asynchronous_job(remove_tmpdir_thread, rt->tmp_dir);
5072 if (r < 0) {
5073 log_warning_errno(r, "Failed to nuke %s: %m", rt->tmp_dir);
5074 free(rt->tmp_dir);
5075 }
5076
5077 rt->tmp_dir = NULL;
5078 }
613b411c 5079
e8a565cb
YW
5080 if (destroy && rt->var_tmp_dir) {
5081 log_debug("Spawning thread to nuke %s", rt->var_tmp_dir);
5082
5083 r = asynchronous_job(remove_tmpdir_thread, rt->var_tmp_dir);
5084 if (r < 0) {
5085 log_warning_errno(r, "Failed to nuke %s: %m", rt->var_tmp_dir);
5086 free(rt->var_tmp_dir);
5087 }
5088
5089 rt->var_tmp_dir = NULL;
5090 }
5091
5092 rt->id = mfree(rt->id);
5093 rt->tmp_dir = mfree(rt->tmp_dir);
5094 rt->var_tmp_dir = mfree(rt->var_tmp_dir);
5095 safe_close_pair(rt->netns_storage_socket);
5096 return mfree(rt);
5097}
5098
5099static void exec_runtime_freep(ExecRuntime **rt) {
da6bc6ed 5100 (void) exec_runtime_free(*rt, false);
e8a565cb
YW
5101}
5102
8e8009dc
LP
5103static int exec_runtime_allocate(ExecRuntime **ret) {
5104 ExecRuntime *n;
613b411c 5105
8e8009dc 5106 assert(ret);
613b411c 5107
8e8009dc
LP
5108 n = new(ExecRuntime, 1);
5109 if (!n)
613b411c
LP
5110 return -ENOMEM;
5111
8e8009dc
LP
5112 *n = (ExecRuntime) {
5113 .netns_storage_socket = { -1, -1 },
5114 };
5115
5116 *ret = n;
613b411c
LP
5117 return 0;
5118}
5119
e8a565cb
YW
5120static int exec_runtime_add(
5121 Manager *m,
5122 const char *id,
5123 const char *tmp_dir,
5124 const char *var_tmp_dir,
5125 const int netns_storage_socket[2],
5126 ExecRuntime **ret) {
5127
5128 _cleanup_(exec_runtime_freep) ExecRuntime *rt = NULL;
613b411c
LP
5129 int r;
5130
e8a565cb 5131 assert(m);
613b411c
LP
5132 assert(id);
5133
e8a565cb
YW
5134 r = hashmap_ensure_allocated(&m->exec_runtime_by_id, &string_hash_ops);
5135 if (r < 0)
5136 return r;
613b411c 5137
e8a565cb 5138 r = exec_runtime_allocate(&rt);
613b411c
LP
5139 if (r < 0)
5140 return r;
5141
e8a565cb
YW
5142 rt->id = strdup(id);
5143 if (!rt->id)
5144 return -ENOMEM;
5145
5146 if (tmp_dir) {
5147 rt->tmp_dir = strdup(tmp_dir);
5148 if (!rt->tmp_dir)
5149 return -ENOMEM;
5150
5151 /* When tmp_dir is set, then we require var_tmp_dir is also set. */
5152 assert(var_tmp_dir);
5153 rt->var_tmp_dir = strdup(var_tmp_dir);
5154 if (!rt->var_tmp_dir)
5155 return -ENOMEM;
5156 }
5157
5158 if (netns_storage_socket) {
5159 rt->netns_storage_socket[0] = netns_storage_socket[0];
5160 rt->netns_storage_socket[1] = netns_storage_socket[1];
613b411c
LP
5161 }
5162
e8a565cb
YW
5163 r = hashmap_put(m->exec_runtime_by_id, rt->id, rt);
5164 if (r < 0)
5165 return r;
5166
5167 rt->manager = m;
5168
5169 if (ret)
5170 *ret = rt;
5171
5172 /* do not remove created ExecRuntime object when the operation succeeds. */
5173 rt = NULL;
5174 return 0;
5175}
5176
5177static int exec_runtime_make(Manager *m, const ExecContext *c, const char *id, ExecRuntime **ret) {
5178 _cleanup_free_ char *tmp_dir = NULL, *var_tmp_dir = NULL;
2fa3742d 5179 _cleanup_close_pair_ int netns_storage_socket[2] = { -1, -1 };
e8a565cb
YW
5180 int r;
5181
5182 assert(m);
5183 assert(c);
5184 assert(id);
5185
5186 /* It is not necessary to create ExecRuntime object. */
a8d08f39 5187 if (!c->private_network && !c->private_tmp && !c->network_namespace_path)
e8a565cb
YW
5188 return 0;
5189
5190 if (c->private_tmp) {
5191 r = setup_tmp_dirs(id, &tmp_dir, &var_tmp_dir);
613b411c
LP
5192 if (r < 0)
5193 return r;
5194 }
5195
a8d08f39 5196 if (c->private_network || c->network_namespace_path) {
e8a565cb
YW
5197 if (socketpair(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC, 0, netns_storage_socket) < 0)
5198 return -errno;
5199 }
5200
5201 r = exec_runtime_add(m, id, tmp_dir, var_tmp_dir, netns_storage_socket, ret);
5202 if (r < 0)
5203 return r;
5204
5205 /* Avoid cleanup */
2fa3742d 5206 netns_storage_socket[0] = netns_storage_socket[1] = -1;
613b411c
LP
5207 return 1;
5208}
5209
e8a565cb
YW
5210int exec_runtime_acquire(Manager *m, const ExecContext *c, const char *id, bool create, ExecRuntime **ret) {
5211 ExecRuntime *rt;
5212 int r;
613b411c 5213
e8a565cb
YW
5214 assert(m);
5215 assert(id);
5216 assert(ret);
5217
5218 rt = hashmap_get(m->exec_runtime_by_id, id);
5219 if (rt)
5220 /* We already have a ExecRuntime object, let's increase the ref count and reuse it */
5221 goto ref;
5222
5223 if (!create)
5224 return 0;
5225
5226 /* If not found, then create a new object. */
5227 r = exec_runtime_make(m, c, id, &rt);
5228 if (r <= 0)
5229 /* When r == 0, it is not necessary to create ExecRuntime object. */
5230 return r;
613b411c 5231
e8a565cb
YW
5232ref:
5233 /* increment reference counter. */
5234 rt->n_ref++;
5235 *ret = rt;
5236 return 1;
5237}
613b411c 5238
e8a565cb
YW
5239ExecRuntime *exec_runtime_unref(ExecRuntime *rt, bool destroy) {
5240 if (!rt)
613b411c
LP
5241 return NULL;
5242
e8a565cb 5243 assert(rt->n_ref > 0);
613b411c 5244
e8a565cb
YW
5245 rt->n_ref--;
5246 if (rt->n_ref > 0)
f2341e0a
LP
5247 return NULL;
5248
e8a565cb 5249 return exec_runtime_free(rt, destroy);
613b411c
LP
5250}
5251
e8a565cb
YW
5252int exec_runtime_serialize(const Manager *m, FILE *f, FDSet *fds) {
5253 ExecRuntime *rt;
5254 Iterator i;
5255
5256 assert(m);
613b411c
LP
5257 assert(f);
5258 assert(fds);
5259
e8a565cb
YW
5260 HASHMAP_FOREACH(rt, m->exec_runtime_by_id, i) {
5261 fprintf(f, "exec-runtime=%s", rt->id);
613b411c 5262
e8a565cb
YW
5263 if (rt->tmp_dir)
5264 fprintf(f, " tmp-dir=%s", rt->tmp_dir);
613b411c 5265
e8a565cb
YW
5266 if (rt->var_tmp_dir)
5267 fprintf(f, " var-tmp-dir=%s", rt->var_tmp_dir);
613b411c 5268
e8a565cb
YW
5269 if (rt->netns_storage_socket[0] >= 0) {
5270 int copy;
613b411c 5271
e8a565cb
YW
5272 copy = fdset_put_dup(fds, rt->netns_storage_socket[0]);
5273 if (copy < 0)
5274 return copy;
613b411c 5275
e8a565cb
YW
5276 fprintf(f, " netns-socket-0=%i", copy);
5277 }
613b411c 5278
e8a565cb
YW
5279 if (rt->netns_storage_socket[1] >= 0) {
5280 int copy;
613b411c 5281
e8a565cb
YW
5282 copy = fdset_put_dup(fds, rt->netns_storage_socket[1]);
5283 if (copy < 0)
5284 return copy;
613b411c 5285
e8a565cb
YW
5286 fprintf(f, " netns-socket-1=%i", copy);
5287 }
5288
5289 fputc('\n', f);
613b411c
LP
5290 }
5291
5292 return 0;
5293}
5294
e8a565cb
YW
5295int exec_runtime_deserialize_compat(Unit *u, const char *key, const char *value, FDSet *fds) {
5296 _cleanup_(exec_runtime_freep) ExecRuntime *rt_create = NULL;
5297 ExecRuntime *rt;
613b411c
LP
5298 int r;
5299
e8a565cb
YW
5300 /* This is for the migration from old (v237 or earlier) deserialization text.
5301 * Due to the bug #7790, this may not work with the units that use JoinsNamespaceOf=.
5302 * Even if the ExecRuntime object originally created by the other unit, we cannot judge
5303 * so or not from the serialized text, then we always creates a new object owned by this. */
5304
5305 assert(u);
613b411c
LP
5306 assert(key);
5307 assert(value);
5308
e8a565cb
YW
5309 /* Manager manages ExecRuntime objects by the unit id.
5310 * So, we omit the serialized text when the unit does not have id (yet?)... */
5311 if (isempty(u->id)) {
5312 log_unit_debug(u, "Invocation ID not found. Dropping runtime parameter.");
5313 return 0;
5314 }
613b411c 5315
e8a565cb
YW
5316 r = hashmap_ensure_allocated(&u->manager->exec_runtime_by_id, &string_hash_ops);
5317 if (r < 0) {
5318 log_unit_debug_errno(u, r, "Failed to allocate storage for runtime parameter: %m");
5319 return 0;
5320 }
5321
5322 rt = hashmap_get(u->manager->exec_runtime_by_id, u->id);
5323 if (!rt) {
5324 r = exec_runtime_allocate(&rt_create);
613b411c 5325 if (r < 0)
f2341e0a 5326 return log_oom();
613b411c 5327
e8a565cb
YW
5328 rt_create->id = strdup(u->id);
5329 if (!rt_create->id)
5330 return log_oom();
5331
5332 rt = rt_create;
5333 }
5334
5335 if (streq(key, "tmp-dir")) {
5336 char *copy;
5337
613b411c
LP
5338 copy = strdup(value);
5339 if (!copy)
5340 return log_oom();
5341
e8a565cb 5342 free_and_replace(rt->tmp_dir, copy);
613b411c
LP
5343
5344 } else if (streq(key, "var-tmp-dir")) {
5345 char *copy;
5346
613b411c
LP
5347 copy = strdup(value);
5348 if (!copy)
5349 return log_oom();
5350
e8a565cb 5351 free_and_replace(rt->var_tmp_dir, copy);
613b411c
LP
5352
5353 } else if (streq(key, "netns-socket-0")) {
5354 int fd;
5355
e8a565cb 5356 if (safe_atoi(value, &fd) < 0 || !fdset_contains(fds, fd)) {
f2341e0a 5357 log_unit_debug(u, "Failed to parse netns socket value: %s", value);
e8a565cb 5358 return 0;
613b411c 5359 }
e8a565cb
YW
5360
5361 safe_close(rt->netns_storage_socket[0]);
5362 rt->netns_storage_socket[0] = fdset_remove(fds, fd);
5363
613b411c
LP
5364 } else if (streq(key, "netns-socket-1")) {
5365 int fd;
5366
e8a565cb 5367 if (safe_atoi(value, &fd) < 0 || !fdset_contains(fds, fd)) {
f2341e0a 5368 log_unit_debug(u, "Failed to parse netns socket value: %s", value);
e8a565cb 5369 return 0;
613b411c 5370 }
e8a565cb
YW
5371
5372 safe_close(rt->netns_storage_socket[1]);
5373 rt->netns_storage_socket[1] = fdset_remove(fds, fd);
613b411c
LP
5374 } else
5375 return 0;
5376
e8a565cb
YW
5377 /* If the object is newly created, then put it to the hashmap which manages ExecRuntime objects. */
5378 if (rt_create) {
5379 r = hashmap_put(u->manager->exec_runtime_by_id, rt_create->id, rt_create);
5380 if (r < 0) {
3fe91079 5381 log_unit_debug_errno(u, r, "Failed to put runtime parameter to manager's storage: %m");
e8a565cb
YW
5382 return 0;
5383 }
613b411c 5384
e8a565cb 5385 rt_create->manager = u->manager;
613b411c 5386
e8a565cb
YW
5387 /* Avoid cleanup */
5388 rt_create = NULL;
5389 }
98b47d54 5390
e8a565cb
YW
5391 return 1;
5392}
613b411c 5393
e8a565cb
YW
5394void exec_runtime_deserialize_one(Manager *m, const char *value, FDSet *fds) {
5395 char *id = NULL, *tmp_dir = NULL, *var_tmp_dir = NULL;
5396 int r, fd0 = -1, fd1 = -1;
5397 const char *p, *v = value;
5398 size_t n;
613b411c 5399
e8a565cb
YW
5400 assert(m);
5401 assert(value);
5402 assert(fds);
98b47d54 5403
e8a565cb
YW
5404 n = strcspn(v, " ");
5405 id = strndupa(v, n);
5406 if (v[n] != ' ')
5407 goto finalize;
5408 p = v + n + 1;
5409
5410 v = startswith(p, "tmp-dir=");
5411 if (v) {
5412 n = strcspn(v, " ");
5413 tmp_dir = strndupa(v, n);
5414 if (v[n] != ' ')
5415 goto finalize;
5416 p = v + n + 1;
5417 }
5418
5419 v = startswith(p, "var-tmp-dir=");
5420 if (v) {
5421 n = strcspn(v, " ");
5422 var_tmp_dir = strndupa(v, n);
5423 if (v[n] != ' ')
5424 goto finalize;
5425 p = v + n + 1;
5426 }
5427
5428 v = startswith(p, "netns-socket-0=");
5429 if (v) {
5430 char *buf;
5431
5432 n = strcspn(v, " ");
5433 buf = strndupa(v, n);
5434 if (safe_atoi(buf, &fd0) < 0 || !fdset_contains(fds, fd0)) {
5435 log_debug("Unable to process exec-runtime netns fd specification.");
5436 return;
98b47d54 5437 }
e8a565cb
YW
5438 fd0 = fdset_remove(fds, fd0);
5439 if (v[n] != ' ')
5440 goto finalize;
5441 p = v + n + 1;
613b411c
LP
5442 }
5443
e8a565cb
YW
5444 v = startswith(p, "netns-socket-1=");
5445 if (v) {
5446 char *buf;
98b47d54 5447
e8a565cb
YW
5448 n = strcspn(v, " ");
5449 buf = strndupa(v, n);
5450 if (safe_atoi(buf, &fd1) < 0 || !fdset_contains(fds, fd1)) {
5451 log_debug("Unable to process exec-runtime netns fd specification.");
5452 return;
98b47d54 5453 }
e8a565cb
YW
5454 fd1 = fdset_remove(fds, fd1);
5455 }
98b47d54 5456
e8a565cb
YW
5457finalize:
5458
5459 r = exec_runtime_add(m, id, tmp_dir, var_tmp_dir, (int[]) { fd0, fd1 }, NULL);
7d853ca6 5460 if (r < 0)
e8a565cb 5461 log_debug_errno(r, "Failed to add exec-runtime: %m");
e8a565cb 5462}
613b411c 5463
e8a565cb
YW
5464void exec_runtime_vacuum(Manager *m) {
5465 ExecRuntime *rt;
5466 Iterator i;
5467
5468 assert(m);
5469
5470 /* Free unreferenced ExecRuntime objects. This is used after manager deserialization process. */
5471
5472 HASHMAP_FOREACH(rt, m->exec_runtime_by_id, i) {
5473 if (rt->n_ref > 0)
5474 continue;
5475
5476 (void) exec_runtime_free(rt, false);
5477 }
613b411c
LP
5478}
5479
b9c04eaf
YW
5480void exec_params_clear(ExecParameters *p) {
5481 if (!p)
5482 return;
5483
5484 strv_free(p->environment);
5485}
5486
80876c20
LP
5487static const char* const exec_input_table[_EXEC_INPUT_MAX] = {
5488 [EXEC_INPUT_NULL] = "null",
5489 [EXEC_INPUT_TTY] = "tty",
5490 [EXEC_INPUT_TTY_FORCE] = "tty-force",
4f2d528d 5491 [EXEC_INPUT_TTY_FAIL] = "tty-fail",
52c239d7
LB
5492 [EXEC_INPUT_SOCKET] = "socket",
5493 [EXEC_INPUT_NAMED_FD] = "fd",
08f3be7a 5494 [EXEC_INPUT_DATA] = "data",
2038c3f5 5495 [EXEC_INPUT_FILE] = "file",
80876c20
LP
5496};
5497
8a0867d6
LP
5498DEFINE_STRING_TABLE_LOOKUP(exec_input, ExecInput);
5499
94f04347 5500static const char* const exec_output_table[_EXEC_OUTPUT_MAX] = {
80876c20 5501 [EXEC_OUTPUT_INHERIT] = "inherit",
94f04347 5502 [EXEC_OUTPUT_NULL] = "null",
80876c20 5503 [EXEC_OUTPUT_TTY] = "tty",
94f04347 5504 [EXEC_OUTPUT_SYSLOG] = "syslog",
28dbc1e8 5505 [EXEC_OUTPUT_SYSLOG_AND_CONSOLE] = "syslog+console",
9a6bca7a 5506 [EXEC_OUTPUT_KMSG] = "kmsg",
28dbc1e8 5507 [EXEC_OUTPUT_KMSG_AND_CONSOLE] = "kmsg+console",
706343f4
LP
5508 [EXEC_OUTPUT_JOURNAL] = "journal",
5509 [EXEC_OUTPUT_JOURNAL_AND_CONSOLE] = "journal+console",
52c239d7
LB
5510 [EXEC_OUTPUT_SOCKET] = "socket",
5511 [EXEC_OUTPUT_NAMED_FD] = "fd",
2038c3f5 5512 [EXEC_OUTPUT_FILE] = "file",
566b7d23 5513 [EXEC_OUTPUT_FILE_APPEND] = "append",
94f04347
LP
5514};
5515
5516DEFINE_STRING_TABLE_LOOKUP(exec_output, ExecOutput);
023a4f67
LP
5517
5518static const char* const exec_utmp_mode_table[_EXEC_UTMP_MODE_MAX] = {
5519 [EXEC_UTMP_INIT] = "init",
5520 [EXEC_UTMP_LOGIN] = "login",
5521 [EXEC_UTMP_USER] = "user",
5522};
5523
5524DEFINE_STRING_TABLE_LOOKUP(exec_utmp_mode, ExecUtmpMode);
53f47dfc
YW
5525
5526static const char* const exec_preserve_mode_table[_EXEC_PRESERVE_MODE_MAX] = {
5527 [EXEC_PRESERVE_NO] = "no",
5528 [EXEC_PRESERVE_YES] = "yes",
5529 [EXEC_PRESERVE_RESTART] = "restart",
5530};
5531
5532DEFINE_STRING_TABLE_LOOKUP_WITH_BOOLEAN(exec_preserve_mode, ExecPreserveMode, EXEC_PRESERVE_YES);
3536f49e 5533
6b7b2ed9 5534/* This table maps ExecDirectoryType to the setting it is configured with in the unit */
72fd1768 5535static const char* const exec_directory_type_table[_EXEC_DIRECTORY_TYPE_MAX] = {
3536f49e
YW
5536 [EXEC_DIRECTORY_RUNTIME] = "RuntimeDirectory",
5537 [EXEC_DIRECTORY_STATE] = "StateDirectory",
5538 [EXEC_DIRECTORY_CACHE] = "CacheDirectory",
5539 [EXEC_DIRECTORY_LOGS] = "LogsDirectory",
5540 [EXEC_DIRECTORY_CONFIGURATION] = "ConfigurationDirectory",
5541};
5542
5543DEFINE_STRING_TABLE_LOOKUP(exec_directory_type, ExecDirectoryType);
b1edf445 5544
6b7b2ed9
LP
5545/* And this table maps ExecDirectoryType too, but to a generic term identifying the type of resource. This
5546 * one is supposed to be generic enough to be used for unit types that don't use ExecContext and per-unit
5547 * directories, specifically .timer units with their timestamp touch file. */
5548static const char* const exec_resource_type_table[_EXEC_DIRECTORY_TYPE_MAX] = {
5549 [EXEC_DIRECTORY_RUNTIME] = "runtime",
5550 [EXEC_DIRECTORY_STATE] = "state",
5551 [EXEC_DIRECTORY_CACHE] = "cache",
5552 [EXEC_DIRECTORY_LOGS] = "logs",
5553 [EXEC_DIRECTORY_CONFIGURATION] = "configuration",
5554};
5555
5556DEFINE_STRING_TABLE_LOOKUP(exec_resource_type, ExecDirectoryType);
5557
5558/* And this table also maps ExecDirectoryType, to the environment variable we pass the selected directory to
5559 * the service payload in. */
fb2042dd
YW
5560static const char* const exec_directory_env_name_table[_EXEC_DIRECTORY_TYPE_MAX] = {
5561 [EXEC_DIRECTORY_RUNTIME] = "RUNTIME_DIRECTORY",
5562 [EXEC_DIRECTORY_STATE] = "STATE_DIRECTORY",
5563 [EXEC_DIRECTORY_CACHE] = "CACHE_DIRECTORY",
5564 [EXEC_DIRECTORY_LOGS] = "LOGS_DIRECTORY",
5565 [EXEC_DIRECTORY_CONFIGURATION] = "CONFIGURATION_DIRECTORY",
5566};
5567
5568DEFINE_PRIVATE_STRING_TABLE_LOOKUP_TO_STRING(exec_directory_env_name, ExecDirectoryType);
5569
b1edf445
LP
5570static const char* const exec_keyring_mode_table[_EXEC_KEYRING_MODE_MAX] = {
5571 [EXEC_KEYRING_INHERIT] = "inherit",
5572 [EXEC_KEYRING_PRIVATE] = "private",
5573 [EXEC_KEYRING_SHARED] = "shared",
5574};
5575
5576DEFINE_STRING_TABLE_LOOKUP(exec_keyring_mode, ExecKeyringMode);