]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/core/execute.c
shared/exit-status: turn status level into a bitmask, add "test"
[thirdparty/systemd.git] / src / core / execute.c
CommitLineData
53e1b683 1/* SPDX-License-Identifier: LGPL-2.1+ */
a7334b09 2
034c6ed7
LP
3#include <errno.h>
4#include <fcntl.h>
8dd4c05b
LP
5#include <glob.h>
6#include <grp.h>
7#include <poll.h>
309bff19 8#include <signal.h>
8dd4c05b 9#include <string.h>
19c0b0b9 10#include <sys/capability.h>
d251207d 11#include <sys/eventfd.h>
f3e43635 12#include <sys/mman.h>
8dd4c05b 13#include <sys/personality.h>
94f04347 14#include <sys/prctl.h>
d2ffa389 15#include <sys/shm.h>
8dd4c05b 16#include <sys/socket.h>
451a074f 17#include <sys/stat.h>
d2ffa389 18#include <sys/types.h>
8dd4c05b
LP
19#include <sys/un.h>
20#include <unistd.h>
023a4f67 21#include <utmpx.h>
5cb5a6ff 22
349cc4a5 23#if HAVE_PAM
5b6319dc
LP
24#include <security/pam_appl.h>
25#endif
26
349cc4a5 27#if HAVE_SELINUX
7b52a628
MS
28#include <selinux/selinux.h>
29#endif
30
349cc4a5 31#if HAVE_SECCOMP
17df7223
LP
32#include <seccomp.h>
33#endif
34
349cc4a5 35#if HAVE_APPARMOR
eef65bf3
MS
36#include <sys/apparmor.h>
37#endif
38
24882e06 39#include "sd-messages.h"
8dd4c05b
LP
40
41#include "af-list.h"
b5efdb8a 42#include "alloc-util.h"
349cc4a5 43#if HAVE_APPARMOR
3ffd4af2
LP
44#include "apparmor-util.h"
45#endif
8dd4c05b
LP
46#include "async.h"
47#include "barrier.h"
8dd4c05b 48#include "cap-list.h"
430f0182 49#include "capability-util.h"
a1164ae3 50#include "chown-recursive.h"
da681e1b 51#include "cpu-set-util.h"
f6a6225e 52#include "def.h"
686d13b9 53#include "env-file.h"
4d1a6904 54#include "env-util.h"
17df7223 55#include "errno-list.h"
3ffd4af2 56#include "execute.h"
8dd4c05b 57#include "exit-status.h"
3ffd4af2 58#include "fd-util.h"
f97b34a6 59#include "format-util.h"
f4f15635 60#include "fs-util.h"
7d50b32a 61#include "glob-util.h"
c004493c 62#include "io-util.h"
8dd4c05b 63#include "ioprio.h"
a1164ae3 64#include "label.h"
8dd4c05b
LP
65#include "log.h"
66#include "macro.h"
e8a565cb 67#include "manager.h"
0a970718 68#include "memory-util.h"
8dd4c05b
LP
69#include "missing.h"
70#include "mkdir.h"
71#include "namespace.h"
6bedfcbb 72#include "parse-util.h"
8dd4c05b 73#include "path-util.h"
0b452006 74#include "process-util.h"
78f22b97 75#include "rlimit-util.h"
8dd4c05b 76#include "rm-rf.h"
349cc4a5 77#if HAVE_SECCOMP
3ffd4af2
LP
78#include "seccomp-util.h"
79#endif
07d46372 80#include "securebits-util.h"
8dd4c05b 81#include "selinux-util.h"
24882e06 82#include "signal-util.h"
8dd4c05b 83#include "smack-util.h"
57b7a260 84#include "socket-util.h"
fd63e712 85#include "special.h"
949befd3 86#include "stat-util.h"
8b43440b 87#include "string-table.h"
07630cea 88#include "string-util.h"
8dd4c05b 89#include "strv.h"
7ccbd1ae 90#include "syslog-util.h"
8dd4c05b 91#include "terminal-util.h"
566b7d23 92#include "umask-util.h"
8dd4c05b 93#include "unit.h"
b1d4f8e1 94#include "user-util.h"
8dd4c05b 95#include "utmp-wtmp.h"
5cb5a6ff 96
e056b01d 97#define IDLE_TIMEOUT_USEC (5*USEC_PER_SEC)
31a7eb86 98#define IDLE_TIMEOUT2_USEC (1*USEC_PER_SEC)
e6a26745 99
531dca78
LP
100#define SNDBUF_SIZE (8*1024*1024)
101
/* Relocates the passed descriptors so that fds[k] ends up being descriptor number k+3, for every index k.
 *
 * The array is updated in place: each slot is overwritten with the number the descriptor was moved to.
 * Returns 0 on success, or a negative errno-style value if fcntl(F_DUPFD) fails. */
static int shift_fds(int fds[], size_t n_fds) {
        int first = 0, retry_at, idx;

        if (n_fds == 0)
                return 0;

        assert(fds);

        do {
                retry_at = -1;

                for (idx = first; idx < (int) n_fds; idx++) {
                        int target = idx + 3, moved;

                        /* Nothing to do if the descriptor already sits in its slot */
                        if (fds[idx] == target)
                                continue;

                        moved = fcntl(fds[idx], F_DUPFD, target);
                        if (moved < 0)
                                return -errno;

                        safe_close(fds[idx]);
                        fds[idx] = moved;

                        /* The slot we wanted was still occupied; remember the first such index so that
                         * another pass starts from there. */
                        if (moved != target && retry_at < 0)
                                retry_at = idx;
                }

                first = retry_at;
        } while (retry_at >= 0);

        return 0;
}
146
/* Adjusts the file flags of the descriptors we are about to pass on.
 *
 * The first n_socket_fds entries of fds[] get O_NONBLOCK set or cleared according to 'nonblock'; the
 * remaining n_storage_fds entries are left alone in that respect. FD_CLOEXEC is dropped from every entry.
 * Returns 0 on success, or the first negative error reported by fd_nonblock()/fd_cloexec(). */
static int flags_fds(const int fds[], size_t n_socket_fds, size_t n_storage_fds, bool nonblock) {
        size_t total = n_socket_fds + n_storage_fds, k;
        int r;

        if (total == 0)
                return 0;

        assert(fds);

        for (k = 0; k < total; k++) {
                bool is_socket_fd = k < n_socket_fds;

                /* O_NONBLOCK is only touched for the socket part of the array */
                if (is_socket_fd) {
                        r = fd_nonblock(fds[k], nonblock);
                        if (r < 0)
                                return r;
                }

                /* FD_CLOEXEC always goes away, as these fds are meant to be passed to our children */
                r = fd_cloexec(fds[k], false);
                if (r < 0)
                        return r;
        }

        return 0;
}
179
1e22b5cd 180static const char *exec_context_tty_path(const ExecContext *context) {
80876c20
LP
181 assert(context);
182
1e22b5cd
LP
183 if (context->stdio_as_fds)
184 return NULL;
185
80876c20
LP
186 if (context->tty_path)
187 return context->tty_path;
188
189 return "/dev/console";
190}
191
1e22b5cd
LP
192static void exec_context_tty_reset(const ExecContext *context, const ExecParameters *p) {
193 const char *path;
194
6ea832a2
LP
195 assert(context);
196
1e22b5cd 197 path = exec_context_tty_path(context);
6ea832a2 198
1e22b5cd
LP
199 if (context->tty_vhangup) {
200 if (p && p->stdin_fd >= 0)
201 (void) terminal_vhangup_fd(p->stdin_fd);
202 else if (path)
203 (void) terminal_vhangup(path);
204 }
6ea832a2 205
1e22b5cd
LP
206 if (context->tty_reset) {
207 if (p && p->stdin_fd >= 0)
208 (void) reset_terminal_fd(p->stdin_fd, true);
209 else if (path)
210 (void) reset_terminal(path);
211 }
212
213 if (context->tty_vt_disallocate && path)
214 (void) vt_disallocate(path);
6ea832a2
LP
215}
216
6af760f3
LP
217static bool is_terminal_input(ExecInput i) {
218 return IN_SET(i,
219 EXEC_INPUT_TTY,
220 EXEC_INPUT_TTY_FORCE,
221 EXEC_INPUT_TTY_FAIL);
222}
223
3a1286b6 224static bool is_terminal_output(ExecOutput o) {
6af760f3
LP
225 return IN_SET(o,
226 EXEC_OUTPUT_TTY,
227 EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
228 EXEC_OUTPUT_KMSG_AND_CONSOLE,
229 EXEC_OUTPUT_JOURNAL_AND_CONSOLE);
230}
231
aac8c0c3
LP
232static bool is_syslog_output(ExecOutput o) {
233 return IN_SET(o,
234 EXEC_OUTPUT_SYSLOG,
235 EXEC_OUTPUT_SYSLOG_AND_CONSOLE);
236}
237
238static bool is_kmsg_output(ExecOutput o) {
239 return IN_SET(o,
240 EXEC_OUTPUT_KMSG,
241 EXEC_OUTPUT_KMSG_AND_CONSOLE);
242}
243
6af760f3
LP
244static bool exec_context_needs_term(const ExecContext *c) {
245 assert(c);
246
247 /* Return true if the execution context suggests we should set $TERM to something useful. */
248
249 if (is_terminal_input(c->std_input))
250 return true;
251
252 if (is_terminal_output(c->std_output))
253 return true;
254
255 if (is_terminal_output(c->std_error))
256 return true;
257
258 return !!c->tty_path;
3a1286b6
MS
259}
260
/* Opens /dev/null with the given access flags (plus O_NOCTTY) and hands the result to move_fd() so that it
 * ends up as descriptor 'nfd'. Returns move_fd()'s result, or a negative errno if the open fails. */
static int open_null_as(int flags, int nfd) {
        int null_fd;

        assert(nfd >= 0);

        null_fd = open("/dev/null", flags | O_NOCTTY);

        return null_fd < 0 ? -errno : move_fd(null_fd, nfd, false);
}
272
524daa8c 273static int connect_journal_socket(int fd, uid_t uid, gid_t gid) {
92a17af9 274 static const union sockaddr_union sa = {
b92bea5d
ZJS
275 .un.sun_family = AF_UNIX,
276 .un.sun_path = "/run/systemd/journal/stdout",
277 };
524daa8c
ZJS
278 uid_t olduid = UID_INVALID;
279 gid_t oldgid = GID_INVALID;
280 int r;
281
cad93f29 282 if (gid_is_valid(gid)) {
524daa8c
ZJS
283 oldgid = getgid();
284
92a17af9 285 if (setegid(gid) < 0)
524daa8c
ZJS
286 return -errno;
287 }
288
cad93f29 289 if (uid_is_valid(uid)) {
524daa8c
ZJS
290 olduid = getuid();
291
92a17af9 292 if (seteuid(uid) < 0) {
524daa8c
ZJS
293 r = -errno;
294 goto restore_gid;
295 }
296 }
297
92a17af9 298 r = connect(fd, &sa.sa, SOCKADDR_UN_LEN(sa.un)) < 0 ? -errno : 0;
524daa8c
ZJS
299
300 /* If we fail to restore the uid or gid, things will likely
301 fail later on. This should only happen if an LSM interferes. */
302
cad93f29 303 if (uid_is_valid(uid))
524daa8c
ZJS
304 (void) seteuid(olduid);
305
306 restore_gid:
cad93f29 307 if (gid_is_valid(gid))
524daa8c
ZJS
308 (void) setegid(oldgid);
309
310 return r;
311}
312
fd1f9c89 313static int connect_logger_as(
34cf6c43 314 const Unit *unit,
fd1f9c89 315 const ExecContext *context,
af635cf3 316 const ExecParameters *params,
fd1f9c89
LP
317 ExecOutput output,
318 const char *ident,
fd1f9c89
LP
319 int nfd,
320 uid_t uid,
321 gid_t gid) {
322
2ac1ff68
EV
323 _cleanup_close_ int fd = -1;
324 int r;
071830ff
LP
325
326 assert(context);
af635cf3 327 assert(params);
80876c20
LP
328 assert(output < _EXEC_OUTPUT_MAX);
329 assert(ident);
330 assert(nfd >= 0);
071830ff 331
54fe0cdb
LP
332 fd = socket(AF_UNIX, SOCK_STREAM, 0);
333 if (fd < 0)
80876c20 334 return -errno;
071830ff 335
524daa8c
ZJS
336 r = connect_journal_socket(fd, uid, gid);
337 if (r < 0)
338 return r;
071830ff 339
2ac1ff68 340 if (shutdown(fd, SHUT_RD) < 0)
80876c20 341 return -errno;
071830ff 342
fd1f9c89 343 (void) fd_inc_sndbuf(fd, SNDBUF_SIZE);
531dca78 344
2ac1ff68 345 if (dprintf(fd,
62bca2c6 346 "%s\n"
80876c20
LP
347 "%s\n"
348 "%i\n"
54fe0cdb
LP
349 "%i\n"
350 "%i\n"
351 "%i\n"
4f4a1dbf 352 "%i\n",
c867611e 353 context->syslog_identifier ?: ident,
af635cf3 354 params->flags & EXEC_PASS_LOG_UNIT ? unit->id : "",
54fe0cdb
LP
355 context->syslog_priority,
356 !!context->syslog_level_prefix,
aac8c0c3
LP
357 is_syslog_output(output),
358 is_kmsg_output(output),
2ac1ff68
EV
359 is_terminal_output(output)) < 0)
360 return -errno;
80876c20 361
2ac1ff68 362 return move_fd(TAKE_FD(fd), nfd, false);
80876c20 363}
2ac1ff68 364
/* Opens the terminal at 'path' with the given flags (plus O_NOCTTY) and hands it to move_fd() so that it
 * ends up as descriptor 'nfd'. Returns move_fd()'s result, or the negative error from open_terminal(). */
static int open_terminal_as(const char *path, int flags, int nfd) {
        int tty_fd;

        assert(path);
        assert(nfd >= 0);

        tty_fd = open_terminal(path, flags | O_NOCTTY);

        return tty_fd < 0 ? tty_fd : move_fd(tty_fd, nfd, false);
}
071830ff 377
2038c3f5 378static int acquire_path(const char *path, int flags, mode_t mode) {
15a3e96f
LP
379 union sockaddr_union sa = {};
380 _cleanup_close_ int fd = -1;
381 int r, salen;
071830ff 382
80876c20 383 assert(path);
071830ff 384
2038c3f5
LP
385 if (IN_SET(flags & O_ACCMODE, O_WRONLY, O_RDWR))
386 flags |= O_CREAT;
387
388 fd = open(path, flags|O_NOCTTY, mode);
389 if (fd >= 0)
15a3e96f 390 return TAKE_FD(fd);
071830ff 391
2038c3f5
LP
392 if (errno != ENXIO) /* ENXIO is returned when we try to open() an AF_UNIX file system socket on Linux */
393 return -errno;
15a3e96f 394 if (strlen(path) >= sizeof(sa.un.sun_path)) /* Too long, can't be a UNIX socket */
2038c3f5
LP
395 return -ENXIO;
396
397 /* So, it appears the specified path could be an AF_UNIX socket. Let's see if we can connect to it. */
398
399 fd = socket(AF_UNIX, SOCK_STREAM, 0);
400 if (fd < 0)
401 return -errno;
402
15a3e96f
LP
403 salen = sockaddr_un_set_path(&sa.un, path);
404 if (salen < 0)
405 return salen;
406
407 if (connect(fd, &sa.sa, salen) < 0)
2038c3f5
LP
408 return errno == EINVAL ? -ENXIO : -errno; /* Propagate initial error if we get EINVAL, i.e. we have
409 * indication that his wasn't an AF_UNIX socket after all */
071830ff 410
2038c3f5
LP
411 if ((flags & O_ACCMODE) == O_RDONLY)
412 r = shutdown(fd, SHUT_WR);
413 else if ((flags & O_ACCMODE) == O_WRONLY)
414 r = shutdown(fd, SHUT_RD);
415 else
15a3e96f
LP
416 return TAKE_FD(fd);
417 if (r < 0)
2038c3f5 418 return -errno;
2038c3f5 419
15a3e96f 420 return TAKE_FD(fd);
80876c20 421}
071830ff 422
08f3be7a
LP
423static int fixup_input(
424 const ExecContext *context,
425 int socket_fd,
426 bool apply_tty_stdin) {
427
428 ExecInput std_input;
429
430 assert(context);
431
432 std_input = context->std_input;
1e3ad081
LP
433
434 if (is_terminal_input(std_input) && !apply_tty_stdin)
435 return EXEC_INPUT_NULL;
071830ff 436
03fd9c49 437 if (std_input == EXEC_INPUT_SOCKET && socket_fd < 0)
4f2d528d
LP
438 return EXEC_INPUT_NULL;
439
08f3be7a
LP
440 if (std_input == EXEC_INPUT_DATA && context->stdin_data_size == 0)
441 return EXEC_INPUT_NULL;
442
03fd9c49 443 return std_input;
4f2d528d
LP
444}
445
03fd9c49 446static int fixup_output(ExecOutput std_output, int socket_fd) {
4f2d528d 447
03fd9c49 448 if (std_output == EXEC_OUTPUT_SOCKET && socket_fd < 0)
4f2d528d
LP
449 return EXEC_OUTPUT_INHERIT;
450
03fd9c49 451 return std_output;
4f2d528d
LP
452}
453
a34ceba6
LP
454static int setup_input(
455 const ExecContext *context,
456 const ExecParameters *params,
52c239d7 457 int socket_fd,
2caa38e9 458 const int named_iofds[static 3]) {
a34ceba6 459
4f2d528d
LP
460 ExecInput i;
461
462 assert(context);
a34ceba6 463 assert(params);
2caa38e9 464 assert(named_iofds);
a34ceba6
LP
465
466 if (params->stdin_fd >= 0) {
467 if (dup2(params->stdin_fd, STDIN_FILENO) < 0)
468 return -errno;
469
470 /* Try to make this the controlling tty, if it is a tty, and reset it */
1fb0682e
LP
471 if (isatty(STDIN_FILENO)) {
472 (void) ioctl(STDIN_FILENO, TIOCSCTTY, context->std_input == EXEC_INPUT_TTY_FORCE);
473 (void) reset_terminal_fd(STDIN_FILENO, true);
474 }
a34ceba6
LP
475
476 return STDIN_FILENO;
477 }
4f2d528d 478
08f3be7a 479 i = fixup_input(context, socket_fd, params->flags & EXEC_APPLY_TTY_STDIN);
4f2d528d
LP
480
481 switch (i) {
071830ff 482
80876c20
LP
483 case EXEC_INPUT_NULL:
484 return open_null_as(O_RDONLY, STDIN_FILENO);
485
486 case EXEC_INPUT_TTY:
487 case EXEC_INPUT_TTY_FORCE:
488 case EXEC_INPUT_TTY_FAIL: {
046a82c1 489 int fd;
071830ff 490
1e22b5cd 491 fd = acquire_terminal(exec_context_tty_path(context),
8854d795
LP
492 i == EXEC_INPUT_TTY_FAIL ? ACQUIRE_TERMINAL_TRY :
493 i == EXEC_INPUT_TTY_FORCE ? ACQUIRE_TERMINAL_FORCE :
494 ACQUIRE_TERMINAL_WAIT,
3a43da28 495 USEC_INFINITY);
970edce6 496 if (fd < 0)
80876c20
LP
497 return fd;
498
046a82c1 499 return move_fd(fd, STDIN_FILENO, false);
80876c20
LP
500 }
501
4f2d528d 502 case EXEC_INPUT_SOCKET:
e75a9ed1
LP
503 assert(socket_fd >= 0);
504
4f2d528d
LP
505 return dup2(socket_fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
506
52c239d7 507 case EXEC_INPUT_NAMED_FD:
e75a9ed1
LP
508 assert(named_iofds[STDIN_FILENO] >= 0);
509
52c239d7
LB
510 (void) fd_nonblock(named_iofds[STDIN_FILENO], false);
511 return dup2(named_iofds[STDIN_FILENO], STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
512
08f3be7a
LP
513 case EXEC_INPUT_DATA: {
514 int fd;
515
516 fd = acquire_data_fd(context->stdin_data, context->stdin_data_size, 0);
517 if (fd < 0)
518 return fd;
519
520 return move_fd(fd, STDIN_FILENO, false);
521 }
522
2038c3f5
LP
523 case EXEC_INPUT_FILE: {
524 bool rw;
525 int fd;
526
527 assert(context->stdio_file[STDIN_FILENO]);
528
529 rw = (context->std_output == EXEC_OUTPUT_FILE && streq_ptr(context->stdio_file[STDIN_FILENO], context->stdio_file[STDOUT_FILENO])) ||
530 (context->std_error == EXEC_OUTPUT_FILE && streq_ptr(context->stdio_file[STDIN_FILENO], context->stdio_file[STDERR_FILENO]));
531
532 fd = acquire_path(context->stdio_file[STDIN_FILENO], rw ? O_RDWR : O_RDONLY, 0666 & ~context->umask);
533 if (fd < 0)
534 return fd;
535
536 return move_fd(fd, STDIN_FILENO, false);
537 }
538
80876c20
LP
539 default:
540 assert_not_reached("Unknown input type");
541 }
542}
543
41fc585a
LP
544static bool can_inherit_stderr_from_stdout(
545 const ExecContext *context,
546 ExecOutput o,
547 ExecOutput e) {
548
549 assert(context);
550
551 /* Returns true, if given the specified STDERR and STDOUT output we can directly dup() the stdout fd to the
552 * stderr fd */
553
554 if (e == EXEC_OUTPUT_INHERIT)
555 return true;
556 if (e != o)
557 return false;
558
559 if (e == EXEC_OUTPUT_NAMED_FD)
560 return streq_ptr(context->stdio_fdname[STDOUT_FILENO], context->stdio_fdname[STDERR_FILENO]);
561
562 if (IN_SET(e, EXEC_OUTPUT_FILE, EXEC_OUTPUT_FILE_APPEND))
563 return streq_ptr(context->stdio_file[STDOUT_FILENO], context->stdio_file[STDERR_FILENO]);
564
565 return true;
566}
567
a34ceba6 568static int setup_output(
34cf6c43 569 const Unit *unit,
a34ceba6
LP
570 const ExecContext *context,
571 const ExecParameters *params,
572 int fileno,
573 int socket_fd,
2caa38e9 574 const int named_iofds[static 3],
a34ceba6 575 const char *ident,
7bce046b
LP
576 uid_t uid,
577 gid_t gid,
578 dev_t *journal_stream_dev,
579 ino_t *journal_stream_ino) {
a34ceba6 580
4f2d528d
LP
581 ExecOutput o;
582 ExecInput i;
47c1d80d 583 int r;
4f2d528d 584
f2341e0a 585 assert(unit);
80876c20 586 assert(context);
a34ceba6 587 assert(params);
80876c20 588 assert(ident);
7bce046b
LP
589 assert(journal_stream_dev);
590 assert(journal_stream_ino);
80876c20 591
a34ceba6
LP
592 if (fileno == STDOUT_FILENO && params->stdout_fd >= 0) {
593
594 if (dup2(params->stdout_fd, STDOUT_FILENO) < 0)
595 return -errno;
596
597 return STDOUT_FILENO;
598 }
599
600 if (fileno == STDERR_FILENO && params->stderr_fd >= 0) {
601 if (dup2(params->stderr_fd, STDERR_FILENO) < 0)
602 return -errno;
603
604 return STDERR_FILENO;
605 }
606
08f3be7a 607 i = fixup_input(context, socket_fd, params->flags & EXEC_APPLY_TTY_STDIN);
03fd9c49 608 o = fixup_output(context->std_output, socket_fd);
4f2d528d 609
eb17e935
MS
610 if (fileno == STDERR_FILENO) {
611 ExecOutput e;
612 e = fixup_output(context->std_error, socket_fd);
80876c20 613
eb17e935
MS
614 /* This expects the input and output are already set up */
615
616 /* Don't change the stderr file descriptor if we inherit all
617 * the way and are not on a tty */
618 if (e == EXEC_OUTPUT_INHERIT &&
619 o == EXEC_OUTPUT_INHERIT &&
620 i == EXEC_INPUT_NULL &&
621 !is_terminal_input(context->std_input) &&
622 getppid () != 1)
623 return fileno;
624
625 /* Duplicate from stdout if possible */
41fc585a 626 if (can_inherit_stderr_from_stdout(context, o, e))
eb17e935 627 return dup2(STDOUT_FILENO, fileno) < 0 ? -errno : fileno;
071830ff 628
eb17e935 629 o = e;
80876c20 630
eb17e935 631 } else if (o == EXEC_OUTPUT_INHERIT) {
21d21ea4
LP
632 /* If input got downgraded, inherit the original value */
633 if (i == EXEC_INPUT_NULL && is_terminal_input(context->std_input))
1e22b5cd 634 return open_terminal_as(exec_context_tty_path(context), O_WRONLY, fileno);
21d21ea4 635
08f3be7a
LP
636 /* If the input is connected to anything that's not a /dev/null or a data fd, inherit that... */
637 if (!IN_SET(i, EXEC_INPUT_NULL, EXEC_INPUT_DATA))
eb17e935 638 return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
071830ff 639
acb591e4
LP
640 /* If we are not started from PID 1 we just inherit STDOUT from our parent process. */
641 if (getppid() != 1)
eb17e935 642 return fileno;
94f04347 643
eb17e935
MS
644 /* We need to open /dev/null here anew, to get the right access mode. */
645 return open_null_as(O_WRONLY, fileno);
071830ff 646 }
94f04347 647
eb17e935 648 switch (o) {
80876c20
LP
649
650 case EXEC_OUTPUT_NULL:
eb17e935 651 return open_null_as(O_WRONLY, fileno);
80876c20
LP
652
653 case EXEC_OUTPUT_TTY:
4f2d528d 654 if (is_terminal_input(i))
eb17e935 655 return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
80876c20
LP
656
657 /* We don't reset the terminal if this is just about output */
1e22b5cd 658 return open_terminal_as(exec_context_tty_path(context), O_WRONLY, fileno);
80876c20
LP
659
660 case EXEC_OUTPUT_SYSLOG:
28dbc1e8 661 case EXEC_OUTPUT_SYSLOG_AND_CONSOLE:
9a6bca7a 662 case EXEC_OUTPUT_KMSG:
28dbc1e8 663 case EXEC_OUTPUT_KMSG_AND_CONSOLE:
706343f4
LP
664 case EXEC_OUTPUT_JOURNAL:
665 case EXEC_OUTPUT_JOURNAL_AND_CONSOLE:
af635cf3 666 r = connect_logger_as(unit, context, params, o, ident, fileno, uid, gid);
47c1d80d 667 if (r < 0) {
82677ae4 668 log_unit_warning_errno(unit, r, "Failed to connect %s to the journal socket, ignoring: %m", fileno == STDOUT_FILENO ? "stdout" : "stderr");
eb17e935 669 r = open_null_as(O_WRONLY, fileno);
7bce046b
LP
670 } else {
671 struct stat st;
672
673 /* If we connected this fd to the journal via a stream, patch the device/inode into the passed
674 * parameters, but only then. This is useful so that we can set $JOURNAL_STREAM that permits
ab2116b1
LP
675 * services to detect whether they are connected to the journal or not.
676 *
677 * If both stdout and stderr are connected to a stream then let's make sure to store the data
678 * about STDERR as that's usually the best way to do logging. */
7bce046b 679
ab2116b1
LP
680 if (fstat(fileno, &st) >= 0 &&
681 (*journal_stream_ino == 0 || fileno == STDERR_FILENO)) {
7bce046b
LP
682 *journal_stream_dev = st.st_dev;
683 *journal_stream_ino = st.st_ino;
684 }
47c1d80d
MS
685 }
686 return r;
4f2d528d
LP
687
688 case EXEC_OUTPUT_SOCKET:
689 assert(socket_fd >= 0);
e75a9ed1 690
eb17e935 691 return dup2(socket_fd, fileno) < 0 ? -errno : fileno;
94f04347 692
52c239d7 693 case EXEC_OUTPUT_NAMED_FD:
e75a9ed1
LP
694 assert(named_iofds[fileno] >= 0);
695
52c239d7
LB
696 (void) fd_nonblock(named_iofds[fileno], false);
697 return dup2(named_iofds[fileno], fileno) < 0 ? -errno : fileno;
698
566b7d23
ZD
699 case EXEC_OUTPUT_FILE:
700 case EXEC_OUTPUT_FILE_APPEND: {
2038c3f5 701 bool rw;
566b7d23 702 int fd, flags;
2038c3f5
LP
703
704 assert(context->stdio_file[fileno]);
705
706 rw = context->std_input == EXEC_INPUT_FILE &&
707 streq_ptr(context->stdio_file[fileno], context->stdio_file[STDIN_FILENO]);
708
709 if (rw)
710 return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
711
566b7d23
ZD
712 flags = O_WRONLY;
713 if (o == EXEC_OUTPUT_FILE_APPEND)
714 flags |= O_APPEND;
715
716 fd = acquire_path(context->stdio_file[fileno], flags, 0666 & ~context->umask);
2038c3f5
LP
717 if (fd < 0)
718 return fd;
719
566b7d23 720 return move_fd(fd, fileno, 0);
2038c3f5
LP
721 }
722
94f04347 723 default:
80876c20 724 assert_not_reached("Unknown error type");
94f04347 725 }
071830ff
LP
726}
727
02a51aba 728static int chown_terminal(int fd, uid_t uid) {
4b3b5bc7 729 int r;
02a51aba
LP
730
731 assert(fd >= 0);
02a51aba 732
1ff74fb6 733 /* Before we chown/chmod the TTY, let's ensure this is actually a tty */
4b3b5bc7
LP
734 if (isatty(fd) < 1) {
735 if (IN_SET(errno, EINVAL, ENOTTY))
736 return 0; /* not a tty */
1ff74fb6 737
02a51aba 738 return -errno;
4b3b5bc7 739 }
02a51aba 740
4b3b5bc7
LP
741 /* This might fail. What matters are the results. */
742 r = fchmod_and_chown(fd, TTY_MODE, uid, -1);
743 if (r < 0)
744 return r;
02a51aba 745
4b3b5bc7 746 return 1;
02a51aba
LP
747}
748
7d5ceb64 749static int setup_confirm_stdio(const char *vc, int *_saved_stdin, int *_saved_stdout) {
3d18b167
LP
750 _cleanup_close_ int fd = -1, saved_stdin = -1, saved_stdout = -1;
751 int r;
80876c20 752
80876c20
LP
753 assert(_saved_stdin);
754 assert(_saved_stdout);
755
af6da548
LP
756 saved_stdin = fcntl(STDIN_FILENO, F_DUPFD, 3);
757 if (saved_stdin < 0)
758 return -errno;
80876c20 759
af6da548 760 saved_stdout = fcntl(STDOUT_FILENO, F_DUPFD, 3);
3d18b167
LP
761 if (saved_stdout < 0)
762 return -errno;
80876c20 763
8854d795 764 fd = acquire_terminal(vc, ACQUIRE_TERMINAL_WAIT, DEFAULT_CONFIRM_USEC);
3d18b167
LP
765 if (fd < 0)
766 return fd;
80876c20 767
af6da548
LP
768 r = chown_terminal(fd, getuid());
769 if (r < 0)
3d18b167 770 return r;
02a51aba 771
3d18b167
LP
772 r = reset_terminal_fd(fd, true);
773 if (r < 0)
774 return r;
80876c20 775
2b33ab09 776 r = rearrange_stdio(fd, fd, STDERR_FILENO);
3d18b167 777 fd = -1;
2b33ab09
LP
778 if (r < 0)
779 return r;
80876c20
LP
780
781 *_saved_stdin = saved_stdin;
782 *_saved_stdout = saved_stdout;
783
3d18b167 784 saved_stdin = saved_stdout = -1;
80876c20 785
3d18b167 786 return 0;
80876c20
LP
787}
788
63d77c92 789static void write_confirm_error_fd(int err, int fd, const Unit *u) {
3b20f877
FB
790 assert(err < 0);
791
792 if (err == -ETIMEDOUT)
63d77c92 793 dprintf(fd, "Confirmation question timed out for %s, assuming positive response.\n", u->id);
3b20f877
FB
794 else {
795 errno = -err;
63d77c92 796 dprintf(fd, "Couldn't ask confirmation for %s: %m, assuming positive response.\n", u->id);
3b20f877
FB
797 }
798}
799
63d77c92 800static void write_confirm_error(int err, const char *vc, const Unit *u) {
03e334a1 801 _cleanup_close_ int fd = -1;
80876c20 802
3b20f877 803 assert(vc);
80876c20 804
7d5ceb64 805 fd = open_terminal(vc, O_WRONLY|O_NOCTTY|O_CLOEXEC);
af6da548 806 if (fd < 0)
3b20f877 807 return;
80876c20 808
63d77c92 809 write_confirm_error_fd(err, fd, u);
af6da548 810}
80876c20 811
/* Undoes setup_confirm_stdio(): releases the terminal, puts the saved stdin/stdout copies back in place and
 * closes the copies. Returns 0, or the negative errno of the last dup2() that failed. */
static int restore_confirm_stdio(int *saved_stdin, int *saved_stdout) {
        int ret = 0;

        assert(saved_stdin);
        assert(saved_stdout);

        release_terminal();

        if (*saved_stdin >= 0 && dup2(*saved_stdin, STDIN_FILENO) < 0)
                ret = -errno;

        if (*saved_stdout >= 0 && dup2(*saved_stdout, STDOUT_FILENO) < 0)
                ret = -errno;

        /* Close the copies and store whatever safe_close() returns back into the caller's variables */
        *saved_stdin = safe_close(*saved_stdin);
        *saved_stdout = safe_close(*saved_stdout);

        return ret;
}
833
3b20f877
FB
834enum {
835 CONFIRM_PRETEND_FAILURE = -1,
836 CONFIRM_PRETEND_SUCCESS = 0,
837 CONFIRM_EXECUTE = 1,
838};
839
eedf223a 840static int ask_for_confirmation(const char *vc, Unit *u, const char *cmdline) {
af6da548 841 int saved_stdout = -1, saved_stdin = -1, r;
2bcd3c26 842 _cleanup_free_ char *e = NULL;
3b20f877 843 char c;
af6da548 844
3b20f877 845 /* For any internal errors, assume a positive response. */
7d5ceb64 846 r = setup_confirm_stdio(vc, &saved_stdin, &saved_stdout);
3b20f877 847 if (r < 0) {
63d77c92 848 write_confirm_error(r, vc, u);
3b20f877
FB
849 return CONFIRM_EXECUTE;
850 }
af6da548 851
b0eb2944
FB
852 /* confirm_spawn might have been disabled while we were sleeping. */
853 if (manager_is_confirm_spawn_disabled(u->manager)) {
854 r = 1;
855 goto restore_stdio;
856 }
af6da548 857
2bcd3c26
FB
858 e = ellipsize(cmdline, 60, 100);
859 if (!e) {
860 log_oom();
861 r = CONFIRM_EXECUTE;
862 goto restore_stdio;
863 }
af6da548 864
d172b175 865 for (;;) {
539622bd 866 r = ask_char(&c, "yfshiDjcn", "Execute %s? [y, f, s – h for help] ", e);
d172b175 867 if (r < 0) {
63d77c92 868 write_confirm_error_fd(r, STDOUT_FILENO, u);
d172b175
FB
869 r = CONFIRM_EXECUTE;
870 goto restore_stdio;
871 }
af6da548 872
d172b175 873 switch (c) {
b0eb2944
FB
874 case 'c':
875 printf("Resuming normal execution.\n");
876 manager_disable_confirm_spawn();
877 r = 1;
878 break;
dd6f9ac0
FB
879 case 'D':
880 unit_dump(u, stdout, " ");
881 continue; /* ask again */
d172b175
FB
882 case 'f':
883 printf("Failing execution.\n");
884 r = CONFIRM_PRETEND_FAILURE;
885 break;
886 case 'h':
b0eb2944
FB
887 printf(" c - continue, proceed without asking anymore\n"
888 " D - dump, show the state of the unit\n"
dd6f9ac0 889 " f - fail, don't execute the command and pretend it failed\n"
d172b175 890 " h - help\n"
eedf223a 891 " i - info, show a short summary of the unit\n"
56fde33a 892 " j - jobs, show jobs that are in progress\n"
d172b175
FB
893 " s - skip, don't execute the command and pretend it succeeded\n"
894 " y - yes, execute the command\n");
dd6f9ac0 895 continue; /* ask again */
eedf223a
FB
896 case 'i':
897 printf(" Description: %s\n"
898 " Unit: %s\n"
899 " Command: %s\n",
900 u->id, u->description, cmdline);
901 continue; /* ask again */
56fde33a
FB
902 case 'j':
903 manager_dump_jobs(u->manager, stdout, " ");
904 continue; /* ask again */
539622bd
FB
905 case 'n':
906 /* 'n' was removed in favor of 'f'. */
907 printf("Didn't understand 'n', did you mean 'f'?\n");
908 continue; /* ask again */
d172b175
FB
909 case 's':
910 printf("Skipping execution.\n");
911 r = CONFIRM_PRETEND_SUCCESS;
912 break;
913 case 'y':
914 r = CONFIRM_EXECUTE;
915 break;
916 default:
917 assert_not_reached("Unhandled choice");
918 }
3b20f877 919 break;
3b20f877 920 }
af6da548 921
3b20f877 922restore_stdio:
af6da548 923 restore_confirm_stdio(&saved_stdin, &saved_stdout);
af6da548 924 return r;
80876c20
LP
925}
926
4d885bd3
DH
927static int get_fixed_user(const ExecContext *c, const char **user,
928 uid_t *uid, gid_t *gid,
929 const char **home, const char **shell) {
81a2b7ce 930 int r;
4d885bd3 931 const char *name;
81a2b7ce 932
4d885bd3 933 assert(c);
81a2b7ce 934
23deef88
LP
935 if (!c->user)
936 return 0;
937
4d885bd3
DH
938 /* Note that we don't set $HOME or $SHELL if they are not particularly enlightening anyway
939 * (i.e. are "/" or "/bin/nologin"). */
81a2b7ce 940
23deef88 941 name = c->user;
fafff8f1 942 r = get_user_creds(&name, uid, gid, home, shell, USER_CREDS_CLEAN);
4d885bd3
DH
943 if (r < 0)
944 return r;
81a2b7ce 945
4d885bd3
DH
946 *user = name;
947 return 0;
948}
949
950static int get_fixed_group(const ExecContext *c, const char **group, gid_t *gid) {
951 int r;
952 const char *name;
953
954 assert(c);
955
956 if (!c->group)
957 return 0;
958
959 name = c->group;
fafff8f1 960 r = get_group_creds(&name, gid, 0);
4d885bd3
DH
961 if (r < 0)
962 return r;
963
964 *group = name;
965 return 0;
966}
967
cdc5d5c5
DH
968static int get_supplementary_groups(const ExecContext *c, const char *user,
969 const char *group, gid_t gid,
970 gid_t **supplementary_gids, int *ngids) {
4d885bd3
DH
971 char **i;
972 int r, k = 0;
973 int ngroups_max;
974 bool keep_groups = false;
975 gid_t *groups = NULL;
976 _cleanup_free_ gid_t *l_gids = NULL;
977
978 assert(c);
979
bbeea271
DH
980 /*
981 * If user is given, then lookup GID and supplementary groups list.
982 * We avoid NSS lookups for gid=0. Also we have to initialize groups
cdc5d5c5
DH
983 * here and as early as possible so we keep the list of supplementary
984 * groups of the caller.
bbeea271
DH
985 */
986 if (user && gid_is_valid(gid) && gid != 0) {
987 /* First step, initialize groups from /etc/groups */
988 if (initgroups(user, gid) < 0)
989 return -errno;
990
991 keep_groups = true;
992 }
993
ac6e8be6 994 if (strv_isempty(c->supplementary_groups))
4d885bd3
DH
995 return 0;
996
366ddd25
DH
997 /*
998 * If SupplementaryGroups= was passed then NGROUPS_MAX has to
999 * be positive, otherwise fail.
1000 */
1001 errno = 0;
1002 ngroups_max = (int) sysconf(_SC_NGROUPS_MAX);
66855de7
LP
1003 if (ngroups_max <= 0)
1004 return errno_or_else(EOPNOTSUPP);
366ddd25 1005
4d885bd3
DH
1006 l_gids = new(gid_t, ngroups_max);
1007 if (!l_gids)
1008 return -ENOMEM;
81a2b7ce 1009
4d885bd3
DH
1010 if (keep_groups) {
1011 /*
1012 * Lookup the list of groups that the user belongs to, we
1013 * avoid NSS lookups here too for gid=0.
1014 */
1015 k = ngroups_max;
1016 if (getgrouplist(user, gid, l_gids, &k) < 0)
1017 return -EINVAL;
1018 } else
1019 k = 0;
81a2b7ce 1020
4d885bd3
DH
1021 STRV_FOREACH(i, c->supplementary_groups) {
1022 const char *g;
81a2b7ce 1023
4d885bd3
DH
1024 if (k >= ngroups_max)
1025 return -E2BIG;
81a2b7ce 1026
4d885bd3 1027 g = *i;
fafff8f1 1028 r = get_group_creds(&g, l_gids+k, 0);
4d885bd3
DH
1029 if (r < 0)
1030 return r;
81a2b7ce 1031
4d885bd3
DH
1032 k++;
1033 }
81a2b7ce 1034
4d885bd3
DH
1035 /*
1036 * Sets ngids to zero to drop all supplementary groups, happens
1037 * when we are under root and SupplementaryGroups= is empty.
1038 */
1039 if (k == 0) {
1040 *ngids = 0;
1041 return 0;
1042 }
81a2b7ce 1043
4d885bd3
DH
1044 /* Otherwise get the final list of supplementary groups */
1045 groups = memdup(l_gids, sizeof(gid_t) * k);
1046 if (!groups)
1047 return -ENOMEM;
1048
1049 *supplementary_gids = groups;
1050 *ngids = k;
1051
1052 groups = NULL;
1053
1054 return 0;
1055}
1056
/* Applies the group credentials: first the supplementary group list (only if it is non-empty), then - if
 * 'gid' is valid - the real, effective and saved GID. Returns 0 on success or a negative errno-style
 * error. */
static int enforce_groups(gid_t gid, const gid_t *supplementary_gids, int ngids) {

        /* Handle SupplementaryGroups= if it is not empty */
        if (ngids > 0) {
                int r = maybe_setgroups(ngids, supplementary_gids);
                if (r < 0)
                        return r;
        }

        if (!gid_is_valid(gid))
                return 0;

        /* Then set our gids */
        if (setresgid(gid, gid, gid) < 0)
                return -errno;

        return 0;
}
1075
1076static int enforce_user(const ExecContext *context, uid_t uid) {
81a2b7ce
LP
1077 assert(context);
1078
4d885bd3
DH
1079 if (!uid_is_valid(uid))
1080 return 0;
1081
479050b3 1082 /* Sets (but doesn't look up) the uid and make sure we keep the
81a2b7ce
LP
1083 * capabilities while doing so. */
1084
479050b3 1085 if (context->capability_ambient_set != 0) {
81a2b7ce
LP
1086
1087 /* First step: If we need to keep capabilities but
1088 * drop privileges we need to make sure we keep our
cbb21cca 1089 * caps, while we drop privileges. */
693ced48 1090 if (uid != 0) {
cbb21cca 1091 int sb = context->secure_bits | 1<<SECURE_KEEP_CAPS;
693ced48
LP
1092
1093 if (prctl(PR_GET_SECUREBITS) != sb)
1094 if (prctl(PR_SET_SECUREBITS, sb) < 0)
1095 return -errno;
1096 }
81a2b7ce
LP
1097 }
1098
479050b3 1099 /* Second step: actually set the uids */
81a2b7ce
LP
1100 if (setresuid(uid, uid, uid) < 0)
1101 return -errno;
1102
1103 /* At this point we should have all necessary capabilities but
1104 are otherwise a normal user. However, the caps might got
1105 corrupted due to the setresuid() so we need clean them up
1106 later. This is done outside of this call. */
1107
1108 return 0;
1109}
1110
349cc4a5 1111#if HAVE_PAM
5b6319dc
LP
1112
1113static int null_conv(
1114 int num_msg,
1115 const struct pam_message **msg,
1116 struct pam_response **resp,
1117 void *appdata_ptr) {
1118
1119 /* We don't support conversations */
1120
1121 return PAM_CONV_ERR;
1122}
1123
cefc33ae
LP
1124#endif
1125
5b6319dc
LP
/* Opens a PAM session for service 'name' and user 'user', then forks off a helper process named
 * "(sd-pam)" that closes the session again once this process has gone away.
 *
 * name, user  - passed to pam_start()
 * uid, gid    - credentials the helper child switches to before it starts waiting
 * tty         - optional TTY path set as PAM_TTY; if NULL, the TTY name of stdin is tried instead
 * env         - environment exported to PAM via pam_putenv(); on success it is replaced by the
 *               list returned by pam_getenvlist()
 * fds, n_fds  - file descriptors that are closed in the helper child
 *
 * On the success path the value of strv_free_and_replace() is returned. On failure a negative
 * errno-style value is returned; failures reported by PAM itself are turned into -EPERM. When
 * built without HAVE_PAM the function does nothing and returns 0. */
static int setup_pam(
                const char *name,
                const char *user,
                uid_t uid,
                gid_t gid,
                const char *tty,
                char ***env,
                int fds[], size_t n_fds) {

#if HAVE_PAM

        static const struct pam_conv conv = {
                .conv = null_conv,
                .appdata_ptr = NULL
        };

        _cleanup_(barrier_destroy) Barrier barrier = BARRIER_NULL;
        pam_handle_t *handle = NULL;
        sigset_t old_ss;
        int pam_code = PAM_SUCCESS, r;
        char **entry, **pam_env = NULL;
        bool close_session = false;
        pid_t pam_pid = 0, parent_pid;
        int flags = 0;

        assert(name);
        assert(user);
        assert(env);

        /* PAM is set up in this process, and only afterwards we fork. The forked child sticks around
         * until it is terminated (through the parent-death signal, or by systemd via the cgroup
         * logic) and then removes the PAM session again. This process goes on to exec() the actual
         * daemon. Doing it this way round ensures the daemon's main PID is the one that was forked
         * initially. */

        r = barrier_create(&barrier);
        if (r < 0)
                goto fail;

        /* Ask PAM to stay silent unless debug logging is enabled */
        if (log_get_max_level() < LOG_DEBUG)
                flags |= PAM_SILENT;

        pam_code = pam_start(name, user, &conv, &handle);
        if (pam_code != PAM_SUCCESS) {
                handle = NULL;
                goto fail;
        }

        if (!tty) {
                _cleanup_free_ char *q = NULL;

                /* No TTY was passed explicitly, but stdin might be connected to one. If so, use its
                 * name. */

                if (getttyname_malloc(STDIN_FILENO, &q) >= 0)
                        tty = strjoina("/dev/", q);
        }

        if (tty) {
                pam_code = pam_set_item(handle, PAM_TTY, tty);
                if (pam_code != PAM_SUCCESS)
                        goto fail;
        }

        /* Hand every entry of the caller's environment over to PAM */
        STRV_FOREACH(entry, *env) {
                pam_code = pam_putenv(handle, *entry);
                if (pam_code != PAM_SUCCESS)
                        goto fail;
        }

        pam_code = pam_acct_mgmt(handle, flags);
        if (pam_code != PAM_SUCCESS)
                goto fail;

        pam_code = pam_open_session(handle, flags);
        if (pam_code != PAM_SUCCESS)
                goto fail;

        /* From here on the failure path has a session to close */
        close_session = true;

        pam_env = pam_getenvlist(handle);
        if (!pam_env) {
                pam_code = PAM_BUF_ERR;
                goto fail;
        }

        /* SIGTERM is blocked before forking, so that it cannot get lost in the child */

        assert_se(sigprocmask_many(SIG_BLOCK, &old_ss, SIGTERM, -1) >= 0);

        parent_pid = getpid_cached();

        r = safe_fork("(sd-pam)", 0, &pam_pid);
        if (r < 0)
                goto fail;
        if (r == 0) {
                int sig, exit_code = EXIT_PAM;

                /* Child: its only purpose is to close the PAM session on termination */
                barrier_set_role(&barrier, BARRIER_CHILD);

                /* Don't keep the passed fds open in this child. The assumption is that apart from
                 * these only fds opened by PAM are open here. */
                (void) close_many(fds, n_fds);

                /* Drop privileges: none are needed for pam_close_session(), and doing so makes
                 * PR_SET_PDEATHSIG work in most cases. Failures are only logged, not acted upon; in
                 * that case sd-pam is expected to fail to exit normally. */

                r = maybe_setgroups(0, NULL);
                if (r < 0)
                        log_warning_errno(r, "Failed to setgroups() in sd-pam: %m");
                if (setresgid(gid, gid, gid) < 0)
                        log_warning_errno(errno, "Failed to setresgid() in sd-pam: %m");
                if (setresuid(uid, uid, uid) < 0)
                        log_warning_errno(errno, "Failed to setresuid() in sd-pam: %m");

                (void) ignore_signals(SIGPIPE, -1);

                /* Request SIGTERM when the parent dies. This only works if the setresuid() above
                 * succeeded, since otherwise the kernel does not let unprivileged parents kill their
                 * privileged children this way. The control group kill logic covers the rest. */
                if (prctl(PR_SET_PDEATHSIG, SIGTERM) < 0)
                        goto child_finish;

                /* Report to the parent that setup, in particular the privilege dropping, is complete,
                 * so that unit setup does not race against the setresuid() call above.
                 *
                 * Should the parent have aborted already this is noticed below, hence the result is
                 * ignored here. */
                (void) barrier_place(&barrier);

                /* Only wait for SIGTERM if the parent is still the process that forked us */
                if (getppid() == parent_pid) {
                        sigset_t ss;

                        assert_se(sigemptyset(&ss) >= 0);
                        assert_se(sigaddset(&ss, SIGTERM) >= 0);

                        /* Retry on EINTR, give up on any other failure */
                        while (sigwait(&ss, &sig) < 0)
                                if (errno != EINTR)
                                        goto child_finish;

                        assert(sig == SIGTERM);
                }

                /* The session is ended only if the parent is gone */
                if (getppid() != parent_pid) {
                        pam_code = pam_close_session(handle, flags);
                        if (pam_code != PAM_SUCCESS)
                                goto child_finish;
                }

                exit_code = 0;

        child_finish:
                pam_end(handle, pam_code | flags);
                _exit(exit_code);
        }

        barrier_set_role(&barrier, BARRIER_PARENT);

        /* The fork succeeded, so all PAM cleanup is the child's business now; forget the handle. */
        handle = NULL;

        /* Restore the signal mask in the parent, unblocking SIGTERM again */
        assert_se(sigprocmask(SIG_SETMASK, &old_ss, NULL) >= 0);

        /* PAM modules might have opened the log; close it explicitly, as that fd is not wanted
         * around. */
        closelog();

        /* Wait synchronously for the child to finish its initialization. A failure cannot be
         * recovered from, hence it is merely logged. */
        if (!barrier_place_and_sync(&barrier))
                log_error("PAM initialization failed");

        return strv_free_and_replace(*env, pam_env);

fail:
        if (pam_code != PAM_SUCCESS) {
                log_error("PAM failed: %s", pam_strerror(handle, pam_code));
                r = -EPERM; /* PAM errors do not map to errno */
        } else
                log_error_errno(r, "PAM failed: %m");

        if (handle) {
                if (close_session)
                        pam_code = pam_close_session(handle, flags);

                pam_end(handle, pam_code | flags);
        }

        strv_free(pam_env);
        closelog();

        return r;
#else
        return 0;
#endif
}
5b6319dc 1341
5d6b1584
LP
/* Renames the current process to "(<name>)", where <name> is derived from the last component of
 * 'path'. At most the final 8 characters of that component are used, so that the result fits
 * into 10 characters (the length of "/sbin/init") and looks pretty in /bin/ps. If the component
 * is empty the name "(...)" is used instead. */
static void rename_process_from_path(const char *path) {
        char process_name[11];
        const char *base;
        size_t len;

        base = basename(path);
        if (isempty(base)) {
                rename_process("(...)");
                return;
        }

        len = strlen(base);
        if (len > 8) {
                /* Keep the tail: it is usually the more interesting part, since the beginning
                 * might just be "systemd-" */
                base += len - 8;
                len = 8;
        }

        /* Layout: '(' + up to 8 characters + ')' + NUL, i.e. at most 11 bytes */
        process_name[0] = '(';
        memcpy(process_name + 1, base, len);
        process_name[len + 1] = ')';
        process_name[len + 2] = 0;

        rename_process(process_name);
}
1372
469830d1
LP
1373static bool context_has_address_families(const ExecContext *c) {
1374 assert(c);
1375
1376 return c->address_families_whitelist ||
1377 !set_isempty(c->address_families);
1378}
1379
1380static bool context_has_syscall_filters(const ExecContext *c) {
1381 assert(c);
1382
1383 return c->syscall_whitelist ||
8cfa775f 1384 !hashmap_isempty(c->syscall_filter);
469830d1
LP
1385}
1386
1387static bool context_has_no_new_privileges(const ExecContext *c) {
1388 assert(c);
1389
1390 if (c->no_new_privileges)
1391 return true;
1392
1393 if (have_effective_cap(CAP_SYS_ADMIN)) /* if we are privileged, we don't need NNP */
1394 return false;
1395
1396 /* We need NNP if we have any form of seccomp and are unprivileged */
1397 return context_has_address_families(c) ||
1398 c->memory_deny_write_execute ||
1399 c->restrict_realtime ||
f69567cb 1400 c->restrict_suid_sgid ||
469830d1
LP
1401 exec_context_restrict_namespaces_set(c) ||
1402 c->protect_kernel_tunables ||
1403 c->protect_kernel_modules ||
1404 c->private_devices ||
1405 context_has_syscall_filters(c) ||
78e864e5 1406 !set_isempty(c->syscall_archs) ||
aecd5ac6
TM
1407 c->lock_personality ||
1408 c->protect_hostname;
469830d1
LP
1409}
1410
349cc4a5 1411#if HAVE_SECCOMP
17df7223 1412
83f12b27 1413static bool skip_seccomp_unavailable(const Unit* u, const char* msg) {
f673b62d
LP
1414
1415 if (is_seccomp_available())
1416 return false;
1417
f673b62d 1418 log_unit_debug(u, "SECCOMP features not detected in the kernel, skipping %s", msg);
f673b62d 1419 return true;
83f12b27
FS
1420}
1421
165a31c0 1422static int apply_syscall_filter(const Unit* u, const ExecContext *c, bool needs_ambient_hack) {
469830d1 1423 uint32_t negative_action, default_action, action;
165a31c0 1424 int r;
8351ceae 1425
469830d1 1426 assert(u);
c0467cf3 1427 assert(c);
8351ceae 1428
469830d1 1429 if (!context_has_syscall_filters(c))
83f12b27
FS
1430 return 0;
1431
469830d1
LP
1432 if (skip_seccomp_unavailable(u, "SystemCallFilter="))
1433 return 0;
e9642be2 1434
ccc16c78 1435 negative_action = c->syscall_errno == 0 ? scmp_act_kill_process() : SCMP_ACT_ERRNO(c->syscall_errno);
e9642be2 1436
469830d1
LP
1437 if (c->syscall_whitelist) {
1438 default_action = negative_action;
1439 action = SCMP_ACT_ALLOW;
7c66bae2 1440 } else {
469830d1
LP
1441 default_action = SCMP_ACT_ALLOW;
1442 action = negative_action;
57183d11 1443 }
8351ceae 1444
165a31c0
LP
1445 if (needs_ambient_hack) {
1446 r = seccomp_filter_set_add(c->syscall_filter, c->syscall_whitelist, syscall_filter_sets + SYSCALL_FILTER_SET_SETUID);
1447 if (r < 0)
1448 return r;
1449 }
1450
b54f36c6 1451 return seccomp_load_syscall_filter_set_raw(default_action, c->syscall_filter, action, false);
4298d0b5
LP
1452}
1453
469830d1
LP
1454static int apply_syscall_archs(const Unit *u, const ExecContext *c) {
1455 assert(u);
4298d0b5
LP
1456 assert(c);
1457
469830d1 1458 if (set_isempty(c->syscall_archs))
83f12b27
FS
1459 return 0;
1460
469830d1
LP
1461 if (skip_seccomp_unavailable(u, "SystemCallArchitectures="))
1462 return 0;
4298d0b5 1463
469830d1
LP
1464 return seccomp_restrict_archs(c->syscall_archs);
1465}
4298d0b5 1466
469830d1
LP
1467static int apply_address_families(const Unit* u, const ExecContext *c) {
1468 assert(u);
1469 assert(c);
4298d0b5 1470
469830d1
LP
1471 if (!context_has_address_families(c))
1472 return 0;
4298d0b5 1473
469830d1
LP
1474 if (skip_seccomp_unavailable(u, "RestrictAddressFamilies="))
1475 return 0;
4298d0b5 1476
469830d1 1477 return seccomp_restrict_address_families(c->address_families, c->address_families_whitelist);
8351ceae 1478}
4298d0b5 1479
83f12b27 1480static int apply_memory_deny_write_execute(const Unit* u, const ExecContext *c) {
469830d1 1481 assert(u);
f3e43635
TM
1482 assert(c);
1483
469830d1 1484 if (!c->memory_deny_write_execute)
83f12b27
FS
1485 return 0;
1486
469830d1
LP
1487 if (skip_seccomp_unavailable(u, "MemoryDenyWriteExecute="))
1488 return 0;
f3e43635 1489
469830d1 1490 return seccomp_memory_deny_write_execute();
f3e43635
TM
1491}
1492
83f12b27 1493static int apply_restrict_realtime(const Unit* u, const ExecContext *c) {
469830d1 1494 assert(u);
f4170c67
LP
1495 assert(c);
1496
469830d1 1497 if (!c->restrict_realtime)
83f12b27
FS
1498 return 0;
1499
469830d1
LP
1500 if (skip_seccomp_unavailable(u, "RestrictRealtime="))
1501 return 0;
f4170c67 1502
469830d1 1503 return seccomp_restrict_realtime();
f4170c67
LP
1504}
1505
f69567cb
LP
1506static int apply_restrict_suid_sgid(const Unit* u, const ExecContext *c) {
1507 assert(u);
1508 assert(c);
1509
1510 if (!c->restrict_suid_sgid)
1511 return 0;
1512
1513 if (skip_seccomp_unavailable(u, "RestrictSUIDSGID="))
1514 return 0;
1515
1516 return seccomp_restrict_suid_sgid();
1517}
1518
59e856c7 1519static int apply_protect_sysctl(const Unit *u, const ExecContext *c) {
469830d1 1520 assert(u);
59eeb84b
LP
1521 assert(c);
1522
1523 /* Turn off the legacy sysctl() system call. Many distributions turn this off while building the kernel, but
1524 * let's protect even those systems where this is left on in the kernel. */
1525
469830d1 1526 if (!c->protect_kernel_tunables)
59eeb84b
LP
1527 return 0;
1528
469830d1
LP
1529 if (skip_seccomp_unavailable(u, "ProtectKernelTunables="))
1530 return 0;
59eeb84b 1531
469830d1 1532 return seccomp_protect_sysctl();
59eeb84b
LP
1533}
1534
59e856c7 1535static int apply_protect_kernel_modules(const Unit *u, const ExecContext *c) {
469830d1 1536 assert(u);
502d704e
DH
1537 assert(c);
1538
25a8d8a0 1539 /* Turn off module syscalls on ProtectKernelModules=yes */
502d704e 1540
469830d1
LP
1541 if (!c->protect_kernel_modules)
1542 return 0;
1543
502d704e
DH
1544 if (skip_seccomp_unavailable(u, "ProtectKernelModules="))
1545 return 0;
1546
b54f36c6 1547 return seccomp_load_syscall_filter_set(SCMP_ACT_ALLOW, syscall_filter_sets + SYSCALL_FILTER_SET_MODULE, SCMP_ACT_ERRNO(EPERM), false);
502d704e
DH
1548}
1549
59e856c7 1550static int apply_private_devices(const Unit *u, const ExecContext *c) {
469830d1 1551 assert(u);
ba128bb8
LP
1552 assert(c);
1553
8f81a5f6 1554 /* If PrivateDevices= is set, also turn off iopl and all @raw-io syscalls. */
ba128bb8 1555
469830d1
LP
1556 if (!c->private_devices)
1557 return 0;
1558
ba128bb8
LP
1559 if (skip_seccomp_unavailable(u, "PrivateDevices="))
1560 return 0;
1561
b54f36c6 1562 return seccomp_load_syscall_filter_set(SCMP_ACT_ALLOW, syscall_filter_sets + SYSCALL_FILTER_SET_RAW_IO, SCMP_ACT_ERRNO(EPERM), false);
ba128bb8
LP
1563}
1564
34cf6c43 1565static int apply_restrict_namespaces(const Unit *u, const ExecContext *c) {
469830d1 1566 assert(u);
add00535
LP
1567 assert(c);
1568
1569 if (!exec_context_restrict_namespaces_set(c))
1570 return 0;
1571
1572 if (skip_seccomp_unavailable(u, "RestrictNamespaces="))
1573 return 0;
1574
1575 return seccomp_restrict_namespaces(c->restrict_namespaces);
1576}
1577
78e864e5 1578static int apply_lock_personality(const Unit* u, const ExecContext *c) {
e8132d63
LP
1579 unsigned long personality;
1580 int r;
78e864e5
TM
1581
1582 assert(u);
1583 assert(c);
1584
1585 if (!c->lock_personality)
1586 return 0;
1587
1588 if (skip_seccomp_unavailable(u, "LockPersonality="))
1589 return 0;
1590
e8132d63
LP
1591 personality = c->personality;
1592
1593 /* If personality is not specified, use either PER_LINUX or PER_LINUX32 depending on what is currently set. */
1594 if (personality == PERSONALITY_INVALID) {
1595
1596 r = opinionated_personality(&personality);
1597 if (r < 0)
1598 return r;
1599 }
78e864e5
TM
1600
1601 return seccomp_lock_personality(personality);
1602}
1603
c0467cf3 1604#endif
8351ceae 1605
3042bbeb 1606static void do_idle_pipe_dance(int idle_pipe[static 4]) {
31a7eb86
ZJS
1607 assert(idle_pipe);
1608
54eb2300
LP
1609 idle_pipe[1] = safe_close(idle_pipe[1]);
1610 idle_pipe[2] = safe_close(idle_pipe[2]);
31a7eb86
ZJS
1611
1612 if (idle_pipe[0] >= 0) {
1613 int r;
1614
1615 r = fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT_USEC);
1616
1617 if (idle_pipe[3] >= 0 && r == 0 /* timeout */) {
c7cc737f
LP
1618 ssize_t n;
1619
31a7eb86 1620 /* Signal systemd that we are bored and want to continue. */
c7cc737f
LP
1621 n = write(idle_pipe[3], "x", 1);
1622 if (n > 0)
cd972d69
ZJS
1623 /* Wait for systemd to react to the signal above. */
1624 fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT2_USEC);
31a7eb86
ZJS
1625 }
1626
54eb2300 1627 idle_pipe[0] = safe_close(idle_pipe[0]);
31a7eb86
ZJS
1628
1629 }
1630
54eb2300 1631 idle_pipe[3] = safe_close(idle_pipe[3]);
31a7eb86
ZJS
1632}
1633
fb2042dd
YW
1634static const char *exec_directory_env_name_to_string(ExecDirectoryType t);
1635
7cae38c4 1636static int build_environment(
34cf6c43 1637 const Unit *u,
9fa95f85 1638 const ExecContext *c,
1e22b5cd 1639 const ExecParameters *p,
da6053d0 1640 size_t n_fds,
7cae38c4
LP
1641 const char *home,
1642 const char *username,
1643 const char *shell,
7bce046b
LP
1644 dev_t journal_stream_dev,
1645 ino_t journal_stream_ino,
7cae38c4
LP
1646 char ***ret) {
1647
1648 _cleanup_strv_free_ char **our_env = NULL;
fb2042dd 1649 ExecDirectoryType t;
da6053d0 1650 size_t n_env = 0;
7cae38c4
LP
1651 char *x;
1652
4b58153d 1653 assert(u);
7cae38c4 1654 assert(c);
7c1cb6f1 1655 assert(p);
7cae38c4
LP
1656 assert(ret);
1657
fb2042dd 1658 our_env = new0(char*, 14 + _EXEC_DIRECTORY_TYPE_MAX);
7cae38c4
LP
1659 if (!our_env)
1660 return -ENOMEM;
1661
1662 if (n_fds > 0) {
8dd4c05b
LP
1663 _cleanup_free_ char *joined = NULL;
1664
df0ff127 1665 if (asprintf(&x, "LISTEN_PID="PID_FMT, getpid_cached()) < 0)
7cae38c4
LP
1666 return -ENOMEM;
1667 our_env[n_env++] = x;
1668
da6053d0 1669 if (asprintf(&x, "LISTEN_FDS=%zu", n_fds) < 0)
7cae38c4
LP
1670 return -ENOMEM;
1671 our_env[n_env++] = x;
8dd4c05b 1672
1e22b5cd 1673 joined = strv_join(p->fd_names, ":");
8dd4c05b
LP
1674 if (!joined)
1675 return -ENOMEM;
1676
605405c6 1677 x = strjoin("LISTEN_FDNAMES=", joined);
8dd4c05b
LP
1678 if (!x)
1679 return -ENOMEM;
1680 our_env[n_env++] = x;
7cae38c4
LP
1681 }
1682
b08af3b1 1683 if ((p->flags & EXEC_SET_WATCHDOG) && p->watchdog_usec > 0) {
df0ff127 1684 if (asprintf(&x, "WATCHDOG_PID="PID_FMT, getpid_cached()) < 0)
09812eb7
LP
1685 return -ENOMEM;
1686 our_env[n_env++] = x;
1687
1e22b5cd 1688 if (asprintf(&x, "WATCHDOG_USEC="USEC_FMT, p->watchdog_usec) < 0)
09812eb7
LP
1689 return -ENOMEM;
1690 our_env[n_env++] = x;
1691 }
1692
fd63e712
LP
1693 /* If this is D-Bus, tell the nss-systemd module, since it relies on being able to use D-Bus look up dynamic
1694 * users via PID 1, possibly dead-locking the dbus daemon. This way it will not use D-Bus to resolve names, but
1695 * check the database directly. */
ac647978 1696 if (p->flags & EXEC_NSS_BYPASS_BUS) {
fd63e712
LP
1697 x = strdup("SYSTEMD_NSS_BYPASS_BUS=1");
1698 if (!x)
1699 return -ENOMEM;
1700 our_env[n_env++] = x;
1701 }
1702
7cae38c4 1703 if (home) {
b910cc72 1704 x = strjoin("HOME=", home);
7cae38c4
LP
1705 if (!x)
1706 return -ENOMEM;
7bbead1d
LP
1707
1708 path_simplify(x + 5, true);
7cae38c4
LP
1709 our_env[n_env++] = x;
1710 }
1711
1712 if (username) {
b910cc72 1713 x = strjoin("LOGNAME=", username);
7cae38c4
LP
1714 if (!x)
1715 return -ENOMEM;
1716 our_env[n_env++] = x;
1717
b910cc72 1718 x = strjoin("USER=", username);
7cae38c4
LP
1719 if (!x)
1720 return -ENOMEM;
1721 our_env[n_env++] = x;
1722 }
1723
1724 if (shell) {
b910cc72 1725 x = strjoin("SHELL=", shell);
7cae38c4
LP
1726 if (!x)
1727 return -ENOMEM;
7bbead1d
LP
1728
1729 path_simplify(x + 6, true);
7cae38c4
LP
1730 our_env[n_env++] = x;
1731 }
1732
4b58153d
LP
1733 if (!sd_id128_is_null(u->invocation_id)) {
1734 if (asprintf(&x, "INVOCATION_ID=" SD_ID128_FORMAT_STR, SD_ID128_FORMAT_VAL(u->invocation_id)) < 0)
1735 return -ENOMEM;
1736
1737 our_env[n_env++] = x;
1738 }
1739
6af760f3
LP
1740 if (exec_context_needs_term(c)) {
1741 const char *tty_path, *term = NULL;
1742
1743 tty_path = exec_context_tty_path(c);
1744
1745 /* If we are forked off PID 1 and we are supposed to operate on /dev/console, then let's try to inherit
1746 * the $TERM set for PID 1. This is useful for containers so that the $TERM the container manager
1747 * passes to PID 1 ends up all the way in the console login shown. */
1748
1749 if (path_equal(tty_path, "/dev/console") && getppid() == 1)
1750 term = getenv("TERM");
1751 if (!term)
1752 term = default_term_for_tty(tty_path);
7cae38c4 1753
b910cc72 1754 x = strjoin("TERM=", term);
7cae38c4
LP
1755 if (!x)
1756 return -ENOMEM;
1757 our_env[n_env++] = x;
1758 }
1759
7bce046b
LP
1760 if (journal_stream_dev != 0 && journal_stream_ino != 0) {
1761 if (asprintf(&x, "JOURNAL_STREAM=" DEV_FMT ":" INO_FMT, journal_stream_dev, journal_stream_ino) < 0)
1762 return -ENOMEM;
1763
1764 our_env[n_env++] = x;
1765 }
1766
fb2042dd
YW
1767 for (t = 0; t < _EXEC_DIRECTORY_TYPE_MAX; t++) {
1768 _cleanup_free_ char *pre = NULL, *joined = NULL;
1769 const char *n;
1770
1771 if (!p->prefix[t])
1772 continue;
1773
1774 if (strv_isempty(c->directories[t].paths))
1775 continue;
1776
1777 n = exec_directory_env_name_to_string(t);
1778 if (!n)
1779 continue;
1780
1781 pre = strjoin(p->prefix[t], "/");
1782 if (!pre)
1783 return -ENOMEM;
1784
1785 joined = strv_join_prefix(c->directories[t].paths, ":", pre);
1786 if (!joined)
1787 return -ENOMEM;
1788
1789 x = strjoin(n, "=", joined);
1790 if (!x)
1791 return -ENOMEM;
1792
1793 our_env[n_env++] = x;
1794 }
1795
7cae38c4 1796 our_env[n_env++] = NULL;
fb2042dd 1797 assert(n_env <= 14 + _EXEC_DIRECTORY_TYPE_MAX);
7cae38c4 1798
ae2a15bc 1799 *ret = TAKE_PTR(our_env);
7cae38c4
LP
1800
1801 return 0;
1802}
1803
b4c14404
FB
1804static int build_pass_environment(const ExecContext *c, char ***ret) {
1805 _cleanup_strv_free_ char **pass_env = NULL;
1806 size_t n_env = 0, n_bufsize = 0;
1807 char **i;
1808
1809 STRV_FOREACH(i, c->pass_environment) {
1810 _cleanup_free_ char *x = NULL;
1811 char *v;
1812
1813 v = getenv(*i);
1814 if (!v)
1815 continue;
605405c6 1816 x = strjoin(*i, "=", v);
b4c14404
FB
1817 if (!x)
1818 return -ENOMEM;
00819cc1 1819
b4c14404
FB
1820 if (!GREEDY_REALLOC(pass_env, n_bufsize, n_env + 2))
1821 return -ENOMEM;
00819cc1 1822
1cc6c93a 1823 pass_env[n_env++] = TAKE_PTR(x);
b4c14404 1824 pass_env[n_env] = NULL;
b4c14404
FB
1825 }
1826
ae2a15bc 1827 *ret = TAKE_PTR(pass_env);
b4c14404
FB
1828
1829 return 0;
1830}
1831
8b44a3d2
LP
1832static bool exec_needs_mount_namespace(
1833 const ExecContext *context,
1834 const ExecParameters *params,
4657abb5 1835 const ExecRuntime *runtime) {
8b44a3d2
LP
1836
1837 assert(context);
1838 assert(params);
1839
915e6d16
LP
1840 if (context->root_image)
1841 return true;
1842
2a624c36
AP
1843 if (!strv_isempty(context->read_write_paths) ||
1844 !strv_isempty(context->read_only_paths) ||
1845 !strv_isempty(context->inaccessible_paths))
8b44a3d2
LP
1846 return true;
1847
42b1d8e0 1848 if (context->n_bind_mounts > 0)
d2d6c096
LP
1849 return true;
1850
2abd4e38
YW
1851 if (context->n_temporary_filesystems > 0)
1852 return true;
1853
37ed15d7 1854 if (!IN_SET(context->mount_flags, 0, MS_SHARED))
8b44a3d2
LP
1855 return true;
1856
1857 if (context->private_tmp && runtime && (runtime->tmp_dir || runtime->var_tmp_dir))
1858 return true;
1859
8b44a3d2 1860 if (context->private_devices ||
228af36f 1861 context->private_mounts ||
8b44a3d2 1862 context->protect_system != PROTECT_SYSTEM_NO ||
59eeb84b
LP
1863 context->protect_home != PROTECT_HOME_NO ||
1864 context->protect_kernel_tunables ||
c575770b 1865 context->protect_kernel_modules ||
59eeb84b 1866 context->protect_control_groups)
8b44a3d2
LP
1867 return true;
1868
37c56f89
YW
1869 if (context->root_directory) {
1870 ExecDirectoryType t;
1871
1872 if (context->mount_apivfs)
1873 return true;
1874
1875 for (t = 0; t < _EXEC_DIRECTORY_TYPE_MAX; t++) {
1876 if (!params->prefix[t])
1877 continue;
1878
1879 if (!strv_isempty(context->directories[t].paths))
1880 return true;
1881 }
1882 }
5d997827 1883
42b1d8e0 1884 if (context->dynamic_user &&
b43ee82f 1885 (!strv_isempty(context->directories[EXEC_DIRECTORY_STATE].paths) ||
42b1d8e0
YW
1886 !strv_isempty(context->directories[EXEC_DIRECTORY_CACHE].paths) ||
1887 !strv_isempty(context->directories[EXEC_DIRECTORY_LOGS].paths)))
1888 return true;
1889
8b44a3d2
LP
1890 return false;
1891}
1892
d251207d
LP
1893static int setup_private_users(uid_t uid, gid_t gid) {
1894 _cleanup_free_ char *uid_map = NULL, *gid_map = NULL;
1895 _cleanup_close_pair_ int errno_pipe[2] = { -1, -1 };
1896 _cleanup_close_ int unshare_ready_fd = -1;
1897 _cleanup_(sigkill_waitp) pid_t pid = 0;
1898 uint64_t c = 1;
d251207d
LP
1899 ssize_t n;
1900 int r;
1901
1902 /* Set up a user namespace and map root to root, the selected UID/GID to itself, and everything else to
1903 * nobody. In order to be able to write this mapping we need CAP_SETUID in the original user namespace, which
1904 * we however lack after opening the user namespace. To work around this we fork() a temporary child process,
1905 * which waits for the parent to create the new user namespace while staying in the original namespace. The
1906 * child then writes the UID mapping, under full privileges. The parent waits for the child to finish and
1907 * continues execution normally. */
1908
587ab01b
ZJS
1909 if (uid != 0 && uid_is_valid(uid)) {
1910 r = asprintf(&uid_map,
1911 "0 0 1\n" /* Map root → root */
1912 UID_FMT " " UID_FMT " 1\n", /* Map $UID → $UID */
1913 uid, uid);
1914 if (r < 0)
1915 return -ENOMEM;
1916 } else {
e0f3720e 1917 uid_map = strdup("0 0 1\n"); /* The case where the above is the same */
587ab01b
ZJS
1918 if (!uid_map)
1919 return -ENOMEM;
1920 }
d251207d 1921
587ab01b
ZJS
1922 if (gid != 0 && gid_is_valid(gid)) {
1923 r = asprintf(&gid_map,
1924 "0 0 1\n" /* Map root → root */
1925 GID_FMT " " GID_FMT " 1\n", /* Map $GID → $GID */
1926 gid, gid);
1927 if (r < 0)
1928 return -ENOMEM;
1929 } else {
d251207d 1930 gid_map = strdup("0 0 1\n"); /* The case where the above is the same */
587ab01b
ZJS
1931 if (!gid_map)
1932 return -ENOMEM;
1933 }
d251207d
LP
1934
1935 /* Create a communication channel so that the parent can tell the child when it finished creating the user
1936 * namespace. */
1937 unshare_ready_fd = eventfd(0, EFD_CLOEXEC);
1938 if (unshare_ready_fd < 0)
1939 return -errno;
1940
1941 /* Create a communication channel so that the child can tell the parent a proper error code in case it
1942 * failed. */
1943 if (pipe2(errno_pipe, O_CLOEXEC) < 0)
1944 return -errno;
1945
4c253ed1
LP
1946 r = safe_fork("(sd-userns)", FORK_RESET_SIGNALS|FORK_DEATHSIG, &pid);
1947 if (r < 0)
1948 return r;
1949 if (r == 0) {
d251207d
LP
1950 _cleanup_close_ int fd = -1;
1951 const char *a;
1952 pid_t ppid;
1953
1954 /* Child process, running in the original user namespace. Let's update the parent's UID/GID map from
1955 * here, after the parent opened its own user namespace. */
1956
1957 ppid = getppid();
1958 errno_pipe[0] = safe_close(errno_pipe[0]);
1959
1960 /* Wait until the parent unshared the user namespace */
1961 if (read(unshare_ready_fd, &c, sizeof(c)) < 0) {
1962 r = -errno;
1963 goto child_fail;
1964 }
1965
1966 /* Disable the setgroups() system call in the child user namespace, for good. */
1967 a = procfs_file_alloca(ppid, "setgroups");
1968 fd = open(a, O_WRONLY|O_CLOEXEC);
1969 if (fd < 0) {
1970 if (errno != ENOENT) {
1971 r = -errno;
1972 goto child_fail;
1973 }
1974
1975 /* If the file is missing the kernel is too old, let's continue anyway. */
1976 } else {
1977 if (write(fd, "deny\n", 5) < 0) {
1978 r = -errno;
1979 goto child_fail;
1980 }
1981
1982 fd = safe_close(fd);
1983 }
1984
1985 /* First write the GID map */
1986 a = procfs_file_alloca(ppid, "gid_map");
1987 fd = open(a, O_WRONLY|O_CLOEXEC);
1988 if (fd < 0) {
1989 r = -errno;
1990 goto child_fail;
1991 }
1992 if (write(fd, gid_map, strlen(gid_map)) < 0) {
1993 r = -errno;
1994 goto child_fail;
1995 }
1996 fd = safe_close(fd);
1997
1998 /* The write the UID map */
1999 a = procfs_file_alloca(ppid, "uid_map");
2000 fd = open(a, O_WRONLY|O_CLOEXEC);
2001 if (fd < 0) {
2002 r = -errno;
2003 goto child_fail;
2004 }
2005 if (write(fd, uid_map, strlen(uid_map)) < 0) {
2006 r = -errno;
2007 goto child_fail;
2008 }
2009
2010 _exit(EXIT_SUCCESS);
2011
2012 child_fail:
2013 (void) write(errno_pipe[1], &r, sizeof(r));
2014 _exit(EXIT_FAILURE);
2015 }
2016
2017 errno_pipe[1] = safe_close(errno_pipe[1]);
2018
2019 if (unshare(CLONE_NEWUSER) < 0)
2020 return -errno;
2021
2022 /* Let the child know that the namespace is ready now */
2023 if (write(unshare_ready_fd, &c, sizeof(c)) < 0)
2024 return -errno;
2025
2026 /* Try to read an error code from the child */
2027 n = read(errno_pipe[0], &r, sizeof(r));
2028 if (n < 0)
2029 return -errno;
2030 if (n == sizeof(r)) { /* an error code was sent to us */
2031 if (r < 0)
2032 return r;
2033 return -EIO;
2034 }
2035 if (n != 0) /* on success we should have read 0 bytes */
2036 return -EIO;
2037
2e87a1fd
LP
2038 r = wait_for_terminate_and_check("(sd-userns)", pid, 0);
2039 pid = 0;
d251207d
LP
2040 if (r < 0)
2041 return r;
2e87a1fd 2042 if (r != EXIT_SUCCESS) /* If something strange happened with the child, let's consider this fatal, too */
d251207d
LP
2043 return -EIO;
2044
2045 return 0;
2046}
2047
3536f49e 2048static int setup_exec_directory(
07689d5d
LP
2049 const ExecContext *context,
2050 const ExecParameters *params,
2051 uid_t uid,
3536f49e 2052 gid_t gid,
3536f49e
YW
2053 ExecDirectoryType type,
2054 int *exit_status) {
07689d5d 2055
72fd1768 2056 static const int exit_status_table[_EXEC_DIRECTORY_TYPE_MAX] = {
3536f49e
YW
2057 [EXEC_DIRECTORY_RUNTIME] = EXIT_RUNTIME_DIRECTORY,
2058 [EXEC_DIRECTORY_STATE] = EXIT_STATE_DIRECTORY,
2059 [EXEC_DIRECTORY_CACHE] = EXIT_CACHE_DIRECTORY,
2060 [EXEC_DIRECTORY_LOGS] = EXIT_LOGS_DIRECTORY,
2061 [EXEC_DIRECTORY_CONFIGURATION] = EXIT_CONFIGURATION_DIRECTORY,
2062 };
07689d5d
LP
2063 char **rt;
2064 int r;
2065
2066 assert(context);
2067 assert(params);
72fd1768 2068 assert(type >= 0 && type < _EXEC_DIRECTORY_TYPE_MAX);
3536f49e 2069 assert(exit_status);
07689d5d 2070
3536f49e
YW
2071 if (!params->prefix[type])
2072 return 0;
2073
8679efde 2074 if (params->flags & EXEC_CHOWN_DIRECTORIES) {
3536f49e
YW
2075 if (!uid_is_valid(uid))
2076 uid = 0;
2077 if (!gid_is_valid(gid))
2078 gid = 0;
2079 }
2080
2081 STRV_FOREACH(rt, context->directories[type].paths) {
6c47cd7d 2082 _cleanup_free_ char *p = NULL, *pp = NULL;
07689d5d 2083
edbfeb12 2084 p = path_join(params->prefix[type], *rt);
3536f49e
YW
2085 if (!p) {
2086 r = -ENOMEM;
2087 goto fail;
2088 }
07689d5d 2089
23a7448e
YW
2090 r = mkdir_parents_label(p, 0755);
2091 if (r < 0)
3536f49e 2092 goto fail;
23a7448e 2093
8092a48c 2094 if (context->dynamic_user &&
40cd2ecc
LP
2095 (!IN_SET(type, EXEC_DIRECTORY_RUNTIME, EXEC_DIRECTORY_CONFIGURATION) ||
2096 (type == EXEC_DIRECTORY_RUNTIME && context->runtime_directory_preserve_mode != EXEC_PRESERVE_NO))) {
6c9c51e5 2097 _cleanup_free_ char *private_root = NULL;
6c47cd7d 2098
3f5b1508
LP
2099 /* So, here's one extra complication when dealing with DynamicUser=1 units. In that
2100 * case we want to avoid leaving a directory around fully accessible that is owned by
2101 * a dynamic user whose UID is later on reused. To lock this down we use the same
2102 * trick used by container managers to prohibit host users to get access to files of
2103 * the same UID in containers: we place everything inside a directory that has an
2104 * access mode of 0700 and is owned root:root, so that it acts as security boundary
2105 * for unprivileged host code. We then use fs namespacing to make this directory
2106 * permeable for the service itself.
6c47cd7d 2107 *
3f5b1508
LP
2108 * Specifically: for a service which wants a special directory "foo/" we first create
2109 * a directory "private/" with access mode 0700 owned by root:root. Then we place
2110 * "foo" inside of that directory (i.e. "private/foo/"), and make "foo" a symlink to
2111 * "private/foo". This way, privileged host users can access "foo/" as usual, but
2112 * unprivileged host users can't look into it. Inside of the namespace of the unit
2113 * "private/" is replaced by a more liberally accessible tmpfs, into which the host's
2114 * "private/foo/" is mounted under the same name, thus disabling the access boundary
2115 * for the service and making sure it only gets access to the dirs it needs but no
2116 * others. Tricky? Yes, absolutely, but it works!
6c47cd7d 2117 *
3f5b1508
LP
2118 * Note that we don't do this for EXEC_DIRECTORY_CONFIGURATION as that's assumed not
2119 * to be owned by the service itself.
2120 *
2121 * Also, note that we don't do this for EXEC_DIRECTORY_RUNTIME as that's often used
2122 * for sharing files or sockets with other services. */
6c47cd7d 2123
edbfeb12 2124 private_root = path_join(params->prefix[type], "private");
6c47cd7d
LP
2125 if (!private_root) {
2126 r = -ENOMEM;
2127 goto fail;
2128 }
2129
2130 /* First set up private root if it doesn't exist yet, with access mode 0700 and owned by root:root */
37c1d5e9 2131 r = mkdir_safe_label(private_root, 0700, 0, 0, MKDIR_WARN_MODE);
6c47cd7d
LP
2132 if (r < 0)
2133 goto fail;
2134
edbfeb12 2135 pp = path_join(private_root, *rt);
6c47cd7d
LP
2136 if (!pp) {
2137 r = -ENOMEM;
2138 goto fail;
2139 }
2140
2141 /* Create all directories between the configured directory and this private root, and mark them 0755 */
2142 r = mkdir_parents_label(pp, 0755);
2143 if (r < 0)
2144 goto fail;
2145
949befd3
LP
2146 if (is_dir(p, false) > 0 &&
2147 (laccess(pp, F_OK) < 0 && errno == ENOENT)) {
2148
2149 /* Hmm, the private directory doesn't exist yet, but the normal one exists? If so, move
2150 * it over. Most likely the service has been upgraded from one that didn't use
2151 * DynamicUser=1, to one that does. */
2152
cf52c45d
LP
2153 log_info("Found pre-existing public %s= directory %s, migrating to %s.\n"
2154 "Apparently, service previously had DynamicUser= turned off, and has now turned it on.",
2155 exec_directory_type_to_string(type), p, pp);
2156
949befd3
LP
2157 if (rename(p, pp) < 0) {
2158 r = -errno;
2159 goto fail;
2160 }
2161 } else {
2162 /* Otherwise, create the actual directory for the service */
2163
2164 r = mkdir_label(pp, context->directories[type].mode);
2165 if (r < 0 && r != -EEXIST)
2166 goto fail;
2167 }
6c47cd7d 2168
6c47cd7d 2169 /* And link it up from the original place */
6c9c51e5 2170 r = symlink_idempotent(pp, p, true);
6c47cd7d
LP
2171 if (r < 0)
2172 goto fail;
2173
6c47cd7d 2174 } else {
5c6d40d1
LP
2175 _cleanup_free_ char *target = NULL;
2176
2177 if (type != EXEC_DIRECTORY_CONFIGURATION &&
2178 readlink_and_make_absolute(p, &target) >= 0) {
2179 _cleanup_free_ char *q = NULL;
2180
2181 /* This already exists and is a symlink? Interesting. Maybe it's one created
2193f17c
LP
2182 * by DynamicUser=1 (see above)?
2183 *
2184 * We do this for all directory types except for ConfigurationDirectory=,
2185 * since they all support the private/ symlink logic at least in some
2186 * configurations, see above. */
5c6d40d1
LP
2187
2188 q = path_join(params->prefix[type], "private", *rt);
2189 if (!q) {
2190 r = -ENOMEM;
2191 goto fail;
2192 }
2193
2194 if (path_equal(q, target)) {
2195
2196 /* Hmm, apparently DynamicUser= was once turned on for this service,
2197 * but is no longer. Let's move the directory back up. */
2198
cf52c45d
LP
2199 log_info("Found pre-existing private %s= directory %s, migrating to %s.\n"
2200 "Apparently, service previously had DynamicUser= turned on, and has now turned it off.",
2201 exec_directory_type_to_string(type), q, p);
2202
5c6d40d1
LP
2203 if (unlink(p) < 0) {
2204 r = -errno;
2205 goto fail;
2206 }
2207
2208 if (rename(q, p) < 0) {
2209 r = -errno;
2210 goto fail;
2211 }
2212 }
2213 }
2214
6c47cd7d 2215 r = mkdir_label(p, context->directories[type].mode);
d484580c 2216 if (r < 0) {
d484580c
LP
2217 if (r != -EEXIST)
2218 goto fail;
2219
206e9864
LP
2220 if (type == EXEC_DIRECTORY_CONFIGURATION) {
2221 struct stat st;
2222
2223 /* Don't change the owner/access mode of the configuration directory,
2224 * as in the common case it is not written to by a service, and shall
2225 * not be writable. */
2226
2227 if (stat(p, &st) < 0) {
2228 r = -errno;
2229 goto fail;
2230 }
2231
2232 /* Still complain if the access mode doesn't match */
2233 if (((st.st_mode ^ context->directories[type].mode) & 07777) != 0)
2234 log_warning("%s \'%s\' already exists but the mode is different. "
2235 "(File system: %o %sMode: %o)",
2236 exec_directory_type_to_string(type), *rt,
2237 st.st_mode & 07777, exec_directory_type_to_string(type), context->directories[type].mode & 07777);
2238
6cff72eb 2239 continue;
206e9864 2240 }
6cff72eb 2241 }
a1164ae3 2242 }
07689d5d 2243
206e9864 2244 /* Lock down the access mode (we use chmod_and_chown() to make this idempotent. We don't
5238e957 2245 * specify UID/GID here, so that path_chown_recursive() can optimize things depending on the
206e9864
LP
2246 * current UID/GID ownership.) */
2247 r = chmod_and_chown(pp ?: p, context->directories[type].mode, UID_INVALID, GID_INVALID);
2248 if (r < 0)
2249 goto fail;
c71b2eb7 2250
607b358e
LP
2251 /* Then, change the ownership of the whole tree, if necessary. When dynamic users are used we
2252 * drop the suid/sgid bits, since we really don't want SUID/SGID files for dynamic UID/GID
2253 * assignments to exist.*/
2254 r = path_chown_recursive(pp ?: p, uid, gid, context->dynamic_user ? 01777 : 07777);
07689d5d 2255 if (r < 0)
3536f49e 2256 goto fail;
07689d5d
LP
2257 }
2258
2259 return 0;
3536f49e
YW
2260
2261fail:
2262 *exit_status = exit_status_table[type];
3536f49e 2263 return r;
07689d5d
LP
2264}
2265
#if ENABLE_SMACK
/* Applies the SMACK process label for the calling process (pid 0 is passed to mac_smack_apply_pid()).
 *
 * If context->smack_process_label is set, it is applied. Otherwise, and only if SMACK_DEFAULT_PROCESS_LABEL
 * is defined at build time, the SMACK_ATTR_EXEC label read from command->path is applied, falling back to
 * SMACK_DEFAULT_PROCESS_LABEL if none was read.
 *
 * Returns 0 on success, or the negative error returned by the mac_smack_*() helpers. */
static int setup_smack(
                const ExecContext *context,
                const ExecCommand *command) {

        int r;

        assert(context);
        assert(command);

        if (context->smack_process_label) {
                r = mac_smack_apply_pid(0, context->smack_process_label);
                if (r < 0)
                        return r;
        }
#ifdef SMACK_DEFAULT_PROCESS_LABEL
        else {
                _cleanup_free_ char *exec_label = NULL;

                /* -ENODATA and -EOPNOTSUPP are tolerated here; exec_label then stays NULL and the
                 * default label is used below. */
                r = mac_smack_read(command->path, SMACK_ATTR_EXEC, &exec_label);
                if (r < 0 && !IN_SET(r, -ENODATA, -EOPNOTSUPP))
                        return r;

                r = mac_smack_apply_pid(0, exec_label ? : SMACK_DEFAULT_PROCESS_LABEL);
                if (r < 0)
                        return r;
        }
#endif

        return 0;
}
#endif
cefc33ae 2298
6c47cd7d
LP
2299static int compile_bind_mounts(
2300 const ExecContext *context,
2301 const ExecParameters *params,
2302 BindMount **ret_bind_mounts,
da6053d0 2303 size_t *ret_n_bind_mounts,
6c47cd7d
LP
2304 char ***ret_empty_directories) {
2305
2306 _cleanup_strv_free_ char **empty_directories = NULL;
2307 BindMount *bind_mounts;
da6053d0 2308 size_t n, h = 0, i;
6c47cd7d
LP
2309 ExecDirectoryType t;
2310 int r;
2311
2312 assert(context);
2313 assert(params);
2314 assert(ret_bind_mounts);
2315 assert(ret_n_bind_mounts);
2316 assert(ret_empty_directories);
2317
2318 n = context->n_bind_mounts;
2319 for (t = 0; t < _EXEC_DIRECTORY_TYPE_MAX; t++) {
2320 if (!params->prefix[t])
2321 continue;
2322
2323 n += strv_length(context->directories[t].paths);
2324 }
2325
2326 if (n <= 0) {
2327 *ret_bind_mounts = NULL;
2328 *ret_n_bind_mounts = 0;
2329 *ret_empty_directories = NULL;
2330 return 0;
2331 }
2332
2333 bind_mounts = new(BindMount, n);
2334 if (!bind_mounts)
2335 return -ENOMEM;
2336
a8cabc61 2337 for (i = 0; i < context->n_bind_mounts; i++) {
6c47cd7d
LP
2338 BindMount *item = context->bind_mounts + i;
2339 char *s, *d;
2340
2341 s = strdup(item->source);
2342 if (!s) {
2343 r = -ENOMEM;
2344 goto finish;
2345 }
2346
2347 d = strdup(item->destination);
2348 if (!d) {
2349 free(s);
2350 r = -ENOMEM;
2351 goto finish;
2352 }
2353
2354 bind_mounts[h++] = (BindMount) {
2355 .source = s,
2356 .destination = d,
2357 .read_only = item->read_only,
2358 .recursive = item->recursive,
2359 .ignore_enoent = item->ignore_enoent,
2360 };
2361 }
2362
2363 for (t = 0; t < _EXEC_DIRECTORY_TYPE_MAX; t++) {
2364 char **suffix;
2365
2366 if (!params->prefix[t])
2367 continue;
2368
2369 if (strv_isempty(context->directories[t].paths))
2370 continue;
2371
8092a48c 2372 if (context->dynamic_user &&
5609f688
YW
2373 !IN_SET(t, EXEC_DIRECTORY_RUNTIME, EXEC_DIRECTORY_CONFIGURATION) &&
2374 !(context->root_directory || context->root_image)) {
6c47cd7d
LP
2375 char *private_root;
2376
2377 /* So this is for a dynamic user, and we need to make sure the process can access its own
2378 * directory. For that we overmount the usually inaccessible "private" subdirectory with a
2379 * tmpfs that makes it accessible and is empty except for the submounts we do this for. */
2380
657ee2d8 2381 private_root = path_join(params->prefix[t], "private");
6c47cd7d
LP
2382 if (!private_root) {
2383 r = -ENOMEM;
2384 goto finish;
2385 }
2386
2387 r = strv_consume(&empty_directories, private_root);
a635a7ae 2388 if (r < 0)
6c47cd7d 2389 goto finish;
6c47cd7d
LP
2390 }
2391
2392 STRV_FOREACH(suffix, context->directories[t].paths) {
2393 char *s, *d;
2394
8092a48c
YW
2395 if (context->dynamic_user &&
2396 !IN_SET(t, EXEC_DIRECTORY_RUNTIME, EXEC_DIRECTORY_CONFIGURATION))
657ee2d8 2397 s = path_join(params->prefix[t], "private", *suffix);
6c47cd7d 2398 else
657ee2d8 2399 s = path_join(params->prefix[t], *suffix);
6c47cd7d
LP
2400 if (!s) {
2401 r = -ENOMEM;
2402 goto finish;
2403 }
2404
5609f688
YW
2405 if (context->dynamic_user &&
2406 !IN_SET(t, EXEC_DIRECTORY_RUNTIME, EXEC_DIRECTORY_CONFIGURATION) &&
2407 (context->root_directory || context->root_image))
2408 /* When RootDirectory= or RootImage= are set, then the symbolic link to the private
2409 * directory is not created on the root directory. So, let's bind-mount the directory
2410 * on the 'non-private' place. */
657ee2d8 2411 d = path_join(params->prefix[t], *suffix);
5609f688
YW
2412 else
2413 d = strdup(s);
6c47cd7d
LP
2414 if (!d) {
2415 free(s);
2416 r = -ENOMEM;
2417 goto finish;
2418 }
2419
2420 bind_mounts[h++] = (BindMount) {
2421 .source = s,
2422 .destination = d,
2423 .read_only = false,
9ce4e4b0 2424 .nosuid = context->dynamic_user, /* don't allow suid/sgid when DynamicUser= is on */
6c47cd7d
LP
2425 .recursive = true,
2426 .ignore_enoent = false,
2427 };
2428 }
2429 }
2430
2431 assert(h == n);
2432
2433 *ret_bind_mounts = bind_mounts;
2434 *ret_n_bind_mounts = n;
ae2a15bc 2435 *ret_empty_directories = TAKE_PTR(empty_directories);
6c47cd7d
LP
2436
2437 return (int) n;
2438
2439finish:
2440 bind_mount_free_many(bind_mounts, h);
2441 return r;
2442}
2443
6818c54c 2444static int apply_mount_namespace(
34cf6c43
YW
2445 const Unit *u,
2446 const ExecCommand *command,
6818c54c
LP
2447 const ExecContext *context,
2448 const ExecParameters *params,
7cc5ef5f
ZJS
2449 const ExecRuntime *runtime,
2450 char **error_path) {
6818c54c 2451
7bcef4ef 2452 _cleanup_strv_free_ char **empty_directories = NULL;
93c6bb51 2453 char *tmp = NULL, *var = NULL;
915e6d16 2454 const char *root_dir = NULL, *root_image = NULL;
228af36f 2455 NamespaceInfo ns_info;
165a31c0 2456 bool needs_sandboxing;
6c47cd7d 2457 BindMount *bind_mounts = NULL;
da6053d0 2458 size_t n_bind_mounts = 0;
6818c54c 2459 int r;
93c6bb51 2460
2b3c1b9e
DH
2461 assert(context);
2462
93c6bb51
DH
2463 /* The runtime struct only contains the parent of the private /tmp,
2464 * which is non-accessible to world users. Inside of it there's a /tmp
2465 * that is sticky, and that's the one we want to use here. */
2466
2467 if (context->private_tmp && runtime) {
2468 if (runtime->tmp_dir)
2469 tmp = strjoina(runtime->tmp_dir, "/tmp");
2470 if (runtime->var_tmp_dir)
2471 var = strjoina(runtime->var_tmp_dir, "/tmp");
2472 }
2473
915e6d16
LP
2474 if (params->flags & EXEC_APPLY_CHROOT) {
2475 root_image = context->root_image;
2476
2477 if (!root_image)
2478 root_dir = context->root_directory;
2479 }
93c6bb51 2480
6c47cd7d
LP
2481 r = compile_bind_mounts(context, params, &bind_mounts, &n_bind_mounts, &empty_directories);
2482 if (r < 0)
2483 return r;
2484
165a31c0 2485 needs_sandboxing = (params->flags & EXEC_APPLY_SANDBOXING) && !(command->flags & EXEC_COMMAND_FULLY_PRIVILEGED);
b5a33299
YW
2486 if (needs_sandboxing)
2487 ns_info = (NamespaceInfo) {
2488 .ignore_protect_paths = false,
2489 .private_dev = context->private_devices,
2490 .protect_control_groups = context->protect_control_groups,
2491 .protect_kernel_tunables = context->protect_kernel_tunables,
2492 .protect_kernel_modules = context->protect_kernel_modules,
aecd5ac6 2493 .protect_hostname = context->protect_hostname,
b5a33299 2494 .mount_apivfs = context->mount_apivfs,
228af36f 2495 .private_mounts = context->private_mounts,
b5a33299 2496 };
228af36f
LP
2497 else if (!context->dynamic_user && root_dir)
2498 /*
2499 * If DynamicUser=no and RootDirectory= is set then lets pass a relaxed
2500 * sandbox info, otherwise enforce it, don't ignore protected paths and
2501 * fail if we are enable to apply the sandbox inside the mount namespace.
2502 */
2503 ns_info = (NamespaceInfo) {
2504 .ignore_protect_paths = true,
2505 };
2506 else
2507 ns_info = (NamespaceInfo) {};
b5a33299 2508
37ed15d7
FB
2509 if (context->mount_flags == MS_SHARED)
2510 log_unit_debug(u, "shared mount propagation hidden by other fs namespacing unit settings: ignoring");
2511
915e6d16 2512 r = setup_namespace(root_dir, root_image,
7bcef4ef 2513 &ns_info, context->read_write_paths,
165a31c0
LP
2514 needs_sandboxing ? context->read_only_paths : NULL,
2515 needs_sandboxing ? context->inaccessible_paths : NULL,
6c47cd7d
LP
2516 empty_directories,
2517 bind_mounts,
2518 n_bind_mounts,
2abd4e38
YW
2519 context->temporary_filesystems,
2520 context->n_temporary_filesystems,
93c6bb51
DH
2521 tmp,
2522 var,
165a31c0
LP
2523 needs_sandboxing ? context->protect_home : PROTECT_HOME_NO,
2524 needs_sandboxing ? context->protect_system : PROTECT_SYSTEM_NO,
915e6d16 2525 context->mount_flags,
7cc5ef5f
ZJS
2526 DISSECT_IMAGE_DISCARD_ON_LOOP,
2527 error_path);
93c6bb51 2528
6c47cd7d
LP
2529 bind_mount_free_many(bind_mounts, n_bind_mounts);
2530
1beab8b0 2531 /* If we couldn't set up the namespace this is probably due to a missing capability. setup_namespace() reports
5238e957 2532 * that with a special, recognizable error ENOANO. In this case, silently proceed, but only if exclusively
1beab8b0
LP
2533 * sandboxing options were used, i.e. nothing such as RootDirectory= or BindMount= that would result in a
2534 * completely different execution environment. */
aca835ed
YW
2535 if (r == -ENOANO) {
2536 if (n_bind_mounts == 0 &&
2537 context->n_temporary_filesystems == 0 &&
2538 !root_dir && !root_image &&
2539 !context->dynamic_user) {
2540 log_unit_debug(u, "Failed to set up namespace, assuming containerized execution and ignoring.");
2541 return 0;
2542 }
2543
2194547e
LP
2544 log_unit_debug(u, "Failed to set up namespace, and refusing to continue since the selected namespacing options alter mount environment non-trivially.\n"
2545 "Bind mounts: %zu, temporary filesystems: %zu, root directory: %s, root image: %s, dynamic user: %s",
2546 n_bind_mounts, context->n_temporary_filesystems, yes_no(root_dir), yes_no(root_image), yes_no(context->dynamic_user));
2547
aca835ed 2548 return -EOPNOTSUPP;
93c6bb51
DH
2549 }
2550
2551 return r;
2552}
2553
915e6d16
LP
2554static int apply_working_directory(
2555 const ExecContext *context,
2556 const ExecParameters *params,
2557 const char *home,
376fecf6 2558 int *exit_status) {
915e6d16 2559
6732edab 2560 const char *d, *wd;
2b3c1b9e
DH
2561
2562 assert(context);
376fecf6 2563 assert(exit_status);
2b3c1b9e 2564
6732edab
LP
2565 if (context->working_directory_home) {
2566
376fecf6
LP
2567 if (!home) {
2568 *exit_status = EXIT_CHDIR;
6732edab 2569 return -ENXIO;
376fecf6 2570 }
6732edab 2571
2b3c1b9e 2572 wd = home;
6732edab
LP
2573
2574 } else if (context->working_directory)
2b3c1b9e
DH
2575 wd = context->working_directory;
2576 else
2577 wd = "/";
e7f1e7c6 2578
fa97f630 2579 if (params->flags & EXEC_APPLY_CHROOT)
2b3c1b9e 2580 d = wd;
fa97f630 2581 else
3b0e5bb5 2582 d = prefix_roota(context->root_directory, wd);
e7f1e7c6 2583
376fecf6
LP
2584 if (chdir(d) < 0 && !context->working_directory_missing_ok) {
2585 *exit_status = EXIT_CHDIR;
2b3c1b9e 2586 return -errno;
376fecf6 2587 }
e7f1e7c6
DH
2588
2589 return 0;
2590}
2591
fa97f630
JB
2592static int apply_root_directory(
2593 const ExecContext *context,
2594 const ExecParameters *params,
2595 const bool needs_mount_ns,
2596 int *exit_status) {
2597
2598 assert(context);
2599 assert(exit_status);
2600
2601 if (params->flags & EXEC_APPLY_CHROOT) {
2602 if (!needs_mount_ns && context->root_directory)
2603 if (chroot(context->root_directory) < 0) {
2604 *exit_status = EXIT_CHROOT;
2605 return -errno;
2606 }
2607 }
2608
2609 return 0;
2610}
2611
b1edf445 2612static int setup_keyring(
34cf6c43 2613 const Unit *u,
b1edf445
LP
2614 const ExecContext *context,
2615 const ExecParameters *p,
2616 uid_t uid, gid_t gid) {
2617
74dd6b51 2618 key_serial_t keyring;
e64c2d0b
DJL
2619 int r = 0;
2620 uid_t saved_uid;
2621 gid_t saved_gid;
74dd6b51
LP
2622
2623 assert(u);
b1edf445 2624 assert(context);
74dd6b51
LP
2625 assert(p);
2626
2627 /* Let's set up a new per-service "session" kernel keyring for each system service. This has the benefit that
2628 * each service runs with its own keyring shared among all processes of the service, but with no hook-up beyond
2629 * that scope, and in particular no link to the per-UID keyring. If we don't do this the keyring will be
2630 * automatically created on-demand and then linked to the per-UID keyring, by the kernel. The kernel's built-in
2631 * on-demand behaviour is very appropriate for login users, but probably not so much for system services, where
2632 * UIDs are not necessarily specific to a service but reused (at least in the case of UID 0). */
2633
b1edf445
LP
2634 if (context->keyring_mode == EXEC_KEYRING_INHERIT)
2635 return 0;
2636
e64c2d0b
DJL
2637 /* Acquiring a reference to the user keyring is nasty. We briefly change identity in order to get things set up
2638 * properly by the kernel. If we don't do that then we can't create it atomically, and that sucks for parallel
2639 * execution. This mimics what pam_keyinit does, too. Setting up session keyring, to be owned by the right user
2640 * & group is just as nasty as acquiring a reference to the user keyring. */
2641
2642 saved_uid = getuid();
2643 saved_gid = getgid();
2644
2645 if (gid_is_valid(gid) && gid != saved_gid) {
2646 if (setregid(gid, -1) < 0)
2647 return log_unit_error_errno(u, errno, "Failed to change GID for user keyring: %m");
2648 }
2649
2650 if (uid_is_valid(uid) && uid != saved_uid) {
2651 if (setreuid(uid, -1) < 0) {
2652 r = log_unit_error_errno(u, errno, "Failed to change UID for user keyring: %m");
2653 goto out;
2654 }
2655 }
2656
74dd6b51
LP
2657 keyring = keyctl(KEYCTL_JOIN_SESSION_KEYRING, 0, 0, 0, 0);
2658 if (keyring == -1) {
2659 if (errno == ENOSYS)
8002fb97 2660 log_unit_debug_errno(u, errno, "Kernel keyring not supported, ignoring.");
74dd6b51 2661 else if (IN_SET(errno, EACCES, EPERM))
8002fb97 2662 log_unit_debug_errno(u, errno, "Kernel keyring access prohibited, ignoring.");
74dd6b51 2663 else if (errno == EDQUOT)
8002fb97 2664 log_unit_debug_errno(u, errno, "Out of kernel keyrings to allocate, ignoring.");
74dd6b51 2665 else
e64c2d0b 2666 r = log_unit_error_errno(u, errno, "Setting up kernel keyring failed: %m");
74dd6b51 2667
e64c2d0b 2668 goto out;
74dd6b51
LP
2669 }
2670
e64c2d0b
DJL
2671 /* When requested link the user keyring into the session keyring. */
2672 if (context->keyring_mode == EXEC_KEYRING_SHARED) {
2673
2674 if (keyctl(KEYCTL_LINK,
2675 KEY_SPEC_USER_KEYRING,
2676 KEY_SPEC_SESSION_KEYRING, 0, 0) < 0) {
2677 r = log_unit_error_errno(u, errno, "Failed to link user keyring into session keyring: %m");
2678 goto out;
2679 }
2680 }
2681
2682 /* Restore uid/gid back */
2683 if (uid_is_valid(uid) && uid != saved_uid) {
2684 if (setreuid(saved_uid, -1) < 0) {
2685 r = log_unit_error_errno(u, errno, "Failed to change UID back for user keyring: %m");
2686 goto out;
2687 }
2688 }
2689
2690 if (gid_is_valid(gid) && gid != saved_gid) {
2691 if (setregid(saved_gid, -1) < 0)
2692 return log_unit_error_errno(u, errno, "Failed to change GID back for user keyring: %m");
2693 }
2694
2695 /* Populate they keyring with the invocation ID by default, as original saved_uid. */
b3415f5d
LP
2696 if (!sd_id128_is_null(u->invocation_id)) {
2697 key_serial_t key;
2698
2699 key = add_key("user", "invocation_id", &u->invocation_id, sizeof(u->invocation_id), KEY_SPEC_SESSION_KEYRING);
2700 if (key == -1)
8002fb97 2701 log_unit_debug_errno(u, errno, "Failed to add invocation ID to keyring, ignoring: %m");
b3415f5d
LP
2702 else {
2703 if (keyctl(KEYCTL_SETPERM, key,
2704 KEY_POS_VIEW|KEY_POS_READ|KEY_POS_SEARCH|
2705 KEY_USR_VIEW|KEY_USR_READ|KEY_USR_SEARCH, 0, 0) < 0)
e64c2d0b 2706 r = log_unit_error_errno(u, errno, "Failed to restrict invocation ID permission: %m");
b3415f5d
LP
2707 }
2708 }
2709
e64c2d0b
DJL
2710out:
2711 /* Revert back uid & gid for the the last time, and exit */
2712 /* no extra logging, as only the first already reported error matters */
2713 if (getuid() != saved_uid)
2714 (void) setreuid(saved_uid, -1);
b1edf445 2715
e64c2d0b
DJL
2716 if (getgid() != saved_gid)
2717 (void) setregid(saved_gid, -1);
b1edf445 2718
e64c2d0b 2719 return r;
74dd6b51
LP
2720}
2721
/* Appends the valid (i.e. non-negative) file descriptors of 'pair' to 'array', starting at index *n, and
 * advances *n by the number of descriptors appended. The caller must make sure the array has room for up to
 * two more entries. */
static void append_socket_pair(int *array, size_t *n, const int pair[static 2]) {
        assert(array);
        assert(n);
        assert(pair);

        for (size_t k = 0; k < 2; k++)
                if (pair[k] >= 0)
                        array[(*n)++] = pair[k];
}
2732
a34ceba6
LP
2733static int close_remaining_fds(
2734 const ExecParameters *params,
34cf6c43
YW
2735 const ExecRuntime *runtime,
2736 const DynamicCreds *dcreds,
00d9ef85 2737 int user_lookup_fd,
a34ceba6 2738 int socket_fd,
5686391b 2739 int exec_fd,
da6053d0 2740 int *fds, size_t n_fds) {
a34ceba6 2741
da6053d0 2742 size_t n_dont_close = 0;
00d9ef85 2743 int dont_close[n_fds + 12];
a34ceba6
LP
2744
2745 assert(params);
2746
2747 if (params->stdin_fd >= 0)
2748 dont_close[n_dont_close++] = params->stdin_fd;
2749 if (params->stdout_fd >= 0)
2750 dont_close[n_dont_close++] = params->stdout_fd;
2751 if (params->stderr_fd >= 0)
2752 dont_close[n_dont_close++] = params->stderr_fd;
2753
2754 if (socket_fd >= 0)
2755 dont_close[n_dont_close++] = socket_fd;
5686391b
LP
2756 if (exec_fd >= 0)
2757 dont_close[n_dont_close++] = exec_fd;
a34ceba6
LP
2758 if (n_fds > 0) {
2759 memcpy(dont_close + n_dont_close, fds, sizeof(int) * n_fds);
2760 n_dont_close += n_fds;
2761 }
2762
29206d46
LP
2763 if (runtime)
2764 append_socket_pair(dont_close, &n_dont_close, runtime->netns_storage_socket);
2765
2766 if (dcreds) {
2767 if (dcreds->user)
2768 append_socket_pair(dont_close, &n_dont_close, dcreds->user->storage_socket);
2769 if (dcreds->group)
2770 append_socket_pair(dont_close, &n_dont_close, dcreds->group->storage_socket);
a34ceba6
LP
2771 }
2772
00d9ef85
LP
2773 if (user_lookup_fd >= 0)
2774 dont_close[n_dont_close++] = user_lookup_fd;
2775
a34ceba6
LP
2776 return close_all_fds(dont_close, n_dont_close);
2777}
2778
00d9ef85
LP
2779static int send_user_lookup(
2780 Unit *unit,
2781 int user_lookup_fd,
2782 uid_t uid,
2783 gid_t gid) {
2784
2785 assert(unit);
2786
2787 /* Send the resolved UID/GID to PID 1 after we learnt it. We send a single datagram, containing the UID/GID
2788 * data as well as the unit name. Note that we suppress sending this if no user/group to resolve was
2789 * specified. */
2790
2791 if (user_lookup_fd < 0)
2792 return 0;
2793
2794 if (!uid_is_valid(uid) && !gid_is_valid(gid))
2795 return 0;
2796
2797 if (writev(user_lookup_fd,
2798 (struct iovec[]) {
e6a7ec4b
LP
2799 IOVEC_INIT(&uid, sizeof(uid)),
2800 IOVEC_INIT(&gid, sizeof(gid)),
2801 IOVEC_INIT_STRING(unit->id) }, 3) < 0)
00d9ef85
LP
2802 return -errno;
2803
2804 return 0;
2805}
2806
6732edab
LP
2807static int acquire_home(const ExecContext *c, uid_t uid, const char** home, char **buf) {
2808 int r;
2809
2810 assert(c);
2811 assert(home);
2812 assert(buf);
2813
2814 /* If WorkingDirectory=~ is set, try to acquire a usable home directory. */
2815
2816 if (*home)
2817 return 0;
2818
2819 if (!c->working_directory_home)
2820 return 0;
2821
6732edab
LP
2822 r = get_home_dir(buf);
2823 if (r < 0)
2824 return r;
2825
2826 *home = *buf;
2827 return 1;
2828}
2829
da50b85a
LP
2830static int compile_suggested_paths(const ExecContext *c, const ExecParameters *p, char ***ret) {
2831 _cleanup_strv_free_ char ** list = NULL;
2832 ExecDirectoryType t;
2833 int r;
2834
2835 assert(c);
2836 assert(p);
2837 assert(ret);
2838
2839 assert(c->dynamic_user);
2840
2841 /* Compile a list of paths that it might make sense to read the owning UID from to use as initial candidate for
2842 * dynamic UID allocation, in order to save us from doing costly recursive chown()s of the special
2843 * directories. */
2844
2845 for (t = 0; t < _EXEC_DIRECTORY_TYPE_MAX; t++) {
2846 char **i;
2847
2848 if (t == EXEC_DIRECTORY_CONFIGURATION)
2849 continue;
2850
2851 if (!p->prefix[t])
2852 continue;
2853
2854 STRV_FOREACH(i, c->directories[t].paths) {
2855 char *e;
2856
8092a48c 2857 if (t == EXEC_DIRECTORY_RUNTIME)
657ee2d8 2858 e = path_join(p->prefix[t], *i);
8092a48c 2859 else
657ee2d8 2860 e = path_join(p->prefix[t], "private", *i);
da50b85a
LP
2861 if (!e)
2862 return -ENOMEM;
2863
2864 r = strv_consume(&list, e);
2865 if (r < 0)
2866 return r;
2867 }
2868 }
2869
ae2a15bc 2870 *ret = TAKE_PTR(list);
da50b85a
LP
2871
2872 return 0;
2873}
2874
34cf6c43
YW
2875static char *exec_command_line(char **argv);
2876
78f93209
LP
2877static int exec_parameters_get_cgroup_path(const ExecParameters *params, char **ret) {
2878 bool using_subcgroup;
2879 char *p;
2880
2881 assert(params);
2882 assert(ret);
2883
2884 if (!params->cgroup_path)
2885 return -EINVAL;
2886
2887 /* If we are called for a unit where cgroup delegation is on, and the payload created its own populated
2888 * subcgroup (which we expect it to do, after all it asked for delegation), then we cannot place the control
2889 * processes started after the main unit's process in the unit's main cgroup because it is now an inner one,
2890 * and inner cgroups may not contain processes. Hence, if delegation is on, and this is a control process,
2891 * let's use ".control" as subcgroup instead. Note that we do so only for ExecStartPost=, ExecReload=,
2892 * ExecStop=, ExecStopPost=, i.e. for the commands where the main process is already forked. For ExecStartPre=
2893 * this is not necessary, the cgroup is still empty. We distinguish these cases with the EXEC_CONTROL_CGROUP
2894 * flag, which is only passed for the former statements, not for the latter. */
2895
2896 using_subcgroup = FLAGS_SET(params->flags, EXEC_CONTROL_CGROUP|EXEC_CGROUP_DELEGATE|EXEC_IS_CONTROL);
2897 if (using_subcgroup)
657ee2d8 2898 p = path_join(params->cgroup_path, ".control");
78f93209
LP
2899 else
2900 p = strdup(params->cgroup_path);
2901 if (!p)
2902 return -ENOMEM;
2903
2904 *ret = p;
2905 return using_subcgroup;
2906}
2907
ff0af2a1 2908static int exec_child(
f2341e0a 2909 Unit *unit,
34cf6c43 2910 const ExecCommand *command,
ff0af2a1
LP
2911 const ExecContext *context,
2912 const ExecParameters *params,
2913 ExecRuntime *runtime,
29206d46 2914 DynamicCreds *dcreds,
ff0af2a1 2915 int socket_fd,
2caa38e9 2916 const int named_iofds[static 3],
4c47affc 2917 int *fds,
da6053d0 2918 size_t n_socket_fds,
25b583d7 2919 size_t n_storage_fds,
ff0af2a1 2920 char **files_env,
00d9ef85 2921 int user_lookup_fd,
12145637 2922 int *exit_status) {
d35fbf6b 2923
7ca69792 2924 _cleanup_strv_free_ char **our_env = NULL, **pass_env = NULL, **accum_env = NULL, **replaced_argv = NULL;
5686391b 2925 int *fds_with_exec_fd, n_fds_with_exec_fd, r, ngids = 0, exec_fd = -1;
4d885bd3
DH
2926 _cleanup_free_ gid_t *supplementary_gids = NULL;
2927 const char *username = NULL, *groupname = NULL;
5686391b 2928 _cleanup_free_ char *home_buffer = NULL;
2b3c1b9e 2929 const char *home = NULL, *shell = NULL;
7ca69792 2930 char **final_argv = NULL;
7bce046b
LP
2931 dev_t journal_stream_dev = 0;
2932 ino_t journal_stream_ino = 0;
165a31c0
LP
2933 bool needs_sandboxing, /* Do we need to set up full sandboxing? (i.e. all namespacing, all MAC stuff, caps, yadda yadda */
2934 needs_setuid, /* Do we need to do the actual setresuid()/setresgid() calls? */
2935 needs_mount_namespace, /* Do we need to set up a mount namespace for this kernel? */
2936 needs_ambient_hack; /* Do we need to apply the ambient capabilities hack? */
349cc4a5 2937#if HAVE_SELINUX
7f59dd35 2938 _cleanup_free_ char *mac_selinux_context_net = NULL;
43b1f709 2939 bool use_selinux = false;
ecfbc84f 2940#endif
f9fa32f0 2941#if ENABLE_SMACK
43b1f709 2942 bool use_smack = false;
ecfbc84f 2943#endif
349cc4a5 2944#if HAVE_APPARMOR
43b1f709 2945 bool use_apparmor = false;
ecfbc84f 2946#endif
fed1e721
LP
2947 uid_t uid = UID_INVALID;
2948 gid_t gid = GID_INVALID;
da6053d0 2949 size_t n_fds;
3536f49e 2950 ExecDirectoryType dt;
165a31c0 2951 int secure_bits;
034c6ed7 2952
f2341e0a 2953 assert(unit);
5cb5a6ff
LP
2954 assert(command);
2955 assert(context);
d35fbf6b 2956 assert(params);
ff0af2a1 2957 assert(exit_status);
d35fbf6b
DM
2958
2959 rename_process_from_path(command->path);
2960
2961 /* We reset exactly these signals, since they are the
2962 * only ones we set to SIG_IGN in the main daemon. All
2963 * others we leave untouched because we set them to
2964 * SIG_DFL or a valid handler initially, both of which
2965 * will be demoted to SIG_DFL. */
ce30c8dc
LP
2966 (void) default_signals(SIGNALS_CRASH_HANDLER,
2967 SIGNALS_IGNORE, -1);
d35fbf6b
DM
2968
2969 if (context->ignore_sigpipe)
ce30c8dc 2970 (void) ignore_signals(SIGPIPE, -1);
d35fbf6b 2971
ff0af2a1
LP
2972 r = reset_signal_mask();
2973 if (r < 0) {
2974 *exit_status = EXIT_SIGNAL_MASK;
12145637 2975 return log_unit_error_errno(unit, r, "Failed to set process signal mask: %m");
d35fbf6b 2976 }
034c6ed7 2977
d35fbf6b
DM
2978 if (params->idle_pipe)
2979 do_idle_pipe_dance(params->idle_pipe);
4f2d528d 2980
2c027c62
LP
2981 /* Close fds we don't need very early to make sure we don't block init reexecution because it cannot bind its
2982 * sockets. Among the fds we close are the logging fds, and we want to keep them closed, so that we don't have
2983 * any fds open we don't really want open during the transition. In order to make logging work, we switch the
2984 * log subsystem into open_when_needed mode, so that it reopens the logs on every single log call. */
ff0af2a1 2985
d35fbf6b 2986 log_forget_fds();
2c027c62 2987 log_set_open_when_needed(true);
4f2d528d 2988
40a80078
LP
2989 /* In case anything used libc syslog(), close this here, too */
2990 closelog();
2991
5686391b
LP
2992 n_fds = n_socket_fds + n_storage_fds;
2993 r = close_remaining_fds(params, runtime, dcreds, user_lookup_fd, socket_fd, params->exec_fd, fds, n_fds);
ff0af2a1
LP
2994 if (r < 0) {
2995 *exit_status = EXIT_FDS;
12145637 2996 return log_unit_error_errno(unit, r, "Failed to close unwanted file descriptors: %m");
8c7be95e
LP
2997 }
2998
d35fbf6b
DM
2999 if (!context->same_pgrp)
3000 if (setsid() < 0) {
ff0af2a1 3001 *exit_status = EXIT_SETSID;
12145637 3002 return log_unit_error_errno(unit, errno, "Failed to create new process session: %m");
d35fbf6b 3003 }
9e2f7c11 3004
1e22b5cd 3005 exec_context_tty_reset(context, params);
d35fbf6b 3006
c891efaf 3007 if (unit_shall_confirm_spawn(unit)) {
7d5ceb64 3008 const char *vc = params->confirm_spawn;
3b20f877
FB
3009 _cleanup_free_ char *cmdline = NULL;
3010
ee39ca20 3011 cmdline = exec_command_line(command->argv);
3b20f877 3012 if (!cmdline) {
0460aa5c 3013 *exit_status = EXIT_MEMORY;
12145637 3014 return log_oom();
3b20f877 3015 }
d35fbf6b 3016
eedf223a 3017 r = ask_for_confirmation(vc, unit, cmdline);
3b20f877
FB
3018 if (r != CONFIRM_EXECUTE) {
3019 if (r == CONFIRM_PRETEND_SUCCESS) {
3020 *exit_status = EXIT_SUCCESS;
3021 return 0;
3022 }
ff0af2a1 3023 *exit_status = EXIT_CONFIRM;
12145637 3024 log_unit_error(unit, "Execution cancelled by the user");
d35fbf6b 3025 return -ECANCELED;
d35fbf6b
DM
3026 }
3027 }
1a63a750 3028
d521916d
LP
3029 /* We are about to invoke NSS and PAM modules. Let's tell them what we are doing here, maybe they care. This is
3030 * used by nss-resolve to disable itself when we are about to start systemd-resolved, to avoid deadlocks. Note
3031 * that these env vars do not survive the execve(), which means they really only apply to the PAM and NSS
3032 * invocations themselves. Also note that while we'll only invoke NSS modules involved in user management they
3033 * might internally call into other NSS modules that are involved in hostname resolution, we never know. */
3034 if (setenv("SYSTEMD_ACTIVATION_UNIT", unit->id, true) != 0 ||
3035 setenv("SYSTEMD_ACTIVATION_SCOPE", MANAGER_IS_SYSTEM(unit->manager) ? "system" : "user", true) != 0) {
3036 *exit_status = EXIT_MEMORY;
3037 return log_unit_error_errno(unit, errno, "Failed to update environment: %m");
3038 }
3039
29206d46 3040 if (context->dynamic_user && dcreds) {
da50b85a 3041 _cleanup_strv_free_ char **suggested_paths = NULL;
29206d46 3042
d521916d
LP
3043 /* On top of that, make sure we bypass our own NSS module nss-systemd comprehensively for any NSS
3044 * checks, if DynamicUser=1 is used, as we shouldn't create a feedback loop with ourselves here.*/
409093fe
LP
3045 if (putenv((char*) "SYSTEMD_NSS_DYNAMIC_BYPASS=1") != 0) {
3046 *exit_status = EXIT_USER;
12145637 3047 return log_unit_error_errno(unit, errno, "Failed to update environment: %m");
409093fe
LP
3048 }
3049
da50b85a
LP
3050 r = compile_suggested_paths(context, params, &suggested_paths);
3051 if (r < 0) {
3052 *exit_status = EXIT_MEMORY;
3053 return log_oom();
3054 }
3055
3056 r = dynamic_creds_realize(dcreds, suggested_paths, &uid, &gid);
ff0af2a1
LP
3057 if (r < 0) {
3058 *exit_status = EXIT_USER;
e2b0cc34
YW
3059 if (r == -EILSEQ) {
3060 log_unit_error(unit, "Failed to update dynamic user credentials: User or group with specified name already exists.");
3061 return -EOPNOTSUPP;
3062 }
12145637 3063 return log_unit_error_errno(unit, r, "Failed to update dynamic user credentials: %m");
524daa8c 3064 }
524daa8c 3065
70dd455c 3066 if (!uid_is_valid(uid)) {
29206d46 3067 *exit_status = EXIT_USER;
12145637 3068 log_unit_error(unit, "UID validation failed for \""UID_FMT"\"", uid);
70dd455c
ZJS
3069 return -ESRCH;
3070 }
3071
3072 if (!gid_is_valid(gid)) {
3073 *exit_status = EXIT_USER;
12145637 3074 log_unit_error(unit, "GID validation failed for \""GID_FMT"\"", gid);
29206d46
LP
3075 return -ESRCH;
3076 }
5bc7452b 3077
29206d46
LP
3078 if (dcreds->user)
3079 username = dcreds->user->name;
3080
3081 } else {
4d885bd3
DH
3082 r = get_fixed_user(context, &username, &uid, &gid, &home, &shell);
3083 if (r < 0) {
3084 *exit_status = EXIT_USER;
12145637 3085 return log_unit_error_errno(unit, r, "Failed to determine user credentials: %m");
5bc7452b 3086 }
5bc7452b 3087
4d885bd3
DH
3088 r = get_fixed_group(context, &groupname, &gid);
3089 if (r < 0) {
3090 *exit_status = EXIT_GROUP;
12145637 3091 return log_unit_error_errno(unit, r, "Failed to determine group credentials: %m");
4d885bd3 3092 }
cdc5d5c5 3093 }
29206d46 3094
cdc5d5c5
DH
3095 /* Initialize user supplementary groups and get SupplementaryGroups= ones */
3096 r = get_supplementary_groups(context, username, groupname, gid,
3097 &supplementary_gids, &ngids);
3098 if (r < 0) {
3099 *exit_status = EXIT_GROUP;
12145637 3100 return log_unit_error_errno(unit, r, "Failed to determine supplementary groups: %m");
29206d46 3101 }
5bc7452b 3102
00d9ef85
LP
3103 r = send_user_lookup(unit, user_lookup_fd, uid, gid);
3104 if (r < 0) {
3105 *exit_status = EXIT_USER;
12145637 3106 return log_unit_error_errno(unit, r, "Failed to send user credentials to PID1: %m");
00d9ef85
LP
3107 }
3108
3109 user_lookup_fd = safe_close(user_lookup_fd);
3110
6732edab
LP
3111 r = acquire_home(context, uid, &home, &home_buffer);
3112 if (r < 0) {
3113 *exit_status = EXIT_CHDIR;
12145637 3114 return log_unit_error_errno(unit, r, "Failed to determine $HOME for user: %m");
6732edab
LP
3115 }
3116
d35fbf6b
DM
3117 /* If a socket is connected to STDIN/STDOUT/STDERR, we
3118 * must make sure to drop O_NONBLOCK */
3119 if (socket_fd >= 0)
a34ceba6 3120 (void) fd_nonblock(socket_fd, false);
acbb0225 3121
4c70a4a7
MS
3122 /* Journald will try to look-up our cgroup in order to populate _SYSTEMD_CGROUP and _SYSTEMD_UNIT fields.
3123 * Hence we need to migrate to the target cgroup from init.scope before connecting to journald */
3124 if (params->cgroup_path) {
3125 _cleanup_free_ char *p = NULL;
3126
3127 r = exec_parameters_get_cgroup_path(params, &p);
3128 if (r < 0) {
3129 *exit_status = EXIT_CGROUP;
3130 return log_unit_error_errno(unit, r, "Failed to acquire cgroup path: %m");
3131 }
3132
3133 r = cg_attach_everywhere(params->cgroup_supported, p, 0, NULL, NULL);
3134 if (r < 0) {
3135 *exit_status = EXIT_CGROUP;
3136 return log_unit_error_errno(unit, r, "Failed to attach to cgroup %s: %m", p);
3137 }
3138 }
3139
a8d08f39
LP
3140 if (context->network_namespace_path && runtime && runtime->netns_storage_socket[0] >= 0) {
3141 r = open_netns_path(runtime->netns_storage_socket, context->network_namespace_path);
3142 if (r < 0) {
3143 *exit_status = EXIT_NETWORK;
3144 return log_unit_error_errno(unit, r, "Failed to open network namespace path %s: %m", context->network_namespace_path);
3145 }
3146 }
3147
52c239d7 3148 r = setup_input(context, params, socket_fd, named_iofds);
ff0af2a1
LP
3149 if (r < 0) {
3150 *exit_status = EXIT_STDIN;
12145637 3151 return log_unit_error_errno(unit, r, "Failed to set up standard input: %m");
d35fbf6b 3152 }
034c6ed7 3153
52c239d7 3154 r = setup_output(unit, context, params, STDOUT_FILENO, socket_fd, named_iofds, basename(command->path), uid, gid, &journal_stream_dev, &journal_stream_ino);
ff0af2a1
LP
3155 if (r < 0) {
3156 *exit_status = EXIT_STDOUT;
12145637 3157 return log_unit_error_errno(unit, r, "Failed to set up standard output: %m");
d35fbf6b
DM
3158 }
3159
52c239d7 3160 r = setup_output(unit, context, params, STDERR_FILENO, socket_fd, named_iofds, basename(command->path), uid, gid, &journal_stream_dev, &journal_stream_ino);
ff0af2a1
LP
3161 if (r < 0) {
3162 *exit_status = EXIT_STDERR;
12145637 3163 return log_unit_error_errno(unit, r, "Failed to set up standard error output: %m");
d35fbf6b
DM
3164 }
3165
d35fbf6b 3166 if (context->oom_score_adjust_set) {
9f8168eb
LP
3167 /* When we can't make this change due to EPERM, then let's silently skip over it. User namespaces
3168 * prohibit write access to this file, and we shouldn't trip up over that. */
3169 r = set_oom_score_adjust(context->oom_score_adjust);
12145637 3170 if (IN_SET(r, -EPERM, -EACCES))
f2341e0a 3171 log_unit_debug_errno(unit, r, "Failed to adjust OOM setting, assuming containerized execution, ignoring: %m");
12145637 3172 else if (r < 0) {
ff0af2a1 3173 *exit_status = EXIT_OOM_ADJUST;
12145637 3174 return log_unit_error_errno(unit, r, "Failed to adjust OOM setting: %m");
613b411c 3175 }
d35fbf6b
DM
3176 }
3177
3178 if (context->nice_set)
3179 if (setpriority(PRIO_PROCESS, 0, context->nice) < 0) {
ff0af2a1 3180 *exit_status = EXIT_NICE;
12145637 3181 return log_unit_error_errno(unit, errno, "Failed to set up process scheduling priority (nice level): %m");
613b411c
LP
3182 }
3183
d35fbf6b
DM
3184 if (context->cpu_sched_set) {
3185 struct sched_param param = {
3186 .sched_priority = context->cpu_sched_priority,
3187 };
3188
ff0af2a1
LP
3189 r = sched_setscheduler(0,
3190 context->cpu_sched_policy |
3191 (context->cpu_sched_reset_on_fork ?
3192 SCHED_RESET_ON_FORK : 0),
3193 &param);
3194 if (r < 0) {
3195 *exit_status = EXIT_SETSCHEDULER;
12145637 3196 return log_unit_error_errno(unit, errno, "Failed to set up CPU scheduling: %m");
fc9b2a84 3197 }
d35fbf6b 3198 }
fc9b2a84 3199
0985c7c4
ZJS
3200 if (context->cpu_set.set)
3201 if (sched_setaffinity(0, context->cpu_set.allocated, context->cpu_set.set) < 0) {
ff0af2a1 3202 *exit_status = EXIT_CPUAFFINITY;
12145637 3203 return log_unit_error_errno(unit, errno, "Failed to set up CPU affinity: %m");
034c6ed7
LP
3204 }
3205
b070c7c0
MS
3206 if (mpol_is_valid(numa_policy_get_type(&context->numa_policy))) {
3207 r = apply_numa_policy(&context->numa_policy);
3208 if (r == -EOPNOTSUPP)
33fe9e3f 3209 log_unit_debug_errno(unit, r, "NUMA support not available, ignoring.");
b070c7c0
MS
3210 else if (r < 0) {
3211 *exit_status = EXIT_NUMA_POLICY;
3212 return log_unit_error_errno(unit, r, "Failed to set NUMA memory policy: %m");
3213 }
3214 }
3215
d35fbf6b
DM
3216 if (context->ioprio_set)
3217 if (ioprio_set(IOPRIO_WHO_PROCESS, 0, context->ioprio) < 0) {
ff0af2a1 3218 *exit_status = EXIT_IOPRIO;
12145637 3219 return log_unit_error_errno(unit, errno, "Failed to set up IO scheduling priority: %m");
d35fbf6b 3220 }
da726a4d 3221
d35fbf6b
DM
3222 if (context->timer_slack_nsec != NSEC_INFINITY)
3223 if (prctl(PR_SET_TIMERSLACK, context->timer_slack_nsec) < 0) {
ff0af2a1 3224 *exit_status = EXIT_TIMERSLACK;
12145637 3225 return log_unit_error_errno(unit, errno, "Failed to set up timer slack: %m");
4c2630eb 3226 }
9eba9da4 3227
21022b9d
LP
3228 if (context->personality != PERSONALITY_INVALID) {
3229 r = safe_personality(context->personality);
3230 if (r < 0) {
ff0af2a1 3231 *exit_status = EXIT_PERSONALITY;
12145637 3232 return log_unit_error_errno(unit, r, "Failed to set up execution domain (personality): %m");
4c2630eb 3233 }
21022b9d 3234 }
94f04347 3235
d35fbf6b 3236 if (context->utmp_id)
df0ff127 3237 utmp_put_init_process(context->utmp_id, getpid_cached(), getsid(0),
6a93917d 3238 context->tty_path,
023a4f67
LP
3239 context->utmp_mode == EXEC_UTMP_INIT ? INIT_PROCESS :
3240 context->utmp_mode == EXEC_UTMP_LOGIN ? LOGIN_PROCESS :
3241 USER_PROCESS,
6a93917d 3242 username);
d35fbf6b 3243
08f67696 3244 if (uid_is_valid(uid)) {
ff0af2a1
LP
3245 r = chown_terminal(STDIN_FILENO, uid);
3246 if (r < 0) {
3247 *exit_status = EXIT_STDIN;
12145637 3248 return log_unit_error_errno(unit, r, "Failed to change ownership of terminal: %m");
071830ff 3249 }
d35fbf6b 3250 }
8e274523 3251
4e1dfa45 3252 /* If delegation is enabled we'll pass ownership of the cgroup to the user of the new process. On cgroup v1
62b9bb26 3253 * this is only about systemd's own hierarchy, i.e. not the controller hierarchies, simply because that's not
4e1dfa45 3254 * safe. On cgroup v2 there's only one hierarchy anyway, and delegation is safe there, hence in that case only
62b9bb26 3255 * touch a single hierarchy too. */
584b8688 3256 if (params->cgroup_path && context->user && (params->flags & EXEC_CGROUP_DELEGATE)) {
62b9bb26 3257 r = cg_set_access(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, uid, gid);
ff0af2a1
LP
3258 if (r < 0) {
3259 *exit_status = EXIT_CGROUP;
12145637 3260 return log_unit_error_errno(unit, r, "Failed to adjust control group access: %m");
034c6ed7 3261 }
d35fbf6b 3262 }
034c6ed7 3263
72fd1768 3264 for (dt = 0; dt < _EXEC_DIRECTORY_TYPE_MAX; dt++) {
8679efde 3265 r = setup_exec_directory(context, params, uid, gid, dt, exit_status);
12145637
LP
3266 if (r < 0)
3267 return log_unit_error_errno(unit, r, "Failed to set up special execution directory in %s: %m", params->prefix[dt]);
d35fbf6b 3268 }
94f04347 3269
7bce046b 3270 r = build_environment(
fd63e712 3271 unit,
7bce046b
LP
3272 context,
3273 params,
3274 n_fds,
3275 home,
3276 username,
3277 shell,
3278 journal_stream_dev,
3279 journal_stream_ino,
3280 &our_env);
2065ca69
JW
3281 if (r < 0) {
3282 *exit_status = EXIT_MEMORY;
12145637 3283 return log_oom();
2065ca69
JW
3284 }
3285
3286 r = build_pass_environment(context, &pass_env);
3287 if (r < 0) {
3288 *exit_status = EXIT_MEMORY;
12145637 3289 return log_oom();
2065ca69
JW
3290 }
3291
3292 accum_env = strv_env_merge(5,
3293 params->environment,
3294 our_env,
3295 pass_env,
3296 context->environment,
3297 files_env,
3298 NULL);
3299 if (!accum_env) {
3300 *exit_status = EXIT_MEMORY;
12145637 3301 return log_oom();
2065ca69 3302 }
1280503b 3303 accum_env = strv_env_clean(accum_env);
2065ca69 3304
096424d1 3305 (void) umask(context->umask);
b213e1c1 3306
b1edf445 3307 r = setup_keyring(unit, context, params, uid, gid);
74dd6b51
LP
3308 if (r < 0) {
3309 *exit_status = EXIT_KEYRING;
12145637 3310 return log_unit_error_errno(unit, r, "Failed to set up kernel keyring: %m");
74dd6b51
LP
3311 }
3312
165a31c0 3313 /* We need sandboxing if the caller asked us to apply it and the command isn't explicitly excepted from it */
1703fa41 3314 needs_sandboxing = (params->flags & EXEC_APPLY_SANDBOXING) && !(command->flags & EXEC_COMMAND_FULLY_PRIVILEGED);
7f18ef0a 3315
165a31c0
LP
3316 /* We need the ambient capability hack, if the caller asked us to apply it and the command is marked for it, and the kernel doesn't actually support ambient caps */
3317 needs_ambient_hack = (params->flags & EXEC_APPLY_SANDBOXING) && (command->flags & EXEC_COMMAND_AMBIENT_MAGIC) && !ambient_capabilities_supported();
7f18ef0a 3318
165a31c0
LP
3319 /* We need setresuid() if the caller asked us to apply sandboxing and the command isn't explicitly excepted from either whole sandboxing or just setresuid() itself, and the ambient hack is not desired */
3320 if (needs_ambient_hack)
3321 needs_setuid = false;
3322 else
3323 needs_setuid = (params->flags & EXEC_APPLY_SANDBOXING) && !(command->flags & (EXEC_COMMAND_FULLY_PRIVILEGED|EXEC_COMMAND_NO_SETUID));
3324
3325 if (needs_sandboxing) {
7f18ef0a
FK
3326 /* MAC enablement checks need to be done before a new mount ns is created, as they rely on /sys being
3327 * present. The actual MAC context application will happen later, as late as possible, to avoid
3328 * impacting our own code paths. */
3329
349cc4a5 3330#if HAVE_SELINUX
43b1f709 3331 use_selinux = mac_selinux_use();
7f18ef0a 3332#endif
f9fa32f0 3333#if ENABLE_SMACK
43b1f709 3334 use_smack = mac_smack_use();
7f18ef0a 3335#endif
349cc4a5 3336#if HAVE_APPARMOR
43b1f709 3337 use_apparmor = mac_apparmor_use();
7f18ef0a 3338#endif
165a31c0 3339 }
7f18ef0a 3340
ce932d2d
LP
3341 if (needs_sandboxing) {
3342 int which_failed;
3343
3344 /* Let's set the resource limits before we call into PAM, so that pam_limits wins over what
3345 * is set here. (See below.) */
3346
3347 r = setrlimit_closest_all((const struct rlimit* const *) context->rlimit, &which_failed);
3348 if (r < 0) {
3349 *exit_status = EXIT_LIMITS;
3350 return log_unit_error_errno(unit, r, "Failed to adjust resource limit RLIMIT_%s: %m", rlimit_to_string(which_failed));
3351 }
3352 }
3353
165a31c0 3354 if (needs_setuid) {
ce932d2d
LP
3355
3356 /* Let's call into PAM after we set up our own idea of resource limits so that pam_limits
3357 * wins here. (See above.) */
3358
165a31c0
LP
3359 if (context->pam_name && username) {
3360 r = setup_pam(context->pam_name, username, uid, gid, context->tty_path, &accum_env, fds, n_fds);
3361 if (r < 0) {
3362 *exit_status = EXIT_PAM;
12145637 3363 return log_unit_error_errno(unit, r, "Failed to set up PAM session: %m");
165a31c0
LP
3364 }
3365 }
b213e1c1 3366 }
ac45f971 3367
a8d08f39
LP
3368 if ((context->private_network || context->network_namespace_path) && runtime && runtime->netns_storage_socket[0] >= 0) {
3369
6e2d7c4f
MS
3370 if (ns_type_supported(NAMESPACE_NET)) {
3371 r = setup_netns(runtime->netns_storage_socket);
3372 if (r < 0) {
3373 *exit_status = EXIT_NETWORK;
3374 return log_unit_error_errno(unit, r, "Failed to set up network namespacing: %m");
3375 }
a8d08f39
LP
3376 } else if (context->network_namespace_path) {
3377 *exit_status = EXIT_NETWORK;
3378 return log_unit_error_errno(unit, SYNTHETIC_ERRNO(EOPNOTSUPP), "NetworkNamespacePath= is not supported, refusing.");
6e2d7c4f
MS
3379 } else
3380 log_unit_warning(unit, "PrivateNetwork=yes is configured, but the kernel does not support network namespaces, ignoring.");
d35fbf6b 3381 }
169c1bda 3382
ee818b89 3383 needs_mount_namespace = exec_needs_mount_namespace(context, params, runtime);
ee818b89 3384 if (needs_mount_namespace) {
7cc5ef5f
ZJS
3385 _cleanup_free_ char *error_path = NULL;
3386
3387 r = apply_mount_namespace(unit, command, context, params, runtime, &error_path);
3fbe8dbe
LP
3388 if (r < 0) {
3389 *exit_status = EXIT_NAMESPACE;
7cc5ef5f
ZJS
3390 return log_unit_error_errno(unit, r, "Failed to set up mount namespacing%s%s: %m",
3391 error_path ? ": " : "", strempty(error_path));
3fbe8dbe 3392 }
d35fbf6b 3393 }
81a2b7ce 3394
aecd5ac6
TM
3395 if (context->protect_hostname) {
3396 if (ns_type_supported(NAMESPACE_UTS)) {
3397 if (unshare(CLONE_NEWUTS) < 0) {
3398 *exit_status = EXIT_NAMESPACE;
3399 return log_unit_error_errno(unit, errno, "Failed to set up UTS namespacing: %m");
3400 }
3401 } else
3402 log_unit_warning(unit, "ProtectHostname=yes is configured, but the kernel does not support UTS namespaces, ignoring namespace setup.");
3403#if HAVE_SECCOMP
3404 r = seccomp_protect_hostname();
3405 if (r < 0) {
3406 *exit_status = EXIT_SECCOMP;
3407 return log_unit_error_errno(unit, r, "Failed to apply hostname restrictions: %m");
3408 }
3409#endif
3410 }
3411
bbeea271 3412 /* Drop groups as early as possible */
165a31c0 3413 if (needs_setuid) {
709dbeac 3414 r = enforce_groups(gid, supplementary_gids, ngids);
096424d1
LP
3415 if (r < 0) {
3416 *exit_status = EXIT_GROUP;
12145637 3417 return log_unit_error_errno(unit, r, "Changing group credentials failed: %m");
096424d1 3418 }
165a31c0 3419 }
096424d1 3420
165a31c0 3421 if (needs_sandboxing) {
349cc4a5 3422#if HAVE_SELINUX
43b1f709 3423 if (use_selinux && params->selinux_context_net && socket_fd >= 0) {
937ccce9
LP
3424 r = mac_selinux_get_child_mls_label(socket_fd, command->path, context->selinux_context, &mac_selinux_context_net);
3425 if (r < 0) {
3426 *exit_status = EXIT_SELINUX_CONTEXT;
12145637 3427 return log_unit_error_errno(unit, r, "Failed to determine SELinux context: %m");
937ccce9 3428 }
9008e1ac 3429 }
9008e1ac
MS
3430#endif
3431
937ccce9
LP
3432 if (context->private_users) {
3433 r = setup_private_users(uid, gid);
3434 if (r < 0) {
3435 *exit_status = EXIT_USER;
12145637 3436 return log_unit_error_errno(unit, r, "Failed to set up user namespacing: %m");
937ccce9 3437 }
d251207d
LP
3438 }
3439 }
3440
165a31c0 3441 /* We repeat the fd closing here, to make sure that nothing is leaked from the PAM modules. Note that we are
5686391b
LP
3442 * more aggressive this time since socket_fd and the netns fds we don't need anymore. We do keep the exec_fd
3443 * however if we have it as we want to keep it open until the final execve(). */
3444
3445 if (params->exec_fd >= 0) {
3446 exec_fd = params->exec_fd;
3447
3448 if (exec_fd < 3 + (int) n_fds) {
3449 int moved_fd;
3450
3451 /* Let's move the exec fd far up, so that it's outside of the fd range we want to pass to the
3452 * process we are about to execute. */
3453
3454 moved_fd = fcntl(exec_fd, F_DUPFD_CLOEXEC, 3 + (int) n_fds);
3455 if (moved_fd < 0) {
3456 *exit_status = EXIT_FDS;
3457 return log_unit_error_errno(unit, errno, "Couldn't move exec fd up: %m");
3458 }
3459
3460 safe_close(exec_fd);
3461 exec_fd = moved_fd;
3462 } else {
3463 /* This fd should be FD_CLOEXEC already, but let's make sure. */
3464 r = fd_cloexec(exec_fd, true);
3465 if (r < 0) {
3466 *exit_status = EXIT_FDS;
3467 return log_unit_error_errno(unit, r, "Failed to make exec fd FD_CLOEXEC: %m");
3468 }
3469 }
3470
3471 fds_with_exec_fd = newa(int, n_fds + 1);
7e8d494b 3472 memcpy_safe(fds_with_exec_fd, fds, n_fds * sizeof(int));
5686391b
LP
3473 fds_with_exec_fd[n_fds] = exec_fd;
3474 n_fds_with_exec_fd = n_fds + 1;
3475 } else {
3476 fds_with_exec_fd = fds;
3477 n_fds_with_exec_fd = n_fds;
3478 }
3479
3480 r = close_all_fds(fds_with_exec_fd, n_fds_with_exec_fd);
ff0af2a1
LP
3481 if (r >= 0)
3482 r = shift_fds(fds, n_fds);
3483 if (r >= 0)
25b583d7 3484 r = flags_fds(fds, n_socket_fds, n_storage_fds, context->non_blocking);
ff0af2a1
LP
3485 if (r < 0) {
3486 *exit_status = EXIT_FDS;
12145637 3487 return log_unit_error_errno(unit, r, "Failed to adjust passed file descriptors: %m");
d35fbf6b 3488 }
e66cf1a3 3489
5686391b
LP
3490 /* At this point, the fds we want to pass to the program are all ready and set up, with O_CLOEXEC turned off
3491 * and at the right fd numbers. There are no other fds open, with one exception: the exec_fd if it is defined,
3492 * and it has O_CLOEXEC set, after all we want it to be closed by the execve(), so that our parent knows we
3493 * came this far. */
3494
165a31c0 3495 secure_bits = context->secure_bits;
e66cf1a3 3496
165a31c0
LP
3497 if (needs_sandboxing) {
3498 uint64_t bset;
e66cf1a3 3499
ce932d2d
LP
3500 /* Set the RTPRIO resource limit to 0, but only if nothing else was explicitly
3501 * requested. (Note this is placed after the general resource limit initialization, see
3502 * above, in order to take precedence.) */
f4170c67
LP
3503 if (context->restrict_realtime && !context->rlimit[RLIMIT_RTPRIO]) {
3504 if (setrlimit(RLIMIT_RTPRIO, &RLIMIT_MAKE_CONST(0)) < 0) {
3505 *exit_status = EXIT_LIMITS;
12145637 3506 return log_unit_error_errno(unit, errno, "Failed to adjust RLIMIT_RTPRIO resource limit: %m");
f4170c67
LP
3507 }
3508 }
3509
37ac2744
JB
3510#if ENABLE_SMACK
3511 /* LSM Smack needs the capability CAP_MAC_ADMIN to change the current execution security context of the
3512 * process. This is the latest place before dropping capabilities. Other MAC context are set later. */
3513 if (use_smack) {
3514 r = setup_smack(context, command);
3515 if (r < 0) {
3516 *exit_status = EXIT_SMACK_PROCESS_LABEL;
3517 return log_unit_error_errno(unit, r, "Failed to set SMACK process label: %m");
3518 }
3519 }
3520#endif
3521
165a31c0
LP
3522 bset = context->capability_bounding_set;
3523 /* If the ambient caps hack is enabled (which means the kernel can't do them, and the user asked for
3524 * our magic fallback), then let's add some extra caps, so that the service can drop privs of its own,
3525 * instead of us doing that */
3526 if (needs_ambient_hack)
3527 bset |= (UINT64_C(1) << CAP_SETPCAP) |
3528 (UINT64_C(1) << CAP_SETUID) |
3529 (UINT64_C(1) << CAP_SETGID);
3530
3531 if (!cap_test_all(bset)) {
3532 r = capability_bounding_set_drop(bset, false);
ff0af2a1
LP
3533 if (r < 0) {
3534 *exit_status = EXIT_CAPABILITIES;
12145637 3535 return log_unit_error_errno(unit, r, "Failed to drop capabilities: %m");
3b8bddde 3536 }
4c2630eb 3537 }
3b8bddde 3538
755d4b67
IP
3539 /* This is done before enforce_user, but ambient set
3540 * does not survive over setresuid() if keep_caps is not set. */
165a31c0
LP
3541 if (!needs_ambient_hack &&
3542 context->capability_ambient_set != 0) {
755d4b67
IP
3543 r = capability_ambient_set_apply(context->capability_ambient_set, true);
3544 if (r < 0) {
3545 *exit_status = EXIT_CAPABILITIES;
12145637 3546 return log_unit_error_errno(unit, r, "Failed to apply ambient capabilities (before UID change): %m");
755d4b67 3547 }
755d4b67 3548 }
165a31c0 3549 }
755d4b67 3550
fa97f630
JB
3551 /* chroot to root directory first, before we lose the ability to chroot */
3552 r = apply_root_directory(context, params, needs_mount_namespace, exit_status);
3553 if (r < 0)
3554 return log_unit_error_errno(unit, r, "Chrooting to the requested root directory failed: %m");
3555
165a31c0 3556 if (needs_setuid) {
08f67696 3557 if (uid_is_valid(uid)) {
ff0af2a1
LP
3558 r = enforce_user(context, uid);
3559 if (r < 0) {
3560 *exit_status = EXIT_USER;
12145637 3561 return log_unit_error_errno(unit, r, "Failed to change UID to " UID_FMT ": %m", uid);
5b6319dc 3562 }
165a31c0
LP
3563
3564 if (!needs_ambient_hack &&
3565 context->capability_ambient_set != 0) {
755d4b67
IP
3566
3567 /* Fix the ambient capabilities after user change. */
3568 r = capability_ambient_set_apply(context->capability_ambient_set, false);
3569 if (r < 0) {
3570 *exit_status = EXIT_CAPABILITIES;
12145637 3571 return log_unit_error_errno(unit, r, "Failed to apply ambient capabilities (after UID change): %m");
755d4b67
IP
3572 }
3573
3574 /* If we were asked to change user and ambient capabilities
3575 * were requested, we had to add keep-caps to the securebits
3576 * so that we would maintain the inherited capability set
3577 * through the setresuid(). Make sure that the bit is added
3578 * also to the context secure_bits so that we don't try to
3579 * drop the bit away next. */
3580
7f508f2c 3581 secure_bits |= 1<<SECURE_KEEP_CAPS;
755d4b67 3582 }
5b6319dc 3583 }
165a31c0 3584 }
d35fbf6b 3585
56ef8db9
JB
3586 /* Apply working directory here, because the working directory might be on NFS and only the user running
3587 * this service might have the correct privilege to change to the working directory */
fa97f630 3588 r = apply_working_directory(context, params, home, exit_status);
56ef8db9
JB
3589 if (r < 0)
3590 return log_unit_error_errno(unit, r, "Changing to the requested working directory failed: %m");
3591
165a31c0 3592 if (needs_sandboxing) {
37ac2744 3593 /* Apply other MAC contexts late, but before seccomp syscall filtering, as those should really be last to
5cd9cd35
LP
3594 * influence our own codepaths as little as possible. Moreover, applying MAC contexts usually requires
3595 * syscalls that are subject to seccomp filtering, hence should probably be applied before the syscalls
3596 * are restricted. */
3597
349cc4a5 3598#if HAVE_SELINUX
43b1f709 3599 if (use_selinux) {
5cd9cd35
LP
3600 char *exec_context = mac_selinux_context_net ?: context->selinux_context;
3601
3602 if (exec_context) {
3603 r = setexeccon(exec_context);
3604 if (r < 0) {
3605 *exit_status = EXIT_SELINUX_CONTEXT;
12145637 3606 return log_unit_error_errno(unit, r, "Failed to change SELinux context to %s: %m", exec_context);
5cd9cd35
LP
3607 }
3608 }
3609 }
3610#endif
3611
349cc4a5 3612#if HAVE_APPARMOR
43b1f709 3613 if (use_apparmor && context->apparmor_profile) {
5cd9cd35
LP
3614 r = aa_change_onexec(context->apparmor_profile);
3615 if (r < 0 && !context->apparmor_profile_ignore) {
3616 *exit_status = EXIT_APPARMOR_PROFILE;
12145637 3617 return log_unit_error_errno(unit, errno, "Failed to prepare AppArmor profile change to %s: %m", context->apparmor_profile);
5cd9cd35
LP
3618 }
3619 }
3620#endif
3621
165a31c0
LP
3622 /* PR_GET_SECUREBITS is not privileged, while PR_SET_SECUREBITS is. So to suppress potential EPERMs
3623 * we'll try not to call PR_SET_SECUREBITS unless necessary. */
755d4b67
IP
3624 if (prctl(PR_GET_SECUREBITS) != secure_bits)
3625 if (prctl(PR_SET_SECUREBITS, secure_bits) < 0) {
ff0af2a1 3626 *exit_status = EXIT_SECUREBITS;
12145637 3627 return log_unit_error_errno(unit, errno, "Failed to set process secure bits: %m");
ff01d048 3628 }
5b6319dc 3629
59eeb84b 3630 if (context_has_no_new_privileges(context))
d35fbf6b 3631 if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) < 0) {
ff0af2a1 3632 *exit_status = EXIT_NO_NEW_PRIVILEGES;
12145637 3633 return log_unit_error_errno(unit, errno, "Failed to disable new privileges: %m");
d35fbf6b
DM
3634 }
3635
349cc4a5 3636#if HAVE_SECCOMP
469830d1
LP
3637 r = apply_address_families(unit, context);
3638 if (r < 0) {
3639 *exit_status = EXIT_ADDRESS_FAMILIES;
12145637 3640 return log_unit_error_errno(unit, r, "Failed to restrict address families: %m");
4c2630eb 3641 }
04aa0cb9 3642
469830d1
LP
3643 r = apply_memory_deny_write_execute(unit, context);
3644 if (r < 0) {
3645 *exit_status = EXIT_SECCOMP;
12145637 3646 return log_unit_error_errno(unit, r, "Failed to disable writing to executable memory: %m");
f3e43635 3647 }
f4170c67 3648
469830d1
LP
3649 r = apply_restrict_realtime(unit, context);
3650 if (r < 0) {
3651 *exit_status = EXIT_SECCOMP;
12145637 3652 return log_unit_error_errno(unit, r, "Failed to apply realtime restrictions: %m");
f4170c67
LP
3653 }
3654
f69567cb
LP
3655 r = apply_restrict_suid_sgid(unit, context);
3656 if (r < 0) {
3657 *exit_status = EXIT_SECCOMP;
3658 return log_unit_error_errno(unit, r, "Failed to apply SUID/SGID restrictions: %m");
3659 }
3660
add00535
LP
3661 r = apply_restrict_namespaces(unit, context);
3662 if (r < 0) {
3663 *exit_status = EXIT_SECCOMP;
12145637 3664 return log_unit_error_errno(unit, r, "Failed to apply namespace restrictions: %m");
add00535
LP
3665 }
3666
469830d1
LP
3667 r = apply_protect_sysctl(unit, context);
3668 if (r < 0) {
3669 *exit_status = EXIT_SECCOMP;
12145637 3670 return log_unit_error_errno(unit, r, "Failed to apply sysctl restrictions: %m");
502d704e
DH
3671 }
3672
469830d1
LP
3673 r = apply_protect_kernel_modules(unit, context);
3674 if (r < 0) {
3675 *exit_status = EXIT_SECCOMP;
12145637 3676 return log_unit_error_errno(unit, r, "Failed to apply module loading restrictions: %m");
59eeb84b
LP
3677 }
3678
469830d1
LP
3679 r = apply_private_devices(unit, context);
3680 if (r < 0) {
3681 *exit_status = EXIT_SECCOMP;
12145637 3682 return log_unit_error_errno(unit, r, "Failed to set up private devices: %m");
469830d1
LP
3683 }
3684
3685 r = apply_syscall_archs(unit, context);
3686 if (r < 0) {
3687 *exit_status = EXIT_SECCOMP;
12145637 3688 return log_unit_error_errno(unit, r, "Failed to apply syscall architecture restrictions: %m");
ba128bb8
LP
3689 }
3690
78e864e5
TM
3691 r = apply_lock_personality(unit, context);
3692 if (r < 0) {
3693 *exit_status = EXIT_SECCOMP;
12145637 3694 return log_unit_error_errno(unit, r, "Failed to lock personalities: %m");
78e864e5
TM
3695 }
3696
5cd9cd35
LP
3697 /* This really should remain the last step before the execve(), to make sure our own code is unaffected
3698 * by the filter as little as possible. */
165a31c0 3699 r = apply_syscall_filter(unit, context, needs_ambient_hack);
469830d1
LP
3700 if (r < 0) {
3701 *exit_status = EXIT_SECCOMP;
12145637 3702 return log_unit_error_errno(unit, r, "Failed to apply system call filters: %m");
d35fbf6b
DM
3703 }
3704#endif
d35fbf6b 3705 }
034c6ed7 3706
00819cc1
LP
3707 if (!strv_isempty(context->unset_environment)) {
3708 char **ee = NULL;
3709
3710 ee = strv_env_delete(accum_env, 1, context->unset_environment);
3711 if (!ee) {
3712 *exit_status = EXIT_MEMORY;
12145637 3713 return log_oom();
00819cc1
LP
3714 }
3715
130d3d22 3716 strv_free_and_replace(accum_env, ee);
00819cc1
LP
3717 }
3718
7ca69792
AZ
3719 if (!FLAGS_SET(command->flags, EXEC_COMMAND_NO_ENV_EXPAND)) {
3720 replaced_argv = replace_env_argv(command->argv, accum_env);
3721 if (!replaced_argv) {
3722 *exit_status = EXIT_MEMORY;
3723 return log_oom();
3724 }
3725 final_argv = replaced_argv;
3726 } else
3727 final_argv = command->argv;
034c6ed7 3728
f1d34068 3729 if (DEBUG_LOGGING) {
d35fbf6b 3730 _cleanup_free_ char *line;
81a2b7ce 3731
d35fbf6b 3732 line = exec_command_line(final_argv);
a1230ff9 3733 if (line)
f2341e0a 3734 log_struct(LOG_DEBUG,
f2341e0a
LP
3735 "EXECUTABLE=%s", command->path,
3736 LOG_UNIT_MESSAGE(unit, "Executing: %s", line),
ba360bb0 3737 LOG_UNIT_ID(unit),
a1230ff9 3738 LOG_UNIT_INVOCATION_ID(unit));
d35fbf6b 3739 }
dd305ec9 3740
5686391b
LP
3741 if (exec_fd >= 0) {
3742 uint8_t hot = 1;
3743
3744 /* We have finished with all our initializations. Let's now let the manager know that. From this point
3745 * on, if the manager sees POLLHUP on the exec_fd, then execve() was successful. */
3746
3747 if (write(exec_fd, &hot, sizeof(hot)) < 0) {
3748 *exit_status = EXIT_EXEC;
3749 return log_unit_error_errno(unit, errno, "Failed to enable exec_fd: %m");
3750 }
3751 }
3752
2065ca69 3753 execve(command->path, final_argv, accum_env);
5686391b
LP
3754 r = -errno;
3755
3756 if (exec_fd >= 0) {
3757 uint8_t hot = 0;
3758
3759 /* The execve() failed. This means the exec_fd is still open. Which means we need to tell the manager
3760 * that POLLHUP on it no longer means execve() succeeded. */
3761
3762 if (write(exec_fd, &hot, sizeof(hot)) < 0) {
3763 *exit_status = EXIT_EXEC;
3764 return log_unit_error_errno(unit, errno, "Failed to disable exec_fd: %m");
3765 }
3766 }
12145637 3767
5686391b
LP
3768 if (r == -ENOENT && (command->flags & EXEC_COMMAND_IGNORE_FAILURE)) {
3769 log_struct_errno(LOG_INFO, r,
12145637
LP
3770 "MESSAGE_ID=" SD_MESSAGE_SPAWN_FAILED_STR,
3771 LOG_UNIT_ID(unit),
3772 LOG_UNIT_INVOCATION_ID(unit),
3773 LOG_UNIT_MESSAGE(unit, "Executable %s missing, skipping: %m",
3774 command->path),
a1230ff9 3775 "EXECUTABLE=%s", command->path);
12145637
LP
3776 return 0;
3777 }
3778
ff0af2a1 3779 *exit_status = EXIT_EXEC;
5686391b 3780 return log_unit_error_errno(unit, r, "Failed to execute command: %m");
d35fbf6b 3781}
81a2b7ce 3782
34cf6c43 3783static int exec_context_load_environment(const Unit *unit, const ExecContext *c, char ***l);
2caa38e9 3784static int exec_context_named_iofds(const ExecContext *c, const ExecParameters *p, int named_iofds[static 3]);
34cf6c43 3785
f2341e0a
LP
3786int exec_spawn(Unit *unit,
3787 ExecCommand *command,
d35fbf6b
DM
3788 const ExecContext *context,
3789 const ExecParameters *params,
3790 ExecRuntime *runtime,
29206d46 3791 DynamicCreds *dcreds,
d35fbf6b 3792 pid_t *ret) {
8351ceae 3793
ee39ca20 3794 int socket_fd, r, named_iofds[3] = { -1, -1, -1 }, *fds = NULL;
78f93209 3795 _cleanup_free_ char *subcgroup_path = NULL;
d35fbf6b 3796 _cleanup_strv_free_ char **files_env = NULL;
da6053d0 3797 size_t n_storage_fds = 0, n_socket_fds = 0;
ff0af2a1 3798 _cleanup_free_ char *line = NULL;
d35fbf6b 3799 pid_t pid;
8351ceae 3800
f2341e0a 3801 assert(unit);
d35fbf6b
DM
3802 assert(command);
3803 assert(context);
3804 assert(ret);
3805 assert(params);
25b583d7 3806 assert(params->fds || (params->n_socket_fds + params->n_storage_fds <= 0));
4298d0b5 3807
d35fbf6b
DM
3808 if (context->std_input == EXEC_INPUT_SOCKET ||
3809 context->std_output == EXEC_OUTPUT_SOCKET ||
3810 context->std_error == EXEC_OUTPUT_SOCKET) {
17df7223 3811
4c47affc 3812 if (params->n_socket_fds > 1) {
f2341e0a 3813 log_unit_error(unit, "Got more than one socket.");
d35fbf6b 3814 return -EINVAL;
ff0af2a1 3815 }
eef65bf3 3816
4c47affc 3817 if (params->n_socket_fds == 0) {
488ab41c
AA
3818 log_unit_error(unit, "Got no socket.");
3819 return -EINVAL;
3820 }
3821
d35fbf6b
DM
3822 socket_fd = params->fds[0];
3823 } else {
3824 socket_fd = -1;
3825 fds = params->fds;
9b141911 3826 n_socket_fds = params->n_socket_fds;
25b583d7 3827 n_storage_fds = params->n_storage_fds;
d35fbf6b 3828 }
94f04347 3829
34cf6c43 3830 r = exec_context_named_iofds(context, params, named_iofds);
52c239d7
LB
3831 if (r < 0)
3832 return log_unit_error_errno(unit, r, "Failed to load a named file descriptor: %m");
3833
f2341e0a 3834 r = exec_context_load_environment(unit, context, &files_env);
ff0af2a1 3835 if (r < 0)
f2341e0a 3836 return log_unit_error_errno(unit, r, "Failed to load environment files: %m");
034c6ed7 3837
ee39ca20 3838 line = exec_command_line(command->argv);
d35fbf6b
DM
3839 if (!line)
3840 return log_oom();
fab56fc5 3841
f2341e0a 3842 log_struct(LOG_DEBUG,
f2341e0a
LP
3843 LOG_UNIT_MESSAGE(unit, "About to execute: %s", line),
3844 "EXECUTABLE=%s", command->path,
ba360bb0 3845 LOG_UNIT_ID(unit),
a1230ff9 3846 LOG_UNIT_INVOCATION_ID(unit));
12145637 3847
78f93209
LP
3848 if (params->cgroup_path) {
3849 r = exec_parameters_get_cgroup_path(params, &subcgroup_path);
3850 if (r < 0)
3851 return log_unit_error_errno(unit, r, "Failed to acquire subcgroup path: %m");
3852 if (r > 0) { /* We are using a child cgroup */
3853 r = cg_create(SYSTEMD_CGROUP_CONTROLLER, subcgroup_path);
3854 if (r < 0)
3855 return log_unit_error_errno(unit, r, "Failed to create control group '%s': %m", subcgroup_path);
3856 }
3857 }
3858
d35fbf6b
DM
3859 pid = fork();
3860 if (pid < 0)
74129a12 3861 return log_unit_error_errno(unit, errno, "Failed to fork: %m");
d35fbf6b
DM
3862
3863 if (pid == 0) {
12145637 3864 int exit_status = EXIT_SUCCESS;
ff0af2a1 3865
f2341e0a
LP
3866 r = exec_child(unit,
3867 command,
ff0af2a1
LP
3868 context,
3869 params,
3870 runtime,
29206d46 3871 dcreds,
ff0af2a1 3872 socket_fd,
52c239d7 3873 named_iofds,
4c47affc 3874 fds,
9b141911 3875 n_socket_fds,
25b583d7 3876 n_storage_fds,
ff0af2a1 3877 files_env,
00d9ef85 3878 unit->manager->user_lookup_fds[1],
12145637
LP
3879 &exit_status);
3880
e1714f02
ZJS
3881 if (r < 0) {
3882 const char *status =
3883 exit_status_to_string(exit_status,
3884 EXIT_STATUS_GLIBC | EXIT_STATUS_SYSTEMD);
3885
12145637
LP
3886 log_struct_errno(LOG_ERR, r,
3887 "MESSAGE_ID=" SD_MESSAGE_SPAWN_FAILED_STR,
3888 LOG_UNIT_ID(unit),
3889 LOG_UNIT_INVOCATION_ID(unit),
3890 LOG_UNIT_MESSAGE(unit, "Failed at step %s spawning %s: %m",
e1714f02 3891 status, command->path),
a1230ff9 3892 "EXECUTABLE=%s", command->path);
e1714f02 3893 }
4c2630eb 3894
ff0af2a1 3895 _exit(exit_status);
034c6ed7
LP
3896 }
3897
f2341e0a 3898 log_unit_debug(unit, "Forked %s as "PID_FMT, command->path, pid);
23635a85 3899
78f93209
LP
3900 /* We add the new process to the cgroup both in the child (so that we can be sure that no user code is ever
3901 * executed outside of the cgroup) and in the parent (so that we can be sure that when we kill the cgroup the
3902 * process will be killed too). */
3903 if (subcgroup_path)
3904 (void) cg_attach(SYSTEMD_CGROUP_CONTROLLER, subcgroup_path, pid);
2da3263a 3905
b58b4116 3906 exec_status_start(&command->exec_status, pid);
9fb86720 3907
034c6ed7 3908 *ret = pid;
5cb5a6ff
LP
3909 return 0;
3910}
3911
034c6ed7 3912void exec_context_init(ExecContext *c) {
3536f49e
YW
3913 ExecDirectoryType i;
3914
034c6ed7
LP
3915 assert(c);
3916
4c12626c 3917 c->umask = 0022;
9eba9da4 3918 c->ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 0);
94f04347 3919 c->cpu_sched_policy = SCHED_OTHER;
071830ff 3920 c->syslog_priority = LOG_DAEMON|LOG_INFO;
74922904 3921 c->syslog_level_prefix = true;
353e12c2 3922 c->ignore_sigpipe = true;
3a43da28 3923 c->timer_slack_nsec = NSEC_INFINITY;
050f7277 3924 c->personality = PERSONALITY_INVALID;
72fd1768 3925 for (i = 0; i < _EXEC_DIRECTORY_TYPE_MAX; i++)
3536f49e 3926 c->directories[i].mode = 0755;
a103496c 3927 c->capability_bounding_set = CAP_ALL;
aa9d574d
YW
3928 assert_cc(NAMESPACE_FLAGS_INITIAL != NAMESPACE_FLAGS_ALL);
3929 c->restrict_namespaces = NAMESPACE_FLAGS_INITIAL;
d3070fbd 3930 c->log_level_max = -1;
b070c7c0 3931 numa_policy_reset(&c->numa_policy);
034c6ed7
LP
3932}
3933
613b411c 3934void exec_context_done(ExecContext *c) {
3536f49e 3935 ExecDirectoryType i;
d3070fbd 3936 size_t l;
5cb5a6ff
LP
3937
3938 assert(c);
3939
6796073e
LP
3940 c->environment = strv_free(c->environment);
3941 c->environment_files = strv_free(c->environment_files);
b4c14404 3942 c->pass_environment = strv_free(c->pass_environment);
00819cc1 3943 c->unset_environment = strv_free(c->unset_environment);
8c7be95e 3944
31ce987c 3945 rlimit_free_all(c->rlimit);
034c6ed7 3946
2038c3f5 3947 for (l = 0; l < 3; l++) {
52c239d7 3948 c->stdio_fdname[l] = mfree(c->stdio_fdname[l]);
2038c3f5
LP
3949 c->stdio_file[l] = mfree(c->stdio_file[l]);
3950 }
52c239d7 3951
a1e58e8e
LP
3952 c->working_directory = mfree(c->working_directory);
3953 c->root_directory = mfree(c->root_directory);
915e6d16 3954 c->root_image = mfree(c->root_image);
a1e58e8e
LP
3955 c->tty_path = mfree(c->tty_path);
3956 c->syslog_identifier = mfree(c->syslog_identifier);
3957 c->user = mfree(c->user);
3958 c->group = mfree(c->group);
034c6ed7 3959
6796073e 3960 c->supplementary_groups = strv_free(c->supplementary_groups);
94f04347 3961
a1e58e8e 3962 c->pam_name = mfree(c->pam_name);
5b6319dc 3963
2a624c36
AP
3964 c->read_only_paths = strv_free(c->read_only_paths);
3965 c->read_write_paths = strv_free(c->read_write_paths);
3966 c->inaccessible_paths = strv_free(c->inaccessible_paths);
82c121a4 3967
d2d6c096 3968 bind_mount_free_many(c->bind_mounts, c->n_bind_mounts);
8e06d57c
YW
3969 c->bind_mounts = NULL;
3970 c->n_bind_mounts = 0;
2abd4e38
YW
3971 temporary_filesystem_free_many(c->temporary_filesystems, c->n_temporary_filesystems);
3972 c->temporary_filesystems = NULL;
3973 c->n_temporary_filesystems = 0;
d2d6c096 3974
0985c7c4 3975 cpu_set_reset(&c->cpu_set);
b070c7c0 3976 numa_policy_reset(&c->numa_policy);
86a3475b 3977
a1e58e8e
LP
3978 c->utmp_id = mfree(c->utmp_id);
3979 c->selinux_context = mfree(c->selinux_context);
3980 c->apparmor_profile = mfree(c->apparmor_profile);
5b8e1b77 3981 c->smack_process_label = mfree(c->smack_process_label);
eef65bf3 3982
8cfa775f 3983 c->syscall_filter = hashmap_free(c->syscall_filter);
525d3cc7
LP
3984 c->syscall_archs = set_free(c->syscall_archs);
3985 c->address_families = set_free(c->address_families);
e66cf1a3 3986
72fd1768 3987 for (i = 0; i < _EXEC_DIRECTORY_TYPE_MAX; i++)
3536f49e 3988 c->directories[i].paths = strv_free(c->directories[i].paths);
d3070fbd
LP
3989
3990 c->log_level_max = -1;
3991
3992 exec_context_free_log_extra_fields(c);
08f3be7a 3993
90fc172e
AZ
3994 c->log_rate_limit_interval_usec = 0;
3995 c->log_rate_limit_burst = 0;
3996
08f3be7a
LP
3997 c->stdin_data = mfree(c->stdin_data);
3998 c->stdin_data_size = 0;
a8d08f39
LP
3999
4000 c->network_namespace_path = mfree(c->network_namespace_path);
e66cf1a3
LP
4001}
4002
34cf6c43 4003int exec_context_destroy_runtime_directory(const ExecContext *c, const char *runtime_prefix) {
e66cf1a3
LP
4004 char **i;
4005
4006 assert(c);
4007
4008 if (!runtime_prefix)
4009 return 0;
4010
3536f49e 4011 STRV_FOREACH(i, c->directories[EXEC_DIRECTORY_RUNTIME].paths) {
e66cf1a3
LP
4012 _cleanup_free_ char *p;
4013
7bc4bf4a 4014 p = path_join(runtime_prefix, *i);
e66cf1a3
LP
4015 if (!p)
4016 return -ENOMEM;
4017
7bc4bf4a
LP
4018 /* We execute this synchronously, since we need to be sure this is gone when we start the
4019 * service next. */
c6878637 4020 (void) rm_rf(p, REMOVE_ROOT);
e66cf1a3
LP
4021 }
4022
4023 return 0;
5cb5a6ff
LP
4024}
4025
34cf6c43 4026static void exec_command_done(ExecCommand *c) {
43d0fcbd
LP
4027 assert(c);
4028
a1e58e8e 4029 c->path = mfree(c->path);
6796073e 4030 c->argv = strv_free(c->argv);
43d0fcbd
LP
4031}
4032
da6053d0
LP
4033void exec_command_done_array(ExecCommand *c, size_t n) {
4034 size_t i;
43d0fcbd
LP
4035
4036 for (i = 0; i < n; i++)
4037 exec_command_done(c+i);
4038}
4039
f1acf85a 4040ExecCommand* exec_command_free_list(ExecCommand *c) {
5cb5a6ff
LP
4041 ExecCommand *i;
4042
4043 while ((i = c)) {
71fda00f 4044 LIST_REMOVE(command, c, i);
43d0fcbd 4045 exec_command_done(i);
5cb5a6ff
LP
4046 free(i);
4047 }
f1acf85a
ZJS
4048
4049 return NULL;
5cb5a6ff
LP
4050}
4051
da6053d0
LP
4052void exec_command_free_array(ExecCommand **c, size_t n) {
4053 size_t i;
034c6ed7 4054
f1acf85a
ZJS
4055 for (i = 0; i < n; i++)
4056 c[i] = exec_command_free_list(c[i]);
034c6ed7
LP
4057}
4058
6a1d4d9f
LP
4059void exec_command_reset_status_array(ExecCommand *c, size_t n) {
4060 size_t i;
4061
4062 for (i = 0; i < n; i++)
4063 exec_status_reset(&c[i].exec_status);
4064}
4065
4066void exec_command_reset_status_list_array(ExecCommand **c, size_t n) {
4067 size_t i;
4068
4069 for (i = 0; i < n; i++) {
4070 ExecCommand *z;
4071
4072 LIST_FOREACH(command, z, c[i])
4073 exec_status_reset(&z->exec_status);
4074 }
4075}
4076
039f0e70 4077typedef struct InvalidEnvInfo {
34cf6c43 4078 const Unit *unit;
039f0e70
LP
4079 const char *path;
4080} InvalidEnvInfo;
4081
4082static void invalid_env(const char *p, void *userdata) {
4083 InvalidEnvInfo *info = userdata;
4084
f2341e0a 4085 log_unit_error(info->unit, "Ignoring invalid environment assignment '%s': %s", p, info->path);
039f0e70
LP
4086}
4087
52c239d7
LB
4088const char* exec_context_fdname(const ExecContext *c, int fd_index) {
4089 assert(c);
4090
4091 switch (fd_index) {
5073ff6b 4092
52c239d7
LB
4093 case STDIN_FILENO:
4094 if (c->std_input != EXEC_INPUT_NAMED_FD)
4095 return NULL;
5073ff6b 4096
52c239d7 4097 return c->stdio_fdname[STDIN_FILENO] ?: "stdin";
5073ff6b 4098
52c239d7
LB
4099 case STDOUT_FILENO:
4100 if (c->std_output != EXEC_OUTPUT_NAMED_FD)
4101 return NULL;
5073ff6b 4102
52c239d7 4103 return c->stdio_fdname[STDOUT_FILENO] ?: "stdout";
5073ff6b 4104
52c239d7
LB
4105 case STDERR_FILENO:
4106 if (c->std_error != EXEC_OUTPUT_NAMED_FD)
4107 return NULL;
5073ff6b 4108
52c239d7 4109 return c->stdio_fdname[STDERR_FILENO] ?: "stderr";
5073ff6b 4110
52c239d7
LB
4111 default:
4112 return NULL;
4113 }
4114}
4115
2caa38e9
LP
4116static int exec_context_named_iofds(
4117 const ExecContext *c,
4118 const ExecParameters *p,
4119 int named_iofds[static 3]) {
4120
da6053d0 4121 size_t i, targets;
56fbd561 4122 const char* stdio_fdname[3];
da6053d0 4123 size_t n_fds;
52c239d7
LB
4124
4125 assert(c);
4126 assert(p);
2caa38e9 4127 assert(named_iofds);
52c239d7
LB
4128
4129 targets = (c->std_input == EXEC_INPUT_NAMED_FD) +
4130 (c->std_output == EXEC_OUTPUT_NAMED_FD) +
4131 (c->std_error == EXEC_OUTPUT_NAMED_FD);
4132
4133 for (i = 0; i < 3; i++)
4134 stdio_fdname[i] = exec_context_fdname(c, i);
4135
4c47affc
FB
4136 n_fds = p->n_storage_fds + p->n_socket_fds;
4137
4138 for (i = 0; i < n_fds && targets > 0; i++)
56fbd561
ZJS
4139 if (named_iofds[STDIN_FILENO] < 0 &&
4140 c->std_input == EXEC_INPUT_NAMED_FD &&
4141 stdio_fdname[STDIN_FILENO] &&
4142 streq(p->fd_names[i], stdio_fdname[STDIN_FILENO])) {
4143
52c239d7
LB
4144 named_iofds[STDIN_FILENO] = p->fds[i];
4145 targets--;
56fbd561
ZJS
4146
4147 } else if (named_iofds[STDOUT_FILENO] < 0 &&
4148 c->std_output == EXEC_OUTPUT_NAMED_FD &&
4149 stdio_fdname[STDOUT_FILENO] &&
4150 streq(p->fd_names[i], stdio_fdname[STDOUT_FILENO])) {
4151
52c239d7
LB
4152 named_iofds[STDOUT_FILENO] = p->fds[i];
4153 targets--;
56fbd561
ZJS
4154
4155 } else if (named_iofds[STDERR_FILENO] < 0 &&
4156 c->std_error == EXEC_OUTPUT_NAMED_FD &&
4157 stdio_fdname[STDERR_FILENO] &&
4158 streq(p->fd_names[i], stdio_fdname[STDERR_FILENO])) {
4159
52c239d7
LB
4160 named_iofds[STDERR_FILENO] = p->fds[i];
4161 targets--;
4162 }
4163
56fbd561 4164 return targets == 0 ? 0 : -ENOENT;
52c239d7
LB
4165}
4166
34cf6c43 4167static int exec_context_load_environment(const Unit *unit, const ExecContext *c, char ***l) {
8c7be95e
LP
4168 char **i, **r = NULL;
4169
4170 assert(c);
4171 assert(l);
4172
4173 STRV_FOREACH(i, c->environment_files) {
4174 char *fn;
52511fae
ZJS
4175 int k;
4176 unsigned n;
8c7be95e
LP
4177 bool ignore = false;
4178 char **p;
7fd1b19b 4179 _cleanup_globfree_ glob_t pglob = {};
8c7be95e
LP
4180
4181 fn = *i;
4182
4183 if (fn[0] == '-') {
4184 ignore = true;
313cefa1 4185 fn++;
8c7be95e
LP
4186 }
4187
4188 if (!path_is_absolute(fn)) {
8c7be95e
LP
4189 if (ignore)
4190 continue;
4191
4192 strv_free(r);
4193 return -EINVAL;
4194 }
4195
2bef10ab 4196 /* Filename supports globbing, take all matching files */
d8c92e8b
ZJS
4197 k = safe_glob(fn, 0, &pglob);
4198 if (k < 0) {
2bef10ab
PL
4199 if (ignore)
4200 continue;
8c7be95e 4201
2bef10ab 4202 strv_free(r);
d8c92e8b 4203 return k;
2bef10ab 4204 }
8c7be95e 4205
d8c92e8b
ZJS
4206 /* When we don't match anything, -ENOENT should be returned */
4207 assert(pglob.gl_pathc > 0);
4208
4209 for (n = 0; n < pglob.gl_pathc; n++) {
aa8fbc74 4210 k = load_env_file(NULL, pglob.gl_pathv[n], &p);
2bef10ab
PL
4211 if (k < 0) {
4212 if (ignore)
4213 continue;
8c7be95e 4214
2bef10ab 4215 strv_free(r);
2bef10ab 4216 return k;
e9c1ea9d 4217 }
ebc05a09 4218 /* Log invalid environment variables with filename */
039f0e70
LP
4219 if (p) {
4220 InvalidEnvInfo info = {
f2341e0a 4221 .unit = unit,
039f0e70
LP
4222 .path = pglob.gl_pathv[n]
4223 };
4224
4225 p = strv_env_clean_with_callback(p, invalid_env, &info);
4226 }
8c7be95e 4227
234519ae 4228 if (!r)
2bef10ab
PL
4229 r = p;
4230 else {
4231 char **m;
8c7be95e 4232
2bef10ab
PL
4233 m = strv_env_merge(2, r, p);
4234 strv_free(r);
4235 strv_free(p);
c84a9488 4236 if (!m)
2bef10ab 4237 return -ENOMEM;
2bef10ab
PL
4238
4239 r = m;
4240 }
8c7be95e
LP
4241 }
4242 }
4243
4244 *l = r;
4245
4246 return 0;
4247}
4248
6ac8fdc9 4249static bool tty_may_match_dev_console(const char *tty) {
7b912648 4250 _cleanup_free_ char *resolved = NULL;
6ac8fdc9 4251
1e22b5cd
LP
4252 if (!tty)
4253 return true;
4254
a119ec7c 4255 tty = skip_dev_prefix(tty);
6ac8fdc9
MS
4256
4257 /* trivial identity? */
4258 if (streq(tty, "console"))
4259 return true;
4260
7b912648
LP
4261 if (resolve_dev_console(&resolved) < 0)
4262 return true; /* if we could not resolve, assume it may */
6ac8fdc9
MS
4263
4264 /* "tty0" means the active VC, so it may be the same sometimes */
955f1c85 4265 return path_equal(resolved, tty) || (streq(resolved, "tty0") && tty_is_vc(tty));
6ac8fdc9
MS
4266}
4267
6c0ae739
LP
4268static bool exec_context_may_touch_tty(const ExecContext *ec) {
4269 assert(ec);
1e22b5cd 4270
6c0ae739 4271 return ec->tty_reset ||
1e22b5cd
LP
4272 ec->tty_vhangup ||
4273 ec->tty_vt_disallocate ||
6ac8fdc9
MS
4274 is_terminal_input(ec->std_input) ||
4275 is_terminal_output(ec->std_output) ||
6c0ae739
LP
4276 is_terminal_output(ec->std_error);
4277}
4278
4279bool exec_context_may_touch_console(const ExecContext *ec) {
4280
4281 return exec_context_may_touch_tty(ec) &&
1e22b5cd 4282 tty_may_match_dev_console(exec_context_tty_path(ec));
6ac8fdc9
MS
4283}
4284
15ae422b
LP
/* Writes every string of the NULL-terminated list 'l' to 'f', each preceded by a single space.
 * A NULL list is treated like an empty one. */
static void strv_fprintf(FILE *f, char **l) {
        char **item;

        assert(f);

        if (!l)
                return;

        for (item = l; *item; item++)
                fprintf(f, " %s", *item);
}
4293
34cf6c43 4294void exec_context_dump(const ExecContext *c, FILE* f, const char *prefix) {
d3070fbd 4295 ExecDirectoryType dt;
c2bbd90b 4296 char **e, **d;
94f04347 4297 unsigned i;
add00535 4298 int r;
9eba9da4 4299
5cb5a6ff
LP
4300 assert(c);
4301 assert(f);
4302
4ad49000 4303 prefix = strempty(prefix);
5cb5a6ff
LP
4304
4305 fprintf(f,
94f04347
LP
4306 "%sUMask: %04o\n"
4307 "%sWorkingDirectory: %s\n"
451a074f 4308 "%sRootDirectory: %s\n"
15ae422b 4309 "%sNonBlocking: %s\n"
64747e2d 4310 "%sPrivateTmp: %s\n"
7f112f50 4311 "%sPrivateDevices: %s\n"
59eeb84b 4312 "%sProtectKernelTunables: %s\n"
e66a2f65 4313 "%sProtectKernelModules: %s\n"
59eeb84b 4314 "%sProtectControlGroups: %s\n"
d251207d
LP
4315 "%sPrivateNetwork: %s\n"
4316 "%sPrivateUsers: %s\n"
1b8689f9
LP
4317 "%sProtectHome: %s\n"
4318 "%sProtectSystem: %s\n"
5d997827 4319 "%sMountAPIVFS: %s\n"
f3e43635 4320 "%sIgnoreSIGPIPE: %s\n"
f4170c67 4321 "%sMemoryDenyWriteExecute: %s\n"
b1edf445 4322 "%sRestrictRealtime: %s\n"
f69567cb 4323 "%sRestrictSUIDSGID: %s\n"
aecd5ac6
TM
4324 "%sKeyringMode: %s\n"
4325 "%sProtectHostname: %s\n",
5cb5a6ff 4326 prefix, c->umask,
9eba9da4 4327 prefix, c->working_directory ? c->working_directory : "/",
451a074f 4328 prefix, c->root_directory ? c->root_directory : "/",
15ae422b 4329 prefix, yes_no(c->non_blocking),
64747e2d 4330 prefix, yes_no(c->private_tmp),
7f112f50 4331 prefix, yes_no(c->private_devices),
59eeb84b 4332 prefix, yes_no(c->protect_kernel_tunables),
e66a2f65 4333 prefix, yes_no(c->protect_kernel_modules),
59eeb84b 4334 prefix, yes_no(c->protect_control_groups),
d251207d
LP
4335 prefix, yes_no(c->private_network),
4336 prefix, yes_no(c->private_users),
1b8689f9
LP
4337 prefix, protect_home_to_string(c->protect_home),
4338 prefix, protect_system_to_string(c->protect_system),
5d997827 4339 prefix, yes_no(c->mount_apivfs),
f3e43635 4340 prefix, yes_no(c->ignore_sigpipe),
f4170c67 4341 prefix, yes_no(c->memory_deny_write_execute),
b1edf445 4342 prefix, yes_no(c->restrict_realtime),
f69567cb 4343 prefix, yes_no(c->restrict_suid_sgid),
aecd5ac6
TM
4344 prefix, exec_keyring_mode_to_string(c->keyring_mode),
4345 prefix, yes_no(c->protect_hostname));
fb33a393 4346
915e6d16
LP
4347 if (c->root_image)
4348 fprintf(f, "%sRootImage: %s\n", prefix, c->root_image);
4349
8c7be95e
LP
4350 STRV_FOREACH(e, c->environment)
4351 fprintf(f, "%sEnvironment: %s\n", prefix, *e);
4352
4353 STRV_FOREACH(e, c->environment_files)
4354 fprintf(f, "%sEnvironmentFile: %s\n", prefix, *e);
94f04347 4355
b4c14404
FB
4356 STRV_FOREACH(e, c->pass_environment)
4357 fprintf(f, "%sPassEnvironment: %s\n", prefix, *e);
4358
00819cc1
LP
4359 STRV_FOREACH(e, c->unset_environment)
4360 fprintf(f, "%sUnsetEnvironment: %s\n", prefix, *e);
4361
53f47dfc
YW
4362 fprintf(f, "%sRuntimeDirectoryPreserve: %s\n", prefix, exec_preserve_mode_to_string(c->runtime_directory_preserve_mode));
4363
72fd1768 4364 for (dt = 0; dt < _EXEC_DIRECTORY_TYPE_MAX; dt++) {
3536f49e
YW
4365 fprintf(f, "%s%sMode: %04o\n", prefix, exec_directory_type_to_string(dt), c->directories[dt].mode);
4366
4367 STRV_FOREACH(d, c->directories[dt].paths)
4368 fprintf(f, "%s%s: %s\n", prefix, exec_directory_type_to_string(dt), *d);
4369 }
c2bbd90b 4370
fb33a393
LP
4371 if (c->nice_set)
4372 fprintf(f,
4373 "%sNice: %i\n",
4374 prefix, c->nice);
4375
dd6c17b1 4376 if (c->oom_score_adjust_set)
fb33a393 4377 fprintf(f,
dd6c17b1
LP
4378 "%sOOMScoreAdjust: %i\n",
4379 prefix, c->oom_score_adjust);
9eba9da4 4380
94f04347 4381 for (i = 0; i < RLIM_NLIMITS; i++)
3c11da9d 4382 if (c->rlimit[i]) {
4c3a2b84 4383 fprintf(f, "%sLimit%s: " RLIM_FMT "\n",
3c11da9d 4384 prefix, rlimit_to_string(i), c->rlimit[i]->rlim_max);
4c3a2b84 4385 fprintf(f, "%sLimit%sSoft: " RLIM_FMT "\n",
3c11da9d
EV
4386 prefix, rlimit_to_string(i), c->rlimit[i]->rlim_cur);
4387 }
94f04347 4388
f8b69d1d 4389 if (c->ioprio_set) {
1756a011 4390 _cleanup_free_ char *class_str = NULL;
f8b69d1d 4391
837df140
YW
4392 r = ioprio_class_to_string_alloc(IOPRIO_PRIO_CLASS(c->ioprio), &class_str);
4393 if (r >= 0)
4394 fprintf(f, "%sIOSchedulingClass: %s\n", prefix, class_str);
4395
4396 fprintf(f, "%sIOPriority: %lu\n", prefix, IOPRIO_PRIO_DATA(c->ioprio));
f8b69d1d 4397 }
94f04347 4398
f8b69d1d 4399 if (c->cpu_sched_set) {
1756a011 4400 _cleanup_free_ char *policy_str = NULL;
f8b69d1d 4401
837df140
YW
4402 r = sched_policy_to_string_alloc(c->cpu_sched_policy, &policy_str);
4403 if (r >= 0)
4404 fprintf(f, "%sCPUSchedulingPolicy: %s\n", prefix, policy_str);
4405
94f04347 4406 fprintf(f,
38b48754
LP
4407 "%sCPUSchedulingPriority: %i\n"
4408 "%sCPUSchedulingResetOnFork: %s\n",
38b48754
LP
4409 prefix, c->cpu_sched_priority,
4410 prefix, yes_no(c->cpu_sched_reset_on_fork));
b929bf04 4411 }
94f04347 4412
0985c7c4 4413 if (c->cpu_set.set) {
e7fca352
MS
4414 _cleanup_free_ char *affinity = NULL;
4415
4416 affinity = cpu_set_to_range_string(&c->cpu_set);
4417 fprintf(f, "%sCPUAffinity: %s\n", prefix, affinity);
94f04347
LP
4418 }
4419
b070c7c0
MS
4420 if (mpol_is_valid(numa_policy_get_type(&c->numa_policy))) {
4421 _cleanup_free_ char *nodes = NULL;
4422
4423 nodes = cpu_set_to_range_string(&c->numa_policy.nodes);
4424 fprintf(f, "%sNUMAPolicy: %s\n", prefix, mpol_to_string(numa_policy_get_type(&c->numa_policy)));
4425 fprintf(f, "%sNUMAMask: %s\n", prefix, strnull(nodes));
4426 }
4427
3a43da28 4428 if (c->timer_slack_nsec != NSEC_INFINITY)
ccd06097 4429 fprintf(f, "%sTimerSlackNSec: "NSEC_FMT "\n", prefix, c->timer_slack_nsec);
94f04347
LP
4430
4431 fprintf(f,
80876c20
LP
4432 "%sStandardInput: %s\n"
4433 "%sStandardOutput: %s\n"
4434 "%sStandardError: %s\n",
4435 prefix, exec_input_to_string(c->std_input),
4436 prefix, exec_output_to_string(c->std_output),
4437 prefix, exec_output_to_string(c->std_error));
4438
befc4a80
LP
4439 if (c->std_input == EXEC_INPUT_NAMED_FD)
4440 fprintf(f, "%sStandardInputFileDescriptorName: %s\n", prefix, c->stdio_fdname[STDIN_FILENO]);
4441 if (c->std_output == EXEC_OUTPUT_NAMED_FD)
4442 fprintf(f, "%sStandardOutputFileDescriptorName: %s\n", prefix, c->stdio_fdname[STDOUT_FILENO]);
4443 if (c->std_error == EXEC_OUTPUT_NAMED_FD)
4444 fprintf(f, "%sStandardErrorFileDescriptorName: %s\n", prefix, c->stdio_fdname[STDERR_FILENO]);
4445
4446 if (c->std_input == EXEC_INPUT_FILE)
4447 fprintf(f, "%sStandardInputFile: %s\n", prefix, c->stdio_file[STDIN_FILENO]);
4448 if (c->std_output == EXEC_OUTPUT_FILE)
4449 fprintf(f, "%sStandardOutputFile: %s\n", prefix, c->stdio_file[STDOUT_FILENO]);
566b7d23
ZD
4450 if (c->std_output == EXEC_OUTPUT_FILE_APPEND)
4451 fprintf(f, "%sStandardOutputFileToAppend: %s\n", prefix, c->stdio_file[STDOUT_FILENO]);
befc4a80
LP
4452 if (c->std_error == EXEC_OUTPUT_FILE)
4453 fprintf(f, "%sStandardErrorFile: %s\n", prefix, c->stdio_file[STDERR_FILENO]);
566b7d23
ZD
4454 if (c->std_error == EXEC_OUTPUT_FILE_APPEND)
4455 fprintf(f, "%sStandardErrorFileToAppend: %s\n", prefix, c->stdio_file[STDERR_FILENO]);
befc4a80 4456
80876c20
LP
4457 if (c->tty_path)
4458 fprintf(f,
6ea832a2
LP
4459 "%sTTYPath: %s\n"
4460 "%sTTYReset: %s\n"
4461 "%sTTYVHangup: %s\n"
4462 "%sTTYVTDisallocate: %s\n",
4463 prefix, c->tty_path,
4464 prefix, yes_no(c->tty_reset),
4465 prefix, yes_no(c->tty_vhangup),
4466 prefix, yes_no(c->tty_vt_disallocate));
94f04347 4467
9f6444eb
LP
4468 if (IN_SET(c->std_output,
4469 EXEC_OUTPUT_SYSLOG,
4470 EXEC_OUTPUT_KMSG,
4471 EXEC_OUTPUT_JOURNAL,
4472 EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
4473 EXEC_OUTPUT_KMSG_AND_CONSOLE,
4474 EXEC_OUTPUT_JOURNAL_AND_CONSOLE) ||
4475 IN_SET(c->std_error,
4476 EXEC_OUTPUT_SYSLOG,
4477 EXEC_OUTPUT_KMSG,
4478 EXEC_OUTPUT_JOURNAL,
4479 EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
4480 EXEC_OUTPUT_KMSG_AND_CONSOLE,
4481 EXEC_OUTPUT_JOURNAL_AND_CONSOLE)) {
f8b69d1d 4482
5ce70e5b 4483 _cleanup_free_ char *fac_str = NULL, *lvl_str = NULL;
f8b69d1d 4484
837df140
YW
4485 r = log_facility_unshifted_to_string_alloc(c->syslog_priority >> 3, &fac_str);
4486 if (r >= 0)
4487 fprintf(f, "%sSyslogFacility: %s\n", prefix, fac_str);
f8b69d1d 4488
837df140
YW
4489 r = log_level_to_string_alloc(LOG_PRI(c->syslog_priority), &lvl_str);
4490 if (r >= 0)
4491 fprintf(f, "%sSyslogLevel: %s\n", prefix, lvl_str);
f8b69d1d 4492 }
94f04347 4493
d3070fbd
LP
4494 if (c->log_level_max >= 0) {
4495 _cleanup_free_ char *t = NULL;
4496
4497 (void) log_level_to_string_alloc(c->log_level_max, &t);
4498
4499 fprintf(f, "%sLogLevelMax: %s\n", prefix, strna(t));
4500 }
4501
90fc172e
AZ
4502 if (c->log_rate_limit_interval_usec > 0) {
4503 char buf_timespan[FORMAT_TIMESPAN_MAX];
4504
4505 fprintf(f,
4506 "%sLogRateLimitIntervalSec: %s\n",
4507 prefix, format_timespan(buf_timespan, sizeof(buf_timespan), c->log_rate_limit_interval_usec, USEC_PER_SEC));
4508 }
4509
4510 if (c->log_rate_limit_burst > 0)
4511 fprintf(f, "%sLogRateLimitBurst: %u\n", prefix, c->log_rate_limit_burst);
4512
d3070fbd
LP
4513 if (c->n_log_extra_fields > 0) {
4514 size_t j;
4515
4516 for (j = 0; j < c->n_log_extra_fields; j++) {
4517 fprintf(f, "%sLogExtraFields: ", prefix);
4518 fwrite(c->log_extra_fields[j].iov_base,
4519 1, c->log_extra_fields[j].iov_len,
4520 f);
4521 fputc('\n', f);
4522 }
4523 }
4524
07d46372
YW
4525 if (c->secure_bits) {
4526 _cleanup_free_ char *str = NULL;
4527
4528 r = secure_bits_to_string_alloc(c->secure_bits, &str);
4529 if (r >= 0)
4530 fprintf(f, "%sSecure Bits: %s\n", prefix, str);
4531 }
94f04347 4532
a103496c 4533 if (c->capability_bounding_set != CAP_ALL) {
dd1f5bd0 4534 _cleanup_free_ char *str = NULL;
94f04347 4535
dd1f5bd0
YW
4536 r = capability_set_to_string_alloc(c->capability_bounding_set, &str);
4537 if (r >= 0)
4538 fprintf(f, "%sCapabilityBoundingSet: %s\n", prefix, str);
755d4b67
IP
4539 }
4540
4541 if (c->capability_ambient_set != 0) {
dd1f5bd0 4542 _cleanup_free_ char *str = NULL;
755d4b67 4543
dd1f5bd0
YW
4544 r = capability_set_to_string_alloc(c->capability_ambient_set, &str);
4545 if (r >= 0)
4546 fprintf(f, "%sAmbientCapabilities: %s\n", prefix, str);
94f04347
LP
4547 }
4548
4549 if (c->user)
f2d3769a 4550 fprintf(f, "%sUser: %s\n", prefix, c->user);
94f04347 4551 if (c->group)
f2d3769a 4552 fprintf(f, "%sGroup: %s\n", prefix, c->group);
94f04347 4553
29206d46
LP
4554 fprintf(f, "%sDynamicUser: %s\n", prefix, yes_no(c->dynamic_user));
4555
ac6e8be6 4556 if (!strv_isempty(c->supplementary_groups)) {
94f04347 4557 fprintf(f, "%sSupplementaryGroups:", prefix);
15ae422b
LP
4558 strv_fprintf(f, c->supplementary_groups);
4559 fputs("\n", f);
4560 }
94f04347 4561
5b6319dc 4562 if (c->pam_name)
f2d3769a 4563 fprintf(f, "%sPAMName: %s\n", prefix, c->pam_name);
5b6319dc 4564
58629001 4565 if (!strv_isempty(c->read_write_paths)) {
2a624c36
AP
4566 fprintf(f, "%sReadWritePaths:", prefix);
4567 strv_fprintf(f, c->read_write_paths);
15ae422b
LP
4568 fputs("\n", f);
4569 }
4570
58629001 4571 if (!strv_isempty(c->read_only_paths)) {
2a624c36
AP
4572 fprintf(f, "%sReadOnlyPaths:", prefix);
4573 strv_fprintf(f, c->read_only_paths);
15ae422b
LP
4574 fputs("\n", f);
4575 }
94f04347 4576
58629001 4577 if (!strv_isempty(c->inaccessible_paths)) {
2a624c36
AP
4578 fprintf(f, "%sInaccessiblePaths:", prefix);
4579 strv_fprintf(f, c->inaccessible_paths);
94f04347
LP
4580 fputs("\n", f);
4581 }
2e22afe9 4582
d2d6c096 4583 if (c->n_bind_mounts > 0)
4ca763a9
YW
4584 for (i = 0; i < c->n_bind_mounts; i++)
4585 fprintf(f, "%s%s: %s%s:%s:%s\n", prefix,
d2d6c096 4586 c->bind_mounts[i].read_only ? "BindReadOnlyPaths" : "BindPaths",
4ca763a9 4587 c->bind_mounts[i].ignore_enoent ? "-": "",
d2d6c096
LP
4588 c->bind_mounts[i].source,
4589 c->bind_mounts[i].destination,
4590 c->bind_mounts[i].recursive ? "rbind" : "norbind");
d2d6c096 4591
2abd4e38
YW
4592 if (c->n_temporary_filesystems > 0)
4593 for (i = 0; i < c->n_temporary_filesystems; i++) {
4594 TemporaryFileSystem *t = c->temporary_filesystems + i;
4595
4596 fprintf(f, "%sTemporaryFileSystem: %s%s%s\n", prefix,
4597 t->path,
4598 isempty(t->options) ? "" : ":",
4599 strempty(t->options));
4600 }
4601
169c1bda
LP
4602 if (c->utmp_id)
4603 fprintf(f,
4604 "%sUtmpIdentifier: %s\n",
4605 prefix, c->utmp_id);
7b52a628
MS
4606
4607 if (c->selinux_context)
4608 fprintf(f,
5f8640fb
LP
4609 "%sSELinuxContext: %s%s\n",
4610 prefix, c->selinux_context_ignore ? "-" : "", c->selinux_context);
17df7223 4611
80c21aea
WC
4612 if (c->apparmor_profile)
4613 fprintf(f,
4614 "%sAppArmorProfile: %s%s\n",
4615 prefix, c->apparmor_profile_ignore ? "-" : "", c->apparmor_profile);
4616
4617 if (c->smack_process_label)
4618 fprintf(f,
4619 "%sSmackProcessLabel: %s%s\n",
4620 prefix, c->smack_process_label_ignore ? "-" : "", c->smack_process_label);
4621
050f7277 4622 if (c->personality != PERSONALITY_INVALID)
ac45f971
LP
4623 fprintf(f,
4624 "%sPersonality: %s\n",
4625 prefix, strna(personality_to_string(c->personality)));
4626
78e864e5
TM
4627 fprintf(f,
4628 "%sLockPersonality: %s\n",
4629 prefix, yes_no(c->lock_personality));
4630
17df7223 4631 if (c->syscall_filter) {
349cc4a5 4632#if HAVE_SECCOMP
17df7223 4633 Iterator j;
8cfa775f 4634 void *id, *val;
17df7223 4635 bool first = true;
351a19b1 4636#endif
17df7223
LP
4637
4638 fprintf(f,
57183d11 4639 "%sSystemCallFilter: ",
17df7223
LP
4640 prefix);
4641
4642 if (!c->syscall_whitelist)
4643 fputc('~', f);
4644
349cc4a5 4645#if HAVE_SECCOMP
8cfa775f 4646 HASHMAP_FOREACH_KEY(val, id, c->syscall_filter, j) {
17df7223 4647 _cleanup_free_ char *name = NULL;
8cfa775f
YW
4648 const char *errno_name = NULL;
4649 int num = PTR_TO_INT(val);
17df7223
LP
4650
4651 if (first)
4652 first = false;
4653 else
4654 fputc(' ', f);
4655
57183d11 4656 name = seccomp_syscall_resolve_num_arch(SCMP_ARCH_NATIVE, PTR_TO_INT(id) - 1);
17df7223 4657 fputs(strna(name), f);
8cfa775f
YW
4658
4659 if (num >= 0) {
4660 errno_name = errno_to_name(num);
4661 if (errno_name)
4662 fprintf(f, ":%s", errno_name);
4663 else
4664 fprintf(f, ":%d", num);
4665 }
17df7223 4666 }
351a19b1 4667#endif
17df7223
LP
4668
4669 fputc('\n', f);
4670 }
4671
57183d11 4672 if (c->syscall_archs) {
349cc4a5 4673#if HAVE_SECCOMP
57183d11
LP
4674 Iterator j;
4675 void *id;
4676#endif
4677
4678 fprintf(f,
4679 "%sSystemCallArchitectures:",
4680 prefix);
4681
349cc4a5 4682#if HAVE_SECCOMP
57183d11
LP
4683 SET_FOREACH(id, c->syscall_archs, j)
4684 fprintf(f, " %s", strna(seccomp_arch_to_string(PTR_TO_UINT32(id) - 1)));
4685#endif
4686 fputc('\n', f);
4687 }
4688
add00535
LP
4689 if (exec_context_restrict_namespaces_set(c)) {
4690 _cleanup_free_ char *s = NULL;
4691
86c2a9f1 4692 r = namespace_flags_to_string(c->restrict_namespaces, &s);
add00535
LP
4693 if (r >= 0)
4694 fprintf(f, "%sRestrictNamespaces: %s\n",
4695 prefix, s);
4696 }
4697
a8d08f39
LP
4698 if (c->network_namespace_path)
4699 fprintf(f,
4700 "%sNetworkNamespacePath: %s\n",
4701 prefix, c->network_namespace_path);
4702
3df90f24
YW
4703 if (c->syscall_errno > 0) {
4704 const char *errno_name;
4705
4706 fprintf(f, "%sSystemCallErrorNumber: ", prefix);
4707
4708 errno_name = errno_to_name(c->syscall_errno);
4709 if (errno_name)
4710 fprintf(f, "%s\n", errno_name);
4711 else
4712 fprintf(f, "%d\n", c->syscall_errno);
4713 }
5cb5a6ff
LP
4714}
4715
34cf6c43 4716bool exec_context_maintains_privileges(const ExecContext *c) {
a931ad47
LP
4717 assert(c);
4718
61233823 4719 /* Returns true if the process forked off would run under
a931ad47
LP
4720 * an unchanged UID or as root. */
4721
4722 if (!c->user)
4723 return true;
4724
4725 if (streq(c->user, "root") || streq(c->user, "0"))
4726 return true;
4727
4728 return false;
4729}
4730
34cf6c43 4731int exec_context_get_effective_ioprio(const ExecContext *c) {
7f452159
LP
4732 int p;
4733
4734 assert(c);
4735
4736 if (c->ioprio_set)
4737 return c->ioprio;
4738
4739 p = ioprio_get(IOPRIO_WHO_PROCESS, 0);
4740 if (p < 0)
4741 return IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 4);
4742
4743 return p;
4744}
4745
d3070fbd
LP
4746void exec_context_free_log_extra_fields(ExecContext *c) {
4747 size_t l;
4748
4749 assert(c);
4750
4751 for (l = 0; l < c->n_log_extra_fields; l++)
4752 free(c->log_extra_fields[l].iov_base);
4753 c->log_extra_fields = mfree(c->log_extra_fields);
4754 c->n_log_extra_fields = 0;
4755}
4756
6f765baf
LP
4757void exec_context_revert_tty(ExecContext *c) {
4758 int r;
4759
4760 assert(c);
4761
4762 /* First, reset the TTY (possibly kicking everybody else from the TTY) */
4763 exec_context_tty_reset(c, NULL);
4764
4765 /* And then undo what chown_terminal() did earlier. Note that we only do this if we have a path
4766 * configured. If the TTY was passed to us as file descriptor we assume the TTY is opened and managed
4767 * by whoever passed it to us and thus knows better when and how to chmod()/chown() it back. */
4768
4769 if (exec_context_may_touch_tty(c)) {
4770 const char *path;
4771
4772 path = exec_context_tty_path(c);
4773 if (path) {
4774 r = chmod_and_chown(path, TTY_MODE, 0, TTY_GID);
4775 if (r < 0 && r != -ENOENT)
4776 log_warning_errno(r, "Failed to reset TTY ownership/access mode of %s, ignoring: %m", path);
4777 }
4778 }
4779}
4780
4c2f5842
LP
4781int exec_context_get_clean_directories(
4782 ExecContext *c,
4783 char **prefix,
4784 ExecCleanMask mask,
4785 char ***ret) {
4786
4787 _cleanup_strv_free_ char **l = NULL;
4788 ExecDirectoryType t;
4789 int r;
4790
4791 assert(c);
4792 assert(prefix);
4793 assert(ret);
4794
4795 for (t = 0; t < _EXEC_DIRECTORY_TYPE_MAX; t++) {
4796 char **i;
4797
4798 if (!FLAGS_SET(mask, 1U << t))
4799 continue;
4800
4801 if (!prefix[t])
4802 continue;
4803
4804 STRV_FOREACH(i, c->directories[t].paths) {
4805 char *j;
4806
4807 j = path_join(prefix[t], *i);
4808 if (!j)
4809 return -ENOMEM;
4810
4811 r = strv_consume(&l, j);
4812 if (r < 0)
4813 return r;
4814 }
4815 }
4816
4817 *ret = TAKE_PTR(l);
4818 return 0;
4819}
4820
4821int exec_context_get_clean_mask(ExecContext *c, ExecCleanMask *ret) {
4822 ExecCleanMask mask = 0;
4823
4824 assert(c);
4825 assert(ret);
4826
4827 for (ExecDirectoryType t = 0; t < _EXEC_DIRECTORY_TYPE_MAX; t++)
4828 if (!strv_isempty(c->directories[t].paths))
4829 mask |= 1U << t;
4830
4831 *ret = mask;
4832 return 0;
4833}
4834
b58b4116 4835void exec_status_start(ExecStatus *s, pid_t pid) {
034c6ed7 4836 assert(s);
5cb5a6ff 4837
2ed26ed0
LP
4838 *s = (ExecStatus) {
4839 .pid = pid,
4840 };
4841
b58b4116
LP
4842 dual_timestamp_get(&s->start_timestamp);
4843}
4844
34cf6c43 4845void exec_status_exit(ExecStatus *s, const ExecContext *context, pid_t pid, int code, int status) {
b58b4116
LP
4846 assert(s);
4847
2ed26ed0
LP
4848 if (s->pid != pid) {
4849 *s = (ExecStatus) {
4850 .pid = pid,
4851 };
4852 }
b58b4116 4853
63983207 4854 dual_timestamp_get(&s->exit_timestamp);
9fb86720 4855
034c6ed7
LP
4856 s->code = code;
4857 s->status = status;
169c1bda 4858
6f765baf
LP
4859 if (context && context->utmp_id)
4860 (void) utmp_put_dead_process(context->utmp_id, pid, code, status);
9fb86720
LP
4861}
4862
6a1d4d9f
LP
4863void exec_status_reset(ExecStatus *s) {
4864 assert(s);
4865
4866 *s = (ExecStatus) {};
4867}
4868
34cf6c43 4869void exec_status_dump(const ExecStatus *s, FILE *f, const char *prefix) {
9fb86720
LP
4870 char buf[FORMAT_TIMESTAMP_MAX];
4871
4872 assert(s);
4873 assert(f);
4874
9fb86720
LP
4875 if (s->pid <= 0)
4876 return;
4877
4c940960
LP
4878 prefix = strempty(prefix);
4879
9fb86720 4880 fprintf(f,
ccd06097
ZJS
4881 "%sPID: "PID_FMT"\n",
4882 prefix, s->pid);
9fb86720 4883
af9d16e1 4884 if (dual_timestamp_is_set(&s->start_timestamp))
9fb86720
LP
4885 fprintf(f,
4886 "%sStart Timestamp: %s\n",
63983207 4887 prefix, format_timestamp(buf, sizeof(buf), s->start_timestamp.realtime));
9fb86720 4888
af9d16e1 4889 if (dual_timestamp_is_set(&s->exit_timestamp))
9fb86720
LP
4890 fprintf(f,
4891 "%sExit Timestamp: %s\n"
4892 "%sExit Code: %s\n"
4893 "%sExit Status: %i\n",
63983207 4894 prefix, format_timestamp(buf, sizeof(buf), s->exit_timestamp.realtime),
9fb86720
LP
4895 prefix, sigchld_code_to_string(s->code),
4896 prefix, s->status);
5cb5a6ff 4897}
44d8db9e 4898
34cf6c43 4899static char *exec_command_line(char **argv) {
44d8db9e
LP
4900 size_t k;
4901 char *n, *p, **a;
4902 bool first = true;
4903
9e2f7c11 4904 assert(argv);
44d8db9e 4905
9164977d 4906 k = 1;
9e2f7c11 4907 STRV_FOREACH(a, argv)
44d8db9e
LP
4908 k += strlen(*a)+3;
4909
5cd9cd35
LP
4910 n = new(char, k);
4911 if (!n)
44d8db9e
LP
4912 return NULL;
4913
4914 p = n;
9e2f7c11 4915 STRV_FOREACH(a, argv) {
44d8db9e
LP
4916
4917 if (!first)
4918 *(p++) = ' ';
4919 else
4920 first = false;
4921
4922 if (strpbrk(*a, WHITESPACE)) {
4923 *(p++) = '\'';
4924 p = stpcpy(p, *a);
4925 *(p++) = '\'';
4926 } else
4927 p = stpcpy(p, *a);
4928
4929 }
4930
9164977d
LP
4931 *p = 0;
4932
44d8db9e
LP
4933 /* FIXME: this doesn't really handle arguments that have
4934 * spaces and ticks in them */
4935
4936 return n;
4937}
4938
34cf6c43 4939static void exec_command_dump(ExecCommand *c, FILE *f, const char *prefix) {
e1d75803 4940 _cleanup_free_ char *cmd = NULL;
4c940960 4941 const char *prefix2;
44d8db9e
LP
4942
4943 assert(c);
4944 assert(f);
4945
4c940960 4946 prefix = strempty(prefix);
63c372cb 4947 prefix2 = strjoina(prefix, "\t");
44d8db9e 4948
9e2f7c11 4949 cmd = exec_command_line(c->argv);
44d8db9e
LP
4950 fprintf(f,
4951 "%sCommand Line: %s\n",
4bbccb02 4952 prefix, cmd ? cmd : strerror_safe(ENOMEM));
44d8db9e 4953
9fb86720 4954 exec_status_dump(&c->exec_status, f, prefix2);
44d8db9e
LP
4955}
4956
4957void exec_command_dump_list(ExecCommand *c, FILE *f, const char *prefix) {
4958 assert(f);
4959
4c940960 4960 prefix = strempty(prefix);
44d8db9e
LP
4961
4962 LIST_FOREACH(command, c, c)
4963 exec_command_dump(c, f, prefix);
4964}
94f04347 4965
a6a80b4f
LP
4966void exec_command_append_list(ExecCommand **l, ExecCommand *e) {
4967 ExecCommand *end;
4968
4969 assert(l);
4970 assert(e);
4971
4972 if (*l) {
35b8ca3a 4973 /* It's kind of important, that we keep the order here */
71fda00f
LP
4974 LIST_FIND_TAIL(command, *l, end);
4975 LIST_INSERT_AFTER(command, *l, end, e);
a6a80b4f
LP
4976 } else
4977 *l = e;
4978}
4979
26fd040d
LP
4980int exec_command_set(ExecCommand *c, const char *path, ...) {
4981 va_list ap;
4982 char **l, *p;
4983
4984 assert(c);
4985 assert(path);
4986
4987 va_start(ap, path);
4988 l = strv_new_ap(path, ap);
4989 va_end(ap);
4990
4991 if (!l)
4992 return -ENOMEM;
4993
250a918d
LP
4994 p = strdup(path);
4995 if (!p) {
26fd040d
LP
4996 strv_free(l);
4997 return -ENOMEM;
4998 }
4999
6897dfe8 5000 free_and_replace(c->path, p);
26fd040d 5001
130d3d22 5002 return strv_free_and_replace(c->argv, l);
26fd040d
LP
5003}
5004
86b23b07 5005int exec_command_append(ExecCommand *c, const char *path, ...) {
e63ff941 5006 _cleanup_strv_free_ char **l = NULL;
86b23b07 5007 va_list ap;
86b23b07
JS
5008 int r;
5009
5010 assert(c);
5011 assert(path);
5012
5013 va_start(ap, path);
5014 l = strv_new_ap(path, ap);
5015 va_end(ap);
5016
5017 if (!l)
5018 return -ENOMEM;
5019
e287086b 5020 r = strv_extend_strv(&c->argv, l, false);
e63ff941 5021 if (r < 0)
86b23b07 5022 return r;
86b23b07
JS
5023
5024 return 0;
5025}
5026
e8a565cb
YW
5027static void *remove_tmpdir_thread(void *p) {
5028 _cleanup_free_ char *path = p;
86b23b07 5029
e8a565cb
YW
5030 (void) rm_rf(path, REMOVE_ROOT|REMOVE_PHYSICAL);
5031 return NULL;
5032}
5033
5034static ExecRuntime* exec_runtime_free(ExecRuntime *rt, bool destroy) {
5035 int r;
5036
5037 if (!rt)
5038 return NULL;
5039
5040 if (rt->manager)
5041 (void) hashmap_remove(rt->manager->exec_runtime_by_id, rt->id);
5042
5043 /* When destroy is true, then rm_rf tmp_dir and var_tmp_dir. */
5044 if (destroy && rt->tmp_dir) {
5045 log_debug("Spawning thread to nuke %s", rt->tmp_dir);
5046
5047 r = asynchronous_job(remove_tmpdir_thread, rt->tmp_dir);
5048 if (r < 0) {
5049 log_warning_errno(r, "Failed to nuke %s: %m", rt->tmp_dir);
5050 free(rt->tmp_dir);
5051 }
5052
5053 rt->tmp_dir = NULL;
5054 }
613b411c 5055
e8a565cb
YW
5056 if (destroy && rt->var_tmp_dir) {
5057 log_debug("Spawning thread to nuke %s", rt->var_tmp_dir);
5058
5059 r = asynchronous_job(remove_tmpdir_thread, rt->var_tmp_dir);
5060 if (r < 0) {
5061 log_warning_errno(r, "Failed to nuke %s: %m", rt->var_tmp_dir);
5062 free(rt->var_tmp_dir);
5063 }
5064
5065 rt->var_tmp_dir = NULL;
5066 }
5067
5068 rt->id = mfree(rt->id);
5069 rt->tmp_dir = mfree(rt->tmp_dir);
5070 rt->var_tmp_dir = mfree(rt->var_tmp_dir);
5071 safe_close_pair(rt->netns_storage_socket);
5072 return mfree(rt);
5073}
5074
5075static void exec_runtime_freep(ExecRuntime **rt) {
da6bc6ed 5076 (void) exec_runtime_free(*rt, false);
e8a565cb
YW
5077}
5078
8e8009dc
LP
5079static int exec_runtime_allocate(ExecRuntime **ret) {
5080 ExecRuntime *n;
613b411c 5081
8e8009dc 5082 assert(ret);
613b411c 5083
8e8009dc
LP
5084 n = new(ExecRuntime, 1);
5085 if (!n)
613b411c
LP
5086 return -ENOMEM;
5087
8e8009dc
LP
5088 *n = (ExecRuntime) {
5089 .netns_storage_socket = { -1, -1 },
5090 };
5091
5092 *ret = n;
613b411c
LP
5093 return 0;
5094}
5095
e8a565cb
YW
5096static int exec_runtime_add(
5097 Manager *m,
5098 const char *id,
5099 const char *tmp_dir,
5100 const char *var_tmp_dir,
5101 const int netns_storage_socket[2],
5102 ExecRuntime **ret) {
5103
5104 _cleanup_(exec_runtime_freep) ExecRuntime *rt = NULL;
613b411c
LP
5105 int r;
5106
e8a565cb 5107 assert(m);
613b411c
LP
5108 assert(id);
5109
e8a565cb
YW
5110 r = hashmap_ensure_allocated(&m->exec_runtime_by_id, &string_hash_ops);
5111 if (r < 0)
5112 return r;
613b411c 5113
e8a565cb 5114 r = exec_runtime_allocate(&rt);
613b411c
LP
5115 if (r < 0)
5116 return r;
5117
e8a565cb
YW
5118 rt->id = strdup(id);
5119 if (!rt->id)
5120 return -ENOMEM;
5121
5122 if (tmp_dir) {
5123 rt->tmp_dir = strdup(tmp_dir);
5124 if (!rt->tmp_dir)
5125 return -ENOMEM;
5126
5127 /* When tmp_dir is set, then we require var_tmp_dir is also set. */
5128 assert(var_tmp_dir);
5129 rt->var_tmp_dir = strdup(var_tmp_dir);
5130 if (!rt->var_tmp_dir)
5131 return -ENOMEM;
5132 }
5133
5134 if (netns_storage_socket) {
5135 rt->netns_storage_socket[0] = netns_storage_socket[0];
5136 rt->netns_storage_socket[1] = netns_storage_socket[1];
613b411c
LP
5137 }
5138
e8a565cb
YW
5139 r = hashmap_put(m->exec_runtime_by_id, rt->id, rt);
5140 if (r < 0)
5141 return r;
5142
5143 rt->manager = m;
5144
5145 if (ret)
5146 *ret = rt;
5147
5148 /* do not remove created ExecRuntime object when the operation succeeds. */
5149 rt = NULL;
5150 return 0;
5151}
5152
5153static int exec_runtime_make(Manager *m, const ExecContext *c, const char *id, ExecRuntime **ret) {
5154 _cleanup_free_ char *tmp_dir = NULL, *var_tmp_dir = NULL;
2fa3742d 5155 _cleanup_close_pair_ int netns_storage_socket[2] = { -1, -1 };
e8a565cb
YW
5156 int r;
5157
5158 assert(m);
5159 assert(c);
5160 assert(id);
5161
5162 /* It is not necessary to create ExecRuntime object. */
a8d08f39 5163 if (!c->private_network && !c->private_tmp && !c->network_namespace_path)
e8a565cb
YW
5164 return 0;
5165
5166 if (c->private_tmp) {
5167 r = setup_tmp_dirs(id, &tmp_dir, &var_tmp_dir);
613b411c
LP
5168 if (r < 0)
5169 return r;
5170 }
5171
a8d08f39 5172 if (c->private_network || c->network_namespace_path) {
e8a565cb
YW
5173 if (socketpair(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC, 0, netns_storage_socket) < 0)
5174 return -errno;
5175 }
5176
5177 r = exec_runtime_add(m, id, tmp_dir, var_tmp_dir, netns_storage_socket, ret);
5178 if (r < 0)
5179 return r;
5180
5181 /* Avoid cleanup */
2fa3742d 5182 netns_storage_socket[0] = netns_storage_socket[1] = -1;
613b411c
LP
5183 return 1;
5184}
5185
e8a565cb
YW
5186int exec_runtime_acquire(Manager *m, const ExecContext *c, const char *id, bool create, ExecRuntime **ret) {
5187 ExecRuntime *rt;
5188 int r;
613b411c 5189
e8a565cb
YW
5190 assert(m);
5191 assert(id);
5192 assert(ret);
5193
5194 rt = hashmap_get(m->exec_runtime_by_id, id);
5195 if (rt)
5196 /* We already have a ExecRuntime object, let's increase the ref count and reuse it */
5197 goto ref;
5198
5199 if (!create)
5200 return 0;
5201
5202 /* If not found, then create a new object. */
5203 r = exec_runtime_make(m, c, id, &rt);
5204 if (r <= 0)
5205 /* When r == 0, it is not necessary to create ExecRuntime object. */
5206 return r;
613b411c 5207
e8a565cb
YW
5208ref:
5209 /* increment reference counter. */
5210 rt->n_ref++;
5211 *ret = rt;
5212 return 1;
5213}
613b411c 5214
e8a565cb
YW
5215ExecRuntime *exec_runtime_unref(ExecRuntime *rt, bool destroy) {
5216 if (!rt)
613b411c
LP
5217 return NULL;
5218
e8a565cb 5219 assert(rt->n_ref > 0);
613b411c 5220
e8a565cb
YW
5221 rt->n_ref--;
5222 if (rt->n_ref > 0)
f2341e0a
LP
5223 return NULL;
5224
e8a565cb 5225 return exec_runtime_free(rt, destroy);
613b411c
LP
5226}
5227
e8a565cb
YW
5228int exec_runtime_serialize(const Manager *m, FILE *f, FDSet *fds) {
5229 ExecRuntime *rt;
5230 Iterator i;
5231
5232 assert(m);
613b411c
LP
5233 assert(f);
5234 assert(fds);
5235
e8a565cb
YW
5236 HASHMAP_FOREACH(rt, m->exec_runtime_by_id, i) {
5237 fprintf(f, "exec-runtime=%s", rt->id);
613b411c 5238
e8a565cb
YW
5239 if (rt->tmp_dir)
5240 fprintf(f, " tmp-dir=%s", rt->tmp_dir);
613b411c 5241
e8a565cb
YW
5242 if (rt->var_tmp_dir)
5243 fprintf(f, " var-tmp-dir=%s", rt->var_tmp_dir);
613b411c 5244
e8a565cb
YW
5245 if (rt->netns_storage_socket[0] >= 0) {
5246 int copy;
613b411c 5247
e8a565cb
YW
5248 copy = fdset_put_dup(fds, rt->netns_storage_socket[0]);
5249 if (copy < 0)
5250 return copy;
613b411c 5251
e8a565cb
YW
5252 fprintf(f, " netns-socket-0=%i", copy);
5253 }
613b411c 5254
e8a565cb
YW
5255 if (rt->netns_storage_socket[1] >= 0) {
5256 int copy;
613b411c 5257
e8a565cb
YW
5258 copy = fdset_put_dup(fds, rt->netns_storage_socket[1]);
5259 if (copy < 0)
5260 return copy;
613b411c 5261
e8a565cb
YW
5262 fprintf(f, " netns-socket-1=%i", copy);
5263 }
5264
5265 fputc('\n', f);
613b411c
LP
5266 }
5267
5268 return 0;
5269}
5270
e8a565cb
YW
5271int exec_runtime_deserialize_compat(Unit *u, const char *key, const char *value, FDSet *fds) {
5272 _cleanup_(exec_runtime_freep) ExecRuntime *rt_create = NULL;
5273 ExecRuntime *rt;
613b411c
LP
5274 int r;
5275
e8a565cb
YW
5276 /* This is for the migration from old (v237 or earlier) deserialization text.
5277 * Due to the bug #7790, this may not work with the units that use JoinsNamespaceOf=.
5278 * Even if the ExecRuntime object originally created by the other unit, we cannot judge
5279 * so or not from the serialized text, then we always creates a new object owned by this. */
5280
5281 assert(u);
613b411c
LP
5282 assert(key);
5283 assert(value);
5284
e8a565cb
YW
5285 /* Manager manages ExecRuntime objects by the unit id.
5286 * So, we omit the serialized text when the unit does not have id (yet?)... */
5287 if (isempty(u->id)) {
5288 log_unit_debug(u, "Invocation ID not found. Dropping runtime parameter.");
5289 return 0;
5290 }
613b411c 5291
e8a565cb
YW
5292 r = hashmap_ensure_allocated(&u->manager->exec_runtime_by_id, &string_hash_ops);
5293 if (r < 0) {
5294 log_unit_debug_errno(u, r, "Failed to allocate storage for runtime parameter: %m");
5295 return 0;
5296 }
5297
5298 rt = hashmap_get(u->manager->exec_runtime_by_id, u->id);
5299 if (!rt) {
5300 r = exec_runtime_allocate(&rt_create);
613b411c 5301 if (r < 0)
f2341e0a 5302 return log_oom();
613b411c 5303
e8a565cb
YW
5304 rt_create->id = strdup(u->id);
5305 if (!rt_create->id)
5306 return log_oom();
5307
5308 rt = rt_create;
5309 }
5310
5311 if (streq(key, "tmp-dir")) {
5312 char *copy;
5313
613b411c
LP
5314 copy = strdup(value);
5315 if (!copy)
5316 return log_oom();
5317
e8a565cb 5318 free_and_replace(rt->tmp_dir, copy);
613b411c
LP
5319
5320 } else if (streq(key, "var-tmp-dir")) {
5321 char *copy;
5322
613b411c
LP
5323 copy = strdup(value);
5324 if (!copy)
5325 return log_oom();
5326
e8a565cb 5327 free_and_replace(rt->var_tmp_dir, copy);
613b411c
LP
5328
5329 } else if (streq(key, "netns-socket-0")) {
5330 int fd;
5331
e8a565cb 5332 if (safe_atoi(value, &fd) < 0 || !fdset_contains(fds, fd)) {
f2341e0a 5333 log_unit_debug(u, "Failed to parse netns socket value: %s", value);
e8a565cb 5334 return 0;
613b411c 5335 }
e8a565cb
YW
5336
5337 safe_close(rt->netns_storage_socket[0]);
5338 rt->netns_storage_socket[0] = fdset_remove(fds, fd);
5339
613b411c
LP
5340 } else if (streq(key, "netns-socket-1")) {
5341 int fd;
5342
e8a565cb 5343 if (safe_atoi(value, &fd) < 0 || !fdset_contains(fds, fd)) {
f2341e0a 5344 log_unit_debug(u, "Failed to parse netns socket value: %s", value);
e8a565cb 5345 return 0;
613b411c 5346 }
e8a565cb
YW
5347
5348 safe_close(rt->netns_storage_socket[1]);
5349 rt->netns_storage_socket[1] = fdset_remove(fds, fd);
613b411c
LP
5350 } else
5351 return 0;
5352
e8a565cb
YW
5353 /* If the object is newly created, then put it to the hashmap which manages ExecRuntime objects. */
5354 if (rt_create) {
5355 r = hashmap_put(u->manager->exec_runtime_by_id, rt_create->id, rt_create);
5356 if (r < 0) {
3fe91079 5357 log_unit_debug_errno(u, r, "Failed to put runtime parameter to manager's storage: %m");
e8a565cb
YW
5358 return 0;
5359 }
613b411c 5360
e8a565cb 5361 rt_create->manager = u->manager;
613b411c 5362
e8a565cb
YW
5363 /* Avoid cleanup */
5364 rt_create = NULL;
5365 }
98b47d54 5366
e8a565cb
YW
5367 return 1;
5368}
613b411c 5369
e8a565cb
YW
5370void exec_runtime_deserialize_one(Manager *m, const char *value, FDSet *fds) {
5371 char *id = NULL, *tmp_dir = NULL, *var_tmp_dir = NULL;
5372 int r, fd0 = -1, fd1 = -1;
5373 const char *p, *v = value;
5374 size_t n;
613b411c 5375
e8a565cb
YW
5376 assert(m);
5377 assert(value);
5378 assert(fds);
98b47d54 5379
e8a565cb
YW
5380 n = strcspn(v, " ");
5381 id = strndupa(v, n);
5382 if (v[n] != ' ')
5383 goto finalize;
5384 p = v + n + 1;
5385
5386 v = startswith(p, "tmp-dir=");
5387 if (v) {
5388 n = strcspn(v, " ");
5389 tmp_dir = strndupa(v, n);
5390 if (v[n] != ' ')
5391 goto finalize;
5392 p = v + n + 1;
5393 }
5394
5395 v = startswith(p, "var-tmp-dir=");
5396 if (v) {
5397 n = strcspn(v, " ");
5398 var_tmp_dir = strndupa(v, n);
5399 if (v[n] != ' ')
5400 goto finalize;
5401 p = v + n + 1;
5402 }
5403
5404 v = startswith(p, "netns-socket-0=");
5405 if (v) {
5406 char *buf;
5407
5408 n = strcspn(v, " ");
5409 buf = strndupa(v, n);
5410 if (safe_atoi(buf, &fd0) < 0 || !fdset_contains(fds, fd0)) {
5411 log_debug("Unable to process exec-runtime netns fd specification.");
5412 return;
98b47d54 5413 }
e8a565cb
YW
5414 fd0 = fdset_remove(fds, fd0);
5415 if (v[n] != ' ')
5416 goto finalize;
5417 p = v + n + 1;
613b411c
LP
5418 }
5419
e8a565cb
YW
5420 v = startswith(p, "netns-socket-1=");
5421 if (v) {
5422 char *buf;
98b47d54 5423
e8a565cb
YW
5424 n = strcspn(v, " ");
5425 buf = strndupa(v, n);
5426 if (safe_atoi(buf, &fd1) < 0 || !fdset_contains(fds, fd1)) {
5427 log_debug("Unable to process exec-runtime netns fd specification.");
5428 return;
98b47d54 5429 }
e8a565cb
YW
5430 fd1 = fdset_remove(fds, fd1);
5431 }
98b47d54 5432
e8a565cb
YW
5433finalize:
5434
5435 r = exec_runtime_add(m, id, tmp_dir, var_tmp_dir, (int[]) { fd0, fd1 }, NULL);
7d853ca6 5436 if (r < 0)
e8a565cb 5437 log_debug_errno(r, "Failed to add exec-runtime: %m");
e8a565cb 5438}
613b411c 5439
e8a565cb
YW
5440void exec_runtime_vacuum(Manager *m) {
5441 ExecRuntime *rt;
5442 Iterator i;
5443
5444 assert(m);
5445
5446 /* Free unreferenced ExecRuntime objects. This is used after manager deserialization process. */
5447
5448 HASHMAP_FOREACH(rt, m->exec_runtime_by_id, i) {
5449 if (rt->n_ref > 0)
5450 continue;
5451
5452 (void) exec_runtime_free(rt, false);
5453 }
613b411c
LP
5454}
5455
b9c04eaf
YW
5456void exec_params_clear(ExecParameters *p) {
5457 if (!p)
5458 return;
5459
5460 strv_free(p->environment);
5461}
5462
80876c20
LP
5463static const char* const exec_input_table[_EXEC_INPUT_MAX] = {
5464 [EXEC_INPUT_NULL] = "null",
5465 [EXEC_INPUT_TTY] = "tty",
5466 [EXEC_INPUT_TTY_FORCE] = "tty-force",
4f2d528d 5467 [EXEC_INPUT_TTY_FAIL] = "tty-fail",
52c239d7
LB
5468 [EXEC_INPUT_SOCKET] = "socket",
5469 [EXEC_INPUT_NAMED_FD] = "fd",
08f3be7a 5470 [EXEC_INPUT_DATA] = "data",
2038c3f5 5471 [EXEC_INPUT_FILE] = "file",
80876c20
LP
5472};
5473
8a0867d6
LP
5474DEFINE_STRING_TABLE_LOOKUP(exec_input, ExecInput);
5475
94f04347 5476static const char* const exec_output_table[_EXEC_OUTPUT_MAX] = {
80876c20 5477 [EXEC_OUTPUT_INHERIT] = "inherit",
94f04347 5478 [EXEC_OUTPUT_NULL] = "null",
80876c20 5479 [EXEC_OUTPUT_TTY] = "tty",
94f04347 5480 [EXEC_OUTPUT_SYSLOG] = "syslog",
28dbc1e8 5481 [EXEC_OUTPUT_SYSLOG_AND_CONSOLE] = "syslog+console",
9a6bca7a 5482 [EXEC_OUTPUT_KMSG] = "kmsg",
28dbc1e8 5483 [EXEC_OUTPUT_KMSG_AND_CONSOLE] = "kmsg+console",
706343f4
LP
5484 [EXEC_OUTPUT_JOURNAL] = "journal",
5485 [EXEC_OUTPUT_JOURNAL_AND_CONSOLE] = "journal+console",
52c239d7
LB
5486 [EXEC_OUTPUT_SOCKET] = "socket",
5487 [EXEC_OUTPUT_NAMED_FD] = "fd",
2038c3f5 5488 [EXEC_OUTPUT_FILE] = "file",
566b7d23 5489 [EXEC_OUTPUT_FILE_APPEND] = "append",
94f04347
LP
5490};
5491
5492DEFINE_STRING_TABLE_LOOKUP(exec_output, ExecOutput);
023a4f67
LP
5493
5494static const char* const exec_utmp_mode_table[_EXEC_UTMP_MODE_MAX] = {
5495 [EXEC_UTMP_INIT] = "init",
5496 [EXEC_UTMP_LOGIN] = "login",
5497 [EXEC_UTMP_USER] = "user",
5498};
5499
5500DEFINE_STRING_TABLE_LOOKUP(exec_utmp_mode, ExecUtmpMode);
53f47dfc
YW
5501
5502static const char* const exec_preserve_mode_table[_EXEC_PRESERVE_MODE_MAX] = {
5503 [EXEC_PRESERVE_NO] = "no",
5504 [EXEC_PRESERVE_YES] = "yes",
5505 [EXEC_PRESERVE_RESTART] = "restart",
5506};
5507
5508DEFINE_STRING_TABLE_LOOKUP_WITH_BOOLEAN(exec_preserve_mode, ExecPreserveMode, EXEC_PRESERVE_YES);
3536f49e 5509
6b7b2ed9 5510/* This table maps ExecDirectoryType to the setting it is configured with in the unit */
72fd1768 5511static const char* const exec_directory_type_table[_EXEC_DIRECTORY_TYPE_MAX] = {
3536f49e
YW
5512 [EXEC_DIRECTORY_RUNTIME] = "RuntimeDirectory",
5513 [EXEC_DIRECTORY_STATE] = "StateDirectory",
5514 [EXEC_DIRECTORY_CACHE] = "CacheDirectory",
5515 [EXEC_DIRECTORY_LOGS] = "LogsDirectory",
5516 [EXEC_DIRECTORY_CONFIGURATION] = "ConfigurationDirectory",
5517};
5518
5519DEFINE_STRING_TABLE_LOOKUP(exec_directory_type, ExecDirectoryType);
b1edf445 5520
6b7b2ed9
LP
5521/* And this table maps ExecDirectoryType too, but to a generic term identifying the type of resource. This
5522 * one is supposed to be generic enough to be used for unit types that don't use ExecContext and per-unit
5523 * directories, specifically .timer units with their timestamp touch file. */
5524static const char* const exec_resource_type_table[_EXEC_DIRECTORY_TYPE_MAX] = {
5525 [EXEC_DIRECTORY_RUNTIME] = "runtime",
5526 [EXEC_DIRECTORY_STATE] = "state",
5527 [EXEC_DIRECTORY_CACHE] = "cache",
5528 [EXEC_DIRECTORY_LOGS] = "logs",
5529 [EXEC_DIRECTORY_CONFIGURATION] = "configuration",
5530};
5531
5532DEFINE_STRING_TABLE_LOOKUP(exec_resource_type, ExecDirectoryType);
5533
5534/* And this table also maps ExecDirectoryType, to the environment variable we pass the selected directory to
5535 * the service payload in. */
fb2042dd
YW
5536static const char* const exec_directory_env_name_table[_EXEC_DIRECTORY_TYPE_MAX] = {
5537 [EXEC_DIRECTORY_RUNTIME] = "RUNTIME_DIRECTORY",
5538 [EXEC_DIRECTORY_STATE] = "STATE_DIRECTORY",
5539 [EXEC_DIRECTORY_CACHE] = "CACHE_DIRECTORY",
5540 [EXEC_DIRECTORY_LOGS] = "LOGS_DIRECTORY",
5541 [EXEC_DIRECTORY_CONFIGURATION] = "CONFIGURATION_DIRECTORY",
5542};
5543
5544DEFINE_PRIVATE_STRING_TABLE_LOOKUP_TO_STRING(exec_directory_env_name, ExecDirectoryType);
5545
b1edf445
LP
5546static const char* const exec_keyring_mode_table[_EXEC_KEYRING_MODE_MAX] = {
5547 [EXEC_KEYRING_INHERIT] = "inherit",
5548 [EXEC_KEYRING_PRIVATE] = "private",
5549 [EXEC_KEYRING_SHARED] = "shared",
5550};
5551
5552DEFINE_STRING_TABLE_LOOKUP(exec_keyring_mode, ExecKeyringMode);