]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/core/execute.c
core: log when we convert from DynamicUser=1 to =0 or vice versa
[thirdparty/systemd.git] / src / core / execute.c
CommitLineData
53e1b683 1/* SPDX-License-Identifier: LGPL-2.1+ */
a7334b09 2
034c6ed7
LP
3#include <errno.h>
4#include <fcntl.h>
8dd4c05b
LP
5#include <glob.h>
6#include <grp.h>
7#include <poll.h>
309bff19 8#include <signal.h>
8dd4c05b 9#include <string.h>
19c0b0b9 10#include <sys/capability.h>
d251207d 11#include <sys/eventfd.h>
f3e43635 12#include <sys/mman.h>
8dd4c05b 13#include <sys/personality.h>
94f04347 14#include <sys/prctl.h>
d2ffa389 15#include <sys/shm.h>
8dd4c05b 16#include <sys/socket.h>
451a074f 17#include <sys/stat.h>
d2ffa389 18#include <sys/types.h>
8dd4c05b
LP
19#include <sys/un.h>
20#include <unistd.h>
023a4f67 21#include <utmpx.h>
5cb5a6ff 22
349cc4a5 23#if HAVE_PAM
5b6319dc
LP
24#include <security/pam_appl.h>
25#endif
26
349cc4a5 27#if HAVE_SELINUX
7b52a628
MS
28#include <selinux/selinux.h>
29#endif
30
349cc4a5 31#if HAVE_SECCOMP
17df7223
LP
32#include <seccomp.h>
33#endif
34
349cc4a5 35#if HAVE_APPARMOR
eef65bf3
MS
36#include <sys/apparmor.h>
37#endif
38
24882e06 39#include "sd-messages.h"
8dd4c05b
LP
40
41#include "af-list.h"
b5efdb8a 42#include "alloc-util.h"
349cc4a5 43#if HAVE_APPARMOR
3ffd4af2
LP
44#include "apparmor-util.h"
45#endif
8dd4c05b
LP
46#include "async.h"
47#include "barrier.h"
8dd4c05b 48#include "cap-list.h"
430f0182 49#include "capability-util.h"
a1164ae3 50#include "chown-recursive.h"
da681e1b 51#include "cpu-set-util.h"
f6a6225e 52#include "def.h"
686d13b9 53#include "env-file.h"
4d1a6904 54#include "env-util.h"
17df7223 55#include "errno-list.h"
3ffd4af2 56#include "execute.h"
8dd4c05b 57#include "exit-status.h"
3ffd4af2 58#include "fd-util.h"
f97b34a6 59#include "format-util.h"
f4f15635 60#include "fs-util.h"
7d50b32a 61#include "glob-util.h"
c004493c 62#include "io-util.h"
8dd4c05b 63#include "ioprio.h"
a1164ae3 64#include "label.h"
8dd4c05b
LP
65#include "log.h"
66#include "macro.h"
e8a565cb 67#include "manager.h"
0a970718 68#include "memory-util.h"
8dd4c05b
LP
69#include "missing.h"
70#include "mkdir.h"
71#include "namespace.h"
6bedfcbb 72#include "parse-util.h"
8dd4c05b 73#include "path-util.h"
0b452006 74#include "process-util.h"
78f22b97 75#include "rlimit-util.h"
8dd4c05b 76#include "rm-rf.h"
349cc4a5 77#if HAVE_SECCOMP
3ffd4af2
LP
78#include "seccomp-util.h"
79#endif
07d46372 80#include "securebits-util.h"
8dd4c05b 81#include "selinux-util.h"
24882e06 82#include "signal-util.h"
8dd4c05b 83#include "smack-util.h"
57b7a260 84#include "socket-util.h"
fd63e712 85#include "special.h"
949befd3 86#include "stat-util.h"
8b43440b 87#include "string-table.h"
07630cea 88#include "string-util.h"
8dd4c05b 89#include "strv.h"
7ccbd1ae 90#include "syslog-util.h"
8dd4c05b 91#include "terminal-util.h"
566b7d23 92#include "umask-util.h"
8dd4c05b 93#include "unit.h"
b1d4f8e1 94#include "user-util.h"
8dd4c05b 95#include "utmp-wtmp.h"
5cb5a6ff 96
e056b01d 97#define IDLE_TIMEOUT_USEC (5*USEC_PER_SEC)
31a7eb86 98#define IDLE_TIMEOUT2_USEC (1*USEC_PER_SEC)
e6a26745 99
531dca78
LP
100#define SNDBUF_SIZE (8*1024*1024)
101
/* Rewrites fds[] in place so that entry i ends up referring to file descriptor i+3. Each misplaced entry is
 * duplicated with F_DUPFD to the lowest free descriptor >= i+3 and the old descriptor is closed. If the
 * wanted slot was still occupied, the pass is repeated starting from the first such entry.
 *
 * Returns 0 on success or a negative errno-style value if fcntl() fails. */
static int shift_fds(int fds[], size_t n_fds) {
        int first = 0, retry_at;

        if (n_fds == 0)
                return 0;

        assert(fds);

        do {
                int idx;

                retry_at = -1;

                for (idx = first; idx < (int) n_fds; idx++) {
                        int wanted = idx + 3, got;

                        /* Nothing to do if the fd already sits in its slot */
                        if (fds[idx] == wanted)
                                continue;

                        got = fcntl(fds[idx], F_DUPFD, wanted);
                        if (got < 0)
                                return -errno;

                        safe_close(fds[idx]);
                        fds[idx] = got;

                        /* The slot we asked for was taken, so we got a higher fd. Remember the first
                         * index where this happened and redo the pass from there. */
                        if (got != wanted && retry_at < 0)
                                retry_at = idx;
                }

                first = retry_at;
        } while (retry_at >= 0);

        return 0;
}
146
/* Adjusts the flags of the first n_socket_fds + n_storage_fds entries of fds[]: the leading n_socket_fds
 * entries get O_NONBLOCK set or cleared according to 'nonblock', and FD_CLOEXEC is dropped from every
 * entry, since these fds are meant to be passed on to children.
 *
 * Returns 0 on success, or the first negative value returned by fd_nonblock()/fd_cloexec(). */
static int flags_fds(const int fds[], size_t n_socket_fds, size_t n_storage_fds, bool nonblock) {
        size_t total = n_socket_fds + n_storage_fds, idx;
        int r;

        if (total == 0)
                return 0;

        assert(fds);

        for (idx = 0; idx < total; idx++) {
                bool is_socket = idx < n_socket_fds;

                /* O_NONBLOCK is only applied to the socket activation fds */
                if (is_socket) {
                        r = fd_nonblock(fds[idx], nonblock);
                        if (r < 0)
                                return r;
                }

                /* FD_CLOEXEC is dropped unconditionally */
                r = fd_cloexec(fds[idx], false);
                if (r < 0)
                        return r;
        }

        return 0;
}
179
1e22b5cd 180static const char *exec_context_tty_path(const ExecContext *context) {
80876c20
LP
181 assert(context);
182
1e22b5cd
LP
183 if (context->stdio_as_fds)
184 return NULL;
185
80876c20
LP
186 if (context->tty_path)
187 return context->tty_path;
188
189 return "/dev/console";
190}
191
1e22b5cd
LP
192static void exec_context_tty_reset(const ExecContext *context, const ExecParameters *p) {
193 const char *path;
194
6ea832a2
LP
195 assert(context);
196
1e22b5cd 197 path = exec_context_tty_path(context);
6ea832a2 198
1e22b5cd
LP
199 if (context->tty_vhangup) {
200 if (p && p->stdin_fd >= 0)
201 (void) terminal_vhangup_fd(p->stdin_fd);
202 else if (path)
203 (void) terminal_vhangup(path);
204 }
6ea832a2 205
1e22b5cd
LP
206 if (context->tty_reset) {
207 if (p && p->stdin_fd >= 0)
208 (void) reset_terminal_fd(p->stdin_fd, true);
209 else if (path)
210 (void) reset_terminal(path);
211 }
212
213 if (context->tty_vt_disallocate && path)
214 (void) vt_disallocate(path);
6ea832a2
LP
215}
216
6af760f3
LP
217static bool is_terminal_input(ExecInput i) {
218 return IN_SET(i,
219 EXEC_INPUT_TTY,
220 EXEC_INPUT_TTY_FORCE,
221 EXEC_INPUT_TTY_FAIL);
222}
223
3a1286b6 224static bool is_terminal_output(ExecOutput o) {
6af760f3
LP
225 return IN_SET(o,
226 EXEC_OUTPUT_TTY,
227 EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
228 EXEC_OUTPUT_KMSG_AND_CONSOLE,
229 EXEC_OUTPUT_JOURNAL_AND_CONSOLE);
230}
231
aac8c0c3
LP
232static bool is_syslog_output(ExecOutput o) {
233 return IN_SET(o,
234 EXEC_OUTPUT_SYSLOG,
235 EXEC_OUTPUT_SYSLOG_AND_CONSOLE);
236}
237
238static bool is_kmsg_output(ExecOutput o) {
239 return IN_SET(o,
240 EXEC_OUTPUT_KMSG,
241 EXEC_OUTPUT_KMSG_AND_CONSOLE);
242}
243
6af760f3
LP
244static bool exec_context_needs_term(const ExecContext *c) {
245 assert(c);
246
247 /* Return true if the execution context suggests we should set $TERM to something useful. */
248
249 if (is_terminal_input(c->std_input))
250 return true;
251
252 if (is_terminal_output(c->std_output))
253 return true;
254
255 if (is_terminal_output(c->std_error))
256 return true;
257
258 return !!c->tty_path;
3a1286b6
MS
259}
260
/* Opens /dev/null with the given flags (plus O_NOCTTY) and hands the result to move_fd() to place it at
 * 'nfd'. Returns -errno if open() fails, otherwise whatever move_fd() returns. */
static int open_null_as(int flags, int nfd) {
        int null_fd;

        assert(nfd >= 0);

        null_fd = open("/dev/null", flags|O_NOCTTY);
        if (null_fd >= 0)
                return move_fd(null_fd, nfd, false);

        return -errno;
}
272
524daa8c 273static int connect_journal_socket(int fd, uid_t uid, gid_t gid) {
92a17af9 274 static const union sockaddr_union sa = {
b92bea5d
ZJS
275 .un.sun_family = AF_UNIX,
276 .un.sun_path = "/run/systemd/journal/stdout",
277 };
524daa8c
ZJS
278 uid_t olduid = UID_INVALID;
279 gid_t oldgid = GID_INVALID;
280 int r;
281
cad93f29 282 if (gid_is_valid(gid)) {
524daa8c
ZJS
283 oldgid = getgid();
284
92a17af9 285 if (setegid(gid) < 0)
524daa8c
ZJS
286 return -errno;
287 }
288
cad93f29 289 if (uid_is_valid(uid)) {
524daa8c
ZJS
290 olduid = getuid();
291
92a17af9 292 if (seteuid(uid) < 0) {
524daa8c
ZJS
293 r = -errno;
294 goto restore_gid;
295 }
296 }
297
92a17af9 298 r = connect(fd, &sa.sa, SOCKADDR_UN_LEN(sa.un)) < 0 ? -errno : 0;
524daa8c
ZJS
299
300 /* If we fail to restore the uid or gid, things will likely
301 fail later on. This should only happen if an LSM interferes. */
302
cad93f29 303 if (uid_is_valid(uid))
524daa8c
ZJS
304 (void) seteuid(olduid);
305
306 restore_gid:
cad93f29 307 if (gid_is_valid(gid))
524daa8c
ZJS
308 (void) setegid(oldgid);
309
310 return r;
311}
312
fd1f9c89 313static int connect_logger_as(
34cf6c43 314 const Unit *unit,
fd1f9c89 315 const ExecContext *context,
af635cf3 316 const ExecParameters *params,
fd1f9c89
LP
317 ExecOutput output,
318 const char *ident,
fd1f9c89
LP
319 int nfd,
320 uid_t uid,
321 gid_t gid) {
322
2ac1ff68
EV
323 _cleanup_close_ int fd = -1;
324 int r;
071830ff
LP
325
326 assert(context);
af635cf3 327 assert(params);
80876c20
LP
328 assert(output < _EXEC_OUTPUT_MAX);
329 assert(ident);
330 assert(nfd >= 0);
071830ff 331
54fe0cdb
LP
332 fd = socket(AF_UNIX, SOCK_STREAM, 0);
333 if (fd < 0)
80876c20 334 return -errno;
071830ff 335
524daa8c
ZJS
336 r = connect_journal_socket(fd, uid, gid);
337 if (r < 0)
338 return r;
071830ff 339
2ac1ff68 340 if (shutdown(fd, SHUT_RD) < 0)
80876c20 341 return -errno;
071830ff 342
fd1f9c89 343 (void) fd_inc_sndbuf(fd, SNDBUF_SIZE);
531dca78 344
2ac1ff68 345 if (dprintf(fd,
62bca2c6 346 "%s\n"
80876c20
LP
347 "%s\n"
348 "%i\n"
54fe0cdb
LP
349 "%i\n"
350 "%i\n"
351 "%i\n"
4f4a1dbf 352 "%i\n",
c867611e 353 context->syslog_identifier ?: ident,
af635cf3 354 params->flags & EXEC_PASS_LOG_UNIT ? unit->id : "",
54fe0cdb
LP
355 context->syslog_priority,
356 !!context->syslog_level_prefix,
aac8c0c3
LP
357 is_syslog_output(output),
358 is_kmsg_output(output),
2ac1ff68
EV
359 is_terminal_output(output)) < 0)
360 return -errno;
80876c20 361
2ac1ff68 362 return move_fd(TAKE_FD(fd), nfd, false);
80876c20 363}
2ac1ff68 364
/* Opens the terminal at 'path' with the given flags (plus O_NOCTTY) and hands the result to move_fd() to
 * place it at 'nfd'. Returns the negative result of open_terminal() on failure, otherwise whatever
 * move_fd() returns. */
static int open_terminal_as(const char *path, int flags, int nfd) {
        int tty_fd;

        assert(path);
        assert(nfd >= 0);

        tty_fd = open_terminal(path, flags | O_NOCTTY);
        if (tty_fd >= 0)
                return move_fd(tty_fd, nfd, false);

        return tty_fd;
}
071830ff 377
2038c3f5 378static int acquire_path(const char *path, int flags, mode_t mode) {
15a3e96f
LP
379 union sockaddr_union sa = {};
380 _cleanup_close_ int fd = -1;
381 int r, salen;
071830ff 382
80876c20 383 assert(path);
071830ff 384
2038c3f5
LP
385 if (IN_SET(flags & O_ACCMODE, O_WRONLY, O_RDWR))
386 flags |= O_CREAT;
387
388 fd = open(path, flags|O_NOCTTY, mode);
389 if (fd >= 0)
15a3e96f 390 return TAKE_FD(fd);
071830ff 391
2038c3f5
LP
392 if (errno != ENXIO) /* ENXIO is returned when we try to open() an AF_UNIX file system socket on Linux */
393 return -errno;
15a3e96f 394 if (strlen(path) >= sizeof(sa.un.sun_path)) /* Too long, can't be a UNIX socket */
2038c3f5
LP
395 return -ENXIO;
396
397 /* So, it appears the specified path could be an AF_UNIX socket. Let's see if we can connect to it. */
398
399 fd = socket(AF_UNIX, SOCK_STREAM, 0);
400 if (fd < 0)
401 return -errno;
402
15a3e96f
LP
403 salen = sockaddr_un_set_path(&sa.un, path);
404 if (salen < 0)
405 return salen;
406
407 if (connect(fd, &sa.sa, salen) < 0)
2038c3f5
LP
408 return errno == EINVAL ? -ENXIO : -errno; /* Propagate initial error if we get EINVAL, i.e. we have
409 * indication that his wasn't an AF_UNIX socket after all */
071830ff 410
2038c3f5
LP
411 if ((flags & O_ACCMODE) == O_RDONLY)
412 r = shutdown(fd, SHUT_WR);
413 else if ((flags & O_ACCMODE) == O_WRONLY)
414 r = shutdown(fd, SHUT_RD);
415 else
15a3e96f
LP
416 return TAKE_FD(fd);
417 if (r < 0)
2038c3f5 418 return -errno;
2038c3f5 419
15a3e96f 420 return TAKE_FD(fd);
80876c20 421}
071830ff 422
08f3be7a
LP
423static int fixup_input(
424 const ExecContext *context,
425 int socket_fd,
426 bool apply_tty_stdin) {
427
428 ExecInput std_input;
429
430 assert(context);
431
432 std_input = context->std_input;
1e3ad081
LP
433
434 if (is_terminal_input(std_input) && !apply_tty_stdin)
435 return EXEC_INPUT_NULL;
071830ff 436
03fd9c49 437 if (std_input == EXEC_INPUT_SOCKET && socket_fd < 0)
4f2d528d
LP
438 return EXEC_INPUT_NULL;
439
08f3be7a
LP
440 if (std_input == EXEC_INPUT_DATA && context->stdin_data_size == 0)
441 return EXEC_INPUT_NULL;
442
03fd9c49 443 return std_input;
4f2d528d
LP
444}
445
03fd9c49 446static int fixup_output(ExecOutput std_output, int socket_fd) {
4f2d528d 447
03fd9c49 448 if (std_output == EXEC_OUTPUT_SOCKET && socket_fd < 0)
4f2d528d
LP
449 return EXEC_OUTPUT_INHERIT;
450
03fd9c49 451 return std_output;
4f2d528d
LP
452}
453
a34ceba6
LP
454static int setup_input(
455 const ExecContext *context,
456 const ExecParameters *params,
52c239d7
LB
457 int socket_fd,
458 int named_iofds[3]) {
a34ceba6 459
4f2d528d
LP
460 ExecInput i;
461
462 assert(context);
a34ceba6
LP
463 assert(params);
464
465 if (params->stdin_fd >= 0) {
466 if (dup2(params->stdin_fd, STDIN_FILENO) < 0)
467 return -errno;
468
469 /* Try to make this the controlling tty, if it is a tty, and reset it */
1fb0682e
LP
470 if (isatty(STDIN_FILENO)) {
471 (void) ioctl(STDIN_FILENO, TIOCSCTTY, context->std_input == EXEC_INPUT_TTY_FORCE);
472 (void) reset_terminal_fd(STDIN_FILENO, true);
473 }
a34ceba6
LP
474
475 return STDIN_FILENO;
476 }
4f2d528d 477
08f3be7a 478 i = fixup_input(context, socket_fd, params->flags & EXEC_APPLY_TTY_STDIN);
4f2d528d
LP
479
480 switch (i) {
071830ff 481
80876c20
LP
482 case EXEC_INPUT_NULL:
483 return open_null_as(O_RDONLY, STDIN_FILENO);
484
485 case EXEC_INPUT_TTY:
486 case EXEC_INPUT_TTY_FORCE:
487 case EXEC_INPUT_TTY_FAIL: {
046a82c1 488 int fd;
071830ff 489
1e22b5cd 490 fd = acquire_terminal(exec_context_tty_path(context),
8854d795
LP
491 i == EXEC_INPUT_TTY_FAIL ? ACQUIRE_TERMINAL_TRY :
492 i == EXEC_INPUT_TTY_FORCE ? ACQUIRE_TERMINAL_FORCE :
493 ACQUIRE_TERMINAL_WAIT,
3a43da28 494 USEC_INFINITY);
970edce6 495 if (fd < 0)
80876c20
LP
496 return fd;
497
046a82c1 498 return move_fd(fd, STDIN_FILENO, false);
80876c20
LP
499 }
500
4f2d528d 501 case EXEC_INPUT_SOCKET:
e75a9ed1
LP
502 assert(socket_fd >= 0);
503
4f2d528d
LP
504 return dup2(socket_fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
505
52c239d7 506 case EXEC_INPUT_NAMED_FD:
e75a9ed1
LP
507 assert(named_iofds[STDIN_FILENO] >= 0);
508
52c239d7
LB
509 (void) fd_nonblock(named_iofds[STDIN_FILENO], false);
510 return dup2(named_iofds[STDIN_FILENO], STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
511
08f3be7a
LP
512 case EXEC_INPUT_DATA: {
513 int fd;
514
515 fd = acquire_data_fd(context->stdin_data, context->stdin_data_size, 0);
516 if (fd < 0)
517 return fd;
518
519 return move_fd(fd, STDIN_FILENO, false);
520 }
521
2038c3f5
LP
522 case EXEC_INPUT_FILE: {
523 bool rw;
524 int fd;
525
526 assert(context->stdio_file[STDIN_FILENO]);
527
528 rw = (context->std_output == EXEC_OUTPUT_FILE && streq_ptr(context->stdio_file[STDIN_FILENO], context->stdio_file[STDOUT_FILENO])) ||
529 (context->std_error == EXEC_OUTPUT_FILE && streq_ptr(context->stdio_file[STDIN_FILENO], context->stdio_file[STDERR_FILENO]));
530
531 fd = acquire_path(context->stdio_file[STDIN_FILENO], rw ? O_RDWR : O_RDONLY, 0666 & ~context->umask);
532 if (fd < 0)
533 return fd;
534
535 return move_fd(fd, STDIN_FILENO, false);
536 }
537
80876c20
LP
538 default:
539 assert_not_reached("Unknown input type");
540 }
541}
542
41fc585a
LP
543static bool can_inherit_stderr_from_stdout(
544 const ExecContext *context,
545 ExecOutput o,
546 ExecOutput e) {
547
548 assert(context);
549
550 /* Returns true, if given the specified STDERR and STDOUT output we can directly dup() the stdout fd to the
551 * stderr fd */
552
553 if (e == EXEC_OUTPUT_INHERIT)
554 return true;
555 if (e != o)
556 return false;
557
558 if (e == EXEC_OUTPUT_NAMED_FD)
559 return streq_ptr(context->stdio_fdname[STDOUT_FILENO], context->stdio_fdname[STDERR_FILENO]);
560
561 if (IN_SET(e, EXEC_OUTPUT_FILE, EXEC_OUTPUT_FILE_APPEND))
562 return streq_ptr(context->stdio_file[STDOUT_FILENO], context->stdio_file[STDERR_FILENO]);
563
564 return true;
565}
566
a34ceba6 567static int setup_output(
34cf6c43 568 const Unit *unit,
a34ceba6
LP
569 const ExecContext *context,
570 const ExecParameters *params,
571 int fileno,
572 int socket_fd,
52c239d7 573 int named_iofds[3],
a34ceba6 574 const char *ident,
7bce046b
LP
575 uid_t uid,
576 gid_t gid,
577 dev_t *journal_stream_dev,
578 ino_t *journal_stream_ino) {
a34ceba6 579
4f2d528d
LP
580 ExecOutput o;
581 ExecInput i;
47c1d80d 582 int r;
4f2d528d 583
f2341e0a 584 assert(unit);
80876c20 585 assert(context);
a34ceba6 586 assert(params);
80876c20 587 assert(ident);
7bce046b
LP
588 assert(journal_stream_dev);
589 assert(journal_stream_ino);
80876c20 590
a34ceba6
LP
591 if (fileno == STDOUT_FILENO && params->stdout_fd >= 0) {
592
593 if (dup2(params->stdout_fd, STDOUT_FILENO) < 0)
594 return -errno;
595
596 return STDOUT_FILENO;
597 }
598
599 if (fileno == STDERR_FILENO && params->stderr_fd >= 0) {
600 if (dup2(params->stderr_fd, STDERR_FILENO) < 0)
601 return -errno;
602
603 return STDERR_FILENO;
604 }
605
08f3be7a 606 i = fixup_input(context, socket_fd, params->flags & EXEC_APPLY_TTY_STDIN);
03fd9c49 607 o = fixup_output(context->std_output, socket_fd);
4f2d528d 608
eb17e935
MS
609 if (fileno == STDERR_FILENO) {
610 ExecOutput e;
611 e = fixup_output(context->std_error, socket_fd);
80876c20 612
eb17e935
MS
613 /* This expects the input and output are already set up */
614
615 /* Don't change the stderr file descriptor if we inherit all
616 * the way and are not on a tty */
617 if (e == EXEC_OUTPUT_INHERIT &&
618 o == EXEC_OUTPUT_INHERIT &&
619 i == EXEC_INPUT_NULL &&
620 !is_terminal_input(context->std_input) &&
621 getppid () != 1)
622 return fileno;
623
624 /* Duplicate from stdout if possible */
41fc585a 625 if (can_inherit_stderr_from_stdout(context, o, e))
eb17e935 626 return dup2(STDOUT_FILENO, fileno) < 0 ? -errno : fileno;
071830ff 627
eb17e935 628 o = e;
80876c20 629
eb17e935 630 } else if (o == EXEC_OUTPUT_INHERIT) {
21d21ea4
LP
631 /* If input got downgraded, inherit the original value */
632 if (i == EXEC_INPUT_NULL && is_terminal_input(context->std_input))
1e22b5cd 633 return open_terminal_as(exec_context_tty_path(context), O_WRONLY, fileno);
21d21ea4 634
08f3be7a
LP
635 /* If the input is connected to anything that's not a /dev/null or a data fd, inherit that... */
636 if (!IN_SET(i, EXEC_INPUT_NULL, EXEC_INPUT_DATA))
eb17e935 637 return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
071830ff 638
acb591e4
LP
639 /* If we are not started from PID 1 we just inherit STDOUT from our parent process. */
640 if (getppid() != 1)
eb17e935 641 return fileno;
94f04347 642
eb17e935
MS
643 /* We need to open /dev/null here anew, to get the right access mode. */
644 return open_null_as(O_WRONLY, fileno);
071830ff 645 }
94f04347 646
eb17e935 647 switch (o) {
80876c20
LP
648
649 case EXEC_OUTPUT_NULL:
eb17e935 650 return open_null_as(O_WRONLY, fileno);
80876c20
LP
651
652 case EXEC_OUTPUT_TTY:
4f2d528d 653 if (is_terminal_input(i))
eb17e935 654 return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
80876c20
LP
655
656 /* We don't reset the terminal if this is just about output */
1e22b5cd 657 return open_terminal_as(exec_context_tty_path(context), O_WRONLY, fileno);
80876c20
LP
658
659 case EXEC_OUTPUT_SYSLOG:
28dbc1e8 660 case EXEC_OUTPUT_SYSLOG_AND_CONSOLE:
9a6bca7a 661 case EXEC_OUTPUT_KMSG:
28dbc1e8 662 case EXEC_OUTPUT_KMSG_AND_CONSOLE:
706343f4
LP
663 case EXEC_OUTPUT_JOURNAL:
664 case EXEC_OUTPUT_JOURNAL_AND_CONSOLE:
af635cf3 665 r = connect_logger_as(unit, context, params, o, ident, fileno, uid, gid);
47c1d80d 666 if (r < 0) {
82677ae4 667 log_unit_warning_errno(unit, r, "Failed to connect %s to the journal socket, ignoring: %m", fileno == STDOUT_FILENO ? "stdout" : "stderr");
eb17e935 668 r = open_null_as(O_WRONLY, fileno);
7bce046b
LP
669 } else {
670 struct stat st;
671
672 /* If we connected this fd to the journal via a stream, patch the device/inode into the passed
673 * parameters, but only then. This is useful so that we can set $JOURNAL_STREAM that permits
ab2116b1
LP
674 * services to detect whether they are connected to the journal or not.
675 *
676 * If both stdout and stderr are connected to a stream then let's make sure to store the data
677 * about STDERR as that's usually the best way to do logging. */
7bce046b 678
ab2116b1
LP
679 if (fstat(fileno, &st) >= 0 &&
680 (*journal_stream_ino == 0 || fileno == STDERR_FILENO)) {
7bce046b
LP
681 *journal_stream_dev = st.st_dev;
682 *journal_stream_ino = st.st_ino;
683 }
47c1d80d
MS
684 }
685 return r;
4f2d528d
LP
686
687 case EXEC_OUTPUT_SOCKET:
688 assert(socket_fd >= 0);
e75a9ed1 689
eb17e935 690 return dup2(socket_fd, fileno) < 0 ? -errno : fileno;
94f04347 691
52c239d7 692 case EXEC_OUTPUT_NAMED_FD:
e75a9ed1
LP
693 assert(named_iofds[fileno] >= 0);
694
52c239d7
LB
695 (void) fd_nonblock(named_iofds[fileno], false);
696 return dup2(named_iofds[fileno], fileno) < 0 ? -errno : fileno;
697
566b7d23
ZD
698 case EXEC_OUTPUT_FILE:
699 case EXEC_OUTPUT_FILE_APPEND: {
2038c3f5 700 bool rw;
566b7d23 701 int fd, flags;
2038c3f5
LP
702
703 assert(context->stdio_file[fileno]);
704
705 rw = context->std_input == EXEC_INPUT_FILE &&
706 streq_ptr(context->stdio_file[fileno], context->stdio_file[STDIN_FILENO]);
707
708 if (rw)
709 return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
710
566b7d23
ZD
711 flags = O_WRONLY;
712 if (o == EXEC_OUTPUT_FILE_APPEND)
713 flags |= O_APPEND;
714
715 fd = acquire_path(context->stdio_file[fileno], flags, 0666 & ~context->umask);
2038c3f5
LP
716 if (fd < 0)
717 return fd;
718
566b7d23 719 return move_fd(fd, fileno, 0);
2038c3f5
LP
720 }
721
94f04347 722 default:
80876c20 723 assert_not_reached("Unknown error type");
94f04347 724 }
071830ff
LP
725}
726
02a51aba 727static int chown_terminal(int fd, uid_t uid) {
4b3b5bc7 728 int r;
02a51aba
LP
729
730 assert(fd >= 0);
02a51aba 731
1ff74fb6 732 /* Before we chown/chmod the TTY, let's ensure this is actually a tty */
4b3b5bc7
LP
733 if (isatty(fd) < 1) {
734 if (IN_SET(errno, EINVAL, ENOTTY))
735 return 0; /* not a tty */
1ff74fb6 736
02a51aba 737 return -errno;
4b3b5bc7 738 }
02a51aba 739
4b3b5bc7
LP
740 /* This might fail. What matters are the results. */
741 r = fchmod_and_chown(fd, TTY_MODE, uid, -1);
742 if (r < 0)
743 return r;
02a51aba 744
4b3b5bc7 745 return 1;
02a51aba
LP
746}
747
7d5ceb64 748static int setup_confirm_stdio(const char *vc, int *_saved_stdin, int *_saved_stdout) {
3d18b167
LP
749 _cleanup_close_ int fd = -1, saved_stdin = -1, saved_stdout = -1;
750 int r;
80876c20 751
80876c20
LP
752 assert(_saved_stdin);
753 assert(_saved_stdout);
754
af6da548
LP
755 saved_stdin = fcntl(STDIN_FILENO, F_DUPFD, 3);
756 if (saved_stdin < 0)
757 return -errno;
80876c20 758
af6da548 759 saved_stdout = fcntl(STDOUT_FILENO, F_DUPFD, 3);
3d18b167
LP
760 if (saved_stdout < 0)
761 return -errno;
80876c20 762
8854d795 763 fd = acquire_terminal(vc, ACQUIRE_TERMINAL_WAIT, DEFAULT_CONFIRM_USEC);
3d18b167
LP
764 if (fd < 0)
765 return fd;
80876c20 766
af6da548
LP
767 r = chown_terminal(fd, getuid());
768 if (r < 0)
3d18b167 769 return r;
02a51aba 770
3d18b167
LP
771 r = reset_terminal_fd(fd, true);
772 if (r < 0)
773 return r;
80876c20 774
2b33ab09 775 r = rearrange_stdio(fd, fd, STDERR_FILENO);
3d18b167 776 fd = -1;
2b33ab09
LP
777 if (r < 0)
778 return r;
80876c20
LP
779
780 *_saved_stdin = saved_stdin;
781 *_saved_stdout = saved_stdout;
782
3d18b167 783 saved_stdin = saved_stdout = -1;
80876c20 784
3d18b167 785 return 0;
80876c20
LP
786}
787
63d77c92 788static void write_confirm_error_fd(int err, int fd, const Unit *u) {
3b20f877
FB
789 assert(err < 0);
790
791 if (err == -ETIMEDOUT)
63d77c92 792 dprintf(fd, "Confirmation question timed out for %s, assuming positive response.\n", u->id);
3b20f877
FB
793 else {
794 errno = -err;
63d77c92 795 dprintf(fd, "Couldn't ask confirmation for %s: %m, assuming positive response.\n", u->id);
3b20f877
FB
796 }
797}
798
63d77c92 799static void write_confirm_error(int err, const char *vc, const Unit *u) {
03e334a1 800 _cleanup_close_ int fd = -1;
80876c20 801
3b20f877 802 assert(vc);
80876c20 803
7d5ceb64 804 fd = open_terminal(vc, O_WRONLY|O_NOCTTY|O_CLOEXEC);
af6da548 805 if (fd < 0)
3b20f877 806 return;
80876c20 807
63d77c92 808 write_confirm_error_fd(err, fd, u);
af6da548 809}
80876c20 810
/* Counterpart of setup_confirm_stdio(): releases the terminal, puts the saved stdin/stdout fds back in
 * place (if set) and closes the saved copies, resetting *saved_stdin and *saved_stdout to the value
 * returned by safe_close().
 *
 * Returns 0 on success, or the negative errno of the last dup2() call that failed. */
static int restore_confirm_stdio(int *saved_stdin, int *saved_stdout) {
        int ret = 0;

        assert(saved_stdin);
        assert(saved_stdout);

        release_terminal();

        if (*saved_stdin >= 0 && dup2(*saved_stdin, STDIN_FILENO) < 0)
                ret = -errno;

        if (*saved_stdout >= 0 && dup2(*saved_stdout, STDOUT_FILENO) < 0)
                ret = -errno;

        *saved_stdin = safe_close(*saved_stdin);
        *saved_stdout = safe_close(*saved_stdout);

        return ret;
}
832
3b20f877
FB
/* Possible results of ask_for_confirmation() */
enum {
        CONFIRM_PRETEND_FAILURE = -1,  /* don't execute the command, pretend it failed */
        CONFIRM_PRETEND_SUCCESS =  0,  /* don't execute the command, pretend it succeeded */
        CONFIRM_EXECUTE         =  1,  /* execute the command */
};
838
eedf223a 839static int ask_for_confirmation(const char *vc, Unit *u, const char *cmdline) {
af6da548 840 int saved_stdout = -1, saved_stdin = -1, r;
2bcd3c26 841 _cleanup_free_ char *e = NULL;
3b20f877 842 char c;
af6da548 843
3b20f877 844 /* For any internal errors, assume a positive response. */
7d5ceb64 845 r = setup_confirm_stdio(vc, &saved_stdin, &saved_stdout);
3b20f877 846 if (r < 0) {
63d77c92 847 write_confirm_error(r, vc, u);
3b20f877
FB
848 return CONFIRM_EXECUTE;
849 }
af6da548 850
b0eb2944
FB
851 /* confirm_spawn might have been disabled while we were sleeping. */
852 if (manager_is_confirm_spawn_disabled(u->manager)) {
853 r = 1;
854 goto restore_stdio;
855 }
af6da548 856
2bcd3c26
FB
857 e = ellipsize(cmdline, 60, 100);
858 if (!e) {
859 log_oom();
860 r = CONFIRM_EXECUTE;
861 goto restore_stdio;
862 }
af6da548 863
d172b175 864 for (;;) {
539622bd 865 r = ask_char(&c, "yfshiDjcn", "Execute %s? [y, f, s – h for help] ", e);
d172b175 866 if (r < 0) {
63d77c92 867 write_confirm_error_fd(r, STDOUT_FILENO, u);
d172b175
FB
868 r = CONFIRM_EXECUTE;
869 goto restore_stdio;
870 }
af6da548 871
d172b175 872 switch (c) {
b0eb2944
FB
873 case 'c':
874 printf("Resuming normal execution.\n");
875 manager_disable_confirm_spawn();
876 r = 1;
877 break;
dd6f9ac0
FB
878 case 'D':
879 unit_dump(u, stdout, " ");
880 continue; /* ask again */
d172b175
FB
881 case 'f':
882 printf("Failing execution.\n");
883 r = CONFIRM_PRETEND_FAILURE;
884 break;
885 case 'h':
b0eb2944
FB
886 printf(" c - continue, proceed without asking anymore\n"
887 " D - dump, show the state of the unit\n"
dd6f9ac0 888 " f - fail, don't execute the command and pretend it failed\n"
d172b175 889 " h - help\n"
eedf223a 890 " i - info, show a short summary of the unit\n"
56fde33a 891 " j - jobs, show jobs that are in progress\n"
d172b175
FB
892 " s - skip, don't execute the command and pretend it succeeded\n"
893 " y - yes, execute the command\n");
dd6f9ac0 894 continue; /* ask again */
eedf223a
FB
895 case 'i':
896 printf(" Description: %s\n"
897 " Unit: %s\n"
898 " Command: %s\n",
899 u->id, u->description, cmdline);
900 continue; /* ask again */
56fde33a
FB
901 case 'j':
902 manager_dump_jobs(u->manager, stdout, " ");
903 continue; /* ask again */
539622bd
FB
904 case 'n':
905 /* 'n' was removed in favor of 'f'. */
906 printf("Didn't understand 'n', did you mean 'f'?\n");
907 continue; /* ask again */
d172b175
FB
908 case 's':
909 printf("Skipping execution.\n");
910 r = CONFIRM_PRETEND_SUCCESS;
911 break;
912 case 'y':
913 r = CONFIRM_EXECUTE;
914 break;
915 default:
916 assert_not_reached("Unhandled choice");
917 }
3b20f877 918 break;
3b20f877 919 }
af6da548 920
3b20f877 921restore_stdio:
af6da548 922 restore_confirm_stdio(&saved_stdin, &saved_stdout);
af6da548 923 return r;
80876c20
LP
924}
925
4d885bd3
DH
926static int get_fixed_user(const ExecContext *c, const char **user,
927 uid_t *uid, gid_t *gid,
928 const char **home, const char **shell) {
81a2b7ce 929 int r;
4d885bd3 930 const char *name;
81a2b7ce 931
4d885bd3 932 assert(c);
81a2b7ce 933
23deef88
LP
934 if (!c->user)
935 return 0;
936
4d885bd3
DH
937 /* Note that we don't set $HOME or $SHELL if they are not particularly enlightening anyway
938 * (i.e. are "/" or "/bin/nologin"). */
81a2b7ce 939
23deef88 940 name = c->user;
fafff8f1 941 r = get_user_creds(&name, uid, gid, home, shell, USER_CREDS_CLEAN);
4d885bd3
DH
942 if (r < 0)
943 return r;
81a2b7ce 944
4d885bd3
DH
945 *user = name;
946 return 0;
947}
948
949static int get_fixed_group(const ExecContext *c, const char **group, gid_t *gid) {
950 int r;
951 const char *name;
952
953 assert(c);
954
955 if (!c->group)
956 return 0;
957
958 name = c->group;
fafff8f1 959 r = get_group_creds(&name, gid, 0);
4d885bd3
DH
960 if (r < 0)
961 return r;
962
963 *group = name;
964 return 0;
965}
966
cdc5d5c5
DH
967static int get_supplementary_groups(const ExecContext *c, const char *user,
968 const char *group, gid_t gid,
969 gid_t **supplementary_gids, int *ngids) {
4d885bd3
DH
970 char **i;
971 int r, k = 0;
972 int ngroups_max;
973 bool keep_groups = false;
974 gid_t *groups = NULL;
975 _cleanup_free_ gid_t *l_gids = NULL;
976
977 assert(c);
978
bbeea271
DH
979 /*
980 * If user is given, then lookup GID and supplementary groups list.
981 * We avoid NSS lookups for gid=0. Also we have to initialize groups
cdc5d5c5
DH
982 * here and as early as possible so we keep the list of supplementary
983 * groups of the caller.
bbeea271
DH
984 */
985 if (user && gid_is_valid(gid) && gid != 0) {
986 /* First step, initialize groups from /etc/groups */
987 if (initgroups(user, gid) < 0)
988 return -errno;
989
990 keep_groups = true;
991 }
992
ac6e8be6 993 if (strv_isempty(c->supplementary_groups))
4d885bd3
DH
994 return 0;
995
366ddd25
DH
996 /*
997 * If SupplementaryGroups= was passed then NGROUPS_MAX has to
998 * be positive, otherwise fail.
999 */
1000 errno = 0;
1001 ngroups_max = (int) sysconf(_SC_NGROUPS_MAX);
1002 if (ngroups_max <= 0) {
1003 if (errno > 0)
1004 return -errno;
1005 else
1006 return -EOPNOTSUPP; /* For all other values */
1007 }
1008
4d885bd3
DH
1009 l_gids = new(gid_t, ngroups_max);
1010 if (!l_gids)
1011 return -ENOMEM;
81a2b7ce 1012
4d885bd3
DH
1013 if (keep_groups) {
1014 /*
1015 * Lookup the list of groups that the user belongs to, we
1016 * avoid NSS lookups here too for gid=0.
1017 */
1018 k = ngroups_max;
1019 if (getgrouplist(user, gid, l_gids, &k) < 0)
1020 return -EINVAL;
1021 } else
1022 k = 0;
81a2b7ce 1023
4d885bd3
DH
1024 STRV_FOREACH(i, c->supplementary_groups) {
1025 const char *g;
81a2b7ce 1026
4d885bd3
DH
1027 if (k >= ngroups_max)
1028 return -E2BIG;
81a2b7ce 1029
4d885bd3 1030 g = *i;
fafff8f1 1031 r = get_group_creds(&g, l_gids+k, 0);
4d885bd3
DH
1032 if (r < 0)
1033 return r;
81a2b7ce 1034
4d885bd3
DH
1035 k++;
1036 }
81a2b7ce 1037
4d885bd3
DH
1038 /*
1039 * Sets ngids to zero to drop all supplementary groups, happens
1040 * when we are under root and SupplementaryGroups= is empty.
1041 */
1042 if (k == 0) {
1043 *ngids = 0;
1044 return 0;
1045 }
81a2b7ce 1046
4d885bd3
DH
1047 /* Otherwise get the final list of supplementary groups */
1048 groups = memdup(l_gids, sizeof(gid_t) * k);
1049 if (!groups)
1050 return -ENOMEM;
1051
1052 *supplementary_gids = groups;
1053 *ngids = k;
1054
1055 groups = NULL;
1056
1057 return 0;
1058}
1059
/* Applies group credentials to the calling process: first the supplementary groups (only if ngids > 0),
 * then real, effective and saved GID (only if 'gid' is valid).
 *
 * Returns 0 on success, negative errno-style value on failure. */
static int enforce_groups(gid_t gid, const gid_t *supplementary_gids, int ngids) {

        /* Only touch the supplementary groups if there are any to set */
        if (ngids > 0) {
                int r = maybe_setgroups(ngids, supplementary_gids);
                if (r < 0)
                        return r;
        }

        if (!gid_is_valid(gid))
                return 0;

        /* Then set our gids */
        if (setresgid(gid, gid, gid) < 0)
                return -errno;

        return 0;
}
1078
1079static int enforce_user(const ExecContext *context, uid_t uid) {
81a2b7ce
LP
1080 assert(context);
1081
4d885bd3
DH
1082 if (!uid_is_valid(uid))
1083 return 0;
1084
479050b3 1085 /* Sets (but doesn't look up) the uid and make sure we keep the
81a2b7ce
LP
1086 * capabilities while doing so. */
1087
479050b3 1088 if (context->capability_ambient_set != 0) {
81a2b7ce
LP
1089
1090 /* First step: If we need to keep capabilities but
1091 * drop privileges we need to make sure we keep our
cbb21cca 1092 * caps, while we drop privileges. */
693ced48 1093 if (uid != 0) {
cbb21cca 1094 int sb = context->secure_bits | 1<<SECURE_KEEP_CAPS;
693ced48
LP
1095
1096 if (prctl(PR_GET_SECUREBITS) != sb)
1097 if (prctl(PR_SET_SECUREBITS, sb) < 0)
1098 return -errno;
1099 }
81a2b7ce
LP
1100 }
1101
479050b3 1102 /* Second step: actually set the uids */
81a2b7ce
LP
1103 if (setresuid(uid, uid, uid) < 0)
1104 return -errno;
1105
1106 /* At this point we should have all necessary capabilities but
1107 are otherwise a normal user. However, the caps might got
1108 corrupted due to the setresuid() so we need clean them up
1109 later. This is done outside of this call. */
1110
1111 return 0;
1112}
1113
349cc4a5 1114#if HAVE_PAM
5b6319dc
LP
1115
1116static int null_conv(
1117 int num_msg,
1118 const struct pam_message **msg,
1119 struct pam_response **resp,
1120 void *appdata_ptr) {
1121
1122 /* We don't support conversations */
1123
1124 return PAM_CONV_ERR;
1125}
1126
cefc33ae
LP
1127#endif
1128
5b6319dc
LP
/* Opens a PAM session for the service that is about to be executed.
 *
 *   name       PAM service name, passed to pam_start()
 *   user       user name, passed to pam_start()
 *   uid, gid   credentials the "(sd-pam)" helper process drops to
 *   tty        if non-NULL, registered as PAM_TTY; otherwise the TTY behind stdin is used, if any
 *   env        *env is exported into the PAM environment; on success it is replaced by the
 *              environment list PAM hands back
 *   fds/n_fds  file descriptors that are closed in the helper process
 *
 * Returns 0 on success (and always when built without PAM support), -EPERM if a PAM call failed, or
 * another negative errno-style error if setting up the barrier or forking failed. */
static int setup_pam(
                const char *name,
                const char *user,
                uid_t uid,
                gid_t gid,
                const char *tty,
                char ***env,
                int fds[], size_t n_fds) {

#if HAVE_PAM

        static const struct pam_conv conv = {
                .conv = null_conv,
                .appdata_ptr = NULL
        };

        _cleanup_(barrier_destroy) Barrier barrier = BARRIER_NULL;
        pam_handle_t *handle = NULL;
        sigset_t old_ss;
        int pam_code = PAM_SUCCESS, r;
        char **nv, **e = NULL;
        bool close_session = false;
        pid_t pam_pid = 0, parent_pid;
        int flags = 0;

        assert(name);
        assert(user);
        assert(env);

        /* We set up PAM in the parent process, then fork. The child
         * will then stay around until killed via PR_SET_PDEATHSIG or
         * systemd via the cgroup logic. It will then remove the PAM
         * session again. The parent process will exec() the actual
         * daemon. We do things this way to ensure that the main PID
         * of the daemon is the one we initially fork()ed. */

        r = barrier_create(&barrier);
        if (r < 0)
                goto fail;

        /* Keep PAM quiet unless we are logging at debug level */
        if (log_get_max_level() < LOG_DEBUG)
                flags |= PAM_SILENT;

        pam_code = pam_start(name, user, &conv, &handle);
        if (pam_code != PAM_SUCCESS) {
                handle = NULL;
                goto fail;
        }

        if (!tty) {
                _cleanup_free_ char *q = NULL;

                /* Hmm, so no TTY was explicitly passed, but an fd passed to us directly might be a TTY. Let's figure
                 * out if that's the case, and read the TTY off it. */

                if (getttyname_malloc(STDIN_FILENO, &q) >= 0)
                        tty = strjoina("/dev/", q);
        }

        if (tty) {
                pam_code = pam_set_item(handle, PAM_TTY, tty);
                if (pam_code != PAM_SUCCESS)
                        goto fail;
        }

        /* Export the environment we were passed into the PAM environment */
        STRV_FOREACH(nv, *env) {
                pam_code = pam_putenv(handle, *nv);
                if (pam_code != PAM_SUCCESS)
                        goto fail;
        }

        pam_code = pam_acct_mgmt(handle, flags);
        if (pam_code != PAM_SUCCESS)
                goto fail;

        pam_code = pam_open_session(handle, flags);
        if (pam_code != PAM_SUCCESS)
                goto fail;

        /* From here on the failure path has to close the session again */
        close_session = true;

        e = pam_getenvlist(handle);
        if (!e) {
                pam_code = PAM_BUF_ERR;
                goto fail;
        }

        /* Block SIGTERM, so that we know that it won't get lost in
         * the child */

        assert_se(sigprocmask_many(SIG_BLOCK, &old_ss, SIGTERM, -1) >= 0);

        parent_pid = getpid_cached();

        r = safe_fork("(sd-pam)", 0, &pam_pid);
        if (r < 0)
                goto fail;
        if (r == 0) {
                int sig, ret = EXIT_PAM;

                /* The child's job is to reset the PAM session on
                 * termination */
                barrier_set_role(&barrier, BARRIER_CHILD);

                /* Make sure we don't keep open the passed fds in this child. We assume that otherwise only those fds
                 * are open here that have been opened by PAM. */
                (void) close_many(fds, n_fds);

                /* Drop privileges - we don't need any to pam_close_session
                 * and this will make PR_SET_PDEATHSIG work in most cases.
                 * If this fails, ignore the error - but expect sd-pam threads
                 * to fail to exit normally */

                r = maybe_setgroups(0, NULL);
                if (r < 0)
                        log_warning_errno(r, "Failed to setgroups() in sd-pam: %m");
                if (setresgid(gid, gid, gid) < 0)
                        log_warning_errno(errno, "Failed to setresgid() in sd-pam: %m");
                if (setresuid(uid, uid, uid) < 0)
                        log_warning_errno(errno, "Failed to setresuid() in sd-pam: %m");

                (void) ignore_signals(SIGPIPE, -1);

                /* Wait until our parent died. This will only work if
                 * the above setresuid() succeeds, otherwise the kernel
                 * will not allow unprivileged parents kill their privileged
                 * children this way. We rely on the control groups kill logic
                 * to do the rest for us. */
                if (prctl(PR_SET_PDEATHSIG, SIGTERM) < 0)
                        goto child_finish;

                /* Tell the parent that our setup is done. This is especially
                 * important regarding dropping privileges. Otherwise, unit
                 * setup might race against our setresuid(2) call.
                 *
                 * If the parent aborted, we'll detect this below, hence ignore
                 * return failure here. */
                (void) barrier_place(&barrier);

                /* Check if our parent process might already have died? */
                if (getppid() == parent_pid) {
                        sigset_t ss;

                        assert_se(sigemptyset(&ss) >= 0);
                        assert_se(sigaddset(&ss, SIGTERM) >= 0);

                        for (;;) {
                                int k;

                                /* sigwait() reports failure through its return value (a positive error
                                 * number); it neither returns a negative value nor sets errno. */
                                k = sigwait(&ss, &sig);
                                if (k == EINTR)
                                        continue;
                                if (k != 0)
                                        goto child_finish;

                                assert(sig == SIGTERM);
                                break;
                        }
                }

                /* If our parent died we'll end the session */
                if (getppid() != parent_pid) {
                        pam_code = pam_close_session(handle, flags);
                        if (pam_code != PAM_SUCCESS)
                                goto child_finish;
                }

                ret = 0;

        child_finish:
                pam_end(handle, pam_code | flags);
                _exit(ret);
        }

        barrier_set_role(&barrier, BARRIER_PARENT);

        /* If the child was forked off successfully it will do all the
         * cleanups, so forget about the handle here. */
        handle = NULL;

        /* Unblock SIGTERM again in the parent */
        assert_se(sigprocmask(SIG_SETMASK, &old_ss, NULL) >= 0);

        /* We close the log explicitly here, since the PAM modules
         * might have opened it, but we don't want this fd around. */
        closelog();

        /* Synchronously wait for the child to initialize. We don't care for
         * errors as we cannot recover. However, warn loudly if it happens. */
        if (!barrier_place_and_sync(&barrier))
                log_error("PAM initialization failed");

        return strv_free_and_replace(*env, e);

fail:
        if (pam_code != PAM_SUCCESS) {
                log_error("PAM failed: %s", pam_strerror(handle, pam_code));
                r = -EPERM; /* PAM errors do not map to errno */
        } else
                log_error_errno(r, "PAM failed: %m");

        if (handle) {
                if (close_session)
                        pam_code = pam_close_session(handle, flags);

                pam_end(handle, pam_code | flags);
        }

        strv_free(e);
        closelog();

        return r;
#else
        return 0;
#endif
}
5b6319dc 1344
5d6b1584
LP
/* Renames the calling process to "(<name>)", where <name> is derived from the last component of path.
 * If that component is empty the process is renamed to "(...)" instead. */
static void rename_process_from_path(const char *path) {
        char buf[11];
        const char *base;
        size_t n;

        /* The resulting string must fit in 10 chars (i.e. the length of "/sbin/init") to look pretty in
         * /bin/ps: two parentheses plus at most 8 characters of the name. */

        base = basename(path);
        if (isempty(base)) {
                rename_process("(...)");
                return;
        }

        n = strlen(base);
        if (n > 8) {
                /* The end of the process name is usually more interesting, since the first bit might
                 * just be "systemd-", hence keep the trailing 8 characters. */
                base += n - 8;
                n = 8;
        }

        buf[0] = '(';
        memcpy(buf + 1, base, n);
        buf[n + 1] = ')';
        buf[n + 2] = 0;

        rename_process(buf);
}
1375
469830d1
LP
1376static bool context_has_address_families(const ExecContext *c) {
1377 assert(c);
1378
1379 return c->address_families_whitelist ||
1380 !set_isempty(c->address_families);
1381}
1382
1383static bool context_has_syscall_filters(const ExecContext *c) {
1384 assert(c);
1385
1386 return c->syscall_whitelist ||
8cfa775f 1387 !hashmap_isempty(c->syscall_filter);
469830d1
LP
1388}
1389
/* Decides whether "no new privileges" applies to this context: either because it was requested
 * explicitly, or because we lack CAP_SYS_ADMIN and one of the seccomp-backed settings is in use. */
static bool context_has_no_new_privileges(const ExecContext *c) {
        assert(c);

        if (c->no_new_privileges)
                return true;

        if (have_effective_cap(CAP_SYS_ADMIN)) /* if we are privileged, we don't need NNP */
                return false;

        /* We need NNP if we have any form of seccomp and are unprivileged */
        if (context_has_address_families(c))
                return true;

        if (c->memory_deny_write_execute ||
            c->restrict_realtime ||
            c->restrict_suid_sgid)
                return true;

        if (exec_context_restrict_namespaces_set(c))
                return true;

        if (c->protect_kernel_tunables ||
            c->protect_kernel_modules ||
            c->private_devices)
                return true;

        if (context_has_syscall_filters(c))
                return true;

        if (!set_isempty(c->syscall_archs))
                return true;

        return c->lock_personality || c->protect_hostname;
}
1413
349cc4a5 1414#if HAVE_SECCOMP
17df7223 1415
83f12b27 1416static bool skip_seccomp_unavailable(const Unit* u, const char* msg) {
f673b62d
LP
1417
1418 if (is_seccomp_available())
1419 return false;
1420
f673b62d 1421 log_unit_debug(u, "SECCOMP features not detected in the kernel, skipping %s", msg);
f673b62d 1422 return true;
83f12b27
FS
1423}
1424
165a31c0 1425static int apply_syscall_filter(const Unit* u, const ExecContext *c, bool needs_ambient_hack) {
469830d1 1426 uint32_t negative_action, default_action, action;
165a31c0 1427 int r;
8351ceae 1428
469830d1 1429 assert(u);
c0467cf3 1430 assert(c);
8351ceae 1431
469830d1 1432 if (!context_has_syscall_filters(c))
83f12b27
FS
1433 return 0;
1434
469830d1
LP
1435 if (skip_seccomp_unavailable(u, "SystemCallFilter="))
1436 return 0;
e9642be2 1437
ccc16c78 1438 negative_action = c->syscall_errno == 0 ? scmp_act_kill_process() : SCMP_ACT_ERRNO(c->syscall_errno);
e9642be2 1439
469830d1
LP
1440 if (c->syscall_whitelist) {
1441 default_action = negative_action;
1442 action = SCMP_ACT_ALLOW;
7c66bae2 1443 } else {
469830d1
LP
1444 default_action = SCMP_ACT_ALLOW;
1445 action = negative_action;
57183d11 1446 }
8351ceae 1447
165a31c0
LP
1448 if (needs_ambient_hack) {
1449 r = seccomp_filter_set_add(c->syscall_filter, c->syscall_whitelist, syscall_filter_sets + SYSCALL_FILTER_SET_SETUID);
1450 if (r < 0)
1451 return r;
1452 }
1453
b54f36c6 1454 return seccomp_load_syscall_filter_set_raw(default_action, c->syscall_filter, action, false);
4298d0b5
LP
1455}
1456
469830d1
LP
1457static int apply_syscall_archs(const Unit *u, const ExecContext *c) {
1458 assert(u);
4298d0b5
LP
1459 assert(c);
1460
469830d1 1461 if (set_isempty(c->syscall_archs))
83f12b27
FS
1462 return 0;
1463
469830d1
LP
1464 if (skip_seccomp_unavailable(u, "SystemCallArchitectures="))
1465 return 0;
4298d0b5 1466
469830d1
LP
1467 return seccomp_restrict_archs(c->syscall_archs);
1468}
4298d0b5 1469
469830d1
LP
1470static int apply_address_families(const Unit* u, const ExecContext *c) {
1471 assert(u);
1472 assert(c);
4298d0b5 1473
469830d1
LP
1474 if (!context_has_address_families(c))
1475 return 0;
4298d0b5 1476
469830d1
LP
1477 if (skip_seccomp_unavailable(u, "RestrictAddressFamilies="))
1478 return 0;
4298d0b5 1479
469830d1 1480 return seccomp_restrict_address_families(c->address_families, c->address_families_whitelist);
8351ceae 1481}
4298d0b5 1482
83f12b27 1483static int apply_memory_deny_write_execute(const Unit* u, const ExecContext *c) {
469830d1 1484 assert(u);
f3e43635
TM
1485 assert(c);
1486
469830d1 1487 if (!c->memory_deny_write_execute)
83f12b27
FS
1488 return 0;
1489
469830d1
LP
1490 if (skip_seccomp_unavailable(u, "MemoryDenyWriteExecute="))
1491 return 0;
f3e43635 1492
469830d1 1493 return seccomp_memory_deny_write_execute();
f3e43635
TM
1494}
1495
83f12b27 1496static int apply_restrict_realtime(const Unit* u, const ExecContext *c) {
469830d1 1497 assert(u);
f4170c67
LP
1498 assert(c);
1499
469830d1 1500 if (!c->restrict_realtime)
83f12b27
FS
1501 return 0;
1502
469830d1
LP
1503 if (skip_seccomp_unavailable(u, "RestrictRealtime="))
1504 return 0;
f4170c67 1505
469830d1 1506 return seccomp_restrict_realtime();
f4170c67
LP
1507}
1508
f69567cb
LP
1509static int apply_restrict_suid_sgid(const Unit* u, const ExecContext *c) {
1510 assert(u);
1511 assert(c);
1512
1513 if (!c->restrict_suid_sgid)
1514 return 0;
1515
1516 if (skip_seccomp_unavailable(u, "RestrictSUIDSGID="))
1517 return 0;
1518
1519 return seccomp_restrict_suid_sgid();
1520}
1521
59e856c7 1522static int apply_protect_sysctl(const Unit *u, const ExecContext *c) {
469830d1 1523 assert(u);
59eeb84b
LP
1524 assert(c);
1525
1526 /* Turn off the legacy sysctl() system call. Many distributions turn this off while building the kernel, but
1527 * let's protect even those systems where this is left on in the kernel. */
1528
469830d1 1529 if (!c->protect_kernel_tunables)
59eeb84b
LP
1530 return 0;
1531
469830d1
LP
1532 if (skip_seccomp_unavailable(u, "ProtectKernelTunables="))
1533 return 0;
59eeb84b 1534
469830d1 1535 return seccomp_protect_sysctl();
59eeb84b
LP
1536}
1537
59e856c7 1538static int apply_protect_kernel_modules(const Unit *u, const ExecContext *c) {
469830d1 1539 assert(u);
502d704e
DH
1540 assert(c);
1541
25a8d8a0 1542 /* Turn off module syscalls on ProtectKernelModules=yes */
502d704e 1543
469830d1
LP
1544 if (!c->protect_kernel_modules)
1545 return 0;
1546
502d704e
DH
1547 if (skip_seccomp_unavailable(u, "ProtectKernelModules="))
1548 return 0;
1549
b54f36c6 1550 return seccomp_load_syscall_filter_set(SCMP_ACT_ALLOW, syscall_filter_sets + SYSCALL_FILTER_SET_MODULE, SCMP_ACT_ERRNO(EPERM), false);
502d704e
DH
1551}
1552
59e856c7 1553static int apply_private_devices(const Unit *u, const ExecContext *c) {
469830d1 1554 assert(u);
ba128bb8
LP
1555 assert(c);
1556
8f81a5f6 1557 /* If PrivateDevices= is set, also turn off iopl and all @raw-io syscalls. */
ba128bb8 1558
469830d1
LP
1559 if (!c->private_devices)
1560 return 0;
1561
ba128bb8
LP
1562 if (skip_seccomp_unavailable(u, "PrivateDevices="))
1563 return 0;
1564
b54f36c6 1565 return seccomp_load_syscall_filter_set(SCMP_ACT_ALLOW, syscall_filter_sets + SYSCALL_FILTER_SET_RAW_IO, SCMP_ACT_ERRNO(EPERM), false);
ba128bb8
LP
1566}
1567
34cf6c43 1568static int apply_restrict_namespaces(const Unit *u, const ExecContext *c) {
469830d1 1569 assert(u);
add00535
LP
1570 assert(c);
1571
1572 if (!exec_context_restrict_namespaces_set(c))
1573 return 0;
1574
1575 if (skip_seccomp_unavailable(u, "RestrictNamespaces="))
1576 return 0;
1577
1578 return seccomp_restrict_namespaces(c->restrict_namespaces);
1579}
1580
78e864e5 1581static int apply_lock_personality(const Unit* u, const ExecContext *c) {
e8132d63
LP
1582 unsigned long personality;
1583 int r;
78e864e5
TM
1584
1585 assert(u);
1586 assert(c);
1587
1588 if (!c->lock_personality)
1589 return 0;
1590
1591 if (skip_seccomp_unavailable(u, "LockPersonality="))
1592 return 0;
1593
e8132d63
LP
1594 personality = c->personality;
1595
1596 /* If personality is not specified, use either PER_LINUX or PER_LINUX32 depending on what is currently set. */
1597 if (personality == PERSONALITY_INVALID) {
1598
1599 r = opinionated_personality(&personality);
1600 if (r < 0)
1601 return r;
1602 }
78e864e5
TM
1603
1604 return seccomp_lock_personality(personality);
1605}
1606
c0467cf3 1607#endif
8351ceae 1608
3042bbeb 1609static void do_idle_pipe_dance(int idle_pipe[static 4]) {
31a7eb86
ZJS
1610 assert(idle_pipe);
1611
54eb2300
LP
1612 idle_pipe[1] = safe_close(idle_pipe[1]);
1613 idle_pipe[2] = safe_close(idle_pipe[2]);
31a7eb86
ZJS
1614
1615 if (idle_pipe[0] >= 0) {
1616 int r;
1617
1618 r = fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT_USEC);
1619
1620 if (idle_pipe[3] >= 0 && r == 0 /* timeout */) {
c7cc737f
LP
1621 ssize_t n;
1622
31a7eb86 1623 /* Signal systemd that we are bored and want to continue. */
c7cc737f
LP
1624 n = write(idle_pipe[3], "x", 1);
1625 if (n > 0)
cd972d69
ZJS
1626 /* Wait for systemd to react to the signal above. */
1627 fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT2_USEC);
31a7eb86
ZJS
1628 }
1629
54eb2300 1630 idle_pipe[0] = safe_close(idle_pipe[0]);
31a7eb86
ZJS
1631
1632 }
1633
54eb2300 1634 idle_pipe[3] = safe_close(idle_pipe[3]);
31a7eb86
ZJS
1635}
1636
fb2042dd
YW
1637static const char *exec_directory_env_name_to_string(ExecDirectoryType t);
1638
7cae38c4 1639static int build_environment(
34cf6c43 1640 const Unit *u,
9fa95f85 1641 const ExecContext *c,
1e22b5cd 1642 const ExecParameters *p,
da6053d0 1643 size_t n_fds,
7cae38c4
LP
1644 const char *home,
1645 const char *username,
1646 const char *shell,
7bce046b
LP
1647 dev_t journal_stream_dev,
1648 ino_t journal_stream_ino,
7cae38c4
LP
1649 char ***ret) {
1650
1651 _cleanup_strv_free_ char **our_env = NULL;
fb2042dd 1652 ExecDirectoryType t;
da6053d0 1653 size_t n_env = 0;
7cae38c4
LP
1654 char *x;
1655
4b58153d 1656 assert(u);
7cae38c4 1657 assert(c);
7c1cb6f1 1658 assert(p);
7cae38c4
LP
1659 assert(ret);
1660
fb2042dd 1661 our_env = new0(char*, 14 + _EXEC_DIRECTORY_TYPE_MAX);
7cae38c4
LP
1662 if (!our_env)
1663 return -ENOMEM;
1664
1665 if (n_fds > 0) {
8dd4c05b
LP
1666 _cleanup_free_ char *joined = NULL;
1667
df0ff127 1668 if (asprintf(&x, "LISTEN_PID="PID_FMT, getpid_cached()) < 0)
7cae38c4
LP
1669 return -ENOMEM;
1670 our_env[n_env++] = x;
1671
da6053d0 1672 if (asprintf(&x, "LISTEN_FDS=%zu", n_fds) < 0)
7cae38c4
LP
1673 return -ENOMEM;
1674 our_env[n_env++] = x;
8dd4c05b 1675
1e22b5cd 1676 joined = strv_join(p->fd_names, ":");
8dd4c05b
LP
1677 if (!joined)
1678 return -ENOMEM;
1679
605405c6 1680 x = strjoin("LISTEN_FDNAMES=", joined);
8dd4c05b
LP
1681 if (!x)
1682 return -ENOMEM;
1683 our_env[n_env++] = x;
7cae38c4
LP
1684 }
1685
b08af3b1 1686 if ((p->flags & EXEC_SET_WATCHDOG) && p->watchdog_usec > 0) {
df0ff127 1687 if (asprintf(&x, "WATCHDOG_PID="PID_FMT, getpid_cached()) < 0)
09812eb7
LP
1688 return -ENOMEM;
1689 our_env[n_env++] = x;
1690
1e22b5cd 1691 if (asprintf(&x, "WATCHDOG_USEC="USEC_FMT, p->watchdog_usec) < 0)
09812eb7
LP
1692 return -ENOMEM;
1693 our_env[n_env++] = x;
1694 }
1695
fd63e712
LP
1696 /* If this is D-Bus, tell the nss-systemd module, since it relies on being able to use D-Bus look up dynamic
1697 * users via PID 1, possibly dead-locking the dbus daemon. This way it will not use D-Bus to resolve names, but
1698 * check the database directly. */
ac647978 1699 if (p->flags & EXEC_NSS_BYPASS_BUS) {
fd63e712
LP
1700 x = strdup("SYSTEMD_NSS_BYPASS_BUS=1");
1701 if (!x)
1702 return -ENOMEM;
1703 our_env[n_env++] = x;
1704 }
1705
7cae38c4
LP
1706 if (home) {
1707 x = strappend("HOME=", home);
1708 if (!x)
1709 return -ENOMEM;
7bbead1d
LP
1710
1711 path_simplify(x + 5, true);
7cae38c4
LP
1712 our_env[n_env++] = x;
1713 }
1714
1715 if (username) {
1716 x = strappend("LOGNAME=", username);
1717 if (!x)
1718 return -ENOMEM;
1719 our_env[n_env++] = x;
1720
1721 x = strappend("USER=", username);
1722 if (!x)
1723 return -ENOMEM;
1724 our_env[n_env++] = x;
1725 }
1726
1727 if (shell) {
1728 x = strappend("SHELL=", shell);
1729 if (!x)
1730 return -ENOMEM;
7bbead1d
LP
1731
1732 path_simplify(x + 6, true);
7cae38c4
LP
1733 our_env[n_env++] = x;
1734 }
1735
4b58153d
LP
1736 if (!sd_id128_is_null(u->invocation_id)) {
1737 if (asprintf(&x, "INVOCATION_ID=" SD_ID128_FORMAT_STR, SD_ID128_FORMAT_VAL(u->invocation_id)) < 0)
1738 return -ENOMEM;
1739
1740 our_env[n_env++] = x;
1741 }
1742
6af760f3
LP
1743 if (exec_context_needs_term(c)) {
1744 const char *tty_path, *term = NULL;
1745
1746 tty_path = exec_context_tty_path(c);
1747
1748 /* If we are forked off PID 1 and we are supposed to operate on /dev/console, then let's try to inherit
1749 * the $TERM set for PID 1. This is useful for containers so that the $TERM the container manager
1750 * passes to PID 1 ends up all the way in the console login shown. */
1751
1752 if (path_equal(tty_path, "/dev/console") && getppid() == 1)
1753 term = getenv("TERM");
1754 if (!term)
1755 term = default_term_for_tty(tty_path);
7cae38c4 1756
6af760f3 1757 x = strappend("TERM=", term);
7cae38c4
LP
1758 if (!x)
1759 return -ENOMEM;
1760 our_env[n_env++] = x;
1761 }
1762
7bce046b
LP
1763 if (journal_stream_dev != 0 && journal_stream_ino != 0) {
1764 if (asprintf(&x, "JOURNAL_STREAM=" DEV_FMT ":" INO_FMT, journal_stream_dev, journal_stream_ino) < 0)
1765 return -ENOMEM;
1766
1767 our_env[n_env++] = x;
1768 }
1769
fb2042dd
YW
1770 for (t = 0; t < _EXEC_DIRECTORY_TYPE_MAX; t++) {
1771 _cleanup_free_ char *pre = NULL, *joined = NULL;
1772 const char *n;
1773
1774 if (!p->prefix[t])
1775 continue;
1776
1777 if (strv_isempty(c->directories[t].paths))
1778 continue;
1779
1780 n = exec_directory_env_name_to_string(t);
1781 if (!n)
1782 continue;
1783
1784 pre = strjoin(p->prefix[t], "/");
1785 if (!pre)
1786 return -ENOMEM;
1787
1788 joined = strv_join_prefix(c->directories[t].paths, ":", pre);
1789 if (!joined)
1790 return -ENOMEM;
1791
1792 x = strjoin(n, "=", joined);
1793 if (!x)
1794 return -ENOMEM;
1795
1796 our_env[n_env++] = x;
1797 }
1798
7cae38c4 1799 our_env[n_env++] = NULL;
fb2042dd 1800 assert(n_env <= 14 + _EXEC_DIRECTORY_TYPE_MAX);
7cae38c4 1801
ae2a15bc 1802 *ret = TAKE_PTR(our_env);
7cae38c4
LP
1803
1804 return 0;
1805}
1806
b4c14404
FB
1807static int build_pass_environment(const ExecContext *c, char ***ret) {
1808 _cleanup_strv_free_ char **pass_env = NULL;
1809 size_t n_env = 0, n_bufsize = 0;
1810 char **i;
1811
1812 STRV_FOREACH(i, c->pass_environment) {
1813 _cleanup_free_ char *x = NULL;
1814 char *v;
1815
1816 v = getenv(*i);
1817 if (!v)
1818 continue;
605405c6 1819 x = strjoin(*i, "=", v);
b4c14404
FB
1820 if (!x)
1821 return -ENOMEM;
00819cc1 1822
b4c14404
FB
1823 if (!GREEDY_REALLOC(pass_env, n_bufsize, n_env + 2))
1824 return -ENOMEM;
00819cc1 1825
1cc6c93a 1826 pass_env[n_env++] = TAKE_PTR(x);
b4c14404 1827 pass_env[n_env] = NULL;
b4c14404
FB
1828 }
1829
ae2a15bc 1830 *ret = TAKE_PTR(pass_env);
b4c14404
FB
1831
1832 return 0;
1833}
1834
8b44a3d2
LP
1835static bool exec_needs_mount_namespace(
1836 const ExecContext *context,
1837 const ExecParameters *params,
4657abb5 1838 const ExecRuntime *runtime) {
8b44a3d2
LP
1839
1840 assert(context);
1841 assert(params);
1842
915e6d16
LP
1843 if (context->root_image)
1844 return true;
1845
2a624c36
AP
1846 if (!strv_isempty(context->read_write_paths) ||
1847 !strv_isempty(context->read_only_paths) ||
1848 !strv_isempty(context->inaccessible_paths))
8b44a3d2
LP
1849 return true;
1850
42b1d8e0 1851 if (context->n_bind_mounts > 0)
d2d6c096
LP
1852 return true;
1853
2abd4e38
YW
1854 if (context->n_temporary_filesystems > 0)
1855 return true;
1856
37ed15d7 1857 if (!IN_SET(context->mount_flags, 0, MS_SHARED))
8b44a3d2
LP
1858 return true;
1859
1860 if (context->private_tmp && runtime && (runtime->tmp_dir || runtime->var_tmp_dir))
1861 return true;
1862
8b44a3d2 1863 if (context->private_devices ||
228af36f 1864 context->private_mounts ||
8b44a3d2 1865 context->protect_system != PROTECT_SYSTEM_NO ||
59eeb84b
LP
1866 context->protect_home != PROTECT_HOME_NO ||
1867 context->protect_kernel_tunables ||
c575770b 1868 context->protect_kernel_modules ||
59eeb84b 1869 context->protect_control_groups)
8b44a3d2
LP
1870 return true;
1871
37c56f89
YW
1872 if (context->root_directory) {
1873 ExecDirectoryType t;
1874
1875 if (context->mount_apivfs)
1876 return true;
1877
1878 for (t = 0; t < _EXEC_DIRECTORY_TYPE_MAX; t++) {
1879 if (!params->prefix[t])
1880 continue;
1881
1882 if (!strv_isempty(context->directories[t].paths))
1883 return true;
1884 }
1885 }
5d997827 1886
42b1d8e0 1887 if (context->dynamic_user &&
b43ee82f 1888 (!strv_isempty(context->directories[EXEC_DIRECTORY_STATE].paths) ||
42b1d8e0
YW
1889 !strv_isempty(context->directories[EXEC_DIRECTORY_CACHE].paths) ||
1890 !strv_isempty(context->directories[EXEC_DIRECTORY_LOGS].paths)))
1891 return true;
1892
8b44a3d2
LP
1893 return false;
1894}
1895
d251207d
LP
1896static int setup_private_users(uid_t uid, gid_t gid) {
1897 _cleanup_free_ char *uid_map = NULL, *gid_map = NULL;
1898 _cleanup_close_pair_ int errno_pipe[2] = { -1, -1 };
1899 _cleanup_close_ int unshare_ready_fd = -1;
1900 _cleanup_(sigkill_waitp) pid_t pid = 0;
1901 uint64_t c = 1;
d251207d
LP
1902 ssize_t n;
1903 int r;
1904
1905 /* Set up a user namespace and map root to root, the selected UID/GID to itself, and everything else to
1906 * nobody. In order to be able to write this mapping we need CAP_SETUID in the original user namespace, which
1907 * we however lack after opening the user namespace. To work around this we fork() a temporary child process,
1908 * which waits for the parent to create the new user namespace while staying in the original namespace. The
1909 * child then writes the UID mapping, under full privileges. The parent waits for the child to finish and
1910 * continues execution normally. */
1911
587ab01b
ZJS
1912 if (uid != 0 && uid_is_valid(uid)) {
1913 r = asprintf(&uid_map,
1914 "0 0 1\n" /* Map root → root */
1915 UID_FMT " " UID_FMT " 1\n", /* Map $UID → $UID */
1916 uid, uid);
1917 if (r < 0)
1918 return -ENOMEM;
1919 } else {
e0f3720e 1920 uid_map = strdup("0 0 1\n"); /* The case where the above is the same */
587ab01b
ZJS
1921 if (!uid_map)
1922 return -ENOMEM;
1923 }
d251207d 1924
587ab01b
ZJS
1925 if (gid != 0 && gid_is_valid(gid)) {
1926 r = asprintf(&gid_map,
1927 "0 0 1\n" /* Map root → root */
1928 GID_FMT " " GID_FMT " 1\n", /* Map $GID → $GID */
1929 gid, gid);
1930 if (r < 0)
1931 return -ENOMEM;
1932 } else {
d251207d 1933 gid_map = strdup("0 0 1\n"); /* The case where the above is the same */
587ab01b
ZJS
1934 if (!gid_map)
1935 return -ENOMEM;
1936 }
d251207d
LP
1937
1938 /* Create a communication channel so that the parent can tell the child when it finished creating the user
1939 * namespace. */
1940 unshare_ready_fd = eventfd(0, EFD_CLOEXEC);
1941 if (unshare_ready_fd < 0)
1942 return -errno;
1943
1944 /* Create a communication channel so that the child can tell the parent a proper error code in case it
1945 * failed. */
1946 if (pipe2(errno_pipe, O_CLOEXEC) < 0)
1947 return -errno;
1948
4c253ed1
LP
1949 r = safe_fork("(sd-userns)", FORK_RESET_SIGNALS|FORK_DEATHSIG, &pid);
1950 if (r < 0)
1951 return r;
1952 if (r == 0) {
d251207d
LP
1953 _cleanup_close_ int fd = -1;
1954 const char *a;
1955 pid_t ppid;
1956
1957 /* Child process, running in the original user namespace. Let's update the parent's UID/GID map from
1958 * here, after the parent opened its own user namespace. */
1959
1960 ppid = getppid();
1961 errno_pipe[0] = safe_close(errno_pipe[0]);
1962
1963 /* Wait until the parent unshared the user namespace */
1964 if (read(unshare_ready_fd, &c, sizeof(c)) < 0) {
1965 r = -errno;
1966 goto child_fail;
1967 }
1968
1969 /* Disable the setgroups() system call in the child user namespace, for good. */
1970 a = procfs_file_alloca(ppid, "setgroups");
1971 fd = open(a, O_WRONLY|O_CLOEXEC);
1972 if (fd < 0) {
1973 if (errno != ENOENT) {
1974 r = -errno;
1975 goto child_fail;
1976 }
1977
1978 /* If the file is missing the kernel is too old, let's continue anyway. */
1979 } else {
1980 if (write(fd, "deny\n", 5) < 0) {
1981 r = -errno;
1982 goto child_fail;
1983 }
1984
1985 fd = safe_close(fd);
1986 }
1987
1988 /* First write the GID map */
1989 a = procfs_file_alloca(ppid, "gid_map");
1990 fd = open(a, O_WRONLY|O_CLOEXEC);
1991 if (fd < 0) {
1992 r = -errno;
1993 goto child_fail;
1994 }
1995 if (write(fd, gid_map, strlen(gid_map)) < 0) {
1996 r = -errno;
1997 goto child_fail;
1998 }
1999 fd = safe_close(fd);
2000
2001 /* The write the UID map */
2002 a = procfs_file_alloca(ppid, "uid_map");
2003 fd = open(a, O_WRONLY|O_CLOEXEC);
2004 if (fd < 0) {
2005 r = -errno;
2006 goto child_fail;
2007 }
2008 if (write(fd, uid_map, strlen(uid_map)) < 0) {
2009 r = -errno;
2010 goto child_fail;
2011 }
2012
2013 _exit(EXIT_SUCCESS);
2014
2015 child_fail:
2016 (void) write(errno_pipe[1], &r, sizeof(r));
2017 _exit(EXIT_FAILURE);
2018 }
2019
2020 errno_pipe[1] = safe_close(errno_pipe[1]);
2021
2022 if (unshare(CLONE_NEWUSER) < 0)
2023 return -errno;
2024
2025 /* Let the child know that the namespace is ready now */
2026 if (write(unshare_ready_fd, &c, sizeof(c)) < 0)
2027 return -errno;
2028
2029 /* Try to read an error code from the child */
2030 n = read(errno_pipe[0], &r, sizeof(r));
2031 if (n < 0)
2032 return -errno;
2033 if (n == sizeof(r)) { /* an error code was sent to us */
2034 if (r < 0)
2035 return r;
2036 return -EIO;
2037 }
2038 if (n != 0) /* on success we should have read 0 bytes */
2039 return -EIO;
2040
2e87a1fd
LP
2041 r = wait_for_terminate_and_check("(sd-userns)", pid, 0);
2042 pid = 0;
d251207d
LP
2043 if (r < 0)
2044 return r;
2e87a1fd 2045 if (r != EXIT_SUCCESS) /* If something strange happened with the child, let's consider this fatal, too */
d251207d
LP
2046 return -EIO;
2047
2048 return 0;
2049}
2050
3536f49e 2051static int setup_exec_directory(
07689d5d
LP
2052 const ExecContext *context,
2053 const ExecParameters *params,
2054 uid_t uid,
3536f49e 2055 gid_t gid,
3536f49e
YW
2056 ExecDirectoryType type,
2057 int *exit_status) {
07689d5d 2058
72fd1768 2059 static const int exit_status_table[_EXEC_DIRECTORY_TYPE_MAX] = {
3536f49e
YW
2060 [EXEC_DIRECTORY_RUNTIME] = EXIT_RUNTIME_DIRECTORY,
2061 [EXEC_DIRECTORY_STATE] = EXIT_STATE_DIRECTORY,
2062 [EXEC_DIRECTORY_CACHE] = EXIT_CACHE_DIRECTORY,
2063 [EXEC_DIRECTORY_LOGS] = EXIT_LOGS_DIRECTORY,
2064 [EXEC_DIRECTORY_CONFIGURATION] = EXIT_CONFIGURATION_DIRECTORY,
2065 };
07689d5d
LP
2066 char **rt;
2067 int r;
2068
2069 assert(context);
2070 assert(params);
72fd1768 2071 assert(type >= 0 && type < _EXEC_DIRECTORY_TYPE_MAX);
3536f49e 2072 assert(exit_status);
07689d5d 2073
3536f49e
YW
2074 if (!params->prefix[type])
2075 return 0;
2076
8679efde 2077 if (params->flags & EXEC_CHOWN_DIRECTORIES) {
3536f49e
YW
2078 if (!uid_is_valid(uid))
2079 uid = 0;
2080 if (!gid_is_valid(gid))
2081 gid = 0;
2082 }
2083
2084 STRV_FOREACH(rt, context->directories[type].paths) {
6c47cd7d 2085 _cleanup_free_ char *p = NULL, *pp = NULL;
07689d5d 2086
edbfeb12 2087 p = path_join(params->prefix[type], *rt);
3536f49e
YW
2088 if (!p) {
2089 r = -ENOMEM;
2090 goto fail;
2091 }
07689d5d 2092
23a7448e
YW
2093 r = mkdir_parents_label(p, 0755);
2094 if (r < 0)
3536f49e 2095 goto fail;
23a7448e 2096
8092a48c 2097 if (context->dynamic_user &&
40cd2ecc
LP
2098 (!IN_SET(type, EXEC_DIRECTORY_RUNTIME, EXEC_DIRECTORY_CONFIGURATION) ||
2099 (type == EXEC_DIRECTORY_RUNTIME && context->runtime_directory_preserve_mode != EXEC_PRESERVE_NO))) {
6c9c51e5 2100 _cleanup_free_ char *private_root = NULL;
6c47cd7d 2101
3f5b1508
LP
2102 /* So, here's one extra complication when dealing with DynamicUser=1 units. In that
2103 * case we want to avoid leaving a directory around fully accessible that is owned by
2104 * a dynamic user whose UID is later on reused. To lock this down we use the same
2105 * trick used by container managers to prohibit host users to get access to files of
2106 * the same UID in containers: we place everything inside a directory that has an
2107 * access mode of 0700 and is owned root:root, so that it acts as security boundary
2108 * for unprivileged host code. We then use fs namespacing to make this directory
2109 * permeable for the service itself.
6c47cd7d 2110 *
3f5b1508
LP
2111 * Specifically: for a service which wants a special directory "foo/" we first create
2112 * a directory "private/" with access mode 0700 owned by root:root. Then we place
2113 * "foo" inside of that directory (i.e. "private/foo/"), and make "foo" a symlink to
2114 * "private/foo". This way, privileged host users can access "foo/" as usual, but
2115 * unprivileged host users can't look into it. Inside of the namespace of the unit
2116 * "private/" is replaced by a more liberally accessible tmpfs, into which the host's
2117 * "private/foo/" is mounted under the same name, thus disabling the access boundary
2118 * for the service and making sure it only gets access to the dirs it needs but no
2119 * others. Tricky? Yes, absolutely, but it works!
6c47cd7d 2120 *
3f5b1508
LP
2121 * Note that we don't do this for EXEC_DIRECTORY_CONFIGURATION as that's assumed not
2122 * to be owned by the service itself.
2123 *
2124 * Also, note that we don't do this for EXEC_DIRECTORY_RUNTIME as that's often used
2125 * for sharing files or sockets with other services. */
6c47cd7d 2126
edbfeb12 2127 private_root = path_join(params->prefix[type], "private");
6c47cd7d
LP
2128 if (!private_root) {
2129 r = -ENOMEM;
2130 goto fail;
2131 }
2132
2133 /* First set up private root if it doesn't exist yet, with access mode 0700 and owned by root:root */
37c1d5e9 2134 r = mkdir_safe_label(private_root, 0700, 0, 0, MKDIR_WARN_MODE);
6c47cd7d
LP
2135 if (r < 0)
2136 goto fail;
2137
edbfeb12 2138 pp = path_join(private_root, *rt);
6c47cd7d
LP
2139 if (!pp) {
2140 r = -ENOMEM;
2141 goto fail;
2142 }
2143
2144 /* Create all directories between the configured directory and this private root, and mark them 0755 */
2145 r = mkdir_parents_label(pp, 0755);
2146 if (r < 0)
2147 goto fail;
2148
949befd3
LP
2149 if (is_dir(p, false) > 0 &&
2150 (laccess(pp, F_OK) < 0 && errno == ENOENT)) {
2151
2152 /* Hmm, the private directory doesn't exist yet, but the normal one exists? If so, move
2153 * it over. Most likely the service has been upgraded from one that didn't use
2154 * DynamicUser=1, to one that does. */
2155
cf52c45d
LP
2156 log_info("Found pre-existing public %s= directory %s, migrating to %s.\n"
2157 "Apparently, service previously had DynamicUser= turned off, and has now turned it on.",
2158 exec_directory_type_to_string(type), p, pp);
2159
949befd3
LP
2160 if (rename(p, pp) < 0) {
2161 r = -errno;
2162 goto fail;
2163 }
2164 } else {
2165 /* Otherwise, create the actual directory for the service */
2166
2167 r = mkdir_label(pp, context->directories[type].mode);
2168 if (r < 0 && r != -EEXIST)
2169 goto fail;
2170 }
6c47cd7d 2171
6c47cd7d 2172 /* And link it up from the original place */
6c9c51e5 2173 r = symlink_idempotent(pp, p, true);
6c47cd7d
LP
2174 if (r < 0)
2175 goto fail;
2176
6c47cd7d 2177 } else {
5c6d40d1
LP
2178 _cleanup_free_ char *target = NULL;
2179
2180 if (type != EXEC_DIRECTORY_CONFIGURATION &&
2181 readlink_and_make_absolute(p, &target) >= 0) {
2182 _cleanup_free_ char *q = NULL;
2183
2184 /* This already exists and is a symlink? Interesting. Maybe it's one created
2185 * by DynamicUser=1 (see above)? */
2186
2187 q = path_join(params->prefix[type], "private", *rt);
2188 if (!q) {
2189 r = -ENOMEM;
2190 goto fail;
2191 }
2192
2193 if (path_equal(q, target)) {
2194
2195 /* Hmm, apparently DynamicUser= was once turned on for this service,
2196 * but is no longer. Let's move the directory back up. */
2197
cf52c45d
LP
2198 log_info("Found pre-existing private %s= directory %s, migrating to %s.\n"
2199 "Apparently, service previously had DynamicUser= turned on, and has now turned it off.",
2200 exec_directory_type_to_string(type), q, p);
2201
5c6d40d1
LP
2202 if (unlink(p) < 0) {
2203 r = -errno;
2204 goto fail;
2205 }
2206
2207 if (rename(q, p) < 0) {
2208 r = -errno;
2209 goto fail;
2210 }
2211 }
2212 }
2213
6c47cd7d 2214 r = mkdir_label(p, context->directories[type].mode);
d484580c 2215 if (r < 0) {
d484580c
LP
2216 if (r != -EEXIST)
2217 goto fail;
2218
206e9864
LP
2219 if (type == EXEC_DIRECTORY_CONFIGURATION) {
2220 struct stat st;
2221
2222 /* Don't change the owner/access mode of the configuration directory,
2223 * as in the common case it is not written to by a service, and shall
2224 * not be writable. */
2225
2226 if (stat(p, &st) < 0) {
2227 r = -errno;
2228 goto fail;
2229 }
2230
2231 /* Still complain if the access mode doesn't match */
2232 if (((st.st_mode ^ context->directories[type].mode) & 07777) != 0)
2233 log_warning("%s \'%s\' already exists but the mode is different. "
2234 "(File system: %o %sMode: %o)",
2235 exec_directory_type_to_string(type), *rt,
2236 st.st_mode & 07777, exec_directory_type_to_string(type), context->directories[type].mode & 07777);
2237
6cff72eb 2238 continue;
206e9864 2239 }
6cff72eb 2240 }
a1164ae3 2241 }
07689d5d 2242
206e9864 2243 /* Lock down the access mode (we use chmod_and_chown() to make this idempotent. We don't
5238e957 2244 * specify UID/GID here, so that path_chown_recursive() can optimize things depending on the
206e9864
LP
2245 * current UID/GID ownership.) */
2246 r = chmod_and_chown(pp ?: p, context->directories[type].mode, UID_INVALID, GID_INVALID);
2247 if (r < 0)
2248 goto fail;
c71b2eb7 2249
607b358e
LP
2250 /* Then, change the ownership of the whole tree, if necessary. When dynamic users are used we
2251 * drop the suid/sgid bits, since we really don't want SUID/SGID files for dynamic UID/GID
2252 * assignments to exist.*/
2253 r = path_chown_recursive(pp ?: p, uid, gid, context->dynamic_user ? 01777 : 07777);
07689d5d 2254 if (r < 0)
3536f49e 2255 goto fail;
07689d5d
LP
2256 }
2257
2258 return 0;
3536f49e
YW
2259
2260fail:
2261 *exit_status = exit_status_table[type];
3536f49e 2262 return r;
07689d5d
LP
2263}
2264
#if ENABLE_SMACK
/* Applies a SMACK label to the calling process (PID 0). If context->smack_process_label is set, that label
 * is used. Otherwise, and only if SMACK_DEFAULT_PROCESS_LABEL is defined at build time, the SMACK_ATTR_EXEC
 * label read from command->path is used, falling back to SMACK_DEFAULT_PROCESS_LABEL. Returns 0 on success,
 * a negative errno-style error otherwise. */
static int setup_smack(
                const ExecContext *context,
                const ExecCommand *command) {

        int r;

        assert(context);
        assert(command);

        if (context->smack_process_label) {
                r = mac_smack_apply_pid(0, context->smack_process_label);
                return r < 0 ? r : 0;
        }

#ifdef SMACK_DEFAULT_PROCESS_LABEL
        {
                _cleanup_free_ char *exec_label = NULL;

                /* A missing attribute or missing support is fine, we use the default label then */
                r = mac_smack_read(command->path, SMACK_ATTR_EXEC, &exec_label);
                if (r < 0 && r != -ENODATA && r != -EOPNOTSUPP)
                        return r;

                r = mac_smack_apply_pid(0, exec_label ? exec_label : SMACK_DEFAULT_PROCESS_LABEL);
                if (r < 0)
                        return r;
        }
#endif

        return 0;
}
#endif
cefc33ae 2297
6c47cd7d
LP
2298static int compile_bind_mounts(
2299 const ExecContext *context,
2300 const ExecParameters *params,
2301 BindMount **ret_bind_mounts,
da6053d0 2302 size_t *ret_n_bind_mounts,
6c47cd7d
LP
2303 char ***ret_empty_directories) {
2304
2305 _cleanup_strv_free_ char **empty_directories = NULL;
2306 BindMount *bind_mounts;
da6053d0 2307 size_t n, h = 0, i;
6c47cd7d
LP
2308 ExecDirectoryType t;
2309 int r;
2310
2311 assert(context);
2312 assert(params);
2313 assert(ret_bind_mounts);
2314 assert(ret_n_bind_mounts);
2315 assert(ret_empty_directories);
2316
2317 n = context->n_bind_mounts;
2318 for (t = 0; t < _EXEC_DIRECTORY_TYPE_MAX; t++) {
2319 if (!params->prefix[t])
2320 continue;
2321
2322 n += strv_length(context->directories[t].paths);
2323 }
2324
2325 if (n <= 0) {
2326 *ret_bind_mounts = NULL;
2327 *ret_n_bind_mounts = 0;
2328 *ret_empty_directories = NULL;
2329 return 0;
2330 }
2331
2332 bind_mounts = new(BindMount, n);
2333 if (!bind_mounts)
2334 return -ENOMEM;
2335
a8cabc61 2336 for (i = 0; i < context->n_bind_mounts; i++) {
6c47cd7d
LP
2337 BindMount *item = context->bind_mounts + i;
2338 char *s, *d;
2339
2340 s = strdup(item->source);
2341 if (!s) {
2342 r = -ENOMEM;
2343 goto finish;
2344 }
2345
2346 d = strdup(item->destination);
2347 if (!d) {
2348 free(s);
2349 r = -ENOMEM;
2350 goto finish;
2351 }
2352
2353 bind_mounts[h++] = (BindMount) {
2354 .source = s,
2355 .destination = d,
2356 .read_only = item->read_only,
2357 .recursive = item->recursive,
2358 .ignore_enoent = item->ignore_enoent,
2359 };
2360 }
2361
2362 for (t = 0; t < _EXEC_DIRECTORY_TYPE_MAX; t++) {
2363 char **suffix;
2364
2365 if (!params->prefix[t])
2366 continue;
2367
2368 if (strv_isempty(context->directories[t].paths))
2369 continue;
2370
8092a48c 2371 if (context->dynamic_user &&
5609f688
YW
2372 !IN_SET(t, EXEC_DIRECTORY_RUNTIME, EXEC_DIRECTORY_CONFIGURATION) &&
2373 !(context->root_directory || context->root_image)) {
6c47cd7d
LP
2374 char *private_root;
2375
2376 /* So this is for a dynamic user, and we need to make sure the process can access its own
2377 * directory. For that we overmount the usually inaccessible "private" subdirectory with a
2378 * tmpfs that makes it accessible and is empty except for the submounts we do this for. */
2379
657ee2d8 2380 private_root = path_join(params->prefix[t], "private");
6c47cd7d
LP
2381 if (!private_root) {
2382 r = -ENOMEM;
2383 goto finish;
2384 }
2385
2386 r = strv_consume(&empty_directories, private_root);
a635a7ae 2387 if (r < 0)
6c47cd7d 2388 goto finish;
6c47cd7d
LP
2389 }
2390
2391 STRV_FOREACH(suffix, context->directories[t].paths) {
2392 char *s, *d;
2393
8092a48c
YW
2394 if (context->dynamic_user &&
2395 !IN_SET(t, EXEC_DIRECTORY_RUNTIME, EXEC_DIRECTORY_CONFIGURATION))
657ee2d8 2396 s = path_join(params->prefix[t], "private", *suffix);
6c47cd7d 2397 else
657ee2d8 2398 s = path_join(params->prefix[t], *suffix);
6c47cd7d
LP
2399 if (!s) {
2400 r = -ENOMEM;
2401 goto finish;
2402 }
2403
5609f688
YW
2404 if (context->dynamic_user &&
2405 !IN_SET(t, EXEC_DIRECTORY_RUNTIME, EXEC_DIRECTORY_CONFIGURATION) &&
2406 (context->root_directory || context->root_image))
2407 /* When RootDirectory= or RootImage= are set, then the symbolic link to the private
2408 * directory is not created on the root directory. So, let's bind-mount the directory
2409 * on the 'non-private' place. */
657ee2d8 2410 d = path_join(params->prefix[t], *suffix);
5609f688
YW
2411 else
2412 d = strdup(s);
6c47cd7d
LP
2413 if (!d) {
2414 free(s);
2415 r = -ENOMEM;
2416 goto finish;
2417 }
2418
2419 bind_mounts[h++] = (BindMount) {
2420 .source = s,
2421 .destination = d,
2422 .read_only = false,
9ce4e4b0 2423 .nosuid = context->dynamic_user, /* don't allow suid/sgid when DynamicUser= is on */
6c47cd7d
LP
2424 .recursive = true,
2425 .ignore_enoent = false,
2426 };
2427 }
2428 }
2429
2430 assert(h == n);
2431
2432 *ret_bind_mounts = bind_mounts;
2433 *ret_n_bind_mounts = n;
ae2a15bc 2434 *ret_empty_directories = TAKE_PTR(empty_directories);
6c47cd7d
LP
2435
2436 return (int) n;
2437
2438finish:
2439 bind_mount_free_many(bind_mounts, h);
2440 return r;
2441}
2442
6818c54c 2443static int apply_mount_namespace(
34cf6c43
YW
2444 const Unit *u,
2445 const ExecCommand *command,
6818c54c
LP
2446 const ExecContext *context,
2447 const ExecParameters *params,
7cc5ef5f
ZJS
2448 const ExecRuntime *runtime,
2449 char **error_path) {
6818c54c 2450
7bcef4ef 2451 _cleanup_strv_free_ char **empty_directories = NULL;
93c6bb51 2452 char *tmp = NULL, *var = NULL;
915e6d16 2453 const char *root_dir = NULL, *root_image = NULL;
228af36f 2454 NamespaceInfo ns_info;
165a31c0 2455 bool needs_sandboxing;
6c47cd7d 2456 BindMount *bind_mounts = NULL;
da6053d0 2457 size_t n_bind_mounts = 0;
6818c54c 2458 int r;
93c6bb51 2459
2b3c1b9e
DH
2460 assert(context);
2461
93c6bb51
DH
2462 /* The runtime struct only contains the parent of the private /tmp,
2463 * which is non-accessible to world users. Inside of it there's a /tmp
2464 * that is sticky, and that's the one we want to use here. */
2465
2466 if (context->private_tmp && runtime) {
2467 if (runtime->tmp_dir)
2468 tmp = strjoina(runtime->tmp_dir, "/tmp");
2469 if (runtime->var_tmp_dir)
2470 var = strjoina(runtime->var_tmp_dir, "/tmp");
2471 }
2472
915e6d16
LP
2473 if (params->flags & EXEC_APPLY_CHROOT) {
2474 root_image = context->root_image;
2475
2476 if (!root_image)
2477 root_dir = context->root_directory;
2478 }
93c6bb51 2479
6c47cd7d
LP
2480 r = compile_bind_mounts(context, params, &bind_mounts, &n_bind_mounts, &empty_directories);
2481 if (r < 0)
2482 return r;
2483
165a31c0 2484 needs_sandboxing = (params->flags & EXEC_APPLY_SANDBOXING) && !(command->flags & EXEC_COMMAND_FULLY_PRIVILEGED);
b5a33299
YW
2485 if (needs_sandboxing)
2486 ns_info = (NamespaceInfo) {
2487 .ignore_protect_paths = false,
2488 .private_dev = context->private_devices,
2489 .protect_control_groups = context->protect_control_groups,
2490 .protect_kernel_tunables = context->protect_kernel_tunables,
2491 .protect_kernel_modules = context->protect_kernel_modules,
aecd5ac6 2492 .protect_hostname = context->protect_hostname,
b5a33299 2493 .mount_apivfs = context->mount_apivfs,
228af36f 2494 .private_mounts = context->private_mounts,
b5a33299 2495 };
228af36f
LP
2496 else if (!context->dynamic_user && root_dir)
2497 /*
2498 * If DynamicUser=no and RootDirectory= is set then lets pass a relaxed
2499 * sandbox info, otherwise enforce it, don't ignore protected paths and
2500 * fail if we are enable to apply the sandbox inside the mount namespace.
2501 */
2502 ns_info = (NamespaceInfo) {
2503 .ignore_protect_paths = true,
2504 };
2505 else
2506 ns_info = (NamespaceInfo) {};
b5a33299 2507
37ed15d7
FB
2508 if (context->mount_flags == MS_SHARED)
2509 log_unit_debug(u, "shared mount propagation hidden by other fs namespacing unit settings: ignoring");
2510
915e6d16 2511 r = setup_namespace(root_dir, root_image,
7bcef4ef 2512 &ns_info, context->read_write_paths,
165a31c0
LP
2513 needs_sandboxing ? context->read_only_paths : NULL,
2514 needs_sandboxing ? context->inaccessible_paths : NULL,
6c47cd7d
LP
2515 empty_directories,
2516 bind_mounts,
2517 n_bind_mounts,
2abd4e38
YW
2518 context->temporary_filesystems,
2519 context->n_temporary_filesystems,
93c6bb51
DH
2520 tmp,
2521 var,
165a31c0
LP
2522 needs_sandboxing ? context->protect_home : PROTECT_HOME_NO,
2523 needs_sandboxing ? context->protect_system : PROTECT_SYSTEM_NO,
915e6d16 2524 context->mount_flags,
7cc5ef5f
ZJS
2525 DISSECT_IMAGE_DISCARD_ON_LOOP,
2526 error_path);
93c6bb51 2527
6c47cd7d
LP
2528 bind_mount_free_many(bind_mounts, n_bind_mounts);
2529
1beab8b0 2530 /* If we couldn't set up the namespace this is probably due to a missing capability. setup_namespace() reports
5238e957 2531 * that with a special, recognizable error ENOANO. In this case, silently proceed, but only if exclusively
1beab8b0
LP
2532 * sandboxing options were used, i.e. nothing such as RootDirectory= or BindMount= that would result in a
2533 * completely different execution environment. */
aca835ed
YW
2534 if (r == -ENOANO) {
2535 if (n_bind_mounts == 0 &&
2536 context->n_temporary_filesystems == 0 &&
2537 !root_dir && !root_image &&
2538 !context->dynamic_user) {
2539 log_unit_debug(u, "Failed to set up namespace, assuming containerized execution and ignoring.");
2540 return 0;
2541 }
2542
2194547e
LP
2543 log_unit_debug(u, "Failed to set up namespace, and refusing to continue since the selected namespacing options alter mount environment non-trivially.\n"
2544 "Bind mounts: %zu, temporary filesystems: %zu, root directory: %s, root image: %s, dynamic user: %s",
2545 n_bind_mounts, context->n_temporary_filesystems, yes_no(root_dir), yes_no(root_image), yes_no(context->dynamic_user));
2546
aca835ed 2547 return -EOPNOTSUPP;
93c6bb51
DH
2548 }
2549
2550 return r;
2551}
2552
915e6d16
LP
2553static int apply_working_directory(
2554 const ExecContext *context,
2555 const ExecParameters *params,
2556 const char *home,
376fecf6
LP
2557 const bool needs_mount_ns,
2558 int *exit_status) {
915e6d16 2559
6732edab 2560 const char *d, *wd;
2b3c1b9e
DH
2561
2562 assert(context);
376fecf6 2563 assert(exit_status);
2b3c1b9e 2564
6732edab
LP
2565 if (context->working_directory_home) {
2566
376fecf6
LP
2567 if (!home) {
2568 *exit_status = EXIT_CHDIR;
6732edab 2569 return -ENXIO;
376fecf6 2570 }
6732edab 2571
2b3c1b9e 2572 wd = home;
6732edab
LP
2573
2574 } else if (context->working_directory)
2b3c1b9e
DH
2575 wd = context->working_directory;
2576 else
2577 wd = "/";
e7f1e7c6
DH
2578
2579 if (params->flags & EXEC_APPLY_CHROOT) {
2580 if (!needs_mount_ns && context->root_directory)
376fecf6
LP
2581 if (chroot(context->root_directory) < 0) {
2582 *exit_status = EXIT_CHROOT;
e7f1e7c6 2583 return -errno;
376fecf6 2584 }
e7f1e7c6 2585
2b3c1b9e
DH
2586 d = wd;
2587 } else
3b0e5bb5 2588 d = prefix_roota(context->root_directory, wd);
e7f1e7c6 2589
376fecf6
LP
2590 if (chdir(d) < 0 && !context->working_directory_missing_ok) {
2591 *exit_status = EXIT_CHDIR;
2b3c1b9e 2592 return -errno;
376fecf6 2593 }
e7f1e7c6
DH
2594
2595 return 0;
2596}
2597
b1edf445 2598static int setup_keyring(
34cf6c43 2599 const Unit *u,
b1edf445
LP
2600 const ExecContext *context,
2601 const ExecParameters *p,
2602 uid_t uid, gid_t gid) {
2603
74dd6b51 2604 key_serial_t keyring;
e64c2d0b
DJL
2605 int r = 0;
2606 uid_t saved_uid;
2607 gid_t saved_gid;
74dd6b51
LP
2608
2609 assert(u);
b1edf445 2610 assert(context);
74dd6b51
LP
2611 assert(p);
2612
2613 /* Let's set up a new per-service "session" kernel keyring for each system service. This has the benefit that
2614 * each service runs with its own keyring shared among all processes of the service, but with no hook-up beyond
2615 * that scope, and in particular no link to the per-UID keyring. If we don't do this the keyring will be
2616 * automatically created on-demand and then linked to the per-UID keyring, by the kernel. The kernel's built-in
2617 * on-demand behaviour is very appropriate for login users, but probably not so much for system services, where
2618 * UIDs are not necessarily specific to a service but reused (at least in the case of UID 0). */
2619
b1edf445
LP
2620 if (context->keyring_mode == EXEC_KEYRING_INHERIT)
2621 return 0;
2622
e64c2d0b
DJL
2623 /* Acquiring a reference to the user keyring is nasty. We briefly change identity in order to get things set up
2624 * properly by the kernel. If we don't do that then we can't create it atomically, and that sucks for parallel
2625 * execution. This mimics what pam_keyinit does, too. Setting up session keyring, to be owned by the right user
2626 * & group is just as nasty as acquiring a reference to the user keyring. */
2627
2628 saved_uid = getuid();
2629 saved_gid = getgid();
2630
2631 if (gid_is_valid(gid) && gid != saved_gid) {
2632 if (setregid(gid, -1) < 0)
2633 return log_unit_error_errno(u, errno, "Failed to change GID for user keyring: %m");
2634 }
2635
2636 if (uid_is_valid(uid) && uid != saved_uid) {
2637 if (setreuid(uid, -1) < 0) {
2638 r = log_unit_error_errno(u, errno, "Failed to change UID for user keyring: %m");
2639 goto out;
2640 }
2641 }
2642
74dd6b51
LP
2643 keyring = keyctl(KEYCTL_JOIN_SESSION_KEYRING, 0, 0, 0, 0);
2644 if (keyring == -1) {
2645 if (errno == ENOSYS)
8002fb97 2646 log_unit_debug_errno(u, errno, "Kernel keyring not supported, ignoring.");
74dd6b51 2647 else if (IN_SET(errno, EACCES, EPERM))
8002fb97 2648 log_unit_debug_errno(u, errno, "Kernel keyring access prohibited, ignoring.");
74dd6b51 2649 else if (errno == EDQUOT)
8002fb97 2650 log_unit_debug_errno(u, errno, "Out of kernel keyrings to allocate, ignoring.");
74dd6b51 2651 else
e64c2d0b 2652 r = log_unit_error_errno(u, errno, "Setting up kernel keyring failed: %m");
74dd6b51 2653
e64c2d0b 2654 goto out;
74dd6b51
LP
2655 }
2656
e64c2d0b
DJL
2657 /* When requested link the user keyring into the session keyring. */
2658 if (context->keyring_mode == EXEC_KEYRING_SHARED) {
2659
2660 if (keyctl(KEYCTL_LINK,
2661 KEY_SPEC_USER_KEYRING,
2662 KEY_SPEC_SESSION_KEYRING, 0, 0) < 0) {
2663 r = log_unit_error_errno(u, errno, "Failed to link user keyring into session keyring: %m");
2664 goto out;
2665 }
2666 }
2667
2668 /* Restore uid/gid back */
2669 if (uid_is_valid(uid) && uid != saved_uid) {
2670 if (setreuid(saved_uid, -1) < 0) {
2671 r = log_unit_error_errno(u, errno, "Failed to change UID back for user keyring: %m");
2672 goto out;
2673 }
2674 }
2675
2676 if (gid_is_valid(gid) && gid != saved_gid) {
2677 if (setregid(saved_gid, -1) < 0)
2678 return log_unit_error_errno(u, errno, "Failed to change GID back for user keyring: %m");
2679 }
2680
2681 /* Populate they keyring with the invocation ID by default, as original saved_uid. */
b3415f5d
LP
2682 if (!sd_id128_is_null(u->invocation_id)) {
2683 key_serial_t key;
2684
2685 key = add_key("user", "invocation_id", &u->invocation_id, sizeof(u->invocation_id), KEY_SPEC_SESSION_KEYRING);
2686 if (key == -1)
8002fb97 2687 log_unit_debug_errno(u, errno, "Failed to add invocation ID to keyring, ignoring: %m");
b3415f5d
LP
2688 else {
2689 if (keyctl(KEYCTL_SETPERM, key,
2690 KEY_POS_VIEW|KEY_POS_READ|KEY_POS_SEARCH|
2691 KEY_USR_VIEW|KEY_USR_READ|KEY_USR_SEARCH, 0, 0) < 0)
e64c2d0b 2692 r = log_unit_error_errno(u, errno, "Failed to restrict invocation ID permission: %m");
b3415f5d
LP
2693 }
2694 }
2695
e64c2d0b
DJL
2696out:
2697 /* Revert back uid & gid for the the last time, and exit */
2698 /* no extra logging, as only the first already reported error matters */
2699 if (getuid() != saved_uid)
2700 (void) setreuid(saved_uid, -1);
b1edf445 2701
e64c2d0b
DJL
2702 if (getgid() != saved_gid)
2703 (void) setregid(saved_gid, -1);
b1edf445 2704
e64c2d0b 2705 return r;
74dd6b51
LP
2706}
2707
/* Appends those of the two file descriptors in "pair" that are valid (i.e. non-negative) to "array", in
 * order, and advances the element counter *n accordingly. The caller has to make sure the array has room
 * for up to two more entries. */
static void append_socket_pair(int *array, size_t *n, const int pair[static 2]) {
        size_t k;

        assert(array);
        assert(n);

        if (!pair)
                return;

        for (k = 0; k < 2; k++) {
                if (pair[k] < 0)
                        continue;

                array[*n] = pair[k];
                (*n)++;
        }
}
2720
a34ceba6
LP
2721static int close_remaining_fds(
2722 const ExecParameters *params,
34cf6c43
YW
2723 const ExecRuntime *runtime,
2724 const DynamicCreds *dcreds,
00d9ef85 2725 int user_lookup_fd,
a34ceba6 2726 int socket_fd,
5686391b 2727 int exec_fd,
da6053d0 2728 int *fds, size_t n_fds) {
a34ceba6 2729
da6053d0 2730 size_t n_dont_close = 0;
00d9ef85 2731 int dont_close[n_fds + 12];
a34ceba6
LP
2732
2733 assert(params);
2734
2735 if (params->stdin_fd >= 0)
2736 dont_close[n_dont_close++] = params->stdin_fd;
2737 if (params->stdout_fd >= 0)
2738 dont_close[n_dont_close++] = params->stdout_fd;
2739 if (params->stderr_fd >= 0)
2740 dont_close[n_dont_close++] = params->stderr_fd;
2741
2742 if (socket_fd >= 0)
2743 dont_close[n_dont_close++] = socket_fd;
5686391b
LP
2744 if (exec_fd >= 0)
2745 dont_close[n_dont_close++] = exec_fd;
a34ceba6
LP
2746 if (n_fds > 0) {
2747 memcpy(dont_close + n_dont_close, fds, sizeof(int) * n_fds);
2748 n_dont_close += n_fds;
2749 }
2750
29206d46
LP
2751 if (runtime)
2752 append_socket_pair(dont_close, &n_dont_close, runtime->netns_storage_socket);
2753
2754 if (dcreds) {
2755 if (dcreds->user)
2756 append_socket_pair(dont_close, &n_dont_close, dcreds->user->storage_socket);
2757 if (dcreds->group)
2758 append_socket_pair(dont_close, &n_dont_close, dcreds->group->storage_socket);
a34ceba6
LP
2759 }
2760
00d9ef85
LP
2761 if (user_lookup_fd >= 0)
2762 dont_close[n_dont_close++] = user_lookup_fd;
2763
a34ceba6
LP
2764 return close_all_fds(dont_close, n_dont_close);
2765}
2766
00d9ef85
LP
2767static int send_user_lookup(
2768 Unit *unit,
2769 int user_lookup_fd,
2770 uid_t uid,
2771 gid_t gid) {
2772
2773 assert(unit);
2774
2775 /* Send the resolved UID/GID to PID 1 after we learnt it. We send a single datagram, containing the UID/GID
2776 * data as well as the unit name. Note that we suppress sending this if no user/group to resolve was
2777 * specified. */
2778
2779 if (user_lookup_fd < 0)
2780 return 0;
2781
2782 if (!uid_is_valid(uid) && !gid_is_valid(gid))
2783 return 0;
2784
2785 if (writev(user_lookup_fd,
2786 (struct iovec[]) {
e6a7ec4b
LP
2787 IOVEC_INIT(&uid, sizeof(uid)),
2788 IOVEC_INIT(&gid, sizeof(gid)),
2789 IOVEC_INIT_STRING(unit->id) }, 3) < 0)
00d9ef85
LP
2790 return -errno;
2791
2792 return 0;
2793}
2794
6732edab
LP
2795static int acquire_home(const ExecContext *c, uid_t uid, const char** home, char **buf) {
2796 int r;
2797
2798 assert(c);
2799 assert(home);
2800 assert(buf);
2801
2802 /* If WorkingDirectory=~ is set, try to acquire a usable home directory. */
2803
2804 if (*home)
2805 return 0;
2806
2807 if (!c->working_directory_home)
2808 return 0;
2809
6732edab
LP
2810 r = get_home_dir(buf);
2811 if (r < 0)
2812 return r;
2813
2814 *home = *buf;
2815 return 1;
2816}
2817
da50b85a
LP
2818static int compile_suggested_paths(const ExecContext *c, const ExecParameters *p, char ***ret) {
2819 _cleanup_strv_free_ char ** list = NULL;
2820 ExecDirectoryType t;
2821 int r;
2822
2823 assert(c);
2824 assert(p);
2825 assert(ret);
2826
2827 assert(c->dynamic_user);
2828
2829 /* Compile a list of paths that it might make sense to read the owning UID from to use as initial candidate for
2830 * dynamic UID allocation, in order to save us from doing costly recursive chown()s of the special
2831 * directories. */
2832
2833 for (t = 0; t < _EXEC_DIRECTORY_TYPE_MAX; t++) {
2834 char **i;
2835
2836 if (t == EXEC_DIRECTORY_CONFIGURATION)
2837 continue;
2838
2839 if (!p->prefix[t])
2840 continue;
2841
2842 STRV_FOREACH(i, c->directories[t].paths) {
2843 char *e;
2844
8092a48c 2845 if (t == EXEC_DIRECTORY_RUNTIME)
657ee2d8 2846 e = path_join(p->prefix[t], *i);
8092a48c 2847 else
657ee2d8 2848 e = path_join(p->prefix[t], "private", *i);
da50b85a
LP
2849 if (!e)
2850 return -ENOMEM;
2851
2852 r = strv_consume(&list, e);
2853 if (r < 0)
2854 return r;
2855 }
2856 }
2857
ae2a15bc 2858 *ret = TAKE_PTR(list);
da50b85a
LP
2859
2860 return 0;
2861}
2862
34cf6c43
YW
2863static char *exec_command_line(char **argv);
2864
78f93209
LP
2865static int exec_parameters_get_cgroup_path(const ExecParameters *params, char **ret) {
2866 bool using_subcgroup;
2867 char *p;
2868
2869 assert(params);
2870 assert(ret);
2871
2872 if (!params->cgroup_path)
2873 return -EINVAL;
2874
2875 /* If we are called for a unit where cgroup delegation is on, and the payload created its own populated
2876 * subcgroup (which we expect it to do, after all it asked for delegation), then we cannot place the control
2877 * processes started after the main unit's process in the unit's main cgroup because it is now an inner one,
2878 * and inner cgroups may not contain processes. Hence, if delegation is on, and this is a control process,
2879 * let's use ".control" as subcgroup instead. Note that we do so only for ExecStartPost=, ExecReload=,
2880 * ExecStop=, ExecStopPost=, i.e. for the commands where the main process is already forked. For ExecStartPre=
2881 * this is not necessary, the cgroup is still empty. We distinguish these cases with the EXEC_CONTROL_CGROUP
2882 * flag, which is only passed for the former statements, not for the latter. */
2883
2884 using_subcgroup = FLAGS_SET(params->flags, EXEC_CONTROL_CGROUP|EXEC_CGROUP_DELEGATE|EXEC_IS_CONTROL);
2885 if (using_subcgroup)
657ee2d8 2886 p = path_join(params->cgroup_path, ".control");
78f93209
LP
2887 else
2888 p = strdup(params->cgroup_path);
2889 if (!p)
2890 return -ENOMEM;
2891
2892 *ret = p;
2893 return using_subcgroup;
2894}
2895
ff0af2a1 2896static int exec_child(
f2341e0a 2897 Unit *unit,
34cf6c43 2898 const ExecCommand *command,
ff0af2a1
LP
2899 const ExecContext *context,
2900 const ExecParameters *params,
2901 ExecRuntime *runtime,
29206d46 2902 DynamicCreds *dcreds,
ff0af2a1 2903 int socket_fd,
52c239d7 2904 int named_iofds[3],
4c47affc 2905 int *fds,
da6053d0 2906 size_t n_socket_fds,
25b583d7 2907 size_t n_storage_fds,
ff0af2a1 2908 char **files_env,
00d9ef85 2909 int user_lookup_fd,
12145637 2910 int *exit_status) {
d35fbf6b 2911
7ca69792 2912 _cleanup_strv_free_ char **our_env = NULL, **pass_env = NULL, **accum_env = NULL, **replaced_argv = NULL;
5686391b 2913 int *fds_with_exec_fd, n_fds_with_exec_fd, r, ngids = 0, exec_fd = -1;
4d885bd3
DH
2914 _cleanup_free_ gid_t *supplementary_gids = NULL;
2915 const char *username = NULL, *groupname = NULL;
5686391b 2916 _cleanup_free_ char *home_buffer = NULL;
2b3c1b9e 2917 const char *home = NULL, *shell = NULL;
7ca69792 2918 char **final_argv = NULL;
7bce046b
LP
2919 dev_t journal_stream_dev = 0;
2920 ino_t journal_stream_ino = 0;
165a31c0
LP
2921 bool needs_sandboxing, /* Do we need to set up full sandboxing? (i.e. all namespacing, all MAC stuff, caps, yadda yadda */
2922 needs_setuid, /* Do we need to do the actual setresuid()/setresgid() calls? */
2923 needs_mount_namespace, /* Do we need to set up a mount namespace for this kernel? */
2924 needs_ambient_hack; /* Do we need to apply the ambient capabilities hack? */
349cc4a5 2925#if HAVE_SELINUX
7f59dd35 2926 _cleanup_free_ char *mac_selinux_context_net = NULL;
43b1f709 2927 bool use_selinux = false;
ecfbc84f 2928#endif
f9fa32f0 2929#if ENABLE_SMACK
43b1f709 2930 bool use_smack = false;
ecfbc84f 2931#endif
349cc4a5 2932#if HAVE_APPARMOR
43b1f709 2933 bool use_apparmor = false;
ecfbc84f 2934#endif
fed1e721
LP
2935 uid_t uid = UID_INVALID;
2936 gid_t gid = GID_INVALID;
da6053d0 2937 size_t n_fds;
3536f49e 2938 ExecDirectoryType dt;
165a31c0 2939 int secure_bits;
034c6ed7 2940
f2341e0a 2941 assert(unit);
5cb5a6ff
LP
2942 assert(command);
2943 assert(context);
d35fbf6b 2944 assert(params);
ff0af2a1 2945 assert(exit_status);
d35fbf6b
DM
2946
2947 rename_process_from_path(command->path);
2948
2949 /* We reset exactly these signals, since they are the
2950 * only ones we set to SIG_IGN in the main daemon. All
2951 * others we leave untouched because we set them to
2952 * SIG_DFL or a valid handler initially, both of which
2953 * will be demoted to SIG_DFL. */
ce30c8dc
LP
2954 (void) default_signals(SIGNALS_CRASH_HANDLER,
2955 SIGNALS_IGNORE, -1);
d35fbf6b
DM
2956
2957 if (context->ignore_sigpipe)
ce30c8dc 2958 (void) ignore_signals(SIGPIPE, -1);
d35fbf6b 2959
ff0af2a1
LP
2960 r = reset_signal_mask();
2961 if (r < 0) {
2962 *exit_status = EXIT_SIGNAL_MASK;
12145637 2963 return log_unit_error_errno(unit, r, "Failed to set process signal mask: %m");
d35fbf6b 2964 }
034c6ed7 2965
d35fbf6b
DM
2966 if (params->idle_pipe)
2967 do_idle_pipe_dance(params->idle_pipe);
4f2d528d 2968
2c027c62
LP
2969 /* Close fds we don't need very early to make sure we don't block init reexecution because it cannot bind its
2970 * sockets. Among the fds we close are the logging fds, and we want to keep them closed, so that we don't have
2971 * any fds open we don't really want open during the transition. In order to make logging work, we switch the
2972 * log subsystem into open_when_needed mode, so that it reopens the logs on every single log call. */
ff0af2a1 2973
d35fbf6b 2974 log_forget_fds();
2c027c62 2975 log_set_open_when_needed(true);
4f2d528d 2976
40a80078
LP
2977 /* In case anything used libc syslog(), close this here, too */
2978 closelog();
2979
5686391b
LP
2980 n_fds = n_socket_fds + n_storage_fds;
2981 r = close_remaining_fds(params, runtime, dcreds, user_lookup_fd, socket_fd, params->exec_fd, fds, n_fds);
ff0af2a1
LP
2982 if (r < 0) {
2983 *exit_status = EXIT_FDS;
12145637 2984 return log_unit_error_errno(unit, r, "Failed to close unwanted file descriptors: %m");
8c7be95e
LP
2985 }
2986
d35fbf6b
DM
2987 if (!context->same_pgrp)
2988 if (setsid() < 0) {
ff0af2a1 2989 *exit_status = EXIT_SETSID;
12145637 2990 return log_unit_error_errno(unit, errno, "Failed to create new process session: %m");
d35fbf6b 2991 }
9e2f7c11 2992
1e22b5cd 2993 exec_context_tty_reset(context, params);
d35fbf6b 2994
c891efaf 2995 if (unit_shall_confirm_spawn(unit)) {
7d5ceb64 2996 const char *vc = params->confirm_spawn;
3b20f877
FB
2997 _cleanup_free_ char *cmdline = NULL;
2998
ee39ca20 2999 cmdline = exec_command_line(command->argv);
3b20f877 3000 if (!cmdline) {
0460aa5c 3001 *exit_status = EXIT_MEMORY;
12145637 3002 return log_oom();
3b20f877 3003 }
d35fbf6b 3004
eedf223a 3005 r = ask_for_confirmation(vc, unit, cmdline);
3b20f877
FB
3006 if (r != CONFIRM_EXECUTE) {
3007 if (r == CONFIRM_PRETEND_SUCCESS) {
3008 *exit_status = EXIT_SUCCESS;
3009 return 0;
3010 }
ff0af2a1 3011 *exit_status = EXIT_CONFIRM;
12145637 3012 log_unit_error(unit, "Execution cancelled by the user");
d35fbf6b 3013 return -ECANCELED;
d35fbf6b
DM
3014 }
3015 }
1a63a750 3016
d521916d
LP
3017 /* We are about to invoke NSS and PAM modules. Let's tell them what we are doing here, maybe they care. This is
3018 * used by nss-resolve to disable itself when we are about to start systemd-resolved, to avoid deadlocks. Note
3019 * that these env vars do not survive the execve(), which means they really only apply to the PAM and NSS
3020 * invocations themselves. Also note that while we'll only invoke NSS modules involved in user management they
3021 * might internally call into other NSS modules that are involved in hostname resolution, we never know. */
3022 if (setenv("SYSTEMD_ACTIVATION_UNIT", unit->id, true) != 0 ||
3023 setenv("SYSTEMD_ACTIVATION_SCOPE", MANAGER_IS_SYSTEM(unit->manager) ? "system" : "user", true) != 0) {
3024 *exit_status = EXIT_MEMORY;
3025 return log_unit_error_errno(unit, errno, "Failed to update environment: %m");
3026 }
3027
29206d46 3028 if (context->dynamic_user && dcreds) {
da50b85a 3029 _cleanup_strv_free_ char **suggested_paths = NULL;
29206d46 3030
d521916d
LP
3031 /* On top of that, make sure we bypass our own NSS module nss-systemd comprehensively for any NSS
3032 * checks, if DynamicUser=1 is used, as we shouldn't create a feedback loop with ourselves here.*/
409093fe
LP
3033 if (putenv((char*) "SYSTEMD_NSS_DYNAMIC_BYPASS=1") != 0) {
3034 *exit_status = EXIT_USER;
12145637 3035 return log_unit_error_errno(unit, errno, "Failed to update environment: %m");
409093fe
LP
3036 }
3037
da50b85a
LP
3038 r = compile_suggested_paths(context, params, &suggested_paths);
3039 if (r < 0) {
3040 *exit_status = EXIT_MEMORY;
3041 return log_oom();
3042 }
3043
3044 r = dynamic_creds_realize(dcreds, suggested_paths, &uid, &gid);
ff0af2a1
LP
3045 if (r < 0) {
3046 *exit_status = EXIT_USER;
e2b0cc34
YW
3047 if (r == -EILSEQ) {
3048 log_unit_error(unit, "Failed to update dynamic user credentials: User or group with specified name already exists.");
3049 return -EOPNOTSUPP;
3050 }
12145637 3051 return log_unit_error_errno(unit, r, "Failed to update dynamic user credentials: %m");
524daa8c 3052 }
524daa8c 3053
70dd455c 3054 if (!uid_is_valid(uid)) {
29206d46 3055 *exit_status = EXIT_USER;
12145637 3056 log_unit_error(unit, "UID validation failed for \""UID_FMT"\"", uid);
70dd455c
ZJS
3057 return -ESRCH;
3058 }
3059
3060 if (!gid_is_valid(gid)) {
3061 *exit_status = EXIT_USER;
12145637 3062 log_unit_error(unit, "GID validation failed for \""GID_FMT"\"", gid);
29206d46
LP
3063 return -ESRCH;
3064 }
5bc7452b 3065
29206d46
LP
3066 if (dcreds->user)
3067 username = dcreds->user->name;
3068
3069 } else {
4d885bd3
DH
3070 r = get_fixed_user(context, &username, &uid, &gid, &home, &shell);
3071 if (r < 0) {
3072 *exit_status = EXIT_USER;
12145637 3073 return log_unit_error_errno(unit, r, "Failed to determine user credentials: %m");
5bc7452b 3074 }
5bc7452b 3075
4d885bd3
DH
3076 r = get_fixed_group(context, &groupname, &gid);
3077 if (r < 0) {
3078 *exit_status = EXIT_GROUP;
12145637 3079 return log_unit_error_errno(unit, r, "Failed to determine group credentials: %m");
4d885bd3 3080 }
cdc5d5c5 3081 }
29206d46 3082
cdc5d5c5
DH
3083 /* Initialize user supplementary groups and get SupplementaryGroups= ones */
3084 r = get_supplementary_groups(context, username, groupname, gid,
3085 &supplementary_gids, &ngids);
3086 if (r < 0) {
3087 *exit_status = EXIT_GROUP;
12145637 3088 return log_unit_error_errno(unit, r, "Failed to determine supplementary groups: %m");
29206d46 3089 }
5bc7452b 3090
00d9ef85
LP
3091 r = send_user_lookup(unit, user_lookup_fd, uid, gid);
3092 if (r < 0) {
3093 *exit_status = EXIT_USER;
12145637 3094 return log_unit_error_errno(unit, r, "Failed to send user credentials to PID1: %m");
00d9ef85
LP
3095 }
3096
3097 user_lookup_fd = safe_close(user_lookup_fd);
3098
6732edab
LP
3099 r = acquire_home(context, uid, &home, &home_buffer);
3100 if (r < 0) {
3101 *exit_status = EXIT_CHDIR;
12145637 3102 return log_unit_error_errno(unit, r, "Failed to determine $HOME for user: %m");
6732edab
LP
3103 }
3104
d35fbf6b
DM
3105 /* If a socket is connected to STDIN/STDOUT/STDERR, we
3106 * must be sure to drop O_NONBLOCK */
3107 if (socket_fd >= 0)
a34ceba6 3108 (void) fd_nonblock(socket_fd, false);
acbb0225 3109
4c70a4a7
MS
3110 /* Journald will try to look-up our cgroup in order to populate _SYSTEMD_CGROUP and _SYSTEMD_UNIT fields.
3111 * Hence we need to migrate to the target cgroup from init.scope before connecting to journald */
3112 if (params->cgroup_path) {
3113 _cleanup_free_ char *p = NULL;
3114
3115 r = exec_parameters_get_cgroup_path(params, &p);
3116 if (r < 0) {
3117 *exit_status = EXIT_CGROUP;
3118 return log_unit_error_errno(unit, r, "Failed to acquire cgroup path: %m");
3119 }
3120
3121 r = cg_attach_everywhere(params->cgroup_supported, p, 0, NULL, NULL);
3122 if (r < 0) {
3123 *exit_status = EXIT_CGROUP;
3124 return log_unit_error_errno(unit, r, "Failed to attach to cgroup %s: %m", p);
3125 }
3126 }
3127
a8d08f39
LP
3128 if (context->network_namespace_path && runtime && runtime->netns_storage_socket[0] >= 0) {
3129 r = open_netns_path(runtime->netns_storage_socket, context->network_namespace_path);
3130 if (r < 0) {
3131 *exit_status = EXIT_NETWORK;
3132 return log_unit_error_errno(unit, r, "Failed to open network namespace path %s: %m", context->network_namespace_path);
3133 }
3134 }
3135
52c239d7 3136 r = setup_input(context, params, socket_fd, named_iofds);
ff0af2a1
LP
3137 if (r < 0) {
3138 *exit_status = EXIT_STDIN;
12145637 3139 return log_unit_error_errno(unit, r, "Failed to set up standard input: %m");
d35fbf6b 3140 }
034c6ed7 3141
52c239d7 3142 r = setup_output(unit, context, params, STDOUT_FILENO, socket_fd, named_iofds, basename(command->path), uid, gid, &journal_stream_dev, &journal_stream_ino);
ff0af2a1
LP
3143 if (r < 0) {
3144 *exit_status = EXIT_STDOUT;
12145637 3145 return log_unit_error_errno(unit, r, "Failed to set up standard output: %m");
d35fbf6b
DM
3146 }
3147
52c239d7 3148 r = setup_output(unit, context, params, STDERR_FILENO, socket_fd, named_iofds, basename(command->path), uid, gid, &journal_stream_dev, &journal_stream_ino);
ff0af2a1
LP
3149 if (r < 0) {
3150 *exit_status = EXIT_STDERR;
12145637 3151 return log_unit_error_errno(unit, r, "Failed to set up standard error output: %m");
d35fbf6b
DM
3152 }
3153
d35fbf6b 3154 if (context->oom_score_adjust_set) {
9f8168eb
LP
3155 /* When we can't make this change due to EPERM, then let's silently skip over it. User namespaces
3156 * prohibit write access to this file, and we shouldn't trip up over that. */
3157 r = set_oom_score_adjust(context->oom_score_adjust);
12145637 3158 if (IN_SET(r, -EPERM, -EACCES))
f2341e0a 3159 log_unit_debug_errno(unit, r, "Failed to adjust OOM setting, assuming containerized execution, ignoring: %m");
12145637 3160 else if (r < 0) {
ff0af2a1 3161 *exit_status = EXIT_OOM_ADJUST;
12145637 3162 return log_unit_error_errno(unit, r, "Failed to adjust OOM setting: %m");
613b411c 3163 }
d35fbf6b
DM
3164 }
3165
3166 if (context->nice_set)
3167 if (setpriority(PRIO_PROCESS, 0, context->nice) < 0) {
ff0af2a1 3168 *exit_status = EXIT_NICE;
12145637 3169 return log_unit_error_errno(unit, errno, "Failed to set up process scheduling priority (nice level): %m");
613b411c
LP
3170 }
3171
d35fbf6b
DM
3172 if (context->cpu_sched_set) {
3173 struct sched_param param = {
3174 .sched_priority = context->cpu_sched_priority,
3175 };
3176
ff0af2a1
LP
3177 r = sched_setscheduler(0,
3178 context->cpu_sched_policy |
3179 (context->cpu_sched_reset_on_fork ?
3180 SCHED_RESET_ON_FORK : 0),
3181 &param);
3182 if (r < 0) {
3183 *exit_status = EXIT_SETSCHEDULER;
12145637 3184 return log_unit_error_errno(unit, errno, "Failed to set up CPU scheduling: %m");
fc9b2a84 3185 }
d35fbf6b 3186 }
fc9b2a84 3187
0985c7c4
ZJS
3188 if (context->cpu_set.set)
3189 if (sched_setaffinity(0, context->cpu_set.allocated, context->cpu_set.set) < 0) {
ff0af2a1 3190 *exit_status = EXIT_CPUAFFINITY;
12145637 3191 return log_unit_error_errno(unit, errno, "Failed to set up CPU affinity: %m");
034c6ed7
LP
3192 }
3193
b070c7c0
MS
3194 if (mpol_is_valid(numa_policy_get_type(&context->numa_policy))) {
3195 r = apply_numa_policy(&context->numa_policy);
3196 if (r == -EOPNOTSUPP)
3197 log_unit_debug_errno(unit, SYNTHETIC_ERRNO(r), "NUMA support not available, ignoring.");
3198 else if (r < 0) {
3199 *exit_status = EXIT_NUMA_POLICY;
3200 return log_unit_error_errno(unit, r, "Failed to set NUMA memory policy: %m");
3201 }
3202 }
3203
d35fbf6b
DM
3204 if (context->ioprio_set)
3205 if (ioprio_set(IOPRIO_WHO_PROCESS, 0, context->ioprio) < 0) {
ff0af2a1 3206 *exit_status = EXIT_IOPRIO;
12145637 3207 return log_unit_error_errno(unit, errno, "Failed to set up IO scheduling priority: %m");
d35fbf6b 3208 }
da726a4d 3209
d35fbf6b
DM
3210 if (context->timer_slack_nsec != NSEC_INFINITY)
3211 if (prctl(PR_SET_TIMERSLACK, context->timer_slack_nsec) < 0) {
ff0af2a1 3212 *exit_status = EXIT_TIMERSLACK;
12145637 3213 return log_unit_error_errno(unit, errno, "Failed to set up timer slack: %m");
4c2630eb 3214 }
9eba9da4 3215
21022b9d
LP
3216 if (context->personality != PERSONALITY_INVALID) {
3217 r = safe_personality(context->personality);
3218 if (r < 0) {
ff0af2a1 3219 *exit_status = EXIT_PERSONALITY;
12145637 3220 return log_unit_error_errno(unit, r, "Failed to set up execution domain (personality): %m");
4c2630eb 3221 }
21022b9d 3222 }
94f04347 3223
d35fbf6b 3224 if (context->utmp_id)
df0ff127 3225 utmp_put_init_process(context->utmp_id, getpid_cached(), getsid(0),
6a93917d 3226 context->tty_path,
023a4f67
LP
3227 context->utmp_mode == EXEC_UTMP_INIT ? INIT_PROCESS :
3228 context->utmp_mode == EXEC_UTMP_LOGIN ? LOGIN_PROCESS :
3229 USER_PROCESS,
6a93917d 3230 username);
d35fbf6b 3231
08f67696 3232 if (uid_is_valid(uid)) {
ff0af2a1
LP
3233 r = chown_terminal(STDIN_FILENO, uid);
3234 if (r < 0) {
3235 *exit_status = EXIT_STDIN;
12145637 3236 return log_unit_error_errno(unit, r, "Failed to change ownership of terminal: %m");
071830ff 3237 }
d35fbf6b 3238 }
8e274523 3239
4e1dfa45 3240 /* If delegation is enabled we'll pass ownership of the cgroup to the user of the new process. On cgroup v1
62b9bb26 3241 * this is only about systemd's own hierarchy, i.e. not the controller hierarchies, simply because that's not
4e1dfa45 3242 * safe. On cgroup v2 there's only one hierarchy anyway, and delegation is safe there, hence in that case only
62b9bb26 3243 * touch a single hierarchy too. */
584b8688 3244 if (params->cgroup_path && context->user && (params->flags & EXEC_CGROUP_DELEGATE)) {
62b9bb26 3245 r = cg_set_access(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, uid, gid);
ff0af2a1
LP
3246 if (r < 0) {
3247 *exit_status = EXIT_CGROUP;
12145637 3248 return log_unit_error_errno(unit, r, "Failed to adjust control group access: %m");
034c6ed7 3249 }
d35fbf6b 3250 }
034c6ed7 3251
72fd1768 3252 for (dt = 0; dt < _EXEC_DIRECTORY_TYPE_MAX; dt++) {
8679efde 3253 r = setup_exec_directory(context, params, uid, gid, dt, exit_status);
12145637
LP
3254 if (r < 0)
3255 return log_unit_error_errno(unit, r, "Failed to set up special execution directory in %s: %m", params->prefix[dt]);
d35fbf6b 3256 }
94f04347 3257
7bce046b 3258 r = build_environment(
fd63e712 3259 unit,
7bce046b
LP
3260 context,
3261 params,
3262 n_fds,
3263 home,
3264 username,
3265 shell,
3266 journal_stream_dev,
3267 journal_stream_ino,
3268 &our_env);
2065ca69
JW
3269 if (r < 0) {
3270 *exit_status = EXIT_MEMORY;
12145637 3271 return log_oom();
2065ca69
JW
3272 }
3273
3274 r = build_pass_environment(context, &pass_env);
3275 if (r < 0) {
3276 *exit_status = EXIT_MEMORY;
12145637 3277 return log_oom();
2065ca69
JW
3278 }
3279
3280 accum_env = strv_env_merge(5,
3281 params->environment,
3282 our_env,
3283 pass_env,
3284 context->environment,
3285 files_env,
3286 NULL);
3287 if (!accum_env) {
3288 *exit_status = EXIT_MEMORY;
12145637 3289 return log_oom();
2065ca69 3290 }
1280503b 3291 accum_env = strv_env_clean(accum_env);
2065ca69 3292
096424d1 3293 (void) umask(context->umask);
b213e1c1 3294
b1edf445 3295 r = setup_keyring(unit, context, params, uid, gid);
74dd6b51
LP
3296 if (r < 0) {
3297 *exit_status = EXIT_KEYRING;
12145637 3298 return log_unit_error_errno(unit, r, "Failed to set up kernel keyring: %m");
74dd6b51
LP
3299 }
3300
165a31c0 3301 /* We need sandboxing if the caller asked us to apply it and the command isn't explicitly excepted from it */
1703fa41 3302 needs_sandboxing = (params->flags & EXEC_APPLY_SANDBOXING) && !(command->flags & EXEC_COMMAND_FULLY_PRIVILEGED);
7f18ef0a 3303
165a31c0
LP
3304 /* We need the ambient capability hack, if the caller asked us to apply it and the command is marked for it, and the kernel doesn't actually support ambient caps */
3305 needs_ambient_hack = (params->flags & EXEC_APPLY_SANDBOXING) && (command->flags & EXEC_COMMAND_AMBIENT_MAGIC) && !ambient_capabilities_supported();
7f18ef0a 3306
165a31c0
LP
3307 /* We need setresuid() if the caller asked us to apply sandboxing and the command isn't explicitly excepted from either whole sandboxing or just setresuid() itself, and the ambient hack is not desired */
3308 if (needs_ambient_hack)
3309 needs_setuid = false;
3310 else
3311 needs_setuid = (params->flags & EXEC_APPLY_SANDBOXING) && !(command->flags & (EXEC_COMMAND_FULLY_PRIVILEGED|EXEC_COMMAND_NO_SETUID));
3312
3313 if (needs_sandboxing) {
7f18ef0a
FK
3314 /* MAC enablement checks need to be done before a new mount ns is created, as they rely on /sys being
3315 * present. The actual MAC context application will happen later, as late as possible, to avoid
3316 * impacting our own code paths. */
3317
349cc4a5 3318#if HAVE_SELINUX
43b1f709 3319 use_selinux = mac_selinux_use();
7f18ef0a 3320#endif
f9fa32f0 3321#if ENABLE_SMACK
43b1f709 3322 use_smack = mac_smack_use();
7f18ef0a 3323#endif
349cc4a5 3324#if HAVE_APPARMOR
43b1f709 3325 use_apparmor = mac_apparmor_use();
7f18ef0a 3326#endif
165a31c0 3327 }
7f18ef0a 3328
ce932d2d
LP
3329 if (needs_sandboxing) {
3330 int which_failed;
3331
3332 /* Let's set the resource limits before we call into PAM, so that pam_limits wins over what
3333 * is set here. (See below.) */
3334
3335 r = setrlimit_closest_all((const struct rlimit* const *) context->rlimit, &which_failed);
3336 if (r < 0) {
3337 *exit_status = EXIT_LIMITS;
3338 return log_unit_error_errno(unit, r, "Failed to adjust resource limit RLIMIT_%s: %m", rlimit_to_string(which_failed));
3339 }
3340 }
3341
165a31c0 3342 if (needs_setuid) {
ce932d2d
LP
3343
3344 /* Let's call into PAM after we set up our own idea of resource limits so that pam_limits
3345 * wins here. (See above.) */
3346
165a31c0
LP
3347 if (context->pam_name && username) {
3348 r = setup_pam(context->pam_name, username, uid, gid, context->tty_path, &accum_env, fds, n_fds);
3349 if (r < 0) {
3350 *exit_status = EXIT_PAM;
12145637 3351 return log_unit_error_errno(unit, r, "Failed to set up PAM session: %m");
165a31c0
LP
3352 }
3353 }
b213e1c1 3354 }
ac45f971 3355
a8d08f39
LP
3356 if ((context->private_network || context->network_namespace_path) && runtime && runtime->netns_storage_socket[0] >= 0) {
3357
6e2d7c4f
MS
3358 if (ns_type_supported(NAMESPACE_NET)) {
3359 r = setup_netns(runtime->netns_storage_socket);
3360 if (r < 0) {
3361 *exit_status = EXIT_NETWORK;
3362 return log_unit_error_errno(unit, r, "Failed to set up network namespacing: %m");
3363 }
a8d08f39
LP
3364 } else if (context->network_namespace_path) {
3365 *exit_status = EXIT_NETWORK;
3366 return log_unit_error_errno(unit, SYNTHETIC_ERRNO(EOPNOTSUPP), "NetworkNamespacePath= is not supported, refusing.");
6e2d7c4f
MS
3367 } else
3368 log_unit_warning(unit, "PrivateNetwork=yes is configured, but the kernel does not support network namespaces, ignoring.");
d35fbf6b 3369 }
169c1bda 3370
ee818b89 3371 needs_mount_namespace = exec_needs_mount_namespace(context, params, runtime);
ee818b89 3372 if (needs_mount_namespace) {
7cc5ef5f
ZJS
3373 _cleanup_free_ char *error_path = NULL;
3374
3375 r = apply_mount_namespace(unit, command, context, params, runtime, &error_path);
3fbe8dbe
LP
3376 if (r < 0) {
3377 *exit_status = EXIT_NAMESPACE;
7cc5ef5f
ZJS
3378 return log_unit_error_errno(unit, r, "Failed to set up mount namespacing%s%s: %m",
3379 error_path ? ": " : "", strempty(error_path));
3fbe8dbe 3380 }
d35fbf6b 3381 }
81a2b7ce 3382
aecd5ac6
TM
3383 if (context->protect_hostname) {
3384 if (ns_type_supported(NAMESPACE_UTS)) {
3385 if (unshare(CLONE_NEWUTS) < 0) {
3386 *exit_status = EXIT_NAMESPACE;
3387 return log_unit_error_errno(unit, errno, "Failed to set up UTS namespacing: %m");
3388 }
3389 } else
3390 log_unit_warning(unit, "ProtectHostname=yes is configured, but the kernel does not support UTS namespaces, ignoring namespace setup.");
3391#if HAVE_SECCOMP
3392 r = seccomp_protect_hostname();
3393 if (r < 0) {
3394 *exit_status = EXIT_SECCOMP;
3395 return log_unit_error_errno(unit, r, "Failed to apply hostname restrictions: %m");
3396 }
3397#endif
3398 }
3399
bbeea271 3400 /* Drop groups as early as possible */
165a31c0 3401 if (needs_setuid) {
709dbeac 3402 r = enforce_groups(gid, supplementary_gids, ngids);
096424d1
LP
3403 if (r < 0) {
3404 *exit_status = EXIT_GROUP;
12145637 3405 return log_unit_error_errno(unit, r, "Changing group credentials failed: %m");
096424d1 3406 }
165a31c0 3407 }
096424d1 3408
165a31c0 3409 if (needs_sandboxing) {
349cc4a5 3410#if HAVE_SELINUX
43b1f709 3411 if (use_selinux && params->selinux_context_net && socket_fd >= 0) {
937ccce9
LP
3412 r = mac_selinux_get_child_mls_label(socket_fd, command->path, context->selinux_context, &mac_selinux_context_net);
3413 if (r < 0) {
3414 *exit_status = EXIT_SELINUX_CONTEXT;
12145637 3415 return log_unit_error_errno(unit, r, "Failed to determine SELinux context: %m");
937ccce9 3416 }
9008e1ac 3417 }
9008e1ac
MS
3418#endif
3419
937ccce9
LP
3420 if (context->private_users) {
3421 r = setup_private_users(uid, gid);
3422 if (r < 0) {
3423 *exit_status = EXIT_USER;
12145637 3424 return log_unit_error_errno(unit, r, "Failed to set up user namespacing: %m");
937ccce9 3425 }
d251207d
LP
3426 }
3427 }
3428
165a31c0 3429 /* We repeat the fd closing here, to make sure that nothing is leaked from the PAM modules. Note that we are
5686391b
LP
3430 * more aggressive this time since socket_fd and the netns fds we don't need anymore. We do keep the exec_fd
3431 * however if we have it as we want to keep it open until the final execve(). */
3432
3433 if (params->exec_fd >= 0) {
3434 exec_fd = params->exec_fd;
3435
3436 if (exec_fd < 3 + (int) n_fds) {
3437 int moved_fd;
3438
3439 /* Let's move the exec fd far up, so that it's outside of the fd range we want to pass to the
3440 * process we are about to execute. */
3441
3442 moved_fd = fcntl(exec_fd, F_DUPFD_CLOEXEC, 3 + (int) n_fds);
3443 if (moved_fd < 0) {
3444 *exit_status = EXIT_FDS;
3445 return log_unit_error_errno(unit, errno, "Couldn't move exec fd up: %m");
3446 }
3447
3448 safe_close(exec_fd);
3449 exec_fd = moved_fd;
3450 } else {
3451 /* This fd should be FD_CLOEXEC already, but let's make sure. */
3452 r = fd_cloexec(exec_fd, true);
3453 if (r < 0) {
3454 *exit_status = EXIT_FDS;
3455 return log_unit_error_errno(unit, r, "Failed to make exec fd FD_CLOEXEC: %m");
3456 }
3457 }
3458
3459 fds_with_exec_fd = newa(int, n_fds + 1);
7e8d494b 3460 memcpy_safe(fds_with_exec_fd, fds, n_fds * sizeof(int));
5686391b
LP
3461 fds_with_exec_fd[n_fds] = exec_fd;
3462 n_fds_with_exec_fd = n_fds + 1;
3463 } else {
3464 fds_with_exec_fd = fds;
3465 n_fds_with_exec_fd = n_fds;
3466 }
3467
3468 r = close_all_fds(fds_with_exec_fd, n_fds_with_exec_fd);
ff0af2a1
LP
3469 if (r >= 0)
3470 r = shift_fds(fds, n_fds);
3471 if (r >= 0)
25b583d7 3472 r = flags_fds(fds, n_socket_fds, n_storage_fds, context->non_blocking);
ff0af2a1
LP
3473 if (r < 0) {
3474 *exit_status = EXIT_FDS;
12145637 3475 return log_unit_error_errno(unit, r, "Failed to adjust passed file descriptors: %m");
d35fbf6b 3476 }
e66cf1a3 3477
5686391b
LP
3478 /* At this point, the fds we want to pass to the program are all ready and set up, with O_CLOEXEC turned off
3479 * and at the right fd numbers. There are no other fds open, with one exception: the exec_fd if it is defined,
3480 * and it has O_CLOEXEC set, after all we want it to be closed by the execve(), so that our parent knows we
3481 * came this far. */
3482
165a31c0 3483 secure_bits = context->secure_bits;
e66cf1a3 3484
165a31c0
LP
3485 if (needs_sandboxing) {
3486 uint64_t bset;
e66cf1a3 3487
ce932d2d
LP
3488 /* Set the RTPRIO resource limit to 0, but only if nothing else was explicitly
3489 * requested. (Note this is placed after the general resource limit initialization, see
3490 * above, in order to take precedence.) */
f4170c67
LP
3491 if (context->restrict_realtime && !context->rlimit[RLIMIT_RTPRIO]) {
3492 if (setrlimit(RLIMIT_RTPRIO, &RLIMIT_MAKE_CONST(0)) < 0) {
3493 *exit_status = EXIT_LIMITS;
12145637 3494 return log_unit_error_errno(unit, errno, "Failed to adjust RLIMIT_RTPRIO resource limit: %m");
f4170c67
LP
3495 }
3496 }
3497
37ac2744
JB
3498#if ENABLE_SMACK
3499 /* LSM Smack needs the capability CAP_MAC_ADMIN to change the current execution security context of the
3500 * process. This is the latest place before dropping capabilities. Other MAC contexts are set later. */
3501 if (use_smack) {
3502 r = setup_smack(context, command);
3503 if (r < 0) {
3504 *exit_status = EXIT_SMACK_PROCESS_LABEL;
3505 return log_unit_error_errno(unit, r, "Failed to set SMACK process label: %m");
3506 }
3507 }
3508#endif
3509
165a31c0
LP
3510 bset = context->capability_bounding_set;
3511 /* If the ambient caps hack is enabled (which means the kernel can't do them, and the user asked for
3512 * our magic fallback), then let's add some extra caps, so that the service can drop privs of its own,
3513 * instead of us doing that */
3514 if (needs_ambient_hack)
3515 bset |= (UINT64_C(1) << CAP_SETPCAP) |
3516 (UINT64_C(1) << CAP_SETUID) |
3517 (UINT64_C(1) << CAP_SETGID);
3518
3519 if (!cap_test_all(bset)) {
3520 r = capability_bounding_set_drop(bset, false);
ff0af2a1
LP
3521 if (r < 0) {
3522 *exit_status = EXIT_CAPABILITIES;
12145637 3523 return log_unit_error_errno(unit, r, "Failed to drop capabilities: %m");
3b8bddde 3524 }
4c2630eb 3525 }
3b8bddde 3526
755d4b67
IP
3527 /* This is done before enforce_user, but ambient set
3528 * does not survive over setresuid() if keep_caps is not set. */
165a31c0
LP
3529 if (!needs_ambient_hack &&
3530 context->capability_ambient_set != 0) {
755d4b67
IP
3531 r = capability_ambient_set_apply(context->capability_ambient_set, true);
3532 if (r < 0) {
3533 *exit_status = EXIT_CAPABILITIES;
12145637 3534 return log_unit_error_errno(unit, r, "Failed to apply ambient capabilities (before UID change): %m");
755d4b67 3535 }
755d4b67 3536 }
165a31c0 3537 }
755d4b67 3538
165a31c0 3539 if (needs_setuid) {
08f67696 3540 if (uid_is_valid(uid)) {
ff0af2a1
LP
3541 r = enforce_user(context, uid);
3542 if (r < 0) {
3543 *exit_status = EXIT_USER;
12145637 3544 return log_unit_error_errno(unit, r, "Failed to change UID to " UID_FMT ": %m", uid);
5b6319dc 3545 }
165a31c0
LP
3546
3547 if (!needs_ambient_hack &&
3548 context->capability_ambient_set != 0) {
755d4b67
IP
3549
3550 /* Fix the ambient capabilities after user change. */
3551 r = capability_ambient_set_apply(context->capability_ambient_set, false);
3552 if (r < 0) {
3553 *exit_status = EXIT_CAPABILITIES;
12145637 3554 return log_unit_error_errno(unit, r, "Failed to apply ambient capabilities (after UID change): %m");
755d4b67
IP
3555 }
3556
3557 /* If we were asked to change user and ambient capabilities
3558 * were requested, we had to add keep-caps to the securebits
3559 * so that we would maintain the inherited capability set
3560 * through the setresuid(). Make sure that the bit is added
3561 * also to the context secure_bits so that we don't try to
3562 * drop the bit away next. */
3563
7f508f2c 3564 secure_bits |= 1<<SECURE_KEEP_CAPS;
755d4b67 3565 }
5b6319dc 3566 }
165a31c0 3567 }
d35fbf6b 3568
56ef8db9
JB
3569 /* Apply working directory here, because the working directory might be on NFS and only the user running
3570 * this service might have the correct privilege to change to the working directory */
3571 r = apply_working_directory(context, params, home, needs_mount_namespace, exit_status);
3572 if (r < 0)
3573 return log_unit_error_errno(unit, r, "Changing to the requested working directory failed: %m");
3574
165a31c0 3575 if (needs_sandboxing) {
37ac2744 3576 /* Apply other MAC contexts late, but before seccomp syscall filtering, as those should really be last to
5cd9cd35
LP
3577 * influence our own codepaths as little as possible. Moreover, applying MAC contexts usually requires
3578 * syscalls that are subject to seccomp filtering, hence should probably be applied before the syscalls
3579 * are restricted. */
3580
349cc4a5 3581#if HAVE_SELINUX
43b1f709 3582 if (use_selinux) {
5cd9cd35
LP
3583 char *exec_context = mac_selinux_context_net ?: context->selinux_context;
3584
3585 if (exec_context) {
3586 r = setexeccon(exec_context);
3587 if (r < 0) {
3588 *exit_status = EXIT_SELINUX_CONTEXT;
12145637 3589 return log_unit_error_errno(unit, r, "Failed to change SELinux context to %s: %m", exec_context);
5cd9cd35
LP
3590 }
3591 }
3592 }
3593#endif
3594
349cc4a5 3595#if HAVE_APPARMOR
43b1f709 3596 if (use_apparmor && context->apparmor_profile) {
5cd9cd35
LP
3597 r = aa_change_onexec(context->apparmor_profile);
3598 if (r < 0 && !context->apparmor_profile_ignore) {
3599 *exit_status = EXIT_APPARMOR_PROFILE;
12145637 3600 return log_unit_error_errno(unit, errno, "Failed to prepare AppArmor profile change to %s: %m", context->apparmor_profile);
5cd9cd35
LP
3601 }
3602 }
3603#endif
3604
165a31c0
LP
3605 /* PR_GET_SECUREBITS is not privileged, while PR_SET_SECUREBITS is. So to suppress potential EPERMs
3606 * we'll try not to call PR_SET_SECUREBITS unless necessary. */
755d4b67
IP
3607 if (prctl(PR_GET_SECUREBITS) != secure_bits)
3608 if (prctl(PR_SET_SECUREBITS, secure_bits) < 0) {
ff0af2a1 3609 *exit_status = EXIT_SECUREBITS;
12145637 3610 return log_unit_error_errno(unit, errno, "Failed to set process secure bits: %m");
ff01d048 3611 }
5b6319dc 3612
59eeb84b 3613 if (context_has_no_new_privileges(context))
d35fbf6b 3614 if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) < 0) {
ff0af2a1 3615 *exit_status = EXIT_NO_NEW_PRIVILEGES;
12145637 3616 return log_unit_error_errno(unit, errno, "Failed to disable new privileges: %m");
d35fbf6b
DM
3617 }
3618
349cc4a5 3619#if HAVE_SECCOMP
469830d1
LP
3620 r = apply_address_families(unit, context);
3621 if (r < 0) {
3622 *exit_status = EXIT_ADDRESS_FAMILIES;
12145637 3623 return log_unit_error_errno(unit, r, "Failed to restrict address families: %m");
4c2630eb 3624 }
04aa0cb9 3625
469830d1
LP
3626 r = apply_memory_deny_write_execute(unit, context);
3627 if (r < 0) {
3628 *exit_status = EXIT_SECCOMP;
12145637 3629 return log_unit_error_errno(unit, r, "Failed to disable writing to executable memory: %m");
f3e43635 3630 }
f4170c67 3631
469830d1
LP
3632 r = apply_restrict_realtime(unit, context);
3633 if (r < 0) {
3634 *exit_status = EXIT_SECCOMP;
12145637 3635 return log_unit_error_errno(unit, r, "Failed to apply realtime restrictions: %m");
f4170c67
LP
3636 }
3637
f69567cb
LP
3638 r = apply_restrict_suid_sgid(unit, context);
3639 if (r < 0) {
3640 *exit_status = EXIT_SECCOMP;
3641 return log_unit_error_errno(unit, r, "Failed to apply SUID/SGID restrictions: %m");
3642 }
3643
add00535
LP
3644 r = apply_restrict_namespaces(unit, context);
3645 if (r < 0) {
3646 *exit_status = EXIT_SECCOMP;
12145637 3647 return log_unit_error_errno(unit, r, "Failed to apply namespace restrictions: %m");
add00535
LP
3648 }
3649
469830d1
LP
3650 r = apply_protect_sysctl(unit, context);
3651 if (r < 0) {
3652 *exit_status = EXIT_SECCOMP;
12145637 3653 return log_unit_error_errno(unit, r, "Failed to apply sysctl restrictions: %m");
502d704e
DH
3654 }
3655
469830d1
LP
3656 r = apply_protect_kernel_modules(unit, context);
3657 if (r < 0) {
3658 *exit_status = EXIT_SECCOMP;
12145637 3659 return log_unit_error_errno(unit, r, "Failed to apply module loading restrictions: %m");
59eeb84b
LP
3660 }
3661
469830d1
LP
3662 r = apply_private_devices(unit, context);
3663 if (r < 0) {
3664 *exit_status = EXIT_SECCOMP;
12145637 3665 return log_unit_error_errno(unit, r, "Failed to set up private devices: %m");
469830d1
LP
3666 }
3667
3668 r = apply_syscall_archs(unit, context);
3669 if (r < 0) {
3670 *exit_status = EXIT_SECCOMP;
12145637 3671 return log_unit_error_errno(unit, r, "Failed to apply syscall architecture restrictions: %m");
ba128bb8
LP
3672 }
3673
78e864e5
TM
3674 r = apply_lock_personality(unit, context);
3675 if (r < 0) {
3676 *exit_status = EXIT_SECCOMP;
12145637 3677 return log_unit_error_errno(unit, r, "Failed to lock personalities: %m");
78e864e5
TM
3678 }
3679
5cd9cd35
LP
3680 /* This really should remain the last step before the execve(), to make sure our own code is unaffected
3681 * by the filter as little as possible. */
165a31c0 3682 r = apply_syscall_filter(unit, context, needs_ambient_hack);
469830d1
LP
3683 if (r < 0) {
3684 *exit_status = EXIT_SECCOMP;
12145637 3685 return log_unit_error_errno(unit, r, "Failed to apply system call filters: %m");
d35fbf6b
DM
3686 }
3687#endif
d35fbf6b 3688 }
034c6ed7 3689
00819cc1
LP
3690 if (!strv_isempty(context->unset_environment)) {
3691 char **ee = NULL;
3692
3693 ee = strv_env_delete(accum_env, 1, context->unset_environment);
3694 if (!ee) {
3695 *exit_status = EXIT_MEMORY;
12145637 3696 return log_oom();
00819cc1
LP
3697 }
3698
130d3d22 3699 strv_free_and_replace(accum_env, ee);
00819cc1
LP
3700 }
3701
7ca69792
AZ
3702 if (!FLAGS_SET(command->flags, EXEC_COMMAND_NO_ENV_EXPAND)) {
3703 replaced_argv = replace_env_argv(command->argv, accum_env);
3704 if (!replaced_argv) {
3705 *exit_status = EXIT_MEMORY;
3706 return log_oom();
3707 }
3708 final_argv = replaced_argv;
3709 } else
3710 final_argv = command->argv;
034c6ed7 3711
f1d34068 3712 if (DEBUG_LOGGING) {
d35fbf6b 3713 _cleanup_free_ char *line;
81a2b7ce 3714
d35fbf6b 3715 line = exec_command_line(final_argv);
a1230ff9 3716 if (line)
f2341e0a 3717 log_struct(LOG_DEBUG,
f2341e0a
LP
3718 "EXECUTABLE=%s", command->path,
3719 LOG_UNIT_MESSAGE(unit, "Executing: %s", line),
ba360bb0 3720 LOG_UNIT_ID(unit),
a1230ff9 3721 LOG_UNIT_INVOCATION_ID(unit));
d35fbf6b 3722 }
dd305ec9 3723
5686391b
LP
3724 if (exec_fd >= 0) {
3725 uint8_t hot = 1;
3726
3727 /* We have finished with all our initializations. Let's now let the manager know that. From this point
3728 * on, if the manager sees POLLHUP on the exec_fd, then execve() was successful. */
3729
3730 if (write(exec_fd, &hot, sizeof(hot)) < 0) {
3731 *exit_status = EXIT_EXEC;
3732 return log_unit_error_errno(unit, errno, "Failed to enable exec_fd: %m");
3733 }
3734 }
3735
2065ca69 3736 execve(command->path, final_argv, accum_env);
5686391b
LP
3737 r = -errno;
3738
3739 if (exec_fd >= 0) {
3740 uint8_t hot = 0;
3741
3742 /* The execve() failed. This means the exec_fd is still open. Which means we need to tell the manager
3743 * that POLLHUP on it no longer means execve() succeeded. */
3744
3745 if (write(exec_fd, &hot, sizeof(hot)) < 0) {
3746 *exit_status = EXIT_EXEC;
3747 return log_unit_error_errno(unit, errno, "Failed to disable exec_fd: %m");
3748 }
3749 }
12145637 3750
5686391b
LP
3751 if (r == -ENOENT && (command->flags & EXEC_COMMAND_IGNORE_FAILURE)) {
3752 log_struct_errno(LOG_INFO, r,
12145637
LP
3753 "MESSAGE_ID=" SD_MESSAGE_SPAWN_FAILED_STR,
3754 LOG_UNIT_ID(unit),
3755 LOG_UNIT_INVOCATION_ID(unit),
3756 LOG_UNIT_MESSAGE(unit, "Executable %s missing, skipping: %m",
3757 command->path),
a1230ff9 3758 "EXECUTABLE=%s", command->path);
12145637
LP
3759 return 0;
3760 }
3761
ff0af2a1 3762 *exit_status = EXIT_EXEC;
5686391b 3763 return log_unit_error_errno(unit, r, "Failed to execute command: %m");
d35fbf6b 3764}
81a2b7ce 3765
34cf6c43
YW
3766static int exec_context_load_environment(const Unit *unit, const ExecContext *c, char ***l);
3767static int exec_context_named_iofds(const ExecContext *c, const ExecParameters *p, int named_iofds[3]);
3768
f2341e0a
LP
3769int exec_spawn(Unit *unit,
3770 ExecCommand *command,
d35fbf6b
DM
3771 const ExecContext *context,
3772 const ExecParameters *params,
3773 ExecRuntime *runtime,
29206d46 3774 DynamicCreds *dcreds,
d35fbf6b 3775 pid_t *ret) {
8351ceae 3776
ee39ca20 3777 int socket_fd, r, named_iofds[3] = { -1, -1, -1 }, *fds = NULL;
78f93209 3778 _cleanup_free_ char *subcgroup_path = NULL;
d35fbf6b 3779 _cleanup_strv_free_ char **files_env = NULL;
da6053d0 3780 size_t n_storage_fds = 0, n_socket_fds = 0;
ff0af2a1 3781 _cleanup_free_ char *line = NULL;
d35fbf6b 3782 pid_t pid;
8351ceae 3783
f2341e0a 3784 assert(unit);
d35fbf6b
DM
3785 assert(command);
3786 assert(context);
3787 assert(ret);
3788 assert(params);
25b583d7 3789 assert(params->fds || (params->n_socket_fds + params->n_storage_fds <= 0));
4298d0b5 3790
d35fbf6b
DM
3791 if (context->std_input == EXEC_INPUT_SOCKET ||
3792 context->std_output == EXEC_OUTPUT_SOCKET ||
3793 context->std_error == EXEC_OUTPUT_SOCKET) {
17df7223 3794
4c47affc 3795 if (params->n_socket_fds > 1) {
f2341e0a 3796 log_unit_error(unit, "Got more than one socket.");
d35fbf6b 3797 return -EINVAL;
ff0af2a1 3798 }
eef65bf3 3799
4c47affc 3800 if (params->n_socket_fds == 0) {
488ab41c
AA
3801 log_unit_error(unit, "Got no socket.");
3802 return -EINVAL;
3803 }
3804
d35fbf6b
DM
3805 socket_fd = params->fds[0];
3806 } else {
3807 socket_fd = -1;
3808 fds = params->fds;
9b141911 3809 n_socket_fds = params->n_socket_fds;
25b583d7 3810 n_storage_fds = params->n_storage_fds;
d35fbf6b 3811 }
94f04347 3812
34cf6c43 3813 r = exec_context_named_iofds(context, params, named_iofds);
52c239d7
LB
3814 if (r < 0)
3815 return log_unit_error_errno(unit, r, "Failed to load a named file descriptor: %m");
3816
f2341e0a 3817 r = exec_context_load_environment(unit, context, &files_env);
ff0af2a1 3818 if (r < 0)
f2341e0a 3819 return log_unit_error_errno(unit, r, "Failed to load environment files: %m");
034c6ed7 3820
ee39ca20 3821 line = exec_command_line(command->argv);
d35fbf6b
DM
3822 if (!line)
3823 return log_oom();
fab56fc5 3824
f2341e0a 3825 log_struct(LOG_DEBUG,
f2341e0a
LP
3826 LOG_UNIT_MESSAGE(unit, "About to execute: %s", line),
3827 "EXECUTABLE=%s", command->path,
ba360bb0 3828 LOG_UNIT_ID(unit),
a1230ff9 3829 LOG_UNIT_INVOCATION_ID(unit));
12145637 3830
78f93209
LP
3831 if (params->cgroup_path) {
3832 r = exec_parameters_get_cgroup_path(params, &subcgroup_path);
3833 if (r < 0)
3834 return log_unit_error_errno(unit, r, "Failed to acquire subcgroup path: %m");
3835 if (r > 0) { /* We are using a child cgroup */
3836 r = cg_create(SYSTEMD_CGROUP_CONTROLLER, subcgroup_path);
3837 if (r < 0)
3838 return log_unit_error_errno(unit, r, "Failed to create control group '%s': %m", subcgroup_path);
3839 }
3840 }
3841
d35fbf6b
DM
3842 pid = fork();
3843 if (pid < 0)
74129a12 3844 return log_unit_error_errno(unit, errno, "Failed to fork: %m");
d35fbf6b
DM
3845
3846 if (pid == 0) {
12145637 3847 int exit_status = EXIT_SUCCESS;
ff0af2a1 3848
f2341e0a
LP
3849 r = exec_child(unit,
3850 command,
ff0af2a1
LP
3851 context,
3852 params,
3853 runtime,
29206d46 3854 dcreds,
ff0af2a1 3855 socket_fd,
52c239d7 3856 named_iofds,
4c47affc 3857 fds,
9b141911 3858 n_socket_fds,
25b583d7 3859 n_storage_fds,
ff0af2a1 3860 files_env,
00d9ef85 3861 unit->manager->user_lookup_fds[1],
12145637
LP
3862 &exit_status);
3863
a1230ff9 3864 if (r < 0)
12145637
LP
3865 log_struct_errno(LOG_ERR, r,
3866 "MESSAGE_ID=" SD_MESSAGE_SPAWN_FAILED_STR,
3867 LOG_UNIT_ID(unit),
3868 LOG_UNIT_INVOCATION_ID(unit),
3869 LOG_UNIT_MESSAGE(unit, "Failed at step %s spawning %s: %m",
3870 exit_status_to_string(exit_status, EXIT_STATUS_SYSTEMD),
3871 command->path),
a1230ff9 3872 "EXECUTABLE=%s", command->path);
4c2630eb 3873
ff0af2a1 3874 _exit(exit_status);
034c6ed7
LP
3875 }
3876
f2341e0a 3877 log_unit_debug(unit, "Forked %s as "PID_FMT, command->path, pid);
23635a85 3878
78f93209
LP
3879 /* We add the new process to the cgroup both in the child (so that we can be sure that no user code is ever
3880 * executed outside of the cgroup) and in the parent (so that we can be sure that when we kill the cgroup the
3881 * process will be killed too). */
3882 if (subcgroup_path)
3883 (void) cg_attach(SYSTEMD_CGROUP_CONTROLLER, subcgroup_path, pid);
2da3263a 3884
b58b4116 3885 exec_status_start(&command->exec_status, pid);
9fb86720 3886
034c6ed7 3887 *ret = pid;
5cb5a6ff
LP
3888 return 0;
3889}
3890
034c6ed7 3891void exec_context_init(ExecContext *c) {
3536f49e
YW
3892 ExecDirectoryType i;
3893
034c6ed7
LP
3894 assert(c);
3895
4c12626c 3896 c->umask = 0022;
9eba9da4 3897 c->ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 0);
94f04347 3898 c->cpu_sched_policy = SCHED_OTHER;
071830ff 3899 c->syslog_priority = LOG_DAEMON|LOG_INFO;
74922904 3900 c->syslog_level_prefix = true;
353e12c2 3901 c->ignore_sigpipe = true;
3a43da28 3902 c->timer_slack_nsec = NSEC_INFINITY;
050f7277 3903 c->personality = PERSONALITY_INVALID;
72fd1768 3904 for (i = 0; i < _EXEC_DIRECTORY_TYPE_MAX; i++)
3536f49e 3905 c->directories[i].mode = 0755;
a103496c 3906 c->capability_bounding_set = CAP_ALL;
aa9d574d
YW
3907 assert_cc(NAMESPACE_FLAGS_INITIAL != NAMESPACE_FLAGS_ALL);
3908 c->restrict_namespaces = NAMESPACE_FLAGS_INITIAL;
d3070fbd 3909 c->log_level_max = -1;
b070c7c0 3910 numa_policy_reset(&c->numa_policy);
034c6ed7
LP
3911}
3912
613b411c 3913void exec_context_done(ExecContext *c) {
3536f49e 3914 ExecDirectoryType i;
d3070fbd 3915 size_t l;
5cb5a6ff
LP
3916
3917 assert(c);
3918
6796073e
LP
3919 c->environment = strv_free(c->environment);
3920 c->environment_files = strv_free(c->environment_files);
b4c14404 3921 c->pass_environment = strv_free(c->pass_environment);
00819cc1 3922 c->unset_environment = strv_free(c->unset_environment);
8c7be95e 3923
31ce987c 3924 rlimit_free_all(c->rlimit);
034c6ed7 3925
2038c3f5 3926 for (l = 0; l < 3; l++) {
52c239d7 3927 c->stdio_fdname[l] = mfree(c->stdio_fdname[l]);
2038c3f5
LP
3928 c->stdio_file[l] = mfree(c->stdio_file[l]);
3929 }
52c239d7 3930
a1e58e8e
LP
3931 c->working_directory = mfree(c->working_directory);
3932 c->root_directory = mfree(c->root_directory);
915e6d16 3933 c->root_image = mfree(c->root_image);
a1e58e8e
LP
3934 c->tty_path = mfree(c->tty_path);
3935 c->syslog_identifier = mfree(c->syslog_identifier);
3936 c->user = mfree(c->user);
3937 c->group = mfree(c->group);
034c6ed7 3938
6796073e 3939 c->supplementary_groups = strv_free(c->supplementary_groups);
94f04347 3940
a1e58e8e 3941 c->pam_name = mfree(c->pam_name);
5b6319dc 3942
2a624c36
AP
3943 c->read_only_paths = strv_free(c->read_only_paths);
3944 c->read_write_paths = strv_free(c->read_write_paths);
3945 c->inaccessible_paths = strv_free(c->inaccessible_paths);
82c121a4 3946
d2d6c096 3947 bind_mount_free_many(c->bind_mounts, c->n_bind_mounts);
8e06d57c
YW
3948 c->bind_mounts = NULL;
3949 c->n_bind_mounts = 0;
2abd4e38
YW
3950 temporary_filesystem_free_many(c->temporary_filesystems, c->n_temporary_filesystems);
3951 c->temporary_filesystems = NULL;
3952 c->n_temporary_filesystems = 0;
d2d6c096 3953
0985c7c4 3954 cpu_set_reset(&c->cpu_set);
b070c7c0 3955 numa_policy_reset(&c->numa_policy);
86a3475b 3956
a1e58e8e
LP
3957 c->utmp_id = mfree(c->utmp_id);
3958 c->selinux_context = mfree(c->selinux_context);
3959 c->apparmor_profile = mfree(c->apparmor_profile);
5b8e1b77 3960 c->smack_process_label = mfree(c->smack_process_label);
eef65bf3 3961
8cfa775f 3962 c->syscall_filter = hashmap_free(c->syscall_filter);
525d3cc7
LP
3963 c->syscall_archs = set_free(c->syscall_archs);
3964 c->address_families = set_free(c->address_families);
e66cf1a3 3965
72fd1768 3966 for (i = 0; i < _EXEC_DIRECTORY_TYPE_MAX; i++)
3536f49e 3967 c->directories[i].paths = strv_free(c->directories[i].paths);
d3070fbd
LP
3968
3969 c->log_level_max = -1;
3970
3971 exec_context_free_log_extra_fields(c);
08f3be7a 3972
90fc172e
AZ
3973 c->log_rate_limit_interval_usec = 0;
3974 c->log_rate_limit_burst = 0;
3975
08f3be7a
LP
3976 c->stdin_data = mfree(c->stdin_data);
3977 c->stdin_data_size = 0;
a8d08f39
LP
3978
3979 c->network_namespace_path = mfree(c->network_namespace_path);
e66cf1a3
LP
3980}
3981
34cf6c43 3982int exec_context_destroy_runtime_directory(const ExecContext *c, const char *runtime_prefix) {
e66cf1a3
LP
3983 char **i;
3984
3985 assert(c);
3986
3987 if (!runtime_prefix)
3988 return 0;
3989
3536f49e 3990 STRV_FOREACH(i, c->directories[EXEC_DIRECTORY_RUNTIME].paths) {
e66cf1a3
LP
3991 _cleanup_free_ char *p;
3992
7bc4bf4a 3993 p = path_join(runtime_prefix, *i);
e66cf1a3
LP
3994 if (!p)
3995 return -ENOMEM;
3996
7bc4bf4a
LP
3997 /* We execute this synchronously, since we need to be sure this is gone when we start the
3998 * service next. */
c6878637 3999 (void) rm_rf(p, REMOVE_ROOT);
e66cf1a3
LP
4000 }
4001
4002 return 0;
5cb5a6ff
LP
4003}
4004
34cf6c43 4005static void exec_command_done(ExecCommand *c) {
43d0fcbd
LP
4006 assert(c);
4007
a1e58e8e 4008 c->path = mfree(c->path);
6796073e 4009 c->argv = strv_free(c->argv);
43d0fcbd
LP
4010}
4011
da6053d0
LP
4012void exec_command_done_array(ExecCommand *c, size_t n) {
4013 size_t i;
43d0fcbd
LP
4014
4015 for (i = 0; i < n; i++)
4016 exec_command_done(c+i);
4017}
4018
f1acf85a 4019ExecCommand* exec_command_free_list(ExecCommand *c) {
5cb5a6ff
LP
4020 ExecCommand *i;
4021
4022 while ((i = c)) {
71fda00f 4023 LIST_REMOVE(command, c, i);
43d0fcbd 4024 exec_command_done(i);
5cb5a6ff
LP
4025 free(i);
4026 }
f1acf85a
ZJS
4027
4028 return NULL;
5cb5a6ff
LP
4029}
4030
da6053d0
LP
4031void exec_command_free_array(ExecCommand **c, size_t n) {
4032 size_t i;
034c6ed7 4033
f1acf85a
ZJS
4034 for (i = 0; i < n; i++)
4035 c[i] = exec_command_free_list(c[i]);
034c6ed7
LP
4036}
4037
6a1d4d9f
LP
4038void exec_command_reset_status_array(ExecCommand *c, size_t n) {
4039 size_t i;
4040
4041 for (i = 0; i < n; i++)
4042 exec_status_reset(&c[i].exec_status);
4043}
4044
4045void exec_command_reset_status_list_array(ExecCommand **c, size_t n) {
4046 size_t i;
4047
4048 for (i = 0; i < n; i++) {
4049 ExecCommand *z;
4050
4051 LIST_FOREACH(command, z, c[i])
4052 exec_status_reset(&z->exec_status);
4053 }
4054}
4055
039f0e70 4056typedef struct InvalidEnvInfo {
34cf6c43 4057 const Unit *unit;
039f0e70
LP
4058 const char *path;
4059} InvalidEnvInfo;
4060
4061static void invalid_env(const char *p, void *userdata) {
4062 InvalidEnvInfo *info = userdata;
4063
f2341e0a 4064 log_unit_error(info->unit, "Ignoring invalid environment assignment '%s': %s", p, info->path);
039f0e70
LP
4065}
4066
52c239d7
LB
4067const char* exec_context_fdname(const ExecContext *c, int fd_index) {
4068 assert(c);
4069
4070 switch (fd_index) {
5073ff6b 4071
52c239d7
LB
4072 case STDIN_FILENO:
4073 if (c->std_input != EXEC_INPUT_NAMED_FD)
4074 return NULL;
5073ff6b 4075
52c239d7 4076 return c->stdio_fdname[STDIN_FILENO] ?: "stdin";
5073ff6b 4077
52c239d7
LB
4078 case STDOUT_FILENO:
4079 if (c->std_output != EXEC_OUTPUT_NAMED_FD)
4080 return NULL;
5073ff6b 4081
52c239d7 4082 return c->stdio_fdname[STDOUT_FILENO] ?: "stdout";
5073ff6b 4083
52c239d7
LB
4084 case STDERR_FILENO:
4085 if (c->std_error != EXEC_OUTPUT_NAMED_FD)
4086 return NULL;
5073ff6b 4087
52c239d7 4088 return c->stdio_fdname[STDERR_FILENO] ?: "stderr";
5073ff6b 4089
52c239d7
LB
4090 default:
4091 return NULL;
4092 }
4093}
4094
3042bbeb 4095static int exec_context_named_iofds(const ExecContext *c, const ExecParameters *p, int named_iofds[static 3]) {
da6053d0 4096 size_t i, targets;
56fbd561 4097 const char* stdio_fdname[3];
da6053d0 4098 size_t n_fds;
52c239d7
LB
4099
4100 assert(c);
4101 assert(p);
4102
4103 targets = (c->std_input == EXEC_INPUT_NAMED_FD) +
4104 (c->std_output == EXEC_OUTPUT_NAMED_FD) +
4105 (c->std_error == EXEC_OUTPUT_NAMED_FD);
4106
4107 for (i = 0; i < 3; i++)
4108 stdio_fdname[i] = exec_context_fdname(c, i);
4109
4c47affc
FB
4110 n_fds = p->n_storage_fds + p->n_socket_fds;
4111
4112 for (i = 0; i < n_fds && targets > 0; i++)
56fbd561
ZJS
4113 if (named_iofds[STDIN_FILENO] < 0 &&
4114 c->std_input == EXEC_INPUT_NAMED_FD &&
4115 stdio_fdname[STDIN_FILENO] &&
4116 streq(p->fd_names[i], stdio_fdname[STDIN_FILENO])) {
4117
52c239d7
LB
4118 named_iofds[STDIN_FILENO] = p->fds[i];
4119 targets--;
56fbd561
ZJS
4120
4121 } else if (named_iofds[STDOUT_FILENO] < 0 &&
4122 c->std_output == EXEC_OUTPUT_NAMED_FD &&
4123 stdio_fdname[STDOUT_FILENO] &&
4124 streq(p->fd_names[i], stdio_fdname[STDOUT_FILENO])) {
4125
52c239d7
LB
4126 named_iofds[STDOUT_FILENO] = p->fds[i];
4127 targets--;
56fbd561
ZJS
4128
4129 } else if (named_iofds[STDERR_FILENO] < 0 &&
4130 c->std_error == EXEC_OUTPUT_NAMED_FD &&
4131 stdio_fdname[STDERR_FILENO] &&
4132 streq(p->fd_names[i], stdio_fdname[STDERR_FILENO])) {
4133
52c239d7
LB
4134 named_iofds[STDERR_FILENO] = p->fds[i];
4135 targets--;
4136 }
4137
56fbd561 4138 return targets == 0 ? 0 : -ENOENT;
52c239d7
LB
4139}
4140
34cf6c43 4141static int exec_context_load_environment(const Unit *unit, const ExecContext *c, char ***l) {
8c7be95e
LP
4142 char **i, **r = NULL;
4143
4144 assert(c);
4145 assert(l);
4146
4147 STRV_FOREACH(i, c->environment_files) {
4148 char *fn;
52511fae
ZJS
4149 int k;
4150 unsigned n;
8c7be95e
LP
4151 bool ignore = false;
4152 char **p;
7fd1b19b 4153 _cleanup_globfree_ glob_t pglob = {};
8c7be95e
LP
4154
4155 fn = *i;
4156
4157 if (fn[0] == '-') {
4158 ignore = true;
313cefa1 4159 fn++;
8c7be95e
LP
4160 }
4161
4162 if (!path_is_absolute(fn)) {
8c7be95e
LP
4163 if (ignore)
4164 continue;
4165
4166 strv_free(r);
4167 return -EINVAL;
4168 }
4169
2bef10ab 4170 /* Filename supports globbing, take all matching files */
d8c92e8b
ZJS
4171 k = safe_glob(fn, 0, &pglob);
4172 if (k < 0) {
2bef10ab
PL
4173 if (ignore)
4174 continue;
8c7be95e 4175
2bef10ab 4176 strv_free(r);
d8c92e8b 4177 return k;
2bef10ab 4178 }
8c7be95e 4179
d8c92e8b
ZJS
4180 /* When we don't match anything, -ENOENT should be returned */
4181 assert(pglob.gl_pathc > 0);
4182
4183 for (n = 0; n < pglob.gl_pathc; n++) {
aa8fbc74 4184 k = load_env_file(NULL, pglob.gl_pathv[n], &p);
2bef10ab
PL
4185 if (k < 0) {
4186 if (ignore)
4187 continue;
8c7be95e 4188
2bef10ab 4189 strv_free(r);
2bef10ab 4190 return k;
e9c1ea9d 4191 }
ebc05a09 4192 /* Log invalid environment variables with filename */
039f0e70
LP
4193 if (p) {
4194 InvalidEnvInfo info = {
f2341e0a 4195 .unit = unit,
039f0e70
LP
4196 .path = pglob.gl_pathv[n]
4197 };
4198
4199 p = strv_env_clean_with_callback(p, invalid_env, &info);
4200 }
8c7be95e 4201
234519ae 4202 if (!r)
2bef10ab
PL
4203 r = p;
4204 else {
4205 char **m;
8c7be95e 4206
2bef10ab
PL
4207 m = strv_env_merge(2, r, p);
4208 strv_free(r);
4209 strv_free(p);
c84a9488 4210 if (!m)
2bef10ab 4211 return -ENOMEM;
2bef10ab
PL
4212
4213 r = m;
4214 }
8c7be95e
LP
4215 }
4216 }
4217
4218 *l = r;
4219
4220 return 0;
4221}
4222
6ac8fdc9 4223static bool tty_may_match_dev_console(const char *tty) {
7b912648 4224 _cleanup_free_ char *resolved = NULL;
6ac8fdc9 4225
1e22b5cd
LP
4226 if (!tty)
4227 return true;
4228
a119ec7c 4229 tty = skip_dev_prefix(tty);
6ac8fdc9
MS
4230
4231 /* trivial identity? */
4232 if (streq(tty, "console"))
4233 return true;
4234
7b912648
LP
4235 if (resolve_dev_console(&resolved) < 0)
4236 return true; /* if we could not resolve, assume it may */
6ac8fdc9
MS
4237
4238 /* "tty0" means the active VC, so it may be the same sometimes */
955f1c85 4239 return path_equal(resolved, tty) || (streq(resolved, "tty0") && tty_is_vc(tty));
6ac8fdc9
MS
4240}
4241
6c0ae739
LP
4242static bool exec_context_may_touch_tty(const ExecContext *ec) {
4243 assert(ec);
1e22b5cd 4244
6c0ae739 4245 return ec->tty_reset ||
1e22b5cd
LP
4246 ec->tty_vhangup ||
4247 ec->tty_vt_disallocate ||
6ac8fdc9
MS
4248 is_terminal_input(ec->std_input) ||
4249 is_terminal_output(ec->std_output) ||
6c0ae739
LP
4250 is_terminal_output(ec->std_error);
4251}
4252
4253bool exec_context_may_touch_console(const ExecContext *ec) {
4254
4255 return exec_context_may_touch_tty(ec) &&
1e22b5cd 4256 tty_may_match_dev_console(exec_context_tty_path(ec));
6ac8fdc9
MS
4257}
4258
15ae422b
LP
/* Writes every string of the NULL-terminated list l to f, each one preceded by a single space. A NULL list
 * is treated like an empty one. */
static void strv_fprintf(FILE *f, char **l) {
        assert(f);

        if (!l)
                return;

        for (; *l; l++)
                fprintf(f, " %s", *l);
}
4267
34cf6c43 4268void exec_context_dump(const ExecContext *c, FILE* f, const char *prefix) {
d3070fbd 4269 ExecDirectoryType dt;
c2bbd90b 4270 char **e, **d;
94f04347 4271 unsigned i;
add00535 4272 int r;
9eba9da4 4273
5cb5a6ff
LP
4274 assert(c);
4275 assert(f);
4276
4ad49000 4277 prefix = strempty(prefix);
5cb5a6ff
LP
4278
4279 fprintf(f,
94f04347
LP
4280 "%sUMask: %04o\n"
4281 "%sWorkingDirectory: %s\n"
451a074f 4282 "%sRootDirectory: %s\n"
15ae422b 4283 "%sNonBlocking: %s\n"
64747e2d 4284 "%sPrivateTmp: %s\n"
7f112f50 4285 "%sPrivateDevices: %s\n"
59eeb84b 4286 "%sProtectKernelTunables: %s\n"
e66a2f65 4287 "%sProtectKernelModules: %s\n"
59eeb84b 4288 "%sProtectControlGroups: %s\n"
d251207d
LP
4289 "%sPrivateNetwork: %s\n"
4290 "%sPrivateUsers: %s\n"
1b8689f9
LP
4291 "%sProtectHome: %s\n"
4292 "%sProtectSystem: %s\n"
5d997827 4293 "%sMountAPIVFS: %s\n"
f3e43635 4294 "%sIgnoreSIGPIPE: %s\n"
f4170c67 4295 "%sMemoryDenyWriteExecute: %s\n"
b1edf445 4296 "%sRestrictRealtime: %s\n"
f69567cb 4297 "%sRestrictSUIDSGID: %s\n"
aecd5ac6
TM
4298 "%sKeyringMode: %s\n"
4299 "%sProtectHostname: %s\n",
5cb5a6ff 4300 prefix, c->umask,
9eba9da4 4301 prefix, c->working_directory ? c->working_directory : "/",
451a074f 4302 prefix, c->root_directory ? c->root_directory : "/",
15ae422b 4303 prefix, yes_no(c->non_blocking),
64747e2d 4304 prefix, yes_no(c->private_tmp),
7f112f50 4305 prefix, yes_no(c->private_devices),
59eeb84b 4306 prefix, yes_no(c->protect_kernel_tunables),
e66a2f65 4307 prefix, yes_no(c->protect_kernel_modules),
59eeb84b 4308 prefix, yes_no(c->protect_control_groups),
d251207d
LP
4309 prefix, yes_no(c->private_network),
4310 prefix, yes_no(c->private_users),
1b8689f9
LP
4311 prefix, protect_home_to_string(c->protect_home),
4312 prefix, protect_system_to_string(c->protect_system),
5d997827 4313 prefix, yes_no(c->mount_apivfs),
f3e43635 4314 prefix, yes_no(c->ignore_sigpipe),
f4170c67 4315 prefix, yes_no(c->memory_deny_write_execute),
b1edf445 4316 prefix, yes_no(c->restrict_realtime),
f69567cb 4317 prefix, yes_no(c->restrict_suid_sgid),
aecd5ac6
TM
4318 prefix, exec_keyring_mode_to_string(c->keyring_mode),
4319 prefix, yes_no(c->protect_hostname));
fb33a393 4320
915e6d16
LP
4321 if (c->root_image)
4322 fprintf(f, "%sRootImage: %s\n", prefix, c->root_image);
4323
8c7be95e
LP
4324 STRV_FOREACH(e, c->environment)
4325 fprintf(f, "%sEnvironment: %s\n", prefix, *e);
4326
4327 STRV_FOREACH(e, c->environment_files)
4328 fprintf(f, "%sEnvironmentFile: %s\n", prefix, *e);
94f04347 4329
b4c14404
FB
4330 STRV_FOREACH(e, c->pass_environment)
4331 fprintf(f, "%sPassEnvironment: %s\n", prefix, *e);
4332
00819cc1
LP
4333 STRV_FOREACH(e, c->unset_environment)
4334 fprintf(f, "%sUnsetEnvironment: %s\n", prefix, *e);
4335
53f47dfc
YW
4336 fprintf(f, "%sRuntimeDirectoryPreserve: %s\n", prefix, exec_preserve_mode_to_string(c->runtime_directory_preserve_mode));
4337
72fd1768 4338 for (dt = 0; dt < _EXEC_DIRECTORY_TYPE_MAX; dt++) {
3536f49e
YW
4339 fprintf(f, "%s%sMode: %04o\n", prefix, exec_directory_type_to_string(dt), c->directories[dt].mode);
4340
4341 STRV_FOREACH(d, c->directories[dt].paths)
4342 fprintf(f, "%s%s: %s\n", prefix, exec_directory_type_to_string(dt), *d);
4343 }
c2bbd90b 4344
fb33a393
LP
4345 if (c->nice_set)
4346 fprintf(f,
4347 "%sNice: %i\n",
4348 prefix, c->nice);
4349
dd6c17b1 4350 if (c->oom_score_adjust_set)
fb33a393 4351 fprintf(f,
dd6c17b1
LP
4352 "%sOOMScoreAdjust: %i\n",
4353 prefix, c->oom_score_adjust);
9eba9da4 4354
94f04347 4355 for (i = 0; i < RLIM_NLIMITS; i++)
3c11da9d 4356 if (c->rlimit[i]) {
4c3a2b84 4357 fprintf(f, "%sLimit%s: " RLIM_FMT "\n",
3c11da9d 4358 prefix, rlimit_to_string(i), c->rlimit[i]->rlim_max);
4c3a2b84 4359 fprintf(f, "%sLimit%sSoft: " RLIM_FMT "\n",
3c11da9d
EV
4360 prefix, rlimit_to_string(i), c->rlimit[i]->rlim_cur);
4361 }
94f04347 4362
f8b69d1d 4363 if (c->ioprio_set) {
1756a011 4364 _cleanup_free_ char *class_str = NULL;
f8b69d1d 4365
837df140
YW
4366 r = ioprio_class_to_string_alloc(IOPRIO_PRIO_CLASS(c->ioprio), &class_str);
4367 if (r >= 0)
4368 fprintf(f, "%sIOSchedulingClass: %s\n", prefix, class_str);
4369
4370 fprintf(f, "%sIOPriority: %lu\n", prefix, IOPRIO_PRIO_DATA(c->ioprio));
f8b69d1d 4371 }
94f04347 4372
f8b69d1d 4373 if (c->cpu_sched_set) {
1756a011 4374 _cleanup_free_ char *policy_str = NULL;
f8b69d1d 4375
837df140
YW
4376 r = sched_policy_to_string_alloc(c->cpu_sched_policy, &policy_str);
4377 if (r >= 0)
4378 fprintf(f, "%sCPUSchedulingPolicy: %s\n", prefix, policy_str);
4379
94f04347 4380 fprintf(f,
38b48754
LP
4381 "%sCPUSchedulingPriority: %i\n"
4382 "%sCPUSchedulingResetOnFork: %s\n",
38b48754
LP
4383 prefix, c->cpu_sched_priority,
4384 prefix, yes_no(c->cpu_sched_reset_on_fork));
b929bf04 4385 }
94f04347 4386
0985c7c4 4387 if (c->cpu_set.set) {
e7fca352
MS
4388 _cleanup_free_ char *affinity = NULL;
4389
4390 affinity = cpu_set_to_range_string(&c->cpu_set);
4391 fprintf(f, "%sCPUAffinity: %s\n", prefix, affinity);
94f04347
LP
4392 }
4393
b070c7c0
MS
4394 if (mpol_is_valid(numa_policy_get_type(&c->numa_policy))) {
4395 _cleanup_free_ char *nodes = NULL;
4396
4397 nodes = cpu_set_to_range_string(&c->numa_policy.nodes);
4398 fprintf(f, "%sNUMAPolicy: %s\n", prefix, mpol_to_string(numa_policy_get_type(&c->numa_policy)));
4399 fprintf(f, "%sNUMAMask: %s\n", prefix, strnull(nodes));
4400 }
4401
3a43da28 4402 if (c->timer_slack_nsec != NSEC_INFINITY)
ccd06097 4403 fprintf(f, "%sTimerSlackNSec: "NSEC_FMT "\n", prefix, c->timer_slack_nsec);
94f04347
LP
4404
4405 fprintf(f,
80876c20
LP
4406 "%sStandardInput: %s\n"
4407 "%sStandardOutput: %s\n"
4408 "%sStandardError: %s\n",
4409 prefix, exec_input_to_string(c->std_input),
4410 prefix, exec_output_to_string(c->std_output),
4411 prefix, exec_output_to_string(c->std_error));
4412
befc4a80
LP
4413 if (c->std_input == EXEC_INPUT_NAMED_FD)
4414 fprintf(f, "%sStandardInputFileDescriptorName: %s\n", prefix, c->stdio_fdname[STDIN_FILENO]);
4415 if (c->std_output == EXEC_OUTPUT_NAMED_FD)
4416 fprintf(f, "%sStandardOutputFileDescriptorName: %s\n", prefix, c->stdio_fdname[STDOUT_FILENO]);
4417 if (c->std_error == EXEC_OUTPUT_NAMED_FD)
4418 fprintf(f, "%sStandardErrorFileDescriptorName: %s\n", prefix, c->stdio_fdname[STDERR_FILENO]);
4419
4420 if (c->std_input == EXEC_INPUT_FILE)
4421 fprintf(f, "%sStandardInputFile: %s\n", prefix, c->stdio_file[STDIN_FILENO]);
4422 if (c->std_output == EXEC_OUTPUT_FILE)
4423 fprintf(f, "%sStandardOutputFile: %s\n", prefix, c->stdio_file[STDOUT_FILENO]);
566b7d23
ZD
4424 if (c->std_output == EXEC_OUTPUT_FILE_APPEND)
4425 fprintf(f, "%sStandardOutputFileToAppend: %s\n", prefix, c->stdio_file[STDOUT_FILENO]);
befc4a80
LP
4426 if (c->std_error == EXEC_OUTPUT_FILE)
4427 fprintf(f, "%sStandardErrorFile: %s\n", prefix, c->stdio_file[STDERR_FILENO]);
566b7d23
ZD
4428 if (c->std_error == EXEC_OUTPUT_FILE_APPEND)
4429 fprintf(f, "%sStandardErrorFileToAppend: %s\n", prefix, c->stdio_file[STDERR_FILENO]);
befc4a80 4430
80876c20
LP
4431 if (c->tty_path)
4432 fprintf(f,
6ea832a2
LP
4433 "%sTTYPath: %s\n"
4434 "%sTTYReset: %s\n"
4435 "%sTTYVHangup: %s\n"
4436 "%sTTYVTDisallocate: %s\n",
4437 prefix, c->tty_path,
4438 prefix, yes_no(c->tty_reset),
4439 prefix, yes_no(c->tty_vhangup),
4440 prefix, yes_no(c->tty_vt_disallocate));
94f04347 4441
9f6444eb
LP
4442 if (IN_SET(c->std_output,
4443 EXEC_OUTPUT_SYSLOG,
4444 EXEC_OUTPUT_KMSG,
4445 EXEC_OUTPUT_JOURNAL,
4446 EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
4447 EXEC_OUTPUT_KMSG_AND_CONSOLE,
4448 EXEC_OUTPUT_JOURNAL_AND_CONSOLE) ||
4449 IN_SET(c->std_error,
4450 EXEC_OUTPUT_SYSLOG,
4451 EXEC_OUTPUT_KMSG,
4452 EXEC_OUTPUT_JOURNAL,
4453 EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
4454 EXEC_OUTPUT_KMSG_AND_CONSOLE,
4455 EXEC_OUTPUT_JOURNAL_AND_CONSOLE)) {
f8b69d1d 4456
5ce70e5b 4457 _cleanup_free_ char *fac_str = NULL, *lvl_str = NULL;
f8b69d1d 4458
837df140
YW
4459 r = log_facility_unshifted_to_string_alloc(c->syslog_priority >> 3, &fac_str);
4460 if (r >= 0)
4461 fprintf(f, "%sSyslogFacility: %s\n", prefix, fac_str);
f8b69d1d 4462
837df140
YW
4463 r = log_level_to_string_alloc(LOG_PRI(c->syslog_priority), &lvl_str);
4464 if (r >= 0)
4465 fprintf(f, "%sSyslogLevel: %s\n", prefix, lvl_str);
f8b69d1d 4466 }
94f04347 4467
d3070fbd
LP
4468 if (c->log_level_max >= 0) {
4469 _cleanup_free_ char *t = NULL;
4470
4471 (void) log_level_to_string_alloc(c->log_level_max, &t);
4472
4473 fprintf(f, "%sLogLevelMax: %s\n", prefix, strna(t));
4474 }
4475
90fc172e
AZ
4476 if (c->log_rate_limit_interval_usec > 0) {
4477 char buf_timespan[FORMAT_TIMESPAN_MAX];
4478
4479 fprintf(f,
4480 "%sLogRateLimitIntervalSec: %s\n",
4481 prefix, format_timespan(buf_timespan, sizeof(buf_timespan), c->log_rate_limit_interval_usec, USEC_PER_SEC));
4482 }
4483
4484 if (c->log_rate_limit_burst > 0)
4485 fprintf(f, "%sLogRateLimitBurst: %u\n", prefix, c->log_rate_limit_burst);
4486
d3070fbd
LP
4487 if (c->n_log_extra_fields > 0) {
4488 size_t j;
4489
4490 for (j = 0; j < c->n_log_extra_fields; j++) {
4491 fprintf(f, "%sLogExtraFields: ", prefix);
4492 fwrite(c->log_extra_fields[j].iov_base,
4493 1, c->log_extra_fields[j].iov_len,
4494 f);
4495 fputc('\n', f);
4496 }
4497 }
4498
07d46372
YW
4499 if (c->secure_bits) {
4500 _cleanup_free_ char *str = NULL;
4501
4502 r = secure_bits_to_string_alloc(c->secure_bits, &str);
4503 if (r >= 0)
4504 fprintf(f, "%sSecure Bits: %s\n", prefix, str);
4505 }
94f04347 4506
a103496c 4507 if (c->capability_bounding_set != CAP_ALL) {
dd1f5bd0 4508 _cleanup_free_ char *str = NULL;
94f04347 4509
dd1f5bd0
YW
4510 r = capability_set_to_string_alloc(c->capability_bounding_set, &str);
4511 if (r >= 0)
4512 fprintf(f, "%sCapabilityBoundingSet: %s\n", prefix, str);
755d4b67
IP
4513 }
4514
4515 if (c->capability_ambient_set != 0) {
dd1f5bd0 4516 _cleanup_free_ char *str = NULL;
755d4b67 4517
dd1f5bd0
YW
4518 r = capability_set_to_string_alloc(c->capability_ambient_set, &str);
4519 if (r >= 0)
4520 fprintf(f, "%sAmbientCapabilities: %s\n", prefix, str);
94f04347
LP
4521 }
4522
4523 if (c->user)
f2d3769a 4524 fprintf(f, "%sUser: %s\n", prefix, c->user);
94f04347 4525 if (c->group)
f2d3769a 4526 fprintf(f, "%sGroup: %s\n", prefix, c->group);
94f04347 4527
29206d46
LP
4528 fprintf(f, "%sDynamicUser: %s\n", prefix, yes_no(c->dynamic_user));
4529
ac6e8be6 4530 if (!strv_isempty(c->supplementary_groups)) {
94f04347 4531 fprintf(f, "%sSupplementaryGroups:", prefix);
15ae422b
LP
4532 strv_fprintf(f, c->supplementary_groups);
4533 fputs("\n", f);
4534 }
94f04347 4535
5b6319dc 4536 if (c->pam_name)
f2d3769a 4537 fprintf(f, "%sPAMName: %s\n", prefix, c->pam_name);
5b6319dc 4538
58629001 4539 if (!strv_isempty(c->read_write_paths)) {
2a624c36
AP
4540 fprintf(f, "%sReadWritePaths:", prefix);
4541 strv_fprintf(f, c->read_write_paths);
15ae422b
LP
4542 fputs("\n", f);
4543 }
4544
58629001 4545 if (!strv_isempty(c->read_only_paths)) {
2a624c36
AP
4546 fprintf(f, "%sReadOnlyPaths:", prefix);
4547 strv_fprintf(f, c->read_only_paths);
15ae422b
LP
4548 fputs("\n", f);
4549 }
94f04347 4550
58629001 4551 if (!strv_isempty(c->inaccessible_paths)) {
2a624c36
AP
4552 fprintf(f, "%sInaccessiblePaths:", prefix);
4553 strv_fprintf(f, c->inaccessible_paths);
94f04347
LP
4554 fputs("\n", f);
4555 }
2e22afe9 4556
d2d6c096 4557 if (c->n_bind_mounts > 0)
4ca763a9
YW
4558 for (i = 0; i < c->n_bind_mounts; i++)
4559 fprintf(f, "%s%s: %s%s:%s:%s\n", prefix,
d2d6c096 4560 c->bind_mounts[i].read_only ? "BindReadOnlyPaths" : "BindPaths",
4ca763a9 4561 c->bind_mounts[i].ignore_enoent ? "-": "",
d2d6c096
LP
4562 c->bind_mounts[i].source,
4563 c->bind_mounts[i].destination,
4564 c->bind_mounts[i].recursive ? "rbind" : "norbind");
d2d6c096 4565
2abd4e38
YW
4566 if (c->n_temporary_filesystems > 0)
4567 for (i = 0; i < c->n_temporary_filesystems; i++) {
4568 TemporaryFileSystem *t = c->temporary_filesystems + i;
4569
4570 fprintf(f, "%sTemporaryFileSystem: %s%s%s\n", prefix,
4571 t->path,
4572 isempty(t->options) ? "" : ":",
4573 strempty(t->options));
4574 }
4575
169c1bda
LP
4576 if (c->utmp_id)
4577 fprintf(f,
4578 "%sUtmpIdentifier: %s\n",
4579 prefix, c->utmp_id);
7b52a628
MS
4580
4581 if (c->selinux_context)
4582 fprintf(f,
5f8640fb
LP
4583 "%sSELinuxContext: %s%s\n",
4584 prefix, c->selinux_context_ignore ? "-" : "", c->selinux_context);
17df7223 4585
80c21aea
WC
4586 if (c->apparmor_profile)
4587 fprintf(f,
4588 "%sAppArmorProfile: %s%s\n",
4589 prefix, c->apparmor_profile_ignore ? "-" : "", c->apparmor_profile);
4590
4591 if (c->smack_process_label)
4592 fprintf(f,
4593 "%sSmackProcessLabel: %s%s\n",
4594 prefix, c->smack_process_label_ignore ? "-" : "", c->smack_process_label);
4595
050f7277 4596 if (c->personality != PERSONALITY_INVALID)
ac45f971
LP
4597 fprintf(f,
4598 "%sPersonality: %s\n",
4599 prefix, strna(personality_to_string(c->personality)));
4600
78e864e5
TM
4601 fprintf(f,
4602 "%sLockPersonality: %s\n",
4603 prefix, yes_no(c->lock_personality));
4604
17df7223 4605 if (c->syscall_filter) {
349cc4a5 4606#if HAVE_SECCOMP
17df7223 4607 Iterator j;
8cfa775f 4608 void *id, *val;
17df7223 4609 bool first = true;
351a19b1 4610#endif
17df7223
LP
4611
4612 fprintf(f,
57183d11 4613 "%sSystemCallFilter: ",
17df7223
LP
4614 prefix);
4615
4616 if (!c->syscall_whitelist)
4617 fputc('~', f);
4618
349cc4a5 4619#if HAVE_SECCOMP
8cfa775f 4620 HASHMAP_FOREACH_KEY(val, id, c->syscall_filter, j) {
17df7223 4621 _cleanup_free_ char *name = NULL;
8cfa775f
YW
4622 const char *errno_name = NULL;
4623 int num = PTR_TO_INT(val);
17df7223
LP
4624
4625 if (first)
4626 first = false;
4627 else
4628 fputc(' ', f);
4629
57183d11 4630 name = seccomp_syscall_resolve_num_arch(SCMP_ARCH_NATIVE, PTR_TO_INT(id) - 1);
17df7223 4631 fputs(strna(name), f);
8cfa775f
YW
4632
4633 if (num >= 0) {
4634 errno_name = errno_to_name(num);
4635 if (errno_name)
4636 fprintf(f, ":%s", errno_name);
4637 else
4638 fprintf(f, ":%d", num);
4639 }
17df7223 4640 }
351a19b1 4641#endif
17df7223
LP
4642
4643 fputc('\n', f);
4644 }
4645
57183d11 4646 if (c->syscall_archs) {
349cc4a5 4647#if HAVE_SECCOMP
57183d11
LP
4648 Iterator j;
4649 void *id;
4650#endif
4651
4652 fprintf(f,
4653 "%sSystemCallArchitectures:",
4654 prefix);
4655
349cc4a5 4656#if HAVE_SECCOMP
57183d11
LP
4657 SET_FOREACH(id, c->syscall_archs, j)
4658 fprintf(f, " %s", strna(seccomp_arch_to_string(PTR_TO_UINT32(id) - 1)));
4659#endif
4660 fputc('\n', f);
4661 }
4662
add00535
LP
4663 if (exec_context_restrict_namespaces_set(c)) {
4664 _cleanup_free_ char *s = NULL;
4665
86c2a9f1 4666 r = namespace_flags_to_string(c->restrict_namespaces, &s);
add00535
LP
4667 if (r >= 0)
4668 fprintf(f, "%sRestrictNamespaces: %s\n",
4669 prefix, s);
4670 }
4671
a8d08f39
LP
4672 if (c->network_namespace_path)
4673 fprintf(f,
4674 "%sNetworkNamespacePath: %s\n",
4675 prefix, c->network_namespace_path);
4676
3df90f24
YW
4677 if (c->syscall_errno > 0) {
4678 const char *errno_name;
4679
4680 fprintf(f, "%sSystemCallErrorNumber: ", prefix);
4681
4682 errno_name = errno_to_name(c->syscall_errno);
4683 if (errno_name)
4684 fprintf(f, "%s\n", errno_name);
4685 else
4686 fprintf(f, "%d\n", c->syscall_errno);
4687 }
5cb5a6ff
LP
4688}
4689
34cf6c43 4690bool exec_context_maintains_privileges(const ExecContext *c) {
a931ad47
LP
4691 assert(c);
4692
61233823 4693 /* Returns true if the process forked off would run under
a931ad47
LP
4694 * an unchanged UID or as root. */
4695
4696 if (!c->user)
4697 return true;
4698
4699 if (streq(c->user, "root") || streq(c->user, "0"))
4700 return true;
4701
4702 return false;
4703}
4704
34cf6c43 4705int exec_context_get_effective_ioprio(const ExecContext *c) {
7f452159
LP
4706 int p;
4707
4708 assert(c);
4709
4710 if (c->ioprio_set)
4711 return c->ioprio;
4712
4713 p = ioprio_get(IOPRIO_WHO_PROCESS, 0);
4714 if (p < 0)
4715 return IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 4);
4716
4717 return p;
4718}
4719
d3070fbd
LP
4720void exec_context_free_log_extra_fields(ExecContext *c) {
4721 size_t l;
4722
4723 assert(c);
4724
4725 for (l = 0; l < c->n_log_extra_fields; l++)
4726 free(c->log_extra_fields[l].iov_base);
4727 c->log_extra_fields = mfree(c->log_extra_fields);
4728 c->n_log_extra_fields = 0;
4729}
4730
6f765baf
LP
4731void exec_context_revert_tty(ExecContext *c) {
4732 int r;
4733
4734 assert(c);
4735
4736 /* First, reset the TTY (possibly kicking everybody else from the TTY) */
4737 exec_context_tty_reset(c, NULL);
4738
4739 /* And then undo what chown_terminal() did earlier. Note that we only do this if we have a path
4740 * configured. If the TTY was passed to us as file descriptor we assume the TTY is opened and managed
4741 * by whoever passed it to us and thus knows better when and how to chmod()/chown() it back. */
4742
4743 if (exec_context_may_touch_tty(c)) {
4744 const char *path;
4745
4746 path = exec_context_tty_path(c);
4747 if (path) {
4748 r = chmod_and_chown(path, TTY_MODE, 0, TTY_GID);
4749 if (r < 0 && r != -ENOENT)
4750 log_warning_errno(r, "Failed to reset TTY ownership/access mode of %s, ignoring: %m", path);
4751 }
4752 }
4753}
4754
b58b4116 4755void exec_status_start(ExecStatus *s, pid_t pid) {
034c6ed7 4756 assert(s);
5cb5a6ff 4757
2ed26ed0
LP
4758 *s = (ExecStatus) {
4759 .pid = pid,
4760 };
4761
b58b4116
LP
4762 dual_timestamp_get(&s->start_timestamp);
4763}
4764
34cf6c43 4765void exec_status_exit(ExecStatus *s, const ExecContext *context, pid_t pid, int code, int status) {
b58b4116
LP
4766 assert(s);
4767
2ed26ed0
LP
4768 if (s->pid != pid) {
4769 *s = (ExecStatus) {
4770 .pid = pid,
4771 };
4772 }
b58b4116 4773
63983207 4774 dual_timestamp_get(&s->exit_timestamp);
9fb86720 4775
034c6ed7
LP
4776 s->code = code;
4777 s->status = status;
169c1bda 4778
6f765baf
LP
4779 if (context && context->utmp_id)
4780 (void) utmp_put_dead_process(context->utmp_id, pid, code, status);
9fb86720
LP
4781}
4782
6a1d4d9f
LP
4783void exec_status_reset(ExecStatus *s) {
4784 assert(s);
4785
4786 *s = (ExecStatus) {};
4787}
4788
34cf6c43 4789void exec_status_dump(const ExecStatus *s, FILE *f, const char *prefix) {
9fb86720
LP
4790 char buf[FORMAT_TIMESTAMP_MAX];
4791
4792 assert(s);
4793 assert(f);
4794
9fb86720
LP
4795 if (s->pid <= 0)
4796 return;
4797
4c940960
LP
4798 prefix = strempty(prefix);
4799
9fb86720 4800 fprintf(f,
ccd06097
ZJS
4801 "%sPID: "PID_FMT"\n",
4802 prefix, s->pid);
9fb86720 4803
af9d16e1 4804 if (dual_timestamp_is_set(&s->start_timestamp))
9fb86720
LP
4805 fprintf(f,
4806 "%sStart Timestamp: %s\n",
63983207 4807 prefix, format_timestamp(buf, sizeof(buf), s->start_timestamp.realtime));
9fb86720 4808
af9d16e1 4809 if (dual_timestamp_is_set(&s->exit_timestamp))
9fb86720
LP
4810 fprintf(f,
4811 "%sExit Timestamp: %s\n"
4812 "%sExit Code: %s\n"
4813 "%sExit Status: %i\n",
63983207 4814 prefix, format_timestamp(buf, sizeof(buf), s->exit_timestamp.realtime),
9fb86720
LP
4815 prefix, sigchld_code_to_string(s->code),
4816 prefix, s->status);
5cb5a6ff 4817}
44d8db9e 4818
34cf6c43 4819static char *exec_command_line(char **argv) {
44d8db9e
LP
4820 size_t k;
4821 char *n, *p, **a;
4822 bool first = true;
4823
9e2f7c11 4824 assert(argv);
44d8db9e 4825
9164977d 4826 k = 1;
9e2f7c11 4827 STRV_FOREACH(a, argv)
44d8db9e
LP
4828 k += strlen(*a)+3;
4829
5cd9cd35
LP
4830 n = new(char, k);
4831 if (!n)
44d8db9e
LP
4832 return NULL;
4833
4834 p = n;
9e2f7c11 4835 STRV_FOREACH(a, argv) {
44d8db9e
LP
4836
4837 if (!first)
4838 *(p++) = ' ';
4839 else
4840 first = false;
4841
4842 if (strpbrk(*a, WHITESPACE)) {
4843 *(p++) = '\'';
4844 p = stpcpy(p, *a);
4845 *(p++) = '\'';
4846 } else
4847 p = stpcpy(p, *a);
4848
4849 }
4850
9164977d
LP
4851 *p = 0;
4852
44d8db9e
LP
4853 /* FIXME: this doesn't really handle arguments that have
4854 * spaces and ticks in them */
4855
4856 return n;
4857}
4858
34cf6c43 4859static void exec_command_dump(ExecCommand *c, FILE *f, const char *prefix) {
e1d75803 4860 _cleanup_free_ char *cmd = NULL;
4c940960 4861 const char *prefix2;
44d8db9e
LP
4862
4863 assert(c);
4864 assert(f);
4865
4c940960 4866 prefix = strempty(prefix);
63c372cb 4867 prefix2 = strjoina(prefix, "\t");
44d8db9e 4868
9e2f7c11 4869 cmd = exec_command_line(c->argv);
44d8db9e
LP
4870 fprintf(f,
4871 "%sCommand Line: %s\n",
4872 prefix, cmd ? cmd : strerror(ENOMEM));
4873
9fb86720 4874 exec_status_dump(&c->exec_status, f, prefix2);
44d8db9e
LP
4875}
4876
4877void exec_command_dump_list(ExecCommand *c, FILE *f, const char *prefix) {
4878 assert(f);
4879
4c940960 4880 prefix = strempty(prefix);
44d8db9e
LP
4881
4882 LIST_FOREACH(command, c, c)
4883 exec_command_dump(c, f, prefix);
4884}
94f04347 4885
a6a80b4f
LP
4886void exec_command_append_list(ExecCommand **l, ExecCommand *e) {
4887 ExecCommand *end;
4888
4889 assert(l);
4890 assert(e);
4891
4892 if (*l) {
35b8ca3a 4893 /* It's kind of important, that we keep the order here */
71fda00f
LP
4894 LIST_FIND_TAIL(command, *l, end);
4895 LIST_INSERT_AFTER(command, *l, end, e);
a6a80b4f
LP
4896 } else
4897 *l = e;
4898}
4899
26fd040d
LP
4900int exec_command_set(ExecCommand *c, const char *path, ...) {
4901 va_list ap;
4902 char **l, *p;
4903
4904 assert(c);
4905 assert(path);
4906
4907 va_start(ap, path);
4908 l = strv_new_ap(path, ap);
4909 va_end(ap);
4910
4911 if (!l)
4912 return -ENOMEM;
4913
250a918d
LP
4914 p = strdup(path);
4915 if (!p) {
26fd040d
LP
4916 strv_free(l);
4917 return -ENOMEM;
4918 }
4919
6897dfe8 4920 free_and_replace(c->path, p);
26fd040d 4921
130d3d22 4922 return strv_free_and_replace(c->argv, l);
26fd040d
LP
4923}
4924
86b23b07 4925int exec_command_append(ExecCommand *c, const char *path, ...) {
e63ff941 4926 _cleanup_strv_free_ char **l = NULL;
86b23b07 4927 va_list ap;
86b23b07
JS
4928 int r;
4929
4930 assert(c);
4931 assert(path);
4932
4933 va_start(ap, path);
4934 l = strv_new_ap(path, ap);
4935 va_end(ap);
4936
4937 if (!l)
4938 return -ENOMEM;
4939
e287086b 4940 r = strv_extend_strv(&c->argv, l, false);
e63ff941 4941 if (r < 0)
86b23b07 4942 return r;
86b23b07
JS
4943
4944 return 0;
4945}
4946
e8a565cb
YW
4947static void *remove_tmpdir_thread(void *p) {
4948 _cleanup_free_ char *path = p;
86b23b07 4949
e8a565cb
YW
4950 (void) rm_rf(path, REMOVE_ROOT|REMOVE_PHYSICAL);
4951 return NULL;
4952}
4953
4954static ExecRuntime* exec_runtime_free(ExecRuntime *rt, bool destroy) {
4955 int r;
4956
4957 if (!rt)
4958 return NULL;
4959
4960 if (rt->manager)
4961 (void) hashmap_remove(rt->manager->exec_runtime_by_id, rt->id);
4962
4963 /* When destroy is true, then rm_rf tmp_dir and var_tmp_dir. */
4964 if (destroy && rt->tmp_dir) {
4965 log_debug("Spawning thread to nuke %s", rt->tmp_dir);
4966
4967 r = asynchronous_job(remove_tmpdir_thread, rt->tmp_dir);
4968 if (r < 0) {
4969 log_warning_errno(r, "Failed to nuke %s: %m", rt->tmp_dir);
4970 free(rt->tmp_dir);
4971 }
4972
4973 rt->tmp_dir = NULL;
4974 }
613b411c 4975
e8a565cb
YW
4976 if (destroy && rt->var_tmp_dir) {
4977 log_debug("Spawning thread to nuke %s", rt->var_tmp_dir);
4978
4979 r = asynchronous_job(remove_tmpdir_thread, rt->var_tmp_dir);
4980 if (r < 0) {
4981 log_warning_errno(r, "Failed to nuke %s: %m", rt->var_tmp_dir);
4982 free(rt->var_tmp_dir);
4983 }
4984
4985 rt->var_tmp_dir = NULL;
4986 }
4987
4988 rt->id = mfree(rt->id);
4989 rt->tmp_dir = mfree(rt->tmp_dir);
4990 rt->var_tmp_dir = mfree(rt->var_tmp_dir);
4991 safe_close_pair(rt->netns_storage_socket);
4992 return mfree(rt);
4993}
4994
4995static void exec_runtime_freep(ExecRuntime **rt) {
da6bc6ed 4996 (void) exec_runtime_free(*rt, false);
e8a565cb
YW
4997}
4998
8e8009dc
LP
4999static int exec_runtime_allocate(ExecRuntime **ret) {
5000 ExecRuntime *n;
613b411c 5001
8e8009dc 5002 assert(ret);
613b411c 5003
8e8009dc
LP
5004 n = new(ExecRuntime, 1);
5005 if (!n)
613b411c
LP
5006 return -ENOMEM;
5007
8e8009dc
LP
5008 *n = (ExecRuntime) {
5009 .netns_storage_socket = { -1, -1 },
5010 };
5011
5012 *ret = n;
613b411c
LP
5013 return 0;
5014}
5015
e8a565cb
YW
5016static int exec_runtime_add(
5017 Manager *m,
5018 const char *id,
5019 const char *tmp_dir,
5020 const char *var_tmp_dir,
5021 const int netns_storage_socket[2],
5022 ExecRuntime **ret) {
5023
5024 _cleanup_(exec_runtime_freep) ExecRuntime *rt = NULL;
613b411c
LP
5025 int r;
5026
e8a565cb 5027 assert(m);
613b411c
LP
5028 assert(id);
5029
e8a565cb
YW
5030 r = hashmap_ensure_allocated(&m->exec_runtime_by_id, &string_hash_ops);
5031 if (r < 0)
5032 return r;
613b411c 5033
e8a565cb 5034 r = exec_runtime_allocate(&rt);
613b411c
LP
5035 if (r < 0)
5036 return r;
5037
e8a565cb
YW
5038 rt->id = strdup(id);
5039 if (!rt->id)
5040 return -ENOMEM;
5041
5042 if (tmp_dir) {
5043 rt->tmp_dir = strdup(tmp_dir);
5044 if (!rt->tmp_dir)
5045 return -ENOMEM;
5046
5047 /* When tmp_dir is set, then we require var_tmp_dir is also set. */
5048 assert(var_tmp_dir);
5049 rt->var_tmp_dir = strdup(var_tmp_dir);
5050 if (!rt->var_tmp_dir)
5051 return -ENOMEM;
5052 }
5053
5054 if (netns_storage_socket) {
5055 rt->netns_storage_socket[0] = netns_storage_socket[0];
5056 rt->netns_storage_socket[1] = netns_storage_socket[1];
613b411c
LP
5057 }
5058
e8a565cb
YW
5059 r = hashmap_put(m->exec_runtime_by_id, rt->id, rt);
5060 if (r < 0)
5061 return r;
5062
5063 rt->manager = m;
5064
5065 if (ret)
5066 *ret = rt;
5067
5068 /* do not remove created ExecRuntime object when the operation succeeds. */
5069 rt = NULL;
5070 return 0;
5071}
5072
5073static int exec_runtime_make(Manager *m, const ExecContext *c, const char *id, ExecRuntime **ret) {
5074 _cleanup_free_ char *tmp_dir = NULL, *var_tmp_dir = NULL;
2fa3742d 5075 _cleanup_close_pair_ int netns_storage_socket[2] = { -1, -1 };
e8a565cb
YW
5076 int r;
5077
5078 assert(m);
5079 assert(c);
5080 assert(id);
5081
5082 /* It is not necessary to create ExecRuntime object. */
a8d08f39 5083 if (!c->private_network && !c->private_tmp && !c->network_namespace_path)
e8a565cb
YW
5084 return 0;
5085
5086 if (c->private_tmp) {
5087 r = setup_tmp_dirs(id, &tmp_dir, &var_tmp_dir);
613b411c
LP
5088 if (r < 0)
5089 return r;
5090 }
5091
a8d08f39 5092 if (c->private_network || c->network_namespace_path) {
e8a565cb
YW
5093 if (socketpair(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC, 0, netns_storage_socket) < 0)
5094 return -errno;
5095 }
5096
5097 r = exec_runtime_add(m, id, tmp_dir, var_tmp_dir, netns_storage_socket, ret);
5098 if (r < 0)
5099 return r;
5100
5101 /* Avoid cleanup */
2fa3742d 5102 netns_storage_socket[0] = netns_storage_socket[1] = -1;
613b411c
LP
5103 return 1;
5104}
5105
e8a565cb
YW
5106int exec_runtime_acquire(Manager *m, const ExecContext *c, const char *id, bool create, ExecRuntime **ret) {
5107 ExecRuntime *rt;
5108 int r;
613b411c 5109
e8a565cb
YW
5110 assert(m);
5111 assert(id);
5112 assert(ret);
5113
5114 rt = hashmap_get(m->exec_runtime_by_id, id);
5115 if (rt)
5116 /* We already have a ExecRuntime object, let's increase the ref count and reuse it */
5117 goto ref;
5118
5119 if (!create)
5120 return 0;
5121
5122 /* If not found, then create a new object. */
5123 r = exec_runtime_make(m, c, id, &rt);
5124 if (r <= 0)
5125 /* When r == 0, it is not necessary to create ExecRuntime object. */
5126 return r;
613b411c 5127
e8a565cb
YW
5128ref:
5129 /* increment reference counter. */
5130 rt->n_ref++;
5131 *ret = rt;
5132 return 1;
5133}
613b411c 5134
e8a565cb
YW
5135ExecRuntime *exec_runtime_unref(ExecRuntime *rt, bool destroy) {
5136 if (!rt)
613b411c
LP
5137 return NULL;
5138
e8a565cb 5139 assert(rt->n_ref > 0);
613b411c 5140
e8a565cb
YW
5141 rt->n_ref--;
5142 if (rt->n_ref > 0)
f2341e0a
LP
5143 return NULL;
5144
e8a565cb 5145 return exec_runtime_free(rt, destroy);
613b411c
LP
5146}
5147
e8a565cb
YW
5148int exec_runtime_serialize(const Manager *m, FILE *f, FDSet *fds) {
5149 ExecRuntime *rt;
5150 Iterator i;
5151
5152 assert(m);
613b411c
LP
5153 assert(f);
5154 assert(fds);
5155
e8a565cb
YW
5156 HASHMAP_FOREACH(rt, m->exec_runtime_by_id, i) {
5157 fprintf(f, "exec-runtime=%s", rt->id);
613b411c 5158
e8a565cb
YW
5159 if (rt->tmp_dir)
5160 fprintf(f, " tmp-dir=%s", rt->tmp_dir);
613b411c 5161
e8a565cb
YW
5162 if (rt->var_tmp_dir)
5163 fprintf(f, " var-tmp-dir=%s", rt->var_tmp_dir);
613b411c 5164
e8a565cb
YW
5165 if (rt->netns_storage_socket[0] >= 0) {
5166 int copy;
613b411c 5167
e8a565cb
YW
5168 copy = fdset_put_dup(fds, rt->netns_storage_socket[0]);
5169 if (copy < 0)
5170 return copy;
613b411c 5171
e8a565cb
YW
5172 fprintf(f, " netns-socket-0=%i", copy);
5173 }
613b411c 5174
e8a565cb
YW
5175 if (rt->netns_storage_socket[1] >= 0) {
5176 int copy;
613b411c 5177
e8a565cb
YW
5178 copy = fdset_put_dup(fds, rt->netns_storage_socket[1]);
5179 if (copy < 0)
5180 return copy;
613b411c 5181
e8a565cb
YW
5182 fprintf(f, " netns-socket-1=%i", copy);
5183 }
5184
5185 fputc('\n', f);
613b411c
LP
5186 }
5187
5188 return 0;
5189}
5190
e8a565cb
YW
5191int exec_runtime_deserialize_compat(Unit *u, const char *key, const char *value, FDSet *fds) {
5192 _cleanup_(exec_runtime_freep) ExecRuntime *rt_create = NULL;
5193 ExecRuntime *rt;
613b411c
LP
5194 int r;
5195
e8a565cb
YW
5196 /* This is for the migration from old (v237 or earlier) deserialization text.
5197 * Due to the bug #7790, this may not work with the units that use JoinsNamespaceOf=.
5198 * Even if the ExecRuntime object originally created by the other unit, we cannot judge
5199 * so or not from the serialized text, then we always creates a new object owned by this. */
5200
5201 assert(u);
613b411c
LP
5202 assert(key);
5203 assert(value);
5204
e8a565cb
YW
5205 /* Manager manages ExecRuntime objects by the unit id.
5206 * So, we omit the serialized text when the unit does not have id (yet?)... */
5207 if (isempty(u->id)) {
5208 log_unit_debug(u, "Invocation ID not found. Dropping runtime parameter.");
5209 return 0;
5210 }
613b411c 5211
e8a565cb
YW
5212 r = hashmap_ensure_allocated(&u->manager->exec_runtime_by_id, &string_hash_ops);
5213 if (r < 0) {
5214 log_unit_debug_errno(u, r, "Failed to allocate storage for runtime parameter: %m");
5215 return 0;
5216 }
5217
5218 rt = hashmap_get(u->manager->exec_runtime_by_id, u->id);
5219 if (!rt) {
5220 r = exec_runtime_allocate(&rt_create);
613b411c 5221 if (r < 0)
f2341e0a 5222 return log_oom();
613b411c 5223
e8a565cb
YW
5224 rt_create->id = strdup(u->id);
5225 if (!rt_create->id)
5226 return log_oom();
5227
5228 rt = rt_create;
5229 }
5230
5231 if (streq(key, "tmp-dir")) {
5232 char *copy;
5233
613b411c
LP
5234 copy = strdup(value);
5235 if (!copy)
5236 return log_oom();
5237
e8a565cb 5238 free_and_replace(rt->tmp_dir, copy);
613b411c
LP
5239
5240 } else if (streq(key, "var-tmp-dir")) {
5241 char *copy;
5242
613b411c
LP
5243 copy = strdup(value);
5244 if (!copy)
5245 return log_oom();
5246
e8a565cb 5247 free_and_replace(rt->var_tmp_dir, copy);
613b411c
LP
5248
5249 } else if (streq(key, "netns-socket-0")) {
5250 int fd;
5251
e8a565cb 5252 if (safe_atoi(value, &fd) < 0 || !fdset_contains(fds, fd)) {
f2341e0a 5253 log_unit_debug(u, "Failed to parse netns socket value: %s", value);
e8a565cb 5254 return 0;
613b411c 5255 }
e8a565cb
YW
5256
5257 safe_close(rt->netns_storage_socket[0]);
5258 rt->netns_storage_socket[0] = fdset_remove(fds, fd);
5259
613b411c
LP
5260 } else if (streq(key, "netns-socket-1")) {
5261 int fd;
5262
e8a565cb 5263 if (safe_atoi(value, &fd) < 0 || !fdset_contains(fds, fd)) {
f2341e0a 5264 log_unit_debug(u, "Failed to parse netns socket value: %s", value);
e8a565cb 5265 return 0;
613b411c 5266 }
e8a565cb
YW
5267
5268 safe_close(rt->netns_storage_socket[1]);
5269 rt->netns_storage_socket[1] = fdset_remove(fds, fd);
613b411c
LP
5270 } else
5271 return 0;
5272
e8a565cb
YW
5273 /* If the object is newly created, then put it to the hashmap which manages ExecRuntime objects. */
5274 if (rt_create) {
5275 r = hashmap_put(u->manager->exec_runtime_by_id, rt_create->id, rt_create);
5276 if (r < 0) {
3fe91079 5277 log_unit_debug_errno(u, r, "Failed to put runtime parameter to manager's storage: %m");
e8a565cb
YW
5278 return 0;
5279 }
613b411c 5280
e8a565cb 5281 rt_create->manager = u->manager;
613b411c 5282
e8a565cb
YW
5283 /* Avoid cleanup */
5284 rt_create = NULL;
5285 }
98b47d54 5286
e8a565cb
YW
5287 return 1;
5288}
613b411c 5289
e8a565cb
YW
5290void exec_runtime_deserialize_one(Manager *m, const char *value, FDSet *fds) {
5291 char *id = NULL, *tmp_dir = NULL, *var_tmp_dir = NULL;
5292 int r, fd0 = -1, fd1 = -1;
5293 const char *p, *v = value;
5294 size_t n;
613b411c 5295
e8a565cb
YW
5296 assert(m);
5297 assert(value);
5298 assert(fds);
98b47d54 5299
e8a565cb
YW
5300 n = strcspn(v, " ");
5301 id = strndupa(v, n);
5302 if (v[n] != ' ')
5303 goto finalize;
5304 p = v + n + 1;
5305
5306 v = startswith(p, "tmp-dir=");
5307 if (v) {
5308 n = strcspn(v, " ");
5309 tmp_dir = strndupa(v, n);
5310 if (v[n] != ' ')
5311 goto finalize;
5312 p = v + n + 1;
5313 }
5314
5315 v = startswith(p, "var-tmp-dir=");
5316 if (v) {
5317 n = strcspn(v, " ");
5318 var_tmp_dir = strndupa(v, n);
5319 if (v[n] != ' ')
5320 goto finalize;
5321 p = v + n + 1;
5322 }
5323
5324 v = startswith(p, "netns-socket-0=");
5325 if (v) {
5326 char *buf;
5327
5328 n = strcspn(v, " ");
5329 buf = strndupa(v, n);
5330 if (safe_atoi(buf, &fd0) < 0 || !fdset_contains(fds, fd0)) {
5331 log_debug("Unable to process exec-runtime netns fd specification.");
5332 return;
98b47d54 5333 }
e8a565cb
YW
5334 fd0 = fdset_remove(fds, fd0);
5335 if (v[n] != ' ')
5336 goto finalize;
5337 p = v + n + 1;
613b411c
LP
5338 }
5339
e8a565cb
YW
5340 v = startswith(p, "netns-socket-1=");
5341 if (v) {
5342 char *buf;
98b47d54 5343
e8a565cb
YW
5344 n = strcspn(v, " ");
5345 buf = strndupa(v, n);
5346 if (safe_atoi(buf, &fd1) < 0 || !fdset_contains(fds, fd1)) {
5347 log_debug("Unable to process exec-runtime netns fd specification.");
5348 return;
98b47d54 5349 }
e8a565cb
YW
5350 fd1 = fdset_remove(fds, fd1);
5351 }
98b47d54 5352
e8a565cb
YW
5353finalize:
5354
5355 r = exec_runtime_add(m, id, tmp_dir, var_tmp_dir, (int[]) { fd0, fd1 }, NULL);
7d853ca6 5356 if (r < 0)
e8a565cb 5357 log_debug_errno(r, "Failed to add exec-runtime: %m");
e8a565cb 5358}
613b411c 5359
e8a565cb
YW
5360void exec_runtime_vacuum(Manager *m) {
5361 ExecRuntime *rt;
5362 Iterator i;
5363
5364 assert(m);
5365
5366 /* Free unreferenced ExecRuntime objects. This is used after manager deserialization process. */
5367
5368 HASHMAP_FOREACH(rt, m->exec_runtime_by_id, i) {
5369 if (rt->n_ref > 0)
5370 continue;
5371
5372 (void) exec_runtime_free(rt, false);
5373 }
613b411c
LP
5374}
5375
b9c04eaf
YW
5376void exec_params_clear(ExecParameters *p) {
5377 if (!p)
5378 return;
5379
5380 strv_free(p->environment);
5381}
5382
80876c20
LP
5383static const char* const exec_input_table[_EXEC_INPUT_MAX] = {
5384 [EXEC_INPUT_NULL] = "null",
5385 [EXEC_INPUT_TTY] = "tty",
5386 [EXEC_INPUT_TTY_FORCE] = "tty-force",
4f2d528d 5387 [EXEC_INPUT_TTY_FAIL] = "tty-fail",
52c239d7
LB
5388 [EXEC_INPUT_SOCKET] = "socket",
5389 [EXEC_INPUT_NAMED_FD] = "fd",
08f3be7a 5390 [EXEC_INPUT_DATA] = "data",
2038c3f5 5391 [EXEC_INPUT_FILE] = "file",
80876c20
LP
5392};
5393
8a0867d6
LP
5394DEFINE_STRING_TABLE_LOOKUP(exec_input, ExecInput);
5395
94f04347 5396static const char* const exec_output_table[_EXEC_OUTPUT_MAX] = {
80876c20 5397 [EXEC_OUTPUT_INHERIT] = "inherit",
94f04347 5398 [EXEC_OUTPUT_NULL] = "null",
80876c20 5399 [EXEC_OUTPUT_TTY] = "tty",
94f04347 5400 [EXEC_OUTPUT_SYSLOG] = "syslog",
28dbc1e8 5401 [EXEC_OUTPUT_SYSLOG_AND_CONSOLE] = "syslog+console",
9a6bca7a 5402 [EXEC_OUTPUT_KMSG] = "kmsg",
28dbc1e8 5403 [EXEC_OUTPUT_KMSG_AND_CONSOLE] = "kmsg+console",
706343f4
LP
5404 [EXEC_OUTPUT_JOURNAL] = "journal",
5405 [EXEC_OUTPUT_JOURNAL_AND_CONSOLE] = "journal+console",
52c239d7
LB
5406 [EXEC_OUTPUT_SOCKET] = "socket",
5407 [EXEC_OUTPUT_NAMED_FD] = "fd",
2038c3f5 5408 [EXEC_OUTPUT_FILE] = "file",
566b7d23 5409 [EXEC_OUTPUT_FILE_APPEND] = "append",
94f04347
LP
5410};
5411
5412DEFINE_STRING_TABLE_LOOKUP(exec_output, ExecOutput);
023a4f67
LP
5413
5414static const char* const exec_utmp_mode_table[_EXEC_UTMP_MODE_MAX] = {
5415 [EXEC_UTMP_INIT] = "init",
5416 [EXEC_UTMP_LOGIN] = "login",
5417 [EXEC_UTMP_USER] = "user",
5418};
5419
5420DEFINE_STRING_TABLE_LOOKUP(exec_utmp_mode, ExecUtmpMode);
53f47dfc
YW
5421
5422static const char* const exec_preserve_mode_table[_EXEC_PRESERVE_MODE_MAX] = {
5423 [EXEC_PRESERVE_NO] = "no",
5424 [EXEC_PRESERVE_YES] = "yes",
5425 [EXEC_PRESERVE_RESTART] = "restart",
5426};
5427
5428DEFINE_STRING_TABLE_LOOKUP_WITH_BOOLEAN(exec_preserve_mode, ExecPreserveMode, EXEC_PRESERVE_YES);
3536f49e 5429
72fd1768 5430static const char* const exec_directory_type_table[_EXEC_DIRECTORY_TYPE_MAX] = {
3536f49e
YW
5431 [EXEC_DIRECTORY_RUNTIME] = "RuntimeDirectory",
5432 [EXEC_DIRECTORY_STATE] = "StateDirectory",
5433 [EXEC_DIRECTORY_CACHE] = "CacheDirectory",
5434 [EXEC_DIRECTORY_LOGS] = "LogsDirectory",
5435 [EXEC_DIRECTORY_CONFIGURATION] = "ConfigurationDirectory",
5436};
5437
5438DEFINE_STRING_TABLE_LOOKUP(exec_directory_type, ExecDirectoryType);
b1edf445 5439
fb2042dd
YW
5440static const char* const exec_directory_env_name_table[_EXEC_DIRECTORY_TYPE_MAX] = {
5441 [EXEC_DIRECTORY_RUNTIME] = "RUNTIME_DIRECTORY",
5442 [EXEC_DIRECTORY_STATE] = "STATE_DIRECTORY",
5443 [EXEC_DIRECTORY_CACHE] = "CACHE_DIRECTORY",
5444 [EXEC_DIRECTORY_LOGS] = "LOGS_DIRECTORY",
5445 [EXEC_DIRECTORY_CONFIGURATION] = "CONFIGURATION_DIRECTORY",
5446};
5447
5448DEFINE_PRIVATE_STRING_TABLE_LOOKUP_TO_STRING(exec_directory_env_name, ExecDirectoryType);
5449
b1edf445
LP
5450static const char* const exec_keyring_mode_table[_EXEC_KEYRING_MODE_MAX] = {
5451 [EXEC_KEYRING_INHERIT] = "inherit",
5452 [EXEC_KEYRING_PRIVATE] = "private",
5453 [EXEC_KEYRING_SHARED] = "shared",
5454};
5455
5456DEFINE_STRING_TABLE_LOOKUP(exec_keyring_mode, ExecKeyringMode);