]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/core/execute.c
tree-wide: fix spelling errors
[thirdparty/systemd.git] / src / core / execute.c
CommitLineData
53e1b683 1/* SPDX-License-Identifier: LGPL-2.1+ */
a7334b09 2
034c6ed7
LP
3#include <errno.h>
4#include <fcntl.h>
8dd4c05b 5#include <poll.h>
d251207d 6#include <sys/eventfd.h>
f5947a5e 7#include <sys/ioctl.h>
f3e43635 8#include <sys/mman.h>
8dd4c05b 9#include <sys/personality.h>
94f04347 10#include <sys/prctl.h>
d2ffa389 11#include <sys/shm.h>
d2ffa389 12#include <sys/types.h>
8dd4c05b
LP
13#include <sys/un.h>
14#include <unistd.h>
023a4f67 15#include <utmpx.h>
5cb5a6ff 16
349cc4a5 17#if HAVE_PAM
5b6319dc
LP
18#include <security/pam_appl.h>
19#endif
20
349cc4a5 21#if HAVE_SELINUX
7b52a628
MS
22#include <selinux/selinux.h>
23#endif
24
349cc4a5 25#if HAVE_SECCOMP
17df7223
LP
26#include <seccomp.h>
27#endif
28
349cc4a5 29#if HAVE_APPARMOR
eef65bf3
MS
30#include <sys/apparmor.h>
31#endif
32
24882e06 33#include "sd-messages.h"
8dd4c05b
LP
34
35#include "af-list.h"
b5efdb8a 36#include "alloc-util.h"
349cc4a5 37#if HAVE_APPARMOR
3ffd4af2
LP
38#include "apparmor-util.h"
39#endif
8dd4c05b
LP
40#include "async.h"
41#include "barrier.h"
8dd4c05b 42#include "cap-list.h"
430f0182 43#include "capability-util.h"
a1164ae3 44#include "chown-recursive.h"
fdb3deca 45#include "cgroup-setup.h"
da681e1b 46#include "cpu-set-util.h"
f6a6225e 47#include "def.h"
686d13b9 48#include "env-file.h"
4d1a6904 49#include "env-util.h"
17df7223 50#include "errno-list.h"
3ffd4af2 51#include "execute.h"
8dd4c05b 52#include "exit-status.h"
3ffd4af2 53#include "fd-util.h"
f97b34a6 54#include "format-util.h"
f4f15635 55#include "fs-util.h"
7d50b32a 56#include "glob-util.h"
c004493c 57#include "io-util.h"
8dd4c05b 58#include "ioprio.h"
a1164ae3 59#include "label.h"
8dd4c05b
LP
60#include "log.h"
61#include "macro.h"
e8a565cb 62#include "manager.h"
0a970718 63#include "memory-util.h"
f5947a5e 64#include "missing_fs.h"
8dd4c05b
LP
65#include "mkdir.h"
66#include "namespace.h"
6bedfcbb 67#include "parse-util.h"
8dd4c05b 68#include "path-util.h"
0b452006 69#include "process-util.h"
78f22b97 70#include "rlimit-util.h"
8dd4c05b 71#include "rm-rf.h"
349cc4a5 72#if HAVE_SECCOMP
3ffd4af2
LP
73#include "seccomp-util.h"
74#endif
07d46372 75#include "securebits-util.h"
8dd4c05b 76#include "selinux-util.h"
24882e06 77#include "signal-util.h"
8dd4c05b 78#include "smack-util.h"
57b7a260 79#include "socket-util.h"
fd63e712 80#include "special.h"
949befd3 81#include "stat-util.h"
8b43440b 82#include "string-table.h"
07630cea 83#include "string-util.h"
8dd4c05b 84#include "strv.h"
7ccbd1ae 85#include "syslog-util.h"
8dd4c05b 86#include "terminal-util.h"
566b7d23 87#include "umask-util.h"
8dd4c05b 88#include "unit.h"
b1d4f8e1 89#include "user-util.h"
8dd4c05b 90#include "utmp-wtmp.h"
5cb5a6ff 91
e056b01d 92#define IDLE_TIMEOUT_USEC (5*USEC_PER_SEC)
31a7eb86 93#define IDLE_TIMEOUT2_USEC (1*USEC_PER_SEC)
e6a26745 94
531dca78
LP
95#define SNDBUF_SIZE (8*1024*1024)
96
/* Relocates the n_fds descriptors in fds[] so that entry i ends up as fd number i+3. The array is rewritten in
 * place: whenever an entry is not yet at its slot it is duplicated with F_DUPFD (lowest free fd >= i+3), the old
 * fd is closed and the new number is stored. If the wanted slot was still occupied, the pass is repeated starting
 * at the first such index. Returns 0 on success or -errno if fcntl() fails. */
static int shift_fds(int fds[], size_t n_fds) {
        int first = 0;

        if (n_fds == 0)
                return 0;

        assert(fds);

        do {
                int retry_at = -1;

                for (int idx = first; idx < (int) n_fds; idx++) {
                        int want = idx + 3, got;

                        /* Nothing to do if the fd already sits in its slot */
                        if (fds[idx] == want)
                                continue;

                        got = fcntl(fds[idx], F_DUPFD, want);
                        if (got < 0)
                                return -errno;

                        safe_close(fds[idx]);
                        fds[idx] = got;

                        /* The slot was taken, we only got something above it. Note the first index where
                         * that happened so the next pass can start there. */
                        if (got != want && retry_at < 0)
                                retry_at = idx;
                }

                first = retry_at;
        } while (first >= 0);

        return 0;
}
141
/* Adjusts the file flags of the fds we are about to pass on. The first n_socket_fds entries get O_NONBLOCK set
 * or cleared according to 'nonblock'; all entries (socket and storage fds alike) get FD_CLOEXEC dropped, since
 * they are meant to be passed to our children. Returns 0 on success, or the first negative error returned by
 * fd_nonblock()/fd_cloexec(). */
static int flags_fds(const int fds[], size_t n_socket_fds, size_t n_storage_fds, bool nonblock) {
        size_t total = n_socket_fds + n_storage_fds;
        int r;

        if (total == 0)
                return 0;

        assert(fds);

        for (size_t k = 0; k < total; k++) {
                bool is_socket = k < n_socket_fds;

                /* O_NONBLOCK only applies to the socket activation fds */
                if (is_socket) {
                        r = fd_nonblock(fds[k], nonblock);
                        if (r < 0)
                                return r;
                }

                r = fd_cloexec(fds[k], false);
                if (r < 0)
                        return r;
        }

        return 0;
}
174
1e22b5cd 175static const char *exec_context_tty_path(const ExecContext *context) {
80876c20
LP
176 assert(context);
177
1e22b5cd
LP
178 if (context->stdio_as_fds)
179 return NULL;
180
80876c20
LP
181 if (context->tty_path)
182 return context->tty_path;
183
184 return "/dev/console";
185}
186
1e22b5cd
LP
187static void exec_context_tty_reset(const ExecContext *context, const ExecParameters *p) {
188 const char *path;
189
6ea832a2
LP
190 assert(context);
191
1e22b5cd 192 path = exec_context_tty_path(context);
6ea832a2 193
1e22b5cd
LP
194 if (context->tty_vhangup) {
195 if (p && p->stdin_fd >= 0)
196 (void) terminal_vhangup_fd(p->stdin_fd);
197 else if (path)
198 (void) terminal_vhangup(path);
199 }
6ea832a2 200
1e22b5cd
LP
201 if (context->tty_reset) {
202 if (p && p->stdin_fd >= 0)
203 (void) reset_terminal_fd(p->stdin_fd, true);
204 else if (path)
205 (void) reset_terminal(path);
206 }
207
208 if (context->tty_vt_disallocate && path)
209 (void) vt_disallocate(path);
6ea832a2
LP
210}
211
6af760f3
LP
212static bool is_terminal_input(ExecInput i) {
213 return IN_SET(i,
214 EXEC_INPUT_TTY,
215 EXEC_INPUT_TTY_FORCE,
216 EXEC_INPUT_TTY_FAIL);
217}
218
3a1286b6 219static bool is_terminal_output(ExecOutput o) {
6af760f3
LP
220 return IN_SET(o,
221 EXEC_OUTPUT_TTY,
222 EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
223 EXEC_OUTPUT_KMSG_AND_CONSOLE,
224 EXEC_OUTPUT_JOURNAL_AND_CONSOLE);
225}
226
aac8c0c3
LP
227static bool is_syslog_output(ExecOutput o) {
228 return IN_SET(o,
229 EXEC_OUTPUT_SYSLOG,
230 EXEC_OUTPUT_SYSLOG_AND_CONSOLE);
231}
232
233static bool is_kmsg_output(ExecOutput o) {
234 return IN_SET(o,
235 EXEC_OUTPUT_KMSG,
236 EXEC_OUTPUT_KMSG_AND_CONSOLE);
237}
238
6af760f3
LP
239static bool exec_context_needs_term(const ExecContext *c) {
240 assert(c);
241
242 /* Return true if the execution context suggests we should set $TERM to something useful. */
243
244 if (is_terminal_input(c->std_input))
245 return true;
246
247 if (is_terminal_output(c->std_output))
248 return true;
249
250 if (is_terminal_output(c->std_error))
251 return true;
252
253 return !!c->tty_path;
3a1286b6
MS
254}
255
/* Opens /dev/null with the given access flags (O_NOCTTY is always added) and moves the resulting fd to number
 * nfd. Returns -errno if the open fails, otherwise whatever move_fd() returns. */
static int open_null_as(int flags, int nfd) {
        int null_fd;

        assert(nfd >= 0);

        null_fd = open("/dev/null", flags|O_NOCTTY);
        if (null_fd < 0)
                return -errno;

        return move_fd(null_fd, nfd, false);
}
267
91dd5f7c
LP
268static int connect_journal_socket(
269 int fd,
270 const char *log_namespace,
271 uid_t uid,
272 gid_t gid) {
273
f36a9d59
ZJS
274 union sockaddr_union sa;
275 socklen_t sa_len;
524daa8c
ZJS
276 uid_t olduid = UID_INVALID;
277 gid_t oldgid = GID_INVALID;
91dd5f7c 278 const char *j;
524daa8c
ZJS
279 int r;
280
91dd5f7c
LP
281 j = log_namespace ?
282 strjoina("/run/systemd/journal.", log_namespace, "/stdout") :
283 "/run/systemd/journal/stdout";
284 r = sockaddr_un_set_path(&sa.un, j);
285 if (r < 0)
286 return r;
f36a9d59 287 sa_len = r;
91dd5f7c 288
cad93f29 289 if (gid_is_valid(gid)) {
524daa8c
ZJS
290 oldgid = getgid();
291
92a17af9 292 if (setegid(gid) < 0)
524daa8c
ZJS
293 return -errno;
294 }
295
cad93f29 296 if (uid_is_valid(uid)) {
524daa8c
ZJS
297 olduid = getuid();
298
92a17af9 299 if (seteuid(uid) < 0) {
524daa8c
ZJS
300 r = -errno;
301 goto restore_gid;
302 }
303 }
304
f36a9d59 305 r = connect(fd, &sa.sa, sa_len) < 0 ? -errno : 0;
524daa8c
ZJS
306
307 /* If we fail to restore the uid or gid, things will likely
308 fail later on. This should only happen if an LSM interferes. */
309
cad93f29 310 if (uid_is_valid(uid))
524daa8c
ZJS
311 (void) seteuid(olduid);
312
313 restore_gid:
cad93f29 314 if (gid_is_valid(gid))
524daa8c
ZJS
315 (void) setegid(oldgid);
316
317 return r;
318}
319
fd1f9c89 320static int connect_logger_as(
34cf6c43 321 const Unit *unit,
fd1f9c89 322 const ExecContext *context,
af635cf3 323 const ExecParameters *params,
fd1f9c89
LP
324 ExecOutput output,
325 const char *ident,
fd1f9c89
LP
326 int nfd,
327 uid_t uid,
328 gid_t gid) {
329
2ac1ff68
EV
330 _cleanup_close_ int fd = -1;
331 int r;
071830ff
LP
332
333 assert(context);
af635cf3 334 assert(params);
80876c20
LP
335 assert(output < _EXEC_OUTPUT_MAX);
336 assert(ident);
337 assert(nfd >= 0);
071830ff 338
54fe0cdb
LP
339 fd = socket(AF_UNIX, SOCK_STREAM, 0);
340 if (fd < 0)
80876c20 341 return -errno;
071830ff 342
91dd5f7c 343 r = connect_journal_socket(fd, context->log_namespace, uid, gid);
524daa8c
ZJS
344 if (r < 0)
345 return r;
071830ff 346
2ac1ff68 347 if (shutdown(fd, SHUT_RD) < 0)
80876c20 348 return -errno;
071830ff 349
fd1f9c89 350 (void) fd_inc_sndbuf(fd, SNDBUF_SIZE);
531dca78 351
2ac1ff68 352 if (dprintf(fd,
62bca2c6 353 "%s\n"
80876c20
LP
354 "%s\n"
355 "%i\n"
54fe0cdb
LP
356 "%i\n"
357 "%i\n"
358 "%i\n"
4f4a1dbf 359 "%i\n",
c867611e 360 context->syslog_identifier ?: ident,
af635cf3 361 params->flags & EXEC_PASS_LOG_UNIT ? unit->id : "",
54fe0cdb
LP
362 context->syslog_priority,
363 !!context->syslog_level_prefix,
aac8c0c3
LP
364 is_syslog_output(output),
365 is_kmsg_output(output),
2ac1ff68
EV
366 is_terminal_output(output)) < 0)
367 return -errno;
80876c20 368
2ac1ff68 369 return move_fd(TAKE_FD(fd), nfd, false);
80876c20 370}
2ac1ff68 371
/* Opens the terminal at 'path' with the given flags (O_NOCTTY is always added, i.e. it never becomes our
 * controlling TTY this way) and moves the fd to number nfd. Errors from open_terminal() are passed through. */
static int open_terminal_as(const char *path, int flags, int nfd) {
        int tty_fd;

        assert(path);
        assert(nfd >= 0);

        tty_fd = open_terminal(path, flags | O_NOCTTY);
        if (tty_fd < 0)
                return tty_fd;

        return move_fd(tty_fd, nfd, false);
}
071830ff 384
2038c3f5 385static int acquire_path(const char *path, int flags, mode_t mode) {
86fca584
ZJS
386 union sockaddr_union sa;
387 socklen_t sa_len;
15a3e96f 388 _cleanup_close_ int fd = -1;
86fca584 389 int r;
071830ff 390
80876c20 391 assert(path);
071830ff 392
2038c3f5
LP
393 if (IN_SET(flags & O_ACCMODE, O_WRONLY, O_RDWR))
394 flags |= O_CREAT;
395
396 fd = open(path, flags|O_NOCTTY, mode);
397 if (fd >= 0)
15a3e96f 398 return TAKE_FD(fd);
071830ff 399
2038c3f5
LP
400 if (errno != ENXIO) /* ENXIO is returned when we try to open() an AF_UNIX file system socket on Linux */
401 return -errno;
2038c3f5
LP
402
403 /* So, it appears the specified path could be an AF_UNIX socket. Let's see if we can connect to it. */
404
86fca584
ZJS
405 r = sockaddr_un_set_path(&sa.un, path);
406 if (r < 0)
407 return r == -EINVAL ? -ENXIO : r;
408 sa_len = r;
409
2038c3f5
LP
410 fd = socket(AF_UNIX, SOCK_STREAM, 0);
411 if (fd < 0)
412 return -errno;
413
86fca584 414 if (connect(fd, &sa.sa, sa_len) < 0)
2038c3f5
LP
415 return errno == EINVAL ? -ENXIO : -errno; /* Propagate initial error if we get EINVAL, i.e. we have
416 * indication that his wasn't an AF_UNIX socket after all */
071830ff 417
2038c3f5
LP
418 if ((flags & O_ACCMODE) == O_RDONLY)
419 r = shutdown(fd, SHUT_WR);
420 else if ((flags & O_ACCMODE) == O_WRONLY)
421 r = shutdown(fd, SHUT_RD);
422 else
86fca584 423 r = 0;
15a3e96f 424 if (r < 0)
2038c3f5 425 return -errno;
2038c3f5 426
15a3e96f 427 return TAKE_FD(fd);
80876c20 428}
071830ff 429
08f3be7a
LP
430static int fixup_input(
431 const ExecContext *context,
432 int socket_fd,
433 bool apply_tty_stdin) {
434
435 ExecInput std_input;
436
437 assert(context);
438
439 std_input = context->std_input;
1e3ad081
LP
440
441 if (is_terminal_input(std_input) && !apply_tty_stdin)
442 return EXEC_INPUT_NULL;
071830ff 443
03fd9c49 444 if (std_input == EXEC_INPUT_SOCKET && socket_fd < 0)
4f2d528d
LP
445 return EXEC_INPUT_NULL;
446
08f3be7a
LP
447 if (std_input == EXEC_INPUT_DATA && context->stdin_data_size == 0)
448 return EXEC_INPUT_NULL;
449
03fd9c49 450 return std_input;
4f2d528d
LP
451}
452
03fd9c49 453static int fixup_output(ExecOutput std_output, int socket_fd) {
4f2d528d 454
03fd9c49 455 if (std_output == EXEC_OUTPUT_SOCKET && socket_fd < 0)
4f2d528d
LP
456 return EXEC_OUTPUT_INHERIT;
457
03fd9c49 458 return std_output;
4f2d528d
LP
459}
460
a34ceba6
LP
461static int setup_input(
462 const ExecContext *context,
463 const ExecParameters *params,
52c239d7 464 int socket_fd,
2caa38e9 465 const int named_iofds[static 3]) {
a34ceba6 466
4f2d528d
LP
467 ExecInput i;
468
469 assert(context);
a34ceba6 470 assert(params);
2caa38e9 471 assert(named_iofds);
a34ceba6
LP
472
473 if (params->stdin_fd >= 0) {
474 if (dup2(params->stdin_fd, STDIN_FILENO) < 0)
475 return -errno;
476
477 /* Try to make this the controlling tty, if it is a tty, and reset it */
1fb0682e
LP
478 if (isatty(STDIN_FILENO)) {
479 (void) ioctl(STDIN_FILENO, TIOCSCTTY, context->std_input == EXEC_INPUT_TTY_FORCE);
480 (void) reset_terminal_fd(STDIN_FILENO, true);
481 }
a34ceba6
LP
482
483 return STDIN_FILENO;
484 }
4f2d528d 485
08f3be7a 486 i = fixup_input(context, socket_fd, params->flags & EXEC_APPLY_TTY_STDIN);
4f2d528d
LP
487
488 switch (i) {
071830ff 489
80876c20
LP
490 case EXEC_INPUT_NULL:
491 return open_null_as(O_RDONLY, STDIN_FILENO);
492
493 case EXEC_INPUT_TTY:
494 case EXEC_INPUT_TTY_FORCE:
495 case EXEC_INPUT_TTY_FAIL: {
046a82c1 496 int fd;
071830ff 497
1e22b5cd 498 fd = acquire_terminal(exec_context_tty_path(context),
8854d795
LP
499 i == EXEC_INPUT_TTY_FAIL ? ACQUIRE_TERMINAL_TRY :
500 i == EXEC_INPUT_TTY_FORCE ? ACQUIRE_TERMINAL_FORCE :
501 ACQUIRE_TERMINAL_WAIT,
3a43da28 502 USEC_INFINITY);
970edce6 503 if (fd < 0)
80876c20
LP
504 return fd;
505
046a82c1 506 return move_fd(fd, STDIN_FILENO, false);
80876c20
LP
507 }
508
4f2d528d 509 case EXEC_INPUT_SOCKET:
e75a9ed1
LP
510 assert(socket_fd >= 0);
511
4f2d528d
LP
512 return dup2(socket_fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
513
52c239d7 514 case EXEC_INPUT_NAMED_FD:
e75a9ed1
LP
515 assert(named_iofds[STDIN_FILENO] >= 0);
516
52c239d7
LB
517 (void) fd_nonblock(named_iofds[STDIN_FILENO], false);
518 return dup2(named_iofds[STDIN_FILENO], STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
519
08f3be7a
LP
520 case EXEC_INPUT_DATA: {
521 int fd;
522
523 fd = acquire_data_fd(context->stdin_data, context->stdin_data_size, 0);
524 if (fd < 0)
525 return fd;
526
527 return move_fd(fd, STDIN_FILENO, false);
528 }
529
2038c3f5
LP
530 case EXEC_INPUT_FILE: {
531 bool rw;
532 int fd;
533
534 assert(context->stdio_file[STDIN_FILENO]);
535
536 rw = (context->std_output == EXEC_OUTPUT_FILE && streq_ptr(context->stdio_file[STDIN_FILENO], context->stdio_file[STDOUT_FILENO])) ||
537 (context->std_error == EXEC_OUTPUT_FILE && streq_ptr(context->stdio_file[STDIN_FILENO], context->stdio_file[STDERR_FILENO]));
538
539 fd = acquire_path(context->stdio_file[STDIN_FILENO], rw ? O_RDWR : O_RDONLY, 0666 & ~context->umask);
540 if (fd < 0)
541 return fd;
542
543 return move_fd(fd, STDIN_FILENO, false);
544 }
545
80876c20
LP
546 default:
547 assert_not_reached("Unknown input type");
548 }
549}
550
41fc585a
LP
551static bool can_inherit_stderr_from_stdout(
552 const ExecContext *context,
553 ExecOutput o,
554 ExecOutput e) {
555
556 assert(context);
557
558 /* Returns true, if given the specified STDERR and STDOUT output we can directly dup() the stdout fd to the
559 * stderr fd */
560
561 if (e == EXEC_OUTPUT_INHERIT)
562 return true;
563 if (e != o)
564 return false;
565
566 if (e == EXEC_OUTPUT_NAMED_FD)
567 return streq_ptr(context->stdio_fdname[STDOUT_FILENO], context->stdio_fdname[STDERR_FILENO]);
568
569 if (IN_SET(e, EXEC_OUTPUT_FILE, EXEC_OUTPUT_FILE_APPEND))
570 return streq_ptr(context->stdio_file[STDOUT_FILENO], context->stdio_file[STDERR_FILENO]);
571
572 return true;
573}
574
a34ceba6 575static int setup_output(
34cf6c43 576 const Unit *unit,
a34ceba6
LP
577 const ExecContext *context,
578 const ExecParameters *params,
579 int fileno,
580 int socket_fd,
2caa38e9 581 const int named_iofds[static 3],
a34ceba6 582 const char *ident,
7bce046b
LP
583 uid_t uid,
584 gid_t gid,
585 dev_t *journal_stream_dev,
586 ino_t *journal_stream_ino) {
a34ceba6 587
4f2d528d
LP
588 ExecOutput o;
589 ExecInput i;
47c1d80d 590 int r;
4f2d528d 591
f2341e0a 592 assert(unit);
80876c20 593 assert(context);
a34ceba6 594 assert(params);
80876c20 595 assert(ident);
7bce046b
LP
596 assert(journal_stream_dev);
597 assert(journal_stream_ino);
80876c20 598
a34ceba6
LP
599 if (fileno == STDOUT_FILENO && params->stdout_fd >= 0) {
600
601 if (dup2(params->stdout_fd, STDOUT_FILENO) < 0)
602 return -errno;
603
604 return STDOUT_FILENO;
605 }
606
607 if (fileno == STDERR_FILENO && params->stderr_fd >= 0) {
608 if (dup2(params->stderr_fd, STDERR_FILENO) < 0)
609 return -errno;
610
611 return STDERR_FILENO;
612 }
613
08f3be7a 614 i = fixup_input(context, socket_fd, params->flags & EXEC_APPLY_TTY_STDIN);
03fd9c49 615 o = fixup_output(context->std_output, socket_fd);
4f2d528d 616
eb17e935
MS
617 if (fileno == STDERR_FILENO) {
618 ExecOutput e;
619 e = fixup_output(context->std_error, socket_fd);
80876c20 620
eb17e935
MS
621 /* This expects the input and output are already set up */
622
623 /* Don't change the stderr file descriptor if we inherit all
624 * the way and are not on a tty */
625 if (e == EXEC_OUTPUT_INHERIT &&
626 o == EXEC_OUTPUT_INHERIT &&
627 i == EXEC_INPUT_NULL &&
628 !is_terminal_input(context->std_input) &&
629 getppid () != 1)
630 return fileno;
631
632 /* Duplicate from stdout if possible */
41fc585a 633 if (can_inherit_stderr_from_stdout(context, o, e))
eb17e935 634 return dup2(STDOUT_FILENO, fileno) < 0 ? -errno : fileno;
071830ff 635
eb17e935 636 o = e;
80876c20 637
eb17e935 638 } else if (o == EXEC_OUTPUT_INHERIT) {
21d21ea4
LP
639 /* If input got downgraded, inherit the original value */
640 if (i == EXEC_INPUT_NULL && is_terminal_input(context->std_input))
1e22b5cd 641 return open_terminal_as(exec_context_tty_path(context), O_WRONLY, fileno);
21d21ea4 642
08f3be7a
LP
643 /* If the input is connected to anything that's not a /dev/null or a data fd, inherit that... */
644 if (!IN_SET(i, EXEC_INPUT_NULL, EXEC_INPUT_DATA))
eb17e935 645 return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
071830ff 646
acb591e4
LP
647 /* If we are not started from PID 1 we just inherit STDOUT from our parent process. */
648 if (getppid() != 1)
eb17e935 649 return fileno;
94f04347 650
eb17e935
MS
651 /* We need to open /dev/null here anew, to get the right access mode. */
652 return open_null_as(O_WRONLY, fileno);
071830ff 653 }
94f04347 654
eb17e935 655 switch (o) {
80876c20
LP
656
657 case EXEC_OUTPUT_NULL:
eb17e935 658 return open_null_as(O_WRONLY, fileno);
80876c20
LP
659
660 case EXEC_OUTPUT_TTY:
4f2d528d 661 if (is_terminal_input(i))
eb17e935 662 return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
80876c20
LP
663
664 /* We don't reset the terminal if this is just about output */
1e22b5cd 665 return open_terminal_as(exec_context_tty_path(context), O_WRONLY, fileno);
80876c20
LP
666
667 case EXEC_OUTPUT_SYSLOG:
28dbc1e8 668 case EXEC_OUTPUT_SYSLOG_AND_CONSOLE:
9a6bca7a 669 case EXEC_OUTPUT_KMSG:
28dbc1e8 670 case EXEC_OUTPUT_KMSG_AND_CONSOLE:
706343f4
LP
671 case EXEC_OUTPUT_JOURNAL:
672 case EXEC_OUTPUT_JOURNAL_AND_CONSOLE:
af635cf3 673 r = connect_logger_as(unit, context, params, o, ident, fileno, uid, gid);
47c1d80d 674 if (r < 0) {
82677ae4 675 log_unit_warning_errno(unit, r, "Failed to connect %s to the journal socket, ignoring: %m", fileno == STDOUT_FILENO ? "stdout" : "stderr");
eb17e935 676 r = open_null_as(O_WRONLY, fileno);
7bce046b
LP
677 } else {
678 struct stat st;
679
680 /* If we connected this fd to the journal via a stream, patch the device/inode into the passed
681 * parameters, but only then. This is useful so that we can set $JOURNAL_STREAM that permits
ab2116b1
LP
682 * services to detect whether they are connected to the journal or not.
683 *
684 * If both stdout and stderr are connected to a stream then let's make sure to store the data
685 * about STDERR as that's usually the best way to do logging. */
7bce046b 686
ab2116b1
LP
687 if (fstat(fileno, &st) >= 0 &&
688 (*journal_stream_ino == 0 || fileno == STDERR_FILENO)) {
7bce046b
LP
689 *journal_stream_dev = st.st_dev;
690 *journal_stream_ino = st.st_ino;
691 }
47c1d80d
MS
692 }
693 return r;
4f2d528d
LP
694
695 case EXEC_OUTPUT_SOCKET:
696 assert(socket_fd >= 0);
e75a9ed1 697
eb17e935 698 return dup2(socket_fd, fileno) < 0 ? -errno : fileno;
94f04347 699
52c239d7 700 case EXEC_OUTPUT_NAMED_FD:
e75a9ed1
LP
701 assert(named_iofds[fileno] >= 0);
702
52c239d7
LB
703 (void) fd_nonblock(named_iofds[fileno], false);
704 return dup2(named_iofds[fileno], fileno) < 0 ? -errno : fileno;
705
566b7d23
ZD
706 case EXEC_OUTPUT_FILE:
707 case EXEC_OUTPUT_FILE_APPEND: {
2038c3f5 708 bool rw;
566b7d23 709 int fd, flags;
2038c3f5
LP
710
711 assert(context->stdio_file[fileno]);
712
713 rw = context->std_input == EXEC_INPUT_FILE &&
714 streq_ptr(context->stdio_file[fileno], context->stdio_file[STDIN_FILENO]);
715
716 if (rw)
717 return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
718
566b7d23
ZD
719 flags = O_WRONLY;
720 if (o == EXEC_OUTPUT_FILE_APPEND)
721 flags |= O_APPEND;
722
723 fd = acquire_path(context->stdio_file[fileno], flags, 0666 & ~context->umask);
2038c3f5
LP
724 if (fd < 0)
725 return fd;
726
566b7d23 727 return move_fd(fd, fileno, 0);
2038c3f5
LP
728 }
729
94f04347 730 default:
80876c20 731 assert_not_reached("Unknown error type");
94f04347 732 }
071830ff
LP
733}
734
02a51aba 735static int chown_terminal(int fd, uid_t uid) {
4b3b5bc7 736 int r;
02a51aba
LP
737
738 assert(fd >= 0);
02a51aba 739
1ff74fb6 740 /* Before we chown/chmod the TTY, let's ensure this is actually a tty */
4b3b5bc7
LP
741 if (isatty(fd) < 1) {
742 if (IN_SET(errno, EINVAL, ENOTTY))
743 return 0; /* not a tty */
1ff74fb6 744
02a51aba 745 return -errno;
4b3b5bc7 746 }
02a51aba 747
4b3b5bc7
LP
748 /* This might fail. What matters are the results. */
749 r = fchmod_and_chown(fd, TTY_MODE, uid, -1);
750 if (r < 0)
751 return r;
02a51aba 752
4b3b5bc7 753 return 1;
02a51aba
LP
754}
755
7d5ceb64 756static int setup_confirm_stdio(const char *vc, int *_saved_stdin, int *_saved_stdout) {
3d18b167
LP
757 _cleanup_close_ int fd = -1, saved_stdin = -1, saved_stdout = -1;
758 int r;
80876c20 759
80876c20
LP
760 assert(_saved_stdin);
761 assert(_saved_stdout);
762
af6da548
LP
763 saved_stdin = fcntl(STDIN_FILENO, F_DUPFD, 3);
764 if (saved_stdin < 0)
765 return -errno;
80876c20 766
af6da548 767 saved_stdout = fcntl(STDOUT_FILENO, F_DUPFD, 3);
3d18b167
LP
768 if (saved_stdout < 0)
769 return -errno;
80876c20 770
8854d795 771 fd = acquire_terminal(vc, ACQUIRE_TERMINAL_WAIT, DEFAULT_CONFIRM_USEC);
3d18b167
LP
772 if (fd < 0)
773 return fd;
80876c20 774
af6da548
LP
775 r = chown_terminal(fd, getuid());
776 if (r < 0)
3d18b167 777 return r;
02a51aba 778
3d18b167
LP
779 r = reset_terminal_fd(fd, true);
780 if (r < 0)
781 return r;
80876c20 782
2b33ab09 783 r = rearrange_stdio(fd, fd, STDERR_FILENO);
3d18b167 784 fd = -1;
2b33ab09
LP
785 if (r < 0)
786 return r;
80876c20
LP
787
788 *_saved_stdin = saved_stdin;
789 *_saved_stdout = saved_stdout;
790
3d18b167 791 saved_stdin = saved_stdout = -1;
80876c20 792
3d18b167 793 return 0;
80876c20
LP
794}
795
63d77c92 796static void write_confirm_error_fd(int err, int fd, const Unit *u) {
3b20f877
FB
797 assert(err < 0);
798
799 if (err == -ETIMEDOUT)
63d77c92 800 dprintf(fd, "Confirmation question timed out for %s, assuming positive response.\n", u->id);
3b20f877
FB
801 else {
802 errno = -err;
63d77c92 803 dprintf(fd, "Couldn't ask confirmation for %s: %m, assuming positive response.\n", u->id);
3b20f877
FB
804 }
805}
806
63d77c92 807static void write_confirm_error(int err, const char *vc, const Unit *u) {
03e334a1 808 _cleanup_close_ int fd = -1;
80876c20 809
3b20f877 810 assert(vc);
80876c20 811
7d5ceb64 812 fd = open_terminal(vc, O_WRONLY|O_NOCTTY|O_CLOEXEC);
af6da548 813 if (fd < 0)
3b20f877 814 return;
80876c20 815
63d77c92 816 write_confirm_error_fd(err, fd, u);
af6da548 817}
80876c20 818
/* Undoes setup_confirm_stdio(): releases the terminal, puts the saved stdin/stdout fds (if any) back in place,
 * then closes and invalidates the saved copies. Returns 0 on success, or the -errno of the last dup2() that
 * failed. */
static int restore_confirm_stdio(int *saved_stdin, int *saved_stdout) {
        int ret = 0;

        assert(saved_stdin);
        assert(saved_stdout);

        release_terminal();

        if (*saved_stdin >= 0 && dup2(*saved_stdin, STDIN_FILENO) < 0)
                ret = -errno;

        if (*saved_stdout >= 0 && dup2(*saved_stdout, STDOUT_FILENO) < 0)
                ret = -errno;

        *saved_stdin = safe_close(*saved_stdin);
        *saved_stdout = safe_close(*saved_stdout);

        return ret;
}
840
3b20f877
FB
/* Possible outcomes of ask_for_confirmation() */
enum {
        CONFIRM_PRETEND_FAILURE = -1,   /* don't execute, pretend the command failed */
        CONFIRM_PRETEND_SUCCESS =  0,   /* don't execute, pretend the command succeeded */
        CONFIRM_EXECUTE         =  1,   /* go ahead and execute the command */
};
846
eedf223a 847static int ask_for_confirmation(const char *vc, Unit *u, const char *cmdline) {
af6da548 848 int saved_stdout = -1, saved_stdin = -1, r;
2bcd3c26 849 _cleanup_free_ char *e = NULL;
3b20f877 850 char c;
af6da548 851
3b20f877 852 /* For any internal errors, assume a positive response. */
7d5ceb64 853 r = setup_confirm_stdio(vc, &saved_stdin, &saved_stdout);
3b20f877 854 if (r < 0) {
63d77c92 855 write_confirm_error(r, vc, u);
3b20f877
FB
856 return CONFIRM_EXECUTE;
857 }
af6da548 858
b0eb2944
FB
859 /* confirm_spawn might have been disabled while we were sleeping. */
860 if (manager_is_confirm_spawn_disabled(u->manager)) {
861 r = 1;
862 goto restore_stdio;
863 }
af6da548 864
2bcd3c26
FB
865 e = ellipsize(cmdline, 60, 100);
866 if (!e) {
867 log_oom();
868 r = CONFIRM_EXECUTE;
869 goto restore_stdio;
870 }
af6da548 871
d172b175 872 for (;;) {
539622bd 873 r = ask_char(&c, "yfshiDjcn", "Execute %s? [y, f, s – h for help] ", e);
d172b175 874 if (r < 0) {
63d77c92 875 write_confirm_error_fd(r, STDOUT_FILENO, u);
d172b175
FB
876 r = CONFIRM_EXECUTE;
877 goto restore_stdio;
878 }
af6da548 879
d172b175 880 switch (c) {
b0eb2944
FB
881 case 'c':
882 printf("Resuming normal execution.\n");
883 manager_disable_confirm_spawn();
884 r = 1;
885 break;
dd6f9ac0
FB
886 case 'D':
887 unit_dump(u, stdout, " ");
888 continue; /* ask again */
d172b175
FB
889 case 'f':
890 printf("Failing execution.\n");
891 r = CONFIRM_PRETEND_FAILURE;
892 break;
893 case 'h':
b0eb2944
FB
894 printf(" c - continue, proceed without asking anymore\n"
895 " D - dump, show the state of the unit\n"
dd6f9ac0 896 " f - fail, don't execute the command and pretend it failed\n"
d172b175 897 " h - help\n"
eedf223a 898 " i - info, show a short summary of the unit\n"
56fde33a 899 " j - jobs, show jobs that are in progress\n"
d172b175
FB
900 " s - skip, don't execute the command and pretend it succeeded\n"
901 " y - yes, execute the command\n");
dd6f9ac0 902 continue; /* ask again */
eedf223a
FB
903 case 'i':
904 printf(" Description: %s\n"
905 " Unit: %s\n"
906 " Command: %s\n",
907 u->id, u->description, cmdline);
908 continue; /* ask again */
56fde33a
FB
909 case 'j':
910 manager_dump_jobs(u->manager, stdout, " ");
911 continue; /* ask again */
539622bd
FB
912 case 'n':
913 /* 'n' was removed in favor of 'f'. */
914 printf("Didn't understand 'n', did you mean 'f'?\n");
915 continue; /* ask again */
d172b175
FB
916 case 's':
917 printf("Skipping execution.\n");
918 r = CONFIRM_PRETEND_SUCCESS;
919 break;
920 case 'y':
921 r = CONFIRM_EXECUTE;
922 break;
923 default:
924 assert_not_reached("Unhandled choice");
925 }
3b20f877 926 break;
3b20f877 927 }
af6da548 928
3b20f877 929restore_stdio:
af6da548 930 restore_confirm_stdio(&saved_stdin, &saved_stdout);
af6da548 931 return r;
80876c20
LP
932}
933
4d885bd3
DH
934static int get_fixed_user(const ExecContext *c, const char **user,
935 uid_t *uid, gid_t *gid,
936 const char **home, const char **shell) {
81a2b7ce 937 int r;
4d885bd3 938 const char *name;
81a2b7ce 939
4d885bd3 940 assert(c);
81a2b7ce 941
23deef88
LP
942 if (!c->user)
943 return 0;
944
4d885bd3
DH
945 /* Note that we don't set $HOME or $SHELL if they are not particularly enlightening anyway
946 * (i.e. are "/" or "/bin/nologin"). */
81a2b7ce 947
23deef88 948 name = c->user;
fafff8f1 949 r = get_user_creds(&name, uid, gid, home, shell, USER_CREDS_CLEAN);
4d885bd3
DH
950 if (r < 0)
951 return r;
81a2b7ce 952
4d885bd3
DH
953 *user = name;
954 return 0;
955}
956
957static int get_fixed_group(const ExecContext *c, const char **group, gid_t *gid) {
958 int r;
959 const char *name;
960
961 assert(c);
962
963 if (!c->group)
964 return 0;
965
966 name = c->group;
fafff8f1 967 r = get_group_creds(&name, gid, 0);
4d885bd3
DH
968 if (r < 0)
969 return r;
970
971 *group = name;
972 return 0;
973}
974
cdc5d5c5
DH
975static int get_supplementary_groups(const ExecContext *c, const char *user,
976 const char *group, gid_t gid,
977 gid_t **supplementary_gids, int *ngids) {
4d885bd3
DH
978 char **i;
979 int r, k = 0;
980 int ngroups_max;
981 bool keep_groups = false;
982 gid_t *groups = NULL;
983 _cleanup_free_ gid_t *l_gids = NULL;
984
985 assert(c);
986
bbeea271
DH
987 /*
988 * If user is given, then lookup GID and supplementary groups list.
989 * We avoid NSS lookups for gid=0. Also we have to initialize groups
cdc5d5c5
DH
990 * here and as early as possible so we keep the list of supplementary
991 * groups of the caller.
bbeea271
DH
992 */
993 if (user && gid_is_valid(gid) && gid != 0) {
994 /* First step, initialize groups from /etc/groups */
995 if (initgroups(user, gid) < 0)
996 return -errno;
997
998 keep_groups = true;
999 }
1000
ac6e8be6 1001 if (strv_isempty(c->supplementary_groups))
4d885bd3
DH
1002 return 0;
1003
366ddd25
DH
1004 /*
1005 * If SupplementaryGroups= was passed then NGROUPS_MAX has to
1006 * be positive, otherwise fail.
1007 */
1008 errno = 0;
1009 ngroups_max = (int) sysconf(_SC_NGROUPS_MAX);
66855de7
LP
1010 if (ngroups_max <= 0)
1011 return errno_or_else(EOPNOTSUPP);
366ddd25 1012
4d885bd3
DH
1013 l_gids = new(gid_t, ngroups_max);
1014 if (!l_gids)
1015 return -ENOMEM;
81a2b7ce 1016
4d885bd3
DH
1017 if (keep_groups) {
1018 /*
1019 * Lookup the list of groups that the user belongs to, we
1020 * avoid NSS lookups here too for gid=0.
1021 */
1022 k = ngroups_max;
1023 if (getgrouplist(user, gid, l_gids, &k) < 0)
1024 return -EINVAL;
1025 } else
1026 k = 0;
81a2b7ce 1027
4d885bd3
DH
1028 STRV_FOREACH(i, c->supplementary_groups) {
1029 const char *g;
81a2b7ce 1030
4d885bd3
DH
1031 if (k >= ngroups_max)
1032 return -E2BIG;
81a2b7ce 1033
4d885bd3 1034 g = *i;
fafff8f1 1035 r = get_group_creds(&g, l_gids+k, 0);
4d885bd3
DH
1036 if (r < 0)
1037 return r;
81a2b7ce 1038
4d885bd3
DH
1039 k++;
1040 }
81a2b7ce 1041
4d885bd3
DH
1042 /*
1043 * Sets ngids to zero to drop all supplementary groups, happens
1044 * when we are under root and SupplementaryGroups= is empty.
1045 */
1046 if (k == 0) {
1047 *ngids = 0;
1048 return 0;
1049 }
81a2b7ce 1050
4d885bd3
DH
1051 /* Otherwise get the final list of supplementary groups */
1052 groups = memdup(l_gids, sizeof(gid_t) * k);
1053 if (!groups)
1054 return -ENOMEM;
1055
1056 *supplementary_gids = groups;
1057 *ngids = k;
1058
1059 groups = NULL;
1060
1061 return 0;
1062}
1063
/* Applies group credentials: installs the supplementary group list if it is non-empty, then sets real,
 * effective and saved GID to gid if that is valid. Returns 0 on success, negative errno-style error
 * otherwise. */
static int enforce_groups(gid_t gid, const gid_t *supplementary_gids, int ngids) {

        /* Handle SupplementaryGroups= if it is not empty */
        if (ngids > 0) {
                int r = maybe_setgroups(ngids, supplementary_gids);
                if (r < 0)
                        return r;
        }

        if (!gid_is_valid(gid))
                return 0;

        /* Then set our gids */
        if (setresgid(gid, gid, gid) < 0)
                return -errno;

        return 0;
}
1082
1083static int enforce_user(const ExecContext *context, uid_t uid) {
81a2b7ce
LP
1084 assert(context);
1085
4d885bd3
DH
1086 if (!uid_is_valid(uid))
1087 return 0;
1088
479050b3 1089 /* Sets (but doesn't look up) the uid and make sure we keep the
81a2b7ce
LP
1090 * capabilities while doing so. */
1091
479050b3 1092 if (context->capability_ambient_set != 0) {
81a2b7ce
LP
1093
1094 /* First step: If we need to keep capabilities but
1095 * drop privileges we need to make sure we keep our
cbb21cca 1096 * caps, while we drop privileges. */
693ced48 1097 if (uid != 0) {
cbb21cca 1098 int sb = context->secure_bits | 1<<SECURE_KEEP_CAPS;
693ced48
LP
1099
1100 if (prctl(PR_GET_SECUREBITS) != sb)
1101 if (prctl(PR_SET_SECUREBITS, sb) < 0)
1102 return -errno;
1103 }
81a2b7ce
LP
1104 }
1105
479050b3 1106 /* Second step: actually set the uids */
81a2b7ce
LP
1107 if (setresuid(uid, uid, uid) < 0)
1108 return -errno;
1109
1110 /* At this point we should have all necessary capabilities but
1111 are otherwise a normal user. However, the caps might got
1112 corrupted due to the setresuid() so we need clean them up
1113 later. This is done outside of this call. */
1114
1115 return 0;
1116}
1117
349cc4a5 1118#if HAVE_PAM
5b6319dc
LP
1119
1120static int null_conv(
1121 int num_msg,
1122 const struct pam_message **msg,
1123 struct pam_response **resp,
1124 void *appdata_ptr) {
1125
1126 /* We don't support conversations */
1127
1128 return PAM_CONV_ERR;
1129}
1130
cefc33ae
LP
1131#endif
1132
5b6319dc
LP
/* Opens a PAM session for PAM service 'name' and user 'user', and forks off an "(sd-pam)" helper process
 * that tears the session down again later.
 *
 * name, user: passed to pam_start().
 * uid, gid:   credentials the helper process drops to (via setresuid()/setresgid()) before waiting.
 * tty:        value for the PAM_TTY item; if NULL, the TTY name of STDIN_FILENO is used when it can be
 *             determined, otherwise PAM_TTY is left unset.
 * env:        environment block; every entry is exported into the PAM handle with pam_putenv(), and on
 *             success *env is replaced by the list returned by pam_getenvlist().
 * fds, n_fds: file descriptors that are closed in the helper process.
 *
 * Returns the result of strv_free_and_replace() on success. On failure returns a negative errno-style
 * value, where any PAM error is reported as -EPERM. When built without HAVE_PAM this is a no-op that
 * returns 0. */
static int setup_pam(
                const char *name,
                const char *user,
                uid_t uid,
                gid_t gid,
                const char *tty,
                char ***env,
                const int fds[], size_t n_fds) {

#if HAVE_PAM

        /* We never talk back to PAM modules, hence use the conversation function that always fails */
        static const struct pam_conv conv = {
                .conv = null_conv,
                .appdata_ptr = NULL
        };

        _cleanup_(barrier_destroy) Barrier barrier = BARRIER_NULL;
        pam_handle_t *handle = NULL;
        sigset_t old_ss;
        int pam_code = PAM_SUCCESS, r;
        char **nv, **e = NULL;
        bool close_session = false;
        pid_t pam_pid = 0, parent_pid;
        int flags = 0;

        assert(name);
        assert(user);
        assert(env);

        /* We set up PAM in the parent process, then fork. The child
         * will then stay around until killed via PR_SET_PDEATHSIG or
         * systemd via the cgroup logic. It will then remove the PAM
         * session again. The parent process will exec() the actual
         * daemon. We do things this way to ensure that the main PID
         * of the daemon is the one we initially fork()ed. */

        r = barrier_create(&barrier);
        if (r < 0)
                goto fail;

        /* Ask PAM to be quiet unless we log at debug level */
        if (log_get_max_level() < LOG_DEBUG)
                flags |= PAM_SILENT;

        pam_code = pam_start(name, user, &conv, &handle);
        if (pam_code != PAM_SUCCESS) {
                /* Make sure the failure path below does not try to close/end the handle */
                handle = NULL;
                goto fail;
        }

        if (!tty) {
                _cleanup_free_ char *q = NULL;

                /* Hmm, so no TTY was explicitly passed, but an fd passed to us directly might be a TTY. Let's figure
                 * out if that's the case, and read the TTY off it. */

                if (getttyname_malloc(STDIN_FILENO, &q) >= 0)
                        tty = strjoina("/dev/", q);
        }

        if (tty) {
                pam_code = pam_set_item(handle, PAM_TTY, tty);
                if (pam_code != PAM_SUCCESS)
                        goto fail;
        }

        /* Export the environment we were passed into the PAM handle */
        STRV_FOREACH(nv, *env) {
                pam_code = pam_putenv(handle, *nv);
                if (pam_code != PAM_SUCCESS)
                        goto fail;
        }

        pam_code = pam_acct_mgmt(handle, flags);
        if (pam_code != PAM_SUCCESS)
                goto fail;

        /* Failure to establish credentials is not fatal, it is only logged at debug level */
        pam_code = pam_setcred(handle, PAM_ESTABLISH_CRED | flags);
        if (pam_code != PAM_SUCCESS)
                log_debug("pam_setcred() failed, ignoring: %s", pam_strerror(handle, pam_code));

        pam_code = pam_open_session(handle, flags);
        if (pam_code != PAM_SUCCESS)
                goto fail;

        /* From now on the failure path has to close the session again */
        close_session = true;

        e = pam_getenvlist(handle);
        if (!e) {
                pam_code = PAM_BUF_ERR;
                goto fail;
        }

        /* Block SIGTERM, so that we know that it won't get lost in
         * the child */

        assert_se(sigprocmask_many(SIG_BLOCK, &old_ss, SIGTERM, -1) >= 0);

        /* Remember our PID, so that the child can detect whether it was reparented */
        parent_pid = getpid_cached();

        r = safe_fork("(sd-pam)", 0, &pam_pid);
        if (r < 0)
                goto fail;
        if (r == 0) {
                int sig, ret = EXIT_PAM;

                /* The child's job is to reset the PAM session on
                 * termination */
                barrier_set_role(&barrier, BARRIER_CHILD);

                /* Make sure we don't keep open the passed fds in this child. We assume that otherwise only those fds
                 * are open here that have been opened by PAM. */
                (void) close_many(fds, n_fds);

                /* Drop privileges - we don't need any to pam_close_session
                 * and this will make PR_SET_PDEATHSIG work in most cases.
                 * If this fails, ignore the error - but expect sd-pam threads
                 * to fail to exit normally */

                r = maybe_setgroups(0, NULL);
                if (r < 0)
                        log_warning_errno(r, "Failed to setgroups() in sd-pam: %m");
                if (setresgid(gid, gid, gid) < 0)
                        log_warning_errno(errno, "Failed to setresgid() in sd-pam: %m");
                if (setresuid(uid, uid, uid) < 0)
                        log_warning_errno(errno, "Failed to setresuid() in sd-pam: %m");

                (void) ignore_signals(SIGPIPE, -1);

                /* Wait until our parent died. This will only work if
                 * the above setresuid() succeeds, otherwise the kernel
                 * will not allow unprivileged parents kill their privileged
                 * children this way. We rely on the control groups kill logic
                 * to do the rest for us. */
                if (prctl(PR_SET_PDEATHSIG, SIGTERM) < 0)
                        goto child_finish;

                /* Tell the parent that our setup is done. This is especially
                 * important regarding dropping privileges. Otherwise, unit
                 * setup might race against our setresuid(2) call.
                 *
                 * If the parent aborted, we'll detect this below, hence ignore
                 * return failure here. */
                (void) barrier_place(&barrier);

                /* Check if our parent process might already have died? */
                if (getppid() == parent_pid) {
                        sigset_t ss;

                        assert_se(sigemptyset(&ss) >= 0);
                        assert_se(sigaddset(&ss, SIGTERM) >= 0);

                        /* Parent is still around: sleep until SIGTERM is delivered, retrying on EINTR */
                        for (;;) {
                                if (sigwait(&ss, &sig) < 0) {
                                        if (errno == EINTR)
                                                continue;

                                        goto child_finish;
                                }

                                assert(sig == SIGTERM);
                                break;
                        }
                }

                pam_code = pam_setcred(handle, PAM_DELETE_CRED | flags);
                if (pam_code != PAM_SUCCESS)
                        goto child_finish;

                /* If our parent died we'll end the session */
                if (getppid() != parent_pid) {
                        pam_code = pam_close_session(handle, flags);
                        if (pam_code != PAM_SUCCESS)
                                goto child_finish;
                }

                ret = 0;

        child_finish:
                /* Exit status is EXIT_PAM unless everything above succeeded */
                pam_end(handle, pam_code | flags);
                _exit(ret);
        }

        barrier_set_role(&barrier, BARRIER_PARENT);

        /* If the child was forked off successfully it will do all the
         * cleanups, so forget about the handle here. */
        handle = NULL;

        /* Unblock SIGTERM again in the parent */
        assert_se(sigprocmask(SIG_SETMASK, &old_ss, NULL) >= 0);

        /* We close the log explicitly here, since the PAM modules
         * might have opened it, but we don't want this fd around. */
        closelog();

        /* Synchronously wait for the child to initialize. We don't care for
         * errors as we cannot recover. However, warn loudly if it happens. */
        if (!barrier_place_and_sync(&barrier))
                log_error("PAM initialization failed");

        /* Hand the environment block generated by PAM back to the caller, replacing the one passed in */
        return strv_free_and_replace(*env, e);

fail:
        /* Two kinds of failures end up here: PAM errors (pam_code is set) and errno-style errors (r is set) */
        if (pam_code != PAM_SUCCESS) {
                log_error("PAM failed: %s", pam_strerror(handle, pam_code));
                r = -EPERM;  /* PAM errors do not map to errno */
        } else
                log_error_errno(r, "PAM failed: %m");

        if (handle) {
                if (close_session)
                        pam_code = pam_close_session(handle, flags);

                pam_end(handle, pam_code | flags);
        }

        strv_free(e);
        closelog();

        return r;
#else
        return 0;
#endif
}
5b6319dc 1356
5d6b1584
LP
/* Renames the calling process after the file name component of 'path', wrapped in parentheses. Only the
 * last 8 characters of the file name are used; if the file name is empty the process is renamed to
 * "(...)". */
static void rename_process_from_path(const char *path) {
        char buf[11];
        const char *base;
        size_t n;

        /* The resulting string has to fit into 10 characters (i.e. the length of "/sbin/init"), so that it
         * looks pretty in /bin/ps */

        base = basename(path);
        if (isempty(base)) {
                rename_process("(...)");
                return;
        }

        n = strlen(base);
        if (n > 8) {
                /* Keep the tail of the name, it is usually the more interesting part, given that the
                 * beginning might just be "systemd-" */
                base += n - 8;
                n = 8;
        }

        buf[0] = '(';
        memcpy(buf + 1, base, n);
        buf[n + 1] = ')';
        buf[n + 2] = 0;

        rename_process(buf);
}
1387
469830d1
LP
1388static bool context_has_address_families(const ExecContext *c) {
1389 assert(c);
1390
1391 return c->address_families_whitelist ||
1392 !set_isempty(c->address_families);
1393}
1394
1395static bool context_has_syscall_filters(const ExecContext *c) {
1396 assert(c);
1397
1398 return c->syscall_whitelist ||
8cfa775f 1399 !hashmap_isempty(c->syscall_filter);
469830d1
LP
1400}
1401
1402static bool context_has_no_new_privileges(const ExecContext *c) {
1403 assert(c);
1404
1405 if (c->no_new_privileges)
1406 return true;
1407
1408 if (have_effective_cap(CAP_SYS_ADMIN)) /* if we are privileged, we don't need NNP */
1409 return false;
1410
1411 /* We need NNP if we have any form of seccomp and are unprivileged */
1412 return context_has_address_families(c) ||
1413 c->memory_deny_write_execute ||
1414 c->restrict_realtime ||
f69567cb 1415 c->restrict_suid_sgid ||
469830d1 1416 exec_context_restrict_namespaces_set(c) ||
fc64760d 1417 c->protect_clock ||
469830d1
LP
1418 c->protect_kernel_tunables ||
1419 c->protect_kernel_modules ||
84703040 1420 c->protect_kernel_logs ||
469830d1
LP
1421 c->private_devices ||
1422 context_has_syscall_filters(c) ||
78e864e5 1423 !set_isempty(c->syscall_archs) ||
aecd5ac6
TM
1424 c->lock_personality ||
1425 c->protect_hostname;
469830d1
LP
1426}
1427
349cc4a5 1428#if HAVE_SECCOMP
17df7223 1429
83f12b27 1430static bool skip_seccomp_unavailable(const Unit* u, const char* msg) {
f673b62d
LP
1431
1432 if (is_seccomp_available())
1433 return false;
1434
f673b62d 1435 log_unit_debug(u, "SECCOMP features not detected in the kernel, skipping %s", msg);
f673b62d 1436 return true;
83f12b27
FS
1437}
1438
165a31c0 1439static int apply_syscall_filter(const Unit* u, const ExecContext *c, bool needs_ambient_hack) {
469830d1 1440 uint32_t negative_action, default_action, action;
165a31c0 1441 int r;
8351ceae 1442
469830d1 1443 assert(u);
c0467cf3 1444 assert(c);
8351ceae 1445
469830d1 1446 if (!context_has_syscall_filters(c))
83f12b27
FS
1447 return 0;
1448
469830d1
LP
1449 if (skip_seccomp_unavailable(u, "SystemCallFilter="))
1450 return 0;
e9642be2 1451
ccc16c78 1452 negative_action = c->syscall_errno == 0 ? scmp_act_kill_process() : SCMP_ACT_ERRNO(c->syscall_errno);
e9642be2 1453
469830d1
LP
1454 if (c->syscall_whitelist) {
1455 default_action = negative_action;
1456 action = SCMP_ACT_ALLOW;
7c66bae2 1457 } else {
469830d1
LP
1458 default_action = SCMP_ACT_ALLOW;
1459 action = negative_action;
57183d11 1460 }
8351ceae 1461
165a31c0
LP
1462 if (needs_ambient_hack) {
1463 r = seccomp_filter_set_add(c->syscall_filter, c->syscall_whitelist, syscall_filter_sets + SYSCALL_FILTER_SET_SETUID);
1464 if (r < 0)
1465 return r;
1466 }
1467
b54f36c6 1468 return seccomp_load_syscall_filter_set_raw(default_action, c->syscall_filter, action, false);
4298d0b5
LP
1469}
1470
469830d1
LP
1471static int apply_syscall_archs(const Unit *u, const ExecContext *c) {
1472 assert(u);
4298d0b5
LP
1473 assert(c);
1474
469830d1 1475 if (set_isempty(c->syscall_archs))
83f12b27
FS
1476 return 0;
1477
469830d1
LP
1478 if (skip_seccomp_unavailable(u, "SystemCallArchitectures="))
1479 return 0;
4298d0b5 1480
469830d1
LP
1481 return seccomp_restrict_archs(c->syscall_archs);
1482}
4298d0b5 1483
469830d1
LP
1484static int apply_address_families(const Unit* u, const ExecContext *c) {
1485 assert(u);
1486 assert(c);
4298d0b5 1487
469830d1
LP
1488 if (!context_has_address_families(c))
1489 return 0;
4298d0b5 1490
469830d1
LP
1491 if (skip_seccomp_unavailable(u, "RestrictAddressFamilies="))
1492 return 0;
4298d0b5 1493
469830d1 1494 return seccomp_restrict_address_families(c->address_families, c->address_families_whitelist);
8351ceae 1495}
4298d0b5 1496
83f12b27 1497static int apply_memory_deny_write_execute(const Unit* u, const ExecContext *c) {
469830d1 1498 assert(u);
f3e43635
TM
1499 assert(c);
1500
469830d1 1501 if (!c->memory_deny_write_execute)
83f12b27
FS
1502 return 0;
1503
469830d1
LP
1504 if (skip_seccomp_unavailable(u, "MemoryDenyWriteExecute="))
1505 return 0;
f3e43635 1506
469830d1 1507 return seccomp_memory_deny_write_execute();
f3e43635
TM
1508}
1509
83f12b27 1510static int apply_restrict_realtime(const Unit* u, const ExecContext *c) {
469830d1 1511 assert(u);
f4170c67
LP
1512 assert(c);
1513
469830d1 1514 if (!c->restrict_realtime)
83f12b27
FS
1515 return 0;
1516
469830d1
LP
1517 if (skip_seccomp_unavailable(u, "RestrictRealtime="))
1518 return 0;
f4170c67 1519
469830d1 1520 return seccomp_restrict_realtime();
f4170c67
LP
1521}
1522
f69567cb
LP
1523static int apply_restrict_suid_sgid(const Unit* u, const ExecContext *c) {
1524 assert(u);
1525 assert(c);
1526
1527 if (!c->restrict_suid_sgid)
1528 return 0;
1529
1530 if (skip_seccomp_unavailable(u, "RestrictSUIDSGID="))
1531 return 0;
1532
1533 return seccomp_restrict_suid_sgid();
1534}
1535
59e856c7 1536static int apply_protect_sysctl(const Unit *u, const ExecContext *c) {
469830d1 1537 assert(u);
59eeb84b
LP
1538 assert(c);
1539
1540 /* Turn off the legacy sysctl() system call. Many distributions turn this off while building the kernel, but
1541 * let's protect even those systems where this is left on in the kernel. */
1542
469830d1 1543 if (!c->protect_kernel_tunables)
59eeb84b
LP
1544 return 0;
1545
469830d1
LP
1546 if (skip_seccomp_unavailable(u, "ProtectKernelTunables="))
1547 return 0;
59eeb84b 1548
469830d1 1549 return seccomp_protect_sysctl();
59eeb84b
LP
1550}
1551
59e856c7 1552static int apply_protect_kernel_modules(const Unit *u, const ExecContext *c) {
469830d1 1553 assert(u);
502d704e
DH
1554 assert(c);
1555
25a8d8a0 1556 /* Turn off module syscalls on ProtectKernelModules=yes */
502d704e 1557
469830d1
LP
1558 if (!c->protect_kernel_modules)
1559 return 0;
1560
502d704e
DH
1561 if (skip_seccomp_unavailable(u, "ProtectKernelModules="))
1562 return 0;
1563
b54f36c6 1564 return seccomp_load_syscall_filter_set(SCMP_ACT_ALLOW, syscall_filter_sets + SYSCALL_FILTER_SET_MODULE, SCMP_ACT_ERRNO(EPERM), false);
502d704e
DH
1565}
1566
84703040
KK
1567static int apply_protect_kernel_logs(const Unit *u, const ExecContext *c) {
1568 assert(u);
1569 assert(c);
1570
1571 if (!c->protect_kernel_logs)
1572 return 0;
1573
1574 if (skip_seccomp_unavailable(u, "ProtectKernelLogs="))
1575 return 0;
1576
1577 return seccomp_protect_syslog();
1578}
1579
daf8f72b 1580static int apply_protect_clock(const Unit *u, const ExecContext *c) {
fc64760d
KK
1581 assert(u);
1582 assert(c);
1583
1584 if (!c->protect_clock)
1585 return 0;
1586
1587 if (skip_seccomp_unavailable(u, "ProtectClock="))
1588 return 0;
1589
1590 return seccomp_load_syscall_filter_set(SCMP_ACT_ALLOW, syscall_filter_sets + SYSCALL_FILTER_SET_CLOCK, SCMP_ACT_ERRNO(EPERM), false);
1591}
1592
59e856c7 1593static int apply_private_devices(const Unit *u, const ExecContext *c) {
469830d1 1594 assert(u);
ba128bb8
LP
1595 assert(c);
1596
8f81a5f6 1597 /* If PrivateDevices= is set, also turn off iopl and all @raw-io syscalls. */
ba128bb8 1598
469830d1
LP
1599 if (!c->private_devices)
1600 return 0;
1601
ba128bb8
LP
1602 if (skip_seccomp_unavailable(u, "PrivateDevices="))
1603 return 0;
1604
b54f36c6 1605 return seccomp_load_syscall_filter_set(SCMP_ACT_ALLOW, syscall_filter_sets + SYSCALL_FILTER_SET_RAW_IO, SCMP_ACT_ERRNO(EPERM), false);
ba128bb8
LP
1606}
1607
34cf6c43 1608static int apply_restrict_namespaces(const Unit *u, const ExecContext *c) {
469830d1 1609 assert(u);
add00535
LP
1610 assert(c);
1611
1612 if (!exec_context_restrict_namespaces_set(c))
1613 return 0;
1614
1615 if (skip_seccomp_unavailable(u, "RestrictNamespaces="))
1616 return 0;
1617
1618 return seccomp_restrict_namespaces(c->restrict_namespaces);
1619}
1620
78e864e5 1621static int apply_lock_personality(const Unit* u, const ExecContext *c) {
e8132d63
LP
1622 unsigned long personality;
1623 int r;
78e864e5
TM
1624
1625 assert(u);
1626 assert(c);
1627
1628 if (!c->lock_personality)
1629 return 0;
1630
1631 if (skip_seccomp_unavailable(u, "LockPersonality="))
1632 return 0;
1633
e8132d63
LP
1634 personality = c->personality;
1635
1636 /* If personality is not specified, use either PER_LINUX or PER_LINUX32 depending on what is currently set. */
1637 if (personality == PERSONALITY_INVALID) {
1638
1639 r = opinionated_personality(&personality);
1640 if (r < 0)
1641 return r;
1642 }
78e864e5
TM
1643
1644 return seccomp_lock_personality(personality);
1645}
1646
c0467cf3 1647#endif
8351ceae 1648
daf8f72b
LP
1649static int apply_protect_hostname(const Unit *u, const ExecContext *c, int *ret_exit_status) {
1650 int r;
1651
1652 assert(u);
1653 assert(c);
1654
1655 if (!c->protect_hostname)
1656 return 0;
1657
1658 if (ns_type_supported(NAMESPACE_UTS)) {
1659 if (unshare(CLONE_NEWUTS) < 0) {
1660 if (!ERRNO_IS_NOT_SUPPORTED(errno) && !ERRNO_IS_PRIVILEGE(errno)) {
1661 *ret_exit_status = EXIT_NAMESPACE;
1662 return log_unit_error_errno(u, errno, "Failed to set up UTS namespacing: %m");
1663 }
1664
1665 log_unit_warning(u, "ProtectHostname=yes is configured, but UTS namespace setup is prohibited (container manager?), ignoring namespace setup.");
1666 }
1667 } else
1668 log_unit_warning(u, "ProtectHostname=yes is configured, but the kernel does not support UTS namespaces, ignoring namespace setup.");
1669
1670#if HAVE_SECCOMP
1671 if (skip_seccomp_unavailable(u, "ProtectHostname="))
1672 return 0;
1673
1674 r = seccomp_protect_hostname();
1675 if (r < 0) {
1676 *ret_exit_status = EXIT_SECCOMP;
1677 return log_unit_error_errno(u, r, "Failed to apply hostname restrictions: %m");
1678 }
1679#endif
1680
1681 return 0;
1682}
1683
3042bbeb 1684static void do_idle_pipe_dance(int idle_pipe[static 4]) {
31a7eb86
ZJS
1685 assert(idle_pipe);
1686
54eb2300
LP
1687 idle_pipe[1] = safe_close(idle_pipe[1]);
1688 idle_pipe[2] = safe_close(idle_pipe[2]);
31a7eb86
ZJS
1689
1690 if (idle_pipe[0] >= 0) {
1691 int r;
1692
1693 r = fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT_USEC);
1694
1695 if (idle_pipe[3] >= 0 && r == 0 /* timeout */) {
c7cc737f
LP
1696 ssize_t n;
1697
31a7eb86 1698 /* Signal systemd that we are bored and want to continue. */
c7cc737f
LP
1699 n = write(idle_pipe[3], "x", 1);
1700 if (n > 0)
cd972d69 1701 /* Wait for systemd to react to the signal above. */
54756dce 1702 (void) fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT2_USEC);
31a7eb86
ZJS
1703 }
1704
54eb2300 1705 idle_pipe[0] = safe_close(idle_pipe[0]);
31a7eb86
ZJS
1706
1707 }
1708
54eb2300 1709 idle_pipe[3] = safe_close(idle_pipe[3]);
31a7eb86
ZJS
1710}
1711
fb2042dd
YW
1712static const char *exec_directory_env_name_to_string(ExecDirectoryType t);
1713
7cae38c4 1714static int build_environment(
34cf6c43 1715 const Unit *u,
9fa95f85 1716 const ExecContext *c,
1e22b5cd 1717 const ExecParameters *p,
da6053d0 1718 size_t n_fds,
7cae38c4
LP
1719 const char *home,
1720 const char *username,
1721 const char *shell,
7bce046b
LP
1722 dev_t journal_stream_dev,
1723 ino_t journal_stream_ino,
7cae38c4
LP
1724 char ***ret) {
1725
1726 _cleanup_strv_free_ char **our_env = NULL;
fb2042dd 1727 ExecDirectoryType t;
da6053d0 1728 size_t n_env = 0;
7cae38c4
LP
1729 char *x;
1730
4b58153d 1731 assert(u);
7cae38c4 1732 assert(c);
7c1cb6f1 1733 assert(p);
7cae38c4
LP
1734 assert(ret);
1735
91dd5f7c 1736 our_env = new0(char*, 15 + _EXEC_DIRECTORY_TYPE_MAX);
7cae38c4
LP
1737 if (!our_env)
1738 return -ENOMEM;
1739
1740 if (n_fds > 0) {
8dd4c05b
LP
1741 _cleanup_free_ char *joined = NULL;
1742
df0ff127 1743 if (asprintf(&x, "LISTEN_PID="PID_FMT, getpid_cached()) < 0)
7cae38c4
LP
1744 return -ENOMEM;
1745 our_env[n_env++] = x;
1746
da6053d0 1747 if (asprintf(&x, "LISTEN_FDS=%zu", n_fds) < 0)
7cae38c4
LP
1748 return -ENOMEM;
1749 our_env[n_env++] = x;
8dd4c05b 1750
1e22b5cd 1751 joined = strv_join(p->fd_names, ":");
8dd4c05b
LP
1752 if (!joined)
1753 return -ENOMEM;
1754
605405c6 1755 x = strjoin("LISTEN_FDNAMES=", joined);
8dd4c05b
LP
1756 if (!x)
1757 return -ENOMEM;
1758 our_env[n_env++] = x;
7cae38c4
LP
1759 }
1760
b08af3b1 1761 if ((p->flags & EXEC_SET_WATCHDOG) && p->watchdog_usec > 0) {
df0ff127 1762 if (asprintf(&x, "WATCHDOG_PID="PID_FMT, getpid_cached()) < 0)
09812eb7
LP
1763 return -ENOMEM;
1764 our_env[n_env++] = x;
1765
1e22b5cd 1766 if (asprintf(&x, "WATCHDOG_USEC="USEC_FMT, p->watchdog_usec) < 0)
09812eb7
LP
1767 return -ENOMEM;
1768 our_env[n_env++] = x;
1769 }
1770
fd63e712
LP
1771 /* If this is D-Bus, tell the nss-systemd module, since it relies on being able to use D-Bus look up dynamic
1772 * users via PID 1, possibly dead-locking the dbus daemon. This way it will not use D-Bus to resolve names, but
1773 * check the database directly. */
ac647978 1774 if (p->flags & EXEC_NSS_BYPASS_BUS) {
fd63e712
LP
1775 x = strdup("SYSTEMD_NSS_BYPASS_BUS=1");
1776 if (!x)
1777 return -ENOMEM;
1778 our_env[n_env++] = x;
1779 }
1780
7cae38c4 1781 if (home) {
b910cc72 1782 x = strjoin("HOME=", home);
7cae38c4
LP
1783 if (!x)
1784 return -ENOMEM;
7bbead1d
LP
1785
1786 path_simplify(x + 5, true);
7cae38c4
LP
1787 our_env[n_env++] = x;
1788 }
1789
1790 if (username) {
b910cc72 1791 x = strjoin("LOGNAME=", username);
7cae38c4
LP
1792 if (!x)
1793 return -ENOMEM;
1794 our_env[n_env++] = x;
1795
b910cc72 1796 x = strjoin("USER=", username);
7cae38c4
LP
1797 if (!x)
1798 return -ENOMEM;
1799 our_env[n_env++] = x;
1800 }
1801
1802 if (shell) {
b910cc72 1803 x = strjoin("SHELL=", shell);
7cae38c4
LP
1804 if (!x)
1805 return -ENOMEM;
7bbead1d
LP
1806
1807 path_simplify(x + 6, true);
7cae38c4
LP
1808 our_env[n_env++] = x;
1809 }
1810
4b58153d
LP
1811 if (!sd_id128_is_null(u->invocation_id)) {
1812 if (asprintf(&x, "INVOCATION_ID=" SD_ID128_FORMAT_STR, SD_ID128_FORMAT_VAL(u->invocation_id)) < 0)
1813 return -ENOMEM;
1814
1815 our_env[n_env++] = x;
1816 }
1817
6af760f3
LP
1818 if (exec_context_needs_term(c)) {
1819 const char *tty_path, *term = NULL;
1820
1821 tty_path = exec_context_tty_path(c);
1822
1823 /* If we are forked off PID 1 and we are supposed to operate on /dev/console, then let's try to inherit
1824 * the $TERM set for PID 1. This is useful for containers so that the $TERM the container manager
1825 * passes to PID 1 ends up all the way in the console login shown. */
1826
1827 if (path_equal(tty_path, "/dev/console") && getppid() == 1)
1828 term = getenv("TERM");
1829 if (!term)
1830 term = default_term_for_tty(tty_path);
7cae38c4 1831
b910cc72 1832 x = strjoin("TERM=", term);
7cae38c4
LP
1833 if (!x)
1834 return -ENOMEM;
1835 our_env[n_env++] = x;
1836 }
1837
7bce046b
LP
1838 if (journal_stream_dev != 0 && journal_stream_ino != 0) {
1839 if (asprintf(&x, "JOURNAL_STREAM=" DEV_FMT ":" INO_FMT, journal_stream_dev, journal_stream_ino) < 0)
1840 return -ENOMEM;
1841
1842 our_env[n_env++] = x;
1843 }
1844
91dd5f7c
LP
1845 if (c->log_namespace) {
1846 x = strjoin("LOG_NAMESPACE=", c->log_namespace);
1847 if (!x)
1848 return -ENOMEM;
1849
1850 our_env[n_env++] = x;
1851 }
1852
fb2042dd
YW
1853 for (t = 0; t < _EXEC_DIRECTORY_TYPE_MAX; t++) {
1854 _cleanup_free_ char *pre = NULL, *joined = NULL;
1855 const char *n;
1856
1857 if (!p->prefix[t])
1858 continue;
1859
1860 if (strv_isempty(c->directories[t].paths))
1861 continue;
1862
1863 n = exec_directory_env_name_to_string(t);
1864 if (!n)
1865 continue;
1866
1867 pre = strjoin(p->prefix[t], "/");
1868 if (!pre)
1869 return -ENOMEM;
1870
1871 joined = strv_join_prefix(c->directories[t].paths, ":", pre);
1872 if (!joined)
1873 return -ENOMEM;
1874
1875 x = strjoin(n, "=", joined);
1876 if (!x)
1877 return -ENOMEM;
1878
1879 our_env[n_env++] = x;
1880 }
1881
7cae38c4 1882 our_env[n_env++] = NULL;
fb2042dd 1883 assert(n_env <= 14 + _EXEC_DIRECTORY_TYPE_MAX);
7cae38c4 1884
ae2a15bc 1885 *ret = TAKE_PTR(our_env);
7cae38c4
LP
1886
1887 return 0;
1888}
1889
b4c14404
FB
1890static int build_pass_environment(const ExecContext *c, char ***ret) {
1891 _cleanup_strv_free_ char **pass_env = NULL;
1892 size_t n_env = 0, n_bufsize = 0;
1893 char **i;
1894
1895 STRV_FOREACH(i, c->pass_environment) {
1896 _cleanup_free_ char *x = NULL;
1897 char *v;
1898
1899 v = getenv(*i);
1900 if (!v)
1901 continue;
605405c6 1902 x = strjoin(*i, "=", v);
b4c14404
FB
1903 if (!x)
1904 return -ENOMEM;
00819cc1 1905
b4c14404
FB
1906 if (!GREEDY_REALLOC(pass_env, n_bufsize, n_env + 2))
1907 return -ENOMEM;
00819cc1 1908
1cc6c93a 1909 pass_env[n_env++] = TAKE_PTR(x);
b4c14404 1910 pass_env[n_env] = NULL;
b4c14404
FB
1911 }
1912
ae2a15bc 1913 *ret = TAKE_PTR(pass_env);
b4c14404
FB
1914
1915 return 0;
1916}
1917
8b44a3d2
LP
1918static bool exec_needs_mount_namespace(
1919 const ExecContext *context,
1920 const ExecParameters *params,
4657abb5 1921 const ExecRuntime *runtime) {
8b44a3d2
LP
1922
1923 assert(context);
1924 assert(params);
1925
915e6d16
LP
1926 if (context->root_image)
1927 return true;
1928
2a624c36
AP
1929 if (!strv_isempty(context->read_write_paths) ||
1930 !strv_isempty(context->read_only_paths) ||
1931 !strv_isempty(context->inaccessible_paths))
8b44a3d2
LP
1932 return true;
1933
42b1d8e0 1934 if (context->n_bind_mounts > 0)
d2d6c096
LP
1935 return true;
1936
2abd4e38
YW
1937 if (context->n_temporary_filesystems > 0)
1938 return true;
1939
37ed15d7 1940 if (!IN_SET(context->mount_flags, 0, MS_SHARED))
8b44a3d2
LP
1941 return true;
1942
1943 if (context->private_tmp && runtime && (runtime->tmp_dir || runtime->var_tmp_dir))
1944 return true;
1945
8b44a3d2 1946 if (context->private_devices ||
228af36f 1947 context->private_mounts ||
8b44a3d2 1948 context->protect_system != PROTECT_SYSTEM_NO ||
59eeb84b
LP
1949 context->protect_home != PROTECT_HOME_NO ||
1950 context->protect_kernel_tunables ||
c575770b 1951 context->protect_kernel_modules ||
94a7b275 1952 context->protect_kernel_logs ||
59eeb84b 1953 context->protect_control_groups)
8b44a3d2
LP
1954 return true;
1955
37c56f89
YW
1956 if (context->root_directory) {
1957 ExecDirectoryType t;
1958
1959 if (context->mount_apivfs)
1960 return true;
1961
1962 for (t = 0; t < _EXEC_DIRECTORY_TYPE_MAX; t++) {
1963 if (!params->prefix[t])
1964 continue;
1965
1966 if (!strv_isempty(context->directories[t].paths))
1967 return true;
1968 }
1969 }
5d997827 1970
42b1d8e0 1971 if (context->dynamic_user &&
b43ee82f 1972 (!strv_isempty(context->directories[EXEC_DIRECTORY_STATE].paths) ||
42b1d8e0
YW
1973 !strv_isempty(context->directories[EXEC_DIRECTORY_CACHE].paths) ||
1974 !strv_isempty(context->directories[EXEC_DIRECTORY_LOGS].paths)))
1975 return true;
1976
91dd5f7c
LP
1977 if (context->log_namespace)
1978 return true;
1979
8b44a3d2
LP
1980 return false;
1981}
1982
5749f855 1983static int setup_private_users(uid_t ouid, gid_t ogid, uid_t uid, gid_t gid) {
d251207d
LP
1984 _cleanup_free_ char *uid_map = NULL, *gid_map = NULL;
1985 _cleanup_close_pair_ int errno_pipe[2] = { -1, -1 };
1986 _cleanup_close_ int unshare_ready_fd = -1;
1987 _cleanup_(sigkill_waitp) pid_t pid = 0;
1988 uint64_t c = 1;
d251207d
LP
1989 ssize_t n;
1990 int r;
1991
5749f855
AZ
1992 /* Set up a user namespace and map the original UID/GID (IDs from before any user or group changes, i.e.
1993 * the IDs from the user or system manager(s)) to itself, the selected UID/GID to itself, and everything else to
d251207d
LP
1994 * nobody. In order to be able to write this mapping we need CAP_SETUID in the original user namespace, which
1995 * we however lack after opening the user namespace. To work around this we fork() a temporary child process,
1996 * which waits for the parent to create the new user namespace while staying in the original namespace. The
1997 * child then writes the UID mapping, under full privileges. The parent waits for the child to finish and
5749f855
AZ
1998 * continues execution normally.
1999 * For unprivileged users (i.e. without capabilities), the root to root mapping is excluded. As such, it
2000 * does not need CAP_SETUID to write the single line mapping to itself. */
d251207d 2001
5749f855
AZ
2002 /* Can only set up multiple mappings with CAP_SETUID. */
2003 if (have_effective_cap(CAP_SETUID) && uid != ouid && uid_is_valid(uid))
587ab01b 2004 r = asprintf(&uid_map,
5749f855 2005 UID_FMT " " UID_FMT " 1\n" /* Map $OUID → $OUID */
587ab01b 2006 UID_FMT " " UID_FMT " 1\n", /* Map $UID → $UID */
5749f855
AZ
2007 ouid, ouid, uid, uid);
2008 else
2009 r = asprintf(&uid_map,
2010 UID_FMT " " UID_FMT " 1\n", /* Map $OUID → $OUID */
2011 ouid, ouid);
d251207d 2012
5749f855
AZ
2013 if (r < 0)
2014 return -ENOMEM;
2015
2016 /* Can only set up multiple mappings with CAP_SETGID. */
2017 if (have_effective_cap(CAP_SETGID) && gid != ogid && gid_is_valid(gid))
587ab01b 2018 r = asprintf(&gid_map,
5749f855 2019 GID_FMT " " GID_FMT " 1\n" /* Map $OGID → $OGID */
587ab01b 2020 GID_FMT " " GID_FMT " 1\n", /* Map $GID → $GID */
5749f855
AZ
2021 ogid, ogid, gid, gid);
2022 else
2023 r = asprintf(&gid_map,
2024 GID_FMT " " GID_FMT " 1\n", /* Map $OGID -> $OGID */
2025 ogid, ogid);
2026
2027 if (r < 0)
2028 return -ENOMEM;
d251207d
LP
2029
2030 /* Create a communication channel so that the parent can tell the child when it finished creating the user
2031 * namespace. */
2032 unshare_ready_fd = eventfd(0, EFD_CLOEXEC);
2033 if (unshare_ready_fd < 0)
2034 return -errno;
2035
2036 /* Create a communication channel so that the child can tell the parent a proper error code in case it
2037 * failed. */
2038 if (pipe2(errno_pipe, O_CLOEXEC) < 0)
2039 return -errno;
2040
4c253ed1
LP
2041 r = safe_fork("(sd-userns)", FORK_RESET_SIGNALS|FORK_DEATHSIG, &pid);
2042 if (r < 0)
2043 return r;
2044 if (r == 0) {
d251207d
LP
2045 _cleanup_close_ int fd = -1;
2046 const char *a;
2047 pid_t ppid;
2048
2049 /* Child process, running in the original user namespace. Let's update the parent's UID/GID map from
2050 * here, after the parent opened its own user namespace. */
2051
2052 ppid = getppid();
2053 errno_pipe[0] = safe_close(errno_pipe[0]);
2054
2055 /* Wait until the parent unshared the user namespace */
2056 if (read(unshare_ready_fd, &c, sizeof(c)) < 0) {
2057 r = -errno;
2058 goto child_fail;
2059 }
2060
2061 /* Disable the setgroups() system call in the child user namespace, for good. */
2062 a = procfs_file_alloca(ppid, "setgroups");
2063 fd = open(a, O_WRONLY|O_CLOEXEC);
2064 if (fd < 0) {
2065 if (errno != ENOENT) {
2066 r = -errno;
2067 goto child_fail;
2068 }
2069
2070 /* If the file is missing the kernel is too old, let's continue anyway. */
2071 } else {
2072 if (write(fd, "deny\n", 5) < 0) {
2073 r = -errno;
2074 goto child_fail;
2075 }
2076
2077 fd = safe_close(fd);
2078 }
2079
2080 /* First write the GID map */
2081 a = procfs_file_alloca(ppid, "gid_map");
2082 fd = open(a, O_WRONLY|O_CLOEXEC);
2083 if (fd < 0) {
2084 r = -errno;
2085 goto child_fail;
2086 }
2087 if (write(fd, gid_map, strlen(gid_map)) < 0) {
2088 r = -errno;
2089 goto child_fail;
2090 }
2091 fd = safe_close(fd);
2092
2093 /* Then write the UID map */
2094 a = procfs_file_alloca(ppid, "uid_map");
2095 fd = open(a, O_WRONLY|O_CLOEXEC);
2096 if (fd < 0) {
2097 r = -errno;
2098 goto child_fail;
2099 }
2100 if (write(fd, uid_map, strlen(uid_map)) < 0) {
2101 r = -errno;
2102 goto child_fail;
2103 }
2104
2105 _exit(EXIT_SUCCESS);
2106
2107 child_fail:
2108 (void) write(errno_pipe[1], &r, sizeof(r));
2109 _exit(EXIT_FAILURE);
2110 }
2111
2112 errno_pipe[1] = safe_close(errno_pipe[1]);
2113
2114 if (unshare(CLONE_NEWUSER) < 0)
2115 return -errno;
2116
2117 /* Let the child know that the namespace is ready now */
2118 if (write(unshare_ready_fd, &c, sizeof(c)) < 0)
2119 return -errno;
2120
2121 /* Try to read an error code from the child */
2122 n = read(errno_pipe[0], &r, sizeof(r));
2123 if (n < 0)
2124 return -errno;
2125 if (n == sizeof(r)) { /* an error code was sent to us */
2126 if (r < 0)
2127 return r;
2128 return -EIO;
2129 }
2130 if (n != 0) /* on success we should have read 0 bytes */
2131 return -EIO;
2132
2e87a1fd
LP
2133 r = wait_for_terminate_and_check("(sd-userns)", pid, 0);
2134 pid = 0;
d251207d
LP
2135 if (r < 0)
2136 return r;
2e87a1fd 2137 if (r != EXIT_SUCCESS) /* If something strange happened with the child, let's consider this fatal, too */
d251207d
LP
2138 return -EIO;
2139
2140 return 0;
2141}
2142
494d0247
YW
2143static bool exec_directory_is_private(const ExecContext *context, ExecDirectoryType type) {
2144 if (!context->dynamic_user)
2145 return false;
2146
2147 if (type == EXEC_DIRECTORY_CONFIGURATION)
2148 return false;
2149
2150 if (type == EXEC_DIRECTORY_RUNTIME && context->runtime_directory_preserve_mode == EXEC_PRESERVE_NO)
2151 return false;
2152
2153 return true;
2154}
2155
3536f49e 2156static int setup_exec_directory(
07689d5d
LP
2157 const ExecContext *context,
2158 const ExecParameters *params,
2159 uid_t uid,
3536f49e 2160 gid_t gid,
3536f49e
YW
2161 ExecDirectoryType type,
2162 int *exit_status) {
07689d5d 2163
72fd1768 2164 static const int exit_status_table[_EXEC_DIRECTORY_TYPE_MAX] = {
3536f49e
YW
2165 [EXEC_DIRECTORY_RUNTIME] = EXIT_RUNTIME_DIRECTORY,
2166 [EXEC_DIRECTORY_STATE] = EXIT_STATE_DIRECTORY,
2167 [EXEC_DIRECTORY_CACHE] = EXIT_CACHE_DIRECTORY,
2168 [EXEC_DIRECTORY_LOGS] = EXIT_LOGS_DIRECTORY,
2169 [EXEC_DIRECTORY_CONFIGURATION] = EXIT_CONFIGURATION_DIRECTORY,
2170 };
07689d5d
LP
2171 char **rt;
2172 int r;
2173
2174 assert(context);
2175 assert(params);
72fd1768 2176 assert(type >= 0 && type < _EXEC_DIRECTORY_TYPE_MAX);
3536f49e 2177 assert(exit_status);
07689d5d 2178
3536f49e
YW
2179 if (!params->prefix[type])
2180 return 0;
2181
8679efde 2182 if (params->flags & EXEC_CHOWN_DIRECTORIES) {
3536f49e
YW
2183 if (!uid_is_valid(uid))
2184 uid = 0;
2185 if (!gid_is_valid(gid))
2186 gid = 0;
2187 }
2188
2189 STRV_FOREACH(rt, context->directories[type].paths) {
6c47cd7d 2190 _cleanup_free_ char *p = NULL, *pp = NULL;
07689d5d 2191
edbfeb12 2192 p = path_join(params->prefix[type], *rt);
3536f49e
YW
2193 if (!p) {
2194 r = -ENOMEM;
2195 goto fail;
2196 }
07689d5d 2197
23a7448e
YW
2198 r = mkdir_parents_label(p, 0755);
2199 if (r < 0)
3536f49e 2200 goto fail;
23a7448e 2201
494d0247 2202 if (exec_directory_is_private(context, type)) {
6c9c51e5 2203 _cleanup_free_ char *private_root = NULL;
6c47cd7d 2204
3f5b1508
LP
2205 /* So, here's one extra complication when dealing with DynamicUser=1 units. In that
2206 * case we want to avoid leaving a directory around fully accessible that is owned by
2207 * a dynamic user whose UID is later on reused. To lock this down we use the same
2208 * trick used by container managers to prohibit host users to get access to files of
2209 * the same UID in containers: we place everything inside a directory that has an
2210 * access mode of 0700 and is owned root:root, so that it acts as security boundary
2211 * for unprivileged host code. We then use fs namespacing to make this directory
2212 * permeable for the service itself.
6c47cd7d 2213 *
3f5b1508
LP
2214 * Specifically: for a service which wants a special directory "foo/" we first create
2215 * a directory "private/" with access mode 0700 owned by root:root. Then we place
2216 * "foo" inside of that directory (i.e. "private/foo/"), and make "foo" a symlink to
2217 * "private/foo". This way, privileged host users can access "foo/" as usual, but
2218 * unprivileged host users can't look into it. Inside of the namespace of the unit
2219 * "private/" is replaced by a more liberally accessible tmpfs, into which the host's
2220 * "private/foo/" is mounted under the same name, thus disabling the access boundary
2221 * for the service and making sure it only gets access to the dirs it needs but no
2222 * others. Tricky? Yes, absolutely, but it works!
6c47cd7d 2223 *
3f5b1508
LP
2224 * Note that we don't do this for EXEC_DIRECTORY_CONFIGURATION as that's assumed not
2225 * to be owned by the service itself.
2226 *
2227 * Also, note that we don't do this for EXEC_DIRECTORY_RUNTIME as that's often used
2228 * for sharing files or sockets with other services. */
6c47cd7d 2229
edbfeb12 2230 private_root = path_join(params->prefix[type], "private");
6c47cd7d
LP
2231 if (!private_root) {
2232 r = -ENOMEM;
2233 goto fail;
2234 }
2235
2236 /* First set up private root if it doesn't exist yet, with access mode 0700 and owned by root:root */
37c1d5e9 2237 r = mkdir_safe_label(private_root, 0700, 0, 0, MKDIR_WARN_MODE);
6c47cd7d
LP
2238 if (r < 0)
2239 goto fail;
2240
edbfeb12 2241 pp = path_join(private_root, *rt);
6c47cd7d
LP
2242 if (!pp) {
2243 r = -ENOMEM;
2244 goto fail;
2245 }
2246
2247 /* Create all directories between the configured directory and this private root, and mark them 0755 */
2248 r = mkdir_parents_label(pp, 0755);
2249 if (r < 0)
2250 goto fail;
2251
949befd3
LP
2252 if (is_dir(p, false) > 0 &&
2253 (laccess(pp, F_OK) < 0 && errno == ENOENT)) {
2254
2255 /* Hmm, the private directory doesn't exist yet, but the normal one exists? If so, move
2256 * it over. Most likely the service has been upgraded from one that didn't use
2257 * DynamicUser=1, to one that does. */
2258
cf52c45d
LP
2259 log_info("Found pre-existing public %s= directory %s, migrating to %s.\n"
2260 "Apparently, service previously had DynamicUser= turned off, and has now turned it on.",
2261 exec_directory_type_to_string(type), p, pp);
2262
949befd3
LP
2263 if (rename(p, pp) < 0) {
2264 r = -errno;
2265 goto fail;
2266 }
2267 } else {
2268 /* Otherwise, create the actual directory for the service */
2269
2270 r = mkdir_label(pp, context->directories[type].mode);
2271 if (r < 0 && r != -EEXIST)
2272 goto fail;
2273 }
6c47cd7d 2274
6c47cd7d 2275 /* And link it up from the original place */
6c9c51e5 2276 r = symlink_idempotent(pp, p, true);
6c47cd7d
LP
2277 if (r < 0)
2278 goto fail;
2279
6c47cd7d 2280 } else {
5c6d40d1
LP
2281 _cleanup_free_ char *target = NULL;
2282
2283 if (type != EXEC_DIRECTORY_CONFIGURATION &&
2284 readlink_and_make_absolute(p, &target) >= 0) {
578dc69f 2285 _cleanup_free_ char *q = NULL, *q_resolved = NULL, *target_resolved = NULL;
5c6d40d1
LP
2286
2287 /* This already exists and is a symlink? Interesting. Maybe it's one created
2193f17c
LP
2288 * by DynamicUser=1 (see above)?
2289 *
2290 * We do this for all directory types except for ConfigurationDirectory=,
2291 * since they all support the private/ symlink logic at least in some
2292 * configurations, see above. */
5c6d40d1 2293
578dc69f
YW
2294 r = chase_symlinks(target, NULL, 0, &target_resolved, NULL);
2295 if (r < 0)
2296 goto fail;
2297
5c6d40d1
LP
2298 q = path_join(params->prefix[type], "private", *rt);
2299 if (!q) {
2300 r = -ENOMEM;
2301 goto fail;
2302 }
2303
578dc69f
YW
2304 /* /var/lib or friends may be symlinks. So, let's chase them also. */
2305 r = chase_symlinks(q, NULL, CHASE_NONEXISTENT, &q_resolved, NULL);
2306 if (r < 0)
2307 goto fail;
2308
2309 if (path_equal(q_resolved, target_resolved)) {
5c6d40d1
LP
2310
2311 /* Hmm, apparently DynamicUser= was once turned on for this service,
2312 * but is no longer. Let's move the directory back up. */
2313
cf52c45d
LP
2314 log_info("Found pre-existing private %s= directory %s, migrating to %s.\n"
2315 "Apparently, service previously had DynamicUser= turned on, and has now turned it off.",
2316 exec_directory_type_to_string(type), q, p);
2317
5c6d40d1
LP
2318 if (unlink(p) < 0) {
2319 r = -errno;
2320 goto fail;
2321 }
2322
2323 if (rename(q, p) < 0) {
2324 r = -errno;
2325 goto fail;
2326 }
2327 }
2328 }
2329
6c47cd7d 2330 r = mkdir_label(p, context->directories[type].mode);
d484580c 2331 if (r < 0) {
d484580c
LP
2332 if (r != -EEXIST)
2333 goto fail;
2334
206e9864
LP
2335 if (type == EXEC_DIRECTORY_CONFIGURATION) {
2336 struct stat st;
2337
2338 /* Don't change the owner/access mode of the configuration directory,
2339 * as in the common case it is not written to by a service, and shall
2340 * not be writable. */
2341
2342 if (stat(p, &st) < 0) {
2343 r = -errno;
2344 goto fail;
2345 }
2346
2347 /* Still complain if the access mode doesn't match */
2348 if (((st.st_mode ^ context->directories[type].mode) & 07777) != 0)
2349 log_warning("%s \'%s\' already exists but the mode is different. "
2350 "(File system: %o %sMode: %o)",
2351 exec_directory_type_to_string(type), *rt,
2352 st.st_mode & 07777, exec_directory_type_to_string(type), context->directories[type].mode & 07777);
2353
6cff72eb 2354 continue;
206e9864 2355 }
6cff72eb 2356 }
a1164ae3 2357 }
07689d5d 2358
206e9864 2359 /* Lock down the access mode (we use chmod_and_chown() to make this idempotent. We don't
5238e957 2360 * specify UID/GID here, so that path_chown_recursive() can optimize things depending on the
206e9864
LP
2361 * current UID/GID ownership.) */
2362 r = chmod_and_chown(pp ?: p, context->directories[type].mode, UID_INVALID, GID_INVALID);
2363 if (r < 0)
2364 goto fail;
c71b2eb7 2365
607b358e
LP
2366 /* Then, change the ownership of the whole tree, if necessary. When dynamic users are used we
2367 * drop the suid/sgid bits, since we really don't want SUID/SGID files for dynamic UID/GID
2368 * assignments to exist.*/
2369 r = path_chown_recursive(pp ?: p, uid, gid, context->dynamic_user ? 01777 : 07777);
07689d5d 2370 if (r < 0)
3536f49e 2371 goto fail;
07689d5d
LP
2372 }
2373
2374 return 0;
3536f49e
YW
2375
2376fail:
2377 *exit_status = exit_status_table[type];
3536f49e 2378 return r;
07689d5d
LP
2379}
2380
#if ENABLE_SMACK
/* Applies the SMACK process label to the calling process (PID 0 is passed to mac_smack_apply_pid()). If
 * context->smack_process_label is set it is used as-is. Otherwise, and only if SMACK_DEFAULT_PROCESS_LABEL
 * is defined at build time, the SMACK_ATTR_EXEC label read from command->path is used, falling back to
 * SMACK_DEFAULT_PROCESS_LABEL if none could be read.
 *
 * Returns 0 on success, a negative errno-style error otherwise. */
static int setup_smack(
                const ExecContext *context,
                const ExecCommand *command) {

        int r;

        assert(context);
        assert(command);

        if (context->smack_process_label) {
                r = mac_smack_apply_pid(0, context->smack_process_label);
                if (r < 0)
                        return r;
        }
#ifdef SMACK_DEFAULT_PROCESS_LABEL
        else {
                _cleanup_free_ char *exec_label = NULL;

                /* A missing attribute or missing support is not an error, we use the default label then */
                r = mac_smack_read(command->path, SMACK_ATTR_EXEC, &exec_label);
                if (r < 0 && !IN_SET(r, -ENODATA, -EOPNOTSUPP))
                        return r;

                r = mac_smack_apply_pid(0, exec_label ? : SMACK_DEFAULT_PROCESS_LABEL);
                if (r < 0)
                        return r;
        }
#endif

        return 0;
}
#endif
cefc33ae 2413
6c47cd7d
LP
2414static int compile_bind_mounts(
2415 const ExecContext *context,
2416 const ExecParameters *params,
2417 BindMount **ret_bind_mounts,
da6053d0 2418 size_t *ret_n_bind_mounts,
6c47cd7d
LP
2419 char ***ret_empty_directories) {
2420
2421 _cleanup_strv_free_ char **empty_directories = NULL;
2422 BindMount *bind_mounts;
da6053d0 2423 size_t n, h = 0, i;
6c47cd7d
LP
2424 ExecDirectoryType t;
2425 int r;
2426
2427 assert(context);
2428 assert(params);
2429 assert(ret_bind_mounts);
2430 assert(ret_n_bind_mounts);
2431 assert(ret_empty_directories);
2432
2433 n = context->n_bind_mounts;
2434 for (t = 0; t < _EXEC_DIRECTORY_TYPE_MAX; t++) {
2435 if (!params->prefix[t])
2436 continue;
2437
2438 n += strv_length(context->directories[t].paths);
2439 }
2440
2441 if (n <= 0) {
2442 *ret_bind_mounts = NULL;
2443 *ret_n_bind_mounts = 0;
2444 *ret_empty_directories = NULL;
2445 return 0;
2446 }
2447
2448 bind_mounts = new(BindMount, n);
2449 if (!bind_mounts)
2450 return -ENOMEM;
2451
a8cabc61 2452 for (i = 0; i < context->n_bind_mounts; i++) {
6c47cd7d
LP
2453 BindMount *item = context->bind_mounts + i;
2454 char *s, *d;
2455
2456 s = strdup(item->source);
2457 if (!s) {
2458 r = -ENOMEM;
2459 goto finish;
2460 }
2461
2462 d = strdup(item->destination);
2463 if (!d) {
2464 free(s);
2465 r = -ENOMEM;
2466 goto finish;
2467 }
2468
2469 bind_mounts[h++] = (BindMount) {
2470 .source = s,
2471 .destination = d,
2472 .read_only = item->read_only,
2473 .recursive = item->recursive,
2474 .ignore_enoent = item->ignore_enoent,
2475 };
2476 }
2477
2478 for (t = 0; t < _EXEC_DIRECTORY_TYPE_MAX; t++) {
2479 char **suffix;
2480
2481 if (!params->prefix[t])
2482 continue;
2483
2484 if (strv_isempty(context->directories[t].paths))
2485 continue;
2486
494d0247 2487 if (exec_directory_is_private(context, t) &&
5609f688 2488 !(context->root_directory || context->root_image)) {
6c47cd7d
LP
2489 char *private_root;
2490
2491 /* So this is for a dynamic user, and we need to make sure the process can access its own
2492 * directory. For that we overmount the usually inaccessible "private" subdirectory with a
2493 * tmpfs that makes it accessible and is empty except for the submounts we do this for. */
2494
657ee2d8 2495 private_root = path_join(params->prefix[t], "private");
6c47cd7d
LP
2496 if (!private_root) {
2497 r = -ENOMEM;
2498 goto finish;
2499 }
2500
2501 r = strv_consume(&empty_directories, private_root);
a635a7ae 2502 if (r < 0)
6c47cd7d 2503 goto finish;
6c47cd7d
LP
2504 }
2505
2506 STRV_FOREACH(suffix, context->directories[t].paths) {
2507 char *s, *d;
2508
494d0247 2509 if (exec_directory_is_private(context, t))
657ee2d8 2510 s = path_join(params->prefix[t], "private", *suffix);
6c47cd7d 2511 else
657ee2d8 2512 s = path_join(params->prefix[t], *suffix);
6c47cd7d
LP
2513 if (!s) {
2514 r = -ENOMEM;
2515 goto finish;
2516 }
2517
494d0247 2518 if (exec_directory_is_private(context, t) &&
5609f688
YW
2519 (context->root_directory || context->root_image))
2520 /* When RootDirectory= or RootImage= are set, then the symbolic link to the private
2521 * directory is not created on the root directory. So, let's bind-mount the directory
2522 * on the 'non-private' place. */
657ee2d8 2523 d = path_join(params->prefix[t], *suffix);
5609f688
YW
2524 else
2525 d = strdup(s);
6c47cd7d
LP
2526 if (!d) {
2527 free(s);
2528 r = -ENOMEM;
2529 goto finish;
2530 }
2531
2532 bind_mounts[h++] = (BindMount) {
2533 .source = s,
2534 .destination = d,
2535 .read_only = false,
9ce4e4b0 2536 .nosuid = context->dynamic_user, /* don't allow suid/sgid when DynamicUser= is on */
6c47cd7d
LP
2537 .recursive = true,
2538 .ignore_enoent = false,
2539 };
2540 }
2541 }
2542
2543 assert(h == n);
2544
2545 *ret_bind_mounts = bind_mounts;
2546 *ret_n_bind_mounts = n;
ae2a15bc 2547 *ret_empty_directories = TAKE_PTR(empty_directories);
6c47cd7d
LP
2548
2549 return (int) n;
2550
2551finish:
2552 bind_mount_free_many(bind_mounts, h);
2553 return r;
2554}
2555
4e677599
LP
2556static bool insist_on_sandboxing(
2557 const ExecContext *context,
2558 const char *root_dir,
2559 const char *root_image,
2560 const BindMount *bind_mounts,
2561 size_t n_bind_mounts) {
2562
2563 size_t i;
2564
2565 assert(context);
2566 assert(n_bind_mounts == 0 || bind_mounts);
2567
2568 /* Checks whether we need to insist on fs namespacing. i.e. whether we have settings configured that
86b52a39 2569 * would alter the view on the file system beyond making things read-only or invisible, i.e. would
4e677599
LP
2570 * rearrange stuff in a way we cannot ignore gracefully. */
2571
2572 if (context->n_temporary_filesystems > 0)
2573 return true;
2574
2575 if (root_dir || root_image)
2576 return true;
2577
2578 if (context->dynamic_user)
2579 return true;
2580
2581 /* If there are any bind mounts set that don't map back onto themselves, fs namespacing becomes
2582 * essential. */
2583 for (i = 0; i < n_bind_mounts; i++)
2584 if (!path_equal(bind_mounts[i].source, bind_mounts[i].destination))
2585 return true;
2586
91dd5f7c
LP
2587 if (context->log_namespace)
2588 return true;
2589
4e677599
LP
2590 return false;
2591}
2592
6818c54c 2593static int apply_mount_namespace(
34cf6c43
YW
2594 const Unit *u,
2595 const ExecCommand *command,
6818c54c
LP
2596 const ExecContext *context,
2597 const ExecParameters *params,
7cc5ef5f
ZJS
2598 const ExecRuntime *runtime,
2599 char **error_path) {
6818c54c 2600
7bcef4ef 2601 _cleanup_strv_free_ char **empty_directories = NULL;
93c6bb51 2602 char *tmp = NULL, *var = NULL;
915e6d16 2603 const char *root_dir = NULL, *root_image = NULL;
228af36f 2604 NamespaceInfo ns_info;
165a31c0 2605 bool needs_sandboxing;
6c47cd7d 2606 BindMount *bind_mounts = NULL;
da6053d0 2607 size_t n_bind_mounts = 0;
6818c54c 2608 int r;
93c6bb51 2609
2b3c1b9e
DH
2610 assert(context);
2611
915e6d16
LP
2612 if (params->flags & EXEC_APPLY_CHROOT) {
2613 root_image = context->root_image;
2614
2615 if (!root_image)
2616 root_dir = context->root_directory;
2617 }
93c6bb51 2618
6c47cd7d
LP
2619 r = compile_bind_mounts(context, params, &bind_mounts, &n_bind_mounts, &empty_directories);
2620 if (r < 0)
2621 return r;
2622
165a31c0 2623 needs_sandboxing = (params->flags & EXEC_APPLY_SANDBOXING) && !(command->flags & EXEC_COMMAND_FULLY_PRIVILEGED);
ecf63c91
NJ
2624 if (needs_sandboxing) {
2625 /* The runtime struct only contains the parent of the private /tmp,
2626 * which is non-accessible to world users. Inside of it there's a /tmp
2627 * that is sticky, and that's the one we want to use here. */
2628
2629 if (context->private_tmp && runtime) {
2630 if (runtime->tmp_dir)
2631 tmp = strjoina(runtime->tmp_dir, "/tmp");
2632 if (runtime->var_tmp_dir)
2633 var = strjoina(runtime->var_tmp_dir, "/tmp");
2634 }
2635
b5a33299
YW
2636 ns_info = (NamespaceInfo) {
2637 .ignore_protect_paths = false,
2638 .private_dev = context->private_devices,
2639 .protect_control_groups = context->protect_control_groups,
2640 .protect_kernel_tunables = context->protect_kernel_tunables,
2641 .protect_kernel_modules = context->protect_kernel_modules,
94a7b275 2642 .protect_kernel_logs = context->protect_kernel_logs,
aecd5ac6 2643 .protect_hostname = context->protect_hostname,
b5a33299 2644 .mount_apivfs = context->mount_apivfs,
228af36f 2645 .private_mounts = context->private_mounts,
b5a33299 2646 };
ecf63c91 2647 } else if (!context->dynamic_user && root_dir)
228af36f
LP
2648 /*
2649 * If DynamicUser=no and RootDirectory= is set then lets pass a relaxed
2650 * sandbox info, otherwise enforce it, don't ignore protected paths and
2651 * fail if we are enable to apply the sandbox inside the mount namespace.
2652 */
2653 ns_info = (NamespaceInfo) {
2654 .ignore_protect_paths = true,
2655 };
2656 else
2657 ns_info = (NamespaceInfo) {};
b5a33299 2658
37ed15d7
FB
2659 if (context->mount_flags == MS_SHARED)
2660 log_unit_debug(u, "shared mount propagation hidden by other fs namespacing unit settings: ignoring");
2661
915e6d16 2662 r = setup_namespace(root_dir, root_image,
7bcef4ef 2663 &ns_info, context->read_write_paths,
165a31c0
LP
2664 needs_sandboxing ? context->read_only_paths : NULL,
2665 needs_sandboxing ? context->inaccessible_paths : NULL,
6c47cd7d
LP
2666 empty_directories,
2667 bind_mounts,
2668 n_bind_mounts,
2abd4e38
YW
2669 context->temporary_filesystems,
2670 context->n_temporary_filesystems,
93c6bb51
DH
2671 tmp,
2672 var,
91dd5f7c 2673 context->log_namespace,
165a31c0
LP
2674 needs_sandboxing ? context->protect_home : PROTECT_HOME_NO,
2675 needs_sandboxing ? context->protect_system : PROTECT_SYSTEM_NO,
915e6d16 2676 context->mount_flags,
8d251485 2677 DISSECT_IMAGE_DISCARD_ON_LOOP|DISSECT_IMAGE_RELAX_VAR_CHECK|DISSECT_IMAGE_FSCK,
7cc5ef5f 2678 error_path);
93c6bb51 2679
1beab8b0 2680 /* If we couldn't set up the namespace this is probably due to a missing capability. setup_namespace() reports
5238e957 2681 * that with a special, recognizable error ENOANO. In this case, silently proceed, but only if exclusively
1beab8b0
LP
2682 * sandboxing options were used, i.e. nothing such as RootDirectory= or BindMount= that would result in a
2683 * completely different execution environment. */
aca835ed 2684 if (r == -ENOANO) {
4e677599
LP
2685 if (insist_on_sandboxing(
2686 context,
2687 root_dir, root_image,
2688 bind_mounts,
2689 n_bind_mounts)) {
2690 log_unit_debug(u, "Failed to set up namespace, and refusing to continue since the selected namespacing options alter mount environment non-trivially.\n"
2691 "Bind mounts: %zu, temporary filesystems: %zu, root directory: %s, root image: %s, dynamic user: %s",
2692 n_bind_mounts, context->n_temporary_filesystems, yes_no(root_dir), yes_no(root_image), yes_no(context->dynamic_user));
2693
2694 r = -EOPNOTSUPP;
2695 } else {
aca835ed 2696 log_unit_debug(u, "Failed to set up namespace, assuming containerized execution and ignoring.");
4e677599 2697 r = 0;
aca835ed 2698 }
93c6bb51
DH
2699 }
2700
4e677599 2701 bind_mount_free_many(bind_mounts, n_bind_mounts);
93c6bb51
DH
2702 return r;
2703}
2704
915e6d16
LP
2705static int apply_working_directory(
2706 const ExecContext *context,
2707 const ExecParameters *params,
2708 const char *home,
376fecf6 2709 int *exit_status) {
915e6d16 2710
6732edab 2711 const char *d, *wd;
2b3c1b9e
DH
2712
2713 assert(context);
376fecf6 2714 assert(exit_status);
2b3c1b9e 2715
6732edab
LP
2716 if (context->working_directory_home) {
2717
376fecf6
LP
2718 if (!home) {
2719 *exit_status = EXIT_CHDIR;
6732edab 2720 return -ENXIO;
376fecf6 2721 }
6732edab 2722
2b3c1b9e 2723 wd = home;
6732edab
LP
2724
2725 } else if (context->working_directory)
2b3c1b9e
DH
2726 wd = context->working_directory;
2727 else
2728 wd = "/";
e7f1e7c6 2729
fa97f630 2730 if (params->flags & EXEC_APPLY_CHROOT)
2b3c1b9e 2731 d = wd;
fa97f630 2732 else
3b0e5bb5 2733 d = prefix_roota(context->root_directory, wd);
e7f1e7c6 2734
376fecf6
LP
2735 if (chdir(d) < 0 && !context->working_directory_missing_ok) {
2736 *exit_status = EXIT_CHDIR;
2b3c1b9e 2737 return -errno;
376fecf6 2738 }
e7f1e7c6
DH
2739
2740 return 0;
2741}
2742
fa97f630
JB
2743static int apply_root_directory(
2744 const ExecContext *context,
2745 const ExecParameters *params,
2746 const bool needs_mount_ns,
2747 int *exit_status) {
2748
2749 assert(context);
2750 assert(exit_status);
2751
2752 if (params->flags & EXEC_APPLY_CHROOT) {
2753 if (!needs_mount_ns && context->root_directory)
2754 if (chroot(context->root_directory) < 0) {
2755 *exit_status = EXIT_CHROOT;
2756 return -errno;
2757 }
2758 }
2759
2760 return 0;
2761}
2762
b1edf445 2763static int setup_keyring(
34cf6c43 2764 const Unit *u,
b1edf445
LP
2765 const ExecContext *context,
2766 const ExecParameters *p,
2767 uid_t uid, gid_t gid) {
2768
74dd6b51 2769 key_serial_t keyring;
e64c2d0b
DJL
2770 int r = 0;
2771 uid_t saved_uid;
2772 gid_t saved_gid;
74dd6b51
LP
2773
2774 assert(u);
b1edf445 2775 assert(context);
74dd6b51
LP
2776 assert(p);
2777
2778 /* Let's set up a new per-service "session" kernel keyring for each system service. This has the benefit that
2779 * each service runs with its own keyring shared among all processes of the service, but with no hook-up beyond
2780 * that scope, and in particular no link to the per-UID keyring. If we don't do this the keyring will be
2781 * automatically created on-demand and then linked to the per-UID keyring, by the kernel. The kernel's built-in
2782 * on-demand behaviour is very appropriate for login users, but probably not so much for system services, where
2783 * UIDs are not necessarily specific to a service but reused (at least in the case of UID 0). */
2784
b1edf445
LP
2785 if (context->keyring_mode == EXEC_KEYRING_INHERIT)
2786 return 0;
2787
e64c2d0b
DJL
2788 /* Acquiring a reference to the user keyring is nasty. We briefly change identity in order to get things set up
2789 * properly by the kernel. If we don't do that then we can't create it atomically, and that sucks for parallel
2790 * execution. This mimics what pam_keyinit does, too. Setting up session keyring, to be owned by the right user
2791 * & group is just as nasty as acquiring a reference to the user keyring. */
2792
2793 saved_uid = getuid();
2794 saved_gid = getgid();
2795
2796 if (gid_is_valid(gid) && gid != saved_gid) {
2797 if (setregid(gid, -1) < 0)
2798 return log_unit_error_errno(u, errno, "Failed to change GID for user keyring: %m");
2799 }
2800
2801 if (uid_is_valid(uid) && uid != saved_uid) {
2802 if (setreuid(uid, -1) < 0) {
2803 r = log_unit_error_errno(u, errno, "Failed to change UID for user keyring: %m");
2804 goto out;
2805 }
2806 }
2807
74dd6b51
LP
2808 keyring = keyctl(KEYCTL_JOIN_SESSION_KEYRING, 0, 0, 0, 0);
2809 if (keyring == -1) {
2810 if (errno == ENOSYS)
8002fb97 2811 log_unit_debug_errno(u, errno, "Kernel keyring not supported, ignoring.");
74dd6b51 2812 else if (IN_SET(errno, EACCES, EPERM))
8002fb97 2813 log_unit_debug_errno(u, errno, "Kernel keyring access prohibited, ignoring.");
74dd6b51 2814 else if (errno == EDQUOT)
8002fb97 2815 log_unit_debug_errno(u, errno, "Out of kernel keyrings to allocate, ignoring.");
74dd6b51 2816 else
e64c2d0b 2817 r = log_unit_error_errno(u, errno, "Setting up kernel keyring failed: %m");
74dd6b51 2818
e64c2d0b 2819 goto out;
74dd6b51
LP
2820 }
2821
e64c2d0b
DJL
2822 /* When requested link the user keyring into the session keyring. */
2823 if (context->keyring_mode == EXEC_KEYRING_SHARED) {
2824
2825 if (keyctl(KEYCTL_LINK,
2826 KEY_SPEC_USER_KEYRING,
2827 KEY_SPEC_SESSION_KEYRING, 0, 0) < 0) {
2828 r = log_unit_error_errno(u, errno, "Failed to link user keyring into session keyring: %m");
2829 goto out;
2830 }
2831 }
2832
2833 /* Restore uid/gid back */
2834 if (uid_is_valid(uid) && uid != saved_uid) {
2835 if (setreuid(saved_uid, -1) < 0) {
2836 r = log_unit_error_errno(u, errno, "Failed to change UID back for user keyring: %m");
2837 goto out;
2838 }
2839 }
2840
2841 if (gid_is_valid(gid) && gid != saved_gid) {
2842 if (setregid(saved_gid, -1) < 0)
2843 return log_unit_error_errno(u, errno, "Failed to change GID back for user keyring: %m");
2844 }
2845
2846 /* Populate they keyring with the invocation ID by default, as original saved_uid. */
b3415f5d
LP
2847 if (!sd_id128_is_null(u->invocation_id)) {
2848 key_serial_t key;
2849
2850 key = add_key("user", "invocation_id", &u->invocation_id, sizeof(u->invocation_id), KEY_SPEC_SESSION_KEYRING);
2851 if (key == -1)
8002fb97 2852 log_unit_debug_errno(u, errno, "Failed to add invocation ID to keyring, ignoring: %m");
b3415f5d
LP
2853 else {
2854 if (keyctl(KEYCTL_SETPERM, key,
2855 KEY_POS_VIEW|KEY_POS_READ|KEY_POS_SEARCH|
2856 KEY_USR_VIEW|KEY_USR_READ|KEY_USR_SEARCH, 0, 0) < 0)
e64c2d0b 2857 r = log_unit_error_errno(u, errno, "Failed to restrict invocation ID permission: %m");
b3415f5d
LP
2858 }
2859 }
2860
e64c2d0b
DJL
2861out:
2862 /* Revert back uid & gid for the the last time, and exit */
2863 /* no extra logging, as only the first already reported error matters */
2864 if (getuid() != saved_uid)
2865 (void) setreuid(saved_uid, -1);
b1edf445 2866
e64c2d0b
DJL
2867 if (getgid() != saved_gid)
2868 (void) setregid(saved_gid, -1);
b1edf445 2869
e64c2d0b 2870 return r;
74dd6b51
LP
2871}
2872
/* Appends those of the two file descriptors in 'pair' that are valid (i.e. >= 0) to 'array', advancing the
 * element counter *n for each descriptor appended. The caller must make sure 'array' has room for up to two
 * more entries. */
static void append_socket_pair(int *array, size_t *n, const int pair[static 2]) {
        assert(array);
        assert(n);
        assert(pair);

        if (pair[0] >= 0)
                array[(*n)++] = pair[0];
        if (pair[1] >= 0)
                array[(*n)++] = pair[1];
}
2883
a34ceba6
LP
2884static int close_remaining_fds(
2885 const ExecParameters *params,
34cf6c43
YW
2886 const ExecRuntime *runtime,
2887 const DynamicCreds *dcreds,
00d9ef85 2888 int user_lookup_fd,
a34ceba6 2889 int socket_fd,
5686391b 2890 int exec_fd,
5b8d1f6b 2891 const int *fds, size_t n_fds) {
a34ceba6 2892
da6053d0 2893 size_t n_dont_close = 0;
00d9ef85 2894 int dont_close[n_fds + 12];
a34ceba6
LP
2895
2896 assert(params);
2897
2898 if (params->stdin_fd >= 0)
2899 dont_close[n_dont_close++] = params->stdin_fd;
2900 if (params->stdout_fd >= 0)
2901 dont_close[n_dont_close++] = params->stdout_fd;
2902 if (params->stderr_fd >= 0)
2903 dont_close[n_dont_close++] = params->stderr_fd;
2904
2905 if (socket_fd >= 0)
2906 dont_close[n_dont_close++] = socket_fd;
5686391b
LP
2907 if (exec_fd >= 0)
2908 dont_close[n_dont_close++] = exec_fd;
a34ceba6
LP
2909 if (n_fds > 0) {
2910 memcpy(dont_close + n_dont_close, fds, sizeof(int) * n_fds);
2911 n_dont_close += n_fds;
2912 }
2913
29206d46
LP
2914 if (runtime)
2915 append_socket_pair(dont_close, &n_dont_close, runtime->netns_storage_socket);
2916
2917 if (dcreds) {
2918 if (dcreds->user)
2919 append_socket_pair(dont_close, &n_dont_close, dcreds->user->storage_socket);
2920 if (dcreds->group)
2921 append_socket_pair(dont_close, &n_dont_close, dcreds->group->storage_socket);
a34ceba6
LP
2922 }
2923
00d9ef85
LP
2924 if (user_lookup_fd >= 0)
2925 dont_close[n_dont_close++] = user_lookup_fd;
2926
a34ceba6
LP
2927 return close_all_fds(dont_close, n_dont_close);
2928}
2929
00d9ef85
LP
2930static int send_user_lookup(
2931 Unit *unit,
2932 int user_lookup_fd,
2933 uid_t uid,
2934 gid_t gid) {
2935
2936 assert(unit);
2937
2938 /* Send the resolved UID/GID to PID 1 after we learnt it. We send a single datagram, containing the UID/GID
2939 * data as well as the unit name. Note that we suppress sending this if no user/group to resolve was
2940 * specified. */
2941
2942 if (user_lookup_fd < 0)
2943 return 0;
2944
2945 if (!uid_is_valid(uid) && !gid_is_valid(gid))
2946 return 0;
2947
2948 if (writev(user_lookup_fd,
2949 (struct iovec[]) {
e6a7ec4b
LP
2950 IOVEC_INIT(&uid, sizeof(uid)),
2951 IOVEC_INIT(&gid, sizeof(gid)),
2952 IOVEC_INIT_STRING(unit->id) }, 3) < 0)
00d9ef85
LP
2953 return -errno;
2954
2955 return 0;
2956}
2957
6732edab
LP
2958static int acquire_home(const ExecContext *c, uid_t uid, const char** home, char **buf) {
2959 int r;
2960
2961 assert(c);
2962 assert(home);
2963 assert(buf);
2964
2965 /* If WorkingDirectory=~ is set, try to acquire a usable home directory. */
2966
2967 if (*home)
2968 return 0;
2969
2970 if (!c->working_directory_home)
2971 return 0;
2972
6732edab
LP
2973 r = get_home_dir(buf);
2974 if (r < 0)
2975 return r;
2976
2977 *home = *buf;
2978 return 1;
2979}
2980
da50b85a
LP
2981static int compile_suggested_paths(const ExecContext *c, const ExecParameters *p, char ***ret) {
2982 _cleanup_strv_free_ char ** list = NULL;
2983 ExecDirectoryType t;
2984 int r;
2985
2986 assert(c);
2987 assert(p);
2988 assert(ret);
2989
2990 assert(c->dynamic_user);
2991
2992 /* Compile a list of paths that it might make sense to read the owning UID from to use as initial candidate for
2993 * dynamic UID allocation, in order to save us from doing costly recursive chown()s of the special
2994 * directories. */
2995
2996 for (t = 0; t < _EXEC_DIRECTORY_TYPE_MAX; t++) {
2997 char **i;
2998
2999 if (t == EXEC_DIRECTORY_CONFIGURATION)
3000 continue;
3001
3002 if (!p->prefix[t])
3003 continue;
3004
3005 STRV_FOREACH(i, c->directories[t].paths) {
3006 char *e;
3007
494d0247 3008 if (exec_directory_is_private(c, t))
657ee2d8 3009 e = path_join(p->prefix[t], "private", *i);
494d0247
YW
3010 else
3011 e = path_join(p->prefix[t], *i);
da50b85a
LP
3012 if (!e)
3013 return -ENOMEM;
3014
3015 r = strv_consume(&list, e);
3016 if (r < 0)
3017 return r;
3018 }
3019 }
3020
ae2a15bc 3021 *ret = TAKE_PTR(list);
da50b85a
LP
3022
3023 return 0;
3024}
3025
34cf6c43
YW
3026static char *exec_command_line(char **argv);
3027
78f93209
LP
3028static int exec_parameters_get_cgroup_path(const ExecParameters *params, char **ret) {
3029 bool using_subcgroup;
3030 char *p;
3031
3032 assert(params);
3033 assert(ret);
3034
3035 if (!params->cgroup_path)
3036 return -EINVAL;
3037
3038 /* If we are called for a unit where cgroup delegation is on, and the payload created its own populated
3039 * subcgroup (which we expect it to do, after all it asked for delegation), then we cannot place the control
3040 * processes started after the main unit's process in the unit's main cgroup because it is now an inner one,
3041 * and inner cgroups may not contain processes. Hence, if delegation is on, and this is a control process,
3042 * let's use ".control" as subcgroup instead. Note that we do so only for ExecStartPost=, ExecReload=,
3043 * ExecStop=, ExecStopPost=, i.e. for the commands where the main process is already forked. For ExecStartPre=
3044 * this is not necessary, the cgroup is still empty. We distinguish these cases with the EXEC_CONTROL_CGROUP
3045 * flag, which is only passed for the former statements, not for the latter. */
3046
3047 using_subcgroup = FLAGS_SET(params->flags, EXEC_CONTROL_CGROUP|EXEC_CGROUP_DELEGATE|EXEC_IS_CONTROL);
3048 if (using_subcgroup)
657ee2d8 3049 p = path_join(params->cgroup_path, ".control");
78f93209
LP
3050 else
3051 p = strdup(params->cgroup_path);
3052 if (!p)
3053 return -ENOMEM;
3054
3055 *ret = p;
3056 return using_subcgroup;
3057}
3058
e2b2fb7f
MS
3059static int exec_context_cpu_affinity_from_numa(const ExecContext *c, CPUSet *ret) {
3060 _cleanup_(cpu_set_reset) CPUSet s = {};
3061 int r;
3062
3063 assert(c);
3064 assert(ret);
3065
3066 if (!c->numa_policy.nodes.set) {
3067 log_debug("Can't derive CPU affinity mask from NUMA mask because NUMA mask is not set, ignoring");
3068 return 0;
3069 }
3070
3071 r = numa_to_cpu_set(&c->numa_policy, &s);
3072 if (r < 0)
3073 return r;
3074
3075 cpu_set_reset(ret);
3076
3077 return cpu_set_add_all(ret, &s);
3078}
3079
3080bool exec_context_get_cpu_affinity_from_numa(const ExecContext *c) {
3081 assert(c);
3082
3083 return c->cpu_affinity_from_numa;
3084}
3085
ff0af2a1 3086static int exec_child(
f2341e0a 3087 Unit *unit,
34cf6c43 3088 const ExecCommand *command,
ff0af2a1
LP
3089 const ExecContext *context,
3090 const ExecParameters *params,
3091 ExecRuntime *runtime,
29206d46 3092 DynamicCreds *dcreds,
ff0af2a1 3093 int socket_fd,
2caa38e9 3094 const int named_iofds[static 3],
4c47affc 3095 int *fds,
da6053d0 3096 size_t n_socket_fds,
25b583d7 3097 size_t n_storage_fds,
ff0af2a1 3098 char **files_env,
00d9ef85 3099 int user_lookup_fd,
12145637 3100 int *exit_status) {
d35fbf6b 3101
7ca69792 3102 _cleanup_strv_free_ char **our_env = NULL, **pass_env = NULL, **accum_env = NULL, **replaced_argv = NULL;
5686391b 3103 int *fds_with_exec_fd, n_fds_with_exec_fd, r, ngids = 0, exec_fd = -1;
4d885bd3
DH
3104 _cleanup_free_ gid_t *supplementary_gids = NULL;
3105 const char *username = NULL, *groupname = NULL;
5686391b 3106 _cleanup_free_ char *home_buffer = NULL;
2b3c1b9e 3107 const char *home = NULL, *shell = NULL;
7ca69792 3108 char **final_argv = NULL;
7bce046b
LP
3109 dev_t journal_stream_dev = 0;
3110 ino_t journal_stream_ino = 0;
5749f855 3111 bool userns_set_up = false;
165a31c0
LP
3112 bool needs_sandboxing, /* Do we need to set up full sandboxing? (i.e. all namespacing, all MAC stuff, caps, yadda yadda */
3113 needs_setuid, /* Do we need to do the actual setresuid()/setresgid() calls? */
3114 needs_mount_namespace, /* Do we need to set up a mount namespace for this kernel? */
3115 needs_ambient_hack; /* Do we need to apply the ambient capabilities hack? */
349cc4a5 3116#if HAVE_SELINUX
7f59dd35 3117 _cleanup_free_ char *mac_selinux_context_net = NULL;
43b1f709 3118 bool use_selinux = false;
ecfbc84f 3119#endif
f9fa32f0 3120#if ENABLE_SMACK
43b1f709 3121 bool use_smack = false;
ecfbc84f 3122#endif
349cc4a5 3123#if HAVE_APPARMOR
43b1f709 3124 bool use_apparmor = false;
ecfbc84f 3125#endif
5749f855
AZ
3126 uid_t saved_uid = getuid();
3127 gid_t saved_gid = getgid();
fed1e721
LP
3128 uid_t uid = UID_INVALID;
3129 gid_t gid = GID_INVALID;
da6053d0 3130 size_t n_fds;
3536f49e 3131 ExecDirectoryType dt;
165a31c0 3132 int secure_bits;
afb11bf1
DG
3133 _cleanup_free_ gid_t *gids_after_pam = NULL;
3134 int ngids_after_pam = 0;
034c6ed7 3135
f2341e0a 3136 assert(unit);
5cb5a6ff
LP
3137 assert(command);
3138 assert(context);
d35fbf6b 3139 assert(params);
ff0af2a1 3140 assert(exit_status);
d35fbf6b
DM
3141
3142 rename_process_from_path(command->path);
3143
3144 /* We reset exactly these signals, since they are the
3145 * only ones we set to SIG_IGN in the main daemon. All
3146 * others we leave untouched because we set them to
3147 * SIG_DFL or a valid handler initially, both of which
3148 * will be demoted to SIG_DFL. */
ce30c8dc
LP
3149 (void) default_signals(SIGNALS_CRASH_HANDLER,
3150 SIGNALS_IGNORE, -1);
d35fbf6b
DM
3151
3152 if (context->ignore_sigpipe)
ce30c8dc 3153 (void) ignore_signals(SIGPIPE, -1);
d35fbf6b 3154
ff0af2a1
LP
3155 r = reset_signal_mask();
3156 if (r < 0) {
3157 *exit_status = EXIT_SIGNAL_MASK;
12145637 3158 return log_unit_error_errno(unit, r, "Failed to set process signal mask: %m");
d35fbf6b 3159 }
034c6ed7 3160
d35fbf6b
DM
3161 if (params->idle_pipe)
3162 do_idle_pipe_dance(params->idle_pipe);
4f2d528d 3163
2c027c62
LP
3164 /* Close fds we don't need very early to make sure we don't block init reexecution because it cannot bind its
3165 * sockets. Among the fds we close are the logging fds, and we want to keep them closed, so that we don't have
3166 * any fds open we don't really want open during the transition. In order to make logging work, we switch the
3167 * log subsystem into open_when_needed mode, so that it reopens the logs on every single log call. */
ff0af2a1 3168
d35fbf6b 3169 log_forget_fds();
2c027c62 3170 log_set_open_when_needed(true);
4f2d528d 3171
40a80078
LP
3172 /* In case anything used libc syslog(), close this here, too */
3173 closelog();
3174
5686391b
LP
3175 n_fds = n_socket_fds + n_storage_fds;
3176 r = close_remaining_fds(params, runtime, dcreds, user_lookup_fd, socket_fd, params->exec_fd, fds, n_fds);
ff0af2a1
LP
3177 if (r < 0) {
3178 *exit_status = EXIT_FDS;
12145637 3179 return log_unit_error_errno(unit, r, "Failed to close unwanted file descriptors: %m");
8c7be95e
LP
3180 }
3181
d35fbf6b
DM
3182 if (!context->same_pgrp)
3183 if (setsid() < 0) {
ff0af2a1 3184 *exit_status = EXIT_SETSID;
12145637 3185 return log_unit_error_errno(unit, errno, "Failed to create new process session: %m");
d35fbf6b 3186 }
9e2f7c11 3187
1e22b5cd 3188 exec_context_tty_reset(context, params);
d35fbf6b 3189
c891efaf 3190 if (unit_shall_confirm_spawn(unit)) {
7d5ceb64 3191 const char *vc = params->confirm_spawn;
3b20f877
FB
3192 _cleanup_free_ char *cmdline = NULL;
3193
ee39ca20 3194 cmdline = exec_command_line(command->argv);
3b20f877 3195 if (!cmdline) {
0460aa5c 3196 *exit_status = EXIT_MEMORY;
12145637 3197 return log_oom();
3b20f877 3198 }
d35fbf6b 3199
eedf223a 3200 r = ask_for_confirmation(vc, unit, cmdline);
3b20f877
FB
3201 if (r != CONFIRM_EXECUTE) {
3202 if (r == CONFIRM_PRETEND_SUCCESS) {
3203 *exit_status = EXIT_SUCCESS;
3204 return 0;
3205 }
ff0af2a1 3206 *exit_status = EXIT_CONFIRM;
12145637 3207 log_unit_error(unit, "Execution cancelled by the user");
d35fbf6b 3208 return -ECANCELED;
d35fbf6b
DM
3209 }
3210 }
1a63a750 3211
d521916d
LP
3212 /* We are about to invoke NSS and PAM modules. Let's tell them what we are doing here, maybe they care. This is
3213 * used by nss-resolve to disable itself when we are about to start systemd-resolved, to avoid deadlocks. Note
3214 * that these env vars do not survive the execve(), which means they really only apply to the PAM and NSS
3215 * invocations themselves. Also note that while we'll only invoke NSS modules involved in user management they
3216 * might internally call into other NSS modules that are involved in hostname resolution, we never know. */
3217 if (setenv("SYSTEMD_ACTIVATION_UNIT", unit->id, true) != 0 ||
3218 setenv("SYSTEMD_ACTIVATION_SCOPE", MANAGER_IS_SYSTEM(unit->manager) ? "system" : "user", true) != 0) {
3219 *exit_status = EXIT_MEMORY;
3220 return log_unit_error_errno(unit, errno, "Failed to update environment: %m");
3221 }
3222
29206d46 3223 if (context->dynamic_user && dcreds) {
da50b85a 3224 _cleanup_strv_free_ char **suggested_paths = NULL;
29206d46 3225
d521916d
LP
3226 /* On top of that, make sure we bypass our own NSS module nss-systemd comprehensively for any NSS
3227 * checks, if DynamicUser=1 is used, as we shouldn't create a feedback loop with ourselves here.*/
409093fe
LP
3228 if (putenv((char*) "SYSTEMD_NSS_DYNAMIC_BYPASS=1") != 0) {
3229 *exit_status = EXIT_USER;
12145637 3230 return log_unit_error_errno(unit, errno, "Failed to update environment: %m");
409093fe
LP
3231 }
3232
da50b85a
LP
3233 r = compile_suggested_paths(context, params, &suggested_paths);
3234 if (r < 0) {
3235 *exit_status = EXIT_MEMORY;
3236 return log_oom();
3237 }
3238
3239 r = dynamic_creds_realize(dcreds, suggested_paths, &uid, &gid);
ff0af2a1
LP
3240 if (r < 0) {
3241 *exit_status = EXIT_USER;
e2b0cc34
YW
3242 if (r == -EILSEQ) {
3243 log_unit_error(unit, "Failed to update dynamic user credentials: User or group with specified name already exists.");
3244 return -EOPNOTSUPP;
3245 }
12145637 3246 return log_unit_error_errno(unit, r, "Failed to update dynamic user credentials: %m");
524daa8c 3247 }
524daa8c 3248
70dd455c 3249 if (!uid_is_valid(uid)) {
29206d46 3250 *exit_status = EXIT_USER;
12145637 3251 log_unit_error(unit, "UID validation failed for \""UID_FMT"\"", uid);
70dd455c
ZJS
3252 return -ESRCH;
3253 }
3254
3255 if (!gid_is_valid(gid)) {
3256 *exit_status = EXIT_USER;
12145637 3257 log_unit_error(unit, "GID validation failed for \""GID_FMT"\"", gid);
29206d46
LP
3258 return -ESRCH;
3259 }
5bc7452b 3260
29206d46
LP
3261 if (dcreds->user)
3262 username = dcreds->user->name;
3263
3264 } else {
4d885bd3
DH
3265 r = get_fixed_user(context, &username, &uid, &gid, &home, &shell);
3266 if (r < 0) {
3267 *exit_status = EXIT_USER;
12145637 3268 return log_unit_error_errno(unit, r, "Failed to determine user credentials: %m");
5bc7452b 3269 }
5bc7452b 3270
4d885bd3
DH
3271 r = get_fixed_group(context, &groupname, &gid);
3272 if (r < 0) {
3273 *exit_status = EXIT_GROUP;
12145637 3274 return log_unit_error_errno(unit, r, "Failed to determine group credentials: %m");
4d885bd3 3275 }
cdc5d5c5 3276 }
29206d46 3277
cdc5d5c5
DH
3278 /* Initialize user supplementary groups and get SupplementaryGroups= ones */
3279 r = get_supplementary_groups(context, username, groupname, gid,
3280 &supplementary_gids, &ngids);
3281 if (r < 0) {
3282 *exit_status = EXIT_GROUP;
12145637 3283 return log_unit_error_errno(unit, r, "Failed to determine supplementary groups: %m");
29206d46 3284 }
5bc7452b 3285
00d9ef85
LP
3286 r = send_user_lookup(unit, user_lookup_fd, uid, gid);
3287 if (r < 0) {
3288 *exit_status = EXIT_USER;
12145637 3289 return log_unit_error_errno(unit, r, "Failed to send user credentials to PID1: %m");
00d9ef85
LP
3290 }
3291
3292 user_lookup_fd = safe_close(user_lookup_fd);
3293
6732edab
LP
3294 r = acquire_home(context, uid, &home, &home_buffer);
3295 if (r < 0) {
3296 *exit_status = EXIT_CHDIR;
12145637 3297 return log_unit_error_errno(unit, r, "Failed to determine $HOME for user: %m");
6732edab
LP
3298 }
3299
d35fbf6b
DM
3300 /* If a socket is connected to STDIN/STDOUT/STDERR, we
3301 * must make sure to drop O_NONBLOCK */
3302 if (socket_fd >= 0)
a34ceba6 3303 (void) fd_nonblock(socket_fd, false);
acbb0225 3304
4c70a4a7
MS
3305 /* Journald will try to look-up our cgroup in order to populate _SYSTEMD_CGROUP and _SYSTEMD_UNIT fields.
3306 * Hence we need to migrate to the target cgroup from init.scope before connecting to journald */
3307 if (params->cgroup_path) {
3308 _cleanup_free_ char *p = NULL;
3309
3310 r = exec_parameters_get_cgroup_path(params, &p);
3311 if (r < 0) {
3312 *exit_status = EXIT_CGROUP;
3313 return log_unit_error_errno(unit, r, "Failed to acquire cgroup path: %m");
3314 }
3315
3316 r = cg_attach_everywhere(params->cgroup_supported, p, 0, NULL, NULL);
3317 if (r < 0) {
3318 *exit_status = EXIT_CGROUP;
3319 return log_unit_error_errno(unit, r, "Failed to attach to cgroup %s: %m", p);
3320 }
3321 }
3322
a8d08f39
LP
3323 if (context->network_namespace_path && runtime && runtime->netns_storage_socket[0] >= 0) {
3324 r = open_netns_path(runtime->netns_storage_socket, context->network_namespace_path);
3325 if (r < 0) {
3326 *exit_status = EXIT_NETWORK;
3327 return log_unit_error_errno(unit, r, "Failed to open network namespace path %s: %m", context->network_namespace_path);
3328 }
3329 }
3330
52c239d7 3331 r = setup_input(context, params, socket_fd, named_iofds);
ff0af2a1
LP
3332 if (r < 0) {
3333 *exit_status = EXIT_STDIN;
12145637 3334 return log_unit_error_errno(unit, r, "Failed to set up standard input: %m");
d35fbf6b 3335 }
034c6ed7 3336
52c239d7 3337 r = setup_output(unit, context, params, STDOUT_FILENO, socket_fd, named_iofds, basename(command->path), uid, gid, &journal_stream_dev, &journal_stream_ino);
ff0af2a1
LP
3338 if (r < 0) {
3339 *exit_status = EXIT_STDOUT;
12145637 3340 return log_unit_error_errno(unit, r, "Failed to set up standard output: %m");
d35fbf6b
DM
3341 }
3342
52c239d7 3343 r = setup_output(unit, context, params, STDERR_FILENO, socket_fd, named_iofds, basename(command->path), uid, gid, &journal_stream_dev, &journal_stream_ino);
ff0af2a1
LP
3344 if (r < 0) {
3345 *exit_status = EXIT_STDERR;
12145637 3346 return log_unit_error_errno(unit, r, "Failed to set up standard error output: %m");
d35fbf6b
DM
3347 }
3348
d35fbf6b 3349 if (context->oom_score_adjust_set) {
9f8168eb
LP
3350 /* When we can't make this change due to EPERM, then let's silently skip over it. User namespaces
3351 * prohibit write access to this file, and we shouldn't trip up over that. */
3352 r = set_oom_score_adjust(context->oom_score_adjust);
12145637 3353 if (IN_SET(r, -EPERM, -EACCES))
f2341e0a 3354 log_unit_debug_errno(unit, r, "Failed to adjust OOM setting, assuming containerized execution, ignoring: %m");
12145637 3355 else if (r < 0) {
ff0af2a1 3356 *exit_status = EXIT_OOM_ADJUST;
12145637 3357 return log_unit_error_errno(unit, r, "Failed to adjust OOM setting: %m");
613b411c 3358 }
d35fbf6b
DM
3359 }
3360
ad21e542
ZJS
3361 if (context->coredump_filter_set) {
3362 r = set_coredump_filter(context->coredump_filter);
3363 if (ERRNO_IS_PRIVILEGE(r))
3364 log_unit_debug_errno(unit, r, "Failed to adjust coredump_filter, ignoring: %m");
3365 else if (r < 0)
3366 return log_unit_error_errno(unit, r, "Failed to adjust coredump_filter: %m");
3367 }
3368
39090201
DJL
3369 if (context->nice_set) {
3370 r = setpriority_closest(context->nice);
3371 if (r < 0)
3372 return log_unit_error_errno(unit, r, "Failed to set up process scheduling priority (nice level): %m");
3373 }
613b411c 3374
d35fbf6b
DM
3375 if (context->cpu_sched_set) {
3376 struct sched_param param = {
3377 .sched_priority = context->cpu_sched_priority,
3378 };
3379
ff0af2a1
LP
3380 r = sched_setscheduler(0,
3381 context->cpu_sched_policy |
3382 (context->cpu_sched_reset_on_fork ?
3383 SCHED_RESET_ON_FORK : 0),
3384 &param);
3385 if (r < 0) {
3386 *exit_status = EXIT_SETSCHEDULER;
12145637 3387 return log_unit_error_errno(unit, errno, "Failed to set up CPU scheduling: %m");
fc9b2a84 3388 }
d35fbf6b 3389 }
fc9b2a84 3390
e2b2fb7f
MS
3391 if (context->cpu_affinity_from_numa || context->cpu_set.set) {
3392 _cleanup_(cpu_set_reset) CPUSet converted_cpu_set = {};
3393 const CPUSet *cpu_set;
3394
3395 if (context->cpu_affinity_from_numa) {
3396 r = exec_context_cpu_affinity_from_numa(context, &converted_cpu_set);
3397 if (r < 0) {
3398 *exit_status = EXIT_CPUAFFINITY;
3399 return log_unit_error_errno(unit, r, "Failed to derive CPU affinity mask from NUMA mask: %m");
3400 }
3401
3402 cpu_set = &converted_cpu_set;
3403 } else
3404 cpu_set = &context->cpu_set;
3405
3406 if (sched_setaffinity(0, cpu_set->allocated, cpu_set->set) < 0) {
ff0af2a1 3407 *exit_status = EXIT_CPUAFFINITY;
12145637 3408 return log_unit_error_errno(unit, errno, "Failed to set up CPU affinity: %m");
034c6ed7 3409 }
e2b2fb7f 3410 }
034c6ed7 3411
b070c7c0
MS
3412 if (mpol_is_valid(numa_policy_get_type(&context->numa_policy))) {
3413 r = apply_numa_policy(&context->numa_policy);
3414 if (r == -EOPNOTSUPP)
33fe9e3f 3415 log_unit_debug_errno(unit, r, "NUMA support not available, ignoring.");
b070c7c0
MS
3416 else if (r < 0) {
3417 *exit_status = EXIT_NUMA_POLICY;
3418 return log_unit_error_errno(unit, r, "Failed to set NUMA memory policy: %m");
3419 }
3420 }
3421
d35fbf6b
DM
3422 if (context->ioprio_set)
3423 if (ioprio_set(IOPRIO_WHO_PROCESS, 0, context->ioprio) < 0) {
ff0af2a1 3424 *exit_status = EXIT_IOPRIO;
12145637 3425 return log_unit_error_errno(unit, errno, "Failed to set up IO scheduling priority: %m");
d35fbf6b 3426 }
da726a4d 3427
d35fbf6b
DM
3428 if (context->timer_slack_nsec != NSEC_INFINITY)
3429 if (prctl(PR_SET_TIMERSLACK, context->timer_slack_nsec) < 0) {
ff0af2a1 3430 *exit_status = EXIT_TIMERSLACK;
12145637 3431 return log_unit_error_errno(unit, errno, "Failed to set up timer slack: %m");
4c2630eb 3432 }
9eba9da4 3433
21022b9d
LP
3434 if (context->personality != PERSONALITY_INVALID) {
3435 r = safe_personality(context->personality);
3436 if (r < 0) {
ff0af2a1 3437 *exit_status = EXIT_PERSONALITY;
12145637 3438 return log_unit_error_errno(unit, r, "Failed to set up execution domain (personality): %m");
4c2630eb 3439 }
21022b9d 3440 }
94f04347 3441
d35fbf6b 3442 if (context->utmp_id)
df0ff127 3443 utmp_put_init_process(context->utmp_id, getpid_cached(), getsid(0),
6a93917d 3444 context->tty_path,
023a4f67
LP
3445 context->utmp_mode == EXEC_UTMP_INIT ? INIT_PROCESS :
3446 context->utmp_mode == EXEC_UTMP_LOGIN ? LOGIN_PROCESS :
3447 USER_PROCESS,
6a93917d 3448 username);
d35fbf6b 3449
08f67696 3450 if (uid_is_valid(uid)) {
ff0af2a1
LP
3451 r = chown_terminal(STDIN_FILENO, uid);
3452 if (r < 0) {
3453 *exit_status = EXIT_STDIN;
12145637 3454 return log_unit_error_errno(unit, r, "Failed to change ownership of terminal: %m");
071830ff 3455 }
d35fbf6b 3456 }
8e274523 3457
4e1dfa45 3458 /* If delegation is enabled we'll pass ownership of the cgroup to the user of the new process. On cgroup v1
62b9bb26 3459 * this is only about systemd's own hierarchy, i.e. not the controller hierarchies, simply because that's not
4e1dfa45 3460 * safe. On cgroup v2 there's only one hierarchy anyway, and delegation is safe there, hence in that case only
62b9bb26 3461 * touch a single hierarchy too. */
584b8688 3462 if (params->cgroup_path && context->user && (params->flags & EXEC_CGROUP_DELEGATE)) {
62b9bb26 3463 r = cg_set_access(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, uid, gid);
ff0af2a1
LP
3464 if (r < 0) {
3465 *exit_status = EXIT_CGROUP;
12145637 3466 return log_unit_error_errno(unit, r, "Failed to adjust control group access: %m");
034c6ed7 3467 }
d35fbf6b 3468 }
034c6ed7 3469
72fd1768 3470 for (dt = 0; dt < _EXEC_DIRECTORY_TYPE_MAX; dt++) {
8679efde 3471 r = setup_exec_directory(context, params, uid, gid, dt, exit_status);
12145637
LP
3472 if (r < 0)
3473 return log_unit_error_errno(unit, r, "Failed to set up special execution directory in %s: %m", params->prefix[dt]);
d35fbf6b 3474 }
94f04347 3475
7bce046b 3476 r = build_environment(
fd63e712 3477 unit,
7bce046b
LP
3478 context,
3479 params,
3480 n_fds,
3481 home,
3482 username,
3483 shell,
3484 journal_stream_dev,
3485 journal_stream_ino,
3486 &our_env);
2065ca69
JW
3487 if (r < 0) {
3488 *exit_status = EXIT_MEMORY;
12145637 3489 return log_oom();
2065ca69
JW
3490 }
3491
3492 r = build_pass_environment(context, &pass_env);
3493 if (r < 0) {
3494 *exit_status = EXIT_MEMORY;
12145637 3495 return log_oom();
2065ca69
JW
3496 }
3497
3498 accum_env = strv_env_merge(5,
3499 params->environment,
3500 our_env,
3501 pass_env,
3502 context->environment,
44e5d006 3503 files_env);
2065ca69
JW
3504 if (!accum_env) {
3505 *exit_status = EXIT_MEMORY;
12145637 3506 return log_oom();
2065ca69 3507 }
1280503b 3508 accum_env = strv_env_clean(accum_env);
2065ca69 3509
096424d1 3510 (void) umask(context->umask);
b213e1c1 3511
b1edf445 3512 r = setup_keyring(unit, context, params, uid, gid);
74dd6b51
LP
3513 if (r < 0) {
3514 *exit_status = EXIT_KEYRING;
12145637 3515 return log_unit_error_errno(unit, r, "Failed to set up kernel keyring: %m");
74dd6b51
LP
3516 }
3517
165a31c0 3518 /* We need sandboxing if the caller asked us to apply it and the command isn't explicitly excepted from it */
1703fa41 3519 needs_sandboxing = (params->flags & EXEC_APPLY_SANDBOXING) && !(command->flags & EXEC_COMMAND_FULLY_PRIVILEGED);
7f18ef0a 3520
165a31c0
LP
3521 /* We need the ambient capability hack, if the caller asked us to apply it and the command is marked for it, and the kernel doesn't actually support ambient caps */
3522 needs_ambient_hack = (params->flags & EXEC_APPLY_SANDBOXING) && (command->flags & EXEC_COMMAND_AMBIENT_MAGIC) && !ambient_capabilities_supported();
7f18ef0a 3523
165a31c0
LP
3524 /* We need setresuid() if the caller asked us to apply sandboxing and the command isn't explicitly excepted from either whole sandboxing or just setresuid() itself, and the ambient hack is not desired */
3525 if (needs_ambient_hack)
3526 needs_setuid = false;
3527 else
3528 needs_setuid = (params->flags & EXEC_APPLY_SANDBOXING) && !(command->flags & (EXEC_COMMAND_FULLY_PRIVILEGED|EXEC_COMMAND_NO_SETUID));
3529
3530 if (needs_sandboxing) {
7f18ef0a
FK
3531 /* MAC enablement checks need to be done before a new mount ns is created, as they rely on /sys being
3532 * present. The actual MAC context application will happen later, as late as possible, to avoid
3533 * impacting our own code paths. */
3534
349cc4a5 3535#if HAVE_SELINUX
43b1f709 3536 use_selinux = mac_selinux_use();
7f18ef0a 3537#endif
f9fa32f0 3538#if ENABLE_SMACK
43b1f709 3539 use_smack = mac_smack_use();
7f18ef0a 3540#endif
349cc4a5 3541#if HAVE_APPARMOR
43b1f709 3542 use_apparmor = mac_apparmor_use();
7f18ef0a 3543#endif
165a31c0 3544 }
7f18ef0a 3545
ce932d2d
LP
3546 if (needs_sandboxing) {
3547 int which_failed;
3548
3549 /* Let's set the resource limits before we call into PAM, so that pam_limits wins over what
3550 * is set here. (See below.) */
3551
3552 r = setrlimit_closest_all((const struct rlimit* const *) context->rlimit, &which_failed);
3553 if (r < 0) {
3554 *exit_status = EXIT_LIMITS;
3555 return log_unit_error_errno(unit, r, "Failed to adjust resource limit RLIMIT_%s: %m", rlimit_to_string(which_failed));
3556 }
3557 }
3558
165a31c0 3559 if (needs_setuid) {
ce932d2d
LP
3560
3561 /* Let's call into PAM after we set up our own idea of resource limits so that pam_limits
3562 * wins here. (See above.) */
3563
165a31c0
LP
3564 if (context->pam_name && username) {
3565 r = setup_pam(context->pam_name, username, uid, gid, context->tty_path, &accum_env, fds, n_fds);
3566 if (r < 0) {
3567 *exit_status = EXIT_PAM;
12145637 3568 return log_unit_error_errno(unit, r, "Failed to set up PAM session: %m");
165a31c0 3569 }
afb11bf1
DG
3570
3571 ngids_after_pam = getgroups_alloc(&gids_after_pam);
3572 if (ngids_after_pam < 0) {
3573 *exit_status = EXIT_MEMORY;
3574 return log_unit_error_errno(unit, ngids_after_pam, "Failed to obtain groups after setting up PAM: %m");
3575 }
165a31c0 3576 }
b213e1c1 3577 }
ac45f971 3578
5749f855
AZ
3579 if (needs_sandboxing) {
3580#if HAVE_SELINUX
3581 if (use_selinux && params->selinux_context_net && socket_fd >= 0) {
3582 r = mac_selinux_get_child_mls_label(socket_fd, command->path, context->selinux_context, &mac_selinux_context_net);
3583 if (r < 0) {
3584 *exit_status = EXIT_SELINUX_CONTEXT;
3585 return log_unit_error_errno(unit, r, "Failed to determine SELinux context: %m");
3586 }
3587 }
3588#endif
3589
3590 /* If we're unprivileged, set up the user namespace first to enable use of the other namespaces.
3591 * Users with CAP_SYS_ADMIN can set up user namespaces last because they will be able to
3592 * set up all of the other namespaces (i.e. network, mount, UTS) without a user namespace. */
3593 if (context->private_users && !have_effective_cap(CAP_SYS_ADMIN)) {
3594 userns_set_up = true;
3595 r = setup_private_users(saved_uid, saved_gid, uid, gid);
3596 if (r < 0) {
3597 *exit_status = EXIT_USER;
3598 return log_unit_error_errno(unit, r, "Failed to set up user namespacing for unprivileged user: %m");
3599 }
3600 }
3601 }
3602
a8d08f39
LP
3603 if ((context->private_network || context->network_namespace_path) && runtime && runtime->netns_storage_socket[0] >= 0) {
3604
6e2d7c4f
MS
3605 if (ns_type_supported(NAMESPACE_NET)) {
3606 r = setup_netns(runtime->netns_storage_socket);
ee00d1e9
ZJS
3607 if (r == -EPERM)
3608 log_unit_warning_errno(unit, r,
3609 "PrivateNetwork=yes is configured, but network namespace setup failed, ignoring: %m");
3610 else if (r < 0) {
6e2d7c4f
MS
3611 *exit_status = EXIT_NETWORK;
3612 return log_unit_error_errno(unit, r, "Failed to set up network namespacing: %m");
3613 }
a8d08f39
LP
3614 } else if (context->network_namespace_path) {
3615 *exit_status = EXIT_NETWORK;
ee00d1e9
ZJS
3616 return log_unit_error_errno(unit, SYNTHETIC_ERRNO(EOPNOTSUPP),
3617 "NetworkNamespacePath= is not supported, refusing.");
6e2d7c4f
MS
3618 } else
3619 log_unit_warning(unit, "PrivateNetwork=yes is configured, but the kernel does not support network namespaces, ignoring.");
d35fbf6b 3620 }
169c1bda 3621
ee818b89 3622 needs_mount_namespace = exec_needs_mount_namespace(context, params, runtime);
ee818b89 3623 if (needs_mount_namespace) {
7cc5ef5f
ZJS
3624 _cleanup_free_ char *error_path = NULL;
3625
3626 r = apply_mount_namespace(unit, command, context, params, runtime, &error_path);
3fbe8dbe
LP
3627 if (r < 0) {
3628 *exit_status = EXIT_NAMESPACE;
7cc5ef5f
ZJS
3629 return log_unit_error_errno(unit, r, "Failed to set up mount namespacing%s%s: %m",
3630 error_path ? ": " : "", strempty(error_path));
3fbe8dbe 3631 }
d35fbf6b 3632 }
81a2b7ce 3633
daf8f72b
LP
3634 if (needs_sandboxing) {
3635 r = apply_protect_hostname(unit, context, exit_status);
3636 if (r < 0)
3637 return r;
aecd5ac6
TM
3638 }
3639
5749f855
AZ
3640 /* Drop groups as early as possible.
3641 * This needs to be done after PrivateDevices=y setup as device nodes should be owned by the host's root.
3642 * For non-root in a userns, devices will be owned by the user/group before the group change, and nobody. */
165a31c0 3643 if (needs_setuid) {
afb11bf1
DG
3644 _cleanup_free_ gid_t *gids_to_enforce = NULL;
3645 int ngids_to_enforce = 0;
3646
3647 ngids_to_enforce = merge_gid_lists(supplementary_gids,
3648 ngids,
3649 gids_after_pam,
3650 ngids_after_pam,
3651 &gids_to_enforce);
3652 if (ngids_to_enforce < 0) {
3653 *exit_status = EXIT_MEMORY;
3654 return log_unit_error_errno(unit,
3655 ngids_to_enforce,
3656 "Failed to merge group lists. Group membership might be incorrect: %m");
3657 }
3658
3659 r = enforce_groups(gid, gids_to_enforce, ngids_to_enforce);
096424d1
LP
3660 if (r < 0) {
3661 *exit_status = EXIT_GROUP;
12145637 3662 return log_unit_error_errno(unit, r, "Changing group credentials failed: %m");
096424d1 3663 }
165a31c0 3664 }
096424d1 3665
5749f855
AZ
3666 /* If the user namespace was not set up above, try to do it now.
3667 * It's preferred to set up the user namespace later (after all other namespaces) so as not to be
3668 * restricted by rules pertaining to combining user namespaces with other namespaces (e.g. in the
3669 * case of mount namespaces being less privileged when the mount point list is copied from a
3670 * different user namespace). */
9008e1ac 3671
5749f855
AZ
3672 if (needs_sandboxing && context->private_users && !userns_set_up) {
3673 r = setup_private_users(saved_uid, saved_gid, uid, gid);
3674 if (r < 0) {
3675 *exit_status = EXIT_USER;
3676 return log_unit_error_errno(unit, r, "Failed to set up user namespacing: %m");
d251207d
LP
3677 }
3678 }
3679
165a31c0 3680 /* We repeat the fd closing here, to make sure that nothing is leaked from the PAM modules. Note that we are
5686391b
LP
3681 * more aggressive this time since socket_fd and the netns fds we don't need anymore. We do keep the exec_fd
3682 * however if we have it as we want to keep it open until the final execve(). */
3683
3684 if (params->exec_fd >= 0) {
3685 exec_fd = params->exec_fd;
3686
3687 if (exec_fd < 3 + (int) n_fds) {
3688 int moved_fd;
3689
3690 /* Let's move the exec fd far up, so that it's outside of the fd range we want to pass to the
3691 * process we are about to execute. */
3692
3693 moved_fd = fcntl(exec_fd, F_DUPFD_CLOEXEC, 3 + (int) n_fds);
3694 if (moved_fd < 0) {
3695 *exit_status = EXIT_FDS;
3696 return log_unit_error_errno(unit, errno, "Couldn't move exec fd up: %m");
3697 }
3698
3699 safe_close(exec_fd);
3700 exec_fd = moved_fd;
3701 } else {
3702 /* This fd should be FD_CLOEXEC already, but let's make sure. */
3703 r = fd_cloexec(exec_fd, true);
3704 if (r < 0) {
3705 *exit_status = EXIT_FDS;
3706 return log_unit_error_errno(unit, r, "Failed to make exec fd FD_CLOEXEC: %m");
3707 }
3708 }
3709
3710 fds_with_exec_fd = newa(int, n_fds + 1);
7e8d494b 3711 memcpy_safe(fds_with_exec_fd, fds, n_fds * sizeof(int));
5686391b
LP
3712 fds_with_exec_fd[n_fds] = exec_fd;
3713 n_fds_with_exec_fd = n_fds + 1;
3714 } else {
3715 fds_with_exec_fd = fds;
3716 n_fds_with_exec_fd = n_fds;
3717 }
3718
3719 r = close_all_fds(fds_with_exec_fd, n_fds_with_exec_fd);
ff0af2a1
LP
3720 if (r >= 0)
3721 r = shift_fds(fds, n_fds);
3722 if (r >= 0)
25b583d7 3723 r = flags_fds(fds, n_socket_fds, n_storage_fds, context->non_blocking);
ff0af2a1
LP
3724 if (r < 0) {
3725 *exit_status = EXIT_FDS;
12145637 3726 return log_unit_error_errno(unit, r, "Failed to adjust passed file descriptors: %m");
d35fbf6b 3727 }
e66cf1a3 3728
5686391b
LP
3729 /* At this point, the fds we want to pass to the program are all ready and set up, with O_CLOEXEC turned off
3730 * and at the right fd numbers. There are no other fds open, with one exception: the exec_fd if it is defined,
3731 * and it has O_CLOEXEC set, after all we want it to be closed by the execve(), so that our parent knows we
3732 * came this far. */
3733
165a31c0 3734 secure_bits = context->secure_bits;
e66cf1a3 3735
165a31c0
LP
3736 if (needs_sandboxing) {
3737 uint64_t bset;
e66cf1a3 3738
ce932d2d
LP
3739 /* Set the RTPRIO resource limit to 0, but only if nothing else was explicitly
3740 * requested. (Note this is placed after the general resource limit initialization, see
3741 * above, in order to take precedence.) */
f4170c67
LP
3742 if (context->restrict_realtime && !context->rlimit[RLIMIT_RTPRIO]) {
3743 if (setrlimit(RLIMIT_RTPRIO, &RLIMIT_MAKE_CONST(0)) < 0) {
3744 *exit_status = EXIT_LIMITS;
12145637 3745 return log_unit_error_errno(unit, errno, "Failed to adjust RLIMIT_RTPRIO resource limit: %m");
f4170c67
LP
3746 }
3747 }
3748
37ac2744
JB
3749#if ENABLE_SMACK
3750 /* LSM Smack needs the capability CAP_MAC_ADMIN to change the current execution security context of the
3751 * process. This is the latest place before dropping capabilities. Other MAC context are set later. */
3752 if (use_smack) {
3753 r = setup_smack(context, command);
3754 if (r < 0) {
3755 *exit_status = EXIT_SMACK_PROCESS_LABEL;
3756 return log_unit_error_errno(unit, r, "Failed to set SMACK process label: %m");
3757 }
3758 }
3759#endif
3760
165a31c0
LP
3761 bset = context->capability_bounding_set;
3762 /* If the ambient caps hack is enabled (which means the kernel can't do them, and the user asked for
3763 * our magic fallback), then let's add some extra caps, so that the service can drop privs of its own,
3764 * instead of us doing that */
3765 if (needs_ambient_hack)
3766 bset |= (UINT64_C(1) << CAP_SETPCAP) |
3767 (UINT64_C(1) << CAP_SETUID) |
3768 (UINT64_C(1) << CAP_SETGID);
3769
3770 if (!cap_test_all(bset)) {
3771 r = capability_bounding_set_drop(bset, false);
ff0af2a1
LP
3772 if (r < 0) {
3773 *exit_status = EXIT_CAPABILITIES;
12145637 3774 return log_unit_error_errno(unit, r, "Failed to drop capabilities: %m");
3b8bddde 3775 }
4c2630eb 3776 }
3b8bddde 3777
755d4b67
IP
3778 /* This is done before enforce_user, but ambient set
3779 * does not survive over setresuid() if keep_caps is not set. */
943800f4 3780 if (!needs_ambient_hack) {
755d4b67
IP
3781 r = capability_ambient_set_apply(context->capability_ambient_set, true);
3782 if (r < 0) {
3783 *exit_status = EXIT_CAPABILITIES;
12145637 3784 return log_unit_error_errno(unit, r, "Failed to apply ambient capabilities (before UID change): %m");
755d4b67 3785 }
755d4b67 3786 }
165a31c0 3787 }
755d4b67 3788
fa97f630
JB
3789 /* chroot to root directory first, before we lose the ability to chroot */
3790 r = apply_root_directory(context, params, needs_mount_namespace, exit_status);
3791 if (r < 0)
3792 return log_unit_error_errno(unit, r, "Chrooting to the requested root directory failed: %m");
3793
165a31c0 3794 if (needs_setuid) {
08f67696 3795 if (uid_is_valid(uid)) {
ff0af2a1
LP
3796 r = enforce_user(context, uid);
3797 if (r < 0) {
3798 *exit_status = EXIT_USER;
12145637 3799 return log_unit_error_errno(unit, r, "Failed to change UID to " UID_FMT ": %m", uid);
5b6319dc 3800 }
165a31c0
LP
3801
3802 if (!needs_ambient_hack &&
3803 context->capability_ambient_set != 0) {
755d4b67
IP
3804
3805 /* Fix the ambient capabilities after user change. */
3806 r = capability_ambient_set_apply(context->capability_ambient_set, false);
3807 if (r < 0) {
3808 *exit_status = EXIT_CAPABILITIES;
12145637 3809 return log_unit_error_errno(unit, r, "Failed to apply ambient capabilities (after UID change): %m");
755d4b67
IP
3810 }
3811
3812 /* If we were asked to change user and ambient capabilities
3813 * were requested, we had to add keep-caps to the securebits
3814 * so that we would maintain the inherited capability set
3815 * through the setresuid(). Make sure that the bit is added
3816 * also to the context secure_bits so that we don't try to
3817 * drop the bit away next. */
3818
7f508f2c 3819 secure_bits |= 1<<SECURE_KEEP_CAPS;
755d4b67 3820 }
5b6319dc 3821 }
165a31c0 3822 }
d35fbf6b 3823
56ef8db9
JB
3824 /* Apply working directory here, because the working directory might be on NFS and only the user running
3825 * this service might have the correct privilege to change to the working directory */
fa97f630 3826 r = apply_working_directory(context, params, home, exit_status);
56ef8db9
JB
3827 if (r < 0)
3828 return log_unit_error_errno(unit, r, "Changing to the requested working directory failed: %m");
3829
165a31c0 3830 if (needs_sandboxing) {
37ac2744 3831 /* Apply other MAC contexts late, but before seccomp syscall filtering, as those should really be last to
5cd9cd35
LP
3832 * influence our own codepaths as little as possible. Moreover, applying MAC contexts usually requires
3833 * syscalls that are subject to seccomp filtering, hence should probably be applied before the syscalls
3834 * are restricted. */
3835
349cc4a5 3836#if HAVE_SELINUX
43b1f709 3837 if (use_selinux) {
5cd9cd35
LP
3838 char *exec_context = mac_selinux_context_net ?: context->selinux_context;
3839
3840 if (exec_context) {
3841 r = setexeccon(exec_context);
3842 if (r < 0) {
3843 *exit_status = EXIT_SELINUX_CONTEXT;
12145637 3844 return log_unit_error_errno(unit, r, "Failed to change SELinux context to %s: %m", exec_context);
5cd9cd35
LP
3845 }
3846 }
3847 }
3848#endif
3849
349cc4a5 3850#if HAVE_APPARMOR
43b1f709 3851 if (use_apparmor && context->apparmor_profile) {
5cd9cd35
LP
3852 r = aa_change_onexec(context->apparmor_profile);
3853 if (r < 0 && !context->apparmor_profile_ignore) {
3854 *exit_status = EXIT_APPARMOR_PROFILE;
12145637 3855 return log_unit_error_errno(unit, errno, "Failed to prepare AppArmor profile change to %s: %m", context->apparmor_profile);
5cd9cd35
LP
3856 }
3857 }
3858#endif
3859
165a31c0
LP
3860 /* PR_GET_SECUREBITS is not privileged, while PR_SET_SECUREBITS is. So to suppress potential EPERMs
3861 * we'll try not to call PR_SET_SECUREBITS unless necessary. */
755d4b67
IP
3862 if (prctl(PR_GET_SECUREBITS) != secure_bits)
3863 if (prctl(PR_SET_SECUREBITS, secure_bits) < 0) {
ff0af2a1 3864 *exit_status = EXIT_SECUREBITS;
12145637 3865 return log_unit_error_errno(unit, errno, "Failed to set process secure bits: %m");
ff01d048 3866 }
5b6319dc 3867
59eeb84b 3868 if (context_has_no_new_privileges(context))
d35fbf6b 3869 if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) < 0) {
ff0af2a1 3870 *exit_status = EXIT_NO_NEW_PRIVILEGES;
12145637 3871 return log_unit_error_errno(unit, errno, "Failed to disable new privileges: %m");
d35fbf6b
DM
3872 }
3873
349cc4a5 3874#if HAVE_SECCOMP
469830d1
LP
3875 r = apply_address_families(unit, context);
3876 if (r < 0) {
3877 *exit_status = EXIT_ADDRESS_FAMILIES;
12145637 3878 return log_unit_error_errno(unit, r, "Failed to restrict address families: %m");
4c2630eb 3879 }
04aa0cb9 3880
469830d1
LP
3881 r = apply_memory_deny_write_execute(unit, context);
3882 if (r < 0) {
3883 *exit_status = EXIT_SECCOMP;
12145637 3884 return log_unit_error_errno(unit, r, "Failed to disable writing to executable memory: %m");
f3e43635 3885 }
f4170c67 3886
469830d1
LP
3887 r = apply_restrict_realtime(unit, context);
3888 if (r < 0) {
3889 *exit_status = EXIT_SECCOMP;
12145637 3890 return log_unit_error_errno(unit, r, "Failed to apply realtime restrictions: %m");
f4170c67
LP
3891 }
3892
f69567cb
LP
3893 r = apply_restrict_suid_sgid(unit, context);
3894 if (r < 0) {
3895 *exit_status = EXIT_SECCOMP;
3896 return log_unit_error_errno(unit, r, "Failed to apply SUID/SGID restrictions: %m");
3897 }
3898
add00535
LP
3899 r = apply_restrict_namespaces(unit, context);
3900 if (r < 0) {
3901 *exit_status = EXIT_SECCOMP;
12145637 3902 return log_unit_error_errno(unit, r, "Failed to apply namespace restrictions: %m");
add00535
LP
3903 }
3904
469830d1
LP
3905 r = apply_protect_sysctl(unit, context);
3906 if (r < 0) {
3907 *exit_status = EXIT_SECCOMP;
12145637 3908 return log_unit_error_errno(unit, r, "Failed to apply sysctl restrictions: %m");
502d704e
DH
3909 }
3910
469830d1
LP
3911 r = apply_protect_kernel_modules(unit, context);
3912 if (r < 0) {
3913 *exit_status = EXIT_SECCOMP;
12145637 3914 return log_unit_error_errno(unit, r, "Failed to apply module loading restrictions: %m");
59eeb84b
LP
3915 }
3916
84703040
KK
3917 r = apply_protect_kernel_logs(unit, context);
3918 if (r < 0) {
3919 *exit_status = EXIT_SECCOMP;
3920 return log_unit_error_errno(unit, r, "Failed to apply kernel log restrictions: %m");
3921 }
3922
fc64760d
KK
3923 r = apply_protect_clock(unit, context);
3924 if (r < 0) {
3925 *exit_status = EXIT_SECCOMP;
3926 return log_unit_error_errno(unit, r, "Failed to apply clock restrictions: %m");
3927 }
3928
469830d1
LP
3929 r = apply_private_devices(unit, context);
3930 if (r < 0) {
3931 *exit_status = EXIT_SECCOMP;
12145637 3932 return log_unit_error_errno(unit, r, "Failed to set up private devices: %m");
469830d1
LP
3933 }
3934
3935 r = apply_syscall_archs(unit, context);
3936 if (r < 0) {
3937 *exit_status = EXIT_SECCOMP;
12145637 3938 return log_unit_error_errno(unit, r, "Failed to apply syscall architecture restrictions: %m");
ba128bb8
LP
3939 }
3940
78e864e5
TM
3941 r = apply_lock_personality(unit, context);
3942 if (r < 0) {
3943 *exit_status = EXIT_SECCOMP;
12145637 3944 return log_unit_error_errno(unit, r, "Failed to lock personalities: %m");
78e864e5
TM
3945 }
3946
5cd9cd35
LP
3947 /* This really should remain the last step before the execve(), to make sure our own code is unaffected
3948 * by the filter as little as possible. */
165a31c0 3949 r = apply_syscall_filter(unit, context, needs_ambient_hack);
469830d1
LP
3950 if (r < 0) {
3951 *exit_status = EXIT_SECCOMP;
12145637 3952 return log_unit_error_errno(unit, r, "Failed to apply system call filters: %m");
d35fbf6b
DM
3953 }
3954#endif
d35fbf6b 3955 }
034c6ed7 3956
00819cc1
LP
3957 if (!strv_isempty(context->unset_environment)) {
3958 char **ee = NULL;
3959
3960 ee = strv_env_delete(accum_env, 1, context->unset_environment);
3961 if (!ee) {
3962 *exit_status = EXIT_MEMORY;
12145637 3963 return log_oom();
00819cc1
LP
3964 }
3965
130d3d22 3966 strv_free_and_replace(accum_env, ee);
00819cc1
LP
3967 }
3968
7ca69792
AZ
3969 if (!FLAGS_SET(command->flags, EXEC_COMMAND_NO_ENV_EXPAND)) {
3970 replaced_argv = replace_env_argv(command->argv, accum_env);
3971 if (!replaced_argv) {
3972 *exit_status = EXIT_MEMORY;
3973 return log_oom();
3974 }
3975 final_argv = replaced_argv;
3976 } else
3977 final_argv = command->argv;
034c6ed7 3978
f1d34068 3979 if (DEBUG_LOGGING) {
d35fbf6b 3980 _cleanup_free_ char *line;
81a2b7ce 3981
d35fbf6b 3982 line = exec_command_line(final_argv);
a1230ff9 3983 if (line)
f2341e0a 3984 log_struct(LOG_DEBUG,
f2341e0a
LP
3985 "EXECUTABLE=%s", command->path,
3986 LOG_UNIT_MESSAGE(unit, "Executing: %s", line),
ba360bb0 3987 LOG_UNIT_ID(unit),
a1230ff9 3988 LOG_UNIT_INVOCATION_ID(unit));
d35fbf6b 3989 }
dd305ec9 3990
5686391b
LP
3991 if (exec_fd >= 0) {
3992 uint8_t hot = 1;
3993
3994 /* We have finished with all our initializations. Let's now let the manager know that. From this point
3995 * on, if the manager sees POLLHUP on the exec_fd, then execve() was successful. */
3996
3997 if (write(exec_fd, &hot, sizeof(hot)) < 0) {
3998 *exit_status = EXIT_EXEC;
3999 return log_unit_error_errno(unit, errno, "Failed to enable exec_fd: %m");
4000 }
4001 }
4002
2065ca69 4003 execve(command->path, final_argv, accum_env);
5686391b
LP
4004 r = -errno;
4005
4006 if (exec_fd >= 0) {
4007 uint8_t hot = 0;
4008
4009 /* The execve() failed. This means the exec_fd is still open. Which means we need to tell the manager
4010 * that POLLHUP on it no longer means execve() succeeded. */
4011
4012 if (write(exec_fd, &hot, sizeof(hot)) < 0) {
4013 *exit_status = EXIT_EXEC;
4014 return log_unit_error_errno(unit, errno, "Failed to disable exec_fd: %m");
4015 }
4016 }
12145637 4017
5686391b
LP
4018 if (r == -ENOENT && (command->flags & EXEC_COMMAND_IGNORE_FAILURE)) {
4019 log_struct_errno(LOG_INFO, r,
12145637
LP
4020 "MESSAGE_ID=" SD_MESSAGE_SPAWN_FAILED_STR,
4021 LOG_UNIT_ID(unit),
4022 LOG_UNIT_INVOCATION_ID(unit),
4023 LOG_UNIT_MESSAGE(unit, "Executable %s missing, skipping: %m",
4024 command->path),
a1230ff9 4025 "EXECUTABLE=%s", command->path);
12145637
LP
4026 return 0;
4027 }
4028
ff0af2a1 4029 *exit_status = EXIT_EXEC;
5686391b 4030 return log_unit_error_errno(unit, r, "Failed to execute command: %m");
d35fbf6b 4031}
81a2b7ce 4032
34cf6c43 4033static int exec_context_load_environment(const Unit *unit, const ExecContext *c, char ***l);
2caa38e9 4034static int exec_context_named_iofds(const ExecContext *c, const ExecParameters *p, int named_iofds[static 3]);
34cf6c43 4035
f2341e0a
LP
4036int exec_spawn(Unit *unit,
4037 ExecCommand *command,
d35fbf6b
DM
4038 const ExecContext *context,
4039 const ExecParameters *params,
4040 ExecRuntime *runtime,
29206d46 4041 DynamicCreds *dcreds,
d35fbf6b 4042 pid_t *ret) {
8351ceae 4043
ee39ca20 4044 int socket_fd, r, named_iofds[3] = { -1, -1, -1 }, *fds = NULL;
78f93209 4045 _cleanup_free_ char *subcgroup_path = NULL;
d35fbf6b 4046 _cleanup_strv_free_ char **files_env = NULL;
da6053d0 4047 size_t n_storage_fds = 0, n_socket_fds = 0;
ff0af2a1 4048 _cleanup_free_ char *line = NULL;
d35fbf6b 4049 pid_t pid;
8351ceae 4050
f2341e0a 4051 assert(unit);
d35fbf6b
DM
4052 assert(command);
4053 assert(context);
4054 assert(ret);
4055 assert(params);
25b583d7 4056 assert(params->fds || (params->n_socket_fds + params->n_storage_fds <= 0));
4298d0b5 4057
d35fbf6b
DM
4058 if (context->std_input == EXEC_INPUT_SOCKET ||
4059 context->std_output == EXEC_OUTPUT_SOCKET ||
4060 context->std_error == EXEC_OUTPUT_SOCKET) {
17df7223 4061
4c47affc 4062 if (params->n_socket_fds > 1) {
f2341e0a 4063 log_unit_error(unit, "Got more than one socket.");
d35fbf6b 4064 return -EINVAL;
ff0af2a1 4065 }
eef65bf3 4066
4c47affc 4067 if (params->n_socket_fds == 0) {
488ab41c
AA
4068 log_unit_error(unit, "Got no socket.");
4069 return -EINVAL;
4070 }
4071
d35fbf6b
DM
4072 socket_fd = params->fds[0];
4073 } else {
4074 socket_fd = -1;
4075 fds = params->fds;
9b141911 4076 n_socket_fds = params->n_socket_fds;
25b583d7 4077 n_storage_fds = params->n_storage_fds;
d35fbf6b 4078 }
94f04347 4079
34cf6c43 4080 r = exec_context_named_iofds(context, params, named_iofds);
52c239d7
LB
4081 if (r < 0)
4082 return log_unit_error_errno(unit, r, "Failed to load a named file descriptor: %m");
4083
f2341e0a 4084 r = exec_context_load_environment(unit, context, &files_env);
ff0af2a1 4085 if (r < 0)
f2341e0a 4086 return log_unit_error_errno(unit, r, "Failed to load environment files: %m");
034c6ed7 4087
ee39ca20 4088 line = exec_command_line(command->argv);
d35fbf6b
DM
4089 if (!line)
4090 return log_oom();
fab56fc5 4091
f2341e0a 4092 log_struct(LOG_DEBUG,
f2341e0a
LP
4093 LOG_UNIT_MESSAGE(unit, "About to execute: %s", line),
4094 "EXECUTABLE=%s", command->path,
ba360bb0 4095 LOG_UNIT_ID(unit),
a1230ff9 4096 LOG_UNIT_INVOCATION_ID(unit));
12145637 4097
78f93209
LP
4098 if (params->cgroup_path) {
4099 r = exec_parameters_get_cgroup_path(params, &subcgroup_path);
4100 if (r < 0)
4101 return log_unit_error_errno(unit, r, "Failed to acquire subcgroup path: %m");
4102 if (r > 0) { /* We are using a child cgroup */
4103 r = cg_create(SYSTEMD_CGROUP_CONTROLLER, subcgroup_path);
4104 if (r < 0)
4105 return log_unit_error_errno(unit, r, "Failed to create control group '%s': %m", subcgroup_path);
4106 }
4107 }
4108
d35fbf6b
DM
4109 pid = fork();
4110 if (pid < 0)
74129a12 4111 return log_unit_error_errno(unit, errno, "Failed to fork: %m");
d35fbf6b
DM
4112
4113 if (pid == 0) {
12145637 4114 int exit_status = EXIT_SUCCESS;
ff0af2a1 4115
f2341e0a
LP
4116 r = exec_child(unit,
4117 command,
ff0af2a1
LP
4118 context,
4119 params,
4120 runtime,
29206d46 4121 dcreds,
ff0af2a1 4122 socket_fd,
52c239d7 4123 named_iofds,
4c47affc 4124 fds,
9b141911 4125 n_socket_fds,
25b583d7 4126 n_storage_fds,
ff0af2a1 4127 files_env,
00d9ef85 4128 unit->manager->user_lookup_fds[1],
12145637
LP
4129 &exit_status);
4130
e1714f02
ZJS
4131 if (r < 0) {
4132 const char *status =
4133 exit_status_to_string(exit_status,
e04ed6db 4134 EXIT_STATUS_LIBC | EXIT_STATUS_SYSTEMD);
e1714f02 4135
12145637
LP
4136 log_struct_errno(LOG_ERR, r,
4137 "MESSAGE_ID=" SD_MESSAGE_SPAWN_FAILED_STR,
4138 LOG_UNIT_ID(unit),
4139 LOG_UNIT_INVOCATION_ID(unit),
4140 LOG_UNIT_MESSAGE(unit, "Failed at step %s spawning %s: %m",
e1714f02 4141 status, command->path),
a1230ff9 4142 "EXECUTABLE=%s", command->path);
e1714f02 4143 }
4c2630eb 4144
ff0af2a1 4145 _exit(exit_status);
034c6ed7
LP
4146 }
4147
f2341e0a 4148 log_unit_debug(unit, "Forked %s as "PID_FMT, command->path, pid);
23635a85 4149
78f93209
LP
4150 /* We add the new process to the cgroup both in the child (so that we can be sure that no user code is ever
4151 * executed outside of the cgroup) and in the parent (so that we can be sure that when we kill the cgroup the
4152 * process will be killed too). */
4153 if (subcgroup_path)
4154 (void) cg_attach(SYSTEMD_CGROUP_CONTROLLER, subcgroup_path, pid);
2da3263a 4155
b58b4116 4156 exec_status_start(&command->exec_status, pid);
9fb86720 4157
034c6ed7 4158 *ret = pid;
5cb5a6ff
LP
4159 return 0;
4160}
4161
034c6ed7 4162void exec_context_init(ExecContext *c) {
3536f49e
YW
4163 ExecDirectoryType i;
4164
034c6ed7
LP
4165 assert(c);
4166
4c12626c 4167 c->umask = 0022;
9eba9da4 4168 c->ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 0);
94f04347 4169 c->cpu_sched_policy = SCHED_OTHER;
071830ff 4170 c->syslog_priority = LOG_DAEMON|LOG_INFO;
74922904 4171 c->syslog_level_prefix = true;
353e12c2 4172 c->ignore_sigpipe = true;
3a43da28 4173 c->timer_slack_nsec = NSEC_INFINITY;
050f7277 4174 c->personality = PERSONALITY_INVALID;
72fd1768 4175 for (i = 0; i < _EXEC_DIRECTORY_TYPE_MAX; i++)
3536f49e 4176 c->directories[i].mode = 0755;
12213aed 4177 c->timeout_clean_usec = USEC_INFINITY;
a103496c 4178 c->capability_bounding_set = CAP_ALL;
aa9d574d
YW
4179 assert_cc(NAMESPACE_FLAGS_INITIAL != NAMESPACE_FLAGS_ALL);
4180 c->restrict_namespaces = NAMESPACE_FLAGS_INITIAL;
d3070fbd 4181 c->log_level_max = -1;
b070c7c0 4182 numa_policy_reset(&c->numa_policy);
034c6ed7
LP
4183}
4184
613b411c 4185void exec_context_done(ExecContext *c) {
3536f49e 4186 ExecDirectoryType i;
d3070fbd 4187 size_t l;
5cb5a6ff
LP
4188
4189 assert(c);
4190
6796073e
LP
4191 c->environment = strv_free(c->environment);
4192 c->environment_files = strv_free(c->environment_files);
b4c14404 4193 c->pass_environment = strv_free(c->pass_environment);
00819cc1 4194 c->unset_environment = strv_free(c->unset_environment);
8c7be95e 4195
31ce987c 4196 rlimit_free_all(c->rlimit);
034c6ed7 4197
2038c3f5 4198 for (l = 0; l < 3; l++) {
52c239d7 4199 c->stdio_fdname[l] = mfree(c->stdio_fdname[l]);
2038c3f5
LP
4200 c->stdio_file[l] = mfree(c->stdio_file[l]);
4201 }
52c239d7 4202
a1e58e8e
LP
4203 c->working_directory = mfree(c->working_directory);
4204 c->root_directory = mfree(c->root_directory);
915e6d16 4205 c->root_image = mfree(c->root_image);
a1e58e8e
LP
4206 c->tty_path = mfree(c->tty_path);
4207 c->syslog_identifier = mfree(c->syslog_identifier);
4208 c->user = mfree(c->user);
4209 c->group = mfree(c->group);
034c6ed7 4210
6796073e 4211 c->supplementary_groups = strv_free(c->supplementary_groups);
94f04347 4212
a1e58e8e 4213 c->pam_name = mfree(c->pam_name);
5b6319dc 4214
2a624c36
AP
4215 c->read_only_paths = strv_free(c->read_only_paths);
4216 c->read_write_paths = strv_free(c->read_write_paths);
4217 c->inaccessible_paths = strv_free(c->inaccessible_paths);
82c121a4 4218
d2d6c096 4219 bind_mount_free_many(c->bind_mounts, c->n_bind_mounts);
8e06d57c
YW
4220 c->bind_mounts = NULL;
4221 c->n_bind_mounts = 0;
2abd4e38
YW
4222 temporary_filesystem_free_many(c->temporary_filesystems, c->n_temporary_filesystems);
4223 c->temporary_filesystems = NULL;
4224 c->n_temporary_filesystems = 0;
d2d6c096 4225
0985c7c4 4226 cpu_set_reset(&c->cpu_set);
b070c7c0 4227 numa_policy_reset(&c->numa_policy);
86a3475b 4228
a1e58e8e
LP
4229 c->utmp_id = mfree(c->utmp_id);
4230 c->selinux_context = mfree(c->selinux_context);
4231 c->apparmor_profile = mfree(c->apparmor_profile);
5b8e1b77 4232 c->smack_process_label = mfree(c->smack_process_label);
eef65bf3 4233
8cfa775f 4234 c->syscall_filter = hashmap_free(c->syscall_filter);
525d3cc7
LP
4235 c->syscall_archs = set_free(c->syscall_archs);
4236 c->address_families = set_free(c->address_families);
e66cf1a3 4237
72fd1768 4238 for (i = 0; i < _EXEC_DIRECTORY_TYPE_MAX; i++)
3536f49e 4239 c->directories[i].paths = strv_free(c->directories[i].paths);
d3070fbd
LP
4240
4241 c->log_level_max = -1;
4242
4243 exec_context_free_log_extra_fields(c);
08f3be7a 4244
5ac1530e
ZJS
4245 c->log_ratelimit_interval_usec = 0;
4246 c->log_ratelimit_burst = 0;
90fc172e 4247
08f3be7a
LP
4248 c->stdin_data = mfree(c->stdin_data);
4249 c->stdin_data_size = 0;
a8d08f39
LP
4250
4251 c->network_namespace_path = mfree(c->network_namespace_path);
91dd5f7c
LP
4252
4253 c->log_namespace = mfree(c->log_namespace);
e66cf1a3
LP
4254}
4255
34cf6c43 4256int exec_context_destroy_runtime_directory(const ExecContext *c, const char *runtime_prefix) {
e66cf1a3
LP
4257 char **i;
4258
4259 assert(c);
4260
4261 if (!runtime_prefix)
4262 return 0;
4263
3536f49e 4264 STRV_FOREACH(i, c->directories[EXEC_DIRECTORY_RUNTIME].paths) {
e66cf1a3
LP
4265 _cleanup_free_ char *p;
4266
494d0247
YW
4267 if (exec_directory_is_private(c, EXEC_DIRECTORY_RUNTIME))
4268 p = path_join(runtime_prefix, "private", *i);
4269 else
4270 p = path_join(runtime_prefix, *i);
e66cf1a3
LP
4271 if (!p)
4272 return -ENOMEM;
4273
7bc4bf4a
LP
4274 /* We execute this synchronously, since we need to be sure this is gone when we start the
4275 * service next. */
c6878637 4276 (void) rm_rf(p, REMOVE_ROOT);
e66cf1a3
LP
4277 }
4278
4279 return 0;
5cb5a6ff
LP
4280}
4281
34cf6c43 4282static void exec_command_done(ExecCommand *c) {
43d0fcbd
LP
4283 assert(c);
4284
a1e58e8e 4285 c->path = mfree(c->path);
6796073e 4286 c->argv = strv_free(c->argv);
43d0fcbd
LP
4287}
4288
da6053d0
LP
4289void exec_command_done_array(ExecCommand *c, size_t n) {
4290 size_t i;
43d0fcbd
LP
4291
4292 for (i = 0; i < n; i++)
4293 exec_command_done(c+i);
4294}
4295
f1acf85a 4296ExecCommand* exec_command_free_list(ExecCommand *c) {
5cb5a6ff
LP
4297 ExecCommand *i;
4298
4299 while ((i = c)) {
71fda00f 4300 LIST_REMOVE(command, c, i);
43d0fcbd 4301 exec_command_done(i);
5cb5a6ff
LP
4302 free(i);
4303 }
f1acf85a
ZJS
4304
4305 return NULL;
5cb5a6ff
LP
4306}
4307
da6053d0
LP
4308void exec_command_free_array(ExecCommand **c, size_t n) {
4309 size_t i;
034c6ed7 4310
f1acf85a
ZJS
4311 for (i = 0; i < n; i++)
4312 c[i] = exec_command_free_list(c[i]);
034c6ed7
LP
4313}
4314
6a1d4d9f
LP
4315void exec_command_reset_status_array(ExecCommand *c, size_t n) {
4316 size_t i;
4317
4318 for (i = 0; i < n; i++)
4319 exec_status_reset(&c[i].exec_status);
4320}
4321
4322void exec_command_reset_status_list_array(ExecCommand **c, size_t n) {
4323 size_t i;
4324
4325 for (i = 0; i < n; i++) {
4326 ExecCommand *z;
4327
4328 LIST_FOREACH(command, z, c[i])
4329 exec_status_reset(&z->exec_status);
4330 }
4331}
4332
039f0e70 4333typedef struct InvalidEnvInfo {
34cf6c43 4334 const Unit *unit;
039f0e70
LP
4335 const char *path;
4336} InvalidEnvInfo;
4337
4338static void invalid_env(const char *p, void *userdata) {
4339 InvalidEnvInfo *info = userdata;
4340
f2341e0a 4341 log_unit_error(info->unit, "Ignoring invalid environment assignment '%s': %s", p, info->path);
039f0e70
LP
4342}
4343
52c239d7
LB
4344const char* exec_context_fdname(const ExecContext *c, int fd_index) {
4345 assert(c);
4346
4347 switch (fd_index) {
5073ff6b 4348
52c239d7
LB
4349 case STDIN_FILENO:
4350 if (c->std_input != EXEC_INPUT_NAMED_FD)
4351 return NULL;
5073ff6b 4352
52c239d7 4353 return c->stdio_fdname[STDIN_FILENO] ?: "stdin";
5073ff6b 4354
52c239d7
LB
4355 case STDOUT_FILENO:
4356 if (c->std_output != EXEC_OUTPUT_NAMED_FD)
4357 return NULL;
5073ff6b 4358
52c239d7 4359 return c->stdio_fdname[STDOUT_FILENO] ?: "stdout";
5073ff6b 4360
52c239d7
LB
4361 case STDERR_FILENO:
4362 if (c->std_error != EXEC_OUTPUT_NAMED_FD)
4363 return NULL;
5073ff6b 4364
52c239d7 4365 return c->stdio_fdname[STDERR_FILENO] ?: "stderr";
5073ff6b 4366
52c239d7
LB
4367 default:
4368 return NULL;
4369 }
4370}
4371
2caa38e9
LP
4372static int exec_context_named_iofds(
4373 const ExecContext *c,
4374 const ExecParameters *p,
4375 int named_iofds[static 3]) {
4376
da6053d0 4377 size_t i, targets;
56fbd561 4378 const char* stdio_fdname[3];
da6053d0 4379 size_t n_fds;
52c239d7
LB
4380
4381 assert(c);
4382 assert(p);
2caa38e9 4383 assert(named_iofds);
52c239d7
LB
4384
4385 targets = (c->std_input == EXEC_INPUT_NAMED_FD) +
4386 (c->std_output == EXEC_OUTPUT_NAMED_FD) +
4387 (c->std_error == EXEC_OUTPUT_NAMED_FD);
4388
4389 for (i = 0; i < 3; i++)
4390 stdio_fdname[i] = exec_context_fdname(c, i);
4391
4c47affc
FB
4392 n_fds = p->n_storage_fds + p->n_socket_fds;
4393
4394 for (i = 0; i < n_fds && targets > 0; i++)
56fbd561
ZJS
4395 if (named_iofds[STDIN_FILENO] < 0 &&
4396 c->std_input == EXEC_INPUT_NAMED_FD &&
4397 stdio_fdname[STDIN_FILENO] &&
4398 streq(p->fd_names[i], stdio_fdname[STDIN_FILENO])) {
4399
52c239d7
LB
4400 named_iofds[STDIN_FILENO] = p->fds[i];
4401 targets--;
56fbd561
ZJS
4402
4403 } else if (named_iofds[STDOUT_FILENO] < 0 &&
4404 c->std_output == EXEC_OUTPUT_NAMED_FD &&
4405 stdio_fdname[STDOUT_FILENO] &&
4406 streq(p->fd_names[i], stdio_fdname[STDOUT_FILENO])) {
4407
52c239d7
LB
4408 named_iofds[STDOUT_FILENO] = p->fds[i];
4409 targets--;
56fbd561
ZJS
4410
4411 } else if (named_iofds[STDERR_FILENO] < 0 &&
4412 c->std_error == EXEC_OUTPUT_NAMED_FD &&
4413 stdio_fdname[STDERR_FILENO] &&
4414 streq(p->fd_names[i], stdio_fdname[STDERR_FILENO])) {
4415
52c239d7
LB
4416 named_iofds[STDERR_FILENO] = p->fds[i];
4417 targets--;
4418 }
4419
56fbd561 4420 return targets == 0 ? 0 : -ENOENT;
52c239d7
LB
4421}
4422
34cf6c43 4423static int exec_context_load_environment(const Unit *unit, const ExecContext *c, char ***l) {
8c7be95e
LP
4424 char **i, **r = NULL;
4425
4426 assert(c);
4427 assert(l);
4428
4429 STRV_FOREACH(i, c->environment_files) {
4430 char *fn;
52511fae
ZJS
4431 int k;
4432 unsigned n;
8c7be95e
LP
4433 bool ignore = false;
4434 char **p;
7fd1b19b 4435 _cleanup_globfree_ glob_t pglob = {};
8c7be95e
LP
4436
4437 fn = *i;
4438
4439 if (fn[0] == '-') {
4440 ignore = true;
313cefa1 4441 fn++;
8c7be95e
LP
4442 }
4443
4444 if (!path_is_absolute(fn)) {
8c7be95e
LP
4445 if (ignore)
4446 continue;
4447
4448 strv_free(r);
4449 return -EINVAL;
4450 }
4451
2bef10ab 4452 /* Filename supports globbing, take all matching files */
d8c92e8b
ZJS
4453 k = safe_glob(fn, 0, &pglob);
4454 if (k < 0) {
2bef10ab
PL
4455 if (ignore)
4456 continue;
8c7be95e 4457
2bef10ab 4458 strv_free(r);
d8c92e8b 4459 return k;
2bef10ab 4460 }
8c7be95e 4461
d8c92e8b
ZJS
4462 /* When we don't match anything, -ENOENT should be returned */
4463 assert(pglob.gl_pathc > 0);
4464
4465 for (n = 0; n < pglob.gl_pathc; n++) {
aa8fbc74 4466 k = load_env_file(NULL, pglob.gl_pathv[n], &p);
2bef10ab
PL
4467 if (k < 0) {
4468 if (ignore)
4469 continue;
8c7be95e 4470
2bef10ab 4471 strv_free(r);
2bef10ab 4472 return k;
e9c1ea9d 4473 }
ebc05a09 4474 /* Log invalid environment variables with filename */
039f0e70
LP
4475 if (p) {
4476 InvalidEnvInfo info = {
f2341e0a 4477 .unit = unit,
039f0e70
LP
4478 .path = pglob.gl_pathv[n]
4479 };
4480
4481 p = strv_env_clean_with_callback(p, invalid_env, &info);
4482 }
8c7be95e 4483
234519ae 4484 if (!r)
2bef10ab
PL
4485 r = p;
4486 else {
4487 char **m;
8c7be95e 4488
2bef10ab
PL
4489 m = strv_env_merge(2, r, p);
4490 strv_free(r);
4491 strv_free(p);
c84a9488 4492 if (!m)
2bef10ab 4493 return -ENOMEM;
2bef10ab
PL
4494
4495 r = m;
4496 }
8c7be95e
LP
4497 }
4498 }
4499
4500 *l = r;
4501
4502 return 0;
4503}
4504
6ac8fdc9 4505static bool tty_may_match_dev_console(const char *tty) {
7b912648 4506 _cleanup_free_ char *resolved = NULL;
6ac8fdc9 4507
1e22b5cd
LP
4508 if (!tty)
4509 return true;
4510
a119ec7c 4511 tty = skip_dev_prefix(tty);
6ac8fdc9
MS
4512
4513 /* trivial identity? */
4514 if (streq(tty, "console"))
4515 return true;
4516
7b912648
LP
4517 if (resolve_dev_console(&resolved) < 0)
4518 return true; /* if we could not resolve, assume it may */
6ac8fdc9
MS
4519
4520 /* "tty0" means the active VC, so it may be the same sometimes */
955f1c85 4521 return path_equal(resolved, tty) || (streq(resolved, "tty0") && tty_is_vc(tty));
6ac8fdc9
MS
4522}
4523
6c0ae739
LP
4524static bool exec_context_may_touch_tty(const ExecContext *ec) {
4525 assert(ec);
1e22b5cd 4526
6c0ae739 4527 return ec->tty_reset ||
1e22b5cd
LP
4528 ec->tty_vhangup ||
4529 ec->tty_vt_disallocate ||
6ac8fdc9
MS
4530 is_terminal_input(ec->std_input) ||
4531 is_terminal_output(ec->std_output) ||
6c0ae739
LP
4532 is_terminal_output(ec->std_error);
4533}
4534
4535bool exec_context_may_touch_console(const ExecContext *ec) {
4536
4537 return exec_context_may_touch_tty(ec) &&
1e22b5cd 4538 tty_may_match_dev_console(exec_context_tty_path(ec));
6ac8fdc9
MS
4539}
4540
15ae422b
LP
/* Writes every string of the NULL-terminated vector l to f, each one preceded by a single space. A NULL
 * vector writes nothing. */
static void strv_fprintf(FILE *f, char **l) {
        assert(f);

        for (char **s = l; s && *s; s++)
                fprintf(f, " %s", *s);
}
4549
34cf6c43 4550void exec_context_dump(const ExecContext *c, FILE* f, const char *prefix) {
12213aed 4551 char **e, **d, buf_clean[FORMAT_TIMESPAN_MAX];
d3070fbd 4552 ExecDirectoryType dt;
94f04347 4553 unsigned i;
add00535 4554 int r;
9eba9da4 4555
5cb5a6ff
LP
4556 assert(c);
4557 assert(f);
4558
4ad49000 4559 prefix = strempty(prefix);
5cb5a6ff
LP
4560
4561 fprintf(f,
94f04347
LP
4562 "%sUMask: %04o\n"
4563 "%sWorkingDirectory: %s\n"
451a074f 4564 "%sRootDirectory: %s\n"
15ae422b 4565 "%sNonBlocking: %s\n"
64747e2d 4566 "%sPrivateTmp: %s\n"
7f112f50 4567 "%sPrivateDevices: %s\n"
59eeb84b 4568 "%sProtectKernelTunables: %s\n"
e66a2f65 4569 "%sProtectKernelModules: %s\n"
84703040 4570 "%sProtectKernelLogs: %s\n"
fc64760d 4571 "%sProtectClock: %s\n"
59eeb84b 4572 "%sProtectControlGroups: %s\n"
d251207d
LP
4573 "%sPrivateNetwork: %s\n"
4574 "%sPrivateUsers: %s\n"
1b8689f9
LP
4575 "%sProtectHome: %s\n"
4576 "%sProtectSystem: %s\n"
5d997827 4577 "%sMountAPIVFS: %s\n"
f3e43635 4578 "%sIgnoreSIGPIPE: %s\n"
f4170c67 4579 "%sMemoryDenyWriteExecute: %s\n"
b1edf445 4580 "%sRestrictRealtime: %s\n"
f69567cb 4581 "%sRestrictSUIDSGID: %s\n"
aecd5ac6
TM
4582 "%sKeyringMode: %s\n"
4583 "%sProtectHostname: %s\n",
5cb5a6ff 4584 prefix, c->umask,
9eba9da4 4585 prefix, c->working_directory ? c->working_directory : "/",
451a074f 4586 prefix, c->root_directory ? c->root_directory : "/",
15ae422b 4587 prefix, yes_no(c->non_blocking),
64747e2d 4588 prefix, yes_no(c->private_tmp),
7f112f50 4589 prefix, yes_no(c->private_devices),
59eeb84b 4590 prefix, yes_no(c->protect_kernel_tunables),
e66a2f65 4591 prefix, yes_no(c->protect_kernel_modules),
84703040 4592 prefix, yes_no(c->protect_kernel_logs),
fc64760d 4593 prefix, yes_no(c->protect_clock),
59eeb84b 4594 prefix, yes_no(c->protect_control_groups),
d251207d
LP
4595 prefix, yes_no(c->private_network),
4596 prefix, yes_no(c->private_users),
1b8689f9
LP
4597 prefix, protect_home_to_string(c->protect_home),
4598 prefix, protect_system_to_string(c->protect_system),
5d997827 4599 prefix, yes_no(c->mount_apivfs),
f3e43635 4600 prefix, yes_no(c->ignore_sigpipe),
f4170c67 4601 prefix, yes_no(c->memory_deny_write_execute),
b1edf445 4602 prefix, yes_no(c->restrict_realtime),
f69567cb 4603 prefix, yes_no(c->restrict_suid_sgid),
aecd5ac6
TM
4604 prefix, exec_keyring_mode_to_string(c->keyring_mode),
4605 prefix, yes_no(c->protect_hostname));
fb33a393 4606
915e6d16
LP
4607 if (c->root_image)
4608 fprintf(f, "%sRootImage: %s\n", prefix, c->root_image);
4609
8c7be95e
LP
4610 STRV_FOREACH(e, c->environment)
4611 fprintf(f, "%sEnvironment: %s\n", prefix, *e);
4612
4613 STRV_FOREACH(e, c->environment_files)
4614 fprintf(f, "%sEnvironmentFile: %s\n", prefix, *e);
94f04347 4615
b4c14404
FB
4616 STRV_FOREACH(e, c->pass_environment)
4617 fprintf(f, "%sPassEnvironment: %s\n", prefix, *e);
4618
00819cc1
LP
4619 STRV_FOREACH(e, c->unset_environment)
4620 fprintf(f, "%sUnsetEnvironment: %s\n", prefix, *e);
4621
53f47dfc
YW
4622 fprintf(f, "%sRuntimeDirectoryPreserve: %s\n", prefix, exec_preserve_mode_to_string(c->runtime_directory_preserve_mode));
4623
72fd1768 4624 for (dt = 0; dt < _EXEC_DIRECTORY_TYPE_MAX; dt++) {
3536f49e
YW
4625 fprintf(f, "%s%sMode: %04o\n", prefix, exec_directory_type_to_string(dt), c->directories[dt].mode);
4626
4627 STRV_FOREACH(d, c->directories[dt].paths)
4628 fprintf(f, "%s%s: %s\n", prefix, exec_directory_type_to_string(dt), *d);
4629 }
c2bbd90b 4630
12213aed
YW
4631 fprintf(f,
4632 "%sTimeoutCleanSec: %s\n",
4633 prefix, format_timespan(buf_clean, sizeof(buf_clean), c->timeout_clean_usec, USEC_PER_SEC));
4634
fb33a393
LP
4635 if (c->nice_set)
4636 fprintf(f,
4637 "%sNice: %i\n",
4638 prefix, c->nice);
4639
dd6c17b1 4640 if (c->oom_score_adjust_set)
fb33a393 4641 fprintf(f,
dd6c17b1
LP
4642 "%sOOMScoreAdjust: %i\n",
4643 prefix, c->oom_score_adjust);
9eba9da4 4644
ad21e542
ZJS
4645 if (c->coredump_filter_set)
4646 fprintf(f,
4647 "%sCoredumpFilter: 0x%"PRIx64"\n",
4648 prefix, c->coredump_filter);
4649
94f04347 4650 for (i = 0; i < RLIM_NLIMITS; i++)
3c11da9d 4651 if (c->rlimit[i]) {
4c3a2b84 4652 fprintf(f, "%sLimit%s: " RLIM_FMT "\n",
3c11da9d 4653 prefix, rlimit_to_string(i), c->rlimit[i]->rlim_max);
4c3a2b84 4654 fprintf(f, "%sLimit%sSoft: " RLIM_FMT "\n",
3c11da9d
EV
4655 prefix, rlimit_to_string(i), c->rlimit[i]->rlim_cur);
4656 }
94f04347 4657
f8b69d1d 4658 if (c->ioprio_set) {
1756a011 4659 _cleanup_free_ char *class_str = NULL;
f8b69d1d 4660
837df140
YW
4661 r = ioprio_class_to_string_alloc(IOPRIO_PRIO_CLASS(c->ioprio), &class_str);
4662 if (r >= 0)
4663 fprintf(f, "%sIOSchedulingClass: %s\n", prefix, class_str);
4664
4665 fprintf(f, "%sIOPriority: %lu\n", prefix, IOPRIO_PRIO_DATA(c->ioprio));
f8b69d1d 4666 }
94f04347 4667
f8b69d1d 4668 if (c->cpu_sched_set) {
1756a011 4669 _cleanup_free_ char *policy_str = NULL;
f8b69d1d 4670
837df140
YW
4671 r = sched_policy_to_string_alloc(c->cpu_sched_policy, &policy_str);
4672 if (r >= 0)
4673 fprintf(f, "%sCPUSchedulingPolicy: %s\n", prefix, policy_str);
4674
94f04347 4675 fprintf(f,
38b48754
LP
4676 "%sCPUSchedulingPriority: %i\n"
4677 "%sCPUSchedulingResetOnFork: %s\n",
38b48754
LP
4678 prefix, c->cpu_sched_priority,
4679 prefix, yes_no(c->cpu_sched_reset_on_fork));
b929bf04 4680 }
94f04347 4681
0985c7c4 4682 if (c->cpu_set.set) {
e7fca352
MS
4683 _cleanup_free_ char *affinity = NULL;
4684
4685 affinity = cpu_set_to_range_string(&c->cpu_set);
4686 fprintf(f, "%sCPUAffinity: %s\n", prefix, affinity);
94f04347
LP
4687 }
4688
b070c7c0
MS
4689 if (mpol_is_valid(numa_policy_get_type(&c->numa_policy))) {
4690 _cleanup_free_ char *nodes = NULL;
4691
4692 nodes = cpu_set_to_range_string(&c->numa_policy.nodes);
4693 fprintf(f, "%sNUMAPolicy: %s\n", prefix, mpol_to_string(numa_policy_get_type(&c->numa_policy)));
4694 fprintf(f, "%sNUMAMask: %s\n", prefix, strnull(nodes));
4695 }
4696
3a43da28 4697 if (c->timer_slack_nsec != NSEC_INFINITY)
ccd06097 4698 fprintf(f, "%sTimerSlackNSec: "NSEC_FMT "\n", prefix, c->timer_slack_nsec);
94f04347
LP
4699
4700 fprintf(f,
80876c20
LP
4701 "%sStandardInput: %s\n"
4702 "%sStandardOutput: %s\n"
4703 "%sStandardError: %s\n",
4704 prefix, exec_input_to_string(c->std_input),
4705 prefix, exec_output_to_string(c->std_output),
4706 prefix, exec_output_to_string(c->std_error));
4707
befc4a80
LP
4708 if (c->std_input == EXEC_INPUT_NAMED_FD)
4709 fprintf(f, "%sStandardInputFileDescriptorName: %s\n", prefix, c->stdio_fdname[STDIN_FILENO]);
4710 if (c->std_output == EXEC_OUTPUT_NAMED_FD)
4711 fprintf(f, "%sStandardOutputFileDescriptorName: %s\n", prefix, c->stdio_fdname[STDOUT_FILENO]);
4712 if (c->std_error == EXEC_OUTPUT_NAMED_FD)
4713 fprintf(f, "%sStandardErrorFileDescriptorName: %s\n", prefix, c->stdio_fdname[STDERR_FILENO]);
4714
4715 if (c->std_input == EXEC_INPUT_FILE)
4716 fprintf(f, "%sStandardInputFile: %s\n", prefix, c->stdio_file[STDIN_FILENO]);
4717 if (c->std_output == EXEC_OUTPUT_FILE)
4718 fprintf(f, "%sStandardOutputFile: %s\n", prefix, c->stdio_file[STDOUT_FILENO]);
566b7d23
ZD
4719 if (c->std_output == EXEC_OUTPUT_FILE_APPEND)
4720 fprintf(f, "%sStandardOutputFileToAppend: %s\n", prefix, c->stdio_file[STDOUT_FILENO]);
befc4a80
LP
4721 if (c->std_error == EXEC_OUTPUT_FILE)
4722 fprintf(f, "%sStandardErrorFile: %s\n", prefix, c->stdio_file[STDERR_FILENO]);
566b7d23
ZD
4723 if (c->std_error == EXEC_OUTPUT_FILE_APPEND)
4724 fprintf(f, "%sStandardErrorFileToAppend: %s\n", prefix, c->stdio_file[STDERR_FILENO]);
befc4a80 4725
80876c20
LP
4726 if (c->tty_path)
4727 fprintf(f,
6ea832a2
LP
4728 "%sTTYPath: %s\n"
4729 "%sTTYReset: %s\n"
4730 "%sTTYVHangup: %s\n"
4731 "%sTTYVTDisallocate: %s\n",
4732 prefix, c->tty_path,
4733 prefix, yes_no(c->tty_reset),
4734 prefix, yes_no(c->tty_vhangup),
4735 prefix, yes_no(c->tty_vt_disallocate));
94f04347 4736
9f6444eb
LP
4737 if (IN_SET(c->std_output,
4738 EXEC_OUTPUT_SYSLOG,
4739 EXEC_OUTPUT_KMSG,
4740 EXEC_OUTPUT_JOURNAL,
4741 EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
4742 EXEC_OUTPUT_KMSG_AND_CONSOLE,
4743 EXEC_OUTPUT_JOURNAL_AND_CONSOLE) ||
4744 IN_SET(c->std_error,
4745 EXEC_OUTPUT_SYSLOG,
4746 EXEC_OUTPUT_KMSG,
4747 EXEC_OUTPUT_JOURNAL,
4748 EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
4749 EXEC_OUTPUT_KMSG_AND_CONSOLE,
4750 EXEC_OUTPUT_JOURNAL_AND_CONSOLE)) {
f8b69d1d 4751
5ce70e5b 4752 _cleanup_free_ char *fac_str = NULL, *lvl_str = NULL;
f8b69d1d 4753
837df140
YW
4754 r = log_facility_unshifted_to_string_alloc(c->syslog_priority >> 3, &fac_str);
4755 if (r >= 0)
4756 fprintf(f, "%sSyslogFacility: %s\n", prefix, fac_str);
f8b69d1d 4757
837df140
YW
4758 r = log_level_to_string_alloc(LOG_PRI(c->syslog_priority), &lvl_str);
4759 if (r >= 0)
4760 fprintf(f, "%sSyslogLevel: %s\n", prefix, lvl_str);
f8b69d1d 4761 }
94f04347 4762
d3070fbd
LP
4763 if (c->log_level_max >= 0) {
4764 _cleanup_free_ char *t = NULL;
4765
4766 (void) log_level_to_string_alloc(c->log_level_max, &t);
4767
4768 fprintf(f, "%sLogLevelMax: %s\n", prefix, strna(t));
4769 }
4770
5ac1530e 4771 if (c->log_ratelimit_interval_usec > 0) {
90fc172e
AZ
4772 char buf_timespan[FORMAT_TIMESPAN_MAX];
4773
4774 fprintf(f,
4775 "%sLogRateLimitIntervalSec: %s\n",
5ac1530e 4776 prefix, format_timespan(buf_timespan, sizeof(buf_timespan), c->log_ratelimit_interval_usec, USEC_PER_SEC));
90fc172e
AZ
4777 }
4778
5ac1530e
ZJS
4779 if (c->log_ratelimit_burst > 0)
4780 fprintf(f, "%sLogRateLimitBurst: %u\n", prefix, c->log_ratelimit_burst);
90fc172e 4781
d3070fbd
LP
4782 if (c->n_log_extra_fields > 0) {
4783 size_t j;
4784
4785 for (j = 0; j < c->n_log_extra_fields; j++) {
4786 fprintf(f, "%sLogExtraFields: ", prefix);
4787 fwrite(c->log_extra_fields[j].iov_base,
4788 1, c->log_extra_fields[j].iov_len,
4789 f);
4790 fputc('\n', f);
4791 }
4792 }
4793
91dd5f7c
LP
4794 if (c->log_namespace)
4795 fprintf(f, "%sLogNamespace: %s\n", prefix, c->log_namespace);
4796
07d46372
YW
4797 if (c->secure_bits) {
4798 _cleanup_free_ char *str = NULL;
4799
4800 r = secure_bits_to_string_alloc(c->secure_bits, &str);
4801 if (r >= 0)
4802 fprintf(f, "%sSecure Bits: %s\n", prefix, str);
4803 }
94f04347 4804
a103496c 4805 if (c->capability_bounding_set != CAP_ALL) {
dd1f5bd0 4806 _cleanup_free_ char *str = NULL;
94f04347 4807
dd1f5bd0
YW
4808 r = capability_set_to_string_alloc(c->capability_bounding_set, &str);
4809 if (r >= 0)
4810 fprintf(f, "%sCapabilityBoundingSet: %s\n", prefix, str);
755d4b67
IP
4811 }
4812
4813 if (c->capability_ambient_set != 0) {
dd1f5bd0 4814 _cleanup_free_ char *str = NULL;
755d4b67 4815
dd1f5bd0
YW
4816 r = capability_set_to_string_alloc(c->capability_ambient_set, &str);
4817 if (r >= 0)
4818 fprintf(f, "%sAmbientCapabilities: %s\n", prefix, str);
94f04347
LP
4819 }
4820
4821 if (c->user)
f2d3769a 4822 fprintf(f, "%sUser: %s\n", prefix, c->user);
94f04347 4823 if (c->group)
f2d3769a 4824 fprintf(f, "%sGroup: %s\n", prefix, c->group);
94f04347 4825
29206d46
LP
4826 fprintf(f, "%sDynamicUser: %s\n", prefix, yes_no(c->dynamic_user));
4827
ac6e8be6 4828 if (!strv_isempty(c->supplementary_groups)) {
94f04347 4829 fprintf(f, "%sSupplementaryGroups:", prefix);
15ae422b
LP
4830 strv_fprintf(f, c->supplementary_groups);
4831 fputs("\n", f);
4832 }
94f04347 4833
5b6319dc 4834 if (c->pam_name)
f2d3769a 4835 fprintf(f, "%sPAMName: %s\n", prefix, c->pam_name);
5b6319dc 4836
58629001 4837 if (!strv_isempty(c->read_write_paths)) {
2a624c36
AP
4838 fprintf(f, "%sReadWritePaths:", prefix);
4839 strv_fprintf(f, c->read_write_paths);
15ae422b
LP
4840 fputs("\n", f);
4841 }
4842
58629001 4843 if (!strv_isempty(c->read_only_paths)) {
2a624c36
AP
4844 fprintf(f, "%sReadOnlyPaths:", prefix);
4845 strv_fprintf(f, c->read_only_paths);
15ae422b
LP
4846 fputs("\n", f);
4847 }
94f04347 4848
58629001 4849 if (!strv_isempty(c->inaccessible_paths)) {
2a624c36
AP
4850 fprintf(f, "%sInaccessiblePaths:", prefix);
4851 strv_fprintf(f, c->inaccessible_paths);
94f04347
LP
4852 fputs("\n", f);
4853 }
2e22afe9 4854
d2d6c096 4855 if (c->n_bind_mounts > 0)
4ca763a9
YW
4856 for (i = 0; i < c->n_bind_mounts; i++)
4857 fprintf(f, "%s%s: %s%s:%s:%s\n", prefix,
d2d6c096 4858 c->bind_mounts[i].read_only ? "BindReadOnlyPaths" : "BindPaths",
4ca763a9 4859 c->bind_mounts[i].ignore_enoent ? "-": "",
d2d6c096
LP
4860 c->bind_mounts[i].source,
4861 c->bind_mounts[i].destination,
4862 c->bind_mounts[i].recursive ? "rbind" : "norbind");
d2d6c096 4863
2abd4e38
YW
4864 if (c->n_temporary_filesystems > 0)
4865 for (i = 0; i < c->n_temporary_filesystems; i++) {
4866 TemporaryFileSystem *t = c->temporary_filesystems + i;
4867
4868 fprintf(f, "%sTemporaryFileSystem: %s%s%s\n", prefix,
4869 t->path,
4870 isempty(t->options) ? "" : ":",
4871 strempty(t->options));
4872 }
4873
169c1bda
LP
4874 if (c->utmp_id)
4875 fprintf(f,
4876 "%sUtmpIdentifier: %s\n",
4877 prefix, c->utmp_id);
7b52a628
MS
4878
4879 if (c->selinux_context)
4880 fprintf(f,
5f8640fb
LP
4881 "%sSELinuxContext: %s%s\n",
4882 prefix, c->selinux_context_ignore ? "-" : "", c->selinux_context);
17df7223 4883
80c21aea
WC
4884 if (c->apparmor_profile)
4885 fprintf(f,
4886 "%sAppArmorProfile: %s%s\n",
4887 prefix, c->apparmor_profile_ignore ? "-" : "", c->apparmor_profile);
4888
4889 if (c->smack_process_label)
4890 fprintf(f,
4891 "%sSmackProcessLabel: %s%s\n",
4892 prefix, c->smack_process_label_ignore ? "-" : "", c->smack_process_label);
4893
050f7277 4894 if (c->personality != PERSONALITY_INVALID)
ac45f971
LP
4895 fprintf(f,
4896 "%sPersonality: %s\n",
4897 prefix, strna(personality_to_string(c->personality)));
4898
78e864e5
TM
4899 fprintf(f,
4900 "%sLockPersonality: %s\n",
4901 prefix, yes_no(c->lock_personality));
4902
17df7223 4903 if (c->syscall_filter) {
349cc4a5 4904#if HAVE_SECCOMP
17df7223 4905 Iterator j;
8cfa775f 4906 void *id, *val;
17df7223 4907 bool first = true;
351a19b1 4908#endif
17df7223
LP
4909
4910 fprintf(f,
57183d11 4911 "%sSystemCallFilter: ",
17df7223
LP
4912 prefix);
4913
4914 if (!c->syscall_whitelist)
4915 fputc('~', f);
4916
349cc4a5 4917#if HAVE_SECCOMP
8cfa775f 4918 HASHMAP_FOREACH_KEY(val, id, c->syscall_filter, j) {
17df7223 4919 _cleanup_free_ char *name = NULL;
8cfa775f
YW
4920 const char *errno_name = NULL;
4921 int num = PTR_TO_INT(val);
17df7223
LP
4922
4923 if (first)
4924 first = false;
4925 else
4926 fputc(' ', f);
4927
57183d11 4928 name = seccomp_syscall_resolve_num_arch(SCMP_ARCH_NATIVE, PTR_TO_INT(id) - 1);
17df7223 4929 fputs(strna(name), f);
8cfa775f
YW
4930
4931 if (num >= 0) {
4932 errno_name = errno_to_name(num);
4933 if (errno_name)
4934 fprintf(f, ":%s", errno_name);
4935 else
4936 fprintf(f, ":%d", num);
4937 }
17df7223 4938 }
351a19b1 4939#endif
17df7223
LP
4940
4941 fputc('\n', f);
4942 }
4943
57183d11 4944 if (c->syscall_archs) {
349cc4a5 4945#if HAVE_SECCOMP
57183d11
LP
4946 Iterator j;
4947 void *id;
4948#endif
4949
4950 fprintf(f,
4951 "%sSystemCallArchitectures:",
4952 prefix);
4953
349cc4a5 4954#if HAVE_SECCOMP
57183d11
LP
4955 SET_FOREACH(id, c->syscall_archs, j)
4956 fprintf(f, " %s", strna(seccomp_arch_to_string(PTR_TO_UINT32(id) - 1)));
4957#endif
4958 fputc('\n', f);
4959 }
4960
add00535
LP
4961 if (exec_context_restrict_namespaces_set(c)) {
4962 _cleanup_free_ char *s = NULL;
4963
86c2a9f1 4964 r = namespace_flags_to_string(c->restrict_namespaces, &s);
add00535
LP
4965 if (r >= 0)
4966 fprintf(f, "%sRestrictNamespaces: %s\n",
dd0395b5 4967 prefix, strna(s));
add00535
LP
4968 }
4969
a8d08f39
LP
4970 if (c->network_namespace_path)
4971 fprintf(f,
4972 "%sNetworkNamespacePath: %s\n",
4973 prefix, c->network_namespace_path);
4974
3df90f24
YW
4975 if (c->syscall_errno > 0) {
4976 const char *errno_name;
4977
4978 fprintf(f, "%sSystemCallErrorNumber: ", prefix);
4979
4980 errno_name = errno_to_name(c->syscall_errno);
4981 if (errno_name)
4982 fprintf(f, "%s\n", errno_name);
4983 else
4984 fprintf(f, "%d\n", c->syscall_errno);
4985 }
5cb5a6ff
LP
4986}
4987
34cf6c43 4988bool exec_context_maintains_privileges(const ExecContext *c) {
a931ad47
LP
4989 assert(c);
4990
61233823 4991 /* Returns true if the process forked off would run under
a931ad47
LP
4992 * an unchanged UID or as root. */
4993
4994 if (!c->user)
4995 return true;
4996
4997 if (streq(c->user, "root") || streq(c->user, "0"))
4998 return true;
4999
5000 return false;
5001}
5002
34cf6c43 5003int exec_context_get_effective_ioprio(const ExecContext *c) {
7f452159
LP
5004 int p;
5005
5006 assert(c);
5007
5008 if (c->ioprio_set)
5009 return c->ioprio;
5010
5011 p = ioprio_get(IOPRIO_WHO_PROCESS, 0);
5012 if (p < 0)
5013 return IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 4);
5014
5015 return p;
5016}
5017
d3070fbd
LP
5018void exec_context_free_log_extra_fields(ExecContext *c) {
5019 size_t l;
5020
5021 assert(c);
5022
5023 for (l = 0; l < c->n_log_extra_fields; l++)
5024 free(c->log_extra_fields[l].iov_base);
5025 c->log_extra_fields = mfree(c->log_extra_fields);
5026 c->n_log_extra_fields = 0;
5027}
5028
6f765baf
LP
5029void exec_context_revert_tty(ExecContext *c) {
5030 int r;
5031
5032 assert(c);
5033
5034 /* First, reset the TTY (possibly kicking everybody else from the TTY) */
5035 exec_context_tty_reset(c, NULL);
5036
5037 /* And then undo what chown_terminal() did earlier. Note that we only do this if we have a path
5038 * configured. If the TTY was passed to us as file descriptor we assume the TTY is opened and managed
5039 * by whoever passed it to us and thus knows better when and how to chmod()/chown() it back. */
5040
5041 if (exec_context_may_touch_tty(c)) {
5042 const char *path;
5043
5044 path = exec_context_tty_path(c);
5045 if (path) {
5046 r = chmod_and_chown(path, TTY_MODE, 0, TTY_GID);
5047 if (r < 0 && r != -ENOENT)
5048 log_warning_errno(r, "Failed to reset TTY ownership/access mode of %s, ignoring: %m", path);
5049 }
5050 }
5051}
5052
4c2f5842
LP
5053int exec_context_get_clean_directories(
5054 ExecContext *c,
5055 char **prefix,
5056 ExecCleanMask mask,
5057 char ***ret) {
5058
5059 _cleanup_strv_free_ char **l = NULL;
5060 ExecDirectoryType t;
5061 int r;
5062
5063 assert(c);
5064 assert(prefix);
5065 assert(ret);
5066
5067 for (t = 0; t < _EXEC_DIRECTORY_TYPE_MAX; t++) {
5068 char **i;
5069
5070 if (!FLAGS_SET(mask, 1U << t))
5071 continue;
5072
5073 if (!prefix[t])
5074 continue;
5075
5076 STRV_FOREACH(i, c->directories[t].paths) {
5077 char *j;
5078
5079 j = path_join(prefix[t], *i);
5080 if (!j)
5081 return -ENOMEM;
5082
5083 r = strv_consume(&l, j);
5084 if (r < 0)
5085 return r;
7f622a19
YW
5086
5087 /* Also remove private directories unconditionally. */
5088 if (t != EXEC_DIRECTORY_CONFIGURATION) {
5089 j = path_join(prefix[t], "private", *i);
5090 if (!j)
5091 return -ENOMEM;
5092
5093 r = strv_consume(&l, j);
5094 if (r < 0)
5095 return r;
5096 }
4c2f5842
LP
5097 }
5098 }
5099
5100 *ret = TAKE_PTR(l);
5101 return 0;
5102}
5103
5104int exec_context_get_clean_mask(ExecContext *c, ExecCleanMask *ret) {
5105 ExecCleanMask mask = 0;
5106
5107 assert(c);
5108 assert(ret);
5109
5110 for (ExecDirectoryType t = 0; t < _EXEC_DIRECTORY_TYPE_MAX; t++)
5111 if (!strv_isempty(c->directories[t].paths))
5112 mask |= 1U << t;
5113
5114 *ret = mask;
5115 return 0;
5116}
5117
b58b4116 5118void exec_status_start(ExecStatus *s, pid_t pid) {
034c6ed7 5119 assert(s);
5cb5a6ff 5120
2ed26ed0
LP
5121 *s = (ExecStatus) {
5122 .pid = pid,
5123 };
5124
b58b4116
LP
5125 dual_timestamp_get(&s->start_timestamp);
5126}
5127
34cf6c43 5128void exec_status_exit(ExecStatus *s, const ExecContext *context, pid_t pid, int code, int status) {
b58b4116
LP
5129 assert(s);
5130
2ed26ed0
LP
5131 if (s->pid != pid) {
5132 *s = (ExecStatus) {
5133 .pid = pid,
5134 };
5135 }
b58b4116 5136
63983207 5137 dual_timestamp_get(&s->exit_timestamp);
9fb86720 5138
034c6ed7
LP
5139 s->code = code;
5140 s->status = status;
169c1bda 5141
6f765baf
LP
5142 if (context && context->utmp_id)
5143 (void) utmp_put_dead_process(context->utmp_id, pid, code, status);
9fb86720
LP
5144}
5145
6a1d4d9f
LP
5146void exec_status_reset(ExecStatus *s) {
5147 assert(s);
5148
5149 *s = (ExecStatus) {};
5150}
5151
34cf6c43 5152void exec_status_dump(const ExecStatus *s, FILE *f, const char *prefix) {
9fb86720
LP
5153 char buf[FORMAT_TIMESTAMP_MAX];
5154
5155 assert(s);
5156 assert(f);
5157
9fb86720
LP
5158 if (s->pid <= 0)
5159 return;
5160
4c940960
LP
5161 prefix = strempty(prefix);
5162
9fb86720 5163 fprintf(f,
ccd06097
ZJS
5164 "%sPID: "PID_FMT"\n",
5165 prefix, s->pid);
9fb86720 5166
af9d16e1 5167 if (dual_timestamp_is_set(&s->start_timestamp))
9fb86720
LP
5168 fprintf(f,
5169 "%sStart Timestamp: %s\n",
63983207 5170 prefix, format_timestamp(buf, sizeof(buf), s->start_timestamp.realtime));
9fb86720 5171
af9d16e1 5172 if (dual_timestamp_is_set(&s->exit_timestamp))
9fb86720
LP
5173 fprintf(f,
5174 "%sExit Timestamp: %s\n"
5175 "%sExit Code: %s\n"
5176 "%sExit Status: %i\n",
63983207 5177 prefix, format_timestamp(buf, sizeof(buf), s->exit_timestamp.realtime),
9fb86720
LP
5178 prefix, sigchld_code_to_string(s->code),
5179 prefix, s->status);
5cb5a6ff 5180}
44d8db9e 5181
34cf6c43 5182static char *exec_command_line(char **argv) {
44d8db9e
LP
5183 size_t k;
5184 char *n, *p, **a;
5185 bool first = true;
5186
9e2f7c11 5187 assert(argv);
44d8db9e 5188
9164977d 5189 k = 1;
9e2f7c11 5190 STRV_FOREACH(a, argv)
44d8db9e
LP
5191 k += strlen(*a)+3;
5192
5cd9cd35
LP
5193 n = new(char, k);
5194 if (!n)
44d8db9e
LP
5195 return NULL;
5196
5197 p = n;
9e2f7c11 5198 STRV_FOREACH(a, argv) {
44d8db9e
LP
5199
5200 if (!first)
5201 *(p++) = ' ';
5202 else
5203 first = false;
5204
5205 if (strpbrk(*a, WHITESPACE)) {
5206 *(p++) = '\'';
5207 p = stpcpy(p, *a);
5208 *(p++) = '\'';
5209 } else
5210 p = stpcpy(p, *a);
5211
5212 }
5213
9164977d
LP
5214 *p = 0;
5215
44d8db9e
LP
5216 /* FIXME: this doesn't really handle arguments that have
5217 * spaces and ticks in them */
5218
5219 return n;
5220}
5221
34cf6c43 5222static void exec_command_dump(ExecCommand *c, FILE *f, const char *prefix) {
e1d75803 5223 _cleanup_free_ char *cmd = NULL;
4c940960 5224 const char *prefix2;
44d8db9e
LP
5225
5226 assert(c);
5227 assert(f);
5228
4c940960 5229 prefix = strempty(prefix);
63c372cb 5230 prefix2 = strjoina(prefix, "\t");
44d8db9e 5231
9e2f7c11 5232 cmd = exec_command_line(c->argv);
44d8db9e
LP
5233 fprintf(f,
5234 "%sCommand Line: %s\n",
4bbccb02 5235 prefix, cmd ? cmd : strerror_safe(ENOMEM));
44d8db9e 5236
9fb86720 5237 exec_status_dump(&c->exec_status, f, prefix2);
44d8db9e
LP
5238}
5239
5240void exec_command_dump_list(ExecCommand *c, FILE *f, const char *prefix) {
5241 assert(f);
5242
4c940960 5243 prefix = strempty(prefix);
44d8db9e
LP
5244
5245 LIST_FOREACH(command, c, c)
5246 exec_command_dump(c, f, prefix);
5247}
94f04347 5248
a6a80b4f
LP
5249void exec_command_append_list(ExecCommand **l, ExecCommand *e) {
5250 ExecCommand *end;
5251
5252 assert(l);
5253 assert(e);
5254
5255 if (*l) {
35b8ca3a 5256 /* It's kind of important, that we keep the order here */
71fda00f
LP
5257 LIST_FIND_TAIL(command, *l, end);
5258 LIST_INSERT_AFTER(command, *l, end, e);
a6a80b4f
LP
5259 } else
5260 *l = e;
5261}
5262
26fd040d
LP
5263int exec_command_set(ExecCommand *c, const char *path, ...) {
5264 va_list ap;
5265 char **l, *p;
5266
5267 assert(c);
5268 assert(path);
5269
5270 va_start(ap, path);
5271 l = strv_new_ap(path, ap);
5272 va_end(ap);
5273
5274 if (!l)
5275 return -ENOMEM;
5276
250a918d
LP
5277 p = strdup(path);
5278 if (!p) {
26fd040d
LP
5279 strv_free(l);
5280 return -ENOMEM;
5281 }
5282
6897dfe8 5283 free_and_replace(c->path, p);
26fd040d 5284
130d3d22 5285 return strv_free_and_replace(c->argv, l);
26fd040d
LP
5286}
5287
86b23b07 5288int exec_command_append(ExecCommand *c, const char *path, ...) {
e63ff941 5289 _cleanup_strv_free_ char **l = NULL;
86b23b07 5290 va_list ap;
86b23b07
JS
5291 int r;
5292
5293 assert(c);
5294 assert(path);
5295
5296 va_start(ap, path);
5297 l = strv_new_ap(path, ap);
5298 va_end(ap);
5299
5300 if (!l)
5301 return -ENOMEM;
5302
e287086b 5303 r = strv_extend_strv(&c->argv, l, false);
e63ff941 5304 if (r < 0)
86b23b07 5305 return r;
86b23b07
JS
5306
5307 return 0;
5308}
5309
e8a565cb
YW
5310static void *remove_tmpdir_thread(void *p) {
5311 _cleanup_free_ char *path = p;
86b23b07 5312
e8a565cb
YW
5313 (void) rm_rf(path, REMOVE_ROOT|REMOVE_PHYSICAL);
5314 return NULL;
5315}
5316
5317static ExecRuntime* exec_runtime_free(ExecRuntime *rt, bool destroy) {
5318 int r;
5319
5320 if (!rt)
5321 return NULL;
5322
5323 if (rt->manager)
5324 (void) hashmap_remove(rt->manager->exec_runtime_by_id, rt->id);
5325
5326 /* When destroy is true, then rm_rf tmp_dir and var_tmp_dir. */
5327 if (destroy && rt->tmp_dir) {
5328 log_debug("Spawning thread to nuke %s", rt->tmp_dir);
5329
5330 r = asynchronous_job(remove_tmpdir_thread, rt->tmp_dir);
5331 if (r < 0) {
5332 log_warning_errno(r, "Failed to nuke %s: %m", rt->tmp_dir);
5333 free(rt->tmp_dir);
5334 }
5335
5336 rt->tmp_dir = NULL;
5337 }
613b411c 5338
e8a565cb
YW
5339 if (destroy && rt->var_tmp_dir) {
5340 log_debug("Spawning thread to nuke %s", rt->var_tmp_dir);
5341
5342 r = asynchronous_job(remove_tmpdir_thread, rt->var_tmp_dir);
5343 if (r < 0) {
5344 log_warning_errno(r, "Failed to nuke %s: %m", rt->var_tmp_dir);
5345 free(rt->var_tmp_dir);
5346 }
5347
5348 rt->var_tmp_dir = NULL;
5349 }
5350
5351 rt->id = mfree(rt->id);
5352 rt->tmp_dir = mfree(rt->tmp_dir);
5353 rt->var_tmp_dir = mfree(rt->var_tmp_dir);
5354 safe_close_pair(rt->netns_storage_socket);
5355 return mfree(rt);
5356}
5357
5358static void exec_runtime_freep(ExecRuntime **rt) {
da6bc6ed 5359 (void) exec_runtime_free(*rt, false);
e8a565cb
YW
5360}
5361
8e8009dc
LP
5362static int exec_runtime_allocate(ExecRuntime **ret) {
5363 ExecRuntime *n;
613b411c 5364
8e8009dc 5365 assert(ret);
613b411c 5366
8e8009dc
LP
5367 n = new(ExecRuntime, 1);
5368 if (!n)
613b411c
LP
5369 return -ENOMEM;
5370
8e8009dc
LP
5371 *n = (ExecRuntime) {
5372 .netns_storage_socket = { -1, -1 },
5373 };
5374
5375 *ret = n;
613b411c
LP
5376 return 0;
5377}
5378
e8a565cb
YW
5379static int exec_runtime_add(
5380 Manager *m,
5381 const char *id,
5382 const char *tmp_dir,
5383 const char *var_tmp_dir,
5384 const int netns_storage_socket[2],
5385 ExecRuntime **ret) {
5386
5387 _cleanup_(exec_runtime_freep) ExecRuntime *rt = NULL;
613b411c
LP
5388 int r;
5389
e8a565cb 5390 assert(m);
613b411c
LP
5391 assert(id);
5392
e8a565cb
YW
5393 r = hashmap_ensure_allocated(&m->exec_runtime_by_id, &string_hash_ops);
5394 if (r < 0)
5395 return r;
613b411c 5396
e8a565cb 5397 r = exec_runtime_allocate(&rt);
613b411c
LP
5398 if (r < 0)
5399 return r;
5400
e8a565cb
YW
5401 rt->id = strdup(id);
5402 if (!rt->id)
5403 return -ENOMEM;
5404
5405 if (tmp_dir) {
5406 rt->tmp_dir = strdup(tmp_dir);
5407 if (!rt->tmp_dir)
5408 return -ENOMEM;
5409
5410 /* When tmp_dir is set, then we require var_tmp_dir is also set. */
5411 assert(var_tmp_dir);
5412 rt->var_tmp_dir = strdup(var_tmp_dir);
5413 if (!rt->var_tmp_dir)
5414 return -ENOMEM;
5415 }
5416
5417 if (netns_storage_socket) {
5418 rt->netns_storage_socket[0] = netns_storage_socket[0];
5419 rt->netns_storage_socket[1] = netns_storage_socket[1];
613b411c
LP
5420 }
5421
e8a565cb
YW
5422 r = hashmap_put(m->exec_runtime_by_id, rt->id, rt);
5423 if (r < 0)
5424 return r;
5425
5426 rt->manager = m;
5427
5428 if (ret)
5429 *ret = rt;
5430
5431 /* do not remove created ExecRuntime object when the operation succeeds. */
5432 rt = NULL;
5433 return 0;
5434}
5435
5436static int exec_runtime_make(Manager *m, const ExecContext *c, const char *id, ExecRuntime **ret) {
5437 _cleanup_free_ char *tmp_dir = NULL, *var_tmp_dir = NULL;
2fa3742d 5438 _cleanup_close_pair_ int netns_storage_socket[2] = { -1, -1 };
e8a565cb
YW
5439 int r;
5440
5441 assert(m);
5442 assert(c);
5443 assert(id);
5444
5445 /* It is not necessary to create ExecRuntime object. */
a8d08f39 5446 if (!c->private_network && !c->private_tmp && !c->network_namespace_path)
e8a565cb
YW
5447 return 0;
5448
efa2f3a1
TM
5449 if (c->private_tmp &&
5450 !(prefixed_path_strv_contains(c->inaccessible_paths, "/tmp") &&
5451 (prefixed_path_strv_contains(c->inaccessible_paths, "/var/tmp") ||
5452 prefixed_path_strv_contains(c->inaccessible_paths, "/var")))) {
e8a565cb 5453 r = setup_tmp_dirs(id, &tmp_dir, &var_tmp_dir);
613b411c
LP
5454 if (r < 0)
5455 return r;
5456 }
5457
a8d08f39 5458 if (c->private_network || c->network_namespace_path) {
e8a565cb
YW
5459 if (socketpair(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC, 0, netns_storage_socket) < 0)
5460 return -errno;
5461 }
5462
5463 r = exec_runtime_add(m, id, tmp_dir, var_tmp_dir, netns_storage_socket, ret);
5464 if (r < 0)
5465 return r;
5466
5467 /* Avoid cleanup */
2fa3742d 5468 netns_storage_socket[0] = netns_storage_socket[1] = -1;
613b411c
LP
5469 return 1;
5470}
5471
e8a565cb
YW
5472int exec_runtime_acquire(Manager *m, const ExecContext *c, const char *id, bool create, ExecRuntime **ret) {
5473 ExecRuntime *rt;
5474 int r;
613b411c 5475
e8a565cb
YW
5476 assert(m);
5477 assert(id);
5478 assert(ret);
5479
5480 rt = hashmap_get(m->exec_runtime_by_id, id);
5481 if (rt)
5482 /* We already have a ExecRuntime object, let's increase the ref count and reuse it */
5483 goto ref;
5484
5485 if (!create)
5486 return 0;
5487
5488 /* If not found, then create a new object. */
5489 r = exec_runtime_make(m, c, id, &rt);
5490 if (r <= 0)
5491 /* When r == 0, it is not necessary to create ExecRuntime object. */
5492 return r;
613b411c 5493
e8a565cb
YW
5494ref:
5495 /* increment reference counter. */
5496 rt->n_ref++;
5497 *ret = rt;
5498 return 1;
5499}
613b411c 5500
e8a565cb
YW
5501ExecRuntime *exec_runtime_unref(ExecRuntime *rt, bool destroy) {
5502 if (!rt)
613b411c
LP
5503 return NULL;
5504
e8a565cb 5505 assert(rt->n_ref > 0);
613b411c 5506
e8a565cb
YW
5507 rt->n_ref--;
5508 if (rt->n_ref > 0)
f2341e0a
LP
5509 return NULL;
5510
e8a565cb 5511 return exec_runtime_free(rt, destroy);
613b411c
LP
5512}
5513
e8a565cb
YW
5514int exec_runtime_serialize(const Manager *m, FILE *f, FDSet *fds) {
5515 ExecRuntime *rt;
5516 Iterator i;
5517
5518 assert(m);
613b411c
LP
5519 assert(f);
5520 assert(fds);
5521
e8a565cb
YW
5522 HASHMAP_FOREACH(rt, m->exec_runtime_by_id, i) {
5523 fprintf(f, "exec-runtime=%s", rt->id);
613b411c 5524
e8a565cb
YW
5525 if (rt->tmp_dir)
5526 fprintf(f, " tmp-dir=%s", rt->tmp_dir);
613b411c 5527
e8a565cb
YW
5528 if (rt->var_tmp_dir)
5529 fprintf(f, " var-tmp-dir=%s", rt->var_tmp_dir);
613b411c 5530
e8a565cb
YW
5531 if (rt->netns_storage_socket[0] >= 0) {
5532 int copy;
613b411c 5533
e8a565cb
YW
5534 copy = fdset_put_dup(fds, rt->netns_storage_socket[0]);
5535 if (copy < 0)
5536 return copy;
613b411c 5537
e8a565cb
YW
5538 fprintf(f, " netns-socket-0=%i", copy);
5539 }
613b411c 5540
e8a565cb
YW
5541 if (rt->netns_storage_socket[1] >= 0) {
5542 int copy;
613b411c 5543
e8a565cb
YW
5544 copy = fdset_put_dup(fds, rt->netns_storage_socket[1]);
5545 if (copy < 0)
5546 return copy;
613b411c 5547
e8a565cb
YW
5548 fprintf(f, " netns-socket-1=%i", copy);
5549 }
5550
5551 fputc('\n', f);
613b411c
LP
5552 }
5553
5554 return 0;
5555}
5556
e8a565cb
YW
5557int exec_runtime_deserialize_compat(Unit *u, const char *key, const char *value, FDSet *fds) {
5558 _cleanup_(exec_runtime_freep) ExecRuntime *rt_create = NULL;
5559 ExecRuntime *rt;
613b411c
LP
5560 int r;
5561
e8a565cb
YW
5562 /* This is for the migration from old (v237 or earlier) deserialization text.
5563 * Due to the bug #7790, this may not work with the units that use JoinsNamespaceOf=.
5564 * Even if the ExecRuntime object originally created by the other unit, we cannot judge
5565 * so or not from the serialized text, then we always creates a new object owned by this. */
5566
5567 assert(u);
613b411c
LP
5568 assert(key);
5569 assert(value);
5570
e8a565cb
YW
5571 /* Manager manages ExecRuntime objects by the unit id.
5572 * So, we omit the serialized text when the unit does not have id (yet?)... */
5573 if (isempty(u->id)) {
5574 log_unit_debug(u, "Invocation ID not found. Dropping runtime parameter.");
5575 return 0;
5576 }
613b411c 5577
e8a565cb
YW
5578 r = hashmap_ensure_allocated(&u->manager->exec_runtime_by_id, &string_hash_ops);
5579 if (r < 0) {
5580 log_unit_debug_errno(u, r, "Failed to allocate storage for runtime parameter: %m");
5581 return 0;
5582 }
5583
5584 rt = hashmap_get(u->manager->exec_runtime_by_id, u->id);
5585 if (!rt) {
5586 r = exec_runtime_allocate(&rt_create);
613b411c 5587 if (r < 0)
f2341e0a 5588 return log_oom();
613b411c 5589
e8a565cb
YW
5590 rt_create->id = strdup(u->id);
5591 if (!rt_create->id)
5592 return log_oom();
5593
5594 rt = rt_create;
5595 }
5596
5597 if (streq(key, "tmp-dir")) {
5598 char *copy;
5599
613b411c
LP
5600 copy = strdup(value);
5601 if (!copy)
5602 return log_oom();
5603
e8a565cb 5604 free_and_replace(rt->tmp_dir, copy);
613b411c
LP
5605
5606 } else if (streq(key, "var-tmp-dir")) {
5607 char *copy;
5608
613b411c
LP
5609 copy = strdup(value);
5610 if (!copy)
5611 return log_oom();
5612
e8a565cb 5613 free_and_replace(rt->var_tmp_dir, copy);
613b411c
LP
5614
5615 } else if (streq(key, "netns-socket-0")) {
5616 int fd;
5617
e8a565cb 5618 if (safe_atoi(value, &fd) < 0 || !fdset_contains(fds, fd)) {
f2341e0a 5619 log_unit_debug(u, "Failed to parse netns socket value: %s", value);
e8a565cb 5620 return 0;
613b411c 5621 }
e8a565cb
YW
5622
5623 safe_close(rt->netns_storage_socket[0]);
5624 rt->netns_storage_socket[0] = fdset_remove(fds, fd);
5625
613b411c
LP
5626 } else if (streq(key, "netns-socket-1")) {
5627 int fd;
5628
e8a565cb 5629 if (safe_atoi(value, &fd) < 0 || !fdset_contains(fds, fd)) {
f2341e0a 5630 log_unit_debug(u, "Failed to parse netns socket value: %s", value);
e8a565cb 5631 return 0;
613b411c 5632 }
e8a565cb
YW
5633
5634 safe_close(rt->netns_storage_socket[1]);
5635 rt->netns_storage_socket[1] = fdset_remove(fds, fd);
613b411c
LP
5636 } else
5637 return 0;
5638
e8a565cb
YW
5639 /* If the object is newly created, then put it to the hashmap which manages ExecRuntime objects. */
5640 if (rt_create) {
5641 r = hashmap_put(u->manager->exec_runtime_by_id, rt_create->id, rt_create);
5642 if (r < 0) {
3fe91079 5643 log_unit_debug_errno(u, r, "Failed to put runtime parameter to manager's storage: %m");
e8a565cb
YW
5644 return 0;
5645 }
613b411c 5646
e8a565cb 5647 rt_create->manager = u->manager;
613b411c 5648
e8a565cb
YW
5649 /* Avoid cleanup */
5650 rt_create = NULL;
5651 }
98b47d54 5652
e8a565cb
YW
5653 return 1;
5654}
613b411c 5655
e8a565cb
YW
5656void exec_runtime_deserialize_one(Manager *m, const char *value, FDSet *fds) {
5657 char *id = NULL, *tmp_dir = NULL, *var_tmp_dir = NULL;
5658 int r, fd0 = -1, fd1 = -1;
5659 const char *p, *v = value;
5660 size_t n;
613b411c 5661
e8a565cb
YW
5662 assert(m);
5663 assert(value);
5664 assert(fds);
98b47d54 5665
e8a565cb
YW
5666 n = strcspn(v, " ");
5667 id = strndupa(v, n);
5668 if (v[n] != ' ')
5669 goto finalize;
5670 p = v + n + 1;
5671
5672 v = startswith(p, "tmp-dir=");
5673 if (v) {
5674 n = strcspn(v, " ");
5675 tmp_dir = strndupa(v, n);
5676 if (v[n] != ' ')
5677 goto finalize;
5678 p = v + n + 1;
5679 }
5680
5681 v = startswith(p, "var-tmp-dir=");
5682 if (v) {
5683 n = strcspn(v, " ");
5684 var_tmp_dir = strndupa(v, n);
5685 if (v[n] != ' ')
5686 goto finalize;
5687 p = v + n + 1;
5688 }
5689
5690 v = startswith(p, "netns-socket-0=");
5691 if (v) {
5692 char *buf;
5693
5694 n = strcspn(v, " ");
5695 buf = strndupa(v, n);
5696 if (safe_atoi(buf, &fd0) < 0 || !fdset_contains(fds, fd0)) {
5697 log_debug("Unable to process exec-runtime netns fd specification.");
5698 return;
98b47d54 5699 }
e8a565cb
YW
5700 fd0 = fdset_remove(fds, fd0);
5701 if (v[n] != ' ')
5702 goto finalize;
5703 p = v + n + 1;
613b411c
LP
5704 }
5705
e8a565cb
YW
5706 v = startswith(p, "netns-socket-1=");
5707 if (v) {
5708 char *buf;
98b47d54 5709
e8a565cb
YW
5710 n = strcspn(v, " ");
5711 buf = strndupa(v, n);
5712 if (safe_atoi(buf, &fd1) < 0 || !fdset_contains(fds, fd1)) {
5713 log_debug("Unable to process exec-runtime netns fd specification.");
5714 return;
98b47d54 5715 }
e8a565cb
YW
5716 fd1 = fdset_remove(fds, fd1);
5717 }
98b47d54 5718
e8a565cb
YW
5719finalize:
5720
5721 r = exec_runtime_add(m, id, tmp_dir, var_tmp_dir, (int[]) { fd0, fd1 }, NULL);
7d853ca6 5722 if (r < 0)
e8a565cb 5723 log_debug_errno(r, "Failed to add exec-runtime: %m");
e8a565cb 5724}
613b411c 5725
e8a565cb
YW
5726void exec_runtime_vacuum(Manager *m) {
5727 ExecRuntime *rt;
5728 Iterator i;
5729
5730 assert(m);
5731
5732 /* Free unreferenced ExecRuntime objects. This is used after manager deserialization process. */
5733
5734 HASHMAP_FOREACH(rt, m->exec_runtime_by_id, i) {
5735 if (rt->n_ref > 0)
5736 continue;
5737
5738 (void) exec_runtime_free(rt, false);
5739 }
613b411c
LP
5740}
5741
b9c04eaf
YW
5742void exec_params_clear(ExecParameters *p) {
5743 if (!p)
5744 return;
5745
5746 strv_free(p->environment);
5747}
5748
80876c20
LP
5749static const char* const exec_input_table[_EXEC_INPUT_MAX] = {
5750 [EXEC_INPUT_NULL] = "null",
5751 [EXEC_INPUT_TTY] = "tty",
5752 [EXEC_INPUT_TTY_FORCE] = "tty-force",
4f2d528d 5753 [EXEC_INPUT_TTY_FAIL] = "tty-fail",
52c239d7
LB
5754 [EXEC_INPUT_SOCKET] = "socket",
5755 [EXEC_INPUT_NAMED_FD] = "fd",
08f3be7a 5756 [EXEC_INPUT_DATA] = "data",
2038c3f5 5757 [EXEC_INPUT_FILE] = "file",
80876c20
LP
5758};
5759
8a0867d6
LP
5760DEFINE_STRING_TABLE_LOOKUP(exec_input, ExecInput);
5761
94f04347 5762static const char* const exec_output_table[_EXEC_OUTPUT_MAX] = {
80876c20 5763 [EXEC_OUTPUT_INHERIT] = "inherit",
94f04347 5764 [EXEC_OUTPUT_NULL] = "null",
80876c20 5765 [EXEC_OUTPUT_TTY] = "tty",
94f04347 5766 [EXEC_OUTPUT_SYSLOG] = "syslog",
28dbc1e8 5767 [EXEC_OUTPUT_SYSLOG_AND_CONSOLE] = "syslog+console",
9a6bca7a 5768 [EXEC_OUTPUT_KMSG] = "kmsg",
28dbc1e8 5769 [EXEC_OUTPUT_KMSG_AND_CONSOLE] = "kmsg+console",
706343f4
LP
5770 [EXEC_OUTPUT_JOURNAL] = "journal",
5771 [EXEC_OUTPUT_JOURNAL_AND_CONSOLE] = "journal+console",
52c239d7
LB
5772 [EXEC_OUTPUT_SOCKET] = "socket",
5773 [EXEC_OUTPUT_NAMED_FD] = "fd",
2038c3f5 5774 [EXEC_OUTPUT_FILE] = "file",
566b7d23 5775 [EXEC_OUTPUT_FILE_APPEND] = "append",
94f04347
LP
5776};
5777
5778DEFINE_STRING_TABLE_LOOKUP(exec_output, ExecOutput);
023a4f67
LP
5779
5780static const char* const exec_utmp_mode_table[_EXEC_UTMP_MODE_MAX] = {
5781 [EXEC_UTMP_INIT] = "init",
5782 [EXEC_UTMP_LOGIN] = "login",
5783 [EXEC_UTMP_USER] = "user",
5784};
5785
5786DEFINE_STRING_TABLE_LOOKUP(exec_utmp_mode, ExecUtmpMode);
53f47dfc
YW
5787
5788static const char* const exec_preserve_mode_table[_EXEC_PRESERVE_MODE_MAX] = {
5789 [EXEC_PRESERVE_NO] = "no",
5790 [EXEC_PRESERVE_YES] = "yes",
5791 [EXEC_PRESERVE_RESTART] = "restart",
5792};
5793
5794DEFINE_STRING_TABLE_LOOKUP_WITH_BOOLEAN(exec_preserve_mode, ExecPreserveMode, EXEC_PRESERVE_YES);
3536f49e 5795
6b7b2ed9 5796/* This table maps ExecDirectoryType to the setting it is configured with in the unit */
72fd1768 5797static const char* const exec_directory_type_table[_EXEC_DIRECTORY_TYPE_MAX] = {
3536f49e
YW
5798 [EXEC_DIRECTORY_RUNTIME] = "RuntimeDirectory",
5799 [EXEC_DIRECTORY_STATE] = "StateDirectory",
5800 [EXEC_DIRECTORY_CACHE] = "CacheDirectory",
5801 [EXEC_DIRECTORY_LOGS] = "LogsDirectory",
5802 [EXEC_DIRECTORY_CONFIGURATION] = "ConfigurationDirectory",
5803};
5804
5805DEFINE_STRING_TABLE_LOOKUP(exec_directory_type, ExecDirectoryType);
b1edf445 5806
6b7b2ed9
LP
5807/* And this table maps ExecDirectoryType too, but to a generic term identifying the type of resource. This
5808 * one is supposed to be generic enough to be used for unit types that don't use ExecContext and per-unit
5809 * directories, specifically .timer units with their timestamp touch file. */
5810static const char* const exec_resource_type_table[_EXEC_DIRECTORY_TYPE_MAX] = {
5811 [EXEC_DIRECTORY_RUNTIME] = "runtime",
5812 [EXEC_DIRECTORY_STATE] = "state",
5813 [EXEC_DIRECTORY_CACHE] = "cache",
5814 [EXEC_DIRECTORY_LOGS] = "logs",
5815 [EXEC_DIRECTORY_CONFIGURATION] = "configuration",
5816};
5817
5818DEFINE_STRING_TABLE_LOOKUP(exec_resource_type, ExecDirectoryType);
5819
5820/* And this table also maps ExecDirectoryType, to the environment variable we pass the selected directory to
5821 * the service payload in. */
fb2042dd
YW
5822static const char* const exec_directory_env_name_table[_EXEC_DIRECTORY_TYPE_MAX] = {
5823 [EXEC_DIRECTORY_RUNTIME] = "RUNTIME_DIRECTORY",
5824 [EXEC_DIRECTORY_STATE] = "STATE_DIRECTORY",
5825 [EXEC_DIRECTORY_CACHE] = "CACHE_DIRECTORY",
5826 [EXEC_DIRECTORY_LOGS] = "LOGS_DIRECTORY",
5827 [EXEC_DIRECTORY_CONFIGURATION] = "CONFIGURATION_DIRECTORY",
5828};
5829
5830DEFINE_PRIVATE_STRING_TABLE_LOOKUP_TO_STRING(exec_directory_env_name, ExecDirectoryType);
5831
b1edf445
LP
5832static const char* const exec_keyring_mode_table[_EXEC_KEYRING_MODE_MAX] = {
5833 [EXEC_KEYRING_INHERIT] = "inherit",
5834 [EXEC_KEYRING_PRIVATE] = "private",
5835 [EXEC_KEYRING_SHARED] = "shared",
5836};
5837
5838DEFINE_STRING_TABLE_LOOKUP(exec_keyring_mode, ExecKeyringMode);