]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/core/execute.c
execute: dump CPUAffinity as a range string instead of a list of CPUs
[thirdparty/systemd.git] / src / core / execute.c
CommitLineData
53e1b683 1/* SPDX-License-Identifier: LGPL-2.1+ */
a7334b09 2
034c6ed7
LP
3#include <errno.h>
4#include <fcntl.h>
8dd4c05b
LP
5#include <glob.h>
6#include <grp.h>
7#include <poll.h>
309bff19 8#include <signal.h>
8dd4c05b 9#include <string.h>
19c0b0b9 10#include <sys/capability.h>
d251207d 11#include <sys/eventfd.h>
f3e43635 12#include <sys/mman.h>
8dd4c05b 13#include <sys/personality.h>
94f04347 14#include <sys/prctl.h>
d2ffa389 15#include <sys/shm.h>
8dd4c05b 16#include <sys/socket.h>
451a074f 17#include <sys/stat.h>
d2ffa389 18#include <sys/types.h>
8dd4c05b
LP
19#include <sys/un.h>
20#include <unistd.h>
023a4f67 21#include <utmpx.h>
5cb5a6ff 22
349cc4a5 23#if HAVE_PAM
5b6319dc
LP
24#include <security/pam_appl.h>
25#endif
26
349cc4a5 27#if HAVE_SELINUX
7b52a628
MS
28#include <selinux/selinux.h>
29#endif
30
349cc4a5 31#if HAVE_SECCOMP
17df7223
LP
32#include <seccomp.h>
33#endif
34
349cc4a5 35#if HAVE_APPARMOR
eef65bf3
MS
36#include <sys/apparmor.h>
37#endif
38
24882e06 39#include "sd-messages.h"
8dd4c05b
LP
40
41#include "af-list.h"
b5efdb8a 42#include "alloc-util.h"
349cc4a5 43#if HAVE_APPARMOR
3ffd4af2
LP
44#include "apparmor-util.h"
45#endif
8dd4c05b
LP
46#include "async.h"
47#include "barrier.h"
8dd4c05b 48#include "cap-list.h"
430f0182 49#include "capability-util.h"
a1164ae3 50#include "chown-recursive.h"
da681e1b 51#include "cpu-set-util.h"
f6a6225e 52#include "def.h"
686d13b9 53#include "env-file.h"
4d1a6904 54#include "env-util.h"
17df7223 55#include "errno-list.h"
3ffd4af2 56#include "execute.h"
8dd4c05b 57#include "exit-status.h"
3ffd4af2 58#include "fd-util.h"
f97b34a6 59#include "format-util.h"
f4f15635 60#include "fs-util.h"
7d50b32a 61#include "glob-util.h"
c004493c 62#include "io-util.h"
8dd4c05b 63#include "ioprio.h"
a1164ae3 64#include "label.h"
8dd4c05b
LP
65#include "log.h"
66#include "macro.h"
e8a565cb 67#include "manager.h"
0a970718 68#include "memory-util.h"
8dd4c05b
LP
69#include "missing.h"
70#include "mkdir.h"
71#include "namespace.h"
6bedfcbb 72#include "parse-util.h"
8dd4c05b 73#include "path-util.h"
0b452006 74#include "process-util.h"
78f22b97 75#include "rlimit-util.h"
8dd4c05b 76#include "rm-rf.h"
349cc4a5 77#if HAVE_SECCOMP
3ffd4af2
LP
78#include "seccomp-util.h"
79#endif
07d46372 80#include "securebits-util.h"
8dd4c05b 81#include "selinux-util.h"
24882e06 82#include "signal-util.h"
8dd4c05b 83#include "smack-util.h"
57b7a260 84#include "socket-util.h"
fd63e712 85#include "special.h"
949befd3 86#include "stat-util.h"
8b43440b 87#include "string-table.h"
07630cea 88#include "string-util.h"
8dd4c05b 89#include "strv.h"
7ccbd1ae 90#include "syslog-util.h"
8dd4c05b 91#include "terminal-util.h"
566b7d23 92#include "umask-util.h"
8dd4c05b 93#include "unit.h"
b1d4f8e1 94#include "user-util.h"
8dd4c05b 95#include "utmp-wtmp.h"
5cb5a6ff 96
/* Idle timeouts: 5s and 1s. NOTE(review): the code consuming these two values is not visible in this
 * part of the file — confirm their exact role against the users of the macros. */
#define IDLE_TIMEOUT_USEC (5 * USEC_PER_SEC)
#define IDLE_TIMEOUT2_USEC (1 * USEC_PER_SEC)

/* Send buffer size (8 MiB) requested via fd_inc_sndbuf() for the stream socket connected to the journal. */
#define SNDBUF_SIZE (8 * 1024 * 1024)
101
/* Moves the n_fds file descriptors in fds[] so that fds[k] ends up as fd number k+3.
 *
 * The array is modified in place: every entry that had to be moved is replaced by its new fd number and
 * the old fd is closed. Returns 0 on success, or a negative errno if fcntl(F_DUPFD) fails. */
static int shift_fds(int fds[], size_t n_fds) {
        int first = 0;

        if (n_fds == 0)
                return 0;

        assert(fds);

        do {
                int retry_at = -1, idx;

                for (idx = first; idx < (int) n_fds; idx++) {
                        int want = idx + 3, got;

                        /* Nothing to do if this one already sits in its final slot. */
                        if (fds[idx] == want)
                                continue;

                        got = fcntl(fds[idx], F_DUPFD, want);
                        if (got < 0)
                                return -errno;

                        safe_close(fds[idx]);
                        fds[idx] = got;

                        /* F_DUPFD hands out the lowest free fd >= want. If that wasn't 'want' itself the
                         * slot is still occupied; remember the first such index so that we can take
                         * another pass starting there. */
                        if (got != want && retry_at < 0)
                                retry_at = idx;
                }

                first = retry_at;
        } while (first >= 0);

        return 0;
}
146
/* Adjusts the file descriptor flags of the fds we are about to pass on.
 *
 * The first n_socket_fds entries of fds[] get O_NONBLOCK set or cleared according to 'nonblock'; the
 * remaining n_storage_fds entries are left alone in that regard. FD_CLOEXEC is dropped from all entries.
 * Returns 0 on success, or the negative error returned by fd_nonblock()/fd_cloexec(). */
static int flags_fds(const int fds[], size_t n_socket_fds, size_t n_storage_fds, bool nonblock) {
        size_t total = n_socket_fds + n_storage_fds, k;

        if (total == 0)
                return 0;

        assert(fds);

        for (k = 0; k < total; k++) {
                int r;

                /* O_NONBLOCK is only relevant for the socket activation fds. */
                if (k < n_socket_fds) {
                        r = fd_nonblock(fds[k], nonblock);
                        if (r < 0)
                                return r;
                }

                /* These fds are meant to survive into the child, hence FD_CLOEXEC always goes away. */
                r = fd_cloexec(fds[k], false);
                if (r < 0)
                        return r;
        }

        return 0;
}
179
1e22b5cd 180static const char *exec_context_tty_path(const ExecContext *context) {
80876c20
LP
181 assert(context);
182
1e22b5cd
LP
183 if (context->stdio_as_fds)
184 return NULL;
185
80876c20
LP
186 if (context->tty_path)
187 return context->tty_path;
188
189 return "/dev/console";
190}
191
1e22b5cd
LP
192static void exec_context_tty_reset(const ExecContext *context, const ExecParameters *p) {
193 const char *path;
194
6ea832a2
LP
195 assert(context);
196
1e22b5cd 197 path = exec_context_tty_path(context);
6ea832a2 198
1e22b5cd
LP
199 if (context->tty_vhangup) {
200 if (p && p->stdin_fd >= 0)
201 (void) terminal_vhangup_fd(p->stdin_fd);
202 else if (path)
203 (void) terminal_vhangup(path);
204 }
6ea832a2 205
1e22b5cd
LP
206 if (context->tty_reset) {
207 if (p && p->stdin_fd >= 0)
208 (void) reset_terminal_fd(p->stdin_fd, true);
209 else if (path)
210 (void) reset_terminal(path);
211 }
212
213 if (context->tty_vt_disallocate && path)
214 (void) vt_disallocate(path);
6ea832a2
LP
215}
216
6af760f3
LP
217static bool is_terminal_input(ExecInput i) {
218 return IN_SET(i,
219 EXEC_INPUT_TTY,
220 EXEC_INPUT_TTY_FORCE,
221 EXEC_INPUT_TTY_FAIL);
222}
223
3a1286b6 224static bool is_terminal_output(ExecOutput o) {
6af760f3
LP
225 return IN_SET(o,
226 EXEC_OUTPUT_TTY,
227 EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
228 EXEC_OUTPUT_KMSG_AND_CONSOLE,
229 EXEC_OUTPUT_JOURNAL_AND_CONSOLE);
230}
231
aac8c0c3
LP
232static bool is_syslog_output(ExecOutput o) {
233 return IN_SET(o,
234 EXEC_OUTPUT_SYSLOG,
235 EXEC_OUTPUT_SYSLOG_AND_CONSOLE);
236}
237
238static bool is_kmsg_output(ExecOutput o) {
239 return IN_SET(o,
240 EXEC_OUTPUT_KMSG,
241 EXEC_OUTPUT_KMSG_AND_CONSOLE);
242}
243
6af760f3
LP
244static bool exec_context_needs_term(const ExecContext *c) {
245 assert(c);
246
247 /* Return true if the execution context suggests we should set $TERM to something useful. */
248
249 if (is_terminal_input(c->std_input))
250 return true;
251
252 if (is_terminal_output(c->std_output))
253 return true;
254
255 if (is_terminal_output(c->std_error))
256 return true;
257
258 return !!c->tty_path;
3a1286b6
MS
259}
260
/* Opens /dev/null with the given open() flags (plus O_NOCTTY) and moves the resulting fd to 'nfd'.
 * Returns -errno if the open fails, otherwise whatever move_fd() returns. */
static int open_null_as(int flags, int nfd) {
        int null_fd;

        assert(nfd >= 0);

        null_fd = open("/dev/null", flags | O_NOCTTY);
        if (null_fd < 0)
                return -errno;

        return move_fd(null_fd, nfd, false);
}
272
524daa8c 273static int connect_journal_socket(int fd, uid_t uid, gid_t gid) {
92a17af9 274 static const union sockaddr_union sa = {
b92bea5d
ZJS
275 .un.sun_family = AF_UNIX,
276 .un.sun_path = "/run/systemd/journal/stdout",
277 };
524daa8c
ZJS
278 uid_t olduid = UID_INVALID;
279 gid_t oldgid = GID_INVALID;
280 int r;
281
cad93f29 282 if (gid_is_valid(gid)) {
524daa8c
ZJS
283 oldgid = getgid();
284
92a17af9 285 if (setegid(gid) < 0)
524daa8c
ZJS
286 return -errno;
287 }
288
cad93f29 289 if (uid_is_valid(uid)) {
524daa8c
ZJS
290 olduid = getuid();
291
92a17af9 292 if (seteuid(uid) < 0) {
524daa8c
ZJS
293 r = -errno;
294 goto restore_gid;
295 }
296 }
297
92a17af9 298 r = connect(fd, &sa.sa, SOCKADDR_UN_LEN(sa.un)) < 0 ? -errno : 0;
524daa8c
ZJS
299
300 /* If we fail to restore the uid or gid, things will likely
301 fail later on. This should only happen if an LSM interferes. */
302
cad93f29 303 if (uid_is_valid(uid))
524daa8c
ZJS
304 (void) seteuid(olduid);
305
306 restore_gid:
cad93f29 307 if (gid_is_valid(gid))
524daa8c
ZJS
308 (void) setegid(oldgid);
309
310 return r;
311}
312
fd1f9c89 313static int connect_logger_as(
34cf6c43 314 const Unit *unit,
fd1f9c89 315 const ExecContext *context,
af635cf3 316 const ExecParameters *params,
fd1f9c89
LP
317 ExecOutput output,
318 const char *ident,
fd1f9c89
LP
319 int nfd,
320 uid_t uid,
321 gid_t gid) {
322
2ac1ff68
EV
323 _cleanup_close_ int fd = -1;
324 int r;
071830ff
LP
325
326 assert(context);
af635cf3 327 assert(params);
80876c20
LP
328 assert(output < _EXEC_OUTPUT_MAX);
329 assert(ident);
330 assert(nfd >= 0);
071830ff 331
54fe0cdb
LP
332 fd = socket(AF_UNIX, SOCK_STREAM, 0);
333 if (fd < 0)
80876c20 334 return -errno;
071830ff 335
524daa8c
ZJS
336 r = connect_journal_socket(fd, uid, gid);
337 if (r < 0)
338 return r;
071830ff 339
2ac1ff68 340 if (shutdown(fd, SHUT_RD) < 0)
80876c20 341 return -errno;
071830ff 342
fd1f9c89 343 (void) fd_inc_sndbuf(fd, SNDBUF_SIZE);
531dca78 344
2ac1ff68 345 if (dprintf(fd,
62bca2c6 346 "%s\n"
80876c20
LP
347 "%s\n"
348 "%i\n"
54fe0cdb
LP
349 "%i\n"
350 "%i\n"
351 "%i\n"
4f4a1dbf 352 "%i\n",
c867611e 353 context->syslog_identifier ?: ident,
af635cf3 354 params->flags & EXEC_PASS_LOG_UNIT ? unit->id : "",
54fe0cdb
LP
355 context->syslog_priority,
356 !!context->syslog_level_prefix,
aac8c0c3
LP
357 is_syslog_output(output),
358 is_kmsg_output(output),
2ac1ff68
EV
359 is_terminal_output(output)) < 0)
360 return -errno;
80876c20 361
2ac1ff68 362 return move_fd(TAKE_FD(fd), nfd, false);
80876c20 363}
2ac1ff68 364
/* Opens the terminal at 'path' with the given flags (plus O_NOCTTY) and moves the fd to 'nfd'. Returns
 * the negative error from open_terminal() on failure, otherwise whatever move_fd() returns. */
static int open_terminal_as(const char *path, int flags, int nfd) {
        int tty_fd;

        assert(path);
        assert(nfd >= 0);

        tty_fd = open_terminal(path, flags | O_NOCTTY);
        return tty_fd < 0 ? tty_fd : move_fd(tty_fd, nfd, false);
}
071830ff 377
2038c3f5 378static int acquire_path(const char *path, int flags, mode_t mode) {
15a3e96f
LP
379 union sockaddr_union sa = {};
380 _cleanup_close_ int fd = -1;
381 int r, salen;
071830ff 382
80876c20 383 assert(path);
071830ff 384
2038c3f5
LP
385 if (IN_SET(flags & O_ACCMODE, O_WRONLY, O_RDWR))
386 flags |= O_CREAT;
387
388 fd = open(path, flags|O_NOCTTY, mode);
389 if (fd >= 0)
15a3e96f 390 return TAKE_FD(fd);
071830ff 391
2038c3f5
LP
392 if (errno != ENXIO) /* ENXIO is returned when we try to open() an AF_UNIX file system socket on Linux */
393 return -errno;
15a3e96f 394 if (strlen(path) >= sizeof(sa.un.sun_path)) /* Too long, can't be a UNIX socket */
2038c3f5
LP
395 return -ENXIO;
396
397 /* So, it appears the specified path could be an AF_UNIX socket. Let's see if we can connect to it. */
398
399 fd = socket(AF_UNIX, SOCK_STREAM, 0);
400 if (fd < 0)
401 return -errno;
402
15a3e96f
LP
403 salen = sockaddr_un_set_path(&sa.un, path);
404 if (salen < 0)
405 return salen;
406
407 if (connect(fd, &sa.sa, salen) < 0)
2038c3f5
LP
408 return errno == EINVAL ? -ENXIO : -errno; /* Propagate initial error if we get EINVAL, i.e. we have
409 * indication that his wasn't an AF_UNIX socket after all */
071830ff 410
2038c3f5
LP
411 if ((flags & O_ACCMODE) == O_RDONLY)
412 r = shutdown(fd, SHUT_WR);
413 else if ((flags & O_ACCMODE) == O_WRONLY)
414 r = shutdown(fd, SHUT_RD);
415 else
15a3e96f
LP
416 return TAKE_FD(fd);
417 if (r < 0)
2038c3f5 418 return -errno;
2038c3f5 419
15a3e96f 420 return TAKE_FD(fd);
80876c20 421}
071830ff 422
08f3be7a
LP
423static int fixup_input(
424 const ExecContext *context,
425 int socket_fd,
426 bool apply_tty_stdin) {
427
428 ExecInput std_input;
429
430 assert(context);
431
432 std_input = context->std_input;
1e3ad081
LP
433
434 if (is_terminal_input(std_input) && !apply_tty_stdin)
435 return EXEC_INPUT_NULL;
071830ff 436
03fd9c49 437 if (std_input == EXEC_INPUT_SOCKET && socket_fd < 0)
4f2d528d
LP
438 return EXEC_INPUT_NULL;
439
08f3be7a
LP
440 if (std_input == EXEC_INPUT_DATA && context->stdin_data_size == 0)
441 return EXEC_INPUT_NULL;
442
03fd9c49 443 return std_input;
4f2d528d
LP
444}
445
03fd9c49 446static int fixup_output(ExecOutput std_output, int socket_fd) {
4f2d528d 447
03fd9c49 448 if (std_output == EXEC_OUTPUT_SOCKET && socket_fd < 0)
4f2d528d
LP
449 return EXEC_OUTPUT_INHERIT;
450
03fd9c49 451 return std_output;
4f2d528d
LP
452}
453
a34ceba6
LP
454static int setup_input(
455 const ExecContext *context,
456 const ExecParameters *params,
52c239d7
LB
457 int socket_fd,
458 int named_iofds[3]) {
a34ceba6 459
4f2d528d
LP
460 ExecInput i;
461
462 assert(context);
a34ceba6
LP
463 assert(params);
464
465 if (params->stdin_fd >= 0) {
466 if (dup2(params->stdin_fd, STDIN_FILENO) < 0)
467 return -errno;
468
469 /* Try to make this the controlling tty, if it is a tty, and reset it */
1fb0682e
LP
470 if (isatty(STDIN_FILENO)) {
471 (void) ioctl(STDIN_FILENO, TIOCSCTTY, context->std_input == EXEC_INPUT_TTY_FORCE);
472 (void) reset_terminal_fd(STDIN_FILENO, true);
473 }
a34ceba6
LP
474
475 return STDIN_FILENO;
476 }
4f2d528d 477
08f3be7a 478 i = fixup_input(context, socket_fd, params->flags & EXEC_APPLY_TTY_STDIN);
4f2d528d
LP
479
480 switch (i) {
071830ff 481
80876c20
LP
482 case EXEC_INPUT_NULL:
483 return open_null_as(O_RDONLY, STDIN_FILENO);
484
485 case EXEC_INPUT_TTY:
486 case EXEC_INPUT_TTY_FORCE:
487 case EXEC_INPUT_TTY_FAIL: {
046a82c1 488 int fd;
071830ff 489
1e22b5cd 490 fd = acquire_terminal(exec_context_tty_path(context),
8854d795
LP
491 i == EXEC_INPUT_TTY_FAIL ? ACQUIRE_TERMINAL_TRY :
492 i == EXEC_INPUT_TTY_FORCE ? ACQUIRE_TERMINAL_FORCE :
493 ACQUIRE_TERMINAL_WAIT,
3a43da28 494 USEC_INFINITY);
970edce6 495 if (fd < 0)
80876c20
LP
496 return fd;
497
046a82c1 498 return move_fd(fd, STDIN_FILENO, false);
80876c20
LP
499 }
500
4f2d528d 501 case EXEC_INPUT_SOCKET:
e75a9ed1
LP
502 assert(socket_fd >= 0);
503
4f2d528d
LP
504 return dup2(socket_fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
505
52c239d7 506 case EXEC_INPUT_NAMED_FD:
e75a9ed1
LP
507 assert(named_iofds[STDIN_FILENO] >= 0);
508
52c239d7
LB
509 (void) fd_nonblock(named_iofds[STDIN_FILENO], false);
510 return dup2(named_iofds[STDIN_FILENO], STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
511
08f3be7a
LP
512 case EXEC_INPUT_DATA: {
513 int fd;
514
515 fd = acquire_data_fd(context->stdin_data, context->stdin_data_size, 0);
516 if (fd < 0)
517 return fd;
518
519 return move_fd(fd, STDIN_FILENO, false);
520 }
521
2038c3f5
LP
522 case EXEC_INPUT_FILE: {
523 bool rw;
524 int fd;
525
526 assert(context->stdio_file[STDIN_FILENO]);
527
528 rw = (context->std_output == EXEC_OUTPUT_FILE && streq_ptr(context->stdio_file[STDIN_FILENO], context->stdio_file[STDOUT_FILENO])) ||
529 (context->std_error == EXEC_OUTPUT_FILE && streq_ptr(context->stdio_file[STDIN_FILENO], context->stdio_file[STDERR_FILENO]));
530
531 fd = acquire_path(context->stdio_file[STDIN_FILENO], rw ? O_RDWR : O_RDONLY, 0666 & ~context->umask);
532 if (fd < 0)
533 return fd;
534
535 return move_fd(fd, STDIN_FILENO, false);
536 }
537
80876c20
LP
538 default:
539 assert_not_reached("Unknown input type");
540 }
541}
542
41fc585a
LP
543static bool can_inherit_stderr_from_stdout(
544 const ExecContext *context,
545 ExecOutput o,
546 ExecOutput e) {
547
548 assert(context);
549
550 /* Returns true, if given the specified STDERR and STDOUT output we can directly dup() the stdout fd to the
551 * stderr fd */
552
553 if (e == EXEC_OUTPUT_INHERIT)
554 return true;
555 if (e != o)
556 return false;
557
558 if (e == EXEC_OUTPUT_NAMED_FD)
559 return streq_ptr(context->stdio_fdname[STDOUT_FILENO], context->stdio_fdname[STDERR_FILENO]);
560
561 if (IN_SET(e, EXEC_OUTPUT_FILE, EXEC_OUTPUT_FILE_APPEND))
562 return streq_ptr(context->stdio_file[STDOUT_FILENO], context->stdio_file[STDERR_FILENO]);
563
564 return true;
565}
566
a34ceba6 567static int setup_output(
34cf6c43 568 const Unit *unit,
a34ceba6
LP
569 const ExecContext *context,
570 const ExecParameters *params,
571 int fileno,
572 int socket_fd,
52c239d7 573 int named_iofds[3],
a34ceba6 574 const char *ident,
7bce046b
LP
575 uid_t uid,
576 gid_t gid,
577 dev_t *journal_stream_dev,
578 ino_t *journal_stream_ino) {
a34ceba6 579
4f2d528d
LP
580 ExecOutput o;
581 ExecInput i;
47c1d80d 582 int r;
4f2d528d 583
f2341e0a 584 assert(unit);
80876c20 585 assert(context);
a34ceba6 586 assert(params);
80876c20 587 assert(ident);
7bce046b
LP
588 assert(journal_stream_dev);
589 assert(journal_stream_ino);
80876c20 590
a34ceba6
LP
591 if (fileno == STDOUT_FILENO && params->stdout_fd >= 0) {
592
593 if (dup2(params->stdout_fd, STDOUT_FILENO) < 0)
594 return -errno;
595
596 return STDOUT_FILENO;
597 }
598
599 if (fileno == STDERR_FILENO && params->stderr_fd >= 0) {
600 if (dup2(params->stderr_fd, STDERR_FILENO) < 0)
601 return -errno;
602
603 return STDERR_FILENO;
604 }
605
08f3be7a 606 i = fixup_input(context, socket_fd, params->flags & EXEC_APPLY_TTY_STDIN);
03fd9c49 607 o = fixup_output(context->std_output, socket_fd);
4f2d528d 608
eb17e935
MS
609 if (fileno == STDERR_FILENO) {
610 ExecOutput e;
611 e = fixup_output(context->std_error, socket_fd);
80876c20 612
eb17e935
MS
613 /* This expects the input and output are already set up */
614
615 /* Don't change the stderr file descriptor if we inherit all
616 * the way and are not on a tty */
617 if (e == EXEC_OUTPUT_INHERIT &&
618 o == EXEC_OUTPUT_INHERIT &&
619 i == EXEC_INPUT_NULL &&
620 !is_terminal_input(context->std_input) &&
621 getppid () != 1)
622 return fileno;
623
624 /* Duplicate from stdout if possible */
41fc585a 625 if (can_inherit_stderr_from_stdout(context, o, e))
eb17e935 626 return dup2(STDOUT_FILENO, fileno) < 0 ? -errno : fileno;
071830ff 627
eb17e935 628 o = e;
80876c20 629
eb17e935 630 } else if (o == EXEC_OUTPUT_INHERIT) {
21d21ea4
LP
631 /* If input got downgraded, inherit the original value */
632 if (i == EXEC_INPUT_NULL && is_terminal_input(context->std_input))
1e22b5cd 633 return open_terminal_as(exec_context_tty_path(context), O_WRONLY, fileno);
21d21ea4 634
08f3be7a
LP
635 /* If the input is connected to anything that's not a /dev/null or a data fd, inherit that... */
636 if (!IN_SET(i, EXEC_INPUT_NULL, EXEC_INPUT_DATA))
eb17e935 637 return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
071830ff 638
acb591e4
LP
639 /* If we are not started from PID 1 we just inherit STDOUT from our parent process. */
640 if (getppid() != 1)
eb17e935 641 return fileno;
94f04347 642
eb17e935
MS
643 /* We need to open /dev/null here anew, to get the right access mode. */
644 return open_null_as(O_WRONLY, fileno);
071830ff 645 }
94f04347 646
eb17e935 647 switch (o) {
80876c20
LP
648
649 case EXEC_OUTPUT_NULL:
eb17e935 650 return open_null_as(O_WRONLY, fileno);
80876c20
LP
651
652 case EXEC_OUTPUT_TTY:
4f2d528d 653 if (is_terminal_input(i))
eb17e935 654 return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
80876c20
LP
655
656 /* We don't reset the terminal if this is just about output */
1e22b5cd 657 return open_terminal_as(exec_context_tty_path(context), O_WRONLY, fileno);
80876c20
LP
658
659 case EXEC_OUTPUT_SYSLOG:
28dbc1e8 660 case EXEC_OUTPUT_SYSLOG_AND_CONSOLE:
9a6bca7a 661 case EXEC_OUTPUT_KMSG:
28dbc1e8 662 case EXEC_OUTPUT_KMSG_AND_CONSOLE:
706343f4
LP
663 case EXEC_OUTPUT_JOURNAL:
664 case EXEC_OUTPUT_JOURNAL_AND_CONSOLE:
af635cf3 665 r = connect_logger_as(unit, context, params, o, ident, fileno, uid, gid);
47c1d80d 666 if (r < 0) {
82677ae4 667 log_unit_warning_errno(unit, r, "Failed to connect %s to the journal socket, ignoring: %m", fileno == STDOUT_FILENO ? "stdout" : "stderr");
eb17e935 668 r = open_null_as(O_WRONLY, fileno);
7bce046b
LP
669 } else {
670 struct stat st;
671
672 /* If we connected this fd to the journal via a stream, patch the device/inode into the passed
673 * parameters, but only then. This is useful so that we can set $JOURNAL_STREAM that permits
ab2116b1
LP
674 * services to detect whether they are connected to the journal or not.
675 *
676 * If both stdout and stderr are connected to a stream then let's make sure to store the data
677 * about STDERR as that's usually the best way to do logging. */
7bce046b 678
ab2116b1
LP
679 if (fstat(fileno, &st) >= 0 &&
680 (*journal_stream_ino == 0 || fileno == STDERR_FILENO)) {
7bce046b
LP
681 *journal_stream_dev = st.st_dev;
682 *journal_stream_ino = st.st_ino;
683 }
47c1d80d
MS
684 }
685 return r;
4f2d528d
LP
686
687 case EXEC_OUTPUT_SOCKET:
688 assert(socket_fd >= 0);
e75a9ed1 689
eb17e935 690 return dup2(socket_fd, fileno) < 0 ? -errno : fileno;
94f04347 691
52c239d7 692 case EXEC_OUTPUT_NAMED_FD:
e75a9ed1
LP
693 assert(named_iofds[fileno] >= 0);
694
52c239d7
LB
695 (void) fd_nonblock(named_iofds[fileno], false);
696 return dup2(named_iofds[fileno], fileno) < 0 ? -errno : fileno;
697
566b7d23
ZD
698 case EXEC_OUTPUT_FILE:
699 case EXEC_OUTPUT_FILE_APPEND: {
2038c3f5 700 bool rw;
566b7d23 701 int fd, flags;
2038c3f5
LP
702
703 assert(context->stdio_file[fileno]);
704
705 rw = context->std_input == EXEC_INPUT_FILE &&
706 streq_ptr(context->stdio_file[fileno], context->stdio_file[STDIN_FILENO]);
707
708 if (rw)
709 return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
710
566b7d23
ZD
711 flags = O_WRONLY;
712 if (o == EXEC_OUTPUT_FILE_APPEND)
713 flags |= O_APPEND;
714
715 fd = acquire_path(context->stdio_file[fileno], flags, 0666 & ~context->umask);
2038c3f5
LP
716 if (fd < 0)
717 return fd;
718
566b7d23 719 return move_fd(fd, fileno, 0);
2038c3f5
LP
720 }
721
94f04347 722 default:
80876c20 723 assert_not_reached("Unknown error type");
94f04347 724 }
071830ff
LP
725}
726
02a51aba 727static int chown_terminal(int fd, uid_t uid) {
4b3b5bc7 728 int r;
02a51aba
LP
729
730 assert(fd >= 0);
02a51aba 731
1ff74fb6 732 /* Before we chown/chmod the TTY, let's ensure this is actually a tty */
4b3b5bc7
LP
733 if (isatty(fd) < 1) {
734 if (IN_SET(errno, EINVAL, ENOTTY))
735 return 0; /* not a tty */
1ff74fb6 736
02a51aba 737 return -errno;
4b3b5bc7 738 }
02a51aba 739
4b3b5bc7
LP
740 /* This might fail. What matters are the results. */
741 r = fchmod_and_chown(fd, TTY_MODE, uid, -1);
742 if (r < 0)
743 return r;
02a51aba 744
4b3b5bc7 745 return 1;
02a51aba
LP
746}
747
7d5ceb64 748static int setup_confirm_stdio(const char *vc, int *_saved_stdin, int *_saved_stdout) {
3d18b167
LP
749 _cleanup_close_ int fd = -1, saved_stdin = -1, saved_stdout = -1;
750 int r;
80876c20 751
80876c20
LP
752 assert(_saved_stdin);
753 assert(_saved_stdout);
754
af6da548
LP
755 saved_stdin = fcntl(STDIN_FILENO, F_DUPFD, 3);
756 if (saved_stdin < 0)
757 return -errno;
80876c20 758
af6da548 759 saved_stdout = fcntl(STDOUT_FILENO, F_DUPFD, 3);
3d18b167
LP
760 if (saved_stdout < 0)
761 return -errno;
80876c20 762
8854d795 763 fd = acquire_terminal(vc, ACQUIRE_TERMINAL_WAIT, DEFAULT_CONFIRM_USEC);
3d18b167
LP
764 if (fd < 0)
765 return fd;
80876c20 766
af6da548
LP
767 r = chown_terminal(fd, getuid());
768 if (r < 0)
3d18b167 769 return r;
02a51aba 770
3d18b167
LP
771 r = reset_terminal_fd(fd, true);
772 if (r < 0)
773 return r;
80876c20 774
2b33ab09 775 r = rearrange_stdio(fd, fd, STDERR_FILENO);
3d18b167 776 fd = -1;
2b33ab09
LP
777 if (r < 0)
778 return r;
80876c20
LP
779
780 *_saved_stdin = saved_stdin;
781 *_saved_stdout = saved_stdout;
782
3d18b167 783 saved_stdin = saved_stdout = -1;
80876c20 784
3d18b167 785 return 0;
80876c20
LP
786}
787
63d77c92 788static void write_confirm_error_fd(int err, int fd, const Unit *u) {
3b20f877
FB
789 assert(err < 0);
790
791 if (err == -ETIMEDOUT)
63d77c92 792 dprintf(fd, "Confirmation question timed out for %s, assuming positive response.\n", u->id);
3b20f877
FB
793 else {
794 errno = -err;
63d77c92 795 dprintf(fd, "Couldn't ask confirmation for %s: %m, assuming positive response.\n", u->id);
3b20f877
FB
796 }
797}
798
63d77c92 799static void write_confirm_error(int err, const char *vc, const Unit *u) {
03e334a1 800 _cleanup_close_ int fd = -1;
80876c20 801
3b20f877 802 assert(vc);
80876c20 803
7d5ceb64 804 fd = open_terminal(vc, O_WRONLY|O_NOCTTY|O_CLOEXEC);
af6da548 805 if (fd < 0)
3b20f877 806 return;
80876c20 807
63d77c92 808 write_confirm_error_fd(err, fd, u);
af6da548 809}
80876c20 810
/* Undoes setup_confirm_stdio(): releases the terminal, puts the saved fds back in place as stdin and
 * stdout, and closes the saved copies (the variables are reset through safe_close()'s return value).
 * Returns 0, or the negative errno of the last dup2() that failed. */
static int restore_confirm_stdio(int *saved_stdin, int *saved_stdout) {
        int r = 0;

        assert(saved_stdin);
        assert(saved_stdout);

        release_terminal();

        if (*saved_stdin >= 0 && dup2(*saved_stdin, STDIN_FILENO) < 0)
                r = -errno;

        if (*saved_stdout >= 0 && dup2(*saved_stdout, STDOUT_FILENO) < 0)
                r = -errno;

        *saved_stdin = safe_close(*saved_stdin);
        *saved_stdout = safe_close(*saved_stdout);

        return r;
}
832
/* Possible results of ask_for_confirmation() */
enum {
        CONFIRM_PRETEND_FAILURE = -1, /* don't execute the command, pretend it failed */
        CONFIRM_PRETEND_SUCCESS =  0, /* don't execute the command, pretend it succeeded */
        CONFIRM_EXECUTE         =  1, /* go ahead and execute the command */
};
838
eedf223a 839static int ask_for_confirmation(const char *vc, Unit *u, const char *cmdline) {
af6da548 840 int saved_stdout = -1, saved_stdin = -1, r;
2bcd3c26 841 _cleanup_free_ char *e = NULL;
3b20f877 842 char c;
af6da548 843
3b20f877 844 /* For any internal errors, assume a positive response. */
7d5ceb64 845 r = setup_confirm_stdio(vc, &saved_stdin, &saved_stdout);
3b20f877 846 if (r < 0) {
63d77c92 847 write_confirm_error(r, vc, u);
3b20f877
FB
848 return CONFIRM_EXECUTE;
849 }
af6da548 850
b0eb2944
FB
851 /* confirm_spawn might have been disabled while we were sleeping. */
852 if (manager_is_confirm_spawn_disabled(u->manager)) {
853 r = 1;
854 goto restore_stdio;
855 }
af6da548 856
2bcd3c26
FB
857 e = ellipsize(cmdline, 60, 100);
858 if (!e) {
859 log_oom();
860 r = CONFIRM_EXECUTE;
861 goto restore_stdio;
862 }
af6da548 863
d172b175 864 for (;;) {
539622bd 865 r = ask_char(&c, "yfshiDjcn", "Execute %s? [y, f, s – h for help] ", e);
d172b175 866 if (r < 0) {
63d77c92 867 write_confirm_error_fd(r, STDOUT_FILENO, u);
d172b175
FB
868 r = CONFIRM_EXECUTE;
869 goto restore_stdio;
870 }
af6da548 871
d172b175 872 switch (c) {
b0eb2944
FB
873 case 'c':
874 printf("Resuming normal execution.\n");
875 manager_disable_confirm_spawn();
876 r = 1;
877 break;
dd6f9ac0
FB
878 case 'D':
879 unit_dump(u, stdout, " ");
880 continue; /* ask again */
d172b175
FB
881 case 'f':
882 printf("Failing execution.\n");
883 r = CONFIRM_PRETEND_FAILURE;
884 break;
885 case 'h':
b0eb2944
FB
886 printf(" c - continue, proceed without asking anymore\n"
887 " D - dump, show the state of the unit\n"
dd6f9ac0 888 " f - fail, don't execute the command and pretend it failed\n"
d172b175 889 " h - help\n"
eedf223a 890 " i - info, show a short summary of the unit\n"
56fde33a 891 " j - jobs, show jobs that are in progress\n"
d172b175
FB
892 " s - skip, don't execute the command and pretend it succeeded\n"
893 " y - yes, execute the command\n");
dd6f9ac0 894 continue; /* ask again */
eedf223a
FB
895 case 'i':
896 printf(" Description: %s\n"
897 " Unit: %s\n"
898 " Command: %s\n",
899 u->id, u->description, cmdline);
900 continue; /* ask again */
56fde33a
FB
901 case 'j':
902 manager_dump_jobs(u->manager, stdout, " ");
903 continue; /* ask again */
539622bd
FB
904 case 'n':
905 /* 'n' was removed in favor of 'f'. */
906 printf("Didn't understand 'n', did you mean 'f'?\n");
907 continue; /* ask again */
d172b175
FB
908 case 's':
909 printf("Skipping execution.\n");
910 r = CONFIRM_PRETEND_SUCCESS;
911 break;
912 case 'y':
913 r = CONFIRM_EXECUTE;
914 break;
915 default:
916 assert_not_reached("Unhandled choice");
917 }
3b20f877 918 break;
3b20f877 919 }
af6da548 920
3b20f877 921restore_stdio:
af6da548 922 restore_confirm_stdio(&saved_stdin, &saved_stdout);
af6da548 923 return r;
80876c20
LP
924}
925
4d885bd3
DH
926static int get_fixed_user(const ExecContext *c, const char **user,
927 uid_t *uid, gid_t *gid,
928 const char **home, const char **shell) {
81a2b7ce 929 int r;
4d885bd3 930 const char *name;
81a2b7ce 931
4d885bd3 932 assert(c);
81a2b7ce 933
23deef88
LP
934 if (!c->user)
935 return 0;
936
4d885bd3
DH
937 /* Note that we don't set $HOME or $SHELL if they are not particularly enlightening anyway
938 * (i.e. are "/" or "/bin/nologin"). */
81a2b7ce 939
23deef88 940 name = c->user;
fafff8f1 941 r = get_user_creds(&name, uid, gid, home, shell, USER_CREDS_CLEAN);
4d885bd3
DH
942 if (r < 0)
943 return r;
81a2b7ce 944
4d885bd3
DH
945 *user = name;
946 return 0;
947}
948
949static int get_fixed_group(const ExecContext *c, const char **group, gid_t *gid) {
950 int r;
951 const char *name;
952
953 assert(c);
954
955 if (!c->group)
956 return 0;
957
958 name = c->group;
fafff8f1 959 r = get_group_creds(&name, gid, 0);
4d885bd3
DH
960 if (r < 0)
961 return r;
962
963 *group = name;
964 return 0;
965}
966
cdc5d5c5
DH
967static int get_supplementary_groups(const ExecContext *c, const char *user,
968 const char *group, gid_t gid,
969 gid_t **supplementary_gids, int *ngids) {
4d885bd3
DH
970 char **i;
971 int r, k = 0;
972 int ngroups_max;
973 bool keep_groups = false;
974 gid_t *groups = NULL;
975 _cleanup_free_ gid_t *l_gids = NULL;
976
977 assert(c);
978
bbeea271
DH
979 /*
980 * If user is given, then lookup GID and supplementary groups list.
981 * We avoid NSS lookups for gid=0. Also we have to initialize groups
cdc5d5c5
DH
982 * here and as early as possible so we keep the list of supplementary
983 * groups of the caller.
bbeea271
DH
984 */
985 if (user && gid_is_valid(gid) && gid != 0) {
986 /* First step, initialize groups from /etc/groups */
987 if (initgroups(user, gid) < 0)
988 return -errno;
989
990 keep_groups = true;
991 }
992
ac6e8be6 993 if (strv_isempty(c->supplementary_groups))
4d885bd3
DH
994 return 0;
995
366ddd25
DH
996 /*
997 * If SupplementaryGroups= was passed then NGROUPS_MAX has to
998 * be positive, otherwise fail.
999 */
1000 errno = 0;
1001 ngroups_max = (int) sysconf(_SC_NGROUPS_MAX);
1002 if (ngroups_max <= 0) {
1003 if (errno > 0)
1004 return -errno;
1005 else
1006 return -EOPNOTSUPP; /* For all other values */
1007 }
1008
4d885bd3
DH
1009 l_gids = new(gid_t, ngroups_max);
1010 if (!l_gids)
1011 return -ENOMEM;
81a2b7ce 1012
4d885bd3
DH
1013 if (keep_groups) {
1014 /*
1015 * Lookup the list of groups that the user belongs to, we
1016 * avoid NSS lookups here too for gid=0.
1017 */
1018 k = ngroups_max;
1019 if (getgrouplist(user, gid, l_gids, &k) < 0)
1020 return -EINVAL;
1021 } else
1022 k = 0;
81a2b7ce 1023
4d885bd3
DH
1024 STRV_FOREACH(i, c->supplementary_groups) {
1025 const char *g;
81a2b7ce 1026
4d885bd3
DH
1027 if (k >= ngroups_max)
1028 return -E2BIG;
81a2b7ce 1029
4d885bd3 1030 g = *i;
fafff8f1 1031 r = get_group_creds(&g, l_gids+k, 0);
4d885bd3
DH
1032 if (r < 0)
1033 return r;
81a2b7ce 1034
4d885bd3
DH
1035 k++;
1036 }
81a2b7ce 1037
4d885bd3
DH
1038 /*
1039 * Sets ngids to zero to drop all supplementary groups, happens
1040 * when we are under root and SupplementaryGroups= is empty.
1041 */
1042 if (k == 0) {
1043 *ngids = 0;
1044 return 0;
1045 }
81a2b7ce 1046
4d885bd3
DH
1047 /* Otherwise get the final list of supplementary groups */
1048 groups = memdup(l_gids, sizeof(gid_t) * k);
1049 if (!groups)
1050 return -ENOMEM;
1051
1052 *supplementary_gids = groups;
1053 *ngids = k;
1054
1055 groups = NULL;
1056
1057 return 0;
1058}
1059
/* Applies the group credentials: installs the supplementary group list if it is non-empty, then sets the
 * real, effective and saved GID to 'gid' if that is valid. Returns 0 on success, negative on failure. */
static int enforce_groups(gid_t gid, const gid_t *supplementary_gids, int ngids) {

        /* Only touch the supplementary groups if there are any to set. */
        if (ngids > 0) {
                int r;

                r = maybe_setgroups(ngids, supplementary_gids);
                if (r < 0)
                        return r;
        }

        /* Then switch the GIDs proper. */
        if (gid_is_valid(gid) && setresgid(gid, gid, gid) < 0)
                return -errno;

        return 0;
}
1078
1079static int enforce_user(const ExecContext *context, uid_t uid) {
81a2b7ce
LP
1080 assert(context);
1081
4d885bd3
DH
1082 if (!uid_is_valid(uid))
1083 return 0;
1084
479050b3 1085 /* Sets (but doesn't look up) the uid and make sure we keep the
81a2b7ce
LP
1086 * capabilities while doing so. */
1087
479050b3 1088 if (context->capability_ambient_set != 0) {
81a2b7ce
LP
1089
1090 /* First step: If we need to keep capabilities but
1091 * drop privileges we need to make sure we keep our
cbb21cca 1092 * caps, while we drop privileges. */
693ced48 1093 if (uid != 0) {
cbb21cca 1094 int sb = context->secure_bits | 1<<SECURE_KEEP_CAPS;
693ced48
LP
1095
1096 if (prctl(PR_GET_SECUREBITS) != sb)
1097 if (prctl(PR_SET_SECUREBITS, sb) < 0)
1098 return -errno;
1099 }
81a2b7ce
LP
1100 }
1101
479050b3 1102 /* Second step: actually set the uids */
81a2b7ce
LP
1103 if (setresuid(uid, uid, uid) < 0)
1104 return -errno;
1105
1106 /* At this point we should have all necessary capabilities but
1107 are otherwise a normal user. However, the caps might got
1108 corrupted due to the setresuid() so we need clean them up
1109 later. This is done outside of this call. */
1110
1111 return 0;
1112}
1113
349cc4a5 1114#if HAVE_PAM
5b6319dc
LP
1115
1116static int null_conv(
1117 int num_msg,
1118 const struct pam_message **msg,
1119 struct pam_response **resp,
1120 void *appdata_ptr) {
1121
1122 /* We don't support conversations */
1123
1124 return PAM_CONV_ERR;
1125}
1126
cefc33ae
LP
1127#endif
1128
5b6319dc
LP
/* Opens a PAM session for the service about to be executed and forks off a helper process ("(sd-pam)")
 * that closes the session again once the calling process has gone away.
 *
 *   name      PAM service name handed to pam_start()
 *   user      user name handed to pam_start()
 *   uid, gid  credentials the helper child switches to before it starts waiting
 *   tty       TTY path to register as PAM_TTY; if NULL, the TTY behind stdin is used if there is one
 *   env       in: every entry is exported to PAM via pam_putenv();
 *             out: on success replaced by the environment list obtained from pam_getenvlist()
 *   fds       file descriptors (n_fds of them) the helper child closes right after the fork
 *
 * On failure a negative errno-style error is returned (-EPERM for any PAM-level failure, since PAM error
 * codes do not map to errno). When built without HAVE_PAM this does nothing and returns 0. */
static int setup_pam(
                const char *name,
                const char *user,
                uid_t uid,
                gid_t gid,
                const char *tty,
                char ***env,
                int fds[], size_t n_fds) {

#if HAVE_PAM

        static const struct pam_conv conv = {
                .conv = null_conv,
                .appdata_ptr = NULL
        };

        _cleanup_(barrier_destroy) Barrier barrier = BARRIER_NULL;
        pam_handle_t *handle = NULL;
        sigset_t old_ss;
        int pam_code = PAM_SUCCESS, r;
        char **nv, **e = NULL;
        bool close_session = false;
        pid_t pam_pid = 0, parent_pid;
        int flags = 0;

        assert(name);
        assert(user);
        assert(env);

        /* We set up PAM in the parent process, then fork. The child
         * will then stay around until killed via PR_SET_PDEATHSIG or
         * systemd via the cgroup logic. It will then remove the PAM
         * session again. The parent process will exec() the actual
         * daemon. We do things this way to ensure that the main PID
         * of the daemon is the one we initially fork()ed. */

        r = barrier_create(&barrier);
        if (r < 0)
                goto fail;

        /* Keep PAM quiet unless debug logging is enabled */
        if (log_get_max_level() < LOG_DEBUG)
                flags |= PAM_SILENT;

        pam_code = pam_start(name, user, &conv, &handle);
        if (pam_code != PAM_SUCCESS) {
                handle = NULL;
                goto fail;
        }

        if (!tty) {
                _cleanup_free_ char *q = NULL;

                /* Hmm, so no TTY was explicitly passed, but an fd passed to us directly might be a TTY. Let's figure
                 * out if that's the case, and read the TTY off it. */

                if (getttyname_malloc(STDIN_FILENO, &q) >= 0)
                        tty = strjoina("/dev/", q);
        }

        if (tty) {
                pam_code = pam_set_item(handle, PAM_TTY, tty);
                if (pam_code != PAM_SUCCESS)
                        goto fail;
        }

        /* Export the caller's environment to the PAM modules */
        STRV_FOREACH(nv, *env) {
                pam_code = pam_putenv(handle, *nv);
                if (pam_code != PAM_SUCCESS)
                        goto fail;
        }

        pam_code = pam_acct_mgmt(handle, flags);
        if (pam_code != PAM_SUCCESS)
                goto fail;

        pam_code = pam_open_session(handle, flags);
        if (pam_code != PAM_SUCCESS)
                goto fail;

        /* From now on the failure path has to close the session again */
        close_session = true;

        e = pam_getenvlist(handle);
        if (!e) {
                pam_code = PAM_BUF_ERR;
                goto fail;
        }

        /* Block SIGTERM, so that we know that it won't get lost in
         * the child */

        assert_se(sigprocmask_many(SIG_BLOCK, &old_ss, SIGTERM, -1) >= 0);

        parent_pid = getpid_cached();

        r = safe_fork("(sd-pam)", 0, &pam_pid);
        if (r < 0)
                goto fail;
        if (r == 0) {
                int sig, ret = EXIT_PAM;

                /* The child's job is to reset the PAM session on
                 * termination */
                barrier_set_role(&barrier, BARRIER_CHILD);

                /* Make sure we don't keep open the passed fds in this child. We assume that otherwise only those fds
                 * are open here that have been opened by PAM. */
                (void) close_many(fds, n_fds);

                /* Drop privileges - we don't need any to pam_close_session
                 * and this will make PR_SET_PDEATHSIG work in most cases.
                 * If this fails, ignore the error - but expect sd-pam threads
                 * to fail to exit normally */

                r = maybe_setgroups(0, NULL);
                if (r < 0)
                        log_warning_errno(r, "Failed to setgroups() in sd-pam: %m");
                if (setresgid(gid, gid, gid) < 0)
                        log_warning_errno(errno, "Failed to setresgid() in sd-pam: %m");
                if (setresuid(uid, uid, uid) < 0)
                        log_warning_errno(errno, "Failed to setresuid() in sd-pam: %m");

                (void) ignore_signals(SIGPIPE, -1);

                /* Wait until our parent died. This will only work if
                 * the above setresuid() succeeds, otherwise the kernel
                 * will not allow unprivileged parents kill their privileged
                 * children this way. We rely on the control groups kill logic
                 * to do the rest for us. */
                if (prctl(PR_SET_PDEATHSIG, SIGTERM) < 0)
                        goto child_finish;

                /* Tell the parent that our setup is done. This is especially
                 * important regarding dropping privileges. Otherwise, unit
                 * setup might race against our setresuid(2) call.
                 *
                 * If the parent aborted, we'll detect this below, hence ignore
                 * return failure here. */
                (void) barrier_place(&barrier);

                /* Check if our parent process might already have died? */
                if (getppid() == parent_pid) {
                        sigset_t ss;

                        assert_se(sigemptyset(&ss) >= 0);
                        assert_se(sigaddset(&ss, SIGTERM) >= 0);

                        for (;;) {
                                int k;

                                /* sigwait() does not report errors via -1/errno: it returns 0 on success
                                 * and a positive error number on failure. 'sig' is only meaningful after
                                 * a successful return. */
                                k = sigwait(&ss, &sig);
                                if (k == EINTR)
                                        continue;
                                if (k != 0)
                                        goto child_finish;

                                assert(sig == SIGTERM);
                                break;
                        }
                }

                /* If our parent died we'll end the session */
                if (getppid() != parent_pid) {
                        pam_code = pam_close_session(handle, flags);
                        if (pam_code != PAM_SUCCESS)
                                goto child_finish;
                }

                ret = 0;

        child_finish:
                pam_end(handle, pam_code | flags);
                _exit(ret);
        }

        barrier_set_role(&barrier, BARRIER_PARENT);

        /* If the child was forked off successfully it will do all the
         * cleanups, so forget about the handle here. */
        handle = NULL;

        /* Unblock SIGTERM again in the parent */
        assert_se(sigprocmask(SIG_SETMASK, &old_ss, NULL) >= 0);

        /* We close the log explicitly here, since the PAM modules
         * might have opened it, but we don't want this fd around. */
        closelog();

        /* Synchronously wait for the child to initialize. We don't care for
         * errors as we cannot recover. However, warn loudly if it happens. */
        if (!barrier_place_and_sync(&barrier))
                log_error("PAM initialization failed");

        /* Hand PAM's environment list over to the caller, replacing the old one */
        return strv_free_and_replace(*env, e);

fail:
        if (pam_code != PAM_SUCCESS) {
                log_error("PAM failed: %s", pam_strerror(handle, pam_code));
                r = -EPERM;  /* PAM errors do not map to errno */
        } else
                log_error_errno(r, "PAM failed: %m");

        if (handle) {
                if (close_session)
                        pam_code = pam_close_session(handle, flags);

                pam_end(handle, pam_code | flags);
        }

        strv_free(e);
        closelog();

        return r;
#else
        return 0;
#endif
}
5b6319dc 1344
5d6b1584
LP
/* Renames the current process to "(<name>)", where <name> is the last path component of 'path', cut down
 * to its final 8 characters. If that component is empty, "(...)" is used instead. */
static void rename_process_from_path(const char *path) {
        /* '(' + up to 8 characters + ')' + NUL. The result must fit in 10 visible characters (i.e. the
         * length of "/sbin/init") to look pretty in /bin/ps. */
        char buf[11], *w = buf;
        const char *leaf;
        size_t n;

        leaf = basename(path);
        if (isempty(leaf)) {
                rename_process("(...)");
                return;
        }

        n = strlen(leaf);
        if (n > 8) {
                /* Keep the tail: the end of the name is usually the more interesting part, since the
                 * beginning might just be "systemd-" */
                leaf += n - 8;
                n = 8;
        }

        *w++ = '(';
        memcpy(w, leaf, n);
        w += n;
        *w++ = ')';
        *w = 0;

        rename_process(buf);
}
1375
469830d1
LP
1376static bool context_has_address_families(const ExecContext *c) {
1377 assert(c);
1378
1379 return c->address_families_whitelist ||
1380 !set_isempty(c->address_families);
1381}
1382
1383static bool context_has_syscall_filters(const ExecContext *c) {
1384 assert(c);
1385
1386 return c->syscall_whitelist ||
8cfa775f 1387 !hashmap_isempty(c->syscall_filter);
469830d1
LP
1388}
1389
1390static bool context_has_no_new_privileges(const ExecContext *c) {
1391 assert(c);
1392
1393 if (c->no_new_privileges)
1394 return true;
1395
1396 if (have_effective_cap(CAP_SYS_ADMIN)) /* if we are privileged, we don't need NNP */
1397 return false;
1398
1399 /* We need NNP if we have any form of seccomp and are unprivileged */
1400 return context_has_address_families(c) ||
1401 c->memory_deny_write_execute ||
1402 c->restrict_realtime ||
f69567cb 1403 c->restrict_suid_sgid ||
469830d1
LP
1404 exec_context_restrict_namespaces_set(c) ||
1405 c->protect_kernel_tunables ||
1406 c->protect_kernel_modules ||
1407 c->private_devices ||
1408 context_has_syscall_filters(c) ||
78e864e5 1409 !set_isempty(c->syscall_archs) ||
aecd5ac6
TM
1410 c->lock_personality ||
1411 c->protect_hostname;
469830d1
LP
1412}
1413
349cc4a5 1414#if HAVE_SECCOMP
17df7223 1415
83f12b27 1416static bool skip_seccomp_unavailable(const Unit* u, const char* msg) {
f673b62d
LP
1417
1418 if (is_seccomp_available())
1419 return false;
1420
f673b62d 1421 log_unit_debug(u, "SECCOMP features not detected in the kernel, skipping %s", msg);
f673b62d 1422 return true;
83f12b27
FS
1423}
1424
165a31c0 1425static int apply_syscall_filter(const Unit* u, const ExecContext *c, bool needs_ambient_hack) {
469830d1 1426 uint32_t negative_action, default_action, action;
165a31c0 1427 int r;
8351ceae 1428
469830d1 1429 assert(u);
c0467cf3 1430 assert(c);
8351ceae 1431
469830d1 1432 if (!context_has_syscall_filters(c))
83f12b27
FS
1433 return 0;
1434
469830d1
LP
1435 if (skip_seccomp_unavailable(u, "SystemCallFilter="))
1436 return 0;
e9642be2 1437
ccc16c78 1438 negative_action = c->syscall_errno == 0 ? scmp_act_kill_process() : SCMP_ACT_ERRNO(c->syscall_errno);
e9642be2 1439
469830d1
LP
1440 if (c->syscall_whitelist) {
1441 default_action = negative_action;
1442 action = SCMP_ACT_ALLOW;
7c66bae2 1443 } else {
469830d1
LP
1444 default_action = SCMP_ACT_ALLOW;
1445 action = negative_action;
57183d11 1446 }
8351ceae 1447
165a31c0
LP
1448 if (needs_ambient_hack) {
1449 r = seccomp_filter_set_add(c->syscall_filter, c->syscall_whitelist, syscall_filter_sets + SYSCALL_FILTER_SET_SETUID);
1450 if (r < 0)
1451 return r;
1452 }
1453
b54f36c6 1454 return seccomp_load_syscall_filter_set_raw(default_action, c->syscall_filter, action, false);
4298d0b5
LP
1455}
1456
469830d1
LP
1457static int apply_syscall_archs(const Unit *u, const ExecContext *c) {
1458 assert(u);
4298d0b5
LP
1459 assert(c);
1460
469830d1 1461 if (set_isempty(c->syscall_archs))
83f12b27
FS
1462 return 0;
1463
469830d1
LP
1464 if (skip_seccomp_unavailable(u, "SystemCallArchitectures="))
1465 return 0;
4298d0b5 1466
469830d1
LP
1467 return seccomp_restrict_archs(c->syscall_archs);
1468}
4298d0b5 1469
469830d1
LP
1470static int apply_address_families(const Unit* u, const ExecContext *c) {
1471 assert(u);
1472 assert(c);
4298d0b5 1473
469830d1
LP
1474 if (!context_has_address_families(c))
1475 return 0;
4298d0b5 1476
469830d1
LP
1477 if (skip_seccomp_unavailable(u, "RestrictAddressFamilies="))
1478 return 0;
4298d0b5 1479
469830d1 1480 return seccomp_restrict_address_families(c->address_families, c->address_families_whitelist);
8351ceae 1481}
4298d0b5 1482
83f12b27 1483static int apply_memory_deny_write_execute(const Unit* u, const ExecContext *c) {
469830d1 1484 assert(u);
f3e43635
TM
1485 assert(c);
1486
469830d1 1487 if (!c->memory_deny_write_execute)
83f12b27
FS
1488 return 0;
1489
469830d1
LP
1490 if (skip_seccomp_unavailable(u, "MemoryDenyWriteExecute="))
1491 return 0;
f3e43635 1492
469830d1 1493 return seccomp_memory_deny_write_execute();
f3e43635
TM
1494}
1495
83f12b27 1496static int apply_restrict_realtime(const Unit* u, const ExecContext *c) {
469830d1 1497 assert(u);
f4170c67
LP
1498 assert(c);
1499
469830d1 1500 if (!c->restrict_realtime)
83f12b27
FS
1501 return 0;
1502
469830d1
LP
1503 if (skip_seccomp_unavailable(u, "RestrictRealtime="))
1504 return 0;
f4170c67 1505
469830d1 1506 return seccomp_restrict_realtime();
f4170c67
LP
1507}
1508
f69567cb
LP
1509static int apply_restrict_suid_sgid(const Unit* u, const ExecContext *c) {
1510 assert(u);
1511 assert(c);
1512
1513 if (!c->restrict_suid_sgid)
1514 return 0;
1515
1516 if (skip_seccomp_unavailable(u, "RestrictSUIDSGID="))
1517 return 0;
1518
1519 return seccomp_restrict_suid_sgid();
1520}
1521
59e856c7 1522static int apply_protect_sysctl(const Unit *u, const ExecContext *c) {
469830d1 1523 assert(u);
59eeb84b
LP
1524 assert(c);
1525
1526 /* Turn off the legacy sysctl() system call. Many distributions turn this off while building the kernel, but
1527 * let's protect even those systems where this is left on in the kernel. */
1528
469830d1 1529 if (!c->protect_kernel_tunables)
59eeb84b
LP
1530 return 0;
1531
469830d1
LP
1532 if (skip_seccomp_unavailable(u, "ProtectKernelTunables="))
1533 return 0;
59eeb84b 1534
469830d1 1535 return seccomp_protect_sysctl();
59eeb84b
LP
1536}
1537
59e856c7 1538static int apply_protect_kernel_modules(const Unit *u, const ExecContext *c) {
469830d1 1539 assert(u);
502d704e
DH
1540 assert(c);
1541
25a8d8a0 1542 /* Turn off module syscalls on ProtectKernelModules=yes */
502d704e 1543
469830d1
LP
1544 if (!c->protect_kernel_modules)
1545 return 0;
1546
502d704e
DH
1547 if (skip_seccomp_unavailable(u, "ProtectKernelModules="))
1548 return 0;
1549
b54f36c6 1550 return seccomp_load_syscall_filter_set(SCMP_ACT_ALLOW, syscall_filter_sets + SYSCALL_FILTER_SET_MODULE, SCMP_ACT_ERRNO(EPERM), false);
502d704e
DH
1551}
1552
59e856c7 1553static int apply_private_devices(const Unit *u, const ExecContext *c) {
469830d1 1554 assert(u);
ba128bb8
LP
1555 assert(c);
1556
8f81a5f6 1557 /* If PrivateDevices= is set, also turn off iopl and all @raw-io syscalls. */
ba128bb8 1558
469830d1
LP
1559 if (!c->private_devices)
1560 return 0;
1561
ba128bb8
LP
1562 if (skip_seccomp_unavailable(u, "PrivateDevices="))
1563 return 0;
1564
b54f36c6 1565 return seccomp_load_syscall_filter_set(SCMP_ACT_ALLOW, syscall_filter_sets + SYSCALL_FILTER_SET_RAW_IO, SCMP_ACT_ERRNO(EPERM), false);
ba128bb8
LP
1566}
1567
34cf6c43 1568static int apply_restrict_namespaces(const Unit *u, const ExecContext *c) {
469830d1 1569 assert(u);
add00535
LP
1570 assert(c);
1571
1572 if (!exec_context_restrict_namespaces_set(c))
1573 return 0;
1574
1575 if (skip_seccomp_unavailable(u, "RestrictNamespaces="))
1576 return 0;
1577
1578 return seccomp_restrict_namespaces(c->restrict_namespaces);
1579}
1580
78e864e5 1581static int apply_lock_personality(const Unit* u, const ExecContext *c) {
e8132d63
LP
1582 unsigned long personality;
1583 int r;
78e864e5
TM
1584
1585 assert(u);
1586 assert(c);
1587
1588 if (!c->lock_personality)
1589 return 0;
1590
1591 if (skip_seccomp_unavailable(u, "LockPersonality="))
1592 return 0;
1593
e8132d63
LP
1594 personality = c->personality;
1595
1596 /* If personality is not specified, use either PER_LINUX or PER_LINUX32 depending on what is currently set. */
1597 if (personality == PERSONALITY_INVALID) {
1598
1599 r = opinionated_personality(&personality);
1600 if (r < 0)
1601 return r;
1602 }
78e864e5
TM
1603
1604 return seccomp_lock_personality(personality);
1605}
1606
c0467cf3 1607#endif
8351ceae 1608
3042bbeb 1609static void do_idle_pipe_dance(int idle_pipe[static 4]) {
31a7eb86
ZJS
1610 assert(idle_pipe);
1611
54eb2300
LP
1612 idle_pipe[1] = safe_close(idle_pipe[1]);
1613 idle_pipe[2] = safe_close(idle_pipe[2]);
31a7eb86
ZJS
1614
1615 if (idle_pipe[0] >= 0) {
1616 int r;
1617
1618 r = fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT_USEC);
1619
1620 if (idle_pipe[3] >= 0 && r == 0 /* timeout */) {
c7cc737f
LP
1621 ssize_t n;
1622
31a7eb86 1623 /* Signal systemd that we are bored and want to continue. */
c7cc737f
LP
1624 n = write(idle_pipe[3], "x", 1);
1625 if (n > 0)
cd972d69
ZJS
1626 /* Wait for systemd to react to the signal above. */
1627 fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT2_USEC);
31a7eb86
ZJS
1628 }
1629
54eb2300 1630 idle_pipe[0] = safe_close(idle_pipe[0]);
31a7eb86
ZJS
1631
1632 }
1633
54eb2300 1634 idle_pipe[3] = safe_close(idle_pipe[3]);
31a7eb86
ZJS
1635}
1636
fb2042dd
YW
1637static const char *exec_directory_env_name_to_string(ExecDirectoryType t);
1638
7cae38c4 1639static int build_environment(
34cf6c43 1640 const Unit *u,
9fa95f85 1641 const ExecContext *c,
1e22b5cd 1642 const ExecParameters *p,
da6053d0 1643 size_t n_fds,
7cae38c4
LP
1644 const char *home,
1645 const char *username,
1646 const char *shell,
7bce046b
LP
1647 dev_t journal_stream_dev,
1648 ino_t journal_stream_ino,
7cae38c4
LP
1649 char ***ret) {
1650
1651 _cleanup_strv_free_ char **our_env = NULL;
fb2042dd 1652 ExecDirectoryType t;
da6053d0 1653 size_t n_env = 0;
7cae38c4
LP
1654 char *x;
1655
4b58153d 1656 assert(u);
7cae38c4 1657 assert(c);
7c1cb6f1 1658 assert(p);
7cae38c4
LP
1659 assert(ret);
1660
fb2042dd 1661 our_env = new0(char*, 14 + _EXEC_DIRECTORY_TYPE_MAX);
7cae38c4
LP
1662 if (!our_env)
1663 return -ENOMEM;
1664
1665 if (n_fds > 0) {
8dd4c05b
LP
1666 _cleanup_free_ char *joined = NULL;
1667
df0ff127 1668 if (asprintf(&x, "LISTEN_PID="PID_FMT, getpid_cached()) < 0)
7cae38c4
LP
1669 return -ENOMEM;
1670 our_env[n_env++] = x;
1671
da6053d0 1672 if (asprintf(&x, "LISTEN_FDS=%zu", n_fds) < 0)
7cae38c4
LP
1673 return -ENOMEM;
1674 our_env[n_env++] = x;
8dd4c05b 1675
1e22b5cd 1676 joined = strv_join(p->fd_names, ":");
8dd4c05b
LP
1677 if (!joined)
1678 return -ENOMEM;
1679
605405c6 1680 x = strjoin("LISTEN_FDNAMES=", joined);
8dd4c05b
LP
1681 if (!x)
1682 return -ENOMEM;
1683 our_env[n_env++] = x;
7cae38c4
LP
1684 }
1685
b08af3b1 1686 if ((p->flags & EXEC_SET_WATCHDOG) && p->watchdog_usec > 0) {
df0ff127 1687 if (asprintf(&x, "WATCHDOG_PID="PID_FMT, getpid_cached()) < 0)
09812eb7
LP
1688 return -ENOMEM;
1689 our_env[n_env++] = x;
1690
1e22b5cd 1691 if (asprintf(&x, "WATCHDOG_USEC="USEC_FMT, p->watchdog_usec) < 0)
09812eb7
LP
1692 return -ENOMEM;
1693 our_env[n_env++] = x;
1694 }
1695
fd63e712
LP
1696 /* If this is D-Bus, tell the nss-systemd module, since it relies on being able to use D-Bus look up dynamic
1697 * users via PID 1, possibly dead-locking the dbus daemon. This way it will not use D-Bus to resolve names, but
1698 * check the database directly. */
ac647978 1699 if (p->flags & EXEC_NSS_BYPASS_BUS) {
fd63e712
LP
1700 x = strdup("SYSTEMD_NSS_BYPASS_BUS=1");
1701 if (!x)
1702 return -ENOMEM;
1703 our_env[n_env++] = x;
1704 }
1705
7cae38c4
LP
1706 if (home) {
1707 x = strappend("HOME=", home);
1708 if (!x)
1709 return -ENOMEM;
7bbead1d
LP
1710
1711 path_simplify(x + 5, true);
7cae38c4
LP
1712 our_env[n_env++] = x;
1713 }
1714
1715 if (username) {
1716 x = strappend("LOGNAME=", username);
1717 if (!x)
1718 return -ENOMEM;
1719 our_env[n_env++] = x;
1720
1721 x = strappend("USER=", username);
1722 if (!x)
1723 return -ENOMEM;
1724 our_env[n_env++] = x;
1725 }
1726
1727 if (shell) {
1728 x = strappend("SHELL=", shell);
1729 if (!x)
1730 return -ENOMEM;
7bbead1d
LP
1731
1732 path_simplify(x + 6, true);
7cae38c4
LP
1733 our_env[n_env++] = x;
1734 }
1735
4b58153d
LP
1736 if (!sd_id128_is_null(u->invocation_id)) {
1737 if (asprintf(&x, "INVOCATION_ID=" SD_ID128_FORMAT_STR, SD_ID128_FORMAT_VAL(u->invocation_id)) < 0)
1738 return -ENOMEM;
1739
1740 our_env[n_env++] = x;
1741 }
1742
6af760f3
LP
1743 if (exec_context_needs_term(c)) {
1744 const char *tty_path, *term = NULL;
1745
1746 tty_path = exec_context_tty_path(c);
1747
1748 /* If we are forked off PID 1 and we are supposed to operate on /dev/console, then let's try to inherit
1749 * the $TERM set for PID 1. This is useful for containers so that the $TERM the container manager
1750 * passes to PID 1 ends up all the way in the console login shown. */
1751
1752 if (path_equal(tty_path, "/dev/console") && getppid() == 1)
1753 term = getenv("TERM");
1754 if (!term)
1755 term = default_term_for_tty(tty_path);
7cae38c4 1756
6af760f3 1757 x = strappend("TERM=", term);
7cae38c4
LP
1758 if (!x)
1759 return -ENOMEM;
1760 our_env[n_env++] = x;
1761 }
1762
7bce046b
LP
1763 if (journal_stream_dev != 0 && journal_stream_ino != 0) {
1764 if (asprintf(&x, "JOURNAL_STREAM=" DEV_FMT ":" INO_FMT, journal_stream_dev, journal_stream_ino) < 0)
1765 return -ENOMEM;
1766
1767 our_env[n_env++] = x;
1768 }
1769
fb2042dd
YW
1770 for (t = 0; t < _EXEC_DIRECTORY_TYPE_MAX; t++) {
1771 _cleanup_free_ char *pre = NULL, *joined = NULL;
1772 const char *n;
1773
1774 if (!p->prefix[t])
1775 continue;
1776
1777 if (strv_isempty(c->directories[t].paths))
1778 continue;
1779
1780 n = exec_directory_env_name_to_string(t);
1781 if (!n)
1782 continue;
1783
1784 pre = strjoin(p->prefix[t], "/");
1785 if (!pre)
1786 return -ENOMEM;
1787
1788 joined = strv_join_prefix(c->directories[t].paths, ":", pre);
1789 if (!joined)
1790 return -ENOMEM;
1791
1792 x = strjoin(n, "=", joined);
1793 if (!x)
1794 return -ENOMEM;
1795
1796 our_env[n_env++] = x;
1797 }
1798
7cae38c4 1799 our_env[n_env++] = NULL;
fb2042dd 1800 assert(n_env <= 14 + _EXEC_DIRECTORY_TYPE_MAX);
7cae38c4 1801
ae2a15bc 1802 *ret = TAKE_PTR(our_env);
7cae38c4
LP
1803
1804 return 0;
1805}
1806
b4c14404
FB
1807static int build_pass_environment(const ExecContext *c, char ***ret) {
1808 _cleanup_strv_free_ char **pass_env = NULL;
1809 size_t n_env = 0, n_bufsize = 0;
1810 char **i;
1811
1812 STRV_FOREACH(i, c->pass_environment) {
1813 _cleanup_free_ char *x = NULL;
1814 char *v;
1815
1816 v = getenv(*i);
1817 if (!v)
1818 continue;
605405c6 1819 x = strjoin(*i, "=", v);
b4c14404
FB
1820 if (!x)
1821 return -ENOMEM;
00819cc1 1822
b4c14404
FB
1823 if (!GREEDY_REALLOC(pass_env, n_bufsize, n_env + 2))
1824 return -ENOMEM;
00819cc1 1825
1cc6c93a 1826 pass_env[n_env++] = TAKE_PTR(x);
b4c14404 1827 pass_env[n_env] = NULL;
b4c14404
FB
1828 }
1829
ae2a15bc 1830 *ret = TAKE_PTR(pass_env);
b4c14404
FB
1831
1832 return 0;
1833}
1834
8b44a3d2
LP
1835static bool exec_needs_mount_namespace(
1836 const ExecContext *context,
1837 const ExecParameters *params,
4657abb5 1838 const ExecRuntime *runtime) {
8b44a3d2
LP
1839
1840 assert(context);
1841 assert(params);
1842
915e6d16
LP
1843 if (context->root_image)
1844 return true;
1845
2a624c36
AP
1846 if (!strv_isempty(context->read_write_paths) ||
1847 !strv_isempty(context->read_only_paths) ||
1848 !strv_isempty(context->inaccessible_paths))
8b44a3d2
LP
1849 return true;
1850
42b1d8e0 1851 if (context->n_bind_mounts > 0)
d2d6c096
LP
1852 return true;
1853
2abd4e38
YW
1854 if (context->n_temporary_filesystems > 0)
1855 return true;
1856
37ed15d7 1857 if (!IN_SET(context->mount_flags, 0, MS_SHARED))
8b44a3d2
LP
1858 return true;
1859
1860 if (context->private_tmp && runtime && (runtime->tmp_dir || runtime->var_tmp_dir))
1861 return true;
1862
8b44a3d2 1863 if (context->private_devices ||
228af36f 1864 context->private_mounts ||
8b44a3d2 1865 context->protect_system != PROTECT_SYSTEM_NO ||
59eeb84b
LP
1866 context->protect_home != PROTECT_HOME_NO ||
1867 context->protect_kernel_tunables ||
c575770b 1868 context->protect_kernel_modules ||
59eeb84b 1869 context->protect_control_groups)
8b44a3d2
LP
1870 return true;
1871
37c56f89
YW
1872 if (context->root_directory) {
1873 ExecDirectoryType t;
1874
1875 if (context->mount_apivfs)
1876 return true;
1877
1878 for (t = 0; t < _EXEC_DIRECTORY_TYPE_MAX; t++) {
1879 if (!params->prefix[t])
1880 continue;
1881
1882 if (!strv_isempty(context->directories[t].paths))
1883 return true;
1884 }
1885 }
5d997827 1886
42b1d8e0 1887 if (context->dynamic_user &&
b43ee82f 1888 (!strv_isempty(context->directories[EXEC_DIRECTORY_STATE].paths) ||
42b1d8e0
YW
1889 !strv_isempty(context->directories[EXEC_DIRECTORY_CACHE].paths) ||
1890 !strv_isempty(context->directories[EXEC_DIRECTORY_LOGS].paths)))
1891 return true;
1892
8b44a3d2
LP
1893 return false;
1894}
1895
d251207d
LP
1896static int setup_private_users(uid_t uid, gid_t gid) {
1897 _cleanup_free_ char *uid_map = NULL, *gid_map = NULL;
1898 _cleanup_close_pair_ int errno_pipe[2] = { -1, -1 };
1899 _cleanup_close_ int unshare_ready_fd = -1;
1900 _cleanup_(sigkill_waitp) pid_t pid = 0;
1901 uint64_t c = 1;
d251207d
LP
1902 ssize_t n;
1903 int r;
1904
1905 /* Set up a user namespace and map root to root, the selected UID/GID to itself, and everything else to
1906 * nobody. In order to be able to write this mapping we need CAP_SETUID in the original user namespace, which
1907 * we however lack after opening the user namespace. To work around this we fork() a temporary child process,
1908 * which waits for the parent to create the new user namespace while staying in the original namespace. The
1909 * child then writes the UID mapping, under full privileges. The parent waits for the child to finish and
1910 * continues execution normally. */
1911
587ab01b
ZJS
1912 if (uid != 0 && uid_is_valid(uid)) {
1913 r = asprintf(&uid_map,
1914 "0 0 1\n" /* Map root → root */
1915 UID_FMT " " UID_FMT " 1\n", /* Map $UID → $UID */
1916 uid, uid);
1917 if (r < 0)
1918 return -ENOMEM;
1919 } else {
e0f3720e 1920 uid_map = strdup("0 0 1\n"); /* The case where the above is the same */
587ab01b
ZJS
1921 if (!uid_map)
1922 return -ENOMEM;
1923 }
d251207d 1924
587ab01b
ZJS
1925 if (gid != 0 && gid_is_valid(gid)) {
1926 r = asprintf(&gid_map,
1927 "0 0 1\n" /* Map root → root */
1928 GID_FMT " " GID_FMT " 1\n", /* Map $GID → $GID */
1929 gid, gid);
1930 if (r < 0)
1931 return -ENOMEM;
1932 } else {
d251207d 1933 gid_map = strdup("0 0 1\n"); /* The case where the above is the same */
587ab01b
ZJS
1934 if (!gid_map)
1935 return -ENOMEM;
1936 }
d251207d
LP
1937
1938 /* Create a communication channel so that the parent can tell the child when it finished creating the user
1939 * namespace. */
1940 unshare_ready_fd = eventfd(0, EFD_CLOEXEC);
1941 if (unshare_ready_fd < 0)
1942 return -errno;
1943
1944 /* Create a communication channel so that the child can tell the parent a proper error code in case it
1945 * failed. */
1946 if (pipe2(errno_pipe, O_CLOEXEC) < 0)
1947 return -errno;
1948
4c253ed1
LP
1949 r = safe_fork("(sd-userns)", FORK_RESET_SIGNALS|FORK_DEATHSIG, &pid);
1950 if (r < 0)
1951 return r;
1952 if (r == 0) {
d251207d
LP
1953 _cleanup_close_ int fd = -1;
1954 const char *a;
1955 pid_t ppid;
1956
1957 /* Child process, running in the original user namespace. Let's update the parent's UID/GID map from
1958 * here, after the parent opened its own user namespace. */
1959
1960 ppid = getppid();
1961 errno_pipe[0] = safe_close(errno_pipe[0]);
1962
1963 /* Wait until the parent unshared the user namespace */
1964 if (read(unshare_ready_fd, &c, sizeof(c)) < 0) {
1965 r = -errno;
1966 goto child_fail;
1967 }
1968
1969 /* Disable the setgroups() system call in the child user namespace, for good. */
1970 a = procfs_file_alloca(ppid, "setgroups");
1971 fd = open(a, O_WRONLY|O_CLOEXEC);
1972 if (fd < 0) {
1973 if (errno != ENOENT) {
1974 r = -errno;
1975 goto child_fail;
1976 }
1977
1978 /* If the file is missing the kernel is too old, let's continue anyway. */
1979 } else {
1980 if (write(fd, "deny\n", 5) < 0) {
1981 r = -errno;
1982 goto child_fail;
1983 }
1984
1985 fd = safe_close(fd);
1986 }
1987
1988 /* First write the GID map */
1989 a = procfs_file_alloca(ppid, "gid_map");
1990 fd = open(a, O_WRONLY|O_CLOEXEC);
1991 if (fd < 0) {
1992 r = -errno;
1993 goto child_fail;
1994 }
1995 if (write(fd, gid_map, strlen(gid_map)) < 0) {
1996 r = -errno;
1997 goto child_fail;
1998 }
1999 fd = safe_close(fd);
2000
2001 /* The write the UID map */
2002 a = procfs_file_alloca(ppid, "uid_map");
2003 fd = open(a, O_WRONLY|O_CLOEXEC);
2004 if (fd < 0) {
2005 r = -errno;
2006 goto child_fail;
2007 }
2008 if (write(fd, uid_map, strlen(uid_map)) < 0) {
2009 r = -errno;
2010 goto child_fail;
2011 }
2012
2013 _exit(EXIT_SUCCESS);
2014
2015 child_fail:
2016 (void) write(errno_pipe[1], &r, sizeof(r));
2017 _exit(EXIT_FAILURE);
2018 }
2019
2020 errno_pipe[1] = safe_close(errno_pipe[1]);
2021
2022 if (unshare(CLONE_NEWUSER) < 0)
2023 return -errno;
2024
2025 /* Let the child know that the namespace is ready now */
2026 if (write(unshare_ready_fd, &c, sizeof(c)) < 0)
2027 return -errno;
2028
2029 /* Try to read an error code from the child */
2030 n = read(errno_pipe[0], &r, sizeof(r));
2031 if (n < 0)
2032 return -errno;
2033 if (n == sizeof(r)) { /* an error code was sent to us */
2034 if (r < 0)
2035 return r;
2036 return -EIO;
2037 }
2038 if (n != 0) /* on success we should have read 0 bytes */
2039 return -EIO;
2040
2e87a1fd
LP
2041 r = wait_for_terminate_and_check("(sd-userns)", pid, 0);
2042 pid = 0;
d251207d
LP
2043 if (r < 0)
2044 return r;
2e87a1fd 2045 if (r != EXIT_SUCCESS) /* If something strange happened with the child, let's consider this fatal, too */
d251207d
LP
2046 return -EIO;
2047
2048 return 0;
2049}
2050
3536f49e 2051static int setup_exec_directory(
07689d5d
LP
2052 const ExecContext *context,
2053 const ExecParameters *params,
2054 uid_t uid,
3536f49e 2055 gid_t gid,
3536f49e
YW
2056 ExecDirectoryType type,
2057 int *exit_status) {
07689d5d 2058
72fd1768 2059 static const int exit_status_table[_EXEC_DIRECTORY_TYPE_MAX] = {
3536f49e
YW
2060 [EXEC_DIRECTORY_RUNTIME] = EXIT_RUNTIME_DIRECTORY,
2061 [EXEC_DIRECTORY_STATE] = EXIT_STATE_DIRECTORY,
2062 [EXEC_DIRECTORY_CACHE] = EXIT_CACHE_DIRECTORY,
2063 [EXEC_DIRECTORY_LOGS] = EXIT_LOGS_DIRECTORY,
2064 [EXEC_DIRECTORY_CONFIGURATION] = EXIT_CONFIGURATION_DIRECTORY,
2065 };
07689d5d
LP
2066 char **rt;
2067 int r;
2068
2069 assert(context);
2070 assert(params);
72fd1768 2071 assert(type >= 0 && type < _EXEC_DIRECTORY_TYPE_MAX);
3536f49e 2072 assert(exit_status);
07689d5d 2073
3536f49e
YW
2074 if (!params->prefix[type])
2075 return 0;
2076
8679efde 2077 if (params->flags & EXEC_CHOWN_DIRECTORIES) {
3536f49e
YW
2078 if (!uid_is_valid(uid))
2079 uid = 0;
2080 if (!gid_is_valid(gid))
2081 gid = 0;
2082 }
2083
2084 STRV_FOREACH(rt, context->directories[type].paths) {
6c47cd7d 2085 _cleanup_free_ char *p = NULL, *pp = NULL;
07689d5d 2086
edbfeb12 2087 p = path_join(params->prefix[type], *rt);
3536f49e
YW
2088 if (!p) {
2089 r = -ENOMEM;
2090 goto fail;
2091 }
07689d5d 2092
23a7448e
YW
2093 r = mkdir_parents_label(p, 0755);
2094 if (r < 0)
3536f49e 2095 goto fail;
23a7448e 2096
8092a48c 2097 if (context->dynamic_user &&
40cd2ecc
LP
2098 (!IN_SET(type, EXEC_DIRECTORY_RUNTIME, EXEC_DIRECTORY_CONFIGURATION) ||
2099 (type == EXEC_DIRECTORY_RUNTIME && context->runtime_directory_preserve_mode != EXEC_PRESERVE_NO))) {
6c9c51e5 2100 _cleanup_free_ char *private_root = NULL;
6c47cd7d
LP
2101
2102 /* So, here's one extra complication when dealing with DynamicUser=1 units. In that case we
2103 * want to avoid leaving a directory around fully accessible that is owned by a dynamic user
2104 * whose UID is later on reused. To lock this down we use the same trick used by container
2105 * managers to prohibit host users to get access to files of the same UID in containers: we
2106 * place everything inside a directory that has an access mode of 0700 and is owned root:root,
2107 * so that it acts as security boundary for unprivileged host code. We then use fs namespacing
2108 * to make this directory permeable for the service itself.
2109 *
2110 * Specifically: for a service which wants a special directory "foo/" we first create a
2111 * directory "private/" with access mode 0700 owned by root:root. Then we place "foo" inside of
2112 * that directory (i.e. "private/foo/"), and make "foo" a symlink to "private/foo". This way,
2113 * privileged host users can access "foo/" as usual, but unprivileged host users can't look
2114 * into it. Inside of the namespaceof the container "private/" is replaced by a more liberally
2115 * accessible tmpfs, into which the host's "private/foo/" is mounted under the same name, thus
2116 * disabling the access boundary for the service and making sure it only gets access to the
2117 * dirs it needs but no others. Tricky? Yes, absolutely, but it works!
2118 *
2119 * Note that we don't do this for EXEC_DIRECTORY_CONFIGURATION as that's assumed not to be
8092a48c
YW
2120 * owned by the service itself.
2121 * Also, note that we don't do this for EXEC_DIRECTORY_RUNTIME as that's often used for sharing
2122 * files or sockets with other services. */
6c47cd7d 2123
edbfeb12 2124 private_root = path_join(params->prefix[type], "private");
6c47cd7d
LP
2125 if (!private_root) {
2126 r = -ENOMEM;
2127 goto fail;
2128 }
2129
2130 /* First set up private root if it doesn't exist yet, with access mode 0700 and owned by root:root */
37c1d5e9 2131 r = mkdir_safe_label(private_root, 0700, 0, 0, MKDIR_WARN_MODE);
6c47cd7d
LP
2132 if (r < 0)
2133 goto fail;
2134
edbfeb12 2135 pp = path_join(private_root, *rt);
6c47cd7d
LP
2136 if (!pp) {
2137 r = -ENOMEM;
2138 goto fail;
2139 }
2140
2141 /* Create all directories between the configured directory and this private root, and mark them 0755 */
2142 r = mkdir_parents_label(pp, 0755);
2143 if (r < 0)
2144 goto fail;
2145
949befd3
LP
2146 if (is_dir(p, false) > 0 &&
2147 (laccess(pp, F_OK) < 0 && errno == ENOENT)) {
2148
2149 /* Hmm, the private directory doesn't exist yet, but the normal one exists? If so, move
2150 * it over. Most likely the service has been upgraded from one that didn't use
2151 * DynamicUser=1, to one that does. */
2152
2153 if (rename(p, pp) < 0) {
2154 r = -errno;
2155 goto fail;
2156 }
2157 } else {
2158 /* Otherwise, create the actual directory for the service */
2159
2160 r = mkdir_label(pp, context->directories[type].mode);
2161 if (r < 0 && r != -EEXIST)
2162 goto fail;
2163 }
6c47cd7d 2164
6c47cd7d 2165 /* And link it up from the original place */
6c9c51e5 2166 r = symlink_idempotent(pp, p, true);
6c47cd7d
LP
2167 if (r < 0)
2168 goto fail;
2169
6c47cd7d
LP
2170 } else {
2171 r = mkdir_label(p, context->directories[type].mode);
d484580c 2172 if (r < 0) {
d484580c
LP
2173 if (r != -EEXIST)
2174 goto fail;
2175
206e9864
LP
2176 if (type == EXEC_DIRECTORY_CONFIGURATION) {
2177 struct stat st;
2178
2179 /* Don't change the owner/access mode of the configuration directory,
2180 * as in the common case it is not written to by a service, and shall
2181 * not be writable. */
2182
2183 if (stat(p, &st) < 0) {
2184 r = -errno;
2185 goto fail;
2186 }
2187
2188 /* Still complain if the access mode doesn't match */
2189 if (((st.st_mode ^ context->directories[type].mode) & 07777) != 0)
2190 log_warning("%s \'%s\' already exists but the mode is different. "
2191 "(File system: %o %sMode: %o)",
2192 exec_directory_type_to_string(type), *rt,
2193 st.st_mode & 07777, exec_directory_type_to_string(type), context->directories[type].mode & 07777);
2194
6cff72eb 2195 continue;
206e9864 2196 }
6cff72eb 2197 }
a1164ae3 2198 }
07689d5d 2199
206e9864 2200 /* Lock down the access mode (we use chmod_and_chown() to make this idempotent. We don't
5238e957 2201 * specify UID/GID here, so that path_chown_recursive() can optimize things depending on the
206e9864
LP
2202 * current UID/GID ownership.) */
2203 r = chmod_and_chown(pp ?: p, context->directories[type].mode, UID_INVALID, GID_INVALID);
2204 if (r < 0)
2205 goto fail;
c71b2eb7 2206
607b358e
LP
2207 /* Then, change the ownership of the whole tree, if necessary. When dynamic users are used we
2208 * drop the suid/sgid bits, since we really don't want SUID/SGID files for dynamic UID/GID
2209 * assignments to exist.*/
2210 r = path_chown_recursive(pp ?: p, uid, gid, context->dynamic_user ? 01777 : 07777);
07689d5d 2211 if (r < 0)
3536f49e 2212 goto fail;
07689d5d
LP
2213 }
2214
2215 return 0;
3536f49e
YW
2216
2217fail:
2218 *exit_status = exit_status_table[type];
3536f49e 2219 return r;
07689d5d
LP
2220}
2221
#if ENABLE_SMACK
/* Applies the SMACK process label to the calling process: the explicitly configured label if there is
 * one, otherwise (only if SMACK_DEFAULT_PROCESS_LABEL is defined) the exec label of the command's
 * binary, falling back to SMACK_DEFAULT_PROCESS_LABEL. Returns 0 on success, negative on failure. */
static int setup_smack(
                const ExecContext *context,
                const ExecCommand *command) {

        int r;

        assert(context);
        assert(command);

        if (context->smack_process_label) {
                r = mac_smack_apply_pid(0, context->smack_process_label);
                if (r < 0)
                        return r;

                return 0;
        }

#ifdef SMACK_DEFAULT_PROCESS_LABEL
        {
                _cleanup_free_ char *label_from_binary = NULL;

                /* A missing attribute or missing SMACK support is not an error here, we then simply
                 * use the default label. */
                r = mac_smack_read(command->path, SMACK_ATTR_EXEC, &label_from_binary);
                if (r < 0 && !IN_SET(r, -ENODATA, -EOPNOTSUPP))
                        return r;

                r = mac_smack_apply_pid(0, label_from_binary ?: SMACK_DEFAULT_PROCESS_LABEL);
                if (r < 0)
                        return r;
        }
#endif

        return 0;
}
#endif
cefc33ae 2254
6c47cd7d
LP
2255static int compile_bind_mounts(
2256 const ExecContext *context,
2257 const ExecParameters *params,
2258 BindMount **ret_bind_mounts,
da6053d0 2259 size_t *ret_n_bind_mounts,
6c47cd7d
LP
2260 char ***ret_empty_directories) {
2261
2262 _cleanup_strv_free_ char **empty_directories = NULL;
2263 BindMount *bind_mounts;
da6053d0 2264 size_t n, h = 0, i;
6c47cd7d
LP
2265 ExecDirectoryType t;
2266 int r;
2267
2268 assert(context);
2269 assert(params);
2270 assert(ret_bind_mounts);
2271 assert(ret_n_bind_mounts);
2272 assert(ret_empty_directories);
2273
2274 n = context->n_bind_mounts;
2275 for (t = 0; t < _EXEC_DIRECTORY_TYPE_MAX; t++) {
2276 if (!params->prefix[t])
2277 continue;
2278
2279 n += strv_length(context->directories[t].paths);
2280 }
2281
2282 if (n <= 0) {
2283 *ret_bind_mounts = NULL;
2284 *ret_n_bind_mounts = 0;
2285 *ret_empty_directories = NULL;
2286 return 0;
2287 }
2288
2289 bind_mounts = new(BindMount, n);
2290 if (!bind_mounts)
2291 return -ENOMEM;
2292
a8cabc61 2293 for (i = 0; i < context->n_bind_mounts; i++) {
6c47cd7d
LP
2294 BindMount *item = context->bind_mounts + i;
2295 char *s, *d;
2296
2297 s = strdup(item->source);
2298 if (!s) {
2299 r = -ENOMEM;
2300 goto finish;
2301 }
2302
2303 d = strdup(item->destination);
2304 if (!d) {
2305 free(s);
2306 r = -ENOMEM;
2307 goto finish;
2308 }
2309
2310 bind_mounts[h++] = (BindMount) {
2311 .source = s,
2312 .destination = d,
2313 .read_only = item->read_only,
2314 .recursive = item->recursive,
2315 .ignore_enoent = item->ignore_enoent,
2316 };
2317 }
2318
2319 for (t = 0; t < _EXEC_DIRECTORY_TYPE_MAX; t++) {
2320 char **suffix;
2321
2322 if (!params->prefix[t])
2323 continue;
2324
2325 if (strv_isempty(context->directories[t].paths))
2326 continue;
2327
8092a48c 2328 if (context->dynamic_user &&
5609f688
YW
2329 !IN_SET(t, EXEC_DIRECTORY_RUNTIME, EXEC_DIRECTORY_CONFIGURATION) &&
2330 !(context->root_directory || context->root_image)) {
6c47cd7d
LP
2331 char *private_root;
2332
2333 /* So this is for a dynamic user, and we need to make sure the process can access its own
2334 * directory. For that we overmount the usually inaccessible "private" subdirectory with a
2335 * tmpfs that makes it accessible and is empty except for the submounts we do this for. */
2336
2337 private_root = strjoin(params->prefix[t], "/private");
2338 if (!private_root) {
2339 r = -ENOMEM;
2340 goto finish;
2341 }
2342
2343 r = strv_consume(&empty_directories, private_root);
a635a7ae 2344 if (r < 0)
6c47cd7d 2345 goto finish;
6c47cd7d
LP
2346 }
2347
2348 STRV_FOREACH(suffix, context->directories[t].paths) {
2349 char *s, *d;
2350
8092a48c
YW
2351 if (context->dynamic_user &&
2352 !IN_SET(t, EXEC_DIRECTORY_RUNTIME, EXEC_DIRECTORY_CONFIGURATION))
6c47cd7d
LP
2353 s = strjoin(params->prefix[t], "/private/", *suffix);
2354 else
2355 s = strjoin(params->prefix[t], "/", *suffix);
2356 if (!s) {
2357 r = -ENOMEM;
2358 goto finish;
2359 }
2360
5609f688
YW
2361 if (context->dynamic_user &&
2362 !IN_SET(t, EXEC_DIRECTORY_RUNTIME, EXEC_DIRECTORY_CONFIGURATION) &&
2363 (context->root_directory || context->root_image))
2364 /* When RootDirectory= or RootImage= are set, then the symbolic link to the private
2365 * directory is not created on the root directory. So, let's bind-mount the directory
2366 * on the 'non-private' place. */
2367 d = strjoin(params->prefix[t], "/", *suffix);
2368 else
2369 d = strdup(s);
6c47cd7d
LP
2370 if (!d) {
2371 free(s);
2372 r = -ENOMEM;
2373 goto finish;
2374 }
2375
2376 bind_mounts[h++] = (BindMount) {
2377 .source = s,
2378 .destination = d,
2379 .read_only = false,
9ce4e4b0 2380 .nosuid = context->dynamic_user, /* don't allow suid/sgid when DynamicUser= is on */
6c47cd7d
LP
2381 .recursive = true,
2382 .ignore_enoent = false,
2383 };
2384 }
2385 }
2386
2387 assert(h == n);
2388
2389 *ret_bind_mounts = bind_mounts;
2390 *ret_n_bind_mounts = n;
ae2a15bc 2391 *ret_empty_directories = TAKE_PTR(empty_directories);
6c47cd7d
LP
2392
2393 return (int) n;
2394
2395finish:
2396 bind_mount_free_many(bind_mounts, h);
2397 return r;
2398}
2399
6818c54c 2400static int apply_mount_namespace(
34cf6c43
YW
2401 const Unit *u,
2402 const ExecCommand *command,
6818c54c
LP
2403 const ExecContext *context,
2404 const ExecParameters *params,
7cc5ef5f
ZJS
2405 const ExecRuntime *runtime,
2406 char **error_path) {
6818c54c 2407
7bcef4ef 2408 _cleanup_strv_free_ char **empty_directories = NULL;
93c6bb51 2409 char *tmp = NULL, *var = NULL;
915e6d16 2410 const char *root_dir = NULL, *root_image = NULL;
228af36f 2411 NamespaceInfo ns_info;
165a31c0 2412 bool needs_sandboxing;
6c47cd7d 2413 BindMount *bind_mounts = NULL;
da6053d0 2414 size_t n_bind_mounts = 0;
6818c54c 2415 int r;
93c6bb51 2416
2b3c1b9e
DH
2417 assert(context);
2418
93c6bb51
DH
2419 /* The runtime struct only contains the parent of the private /tmp,
2420 * which is non-accessible to world users. Inside of it there's a /tmp
2421 * that is sticky, and that's the one we want to use here. */
2422
2423 if (context->private_tmp && runtime) {
2424 if (runtime->tmp_dir)
2425 tmp = strjoina(runtime->tmp_dir, "/tmp");
2426 if (runtime->var_tmp_dir)
2427 var = strjoina(runtime->var_tmp_dir, "/tmp");
2428 }
2429
915e6d16
LP
2430 if (params->flags & EXEC_APPLY_CHROOT) {
2431 root_image = context->root_image;
2432
2433 if (!root_image)
2434 root_dir = context->root_directory;
2435 }
93c6bb51 2436
6c47cd7d
LP
2437 r = compile_bind_mounts(context, params, &bind_mounts, &n_bind_mounts, &empty_directories);
2438 if (r < 0)
2439 return r;
2440
165a31c0 2441 needs_sandboxing = (params->flags & EXEC_APPLY_SANDBOXING) && !(command->flags & EXEC_COMMAND_FULLY_PRIVILEGED);
b5a33299
YW
2442 if (needs_sandboxing)
2443 ns_info = (NamespaceInfo) {
2444 .ignore_protect_paths = false,
2445 .private_dev = context->private_devices,
2446 .protect_control_groups = context->protect_control_groups,
2447 .protect_kernel_tunables = context->protect_kernel_tunables,
2448 .protect_kernel_modules = context->protect_kernel_modules,
aecd5ac6 2449 .protect_hostname = context->protect_hostname,
b5a33299 2450 .mount_apivfs = context->mount_apivfs,
228af36f 2451 .private_mounts = context->private_mounts,
b5a33299 2452 };
228af36f
LP
2453 else if (!context->dynamic_user && root_dir)
2454 /*
2455 * If DynamicUser=no and RootDirectory= is set then lets pass a relaxed
2456 * sandbox info, otherwise enforce it, don't ignore protected paths and
2457 * fail if we are enable to apply the sandbox inside the mount namespace.
2458 */
2459 ns_info = (NamespaceInfo) {
2460 .ignore_protect_paths = true,
2461 };
2462 else
2463 ns_info = (NamespaceInfo) {};
b5a33299 2464
37ed15d7
FB
2465 if (context->mount_flags == MS_SHARED)
2466 log_unit_debug(u, "shared mount propagation hidden by other fs namespacing unit settings: ignoring");
2467
915e6d16 2468 r = setup_namespace(root_dir, root_image,
7bcef4ef 2469 &ns_info, context->read_write_paths,
165a31c0
LP
2470 needs_sandboxing ? context->read_only_paths : NULL,
2471 needs_sandboxing ? context->inaccessible_paths : NULL,
6c47cd7d
LP
2472 empty_directories,
2473 bind_mounts,
2474 n_bind_mounts,
2abd4e38
YW
2475 context->temporary_filesystems,
2476 context->n_temporary_filesystems,
93c6bb51
DH
2477 tmp,
2478 var,
165a31c0
LP
2479 needs_sandboxing ? context->protect_home : PROTECT_HOME_NO,
2480 needs_sandboxing ? context->protect_system : PROTECT_SYSTEM_NO,
915e6d16 2481 context->mount_flags,
7cc5ef5f
ZJS
2482 DISSECT_IMAGE_DISCARD_ON_LOOP,
2483 error_path);
93c6bb51 2484
6c47cd7d
LP
2485 bind_mount_free_many(bind_mounts, n_bind_mounts);
2486
1beab8b0 2487 /* If we couldn't set up the namespace this is probably due to a missing capability. setup_namespace() reports
5238e957 2488 * that with a special, recognizable error ENOANO. In this case, silently proceed, but only if exclusively
1beab8b0
LP
2489 * sandboxing options were used, i.e. nothing such as RootDirectory= or BindMount= that would result in a
2490 * completely different execution environment. */
aca835ed
YW
2491 if (r == -ENOANO) {
2492 if (n_bind_mounts == 0 &&
2493 context->n_temporary_filesystems == 0 &&
2494 !root_dir && !root_image &&
2495 !context->dynamic_user) {
2496 log_unit_debug(u, "Failed to set up namespace, assuming containerized execution and ignoring.");
2497 return 0;
2498 }
2499
2194547e
LP
2500 log_unit_debug(u, "Failed to set up namespace, and refusing to continue since the selected namespacing options alter mount environment non-trivially.\n"
2501 "Bind mounts: %zu, temporary filesystems: %zu, root directory: %s, root image: %s, dynamic user: %s",
2502 n_bind_mounts, context->n_temporary_filesystems, yes_no(root_dir), yes_no(root_image), yes_no(context->dynamic_user));
2503
aca835ed 2504 return -EOPNOTSUPP;
93c6bb51
DH
2505 }
2506
2507 return r;
2508}
2509
915e6d16
LP
2510static int apply_working_directory(
2511 const ExecContext *context,
2512 const ExecParameters *params,
2513 const char *home,
376fecf6
LP
2514 const bool needs_mount_ns,
2515 int *exit_status) {
915e6d16 2516
6732edab 2517 const char *d, *wd;
2b3c1b9e
DH
2518
2519 assert(context);
376fecf6 2520 assert(exit_status);
2b3c1b9e 2521
6732edab
LP
2522 if (context->working_directory_home) {
2523
376fecf6
LP
2524 if (!home) {
2525 *exit_status = EXIT_CHDIR;
6732edab 2526 return -ENXIO;
376fecf6 2527 }
6732edab 2528
2b3c1b9e 2529 wd = home;
6732edab
LP
2530
2531 } else if (context->working_directory)
2b3c1b9e
DH
2532 wd = context->working_directory;
2533 else
2534 wd = "/";
e7f1e7c6
DH
2535
2536 if (params->flags & EXEC_APPLY_CHROOT) {
2537 if (!needs_mount_ns && context->root_directory)
376fecf6
LP
2538 if (chroot(context->root_directory) < 0) {
2539 *exit_status = EXIT_CHROOT;
e7f1e7c6 2540 return -errno;
376fecf6 2541 }
e7f1e7c6 2542
2b3c1b9e
DH
2543 d = wd;
2544 } else
3b0e5bb5 2545 d = prefix_roota(context->root_directory, wd);
e7f1e7c6 2546
376fecf6
LP
2547 if (chdir(d) < 0 && !context->working_directory_missing_ok) {
2548 *exit_status = EXIT_CHDIR;
2b3c1b9e 2549 return -errno;
376fecf6 2550 }
e7f1e7c6
DH
2551
2552 return 0;
2553}
2554
b1edf445 2555static int setup_keyring(
34cf6c43 2556 const Unit *u,
b1edf445
LP
2557 const ExecContext *context,
2558 const ExecParameters *p,
2559 uid_t uid, gid_t gid) {
2560
74dd6b51 2561 key_serial_t keyring;
e64c2d0b
DJL
2562 int r = 0;
2563 uid_t saved_uid;
2564 gid_t saved_gid;
74dd6b51
LP
2565
2566 assert(u);
b1edf445 2567 assert(context);
74dd6b51
LP
2568 assert(p);
2569
2570 /* Let's set up a new per-service "session" kernel keyring for each system service. This has the benefit that
2571 * each service runs with its own keyring shared among all processes of the service, but with no hook-up beyond
2572 * that scope, and in particular no link to the per-UID keyring. If we don't do this the keyring will be
2573 * automatically created on-demand and then linked to the per-UID keyring, by the kernel. The kernel's built-in
2574 * on-demand behaviour is very appropriate for login users, but probably not so much for system services, where
2575 * UIDs are not necessarily specific to a service but reused (at least in the case of UID 0). */
2576
b1edf445
LP
2577 if (context->keyring_mode == EXEC_KEYRING_INHERIT)
2578 return 0;
2579
e64c2d0b
DJL
2580 /* Acquiring a reference to the user keyring is nasty. We briefly change identity in order to get things set up
2581 * properly by the kernel. If we don't do that then we can't create it atomically, and that sucks for parallel
2582 * execution. This mimics what pam_keyinit does, too. Setting up session keyring, to be owned by the right user
2583 * & group is just as nasty as acquiring a reference to the user keyring. */
2584
2585 saved_uid = getuid();
2586 saved_gid = getgid();
2587
2588 if (gid_is_valid(gid) && gid != saved_gid) {
2589 if (setregid(gid, -1) < 0)
2590 return log_unit_error_errno(u, errno, "Failed to change GID for user keyring: %m");
2591 }
2592
2593 if (uid_is_valid(uid) && uid != saved_uid) {
2594 if (setreuid(uid, -1) < 0) {
2595 r = log_unit_error_errno(u, errno, "Failed to change UID for user keyring: %m");
2596 goto out;
2597 }
2598 }
2599
74dd6b51
LP
2600 keyring = keyctl(KEYCTL_JOIN_SESSION_KEYRING, 0, 0, 0, 0);
2601 if (keyring == -1) {
2602 if (errno == ENOSYS)
8002fb97 2603 log_unit_debug_errno(u, errno, "Kernel keyring not supported, ignoring.");
74dd6b51 2604 else if (IN_SET(errno, EACCES, EPERM))
8002fb97 2605 log_unit_debug_errno(u, errno, "Kernel keyring access prohibited, ignoring.");
74dd6b51 2606 else if (errno == EDQUOT)
8002fb97 2607 log_unit_debug_errno(u, errno, "Out of kernel keyrings to allocate, ignoring.");
74dd6b51 2608 else
e64c2d0b 2609 r = log_unit_error_errno(u, errno, "Setting up kernel keyring failed: %m");
74dd6b51 2610
e64c2d0b 2611 goto out;
74dd6b51
LP
2612 }
2613
e64c2d0b
DJL
2614 /* When requested link the user keyring into the session keyring. */
2615 if (context->keyring_mode == EXEC_KEYRING_SHARED) {
2616
2617 if (keyctl(KEYCTL_LINK,
2618 KEY_SPEC_USER_KEYRING,
2619 KEY_SPEC_SESSION_KEYRING, 0, 0) < 0) {
2620 r = log_unit_error_errno(u, errno, "Failed to link user keyring into session keyring: %m");
2621 goto out;
2622 }
2623 }
2624
2625 /* Restore uid/gid back */
2626 if (uid_is_valid(uid) && uid != saved_uid) {
2627 if (setreuid(saved_uid, -1) < 0) {
2628 r = log_unit_error_errno(u, errno, "Failed to change UID back for user keyring: %m");
2629 goto out;
2630 }
2631 }
2632
2633 if (gid_is_valid(gid) && gid != saved_gid) {
2634 if (setregid(saved_gid, -1) < 0)
2635 return log_unit_error_errno(u, errno, "Failed to change GID back for user keyring: %m");
2636 }
2637
2638 /* Populate they keyring with the invocation ID by default, as original saved_uid. */
b3415f5d
LP
2639 if (!sd_id128_is_null(u->invocation_id)) {
2640 key_serial_t key;
2641
2642 key = add_key("user", "invocation_id", &u->invocation_id, sizeof(u->invocation_id), KEY_SPEC_SESSION_KEYRING);
2643 if (key == -1)
8002fb97 2644 log_unit_debug_errno(u, errno, "Failed to add invocation ID to keyring, ignoring: %m");
b3415f5d
LP
2645 else {
2646 if (keyctl(KEYCTL_SETPERM, key,
2647 KEY_POS_VIEW|KEY_POS_READ|KEY_POS_SEARCH|
2648 KEY_USR_VIEW|KEY_USR_READ|KEY_USR_SEARCH, 0, 0) < 0)
e64c2d0b 2649 r = log_unit_error_errno(u, errno, "Failed to restrict invocation ID permission: %m");
b3415f5d
LP
2650 }
2651 }
2652
e64c2d0b
DJL
2653out:
2654 /* Revert back uid & gid for the the last time, and exit */
2655 /* no extra logging, as only the first already reported error matters */
2656 if (getuid() != saved_uid)
2657 (void) setreuid(saved_uid, -1);
b1edf445 2658
e64c2d0b
DJL
2659 if (getgid() != saved_gid)
2660 (void) setregid(saved_gid, -1);
b1edf445 2661
e64c2d0b 2662 return r;
74dd6b51
LP
2663}
2664
3042bbeb 2665static void append_socket_pair(int *array, size_t *n, const int pair[static 2]) {
29206d46
LP
2666 assert(array);
2667 assert(n);
2668
2669 if (!pair)
2670 return;
2671
2672 if (pair[0] >= 0)
2673 array[(*n)++] = pair[0];
2674 if (pair[1] >= 0)
2675 array[(*n)++] = pair[1];
2676}
2677
a34ceba6
LP
2678static int close_remaining_fds(
2679 const ExecParameters *params,
34cf6c43
YW
2680 const ExecRuntime *runtime,
2681 const DynamicCreds *dcreds,
00d9ef85 2682 int user_lookup_fd,
a34ceba6 2683 int socket_fd,
5686391b 2684 int exec_fd,
da6053d0 2685 int *fds, size_t n_fds) {
a34ceba6 2686
da6053d0 2687 size_t n_dont_close = 0;
00d9ef85 2688 int dont_close[n_fds + 12];
a34ceba6
LP
2689
2690 assert(params);
2691
2692 if (params->stdin_fd >= 0)
2693 dont_close[n_dont_close++] = params->stdin_fd;
2694 if (params->stdout_fd >= 0)
2695 dont_close[n_dont_close++] = params->stdout_fd;
2696 if (params->stderr_fd >= 0)
2697 dont_close[n_dont_close++] = params->stderr_fd;
2698
2699 if (socket_fd >= 0)
2700 dont_close[n_dont_close++] = socket_fd;
5686391b
LP
2701 if (exec_fd >= 0)
2702 dont_close[n_dont_close++] = exec_fd;
a34ceba6
LP
2703 if (n_fds > 0) {
2704 memcpy(dont_close + n_dont_close, fds, sizeof(int) * n_fds);
2705 n_dont_close += n_fds;
2706 }
2707
29206d46
LP
2708 if (runtime)
2709 append_socket_pair(dont_close, &n_dont_close, runtime->netns_storage_socket);
2710
2711 if (dcreds) {
2712 if (dcreds->user)
2713 append_socket_pair(dont_close, &n_dont_close, dcreds->user->storage_socket);
2714 if (dcreds->group)
2715 append_socket_pair(dont_close, &n_dont_close, dcreds->group->storage_socket);
a34ceba6
LP
2716 }
2717
00d9ef85
LP
2718 if (user_lookup_fd >= 0)
2719 dont_close[n_dont_close++] = user_lookup_fd;
2720
a34ceba6
LP
2721 return close_all_fds(dont_close, n_dont_close);
2722}
2723
00d9ef85
LP
2724static int send_user_lookup(
2725 Unit *unit,
2726 int user_lookup_fd,
2727 uid_t uid,
2728 gid_t gid) {
2729
2730 assert(unit);
2731
2732 /* Send the resolved UID/GID to PID 1 after we learnt it. We send a single datagram, containing the UID/GID
2733 * data as well as the unit name. Note that we suppress sending this if no user/group to resolve was
2734 * specified. */
2735
2736 if (user_lookup_fd < 0)
2737 return 0;
2738
2739 if (!uid_is_valid(uid) && !gid_is_valid(gid))
2740 return 0;
2741
2742 if (writev(user_lookup_fd,
2743 (struct iovec[]) {
e6a7ec4b
LP
2744 IOVEC_INIT(&uid, sizeof(uid)),
2745 IOVEC_INIT(&gid, sizeof(gid)),
2746 IOVEC_INIT_STRING(unit->id) }, 3) < 0)
00d9ef85
LP
2747 return -errno;
2748
2749 return 0;
2750}
2751
6732edab
LP
2752static int acquire_home(const ExecContext *c, uid_t uid, const char** home, char **buf) {
2753 int r;
2754
2755 assert(c);
2756 assert(home);
2757 assert(buf);
2758
2759 /* If WorkingDirectory=~ is set, try to acquire a usable home directory. */
2760
2761 if (*home)
2762 return 0;
2763
2764 if (!c->working_directory_home)
2765 return 0;
2766
6732edab
LP
2767 r = get_home_dir(buf);
2768 if (r < 0)
2769 return r;
2770
2771 *home = *buf;
2772 return 1;
2773}
2774
da50b85a
LP
2775static int compile_suggested_paths(const ExecContext *c, const ExecParameters *p, char ***ret) {
2776 _cleanup_strv_free_ char ** list = NULL;
2777 ExecDirectoryType t;
2778 int r;
2779
2780 assert(c);
2781 assert(p);
2782 assert(ret);
2783
2784 assert(c->dynamic_user);
2785
2786 /* Compile a list of paths that it might make sense to read the owning UID from to use as initial candidate for
2787 * dynamic UID allocation, in order to save us from doing costly recursive chown()s of the special
2788 * directories. */
2789
2790 for (t = 0; t < _EXEC_DIRECTORY_TYPE_MAX; t++) {
2791 char **i;
2792
2793 if (t == EXEC_DIRECTORY_CONFIGURATION)
2794 continue;
2795
2796 if (!p->prefix[t])
2797 continue;
2798
2799 STRV_FOREACH(i, c->directories[t].paths) {
2800 char *e;
2801
8092a48c
YW
2802 if (t == EXEC_DIRECTORY_RUNTIME)
2803 e = strjoin(p->prefix[t], "/", *i);
2804 else
2805 e = strjoin(p->prefix[t], "/private/", *i);
da50b85a
LP
2806 if (!e)
2807 return -ENOMEM;
2808
2809 r = strv_consume(&list, e);
2810 if (r < 0)
2811 return r;
2812 }
2813 }
2814
ae2a15bc 2815 *ret = TAKE_PTR(list);
da50b85a
LP
2816
2817 return 0;
2818}
2819
34cf6c43
YW
2820static char *exec_command_line(char **argv);
2821
78f93209
LP
2822static int exec_parameters_get_cgroup_path(const ExecParameters *params, char **ret) {
2823 bool using_subcgroup;
2824 char *p;
2825
2826 assert(params);
2827 assert(ret);
2828
2829 if (!params->cgroup_path)
2830 return -EINVAL;
2831
2832 /* If we are called for a unit where cgroup delegation is on, and the payload created its own populated
2833 * subcgroup (which we expect it to do, after all it asked for delegation), then we cannot place the control
2834 * processes started after the main unit's process in the unit's main cgroup because it is now an inner one,
2835 * and inner cgroups may not contain processes. Hence, if delegation is on, and this is a control process,
2836 * let's use ".control" as subcgroup instead. Note that we do so only for ExecStartPost=, ExecReload=,
2837 * ExecStop=, ExecStopPost=, i.e. for the commands where the main process is already forked. For ExecStartPre=
2838 * this is not necessary, the cgroup is still empty. We distinguish these cases with the EXEC_CONTROL_CGROUP
2839 * flag, which is only passed for the former statements, not for the latter. */
2840
2841 using_subcgroup = FLAGS_SET(params->flags, EXEC_CONTROL_CGROUP|EXEC_CGROUP_DELEGATE|EXEC_IS_CONTROL);
2842 if (using_subcgroup)
2843 p = strjoin(params->cgroup_path, "/.control");
2844 else
2845 p = strdup(params->cgroup_path);
2846 if (!p)
2847 return -ENOMEM;
2848
2849 *ret = p;
2850 return using_subcgroup;
2851}
2852
ff0af2a1 2853static int exec_child(
f2341e0a 2854 Unit *unit,
34cf6c43 2855 const ExecCommand *command,
ff0af2a1
LP
2856 const ExecContext *context,
2857 const ExecParameters *params,
2858 ExecRuntime *runtime,
29206d46 2859 DynamicCreds *dcreds,
ff0af2a1 2860 int socket_fd,
52c239d7 2861 int named_iofds[3],
4c47affc 2862 int *fds,
da6053d0 2863 size_t n_socket_fds,
25b583d7 2864 size_t n_storage_fds,
ff0af2a1 2865 char **files_env,
00d9ef85 2866 int user_lookup_fd,
12145637 2867 int *exit_status) {
d35fbf6b 2868
7ca69792 2869 _cleanup_strv_free_ char **our_env = NULL, **pass_env = NULL, **accum_env = NULL, **replaced_argv = NULL;
5686391b 2870 int *fds_with_exec_fd, n_fds_with_exec_fd, r, ngids = 0, exec_fd = -1;
4d885bd3
DH
2871 _cleanup_free_ gid_t *supplementary_gids = NULL;
2872 const char *username = NULL, *groupname = NULL;
5686391b 2873 _cleanup_free_ char *home_buffer = NULL;
2b3c1b9e 2874 const char *home = NULL, *shell = NULL;
7ca69792 2875 char **final_argv = NULL;
7bce046b
LP
2876 dev_t journal_stream_dev = 0;
2877 ino_t journal_stream_ino = 0;
165a31c0
LP
2878 bool needs_sandboxing, /* Do we need to set up full sandboxing? (i.e. all namespacing, all MAC stuff, caps, yadda yadda */
2879 needs_setuid, /* Do we need to do the actual setresuid()/setresgid() calls? */
2880 needs_mount_namespace, /* Do we need to set up a mount namespace for this kernel? */
2881 needs_ambient_hack; /* Do we need to apply the ambient capabilities hack? */
349cc4a5 2882#if HAVE_SELINUX
7f59dd35 2883 _cleanup_free_ char *mac_selinux_context_net = NULL;
43b1f709 2884 bool use_selinux = false;
ecfbc84f 2885#endif
f9fa32f0 2886#if ENABLE_SMACK
43b1f709 2887 bool use_smack = false;
ecfbc84f 2888#endif
349cc4a5 2889#if HAVE_APPARMOR
43b1f709 2890 bool use_apparmor = false;
ecfbc84f 2891#endif
fed1e721
LP
2892 uid_t uid = UID_INVALID;
2893 gid_t gid = GID_INVALID;
da6053d0 2894 size_t n_fds;
3536f49e 2895 ExecDirectoryType dt;
165a31c0 2896 int secure_bits;
034c6ed7 2897
f2341e0a 2898 assert(unit);
5cb5a6ff
LP
2899 assert(command);
2900 assert(context);
d35fbf6b 2901 assert(params);
ff0af2a1 2902 assert(exit_status);
d35fbf6b
DM
2903
2904 rename_process_from_path(command->path);
2905
2906 /* We reset exactly these signals, since they are the
2907 * only ones we set to SIG_IGN in the main daemon. All
2908 * others we leave untouched because we set them to
2909 * SIG_DFL or a valid handler initially, both of which
2910 * will be demoted to SIG_DFL. */
ce30c8dc
LP
2911 (void) default_signals(SIGNALS_CRASH_HANDLER,
2912 SIGNALS_IGNORE, -1);
d35fbf6b
DM
2913
2914 if (context->ignore_sigpipe)
ce30c8dc 2915 (void) ignore_signals(SIGPIPE, -1);
d35fbf6b 2916
ff0af2a1
LP
2917 r = reset_signal_mask();
2918 if (r < 0) {
2919 *exit_status = EXIT_SIGNAL_MASK;
12145637 2920 return log_unit_error_errno(unit, r, "Failed to set process signal mask: %m");
d35fbf6b 2921 }
034c6ed7 2922
d35fbf6b
DM
2923 if (params->idle_pipe)
2924 do_idle_pipe_dance(params->idle_pipe);
4f2d528d 2925
2c027c62
LP
2926 /* Close fds we don't need very early to make sure we don't block init reexecution because it cannot bind its
2927 * sockets. Among the fds we close are the logging fds, and we want to keep them closed, so that we don't have
2928 * any fds open we don't really want open during the transition. In order to make logging work, we switch the
2929 * log subsystem into open_when_needed mode, so that it reopens the logs on every single log call. */
ff0af2a1 2930
d35fbf6b 2931 log_forget_fds();
2c027c62 2932 log_set_open_when_needed(true);
4f2d528d 2933
40a80078
LP
2934 /* In case anything used libc syslog(), close this here, too */
2935 closelog();
2936
5686391b
LP
2937 n_fds = n_socket_fds + n_storage_fds;
2938 r = close_remaining_fds(params, runtime, dcreds, user_lookup_fd, socket_fd, params->exec_fd, fds, n_fds);
ff0af2a1
LP
2939 if (r < 0) {
2940 *exit_status = EXIT_FDS;
12145637 2941 return log_unit_error_errno(unit, r, "Failed to close unwanted file descriptors: %m");
8c7be95e
LP
2942 }
2943
d35fbf6b
DM
2944 if (!context->same_pgrp)
2945 if (setsid() < 0) {
ff0af2a1 2946 *exit_status = EXIT_SETSID;
12145637 2947 return log_unit_error_errno(unit, errno, "Failed to create new process session: %m");
d35fbf6b 2948 }
9e2f7c11 2949
1e22b5cd 2950 exec_context_tty_reset(context, params);
d35fbf6b 2951
c891efaf 2952 if (unit_shall_confirm_spawn(unit)) {
7d5ceb64 2953 const char *vc = params->confirm_spawn;
3b20f877
FB
2954 _cleanup_free_ char *cmdline = NULL;
2955
ee39ca20 2956 cmdline = exec_command_line(command->argv);
3b20f877 2957 if (!cmdline) {
0460aa5c 2958 *exit_status = EXIT_MEMORY;
12145637 2959 return log_oom();
3b20f877 2960 }
d35fbf6b 2961
eedf223a 2962 r = ask_for_confirmation(vc, unit, cmdline);
3b20f877
FB
2963 if (r != CONFIRM_EXECUTE) {
2964 if (r == CONFIRM_PRETEND_SUCCESS) {
2965 *exit_status = EXIT_SUCCESS;
2966 return 0;
2967 }
ff0af2a1 2968 *exit_status = EXIT_CONFIRM;
12145637 2969 log_unit_error(unit, "Execution cancelled by the user");
d35fbf6b 2970 return -ECANCELED;
d35fbf6b
DM
2971 }
2972 }
1a63a750 2973
d521916d
LP
2974 /* We are about to invoke NSS and PAM modules. Let's tell them what we are doing here, maybe they care. This is
2975 * used by nss-resolve to disable itself when we are about to start systemd-resolved, to avoid deadlocks. Note
2976 * that these env vars do not survive the execve(), which means they really only apply to the PAM and NSS
2977 * invocations themselves. Also note that while we'll only invoke NSS modules involved in user management they
2978 * might internally call into other NSS modules that are involved in hostname resolution, we never know. */
2979 if (setenv("SYSTEMD_ACTIVATION_UNIT", unit->id, true) != 0 ||
2980 setenv("SYSTEMD_ACTIVATION_SCOPE", MANAGER_IS_SYSTEM(unit->manager) ? "system" : "user", true) != 0) {
2981 *exit_status = EXIT_MEMORY;
2982 return log_unit_error_errno(unit, errno, "Failed to update environment: %m");
2983 }
2984
29206d46 2985 if (context->dynamic_user && dcreds) {
da50b85a 2986 _cleanup_strv_free_ char **suggested_paths = NULL;
29206d46 2987
d521916d
LP
2988 /* On top of that, make sure we bypass our own NSS module nss-systemd comprehensively for any NSS
2989 * checks, if DynamicUser=1 is used, as we shouldn't create a feedback loop with ourselves here.*/
409093fe
LP
2990 if (putenv((char*) "SYSTEMD_NSS_DYNAMIC_BYPASS=1") != 0) {
2991 *exit_status = EXIT_USER;
12145637 2992 return log_unit_error_errno(unit, errno, "Failed to update environment: %m");
409093fe
LP
2993 }
2994
da50b85a
LP
2995 r = compile_suggested_paths(context, params, &suggested_paths);
2996 if (r < 0) {
2997 *exit_status = EXIT_MEMORY;
2998 return log_oom();
2999 }
3000
3001 r = dynamic_creds_realize(dcreds, suggested_paths, &uid, &gid);
ff0af2a1
LP
3002 if (r < 0) {
3003 *exit_status = EXIT_USER;
e2b0cc34
YW
3004 if (r == -EILSEQ) {
3005 log_unit_error(unit, "Failed to update dynamic user credentials: User or group with specified name already exists.");
3006 return -EOPNOTSUPP;
3007 }
12145637 3008 return log_unit_error_errno(unit, r, "Failed to update dynamic user credentials: %m");
524daa8c 3009 }
524daa8c 3010
70dd455c 3011 if (!uid_is_valid(uid)) {
29206d46 3012 *exit_status = EXIT_USER;
12145637 3013 log_unit_error(unit, "UID validation failed for \""UID_FMT"\"", uid);
70dd455c
ZJS
3014 return -ESRCH;
3015 }
3016
3017 if (!gid_is_valid(gid)) {
3018 *exit_status = EXIT_USER;
12145637 3019 log_unit_error(unit, "GID validation failed for \""GID_FMT"\"", gid);
29206d46
LP
3020 return -ESRCH;
3021 }
5bc7452b 3022
29206d46
LP
3023 if (dcreds->user)
3024 username = dcreds->user->name;
3025
3026 } else {
4d885bd3
DH
3027 r = get_fixed_user(context, &username, &uid, &gid, &home, &shell);
3028 if (r < 0) {
3029 *exit_status = EXIT_USER;
12145637 3030 return log_unit_error_errno(unit, r, "Failed to determine user credentials: %m");
5bc7452b 3031 }
5bc7452b 3032
4d885bd3
DH
3033 r = get_fixed_group(context, &groupname, &gid);
3034 if (r < 0) {
3035 *exit_status = EXIT_GROUP;
12145637 3036 return log_unit_error_errno(unit, r, "Failed to determine group credentials: %m");
4d885bd3 3037 }
cdc5d5c5 3038 }
29206d46 3039
cdc5d5c5
DH
3040 /* Initialize user supplementary groups and get SupplementaryGroups= ones */
3041 r = get_supplementary_groups(context, username, groupname, gid,
3042 &supplementary_gids, &ngids);
3043 if (r < 0) {
3044 *exit_status = EXIT_GROUP;
12145637 3045 return log_unit_error_errno(unit, r, "Failed to determine supplementary groups: %m");
29206d46 3046 }
5bc7452b 3047
00d9ef85
LP
3048 r = send_user_lookup(unit, user_lookup_fd, uid, gid);
3049 if (r < 0) {
3050 *exit_status = EXIT_USER;
12145637 3051 return log_unit_error_errno(unit, r, "Failed to send user credentials to PID1: %m");
00d9ef85
LP
3052 }
3053
3054 user_lookup_fd = safe_close(user_lookup_fd);
3055
6732edab
LP
3056 r = acquire_home(context, uid, &home, &home_buffer);
3057 if (r < 0) {
3058 *exit_status = EXIT_CHDIR;
12145637 3059 return log_unit_error_errno(unit, r, "Failed to determine $HOME for user: %m");
6732edab
LP
3060 }
3061
d35fbf6b
DM
3062 /* If a socket is connected to STDIN/STDOUT/STDERR, we
3063 * must be sure to drop O_NONBLOCK */
3064 if (socket_fd >= 0)
a34ceba6 3065 (void) fd_nonblock(socket_fd, false);
acbb0225 3066
4c70a4a7
MS
3067 /* Journald will try to look-up our cgroup in order to populate _SYSTEMD_CGROUP and _SYSTEMD_UNIT fields.
3068 * Hence we need to migrate to the target cgroup from init.scope before connecting to journald */
3069 if (params->cgroup_path) {
3070 _cleanup_free_ char *p = NULL;
3071
3072 r = exec_parameters_get_cgroup_path(params, &p);
3073 if (r < 0) {
3074 *exit_status = EXIT_CGROUP;
3075 return log_unit_error_errno(unit, r, "Failed to acquire cgroup path: %m");
3076 }
3077
3078 r = cg_attach_everywhere(params->cgroup_supported, p, 0, NULL, NULL);
3079 if (r < 0) {
3080 *exit_status = EXIT_CGROUP;
3081 return log_unit_error_errno(unit, r, "Failed to attach to cgroup %s: %m", p);
3082 }
3083 }
3084
a8d08f39
LP
3085 if (context->network_namespace_path && runtime && runtime->netns_storage_socket[0] >= 0) {
3086 r = open_netns_path(runtime->netns_storage_socket, context->network_namespace_path);
3087 if (r < 0) {
3088 *exit_status = EXIT_NETWORK;
3089 return log_unit_error_errno(unit, r, "Failed to open network namespace path %s: %m", context->network_namespace_path);
3090 }
3091 }
3092
52c239d7 3093 r = setup_input(context, params, socket_fd, named_iofds);
ff0af2a1
LP
3094 if (r < 0) {
3095 *exit_status = EXIT_STDIN;
12145637 3096 return log_unit_error_errno(unit, r, "Failed to set up standard input: %m");
d35fbf6b 3097 }
034c6ed7 3098
52c239d7 3099 r = setup_output(unit, context, params, STDOUT_FILENO, socket_fd, named_iofds, basename(command->path), uid, gid, &journal_stream_dev, &journal_stream_ino);
ff0af2a1
LP
3100 if (r < 0) {
3101 *exit_status = EXIT_STDOUT;
12145637 3102 return log_unit_error_errno(unit, r, "Failed to set up standard output: %m");
d35fbf6b
DM
3103 }
3104
52c239d7 3105 r = setup_output(unit, context, params, STDERR_FILENO, socket_fd, named_iofds, basename(command->path), uid, gid, &journal_stream_dev, &journal_stream_ino);
ff0af2a1
LP
3106 if (r < 0) {
3107 *exit_status = EXIT_STDERR;
12145637 3108 return log_unit_error_errno(unit, r, "Failed to set up standard error output: %m");
d35fbf6b
DM
3109 }
3110
d35fbf6b 3111 if (context->oom_score_adjust_set) {
9f8168eb
LP
3112 /* When we can't make this change due to EPERM, then let's silently skip over it. User namespaces
3113 * prohibit write access to this file, and we shouldn't trip up over that. */
3114 r = set_oom_score_adjust(context->oom_score_adjust);
12145637 3115 if (IN_SET(r, -EPERM, -EACCES))
f2341e0a 3116 log_unit_debug_errno(unit, r, "Failed to adjust OOM setting, assuming containerized execution, ignoring: %m");
12145637 3117 else if (r < 0) {
ff0af2a1 3118 *exit_status = EXIT_OOM_ADJUST;
12145637 3119 return log_unit_error_errno(unit, r, "Failed to adjust OOM setting: %m");
613b411c 3120 }
d35fbf6b
DM
3121 }
3122
3123 if (context->nice_set)
3124 if (setpriority(PRIO_PROCESS, 0, context->nice) < 0) {
ff0af2a1 3125 *exit_status = EXIT_NICE;
12145637 3126 return log_unit_error_errno(unit, errno, "Failed to set up process scheduling priority (nice level): %m");
613b411c
LP
3127 }
3128
d35fbf6b
DM
3129 if (context->cpu_sched_set) {
3130 struct sched_param param = {
3131 .sched_priority = context->cpu_sched_priority,
3132 };
3133
ff0af2a1
LP
3134 r = sched_setscheduler(0,
3135 context->cpu_sched_policy |
3136 (context->cpu_sched_reset_on_fork ?
3137 SCHED_RESET_ON_FORK : 0),
3138 &param);
3139 if (r < 0) {
3140 *exit_status = EXIT_SETSCHEDULER;
12145637 3141 return log_unit_error_errno(unit, errno, "Failed to set up CPU scheduling: %m");
fc9b2a84 3142 }
d35fbf6b 3143 }
fc9b2a84 3144
0985c7c4
ZJS
3145 if (context->cpu_set.set)
3146 if (sched_setaffinity(0, context->cpu_set.allocated, context->cpu_set.set) < 0) {
ff0af2a1 3147 *exit_status = EXIT_CPUAFFINITY;
12145637 3148 return log_unit_error_errno(unit, errno, "Failed to set up CPU affinity: %m");
034c6ed7
LP
3149 }
3150
d35fbf6b
DM
3151 if (context->ioprio_set)
3152 if (ioprio_set(IOPRIO_WHO_PROCESS, 0, context->ioprio) < 0) {
ff0af2a1 3153 *exit_status = EXIT_IOPRIO;
12145637 3154 return log_unit_error_errno(unit, errno, "Failed to set up IO scheduling priority: %m");
d35fbf6b 3155 }
da726a4d 3156
d35fbf6b
DM
3157 if (context->timer_slack_nsec != NSEC_INFINITY)
3158 if (prctl(PR_SET_TIMERSLACK, context->timer_slack_nsec) < 0) {
ff0af2a1 3159 *exit_status = EXIT_TIMERSLACK;
12145637 3160 return log_unit_error_errno(unit, errno, "Failed to set up timer slack: %m");
4c2630eb 3161 }
9eba9da4 3162
21022b9d
LP
3163 if (context->personality != PERSONALITY_INVALID) {
3164 r = safe_personality(context->personality);
3165 if (r < 0) {
ff0af2a1 3166 *exit_status = EXIT_PERSONALITY;
12145637 3167 return log_unit_error_errno(unit, r, "Failed to set up execution domain (personality): %m");
4c2630eb 3168 }
21022b9d 3169 }
94f04347 3170
d35fbf6b 3171 if (context->utmp_id)
df0ff127 3172 utmp_put_init_process(context->utmp_id, getpid_cached(), getsid(0),
6a93917d 3173 context->tty_path,
023a4f67
LP
3174 context->utmp_mode == EXEC_UTMP_INIT ? INIT_PROCESS :
3175 context->utmp_mode == EXEC_UTMP_LOGIN ? LOGIN_PROCESS :
3176 USER_PROCESS,
6a93917d 3177 username);
d35fbf6b 3178
08f67696 3179 if (uid_is_valid(uid)) {
ff0af2a1
LP
3180 r = chown_terminal(STDIN_FILENO, uid);
3181 if (r < 0) {
3182 *exit_status = EXIT_STDIN;
12145637 3183 return log_unit_error_errno(unit, r, "Failed to change ownership of terminal: %m");
071830ff 3184 }
d35fbf6b 3185 }
8e274523 3186
4e1dfa45 3187 /* If delegation is enabled we'll pass ownership of the cgroup to the user of the new process. On cgroup v1
62b9bb26 3188 * this is only about systemd's own hierarchy, i.e. not the controller hierarchies, simply because that's not
4e1dfa45 3189 * safe. On cgroup v2 there's only one hierarchy anyway, and delegation is safe there, hence in that case only
62b9bb26 3190 * touch a single hierarchy too. */
584b8688 3191 if (params->cgroup_path && context->user && (params->flags & EXEC_CGROUP_DELEGATE)) {
62b9bb26 3192 r = cg_set_access(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, uid, gid);
ff0af2a1
LP
3193 if (r < 0) {
3194 *exit_status = EXIT_CGROUP;
12145637 3195 return log_unit_error_errno(unit, r, "Failed to adjust control group access: %m");
034c6ed7 3196 }
d35fbf6b 3197 }
034c6ed7 3198
72fd1768 3199 for (dt = 0; dt < _EXEC_DIRECTORY_TYPE_MAX; dt++) {
8679efde 3200 r = setup_exec_directory(context, params, uid, gid, dt, exit_status);
12145637
LP
3201 if (r < 0)
3202 return log_unit_error_errno(unit, r, "Failed to set up special execution directory in %s: %m", params->prefix[dt]);
d35fbf6b 3203 }
94f04347 3204
7bce046b 3205 r = build_environment(
fd63e712 3206 unit,
7bce046b
LP
3207 context,
3208 params,
3209 n_fds,
3210 home,
3211 username,
3212 shell,
3213 journal_stream_dev,
3214 journal_stream_ino,
3215 &our_env);
2065ca69
JW
3216 if (r < 0) {
3217 *exit_status = EXIT_MEMORY;
12145637 3218 return log_oom();
2065ca69
JW
3219 }
3220
3221 r = build_pass_environment(context, &pass_env);
3222 if (r < 0) {
3223 *exit_status = EXIT_MEMORY;
12145637 3224 return log_oom();
2065ca69
JW
3225 }
3226
3227 accum_env = strv_env_merge(5,
3228 params->environment,
3229 our_env,
3230 pass_env,
3231 context->environment,
3232 files_env,
3233 NULL);
3234 if (!accum_env) {
3235 *exit_status = EXIT_MEMORY;
12145637 3236 return log_oom();
2065ca69 3237 }
1280503b 3238 accum_env = strv_env_clean(accum_env);
2065ca69 3239
096424d1 3240 (void) umask(context->umask);
b213e1c1 3241
b1edf445 3242 r = setup_keyring(unit, context, params, uid, gid);
74dd6b51
LP
3243 if (r < 0) {
3244 *exit_status = EXIT_KEYRING;
12145637 3245 return log_unit_error_errno(unit, r, "Failed to set up kernel keyring: %m");
74dd6b51
LP
3246 }
3247
165a31c0 3248 /* We need sandboxing if the caller asked us to apply it and the command isn't explicitly excepted from it */
1703fa41 3249 needs_sandboxing = (params->flags & EXEC_APPLY_SANDBOXING) && !(command->flags & EXEC_COMMAND_FULLY_PRIVILEGED);
7f18ef0a 3250
165a31c0
LP
3251 /* We need the ambient capability hack, if the caller asked us to apply it and the command is marked for it, and the kernel doesn't actually support ambient caps */
3252 needs_ambient_hack = (params->flags & EXEC_APPLY_SANDBOXING) && (command->flags & EXEC_COMMAND_AMBIENT_MAGIC) && !ambient_capabilities_supported();
7f18ef0a 3253
165a31c0
LP
3254 /* We need setresuid() if the caller asked us to apply sandboxing and the command isn't explicitly excepted from either whole sandboxing or just setresuid() itself, and the ambient hack is not desired */
3255 if (needs_ambient_hack)
3256 needs_setuid = false;
3257 else
3258 needs_setuid = (params->flags & EXEC_APPLY_SANDBOXING) && !(command->flags & (EXEC_COMMAND_FULLY_PRIVILEGED|EXEC_COMMAND_NO_SETUID));
3259
3260 if (needs_sandboxing) {
7f18ef0a
FK
3261 /* MAC enablement checks need to be done before a new mount ns is created, as they rely on /sys being
3262 * present. The actual MAC context application will happen later, as late as possible, to avoid
3263 * impacting our own code paths. */
3264
349cc4a5 3265#if HAVE_SELINUX
43b1f709 3266 use_selinux = mac_selinux_use();
7f18ef0a 3267#endif
f9fa32f0 3268#if ENABLE_SMACK
43b1f709 3269 use_smack = mac_smack_use();
7f18ef0a 3270#endif
349cc4a5 3271#if HAVE_APPARMOR
43b1f709 3272 use_apparmor = mac_apparmor_use();
7f18ef0a 3273#endif
165a31c0 3274 }
7f18ef0a 3275
ce932d2d
LP
3276 if (needs_sandboxing) {
3277 int which_failed;
3278
3279 /* Let's set the resource limits before we call into PAM, so that pam_limits wins over what
3280 * is set here. (See below.) */
3281
3282 r = setrlimit_closest_all((const struct rlimit* const *) context->rlimit, &which_failed);
3283 if (r < 0) {
3284 *exit_status = EXIT_LIMITS;
3285 return log_unit_error_errno(unit, r, "Failed to adjust resource limit RLIMIT_%s: %m", rlimit_to_string(which_failed));
3286 }
3287 }
3288
165a31c0 3289 if (needs_setuid) {
ce932d2d
LP
3290
3291 /* Let's call into PAM after we set up our own idea of resource limits so that pam_limits
3292 * wins here. (See above.) */
3293
165a31c0
LP
3294 if (context->pam_name && username) {
3295 r = setup_pam(context->pam_name, username, uid, gid, context->tty_path, &accum_env, fds, n_fds);
3296 if (r < 0) {
3297 *exit_status = EXIT_PAM;
12145637 3298 return log_unit_error_errno(unit, r, "Failed to set up PAM session: %m");
165a31c0
LP
3299 }
3300 }
b213e1c1 3301 }
ac45f971 3302
a8d08f39
LP
3303 if ((context->private_network || context->network_namespace_path) && runtime && runtime->netns_storage_socket[0] >= 0) {
3304
6e2d7c4f
MS
3305 if (ns_type_supported(NAMESPACE_NET)) {
3306 r = setup_netns(runtime->netns_storage_socket);
3307 if (r < 0) {
3308 *exit_status = EXIT_NETWORK;
3309 return log_unit_error_errno(unit, r, "Failed to set up network namespacing: %m");
3310 }
a8d08f39
LP
3311 } else if (context->network_namespace_path) {
3312 *exit_status = EXIT_NETWORK;
3313 return log_unit_error_errno(unit, SYNTHETIC_ERRNO(EOPNOTSUPP), "NetworkNamespacePath= is not supported, refusing.");
6e2d7c4f
MS
3314 } else
3315 log_unit_warning(unit, "PrivateNetwork=yes is configured, but the kernel does not support network namespaces, ignoring.");
d35fbf6b 3316 }
169c1bda 3317
ee818b89 3318 needs_mount_namespace = exec_needs_mount_namespace(context, params, runtime);
ee818b89 3319 if (needs_mount_namespace) {
7cc5ef5f
ZJS
3320 _cleanup_free_ char *error_path = NULL;
3321
3322 r = apply_mount_namespace(unit, command, context, params, runtime, &error_path);
3fbe8dbe
LP
3323 if (r < 0) {
3324 *exit_status = EXIT_NAMESPACE;
7cc5ef5f
ZJS
3325 return log_unit_error_errno(unit, r, "Failed to set up mount namespacing%s%s: %m",
3326 error_path ? ": " : "", strempty(error_path));
3fbe8dbe 3327 }
d35fbf6b 3328 }
81a2b7ce 3329
aecd5ac6
TM
3330 if (context->protect_hostname) {
3331 if (ns_type_supported(NAMESPACE_UTS)) {
3332 if (unshare(CLONE_NEWUTS) < 0) {
3333 *exit_status = EXIT_NAMESPACE;
3334 return log_unit_error_errno(unit, errno, "Failed to set up UTS namespacing: %m");
3335 }
3336 } else
3337 log_unit_warning(unit, "ProtectHostname=yes is configured, but the kernel does not support UTS namespaces, ignoring namespace setup.");
3338#if HAVE_SECCOMP
3339 r = seccomp_protect_hostname();
3340 if (r < 0) {
3341 *exit_status = EXIT_SECCOMP;
3342 return log_unit_error_errno(unit, r, "Failed to apply hostname restrictions: %m");
3343 }
3344#endif
3345 }
3346
bbeea271 3347 /* Drop groups as early as possible */
165a31c0 3348 if (needs_setuid) {
709dbeac 3349 r = enforce_groups(gid, supplementary_gids, ngids);
096424d1
LP
3350 if (r < 0) {
3351 *exit_status = EXIT_GROUP;
12145637 3352 return log_unit_error_errno(unit, r, "Changing group credentials failed: %m");
096424d1 3353 }
165a31c0 3354 }
096424d1 3355
165a31c0 3356 if (needs_sandboxing) {
349cc4a5 3357#if HAVE_SELINUX
43b1f709 3358 if (use_selinux && params->selinux_context_net && socket_fd >= 0) {
937ccce9
LP
3359 r = mac_selinux_get_child_mls_label(socket_fd, command->path, context->selinux_context, &mac_selinux_context_net);
3360 if (r < 0) {
3361 *exit_status = EXIT_SELINUX_CONTEXT;
12145637 3362 return log_unit_error_errno(unit, r, "Failed to determine SELinux context: %m");
937ccce9 3363 }
9008e1ac 3364 }
9008e1ac
MS
3365#endif
3366
937ccce9
LP
3367 if (context->private_users) {
3368 r = setup_private_users(uid, gid);
3369 if (r < 0) {
3370 *exit_status = EXIT_USER;
12145637 3371 return log_unit_error_errno(unit, r, "Failed to set up user namespacing: %m");
937ccce9 3372 }
d251207d
LP
3373 }
3374 }
3375
165a31c0 3376 /* We repeat the fd closing here, to make sure that nothing is leaked from the PAM modules. Note that we are
5686391b
LP
3377 * more aggressive this time since socket_fd and the netns fds we don't need anymore. We do keep the exec_fd
3378 * however if we have it as we want to keep it open until the final execve(). */
3379
3380 if (params->exec_fd >= 0) {
3381 exec_fd = params->exec_fd;
3382
3383 if (exec_fd < 3 + (int) n_fds) {
3384 int moved_fd;
3385
3386 /* Let's move the exec fd far up, so that it's outside of the fd range we want to pass to the
3387 * process we are about to execute. */
3388
3389 moved_fd = fcntl(exec_fd, F_DUPFD_CLOEXEC, 3 + (int) n_fds);
3390 if (moved_fd < 0) {
3391 *exit_status = EXIT_FDS;
3392 return log_unit_error_errno(unit, errno, "Couldn't move exec fd up: %m");
3393 }
3394
3395 safe_close(exec_fd);
3396 exec_fd = moved_fd;
3397 } else {
3398 /* This fd should be FD_CLOEXEC already, but let's make sure. */
3399 r = fd_cloexec(exec_fd, true);
3400 if (r < 0) {
3401 *exit_status = EXIT_FDS;
3402 return log_unit_error_errno(unit, r, "Failed to make exec fd FD_CLOEXEC: %m");
3403 }
3404 }
3405
3406 fds_with_exec_fd = newa(int, n_fds + 1);
7e8d494b 3407 memcpy_safe(fds_with_exec_fd, fds, n_fds * sizeof(int));
5686391b
LP
3408 fds_with_exec_fd[n_fds] = exec_fd;
3409 n_fds_with_exec_fd = n_fds + 1;
3410 } else {
3411 fds_with_exec_fd = fds;
3412 n_fds_with_exec_fd = n_fds;
3413 }
3414
3415 r = close_all_fds(fds_with_exec_fd, n_fds_with_exec_fd);
ff0af2a1
LP
3416 if (r >= 0)
3417 r = shift_fds(fds, n_fds);
3418 if (r >= 0)
25b583d7 3419 r = flags_fds(fds, n_socket_fds, n_storage_fds, context->non_blocking);
ff0af2a1
LP
3420 if (r < 0) {
3421 *exit_status = EXIT_FDS;
12145637 3422 return log_unit_error_errno(unit, r, "Failed to adjust passed file descriptors: %m");
d35fbf6b 3423 }
e66cf1a3 3424
5686391b
LP
3425 /* At this point, the fds we want to pass to the program are all ready and set up, with O_CLOEXEC turned off
3426 * and at the right fd numbers. There are no other fds open, with one exception: the exec_fd if it is defined,
3427 * and it has O_CLOEXEC set, after all we want it to be closed by the execve(), so that our parent knows we
3428 * came this far. */
3429
165a31c0 3430 secure_bits = context->secure_bits;
e66cf1a3 3431
165a31c0
LP
3432 if (needs_sandboxing) {
3433 uint64_t bset;
e66cf1a3 3434
ce932d2d
LP
3435 /* Set the RTPRIO resource limit to 0, but only if nothing else was explicitly
3436 * requested. (Note this is placed after the general resource limit initialization, see
3437 * above, in order to take precedence.) */
f4170c67
LP
3438 if (context->restrict_realtime && !context->rlimit[RLIMIT_RTPRIO]) {
3439 if (setrlimit(RLIMIT_RTPRIO, &RLIMIT_MAKE_CONST(0)) < 0) {
3440 *exit_status = EXIT_LIMITS;
12145637 3441 return log_unit_error_errno(unit, errno, "Failed to adjust RLIMIT_RTPRIO resource limit: %m");
f4170c67
LP
3442 }
3443 }
3444
37ac2744
JB
3445#if ENABLE_SMACK
3446 /* LSM Smack needs the capability CAP_MAC_ADMIN to change the current execution security context of the
3447 * process. This is the latest place before dropping capabilities. Other MAC contexts are set later. */
3448 if (use_smack) {
3449 r = setup_smack(context, command);
3450 if (r < 0) {
3451 *exit_status = EXIT_SMACK_PROCESS_LABEL;
3452 return log_unit_error_errno(unit, r, "Failed to set SMACK process label: %m");
3453 }
3454 }
3455#endif
3456
165a31c0
LP
3457 bset = context->capability_bounding_set;
3458 /* If the ambient caps hack is enabled (which means the kernel can't do them, and the user asked for
3459 * our magic fallback), then let's add some extra caps, so that the service can drop privs of its own,
3460 * instead of us doing that */
3461 if (needs_ambient_hack)
3462 bset |= (UINT64_C(1) << CAP_SETPCAP) |
3463 (UINT64_C(1) << CAP_SETUID) |
3464 (UINT64_C(1) << CAP_SETGID);
3465
3466 if (!cap_test_all(bset)) {
3467 r = capability_bounding_set_drop(bset, false);
ff0af2a1
LP
3468 if (r < 0) {
3469 *exit_status = EXIT_CAPABILITIES;
12145637 3470 return log_unit_error_errno(unit, r, "Failed to drop capabilities: %m");
3b8bddde 3471 }
4c2630eb 3472 }
3b8bddde 3473
755d4b67
IP
3474 /* This is done before enforce_user, but ambient set
3475 * does not survive over setresuid() if keep_caps is not set. */
165a31c0
LP
3476 if (!needs_ambient_hack &&
3477 context->capability_ambient_set != 0) {
755d4b67
IP
3478 r = capability_ambient_set_apply(context->capability_ambient_set, true);
3479 if (r < 0) {
3480 *exit_status = EXIT_CAPABILITIES;
12145637 3481 return log_unit_error_errno(unit, r, "Failed to apply ambient capabilities (before UID change): %m");
755d4b67 3482 }
755d4b67 3483 }
165a31c0 3484 }
755d4b67 3485
165a31c0 3486 if (needs_setuid) {
08f67696 3487 if (uid_is_valid(uid)) {
ff0af2a1
LP
3488 r = enforce_user(context, uid);
3489 if (r < 0) {
3490 *exit_status = EXIT_USER;
12145637 3491 return log_unit_error_errno(unit, r, "Failed to change UID to " UID_FMT ": %m", uid);
5b6319dc 3492 }
165a31c0
LP
3493
3494 if (!needs_ambient_hack &&
3495 context->capability_ambient_set != 0) {
755d4b67
IP
3496
3497 /* Fix the ambient capabilities after user change. */
3498 r = capability_ambient_set_apply(context->capability_ambient_set, false);
3499 if (r < 0) {
3500 *exit_status = EXIT_CAPABILITIES;
12145637 3501 return log_unit_error_errno(unit, r, "Failed to apply ambient capabilities (after UID change): %m");
755d4b67
IP
3502 }
3503
3504 /* If we were asked to change user and ambient capabilities
3505 * were requested, we had to add keep-caps to the securebits
3506 * so that we would maintain the inherited capability set
3507 * through the setresuid(). Make sure that the bit is added
3508 * also to the context secure_bits so that we don't try to
3509 * drop the bit away next. */
3510
7f508f2c 3511 secure_bits |= 1<<SECURE_KEEP_CAPS;
755d4b67 3512 }
5b6319dc 3513 }
165a31c0 3514 }
d35fbf6b 3515
56ef8db9
JB
3516 /* Apply working directory here, because the working directory might be on NFS and only the user running
3517 * this service might have the correct privilege to change to the working directory */
3518 r = apply_working_directory(context, params, home, needs_mount_namespace, exit_status);
3519 if (r < 0)
3520 return log_unit_error_errno(unit, r, "Changing to the requested working directory failed: %m");
3521
165a31c0 3522 if (needs_sandboxing) {
37ac2744 3523 /* Apply other MAC contexts late, but before seccomp syscall filtering, as those should really be last to
5cd9cd35
LP
3524 * influence our own codepaths as little as possible. Moreover, applying MAC contexts usually requires
3525 * syscalls that are subject to seccomp filtering, hence should probably be applied before the syscalls
3526 * are restricted. */
3527
349cc4a5 3528#if HAVE_SELINUX
43b1f709 3529 if (use_selinux) {
5cd9cd35
LP
3530 char *exec_context = mac_selinux_context_net ?: context->selinux_context;
3531
3532 if (exec_context) {
3533 r = setexeccon(exec_context);
3534 if (r < 0) {
3535 *exit_status = EXIT_SELINUX_CONTEXT;
12145637 3536 return log_unit_error_errno(unit, r, "Failed to change SELinux context to %s: %m", exec_context);
5cd9cd35
LP
3537 }
3538 }
3539 }
3540#endif
3541
349cc4a5 3542#if HAVE_APPARMOR
43b1f709 3543 if (use_apparmor && context->apparmor_profile) {
5cd9cd35
LP
3544 r = aa_change_onexec(context->apparmor_profile);
3545 if (r < 0 && !context->apparmor_profile_ignore) {
3546 *exit_status = EXIT_APPARMOR_PROFILE;
12145637 3547 return log_unit_error_errno(unit, errno, "Failed to prepare AppArmor profile change to %s: %m", context->apparmor_profile);
5cd9cd35
LP
3548 }
3549 }
3550#endif
3551
165a31c0
LP
3552 /* PR_GET_SECUREBITS is not privileged, while PR_SET_SECUREBITS is. So to suppress potential EPERMs
3553 * we'll try not to call PR_SET_SECUREBITS unless necessary. */
755d4b67
IP
3554 if (prctl(PR_GET_SECUREBITS) != secure_bits)
3555 if (prctl(PR_SET_SECUREBITS, secure_bits) < 0) {
ff0af2a1 3556 *exit_status = EXIT_SECUREBITS;
12145637 3557 return log_unit_error_errno(unit, errno, "Failed to set process secure bits: %m");
ff01d048 3558 }
5b6319dc 3559
59eeb84b 3560 if (context_has_no_new_privileges(context))
d35fbf6b 3561 if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) < 0) {
ff0af2a1 3562 *exit_status = EXIT_NO_NEW_PRIVILEGES;
12145637 3563 return log_unit_error_errno(unit, errno, "Failed to disable new privileges: %m");
d35fbf6b
DM
3564 }
3565
349cc4a5 3566#if HAVE_SECCOMP
469830d1
LP
3567 r = apply_address_families(unit, context);
3568 if (r < 0) {
3569 *exit_status = EXIT_ADDRESS_FAMILIES;
12145637 3570 return log_unit_error_errno(unit, r, "Failed to restrict address families: %m");
4c2630eb 3571 }
04aa0cb9 3572
469830d1
LP
3573 r = apply_memory_deny_write_execute(unit, context);
3574 if (r < 0) {
3575 *exit_status = EXIT_SECCOMP;
12145637 3576 return log_unit_error_errno(unit, r, "Failed to disable writing to executable memory: %m");
f3e43635 3577 }
f4170c67 3578
469830d1
LP
3579 r = apply_restrict_realtime(unit, context);
3580 if (r < 0) {
3581 *exit_status = EXIT_SECCOMP;
12145637 3582 return log_unit_error_errno(unit, r, "Failed to apply realtime restrictions: %m");
f4170c67
LP
3583 }
3584
f69567cb
LP
3585 r = apply_restrict_suid_sgid(unit, context);
3586 if (r < 0) {
3587 *exit_status = EXIT_SECCOMP;
3588 return log_unit_error_errno(unit, r, "Failed to apply SUID/SGID restrictions: %m");
3589 }
3590
add00535
LP
3591 r = apply_restrict_namespaces(unit, context);
3592 if (r < 0) {
3593 *exit_status = EXIT_SECCOMP;
12145637 3594 return log_unit_error_errno(unit, r, "Failed to apply namespace restrictions: %m");
add00535
LP
3595 }
3596
469830d1
LP
3597 r = apply_protect_sysctl(unit, context);
3598 if (r < 0) {
3599 *exit_status = EXIT_SECCOMP;
12145637 3600 return log_unit_error_errno(unit, r, "Failed to apply sysctl restrictions: %m");
502d704e
DH
3601 }
3602
469830d1
LP
3603 r = apply_protect_kernel_modules(unit, context);
3604 if (r < 0) {
3605 *exit_status = EXIT_SECCOMP;
12145637 3606 return log_unit_error_errno(unit, r, "Failed to apply module loading restrictions: %m");
59eeb84b
LP
3607 }
3608
469830d1
LP
3609 r = apply_private_devices(unit, context);
3610 if (r < 0) {
3611 *exit_status = EXIT_SECCOMP;
12145637 3612 return log_unit_error_errno(unit, r, "Failed to set up private devices: %m");
469830d1
LP
3613 }
3614
3615 r = apply_syscall_archs(unit, context);
3616 if (r < 0) {
3617 *exit_status = EXIT_SECCOMP;
12145637 3618 return log_unit_error_errno(unit, r, "Failed to apply syscall architecture restrictions: %m");
ba128bb8
LP
3619 }
3620
78e864e5
TM
3621 r = apply_lock_personality(unit, context);
3622 if (r < 0) {
3623 *exit_status = EXIT_SECCOMP;
12145637 3624 return log_unit_error_errno(unit, r, "Failed to lock personalities: %m");
78e864e5
TM
3625 }
3626
5cd9cd35
LP
3627 /* This really should remain the last step before the execve(), to make sure our own code is unaffected
3628 * by the filter as little as possible. */
165a31c0 3629 r = apply_syscall_filter(unit, context, needs_ambient_hack);
469830d1
LP
3630 if (r < 0) {
3631 *exit_status = EXIT_SECCOMP;
12145637 3632 return log_unit_error_errno(unit, r, "Failed to apply system call filters: %m");
d35fbf6b
DM
3633 }
3634#endif
d35fbf6b 3635 }
034c6ed7 3636
00819cc1
LP
3637 if (!strv_isempty(context->unset_environment)) {
3638 char **ee = NULL;
3639
3640 ee = strv_env_delete(accum_env, 1, context->unset_environment);
3641 if (!ee) {
3642 *exit_status = EXIT_MEMORY;
12145637 3643 return log_oom();
00819cc1
LP
3644 }
3645
130d3d22 3646 strv_free_and_replace(accum_env, ee);
00819cc1
LP
3647 }
3648
7ca69792
AZ
3649 if (!FLAGS_SET(command->flags, EXEC_COMMAND_NO_ENV_EXPAND)) {
3650 replaced_argv = replace_env_argv(command->argv, accum_env);
3651 if (!replaced_argv) {
3652 *exit_status = EXIT_MEMORY;
3653 return log_oom();
3654 }
3655 final_argv = replaced_argv;
3656 } else
3657 final_argv = command->argv;
034c6ed7 3658
f1d34068 3659 if (DEBUG_LOGGING) {
d35fbf6b 3660 _cleanup_free_ char *line;
81a2b7ce 3661
d35fbf6b 3662 line = exec_command_line(final_argv);
a1230ff9 3663 if (line)
f2341e0a 3664 log_struct(LOG_DEBUG,
f2341e0a
LP
3665 "EXECUTABLE=%s", command->path,
3666 LOG_UNIT_MESSAGE(unit, "Executing: %s", line),
ba360bb0 3667 LOG_UNIT_ID(unit),
a1230ff9 3668 LOG_UNIT_INVOCATION_ID(unit));
d35fbf6b 3669 }
dd305ec9 3670
5686391b
LP
3671 if (exec_fd >= 0) {
3672 uint8_t hot = 1;
3673
3674 /* We have finished with all our initializations. Let's now let the manager know that. From this point
3675 * on, if the manager sees POLLHUP on the exec_fd, then execve() was successful. */
3676
3677 if (write(exec_fd, &hot, sizeof(hot)) < 0) {
3678 *exit_status = EXIT_EXEC;
3679 return log_unit_error_errno(unit, errno, "Failed to enable exec_fd: %m");
3680 }
3681 }
3682
2065ca69 3683 execve(command->path, final_argv, accum_env);
5686391b
LP
3684 r = -errno;
3685
3686 if (exec_fd >= 0) {
3687 uint8_t hot = 0;
3688
3689 /* The execve() failed. This means the exec_fd is still open. Which means we need to tell the manager
3690 * that POLLHUP on it no longer means execve() succeeded. */
3691
3692 if (write(exec_fd, &hot, sizeof(hot)) < 0) {
3693 *exit_status = EXIT_EXEC;
3694 return log_unit_error_errno(unit, errno, "Failed to disable exec_fd: %m");
3695 }
3696 }
12145637 3697
5686391b
LP
3698 if (r == -ENOENT && (command->flags & EXEC_COMMAND_IGNORE_FAILURE)) {
3699 log_struct_errno(LOG_INFO, r,
12145637
LP
3700 "MESSAGE_ID=" SD_MESSAGE_SPAWN_FAILED_STR,
3701 LOG_UNIT_ID(unit),
3702 LOG_UNIT_INVOCATION_ID(unit),
3703 LOG_UNIT_MESSAGE(unit, "Executable %s missing, skipping: %m",
3704 command->path),
a1230ff9 3705 "EXECUTABLE=%s", command->path);
12145637
LP
3706 return 0;
3707 }
3708
ff0af2a1 3709 *exit_status = EXIT_EXEC;
5686391b 3710 return log_unit_error_errno(unit, r, "Failed to execute command: %m");
d35fbf6b 3711}
81a2b7ce 3712
34cf6c43
YW
3713static int exec_context_load_environment(const Unit *unit, const ExecContext *c, char ***l);
3714static int exec_context_named_iofds(const ExecContext *c, const ExecParameters *p, int named_iofds[3]);
3715
f2341e0a
LP
3716int exec_spawn(Unit *unit,
3717 ExecCommand *command,
d35fbf6b
DM
3718 const ExecContext *context,
3719 const ExecParameters *params,
3720 ExecRuntime *runtime,
29206d46 3721 DynamicCreds *dcreds,
d35fbf6b 3722 pid_t *ret) {
8351ceae 3723
ee39ca20 3724 int socket_fd, r, named_iofds[3] = { -1, -1, -1 }, *fds = NULL;
78f93209 3725 _cleanup_free_ char *subcgroup_path = NULL;
d35fbf6b 3726 _cleanup_strv_free_ char **files_env = NULL;
da6053d0 3727 size_t n_storage_fds = 0, n_socket_fds = 0;
ff0af2a1 3728 _cleanup_free_ char *line = NULL;
d35fbf6b 3729 pid_t pid;
8351ceae 3730
f2341e0a 3731 assert(unit);
d35fbf6b
DM
3732 assert(command);
3733 assert(context);
3734 assert(ret);
3735 assert(params);
25b583d7 3736 assert(params->fds || (params->n_socket_fds + params->n_storage_fds <= 0));
4298d0b5 3737
d35fbf6b
DM
3738 if (context->std_input == EXEC_INPUT_SOCKET ||
3739 context->std_output == EXEC_OUTPUT_SOCKET ||
3740 context->std_error == EXEC_OUTPUT_SOCKET) {
17df7223 3741
4c47affc 3742 if (params->n_socket_fds > 1) {
f2341e0a 3743 log_unit_error(unit, "Got more than one socket.");
d35fbf6b 3744 return -EINVAL;
ff0af2a1 3745 }
eef65bf3 3746
4c47affc 3747 if (params->n_socket_fds == 0) {
488ab41c
AA
3748 log_unit_error(unit, "Got no socket.");
3749 return -EINVAL;
3750 }
3751
d35fbf6b
DM
3752 socket_fd = params->fds[0];
3753 } else {
3754 socket_fd = -1;
3755 fds = params->fds;
9b141911 3756 n_socket_fds = params->n_socket_fds;
25b583d7 3757 n_storage_fds = params->n_storage_fds;
d35fbf6b 3758 }
94f04347 3759
34cf6c43 3760 r = exec_context_named_iofds(context, params, named_iofds);
52c239d7
LB
3761 if (r < 0)
3762 return log_unit_error_errno(unit, r, "Failed to load a named file descriptor: %m");
3763
f2341e0a 3764 r = exec_context_load_environment(unit, context, &files_env);
ff0af2a1 3765 if (r < 0)
f2341e0a 3766 return log_unit_error_errno(unit, r, "Failed to load environment files: %m");
034c6ed7 3767
ee39ca20 3768 line = exec_command_line(command->argv);
d35fbf6b
DM
3769 if (!line)
3770 return log_oom();
fab56fc5 3771
f2341e0a 3772 log_struct(LOG_DEBUG,
f2341e0a
LP
3773 LOG_UNIT_MESSAGE(unit, "About to execute: %s", line),
3774 "EXECUTABLE=%s", command->path,
ba360bb0 3775 LOG_UNIT_ID(unit),
a1230ff9 3776 LOG_UNIT_INVOCATION_ID(unit));
12145637 3777
78f93209
LP
3778 if (params->cgroup_path) {
3779 r = exec_parameters_get_cgroup_path(params, &subcgroup_path);
3780 if (r < 0)
3781 return log_unit_error_errno(unit, r, "Failed to acquire subcgroup path: %m");
3782 if (r > 0) { /* We are using a child cgroup */
3783 r = cg_create(SYSTEMD_CGROUP_CONTROLLER, subcgroup_path);
3784 if (r < 0)
3785 return log_unit_error_errno(unit, r, "Failed to create control group '%s': %m", subcgroup_path);
3786 }
3787 }
3788
d35fbf6b
DM
3789 pid = fork();
3790 if (pid < 0)
74129a12 3791 return log_unit_error_errno(unit, errno, "Failed to fork: %m");
d35fbf6b
DM
3792
3793 if (pid == 0) {
12145637 3794 int exit_status = EXIT_SUCCESS;
ff0af2a1 3795
f2341e0a
LP
3796 r = exec_child(unit,
3797 command,
ff0af2a1
LP
3798 context,
3799 params,
3800 runtime,
29206d46 3801 dcreds,
ff0af2a1 3802 socket_fd,
52c239d7 3803 named_iofds,
4c47affc 3804 fds,
9b141911 3805 n_socket_fds,
25b583d7 3806 n_storage_fds,
ff0af2a1 3807 files_env,
00d9ef85 3808 unit->manager->user_lookup_fds[1],
12145637
LP
3809 &exit_status);
3810
a1230ff9 3811 if (r < 0)
12145637
LP
3812 log_struct_errno(LOG_ERR, r,
3813 "MESSAGE_ID=" SD_MESSAGE_SPAWN_FAILED_STR,
3814 LOG_UNIT_ID(unit),
3815 LOG_UNIT_INVOCATION_ID(unit),
3816 LOG_UNIT_MESSAGE(unit, "Failed at step %s spawning %s: %m",
3817 exit_status_to_string(exit_status, EXIT_STATUS_SYSTEMD),
3818 command->path),
a1230ff9 3819 "EXECUTABLE=%s", command->path);
4c2630eb 3820
ff0af2a1 3821 _exit(exit_status);
034c6ed7
LP
3822 }
3823
f2341e0a 3824 log_unit_debug(unit, "Forked %s as "PID_FMT, command->path, pid);
23635a85 3825
78f93209
LP
3826 /* We add the new process to the cgroup both in the child (so that we can be sure that no user code is ever
3827 * executed outside of the cgroup) and in the parent (so that we can be sure that when we kill the cgroup the
3828 * process will be killed too). */
3829 if (subcgroup_path)
3830 (void) cg_attach(SYSTEMD_CGROUP_CONTROLLER, subcgroup_path, pid);
2da3263a 3831
b58b4116 3832 exec_status_start(&command->exec_status, pid);
9fb86720 3833
034c6ed7 3834 *ret = pid;
5cb5a6ff
LP
3835 return 0;
3836}
3837
034c6ed7 3838void exec_context_init(ExecContext *c) {
3536f49e
YW
3839 ExecDirectoryType i;
3840
034c6ed7
LP
3841 assert(c);
3842
4c12626c 3843 c->umask = 0022;
9eba9da4 3844 c->ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 0);
94f04347 3845 c->cpu_sched_policy = SCHED_OTHER;
071830ff 3846 c->syslog_priority = LOG_DAEMON|LOG_INFO;
74922904 3847 c->syslog_level_prefix = true;
353e12c2 3848 c->ignore_sigpipe = true;
3a43da28 3849 c->timer_slack_nsec = NSEC_INFINITY;
050f7277 3850 c->personality = PERSONALITY_INVALID;
72fd1768 3851 for (i = 0; i < _EXEC_DIRECTORY_TYPE_MAX; i++)
3536f49e 3852 c->directories[i].mode = 0755;
a103496c 3853 c->capability_bounding_set = CAP_ALL;
aa9d574d
YW
3854 assert_cc(NAMESPACE_FLAGS_INITIAL != NAMESPACE_FLAGS_ALL);
3855 c->restrict_namespaces = NAMESPACE_FLAGS_INITIAL;
d3070fbd 3856 c->log_level_max = -1;
034c6ed7
LP
3857}
3858
613b411c 3859void exec_context_done(ExecContext *c) {
3536f49e 3860 ExecDirectoryType i;
d3070fbd 3861 size_t l;
5cb5a6ff
LP
3862
3863 assert(c);
3864
6796073e
LP
3865 c->environment = strv_free(c->environment);
3866 c->environment_files = strv_free(c->environment_files);
b4c14404 3867 c->pass_environment = strv_free(c->pass_environment);
00819cc1 3868 c->unset_environment = strv_free(c->unset_environment);
8c7be95e 3869
31ce987c 3870 rlimit_free_all(c->rlimit);
034c6ed7 3871
2038c3f5 3872 for (l = 0; l < 3; l++) {
52c239d7 3873 c->stdio_fdname[l] = mfree(c->stdio_fdname[l]);
2038c3f5
LP
3874 c->stdio_file[l] = mfree(c->stdio_file[l]);
3875 }
52c239d7 3876
a1e58e8e
LP
3877 c->working_directory = mfree(c->working_directory);
3878 c->root_directory = mfree(c->root_directory);
915e6d16 3879 c->root_image = mfree(c->root_image);
a1e58e8e
LP
3880 c->tty_path = mfree(c->tty_path);
3881 c->syslog_identifier = mfree(c->syslog_identifier);
3882 c->user = mfree(c->user);
3883 c->group = mfree(c->group);
034c6ed7 3884
6796073e 3885 c->supplementary_groups = strv_free(c->supplementary_groups);
94f04347 3886
a1e58e8e 3887 c->pam_name = mfree(c->pam_name);
5b6319dc 3888
2a624c36
AP
3889 c->read_only_paths = strv_free(c->read_only_paths);
3890 c->read_write_paths = strv_free(c->read_write_paths);
3891 c->inaccessible_paths = strv_free(c->inaccessible_paths);
82c121a4 3892
d2d6c096 3893 bind_mount_free_many(c->bind_mounts, c->n_bind_mounts);
8e06d57c
YW
3894 c->bind_mounts = NULL;
3895 c->n_bind_mounts = 0;
2abd4e38
YW
3896 temporary_filesystem_free_many(c->temporary_filesystems, c->n_temporary_filesystems);
3897 c->temporary_filesystems = NULL;
3898 c->n_temporary_filesystems = 0;
d2d6c096 3899
0985c7c4 3900 cpu_set_reset(&c->cpu_set);
86a3475b 3901
a1e58e8e
LP
3902 c->utmp_id = mfree(c->utmp_id);
3903 c->selinux_context = mfree(c->selinux_context);
3904 c->apparmor_profile = mfree(c->apparmor_profile);
5b8e1b77 3905 c->smack_process_label = mfree(c->smack_process_label);
eef65bf3 3906
8cfa775f 3907 c->syscall_filter = hashmap_free(c->syscall_filter);
525d3cc7
LP
3908 c->syscall_archs = set_free(c->syscall_archs);
3909 c->address_families = set_free(c->address_families);
e66cf1a3 3910
72fd1768 3911 for (i = 0; i < _EXEC_DIRECTORY_TYPE_MAX; i++)
3536f49e 3912 c->directories[i].paths = strv_free(c->directories[i].paths);
d3070fbd
LP
3913
3914 c->log_level_max = -1;
3915
3916 exec_context_free_log_extra_fields(c);
08f3be7a 3917
90fc172e
AZ
3918 c->log_rate_limit_interval_usec = 0;
3919 c->log_rate_limit_burst = 0;
3920
08f3be7a
LP
3921 c->stdin_data = mfree(c->stdin_data);
3922 c->stdin_data_size = 0;
a8d08f39
LP
3923
3924 c->network_namespace_path = mfree(c->network_namespace_path);
e66cf1a3
LP
3925}
3926
34cf6c43 3927int exec_context_destroy_runtime_directory(const ExecContext *c, const char *runtime_prefix) {
e66cf1a3
LP
3928 char **i;
3929
3930 assert(c);
3931
3932 if (!runtime_prefix)
3933 return 0;
3934
3536f49e 3935 STRV_FOREACH(i, c->directories[EXEC_DIRECTORY_RUNTIME].paths) {
e66cf1a3
LP
3936 _cleanup_free_ char *p;
3937
7bc4bf4a 3938 p = path_join(runtime_prefix, *i);
e66cf1a3
LP
3939 if (!p)
3940 return -ENOMEM;
3941
7bc4bf4a
LP
3942 /* We execute this synchronously, since we need to be sure this is gone when we start the
3943 * service next. */
c6878637 3944 (void) rm_rf(p, REMOVE_ROOT);
e66cf1a3
LP
3945 }
3946
3947 return 0;
5cb5a6ff
LP
3948}
3949
34cf6c43 3950static void exec_command_done(ExecCommand *c) {
43d0fcbd
LP
3951 assert(c);
3952
a1e58e8e 3953 c->path = mfree(c->path);
6796073e 3954 c->argv = strv_free(c->argv);
43d0fcbd
LP
3955}
3956
da6053d0
LP
3957void exec_command_done_array(ExecCommand *c, size_t n) {
3958 size_t i;
43d0fcbd
LP
3959
3960 for (i = 0; i < n; i++)
3961 exec_command_done(c+i);
3962}
3963
f1acf85a 3964ExecCommand* exec_command_free_list(ExecCommand *c) {
5cb5a6ff
LP
3965 ExecCommand *i;
3966
3967 while ((i = c)) {
71fda00f 3968 LIST_REMOVE(command, c, i);
43d0fcbd 3969 exec_command_done(i);
5cb5a6ff
LP
3970 free(i);
3971 }
f1acf85a
ZJS
3972
3973 return NULL;
5cb5a6ff
LP
3974}
3975
da6053d0
LP
3976void exec_command_free_array(ExecCommand **c, size_t n) {
3977 size_t i;
034c6ed7 3978
f1acf85a
ZJS
3979 for (i = 0; i < n; i++)
3980 c[i] = exec_command_free_list(c[i]);
034c6ed7
LP
3981}
3982
6a1d4d9f
LP
3983void exec_command_reset_status_array(ExecCommand *c, size_t n) {
3984 size_t i;
3985
3986 for (i = 0; i < n; i++)
3987 exec_status_reset(&c[i].exec_status);
3988}
3989
3990void exec_command_reset_status_list_array(ExecCommand **c, size_t n) {
3991 size_t i;
3992
3993 for (i = 0; i < n; i++) {
3994 ExecCommand *z;
3995
3996 LIST_FOREACH(command, z, c[i])
3997 exec_status_reset(&z->exec_status);
3998 }
3999}
4000
039f0e70 4001typedef struct InvalidEnvInfo {
34cf6c43 4002 const Unit *unit;
039f0e70
LP
4003 const char *path;
4004} InvalidEnvInfo;
4005
4006static void invalid_env(const char *p, void *userdata) {
4007 InvalidEnvInfo *info = userdata;
4008
f2341e0a 4009 log_unit_error(info->unit, "Ignoring invalid environment assignment '%s': %s", p, info->path);
039f0e70
LP
4010}
4011
52c239d7
LB
4012const char* exec_context_fdname(const ExecContext *c, int fd_index) {
4013 assert(c);
4014
4015 switch (fd_index) {
5073ff6b 4016
52c239d7
LB
4017 case STDIN_FILENO:
4018 if (c->std_input != EXEC_INPUT_NAMED_FD)
4019 return NULL;
5073ff6b 4020
52c239d7 4021 return c->stdio_fdname[STDIN_FILENO] ?: "stdin";
5073ff6b 4022
52c239d7
LB
4023 case STDOUT_FILENO:
4024 if (c->std_output != EXEC_OUTPUT_NAMED_FD)
4025 return NULL;
5073ff6b 4026
52c239d7 4027 return c->stdio_fdname[STDOUT_FILENO] ?: "stdout";
5073ff6b 4028
52c239d7
LB
4029 case STDERR_FILENO:
4030 if (c->std_error != EXEC_OUTPUT_NAMED_FD)
4031 return NULL;
5073ff6b 4032
52c239d7 4033 return c->stdio_fdname[STDERR_FILENO] ?: "stderr";
5073ff6b 4034
52c239d7
LB
4035 default:
4036 return NULL;
4037 }
4038}
4039
3042bbeb 4040static int exec_context_named_iofds(const ExecContext *c, const ExecParameters *p, int named_iofds[static 3]) {
da6053d0 4041 size_t i, targets;
56fbd561 4042 const char* stdio_fdname[3];
da6053d0 4043 size_t n_fds;
52c239d7
LB
4044
4045 assert(c);
4046 assert(p);
4047
4048 targets = (c->std_input == EXEC_INPUT_NAMED_FD) +
4049 (c->std_output == EXEC_OUTPUT_NAMED_FD) +
4050 (c->std_error == EXEC_OUTPUT_NAMED_FD);
4051
4052 for (i = 0; i < 3; i++)
4053 stdio_fdname[i] = exec_context_fdname(c, i);
4054
4c47affc
FB
4055 n_fds = p->n_storage_fds + p->n_socket_fds;
4056
4057 for (i = 0; i < n_fds && targets > 0; i++)
56fbd561
ZJS
4058 if (named_iofds[STDIN_FILENO] < 0 &&
4059 c->std_input == EXEC_INPUT_NAMED_FD &&
4060 stdio_fdname[STDIN_FILENO] &&
4061 streq(p->fd_names[i], stdio_fdname[STDIN_FILENO])) {
4062
52c239d7
LB
4063 named_iofds[STDIN_FILENO] = p->fds[i];
4064 targets--;
56fbd561
ZJS
4065
4066 } else if (named_iofds[STDOUT_FILENO] < 0 &&
4067 c->std_output == EXEC_OUTPUT_NAMED_FD &&
4068 stdio_fdname[STDOUT_FILENO] &&
4069 streq(p->fd_names[i], stdio_fdname[STDOUT_FILENO])) {
4070
52c239d7
LB
4071 named_iofds[STDOUT_FILENO] = p->fds[i];
4072 targets--;
56fbd561
ZJS
4073
4074 } else if (named_iofds[STDERR_FILENO] < 0 &&
4075 c->std_error == EXEC_OUTPUT_NAMED_FD &&
4076 stdio_fdname[STDERR_FILENO] &&
4077 streq(p->fd_names[i], stdio_fdname[STDERR_FILENO])) {
4078
52c239d7
LB
4079 named_iofds[STDERR_FILENO] = p->fds[i];
4080 targets--;
4081 }
4082
56fbd561 4083 return targets == 0 ? 0 : -ENOENT;
52c239d7
LB
4084}
4085
34cf6c43 4086static int exec_context_load_environment(const Unit *unit, const ExecContext *c, char ***l) {
8c7be95e
LP
4087 char **i, **r = NULL;
4088
4089 assert(c);
4090 assert(l);
4091
4092 STRV_FOREACH(i, c->environment_files) {
4093 char *fn;
52511fae
ZJS
4094 int k;
4095 unsigned n;
8c7be95e
LP
4096 bool ignore = false;
4097 char **p;
7fd1b19b 4098 _cleanup_globfree_ glob_t pglob = {};
8c7be95e
LP
4099
4100 fn = *i;
4101
4102 if (fn[0] == '-') {
4103 ignore = true;
313cefa1 4104 fn++;
8c7be95e
LP
4105 }
4106
4107 if (!path_is_absolute(fn)) {
8c7be95e
LP
4108 if (ignore)
4109 continue;
4110
4111 strv_free(r);
4112 return -EINVAL;
4113 }
4114
2bef10ab 4115 /* Filename supports globbing, take all matching files */
d8c92e8b
ZJS
4116 k = safe_glob(fn, 0, &pglob);
4117 if (k < 0) {
2bef10ab
PL
4118 if (ignore)
4119 continue;
8c7be95e 4120
2bef10ab 4121 strv_free(r);
d8c92e8b 4122 return k;
2bef10ab 4123 }
8c7be95e 4124
d8c92e8b
ZJS
4125 /* When we don't match anything, -ENOENT should be returned */
4126 assert(pglob.gl_pathc > 0);
4127
4128 for (n = 0; n < pglob.gl_pathc; n++) {
aa8fbc74 4129 k = load_env_file(NULL, pglob.gl_pathv[n], &p);
2bef10ab
PL
4130 if (k < 0) {
4131 if (ignore)
4132 continue;
8c7be95e 4133
2bef10ab 4134 strv_free(r);
2bef10ab 4135 return k;
e9c1ea9d 4136 }
ebc05a09 4137 /* Log invalid environment variables with filename */
039f0e70
LP
4138 if (p) {
4139 InvalidEnvInfo info = {
f2341e0a 4140 .unit = unit,
039f0e70
LP
4141 .path = pglob.gl_pathv[n]
4142 };
4143
4144 p = strv_env_clean_with_callback(p, invalid_env, &info);
4145 }
8c7be95e 4146
234519ae 4147 if (!r)
2bef10ab
PL
4148 r = p;
4149 else {
4150 char **m;
8c7be95e 4151
2bef10ab
PL
4152 m = strv_env_merge(2, r, p);
4153 strv_free(r);
4154 strv_free(p);
c84a9488 4155 if (!m)
2bef10ab 4156 return -ENOMEM;
2bef10ab
PL
4157
4158 r = m;
4159 }
8c7be95e
LP
4160 }
4161 }
4162
4163 *l = r;
4164
4165 return 0;
4166}
4167
6ac8fdc9 4168static bool tty_may_match_dev_console(const char *tty) {
7b912648 4169 _cleanup_free_ char *resolved = NULL;
6ac8fdc9 4170
1e22b5cd
LP
4171 if (!tty)
4172 return true;
4173
a119ec7c 4174 tty = skip_dev_prefix(tty);
6ac8fdc9
MS
4175
4176 /* trivial identity? */
4177 if (streq(tty, "console"))
4178 return true;
4179
7b912648
LP
4180 if (resolve_dev_console(&resolved) < 0)
4181 return true; /* if we could not resolve, assume it may */
6ac8fdc9
MS
4182
4183 /* "tty0" means the active VC, so it may be the same sometimes */
955f1c85 4184 return path_equal(resolved, tty) || (streq(resolved, "tty0") && tty_is_vc(tty));
6ac8fdc9
MS
4185}
4186
6c0ae739
LP
4187static bool exec_context_may_touch_tty(const ExecContext *ec) {
4188 assert(ec);
1e22b5cd 4189
6c0ae739 4190 return ec->tty_reset ||
1e22b5cd
LP
4191 ec->tty_vhangup ||
4192 ec->tty_vt_disallocate ||
6ac8fdc9
MS
4193 is_terminal_input(ec->std_input) ||
4194 is_terminal_output(ec->std_output) ||
6c0ae739
LP
4195 is_terminal_output(ec->std_error);
4196}
4197
4198bool exec_context_may_touch_console(const ExecContext *ec) {
4199
4200 return exec_context_may_touch_tty(ec) &&
1e22b5cd 4201 tty_may_match_dev_console(exec_context_tty_path(ec));
6ac8fdc9
MS
4202}
4203
15ae422b
LP
4204static void strv_fprintf(FILE *f, char **l) {
4205 char **g;
4206
4207 assert(f);
4208
4209 STRV_FOREACH(g, l)
4210 fprintf(f, " %s", *g);
4211}
4212
34cf6c43 4213void exec_context_dump(const ExecContext *c, FILE* f, const char *prefix) {
d3070fbd 4214 ExecDirectoryType dt;
c2bbd90b 4215 char **e, **d;
94f04347 4216 unsigned i;
add00535 4217 int r;
9eba9da4 4218
5cb5a6ff
LP
4219 assert(c);
4220 assert(f);
4221
4ad49000 4222 prefix = strempty(prefix);
5cb5a6ff
LP
4223
4224 fprintf(f,
94f04347
LP
4225 "%sUMask: %04o\n"
4226 "%sWorkingDirectory: %s\n"
451a074f 4227 "%sRootDirectory: %s\n"
15ae422b 4228 "%sNonBlocking: %s\n"
64747e2d 4229 "%sPrivateTmp: %s\n"
7f112f50 4230 "%sPrivateDevices: %s\n"
59eeb84b 4231 "%sProtectKernelTunables: %s\n"
e66a2f65 4232 "%sProtectKernelModules: %s\n"
59eeb84b 4233 "%sProtectControlGroups: %s\n"
d251207d
LP
4234 "%sPrivateNetwork: %s\n"
4235 "%sPrivateUsers: %s\n"
1b8689f9
LP
4236 "%sProtectHome: %s\n"
4237 "%sProtectSystem: %s\n"
5d997827 4238 "%sMountAPIVFS: %s\n"
f3e43635 4239 "%sIgnoreSIGPIPE: %s\n"
f4170c67 4240 "%sMemoryDenyWriteExecute: %s\n"
b1edf445 4241 "%sRestrictRealtime: %s\n"
f69567cb 4242 "%sRestrictSUIDSGID: %s\n"
aecd5ac6
TM
4243 "%sKeyringMode: %s\n"
4244 "%sProtectHostname: %s\n",
5cb5a6ff 4245 prefix, c->umask,
9eba9da4 4246 prefix, c->working_directory ? c->working_directory : "/",
451a074f 4247 prefix, c->root_directory ? c->root_directory : "/",
15ae422b 4248 prefix, yes_no(c->non_blocking),
64747e2d 4249 prefix, yes_no(c->private_tmp),
7f112f50 4250 prefix, yes_no(c->private_devices),
59eeb84b 4251 prefix, yes_no(c->protect_kernel_tunables),
e66a2f65 4252 prefix, yes_no(c->protect_kernel_modules),
59eeb84b 4253 prefix, yes_no(c->protect_control_groups),
d251207d
LP
4254 prefix, yes_no(c->private_network),
4255 prefix, yes_no(c->private_users),
1b8689f9
LP
4256 prefix, protect_home_to_string(c->protect_home),
4257 prefix, protect_system_to_string(c->protect_system),
5d997827 4258 prefix, yes_no(c->mount_apivfs),
f3e43635 4259 prefix, yes_no(c->ignore_sigpipe),
f4170c67 4260 prefix, yes_no(c->memory_deny_write_execute),
b1edf445 4261 prefix, yes_no(c->restrict_realtime),
f69567cb 4262 prefix, yes_no(c->restrict_suid_sgid),
aecd5ac6
TM
4263 prefix, exec_keyring_mode_to_string(c->keyring_mode),
4264 prefix, yes_no(c->protect_hostname));
fb33a393 4265
915e6d16
LP
4266 if (c->root_image)
4267 fprintf(f, "%sRootImage: %s\n", prefix, c->root_image);
4268
8c7be95e
LP
4269 STRV_FOREACH(e, c->environment)
4270 fprintf(f, "%sEnvironment: %s\n", prefix, *e);
4271
4272 STRV_FOREACH(e, c->environment_files)
4273 fprintf(f, "%sEnvironmentFile: %s\n", prefix, *e);
94f04347 4274
b4c14404
FB
4275 STRV_FOREACH(e, c->pass_environment)
4276 fprintf(f, "%sPassEnvironment: %s\n", prefix, *e);
4277
00819cc1
LP
4278 STRV_FOREACH(e, c->unset_environment)
4279 fprintf(f, "%sUnsetEnvironment: %s\n", prefix, *e);
4280
53f47dfc
YW
4281 fprintf(f, "%sRuntimeDirectoryPreserve: %s\n", prefix, exec_preserve_mode_to_string(c->runtime_directory_preserve_mode));
4282
72fd1768 4283 for (dt = 0; dt < _EXEC_DIRECTORY_TYPE_MAX; dt++) {
3536f49e
YW
4284 fprintf(f, "%s%sMode: %04o\n", prefix, exec_directory_type_to_string(dt), c->directories[dt].mode);
4285
4286 STRV_FOREACH(d, c->directories[dt].paths)
4287 fprintf(f, "%s%s: %s\n", prefix, exec_directory_type_to_string(dt), *d);
4288 }
c2bbd90b 4289
fb33a393
LP
4290 if (c->nice_set)
4291 fprintf(f,
4292 "%sNice: %i\n",
4293 prefix, c->nice);
4294
dd6c17b1 4295 if (c->oom_score_adjust_set)
fb33a393 4296 fprintf(f,
dd6c17b1
LP
4297 "%sOOMScoreAdjust: %i\n",
4298 prefix, c->oom_score_adjust);
9eba9da4 4299
94f04347 4300 for (i = 0; i < RLIM_NLIMITS; i++)
3c11da9d 4301 if (c->rlimit[i]) {
4c3a2b84 4302 fprintf(f, "%sLimit%s: " RLIM_FMT "\n",
3c11da9d 4303 prefix, rlimit_to_string(i), c->rlimit[i]->rlim_max);
4c3a2b84 4304 fprintf(f, "%sLimit%sSoft: " RLIM_FMT "\n",
3c11da9d
EV
4305 prefix, rlimit_to_string(i), c->rlimit[i]->rlim_cur);
4306 }
94f04347 4307
f8b69d1d 4308 if (c->ioprio_set) {
1756a011 4309 _cleanup_free_ char *class_str = NULL;
f8b69d1d 4310
837df140
YW
4311 r = ioprio_class_to_string_alloc(IOPRIO_PRIO_CLASS(c->ioprio), &class_str);
4312 if (r >= 0)
4313 fprintf(f, "%sIOSchedulingClass: %s\n", prefix, class_str);
4314
4315 fprintf(f, "%sIOPriority: %lu\n", prefix, IOPRIO_PRIO_DATA(c->ioprio));
f8b69d1d 4316 }
94f04347 4317
f8b69d1d 4318 if (c->cpu_sched_set) {
1756a011 4319 _cleanup_free_ char *policy_str = NULL;
f8b69d1d 4320
837df140
YW
4321 r = sched_policy_to_string_alloc(c->cpu_sched_policy, &policy_str);
4322 if (r >= 0)
4323 fprintf(f, "%sCPUSchedulingPolicy: %s\n", prefix, policy_str);
4324
94f04347 4325 fprintf(f,
38b48754
LP
4326 "%sCPUSchedulingPriority: %i\n"
4327 "%sCPUSchedulingResetOnFork: %s\n",
38b48754
LP
4328 prefix, c->cpu_sched_priority,
4329 prefix, yes_no(c->cpu_sched_reset_on_fork));
b929bf04 4330 }
94f04347 4331
0985c7c4 4332 if (c->cpu_set.set) {
e7fca352
MS
4333 _cleanup_free_ char *affinity = NULL;
4334
4335 affinity = cpu_set_to_range_string(&c->cpu_set);
4336 fprintf(f, "%sCPUAffinity: %s\n", prefix, affinity);
94f04347
LP
4337 }
4338
3a43da28 4339 if (c->timer_slack_nsec != NSEC_INFINITY)
ccd06097 4340 fprintf(f, "%sTimerSlackNSec: "NSEC_FMT "\n", prefix, c->timer_slack_nsec);
94f04347
LP
4341
4342 fprintf(f,
80876c20
LP
4343 "%sStandardInput: %s\n"
4344 "%sStandardOutput: %s\n"
4345 "%sStandardError: %s\n",
4346 prefix, exec_input_to_string(c->std_input),
4347 prefix, exec_output_to_string(c->std_output),
4348 prefix, exec_output_to_string(c->std_error));
4349
befc4a80
LP
4350 if (c->std_input == EXEC_INPUT_NAMED_FD)
4351 fprintf(f, "%sStandardInputFileDescriptorName: %s\n", prefix, c->stdio_fdname[STDIN_FILENO]);
4352 if (c->std_output == EXEC_OUTPUT_NAMED_FD)
4353 fprintf(f, "%sStandardOutputFileDescriptorName: %s\n", prefix, c->stdio_fdname[STDOUT_FILENO]);
4354 if (c->std_error == EXEC_OUTPUT_NAMED_FD)
4355 fprintf(f, "%sStandardErrorFileDescriptorName: %s\n", prefix, c->stdio_fdname[STDERR_FILENO]);
4356
4357 if (c->std_input == EXEC_INPUT_FILE)
4358 fprintf(f, "%sStandardInputFile: %s\n", prefix, c->stdio_file[STDIN_FILENO]);
4359 if (c->std_output == EXEC_OUTPUT_FILE)
4360 fprintf(f, "%sStandardOutputFile: %s\n", prefix, c->stdio_file[STDOUT_FILENO]);
566b7d23
ZD
4361 if (c->std_output == EXEC_OUTPUT_FILE_APPEND)
4362 fprintf(f, "%sStandardOutputFileToAppend: %s\n", prefix, c->stdio_file[STDOUT_FILENO]);
befc4a80
LP
4363 if (c->std_error == EXEC_OUTPUT_FILE)
4364 fprintf(f, "%sStandardErrorFile: %s\n", prefix, c->stdio_file[STDERR_FILENO]);
566b7d23
ZD
4365 if (c->std_error == EXEC_OUTPUT_FILE_APPEND)
4366 fprintf(f, "%sStandardErrorFileToAppend: %s\n", prefix, c->stdio_file[STDERR_FILENO]);
befc4a80 4367
80876c20
LP
4368 if (c->tty_path)
4369 fprintf(f,
6ea832a2
LP
4370 "%sTTYPath: %s\n"
4371 "%sTTYReset: %s\n"
4372 "%sTTYVHangup: %s\n"
4373 "%sTTYVTDisallocate: %s\n",
4374 prefix, c->tty_path,
4375 prefix, yes_no(c->tty_reset),
4376 prefix, yes_no(c->tty_vhangup),
4377 prefix, yes_no(c->tty_vt_disallocate));
94f04347 4378
9f6444eb
LP
4379 if (IN_SET(c->std_output,
4380 EXEC_OUTPUT_SYSLOG,
4381 EXEC_OUTPUT_KMSG,
4382 EXEC_OUTPUT_JOURNAL,
4383 EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
4384 EXEC_OUTPUT_KMSG_AND_CONSOLE,
4385 EXEC_OUTPUT_JOURNAL_AND_CONSOLE) ||
4386 IN_SET(c->std_error,
4387 EXEC_OUTPUT_SYSLOG,
4388 EXEC_OUTPUT_KMSG,
4389 EXEC_OUTPUT_JOURNAL,
4390 EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
4391 EXEC_OUTPUT_KMSG_AND_CONSOLE,
4392 EXEC_OUTPUT_JOURNAL_AND_CONSOLE)) {
f8b69d1d 4393
5ce70e5b 4394 _cleanup_free_ char *fac_str = NULL, *lvl_str = NULL;
f8b69d1d 4395
837df140
YW
4396 r = log_facility_unshifted_to_string_alloc(c->syslog_priority >> 3, &fac_str);
4397 if (r >= 0)
4398 fprintf(f, "%sSyslogFacility: %s\n", prefix, fac_str);
f8b69d1d 4399
837df140
YW
4400 r = log_level_to_string_alloc(LOG_PRI(c->syslog_priority), &lvl_str);
4401 if (r >= 0)
4402 fprintf(f, "%sSyslogLevel: %s\n", prefix, lvl_str);
f8b69d1d 4403 }
94f04347 4404
d3070fbd
LP
4405 if (c->log_level_max >= 0) {
4406 _cleanup_free_ char *t = NULL;
4407
4408 (void) log_level_to_string_alloc(c->log_level_max, &t);
4409
4410 fprintf(f, "%sLogLevelMax: %s\n", prefix, strna(t));
4411 }
4412
90fc172e
AZ
4413 if (c->log_rate_limit_interval_usec > 0) {
4414 char buf_timespan[FORMAT_TIMESPAN_MAX];
4415
4416 fprintf(f,
4417 "%sLogRateLimitIntervalSec: %s\n",
4418 prefix, format_timespan(buf_timespan, sizeof(buf_timespan), c->log_rate_limit_interval_usec, USEC_PER_SEC));
4419 }
4420
4421 if (c->log_rate_limit_burst > 0)
4422 fprintf(f, "%sLogRateLimitBurst: %u\n", prefix, c->log_rate_limit_burst);
4423
d3070fbd
LP
4424 if (c->n_log_extra_fields > 0) {
4425 size_t j;
4426
4427 for (j = 0; j < c->n_log_extra_fields; j++) {
4428 fprintf(f, "%sLogExtraFields: ", prefix);
4429 fwrite(c->log_extra_fields[j].iov_base,
4430 1, c->log_extra_fields[j].iov_len,
4431 f);
4432 fputc('\n', f);
4433 }
4434 }
4435
07d46372
YW
4436 if (c->secure_bits) {
4437 _cleanup_free_ char *str = NULL;
4438
4439 r = secure_bits_to_string_alloc(c->secure_bits, &str);
4440 if (r >= 0)
4441 fprintf(f, "%sSecure Bits: %s\n", prefix, str);
4442 }
94f04347 4443
a103496c 4444 if (c->capability_bounding_set != CAP_ALL) {
dd1f5bd0 4445 _cleanup_free_ char *str = NULL;
94f04347 4446
dd1f5bd0
YW
4447 r = capability_set_to_string_alloc(c->capability_bounding_set, &str);
4448 if (r >= 0)
4449 fprintf(f, "%sCapabilityBoundingSet: %s\n", prefix, str);
755d4b67
IP
4450 }
4451
4452 if (c->capability_ambient_set != 0) {
dd1f5bd0 4453 _cleanup_free_ char *str = NULL;
755d4b67 4454
dd1f5bd0
YW
4455 r = capability_set_to_string_alloc(c->capability_ambient_set, &str);
4456 if (r >= 0)
4457 fprintf(f, "%sAmbientCapabilities: %s\n", prefix, str);
94f04347
LP
4458 }
4459
4460 if (c->user)
f2d3769a 4461 fprintf(f, "%sUser: %s\n", prefix, c->user);
94f04347 4462 if (c->group)
f2d3769a 4463 fprintf(f, "%sGroup: %s\n", prefix, c->group);
94f04347 4464
29206d46
LP
4465 fprintf(f, "%sDynamicUser: %s\n", prefix, yes_no(c->dynamic_user));
4466
ac6e8be6 4467 if (!strv_isempty(c->supplementary_groups)) {
94f04347 4468 fprintf(f, "%sSupplementaryGroups:", prefix);
15ae422b
LP
4469 strv_fprintf(f, c->supplementary_groups);
4470 fputs("\n", f);
4471 }
94f04347 4472
5b6319dc 4473 if (c->pam_name)
f2d3769a 4474 fprintf(f, "%sPAMName: %s\n", prefix, c->pam_name);
5b6319dc 4475
58629001 4476 if (!strv_isempty(c->read_write_paths)) {
2a624c36
AP
4477 fprintf(f, "%sReadWritePaths:", prefix);
4478 strv_fprintf(f, c->read_write_paths);
15ae422b
LP
4479 fputs("\n", f);
4480 }
4481
58629001 4482 if (!strv_isempty(c->read_only_paths)) {
2a624c36
AP
4483 fprintf(f, "%sReadOnlyPaths:", prefix);
4484 strv_fprintf(f, c->read_only_paths);
15ae422b
LP
4485 fputs("\n", f);
4486 }
94f04347 4487
58629001 4488 if (!strv_isempty(c->inaccessible_paths)) {
2a624c36
AP
4489 fprintf(f, "%sInaccessiblePaths:", prefix);
4490 strv_fprintf(f, c->inaccessible_paths);
94f04347
LP
4491 fputs("\n", f);
4492 }
2e22afe9 4493
d2d6c096 4494 if (c->n_bind_mounts > 0)
4ca763a9
YW
4495 for (i = 0; i < c->n_bind_mounts; i++)
4496 fprintf(f, "%s%s: %s%s:%s:%s\n", prefix,
d2d6c096 4497 c->bind_mounts[i].read_only ? "BindReadOnlyPaths" : "BindPaths",
4ca763a9 4498 c->bind_mounts[i].ignore_enoent ? "-": "",
d2d6c096
LP
4499 c->bind_mounts[i].source,
4500 c->bind_mounts[i].destination,
4501 c->bind_mounts[i].recursive ? "rbind" : "norbind");
d2d6c096 4502
2abd4e38
YW
4503 if (c->n_temporary_filesystems > 0)
4504 for (i = 0; i < c->n_temporary_filesystems; i++) {
4505 TemporaryFileSystem *t = c->temporary_filesystems + i;
4506
4507 fprintf(f, "%sTemporaryFileSystem: %s%s%s\n", prefix,
4508 t->path,
4509 isempty(t->options) ? "" : ":",
4510 strempty(t->options));
4511 }
4512
169c1bda
LP
4513 if (c->utmp_id)
4514 fprintf(f,
4515 "%sUtmpIdentifier: %s\n",
4516 prefix, c->utmp_id);
7b52a628
MS
4517
4518 if (c->selinux_context)
4519 fprintf(f,
5f8640fb
LP
4520 "%sSELinuxContext: %s%s\n",
4521 prefix, c->selinux_context_ignore ? "-" : "", c->selinux_context);
17df7223 4522
80c21aea
WC
4523 if (c->apparmor_profile)
4524 fprintf(f,
4525 "%sAppArmorProfile: %s%s\n",
4526 prefix, c->apparmor_profile_ignore ? "-" : "", c->apparmor_profile);
4527
4528 if (c->smack_process_label)
4529 fprintf(f,
4530 "%sSmackProcessLabel: %s%s\n",
4531 prefix, c->smack_process_label_ignore ? "-" : "", c->smack_process_label);
4532
050f7277 4533 if (c->personality != PERSONALITY_INVALID)
ac45f971
LP
4534 fprintf(f,
4535 "%sPersonality: %s\n",
4536 prefix, strna(personality_to_string(c->personality)));
4537
78e864e5
TM
4538 fprintf(f,
4539 "%sLockPersonality: %s\n",
4540 prefix, yes_no(c->lock_personality));
4541
17df7223 4542 if (c->syscall_filter) {
349cc4a5 4543#if HAVE_SECCOMP
17df7223 4544 Iterator j;
8cfa775f 4545 void *id, *val;
17df7223 4546 bool first = true;
351a19b1 4547#endif
17df7223
LP
4548
4549 fprintf(f,
57183d11 4550 "%sSystemCallFilter: ",
17df7223
LP
4551 prefix);
4552
4553 if (!c->syscall_whitelist)
4554 fputc('~', f);
4555
349cc4a5 4556#if HAVE_SECCOMP
8cfa775f 4557 HASHMAP_FOREACH_KEY(val, id, c->syscall_filter, j) {
17df7223 4558 _cleanup_free_ char *name = NULL;
8cfa775f
YW
4559 const char *errno_name = NULL;
4560 int num = PTR_TO_INT(val);
17df7223
LP
4561
4562 if (first)
4563 first = false;
4564 else
4565 fputc(' ', f);
4566
57183d11 4567 name = seccomp_syscall_resolve_num_arch(SCMP_ARCH_NATIVE, PTR_TO_INT(id) - 1);
17df7223 4568 fputs(strna(name), f);
8cfa775f
YW
4569
4570 if (num >= 0) {
4571 errno_name = errno_to_name(num);
4572 if (errno_name)
4573 fprintf(f, ":%s", errno_name);
4574 else
4575 fprintf(f, ":%d", num);
4576 }
17df7223 4577 }
351a19b1 4578#endif
17df7223
LP
4579
4580 fputc('\n', f);
4581 }
4582
57183d11 4583 if (c->syscall_archs) {
349cc4a5 4584#if HAVE_SECCOMP
57183d11
LP
4585 Iterator j;
4586 void *id;
4587#endif
4588
4589 fprintf(f,
4590 "%sSystemCallArchitectures:",
4591 prefix);
4592
349cc4a5 4593#if HAVE_SECCOMP
57183d11
LP
4594 SET_FOREACH(id, c->syscall_archs, j)
4595 fprintf(f, " %s", strna(seccomp_arch_to_string(PTR_TO_UINT32(id) - 1)));
4596#endif
4597 fputc('\n', f);
4598 }
4599
add00535
LP
4600 if (exec_context_restrict_namespaces_set(c)) {
4601 _cleanup_free_ char *s = NULL;
4602
86c2a9f1 4603 r = namespace_flags_to_string(c->restrict_namespaces, &s);
add00535
LP
4604 if (r >= 0)
4605 fprintf(f, "%sRestrictNamespaces: %s\n",
4606 prefix, s);
4607 }
4608
a8d08f39
LP
4609 if (c->network_namespace_path)
4610 fprintf(f,
4611 "%sNetworkNamespacePath: %s\n",
4612 prefix, c->network_namespace_path);
4613
3df90f24
YW
4614 if (c->syscall_errno > 0) {
4615 const char *errno_name;
4616
4617 fprintf(f, "%sSystemCallErrorNumber: ", prefix);
4618
4619 errno_name = errno_to_name(c->syscall_errno);
4620 if (errno_name)
4621 fprintf(f, "%s\n", errno_name);
4622 else
4623 fprintf(f, "%d\n", c->syscall_errno);
4624 }
5cb5a6ff
LP
4625}
4626
34cf6c43 4627bool exec_context_maintains_privileges(const ExecContext *c) {
a931ad47
LP
4628 assert(c);
4629
61233823 4630 /* Returns true if the process forked off would run under
a931ad47
LP
4631 * an unchanged UID or as root. */
4632
4633 if (!c->user)
4634 return true;
4635
4636 if (streq(c->user, "root") || streq(c->user, "0"))
4637 return true;
4638
4639 return false;
4640}
4641
34cf6c43 4642int exec_context_get_effective_ioprio(const ExecContext *c) {
7f452159
LP
4643 int p;
4644
4645 assert(c);
4646
4647 if (c->ioprio_set)
4648 return c->ioprio;
4649
4650 p = ioprio_get(IOPRIO_WHO_PROCESS, 0);
4651 if (p < 0)
4652 return IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 4);
4653
4654 return p;
4655}
4656
d3070fbd
LP
4657void exec_context_free_log_extra_fields(ExecContext *c) {
4658 size_t l;
4659
4660 assert(c);
4661
4662 for (l = 0; l < c->n_log_extra_fields; l++)
4663 free(c->log_extra_fields[l].iov_base);
4664 c->log_extra_fields = mfree(c->log_extra_fields);
4665 c->n_log_extra_fields = 0;
4666}
4667
6f765baf
LP
4668void exec_context_revert_tty(ExecContext *c) {
4669 int r;
4670
4671 assert(c);
4672
4673 /* First, reset the TTY (possibly kicking everybody else from the TTY) */
4674 exec_context_tty_reset(c, NULL);
4675
4676 /* And then undo what chown_terminal() did earlier. Note that we only do this if we have a path
4677 * configured. If the TTY was passed to us as file descriptor we assume the TTY is opened and managed
4678 * by whoever passed it to us and thus knows better when and how to chmod()/chown() it back. */
4679
4680 if (exec_context_may_touch_tty(c)) {
4681 const char *path;
4682
4683 path = exec_context_tty_path(c);
4684 if (path) {
4685 r = chmod_and_chown(path, TTY_MODE, 0, TTY_GID);
4686 if (r < 0 && r != -ENOENT)
4687 log_warning_errno(r, "Failed to reset TTY ownership/access mode of %s, ignoring: %m", path);
4688 }
4689 }
4690}
4691
b58b4116 4692void exec_status_start(ExecStatus *s, pid_t pid) {
034c6ed7 4693 assert(s);
5cb5a6ff 4694
2ed26ed0
LP
4695 *s = (ExecStatus) {
4696 .pid = pid,
4697 };
4698
b58b4116
LP
4699 dual_timestamp_get(&s->start_timestamp);
4700}
4701
34cf6c43 4702void exec_status_exit(ExecStatus *s, const ExecContext *context, pid_t pid, int code, int status) {
b58b4116
LP
4703 assert(s);
4704
2ed26ed0
LP
4705 if (s->pid != pid) {
4706 *s = (ExecStatus) {
4707 .pid = pid,
4708 };
4709 }
b58b4116 4710
63983207 4711 dual_timestamp_get(&s->exit_timestamp);
9fb86720 4712
034c6ed7
LP
4713 s->code = code;
4714 s->status = status;
169c1bda 4715
6f765baf
LP
4716 if (context && context->utmp_id)
4717 (void) utmp_put_dead_process(context->utmp_id, pid, code, status);
9fb86720
LP
4718}
4719
6a1d4d9f
LP
4720void exec_status_reset(ExecStatus *s) {
4721 assert(s);
4722
4723 *s = (ExecStatus) {};
4724}
4725
34cf6c43 4726void exec_status_dump(const ExecStatus *s, FILE *f, const char *prefix) {
9fb86720
LP
4727 char buf[FORMAT_TIMESTAMP_MAX];
4728
4729 assert(s);
4730 assert(f);
4731
9fb86720
LP
4732 if (s->pid <= 0)
4733 return;
4734
4c940960
LP
4735 prefix = strempty(prefix);
4736
9fb86720 4737 fprintf(f,
ccd06097
ZJS
4738 "%sPID: "PID_FMT"\n",
4739 prefix, s->pid);
9fb86720 4740
af9d16e1 4741 if (dual_timestamp_is_set(&s->start_timestamp))
9fb86720
LP
4742 fprintf(f,
4743 "%sStart Timestamp: %s\n",
63983207 4744 prefix, format_timestamp(buf, sizeof(buf), s->start_timestamp.realtime));
9fb86720 4745
af9d16e1 4746 if (dual_timestamp_is_set(&s->exit_timestamp))
9fb86720
LP
4747 fprintf(f,
4748 "%sExit Timestamp: %s\n"
4749 "%sExit Code: %s\n"
4750 "%sExit Status: %i\n",
63983207 4751 prefix, format_timestamp(buf, sizeof(buf), s->exit_timestamp.realtime),
9fb86720
LP
4752 prefix, sigchld_code_to_string(s->code),
4753 prefix, s->status);
5cb5a6ff 4754}
44d8db9e 4755
34cf6c43 4756static char *exec_command_line(char **argv) {
44d8db9e
LP
4757 size_t k;
4758 char *n, *p, **a;
4759 bool first = true;
4760
9e2f7c11 4761 assert(argv);
44d8db9e 4762
9164977d 4763 k = 1;
9e2f7c11 4764 STRV_FOREACH(a, argv)
44d8db9e
LP
4765 k += strlen(*a)+3;
4766
5cd9cd35
LP
4767 n = new(char, k);
4768 if (!n)
44d8db9e
LP
4769 return NULL;
4770
4771 p = n;
9e2f7c11 4772 STRV_FOREACH(a, argv) {
44d8db9e
LP
4773
4774 if (!first)
4775 *(p++) = ' ';
4776 else
4777 first = false;
4778
4779 if (strpbrk(*a, WHITESPACE)) {
4780 *(p++) = '\'';
4781 p = stpcpy(p, *a);
4782 *(p++) = '\'';
4783 } else
4784 p = stpcpy(p, *a);
4785
4786 }
4787
9164977d
LP
4788 *p = 0;
4789
44d8db9e
LP
4790 /* FIXME: this doesn't really handle arguments that have
4791 * spaces and ticks in them */
4792
4793 return n;
4794}
4795
34cf6c43 4796static void exec_command_dump(ExecCommand *c, FILE *f, const char *prefix) {
e1d75803 4797 _cleanup_free_ char *cmd = NULL;
4c940960 4798 const char *prefix2;
44d8db9e
LP
4799
4800 assert(c);
4801 assert(f);
4802
4c940960 4803 prefix = strempty(prefix);
63c372cb 4804 prefix2 = strjoina(prefix, "\t");
44d8db9e 4805
9e2f7c11 4806 cmd = exec_command_line(c->argv);
44d8db9e
LP
4807 fprintf(f,
4808 "%sCommand Line: %s\n",
4809 prefix, cmd ? cmd : strerror(ENOMEM));
4810
9fb86720 4811 exec_status_dump(&c->exec_status, f, prefix2);
44d8db9e
LP
4812}
4813
4814void exec_command_dump_list(ExecCommand *c, FILE *f, const char *prefix) {
4815 assert(f);
4816
4c940960 4817 prefix = strempty(prefix);
44d8db9e
LP
4818
4819 LIST_FOREACH(command, c, c)
4820 exec_command_dump(c, f, prefix);
4821}
94f04347 4822
a6a80b4f
LP
4823void exec_command_append_list(ExecCommand **l, ExecCommand *e) {
4824 ExecCommand *end;
4825
4826 assert(l);
4827 assert(e);
4828
4829 if (*l) {
35b8ca3a 4830 /* It's kind of important, that we keep the order here */
71fda00f
LP
4831 LIST_FIND_TAIL(command, *l, end);
4832 LIST_INSERT_AFTER(command, *l, end, e);
a6a80b4f
LP
4833 } else
4834 *l = e;
4835}
4836
26fd040d
LP
4837int exec_command_set(ExecCommand *c, const char *path, ...) {
4838 va_list ap;
4839 char **l, *p;
4840
4841 assert(c);
4842 assert(path);
4843
4844 va_start(ap, path);
4845 l = strv_new_ap(path, ap);
4846 va_end(ap);
4847
4848 if (!l)
4849 return -ENOMEM;
4850
250a918d
LP
4851 p = strdup(path);
4852 if (!p) {
26fd040d
LP
4853 strv_free(l);
4854 return -ENOMEM;
4855 }
4856
6897dfe8 4857 free_and_replace(c->path, p);
26fd040d 4858
130d3d22 4859 return strv_free_and_replace(c->argv, l);
26fd040d
LP
4860}
4861
86b23b07 4862int exec_command_append(ExecCommand *c, const char *path, ...) {
e63ff941 4863 _cleanup_strv_free_ char **l = NULL;
86b23b07 4864 va_list ap;
86b23b07
JS
4865 int r;
4866
4867 assert(c);
4868 assert(path);
4869
4870 va_start(ap, path);
4871 l = strv_new_ap(path, ap);
4872 va_end(ap);
4873
4874 if (!l)
4875 return -ENOMEM;
4876
e287086b 4877 r = strv_extend_strv(&c->argv, l, false);
e63ff941 4878 if (r < 0)
86b23b07 4879 return r;
86b23b07
JS
4880
4881 return 0;
4882}
4883
e8a565cb
YW
4884static void *remove_tmpdir_thread(void *p) {
4885 _cleanup_free_ char *path = p;
86b23b07 4886
e8a565cb
YW
4887 (void) rm_rf(path, REMOVE_ROOT|REMOVE_PHYSICAL);
4888 return NULL;
4889}
4890
4891static ExecRuntime* exec_runtime_free(ExecRuntime *rt, bool destroy) {
4892 int r;
4893
4894 if (!rt)
4895 return NULL;
4896
4897 if (rt->manager)
4898 (void) hashmap_remove(rt->manager->exec_runtime_by_id, rt->id);
4899
4900 /* When destroy is true, then rm_rf tmp_dir and var_tmp_dir. */
4901 if (destroy && rt->tmp_dir) {
4902 log_debug("Spawning thread to nuke %s", rt->tmp_dir);
4903
4904 r = asynchronous_job(remove_tmpdir_thread, rt->tmp_dir);
4905 if (r < 0) {
4906 log_warning_errno(r, "Failed to nuke %s: %m", rt->tmp_dir);
4907 free(rt->tmp_dir);
4908 }
4909
4910 rt->tmp_dir = NULL;
4911 }
613b411c 4912
e8a565cb
YW
4913 if (destroy && rt->var_tmp_dir) {
4914 log_debug("Spawning thread to nuke %s", rt->var_tmp_dir);
4915
4916 r = asynchronous_job(remove_tmpdir_thread, rt->var_tmp_dir);
4917 if (r < 0) {
4918 log_warning_errno(r, "Failed to nuke %s: %m", rt->var_tmp_dir);
4919 free(rt->var_tmp_dir);
4920 }
4921
4922 rt->var_tmp_dir = NULL;
4923 }
4924
4925 rt->id = mfree(rt->id);
4926 rt->tmp_dir = mfree(rt->tmp_dir);
4927 rt->var_tmp_dir = mfree(rt->var_tmp_dir);
4928 safe_close_pair(rt->netns_storage_socket);
4929 return mfree(rt);
4930}
4931
4932static void exec_runtime_freep(ExecRuntime **rt) {
da6bc6ed 4933 (void) exec_runtime_free(*rt, false);
e8a565cb
YW
4934}
4935
8e8009dc
LP
4936static int exec_runtime_allocate(ExecRuntime **ret) {
4937 ExecRuntime *n;
613b411c 4938
8e8009dc 4939 assert(ret);
613b411c 4940
8e8009dc
LP
4941 n = new(ExecRuntime, 1);
4942 if (!n)
613b411c
LP
4943 return -ENOMEM;
4944
8e8009dc
LP
4945 *n = (ExecRuntime) {
4946 .netns_storage_socket = { -1, -1 },
4947 };
4948
4949 *ret = n;
613b411c
LP
4950 return 0;
4951}
4952
e8a565cb
YW
4953static int exec_runtime_add(
4954 Manager *m,
4955 const char *id,
4956 const char *tmp_dir,
4957 const char *var_tmp_dir,
4958 const int netns_storage_socket[2],
4959 ExecRuntime **ret) {
4960
4961 _cleanup_(exec_runtime_freep) ExecRuntime *rt = NULL;
613b411c
LP
4962 int r;
4963
e8a565cb 4964 assert(m);
613b411c
LP
4965 assert(id);
4966
e8a565cb
YW
4967 r = hashmap_ensure_allocated(&m->exec_runtime_by_id, &string_hash_ops);
4968 if (r < 0)
4969 return r;
613b411c 4970
e8a565cb 4971 r = exec_runtime_allocate(&rt);
613b411c
LP
4972 if (r < 0)
4973 return r;
4974
e8a565cb
YW
4975 rt->id = strdup(id);
4976 if (!rt->id)
4977 return -ENOMEM;
4978
4979 if (tmp_dir) {
4980 rt->tmp_dir = strdup(tmp_dir);
4981 if (!rt->tmp_dir)
4982 return -ENOMEM;
4983
4984 /* When tmp_dir is set, then we require var_tmp_dir is also set. */
4985 assert(var_tmp_dir);
4986 rt->var_tmp_dir = strdup(var_tmp_dir);
4987 if (!rt->var_tmp_dir)
4988 return -ENOMEM;
4989 }
4990
4991 if (netns_storage_socket) {
4992 rt->netns_storage_socket[0] = netns_storage_socket[0];
4993 rt->netns_storage_socket[1] = netns_storage_socket[1];
613b411c
LP
4994 }
4995
e8a565cb
YW
4996 r = hashmap_put(m->exec_runtime_by_id, rt->id, rt);
4997 if (r < 0)
4998 return r;
4999
5000 rt->manager = m;
5001
5002 if (ret)
5003 *ret = rt;
5004
5005 /* do not remove created ExecRuntime object when the operation succeeds. */
5006 rt = NULL;
5007 return 0;
5008}
5009
5010static int exec_runtime_make(Manager *m, const ExecContext *c, const char *id, ExecRuntime **ret) {
5011 _cleanup_free_ char *tmp_dir = NULL, *var_tmp_dir = NULL;
2fa3742d 5012 _cleanup_close_pair_ int netns_storage_socket[2] = { -1, -1 };
e8a565cb
YW
5013 int r;
5014
5015 assert(m);
5016 assert(c);
5017 assert(id);
5018
5019 /* It is not necessary to create ExecRuntime object. */
a8d08f39 5020 if (!c->private_network && !c->private_tmp && !c->network_namespace_path)
e8a565cb
YW
5021 return 0;
5022
5023 if (c->private_tmp) {
5024 r = setup_tmp_dirs(id, &tmp_dir, &var_tmp_dir);
613b411c
LP
5025 if (r < 0)
5026 return r;
5027 }
5028
a8d08f39 5029 if (c->private_network || c->network_namespace_path) {
e8a565cb
YW
5030 if (socketpair(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC, 0, netns_storage_socket) < 0)
5031 return -errno;
5032 }
5033
5034 r = exec_runtime_add(m, id, tmp_dir, var_tmp_dir, netns_storage_socket, ret);
5035 if (r < 0)
5036 return r;
5037
5038 /* Avoid cleanup */
2fa3742d 5039 netns_storage_socket[0] = netns_storage_socket[1] = -1;
613b411c
LP
5040 return 1;
5041}
5042
e8a565cb
YW
5043int exec_runtime_acquire(Manager *m, const ExecContext *c, const char *id, bool create, ExecRuntime **ret) {
5044 ExecRuntime *rt;
5045 int r;
613b411c 5046
e8a565cb
YW
5047 assert(m);
5048 assert(id);
5049 assert(ret);
5050
5051 rt = hashmap_get(m->exec_runtime_by_id, id);
5052 if (rt)
5053 /* We already have a ExecRuntime object, let's increase the ref count and reuse it */
5054 goto ref;
5055
5056 if (!create)
5057 return 0;
5058
5059 /* If not found, then create a new object. */
5060 r = exec_runtime_make(m, c, id, &rt);
5061 if (r <= 0)
5062 /* When r == 0, it is not necessary to create ExecRuntime object. */
5063 return r;
613b411c 5064
e8a565cb
YW
5065ref:
5066 /* increment reference counter. */
5067 rt->n_ref++;
5068 *ret = rt;
5069 return 1;
5070}
613b411c 5071
e8a565cb
YW
5072ExecRuntime *exec_runtime_unref(ExecRuntime *rt, bool destroy) {
5073 if (!rt)
613b411c
LP
5074 return NULL;
5075
e8a565cb 5076 assert(rt->n_ref > 0);
613b411c 5077
e8a565cb
YW
5078 rt->n_ref--;
5079 if (rt->n_ref > 0)
f2341e0a
LP
5080 return NULL;
5081
e8a565cb 5082 return exec_runtime_free(rt, destroy);
613b411c
LP
5083}
5084
e8a565cb
YW
5085int exec_runtime_serialize(const Manager *m, FILE *f, FDSet *fds) {
5086 ExecRuntime *rt;
5087 Iterator i;
5088
5089 assert(m);
613b411c
LP
5090 assert(f);
5091 assert(fds);
5092
e8a565cb
YW
5093 HASHMAP_FOREACH(rt, m->exec_runtime_by_id, i) {
5094 fprintf(f, "exec-runtime=%s", rt->id);
613b411c 5095
e8a565cb
YW
5096 if (rt->tmp_dir)
5097 fprintf(f, " tmp-dir=%s", rt->tmp_dir);
613b411c 5098
e8a565cb
YW
5099 if (rt->var_tmp_dir)
5100 fprintf(f, " var-tmp-dir=%s", rt->var_tmp_dir);
613b411c 5101
e8a565cb
YW
5102 if (rt->netns_storage_socket[0] >= 0) {
5103 int copy;
613b411c 5104
e8a565cb
YW
5105 copy = fdset_put_dup(fds, rt->netns_storage_socket[0]);
5106 if (copy < 0)
5107 return copy;
613b411c 5108
e8a565cb
YW
5109 fprintf(f, " netns-socket-0=%i", copy);
5110 }
613b411c 5111
e8a565cb
YW
5112 if (rt->netns_storage_socket[1] >= 0) {
5113 int copy;
613b411c 5114
e8a565cb
YW
5115 copy = fdset_put_dup(fds, rt->netns_storage_socket[1]);
5116 if (copy < 0)
5117 return copy;
613b411c 5118
e8a565cb
YW
5119 fprintf(f, " netns-socket-1=%i", copy);
5120 }
5121
5122 fputc('\n', f);
613b411c
LP
5123 }
5124
5125 return 0;
5126}
5127
e8a565cb
YW
5128int exec_runtime_deserialize_compat(Unit *u, const char *key, const char *value, FDSet *fds) {
5129 _cleanup_(exec_runtime_freep) ExecRuntime *rt_create = NULL;
5130 ExecRuntime *rt;
613b411c
LP
5131 int r;
5132
e8a565cb
YW
5133 /* This is for the migration from old (v237 or earlier) deserialization text.
5134 * Due to the bug #7790, this may not work with the units that use JoinsNamespaceOf=.
5135 * Even if the ExecRuntime object originally created by the other unit, we cannot judge
5136 * so or not from the serialized text, then we always creates a new object owned by this. */
5137
5138 assert(u);
613b411c
LP
5139 assert(key);
5140 assert(value);
5141
e8a565cb
YW
5142 /* Manager manages ExecRuntime objects by the unit id.
5143 * So, we omit the serialized text when the unit does not have id (yet?)... */
5144 if (isempty(u->id)) {
5145 log_unit_debug(u, "Invocation ID not found. Dropping runtime parameter.");
5146 return 0;
5147 }
613b411c 5148
e8a565cb
YW
5149 r = hashmap_ensure_allocated(&u->manager->exec_runtime_by_id, &string_hash_ops);
5150 if (r < 0) {
5151 log_unit_debug_errno(u, r, "Failed to allocate storage for runtime parameter: %m");
5152 return 0;
5153 }
5154
5155 rt = hashmap_get(u->manager->exec_runtime_by_id, u->id);
5156 if (!rt) {
5157 r = exec_runtime_allocate(&rt_create);
613b411c 5158 if (r < 0)
f2341e0a 5159 return log_oom();
613b411c 5160
e8a565cb
YW
5161 rt_create->id = strdup(u->id);
5162 if (!rt_create->id)
5163 return log_oom();
5164
5165 rt = rt_create;
5166 }
5167
5168 if (streq(key, "tmp-dir")) {
5169 char *copy;
5170
613b411c
LP
5171 copy = strdup(value);
5172 if (!copy)
5173 return log_oom();
5174
e8a565cb 5175 free_and_replace(rt->tmp_dir, copy);
613b411c
LP
5176
5177 } else if (streq(key, "var-tmp-dir")) {
5178 char *copy;
5179
613b411c
LP
5180 copy = strdup(value);
5181 if (!copy)
5182 return log_oom();
5183
e8a565cb 5184 free_and_replace(rt->var_tmp_dir, copy);
613b411c
LP
5185
5186 } else if (streq(key, "netns-socket-0")) {
5187 int fd;
5188
e8a565cb 5189 if (safe_atoi(value, &fd) < 0 || !fdset_contains(fds, fd)) {
f2341e0a 5190 log_unit_debug(u, "Failed to parse netns socket value: %s", value);
e8a565cb 5191 return 0;
613b411c 5192 }
e8a565cb
YW
5193
5194 safe_close(rt->netns_storage_socket[0]);
5195 rt->netns_storage_socket[0] = fdset_remove(fds, fd);
5196
613b411c
LP
5197 } else if (streq(key, "netns-socket-1")) {
5198 int fd;
5199
e8a565cb 5200 if (safe_atoi(value, &fd) < 0 || !fdset_contains(fds, fd)) {
f2341e0a 5201 log_unit_debug(u, "Failed to parse netns socket value: %s", value);
e8a565cb 5202 return 0;
613b411c 5203 }
e8a565cb
YW
5204
5205 safe_close(rt->netns_storage_socket[1]);
5206 rt->netns_storage_socket[1] = fdset_remove(fds, fd);
613b411c
LP
5207 } else
5208 return 0;
5209
e8a565cb
YW
5210 /* If the object is newly created, then put it to the hashmap which manages ExecRuntime objects. */
5211 if (rt_create) {
5212 r = hashmap_put(u->manager->exec_runtime_by_id, rt_create->id, rt_create);
5213 if (r < 0) {
3fe91079 5214 log_unit_debug_errno(u, r, "Failed to put runtime parameter to manager's storage: %m");
e8a565cb
YW
5215 return 0;
5216 }
613b411c 5217
e8a565cb 5218 rt_create->manager = u->manager;
613b411c 5219
e8a565cb
YW
5220 /* Avoid cleanup */
5221 rt_create = NULL;
5222 }
98b47d54 5223
e8a565cb
YW
5224 return 1;
5225}
613b411c 5226
e8a565cb
YW
5227void exec_runtime_deserialize_one(Manager *m, const char *value, FDSet *fds) {
5228 char *id = NULL, *tmp_dir = NULL, *var_tmp_dir = NULL;
5229 int r, fd0 = -1, fd1 = -1;
5230 const char *p, *v = value;
5231 size_t n;
613b411c 5232
e8a565cb
YW
5233 assert(m);
5234 assert(value);
5235 assert(fds);
98b47d54 5236
e8a565cb
YW
5237 n = strcspn(v, " ");
5238 id = strndupa(v, n);
5239 if (v[n] != ' ')
5240 goto finalize;
5241 p = v + n + 1;
5242
5243 v = startswith(p, "tmp-dir=");
5244 if (v) {
5245 n = strcspn(v, " ");
5246 tmp_dir = strndupa(v, n);
5247 if (v[n] != ' ')
5248 goto finalize;
5249 p = v + n + 1;
5250 }
5251
5252 v = startswith(p, "var-tmp-dir=");
5253 if (v) {
5254 n = strcspn(v, " ");
5255 var_tmp_dir = strndupa(v, n);
5256 if (v[n] != ' ')
5257 goto finalize;
5258 p = v + n + 1;
5259 }
5260
5261 v = startswith(p, "netns-socket-0=");
5262 if (v) {
5263 char *buf;
5264
5265 n = strcspn(v, " ");
5266 buf = strndupa(v, n);
5267 if (safe_atoi(buf, &fd0) < 0 || !fdset_contains(fds, fd0)) {
5268 log_debug("Unable to process exec-runtime netns fd specification.");
5269 return;
98b47d54 5270 }
e8a565cb
YW
5271 fd0 = fdset_remove(fds, fd0);
5272 if (v[n] != ' ')
5273 goto finalize;
5274 p = v + n + 1;
613b411c
LP
5275 }
5276
e8a565cb
YW
5277 v = startswith(p, "netns-socket-1=");
5278 if (v) {
5279 char *buf;
98b47d54 5280
e8a565cb
YW
5281 n = strcspn(v, " ");
5282 buf = strndupa(v, n);
5283 if (safe_atoi(buf, &fd1) < 0 || !fdset_contains(fds, fd1)) {
5284 log_debug("Unable to process exec-runtime netns fd specification.");
5285 return;
98b47d54 5286 }
e8a565cb
YW
5287 fd1 = fdset_remove(fds, fd1);
5288 }
98b47d54 5289
e8a565cb
YW
5290finalize:
5291
5292 r = exec_runtime_add(m, id, tmp_dir, var_tmp_dir, (int[]) { fd0, fd1 }, NULL);
7d853ca6 5293 if (r < 0)
e8a565cb 5294 log_debug_errno(r, "Failed to add exec-runtime: %m");
e8a565cb 5295}
613b411c 5296
e8a565cb
YW
5297void exec_runtime_vacuum(Manager *m) {
5298 ExecRuntime *rt;
5299 Iterator i;
5300
5301 assert(m);
5302
5303 /* Free unreferenced ExecRuntime objects. This is used after manager deserialization process. */
5304
5305 HASHMAP_FOREACH(rt, m->exec_runtime_by_id, i) {
5306 if (rt->n_ref > 0)
5307 continue;
5308
5309 (void) exec_runtime_free(rt, false);
5310 }
613b411c
LP
5311}
5312
b9c04eaf
YW
5313void exec_params_clear(ExecParameters *p) {
5314 if (!p)
5315 return;
5316
5317 strv_free(p->environment);
5318}
5319
80876c20
LP
5320static const char* const exec_input_table[_EXEC_INPUT_MAX] = {
5321 [EXEC_INPUT_NULL] = "null",
5322 [EXEC_INPUT_TTY] = "tty",
5323 [EXEC_INPUT_TTY_FORCE] = "tty-force",
4f2d528d 5324 [EXEC_INPUT_TTY_FAIL] = "tty-fail",
52c239d7
LB
5325 [EXEC_INPUT_SOCKET] = "socket",
5326 [EXEC_INPUT_NAMED_FD] = "fd",
08f3be7a 5327 [EXEC_INPUT_DATA] = "data",
2038c3f5 5328 [EXEC_INPUT_FILE] = "file",
80876c20
LP
5329};
5330
8a0867d6
LP
5331DEFINE_STRING_TABLE_LOOKUP(exec_input, ExecInput);
5332
94f04347 5333static const char* const exec_output_table[_EXEC_OUTPUT_MAX] = {
80876c20 5334 [EXEC_OUTPUT_INHERIT] = "inherit",
94f04347 5335 [EXEC_OUTPUT_NULL] = "null",
80876c20 5336 [EXEC_OUTPUT_TTY] = "tty",
94f04347 5337 [EXEC_OUTPUT_SYSLOG] = "syslog",
28dbc1e8 5338 [EXEC_OUTPUT_SYSLOG_AND_CONSOLE] = "syslog+console",
9a6bca7a 5339 [EXEC_OUTPUT_KMSG] = "kmsg",
28dbc1e8 5340 [EXEC_OUTPUT_KMSG_AND_CONSOLE] = "kmsg+console",
706343f4
LP
5341 [EXEC_OUTPUT_JOURNAL] = "journal",
5342 [EXEC_OUTPUT_JOURNAL_AND_CONSOLE] = "journal+console",
52c239d7
LB
5343 [EXEC_OUTPUT_SOCKET] = "socket",
5344 [EXEC_OUTPUT_NAMED_FD] = "fd",
2038c3f5 5345 [EXEC_OUTPUT_FILE] = "file",
566b7d23 5346 [EXEC_OUTPUT_FILE_APPEND] = "append",
94f04347
LP
5347};
5348
5349DEFINE_STRING_TABLE_LOOKUP(exec_output, ExecOutput);
023a4f67
LP
5350
5351static const char* const exec_utmp_mode_table[_EXEC_UTMP_MODE_MAX] = {
5352 [EXEC_UTMP_INIT] = "init",
5353 [EXEC_UTMP_LOGIN] = "login",
5354 [EXEC_UTMP_USER] = "user",
5355};
5356
5357DEFINE_STRING_TABLE_LOOKUP(exec_utmp_mode, ExecUtmpMode);
53f47dfc
YW
5358
5359static const char* const exec_preserve_mode_table[_EXEC_PRESERVE_MODE_MAX] = {
5360 [EXEC_PRESERVE_NO] = "no",
5361 [EXEC_PRESERVE_YES] = "yes",
5362 [EXEC_PRESERVE_RESTART] = "restart",
5363};
5364
5365DEFINE_STRING_TABLE_LOOKUP_WITH_BOOLEAN(exec_preserve_mode, ExecPreserveMode, EXEC_PRESERVE_YES);
3536f49e 5366
72fd1768 5367static const char* const exec_directory_type_table[_EXEC_DIRECTORY_TYPE_MAX] = {
3536f49e
YW
5368 [EXEC_DIRECTORY_RUNTIME] = "RuntimeDirectory",
5369 [EXEC_DIRECTORY_STATE] = "StateDirectory",
5370 [EXEC_DIRECTORY_CACHE] = "CacheDirectory",
5371 [EXEC_DIRECTORY_LOGS] = "LogsDirectory",
5372 [EXEC_DIRECTORY_CONFIGURATION] = "ConfigurationDirectory",
5373};
5374
5375DEFINE_STRING_TABLE_LOOKUP(exec_directory_type, ExecDirectoryType);
b1edf445 5376
fb2042dd
YW
5377static const char* const exec_directory_env_name_table[_EXEC_DIRECTORY_TYPE_MAX] = {
5378 [EXEC_DIRECTORY_RUNTIME] = "RUNTIME_DIRECTORY",
5379 [EXEC_DIRECTORY_STATE] = "STATE_DIRECTORY",
5380 [EXEC_DIRECTORY_CACHE] = "CACHE_DIRECTORY",
5381 [EXEC_DIRECTORY_LOGS] = "LOGS_DIRECTORY",
5382 [EXEC_DIRECTORY_CONFIGURATION] = "CONFIGURATION_DIRECTORY",
5383};
5384
5385DEFINE_PRIVATE_STRING_TABLE_LOOKUP_TO_STRING(exec_directory_env_name, ExecDirectoryType);
5386
b1edf445
LP
5387static const char* const exec_keyring_mode_table[_EXEC_KEYRING_MODE_MAX] = {
5388 [EXEC_KEYRING_INHERIT] = "inherit",
5389 [EXEC_KEYRING_PRIVATE] = "private",
5390 [EXEC_KEYRING_SHARED] = "shared",
5391};
5392
5393DEFINE_STRING_TABLE_LOOKUP(exec_keyring_mode, ExecKeyringMode);