]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/core/execute.c
core: hook up timer unit type with clean operation
[thirdparty/systemd.git] / src / core / execute.c
CommitLineData
53e1b683 1/* SPDX-License-Identifier: LGPL-2.1+ */
a7334b09 2
034c6ed7
LP
3#include <errno.h>
4#include <fcntl.h>
8dd4c05b
LP
5#include <glob.h>
6#include <grp.h>
7#include <poll.h>
309bff19 8#include <signal.h>
8dd4c05b 9#include <string.h>
19c0b0b9 10#include <sys/capability.h>
d251207d 11#include <sys/eventfd.h>
f3e43635 12#include <sys/mman.h>
8dd4c05b 13#include <sys/personality.h>
94f04347 14#include <sys/prctl.h>
d2ffa389 15#include <sys/shm.h>
8dd4c05b 16#include <sys/socket.h>
451a074f 17#include <sys/stat.h>
d2ffa389 18#include <sys/types.h>
8dd4c05b
LP
19#include <sys/un.h>
20#include <unistd.h>
023a4f67 21#include <utmpx.h>
5cb5a6ff 22
349cc4a5 23#if HAVE_PAM
5b6319dc
LP
24#include <security/pam_appl.h>
25#endif
26
349cc4a5 27#if HAVE_SELINUX
7b52a628
MS
28#include <selinux/selinux.h>
29#endif
30
349cc4a5 31#if HAVE_SECCOMP
17df7223
LP
32#include <seccomp.h>
33#endif
34
349cc4a5 35#if HAVE_APPARMOR
eef65bf3
MS
36#include <sys/apparmor.h>
37#endif
38
24882e06 39#include "sd-messages.h"
8dd4c05b
LP
40
41#include "af-list.h"
b5efdb8a 42#include "alloc-util.h"
349cc4a5 43#if HAVE_APPARMOR
3ffd4af2
LP
44#include "apparmor-util.h"
45#endif
8dd4c05b
LP
46#include "async.h"
47#include "barrier.h"
8dd4c05b 48#include "cap-list.h"
430f0182 49#include "capability-util.h"
a1164ae3 50#include "chown-recursive.h"
da681e1b 51#include "cpu-set-util.h"
f6a6225e 52#include "def.h"
686d13b9 53#include "env-file.h"
4d1a6904 54#include "env-util.h"
17df7223 55#include "errno-list.h"
3ffd4af2 56#include "execute.h"
8dd4c05b 57#include "exit-status.h"
3ffd4af2 58#include "fd-util.h"
f97b34a6 59#include "format-util.h"
f4f15635 60#include "fs-util.h"
7d50b32a 61#include "glob-util.h"
c004493c 62#include "io-util.h"
8dd4c05b 63#include "ioprio.h"
a1164ae3 64#include "label.h"
8dd4c05b
LP
65#include "log.h"
66#include "macro.h"
e8a565cb 67#include "manager.h"
0a970718 68#include "memory-util.h"
8dd4c05b
LP
69#include "missing.h"
70#include "mkdir.h"
71#include "namespace.h"
6bedfcbb 72#include "parse-util.h"
8dd4c05b 73#include "path-util.h"
0b452006 74#include "process-util.h"
78f22b97 75#include "rlimit-util.h"
8dd4c05b 76#include "rm-rf.h"
349cc4a5 77#if HAVE_SECCOMP
3ffd4af2
LP
78#include "seccomp-util.h"
79#endif
07d46372 80#include "securebits-util.h"
8dd4c05b 81#include "selinux-util.h"
24882e06 82#include "signal-util.h"
8dd4c05b 83#include "smack-util.h"
57b7a260 84#include "socket-util.h"
fd63e712 85#include "special.h"
949befd3 86#include "stat-util.h"
8b43440b 87#include "string-table.h"
07630cea 88#include "string-util.h"
8dd4c05b 89#include "strv.h"
7ccbd1ae 90#include "syslog-util.h"
8dd4c05b 91#include "terminal-util.h"
566b7d23 92#include "umask-util.h"
8dd4c05b 93#include "unit.h"
b1d4f8e1 94#include "user-util.h"
8dd4c05b 95#include "utmp-wtmp.h"
5cb5a6ff 96
e056b01d 97#define IDLE_TIMEOUT_USEC (5*USEC_PER_SEC)
31a7eb86 98#define IDLE_TIMEOUT2_USEC (1*USEC_PER_SEC)
e6a26745 99
531dca78
LP
100#define SNDBUF_SIZE (8*1024*1024)
101
/* Relocates the n_fds descriptors listed in fds[] so that fds[k] ends up being descriptor number
 * k+3. The array is updated in place with the new descriptor numbers. Returns 0 on success, or a
 * negative errno-style value if duplicating a descriptor fails. */
static int shift_fds(int fds[], size_t n_fds) {
        int first = 0, retry_at;

        if (n_fds == 0)
                return 0;

        assert(fds);

        do {
                int idx;

                retry_at = -1;

                for (idx = first; idx < (int) n_fds; idx++) {
                        int wanted = idx + 3, dup_fd;

                        /* Nothing to do if this one already sits in its slot */
                        if (fds[idx] == wanted)
                                continue;

                        dup_fd = fcntl(fds[idx], F_DUPFD, wanted);
                        if (dup_fd < 0)
                                return -errno;

                        safe_close(fds[idx]);
                        fds[idx] = dup_fd;

                        /* F_DUPFD hands out the lowest free number >= wanted. If the slot was
                         * still taken we got something else, so note the first such index and
                         * make another pass starting there. */
                        if (retry_at < 0 && dup_fd != wanted)
                                retry_at = idx;
                }

                first = retry_at;
        } while (retry_at >= 0);

        return 0;
}
146
/* Adjusts the descriptor flags of the fds we are about to pass on. The first n_socket_fds entries
 * of fds[] get O_NONBLOCK set or cleared according to 'nonblock'; the remaining n_storage_fds
 * entries are left alone in that respect. FD_CLOEXEC is cleared on every entry. Returns 0 on
 * success or the negative error reported by the first helper that fails. */
static int flags_fds(const int fds[], size_t n_socket_fds, size_t n_storage_fds, bool nonblock) {
        size_t total = n_socket_fds + n_storage_fds, idx;
        int r;

        if (total == 0)
                return 0;

        assert(fds);

        for (idx = 0; idx < total; idx++) {
                bool is_socket_fd = idx < n_socket_fds;

                /* O_NONBLOCK handling is limited to the socket part of the array */
                if (is_socket_fd) {
                        r = fd_nonblock(fds[idx], nonblock);
                        if (r < 0)
                                return r;
                }

                /* These fds are meant to survive into the child, hence always drop FD_CLOEXEC */
                r = fd_cloexec(fds[idx], false);
                if (r < 0)
                        return r;
        }

        return 0;
}
179
1e22b5cd 180static const char *exec_context_tty_path(const ExecContext *context) {
80876c20
LP
181 assert(context);
182
1e22b5cd
LP
183 if (context->stdio_as_fds)
184 return NULL;
185
80876c20
LP
186 if (context->tty_path)
187 return context->tty_path;
188
189 return "/dev/console";
190}
191
1e22b5cd
LP
192static void exec_context_tty_reset(const ExecContext *context, const ExecParameters *p) {
193 const char *path;
194
6ea832a2
LP
195 assert(context);
196
1e22b5cd 197 path = exec_context_tty_path(context);
6ea832a2 198
1e22b5cd
LP
199 if (context->tty_vhangup) {
200 if (p && p->stdin_fd >= 0)
201 (void) terminal_vhangup_fd(p->stdin_fd);
202 else if (path)
203 (void) terminal_vhangup(path);
204 }
6ea832a2 205
1e22b5cd
LP
206 if (context->tty_reset) {
207 if (p && p->stdin_fd >= 0)
208 (void) reset_terminal_fd(p->stdin_fd, true);
209 else if (path)
210 (void) reset_terminal(path);
211 }
212
213 if (context->tty_vt_disallocate && path)
214 (void) vt_disallocate(path);
6ea832a2
LP
215}
216
6af760f3
LP
217static bool is_terminal_input(ExecInput i) {
218 return IN_SET(i,
219 EXEC_INPUT_TTY,
220 EXEC_INPUT_TTY_FORCE,
221 EXEC_INPUT_TTY_FAIL);
222}
223
3a1286b6 224static bool is_terminal_output(ExecOutput o) {
6af760f3
LP
225 return IN_SET(o,
226 EXEC_OUTPUT_TTY,
227 EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
228 EXEC_OUTPUT_KMSG_AND_CONSOLE,
229 EXEC_OUTPUT_JOURNAL_AND_CONSOLE);
230}
231
aac8c0c3
LP
232static bool is_syslog_output(ExecOutput o) {
233 return IN_SET(o,
234 EXEC_OUTPUT_SYSLOG,
235 EXEC_OUTPUT_SYSLOG_AND_CONSOLE);
236}
237
238static bool is_kmsg_output(ExecOutput o) {
239 return IN_SET(o,
240 EXEC_OUTPUT_KMSG,
241 EXEC_OUTPUT_KMSG_AND_CONSOLE);
242}
243
6af760f3
LP
244static bool exec_context_needs_term(const ExecContext *c) {
245 assert(c);
246
247 /* Return true if the execution context suggests we should set $TERM to something useful. */
248
249 if (is_terminal_input(c->std_input))
250 return true;
251
252 if (is_terminal_output(c->std_output))
253 return true;
254
255 if (is_terminal_output(c->std_error))
256 return true;
257
258 return !!c->tty_path;
3a1286b6
MS
259}
260
/* Opens /dev/null with the given open(2) flags (plus O_NOCTTY) and hands the descriptor to
 * move_fd() so that it ends up as 'nfd'. Returns -errno if the open fails, otherwise whatever
 * move_fd() returns. */
static int open_null_as(int flags, int nfd) {
        int null_fd;

        assert(nfd >= 0);

        null_fd = open("/dev/null", flags|O_NOCTTY);

        return null_fd < 0 ? -errno : move_fd(null_fd, nfd, false);
}
272
524daa8c 273static int connect_journal_socket(int fd, uid_t uid, gid_t gid) {
92a17af9 274 static const union sockaddr_union sa = {
b92bea5d
ZJS
275 .un.sun_family = AF_UNIX,
276 .un.sun_path = "/run/systemd/journal/stdout",
277 };
524daa8c
ZJS
278 uid_t olduid = UID_INVALID;
279 gid_t oldgid = GID_INVALID;
280 int r;
281
cad93f29 282 if (gid_is_valid(gid)) {
524daa8c
ZJS
283 oldgid = getgid();
284
92a17af9 285 if (setegid(gid) < 0)
524daa8c
ZJS
286 return -errno;
287 }
288
cad93f29 289 if (uid_is_valid(uid)) {
524daa8c
ZJS
290 olduid = getuid();
291
92a17af9 292 if (seteuid(uid) < 0) {
524daa8c
ZJS
293 r = -errno;
294 goto restore_gid;
295 }
296 }
297
92a17af9 298 r = connect(fd, &sa.sa, SOCKADDR_UN_LEN(sa.un)) < 0 ? -errno : 0;
524daa8c
ZJS
299
300 /* If we fail to restore the uid or gid, things will likely
301 fail later on. This should only happen if an LSM interferes. */
302
cad93f29 303 if (uid_is_valid(uid))
524daa8c
ZJS
304 (void) seteuid(olduid);
305
306 restore_gid:
cad93f29 307 if (gid_is_valid(gid))
524daa8c
ZJS
308 (void) setegid(oldgid);
309
310 return r;
311}
312
fd1f9c89 313static int connect_logger_as(
34cf6c43 314 const Unit *unit,
fd1f9c89 315 const ExecContext *context,
af635cf3 316 const ExecParameters *params,
fd1f9c89
LP
317 ExecOutput output,
318 const char *ident,
fd1f9c89
LP
319 int nfd,
320 uid_t uid,
321 gid_t gid) {
322
2ac1ff68
EV
323 _cleanup_close_ int fd = -1;
324 int r;
071830ff
LP
325
326 assert(context);
af635cf3 327 assert(params);
80876c20
LP
328 assert(output < _EXEC_OUTPUT_MAX);
329 assert(ident);
330 assert(nfd >= 0);
071830ff 331
54fe0cdb
LP
332 fd = socket(AF_UNIX, SOCK_STREAM, 0);
333 if (fd < 0)
80876c20 334 return -errno;
071830ff 335
524daa8c
ZJS
336 r = connect_journal_socket(fd, uid, gid);
337 if (r < 0)
338 return r;
071830ff 339
2ac1ff68 340 if (shutdown(fd, SHUT_RD) < 0)
80876c20 341 return -errno;
071830ff 342
fd1f9c89 343 (void) fd_inc_sndbuf(fd, SNDBUF_SIZE);
531dca78 344
2ac1ff68 345 if (dprintf(fd,
62bca2c6 346 "%s\n"
80876c20
LP
347 "%s\n"
348 "%i\n"
54fe0cdb
LP
349 "%i\n"
350 "%i\n"
351 "%i\n"
4f4a1dbf 352 "%i\n",
c867611e 353 context->syslog_identifier ?: ident,
af635cf3 354 params->flags & EXEC_PASS_LOG_UNIT ? unit->id : "",
54fe0cdb
LP
355 context->syslog_priority,
356 !!context->syslog_level_prefix,
aac8c0c3
LP
357 is_syslog_output(output),
358 is_kmsg_output(output),
2ac1ff68
EV
359 is_terminal_output(output)) < 0)
360 return -errno;
80876c20 361
2ac1ff68 362 return move_fd(TAKE_FD(fd), nfd, false);
80876c20 363}
2ac1ff68 364
/* Opens the terminal at 'path' via open_terminal() with the given flags plus O_NOCTTY, and hands
 * the descriptor to move_fd() so that it ends up as 'nfd'. A negative result from
 * open_terminal() is returned unchanged. */
static int open_terminal_as(const char *path, int flags, int nfd) {
        int tty_fd;

        assert(path);
        assert(nfd >= 0);

        tty_fd = open_terminal(path, flags | O_NOCTTY);

        return tty_fd < 0 ? tty_fd : move_fd(tty_fd, nfd, false);
}
071830ff 377
2038c3f5 378static int acquire_path(const char *path, int flags, mode_t mode) {
15a3e96f
LP
379 union sockaddr_union sa = {};
380 _cleanup_close_ int fd = -1;
381 int r, salen;
071830ff 382
80876c20 383 assert(path);
071830ff 384
2038c3f5
LP
385 if (IN_SET(flags & O_ACCMODE, O_WRONLY, O_RDWR))
386 flags |= O_CREAT;
387
388 fd = open(path, flags|O_NOCTTY, mode);
389 if (fd >= 0)
15a3e96f 390 return TAKE_FD(fd);
071830ff 391
2038c3f5
LP
392 if (errno != ENXIO) /* ENXIO is returned when we try to open() an AF_UNIX file system socket on Linux */
393 return -errno;
15a3e96f 394 if (strlen(path) >= sizeof(sa.un.sun_path)) /* Too long, can't be a UNIX socket */
2038c3f5
LP
395 return -ENXIO;
396
397 /* So, it appears the specified path could be an AF_UNIX socket. Let's see if we can connect to it. */
398
399 fd = socket(AF_UNIX, SOCK_STREAM, 0);
400 if (fd < 0)
401 return -errno;
402
15a3e96f
LP
403 salen = sockaddr_un_set_path(&sa.un, path);
404 if (salen < 0)
405 return salen;
406
407 if (connect(fd, &sa.sa, salen) < 0)
2038c3f5
LP
408 return errno == EINVAL ? -ENXIO : -errno; /* Propagate initial error if we get EINVAL, i.e. we have
409 * indication that his wasn't an AF_UNIX socket after all */
071830ff 410
2038c3f5
LP
411 if ((flags & O_ACCMODE) == O_RDONLY)
412 r = shutdown(fd, SHUT_WR);
413 else if ((flags & O_ACCMODE) == O_WRONLY)
414 r = shutdown(fd, SHUT_RD);
415 else
15a3e96f
LP
416 return TAKE_FD(fd);
417 if (r < 0)
2038c3f5 418 return -errno;
2038c3f5 419
15a3e96f 420 return TAKE_FD(fd);
80876c20 421}
071830ff 422
08f3be7a
LP
423static int fixup_input(
424 const ExecContext *context,
425 int socket_fd,
426 bool apply_tty_stdin) {
427
428 ExecInput std_input;
429
430 assert(context);
431
432 std_input = context->std_input;
1e3ad081
LP
433
434 if (is_terminal_input(std_input) && !apply_tty_stdin)
435 return EXEC_INPUT_NULL;
071830ff 436
03fd9c49 437 if (std_input == EXEC_INPUT_SOCKET && socket_fd < 0)
4f2d528d
LP
438 return EXEC_INPUT_NULL;
439
08f3be7a
LP
440 if (std_input == EXEC_INPUT_DATA && context->stdin_data_size == 0)
441 return EXEC_INPUT_NULL;
442
03fd9c49 443 return std_input;
4f2d528d
LP
444}
445
03fd9c49 446static int fixup_output(ExecOutput std_output, int socket_fd) {
4f2d528d 447
03fd9c49 448 if (std_output == EXEC_OUTPUT_SOCKET && socket_fd < 0)
4f2d528d
LP
449 return EXEC_OUTPUT_INHERIT;
450
03fd9c49 451 return std_output;
4f2d528d
LP
452}
453
a34ceba6
LP
454static int setup_input(
455 const ExecContext *context,
456 const ExecParameters *params,
52c239d7
LB
457 int socket_fd,
458 int named_iofds[3]) {
a34ceba6 459
4f2d528d
LP
460 ExecInput i;
461
462 assert(context);
a34ceba6
LP
463 assert(params);
464
465 if (params->stdin_fd >= 0) {
466 if (dup2(params->stdin_fd, STDIN_FILENO) < 0)
467 return -errno;
468
469 /* Try to make this the controlling tty, if it is a tty, and reset it */
1fb0682e
LP
470 if (isatty(STDIN_FILENO)) {
471 (void) ioctl(STDIN_FILENO, TIOCSCTTY, context->std_input == EXEC_INPUT_TTY_FORCE);
472 (void) reset_terminal_fd(STDIN_FILENO, true);
473 }
a34ceba6
LP
474
475 return STDIN_FILENO;
476 }
4f2d528d 477
08f3be7a 478 i = fixup_input(context, socket_fd, params->flags & EXEC_APPLY_TTY_STDIN);
4f2d528d
LP
479
480 switch (i) {
071830ff 481
80876c20
LP
482 case EXEC_INPUT_NULL:
483 return open_null_as(O_RDONLY, STDIN_FILENO);
484
485 case EXEC_INPUT_TTY:
486 case EXEC_INPUT_TTY_FORCE:
487 case EXEC_INPUT_TTY_FAIL: {
046a82c1 488 int fd;
071830ff 489
1e22b5cd 490 fd = acquire_terminal(exec_context_tty_path(context),
8854d795
LP
491 i == EXEC_INPUT_TTY_FAIL ? ACQUIRE_TERMINAL_TRY :
492 i == EXEC_INPUT_TTY_FORCE ? ACQUIRE_TERMINAL_FORCE :
493 ACQUIRE_TERMINAL_WAIT,
3a43da28 494 USEC_INFINITY);
970edce6 495 if (fd < 0)
80876c20
LP
496 return fd;
497
046a82c1 498 return move_fd(fd, STDIN_FILENO, false);
80876c20
LP
499 }
500
4f2d528d 501 case EXEC_INPUT_SOCKET:
e75a9ed1
LP
502 assert(socket_fd >= 0);
503
4f2d528d
LP
504 return dup2(socket_fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
505
52c239d7 506 case EXEC_INPUT_NAMED_FD:
e75a9ed1
LP
507 assert(named_iofds[STDIN_FILENO] >= 0);
508
52c239d7
LB
509 (void) fd_nonblock(named_iofds[STDIN_FILENO], false);
510 return dup2(named_iofds[STDIN_FILENO], STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
511
08f3be7a
LP
512 case EXEC_INPUT_DATA: {
513 int fd;
514
515 fd = acquire_data_fd(context->stdin_data, context->stdin_data_size, 0);
516 if (fd < 0)
517 return fd;
518
519 return move_fd(fd, STDIN_FILENO, false);
520 }
521
2038c3f5
LP
522 case EXEC_INPUT_FILE: {
523 bool rw;
524 int fd;
525
526 assert(context->stdio_file[STDIN_FILENO]);
527
528 rw = (context->std_output == EXEC_OUTPUT_FILE && streq_ptr(context->stdio_file[STDIN_FILENO], context->stdio_file[STDOUT_FILENO])) ||
529 (context->std_error == EXEC_OUTPUT_FILE && streq_ptr(context->stdio_file[STDIN_FILENO], context->stdio_file[STDERR_FILENO]));
530
531 fd = acquire_path(context->stdio_file[STDIN_FILENO], rw ? O_RDWR : O_RDONLY, 0666 & ~context->umask);
532 if (fd < 0)
533 return fd;
534
535 return move_fd(fd, STDIN_FILENO, false);
536 }
537
80876c20
LP
538 default:
539 assert_not_reached("Unknown input type");
540 }
541}
542
41fc585a
LP
543static bool can_inherit_stderr_from_stdout(
544 const ExecContext *context,
545 ExecOutput o,
546 ExecOutput e) {
547
548 assert(context);
549
550 /* Returns true, if given the specified STDERR and STDOUT output we can directly dup() the stdout fd to the
551 * stderr fd */
552
553 if (e == EXEC_OUTPUT_INHERIT)
554 return true;
555 if (e != o)
556 return false;
557
558 if (e == EXEC_OUTPUT_NAMED_FD)
559 return streq_ptr(context->stdio_fdname[STDOUT_FILENO], context->stdio_fdname[STDERR_FILENO]);
560
561 if (IN_SET(e, EXEC_OUTPUT_FILE, EXEC_OUTPUT_FILE_APPEND))
562 return streq_ptr(context->stdio_file[STDOUT_FILENO], context->stdio_file[STDERR_FILENO]);
563
564 return true;
565}
566
a34ceba6 567static int setup_output(
34cf6c43 568 const Unit *unit,
a34ceba6
LP
569 const ExecContext *context,
570 const ExecParameters *params,
571 int fileno,
572 int socket_fd,
52c239d7 573 int named_iofds[3],
a34ceba6 574 const char *ident,
7bce046b
LP
575 uid_t uid,
576 gid_t gid,
577 dev_t *journal_stream_dev,
578 ino_t *journal_stream_ino) {
a34ceba6 579
4f2d528d
LP
580 ExecOutput o;
581 ExecInput i;
47c1d80d 582 int r;
4f2d528d 583
f2341e0a 584 assert(unit);
80876c20 585 assert(context);
a34ceba6 586 assert(params);
80876c20 587 assert(ident);
7bce046b
LP
588 assert(journal_stream_dev);
589 assert(journal_stream_ino);
80876c20 590
a34ceba6
LP
591 if (fileno == STDOUT_FILENO && params->stdout_fd >= 0) {
592
593 if (dup2(params->stdout_fd, STDOUT_FILENO) < 0)
594 return -errno;
595
596 return STDOUT_FILENO;
597 }
598
599 if (fileno == STDERR_FILENO && params->stderr_fd >= 0) {
600 if (dup2(params->stderr_fd, STDERR_FILENO) < 0)
601 return -errno;
602
603 return STDERR_FILENO;
604 }
605
08f3be7a 606 i = fixup_input(context, socket_fd, params->flags & EXEC_APPLY_TTY_STDIN);
03fd9c49 607 o = fixup_output(context->std_output, socket_fd);
4f2d528d 608
eb17e935
MS
609 if (fileno == STDERR_FILENO) {
610 ExecOutput e;
611 e = fixup_output(context->std_error, socket_fd);
80876c20 612
eb17e935
MS
613 /* This expects the input and output are already set up */
614
615 /* Don't change the stderr file descriptor if we inherit all
616 * the way and are not on a tty */
617 if (e == EXEC_OUTPUT_INHERIT &&
618 o == EXEC_OUTPUT_INHERIT &&
619 i == EXEC_INPUT_NULL &&
620 !is_terminal_input(context->std_input) &&
621 getppid () != 1)
622 return fileno;
623
624 /* Duplicate from stdout if possible */
41fc585a 625 if (can_inherit_stderr_from_stdout(context, o, e))
eb17e935 626 return dup2(STDOUT_FILENO, fileno) < 0 ? -errno : fileno;
071830ff 627
eb17e935 628 o = e;
80876c20 629
eb17e935 630 } else if (o == EXEC_OUTPUT_INHERIT) {
21d21ea4
LP
631 /* If input got downgraded, inherit the original value */
632 if (i == EXEC_INPUT_NULL && is_terminal_input(context->std_input))
1e22b5cd 633 return open_terminal_as(exec_context_tty_path(context), O_WRONLY, fileno);
21d21ea4 634
08f3be7a
LP
635 /* If the input is connected to anything that's not a /dev/null or a data fd, inherit that... */
636 if (!IN_SET(i, EXEC_INPUT_NULL, EXEC_INPUT_DATA))
eb17e935 637 return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
071830ff 638
acb591e4
LP
639 /* If we are not started from PID 1 we just inherit STDOUT from our parent process. */
640 if (getppid() != 1)
eb17e935 641 return fileno;
94f04347 642
eb17e935
MS
643 /* We need to open /dev/null here anew, to get the right access mode. */
644 return open_null_as(O_WRONLY, fileno);
071830ff 645 }
94f04347 646
eb17e935 647 switch (o) {
80876c20
LP
648
649 case EXEC_OUTPUT_NULL:
eb17e935 650 return open_null_as(O_WRONLY, fileno);
80876c20
LP
651
652 case EXEC_OUTPUT_TTY:
4f2d528d 653 if (is_terminal_input(i))
eb17e935 654 return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
80876c20
LP
655
656 /* We don't reset the terminal if this is just about output */
1e22b5cd 657 return open_terminal_as(exec_context_tty_path(context), O_WRONLY, fileno);
80876c20
LP
658
659 case EXEC_OUTPUT_SYSLOG:
28dbc1e8 660 case EXEC_OUTPUT_SYSLOG_AND_CONSOLE:
9a6bca7a 661 case EXEC_OUTPUT_KMSG:
28dbc1e8 662 case EXEC_OUTPUT_KMSG_AND_CONSOLE:
706343f4
LP
663 case EXEC_OUTPUT_JOURNAL:
664 case EXEC_OUTPUT_JOURNAL_AND_CONSOLE:
af635cf3 665 r = connect_logger_as(unit, context, params, o, ident, fileno, uid, gid);
47c1d80d 666 if (r < 0) {
82677ae4 667 log_unit_warning_errno(unit, r, "Failed to connect %s to the journal socket, ignoring: %m", fileno == STDOUT_FILENO ? "stdout" : "stderr");
eb17e935 668 r = open_null_as(O_WRONLY, fileno);
7bce046b
LP
669 } else {
670 struct stat st;
671
672 /* If we connected this fd to the journal via a stream, patch the device/inode into the passed
673 * parameters, but only then. This is useful so that we can set $JOURNAL_STREAM that permits
ab2116b1
LP
674 * services to detect whether they are connected to the journal or not.
675 *
676 * If both stdout and stderr are connected to a stream then let's make sure to store the data
677 * about STDERR as that's usually the best way to do logging. */
7bce046b 678
ab2116b1
LP
679 if (fstat(fileno, &st) >= 0 &&
680 (*journal_stream_ino == 0 || fileno == STDERR_FILENO)) {
7bce046b
LP
681 *journal_stream_dev = st.st_dev;
682 *journal_stream_ino = st.st_ino;
683 }
47c1d80d
MS
684 }
685 return r;
4f2d528d
LP
686
687 case EXEC_OUTPUT_SOCKET:
688 assert(socket_fd >= 0);
e75a9ed1 689
eb17e935 690 return dup2(socket_fd, fileno) < 0 ? -errno : fileno;
94f04347 691
52c239d7 692 case EXEC_OUTPUT_NAMED_FD:
e75a9ed1
LP
693 assert(named_iofds[fileno] >= 0);
694
52c239d7
LB
695 (void) fd_nonblock(named_iofds[fileno], false);
696 return dup2(named_iofds[fileno], fileno) < 0 ? -errno : fileno;
697
566b7d23
ZD
698 case EXEC_OUTPUT_FILE:
699 case EXEC_OUTPUT_FILE_APPEND: {
2038c3f5 700 bool rw;
566b7d23 701 int fd, flags;
2038c3f5
LP
702
703 assert(context->stdio_file[fileno]);
704
705 rw = context->std_input == EXEC_INPUT_FILE &&
706 streq_ptr(context->stdio_file[fileno], context->stdio_file[STDIN_FILENO]);
707
708 if (rw)
709 return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
710
566b7d23
ZD
711 flags = O_WRONLY;
712 if (o == EXEC_OUTPUT_FILE_APPEND)
713 flags |= O_APPEND;
714
715 fd = acquire_path(context->stdio_file[fileno], flags, 0666 & ~context->umask);
2038c3f5
LP
716 if (fd < 0)
717 return fd;
718
566b7d23 719 return move_fd(fd, fileno, 0);
2038c3f5
LP
720 }
721
94f04347 722 default:
80876c20 723 assert_not_reached("Unknown error type");
94f04347 724 }
071830ff
LP
725}
726
02a51aba 727static int chown_terminal(int fd, uid_t uid) {
4b3b5bc7 728 int r;
02a51aba
LP
729
730 assert(fd >= 0);
02a51aba 731
1ff74fb6 732 /* Before we chown/chmod the TTY, let's ensure this is actually a tty */
4b3b5bc7
LP
733 if (isatty(fd) < 1) {
734 if (IN_SET(errno, EINVAL, ENOTTY))
735 return 0; /* not a tty */
1ff74fb6 736
02a51aba 737 return -errno;
4b3b5bc7 738 }
02a51aba 739
4b3b5bc7
LP
740 /* This might fail. What matters are the results. */
741 r = fchmod_and_chown(fd, TTY_MODE, uid, -1);
742 if (r < 0)
743 return r;
02a51aba 744
4b3b5bc7 745 return 1;
02a51aba
LP
746}
747
7d5ceb64 748static int setup_confirm_stdio(const char *vc, int *_saved_stdin, int *_saved_stdout) {
3d18b167
LP
749 _cleanup_close_ int fd = -1, saved_stdin = -1, saved_stdout = -1;
750 int r;
80876c20 751
80876c20
LP
752 assert(_saved_stdin);
753 assert(_saved_stdout);
754
af6da548
LP
755 saved_stdin = fcntl(STDIN_FILENO, F_DUPFD, 3);
756 if (saved_stdin < 0)
757 return -errno;
80876c20 758
af6da548 759 saved_stdout = fcntl(STDOUT_FILENO, F_DUPFD, 3);
3d18b167
LP
760 if (saved_stdout < 0)
761 return -errno;
80876c20 762
8854d795 763 fd = acquire_terminal(vc, ACQUIRE_TERMINAL_WAIT, DEFAULT_CONFIRM_USEC);
3d18b167
LP
764 if (fd < 0)
765 return fd;
80876c20 766
af6da548
LP
767 r = chown_terminal(fd, getuid());
768 if (r < 0)
3d18b167 769 return r;
02a51aba 770
3d18b167
LP
771 r = reset_terminal_fd(fd, true);
772 if (r < 0)
773 return r;
80876c20 774
2b33ab09 775 r = rearrange_stdio(fd, fd, STDERR_FILENO);
3d18b167 776 fd = -1;
2b33ab09
LP
777 if (r < 0)
778 return r;
80876c20
LP
779
780 *_saved_stdin = saved_stdin;
781 *_saved_stdout = saved_stdout;
782
3d18b167 783 saved_stdin = saved_stdout = -1;
80876c20 784
3d18b167 785 return 0;
80876c20
LP
786}
787
63d77c92 788static void write_confirm_error_fd(int err, int fd, const Unit *u) {
3b20f877
FB
789 assert(err < 0);
790
791 if (err == -ETIMEDOUT)
63d77c92 792 dprintf(fd, "Confirmation question timed out for %s, assuming positive response.\n", u->id);
3b20f877
FB
793 else {
794 errno = -err;
63d77c92 795 dprintf(fd, "Couldn't ask confirmation for %s: %m, assuming positive response.\n", u->id);
3b20f877
FB
796 }
797}
798
63d77c92 799static void write_confirm_error(int err, const char *vc, const Unit *u) {
03e334a1 800 _cleanup_close_ int fd = -1;
80876c20 801
3b20f877 802 assert(vc);
80876c20 803
7d5ceb64 804 fd = open_terminal(vc, O_WRONLY|O_NOCTTY|O_CLOEXEC);
af6da548 805 if (fd < 0)
3b20f877 806 return;
80876c20 807
63d77c92 808 write_confirm_error_fd(err, fd, u);
af6da548 809}
80876c20 810
/* Undoes setup_confirm_stdio(): releases the terminal, puts the saved descriptors back in place
 * as stdin/stdout (where valid) and closes them. Both restorations are always attempted; the
 * return value is 0, or the negative errno of the last dup2() that failed. */
static int restore_confirm_stdio(int *saved_stdin, int *saved_stdout) {
        int ret = 0;

        assert(saved_stdin);
        assert(saved_stdout);

        release_terminal();

        if (*saved_stdin >= 0 && dup2(*saved_stdin, STDIN_FILENO) < 0)
                ret = -errno;

        if (*saved_stdout >= 0 && dup2(*saved_stdout, STDOUT_FILENO) < 0)
                ret = -errno;

        *saved_stdin = safe_close(*saved_stdin);
        *saved_stdout = safe_close(*saved_stdout);

        return ret;
}
832
3b20f877
FB
/* Possible results of ask_for_confirmation() */
enum {
        CONFIRM_PRETEND_FAILURE = -1,   /* 'f': don't execute, pretend the command failed */
        CONFIRM_PRETEND_SUCCESS =  0,   /* 's': don't execute, pretend the command succeeded */
        CONFIRM_EXECUTE         =  1,   /* 'y' (and all internal errors): execute the command */
};
838
eedf223a 839static int ask_for_confirmation(const char *vc, Unit *u, const char *cmdline) {
af6da548 840 int saved_stdout = -1, saved_stdin = -1, r;
2bcd3c26 841 _cleanup_free_ char *e = NULL;
3b20f877 842 char c;
af6da548 843
3b20f877 844 /* For any internal errors, assume a positive response. */
7d5ceb64 845 r = setup_confirm_stdio(vc, &saved_stdin, &saved_stdout);
3b20f877 846 if (r < 0) {
63d77c92 847 write_confirm_error(r, vc, u);
3b20f877
FB
848 return CONFIRM_EXECUTE;
849 }
af6da548 850
b0eb2944
FB
851 /* confirm_spawn might have been disabled while we were sleeping. */
852 if (manager_is_confirm_spawn_disabled(u->manager)) {
853 r = 1;
854 goto restore_stdio;
855 }
af6da548 856
2bcd3c26
FB
857 e = ellipsize(cmdline, 60, 100);
858 if (!e) {
859 log_oom();
860 r = CONFIRM_EXECUTE;
861 goto restore_stdio;
862 }
af6da548 863
d172b175 864 for (;;) {
539622bd 865 r = ask_char(&c, "yfshiDjcn", "Execute %s? [y, f, s – h for help] ", e);
d172b175 866 if (r < 0) {
63d77c92 867 write_confirm_error_fd(r, STDOUT_FILENO, u);
d172b175
FB
868 r = CONFIRM_EXECUTE;
869 goto restore_stdio;
870 }
af6da548 871
d172b175 872 switch (c) {
b0eb2944
FB
873 case 'c':
874 printf("Resuming normal execution.\n");
875 manager_disable_confirm_spawn();
876 r = 1;
877 break;
dd6f9ac0
FB
878 case 'D':
879 unit_dump(u, stdout, " ");
880 continue; /* ask again */
d172b175
FB
881 case 'f':
882 printf("Failing execution.\n");
883 r = CONFIRM_PRETEND_FAILURE;
884 break;
885 case 'h':
b0eb2944
FB
886 printf(" c - continue, proceed without asking anymore\n"
887 " D - dump, show the state of the unit\n"
dd6f9ac0 888 " f - fail, don't execute the command and pretend it failed\n"
d172b175 889 " h - help\n"
eedf223a 890 " i - info, show a short summary of the unit\n"
56fde33a 891 " j - jobs, show jobs that are in progress\n"
d172b175
FB
892 " s - skip, don't execute the command and pretend it succeeded\n"
893 " y - yes, execute the command\n");
dd6f9ac0 894 continue; /* ask again */
eedf223a
FB
895 case 'i':
896 printf(" Description: %s\n"
897 " Unit: %s\n"
898 " Command: %s\n",
899 u->id, u->description, cmdline);
900 continue; /* ask again */
56fde33a
FB
901 case 'j':
902 manager_dump_jobs(u->manager, stdout, " ");
903 continue; /* ask again */
539622bd
FB
904 case 'n':
905 /* 'n' was removed in favor of 'f'. */
906 printf("Didn't understand 'n', did you mean 'f'?\n");
907 continue; /* ask again */
d172b175
FB
908 case 's':
909 printf("Skipping execution.\n");
910 r = CONFIRM_PRETEND_SUCCESS;
911 break;
912 case 'y':
913 r = CONFIRM_EXECUTE;
914 break;
915 default:
916 assert_not_reached("Unhandled choice");
917 }
3b20f877 918 break;
3b20f877 919 }
af6da548 920
3b20f877 921restore_stdio:
af6da548 922 restore_confirm_stdio(&saved_stdin, &saved_stdout);
af6da548 923 return r;
80876c20
LP
924}
925
4d885bd3
DH
926static int get_fixed_user(const ExecContext *c, const char **user,
927 uid_t *uid, gid_t *gid,
928 const char **home, const char **shell) {
81a2b7ce 929 int r;
4d885bd3 930 const char *name;
81a2b7ce 931
4d885bd3 932 assert(c);
81a2b7ce 933
23deef88
LP
934 if (!c->user)
935 return 0;
936
4d885bd3
DH
937 /* Note that we don't set $HOME or $SHELL if they are not particularly enlightening anyway
938 * (i.e. are "/" or "/bin/nologin"). */
81a2b7ce 939
23deef88 940 name = c->user;
fafff8f1 941 r = get_user_creds(&name, uid, gid, home, shell, USER_CREDS_CLEAN);
4d885bd3
DH
942 if (r < 0)
943 return r;
81a2b7ce 944
4d885bd3
DH
945 *user = name;
946 return 0;
947}
948
949static int get_fixed_group(const ExecContext *c, const char **group, gid_t *gid) {
950 int r;
951 const char *name;
952
953 assert(c);
954
955 if (!c->group)
956 return 0;
957
958 name = c->group;
fafff8f1 959 r = get_group_creds(&name, gid, 0);
4d885bd3
DH
960 if (r < 0)
961 return r;
962
963 *group = name;
964 return 0;
965}
966
cdc5d5c5
DH
967static int get_supplementary_groups(const ExecContext *c, const char *user,
968 const char *group, gid_t gid,
969 gid_t **supplementary_gids, int *ngids) {
4d885bd3
DH
970 char **i;
971 int r, k = 0;
972 int ngroups_max;
973 bool keep_groups = false;
974 gid_t *groups = NULL;
975 _cleanup_free_ gid_t *l_gids = NULL;
976
977 assert(c);
978
bbeea271
DH
979 /*
980 * If user is given, then lookup GID and supplementary groups list.
981 * We avoid NSS lookups for gid=0. Also we have to initialize groups
cdc5d5c5
DH
982 * here and as early as possible so we keep the list of supplementary
983 * groups of the caller.
bbeea271
DH
984 */
985 if (user && gid_is_valid(gid) && gid != 0) {
986 /* First step, initialize groups from /etc/groups */
987 if (initgroups(user, gid) < 0)
988 return -errno;
989
990 keep_groups = true;
991 }
992
ac6e8be6 993 if (strv_isempty(c->supplementary_groups))
4d885bd3
DH
994 return 0;
995
366ddd25
DH
996 /*
997 * If SupplementaryGroups= was passed then NGROUPS_MAX has to
998 * be positive, otherwise fail.
999 */
1000 errno = 0;
1001 ngroups_max = (int) sysconf(_SC_NGROUPS_MAX);
1002 if (ngroups_max <= 0) {
1003 if (errno > 0)
1004 return -errno;
1005 else
1006 return -EOPNOTSUPP; /* For all other values */
1007 }
1008
4d885bd3
DH
1009 l_gids = new(gid_t, ngroups_max);
1010 if (!l_gids)
1011 return -ENOMEM;
81a2b7ce 1012
4d885bd3
DH
1013 if (keep_groups) {
1014 /*
1015 * Lookup the list of groups that the user belongs to, we
1016 * avoid NSS lookups here too for gid=0.
1017 */
1018 k = ngroups_max;
1019 if (getgrouplist(user, gid, l_gids, &k) < 0)
1020 return -EINVAL;
1021 } else
1022 k = 0;
81a2b7ce 1023
4d885bd3
DH
1024 STRV_FOREACH(i, c->supplementary_groups) {
1025 const char *g;
81a2b7ce 1026
4d885bd3
DH
1027 if (k >= ngroups_max)
1028 return -E2BIG;
81a2b7ce 1029
4d885bd3 1030 g = *i;
fafff8f1 1031 r = get_group_creds(&g, l_gids+k, 0);
4d885bd3
DH
1032 if (r < 0)
1033 return r;
81a2b7ce 1034
4d885bd3
DH
1035 k++;
1036 }
81a2b7ce 1037
4d885bd3
DH
1038 /*
1039 * Sets ngids to zero to drop all supplementary groups, happens
1040 * when we are under root and SupplementaryGroups= is empty.
1041 */
1042 if (k == 0) {
1043 *ngids = 0;
1044 return 0;
1045 }
81a2b7ce 1046
4d885bd3
DH
1047 /* Otherwise get the final list of supplementary groups */
1048 groups = memdup(l_gids, sizeof(gid_t) * k);
1049 if (!groups)
1050 return -ENOMEM;
1051
1052 *supplementary_gids = groups;
1053 *ngids = k;
1054
1055 groups = NULL;
1056
1057 return 0;
1058}
1059
/* Applies group credentials to the calling process: first the supplementary group list (only if
 * ngids is positive), then the real, effective and saved GID (only if gid is valid). Returns 0 on
 * success or a negative errno-style value. */
static int enforce_groups(gid_t gid, const gid_t *supplementary_gids, int ngids) {
        int r;

        /* Only touch the supplementary groups if there is a list to apply */
        if (ngids > 0) {
                r = maybe_setgroups(ngids, supplementary_gids);
                if (r < 0)
                        return r;
        }

        /* Then switch the primary group */
        if (gid_is_valid(gid) && setresgid(gid, gid, gid) < 0)
                return -errno;

        return 0;
}
1078
1079static int enforce_user(const ExecContext *context, uid_t uid) {
81a2b7ce
LP
1080 assert(context);
1081
4d885bd3
DH
1082 if (!uid_is_valid(uid))
1083 return 0;
1084
479050b3 1085 /* Sets (but doesn't look up) the uid and make sure we keep the
81a2b7ce
LP
1086 * capabilities while doing so. */
1087
479050b3 1088 if (context->capability_ambient_set != 0) {
81a2b7ce
LP
1089
1090 /* First step: If we need to keep capabilities but
1091 * drop privileges we need to make sure we keep our
cbb21cca 1092 * caps, while we drop privileges. */
693ced48 1093 if (uid != 0) {
cbb21cca 1094 int sb = context->secure_bits | 1<<SECURE_KEEP_CAPS;
693ced48
LP
1095
1096 if (prctl(PR_GET_SECUREBITS) != sb)
1097 if (prctl(PR_SET_SECUREBITS, sb) < 0)
1098 return -errno;
1099 }
81a2b7ce
LP
1100 }
1101
479050b3 1102 /* Second step: actually set the uids */
81a2b7ce
LP
1103 if (setresuid(uid, uid, uid) < 0)
1104 return -errno;
1105
1106 /* At this point we should have all necessary capabilities but
1107 are otherwise a normal user. However, the caps might got
1108 corrupted due to the setresuid() so we need clean them up
1109 later. This is done outside of this call. */
1110
1111 return 0;
1112}
1113
349cc4a5 1114#if HAVE_PAM
5b6319dc
LP
1115
1116static int null_conv(
1117 int num_msg,
1118 const struct pam_message **msg,
1119 struct pam_response **resp,
1120 void *appdata_ptr) {
1121
1122 /* We don't support conversations */
1123
1124 return PAM_CONV_ERR;
1125}
1126
cefc33ae
LP
1127#endif
1128
5b6319dc
LP
static int setup_pam(
                const char *name,
                const char *user,
                uid_t uid,
                gid_t gid,
                const char *tty,
                char ***env,
                int fds[], size_t n_fds) {

        /* Opens a PAM session for service 'name' and user 'user', and forks off a helper process
         * ("(sd-pam)") that closes the session again once this process goes away.
         *
         * name       PAM service name, must not be NULL
         * user       user name handed to pam_start(), must not be NULL
         * uid, gid   credentials the helper process drops to
         * tty        TTY path to register as PAM_TTY, or NULL to try deriving it from stdin
         * env        in: variables exported into PAM; out: replaced by the PAM environment list
         * fds, n_fds file descriptors that are closed in the helper process
         *
         * Returns a negative errno-style value on failure (-EPERM for PAM-level errors). On success
         * the result of strv_free_and_replace() is returned. When built without PAM support this is a
         * no-op returning 0. */

#if HAVE_PAM

        static const struct pam_conv conv = {
                .conv = null_conv,
                .appdata_ptr = NULL
        };

        _cleanup_(barrier_destroy) Barrier barrier = BARRIER_NULL;
        pam_handle_t *handle = NULL;
        sigset_t old_ss;
        int pam_code = PAM_SUCCESS, r;
        char **nv, **e = NULL;
        bool close_session = false;
        pid_t pam_pid = 0, parent_pid;
        int flags = 0;

        assert(name);
        assert(user);
        assert(env);

        /* We set up PAM in the parent process, then fork. The child
         * will then stay around until killed via PR_SET_PDEATHSIG or
         * systemd via the cgroup logic. It will then remove the PAM
         * session again. The parent process will exec() the actual
         * daemon. We do things this way to ensure that the main PID
         * of the daemon is the one we initially fork()ed. */

        r = barrier_create(&barrier);
        if (r < 0)
                goto fail;

        if (log_get_max_level() < LOG_DEBUG)
                flags |= PAM_SILENT;

        pam_code = pam_start(name, user, &conv, &handle);
        if (pam_code != PAM_SUCCESS) {
                handle = NULL;
                goto fail;
        }

        if (!tty) {
                _cleanup_free_ char *q = NULL;

                /* Hmm, so no TTY was explicitly passed, but an fd passed to us directly might be a TTY. Let's figure
                 * out if that's the case, and read the TTY off it. */

                if (getttyname_malloc(STDIN_FILENO, &q) >= 0)
                        tty = strjoina("/dev/", q);
        }

        if (tty) {
                pam_code = pam_set_item(handle, PAM_TTY, tty);
                if (pam_code != PAM_SUCCESS)
                        goto fail;
        }

        STRV_FOREACH(nv, *env) {
                pam_code = pam_putenv(handle, *nv);
                if (pam_code != PAM_SUCCESS)
                        goto fail;
        }

        pam_code = pam_acct_mgmt(handle, flags);
        if (pam_code != PAM_SUCCESS)
                goto fail;

        pam_code = pam_open_session(handle, flags);
        if (pam_code != PAM_SUCCESS)
                goto fail;

        close_session = true;

        e = pam_getenvlist(handle);
        if (!e) {
                pam_code = PAM_BUF_ERR;
                goto fail;
        }

        /* Block SIGTERM, so that we know that it won't get lost in
         * the child */

        assert_se(sigprocmask_many(SIG_BLOCK, &old_ss, SIGTERM, -1) >= 0);

        parent_pid = getpid_cached();

        r = safe_fork("(sd-pam)", 0, &pam_pid);
        if (r < 0)
                goto fail;
        if (r == 0) {
                int sig, ret = EXIT_PAM;

                /* The child's job is to reset the PAM session on
                 * termination */
                barrier_set_role(&barrier, BARRIER_CHILD);

                /* Make sure we don't keep open the passed fds in this child. We assume that otherwise only those fds
                 * are open here that have been opened by PAM. */
                (void) close_many(fds, n_fds);

                /* Drop privileges - we don't need any to pam_close_session
                 * and this will make PR_SET_PDEATHSIG work in most cases.
                 * If this fails, ignore the error - but expect sd-pam threads
                 * to fail to exit normally */

                r = maybe_setgroups(0, NULL);
                if (r < 0)
                        log_warning_errno(r, "Failed to setgroups() in sd-pam: %m");
                if (setresgid(gid, gid, gid) < 0)
                        log_warning_errno(errno, "Failed to setresgid() in sd-pam: %m");
                if (setresuid(uid, uid, uid) < 0)
                        log_warning_errno(errno, "Failed to setresuid() in sd-pam: %m");

                (void) ignore_signals(SIGPIPE, -1);

                /* Wait until our parent died. This will only work if
                 * the above setresuid() succeeds, otherwise the kernel
                 * will not allow unprivileged parents kill their privileged
                 * children this way. We rely on the control groups kill logic
                 * to do the rest for us. */
                if (prctl(PR_SET_PDEATHSIG, SIGTERM) < 0)
                        goto child_finish;

                /* Tell the parent that our setup is done. This is especially
                 * important regarding dropping privileges. Otherwise, unit
                 * setup might race against our setresuid(2) call.
                 *
                 * If the parent aborted, we'll detect this below, hence ignore
                 * return failure here. */
                (void) barrier_place(&barrier);

                /* Check if our parent process might already have died? */
                if (getppid() == parent_pid) {
                        sigset_t ss;

                        assert_se(sigemptyset(&ss) >= 0);
                        assert_se(sigaddset(&ss, SIGTERM) >= 0);

                        for (;;) {
                                int k;

                                /* sigwait() signals failure by returning a positive error number;
                                 * it never returns a negative value and does not set errno. Hence
                                 * look at the return value itself, so that 'sig' is only read
                                 * after a successful wait. */
                                k = sigwait(&ss, &sig);
                                if (k == EINTR)
                                        continue;
                                if (k != 0)
                                        goto child_finish;

                                assert(sig == SIGTERM);
                                break;
                        }
                }

                /* If our parent died we'll end the session */
                if (getppid() != parent_pid) {
                        pam_code = pam_close_session(handle, flags);
                        if (pam_code != PAM_SUCCESS)
                                goto child_finish;
                }

                ret = 0;

        child_finish:
                pam_end(handle, pam_code | flags);
                _exit(ret);
        }

        barrier_set_role(&barrier, BARRIER_PARENT);

        /* If the child was forked off successfully it will do all the
         * cleanups, so forget about the handle here. */
        handle = NULL;

        /* Unblock SIGTERM again in the parent */
        assert_se(sigprocmask(SIG_SETMASK, &old_ss, NULL) >= 0);

        /* We close the log explicitly here, since the PAM modules
         * might have opened it, but we don't want this fd around. */
        closelog();

        /* Synchronously wait for the child to initialize. We don't care for
         * errors as we cannot recover. However, warn loudly if it happens. */
        if (!barrier_place_and_sync(&barrier))
                log_error("PAM initialization failed");

        return strv_free_and_replace(*env, e);

fail:
        if (pam_code != PAM_SUCCESS) {
                log_error("PAM failed: %s", pam_strerror(handle, pam_code));
                r = -EPERM; /* PAM errors do not map to errno */
        } else
                log_error_errno(r, "PAM failed: %m");

        if (handle) {
                if (close_session)
                        pam_code = pam_close_session(handle, flags);

                pam_end(handle, pam_code | flags);
        }

        strv_free(e);
        closelog();

        return r;
#else
        return 0;
#endif
}
5b6319dc 1344
5d6b1584
LP
static void rename_process_from_path(const char *path) {
        char buf[11];
        const char *base;
        size_t n;

        /* Renames the process to "(<name>)", where <name> is taken from the last component of 'path'.
         * The result has to fit into 10 characters (i.e. the length of "/sbin/init") so that it looks
         * pretty in /bin/ps, which leaves 8 characters for the name between the parentheses. */

        base = basename(path);
        if (isempty(base)) {
                rename_process("(...)");
                return;
        }

        n = strlen(base);
        if (n > 8) {
                /* Keep the tail rather than the head: the end of the name is usually the more
                 * interesting part, since the beginning might just be "systemd-". */
                base += n - 8;
                n = 8;
        }

        buf[0] = '(';
        memcpy(buf + 1, base, n);
        buf[n + 1] = ')';
        buf[n + 2] = 0;

        rename_process(buf);
}
1375
469830d1
LP
1376static bool context_has_address_families(const ExecContext *c) {
1377 assert(c);
1378
1379 return c->address_families_whitelist ||
1380 !set_isempty(c->address_families);
1381}
1382
1383static bool context_has_syscall_filters(const ExecContext *c) {
1384 assert(c);
1385
1386 return c->syscall_whitelist ||
8cfa775f 1387 !hashmap_isempty(c->syscall_filter);
469830d1
LP
1388}
1389
static bool context_has_no_new_privileges(const ExecContext *c) {
        assert(c);

        /* Decides whether the no-new-privileges flag is needed for this context. */

        /* Explicitly requested: always yes. */
        if (c->no_new_privileges)
                return true;

        /* If we are privileged, we don't need NNP. */
        if (have_effective_cap(CAP_SYS_ADMIN))
                return false;

        /* Unprivileged: NNP is needed as soon as any of the settings below, all of which involve
         * some form of seccomp, is in effect. The checks are made in a fixed order and stop at the
         * first hit. */
        if (context_has_address_families(c))
                return true;

        if (c->memory_deny_write_execute ||
            c->restrict_realtime ||
            c->restrict_suid_sgid)
                return true;

        if (exec_context_restrict_namespaces_set(c))
                return true;

        if (c->protect_kernel_tunables ||
            c->protect_kernel_modules ||
            c->private_devices)
                return true;

        if (context_has_syscall_filters(c))
                return true;

        if (!set_isempty(c->syscall_archs))
                return true;

        return c->lock_personality || c->protect_hostname;
}
1413
349cc4a5 1414#if HAVE_SECCOMP
17df7223 1415
83f12b27 1416static bool skip_seccomp_unavailable(const Unit* u, const char* msg) {
f673b62d
LP
1417
1418 if (is_seccomp_available())
1419 return false;
1420
f673b62d 1421 log_unit_debug(u, "SECCOMP features not detected in the kernel, skipping %s", msg);
f673b62d 1422 return true;
83f12b27
FS
1423}
1424
165a31c0 1425static int apply_syscall_filter(const Unit* u, const ExecContext *c, bool needs_ambient_hack) {
469830d1 1426 uint32_t negative_action, default_action, action;
165a31c0 1427 int r;
8351ceae 1428
469830d1 1429 assert(u);
c0467cf3 1430 assert(c);
8351ceae 1431
469830d1 1432 if (!context_has_syscall_filters(c))
83f12b27
FS
1433 return 0;
1434
469830d1
LP
1435 if (skip_seccomp_unavailable(u, "SystemCallFilter="))
1436 return 0;
e9642be2 1437
ccc16c78 1438 negative_action = c->syscall_errno == 0 ? scmp_act_kill_process() : SCMP_ACT_ERRNO(c->syscall_errno);
e9642be2 1439
469830d1
LP
1440 if (c->syscall_whitelist) {
1441 default_action = negative_action;
1442 action = SCMP_ACT_ALLOW;
7c66bae2 1443 } else {
469830d1
LP
1444 default_action = SCMP_ACT_ALLOW;
1445 action = negative_action;
57183d11 1446 }
8351ceae 1447
165a31c0
LP
1448 if (needs_ambient_hack) {
1449 r = seccomp_filter_set_add(c->syscall_filter, c->syscall_whitelist, syscall_filter_sets + SYSCALL_FILTER_SET_SETUID);
1450 if (r < 0)
1451 return r;
1452 }
1453
b54f36c6 1454 return seccomp_load_syscall_filter_set_raw(default_action, c->syscall_filter, action, false);
4298d0b5
LP
1455}
1456
469830d1
LP
1457static int apply_syscall_archs(const Unit *u, const ExecContext *c) {
1458 assert(u);
4298d0b5
LP
1459 assert(c);
1460
469830d1 1461 if (set_isempty(c->syscall_archs))
83f12b27
FS
1462 return 0;
1463
469830d1
LP
1464 if (skip_seccomp_unavailable(u, "SystemCallArchitectures="))
1465 return 0;
4298d0b5 1466
469830d1
LP
1467 return seccomp_restrict_archs(c->syscall_archs);
1468}
4298d0b5 1469
469830d1
LP
1470static int apply_address_families(const Unit* u, const ExecContext *c) {
1471 assert(u);
1472 assert(c);
4298d0b5 1473
469830d1
LP
1474 if (!context_has_address_families(c))
1475 return 0;
4298d0b5 1476
469830d1
LP
1477 if (skip_seccomp_unavailable(u, "RestrictAddressFamilies="))
1478 return 0;
4298d0b5 1479
469830d1 1480 return seccomp_restrict_address_families(c->address_families, c->address_families_whitelist);
8351ceae 1481}
4298d0b5 1482
83f12b27 1483static int apply_memory_deny_write_execute(const Unit* u, const ExecContext *c) {
469830d1 1484 assert(u);
f3e43635
TM
1485 assert(c);
1486
469830d1 1487 if (!c->memory_deny_write_execute)
83f12b27
FS
1488 return 0;
1489
469830d1
LP
1490 if (skip_seccomp_unavailable(u, "MemoryDenyWriteExecute="))
1491 return 0;
f3e43635 1492
469830d1 1493 return seccomp_memory_deny_write_execute();
f3e43635
TM
1494}
1495
83f12b27 1496static int apply_restrict_realtime(const Unit* u, const ExecContext *c) {
469830d1 1497 assert(u);
f4170c67
LP
1498 assert(c);
1499
469830d1 1500 if (!c->restrict_realtime)
83f12b27
FS
1501 return 0;
1502
469830d1
LP
1503 if (skip_seccomp_unavailable(u, "RestrictRealtime="))
1504 return 0;
f4170c67 1505
469830d1 1506 return seccomp_restrict_realtime();
f4170c67
LP
1507}
1508
f69567cb
LP
1509static int apply_restrict_suid_sgid(const Unit* u, const ExecContext *c) {
1510 assert(u);
1511 assert(c);
1512
1513 if (!c->restrict_suid_sgid)
1514 return 0;
1515
1516 if (skip_seccomp_unavailable(u, "RestrictSUIDSGID="))
1517 return 0;
1518
1519 return seccomp_restrict_suid_sgid();
1520}
1521
59e856c7 1522static int apply_protect_sysctl(const Unit *u, const ExecContext *c) {
469830d1 1523 assert(u);
59eeb84b
LP
1524 assert(c);
1525
1526 /* Turn off the legacy sysctl() system call. Many distributions turn this off while building the kernel, but
1527 * let's protect even those systems where this is left on in the kernel. */
1528
469830d1 1529 if (!c->protect_kernel_tunables)
59eeb84b
LP
1530 return 0;
1531
469830d1
LP
1532 if (skip_seccomp_unavailable(u, "ProtectKernelTunables="))
1533 return 0;
59eeb84b 1534
469830d1 1535 return seccomp_protect_sysctl();
59eeb84b
LP
1536}
1537
59e856c7 1538static int apply_protect_kernel_modules(const Unit *u, const ExecContext *c) {
469830d1 1539 assert(u);
502d704e
DH
1540 assert(c);
1541
25a8d8a0 1542 /* Turn off module syscalls on ProtectKernelModules=yes */
502d704e 1543
469830d1
LP
1544 if (!c->protect_kernel_modules)
1545 return 0;
1546
502d704e
DH
1547 if (skip_seccomp_unavailable(u, "ProtectKernelModules="))
1548 return 0;
1549
b54f36c6 1550 return seccomp_load_syscall_filter_set(SCMP_ACT_ALLOW, syscall_filter_sets + SYSCALL_FILTER_SET_MODULE, SCMP_ACT_ERRNO(EPERM), false);
502d704e
DH
1551}
1552
59e856c7 1553static int apply_private_devices(const Unit *u, const ExecContext *c) {
469830d1 1554 assert(u);
ba128bb8
LP
1555 assert(c);
1556
8f81a5f6 1557 /* If PrivateDevices= is set, also turn off iopl and all @raw-io syscalls. */
ba128bb8 1558
469830d1
LP
1559 if (!c->private_devices)
1560 return 0;
1561
ba128bb8
LP
1562 if (skip_seccomp_unavailable(u, "PrivateDevices="))
1563 return 0;
1564
b54f36c6 1565 return seccomp_load_syscall_filter_set(SCMP_ACT_ALLOW, syscall_filter_sets + SYSCALL_FILTER_SET_RAW_IO, SCMP_ACT_ERRNO(EPERM), false);
ba128bb8
LP
1566}
1567
34cf6c43 1568static int apply_restrict_namespaces(const Unit *u, const ExecContext *c) {
469830d1 1569 assert(u);
add00535
LP
1570 assert(c);
1571
1572 if (!exec_context_restrict_namespaces_set(c))
1573 return 0;
1574
1575 if (skip_seccomp_unavailable(u, "RestrictNamespaces="))
1576 return 0;
1577
1578 return seccomp_restrict_namespaces(c->restrict_namespaces);
1579}
1580
78e864e5 1581static int apply_lock_personality(const Unit* u, const ExecContext *c) {
e8132d63
LP
1582 unsigned long personality;
1583 int r;
78e864e5
TM
1584
1585 assert(u);
1586 assert(c);
1587
1588 if (!c->lock_personality)
1589 return 0;
1590
1591 if (skip_seccomp_unavailable(u, "LockPersonality="))
1592 return 0;
1593
e8132d63
LP
1594 personality = c->personality;
1595
1596 /* If personality is not specified, use either PER_LINUX or PER_LINUX32 depending on what is currently set. */
1597 if (personality == PERSONALITY_INVALID) {
1598
1599 r = opinionated_personality(&personality);
1600 if (r < 0)
1601 return r;
1602 }
78e864e5
TM
1603
1604 return seccomp_lock_personality(personality);
1605}
1606
c0467cf3 1607#endif
8351ceae 1608
3042bbeb 1609static void do_idle_pipe_dance(int idle_pipe[static 4]) {
31a7eb86
ZJS
1610 assert(idle_pipe);
1611
54eb2300
LP
1612 idle_pipe[1] = safe_close(idle_pipe[1]);
1613 idle_pipe[2] = safe_close(idle_pipe[2]);
31a7eb86
ZJS
1614
1615 if (idle_pipe[0] >= 0) {
1616 int r;
1617
1618 r = fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT_USEC);
1619
1620 if (idle_pipe[3] >= 0 && r == 0 /* timeout */) {
c7cc737f
LP
1621 ssize_t n;
1622
31a7eb86 1623 /* Signal systemd that we are bored and want to continue. */
c7cc737f
LP
1624 n = write(idle_pipe[3], "x", 1);
1625 if (n > 0)
cd972d69
ZJS
1626 /* Wait for systemd to react to the signal above. */
1627 fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT2_USEC);
31a7eb86
ZJS
1628 }
1629
54eb2300 1630 idle_pipe[0] = safe_close(idle_pipe[0]);
31a7eb86
ZJS
1631
1632 }
1633
54eb2300 1634 idle_pipe[3] = safe_close(idle_pipe[3]);
31a7eb86
ZJS
1635}
1636
fb2042dd
YW
1637static const char *exec_directory_env_name_to_string(ExecDirectoryType t);
1638
7cae38c4 1639static int build_environment(
34cf6c43 1640 const Unit *u,
9fa95f85 1641 const ExecContext *c,
1e22b5cd 1642 const ExecParameters *p,
da6053d0 1643 size_t n_fds,
7cae38c4
LP
1644 const char *home,
1645 const char *username,
1646 const char *shell,
7bce046b
LP
1647 dev_t journal_stream_dev,
1648 ino_t journal_stream_ino,
7cae38c4
LP
1649 char ***ret) {
1650
1651 _cleanup_strv_free_ char **our_env = NULL;
fb2042dd 1652 ExecDirectoryType t;
da6053d0 1653 size_t n_env = 0;
7cae38c4
LP
1654 char *x;
1655
4b58153d 1656 assert(u);
7cae38c4 1657 assert(c);
7c1cb6f1 1658 assert(p);
7cae38c4
LP
1659 assert(ret);
1660
fb2042dd 1661 our_env = new0(char*, 14 + _EXEC_DIRECTORY_TYPE_MAX);
7cae38c4
LP
1662 if (!our_env)
1663 return -ENOMEM;
1664
1665 if (n_fds > 0) {
8dd4c05b
LP
1666 _cleanup_free_ char *joined = NULL;
1667
df0ff127 1668 if (asprintf(&x, "LISTEN_PID="PID_FMT, getpid_cached()) < 0)
7cae38c4
LP
1669 return -ENOMEM;
1670 our_env[n_env++] = x;
1671
da6053d0 1672 if (asprintf(&x, "LISTEN_FDS=%zu", n_fds) < 0)
7cae38c4
LP
1673 return -ENOMEM;
1674 our_env[n_env++] = x;
8dd4c05b 1675
1e22b5cd 1676 joined = strv_join(p->fd_names, ":");
8dd4c05b
LP
1677 if (!joined)
1678 return -ENOMEM;
1679
605405c6 1680 x = strjoin("LISTEN_FDNAMES=", joined);
8dd4c05b
LP
1681 if (!x)
1682 return -ENOMEM;
1683 our_env[n_env++] = x;
7cae38c4
LP
1684 }
1685
b08af3b1 1686 if ((p->flags & EXEC_SET_WATCHDOG) && p->watchdog_usec > 0) {
df0ff127 1687 if (asprintf(&x, "WATCHDOG_PID="PID_FMT, getpid_cached()) < 0)
09812eb7
LP
1688 return -ENOMEM;
1689 our_env[n_env++] = x;
1690
1e22b5cd 1691 if (asprintf(&x, "WATCHDOG_USEC="USEC_FMT, p->watchdog_usec) < 0)
09812eb7
LP
1692 return -ENOMEM;
1693 our_env[n_env++] = x;
1694 }
1695
fd63e712
LP
1696 /* If this is D-Bus, tell the nss-systemd module, since it relies on being able to use D-Bus look up dynamic
1697 * users via PID 1, possibly dead-locking the dbus daemon. This way it will not use D-Bus to resolve names, but
1698 * check the database directly. */
ac647978 1699 if (p->flags & EXEC_NSS_BYPASS_BUS) {
fd63e712
LP
1700 x = strdup("SYSTEMD_NSS_BYPASS_BUS=1");
1701 if (!x)
1702 return -ENOMEM;
1703 our_env[n_env++] = x;
1704 }
1705
7cae38c4
LP
1706 if (home) {
1707 x = strappend("HOME=", home);
1708 if (!x)
1709 return -ENOMEM;
7bbead1d
LP
1710
1711 path_simplify(x + 5, true);
7cae38c4
LP
1712 our_env[n_env++] = x;
1713 }
1714
1715 if (username) {
1716 x = strappend("LOGNAME=", username);
1717 if (!x)
1718 return -ENOMEM;
1719 our_env[n_env++] = x;
1720
1721 x = strappend("USER=", username);
1722 if (!x)
1723 return -ENOMEM;
1724 our_env[n_env++] = x;
1725 }
1726
1727 if (shell) {
1728 x = strappend("SHELL=", shell);
1729 if (!x)
1730 return -ENOMEM;
7bbead1d
LP
1731
1732 path_simplify(x + 6, true);
7cae38c4
LP
1733 our_env[n_env++] = x;
1734 }
1735
4b58153d
LP
1736 if (!sd_id128_is_null(u->invocation_id)) {
1737 if (asprintf(&x, "INVOCATION_ID=" SD_ID128_FORMAT_STR, SD_ID128_FORMAT_VAL(u->invocation_id)) < 0)
1738 return -ENOMEM;
1739
1740 our_env[n_env++] = x;
1741 }
1742
6af760f3
LP
1743 if (exec_context_needs_term(c)) {
1744 const char *tty_path, *term = NULL;
1745
1746 tty_path = exec_context_tty_path(c);
1747
1748 /* If we are forked off PID 1 and we are supposed to operate on /dev/console, then let's try to inherit
1749 * the $TERM set for PID 1. This is useful for containers so that the $TERM the container manager
1750 * passes to PID 1 ends up all the way in the console login shown. */
1751
1752 if (path_equal(tty_path, "/dev/console") && getppid() == 1)
1753 term = getenv("TERM");
1754 if (!term)
1755 term = default_term_for_tty(tty_path);
7cae38c4 1756
6af760f3 1757 x = strappend("TERM=", term);
7cae38c4
LP
1758 if (!x)
1759 return -ENOMEM;
1760 our_env[n_env++] = x;
1761 }
1762
7bce046b
LP
1763 if (journal_stream_dev != 0 && journal_stream_ino != 0) {
1764 if (asprintf(&x, "JOURNAL_STREAM=" DEV_FMT ":" INO_FMT, journal_stream_dev, journal_stream_ino) < 0)
1765 return -ENOMEM;
1766
1767 our_env[n_env++] = x;
1768 }
1769
fb2042dd
YW
1770 for (t = 0; t < _EXEC_DIRECTORY_TYPE_MAX; t++) {
1771 _cleanup_free_ char *pre = NULL, *joined = NULL;
1772 const char *n;
1773
1774 if (!p->prefix[t])
1775 continue;
1776
1777 if (strv_isempty(c->directories[t].paths))
1778 continue;
1779
1780 n = exec_directory_env_name_to_string(t);
1781 if (!n)
1782 continue;
1783
1784 pre = strjoin(p->prefix[t], "/");
1785 if (!pre)
1786 return -ENOMEM;
1787
1788 joined = strv_join_prefix(c->directories[t].paths, ":", pre);
1789 if (!joined)
1790 return -ENOMEM;
1791
1792 x = strjoin(n, "=", joined);
1793 if (!x)
1794 return -ENOMEM;
1795
1796 our_env[n_env++] = x;
1797 }
1798
7cae38c4 1799 our_env[n_env++] = NULL;
fb2042dd 1800 assert(n_env <= 14 + _EXEC_DIRECTORY_TYPE_MAX);
7cae38c4 1801
ae2a15bc 1802 *ret = TAKE_PTR(our_env);
7cae38c4
LP
1803
1804 return 0;
1805}
1806
b4c14404
FB
1807static int build_pass_environment(const ExecContext *c, char ***ret) {
1808 _cleanup_strv_free_ char **pass_env = NULL;
1809 size_t n_env = 0, n_bufsize = 0;
1810 char **i;
1811
1812 STRV_FOREACH(i, c->pass_environment) {
1813 _cleanup_free_ char *x = NULL;
1814 char *v;
1815
1816 v = getenv(*i);
1817 if (!v)
1818 continue;
605405c6 1819 x = strjoin(*i, "=", v);
b4c14404
FB
1820 if (!x)
1821 return -ENOMEM;
00819cc1 1822
b4c14404
FB
1823 if (!GREEDY_REALLOC(pass_env, n_bufsize, n_env + 2))
1824 return -ENOMEM;
00819cc1 1825
1cc6c93a 1826 pass_env[n_env++] = TAKE_PTR(x);
b4c14404 1827 pass_env[n_env] = NULL;
b4c14404
FB
1828 }
1829
ae2a15bc 1830 *ret = TAKE_PTR(pass_env);
b4c14404
FB
1831
1832 return 0;
1833}
1834
8b44a3d2
LP
1835static bool exec_needs_mount_namespace(
1836 const ExecContext *context,
1837 const ExecParameters *params,
4657abb5 1838 const ExecRuntime *runtime) {
8b44a3d2
LP
1839
1840 assert(context);
1841 assert(params);
1842
915e6d16
LP
1843 if (context->root_image)
1844 return true;
1845
2a624c36
AP
1846 if (!strv_isempty(context->read_write_paths) ||
1847 !strv_isempty(context->read_only_paths) ||
1848 !strv_isempty(context->inaccessible_paths))
8b44a3d2
LP
1849 return true;
1850
42b1d8e0 1851 if (context->n_bind_mounts > 0)
d2d6c096
LP
1852 return true;
1853
2abd4e38
YW
1854 if (context->n_temporary_filesystems > 0)
1855 return true;
1856
37ed15d7 1857 if (!IN_SET(context->mount_flags, 0, MS_SHARED))
8b44a3d2
LP
1858 return true;
1859
1860 if (context->private_tmp && runtime && (runtime->tmp_dir || runtime->var_tmp_dir))
1861 return true;
1862
8b44a3d2 1863 if (context->private_devices ||
228af36f 1864 context->private_mounts ||
8b44a3d2 1865 context->protect_system != PROTECT_SYSTEM_NO ||
59eeb84b
LP
1866 context->protect_home != PROTECT_HOME_NO ||
1867 context->protect_kernel_tunables ||
c575770b 1868 context->protect_kernel_modules ||
59eeb84b 1869 context->protect_control_groups)
8b44a3d2
LP
1870 return true;
1871
37c56f89
YW
1872 if (context->root_directory) {
1873 ExecDirectoryType t;
1874
1875 if (context->mount_apivfs)
1876 return true;
1877
1878 for (t = 0; t < _EXEC_DIRECTORY_TYPE_MAX; t++) {
1879 if (!params->prefix[t])
1880 continue;
1881
1882 if (!strv_isempty(context->directories[t].paths))
1883 return true;
1884 }
1885 }
5d997827 1886
42b1d8e0 1887 if (context->dynamic_user &&
b43ee82f 1888 (!strv_isempty(context->directories[EXEC_DIRECTORY_STATE].paths) ||
42b1d8e0
YW
1889 !strv_isempty(context->directories[EXEC_DIRECTORY_CACHE].paths) ||
1890 !strv_isempty(context->directories[EXEC_DIRECTORY_LOGS].paths)))
1891 return true;
1892
8b44a3d2
LP
1893 return false;
1894}
1895
d251207d
LP
1896static int setup_private_users(uid_t uid, gid_t gid) {
1897 _cleanup_free_ char *uid_map = NULL, *gid_map = NULL;
1898 _cleanup_close_pair_ int errno_pipe[2] = { -1, -1 };
1899 _cleanup_close_ int unshare_ready_fd = -1;
1900 _cleanup_(sigkill_waitp) pid_t pid = 0;
1901 uint64_t c = 1;
d251207d
LP
1902 ssize_t n;
1903 int r;
1904
1905 /* Set up a user namespace and map root to root, the selected UID/GID to itself, and everything else to
1906 * nobody. In order to be able to write this mapping we need CAP_SETUID in the original user namespace, which
1907 * we however lack after opening the user namespace. To work around this we fork() a temporary child process,
1908 * which waits for the parent to create the new user namespace while staying in the original namespace. The
1909 * child then writes the UID mapping, under full privileges. The parent waits for the child to finish and
1910 * continues execution normally. */
1911
587ab01b
ZJS
1912 if (uid != 0 && uid_is_valid(uid)) {
1913 r = asprintf(&uid_map,
1914 "0 0 1\n" /* Map root → root */
1915 UID_FMT " " UID_FMT " 1\n", /* Map $UID → $UID */
1916 uid, uid);
1917 if (r < 0)
1918 return -ENOMEM;
1919 } else {
e0f3720e 1920 uid_map = strdup("0 0 1\n"); /* The case where the above is the same */
587ab01b
ZJS
1921 if (!uid_map)
1922 return -ENOMEM;
1923 }
d251207d 1924
587ab01b
ZJS
1925 if (gid != 0 && gid_is_valid(gid)) {
1926 r = asprintf(&gid_map,
1927 "0 0 1\n" /* Map root → root */
1928 GID_FMT " " GID_FMT " 1\n", /* Map $GID → $GID */
1929 gid, gid);
1930 if (r < 0)
1931 return -ENOMEM;
1932 } else {
d251207d 1933 gid_map = strdup("0 0 1\n"); /* The case where the above is the same */
587ab01b
ZJS
1934 if (!gid_map)
1935 return -ENOMEM;
1936 }
d251207d
LP
1937
1938 /* Create a communication channel so that the parent can tell the child when it finished creating the user
1939 * namespace. */
1940 unshare_ready_fd = eventfd(0, EFD_CLOEXEC);
1941 if (unshare_ready_fd < 0)
1942 return -errno;
1943
1944 /* Create a communication channel so that the child can tell the parent a proper error code in case it
1945 * failed. */
1946 if (pipe2(errno_pipe, O_CLOEXEC) < 0)
1947 return -errno;
1948
4c253ed1
LP
1949 r = safe_fork("(sd-userns)", FORK_RESET_SIGNALS|FORK_DEATHSIG, &pid);
1950 if (r < 0)
1951 return r;
1952 if (r == 0) {
d251207d
LP
1953 _cleanup_close_ int fd = -1;
1954 const char *a;
1955 pid_t ppid;
1956
1957 /* Child process, running in the original user namespace. Let's update the parent's UID/GID map from
1958 * here, after the parent opened its own user namespace. */
1959
1960 ppid = getppid();
1961 errno_pipe[0] = safe_close(errno_pipe[0]);
1962
1963 /* Wait until the parent unshared the user namespace */
1964 if (read(unshare_ready_fd, &c, sizeof(c)) < 0) {
1965 r = -errno;
1966 goto child_fail;
1967 }
1968
1969 /* Disable the setgroups() system call in the child user namespace, for good. */
1970 a = procfs_file_alloca(ppid, "setgroups");
1971 fd = open(a, O_WRONLY|O_CLOEXEC);
1972 if (fd < 0) {
1973 if (errno != ENOENT) {
1974 r = -errno;
1975 goto child_fail;
1976 }
1977
1978 /* If the file is missing the kernel is too old, let's continue anyway. */
1979 } else {
1980 if (write(fd, "deny\n", 5) < 0) {
1981 r = -errno;
1982 goto child_fail;
1983 }
1984
1985 fd = safe_close(fd);
1986 }
1987
1988 /* First write the GID map */
1989 a = procfs_file_alloca(ppid, "gid_map");
1990 fd = open(a, O_WRONLY|O_CLOEXEC);
1991 if (fd < 0) {
1992 r = -errno;
1993 goto child_fail;
1994 }
1995 if (write(fd, gid_map, strlen(gid_map)) < 0) {
1996 r = -errno;
1997 goto child_fail;
1998 }
1999 fd = safe_close(fd);
2000
2001 /* The write the UID map */
2002 a = procfs_file_alloca(ppid, "uid_map");
2003 fd = open(a, O_WRONLY|O_CLOEXEC);
2004 if (fd < 0) {
2005 r = -errno;
2006 goto child_fail;
2007 }
2008 if (write(fd, uid_map, strlen(uid_map)) < 0) {
2009 r = -errno;
2010 goto child_fail;
2011 }
2012
2013 _exit(EXIT_SUCCESS);
2014
2015 child_fail:
2016 (void) write(errno_pipe[1], &r, sizeof(r));
2017 _exit(EXIT_FAILURE);
2018 }
2019
2020 errno_pipe[1] = safe_close(errno_pipe[1]);
2021
2022 if (unshare(CLONE_NEWUSER) < 0)
2023 return -errno;
2024
2025 /* Let the child know that the namespace is ready now */
2026 if (write(unshare_ready_fd, &c, sizeof(c)) < 0)
2027 return -errno;
2028
2029 /* Try to read an error code from the child */
2030 n = read(errno_pipe[0], &r, sizeof(r));
2031 if (n < 0)
2032 return -errno;
2033 if (n == sizeof(r)) { /* an error code was sent to us */
2034 if (r < 0)
2035 return r;
2036 return -EIO;
2037 }
2038 if (n != 0) /* on success we should have read 0 bytes */
2039 return -EIO;
2040
2e87a1fd
LP
2041 r = wait_for_terminate_and_check("(sd-userns)", pid, 0);
2042 pid = 0;
d251207d
LP
2043 if (r < 0)
2044 return r;
2e87a1fd 2045 if (r != EXIT_SUCCESS) /* If something strange happened with the child, let's consider this fatal, too */
d251207d
LP
2046 return -EIO;
2047
2048 return 0;
2049}
2050
3536f49e 2051static int setup_exec_directory(
07689d5d
LP
2052 const ExecContext *context,
2053 const ExecParameters *params,
2054 uid_t uid,
3536f49e 2055 gid_t gid,
3536f49e
YW
2056 ExecDirectoryType type,
2057 int *exit_status) {
07689d5d 2058
72fd1768 2059 static const int exit_status_table[_EXEC_DIRECTORY_TYPE_MAX] = {
3536f49e
YW
2060 [EXEC_DIRECTORY_RUNTIME] = EXIT_RUNTIME_DIRECTORY,
2061 [EXEC_DIRECTORY_STATE] = EXIT_STATE_DIRECTORY,
2062 [EXEC_DIRECTORY_CACHE] = EXIT_CACHE_DIRECTORY,
2063 [EXEC_DIRECTORY_LOGS] = EXIT_LOGS_DIRECTORY,
2064 [EXEC_DIRECTORY_CONFIGURATION] = EXIT_CONFIGURATION_DIRECTORY,
2065 };
07689d5d
LP
2066 char **rt;
2067 int r;
2068
2069 assert(context);
2070 assert(params);
72fd1768 2071 assert(type >= 0 && type < _EXEC_DIRECTORY_TYPE_MAX);
3536f49e 2072 assert(exit_status);
07689d5d 2073
3536f49e
YW
2074 if (!params->prefix[type])
2075 return 0;
2076
8679efde 2077 if (params->flags & EXEC_CHOWN_DIRECTORIES) {
3536f49e
YW
2078 if (!uid_is_valid(uid))
2079 uid = 0;
2080 if (!gid_is_valid(gid))
2081 gid = 0;
2082 }
2083
2084 STRV_FOREACH(rt, context->directories[type].paths) {
6c47cd7d 2085 _cleanup_free_ char *p = NULL, *pp = NULL;
07689d5d 2086
edbfeb12 2087 p = path_join(params->prefix[type], *rt);
3536f49e
YW
2088 if (!p) {
2089 r = -ENOMEM;
2090 goto fail;
2091 }
07689d5d 2092
23a7448e
YW
2093 r = mkdir_parents_label(p, 0755);
2094 if (r < 0)
3536f49e 2095 goto fail;
23a7448e 2096
8092a48c 2097 if (context->dynamic_user &&
40cd2ecc
LP
2098 (!IN_SET(type, EXEC_DIRECTORY_RUNTIME, EXEC_DIRECTORY_CONFIGURATION) ||
2099 (type == EXEC_DIRECTORY_RUNTIME && context->runtime_directory_preserve_mode != EXEC_PRESERVE_NO))) {
6c9c51e5 2100 _cleanup_free_ char *private_root = NULL;
6c47cd7d 2101
3f5b1508
LP
2102 /* So, here's one extra complication when dealing with DynamicUser=1 units. In that
2103 * case we want to avoid leaving a directory around fully accessible that is owned by
2104 * a dynamic user whose UID is later on reused. To lock this down we use the same
2105 * trick used by container managers to prohibit host users to get access to files of
2106 * the same UID in containers: we place everything inside a directory that has an
2107 * access mode of 0700 and is owned root:root, so that it acts as security boundary
2108 * for unprivileged host code. We then use fs namespacing to make this directory
2109 * permeable for the service itself.
6c47cd7d 2110 *
3f5b1508
LP
2111 * Specifically: for a service which wants a special directory "foo/" we first create
2112 * a directory "private/" with access mode 0700 owned by root:root. Then we place
2113 * "foo" inside of that directory (i.e. "private/foo/"), and make "foo" a symlink to
2114 * "private/foo". This way, privileged host users can access "foo/" as usual, but
2115 * unprivileged host users can't look into it. Inside of the namespace of the unit
2116 * "private/" is replaced by a more liberally accessible tmpfs, into which the host's
2117 * "private/foo/" is mounted under the same name, thus disabling the access boundary
2118 * for the service and making sure it only gets access to the dirs it needs but no
2119 * others. Tricky? Yes, absolutely, but it works!
6c47cd7d 2120 *
3f5b1508
LP
2121 * Note that we don't do this for EXEC_DIRECTORY_CONFIGURATION as that's assumed not
2122 * to be owned by the service itself.
2123 *
2124 * Also, note that we don't do this for EXEC_DIRECTORY_RUNTIME as that's often used
2125 * for sharing files or sockets with other services. */
6c47cd7d 2126
edbfeb12 2127 private_root = path_join(params->prefix[type], "private");
6c47cd7d
LP
2128 if (!private_root) {
2129 r = -ENOMEM;
2130 goto fail;
2131 }
2132
2133 /* First set up private root if it doesn't exist yet, with access mode 0700 and owned by root:root */
37c1d5e9 2134 r = mkdir_safe_label(private_root, 0700, 0, 0, MKDIR_WARN_MODE);
6c47cd7d
LP
2135 if (r < 0)
2136 goto fail;
2137
edbfeb12 2138 pp = path_join(private_root, *rt);
6c47cd7d
LP
2139 if (!pp) {
2140 r = -ENOMEM;
2141 goto fail;
2142 }
2143
2144 /* Create all directories between the configured directory and this private root, and mark them 0755 */
2145 r = mkdir_parents_label(pp, 0755);
2146 if (r < 0)
2147 goto fail;
2148
949befd3
LP
2149 if (is_dir(p, false) > 0 &&
2150 (laccess(pp, F_OK) < 0 && errno == ENOENT)) {
2151
2152 /* Hmm, the private directory doesn't exist yet, but the normal one exists? If so, move
2153 * it over. Most likely the service has been upgraded from one that didn't use
2154 * DynamicUser=1, to one that does. */
2155
cf52c45d
LP
2156 log_info("Found pre-existing public %s= directory %s, migrating to %s.\n"
2157 "Apparently, service previously had DynamicUser= turned off, and has now turned it on.",
2158 exec_directory_type_to_string(type), p, pp);
2159
949befd3
LP
2160 if (rename(p, pp) < 0) {
2161 r = -errno;
2162 goto fail;
2163 }
2164 } else {
2165 /* Otherwise, create the actual directory for the service */
2166
2167 r = mkdir_label(pp, context->directories[type].mode);
2168 if (r < 0 && r != -EEXIST)
2169 goto fail;
2170 }
6c47cd7d 2171
6c47cd7d 2172 /* And link it up from the original place */
6c9c51e5 2173 r = symlink_idempotent(pp, p, true);
6c47cd7d
LP
2174 if (r < 0)
2175 goto fail;
2176
6c47cd7d 2177 } else {
5c6d40d1
LP
2178 _cleanup_free_ char *target = NULL;
2179
2180 if (type != EXEC_DIRECTORY_CONFIGURATION &&
2181 readlink_and_make_absolute(p, &target) >= 0) {
2182 _cleanup_free_ char *q = NULL;
2183
2184 /* This already exists and is a symlink? Interesting. Maybe it's one created
2193f17c
LP
2185 * by DynamicUser=1 (see above)?
2186 *
2187 * We do this for all directory types except for ConfigurationDirectory=,
2188 * since they all support the private/ symlink logic at least in some
2189 * configurations, see above. */
5c6d40d1
LP
2190
2191 q = path_join(params->prefix[type], "private", *rt);
2192 if (!q) {
2193 r = -ENOMEM;
2194 goto fail;
2195 }
2196
2197 if (path_equal(q, target)) {
2198
2199 /* Hmm, apparently DynamicUser= was once turned on for this service,
2200 * but is no longer. Let's move the directory back up. */
2201
cf52c45d
LP
2202 log_info("Found pre-existing private %s= directory %s, migrating to %s.\n"
2203 "Apparently, service previously had DynamicUser= turned on, and has now turned it off.",
2204 exec_directory_type_to_string(type), q, p);
2205
5c6d40d1
LP
2206 if (unlink(p) < 0) {
2207 r = -errno;
2208 goto fail;
2209 }
2210
2211 if (rename(q, p) < 0) {
2212 r = -errno;
2213 goto fail;
2214 }
2215 }
2216 }
2217
6c47cd7d 2218 r = mkdir_label(p, context->directories[type].mode);
d484580c 2219 if (r < 0) {
d484580c
LP
2220 if (r != -EEXIST)
2221 goto fail;
2222
206e9864
LP
2223 if (type == EXEC_DIRECTORY_CONFIGURATION) {
2224 struct stat st;
2225
2226 /* Don't change the owner/access mode of the configuration directory,
2227 * as in the common case it is not written to by a service, and shall
2228 * not be writable. */
2229
2230 if (stat(p, &st) < 0) {
2231 r = -errno;
2232 goto fail;
2233 }
2234
2235 /* Still complain if the access mode doesn't match */
2236 if (((st.st_mode ^ context->directories[type].mode) & 07777) != 0)
2237 log_warning("%s \'%s\' already exists but the mode is different. "
2238 "(File system: %o %sMode: %o)",
2239 exec_directory_type_to_string(type), *rt,
2240 st.st_mode & 07777, exec_directory_type_to_string(type), context->directories[type].mode & 07777);
2241
6cff72eb 2242 continue;
206e9864 2243 }
6cff72eb 2244 }
a1164ae3 2245 }
07689d5d 2246
206e9864 2247 /* Lock down the access mode (we use chmod_and_chown() to make this idempotent. We don't
5238e957 2248 * specify UID/GID here, so that path_chown_recursive() can optimize things depending on the
206e9864
LP
2249 * current UID/GID ownership.) */
2250 r = chmod_and_chown(pp ?: p, context->directories[type].mode, UID_INVALID, GID_INVALID);
2251 if (r < 0)
2252 goto fail;
c71b2eb7 2253
607b358e
LP
2254 /* Then, change the ownership of the whole tree, if necessary. When dynamic users are used we
2255 * drop the suid/sgid bits, since we really don't want SUID/SGID files for dynamic UID/GID
2256 * assignments to exist.*/
2257 r = path_chown_recursive(pp ?: p, uid, gid, context->dynamic_user ? 01777 : 07777);
07689d5d 2258 if (r < 0)
3536f49e 2259 goto fail;
07689d5d
LP
2260 }
2261
2262 return 0;
3536f49e
YW
2263
2264fail:
2265 *exit_status = exit_status_table[type];
3536f49e 2266 return r;
07689d5d
LP
2267}
2268
#if ENABLE_SMACK
/* Applies the SMACK process label to the calling process (PID 0 is passed to
 * mac_smack_apply_pid()). The label configured in the context takes precedence. If none is
 * configured and SMACK_DEFAULT_PROCESS_LABEL is defined at build time, the SMACK_ATTR_EXEC label read
 * from the command's binary is applied, falling back to that default. Returns 0 on success, a
 * negative value on failure. */
static int setup_smack(
                const ExecContext *context,
                const ExecCommand *command) {

        int r;

        assert(context);
        assert(command);

        if (context->smack_process_label) {
                r = mac_smack_apply_pid(0, context->smack_process_label);
                if (r < 0)
                        return r;
        }
#ifdef SMACK_DEFAULT_PROCESS_LABEL
        else {
                _cleanup_free_ char *binary_label = NULL;

                /* A binary without label (-ENODATA) or lack of support (-EOPNOTSUPP) is not an
                 * error, in that case the built-in default label is used below. */
                r = mac_smack_read(command->path, SMACK_ATTR_EXEC, &binary_label);
                if (r < 0 && r != -ENODATA && r != -EOPNOTSUPP)
                        return r;

                r = mac_smack_apply_pid(0, binary_label ?: SMACK_DEFAULT_PROCESS_LABEL);
                if (r < 0)
                        return r;
        }
#endif

        return 0;
}
#endif
cefc33ae 2301
6c47cd7d
LP
2302static int compile_bind_mounts(
2303 const ExecContext *context,
2304 const ExecParameters *params,
2305 BindMount **ret_bind_mounts,
da6053d0 2306 size_t *ret_n_bind_mounts,
6c47cd7d
LP
2307 char ***ret_empty_directories) {
2308
2309 _cleanup_strv_free_ char **empty_directories = NULL;
2310 BindMount *bind_mounts;
da6053d0 2311 size_t n, h = 0, i;
6c47cd7d
LP
2312 ExecDirectoryType t;
2313 int r;
2314
2315 assert(context);
2316 assert(params);
2317 assert(ret_bind_mounts);
2318 assert(ret_n_bind_mounts);
2319 assert(ret_empty_directories);
2320
2321 n = context->n_bind_mounts;
2322 for (t = 0; t < _EXEC_DIRECTORY_TYPE_MAX; t++) {
2323 if (!params->prefix[t])
2324 continue;
2325
2326 n += strv_length(context->directories[t].paths);
2327 }
2328
2329 if (n <= 0) {
2330 *ret_bind_mounts = NULL;
2331 *ret_n_bind_mounts = 0;
2332 *ret_empty_directories = NULL;
2333 return 0;
2334 }
2335
2336 bind_mounts = new(BindMount, n);
2337 if (!bind_mounts)
2338 return -ENOMEM;
2339
a8cabc61 2340 for (i = 0; i < context->n_bind_mounts; i++) {
6c47cd7d
LP
2341 BindMount *item = context->bind_mounts + i;
2342 char *s, *d;
2343
2344 s = strdup(item->source);
2345 if (!s) {
2346 r = -ENOMEM;
2347 goto finish;
2348 }
2349
2350 d = strdup(item->destination);
2351 if (!d) {
2352 free(s);
2353 r = -ENOMEM;
2354 goto finish;
2355 }
2356
2357 bind_mounts[h++] = (BindMount) {
2358 .source = s,
2359 .destination = d,
2360 .read_only = item->read_only,
2361 .recursive = item->recursive,
2362 .ignore_enoent = item->ignore_enoent,
2363 };
2364 }
2365
2366 for (t = 0; t < _EXEC_DIRECTORY_TYPE_MAX; t++) {
2367 char **suffix;
2368
2369 if (!params->prefix[t])
2370 continue;
2371
2372 if (strv_isempty(context->directories[t].paths))
2373 continue;
2374
8092a48c 2375 if (context->dynamic_user &&
5609f688
YW
2376 !IN_SET(t, EXEC_DIRECTORY_RUNTIME, EXEC_DIRECTORY_CONFIGURATION) &&
2377 !(context->root_directory || context->root_image)) {
6c47cd7d
LP
2378 char *private_root;
2379
2380 /* So this is for a dynamic user, and we need to make sure the process can access its own
2381 * directory. For that we overmount the usually inaccessible "private" subdirectory with a
2382 * tmpfs that makes it accessible and is empty except for the submounts we do this for. */
2383
657ee2d8 2384 private_root = path_join(params->prefix[t], "private");
6c47cd7d
LP
2385 if (!private_root) {
2386 r = -ENOMEM;
2387 goto finish;
2388 }
2389
2390 r = strv_consume(&empty_directories, private_root);
a635a7ae 2391 if (r < 0)
6c47cd7d 2392 goto finish;
6c47cd7d
LP
2393 }
2394
2395 STRV_FOREACH(suffix, context->directories[t].paths) {
2396 char *s, *d;
2397
8092a48c
YW
2398 if (context->dynamic_user &&
2399 !IN_SET(t, EXEC_DIRECTORY_RUNTIME, EXEC_DIRECTORY_CONFIGURATION))
657ee2d8 2400 s = path_join(params->prefix[t], "private", *suffix);
6c47cd7d 2401 else
657ee2d8 2402 s = path_join(params->prefix[t], *suffix);
6c47cd7d
LP
2403 if (!s) {
2404 r = -ENOMEM;
2405 goto finish;
2406 }
2407
5609f688
YW
2408 if (context->dynamic_user &&
2409 !IN_SET(t, EXEC_DIRECTORY_RUNTIME, EXEC_DIRECTORY_CONFIGURATION) &&
2410 (context->root_directory || context->root_image))
2411 /* When RootDirectory= or RootImage= are set, then the symbolic link to the private
2412 * directory is not created on the root directory. So, let's bind-mount the directory
2413 * on the 'non-private' place. */
657ee2d8 2414 d = path_join(params->prefix[t], *suffix);
5609f688
YW
2415 else
2416 d = strdup(s);
6c47cd7d
LP
2417 if (!d) {
2418 free(s);
2419 r = -ENOMEM;
2420 goto finish;
2421 }
2422
2423 bind_mounts[h++] = (BindMount) {
2424 .source = s,
2425 .destination = d,
2426 .read_only = false,
9ce4e4b0 2427 .nosuid = context->dynamic_user, /* don't allow suid/sgid when DynamicUser= is on */
6c47cd7d
LP
2428 .recursive = true,
2429 .ignore_enoent = false,
2430 };
2431 }
2432 }
2433
2434 assert(h == n);
2435
2436 *ret_bind_mounts = bind_mounts;
2437 *ret_n_bind_mounts = n;
ae2a15bc 2438 *ret_empty_directories = TAKE_PTR(empty_directories);
6c47cd7d
LP
2439
2440 return (int) n;
2441
2442finish:
2443 bind_mount_free_many(bind_mounts, h);
2444 return r;
2445}
2446
6818c54c 2447static int apply_mount_namespace(
34cf6c43
YW
2448 const Unit *u,
2449 const ExecCommand *command,
6818c54c
LP
2450 const ExecContext *context,
2451 const ExecParameters *params,
7cc5ef5f
ZJS
2452 const ExecRuntime *runtime,
2453 char **error_path) {
6818c54c 2454
7bcef4ef 2455 _cleanup_strv_free_ char **empty_directories = NULL;
93c6bb51 2456 char *tmp = NULL, *var = NULL;
915e6d16 2457 const char *root_dir = NULL, *root_image = NULL;
228af36f 2458 NamespaceInfo ns_info;
165a31c0 2459 bool needs_sandboxing;
6c47cd7d 2460 BindMount *bind_mounts = NULL;
da6053d0 2461 size_t n_bind_mounts = 0;
6818c54c 2462 int r;
93c6bb51 2463
2b3c1b9e
DH
2464 assert(context);
2465
93c6bb51
DH
2466 /* The runtime struct only contains the parent of the private /tmp,
2467 * which is non-accessible to world users. Inside of it there's a /tmp
2468 * that is sticky, and that's the one we want to use here. */
2469
2470 if (context->private_tmp && runtime) {
2471 if (runtime->tmp_dir)
2472 tmp = strjoina(runtime->tmp_dir, "/tmp");
2473 if (runtime->var_tmp_dir)
2474 var = strjoina(runtime->var_tmp_dir, "/tmp");
2475 }
2476
915e6d16
LP
2477 if (params->flags & EXEC_APPLY_CHROOT) {
2478 root_image = context->root_image;
2479
2480 if (!root_image)
2481 root_dir = context->root_directory;
2482 }
93c6bb51 2483
6c47cd7d
LP
2484 r = compile_bind_mounts(context, params, &bind_mounts, &n_bind_mounts, &empty_directories);
2485 if (r < 0)
2486 return r;
2487
165a31c0 2488 needs_sandboxing = (params->flags & EXEC_APPLY_SANDBOXING) && !(command->flags & EXEC_COMMAND_FULLY_PRIVILEGED);
b5a33299
YW
2489 if (needs_sandboxing)
2490 ns_info = (NamespaceInfo) {
2491 .ignore_protect_paths = false,
2492 .private_dev = context->private_devices,
2493 .protect_control_groups = context->protect_control_groups,
2494 .protect_kernel_tunables = context->protect_kernel_tunables,
2495 .protect_kernel_modules = context->protect_kernel_modules,
aecd5ac6 2496 .protect_hostname = context->protect_hostname,
b5a33299 2497 .mount_apivfs = context->mount_apivfs,
228af36f 2498 .private_mounts = context->private_mounts,
b5a33299 2499 };
228af36f
LP
2500 else if (!context->dynamic_user && root_dir)
2501 /*
2502 * If DynamicUser=no and RootDirectory= is set then lets pass a relaxed
2503 * sandbox info, otherwise enforce it, don't ignore protected paths and
2504 * fail if we are enable to apply the sandbox inside the mount namespace.
2505 */
2506 ns_info = (NamespaceInfo) {
2507 .ignore_protect_paths = true,
2508 };
2509 else
2510 ns_info = (NamespaceInfo) {};
b5a33299 2511
37ed15d7
FB
2512 if (context->mount_flags == MS_SHARED)
2513 log_unit_debug(u, "shared mount propagation hidden by other fs namespacing unit settings: ignoring");
2514
915e6d16 2515 r = setup_namespace(root_dir, root_image,
7bcef4ef 2516 &ns_info, context->read_write_paths,
165a31c0
LP
2517 needs_sandboxing ? context->read_only_paths : NULL,
2518 needs_sandboxing ? context->inaccessible_paths : NULL,
6c47cd7d
LP
2519 empty_directories,
2520 bind_mounts,
2521 n_bind_mounts,
2abd4e38
YW
2522 context->temporary_filesystems,
2523 context->n_temporary_filesystems,
93c6bb51
DH
2524 tmp,
2525 var,
165a31c0
LP
2526 needs_sandboxing ? context->protect_home : PROTECT_HOME_NO,
2527 needs_sandboxing ? context->protect_system : PROTECT_SYSTEM_NO,
915e6d16 2528 context->mount_flags,
7cc5ef5f
ZJS
2529 DISSECT_IMAGE_DISCARD_ON_LOOP,
2530 error_path);
93c6bb51 2531
6c47cd7d
LP
2532 bind_mount_free_many(bind_mounts, n_bind_mounts);
2533
1beab8b0 2534 /* If we couldn't set up the namespace this is probably due to a missing capability. setup_namespace() reports
5238e957 2535 * that with a special, recognizable error ENOANO. In this case, silently proceed, but only if exclusively
1beab8b0
LP
2536 * sandboxing options were used, i.e. nothing such as RootDirectory= or BindMount= that would result in a
2537 * completely different execution environment. */
aca835ed
YW
2538 if (r == -ENOANO) {
2539 if (n_bind_mounts == 0 &&
2540 context->n_temporary_filesystems == 0 &&
2541 !root_dir && !root_image &&
2542 !context->dynamic_user) {
2543 log_unit_debug(u, "Failed to set up namespace, assuming containerized execution and ignoring.");
2544 return 0;
2545 }
2546
2194547e
LP
2547 log_unit_debug(u, "Failed to set up namespace, and refusing to continue since the selected namespacing options alter mount environment non-trivially.\n"
2548 "Bind mounts: %zu, temporary filesystems: %zu, root directory: %s, root image: %s, dynamic user: %s",
2549 n_bind_mounts, context->n_temporary_filesystems, yes_no(root_dir), yes_no(root_image), yes_no(context->dynamic_user));
2550
aca835ed 2551 return -EOPNOTSUPP;
93c6bb51
DH
2552 }
2553
2554 return r;
2555}
2556
915e6d16
LP
2557static int apply_working_directory(
2558 const ExecContext *context,
2559 const ExecParameters *params,
2560 const char *home,
376fecf6 2561 int *exit_status) {
915e6d16 2562
6732edab 2563 const char *d, *wd;
2b3c1b9e
DH
2564
2565 assert(context);
376fecf6 2566 assert(exit_status);
2b3c1b9e 2567
6732edab
LP
2568 if (context->working_directory_home) {
2569
376fecf6
LP
2570 if (!home) {
2571 *exit_status = EXIT_CHDIR;
6732edab 2572 return -ENXIO;
376fecf6 2573 }
6732edab 2574
2b3c1b9e 2575 wd = home;
6732edab
LP
2576
2577 } else if (context->working_directory)
2b3c1b9e
DH
2578 wd = context->working_directory;
2579 else
2580 wd = "/";
e7f1e7c6 2581
fa97f630 2582 if (params->flags & EXEC_APPLY_CHROOT)
2b3c1b9e 2583 d = wd;
fa97f630 2584 else
3b0e5bb5 2585 d = prefix_roota(context->root_directory, wd);
e7f1e7c6 2586
376fecf6
LP
2587 if (chdir(d) < 0 && !context->working_directory_missing_ok) {
2588 *exit_status = EXIT_CHDIR;
2b3c1b9e 2589 return -errno;
376fecf6 2590 }
e7f1e7c6
DH
2591
2592 return 0;
2593}
2594
fa97f630
JB
2595static int apply_root_directory(
2596 const ExecContext *context,
2597 const ExecParameters *params,
2598 const bool needs_mount_ns,
2599 int *exit_status) {
2600
2601 assert(context);
2602 assert(exit_status);
2603
2604 if (params->flags & EXEC_APPLY_CHROOT) {
2605 if (!needs_mount_ns && context->root_directory)
2606 if (chroot(context->root_directory) < 0) {
2607 *exit_status = EXIT_CHROOT;
2608 return -errno;
2609 }
2610 }
2611
2612 return 0;
2613}
2614
b1edf445 2615static int setup_keyring(
34cf6c43 2616 const Unit *u,
b1edf445
LP
2617 const ExecContext *context,
2618 const ExecParameters *p,
2619 uid_t uid, gid_t gid) {
2620
74dd6b51 2621 key_serial_t keyring;
e64c2d0b
DJL
2622 int r = 0;
2623 uid_t saved_uid;
2624 gid_t saved_gid;
74dd6b51
LP
2625
2626 assert(u);
b1edf445 2627 assert(context);
74dd6b51
LP
2628 assert(p);
2629
2630 /* Let's set up a new per-service "session" kernel keyring for each system service. This has the benefit that
2631 * each service runs with its own keyring shared among all processes of the service, but with no hook-up beyond
2632 * that scope, and in particular no link to the per-UID keyring. If we don't do this the keyring will be
2633 * automatically created on-demand and then linked to the per-UID keyring, by the kernel. The kernel's built-in
2634 * on-demand behaviour is very appropriate for login users, but probably not so much for system services, where
2635 * UIDs are not necessarily specific to a service but reused (at least in the case of UID 0). */
2636
b1edf445
LP
2637 if (context->keyring_mode == EXEC_KEYRING_INHERIT)
2638 return 0;
2639
e64c2d0b
DJL
2640 /* Acquiring a reference to the user keyring is nasty. We briefly change identity in order to get things set up
2641 * properly by the kernel. If we don't do that then we can't create it atomically, and that sucks for parallel
2642 * execution. This mimics what pam_keyinit does, too. Setting up session keyring, to be owned by the right user
2643 * & group is just as nasty as acquiring a reference to the user keyring. */
2644
2645 saved_uid = getuid();
2646 saved_gid = getgid();
2647
2648 if (gid_is_valid(gid) && gid != saved_gid) {
2649 if (setregid(gid, -1) < 0)
2650 return log_unit_error_errno(u, errno, "Failed to change GID for user keyring: %m");
2651 }
2652
2653 if (uid_is_valid(uid) && uid != saved_uid) {
2654 if (setreuid(uid, -1) < 0) {
2655 r = log_unit_error_errno(u, errno, "Failed to change UID for user keyring: %m");
2656 goto out;
2657 }
2658 }
2659
74dd6b51
LP
2660 keyring = keyctl(KEYCTL_JOIN_SESSION_KEYRING, 0, 0, 0, 0);
2661 if (keyring == -1) {
2662 if (errno == ENOSYS)
8002fb97 2663 log_unit_debug_errno(u, errno, "Kernel keyring not supported, ignoring.");
74dd6b51 2664 else if (IN_SET(errno, EACCES, EPERM))
8002fb97 2665 log_unit_debug_errno(u, errno, "Kernel keyring access prohibited, ignoring.");
74dd6b51 2666 else if (errno == EDQUOT)
8002fb97 2667 log_unit_debug_errno(u, errno, "Out of kernel keyrings to allocate, ignoring.");
74dd6b51 2668 else
e64c2d0b 2669 r = log_unit_error_errno(u, errno, "Setting up kernel keyring failed: %m");
74dd6b51 2670
e64c2d0b 2671 goto out;
74dd6b51
LP
2672 }
2673
e64c2d0b
DJL
2674 /* When requested link the user keyring into the session keyring. */
2675 if (context->keyring_mode == EXEC_KEYRING_SHARED) {
2676
2677 if (keyctl(KEYCTL_LINK,
2678 KEY_SPEC_USER_KEYRING,
2679 KEY_SPEC_SESSION_KEYRING, 0, 0) < 0) {
2680 r = log_unit_error_errno(u, errno, "Failed to link user keyring into session keyring: %m");
2681 goto out;
2682 }
2683 }
2684
2685 /* Restore uid/gid back */
2686 if (uid_is_valid(uid) && uid != saved_uid) {
2687 if (setreuid(saved_uid, -1) < 0) {
2688 r = log_unit_error_errno(u, errno, "Failed to change UID back for user keyring: %m");
2689 goto out;
2690 }
2691 }
2692
2693 if (gid_is_valid(gid) && gid != saved_gid) {
2694 if (setregid(saved_gid, -1) < 0)
2695 return log_unit_error_errno(u, errno, "Failed to change GID back for user keyring: %m");
2696 }
2697
2698 /* Populate they keyring with the invocation ID by default, as original saved_uid. */
b3415f5d
LP
2699 if (!sd_id128_is_null(u->invocation_id)) {
2700 key_serial_t key;
2701
2702 key = add_key("user", "invocation_id", &u->invocation_id, sizeof(u->invocation_id), KEY_SPEC_SESSION_KEYRING);
2703 if (key == -1)
8002fb97 2704 log_unit_debug_errno(u, errno, "Failed to add invocation ID to keyring, ignoring: %m");
b3415f5d
LP
2705 else {
2706 if (keyctl(KEYCTL_SETPERM, key,
2707 KEY_POS_VIEW|KEY_POS_READ|KEY_POS_SEARCH|
2708 KEY_USR_VIEW|KEY_USR_READ|KEY_USR_SEARCH, 0, 0) < 0)
e64c2d0b 2709 r = log_unit_error_errno(u, errno, "Failed to restrict invocation ID permission: %m");
b3415f5d
LP
2710 }
2711 }
2712
e64c2d0b
DJL
2713out:
2714 /* Revert back uid & gid for the the last time, and exit */
2715 /* no extra logging, as only the first already reported error matters */
2716 if (getuid() != saved_uid)
2717 (void) setreuid(saved_uid, -1);
b1edf445 2718
e64c2d0b
DJL
2719 if (getgid() != saved_gid)
2720 (void) setregid(saved_gid, -1);
b1edf445 2721
e64c2d0b 2722 return r;
74dd6b51
LP
2723}
2724
/* Appends the valid (i.e. non-negative) file descriptors of the given pair to array, advancing the
 * element counter *n for each one added. Does nothing if no pair is passed. The caller has to
 * make sure array has room for up to two more entries. */
static void append_socket_pair(int *array, size_t *n, const int pair[static 2]) {
        size_t k;

        assert(array);
        assert(n);

        if (!pair)
                return;

        for (k = 0; k < 2; k++)
                if (pair[k] >= 0)
                        array[(*n)++] = pair[k];
}
2737
a34ceba6
LP
2738static int close_remaining_fds(
2739 const ExecParameters *params,
34cf6c43
YW
2740 const ExecRuntime *runtime,
2741 const DynamicCreds *dcreds,
00d9ef85 2742 int user_lookup_fd,
a34ceba6 2743 int socket_fd,
5686391b 2744 int exec_fd,
da6053d0 2745 int *fds, size_t n_fds) {
a34ceba6 2746
da6053d0 2747 size_t n_dont_close = 0;
00d9ef85 2748 int dont_close[n_fds + 12];
a34ceba6
LP
2749
2750 assert(params);
2751
2752 if (params->stdin_fd >= 0)
2753 dont_close[n_dont_close++] = params->stdin_fd;
2754 if (params->stdout_fd >= 0)
2755 dont_close[n_dont_close++] = params->stdout_fd;
2756 if (params->stderr_fd >= 0)
2757 dont_close[n_dont_close++] = params->stderr_fd;
2758
2759 if (socket_fd >= 0)
2760 dont_close[n_dont_close++] = socket_fd;
5686391b
LP
2761 if (exec_fd >= 0)
2762 dont_close[n_dont_close++] = exec_fd;
a34ceba6
LP
2763 if (n_fds > 0) {
2764 memcpy(dont_close + n_dont_close, fds, sizeof(int) * n_fds);
2765 n_dont_close += n_fds;
2766 }
2767
29206d46
LP
2768 if (runtime)
2769 append_socket_pair(dont_close, &n_dont_close, runtime->netns_storage_socket);
2770
2771 if (dcreds) {
2772 if (dcreds->user)
2773 append_socket_pair(dont_close, &n_dont_close, dcreds->user->storage_socket);
2774 if (dcreds->group)
2775 append_socket_pair(dont_close, &n_dont_close, dcreds->group->storage_socket);
a34ceba6
LP
2776 }
2777
00d9ef85
LP
2778 if (user_lookup_fd >= 0)
2779 dont_close[n_dont_close++] = user_lookup_fd;
2780
a34ceba6
LP
2781 return close_all_fds(dont_close, n_dont_close);
2782}
2783
00d9ef85
LP
2784static int send_user_lookup(
2785 Unit *unit,
2786 int user_lookup_fd,
2787 uid_t uid,
2788 gid_t gid) {
2789
2790 assert(unit);
2791
2792 /* Send the resolved UID/GID to PID 1 after we learnt it. We send a single datagram, containing the UID/GID
2793 * data as well as the unit name. Note that we suppress sending this if no user/group to resolve was
2794 * specified. */
2795
2796 if (user_lookup_fd < 0)
2797 return 0;
2798
2799 if (!uid_is_valid(uid) && !gid_is_valid(gid))
2800 return 0;
2801
2802 if (writev(user_lookup_fd,
2803 (struct iovec[]) {
e6a7ec4b
LP
2804 IOVEC_INIT(&uid, sizeof(uid)),
2805 IOVEC_INIT(&gid, sizeof(gid)),
2806 IOVEC_INIT_STRING(unit->id) }, 3) < 0)
00d9ef85
LP
2807 return -errno;
2808
2809 return 0;
2810}
2811
6732edab
LP
2812static int acquire_home(const ExecContext *c, uid_t uid, const char** home, char **buf) {
2813 int r;
2814
2815 assert(c);
2816 assert(home);
2817 assert(buf);
2818
2819 /* If WorkingDirectory=~ is set, try to acquire a usable home directory. */
2820
2821 if (*home)
2822 return 0;
2823
2824 if (!c->working_directory_home)
2825 return 0;
2826
6732edab
LP
2827 r = get_home_dir(buf);
2828 if (r < 0)
2829 return r;
2830
2831 *home = *buf;
2832 return 1;
2833}
2834
da50b85a
LP
2835static int compile_suggested_paths(const ExecContext *c, const ExecParameters *p, char ***ret) {
2836 _cleanup_strv_free_ char ** list = NULL;
2837 ExecDirectoryType t;
2838 int r;
2839
2840 assert(c);
2841 assert(p);
2842 assert(ret);
2843
2844 assert(c->dynamic_user);
2845
2846 /* Compile a list of paths that it might make sense to read the owning UID from to use as initial candidate for
2847 * dynamic UID allocation, in order to save us from doing costly recursive chown()s of the special
2848 * directories. */
2849
2850 for (t = 0; t < _EXEC_DIRECTORY_TYPE_MAX; t++) {
2851 char **i;
2852
2853 if (t == EXEC_DIRECTORY_CONFIGURATION)
2854 continue;
2855
2856 if (!p->prefix[t])
2857 continue;
2858
2859 STRV_FOREACH(i, c->directories[t].paths) {
2860 char *e;
2861
8092a48c 2862 if (t == EXEC_DIRECTORY_RUNTIME)
657ee2d8 2863 e = path_join(p->prefix[t], *i);
8092a48c 2864 else
657ee2d8 2865 e = path_join(p->prefix[t], "private", *i);
da50b85a
LP
2866 if (!e)
2867 return -ENOMEM;
2868
2869 r = strv_consume(&list, e);
2870 if (r < 0)
2871 return r;
2872 }
2873 }
2874
ae2a15bc 2875 *ret = TAKE_PTR(list);
da50b85a
LP
2876
2877 return 0;
2878}
2879
34cf6c43
YW
2880static char *exec_command_line(char **argv);
2881
78f93209
LP
2882static int exec_parameters_get_cgroup_path(const ExecParameters *params, char **ret) {
2883 bool using_subcgroup;
2884 char *p;
2885
2886 assert(params);
2887 assert(ret);
2888
2889 if (!params->cgroup_path)
2890 return -EINVAL;
2891
2892 /* If we are called for a unit where cgroup delegation is on, and the payload created its own populated
2893 * subcgroup (which we expect it to do, after all it asked for delegation), then we cannot place the control
2894 * processes started after the main unit's process in the unit's main cgroup because it is now an inner one,
2895 * and inner cgroups may not contain processes. Hence, if delegation is on, and this is a control process,
2896 * let's use ".control" as subcgroup instead. Note that we do so only for ExecStartPost=, ExecReload=,
2897 * ExecStop=, ExecStopPost=, i.e. for the commands where the main process is already forked. For ExecStartPre=
2898 * this is not necessary, the cgroup is still empty. We distinguish these cases with the EXEC_CONTROL_CGROUP
2899 * flag, which is only passed for the former statements, not for the latter. */
2900
2901 using_subcgroup = FLAGS_SET(params->flags, EXEC_CONTROL_CGROUP|EXEC_CGROUP_DELEGATE|EXEC_IS_CONTROL);
2902 if (using_subcgroup)
657ee2d8 2903 p = path_join(params->cgroup_path, ".control");
78f93209
LP
2904 else
2905 p = strdup(params->cgroup_path);
2906 if (!p)
2907 return -ENOMEM;
2908
2909 *ret = p;
2910 return using_subcgroup;
2911}
2912
ff0af2a1 2913static int exec_child(
f2341e0a 2914 Unit *unit,
34cf6c43 2915 const ExecCommand *command,
ff0af2a1
LP
2916 const ExecContext *context,
2917 const ExecParameters *params,
2918 ExecRuntime *runtime,
29206d46 2919 DynamicCreds *dcreds,
ff0af2a1 2920 int socket_fd,
52c239d7 2921 int named_iofds[3],
4c47affc 2922 int *fds,
da6053d0 2923 size_t n_socket_fds,
25b583d7 2924 size_t n_storage_fds,
ff0af2a1 2925 char **files_env,
00d9ef85 2926 int user_lookup_fd,
12145637 2927 int *exit_status) {
d35fbf6b 2928
7ca69792 2929 _cleanup_strv_free_ char **our_env = NULL, **pass_env = NULL, **accum_env = NULL, **replaced_argv = NULL;
5686391b 2930 int *fds_with_exec_fd, n_fds_with_exec_fd, r, ngids = 0, exec_fd = -1;
4d885bd3
DH
2931 _cleanup_free_ gid_t *supplementary_gids = NULL;
2932 const char *username = NULL, *groupname = NULL;
5686391b 2933 _cleanup_free_ char *home_buffer = NULL;
2b3c1b9e 2934 const char *home = NULL, *shell = NULL;
7ca69792 2935 char **final_argv = NULL;
7bce046b
LP
2936 dev_t journal_stream_dev = 0;
2937 ino_t journal_stream_ino = 0;
165a31c0
LP
2938 bool needs_sandboxing, /* Do we need to set up full sandboxing? (i.e. all namespacing, all MAC stuff, caps, yadda yadda */
2939 needs_setuid, /* Do we need to do the actual setresuid()/setresgid() calls? */
2940 needs_mount_namespace, /* Do we need to set up a mount namespace for this kernel? */
2941 needs_ambient_hack; /* Do we need to apply the ambient capabilities hack? */
349cc4a5 2942#if HAVE_SELINUX
7f59dd35 2943 _cleanup_free_ char *mac_selinux_context_net = NULL;
43b1f709 2944 bool use_selinux = false;
ecfbc84f 2945#endif
f9fa32f0 2946#if ENABLE_SMACK
43b1f709 2947 bool use_smack = false;
ecfbc84f 2948#endif
349cc4a5 2949#if HAVE_APPARMOR
43b1f709 2950 bool use_apparmor = false;
ecfbc84f 2951#endif
fed1e721
LP
2952 uid_t uid = UID_INVALID;
2953 gid_t gid = GID_INVALID;
da6053d0 2954 size_t n_fds;
3536f49e 2955 ExecDirectoryType dt;
165a31c0 2956 int secure_bits;
034c6ed7 2957
f2341e0a 2958 assert(unit);
5cb5a6ff
LP
2959 assert(command);
2960 assert(context);
d35fbf6b 2961 assert(params);
ff0af2a1 2962 assert(exit_status);
d35fbf6b
DM
2963
2964 rename_process_from_path(command->path);
2965
2966 /* We reset exactly these signals, since they are the
2967 * only ones we set to SIG_IGN in the main daemon. All
2968 * others we leave untouched because we set them to
2969 * SIG_DFL or a valid handler initially, both of which
2970 * will be demoted to SIG_DFL. */
ce30c8dc
LP
2971 (void) default_signals(SIGNALS_CRASH_HANDLER,
2972 SIGNALS_IGNORE, -1);
d35fbf6b
DM
2973
2974 if (context->ignore_sigpipe)
ce30c8dc 2975 (void) ignore_signals(SIGPIPE, -1);
d35fbf6b 2976
ff0af2a1
LP
2977 r = reset_signal_mask();
2978 if (r < 0) {
2979 *exit_status = EXIT_SIGNAL_MASK;
12145637 2980 return log_unit_error_errno(unit, r, "Failed to set process signal mask: %m");
d35fbf6b 2981 }
034c6ed7 2982
d35fbf6b
DM
2983 if (params->idle_pipe)
2984 do_idle_pipe_dance(params->idle_pipe);
4f2d528d 2985
2c027c62
LP
2986 /* Close fds we don't need very early to make sure we don't block init reexecution because it cannot bind its
2987 * sockets. Among the fds we close are the logging fds, and we want to keep them closed, so that we don't have
2988 * any fds open we don't really want open during the transition. In order to make logging work, we switch the
2989 * log subsystem into open_when_needed mode, so that it reopens the logs on every single log call. */
ff0af2a1 2990
d35fbf6b 2991 log_forget_fds();
2c027c62 2992 log_set_open_when_needed(true);
4f2d528d 2993
40a80078
LP
2994 /* In case anything used libc syslog(), close this here, too */
2995 closelog();
2996
5686391b
LP
2997 n_fds = n_socket_fds + n_storage_fds;
2998 r = close_remaining_fds(params, runtime, dcreds, user_lookup_fd, socket_fd, params->exec_fd, fds, n_fds);
ff0af2a1
LP
2999 if (r < 0) {
3000 *exit_status = EXIT_FDS;
12145637 3001 return log_unit_error_errno(unit, r, "Failed to close unwanted file descriptors: %m");
8c7be95e
LP
3002 }
3003
d35fbf6b
DM
3004 if (!context->same_pgrp)
3005 if (setsid() < 0) {
ff0af2a1 3006 *exit_status = EXIT_SETSID;
12145637 3007 return log_unit_error_errno(unit, errno, "Failed to create new process session: %m");
d35fbf6b 3008 }
9e2f7c11 3009
1e22b5cd 3010 exec_context_tty_reset(context, params);
d35fbf6b 3011
c891efaf 3012 if (unit_shall_confirm_spawn(unit)) {
7d5ceb64 3013 const char *vc = params->confirm_spawn;
3b20f877
FB
3014 _cleanup_free_ char *cmdline = NULL;
3015
ee39ca20 3016 cmdline = exec_command_line(command->argv);
3b20f877 3017 if (!cmdline) {
0460aa5c 3018 *exit_status = EXIT_MEMORY;
12145637 3019 return log_oom();
3b20f877 3020 }
d35fbf6b 3021
eedf223a 3022 r = ask_for_confirmation(vc, unit, cmdline);
3b20f877
FB
3023 if (r != CONFIRM_EXECUTE) {
3024 if (r == CONFIRM_PRETEND_SUCCESS) {
3025 *exit_status = EXIT_SUCCESS;
3026 return 0;
3027 }
ff0af2a1 3028 *exit_status = EXIT_CONFIRM;
12145637 3029 log_unit_error(unit, "Execution cancelled by the user");
d35fbf6b 3030 return -ECANCELED;
d35fbf6b
DM
3031 }
3032 }
1a63a750 3033
d521916d
LP
3034 /* We are about to invoke NSS and PAM modules. Let's tell them what we are doing here, maybe they care. This is
3035 * used by nss-resolve to disable itself when we are about to start systemd-resolved, to avoid deadlocks. Note
3036 * that these env vars do not survive the execve(), which means they really only apply to the PAM and NSS
3037 * invocations themselves. Also note that while we'll only invoke NSS modules involved in user management they
3038 * might internally call into other NSS modules that are involved in hostname resolution, we never know. */
3039 if (setenv("SYSTEMD_ACTIVATION_UNIT", unit->id, true) != 0 ||
3040 setenv("SYSTEMD_ACTIVATION_SCOPE", MANAGER_IS_SYSTEM(unit->manager) ? "system" : "user", true) != 0) {
3041 *exit_status = EXIT_MEMORY;
3042 return log_unit_error_errno(unit, errno, "Failed to update environment: %m");
3043 }
3044
29206d46 3045 if (context->dynamic_user && dcreds) {
da50b85a 3046 _cleanup_strv_free_ char **suggested_paths = NULL;
29206d46 3047
d521916d
LP
3048 /* On top of that, make sure we bypass our own NSS module nss-systemd comprehensively for any NSS
3049 * checks, if DynamicUser=1 is used, as we shouldn't create a feedback loop with ourselves here.*/
409093fe
LP
3050 if (putenv((char*) "SYSTEMD_NSS_DYNAMIC_BYPASS=1") != 0) {
3051 *exit_status = EXIT_USER;
12145637 3052 return log_unit_error_errno(unit, errno, "Failed to update environment: %m");
409093fe
LP
3053 }
3054
da50b85a
LP
3055 r = compile_suggested_paths(context, params, &suggested_paths);
3056 if (r < 0) {
3057 *exit_status = EXIT_MEMORY;
3058 return log_oom();
3059 }
3060
3061 r = dynamic_creds_realize(dcreds, suggested_paths, &uid, &gid);
ff0af2a1
LP
3062 if (r < 0) {
3063 *exit_status = EXIT_USER;
e2b0cc34
YW
3064 if (r == -EILSEQ) {
3065 log_unit_error(unit, "Failed to update dynamic user credentials: User or group with specified name already exists.");
3066 return -EOPNOTSUPP;
3067 }
12145637 3068 return log_unit_error_errno(unit, r, "Failed to update dynamic user credentials: %m");
524daa8c 3069 }
524daa8c 3070
70dd455c 3071 if (!uid_is_valid(uid)) {
29206d46 3072 *exit_status = EXIT_USER;
12145637 3073 log_unit_error(unit, "UID validation failed for \""UID_FMT"\"", uid);
70dd455c
ZJS
3074 return -ESRCH;
3075 }
3076
3077 if (!gid_is_valid(gid)) {
3078 *exit_status = EXIT_USER;
12145637 3079 log_unit_error(unit, "GID validation failed for \""GID_FMT"\"", gid);
29206d46
LP
3080 return -ESRCH;
3081 }
5bc7452b 3082
29206d46
LP
3083 if (dcreds->user)
3084 username = dcreds->user->name;
3085
3086 } else {
4d885bd3
DH
3087 r = get_fixed_user(context, &username, &uid, &gid, &home, &shell);
3088 if (r < 0) {
3089 *exit_status = EXIT_USER;
12145637 3090 return log_unit_error_errno(unit, r, "Failed to determine user credentials: %m");
5bc7452b 3091 }
5bc7452b 3092
4d885bd3
DH
3093 r = get_fixed_group(context, &groupname, &gid);
3094 if (r < 0) {
3095 *exit_status = EXIT_GROUP;
12145637 3096 return log_unit_error_errno(unit, r, "Failed to determine group credentials: %m");
4d885bd3 3097 }
cdc5d5c5 3098 }
29206d46 3099
cdc5d5c5
DH
3100 /* Initialize user supplementary groups and get SupplementaryGroups= ones */
3101 r = get_supplementary_groups(context, username, groupname, gid,
3102 &supplementary_gids, &ngids);
3103 if (r < 0) {
3104 *exit_status = EXIT_GROUP;
12145637 3105 return log_unit_error_errno(unit, r, "Failed to determine supplementary groups: %m");
29206d46 3106 }
5bc7452b 3107
00d9ef85
LP
3108 r = send_user_lookup(unit, user_lookup_fd, uid, gid);
3109 if (r < 0) {
3110 *exit_status = EXIT_USER;
12145637 3111 return log_unit_error_errno(unit, r, "Failed to send user credentials to PID1: %m");
00d9ef85
LP
3112 }
3113
3114 user_lookup_fd = safe_close(user_lookup_fd);
3115
6732edab
LP
3116 r = acquire_home(context, uid, &home, &home_buffer);
3117 if (r < 0) {
3118 *exit_status = EXIT_CHDIR;
12145637 3119 return log_unit_error_errno(unit, r, "Failed to determine $HOME for user: %m");
6732edab
LP
3120 }
3121
d35fbf6b
DM
3122 /* If a socket is connected to STDIN/STDOUT/STDERR, we
3123 * must make sure to drop O_NONBLOCK */
3124 if (socket_fd >= 0)
a34ceba6 3125 (void) fd_nonblock(socket_fd, false);
acbb0225 3126
4c70a4a7
MS
3127 /* Journald will try to look-up our cgroup in order to populate _SYSTEMD_CGROUP and _SYSTEMD_UNIT fields.
3128 * Hence we need to migrate to the target cgroup from init.scope before connecting to journald */
3129 if (params->cgroup_path) {
3130 _cleanup_free_ char *p = NULL;
3131
3132 r = exec_parameters_get_cgroup_path(params, &p);
3133 if (r < 0) {
3134 *exit_status = EXIT_CGROUP;
3135 return log_unit_error_errno(unit, r, "Failed to acquire cgroup path: %m");
3136 }
3137
3138 r = cg_attach_everywhere(params->cgroup_supported, p, 0, NULL, NULL);
3139 if (r < 0) {
3140 *exit_status = EXIT_CGROUP;
3141 return log_unit_error_errno(unit, r, "Failed to attach to cgroup %s: %m", p);
3142 }
3143 }
3144
a8d08f39
LP
3145 if (context->network_namespace_path && runtime && runtime->netns_storage_socket[0] >= 0) {
3146 r = open_netns_path(runtime->netns_storage_socket, context->network_namespace_path);
3147 if (r < 0) {
3148 *exit_status = EXIT_NETWORK;
3149 return log_unit_error_errno(unit, r, "Failed to open network namespace path %s: %m", context->network_namespace_path);
3150 }
3151 }
3152
52c239d7 3153 r = setup_input(context, params, socket_fd, named_iofds);
ff0af2a1
LP
3154 if (r < 0) {
3155 *exit_status = EXIT_STDIN;
12145637 3156 return log_unit_error_errno(unit, r, "Failed to set up standard input: %m");
d35fbf6b 3157 }
034c6ed7 3158
52c239d7 3159 r = setup_output(unit, context, params, STDOUT_FILENO, socket_fd, named_iofds, basename(command->path), uid, gid, &journal_stream_dev, &journal_stream_ino);
ff0af2a1
LP
3160 if (r < 0) {
3161 *exit_status = EXIT_STDOUT;
12145637 3162 return log_unit_error_errno(unit, r, "Failed to set up standard output: %m");
d35fbf6b
DM
3163 }
3164
52c239d7 3165 r = setup_output(unit, context, params, STDERR_FILENO, socket_fd, named_iofds, basename(command->path), uid, gid, &journal_stream_dev, &journal_stream_ino);
ff0af2a1
LP
3166 if (r < 0) {
3167 *exit_status = EXIT_STDERR;
12145637 3168 return log_unit_error_errno(unit, r, "Failed to set up standard error output: %m");
d35fbf6b
DM
3169 }
3170
d35fbf6b 3171 if (context->oom_score_adjust_set) {
9f8168eb
LP
3172 /* When we can't make this change due to EPERM, then let's silently skip over it. User namespaces
3173 * prohibit write access to this file, and we shouldn't trip up over that. */
3174 r = set_oom_score_adjust(context->oom_score_adjust);
12145637 3175 if (IN_SET(r, -EPERM, -EACCES))
f2341e0a 3176 log_unit_debug_errno(unit, r, "Failed to adjust OOM setting, assuming containerized execution, ignoring: %m");
12145637 3177 else if (r < 0) {
ff0af2a1 3178 *exit_status = EXIT_OOM_ADJUST;
12145637 3179 return log_unit_error_errno(unit, r, "Failed to adjust OOM setting: %m");
613b411c 3180 }
d35fbf6b
DM
3181 }
3182
3183 if (context->nice_set)
3184 if (setpriority(PRIO_PROCESS, 0, context->nice) < 0) {
ff0af2a1 3185 *exit_status = EXIT_NICE;
12145637 3186 return log_unit_error_errno(unit, errno, "Failed to set up process scheduling priority (nice level): %m");
613b411c
LP
3187 }
3188
d35fbf6b
DM
3189 if (context->cpu_sched_set) {
3190 struct sched_param param = {
3191 .sched_priority = context->cpu_sched_priority,
3192 };
3193
ff0af2a1
LP
3194 r = sched_setscheduler(0,
3195 context->cpu_sched_policy |
3196 (context->cpu_sched_reset_on_fork ?
3197 SCHED_RESET_ON_FORK : 0),
3198 &param);
3199 if (r < 0) {
3200 *exit_status = EXIT_SETSCHEDULER;
12145637 3201 return log_unit_error_errno(unit, errno, "Failed to set up CPU scheduling: %m");
fc9b2a84 3202 }
d35fbf6b 3203 }
fc9b2a84 3204
0985c7c4
ZJS
3205 if (context->cpu_set.set)
3206 if (sched_setaffinity(0, context->cpu_set.allocated, context->cpu_set.set) < 0) {
ff0af2a1 3207 *exit_status = EXIT_CPUAFFINITY;
12145637 3208 return log_unit_error_errno(unit, errno, "Failed to set up CPU affinity: %m");
034c6ed7
LP
3209 }
3210
b070c7c0
MS
3211 if (mpol_is_valid(numa_policy_get_type(&context->numa_policy))) {
3212 r = apply_numa_policy(&context->numa_policy);
3213 if (r == -EOPNOTSUPP)
33fe9e3f 3214 log_unit_debug_errno(unit, r, "NUMA support not available, ignoring.");
b070c7c0
MS
3215 else if (r < 0) {
3216 *exit_status = EXIT_NUMA_POLICY;
3217 return log_unit_error_errno(unit, r, "Failed to set NUMA memory policy: %m");
3218 }
3219 }
3220
d35fbf6b
DM
3221 if (context->ioprio_set)
3222 if (ioprio_set(IOPRIO_WHO_PROCESS, 0, context->ioprio) < 0) {
ff0af2a1 3223 *exit_status = EXIT_IOPRIO;
12145637 3224 return log_unit_error_errno(unit, errno, "Failed to set up IO scheduling priority: %m");
d35fbf6b 3225 }
da726a4d 3226
d35fbf6b
DM
3227 if (context->timer_slack_nsec != NSEC_INFINITY)
3228 if (prctl(PR_SET_TIMERSLACK, context->timer_slack_nsec) < 0) {
ff0af2a1 3229 *exit_status = EXIT_TIMERSLACK;
12145637 3230 return log_unit_error_errno(unit, errno, "Failed to set up timer slack: %m");
4c2630eb 3231 }
9eba9da4 3232
21022b9d
LP
3233 if (context->personality != PERSONALITY_INVALID) {
3234 r = safe_personality(context->personality);
3235 if (r < 0) {
ff0af2a1 3236 *exit_status = EXIT_PERSONALITY;
12145637 3237 return log_unit_error_errno(unit, r, "Failed to set up execution domain (personality): %m");
4c2630eb 3238 }
21022b9d 3239 }
94f04347 3240
d35fbf6b 3241 if (context->utmp_id)
df0ff127 3242 utmp_put_init_process(context->utmp_id, getpid_cached(), getsid(0),
6a93917d 3243 context->tty_path,
023a4f67
LP
3244 context->utmp_mode == EXEC_UTMP_INIT ? INIT_PROCESS :
3245 context->utmp_mode == EXEC_UTMP_LOGIN ? LOGIN_PROCESS :
3246 USER_PROCESS,
6a93917d 3247 username);
d35fbf6b 3248
08f67696 3249 if (uid_is_valid(uid)) {
ff0af2a1
LP
3250 r = chown_terminal(STDIN_FILENO, uid);
3251 if (r < 0) {
3252 *exit_status = EXIT_STDIN;
12145637 3253 return log_unit_error_errno(unit, r, "Failed to change ownership of terminal: %m");
071830ff 3254 }
d35fbf6b 3255 }
8e274523 3256
4e1dfa45 3257 /* If delegation is enabled we'll pass ownership of the cgroup to the user of the new process. On cgroup v1
62b9bb26 3258 * this is only about systemd's own hierarchy, i.e. not the controller hierarchies, simply because that's not
4e1dfa45 3259 * safe. On cgroup v2 there's only one hierarchy anyway, and delegation is safe there, hence in that case only
62b9bb26 3260 * touch a single hierarchy too. */
584b8688 3261 if (params->cgroup_path && context->user && (params->flags & EXEC_CGROUP_DELEGATE)) {
62b9bb26 3262 r = cg_set_access(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, uid, gid);
ff0af2a1
LP
3263 if (r < 0) {
3264 *exit_status = EXIT_CGROUP;
12145637 3265 return log_unit_error_errno(unit, r, "Failed to adjust control group access: %m");
034c6ed7 3266 }
d35fbf6b 3267 }
034c6ed7 3268
72fd1768 3269 for (dt = 0; dt < _EXEC_DIRECTORY_TYPE_MAX; dt++) {
8679efde 3270 r = setup_exec_directory(context, params, uid, gid, dt, exit_status);
12145637
LP
3271 if (r < 0)
3272 return log_unit_error_errno(unit, r, "Failed to set up special execution directory in %s: %m", params->prefix[dt]);
d35fbf6b 3273 }
94f04347 3274
7bce046b 3275 r = build_environment(
fd63e712 3276 unit,
7bce046b
LP
3277 context,
3278 params,
3279 n_fds,
3280 home,
3281 username,
3282 shell,
3283 journal_stream_dev,
3284 journal_stream_ino,
3285 &our_env);
2065ca69
JW
3286 if (r < 0) {
3287 *exit_status = EXIT_MEMORY;
12145637 3288 return log_oom();
2065ca69
JW
3289 }
3290
3291 r = build_pass_environment(context, &pass_env);
3292 if (r < 0) {
3293 *exit_status = EXIT_MEMORY;
12145637 3294 return log_oom();
2065ca69
JW
3295 }
3296
3297 accum_env = strv_env_merge(5,
3298 params->environment,
3299 our_env,
3300 pass_env,
3301 context->environment,
3302 files_env,
3303 NULL);
3304 if (!accum_env) {
3305 *exit_status = EXIT_MEMORY;
12145637 3306 return log_oom();
2065ca69 3307 }
1280503b 3308 accum_env = strv_env_clean(accum_env);
2065ca69 3309
096424d1 3310 (void) umask(context->umask);
b213e1c1 3311
b1edf445 3312 r = setup_keyring(unit, context, params, uid, gid);
74dd6b51
LP
3313 if (r < 0) {
3314 *exit_status = EXIT_KEYRING;
12145637 3315 return log_unit_error_errno(unit, r, "Failed to set up kernel keyring: %m");
74dd6b51
LP
3316 }
3317
165a31c0 3318 /* We need sandboxing if the caller asked us to apply it and the command isn't explicitly excepted from it */
1703fa41 3319 needs_sandboxing = (params->flags & EXEC_APPLY_SANDBOXING) && !(command->flags & EXEC_COMMAND_FULLY_PRIVILEGED);
7f18ef0a 3320
165a31c0
LP
3321 /* We need the ambient capability hack, if the caller asked us to apply it and the command is marked for it, and the kernel doesn't actually support ambient caps */
3322 needs_ambient_hack = (params->flags & EXEC_APPLY_SANDBOXING) && (command->flags & EXEC_COMMAND_AMBIENT_MAGIC) && !ambient_capabilities_supported();
7f18ef0a 3323
165a31c0
LP
3324 /* We need setresuid() if the caller asked us to apply sandboxing and the command isn't explicitly excepted from either whole sandboxing or just setresuid() itself, and the ambient hack is not desired */
3325 if (needs_ambient_hack)
3326 needs_setuid = false;
3327 else
3328 needs_setuid = (params->flags & EXEC_APPLY_SANDBOXING) && !(command->flags & (EXEC_COMMAND_FULLY_PRIVILEGED|EXEC_COMMAND_NO_SETUID));
3329
3330 if (needs_sandboxing) {
7f18ef0a
FK
3331 /* MAC enablement checks need to be done before a new mount ns is created, as they rely on /sys being
3332 * present. The actual MAC context application will happen later, as late as possible, to avoid
3333 * impacting our own code paths. */
3334
349cc4a5 3335#if HAVE_SELINUX
43b1f709 3336 use_selinux = mac_selinux_use();
7f18ef0a 3337#endif
f9fa32f0 3338#if ENABLE_SMACK
43b1f709 3339 use_smack = mac_smack_use();
7f18ef0a 3340#endif
349cc4a5 3341#if HAVE_APPARMOR
43b1f709 3342 use_apparmor = mac_apparmor_use();
7f18ef0a 3343#endif
165a31c0 3344 }
7f18ef0a 3345
ce932d2d
LP
3346 if (needs_sandboxing) {
3347 int which_failed;
3348
3349 /* Let's set the resource limits before we call into PAM, so that pam_limits wins over what
3350 * is set here. (See below.) */
3351
3352 r = setrlimit_closest_all((const struct rlimit* const *) context->rlimit, &which_failed);
3353 if (r < 0) {
3354 *exit_status = EXIT_LIMITS;
3355 return log_unit_error_errno(unit, r, "Failed to adjust resource limit RLIMIT_%s: %m", rlimit_to_string(which_failed));
3356 }
3357 }
3358
165a31c0 3359 if (needs_setuid) {
ce932d2d
LP
3360
3361 /* Let's call into PAM after we set up our own idea of resource limits so that pam_limits
3362 * wins here. (See above.) */
3363
165a31c0
LP
3364 if (context->pam_name && username) {
3365 r = setup_pam(context->pam_name, username, uid, gid, context->tty_path, &accum_env, fds, n_fds);
3366 if (r < 0) {
3367 *exit_status = EXIT_PAM;
12145637 3368 return log_unit_error_errno(unit, r, "Failed to set up PAM session: %m");
165a31c0
LP
3369 }
3370 }
b213e1c1 3371 }
ac45f971 3372
a8d08f39
LP
3373 if ((context->private_network || context->network_namespace_path) && runtime && runtime->netns_storage_socket[0] >= 0) {
3374
6e2d7c4f
MS
3375 if (ns_type_supported(NAMESPACE_NET)) {
3376 r = setup_netns(runtime->netns_storage_socket);
3377 if (r < 0) {
3378 *exit_status = EXIT_NETWORK;
3379 return log_unit_error_errno(unit, r, "Failed to set up network namespacing: %m");
3380 }
a8d08f39
LP
3381 } else if (context->network_namespace_path) {
3382 *exit_status = EXIT_NETWORK;
3383 return log_unit_error_errno(unit, SYNTHETIC_ERRNO(EOPNOTSUPP), "NetworkNamespacePath= is not supported, refusing.");
6e2d7c4f
MS
3384 } else
3385 log_unit_warning(unit, "PrivateNetwork=yes is configured, but the kernel does not support network namespaces, ignoring.");
d35fbf6b 3386 }
169c1bda 3387
ee818b89 3388 needs_mount_namespace = exec_needs_mount_namespace(context, params, runtime);
ee818b89 3389 if (needs_mount_namespace) {
7cc5ef5f
ZJS
3390 _cleanup_free_ char *error_path = NULL;
3391
3392 r = apply_mount_namespace(unit, command, context, params, runtime, &error_path);
3fbe8dbe
LP
3393 if (r < 0) {
3394 *exit_status = EXIT_NAMESPACE;
7cc5ef5f
ZJS
3395 return log_unit_error_errno(unit, r, "Failed to set up mount namespacing%s%s: %m",
3396 error_path ? ": " : "", strempty(error_path));
3fbe8dbe 3397 }
d35fbf6b 3398 }
81a2b7ce 3399
aecd5ac6
TM
3400 if (context->protect_hostname) {
3401 if (ns_type_supported(NAMESPACE_UTS)) {
3402 if (unshare(CLONE_NEWUTS) < 0) {
3403 *exit_status = EXIT_NAMESPACE;
3404 return log_unit_error_errno(unit, errno, "Failed to set up UTS namespacing: %m");
3405 }
3406 } else
3407 log_unit_warning(unit, "ProtectHostname=yes is configured, but the kernel does not support UTS namespaces, ignoring namespace setup.");
3408#if HAVE_SECCOMP
3409 r = seccomp_protect_hostname();
3410 if (r < 0) {
3411 *exit_status = EXIT_SECCOMP;
3412 return log_unit_error_errno(unit, r, "Failed to apply hostname restrictions: %m");
3413 }
3414#endif
3415 }
3416
bbeea271 3417 /* Drop groups as early as possible */
165a31c0 3418 if (needs_setuid) {
709dbeac 3419 r = enforce_groups(gid, supplementary_gids, ngids);
096424d1
LP
3420 if (r < 0) {
3421 *exit_status = EXIT_GROUP;
12145637 3422 return log_unit_error_errno(unit, r, "Changing group credentials failed: %m");
096424d1 3423 }
165a31c0 3424 }
096424d1 3425
165a31c0 3426 if (needs_sandboxing) {
349cc4a5 3427#if HAVE_SELINUX
43b1f709 3428 if (use_selinux && params->selinux_context_net && socket_fd >= 0) {
937ccce9
LP
3429 r = mac_selinux_get_child_mls_label(socket_fd, command->path, context->selinux_context, &mac_selinux_context_net);
3430 if (r < 0) {
3431 *exit_status = EXIT_SELINUX_CONTEXT;
12145637 3432 return log_unit_error_errno(unit, r, "Failed to determine SELinux context: %m");
937ccce9 3433 }
9008e1ac 3434 }
9008e1ac
MS
3435#endif
3436
937ccce9
LP
3437 if (context->private_users) {
3438 r = setup_private_users(uid, gid);
3439 if (r < 0) {
3440 *exit_status = EXIT_USER;
12145637 3441 return log_unit_error_errno(unit, r, "Failed to set up user namespacing: %m");
937ccce9 3442 }
d251207d
LP
3443 }
3444 }
3445
165a31c0 3446 /* We repeat the fd closing here, to make sure that nothing is leaked from the PAM modules. Note that we are
5686391b
LP
3447 * more aggressive this time since socket_fd and the netns fds we don't need anymore. We do keep the exec_fd
3448 * however if we have it as we want to keep it open until the final execve(). */
3449
3450 if (params->exec_fd >= 0) {
3451 exec_fd = params->exec_fd;
3452
3453 if (exec_fd < 3 + (int) n_fds) {
3454 int moved_fd;
3455
3456 /* Let's move the exec fd far up, so that it's outside of the fd range we want to pass to the
3457 * process we are about to execute. */
3458
3459 moved_fd = fcntl(exec_fd, F_DUPFD_CLOEXEC, 3 + (int) n_fds);
3460 if (moved_fd < 0) {
3461 *exit_status = EXIT_FDS;
3462 return log_unit_error_errno(unit, errno, "Couldn't move exec fd up: %m");
3463 }
3464
3465 safe_close(exec_fd);
3466 exec_fd = moved_fd;
3467 } else {
3468 /* This fd should be FD_CLOEXEC already, but let's make sure. */
3469 r = fd_cloexec(exec_fd, true);
3470 if (r < 0) {
3471 *exit_status = EXIT_FDS;
3472 return log_unit_error_errno(unit, r, "Failed to make exec fd FD_CLOEXEC: %m");
3473 }
3474 }
3475
3476 fds_with_exec_fd = newa(int, n_fds + 1);
7e8d494b 3477 memcpy_safe(fds_with_exec_fd, fds, n_fds * sizeof(int));
5686391b
LP
3478 fds_with_exec_fd[n_fds] = exec_fd;
3479 n_fds_with_exec_fd = n_fds + 1;
3480 } else {
3481 fds_with_exec_fd = fds;
3482 n_fds_with_exec_fd = n_fds;
3483 }
3484
3485 r = close_all_fds(fds_with_exec_fd, n_fds_with_exec_fd);
ff0af2a1
LP
3486 if (r >= 0)
3487 r = shift_fds(fds, n_fds);
3488 if (r >= 0)
25b583d7 3489 r = flags_fds(fds, n_socket_fds, n_storage_fds, context->non_blocking);
ff0af2a1
LP
3490 if (r < 0) {
3491 *exit_status = EXIT_FDS;
12145637 3492 return log_unit_error_errno(unit, r, "Failed to adjust passed file descriptors: %m");
d35fbf6b 3493 }
e66cf1a3 3494
5686391b
LP
3495 /* At this point, the fds we want to pass to the program are all ready and set up, with O_CLOEXEC turned off
3496 * and at the right fd numbers. There are no other fds open, with one exception: the exec_fd if it is defined,
3497 * and it has O_CLOEXEC set, after all we want it to be closed by the execve(), so that our parent knows we
3498 * came this far. */
3499
165a31c0 3500 secure_bits = context->secure_bits;
e66cf1a3 3501
165a31c0
LP
3502 if (needs_sandboxing) {
3503 uint64_t bset;
e66cf1a3 3504
ce932d2d
LP
3505 /* Set the RTPRIO resource limit to 0, but only if nothing else was explicitly
3506 * requested. (Note this is placed after the general resource limit initialization, see
3507 * above, in order to take precedence.) */
f4170c67
LP
3508 if (context->restrict_realtime && !context->rlimit[RLIMIT_RTPRIO]) {
3509 if (setrlimit(RLIMIT_RTPRIO, &RLIMIT_MAKE_CONST(0)) < 0) {
3510 *exit_status = EXIT_LIMITS;
12145637 3511 return log_unit_error_errno(unit, errno, "Failed to adjust RLIMIT_RTPRIO resource limit: %m");
f4170c67
LP
3512 }
3513 }
3514
37ac2744
JB
3515#if ENABLE_SMACK
3516 /* LSM Smack needs the capability CAP_MAC_ADMIN to change the current execution security context of the
3517 * process. This is the latest place before dropping capabilities. Other MAC contexts are set later. */
3518 if (use_smack) {
3519 r = setup_smack(context, command);
3520 if (r < 0) {
3521 *exit_status = EXIT_SMACK_PROCESS_LABEL;
3522 return log_unit_error_errno(unit, r, "Failed to set SMACK process label: %m");
3523 }
3524 }
3525#endif
3526
165a31c0
LP
3527 bset = context->capability_bounding_set;
3528 /* If the ambient caps hack is enabled (which means the kernel can't do them, and the user asked for
3529 * our magic fallback), then let's add some extra caps, so that the service can drop privs of its own,
3530 * instead of us doing that */
3531 if (needs_ambient_hack)
3532 bset |= (UINT64_C(1) << CAP_SETPCAP) |
3533 (UINT64_C(1) << CAP_SETUID) |
3534 (UINT64_C(1) << CAP_SETGID);
3535
3536 if (!cap_test_all(bset)) {
3537 r = capability_bounding_set_drop(bset, false);
ff0af2a1
LP
3538 if (r < 0) {
3539 *exit_status = EXIT_CAPABILITIES;
12145637 3540 return log_unit_error_errno(unit, r, "Failed to drop capabilities: %m");
3b8bddde 3541 }
4c2630eb 3542 }
3b8bddde 3543
755d4b67
IP
3544 /* This is done before enforce_user, but ambient set
3545 * does not survive over setresuid() if keep_caps is not set. */
165a31c0
LP
3546 if (!needs_ambient_hack &&
3547 context->capability_ambient_set != 0) {
755d4b67
IP
3548 r = capability_ambient_set_apply(context->capability_ambient_set, true);
3549 if (r < 0) {
3550 *exit_status = EXIT_CAPABILITIES;
12145637 3551 return log_unit_error_errno(unit, r, "Failed to apply ambient capabilities (before UID change): %m");
755d4b67 3552 }
755d4b67 3553 }
165a31c0 3554 }
755d4b67 3555
fa97f630
JB
3556 /* chroot to root directory first, before we lose the ability to chroot */
3557 r = apply_root_directory(context, params, needs_mount_namespace, exit_status);
3558 if (r < 0)
3559 return log_unit_error_errno(unit, r, "Chrooting to the requested root directory failed: %m");
3560
165a31c0 3561 if (needs_setuid) {
08f67696 3562 if (uid_is_valid(uid)) {
ff0af2a1
LP
3563 r = enforce_user(context, uid);
3564 if (r < 0) {
3565 *exit_status = EXIT_USER;
12145637 3566 return log_unit_error_errno(unit, r, "Failed to change UID to " UID_FMT ": %m", uid);
5b6319dc 3567 }
165a31c0
LP
3568
3569 if (!needs_ambient_hack &&
3570 context->capability_ambient_set != 0) {
755d4b67
IP
3571
3572 /* Fix the ambient capabilities after user change. */
3573 r = capability_ambient_set_apply(context->capability_ambient_set, false);
3574 if (r < 0) {
3575 *exit_status = EXIT_CAPABILITIES;
12145637 3576 return log_unit_error_errno(unit, r, "Failed to apply ambient capabilities (after UID change): %m");
755d4b67
IP
3577 }
3578
3579 /* If we were asked to change user and ambient capabilities
3580 * were requested, we had to add keep-caps to the securebits
3581 * so that we would maintain the inherited capability set
3582 * through the setresuid(). Make sure that the bit is added
3583 * also to the context secure_bits so that we don't try to
3584 * drop the bit away next. */
3585
7f508f2c 3586 secure_bits |= 1<<SECURE_KEEP_CAPS;
755d4b67 3587 }
5b6319dc 3588 }
165a31c0 3589 }
d35fbf6b 3590
56ef8db9
JB
3591 /* Apply working directory here, because the working directory might be on NFS and only the user running
3592 * this service might have the correct privilege to change to the working directory */
fa97f630 3593 r = apply_working_directory(context, params, home, exit_status);
56ef8db9
JB
3594 if (r < 0)
3595 return log_unit_error_errno(unit, r, "Changing to the requested working directory failed: %m");
3596
165a31c0 3597 if (needs_sandboxing) {
37ac2744 3598 /* Apply other MAC contexts late, but before seccomp syscall filtering, as those should really be last to
5cd9cd35
LP
3599 * influence our own codepaths as little as possible. Moreover, applying MAC contexts usually requires
3600 * syscalls that are subject to seccomp filtering, hence should probably be applied before the syscalls
3601 * are restricted. */
3602
349cc4a5 3603#if HAVE_SELINUX
43b1f709 3604 if (use_selinux) {
5cd9cd35
LP
3605 char *exec_context = mac_selinux_context_net ?: context->selinux_context;
3606
3607 if (exec_context) {
3608 r = setexeccon(exec_context);
3609 if (r < 0) {
3610 *exit_status = EXIT_SELINUX_CONTEXT;
12145637 3611 return log_unit_error_errno(unit, r, "Failed to change SELinux context to %s: %m", exec_context);
5cd9cd35
LP
3612 }
3613 }
3614 }
3615#endif
3616
349cc4a5 3617#if HAVE_APPARMOR
43b1f709 3618 if (use_apparmor && context->apparmor_profile) {
5cd9cd35
LP
3619 r = aa_change_onexec(context->apparmor_profile);
3620 if (r < 0 && !context->apparmor_profile_ignore) {
3621 *exit_status = EXIT_APPARMOR_PROFILE;
12145637 3622 return log_unit_error_errno(unit, errno, "Failed to prepare AppArmor profile change to %s: %m", context->apparmor_profile);
5cd9cd35
LP
3623 }
3624 }
3625#endif
3626
165a31c0
LP
3627 /* PR_GET_SECUREBITS is not privileged, while PR_SET_SECUREBITS is. So to suppress potential EPERMs
3628 * we'll try not to call PR_SET_SECUREBITS unless necessary. */
755d4b67
IP
3629 if (prctl(PR_GET_SECUREBITS) != secure_bits)
3630 if (prctl(PR_SET_SECUREBITS, secure_bits) < 0) {
ff0af2a1 3631 *exit_status = EXIT_SECUREBITS;
12145637 3632 return log_unit_error_errno(unit, errno, "Failed to set process secure bits: %m");
ff01d048 3633 }
5b6319dc 3634
59eeb84b 3635 if (context_has_no_new_privileges(context))
d35fbf6b 3636 if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) < 0) {
ff0af2a1 3637 *exit_status = EXIT_NO_NEW_PRIVILEGES;
12145637 3638 return log_unit_error_errno(unit, errno, "Failed to disable new privileges: %m");
d35fbf6b
DM
3639 }
3640
349cc4a5 3641#if HAVE_SECCOMP
469830d1
LP
3642 r = apply_address_families(unit, context);
3643 if (r < 0) {
3644 *exit_status = EXIT_ADDRESS_FAMILIES;
12145637 3645 return log_unit_error_errno(unit, r, "Failed to restrict address families: %m");
4c2630eb 3646 }
04aa0cb9 3647
469830d1
LP
3648 r = apply_memory_deny_write_execute(unit, context);
3649 if (r < 0) {
3650 *exit_status = EXIT_SECCOMP;
12145637 3651 return log_unit_error_errno(unit, r, "Failed to disable writing to executable memory: %m");
f3e43635 3652 }
f4170c67 3653
469830d1
LP
3654 r = apply_restrict_realtime(unit, context);
3655 if (r < 0) {
3656 *exit_status = EXIT_SECCOMP;
12145637 3657 return log_unit_error_errno(unit, r, "Failed to apply realtime restrictions: %m");
f4170c67
LP
3658 }
3659
f69567cb
LP
3660 r = apply_restrict_suid_sgid(unit, context);
3661 if (r < 0) {
3662 *exit_status = EXIT_SECCOMP;
3663 return log_unit_error_errno(unit, r, "Failed to apply SUID/SGID restrictions: %m");
3664 }
3665
add00535
LP
3666 r = apply_restrict_namespaces(unit, context);
3667 if (r < 0) {
3668 *exit_status = EXIT_SECCOMP;
12145637 3669 return log_unit_error_errno(unit, r, "Failed to apply namespace restrictions: %m");
add00535
LP
3670 }
3671
469830d1
LP
3672 r = apply_protect_sysctl(unit, context);
3673 if (r < 0) {
3674 *exit_status = EXIT_SECCOMP;
12145637 3675 return log_unit_error_errno(unit, r, "Failed to apply sysctl restrictions: %m");
502d704e
DH
3676 }
3677
469830d1
LP
3678 r = apply_protect_kernel_modules(unit, context);
3679 if (r < 0) {
3680 *exit_status = EXIT_SECCOMP;
12145637 3681 return log_unit_error_errno(unit, r, "Failed to apply module loading restrictions: %m");
59eeb84b
LP
3682 }
3683
469830d1
LP
3684 r = apply_private_devices(unit, context);
3685 if (r < 0) {
3686 *exit_status = EXIT_SECCOMP;
12145637 3687 return log_unit_error_errno(unit, r, "Failed to set up private devices: %m");
469830d1
LP
3688 }
3689
3690 r = apply_syscall_archs(unit, context);
3691 if (r < 0) {
3692 *exit_status = EXIT_SECCOMP;
12145637 3693 return log_unit_error_errno(unit, r, "Failed to apply syscall architecture restrictions: %m");
ba128bb8
LP
3694 }
3695
78e864e5
TM
3696 r = apply_lock_personality(unit, context);
3697 if (r < 0) {
3698 *exit_status = EXIT_SECCOMP;
12145637 3699 return log_unit_error_errno(unit, r, "Failed to lock personalities: %m");
78e864e5
TM
3700 }
3701
5cd9cd35
LP
3702 /* This really should remain the last step before the execve(), to make sure our own code is affected
3703 * by the filter as little as possible. */
165a31c0 3704 r = apply_syscall_filter(unit, context, needs_ambient_hack);
469830d1
LP
3705 if (r < 0) {
3706 *exit_status = EXIT_SECCOMP;
12145637 3707 return log_unit_error_errno(unit, r, "Failed to apply system call filters: %m");
d35fbf6b
DM
3708 }
3709#endif
d35fbf6b 3710 }
034c6ed7 3711
00819cc1
LP
3712 if (!strv_isempty(context->unset_environment)) {
3713 char **ee = NULL;
3714
3715 ee = strv_env_delete(accum_env, 1, context->unset_environment);
3716 if (!ee) {
3717 *exit_status = EXIT_MEMORY;
12145637 3718 return log_oom();
00819cc1
LP
3719 }
3720
130d3d22 3721 strv_free_and_replace(accum_env, ee);
00819cc1
LP
3722 }
3723
7ca69792
AZ
3724 if (!FLAGS_SET(command->flags, EXEC_COMMAND_NO_ENV_EXPAND)) {
3725 replaced_argv = replace_env_argv(command->argv, accum_env);
3726 if (!replaced_argv) {
3727 *exit_status = EXIT_MEMORY;
3728 return log_oom();
3729 }
3730 final_argv = replaced_argv;
3731 } else
3732 final_argv = command->argv;
034c6ed7 3733
f1d34068 3734 if (DEBUG_LOGGING) {
d35fbf6b 3735 _cleanup_free_ char *line;
81a2b7ce 3736
d35fbf6b 3737 line = exec_command_line(final_argv);
a1230ff9 3738 if (line)
f2341e0a 3739 log_struct(LOG_DEBUG,
f2341e0a
LP
3740 "EXECUTABLE=%s", command->path,
3741 LOG_UNIT_MESSAGE(unit, "Executing: %s", line),
ba360bb0 3742 LOG_UNIT_ID(unit),
a1230ff9 3743 LOG_UNIT_INVOCATION_ID(unit));
d35fbf6b 3744 }
dd305ec9 3745
5686391b
LP
3746 if (exec_fd >= 0) {
3747 uint8_t hot = 1;
3748
3749 /* We have finished with all our initializations. Let's now let the manager know that. From this point
3750 * on, if the manager sees POLLHUP on the exec_fd, then execve() was successful. */
3751
3752 if (write(exec_fd, &hot, sizeof(hot)) < 0) {
3753 *exit_status = EXIT_EXEC;
3754 return log_unit_error_errno(unit, errno, "Failed to enable exec_fd: %m");
3755 }
3756 }
3757
2065ca69 3758 execve(command->path, final_argv, accum_env);
5686391b
LP
3759 r = -errno;
3760
3761 if (exec_fd >= 0) {
3762 uint8_t hot = 0;
3763
3764 /* The execve() failed. This means the exec_fd is still open. Which means we need to tell the manager
3765 * that POLLHUP on it no longer means execve() succeeded. */
3766
3767 if (write(exec_fd, &hot, sizeof(hot)) < 0) {
3768 *exit_status = EXIT_EXEC;
3769 return log_unit_error_errno(unit, errno, "Failed to disable exec_fd: %m");
3770 }
3771 }
12145637 3772
5686391b
LP
3773 if (r == -ENOENT && (command->flags & EXEC_COMMAND_IGNORE_FAILURE)) {
3774 log_struct_errno(LOG_INFO, r,
12145637
LP
3775 "MESSAGE_ID=" SD_MESSAGE_SPAWN_FAILED_STR,
3776 LOG_UNIT_ID(unit),
3777 LOG_UNIT_INVOCATION_ID(unit),
3778 LOG_UNIT_MESSAGE(unit, "Executable %s missing, skipping: %m",
3779 command->path),
a1230ff9 3780 "EXECUTABLE=%s", command->path);
12145637
LP
3781 return 0;
3782 }
3783
ff0af2a1 3784 *exit_status = EXIT_EXEC;
5686391b 3785 return log_unit_error_errno(unit, r, "Failed to execute command: %m");
d35fbf6b 3786}
81a2b7ce 3787
34cf6c43
YW
3788static int exec_context_load_environment(const Unit *unit, const ExecContext *c, char ***l);
3789static int exec_context_named_iofds(const ExecContext *c, const ExecParameters *p, int named_iofds[3]);
3790
f2341e0a
LP
3791int exec_spawn(Unit *unit,
3792 ExecCommand *command,
d35fbf6b
DM
3793 const ExecContext *context,
3794 const ExecParameters *params,
3795 ExecRuntime *runtime,
29206d46 3796 DynamicCreds *dcreds,
d35fbf6b 3797 pid_t *ret) {
8351ceae 3798
ee39ca20 3799 int socket_fd, r, named_iofds[3] = { -1, -1, -1 }, *fds = NULL;
78f93209 3800 _cleanup_free_ char *subcgroup_path = NULL;
d35fbf6b 3801 _cleanup_strv_free_ char **files_env = NULL;
da6053d0 3802 size_t n_storage_fds = 0, n_socket_fds = 0;
ff0af2a1 3803 _cleanup_free_ char *line = NULL;
d35fbf6b 3804 pid_t pid;
8351ceae 3805
f2341e0a 3806 assert(unit);
d35fbf6b
DM
3807 assert(command);
3808 assert(context);
3809 assert(ret);
3810 assert(params);
25b583d7 3811 assert(params->fds || (params->n_socket_fds + params->n_storage_fds <= 0));
4298d0b5 3812
d35fbf6b
DM
3813 if (context->std_input == EXEC_INPUT_SOCKET ||
3814 context->std_output == EXEC_OUTPUT_SOCKET ||
3815 context->std_error == EXEC_OUTPUT_SOCKET) {
17df7223 3816
4c47affc 3817 if (params->n_socket_fds > 1) {
f2341e0a 3818 log_unit_error(unit, "Got more than one socket.");
d35fbf6b 3819 return -EINVAL;
ff0af2a1 3820 }
eef65bf3 3821
4c47affc 3822 if (params->n_socket_fds == 0) {
488ab41c
AA
3823 log_unit_error(unit, "Got no socket.");
3824 return -EINVAL;
3825 }
3826
d35fbf6b
DM
3827 socket_fd = params->fds[0];
3828 } else {
3829 socket_fd = -1;
3830 fds = params->fds;
9b141911 3831 n_socket_fds = params->n_socket_fds;
25b583d7 3832 n_storage_fds = params->n_storage_fds;
d35fbf6b 3833 }
94f04347 3834
34cf6c43 3835 r = exec_context_named_iofds(context, params, named_iofds);
52c239d7
LB
3836 if (r < 0)
3837 return log_unit_error_errno(unit, r, "Failed to load a named file descriptor: %m");
3838
f2341e0a 3839 r = exec_context_load_environment(unit, context, &files_env);
ff0af2a1 3840 if (r < 0)
f2341e0a 3841 return log_unit_error_errno(unit, r, "Failed to load environment files: %m");
034c6ed7 3842
ee39ca20 3843 line = exec_command_line(command->argv);
d35fbf6b
DM
3844 if (!line)
3845 return log_oom();
fab56fc5 3846
f2341e0a 3847 log_struct(LOG_DEBUG,
f2341e0a
LP
3848 LOG_UNIT_MESSAGE(unit, "About to execute: %s", line),
3849 "EXECUTABLE=%s", command->path,
ba360bb0 3850 LOG_UNIT_ID(unit),
a1230ff9 3851 LOG_UNIT_INVOCATION_ID(unit));
12145637 3852
78f93209
LP
3853 if (params->cgroup_path) {
3854 r = exec_parameters_get_cgroup_path(params, &subcgroup_path);
3855 if (r < 0)
3856 return log_unit_error_errno(unit, r, "Failed to acquire subcgroup path: %m");
3857 if (r > 0) { /* We are using a child cgroup */
3858 r = cg_create(SYSTEMD_CGROUP_CONTROLLER, subcgroup_path);
3859 if (r < 0)
3860 return log_unit_error_errno(unit, r, "Failed to create control group '%s': %m", subcgroup_path);
3861 }
3862 }
3863
d35fbf6b
DM
3864 pid = fork();
3865 if (pid < 0)
74129a12 3866 return log_unit_error_errno(unit, errno, "Failed to fork: %m");
d35fbf6b
DM
3867
3868 if (pid == 0) {
12145637 3869 int exit_status = EXIT_SUCCESS;
ff0af2a1 3870
f2341e0a
LP
3871 r = exec_child(unit,
3872 command,
ff0af2a1
LP
3873 context,
3874 params,
3875 runtime,
29206d46 3876 dcreds,
ff0af2a1 3877 socket_fd,
52c239d7 3878 named_iofds,
4c47affc 3879 fds,
9b141911 3880 n_socket_fds,
25b583d7 3881 n_storage_fds,
ff0af2a1 3882 files_env,
00d9ef85 3883 unit->manager->user_lookup_fds[1],
12145637
LP
3884 &exit_status);
3885
a1230ff9 3886 if (r < 0)
12145637
LP
3887 log_struct_errno(LOG_ERR, r,
3888 "MESSAGE_ID=" SD_MESSAGE_SPAWN_FAILED_STR,
3889 LOG_UNIT_ID(unit),
3890 LOG_UNIT_INVOCATION_ID(unit),
3891 LOG_UNIT_MESSAGE(unit, "Failed at step %s spawning %s: %m",
3892 exit_status_to_string(exit_status, EXIT_STATUS_SYSTEMD),
3893 command->path),
a1230ff9 3894 "EXECUTABLE=%s", command->path);
4c2630eb 3895
ff0af2a1 3896 _exit(exit_status);
034c6ed7
LP
3897 }
3898
f2341e0a 3899 log_unit_debug(unit, "Forked %s as "PID_FMT, command->path, pid);
23635a85 3900
78f93209
LP
3901 /* We add the new process to the cgroup both in the child (so that we can be sure that no user code is ever
3902 * executed outside of the cgroup) and in the parent (so that we can be sure that when we kill the cgroup the
3903 * process will be killed too). */
3904 if (subcgroup_path)
3905 (void) cg_attach(SYSTEMD_CGROUP_CONTROLLER, subcgroup_path, pid);
2da3263a 3906
b58b4116 3907 exec_status_start(&command->exec_status, pid);
9fb86720 3908
034c6ed7 3909 *ret = pid;
5cb5a6ff
LP
3910 return 0;
3911}
3912
034c6ed7 3913void exec_context_init(ExecContext *c) {
3536f49e
YW
3914 ExecDirectoryType i;
3915
034c6ed7
LP
3916 assert(c);
3917
4c12626c 3918 c->umask = 0022;
9eba9da4 3919 c->ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 0);
94f04347 3920 c->cpu_sched_policy = SCHED_OTHER;
071830ff 3921 c->syslog_priority = LOG_DAEMON|LOG_INFO;
74922904 3922 c->syslog_level_prefix = true;
353e12c2 3923 c->ignore_sigpipe = true;
3a43da28 3924 c->timer_slack_nsec = NSEC_INFINITY;
050f7277 3925 c->personality = PERSONALITY_INVALID;
72fd1768 3926 for (i = 0; i < _EXEC_DIRECTORY_TYPE_MAX; i++)
3536f49e 3927 c->directories[i].mode = 0755;
a103496c 3928 c->capability_bounding_set = CAP_ALL;
aa9d574d
YW
3929 assert_cc(NAMESPACE_FLAGS_INITIAL != NAMESPACE_FLAGS_ALL);
3930 c->restrict_namespaces = NAMESPACE_FLAGS_INITIAL;
d3070fbd 3931 c->log_level_max = -1;
b070c7c0 3932 numa_policy_reset(&c->numa_policy);
034c6ed7
LP
3933}
3934
613b411c 3935void exec_context_done(ExecContext *c) {
3536f49e 3936 ExecDirectoryType i;
d3070fbd 3937 size_t l;
5cb5a6ff
LP
3938
3939 assert(c);
3940
6796073e
LP
3941 c->environment = strv_free(c->environment);
3942 c->environment_files = strv_free(c->environment_files);
b4c14404 3943 c->pass_environment = strv_free(c->pass_environment);
00819cc1 3944 c->unset_environment = strv_free(c->unset_environment);
8c7be95e 3945
31ce987c 3946 rlimit_free_all(c->rlimit);
034c6ed7 3947
2038c3f5 3948 for (l = 0; l < 3; l++) {
52c239d7 3949 c->stdio_fdname[l] = mfree(c->stdio_fdname[l]);
2038c3f5
LP
3950 c->stdio_file[l] = mfree(c->stdio_file[l]);
3951 }
52c239d7 3952
a1e58e8e
LP
3953 c->working_directory = mfree(c->working_directory);
3954 c->root_directory = mfree(c->root_directory);
915e6d16 3955 c->root_image = mfree(c->root_image);
a1e58e8e
LP
3956 c->tty_path = mfree(c->tty_path);
3957 c->syslog_identifier = mfree(c->syslog_identifier);
3958 c->user = mfree(c->user);
3959 c->group = mfree(c->group);
034c6ed7 3960
6796073e 3961 c->supplementary_groups = strv_free(c->supplementary_groups);
94f04347 3962
a1e58e8e 3963 c->pam_name = mfree(c->pam_name);
5b6319dc 3964
2a624c36
AP
3965 c->read_only_paths = strv_free(c->read_only_paths);
3966 c->read_write_paths = strv_free(c->read_write_paths);
3967 c->inaccessible_paths = strv_free(c->inaccessible_paths);
82c121a4 3968
d2d6c096 3969 bind_mount_free_many(c->bind_mounts, c->n_bind_mounts);
8e06d57c
YW
3970 c->bind_mounts = NULL;
3971 c->n_bind_mounts = 0;
2abd4e38
YW
3972 temporary_filesystem_free_many(c->temporary_filesystems, c->n_temporary_filesystems);
3973 c->temporary_filesystems = NULL;
3974 c->n_temporary_filesystems = 0;
d2d6c096 3975
0985c7c4 3976 cpu_set_reset(&c->cpu_set);
b070c7c0 3977 numa_policy_reset(&c->numa_policy);
86a3475b 3978
a1e58e8e
LP
3979 c->utmp_id = mfree(c->utmp_id);
3980 c->selinux_context = mfree(c->selinux_context);
3981 c->apparmor_profile = mfree(c->apparmor_profile);
5b8e1b77 3982 c->smack_process_label = mfree(c->smack_process_label);
eef65bf3 3983
8cfa775f 3984 c->syscall_filter = hashmap_free(c->syscall_filter);
525d3cc7
LP
3985 c->syscall_archs = set_free(c->syscall_archs);
3986 c->address_families = set_free(c->address_families);
e66cf1a3 3987
72fd1768 3988 for (i = 0; i < _EXEC_DIRECTORY_TYPE_MAX; i++)
3536f49e 3989 c->directories[i].paths = strv_free(c->directories[i].paths);
d3070fbd
LP
3990
3991 c->log_level_max = -1;
3992
3993 exec_context_free_log_extra_fields(c);
08f3be7a 3994
90fc172e
AZ
3995 c->log_rate_limit_interval_usec = 0;
3996 c->log_rate_limit_burst = 0;
3997
08f3be7a
LP
3998 c->stdin_data = mfree(c->stdin_data);
3999 c->stdin_data_size = 0;
a8d08f39
LP
4000
4001 c->network_namespace_path = mfree(c->network_namespace_path);
e66cf1a3
LP
4002}
4003
34cf6c43 4004int exec_context_destroy_runtime_directory(const ExecContext *c, const char *runtime_prefix) {
e66cf1a3
LP
4005 char **i;
4006
4007 assert(c);
4008
4009 if (!runtime_prefix)
4010 return 0;
4011
3536f49e 4012 STRV_FOREACH(i, c->directories[EXEC_DIRECTORY_RUNTIME].paths) {
e66cf1a3
LP
4013 _cleanup_free_ char *p;
4014
7bc4bf4a 4015 p = path_join(runtime_prefix, *i);
e66cf1a3
LP
4016 if (!p)
4017 return -ENOMEM;
4018
7bc4bf4a
LP
4019 /* We execute this synchronously, since we need to be sure this is gone when we start the
4020 * service next. */
c6878637 4021 (void) rm_rf(p, REMOVE_ROOT);
e66cf1a3
LP
4022 }
4023
4024 return 0;
5cb5a6ff
LP
4025}
4026
34cf6c43 4027static void exec_command_done(ExecCommand *c) {
43d0fcbd
LP
4028 assert(c);
4029
a1e58e8e 4030 c->path = mfree(c->path);
6796073e 4031 c->argv = strv_free(c->argv);
43d0fcbd
LP
4032}
4033
da6053d0
LP
4034void exec_command_done_array(ExecCommand *c, size_t n) {
4035 size_t i;
43d0fcbd
LP
4036
4037 for (i = 0; i < n; i++)
4038 exec_command_done(c+i);
4039}
4040
f1acf85a 4041ExecCommand* exec_command_free_list(ExecCommand *c) {
5cb5a6ff
LP
4042 ExecCommand *i;
4043
4044 while ((i = c)) {
71fda00f 4045 LIST_REMOVE(command, c, i);
43d0fcbd 4046 exec_command_done(i);
5cb5a6ff
LP
4047 free(i);
4048 }
f1acf85a
ZJS
4049
4050 return NULL;
5cb5a6ff
LP
4051}
4052
da6053d0
LP
4053void exec_command_free_array(ExecCommand **c, size_t n) {
4054 size_t i;
034c6ed7 4055
f1acf85a
ZJS
4056 for (i = 0; i < n; i++)
4057 c[i] = exec_command_free_list(c[i]);
034c6ed7
LP
4058}
4059
6a1d4d9f
LP
4060void exec_command_reset_status_array(ExecCommand *c, size_t n) {
4061 size_t i;
4062
4063 for (i = 0; i < n; i++)
4064 exec_status_reset(&c[i].exec_status);
4065}
4066
4067void exec_command_reset_status_list_array(ExecCommand **c, size_t n) {
4068 size_t i;
4069
4070 for (i = 0; i < n; i++) {
4071 ExecCommand *z;
4072
4073 LIST_FOREACH(command, z, c[i])
4074 exec_status_reset(&z->exec_status);
4075 }
4076}
4077
039f0e70 4078typedef struct InvalidEnvInfo {
34cf6c43 4079 const Unit *unit;
039f0e70
LP
4080 const char *path;
4081} InvalidEnvInfo;
4082
4083static void invalid_env(const char *p, void *userdata) {
4084 InvalidEnvInfo *info = userdata;
4085
f2341e0a 4086 log_unit_error(info->unit, "Ignoring invalid environment assignment '%s': %s", p, info->path);
039f0e70
LP
4087}
4088
52c239d7
LB
4089const char* exec_context_fdname(const ExecContext *c, int fd_index) {
4090 assert(c);
4091
4092 switch (fd_index) {
5073ff6b 4093
52c239d7
LB
4094 case STDIN_FILENO:
4095 if (c->std_input != EXEC_INPUT_NAMED_FD)
4096 return NULL;
5073ff6b 4097
52c239d7 4098 return c->stdio_fdname[STDIN_FILENO] ?: "stdin";
5073ff6b 4099
52c239d7
LB
4100 case STDOUT_FILENO:
4101 if (c->std_output != EXEC_OUTPUT_NAMED_FD)
4102 return NULL;
5073ff6b 4103
52c239d7 4104 return c->stdio_fdname[STDOUT_FILENO] ?: "stdout";
5073ff6b 4105
52c239d7
LB
4106 case STDERR_FILENO:
4107 if (c->std_error != EXEC_OUTPUT_NAMED_FD)
4108 return NULL;
5073ff6b 4109
52c239d7 4110 return c->stdio_fdname[STDERR_FILENO] ?: "stderr";
5073ff6b 4111
52c239d7
LB
4112 default:
4113 return NULL;
4114 }
4115}
4116
3042bbeb 4117static int exec_context_named_iofds(const ExecContext *c, const ExecParameters *p, int named_iofds[static 3]) {
da6053d0 4118 size_t i, targets;
56fbd561 4119 const char* stdio_fdname[3];
da6053d0 4120 size_t n_fds;
52c239d7
LB
4121
4122 assert(c);
4123 assert(p);
4124
4125 targets = (c->std_input == EXEC_INPUT_NAMED_FD) +
4126 (c->std_output == EXEC_OUTPUT_NAMED_FD) +
4127 (c->std_error == EXEC_OUTPUT_NAMED_FD);
4128
4129 for (i = 0; i < 3; i++)
4130 stdio_fdname[i] = exec_context_fdname(c, i);
4131
4c47affc
FB
4132 n_fds = p->n_storage_fds + p->n_socket_fds;
4133
4134 for (i = 0; i < n_fds && targets > 0; i++)
56fbd561
ZJS
4135 if (named_iofds[STDIN_FILENO] < 0 &&
4136 c->std_input == EXEC_INPUT_NAMED_FD &&
4137 stdio_fdname[STDIN_FILENO] &&
4138 streq(p->fd_names[i], stdio_fdname[STDIN_FILENO])) {
4139
52c239d7
LB
4140 named_iofds[STDIN_FILENO] = p->fds[i];
4141 targets--;
56fbd561
ZJS
4142
4143 } else if (named_iofds[STDOUT_FILENO] < 0 &&
4144 c->std_output == EXEC_OUTPUT_NAMED_FD &&
4145 stdio_fdname[STDOUT_FILENO] &&
4146 streq(p->fd_names[i], stdio_fdname[STDOUT_FILENO])) {
4147
52c239d7
LB
4148 named_iofds[STDOUT_FILENO] = p->fds[i];
4149 targets--;
56fbd561
ZJS
4150
4151 } else if (named_iofds[STDERR_FILENO] < 0 &&
4152 c->std_error == EXEC_OUTPUT_NAMED_FD &&
4153 stdio_fdname[STDERR_FILENO] &&
4154 streq(p->fd_names[i], stdio_fdname[STDERR_FILENO])) {
4155
52c239d7
LB
4156 named_iofds[STDERR_FILENO] = p->fds[i];
4157 targets--;
4158 }
4159
56fbd561 4160 return targets == 0 ? 0 : -ENOENT;
52c239d7
LB
4161}
4162
34cf6c43 4163static int exec_context_load_environment(const Unit *unit, const ExecContext *c, char ***l) {
8c7be95e
LP
4164 char **i, **r = NULL;
4165
4166 assert(c);
4167 assert(l);
4168
4169 STRV_FOREACH(i, c->environment_files) {
4170 char *fn;
52511fae
ZJS
4171 int k;
4172 unsigned n;
8c7be95e
LP
4173 bool ignore = false;
4174 char **p;
7fd1b19b 4175 _cleanup_globfree_ glob_t pglob = {};
8c7be95e
LP
4176
4177 fn = *i;
4178
4179 if (fn[0] == '-') {
4180 ignore = true;
313cefa1 4181 fn++;
8c7be95e
LP
4182 }
4183
4184 if (!path_is_absolute(fn)) {
8c7be95e
LP
4185 if (ignore)
4186 continue;
4187
4188 strv_free(r);
4189 return -EINVAL;
4190 }
4191
2bef10ab 4192 /* Filename supports globbing, take all matching files */
d8c92e8b
ZJS
4193 k = safe_glob(fn, 0, &pglob);
4194 if (k < 0) {
2bef10ab
PL
4195 if (ignore)
4196 continue;
8c7be95e 4197
2bef10ab 4198 strv_free(r);
d8c92e8b 4199 return k;
2bef10ab 4200 }
8c7be95e 4201
d8c92e8b
ZJS
4202 /* When we don't match anything, -ENOENT should be returned */
4203 assert(pglob.gl_pathc > 0);
4204
4205 for (n = 0; n < pglob.gl_pathc; n++) {
aa8fbc74 4206 k = load_env_file(NULL, pglob.gl_pathv[n], &p);
2bef10ab
PL
4207 if (k < 0) {
4208 if (ignore)
4209 continue;
8c7be95e 4210
2bef10ab 4211 strv_free(r);
2bef10ab 4212 return k;
e9c1ea9d 4213 }
ebc05a09 4214 /* Log invalid environment variables with filename */
039f0e70
LP
4215 if (p) {
4216 InvalidEnvInfo info = {
f2341e0a 4217 .unit = unit,
039f0e70
LP
4218 .path = pglob.gl_pathv[n]
4219 };
4220
4221 p = strv_env_clean_with_callback(p, invalid_env, &info);
4222 }
8c7be95e 4223
234519ae 4224 if (!r)
2bef10ab
PL
4225 r = p;
4226 else {
4227 char **m;
8c7be95e 4228
2bef10ab
PL
4229 m = strv_env_merge(2, r, p);
4230 strv_free(r);
4231 strv_free(p);
c84a9488 4232 if (!m)
2bef10ab 4233 return -ENOMEM;
2bef10ab
PL
4234
4235 r = m;
4236 }
8c7be95e
LP
4237 }
4238 }
4239
4240 *l = r;
4241
4242 return 0;
4243}
4244
6ac8fdc9 4245static bool tty_may_match_dev_console(const char *tty) {
7b912648 4246 _cleanup_free_ char *resolved = NULL;
6ac8fdc9 4247
1e22b5cd
LP
4248 if (!tty)
4249 return true;
4250
a119ec7c 4251 tty = skip_dev_prefix(tty);
6ac8fdc9
MS
4252
4253 /* trivial identity? */
4254 if (streq(tty, "console"))
4255 return true;
4256
7b912648
LP
4257 if (resolve_dev_console(&resolved) < 0)
4258 return true; /* if we could not resolve, assume it may */
6ac8fdc9
MS
4259
4260 /* "tty0" means the active VC, so it may be the same sometimes */
955f1c85 4261 return path_equal(resolved, tty) || (streq(resolved, "tty0") && tty_is_vc(tty));
6ac8fdc9
MS
4262}
4263
6c0ae739
LP
4264static bool exec_context_may_touch_tty(const ExecContext *ec) {
4265 assert(ec);
1e22b5cd 4266
6c0ae739 4267 return ec->tty_reset ||
1e22b5cd
LP
4268 ec->tty_vhangup ||
4269 ec->tty_vt_disallocate ||
6ac8fdc9
MS
4270 is_terminal_input(ec->std_input) ||
4271 is_terminal_output(ec->std_output) ||
6c0ae739
LP
4272 is_terminal_output(ec->std_error);
4273}
4274
4275bool exec_context_may_touch_console(const ExecContext *ec) {
4276
4277 return exec_context_may_touch_tty(ec) &&
1e22b5cd 4278 tty_may_match_dev_console(exec_context_tty_path(ec));
6ac8fdc9
MS
4279}
4280
15ae422b
LP
/* Writes every string of the NULL-terminated vector l to f, each preceded by a single space. A NULL vector
 * results in no output. */
static void strv_fprintf(FILE *f, char **l) {
        assert(f);

        for (char **item = l; item && *item; item++)
                fprintf(f, " %s", *item);
}
4289
34cf6c43 4290void exec_context_dump(const ExecContext *c, FILE* f, const char *prefix) {
d3070fbd 4291 ExecDirectoryType dt;
c2bbd90b 4292 char **e, **d;
94f04347 4293 unsigned i;
add00535 4294 int r;
9eba9da4 4295
5cb5a6ff
LP
4296 assert(c);
4297 assert(f);
4298
4ad49000 4299 prefix = strempty(prefix);
5cb5a6ff
LP
4300
4301 fprintf(f,
94f04347
LP
4302 "%sUMask: %04o\n"
4303 "%sWorkingDirectory: %s\n"
451a074f 4304 "%sRootDirectory: %s\n"
15ae422b 4305 "%sNonBlocking: %s\n"
64747e2d 4306 "%sPrivateTmp: %s\n"
7f112f50 4307 "%sPrivateDevices: %s\n"
59eeb84b 4308 "%sProtectKernelTunables: %s\n"
e66a2f65 4309 "%sProtectKernelModules: %s\n"
59eeb84b 4310 "%sProtectControlGroups: %s\n"
d251207d
LP
4311 "%sPrivateNetwork: %s\n"
4312 "%sPrivateUsers: %s\n"
1b8689f9
LP
4313 "%sProtectHome: %s\n"
4314 "%sProtectSystem: %s\n"
5d997827 4315 "%sMountAPIVFS: %s\n"
f3e43635 4316 "%sIgnoreSIGPIPE: %s\n"
f4170c67 4317 "%sMemoryDenyWriteExecute: %s\n"
b1edf445 4318 "%sRestrictRealtime: %s\n"
f69567cb 4319 "%sRestrictSUIDSGID: %s\n"
aecd5ac6
TM
4320 "%sKeyringMode: %s\n"
4321 "%sProtectHostname: %s\n",
5cb5a6ff 4322 prefix, c->umask,
9eba9da4 4323 prefix, c->working_directory ? c->working_directory : "/",
451a074f 4324 prefix, c->root_directory ? c->root_directory : "/",
15ae422b 4325 prefix, yes_no(c->non_blocking),
64747e2d 4326 prefix, yes_no(c->private_tmp),
7f112f50 4327 prefix, yes_no(c->private_devices),
59eeb84b 4328 prefix, yes_no(c->protect_kernel_tunables),
e66a2f65 4329 prefix, yes_no(c->protect_kernel_modules),
59eeb84b 4330 prefix, yes_no(c->protect_control_groups),
d251207d
LP
4331 prefix, yes_no(c->private_network),
4332 prefix, yes_no(c->private_users),
1b8689f9
LP
4333 prefix, protect_home_to_string(c->protect_home),
4334 prefix, protect_system_to_string(c->protect_system),
5d997827 4335 prefix, yes_no(c->mount_apivfs),
f3e43635 4336 prefix, yes_no(c->ignore_sigpipe),
f4170c67 4337 prefix, yes_no(c->memory_deny_write_execute),
b1edf445 4338 prefix, yes_no(c->restrict_realtime),
f69567cb 4339 prefix, yes_no(c->restrict_suid_sgid),
aecd5ac6
TM
4340 prefix, exec_keyring_mode_to_string(c->keyring_mode),
4341 prefix, yes_no(c->protect_hostname));
fb33a393 4342
915e6d16
LP
4343 if (c->root_image)
4344 fprintf(f, "%sRootImage: %s\n", prefix, c->root_image);
4345
8c7be95e
LP
4346 STRV_FOREACH(e, c->environment)
4347 fprintf(f, "%sEnvironment: %s\n", prefix, *e);
4348
4349 STRV_FOREACH(e, c->environment_files)
4350 fprintf(f, "%sEnvironmentFile: %s\n", prefix, *e);
94f04347 4351
b4c14404
FB
4352 STRV_FOREACH(e, c->pass_environment)
4353 fprintf(f, "%sPassEnvironment: %s\n", prefix, *e);
4354
00819cc1
LP
4355 STRV_FOREACH(e, c->unset_environment)
4356 fprintf(f, "%sUnsetEnvironment: %s\n", prefix, *e);
4357
53f47dfc
YW
4358 fprintf(f, "%sRuntimeDirectoryPreserve: %s\n", prefix, exec_preserve_mode_to_string(c->runtime_directory_preserve_mode));
4359
72fd1768 4360 for (dt = 0; dt < _EXEC_DIRECTORY_TYPE_MAX; dt++) {
3536f49e
YW
4361 fprintf(f, "%s%sMode: %04o\n", prefix, exec_directory_type_to_string(dt), c->directories[dt].mode);
4362
4363 STRV_FOREACH(d, c->directories[dt].paths)
4364 fprintf(f, "%s%s: %s\n", prefix, exec_directory_type_to_string(dt), *d);
4365 }
c2bbd90b 4366
fb33a393
LP
4367 if (c->nice_set)
4368 fprintf(f,
4369 "%sNice: %i\n",
4370 prefix, c->nice);
4371
dd6c17b1 4372 if (c->oom_score_adjust_set)
fb33a393 4373 fprintf(f,
dd6c17b1
LP
4374 "%sOOMScoreAdjust: %i\n",
4375 prefix, c->oom_score_adjust);
9eba9da4 4376
94f04347 4377 for (i = 0; i < RLIM_NLIMITS; i++)
3c11da9d 4378 if (c->rlimit[i]) {
4c3a2b84 4379 fprintf(f, "%sLimit%s: " RLIM_FMT "\n",
3c11da9d 4380 prefix, rlimit_to_string(i), c->rlimit[i]->rlim_max);
4c3a2b84 4381 fprintf(f, "%sLimit%sSoft: " RLIM_FMT "\n",
3c11da9d
EV
4382 prefix, rlimit_to_string(i), c->rlimit[i]->rlim_cur);
4383 }
94f04347 4384
f8b69d1d 4385 if (c->ioprio_set) {
1756a011 4386 _cleanup_free_ char *class_str = NULL;
f8b69d1d 4387
837df140
YW
4388 r = ioprio_class_to_string_alloc(IOPRIO_PRIO_CLASS(c->ioprio), &class_str);
4389 if (r >= 0)
4390 fprintf(f, "%sIOSchedulingClass: %s\n", prefix, class_str);
4391
4392 fprintf(f, "%sIOPriority: %lu\n", prefix, IOPRIO_PRIO_DATA(c->ioprio));
f8b69d1d 4393 }
94f04347 4394
f8b69d1d 4395 if (c->cpu_sched_set) {
1756a011 4396 _cleanup_free_ char *policy_str = NULL;
f8b69d1d 4397
837df140
YW
4398 r = sched_policy_to_string_alloc(c->cpu_sched_policy, &policy_str);
4399 if (r >= 0)
4400 fprintf(f, "%sCPUSchedulingPolicy: %s\n", prefix, policy_str);
4401
94f04347 4402 fprintf(f,
38b48754
LP
4403 "%sCPUSchedulingPriority: %i\n"
4404 "%sCPUSchedulingResetOnFork: %s\n",
38b48754
LP
4405 prefix, c->cpu_sched_priority,
4406 prefix, yes_no(c->cpu_sched_reset_on_fork));
b929bf04 4407 }
94f04347 4408
0985c7c4 4409 if (c->cpu_set.set) {
e7fca352
MS
4410 _cleanup_free_ char *affinity = NULL;
4411
4412 affinity = cpu_set_to_range_string(&c->cpu_set);
4413 fprintf(f, "%sCPUAffinity: %s\n", prefix, affinity);
94f04347
LP
4414 }
4415
b070c7c0
MS
4416 if (mpol_is_valid(numa_policy_get_type(&c->numa_policy))) {
4417 _cleanup_free_ char *nodes = NULL;
4418
4419 nodes = cpu_set_to_range_string(&c->numa_policy.nodes);
4420 fprintf(f, "%sNUMAPolicy: %s\n", prefix, mpol_to_string(numa_policy_get_type(&c->numa_policy)));
4421 fprintf(f, "%sNUMAMask: %s\n", prefix, strnull(nodes));
4422 }
4423
3a43da28 4424 if (c->timer_slack_nsec != NSEC_INFINITY)
ccd06097 4425 fprintf(f, "%sTimerSlackNSec: "NSEC_FMT "\n", prefix, c->timer_slack_nsec);
94f04347
LP
4426
4427 fprintf(f,
80876c20
LP
4428 "%sStandardInput: %s\n"
4429 "%sStandardOutput: %s\n"
4430 "%sStandardError: %s\n",
4431 prefix, exec_input_to_string(c->std_input),
4432 prefix, exec_output_to_string(c->std_output),
4433 prefix, exec_output_to_string(c->std_error));
4434
befc4a80
LP
4435 if (c->std_input == EXEC_INPUT_NAMED_FD)
4436 fprintf(f, "%sStandardInputFileDescriptorName: %s\n", prefix, c->stdio_fdname[STDIN_FILENO]);
4437 if (c->std_output == EXEC_OUTPUT_NAMED_FD)
4438 fprintf(f, "%sStandardOutputFileDescriptorName: %s\n", prefix, c->stdio_fdname[STDOUT_FILENO]);
4439 if (c->std_error == EXEC_OUTPUT_NAMED_FD)
4440 fprintf(f, "%sStandardErrorFileDescriptorName: %s\n", prefix, c->stdio_fdname[STDERR_FILENO]);
4441
4442 if (c->std_input == EXEC_INPUT_FILE)
4443 fprintf(f, "%sStandardInputFile: %s\n", prefix, c->stdio_file[STDIN_FILENO]);
4444 if (c->std_output == EXEC_OUTPUT_FILE)
4445 fprintf(f, "%sStandardOutputFile: %s\n", prefix, c->stdio_file[STDOUT_FILENO]);
566b7d23
ZD
4446 if (c->std_output == EXEC_OUTPUT_FILE_APPEND)
4447 fprintf(f, "%sStandardOutputFileToAppend: %s\n", prefix, c->stdio_file[STDOUT_FILENO]);
befc4a80
LP
4448 if (c->std_error == EXEC_OUTPUT_FILE)
4449 fprintf(f, "%sStandardErrorFile: %s\n", prefix, c->stdio_file[STDERR_FILENO]);
566b7d23
ZD
4450 if (c->std_error == EXEC_OUTPUT_FILE_APPEND)
4451 fprintf(f, "%sStandardErrorFileToAppend: %s\n", prefix, c->stdio_file[STDERR_FILENO]);
befc4a80 4452
80876c20
LP
4453 if (c->tty_path)
4454 fprintf(f,
6ea832a2
LP
4455 "%sTTYPath: %s\n"
4456 "%sTTYReset: %s\n"
4457 "%sTTYVHangup: %s\n"
4458 "%sTTYVTDisallocate: %s\n",
4459 prefix, c->tty_path,
4460 prefix, yes_no(c->tty_reset),
4461 prefix, yes_no(c->tty_vhangup),
4462 prefix, yes_no(c->tty_vt_disallocate));
94f04347 4463
9f6444eb
LP
4464 if (IN_SET(c->std_output,
4465 EXEC_OUTPUT_SYSLOG,
4466 EXEC_OUTPUT_KMSG,
4467 EXEC_OUTPUT_JOURNAL,
4468 EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
4469 EXEC_OUTPUT_KMSG_AND_CONSOLE,
4470 EXEC_OUTPUT_JOURNAL_AND_CONSOLE) ||
4471 IN_SET(c->std_error,
4472 EXEC_OUTPUT_SYSLOG,
4473 EXEC_OUTPUT_KMSG,
4474 EXEC_OUTPUT_JOURNAL,
4475 EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
4476 EXEC_OUTPUT_KMSG_AND_CONSOLE,
4477 EXEC_OUTPUT_JOURNAL_AND_CONSOLE)) {
f8b69d1d 4478
5ce70e5b 4479 _cleanup_free_ char *fac_str = NULL, *lvl_str = NULL;
f8b69d1d 4480
837df140
YW
4481 r = log_facility_unshifted_to_string_alloc(c->syslog_priority >> 3, &fac_str);
4482 if (r >= 0)
4483 fprintf(f, "%sSyslogFacility: %s\n", prefix, fac_str);
f8b69d1d 4484
837df140
YW
4485 r = log_level_to_string_alloc(LOG_PRI(c->syslog_priority), &lvl_str);
4486 if (r >= 0)
4487 fprintf(f, "%sSyslogLevel: %s\n", prefix, lvl_str);
f8b69d1d 4488 }
94f04347 4489
d3070fbd
LP
4490 if (c->log_level_max >= 0) {
4491 _cleanup_free_ char *t = NULL;
4492
4493 (void) log_level_to_string_alloc(c->log_level_max, &t);
4494
4495 fprintf(f, "%sLogLevelMax: %s\n", prefix, strna(t));
4496 }
4497
90fc172e
AZ
4498 if (c->log_rate_limit_interval_usec > 0) {
4499 char buf_timespan[FORMAT_TIMESPAN_MAX];
4500
4501 fprintf(f,
4502 "%sLogRateLimitIntervalSec: %s\n",
4503 prefix, format_timespan(buf_timespan, sizeof(buf_timespan), c->log_rate_limit_interval_usec, USEC_PER_SEC));
4504 }
4505
4506 if (c->log_rate_limit_burst > 0)
4507 fprintf(f, "%sLogRateLimitBurst: %u\n", prefix, c->log_rate_limit_burst);
4508
d3070fbd
LP
4509 if (c->n_log_extra_fields > 0) {
4510 size_t j;
4511
4512 for (j = 0; j < c->n_log_extra_fields; j++) {
4513 fprintf(f, "%sLogExtraFields: ", prefix);
4514 fwrite(c->log_extra_fields[j].iov_base,
4515 1, c->log_extra_fields[j].iov_len,
4516 f);
4517 fputc('\n', f);
4518 }
4519 }
4520
07d46372
YW
4521 if (c->secure_bits) {
4522 _cleanup_free_ char *str = NULL;
4523
4524 r = secure_bits_to_string_alloc(c->secure_bits, &str);
4525 if (r >= 0)
4526 fprintf(f, "%sSecure Bits: %s\n", prefix, str);
4527 }
94f04347 4528
a103496c 4529 if (c->capability_bounding_set != CAP_ALL) {
dd1f5bd0 4530 _cleanup_free_ char *str = NULL;
94f04347 4531
dd1f5bd0
YW
4532 r = capability_set_to_string_alloc(c->capability_bounding_set, &str);
4533 if (r >= 0)
4534 fprintf(f, "%sCapabilityBoundingSet: %s\n", prefix, str);
755d4b67
IP
4535 }
4536
4537 if (c->capability_ambient_set != 0) {
dd1f5bd0 4538 _cleanup_free_ char *str = NULL;
755d4b67 4539
dd1f5bd0
YW
4540 r = capability_set_to_string_alloc(c->capability_ambient_set, &str);
4541 if (r >= 0)
4542 fprintf(f, "%sAmbientCapabilities: %s\n", prefix, str);
94f04347
LP
4543 }
4544
4545 if (c->user)
f2d3769a 4546 fprintf(f, "%sUser: %s\n", prefix, c->user);
94f04347 4547 if (c->group)
f2d3769a 4548 fprintf(f, "%sGroup: %s\n", prefix, c->group);
94f04347 4549
29206d46
LP
4550 fprintf(f, "%sDynamicUser: %s\n", prefix, yes_no(c->dynamic_user));
4551
ac6e8be6 4552 if (!strv_isempty(c->supplementary_groups)) {
94f04347 4553 fprintf(f, "%sSupplementaryGroups:", prefix);
15ae422b
LP
4554 strv_fprintf(f, c->supplementary_groups);
4555 fputs("\n", f);
4556 }
94f04347 4557
5b6319dc 4558 if (c->pam_name)
f2d3769a 4559 fprintf(f, "%sPAMName: %s\n", prefix, c->pam_name);
5b6319dc 4560
58629001 4561 if (!strv_isempty(c->read_write_paths)) {
2a624c36
AP
4562 fprintf(f, "%sReadWritePaths:", prefix);
4563 strv_fprintf(f, c->read_write_paths);
15ae422b
LP
4564 fputs("\n", f);
4565 }
4566
58629001 4567 if (!strv_isempty(c->read_only_paths)) {
2a624c36
AP
4568 fprintf(f, "%sReadOnlyPaths:", prefix);
4569 strv_fprintf(f, c->read_only_paths);
15ae422b
LP
4570 fputs("\n", f);
4571 }
94f04347 4572
58629001 4573 if (!strv_isempty(c->inaccessible_paths)) {
2a624c36
AP
4574 fprintf(f, "%sInaccessiblePaths:", prefix);
4575 strv_fprintf(f, c->inaccessible_paths);
94f04347
LP
4576 fputs("\n", f);
4577 }
2e22afe9 4578
d2d6c096 4579 if (c->n_bind_mounts > 0)
4ca763a9
YW
4580 for (i = 0; i < c->n_bind_mounts; i++)
4581 fprintf(f, "%s%s: %s%s:%s:%s\n", prefix,
d2d6c096 4582 c->bind_mounts[i].read_only ? "BindReadOnlyPaths" : "BindPaths",
4ca763a9 4583 c->bind_mounts[i].ignore_enoent ? "-": "",
d2d6c096
LP
4584 c->bind_mounts[i].source,
4585 c->bind_mounts[i].destination,
4586 c->bind_mounts[i].recursive ? "rbind" : "norbind");
d2d6c096 4587
2abd4e38
YW
4588 if (c->n_temporary_filesystems > 0)
4589 for (i = 0; i < c->n_temporary_filesystems; i++) {
4590 TemporaryFileSystem *t = c->temporary_filesystems + i;
4591
4592 fprintf(f, "%sTemporaryFileSystem: %s%s%s\n", prefix,
4593 t->path,
4594 isempty(t->options) ? "" : ":",
4595 strempty(t->options));
4596 }
4597
169c1bda
LP
4598 if (c->utmp_id)
4599 fprintf(f,
4600 "%sUtmpIdentifier: %s\n",
4601 prefix, c->utmp_id);
7b52a628
MS
4602
4603 if (c->selinux_context)
4604 fprintf(f,
5f8640fb
LP
4605 "%sSELinuxContext: %s%s\n",
4606 prefix, c->selinux_context_ignore ? "-" : "", c->selinux_context);
17df7223 4607
80c21aea
WC
4608 if (c->apparmor_profile)
4609 fprintf(f,
4610 "%sAppArmorProfile: %s%s\n",
4611 prefix, c->apparmor_profile_ignore ? "-" : "", c->apparmor_profile);
4612
4613 if (c->smack_process_label)
4614 fprintf(f,
4615 "%sSmackProcessLabel: %s%s\n",
4616 prefix, c->smack_process_label_ignore ? "-" : "", c->smack_process_label);
4617
050f7277 4618 if (c->personality != PERSONALITY_INVALID)
ac45f971
LP
4619 fprintf(f,
4620 "%sPersonality: %s\n",
4621 prefix, strna(personality_to_string(c->personality)));
4622
78e864e5
TM
4623 fprintf(f,
4624 "%sLockPersonality: %s\n",
4625 prefix, yes_no(c->lock_personality));
4626
17df7223 4627 if (c->syscall_filter) {
349cc4a5 4628#if HAVE_SECCOMP
17df7223 4629 Iterator j;
8cfa775f 4630 void *id, *val;
17df7223 4631 bool first = true;
351a19b1 4632#endif
17df7223
LP
4633
4634 fprintf(f,
57183d11 4635 "%sSystemCallFilter: ",
17df7223
LP
4636 prefix);
4637
4638 if (!c->syscall_whitelist)
4639 fputc('~', f);
4640
349cc4a5 4641#if HAVE_SECCOMP
8cfa775f 4642 HASHMAP_FOREACH_KEY(val, id, c->syscall_filter, j) {
17df7223 4643 _cleanup_free_ char *name = NULL;
8cfa775f
YW
4644 const char *errno_name = NULL;
4645 int num = PTR_TO_INT(val);
17df7223
LP
4646
4647 if (first)
4648 first = false;
4649 else
4650 fputc(' ', f);
4651
57183d11 4652 name = seccomp_syscall_resolve_num_arch(SCMP_ARCH_NATIVE, PTR_TO_INT(id) - 1);
17df7223 4653 fputs(strna(name), f);
8cfa775f
YW
4654
4655 if (num >= 0) {
4656 errno_name = errno_to_name(num);
4657 if (errno_name)
4658 fprintf(f, ":%s", errno_name);
4659 else
4660 fprintf(f, ":%d", num);
4661 }
17df7223 4662 }
351a19b1 4663#endif
17df7223
LP
4664
4665 fputc('\n', f);
4666 }
4667
57183d11 4668 if (c->syscall_archs) {
349cc4a5 4669#if HAVE_SECCOMP
57183d11
LP
4670 Iterator j;
4671 void *id;
4672#endif
4673
4674 fprintf(f,
4675 "%sSystemCallArchitectures:",
4676 prefix);
4677
349cc4a5 4678#if HAVE_SECCOMP
57183d11
LP
4679 SET_FOREACH(id, c->syscall_archs, j)
4680 fprintf(f, " %s", strna(seccomp_arch_to_string(PTR_TO_UINT32(id) - 1)));
4681#endif
4682 fputc('\n', f);
4683 }
4684
add00535
LP
4685 if (exec_context_restrict_namespaces_set(c)) {
4686 _cleanup_free_ char *s = NULL;
4687
86c2a9f1 4688 r = namespace_flags_to_string(c->restrict_namespaces, &s);
add00535
LP
4689 if (r >= 0)
4690 fprintf(f, "%sRestrictNamespaces: %s\n",
4691 prefix, s);
4692 }
4693
a8d08f39
LP
4694 if (c->network_namespace_path)
4695 fprintf(f,
4696 "%sNetworkNamespacePath: %s\n",
4697 prefix, c->network_namespace_path);
4698
3df90f24
YW
4699 if (c->syscall_errno > 0) {
4700 const char *errno_name;
4701
4702 fprintf(f, "%sSystemCallErrorNumber: ", prefix);
4703
4704 errno_name = errno_to_name(c->syscall_errno);
4705 if (errno_name)
4706 fprintf(f, "%s\n", errno_name);
4707 else
4708 fprintf(f, "%d\n", c->syscall_errno);
4709 }
5cb5a6ff
LP
4710}
4711
34cf6c43 4712bool exec_context_maintains_privileges(const ExecContext *c) {
a931ad47
LP
4713 assert(c);
4714
61233823 4715 /* Returns true if the process forked off would run under
a931ad47
LP
4716 * an unchanged UID or as root. */
4717
4718 if (!c->user)
4719 return true;
4720
4721 if (streq(c->user, "root") || streq(c->user, "0"))
4722 return true;
4723
4724 return false;
4725}
4726
34cf6c43 4727int exec_context_get_effective_ioprio(const ExecContext *c) {
7f452159
LP
4728 int p;
4729
4730 assert(c);
4731
4732 if (c->ioprio_set)
4733 return c->ioprio;
4734
4735 p = ioprio_get(IOPRIO_WHO_PROCESS, 0);
4736 if (p < 0)
4737 return IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 4);
4738
4739 return p;
4740}
4741
d3070fbd
LP
4742void exec_context_free_log_extra_fields(ExecContext *c) {
4743 size_t l;
4744
4745 assert(c);
4746
4747 for (l = 0; l < c->n_log_extra_fields; l++)
4748 free(c->log_extra_fields[l].iov_base);
4749 c->log_extra_fields = mfree(c->log_extra_fields);
4750 c->n_log_extra_fields = 0;
4751}
4752
6f765baf
LP
4753void exec_context_revert_tty(ExecContext *c) {
4754 int r;
4755
4756 assert(c);
4757
4758 /* First, reset the TTY (possibly kicking everybody else from the TTY) */
4759 exec_context_tty_reset(c, NULL);
4760
4761 /* And then undo what chown_terminal() did earlier. Note that we only do this if we have a path
4762 * configured. If the TTY was passed to us as file descriptor we assume the TTY is opened and managed
4763 * by whoever passed it to us and thus knows better when and how to chmod()/chown() it back. */
4764
4765 if (exec_context_may_touch_tty(c)) {
4766 const char *path;
4767
4768 path = exec_context_tty_path(c);
4769 if (path) {
4770 r = chmod_and_chown(path, TTY_MODE, 0, TTY_GID);
4771 if (r < 0 && r != -ENOENT)
4772 log_warning_errno(r, "Failed to reset TTY ownership/access mode of %s, ignoring: %m", path);
4773 }
4774 }
4775}
4776
4c2f5842
LP
4777int exec_context_get_clean_directories(
4778 ExecContext *c,
4779 char **prefix,
4780 ExecCleanMask mask,
4781 char ***ret) {
4782
4783 _cleanup_strv_free_ char **l = NULL;
4784 ExecDirectoryType t;
4785 int r;
4786
4787 assert(c);
4788 assert(prefix);
4789 assert(ret);
4790
4791 for (t = 0; t < _EXEC_DIRECTORY_TYPE_MAX; t++) {
4792 char **i;
4793
4794 if (!FLAGS_SET(mask, 1U << t))
4795 continue;
4796
4797 if (!prefix[t])
4798 continue;
4799
4800 STRV_FOREACH(i, c->directories[t].paths) {
4801 char *j;
4802
4803 j = path_join(prefix[t], *i);
4804 if (!j)
4805 return -ENOMEM;
4806
4807 r = strv_consume(&l, j);
4808 if (r < 0)
4809 return r;
4810 }
4811 }
4812
4813 *ret = TAKE_PTR(l);
4814 return 0;
4815}
4816
4817int exec_context_get_clean_mask(ExecContext *c, ExecCleanMask *ret) {
4818 ExecCleanMask mask = 0;
4819
4820 assert(c);
4821 assert(ret);
4822
4823 for (ExecDirectoryType t = 0; t < _EXEC_DIRECTORY_TYPE_MAX; t++)
4824 if (!strv_isempty(c->directories[t].paths))
4825 mask |= 1U << t;
4826
4827 *ret = mask;
4828 return 0;
4829}
4830
b58b4116 4831void exec_status_start(ExecStatus *s, pid_t pid) {
034c6ed7 4832 assert(s);
5cb5a6ff 4833
2ed26ed0
LP
4834 *s = (ExecStatus) {
4835 .pid = pid,
4836 };
4837
b58b4116
LP
4838 dual_timestamp_get(&s->start_timestamp);
4839}
4840
34cf6c43 4841void exec_status_exit(ExecStatus *s, const ExecContext *context, pid_t pid, int code, int status) {
b58b4116
LP
4842 assert(s);
4843
2ed26ed0
LP
4844 if (s->pid != pid) {
4845 *s = (ExecStatus) {
4846 .pid = pid,
4847 };
4848 }
b58b4116 4849
63983207 4850 dual_timestamp_get(&s->exit_timestamp);
9fb86720 4851
034c6ed7
LP
4852 s->code = code;
4853 s->status = status;
169c1bda 4854
6f765baf
LP
4855 if (context && context->utmp_id)
4856 (void) utmp_put_dead_process(context->utmp_id, pid, code, status);
9fb86720
LP
4857}
4858
6a1d4d9f
LP
4859void exec_status_reset(ExecStatus *s) {
4860 assert(s);
4861
4862 *s = (ExecStatus) {};
4863}
4864
34cf6c43 4865void exec_status_dump(const ExecStatus *s, FILE *f, const char *prefix) {
9fb86720
LP
4866 char buf[FORMAT_TIMESTAMP_MAX];
4867
4868 assert(s);
4869 assert(f);
4870
9fb86720
LP
4871 if (s->pid <= 0)
4872 return;
4873
4c940960
LP
4874 prefix = strempty(prefix);
4875
9fb86720 4876 fprintf(f,
ccd06097
ZJS
4877 "%sPID: "PID_FMT"\n",
4878 prefix, s->pid);
9fb86720 4879
af9d16e1 4880 if (dual_timestamp_is_set(&s->start_timestamp))
9fb86720
LP
4881 fprintf(f,
4882 "%sStart Timestamp: %s\n",
63983207 4883 prefix, format_timestamp(buf, sizeof(buf), s->start_timestamp.realtime));
9fb86720 4884
af9d16e1 4885 if (dual_timestamp_is_set(&s->exit_timestamp))
9fb86720
LP
4886 fprintf(f,
4887 "%sExit Timestamp: %s\n"
4888 "%sExit Code: %s\n"
4889 "%sExit Status: %i\n",
63983207 4890 prefix, format_timestamp(buf, sizeof(buf), s->exit_timestamp.realtime),
9fb86720
LP
4891 prefix, sigchld_code_to_string(s->code),
4892 prefix, s->status);
5cb5a6ff 4893}
44d8db9e 4894
34cf6c43 4895static char *exec_command_line(char **argv) {
44d8db9e
LP
4896 size_t k;
4897 char *n, *p, **a;
4898 bool first = true;
4899
9e2f7c11 4900 assert(argv);
44d8db9e 4901
9164977d 4902 k = 1;
9e2f7c11 4903 STRV_FOREACH(a, argv)
44d8db9e
LP
4904 k += strlen(*a)+3;
4905
5cd9cd35
LP
4906 n = new(char, k);
4907 if (!n)
44d8db9e
LP
4908 return NULL;
4909
4910 p = n;
9e2f7c11 4911 STRV_FOREACH(a, argv) {
44d8db9e
LP
4912
4913 if (!first)
4914 *(p++) = ' ';
4915 else
4916 first = false;
4917
4918 if (strpbrk(*a, WHITESPACE)) {
4919 *(p++) = '\'';
4920 p = stpcpy(p, *a);
4921 *(p++) = '\'';
4922 } else
4923 p = stpcpy(p, *a);
4924
4925 }
4926
9164977d
LP
4927 *p = 0;
4928
44d8db9e
LP
4929 /* FIXME: this doesn't really handle arguments that have
4930 * spaces and ticks in them */
4931
4932 return n;
4933}
4934
34cf6c43 4935static void exec_command_dump(ExecCommand *c, FILE *f, const char *prefix) {
e1d75803 4936 _cleanup_free_ char *cmd = NULL;
4c940960 4937 const char *prefix2;
44d8db9e
LP
4938
4939 assert(c);
4940 assert(f);
4941
4c940960 4942 prefix = strempty(prefix);
63c372cb 4943 prefix2 = strjoina(prefix, "\t");
44d8db9e 4944
9e2f7c11 4945 cmd = exec_command_line(c->argv);
44d8db9e
LP
4946 fprintf(f,
4947 "%sCommand Line: %s\n",
4bbccb02 4948 prefix, cmd ? cmd : strerror_safe(ENOMEM));
44d8db9e 4949
9fb86720 4950 exec_status_dump(&c->exec_status, f, prefix2);
44d8db9e
LP
4951}
4952
4953void exec_command_dump_list(ExecCommand *c, FILE *f, const char *prefix) {
4954 assert(f);
4955
4c940960 4956 prefix = strempty(prefix);
44d8db9e
LP
4957
4958 LIST_FOREACH(command, c, c)
4959 exec_command_dump(c, f, prefix);
4960}
94f04347 4961
a6a80b4f
LP
4962void exec_command_append_list(ExecCommand **l, ExecCommand *e) {
4963 ExecCommand *end;
4964
4965 assert(l);
4966 assert(e);
4967
4968 if (*l) {
35b8ca3a 4969 /* It's kind of important, that we keep the order here */
71fda00f
LP
4970 LIST_FIND_TAIL(command, *l, end);
4971 LIST_INSERT_AFTER(command, *l, end, e);
a6a80b4f
LP
4972 } else
4973 *l = e;
4974}
4975
26fd040d
LP
4976int exec_command_set(ExecCommand *c, const char *path, ...) {
4977 va_list ap;
4978 char **l, *p;
4979
4980 assert(c);
4981 assert(path);
4982
4983 va_start(ap, path);
4984 l = strv_new_ap(path, ap);
4985 va_end(ap);
4986
4987 if (!l)
4988 return -ENOMEM;
4989
250a918d
LP
4990 p = strdup(path);
4991 if (!p) {
26fd040d
LP
4992 strv_free(l);
4993 return -ENOMEM;
4994 }
4995
6897dfe8 4996 free_and_replace(c->path, p);
26fd040d 4997
130d3d22 4998 return strv_free_and_replace(c->argv, l);
26fd040d
LP
4999}
5000
86b23b07 5001int exec_command_append(ExecCommand *c, const char *path, ...) {
e63ff941 5002 _cleanup_strv_free_ char **l = NULL;
86b23b07 5003 va_list ap;
86b23b07
JS
5004 int r;
5005
5006 assert(c);
5007 assert(path);
5008
5009 va_start(ap, path);
5010 l = strv_new_ap(path, ap);
5011 va_end(ap);
5012
5013 if (!l)
5014 return -ENOMEM;
5015
e287086b 5016 r = strv_extend_strv(&c->argv, l, false);
e63ff941 5017 if (r < 0)
86b23b07 5018 return r;
86b23b07
JS
5019
5020 return 0;
5021}
5022
e8a565cb
YW
5023static void *remove_tmpdir_thread(void *p) {
5024 _cleanup_free_ char *path = p;
86b23b07 5025
e8a565cb
YW
5026 (void) rm_rf(path, REMOVE_ROOT|REMOVE_PHYSICAL);
5027 return NULL;
5028}
5029
5030static ExecRuntime* exec_runtime_free(ExecRuntime *rt, bool destroy) {
5031 int r;
5032
5033 if (!rt)
5034 return NULL;
5035
5036 if (rt->manager)
5037 (void) hashmap_remove(rt->manager->exec_runtime_by_id, rt->id);
5038
5039 /* When destroy is true, then rm_rf tmp_dir and var_tmp_dir. */
5040 if (destroy && rt->tmp_dir) {
5041 log_debug("Spawning thread to nuke %s", rt->tmp_dir);
5042
5043 r = asynchronous_job(remove_tmpdir_thread, rt->tmp_dir);
5044 if (r < 0) {
5045 log_warning_errno(r, "Failed to nuke %s: %m", rt->tmp_dir);
5046 free(rt->tmp_dir);
5047 }
5048
5049 rt->tmp_dir = NULL;
5050 }
613b411c 5051
e8a565cb
YW
5052 if (destroy && rt->var_tmp_dir) {
5053 log_debug("Spawning thread to nuke %s", rt->var_tmp_dir);
5054
5055 r = asynchronous_job(remove_tmpdir_thread, rt->var_tmp_dir);
5056 if (r < 0) {
5057 log_warning_errno(r, "Failed to nuke %s: %m", rt->var_tmp_dir);
5058 free(rt->var_tmp_dir);
5059 }
5060
5061 rt->var_tmp_dir = NULL;
5062 }
5063
5064 rt->id = mfree(rt->id);
5065 rt->tmp_dir = mfree(rt->tmp_dir);
5066 rt->var_tmp_dir = mfree(rt->var_tmp_dir);
5067 safe_close_pair(rt->netns_storage_socket);
5068 return mfree(rt);
5069}
5070
5071static void exec_runtime_freep(ExecRuntime **rt) {
da6bc6ed 5072 (void) exec_runtime_free(*rt, false);
e8a565cb
YW
5073}
5074
8e8009dc
LP
5075static int exec_runtime_allocate(ExecRuntime **ret) {
5076 ExecRuntime *n;
613b411c 5077
8e8009dc 5078 assert(ret);
613b411c 5079
8e8009dc
LP
5080 n = new(ExecRuntime, 1);
5081 if (!n)
613b411c
LP
5082 return -ENOMEM;
5083
8e8009dc
LP
5084 *n = (ExecRuntime) {
5085 .netns_storage_socket = { -1, -1 },
5086 };
5087
5088 *ret = n;
613b411c
LP
5089 return 0;
5090}
5091
e8a565cb
YW
5092static int exec_runtime_add(
5093 Manager *m,
5094 const char *id,
5095 const char *tmp_dir,
5096 const char *var_tmp_dir,
5097 const int netns_storage_socket[2],
5098 ExecRuntime **ret) {
5099
5100 _cleanup_(exec_runtime_freep) ExecRuntime *rt = NULL;
613b411c
LP
5101 int r;
5102
e8a565cb 5103 assert(m);
613b411c
LP
5104 assert(id);
5105
e8a565cb
YW
5106 r = hashmap_ensure_allocated(&m->exec_runtime_by_id, &string_hash_ops);
5107 if (r < 0)
5108 return r;
613b411c 5109
e8a565cb 5110 r = exec_runtime_allocate(&rt);
613b411c
LP
5111 if (r < 0)
5112 return r;
5113
e8a565cb
YW
5114 rt->id = strdup(id);
5115 if (!rt->id)
5116 return -ENOMEM;
5117
5118 if (tmp_dir) {
5119 rt->tmp_dir = strdup(tmp_dir);
5120 if (!rt->tmp_dir)
5121 return -ENOMEM;
5122
5123 /* When tmp_dir is set, then we require var_tmp_dir is also set. */
5124 assert(var_tmp_dir);
5125 rt->var_tmp_dir = strdup(var_tmp_dir);
5126 if (!rt->var_tmp_dir)
5127 return -ENOMEM;
5128 }
5129
5130 if (netns_storage_socket) {
5131 rt->netns_storage_socket[0] = netns_storage_socket[0];
5132 rt->netns_storage_socket[1] = netns_storage_socket[1];
613b411c
LP
5133 }
5134
e8a565cb
YW
5135 r = hashmap_put(m->exec_runtime_by_id, rt->id, rt);
5136 if (r < 0)
5137 return r;
5138
5139 rt->manager = m;
5140
5141 if (ret)
5142 *ret = rt;
5143
5144 /* do not remove created ExecRuntime object when the operation succeeds. */
5145 rt = NULL;
5146 return 0;
5147}
5148
5149static int exec_runtime_make(Manager *m, const ExecContext *c, const char *id, ExecRuntime **ret) {
5150 _cleanup_free_ char *tmp_dir = NULL, *var_tmp_dir = NULL;
2fa3742d 5151 _cleanup_close_pair_ int netns_storage_socket[2] = { -1, -1 };
e8a565cb
YW
5152 int r;
5153
5154 assert(m);
5155 assert(c);
5156 assert(id);
5157
5158 /* It is not necessary to create ExecRuntime object. */
a8d08f39 5159 if (!c->private_network && !c->private_tmp && !c->network_namespace_path)
e8a565cb
YW
5160 return 0;
5161
5162 if (c->private_tmp) {
5163 r = setup_tmp_dirs(id, &tmp_dir, &var_tmp_dir);
613b411c
LP
5164 if (r < 0)
5165 return r;
5166 }
5167
a8d08f39 5168 if (c->private_network || c->network_namespace_path) {
e8a565cb
YW
5169 if (socketpair(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC, 0, netns_storage_socket) < 0)
5170 return -errno;
5171 }
5172
5173 r = exec_runtime_add(m, id, tmp_dir, var_tmp_dir, netns_storage_socket, ret);
5174 if (r < 0)
5175 return r;
5176
5177 /* Avoid cleanup */
2fa3742d 5178 netns_storage_socket[0] = netns_storage_socket[1] = -1;
613b411c
LP
5179 return 1;
5180}
5181
e8a565cb
YW
5182int exec_runtime_acquire(Manager *m, const ExecContext *c, const char *id, bool create, ExecRuntime **ret) {
5183 ExecRuntime *rt;
5184 int r;
613b411c 5185
e8a565cb
YW
5186 assert(m);
5187 assert(id);
5188 assert(ret);
5189
5190 rt = hashmap_get(m->exec_runtime_by_id, id);
5191 if (rt)
5192 /* We already have a ExecRuntime object, let's increase the ref count and reuse it */
5193 goto ref;
5194
5195 if (!create)
5196 return 0;
5197
5198 /* If not found, then create a new object. */
5199 r = exec_runtime_make(m, c, id, &rt);
5200 if (r <= 0)
5201 /* When r == 0, it is not necessary to create ExecRuntime object. */
5202 return r;
613b411c 5203
e8a565cb
YW
5204ref:
5205 /* increment reference counter. */
5206 rt->n_ref++;
5207 *ret = rt;
5208 return 1;
5209}
613b411c 5210
e8a565cb
YW
5211ExecRuntime *exec_runtime_unref(ExecRuntime *rt, bool destroy) {
5212 if (!rt)
613b411c
LP
5213 return NULL;
5214
e8a565cb 5215 assert(rt->n_ref > 0);
613b411c 5216
e8a565cb
YW
5217 rt->n_ref--;
5218 if (rt->n_ref > 0)
f2341e0a
LP
5219 return NULL;
5220
e8a565cb 5221 return exec_runtime_free(rt, destroy);
613b411c
LP
5222}
5223
e8a565cb
YW
5224int exec_runtime_serialize(const Manager *m, FILE *f, FDSet *fds) {
5225 ExecRuntime *rt;
5226 Iterator i;
5227
5228 assert(m);
613b411c
LP
5229 assert(f);
5230 assert(fds);
5231
e8a565cb
YW
5232 HASHMAP_FOREACH(rt, m->exec_runtime_by_id, i) {
5233 fprintf(f, "exec-runtime=%s", rt->id);
613b411c 5234
e8a565cb
YW
5235 if (rt->tmp_dir)
5236 fprintf(f, " tmp-dir=%s", rt->tmp_dir);
613b411c 5237
e8a565cb
YW
5238 if (rt->var_tmp_dir)
5239 fprintf(f, " var-tmp-dir=%s", rt->var_tmp_dir);
613b411c 5240
e8a565cb
YW
5241 if (rt->netns_storage_socket[0] >= 0) {
5242 int copy;
613b411c 5243
e8a565cb
YW
5244 copy = fdset_put_dup(fds, rt->netns_storage_socket[0]);
5245 if (copy < 0)
5246 return copy;
613b411c 5247
e8a565cb
YW
5248 fprintf(f, " netns-socket-0=%i", copy);
5249 }
613b411c 5250
e8a565cb
YW
5251 if (rt->netns_storage_socket[1] >= 0) {
5252 int copy;
613b411c 5253
e8a565cb
YW
5254 copy = fdset_put_dup(fds, rt->netns_storage_socket[1]);
5255 if (copy < 0)
5256 return copy;
613b411c 5257
e8a565cb
YW
5258 fprintf(f, " netns-socket-1=%i", copy);
5259 }
5260
5261 fputc('\n', f);
613b411c
LP
5262 }
5263
5264 return 0;
5265}
5266
e8a565cb
YW
5267int exec_runtime_deserialize_compat(Unit *u, const char *key, const char *value, FDSet *fds) {
5268 _cleanup_(exec_runtime_freep) ExecRuntime *rt_create = NULL;
5269 ExecRuntime *rt;
613b411c
LP
5270 int r;
5271
e8a565cb
YW
5272 /* This is for the migration from old (v237 or earlier) deserialization text.
5273 * Due to the bug #7790, this may not work with the units that use JoinsNamespaceOf=.
5274 * Even if the ExecRuntime object originally created by the other unit, we cannot judge
5275 * so or not from the serialized text, then we always creates a new object owned by this. */
5276
5277 assert(u);
613b411c
LP
5278 assert(key);
5279 assert(value);
5280
e8a565cb
YW
5281 /* Manager manages ExecRuntime objects by the unit id.
5282 * So, we omit the serialized text when the unit does not have id (yet?)... */
5283 if (isempty(u->id)) {
5284 log_unit_debug(u, "Invocation ID not found. Dropping runtime parameter.");
5285 return 0;
5286 }
613b411c 5287
e8a565cb
YW
5288 r = hashmap_ensure_allocated(&u->manager->exec_runtime_by_id, &string_hash_ops);
5289 if (r < 0) {
5290 log_unit_debug_errno(u, r, "Failed to allocate storage for runtime parameter: %m");
5291 return 0;
5292 }
5293
5294 rt = hashmap_get(u->manager->exec_runtime_by_id, u->id);
5295 if (!rt) {
5296 r = exec_runtime_allocate(&rt_create);
613b411c 5297 if (r < 0)
f2341e0a 5298 return log_oom();
613b411c 5299
e8a565cb
YW
5300 rt_create->id = strdup(u->id);
5301 if (!rt_create->id)
5302 return log_oom();
5303
5304 rt = rt_create;
5305 }
5306
5307 if (streq(key, "tmp-dir")) {
5308 char *copy;
5309
613b411c
LP
5310 copy = strdup(value);
5311 if (!copy)
5312 return log_oom();
5313
e8a565cb 5314 free_and_replace(rt->tmp_dir, copy);
613b411c
LP
5315
5316 } else if (streq(key, "var-tmp-dir")) {
5317 char *copy;
5318
613b411c
LP
5319 copy = strdup(value);
5320 if (!copy)
5321 return log_oom();
5322
e8a565cb 5323 free_and_replace(rt->var_tmp_dir, copy);
613b411c
LP
5324
5325 } else if (streq(key, "netns-socket-0")) {
5326 int fd;
5327
e8a565cb 5328 if (safe_atoi(value, &fd) < 0 || !fdset_contains(fds, fd)) {
f2341e0a 5329 log_unit_debug(u, "Failed to parse netns socket value: %s", value);
e8a565cb 5330 return 0;
613b411c 5331 }
e8a565cb
YW
5332
5333 safe_close(rt->netns_storage_socket[0]);
5334 rt->netns_storage_socket[0] = fdset_remove(fds, fd);
5335
613b411c
LP
5336 } else if (streq(key, "netns-socket-1")) {
5337 int fd;
5338
e8a565cb 5339 if (safe_atoi(value, &fd) < 0 || !fdset_contains(fds, fd)) {
f2341e0a 5340 log_unit_debug(u, "Failed to parse netns socket value: %s", value);
e8a565cb 5341 return 0;
613b411c 5342 }
e8a565cb
YW
5343
5344 safe_close(rt->netns_storage_socket[1]);
5345 rt->netns_storage_socket[1] = fdset_remove(fds, fd);
613b411c
LP
5346 } else
5347 return 0;
5348
e8a565cb
YW
5349 /* If the object is newly created, then put it to the hashmap which manages ExecRuntime objects. */
5350 if (rt_create) {
5351 r = hashmap_put(u->manager->exec_runtime_by_id, rt_create->id, rt_create);
5352 if (r < 0) {
3fe91079 5353 log_unit_debug_errno(u, r, "Failed to put runtime parameter to manager's storage: %m");
e8a565cb
YW
5354 return 0;
5355 }
613b411c 5356
e8a565cb 5357 rt_create->manager = u->manager;
613b411c 5358
e8a565cb
YW
5359 /* Avoid cleanup */
5360 rt_create = NULL;
5361 }
98b47d54 5362
e8a565cb
YW
5363 return 1;
5364}
613b411c 5365
e8a565cb
YW
5366void exec_runtime_deserialize_one(Manager *m, const char *value, FDSet *fds) {
5367 char *id = NULL, *tmp_dir = NULL, *var_tmp_dir = NULL;
5368 int r, fd0 = -1, fd1 = -1;
5369 const char *p, *v = value;
5370 size_t n;
613b411c 5371
e8a565cb
YW
5372 assert(m);
5373 assert(value);
5374 assert(fds);
98b47d54 5375
e8a565cb
YW
5376 n = strcspn(v, " ");
5377 id = strndupa(v, n);
5378 if (v[n] != ' ')
5379 goto finalize;
5380 p = v + n + 1;
5381
5382 v = startswith(p, "tmp-dir=");
5383 if (v) {
5384 n = strcspn(v, " ");
5385 tmp_dir = strndupa(v, n);
5386 if (v[n] != ' ')
5387 goto finalize;
5388 p = v + n + 1;
5389 }
5390
5391 v = startswith(p, "var-tmp-dir=");
5392 if (v) {
5393 n = strcspn(v, " ");
5394 var_tmp_dir = strndupa(v, n);
5395 if (v[n] != ' ')
5396 goto finalize;
5397 p = v + n + 1;
5398 }
5399
5400 v = startswith(p, "netns-socket-0=");
5401 if (v) {
5402 char *buf;
5403
5404 n = strcspn(v, " ");
5405 buf = strndupa(v, n);
5406 if (safe_atoi(buf, &fd0) < 0 || !fdset_contains(fds, fd0)) {
5407 log_debug("Unable to process exec-runtime netns fd specification.");
5408 return;
98b47d54 5409 }
e8a565cb
YW
5410 fd0 = fdset_remove(fds, fd0);
5411 if (v[n] != ' ')
5412 goto finalize;
5413 p = v + n + 1;
613b411c
LP
5414 }
5415
e8a565cb
YW
5416 v = startswith(p, "netns-socket-1=");
5417 if (v) {
5418 char *buf;
98b47d54 5419
e8a565cb
YW
5420 n = strcspn(v, " ");
5421 buf = strndupa(v, n);
5422 if (safe_atoi(buf, &fd1) < 0 || !fdset_contains(fds, fd1)) {
5423 log_debug("Unable to process exec-runtime netns fd specification.");
5424 return;
98b47d54 5425 }
e8a565cb
YW
5426 fd1 = fdset_remove(fds, fd1);
5427 }
98b47d54 5428
e8a565cb
YW
5429finalize:
5430
5431 r = exec_runtime_add(m, id, tmp_dir, var_tmp_dir, (int[]) { fd0, fd1 }, NULL);
7d853ca6 5432 if (r < 0)
e8a565cb 5433 log_debug_errno(r, "Failed to add exec-runtime: %m");
e8a565cb 5434}
613b411c 5435
e8a565cb
YW
5436void exec_runtime_vacuum(Manager *m) {
5437 ExecRuntime *rt;
5438 Iterator i;
5439
5440 assert(m);
5441
5442 /* Free unreferenced ExecRuntime objects. This is used after manager deserialization process. */
5443
5444 HASHMAP_FOREACH(rt, m->exec_runtime_by_id, i) {
5445 if (rt->n_ref > 0)
5446 continue;
5447
5448 (void) exec_runtime_free(rt, false);
5449 }
613b411c
LP
5450}
5451
b9c04eaf
YW
5452void exec_params_clear(ExecParameters *p) {
5453 if (!p)
5454 return;
5455
5456 strv_free(p->environment);
5457}
5458
80876c20
LP
5459static const char* const exec_input_table[_EXEC_INPUT_MAX] = {
5460 [EXEC_INPUT_NULL] = "null",
5461 [EXEC_INPUT_TTY] = "tty",
5462 [EXEC_INPUT_TTY_FORCE] = "tty-force",
4f2d528d 5463 [EXEC_INPUT_TTY_FAIL] = "tty-fail",
52c239d7
LB
5464 [EXEC_INPUT_SOCKET] = "socket",
5465 [EXEC_INPUT_NAMED_FD] = "fd",
08f3be7a 5466 [EXEC_INPUT_DATA] = "data",
2038c3f5 5467 [EXEC_INPUT_FILE] = "file",
80876c20
LP
5468};
5469
8a0867d6
LP
5470DEFINE_STRING_TABLE_LOOKUP(exec_input, ExecInput);
5471
94f04347 5472static const char* const exec_output_table[_EXEC_OUTPUT_MAX] = {
80876c20 5473 [EXEC_OUTPUT_INHERIT] = "inherit",
94f04347 5474 [EXEC_OUTPUT_NULL] = "null",
80876c20 5475 [EXEC_OUTPUT_TTY] = "tty",
94f04347 5476 [EXEC_OUTPUT_SYSLOG] = "syslog",
28dbc1e8 5477 [EXEC_OUTPUT_SYSLOG_AND_CONSOLE] = "syslog+console",
9a6bca7a 5478 [EXEC_OUTPUT_KMSG] = "kmsg",
28dbc1e8 5479 [EXEC_OUTPUT_KMSG_AND_CONSOLE] = "kmsg+console",
706343f4
LP
5480 [EXEC_OUTPUT_JOURNAL] = "journal",
5481 [EXEC_OUTPUT_JOURNAL_AND_CONSOLE] = "journal+console",
52c239d7
LB
5482 [EXEC_OUTPUT_SOCKET] = "socket",
5483 [EXEC_OUTPUT_NAMED_FD] = "fd",
2038c3f5 5484 [EXEC_OUTPUT_FILE] = "file",
566b7d23 5485 [EXEC_OUTPUT_FILE_APPEND] = "append",
94f04347
LP
5486};
5487
5488DEFINE_STRING_TABLE_LOOKUP(exec_output, ExecOutput);
023a4f67
LP
5489
5490static const char* const exec_utmp_mode_table[_EXEC_UTMP_MODE_MAX] = {
5491 [EXEC_UTMP_INIT] = "init",
5492 [EXEC_UTMP_LOGIN] = "login",
5493 [EXEC_UTMP_USER] = "user",
5494};
5495
5496DEFINE_STRING_TABLE_LOOKUP(exec_utmp_mode, ExecUtmpMode);
53f47dfc
YW
5497
5498static const char* const exec_preserve_mode_table[_EXEC_PRESERVE_MODE_MAX] = {
5499 [EXEC_PRESERVE_NO] = "no",
5500 [EXEC_PRESERVE_YES] = "yes",
5501 [EXEC_PRESERVE_RESTART] = "restart",
5502};
5503
5504DEFINE_STRING_TABLE_LOOKUP_WITH_BOOLEAN(exec_preserve_mode, ExecPreserveMode, EXEC_PRESERVE_YES);
3536f49e 5505
72fd1768 5506static const char* const exec_directory_type_table[_EXEC_DIRECTORY_TYPE_MAX] = {
3536f49e
YW
5507 [EXEC_DIRECTORY_RUNTIME] = "RuntimeDirectory",
5508 [EXEC_DIRECTORY_STATE] = "StateDirectory",
5509 [EXEC_DIRECTORY_CACHE] = "CacheDirectory",
5510 [EXEC_DIRECTORY_LOGS] = "LogsDirectory",
5511 [EXEC_DIRECTORY_CONFIGURATION] = "ConfigurationDirectory",
5512};
5513
5514DEFINE_STRING_TABLE_LOOKUP(exec_directory_type, ExecDirectoryType);
b1edf445 5515
fb2042dd
YW
5516static const char* const exec_directory_env_name_table[_EXEC_DIRECTORY_TYPE_MAX] = {
5517 [EXEC_DIRECTORY_RUNTIME] = "RUNTIME_DIRECTORY",
5518 [EXEC_DIRECTORY_STATE] = "STATE_DIRECTORY",
5519 [EXEC_DIRECTORY_CACHE] = "CACHE_DIRECTORY",
5520 [EXEC_DIRECTORY_LOGS] = "LOGS_DIRECTORY",
5521 [EXEC_DIRECTORY_CONFIGURATION] = "CONFIGURATION_DIRECTORY",
5522};
5523
5524DEFINE_PRIVATE_STRING_TABLE_LOOKUP_TO_STRING(exec_directory_env_name, ExecDirectoryType);
5525
b1edf445
LP
5526static const char* const exec_keyring_mode_table[_EXEC_KEYRING_MODE_MAX] = {
5527 [EXEC_KEYRING_INHERIT] = "inherit",
5528 [EXEC_KEYRING_PRIVATE] = "private",
5529 [EXEC_KEYRING_SHARED] = "shared",
5530};
5531
5532DEFINE_STRING_TABLE_LOOKUP(exec_keyring_mode, ExecKeyringMode);