]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/core/execute.c
pid1: rename start_limit to start_ratelimit
[thirdparty/systemd.git] / src / core / execute.c
CommitLineData
53e1b683 1/* SPDX-License-Identifier: LGPL-2.1+ */
a7334b09 2
034c6ed7
LP
3#include <errno.h>
4#include <fcntl.h>
8dd4c05b
LP
5#include <glob.h>
6#include <grp.h>
7#include <poll.h>
309bff19 8#include <signal.h>
8dd4c05b 9#include <string.h>
19c0b0b9 10#include <sys/capability.h>
d251207d 11#include <sys/eventfd.h>
f3e43635 12#include <sys/mman.h>
8dd4c05b 13#include <sys/personality.h>
94f04347 14#include <sys/prctl.h>
d2ffa389 15#include <sys/shm.h>
8dd4c05b 16#include <sys/socket.h>
451a074f 17#include <sys/stat.h>
d2ffa389 18#include <sys/types.h>
8dd4c05b
LP
19#include <sys/un.h>
20#include <unistd.h>
023a4f67 21#include <utmpx.h>
5cb5a6ff 22
349cc4a5 23#if HAVE_PAM
5b6319dc
LP
24#include <security/pam_appl.h>
25#endif
26
349cc4a5 27#if HAVE_SELINUX
7b52a628
MS
28#include <selinux/selinux.h>
29#endif
30
349cc4a5 31#if HAVE_SECCOMP
17df7223
LP
32#include <seccomp.h>
33#endif
34
349cc4a5 35#if HAVE_APPARMOR
eef65bf3
MS
36#include <sys/apparmor.h>
37#endif
38
24882e06 39#include "sd-messages.h"
8dd4c05b
LP
40
41#include "af-list.h"
b5efdb8a 42#include "alloc-util.h"
349cc4a5 43#if HAVE_APPARMOR
3ffd4af2
LP
44#include "apparmor-util.h"
45#endif
8dd4c05b
LP
46#include "async.h"
47#include "barrier.h"
8dd4c05b 48#include "cap-list.h"
430f0182 49#include "capability-util.h"
a1164ae3 50#include "chown-recursive.h"
da681e1b 51#include "cpu-set-util.h"
f6a6225e 52#include "def.h"
686d13b9 53#include "env-file.h"
4d1a6904 54#include "env-util.h"
17df7223 55#include "errno-list.h"
3ffd4af2 56#include "execute.h"
8dd4c05b 57#include "exit-status.h"
3ffd4af2 58#include "fd-util.h"
f97b34a6 59#include "format-util.h"
f4f15635 60#include "fs-util.h"
7d50b32a 61#include "glob-util.h"
c004493c 62#include "io-util.h"
8dd4c05b 63#include "ioprio.h"
a1164ae3 64#include "label.h"
8dd4c05b
LP
65#include "log.h"
66#include "macro.h"
e8a565cb 67#include "manager.h"
0a970718 68#include "memory-util.h"
8dd4c05b
LP
69#include "missing.h"
70#include "mkdir.h"
71#include "namespace.h"
6bedfcbb 72#include "parse-util.h"
8dd4c05b 73#include "path-util.h"
0b452006 74#include "process-util.h"
78f22b97 75#include "rlimit-util.h"
8dd4c05b 76#include "rm-rf.h"
349cc4a5 77#if HAVE_SECCOMP
3ffd4af2
LP
78#include "seccomp-util.h"
79#endif
07d46372 80#include "securebits-util.h"
8dd4c05b 81#include "selinux-util.h"
24882e06 82#include "signal-util.h"
8dd4c05b 83#include "smack-util.h"
57b7a260 84#include "socket-util.h"
fd63e712 85#include "special.h"
949befd3 86#include "stat-util.h"
8b43440b 87#include "string-table.h"
07630cea 88#include "string-util.h"
8dd4c05b 89#include "strv.h"
7ccbd1ae 90#include "syslog-util.h"
8dd4c05b 91#include "terminal-util.h"
566b7d23 92#include "umask-util.h"
8dd4c05b 93#include "unit.h"
b1d4f8e1 94#include "user-util.h"
8dd4c05b 95#include "utmp-wtmp.h"
5cb5a6ff 96
e056b01d 97#define IDLE_TIMEOUT_USEC (5*USEC_PER_SEC)
31a7eb86 98#define IDLE_TIMEOUT2_USEC (1*USEC_PER_SEC)
e6a26745 99
531dca78
LP
100#define SNDBUF_SIZE (8*1024*1024)
101
/* Moves the passed file descriptors so that fds[i] ends up being fd number i+3.
 *
 * The array is updated in place with the new fd numbers. Returns 0 on success, or a negative
 * errno-style error if duplicating one of the fds fails. */
static int shift_fds(int fds[], size_t n_fds) {
        int first = 0;

        if (n_fds <= 0)
                return 0;

        /* Modifies the fds array! */

        assert(fds);

        for (;;) {
                int retry_at = -1, idx;

                for (idx = first; idx < (int) n_fds; idx++) {
                        int target = idx + 3, dup_fd;

                        /* Nothing to do if the fd already sits in its slot */
                        if (fds[idx] == target)
                                continue;

                        dup_fd = fcntl(fds[idx], F_DUPFD, target);
                        if (dup_fd < 0)
                                return -errno;

                        safe_close(fds[idx]);
                        fds[idx] = dup_fd;

                        /* If we did not get exactly the fd we asked for the slot was still occupied.
                         * Remember the first index where that happened, so that we can retry from
                         * there in the next pass. */
                        if (dup_fd != target && retry_at < 0)
                                retry_at = idx;
                }

                if (retry_at < 0)
                        break;

                first = retry_at;
        }

        return 0;
}
146
/* Adjusts the file descriptor flags of all passed fds.
 *
 * The array holds n_socket_fds entries followed by n_storage_fds entries. O_NONBLOCK is set or
 * cleared (according to 'nonblock') only on the leading n_socket_fds entries; FD_CLOEXEC is dropped
 * on every entry. Returns 0 on success, or the negative error of the first helper that fails. */
static int flags_fds(const int fds[], size_t n_socket_fds, size_t n_storage_fds, bool nonblock) {
        size_t total = n_socket_fds + n_storage_fds, k;
        int r;

        if (total <= 0)
                return 0;

        assert(fds);

        for (k = 0; k < total; k++) {

                /* O_NONBLOCK is only relevant for the socket activation fds */
                if (k < n_socket_fds) {
                        r = fd_nonblock(fds[k], nonblock);
                        if (r < 0)
                                return r;
                }

                /* FD_CLOEXEC is always turned off, since these fds are supposed to be passed on to
                 * our children */
                r = fd_cloexec(fds[k], false);
                if (r < 0)
                        return r;
        }

        return 0;
}
179
1e22b5cd 180static const char *exec_context_tty_path(const ExecContext *context) {
80876c20
LP
181 assert(context);
182
1e22b5cd
LP
183 if (context->stdio_as_fds)
184 return NULL;
185
80876c20
LP
186 if (context->tty_path)
187 return context->tty_path;
188
189 return "/dev/console";
190}
191
1e22b5cd
LP
192static void exec_context_tty_reset(const ExecContext *context, const ExecParameters *p) {
193 const char *path;
194
6ea832a2
LP
195 assert(context);
196
1e22b5cd 197 path = exec_context_tty_path(context);
6ea832a2 198
1e22b5cd
LP
199 if (context->tty_vhangup) {
200 if (p && p->stdin_fd >= 0)
201 (void) terminal_vhangup_fd(p->stdin_fd);
202 else if (path)
203 (void) terminal_vhangup(path);
204 }
6ea832a2 205
1e22b5cd
LP
206 if (context->tty_reset) {
207 if (p && p->stdin_fd >= 0)
208 (void) reset_terminal_fd(p->stdin_fd, true);
209 else if (path)
210 (void) reset_terminal(path);
211 }
212
213 if (context->tty_vt_disallocate && path)
214 (void) vt_disallocate(path);
6ea832a2
LP
215}
216
6af760f3
LP
217static bool is_terminal_input(ExecInput i) {
218 return IN_SET(i,
219 EXEC_INPUT_TTY,
220 EXEC_INPUT_TTY_FORCE,
221 EXEC_INPUT_TTY_FAIL);
222}
223
3a1286b6 224static bool is_terminal_output(ExecOutput o) {
6af760f3
LP
225 return IN_SET(o,
226 EXEC_OUTPUT_TTY,
227 EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
228 EXEC_OUTPUT_KMSG_AND_CONSOLE,
229 EXEC_OUTPUT_JOURNAL_AND_CONSOLE);
230}
231
aac8c0c3
LP
232static bool is_syslog_output(ExecOutput o) {
233 return IN_SET(o,
234 EXEC_OUTPUT_SYSLOG,
235 EXEC_OUTPUT_SYSLOG_AND_CONSOLE);
236}
237
238static bool is_kmsg_output(ExecOutput o) {
239 return IN_SET(o,
240 EXEC_OUTPUT_KMSG,
241 EXEC_OUTPUT_KMSG_AND_CONSOLE);
242}
243
6af760f3
LP
244static bool exec_context_needs_term(const ExecContext *c) {
245 assert(c);
246
247 /* Return true if the execution context suggests we should set $TERM to something useful. */
248
249 if (is_terminal_input(c->std_input))
250 return true;
251
252 if (is_terminal_output(c->std_output))
253 return true;
254
255 if (is_terminal_output(c->std_error))
256 return true;
257
258 return !!c->tty_path;
3a1286b6
MS
259}
260
/* Opens /dev/null with the given open() flags (plus O_NOCTTY) and moves the result to fd number
 * 'nfd'. Returns a negative errno if the open fails, otherwise whatever move_fd() returns. */
static int open_null_as(int flags, int nfd) {
        int null_fd;

        assert(nfd >= 0);

        null_fd = open("/dev/null", flags|O_NOCTTY);
        if (null_fd < 0)
                return -errno;

        return move_fd(null_fd, nfd, false);
}
272
524daa8c 273static int connect_journal_socket(int fd, uid_t uid, gid_t gid) {
92a17af9 274 static const union sockaddr_union sa = {
b92bea5d
ZJS
275 .un.sun_family = AF_UNIX,
276 .un.sun_path = "/run/systemd/journal/stdout",
277 };
524daa8c
ZJS
278 uid_t olduid = UID_INVALID;
279 gid_t oldgid = GID_INVALID;
280 int r;
281
cad93f29 282 if (gid_is_valid(gid)) {
524daa8c
ZJS
283 oldgid = getgid();
284
92a17af9 285 if (setegid(gid) < 0)
524daa8c
ZJS
286 return -errno;
287 }
288
cad93f29 289 if (uid_is_valid(uid)) {
524daa8c
ZJS
290 olduid = getuid();
291
92a17af9 292 if (seteuid(uid) < 0) {
524daa8c
ZJS
293 r = -errno;
294 goto restore_gid;
295 }
296 }
297
92a17af9 298 r = connect(fd, &sa.sa, SOCKADDR_UN_LEN(sa.un)) < 0 ? -errno : 0;
524daa8c
ZJS
299
300 /* If we fail to restore the uid or gid, things will likely
301 fail later on. This should only happen if an LSM interferes. */
302
cad93f29 303 if (uid_is_valid(uid))
524daa8c
ZJS
304 (void) seteuid(olduid);
305
306 restore_gid:
cad93f29 307 if (gid_is_valid(gid))
524daa8c
ZJS
308 (void) setegid(oldgid);
309
310 return r;
311}
312
fd1f9c89 313static int connect_logger_as(
34cf6c43 314 const Unit *unit,
fd1f9c89 315 const ExecContext *context,
af635cf3 316 const ExecParameters *params,
fd1f9c89
LP
317 ExecOutput output,
318 const char *ident,
fd1f9c89
LP
319 int nfd,
320 uid_t uid,
321 gid_t gid) {
322
2ac1ff68
EV
323 _cleanup_close_ int fd = -1;
324 int r;
071830ff
LP
325
326 assert(context);
af635cf3 327 assert(params);
80876c20
LP
328 assert(output < _EXEC_OUTPUT_MAX);
329 assert(ident);
330 assert(nfd >= 0);
071830ff 331
54fe0cdb
LP
332 fd = socket(AF_UNIX, SOCK_STREAM, 0);
333 if (fd < 0)
80876c20 334 return -errno;
071830ff 335
524daa8c
ZJS
336 r = connect_journal_socket(fd, uid, gid);
337 if (r < 0)
338 return r;
071830ff 339
2ac1ff68 340 if (shutdown(fd, SHUT_RD) < 0)
80876c20 341 return -errno;
071830ff 342
fd1f9c89 343 (void) fd_inc_sndbuf(fd, SNDBUF_SIZE);
531dca78 344
2ac1ff68 345 if (dprintf(fd,
62bca2c6 346 "%s\n"
80876c20
LP
347 "%s\n"
348 "%i\n"
54fe0cdb
LP
349 "%i\n"
350 "%i\n"
351 "%i\n"
4f4a1dbf 352 "%i\n",
c867611e 353 context->syslog_identifier ?: ident,
af635cf3 354 params->flags & EXEC_PASS_LOG_UNIT ? unit->id : "",
54fe0cdb
LP
355 context->syslog_priority,
356 !!context->syslog_level_prefix,
aac8c0c3
LP
357 is_syslog_output(output),
358 is_kmsg_output(output),
2ac1ff68
EV
359 is_terminal_output(output)) < 0)
360 return -errno;
80876c20 361
2ac1ff68 362 return move_fd(TAKE_FD(fd), nfd, false);
80876c20 363}
2ac1ff68 364
/* Opens the terminal at 'path' with the given flags (plus O_NOCTTY) and moves the result to fd
 * number 'nfd'. Returns the negative error of open_terminal() on failure, otherwise whatever
 * move_fd() returns. */
static int open_terminal_as(const char *path, int flags, int nfd) {
        int tty_fd;

        assert(path);
        assert(nfd >= 0);

        tty_fd = open_terminal(path, flags | O_NOCTTY);
        if (tty_fd < 0)
                return tty_fd;

        return move_fd(tty_fd, nfd, false);
}
071830ff 377
2038c3f5 378static int acquire_path(const char *path, int flags, mode_t mode) {
15a3e96f
LP
379 union sockaddr_union sa = {};
380 _cleanup_close_ int fd = -1;
381 int r, salen;
071830ff 382
80876c20 383 assert(path);
071830ff 384
2038c3f5
LP
385 if (IN_SET(flags & O_ACCMODE, O_WRONLY, O_RDWR))
386 flags |= O_CREAT;
387
388 fd = open(path, flags|O_NOCTTY, mode);
389 if (fd >= 0)
15a3e96f 390 return TAKE_FD(fd);
071830ff 391
2038c3f5
LP
392 if (errno != ENXIO) /* ENXIO is returned when we try to open() an AF_UNIX file system socket on Linux */
393 return -errno;
15a3e96f 394 if (strlen(path) >= sizeof(sa.un.sun_path)) /* Too long, can't be a UNIX socket */
2038c3f5
LP
395 return -ENXIO;
396
397 /* So, it appears the specified path could be an AF_UNIX socket. Let's see if we can connect to it. */
398
399 fd = socket(AF_UNIX, SOCK_STREAM, 0);
400 if (fd < 0)
401 return -errno;
402
15a3e96f
LP
403 salen = sockaddr_un_set_path(&sa.un, path);
404 if (salen < 0)
405 return salen;
406
407 if (connect(fd, &sa.sa, salen) < 0)
2038c3f5
LP
408 return errno == EINVAL ? -ENXIO : -errno; /* Propagate initial error if we get EINVAL, i.e. we have
409 * indication that his wasn't an AF_UNIX socket after all */
071830ff 410
2038c3f5
LP
411 if ((flags & O_ACCMODE) == O_RDONLY)
412 r = shutdown(fd, SHUT_WR);
413 else if ((flags & O_ACCMODE) == O_WRONLY)
414 r = shutdown(fd, SHUT_RD);
415 else
15a3e96f
LP
416 return TAKE_FD(fd);
417 if (r < 0)
2038c3f5 418 return -errno;
2038c3f5 419
15a3e96f 420 return TAKE_FD(fd);
80876c20 421}
071830ff 422
08f3be7a
LP
423static int fixup_input(
424 const ExecContext *context,
425 int socket_fd,
426 bool apply_tty_stdin) {
427
428 ExecInput std_input;
429
430 assert(context);
431
432 std_input = context->std_input;
1e3ad081
LP
433
434 if (is_terminal_input(std_input) && !apply_tty_stdin)
435 return EXEC_INPUT_NULL;
071830ff 436
03fd9c49 437 if (std_input == EXEC_INPUT_SOCKET && socket_fd < 0)
4f2d528d
LP
438 return EXEC_INPUT_NULL;
439
08f3be7a
LP
440 if (std_input == EXEC_INPUT_DATA && context->stdin_data_size == 0)
441 return EXEC_INPUT_NULL;
442
03fd9c49 443 return std_input;
4f2d528d
LP
444}
445
03fd9c49 446static int fixup_output(ExecOutput std_output, int socket_fd) {
4f2d528d 447
03fd9c49 448 if (std_output == EXEC_OUTPUT_SOCKET && socket_fd < 0)
4f2d528d
LP
449 return EXEC_OUTPUT_INHERIT;
450
03fd9c49 451 return std_output;
4f2d528d
LP
452}
453
a34ceba6
LP
454static int setup_input(
455 const ExecContext *context,
456 const ExecParameters *params,
52c239d7 457 int socket_fd,
2caa38e9 458 const int named_iofds[static 3]) {
a34ceba6 459
4f2d528d
LP
460 ExecInput i;
461
462 assert(context);
a34ceba6 463 assert(params);
2caa38e9 464 assert(named_iofds);
a34ceba6
LP
465
466 if (params->stdin_fd >= 0) {
467 if (dup2(params->stdin_fd, STDIN_FILENO) < 0)
468 return -errno;
469
470 /* Try to make this the controlling tty, if it is a tty, and reset it */
1fb0682e
LP
471 if (isatty(STDIN_FILENO)) {
472 (void) ioctl(STDIN_FILENO, TIOCSCTTY, context->std_input == EXEC_INPUT_TTY_FORCE);
473 (void) reset_terminal_fd(STDIN_FILENO, true);
474 }
a34ceba6
LP
475
476 return STDIN_FILENO;
477 }
4f2d528d 478
08f3be7a 479 i = fixup_input(context, socket_fd, params->flags & EXEC_APPLY_TTY_STDIN);
4f2d528d
LP
480
481 switch (i) {
071830ff 482
80876c20
LP
483 case EXEC_INPUT_NULL:
484 return open_null_as(O_RDONLY, STDIN_FILENO);
485
486 case EXEC_INPUT_TTY:
487 case EXEC_INPUT_TTY_FORCE:
488 case EXEC_INPUT_TTY_FAIL: {
046a82c1 489 int fd;
071830ff 490
1e22b5cd 491 fd = acquire_terminal(exec_context_tty_path(context),
8854d795
LP
492 i == EXEC_INPUT_TTY_FAIL ? ACQUIRE_TERMINAL_TRY :
493 i == EXEC_INPUT_TTY_FORCE ? ACQUIRE_TERMINAL_FORCE :
494 ACQUIRE_TERMINAL_WAIT,
3a43da28 495 USEC_INFINITY);
970edce6 496 if (fd < 0)
80876c20
LP
497 return fd;
498
046a82c1 499 return move_fd(fd, STDIN_FILENO, false);
80876c20
LP
500 }
501
4f2d528d 502 case EXEC_INPUT_SOCKET:
e75a9ed1
LP
503 assert(socket_fd >= 0);
504
4f2d528d
LP
505 return dup2(socket_fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
506
52c239d7 507 case EXEC_INPUT_NAMED_FD:
e75a9ed1
LP
508 assert(named_iofds[STDIN_FILENO] >= 0);
509
52c239d7
LB
510 (void) fd_nonblock(named_iofds[STDIN_FILENO], false);
511 return dup2(named_iofds[STDIN_FILENO], STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
512
08f3be7a
LP
513 case EXEC_INPUT_DATA: {
514 int fd;
515
516 fd = acquire_data_fd(context->stdin_data, context->stdin_data_size, 0);
517 if (fd < 0)
518 return fd;
519
520 return move_fd(fd, STDIN_FILENO, false);
521 }
522
2038c3f5
LP
523 case EXEC_INPUT_FILE: {
524 bool rw;
525 int fd;
526
527 assert(context->stdio_file[STDIN_FILENO]);
528
529 rw = (context->std_output == EXEC_OUTPUT_FILE && streq_ptr(context->stdio_file[STDIN_FILENO], context->stdio_file[STDOUT_FILENO])) ||
530 (context->std_error == EXEC_OUTPUT_FILE && streq_ptr(context->stdio_file[STDIN_FILENO], context->stdio_file[STDERR_FILENO]));
531
532 fd = acquire_path(context->stdio_file[STDIN_FILENO], rw ? O_RDWR : O_RDONLY, 0666 & ~context->umask);
533 if (fd < 0)
534 return fd;
535
536 return move_fd(fd, STDIN_FILENO, false);
537 }
538
80876c20
LP
539 default:
540 assert_not_reached("Unknown input type");
541 }
542}
543
41fc585a
LP
544static bool can_inherit_stderr_from_stdout(
545 const ExecContext *context,
546 ExecOutput o,
547 ExecOutput e) {
548
549 assert(context);
550
551 /* Returns true, if given the specified STDERR and STDOUT output we can directly dup() the stdout fd to the
552 * stderr fd */
553
554 if (e == EXEC_OUTPUT_INHERIT)
555 return true;
556 if (e != o)
557 return false;
558
559 if (e == EXEC_OUTPUT_NAMED_FD)
560 return streq_ptr(context->stdio_fdname[STDOUT_FILENO], context->stdio_fdname[STDERR_FILENO]);
561
562 if (IN_SET(e, EXEC_OUTPUT_FILE, EXEC_OUTPUT_FILE_APPEND))
563 return streq_ptr(context->stdio_file[STDOUT_FILENO], context->stdio_file[STDERR_FILENO]);
564
565 return true;
566}
567
a34ceba6 568static int setup_output(
34cf6c43 569 const Unit *unit,
a34ceba6
LP
570 const ExecContext *context,
571 const ExecParameters *params,
572 int fileno,
573 int socket_fd,
2caa38e9 574 const int named_iofds[static 3],
a34ceba6 575 const char *ident,
7bce046b
LP
576 uid_t uid,
577 gid_t gid,
578 dev_t *journal_stream_dev,
579 ino_t *journal_stream_ino) {
a34ceba6 580
4f2d528d
LP
581 ExecOutput o;
582 ExecInput i;
47c1d80d 583 int r;
4f2d528d 584
f2341e0a 585 assert(unit);
80876c20 586 assert(context);
a34ceba6 587 assert(params);
80876c20 588 assert(ident);
7bce046b
LP
589 assert(journal_stream_dev);
590 assert(journal_stream_ino);
80876c20 591
a34ceba6
LP
592 if (fileno == STDOUT_FILENO && params->stdout_fd >= 0) {
593
594 if (dup2(params->stdout_fd, STDOUT_FILENO) < 0)
595 return -errno;
596
597 return STDOUT_FILENO;
598 }
599
600 if (fileno == STDERR_FILENO && params->stderr_fd >= 0) {
601 if (dup2(params->stderr_fd, STDERR_FILENO) < 0)
602 return -errno;
603
604 return STDERR_FILENO;
605 }
606
08f3be7a 607 i = fixup_input(context, socket_fd, params->flags & EXEC_APPLY_TTY_STDIN);
03fd9c49 608 o = fixup_output(context->std_output, socket_fd);
4f2d528d 609
eb17e935
MS
610 if (fileno == STDERR_FILENO) {
611 ExecOutput e;
612 e = fixup_output(context->std_error, socket_fd);
80876c20 613
eb17e935
MS
614 /* This expects the input and output are already set up */
615
616 /* Don't change the stderr file descriptor if we inherit all
617 * the way and are not on a tty */
618 if (e == EXEC_OUTPUT_INHERIT &&
619 o == EXEC_OUTPUT_INHERIT &&
620 i == EXEC_INPUT_NULL &&
621 !is_terminal_input(context->std_input) &&
622 getppid () != 1)
623 return fileno;
624
625 /* Duplicate from stdout if possible */
41fc585a 626 if (can_inherit_stderr_from_stdout(context, o, e))
eb17e935 627 return dup2(STDOUT_FILENO, fileno) < 0 ? -errno : fileno;
071830ff 628
eb17e935 629 o = e;
80876c20 630
eb17e935 631 } else if (o == EXEC_OUTPUT_INHERIT) {
21d21ea4
LP
632 /* If input got downgraded, inherit the original value */
633 if (i == EXEC_INPUT_NULL && is_terminal_input(context->std_input))
1e22b5cd 634 return open_terminal_as(exec_context_tty_path(context), O_WRONLY, fileno);
21d21ea4 635
08f3be7a
LP
636 /* If the input is connected to anything that's not a /dev/null or a data fd, inherit that... */
637 if (!IN_SET(i, EXEC_INPUT_NULL, EXEC_INPUT_DATA))
eb17e935 638 return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
071830ff 639
acb591e4
LP
640 /* If we are not started from PID 1 we just inherit STDOUT from our parent process. */
641 if (getppid() != 1)
eb17e935 642 return fileno;
94f04347 643
eb17e935
MS
644 /* We need to open /dev/null here anew, to get the right access mode. */
645 return open_null_as(O_WRONLY, fileno);
071830ff 646 }
94f04347 647
eb17e935 648 switch (o) {
80876c20
LP
649
650 case EXEC_OUTPUT_NULL:
eb17e935 651 return open_null_as(O_WRONLY, fileno);
80876c20
LP
652
653 case EXEC_OUTPUT_TTY:
4f2d528d 654 if (is_terminal_input(i))
eb17e935 655 return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
80876c20
LP
656
657 /* We don't reset the terminal if this is just about output */
1e22b5cd 658 return open_terminal_as(exec_context_tty_path(context), O_WRONLY, fileno);
80876c20
LP
659
660 case EXEC_OUTPUT_SYSLOG:
28dbc1e8 661 case EXEC_OUTPUT_SYSLOG_AND_CONSOLE:
9a6bca7a 662 case EXEC_OUTPUT_KMSG:
28dbc1e8 663 case EXEC_OUTPUT_KMSG_AND_CONSOLE:
706343f4
LP
664 case EXEC_OUTPUT_JOURNAL:
665 case EXEC_OUTPUT_JOURNAL_AND_CONSOLE:
af635cf3 666 r = connect_logger_as(unit, context, params, o, ident, fileno, uid, gid);
47c1d80d 667 if (r < 0) {
82677ae4 668 log_unit_warning_errno(unit, r, "Failed to connect %s to the journal socket, ignoring: %m", fileno == STDOUT_FILENO ? "stdout" : "stderr");
eb17e935 669 r = open_null_as(O_WRONLY, fileno);
7bce046b
LP
670 } else {
671 struct stat st;
672
673 /* If we connected this fd to the journal via a stream, patch the device/inode into the passed
674 * parameters, but only then. This is useful so that we can set $JOURNAL_STREAM that permits
ab2116b1
LP
675 * services to detect whether they are connected to the journal or not.
676 *
677 * If both stdout and stderr are connected to a stream then let's make sure to store the data
678 * about STDERR as that's usually the best way to do logging. */
7bce046b 679
ab2116b1
LP
680 if (fstat(fileno, &st) >= 0 &&
681 (*journal_stream_ino == 0 || fileno == STDERR_FILENO)) {
7bce046b
LP
682 *journal_stream_dev = st.st_dev;
683 *journal_stream_ino = st.st_ino;
684 }
47c1d80d
MS
685 }
686 return r;
4f2d528d
LP
687
688 case EXEC_OUTPUT_SOCKET:
689 assert(socket_fd >= 0);
e75a9ed1 690
eb17e935 691 return dup2(socket_fd, fileno) < 0 ? -errno : fileno;
94f04347 692
52c239d7 693 case EXEC_OUTPUT_NAMED_FD:
e75a9ed1
LP
694 assert(named_iofds[fileno] >= 0);
695
52c239d7
LB
696 (void) fd_nonblock(named_iofds[fileno], false);
697 return dup2(named_iofds[fileno], fileno) < 0 ? -errno : fileno;
698
566b7d23
ZD
699 case EXEC_OUTPUT_FILE:
700 case EXEC_OUTPUT_FILE_APPEND: {
2038c3f5 701 bool rw;
566b7d23 702 int fd, flags;
2038c3f5
LP
703
704 assert(context->stdio_file[fileno]);
705
706 rw = context->std_input == EXEC_INPUT_FILE &&
707 streq_ptr(context->stdio_file[fileno], context->stdio_file[STDIN_FILENO]);
708
709 if (rw)
710 return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
711
566b7d23
ZD
712 flags = O_WRONLY;
713 if (o == EXEC_OUTPUT_FILE_APPEND)
714 flags |= O_APPEND;
715
716 fd = acquire_path(context->stdio_file[fileno], flags, 0666 & ~context->umask);
2038c3f5
LP
717 if (fd < 0)
718 return fd;
719
566b7d23 720 return move_fd(fd, fileno, 0);
2038c3f5
LP
721 }
722
94f04347 723 default:
80876c20 724 assert_not_reached("Unknown error type");
94f04347 725 }
071830ff
LP
726}
727
02a51aba 728static int chown_terminal(int fd, uid_t uid) {
4b3b5bc7 729 int r;
02a51aba
LP
730
731 assert(fd >= 0);
02a51aba 732
1ff74fb6 733 /* Before we chown/chmod the TTY, let's ensure this is actually a tty */
4b3b5bc7
LP
734 if (isatty(fd) < 1) {
735 if (IN_SET(errno, EINVAL, ENOTTY))
736 return 0; /* not a tty */
1ff74fb6 737
02a51aba 738 return -errno;
4b3b5bc7 739 }
02a51aba 740
4b3b5bc7
LP
741 /* This might fail. What matters are the results. */
742 r = fchmod_and_chown(fd, TTY_MODE, uid, -1);
743 if (r < 0)
744 return r;
02a51aba 745
4b3b5bc7 746 return 1;
02a51aba
LP
747}
748
7d5ceb64 749static int setup_confirm_stdio(const char *vc, int *_saved_stdin, int *_saved_stdout) {
3d18b167
LP
750 _cleanup_close_ int fd = -1, saved_stdin = -1, saved_stdout = -1;
751 int r;
80876c20 752
80876c20
LP
753 assert(_saved_stdin);
754 assert(_saved_stdout);
755
af6da548
LP
756 saved_stdin = fcntl(STDIN_FILENO, F_DUPFD, 3);
757 if (saved_stdin < 0)
758 return -errno;
80876c20 759
af6da548 760 saved_stdout = fcntl(STDOUT_FILENO, F_DUPFD, 3);
3d18b167
LP
761 if (saved_stdout < 0)
762 return -errno;
80876c20 763
8854d795 764 fd = acquire_terminal(vc, ACQUIRE_TERMINAL_WAIT, DEFAULT_CONFIRM_USEC);
3d18b167
LP
765 if (fd < 0)
766 return fd;
80876c20 767
af6da548
LP
768 r = chown_terminal(fd, getuid());
769 if (r < 0)
3d18b167 770 return r;
02a51aba 771
3d18b167
LP
772 r = reset_terminal_fd(fd, true);
773 if (r < 0)
774 return r;
80876c20 775
2b33ab09 776 r = rearrange_stdio(fd, fd, STDERR_FILENO);
3d18b167 777 fd = -1;
2b33ab09
LP
778 if (r < 0)
779 return r;
80876c20
LP
780
781 *_saved_stdin = saved_stdin;
782 *_saved_stdout = saved_stdout;
783
3d18b167 784 saved_stdin = saved_stdout = -1;
80876c20 785
3d18b167 786 return 0;
80876c20
LP
787}
788
63d77c92 789static void write_confirm_error_fd(int err, int fd, const Unit *u) {
3b20f877
FB
790 assert(err < 0);
791
792 if (err == -ETIMEDOUT)
63d77c92 793 dprintf(fd, "Confirmation question timed out for %s, assuming positive response.\n", u->id);
3b20f877
FB
794 else {
795 errno = -err;
63d77c92 796 dprintf(fd, "Couldn't ask confirmation for %s: %m, assuming positive response.\n", u->id);
3b20f877
FB
797 }
798}
799
63d77c92 800static void write_confirm_error(int err, const char *vc, const Unit *u) {
03e334a1 801 _cleanup_close_ int fd = -1;
80876c20 802
3b20f877 803 assert(vc);
80876c20 804
7d5ceb64 805 fd = open_terminal(vc, O_WRONLY|O_NOCTTY|O_CLOEXEC);
af6da548 806 if (fd < 0)
3b20f877 807 return;
80876c20 808
63d77c92 809 write_confirm_error_fd(err, fd, u);
af6da548 810}
80876c20 811
/* Undoes setup_confirm_stdio(): releases the terminal, duplicates the saved fds back onto stdin
 * and stdout (where valid) and closes the saved fds.
 *
 * Returns 0 on success, or the negative errno of the last dup2() that failed. */
static int restore_confirm_stdio(int *saved_stdin, int *saved_stdout) {
        int ret = 0;

        assert(saved_stdin);
        assert(saved_stdout);

        release_terminal();

        if (*saved_stdin >= 0 && dup2(*saved_stdin, STDIN_FILENO) < 0)
                ret = -errno;

        if (*saved_stdout >= 0 && dup2(*saved_stdout, STDOUT_FILENO) < 0)
                ret = -errno;

        *saved_stdin = safe_close(*saved_stdin);
        *saved_stdout = safe_close(*saved_stdout);

        return ret;
}
833
3b20f877
FB
/* Possible results of ask_for_confirmation() */
enum {
        CONFIRM_PRETEND_FAILURE = -1,  /* don't execute the command and pretend it failed */
        CONFIRM_PRETEND_SUCCESS =  0,  /* don't execute the command and pretend it succeeded */
        CONFIRM_EXECUTE         =  1,  /* execute the command */
};
839
eedf223a 840static int ask_for_confirmation(const char *vc, Unit *u, const char *cmdline) {
af6da548 841 int saved_stdout = -1, saved_stdin = -1, r;
2bcd3c26 842 _cleanup_free_ char *e = NULL;
3b20f877 843 char c;
af6da548 844
3b20f877 845 /* For any internal errors, assume a positive response. */
7d5ceb64 846 r = setup_confirm_stdio(vc, &saved_stdin, &saved_stdout);
3b20f877 847 if (r < 0) {
63d77c92 848 write_confirm_error(r, vc, u);
3b20f877
FB
849 return CONFIRM_EXECUTE;
850 }
af6da548 851
b0eb2944
FB
852 /* confirm_spawn might have been disabled while we were sleeping. */
853 if (manager_is_confirm_spawn_disabled(u->manager)) {
854 r = 1;
855 goto restore_stdio;
856 }
af6da548 857
2bcd3c26
FB
858 e = ellipsize(cmdline, 60, 100);
859 if (!e) {
860 log_oom();
861 r = CONFIRM_EXECUTE;
862 goto restore_stdio;
863 }
af6da548 864
d172b175 865 for (;;) {
539622bd 866 r = ask_char(&c, "yfshiDjcn", "Execute %s? [y, f, s – h for help] ", e);
d172b175 867 if (r < 0) {
63d77c92 868 write_confirm_error_fd(r, STDOUT_FILENO, u);
d172b175
FB
869 r = CONFIRM_EXECUTE;
870 goto restore_stdio;
871 }
af6da548 872
d172b175 873 switch (c) {
b0eb2944
FB
874 case 'c':
875 printf("Resuming normal execution.\n");
876 manager_disable_confirm_spawn();
877 r = 1;
878 break;
dd6f9ac0
FB
879 case 'D':
880 unit_dump(u, stdout, " ");
881 continue; /* ask again */
d172b175
FB
882 case 'f':
883 printf("Failing execution.\n");
884 r = CONFIRM_PRETEND_FAILURE;
885 break;
886 case 'h':
b0eb2944
FB
887 printf(" c - continue, proceed without asking anymore\n"
888 " D - dump, show the state of the unit\n"
dd6f9ac0 889 " f - fail, don't execute the command and pretend it failed\n"
d172b175 890 " h - help\n"
eedf223a 891 " i - info, show a short summary of the unit\n"
56fde33a 892 " j - jobs, show jobs that are in progress\n"
d172b175
FB
893 " s - skip, don't execute the command and pretend it succeeded\n"
894 " y - yes, execute the command\n");
dd6f9ac0 895 continue; /* ask again */
eedf223a
FB
896 case 'i':
897 printf(" Description: %s\n"
898 " Unit: %s\n"
899 " Command: %s\n",
900 u->id, u->description, cmdline);
901 continue; /* ask again */
56fde33a
FB
902 case 'j':
903 manager_dump_jobs(u->manager, stdout, " ");
904 continue; /* ask again */
539622bd
FB
905 case 'n':
906 /* 'n' was removed in favor of 'f'. */
907 printf("Didn't understand 'n', did you mean 'f'?\n");
908 continue; /* ask again */
d172b175
FB
909 case 's':
910 printf("Skipping execution.\n");
911 r = CONFIRM_PRETEND_SUCCESS;
912 break;
913 case 'y':
914 r = CONFIRM_EXECUTE;
915 break;
916 default:
917 assert_not_reached("Unhandled choice");
918 }
3b20f877 919 break;
3b20f877 920 }
af6da548 921
3b20f877 922restore_stdio:
af6da548 923 restore_confirm_stdio(&saved_stdin, &saved_stdout);
af6da548 924 return r;
80876c20
LP
925}
926
4d885bd3
DH
927static int get_fixed_user(const ExecContext *c, const char **user,
928 uid_t *uid, gid_t *gid,
929 const char **home, const char **shell) {
81a2b7ce 930 int r;
4d885bd3 931 const char *name;
81a2b7ce 932
4d885bd3 933 assert(c);
81a2b7ce 934
23deef88
LP
935 if (!c->user)
936 return 0;
937
4d885bd3
DH
938 /* Note that we don't set $HOME or $SHELL if they are not particularly enlightening anyway
939 * (i.e. are "/" or "/bin/nologin"). */
81a2b7ce 940
23deef88 941 name = c->user;
fafff8f1 942 r = get_user_creds(&name, uid, gid, home, shell, USER_CREDS_CLEAN);
4d885bd3
DH
943 if (r < 0)
944 return r;
81a2b7ce 945
4d885bd3
DH
946 *user = name;
947 return 0;
948}
949
950static int get_fixed_group(const ExecContext *c, const char **group, gid_t *gid) {
951 int r;
952 const char *name;
953
954 assert(c);
955
956 if (!c->group)
957 return 0;
958
959 name = c->group;
fafff8f1 960 r = get_group_creds(&name, gid, 0);
4d885bd3
DH
961 if (r < 0)
962 return r;
963
964 *group = name;
965 return 0;
966}
967
cdc5d5c5
DH
968static int get_supplementary_groups(const ExecContext *c, const char *user,
969 const char *group, gid_t gid,
970 gid_t **supplementary_gids, int *ngids) {
4d885bd3
DH
971 char **i;
972 int r, k = 0;
973 int ngroups_max;
974 bool keep_groups = false;
975 gid_t *groups = NULL;
976 _cleanup_free_ gid_t *l_gids = NULL;
977
978 assert(c);
979
bbeea271
DH
980 /*
981 * If user is given, then lookup GID and supplementary groups list.
982 * We avoid NSS lookups for gid=0. Also we have to initialize groups
cdc5d5c5
DH
983 * here and as early as possible so we keep the list of supplementary
984 * groups of the caller.
bbeea271
DH
985 */
986 if (user && gid_is_valid(gid) && gid != 0) {
987 /* First step, initialize groups from /etc/groups */
988 if (initgroups(user, gid) < 0)
989 return -errno;
990
991 keep_groups = true;
992 }
993
ac6e8be6 994 if (strv_isempty(c->supplementary_groups))
4d885bd3
DH
995 return 0;
996
366ddd25
DH
997 /*
998 * If SupplementaryGroups= was passed then NGROUPS_MAX has to
999 * be positive, otherwise fail.
1000 */
1001 errno = 0;
1002 ngroups_max = (int) sysconf(_SC_NGROUPS_MAX);
66855de7
LP
1003 if (ngroups_max <= 0)
1004 return errno_or_else(EOPNOTSUPP);
366ddd25 1005
4d885bd3
DH
1006 l_gids = new(gid_t, ngroups_max);
1007 if (!l_gids)
1008 return -ENOMEM;
81a2b7ce 1009
4d885bd3
DH
1010 if (keep_groups) {
1011 /*
1012 * Lookup the list of groups that the user belongs to, we
1013 * avoid NSS lookups here too for gid=0.
1014 */
1015 k = ngroups_max;
1016 if (getgrouplist(user, gid, l_gids, &k) < 0)
1017 return -EINVAL;
1018 } else
1019 k = 0;
81a2b7ce 1020
4d885bd3
DH
1021 STRV_FOREACH(i, c->supplementary_groups) {
1022 const char *g;
81a2b7ce 1023
4d885bd3
DH
1024 if (k >= ngroups_max)
1025 return -E2BIG;
81a2b7ce 1026
4d885bd3 1027 g = *i;
fafff8f1 1028 r = get_group_creds(&g, l_gids+k, 0);
4d885bd3
DH
1029 if (r < 0)
1030 return r;
81a2b7ce 1031
4d885bd3
DH
1032 k++;
1033 }
81a2b7ce 1034
4d885bd3
DH
1035 /*
1036 * Sets ngids to zero to drop all supplementary groups, happens
1037 * when we are under root and SupplementaryGroups= is empty.
1038 */
1039 if (k == 0) {
1040 *ngids = 0;
1041 return 0;
1042 }
81a2b7ce 1043
4d885bd3
DH
1044 /* Otherwise get the final list of supplementary groups */
1045 groups = memdup(l_gids, sizeof(gid_t) * k);
1046 if (!groups)
1047 return -ENOMEM;
1048
1049 *supplementary_gids = groups;
1050 *ngids = k;
1051
1052 groups = NULL;
1053
1054 return 0;
1055}
1056
/* Applies the group credentials to the calling process: first the supplementary groups (only if
 * 'ngids' is positive), then the real, effective and saved GID (only if 'gid' is valid).
 *
 * Returns 0 on success, a negative errno-style error otherwise. */
static int enforce_groups(gid_t gid, const gid_t *supplementary_gids, int ngids) {

        /* Only handle SupplementaryGroups= if there is something to set */
        if (ngids > 0) {
                int r;

                r = maybe_setgroups(ngids, supplementary_gids);
                if (r < 0)
                        return r;
        }

        /* Then switch over to the new GID */
        if (gid_is_valid(gid) && setresgid(gid, gid, gid) < 0)
                return -errno;

        return 0;
}
1075
1076static int enforce_user(const ExecContext *context, uid_t uid) {
81a2b7ce
LP
1077 assert(context);
1078
4d885bd3
DH
1079 if (!uid_is_valid(uid))
1080 return 0;
1081
479050b3 1082 /* Sets (but doesn't look up) the uid and make sure we keep the
81a2b7ce
LP
1083 * capabilities while doing so. */
1084
479050b3 1085 if (context->capability_ambient_set != 0) {
81a2b7ce
LP
1086
1087 /* First step: If we need to keep capabilities but
1088 * drop privileges we need to make sure we keep our
cbb21cca 1089 * caps, while we drop privileges. */
693ced48 1090 if (uid != 0) {
cbb21cca 1091 int sb = context->secure_bits | 1<<SECURE_KEEP_CAPS;
693ced48
LP
1092
1093 if (prctl(PR_GET_SECUREBITS) != sb)
1094 if (prctl(PR_SET_SECUREBITS, sb) < 0)
1095 return -errno;
1096 }
81a2b7ce
LP
1097 }
1098
479050b3 1099 /* Second step: actually set the uids */
81a2b7ce
LP
1100 if (setresuid(uid, uid, uid) < 0)
1101 return -errno;
1102
1103 /* At this point we should have all necessary capabilities but
1104 are otherwise a normal user. However, the caps might got
1105 corrupted due to the setresuid() so we need clean them up
1106 later. This is done outside of this call. */
1107
1108 return 0;
1109}
1110
349cc4a5 1111#if HAVE_PAM
5b6319dc
LP
1112
1113static int null_conv(
1114 int num_msg,
1115 const struct pam_message **msg,
1116 struct pam_response **resp,
1117 void *appdata_ptr) {
1118
1119 /* We don't support conversations */
1120
1121 return PAM_CONV_ERR;
1122}
1123
cefc33ae
LP
1124#endif
1125
5b6319dc
LP
/* Opens a PAM session for the service about to be executed.
 *
 *   name     PAM service name, passed to pam_start()
 *   user     user name, passed to pam_start()
 *   uid/gid  credentials the forked "(sd-pam)" helper drops to
 *   tty      TTY path to register as PAM_TTY; if NULL, the TTY behind stdin
 *            (if there is one) is used instead
 *   env      in/out: the environment block. Its entries are handed to PAM via
 *            pam_putenv(); on success it is replaced by PAM's environment list
 *   fds      file descriptors that are closed inside the helper process
 *   n_fds    number of entries in fds
 *
 * Returns the result of strv_free_and_replace() on success, -EPERM if a PAM
 * call failed (PAM errors do not map to errno), or another negative
 * errno-style value if barrier creation or forking failed. When built without
 * PAM support this is a no-op that returns 0. */
static int setup_pam(
                const char *name,
                const char *user,
                uid_t uid,
                gid_t gid,
                const char *tty,
                char ***env,
                int fds[], size_t n_fds) {

#if HAVE_PAM

        static const struct pam_conv conv = {
                .conv = null_conv,
                .appdata_ptr = NULL
        };

        _cleanup_(barrier_destroy) Barrier barrier = BARRIER_NULL;
        pam_handle_t *handle = NULL;
        sigset_t old_ss;
        int pam_code = PAM_SUCCESS, r;
        char **nv, **e = NULL;
        bool close_session = false;
        pid_t pam_pid = 0, parent_pid;
        int flags = 0;

        assert(name);
        assert(user);
        assert(env);

        /* We set up PAM in the parent process, then fork. The child
         * will then stay around until killed via PR_SET_PDEATHSIG or
         * systemd via the cgroup logic. It will then remove the PAM
         * session again. The parent process will exec() the actual
         * daemon. We do things this way to ensure that the main PID
         * of the daemon is the one we initially fork()ed. */

        r = barrier_create(&barrier);
        if (r < 0)
                goto fail;

        /* Keep PAM quiet unless we are logging at debug level */
        if (log_get_max_level() < LOG_DEBUG)
                flags |= PAM_SILENT;

        pam_code = pam_start(name, user, &conv, &handle);
        if (pam_code != PAM_SUCCESS) {
                handle = NULL;
                goto fail;
        }

        if (!tty) {
                _cleanup_free_ char *q = NULL;

                /* Hmm, so no TTY was explicitly passed, but an fd passed to us directly might be a TTY. Let's figure
                 * out if that's the case, and read the TTY off it. */

                if (getttyname_malloc(STDIN_FILENO, &q) >= 0)
                        tty = strjoina("/dev/", q);
        }

        if (tty) {
                pam_code = pam_set_item(handle, PAM_TTY, tty);
                if (pam_code != PAM_SUCCESS)
                        goto fail;
        }

        /* Make the environment we have so far visible to the PAM modules */
        STRV_FOREACH(nv, *env) {
                pam_code = pam_putenv(handle, *nv);
                if (pam_code != PAM_SUCCESS)
                        goto fail;
        }

        pam_code = pam_acct_mgmt(handle, flags);
        if (pam_code != PAM_SUCCESS)
                goto fail;

        pam_code = pam_open_session(handle, flags);
        if (pam_code != PAM_SUCCESS)
                goto fail;

        /* From now on the failure path has to close the session again */
        close_session = true;

        e = pam_getenvlist(handle);
        if (!e) {
                pam_code = PAM_BUF_ERR;
                goto fail;
        }

        /* Block SIGTERM, so that we know that it won't get lost in
         * the child */

        assert_se(sigprocmask_many(SIG_BLOCK, &old_ss, SIGTERM, -1) >= 0);

        parent_pid = getpid_cached();

        r = safe_fork("(sd-pam)", 0, &pam_pid);
        if (r < 0)
                goto fail;
        if (r == 0) {
                int sig, ret = EXIT_PAM;

                /* The child's job is to reset the PAM session on
                 * termination */
                barrier_set_role(&barrier, BARRIER_CHILD);

                /* Make sure we don't keep open the passed fds in this child. We assume that otherwise only those fds
                 * are open here that have been opened by PAM. */
                (void) close_many(fds, n_fds);

                /* Drop privileges - we don't need any to pam_close_session
                 * and this will make PR_SET_PDEATHSIG work in most cases.
                 * If this fails, ignore the error - but expect sd-pam threads
                 * to fail to exit normally */

                r = maybe_setgroups(0, NULL);
                if (r < 0)
                        log_warning_errno(r, "Failed to setgroups() in sd-pam: %m");
                if (setresgid(gid, gid, gid) < 0)
                        log_warning_errno(errno, "Failed to setresgid() in sd-pam: %m");
                if (setresuid(uid, uid, uid) < 0)
                        log_warning_errno(errno, "Failed to setresuid() in sd-pam: %m");

                (void) ignore_signals(SIGPIPE, -1);

                /* Wait until our parent died. This will only work if
                 * the above setresuid() succeeds, otherwise the kernel
                 * will not allow unprivileged parents kill their privileged
                 * children this way. We rely on the control groups kill logic
                 * to do the rest for us. */
                if (prctl(PR_SET_PDEATHSIG, SIGTERM) < 0)
                        goto child_finish;

                /* Tell the parent that our setup is done. This is especially
                 * important regarding dropping privileges. Otherwise, unit
                 * setup might race against our setresuid(2) call.
                 *
                 * If the parent aborted, we'll detect this below, hence ignore
                 * return failure here. */
                (void) barrier_place(&barrier);

                /* Check if our parent process might already have died? */
                if (getppid() == parent_pid) {
                        sigset_t ss;

                        assert_se(sigemptyset(&ss) >= 0);
                        assert_se(sigaddset(&ss, SIGTERM) >= 0);

                        for (;;) {
                                /* sigwait() reports failure by returning a positive error number;
                                 * it never returns a negative value and does not set errno. Hence
                                 * check the return value itself, so that 'sig' is only looked at
                                 * when it was actually filled in. */
                                r = sigwait(&ss, &sig);
                                if (r == EINTR)
                                        continue;
                                if (r != 0)
                                        goto child_finish;

                                assert(sig == SIGTERM);
                                break;
                        }
                }

                /* If our parent died we'll end the session */
                if (getppid() != parent_pid) {
                        pam_code = pam_close_session(handle, flags);
                        if (pam_code != PAM_SUCCESS)
                                goto child_finish;
                }

                ret = 0;

        child_finish:
                pam_end(handle, pam_code | flags);
                _exit(ret);
        }

        barrier_set_role(&barrier, BARRIER_PARENT);

        /* If the child was forked off successfully it will do all the
         * cleanups, so forget about the handle here. */
        handle = NULL;

        /* Unblock SIGTERM again in the parent */
        assert_se(sigprocmask(SIG_SETMASK, &old_ss, NULL) >= 0);

        /* We close the log explicitly here, since the PAM modules
         * might have opened it, but we don't want this fd around. */
        closelog();

        /* Synchronously wait for the child to initialize. We don't care for
         * errors as we cannot recover. However, warn loudly if it happens. */
        if (!barrier_place_and_sync(&barrier))
                log_error("PAM initialization failed");

        /* Hand PAM's environment list over to the caller */
        return strv_free_and_replace(*env, e);

fail:
        if (pam_code != PAM_SUCCESS) {
                log_error("PAM failed: %s", pam_strerror(handle, pam_code));
                r = -EPERM;  /* PAM errors do not map to errno */
        } else
                log_error_errno(r, "PAM failed: %m");

        if (handle) {
                if (close_session)
                        pam_code = pam_close_session(handle, flags);

                pam_end(handle, pam_code | flags);
        }

        strv_free(e);
        closelog();

        return r;
#else
        return 0;
#endif
}
5b6319dc 1341
5d6b1584
LP
/* Renames the calling process to "(<name>)", where <name> is the last path
 * component of 'path', cut down to its final 8 characters if it is longer.
 * If the last component is empty, "(...)" is used instead. */
static void rename_process_from_path(const char *path) {
        /* '(' + at most 8 characters + ')' + NUL. The result must fit in 10
         * chars (i.e. the length of "/sbin/init") to look pretty in /bin/ps */
        char buf[11];
        const char *base;
        size_t len, skip;
        char *w = buf;

        base = basename(path);
        if (isempty(base)) {
                rename_process("(...)");
                return;
        }

        /* Keep the tail rather than the head: the end of the name is usually
         * the more interesting part, since the start might just be "systemd-" */
        len = strlen(base);
        skip = len > 8 ? len - 8 : 0;

        *w++ = '(';
        memcpy(w, base + skip, len - skip);
        w += len - skip;
        *w++ = ')';
        *w = 0;

        rename_process(buf);
}
1372
469830d1
LP
1373static bool context_has_address_families(const ExecContext *c) {
1374 assert(c);
1375
1376 return c->address_families_whitelist ||
1377 !set_isempty(c->address_families);
1378}
1379
1380static bool context_has_syscall_filters(const ExecContext *c) {
1381 assert(c);
1382
1383 return c->syscall_whitelist ||
8cfa775f 1384 !hashmap_isempty(c->syscall_filter);
469830d1
LP
1385}
1386
1387static bool context_has_no_new_privileges(const ExecContext *c) {
1388 assert(c);
1389
1390 if (c->no_new_privileges)
1391 return true;
1392
1393 if (have_effective_cap(CAP_SYS_ADMIN)) /* if we are privileged, we don't need NNP */
1394 return false;
1395
1396 /* We need NNP if we have any form of seccomp and are unprivileged */
1397 return context_has_address_families(c) ||
1398 c->memory_deny_write_execute ||
1399 c->restrict_realtime ||
f69567cb 1400 c->restrict_suid_sgid ||
469830d1
LP
1401 exec_context_restrict_namespaces_set(c) ||
1402 c->protect_kernel_tunables ||
1403 c->protect_kernel_modules ||
1404 c->private_devices ||
1405 context_has_syscall_filters(c) ||
78e864e5 1406 !set_isempty(c->syscall_archs) ||
aecd5ac6
TM
1407 c->lock_personality ||
1408 c->protect_hostname;
469830d1
LP
1409}
1410
349cc4a5 1411#if HAVE_SECCOMP
17df7223 1412
83f12b27 1413static bool skip_seccomp_unavailable(const Unit* u, const char* msg) {
f673b62d
LP
1414
1415 if (is_seccomp_available())
1416 return false;
1417
f673b62d 1418 log_unit_debug(u, "SECCOMP features not detected in the kernel, skipping %s", msg);
f673b62d 1419 return true;
83f12b27
FS
1420}
1421
165a31c0 1422static int apply_syscall_filter(const Unit* u, const ExecContext *c, bool needs_ambient_hack) {
469830d1 1423 uint32_t negative_action, default_action, action;
165a31c0 1424 int r;
8351ceae 1425
469830d1 1426 assert(u);
c0467cf3 1427 assert(c);
8351ceae 1428
469830d1 1429 if (!context_has_syscall_filters(c))
83f12b27
FS
1430 return 0;
1431
469830d1
LP
1432 if (skip_seccomp_unavailable(u, "SystemCallFilter="))
1433 return 0;
e9642be2 1434
ccc16c78 1435 negative_action = c->syscall_errno == 0 ? scmp_act_kill_process() : SCMP_ACT_ERRNO(c->syscall_errno);
e9642be2 1436
469830d1
LP
1437 if (c->syscall_whitelist) {
1438 default_action = negative_action;
1439 action = SCMP_ACT_ALLOW;
7c66bae2 1440 } else {
469830d1
LP
1441 default_action = SCMP_ACT_ALLOW;
1442 action = negative_action;
57183d11 1443 }
8351ceae 1444
165a31c0
LP
1445 if (needs_ambient_hack) {
1446 r = seccomp_filter_set_add(c->syscall_filter, c->syscall_whitelist, syscall_filter_sets + SYSCALL_FILTER_SET_SETUID);
1447 if (r < 0)
1448 return r;
1449 }
1450
b54f36c6 1451 return seccomp_load_syscall_filter_set_raw(default_action, c->syscall_filter, action, false);
4298d0b5
LP
1452}
1453
469830d1
LP
1454static int apply_syscall_archs(const Unit *u, const ExecContext *c) {
1455 assert(u);
4298d0b5
LP
1456 assert(c);
1457
469830d1 1458 if (set_isempty(c->syscall_archs))
83f12b27
FS
1459 return 0;
1460
469830d1
LP
1461 if (skip_seccomp_unavailable(u, "SystemCallArchitectures="))
1462 return 0;
4298d0b5 1463
469830d1
LP
1464 return seccomp_restrict_archs(c->syscall_archs);
1465}
4298d0b5 1466
469830d1
LP
1467static int apply_address_families(const Unit* u, const ExecContext *c) {
1468 assert(u);
1469 assert(c);
4298d0b5 1470
469830d1
LP
1471 if (!context_has_address_families(c))
1472 return 0;
4298d0b5 1473
469830d1
LP
1474 if (skip_seccomp_unavailable(u, "RestrictAddressFamilies="))
1475 return 0;
4298d0b5 1476
469830d1 1477 return seccomp_restrict_address_families(c->address_families, c->address_families_whitelist);
8351ceae 1478}
4298d0b5 1479
83f12b27 1480static int apply_memory_deny_write_execute(const Unit* u, const ExecContext *c) {
469830d1 1481 assert(u);
f3e43635
TM
1482 assert(c);
1483
469830d1 1484 if (!c->memory_deny_write_execute)
83f12b27
FS
1485 return 0;
1486
469830d1
LP
1487 if (skip_seccomp_unavailable(u, "MemoryDenyWriteExecute="))
1488 return 0;
f3e43635 1489
469830d1 1490 return seccomp_memory_deny_write_execute();
f3e43635
TM
1491}
1492
83f12b27 1493static int apply_restrict_realtime(const Unit* u, const ExecContext *c) {
469830d1 1494 assert(u);
f4170c67
LP
1495 assert(c);
1496
469830d1 1497 if (!c->restrict_realtime)
83f12b27
FS
1498 return 0;
1499
469830d1
LP
1500 if (skip_seccomp_unavailable(u, "RestrictRealtime="))
1501 return 0;
f4170c67 1502
469830d1 1503 return seccomp_restrict_realtime();
f4170c67
LP
1504}
1505
f69567cb
LP
1506static int apply_restrict_suid_sgid(const Unit* u, const ExecContext *c) {
1507 assert(u);
1508 assert(c);
1509
1510 if (!c->restrict_suid_sgid)
1511 return 0;
1512
1513 if (skip_seccomp_unavailable(u, "RestrictSUIDSGID="))
1514 return 0;
1515
1516 return seccomp_restrict_suid_sgid();
1517}
1518
59e856c7 1519static int apply_protect_sysctl(const Unit *u, const ExecContext *c) {
469830d1 1520 assert(u);
59eeb84b
LP
1521 assert(c);
1522
1523 /* Turn off the legacy sysctl() system call. Many distributions turn this off while building the kernel, but
1524 * let's protect even those systems where this is left on in the kernel. */
1525
469830d1 1526 if (!c->protect_kernel_tunables)
59eeb84b
LP
1527 return 0;
1528
469830d1
LP
1529 if (skip_seccomp_unavailable(u, "ProtectKernelTunables="))
1530 return 0;
59eeb84b 1531
469830d1 1532 return seccomp_protect_sysctl();
59eeb84b
LP
1533}
1534
59e856c7 1535static int apply_protect_kernel_modules(const Unit *u, const ExecContext *c) {
469830d1 1536 assert(u);
502d704e
DH
1537 assert(c);
1538
25a8d8a0 1539 /* Turn off module syscalls on ProtectKernelModules=yes */
502d704e 1540
469830d1
LP
1541 if (!c->protect_kernel_modules)
1542 return 0;
1543
502d704e
DH
1544 if (skip_seccomp_unavailable(u, "ProtectKernelModules="))
1545 return 0;
1546
b54f36c6 1547 return seccomp_load_syscall_filter_set(SCMP_ACT_ALLOW, syscall_filter_sets + SYSCALL_FILTER_SET_MODULE, SCMP_ACT_ERRNO(EPERM), false);
502d704e
DH
1548}
1549
59e856c7 1550static int apply_private_devices(const Unit *u, const ExecContext *c) {
469830d1 1551 assert(u);
ba128bb8
LP
1552 assert(c);
1553
8f81a5f6 1554 /* If PrivateDevices= is set, also turn off iopl and all @raw-io syscalls. */
ba128bb8 1555
469830d1
LP
1556 if (!c->private_devices)
1557 return 0;
1558
ba128bb8
LP
1559 if (skip_seccomp_unavailable(u, "PrivateDevices="))
1560 return 0;
1561
b54f36c6 1562 return seccomp_load_syscall_filter_set(SCMP_ACT_ALLOW, syscall_filter_sets + SYSCALL_FILTER_SET_RAW_IO, SCMP_ACT_ERRNO(EPERM), false);
ba128bb8
LP
1563}
1564
34cf6c43 1565static int apply_restrict_namespaces(const Unit *u, const ExecContext *c) {
469830d1 1566 assert(u);
add00535
LP
1567 assert(c);
1568
1569 if (!exec_context_restrict_namespaces_set(c))
1570 return 0;
1571
1572 if (skip_seccomp_unavailable(u, "RestrictNamespaces="))
1573 return 0;
1574
1575 return seccomp_restrict_namespaces(c->restrict_namespaces);
1576}
1577
78e864e5 1578static int apply_lock_personality(const Unit* u, const ExecContext *c) {
e8132d63
LP
1579 unsigned long personality;
1580 int r;
78e864e5
TM
1581
1582 assert(u);
1583 assert(c);
1584
1585 if (!c->lock_personality)
1586 return 0;
1587
1588 if (skip_seccomp_unavailable(u, "LockPersonality="))
1589 return 0;
1590
e8132d63
LP
1591 personality = c->personality;
1592
1593 /* If personality is not specified, use either PER_LINUX or PER_LINUX32 depending on what is currently set. */
1594 if (personality == PERSONALITY_INVALID) {
1595
1596 r = opinionated_personality(&personality);
1597 if (r < 0)
1598 return r;
1599 }
78e864e5
TM
1600
1601 return seccomp_lock_personality(personality);
1602}
1603
c0467cf3 1604#endif
8351ceae 1605
3042bbeb 1606static void do_idle_pipe_dance(int idle_pipe[static 4]) {
31a7eb86
ZJS
1607 assert(idle_pipe);
1608
54eb2300
LP
1609 idle_pipe[1] = safe_close(idle_pipe[1]);
1610 idle_pipe[2] = safe_close(idle_pipe[2]);
31a7eb86
ZJS
1611
1612 if (idle_pipe[0] >= 0) {
1613 int r;
1614
1615 r = fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT_USEC);
1616
1617 if (idle_pipe[3] >= 0 && r == 0 /* timeout */) {
c7cc737f
LP
1618 ssize_t n;
1619
31a7eb86 1620 /* Signal systemd that we are bored and want to continue. */
c7cc737f
LP
1621 n = write(idle_pipe[3], "x", 1);
1622 if (n > 0)
cd972d69
ZJS
1623 /* Wait for systemd to react to the signal above. */
1624 fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT2_USEC);
31a7eb86
ZJS
1625 }
1626
54eb2300 1627 idle_pipe[0] = safe_close(idle_pipe[0]);
31a7eb86
ZJS
1628
1629 }
1630
54eb2300 1631 idle_pipe[3] = safe_close(idle_pipe[3]);
31a7eb86
ZJS
1632}
1633
fb2042dd
YW
1634static const char *exec_directory_env_name_to_string(ExecDirectoryType t);
1635
7cae38c4 1636static int build_environment(
34cf6c43 1637 const Unit *u,
9fa95f85 1638 const ExecContext *c,
1e22b5cd 1639 const ExecParameters *p,
da6053d0 1640 size_t n_fds,
7cae38c4
LP
1641 const char *home,
1642 const char *username,
1643 const char *shell,
7bce046b
LP
1644 dev_t journal_stream_dev,
1645 ino_t journal_stream_ino,
7cae38c4
LP
1646 char ***ret) {
1647
1648 _cleanup_strv_free_ char **our_env = NULL;
fb2042dd 1649 ExecDirectoryType t;
da6053d0 1650 size_t n_env = 0;
7cae38c4
LP
1651 char *x;
1652
4b58153d 1653 assert(u);
7cae38c4 1654 assert(c);
7c1cb6f1 1655 assert(p);
7cae38c4
LP
1656 assert(ret);
1657
fb2042dd 1658 our_env = new0(char*, 14 + _EXEC_DIRECTORY_TYPE_MAX);
7cae38c4
LP
1659 if (!our_env)
1660 return -ENOMEM;
1661
1662 if (n_fds > 0) {
8dd4c05b
LP
1663 _cleanup_free_ char *joined = NULL;
1664
df0ff127 1665 if (asprintf(&x, "LISTEN_PID="PID_FMT, getpid_cached()) < 0)
7cae38c4
LP
1666 return -ENOMEM;
1667 our_env[n_env++] = x;
1668
da6053d0 1669 if (asprintf(&x, "LISTEN_FDS=%zu", n_fds) < 0)
7cae38c4
LP
1670 return -ENOMEM;
1671 our_env[n_env++] = x;
8dd4c05b 1672
1e22b5cd 1673 joined = strv_join(p->fd_names, ":");
8dd4c05b
LP
1674 if (!joined)
1675 return -ENOMEM;
1676
605405c6 1677 x = strjoin("LISTEN_FDNAMES=", joined);
8dd4c05b
LP
1678 if (!x)
1679 return -ENOMEM;
1680 our_env[n_env++] = x;
7cae38c4
LP
1681 }
1682
b08af3b1 1683 if ((p->flags & EXEC_SET_WATCHDOG) && p->watchdog_usec > 0) {
df0ff127 1684 if (asprintf(&x, "WATCHDOG_PID="PID_FMT, getpid_cached()) < 0)
09812eb7
LP
1685 return -ENOMEM;
1686 our_env[n_env++] = x;
1687
1e22b5cd 1688 if (asprintf(&x, "WATCHDOG_USEC="USEC_FMT, p->watchdog_usec) < 0)
09812eb7
LP
1689 return -ENOMEM;
1690 our_env[n_env++] = x;
1691 }
1692
fd63e712
LP
1693 /* If this is D-Bus, tell the nss-systemd module, since it relies on being able to use D-Bus look up dynamic
1694 * users via PID 1, possibly dead-locking the dbus daemon. This way it will not use D-Bus to resolve names, but
1695 * check the database directly. */
ac647978 1696 if (p->flags & EXEC_NSS_BYPASS_BUS) {
fd63e712
LP
1697 x = strdup("SYSTEMD_NSS_BYPASS_BUS=1");
1698 if (!x)
1699 return -ENOMEM;
1700 our_env[n_env++] = x;
1701 }
1702
7cae38c4 1703 if (home) {
b910cc72 1704 x = strjoin("HOME=", home);
7cae38c4
LP
1705 if (!x)
1706 return -ENOMEM;
7bbead1d
LP
1707
1708 path_simplify(x + 5, true);
7cae38c4
LP
1709 our_env[n_env++] = x;
1710 }
1711
1712 if (username) {
b910cc72 1713 x = strjoin("LOGNAME=", username);
7cae38c4
LP
1714 if (!x)
1715 return -ENOMEM;
1716 our_env[n_env++] = x;
1717
b910cc72 1718 x = strjoin("USER=", username);
7cae38c4
LP
1719 if (!x)
1720 return -ENOMEM;
1721 our_env[n_env++] = x;
1722 }
1723
1724 if (shell) {
b910cc72 1725 x = strjoin("SHELL=", shell);
7cae38c4
LP
1726 if (!x)
1727 return -ENOMEM;
7bbead1d
LP
1728
1729 path_simplify(x + 6, true);
7cae38c4
LP
1730 our_env[n_env++] = x;
1731 }
1732
4b58153d
LP
1733 if (!sd_id128_is_null(u->invocation_id)) {
1734 if (asprintf(&x, "INVOCATION_ID=" SD_ID128_FORMAT_STR, SD_ID128_FORMAT_VAL(u->invocation_id)) < 0)
1735 return -ENOMEM;
1736
1737 our_env[n_env++] = x;
1738 }
1739
6af760f3
LP
1740 if (exec_context_needs_term(c)) {
1741 const char *tty_path, *term = NULL;
1742
1743 tty_path = exec_context_tty_path(c);
1744
1745 /* If we are forked off PID 1 and we are supposed to operate on /dev/console, then let's try to inherit
1746 * the $TERM set for PID 1. This is useful for containers so that the $TERM the container manager
1747 * passes to PID 1 ends up all the way in the console login shown. */
1748
1749 if (path_equal(tty_path, "/dev/console") && getppid() == 1)
1750 term = getenv("TERM");
1751 if (!term)
1752 term = default_term_for_tty(tty_path);
7cae38c4 1753
b910cc72 1754 x = strjoin("TERM=", term);
7cae38c4
LP
1755 if (!x)
1756 return -ENOMEM;
1757 our_env[n_env++] = x;
1758 }
1759
7bce046b
LP
1760 if (journal_stream_dev != 0 && journal_stream_ino != 0) {
1761 if (asprintf(&x, "JOURNAL_STREAM=" DEV_FMT ":" INO_FMT, journal_stream_dev, journal_stream_ino) < 0)
1762 return -ENOMEM;
1763
1764 our_env[n_env++] = x;
1765 }
1766
fb2042dd
YW
1767 for (t = 0; t < _EXEC_DIRECTORY_TYPE_MAX; t++) {
1768 _cleanup_free_ char *pre = NULL, *joined = NULL;
1769 const char *n;
1770
1771 if (!p->prefix[t])
1772 continue;
1773
1774 if (strv_isempty(c->directories[t].paths))
1775 continue;
1776
1777 n = exec_directory_env_name_to_string(t);
1778 if (!n)
1779 continue;
1780
1781 pre = strjoin(p->prefix[t], "/");
1782 if (!pre)
1783 return -ENOMEM;
1784
1785 joined = strv_join_prefix(c->directories[t].paths, ":", pre);
1786 if (!joined)
1787 return -ENOMEM;
1788
1789 x = strjoin(n, "=", joined);
1790 if (!x)
1791 return -ENOMEM;
1792
1793 our_env[n_env++] = x;
1794 }
1795
7cae38c4 1796 our_env[n_env++] = NULL;
fb2042dd 1797 assert(n_env <= 14 + _EXEC_DIRECTORY_TYPE_MAX);
7cae38c4 1798
ae2a15bc 1799 *ret = TAKE_PTR(our_env);
7cae38c4
LP
1800
1801 return 0;
1802}
1803
b4c14404
FB
1804static int build_pass_environment(const ExecContext *c, char ***ret) {
1805 _cleanup_strv_free_ char **pass_env = NULL;
1806 size_t n_env = 0, n_bufsize = 0;
1807 char **i;
1808
1809 STRV_FOREACH(i, c->pass_environment) {
1810 _cleanup_free_ char *x = NULL;
1811 char *v;
1812
1813 v = getenv(*i);
1814 if (!v)
1815 continue;
605405c6 1816 x = strjoin(*i, "=", v);
b4c14404
FB
1817 if (!x)
1818 return -ENOMEM;
00819cc1 1819
b4c14404
FB
1820 if (!GREEDY_REALLOC(pass_env, n_bufsize, n_env + 2))
1821 return -ENOMEM;
00819cc1 1822
1cc6c93a 1823 pass_env[n_env++] = TAKE_PTR(x);
b4c14404 1824 pass_env[n_env] = NULL;
b4c14404
FB
1825 }
1826
ae2a15bc 1827 *ret = TAKE_PTR(pass_env);
b4c14404
FB
1828
1829 return 0;
1830}
1831
8b44a3d2
LP
1832static bool exec_needs_mount_namespace(
1833 const ExecContext *context,
1834 const ExecParameters *params,
4657abb5 1835 const ExecRuntime *runtime) {
8b44a3d2
LP
1836
1837 assert(context);
1838 assert(params);
1839
915e6d16
LP
1840 if (context->root_image)
1841 return true;
1842
2a624c36
AP
1843 if (!strv_isempty(context->read_write_paths) ||
1844 !strv_isempty(context->read_only_paths) ||
1845 !strv_isempty(context->inaccessible_paths))
8b44a3d2
LP
1846 return true;
1847
42b1d8e0 1848 if (context->n_bind_mounts > 0)
d2d6c096
LP
1849 return true;
1850
2abd4e38
YW
1851 if (context->n_temporary_filesystems > 0)
1852 return true;
1853
37ed15d7 1854 if (!IN_SET(context->mount_flags, 0, MS_SHARED))
8b44a3d2
LP
1855 return true;
1856
1857 if (context->private_tmp && runtime && (runtime->tmp_dir || runtime->var_tmp_dir))
1858 return true;
1859
8b44a3d2 1860 if (context->private_devices ||
228af36f 1861 context->private_mounts ||
8b44a3d2 1862 context->protect_system != PROTECT_SYSTEM_NO ||
59eeb84b
LP
1863 context->protect_home != PROTECT_HOME_NO ||
1864 context->protect_kernel_tunables ||
c575770b 1865 context->protect_kernel_modules ||
59eeb84b 1866 context->protect_control_groups)
8b44a3d2
LP
1867 return true;
1868
37c56f89
YW
1869 if (context->root_directory) {
1870 ExecDirectoryType t;
1871
1872 if (context->mount_apivfs)
1873 return true;
1874
1875 for (t = 0; t < _EXEC_DIRECTORY_TYPE_MAX; t++) {
1876 if (!params->prefix[t])
1877 continue;
1878
1879 if (!strv_isempty(context->directories[t].paths))
1880 return true;
1881 }
1882 }
5d997827 1883
42b1d8e0 1884 if (context->dynamic_user &&
b43ee82f 1885 (!strv_isempty(context->directories[EXEC_DIRECTORY_STATE].paths) ||
42b1d8e0
YW
1886 !strv_isempty(context->directories[EXEC_DIRECTORY_CACHE].paths) ||
1887 !strv_isempty(context->directories[EXEC_DIRECTORY_LOGS].paths)))
1888 return true;
1889
8b44a3d2
LP
1890 return false;
1891}
1892
d251207d
LP
1893static int setup_private_users(uid_t uid, gid_t gid) {
1894 _cleanup_free_ char *uid_map = NULL, *gid_map = NULL;
1895 _cleanup_close_pair_ int errno_pipe[2] = { -1, -1 };
1896 _cleanup_close_ int unshare_ready_fd = -1;
1897 _cleanup_(sigkill_waitp) pid_t pid = 0;
1898 uint64_t c = 1;
d251207d
LP
1899 ssize_t n;
1900 int r;
1901
1902 /* Set up a user namespace and map root to root, the selected UID/GID to itself, and everything else to
1903 * nobody. In order to be able to write this mapping we need CAP_SETUID in the original user namespace, which
1904 * we however lack after opening the user namespace. To work around this we fork() a temporary child process,
1905 * which waits for the parent to create the new user namespace while staying in the original namespace. The
1906 * child then writes the UID mapping, under full privileges. The parent waits for the child to finish and
1907 * continues execution normally. */
1908
587ab01b
ZJS
1909 if (uid != 0 && uid_is_valid(uid)) {
1910 r = asprintf(&uid_map,
1911 "0 0 1\n" /* Map root → root */
1912 UID_FMT " " UID_FMT " 1\n", /* Map $UID → $UID */
1913 uid, uid);
1914 if (r < 0)
1915 return -ENOMEM;
1916 } else {
e0f3720e 1917 uid_map = strdup("0 0 1\n"); /* The case where the above is the same */
587ab01b
ZJS
1918 if (!uid_map)
1919 return -ENOMEM;
1920 }
d251207d 1921
587ab01b
ZJS
1922 if (gid != 0 && gid_is_valid(gid)) {
1923 r = asprintf(&gid_map,
1924 "0 0 1\n" /* Map root → root */
1925 GID_FMT " " GID_FMT " 1\n", /* Map $GID → $GID */
1926 gid, gid);
1927 if (r < 0)
1928 return -ENOMEM;
1929 } else {
d251207d 1930 gid_map = strdup("0 0 1\n"); /* The case where the above is the same */
587ab01b
ZJS
1931 if (!gid_map)
1932 return -ENOMEM;
1933 }
d251207d
LP
1934
1935 /* Create a communication channel so that the parent can tell the child when it finished creating the user
1936 * namespace. */
1937 unshare_ready_fd = eventfd(0, EFD_CLOEXEC);
1938 if (unshare_ready_fd < 0)
1939 return -errno;
1940
1941 /* Create a communication channel so that the child can tell the parent a proper error code in case it
1942 * failed. */
1943 if (pipe2(errno_pipe, O_CLOEXEC) < 0)
1944 return -errno;
1945
4c253ed1
LP
1946 r = safe_fork("(sd-userns)", FORK_RESET_SIGNALS|FORK_DEATHSIG, &pid);
1947 if (r < 0)
1948 return r;
1949 if (r == 0) {
d251207d
LP
1950 _cleanup_close_ int fd = -1;
1951 const char *a;
1952 pid_t ppid;
1953
1954 /* Child process, running in the original user namespace. Let's update the parent's UID/GID map from
1955 * here, after the parent opened its own user namespace. */
1956
1957 ppid = getppid();
1958 errno_pipe[0] = safe_close(errno_pipe[0]);
1959
1960 /* Wait until the parent unshared the user namespace */
1961 if (read(unshare_ready_fd, &c, sizeof(c)) < 0) {
1962 r = -errno;
1963 goto child_fail;
1964 }
1965
1966 /* Disable the setgroups() system call in the child user namespace, for good. */
1967 a = procfs_file_alloca(ppid, "setgroups");
1968 fd = open(a, O_WRONLY|O_CLOEXEC);
1969 if (fd < 0) {
1970 if (errno != ENOENT) {
1971 r = -errno;
1972 goto child_fail;
1973 }
1974
1975 /* If the file is missing the kernel is too old, let's continue anyway. */
1976 } else {
1977 if (write(fd, "deny\n", 5) < 0) {
1978 r = -errno;
1979 goto child_fail;
1980 }
1981
1982 fd = safe_close(fd);
1983 }
1984
1985 /* First write the GID map */
1986 a = procfs_file_alloca(ppid, "gid_map");
1987 fd = open(a, O_WRONLY|O_CLOEXEC);
1988 if (fd < 0) {
1989 r = -errno;
1990 goto child_fail;
1991 }
1992 if (write(fd, gid_map, strlen(gid_map)) < 0) {
1993 r = -errno;
1994 goto child_fail;
1995 }
1996 fd = safe_close(fd);
1997
1998 /* The write the UID map */
1999 a = procfs_file_alloca(ppid, "uid_map");
2000 fd = open(a, O_WRONLY|O_CLOEXEC);
2001 if (fd < 0) {
2002 r = -errno;
2003 goto child_fail;
2004 }
2005 if (write(fd, uid_map, strlen(uid_map)) < 0) {
2006 r = -errno;
2007 goto child_fail;
2008 }
2009
2010 _exit(EXIT_SUCCESS);
2011
2012 child_fail:
2013 (void) write(errno_pipe[1], &r, sizeof(r));
2014 _exit(EXIT_FAILURE);
2015 }
2016
2017 errno_pipe[1] = safe_close(errno_pipe[1]);
2018
2019 if (unshare(CLONE_NEWUSER) < 0)
2020 return -errno;
2021
2022 /* Let the child know that the namespace is ready now */
2023 if (write(unshare_ready_fd, &c, sizeof(c)) < 0)
2024 return -errno;
2025
2026 /* Try to read an error code from the child */
2027 n = read(errno_pipe[0], &r, sizeof(r));
2028 if (n < 0)
2029 return -errno;
2030 if (n == sizeof(r)) { /* an error code was sent to us */
2031 if (r < 0)
2032 return r;
2033 return -EIO;
2034 }
2035 if (n != 0) /* on success we should have read 0 bytes */
2036 return -EIO;
2037
2e87a1fd
LP
2038 r = wait_for_terminate_and_check("(sd-userns)", pid, 0);
2039 pid = 0;
d251207d
LP
2040 if (r < 0)
2041 return r;
2e87a1fd 2042 if (r != EXIT_SUCCESS) /* If something strange happened with the child, let's consider this fatal, too */
d251207d
LP
2043 return -EIO;
2044
2045 return 0;
2046}
2047
494d0247
YW
2048static bool exec_directory_is_private(const ExecContext *context, ExecDirectoryType type) {
2049 if (!context->dynamic_user)
2050 return false;
2051
2052 if (type == EXEC_DIRECTORY_CONFIGURATION)
2053 return false;
2054
2055 if (type == EXEC_DIRECTORY_RUNTIME && context->runtime_directory_preserve_mode == EXEC_PRESERVE_NO)
2056 return false;
2057
2058 return true;
2059}
2060
3536f49e 2061static int setup_exec_directory(
07689d5d
LP
2062 const ExecContext *context,
2063 const ExecParameters *params,
2064 uid_t uid,
3536f49e 2065 gid_t gid,
3536f49e
YW
2066 ExecDirectoryType type,
2067 int *exit_status) {
07689d5d 2068
72fd1768 2069 static const int exit_status_table[_EXEC_DIRECTORY_TYPE_MAX] = {
3536f49e
YW
2070 [EXEC_DIRECTORY_RUNTIME] = EXIT_RUNTIME_DIRECTORY,
2071 [EXEC_DIRECTORY_STATE] = EXIT_STATE_DIRECTORY,
2072 [EXEC_DIRECTORY_CACHE] = EXIT_CACHE_DIRECTORY,
2073 [EXEC_DIRECTORY_LOGS] = EXIT_LOGS_DIRECTORY,
2074 [EXEC_DIRECTORY_CONFIGURATION] = EXIT_CONFIGURATION_DIRECTORY,
2075 };
07689d5d
LP
2076 char **rt;
2077 int r;
2078
2079 assert(context);
2080 assert(params);
72fd1768 2081 assert(type >= 0 && type < _EXEC_DIRECTORY_TYPE_MAX);
3536f49e 2082 assert(exit_status);
07689d5d 2083
3536f49e
YW
2084 if (!params->prefix[type])
2085 return 0;
2086
8679efde 2087 if (params->flags & EXEC_CHOWN_DIRECTORIES) {
3536f49e
YW
2088 if (!uid_is_valid(uid))
2089 uid = 0;
2090 if (!gid_is_valid(gid))
2091 gid = 0;
2092 }
2093
2094 STRV_FOREACH(rt, context->directories[type].paths) {
6c47cd7d 2095 _cleanup_free_ char *p = NULL, *pp = NULL;
07689d5d 2096
edbfeb12 2097 p = path_join(params->prefix[type], *rt);
3536f49e
YW
2098 if (!p) {
2099 r = -ENOMEM;
2100 goto fail;
2101 }
07689d5d 2102
23a7448e
YW
2103 r = mkdir_parents_label(p, 0755);
2104 if (r < 0)
3536f49e 2105 goto fail;
23a7448e 2106
494d0247 2107 if (exec_directory_is_private(context, type)) {
6c9c51e5 2108 _cleanup_free_ char *private_root = NULL;
6c47cd7d 2109
3f5b1508
LP
2110 /* So, here's one extra complication when dealing with DynamicUser=1 units. In that
2111 * case we want to avoid leaving a directory around fully accessible that is owned by
2112 * a dynamic user whose UID is later on reused. To lock this down we use the same
2113 * trick used by container managers to prohibit host users to get access to files of
2114 * the same UID in containers: we place everything inside a directory that has an
2115 * access mode of 0700 and is owned root:root, so that it acts as security boundary
2116 * for unprivileged host code. We then use fs namespacing to make this directory
2117 * permeable for the service itself.
6c47cd7d 2118 *
3f5b1508
LP
2119 * Specifically: for a service which wants a special directory "foo/" we first create
2120 * a directory "private/" with access mode 0700 owned by root:root. Then we place
2121 * "foo" inside of that directory (i.e. "private/foo/"), and make "foo" a symlink to
2122 * "private/foo". This way, privileged host users can access "foo/" as usual, but
2123 * unprivileged host users can't look into it. Inside of the namespace of the unit
2124 * "private/" is replaced by a more liberally accessible tmpfs, into which the host's
2125 * "private/foo/" is mounted under the same name, thus disabling the access boundary
2126 * for the service and making sure it only gets access to the dirs it needs but no
2127 * others. Tricky? Yes, absolutely, but it works!
6c47cd7d 2128 *
3f5b1508
LP
2129 * Note that we don't do this for EXEC_DIRECTORY_CONFIGURATION as that's assumed not
2130 * to be owned by the service itself.
2131 *
2132 * Also, note that we don't do this for EXEC_DIRECTORY_RUNTIME as that's often used
2133 * for sharing files or sockets with other services. */
6c47cd7d 2134
edbfeb12 2135 private_root = path_join(params->prefix[type], "private");
6c47cd7d
LP
2136 if (!private_root) {
2137 r = -ENOMEM;
2138 goto fail;
2139 }
2140
2141 /* First set up private root if it doesn't exist yet, with access mode 0700 and owned by root:root */
37c1d5e9 2142 r = mkdir_safe_label(private_root, 0700, 0, 0, MKDIR_WARN_MODE);
6c47cd7d
LP
2143 if (r < 0)
2144 goto fail;
2145
edbfeb12 2146 pp = path_join(private_root, *rt);
6c47cd7d
LP
2147 if (!pp) {
2148 r = -ENOMEM;
2149 goto fail;
2150 }
2151
2152 /* Create all directories between the configured directory and this private root, and mark them 0755 */
2153 r = mkdir_parents_label(pp, 0755);
2154 if (r < 0)
2155 goto fail;
2156
949befd3
LP
2157 if (is_dir(p, false) > 0 &&
2158 (laccess(pp, F_OK) < 0 && errno == ENOENT)) {
2159
2160 /* Hmm, the private directory doesn't exist yet, but the normal one exists? If so, move
2161 * it over. Most likely the service has been upgraded from one that didn't use
2162 * DynamicUser=1, to one that does. */
2163
cf52c45d
LP
2164 log_info("Found pre-existing public %s= directory %s, migrating to %s.\n"
2165 "Apparently, service previously had DynamicUser= turned off, and has now turned it on.",
2166 exec_directory_type_to_string(type), p, pp);
2167
949befd3
LP
2168 if (rename(p, pp) < 0) {
2169 r = -errno;
2170 goto fail;
2171 }
2172 } else {
2173 /* Otherwise, create the actual directory for the service */
2174
2175 r = mkdir_label(pp, context->directories[type].mode);
2176 if (r < 0 && r != -EEXIST)
2177 goto fail;
2178 }
6c47cd7d 2179
6c47cd7d 2180 /* And link it up from the original place */
6c9c51e5 2181 r = symlink_idempotent(pp, p, true);
6c47cd7d
LP
2182 if (r < 0)
2183 goto fail;
2184
6c47cd7d 2185 } else {
5c6d40d1
LP
2186 _cleanup_free_ char *target = NULL;
2187
2188 if (type != EXEC_DIRECTORY_CONFIGURATION &&
2189 readlink_and_make_absolute(p, &target) >= 0) {
2190 _cleanup_free_ char *q = NULL;
2191
2192 /* This already exists and is a symlink? Interesting. Maybe it's one created
2193f17c
LP
2193 * by DynamicUser=1 (see above)?
2194 *
2195 * We do this for all directory types except for ConfigurationDirectory=,
2196 * since they all support the private/ symlink logic at least in some
2197 * configurations, see above. */
5c6d40d1
LP
2198
2199 q = path_join(params->prefix[type], "private", *rt);
2200 if (!q) {
2201 r = -ENOMEM;
2202 goto fail;
2203 }
2204
2205 if (path_equal(q, target)) {
2206
2207 /* Hmm, apparently DynamicUser= was once turned on for this service,
2208 * but is no longer. Let's move the directory back up. */
2209
cf52c45d
LP
2210 log_info("Found pre-existing private %s= directory %s, migrating to %s.\n"
2211 "Apparently, service previously had DynamicUser= turned on, and has now turned it off.",
2212 exec_directory_type_to_string(type), q, p);
2213
5c6d40d1
LP
2214 if (unlink(p) < 0) {
2215 r = -errno;
2216 goto fail;
2217 }
2218
2219 if (rename(q, p) < 0) {
2220 r = -errno;
2221 goto fail;
2222 }
2223 }
2224 }
2225
6c47cd7d 2226 r = mkdir_label(p, context->directories[type].mode);
d484580c 2227 if (r < 0) {
d484580c
LP
2228 if (r != -EEXIST)
2229 goto fail;
2230
206e9864
LP
2231 if (type == EXEC_DIRECTORY_CONFIGURATION) {
2232 struct stat st;
2233
2234 /* Don't change the owner/access mode of the configuration directory,
2235 * as in the common case it is not written to by a service, and shall
2236 * not be writable. */
2237
2238 if (stat(p, &st) < 0) {
2239 r = -errno;
2240 goto fail;
2241 }
2242
2243 /* Still complain if the access mode doesn't match */
2244 if (((st.st_mode ^ context->directories[type].mode) & 07777) != 0)
2245 log_warning("%s \'%s\' already exists but the mode is different. "
2246 "(File system: %o %sMode: %o)",
2247 exec_directory_type_to_string(type), *rt,
2248 st.st_mode & 07777, exec_directory_type_to_string(type), context->directories[type].mode & 07777);
2249
6cff72eb 2250 continue;
206e9864 2251 }
6cff72eb 2252 }
a1164ae3 2253 }
07689d5d 2254
206e9864 2255 /* Lock down the access mode (we use chmod_and_chown() to make this idempotent. We don't
5238e957 2256 * specify UID/GID here, so that path_chown_recursive() can optimize things depending on the
206e9864
LP
2257 * current UID/GID ownership.) */
2258 r = chmod_and_chown(pp ?: p, context->directories[type].mode, UID_INVALID, GID_INVALID);
2259 if (r < 0)
2260 goto fail;
c71b2eb7 2261
607b358e
LP
2262 /* Then, change the ownership of the whole tree, if necessary. When dynamic users are used we
2263 * drop the suid/sgid bits, since we really don't want SUID/SGID files for dynamic UID/GID
2264 * assignments to exist.*/
2265 r = path_chown_recursive(pp ?: p, uid, gid, context->dynamic_user ? 01777 : 07777);
07689d5d 2266 if (r < 0)
3536f49e 2267 goto fail;
07689d5d
LP
2268 }
2269
2270 return 0;
3536f49e
YW
2271
2272fail:
2273 *exit_status = exit_status_table[type];
3536f49e 2274 return r;
07689d5d
LP
2275}
2276
#if ENABLE_SMACK
/* Applies a SMACK label to the calling process (PID 0 is passed to mac_smack_apply_pid()).
 *
 * An explicitly configured context->smack_process_label wins. Otherwise, and only if
 * SMACK_DEFAULT_PROCESS_LABEL is defined at build time, the SMACK_ATTR_EXEC label read from the command's
 * binary is used, falling back to that default. Returns 0 on success, a negative error otherwise. */
static int setup_smack(
                const ExecContext *context,
                const ExecCommand *command) {

        int r;

        assert(context);
        assert(command);

        if (context->smack_process_label) {
                r = mac_smack_apply_pid(0, context->smack_process_label);
                return r < 0 ? r : 0;
        }

#ifdef SMACK_DEFAULT_PROCESS_LABEL
        {
                _cleanup_free_ char *exec_label = NULL;

                /* A binary without label, or a file system without support for it, is not an error */
                r = mac_smack_read(command->path, SMACK_ATTR_EXEC, &exec_label);
                if (r < 0 && !IN_SET(r, -ENODATA, -EOPNOTSUPP))
                        return r;

                r = mac_smack_apply_pid(0, exec_label ?: SMACK_DEFAULT_PROCESS_LABEL);
                if (r < 0)
                        return r;
        }
#endif

        return 0;
}
#endif
cefc33ae 2309
6c47cd7d
LP
2310static int compile_bind_mounts(
2311 const ExecContext *context,
2312 const ExecParameters *params,
2313 BindMount **ret_bind_mounts,
da6053d0 2314 size_t *ret_n_bind_mounts,
6c47cd7d
LP
2315 char ***ret_empty_directories) {
2316
2317 _cleanup_strv_free_ char **empty_directories = NULL;
2318 BindMount *bind_mounts;
da6053d0 2319 size_t n, h = 0, i;
6c47cd7d
LP
2320 ExecDirectoryType t;
2321 int r;
2322
2323 assert(context);
2324 assert(params);
2325 assert(ret_bind_mounts);
2326 assert(ret_n_bind_mounts);
2327 assert(ret_empty_directories);
2328
2329 n = context->n_bind_mounts;
2330 for (t = 0; t < _EXEC_DIRECTORY_TYPE_MAX; t++) {
2331 if (!params->prefix[t])
2332 continue;
2333
2334 n += strv_length(context->directories[t].paths);
2335 }
2336
2337 if (n <= 0) {
2338 *ret_bind_mounts = NULL;
2339 *ret_n_bind_mounts = 0;
2340 *ret_empty_directories = NULL;
2341 return 0;
2342 }
2343
2344 bind_mounts = new(BindMount, n);
2345 if (!bind_mounts)
2346 return -ENOMEM;
2347
a8cabc61 2348 for (i = 0; i < context->n_bind_mounts; i++) {
6c47cd7d
LP
2349 BindMount *item = context->bind_mounts + i;
2350 char *s, *d;
2351
2352 s = strdup(item->source);
2353 if (!s) {
2354 r = -ENOMEM;
2355 goto finish;
2356 }
2357
2358 d = strdup(item->destination);
2359 if (!d) {
2360 free(s);
2361 r = -ENOMEM;
2362 goto finish;
2363 }
2364
2365 bind_mounts[h++] = (BindMount) {
2366 .source = s,
2367 .destination = d,
2368 .read_only = item->read_only,
2369 .recursive = item->recursive,
2370 .ignore_enoent = item->ignore_enoent,
2371 };
2372 }
2373
2374 for (t = 0; t < _EXEC_DIRECTORY_TYPE_MAX; t++) {
2375 char **suffix;
2376
2377 if (!params->prefix[t])
2378 continue;
2379
2380 if (strv_isempty(context->directories[t].paths))
2381 continue;
2382
494d0247 2383 if (exec_directory_is_private(context, t) &&
5609f688 2384 !(context->root_directory || context->root_image)) {
6c47cd7d
LP
2385 char *private_root;
2386
2387 /* So this is for a dynamic user, and we need to make sure the process can access its own
2388 * directory. For that we overmount the usually inaccessible "private" subdirectory with a
2389 * tmpfs that makes it accessible and is empty except for the submounts we do this for. */
2390
657ee2d8 2391 private_root = path_join(params->prefix[t], "private");
6c47cd7d
LP
2392 if (!private_root) {
2393 r = -ENOMEM;
2394 goto finish;
2395 }
2396
2397 r = strv_consume(&empty_directories, private_root);
a635a7ae 2398 if (r < 0)
6c47cd7d 2399 goto finish;
6c47cd7d
LP
2400 }
2401
2402 STRV_FOREACH(suffix, context->directories[t].paths) {
2403 char *s, *d;
2404
494d0247 2405 if (exec_directory_is_private(context, t))
657ee2d8 2406 s = path_join(params->prefix[t], "private", *suffix);
6c47cd7d 2407 else
657ee2d8 2408 s = path_join(params->prefix[t], *suffix);
6c47cd7d
LP
2409 if (!s) {
2410 r = -ENOMEM;
2411 goto finish;
2412 }
2413
494d0247 2414 if (exec_directory_is_private(context, t) &&
5609f688
YW
2415 (context->root_directory || context->root_image))
2416 /* When RootDirectory= or RootImage= are set, then the symbolic link to the private
2417 * directory is not created on the root directory. So, let's bind-mount the directory
2418 * on the 'non-private' place. */
657ee2d8 2419 d = path_join(params->prefix[t], *suffix);
5609f688
YW
2420 else
2421 d = strdup(s);
6c47cd7d
LP
2422 if (!d) {
2423 free(s);
2424 r = -ENOMEM;
2425 goto finish;
2426 }
2427
2428 bind_mounts[h++] = (BindMount) {
2429 .source = s,
2430 .destination = d,
2431 .read_only = false,
9ce4e4b0 2432 .nosuid = context->dynamic_user, /* don't allow suid/sgid when DynamicUser= is on */
6c47cd7d
LP
2433 .recursive = true,
2434 .ignore_enoent = false,
2435 };
2436 }
2437 }
2438
2439 assert(h == n);
2440
2441 *ret_bind_mounts = bind_mounts;
2442 *ret_n_bind_mounts = n;
ae2a15bc 2443 *ret_empty_directories = TAKE_PTR(empty_directories);
6c47cd7d
LP
2444
2445 return (int) n;
2446
2447finish:
2448 bind_mount_free_many(bind_mounts, h);
2449 return r;
2450}
2451
6818c54c 2452static int apply_mount_namespace(
34cf6c43
YW
2453 const Unit *u,
2454 const ExecCommand *command,
6818c54c
LP
2455 const ExecContext *context,
2456 const ExecParameters *params,
7cc5ef5f
ZJS
2457 const ExecRuntime *runtime,
2458 char **error_path) {
6818c54c 2459
7bcef4ef 2460 _cleanup_strv_free_ char **empty_directories = NULL;
93c6bb51 2461 char *tmp = NULL, *var = NULL;
915e6d16 2462 const char *root_dir = NULL, *root_image = NULL;
228af36f 2463 NamespaceInfo ns_info;
165a31c0 2464 bool needs_sandboxing;
6c47cd7d 2465 BindMount *bind_mounts = NULL;
da6053d0 2466 size_t n_bind_mounts = 0;
6818c54c 2467 int r;
93c6bb51 2468
2b3c1b9e
DH
2469 assert(context);
2470
93c6bb51
DH
2471 /* The runtime struct only contains the parent of the private /tmp,
2472 * which is non-accessible to world users. Inside of it there's a /tmp
2473 * that is sticky, and that's the one we want to use here. */
2474
2475 if (context->private_tmp && runtime) {
2476 if (runtime->tmp_dir)
2477 tmp = strjoina(runtime->tmp_dir, "/tmp");
2478 if (runtime->var_tmp_dir)
2479 var = strjoina(runtime->var_tmp_dir, "/tmp");
2480 }
2481
915e6d16
LP
2482 if (params->flags & EXEC_APPLY_CHROOT) {
2483 root_image = context->root_image;
2484
2485 if (!root_image)
2486 root_dir = context->root_directory;
2487 }
93c6bb51 2488
6c47cd7d
LP
2489 r = compile_bind_mounts(context, params, &bind_mounts, &n_bind_mounts, &empty_directories);
2490 if (r < 0)
2491 return r;
2492
165a31c0 2493 needs_sandboxing = (params->flags & EXEC_APPLY_SANDBOXING) && !(command->flags & EXEC_COMMAND_FULLY_PRIVILEGED);
b5a33299
YW
2494 if (needs_sandboxing)
2495 ns_info = (NamespaceInfo) {
2496 .ignore_protect_paths = false,
2497 .private_dev = context->private_devices,
2498 .protect_control_groups = context->protect_control_groups,
2499 .protect_kernel_tunables = context->protect_kernel_tunables,
2500 .protect_kernel_modules = context->protect_kernel_modules,
aecd5ac6 2501 .protect_hostname = context->protect_hostname,
b5a33299 2502 .mount_apivfs = context->mount_apivfs,
228af36f 2503 .private_mounts = context->private_mounts,
b5a33299 2504 };
228af36f
LP
2505 else if (!context->dynamic_user && root_dir)
2506 /*
2507 * If DynamicUser=no and RootDirectory= is set then lets pass a relaxed
2508 * sandbox info, otherwise enforce it, don't ignore protected paths and
2509 * fail if we are enable to apply the sandbox inside the mount namespace.
2510 */
2511 ns_info = (NamespaceInfo) {
2512 .ignore_protect_paths = true,
2513 };
2514 else
2515 ns_info = (NamespaceInfo) {};
b5a33299 2516
37ed15d7
FB
2517 if (context->mount_flags == MS_SHARED)
2518 log_unit_debug(u, "shared mount propagation hidden by other fs namespacing unit settings: ignoring");
2519
915e6d16 2520 r = setup_namespace(root_dir, root_image,
7bcef4ef 2521 &ns_info, context->read_write_paths,
165a31c0
LP
2522 needs_sandboxing ? context->read_only_paths : NULL,
2523 needs_sandboxing ? context->inaccessible_paths : NULL,
6c47cd7d
LP
2524 empty_directories,
2525 bind_mounts,
2526 n_bind_mounts,
2abd4e38
YW
2527 context->temporary_filesystems,
2528 context->n_temporary_filesystems,
93c6bb51
DH
2529 tmp,
2530 var,
165a31c0
LP
2531 needs_sandboxing ? context->protect_home : PROTECT_HOME_NO,
2532 needs_sandboxing ? context->protect_system : PROTECT_SYSTEM_NO,
915e6d16 2533 context->mount_flags,
7cc5ef5f
ZJS
2534 DISSECT_IMAGE_DISCARD_ON_LOOP,
2535 error_path);
93c6bb51 2536
6c47cd7d
LP
2537 bind_mount_free_many(bind_mounts, n_bind_mounts);
2538
1beab8b0 2539 /* If we couldn't set up the namespace this is probably due to a missing capability. setup_namespace() reports
5238e957 2540 * that with a special, recognizable error ENOANO. In this case, silently proceed, but only if exclusively
1beab8b0
LP
2541 * sandboxing options were used, i.e. nothing such as RootDirectory= or BindMount= that would result in a
2542 * completely different execution environment. */
aca835ed
YW
2543 if (r == -ENOANO) {
2544 if (n_bind_mounts == 0 &&
2545 context->n_temporary_filesystems == 0 &&
2546 !root_dir && !root_image &&
2547 !context->dynamic_user) {
2548 log_unit_debug(u, "Failed to set up namespace, assuming containerized execution and ignoring.");
2549 return 0;
2550 }
2551
2194547e
LP
2552 log_unit_debug(u, "Failed to set up namespace, and refusing to continue since the selected namespacing options alter mount environment non-trivially.\n"
2553 "Bind mounts: %zu, temporary filesystems: %zu, root directory: %s, root image: %s, dynamic user: %s",
2554 n_bind_mounts, context->n_temporary_filesystems, yes_no(root_dir), yes_no(root_image), yes_no(context->dynamic_user));
2555
aca835ed 2556 return -EOPNOTSUPP;
93c6bb51
DH
2557 }
2558
2559 return r;
2560}
2561
915e6d16
LP
2562static int apply_working_directory(
2563 const ExecContext *context,
2564 const ExecParameters *params,
2565 const char *home,
376fecf6 2566 int *exit_status) {
915e6d16 2567
6732edab 2568 const char *d, *wd;
2b3c1b9e
DH
2569
2570 assert(context);
376fecf6 2571 assert(exit_status);
2b3c1b9e 2572
6732edab
LP
2573 if (context->working_directory_home) {
2574
376fecf6
LP
2575 if (!home) {
2576 *exit_status = EXIT_CHDIR;
6732edab 2577 return -ENXIO;
376fecf6 2578 }
6732edab 2579
2b3c1b9e 2580 wd = home;
6732edab
LP
2581
2582 } else if (context->working_directory)
2b3c1b9e
DH
2583 wd = context->working_directory;
2584 else
2585 wd = "/";
e7f1e7c6 2586
fa97f630 2587 if (params->flags & EXEC_APPLY_CHROOT)
2b3c1b9e 2588 d = wd;
fa97f630 2589 else
3b0e5bb5 2590 d = prefix_roota(context->root_directory, wd);
e7f1e7c6 2591
376fecf6
LP
2592 if (chdir(d) < 0 && !context->working_directory_missing_ok) {
2593 *exit_status = EXIT_CHDIR;
2b3c1b9e 2594 return -errno;
376fecf6 2595 }
e7f1e7c6
DH
2596
2597 return 0;
2598}
2599
fa97f630
JB
2600static int apply_root_directory(
2601 const ExecContext *context,
2602 const ExecParameters *params,
2603 const bool needs_mount_ns,
2604 int *exit_status) {
2605
2606 assert(context);
2607 assert(exit_status);
2608
2609 if (params->flags & EXEC_APPLY_CHROOT) {
2610 if (!needs_mount_ns && context->root_directory)
2611 if (chroot(context->root_directory) < 0) {
2612 *exit_status = EXIT_CHROOT;
2613 return -errno;
2614 }
2615 }
2616
2617 return 0;
2618}
2619
b1edf445 2620static int setup_keyring(
34cf6c43 2621 const Unit *u,
b1edf445
LP
2622 const ExecContext *context,
2623 const ExecParameters *p,
2624 uid_t uid, gid_t gid) {
2625
74dd6b51 2626 key_serial_t keyring;
e64c2d0b
DJL
2627 int r = 0;
2628 uid_t saved_uid;
2629 gid_t saved_gid;
74dd6b51
LP
2630
2631 assert(u);
b1edf445 2632 assert(context);
74dd6b51
LP
2633 assert(p);
2634
2635 /* Let's set up a new per-service "session" kernel keyring for each system service. This has the benefit that
2636 * each service runs with its own keyring shared among all processes of the service, but with no hook-up beyond
2637 * that scope, and in particular no link to the per-UID keyring. If we don't do this the keyring will be
2638 * automatically created on-demand and then linked to the per-UID keyring, by the kernel. The kernel's built-in
2639 * on-demand behaviour is very appropriate for login users, but probably not so much for system services, where
2640 * UIDs are not necessarily specific to a service but reused (at least in the case of UID 0). */
2641
b1edf445
LP
2642 if (context->keyring_mode == EXEC_KEYRING_INHERIT)
2643 return 0;
2644
e64c2d0b
DJL
2645 /* Acquiring a reference to the user keyring is nasty. We briefly change identity in order to get things set up
2646 * properly by the kernel. If we don't do that then we can't create it atomically, and that sucks for parallel
2647 * execution. This mimics what pam_keyinit does, too. Setting up session keyring, to be owned by the right user
2648 * & group is just as nasty as acquiring a reference to the user keyring. */
2649
2650 saved_uid = getuid();
2651 saved_gid = getgid();
2652
2653 if (gid_is_valid(gid) && gid != saved_gid) {
2654 if (setregid(gid, -1) < 0)
2655 return log_unit_error_errno(u, errno, "Failed to change GID for user keyring: %m");
2656 }
2657
2658 if (uid_is_valid(uid) && uid != saved_uid) {
2659 if (setreuid(uid, -1) < 0) {
2660 r = log_unit_error_errno(u, errno, "Failed to change UID for user keyring: %m");
2661 goto out;
2662 }
2663 }
2664
74dd6b51
LP
2665 keyring = keyctl(KEYCTL_JOIN_SESSION_KEYRING, 0, 0, 0, 0);
2666 if (keyring == -1) {
2667 if (errno == ENOSYS)
8002fb97 2668 log_unit_debug_errno(u, errno, "Kernel keyring not supported, ignoring.");
74dd6b51 2669 else if (IN_SET(errno, EACCES, EPERM))
8002fb97 2670 log_unit_debug_errno(u, errno, "Kernel keyring access prohibited, ignoring.");
74dd6b51 2671 else if (errno == EDQUOT)
8002fb97 2672 log_unit_debug_errno(u, errno, "Out of kernel keyrings to allocate, ignoring.");
74dd6b51 2673 else
e64c2d0b 2674 r = log_unit_error_errno(u, errno, "Setting up kernel keyring failed: %m");
74dd6b51 2675
e64c2d0b 2676 goto out;
74dd6b51
LP
2677 }
2678
e64c2d0b
DJL
2679 /* When requested link the user keyring into the session keyring. */
2680 if (context->keyring_mode == EXEC_KEYRING_SHARED) {
2681
2682 if (keyctl(KEYCTL_LINK,
2683 KEY_SPEC_USER_KEYRING,
2684 KEY_SPEC_SESSION_KEYRING, 0, 0) < 0) {
2685 r = log_unit_error_errno(u, errno, "Failed to link user keyring into session keyring: %m");
2686 goto out;
2687 }
2688 }
2689
2690 /* Restore uid/gid back */
2691 if (uid_is_valid(uid) && uid != saved_uid) {
2692 if (setreuid(saved_uid, -1) < 0) {
2693 r = log_unit_error_errno(u, errno, "Failed to change UID back for user keyring: %m");
2694 goto out;
2695 }
2696 }
2697
2698 if (gid_is_valid(gid) && gid != saved_gid) {
2699 if (setregid(saved_gid, -1) < 0)
2700 return log_unit_error_errno(u, errno, "Failed to change GID back for user keyring: %m");
2701 }
2702
2703 /* Populate they keyring with the invocation ID by default, as original saved_uid. */
b3415f5d
LP
2704 if (!sd_id128_is_null(u->invocation_id)) {
2705 key_serial_t key;
2706
2707 key = add_key("user", "invocation_id", &u->invocation_id, sizeof(u->invocation_id), KEY_SPEC_SESSION_KEYRING);
2708 if (key == -1)
8002fb97 2709 log_unit_debug_errno(u, errno, "Failed to add invocation ID to keyring, ignoring: %m");
b3415f5d
LP
2710 else {
2711 if (keyctl(KEYCTL_SETPERM, key,
2712 KEY_POS_VIEW|KEY_POS_READ|KEY_POS_SEARCH|
2713 KEY_USR_VIEW|KEY_USR_READ|KEY_USR_SEARCH, 0, 0) < 0)
e64c2d0b 2714 r = log_unit_error_errno(u, errno, "Failed to restrict invocation ID permission: %m");
b3415f5d
LP
2715 }
2716 }
2717
e64c2d0b
DJL
2718out:
2719 /* Revert back uid & gid for the the last time, and exit */
2720 /* no extra logging, as only the first already reported error matters */
2721 if (getuid() != saved_uid)
2722 (void) setreuid(saved_uid, -1);
b1edf445 2723
e64c2d0b
DJL
2724 if (getgid() != saved_gid)
2725 (void) setregid(saved_gid, -1);
b1edf445 2726
e64c2d0b 2727 return r;
74dd6b51
LP
2728}
2729
/* Appends each non-negative fd of the pair to array[], advancing the element count *n accordingly.
 * Negative entries are skipped. The caller has to make sure the array has room for two more items. */
static void append_socket_pair(int *array, size_t *n, const int pair[static 2]) {
        size_t i;

        assert(array);
        assert(n);
        assert(pair);

        for (i = 0; i < 2; i++)
                if (pair[i] >= 0)
                        array[(*n)++] = pair[i];
}
2740
a34ceba6
LP
2741static int close_remaining_fds(
2742 const ExecParameters *params,
34cf6c43
YW
2743 const ExecRuntime *runtime,
2744 const DynamicCreds *dcreds,
00d9ef85 2745 int user_lookup_fd,
a34ceba6 2746 int socket_fd,
5686391b 2747 int exec_fd,
da6053d0 2748 int *fds, size_t n_fds) {
a34ceba6 2749
da6053d0 2750 size_t n_dont_close = 0;
00d9ef85 2751 int dont_close[n_fds + 12];
a34ceba6
LP
2752
2753 assert(params);
2754
2755 if (params->stdin_fd >= 0)
2756 dont_close[n_dont_close++] = params->stdin_fd;
2757 if (params->stdout_fd >= 0)
2758 dont_close[n_dont_close++] = params->stdout_fd;
2759 if (params->stderr_fd >= 0)
2760 dont_close[n_dont_close++] = params->stderr_fd;
2761
2762 if (socket_fd >= 0)
2763 dont_close[n_dont_close++] = socket_fd;
5686391b
LP
2764 if (exec_fd >= 0)
2765 dont_close[n_dont_close++] = exec_fd;
a34ceba6
LP
2766 if (n_fds > 0) {
2767 memcpy(dont_close + n_dont_close, fds, sizeof(int) * n_fds);
2768 n_dont_close += n_fds;
2769 }
2770
29206d46
LP
2771 if (runtime)
2772 append_socket_pair(dont_close, &n_dont_close, runtime->netns_storage_socket);
2773
2774 if (dcreds) {
2775 if (dcreds->user)
2776 append_socket_pair(dont_close, &n_dont_close, dcreds->user->storage_socket);
2777 if (dcreds->group)
2778 append_socket_pair(dont_close, &n_dont_close, dcreds->group->storage_socket);
a34ceba6
LP
2779 }
2780
00d9ef85
LP
2781 if (user_lookup_fd >= 0)
2782 dont_close[n_dont_close++] = user_lookup_fd;
2783
a34ceba6
LP
2784 return close_all_fds(dont_close, n_dont_close);
2785}
2786
00d9ef85
LP
2787static int send_user_lookup(
2788 Unit *unit,
2789 int user_lookup_fd,
2790 uid_t uid,
2791 gid_t gid) {
2792
2793 assert(unit);
2794
2795 /* Send the resolved UID/GID to PID 1 after we learnt it. We send a single datagram, containing the UID/GID
2796 * data as well as the unit name. Note that we suppress sending this if no user/group to resolve was
2797 * specified. */
2798
2799 if (user_lookup_fd < 0)
2800 return 0;
2801
2802 if (!uid_is_valid(uid) && !gid_is_valid(gid))
2803 return 0;
2804
2805 if (writev(user_lookup_fd,
2806 (struct iovec[]) {
e6a7ec4b
LP
2807 IOVEC_INIT(&uid, sizeof(uid)),
2808 IOVEC_INIT(&gid, sizeof(gid)),
2809 IOVEC_INIT_STRING(unit->id) }, 3) < 0)
00d9ef85
LP
2810 return -errno;
2811
2812 return 0;
2813}
2814
6732edab
LP
2815static int acquire_home(const ExecContext *c, uid_t uid, const char** home, char **buf) {
2816 int r;
2817
2818 assert(c);
2819 assert(home);
2820 assert(buf);
2821
2822 /* If WorkingDirectory=~ is set, try to acquire a usable home directory. */
2823
2824 if (*home)
2825 return 0;
2826
2827 if (!c->working_directory_home)
2828 return 0;
2829
6732edab
LP
2830 r = get_home_dir(buf);
2831 if (r < 0)
2832 return r;
2833
2834 *home = *buf;
2835 return 1;
2836}
2837
da50b85a
LP
2838static int compile_suggested_paths(const ExecContext *c, const ExecParameters *p, char ***ret) {
2839 _cleanup_strv_free_ char ** list = NULL;
2840 ExecDirectoryType t;
2841 int r;
2842
2843 assert(c);
2844 assert(p);
2845 assert(ret);
2846
2847 assert(c->dynamic_user);
2848
2849 /* Compile a list of paths that it might make sense to read the owning UID from to use as initial candidate for
2850 * dynamic UID allocation, in order to save us from doing costly recursive chown()s of the special
2851 * directories. */
2852
2853 for (t = 0; t < _EXEC_DIRECTORY_TYPE_MAX; t++) {
2854 char **i;
2855
2856 if (t == EXEC_DIRECTORY_CONFIGURATION)
2857 continue;
2858
2859 if (!p->prefix[t])
2860 continue;
2861
2862 STRV_FOREACH(i, c->directories[t].paths) {
2863 char *e;
2864
494d0247 2865 if (exec_directory_is_private(c, t))
657ee2d8 2866 e = path_join(p->prefix[t], "private", *i);
494d0247
YW
2867 else
2868 e = path_join(p->prefix[t], *i);
da50b85a
LP
2869 if (!e)
2870 return -ENOMEM;
2871
2872 r = strv_consume(&list, e);
2873 if (r < 0)
2874 return r;
2875 }
2876 }
2877
ae2a15bc 2878 *ret = TAKE_PTR(list);
da50b85a
LP
2879
2880 return 0;
2881}
2882
34cf6c43
YW
/* Forward declaration. exec_child() below passes command->argv to this function, treats a NULL result
 * as out-of-memory, and hands the returned string (which it frees afterwards) to ask_for_confirmation(). */
2883static char *exec_command_line(char **argv);
2884
78f93209
LP
2885static int exec_parameters_get_cgroup_path(const ExecParameters *params, char **ret) {
2886 bool using_subcgroup;
2887 char *p;
2888
2889 assert(params);
2890 assert(ret);
2891
2892 if (!params->cgroup_path)
2893 return -EINVAL;
2894
2895 /* If we are called for a unit where cgroup delegation is on, and the payload created its own populated
2896 * subcgroup (which we expect it to do, after all it asked for delegation), then we cannot place the control
2897 * processes started after the main unit's process in the unit's main cgroup because it is now an inner one,
2898 * and inner cgroups may not contain processes. Hence, if delegation is on, and this is a control process,
2899 * let's use ".control" as subcgroup instead. Note that we do so only for ExecStartPost=, ExecReload=,
2900 * ExecStop=, ExecStopPost=, i.e. for the commands where the main process is already forked. For ExecStartPre=
2901 * this is not necessary, the cgroup is still empty. We distinguish these cases with the EXEC_CONTROL_CGROUP
2902 * flag, which is only passed for the former statements, not for the latter. */
2903
2904 using_subcgroup = FLAGS_SET(params->flags, EXEC_CONTROL_CGROUP|EXEC_CGROUP_DELEGATE|EXEC_IS_CONTROL);
2905 if (using_subcgroup)
657ee2d8 2906 p = path_join(params->cgroup_path, ".control");
78f93209
LP
2907 else
2908 p = strdup(params->cgroup_path);
2909 if (!p)
2910 return -ENOMEM;
2911
2912 *ret = p;
2913 return using_subcgroup;
2914}
2915
ff0af2a1 2916static int exec_child(
f2341e0a 2917 Unit *unit,
34cf6c43 2918 const ExecCommand *command,
ff0af2a1
LP
2919 const ExecContext *context,
2920 const ExecParameters *params,
2921 ExecRuntime *runtime,
29206d46 2922 DynamicCreds *dcreds,
ff0af2a1 2923 int socket_fd,
2caa38e9 2924 const int named_iofds[static 3],
4c47affc 2925 int *fds,
da6053d0 2926 size_t n_socket_fds,
25b583d7 2927 size_t n_storage_fds,
ff0af2a1 2928 char **files_env,
00d9ef85 2929 int user_lookup_fd,
12145637 2930 int *exit_status) {
d35fbf6b 2931
7ca69792 2932 _cleanup_strv_free_ char **our_env = NULL, **pass_env = NULL, **accum_env = NULL, **replaced_argv = NULL;
5686391b 2933 int *fds_with_exec_fd, n_fds_with_exec_fd, r, ngids = 0, exec_fd = -1;
4d885bd3
DH
2934 _cleanup_free_ gid_t *supplementary_gids = NULL;
2935 const char *username = NULL, *groupname = NULL;
5686391b 2936 _cleanup_free_ char *home_buffer = NULL;
2b3c1b9e 2937 const char *home = NULL, *shell = NULL;
7ca69792 2938 char **final_argv = NULL;
7bce046b
LP
2939 dev_t journal_stream_dev = 0;
2940 ino_t journal_stream_ino = 0;
165a31c0
LP
2941 bool needs_sandboxing, /* Do we need to set up full sandboxing? (i.e. all namespacing, all MAC stuff, caps, yadda yadda */
2942 needs_setuid, /* Do we need to do the actual setresuid()/setresgid() calls? */
2943 needs_mount_namespace, /* Do we need to set up a mount namespace for this kernel? */
2944 needs_ambient_hack; /* Do we need to apply the ambient capabilities hack? */
349cc4a5 2945#if HAVE_SELINUX
7f59dd35 2946 _cleanup_free_ char *mac_selinux_context_net = NULL;
43b1f709 2947 bool use_selinux = false;
ecfbc84f 2948#endif
f9fa32f0 2949#if ENABLE_SMACK
43b1f709 2950 bool use_smack = false;
ecfbc84f 2951#endif
349cc4a5 2952#if HAVE_APPARMOR
43b1f709 2953 bool use_apparmor = false;
ecfbc84f 2954#endif
fed1e721
LP
2955 uid_t uid = UID_INVALID;
2956 gid_t gid = GID_INVALID;
da6053d0 2957 size_t n_fds;
3536f49e 2958 ExecDirectoryType dt;
165a31c0 2959 int secure_bits;
034c6ed7 2960
f2341e0a 2961 assert(unit);
5cb5a6ff
LP
2962 assert(command);
2963 assert(context);
d35fbf6b 2964 assert(params);
ff0af2a1 2965 assert(exit_status);
d35fbf6b
DM
2966
2967 rename_process_from_path(command->path);
2968
2969 /* We reset exactly these signals, since they are the
2970 * only ones we set to SIG_IGN in the main daemon. All
2971 * others we leave untouched because we set them to
2972 * SIG_DFL or a valid handler initially, both of which
2973 * will be demoted to SIG_DFL. */
ce30c8dc
LP
2974 (void) default_signals(SIGNALS_CRASH_HANDLER,
2975 SIGNALS_IGNORE, -1);
d35fbf6b
DM
2976
2977 if (context->ignore_sigpipe)
ce30c8dc 2978 (void) ignore_signals(SIGPIPE, -1);
d35fbf6b 2979
ff0af2a1
LP
2980 r = reset_signal_mask();
2981 if (r < 0) {
2982 *exit_status = EXIT_SIGNAL_MASK;
12145637 2983 return log_unit_error_errno(unit, r, "Failed to set process signal mask: %m");
d35fbf6b 2984 }
034c6ed7 2985
d35fbf6b
DM
2986 if (params->idle_pipe)
2987 do_idle_pipe_dance(params->idle_pipe);
4f2d528d 2988
2c027c62
LP
2989 /* Close fds we don't need very early to make sure we don't block init reexecution because it cannot bind its
2990 * sockets. Among the fds we close are the logging fds, and we want to keep them closed, so that we don't have
2991 * any fds open we don't really want open during the transition. In order to make logging work, we switch the
2992 * log subsystem into open_when_needed mode, so that it reopens the logs on every single log call. */
ff0af2a1 2993
d35fbf6b 2994 log_forget_fds();
2c027c62 2995 log_set_open_when_needed(true);
4f2d528d 2996
40a80078
LP
2997 /* In case anything used libc syslog(), close this here, too */
2998 closelog();
2999
5686391b
LP
3000 n_fds = n_socket_fds + n_storage_fds;
3001 r = close_remaining_fds(params, runtime, dcreds, user_lookup_fd, socket_fd, params->exec_fd, fds, n_fds);
ff0af2a1
LP
3002 if (r < 0) {
3003 *exit_status = EXIT_FDS;
12145637 3004 return log_unit_error_errno(unit, r, "Failed to close unwanted file descriptors: %m");
8c7be95e
LP
3005 }
3006
d35fbf6b
DM
3007 if (!context->same_pgrp)
3008 if (setsid() < 0) {
ff0af2a1 3009 *exit_status = EXIT_SETSID;
12145637 3010 return log_unit_error_errno(unit, errno, "Failed to create new process session: %m");
d35fbf6b 3011 }
9e2f7c11 3012
1e22b5cd 3013 exec_context_tty_reset(context, params);
d35fbf6b 3014
c891efaf 3015 if (unit_shall_confirm_spawn(unit)) {
7d5ceb64 3016 const char *vc = params->confirm_spawn;
3b20f877
FB
3017 _cleanup_free_ char *cmdline = NULL;
3018
ee39ca20 3019 cmdline = exec_command_line(command->argv);
3b20f877 3020 if (!cmdline) {
0460aa5c 3021 *exit_status = EXIT_MEMORY;
12145637 3022 return log_oom();
3b20f877 3023 }
d35fbf6b 3024
eedf223a 3025 r = ask_for_confirmation(vc, unit, cmdline);
3b20f877
FB
3026 if (r != CONFIRM_EXECUTE) {
3027 if (r == CONFIRM_PRETEND_SUCCESS) {
3028 *exit_status = EXIT_SUCCESS;
3029 return 0;
3030 }
ff0af2a1 3031 *exit_status = EXIT_CONFIRM;
12145637 3032 log_unit_error(unit, "Execution cancelled by the user");
d35fbf6b 3033 return -ECANCELED;
d35fbf6b
DM
3034 }
3035 }
1a63a750 3036
d521916d
LP
3037 /* We are about to invoke NSS and PAM modules. Let's tell them what we are doing here, maybe they care. This is
3038 * used by nss-resolve to disable itself when we are about to start systemd-resolved, to avoid deadlocks. Note
3039 * that these env vars do not survive the execve(), which means they really only apply to the PAM and NSS
3040 * invocations themselves. Also note that while we'll only invoke NSS modules involved in user management they
3041 * might internally call into other NSS modules that are involved in hostname resolution, we never know. */
3042 if (setenv("SYSTEMD_ACTIVATION_UNIT", unit->id, true) != 0 ||
3043 setenv("SYSTEMD_ACTIVATION_SCOPE", MANAGER_IS_SYSTEM(unit->manager) ? "system" : "user", true) != 0) {
3044 *exit_status = EXIT_MEMORY;
3045 return log_unit_error_errno(unit, errno, "Failed to update environment: %m");
3046 }
3047
29206d46 3048 if (context->dynamic_user && dcreds) {
da50b85a 3049 _cleanup_strv_free_ char **suggested_paths = NULL;
29206d46 3050
d521916d
LP
3051 /* On top of that, make sure we bypass our own NSS module nss-systemd comprehensively for any NSS
3052 * checks, if DynamicUser=1 is used, as we shouldn't create a feedback loop with ourselves here.*/
409093fe
LP
3053 if (putenv((char*) "SYSTEMD_NSS_DYNAMIC_BYPASS=1") != 0) {
3054 *exit_status = EXIT_USER;
12145637 3055 return log_unit_error_errno(unit, errno, "Failed to update environment: %m");
409093fe
LP
3056 }
3057
da50b85a
LP
3058 r = compile_suggested_paths(context, params, &suggested_paths);
3059 if (r < 0) {
3060 *exit_status = EXIT_MEMORY;
3061 return log_oom();
3062 }
3063
3064 r = dynamic_creds_realize(dcreds, suggested_paths, &uid, &gid);
ff0af2a1
LP
3065 if (r < 0) {
3066 *exit_status = EXIT_USER;
e2b0cc34
YW
3067 if (r == -EILSEQ) {
3068 log_unit_error(unit, "Failed to update dynamic user credentials: User or group with specified name already exists.");
3069 return -EOPNOTSUPP;
3070 }
12145637 3071 return log_unit_error_errno(unit, r, "Failed to update dynamic user credentials: %m");
524daa8c 3072 }
524daa8c 3073
70dd455c 3074 if (!uid_is_valid(uid)) {
29206d46 3075 *exit_status = EXIT_USER;
12145637 3076 log_unit_error(unit, "UID validation failed for \""UID_FMT"\"", uid);
70dd455c
ZJS
3077 return -ESRCH;
3078 }
3079
3080 if (!gid_is_valid(gid)) {
3081 *exit_status = EXIT_USER;
12145637 3082 log_unit_error(unit, "GID validation failed for \""GID_FMT"\"", gid);
29206d46
LP
3083 return -ESRCH;
3084 }
5bc7452b 3085
29206d46
LP
3086 if (dcreds->user)
3087 username = dcreds->user->name;
3088
3089 } else {
4d885bd3
DH
3090 r = get_fixed_user(context, &username, &uid, &gid, &home, &shell);
3091 if (r < 0) {
3092 *exit_status = EXIT_USER;
12145637 3093 return log_unit_error_errno(unit, r, "Failed to determine user credentials: %m");
5bc7452b 3094 }
5bc7452b 3095
4d885bd3
DH
3096 r = get_fixed_group(context, &groupname, &gid);
3097 if (r < 0) {
3098 *exit_status = EXIT_GROUP;
12145637 3099 return log_unit_error_errno(unit, r, "Failed to determine group credentials: %m");
4d885bd3 3100 }
cdc5d5c5 3101 }
29206d46 3102
cdc5d5c5
DH
3103 /* Initialize user supplementary groups and get SupplementaryGroups= ones */
3104 r = get_supplementary_groups(context, username, groupname, gid,
3105 &supplementary_gids, &ngids);
3106 if (r < 0) {
3107 *exit_status = EXIT_GROUP;
12145637 3108 return log_unit_error_errno(unit, r, "Failed to determine supplementary groups: %m");
29206d46 3109 }
5bc7452b 3110
00d9ef85
LP
3111 r = send_user_lookup(unit, user_lookup_fd, uid, gid);
3112 if (r < 0) {
3113 *exit_status = EXIT_USER;
12145637 3114 return log_unit_error_errno(unit, r, "Failed to send user credentials to PID1: %m");
00d9ef85
LP
3115 }
3116
3117 user_lookup_fd = safe_close(user_lookup_fd);
3118
6732edab
LP
3119 r = acquire_home(context, uid, &home, &home_buffer);
3120 if (r < 0) {
3121 *exit_status = EXIT_CHDIR;
12145637 3122 return log_unit_error_errno(unit, r, "Failed to determine $HOME for user: %m");
6732edab
LP
3123 }
3124
d35fbf6b
DM
3125 /* If a socket is connected to STDIN/STDOUT/STDERR, we
3126 * must be sure to drop O_NONBLOCK */
3127 if (socket_fd >= 0)
a34ceba6 3128 (void) fd_nonblock(socket_fd, false);
acbb0225 3129
4c70a4a7
MS
3130 /* Journald will try to look-up our cgroup in order to populate _SYSTEMD_CGROUP and _SYSTEMD_UNIT fields.
3131 * Hence we need to migrate to the target cgroup from init.scope before connecting to journald */
3132 if (params->cgroup_path) {
3133 _cleanup_free_ char *p = NULL;
3134
3135 r = exec_parameters_get_cgroup_path(params, &p);
3136 if (r < 0) {
3137 *exit_status = EXIT_CGROUP;
3138 return log_unit_error_errno(unit, r, "Failed to acquire cgroup path: %m");
3139 }
3140
3141 r = cg_attach_everywhere(params->cgroup_supported, p, 0, NULL, NULL);
3142 if (r < 0) {
3143 *exit_status = EXIT_CGROUP;
3144 return log_unit_error_errno(unit, r, "Failed to attach to cgroup %s: %m", p);
3145 }
3146 }
3147
a8d08f39
LP
3148 if (context->network_namespace_path && runtime && runtime->netns_storage_socket[0] >= 0) {
3149 r = open_netns_path(runtime->netns_storage_socket, context->network_namespace_path);
3150 if (r < 0) {
3151 *exit_status = EXIT_NETWORK;
3152 return log_unit_error_errno(unit, r, "Failed to open network namespace path %s: %m", context->network_namespace_path);
3153 }
3154 }
3155
52c239d7 3156 r = setup_input(context, params, socket_fd, named_iofds);
ff0af2a1
LP
3157 if (r < 0) {
3158 *exit_status = EXIT_STDIN;
12145637 3159 return log_unit_error_errno(unit, r, "Failed to set up standard input: %m");
d35fbf6b 3160 }
034c6ed7 3161
52c239d7 3162 r = setup_output(unit, context, params, STDOUT_FILENO, socket_fd, named_iofds, basename(command->path), uid, gid, &journal_stream_dev, &journal_stream_ino);
ff0af2a1
LP
3163 if (r < 0) {
3164 *exit_status = EXIT_STDOUT;
12145637 3165 return log_unit_error_errno(unit, r, "Failed to set up standard output: %m");
d35fbf6b
DM
3166 }
3167
52c239d7 3168 r = setup_output(unit, context, params, STDERR_FILENO, socket_fd, named_iofds, basename(command->path), uid, gid, &journal_stream_dev, &journal_stream_ino);
ff0af2a1
LP
3169 if (r < 0) {
3170 *exit_status = EXIT_STDERR;
12145637 3171 return log_unit_error_errno(unit, r, "Failed to set up standard error output: %m");
d35fbf6b
DM
3172 }
3173
d35fbf6b 3174 if (context->oom_score_adjust_set) {
9f8168eb
LP
3175 /* When we can't make this change due to EPERM, then let's silently skip over it. User namespaces
3176 * prohibit write access to this file, and we shouldn't trip up over that. */
3177 r = set_oom_score_adjust(context->oom_score_adjust);
12145637 3178 if (IN_SET(r, -EPERM, -EACCES))
f2341e0a 3179 log_unit_debug_errno(unit, r, "Failed to adjust OOM setting, assuming containerized execution, ignoring: %m");
12145637 3180 else if (r < 0) {
ff0af2a1 3181 *exit_status = EXIT_OOM_ADJUST;
12145637 3182 return log_unit_error_errno(unit, r, "Failed to adjust OOM setting: %m");
613b411c 3183 }
d35fbf6b
DM
3184 }
3185
3186 if (context->nice_set)
3187 if (setpriority(PRIO_PROCESS, 0, context->nice) < 0) {
ff0af2a1 3188 *exit_status = EXIT_NICE;
12145637 3189 return log_unit_error_errno(unit, errno, "Failed to set up process scheduling priority (nice level): %m");
613b411c
LP
3190 }
3191
d35fbf6b
DM
3192 if (context->cpu_sched_set) {
3193 struct sched_param param = {
3194 .sched_priority = context->cpu_sched_priority,
3195 };
3196
ff0af2a1
LP
3197 r = sched_setscheduler(0,
3198 context->cpu_sched_policy |
3199 (context->cpu_sched_reset_on_fork ?
3200 SCHED_RESET_ON_FORK : 0),
3201 &param);
3202 if (r < 0) {
3203 *exit_status = EXIT_SETSCHEDULER;
12145637 3204 return log_unit_error_errno(unit, errno, "Failed to set up CPU scheduling: %m");
fc9b2a84 3205 }
d35fbf6b 3206 }
fc9b2a84 3207
0985c7c4
ZJS
3208 if (context->cpu_set.set)
3209 if (sched_setaffinity(0, context->cpu_set.allocated, context->cpu_set.set) < 0) {
ff0af2a1 3210 *exit_status = EXIT_CPUAFFINITY;
12145637 3211 return log_unit_error_errno(unit, errno, "Failed to set up CPU affinity: %m");
034c6ed7
LP
3212 }
3213
b070c7c0
MS
3214 if (mpol_is_valid(numa_policy_get_type(&context->numa_policy))) {
3215 r = apply_numa_policy(&context->numa_policy);
3216 if (r == -EOPNOTSUPP)
33fe9e3f 3217 log_unit_debug_errno(unit, r, "NUMA support not available, ignoring.");
b070c7c0
MS
3218 else if (r < 0) {
3219 *exit_status = EXIT_NUMA_POLICY;
3220 return log_unit_error_errno(unit, r, "Failed to set NUMA memory policy: %m");
3221 }
3222 }
3223
d35fbf6b
DM
3224 if (context->ioprio_set)
3225 if (ioprio_set(IOPRIO_WHO_PROCESS, 0, context->ioprio) < 0) {
ff0af2a1 3226 *exit_status = EXIT_IOPRIO;
12145637 3227 return log_unit_error_errno(unit, errno, "Failed to set up IO scheduling priority: %m");
d35fbf6b 3228 }
da726a4d 3229
d35fbf6b
DM
3230 if (context->timer_slack_nsec != NSEC_INFINITY)
3231 if (prctl(PR_SET_TIMERSLACK, context->timer_slack_nsec) < 0) {
ff0af2a1 3232 *exit_status = EXIT_TIMERSLACK;
12145637 3233 return log_unit_error_errno(unit, errno, "Failed to set up timer slack: %m");
4c2630eb 3234 }
9eba9da4 3235
21022b9d
LP
3236 if (context->personality != PERSONALITY_INVALID) {
3237 r = safe_personality(context->personality);
3238 if (r < 0) {
ff0af2a1 3239 *exit_status = EXIT_PERSONALITY;
12145637 3240 return log_unit_error_errno(unit, r, "Failed to set up execution domain (personality): %m");
4c2630eb 3241 }
21022b9d 3242 }
94f04347 3243
d35fbf6b 3244 if (context->utmp_id)
df0ff127 3245 utmp_put_init_process(context->utmp_id, getpid_cached(), getsid(0),
6a93917d 3246 context->tty_path,
023a4f67
LP
3247 context->utmp_mode == EXEC_UTMP_INIT ? INIT_PROCESS :
3248 context->utmp_mode == EXEC_UTMP_LOGIN ? LOGIN_PROCESS :
3249 USER_PROCESS,
6a93917d 3250 username);
d35fbf6b 3251
08f67696 3252 if (uid_is_valid(uid)) {
ff0af2a1
LP
3253 r = chown_terminal(STDIN_FILENO, uid);
3254 if (r < 0) {
3255 *exit_status = EXIT_STDIN;
12145637 3256 return log_unit_error_errno(unit, r, "Failed to change ownership of terminal: %m");
071830ff 3257 }
d35fbf6b 3258 }
8e274523 3259
4e1dfa45 3260 /* If delegation is enabled we'll pass ownership of the cgroup to the user of the new process. On cgroup v1
62b9bb26 3261 * this is only about systemd's own hierarchy, i.e. not the controller hierarchies, simply because that's not
4e1dfa45 3262 * safe. On cgroup v2 there's only one hierarchy anyway, and delegation is safe there, hence in that case only
62b9bb26 3263 * touch a single hierarchy too. */
584b8688 3264 if (params->cgroup_path && context->user && (params->flags & EXEC_CGROUP_DELEGATE)) {
62b9bb26 3265 r = cg_set_access(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, uid, gid);
ff0af2a1
LP
3266 if (r < 0) {
3267 *exit_status = EXIT_CGROUP;
12145637 3268 return log_unit_error_errno(unit, r, "Failed to adjust control group access: %m");
034c6ed7 3269 }
d35fbf6b 3270 }
034c6ed7 3271
72fd1768 3272 for (dt = 0; dt < _EXEC_DIRECTORY_TYPE_MAX; dt++) {
8679efde 3273 r = setup_exec_directory(context, params, uid, gid, dt, exit_status);
12145637
LP
3274 if (r < 0)
3275 return log_unit_error_errno(unit, r, "Failed to set up special execution directory in %s: %m", params->prefix[dt]);
d35fbf6b 3276 }
94f04347 3277
7bce046b 3278 r = build_environment(
fd63e712 3279 unit,
7bce046b
LP
3280 context,
3281 params,
3282 n_fds,
3283 home,
3284 username,
3285 shell,
3286 journal_stream_dev,
3287 journal_stream_ino,
3288 &our_env);
2065ca69
JW
3289 if (r < 0) {
3290 *exit_status = EXIT_MEMORY;
12145637 3291 return log_oom();
2065ca69
JW
3292 }
3293
3294 r = build_pass_environment(context, &pass_env);
3295 if (r < 0) {
3296 *exit_status = EXIT_MEMORY;
12145637 3297 return log_oom();
2065ca69
JW
3298 }
3299
3300 accum_env = strv_env_merge(5,
3301 params->environment,
3302 our_env,
3303 pass_env,
3304 context->environment,
3305 files_env,
3306 NULL);
3307 if (!accum_env) {
3308 *exit_status = EXIT_MEMORY;
12145637 3309 return log_oom();
2065ca69 3310 }
1280503b 3311 accum_env = strv_env_clean(accum_env);
2065ca69 3312
096424d1 3313 (void) umask(context->umask);
b213e1c1 3314
b1edf445 3315 r = setup_keyring(unit, context, params, uid, gid);
74dd6b51
LP
3316 if (r < 0) {
3317 *exit_status = EXIT_KEYRING;
12145637 3318 return log_unit_error_errno(unit, r, "Failed to set up kernel keyring: %m");
74dd6b51
LP
3319 }
3320
165a31c0 3321 /* We need sandboxing if the caller asked us to apply it and the command isn't explicitly excepted from it */
1703fa41 3322 needs_sandboxing = (params->flags & EXEC_APPLY_SANDBOXING) && !(command->flags & EXEC_COMMAND_FULLY_PRIVILEGED);
7f18ef0a 3323
165a31c0
LP
3324 /* We need the ambient capability hack, if the caller asked us to apply it and the command is marked for it, and the kernel doesn't actually support ambient caps */
3325 needs_ambient_hack = (params->flags & EXEC_APPLY_SANDBOXING) && (command->flags & EXEC_COMMAND_AMBIENT_MAGIC) && !ambient_capabilities_supported();
7f18ef0a 3326
165a31c0
LP
3327 /* We need setresuid() if the caller asked us to apply sandboxing and the command isn't explicitly excepted from either whole sandboxing or just setresuid() itself, and the ambient hack is not desired */
3328 if (needs_ambient_hack)
3329 needs_setuid = false;
3330 else
3331 needs_setuid = (params->flags & EXEC_APPLY_SANDBOXING) && !(command->flags & (EXEC_COMMAND_FULLY_PRIVILEGED|EXEC_COMMAND_NO_SETUID));
3332
3333 if (needs_sandboxing) {
7f18ef0a
FK
3334 /* MAC enablement checks need to be done before a new mount ns is created, as they rely on /sys being
3335 * present. The actual MAC context application will happen later, as late as possible, to avoid
3336 * impacting our own code paths. */
3337
349cc4a5 3338#if HAVE_SELINUX
43b1f709 3339 use_selinux = mac_selinux_use();
7f18ef0a 3340#endif
f9fa32f0 3341#if ENABLE_SMACK
43b1f709 3342 use_smack = mac_smack_use();
7f18ef0a 3343#endif
349cc4a5 3344#if HAVE_APPARMOR
43b1f709 3345 use_apparmor = mac_apparmor_use();
7f18ef0a 3346#endif
165a31c0 3347 }
7f18ef0a 3348
ce932d2d
LP
3349 if (needs_sandboxing) {
3350 int which_failed;
3351
3352 /* Let's set the resource limits before we call into PAM, so that pam_limits wins over what
3353 * is set here. (See below.) */
3354
3355 r = setrlimit_closest_all((const struct rlimit* const *) context->rlimit, &which_failed);
3356 if (r < 0) {
3357 *exit_status = EXIT_LIMITS;
3358 return log_unit_error_errno(unit, r, "Failed to adjust resource limit RLIMIT_%s: %m", rlimit_to_string(which_failed));
3359 }
3360 }
3361
165a31c0 3362 if (needs_setuid) {
ce932d2d
LP
3363
3364 /* Let's call into PAM after we set up our own idea of resource limits so that pam_limits
3365 * wins here. (See above.) */
3366
165a31c0
LP
3367 if (context->pam_name && username) {
3368 r = setup_pam(context->pam_name, username, uid, gid, context->tty_path, &accum_env, fds, n_fds);
3369 if (r < 0) {
3370 *exit_status = EXIT_PAM;
12145637 3371 return log_unit_error_errno(unit, r, "Failed to set up PAM session: %m");
165a31c0
LP
3372 }
3373 }
b213e1c1 3374 }
ac45f971 3375
a8d08f39
LP
3376 if ((context->private_network || context->network_namespace_path) && runtime && runtime->netns_storage_socket[0] >= 0) {
3377
6e2d7c4f
MS
3378 if (ns_type_supported(NAMESPACE_NET)) {
3379 r = setup_netns(runtime->netns_storage_socket);
3380 if (r < 0) {
3381 *exit_status = EXIT_NETWORK;
3382 return log_unit_error_errno(unit, r, "Failed to set up network namespacing: %m");
3383 }
a8d08f39
LP
3384 } else if (context->network_namespace_path) {
3385 *exit_status = EXIT_NETWORK;
3386 return log_unit_error_errno(unit, SYNTHETIC_ERRNO(EOPNOTSUPP), "NetworkNamespacePath= is not supported, refusing.");
6e2d7c4f
MS
3387 } else
3388 log_unit_warning(unit, "PrivateNetwork=yes is configured, but the kernel does not support network namespaces, ignoring.");
d35fbf6b 3389 }
169c1bda 3390
ee818b89 3391 needs_mount_namespace = exec_needs_mount_namespace(context, params, runtime);
ee818b89 3392 if (needs_mount_namespace) {
7cc5ef5f
ZJS
3393 _cleanup_free_ char *error_path = NULL;
3394
3395 r = apply_mount_namespace(unit, command, context, params, runtime, &error_path);
3fbe8dbe
LP
3396 if (r < 0) {
3397 *exit_status = EXIT_NAMESPACE;
7cc5ef5f
ZJS
3398 return log_unit_error_errno(unit, r, "Failed to set up mount namespacing%s%s: %m",
3399 error_path ? ": " : "", strempty(error_path));
3fbe8dbe 3400 }
d35fbf6b 3401 }
81a2b7ce 3402
aecd5ac6
TM
3403 if (context->protect_hostname) {
3404 if (ns_type_supported(NAMESPACE_UTS)) {
3405 if (unshare(CLONE_NEWUTS) < 0) {
3406 *exit_status = EXIT_NAMESPACE;
3407 return log_unit_error_errno(unit, errno, "Failed to set up UTS namespacing: %m");
3408 }
3409 } else
3410 log_unit_warning(unit, "ProtectHostname=yes is configured, but the kernel does not support UTS namespaces, ignoring namespace setup.");
3411#if HAVE_SECCOMP
3412 r = seccomp_protect_hostname();
3413 if (r < 0) {
3414 *exit_status = EXIT_SECCOMP;
3415 return log_unit_error_errno(unit, r, "Failed to apply hostname restrictions: %m");
3416 }
3417#endif
3418 }
3419
bbeea271 3420 /* Drop groups as early as possible */
165a31c0 3421 if (needs_setuid) {
709dbeac 3422 r = enforce_groups(gid, supplementary_gids, ngids);
096424d1
LP
3423 if (r < 0) {
3424 *exit_status = EXIT_GROUP;
12145637 3425 return log_unit_error_errno(unit, r, "Changing group credentials failed: %m");
096424d1 3426 }
165a31c0 3427 }
096424d1 3428
165a31c0 3429 if (needs_sandboxing) {
349cc4a5 3430#if HAVE_SELINUX
43b1f709 3431 if (use_selinux && params->selinux_context_net && socket_fd >= 0) {
937ccce9
LP
3432 r = mac_selinux_get_child_mls_label(socket_fd, command->path, context->selinux_context, &mac_selinux_context_net);
3433 if (r < 0) {
3434 *exit_status = EXIT_SELINUX_CONTEXT;
12145637 3435 return log_unit_error_errno(unit, r, "Failed to determine SELinux context: %m");
937ccce9 3436 }
9008e1ac 3437 }
9008e1ac
MS
3438#endif
3439
937ccce9
LP
3440 if (context->private_users) {
3441 r = setup_private_users(uid, gid);
3442 if (r < 0) {
3443 *exit_status = EXIT_USER;
12145637 3444 return log_unit_error_errno(unit, r, "Failed to set up user namespacing: %m");
937ccce9 3445 }
d251207d
LP
3446 }
3447 }
3448
165a31c0 3449 /* We repeat the fd closing here, to make sure that nothing is leaked from the PAM modules. Note that we are
5686391b
LP
3450 * more aggressive this time since socket_fd and the netns fds we don't need anymore. We do keep the exec_fd
3451 * however if we have it as we want to keep it open until the final execve(). */
3452
3453 if (params->exec_fd >= 0) {
3454 exec_fd = params->exec_fd;
3455
3456 if (exec_fd < 3 + (int) n_fds) {
3457 int moved_fd;
3458
3459 /* Let's move the exec fd far up, so that it's outside of the fd range we want to pass to the
3460 * process we are about to execute. */
3461
3462 moved_fd = fcntl(exec_fd, F_DUPFD_CLOEXEC, 3 + (int) n_fds);
3463 if (moved_fd < 0) {
3464 *exit_status = EXIT_FDS;
3465 return log_unit_error_errno(unit, errno, "Couldn't move exec fd up: %m");
3466 }
3467
3468 safe_close(exec_fd);
3469 exec_fd = moved_fd;
3470 } else {
3471 /* This fd should be FD_CLOEXEC already, but let's make sure. */
3472 r = fd_cloexec(exec_fd, true);
3473 if (r < 0) {
3474 *exit_status = EXIT_FDS;
3475 return log_unit_error_errno(unit, r, "Failed to make exec fd FD_CLOEXEC: %m");
3476 }
3477 }
3478
3479 fds_with_exec_fd = newa(int, n_fds + 1);
7e8d494b 3480 memcpy_safe(fds_with_exec_fd, fds, n_fds * sizeof(int));
5686391b
LP
3481 fds_with_exec_fd[n_fds] = exec_fd;
3482 n_fds_with_exec_fd = n_fds + 1;
3483 } else {
3484 fds_with_exec_fd = fds;
3485 n_fds_with_exec_fd = n_fds;
3486 }
3487
3488 r = close_all_fds(fds_with_exec_fd, n_fds_with_exec_fd);
ff0af2a1
LP
3489 if (r >= 0)
3490 r = shift_fds(fds, n_fds);
3491 if (r >= 0)
25b583d7 3492 r = flags_fds(fds, n_socket_fds, n_storage_fds, context->non_blocking);
ff0af2a1
LP
3493 if (r < 0) {
3494 *exit_status = EXIT_FDS;
12145637 3495 return log_unit_error_errno(unit, r, "Failed to adjust passed file descriptors: %m");
d35fbf6b 3496 }
e66cf1a3 3497
5686391b
LP
3498 /* At this point, the fds we want to pass to the program are all ready and set up, with O_CLOEXEC turned off
3499 * and at the right fd numbers. There are no other fds open, with one exception: the exec_fd if it is defined,
3500 * and it has O_CLOEXEC set, after all we want it to be closed by the execve(), so that our parent knows we
3501 * came this far. */
3502
165a31c0 3503 secure_bits = context->secure_bits;
e66cf1a3 3504
165a31c0
LP
3505 if (needs_sandboxing) {
3506 uint64_t bset;
e66cf1a3 3507
ce932d2d
LP
3508 /* Set the RTPRIO resource limit to 0, but only if nothing else was explicitly
3509 * requested. (Note this is placed after the general resource limit initialization, see
3510 * above, in order to take precedence.) */
f4170c67
LP
3511 if (context->restrict_realtime && !context->rlimit[RLIMIT_RTPRIO]) {
3512 if (setrlimit(RLIMIT_RTPRIO, &RLIMIT_MAKE_CONST(0)) < 0) {
3513 *exit_status = EXIT_LIMITS;
12145637 3514 return log_unit_error_errno(unit, errno, "Failed to adjust RLIMIT_RTPRIO resource limit: %m");
f4170c67
LP
3515 }
3516 }
3517
37ac2744
JB
3518#if ENABLE_SMACK
3519 /* LSM Smack needs the capability CAP_MAC_ADMIN to change the current execution security context of the
3520 * process. This is the latest place before dropping capabilities. Other MAC context are set later. */
3521 if (use_smack) {
3522 r = setup_smack(context, command);
3523 if (r < 0) {
3524 *exit_status = EXIT_SMACK_PROCESS_LABEL;
3525 return log_unit_error_errno(unit, r, "Failed to set SMACK process label: %m");
3526 }
3527 }
3528#endif
3529
165a31c0
LP
3530 bset = context->capability_bounding_set;
3531 /* If the ambient caps hack is enabled (which means the kernel can't do them, and the user asked for
3532 * our magic fallback), then let's add some extra caps, so that the service can drop privs of its own,
3533 * instead of us doing that */
3534 if (needs_ambient_hack)
3535 bset |= (UINT64_C(1) << CAP_SETPCAP) |
3536 (UINT64_C(1) << CAP_SETUID) |
3537 (UINT64_C(1) << CAP_SETGID);
3538
3539 if (!cap_test_all(bset)) {
3540 r = capability_bounding_set_drop(bset, false);
ff0af2a1
LP
3541 if (r < 0) {
3542 *exit_status = EXIT_CAPABILITIES;
12145637 3543 return log_unit_error_errno(unit, r, "Failed to drop capabilities: %m");
3b8bddde 3544 }
4c2630eb 3545 }
3b8bddde 3546
755d4b67
IP
3547 /* This is done before enforce_user, but ambient set
3548 * does not survive over setresuid() if keep_caps is not set. */
165a31c0
LP
3549 if (!needs_ambient_hack &&
3550 context->capability_ambient_set != 0) {
755d4b67
IP
3551 r = capability_ambient_set_apply(context->capability_ambient_set, true);
3552 if (r < 0) {
3553 *exit_status = EXIT_CAPABILITIES;
12145637 3554 return log_unit_error_errno(unit, r, "Failed to apply ambient capabilities (before UID change): %m");
755d4b67 3555 }
755d4b67 3556 }
165a31c0 3557 }
755d4b67 3558
fa97f630
JB
3559 /* chroot to root directory first, before we lose the ability to chroot */
3560 r = apply_root_directory(context, params, needs_mount_namespace, exit_status);
3561 if (r < 0)
3562 return log_unit_error_errno(unit, r, "Chrooting to the requested root directory failed: %m");
3563
165a31c0 3564 if (needs_setuid) {
08f67696 3565 if (uid_is_valid(uid)) {
ff0af2a1
LP
3566 r = enforce_user(context, uid);
3567 if (r < 0) {
3568 *exit_status = EXIT_USER;
12145637 3569 return log_unit_error_errno(unit, r, "Failed to change UID to " UID_FMT ": %m", uid);
5b6319dc 3570 }
165a31c0
LP
3571
3572 if (!needs_ambient_hack &&
3573 context->capability_ambient_set != 0) {
755d4b67
IP
3574
3575 /* Fix the ambient capabilities after user change. */
3576 r = capability_ambient_set_apply(context->capability_ambient_set, false);
3577 if (r < 0) {
3578 *exit_status = EXIT_CAPABILITIES;
12145637 3579 return log_unit_error_errno(unit, r, "Failed to apply ambient capabilities (after UID change): %m");
755d4b67
IP
3580 }
3581
3582 /* If we were asked to change user and ambient capabilities
3583 * were requested, we had to add keep-caps to the securebits
3584 * so that we would maintain the inherited capability set
3585 * through the setresuid(). Make sure that the bit is added
3586 * also to the context secure_bits so that we don't try to
3587 * drop the bit away next. */
3588
7f508f2c 3589 secure_bits |= 1<<SECURE_KEEP_CAPS;
755d4b67 3590 }
5b6319dc 3591 }
165a31c0 3592 }
d35fbf6b 3593
56ef8db9
JB
3594 /* Apply working directory here, because the working directory might be on NFS and only the user running
3595 * this service might have the correct privilege to change to the working directory */
fa97f630 3596 r = apply_working_directory(context, params, home, exit_status);
56ef8db9
JB
3597 if (r < 0)
3598 return log_unit_error_errno(unit, r, "Changing to the requested working directory failed: %m");
3599
165a31c0 3600 if (needs_sandboxing) {
37ac2744 3601 /* Apply other MAC contexts late, but before seccomp syscall filtering, as those should really be last to
5cd9cd35
LP
3602 * influence our own codepaths as little as possible. Moreover, applying MAC contexts usually requires
3603 * syscalls that are subject to seccomp filtering, hence should probably be applied before the syscalls
3604 * are restricted. */
3605
349cc4a5 3606#if HAVE_SELINUX
43b1f709 3607 if (use_selinux) {
5cd9cd35
LP
3608 char *exec_context = mac_selinux_context_net ?: context->selinux_context;
3609
3610 if (exec_context) {
3611 r = setexeccon(exec_context);
3612 if (r < 0) {
3613 *exit_status = EXIT_SELINUX_CONTEXT;
12145637 3614 return log_unit_error_errno(unit, r, "Failed to change SELinux context to %s: %m", exec_context);
5cd9cd35
LP
3615 }
3616 }
3617 }
3618#endif
3619
349cc4a5 3620#if HAVE_APPARMOR
43b1f709 3621 if (use_apparmor && context->apparmor_profile) {
5cd9cd35
LP
3622 r = aa_change_onexec(context->apparmor_profile);
3623 if (r < 0 && !context->apparmor_profile_ignore) {
3624 *exit_status = EXIT_APPARMOR_PROFILE;
12145637 3625 return log_unit_error_errno(unit, errno, "Failed to prepare AppArmor profile change to %s: %m", context->apparmor_profile);
5cd9cd35
LP
3626 }
3627 }
3628#endif
3629
165a31c0
LP
3630 /* PR_GET_SECUREBITS is not privileged, while PR_SET_SECUREBITS is. So to suppress potential EPERMs
3631 * we'll try not to call PR_SET_SECUREBITS unless necessary. */
755d4b67
IP
3632 if (prctl(PR_GET_SECUREBITS) != secure_bits)
3633 if (prctl(PR_SET_SECUREBITS, secure_bits) < 0) {
ff0af2a1 3634 *exit_status = EXIT_SECUREBITS;
12145637 3635 return log_unit_error_errno(unit, errno, "Failed to set process secure bits: %m");
ff01d048 3636 }
5b6319dc 3637
59eeb84b 3638 if (context_has_no_new_privileges(context))
d35fbf6b 3639 if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) < 0) {
ff0af2a1 3640 *exit_status = EXIT_NO_NEW_PRIVILEGES;
12145637 3641 return log_unit_error_errno(unit, errno, "Failed to disable new privileges: %m");
d35fbf6b
DM
3642 }
3643
349cc4a5 3644#if HAVE_SECCOMP
469830d1
LP
3645 r = apply_address_families(unit, context);
3646 if (r < 0) {
3647 *exit_status = EXIT_ADDRESS_FAMILIES;
12145637 3648 return log_unit_error_errno(unit, r, "Failed to restrict address families: %m");
4c2630eb 3649 }
04aa0cb9 3650
469830d1
LP
3651 r = apply_memory_deny_write_execute(unit, context);
3652 if (r < 0) {
3653 *exit_status = EXIT_SECCOMP;
12145637 3654 return log_unit_error_errno(unit, r, "Failed to disable writing to executable memory: %m");
f3e43635 3655 }
f4170c67 3656
469830d1
LP
3657 r = apply_restrict_realtime(unit, context);
3658 if (r < 0) {
3659 *exit_status = EXIT_SECCOMP;
12145637 3660 return log_unit_error_errno(unit, r, "Failed to apply realtime restrictions: %m");
f4170c67
LP
3661 }
3662
f69567cb
LP
3663 r = apply_restrict_suid_sgid(unit, context);
3664 if (r < 0) {
3665 *exit_status = EXIT_SECCOMP;
3666 return log_unit_error_errno(unit, r, "Failed to apply SUID/SGID restrictions: %m");
3667 }
3668
add00535
LP
3669 r = apply_restrict_namespaces(unit, context);
3670 if (r < 0) {
3671 *exit_status = EXIT_SECCOMP;
12145637 3672 return log_unit_error_errno(unit, r, "Failed to apply namespace restrictions: %m");
add00535
LP
3673 }
3674
469830d1
LP
3675 r = apply_protect_sysctl(unit, context);
3676 if (r < 0) {
3677 *exit_status = EXIT_SECCOMP;
12145637 3678 return log_unit_error_errno(unit, r, "Failed to apply sysctl restrictions: %m");
502d704e
DH
3679 }
3680
469830d1
LP
3681 r = apply_protect_kernel_modules(unit, context);
3682 if (r < 0) {
3683 *exit_status = EXIT_SECCOMP;
12145637 3684 return log_unit_error_errno(unit, r, "Failed to apply module loading restrictions: %m");
59eeb84b
LP
3685 }
3686
469830d1
LP
3687 r = apply_private_devices(unit, context);
3688 if (r < 0) {
3689 *exit_status = EXIT_SECCOMP;
12145637 3690 return log_unit_error_errno(unit, r, "Failed to set up private devices: %m");
469830d1
LP
3691 }
3692
3693 r = apply_syscall_archs(unit, context);
3694 if (r < 0) {
3695 *exit_status = EXIT_SECCOMP;
12145637 3696 return log_unit_error_errno(unit, r, "Failed to apply syscall architecture restrictions: %m");
ba128bb8
LP
3697 }
3698
78e864e5
TM
3699 r = apply_lock_personality(unit, context);
3700 if (r < 0) {
3701 *exit_status = EXIT_SECCOMP;
12145637 3702 return log_unit_error_errno(unit, r, "Failed to lock personalities: %m");
78e864e5
TM
3703 }
3704
5cd9cd35
LP
3705 /* This really should remain the last step before the execve(), to make sure our own code is unaffected
3706 * by the filter as little as possible. */
165a31c0 3707 r = apply_syscall_filter(unit, context, needs_ambient_hack);
469830d1
LP
3708 if (r < 0) {
3709 *exit_status = EXIT_SECCOMP;
12145637 3710 return log_unit_error_errno(unit, r, "Failed to apply system call filters: %m");
d35fbf6b
DM
3711 }
3712#endif
d35fbf6b 3713 }
034c6ed7 3714
00819cc1
LP
3715 if (!strv_isempty(context->unset_environment)) {
3716 char **ee = NULL;
3717
3718 ee = strv_env_delete(accum_env, 1, context->unset_environment);
3719 if (!ee) {
3720 *exit_status = EXIT_MEMORY;
12145637 3721 return log_oom();
00819cc1
LP
3722 }
3723
130d3d22 3724 strv_free_and_replace(accum_env, ee);
00819cc1
LP
3725 }
3726
7ca69792
AZ
3727 if (!FLAGS_SET(command->flags, EXEC_COMMAND_NO_ENV_EXPAND)) {
3728 replaced_argv = replace_env_argv(command->argv, accum_env);
3729 if (!replaced_argv) {
3730 *exit_status = EXIT_MEMORY;
3731 return log_oom();
3732 }
3733 final_argv = replaced_argv;
3734 } else
3735 final_argv = command->argv;
034c6ed7 3736
f1d34068 3737 if (DEBUG_LOGGING) {
d35fbf6b 3738 _cleanup_free_ char *line;
81a2b7ce 3739
d35fbf6b 3740 line = exec_command_line(final_argv);
a1230ff9 3741 if (line)
f2341e0a 3742 log_struct(LOG_DEBUG,
f2341e0a
LP
3743 "EXECUTABLE=%s", command->path,
3744 LOG_UNIT_MESSAGE(unit, "Executing: %s", line),
ba360bb0 3745 LOG_UNIT_ID(unit),
a1230ff9 3746 LOG_UNIT_INVOCATION_ID(unit));
d35fbf6b 3747 }
dd305ec9 3748
5686391b
LP
3749 if (exec_fd >= 0) {
3750 uint8_t hot = 1;
3751
3752 /* We have finished with all our initializations. Let's now let the manager know that. From this point
3753 * on, if the manager sees POLLHUP on the exec_fd, then execve() was successful. */
3754
3755 if (write(exec_fd, &hot, sizeof(hot)) < 0) {
3756 *exit_status = EXIT_EXEC;
3757 return log_unit_error_errno(unit, errno, "Failed to enable exec_fd: %m");
3758 }
3759 }
3760
2065ca69 3761 execve(command->path, final_argv, accum_env);
5686391b
LP
3762 r = -errno;
3763
3764 if (exec_fd >= 0) {
3765 uint8_t hot = 0;
3766
3767 /* The execve() failed. This means the exec_fd is still open. Which means we need to tell the manager
3768 * that POLLHUP on it no longer means execve() succeeded. */
3769
3770 if (write(exec_fd, &hot, sizeof(hot)) < 0) {
3771 *exit_status = EXIT_EXEC;
3772 return log_unit_error_errno(unit, errno, "Failed to disable exec_fd: %m");
3773 }
3774 }
12145637 3775
5686391b
LP
3776 if (r == -ENOENT && (command->flags & EXEC_COMMAND_IGNORE_FAILURE)) {
3777 log_struct_errno(LOG_INFO, r,
12145637
LP
3778 "MESSAGE_ID=" SD_MESSAGE_SPAWN_FAILED_STR,
3779 LOG_UNIT_ID(unit),
3780 LOG_UNIT_INVOCATION_ID(unit),
3781 LOG_UNIT_MESSAGE(unit, "Executable %s missing, skipping: %m",
3782 command->path),
a1230ff9 3783 "EXECUTABLE=%s", command->path);
12145637
LP
3784 return 0;
3785 }
3786
ff0af2a1 3787 *exit_status = EXIT_EXEC;
5686391b 3788 return log_unit_error_errno(unit, r, "Failed to execute command: %m");
d35fbf6b 3789}
81a2b7ce 3790
34cf6c43 3791static int exec_context_load_environment(const Unit *unit, const ExecContext *c, char ***l);
2caa38e9 3792static int exec_context_named_iofds(const ExecContext *c, const ExecParameters *p, int named_iofds[static 3]);
34cf6c43 3793
f2341e0a
LP
3794int exec_spawn(Unit *unit,
3795 ExecCommand *command,
d35fbf6b
DM
3796 const ExecContext *context,
3797 const ExecParameters *params,
3798 ExecRuntime *runtime,
29206d46 3799 DynamicCreds *dcreds,
d35fbf6b 3800 pid_t *ret) {
8351ceae 3801
ee39ca20 3802 int socket_fd, r, named_iofds[3] = { -1, -1, -1 }, *fds = NULL;
78f93209 3803 _cleanup_free_ char *subcgroup_path = NULL;
d35fbf6b 3804 _cleanup_strv_free_ char **files_env = NULL;
da6053d0 3805 size_t n_storage_fds = 0, n_socket_fds = 0;
ff0af2a1 3806 _cleanup_free_ char *line = NULL;
d35fbf6b 3807 pid_t pid;
8351ceae 3808
f2341e0a 3809 assert(unit);
d35fbf6b
DM
3810 assert(command);
3811 assert(context);
3812 assert(ret);
3813 assert(params);
25b583d7 3814 assert(params->fds || (params->n_socket_fds + params->n_storage_fds <= 0));
4298d0b5 3815
d35fbf6b
DM
3816 if (context->std_input == EXEC_INPUT_SOCKET ||
3817 context->std_output == EXEC_OUTPUT_SOCKET ||
3818 context->std_error == EXEC_OUTPUT_SOCKET) {
17df7223 3819
4c47affc 3820 if (params->n_socket_fds > 1) {
f2341e0a 3821 log_unit_error(unit, "Got more than one socket.");
d35fbf6b 3822 return -EINVAL;
ff0af2a1 3823 }
eef65bf3 3824
4c47affc 3825 if (params->n_socket_fds == 0) {
488ab41c
AA
3826 log_unit_error(unit, "Got no socket.");
3827 return -EINVAL;
3828 }
3829
d35fbf6b
DM
3830 socket_fd = params->fds[0];
3831 } else {
3832 socket_fd = -1;
3833 fds = params->fds;
9b141911 3834 n_socket_fds = params->n_socket_fds;
25b583d7 3835 n_storage_fds = params->n_storage_fds;
d35fbf6b 3836 }
94f04347 3837
34cf6c43 3838 r = exec_context_named_iofds(context, params, named_iofds);
52c239d7
LB
3839 if (r < 0)
3840 return log_unit_error_errno(unit, r, "Failed to load a named file descriptor: %m");
3841
f2341e0a 3842 r = exec_context_load_environment(unit, context, &files_env);
ff0af2a1 3843 if (r < 0)
f2341e0a 3844 return log_unit_error_errno(unit, r, "Failed to load environment files: %m");
034c6ed7 3845
ee39ca20 3846 line = exec_command_line(command->argv);
d35fbf6b
DM
3847 if (!line)
3848 return log_oom();
fab56fc5 3849
f2341e0a 3850 log_struct(LOG_DEBUG,
f2341e0a
LP
3851 LOG_UNIT_MESSAGE(unit, "About to execute: %s", line),
3852 "EXECUTABLE=%s", command->path,
ba360bb0 3853 LOG_UNIT_ID(unit),
a1230ff9 3854 LOG_UNIT_INVOCATION_ID(unit));
12145637 3855
78f93209
LP
3856 if (params->cgroup_path) {
3857 r = exec_parameters_get_cgroup_path(params, &subcgroup_path);
3858 if (r < 0)
3859 return log_unit_error_errno(unit, r, "Failed to acquire subcgroup path: %m");
3860 if (r > 0) { /* We are using a child cgroup */
3861 r = cg_create(SYSTEMD_CGROUP_CONTROLLER, subcgroup_path);
3862 if (r < 0)
3863 return log_unit_error_errno(unit, r, "Failed to create control group '%s': %m", subcgroup_path);
3864 }
3865 }
3866
d35fbf6b
DM
3867 pid = fork();
3868 if (pid < 0)
74129a12 3869 return log_unit_error_errno(unit, errno, "Failed to fork: %m");
d35fbf6b
DM
3870
3871 if (pid == 0) {
12145637 3872 int exit_status = EXIT_SUCCESS;
ff0af2a1 3873
f2341e0a
LP
3874 r = exec_child(unit,
3875 command,
ff0af2a1
LP
3876 context,
3877 params,
3878 runtime,
29206d46 3879 dcreds,
ff0af2a1 3880 socket_fd,
52c239d7 3881 named_iofds,
4c47affc 3882 fds,
9b141911 3883 n_socket_fds,
25b583d7 3884 n_storage_fds,
ff0af2a1 3885 files_env,
00d9ef85 3886 unit->manager->user_lookup_fds[1],
12145637
LP
3887 &exit_status);
3888
e1714f02
ZJS
3889 if (r < 0) {
3890 const char *status =
3891 exit_status_to_string(exit_status,
e04ed6db 3892 EXIT_STATUS_LIBC | EXIT_STATUS_SYSTEMD);
e1714f02 3893
12145637
LP
3894 log_struct_errno(LOG_ERR, r,
3895 "MESSAGE_ID=" SD_MESSAGE_SPAWN_FAILED_STR,
3896 LOG_UNIT_ID(unit),
3897 LOG_UNIT_INVOCATION_ID(unit),
3898 LOG_UNIT_MESSAGE(unit, "Failed at step %s spawning %s: %m",
e1714f02 3899 status, command->path),
a1230ff9 3900 "EXECUTABLE=%s", command->path);
e1714f02 3901 }
4c2630eb 3902
ff0af2a1 3903 _exit(exit_status);
034c6ed7
LP
3904 }
3905
f2341e0a 3906 log_unit_debug(unit, "Forked %s as "PID_FMT, command->path, pid);
23635a85 3907
78f93209
LP
3908 /* We add the new process to the cgroup both in the child (so that we can be sure that no user code is ever
3909 * executed outside of the cgroup) and in the parent (so that we can be sure that when we kill the cgroup the
3910 * process will be killed too). */
3911 if (subcgroup_path)
3912 (void) cg_attach(SYSTEMD_CGROUP_CONTROLLER, subcgroup_path, pid);
2da3263a 3913
b58b4116 3914 exec_status_start(&command->exec_status, pid);
9fb86720 3915
034c6ed7 3916 *ret = pid;
5cb5a6ff
LP
3917 return 0;
3918}
3919
034c6ed7 3920void exec_context_init(ExecContext *c) {
3536f49e
YW
3921 ExecDirectoryType i;
3922
034c6ed7
LP
3923 assert(c);
3924
4c12626c 3925 c->umask = 0022;
9eba9da4 3926 c->ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 0);
94f04347 3927 c->cpu_sched_policy = SCHED_OTHER;
071830ff 3928 c->syslog_priority = LOG_DAEMON|LOG_INFO;
74922904 3929 c->syslog_level_prefix = true;
353e12c2 3930 c->ignore_sigpipe = true;
3a43da28 3931 c->timer_slack_nsec = NSEC_INFINITY;
050f7277 3932 c->personality = PERSONALITY_INVALID;
72fd1768 3933 for (i = 0; i < _EXEC_DIRECTORY_TYPE_MAX; i++)
3536f49e 3934 c->directories[i].mode = 0755;
12213aed 3935 c->timeout_clean_usec = USEC_INFINITY;
a103496c 3936 c->capability_bounding_set = CAP_ALL;
aa9d574d
YW
3937 assert_cc(NAMESPACE_FLAGS_INITIAL != NAMESPACE_FLAGS_ALL);
3938 c->restrict_namespaces = NAMESPACE_FLAGS_INITIAL;
d3070fbd 3939 c->log_level_max = -1;
b070c7c0 3940 numa_policy_reset(&c->numa_policy);
034c6ed7
LP
3941}
3942
613b411c 3943void exec_context_done(ExecContext *c) {
3536f49e 3944 ExecDirectoryType i;
d3070fbd 3945 size_t l;
5cb5a6ff
LP
3946
3947 assert(c);
3948
6796073e
LP
3949 c->environment = strv_free(c->environment);
3950 c->environment_files = strv_free(c->environment_files);
b4c14404 3951 c->pass_environment = strv_free(c->pass_environment);
00819cc1 3952 c->unset_environment = strv_free(c->unset_environment);
8c7be95e 3953
31ce987c 3954 rlimit_free_all(c->rlimit);
034c6ed7 3955
2038c3f5 3956 for (l = 0; l < 3; l++) {
52c239d7 3957 c->stdio_fdname[l] = mfree(c->stdio_fdname[l]);
2038c3f5
LP
3958 c->stdio_file[l] = mfree(c->stdio_file[l]);
3959 }
52c239d7 3960
a1e58e8e
LP
3961 c->working_directory = mfree(c->working_directory);
3962 c->root_directory = mfree(c->root_directory);
915e6d16 3963 c->root_image = mfree(c->root_image);
a1e58e8e
LP
3964 c->tty_path = mfree(c->tty_path);
3965 c->syslog_identifier = mfree(c->syslog_identifier);
3966 c->user = mfree(c->user);
3967 c->group = mfree(c->group);
034c6ed7 3968
6796073e 3969 c->supplementary_groups = strv_free(c->supplementary_groups);
94f04347 3970
a1e58e8e 3971 c->pam_name = mfree(c->pam_name);
5b6319dc 3972
2a624c36
AP
3973 c->read_only_paths = strv_free(c->read_only_paths);
3974 c->read_write_paths = strv_free(c->read_write_paths);
3975 c->inaccessible_paths = strv_free(c->inaccessible_paths);
82c121a4 3976
d2d6c096 3977 bind_mount_free_many(c->bind_mounts, c->n_bind_mounts);
8e06d57c
YW
3978 c->bind_mounts = NULL;
3979 c->n_bind_mounts = 0;
2abd4e38
YW
3980 temporary_filesystem_free_many(c->temporary_filesystems, c->n_temporary_filesystems);
3981 c->temporary_filesystems = NULL;
3982 c->n_temporary_filesystems = 0;
d2d6c096 3983
0985c7c4 3984 cpu_set_reset(&c->cpu_set);
b070c7c0 3985 numa_policy_reset(&c->numa_policy);
86a3475b 3986
a1e58e8e
LP
3987 c->utmp_id = mfree(c->utmp_id);
3988 c->selinux_context = mfree(c->selinux_context);
3989 c->apparmor_profile = mfree(c->apparmor_profile);
5b8e1b77 3990 c->smack_process_label = mfree(c->smack_process_label);
eef65bf3 3991
8cfa775f 3992 c->syscall_filter = hashmap_free(c->syscall_filter);
525d3cc7
LP
3993 c->syscall_archs = set_free(c->syscall_archs);
3994 c->address_families = set_free(c->address_families);
e66cf1a3 3995
72fd1768 3996 for (i = 0; i < _EXEC_DIRECTORY_TYPE_MAX; i++)
3536f49e 3997 c->directories[i].paths = strv_free(c->directories[i].paths);
d3070fbd
LP
3998
3999 c->log_level_max = -1;
4000
4001 exec_context_free_log_extra_fields(c);
08f3be7a 4002
90fc172e
AZ
4003 c->log_rate_limit_interval_usec = 0;
4004 c->log_rate_limit_burst = 0;
4005
08f3be7a
LP
4006 c->stdin_data = mfree(c->stdin_data);
4007 c->stdin_data_size = 0;
a8d08f39
LP
4008
4009 c->network_namespace_path = mfree(c->network_namespace_path);
e66cf1a3
LP
4010}
4011
34cf6c43 4012int exec_context_destroy_runtime_directory(const ExecContext *c, const char *runtime_prefix) {
e66cf1a3
LP
4013 char **i;
4014
4015 assert(c);
4016
4017 if (!runtime_prefix)
4018 return 0;
4019
3536f49e 4020 STRV_FOREACH(i, c->directories[EXEC_DIRECTORY_RUNTIME].paths) {
e66cf1a3
LP
4021 _cleanup_free_ char *p;
4022
494d0247
YW
4023 if (exec_directory_is_private(c, EXEC_DIRECTORY_RUNTIME))
4024 p = path_join(runtime_prefix, "private", *i);
4025 else
4026 p = path_join(runtime_prefix, *i);
e66cf1a3
LP
4027 if (!p)
4028 return -ENOMEM;
4029
7bc4bf4a
LP
4030 /* We execute this synchronously, since we need to be sure this is gone when we start the
4031 * service next. */
c6878637 4032 (void) rm_rf(p, REMOVE_ROOT);
e66cf1a3
LP
4033 }
4034
4035 return 0;
5cb5a6ff
LP
4036}
4037
34cf6c43 4038static void exec_command_done(ExecCommand *c) {
43d0fcbd
LP
4039 assert(c);
4040
a1e58e8e 4041 c->path = mfree(c->path);
6796073e 4042 c->argv = strv_free(c->argv);
43d0fcbd
LP
4043}
4044
da6053d0
LP
4045void exec_command_done_array(ExecCommand *c, size_t n) {
4046 size_t i;
43d0fcbd
LP
4047
4048 for (i = 0; i < n; i++)
4049 exec_command_done(c+i);
4050}
4051
f1acf85a 4052ExecCommand* exec_command_free_list(ExecCommand *c) {
5cb5a6ff
LP
4053 ExecCommand *i;
4054
4055 while ((i = c)) {
71fda00f 4056 LIST_REMOVE(command, c, i);
43d0fcbd 4057 exec_command_done(i);
5cb5a6ff
LP
4058 free(i);
4059 }
f1acf85a
ZJS
4060
4061 return NULL;
5cb5a6ff
LP
4062}
4063
da6053d0
LP
4064void exec_command_free_array(ExecCommand **c, size_t n) {
4065 size_t i;
034c6ed7 4066
f1acf85a
ZJS
4067 for (i = 0; i < n; i++)
4068 c[i] = exec_command_free_list(c[i]);
034c6ed7
LP
4069}
4070
6a1d4d9f
LP
4071void exec_command_reset_status_array(ExecCommand *c, size_t n) {
4072 size_t i;
4073
4074 for (i = 0; i < n; i++)
4075 exec_status_reset(&c[i].exec_status);
4076}
4077
4078void exec_command_reset_status_list_array(ExecCommand **c, size_t n) {
4079 size_t i;
4080
4081 for (i = 0; i < n; i++) {
4082 ExecCommand *z;
4083
4084 LIST_FOREACH(command, z, c[i])
4085 exec_status_reset(&z->exec_status);
4086 }
4087}
4088
039f0e70 4089typedef struct InvalidEnvInfo {
34cf6c43 4090 const Unit *unit;
039f0e70
LP
4091 const char *path;
4092} InvalidEnvInfo;
4093
4094static void invalid_env(const char *p, void *userdata) {
4095 InvalidEnvInfo *info = userdata;
4096
f2341e0a 4097 log_unit_error(info->unit, "Ignoring invalid environment assignment '%s': %s", p, info->path);
039f0e70
LP
4098}
4099
52c239d7
LB
4100const char* exec_context_fdname(const ExecContext *c, int fd_index) {
4101 assert(c);
4102
4103 switch (fd_index) {
5073ff6b 4104
52c239d7
LB
4105 case STDIN_FILENO:
4106 if (c->std_input != EXEC_INPUT_NAMED_FD)
4107 return NULL;
5073ff6b 4108
52c239d7 4109 return c->stdio_fdname[STDIN_FILENO] ?: "stdin";
5073ff6b 4110
52c239d7
LB
4111 case STDOUT_FILENO:
4112 if (c->std_output != EXEC_OUTPUT_NAMED_FD)
4113 return NULL;
5073ff6b 4114
52c239d7 4115 return c->stdio_fdname[STDOUT_FILENO] ?: "stdout";
5073ff6b 4116
52c239d7
LB
4117 case STDERR_FILENO:
4118 if (c->std_error != EXEC_OUTPUT_NAMED_FD)
4119 return NULL;
5073ff6b 4120
52c239d7 4121 return c->stdio_fdname[STDERR_FILENO] ?: "stderr";
5073ff6b 4122
52c239d7
LB
4123 default:
4124 return NULL;
4125 }
4126}
4127
2caa38e9
LP
4128static int exec_context_named_iofds(
4129 const ExecContext *c,
4130 const ExecParameters *p,
4131 int named_iofds[static 3]) {
4132
da6053d0 4133 size_t i, targets;
56fbd561 4134 const char* stdio_fdname[3];
da6053d0 4135 size_t n_fds;
52c239d7
LB
4136
4137 assert(c);
4138 assert(p);
2caa38e9 4139 assert(named_iofds);
52c239d7
LB
4140
4141 targets = (c->std_input == EXEC_INPUT_NAMED_FD) +
4142 (c->std_output == EXEC_OUTPUT_NAMED_FD) +
4143 (c->std_error == EXEC_OUTPUT_NAMED_FD);
4144
4145 for (i = 0; i < 3; i++)
4146 stdio_fdname[i] = exec_context_fdname(c, i);
4147
4c47affc
FB
4148 n_fds = p->n_storage_fds + p->n_socket_fds;
4149
4150 for (i = 0; i < n_fds && targets > 0; i++)
56fbd561
ZJS
4151 if (named_iofds[STDIN_FILENO] < 0 &&
4152 c->std_input == EXEC_INPUT_NAMED_FD &&
4153 stdio_fdname[STDIN_FILENO] &&
4154 streq(p->fd_names[i], stdio_fdname[STDIN_FILENO])) {
4155
52c239d7
LB
4156 named_iofds[STDIN_FILENO] = p->fds[i];
4157 targets--;
56fbd561
ZJS
4158
4159 } else if (named_iofds[STDOUT_FILENO] < 0 &&
4160 c->std_output == EXEC_OUTPUT_NAMED_FD &&
4161 stdio_fdname[STDOUT_FILENO] &&
4162 streq(p->fd_names[i], stdio_fdname[STDOUT_FILENO])) {
4163
52c239d7
LB
4164 named_iofds[STDOUT_FILENO] = p->fds[i];
4165 targets--;
56fbd561
ZJS
4166
4167 } else if (named_iofds[STDERR_FILENO] < 0 &&
4168 c->std_error == EXEC_OUTPUT_NAMED_FD &&
4169 stdio_fdname[STDERR_FILENO] &&
4170 streq(p->fd_names[i], stdio_fdname[STDERR_FILENO])) {
4171
52c239d7
LB
4172 named_iofds[STDERR_FILENO] = p->fds[i];
4173 targets--;
4174 }
4175
56fbd561 4176 return targets == 0 ? 0 : -ENOENT;
52c239d7
LB
4177}
4178
34cf6c43 4179static int exec_context_load_environment(const Unit *unit, const ExecContext *c, char ***l) {
8c7be95e
LP
4180 char **i, **r = NULL;
4181
4182 assert(c);
4183 assert(l);
4184
4185 STRV_FOREACH(i, c->environment_files) {
4186 char *fn;
52511fae
ZJS
4187 int k;
4188 unsigned n;
8c7be95e
LP
4189 bool ignore = false;
4190 char **p;
7fd1b19b 4191 _cleanup_globfree_ glob_t pglob = {};
8c7be95e
LP
4192
4193 fn = *i;
4194
4195 if (fn[0] == '-') {
4196 ignore = true;
313cefa1 4197 fn++;
8c7be95e
LP
4198 }
4199
4200 if (!path_is_absolute(fn)) {
8c7be95e
LP
4201 if (ignore)
4202 continue;
4203
4204 strv_free(r);
4205 return -EINVAL;
4206 }
4207
2bef10ab 4208 /* Filename supports globbing, take all matching files */
d8c92e8b
ZJS
4209 k = safe_glob(fn, 0, &pglob);
4210 if (k < 0) {
2bef10ab
PL
4211 if (ignore)
4212 continue;
8c7be95e 4213
2bef10ab 4214 strv_free(r);
d8c92e8b 4215 return k;
2bef10ab 4216 }
8c7be95e 4217
d8c92e8b
ZJS
4218 /* When we don't match anything, -ENOENT should be returned */
4219 assert(pglob.gl_pathc > 0);
4220
4221 for (n = 0; n < pglob.gl_pathc; n++) {
aa8fbc74 4222 k = load_env_file(NULL, pglob.gl_pathv[n], &p);
2bef10ab
PL
4223 if (k < 0) {
4224 if (ignore)
4225 continue;
8c7be95e 4226
2bef10ab 4227 strv_free(r);
2bef10ab 4228 return k;
e9c1ea9d 4229 }
ebc05a09 4230 /* Log invalid environment variables with filename */
039f0e70
LP
4231 if (p) {
4232 InvalidEnvInfo info = {
f2341e0a 4233 .unit = unit,
039f0e70
LP
4234 .path = pglob.gl_pathv[n]
4235 };
4236
4237 p = strv_env_clean_with_callback(p, invalid_env, &info);
4238 }
8c7be95e 4239
234519ae 4240 if (!r)
2bef10ab
PL
4241 r = p;
4242 else {
4243 char **m;
8c7be95e 4244
2bef10ab
PL
4245 m = strv_env_merge(2, r, p);
4246 strv_free(r);
4247 strv_free(p);
c84a9488 4248 if (!m)
2bef10ab 4249 return -ENOMEM;
2bef10ab
PL
4250
4251 r = m;
4252 }
8c7be95e
LP
4253 }
4254 }
4255
4256 *l = r;
4257
4258 return 0;
4259}
4260
6ac8fdc9 4261static bool tty_may_match_dev_console(const char *tty) {
7b912648 4262 _cleanup_free_ char *resolved = NULL;
6ac8fdc9 4263
1e22b5cd
LP
4264 if (!tty)
4265 return true;
4266
a119ec7c 4267 tty = skip_dev_prefix(tty);
6ac8fdc9
MS
4268
4269 /* trivial identity? */
4270 if (streq(tty, "console"))
4271 return true;
4272
7b912648
LP
4273 if (resolve_dev_console(&resolved) < 0)
4274 return true; /* if we could not resolve, assume it may */
6ac8fdc9
MS
4275
4276 /* "tty0" means the active VC, so it may be the same sometimes */
955f1c85 4277 return path_equal(resolved, tty) || (streq(resolved, "tty0") && tty_is_vc(tty));
6ac8fdc9
MS
4278}
4279
6c0ae739
LP
4280static bool exec_context_may_touch_tty(const ExecContext *ec) {
4281 assert(ec);
1e22b5cd 4282
6c0ae739 4283 return ec->tty_reset ||
1e22b5cd
LP
4284 ec->tty_vhangup ||
4285 ec->tty_vt_disallocate ||
6ac8fdc9
MS
4286 is_terminal_input(ec->std_input) ||
4287 is_terminal_output(ec->std_output) ||
6c0ae739
LP
4288 is_terminal_output(ec->std_error);
4289}
4290
4291bool exec_context_may_touch_console(const ExecContext *ec) {
4292
4293 return exec_context_may_touch_tty(ec) &&
1e22b5cd 4294 tty_may_match_dev_console(exec_context_tty_path(ec));
6ac8fdc9
MS
4295}
4296
15ae422b
LP
4297static void strv_fprintf(FILE *f, char **l) {
4298 char **g;
4299
4300 assert(f);
4301
4302 STRV_FOREACH(g, l)
4303 fprintf(f, " %s", *g);
4304}
4305
34cf6c43 4306void exec_context_dump(const ExecContext *c, FILE* f, const char *prefix) {
12213aed 4307 char **e, **d, buf_clean[FORMAT_TIMESPAN_MAX];
d3070fbd 4308 ExecDirectoryType dt;
94f04347 4309 unsigned i;
add00535 4310 int r;
9eba9da4 4311
5cb5a6ff
LP
4312 assert(c);
4313 assert(f);
4314
4ad49000 4315 prefix = strempty(prefix);
5cb5a6ff
LP
4316
4317 fprintf(f,
94f04347
LP
4318 "%sUMask: %04o\n"
4319 "%sWorkingDirectory: %s\n"
451a074f 4320 "%sRootDirectory: %s\n"
15ae422b 4321 "%sNonBlocking: %s\n"
64747e2d 4322 "%sPrivateTmp: %s\n"
7f112f50 4323 "%sPrivateDevices: %s\n"
59eeb84b 4324 "%sProtectKernelTunables: %s\n"
e66a2f65 4325 "%sProtectKernelModules: %s\n"
59eeb84b 4326 "%sProtectControlGroups: %s\n"
d251207d
LP
4327 "%sPrivateNetwork: %s\n"
4328 "%sPrivateUsers: %s\n"
1b8689f9
LP
4329 "%sProtectHome: %s\n"
4330 "%sProtectSystem: %s\n"
5d997827 4331 "%sMountAPIVFS: %s\n"
f3e43635 4332 "%sIgnoreSIGPIPE: %s\n"
f4170c67 4333 "%sMemoryDenyWriteExecute: %s\n"
b1edf445 4334 "%sRestrictRealtime: %s\n"
f69567cb 4335 "%sRestrictSUIDSGID: %s\n"
aecd5ac6
TM
4336 "%sKeyringMode: %s\n"
4337 "%sProtectHostname: %s\n",
5cb5a6ff 4338 prefix, c->umask,
9eba9da4 4339 prefix, c->working_directory ? c->working_directory : "/",
451a074f 4340 prefix, c->root_directory ? c->root_directory : "/",
15ae422b 4341 prefix, yes_no(c->non_blocking),
64747e2d 4342 prefix, yes_no(c->private_tmp),
7f112f50 4343 prefix, yes_no(c->private_devices),
59eeb84b 4344 prefix, yes_no(c->protect_kernel_tunables),
e66a2f65 4345 prefix, yes_no(c->protect_kernel_modules),
59eeb84b 4346 prefix, yes_no(c->protect_control_groups),
d251207d
LP
4347 prefix, yes_no(c->private_network),
4348 prefix, yes_no(c->private_users),
1b8689f9
LP
4349 prefix, protect_home_to_string(c->protect_home),
4350 prefix, protect_system_to_string(c->protect_system),
5d997827 4351 prefix, yes_no(c->mount_apivfs),
f3e43635 4352 prefix, yes_no(c->ignore_sigpipe),
f4170c67 4353 prefix, yes_no(c->memory_deny_write_execute),
b1edf445 4354 prefix, yes_no(c->restrict_realtime),
f69567cb 4355 prefix, yes_no(c->restrict_suid_sgid),
aecd5ac6
TM
4356 prefix, exec_keyring_mode_to_string(c->keyring_mode),
4357 prefix, yes_no(c->protect_hostname));
fb33a393 4358
915e6d16
LP
4359 if (c->root_image)
4360 fprintf(f, "%sRootImage: %s\n", prefix, c->root_image);
4361
8c7be95e
LP
4362 STRV_FOREACH(e, c->environment)
4363 fprintf(f, "%sEnvironment: %s\n", prefix, *e);
4364
4365 STRV_FOREACH(e, c->environment_files)
4366 fprintf(f, "%sEnvironmentFile: %s\n", prefix, *e);
94f04347 4367
b4c14404
FB
4368 STRV_FOREACH(e, c->pass_environment)
4369 fprintf(f, "%sPassEnvironment: %s\n", prefix, *e);
4370
00819cc1
LP
4371 STRV_FOREACH(e, c->unset_environment)
4372 fprintf(f, "%sUnsetEnvironment: %s\n", prefix, *e);
4373
53f47dfc
YW
4374 fprintf(f, "%sRuntimeDirectoryPreserve: %s\n", prefix, exec_preserve_mode_to_string(c->runtime_directory_preserve_mode));
4375
72fd1768 4376 for (dt = 0; dt < _EXEC_DIRECTORY_TYPE_MAX; dt++) {
3536f49e
YW
4377 fprintf(f, "%s%sMode: %04o\n", prefix, exec_directory_type_to_string(dt), c->directories[dt].mode);
4378
4379 STRV_FOREACH(d, c->directories[dt].paths)
4380 fprintf(f, "%s%s: %s\n", prefix, exec_directory_type_to_string(dt), *d);
4381 }
c2bbd90b 4382
12213aed
YW
4383 fprintf(f,
4384 "%sTimeoutCleanSec: %s\n",
4385 prefix, format_timespan(buf_clean, sizeof(buf_clean), c->timeout_clean_usec, USEC_PER_SEC));
4386
fb33a393
LP
4387 if (c->nice_set)
4388 fprintf(f,
4389 "%sNice: %i\n",
4390 prefix, c->nice);
4391
dd6c17b1 4392 if (c->oom_score_adjust_set)
fb33a393 4393 fprintf(f,
dd6c17b1
LP
4394 "%sOOMScoreAdjust: %i\n",
4395 prefix, c->oom_score_adjust);
9eba9da4 4396
94f04347 4397 for (i = 0; i < RLIM_NLIMITS; i++)
3c11da9d 4398 if (c->rlimit[i]) {
4c3a2b84 4399 fprintf(f, "%sLimit%s: " RLIM_FMT "\n",
3c11da9d 4400 prefix, rlimit_to_string(i), c->rlimit[i]->rlim_max);
4c3a2b84 4401 fprintf(f, "%sLimit%sSoft: " RLIM_FMT "\n",
3c11da9d
EV
4402 prefix, rlimit_to_string(i), c->rlimit[i]->rlim_cur);
4403 }
94f04347 4404
f8b69d1d 4405 if (c->ioprio_set) {
1756a011 4406 _cleanup_free_ char *class_str = NULL;
f8b69d1d 4407
837df140
YW
4408 r = ioprio_class_to_string_alloc(IOPRIO_PRIO_CLASS(c->ioprio), &class_str);
4409 if (r >= 0)
4410 fprintf(f, "%sIOSchedulingClass: %s\n", prefix, class_str);
4411
4412 fprintf(f, "%sIOPriority: %lu\n", prefix, IOPRIO_PRIO_DATA(c->ioprio));
f8b69d1d 4413 }
94f04347 4414
f8b69d1d 4415 if (c->cpu_sched_set) {
1756a011 4416 _cleanup_free_ char *policy_str = NULL;
f8b69d1d 4417
837df140
YW
4418 r = sched_policy_to_string_alloc(c->cpu_sched_policy, &policy_str);
4419 if (r >= 0)
4420 fprintf(f, "%sCPUSchedulingPolicy: %s\n", prefix, policy_str);
4421
94f04347 4422 fprintf(f,
38b48754
LP
4423 "%sCPUSchedulingPriority: %i\n"
4424 "%sCPUSchedulingResetOnFork: %s\n",
38b48754
LP
4425 prefix, c->cpu_sched_priority,
4426 prefix, yes_no(c->cpu_sched_reset_on_fork));
b929bf04 4427 }
94f04347 4428
0985c7c4 4429 if (c->cpu_set.set) {
e7fca352
MS
4430 _cleanup_free_ char *affinity = NULL;
4431
4432 affinity = cpu_set_to_range_string(&c->cpu_set);
4433 fprintf(f, "%sCPUAffinity: %s\n", prefix, affinity);
94f04347
LP
4434 }
4435
b070c7c0
MS
4436 if (mpol_is_valid(numa_policy_get_type(&c->numa_policy))) {
4437 _cleanup_free_ char *nodes = NULL;
4438
4439 nodes = cpu_set_to_range_string(&c->numa_policy.nodes);
4440 fprintf(f, "%sNUMAPolicy: %s\n", prefix, mpol_to_string(numa_policy_get_type(&c->numa_policy)));
4441 fprintf(f, "%sNUMAMask: %s\n", prefix, strnull(nodes));
4442 }
4443
3a43da28 4444 if (c->timer_slack_nsec != NSEC_INFINITY)
ccd06097 4445 fprintf(f, "%sTimerSlackNSec: "NSEC_FMT "\n", prefix, c->timer_slack_nsec);
94f04347
LP
4446
4447 fprintf(f,
80876c20
LP
4448 "%sStandardInput: %s\n"
4449 "%sStandardOutput: %s\n"
4450 "%sStandardError: %s\n",
4451 prefix, exec_input_to_string(c->std_input),
4452 prefix, exec_output_to_string(c->std_output),
4453 prefix, exec_output_to_string(c->std_error));
4454
befc4a80
LP
4455 if (c->std_input == EXEC_INPUT_NAMED_FD)
4456 fprintf(f, "%sStandardInputFileDescriptorName: %s\n", prefix, c->stdio_fdname[STDIN_FILENO]);
4457 if (c->std_output == EXEC_OUTPUT_NAMED_FD)
4458 fprintf(f, "%sStandardOutputFileDescriptorName: %s\n", prefix, c->stdio_fdname[STDOUT_FILENO]);
4459 if (c->std_error == EXEC_OUTPUT_NAMED_FD)
4460 fprintf(f, "%sStandardErrorFileDescriptorName: %s\n", prefix, c->stdio_fdname[STDERR_FILENO]);
4461
4462 if (c->std_input == EXEC_INPUT_FILE)
4463 fprintf(f, "%sStandardInputFile: %s\n", prefix, c->stdio_file[STDIN_FILENO]);
4464 if (c->std_output == EXEC_OUTPUT_FILE)
4465 fprintf(f, "%sStandardOutputFile: %s\n", prefix, c->stdio_file[STDOUT_FILENO]);
566b7d23
ZD
4466 if (c->std_output == EXEC_OUTPUT_FILE_APPEND)
4467 fprintf(f, "%sStandardOutputFileToAppend: %s\n", prefix, c->stdio_file[STDOUT_FILENO]);
befc4a80
LP
4468 if (c->std_error == EXEC_OUTPUT_FILE)
4469 fprintf(f, "%sStandardErrorFile: %s\n", prefix, c->stdio_file[STDERR_FILENO]);
566b7d23
ZD
4470 if (c->std_error == EXEC_OUTPUT_FILE_APPEND)
4471 fprintf(f, "%sStandardErrorFileToAppend: %s\n", prefix, c->stdio_file[STDERR_FILENO]);
befc4a80 4472
80876c20
LP
4473 if (c->tty_path)
4474 fprintf(f,
6ea832a2
LP
4475 "%sTTYPath: %s\n"
4476 "%sTTYReset: %s\n"
4477 "%sTTYVHangup: %s\n"
4478 "%sTTYVTDisallocate: %s\n",
4479 prefix, c->tty_path,
4480 prefix, yes_no(c->tty_reset),
4481 prefix, yes_no(c->tty_vhangup),
4482 prefix, yes_no(c->tty_vt_disallocate));
94f04347 4483
9f6444eb
LP
4484 if (IN_SET(c->std_output,
4485 EXEC_OUTPUT_SYSLOG,
4486 EXEC_OUTPUT_KMSG,
4487 EXEC_OUTPUT_JOURNAL,
4488 EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
4489 EXEC_OUTPUT_KMSG_AND_CONSOLE,
4490 EXEC_OUTPUT_JOURNAL_AND_CONSOLE) ||
4491 IN_SET(c->std_error,
4492 EXEC_OUTPUT_SYSLOG,
4493 EXEC_OUTPUT_KMSG,
4494 EXEC_OUTPUT_JOURNAL,
4495 EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
4496 EXEC_OUTPUT_KMSG_AND_CONSOLE,
4497 EXEC_OUTPUT_JOURNAL_AND_CONSOLE)) {
f8b69d1d 4498
5ce70e5b 4499 _cleanup_free_ char *fac_str = NULL, *lvl_str = NULL;
f8b69d1d 4500
837df140
YW
4501 r = log_facility_unshifted_to_string_alloc(c->syslog_priority >> 3, &fac_str);
4502 if (r >= 0)
4503 fprintf(f, "%sSyslogFacility: %s\n", prefix, fac_str);
f8b69d1d 4504
837df140
YW
4505 r = log_level_to_string_alloc(LOG_PRI(c->syslog_priority), &lvl_str);
4506 if (r >= 0)
4507 fprintf(f, "%sSyslogLevel: %s\n", prefix, lvl_str);
f8b69d1d 4508 }
94f04347 4509
d3070fbd
LP
4510 if (c->log_level_max >= 0) {
4511 _cleanup_free_ char *t = NULL;
4512
4513 (void) log_level_to_string_alloc(c->log_level_max, &t);
4514
4515 fprintf(f, "%sLogLevelMax: %s\n", prefix, strna(t));
4516 }
4517
90fc172e
AZ
4518 if (c->log_rate_limit_interval_usec > 0) {
4519 char buf_timespan[FORMAT_TIMESPAN_MAX];
4520
4521 fprintf(f,
4522 "%sLogRateLimitIntervalSec: %s\n",
4523 prefix, format_timespan(buf_timespan, sizeof(buf_timespan), c->log_rate_limit_interval_usec, USEC_PER_SEC));
4524 }
4525
4526 if (c->log_rate_limit_burst > 0)
4527 fprintf(f, "%sLogRateLimitBurst: %u\n", prefix, c->log_rate_limit_burst);
4528
d3070fbd
LP
4529 if (c->n_log_extra_fields > 0) {
4530 size_t j;
4531
4532 for (j = 0; j < c->n_log_extra_fields; j++) {
4533 fprintf(f, "%sLogExtraFields: ", prefix);
4534 fwrite(c->log_extra_fields[j].iov_base,
4535 1, c->log_extra_fields[j].iov_len,
4536 f);
4537 fputc('\n', f);
4538 }
4539 }
4540
07d46372
YW
4541 if (c->secure_bits) {
4542 _cleanup_free_ char *str = NULL;
4543
4544 r = secure_bits_to_string_alloc(c->secure_bits, &str);
4545 if (r >= 0)
4546 fprintf(f, "%sSecure Bits: %s\n", prefix, str);
4547 }
94f04347 4548
a103496c 4549 if (c->capability_bounding_set != CAP_ALL) {
dd1f5bd0 4550 _cleanup_free_ char *str = NULL;
94f04347 4551
dd1f5bd0
YW
4552 r = capability_set_to_string_alloc(c->capability_bounding_set, &str);
4553 if (r >= 0)
4554 fprintf(f, "%sCapabilityBoundingSet: %s\n", prefix, str);
755d4b67
IP
4555 }
4556
4557 if (c->capability_ambient_set != 0) {
dd1f5bd0 4558 _cleanup_free_ char *str = NULL;
755d4b67 4559
dd1f5bd0
YW
4560 r = capability_set_to_string_alloc(c->capability_ambient_set, &str);
4561 if (r >= 0)
4562 fprintf(f, "%sAmbientCapabilities: %s\n", prefix, str);
94f04347
LP
4563 }
4564
4565 if (c->user)
f2d3769a 4566 fprintf(f, "%sUser: %s\n", prefix, c->user);
94f04347 4567 if (c->group)
f2d3769a 4568 fprintf(f, "%sGroup: %s\n", prefix, c->group);
94f04347 4569
29206d46
LP
4570 fprintf(f, "%sDynamicUser: %s\n", prefix, yes_no(c->dynamic_user));
4571
ac6e8be6 4572 if (!strv_isempty(c->supplementary_groups)) {
94f04347 4573 fprintf(f, "%sSupplementaryGroups:", prefix);
15ae422b
LP
4574 strv_fprintf(f, c->supplementary_groups);
4575 fputs("\n", f);
4576 }
94f04347 4577
5b6319dc 4578 if (c->pam_name)
f2d3769a 4579 fprintf(f, "%sPAMName: %s\n", prefix, c->pam_name);
5b6319dc 4580
58629001 4581 if (!strv_isempty(c->read_write_paths)) {
2a624c36
AP
4582 fprintf(f, "%sReadWritePaths:", prefix);
4583 strv_fprintf(f, c->read_write_paths);
15ae422b
LP
4584 fputs("\n", f);
4585 }
4586
58629001 4587 if (!strv_isempty(c->read_only_paths)) {
2a624c36
AP
4588 fprintf(f, "%sReadOnlyPaths:", prefix);
4589 strv_fprintf(f, c->read_only_paths);
15ae422b
LP
4590 fputs("\n", f);
4591 }
94f04347 4592
58629001 4593 if (!strv_isempty(c->inaccessible_paths)) {
2a624c36
AP
4594 fprintf(f, "%sInaccessiblePaths:", prefix);
4595 strv_fprintf(f, c->inaccessible_paths);
94f04347
LP
4596 fputs("\n", f);
4597 }
2e22afe9 4598
d2d6c096 4599 if (c->n_bind_mounts > 0)
4ca763a9
YW
4600 for (i = 0; i < c->n_bind_mounts; i++)
4601 fprintf(f, "%s%s: %s%s:%s:%s\n", prefix,
d2d6c096 4602 c->bind_mounts[i].read_only ? "BindReadOnlyPaths" : "BindPaths",
4ca763a9 4603 c->bind_mounts[i].ignore_enoent ? "-": "",
d2d6c096
LP
4604 c->bind_mounts[i].source,
4605 c->bind_mounts[i].destination,
4606 c->bind_mounts[i].recursive ? "rbind" : "norbind");
d2d6c096 4607
2abd4e38
YW
4608 if (c->n_temporary_filesystems > 0)
4609 for (i = 0; i < c->n_temporary_filesystems; i++) {
4610 TemporaryFileSystem *t = c->temporary_filesystems + i;
4611
4612 fprintf(f, "%sTemporaryFileSystem: %s%s%s\n", prefix,
4613 t->path,
4614 isempty(t->options) ? "" : ":",
4615 strempty(t->options));
4616 }
4617
169c1bda
LP
4618 if (c->utmp_id)
4619 fprintf(f,
4620 "%sUtmpIdentifier: %s\n",
4621 prefix, c->utmp_id);
7b52a628
MS
4622
4623 if (c->selinux_context)
4624 fprintf(f,
5f8640fb
LP
4625 "%sSELinuxContext: %s%s\n",
4626 prefix, c->selinux_context_ignore ? "-" : "", c->selinux_context);
17df7223 4627
80c21aea
WC
4628 if (c->apparmor_profile)
4629 fprintf(f,
4630 "%sAppArmorProfile: %s%s\n",
4631 prefix, c->apparmor_profile_ignore ? "-" : "", c->apparmor_profile);
4632
4633 if (c->smack_process_label)
4634 fprintf(f,
4635 "%sSmackProcessLabel: %s%s\n",
4636 prefix, c->smack_process_label_ignore ? "-" : "", c->smack_process_label);
4637
050f7277 4638 if (c->personality != PERSONALITY_INVALID)
ac45f971
LP
4639 fprintf(f,
4640 "%sPersonality: %s\n",
4641 prefix, strna(personality_to_string(c->personality)));
4642
78e864e5
TM
4643 fprintf(f,
4644 "%sLockPersonality: %s\n",
4645 prefix, yes_no(c->lock_personality));
4646
17df7223 4647 if (c->syscall_filter) {
349cc4a5 4648#if HAVE_SECCOMP
17df7223 4649 Iterator j;
8cfa775f 4650 void *id, *val;
17df7223 4651 bool first = true;
351a19b1 4652#endif
17df7223
LP
4653
4654 fprintf(f,
57183d11 4655 "%sSystemCallFilter: ",
17df7223
LP
4656 prefix);
4657
4658 if (!c->syscall_whitelist)
4659 fputc('~', f);
4660
349cc4a5 4661#if HAVE_SECCOMP
8cfa775f 4662 HASHMAP_FOREACH_KEY(val, id, c->syscall_filter, j) {
17df7223 4663 _cleanup_free_ char *name = NULL;
8cfa775f
YW
4664 const char *errno_name = NULL;
4665 int num = PTR_TO_INT(val);
17df7223
LP
4666
4667 if (first)
4668 first = false;
4669 else
4670 fputc(' ', f);
4671
57183d11 4672 name = seccomp_syscall_resolve_num_arch(SCMP_ARCH_NATIVE, PTR_TO_INT(id) - 1);
17df7223 4673 fputs(strna(name), f);
8cfa775f
YW
4674
4675 if (num >= 0) {
4676 errno_name = errno_to_name(num);
4677 if (errno_name)
4678 fprintf(f, ":%s", errno_name);
4679 else
4680 fprintf(f, ":%d", num);
4681 }
17df7223 4682 }
351a19b1 4683#endif
17df7223
LP
4684
4685 fputc('\n', f);
4686 }
4687
57183d11 4688 if (c->syscall_archs) {
349cc4a5 4689#if HAVE_SECCOMP
57183d11
LP
4690 Iterator j;
4691 void *id;
4692#endif
4693
4694 fprintf(f,
4695 "%sSystemCallArchitectures:",
4696 prefix);
4697
349cc4a5 4698#if HAVE_SECCOMP
57183d11
LP
4699 SET_FOREACH(id, c->syscall_archs, j)
4700 fprintf(f, " %s", strna(seccomp_arch_to_string(PTR_TO_UINT32(id) - 1)));
4701#endif
4702 fputc('\n', f);
4703 }
4704
add00535
LP
4705 if (exec_context_restrict_namespaces_set(c)) {
4706 _cleanup_free_ char *s = NULL;
4707
86c2a9f1 4708 r = namespace_flags_to_string(c->restrict_namespaces, &s);
add00535
LP
4709 if (r >= 0)
4710 fprintf(f, "%sRestrictNamespaces: %s\n",
4711 prefix, s);
4712 }
4713
a8d08f39
LP
4714 if (c->network_namespace_path)
4715 fprintf(f,
4716 "%sNetworkNamespacePath: %s\n",
4717 prefix, c->network_namespace_path);
4718
3df90f24
YW
4719 if (c->syscall_errno > 0) {
4720 const char *errno_name;
4721
4722 fprintf(f, "%sSystemCallErrorNumber: ", prefix);
4723
4724 errno_name = errno_to_name(c->syscall_errno);
4725 if (errno_name)
4726 fprintf(f, "%s\n", errno_name);
4727 else
4728 fprintf(f, "%d\n", c->syscall_errno);
4729 }
5cb5a6ff
LP
4730}
4731
34cf6c43 4732bool exec_context_maintains_privileges(const ExecContext *c) {
a931ad47
LP
4733 assert(c);
4734
61233823 4735 /* Returns true if the process forked off would run under
a931ad47
LP
4736 * an unchanged UID or as root. */
4737
4738 if (!c->user)
4739 return true;
4740
4741 if (streq(c->user, "root") || streq(c->user, "0"))
4742 return true;
4743
4744 return false;
4745}
4746
34cf6c43 4747int exec_context_get_effective_ioprio(const ExecContext *c) {
7f452159
LP
4748 int p;
4749
4750 assert(c);
4751
4752 if (c->ioprio_set)
4753 return c->ioprio;
4754
4755 p = ioprio_get(IOPRIO_WHO_PROCESS, 0);
4756 if (p < 0)
4757 return IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 4);
4758
4759 return p;
4760}
4761
d3070fbd
LP
4762void exec_context_free_log_extra_fields(ExecContext *c) {
4763 size_t l;
4764
4765 assert(c);
4766
4767 for (l = 0; l < c->n_log_extra_fields; l++)
4768 free(c->log_extra_fields[l].iov_base);
4769 c->log_extra_fields = mfree(c->log_extra_fields);
4770 c->n_log_extra_fields = 0;
4771}
4772
6f765baf
LP
4773void exec_context_revert_tty(ExecContext *c) {
4774 int r;
4775
4776 assert(c);
4777
4778 /* First, reset the TTY (possibly kicking everybody else from the TTY) */
4779 exec_context_tty_reset(c, NULL);
4780
4781 /* And then undo what chown_terminal() did earlier. Note that we only do this if we have a path
4782 * configured. If the TTY was passed to us as file descriptor we assume the TTY is opened and managed
4783 * by whoever passed it to us and thus knows better when and how to chmod()/chown() it back. */
4784
4785 if (exec_context_may_touch_tty(c)) {
4786 const char *path;
4787
4788 path = exec_context_tty_path(c);
4789 if (path) {
4790 r = chmod_and_chown(path, TTY_MODE, 0, TTY_GID);
4791 if (r < 0 && r != -ENOENT)
4792 log_warning_errno(r, "Failed to reset TTY ownership/access mode of %s, ignoring: %m", path);
4793 }
4794 }
4795}
4796
4c2f5842
LP
4797int exec_context_get_clean_directories(
4798 ExecContext *c,
4799 char **prefix,
4800 ExecCleanMask mask,
4801 char ***ret) {
4802
4803 _cleanup_strv_free_ char **l = NULL;
4804 ExecDirectoryType t;
4805 int r;
4806
4807 assert(c);
4808 assert(prefix);
4809 assert(ret);
4810
4811 for (t = 0; t < _EXEC_DIRECTORY_TYPE_MAX; t++) {
4812 char **i;
4813
4814 if (!FLAGS_SET(mask, 1U << t))
4815 continue;
4816
4817 if (!prefix[t])
4818 continue;
4819
4820 STRV_FOREACH(i, c->directories[t].paths) {
4821 char *j;
4822
4823 j = path_join(prefix[t], *i);
4824 if (!j)
4825 return -ENOMEM;
4826
4827 r = strv_consume(&l, j);
4828 if (r < 0)
4829 return r;
7f622a19
YW
4830
4831 /* Also remove private directories unconditionally. */
4832 if (t != EXEC_DIRECTORY_CONFIGURATION) {
4833 j = path_join(prefix[t], "private", *i);
4834 if (!j)
4835 return -ENOMEM;
4836
4837 r = strv_consume(&l, j);
4838 if (r < 0)
4839 return r;
4840 }
4c2f5842
LP
4841 }
4842 }
4843
4844 *ret = TAKE_PTR(l);
4845 return 0;
4846}
4847
4848int exec_context_get_clean_mask(ExecContext *c, ExecCleanMask *ret) {
4849 ExecCleanMask mask = 0;
4850
4851 assert(c);
4852 assert(ret);
4853
4854 for (ExecDirectoryType t = 0; t < _EXEC_DIRECTORY_TYPE_MAX; t++)
4855 if (!strv_isempty(c->directories[t].paths))
4856 mask |= 1U << t;
4857
4858 *ret = mask;
4859 return 0;
4860}
4861
b58b4116 4862void exec_status_start(ExecStatus *s, pid_t pid) {
034c6ed7 4863 assert(s);
5cb5a6ff 4864
2ed26ed0
LP
4865 *s = (ExecStatus) {
4866 .pid = pid,
4867 };
4868
b58b4116
LP
4869 dual_timestamp_get(&s->start_timestamp);
4870}
4871
34cf6c43 4872void exec_status_exit(ExecStatus *s, const ExecContext *context, pid_t pid, int code, int status) {
b58b4116
LP
4873 assert(s);
4874
2ed26ed0
LP
4875 if (s->pid != pid) {
4876 *s = (ExecStatus) {
4877 .pid = pid,
4878 };
4879 }
b58b4116 4880
63983207 4881 dual_timestamp_get(&s->exit_timestamp);
9fb86720 4882
034c6ed7
LP
4883 s->code = code;
4884 s->status = status;
169c1bda 4885
6f765baf
LP
4886 if (context && context->utmp_id)
4887 (void) utmp_put_dead_process(context->utmp_id, pid, code, status);
9fb86720
LP
4888}
4889
6a1d4d9f
LP
4890void exec_status_reset(ExecStatus *s) {
4891 assert(s);
4892
4893 *s = (ExecStatus) {};
4894}
4895
34cf6c43 4896void exec_status_dump(const ExecStatus *s, FILE *f, const char *prefix) {
9fb86720
LP
4897 char buf[FORMAT_TIMESTAMP_MAX];
4898
4899 assert(s);
4900 assert(f);
4901
9fb86720
LP
4902 if (s->pid <= 0)
4903 return;
4904
4c940960
LP
4905 prefix = strempty(prefix);
4906
9fb86720 4907 fprintf(f,
ccd06097
ZJS
4908 "%sPID: "PID_FMT"\n",
4909 prefix, s->pid);
9fb86720 4910
af9d16e1 4911 if (dual_timestamp_is_set(&s->start_timestamp))
9fb86720
LP
4912 fprintf(f,
4913 "%sStart Timestamp: %s\n",
63983207 4914 prefix, format_timestamp(buf, sizeof(buf), s->start_timestamp.realtime));
9fb86720 4915
af9d16e1 4916 if (dual_timestamp_is_set(&s->exit_timestamp))
9fb86720
LP
4917 fprintf(f,
4918 "%sExit Timestamp: %s\n"
4919 "%sExit Code: %s\n"
4920 "%sExit Status: %i\n",
63983207 4921 prefix, format_timestamp(buf, sizeof(buf), s->exit_timestamp.realtime),
9fb86720
LP
4922 prefix, sigchld_code_to_string(s->code),
4923 prefix, s->status);
5cb5a6ff 4924}
44d8db9e 4925
34cf6c43 4926static char *exec_command_line(char **argv) {
44d8db9e
LP
4927 size_t k;
4928 char *n, *p, **a;
4929 bool first = true;
4930
9e2f7c11 4931 assert(argv);
44d8db9e 4932
9164977d 4933 k = 1;
9e2f7c11 4934 STRV_FOREACH(a, argv)
44d8db9e
LP
4935 k += strlen(*a)+3;
4936
5cd9cd35
LP
4937 n = new(char, k);
4938 if (!n)
44d8db9e
LP
4939 return NULL;
4940
4941 p = n;
9e2f7c11 4942 STRV_FOREACH(a, argv) {
44d8db9e
LP
4943
4944 if (!first)
4945 *(p++) = ' ';
4946 else
4947 first = false;
4948
4949 if (strpbrk(*a, WHITESPACE)) {
4950 *(p++) = '\'';
4951 p = stpcpy(p, *a);
4952 *(p++) = '\'';
4953 } else
4954 p = stpcpy(p, *a);
4955
4956 }
4957
9164977d
LP
4958 *p = 0;
4959
44d8db9e
LP
4960 /* FIXME: this doesn't really handle arguments that have
4961 * spaces and ticks in them */
4962
4963 return n;
4964}
4965
34cf6c43 4966static void exec_command_dump(ExecCommand *c, FILE *f, const char *prefix) {
e1d75803 4967 _cleanup_free_ char *cmd = NULL;
4c940960 4968 const char *prefix2;
44d8db9e
LP
4969
4970 assert(c);
4971 assert(f);
4972
4c940960 4973 prefix = strempty(prefix);
63c372cb 4974 prefix2 = strjoina(prefix, "\t");
44d8db9e 4975
9e2f7c11 4976 cmd = exec_command_line(c->argv);
44d8db9e
LP
4977 fprintf(f,
4978 "%sCommand Line: %s\n",
4bbccb02 4979 prefix, cmd ? cmd : strerror_safe(ENOMEM));
44d8db9e 4980
9fb86720 4981 exec_status_dump(&c->exec_status, f, prefix2);
44d8db9e
LP
4982}
4983
4984void exec_command_dump_list(ExecCommand *c, FILE *f, const char *prefix) {
4985 assert(f);
4986
4c940960 4987 prefix = strempty(prefix);
44d8db9e
LP
4988
4989 LIST_FOREACH(command, c, c)
4990 exec_command_dump(c, f, prefix);
4991}
94f04347 4992
a6a80b4f
LP
4993void exec_command_append_list(ExecCommand **l, ExecCommand *e) {
4994 ExecCommand *end;
4995
4996 assert(l);
4997 assert(e);
4998
4999 if (*l) {
35b8ca3a 5000 /* It's kind of important, that we keep the order here */
71fda00f
LP
5001 LIST_FIND_TAIL(command, *l, end);
5002 LIST_INSERT_AFTER(command, *l, end, e);
a6a80b4f
LP
5003 } else
5004 *l = e;
5005}
5006
26fd040d
LP
5007int exec_command_set(ExecCommand *c, const char *path, ...) {
5008 va_list ap;
5009 char **l, *p;
5010
5011 assert(c);
5012 assert(path);
5013
5014 va_start(ap, path);
5015 l = strv_new_ap(path, ap);
5016 va_end(ap);
5017
5018 if (!l)
5019 return -ENOMEM;
5020
250a918d
LP
5021 p = strdup(path);
5022 if (!p) {
26fd040d
LP
5023 strv_free(l);
5024 return -ENOMEM;
5025 }
5026
6897dfe8 5027 free_and_replace(c->path, p);
26fd040d 5028
130d3d22 5029 return strv_free_and_replace(c->argv, l);
26fd040d
LP
5030}
5031
86b23b07 5032int exec_command_append(ExecCommand *c, const char *path, ...) {
e63ff941 5033 _cleanup_strv_free_ char **l = NULL;
86b23b07 5034 va_list ap;
86b23b07
JS
5035 int r;
5036
5037 assert(c);
5038 assert(path);
5039
5040 va_start(ap, path);
5041 l = strv_new_ap(path, ap);
5042 va_end(ap);
5043
5044 if (!l)
5045 return -ENOMEM;
5046
e287086b 5047 r = strv_extend_strv(&c->argv, l, false);
e63ff941 5048 if (r < 0)
86b23b07 5049 return r;
86b23b07
JS
5050
5051 return 0;
5052}
5053
e8a565cb
YW
5054static void *remove_tmpdir_thread(void *p) {
5055 _cleanup_free_ char *path = p;
86b23b07 5056
e8a565cb
YW
5057 (void) rm_rf(path, REMOVE_ROOT|REMOVE_PHYSICAL);
5058 return NULL;
5059}
5060
5061static ExecRuntime* exec_runtime_free(ExecRuntime *rt, bool destroy) {
5062 int r;
5063
5064 if (!rt)
5065 return NULL;
5066
5067 if (rt->manager)
5068 (void) hashmap_remove(rt->manager->exec_runtime_by_id, rt->id);
5069
5070 /* When destroy is true, then rm_rf tmp_dir and var_tmp_dir. */
5071 if (destroy && rt->tmp_dir) {
5072 log_debug("Spawning thread to nuke %s", rt->tmp_dir);
5073
5074 r = asynchronous_job(remove_tmpdir_thread, rt->tmp_dir);
5075 if (r < 0) {
5076 log_warning_errno(r, "Failed to nuke %s: %m", rt->tmp_dir);
5077 free(rt->tmp_dir);
5078 }
5079
5080 rt->tmp_dir = NULL;
5081 }
613b411c 5082
e8a565cb
YW
5083 if (destroy && rt->var_tmp_dir) {
5084 log_debug("Spawning thread to nuke %s", rt->var_tmp_dir);
5085
5086 r = asynchronous_job(remove_tmpdir_thread, rt->var_tmp_dir);
5087 if (r < 0) {
5088 log_warning_errno(r, "Failed to nuke %s: %m", rt->var_tmp_dir);
5089 free(rt->var_tmp_dir);
5090 }
5091
5092 rt->var_tmp_dir = NULL;
5093 }
5094
5095 rt->id = mfree(rt->id);
5096 rt->tmp_dir = mfree(rt->tmp_dir);
5097 rt->var_tmp_dir = mfree(rt->var_tmp_dir);
5098 safe_close_pair(rt->netns_storage_socket);
5099 return mfree(rt);
5100}
5101
5102static void exec_runtime_freep(ExecRuntime **rt) {
da6bc6ed 5103 (void) exec_runtime_free(*rt, false);
e8a565cb
YW
5104}
5105
8e8009dc
LP
5106static int exec_runtime_allocate(ExecRuntime **ret) {
5107 ExecRuntime *n;
613b411c 5108
8e8009dc 5109 assert(ret);
613b411c 5110
8e8009dc
LP
5111 n = new(ExecRuntime, 1);
5112 if (!n)
613b411c
LP
5113 return -ENOMEM;
5114
8e8009dc
LP
5115 *n = (ExecRuntime) {
5116 .netns_storage_socket = { -1, -1 },
5117 };
5118
5119 *ret = n;
613b411c
LP
5120 return 0;
5121}
5122
e8a565cb
YW
5123static int exec_runtime_add(
5124 Manager *m,
5125 const char *id,
5126 const char *tmp_dir,
5127 const char *var_tmp_dir,
5128 const int netns_storage_socket[2],
5129 ExecRuntime **ret) {
5130
5131 _cleanup_(exec_runtime_freep) ExecRuntime *rt = NULL;
613b411c
LP
5132 int r;
5133
e8a565cb 5134 assert(m);
613b411c
LP
5135 assert(id);
5136
e8a565cb
YW
5137 r = hashmap_ensure_allocated(&m->exec_runtime_by_id, &string_hash_ops);
5138 if (r < 0)
5139 return r;
613b411c 5140
e8a565cb 5141 r = exec_runtime_allocate(&rt);
613b411c
LP
5142 if (r < 0)
5143 return r;
5144
e8a565cb
YW
5145 rt->id = strdup(id);
5146 if (!rt->id)
5147 return -ENOMEM;
5148
5149 if (tmp_dir) {
5150 rt->tmp_dir = strdup(tmp_dir);
5151 if (!rt->tmp_dir)
5152 return -ENOMEM;
5153
5154 /* When tmp_dir is set, then we require var_tmp_dir is also set. */
5155 assert(var_tmp_dir);
5156 rt->var_tmp_dir = strdup(var_tmp_dir);
5157 if (!rt->var_tmp_dir)
5158 return -ENOMEM;
5159 }
5160
5161 if (netns_storage_socket) {
5162 rt->netns_storage_socket[0] = netns_storage_socket[0];
5163 rt->netns_storage_socket[1] = netns_storage_socket[1];
613b411c
LP
5164 }
5165
e8a565cb
YW
5166 r = hashmap_put(m->exec_runtime_by_id, rt->id, rt);
5167 if (r < 0)
5168 return r;
5169
5170 rt->manager = m;
5171
5172 if (ret)
5173 *ret = rt;
5174
5175 /* do not remove created ExecRuntime object when the operation succeeds. */
5176 rt = NULL;
5177 return 0;
5178}
5179
5180static int exec_runtime_make(Manager *m, const ExecContext *c, const char *id, ExecRuntime **ret) {
5181 _cleanup_free_ char *tmp_dir = NULL, *var_tmp_dir = NULL;
2fa3742d 5182 _cleanup_close_pair_ int netns_storage_socket[2] = { -1, -1 };
e8a565cb
YW
5183 int r;
5184
5185 assert(m);
5186 assert(c);
5187 assert(id);
5188
5189 /* It is not necessary to create ExecRuntime object. */
a8d08f39 5190 if (!c->private_network && !c->private_tmp && !c->network_namespace_path)
e8a565cb
YW
5191 return 0;
5192
5193 if (c->private_tmp) {
5194 r = setup_tmp_dirs(id, &tmp_dir, &var_tmp_dir);
613b411c
LP
5195 if (r < 0)
5196 return r;
5197 }
5198
a8d08f39 5199 if (c->private_network || c->network_namespace_path) {
e8a565cb
YW
5200 if (socketpair(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC, 0, netns_storage_socket) < 0)
5201 return -errno;
5202 }
5203
5204 r = exec_runtime_add(m, id, tmp_dir, var_tmp_dir, netns_storage_socket, ret);
5205 if (r < 0)
5206 return r;
5207
5208 /* Avoid cleanup */
2fa3742d 5209 netns_storage_socket[0] = netns_storage_socket[1] = -1;
613b411c
LP
5210 return 1;
5211}
5212
e8a565cb
YW
5213int exec_runtime_acquire(Manager *m, const ExecContext *c, const char *id, bool create, ExecRuntime **ret) {
5214 ExecRuntime *rt;
5215 int r;
613b411c 5216
e8a565cb
YW
5217 assert(m);
5218 assert(id);
5219 assert(ret);
5220
5221 rt = hashmap_get(m->exec_runtime_by_id, id);
5222 if (rt)
5223 /* We already have a ExecRuntime object, let's increase the ref count and reuse it */
5224 goto ref;
5225
5226 if (!create)
5227 return 0;
5228
5229 /* If not found, then create a new object. */
5230 r = exec_runtime_make(m, c, id, &rt);
5231 if (r <= 0)
5232 /* When r == 0, it is not necessary to create ExecRuntime object. */
5233 return r;
613b411c 5234
e8a565cb
YW
5235ref:
5236 /* increment reference counter. */
5237 rt->n_ref++;
5238 *ret = rt;
5239 return 1;
5240}
613b411c 5241
e8a565cb
YW
5242ExecRuntime *exec_runtime_unref(ExecRuntime *rt, bool destroy) {
5243 if (!rt)
613b411c
LP
5244 return NULL;
5245
e8a565cb 5246 assert(rt->n_ref > 0);
613b411c 5247
e8a565cb
YW
5248 rt->n_ref--;
5249 if (rt->n_ref > 0)
f2341e0a
LP
5250 return NULL;
5251
e8a565cb 5252 return exec_runtime_free(rt, destroy);
613b411c
LP
5253}
5254
e8a565cb
YW
5255int exec_runtime_serialize(const Manager *m, FILE *f, FDSet *fds) {
5256 ExecRuntime *rt;
5257 Iterator i;
5258
5259 assert(m);
613b411c
LP
5260 assert(f);
5261 assert(fds);
5262
e8a565cb
YW
5263 HASHMAP_FOREACH(rt, m->exec_runtime_by_id, i) {
5264 fprintf(f, "exec-runtime=%s", rt->id);
613b411c 5265
e8a565cb
YW
5266 if (rt->tmp_dir)
5267 fprintf(f, " tmp-dir=%s", rt->tmp_dir);
613b411c 5268
e8a565cb
YW
5269 if (rt->var_tmp_dir)
5270 fprintf(f, " var-tmp-dir=%s", rt->var_tmp_dir);
613b411c 5271
e8a565cb
YW
5272 if (rt->netns_storage_socket[0] >= 0) {
5273 int copy;
613b411c 5274
e8a565cb
YW
5275 copy = fdset_put_dup(fds, rt->netns_storage_socket[0]);
5276 if (copy < 0)
5277 return copy;
613b411c 5278
e8a565cb
YW
5279 fprintf(f, " netns-socket-0=%i", copy);
5280 }
613b411c 5281
e8a565cb
YW
5282 if (rt->netns_storage_socket[1] >= 0) {
5283 int copy;
613b411c 5284
e8a565cb
YW
5285 copy = fdset_put_dup(fds, rt->netns_storage_socket[1]);
5286 if (copy < 0)
5287 return copy;
613b411c 5288
e8a565cb
YW
5289 fprintf(f, " netns-socket-1=%i", copy);
5290 }
5291
5292 fputc('\n', f);
613b411c
LP
5293 }
5294
5295 return 0;
5296}
5297
e8a565cb
YW
5298int exec_runtime_deserialize_compat(Unit *u, const char *key, const char *value, FDSet *fds) {
5299 _cleanup_(exec_runtime_freep) ExecRuntime *rt_create = NULL;
5300 ExecRuntime *rt;
613b411c
LP
5301 int r;
5302
e8a565cb
YW
5303 /* This is for the migration from old (v237 or earlier) deserialization text.
5304 * Due to the bug #7790, this may not work with the units that use JoinsNamespaceOf=.
5305 * Even if the ExecRuntime object originally created by the other unit, we cannot judge
5306 * so or not from the serialized text, then we always creates a new object owned by this. */
5307
5308 assert(u);
613b411c
LP
5309 assert(key);
5310 assert(value);
5311
e8a565cb
YW
5312 /* Manager manages ExecRuntime objects by the unit id.
5313 * So, we omit the serialized text when the unit does not have id (yet?)... */
5314 if (isempty(u->id)) {
5315 log_unit_debug(u, "Invocation ID not found. Dropping runtime parameter.");
5316 return 0;
5317 }
613b411c 5318
e8a565cb
YW
5319 r = hashmap_ensure_allocated(&u->manager->exec_runtime_by_id, &string_hash_ops);
5320 if (r < 0) {
5321 log_unit_debug_errno(u, r, "Failed to allocate storage for runtime parameter: %m");
5322 return 0;
5323 }
5324
5325 rt = hashmap_get(u->manager->exec_runtime_by_id, u->id);
5326 if (!rt) {
5327 r = exec_runtime_allocate(&rt_create);
613b411c 5328 if (r < 0)
f2341e0a 5329 return log_oom();
613b411c 5330
e8a565cb
YW
5331 rt_create->id = strdup(u->id);
5332 if (!rt_create->id)
5333 return log_oom();
5334
5335 rt = rt_create;
5336 }
5337
5338 if (streq(key, "tmp-dir")) {
5339 char *copy;
5340
613b411c
LP
5341 copy = strdup(value);
5342 if (!copy)
5343 return log_oom();
5344
e8a565cb 5345 free_and_replace(rt->tmp_dir, copy);
613b411c
LP
5346
5347 } else if (streq(key, "var-tmp-dir")) {
5348 char *copy;
5349
613b411c
LP
5350 copy = strdup(value);
5351 if (!copy)
5352 return log_oom();
5353
e8a565cb 5354 free_and_replace(rt->var_tmp_dir, copy);
613b411c
LP
5355
5356 } else if (streq(key, "netns-socket-0")) {
5357 int fd;
5358
e8a565cb 5359 if (safe_atoi(value, &fd) < 0 || !fdset_contains(fds, fd)) {
f2341e0a 5360 log_unit_debug(u, "Failed to parse netns socket value: %s", value);
e8a565cb 5361 return 0;
613b411c 5362 }
e8a565cb
YW
5363
5364 safe_close(rt->netns_storage_socket[0]);
5365 rt->netns_storage_socket[0] = fdset_remove(fds, fd);
5366
613b411c
LP
5367 } else if (streq(key, "netns-socket-1")) {
5368 int fd;
5369
e8a565cb 5370 if (safe_atoi(value, &fd) < 0 || !fdset_contains(fds, fd)) {
f2341e0a 5371 log_unit_debug(u, "Failed to parse netns socket value: %s", value);
e8a565cb 5372 return 0;
613b411c 5373 }
e8a565cb
YW
5374
5375 safe_close(rt->netns_storage_socket[1]);
5376 rt->netns_storage_socket[1] = fdset_remove(fds, fd);
613b411c
LP
5377 } else
5378 return 0;
5379
e8a565cb
YW
5380 /* If the object is newly created, then put it to the hashmap which manages ExecRuntime objects. */
5381 if (rt_create) {
5382 r = hashmap_put(u->manager->exec_runtime_by_id, rt_create->id, rt_create);
5383 if (r < 0) {
3fe91079 5384 log_unit_debug_errno(u, r, "Failed to put runtime parameter to manager's storage: %m");
e8a565cb
YW
5385 return 0;
5386 }
613b411c 5387
e8a565cb 5388 rt_create->manager = u->manager;
613b411c 5389
e8a565cb
YW
5390 /* Avoid cleanup */
5391 rt_create = NULL;
5392 }
98b47d54 5393
e8a565cb
YW
5394 return 1;
5395}
613b411c 5396
e8a565cb
YW
5397void exec_runtime_deserialize_one(Manager *m, const char *value, FDSet *fds) {
5398 char *id = NULL, *tmp_dir = NULL, *var_tmp_dir = NULL;
5399 int r, fd0 = -1, fd1 = -1;
5400 const char *p, *v = value;
5401 size_t n;
613b411c 5402
e8a565cb
YW
5403 assert(m);
5404 assert(value);
5405 assert(fds);
98b47d54 5406
e8a565cb
YW
5407 n = strcspn(v, " ");
5408 id = strndupa(v, n);
5409 if (v[n] != ' ')
5410 goto finalize;
5411 p = v + n + 1;
5412
5413 v = startswith(p, "tmp-dir=");
5414 if (v) {
5415 n = strcspn(v, " ");
5416 tmp_dir = strndupa(v, n);
5417 if (v[n] != ' ')
5418 goto finalize;
5419 p = v + n + 1;
5420 }
5421
5422 v = startswith(p, "var-tmp-dir=");
5423 if (v) {
5424 n = strcspn(v, " ");
5425 var_tmp_dir = strndupa(v, n);
5426 if (v[n] != ' ')
5427 goto finalize;
5428 p = v + n + 1;
5429 }
5430
5431 v = startswith(p, "netns-socket-0=");
5432 if (v) {
5433 char *buf;
5434
5435 n = strcspn(v, " ");
5436 buf = strndupa(v, n);
5437 if (safe_atoi(buf, &fd0) < 0 || !fdset_contains(fds, fd0)) {
5438 log_debug("Unable to process exec-runtime netns fd specification.");
5439 return;
98b47d54 5440 }
e8a565cb
YW
5441 fd0 = fdset_remove(fds, fd0);
5442 if (v[n] != ' ')
5443 goto finalize;
5444 p = v + n + 1;
613b411c
LP
5445 }
5446
e8a565cb
YW
5447 v = startswith(p, "netns-socket-1=");
5448 if (v) {
5449 char *buf;
98b47d54 5450
e8a565cb
YW
5451 n = strcspn(v, " ");
5452 buf = strndupa(v, n);
5453 if (safe_atoi(buf, &fd1) < 0 || !fdset_contains(fds, fd1)) {
5454 log_debug("Unable to process exec-runtime netns fd specification.");
5455 return;
98b47d54 5456 }
e8a565cb
YW
5457 fd1 = fdset_remove(fds, fd1);
5458 }
98b47d54 5459
e8a565cb
YW
5460finalize:
5461
5462 r = exec_runtime_add(m, id, tmp_dir, var_tmp_dir, (int[]) { fd0, fd1 }, NULL);
7d853ca6 5463 if (r < 0)
e8a565cb 5464 log_debug_errno(r, "Failed to add exec-runtime: %m");
e8a565cb 5465}
613b411c 5466
e8a565cb
YW
5467void exec_runtime_vacuum(Manager *m) {
5468 ExecRuntime *rt;
5469 Iterator i;
5470
5471 assert(m);
5472
5473 /* Free unreferenced ExecRuntime objects. This is used after manager deserialization process. */
5474
5475 HASHMAP_FOREACH(rt, m->exec_runtime_by_id, i) {
5476 if (rt->n_ref > 0)
5477 continue;
5478
5479 (void) exec_runtime_free(rt, false);
5480 }
613b411c
LP
5481}
5482
b9c04eaf
YW
5483void exec_params_clear(ExecParameters *p) {
5484 if (!p)
5485 return;
5486
5487 strv_free(p->environment);
5488}
5489
80876c20
LP
5490static const char* const exec_input_table[_EXEC_INPUT_MAX] = {
5491 [EXEC_INPUT_NULL] = "null",
5492 [EXEC_INPUT_TTY] = "tty",
5493 [EXEC_INPUT_TTY_FORCE] = "tty-force",
4f2d528d 5494 [EXEC_INPUT_TTY_FAIL] = "tty-fail",
52c239d7
LB
5495 [EXEC_INPUT_SOCKET] = "socket",
5496 [EXEC_INPUT_NAMED_FD] = "fd",
08f3be7a 5497 [EXEC_INPUT_DATA] = "data",
2038c3f5 5498 [EXEC_INPUT_FILE] = "file",
80876c20
LP
5499};
5500
8a0867d6
LP
5501DEFINE_STRING_TABLE_LOOKUP(exec_input, ExecInput);
5502
94f04347 5503static const char* const exec_output_table[_EXEC_OUTPUT_MAX] = {
80876c20 5504 [EXEC_OUTPUT_INHERIT] = "inherit",
94f04347 5505 [EXEC_OUTPUT_NULL] = "null",
80876c20 5506 [EXEC_OUTPUT_TTY] = "tty",
94f04347 5507 [EXEC_OUTPUT_SYSLOG] = "syslog",
28dbc1e8 5508 [EXEC_OUTPUT_SYSLOG_AND_CONSOLE] = "syslog+console",
9a6bca7a 5509 [EXEC_OUTPUT_KMSG] = "kmsg",
28dbc1e8 5510 [EXEC_OUTPUT_KMSG_AND_CONSOLE] = "kmsg+console",
706343f4
LP
5511 [EXEC_OUTPUT_JOURNAL] = "journal",
5512 [EXEC_OUTPUT_JOURNAL_AND_CONSOLE] = "journal+console",
52c239d7
LB
5513 [EXEC_OUTPUT_SOCKET] = "socket",
5514 [EXEC_OUTPUT_NAMED_FD] = "fd",
2038c3f5 5515 [EXEC_OUTPUT_FILE] = "file",
566b7d23 5516 [EXEC_OUTPUT_FILE_APPEND] = "append",
94f04347
LP
5517};
5518
5519DEFINE_STRING_TABLE_LOOKUP(exec_output, ExecOutput);
023a4f67
LP
5520
5521static const char* const exec_utmp_mode_table[_EXEC_UTMP_MODE_MAX] = {
5522 [EXEC_UTMP_INIT] = "init",
5523 [EXEC_UTMP_LOGIN] = "login",
5524 [EXEC_UTMP_USER] = "user",
5525};
5526
5527DEFINE_STRING_TABLE_LOOKUP(exec_utmp_mode, ExecUtmpMode);
53f47dfc
YW
5528
5529static const char* const exec_preserve_mode_table[_EXEC_PRESERVE_MODE_MAX] = {
5530 [EXEC_PRESERVE_NO] = "no",
5531 [EXEC_PRESERVE_YES] = "yes",
5532 [EXEC_PRESERVE_RESTART] = "restart",
5533};
5534
5535DEFINE_STRING_TABLE_LOOKUP_WITH_BOOLEAN(exec_preserve_mode, ExecPreserveMode, EXEC_PRESERVE_YES);
3536f49e 5536
6b7b2ed9 5537/* This table maps ExecDirectoryType to the setting it is configured with in the unit */
72fd1768 5538static const char* const exec_directory_type_table[_EXEC_DIRECTORY_TYPE_MAX] = {
3536f49e
YW
5539 [EXEC_DIRECTORY_RUNTIME] = "RuntimeDirectory",
5540 [EXEC_DIRECTORY_STATE] = "StateDirectory",
5541 [EXEC_DIRECTORY_CACHE] = "CacheDirectory",
5542 [EXEC_DIRECTORY_LOGS] = "LogsDirectory",
5543 [EXEC_DIRECTORY_CONFIGURATION] = "ConfigurationDirectory",
5544};
5545
5546DEFINE_STRING_TABLE_LOOKUP(exec_directory_type, ExecDirectoryType);
b1edf445 5547
6b7b2ed9
LP
5548/* And this table maps ExecDirectoryType too, but to a generic term identifying the type of resource. This
5549 * one is supposed to be generic enough to be used for unit types that don't use ExecContext and per-unit
5550 * directories, specifically .timer units with their timestamp touch file. */
5551static const char* const exec_resource_type_table[_EXEC_DIRECTORY_TYPE_MAX] = {
5552 [EXEC_DIRECTORY_RUNTIME] = "runtime",
5553 [EXEC_DIRECTORY_STATE] = "state",
5554 [EXEC_DIRECTORY_CACHE] = "cache",
5555 [EXEC_DIRECTORY_LOGS] = "logs",
5556 [EXEC_DIRECTORY_CONFIGURATION] = "configuration",
5557};
5558
5559DEFINE_STRING_TABLE_LOOKUP(exec_resource_type, ExecDirectoryType);
5560
5561/* And this table also maps ExecDirectoryType, to the environment variable we pass the selected directory to
5562 * the service payload in. */
fb2042dd
YW
5563static const char* const exec_directory_env_name_table[_EXEC_DIRECTORY_TYPE_MAX] = {
5564 [EXEC_DIRECTORY_RUNTIME] = "RUNTIME_DIRECTORY",
5565 [EXEC_DIRECTORY_STATE] = "STATE_DIRECTORY",
5566 [EXEC_DIRECTORY_CACHE] = "CACHE_DIRECTORY",
5567 [EXEC_DIRECTORY_LOGS] = "LOGS_DIRECTORY",
5568 [EXEC_DIRECTORY_CONFIGURATION] = "CONFIGURATION_DIRECTORY",
5569};
5570
5571DEFINE_PRIVATE_STRING_TABLE_LOOKUP_TO_STRING(exec_directory_env_name, ExecDirectoryType);
5572
b1edf445
LP
5573static const char* const exec_keyring_mode_table[_EXEC_KEYRING_MODE_MAX] = {
5574 [EXEC_KEYRING_INHERIT] = "inherit",
5575 [EXEC_KEYRING_PRIVATE] = "private",
5576 [EXEC_KEYRING_SHARED] = "shared",
5577};
5578
5579DEFINE_STRING_TABLE_LOOKUP(exec_keyring_mode, ExecKeyringMode);