]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/core/execute.c
basic/log: add the log_struct terminator to macro
[thirdparty/systemd.git] / src / core / execute.c
CommitLineData
53e1b683 1/* SPDX-License-Identifier: LGPL-2.1+ */
a7334b09
LP
2/***
3 This file is part of systemd.
4
5 Copyright 2010 Lennart Poettering
a7334b09
LP
6***/
7
034c6ed7
LP
8#include <errno.h>
9#include <fcntl.h>
8dd4c05b
LP
10#include <glob.h>
11#include <grp.h>
12#include <poll.h>
309bff19 13#include <signal.h>
8dd4c05b 14#include <string.h>
19c0b0b9 15#include <sys/capability.h>
d251207d 16#include <sys/eventfd.h>
f3e43635 17#include <sys/mman.h>
8dd4c05b 18#include <sys/personality.h>
94f04347 19#include <sys/prctl.h>
d2ffa389 20#include <sys/shm.h>
8dd4c05b 21#include <sys/socket.h>
451a074f 22#include <sys/stat.h>
d2ffa389 23#include <sys/types.h>
8dd4c05b
LP
24#include <sys/un.h>
25#include <unistd.h>
023a4f67 26#include <utmpx.h>
5cb5a6ff 27
349cc4a5 28#if HAVE_PAM
5b6319dc
LP
29#include <security/pam_appl.h>
30#endif
31
349cc4a5 32#if HAVE_SELINUX
7b52a628
MS
33#include <selinux/selinux.h>
34#endif
35
349cc4a5 36#if HAVE_SECCOMP
17df7223
LP
37#include <seccomp.h>
38#endif
39
349cc4a5 40#if HAVE_APPARMOR
eef65bf3
MS
41#include <sys/apparmor.h>
42#endif
43
24882e06 44#include "sd-messages.h"
8dd4c05b
LP
45
46#include "af-list.h"
b5efdb8a 47#include "alloc-util.h"
349cc4a5 48#if HAVE_APPARMOR
3ffd4af2
LP
49#include "apparmor-util.h"
50#endif
8dd4c05b
LP
51#include "async.h"
52#include "barrier.h"
8dd4c05b 53#include "cap-list.h"
430f0182 54#include "capability-util.h"
a1164ae3 55#include "chown-recursive.h"
da681e1b 56#include "cpu-set-util.h"
f6a6225e 57#include "def.h"
4d1a6904 58#include "env-util.h"
17df7223 59#include "errno-list.h"
3ffd4af2 60#include "execute.h"
8dd4c05b 61#include "exit-status.h"
3ffd4af2 62#include "fd-util.h"
8dd4c05b 63#include "fileio.h"
f97b34a6 64#include "format-util.h"
f4f15635 65#include "fs-util.h"
7d50b32a 66#include "glob-util.h"
c004493c 67#include "io-util.h"
8dd4c05b 68#include "ioprio.h"
a1164ae3 69#include "label.h"
8dd4c05b
LP
70#include "log.h"
71#include "macro.h"
e8a565cb 72#include "manager.h"
8dd4c05b
LP
73#include "missing.h"
74#include "mkdir.h"
75#include "namespace.h"
6bedfcbb 76#include "parse-util.h"
8dd4c05b 77#include "path-util.h"
0b452006 78#include "process-util.h"
78f22b97 79#include "rlimit-util.h"
8dd4c05b 80#include "rm-rf.h"
349cc4a5 81#if HAVE_SECCOMP
3ffd4af2
LP
82#include "seccomp-util.h"
83#endif
8dd4c05b 84#include "securebits.h"
07d46372 85#include "securebits-util.h"
8dd4c05b 86#include "selinux-util.h"
24882e06 87#include "signal-util.h"
8dd4c05b 88#include "smack-util.h"
57b7a260 89#include "socket-util.h"
fd63e712 90#include "special.h"
949befd3 91#include "stat-util.h"
8b43440b 92#include "string-table.h"
07630cea 93#include "string-util.h"
8dd4c05b 94#include "strv.h"
7ccbd1ae 95#include "syslog-util.h"
8dd4c05b
LP
96#include "terminal-util.h"
97#include "unit.h"
b1d4f8e1 98#include "user-util.h"
8dd4c05b
LP
99#include "util.h"
100#include "utmp-wtmp.h"
5cb5a6ff 101
e056b01d 102#define IDLE_TIMEOUT_USEC (5*USEC_PER_SEC)
31a7eb86 103#define IDLE_TIMEOUT2_USEC (1*USEC_PER_SEC)
e6a26745 104
02a51aba
LP
105/* This assumes there is a 'tty' group */
106#define TTY_MODE 0620
107
531dca78
LP
108#define SNDBUF_SIZE (8*1024*1024)
109
/* Rearranges the passed file descriptors so that fds[k] ends up being fd number k+3, i.e. so that they form
 * a contiguous range right after stdin/stdout/stderr.
 *
 * Note that the fds[] array is modified in place: each entry is replaced by the number the fd was
 * duplicated to, and the previous fd is closed.
 *
 * Returns 0 on success, a negative errno-style error if duplicating an fd fails. */
static int shift_fds(int fds[], size_t n_fds) {
        int first = 0;

        if (n_fds <= 0)
                return 0;

        assert(fds);

        /* Keep making passes over the array until a pass completes without any fd landing on the wrong
         * number. Each pass begins at the first entry the previous pass could not place. */
        while (first >= 0) {
                int retry_at = -1, k;

                for (k = first; k < (int) n_fds; k++) {
                        int target = k + 3, copy;

                        /* This one already sits where it belongs */
                        if (fds[k] == target)
                                continue;

                        /* F_DUPFD hands out the lowest free fd that is >= target */
                        copy = fcntl(fds[k], F_DUPFD, target);
                        if (copy < 0)
                                return -errno;

                        safe_close(fds[k]);
                        fds[k] = copy;

                        /* The slot we wanted was still occupied? Then remember the first such index, so
                         * that the next pass retries from there. */
                        if (copy != target && retry_at < 0)
                                retry_at = k;
                }

                first = retry_at;
        }

        return 0;
}
154
da6053d0
LP
/* Adjusts the file descriptor flags of the fds we are about to pass on: FD_CLOEXEC is dropped from every fd,
 * and O_NONBLOCK is set or cleared (according to 'nonblock') on the first n_socket_fds entries only.
 *
 * Returns 0 on success, or the negative error returned by fd_nonblock()/fd_cloexec(). */
static int flags_fds(const int fds[], size_t n_storage_fds, size_t n_socket_fds, bool nonblock) {
        size_t total = n_storage_fds + n_socket_fds, k;
        int r;

        if (total <= 0)
                return 0;

        assert(fds);

        for (k = 0; k < total; k++) {

                /* O_NONBLOCK is only touched for the socket activation fds */
                if (k < n_socket_fds) {
                        r = fd_nonblock(fds[k], nonblock);
                        if (r < 0)
                                return r;
                }

                /* FD_CLOEXEC is turned off unconditionally, after all these fds are supposed to be
                 * passed to our children */
                r = fd_cloexec(fds[k], false);
                if (r < 0)
                        return r;
        }

        return 0;
}
187
1e22b5cd 188static const char *exec_context_tty_path(const ExecContext *context) {
80876c20
LP
189 assert(context);
190
1e22b5cd
LP
191 if (context->stdio_as_fds)
192 return NULL;
193
80876c20
LP
194 if (context->tty_path)
195 return context->tty_path;
196
197 return "/dev/console";
198}
199
1e22b5cd
LP
200static void exec_context_tty_reset(const ExecContext *context, const ExecParameters *p) {
201 const char *path;
202
6ea832a2
LP
203 assert(context);
204
1e22b5cd 205 path = exec_context_tty_path(context);
6ea832a2 206
1e22b5cd
LP
207 if (context->tty_vhangup) {
208 if (p && p->stdin_fd >= 0)
209 (void) terminal_vhangup_fd(p->stdin_fd);
210 else if (path)
211 (void) terminal_vhangup(path);
212 }
6ea832a2 213
1e22b5cd
LP
214 if (context->tty_reset) {
215 if (p && p->stdin_fd >= 0)
216 (void) reset_terminal_fd(p->stdin_fd, true);
217 else if (path)
218 (void) reset_terminal(path);
219 }
220
221 if (context->tty_vt_disallocate && path)
222 (void) vt_disallocate(path);
6ea832a2
LP
223}
224
6af760f3
LP
225static bool is_terminal_input(ExecInput i) {
226 return IN_SET(i,
227 EXEC_INPUT_TTY,
228 EXEC_INPUT_TTY_FORCE,
229 EXEC_INPUT_TTY_FAIL);
230}
231
3a1286b6 232static bool is_terminal_output(ExecOutput o) {
6af760f3
LP
233 return IN_SET(o,
234 EXEC_OUTPUT_TTY,
235 EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
236 EXEC_OUTPUT_KMSG_AND_CONSOLE,
237 EXEC_OUTPUT_JOURNAL_AND_CONSOLE);
238}
239
aac8c0c3
LP
240static bool is_syslog_output(ExecOutput o) {
241 return IN_SET(o,
242 EXEC_OUTPUT_SYSLOG,
243 EXEC_OUTPUT_SYSLOG_AND_CONSOLE);
244}
245
246static bool is_kmsg_output(ExecOutput o) {
247 return IN_SET(o,
248 EXEC_OUTPUT_KMSG,
249 EXEC_OUTPUT_KMSG_AND_CONSOLE);
250}
251
6af760f3
LP
252static bool exec_context_needs_term(const ExecContext *c) {
253 assert(c);
254
255 /* Return true if the execution context suggests we should set $TERM to something useful. */
256
257 if (is_terminal_input(c->std_input))
258 return true;
259
260 if (is_terminal_output(c->std_output))
261 return true;
262
263 if (is_terminal_output(c->std_error))
264 return true;
265
266 return !!c->tty_path;
3a1286b6
MS
267}
268
/* Opens /dev/null with the specified access flags (O_NOCTTY is always added) and hands the resulting fd to
 * move_fd() so that it ends up as fd number 'nfd'. Returns a negative errno if the open() fails, otherwise
 * whatever move_fd() returns. */
static int open_null_as(int flags, int nfd) {
        int null_fd;

        assert(nfd >= 0);

        null_fd = open("/dev/null", flags|O_NOCTTY);

        return null_fd < 0 ? -errno : move_fd(null_fd, nfd, false);
}
280
524daa8c 281static int connect_journal_socket(int fd, uid_t uid, gid_t gid) {
92a17af9 282 static const union sockaddr_union sa = {
b92bea5d
ZJS
283 .un.sun_family = AF_UNIX,
284 .un.sun_path = "/run/systemd/journal/stdout",
285 };
524daa8c
ZJS
286 uid_t olduid = UID_INVALID;
287 gid_t oldgid = GID_INVALID;
288 int r;
289
cad93f29 290 if (gid_is_valid(gid)) {
524daa8c
ZJS
291 oldgid = getgid();
292
92a17af9 293 if (setegid(gid) < 0)
524daa8c
ZJS
294 return -errno;
295 }
296
cad93f29 297 if (uid_is_valid(uid)) {
524daa8c
ZJS
298 olduid = getuid();
299
92a17af9 300 if (seteuid(uid) < 0) {
524daa8c
ZJS
301 r = -errno;
302 goto restore_gid;
303 }
304 }
305
92a17af9 306 r = connect(fd, &sa.sa, SOCKADDR_UN_LEN(sa.un)) < 0 ? -errno : 0;
524daa8c
ZJS
307
308 /* If we fail to restore the uid or gid, things will likely
309 fail later on. This should only happen if an LSM interferes. */
310
cad93f29 311 if (uid_is_valid(uid))
524daa8c
ZJS
312 (void) seteuid(olduid);
313
314 restore_gid:
cad93f29 315 if (gid_is_valid(gid))
524daa8c
ZJS
316 (void) setegid(oldgid);
317
318 return r;
319}
320
fd1f9c89 321static int connect_logger_as(
34cf6c43 322 const Unit *unit,
fd1f9c89 323 const ExecContext *context,
af635cf3 324 const ExecParameters *params,
fd1f9c89
LP
325 ExecOutput output,
326 const char *ident,
fd1f9c89
LP
327 int nfd,
328 uid_t uid,
329 gid_t gid) {
330
524daa8c 331 int fd, r;
071830ff
LP
332
333 assert(context);
af635cf3 334 assert(params);
80876c20
LP
335 assert(output < _EXEC_OUTPUT_MAX);
336 assert(ident);
337 assert(nfd >= 0);
071830ff 338
54fe0cdb
LP
339 fd = socket(AF_UNIX, SOCK_STREAM, 0);
340 if (fd < 0)
80876c20 341 return -errno;
071830ff 342
524daa8c
ZJS
343 r = connect_journal_socket(fd, uid, gid);
344 if (r < 0)
345 return r;
071830ff 346
80876c20 347 if (shutdown(fd, SHUT_RD) < 0) {
03e334a1 348 safe_close(fd);
80876c20
LP
349 return -errno;
350 }
071830ff 351
fd1f9c89 352 (void) fd_inc_sndbuf(fd, SNDBUF_SIZE);
531dca78 353
80876c20 354 dprintf(fd,
62bca2c6 355 "%s\n"
80876c20
LP
356 "%s\n"
357 "%i\n"
54fe0cdb
LP
358 "%i\n"
359 "%i\n"
360 "%i\n"
4f4a1dbf 361 "%i\n",
c867611e 362 context->syslog_identifier ?: ident,
af635cf3 363 params->flags & EXEC_PASS_LOG_UNIT ? unit->id : "",
54fe0cdb
LP
364 context->syslog_priority,
365 !!context->syslog_level_prefix,
aac8c0c3
LP
366 is_syslog_output(output),
367 is_kmsg_output(output),
3a1286b6 368 is_terminal_output(output));
80876c20 369
046a82c1 370 return move_fd(fd, nfd, false);
80876c20 371}
/* Opens the terminal at 'path' with the specified flags (O_NOCTTY is always added) and hands the resulting
 * fd to move_fd() so that it ends up as fd number 'nfd'. A negative result of open_terminal() is returned
 * as is. */
static int open_terminal_as(const char *path, int flags, int nfd) {
        int tty_fd;

        assert(path);
        assert(nfd >= 0);

        tty_fd = open_terminal(path, flags | O_NOCTTY);

        return tty_fd < 0 ? tty_fd : move_fd(tty_fd, nfd, false);
}
071830ff 384
2038c3f5
LP
385static int acquire_path(const char *path, int flags, mode_t mode) {
386 union sockaddr_union sa = {
387 .sa.sa_family = AF_UNIX,
388 };
80876c20 389 int fd, r;
071830ff 390
80876c20 391 assert(path);
071830ff 392
2038c3f5
LP
393 if (IN_SET(flags & O_ACCMODE, O_WRONLY, O_RDWR))
394 flags |= O_CREAT;
395
396 fd = open(path, flags|O_NOCTTY, mode);
397 if (fd >= 0)
80876c20 398 return fd;
071830ff 399
2038c3f5
LP
400 if (errno != ENXIO) /* ENXIO is returned when we try to open() an AF_UNIX file system socket on Linux */
401 return -errno;
402 if (strlen(path) > sizeof(sa.un.sun_path)) /* Too long, can't be a UNIX socket */
403 return -ENXIO;
404
405 /* So, it appears the specified path could be an AF_UNIX socket. Let's see if we can connect to it. */
406
407 fd = socket(AF_UNIX, SOCK_STREAM, 0);
408 if (fd < 0)
409 return -errno;
410
411 strncpy(sa.un.sun_path, path, sizeof(sa.un.sun_path));
412 if (connect(fd, &sa.sa, SOCKADDR_UN_LEN(sa.un)) < 0) {
03e334a1 413 safe_close(fd);
2038c3f5
LP
414 return errno == EINVAL ? -ENXIO : -errno; /* Propagate initial error if we get EINVAL, i.e. we have
415 * indication that his wasn't an AF_UNIX socket after all */
416 }
071830ff 417
2038c3f5
LP
418 if ((flags & O_ACCMODE) == O_RDONLY)
419 r = shutdown(fd, SHUT_WR);
420 else if ((flags & O_ACCMODE) == O_WRONLY)
421 r = shutdown(fd, SHUT_RD);
422 else
423 return fd;
424 if (r < 0) {
425 safe_close(fd);
426 return -errno;
427 }
428
429 return fd;
80876c20 430}
071830ff 431
08f3be7a
LP
432static int fixup_input(
433 const ExecContext *context,
434 int socket_fd,
435 bool apply_tty_stdin) {
436
437 ExecInput std_input;
438
439 assert(context);
440
441 std_input = context->std_input;
1e3ad081
LP
442
443 if (is_terminal_input(std_input) && !apply_tty_stdin)
444 return EXEC_INPUT_NULL;
071830ff 445
03fd9c49 446 if (std_input == EXEC_INPUT_SOCKET && socket_fd < 0)
4f2d528d
LP
447 return EXEC_INPUT_NULL;
448
08f3be7a
LP
449 if (std_input == EXEC_INPUT_DATA && context->stdin_data_size == 0)
450 return EXEC_INPUT_NULL;
451
03fd9c49 452 return std_input;
4f2d528d
LP
453}
454
03fd9c49 455static int fixup_output(ExecOutput std_output, int socket_fd) {
4f2d528d 456
03fd9c49 457 if (std_output == EXEC_OUTPUT_SOCKET && socket_fd < 0)
4f2d528d
LP
458 return EXEC_OUTPUT_INHERIT;
459
03fd9c49 460 return std_output;
4f2d528d
LP
461}
462
a34ceba6
LP
463static int setup_input(
464 const ExecContext *context,
465 const ExecParameters *params,
52c239d7
LB
466 int socket_fd,
467 int named_iofds[3]) {
a34ceba6 468
4f2d528d
LP
469 ExecInput i;
470
471 assert(context);
a34ceba6
LP
472 assert(params);
473
474 if (params->stdin_fd >= 0) {
475 if (dup2(params->stdin_fd, STDIN_FILENO) < 0)
476 return -errno;
477
478 /* Try to make this the controlling tty, if it is a tty, and reset it */
1fb0682e
LP
479 if (isatty(STDIN_FILENO)) {
480 (void) ioctl(STDIN_FILENO, TIOCSCTTY, context->std_input == EXEC_INPUT_TTY_FORCE);
481 (void) reset_terminal_fd(STDIN_FILENO, true);
482 }
a34ceba6
LP
483
484 return STDIN_FILENO;
485 }
4f2d528d 486
08f3be7a 487 i = fixup_input(context, socket_fd, params->flags & EXEC_APPLY_TTY_STDIN);
4f2d528d
LP
488
489 switch (i) {
071830ff 490
80876c20
LP
491 case EXEC_INPUT_NULL:
492 return open_null_as(O_RDONLY, STDIN_FILENO);
493
494 case EXEC_INPUT_TTY:
495 case EXEC_INPUT_TTY_FORCE:
496 case EXEC_INPUT_TTY_FAIL: {
046a82c1 497 int fd;
071830ff 498
1e22b5cd 499 fd = acquire_terminal(exec_context_tty_path(context),
8854d795
LP
500 i == EXEC_INPUT_TTY_FAIL ? ACQUIRE_TERMINAL_TRY :
501 i == EXEC_INPUT_TTY_FORCE ? ACQUIRE_TERMINAL_FORCE :
502 ACQUIRE_TERMINAL_WAIT,
3a43da28 503 USEC_INFINITY);
970edce6 504 if (fd < 0)
80876c20
LP
505 return fd;
506
046a82c1 507 return move_fd(fd, STDIN_FILENO, false);
80876c20
LP
508 }
509
4f2d528d 510 case EXEC_INPUT_SOCKET:
e75a9ed1
LP
511 assert(socket_fd >= 0);
512
4f2d528d
LP
513 return dup2(socket_fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
514
52c239d7 515 case EXEC_INPUT_NAMED_FD:
e75a9ed1
LP
516 assert(named_iofds[STDIN_FILENO] >= 0);
517
52c239d7
LB
518 (void) fd_nonblock(named_iofds[STDIN_FILENO], false);
519 return dup2(named_iofds[STDIN_FILENO], STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
520
08f3be7a
LP
521 case EXEC_INPUT_DATA: {
522 int fd;
523
524 fd = acquire_data_fd(context->stdin_data, context->stdin_data_size, 0);
525 if (fd < 0)
526 return fd;
527
528 return move_fd(fd, STDIN_FILENO, false);
529 }
530
2038c3f5
LP
531 case EXEC_INPUT_FILE: {
532 bool rw;
533 int fd;
534
535 assert(context->stdio_file[STDIN_FILENO]);
536
537 rw = (context->std_output == EXEC_OUTPUT_FILE && streq_ptr(context->stdio_file[STDIN_FILENO], context->stdio_file[STDOUT_FILENO])) ||
538 (context->std_error == EXEC_OUTPUT_FILE && streq_ptr(context->stdio_file[STDIN_FILENO], context->stdio_file[STDERR_FILENO]));
539
540 fd = acquire_path(context->stdio_file[STDIN_FILENO], rw ? O_RDWR : O_RDONLY, 0666 & ~context->umask);
541 if (fd < 0)
542 return fd;
543
544 return move_fd(fd, STDIN_FILENO, false);
545 }
546
80876c20
LP
547 default:
548 assert_not_reached("Unknown input type");
549 }
550}
551
a34ceba6 552static int setup_output(
34cf6c43 553 const Unit *unit,
a34ceba6
LP
554 const ExecContext *context,
555 const ExecParameters *params,
556 int fileno,
557 int socket_fd,
52c239d7 558 int named_iofds[3],
a34ceba6 559 const char *ident,
7bce046b
LP
560 uid_t uid,
561 gid_t gid,
562 dev_t *journal_stream_dev,
563 ino_t *journal_stream_ino) {
a34ceba6 564
4f2d528d
LP
565 ExecOutput o;
566 ExecInput i;
47c1d80d 567 int r;
4f2d528d 568
f2341e0a 569 assert(unit);
80876c20 570 assert(context);
a34ceba6 571 assert(params);
80876c20 572 assert(ident);
7bce046b
LP
573 assert(journal_stream_dev);
574 assert(journal_stream_ino);
80876c20 575
a34ceba6
LP
576 if (fileno == STDOUT_FILENO && params->stdout_fd >= 0) {
577
578 if (dup2(params->stdout_fd, STDOUT_FILENO) < 0)
579 return -errno;
580
581 return STDOUT_FILENO;
582 }
583
584 if (fileno == STDERR_FILENO && params->stderr_fd >= 0) {
585 if (dup2(params->stderr_fd, STDERR_FILENO) < 0)
586 return -errno;
587
588 return STDERR_FILENO;
589 }
590
08f3be7a 591 i = fixup_input(context, socket_fd, params->flags & EXEC_APPLY_TTY_STDIN);
03fd9c49 592 o = fixup_output(context->std_output, socket_fd);
4f2d528d 593
eb17e935
MS
594 if (fileno == STDERR_FILENO) {
595 ExecOutput e;
596 e = fixup_output(context->std_error, socket_fd);
80876c20 597
eb17e935
MS
598 /* This expects the input and output are already set up */
599
600 /* Don't change the stderr file descriptor if we inherit all
601 * the way and are not on a tty */
602 if (e == EXEC_OUTPUT_INHERIT &&
603 o == EXEC_OUTPUT_INHERIT &&
604 i == EXEC_INPUT_NULL &&
605 !is_terminal_input(context->std_input) &&
606 getppid () != 1)
607 return fileno;
608
609 /* Duplicate from stdout if possible */
52c239d7 610 if ((e == o && e != EXEC_OUTPUT_NAMED_FD) || e == EXEC_OUTPUT_INHERIT)
eb17e935 611 return dup2(STDOUT_FILENO, fileno) < 0 ? -errno : fileno;
071830ff 612
eb17e935 613 o = e;
80876c20 614
eb17e935 615 } else if (o == EXEC_OUTPUT_INHERIT) {
21d21ea4
LP
616 /* If input got downgraded, inherit the original value */
617 if (i == EXEC_INPUT_NULL && is_terminal_input(context->std_input))
1e22b5cd 618 return open_terminal_as(exec_context_tty_path(context), O_WRONLY, fileno);
21d21ea4 619
08f3be7a
LP
620 /* If the input is connected to anything that's not a /dev/null or a data fd, inherit that... */
621 if (!IN_SET(i, EXEC_INPUT_NULL, EXEC_INPUT_DATA))
eb17e935 622 return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
071830ff 623
acb591e4
LP
624 /* If we are not started from PID 1 we just inherit STDOUT from our parent process. */
625 if (getppid() != 1)
eb17e935 626 return fileno;
94f04347 627
eb17e935
MS
628 /* We need to open /dev/null here anew, to get the right access mode. */
629 return open_null_as(O_WRONLY, fileno);
071830ff 630 }
94f04347 631
eb17e935 632 switch (o) {
80876c20
LP
633
634 case EXEC_OUTPUT_NULL:
eb17e935 635 return open_null_as(O_WRONLY, fileno);
80876c20
LP
636
637 case EXEC_OUTPUT_TTY:
4f2d528d 638 if (is_terminal_input(i))
eb17e935 639 return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
80876c20
LP
640
641 /* We don't reset the terminal if this is just about output */
1e22b5cd 642 return open_terminal_as(exec_context_tty_path(context), O_WRONLY, fileno);
80876c20
LP
643
644 case EXEC_OUTPUT_SYSLOG:
28dbc1e8 645 case EXEC_OUTPUT_SYSLOG_AND_CONSOLE:
9a6bca7a 646 case EXEC_OUTPUT_KMSG:
28dbc1e8 647 case EXEC_OUTPUT_KMSG_AND_CONSOLE:
706343f4
LP
648 case EXEC_OUTPUT_JOURNAL:
649 case EXEC_OUTPUT_JOURNAL_AND_CONSOLE:
af635cf3 650 r = connect_logger_as(unit, context, params, o, ident, fileno, uid, gid);
47c1d80d 651 if (r < 0) {
82677ae4 652 log_unit_warning_errno(unit, r, "Failed to connect %s to the journal socket, ignoring: %m", fileno == STDOUT_FILENO ? "stdout" : "stderr");
eb17e935 653 r = open_null_as(O_WRONLY, fileno);
7bce046b
LP
654 } else {
655 struct stat st;
656
657 /* If we connected this fd to the journal via a stream, patch the device/inode into the passed
658 * parameters, but only then. This is useful so that we can set $JOURNAL_STREAM that permits
ab2116b1
LP
659 * services to detect whether they are connected to the journal or not.
660 *
661 * If both stdout and stderr are connected to a stream then let's make sure to store the data
662 * about STDERR as that's usually the best way to do logging. */
7bce046b 663
ab2116b1
LP
664 if (fstat(fileno, &st) >= 0 &&
665 (*journal_stream_ino == 0 || fileno == STDERR_FILENO)) {
7bce046b
LP
666 *journal_stream_dev = st.st_dev;
667 *journal_stream_ino = st.st_ino;
668 }
47c1d80d
MS
669 }
670 return r;
4f2d528d
LP
671
672 case EXEC_OUTPUT_SOCKET:
673 assert(socket_fd >= 0);
e75a9ed1 674
eb17e935 675 return dup2(socket_fd, fileno) < 0 ? -errno : fileno;
94f04347 676
52c239d7 677 case EXEC_OUTPUT_NAMED_FD:
e75a9ed1
LP
678 assert(named_iofds[fileno] >= 0);
679
52c239d7
LB
680 (void) fd_nonblock(named_iofds[fileno], false);
681 return dup2(named_iofds[fileno], fileno) < 0 ? -errno : fileno;
682
2038c3f5
LP
683 case EXEC_OUTPUT_FILE: {
684 bool rw;
685 int fd;
686
687 assert(context->stdio_file[fileno]);
688
689 rw = context->std_input == EXEC_INPUT_FILE &&
690 streq_ptr(context->stdio_file[fileno], context->stdio_file[STDIN_FILENO]);
691
692 if (rw)
693 return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
694
695 fd = acquire_path(context->stdio_file[fileno], O_WRONLY, 0666 & ~context->umask);
696 if (fd < 0)
697 return fd;
698
699 return move_fd(fd, fileno, false);
700 }
701
94f04347 702 default:
80876c20 703 assert_not_reached("Unknown error type");
94f04347 704 }
071830ff
LP
705}
706
02a51aba
LP
707static int chown_terminal(int fd, uid_t uid) {
708 struct stat st;
709
710 assert(fd >= 0);
02a51aba 711
1ff74fb6
LP
712 /* Before we chown/chmod the TTY, let's ensure this is actually a tty */
713 if (isatty(fd) < 1)
714 return 0;
715
02a51aba 716 /* This might fail. What matters are the results. */
bab45044
LP
717 (void) fchown(fd, uid, -1);
718 (void) fchmod(fd, TTY_MODE);
02a51aba
LP
719
720 if (fstat(fd, &st) < 0)
721 return -errno;
722
d8b4e2e9 723 if (st.st_uid != uid || (st.st_mode & 0777) != TTY_MODE)
02a51aba
LP
724 return -EPERM;
725
726 return 0;
727}
728
7d5ceb64 729static int setup_confirm_stdio(const char *vc, int *_saved_stdin, int *_saved_stdout) {
3d18b167
LP
730 _cleanup_close_ int fd = -1, saved_stdin = -1, saved_stdout = -1;
731 int r;
80876c20 732
80876c20
LP
733 assert(_saved_stdin);
734 assert(_saved_stdout);
735
af6da548
LP
736 saved_stdin = fcntl(STDIN_FILENO, F_DUPFD, 3);
737 if (saved_stdin < 0)
738 return -errno;
80876c20 739
af6da548 740 saved_stdout = fcntl(STDOUT_FILENO, F_DUPFD, 3);
3d18b167
LP
741 if (saved_stdout < 0)
742 return -errno;
80876c20 743
8854d795 744 fd = acquire_terminal(vc, ACQUIRE_TERMINAL_WAIT, DEFAULT_CONFIRM_USEC);
3d18b167
LP
745 if (fd < 0)
746 return fd;
80876c20 747
af6da548
LP
748 r = chown_terminal(fd, getuid());
749 if (r < 0)
3d18b167 750 return r;
02a51aba 751
3d18b167
LP
752 r = reset_terminal_fd(fd, true);
753 if (r < 0)
754 return r;
80876c20 755
2b33ab09 756 r = rearrange_stdio(fd, fd, STDERR_FILENO);
3d18b167 757 fd = -1;
2b33ab09
LP
758 if (r < 0)
759 return r;
80876c20
LP
760
761 *_saved_stdin = saved_stdin;
762 *_saved_stdout = saved_stdout;
763
3d18b167 764 saved_stdin = saved_stdout = -1;
80876c20 765
3d18b167 766 return 0;
80876c20
LP
767}
768
63d77c92 769static void write_confirm_error_fd(int err, int fd, const Unit *u) {
3b20f877
FB
770 assert(err < 0);
771
772 if (err == -ETIMEDOUT)
63d77c92 773 dprintf(fd, "Confirmation question timed out for %s, assuming positive response.\n", u->id);
3b20f877
FB
774 else {
775 errno = -err;
63d77c92 776 dprintf(fd, "Couldn't ask confirmation for %s: %m, assuming positive response.\n", u->id);
3b20f877
FB
777 }
778}
779
63d77c92 780static void write_confirm_error(int err, const char *vc, const Unit *u) {
03e334a1 781 _cleanup_close_ int fd = -1;
80876c20 782
3b20f877 783 assert(vc);
80876c20 784
7d5ceb64 785 fd = open_terminal(vc, O_WRONLY|O_NOCTTY|O_CLOEXEC);
af6da548 786 if (fd < 0)
3b20f877 787 return;
80876c20 788
63d77c92 789 write_confirm_error_fd(err, fd, u);
af6da548 790}
80876c20 791
/* Undoes setup_confirm_stdio(): releases the terminal, puts the saved fds back in place as stdin and stdout
 * (where they are valid) and closes the saved fds, storing the result of safe_close() back into them.
 *
 * Returns 0 on success. If restoring fails a negative errno is returned, the one for stdout winning if both
 * fail. */
static int restore_confirm_stdio(int *saved_stdin, int *saved_stdout) {
        int ret = 0;

        assert(saved_stdin);
        assert(saved_stdout);

        release_terminal();

        if (*saved_stdin >= 0 && dup2(*saved_stdin, STDIN_FILENO) < 0)
                ret = -errno;

        if (*saved_stdout >= 0 && dup2(*saved_stdout, STDOUT_FILENO) < 0)
                ret = -errno;

        *saved_stdin = safe_close(*saved_stdin);
        *saved_stdout = safe_close(*saved_stdout);

        return ret;
}
813
3b20f877
FB
/* The possible outcomes of ask_for_confirmation() */
enum {
        CONFIRM_PRETEND_FAILURE = -1,   /* don't execute the command, pretend it failed */
        CONFIRM_PRETEND_SUCCESS =  0,   /* don't execute the command, pretend it succeeded */
        CONFIRM_EXECUTE         =  1,   /* execute the command */
};
819
eedf223a 820static int ask_for_confirmation(const char *vc, Unit *u, const char *cmdline) {
af6da548 821 int saved_stdout = -1, saved_stdin = -1, r;
2bcd3c26 822 _cleanup_free_ char *e = NULL;
3b20f877 823 char c;
af6da548 824
3b20f877 825 /* For any internal errors, assume a positive response. */
7d5ceb64 826 r = setup_confirm_stdio(vc, &saved_stdin, &saved_stdout);
3b20f877 827 if (r < 0) {
63d77c92 828 write_confirm_error(r, vc, u);
3b20f877
FB
829 return CONFIRM_EXECUTE;
830 }
af6da548 831
b0eb2944
FB
832 /* confirm_spawn might have been disabled while we were sleeping. */
833 if (manager_is_confirm_spawn_disabled(u->manager)) {
834 r = 1;
835 goto restore_stdio;
836 }
af6da548 837
2bcd3c26
FB
838 e = ellipsize(cmdline, 60, 100);
839 if (!e) {
840 log_oom();
841 r = CONFIRM_EXECUTE;
842 goto restore_stdio;
843 }
af6da548 844
d172b175 845 for (;;) {
539622bd 846 r = ask_char(&c, "yfshiDjcn", "Execute %s? [y, f, s – h for help] ", e);
d172b175 847 if (r < 0) {
63d77c92 848 write_confirm_error_fd(r, STDOUT_FILENO, u);
d172b175
FB
849 r = CONFIRM_EXECUTE;
850 goto restore_stdio;
851 }
af6da548 852
d172b175 853 switch (c) {
b0eb2944
FB
854 case 'c':
855 printf("Resuming normal execution.\n");
856 manager_disable_confirm_spawn();
857 r = 1;
858 break;
dd6f9ac0
FB
859 case 'D':
860 unit_dump(u, stdout, " ");
861 continue; /* ask again */
d172b175
FB
862 case 'f':
863 printf("Failing execution.\n");
864 r = CONFIRM_PRETEND_FAILURE;
865 break;
866 case 'h':
b0eb2944
FB
867 printf(" c - continue, proceed without asking anymore\n"
868 " D - dump, show the state of the unit\n"
dd6f9ac0 869 " f - fail, don't execute the command and pretend it failed\n"
d172b175 870 " h - help\n"
eedf223a 871 " i - info, show a short summary of the unit\n"
56fde33a 872 " j - jobs, show jobs that are in progress\n"
d172b175
FB
873 " s - skip, don't execute the command and pretend it succeeded\n"
874 " y - yes, execute the command\n");
dd6f9ac0 875 continue; /* ask again */
eedf223a
FB
876 case 'i':
877 printf(" Description: %s\n"
878 " Unit: %s\n"
879 " Command: %s\n",
880 u->id, u->description, cmdline);
881 continue; /* ask again */
56fde33a
FB
882 case 'j':
883 manager_dump_jobs(u->manager, stdout, " ");
884 continue; /* ask again */
539622bd
FB
885 case 'n':
886 /* 'n' was removed in favor of 'f'. */
887 printf("Didn't understand 'n', did you mean 'f'?\n");
888 continue; /* ask again */
d172b175
FB
889 case 's':
890 printf("Skipping execution.\n");
891 r = CONFIRM_PRETEND_SUCCESS;
892 break;
893 case 'y':
894 r = CONFIRM_EXECUTE;
895 break;
896 default:
897 assert_not_reached("Unhandled choice");
898 }
3b20f877 899 break;
3b20f877 900 }
af6da548 901
3b20f877 902restore_stdio:
af6da548 903 restore_confirm_stdio(&saved_stdin, &saved_stdout);
af6da548 904 return r;
80876c20
LP
905}
906
4d885bd3
DH
907static int get_fixed_user(const ExecContext *c, const char **user,
908 uid_t *uid, gid_t *gid,
909 const char **home, const char **shell) {
81a2b7ce 910 int r;
4d885bd3 911 const char *name;
81a2b7ce 912
4d885bd3 913 assert(c);
81a2b7ce 914
23deef88
LP
915 if (!c->user)
916 return 0;
917
4d885bd3
DH
918 /* Note that we don't set $HOME or $SHELL if they are not particularly enlightening anyway
919 * (i.e. are "/" or "/bin/nologin"). */
81a2b7ce 920
23deef88 921 name = c->user;
4d885bd3
DH
922 r = get_user_creds_clean(&name, uid, gid, home, shell);
923 if (r < 0)
924 return r;
81a2b7ce 925
4d885bd3
DH
926 *user = name;
927 return 0;
928}
929
930static int get_fixed_group(const ExecContext *c, const char **group, gid_t *gid) {
931 int r;
932 const char *name;
933
934 assert(c);
935
936 if (!c->group)
937 return 0;
938
939 name = c->group;
940 r = get_group_creds(&name, gid);
941 if (r < 0)
942 return r;
943
944 *group = name;
945 return 0;
946}
947
cdc5d5c5
DH
948static int get_supplementary_groups(const ExecContext *c, const char *user,
949 const char *group, gid_t gid,
950 gid_t **supplementary_gids, int *ngids) {
4d885bd3
DH
951 char **i;
952 int r, k = 0;
953 int ngroups_max;
954 bool keep_groups = false;
955 gid_t *groups = NULL;
956 _cleanup_free_ gid_t *l_gids = NULL;
957
958 assert(c);
959
bbeea271
DH
960 /*
961 * If user is given, then lookup GID and supplementary groups list.
962 * We avoid NSS lookups for gid=0. Also we have to initialize groups
cdc5d5c5
DH
963 * here and as early as possible so we keep the list of supplementary
964 * groups of the caller.
bbeea271
DH
965 */
966 if (user && gid_is_valid(gid) && gid != 0) {
967 /* First step, initialize groups from /etc/groups */
968 if (initgroups(user, gid) < 0)
969 return -errno;
970
971 keep_groups = true;
972 }
973
ac6e8be6 974 if (strv_isempty(c->supplementary_groups))
4d885bd3
DH
975 return 0;
976
366ddd25
DH
977 /*
978 * If SupplementaryGroups= was passed then NGROUPS_MAX has to
979 * be positive, otherwise fail.
980 */
981 errno = 0;
982 ngroups_max = (int) sysconf(_SC_NGROUPS_MAX);
983 if (ngroups_max <= 0) {
984 if (errno > 0)
985 return -errno;
986 else
987 return -EOPNOTSUPP; /* For all other values */
988 }
989
4d885bd3
DH
990 l_gids = new(gid_t, ngroups_max);
991 if (!l_gids)
992 return -ENOMEM;
81a2b7ce 993
4d885bd3
DH
994 if (keep_groups) {
995 /*
996 * Lookup the list of groups that the user belongs to, we
997 * avoid NSS lookups here too for gid=0.
998 */
999 k = ngroups_max;
1000 if (getgrouplist(user, gid, l_gids, &k) < 0)
1001 return -EINVAL;
1002 } else
1003 k = 0;
81a2b7ce 1004
4d885bd3
DH
1005 STRV_FOREACH(i, c->supplementary_groups) {
1006 const char *g;
81a2b7ce 1007
4d885bd3
DH
1008 if (k >= ngroups_max)
1009 return -E2BIG;
81a2b7ce 1010
4d885bd3
DH
1011 g = *i;
1012 r = get_group_creds(&g, l_gids+k);
1013 if (r < 0)
1014 return r;
81a2b7ce 1015
4d885bd3
DH
1016 k++;
1017 }
81a2b7ce 1018
4d885bd3
DH
1019 /*
1020 * Sets ngids to zero to drop all supplementary groups, happens
1021 * when we are under root and SupplementaryGroups= is empty.
1022 */
1023 if (k == 0) {
1024 *ngids = 0;
1025 return 0;
1026 }
81a2b7ce 1027
4d885bd3
DH
1028 /* Otherwise get the final list of supplementary groups */
1029 groups = memdup(l_gids, sizeof(gid_t) * k);
1030 if (!groups)
1031 return -ENOMEM;
1032
1033 *supplementary_gids = groups;
1034 *ngids = k;
1035
1036 groups = NULL;
1037
1038 return 0;
1039}
1040
/* Applies the passed supplementary groups (if there are any) and then the GID (if it is valid) to the
 * calling process. Returns 0 on success, a negative errno-style error otherwise. */
static int enforce_groups(gid_t gid, const gid_t *supplementary_gids, int ngids) {

        /* Only handle SupplementaryGroups= if it is not empty */
        if (ngids > 0) {
                int r;

                r = maybe_setgroups(ngids, supplementary_gids);
                if (r < 0)
                        return r;
        }

        /* Then set real, effective and saved GID */
        if (gid_is_valid(gid) && setresgid(gid, gid, gid) < 0)
                return -errno;

        return 0;
}
1059
1060static int enforce_user(const ExecContext *context, uid_t uid) {
81a2b7ce
LP
1061 assert(context);
1062
4d885bd3
DH
1063 if (!uid_is_valid(uid))
1064 return 0;
1065
479050b3 1066 /* Sets (but doesn't look up) the uid and make sure we keep the
81a2b7ce
LP
1067 * capabilities while doing so. */
1068
479050b3 1069 if (context->capability_ambient_set != 0) {
81a2b7ce
LP
1070
1071 /* First step: If we need to keep capabilities but
1072 * drop privileges we need to make sure we keep our
cbb21cca 1073 * caps, while we drop privileges. */
693ced48 1074 if (uid != 0) {
cbb21cca 1075 int sb = context->secure_bits | 1<<SECURE_KEEP_CAPS;
693ced48
LP
1076
1077 if (prctl(PR_GET_SECUREBITS) != sb)
1078 if (prctl(PR_SET_SECUREBITS, sb) < 0)
1079 return -errno;
1080 }
81a2b7ce
LP
1081 }
1082
479050b3 1083 /* Second step: actually set the uids */
81a2b7ce
LP
1084 if (setresuid(uid, uid, uid) < 0)
1085 return -errno;
1086
1087 /* At this point we should have all necessary capabilities but
1088 are otherwise a normal user. However, the caps might got
1089 corrupted due to the setresuid() so we need clean them up
1090 later. This is done outside of this call. */
1091
1092 return 0;
1093}
1094
349cc4a5 1095#if HAVE_PAM
5b6319dc
LP
1096
1097static int null_conv(
1098 int num_msg,
1099 const struct pam_message **msg,
1100 struct pam_response **resp,
1101 void *appdata_ptr) {
1102
1103 /* We don't support conversations */
1104
1105 return PAM_CONV_ERR;
1106}
1107
cefc33ae
LP
1108#endif
1109
5b6319dc
LP
/* Opens a PAM session for the service and forks off a helper process ("(sd-pam)") that closes the session
 * again once the calling process goes away.
 *
 *   name    PAM service name handed to pam_start()
 *   user    user name handed to pam_start()
 *   uid/gid credentials the helper process switches to after the fork
 *   tty     optional TTY, set as PAM_TTY if non-NULL
 *   env     in/out: every entry is exported into PAM with pam_putenv(); on success the list is replaced by
 *           what pam_getenvlist() returns
 *   fds     file descriptors (n_fds entries) that are closed in the helper process
 *
 * When built without PAM support this is a no-op that returns 0. On failure a negative errno-style value is
 * returned; PAM errors are reported as -EPERM. */
static int setup_pam(
                const char *name,
                const char *user,
                uid_t uid,
                gid_t gid,
                const char *tty,
                char ***env,
                int fds[], size_t n_fds) {

#if HAVE_PAM

        static const struct pam_conv conv = {
                .conv = null_conv,
                .appdata_ptr = NULL
        };

        _cleanup_(barrier_destroy) Barrier barrier = BARRIER_NULL;
        pam_handle_t *handle = NULL;
        sigset_t old_ss;
        int pam_code = PAM_SUCCESS, r;
        char **nv, **e = NULL;
        bool close_session = false;
        pid_t pam_pid = 0, parent_pid;
        int flags = 0;

        assert(name);
        assert(user);
        assert(env);

        /* We set up PAM in the parent process, then fork. The child
         * will then stay around until killed via PR_SET_PDEATHSIG or
         * systemd via the cgroup logic. It will then remove the PAM
         * session again. The parent process will exec() the actual
         * daemon. We do things this way to ensure that the main PID
         * of the daemon is the one we initially fork()ed. */

        r = barrier_create(&barrier);
        if (r < 0)
                goto fail;

        if (log_get_max_level() < LOG_DEBUG)
                flags |= PAM_SILENT;

        pam_code = pam_start(name, user, &conv, &handle);
        if (pam_code != PAM_SUCCESS) {
                handle = NULL;
                goto fail;
        }

        if (tty) {
                pam_code = pam_set_item(handle, PAM_TTY, tty);
                if (pam_code != PAM_SUCCESS)
                        goto fail;
        }

        STRV_FOREACH(nv, *env) {
                pam_code = pam_putenv(handle, *nv);
                if (pam_code != PAM_SUCCESS)
                        goto fail;
        }

        pam_code = pam_acct_mgmt(handle, flags);
        if (pam_code != PAM_SUCCESS)
                goto fail;

        pam_code = pam_open_session(handle, flags);
        if (pam_code != PAM_SUCCESS)
                goto fail;

        close_session = true;

        e = pam_getenvlist(handle);
        if (!e) {
                pam_code = PAM_BUF_ERR;
                goto fail;
        }

        /* Block SIGTERM, so that we know that it won't get lost in
         * the child */

        assert_se(sigprocmask_many(SIG_BLOCK, &old_ss, SIGTERM, -1) >= 0);

        parent_pid = getpid_cached();

        r = safe_fork("(sd-pam)", 0, &pam_pid);
        if (r < 0)
                goto fail;
        if (r == 0) {
                int sig, ret = EXIT_PAM;

                /* The child's job is to reset the PAM session on
                 * termination */
                barrier_set_role(&barrier, BARRIER_CHILD);

                /* Make sure we don't keep open the passed fds in this child. We assume that otherwise only those fds
                 * are open here that have been opened by PAM. */
                (void) close_many(fds, n_fds);

                /* Drop privileges - we don't need any to pam_close_session
                 * and this will make PR_SET_PDEATHSIG work in most cases.
                 * If this fails, ignore the error - but expect sd-pam threads
                 * to fail to exit normally */

                r = maybe_setgroups(0, NULL);
                if (r < 0)
                        log_warning_errno(r, "Failed to setgroups() in sd-pam: %m");
                if (setresgid(gid, gid, gid) < 0)
                        log_warning_errno(errno, "Failed to setresgid() in sd-pam: %m");
                if (setresuid(uid, uid, uid) < 0)
                        log_warning_errno(errno, "Failed to setresuid() in sd-pam: %m");

                (void) ignore_signals(SIGPIPE, -1);

                /* Wait until our parent died. This will only work if
                 * the above setresuid() succeeds, otherwise the kernel
                 * will not allow unprivileged parents kill their privileged
                 * children this way. We rely on the control groups kill logic
                 * to do the rest for us. */
                if (prctl(PR_SET_PDEATHSIG, SIGTERM) < 0)
                        goto child_finish;

                /* Tell the parent that our setup is done. This is especially
                 * important regarding dropping privileges. Otherwise, unit
                 * setup might race against our setresuid(2) call.
                 *
                 * If the parent aborted, we'll detect this below, hence ignore
                 * return failure here. */
                (void) barrier_place(&barrier);

                /* Check if our parent process might already have died? */
                if (getppid() == parent_pid) {
                        sigset_t ss;

                        assert_se(sigemptyset(&ss) >= 0);
                        assert_se(sigaddset(&ss, SIGTERM) >= 0);

                        for (;;) {
                                int k;

                                /* sigwait() reports failure through its return value (a positive error
                                 * number); it neither returns a negative value nor sets errno. */
                                k = sigwait(&ss, &sig);
                                if (k == EINTR)
                                        continue;
                                if (k != 0)
                                        goto child_finish;

                                assert(sig == SIGTERM);
                                break;
                        }
                }

                /* If our parent died we'll end the session */
                if (getppid() != parent_pid) {
                        pam_code = pam_close_session(handle, flags);
                        if (pam_code != PAM_SUCCESS)
                                goto child_finish;
                }

                ret = 0;

        child_finish:
                pam_end(handle, pam_code | flags);
                _exit(ret);
        }

        barrier_set_role(&barrier, BARRIER_PARENT);

        /* If the child was forked off successfully it will do all the
         * cleanups, so forget about the handle here. */
        handle = NULL;

        /* Unblock SIGTERM again in the parent */
        assert_se(sigprocmask(SIG_SETMASK, &old_ss, NULL) >= 0);

        /* We close the log explicitly here, since the PAM modules
         * might have opened it, but we don't want this fd around. */
        closelog();

        /* Synchronously wait for the child to initialize. We don't care for
         * errors as we cannot recover. However, warn loudly if it happens. */
        if (!barrier_place_and_sync(&barrier))
                log_error("PAM initialization failed");

        /* Hand the environment collected from PAM to the caller, releasing the previous list */
        return strv_free_and_replace(*env, e);

fail:
        if (pam_code != PAM_SUCCESS) {
                log_error("PAM failed: %s", pam_strerror(handle, pam_code));
                r = -EPERM; /* PAM errors do not map to errno */
        } else
                log_error_errno(r, "PAM failed: %m");

        if (handle) {
                if (close_session)
                        pam_code = pam_close_session(handle, flags);

                pam_end(handle, pam_code | flags);
        }

        strv_free(e);
        closelog();

        return r;
#else
        return 0;
#endif
}
5b6319dc 1315
5d6b1584
LP
/* Renames the current process to "(<name>)", where <name> is the last (at most 8) characters of the file
 * name component of 'path'. If that component is empty, "(...)" is used instead. */
static void rename_process_from_path(const char *path) {
        /* '(' + up to 8 characters + ')' + NUL: ten visible characters, i.e. as long as "/sbin/init", so
         * that it looks pretty in /bin/ps */
        char buf[11] = "(";
        const char *base;
        size_t n;

        base = basename(path);
        if (isempty(base)) {
                rename_process("(...)");
                return;
        }

        n = strlen(base);
        if (n > 8) {
                /* Keep the tail: the beginning is often just a common prefix such as "systemd-" */
                base += n - 8;
                n = 8;
        }

        memcpy(buf + 1, base, n);
        buf[n + 1] = ')';
        buf[n + 2] = 0;

        rename_process(buf);
}
1346
469830d1
LP
1347static bool context_has_address_families(const ExecContext *c) {
1348 assert(c);
1349
1350 return c->address_families_whitelist ||
1351 !set_isempty(c->address_families);
1352}
1353
1354static bool context_has_syscall_filters(const ExecContext *c) {
1355 assert(c);
1356
1357 return c->syscall_whitelist ||
8cfa775f 1358 !hashmap_isempty(c->syscall_filter);
469830d1
LP
1359}
1360
1361static bool context_has_no_new_privileges(const ExecContext *c) {
1362 assert(c);
1363
1364 if (c->no_new_privileges)
1365 return true;
1366
1367 if (have_effective_cap(CAP_SYS_ADMIN)) /* if we are privileged, we don't need NNP */
1368 return false;
1369
1370 /* We need NNP if we have any form of seccomp and are unprivileged */
1371 return context_has_address_families(c) ||
1372 c->memory_deny_write_execute ||
1373 c->restrict_realtime ||
1374 exec_context_restrict_namespaces_set(c) ||
1375 c->protect_kernel_tunables ||
1376 c->protect_kernel_modules ||
1377 c->private_devices ||
1378 context_has_syscall_filters(c) ||
78e864e5
TM
1379 !set_isempty(c->syscall_archs) ||
1380 c->lock_personality;
469830d1
LP
1381}
1382
349cc4a5 1383#if HAVE_SECCOMP
17df7223 1384
83f12b27 1385static bool skip_seccomp_unavailable(const Unit* u, const char* msg) {
f673b62d
LP
1386
1387 if (is_seccomp_available())
1388 return false;
1389
f673b62d 1390 log_unit_debug(u, "SECCOMP features not detected in the kernel, skipping %s", msg);
f673b62d 1391 return true;
83f12b27
FS
1392}
1393
165a31c0 1394static int apply_syscall_filter(const Unit* u, const ExecContext *c, bool needs_ambient_hack) {
469830d1 1395 uint32_t negative_action, default_action, action;
165a31c0 1396 int r;
8351ceae 1397
469830d1 1398 assert(u);
c0467cf3 1399 assert(c);
8351ceae 1400
469830d1 1401 if (!context_has_syscall_filters(c))
83f12b27
FS
1402 return 0;
1403
469830d1
LP
1404 if (skip_seccomp_unavailable(u, "SystemCallFilter="))
1405 return 0;
e9642be2 1406
469830d1 1407 negative_action = c->syscall_errno == 0 ? SCMP_ACT_KILL : SCMP_ACT_ERRNO(c->syscall_errno);
e9642be2 1408
469830d1
LP
1409 if (c->syscall_whitelist) {
1410 default_action = negative_action;
1411 action = SCMP_ACT_ALLOW;
7c66bae2 1412 } else {
469830d1
LP
1413 default_action = SCMP_ACT_ALLOW;
1414 action = negative_action;
57183d11 1415 }
8351ceae 1416
165a31c0
LP
1417 if (needs_ambient_hack) {
1418 r = seccomp_filter_set_add(c->syscall_filter, c->syscall_whitelist, syscall_filter_sets + SYSCALL_FILTER_SET_SETUID);
1419 if (r < 0)
1420 return r;
1421 }
1422
469830d1 1423 return seccomp_load_syscall_filter_set_raw(default_action, c->syscall_filter, action);
4298d0b5
LP
1424}
1425
469830d1
LP
1426static int apply_syscall_archs(const Unit *u, const ExecContext *c) {
1427 assert(u);
4298d0b5
LP
1428 assert(c);
1429
469830d1 1430 if (set_isempty(c->syscall_archs))
83f12b27
FS
1431 return 0;
1432
469830d1
LP
1433 if (skip_seccomp_unavailable(u, "SystemCallArchitectures="))
1434 return 0;
4298d0b5 1435
469830d1
LP
1436 return seccomp_restrict_archs(c->syscall_archs);
1437}
4298d0b5 1438
469830d1
LP
1439static int apply_address_families(const Unit* u, const ExecContext *c) {
1440 assert(u);
1441 assert(c);
4298d0b5 1442
469830d1
LP
1443 if (!context_has_address_families(c))
1444 return 0;
4298d0b5 1445
469830d1
LP
1446 if (skip_seccomp_unavailable(u, "RestrictAddressFamilies="))
1447 return 0;
4298d0b5 1448
469830d1 1449 return seccomp_restrict_address_families(c->address_families, c->address_families_whitelist);
8351ceae 1450}
4298d0b5 1451
83f12b27 1452static int apply_memory_deny_write_execute(const Unit* u, const ExecContext *c) {
469830d1 1453 assert(u);
f3e43635
TM
1454 assert(c);
1455
469830d1 1456 if (!c->memory_deny_write_execute)
83f12b27
FS
1457 return 0;
1458
469830d1
LP
1459 if (skip_seccomp_unavailable(u, "MemoryDenyWriteExecute="))
1460 return 0;
f3e43635 1461
469830d1 1462 return seccomp_memory_deny_write_execute();
f3e43635
TM
1463}
1464
83f12b27 1465static int apply_restrict_realtime(const Unit* u, const ExecContext *c) {
469830d1 1466 assert(u);
f4170c67
LP
1467 assert(c);
1468
469830d1 1469 if (!c->restrict_realtime)
83f12b27
FS
1470 return 0;
1471
469830d1
LP
1472 if (skip_seccomp_unavailable(u, "RestrictRealtime="))
1473 return 0;
f4170c67 1474
469830d1 1475 return seccomp_restrict_realtime();
f4170c67
LP
1476}
1477
59e856c7 1478static int apply_protect_sysctl(const Unit *u, const ExecContext *c) {
469830d1 1479 assert(u);
59eeb84b
LP
1480 assert(c);
1481
1482 /* Turn off the legacy sysctl() system call. Many distributions turn this off while building the kernel, but
1483 * let's protect even those systems where this is left on in the kernel. */
1484
469830d1 1485 if (!c->protect_kernel_tunables)
59eeb84b
LP
1486 return 0;
1487
469830d1
LP
1488 if (skip_seccomp_unavailable(u, "ProtectKernelTunables="))
1489 return 0;
59eeb84b 1490
469830d1 1491 return seccomp_protect_sysctl();
59eeb84b
LP
1492}
1493
59e856c7 1494static int apply_protect_kernel_modules(const Unit *u, const ExecContext *c) {
469830d1 1495 assert(u);
502d704e
DH
1496 assert(c);
1497
25a8d8a0 1498 /* Turn off module syscalls on ProtectKernelModules=yes */
502d704e 1499
469830d1
LP
1500 if (!c->protect_kernel_modules)
1501 return 0;
1502
502d704e
DH
1503 if (skip_seccomp_unavailable(u, "ProtectKernelModules="))
1504 return 0;
1505
469830d1 1506 return seccomp_load_syscall_filter_set(SCMP_ACT_ALLOW, syscall_filter_sets + SYSCALL_FILTER_SET_MODULE, SCMP_ACT_ERRNO(EPERM));
502d704e
DH
1507}
1508
59e856c7 1509static int apply_private_devices(const Unit *u, const ExecContext *c) {
469830d1 1510 assert(u);
ba128bb8
LP
1511 assert(c);
1512
8f81a5f6 1513 /* If PrivateDevices= is set, also turn off iopl and all @raw-io syscalls. */
ba128bb8 1514
469830d1
LP
1515 if (!c->private_devices)
1516 return 0;
1517
ba128bb8
LP
1518 if (skip_seccomp_unavailable(u, "PrivateDevices="))
1519 return 0;
1520
469830d1 1521 return seccomp_load_syscall_filter_set(SCMP_ACT_ALLOW, syscall_filter_sets + SYSCALL_FILTER_SET_RAW_IO, SCMP_ACT_ERRNO(EPERM));
ba128bb8
LP
1522}
1523
34cf6c43 1524static int apply_restrict_namespaces(const Unit *u, const ExecContext *c) {
469830d1 1525 assert(u);
add00535
LP
1526 assert(c);
1527
1528 if (!exec_context_restrict_namespaces_set(c))
1529 return 0;
1530
1531 if (skip_seccomp_unavailable(u, "RestrictNamespaces="))
1532 return 0;
1533
1534 return seccomp_restrict_namespaces(c->restrict_namespaces);
1535}
1536
78e864e5 1537static int apply_lock_personality(const Unit* u, const ExecContext *c) {
e8132d63
LP
1538 unsigned long personality;
1539 int r;
78e864e5
TM
1540
1541 assert(u);
1542 assert(c);
1543
1544 if (!c->lock_personality)
1545 return 0;
1546
1547 if (skip_seccomp_unavailable(u, "LockPersonality="))
1548 return 0;
1549
e8132d63
LP
1550 personality = c->personality;
1551
1552 /* If personality is not specified, use either PER_LINUX or PER_LINUX32 depending on what is currently set. */
1553 if (personality == PERSONALITY_INVALID) {
1554
1555 r = opinionated_personality(&personality);
1556 if (r < 0)
1557 return r;
1558 }
78e864e5
TM
1559
1560 return seccomp_lock_personality(personality);
1561}
1562
c0467cf3 1563#endif
8351ceae 1564
31a7eb86
ZJS
1565static void do_idle_pipe_dance(int idle_pipe[4]) {
1566 assert(idle_pipe);
1567
54eb2300
LP
1568 idle_pipe[1] = safe_close(idle_pipe[1]);
1569 idle_pipe[2] = safe_close(idle_pipe[2]);
31a7eb86
ZJS
1570
1571 if (idle_pipe[0] >= 0) {
1572 int r;
1573
1574 r = fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT_USEC);
1575
1576 if (idle_pipe[3] >= 0 && r == 0 /* timeout */) {
c7cc737f
LP
1577 ssize_t n;
1578
31a7eb86 1579 /* Signal systemd that we are bored and want to continue. */
c7cc737f
LP
1580 n = write(idle_pipe[3], "x", 1);
1581 if (n > 0)
cd972d69
ZJS
1582 /* Wait for systemd to react to the signal above. */
1583 fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT2_USEC);
31a7eb86
ZJS
1584 }
1585
54eb2300 1586 idle_pipe[0] = safe_close(idle_pipe[0]);
31a7eb86
ZJS
1587
1588 }
1589
54eb2300 1590 idle_pipe[3] = safe_close(idle_pipe[3]);
31a7eb86
ZJS
1591}
1592
7cae38c4 1593static int build_environment(
34cf6c43 1594 const Unit *u,
9fa95f85 1595 const ExecContext *c,
1e22b5cd 1596 const ExecParameters *p,
da6053d0 1597 size_t n_fds,
7cae38c4
LP
1598 const char *home,
1599 const char *username,
1600 const char *shell,
7bce046b
LP
1601 dev_t journal_stream_dev,
1602 ino_t journal_stream_ino,
7cae38c4
LP
1603 char ***ret) {
1604
1605 _cleanup_strv_free_ char **our_env = NULL;
da6053d0 1606 size_t n_env = 0;
7cae38c4
LP
1607 char *x;
1608
4b58153d 1609 assert(u);
7cae38c4
LP
1610 assert(c);
1611 assert(ret);
1612
4b58153d 1613 our_env = new0(char*, 14);
7cae38c4
LP
1614 if (!our_env)
1615 return -ENOMEM;
1616
1617 if (n_fds > 0) {
8dd4c05b
LP
1618 _cleanup_free_ char *joined = NULL;
1619
df0ff127 1620 if (asprintf(&x, "LISTEN_PID="PID_FMT, getpid_cached()) < 0)
7cae38c4
LP
1621 return -ENOMEM;
1622 our_env[n_env++] = x;
1623
da6053d0 1624 if (asprintf(&x, "LISTEN_FDS=%zu", n_fds) < 0)
7cae38c4
LP
1625 return -ENOMEM;
1626 our_env[n_env++] = x;
8dd4c05b 1627
1e22b5cd 1628 joined = strv_join(p->fd_names, ":");
8dd4c05b
LP
1629 if (!joined)
1630 return -ENOMEM;
1631
605405c6 1632 x = strjoin("LISTEN_FDNAMES=", joined);
8dd4c05b
LP
1633 if (!x)
1634 return -ENOMEM;
1635 our_env[n_env++] = x;
7cae38c4
LP
1636 }
1637
b08af3b1 1638 if ((p->flags & EXEC_SET_WATCHDOG) && p->watchdog_usec > 0) {
df0ff127 1639 if (asprintf(&x, "WATCHDOG_PID="PID_FMT, getpid_cached()) < 0)
09812eb7
LP
1640 return -ENOMEM;
1641 our_env[n_env++] = x;
1642
1e22b5cd 1643 if (asprintf(&x, "WATCHDOG_USEC="USEC_FMT, p->watchdog_usec) < 0)
09812eb7
LP
1644 return -ENOMEM;
1645 our_env[n_env++] = x;
1646 }
1647
fd63e712
LP
1648 /* If this is D-Bus, tell the nss-systemd module, since it relies on being able to use D-Bus look up dynamic
1649 * users via PID 1, possibly dead-locking the dbus daemon. This way it will not use D-Bus to resolve names, but
1650 * check the database directly. */
ac647978 1651 if (p->flags & EXEC_NSS_BYPASS_BUS) {
fd63e712
LP
1652 x = strdup("SYSTEMD_NSS_BYPASS_BUS=1");
1653 if (!x)
1654 return -ENOMEM;
1655 our_env[n_env++] = x;
1656 }
1657
7cae38c4
LP
1658 if (home) {
1659 x = strappend("HOME=", home);
1660 if (!x)
1661 return -ENOMEM;
1662 our_env[n_env++] = x;
1663 }
1664
1665 if (username) {
1666 x = strappend("LOGNAME=", username);
1667 if (!x)
1668 return -ENOMEM;
1669 our_env[n_env++] = x;
1670
1671 x = strappend("USER=", username);
1672 if (!x)
1673 return -ENOMEM;
1674 our_env[n_env++] = x;
1675 }
1676
1677 if (shell) {
1678 x = strappend("SHELL=", shell);
1679 if (!x)
1680 return -ENOMEM;
1681 our_env[n_env++] = x;
1682 }
1683
4b58153d
LP
1684 if (!sd_id128_is_null(u->invocation_id)) {
1685 if (asprintf(&x, "INVOCATION_ID=" SD_ID128_FORMAT_STR, SD_ID128_FORMAT_VAL(u->invocation_id)) < 0)
1686 return -ENOMEM;
1687
1688 our_env[n_env++] = x;
1689 }
1690
6af760f3
LP
1691 if (exec_context_needs_term(c)) {
1692 const char *tty_path, *term = NULL;
1693
1694 tty_path = exec_context_tty_path(c);
1695
1696 /* If we are forked off PID 1 and we are supposed to operate on /dev/console, then let's try to inherit
1697 * the $TERM set for PID 1. This is useful for containers so that the $TERM the container manager
1698 * passes to PID 1 ends up all the way in the console login shown. */
1699
1700 if (path_equal(tty_path, "/dev/console") && getppid() == 1)
1701 term = getenv("TERM");
1702 if (!term)
1703 term = default_term_for_tty(tty_path);
7cae38c4 1704
6af760f3 1705 x = strappend("TERM=", term);
7cae38c4
LP
1706 if (!x)
1707 return -ENOMEM;
1708 our_env[n_env++] = x;
1709 }
1710
7bce046b
LP
1711 if (journal_stream_dev != 0 && journal_stream_ino != 0) {
1712 if (asprintf(&x, "JOURNAL_STREAM=" DEV_FMT ":" INO_FMT, journal_stream_dev, journal_stream_ino) < 0)
1713 return -ENOMEM;
1714
1715 our_env[n_env++] = x;
1716 }
1717
7cae38c4 1718 our_env[n_env++] = NULL;
7bce046b 1719 assert(n_env <= 12);
7cae38c4 1720
ae2a15bc 1721 *ret = TAKE_PTR(our_env);
7cae38c4
LP
1722
1723 return 0;
1724}
1725
b4c14404
FB
1726static int build_pass_environment(const ExecContext *c, char ***ret) {
1727 _cleanup_strv_free_ char **pass_env = NULL;
1728 size_t n_env = 0, n_bufsize = 0;
1729 char **i;
1730
1731 STRV_FOREACH(i, c->pass_environment) {
1732 _cleanup_free_ char *x = NULL;
1733 char *v;
1734
1735 v = getenv(*i);
1736 if (!v)
1737 continue;
605405c6 1738 x = strjoin(*i, "=", v);
b4c14404
FB
1739 if (!x)
1740 return -ENOMEM;
00819cc1 1741
b4c14404
FB
1742 if (!GREEDY_REALLOC(pass_env, n_bufsize, n_env + 2))
1743 return -ENOMEM;
00819cc1 1744
1cc6c93a 1745 pass_env[n_env++] = TAKE_PTR(x);
b4c14404 1746 pass_env[n_env] = NULL;
b4c14404
FB
1747 }
1748
ae2a15bc 1749 *ret = TAKE_PTR(pass_env);
b4c14404
FB
1750
1751 return 0;
1752}
1753
8b44a3d2
LP
1754static bool exec_needs_mount_namespace(
1755 const ExecContext *context,
1756 const ExecParameters *params,
4657abb5 1757 const ExecRuntime *runtime) {
8b44a3d2
LP
1758
1759 assert(context);
1760 assert(params);
1761
915e6d16
LP
1762 if (context->root_image)
1763 return true;
1764
2a624c36
AP
1765 if (!strv_isempty(context->read_write_paths) ||
1766 !strv_isempty(context->read_only_paths) ||
1767 !strv_isempty(context->inaccessible_paths))
8b44a3d2
LP
1768 return true;
1769
42b1d8e0 1770 if (context->n_bind_mounts > 0)
d2d6c096
LP
1771 return true;
1772
2abd4e38
YW
1773 if (context->n_temporary_filesystems > 0)
1774 return true;
1775
8b44a3d2
LP
1776 if (context->mount_flags != 0)
1777 return true;
1778
1779 if (context->private_tmp && runtime && (runtime->tmp_dir || runtime->var_tmp_dir))
1780 return true;
1781
8b44a3d2
LP
1782 if (context->private_devices ||
1783 context->protect_system != PROTECT_SYSTEM_NO ||
59eeb84b
LP
1784 context->protect_home != PROTECT_HOME_NO ||
1785 context->protect_kernel_tunables ||
c575770b 1786 context->protect_kernel_modules ||
59eeb84b 1787 context->protect_control_groups)
8b44a3d2
LP
1788 return true;
1789
37c56f89
YW
1790 if (context->root_directory) {
1791 ExecDirectoryType t;
1792
1793 if (context->mount_apivfs)
1794 return true;
1795
1796 for (t = 0; t < _EXEC_DIRECTORY_TYPE_MAX; t++) {
1797 if (!params->prefix[t])
1798 continue;
1799
1800 if (!strv_isempty(context->directories[t].paths))
1801 return true;
1802 }
1803 }
5d997827 1804
42b1d8e0 1805 if (context->dynamic_user &&
b43ee82f 1806 (!strv_isempty(context->directories[EXEC_DIRECTORY_STATE].paths) ||
42b1d8e0
YW
1807 !strv_isempty(context->directories[EXEC_DIRECTORY_CACHE].paths) ||
1808 !strv_isempty(context->directories[EXEC_DIRECTORY_LOGS].paths)))
1809 return true;
1810
8b44a3d2
LP
1811 return false;
1812}
1813
d251207d
LP
1814static int setup_private_users(uid_t uid, gid_t gid) {
1815 _cleanup_free_ char *uid_map = NULL, *gid_map = NULL;
1816 _cleanup_close_pair_ int errno_pipe[2] = { -1, -1 };
1817 _cleanup_close_ int unshare_ready_fd = -1;
1818 _cleanup_(sigkill_waitp) pid_t pid = 0;
1819 uint64_t c = 1;
d251207d
LP
1820 ssize_t n;
1821 int r;
1822
1823 /* Set up a user namespace and map root to root, the selected UID/GID to itself, and everything else to
1824 * nobody. In order to be able to write this mapping we need CAP_SETUID in the original user namespace, which
1825 * we however lack after opening the user namespace. To work around this we fork() a temporary child process,
1826 * which waits for the parent to create the new user namespace while staying in the original namespace. The
1827 * child then writes the UID mapping, under full privileges. The parent waits for the child to finish and
1828 * continues execution normally. */
1829
587ab01b
ZJS
1830 if (uid != 0 && uid_is_valid(uid)) {
1831 r = asprintf(&uid_map,
1832 "0 0 1\n" /* Map root → root */
1833 UID_FMT " " UID_FMT " 1\n", /* Map $UID → $UID */
1834 uid, uid);
1835 if (r < 0)
1836 return -ENOMEM;
1837 } else {
e0f3720e 1838 uid_map = strdup("0 0 1\n"); /* The case where the above is the same */
587ab01b
ZJS
1839 if (!uid_map)
1840 return -ENOMEM;
1841 }
d251207d 1842
587ab01b
ZJS
1843 if (gid != 0 && gid_is_valid(gid)) {
1844 r = asprintf(&gid_map,
1845 "0 0 1\n" /* Map root → root */
1846 GID_FMT " " GID_FMT " 1\n", /* Map $GID → $GID */
1847 gid, gid);
1848 if (r < 0)
1849 return -ENOMEM;
1850 } else {
d251207d 1851 gid_map = strdup("0 0 1\n"); /* The case where the above is the same */
587ab01b
ZJS
1852 if (!gid_map)
1853 return -ENOMEM;
1854 }
d251207d
LP
1855
1856 /* Create a communication channel so that the parent can tell the child when it finished creating the user
1857 * namespace. */
1858 unshare_ready_fd = eventfd(0, EFD_CLOEXEC);
1859 if (unshare_ready_fd < 0)
1860 return -errno;
1861
1862 /* Create a communication channel so that the child can tell the parent a proper error code in case it
1863 * failed. */
1864 if (pipe2(errno_pipe, O_CLOEXEC) < 0)
1865 return -errno;
1866
4c253ed1
LP
1867 r = safe_fork("(sd-userns)", FORK_RESET_SIGNALS|FORK_DEATHSIG, &pid);
1868 if (r < 0)
1869 return r;
1870 if (r == 0) {
d251207d
LP
1871 _cleanup_close_ int fd = -1;
1872 const char *a;
1873 pid_t ppid;
1874
1875 /* Child process, running in the original user namespace. Let's update the parent's UID/GID map from
1876 * here, after the parent opened its own user namespace. */
1877
1878 ppid = getppid();
1879 errno_pipe[0] = safe_close(errno_pipe[0]);
1880
1881 /* Wait until the parent unshared the user namespace */
1882 if (read(unshare_ready_fd, &c, sizeof(c)) < 0) {
1883 r = -errno;
1884 goto child_fail;
1885 }
1886
1887 /* Disable the setgroups() system call in the child user namespace, for good. */
1888 a = procfs_file_alloca(ppid, "setgroups");
1889 fd = open(a, O_WRONLY|O_CLOEXEC);
1890 if (fd < 0) {
1891 if (errno != ENOENT) {
1892 r = -errno;
1893 goto child_fail;
1894 }
1895
1896 /* If the file is missing the kernel is too old, let's continue anyway. */
1897 } else {
1898 if (write(fd, "deny\n", 5) < 0) {
1899 r = -errno;
1900 goto child_fail;
1901 }
1902
1903 fd = safe_close(fd);
1904 }
1905
1906 /* First write the GID map */
1907 a = procfs_file_alloca(ppid, "gid_map");
1908 fd = open(a, O_WRONLY|O_CLOEXEC);
1909 if (fd < 0) {
1910 r = -errno;
1911 goto child_fail;
1912 }
1913 if (write(fd, gid_map, strlen(gid_map)) < 0) {
1914 r = -errno;
1915 goto child_fail;
1916 }
1917 fd = safe_close(fd);
1918
1919 /* The write the UID map */
1920 a = procfs_file_alloca(ppid, "uid_map");
1921 fd = open(a, O_WRONLY|O_CLOEXEC);
1922 if (fd < 0) {
1923 r = -errno;
1924 goto child_fail;
1925 }
1926 if (write(fd, uid_map, strlen(uid_map)) < 0) {
1927 r = -errno;
1928 goto child_fail;
1929 }
1930
1931 _exit(EXIT_SUCCESS);
1932
1933 child_fail:
1934 (void) write(errno_pipe[1], &r, sizeof(r));
1935 _exit(EXIT_FAILURE);
1936 }
1937
1938 errno_pipe[1] = safe_close(errno_pipe[1]);
1939
1940 if (unshare(CLONE_NEWUSER) < 0)
1941 return -errno;
1942
1943 /* Let the child know that the namespace is ready now */
1944 if (write(unshare_ready_fd, &c, sizeof(c)) < 0)
1945 return -errno;
1946
1947 /* Try to read an error code from the child */
1948 n = read(errno_pipe[0], &r, sizeof(r));
1949 if (n < 0)
1950 return -errno;
1951 if (n == sizeof(r)) { /* an error code was sent to us */
1952 if (r < 0)
1953 return r;
1954 return -EIO;
1955 }
1956 if (n != 0) /* on success we should have read 0 bytes */
1957 return -EIO;
1958
2e87a1fd
LP
1959 r = wait_for_terminate_and_check("(sd-userns)", pid, 0);
1960 pid = 0;
d251207d
LP
1961 if (r < 0)
1962 return r;
2e87a1fd 1963 if (r != EXIT_SUCCESS) /* If something strange happened with the child, let's consider this fatal, too */
d251207d
LP
1964 return -EIO;
1965
1966 return 0;
1967}
1968
3536f49e 1969static int setup_exec_directory(
07689d5d
LP
1970 const ExecContext *context,
1971 const ExecParameters *params,
1972 uid_t uid,
3536f49e 1973 gid_t gid,
3536f49e
YW
1974 ExecDirectoryType type,
1975 int *exit_status) {
07689d5d 1976
72fd1768 1977 static const int exit_status_table[_EXEC_DIRECTORY_TYPE_MAX] = {
3536f49e
YW
1978 [EXEC_DIRECTORY_RUNTIME] = EXIT_RUNTIME_DIRECTORY,
1979 [EXEC_DIRECTORY_STATE] = EXIT_STATE_DIRECTORY,
1980 [EXEC_DIRECTORY_CACHE] = EXIT_CACHE_DIRECTORY,
1981 [EXEC_DIRECTORY_LOGS] = EXIT_LOGS_DIRECTORY,
1982 [EXEC_DIRECTORY_CONFIGURATION] = EXIT_CONFIGURATION_DIRECTORY,
1983 };
07689d5d
LP
1984 char **rt;
1985 int r;
1986
1987 assert(context);
1988 assert(params);
72fd1768 1989 assert(type >= 0 && type < _EXEC_DIRECTORY_TYPE_MAX);
3536f49e 1990 assert(exit_status);
07689d5d 1991
3536f49e
YW
1992 if (!params->prefix[type])
1993 return 0;
1994
8679efde 1995 if (params->flags & EXEC_CHOWN_DIRECTORIES) {
3536f49e
YW
1996 if (!uid_is_valid(uid))
1997 uid = 0;
1998 if (!gid_is_valid(gid))
1999 gid = 0;
2000 }
2001
2002 STRV_FOREACH(rt, context->directories[type].paths) {
6c47cd7d 2003 _cleanup_free_ char *p = NULL, *pp = NULL;
07689d5d 2004
3536f49e
YW
2005 p = strjoin(params->prefix[type], "/", *rt);
2006 if (!p) {
2007 r = -ENOMEM;
2008 goto fail;
2009 }
07689d5d 2010
23a7448e
YW
2011 r = mkdir_parents_label(p, 0755);
2012 if (r < 0)
3536f49e 2013 goto fail;
23a7448e 2014
8092a48c
YW
2015 if (context->dynamic_user &&
2016 !IN_SET(type, EXEC_DIRECTORY_RUNTIME, EXEC_DIRECTORY_CONFIGURATION)) {
6c47cd7d
LP
2017 _cleanup_free_ char *private_root = NULL, *relative = NULL, *parent = NULL;
2018
2019 /* So, here's one extra complication when dealing with DynamicUser=1 units. In that case we
2020 * want to avoid leaving a directory around fully accessible that is owned by a dynamic user
2021 * whose UID is later on reused. To lock this down we use the same trick used by container
2022 * managers to prohibit host users to get access to files of the same UID in containers: we
2023 * place everything inside a directory that has an access mode of 0700 and is owned root:root,
2024 * so that it acts as security boundary for unprivileged host code. We then use fs namespacing
2025 * to make this directory permeable for the service itself.
2026 *
2027 * Specifically: for a service which wants a special directory "foo/" we first create a
2028 * directory "private/" with access mode 0700 owned by root:root. Then we place "foo" inside of
2029 * that directory (i.e. "private/foo/"), and make "foo" a symlink to "private/foo". This way,
2030 * privileged host users can access "foo/" as usual, but unprivileged host users can't look
2031 * into it. Inside of the namespaceof the container "private/" is replaced by a more liberally
2032 * accessible tmpfs, into which the host's "private/foo/" is mounted under the same name, thus
2033 * disabling the access boundary for the service and making sure it only gets access to the
2034 * dirs it needs but no others. Tricky? Yes, absolutely, but it works!
2035 *
2036 * Note that we don't do this for EXEC_DIRECTORY_CONFIGURATION as that's assumed not to be
8092a48c
YW
2037 * owned by the service itself.
2038 * Also, note that we don't do this for EXEC_DIRECTORY_RUNTIME as that's often used for sharing
2039 * files or sockets with other services. */
6c47cd7d
LP
2040
2041 private_root = strjoin(params->prefix[type], "/private");
2042 if (!private_root) {
2043 r = -ENOMEM;
2044 goto fail;
2045 }
2046
2047 /* First set up private root if it doesn't exist yet, with access mode 0700 and owned by root:root */
37c1d5e9 2048 r = mkdir_safe_label(private_root, 0700, 0, 0, MKDIR_WARN_MODE);
6c47cd7d
LP
2049 if (r < 0)
2050 goto fail;
2051
2052 pp = strjoin(private_root, "/", *rt);
2053 if (!pp) {
2054 r = -ENOMEM;
2055 goto fail;
2056 }
2057
2058 /* Create all directories between the configured directory and this private root, and mark them 0755 */
2059 r = mkdir_parents_label(pp, 0755);
2060 if (r < 0)
2061 goto fail;
2062
949befd3
LP
2063 if (is_dir(p, false) > 0 &&
2064 (laccess(pp, F_OK) < 0 && errno == ENOENT)) {
2065
2066 /* Hmm, the private directory doesn't exist yet, but the normal one exists? If so, move
2067 * it over. Most likely the service has been upgraded from one that didn't use
2068 * DynamicUser=1, to one that does. */
2069
2070 if (rename(p, pp) < 0) {
2071 r = -errno;
2072 goto fail;
2073 }
2074 } else {
2075 /* Otherwise, create the actual directory for the service */
2076
2077 r = mkdir_label(pp, context->directories[type].mode);
2078 if (r < 0 && r != -EEXIST)
2079 goto fail;
2080 }
6c47cd7d
LP
2081
2082 parent = dirname_malloc(p);
2083 if (!parent) {
2084 r = -ENOMEM;
2085 goto fail;
2086 }
2087
2088 r = path_make_relative(parent, pp, &relative);
2089 if (r < 0)
2090 goto fail;
2091
2092 /* And link it up from the original place */
2093 r = symlink_idempotent(relative, p);
2094 if (r < 0)
2095 goto fail;
2096
30c81ce2
ZJS
2097 /* Lock down the access mode */
2098 if (chmod(pp, context->directories[type].mode) < 0) {
2099 r = -errno;
2100 goto fail;
2101 }
6c47cd7d
LP
2102 } else {
2103 r = mkdir_label(p, context->directories[type].mode);
fdff1da2 2104 if (r < 0 && r != -EEXIST)
6c47cd7d 2105 goto fail;
fdff1da2
YW
2106 if (r == -EEXIST && !context->dynamic_user)
2107 continue;
a1164ae3 2108 }
07689d5d 2109
c71b2eb7
LP
2110 /* Don't change the owner of the configuration directory, as in the common case it is not written to by
2111 * a service, and shall not be writable. */
2112 if (type == EXEC_DIRECTORY_CONFIGURATION)
2113 continue;
2114
a1164ae3 2115 /* Then, change the ownership of the whole tree, if necessary */
30c81ce2 2116 r = path_chown_recursive(pp ?: p, uid, gid);
07689d5d 2117 if (r < 0)
3536f49e 2118 goto fail;
07689d5d
LP
2119 }
2120
2121 return 0;
3536f49e
YW
2122
2123fail:
2124 *exit_status = exit_status_table[type];
3536f49e 2125 return r;
07689d5d
LP
2126}
2127
#if ENABLE_SMACK
/* Applies a SMACK label to the calling process (PID 0 is passed to mac_smack_apply_pid()).
 *
 * If the context carries an explicit smack_process_label, that label is applied. Otherwise, and only
 * when SMACK_DEFAULT_PROCESS_LABEL is defined at build time, the SMACK_ATTR_EXEC label is read from
 * command->path and applied, with SMACK_DEFAULT_PROCESS_LABEL as fallback when none was read.
 *
 * Returns 0 on success (or if there was nothing to do), a negative errno-style value on failure. */
static int setup_smack(
                const ExecContext *context,
                const ExecCommand *command) {

        assert(context);
        assert(command);

        if (context->smack_process_label) {
                int r;

                r = mac_smack_apply_pid(0, context->smack_process_label);
                return r < 0 ? r : 0;
        }

#ifdef SMACK_DEFAULT_PROCESS_LABEL
        {
                _cleanup_free_ char *exec_label = NULL;
                int r;

                /* A missing attribute or a file system without xattr support is not an error here, we
                 * simply fall back to the default label below. */
                r = mac_smack_read(command->path, SMACK_ATTR_EXEC, &exec_label);
                if (r < 0 && r != -ENODATA && r != -EOPNOTSUPP)
                        return r;

                r = mac_smack_apply_pid(0, exec_label ?: SMACK_DEFAULT_PROCESS_LABEL);
                if (r < 0)
                        return r;
        }
#endif

        return 0;
}
#endif
cefc33ae 2160
6c47cd7d
LP
2161static int compile_bind_mounts(
2162 const ExecContext *context,
2163 const ExecParameters *params,
2164 BindMount **ret_bind_mounts,
da6053d0 2165 size_t *ret_n_bind_mounts,
6c47cd7d
LP
2166 char ***ret_empty_directories) {
2167
2168 _cleanup_strv_free_ char **empty_directories = NULL;
2169 BindMount *bind_mounts;
da6053d0 2170 size_t n, h = 0, i;
6c47cd7d
LP
2171 ExecDirectoryType t;
2172 int r;
2173
2174 assert(context);
2175 assert(params);
2176 assert(ret_bind_mounts);
2177 assert(ret_n_bind_mounts);
2178 assert(ret_empty_directories);
2179
2180 n = context->n_bind_mounts;
2181 for (t = 0; t < _EXEC_DIRECTORY_TYPE_MAX; t++) {
2182 if (!params->prefix[t])
2183 continue;
2184
2185 n += strv_length(context->directories[t].paths);
2186 }
2187
2188 if (n <= 0) {
2189 *ret_bind_mounts = NULL;
2190 *ret_n_bind_mounts = 0;
2191 *ret_empty_directories = NULL;
2192 return 0;
2193 }
2194
2195 bind_mounts = new(BindMount, n);
2196 if (!bind_mounts)
2197 return -ENOMEM;
2198
a8cabc61 2199 for (i = 0; i < context->n_bind_mounts; i++) {
6c47cd7d
LP
2200 BindMount *item = context->bind_mounts + i;
2201 char *s, *d;
2202
2203 s = strdup(item->source);
2204 if (!s) {
2205 r = -ENOMEM;
2206 goto finish;
2207 }
2208
2209 d = strdup(item->destination);
2210 if (!d) {
2211 free(s);
2212 r = -ENOMEM;
2213 goto finish;
2214 }
2215
2216 bind_mounts[h++] = (BindMount) {
2217 .source = s,
2218 .destination = d,
2219 .read_only = item->read_only,
2220 .recursive = item->recursive,
2221 .ignore_enoent = item->ignore_enoent,
2222 };
2223 }
2224
2225 for (t = 0; t < _EXEC_DIRECTORY_TYPE_MAX; t++) {
2226 char **suffix;
2227
2228 if (!params->prefix[t])
2229 continue;
2230
2231 if (strv_isempty(context->directories[t].paths))
2232 continue;
2233
8092a48c 2234 if (context->dynamic_user &&
5609f688
YW
2235 !IN_SET(t, EXEC_DIRECTORY_RUNTIME, EXEC_DIRECTORY_CONFIGURATION) &&
2236 !(context->root_directory || context->root_image)) {
6c47cd7d
LP
2237 char *private_root;
2238
2239 /* So this is for a dynamic user, and we need to make sure the process can access its own
2240 * directory. For that we overmount the usually inaccessible "private" subdirectory with a
2241 * tmpfs that makes it accessible and is empty except for the submounts we do this for. */
2242
2243 private_root = strjoin(params->prefix[t], "/private");
2244 if (!private_root) {
2245 r = -ENOMEM;
2246 goto finish;
2247 }
2248
2249 r = strv_consume(&empty_directories, private_root);
a635a7ae 2250 if (r < 0)
6c47cd7d 2251 goto finish;
6c47cd7d
LP
2252 }
2253
2254 STRV_FOREACH(suffix, context->directories[t].paths) {
2255 char *s, *d;
2256
8092a48c
YW
2257 if (context->dynamic_user &&
2258 !IN_SET(t, EXEC_DIRECTORY_RUNTIME, EXEC_DIRECTORY_CONFIGURATION))
6c47cd7d
LP
2259 s = strjoin(params->prefix[t], "/private/", *suffix);
2260 else
2261 s = strjoin(params->prefix[t], "/", *suffix);
2262 if (!s) {
2263 r = -ENOMEM;
2264 goto finish;
2265 }
2266
5609f688
YW
2267 if (context->dynamic_user &&
2268 !IN_SET(t, EXEC_DIRECTORY_RUNTIME, EXEC_DIRECTORY_CONFIGURATION) &&
2269 (context->root_directory || context->root_image))
2270 /* When RootDirectory= or RootImage= are set, then the symbolic link to the private
2271 * directory is not created on the root directory. So, let's bind-mount the directory
2272 * on the 'non-private' place. */
2273 d = strjoin(params->prefix[t], "/", *suffix);
2274 else
2275 d = strdup(s);
6c47cd7d
LP
2276 if (!d) {
2277 free(s);
2278 r = -ENOMEM;
2279 goto finish;
2280 }
2281
2282 bind_mounts[h++] = (BindMount) {
2283 .source = s,
2284 .destination = d,
2285 .read_only = false,
2286 .recursive = true,
2287 .ignore_enoent = false,
2288 };
2289 }
2290 }
2291
2292 assert(h == n);
2293
2294 *ret_bind_mounts = bind_mounts;
2295 *ret_n_bind_mounts = n;
ae2a15bc 2296 *ret_empty_directories = TAKE_PTR(empty_directories);
6c47cd7d
LP
2297
2298 return (int) n;
2299
2300finish:
2301 bind_mount_free_many(bind_mounts, h);
2302 return r;
2303}
2304
6818c54c 2305static int apply_mount_namespace(
34cf6c43
YW
2306 const Unit *u,
2307 const ExecCommand *command,
6818c54c
LP
2308 const ExecContext *context,
2309 const ExecParameters *params,
34cf6c43 2310 const ExecRuntime *runtime) {
6818c54c 2311
7bcef4ef 2312 _cleanup_strv_free_ char **empty_directories = NULL;
93c6bb51 2313 char *tmp = NULL, *var = NULL;
915e6d16 2314 const char *root_dir = NULL, *root_image = NULL;
b5a33299 2315 NamespaceInfo ns_info = {};
165a31c0 2316 bool needs_sandboxing;
6c47cd7d 2317 BindMount *bind_mounts = NULL;
da6053d0 2318 size_t n_bind_mounts = 0;
6818c54c 2319 int r;
93c6bb51 2320
2b3c1b9e
DH
2321 assert(context);
2322
93c6bb51
DH
2323 /* The runtime struct only contains the parent of the private /tmp,
2324 * which is non-accessible to world users. Inside of it there's a /tmp
2325 * that is sticky, and that's the one we want to use here. */
2326
2327 if (context->private_tmp && runtime) {
2328 if (runtime->tmp_dir)
2329 tmp = strjoina(runtime->tmp_dir, "/tmp");
2330 if (runtime->var_tmp_dir)
2331 var = strjoina(runtime->var_tmp_dir, "/tmp");
2332 }
2333
915e6d16
LP
2334 if (params->flags & EXEC_APPLY_CHROOT) {
2335 root_image = context->root_image;
2336
2337 if (!root_image)
2338 root_dir = context->root_directory;
2339 }
93c6bb51 2340
6c47cd7d
LP
2341 r = compile_bind_mounts(context, params, &bind_mounts, &n_bind_mounts, &empty_directories);
2342 if (r < 0)
2343 return r;
2344
af964954
DH
2345 /*
2346 * If DynamicUser=no and RootDirectory= is set then lets pass a relaxed
2347 * sandbox info, otherwise enforce it, don't ignore protected paths and
2348 * fail if we are enable to apply the sandbox inside the mount namespace.
2349 */
2350 if (!context->dynamic_user && root_dir)
2351 ns_info.ignore_protect_paths = true;
2352
165a31c0 2353 needs_sandboxing = (params->flags & EXEC_APPLY_SANDBOXING) && !(command->flags & EXEC_COMMAND_FULLY_PRIVILEGED);
6818c54c 2354
b5a33299
YW
2355 if (needs_sandboxing)
2356 ns_info = (NamespaceInfo) {
2357 .ignore_protect_paths = false,
2358 .private_dev = context->private_devices,
2359 .protect_control_groups = context->protect_control_groups,
2360 .protect_kernel_tunables = context->protect_kernel_tunables,
2361 .protect_kernel_modules = context->protect_kernel_modules,
2362 .mount_apivfs = context->mount_apivfs,
2363 };
2364
915e6d16 2365 r = setup_namespace(root_dir, root_image,
7bcef4ef 2366 &ns_info, context->read_write_paths,
165a31c0
LP
2367 needs_sandboxing ? context->read_only_paths : NULL,
2368 needs_sandboxing ? context->inaccessible_paths : NULL,
6c47cd7d
LP
2369 empty_directories,
2370 bind_mounts,
2371 n_bind_mounts,
2abd4e38
YW
2372 context->temporary_filesystems,
2373 context->n_temporary_filesystems,
93c6bb51
DH
2374 tmp,
2375 var,
165a31c0
LP
2376 needs_sandboxing ? context->protect_home : PROTECT_HOME_NO,
2377 needs_sandboxing ? context->protect_system : PROTECT_SYSTEM_NO,
915e6d16
LP
2378 context->mount_flags,
2379 DISSECT_IMAGE_DISCARD_ON_LOOP);
93c6bb51 2380
6c47cd7d
LP
2381 bind_mount_free_many(bind_mounts, n_bind_mounts);
2382
93c6bb51
DH
2383 /* If we couldn't set up the namespace this is probably due to a
2384 * missing capability. In this case, silently proceeed. */
2385 if (IN_SET(r, -EPERM, -EACCES)) {
93c6bb51 2386 log_unit_debug_errno(u, r, "Failed to set up namespace, assuming containerized execution, ignoring: %m");
86ffb325 2387 return 0;
93c6bb51
DH
2388 }
2389
2390 return r;
2391}
2392
915e6d16
LP
2393static int apply_working_directory(
2394 const ExecContext *context,
2395 const ExecParameters *params,
2396 const char *home,
376fecf6
LP
2397 const bool needs_mount_ns,
2398 int *exit_status) {
915e6d16 2399
6732edab 2400 const char *d, *wd;
2b3c1b9e
DH
2401
2402 assert(context);
376fecf6 2403 assert(exit_status);
2b3c1b9e 2404
6732edab
LP
2405 if (context->working_directory_home) {
2406
376fecf6
LP
2407 if (!home) {
2408 *exit_status = EXIT_CHDIR;
6732edab 2409 return -ENXIO;
376fecf6 2410 }
6732edab 2411
2b3c1b9e 2412 wd = home;
6732edab
LP
2413
2414 } else if (context->working_directory)
2b3c1b9e
DH
2415 wd = context->working_directory;
2416 else
2417 wd = "/";
e7f1e7c6
DH
2418
2419 if (params->flags & EXEC_APPLY_CHROOT) {
2420 if (!needs_mount_ns && context->root_directory)
376fecf6
LP
2421 if (chroot(context->root_directory) < 0) {
2422 *exit_status = EXIT_CHROOT;
e7f1e7c6 2423 return -errno;
376fecf6 2424 }
e7f1e7c6 2425
2b3c1b9e
DH
2426 d = wd;
2427 } else
3b0e5bb5 2428 d = prefix_roota(context->root_directory, wd);
e7f1e7c6 2429
376fecf6
LP
2430 if (chdir(d) < 0 && !context->working_directory_missing_ok) {
2431 *exit_status = EXIT_CHDIR;
2b3c1b9e 2432 return -errno;
376fecf6 2433 }
e7f1e7c6
DH
2434
2435 return 0;
2436}
2437
b1edf445 2438static int setup_keyring(
34cf6c43 2439 const Unit *u,
b1edf445
LP
2440 const ExecContext *context,
2441 const ExecParameters *p,
2442 uid_t uid, gid_t gid) {
2443
74dd6b51 2444 key_serial_t keyring;
e64c2d0b
DJL
2445 int r = 0;
2446 uid_t saved_uid;
2447 gid_t saved_gid;
74dd6b51
LP
2448
2449 assert(u);
b1edf445 2450 assert(context);
74dd6b51
LP
2451 assert(p);
2452
2453 /* Let's set up a new per-service "session" kernel keyring for each system service. This has the benefit that
2454 * each service runs with its own keyring shared among all processes of the service, but with no hook-up beyond
2455 * that scope, and in particular no link to the per-UID keyring. If we don't do this the keyring will be
2456 * automatically created on-demand and then linked to the per-UID keyring, by the kernel. The kernel's built-in
2457 * on-demand behaviour is very appropriate for login users, but probably not so much for system services, where
2458 * UIDs are not necessarily specific to a service but reused (at least in the case of UID 0). */
2459
2460 if (!(p->flags & EXEC_NEW_KEYRING))
2461 return 0;
2462
b1edf445
LP
2463 if (context->keyring_mode == EXEC_KEYRING_INHERIT)
2464 return 0;
2465
e64c2d0b
DJL
2466 /* Acquiring a reference to the user keyring is nasty. We briefly change identity in order to get things set up
2467 * properly by the kernel. If we don't do that then we can't create it atomically, and that sucks for parallel
2468 * execution. This mimics what pam_keyinit does, too. Setting up session keyring, to be owned by the right user
2469 * & group is just as nasty as acquiring a reference to the user keyring. */
2470
2471 saved_uid = getuid();
2472 saved_gid = getgid();
2473
2474 if (gid_is_valid(gid) && gid != saved_gid) {
2475 if (setregid(gid, -1) < 0)
2476 return log_unit_error_errno(u, errno, "Failed to change GID for user keyring: %m");
2477 }
2478
2479 if (uid_is_valid(uid) && uid != saved_uid) {
2480 if (setreuid(uid, -1) < 0) {
2481 r = log_unit_error_errno(u, errno, "Failed to change UID for user keyring: %m");
2482 goto out;
2483 }
2484 }
2485
74dd6b51
LP
2486 keyring = keyctl(KEYCTL_JOIN_SESSION_KEYRING, 0, 0, 0, 0);
2487 if (keyring == -1) {
2488 if (errno == ENOSYS)
8002fb97 2489 log_unit_debug_errno(u, errno, "Kernel keyring not supported, ignoring.");
74dd6b51 2490 else if (IN_SET(errno, EACCES, EPERM))
8002fb97 2491 log_unit_debug_errno(u, errno, "Kernel keyring access prohibited, ignoring.");
74dd6b51 2492 else if (errno == EDQUOT)
8002fb97 2493 log_unit_debug_errno(u, errno, "Out of kernel keyrings to allocate, ignoring.");
74dd6b51 2494 else
e64c2d0b 2495 r = log_unit_error_errno(u, errno, "Setting up kernel keyring failed: %m");
74dd6b51 2496
e64c2d0b 2497 goto out;
74dd6b51
LP
2498 }
2499
e64c2d0b
DJL
2500 /* When requested link the user keyring into the session keyring. */
2501 if (context->keyring_mode == EXEC_KEYRING_SHARED) {
2502
2503 if (keyctl(KEYCTL_LINK,
2504 KEY_SPEC_USER_KEYRING,
2505 KEY_SPEC_SESSION_KEYRING, 0, 0) < 0) {
2506 r = log_unit_error_errno(u, errno, "Failed to link user keyring into session keyring: %m");
2507 goto out;
2508 }
2509 }
2510
2511 /* Restore uid/gid back */
2512 if (uid_is_valid(uid) && uid != saved_uid) {
2513 if (setreuid(saved_uid, -1) < 0) {
2514 r = log_unit_error_errno(u, errno, "Failed to change UID back for user keyring: %m");
2515 goto out;
2516 }
2517 }
2518
2519 if (gid_is_valid(gid) && gid != saved_gid) {
2520 if (setregid(saved_gid, -1) < 0)
2521 return log_unit_error_errno(u, errno, "Failed to change GID back for user keyring: %m");
2522 }
2523
2524 /* Populate they keyring with the invocation ID by default, as original saved_uid. */
b3415f5d
LP
2525 if (!sd_id128_is_null(u->invocation_id)) {
2526 key_serial_t key;
2527
2528 key = add_key("user", "invocation_id", &u->invocation_id, sizeof(u->invocation_id), KEY_SPEC_SESSION_KEYRING);
2529 if (key == -1)
8002fb97 2530 log_unit_debug_errno(u, errno, "Failed to add invocation ID to keyring, ignoring: %m");
b3415f5d
LP
2531 else {
2532 if (keyctl(KEYCTL_SETPERM, key,
2533 KEY_POS_VIEW|KEY_POS_READ|KEY_POS_SEARCH|
2534 KEY_USR_VIEW|KEY_USR_READ|KEY_USR_SEARCH, 0, 0) < 0)
e64c2d0b 2535 r = log_unit_error_errno(u, errno, "Failed to restrict invocation ID permission: %m");
b3415f5d
LP
2536 }
2537 }
2538
e64c2d0b
DJL
2539out:
2540 /* Revert back uid & gid for the the last time, and exit */
2541 /* no extra logging, as only the first already reported error matters */
2542 if (getuid() != saved_uid)
2543 (void) setreuid(saved_uid, -1);
b1edf445 2544
e64c2d0b
DJL
2545 if (getgid() != saved_gid)
2546 (void) setregid(saved_gid, -1);
b1edf445 2547
e64c2d0b 2548 return r;
74dd6b51
LP
2549}
2550
/* Appends the valid (i.e. non-negative) file descriptors of 'pair' to 'array', advancing the element
 * counter *n for each one added. A NULL 'pair' is accepted and adds nothing. The caller must make sure
 * the array has room for up to two more entries. */
static void append_socket_pair(int *array, size_t *n, const int pair[2]) {
        size_t i;

        assert(array);
        assert(n);

        if (!pair)
                return;

        for (i = 0; i < 2; i++)
                if (pair[i] >= 0)
                        array[(*n)++] = pair[i];
}
2563
a34ceba6
LP
2564static int close_remaining_fds(
2565 const ExecParameters *params,
34cf6c43
YW
2566 const ExecRuntime *runtime,
2567 const DynamicCreds *dcreds,
00d9ef85 2568 int user_lookup_fd,
a34ceba6 2569 int socket_fd,
da6053d0 2570 int *fds, size_t n_fds) {
a34ceba6 2571
da6053d0 2572 size_t n_dont_close = 0;
00d9ef85 2573 int dont_close[n_fds + 12];
a34ceba6
LP
2574
2575 assert(params);
2576
2577 if (params->stdin_fd >= 0)
2578 dont_close[n_dont_close++] = params->stdin_fd;
2579 if (params->stdout_fd >= 0)
2580 dont_close[n_dont_close++] = params->stdout_fd;
2581 if (params->stderr_fd >= 0)
2582 dont_close[n_dont_close++] = params->stderr_fd;
2583
2584 if (socket_fd >= 0)
2585 dont_close[n_dont_close++] = socket_fd;
2586 if (n_fds > 0) {
2587 memcpy(dont_close + n_dont_close, fds, sizeof(int) * n_fds);
2588 n_dont_close += n_fds;
2589 }
2590
29206d46
LP
2591 if (runtime)
2592 append_socket_pair(dont_close, &n_dont_close, runtime->netns_storage_socket);
2593
2594 if (dcreds) {
2595 if (dcreds->user)
2596 append_socket_pair(dont_close, &n_dont_close, dcreds->user->storage_socket);
2597 if (dcreds->group)
2598 append_socket_pair(dont_close, &n_dont_close, dcreds->group->storage_socket);
a34ceba6
LP
2599 }
2600
00d9ef85
LP
2601 if (user_lookup_fd >= 0)
2602 dont_close[n_dont_close++] = user_lookup_fd;
2603
a34ceba6
LP
2604 return close_all_fds(dont_close, n_dont_close);
2605}
2606
00d9ef85
LP
2607static int send_user_lookup(
2608 Unit *unit,
2609 int user_lookup_fd,
2610 uid_t uid,
2611 gid_t gid) {
2612
2613 assert(unit);
2614
2615 /* Send the resolved UID/GID to PID 1 after we learnt it. We send a single datagram, containing the UID/GID
2616 * data as well as the unit name. Note that we suppress sending this if no user/group to resolve was
2617 * specified. */
2618
2619 if (user_lookup_fd < 0)
2620 return 0;
2621
2622 if (!uid_is_valid(uid) && !gid_is_valid(gid))
2623 return 0;
2624
2625 if (writev(user_lookup_fd,
2626 (struct iovec[]) {
e6a7ec4b
LP
2627 IOVEC_INIT(&uid, sizeof(uid)),
2628 IOVEC_INIT(&gid, sizeof(gid)),
2629 IOVEC_INIT_STRING(unit->id) }, 3) < 0)
00d9ef85
LP
2630 return -errno;
2631
2632 return 0;
2633}
2634
6732edab
LP
2635static int acquire_home(const ExecContext *c, uid_t uid, const char** home, char **buf) {
2636 int r;
2637
2638 assert(c);
2639 assert(home);
2640 assert(buf);
2641
2642 /* If WorkingDirectory=~ is set, try to acquire a usable home directory. */
2643
2644 if (*home)
2645 return 0;
2646
2647 if (!c->working_directory_home)
2648 return 0;
2649
2650 if (uid == 0) {
2651 /* Hardcode /root as home directory for UID 0 */
2652 *home = "/root";
2653 return 1;
2654 }
2655
2656 r = get_home_dir(buf);
2657 if (r < 0)
2658 return r;
2659
2660 *home = *buf;
2661 return 1;
2662}
2663
da50b85a
LP
2664static int compile_suggested_paths(const ExecContext *c, const ExecParameters *p, char ***ret) {
2665 _cleanup_strv_free_ char ** list = NULL;
2666 ExecDirectoryType t;
2667 int r;
2668
2669 assert(c);
2670 assert(p);
2671 assert(ret);
2672
2673 assert(c->dynamic_user);
2674
2675 /* Compile a list of paths that it might make sense to read the owning UID from to use as initial candidate for
2676 * dynamic UID allocation, in order to save us from doing costly recursive chown()s of the special
2677 * directories. */
2678
2679 for (t = 0; t < _EXEC_DIRECTORY_TYPE_MAX; t++) {
2680 char **i;
2681
2682 if (t == EXEC_DIRECTORY_CONFIGURATION)
2683 continue;
2684
2685 if (!p->prefix[t])
2686 continue;
2687
2688 STRV_FOREACH(i, c->directories[t].paths) {
2689 char *e;
2690
8092a48c
YW
2691 if (t == EXEC_DIRECTORY_RUNTIME)
2692 e = strjoin(p->prefix[t], "/", *i);
2693 else
2694 e = strjoin(p->prefix[t], "/private/", *i);
da50b85a
LP
2695 if (!e)
2696 return -ENOMEM;
2697
2698 r = strv_consume(&list, e);
2699 if (r < 0)
2700 return r;
2701 }
2702 }
2703
ae2a15bc 2704 *ret = TAKE_PTR(list);
da50b85a
LP
2705
2706 return 0;
2707}
2708
34cf6c43
YW
2709static char *exec_command_line(char **argv);
2710
ff0af2a1 2711static int exec_child(
f2341e0a 2712 Unit *unit,
34cf6c43 2713 const ExecCommand *command,
ff0af2a1
LP
2714 const ExecContext *context,
2715 const ExecParameters *params,
2716 ExecRuntime *runtime,
29206d46 2717 DynamicCreds *dcreds,
ff0af2a1
LP
2718 char **argv,
2719 int socket_fd,
52c239d7 2720 int named_iofds[3],
4c47affc 2721 int *fds,
da6053d0
LP
2722 size_t n_storage_fds,
2723 size_t n_socket_fds,
ff0af2a1 2724 char **files_env,
00d9ef85 2725 int user_lookup_fd,
12145637 2726 int *exit_status) {
d35fbf6b 2727
2065ca69 2728 _cleanup_strv_free_ char **our_env = NULL, **pass_env = NULL, **accum_env = NULL, **final_argv = NULL;
7f59dd35 2729 _cleanup_free_ char *home_buffer = NULL;
4d885bd3
DH
2730 _cleanup_free_ gid_t *supplementary_gids = NULL;
2731 const char *username = NULL, *groupname = NULL;
2b3c1b9e 2732 const char *home = NULL, *shell = NULL;
7bce046b
LP
2733 dev_t journal_stream_dev = 0;
2734 ino_t journal_stream_ino = 0;
165a31c0
LP
2735 bool needs_sandboxing, /* Do we need to set up full sandboxing? (i.e. all namespacing, all MAC stuff, caps, yadda yadda */
2736 needs_setuid, /* Do we need to do the actual setresuid()/setresgid() calls? */
2737 needs_mount_namespace, /* Do we need to set up a mount namespace for this kernel? */
2738 needs_ambient_hack; /* Do we need to apply the ambient capabilities hack? */
349cc4a5 2739#if HAVE_SELINUX
7f59dd35 2740 _cleanup_free_ char *mac_selinux_context_net = NULL;
43b1f709 2741 bool use_selinux = false;
ecfbc84f 2742#endif
f9fa32f0 2743#if ENABLE_SMACK
43b1f709 2744 bool use_smack = false;
ecfbc84f 2745#endif
349cc4a5 2746#if HAVE_APPARMOR
43b1f709 2747 bool use_apparmor = false;
ecfbc84f 2748#endif
fed1e721
LP
2749 uid_t uid = UID_INVALID;
2750 gid_t gid = GID_INVALID;
34a5df58 2751 int r, ngids = 0;
da6053d0 2752 size_t n_fds;
3536f49e 2753 ExecDirectoryType dt;
165a31c0 2754 int secure_bits;
034c6ed7 2755
f2341e0a 2756 assert(unit);
5cb5a6ff
LP
2757 assert(command);
2758 assert(context);
d35fbf6b 2759 assert(params);
ff0af2a1 2760 assert(exit_status);
d35fbf6b
DM
2761
2762 rename_process_from_path(command->path);
2763
2764 /* We reset exactly these signals, since they are the
2765 * only ones we set to SIG_IGN in the main daemon. All
2766 * others we leave untouched because we set them to
2767 * SIG_DFL or a valid handler initially, both of which
2768 * will be demoted to SIG_DFL. */
ce30c8dc
LP
2769 (void) default_signals(SIGNALS_CRASH_HANDLER,
2770 SIGNALS_IGNORE, -1);
d35fbf6b
DM
2771
2772 if (context->ignore_sigpipe)
ce30c8dc 2773 (void) ignore_signals(SIGPIPE, -1);
d35fbf6b 2774
ff0af2a1
LP
2775 r = reset_signal_mask();
2776 if (r < 0) {
2777 *exit_status = EXIT_SIGNAL_MASK;
12145637 2778 return log_unit_error_errno(unit, r, "Failed to set process signal mask: %m");
d35fbf6b 2779 }
034c6ed7 2780
d35fbf6b
DM
2781 if (params->idle_pipe)
2782 do_idle_pipe_dance(params->idle_pipe);
4f2d528d 2783
2c027c62
LP
2784 /* Close fds we don't need very early to make sure we don't block init reexecution because it cannot bind its
2785 * sockets. Among the fds we close are the logging fds, and we want to keep them closed, so that we don't have
2786 * any fds open we don't really want open during the transition. In order to make logging work, we switch the
2787 * log subsystem into open_when_needed mode, so that it reopens the logs on every single log call. */
ff0af2a1 2788
d35fbf6b 2789 log_forget_fds();
2c027c62 2790 log_set_open_when_needed(true);
4f2d528d 2791
40a80078
LP
2792 /* In case anything used libc syslog(), close this here, too */
2793 closelog();
2794
4c47affc 2795 n_fds = n_storage_fds + n_socket_fds;
00d9ef85 2796 r = close_remaining_fds(params, runtime, dcreds, user_lookup_fd, socket_fd, fds, n_fds);
ff0af2a1
LP
2797 if (r < 0) {
2798 *exit_status = EXIT_FDS;
12145637 2799 return log_unit_error_errno(unit, r, "Failed to close unwanted file descriptors: %m");
8c7be95e
LP
2800 }
2801
d35fbf6b
DM
2802 if (!context->same_pgrp)
2803 if (setsid() < 0) {
ff0af2a1 2804 *exit_status = EXIT_SETSID;
12145637 2805 return log_unit_error_errno(unit, errno, "Failed to create new process session: %m");
d35fbf6b 2806 }
9e2f7c11 2807
1e22b5cd 2808 exec_context_tty_reset(context, params);
d35fbf6b 2809
c891efaf 2810 if (unit_shall_confirm_spawn(unit)) {
7d5ceb64 2811 const char *vc = params->confirm_spawn;
3b20f877
FB
2812 _cleanup_free_ char *cmdline = NULL;
2813
2814 cmdline = exec_command_line(argv);
2815 if (!cmdline) {
0460aa5c 2816 *exit_status = EXIT_MEMORY;
12145637 2817 return log_oom();
3b20f877 2818 }
d35fbf6b 2819
eedf223a 2820 r = ask_for_confirmation(vc, unit, cmdline);
3b20f877
FB
2821 if (r != CONFIRM_EXECUTE) {
2822 if (r == CONFIRM_PRETEND_SUCCESS) {
2823 *exit_status = EXIT_SUCCESS;
2824 return 0;
2825 }
ff0af2a1 2826 *exit_status = EXIT_CONFIRM;
12145637 2827 log_unit_error(unit, "Execution cancelled by the user");
d35fbf6b 2828 return -ECANCELED;
d35fbf6b
DM
2829 }
2830 }
1a63a750 2831
29206d46 2832 if (context->dynamic_user && dcreds) {
da50b85a 2833 _cleanup_strv_free_ char **suggested_paths = NULL;
29206d46 2834
409093fe
LP
2835 /* Make sure we bypass our own NSS module for any NSS checks */
2836 if (putenv((char*) "SYSTEMD_NSS_DYNAMIC_BYPASS=1") != 0) {
2837 *exit_status = EXIT_USER;
12145637 2838 return log_unit_error_errno(unit, errno, "Failed to update environment: %m");
409093fe
LP
2839 }
2840
da50b85a
LP
2841 r = compile_suggested_paths(context, params, &suggested_paths);
2842 if (r < 0) {
2843 *exit_status = EXIT_MEMORY;
2844 return log_oom();
2845 }
2846
2847 r = dynamic_creds_realize(dcreds, suggested_paths, &uid, &gid);
ff0af2a1
LP
2848 if (r < 0) {
2849 *exit_status = EXIT_USER;
e2b0cc34
YW
2850 if (r == -EILSEQ) {
2851 log_unit_error(unit, "Failed to update dynamic user credentials: User or group with specified name already exists.");
2852 return -EOPNOTSUPP;
2853 }
12145637 2854 return log_unit_error_errno(unit, r, "Failed to update dynamic user credentials: %m");
524daa8c 2855 }
524daa8c 2856
70dd455c 2857 if (!uid_is_valid(uid)) {
29206d46 2858 *exit_status = EXIT_USER;
12145637 2859 log_unit_error(unit, "UID validation failed for \""UID_FMT"\"", uid);
70dd455c
ZJS
2860 return -ESRCH;
2861 }
2862
2863 if (!gid_is_valid(gid)) {
2864 *exit_status = EXIT_USER;
12145637 2865 log_unit_error(unit, "GID validation failed for \""GID_FMT"\"", gid);
29206d46
LP
2866 return -ESRCH;
2867 }
5bc7452b 2868
29206d46
LP
2869 if (dcreds->user)
2870 username = dcreds->user->name;
2871
2872 } else {
4d885bd3
DH
2873 r = get_fixed_user(context, &username, &uid, &gid, &home, &shell);
2874 if (r < 0) {
2875 *exit_status = EXIT_USER;
12145637 2876 return log_unit_error_errno(unit, r, "Failed to determine user credentials: %m");
5bc7452b 2877 }
5bc7452b 2878
4d885bd3
DH
2879 r = get_fixed_group(context, &groupname, &gid);
2880 if (r < 0) {
2881 *exit_status = EXIT_GROUP;
12145637 2882 return log_unit_error_errno(unit, r, "Failed to determine group credentials: %m");
4d885bd3 2883 }
cdc5d5c5 2884 }
29206d46 2885
cdc5d5c5
DH
2886 /* Initialize user supplementary groups and get SupplementaryGroups= ones */
2887 r = get_supplementary_groups(context, username, groupname, gid,
2888 &supplementary_gids, &ngids);
2889 if (r < 0) {
2890 *exit_status = EXIT_GROUP;
12145637 2891 return log_unit_error_errno(unit, r, "Failed to determine supplementary groups: %m");
29206d46 2892 }
5bc7452b 2893
00d9ef85
LP
2894 r = send_user_lookup(unit, user_lookup_fd, uid, gid);
2895 if (r < 0) {
2896 *exit_status = EXIT_USER;
12145637 2897 return log_unit_error_errno(unit, r, "Failed to send user credentials to PID1: %m");
00d9ef85
LP
2898 }
2899
2900 user_lookup_fd = safe_close(user_lookup_fd);
2901
6732edab
LP
2902 r = acquire_home(context, uid, &home, &home_buffer);
2903 if (r < 0) {
2904 *exit_status = EXIT_CHDIR;
12145637 2905 return log_unit_error_errno(unit, r, "Failed to determine $HOME for user: %m");
6732edab
LP
2906 }
2907
d35fbf6b
DM
2908 /* If a socket is connected to STDIN/STDOUT/STDERR, we
2909 * must be sure to drop O_NONBLOCK */
2910 if (socket_fd >= 0)
a34ceba6 2911 (void) fd_nonblock(socket_fd, false);
acbb0225 2912
52c239d7 2913 r = setup_input(context, params, socket_fd, named_iofds);
ff0af2a1
LP
2914 if (r < 0) {
2915 *exit_status = EXIT_STDIN;
12145637 2916 return log_unit_error_errno(unit, r, "Failed to set up standard input: %m");
d35fbf6b 2917 }
034c6ed7 2918
52c239d7 2919 r = setup_output(unit, context, params, STDOUT_FILENO, socket_fd, named_iofds, basename(command->path), uid, gid, &journal_stream_dev, &journal_stream_ino);
ff0af2a1
LP
2920 if (r < 0) {
2921 *exit_status = EXIT_STDOUT;
12145637 2922 return log_unit_error_errno(unit, r, "Failed to set up standard output: %m");
d35fbf6b
DM
2923 }
2924
52c239d7 2925 r = setup_output(unit, context, params, STDERR_FILENO, socket_fd, named_iofds, basename(command->path), uid, gid, &journal_stream_dev, &journal_stream_ino);
ff0af2a1
LP
2926 if (r < 0) {
2927 *exit_status = EXIT_STDERR;
12145637 2928 return log_unit_error_errno(unit, r, "Failed to set up standard error output: %m");
d35fbf6b
DM
2929 }
2930
2931 if (params->cgroup_path) {
ff0af2a1
LP
2932 r = cg_attach_everywhere(params->cgroup_supported, params->cgroup_path, 0, NULL, NULL);
2933 if (r < 0) {
2934 *exit_status = EXIT_CGROUP;
12145637 2935 return log_unit_error_errno(unit, r, "Failed to attach to cgroup %s: %m", params->cgroup_path);
309bff19 2936 }
d35fbf6b 2937 }
309bff19 2938
d35fbf6b 2939 if (context->oom_score_adjust_set) {
9f8168eb
LP
2940 /* When we can't make this change due to EPERM, then let's silently skip over it. User namespaces
2941 * prohibit write access to this file, and we shouldn't trip up over that. */
2942 r = set_oom_score_adjust(context->oom_score_adjust);
12145637 2943 if (IN_SET(r, -EPERM, -EACCES))
f2341e0a 2944 log_unit_debug_errno(unit, r, "Failed to adjust OOM setting, assuming containerized execution, ignoring: %m");
12145637 2945 else if (r < 0) {
ff0af2a1 2946 *exit_status = EXIT_OOM_ADJUST;
12145637 2947 return log_unit_error_errno(unit, r, "Failed to adjust OOM setting: %m");
613b411c 2948 }
d35fbf6b
DM
2949 }
2950
2951 if (context->nice_set)
2952 if (setpriority(PRIO_PROCESS, 0, context->nice) < 0) {
ff0af2a1 2953 *exit_status = EXIT_NICE;
12145637 2954 return log_unit_error_errno(unit, errno, "Failed to set up process scheduling priority (nice level): %m");
613b411c
LP
2955 }
2956
d35fbf6b
DM
2957 if (context->cpu_sched_set) {
2958 struct sched_param param = {
2959 .sched_priority = context->cpu_sched_priority,
2960 };
2961
ff0af2a1
LP
2962 r = sched_setscheduler(0,
2963 context->cpu_sched_policy |
2964 (context->cpu_sched_reset_on_fork ?
2965 SCHED_RESET_ON_FORK : 0),
2966 &param);
2967 if (r < 0) {
2968 *exit_status = EXIT_SETSCHEDULER;
12145637 2969 return log_unit_error_errno(unit, errno, "Failed to set up CPU scheduling: %m");
fc9b2a84 2970 }
d35fbf6b 2971 }
fc9b2a84 2972
d35fbf6b
DM
2973 if (context->cpuset)
2974 if (sched_setaffinity(0, CPU_ALLOC_SIZE(context->cpuset_ncpus), context->cpuset) < 0) {
ff0af2a1 2975 *exit_status = EXIT_CPUAFFINITY;
12145637 2976 return log_unit_error_errno(unit, errno, "Failed to set up CPU affinity: %m");
034c6ed7
LP
2977 }
2978
d35fbf6b
DM
2979 if (context->ioprio_set)
2980 if (ioprio_set(IOPRIO_WHO_PROCESS, 0, context->ioprio) < 0) {
ff0af2a1 2981 *exit_status = EXIT_IOPRIO;
12145637 2982 return log_unit_error_errno(unit, errno, "Failed to set up IO scheduling priority: %m");
d35fbf6b 2983 }
da726a4d 2984
d35fbf6b
DM
2985 if (context->timer_slack_nsec != NSEC_INFINITY)
2986 if (prctl(PR_SET_TIMERSLACK, context->timer_slack_nsec) < 0) {
ff0af2a1 2987 *exit_status = EXIT_TIMERSLACK;
12145637 2988 return log_unit_error_errno(unit, errno, "Failed to set up timer slack: %m");
4c2630eb 2989 }
9eba9da4 2990
21022b9d
LP
2991 if (context->personality != PERSONALITY_INVALID) {
2992 r = safe_personality(context->personality);
2993 if (r < 0) {
ff0af2a1 2994 *exit_status = EXIT_PERSONALITY;
12145637 2995 return log_unit_error_errno(unit, r, "Failed to set up execution domain (personality): %m");
4c2630eb 2996 }
21022b9d 2997 }
94f04347 2998
d35fbf6b 2999 if (context->utmp_id)
df0ff127 3000 utmp_put_init_process(context->utmp_id, getpid_cached(), getsid(0),
6a93917d 3001 context->tty_path,
023a4f67
LP
3002 context->utmp_mode == EXEC_UTMP_INIT ? INIT_PROCESS :
3003 context->utmp_mode == EXEC_UTMP_LOGIN ? LOGIN_PROCESS :
3004 USER_PROCESS,
6a93917d 3005 username);
d35fbf6b 3006
e0d2adfd 3007 if (context->user) {
ff0af2a1
LP
3008 r = chown_terminal(STDIN_FILENO, uid);
3009 if (r < 0) {
3010 *exit_status = EXIT_STDIN;
12145637 3011 return log_unit_error_errno(unit, r, "Failed to change ownership of terminal: %m");
071830ff 3012 }
d35fbf6b 3013 }
8e274523 3014
62b9bb26
LP
3015 /* If delegation is enabled we'll pass ownership of the cgroup to the user of the new process. On cgroupsv1
3016 * this is only about systemd's own hierarchy, i.e. not the controller hierarchies, simply because that's not
3017 * safe. On cgroupsv2 there's only one hierarchy anyway, and delegation is safe there, hence in that case only
3018 * touch a single hierarchy too. */
584b8688 3019 if (params->cgroup_path && context->user && (params->flags & EXEC_CGROUP_DELEGATE)) {
62b9bb26 3020 r = cg_set_access(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, uid, gid);
ff0af2a1
LP
3021 if (r < 0) {
3022 *exit_status = EXIT_CGROUP;
12145637 3023 return log_unit_error_errno(unit, r, "Failed to adjust control group access: %m");
034c6ed7 3024 }
d35fbf6b 3025 }
034c6ed7 3026
72fd1768 3027 for (dt = 0; dt < _EXEC_DIRECTORY_TYPE_MAX; dt++) {
8679efde 3028 r = setup_exec_directory(context, params, uid, gid, dt, exit_status);
12145637
LP
3029 if (r < 0)
3030 return log_unit_error_errno(unit, r, "Failed to set up special execution directory in %s: %m", params->prefix[dt]);
d35fbf6b 3031 }
94f04347 3032
7bce046b 3033 r = build_environment(
fd63e712 3034 unit,
7bce046b
LP
3035 context,
3036 params,
3037 n_fds,
3038 home,
3039 username,
3040 shell,
3041 journal_stream_dev,
3042 journal_stream_ino,
3043 &our_env);
2065ca69
JW
3044 if (r < 0) {
3045 *exit_status = EXIT_MEMORY;
12145637 3046 return log_oom();
2065ca69
JW
3047 }
3048
3049 r = build_pass_environment(context, &pass_env);
3050 if (r < 0) {
3051 *exit_status = EXIT_MEMORY;
12145637 3052 return log_oom();
2065ca69
JW
3053 }
3054
3055 accum_env = strv_env_merge(5,
3056 params->environment,
3057 our_env,
3058 pass_env,
3059 context->environment,
3060 files_env,
3061 NULL);
3062 if (!accum_env) {
3063 *exit_status = EXIT_MEMORY;
12145637 3064 return log_oom();
2065ca69 3065 }
1280503b 3066 accum_env = strv_env_clean(accum_env);
2065ca69 3067
096424d1 3068 (void) umask(context->umask);
b213e1c1 3069
b1edf445 3070 r = setup_keyring(unit, context, params, uid, gid);
74dd6b51
LP
3071 if (r < 0) {
3072 *exit_status = EXIT_KEYRING;
12145637 3073 return log_unit_error_errno(unit, r, "Failed to set up kernel keyring: %m");
74dd6b51
LP
3074 }
3075
165a31c0 3076 /* We need sandboxing if the caller asked us to apply it and the command isn't explicitly excepted from it */
1703fa41 3077 needs_sandboxing = (params->flags & EXEC_APPLY_SANDBOXING) && !(command->flags & EXEC_COMMAND_FULLY_PRIVILEGED);
7f18ef0a 3078
165a31c0
LP
3079 /* We need the ambient capability hack, if the caller asked us to apply it and the command is marked for it, and the kernel doesn't actually support ambient caps */
3080 needs_ambient_hack = (params->flags & EXEC_APPLY_SANDBOXING) && (command->flags & EXEC_COMMAND_AMBIENT_MAGIC) && !ambient_capabilities_supported();
7f18ef0a 3081
165a31c0
LP
3082 /* We need setresuid() if the caller asked us to apply sandboxing and the command isn't explicitly excepted from either whole sandboxing or just setresuid() itself, and the ambient hack is not desired */
3083 if (needs_ambient_hack)
3084 needs_setuid = false;
3085 else
3086 needs_setuid = (params->flags & EXEC_APPLY_SANDBOXING) && !(command->flags & (EXEC_COMMAND_FULLY_PRIVILEGED|EXEC_COMMAND_NO_SETUID));
3087
3088 if (needs_sandboxing) {
7f18ef0a
FK
3089 /* MAC enablement checks need to be done before a new mount ns is created, as they rely on /sys being
3090 * present. The actual MAC context application will happen later, as late as possible, to avoid
3091 * impacting our own code paths. */
3092
349cc4a5 3093#if HAVE_SELINUX
43b1f709 3094 use_selinux = mac_selinux_use();
7f18ef0a 3095#endif
f9fa32f0 3096#if ENABLE_SMACK
43b1f709 3097 use_smack = mac_smack_use();
7f18ef0a 3098#endif
349cc4a5 3099#if HAVE_APPARMOR
43b1f709 3100 use_apparmor = mac_apparmor_use();
7f18ef0a 3101#endif
165a31c0 3102 }
7f18ef0a 3103
165a31c0
LP
3104 if (needs_setuid) {
3105 if (context->pam_name && username) {
3106 r = setup_pam(context->pam_name, username, uid, gid, context->tty_path, &accum_env, fds, n_fds);
3107 if (r < 0) {
3108 *exit_status = EXIT_PAM;
12145637 3109 return log_unit_error_errno(unit, r, "Failed to set up PAM session: %m");
165a31c0
LP
3110 }
3111 }
b213e1c1 3112 }
ac45f971 3113
d35fbf6b 3114 if (context->private_network && runtime && runtime->netns_storage_socket[0] >= 0) {
6e2d7c4f
MS
3115 if (ns_type_supported(NAMESPACE_NET)) {
3116 r = setup_netns(runtime->netns_storage_socket);
3117 if (r < 0) {
3118 *exit_status = EXIT_NETWORK;
3119 return log_unit_error_errno(unit, r, "Failed to set up network namespacing: %m");
3120 }
3121 } else
3122 log_unit_warning(unit, "PrivateNetwork=yes is configured, but the kernel does not support network namespaces, ignoring.");
d35fbf6b 3123 }
169c1bda 3124
ee818b89 3125 needs_mount_namespace = exec_needs_mount_namespace(context, params, runtime);
ee818b89 3126 if (needs_mount_namespace) {
6818c54c 3127 r = apply_mount_namespace(unit, command, context, params, runtime);
3fbe8dbe
LP
3128 if (r < 0) {
3129 *exit_status = EXIT_NAMESPACE;
12145637 3130 return log_unit_error_errno(unit, r, "Failed to set up mount namespacing: %m");
3fbe8dbe 3131 }
d35fbf6b 3132 }
81a2b7ce 3133
50b3dfb9 3134 /* Apply just after mount namespace setup */
376fecf6 3135 r = apply_working_directory(context, params, home, needs_mount_namespace, exit_status);
12145637
LP
3136 if (r < 0)
3137 return log_unit_error_errno(unit, r, "Changing to the requested working directory failed: %m");
50b3dfb9 3138
bbeea271 3139 /* Drop groups as early as possible */
165a31c0 3140 if (needs_setuid) {
709dbeac 3141 r = enforce_groups(gid, supplementary_gids, ngids);
096424d1
LP
3142 if (r < 0) {
3143 *exit_status = EXIT_GROUP;
12145637 3144 return log_unit_error_errno(unit, r, "Changing group credentials failed: %m");
096424d1 3145 }
165a31c0 3146 }
096424d1 3147
165a31c0 3148 if (needs_sandboxing) {
349cc4a5 3149#if HAVE_SELINUX
43b1f709 3150 if (use_selinux && params->selinux_context_net && socket_fd >= 0) {
937ccce9
LP
3151 r = mac_selinux_get_child_mls_label(socket_fd, command->path, context->selinux_context, &mac_selinux_context_net);
3152 if (r < 0) {
3153 *exit_status = EXIT_SELINUX_CONTEXT;
12145637 3154 return log_unit_error_errno(unit, r, "Failed to determine SELinux context: %m");
937ccce9 3155 }
9008e1ac 3156 }
9008e1ac
MS
3157#endif
3158
937ccce9
LP
3159 if (context->private_users) {
3160 r = setup_private_users(uid, gid);
3161 if (r < 0) {
3162 *exit_status = EXIT_USER;
12145637 3163 return log_unit_error_errno(unit, r, "Failed to set up user namespacing: %m");
937ccce9 3164 }
d251207d
LP
3165 }
3166 }
3167
165a31c0
LP
3168 /* We repeat the fd closing here, to make sure that nothing is leaked from the PAM modules. Note that we are
3169 * more aggressive this time since socket_fd and the netns fds we don't need anymore. The custom endpoint fd
3170 * was needed to upload the policy and can now be closed as well. */
ff0af2a1
LP
3171 r = close_all_fds(fds, n_fds);
3172 if (r >= 0)
3173 r = shift_fds(fds, n_fds);
3174 if (r >= 0)
4c47affc 3175 r = flags_fds(fds, n_storage_fds, n_socket_fds, context->non_blocking);
ff0af2a1
LP
3176 if (r < 0) {
3177 *exit_status = EXIT_FDS;
12145637 3178 return log_unit_error_errno(unit, r, "Failed to adjust passed file descriptors: %m");
d35fbf6b 3179 }
e66cf1a3 3180
165a31c0 3181 secure_bits = context->secure_bits;
e66cf1a3 3182
165a31c0
LP
3183 if (needs_sandboxing) {
3184 uint64_t bset;
34a5df58 3185 int which_failed;
755d4b67 3186
34a5df58
LP
3187 r = setrlimit_closest_all((const struct rlimit* const *) context->rlimit, &which_failed);
3188 if (r < 0) {
3189 *exit_status = EXIT_LIMITS;
3190 return log_unit_error_errno(unit, r, "Failed to adjust resource limit RLIMIT_%s: %m", rlimit_to_string(which_failed));
e66cf1a3
LP
3191 }
3192
f4170c67
LP
3193 /* Set the RTPRIO resource limit to 0, but only if nothing else was explicitly requested. */
3194 if (context->restrict_realtime && !context->rlimit[RLIMIT_RTPRIO]) {
3195 if (setrlimit(RLIMIT_RTPRIO, &RLIMIT_MAKE_CONST(0)) < 0) {
3196 *exit_status = EXIT_LIMITS;
12145637 3197 return log_unit_error_errno(unit, errno, "Failed to adjust RLIMIT_RTPRIO resource limit: %m");
f4170c67
LP
3198 }
3199 }
3200
37ac2744
JB
3201#if ENABLE_SMACK
3202 /* LSM Smack needs the capability CAP_MAC_ADMIN to change the current execution security context of the
3203 * process. This is the latest place before dropping capabilities. Other MAC context are set later. */
3204 if (use_smack) {
3205 r = setup_smack(context, command);
3206 if (r < 0) {
3207 *exit_status = EXIT_SMACK_PROCESS_LABEL;
3208 return log_unit_error_errno(unit, r, "Failed to set SMACK process label: %m");
3209 }
3210 }
3211#endif
3212
165a31c0
LP
3213 bset = context->capability_bounding_set;
3214 /* If the ambient caps hack is enabled (which means the kernel can't do them, and the user asked for
3215 * our magic fallback), then let's add some extra caps, so that the service can drop privs of its own,
3216 * instead of us doing that */
3217 if (needs_ambient_hack)
3218 bset |= (UINT64_C(1) << CAP_SETPCAP) |
3219 (UINT64_C(1) << CAP_SETUID) |
3220 (UINT64_C(1) << CAP_SETGID);
3221
3222 if (!cap_test_all(bset)) {
3223 r = capability_bounding_set_drop(bset, false);
ff0af2a1
LP
3224 if (r < 0) {
3225 *exit_status = EXIT_CAPABILITIES;
12145637 3226 return log_unit_error_errno(unit, r, "Failed to drop capabilities: %m");
3b8bddde 3227 }
4c2630eb 3228 }
3b8bddde 3229
755d4b67
IP
3230 /* This is done before enforce_user, but ambient set
3231 * does not survive over setresuid() if keep_caps is not set. */
165a31c0
LP
3232 if (!needs_ambient_hack &&
3233 context->capability_ambient_set != 0) {
755d4b67
IP
3234 r = capability_ambient_set_apply(context->capability_ambient_set, true);
3235 if (r < 0) {
3236 *exit_status = EXIT_CAPABILITIES;
12145637 3237 return log_unit_error_errno(unit, r, "Failed to apply ambient capabilities (before UID change): %m");
755d4b67 3238 }
755d4b67 3239 }
165a31c0 3240 }
755d4b67 3241
165a31c0 3242 if (needs_setuid) {
d35fbf6b 3243 if (context->user) {
ff0af2a1
LP
3244 r = enforce_user(context, uid);
3245 if (r < 0) {
3246 *exit_status = EXIT_USER;
12145637 3247 return log_unit_error_errno(unit, r, "Failed to change UID to " UID_FMT ": %m", uid);
5b6319dc 3248 }
165a31c0
LP
3249
3250 if (!needs_ambient_hack &&
3251 context->capability_ambient_set != 0) {
755d4b67
IP
3252
3253 /* Fix the ambient capabilities after user change. */
3254 r = capability_ambient_set_apply(context->capability_ambient_set, false);
3255 if (r < 0) {
3256 *exit_status = EXIT_CAPABILITIES;
12145637 3257 return log_unit_error_errno(unit, r, "Failed to apply ambient capabilities (after UID change): %m");
755d4b67
IP
3258 }
3259
3260 /* If we were asked to change user and ambient capabilities
3261 * were requested, we had to add keep-caps to the securebits
3262 * so that we would maintain the inherited capability set
3263 * through the setresuid(). Make sure that the bit is added
3264 * also to the context secure_bits so that we don't try to
3265 * drop the bit away next. */
3266
7f508f2c 3267 secure_bits |= 1<<SECURE_KEEP_CAPS;
755d4b67 3268 }
5b6319dc 3269 }
165a31c0 3270 }
d35fbf6b 3271
165a31c0 3272 if (needs_sandboxing) {
37ac2744 3273 /* Apply other MAC contexts late, but before seccomp syscall filtering, as those should really be last to
5cd9cd35
LP
3274 * influence our own codepaths as little as possible. Moreover, applying MAC contexts usually requires
3275 * syscalls that are subject to seccomp filtering, hence should probably be applied before the syscalls
3276 * are restricted. */
3277
349cc4a5 3278#if HAVE_SELINUX
43b1f709 3279 if (use_selinux) {
5cd9cd35
LP
3280 char *exec_context = mac_selinux_context_net ?: context->selinux_context;
3281
3282 if (exec_context) {
3283 r = setexeccon(exec_context);
3284 if (r < 0) {
3285 *exit_status = EXIT_SELINUX_CONTEXT;
12145637 3286 return log_unit_error_errno(unit, r, "Failed to change SELinux context to %s: %m", exec_context);
5cd9cd35
LP
3287 }
3288 }
3289 }
3290#endif
3291
349cc4a5 3292#if HAVE_APPARMOR
43b1f709 3293 if (use_apparmor && context->apparmor_profile) {
5cd9cd35
LP
3294 r = aa_change_onexec(context->apparmor_profile);
3295 if (r < 0 && !context->apparmor_profile_ignore) {
3296 *exit_status = EXIT_APPARMOR_PROFILE;
12145637 3297 return log_unit_error_errno(unit, errno, "Failed to prepare AppArmor profile change to %s: %m", context->apparmor_profile);
5cd9cd35
LP
3298 }
3299 }
3300#endif
3301
165a31c0
LP
3302 /* PR_GET_SECUREBITS is not privileged, while PR_SET_SECUREBITS is. So to suppress potential EPERMs
3303 * we'll try not to call PR_SET_SECUREBITS unless necessary. */
755d4b67
IP
3304 if (prctl(PR_GET_SECUREBITS) != secure_bits)
3305 if (prctl(PR_SET_SECUREBITS, secure_bits) < 0) {
ff0af2a1 3306 *exit_status = EXIT_SECUREBITS;
12145637 3307 return log_unit_error_errno(unit, errno, "Failed to set process secure bits: %m");
ff01d048 3308 }
5b6319dc 3309
59eeb84b 3310 if (context_has_no_new_privileges(context))
d35fbf6b 3311 if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) < 0) {
ff0af2a1 3312 *exit_status = EXIT_NO_NEW_PRIVILEGES;
12145637 3313 return log_unit_error_errno(unit, errno, "Failed to disable new privileges: %m");
d35fbf6b
DM
3314 }
3315
349cc4a5 3316#if HAVE_SECCOMP
469830d1
LP
3317 r = apply_address_families(unit, context);
3318 if (r < 0) {
3319 *exit_status = EXIT_ADDRESS_FAMILIES;
12145637 3320 return log_unit_error_errno(unit, r, "Failed to restrict address families: %m");
4c2630eb 3321 }
04aa0cb9 3322
469830d1
LP
3323 r = apply_memory_deny_write_execute(unit, context);
3324 if (r < 0) {
3325 *exit_status = EXIT_SECCOMP;
12145637 3326 return log_unit_error_errno(unit, r, "Failed to disable writing to executable memory: %m");
f3e43635 3327 }
f4170c67 3328
469830d1
LP
3329 r = apply_restrict_realtime(unit, context);
3330 if (r < 0) {
3331 *exit_status = EXIT_SECCOMP;
12145637 3332 return log_unit_error_errno(unit, r, "Failed to apply realtime restrictions: %m");
f4170c67
LP
3333 }
3334
add00535
LP
3335 r = apply_restrict_namespaces(unit, context);
3336 if (r < 0) {
3337 *exit_status = EXIT_SECCOMP;
12145637 3338 return log_unit_error_errno(unit, r, "Failed to apply namespace restrictions: %m");
add00535
LP
3339 }
3340
469830d1
LP
3341 r = apply_protect_sysctl(unit, context);
3342 if (r < 0) {
3343 *exit_status = EXIT_SECCOMP;
12145637 3344 return log_unit_error_errno(unit, r, "Failed to apply sysctl restrictions: %m");
502d704e
DH
3345 }
3346
469830d1
LP
3347 r = apply_protect_kernel_modules(unit, context);
3348 if (r < 0) {
3349 *exit_status = EXIT_SECCOMP;
12145637 3350 return log_unit_error_errno(unit, r, "Failed to apply module loading restrictions: %m");
59eeb84b
LP
3351 }
3352
469830d1
LP
3353 r = apply_private_devices(unit, context);
3354 if (r < 0) {
3355 *exit_status = EXIT_SECCOMP;
12145637 3356 return log_unit_error_errno(unit, r, "Failed to set up private devices: %m");
469830d1
LP
3357 }
3358
3359 r = apply_syscall_archs(unit, context);
3360 if (r < 0) {
3361 *exit_status = EXIT_SECCOMP;
12145637 3362 return log_unit_error_errno(unit, r, "Failed to apply syscall architecture restrictions: %m");
ba128bb8
LP
3363 }
3364
78e864e5
TM
3365 r = apply_lock_personality(unit, context);
3366 if (r < 0) {
3367 *exit_status = EXIT_SECCOMP;
12145637 3368 return log_unit_error_errno(unit, r, "Failed to lock personalities: %m");
78e864e5
TM
3369 }
3370
5cd9cd35
LP
3371 /* This really should remain the last step before the execve(), to make sure our own code is unaffected
3372 * by the filter as little as possible. */
165a31c0 3373 r = apply_syscall_filter(unit, context, needs_ambient_hack);
469830d1
LP
3374 if (r < 0) {
3375 *exit_status = EXIT_SECCOMP;
12145637 3376 return log_unit_error_errno(unit, r, "Failed to apply system call filters: %m");
d35fbf6b
DM
3377 }
3378#endif
d35fbf6b 3379 }
034c6ed7 3380
00819cc1
LP
3381 if (!strv_isempty(context->unset_environment)) {
3382 char **ee = NULL;
3383
3384 ee = strv_env_delete(accum_env, 1, context->unset_environment);
3385 if (!ee) {
3386 *exit_status = EXIT_MEMORY;
12145637 3387 return log_oom();
00819cc1
LP
3388 }
3389
130d3d22 3390 strv_free_and_replace(accum_env, ee);
00819cc1
LP
3391 }
3392
2065ca69 3393 final_argv = replace_env_argv(argv, accum_env);
d35fbf6b 3394 if (!final_argv) {
ff0af2a1 3395 *exit_status = EXIT_MEMORY;
12145637 3396 return log_oom();
d35fbf6b 3397 }
034c6ed7 3398
f1d34068 3399 if (DEBUG_LOGGING) {
d35fbf6b 3400 _cleanup_free_ char *line;
81a2b7ce 3401
d35fbf6b 3402 line = exec_command_line(final_argv);
a1230ff9 3403 if (line)
f2341e0a 3404 log_struct(LOG_DEBUG,
f2341e0a
LP
3405 "EXECUTABLE=%s", command->path,
3406 LOG_UNIT_MESSAGE(unit, "Executing: %s", line),
ba360bb0 3407 LOG_UNIT_ID(unit),
a1230ff9 3408 LOG_UNIT_INVOCATION_ID(unit));
d35fbf6b 3409 }
dd305ec9 3410
2065ca69 3411 execve(command->path, final_argv, accum_env);
12145637
LP
3412
3413 if (errno == ENOENT && (command->flags & EXEC_COMMAND_IGNORE_FAILURE)) {
12145637
LP
3414 log_struct_errno(LOG_INFO, errno,
3415 "MESSAGE_ID=" SD_MESSAGE_SPAWN_FAILED_STR,
3416 LOG_UNIT_ID(unit),
3417 LOG_UNIT_INVOCATION_ID(unit),
3418 LOG_UNIT_MESSAGE(unit, "Executable %s missing, skipping: %m",
3419 command->path),
a1230ff9 3420 "EXECUTABLE=%s", command->path);
12145637
LP
3421 return 0;
3422 }
3423
ff0af2a1 3424 *exit_status = EXIT_EXEC;
12145637 3425 return log_unit_error_errno(unit, errno, "Failed to execute command: %m");
d35fbf6b 3426}
81a2b7ce 3427
34cf6c43
YW
3428static int exec_context_load_environment(const Unit *unit, const ExecContext *c, char ***l);
3429static int exec_context_named_iofds(const ExecContext *c, const ExecParameters *p, int named_iofds[3]);
3430
f2341e0a
LP
3431int exec_spawn(Unit *unit,
3432 ExecCommand *command,
d35fbf6b
DM
3433 const ExecContext *context,
3434 const ExecParameters *params,
3435 ExecRuntime *runtime,
29206d46 3436 DynamicCreds *dcreds,
d35fbf6b 3437 pid_t *ret) {
8351ceae 3438
d35fbf6b 3439 _cleanup_strv_free_ char **files_env = NULL;
9b141911 3440 int *fds = NULL;
da6053d0 3441 size_t n_storage_fds = 0, n_socket_fds = 0;
ff0af2a1
LP
3442 _cleanup_free_ char *line = NULL;
3443 int socket_fd, r;
52c239d7 3444 int named_iofds[3] = { -1, -1, -1 };
ff0af2a1 3445 char **argv;
d35fbf6b 3446 pid_t pid;
8351ceae 3447
f2341e0a 3448 assert(unit);
d35fbf6b
DM
3449 assert(command);
3450 assert(context);
3451 assert(ret);
3452 assert(params);
4c47affc 3453 assert(params->fds || (params->n_storage_fds + params->n_socket_fds <= 0));
4298d0b5 3454
d35fbf6b
DM
3455 if (context->std_input == EXEC_INPUT_SOCKET ||
3456 context->std_output == EXEC_OUTPUT_SOCKET ||
3457 context->std_error == EXEC_OUTPUT_SOCKET) {
17df7223 3458
4c47affc 3459 if (params->n_socket_fds > 1) {
f2341e0a 3460 log_unit_error(unit, "Got more than one socket.");
d35fbf6b 3461 return -EINVAL;
ff0af2a1 3462 }
eef65bf3 3463
4c47affc 3464 if (params->n_socket_fds == 0) {
488ab41c
AA
3465 log_unit_error(unit, "Got no socket.");
3466 return -EINVAL;
3467 }
3468
d35fbf6b
DM
3469 socket_fd = params->fds[0];
3470 } else {
3471 socket_fd = -1;
3472 fds = params->fds;
4c47affc 3473 n_storage_fds = params->n_storage_fds;
9b141911 3474 n_socket_fds = params->n_socket_fds;
d35fbf6b 3475 }
94f04347 3476
34cf6c43 3477 r = exec_context_named_iofds(context, params, named_iofds);
52c239d7
LB
3478 if (r < 0)
3479 return log_unit_error_errno(unit, r, "Failed to load a named file descriptor: %m");
3480
f2341e0a 3481 r = exec_context_load_environment(unit, context, &files_env);
ff0af2a1 3482 if (r < 0)
f2341e0a 3483 return log_unit_error_errno(unit, r, "Failed to load environment files: %m");
034c6ed7 3484
d35fbf6b 3485 argv = params->argv ?: command->argv;
d35fbf6b
DM
3486 line = exec_command_line(argv);
3487 if (!line)
3488 return log_oom();
fab56fc5 3489
f2341e0a 3490 log_struct(LOG_DEBUG,
f2341e0a
LP
3491 LOG_UNIT_MESSAGE(unit, "About to execute: %s", line),
3492 "EXECUTABLE=%s", command->path,
ba360bb0 3493 LOG_UNIT_ID(unit),
a1230ff9 3494 LOG_UNIT_INVOCATION_ID(unit));
12145637 3495
d35fbf6b
DM
3496 pid = fork();
3497 if (pid < 0)
74129a12 3498 return log_unit_error_errno(unit, errno, "Failed to fork: %m");
d35fbf6b
DM
3499
3500 if (pid == 0) {
12145637 3501 int exit_status = EXIT_SUCCESS;
ff0af2a1 3502
f2341e0a
LP
3503 r = exec_child(unit,
3504 command,
ff0af2a1
LP
3505 context,
3506 params,
3507 runtime,
29206d46 3508 dcreds,
ff0af2a1
LP
3509 argv,
3510 socket_fd,
52c239d7 3511 named_iofds,
4c47affc
FB
3512 fds,
3513 n_storage_fds,
9b141911 3514 n_socket_fds,
ff0af2a1 3515 files_env,
00d9ef85 3516 unit->manager->user_lookup_fds[1],
12145637
LP
3517 &exit_status);
3518
a1230ff9 3519 if (r < 0)
12145637
LP
3520 log_struct_errno(LOG_ERR, r,
3521 "MESSAGE_ID=" SD_MESSAGE_SPAWN_FAILED_STR,
3522 LOG_UNIT_ID(unit),
3523 LOG_UNIT_INVOCATION_ID(unit),
3524 LOG_UNIT_MESSAGE(unit, "Failed at step %s spawning %s: %m",
3525 exit_status_to_string(exit_status, EXIT_STATUS_SYSTEMD),
3526 command->path),
a1230ff9 3527 "EXECUTABLE=%s", command->path);
4c2630eb 3528
ff0af2a1 3529 _exit(exit_status);
034c6ed7
LP
3530 }
3531
f2341e0a 3532 log_unit_debug(unit, "Forked %s as "PID_FMT, command->path, pid);
23635a85 3533
80876c20
LP
3534 /* We add the new process to the cgroup both in the child (so
3535 * that we can be sure that no user code is ever executed
3536 * outside of the cgroup) and in the parent (so that we can be
3537 * sure that when we kill the cgroup the process will be
3538 * killed too). */
d35fbf6b 3539 if (params->cgroup_path)
dd305ec9 3540 (void) cg_attach(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, pid);
2da3263a 3541
b58b4116 3542 exec_status_start(&command->exec_status, pid);
9fb86720 3543
034c6ed7 3544 *ret = pid;
5cb5a6ff
LP
3545 return 0;
3546}
3547
034c6ed7 3548void exec_context_init(ExecContext *c) {
3536f49e
YW
3549 ExecDirectoryType i;
3550
034c6ed7
LP
3551 assert(c);
3552
4c12626c 3553 c->umask = 0022;
9eba9da4 3554 c->ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 0);
94f04347 3555 c->cpu_sched_policy = SCHED_OTHER;
071830ff 3556 c->syslog_priority = LOG_DAEMON|LOG_INFO;
74922904 3557 c->syslog_level_prefix = true;
353e12c2 3558 c->ignore_sigpipe = true;
3a43da28 3559 c->timer_slack_nsec = NSEC_INFINITY;
050f7277 3560 c->personality = PERSONALITY_INVALID;
72fd1768 3561 for (i = 0; i < _EXEC_DIRECTORY_TYPE_MAX; i++)
3536f49e 3562 c->directories[i].mode = 0755;
a103496c 3563 c->capability_bounding_set = CAP_ALL;
aa9d574d
YW
3564 assert_cc(NAMESPACE_FLAGS_INITIAL != NAMESPACE_FLAGS_ALL);
3565 c->restrict_namespaces = NAMESPACE_FLAGS_INITIAL;
d3070fbd 3566 c->log_level_max = -1;
034c6ed7
LP
3567}
3568
613b411c 3569void exec_context_done(ExecContext *c) {
3536f49e 3570 ExecDirectoryType i;
d3070fbd 3571 size_t l;
5cb5a6ff
LP
3572
3573 assert(c);
3574
6796073e
LP
3575 c->environment = strv_free(c->environment);
3576 c->environment_files = strv_free(c->environment_files);
b4c14404 3577 c->pass_environment = strv_free(c->pass_environment);
00819cc1 3578 c->unset_environment = strv_free(c->unset_environment);
8c7be95e 3579
31ce987c 3580 rlimit_free_all(c->rlimit);
034c6ed7 3581
2038c3f5 3582 for (l = 0; l < 3; l++) {
52c239d7 3583 c->stdio_fdname[l] = mfree(c->stdio_fdname[l]);
2038c3f5
LP
3584 c->stdio_file[l] = mfree(c->stdio_file[l]);
3585 }
52c239d7 3586
a1e58e8e
LP
3587 c->working_directory = mfree(c->working_directory);
3588 c->root_directory = mfree(c->root_directory);
915e6d16 3589 c->root_image = mfree(c->root_image);
a1e58e8e
LP
3590 c->tty_path = mfree(c->tty_path);
3591 c->syslog_identifier = mfree(c->syslog_identifier);
3592 c->user = mfree(c->user);
3593 c->group = mfree(c->group);
034c6ed7 3594
6796073e 3595 c->supplementary_groups = strv_free(c->supplementary_groups);
94f04347 3596
a1e58e8e 3597 c->pam_name = mfree(c->pam_name);
5b6319dc 3598
2a624c36
AP
3599 c->read_only_paths = strv_free(c->read_only_paths);
3600 c->read_write_paths = strv_free(c->read_write_paths);
3601 c->inaccessible_paths = strv_free(c->inaccessible_paths);
82c121a4 3602
d2d6c096 3603 bind_mount_free_many(c->bind_mounts, c->n_bind_mounts);
8e06d57c
YW
3604 c->bind_mounts = NULL;
3605 c->n_bind_mounts = 0;
2abd4e38
YW
3606 temporary_filesystem_free_many(c->temporary_filesystems, c->n_temporary_filesystems);
3607 c->temporary_filesystems = NULL;
3608 c->n_temporary_filesystems = 0;
d2d6c096 3609
da681e1b 3610 c->cpuset = cpu_set_mfree(c->cpuset);
86a3475b 3611
a1e58e8e
LP
3612 c->utmp_id = mfree(c->utmp_id);
3613 c->selinux_context = mfree(c->selinux_context);
3614 c->apparmor_profile = mfree(c->apparmor_profile);
5b8e1b77 3615 c->smack_process_label = mfree(c->smack_process_label);
eef65bf3 3616
8cfa775f 3617 c->syscall_filter = hashmap_free(c->syscall_filter);
525d3cc7
LP
3618 c->syscall_archs = set_free(c->syscall_archs);
3619 c->address_families = set_free(c->address_families);
e66cf1a3 3620
72fd1768 3621 for (i = 0; i < _EXEC_DIRECTORY_TYPE_MAX; i++)
3536f49e 3622 c->directories[i].paths = strv_free(c->directories[i].paths);
d3070fbd
LP
3623
3624 c->log_level_max = -1;
3625
3626 exec_context_free_log_extra_fields(c);
08f3be7a
LP
3627
3628 c->stdin_data = mfree(c->stdin_data);
3629 c->stdin_data_size = 0;
e66cf1a3
LP
3630}
3631
34cf6c43 3632int exec_context_destroy_runtime_directory(const ExecContext *c, const char *runtime_prefix) {
e66cf1a3
LP
3633 char **i;
3634
3635 assert(c);
3636
3637 if (!runtime_prefix)
3638 return 0;
3639
3536f49e 3640 STRV_FOREACH(i, c->directories[EXEC_DIRECTORY_RUNTIME].paths) {
e66cf1a3
LP
3641 _cleanup_free_ char *p;
3642
605405c6 3643 p = strjoin(runtime_prefix, "/", *i);
e66cf1a3
LP
3644 if (!p)
3645 return -ENOMEM;
3646
6c47cd7d 3647 /* We execute this synchronously, since we need to be sure this is gone when we start the service
e66cf1a3 3648 * next. */
c6878637 3649 (void) rm_rf(p, REMOVE_ROOT);
e66cf1a3
LP
3650 }
3651
3652 return 0;
5cb5a6ff
LP
3653}
3654
34cf6c43 3655static void exec_command_done(ExecCommand *c) {
43d0fcbd
LP
3656 assert(c);
3657
a1e58e8e 3658 c->path = mfree(c->path);
43d0fcbd 3659
6796073e 3660 c->argv = strv_free(c->argv);
43d0fcbd
LP
3661}
3662
da6053d0
LP
3663void exec_command_done_array(ExecCommand *c, size_t n) {
3664 size_t i;
43d0fcbd
LP
3665
3666 for (i = 0; i < n; i++)
3667 exec_command_done(c+i);
3668}
3669
f1acf85a 3670ExecCommand* exec_command_free_list(ExecCommand *c) {
5cb5a6ff
LP
3671 ExecCommand *i;
3672
3673 while ((i = c)) {
71fda00f 3674 LIST_REMOVE(command, c, i);
43d0fcbd 3675 exec_command_done(i);
5cb5a6ff
LP
3676 free(i);
3677 }
f1acf85a
ZJS
3678
3679 return NULL;
5cb5a6ff
LP
3680}
3681
da6053d0
LP
3682void exec_command_free_array(ExecCommand **c, size_t n) {
3683 size_t i;
034c6ed7 3684
f1acf85a
ZJS
3685 for (i = 0; i < n; i++)
3686 c[i] = exec_command_free_list(c[i]);
034c6ed7
LP
3687}
3688
039f0e70 3689typedef struct InvalidEnvInfo {
34cf6c43 3690 const Unit *unit;
039f0e70
LP
3691 const char *path;
3692} InvalidEnvInfo;
3693
3694static void invalid_env(const char *p, void *userdata) {
3695 InvalidEnvInfo *info = userdata;
3696
f2341e0a 3697 log_unit_error(info->unit, "Ignoring invalid environment assignment '%s': %s", p, info->path);
039f0e70
LP
3698}
3699
52c239d7
LB
3700const char* exec_context_fdname(const ExecContext *c, int fd_index) {
3701 assert(c);
3702
3703 switch (fd_index) {
5073ff6b 3704
52c239d7
LB
3705 case STDIN_FILENO:
3706 if (c->std_input != EXEC_INPUT_NAMED_FD)
3707 return NULL;
5073ff6b 3708
52c239d7 3709 return c->stdio_fdname[STDIN_FILENO] ?: "stdin";
5073ff6b 3710
52c239d7
LB
3711 case STDOUT_FILENO:
3712 if (c->std_output != EXEC_OUTPUT_NAMED_FD)
3713 return NULL;
5073ff6b 3714
52c239d7 3715 return c->stdio_fdname[STDOUT_FILENO] ?: "stdout";
5073ff6b 3716
52c239d7
LB
3717 case STDERR_FILENO:
3718 if (c->std_error != EXEC_OUTPUT_NAMED_FD)
3719 return NULL;
5073ff6b 3720
52c239d7 3721 return c->stdio_fdname[STDERR_FILENO] ?: "stderr";
5073ff6b 3722
52c239d7
LB
3723 default:
3724 return NULL;
3725 }
3726}
3727
34cf6c43 3728static int exec_context_named_iofds(const ExecContext *c, const ExecParameters *p, int named_iofds[3]) {
da6053d0 3729 size_t i, targets;
56fbd561 3730 const char* stdio_fdname[3];
da6053d0 3731 size_t n_fds;
52c239d7
LB
3732
3733 assert(c);
3734 assert(p);
3735
3736 targets = (c->std_input == EXEC_INPUT_NAMED_FD) +
3737 (c->std_output == EXEC_OUTPUT_NAMED_FD) +
3738 (c->std_error == EXEC_OUTPUT_NAMED_FD);
3739
3740 for (i = 0; i < 3; i++)
3741 stdio_fdname[i] = exec_context_fdname(c, i);
3742
4c47affc
FB
3743 n_fds = p->n_storage_fds + p->n_socket_fds;
3744
3745 for (i = 0; i < n_fds && targets > 0; i++)
56fbd561
ZJS
3746 if (named_iofds[STDIN_FILENO] < 0 &&
3747 c->std_input == EXEC_INPUT_NAMED_FD &&
3748 stdio_fdname[STDIN_FILENO] &&
3749 streq(p->fd_names[i], stdio_fdname[STDIN_FILENO])) {
3750
52c239d7
LB
3751 named_iofds[STDIN_FILENO] = p->fds[i];
3752 targets--;
56fbd561
ZJS
3753
3754 } else if (named_iofds[STDOUT_FILENO] < 0 &&
3755 c->std_output == EXEC_OUTPUT_NAMED_FD &&
3756 stdio_fdname[STDOUT_FILENO] &&
3757 streq(p->fd_names[i], stdio_fdname[STDOUT_FILENO])) {
3758
52c239d7
LB
3759 named_iofds[STDOUT_FILENO] = p->fds[i];
3760 targets--;
56fbd561
ZJS
3761
3762 } else if (named_iofds[STDERR_FILENO] < 0 &&
3763 c->std_error == EXEC_OUTPUT_NAMED_FD &&
3764 stdio_fdname[STDERR_FILENO] &&
3765 streq(p->fd_names[i], stdio_fdname[STDERR_FILENO])) {
3766
52c239d7
LB
3767 named_iofds[STDERR_FILENO] = p->fds[i];
3768 targets--;
3769 }
3770
56fbd561 3771 return targets == 0 ? 0 : -ENOENT;
52c239d7
LB
3772}
3773
34cf6c43 3774static int exec_context_load_environment(const Unit *unit, const ExecContext *c, char ***l) {
8c7be95e
LP
3775 char **i, **r = NULL;
3776
3777 assert(c);
3778 assert(l);
3779
3780 STRV_FOREACH(i, c->environment_files) {
3781 char *fn;
52511fae
ZJS
3782 int k;
3783 unsigned n;
8c7be95e
LP
3784 bool ignore = false;
3785 char **p;
7fd1b19b 3786 _cleanup_globfree_ glob_t pglob = {};
8c7be95e
LP
3787
3788 fn = *i;
3789
3790 if (fn[0] == '-') {
3791 ignore = true;
313cefa1 3792 fn++;
8c7be95e
LP
3793 }
3794
3795 if (!path_is_absolute(fn)) {
8c7be95e
LP
3796 if (ignore)
3797 continue;
3798
3799 strv_free(r);
3800 return -EINVAL;
3801 }
3802
2bef10ab 3803 /* Filename supports globbing, take all matching files */
d8c92e8b
ZJS
3804 k = safe_glob(fn, 0, &pglob);
3805 if (k < 0) {
2bef10ab
PL
3806 if (ignore)
3807 continue;
8c7be95e 3808
2bef10ab 3809 strv_free(r);
d8c92e8b 3810 return k;
2bef10ab 3811 }
8c7be95e 3812
d8c92e8b
ZJS
3813 /* When we don't match anything, -ENOENT should be returned */
3814 assert(pglob.gl_pathc > 0);
3815
3816 for (n = 0; n < pglob.gl_pathc; n++) {
717603e3 3817 k = load_env_file(NULL, pglob.gl_pathv[n], NULL, &p);
2bef10ab
PL
3818 if (k < 0) {
3819 if (ignore)
3820 continue;
8c7be95e 3821
2bef10ab 3822 strv_free(r);
2bef10ab 3823 return k;
e9c1ea9d 3824 }
ebc05a09 3825 /* Log invalid environment variables with filename */
039f0e70
LP
3826 if (p) {
3827 InvalidEnvInfo info = {
f2341e0a 3828 .unit = unit,
039f0e70
LP
3829 .path = pglob.gl_pathv[n]
3830 };
3831
3832 p = strv_env_clean_with_callback(p, invalid_env, &info);
3833 }
8c7be95e 3834
234519ae 3835 if (!r)
2bef10ab
PL
3836 r = p;
3837 else {
3838 char **m;
8c7be95e 3839
2bef10ab
PL
3840 m = strv_env_merge(2, r, p);
3841 strv_free(r);
3842 strv_free(p);
c84a9488 3843 if (!m)
2bef10ab 3844 return -ENOMEM;
2bef10ab
PL
3845
3846 r = m;
3847 }
8c7be95e
LP
3848 }
3849 }
3850
3851 *l = r;
3852
3853 return 0;
3854}
3855
6ac8fdc9 3856static bool tty_may_match_dev_console(const char *tty) {
7b912648 3857 _cleanup_free_ char *resolved = NULL;
6ac8fdc9 3858
1e22b5cd
LP
3859 if (!tty)
3860 return true;
3861
a119ec7c 3862 tty = skip_dev_prefix(tty);
6ac8fdc9
MS
3863
3864 /* trivial identity? */
3865 if (streq(tty, "console"))
3866 return true;
3867
7b912648
LP
3868 if (resolve_dev_console(&resolved) < 0)
3869 return true; /* if we could not resolve, assume it may */
6ac8fdc9
MS
3870
3871 /* "tty0" means the active VC, so it may be the same sometimes */
7b912648 3872 return streq(resolved, tty) || (streq(resolved, "tty0") && tty_is_vc(tty));
6ac8fdc9
MS
3873}
3874
34cf6c43 3875bool exec_context_may_touch_console(const ExecContext *ec) {
1e22b5cd
LP
3876
3877 return (ec->tty_reset ||
3878 ec->tty_vhangup ||
3879 ec->tty_vt_disallocate ||
6ac8fdc9
MS
3880 is_terminal_input(ec->std_input) ||
3881 is_terminal_output(ec->std_output) ||
3882 is_terminal_output(ec->std_error)) &&
1e22b5cd 3883 tty_may_match_dev_console(exec_context_tty_path(ec));
6ac8fdc9
MS
3884}
3885
15ae422b
LP
3886static void strv_fprintf(FILE *f, char **l) {
3887 char **g;
3888
3889 assert(f);
3890
3891 STRV_FOREACH(g, l)
3892 fprintf(f, " %s", *g);
3893}
3894
34cf6c43 3895void exec_context_dump(const ExecContext *c, FILE* f, const char *prefix) {
d3070fbd 3896 ExecDirectoryType dt;
c2bbd90b 3897 char **e, **d;
94f04347 3898 unsigned i;
add00535 3899 int r;
9eba9da4 3900
5cb5a6ff
LP
3901 assert(c);
3902 assert(f);
3903
4ad49000 3904 prefix = strempty(prefix);
5cb5a6ff
LP
3905
3906 fprintf(f,
94f04347
LP
3907 "%sUMask: %04o\n"
3908 "%sWorkingDirectory: %s\n"
451a074f 3909 "%sRootDirectory: %s\n"
15ae422b 3910 "%sNonBlocking: %s\n"
64747e2d 3911 "%sPrivateTmp: %s\n"
7f112f50 3912 "%sPrivateDevices: %s\n"
59eeb84b 3913 "%sProtectKernelTunables: %s\n"
e66a2f65 3914 "%sProtectKernelModules: %s\n"
59eeb84b 3915 "%sProtectControlGroups: %s\n"
d251207d
LP
3916 "%sPrivateNetwork: %s\n"
3917 "%sPrivateUsers: %s\n"
1b8689f9
LP
3918 "%sProtectHome: %s\n"
3919 "%sProtectSystem: %s\n"
5d997827 3920 "%sMountAPIVFS: %s\n"
f3e43635 3921 "%sIgnoreSIGPIPE: %s\n"
f4170c67 3922 "%sMemoryDenyWriteExecute: %s\n"
b1edf445
LP
3923 "%sRestrictRealtime: %s\n"
3924 "%sKeyringMode: %s\n",
5cb5a6ff 3925 prefix, c->umask,
9eba9da4 3926 prefix, c->working_directory ? c->working_directory : "/",
451a074f 3927 prefix, c->root_directory ? c->root_directory : "/",
15ae422b 3928 prefix, yes_no(c->non_blocking),
64747e2d 3929 prefix, yes_no(c->private_tmp),
7f112f50 3930 prefix, yes_no(c->private_devices),
59eeb84b 3931 prefix, yes_no(c->protect_kernel_tunables),
e66a2f65 3932 prefix, yes_no(c->protect_kernel_modules),
59eeb84b 3933 prefix, yes_no(c->protect_control_groups),
d251207d
LP
3934 prefix, yes_no(c->private_network),
3935 prefix, yes_no(c->private_users),
1b8689f9
LP
3936 prefix, protect_home_to_string(c->protect_home),
3937 prefix, protect_system_to_string(c->protect_system),
5d997827 3938 prefix, yes_no(c->mount_apivfs),
f3e43635 3939 prefix, yes_no(c->ignore_sigpipe),
f4170c67 3940 prefix, yes_no(c->memory_deny_write_execute),
b1edf445
LP
3941 prefix, yes_no(c->restrict_realtime),
3942 prefix, exec_keyring_mode_to_string(c->keyring_mode));
fb33a393 3943
915e6d16
LP
3944 if (c->root_image)
3945 fprintf(f, "%sRootImage: %s\n", prefix, c->root_image);
3946
8c7be95e
LP
3947 STRV_FOREACH(e, c->environment)
3948 fprintf(f, "%sEnvironment: %s\n", prefix, *e);
3949
3950 STRV_FOREACH(e, c->environment_files)
3951 fprintf(f, "%sEnvironmentFile: %s\n", prefix, *e);
94f04347 3952
b4c14404
FB
3953 STRV_FOREACH(e, c->pass_environment)
3954 fprintf(f, "%sPassEnvironment: %s\n", prefix, *e);
3955
00819cc1
LP
3956 STRV_FOREACH(e, c->unset_environment)
3957 fprintf(f, "%sUnsetEnvironment: %s\n", prefix, *e);
3958
53f47dfc
YW
3959 fprintf(f, "%sRuntimeDirectoryPreserve: %s\n", prefix, exec_preserve_mode_to_string(c->runtime_directory_preserve_mode));
3960
72fd1768 3961 for (dt = 0; dt < _EXEC_DIRECTORY_TYPE_MAX; dt++) {
3536f49e
YW
3962 fprintf(f, "%s%sMode: %04o\n", prefix, exec_directory_type_to_string(dt), c->directories[dt].mode);
3963
3964 STRV_FOREACH(d, c->directories[dt].paths)
3965 fprintf(f, "%s%s: %s\n", prefix, exec_directory_type_to_string(dt), *d);
3966 }
c2bbd90b 3967
fb33a393
LP
3968 if (c->nice_set)
3969 fprintf(f,
3970 "%sNice: %i\n",
3971 prefix, c->nice);
3972
dd6c17b1 3973 if (c->oom_score_adjust_set)
fb33a393 3974 fprintf(f,
dd6c17b1
LP
3975 "%sOOMScoreAdjust: %i\n",
3976 prefix, c->oom_score_adjust);
9eba9da4 3977
94f04347 3978 for (i = 0; i < RLIM_NLIMITS; i++)
3c11da9d 3979 if (c->rlimit[i]) {
6550c24c 3980 fprintf(f, "Limit%s%s: " RLIM_FMT "\n",
3c11da9d 3981 prefix, rlimit_to_string(i), c->rlimit[i]->rlim_max);
6550c24c 3982 fprintf(f, "Limit%s%sSoft: " RLIM_FMT "\n",
3c11da9d
EV
3983 prefix, rlimit_to_string(i), c->rlimit[i]->rlim_cur);
3984 }
94f04347 3985
f8b69d1d 3986 if (c->ioprio_set) {
1756a011 3987 _cleanup_free_ char *class_str = NULL;
f8b69d1d 3988
837df140
YW
3989 r = ioprio_class_to_string_alloc(IOPRIO_PRIO_CLASS(c->ioprio), &class_str);
3990 if (r >= 0)
3991 fprintf(f, "%sIOSchedulingClass: %s\n", prefix, class_str);
3992
3993 fprintf(f, "%sIOPriority: %lu\n", prefix, IOPRIO_PRIO_DATA(c->ioprio));
f8b69d1d 3994 }
94f04347 3995
f8b69d1d 3996 if (c->cpu_sched_set) {
1756a011 3997 _cleanup_free_ char *policy_str = NULL;
f8b69d1d 3998
837df140
YW
3999 r = sched_policy_to_string_alloc(c->cpu_sched_policy, &policy_str);
4000 if (r >= 0)
4001 fprintf(f, "%sCPUSchedulingPolicy: %s\n", prefix, policy_str);
4002
94f04347 4003 fprintf(f,
38b48754
LP
4004 "%sCPUSchedulingPriority: %i\n"
4005 "%sCPUSchedulingResetOnFork: %s\n",
38b48754
LP
4006 prefix, c->cpu_sched_priority,
4007 prefix, yes_no(c->cpu_sched_reset_on_fork));
b929bf04 4008 }
94f04347 4009
82c121a4 4010 if (c->cpuset) {
94f04347 4011 fprintf(f, "%sCPUAffinity:", prefix);
82c121a4
LP
4012 for (i = 0; i < c->cpuset_ncpus; i++)
4013 if (CPU_ISSET_S(i, CPU_ALLOC_SIZE(c->cpuset_ncpus), c->cpuset))
43a99a7a 4014 fprintf(f, " %u", i);
94f04347
LP
4015 fputs("\n", f);
4016 }
4017
3a43da28 4018 if (c->timer_slack_nsec != NSEC_INFINITY)
ccd06097 4019 fprintf(f, "%sTimerSlackNSec: "NSEC_FMT "\n", prefix, c->timer_slack_nsec);
94f04347
LP
4020
4021 fprintf(f,
80876c20
LP
4022 "%sStandardInput: %s\n"
4023 "%sStandardOutput: %s\n"
4024 "%sStandardError: %s\n",
4025 prefix, exec_input_to_string(c->std_input),
4026 prefix, exec_output_to_string(c->std_output),
4027 prefix, exec_output_to_string(c->std_error));
4028
befc4a80
LP
4029 if (c->std_input == EXEC_INPUT_NAMED_FD)
4030 fprintf(f, "%sStandardInputFileDescriptorName: %s\n", prefix, c->stdio_fdname[STDIN_FILENO]);
4031 if (c->std_output == EXEC_OUTPUT_NAMED_FD)
4032 fprintf(f, "%sStandardOutputFileDescriptorName: %s\n", prefix, c->stdio_fdname[STDOUT_FILENO]);
4033 if (c->std_error == EXEC_OUTPUT_NAMED_FD)
4034 fprintf(f, "%sStandardErrorFileDescriptorName: %s\n", prefix, c->stdio_fdname[STDERR_FILENO]);
4035
4036 if (c->std_input == EXEC_INPUT_FILE)
4037 fprintf(f, "%sStandardInputFile: %s\n", prefix, c->stdio_file[STDIN_FILENO]);
4038 if (c->std_output == EXEC_OUTPUT_FILE)
4039 fprintf(f, "%sStandardOutputFile: %s\n", prefix, c->stdio_file[STDOUT_FILENO]);
4040 if (c->std_error == EXEC_OUTPUT_FILE)
4041 fprintf(f, "%sStandardErrorFile: %s\n", prefix, c->stdio_file[STDERR_FILENO]);
4042
80876c20
LP
4043 if (c->tty_path)
4044 fprintf(f,
6ea832a2
LP
4045 "%sTTYPath: %s\n"
4046 "%sTTYReset: %s\n"
4047 "%sTTYVHangup: %s\n"
4048 "%sTTYVTDisallocate: %s\n",
4049 prefix, c->tty_path,
4050 prefix, yes_no(c->tty_reset),
4051 prefix, yes_no(c->tty_vhangup),
4052 prefix, yes_no(c->tty_vt_disallocate));
94f04347 4053
9f6444eb
LP
4054 if (IN_SET(c->std_output,
4055 EXEC_OUTPUT_SYSLOG,
4056 EXEC_OUTPUT_KMSG,
4057 EXEC_OUTPUT_JOURNAL,
4058 EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
4059 EXEC_OUTPUT_KMSG_AND_CONSOLE,
4060 EXEC_OUTPUT_JOURNAL_AND_CONSOLE) ||
4061 IN_SET(c->std_error,
4062 EXEC_OUTPUT_SYSLOG,
4063 EXEC_OUTPUT_KMSG,
4064 EXEC_OUTPUT_JOURNAL,
4065 EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
4066 EXEC_OUTPUT_KMSG_AND_CONSOLE,
4067 EXEC_OUTPUT_JOURNAL_AND_CONSOLE)) {
f8b69d1d 4068
5ce70e5b 4069 _cleanup_free_ char *fac_str = NULL, *lvl_str = NULL;
f8b69d1d 4070
837df140
YW
4071 r = log_facility_unshifted_to_string_alloc(c->syslog_priority >> 3, &fac_str);
4072 if (r >= 0)
4073 fprintf(f, "%sSyslogFacility: %s\n", prefix, fac_str);
f8b69d1d 4074
837df140
YW
4075 r = log_level_to_string_alloc(LOG_PRI(c->syslog_priority), &lvl_str);
4076 if (r >= 0)
4077 fprintf(f, "%sSyslogLevel: %s\n", prefix, lvl_str);
f8b69d1d 4078 }
94f04347 4079
d3070fbd
LP
4080 if (c->log_level_max >= 0) {
4081 _cleanup_free_ char *t = NULL;
4082
4083 (void) log_level_to_string_alloc(c->log_level_max, &t);
4084
4085 fprintf(f, "%sLogLevelMax: %s\n", prefix, strna(t));
4086 }
4087
4088 if (c->n_log_extra_fields > 0) {
4089 size_t j;
4090
4091 for (j = 0; j < c->n_log_extra_fields; j++) {
4092 fprintf(f, "%sLogExtraFields: ", prefix);
4093 fwrite(c->log_extra_fields[j].iov_base,
4094 1, c->log_extra_fields[j].iov_len,
4095 f);
4096 fputc('\n', f);
4097 }
4098 }
4099
07d46372
YW
4100 if (c->secure_bits) {
4101 _cleanup_free_ char *str = NULL;
4102
4103 r = secure_bits_to_string_alloc(c->secure_bits, &str);
4104 if (r >= 0)
4105 fprintf(f, "%sSecure Bits: %s\n", prefix, str);
4106 }
94f04347 4107
a103496c 4108 if (c->capability_bounding_set != CAP_ALL) {
dd1f5bd0 4109 _cleanup_free_ char *str = NULL;
94f04347 4110
dd1f5bd0
YW
4111 r = capability_set_to_string_alloc(c->capability_bounding_set, &str);
4112 if (r >= 0)
4113 fprintf(f, "%sCapabilityBoundingSet: %s\n", prefix, str);
755d4b67
IP
4114 }
4115
4116 if (c->capability_ambient_set != 0) {
dd1f5bd0 4117 _cleanup_free_ char *str = NULL;
755d4b67 4118
dd1f5bd0
YW
4119 r = capability_set_to_string_alloc(c->capability_ambient_set, &str);
4120 if (r >= 0)
4121 fprintf(f, "%sAmbientCapabilities: %s\n", prefix, str);
94f04347
LP
4122 }
4123
4124 if (c->user)
f2d3769a 4125 fprintf(f, "%sUser: %s\n", prefix, c->user);
94f04347 4126 if (c->group)
f2d3769a 4127 fprintf(f, "%sGroup: %s\n", prefix, c->group);
94f04347 4128
29206d46
LP
4129 fprintf(f, "%sDynamicUser: %s\n", prefix, yes_no(c->dynamic_user));
4130
ac6e8be6 4131 if (!strv_isempty(c->supplementary_groups)) {
94f04347 4132 fprintf(f, "%sSupplementaryGroups:", prefix);
15ae422b
LP
4133 strv_fprintf(f, c->supplementary_groups);
4134 fputs("\n", f);
4135 }
94f04347 4136
5b6319dc 4137 if (c->pam_name)
f2d3769a 4138 fprintf(f, "%sPAMName: %s\n", prefix, c->pam_name);
5b6319dc 4139
58629001 4140 if (!strv_isempty(c->read_write_paths)) {
2a624c36
AP
4141 fprintf(f, "%sReadWritePaths:", prefix);
4142 strv_fprintf(f, c->read_write_paths);
15ae422b
LP
4143 fputs("\n", f);
4144 }
4145
58629001 4146 if (!strv_isempty(c->read_only_paths)) {
2a624c36
AP
4147 fprintf(f, "%sReadOnlyPaths:", prefix);
4148 strv_fprintf(f, c->read_only_paths);
15ae422b
LP
4149 fputs("\n", f);
4150 }
94f04347 4151
58629001 4152 if (!strv_isempty(c->inaccessible_paths)) {
2a624c36
AP
4153 fprintf(f, "%sInaccessiblePaths:", prefix);
4154 strv_fprintf(f, c->inaccessible_paths);
94f04347
LP
4155 fputs("\n", f);
4156 }
2e22afe9 4157
d2d6c096 4158 if (c->n_bind_mounts > 0)
4ca763a9
YW
4159 for (i = 0; i < c->n_bind_mounts; i++)
4160 fprintf(f, "%s%s: %s%s:%s:%s\n", prefix,
d2d6c096 4161 c->bind_mounts[i].read_only ? "BindReadOnlyPaths" : "BindPaths",
4ca763a9 4162 c->bind_mounts[i].ignore_enoent ? "-": "",
d2d6c096
LP
4163 c->bind_mounts[i].source,
4164 c->bind_mounts[i].destination,
4165 c->bind_mounts[i].recursive ? "rbind" : "norbind");
d2d6c096 4166
2abd4e38
YW
4167 if (c->n_temporary_filesystems > 0)
4168 for (i = 0; i < c->n_temporary_filesystems; i++) {
4169 TemporaryFileSystem *t = c->temporary_filesystems + i;
4170
4171 fprintf(f, "%sTemporaryFileSystem: %s%s%s\n", prefix,
4172 t->path,
4173 isempty(t->options) ? "" : ":",
4174 strempty(t->options));
4175 }
4176
169c1bda
LP
4177 if (c->utmp_id)
4178 fprintf(f,
4179 "%sUtmpIdentifier: %s\n",
4180 prefix, c->utmp_id);
7b52a628
MS
4181
4182 if (c->selinux_context)
4183 fprintf(f,
5f8640fb
LP
4184 "%sSELinuxContext: %s%s\n",
4185 prefix, c->selinux_context_ignore ? "-" : "", c->selinux_context);
17df7223 4186
80c21aea
WC
4187 if (c->apparmor_profile)
4188 fprintf(f,
4189 "%sAppArmorProfile: %s%s\n",
4190 prefix, c->apparmor_profile_ignore ? "-" : "", c->apparmor_profile);
4191
4192 if (c->smack_process_label)
4193 fprintf(f,
4194 "%sSmackProcessLabel: %s%s\n",
4195 prefix, c->smack_process_label_ignore ? "-" : "", c->smack_process_label);
4196
050f7277 4197 if (c->personality != PERSONALITY_INVALID)
ac45f971
LP
4198 fprintf(f,
4199 "%sPersonality: %s\n",
4200 prefix, strna(personality_to_string(c->personality)));
4201
78e864e5
TM
4202 fprintf(f,
4203 "%sLockPersonality: %s\n",
4204 prefix, yes_no(c->lock_personality));
4205
17df7223 4206 if (c->syscall_filter) {
349cc4a5 4207#if HAVE_SECCOMP
17df7223 4208 Iterator j;
8cfa775f 4209 void *id, *val;
17df7223 4210 bool first = true;
351a19b1 4211#endif
17df7223
LP
4212
4213 fprintf(f,
57183d11 4214 "%sSystemCallFilter: ",
17df7223
LP
4215 prefix);
4216
4217 if (!c->syscall_whitelist)
4218 fputc('~', f);
4219
349cc4a5 4220#if HAVE_SECCOMP
8cfa775f 4221 HASHMAP_FOREACH_KEY(val, id, c->syscall_filter, j) {
17df7223 4222 _cleanup_free_ char *name = NULL;
8cfa775f
YW
4223 const char *errno_name = NULL;
4224 int num = PTR_TO_INT(val);
17df7223
LP
4225
4226 if (first)
4227 first = false;
4228 else
4229 fputc(' ', f);
4230
57183d11 4231 name = seccomp_syscall_resolve_num_arch(SCMP_ARCH_NATIVE, PTR_TO_INT(id) - 1);
17df7223 4232 fputs(strna(name), f);
8cfa775f
YW
4233
4234 if (num >= 0) {
4235 errno_name = errno_to_name(num);
4236 if (errno_name)
4237 fprintf(f, ":%s", errno_name);
4238 else
4239 fprintf(f, ":%d", num);
4240 }
17df7223 4241 }
351a19b1 4242#endif
17df7223
LP
4243
4244 fputc('\n', f);
4245 }
4246
57183d11 4247 if (c->syscall_archs) {
349cc4a5 4248#if HAVE_SECCOMP
57183d11
LP
4249 Iterator j;
4250 void *id;
4251#endif
4252
4253 fprintf(f,
4254 "%sSystemCallArchitectures:",
4255 prefix);
4256
349cc4a5 4257#if HAVE_SECCOMP
57183d11
LP
4258 SET_FOREACH(id, c->syscall_archs, j)
4259 fprintf(f, " %s", strna(seccomp_arch_to_string(PTR_TO_UINT32(id) - 1)));
4260#endif
4261 fputc('\n', f);
4262 }
4263
add00535
LP
4264 if (exec_context_restrict_namespaces_set(c)) {
4265 _cleanup_free_ char *s = NULL;
4266
86c2a9f1 4267 r = namespace_flags_to_string(c->restrict_namespaces, &s);
add00535
LP
4268 if (r >= 0)
4269 fprintf(f, "%sRestrictNamespaces: %s\n",
4270 prefix, s);
4271 }
4272
3df90f24
YW
4273 if (c->syscall_errno > 0) {
4274 const char *errno_name;
4275
4276 fprintf(f, "%sSystemCallErrorNumber: ", prefix);
4277
4278 errno_name = errno_to_name(c->syscall_errno);
4279 if (errno_name)
4280 fprintf(f, "%s\n", errno_name);
4281 else
4282 fprintf(f, "%d\n", c->syscall_errno);
4283 }
eef65bf3
MS
4284
4285 if (c->apparmor_profile)
4286 fprintf(f,
4287 "%sAppArmorProfile: %s%s\n",
4288 prefix, c->apparmor_profile_ignore ? "-" : "", c->apparmor_profile);
5cb5a6ff
LP
4289}
4290
34cf6c43 4291bool exec_context_maintains_privileges(const ExecContext *c) {
a931ad47
LP
4292 assert(c);
4293
61233823 4294 /* Returns true if the process forked off would run under
a931ad47
LP
4295 * an unchanged UID or as root. */
4296
4297 if (!c->user)
4298 return true;
4299
4300 if (streq(c->user, "root") || streq(c->user, "0"))
4301 return true;
4302
4303 return false;
4304}
4305
34cf6c43 4306int exec_context_get_effective_ioprio(const ExecContext *c) {
7f452159
LP
4307 int p;
4308
4309 assert(c);
4310
4311 if (c->ioprio_set)
4312 return c->ioprio;
4313
4314 p = ioprio_get(IOPRIO_WHO_PROCESS, 0);
4315 if (p < 0)
4316 return IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 4);
4317
4318 return p;
4319}
4320
d3070fbd
LP
4321void exec_context_free_log_extra_fields(ExecContext *c) {
4322 size_t l;
4323
4324 assert(c);
4325
4326 for (l = 0; l < c->n_log_extra_fields; l++)
4327 free(c->log_extra_fields[l].iov_base);
4328 c->log_extra_fields = mfree(c->log_extra_fields);
4329 c->n_log_extra_fields = 0;
4330}
4331
b58b4116 4332void exec_status_start(ExecStatus *s, pid_t pid) {
034c6ed7 4333 assert(s);
5cb5a6ff 4334
b58b4116
LP
4335 zero(*s);
4336 s->pid = pid;
4337 dual_timestamp_get(&s->start_timestamp);
4338}
4339
34cf6c43 4340void exec_status_exit(ExecStatus *s, const ExecContext *context, pid_t pid, int code, int status) {
b58b4116
LP
4341 assert(s);
4342
0b1f4ae6 4343 if (s->pid && s->pid != pid)
b58b4116
LP
4344 zero(*s);
4345
034c6ed7 4346 s->pid = pid;
63983207 4347 dual_timestamp_get(&s->exit_timestamp);
9fb86720 4348
034c6ed7
LP
4349 s->code = code;
4350 s->status = status;
169c1bda 4351
6ea832a2
LP
4352 if (context) {
4353 if (context->utmp_id)
4354 utmp_put_dead_process(context->utmp_id, pid, code, status);
4355
1e22b5cd 4356 exec_context_tty_reset(context, NULL);
6ea832a2 4357 }
9fb86720
LP
4358}
4359
34cf6c43 4360void exec_status_dump(const ExecStatus *s, FILE *f, const char *prefix) {
9fb86720
LP
4361 char buf[FORMAT_TIMESTAMP_MAX];
4362
4363 assert(s);
4364 assert(f);
4365
9fb86720
LP
4366 if (s->pid <= 0)
4367 return;
4368
4c940960
LP
4369 prefix = strempty(prefix);
4370
9fb86720 4371 fprintf(f,
ccd06097
ZJS
4372 "%sPID: "PID_FMT"\n",
4373 prefix, s->pid);
9fb86720 4374
af9d16e1 4375 if (dual_timestamp_is_set(&s->start_timestamp))
9fb86720
LP
4376 fprintf(f,
4377 "%sStart Timestamp: %s\n",
63983207 4378 prefix, format_timestamp(buf, sizeof(buf), s->start_timestamp.realtime));
9fb86720 4379
af9d16e1 4380 if (dual_timestamp_is_set(&s->exit_timestamp))
9fb86720
LP
4381 fprintf(f,
4382 "%sExit Timestamp: %s\n"
4383 "%sExit Code: %s\n"
4384 "%sExit Status: %i\n",
63983207 4385 prefix, format_timestamp(buf, sizeof(buf), s->exit_timestamp.realtime),
9fb86720
LP
4386 prefix, sigchld_code_to_string(s->code),
4387 prefix, s->status);
5cb5a6ff 4388}
44d8db9e 4389
34cf6c43 4390static char *exec_command_line(char **argv) {
44d8db9e
LP
4391 size_t k;
4392 char *n, *p, **a;
4393 bool first = true;
4394
9e2f7c11 4395 assert(argv);
44d8db9e 4396
9164977d 4397 k = 1;
9e2f7c11 4398 STRV_FOREACH(a, argv)
44d8db9e
LP
4399 k += strlen(*a)+3;
4400
5cd9cd35
LP
4401 n = new(char, k);
4402 if (!n)
44d8db9e
LP
4403 return NULL;
4404
4405 p = n;
9e2f7c11 4406 STRV_FOREACH(a, argv) {
44d8db9e
LP
4407
4408 if (!first)
4409 *(p++) = ' ';
4410 else
4411 first = false;
4412
4413 if (strpbrk(*a, WHITESPACE)) {
4414 *(p++) = '\'';
4415 p = stpcpy(p, *a);
4416 *(p++) = '\'';
4417 } else
4418 p = stpcpy(p, *a);
4419
4420 }
4421
9164977d
LP
4422 *p = 0;
4423
44d8db9e
LP
4424 /* FIXME: this doesn't really handle arguments that have
4425 * spaces and ticks in them */
4426
4427 return n;
4428}
4429
34cf6c43 4430static void exec_command_dump(ExecCommand *c, FILE *f, const char *prefix) {
e1d75803 4431 _cleanup_free_ char *cmd = NULL;
4c940960 4432 const char *prefix2;
44d8db9e
LP
4433
4434 assert(c);
4435 assert(f);
4436
4c940960 4437 prefix = strempty(prefix);
63c372cb 4438 prefix2 = strjoina(prefix, "\t");
44d8db9e 4439
9e2f7c11 4440 cmd = exec_command_line(c->argv);
44d8db9e
LP
4441 fprintf(f,
4442 "%sCommand Line: %s\n",
4443 prefix, cmd ? cmd : strerror(ENOMEM));
4444
9fb86720 4445 exec_status_dump(&c->exec_status, f, prefix2);
44d8db9e
LP
4446}
4447
4448void exec_command_dump_list(ExecCommand *c, FILE *f, const char *prefix) {
4449 assert(f);
4450
4c940960 4451 prefix = strempty(prefix);
44d8db9e
LP
4452
4453 LIST_FOREACH(command, c, c)
4454 exec_command_dump(c, f, prefix);
4455}
94f04347 4456
a6a80b4f
LP
4457void exec_command_append_list(ExecCommand **l, ExecCommand *e) {
4458 ExecCommand *end;
4459
4460 assert(l);
4461 assert(e);
4462
4463 if (*l) {
35b8ca3a 4464 /* It's kind of important, that we keep the order here */
71fda00f
LP
4465 LIST_FIND_TAIL(command, *l, end);
4466 LIST_INSERT_AFTER(command, *l, end, e);
a6a80b4f
LP
4467 } else
4468 *l = e;
4469}
4470
26fd040d
LP
4471int exec_command_set(ExecCommand *c, const char *path, ...) {
4472 va_list ap;
4473 char **l, *p;
4474
4475 assert(c);
4476 assert(path);
4477
4478 va_start(ap, path);
4479 l = strv_new_ap(path, ap);
4480 va_end(ap);
4481
4482 if (!l)
4483 return -ENOMEM;
4484
250a918d
LP
4485 p = strdup(path);
4486 if (!p) {
26fd040d
LP
4487 strv_free(l);
4488 return -ENOMEM;
4489 }
4490
4491 free(c->path);
4492 c->path = p;
4493
130d3d22 4494 return strv_free_and_replace(c->argv, l);
26fd040d
LP
4495}
4496
86b23b07 4497int exec_command_append(ExecCommand *c, const char *path, ...) {
e63ff941 4498 _cleanup_strv_free_ char **l = NULL;
86b23b07 4499 va_list ap;
86b23b07
JS
4500 int r;
4501
4502 assert(c);
4503 assert(path);
4504
4505 va_start(ap, path);
4506 l = strv_new_ap(path, ap);
4507 va_end(ap);
4508
4509 if (!l)
4510 return -ENOMEM;
4511
e287086b 4512 r = strv_extend_strv(&c->argv, l, false);
e63ff941 4513 if (r < 0)
86b23b07 4514 return r;
86b23b07
JS
4515
4516 return 0;
4517}
4518
e8a565cb
YW
4519static void *remove_tmpdir_thread(void *p) {
4520 _cleanup_free_ char *path = p;
86b23b07 4521
e8a565cb
YW
4522 (void) rm_rf(path, REMOVE_ROOT|REMOVE_PHYSICAL);
4523 return NULL;
4524}
4525
4526static ExecRuntime* exec_runtime_free(ExecRuntime *rt, bool destroy) {
4527 int r;
4528
4529 if (!rt)
4530 return NULL;
4531
4532 if (rt->manager)
4533 (void) hashmap_remove(rt->manager->exec_runtime_by_id, rt->id);
4534
4535 /* When destroy is true, then rm_rf tmp_dir and var_tmp_dir. */
4536 if (destroy && rt->tmp_dir) {
4537 log_debug("Spawning thread to nuke %s", rt->tmp_dir);
4538
4539 r = asynchronous_job(remove_tmpdir_thread, rt->tmp_dir);
4540 if (r < 0) {
4541 log_warning_errno(r, "Failed to nuke %s: %m", rt->tmp_dir);
4542 free(rt->tmp_dir);
4543 }
4544
4545 rt->tmp_dir = NULL;
4546 }
613b411c 4547
e8a565cb
YW
4548 if (destroy && rt->var_tmp_dir) {
4549 log_debug("Spawning thread to nuke %s", rt->var_tmp_dir);
4550
4551 r = asynchronous_job(remove_tmpdir_thread, rt->var_tmp_dir);
4552 if (r < 0) {
4553 log_warning_errno(r, "Failed to nuke %s: %m", rt->var_tmp_dir);
4554 free(rt->var_tmp_dir);
4555 }
4556
4557 rt->var_tmp_dir = NULL;
4558 }
4559
4560 rt->id = mfree(rt->id);
4561 rt->tmp_dir = mfree(rt->tmp_dir);
4562 rt->var_tmp_dir = mfree(rt->var_tmp_dir);
4563 safe_close_pair(rt->netns_storage_socket);
4564 return mfree(rt);
4565}
4566
4567static void exec_runtime_freep(ExecRuntime **rt) {
613b411c 4568 if (*rt)
e8a565cb
YW
4569 (void) exec_runtime_free(*rt, false);
4570}
4571
4572static int exec_runtime_allocate(ExecRuntime **rt) {
4573 assert(rt);
613b411c
LP
4574
4575 *rt = new0(ExecRuntime, 1);
f146f5e1 4576 if (!*rt)
613b411c
LP
4577 return -ENOMEM;
4578
613b411c 4579 (*rt)->netns_storage_socket[0] = (*rt)->netns_storage_socket[1] = -1;
613b411c
LP
4580 return 0;
4581}
4582
e8a565cb
YW
4583static int exec_runtime_add(
4584 Manager *m,
4585 const char *id,
4586 const char *tmp_dir,
4587 const char *var_tmp_dir,
4588 const int netns_storage_socket[2],
4589 ExecRuntime **ret) {
4590
4591 _cleanup_(exec_runtime_freep) ExecRuntime *rt = NULL;
613b411c
LP
4592 int r;
4593
e8a565cb 4594 assert(m);
613b411c
LP
4595 assert(id);
4596
e8a565cb
YW
4597 r = hashmap_ensure_allocated(&m->exec_runtime_by_id, &string_hash_ops);
4598 if (r < 0)
4599 return r;
613b411c 4600
e8a565cb 4601 r = exec_runtime_allocate(&rt);
613b411c
LP
4602 if (r < 0)
4603 return r;
4604
e8a565cb
YW
4605 rt->id = strdup(id);
4606 if (!rt->id)
4607 return -ENOMEM;
4608
4609 if (tmp_dir) {
4610 rt->tmp_dir = strdup(tmp_dir);
4611 if (!rt->tmp_dir)
4612 return -ENOMEM;
4613
4614 /* When tmp_dir is set, then we require var_tmp_dir is also set. */
4615 assert(var_tmp_dir);
4616 rt->var_tmp_dir = strdup(var_tmp_dir);
4617 if (!rt->var_tmp_dir)
4618 return -ENOMEM;
4619 }
4620
4621 if (netns_storage_socket) {
4622 rt->netns_storage_socket[0] = netns_storage_socket[0];
4623 rt->netns_storage_socket[1] = netns_storage_socket[1];
613b411c
LP
4624 }
4625
e8a565cb
YW
4626 r = hashmap_put(m->exec_runtime_by_id, rt->id, rt);
4627 if (r < 0)
4628 return r;
4629
4630 rt->manager = m;
4631
4632 if (ret)
4633 *ret = rt;
4634
4635 /* do not remove created ExecRuntime object when the operation succeeds. */
4636 rt = NULL;
4637 return 0;
4638}
4639
4640static int exec_runtime_make(Manager *m, const ExecContext *c, const char *id, ExecRuntime **ret) {
4641 _cleanup_free_ char *tmp_dir = NULL, *var_tmp_dir = NULL;
4642 _cleanup_close_pair_ int netns_storage_socket[2] = {-1, -1};
4643 int r;
4644
4645 assert(m);
4646 assert(c);
4647 assert(id);
4648
4649 /* It is not necessary to create ExecRuntime object. */
4650 if (!c->private_network && !c->private_tmp)
4651 return 0;
4652
4653 if (c->private_tmp) {
4654 r = setup_tmp_dirs(id, &tmp_dir, &var_tmp_dir);
613b411c
LP
4655 if (r < 0)
4656 return r;
4657 }
4658
e8a565cb
YW
4659 if (c->private_network) {
4660 if (socketpair(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC, 0, netns_storage_socket) < 0)
4661 return -errno;
4662 }
4663
4664 r = exec_runtime_add(m, id, tmp_dir, var_tmp_dir, netns_storage_socket, ret);
4665 if (r < 0)
4666 return r;
4667
4668 /* Avoid cleanup */
4669 netns_storage_socket[0] = -1;
4670 netns_storage_socket[1] = -1;
613b411c
LP
4671 return 1;
4672}
4673
e8a565cb
YW
4674int exec_runtime_acquire(Manager *m, const ExecContext *c, const char *id, bool create, ExecRuntime **ret) {
4675 ExecRuntime *rt;
4676 int r;
613b411c 4677
e8a565cb
YW
4678 assert(m);
4679 assert(id);
4680 assert(ret);
4681
4682 rt = hashmap_get(m->exec_runtime_by_id, id);
4683 if (rt)
4684 /* We already have a ExecRuntime object, let's increase the ref count and reuse it */
4685 goto ref;
4686
4687 if (!create)
4688 return 0;
4689
4690 /* If not found, then create a new object. */
4691 r = exec_runtime_make(m, c, id, &rt);
4692 if (r <= 0)
4693 /* When r == 0, it is not necessary to create ExecRuntime object. */
4694 return r;
613b411c 4695
e8a565cb
YW
4696ref:
4697 /* increment reference counter. */
4698 rt->n_ref++;
4699 *ret = rt;
4700 return 1;
4701}
613b411c 4702
e8a565cb
YW
4703ExecRuntime *exec_runtime_unref(ExecRuntime *rt, bool destroy) {
4704 if (!rt)
613b411c
LP
4705 return NULL;
4706
e8a565cb 4707 assert(rt->n_ref > 0);
613b411c 4708
e8a565cb
YW
4709 rt->n_ref--;
4710 if (rt->n_ref > 0)
f2341e0a
LP
4711 return NULL;
4712
e8a565cb 4713 return exec_runtime_free(rt, destroy);
613b411c
LP
4714}
4715
e8a565cb
YW
4716int exec_runtime_serialize(const Manager *m, FILE *f, FDSet *fds) {
4717 ExecRuntime *rt;
4718 Iterator i;
4719
4720 assert(m);
613b411c
LP
4721 assert(f);
4722 assert(fds);
4723
e8a565cb
YW
4724 HASHMAP_FOREACH(rt, m->exec_runtime_by_id, i) {
4725 fprintf(f, "exec-runtime=%s", rt->id);
613b411c 4726
e8a565cb
YW
4727 if (rt->tmp_dir)
4728 fprintf(f, " tmp-dir=%s", rt->tmp_dir);
613b411c 4729
e8a565cb
YW
4730 if (rt->var_tmp_dir)
4731 fprintf(f, " var-tmp-dir=%s", rt->var_tmp_dir);
613b411c 4732
e8a565cb
YW
4733 if (rt->netns_storage_socket[0] >= 0) {
4734 int copy;
613b411c 4735
e8a565cb
YW
4736 copy = fdset_put_dup(fds, rt->netns_storage_socket[0]);
4737 if (copy < 0)
4738 return copy;
613b411c 4739
e8a565cb
YW
4740 fprintf(f, " netns-socket-0=%i", copy);
4741 }
613b411c 4742
e8a565cb
YW
4743 if (rt->netns_storage_socket[1] >= 0) {
4744 int copy;
613b411c 4745
e8a565cb
YW
4746 copy = fdset_put_dup(fds, rt->netns_storage_socket[1]);
4747 if (copy < 0)
4748 return copy;
613b411c 4749
e8a565cb
YW
4750 fprintf(f, " netns-socket-1=%i", copy);
4751 }
4752
4753 fputc('\n', f);
613b411c
LP
4754 }
4755
4756 return 0;
4757}
4758
e8a565cb
YW
4759int exec_runtime_deserialize_compat(Unit *u, const char *key, const char *value, FDSet *fds) {
4760 _cleanup_(exec_runtime_freep) ExecRuntime *rt_create = NULL;
4761 ExecRuntime *rt;
613b411c
LP
4762 int r;
4763
e8a565cb
YW
4764 /* This is for the migration from old (v237 or earlier) deserialization text.
4765 * Due to the bug #7790, this may not work with the units that use JoinsNamespaceOf=.
4766 * Even if the ExecRuntime object originally created by the other unit, we cannot judge
4767 * so or not from the serialized text, then we always creates a new object owned by this. */
4768
4769 assert(u);
613b411c
LP
4770 assert(key);
4771 assert(value);
4772
e8a565cb
YW
4773 /* Manager manages ExecRuntime objects by the unit id.
4774 * So, we omit the serialized text when the unit does not have id (yet?)... */
4775 if (isempty(u->id)) {
4776 log_unit_debug(u, "Invocation ID not found. Dropping runtime parameter.");
4777 return 0;
4778 }
613b411c 4779
e8a565cb
YW
4780 r = hashmap_ensure_allocated(&u->manager->exec_runtime_by_id, &string_hash_ops);
4781 if (r < 0) {
4782 log_unit_debug_errno(u, r, "Failed to allocate storage for runtime parameter: %m");
4783 return 0;
4784 }
4785
4786 rt = hashmap_get(u->manager->exec_runtime_by_id, u->id);
4787 if (!rt) {
4788 r = exec_runtime_allocate(&rt_create);
613b411c 4789 if (r < 0)
f2341e0a 4790 return log_oom();
613b411c 4791
e8a565cb
YW
4792 rt_create->id = strdup(u->id);
4793 if (!rt_create->id)
4794 return log_oom();
4795
4796 rt = rt_create;
4797 }
4798
4799 if (streq(key, "tmp-dir")) {
4800 char *copy;
4801
613b411c
LP
4802 copy = strdup(value);
4803 if (!copy)
4804 return log_oom();
4805
e8a565cb 4806 free_and_replace(rt->tmp_dir, copy);
613b411c
LP
4807
4808 } else if (streq(key, "var-tmp-dir")) {
4809 char *copy;
4810
613b411c
LP
4811 copy = strdup(value);
4812 if (!copy)
4813 return log_oom();
4814
e8a565cb 4815 free_and_replace(rt->var_tmp_dir, copy);
613b411c
LP
4816
4817 } else if (streq(key, "netns-socket-0")) {
4818 int fd;
4819
e8a565cb 4820 if (safe_atoi(value, &fd) < 0 || !fdset_contains(fds, fd)) {
f2341e0a 4821 log_unit_debug(u, "Failed to parse netns socket value: %s", value);
e8a565cb 4822 return 0;
613b411c 4823 }
e8a565cb
YW
4824
4825 safe_close(rt->netns_storage_socket[0]);
4826 rt->netns_storage_socket[0] = fdset_remove(fds, fd);
4827
613b411c
LP
4828 } else if (streq(key, "netns-socket-1")) {
4829 int fd;
4830
e8a565cb 4831 if (safe_atoi(value, &fd) < 0 || !fdset_contains(fds, fd)) {
f2341e0a 4832 log_unit_debug(u, "Failed to parse netns socket value: %s", value);
e8a565cb 4833 return 0;
613b411c 4834 }
e8a565cb
YW
4835
4836 safe_close(rt->netns_storage_socket[1]);
4837 rt->netns_storage_socket[1] = fdset_remove(fds, fd);
613b411c
LP
4838 } else
4839 return 0;
4840
e8a565cb
YW
4841 /* If the object is newly created, then put it to the hashmap which manages ExecRuntime objects. */
4842 if (rt_create) {
4843 r = hashmap_put(u->manager->exec_runtime_by_id, rt_create->id, rt_create);
4844 if (r < 0) {
4845 log_unit_debug_errno(u, r, "Failed to put runtime paramter to manager's storage: %m");
4846 return 0;
4847 }
613b411c 4848
e8a565cb 4849 rt_create->manager = u->manager;
613b411c 4850
e8a565cb
YW
4851 /* Avoid cleanup */
4852 rt_create = NULL;
4853 }
98b47d54 4854
e8a565cb
YW
4855 return 1;
4856}
613b411c 4857
e8a565cb
YW
4858void exec_runtime_deserialize_one(Manager *m, const char *value, FDSet *fds) {
4859 char *id = NULL, *tmp_dir = NULL, *var_tmp_dir = NULL;
4860 int r, fd0 = -1, fd1 = -1;
4861 const char *p, *v = value;
4862 size_t n;
613b411c 4863
e8a565cb
YW
4864 assert(m);
4865 assert(value);
4866 assert(fds);
98b47d54 4867
e8a565cb
YW
4868 n = strcspn(v, " ");
4869 id = strndupa(v, n);
4870 if (v[n] != ' ')
4871 goto finalize;
4872 p = v + n + 1;
4873
4874 v = startswith(p, "tmp-dir=");
4875 if (v) {
4876 n = strcspn(v, " ");
4877 tmp_dir = strndupa(v, n);
4878 if (v[n] != ' ')
4879 goto finalize;
4880 p = v + n + 1;
4881 }
4882
4883 v = startswith(p, "var-tmp-dir=");
4884 if (v) {
4885 n = strcspn(v, " ");
4886 var_tmp_dir = strndupa(v, n);
4887 if (v[n] != ' ')
4888 goto finalize;
4889 p = v + n + 1;
4890 }
4891
4892 v = startswith(p, "netns-socket-0=");
4893 if (v) {
4894 char *buf;
4895
4896 n = strcspn(v, " ");
4897 buf = strndupa(v, n);
4898 if (safe_atoi(buf, &fd0) < 0 || !fdset_contains(fds, fd0)) {
4899 log_debug("Unable to process exec-runtime netns fd specification.");
4900 return;
98b47d54 4901 }
e8a565cb
YW
4902 fd0 = fdset_remove(fds, fd0);
4903 if (v[n] != ' ')
4904 goto finalize;
4905 p = v + n + 1;
613b411c
LP
4906 }
4907
e8a565cb
YW
4908 v = startswith(p, "netns-socket-1=");
4909 if (v) {
4910 char *buf;
98b47d54 4911
e8a565cb
YW
4912 n = strcspn(v, " ");
4913 buf = strndupa(v, n);
4914 if (safe_atoi(buf, &fd1) < 0 || !fdset_contains(fds, fd1)) {
4915 log_debug("Unable to process exec-runtime netns fd specification.");
4916 return;
98b47d54 4917 }
e8a565cb
YW
4918 fd1 = fdset_remove(fds, fd1);
4919 }
98b47d54 4920
e8a565cb
YW
4921finalize:
4922
4923 r = exec_runtime_add(m, id, tmp_dir, var_tmp_dir, (int[]) { fd0, fd1 }, NULL);
4924 if (r < 0) {
4925 log_debug_errno(r, "Failed to add exec-runtime: %m");
4926 return;
613b411c 4927 }
e8a565cb 4928}
613b411c 4929
e8a565cb
YW
4930void exec_runtime_vacuum(Manager *m) {
4931 ExecRuntime *rt;
4932 Iterator i;
4933
4934 assert(m);
4935
4936 /* Free unreferenced ExecRuntime objects. This is used after manager deserialization process. */
4937
4938 HASHMAP_FOREACH(rt, m->exec_runtime_by_id, i) {
4939 if (rt->n_ref > 0)
4940 continue;
4941
4942 (void) exec_runtime_free(rt, false);
4943 }
613b411c
LP
4944}
4945
80876c20
LP
4946static const char* const exec_input_table[_EXEC_INPUT_MAX] = {
4947 [EXEC_INPUT_NULL] = "null",
4948 [EXEC_INPUT_TTY] = "tty",
4949 [EXEC_INPUT_TTY_FORCE] = "tty-force",
4f2d528d 4950 [EXEC_INPUT_TTY_FAIL] = "tty-fail",
52c239d7
LB
4951 [EXEC_INPUT_SOCKET] = "socket",
4952 [EXEC_INPUT_NAMED_FD] = "fd",
08f3be7a 4953 [EXEC_INPUT_DATA] = "data",
2038c3f5 4954 [EXEC_INPUT_FILE] = "file",
80876c20
LP
4955};
4956
8a0867d6
LP
4957DEFINE_STRING_TABLE_LOOKUP(exec_input, ExecInput);
4958
94f04347 4959static const char* const exec_output_table[_EXEC_OUTPUT_MAX] = {
80876c20 4960 [EXEC_OUTPUT_INHERIT] = "inherit",
94f04347 4961 [EXEC_OUTPUT_NULL] = "null",
80876c20 4962 [EXEC_OUTPUT_TTY] = "tty",
94f04347 4963 [EXEC_OUTPUT_SYSLOG] = "syslog",
28dbc1e8 4964 [EXEC_OUTPUT_SYSLOG_AND_CONSOLE] = "syslog+console",
9a6bca7a 4965 [EXEC_OUTPUT_KMSG] = "kmsg",
28dbc1e8 4966 [EXEC_OUTPUT_KMSG_AND_CONSOLE] = "kmsg+console",
706343f4
LP
4967 [EXEC_OUTPUT_JOURNAL] = "journal",
4968 [EXEC_OUTPUT_JOURNAL_AND_CONSOLE] = "journal+console",
52c239d7
LB
4969 [EXEC_OUTPUT_SOCKET] = "socket",
4970 [EXEC_OUTPUT_NAMED_FD] = "fd",
2038c3f5 4971 [EXEC_OUTPUT_FILE] = "file",
94f04347
LP
4972};
4973
4974DEFINE_STRING_TABLE_LOOKUP(exec_output, ExecOutput);
023a4f67
LP
4975
4976static const char* const exec_utmp_mode_table[_EXEC_UTMP_MODE_MAX] = {
4977 [EXEC_UTMP_INIT] = "init",
4978 [EXEC_UTMP_LOGIN] = "login",
4979 [EXEC_UTMP_USER] = "user",
4980};
4981
4982DEFINE_STRING_TABLE_LOOKUP(exec_utmp_mode, ExecUtmpMode);
53f47dfc
YW
4983
4984static const char* const exec_preserve_mode_table[_EXEC_PRESERVE_MODE_MAX] = {
4985 [EXEC_PRESERVE_NO] = "no",
4986 [EXEC_PRESERVE_YES] = "yes",
4987 [EXEC_PRESERVE_RESTART] = "restart",
4988};
4989
4990DEFINE_STRING_TABLE_LOOKUP_WITH_BOOLEAN(exec_preserve_mode, ExecPreserveMode, EXEC_PRESERVE_YES);
3536f49e 4991
72fd1768 4992static const char* const exec_directory_type_table[_EXEC_DIRECTORY_TYPE_MAX] = {
3536f49e
YW
4993 [EXEC_DIRECTORY_RUNTIME] = "RuntimeDirectory",
4994 [EXEC_DIRECTORY_STATE] = "StateDirectory",
4995 [EXEC_DIRECTORY_CACHE] = "CacheDirectory",
4996 [EXEC_DIRECTORY_LOGS] = "LogsDirectory",
4997 [EXEC_DIRECTORY_CONFIGURATION] = "ConfigurationDirectory",
4998};
4999
5000DEFINE_STRING_TABLE_LOOKUP(exec_directory_type, ExecDirectoryType);
b1edf445
LP
5001
5002static const char* const exec_keyring_mode_table[_EXEC_KEYRING_MODE_MAX] = {
5003 [EXEC_KEYRING_INHERIT] = "inherit",
5004 [EXEC_KEYRING_PRIVATE] = "private",
5005 [EXEC_KEYRING_SHARED] = "shared",
5006};
5007
5008DEFINE_STRING_TABLE_LOOKUP(exec_keyring_mode, ExecKeyringMode);