]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/core/execute.c
tree-wide: drop 'This file is part of systemd' blurb
[thirdparty/systemd.git] / src / core / execute.c
CommitLineData
53e1b683 1/* SPDX-License-Identifier: LGPL-2.1+ */
a7334b09 2/***
a7334b09 3 Copyright 2010 Lennart Poettering
a7334b09
LP
4***/
5
034c6ed7
LP
6#include <errno.h>
7#include <fcntl.h>
8dd4c05b
LP
8#include <glob.h>
9#include <grp.h>
10#include <poll.h>
309bff19 11#include <signal.h>
8dd4c05b 12#include <string.h>
19c0b0b9 13#include <sys/capability.h>
d251207d 14#include <sys/eventfd.h>
f3e43635 15#include <sys/mman.h>
8dd4c05b 16#include <sys/personality.h>
94f04347 17#include <sys/prctl.h>
d2ffa389 18#include <sys/shm.h>
8dd4c05b 19#include <sys/socket.h>
451a074f 20#include <sys/stat.h>
d2ffa389 21#include <sys/types.h>
8dd4c05b
LP
22#include <sys/un.h>
23#include <unistd.h>
023a4f67 24#include <utmpx.h>
5cb5a6ff 25
349cc4a5 26#if HAVE_PAM
5b6319dc
LP
27#include <security/pam_appl.h>
28#endif
29
349cc4a5 30#if HAVE_SELINUX
7b52a628
MS
31#include <selinux/selinux.h>
32#endif
33
349cc4a5 34#if HAVE_SECCOMP
17df7223
LP
35#include <seccomp.h>
36#endif
37
349cc4a5 38#if HAVE_APPARMOR
eef65bf3
MS
39#include <sys/apparmor.h>
40#endif
41
24882e06 42#include "sd-messages.h"
8dd4c05b
LP
43
44#include "af-list.h"
b5efdb8a 45#include "alloc-util.h"
349cc4a5 46#if HAVE_APPARMOR
3ffd4af2
LP
47#include "apparmor-util.h"
48#endif
8dd4c05b
LP
49#include "async.h"
50#include "barrier.h"
8dd4c05b 51#include "cap-list.h"
430f0182 52#include "capability-util.h"
a1164ae3 53#include "chown-recursive.h"
da681e1b 54#include "cpu-set-util.h"
f6a6225e 55#include "def.h"
4d1a6904 56#include "env-util.h"
17df7223 57#include "errno-list.h"
3ffd4af2 58#include "execute.h"
8dd4c05b 59#include "exit-status.h"
3ffd4af2 60#include "fd-util.h"
8dd4c05b 61#include "fileio.h"
f97b34a6 62#include "format-util.h"
f4f15635 63#include "fs-util.h"
7d50b32a 64#include "glob-util.h"
c004493c 65#include "io-util.h"
8dd4c05b 66#include "ioprio.h"
a1164ae3 67#include "label.h"
8dd4c05b
LP
68#include "log.h"
69#include "macro.h"
e8a565cb 70#include "manager.h"
8dd4c05b
LP
71#include "missing.h"
72#include "mkdir.h"
73#include "namespace.h"
6bedfcbb 74#include "parse-util.h"
8dd4c05b 75#include "path-util.h"
0b452006 76#include "process-util.h"
78f22b97 77#include "rlimit-util.h"
8dd4c05b 78#include "rm-rf.h"
349cc4a5 79#if HAVE_SECCOMP
3ffd4af2
LP
80#include "seccomp-util.h"
81#endif
8dd4c05b 82#include "securebits.h"
07d46372 83#include "securebits-util.h"
8dd4c05b 84#include "selinux-util.h"
24882e06 85#include "signal-util.h"
8dd4c05b 86#include "smack-util.h"
57b7a260 87#include "socket-util.h"
fd63e712 88#include "special.h"
949befd3 89#include "stat-util.h"
8b43440b 90#include "string-table.h"
07630cea 91#include "string-util.h"
8dd4c05b 92#include "strv.h"
7ccbd1ae 93#include "syslog-util.h"
8dd4c05b
LP
94#include "terminal-util.h"
95#include "unit.h"
b1d4f8e1 96#include "user-util.h"
8dd4c05b
LP
97#include "util.h"
98#include "utmp-wtmp.h"
5cb5a6ff 99
e056b01d 100#define IDLE_TIMEOUT_USEC (5*USEC_PER_SEC)
31a7eb86 101#define IDLE_TIMEOUT2_USEC (1*USEC_PER_SEC)
e6a26745 102
02a51aba
LP
103/* This assumes there is a 'tty' group */
104#define TTY_MODE 0620
105
531dca78
LP
106#define SNDBUF_SIZE (8*1024*1024)
107
/* Moves the passed file descriptors so that fds[i] ends up as fd number i+3.
 *
 * The array is modified in place: every entry that is not yet at its target
 * number is duplicated with F_DUPFD (lowest free fd >= target), the old fd is
 * closed and the entry is replaced by the duplicate. If the target number was
 * still occupied the duplicate lands elsewhere; in that case another pass is
 * made starting at the first such entry, until all entries are in place.
 *
 * Returns 0 on success, or a negative errno-style error if fcntl() fails. */
static int shift_fds(int fds[], size_t n_fds) {
        int first = 0;

        if (n_fds == 0)
                return 0;

        assert(fds);

        do {
                int retry_at = -1, idx;

                for (idx = first; idx < (int) n_fds; idx++) {
                        int want = idx + 3, dup_fd;

                        /* Nothing to do if this one already sits at its slot */
                        if (fds[idx] == want)
                                continue;

                        dup_fd = fcntl(fds[idx], F_DUPFD, want);
                        if (dup_fd < 0)
                                return -errno;

                        safe_close(fds[idx]);
                        fds[idx] = dup_fd;

                        /* The wanted number wasn't free: remember the first
                         * such entry so that the next pass starts there */
                        if (retry_at < 0 && dup_fd != want)
                                retry_at = idx;
                }

                first = retry_at;
        } while (first >= 0);

        return 0;
}
152
da6053d0
LP
/* Adjusts the file descriptor flags of the fds that are passed on to a child.
 *
 * For the first n_socket_fds entries O_NONBLOCK is set or cleared according to
 * 'nonblock'; FD_CLOEXEC is cleared on all n_storage_fds + n_socket_fds
 * entries, since the fds are meant to survive into the child.
 *
 * Returns 0 on success, or the negative error of the first failing helper. */
static int flags_fds(const int fds[], size_t n_storage_fds, size_t n_socket_fds, bool nonblock) {
        size_t total = n_storage_fds + n_socket_fds, k;

        if (total == 0)
                return 0;

        assert(fds);

        for (k = 0; k < total; k++) {
                int r;

                /* O_NONBLOCK is only touched for the socket activation fds */
                if (k < n_socket_fds) {
                        r = fd_nonblock(fds[k], nonblock);
                        if (r < 0)
                                return r;
                }

                /* FD_CLOEXEC is dropped unconditionally */
                r = fd_cloexec(fds[k], false);
                if (r < 0)
                        return r;
        }

        return 0;
}
185
1e22b5cd 186static const char *exec_context_tty_path(const ExecContext *context) {
80876c20
LP
187 assert(context);
188
1e22b5cd
LP
189 if (context->stdio_as_fds)
190 return NULL;
191
80876c20
LP
192 if (context->tty_path)
193 return context->tty_path;
194
195 return "/dev/console";
196}
197
1e22b5cd
LP
198static void exec_context_tty_reset(const ExecContext *context, const ExecParameters *p) {
199 const char *path;
200
6ea832a2
LP
201 assert(context);
202
1e22b5cd 203 path = exec_context_tty_path(context);
6ea832a2 204
1e22b5cd
LP
205 if (context->tty_vhangup) {
206 if (p && p->stdin_fd >= 0)
207 (void) terminal_vhangup_fd(p->stdin_fd);
208 else if (path)
209 (void) terminal_vhangup(path);
210 }
6ea832a2 211
1e22b5cd
LP
212 if (context->tty_reset) {
213 if (p && p->stdin_fd >= 0)
214 (void) reset_terminal_fd(p->stdin_fd, true);
215 else if (path)
216 (void) reset_terminal(path);
217 }
218
219 if (context->tty_vt_disallocate && path)
220 (void) vt_disallocate(path);
6ea832a2
LP
221}
222
6af760f3
LP
223static bool is_terminal_input(ExecInput i) {
224 return IN_SET(i,
225 EXEC_INPUT_TTY,
226 EXEC_INPUT_TTY_FORCE,
227 EXEC_INPUT_TTY_FAIL);
228}
229
3a1286b6 230static bool is_terminal_output(ExecOutput o) {
6af760f3
LP
231 return IN_SET(o,
232 EXEC_OUTPUT_TTY,
233 EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
234 EXEC_OUTPUT_KMSG_AND_CONSOLE,
235 EXEC_OUTPUT_JOURNAL_AND_CONSOLE);
236}
237
aac8c0c3
LP
238static bool is_syslog_output(ExecOutput o) {
239 return IN_SET(o,
240 EXEC_OUTPUT_SYSLOG,
241 EXEC_OUTPUT_SYSLOG_AND_CONSOLE);
242}
243
244static bool is_kmsg_output(ExecOutput o) {
245 return IN_SET(o,
246 EXEC_OUTPUT_KMSG,
247 EXEC_OUTPUT_KMSG_AND_CONSOLE);
248}
249
6af760f3
LP
250static bool exec_context_needs_term(const ExecContext *c) {
251 assert(c);
252
253 /* Return true if the execution context suggests we should set $TERM to something useful. */
254
255 if (is_terminal_input(c->std_input))
256 return true;
257
258 if (is_terminal_output(c->std_output))
259 return true;
260
261 if (is_terminal_output(c->std_error))
262 return true;
263
264 return !!c->tty_path;
3a1286b6
MS
265}
266
/* Opens /dev/null with the given flags (plus O_NOCTTY) and moves the
 * resulting fd to fd number 'nfd'. Returns the result of move_fd(), or a
 * negative errno-style error if the open() fails. */
static int open_null_as(int flags, int nfd) {
        int null_fd;

        assert(nfd >= 0);

        null_fd = open("/dev/null", flags|O_NOCTTY);
        if (null_fd >= 0)
                return move_fd(null_fd, nfd, false);

        return -errno;
}
278
524daa8c 279static int connect_journal_socket(int fd, uid_t uid, gid_t gid) {
92a17af9 280 static const union sockaddr_union sa = {
b92bea5d
ZJS
281 .un.sun_family = AF_UNIX,
282 .un.sun_path = "/run/systemd/journal/stdout",
283 };
524daa8c
ZJS
284 uid_t olduid = UID_INVALID;
285 gid_t oldgid = GID_INVALID;
286 int r;
287
cad93f29 288 if (gid_is_valid(gid)) {
524daa8c
ZJS
289 oldgid = getgid();
290
92a17af9 291 if (setegid(gid) < 0)
524daa8c
ZJS
292 return -errno;
293 }
294
cad93f29 295 if (uid_is_valid(uid)) {
524daa8c
ZJS
296 olduid = getuid();
297
92a17af9 298 if (seteuid(uid) < 0) {
524daa8c
ZJS
299 r = -errno;
300 goto restore_gid;
301 }
302 }
303
92a17af9 304 r = connect(fd, &sa.sa, SOCKADDR_UN_LEN(sa.un)) < 0 ? -errno : 0;
524daa8c
ZJS
305
306 /* If we fail to restore the uid or gid, things will likely
307 fail later on. This should only happen if an LSM interferes. */
308
cad93f29 309 if (uid_is_valid(uid))
524daa8c
ZJS
310 (void) seteuid(olduid);
311
312 restore_gid:
cad93f29 313 if (gid_is_valid(gid))
524daa8c
ZJS
314 (void) setegid(oldgid);
315
316 return r;
317}
318
fd1f9c89 319static int connect_logger_as(
34cf6c43 320 const Unit *unit,
fd1f9c89 321 const ExecContext *context,
af635cf3 322 const ExecParameters *params,
fd1f9c89
LP
323 ExecOutput output,
324 const char *ident,
fd1f9c89
LP
325 int nfd,
326 uid_t uid,
327 gid_t gid) {
328
524daa8c 329 int fd, r;
071830ff
LP
330
331 assert(context);
af635cf3 332 assert(params);
80876c20
LP
333 assert(output < _EXEC_OUTPUT_MAX);
334 assert(ident);
335 assert(nfd >= 0);
071830ff 336
54fe0cdb
LP
337 fd = socket(AF_UNIX, SOCK_STREAM, 0);
338 if (fd < 0)
80876c20 339 return -errno;
071830ff 340
524daa8c
ZJS
341 r = connect_journal_socket(fd, uid, gid);
342 if (r < 0)
343 return r;
071830ff 344
80876c20 345 if (shutdown(fd, SHUT_RD) < 0) {
03e334a1 346 safe_close(fd);
80876c20
LP
347 return -errno;
348 }
071830ff 349
fd1f9c89 350 (void) fd_inc_sndbuf(fd, SNDBUF_SIZE);
531dca78 351
80876c20 352 dprintf(fd,
62bca2c6 353 "%s\n"
80876c20
LP
354 "%s\n"
355 "%i\n"
54fe0cdb
LP
356 "%i\n"
357 "%i\n"
358 "%i\n"
4f4a1dbf 359 "%i\n",
c867611e 360 context->syslog_identifier ?: ident,
af635cf3 361 params->flags & EXEC_PASS_LOG_UNIT ? unit->id : "",
54fe0cdb
LP
362 context->syslog_priority,
363 !!context->syslog_level_prefix,
aac8c0c3
LP
364 is_syslog_output(output),
365 is_kmsg_output(output),
3a1286b6 366 is_terminal_output(output));
80876c20 367
046a82c1 368 return move_fd(fd, nfd, false);
80876c20 369}
/* Opens the terminal at 'path' with the given flags (plus O_NOCTTY) and moves
 * the resulting fd to fd number 'nfd'. Returns the result of move_fd(), or
 * the negative error returned by open_terminal(). */
static int open_terminal_as(const char *path, int flags, int nfd) {
        int tty_fd;

        assert(path);
        assert(nfd >= 0);

        tty_fd = open_terminal(path, flags | O_NOCTTY);

        return tty_fd < 0 ? tty_fd : move_fd(tty_fd, nfd, false);
}
071830ff 382
2038c3f5
LP
383static int acquire_path(const char *path, int flags, mode_t mode) {
384 union sockaddr_union sa = {
385 .sa.sa_family = AF_UNIX,
386 };
80876c20 387 int fd, r;
071830ff 388
80876c20 389 assert(path);
071830ff 390
2038c3f5
LP
391 if (IN_SET(flags & O_ACCMODE, O_WRONLY, O_RDWR))
392 flags |= O_CREAT;
393
394 fd = open(path, flags|O_NOCTTY, mode);
395 if (fd >= 0)
80876c20 396 return fd;
071830ff 397
2038c3f5
LP
398 if (errno != ENXIO) /* ENXIO is returned when we try to open() an AF_UNIX file system socket on Linux */
399 return -errno;
400 if (strlen(path) > sizeof(sa.un.sun_path)) /* Too long, can't be a UNIX socket */
401 return -ENXIO;
402
403 /* So, it appears the specified path could be an AF_UNIX socket. Let's see if we can connect to it. */
404
405 fd = socket(AF_UNIX, SOCK_STREAM, 0);
406 if (fd < 0)
407 return -errno;
408
409 strncpy(sa.un.sun_path, path, sizeof(sa.un.sun_path));
410 if (connect(fd, &sa.sa, SOCKADDR_UN_LEN(sa.un)) < 0) {
03e334a1 411 safe_close(fd);
2038c3f5
LP
412 return errno == EINVAL ? -ENXIO : -errno; /* Propagate initial error if we get EINVAL, i.e. we have
413 * indication that his wasn't an AF_UNIX socket after all */
414 }
071830ff 415
2038c3f5
LP
416 if ((flags & O_ACCMODE) == O_RDONLY)
417 r = shutdown(fd, SHUT_WR);
418 else if ((flags & O_ACCMODE) == O_WRONLY)
419 r = shutdown(fd, SHUT_RD);
420 else
421 return fd;
422 if (r < 0) {
423 safe_close(fd);
424 return -errno;
425 }
426
427 return fd;
80876c20 428}
071830ff 429
08f3be7a
LP
430static int fixup_input(
431 const ExecContext *context,
432 int socket_fd,
433 bool apply_tty_stdin) {
434
435 ExecInput std_input;
436
437 assert(context);
438
439 std_input = context->std_input;
1e3ad081
LP
440
441 if (is_terminal_input(std_input) && !apply_tty_stdin)
442 return EXEC_INPUT_NULL;
071830ff 443
03fd9c49 444 if (std_input == EXEC_INPUT_SOCKET && socket_fd < 0)
4f2d528d
LP
445 return EXEC_INPUT_NULL;
446
08f3be7a
LP
447 if (std_input == EXEC_INPUT_DATA && context->stdin_data_size == 0)
448 return EXEC_INPUT_NULL;
449
03fd9c49 450 return std_input;
4f2d528d
LP
451}
452
03fd9c49 453static int fixup_output(ExecOutput std_output, int socket_fd) {
4f2d528d 454
03fd9c49 455 if (std_output == EXEC_OUTPUT_SOCKET && socket_fd < 0)
4f2d528d
LP
456 return EXEC_OUTPUT_INHERIT;
457
03fd9c49 458 return std_output;
4f2d528d
LP
459}
460
a34ceba6
LP
461static int setup_input(
462 const ExecContext *context,
463 const ExecParameters *params,
52c239d7
LB
464 int socket_fd,
465 int named_iofds[3]) {
a34ceba6 466
4f2d528d
LP
467 ExecInput i;
468
469 assert(context);
a34ceba6
LP
470 assert(params);
471
472 if (params->stdin_fd >= 0) {
473 if (dup2(params->stdin_fd, STDIN_FILENO) < 0)
474 return -errno;
475
476 /* Try to make this the controlling tty, if it is a tty, and reset it */
1fb0682e
LP
477 if (isatty(STDIN_FILENO)) {
478 (void) ioctl(STDIN_FILENO, TIOCSCTTY, context->std_input == EXEC_INPUT_TTY_FORCE);
479 (void) reset_terminal_fd(STDIN_FILENO, true);
480 }
a34ceba6
LP
481
482 return STDIN_FILENO;
483 }
4f2d528d 484
08f3be7a 485 i = fixup_input(context, socket_fd, params->flags & EXEC_APPLY_TTY_STDIN);
4f2d528d
LP
486
487 switch (i) {
071830ff 488
80876c20
LP
489 case EXEC_INPUT_NULL:
490 return open_null_as(O_RDONLY, STDIN_FILENO);
491
492 case EXEC_INPUT_TTY:
493 case EXEC_INPUT_TTY_FORCE:
494 case EXEC_INPUT_TTY_FAIL: {
046a82c1 495 int fd;
071830ff 496
1e22b5cd 497 fd = acquire_terminal(exec_context_tty_path(context),
8854d795
LP
498 i == EXEC_INPUT_TTY_FAIL ? ACQUIRE_TERMINAL_TRY :
499 i == EXEC_INPUT_TTY_FORCE ? ACQUIRE_TERMINAL_FORCE :
500 ACQUIRE_TERMINAL_WAIT,
3a43da28 501 USEC_INFINITY);
970edce6 502 if (fd < 0)
80876c20
LP
503 return fd;
504
046a82c1 505 return move_fd(fd, STDIN_FILENO, false);
80876c20
LP
506 }
507
4f2d528d 508 case EXEC_INPUT_SOCKET:
e75a9ed1
LP
509 assert(socket_fd >= 0);
510
4f2d528d
LP
511 return dup2(socket_fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
512
52c239d7 513 case EXEC_INPUT_NAMED_FD:
e75a9ed1
LP
514 assert(named_iofds[STDIN_FILENO] >= 0);
515
52c239d7
LB
516 (void) fd_nonblock(named_iofds[STDIN_FILENO], false);
517 return dup2(named_iofds[STDIN_FILENO], STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
518
08f3be7a
LP
519 case EXEC_INPUT_DATA: {
520 int fd;
521
522 fd = acquire_data_fd(context->stdin_data, context->stdin_data_size, 0);
523 if (fd < 0)
524 return fd;
525
526 return move_fd(fd, STDIN_FILENO, false);
527 }
528
2038c3f5
LP
529 case EXEC_INPUT_FILE: {
530 bool rw;
531 int fd;
532
533 assert(context->stdio_file[STDIN_FILENO]);
534
535 rw = (context->std_output == EXEC_OUTPUT_FILE && streq_ptr(context->stdio_file[STDIN_FILENO], context->stdio_file[STDOUT_FILENO])) ||
536 (context->std_error == EXEC_OUTPUT_FILE && streq_ptr(context->stdio_file[STDIN_FILENO], context->stdio_file[STDERR_FILENO]));
537
538 fd = acquire_path(context->stdio_file[STDIN_FILENO], rw ? O_RDWR : O_RDONLY, 0666 & ~context->umask);
539 if (fd < 0)
540 return fd;
541
542 return move_fd(fd, STDIN_FILENO, false);
543 }
544
80876c20
LP
545 default:
546 assert_not_reached("Unknown input type");
547 }
548}
549
a34ceba6 550static int setup_output(
34cf6c43 551 const Unit *unit,
a34ceba6
LP
552 const ExecContext *context,
553 const ExecParameters *params,
554 int fileno,
555 int socket_fd,
52c239d7 556 int named_iofds[3],
a34ceba6 557 const char *ident,
7bce046b
LP
558 uid_t uid,
559 gid_t gid,
560 dev_t *journal_stream_dev,
561 ino_t *journal_stream_ino) {
a34ceba6 562
4f2d528d
LP
563 ExecOutput o;
564 ExecInput i;
47c1d80d 565 int r;
4f2d528d 566
f2341e0a 567 assert(unit);
80876c20 568 assert(context);
a34ceba6 569 assert(params);
80876c20 570 assert(ident);
7bce046b
LP
571 assert(journal_stream_dev);
572 assert(journal_stream_ino);
80876c20 573
a34ceba6
LP
574 if (fileno == STDOUT_FILENO && params->stdout_fd >= 0) {
575
576 if (dup2(params->stdout_fd, STDOUT_FILENO) < 0)
577 return -errno;
578
579 return STDOUT_FILENO;
580 }
581
582 if (fileno == STDERR_FILENO && params->stderr_fd >= 0) {
583 if (dup2(params->stderr_fd, STDERR_FILENO) < 0)
584 return -errno;
585
586 return STDERR_FILENO;
587 }
588
08f3be7a 589 i = fixup_input(context, socket_fd, params->flags & EXEC_APPLY_TTY_STDIN);
03fd9c49 590 o = fixup_output(context->std_output, socket_fd);
4f2d528d 591
eb17e935
MS
592 if (fileno == STDERR_FILENO) {
593 ExecOutput e;
594 e = fixup_output(context->std_error, socket_fd);
80876c20 595
eb17e935
MS
596 /* This expects the input and output are already set up */
597
598 /* Don't change the stderr file descriptor if we inherit all
599 * the way and are not on a tty */
600 if (e == EXEC_OUTPUT_INHERIT &&
601 o == EXEC_OUTPUT_INHERIT &&
602 i == EXEC_INPUT_NULL &&
603 !is_terminal_input(context->std_input) &&
604 getppid () != 1)
605 return fileno;
606
607 /* Duplicate from stdout if possible */
52c239d7 608 if ((e == o && e != EXEC_OUTPUT_NAMED_FD) || e == EXEC_OUTPUT_INHERIT)
eb17e935 609 return dup2(STDOUT_FILENO, fileno) < 0 ? -errno : fileno;
071830ff 610
eb17e935 611 o = e;
80876c20 612
eb17e935 613 } else if (o == EXEC_OUTPUT_INHERIT) {
21d21ea4
LP
614 /* If input got downgraded, inherit the original value */
615 if (i == EXEC_INPUT_NULL && is_terminal_input(context->std_input))
1e22b5cd 616 return open_terminal_as(exec_context_tty_path(context), O_WRONLY, fileno);
21d21ea4 617
08f3be7a
LP
618 /* If the input is connected to anything that's not a /dev/null or a data fd, inherit that... */
619 if (!IN_SET(i, EXEC_INPUT_NULL, EXEC_INPUT_DATA))
eb17e935 620 return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
071830ff 621
acb591e4
LP
622 /* If we are not started from PID 1 we just inherit STDOUT from our parent process. */
623 if (getppid() != 1)
eb17e935 624 return fileno;
94f04347 625
eb17e935
MS
626 /* We need to open /dev/null here anew, to get the right access mode. */
627 return open_null_as(O_WRONLY, fileno);
071830ff 628 }
94f04347 629
eb17e935 630 switch (o) {
80876c20
LP
631
632 case EXEC_OUTPUT_NULL:
eb17e935 633 return open_null_as(O_WRONLY, fileno);
80876c20
LP
634
635 case EXEC_OUTPUT_TTY:
4f2d528d 636 if (is_terminal_input(i))
eb17e935 637 return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
80876c20
LP
638
639 /* We don't reset the terminal if this is just about output */
1e22b5cd 640 return open_terminal_as(exec_context_tty_path(context), O_WRONLY, fileno);
80876c20
LP
641
642 case EXEC_OUTPUT_SYSLOG:
28dbc1e8 643 case EXEC_OUTPUT_SYSLOG_AND_CONSOLE:
9a6bca7a 644 case EXEC_OUTPUT_KMSG:
28dbc1e8 645 case EXEC_OUTPUT_KMSG_AND_CONSOLE:
706343f4
LP
646 case EXEC_OUTPUT_JOURNAL:
647 case EXEC_OUTPUT_JOURNAL_AND_CONSOLE:
af635cf3 648 r = connect_logger_as(unit, context, params, o, ident, fileno, uid, gid);
47c1d80d 649 if (r < 0) {
82677ae4 650 log_unit_warning_errno(unit, r, "Failed to connect %s to the journal socket, ignoring: %m", fileno == STDOUT_FILENO ? "stdout" : "stderr");
eb17e935 651 r = open_null_as(O_WRONLY, fileno);
7bce046b
LP
652 } else {
653 struct stat st;
654
655 /* If we connected this fd to the journal via a stream, patch the device/inode into the passed
656 * parameters, but only then. This is useful so that we can set $JOURNAL_STREAM that permits
ab2116b1
LP
657 * services to detect whether they are connected to the journal or not.
658 *
659 * If both stdout and stderr are connected to a stream then let's make sure to store the data
660 * about STDERR as that's usually the best way to do logging. */
7bce046b 661
ab2116b1
LP
662 if (fstat(fileno, &st) >= 0 &&
663 (*journal_stream_ino == 0 || fileno == STDERR_FILENO)) {
7bce046b
LP
664 *journal_stream_dev = st.st_dev;
665 *journal_stream_ino = st.st_ino;
666 }
47c1d80d
MS
667 }
668 return r;
4f2d528d
LP
669
670 case EXEC_OUTPUT_SOCKET:
671 assert(socket_fd >= 0);
e75a9ed1 672
eb17e935 673 return dup2(socket_fd, fileno) < 0 ? -errno : fileno;
94f04347 674
52c239d7 675 case EXEC_OUTPUT_NAMED_FD:
e75a9ed1
LP
676 assert(named_iofds[fileno] >= 0);
677
52c239d7
LB
678 (void) fd_nonblock(named_iofds[fileno], false);
679 return dup2(named_iofds[fileno], fileno) < 0 ? -errno : fileno;
680
2038c3f5
LP
681 case EXEC_OUTPUT_FILE: {
682 bool rw;
683 int fd;
684
685 assert(context->stdio_file[fileno]);
686
687 rw = context->std_input == EXEC_INPUT_FILE &&
688 streq_ptr(context->stdio_file[fileno], context->stdio_file[STDIN_FILENO]);
689
690 if (rw)
691 return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
692
693 fd = acquire_path(context->stdio_file[fileno], O_WRONLY, 0666 & ~context->umask);
694 if (fd < 0)
695 return fd;
696
697 return move_fd(fd, fileno, false);
698 }
699
94f04347 700 default:
80876c20 701 assert_not_reached("Unknown error type");
94f04347 702 }
071830ff
LP
703}
704
02a51aba
LP
705static int chown_terminal(int fd, uid_t uid) {
706 struct stat st;
707
708 assert(fd >= 0);
02a51aba 709
1ff74fb6
LP
710 /* Before we chown/chmod the TTY, let's ensure this is actually a tty */
711 if (isatty(fd) < 1)
712 return 0;
713
02a51aba 714 /* This might fail. What matters are the results. */
bab45044
LP
715 (void) fchown(fd, uid, -1);
716 (void) fchmod(fd, TTY_MODE);
02a51aba
LP
717
718 if (fstat(fd, &st) < 0)
719 return -errno;
720
d8b4e2e9 721 if (st.st_uid != uid || (st.st_mode & 0777) != TTY_MODE)
02a51aba
LP
722 return -EPERM;
723
724 return 0;
725}
726
7d5ceb64 727static int setup_confirm_stdio(const char *vc, int *_saved_stdin, int *_saved_stdout) {
3d18b167
LP
728 _cleanup_close_ int fd = -1, saved_stdin = -1, saved_stdout = -1;
729 int r;
80876c20 730
80876c20
LP
731 assert(_saved_stdin);
732 assert(_saved_stdout);
733
af6da548
LP
734 saved_stdin = fcntl(STDIN_FILENO, F_DUPFD, 3);
735 if (saved_stdin < 0)
736 return -errno;
80876c20 737
af6da548 738 saved_stdout = fcntl(STDOUT_FILENO, F_DUPFD, 3);
3d18b167
LP
739 if (saved_stdout < 0)
740 return -errno;
80876c20 741
8854d795 742 fd = acquire_terminal(vc, ACQUIRE_TERMINAL_WAIT, DEFAULT_CONFIRM_USEC);
3d18b167
LP
743 if (fd < 0)
744 return fd;
80876c20 745
af6da548
LP
746 r = chown_terminal(fd, getuid());
747 if (r < 0)
3d18b167 748 return r;
02a51aba 749
3d18b167
LP
750 r = reset_terminal_fd(fd, true);
751 if (r < 0)
752 return r;
80876c20 753
2b33ab09 754 r = rearrange_stdio(fd, fd, STDERR_FILENO);
3d18b167 755 fd = -1;
2b33ab09
LP
756 if (r < 0)
757 return r;
80876c20
LP
758
759 *_saved_stdin = saved_stdin;
760 *_saved_stdout = saved_stdout;
761
3d18b167 762 saved_stdin = saved_stdout = -1;
80876c20 763
3d18b167 764 return 0;
80876c20
LP
765}
766
63d77c92 767static void write_confirm_error_fd(int err, int fd, const Unit *u) {
3b20f877
FB
768 assert(err < 0);
769
770 if (err == -ETIMEDOUT)
63d77c92 771 dprintf(fd, "Confirmation question timed out for %s, assuming positive response.\n", u->id);
3b20f877
FB
772 else {
773 errno = -err;
63d77c92 774 dprintf(fd, "Couldn't ask confirmation for %s: %m, assuming positive response.\n", u->id);
3b20f877
FB
775 }
776}
777
63d77c92 778static void write_confirm_error(int err, const char *vc, const Unit *u) {
03e334a1 779 _cleanup_close_ int fd = -1;
80876c20 780
3b20f877 781 assert(vc);
80876c20 782
7d5ceb64 783 fd = open_terminal(vc, O_WRONLY|O_NOCTTY|O_CLOEXEC);
af6da548 784 if (fd < 0)
3b20f877 785 return;
80876c20 786
63d77c92 787 write_confirm_error_fd(err, fd, u);
af6da548 788}
80876c20 789
/* Releases the terminal and puts the saved stdin/stdout fds back in place.
 * Both saved fds are closed and the variables pointed to are updated with
 * the return value of safe_close(), regardless of whether restoring worked.
 *
 * Returns 0 on success, or the negative errno of the last dup2() that
 * failed. */
static int restore_confirm_stdio(int *saved_stdin, int *saved_stdout) {
        int ret = 0;

        assert(saved_stdin);
        assert(saved_stdout);

        release_terminal();

        if (*saved_stdin >= 0 && dup2(*saved_stdin, STDIN_FILENO) < 0)
                ret = -errno;

        if (*saved_stdout >= 0 && dup2(*saved_stdout, STDOUT_FILENO) < 0)
                ret = -errno;

        *saved_stdin = safe_close(*saved_stdin);
        *saved_stdout = safe_close(*saved_stdout);

        return ret;
}
811
3b20f877
FB
/* Possible outcomes of asking the user for confirmation before spawning */
enum {
        CONFIRM_PRETEND_FAILURE = -1, /* don't execute the command, pretend it failed */
        CONFIRM_PRETEND_SUCCESS =  0, /* don't execute the command, pretend it succeeded */
        CONFIRM_EXECUTE         =  1, /* execute the command */
};
817
eedf223a 818static int ask_for_confirmation(const char *vc, Unit *u, const char *cmdline) {
af6da548 819 int saved_stdout = -1, saved_stdin = -1, r;
2bcd3c26 820 _cleanup_free_ char *e = NULL;
3b20f877 821 char c;
af6da548 822
3b20f877 823 /* For any internal errors, assume a positive response. */
7d5ceb64 824 r = setup_confirm_stdio(vc, &saved_stdin, &saved_stdout);
3b20f877 825 if (r < 0) {
63d77c92 826 write_confirm_error(r, vc, u);
3b20f877
FB
827 return CONFIRM_EXECUTE;
828 }
af6da548 829
b0eb2944
FB
830 /* confirm_spawn might have been disabled while we were sleeping. */
831 if (manager_is_confirm_spawn_disabled(u->manager)) {
832 r = 1;
833 goto restore_stdio;
834 }
af6da548 835
2bcd3c26
FB
836 e = ellipsize(cmdline, 60, 100);
837 if (!e) {
838 log_oom();
839 r = CONFIRM_EXECUTE;
840 goto restore_stdio;
841 }
af6da548 842
d172b175 843 for (;;) {
539622bd 844 r = ask_char(&c, "yfshiDjcn", "Execute %s? [y, f, s – h for help] ", e);
d172b175 845 if (r < 0) {
63d77c92 846 write_confirm_error_fd(r, STDOUT_FILENO, u);
d172b175
FB
847 r = CONFIRM_EXECUTE;
848 goto restore_stdio;
849 }
af6da548 850
d172b175 851 switch (c) {
b0eb2944
FB
852 case 'c':
853 printf("Resuming normal execution.\n");
854 manager_disable_confirm_spawn();
855 r = 1;
856 break;
dd6f9ac0
FB
857 case 'D':
858 unit_dump(u, stdout, " ");
859 continue; /* ask again */
d172b175
FB
860 case 'f':
861 printf("Failing execution.\n");
862 r = CONFIRM_PRETEND_FAILURE;
863 break;
864 case 'h':
b0eb2944
FB
865 printf(" c - continue, proceed without asking anymore\n"
866 " D - dump, show the state of the unit\n"
dd6f9ac0 867 " f - fail, don't execute the command and pretend it failed\n"
d172b175 868 " h - help\n"
eedf223a 869 " i - info, show a short summary of the unit\n"
56fde33a 870 " j - jobs, show jobs that are in progress\n"
d172b175
FB
871 " s - skip, don't execute the command and pretend it succeeded\n"
872 " y - yes, execute the command\n");
dd6f9ac0 873 continue; /* ask again */
eedf223a
FB
874 case 'i':
875 printf(" Description: %s\n"
876 " Unit: %s\n"
877 " Command: %s\n",
878 u->id, u->description, cmdline);
879 continue; /* ask again */
56fde33a
FB
880 case 'j':
881 manager_dump_jobs(u->manager, stdout, " ");
882 continue; /* ask again */
539622bd
FB
883 case 'n':
884 /* 'n' was removed in favor of 'f'. */
885 printf("Didn't understand 'n', did you mean 'f'?\n");
886 continue; /* ask again */
d172b175
FB
887 case 's':
888 printf("Skipping execution.\n");
889 r = CONFIRM_PRETEND_SUCCESS;
890 break;
891 case 'y':
892 r = CONFIRM_EXECUTE;
893 break;
894 default:
895 assert_not_reached("Unhandled choice");
896 }
3b20f877 897 break;
3b20f877 898 }
af6da548 899
3b20f877 900restore_stdio:
af6da548 901 restore_confirm_stdio(&saved_stdin, &saved_stdout);
af6da548 902 return r;
80876c20
LP
903}
904
4d885bd3
DH
905static int get_fixed_user(const ExecContext *c, const char **user,
906 uid_t *uid, gid_t *gid,
907 const char **home, const char **shell) {
81a2b7ce 908 int r;
4d885bd3 909 const char *name;
81a2b7ce 910
4d885bd3 911 assert(c);
81a2b7ce 912
23deef88
LP
913 if (!c->user)
914 return 0;
915
4d885bd3
DH
916 /* Note that we don't set $HOME or $SHELL if they are not particularly enlightening anyway
917 * (i.e. are "/" or "/bin/nologin"). */
81a2b7ce 918
23deef88 919 name = c->user;
4d885bd3
DH
920 r = get_user_creds_clean(&name, uid, gid, home, shell);
921 if (r < 0)
922 return r;
81a2b7ce 923
4d885bd3
DH
924 *user = name;
925 return 0;
926}
927
928static int get_fixed_group(const ExecContext *c, const char **group, gid_t *gid) {
929 int r;
930 const char *name;
931
932 assert(c);
933
934 if (!c->group)
935 return 0;
936
937 name = c->group;
938 r = get_group_creds(&name, gid);
939 if (r < 0)
940 return r;
941
942 *group = name;
943 return 0;
944}
945
cdc5d5c5
DH
946static int get_supplementary_groups(const ExecContext *c, const char *user,
947 const char *group, gid_t gid,
948 gid_t **supplementary_gids, int *ngids) {
4d885bd3
DH
949 char **i;
950 int r, k = 0;
951 int ngroups_max;
952 bool keep_groups = false;
953 gid_t *groups = NULL;
954 _cleanup_free_ gid_t *l_gids = NULL;
955
956 assert(c);
957
bbeea271
DH
958 /*
959 * If user is given, then lookup GID and supplementary groups list.
960 * We avoid NSS lookups for gid=0. Also we have to initialize groups
cdc5d5c5
DH
961 * here and as early as possible so we keep the list of supplementary
962 * groups of the caller.
bbeea271
DH
963 */
964 if (user && gid_is_valid(gid) && gid != 0) {
965 /* First step, initialize groups from /etc/groups */
966 if (initgroups(user, gid) < 0)
967 return -errno;
968
969 keep_groups = true;
970 }
971
ac6e8be6 972 if (strv_isempty(c->supplementary_groups))
4d885bd3
DH
973 return 0;
974
366ddd25
DH
975 /*
976 * If SupplementaryGroups= was passed then NGROUPS_MAX has to
977 * be positive, otherwise fail.
978 */
979 errno = 0;
980 ngroups_max = (int) sysconf(_SC_NGROUPS_MAX);
981 if (ngroups_max <= 0) {
982 if (errno > 0)
983 return -errno;
984 else
985 return -EOPNOTSUPP; /* For all other values */
986 }
987
4d885bd3
DH
988 l_gids = new(gid_t, ngroups_max);
989 if (!l_gids)
990 return -ENOMEM;
81a2b7ce 991
4d885bd3
DH
992 if (keep_groups) {
993 /*
994 * Lookup the list of groups that the user belongs to, we
995 * avoid NSS lookups here too for gid=0.
996 */
997 k = ngroups_max;
998 if (getgrouplist(user, gid, l_gids, &k) < 0)
999 return -EINVAL;
1000 } else
1001 k = 0;
81a2b7ce 1002
4d885bd3
DH
1003 STRV_FOREACH(i, c->supplementary_groups) {
1004 const char *g;
81a2b7ce 1005
4d885bd3
DH
1006 if (k >= ngroups_max)
1007 return -E2BIG;
81a2b7ce 1008
4d885bd3
DH
1009 g = *i;
1010 r = get_group_creds(&g, l_gids+k);
1011 if (r < 0)
1012 return r;
81a2b7ce 1013
4d885bd3
DH
1014 k++;
1015 }
81a2b7ce 1016
4d885bd3
DH
1017 /*
1018 * Sets ngids to zero to drop all supplementary groups, happens
1019 * when we are under root and SupplementaryGroups= is empty.
1020 */
1021 if (k == 0) {
1022 *ngids = 0;
1023 return 0;
1024 }
81a2b7ce 1025
4d885bd3
DH
1026 /* Otherwise get the final list of supplementary groups */
1027 groups = memdup(l_gids, sizeof(gid_t) * k);
1028 if (!groups)
1029 return -ENOMEM;
1030
1031 *supplementary_gids = groups;
1032 *ngids = k;
1033
1034 groups = NULL;
1035
1036 return 0;
1037}
1038
/* Applies the group credentials: installs the supplementary groups if any
 * were passed, then sets the real, effective and saved GID to 'gid' if it is
 * valid. Returns 0 on success, or a negative errno-style error. */
static int enforce_groups(gid_t gid, const gid_t *supplementary_gids, int ngids) {

        /* Supplementary groups are only touched if the list is not empty */
        if (ngids > 0) {
                int r;

                r = maybe_setgroups(ngids, supplementary_gids);
                if (r < 0)
                        return r;
        }

        /* Then switch the GIDs, if requested */
        if (gid_is_valid(gid) && setresgid(gid, gid, gid) < 0)
                return -errno;

        return 0;
}
1057
1058static int enforce_user(const ExecContext *context, uid_t uid) {
81a2b7ce
LP
1059 assert(context);
1060
4d885bd3
DH
1061 if (!uid_is_valid(uid))
1062 return 0;
1063
479050b3 1064 /* Sets (but doesn't look up) the uid and make sure we keep the
81a2b7ce
LP
1065 * capabilities while doing so. */
1066
479050b3 1067 if (context->capability_ambient_set != 0) {
81a2b7ce
LP
1068
1069 /* First step: If we need to keep capabilities but
1070 * drop privileges we need to make sure we keep our
cbb21cca 1071 * caps, while we drop privileges. */
693ced48 1072 if (uid != 0) {
cbb21cca 1073 int sb = context->secure_bits | 1<<SECURE_KEEP_CAPS;
693ced48
LP
1074
1075 if (prctl(PR_GET_SECUREBITS) != sb)
1076 if (prctl(PR_SET_SECUREBITS, sb) < 0)
1077 return -errno;
1078 }
81a2b7ce
LP
1079 }
1080
479050b3 1081 /* Second step: actually set the uids */
81a2b7ce
LP
1082 if (setresuid(uid, uid, uid) < 0)
1083 return -errno;
1084
1085 /* At this point we should have all necessary capabilities but
1086 are otherwise a normal user. However, the caps might got
1087 corrupted due to the setresuid() so we need clean them up
1088 later. This is done outside of this call. */
1089
1090 return 0;
1091}
1092
349cc4a5 1093#if HAVE_PAM
5b6319dc
LP
1094
1095static int null_conv(
1096 int num_msg,
1097 const struct pam_message **msg,
1098 struct pam_response **resp,
1099 void *appdata_ptr) {
1100
1101 /* We don't support conversations */
1102
1103 return PAM_CONV_ERR;
1104}
1105
cefc33ae
LP
1106#endif
1107
5b6319dc
LP
/* Opens a PAM session for service 'name' and user 'user', and forks off a helper process ("(sd-pam)")
 * that closes the session again once this process has gone away.
 *
 * 'tty', if non-NULL, is registered as PAM_TTY. The variables in '*env' are fed into the PAM
 * environment; on success '*env' is replaced by the environment list PAM hands back. 'fds'/'n_fds' are
 * file descriptors that get closed inside the helper process. 'uid'/'gid' are the credentials the helper
 * process tries to drop to.
 *
 * Returns a negative errno-style value on failure; PAM level errors are reported as -EPERM. When built
 * without PAM support this is a NOP returning 0. */
static int setup_pam(
                const char *name,
                const char *user,
                uid_t uid,
                gid_t gid,
                const char *tty,
                char ***env,
                int fds[], size_t n_fds) {

#if HAVE_PAM

        static const struct pam_conv conv = {
                .conv = null_conv,
                .appdata_ptr = NULL
        };

        _cleanup_(barrier_destroy) Barrier barrier = BARRIER_NULL;
        pam_handle_t *pamh = NULL;
        sigset_t saved_mask;
        int pam_code = PAM_SUCCESS, r;
        char **var, **pam_env = NULL;
        bool session_open = false;
        pid_t pam_pid = 0, parent_pid;
        int flags = 0;

        assert(name);
        assert(user);
        assert(env);

        /* PAM is set up in the parent process, and then we fork. The child sticks around until it is
         * killed, either via PR_SET_PDEATHSIG or by systemd through the cgroup logic, and then tears the
         * PAM session down again. The parent is the one that exec()s the actual daemon; that way the
         * main PID of the daemon is the one that was fork()ed initially. */

        r = barrier_create(&barrier);
        if (r < 0)
                goto fail;

        /* Keep PAM quiet unless debug logging is on */
        if (log_get_max_level() < LOG_DEBUG)
                flags |= PAM_SILENT;

        pam_code = pam_start(name, user, &conv, &pamh);
        if (pam_code != PAM_SUCCESS) {
                pamh = NULL;
                goto fail;
        }

        if (tty) {
                pam_code = pam_set_item(pamh, PAM_TTY, tty);
                if (pam_code != PAM_SUCCESS)
                        goto fail;
        }

        /* Hand our environment over to PAM */
        STRV_FOREACH(var, *env) {
                pam_code = pam_putenv(pamh, *var);
                if (pam_code != PAM_SUCCESS)
                        goto fail;
        }

        pam_code = pam_acct_mgmt(pamh, flags);
        if (pam_code != PAM_SUCCESS)
                goto fail;

        pam_code = pam_open_session(pamh, flags);
        if (pam_code != PAM_SUCCESS)
                goto fail;

        /* From now on the failure path has to close the session again */
        session_open = true;

        pam_env = pam_getenvlist(pamh);
        if (!pam_env) {
                pam_code = PAM_BUF_ERR;
                goto fail;
        }

        /* Block SIGTERM before forking, so that the signal cannot get lost in the child */
        assert_se(sigprocmask_many(SIG_BLOCK, &saved_mask, SIGTERM, -1) >= 0);

        parent_pid = getpid_cached();

        r = safe_fork("(sd-pam)", 0, &pam_pid);
        if (r < 0)
                goto fail;
        if (r == 0) {
                int sig, ret = EXIT_PAM;

                /* This is the child, whose sole job is to end the PAM session on termination */
                barrier_set_role(&barrier, BARRIER_CHILD);

                /* Don't keep the passed fds open in here. The assumption is that apart from those only
                 * fds opened by PAM are around at this point. */
                (void) close_many(fds, n_fds);

                /* Drop privileges: none are needed for pam_close_session(), and doing so makes
                 * PR_SET_PDEATHSIG work in most cases. Failures are only logged and otherwise ignored,
                 * but then the sd-pam process should be expected not to exit normally. */
                r = maybe_setgroups(0, NULL);
                if (r < 0)
                        log_warning_errno(r, "Failed to setgroups() in sd-pam: %m");
                if (setresgid(gid, gid, gid) < 0)
                        log_warning_errno(errno, "Failed to setresgid() in sd-pam: %m");
                if (setresuid(uid, uid, uid) < 0)
                        log_warning_errno(errno, "Failed to setresuid() in sd-pam: %m");

                (void) ignore_signals(SIGPIPE, -1);

                /* Ask to be sent SIGTERM when the parent dies. This only works if the setresuid() above
                 * succeeded, since otherwise the kernel does not let an unprivileged parent kill its
                 * privileged child this way. For the rest we rely on the cgroup kill logic. */
                if (prctl(PR_SET_PDEATHSIG, SIGTERM) < 0)
                        goto child_finish;

                /* Let the parent know that our setup, in particular the dropping of privileges, is
                 * complete, so that unit setup does not race against the setresuid() above. A failure
                 * here is ignored on purpose: if the parent aborted we notice that right below. */
                (void) barrier_place(&barrier);

                /* Only wait for SIGTERM if the parent is still the process we were forked from */
                if (getppid() == parent_pid) {
                        sigset_t ss;

                        assert_se(sigemptyset(&ss) >= 0);
                        assert_se(sigaddset(&ss, SIGTERM) >= 0);

                        /* Retry when interrupted, give up on any other failure */
                        while (sigwait(&ss, &sig) < 0)
                                if (errno != EINTR)
                                        goto child_finish;

                        assert(sig == SIGTERM);
                }

                /* Close the session only if the parent is really gone */
                if (getppid() != parent_pid) {
                        pam_code = pam_close_session(pamh, flags);
                        if (pam_code != PAM_SUCCESS)
                                goto child_finish;
                }

                ret = 0;

        child_finish:
                pam_end(pamh, pam_code | flags);
                _exit(ret);
        }

        barrier_set_role(&barrier, BARRIER_PARENT);

        /* The child was forked off successfully and takes care of all PAM cleanup, hence forget about
         * the handle in the parent. */
        pamh = NULL;

        /* Restore the original signal mask, i.e. unblock SIGTERM again in the parent */
        assert_se(sigprocmask(SIG_SETMASK, &saved_mask, NULL) >= 0);

        /* The PAM modules might have opened the syslog connection; close it explicitly since we don't
         * want that fd around. */
        closelog();

        /* Wait synchronously until the child finished its initialization. We cannot recover from a
         * failure here, hence only complain loudly about it. */
        if (!barrier_place_and_sync(&barrier))
                log_error("PAM initialization failed");

        return strv_free_and_replace(*env, pam_env);

fail:
        if (pam_code != PAM_SUCCESS) {
                log_error("PAM failed: %s", pam_strerror(pamh, pam_code));
                r = -EPERM; /* there is no mapping from PAM errors to errno values */
        } else
                log_error_errno(r, "PAM failed: %m");

        if (pamh) {
                if (session_open)
                        pam_code = pam_close_session(pamh, flags);

                pam_end(pamh, pam_code | flags);
        }

        strv_free(pam_env);
        closelog();

        return r;
#else
        return 0;
#endif
}
5b6319dc 1313
5d6b1584
LP
/* Renames the calling process to the last path component of 'path', wrapped in parentheses. If that
 * component is empty the name "(...)" is used instead. */
static void rename_process_from_path(const char *path) {
        char process_name[11];
        const char *base;
        size_t n;

        /* The result has to fit into 10 characters (i.e. the length of "/sbin/init") so that it looks
         * pretty in /bin/ps: that is at most 8 characters of the name plus the two parentheses. */

        base = basename(path);
        if (isempty(base)) {
                rename_process("(...)");
                return;
        }

        n = strlen(base);
        if (n > 8) {
                /* Keep the tail rather than the head of the name, as the end is usually the more
                 * interesting part: the beginning might just be "systemd-". */
                base += n - 8;
                n = 8;
        }

        process_name[0] = '(';
        memcpy(process_name + 1, base, n);
        process_name[n + 1] = ')';
        process_name[n + 2] = 0;

        rename_process(process_name);
}
1344
469830d1
LP
1345static bool context_has_address_families(const ExecContext *c) {
1346 assert(c);
1347
1348 return c->address_families_whitelist ||
1349 !set_isempty(c->address_families);
1350}
1351
1352static bool context_has_syscall_filters(const ExecContext *c) {
1353 assert(c);
1354
1355 return c->syscall_whitelist ||
8cfa775f 1356 !hashmap_isempty(c->syscall_filter);
469830d1
LP
1357}
1358
/* Decides whether the no-new-privileges flag is needed for this context: either because it is
 * configured explicitly, or because we lack CAP_SYS_ADMIN and one of the seccomp based settings is
 * in use. */
static bool context_has_no_new_privileges(const ExecContext *c) {
        assert(c);

        /* Explicitly requested */
        if (c->no_new_privileges)
                return true;

        /* When privileged, NNP is not needed */
        if (have_effective_cap(CAP_SYS_ADMIN))
                return false;

        /* Unprivileged: NNP is required as soon as any kind of seccomp filter is going to be installed */
        if (context_has_address_families(c) ||
            c->memory_deny_write_execute ||
            c->restrict_realtime ||
            exec_context_restrict_namespaces_set(c))
                return true;

        if (c->protect_kernel_tunables ||
            c->protect_kernel_modules ||
            c->private_devices)
                return true;

        if (context_has_syscall_filters(c) ||
            !set_isempty(c->syscall_archs))
                return true;

        return c->lock_personality;
}
1380
349cc4a5 1381#if HAVE_SECCOMP
17df7223 1382
83f12b27 1383static bool skip_seccomp_unavailable(const Unit* u, const char* msg) {
f673b62d
LP
1384
1385 if (is_seccomp_available())
1386 return false;
1387
f673b62d 1388 log_unit_debug(u, "SECCOMP features not detected in the kernel, skipping %s", msg);
f673b62d 1389 return true;
83f12b27
FS
1390}
1391
165a31c0 1392static int apply_syscall_filter(const Unit* u, const ExecContext *c, bool needs_ambient_hack) {
469830d1 1393 uint32_t negative_action, default_action, action;
165a31c0 1394 int r;
8351ceae 1395
469830d1 1396 assert(u);
c0467cf3 1397 assert(c);
8351ceae 1398
469830d1 1399 if (!context_has_syscall_filters(c))
83f12b27
FS
1400 return 0;
1401
469830d1
LP
1402 if (skip_seccomp_unavailable(u, "SystemCallFilter="))
1403 return 0;
e9642be2 1404
469830d1 1405 negative_action = c->syscall_errno == 0 ? SCMP_ACT_KILL : SCMP_ACT_ERRNO(c->syscall_errno);
e9642be2 1406
469830d1
LP
1407 if (c->syscall_whitelist) {
1408 default_action = negative_action;
1409 action = SCMP_ACT_ALLOW;
7c66bae2 1410 } else {
469830d1
LP
1411 default_action = SCMP_ACT_ALLOW;
1412 action = negative_action;
57183d11 1413 }
8351ceae 1414
165a31c0
LP
1415 if (needs_ambient_hack) {
1416 r = seccomp_filter_set_add(c->syscall_filter, c->syscall_whitelist, syscall_filter_sets + SYSCALL_FILTER_SET_SETUID);
1417 if (r < 0)
1418 return r;
1419 }
1420
469830d1 1421 return seccomp_load_syscall_filter_set_raw(default_action, c->syscall_filter, action);
4298d0b5
LP
1422}
1423
469830d1
LP
1424static int apply_syscall_archs(const Unit *u, const ExecContext *c) {
1425 assert(u);
4298d0b5
LP
1426 assert(c);
1427
469830d1 1428 if (set_isempty(c->syscall_archs))
83f12b27
FS
1429 return 0;
1430
469830d1
LP
1431 if (skip_seccomp_unavailable(u, "SystemCallArchitectures="))
1432 return 0;
4298d0b5 1433
469830d1
LP
1434 return seccomp_restrict_archs(c->syscall_archs);
1435}
4298d0b5 1436
469830d1
LP
1437static int apply_address_families(const Unit* u, const ExecContext *c) {
1438 assert(u);
1439 assert(c);
4298d0b5 1440
469830d1
LP
1441 if (!context_has_address_families(c))
1442 return 0;
4298d0b5 1443
469830d1
LP
1444 if (skip_seccomp_unavailable(u, "RestrictAddressFamilies="))
1445 return 0;
4298d0b5 1446
469830d1 1447 return seccomp_restrict_address_families(c->address_families, c->address_families_whitelist);
8351ceae 1448}
4298d0b5 1449
83f12b27 1450static int apply_memory_deny_write_execute(const Unit* u, const ExecContext *c) {
469830d1 1451 assert(u);
f3e43635
TM
1452 assert(c);
1453
469830d1 1454 if (!c->memory_deny_write_execute)
83f12b27
FS
1455 return 0;
1456
469830d1
LP
1457 if (skip_seccomp_unavailable(u, "MemoryDenyWriteExecute="))
1458 return 0;
f3e43635 1459
469830d1 1460 return seccomp_memory_deny_write_execute();
f3e43635
TM
1461}
1462
83f12b27 1463static int apply_restrict_realtime(const Unit* u, const ExecContext *c) {
469830d1 1464 assert(u);
f4170c67
LP
1465 assert(c);
1466
469830d1 1467 if (!c->restrict_realtime)
83f12b27
FS
1468 return 0;
1469
469830d1
LP
1470 if (skip_seccomp_unavailable(u, "RestrictRealtime="))
1471 return 0;
f4170c67 1472
469830d1 1473 return seccomp_restrict_realtime();
f4170c67
LP
1474}
1475
59e856c7 1476static int apply_protect_sysctl(const Unit *u, const ExecContext *c) {
469830d1 1477 assert(u);
59eeb84b
LP
1478 assert(c);
1479
1480 /* Turn off the legacy sysctl() system call. Many distributions turn this off while building the kernel, but
1481 * let's protect even those systems where this is left on in the kernel. */
1482
469830d1 1483 if (!c->protect_kernel_tunables)
59eeb84b
LP
1484 return 0;
1485
469830d1
LP
1486 if (skip_seccomp_unavailable(u, "ProtectKernelTunables="))
1487 return 0;
59eeb84b 1488
469830d1 1489 return seccomp_protect_sysctl();
59eeb84b
LP
1490}
1491
59e856c7 1492static int apply_protect_kernel_modules(const Unit *u, const ExecContext *c) {
469830d1 1493 assert(u);
502d704e
DH
1494 assert(c);
1495
25a8d8a0 1496 /* Turn off module syscalls on ProtectKernelModules=yes */
502d704e 1497
469830d1
LP
1498 if (!c->protect_kernel_modules)
1499 return 0;
1500
502d704e
DH
1501 if (skip_seccomp_unavailable(u, "ProtectKernelModules="))
1502 return 0;
1503
469830d1 1504 return seccomp_load_syscall_filter_set(SCMP_ACT_ALLOW, syscall_filter_sets + SYSCALL_FILTER_SET_MODULE, SCMP_ACT_ERRNO(EPERM));
502d704e
DH
1505}
1506
59e856c7 1507static int apply_private_devices(const Unit *u, const ExecContext *c) {
469830d1 1508 assert(u);
ba128bb8
LP
1509 assert(c);
1510
8f81a5f6 1511 /* If PrivateDevices= is set, also turn off iopl and all @raw-io syscalls. */
ba128bb8 1512
469830d1
LP
1513 if (!c->private_devices)
1514 return 0;
1515
ba128bb8
LP
1516 if (skip_seccomp_unavailable(u, "PrivateDevices="))
1517 return 0;
1518
469830d1 1519 return seccomp_load_syscall_filter_set(SCMP_ACT_ALLOW, syscall_filter_sets + SYSCALL_FILTER_SET_RAW_IO, SCMP_ACT_ERRNO(EPERM));
ba128bb8
LP
1520}
1521
34cf6c43 1522static int apply_restrict_namespaces(const Unit *u, const ExecContext *c) {
469830d1 1523 assert(u);
add00535
LP
1524 assert(c);
1525
1526 if (!exec_context_restrict_namespaces_set(c))
1527 return 0;
1528
1529 if (skip_seccomp_unavailable(u, "RestrictNamespaces="))
1530 return 0;
1531
1532 return seccomp_restrict_namespaces(c->restrict_namespaces);
1533}
1534
78e864e5 1535static int apply_lock_personality(const Unit* u, const ExecContext *c) {
e8132d63
LP
1536 unsigned long personality;
1537 int r;
78e864e5
TM
1538
1539 assert(u);
1540 assert(c);
1541
1542 if (!c->lock_personality)
1543 return 0;
1544
1545 if (skip_seccomp_unavailable(u, "LockPersonality="))
1546 return 0;
1547
e8132d63
LP
1548 personality = c->personality;
1549
1550 /* If personality is not specified, use either PER_LINUX or PER_LINUX32 depending on what is currently set. */
1551 if (personality == PERSONALITY_INVALID) {
1552
1553 r = opinionated_personality(&personality);
1554 if (r < 0)
1555 return r;
1556 }
78e864e5
TM
1557
1558 return seccomp_lock_personality(personality);
1559}
1560
c0467cf3 1561#endif
8351ceae 1562
31a7eb86
ZJS
1563static void do_idle_pipe_dance(int idle_pipe[4]) {
1564 assert(idle_pipe);
1565
54eb2300
LP
1566 idle_pipe[1] = safe_close(idle_pipe[1]);
1567 idle_pipe[2] = safe_close(idle_pipe[2]);
31a7eb86
ZJS
1568
1569 if (idle_pipe[0] >= 0) {
1570 int r;
1571
1572 r = fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT_USEC);
1573
1574 if (idle_pipe[3] >= 0 && r == 0 /* timeout */) {
c7cc737f
LP
1575 ssize_t n;
1576
31a7eb86 1577 /* Signal systemd that we are bored and want to continue. */
c7cc737f
LP
1578 n = write(idle_pipe[3], "x", 1);
1579 if (n > 0)
cd972d69
ZJS
1580 /* Wait for systemd to react to the signal above. */
1581 fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT2_USEC);
31a7eb86
ZJS
1582 }
1583
54eb2300 1584 idle_pipe[0] = safe_close(idle_pipe[0]);
31a7eb86
ZJS
1585
1586 }
1587
54eb2300 1588 idle_pipe[3] = safe_close(idle_pipe[3]);
31a7eb86
ZJS
1589}
1590
7cae38c4 1591static int build_environment(
34cf6c43 1592 const Unit *u,
9fa95f85 1593 const ExecContext *c,
1e22b5cd 1594 const ExecParameters *p,
da6053d0 1595 size_t n_fds,
7cae38c4
LP
1596 const char *home,
1597 const char *username,
1598 const char *shell,
7bce046b
LP
1599 dev_t journal_stream_dev,
1600 ino_t journal_stream_ino,
7cae38c4
LP
1601 char ***ret) {
1602
1603 _cleanup_strv_free_ char **our_env = NULL;
da6053d0 1604 size_t n_env = 0;
7cae38c4
LP
1605 char *x;
1606
4b58153d 1607 assert(u);
7cae38c4
LP
1608 assert(c);
1609 assert(ret);
1610
4b58153d 1611 our_env = new0(char*, 14);
7cae38c4
LP
1612 if (!our_env)
1613 return -ENOMEM;
1614
1615 if (n_fds > 0) {
8dd4c05b
LP
1616 _cleanup_free_ char *joined = NULL;
1617
df0ff127 1618 if (asprintf(&x, "LISTEN_PID="PID_FMT, getpid_cached()) < 0)
7cae38c4
LP
1619 return -ENOMEM;
1620 our_env[n_env++] = x;
1621
da6053d0 1622 if (asprintf(&x, "LISTEN_FDS=%zu", n_fds) < 0)
7cae38c4
LP
1623 return -ENOMEM;
1624 our_env[n_env++] = x;
8dd4c05b 1625
1e22b5cd 1626 joined = strv_join(p->fd_names, ":");
8dd4c05b
LP
1627 if (!joined)
1628 return -ENOMEM;
1629
605405c6 1630 x = strjoin("LISTEN_FDNAMES=", joined);
8dd4c05b
LP
1631 if (!x)
1632 return -ENOMEM;
1633 our_env[n_env++] = x;
7cae38c4
LP
1634 }
1635
b08af3b1 1636 if ((p->flags & EXEC_SET_WATCHDOG) && p->watchdog_usec > 0) {
df0ff127 1637 if (asprintf(&x, "WATCHDOG_PID="PID_FMT, getpid_cached()) < 0)
09812eb7
LP
1638 return -ENOMEM;
1639 our_env[n_env++] = x;
1640
1e22b5cd 1641 if (asprintf(&x, "WATCHDOG_USEC="USEC_FMT, p->watchdog_usec) < 0)
09812eb7
LP
1642 return -ENOMEM;
1643 our_env[n_env++] = x;
1644 }
1645
fd63e712
LP
1646 /* If this is D-Bus, tell the nss-systemd module, since it relies on being able to use D-Bus look up dynamic
1647 * users via PID 1, possibly dead-locking the dbus daemon. This way it will not use D-Bus to resolve names, but
1648 * check the database directly. */
ac647978 1649 if (p->flags & EXEC_NSS_BYPASS_BUS) {
fd63e712
LP
1650 x = strdup("SYSTEMD_NSS_BYPASS_BUS=1");
1651 if (!x)
1652 return -ENOMEM;
1653 our_env[n_env++] = x;
1654 }
1655
7cae38c4
LP
1656 if (home) {
1657 x = strappend("HOME=", home);
1658 if (!x)
1659 return -ENOMEM;
1660 our_env[n_env++] = x;
1661 }
1662
1663 if (username) {
1664 x = strappend("LOGNAME=", username);
1665 if (!x)
1666 return -ENOMEM;
1667 our_env[n_env++] = x;
1668
1669 x = strappend("USER=", username);
1670 if (!x)
1671 return -ENOMEM;
1672 our_env[n_env++] = x;
1673 }
1674
1675 if (shell) {
1676 x = strappend("SHELL=", shell);
1677 if (!x)
1678 return -ENOMEM;
1679 our_env[n_env++] = x;
1680 }
1681
4b58153d
LP
1682 if (!sd_id128_is_null(u->invocation_id)) {
1683 if (asprintf(&x, "INVOCATION_ID=" SD_ID128_FORMAT_STR, SD_ID128_FORMAT_VAL(u->invocation_id)) < 0)
1684 return -ENOMEM;
1685
1686 our_env[n_env++] = x;
1687 }
1688
6af760f3
LP
1689 if (exec_context_needs_term(c)) {
1690 const char *tty_path, *term = NULL;
1691
1692 tty_path = exec_context_tty_path(c);
1693
1694 /* If we are forked off PID 1 and we are supposed to operate on /dev/console, then let's try to inherit
1695 * the $TERM set for PID 1. This is useful for containers so that the $TERM the container manager
1696 * passes to PID 1 ends up all the way in the console login shown. */
1697
1698 if (path_equal(tty_path, "/dev/console") && getppid() == 1)
1699 term = getenv("TERM");
1700 if (!term)
1701 term = default_term_for_tty(tty_path);
7cae38c4 1702
6af760f3 1703 x = strappend("TERM=", term);
7cae38c4
LP
1704 if (!x)
1705 return -ENOMEM;
1706 our_env[n_env++] = x;
1707 }
1708
7bce046b
LP
1709 if (journal_stream_dev != 0 && journal_stream_ino != 0) {
1710 if (asprintf(&x, "JOURNAL_STREAM=" DEV_FMT ":" INO_FMT, journal_stream_dev, journal_stream_ino) < 0)
1711 return -ENOMEM;
1712
1713 our_env[n_env++] = x;
1714 }
1715
7cae38c4 1716 our_env[n_env++] = NULL;
7bce046b 1717 assert(n_env <= 12);
7cae38c4 1718
ae2a15bc 1719 *ret = TAKE_PTR(our_env);
7cae38c4
LP
1720
1721 return 0;
1722}
1723
b4c14404
FB
1724static int build_pass_environment(const ExecContext *c, char ***ret) {
1725 _cleanup_strv_free_ char **pass_env = NULL;
1726 size_t n_env = 0, n_bufsize = 0;
1727 char **i;
1728
1729 STRV_FOREACH(i, c->pass_environment) {
1730 _cleanup_free_ char *x = NULL;
1731 char *v;
1732
1733 v = getenv(*i);
1734 if (!v)
1735 continue;
605405c6 1736 x = strjoin(*i, "=", v);
b4c14404
FB
1737 if (!x)
1738 return -ENOMEM;
00819cc1 1739
b4c14404
FB
1740 if (!GREEDY_REALLOC(pass_env, n_bufsize, n_env + 2))
1741 return -ENOMEM;
00819cc1 1742
1cc6c93a 1743 pass_env[n_env++] = TAKE_PTR(x);
b4c14404 1744 pass_env[n_env] = NULL;
b4c14404
FB
1745 }
1746
ae2a15bc 1747 *ret = TAKE_PTR(pass_env);
b4c14404
FB
1748
1749 return 0;
1750}
1751
8b44a3d2
LP
1752static bool exec_needs_mount_namespace(
1753 const ExecContext *context,
1754 const ExecParameters *params,
4657abb5 1755 const ExecRuntime *runtime) {
8b44a3d2
LP
1756
1757 assert(context);
1758 assert(params);
1759
915e6d16
LP
1760 if (context->root_image)
1761 return true;
1762
2a624c36
AP
1763 if (!strv_isempty(context->read_write_paths) ||
1764 !strv_isempty(context->read_only_paths) ||
1765 !strv_isempty(context->inaccessible_paths))
8b44a3d2
LP
1766 return true;
1767
42b1d8e0 1768 if (context->n_bind_mounts > 0)
d2d6c096
LP
1769 return true;
1770
2abd4e38
YW
1771 if (context->n_temporary_filesystems > 0)
1772 return true;
1773
8b44a3d2
LP
1774 if (context->mount_flags != 0)
1775 return true;
1776
1777 if (context->private_tmp && runtime && (runtime->tmp_dir || runtime->var_tmp_dir))
1778 return true;
1779
8b44a3d2 1780 if (context->private_devices ||
228af36f 1781 context->private_mounts ||
8b44a3d2 1782 context->protect_system != PROTECT_SYSTEM_NO ||
59eeb84b
LP
1783 context->protect_home != PROTECT_HOME_NO ||
1784 context->protect_kernel_tunables ||
c575770b 1785 context->protect_kernel_modules ||
59eeb84b 1786 context->protect_control_groups)
8b44a3d2
LP
1787 return true;
1788
37c56f89
YW
1789 if (context->root_directory) {
1790 ExecDirectoryType t;
1791
1792 if (context->mount_apivfs)
1793 return true;
1794
1795 for (t = 0; t < _EXEC_DIRECTORY_TYPE_MAX; t++) {
1796 if (!params->prefix[t])
1797 continue;
1798
1799 if (!strv_isempty(context->directories[t].paths))
1800 return true;
1801 }
1802 }
5d997827 1803
42b1d8e0 1804 if (context->dynamic_user &&
b43ee82f 1805 (!strv_isempty(context->directories[EXEC_DIRECTORY_STATE].paths) ||
42b1d8e0
YW
1806 !strv_isempty(context->directories[EXEC_DIRECTORY_CACHE].paths) ||
1807 !strv_isempty(context->directories[EXEC_DIRECTORY_LOGS].paths)))
1808 return true;
1809
8b44a3d2
LP
1810 return false;
1811}
1812
d251207d
LP
1813static int setup_private_users(uid_t uid, gid_t gid) {
1814 _cleanup_free_ char *uid_map = NULL, *gid_map = NULL;
1815 _cleanup_close_pair_ int errno_pipe[2] = { -1, -1 };
1816 _cleanup_close_ int unshare_ready_fd = -1;
1817 _cleanup_(sigkill_waitp) pid_t pid = 0;
1818 uint64_t c = 1;
d251207d
LP
1819 ssize_t n;
1820 int r;
1821
1822 /* Set up a user namespace and map root to root, the selected UID/GID to itself, and everything else to
1823 * nobody. In order to be able to write this mapping we need CAP_SETUID in the original user namespace, which
1824 * we however lack after opening the user namespace. To work around this we fork() a temporary child process,
1825 * which waits for the parent to create the new user namespace while staying in the original namespace. The
1826 * child then writes the UID mapping, under full privileges. The parent waits for the child to finish and
1827 * continues execution normally. */
1828
587ab01b
ZJS
1829 if (uid != 0 && uid_is_valid(uid)) {
1830 r = asprintf(&uid_map,
1831 "0 0 1\n" /* Map root → root */
1832 UID_FMT " " UID_FMT " 1\n", /* Map $UID → $UID */
1833 uid, uid);
1834 if (r < 0)
1835 return -ENOMEM;
1836 } else {
e0f3720e 1837 uid_map = strdup("0 0 1\n"); /* The case where the above is the same */
587ab01b
ZJS
1838 if (!uid_map)
1839 return -ENOMEM;
1840 }
d251207d 1841
587ab01b
ZJS
1842 if (gid != 0 && gid_is_valid(gid)) {
1843 r = asprintf(&gid_map,
1844 "0 0 1\n" /* Map root → root */
1845 GID_FMT " " GID_FMT " 1\n", /* Map $GID → $GID */
1846 gid, gid);
1847 if (r < 0)
1848 return -ENOMEM;
1849 } else {
d251207d 1850 gid_map = strdup("0 0 1\n"); /* The case where the above is the same */
587ab01b
ZJS
1851 if (!gid_map)
1852 return -ENOMEM;
1853 }
d251207d
LP
1854
1855 /* Create a communication channel so that the parent can tell the child when it finished creating the user
1856 * namespace. */
1857 unshare_ready_fd = eventfd(0, EFD_CLOEXEC);
1858 if (unshare_ready_fd < 0)
1859 return -errno;
1860
1861 /* Create a communication channel so that the child can tell the parent a proper error code in case it
1862 * failed. */
1863 if (pipe2(errno_pipe, O_CLOEXEC) < 0)
1864 return -errno;
1865
4c253ed1
LP
1866 r = safe_fork("(sd-userns)", FORK_RESET_SIGNALS|FORK_DEATHSIG, &pid);
1867 if (r < 0)
1868 return r;
1869 if (r == 0) {
d251207d
LP
1870 _cleanup_close_ int fd = -1;
1871 const char *a;
1872 pid_t ppid;
1873
1874 /* Child process, running in the original user namespace. Let's update the parent's UID/GID map from
1875 * here, after the parent opened its own user namespace. */
1876
1877 ppid = getppid();
1878 errno_pipe[0] = safe_close(errno_pipe[0]);
1879
1880 /* Wait until the parent unshared the user namespace */
1881 if (read(unshare_ready_fd, &c, sizeof(c)) < 0) {
1882 r = -errno;
1883 goto child_fail;
1884 }
1885
1886 /* Disable the setgroups() system call in the child user namespace, for good. */
1887 a = procfs_file_alloca(ppid, "setgroups");
1888 fd = open(a, O_WRONLY|O_CLOEXEC);
1889 if (fd < 0) {
1890 if (errno != ENOENT) {
1891 r = -errno;
1892 goto child_fail;
1893 }
1894
1895 /* If the file is missing the kernel is too old, let's continue anyway. */
1896 } else {
1897 if (write(fd, "deny\n", 5) < 0) {
1898 r = -errno;
1899 goto child_fail;
1900 }
1901
1902 fd = safe_close(fd);
1903 }
1904
1905 /* First write the GID map */
1906 a = procfs_file_alloca(ppid, "gid_map");
1907 fd = open(a, O_WRONLY|O_CLOEXEC);
1908 if (fd < 0) {
1909 r = -errno;
1910 goto child_fail;
1911 }
1912 if (write(fd, gid_map, strlen(gid_map)) < 0) {
1913 r = -errno;
1914 goto child_fail;
1915 }
1916 fd = safe_close(fd);
1917
1918 /* The write the UID map */
1919 a = procfs_file_alloca(ppid, "uid_map");
1920 fd = open(a, O_WRONLY|O_CLOEXEC);
1921 if (fd < 0) {
1922 r = -errno;
1923 goto child_fail;
1924 }
1925 if (write(fd, uid_map, strlen(uid_map)) < 0) {
1926 r = -errno;
1927 goto child_fail;
1928 }
1929
1930 _exit(EXIT_SUCCESS);
1931
1932 child_fail:
1933 (void) write(errno_pipe[1], &r, sizeof(r));
1934 _exit(EXIT_FAILURE);
1935 }
1936
1937 errno_pipe[1] = safe_close(errno_pipe[1]);
1938
1939 if (unshare(CLONE_NEWUSER) < 0)
1940 return -errno;
1941
1942 /* Let the child know that the namespace is ready now */
1943 if (write(unshare_ready_fd, &c, sizeof(c)) < 0)
1944 return -errno;
1945
1946 /* Try to read an error code from the child */
1947 n = read(errno_pipe[0], &r, sizeof(r));
1948 if (n < 0)
1949 return -errno;
1950 if (n == sizeof(r)) { /* an error code was sent to us */
1951 if (r < 0)
1952 return r;
1953 return -EIO;
1954 }
1955 if (n != 0) /* on success we should have read 0 bytes */
1956 return -EIO;
1957
2e87a1fd
LP
1958 r = wait_for_terminate_and_check("(sd-userns)", pid, 0);
1959 pid = 0;
d251207d
LP
1960 if (r < 0)
1961 return r;
2e87a1fd 1962 if (r != EXIT_SUCCESS) /* If something strange happened with the child, let's consider this fatal, too */
d251207d
LP
1963 return -EIO;
1964
1965 return 0;
1966}
1967
3536f49e 1968static int setup_exec_directory(
07689d5d
LP
1969 const ExecContext *context,
1970 const ExecParameters *params,
1971 uid_t uid,
3536f49e 1972 gid_t gid,
3536f49e
YW
1973 ExecDirectoryType type,
1974 int *exit_status) {
07689d5d 1975
72fd1768 1976 static const int exit_status_table[_EXEC_DIRECTORY_TYPE_MAX] = {
3536f49e
YW
1977 [EXEC_DIRECTORY_RUNTIME] = EXIT_RUNTIME_DIRECTORY,
1978 [EXEC_DIRECTORY_STATE] = EXIT_STATE_DIRECTORY,
1979 [EXEC_DIRECTORY_CACHE] = EXIT_CACHE_DIRECTORY,
1980 [EXEC_DIRECTORY_LOGS] = EXIT_LOGS_DIRECTORY,
1981 [EXEC_DIRECTORY_CONFIGURATION] = EXIT_CONFIGURATION_DIRECTORY,
1982 };
07689d5d
LP
1983 char **rt;
1984 int r;
1985
1986 assert(context);
1987 assert(params);
72fd1768 1988 assert(type >= 0 && type < _EXEC_DIRECTORY_TYPE_MAX);
3536f49e 1989 assert(exit_status);
07689d5d 1990
3536f49e
YW
1991 if (!params->prefix[type])
1992 return 0;
1993
8679efde 1994 if (params->flags & EXEC_CHOWN_DIRECTORIES) {
3536f49e
YW
1995 if (!uid_is_valid(uid))
1996 uid = 0;
1997 if (!gid_is_valid(gid))
1998 gid = 0;
1999 }
2000
2001 STRV_FOREACH(rt, context->directories[type].paths) {
6c47cd7d 2002 _cleanup_free_ char *p = NULL, *pp = NULL;
07689d5d 2003
3536f49e
YW
2004 p = strjoin(params->prefix[type], "/", *rt);
2005 if (!p) {
2006 r = -ENOMEM;
2007 goto fail;
2008 }
07689d5d 2009
23a7448e
YW
2010 r = mkdir_parents_label(p, 0755);
2011 if (r < 0)
3536f49e 2012 goto fail;
23a7448e 2013
8092a48c
YW
2014 if (context->dynamic_user &&
2015 !IN_SET(type, EXEC_DIRECTORY_RUNTIME, EXEC_DIRECTORY_CONFIGURATION)) {
6c47cd7d
LP
2016 _cleanup_free_ char *private_root = NULL, *relative = NULL, *parent = NULL;
2017
2018 /* So, here's one extra complication when dealing with DynamicUser=1 units. In that case we
2019 * want to avoid leaving a directory around fully accessible that is owned by a dynamic user
2020 * whose UID is later on reused. To lock this down we use the same trick used by container
2021 * managers to prohibit host users to get access to files of the same UID in containers: we
2022 * place everything inside a directory that has an access mode of 0700 and is owned root:root,
2023 * so that it acts as security boundary for unprivileged host code. We then use fs namespacing
2024 * to make this directory permeable for the service itself.
2025 *
2026 * Specifically: for a service which wants a special directory "foo/" we first create a
2027 * directory "private/" with access mode 0700 owned by root:root. Then we place "foo" inside of
2028 * that directory (i.e. "private/foo/"), and make "foo" a symlink to "private/foo". This way,
2029 * privileged host users can access "foo/" as usual, but unprivileged host users can't look
2030 * into it. Inside of the namespaceof the container "private/" is replaced by a more liberally
2031 * accessible tmpfs, into which the host's "private/foo/" is mounted under the same name, thus
2032 * disabling the access boundary for the service and making sure it only gets access to the
2033 * dirs it needs but no others. Tricky? Yes, absolutely, but it works!
2034 *
2035 * Note that we don't do this for EXEC_DIRECTORY_CONFIGURATION as that's assumed not to be
8092a48c
YW
2036 * owned by the service itself.
2037 * Also, note that we don't do this for EXEC_DIRECTORY_RUNTIME as that's often used for sharing
2038 * files or sockets with other services. */
6c47cd7d
LP
2039
2040 private_root = strjoin(params->prefix[type], "/private");
2041 if (!private_root) {
2042 r = -ENOMEM;
2043 goto fail;
2044 }
2045
2046 /* First set up private root if it doesn't exist yet, with access mode 0700 and owned by root:root */
37c1d5e9 2047 r = mkdir_safe_label(private_root, 0700, 0, 0, MKDIR_WARN_MODE);
6c47cd7d
LP
2048 if (r < 0)
2049 goto fail;
2050
2051 pp = strjoin(private_root, "/", *rt);
2052 if (!pp) {
2053 r = -ENOMEM;
2054 goto fail;
2055 }
2056
2057 /* Create all directories between the configured directory and this private root, and mark them 0755 */
2058 r = mkdir_parents_label(pp, 0755);
2059 if (r < 0)
2060 goto fail;
2061
949befd3
LP
2062 if (is_dir(p, false) > 0 &&
2063 (laccess(pp, F_OK) < 0 && errno == ENOENT)) {
2064
2065 /* Hmm, the private directory doesn't exist yet, but the normal one exists? If so, move
2066 * it over. Most likely the service has been upgraded from one that didn't use
2067 * DynamicUser=1, to one that does. */
2068
2069 if (rename(p, pp) < 0) {
2070 r = -errno;
2071 goto fail;
2072 }
2073 } else {
2074 /* Otherwise, create the actual directory for the service */
2075
2076 r = mkdir_label(pp, context->directories[type].mode);
2077 if (r < 0 && r != -EEXIST)
2078 goto fail;
2079 }
6c47cd7d
LP
2080
2081 parent = dirname_malloc(p);
2082 if (!parent) {
2083 r = -ENOMEM;
2084 goto fail;
2085 }
2086
2087 r = path_make_relative(parent, pp, &relative);
2088 if (r < 0)
2089 goto fail;
2090
2091 /* And link it up from the original place */
2092 r = symlink_idempotent(relative, p);
2093 if (r < 0)
2094 goto fail;
2095
30c81ce2
ZJS
2096 /* Lock down the access mode */
2097 if (chmod(pp, context->directories[type].mode) < 0) {
2098 r = -errno;
2099 goto fail;
2100 }
6c47cd7d
LP
2101 } else {
2102 r = mkdir_label(p, context->directories[type].mode);
fdff1da2 2103 if (r < 0 && r != -EEXIST)
6c47cd7d 2104 goto fail;
fdff1da2
YW
2105 if (r == -EEXIST && !context->dynamic_user)
2106 continue;
a1164ae3 2107 }
07689d5d 2108
c71b2eb7
LP
2109 /* Don't change the owner of the configuration directory, as in the common case it is not written to by
2110 * a service, and shall not be writable. */
2111 if (type == EXEC_DIRECTORY_CONFIGURATION)
2112 continue;
2113
a1164ae3 2114 /* Then, change the ownership of the whole tree, if necessary */
30c81ce2 2115 r = path_chown_recursive(pp ?: p, uid, gid);
07689d5d 2116 if (r < 0)
3536f49e 2117 goto fail;
07689d5d
LP
2118 }
2119
2120 return 0;
3536f49e
YW
2121
2122fail:
2123 *exit_status = exit_status_table[type];
3536f49e 2124 return r;
07689d5d
LP
2125}
2126
#if ENABLE_SMACK
/* Applies the SMACK label for the process that is about to execute 'command'.
 *
 * If the context configures an explicit process label, that one is applied. Otherwise, and only when
 * SMACK_DEFAULT_PROCESS_LABEL is defined at build time, the SMACK_ATTR_EXEC label of the command's binary is
 * looked up and applied, with SMACK_DEFAULT_PROCESS_LABEL as fallback if no label could be read.
 *
 * Returns 0 on success, or the negative error returned by the failing helper. */
static int setup_smack(
                const ExecContext *context,
                const ExecCommand *command) {

        int r;

        assert(context);
        assert(command);

        if (context->smack_process_label) {
                /* An explicitly configured label always wins */
                r = mac_smack_apply_pid(0, context->smack_process_label);
                return r < 0 ? r : 0;
        }

#ifdef SMACK_DEFAULT_PROCESS_LABEL
        {
                _cleanup_free_ char *label = NULL;

                /* A missing attribute or missing SMACK support is not an error here, we simply fall back to
                 * the compiled-in default label in that case. */
                r = mac_smack_read(command->path, SMACK_ATTR_EXEC, &label);
                if (r < 0 && r != -ENODATA && r != -EOPNOTSUPP)
                        return r;

                r = mac_smack_apply_pid(0, label ? label : SMACK_DEFAULT_PROCESS_LABEL);
                if (r < 0)
                        return r;
        }
#endif

        return 0;
}
#endif
cefc33ae 2159
6c47cd7d
LP
2160static int compile_bind_mounts(
2161 const ExecContext *context,
2162 const ExecParameters *params,
2163 BindMount **ret_bind_mounts,
da6053d0 2164 size_t *ret_n_bind_mounts,
6c47cd7d
LP
2165 char ***ret_empty_directories) {
2166
2167 _cleanup_strv_free_ char **empty_directories = NULL;
2168 BindMount *bind_mounts;
da6053d0 2169 size_t n, h = 0, i;
6c47cd7d
LP
2170 ExecDirectoryType t;
2171 int r;
2172
2173 assert(context);
2174 assert(params);
2175 assert(ret_bind_mounts);
2176 assert(ret_n_bind_mounts);
2177 assert(ret_empty_directories);
2178
2179 n = context->n_bind_mounts;
2180 for (t = 0; t < _EXEC_DIRECTORY_TYPE_MAX; t++) {
2181 if (!params->prefix[t])
2182 continue;
2183
2184 n += strv_length(context->directories[t].paths);
2185 }
2186
2187 if (n <= 0) {
2188 *ret_bind_mounts = NULL;
2189 *ret_n_bind_mounts = 0;
2190 *ret_empty_directories = NULL;
2191 return 0;
2192 }
2193
2194 bind_mounts = new(BindMount, n);
2195 if (!bind_mounts)
2196 return -ENOMEM;
2197
a8cabc61 2198 for (i = 0; i < context->n_bind_mounts; i++) {
6c47cd7d
LP
2199 BindMount *item = context->bind_mounts + i;
2200 char *s, *d;
2201
2202 s = strdup(item->source);
2203 if (!s) {
2204 r = -ENOMEM;
2205 goto finish;
2206 }
2207
2208 d = strdup(item->destination);
2209 if (!d) {
2210 free(s);
2211 r = -ENOMEM;
2212 goto finish;
2213 }
2214
2215 bind_mounts[h++] = (BindMount) {
2216 .source = s,
2217 .destination = d,
2218 .read_only = item->read_only,
2219 .recursive = item->recursive,
2220 .ignore_enoent = item->ignore_enoent,
2221 };
2222 }
2223
2224 for (t = 0; t < _EXEC_DIRECTORY_TYPE_MAX; t++) {
2225 char **suffix;
2226
2227 if (!params->prefix[t])
2228 continue;
2229
2230 if (strv_isempty(context->directories[t].paths))
2231 continue;
2232
8092a48c 2233 if (context->dynamic_user &&
5609f688
YW
2234 !IN_SET(t, EXEC_DIRECTORY_RUNTIME, EXEC_DIRECTORY_CONFIGURATION) &&
2235 !(context->root_directory || context->root_image)) {
6c47cd7d
LP
2236 char *private_root;
2237
2238 /* So this is for a dynamic user, and we need to make sure the process can access its own
2239 * directory. For that we overmount the usually inaccessible "private" subdirectory with a
2240 * tmpfs that makes it accessible and is empty except for the submounts we do this for. */
2241
2242 private_root = strjoin(params->prefix[t], "/private");
2243 if (!private_root) {
2244 r = -ENOMEM;
2245 goto finish;
2246 }
2247
2248 r = strv_consume(&empty_directories, private_root);
a635a7ae 2249 if (r < 0)
6c47cd7d 2250 goto finish;
6c47cd7d
LP
2251 }
2252
2253 STRV_FOREACH(suffix, context->directories[t].paths) {
2254 char *s, *d;
2255
8092a48c
YW
2256 if (context->dynamic_user &&
2257 !IN_SET(t, EXEC_DIRECTORY_RUNTIME, EXEC_DIRECTORY_CONFIGURATION))
6c47cd7d
LP
2258 s = strjoin(params->prefix[t], "/private/", *suffix);
2259 else
2260 s = strjoin(params->prefix[t], "/", *suffix);
2261 if (!s) {
2262 r = -ENOMEM;
2263 goto finish;
2264 }
2265
5609f688
YW
2266 if (context->dynamic_user &&
2267 !IN_SET(t, EXEC_DIRECTORY_RUNTIME, EXEC_DIRECTORY_CONFIGURATION) &&
2268 (context->root_directory || context->root_image))
2269 /* When RootDirectory= or RootImage= are set, then the symbolic link to the private
2270 * directory is not created on the root directory. So, let's bind-mount the directory
2271 * on the 'non-private' place. */
2272 d = strjoin(params->prefix[t], "/", *suffix);
2273 else
2274 d = strdup(s);
6c47cd7d
LP
2275 if (!d) {
2276 free(s);
2277 r = -ENOMEM;
2278 goto finish;
2279 }
2280
2281 bind_mounts[h++] = (BindMount) {
2282 .source = s,
2283 .destination = d,
2284 .read_only = false,
2285 .recursive = true,
2286 .ignore_enoent = false,
2287 };
2288 }
2289 }
2290
2291 assert(h == n);
2292
2293 *ret_bind_mounts = bind_mounts;
2294 *ret_n_bind_mounts = n;
ae2a15bc 2295 *ret_empty_directories = TAKE_PTR(empty_directories);
6c47cd7d
LP
2296
2297 return (int) n;
2298
2299finish:
2300 bind_mount_free_many(bind_mounts, h);
2301 return r;
2302}
2303
6818c54c 2304static int apply_mount_namespace(
34cf6c43
YW
2305 const Unit *u,
2306 const ExecCommand *command,
6818c54c
LP
2307 const ExecContext *context,
2308 const ExecParameters *params,
34cf6c43 2309 const ExecRuntime *runtime) {
6818c54c 2310
7bcef4ef 2311 _cleanup_strv_free_ char **empty_directories = NULL;
93c6bb51 2312 char *tmp = NULL, *var = NULL;
915e6d16 2313 const char *root_dir = NULL, *root_image = NULL;
228af36f 2314 NamespaceInfo ns_info;
165a31c0 2315 bool needs_sandboxing;
6c47cd7d 2316 BindMount *bind_mounts = NULL;
da6053d0 2317 size_t n_bind_mounts = 0;
6818c54c 2318 int r;
93c6bb51 2319
2b3c1b9e
DH
2320 assert(context);
2321
93c6bb51
DH
2322 /* The runtime struct only contains the parent of the private /tmp,
2323 * which is non-accessible to world users. Inside of it there's a /tmp
2324 * that is sticky, and that's the one we want to use here. */
2325
2326 if (context->private_tmp && runtime) {
2327 if (runtime->tmp_dir)
2328 tmp = strjoina(runtime->tmp_dir, "/tmp");
2329 if (runtime->var_tmp_dir)
2330 var = strjoina(runtime->var_tmp_dir, "/tmp");
2331 }
2332
915e6d16
LP
2333 if (params->flags & EXEC_APPLY_CHROOT) {
2334 root_image = context->root_image;
2335
2336 if (!root_image)
2337 root_dir = context->root_directory;
2338 }
93c6bb51 2339
6c47cd7d
LP
2340 r = compile_bind_mounts(context, params, &bind_mounts, &n_bind_mounts, &empty_directories);
2341 if (r < 0)
2342 return r;
2343
165a31c0 2344 needs_sandboxing = (params->flags & EXEC_APPLY_SANDBOXING) && !(command->flags & EXEC_COMMAND_FULLY_PRIVILEGED);
b5a33299
YW
2345 if (needs_sandboxing)
2346 ns_info = (NamespaceInfo) {
2347 .ignore_protect_paths = false,
2348 .private_dev = context->private_devices,
2349 .protect_control_groups = context->protect_control_groups,
2350 .protect_kernel_tunables = context->protect_kernel_tunables,
2351 .protect_kernel_modules = context->protect_kernel_modules,
2352 .mount_apivfs = context->mount_apivfs,
228af36f 2353 .private_mounts = context->private_mounts,
b5a33299 2354 };
228af36f
LP
2355 else if (!context->dynamic_user && root_dir)
2356 /*
2357 * If DynamicUser=no and RootDirectory= is set then lets pass a relaxed
2358 * sandbox info, otherwise enforce it, don't ignore protected paths and
2359 * fail if we are enable to apply the sandbox inside the mount namespace.
2360 */
2361 ns_info = (NamespaceInfo) {
2362 .ignore_protect_paths = true,
2363 };
2364 else
2365 ns_info = (NamespaceInfo) {};
b5a33299 2366
915e6d16 2367 r = setup_namespace(root_dir, root_image,
7bcef4ef 2368 &ns_info, context->read_write_paths,
165a31c0
LP
2369 needs_sandboxing ? context->read_only_paths : NULL,
2370 needs_sandboxing ? context->inaccessible_paths : NULL,
6c47cd7d
LP
2371 empty_directories,
2372 bind_mounts,
2373 n_bind_mounts,
2abd4e38
YW
2374 context->temporary_filesystems,
2375 context->n_temporary_filesystems,
93c6bb51
DH
2376 tmp,
2377 var,
165a31c0
LP
2378 needs_sandboxing ? context->protect_home : PROTECT_HOME_NO,
2379 needs_sandboxing ? context->protect_system : PROTECT_SYSTEM_NO,
915e6d16
LP
2380 context->mount_flags,
2381 DISSECT_IMAGE_DISCARD_ON_LOOP);
93c6bb51 2382
6c47cd7d
LP
2383 bind_mount_free_many(bind_mounts, n_bind_mounts);
2384
93c6bb51
DH
2385 /* If we couldn't set up the namespace this is probably due to a
2386 * missing capability. In this case, silently proceeed. */
2387 if (IN_SET(r, -EPERM, -EACCES)) {
93c6bb51 2388 log_unit_debug_errno(u, r, "Failed to set up namespace, assuming containerized execution, ignoring: %m");
86ffb325 2389 return 0;
93c6bb51
DH
2390 }
2391
2392 return r;
2393}
2394
915e6d16
LP
2395static int apply_working_directory(
2396 const ExecContext *context,
2397 const ExecParameters *params,
2398 const char *home,
376fecf6
LP
2399 const bool needs_mount_ns,
2400 int *exit_status) {
915e6d16 2401
6732edab 2402 const char *d, *wd;
2b3c1b9e
DH
2403
2404 assert(context);
376fecf6 2405 assert(exit_status);
2b3c1b9e 2406
6732edab
LP
2407 if (context->working_directory_home) {
2408
376fecf6
LP
2409 if (!home) {
2410 *exit_status = EXIT_CHDIR;
6732edab 2411 return -ENXIO;
376fecf6 2412 }
6732edab 2413
2b3c1b9e 2414 wd = home;
6732edab
LP
2415
2416 } else if (context->working_directory)
2b3c1b9e
DH
2417 wd = context->working_directory;
2418 else
2419 wd = "/";
e7f1e7c6
DH
2420
2421 if (params->flags & EXEC_APPLY_CHROOT) {
2422 if (!needs_mount_ns && context->root_directory)
376fecf6
LP
2423 if (chroot(context->root_directory) < 0) {
2424 *exit_status = EXIT_CHROOT;
e7f1e7c6 2425 return -errno;
376fecf6 2426 }
e7f1e7c6 2427
2b3c1b9e
DH
2428 d = wd;
2429 } else
3b0e5bb5 2430 d = prefix_roota(context->root_directory, wd);
e7f1e7c6 2431
376fecf6
LP
2432 if (chdir(d) < 0 && !context->working_directory_missing_ok) {
2433 *exit_status = EXIT_CHDIR;
2b3c1b9e 2434 return -errno;
376fecf6 2435 }
e7f1e7c6
DH
2436
2437 return 0;
2438}
2439
b1edf445 2440static int setup_keyring(
34cf6c43 2441 const Unit *u,
b1edf445
LP
2442 const ExecContext *context,
2443 const ExecParameters *p,
2444 uid_t uid, gid_t gid) {
2445
74dd6b51 2446 key_serial_t keyring;
e64c2d0b
DJL
2447 int r = 0;
2448 uid_t saved_uid;
2449 gid_t saved_gid;
74dd6b51
LP
2450
2451 assert(u);
b1edf445 2452 assert(context);
74dd6b51
LP
2453 assert(p);
2454
2455 /* Let's set up a new per-service "session" kernel keyring for each system service. This has the benefit that
2456 * each service runs with its own keyring shared among all processes of the service, but with no hook-up beyond
2457 * that scope, and in particular no link to the per-UID keyring. If we don't do this the keyring will be
2458 * automatically created on-demand and then linked to the per-UID keyring, by the kernel. The kernel's built-in
2459 * on-demand behaviour is very appropriate for login users, but probably not so much for system services, where
2460 * UIDs are not necessarily specific to a service but reused (at least in the case of UID 0). */
2461
2462 if (!(p->flags & EXEC_NEW_KEYRING))
2463 return 0;
2464
b1edf445
LP
2465 if (context->keyring_mode == EXEC_KEYRING_INHERIT)
2466 return 0;
2467
e64c2d0b
DJL
2468 /* Acquiring a reference to the user keyring is nasty. We briefly change identity in order to get things set up
2469 * properly by the kernel. If we don't do that then we can't create it atomically, and that sucks for parallel
2470 * execution. This mimics what pam_keyinit does, too. Setting up session keyring, to be owned by the right user
2471 * & group is just as nasty as acquiring a reference to the user keyring. */
2472
2473 saved_uid = getuid();
2474 saved_gid = getgid();
2475
2476 if (gid_is_valid(gid) && gid != saved_gid) {
2477 if (setregid(gid, -1) < 0)
2478 return log_unit_error_errno(u, errno, "Failed to change GID for user keyring: %m");
2479 }
2480
2481 if (uid_is_valid(uid) && uid != saved_uid) {
2482 if (setreuid(uid, -1) < 0) {
2483 r = log_unit_error_errno(u, errno, "Failed to change UID for user keyring: %m");
2484 goto out;
2485 }
2486 }
2487
74dd6b51
LP
2488 keyring = keyctl(KEYCTL_JOIN_SESSION_KEYRING, 0, 0, 0, 0);
2489 if (keyring == -1) {
2490 if (errno == ENOSYS)
8002fb97 2491 log_unit_debug_errno(u, errno, "Kernel keyring not supported, ignoring.");
74dd6b51 2492 else if (IN_SET(errno, EACCES, EPERM))
8002fb97 2493 log_unit_debug_errno(u, errno, "Kernel keyring access prohibited, ignoring.");
74dd6b51 2494 else if (errno == EDQUOT)
8002fb97 2495 log_unit_debug_errno(u, errno, "Out of kernel keyrings to allocate, ignoring.");
74dd6b51 2496 else
e64c2d0b 2497 r = log_unit_error_errno(u, errno, "Setting up kernel keyring failed: %m");
74dd6b51 2498
e64c2d0b 2499 goto out;
74dd6b51
LP
2500 }
2501
e64c2d0b
DJL
2502 /* When requested link the user keyring into the session keyring. */
2503 if (context->keyring_mode == EXEC_KEYRING_SHARED) {
2504
2505 if (keyctl(KEYCTL_LINK,
2506 KEY_SPEC_USER_KEYRING,
2507 KEY_SPEC_SESSION_KEYRING, 0, 0) < 0) {
2508 r = log_unit_error_errno(u, errno, "Failed to link user keyring into session keyring: %m");
2509 goto out;
2510 }
2511 }
2512
2513 /* Restore uid/gid back */
2514 if (uid_is_valid(uid) && uid != saved_uid) {
2515 if (setreuid(saved_uid, -1) < 0) {
2516 r = log_unit_error_errno(u, errno, "Failed to change UID back for user keyring: %m");
2517 goto out;
2518 }
2519 }
2520
2521 if (gid_is_valid(gid) && gid != saved_gid) {
2522 if (setregid(saved_gid, -1) < 0)
2523 return log_unit_error_errno(u, errno, "Failed to change GID back for user keyring: %m");
2524 }
2525
2526 /* Populate they keyring with the invocation ID by default, as original saved_uid. */
b3415f5d
LP
2527 if (!sd_id128_is_null(u->invocation_id)) {
2528 key_serial_t key;
2529
2530 key = add_key("user", "invocation_id", &u->invocation_id, sizeof(u->invocation_id), KEY_SPEC_SESSION_KEYRING);
2531 if (key == -1)
8002fb97 2532 log_unit_debug_errno(u, errno, "Failed to add invocation ID to keyring, ignoring: %m");
b3415f5d
LP
2533 else {
2534 if (keyctl(KEYCTL_SETPERM, key,
2535 KEY_POS_VIEW|KEY_POS_READ|KEY_POS_SEARCH|
2536 KEY_USR_VIEW|KEY_USR_READ|KEY_USR_SEARCH, 0, 0) < 0)
e64c2d0b 2537 r = log_unit_error_errno(u, errno, "Failed to restrict invocation ID permission: %m");
b3415f5d
LP
2538 }
2539 }
2540
e64c2d0b
DJL
2541out:
2542 /* Revert back uid & gid for the the last time, and exit */
2543 /* no extra logging, as only the first already reported error matters */
2544 if (getuid() != saved_uid)
2545 (void) setreuid(saved_uid, -1);
b1edf445 2546
e64c2d0b
DJL
2547 if (getgid() != saved_gid)
2548 (void) setregid(saved_gid, -1);
b1edf445 2549
e64c2d0b 2550 return r;
74dd6b51
LP
2551}
2552
/* Appends the valid (i.e. non-negative) file descriptors of 'pair' to 'array', in order, advancing the fill
 * counter *n for each one appended. A NULL pair is ignored. The caller has to make sure 'array' has room for
 * up to two more entries. */
static void append_socket_pair(int *array, size_t *n, const int pair[2]) {
        size_t k;

        assert(array);
        assert(n);

        if (!pair)
                return;

        for (k = 0; k < 2; k++) {
                if (pair[k] < 0)
                        continue;

                array[*n] = pair[k];
                (*n)++;
        }
}
2565
a34ceba6
LP
2566static int close_remaining_fds(
2567 const ExecParameters *params,
34cf6c43
YW
2568 const ExecRuntime *runtime,
2569 const DynamicCreds *dcreds,
00d9ef85 2570 int user_lookup_fd,
a34ceba6 2571 int socket_fd,
da6053d0 2572 int *fds, size_t n_fds) {
a34ceba6 2573
da6053d0 2574 size_t n_dont_close = 0;
00d9ef85 2575 int dont_close[n_fds + 12];
a34ceba6
LP
2576
2577 assert(params);
2578
2579 if (params->stdin_fd >= 0)
2580 dont_close[n_dont_close++] = params->stdin_fd;
2581 if (params->stdout_fd >= 0)
2582 dont_close[n_dont_close++] = params->stdout_fd;
2583 if (params->stderr_fd >= 0)
2584 dont_close[n_dont_close++] = params->stderr_fd;
2585
2586 if (socket_fd >= 0)
2587 dont_close[n_dont_close++] = socket_fd;
2588 if (n_fds > 0) {
2589 memcpy(dont_close + n_dont_close, fds, sizeof(int) * n_fds);
2590 n_dont_close += n_fds;
2591 }
2592
29206d46
LP
2593 if (runtime)
2594 append_socket_pair(dont_close, &n_dont_close, runtime->netns_storage_socket);
2595
2596 if (dcreds) {
2597 if (dcreds->user)
2598 append_socket_pair(dont_close, &n_dont_close, dcreds->user->storage_socket);
2599 if (dcreds->group)
2600 append_socket_pair(dont_close, &n_dont_close, dcreds->group->storage_socket);
a34ceba6
LP
2601 }
2602
00d9ef85
LP
2603 if (user_lookup_fd >= 0)
2604 dont_close[n_dont_close++] = user_lookup_fd;
2605
a34ceba6
LP
2606 return close_all_fds(dont_close, n_dont_close);
2607}
2608
00d9ef85
LP
2609static int send_user_lookup(
2610 Unit *unit,
2611 int user_lookup_fd,
2612 uid_t uid,
2613 gid_t gid) {
2614
2615 assert(unit);
2616
2617 /* Send the resolved UID/GID to PID 1 after we learnt it. We send a single datagram, containing the UID/GID
2618 * data as well as the unit name. Note that we suppress sending this if no user/group to resolve was
2619 * specified. */
2620
2621 if (user_lookup_fd < 0)
2622 return 0;
2623
2624 if (!uid_is_valid(uid) && !gid_is_valid(gid))
2625 return 0;
2626
2627 if (writev(user_lookup_fd,
2628 (struct iovec[]) {
e6a7ec4b
LP
2629 IOVEC_INIT(&uid, sizeof(uid)),
2630 IOVEC_INIT(&gid, sizeof(gid)),
2631 IOVEC_INIT_STRING(unit->id) }, 3) < 0)
00d9ef85
LP
2632 return -errno;
2633
2634 return 0;
2635}
2636
6732edab
LP
2637static int acquire_home(const ExecContext *c, uid_t uid, const char** home, char **buf) {
2638 int r;
2639
2640 assert(c);
2641 assert(home);
2642 assert(buf);
2643
2644 /* If WorkingDirectory=~ is set, try to acquire a usable home directory. */
2645
2646 if (*home)
2647 return 0;
2648
2649 if (!c->working_directory_home)
2650 return 0;
2651
2652 if (uid == 0) {
2653 /* Hardcode /root as home directory for UID 0 */
2654 *home = "/root";
2655 return 1;
2656 }
2657
2658 r = get_home_dir(buf);
2659 if (r < 0)
2660 return r;
2661
2662 *home = *buf;
2663 return 1;
2664}
2665
da50b85a
LP
2666static int compile_suggested_paths(const ExecContext *c, const ExecParameters *p, char ***ret) {
2667 _cleanup_strv_free_ char ** list = NULL;
2668 ExecDirectoryType t;
2669 int r;
2670
2671 assert(c);
2672 assert(p);
2673 assert(ret);
2674
2675 assert(c->dynamic_user);
2676
2677 /* Compile a list of paths that it might make sense to read the owning UID from to use as initial candidate for
2678 * dynamic UID allocation, in order to save us from doing costly recursive chown()s of the special
2679 * directories. */
2680
2681 for (t = 0; t < _EXEC_DIRECTORY_TYPE_MAX; t++) {
2682 char **i;
2683
2684 if (t == EXEC_DIRECTORY_CONFIGURATION)
2685 continue;
2686
2687 if (!p->prefix[t])
2688 continue;
2689
2690 STRV_FOREACH(i, c->directories[t].paths) {
2691 char *e;
2692
8092a48c
YW
2693 if (t == EXEC_DIRECTORY_RUNTIME)
2694 e = strjoin(p->prefix[t], "/", *i);
2695 else
2696 e = strjoin(p->prefix[t], "/private/", *i);
da50b85a
LP
2697 if (!e)
2698 return -ENOMEM;
2699
2700 r = strv_consume(&list, e);
2701 if (r < 0)
2702 return r;
2703 }
2704 }
2705
ae2a15bc 2706 *ret = TAKE_PTR(list);
da50b85a
LP
2707
2708 return 0;
2709}
2710
34cf6c43
YW
2711static char *exec_command_line(char **argv);
2712
ff0af2a1 2713static int exec_child(
f2341e0a 2714 Unit *unit,
34cf6c43 2715 const ExecCommand *command,
ff0af2a1
LP
2716 const ExecContext *context,
2717 const ExecParameters *params,
2718 ExecRuntime *runtime,
29206d46 2719 DynamicCreds *dcreds,
ff0af2a1
LP
2720 char **argv,
2721 int socket_fd,
52c239d7 2722 int named_iofds[3],
4c47affc 2723 int *fds,
da6053d0
LP
2724 size_t n_storage_fds,
2725 size_t n_socket_fds,
ff0af2a1 2726 char **files_env,
00d9ef85 2727 int user_lookup_fd,
12145637 2728 int *exit_status) {
d35fbf6b 2729
2065ca69 2730 _cleanup_strv_free_ char **our_env = NULL, **pass_env = NULL, **accum_env = NULL, **final_argv = NULL;
7f59dd35 2731 _cleanup_free_ char *home_buffer = NULL;
4d885bd3
DH
2732 _cleanup_free_ gid_t *supplementary_gids = NULL;
2733 const char *username = NULL, *groupname = NULL;
2b3c1b9e 2734 const char *home = NULL, *shell = NULL;
7bce046b
LP
2735 dev_t journal_stream_dev = 0;
2736 ino_t journal_stream_ino = 0;
165a31c0
LP
2737 bool needs_sandboxing, /* Do we need to set up full sandboxing? (i.e. all namespacing, all MAC stuff, caps, yadda yadda */
2738 needs_setuid, /* Do we need to do the actual setresuid()/setresgid() calls? */
2739 needs_mount_namespace, /* Do we need to set up a mount namespace for this kernel? */
2740 needs_ambient_hack; /* Do we need to apply the ambient capabilities hack? */
349cc4a5 2741#if HAVE_SELINUX
7f59dd35 2742 _cleanup_free_ char *mac_selinux_context_net = NULL;
43b1f709 2743 bool use_selinux = false;
ecfbc84f 2744#endif
f9fa32f0 2745#if ENABLE_SMACK
43b1f709 2746 bool use_smack = false;
ecfbc84f 2747#endif
349cc4a5 2748#if HAVE_APPARMOR
43b1f709 2749 bool use_apparmor = false;
ecfbc84f 2750#endif
fed1e721
LP
2751 uid_t uid = UID_INVALID;
2752 gid_t gid = GID_INVALID;
34a5df58 2753 int r, ngids = 0;
da6053d0 2754 size_t n_fds;
3536f49e 2755 ExecDirectoryType dt;
165a31c0 2756 int secure_bits;
034c6ed7 2757
f2341e0a 2758 assert(unit);
5cb5a6ff
LP
2759 assert(command);
2760 assert(context);
d35fbf6b 2761 assert(params);
ff0af2a1 2762 assert(exit_status);
d35fbf6b
DM
2763
2764 rename_process_from_path(command->path);
2765
2766 /* We reset exactly these signals, since they are the
2767 * only ones we set to SIG_IGN in the main daemon. All
2768 * others we leave untouched because we set them to
2769 * SIG_DFL or a valid handler initially, both of which
2770 * will be demoted to SIG_DFL. */
ce30c8dc
LP
2771 (void) default_signals(SIGNALS_CRASH_HANDLER,
2772 SIGNALS_IGNORE, -1);
d35fbf6b
DM
2773
2774 if (context->ignore_sigpipe)
ce30c8dc 2775 (void) ignore_signals(SIGPIPE, -1);
d35fbf6b 2776
ff0af2a1
LP
2777 r = reset_signal_mask();
2778 if (r < 0) {
2779 *exit_status = EXIT_SIGNAL_MASK;
12145637 2780 return log_unit_error_errno(unit, r, "Failed to set process signal mask: %m");
d35fbf6b 2781 }
034c6ed7 2782
d35fbf6b
DM
2783 if (params->idle_pipe)
2784 do_idle_pipe_dance(params->idle_pipe);
4f2d528d 2785
2c027c62
LP
2786 /* Close fds we don't need very early to make sure we don't block init reexecution because it cannot bind its
2787 * sockets. Among the fds we close are the logging fds, and we want to keep them closed, so that we don't have
2788 * any fds open we don't really want open during the transition. In order to make logging work, we switch the
2789 * log subsystem into open_when_needed mode, so that it reopens the logs on every single log call. */
ff0af2a1 2790
d35fbf6b 2791 log_forget_fds();
2c027c62 2792 log_set_open_when_needed(true);
4f2d528d 2793
40a80078
LP
2794 /* In case anything used libc syslog(), close this here, too */
2795 closelog();
2796
4c47affc 2797 n_fds = n_storage_fds + n_socket_fds;
00d9ef85 2798 r = close_remaining_fds(params, runtime, dcreds, user_lookup_fd, socket_fd, fds, n_fds);
ff0af2a1
LP
2799 if (r < 0) {
2800 *exit_status = EXIT_FDS;
12145637 2801 return log_unit_error_errno(unit, r, "Failed to close unwanted file descriptors: %m");
8c7be95e
LP
2802 }
2803
d35fbf6b
DM
2804 if (!context->same_pgrp)
2805 if (setsid() < 0) {
ff0af2a1 2806 *exit_status = EXIT_SETSID;
12145637 2807 return log_unit_error_errno(unit, errno, "Failed to create new process session: %m");
d35fbf6b 2808 }
9e2f7c11 2809
1e22b5cd 2810 exec_context_tty_reset(context, params);
d35fbf6b 2811
c891efaf 2812 if (unit_shall_confirm_spawn(unit)) {
7d5ceb64 2813 const char *vc = params->confirm_spawn;
3b20f877
FB
2814 _cleanup_free_ char *cmdline = NULL;
2815
2816 cmdline = exec_command_line(argv);
2817 if (!cmdline) {
0460aa5c 2818 *exit_status = EXIT_MEMORY;
12145637 2819 return log_oom();
3b20f877 2820 }
d35fbf6b 2821
eedf223a 2822 r = ask_for_confirmation(vc, unit, cmdline);
3b20f877
FB
2823 if (r != CONFIRM_EXECUTE) {
2824 if (r == CONFIRM_PRETEND_SUCCESS) {
2825 *exit_status = EXIT_SUCCESS;
2826 return 0;
2827 }
ff0af2a1 2828 *exit_status = EXIT_CONFIRM;
12145637 2829 log_unit_error(unit, "Execution cancelled by the user");
d35fbf6b 2830 return -ECANCELED;
d35fbf6b
DM
2831 }
2832 }
1a63a750 2833
29206d46 2834 if (context->dynamic_user && dcreds) {
da50b85a 2835 _cleanup_strv_free_ char **suggested_paths = NULL;
29206d46 2836
409093fe
LP
2837 /* Make sure we bypass our own NSS module for any NSS checks */
2838 if (putenv((char*) "SYSTEMD_NSS_DYNAMIC_BYPASS=1") != 0) {
2839 *exit_status = EXIT_USER;
12145637 2840 return log_unit_error_errno(unit, errno, "Failed to update environment: %m");
409093fe
LP
2841 }
2842
da50b85a
LP
2843 r = compile_suggested_paths(context, params, &suggested_paths);
2844 if (r < 0) {
2845 *exit_status = EXIT_MEMORY;
2846 return log_oom();
2847 }
2848
2849 r = dynamic_creds_realize(dcreds, suggested_paths, &uid, &gid);
ff0af2a1
LP
2850 if (r < 0) {
2851 *exit_status = EXIT_USER;
e2b0cc34
YW
2852 if (r == -EILSEQ) {
2853 log_unit_error(unit, "Failed to update dynamic user credentials: User or group with specified name already exists.");
2854 return -EOPNOTSUPP;
2855 }
12145637 2856 return log_unit_error_errno(unit, r, "Failed to update dynamic user credentials: %m");
524daa8c 2857 }
524daa8c 2858
70dd455c 2859 if (!uid_is_valid(uid)) {
29206d46 2860 *exit_status = EXIT_USER;
12145637 2861 log_unit_error(unit, "UID validation failed for \""UID_FMT"\"", uid);
70dd455c
ZJS
2862 return -ESRCH;
2863 }
2864
2865 if (!gid_is_valid(gid)) {
2866 *exit_status = EXIT_USER;
12145637 2867 log_unit_error(unit, "GID validation failed for \""GID_FMT"\"", gid);
29206d46
LP
2868 return -ESRCH;
2869 }
5bc7452b 2870
29206d46
LP
2871 if (dcreds->user)
2872 username = dcreds->user->name;
2873
2874 } else {
4d885bd3
DH
2875 r = get_fixed_user(context, &username, &uid, &gid, &home, &shell);
2876 if (r < 0) {
2877 *exit_status = EXIT_USER;
12145637 2878 return log_unit_error_errno(unit, r, "Failed to determine user credentials: %m");
5bc7452b 2879 }
5bc7452b 2880
4d885bd3
DH
2881 r = get_fixed_group(context, &groupname, &gid);
2882 if (r < 0) {
2883 *exit_status = EXIT_GROUP;
12145637 2884 return log_unit_error_errno(unit, r, "Failed to determine group credentials: %m");
4d885bd3 2885 }
cdc5d5c5 2886 }
29206d46 2887
cdc5d5c5
DH
2888 /* Initialize user supplementary groups and get SupplementaryGroups= ones */
2889 r = get_supplementary_groups(context, username, groupname, gid,
2890 &supplementary_gids, &ngids);
2891 if (r < 0) {
2892 *exit_status = EXIT_GROUP;
12145637 2893 return log_unit_error_errno(unit, r, "Failed to determine supplementary groups: %m");
29206d46 2894 }
5bc7452b 2895
00d9ef85
LP
2896 r = send_user_lookup(unit, user_lookup_fd, uid, gid);
2897 if (r < 0) {
2898 *exit_status = EXIT_USER;
12145637 2899 return log_unit_error_errno(unit, r, "Failed to send user credentials to PID1: %m");
00d9ef85
LP
2900 }
2901
2902 user_lookup_fd = safe_close(user_lookup_fd);
2903
6732edab
LP
2904 r = acquire_home(context, uid, &home, &home_buffer);
2905 if (r < 0) {
2906 *exit_status = EXIT_CHDIR;
12145637 2907 return log_unit_error_errno(unit, r, "Failed to determine $HOME for user: %m");
6732edab
LP
2908 }
2909
d35fbf6b
DM
2910 /* If a socket is connected to STDIN/STDOUT/STDERR, we
2911 * must be sure to drop O_NONBLOCK */
2912 if (socket_fd >= 0)
a34ceba6 2913 (void) fd_nonblock(socket_fd, false);
acbb0225 2914
52c239d7 2915 r = setup_input(context, params, socket_fd, named_iofds);
ff0af2a1
LP
2916 if (r < 0) {
2917 *exit_status = EXIT_STDIN;
12145637 2918 return log_unit_error_errno(unit, r, "Failed to set up standard input: %m");
d35fbf6b 2919 }
034c6ed7 2920
52c239d7 2921 r = setup_output(unit, context, params, STDOUT_FILENO, socket_fd, named_iofds, basename(command->path), uid, gid, &journal_stream_dev, &journal_stream_ino);
ff0af2a1
LP
2922 if (r < 0) {
2923 *exit_status = EXIT_STDOUT;
12145637 2924 return log_unit_error_errno(unit, r, "Failed to set up standard output: %m");
d35fbf6b
DM
2925 }
2926
52c239d7 2927 r = setup_output(unit, context, params, STDERR_FILENO, socket_fd, named_iofds, basename(command->path), uid, gid, &journal_stream_dev, &journal_stream_ino);
ff0af2a1
LP
2928 if (r < 0) {
2929 *exit_status = EXIT_STDERR;
12145637 2930 return log_unit_error_errno(unit, r, "Failed to set up standard error output: %m");
d35fbf6b
DM
2931 }
2932
2933 if (params->cgroup_path) {
ff0af2a1
LP
2934 r = cg_attach_everywhere(params->cgroup_supported, params->cgroup_path, 0, NULL, NULL);
2935 if (r < 0) {
2936 *exit_status = EXIT_CGROUP;
12145637 2937 return log_unit_error_errno(unit, r, "Failed to attach to cgroup %s: %m", params->cgroup_path);
309bff19 2938 }
d35fbf6b 2939 }
309bff19 2940
d35fbf6b 2941 if (context->oom_score_adjust_set) {
9f8168eb
LP
2942 /* When we can't make this change due to EPERM, then let's silently skip over it. User namespaces
2943 * prohibit write access to this file, and we shouldn't trip up over that. */
2944 r = set_oom_score_adjust(context->oom_score_adjust);
12145637 2945 if (IN_SET(r, -EPERM, -EACCES))
f2341e0a 2946 log_unit_debug_errno(unit, r, "Failed to adjust OOM setting, assuming containerized execution, ignoring: %m");
12145637 2947 else if (r < 0) {
ff0af2a1 2948 *exit_status = EXIT_OOM_ADJUST;
12145637 2949 return log_unit_error_errno(unit, r, "Failed to adjust OOM setting: %m");
613b411c 2950 }
d35fbf6b
DM
2951 }
2952
2953 if (context->nice_set)
2954 if (setpriority(PRIO_PROCESS, 0, context->nice) < 0) {
ff0af2a1 2955 *exit_status = EXIT_NICE;
12145637 2956 return log_unit_error_errno(unit, errno, "Failed to set up process scheduling priority (nice level): %m");
613b411c
LP
2957 }
2958
d35fbf6b
DM
2959 if (context->cpu_sched_set) {
2960 struct sched_param param = {
2961 .sched_priority = context->cpu_sched_priority,
2962 };
2963
ff0af2a1
LP
2964 r = sched_setscheduler(0,
2965 context->cpu_sched_policy |
2966 (context->cpu_sched_reset_on_fork ?
2967 SCHED_RESET_ON_FORK : 0),
2968 &param);
2969 if (r < 0) {
2970 *exit_status = EXIT_SETSCHEDULER;
12145637 2971 return log_unit_error_errno(unit, errno, "Failed to set up CPU scheduling: %m");
fc9b2a84 2972 }
d35fbf6b 2973 }
fc9b2a84 2974
d35fbf6b
DM
2975 if (context->cpuset)
2976 if (sched_setaffinity(0, CPU_ALLOC_SIZE(context->cpuset_ncpus), context->cpuset) < 0) {
ff0af2a1 2977 *exit_status = EXIT_CPUAFFINITY;
12145637 2978 return log_unit_error_errno(unit, errno, "Failed to set up CPU affinity: %m");
034c6ed7
LP
2979 }
2980
d35fbf6b
DM
2981 if (context->ioprio_set)
2982 if (ioprio_set(IOPRIO_WHO_PROCESS, 0, context->ioprio) < 0) {
ff0af2a1 2983 *exit_status = EXIT_IOPRIO;
12145637 2984 return log_unit_error_errno(unit, errno, "Failed to set up IO scheduling priority: %m");
d35fbf6b 2985 }
da726a4d 2986
d35fbf6b
DM
2987 if (context->timer_slack_nsec != NSEC_INFINITY)
2988 if (prctl(PR_SET_TIMERSLACK, context->timer_slack_nsec) < 0) {
ff0af2a1 2989 *exit_status = EXIT_TIMERSLACK;
12145637 2990 return log_unit_error_errno(unit, errno, "Failed to set up timer slack: %m");
4c2630eb 2991 }
9eba9da4 2992
21022b9d
LP
2993 if (context->personality != PERSONALITY_INVALID) {
2994 r = safe_personality(context->personality);
2995 if (r < 0) {
ff0af2a1 2996 *exit_status = EXIT_PERSONALITY;
12145637 2997 return log_unit_error_errno(unit, r, "Failed to set up execution domain (personality): %m");
4c2630eb 2998 }
21022b9d 2999 }
94f04347 3000
d35fbf6b 3001 if (context->utmp_id)
df0ff127 3002 utmp_put_init_process(context->utmp_id, getpid_cached(), getsid(0),
6a93917d 3003 context->tty_path,
023a4f67
LP
3004 context->utmp_mode == EXEC_UTMP_INIT ? INIT_PROCESS :
3005 context->utmp_mode == EXEC_UTMP_LOGIN ? LOGIN_PROCESS :
3006 USER_PROCESS,
6a93917d 3007 username);
d35fbf6b 3008
e0d2adfd 3009 if (context->user) {
ff0af2a1
LP
3010 r = chown_terminal(STDIN_FILENO, uid);
3011 if (r < 0) {
3012 *exit_status = EXIT_STDIN;
12145637 3013 return log_unit_error_errno(unit, r, "Failed to change ownership of terminal: %m");
071830ff 3014 }
d35fbf6b 3015 }
8e274523 3016
62b9bb26
LP
3017 /* If delegation is enabled we'll pass ownership of the cgroup to the user of the new process. On cgroupsv1
3018 * this is only about systemd's own hierarchy, i.e. not the controller hierarchies, simply because that's not
3019 * safe. On cgroupsv2 there's only one hierarchy anyway, and delegation is safe there, hence in that case only
3020 * touch a single hierarchy too. */
584b8688 3021 if (params->cgroup_path && context->user && (params->flags & EXEC_CGROUP_DELEGATE)) {
62b9bb26 3022 r = cg_set_access(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, uid, gid);
ff0af2a1
LP
3023 if (r < 0) {
3024 *exit_status = EXIT_CGROUP;
12145637 3025 return log_unit_error_errno(unit, r, "Failed to adjust control group access: %m");
034c6ed7 3026 }
d35fbf6b 3027 }
034c6ed7 3028
72fd1768 3029 for (dt = 0; dt < _EXEC_DIRECTORY_TYPE_MAX; dt++) {
8679efde 3030 r = setup_exec_directory(context, params, uid, gid, dt, exit_status);
12145637
LP
3031 if (r < 0)
3032 return log_unit_error_errno(unit, r, "Failed to set up special execution directory in %s: %m", params->prefix[dt]);
d35fbf6b 3033 }
94f04347 3034
7bce046b 3035 r = build_environment(
fd63e712 3036 unit,
7bce046b
LP
3037 context,
3038 params,
3039 n_fds,
3040 home,
3041 username,
3042 shell,
3043 journal_stream_dev,
3044 journal_stream_ino,
3045 &our_env);
2065ca69
JW
3046 if (r < 0) {
3047 *exit_status = EXIT_MEMORY;
12145637 3048 return log_oom();
2065ca69
JW
3049 }
3050
3051 r = build_pass_environment(context, &pass_env);
3052 if (r < 0) {
3053 *exit_status = EXIT_MEMORY;
12145637 3054 return log_oom();
2065ca69
JW
3055 }
3056
3057 accum_env = strv_env_merge(5,
3058 params->environment,
3059 our_env,
3060 pass_env,
3061 context->environment,
3062 files_env,
3063 NULL);
3064 if (!accum_env) {
3065 *exit_status = EXIT_MEMORY;
12145637 3066 return log_oom();
2065ca69 3067 }
1280503b 3068 accum_env = strv_env_clean(accum_env);
2065ca69 3069
096424d1 3070 (void) umask(context->umask);
b213e1c1 3071
b1edf445 3072 r = setup_keyring(unit, context, params, uid, gid);
74dd6b51
LP
3073 if (r < 0) {
3074 *exit_status = EXIT_KEYRING;
12145637 3075 return log_unit_error_errno(unit, r, "Failed to set up kernel keyring: %m");
74dd6b51
LP
3076 }
3077
165a31c0 3078 /* We need sandboxing if the caller asked us to apply it and the command isn't explicitly excepted from it */
1703fa41 3079 needs_sandboxing = (params->flags & EXEC_APPLY_SANDBOXING) && !(command->flags & EXEC_COMMAND_FULLY_PRIVILEGED);
7f18ef0a 3080
165a31c0
LP
3081 /* We need the ambient capability hack, if the caller asked us to apply it and the command is marked for it, and the kernel doesn't actually support ambient caps */
3082 needs_ambient_hack = (params->flags & EXEC_APPLY_SANDBOXING) && (command->flags & EXEC_COMMAND_AMBIENT_MAGIC) && !ambient_capabilities_supported();
7f18ef0a 3083
165a31c0
LP
3084 /* We need setresuid() if the caller asked us to apply sandboxing and the command isn't explicitly excepted from either whole sandboxing or just setresuid() itself, and the ambient hack is not desired */
3085 if (needs_ambient_hack)
3086 needs_setuid = false;
3087 else
3088 needs_setuid = (params->flags & EXEC_APPLY_SANDBOXING) && !(command->flags & (EXEC_COMMAND_FULLY_PRIVILEGED|EXEC_COMMAND_NO_SETUID));
3089
3090 if (needs_sandboxing) {
7f18ef0a
FK
3091 /* MAC enablement checks need to be done before a new mount ns is created, as they rely on /sys being
3092 * present. The actual MAC context application will happen later, as late as possible, to avoid
3093 * impacting our own code paths. */
3094
349cc4a5 3095#if HAVE_SELINUX
43b1f709 3096 use_selinux = mac_selinux_use();
7f18ef0a 3097#endif
f9fa32f0 3098#if ENABLE_SMACK
43b1f709 3099 use_smack = mac_smack_use();
7f18ef0a 3100#endif
349cc4a5 3101#if HAVE_APPARMOR
43b1f709 3102 use_apparmor = mac_apparmor_use();
7f18ef0a 3103#endif
165a31c0 3104 }
7f18ef0a 3105
165a31c0
LP
3106 if (needs_setuid) {
3107 if (context->pam_name && username) {
3108 r = setup_pam(context->pam_name, username, uid, gid, context->tty_path, &accum_env, fds, n_fds);
3109 if (r < 0) {
3110 *exit_status = EXIT_PAM;
12145637 3111 return log_unit_error_errno(unit, r, "Failed to set up PAM session: %m");
165a31c0
LP
3112 }
3113 }
b213e1c1 3114 }
ac45f971 3115
d35fbf6b 3116 if (context->private_network && runtime && runtime->netns_storage_socket[0] >= 0) {
6e2d7c4f
MS
3117 if (ns_type_supported(NAMESPACE_NET)) {
3118 r = setup_netns(runtime->netns_storage_socket);
3119 if (r < 0) {
3120 *exit_status = EXIT_NETWORK;
3121 return log_unit_error_errno(unit, r, "Failed to set up network namespacing: %m");
3122 }
3123 } else
3124 log_unit_warning(unit, "PrivateNetwork=yes is configured, but the kernel does not support network namespaces, ignoring.");
d35fbf6b 3125 }
169c1bda 3126
ee818b89 3127 needs_mount_namespace = exec_needs_mount_namespace(context, params, runtime);
ee818b89 3128 if (needs_mount_namespace) {
6818c54c 3129 r = apply_mount_namespace(unit, command, context, params, runtime);
3fbe8dbe
LP
3130 if (r < 0) {
3131 *exit_status = EXIT_NAMESPACE;
12145637 3132 return log_unit_error_errno(unit, r, "Failed to set up mount namespacing: %m");
3fbe8dbe 3133 }
d35fbf6b 3134 }
81a2b7ce 3135
50b3dfb9 3136 /* Apply just after mount namespace setup */
376fecf6 3137 r = apply_working_directory(context, params, home, needs_mount_namespace, exit_status);
12145637
LP
3138 if (r < 0)
3139 return log_unit_error_errno(unit, r, "Changing to the requested working directory failed: %m");
50b3dfb9 3140
bbeea271 3141 /* Drop groups as early as possible */
165a31c0 3142 if (needs_setuid) {
709dbeac 3143 r = enforce_groups(gid, supplementary_gids, ngids);
096424d1
LP
3144 if (r < 0) {
3145 *exit_status = EXIT_GROUP;
12145637 3146 return log_unit_error_errno(unit, r, "Changing group credentials failed: %m");
096424d1 3147 }
165a31c0 3148 }
096424d1 3149
165a31c0 3150 if (needs_sandboxing) {
349cc4a5 3151#if HAVE_SELINUX
43b1f709 3152 if (use_selinux && params->selinux_context_net && socket_fd >= 0) {
937ccce9
LP
3153 r = mac_selinux_get_child_mls_label(socket_fd, command->path, context->selinux_context, &mac_selinux_context_net);
3154 if (r < 0) {
3155 *exit_status = EXIT_SELINUX_CONTEXT;
12145637 3156 return log_unit_error_errno(unit, r, "Failed to determine SELinux context: %m");
937ccce9 3157 }
9008e1ac 3158 }
9008e1ac
MS
3159#endif
3160
937ccce9
LP
3161 if (context->private_users) {
3162 r = setup_private_users(uid, gid);
3163 if (r < 0) {
3164 *exit_status = EXIT_USER;
12145637 3165 return log_unit_error_errno(unit, r, "Failed to set up user namespacing: %m");
937ccce9 3166 }
d251207d
LP
3167 }
3168 }
3169
165a31c0
LP
3170 /* We repeat the fd closing here, to make sure that nothing is leaked from the PAM modules. Note that we are
3171 * more aggressive this time since socket_fd and the netns fds we don't need anymore. The custom endpoint fd
3172 * was needed to upload the policy and can now be closed as well. */
ff0af2a1
LP
3173 r = close_all_fds(fds, n_fds);
3174 if (r >= 0)
3175 r = shift_fds(fds, n_fds);
3176 if (r >= 0)
4c47affc 3177 r = flags_fds(fds, n_storage_fds, n_socket_fds, context->non_blocking);
ff0af2a1
LP
3178 if (r < 0) {
3179 *exit_status = EXIT_FDS;
12145637 3180 return log_unit_error_errno(unit, r, "Failed to adjust passed file descriptors: %m");
d35fbf6b 3181 }
e66cf1a3 3182
165a31c0 3183 secure_bits = context->secure_bits;
e66cf1a3 3184
165a31c0
LP
3185 if (needs_sandboxing) {
3186 uint64_t bset;
34a5df58 3187 int which_failed;
755d4b67 3188
34a5df58
LP
3189 r = setrlimit_closest_all((const struct rlimit* const *) context->rlimit, &which_failed);
3190 if (r < 0) {
3191 *exit_status = EXIT_LIMITS;
3192 return log_unit_error_errno(unit, r, "Failed to adjust resource limit RLIMIT_%s: %m", rlimit_to_string(which_failed));
e66cf1a3
LP
3193 }
3194
f4170c67
LP
3195 /* Set the RTPRIO resource limit to 0, but only if nothing else was explicitly requested. */
3196 if (context->restrict_realtime && !context->rlimit[RLIMIT_RTPRIO]) {
3197 if (setrlimit(RLIMIT_RTPRIO, &RLIMIT_MAKE_CONST(0)) < 0) {
3198 *exit_status = EXIT_LIMITS;
12145637 3199 return log_unit_error_errno(unit, errno, "Failed to adjust RLIMIT_RTPRIO resource limit: %m");
f4170c67
LP
3200 }
3201 }
3202
37ac2744
JB
3203#if ENABLE_SMACK
3204 /* LSM Smack needs the capability CAP_MAC_ADMIN to change the current execution security context of the
3205 * process. This is the latest place before dropping capabilities. Other MAC context are set later. */
3206 if (use_smack) {
3207 r = setup_smack(context, command);
3208 if (r < 0) {
3209 *exit_status = EXIT_SMACK_PROCESS_LABEL;
3210 return log_unit_error_errno(unit, r, "Failed to set SMACK process label: %m");
3211 }
3212 }
3213#endif
3214
165a31c0
LP
3215 bset = context->capability_bounding_set;
3216 /* If the ambient caps hack is enabled (which means the kernel can't do them, and the user asked for
3217 * our magic fallback), then let's add some extra caps, so that the service can drop privs of its own,
3218 * instead of us doing that */
3219 if (needs_ambient_hack)
3220 bset |= (UINT64_C(1) << CAP_SETPCAP) |
3221 (UINT64_C(1) << CAP_SETUID) |
3222 (UINT64_C(1) << CAP_SETGID);
3223
3224 if (!cap_test_all(bset)) {
3225 r = capability_bounding_set_drop(bset, false);
ff0af2a1
LP
3226 if (r < 0) {
3227 *exit_status = EXIT_CAPABILITIES;
12145637 3228 return log_unit_error_errno(unit, r, "Failed to drop capabilities: %m");
3b8bddde 3229 }
4c2630eb 3230 }
3b8bddde 3231
755d4b67
IP
3232 /* This is done before enforce_user, but ambient set
3233 * does not survive over setresuid() if keep_caps is not set. */
165a31c0
LP
3234 if (!needs_ambient_hack &&
3235 context->capability_ambient_set != 0) {
755d4b67
IP
3236 r = capability_ambient_set_apply(context->capability_ambient_set, true);
3237 if (r < 0) {
3238 *exit_status = EXIT_CAPABILITIES;
12145637 3239 return log_unit_error_errno(unit, r, "Failed to apply ambient capabilities (before UID change): %m");
755d4b67 3240 }
755d4b67 3241 }
165a31c0 3242 }
755d4b67 3243
165a31c0 3244 if (needs_setuid) {
d35fbf6b 3245 if (context->user) {
ff0af2a1
LP
3246 r = enforce_user(context, uid);
3247 if (r < 0) {
3248 *exit_status = EXIT_USER;
12145637 3249 return log_unit_error_errno(unit, r, "Failed to change UID to " UID_FMT ": %m", uid);
5b6319dc 3250 }
165a31c0
LP
3251
3252 if (!needs_ambient_hack &&
3253 context->capability_ambient_set != 0) {
755d4b67
IP
3254
3255 /* Fix the ambient capabilities after user change. */
3256 r = capability_ambient_set_apply(context->capability_ambient_set, false);
3257 if (r < 0) {
3258 *exit_status = EXIT_CAPABILITIES;
12145637 3259 return log_unit_error_errno(unit, r, "Failed to apply ambient capabilities (after UID change): %m");
755d4b67
IP
3260 }
3261
3262 /* If we were asked to change user and ambient capabilities
3263 * were requested, we had to add keep-caps to the securebits
3264 * so that we would maintain the inherited capability set
3265 * through the setresuid(). Make sure that the bit is added
3266 * also to the context secure_bits so that we don't try to
3267 * drop the bit away next. */
3268
7f508f2c 3269 secure_bits |= 1<<SECURE_KEEP_CAPS;
755d4b67 3270 }
5b6319dc 3271 }
165a31c0 3272 }
d35fbf6b 3273
165a31c0 3274 if (needs_sandboxing) {
37ac2744 3275 /* Apply other MAC contexts late, but before seccomp syscall filtering, as those should really be last to
5cd9cd35
LP
3276 * influence our own codepaths as little as possible. Moreover, applying MAC contexts usually requires
3277 * syscalls that are subject to seccomp filtering, hence should probably be applied before the syscalls
3278 * are restricted. */
3279
349cc4a5 3280#if HAVE_SELINUX
43b1f709 3281 if (use_selinux) {
5cd9cd35
LP
3282 char *exec_context = mac_selinux_context_net ?: context->selinux_context;
3283
3284 if (exec_context) {
3285 r = setexeccon(exec_context);
3286 if (r < 0) {
3287 *exit_status = EXIT_SELINUX_CONTEXT;
12145637 3288 return log_unit_error_errno(unit, r, "Failed to change SELinux context to %s: %m", exec_context);
5cd9cd35
LP
3289 }
3290 }
3291 }
3292#endif
3293
349cc4a5 3294#if HAVE_APPARMOR
43b1f709 3295 if (use_apparmor && context->apparmor_profile) {
5cd9cd35
LP
3296 r = aa_change_onexec(context->apparmor_profile);
3297 if (r < 0 && !context->apparmor_profile_ignore) {
3298 *exit_status = EXIT_APPARMOR_PROFILE;
12145637 3299 return log_unit_error_errno(unit, errno, "Failed to prepare AppArmor profile change to %s: %m", context->apparmor_profile);
5cd9cd35
LP
3300 }
3301 }
3302#endif
3303
165a31c0
LP
3304 /* PR_GET_SECUREBITS is not privileged, while PR_SET_SECUREBITS is. So to suppress potential EPERMs
3305 * we'll try not to call PR_SET_SECUREBITS unless necessary. */
755d4b67
IP
3306 if (prctl(PR_GET_SECUREBITS) != secure_bits)
3307 if (prctl(PR_SET_SECUREBITS, secure_bits) < 0) {
ff0af2a1 3308 *exit_status = EXIT_SECUREBITS;
12145637 3309 return log_unit_error_errno(unit, errno, "Failed to set process secure bits: %m");
ff01d048 3310 }
5b6319dc 3311
59eeb84b 3312 if (context_has_no_new_privileges(context))
d35fbf6b 3313 if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) < 0) {
ff0af2a1 3314 *exit_status = EXIT_NO_NEW_PRIVILEGES;
12145637 3315 return log_unit_error_errno(unit, errno, "Failed to disable new privileges: %m");
d35fbf6b
DM
3316 }
3317
349cc4a5 3318#if HAVE_SECCOMP
469830d1
LP
3319 r = apply_address_families(unit, context);
3320 if (r < 0) {
3321 *exit_status = EXIT_ADDRESS_FAMILIES;
12145637 3322 return log_unit_error_errno(unit, r, "Failed to restrict address families: %m");
4c2630eb 3323 }
04aa0cb9 3324
469830d1
LP
3325 r = apply_memory_deny_write_execute(unit, context);
3326 if (r < 0) {
3327 *exit_status = EXIT_SECCOMP;
12145637 3328 return log_unit_error_errno(unit, r, "Failed to disable writing to executable memory: %m");
f3e43635 3329 }
f4170c67 3330
469830d1
LP
3331 r = apply_restrict_realtime(unit, context);
3332 if (r < 0) {
3333 *exit_status = EXIT_SECCOMP;
12145637 3334 return log_unit_error_errno(unit, r, "Failed to apply realtime restrictions: %m");
f4170c67
LP
3335 }
3336
add00535
LP
3337 r = apply_restrict_namespaces(unit, context);
3338 if (r < 0) {
3339 *exit_status = EXIT_SECCOMP;
12145637 3340 return log_unit_error_errno(unit, r, "Failed to apply namespace restrictions: %m");
add00535
LP
3341 }
3342
469830d1
LP
3343 r = apply_protect_sysctl(unit, context);
3344 if (r < 0) {
3345 *exit_status = EXIT_SECCOMP;
12145637 3346 return log_unit_error_errno(unit, r, "Failed to apply sysctl restrictions: %m");
502d704e
DH
3347 }
3348
469830d1
LP
3349 r = apply_protect_kernel_modules(unit, context);
3350 if (r < 0) {
3351 *exit_status = EXIT_SECCOMP;
12145637 3352 return log_unit_error_errno(unit, r, "Failed to apply module loading restrictions: %m");
59eeb84b
LP
3353 }
3354
469830d1
LP
3355 r = apply_private_devices(unit, context);
3356 if (r < 0) {
3357 *exit_status = EXIT_SECCOMP;
12145637 3358 return log_unit_error_errno(unit, r, "Failed to set up private devices: %m");
469830d1
LP
3359 }
3360
3361 r = apply_syscall_archs(unit, context);
3362 if (r < 0) {
3363 *exit_status = EXIT_SECCOMP;
12145637 3364 return log_unit_error_errno(unit, r, "Failed to apply syscall architecture restrictions: %m");
ba128bb8
LP
3365 }
3366
78e864e5
TM
3367 r = apply_lock_personality(unit, context);
3368 if (r < 0) {
3369 *exit_status = EXIT_SECCOMP;
12145637 3370 return log_unit_error_errno(unit, r, "Failed to lock personalities: %m");
78e864e5
TM
3371 }
3372
5cd9cd35
LP
3373 /* This really should remain the last step before the execve(), to make sure our own code is unaffected
3374 * by the filter as little as possible. */
165a31c0 3375 r = apply_syscall_filter(unit, context, needs_ambient_hack);
469830d1
LP
3376 if (r < 0) {
3377 *exit_status = EXIT_SECCOMP;
12145637 3378 return log_unit_error_errno(unit, r, "Failed to apply system call filters: %m");
d35fbf6b
DM
3379 }
3380#endif
d35fbf6b 3381 }
034c6ed7 3382
00819cc1
LP
3383 if (!strv_isempty(context->unset_environment)) {
3384 char **ee = NULL;
3385
3386 ee = strv_env_delete(accum_env, 1, context->unset_environment);
3387 if (!ee) {
3388 *exit_status = EXIT_MEMORY;
12145637 3389 return log_oom();
00819cc1
LP
3390 }
3391
130d3d22 3392 strv_free_and_replace(accum_env, ee);
00819cc1
LP
3393 }
3394
2065ca69 3395 final_argv = replace_env_argv(argv, accum_env);
d35fbf6b 3396 if (!final_argv) {
ff0af2a1 3397 *exit_status = EXIT_MEMORY;
12145637 3398 return log_oom();
d35fbf6b 3399 }
034c6ed7 3400
f1d34068 3401 if (DEBUG_LOGGING) {
d35fbf6b 3402 _cleanup_free_ char *line;
81a2b7ce 3403
d35fbf6b 3404 line = exec_command_line(final_argv);
a1230ff9 3405 if (line)
f2341e0a 3406 log_struct(LOG_DEBUG,
f2341e0a
LP
3407 "EXECUTABLE=%s", command->path,
3408 LOG_UNIT_MESSAGE(unit, "Executing: %s", line),
ba360bb0 3409 LOG_UNIT_ID(unit),
a1230ff9 3410 LOG_UNIT_INVOCATION_ID(unit));
d35fbf6b 3411 }
dd305ec9 3412
2065ca69 3413 execve(command->path, final_argv, accum_env);
12145637
LP
3414
3415 if (errno == ENOENT && (command->flags & EXEC_COMMAND_IGNORE_FAILURE)) {
12145637
LP
3416 log_struct_errno(LOG_INFO, errno,
3417 "MESSAGE_ID=" SD_MESSAGE_SPAWN_FAILED_STR,
3418 LOG_UNIT_ID(unit),
3419 LOG_UNIT_INVOCATION_ID(unit),
3420 LOG_UNIT_MESSAGE(unit, "Executable %s missing, skipping: %m",
3421 command->path),
a1230ff9 3422 "EXECUTABLE=%s", command->path);
12145637
LP
3423 return 0;
3424 }
3425
ff0af2a1 3426 *exit_status = EXIT_EXEC;
12145637 3427 return log_unit_error_errno(unit, errno, "Failed to execute command: %m");
d35fbf6b 3428}
81a2b7ce 3429
34cf6c43
YW
3430static int exec_context_load_environment(const Unit *unit, const ExecContext *c, char ***l);
3431static int exec_context_named_iofds(const ExecContext *c, const ExecParameters *p, int named_iofds[3]);
3432
f2341e0a
LP
3433int exec_spawn(Unit *unit,
3434 ExecCommand *command,
d35fbf6b
DM
3435 const ExecContext *context,
3436 const ExecParameters *params,
3437 ExecRuntime *runtime,
29206d46 3438 DynamicCreds *dcreds,
d35fbf6b 3439 pid_t *ret) {
8351ceae 3440
d35fbf6b 3441 _cleanup_strv_free_ char **files_env = NULL;
9b141911 3442 int *fds = NULL;
da6053d0 3443 size_t n_storage_fds = 0, n_socket_fds = 0;
ff0af2a1
LP
3444 _cleanup_free_ char *line = NULL;
3445 int socket_fd, r;
52c239d7 3446 int named_iofds[3] = { -1, -1, -1 };
ff0af2a1 3447 char **argv;
d35fbf6b 3448 pid_t pid;
8351ceae 3449
f2341e0a 3450 assert(unit);
d35fbf6b
DM
3451 assert(command);
3452 assert(context);
3453 assert(ret);
3454 assert(params);
4c47affc 3455 assert(params->fds || (params->n_storage_fds + params->n_socket_fds <= 0));
4298d0b5 3456
d35fbf6b
DM
3457 if (context->std_input == EXEC_INPUT_SOCKET ||
3458 context->std_output == EXEC_OUTPUT_SOCKET ||
3459 context->std_error == EXEC_OUTPUT_SOCKET) {
17df7223 3460
4c47affc 3461 if (params->n_socket_fds > 1) {
f2341e0a 3462 log_unit_error(unit, "Got more than one socket.");
d35fbf6b 3463 return -EINVAL;
ff0af2a1 3464 }
eef65bf3 3465
4c47affc 3466 if (params->n_socket_fds == 0) {
488ab41c
AA
3467 log_unit_error(unit, "Got no socket.");
3468 return -EINVAL;
3469 }
3470
d35fbf6b
DM
3471 socket_fd = params->fds[0];
3472 } else {
3473 socket_fd = -1;
3474 fds = params->fds;
4c47affc 3475 n_storage_fds = params->n_storage_fds;
9b141911 3476 n_socket_fds = params->n_socket_fds;
d35fbf6b 3477 }
94f04347 3478
34cf6c43 3479 r = exec_context_named_iofds(context, params, named_iofds);
52c239d7
LB
3480 if (r < 0)
3481 return log_unit_error_errno(unit, r, "Failed to load a named file descriptor: %m");
3482
f2341e0a 3483 r = exec_context_load_environment(unit, context, &files_env);
ff0af2a1 3484 if (r < 0)
f2341e0a 3485 return log_unit_error_errno(unit, r, "Failed to load environment files: %m");
034c6ed7 3486
d35fbf6b 3487 argv = params->argv ?: command->argv;
d35fbf6b
DM
3488 line = exec_command_line(argv);
3489 if (!line)
3490 return log_oom();
fab56fc5 3491
f2341e0a 3492 log_struct(LOG_DEBUG,
f2341e0a
LP
3493 LOG_UNIT_MESSAGE(unit, "About to execute: %s", line),
3494 "EXECUTABLE=%s", command->path,
ba360bb0 3495 LOG_UNIT_ID(unit),
a1230ff9 3496 LOG_UNIT_INVOCATION_ID(unit));
12145637 3497
d35fbf6b
DM
3498 pid = fork();
3499 if (pid < 0)
74129a12 3500 return log_unit_error_errno(unit, errno, "Failed to fork: %m");
d35fbf6b
DM
3501
3502 if (pid == 0) {
12145637 3503 int exit_status = EXIT_SUCCESS;
ff0af2a1 3504
f2341e0a
LP
3505 r = exec_child(unit,
3506 command,
ff0af2a1
LP
3507 context,
3508 params,
3509 runtime,
29206d46 3510 dcreds,
ff0af2a1
LP
3511 argv,
3512 socket_fd,
52c239d7 3513 named_iofds,
4c47affc
FB
3514 fds,
3515 n_storage_fds,
9b141911 3516 n_socket_fds,
ff0af2a1 3517 files_env,
00d9ef85 3518 unit->manager->user_lookup_fds[1],
12145637
LP
3519 &exit_status);
3520
a1230ff9 3521 if (r < 0)
12145637
LP
3522 log_struct_errno(LOG_ERR, r,
3523 "MESSAGE_ID=" SD_MESSAGE_SPAWN_FAILED_STR,
3524 LOG_UNIT_ID(unit),
3525 LOG_UNIT_INVOCATION_ID(unit),
3526 LOG_UNIT_MESSAGE(unit, "Failed at step %s spawning %s: %m",
3527 exit_status_to_string(exit_status, EXIT_STATUS_SYSTEMD),
3528 command->path),
a1230ff9 3529 "EXECUTABLE=%s", command->path);
4c2630eb 3530
ff0af2a1 3531 _exit(exit_status);
034c6ed7
LP
3532 }
3533
f2341e0a 3534 log_unit_debug(unit, "Forked %s as "PID_FMT, command->path, pid);
23635a85 3535
80876c20
LP
3536 /* We add the new process to the cgroup both in the child (so
3537 * that we can be sure that no user code is ever executed
3538 * outside of the cgroup) and in the parent (so that we can be
3539 * sure that when we kill the cgroup the process will be
3540 * killed too). */
d35fbf6b 3541 if (params->cgroup_path)
dd305ec9 3542 (void) cg_attach(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, pid);
2da3263a 3543
b58b4116 3544 exec_status_start(&command->exec_status, pid);
9fb86720 3545
034c6ed7 3546 *ret = pid;
5cb5a6ff
LP
3547 return 0;
3548}
3549
034c6ed7 3550void exec_context_init(ExecContext *c) {
3536f49e
YW
3551 ExecDirectoryType i;
3552
034c6ed7
LP
3553 assert(c);
3554
4c12626c 3555 c->umask = 0022;
9eba9da4 3556 c->ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 0);
94f04347 3557 c->cpu_sched_policy = SCHED_OTHER;
071830ff 3558 c->syslog_priority = LOG_DAEMON|LOG_INFO;
74922904 3559 c->syslog_level_prefix = true;
353e12c2 3560 c->ignore_sigpipe = true;
3a43da28 3561 c->timer_slack_nsec = NSEC_INFINITY;
050f7277 3562 c->personality = PERSONALITY_INVALID;
72fd1768 3563 for (i = 0; i < _EXEC_DIRECTORY_TYPE_MAX; i++)
3536f49e 3564 c->directories[i].mode = 0755;
a103496c 3565 c->capability_bounding_set = CAP_ALL;
aa9d574d
YW
3566 assert_cc(NAMESPACE_FLAGS_INITIAL != NAMESPACE_FLAGS_ALL);
3567 c->restrict_namespaces = NAMESPACE_FLAGS_INITIAL;
d3070fbd 3568 c->log_level_max = -1;
034c6ed7
LP
3569}
3570
613b411c 3571void exec_context_done(ExecContext *c) {
3536f49e 3572 ExecDirectoryType i;
d3070fbd 3573 size_t l;
5cb5a6ff
LP
3574
3575 assert(c);
3576
6796073e
LP
3577 c->environment = strv_free(c->environment);
3578 c->environment_files = strv_free(c->environment_files);
b4c14404 3579 c->pass_environment = strv_free(c->pass_environment);
00819cc1 3580 c->unset_environment = strv_free(c->unset_environment);
8c7be95e 3581
31ce987c 3582 rlimit_free_all(c->rlimit);
034c6ed7 3583
2038c3f5 3584 for (l = 0; l < 3; l++) {
52c239d7 3585 c->stdio_fdname[l] = mfree(c->stdio_fdname[l]);
2038c3f5
LP
3586 c->stdio_file[l] = mfree(c->stdio_file[l]);
3587 }
52c239d7 3588
a1e58e8e
LP
3589 c->working_directory = mfree(c->working_directory);
3590 c->root_directory = mfree(c->root_directory);
915e6d16 3591 c->root_image = mfree(c->root_image);
a1e58e8e
LP
3592 c->tty_path = mfree(c->tty_path);
3593 c->syslog_identifier = mfree(c->syslog_identifier);
3594 c->user = mfree(c->user);
3595 c->group = mfree(c->group);
034c6ed7 3596
6796073e 3597 c->supplementary_groups = strv_free(c->supplementary_groups);
94f04347 3598
a1e58e8e 3599 c->pam_name = mfree(c->pam_name);
5b6319dc 3600
2a624c36
AP
3601 c->read_only_paths = strv_free(c->read_only_paths);
3602 c->read_write_paths = strv_free(c->read_write_paths);
3603 c->inaccessible_paths = strv_free(c->inaccessible_paths);
82c121a4 3604
d2d6c096 3605 bind_mount_free_many(c->bind_mounts, c->n_bind_mounts);
8e06d57c
YW
3606 c->bind_mounts = NULL;
3607 c->n_bind_mounts = 0;
2abd4e38
YW
3608 temporary_filesystem_free_many(c->temporary_filesystems, c->n_temporary_filesystems);
3609 c->temporary_filesystems = NULL;
3610 c->n_temporary_filesystems = 0;
d2d6c096 3611
da681e1b 3612 c->cpuset = cpu_set_mfree(c->cpuset);
86a3475b 3613
a1e58e8e
LP
3614 c->utmp_id = mfree(c->utmp_id);
3615 c->selinux_context = mfree(c->selinux_context);
3616 c->apparmor_profile = mfree(c->apparmor_profile);
5b8e1b77 3617 c->smack_process_label = mfree(c->smack_process_label);
eef65bf3 3618
8cfa775f 3619 c->syscall_filter = hashmap_free(c->syscall_filter);
525d3cc7
LP
3620 c->syscall_archs = set_free(c->syscall_archs);
3621 c->address_families = set_free(c->address_families);
e66cf1a3 3622
72fd1768 3623 for (i = 0; i < _EXEC_DIRECTORY_TYPE_MAX; i++)
3536f49e 3624 c->directories[i].paths = strv_free(c->directories[i].paths);
d3070fbd
LP
3625
3626 c->log_level_max = -1;
3627
3628 exec_context_free_log_extra_fields(c);
08f3be7a
LP
3629
3630 c->stdin_data = mfree(c->stdin_data);
3631 c->stdin_data_size = 0;
e66cf1a3
LP
3632}
3633
34cf6c43 3634int exec_context_destroy_runtime_directory(const ExecContext *c, const char *runtime_prefix) {
e66cf1a3
LP
3635 char **i;
3636
3637 assert(c);
3638
3639 if (!runtime_prefix)
3640 return 0;
3641
3536f49e 3642 STRV_FOREACH(i, c->directories[EXEC_DIRECTORY_RUNTIME].paths) {
e66cf1a3
LP
3643 _cleanup_free_ char *p;
3644
605405c6 3645 p = strjoin(runtime_prefix, "/", *i);
e66cf1a3
LP
3646 if (!p)
3647 return -ENOMEM;
3648
6c47cd7d 3649 /* We execute this synchronously, since we need to be sure this is gone when we start the service
e66cf1a3 3650 * next. */
c6878637 3651 (void) rm_rf(p, REMOVE_ROOT);
e66cf1a3
LP
3652 }
3653
3654 return 0;
5cb5a6ff
LP
3655}
3656
34cf6c43 3657static void exec_command_done(ExecCommand *c) {
43d0fcbd
LP
3658 assert(c);
3659
a1e58e8e 3660 c->path = mfree(c->path);
43d0fcbd 3661
6796073e 3662 c->argv = strv_free(c->argv);
43d0fcbd
LP
3663}
3664
da6053d0
LP
3665void exec_command_done_array(ExecCommand *c, size_t n) {
3666 size_t i;
43d0fcbd
LP
3667
3668 for (i = 0; i < n; i++)
3669 exec_command_done(c+i);
3670}
3671
f1acf85a 3672ExecCommand* exec_command_free_list(ExecCommand *c) {
5cb5a6ff
LP
3673 ExecCommand *i;
3674
3675 while ((i = c)) {
71fda00f 3676 LIST_REMOVE(command, c, i);
43d0fcbd 3677 exec_command_done(i);
5cb5a6ff
LP
3678 free(i);
3679 }
f1acf85a
ZJS
3680
3681 return NULL;
5cb5a6ff
LP
3682}
3683
da6053d0
LP
3684void exec_command_free_array(ExecCommand **c, size_t n) {
3685 size_t i;
034c6ed7 3686
f1acf85a
ZJS
3687 for (i = 0; i < n; i++)
3688 c[i] = exec_command_free_list(c[i]);
034c6ed7
LP
3689}
3690
039f0e70 3691typedef struct InvalidEnvInfo {
34cf6c43 3692 const Unit *unit;
039f0e70
LP
3693 const char *path;
3694} InvalidEnvInfo;
3695
3696static void invalid_env(const char *p, void *userdata) {
3697 InvalidEnvInfo *info = userdata;
3698
f2341e0a 3699 log_unit_error(info->unit, "Ignoring invalid environment assignment '%s': %s", p, info->path);
039f0e70
LP
3700}
3701
52c239d7
LB
3702const char* exec_context_fdname(const ExecContext *c, int fd_index) {
3703 assert(c);
3704
3705 switch (fd_index) {
5073ff6b 3706
52c239d7
LB
3707 case STDIN_FILENO:
3708 if (c->std_input != EXEC_INPUT_NAMED_FD)
3709 return NULL;
5073ff6b 3710
52c239d7 3711 return c->stdio_fdname[STDIN_FILENO] ?: "stdin";
5073ff6b 3712
52c239d7
LB
3713 case STDOUT_FILENO:
3714 if (c->std_output != EXEC_OUTPUT_NAMED_FD)
3715 return NULL;
5073ff6b 3716
52c239d7 3717 return c->stdio_fdname[STDOUT_FILENO] ?: "stdout";
5073ff6b 3718
52c239d7
LB
3719 case STDERR_FILENO:
3720 if (c->std_error != EXEC_OUTPUT_NAMED_FD)
3721 return NULL;
5073ff6b 3722
52c239d7 3723 return c->stdio_fdname[STDERR_FILENO] ?: "stderr";
5073ff6b 3724
52c239d7
LB
3725 default:
3726 return NULL;
3727 }
3728}
3729
34cf6c43 3730static int exec_context_named_iofds(const ExecContext *c, const ExecParameters *p, int named_iofds[3]) {
da6053d0 3731 size_t i, targets;
56fbd561 3732 const char* stdio_fdname[3];
da6053d0 3733 size_t n_fds;
52c239d7
LB
3734
3735 assert(c);
3736 assert(p);
3737
3738 targets = (c->std_input == EXEC_INPUT_NAMED_FD) +
3739 (c->std_output == EXEC_OUTPUT_NAMED_FD) +
3740 (c->std_error == EXEC_OUTPUT_NAMED_FD);
3741
3742 for (i = 0; i < 3; i++)
3743 stdio_fdname[i] = exec_context_fdname(c, i);
3744
4c47affc
FB
3745 n_fds = p->n_storage_fds + p->n_socket_fds;
3746
3747 for (i = 0; i < n_fds && targets > 0; i++)
56fbd561
ZJS
3748 if (named_iofds[STDIN_FILENO] < 0 &&
3749 c->std_input == EXEC_INPUT_NAMED_FD &&
3750 stdio_fdname[STDIN_FILENO] &&
3751 streq(p->fd_names[i], stdio_fdname[STDIN_FILENO])) {
3752
52c239d7
LB
3753 named_iofds[STDIN_FILENO] = p->fds[i];
3754 targets--;
56fbd561
ZJS
3755
3756 } else if (named_iofds[STDOUT_FILENO] < 0 &&
3757 c->std_output == EXEC_OUTPUT_NAMED_FD &&
3758 stdio_fdname[STDOUT_FILENO] &&
3759 streq(p->fd_names[i], stdio_fdname[STDOUT_FILENO])) {
3760
52c239d7
LB
3761 named_iofds[STDOUT_FILENO] = p->fds[i];
3762 targets--;
56fbd561
ZJS
3763
3764 } else if (named_iofds[STDERR_FILENO] < 0 &&
3765 c->std_error == EXEC_OUTPUT_NAMED_FD &&
3766 stdio_fdname[STDERR_FILENO] &&
3767 streq(p->fd_names[i], stdio_fdname[STDERR_FILENO])) {
3768
52c239d7
LB
3769 named_iofds[STDERR_FILENO] = p->fds[i];
3770 targets--;
3771 }
3772
56fbd561 3773 return targets == 0 ? 0 : -ENOENT;
52c239d7
LB
3774}
3775
34cf6c43 3776static int exec_context_load_environment(const Unit *unit, const ExecContext *c, char ***l) {
8c7be95e
LP
3777 char **i, **r = NULL;
3778
3779 assert(c);
3780 assert(l);
3781
3782 STRV_FOREACH(i, c->environment_files) {
3783 char *fn;
52511fae
ZJS
3784 int k;
3785 unsigned n;
8c7be95e
LP
3786 bool ignore = false;
3787 char **p;
7fd1b19b 3788 _cleanup_globfree_ glob_t pglob = {};
8c7be95e
LP
3789
3790 fn = *i;
3791
3792 if (fn[0] == '-') {
3793 ignore = true;
313cefa1 3794 fn++;
8c7be95e
LP
3795 }
3796
3797 if (!path_is_absolute(fn)) {
8c7be95e
LP
3798 if (ignore)
3799 continue;
3800
3801 strv_free(r);
3802 return -EINVAL;
3803 }
3804
2bef10ab 3805 /* Filename supports globbing, take all matching files */
d8c92e8b
ZJS
3806 k = safe_glob(fn, 0, &pglob);
3807 if (k < 0) {
2bef10ab
PL
3808 if (ignore)
3809 continue;
8c7be95e 3810
2bef10ab 3811 strv_free(r);
d8c92e8b 3812 return k;
2bef10ab 3813 }
8c7be95e 3814
d8c92e8b
ZJS
3815 /* When we don't match anything, -ENOENT should be returned */
3816 assert(pglob.gl_pathc > 0);
3817
3818 for (n = 0; n < pglob.gl_pathc; n++) {
717603e3 3819 k = load_env_file(NULL, pglob.gl_pathv[n], NULL, &p);
2bef10ab
PL
3820 if (k < 0) {
3821 if (ignore)
3822 continue;
8c7be95e 3823
2bef10ab 3824 strv_free(r);
2bef10ab 3825 return k;
e9c1ea9d 3826 }
ebc05a09 3827 /* Log invalid environment variables with filename */
039f0e70
LP
3828 if (p) {
3829 InvalidEnvInfo info = {
f2341e0a 3830 .unit = unit,
039f0e70
LP
3831 .path = pglob.gl_pathv[n]
3832 };
3833
3834 p = strv_env_clean_with_callback(p, invalid_env, &info);
3835 }
8c7be95e 3836
234519ae 3837 if (!r)
2bef10ab
PL
3838 r = p;
3839 else {
3840 char **m;
8c7be95e 3841
2bef10ab
PL
3842 m = strv_env_merge(2, r, p);
3843 strv_free(r);
3844 strv_free(p);
c84a9488 3845 if (!m)
2bef10ab 3846 return -ENOMEM;
2bef10ab
PL
3847
3848 r = m;
3849 }
8c7be95e
LP
3850 }
3851 }
3852
3853 *l = r;
3854
3855 return 0;
3856}
3857
6ac8fdc9 3858static bool tty_may_match_dev_console(const char *tty) {
7b912648 3859 _cleanup_free_ char *resolved = NULL;
6ac8fdc9 3860
1e22b5cd
LP
3861 if (!tty)
3862 return true;
3863
a119ec7c 3864 tty = skip_dev_prefix(tty);
6ac8fdc9
MS
3865
3866 /* trivial identity? */
3867 if (streq(tty, "console"))
3868 return true;
3869
7b912648
LP
3870 if (resolve_dev_console(&resolved) < 0)
3871 return true; /* if we could not resolve, assume it may */
6ac8fdc9
MS
3872
3873 /* "tty0" means the active VC, so it may be the same sometimes */
7b912648 3874 return streq(resolved, tty) || (streq(resolved, "tty0") && tty_is_vc(tty));
6ac8fdc9
MS
3875}
3876
34cf6c43 3877bool exec_context_may_touch_console(const ExecContext *ec) {
1e22b5cd
LP
3878
3879 return (ec->tty_reset ||
3880 ec->tty_vhangup ||
3881 ec->tty_vt_disallocate ||
6ac8fdc9
MS
3882 is_terminal_input(ec->std_input) ||
3883 is_terminal_output(ec->std_output) ||
3884 is_terminal_output(ec->std_error)) &&
1e22b5cd 3885 tty_may_match_dev_console(exec_context_tty_path(ec));
6ac8fdc9
MS
3886}
3887
15ae422b
LP
/* Writes every string of the NULL-terminated vector 'l' to 'f', each preceded by a single space.
 * A NULL vector writes nothing. */
static void strv_fprintf(FILE *f, char **l) {
        char **item;

        assert(f);

        for (item = l; item && *item; item++)
                fprintf(f, " %s", *item);
}
3896
34cf6c43 3897void exec_context_dump(const ExecContext *c, FILE* f, const char *prefix) {
d3070fbd 3898 ExecDirectoryType dt;
c2bbd90b 3899 char **e, **d;
94f04347 3900 unsigned i;
add00535 3901 int r;
9eba9da4 3902
5cb5a6ff
LP
3903 assert(c);
3904 assert(f);
3905
4ad49000 3906 prefix = strempty(prefix);
5cb5a6ff
LP
3907
3908 fprintf(f,
94f04347
LP
3909 "%sUMask: %04o\n"
3910 "%sWorkingDirectory: %s\n"
451a074f 3911 "%sRootDirectory: %s\n"
15ae422b 3912 "%sNonBlocking: %s\n"
64747e2d 3913 "%sPrivateTmp: %s\n"
7f112f50 3914 "%sPrivateDevices: %s\n"
59eeb84b 3915 "%sProtectKernelTunables: %s\n"
e66a2f65 3916 "%sProtectKernelModules: %s\n"
59eeb84b 3917 "%sProtectControlGroups: %s\n"
d251207d
LP
3918 "%sPrivateNetwork: %s\n"
3919 "%sPrivateUsers: %s\n"
1b8689f9
LP
3920 "%sProtectHome: %s\n"
3921 "%sProtectSystem: %s\n"
5d997827 3922 "%sMountAPIVFS: %s\n"
f3e43635 3923 "%sIgnoreSIGPIPE: %s\n"
f4170c67 3924 "%sMemoryDenyWriteExecute: %s\n"
b1edf445
LP
3925 "%sRestrictRealtime: %s\n"
3926 "%sKeyringMode: %s\n",
5cb5a6ff 3927 prefix, c->umask,
9eba9da4 3928 prefix, c->working_directory ? c->working_directory : "/",
451a074f 3929 prefix, c->root_directory ? c->root_directory : "/",
15ae422b 3930 prefix, yes_no(c->non_blocking),
64747e2d 3931 prefix, yes_no(c->private_tmp),
7f112f50 3932 prefix, yes_no(c->private_devices),
59eeb84b 3933 prefix, yes_no(c->protect_kernel_tunables),
e66a2f65 3934 prefix, yes_no(c->protect_kernel_modules),
59eeb84b 3935 prefix, yes_no(c->protect_control_groups),
d251207d
LP
3936 prefix, yes_no(c->private_network),
3937 prefix, yes_no(c->private_users),
1b8689f9
LP
3938 prefix, protect_home_to_string(c->protect_home),
3939 prefix, protect_system_to_string(c->protect_system),
5d997827 3940 prefix, yes_no(c->mount_apivfs),
f3e43635 3941 prefix, yes_no(c->ignore_sigpipe),
f4170c67 3942 prefix, yes_no(c->memory_deny_write_execute),
b1edf445
LP
3943 prefix, yes_no(c->restrict_realtime),
3944 prefix, exec_keyring_mode_to_string(c->keyring_mode));
fb33a393 3945
915e6d16
LP
3946 if (c->root_image)
3947 fprintf(f, "%sRootImage: %s\n", prefix, c->root_image);
3948
8c7be95e
LP
3949 STRV_FOREACH(e, c->environment)
3950 fprintf(f, "%sEnvironment: %s\n", prefix, *e);
3951
3952 STRV_FOREACH(e, c->environment_files)
3953 fprintf(f, "%sEnvironmentFile: %s\n", prefix, *e);
94f04347 3954
b4c14404
FB
3955 STRV_FOREACH(e, c->pass_environment)
3956 fprintf(f, "%sPassEnvironment: %s\n", prefix, *e);
3957
00819cc1
LP
3958 STRV_FOREACH(e, c->unset_environment)
3959 fprintf(f, "%sUnsetEnvironment: %s\n", prefix, *e);
3960
53f47dfc
YW
3961 fprintf(f, "%sRuntimeDirectoryPreserve: %s\n", prefix, exec_preserve_mode_to_string(c->runtime_directory_preserve_mode));
3962
72fd1768 3963 for (dt = 0; dt < _EXEC_DIRECTORY_TYPE_MAX; dt++) {
3536f49e
YW
3964 fprintf(f, "%s%sMode: %04o\n", prefix, exec_directory_type_to_string(dt), c->directories[dt].mode);
3965
3966 STRV_FOREACH(d, c->directories[dt].paths)
3967 fprintf(f, "%s%s: %s\n", prefix, exec_directory_type_to_string(dt), *d);
3968 }
c2bbd90b 3969
fb33a393
LP
3970 if (c->nice_set)
3971 fprintf(f,
3972 "%sNice: %i\n",
3973 prefix, c->nice);
3974
dd6c17b1 3975 if (c->oom_score_adjust_set)
fb33a393 3976 fprintf(f,
dd6c17b1
LP
3977 "%sOOMScoreAdjust: %i\n",
3978 prefix, c->oom_score_adjust);
9eba9da4 3979
94f04347 3980 for (i = 0; i < RLIM_NLIMITS; i++)
3c11da9d 3981 if (c->rlimit[i]) {
6550c24c 3982 fprintf(f, "Limit%s%s: " RLIM_FMT "\n",
3c11da9d 3983 prefix, rlimit_to_string(i), c->rlimit[i]->rlim_max);
6550c24c 3984 fprintf(f, "Limit%s%sSoft: " RLIM_FMT "\n",
3c11da9d
EV
3985 prefix, rlimit_to_string(i), c->rlimit[i]->rlim_cur);
3986 }
94f04347 3987
f8b69d1d 3988 if (c->ioprio_set) {
1756a011 3989 _cleanup_free_ char *class_str = NULL;
f8b69d1d 3990
837df140
YW
3991 r = ioprio_class_to_string_alloc(IOPRIO_PRIO_CLASS(c->ioprio), &class_str);
3992 if (r >= 0)
3993 fprintf(f, "%sIOSchedulingClass: %s\n", prefix, class_str);
3994
3995 fprintf(f, "%sIOPriority: %lu\n", prefix, IOPRIO_PRIO_DATA(c->ioprio));
f8b69d1d 3996 }
94f04347 3997
f8b69d1d 3998 if (c->cpu_sched_set) {
1756a011 3999 _cleanup_free_ char *policy_str = NULL;
f8b69d1d 4000
837df140
YW
4001 r = sched_policy_to_string_alloc(c->cpu_sched_policy, &policy_str);
4002 if (r >= 0)
4003 fprintf(f, "%sCPUSchedulingPolicy: %s\n", prefix, policy_str);
4004
94f04347 4005 fprintf(f,
38b48754
LP
4006 "%sCPUSchedulingPriority: %i\n"
4007 "%sCPUSchedulingResetOnFork: %s\n",
38b48754
LP
4008 prefix, c->cpu_sched_priority,
4009 prefix, yes_no(c->cpu_sched_reset_on_fork));
b929bf04 4010 }
94f04347 4011
82c121a4 4012 if (c->cpuset) {
94f04347 4013 fprintf(f, "%sCPUAffinity:", prefix);
82c121a4
LP
4014 for (i = 0; i < c->cpuset_ncpus; i++)
4015 if (CPU_ISSET_S(i, CPU_ALLOC_SIZE(c->cpuset_ncpus), c->cpuset))
43a99a7a 4016 fprintf(f, " %u", i);
94f04347
LP
4017 fputs("\n", f);
4018 }
4019
3a43da28 4020 if (c->timer_slack_nsec != NSEC_INFINITY)
ccd06097 4021 fprintf(f, "%sTimerSlackNSec: "NSEC_FMT "\n", prefix, c->timer_slack_nsec);
94f04347
LP
4022
4023 fprintf(f,
80876c20
LP
4024 "%sStandardInput: %s\n"
4025 "%sStandardOutput: %s\n"
4026 "%sStandardError: %s\n",
4027 prefix, exec_input_to_string(c->std_input),
4028 prefix, exec_output_to_string(c->std_output),
4029 prefix, exec_output_to_string(c->std_error));
4030
befc4a80
LP
4031 if (c->std_input == EXEC_INPUT_NAMED_FD)
4032 fprintf(f, "%sStandardInputFileDescriptorName: %s\n", prefix, c->stdio_fdname[STDIN_FILENO]);
4033 if (c->std_output == EXEC_OUTPUT_NAMED_FD)
4034 fprintf(f, "%sStandardOutputFileDescriptorName: %s\n", prefix, c->stdio_fdname[STDOUT_FILENO]);
4035 if (c->std_error == EXEC_OUTPUT_NAMED_FD)
4036 fprintf(f, "%sStandardErrorFileDescriptorName: %s\n", prefix, c->stdio_fdname[STDERR_FILENO]);
4037
4038 if (c->std_input == EXEC_INPUT_FILE)
4039 fprintf(f, "%sStandardInputFile: %s\n", prefix, c->stdio_file[STDIN_FILENO]);
4040 if (c->std_output == EXEC_OUTPUT_FILE)
4041 fprintf(f, "%sStandardOutputFile: %s\n", prefix, c->stdio_file[STDOUT_FILENO]);
4042 if (c->std_error == EXEC_OUTPUT_FILE)
4043 fprintf(f, "%sStandardErrorFile: %s\n", prefix, c->stdio_file[STDERR_FILENO]);
4044
80876c20
LP
4045 if (c->tty_path)
4046 fprintf(f,
6ea832a2
LP
4047 "%sTTYPath: %s\n"
4048 "%sTTYReset: %s\n"
4049 "%sTTYVHangup: %s\n"
4050 "%sTTYVTDisallocate: %s\n",
4051 prefix, c->tty_path,
4052 prefix, yes_no(c->tty_reset),
4053 prefix, yes_no(c->tty_vhangup),
4054 prefix, yes_no(c->tty_vt_disallocate));
94f04347 4055
9f6444eb
LP
4056 if (IN_SET(c->std_output,
4057 EXEC_OUTPUT_SYSLOG,
4058 EXEC_OUTPUT_KMSG,
4059 EXEC_OUTPUT_JOURNAL,
4060 EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
4061 EXEC_OUTPUT_KMSG_AND_CONSOLE,
4062 EXEC_OUTPUT_JOURNAL_AND_CONSOLE) ||
4063 IN_SET(c->std_error,
4064 EXEC_OUTPUT_SYSLOG,
4065 EXEC_OUTPUT_KMSG,
4066 EXEC_OUTPUT_JOURNAL,
4067 EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
4068 EXEC_OUTPUT_KMSG_AND_CONSOLE,
4069 EXEC_OUTPUT_JOURNAL_AND_CONSOLE)) {
f8b69d1d 4070
5ce70e5b 4071 _cleanup_free_ char *fac_str = NULL, *lvl_str = NULL;
f8b69d1d 4072
837df140
YW
4073 r = log_facility_unshifted_to_string_alloc(c->syslog_priority >> 3, &fac_str);
4074 if (r >= 0)
4075 fprintf(f, "%sSyslogFacility: %s\n", prefix, fac_str);
f8b69d1d 4076
837df140
YW
4077 r = log_level_to_string_alloc(LOG_PRI(c->syslog_priority), &lvl_str);
4078 if (r >= 0)
4079 fprintf(f, "%sSyslogLevel: %s\n", prefix, lvl_str);
f8b69d1d 4080 }
94f04347 4081
d3070fbd
LP
4082 if (c->log_level_max >= 0) {
4083 _cleanup_free_ char *t = NULL;
4084
4085 (void) log_level_to_string_alloc(c->log_level_max, &t);
4086
4087 fprintf(f, "%sLogLevelMax: %s\n", prefix, strna(t));
4088 }
4089
4090 if (c->n_log_extra_fields > 0) {
4091 size_t j;
4092
4093 for (j = 0; j < c->n_log_extra_fields; j++) {
4094 fprintf(f, "%sLogExtraFields: ", prefix);
4095 fwrite(c->log_extra_fields[j].iov_base,
4096 1, c->log_extra_fields[j].iov_len,
4097 f);
4098 fputc('\n', f);
4099 }
4100 }
4101
07d46372
YW
4102 if (c->secure_bits) {
4103 _cleanup_free_ char *str = NULL;
4104
4105 r = secure_bits_to_string_alloc(c->secure_bits, &str);
4106 if (r >= 0)
4107 fprintf(f, "%sSecure Bits: %s\n", prefix, str);
4108 }
94f04347 4109
a103496c 4110 if (c->capability_bounding_set != CAP_ALL) {
dd1f5bd0 4111 _cleanup_free_ char *str = NULL;
94f04347 4112
dd1f5bd0
YW
4113 r = capability_set_to_string_alloc(c->capability_bounding_set, &str);
4114 if (r >= 0)
4115 fprintf(f, "%sCapabilityBoundingSet: %s\n", prefix, str);
755d4b67
IP
4116 }
4117
4118 if (c->capability_ambient_set != 0) {
dd1f5bd0 4119 _cleanup_free_ char *str = NULL;
755d4b67 4120
dd1f5bd0
YW
4121 r = capability_set_to_string_alloc(c->capability_ambient_set, &str);
4122 if (r >= 0)
4123 fprintf(f, "%sAmbientCapabilities: %s\n", prefix, str);
94f04347
LP
4124 }
4125
4126 if (c->user)
f2d3769a 4127 fprintf(f, "%sUser: %s\n", prefix, c->user);
94f04347 4128 if (c->group)
f2d3769a 4129 fprintf(f, "%sGroup: %s\n", prefix, c->group);
94f04347 4130
29206d46
LP
4131 fprintf(f, "%sDynamicUser: %s\n", prefix, yes_no(c->dynamic_user));
4132
ac6e8be6 4133 if (!strv_isempty(c->supplementary_groups)) {
94f04347 4134 fprintf(f, "%sSupplementaryGroups:", prefix);
15ae422b
LP
4135 strv_fprintf(f, c->supplementary_groups);
4136 fputs("\n", f);
4137 }
94f04347 4138
5b6319dc 4139 if (c->pam_name)
f2d3769a 4140 fprintf(f, "%sPAMName: %s\n", prefix, c->pam_name);
5b6319dc 4141
58629001 4142 if (!strv_isempty(c->read_write_paths)) {
2a624c36
AP
4143 fprintf(f, "%sReadWritePaths:", prefix);
4144 strv_fprintf(f, c->read_write_paths);
15ae422b
LP
4145 fputs("\n", f);
4146 }
4147
58629001 4148 if (!strv_isempty(c->read_only_paths)) {
2a624c36
AP
4149 fprintf(f, "%sReadOnlyPaths:", prefix);
4150 strv_fprintf(f, c->read_only_paths);
15ae422b
LP
4151 fputs("\n", f);
4152 }
94f04347 4153
58629001 4154 if (!strv_isempty(c->inaccessible_paths)) {
2a624c36
AP
4155 fprintf(f, "%sInaccessiblePaths:", prefix);
4156 strv_fprintf(f, c->inaccessible_paths);
94f04347
LP
4157 fputs("\n", f);
4158 }
2e22afe9 4159
d2d6c096 4160 if (c->n_bind_mounts > 0)
4ca763a9
YW
4161 for (i = 0; i < c->n_bind_mounts; i++)
4162 fprintf(f, "%s%s: %s%s:%s:%s\n", prefix,
d2d6c096 4163 c->bind_mounts[i].read_only ? "BindReadOnlyPaths" : "BindPaths",
4ca763a9 4164 c->bind_mounts[i].ignore_enoent ? "-": "",
d2d6c096
LP
4165 c->bind_mounts[i].source,
4166 c->bind_mounts[i].destination,
4167 c->bind_mounts[i].recursive ? "rbind" : "norbind");
d2d6c096 4168
2abd4e38
YW
4169 if (c->n_temporary_filesystems > 0)
4170 for (i = 0; i < c->n_temporary_filesystems; i++) {
4171 TemporaryFileSystem *t = c->temporary_filesystems + i;
4172
4173 fprintf(f, "%sTemporaryFileSystem: %s%s%s\n", prefix,
4174 t->path,
4175 isempty(t->options) ? "" : ":",
4176 strempty(t->options));
4177 }
4178
169c1bda
LP
4179 if (c->utmp_id)
4180 fprintf(f,
4181 "%sUtmpIdentifier: %s\n",
4182 prefix, c->utmp_id);
7b52a628
MS
4183
4184 if (c->selinux_context)
4185 fprintf(f,
5f8640fb
LP
4186 "%sSELinuxContext: %s%s\n",
4187 prefix, c->selinux_context_ignore ? "-" : "", c->selinux_context);
17df7223 4188
80c21aea
WC
4189 if (c->apparmor_profile)
4190 fprintf(f,
4191 "%sAppArmorProfile: %s%s\n",
4192 prefix, c->apparmor_profile_ignore ? "-" : "", c->apparmor_profile);
4193
4194 if (c->smack_process_label)
4195 fprintf(f,
4196 "%sSmackProcessLabel: %s%s\n",
4197 prefix, c->smack_process_label_ignore ? "-" : "", c->smack_process_label);
4198
050f7277 4199 if (c->personality != PERSONALITY_INVALID)
ac45f971
LP
4200 fprintf(f,
4201 "%sPersonality: %s\n",
4202 prefix, strna(personality_to_string(c->personality)));
4203
78e864e5
TM
4204 fprintf(f,
4205 "%sLockPersonality: %s\n",
4206 prefix, yes_no(c->lock_personality));
4207
17df7223 4208 if (c->syscall_filter) {
349cc4a5 4209#if HAVE_SECCOMP
17df7223 4210 Iterator j;
8cfa775f 4211 void *id, *val;
17df7223 4212 bool first = true;
351a19b1 4213#endif
17df7223
LP
4214
4215 fprintf(f,
57183d11 4216 "%sSystemCallFilter: ",
17df7223
LP
4217 prefix);
4218
4219 if (!c->syscall_whitelist)
4220 fputc('~', f);
4221
349cc4a5 4222#if HAVE_SECCOMP
8cfa775f 4223 HASHMAP_FOREACH_KEY(val, id, c->syscall_filter, j) {
17df7223 4224 _cleanup_free_ char *name = NULL;
8cfa775f
YW
4225 const char *errno_name = NULL;
4226 int num = PTR_TO_INT(val);
17df7223
LP
4227
4228 if (first)
4229 first = false;
4230 else
4231 fputc(' ', f);
4232
57183d11 4233 name = seccomp_syscall_resolve_num_arch(SCMP_ARCH_NATIVE, PTR_TO_INT(id) - 1);
17df7223 4234 fputs(strna(name), f);
8cfa775f
YW
4235
4236 if (num >= 0) {
4237 errno_name = errno_to_name(num);
4238 if (errno_name)
4239 fprintf(f, ":%s", errno_name);
4240 else
4241 fprintf(f, ":%d", num);
4242 }
17df7223 4243 }
351a19b1 4244#endif
17df7223
LP
4245
4246 fputc('\n', f);
4247 }
4248
57183d11 4249 if (c->syscall_archs) {
349cc4a5 4250#if HAVE_SECCOMP
57183d11
LP
4251 Iterator j;
4252 void *id;
4253#endif
4254
4255 fprintf(f,
4256 "%sSystemCallArchitectures:",
4257 prefix);
4258
349cc4a5 4259#if HAVE_SECCOMP
57183d11
LP
4260 SET_FOREACH(id, c->syscall_archs, j)
4261 fprintf(f, " %s", strna(seccomp_arch_to_string(PTR_TO_UINT32(id) - 1)));
4262#endif
4263 fputc('\n', f);
4264 }
4265
add00535
LP
4266 if (exec_context_restrict_namespaces_set(c)) {
4267 _cleanup_free_ char *s = NULL;
4268
86c2a9f1 4269 r = namespace_flags_to_string(c->restrict_namespaces, &s);
add00535
LP
4270 if (r >= 0)
4271 fprintf(f, "%sRestrictNamespaces: %s\n",
4272 prefix, s);
4273 }
4274
3df90f24
YW
4275 if (c->syscall_errno > 0) {
4276 const char *errno_name;
4277
4278 fprintf(f, "%sSystemCallErrorNumber: ", prefix);
4279
4280 errno_name = errno_to_name(c->syscall_errno);
4281 if (errno_name)
4282 fprintf(f, "%s\n", errno_name);
4283 else
4284 fprintf(f, "%d\n", c->syscall_errno);
4285 }
eef65bf3
MS
4286
4287 if (c->apparmor_profile)
4288 fprintf(f,
4289 "%sAppArmorProfile: %s%s\n",
4290 prefix, c->apparmor_profile_ignore ? "-" : "", c->apparmor_profile);
5cb5a6ff
LP
4291}
4292
34cf6c43 4293bool exec_context_maintains_privileges(const ExecContext *c) {
a931ad47
LP
4294 assert(c);
4295
61233823 4296 /* Returns true if the process forked off would run under
a931ad47
LP
4297 * an unchanged UID or as root. */
4298
4299 if (!c->user)
4300 return true;
4301
4302 if (streq(c->user, "root") || streq(c->user, "0"))
4303 return true;
4304
4305 return false;
4306}
4307
34cf6c43 4308int exec_context_get_effective_ioprio(const ExecContext *c) {
7f452159
LP
4309 int p;
4310
4311 assert(c);
4312
4313 if (c->ioprio_set)
4314 return c->ioprio;
4315
4316 p = ioprio_get(IOPRIO_WHO_PROCESS, 0);
4317 if (p < 0)
4318 return IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 4);
4319
4320 return p;
4321}
4322
d3070fbd
LP
4323void exec_context_free_log_extra_fields(ExecContext *c) {
4324 size_t l;
4325
4326 assert(c);
4327
4328 for (l = 0; l < c->n_log_extra_fields; l++)
4329 free(c->log_extra_fields[l].iov_base);
4330 c->log_extra_fields = mfree(c->log_extra_fields);
4331 c->n_log_extra_fields = 0;
4332}
4333
b58b4116 4334void exec_status_start(ExecStatus *s, pid_t pid) {
034c6ed7 4335 assert(s);
5cb5a6ff 4336
b58b4116
LP
4337 zero(*s);
4338 s->pid = pid;
4339 dual_timestamp_get(&s->start_timestamp);
4340}
4341
34cf6c43 4342void exec_status_exit(ExecStatus *s, const ExecContext *context, pid_t pid, int code, int status) {
b58b4116
LP
4343 assert(s);
4344
0b1f4ae6 4345 if (s->pid && s->pid != pid)
b58b4116
LP
4346 zero(*s);
4347
034c6ed7 4348 s->pid = pid;
63983207 4349 dual_timestamp_get(&s->exit_timestamp);
9fb86720 4350
034c6ed7
LP
4351 s->code = code;
4352 s->status = status;
169c1bda 4353
6ea832a2
LP
4354 if (context) {
4355 if (context->utmp_id)
4356 utmp_put_dead_process(context->utmp_id, pid, code, status);
4357
1e22b5cd 4358 exec_context_tty_reset(context, NULL);
6ea832a2 4359 }
9fb86720
LP
4360}
4361
34cf6c43 4362void exec_status_dump(const ExecStatus *s, FILE *f, const char *prefix) {
9fb86720
LP
4363 char buf[FORMAT_TIMESTAMP_MAX];
4364
4365 assert(s);
4366 assert(f);
4367
9fb86720
LP
4368 if (s->pid <= 0)
4369 return;
4370
4c940960
LP
4371 prefix = strempty(prefix);
4372
9fb86720 4373 fprintf(f,
ccd06097
ZJS
4374 "%sPID: "PID_FMT"\n",
4375 prefix, s->pid);
9fb86720 4376
af9d16e1 4377 if (dual_timestamp_is_set(&s->start_timestamp))
9fb86720
LP
4378 fprintf(f,
4379 "%sStart Timestamp: %s\n",
63983207 4380 prefix, format_timestamp(buf, sizeof(buf), s->start_timestamp.realtime));
9fb86720 4381
af9d16e1 4382 if (dual_timestamp_is_set(&s->exit_timestamp))
9fb86720
LP
4383 fprintf(f,
4384 "%sExit Timestamp: %s\n"
4385 "%sExit Code: %s\n"
4386 "%sExit Status: %i\n",
63983207 4387 prefix, format_timestamp(buf, sizeof(buf), s->exit_timestamp.realtime),
9fb86720
LP
4388 prefix, sigchld_code_to_string(s->code),
4389 prefix, s->status);
5cb5a6ff 4390}
44d8db9e 4391
34cf6c43 4392static char *exec_command_line(char **argv) {
44d8db9e
LP
4393 size_t k;
4394 char *n, *p, **a;
4395 bool first = true;
4396
9e2f7c11 4397 assert(argv);
44d8db9e 4398
9164977d 4399 k = 1;
9e2f7c11 4400 STRV_FOREACH(a, argv)
44d8db9e
LP
4401 k += strlen(*a)+3;
4402
5cd9cd35
LP
4403 n = new(char, k);
4404 if (!n)
44d8db9e
LP
4405 return NULL;
4406
4407 p = n;
9e2f7c11 4408 STRV_FOREACH(a, argv) {
44d8db9e
LP
4409
4410 if (!first)
4411 *(p++) = ' ';
4412 else
4413 first = false;
4414
4415 if (strpbrk(*a, WHITESPACE)) {
4416 *(p++) = '\'';
4417 p = stpcpy(p, *a);
4418 *(p++) = '\'';
4419 } else
4420 p = stpcpy(p, *a);
4421
4422 }
4423
9164977d
LP
4424 *p = 0;
4425
44d8db9e
LP
4426 /* FIXME: this doesn't really handle arguments that have
4427 * spaces and ticks in them */
4428
4429 return n;
4430}
4431
34cf6c43 4432static void exec_command_dump(ExecCommand *c, FILE *f, const char *prefix) {
e1d75803 4433 _cleanup_free_ char *cmd = NULL;
4c940960 4434 const char *prefix2;
44d8db9e
LP
4435
4436 assert(c);
4437 assert(f);
4438
4c940960 4439 prefix = strempty(prefix);
63c372cb 4440 prefix2 = strjoina(prefix, "\t");
44d8db9e 4441
9e2f7c11 4442 cmd = exec_command_line(c->argv);
44d8db9e
LP
4443 fprintf(f,
4444 "%sCommand Line: %s\n",
4445 prefix, cmd ? cmd : strerror(ENOMEM));
4446
9fb86720 4447 exec_status_dump(&c->exec_status, f, prefix2);
44d8db9e
LP
4448}
4449
4450void exec_command_dump_list(ExecCommand *c, FILE *f, const char *prefix) {
4451 assert(f);
4452
4c940960 4453 prefix = strempty(prefix);
44d8db9e
LP
4454
4455 LIST_FOREACH(command, c, c)
4456 exec_command_dump(c, f, prefix);
4457}
94f04347 4458
a6a80b4f
LP
4459void exec_command_append_list(ExecCommand **l, ExecCommand *e) {
4460 ExecCommand *end;
4461
4462 assert(l);
4463 assert(e);
4464
4465 if (*l) {
35b8ca3a 4466 /* It's kind of important, that we keep the order here */
71fda00f
LP
4467 LIST_FIND_TAIL(command, *l, end);
4468 LIST_INSERT_AFTER(command, *l, end, e);
a6a80b4f
LP
4469 } else
4470 *l = e;
4471}
4472
26fd040d
LP
4473int exec_command_set(ExecCommand *c, const char *path, ...) {
4474 va_list ap;
4475 char **l, *p;
4476
4477 assert(c);
4478 assert(path);
4479
4480 va_start(ap, path);
4481 l = strv_new_ap(path, ap);
4482 va_end(ap);
4483
4484 if (!l)
4485 return -ENOMEM;
4486
250a918d
LP
4487 p = strdup(path);
4488 if (!p) {
26fd040d
LP
4489 strv_free(l);
4490 return -ENOMEM;
4491 }
4492
4493 free(c->path);
4494 c->path = p;
4495
130d3d22 4496 return strv_free_and_replace(c->argv, l);
26fd040d
LP
4497}
4498
86b23b07 4499int exec_command_append(ExecCommand *c, const char *path, ...) {
e63ff941 4500 _cleanup_strv_free_ char **l = NULL;
86b23b07 4501 va_list ap;
86b23b07
JS
4502 int r;
4503
4504 assert(c);
4505 assert(path);
4506
4507 va_start(ap, path);
4508 l = strv_new_ap(path, ap);
4509 va_end(ap);
4510
4511 if (!l)
4512 return -ENOMEM;
4513
e287086b 4514 r = strv_extend_strv(&c->argv, l, false);
e63ff941 4515 if (r < 0)
86b23b07 4516 return r;
86b23b07
JS
4517
4518 return 0;
4519}
4520
e8a565cb
YW
4521static void *remove_tmpdir_thread(void *p) {
4522 _cleanup_free_ char *path = p;
86b23b07 4523
e8a565cb
YW
4524 (void) rm_rf(path, REMOVE_ROOT|REMOVE_PHYSICAL);
4525 return NULL;
4526}
4527
4528static ExecRuntime* exec_runtime_free(ExecRuntime *rt, bool destroy) {
4529 int r;
4530
4531 if (!rt)
4532 return NULL;
4533
4534 if (rt->manager)
4535 (void) hashmap_remove(rt->manager->exec_runtime_by_id, rt->id);
4536
4537 /* When destroy is true, then rm_rf tmp_dir and var_tmp_dir. */
4538 if (destroy && rt->tmp_dir) {
4539 log_debug("Spawning thread to nuke %s", rt->tmp_dir);
4540
4541 r = asynchronous_job(remove_tmpdir_thread, rt->tmp_dir);
4542 if (r < 0) {
4543 log_warning_errno(r, "Failed to nuke %s: %m", rt->tmp_dir);
4544 free(rt->tmp_dir);
4545 }
4546
4547 rt->tmp_dir = NULL;
4548 }
613b411c 4549
e8a565cb
YW
4550 if (destroy && rt->var_tmp_dir) {
4551 log_debug("Spawning thread to nuke %s", rt->var_tmp_dir);
4552
4553 r = asynchronous_job(remove_tmpdir_thread, rt->var_tmp_dir);
4554 if (r < 0) {
4555 log_warning_errno(r, "Failed to nuke %s: %m", rt->var_tmp_dir);
4556 free(rt->var_tmp_dir);
4557 }
4558
4559 rt->var_tmp_dir = NULL;
4560 }
4561
4562 rt->id = mfree(rt->id);
4563 rt->tmp_dir = mfree(rt->tmp_dir);
4564 rt->var_tmp_dir = mfree(rt->var_tmp_dir);
4565 safe_close_pair(rt->netns_storage_socket);
4566 return mfree(rt);
4567}
4568
4569static void exec_runtime_freep(ExecRuntime **rt) {
613b411c 4570 if (*rt)
e8a565cb
YW
4571 (void) exec_runtime_free(*rt, false);
4572}
4573
4574static int exec_runtime_allocate(ExecRuntime **rt) {
4575 assert(rt);
613b411c
LP
4576
4577 *rt = new0(ExecRuntime, 1);
f146f5e1 4578 if (!*rt)
613b411c
LP
4579 return -ENOMEM;
4580
613b411c 4581 (*rt)->netns_storage_socket[0] = (*rt)->netns_storage_socket[1] = -1;
613b411c
LP
4582 return 0;
4583}
4584
e8a565cb
YW
4585static int exec_runtime_add(
4586 Manager *m,
4587 const char *id,
4588 const char *tmp_dir,
4589 const char *var_tmp_dir,
4590 const int netns_storage_socket[2],
4591 ExecRuntime **ret) {
4592
4593 _cleanup_(exec_runtime_freep) ExecRuntime *rt = NULL;
613b411c
LP
4594 int r;
4595
e8a565cb 4596 assert(m);
613b411c
LP
4597 assert(id);
4598
e8a565cb
YW
4599 r = hashmap_ensure_allocated(&m->exec_runtime_by_id, &string_hash_ops);
4600 if (r < 0)
4601 return r;
613b411c 4602
e8a565cb 4603 r = exec_runtime_allocate(&rt);
613b411c
LP
4604 if (r < 0)
4605 return r;
4606
e8a565cb
YW
4607 rt->id = strdup(id);
4608 if (!rt->id)
4609 return -ENOMEM;
4610
4611 if (tmp_dir) {
4612 rt->tmp_dir = strdup(tmp_dir);
4613 if (!rt->tmp_dir)
4614 return -ENOMEM;
4615
4616 /* When tmp_dir is set, then we require var_tmp_dir is also set. */
4617 assert(var_tmp_dir);
4618 rt->var_tmp_dir = strdup(var_tmp_dir);
4619 if (!rt->var_tmp_dir)
4620 return -ENOMEM;
4621 }
4622
4623 if (netns_storage_socket) {
4624 rt->netns_storage_socket[0] = netns_storage_socket[0];
4625 rt->netns_storage_socket[1] = netns_storage_socket[1];
613b411c
LP
4626 }
4627
e8a565cb
YW
4628 r = hashmap_put(m->exec_runtime_by_id, rt->id, rt);
4629 if (r < 0)
4630 return r;
4631
4632 rt->manager = m;
4633
4634 if (ret)
4635 *ret = rt;
4636
4637 /* do not remove created ExecRuntime object when the operation succeeds. */
4638 rt = NULL;
4639 return 0;
4640}
4641
4642static int exec_runtime_make(Manager *m, const ExecContext *c, const char *id, ExecRuntime **ret) {
4643 _cleanup_free_ char *tmp_dir = NULL, *var_tmp_dir = NULL;
4644 _cleanup_close_pair_ int netns_storage_socket[2] = {-1, -1};
4645 int r;
4646
4647 assert(m);
4648 assert(c);
4649 assert(id);
4650
4651 /* It is not necessary to create ExecRuntime object. */
4652 if (!c->private_network && !c->private_tmp)
4653 return 0;
4654
4655 if (c->private_tmp) {
4656 r = setup_tmp_dirs(id, &tmp_dir, &var_tmp_dir);
613b411c
LP
4657 if (r < 0)
4658 return r;
4659 }
4660
e8a565cb
YW
4661 if (c->private_network) {
4662 if (socketpair(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC, 0, netns_storage_socket) < 0)
4663 return -errno;
4664 }
4665
4666 r = exec_runtime_add(m, id, tmp_dir, var_tmp_dir, netns_storage_socket, ret);
4667 if (r < 0)
4668 return r;
4669
4670 /* Avoid cleanup */
4671 netns_storage_socket[0] = -1;
4672 netns_storage_socket[1] = -1;
613b411c
LP
4673 return 1;
4674}
4675
e8a565cb
YW
4676int exec_runtime_acquire(Manager *m, const ExecContext *c, const char *id, bool create, ExecRuntime **ret) {
4677 ExecRuntime *rt;
4678 int r;
613b411c 4679
e8a565cb
YW
4680 assert(m);
4681 assert(id);
4682 assert(ret);
4683
4684 rt = hashmap_get(m->exec_runtime_by_id, id);
4685 if (rt)
4686 /* We already have a ExecRuntime object, let's increase the ref count and reuse it */
4687 goto ref;
4688
4689 if (!create)
4690 return 0;
4691
4692 /* If not found, then create a new object. */
4693 r = exec_runtime_make(m, c, id, &rt);
4694 if (r <= 0)
4695 /* When r == 0, it is not necessary to create ExecRuntime object. */
4696 return r;
613b411c 4697
e8a565cb
YW
4698ref:
4699 /* increment reference counter. */
4700 rt->n_ref++;
4701 *ret = rt;
4702 return 1;
4703}
613b411c 4704
e8a565cb
YW
4705ExecRuntime *exec_runtime_unref(ExecRuntime *rt, bool destroy) {
4706 if (!rt)
613b411c
LP
4707 return NULL;
4708
e8a565cb 4709 assert(rt->n_ref > 0);
613b411c 4710
e8a565cb
YW
4711 rt->n_ref--;
4712 if (rt->n_ref > 0)
f2341e0a
LP
4713 return NULL;
4714
e8a565cb 4715 return exec_runtime_free(rt, destroy);
613b411c
LP
4716}
4717
e8a565cb
YW
4718int exec_runtime_serialize(const Manager *m, FILE *f, FDSet *fds) {
4719 ExecRuntime *rt;
4720 Iterator i;
4721
4722 assert(m);
613b411c
LP
4723 assert(f);
4724 assert(fds);
4725
e8a565cb
YW
4726 HASHMAP_FOREACH(rt, m->exec_runtime_by_id, i) {
4727 fprintf(f, "exec-runtime=%s", rt->id);
613b411c 4728
e8a565cb
YW
4729 if (rt->tmp_dir)
4730 fprintf(f, " tmp-dir=%s", rt->tmp_dir);
613b411c 4731
e8a565cb
YW
4732 if (rt->var_tmp_dir)
4733 fprintf(f, " var-tmp-dir=%s", rt->var_tmp_dir);
613b411c 4734
e8a565cb
YW
4735 if (rt->netns_storage_socket[0] >= 0) {
4736 int copy;
613b411c 4737
e8a565cb
YW
4738 copy = fdset_put_dup(fds, rt->netns_storage_socket[0]);
4739 if (copy < 0)
4740 return copy;
613b411c 4741
e8a565cb
YW
4742 fprintf(f, " netns-socket-0=%i", copy);
4743 }
613b411c 4744
e8a565cb
YW
4745 if (rt->netns_storage_socket[1] >= 0) {
4746 int copy;
613b411c 4747
e8a565cb
YW
4748 copy = fdset_put_dup(fds, rt->netns_storage_socket[1]);
4749 if (copy < 0)
4750 return copy;
613b411c 4751
e8a565cb
YW
4752 fprintf(f, " netns-socket-1=%i", copy);
4753 }
4754
4755 fputc('\n', f);
613b411c
LP
4756 }
4757
4758 return 0;
4759}
4760
e8a565cb
YW
4761int exec_runtime_deserialize_compat(Unit *u, const char *key, const char *value, FDSet *fds) {
4762 _cleanup_(exec_runtime_freep) ExecRuntime *rt_create = NULL;
4763 ExecRuntime *rt;
613b411c
LP
4764 int r;
4765
e8a565cb
YW
4766 /* This is for the migration from old (v237 or earlier) deserialization text.
4767 * Due to the bug #7790, this may not work with the units that use JoinsNamespaceOf=.
4768 * Even if the ExecRuntime object originally created by the other unit, we cannot judge
4769 * so or not from the serialized text, then we always creates a new object owned by this. */
4770
4771 assert(u);
613b411c
LP
4772 assert(key);
4773 assert(value);
4774
e8a565cb
YW
4775 /* Manager manages ExecRuntime objects by the unit id.
4776 * So, we omit the serialized text when the unit does not have id (yet?)... */
4777 if (isempty(u->id)) {
4778 log_unit_debug(u, "Invocation ID not found. Dropping runtime parameter.");
4779 return 0;
4780 }
613b411c 4781
e8a565cb
YW
4782 r = hashmap_ensure_allocated(&u->manager->exec_runtime_by_id, &string_hash_ops);
4783 if (r < 0) {
4784 log_unit_debug_errno(u, r, "Failed to allocate storage for runtime parameter: %m");
4785 return 0;
4786 }
4787
4788 rt = hashmap_get(u->manager->exec_runtime_by_id, u->id);
4789 if (!rt) {
4790 r = exec_runtime_allocate(&rt_create);
613b411c 4791 if (r < 0)
f2341e0a 4792 return log_oom();
613b411c 4793
e8a565cb
YW
4794 rt_create->id = strdup(u->id);
4795 if (!rt_create->id)
4796 return log_oom();
4797
4798 rt = rt_create;
4799 }
4800
4801 if (streq(key, "tmp-dir")) {
4802 char *copy;
4803
613b411c
LP
4804 copy = strdup(value);
4805 if (!copy)
4806 return log_oom();
4807
e8a565cb 4808 free_and_replace(rt->tmp_dir, copy);
613b411c
LP
4809
4810 } else if (streq(key, "var-tmp-dir")) {
4811 char *copy;
4812
613b411c
LP
4813 copy = strdup(value);
4814 if (!copy)
4815 return log_oom();
4816
e8a565cb 4817 free_and_replace(rt->var_tmp_dir, copy);
613b411c
LP
4818
4819 } else if (streq(key, "netns-socket-0")) {
4820 int fd;
4821
e8a565cb 4822 if (safe_atoi(value, &fd) < 0 || !fdset_contains(fds, fd)) {
f2341e0a 4823 log_unit_debug(u, "Failed to parse netns socket value: %s", value);
e8a565cb 4824 return 0;
613b411c 4825 }
e8a565cb
YW
4826
4827 safe_close(rt->netns_storage_socket[0]);
4828 rt->netns_storage_socket[0] = fdset_remove(fds, fd);
4829
613b411c
LP
4830 } else if (streq(key, "netns-socket-1")) {
4831 int fd;
4832
e8a565cb 4833 if (safe_atoi(value, &fd) < 0 || !fdset_contains(fds, fd)) {
f2341e0a 4834 log_unit_debug(u, "Failed to parse netns socket value: %s", value);
e8a565cb 4835 return 0;
613b411c 4836 }
e8a565cb
YW
4837
4838 safe_close(rt->netns_storage_socket[1]);
4839 rt->netns_storage_socket[1] = fdset_remove(fds, fd);
613b411c
LP
4840 } else
4841 return 0;
4842
e8a565cb
YW
4843 /* If the object is newly created, then put it to the hashmap which manages ExecRuntime objects. */
4844 if (rt_create) {
4845 r = hashmap_put(u->manager->exec_runtime_by_id, rt_create->id, rt_create);
4846 if (r < 0) {
4847 log_unit_debug_errno(u, r, "Failed to put runtime paramter to manager's storage: %m");
4848 return 0;
4849 }
613b411c 4850
e8a565cb 4851 rt_create->manager = u->manager;
613b411c 4852
e8a565cb
YW
4853 /* Avoid cleanup */
4854 rt_create = NULL;
4855 }
98b47d54 4856
e8a565cb
YW
4857 return 1;
4858}
613b411c 4859
e8a565cb
YW
4860void exec_runtime_deserialize_one(Manager *m, const char *value, FDSet *fds) {
4861 char *id = NULL, *tmp_dir = NULL, *var_tmp_dir = NULL;
4862 int r, fd0 = -1, fd1 = -1;
4863 const char *p, *v = value;
4864 size_t n;
613b411c 4865
e8a565cb
YW
4866 assert(m);
4867 assert(value);
4868 assert(fds);
98b47d54 4869
e8a565cb
YW
4870 n = strcspn(v, " ");
4871 id = strndupa(v, n);
4872 if (v[n] != ' ')
4873 goto finalize;
4874 p = v + n + 1;
4875
4876 v = startswith(p, "tmp-dir=");
4877 if (v) {
4878 n = strcspn(v, " ");
4879 tmp_dir = strndupa(v, n);
4880 if (v[n] != ' ')
4881 goto finalize;
4882 p = v + n + 1;
4883 }
4884
4885 v = startswith(p, "var-tmp-dir=");
4886 if (v) {
4887 n = strcspn(v, " ");
4888 var_tmp_dir = strndupa(v, n);
4889 if (v[n] != ' ')
4890 goto finalize;
4891 p = v + n + 1;
4892 }
4893
4894 v = startswith(p, "netns-socket-0=");
4895 if (v) {
4896 char *buf;
4897
4898 n = strcspn(v, " ");
4899 buf = strndupa(v, n);
4900 if (safe_atoi(buf, &fd0) < 0 || !fdset_contains(fds, fd0)) {
4901 log_debug("Unable to process exec-runtime netns fd specification.");
4902 return;
98b47d54 4903 }
e8a565cb
YW
4904 fd0 = fdset_remove(fds, fd0);
4905 if (v[n] != ' ')
4906 goto finalize;
4907 p = v + n + 1;
613b411c
LP
4908 }
4909
e8a565cb
YW
4910 v = startswith(p, "netns-socket-1=");
4911 if (v) {
4912 char *buf;
98b47d54 4913
e8a565cb
YW
4914 n = strcspn(v, " ");
4915 buf = strndupa(v, n);
4916 if (safe_atoi(buf, &fd1) < 0 || !fdset_contains(fds, fd1)) {
4917 log_debug("Unable to process exec-runtime netns fd specification.");
4918 return;
98b47d54 4919 }
e8a565cb
YW
4920 fd1 = fdset_remove(fds, fd1);
4921 }
98b47d54 4922
e8a565cb
YW
4923finalize:
4924
4925 r = exec_runtime_add(m, id, tmp_dir, var_tmp_dir, (int[]) { fd0, fd1 }, NULL);
4926 if (r < 0) {
4927 log_debug_errno(r, "Failed to add exec-runtime: %m");
4928 return;
613b411c 4929 }
e8a565cb 4930}
613b411c 4931
e8a565cb
YW
4932void exec_runtime_vacuum(Manager *m) {
4933 ExecRuntime *rt;
4934 Iterator i;
4935
4936 assert(m);
4937
4938 /* Free unreferenced ExecRuntime objects. This is used after manager deserialization process. */
4939
4940 HASHMAP_FOREACH(rt, m->exec_runtime_by_id, i) {
4941 if (rt->n_ref > 0)
4942 continue;
4943
4944 (void) exec_runtime_free(rt, false);
4945 }
613b411c
LP
4946}
4947
80876c20
LP
4948static const char* const exec_input_table[_EXEC_INPUT_MAX] = {
4949 [EXEC_INPUT_NULL] = "null",
4950 [EXEC_INPUT_TTY] = "tty",
4951 [EXEC_INPUT_TTY_FORCE] = "tty-force",
4f2d528d 4952 [EXEC_INPUT_TTY_FAIL] = "tty-fail",
52c239d7
LB
4953 [EXEC_INPUT_SOCKET] = "socket",
4954 [EXEC_INPUT_NAMED_FD] = "fd",
08f3be7a 4955 [EXEC_INPUT_DATA] = "data",
2038c3f5 4956 [EXEC_INPUT_FILE] = "file",
80876c20
LP
4957};
4958
8a0867d6
LP
4959DEFINE_STRING_TABLE_LOOKUP(exec_input, ExecInput);
4960
94f04347 4961static const char* const exec_output_table[_EXEC_OUTPUT_MAX] = {
80876c20 4962 [EXEC_OUTPUT_INHERIT] = "inherit",
94f04347 4963 [EXEC_OUTPUT_NULL] = "null",
80876c20 4964 [EXEC_OUTPUT_TTY] = "tty",
94f04347 4965 [EXEC_OUTPUT_SYSLOG] = "syslog",
28dbc1e8 4966 [EXEC_OUTPUT_SYSLOG_AND_CONSOLE] = "syslog+console",
9a6bca7a 4967 [EXEC_OUTPUT_KMSG] = "kmsg",
28dbc1e8 4968 [EXEC_OUTPUT_KMSG_AND_CONSOLE] = "kmsg+console",
706343f4
LP
4969 [EXEC_OUTPUT_JOURNAL] = "journal",
4970 [EXEC_OUTPUT_JOURNAL_AND_CONSOLE] = "journal+console",
52c239d7
LB
4971 [EXEC_OUTPUT_SOCKET] = "socket",
4972 [EXEC_OUTPUT_NAMED_FD] = "fd",
2038c3f5 4973 [EXEC_OUTPUT_FILE] = "file",
94f04347
LP
4974};
4975
4976DEFINE_STRING_TABLE_LOOKUP(exec_output, ExecOutput);
023a4f67
LP
4977
4978static const char* const exec_utmp_mode_table[_EXEC_UTMP_MODE_MAX] = {
4979 [EXEC_UTMP_INIT] = "init",
4980 [EXEC_UTMP_LOGIN] = "login",
4981 [EXEC_UTMP_USER] = "user",
4982};
4983
4984DEFINE_STRING_TABLE_LOOKUP(exec_utmp_mode, ExecUtmpMode);
53f47dfc
YW
4985
4986static const char* const exec_preserve_mode_table[_EXEC_PRESERVE_MODE_MAX] = {
4987 [EXEC_PRESERVE_NO] = "no",
4988 [EXEC_PRESERVE_YES] = "yes",
4989 [EXEC_PRESERVE_RESTART] = "restart",
4990};
4991
4992DEFINE_STRING_TABLE_LOOKUP_WITH_BOOLEAN(exec_preserve_mode, ExecPreserveMode, EXEC_PRESERVE_YES);
3536f49e 4993
72fd1768 4994static const char* const exec_directory_type_table[_EXEC_DIRECTORY_TYPE_MAX] = {
3536f49e
YW
4995 [EXEC_DIRECTORY_RUNTIME] = "RuntimeDirectory",
4996 [EXEC_DIRECTORY_STATE] = "StateDirectory",
4997 [EXEC_DIRECTORY_CACHE] = "CacheDirectory",
4998 [EXEC_DIRECTORY_LOGS] = "LogsDirectory",
4999 [EXEC_DIRECTORY_CONFIGURATION] = "ConfigurationDirectory",
5000};
5001
5002DEFINE_STRING_TABLE_LOOKUP(exec_directory_type, ExecDirectoryType);
b1edf445
LP
5003
5004static const char* const exec_keyring_mode_table[_EXEC_KEYRING_MODE_MAX] = {
5005 [EXEC_KEYRING_INHERIT] = "inherit",
5006 [EXEC_KEYRING_PRIVATE] = "private",
5007 [EXEC_KEYRING_SHARED] = "shared",
5008};
5009
5010DEFINE_STRING_TABLE_LOOKUP(exec_keyring_mode, ExecKeyringMode);