]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/core/execute.c
tree-wide: port various places over to use chmod_and_chown()
[thirdparty/systemd.git] / src / core / execute.c
CommitLineData
53e1b683 1/* SPDX-License-Identifier: LGPL-2.1+ */
a7334b09 2
034c6ed7
LP
3#include <errno.h>
4#include <fcntl.h>
8dd4c05b
LP
5#include <glob.h>
6#include <grp.h>
7#include <poll.h>
309bff19 8#include <signal.h>
8dd4c05b 9#include <string.h>
19c0b0b9 10#include <sys/capability.h>
d251207d 11#include <sys/eventfd.h>
f3e43635 12#include <sys/mman.h>
8dd4c05b 13#include <sys/personality.h>
94f04347 14#include <sys/prctl.h>
d2ffa389 15#include <sys/shm.h>
8dd4c05b 16#include <sys/socket.h>
451a074f 17#include <sys/stat.h>
d2ffa389 18#include <sys/types.h>
8dd4c05b
LP
19#include <sys/un.h>
20#include <unistd.h>
023a4f67 21#include <utmpx.h>
5cb5a6ff 22
349cc4a5 23#if HAVE_PAM
5b6319dc
LP
24#include <security/pam_appl.h>
25#endif
26
349cc4a5 27#if HAVE_SELINUX
7b52a628
MS
28#include <selinux/selinux.h>
29#endif
30
349cc4a5 31#if HAVE_SECCOMP
17df7223
LP
32#include <seccomp.h>
33#endif
34
349cc4a5 35#if HAVE_APPARMOR
eef65bf3
MS
36#include <sys/apparmor.h>
37#endif
38
24882e06 39#include "sd-messages.h"
8dd4c05b
LP
40
41#include "af-list.h"
b5efdb8a 42#include "alloc-util.h"
349cc4a5 43#if HAVE_APPARMOR
3ffd4af2
LP
44#include "apparmor-util.h"
45#endif
8dd4c05b
LP
46#include "async.h"
47#include "barrier.h"
8dd4c05b 48#include "cap-list.h"
430f0182 49#include "capability-util.h"
a1164ae3 50#include "chown-recursive.h"
da681e1b 51#include "cpu-set-util.h"
f6a6225e 52#include "def.h"
686d13b9 53#include "env-file.h"
4d1a6904 54#include "env-util.h"
17df7223 55#include "errno-list.h"
3ffd4af2 56#include "execute.h"
8dd4c05b 57#include "exit-status.h"
3ffd4af2 58#include "fd-util.h"
f97b34a6 59#include "format-util.h"
f4f15635 60#include "fs-util.h"
7d50b32a 61#include "glob-util.h"
c004493c 62#include "io-util.h"
8dd4c05b 63#include "ioprio.h"
a1164ae3 64#include "label.h"
8dd4c05b
LP
65#include "log.h"
66#include "macro.h"
e8a565cb 67#include "manager.h"
0a970718 68#include "memory-util.h"
8dd4c05b
LP
69#include "missing.h"
70#include "mkdir.h"
71#include "namespace.h"
6bedfcbb 72#include "parse-util.h"
8dd4c05b 73#include "path-util.h"
0b452006 74#include "process-util.h"
78f22b97 75#include "rlimit-util.h"
8dd4c05b 76#include "rm-rf.h"
349cc4a5 77#if HAVE_SECCOMP
3ffd4af2
LP
78#include "seccomp-util.h"
79#endif
07d46372 80#include "securebits-util.h"
8dd4c05b 81#include "selinux-util.h"
24882e06 82#include "signal-util.h"
8dd4c05b 83#include "smack-util.h"
57b7a260 84#include "socket-util.h"
fd63e712 85#include "special.h"
949befd3 86#include "stat-util.h"
8b43440b 87#include "string-table.h"
07630cea 88#include "string-util.h"
8dd4c05b 89#include "strv.h"
7ccbd1ae 90#include "syslog-util.h"
8dd4c05b 91#include "terminal-util.h"
566b7d23 92#include "umask-util.h"
8dd4c05b 93#include "unit.h"
b1d4f8e1 94#include "user-util.h"
8dd4c05b 95#include "utmp-wtmp.h"
5cb5a6ff 96
/* Two idle timeouts: five seconds and one second, expressed in microseconds */
#define IDLE_TIMEOUT_USEC (5 * USEC_PER_SEC)
#define IDLE_TIMEOUT2_USEC (1 * USEC_PER_SEC)

/* Access mode applied to terminals by chown_terminal(). Group-writable, which presumes a 'tty' group exists. */
#define TTY_MODE 0620

/* Send buffer size (8 MiB) requested for the journal stream socket in connect_logger_as() */
#define SNDBUF_SIZE (8 * 1024 * 1024)
104
/* Moves the n_fds descriptors in fds[] so that fds[i] ends up being descriptor number i+3.
 *
 * The array is modified in place: every entry that had to be moved is replaced by its new descriptor number
 * and the old descriptor is closed. Returns 0 on success, or a negative errno-style value if duplicating a
 * descriptor fails. */
static int shift_fds(int fds[], size_t n_fds) {
        int first = 0;

        if (n_fds <= 0)
                return 0;

        assert(fds);

        do {
                int retry_at = -1, idx;

                for (idx = first; idx < (int) n_fds; idx++) {
                        int want = idx + 3, got;

                        /* Nothing to do if this one already sits in its slot */
                        if (fds[idx] == want)
                                continue;

                        /* Ask for the lowest free descriptor that is >= the slot we want */
                        got = fcntl(fds[idx], F_DUPFD, want);
                        if (got < 0)
                                return -errno;

                        safe_close(fds[idx]);
                        fds[idx] = got;

                        /* The desired slot was still occupied, so we got a higher number. Remember the
                         * first index where this happened and make another pass starting from there. */
                        if (got != want && retry_at < 0)
                                retry_at = idx;
                }

                first = retry_at;
        } while (first >= 0);

        return 0;
}
149
/* Adjusts the descriptor flags of the fds we are going to pass on.
 *
 * fds[] holds n_socket_fds socket descriptors followed by n_storage_fds further descriptors. O_NONBLOCK is
 * set or cleared (according to 'nonblock') on the socket descriptors only; FD_CLOEXEC is cleared on all of
 * them. Returns 0 on success or the first negative error reported by the helpers. */
static int flags_fds(const int fds[], size_t n_socket_fds, size_t n_storage_fds, bool nonblock) {
        size_t total, k;

        total = n_socket_fds + n_storage_fds;
        if (total <= 0)
                return 0;

        assert(fds);

        for (k = 0; k < total; k++) {
                int q;

                /* The non-blocking setting is only applied to the leading socket fds */
                if (k < n_socket_fds) {
                        q = fd_nonblock(fds[k], nonblock);
                        if (q < 0)
                                return q;
                }

                /* These fds are meant to be handed to our children, hence close-on-exec is always turned
                 * off. */
                q = fd_cloexec(fds[k], false);
                if (q < 0)
                        return q;
        }

        return 0;
}
182
1e22b5cd 183static const char *exec_context_tty_path(const ExecContext *context) {
80876c20
LP
184 assert(context);
185
1e22b5cd
LP
186 if (context->stdio_as_fds)
187 return NULL;
188
80876c20
LP
189 if (context->tty_path)
190 return context->tty_path;
191
192 return "/dev/console";
193}
194
1e22b5cd
LP
195static void exec_context_tty_reset(const ExecContext *context, const ExecParameters *p) {
196 const char *path;
197
6ea832a2
LP
198 assert(context);
199
1e22b5cd 200 path = exec_context_tty_path(context);
6ea832a2 201
1e22b5cd
LP
202 if (context->tty_vhangup) {
203 if (p && p->stdin_fd >= 0)
204 (void) terminal_vhangup_fd(p->stdin_fd);
205 else if (path)
206 (void) terminal_vhangup(path);
207 }
6ea832a2 208
1e22b5cd
LP
209 if (context->tty_reset) {
210 if (p && p->stdin_fd >= 0)
211 (void) reset_terminal_fd(p->stdin_fd, true);
212 else if (path)
213 (void) reset_terminal(path);
214 }
215
216 if (context->tty_vt_disallocate && path)
217 (void) vt_disallocate(path);
6ea832a2
LP
218}
219
6af760f3
LP
220static bool is_terminal_input(ExecInput i) {
221 return IN_SET(i,
222 EXEC_INPUT_TTY,
223 EXEC_INPUT_TTY_FORCE,
224 EXEC_INPUT_TTY_FAIL);
225}
226
3a1286b6 227static bool is_terminal_output(ExecOutput o) {
6af760f3
LP
228 return IN_SET(o,
229 EXEC_OUTPUT_TTY,
230 EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
231 EXEC_OUTPUT_KMSG_AND_CONSOLE,
232 EXEC_OUTPUT_JOURNAL_AND_CONSOLE);
233}
234
aac8c0c3
LP
235static bool is_syslog_output(ExecOutput o) {
236 return IN_SET(o,
237 EXEC_OUTPUT_SYSLOG,
238 EXEC_OUTPUT_SYSLOG_AND_CONSOLE);
239}
240
241static bool is_kmsg_output(ExecOutput o) {
242 return IN_SET(o,
243 EXEC_OUTPUT_KMSG,
244 EXEC_OUTPUT_KMSG_AND_CONSOLE);
245}
246
6af760f3
LP
247static bool exec_context_needs_term(const ExecContext *c) {
248 assert(c);
249
250 /* Return true if the execution context suggests we should set $TERM to something useful. */
251
252 if (is_terminal_input(c->std_input))
253 return true;
254
255 if (is_terminal_output(c->std_output))
256 return true;
257
258 if (is_terminal_output(c->std_error))
259 return true;
260
261 return !!c->tty_path;
3a1286b6
MS
262}
263
80876c20 264static int open_null_as(int flags, int nfd) {
046a82c1 265 int fd;
071830ff 266
80876c20 267 assert(nfd >= 0);
071830ff 268
613b411c
LP
269 fd = open("/dev/null", flags|O_NOCTTY);
270 if (fd < 0)
071830ff
LP
271 return -errno;
272
046a82c1 273 return move_fd(fd, nfd, false);
071830ff
LP
274}
275
524daa8c 276static int connect_journal_socket(int fd, uid_t uid, gid_t gid) {
92a17af9 277 static const union sockaddr_union sa = {
b92bea5d
ZJS
278 .un.sun_family = AF_UNIX,
279 .un.sun_path = "/run/systemd/journal/stdout",
280 };
524daa8c
ZJS
281 uid_t olduid = UID_INVALID;
282 gid_t oldgid = GID_INVALID;
283 int r;
284
cad93f29 285 if (gid_is_valid(gid)) {
524daa8c
ZJS
286 oldgid = getgid();
287
92a17af9 288 if (setegid(gid) < 0)
524daa8c
ZJS
289 return -errno;
290 }
291
cad93f29 292 if (uid_is_valid(uid)) {
524daa8c
ZJS
293 olduid = getuid();
294
92a17af9 295 if (seteuid(uid) < 0) {
524daa8c
ZJS
296 r = -errno;
297 goto restore_gid;
298 }
299 }
300
92a17af9 301 r = connect(fd, &sa.sa, SOCKADDR_UN_LEN(sa.un)) < 0 ? -errno : 0;
524daa8c
ZJS
302
303 /* If we fail to restore the uid or gid, things will likely
304 fail later on. This should only happen if an LSM interferes. */
305
cad93f29 306 if (uid_is_valid(uid))
524daa8c
ZJS
307 (void) seteuid(olduid);
308
309 restore_gid:
cad93f29 310 if (gid_is_valid(gid))
524daa8c
ZJS
311 (void) setegid(oldgid);
312
313 return r;
314}
315
fd1f9c89 316static int connect_logger_as(
34cf6c43 317 const Unit *unit,
fd1f9c89 318 const ExecContext *context,
af635cf3 319 const ExecParameters *params,
fd1f9c89
LP
320 ExecOutput output,
321 const char *ident,
fd1f9c89
LP
322 int nfd,
323 uid_t uid,
324 gid_t gid) {
325
2ac1ff68
EV
326 _cleanup_close_ int fd = -1;
327 int r;
071830ff
LP
328
329 assert(context);
af635cf3 330 assert(params);
80876c20
LP
331 assert(output < _EXEC_OUTPUT_MAX);
332 assert(ident);
333 assert(nfd >= 0);
071830ff 334
54fe0cdb
LP
335 fd = socket(AF_UNIX, SOCK_STREAM, 0);
336 if (fd < 0)
80876c20 337 return -errno;
071830ff 338
524daa8c
ZJS
339 r = connect_journal_socket(fd, uid, gid);
340 if (r < 0)
341 return r;
071830ff 342
2ac1ff68 343 if (shutdown(fd, SHUT_RD) < 0)
80876c20 344 return -errno;
071830ff 345
fd1f9c89 346 (void) fd_inc_sndbuf(fd, SNDBUF_SIZE);
531dca78 347
2ac1ff68 348 if (dprintf(fd,
62bca2c6 349 "%s\n"
80876c20
LP
350 "%s\n"
351 "%i\n"
54fe0cdb
LP
352 "%i\n"
353 "%i\n"
354 "%i\n"
4f4a1dbf 355 "%i\n",
c867611e 356 context->syslog_identifier ?: ident,
af635cf3 357 params->flags & EXEC_PASS_LOG_UNIT ? unit->id : "",
54fe0cdb
LP
358 context->syslog_priority,
359 !!context->syslog_level_prefix,
aac8c0c3
LP
360 is_syslog_output(output),
361 is_kmsg_output(output),
2ac1ff68
EV
362 is_terminal_output(output)) < 0)
363 return -errno;
80876c20 364
2ac1ff68 365 return move_fd(TAKE_FD(fd), nfd, false);
80876c20 366}
2ac1ff68 367
3a274a21 368static int open_terminal_as(const char *path, int flags, int nfd) {
046a82c1 369 int fd;
071830ff 370
80876c20
LP
371 assert(path);
372 assert(nfd >= 0);
fd1f9c89 373
3a274a21 374 fd = open_terminal(path, flags | O_NOCTTY);
3cc2aff1 375 if (fd < 0)
80876c20 376 return fd;
071830ff 377
046a82c1 378 return move_fd(fd, nfd, false);
80876c20 379}
071830ff 380
2038c3f5 381static int acquire_path(const char *path, int flags, mode_t mode) {
15a3e96f
LP
382 union sockaddr_union sa = {};
383 _cleanup_close_ int fd = -1;
384 int r, salen;
071830ff 385
80876c20 386 assert(path);
071830ff 387
2038c3f5
LP
388 if (IN_SET(flags & O_ACCMODE, O_WRONLY, O_RDWR))
389 flags |= O_CREAT;
390
391 fd = open(path, flags|O_NOCTTY, mode);
392 if (fd >= 0)
15a3e96f 393 return TAKE_FD(fd);
071830ff 394
2038c3f5
LP
395 if (errno != ENXIO) /* ENXIO is returned when we try to open() an AF_UNIX file system socket on Linux */
396 return -errno;
15a3e96f 397 if (strlen(path) >= sizeof(sa.un.sun_path)) /* Too long, can't be a UNIX socket */
2038c3f5
LP
398 return -ENXIO;
399
400 /* So, it appears the specified path could be an AF_UNIX socket. Let's see if we can connect to it. */
401
402 fd = socket(AF_UNIX, SOCK_STREAM, 0);
403 if (fd < 0)
404 return -errno;
405
15a3e96f
LP
406 salen = sockaddr_un_set_path(&sa.un, path);
407 if (salen < 0)
408 return salen;
409
410 if (connect(fd, &sa.sa, salen) < 0)
2038c3f5
LP
411 return errno == EINVAL ? -ENXIO : -errno; /* Propagate initial error if we get EINVAL, i.e. we have
412 * indication that his wasn't an AF_UNIX socket after all */
071830ff 413
2038c3f5
LP
414 if ((flags & O_ACCMODE) == O_RDONLY)
415 r = shutdown(fd, SHUT_WR);
416 else if ((flags & O_ACCMODE) == O_WRONLY)
417 r = shutdown(fd, SHUT_RD);
418 else
15a3e96f
LP
419 return TAKE_FD(fd);
420 if (r < 0)
2038c3f5 421 return -errno;
2038c3f5 422
15a3e96f 423 return TAKE_FD(fd);
80876c20 424}
071830ff 425
08f3be7a
LP
426static int fixup_input(
427 const ExecContext *context,
428 int socket_fd,
429 bool apply_tty_stdin) {
430
431 ExecInput std_input;
432
433 assert(context);
434
435 std_input = context->std_input;
1e3ad081
LP
436
437 if (is_terminal_input(std_input) && !apply_tty_stdin)
438 return EXEC_INPUT_NULL;
071830ff 439
03fd9c49 440 if (std_input == EXEC_INPUT_SOCKET && socket_fd < 0)
4f2d528d
LP
441 return EXEC_INPUT_NULL;
442
08f3be7a
LP
443 if (std_input == EXEC_INPUT_DATA && context->stdin_data_size == 0)
444 return EXEC_INPUT_NULL;
445
03fd9c49 446 return std_input;
4f2d528d
LP
447}
448
03fd9c49 449static int fixup_output(ExecOutput std_output, int socket_fd) {
4f2d528d 450
03fd9c49 451 if (std_output == EXEC_OUTPUT_SOCKET && socket_fd < 0)
4f2d528d
LP
452 return EXEC_OUTPUT_INHERIT;
453
03fd9c49 454 return std_output;
4f2d528d
LP
455}
456
a34ceba6
LP
457static int setup_input(
458 const ExecContext *context,
459 const ExecParameters *params,
52c239d7
LB
460 int socket_fd,
461 int named_iofds[3]) {
a34ceba6 462
4f2d528d
LP
463 ExecInput i;
464
465 assert(context);
a34ceba6
LP
466 assert(params);
467
468 if (params->stdin_fd >= 0) {
469 if (dup2(params->stdin_fd, STDIN_FILENO) < 0)
470 return -errno;
471
472 /* Try to make this the controlling tty, if it is a tty, and reset it */
1fb0682e
LP
473 if (isatty(STDIN_FILENO)) {
474 (void) ioctl(STDIN_FILENO, TIOCSCTTY, context->std_input == EXEC_INPUT_TTY_FORCE);
475 (void) reset_terminal_fd(STDIN_FILENO, true);
476 }
a34ceba6
LP
477
478 return STDIN_FILENO;
479 }
4f2d528d 480
08f3be7a 481 i = fixup_input(context, socket_fd, params->flags & EXEC_APPLY_TTY_STDIN);
4f2d528d
LP
482
483 switch (i) {
071830ff 484
80876c20
LP
485 case EXEC_INPUT_NULL:
486 return open_null_as(O_RDONLY, STDIN_FILENO);
487
488 case EXEC_INPUT_TTY:
489 case EXEC_INPUT_TTY_FORCE:
490 case EXEC_INPUT_TTY_FAIL: {
046a82c1 491 int fd;
071830ff 492
1e22b5cd 493 fd = acquire_terminal(exec_context_tty_path(context),
8854d795
LP
494 i == EXEC_INPUT_TTY_FAIL ? ACQUIRE_TERMINAL_TRY :
495 i == EXEC_INPUT_TTY_FORCE ? ACQUIRE_TERMINAL_FORCE :
496 ACQUIRE_TERMINAL_WAIT,
3a43da28 497 USEC_INFINITY);
970edce6 498 if (fd < 0)
80876c20
LP
499 return fd;
500
046a82c1 501 return move_fd(fd, STDIN_FILENO, false);
80876c20
LP
502 }
503
4f2d528d 504 case EXEC_INPUT_SOCKET:
e75a9ed1
LP
505 assert(socket_fd >= 0);
506
4f2d528d
LP
507 return dup2(socket_fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
508
52c239d7 509 case EXEC_INPUT_NAMED_FD:
e75a9ed1
LP
510 assert(named_iofds[STDIN_FILENO] >= 0);
511
52c239d7
LB
512 (void) fd_nonblock(named_iofds[STDIN_FILENO], false);
513 return dup2(named_iofds[STDIN_FILENO], STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
514
08f3be7a
LP
515 case EXEC_INPUT_DATA: {
516 int fd;
517
518 fd = acquire_data_fd(context->stdin_data, context->stdin_data_size, 0);
519 if (fd < 0)
520 return fd;
521
522 return move_fd(fd, STDIN_FILENO, false);
523 }
524
2038c3f5
LP
525 case EXEC_INPUT_FILE: {
526 bool rw;
527 int fd;
528
529 assert(context->stdio_file[STDIN_FILENO]);
530
531 rw = (context->std_output == EXEC_OUTPUT_FILE && streq_ptr(context->stdio_file[STDIN_FILENO], context->stdio_file[STDOUT_FILENO])) ||
532 (context->std_error == EXEC_OUTPUT_FILE && streq_ptr(context->stdio_file[STDIN_FILENO], context->stdio_file[STDERR_FILENO]));
533
534 fd = acquire_path(context->stdio_file[STDIN_FILENO], rw ? O_RDWR : O_RDONLY, 0666 & ~context->umask);
535 if (fd < 0)
536 return fd;
537
538 return move_fd(fd, STDIN_FILENO, false);
539 }
540
80876c20
LP
541 default:
542 assert_not_reached("Unknown input type");
543 }
544}
545
41fc585a
LP
546static bool can_inherit_stderr_from_stdout(
547 const ExecContext *context,
548 ExecOutput o,
549 ExecOutput e) {
550
551 assert(context);
552
553 /* Returns true, if given the specified STDERR and STDOUT output we can directly dup() the stdout fd to the
554 * stderr fd */
555
556 if (e == EXEC_OUTPUT_INHERIT)
557 return true;
558 if (e != o)
559 return false;
560
561 if (e == EXEC_OUTPUT_NAMED_FD)
562 return streq_ptr(context->stdio_fdname[STDOUT_FILENO], context->stdio_fdname[STDERR_FILENO]);
563
564 if (IN_SET(e, EXEC_OUTPUT_FILE, EXEC_OUTPUT_FILE_APPEND))
565 return streq_ptr(context->stdio_file[STDOUT_FILENO], context->stdio_file[STDERR_FILENO]);
566
567 return true;
568}
569
a34ceba6 570static int setup_output(
34cf6c43 571 const Unit *unit,
a34ceba6
LP
572 const ExecContext *context,
573 const ExecParameters *params,
574 int fileno,
575 int socket_fd,
52c239d7 576 int named_iofds[3],
a34ceba6 577 const char *ident,
7bce046b
LP
578 uid_t uid,
579 gid_t gid,
580 dev_t *journal_stream_dev,
581 ino_t *journal_stream_ino) {
a34ceba6 582
4f2d528d
LP
583 ExecOutput o;
584 ExecInput i;
47c1d80d 585 int r;
4f2d528d 586
f2341e0a 587 assert(unit);
80876c20 588 assert(context);
a34ceba6 589 assert(params);
80876c20 590 assert(ident);
7bce046b
LP
591 assert(journal_stream_dev);
592 assert(journal_stream_ino);
80876c20 593
a34ceba6
LP
594 if (fileno == STDOUT_FILENO && params->stdout_fd >= 0) {
595
596 if (dup2(params->stdout_fd, STDOUT_FILENO) < 0)
597 return -errno;
598
599 return STDOUT_FILENO;
600 }
601
602 if (fileno == STDERR_FILENO && params->stderr_fd >= 0) {
603 if (dup2(params->stderr_fd, STDERR_FILENO) < 0)
604 return -errno;
605
606 return STDERR_FILENO;
607 }
608
08f3be7a 609 i = fixup_input(context, socket_fd, params->flags & EXEC_APPLY_TTY_STDIN);
03fd9c49 610 o = fixup_output(context->std_output, socket_fd);
4f2d528d 611
eb17e935
MS
612 if (fileno == STDERR_FILENO) {
613 ExecOutput e;
614 e = fixup_output(context->std_error, socket_fd);
80876c20 615
eb17e935
MS
616 /* This expects the input and output are already set up */
617
618 /* Don't change the stderr file descriptor if we inherit all
619 * the way and are not on a tty */
620 if (e == EXEC_OUTPUT_INHERIT &&
621 o == EXEC_OUTPUT_INHERIT &&
622 i == EXEC_INPUT_NULL &&
623 !is_terminal_input(context->std_input) &&
624 getppid () != 1)
625 return fileno;
626
627 /* Duplicate from stdout if possible */
41fc585a 628 if (can_inherit_stderr_from_stdout(context, o, e))
eb17e935 629 return dup2(STDOUT_FILENO, fileno) < 0 ? -errno : fileno;
071830ff 630
eb17e935 631 o = e;
80876c20 632
eb17e935 633 } else if (o == EXEC_OUTPUT_INHERIT) {
21d21ea4
LP
634 /* If input got downgraded, inherit the original value */
635 if (i == EXEC_INPUT_NULL && is_terminal_input(context->std_input))
1e22b5cd 636 return open_terminal_as(exec_context_tty_path(context), O_WRONLY, fileno);
21d21ea4 637
08f3be7a
LP
638 /* If the input is connected to anything that's not a /dev/null or a data fd, inherit that... */
639 if (!IN_SET(i, EXEC_INPUT_NULL, EXEC_INPUT_DATA))
eb17e935 640 return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
071830ff 641
acb591e4
LP
642 /* If we are not started from PID 1 we just inherit STDOUT from our parent process. */
643 if (getppid() != 1)
eb17e935 644 return fileno;
94f04347 645
eb17e935
MS
646 /* We need to open /dev/null here anew, to get the right access mode. */
647 return open_null_as(O_WRONLY, fileno);
071830ff 648 }
94f04347 649
eb17e935 650 switch (o) {
80876c20
LP
651
652 case EXEC_OUTPUT_NULL:
eb17e935 653 return open_null_as(O_WRONLY, fileno);
80876c20
LP
654
655 case EXEC_OUTPUT_TTY:
4f2d528d 656 if (is_terminal_input(i))
eb17e935 657 return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
80876c20
LP
658
659 /* We don't reset the terminal if this is just about output */
1e22b5cd 660 return open_terminal_as(exec_context_tty_path(context), O_WRONLY, fileno);
80876c20
LP
661
662 case EXEC_OUTPUT_SYSLOG:
28dbc1e8 663 case EXEC_OUTPUT_SYSLOG_AND_CONSOLE:
9a6bca7a 664 case EXEC_OUTPUT_KMSG:
28dbc1e8 665 case EXEC_OUTPUT_KMSG_AND_CONSOLE:
706343f4
LP
666 case EXEC_OUTPUT_JOURNAL:
667 case EXEC_OUTPUT_JOURNAL_AND_CONSOLE:
af635cf3 668 r = connect_logger_as(unit, context, params, o, ident, fileno, uid, gid);
47c1d80d 669 if (r < 0) {
82677ae4 670 log_unit_warning_errno(unit, r, "Failed to connect %s to the journal socket, ignoring: %m", fileno == STDOUT_FILENO ? "stdout" : "stderr");
eb17e935 671 r = open_null_as(O_WRONLY, fileno);
7bce046b
LP
672 } else {
673 struct stat st;
674
675 /* If we connected this fd to the journal via a stream, patch the device/inode into the passed
676 * parameters, but only then. This is useful so that we can set $JOURNAL_STREAM that permits
ab2116b1
LP
677 * services to detect whether they are connected to the journal or not.
678 *
679 * If both stdout and stderr are connected to a stream then let's make sure to store the data
680 * about STDERR as that's usually the best way to do logging. */
7bce046b 681
ab2116b1
LP
682 if (fstat(fileno, &st) >= 0 &&
683 (*journal_stream_ino == 0 || fileno == STDERR_FILENO)) {
7bce046b
LP
684 *journal_stream_dev = st.st_dev;
685 *journal_stream_ino = st.st_ino;
686 }
47c1d80d
MS
687 }
688 return r;
4f2d528d
LP
689
690 case EXEC_OUTPUT_SOCKET:
691 assert(socket_fd >= 0);
e75a9ed1 692
eb17e935 693 return dup2(socket_fd, fileno) < 0 ? -errno : fileno;
94f04347 694
52c239d7 695 case EXEC_OUTPUT_NAMED_FD:
e75a9ed1
LP
696 assert(named_iofds[fileno] >= 0);
697
52c239d7
LB
698 (void) fd_nonblock(named_iofds[fileno], false);
699 return dup2(named_iofds[fileno], fileno) < 0 ? -errno : fileno;
700
566b7d23
ZD
701 case EXEC_OUTPUT_FILE:
702 case EXEC_OUTPUT_FILE_APPEND: {
2038c3f5 703 bool rw;
566b7d23 704 int fd, flags;
2038c3f5
LP
705
706 assert(context->stdio_file[fileno]);
707
708 rw = context->std_input == EXEC_INPUT_FILE &&
709 streq_ptr(context->stdio_file[fileno], context->stdio_file[STDIN_FILENO]);
710
711 if (rw)
712 return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
713
566b7d23
ZD
714 flags = O_WRONLY;
715 if (o == EXEC_OUTPUT_FILE_APPEND)
716 flags |= O_APPEND;
717
718 fd = acquire_path(context->stdio_file[fileno], flags, 0666 & ~context->umask);
2038c3f5
LP
719 if (fd < 0)
720 return fd;
721
566b7d23 722 return move_fd(fd, fileno, 0);
2038c3f5
LP
723 }
724
94f04347 725 default:
80876c20 726 assert_not_reached("Unknown error type");
94f04347 727 }
071830ff
LP
728}
729
02a51aba 730static int chown_terminal(int fd, uid_t uid) {
4b3b5bc7 731 int r;
02a51aba
LP
732
733 assert(fd >= 0);
02a51aba 734
1ff74fb6 735 /* Before we chown/chmod the TTY, let's ensure this is actually a tty */
4b3b5bc7
LP
736 if (isatty(fd) < 1) {
737 if (IN_SET(errno, EINVAL, ENOTTY))
738 return 0; /* not a tty */
1ff74fb6 739
02a51aba 740 return -errno;
4b3b5bc7 741 }
02a51aba 742
4b3b5bc7
LP
743 /* This might fail. What matters are the results. */
744 r = fchmod_and_chown(fd, TTY_MODE, uid, -1);
745 if (r < 0)
746 return r;
02a51aba 747
4b3b5bc7 748 return 1;
02a51aba
LP
749}
750
7d5ceb64 751static int setup_confirm_stdio(const char *vc, int *_saved_stdin, int *_saved_stdout) {
3d18b167
LP
752 _cleanup_close_ int fd = -1, saved_stdin = -1, saved_stdout = -1;
753 int r;
80876c20 754
80876c20
LP
755 assert(_saved_stdin);
756 assert(_saved_stdout);
757
af6da548
LP
758 saved_stdin = fcntl(STDIN_FILENO, F_DUPFD, 3);
759 if (saved_stdin < 0)
760 return -errno;
80876c20 761
af6da548 762 saved_stdout = fcntl(STDOUT_FILENO, F_DUPFD, 3);
3d18b167
LP
763 if (saved_stdout < 0)
764 return -errno;
80876c20 765
8854d795 766 fd = acquire_terminal(vc, ACQUIRE_TERMINAL_WAIT, DEFAULT_CONFIRM_USEC);
3d18b167
LP
767 if (fd < 0)
768 return fd;
80876c20 769
af6da548
LP
770 r = chown_terminal(fd, getuid());
771 if (r < 0)
3d18b167 772 return r;
02a51aba 773
3d18b167
LP
774 r = reset_terminal_fd(fd, true);
775 if (r < 0)
776 return r;
80876c20 777
2b33ab09 778 r = rearrange_stdio(fd, fd, STDERR_FILENO);
3d18b167 779 fd = -1;
2b33ab09
LP
780 if (r < 0)
781 return r;
80876c20
LP
782
783 *_saved_stdin = saved_stdin;
784 *_saved_stdout = saved_stdout;
785
3d18b167 786 saved_stdin = saved_stdout = -1;
80876c20 787
3d18b167 788 return 0;
80876c20
LP
789}
790
63d77c92 791static void write_confirm_error_fd(int err, int fd, const Unit *u) {
3b20f877
FB
792 assert(err < 0);
793
794 if (err == -ETIMEDOUT)
63d77c92 795 dprintf(fd, "Confirmation question timed out for %s, assuming positive response.\n", u->id);
3b20f877
FB
796 else {
797 errno = -err;
63d77c92 798 dprintf(fd, "Couldn't ask confirmation for %s: %m, assuming positive response.\n", u->id);
3b20f877
FB
799 }
800}
801
63d77c92 802static void write_confirm_error(int err, const char *vc, const Unit *u) {
03e334a1 803 _cleanup_close_ int fd = -1;
80876c20 804
3b20f877 805 assert(vc);
80876c20 806
7d5ceb64 807 fd = open_terminal(vc, O_WRONLY|O_NOCTTY|O_CLOEXEC);
af6da548 808 if (fd < 0)
3b20f877 809 return;
80876c20 810
63d77c92 811 write_confirm_error_fd(err, fd, u);
af6da548 812}
80876c20 813
/* Releases the terminal and puts the saved stdin/stdout (if any) back in place. The saved fds are closed and
 * invalidated in any case. Returns 0 on success, or the negative errno-style value of the last dup2() that
 * failed. */
static int restore_confirm_stdio(int *saved_stdin, int *saved_stdout) {
        int ret = 0;

        assert(saved_stdin);
        assert(saved_stdout);

        release_terminal();

        if (*saved_stdin >= 0 && dup2(*saved_stdin, STDIN_FILENO) < 0)
                ret = -errno;

        if (*saved_stdout >= 0 && dup2(*saved_stdout, STDOUT_FILENO) < 0)
                ret = -errno;

        *saved_stdin = safe_close(*saved_stdin);
        *saved_stdout = safe_close(*saved_stdout);

        return ret;
}
835
3b20f877
FB
/* Possible results of ask_for_confirmation() */
enum {
        CONFIRM_PRETEND_FAILURE = -1, /* don't execute, pretend the command failed */
        CONFIRM_PRETEND_SUCCESS =  0, /* don't execute, pretend the command succeeded */
        CONFIRM_EXECUTE         =  1, /* go ahead and execute the command */
};
841
eedf223a 842static int ask_for_confirmation(const char *vc, Unit *u, const char *cmdline) {
af6da548 843 int saved_stdout = -1, saved_stdin = -1, r;
2bcd3c26 844 _cleanup_free_ char *e = NULL;
3b20f877 845 char c;
af6da548 846
3b20f877 847 /* For any internal errors, assume a positive response. */
7d5ceb64 848 r = setup_confirm_stdio(vc, &saved_stdin, &saved_stdout);
3b20f877 849 if (r < 0) {
63d77c92 850 write_confirm_error(r, vc, u);
3b20f877
FB
851 return CONFIRM_EXECUTE;
852 }
af6da548 853
b0eb2944
FB
854 /* confirm_spawn might have been disabled while we were sleeping. */
855 if (manager_is_confirm_spawn_disabled(u->manager)) {
856 r = 1;
857 goto restore_stdio;
858 }
af6da548 859
2bcd3c26
FB
860 e = ellipsize(cmdline, 60, 100);
861 if (!e) {
862 log_oom();
863 r = CONFIRM_EXECUTE;
864 goto restore_stdio;
865 }
af6da548 866
d172b175 867 for (;;) {
539622bd 868 r = ask_char(&c, "yfshiDjcn", "Execute %s? [y, f, s – h for help] ", e);
d172b175 869 if (r < 0) {
63d77c92 870 write_confirm_error_fd(r, STDOUT_FILENO, u);
d172b175
FB
871 r = CONFIRM_EXECUTE;
872 goto restore_stdio;
873 }
af6da548 874
d172b175 875 switch (c) {
b0eb2944
FB
876 case 'c':
877 printf("Resuming normal execution.\n");
878 manager_disable_confirm_spawn();
879 r = 1;
880 break;
dd6f9ac0
FB
881 case 'D':
882 unit_dump(u, stdout, " ");
883 continue; /* ask again */
d172b175
FB
884 case 'f':
885 printf("Failing execution.\n");
886 r = CONFIRM_PRETEND_FAILURE;
887 break;
888 case 'h':
b0eb2944
FB
889 printf(" c - continue, proceed without asking anymore\n"
890 " D - dump, show the state of the unit\n"
dd6f9ac0 891 " f - fail, don't execute the command and pretend it failed\n"
d172b175 892 " h - help\n"
eedf223a 893 " i - info, show a short summary of the unit\n"
56fde33a 894 " j - jobs, show jobs that are in progress\n"
d172b175
FB
895 " s - skip, don't execute the command and pretend it succeeded\n"
896 " y - yes, execute the command\n");
dd6f9ac0 897 continue; /* ask again */
eedf223a
FB
898 case 'i':
899 printf(" Description: %s\n"
900 " Unit: %s\n"
901 " Command: %s\n",
902 u->id, u->description, cmdline);
903 continue; /* ask again */
56fde33a
FB
904 case 'j':
905 manager_dump_jobs(u->manager, stdout, " ");
906 continue; /* ask again */
539622bd
FB
907 case 'n':
908 /* 'n' was removed in favor of 'f'. */
909 printf("Didn't understand 'n', did you mean 'f'?\n");
910 continue; /* ask again */
d172b175
FB
911 case 's':
912 printf("Skipping execution.\n");
913 r = CONFIRM_PRETEND_SUCCESS;
914 break;
915 case 'y':
916 r = CONFIRM_EXECUTE;
917 break;
918 default:
919 assert_not_reached("Unhandled choice");
920 }
3b20f877 921 break;
3b20f877 922 }
af6da548 923
3b20f877 924restore_stdio:
af6da548 925 restore_confirm_stdio(&saved_stdin, &saved_stdout);
af6da548 926 return r;
80876c20
LP
927}
928
4d885bd3
DH
929static int get_fixed_user(const ExecContext *c, const char **user,
930 uid_t *uid, gid_t *gid,
931 const char **home, const char **shell) {
81a2b7ce 932 int r;
4d885bd3 933 const char *name;
81a2b7ce 934
4d885bd3 935 assert(c);
81a2b7ce 936
23deef88
LP
937 if (!c->user)
938 return 0;
939
4d885bd3
DH
940 /* Note that we don't set $HOME or $SHELL if they are not particularly enlightening anyway
941 * (i.e. are "/" or "/bin/nologin"). */
81a2b7ce 942
23deef88 943 name = c->user;
fafff8f1 944 r = get_user_creds(&name, uid, gid, home, shell, USER_CREDS_CLEAN);
4d885bd3
DH
945 if (r < 0)
946 return r;
81a2b7ce 947
4d885bd3
DH
948 *user = name;
949 return 0;
950}
951
952static int get_fixed_group(const ExecContext *c, const char **group, gid_t *gid) {
953 int r;
954 const char *name;
955
956 assert(c);
957
958 if (!c->group)
959 return 0;
960
961 name = c->group;
fafff8f1 962 r = get_group_creds(&name, gid, 0);
4d885bd3
DH
963 if (r < 0)
964 return r;
965
966 *group = name;
967 return 0;
968}
969
cdc5d5c5
DH
970static int get_supplementary_groups(const ExecContext *c, const char *user,
971 const char *group, gid_t gid,
972 gid_t **supplementary_gids, int *ngids) {
4d885bd3
DH
973 char **i;
974 int r, k = 0;
975 int ngroups_max;
976 bool keep_groups = false;
977 gid_t *groups = NULL;
978 _cleanup_free_ gid_t *l_gids = NULL;
979
980 assert(c);
981
bbeea271
DH
982 /*
983 * If user is given, then lookup GID and supplementary groups list.
984 * We avoid NSS lookups for gid=0. Also we have to initialize groups
cdc5d5c5
DH
985 * here and as early as possible so we keep the list of supplementary
986 * groups of the caller.
bbeea271
DH
987 */
988 if (user && gid_is_valid(gid) && gid != 0) {
989 /* First step, initialize groups from /etc/groups */
990 if (initgroups(user, gid) < 0)
991 return -errno;
992
993 keep_groups = true;
994 }
995
ac6e8be6 996 if (strv_isempty(c->supplementary_groups))
4d885bd3
DH
997 return 0;
998
366ddd25
DH
999 /*
1000 * If SupplementaryGroups= was passed then NGROUPS_MAX has to
1001 * be positive, otherwise fail.
1002 */
1003 errno = 0;
1004 ngroups_max = (int) sysconf(_SC_NGROUPS_MAX);
1005 if (ngroups_max <= 0) {
1006 if (errno > 0)
1007 return -errno;
1008 else
1009 return -EOPNOTSUPP; /* For all other values */
1010 }
1011
4d885bd3
DH
1012 l_gids = new(gid_t, ngroups_max);
1013 if (!l_gids)
1014 return -ENOMEM;
81a2b7ce 1015
4d885bd3
DH
1016 if (keep_groups) {
1017 /*
1018 * Lookup the list of groups that the user belongs to, we
1019 * avoid NSS lookups here too for gid=0.
1020 */
1021 k = ngroups_max;
1022 if (getgrouplist(user, gid, l_gids, &k) < 0)
1023 return -EINVAL;
1024 } else
1025 k = 0;
81a2b7ce 1026
4d885bd3
DH
1027 STRV_FOREACH(i, c->supplementary_groups) {
1028 const char *g;
81a2b7ce 1029
4d885bd3
DH
1030 if (k >= ngroups_max)
1031 return -E2BIG;
81a2b7ce 1032
4d885bd3 1033 g = *i;
fafff8f1 1034 r = get_group_creds(&g, l_gids+k, 0);
4d885bd3
DH
1035 if (r < 0)
1036 return r;
81a2b7ce 1037
4d885bd3
DH
1038 k++;
1039 }
81a2b7ce 1040
4d885bd3
DH
1041 /*
1042 * Sets ngids to zero to drop all supplementary groups, happens
1043 * when we are under root and SupplementaryGroups= is empty.
1044 */
1045 if (k == 0) {
1046 *ngids = 0;
1047 return 0;
1048 }
81a2b7ce 1049
4d885bd3
DH
1050 /* Otherwise get the final list of supplementary groups */
1051 groups = memdup(l_gids, sizeof(gid_t) * k);
1052 if (!groups)
1053 return -ENOMEM;
1054
1055 *supplementary_gids = groups;
1056 *ngids = k;
1057
1058 groups = NULL;
1059
1060 return 0;
1061}
1062
/* Applies the supplementary group list (if it is non-empty) and then the GID (if it is valid) to the
 * calling process. Returns 0 on success, a negative errno-style value on failure. */
static int enforce_groups(gid_t gid, const gid_t *supplementary_gids, int ngids) {

        /* Supplementary groups go first, but only if there are any */
        if (ngids > 0) {
                int r;

                r = maybe_setgroups(ngids, supplementary_gids);
                if (r < 0)
                        return r;
        }

        /* Then the real, effective and saved GID */
        if (gid_is_valid(gid) && setresgid(gid, gid, gid) < 0)
                return -errno;

        return 0;
}
1081
1082static int enforce_user(const ExecContext *context, uid_t uid) {
81a2b7ce
LP
1083 assert(context);
1084
4d885bd3
DH
1085 if (!uid_is_valid(uid))
1086 return 0;
1087
479050b3 1088 /* Sets (but doesn't look up) the uid and make sure we keep the
81a2b7ce
LP
1089 * capabilities while doing so. */
1090
479050b3 1091 if (context->capability_ambient_set != 0) {
81a2b7ce
LP
1092
1093 /* First step: If we need to keep capabilities but
1094 * drop privileges we need to make sure we keep our
cbb21cca 1095 * caps, while we drop privileges. */
693ced48 1096 if (uid != 0) {
cbb21cca 1097 int sb = context->secure_bits | 1<<SECURE_KEEP_CAPS;
693ced48
LP
1098
1099 if (prctl(PR_GET_SECUREBITS) != sb)
1100 if (prctl(PR_SET_SECUREBITS, sb) < 0)
1101 return -errno;
1102 }
81a2b7ce
LP
1103 }
1104
479050b3 1105 /* Second step: actually set the uids */
81a2b7ce
LP
1106 if (setresuid(uid, uid, uid) < 0)
1107 return -errno;
1108
1109 /* At this point we should have all necessary capabilities but
1110 are otherwise a normal user. However, the caps might got
1111 corrupted due to the setresuid() so we need clean them up
1112 later. This is done outside of this call. */
1113
1114 return 0;
1115}
1116
349cc4a5 1117#if HAVE_PAM
5b6319dc
LP
1118
1119static int null_conv(
1120 int num_msg,
1121 const struct pam_message **msg,
1122 struct pam_response **resp,
1123 void *appdata_ptr) {
1124
1125 /* We don't support conversations */
1126
1127 return PAM_CONV_ERR;
1128}
1129
cefc33ae
LP
1130#endif
1131
5b6319dc
LP
/* Opens a PAM session for the service and forks off a helper process ("(sd-pam)") that closes the
 * session again once the calling process goes away.
 *
 *   name    PAM service name, passed to pam_start()
 *   user    user name, passed to pam_start()
 *   uid/gid credentials the helper process drops to
 *   tty     TTY to report to PAM, or NULL to derive it from stdin if stdin is a TTY
 *   env     in: environment exported to PAM; out: replaced by the environment list PAM returns
 *   fds     file descriptors (n_fds of them) that are closed in the helper process
 *
 * Returns the result of strv_free_and_replace() on success and a negative errno-style value on
 * failure (-EPERM for PAM errors, as those do not map to errno). Without HAVE_PAM this is a NOP
 * returning 0. */
static int setup_pam(
                const char *name,
                const char *user,
                uid_t uid,
                gid_t gid,
                const char *tty,
                char ***env,
                int fds[], size_t n_fds) {

#if HAVE_PAM

        static const struct pam_conv conv = {
                .conv = null_conv,
                .appdata_ptr = NULL
        };

        _cleanup_(barrier_destroy) Barrier barrier = BARRIER_NULL;
        pam_handle_t *handle = NULL;
        sigset_t old_ss;
        int pam_code = PAM_SUCCESS, r;
        char **nv, **e = NULL;
        bool close_session = false;
        pid_t pam_pid = 0, parent_pid;
        int flags = 0;

        assert(name);
        assert(user);
        assert(env);

        /* We set up PAM in the parent process, then fork. The child
         * will then stay around until killed via PR_SET_PDEATHSIG or
         * systemd via the cgroup logic. It will then remove the PAM
         * session again. The parent process will exec() the actual
         * daemon. We do things this way to ensure that the main PID
         * of the daemon is the one we initially fork()ed. */

        r = barrier_create(&barrier);
        if (r < 0)
                goto fail;

        if (log_get_max_level() < LOG_DEBUG)
                flags |= PAM_SILENT;

        pam_code = pam_start(name, user, &conv, &handle);
        if (pam_code != PAM_SUCCESS) {
                handle = NULL;
                goto fail;
        }

        if (!tty) {
                _cleanup_free_ char *q = NULL;

                /* Hmm, so no TTY was explicitly passed, but an fd passed to us directly might be a TTY. Let's figure
                 * out if that's the case, and read the TTY off it. */

                if (getttyname_malloc(STDIN_FILENO, &q) >= 0)
                        tty = strjoina("/dev/", q);
        }

        if (tty) {
                pam_code = pam_set_item(handle, PAM_TTY, tty);
                if (pam_code != PAM_SUCCESS)
                        goto fail;
        }

        /* Export our environment to PAM */
        STRV_FOREACH(nv, *env) {
                pam_code = pam_putenv(handle, *nv);
                if (pam_code != PAM_SUCCESS)
                        goto fail;
        }

        pam_code = pam_acct_mgmt(handle, flags);
        if (pam_code != PAM_SUCCESS)
                goto fail;

        pam_code = pam_open_session(handle, flags);
        if (pam_code != PAM_SUCCESS)
                goto fail;

        /* From here on the failure path has to close the session again */
        close_session = true;

        e = pam_getenvlist(handle);
        if (!e) {
                pam_code = PAM_BUF_ERR;
                goto fail;
        }

        /* Block SIGTERM, so that we know that it won't get lost in
         * the child */

        assert_se(sigprocmask_many(SIG_BLOCK, &old_ss, SIGTERM, -1) >= 0);

        parent_pid = getpid_cached();

        r = safe_fork("(sd-pam)", 0, &pam_pid);
        if (r < 0)
                goto fail;
        if (r == 0) {
                int sig, ret = EXIT_PAM;

                /* The child's job is to reset the PAM session on
                 * termination */
                barrier_set_role(&barrier, BARRIER_CHILD);

                /* Make sure we don't keep open the passed fds in this child. We assume that otherwise only those fds
                 * are open here that have been opened by PAM. */
                (void) close_many(fds, n_fds);

                /* Drop privileges - we don't need any to pam_close_session
                 * and this will make PR_SET_PDEATHSIG work in most cases.
                 * If this fails, ignore the error - but expect sd-pam threads
                 * to fail to exit normally */

                r = maybe_setgroups(0, NULL);
                if (r < 0)
                        log_warning_errno(r, "Failed to setgroups() in sd-pam: %m");
                if (setresgid(gid, gid, gid) < 0)
                        log_warning_errno(errno, "Failed to setresgid() in sd-pam: %m");
                if (setresuid(uid, uid, uid) < 0)
                        log_warning_errno(errno, "Failed to setresuid() in sd-pam: %m");

                (void) ignore_signals(SIGPIPE, -1);

                /* Wait until our parent died. This will only work if
                 * the above setresuid() succeeds, otherwise the kernel
                 * will not allow unprivileged parents kill their privileged
                 * children this way. We rely on the control groups kill logic
                 * to do the rest for us. */
                if (prctl(PR_SET_PDEATHSIG, SIGTERM) < 0)
                        goto child_finish;

                /* Tell the parent that our setup is done. This is especially
                 * important regarding dropping privileges. Otherwise, unit
                 * setup might race against our setresuid(2) call.
                 *
                 * If the parent aborted, we'll detect this below, hence ignore
                 * return failure here. */
                (void) barrier_place(&barrier);

                /* Check if our parent process might already have died? */
                if (getppid() == parent_pid) {
                        sigset_t ss;

                        assert_se(sigemptyset(&ss) >= 0);
                        assert_se(sigaddset(&ss, SIGTERM) >= 0);

                        for (;;) {
                                /* sigwait() reports failure through a positive error number as return value,
                                 * it neither returns a negative value nor sets errno. Hence check the return
                                 * value itself, so that 'sig' is never looked at when the call failed. */
                                r = sigwait(&ss, &sig);
                                if (r != 0) {
                                        if (r == EINTR)
                                                continue;

                                        goto child_finish;
                                }

                                assert(sig == SIGTERM);
                                break;
                        }
                }

                /* If our parent died we'll end the session */
                if (getppid() != parent_pid) {
                        pam_code = pam_close_session(handle, flags);
                        if (pam_code != PAM_SUCCESS)
                                goto child_finish;
                }

                ret = 0;

        child_finish:
                pam_end(handle, pam_code | flags);
                _exit(ret);
        }

        barrier_set_role(&barrier, BARRIER_PARENT);

        /* If the child was forked off successfully it will do all the
         * cleanups, so forget about the handle here. */
        handle = NULL;

        /* Unblock SIGTERM again in the parent */
        assert_se(sigprocmask(SIG_SETMASK, &old_ss, NULL) >= 0);

        /* We close the log explicitly here, since the PAM modules
         * might have opened it, but we don't want this fd around. */
        closelog();

        /* Synchronously wait for the child to initialize. We don't care for
         * errors as we cannot recover. However, warn loudly if it happens. */
        if (!barrier_place_and_sync(&barrier))
                log_error("PAM initialization failed");

        return strv_free_and_replace(*env, e);

fail:
        if (pam_code != PAM_SUCCESS) {
                log_error("PAM failed: %s", pam_strerror(handle, pam_code));
                r = -EPERM;  /* PAM errors do not map to errno */
        } else
                log_error_errno(r, "PAM failed: %m");

        if (handle) {
                if (close_session)
                        pam_code = pam_close_session(handle, flags);

                pam_end(handle, pam_code | flags);
        }

        strv_free(e);
        closelog();

        return r;
#else
        return 0;
#endif
}
5b6319dc 1347
5d6b1584
LP
/* Renames the current process to "(<name>)", where <name> is derived from the last component of
 * the given path, shortened to its final 8 characters if it is longer than that. If the last
 * component is empty the name "(...)" is used. */
static void rename_process_from_path(const char *path) {
        char buf[11]; /* '(' + at most 8 characters + ')' + NUL */
        const char *base, *tail;
        size_t len;

        /* The resulting string has to fit into 10 characters (i.e. the length of "/sbin/init") to
         * look pretty in /bin/ps */

        base = basename(path);
        if (isempty(base)) {
                rename_process("(...)");
                return;
        }

        len = strlen(base);

        /* The end of the process name is usually the more interesting part, since the beginning
         * might just be "systemd-". Hence keep the tail when we have to shorten. */
        tail = len > 8 ? base + len - 8 : base;
        if (len > 8)
                len = 8;

        buf[0] = '(';
        memcpy(buf + 1, tail, len);
        buf[len + 1] = ')';
        buf[len + 2] = 0;

        rename_process(buf);
}
1378
469830d1
LP
1379static bool context_has_address_families(const ExecContext *c) {
1380 assert(c);
1381
1382 return c->address_families_whitelist ||
1383 !set_isempty(c->address_families);
1384}
1385
1386static bool context_has_syscall_filters(const ExecContext *c) {
1387 assert(c);
1388
1389 return c->syscall_whitelist ||
8cfa775f 1390 !hashmap_isempty(c->syscall_filter);
469830d1
LP
1391}
1392
/* Decides whether the "no new privileges" flag is needed for this context. It is if it was
 * requested explicitly, or if we lack CAP_SYS_ADMIN and any of the seccomp-based settings checked
 * below is enabled. */
static bool context_has_no_new_privileges(const ExecContext *c) {
        assert(c);

        /* Explicitly requested */
        if (c->no_new_privileges)
                return true;

        /* If we are privileged, we don't need NNP */
        if (have_effective_cap(CAP_SYS_ADMIN))
                return false;

        /* We are unprivileged: NNP is needed as soon as any form of seccomp is in use */
        if (context_has_address_families(c))
                return true;

        if (c->memory_deny_write_execute ||
            c->restrict_realtime ||
            c->restrict_suid_sgid)
                return true;

        if (exec_context_restrict_namespaces_set(c))
                return true;

        if (c->protect_kernel_tunables ||
            c->protect_kernel_modules ||
            c->private_devices)
                return true;

        if (context_has_syscall_filters(c) ||
            !set_isempty(c->syscall_archs))
                return true;

        return c->lock_personality ||
                c->protect_hostname;
}
1416
349cc4a5 1417#if HAVE_SECCOMP
17df7223 1418
83f12b27 1419static bool skip_seccomp_unavailable(const Unit* u, const char* msg) {
f673b62d
LP
1420
1421 if (is_seccomp_available())
1422 return false;
1423
f673b62d 1424 log_unit_debug(u, "SECCOMP features not detected in the kernel, skipping %s", msg);
f673b62d 1425 return true;
83f12b27
FS
1426}
1427
165a31c0 1428static int apply_syscall_filter(const Unit* u, const ExecContext *c, bool needs_ambient_hack) {
469830d1 1429 uint32_t negative_action, default_action, action;
165a31c0 1430 int r;
8351ceae 1431
469830d1 1432 assert(u);
c0467cf3 1433 assert(c);
8351ceae 1434
469830d1 1435 if (!context_has_syscall_filters(c))
83f12b27
FS
1436 return 0;
1437
469830d1
LP
1438 if (skip_seccomp_unavailable(u, "SystemCallFilter="))
1439 return 0;
e9642be2 1440
ccc16c78 1441 negative_action = c->syscall_errno == 0 ? scmp_act_kill_process() : SCMP_ACT_ERRNO(c->syscall_errno);
e9642be2 1442
469830d1
LP
1443 if (c->syscall_whitelist) {
1444 default_action = negative_action;
1445 action = SCMP_ACT_ALLOW;
7c66bae2 1446 } else {
469830d1
LP
1447 default_action = SCMP_ACT_ALLOW;
1448 action = negative_action;
57183d11 1449 }
8351ceae 1450
165a31c0
LP
1451 if (needs_ambient_hack) {
1452 r = seccomp_filter_set_add(c->syscall_filter, c->syscall_whitelist, syscall_filter_sets + SYSCALL_FILTER_SET_SETUID);
1453 if (r < 0)
1454 return r;
1455 }
1456
b54f36c6 1457 return seccomp_load_syscall_filter_set_raw(default_action, c->syscall_filter, action, false);
4298d0b5
LP
1458}
1459
469830d1
LP
1460static int apply_syscall_archs(const Unit *u, const ExecContext *c) {
1461 assert(u);
4298d0b5
LP
1462 assert(c);
1463
469830d1 1464 if (set_isempty(c->syscall_archs))
83f12b27
FS
1465 return 0;
1466
469830d1
LP
1467 if (skip_seccomp_unavailable(u, "SystemCallArchitectures="))
1468 return 0;
4298d0b5 1469
469830d1
LP
1470 return seccomp_restrict_archs(c->syscall_archs);
1471}
4298d0b5 1472
469830d1
LP
1473static int apply_address_families(const Unit* u, const ExecContext *c) {
1474 assert(u);
1475 assert(c);
4298d0b5 1476
469830d1
LP
1477 if (!context_has_address_families(c))
1478 return 0;
4298d0b5 1479
469830d1
LP
1480 if (skip_seccomp_unavailable(u, "RestrictAddressFamilies="))
1481 return 0;
4298d0b5 1482
469830d1 1483 return seccomp_restrict_address_families(c->address_families, c->address_families_whitelist);
8351ceae 1484}
4298d0b5 1485
83f12b27 1486static int apply_memory_deny_write_execute(const Unit* u, const ExecContext *c) {
469830d1 1487 assert(u);
f3e43635
TM
1488 assert(c);
1489
469830d1 1490 if (!c->memory_deny_write_execute)
83f12b27
FS
1491 return 0;
1492
469830d1
LP
1493 if (skip_seccomp_unavailable(u, "MemoryDenyWriteExecute="))
1494 return 0;
f3e43635 1495
469830d1 1496 return seccomp_memory_deny_write_execute();
f3e43635
TM
1497}
1498
83f12b27 1499static int apply_restrict_realtime(const Unit* u, const ExecContext *c) {
469830d1 1500 assert(u);
f4170c67
LP
1501 assert(c);
1502
469830d1 1503 if (!c->restrict_realtime)
83f12b27
FS
1504 return 0;
1505
469830d1
LP
1506 if (skip_seccomp_unavailable(u, "RestrictRealtime="))
1507 return 0;
f4170c67 1508
469830d1 1509 return seccomp_restrict_realtime();
f4170c67
LP
1510}
1511
f69567cb
LP
1512static int apply_restrict_suid_sgid(const Unit* u, const ExecContext *c) {
1513 assert(u);
1514 assert(c);
1515
1516 if (!c->restrict_suid_sgid)
1517 return 0;
1518
1519 if (skip_seccomp_unavailable(u, "RestrictSUIDSGID="))
1520 return 0;
1521
1522 return seccomp_restrict_suid_sgid();
1523}
1524
59e856c7 1525static int apply_protect_sysctl(const Unit *u, const ExecContext *c) {
469830d1 1526 assert(u);
59eeb84b
LP
1527 assert(c);
1528
1529 /* Turn off the legacy sysctl() system call. Many distributions turn this off while building the kernel, but
1530 * let's protect even those systems where this is left on in the kernel. */
1531
469830d1 1532 if (!c->protect_kernel_tunables)
59eeb84b
LP
1533 return 0;
1534
469830d1
LP
1535 if (skip_seccomp_unavailable(u, "ProtectKernelTunables="))
1536 return 0;
59eeb84b 1537
469830d1 1538 return seccomp_protect_sysctl();
59eeb84b
LP
1539}
1540
59e856c7 1541static int apply_protect_kernel_modules(const Unit *u, const ExecContext *c) {
469830d1 1542 assert(u);
502d704e
DH
1543 assert(c);
1544
25a8d8a0 1545 /* Turn off module syscalls on ProtectKernelModules=yes */
502d704e 1546
469830d1
LP
1547 if (!c->protect_kernel_modules)
1548 return 0;
1549
502d704e
DH
1550 if (skip_seccomp_unavailable(u, "ProtectKernelModules="))
1551 return 0;
1552
b54f36c6 1553 return seccomp_load_syscall_filter_set(SCMP_ACT_ALLOW, syscall_filter_sets + SYSCALL_FILTER_SET_MODULE, SCMP_ACT_ERRNO(EPERM), false);
502d704e
DH
1554}
1555
59e856c7 1556static int apply_private_devices(const Unit *u, const ExecContext *c) {
469830d1 1557 assert(u);
ba128bb8
LP
1558 assert(c);
1559
8f81a5f6 1560 /* If PrivateDevices= is set, also turn off iopl and all @raw-io syscalls. */
ba128bb8 1561
469830d1
LP
1562 if (!c->private_devices)
1563 return 0;
1564
ba128bb8
LP
1565 if (skip_seccomp_unavailable(u, "PrivateDevices="))
1566 return 0;
1567
b54f36c6 1568 return seccomp_load_syscall_filter_set(SCMP_ACT_ALLOW, syscall_filter_sets + SYSCALL_FILTER_SET_RAW_IO, SCMP_ACT_ERRNO(EPERM), false);
ba128bb8
LP
1569}
1570
34cf6c43 1571static int apply_restrict_namespaces(const Unit *u, const ExecContext *c) {
469830d1 1572 assert(u);
add00535
LP
1573 assert(c);
1574
1575 if (!exec_context_restrict_namespaces_set(c))
1576 return 0;
1577
1578 if (skip_seccomp_unavailable(u, "RestrictNamespaces="))
1579 return 0;
1580
1581 return seccomp_restrict_namespaces(c->restrict_namespaces);
1582}
1583
78e864e5 1584static int apply_lock_personality(const Unit* u, const ExecContext *c) {
e8132d63
LP
1585 unsigned long personality;
1586 int r;
78e864e5
TM
1587
1588 assert(u);
1589 assert(c);
1590
1591 if (!c->lock_personality)
1592 return 0;
1593
1594 if (skip_seccomp_unavailable(u, "LockPersonality="))
1595 return 0;
1596
e8132d63
LP
1597 personality = c->personality;
1598
1599 /* If personality is not specified, use either PER_LINUX or PER_LINUX32 depending on what is currently set. */
1600 if (personality == PERSONALITY_INVALID) {
1601
1602 r = opinionated_personality(&personality);
1603 if (r < 0)
1604 return r;
1605 }
78e864e5
TM
1606
1607 return seccomp_lock_personality(personality);
1608}
1609
c0467cf3 1610#endif
8351ceae 1611
3042bbeb 1612static void do_idle_pipe_dance(int idle_pipe[static 4]) {
31a7eb86
ZJS
1613 assert(idle_pipe);
1614
54eb2300
LP
1615 idle_pipe[1] = safe_close(idle_pipe[1]);
1616 idle_pipe[2] = safe_close(idle_pipe[2]);
31a7eb86
ZJS
1617
1618 if (idle_pipe[0] >= 0) {
1619 int r;
1620
1621 r = fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT_USEC);
1622
1623 if (idle_pipe[3] >= 0 && r == 0 /* timeout */) {
c7cc737f
LP
1624 ssize_t n;
1625
31a7eb86 1626 /* Signal systemd that we are bored and want to continue. */
c7cc737f
LP
1627 n = write(idle_pipe[3], "x", 1);
1628 if (n > 0)
cd972d69
ZJS
1629 /* Wait for systemd to react to the signal above. */
1630 fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT2_USEC);
31a7eb86
ZJS
1631 }
1632
54eb2300 1633 idle_pipe[0] = safe_close(idle_pipe[0]);
31a7eb86
ZJS
1634
1635 }
1636
54eb2300 1637 idle_pipe[3] = safe_close(idle_pipe[3]);
31a7eb86
ZJS
1638}
1639
fb2042dd
YW
1640static const char *exec_directory_env_name_to_string(ExecDirectoryType t);
1641
7cae38c4 1642static int build_environment(
34cf6c43 1643 const Unit *u,
9fa95f85 1644 const ExecContext *c,
1e22b5cd 1645 const ExecParameters *p,
da6053d0 1646 size_t n_fds,
7cae38c4
LP
1647 const char *home,
1648 const char *username,
1649 const char *shell,
7bce046b
LP
1650 dev_t journal_stream_dev,
1651 ino_t journal_stream_ino,
7cae38c4
LP
1652 char ***ret) {
1653
1654 _cleanup_strv_free_ char **our_env = NULL;
fb2042dd 1655 ExecDirectoryType t;
da6053d0 1656 size_t n_env = 0;
7cae38c4
LP
1657 char *x;
1658
4b58153d 1659 assert(u);
7cae38c4 1660 assert(c);
7c1cb6f1 1661 assert(p);
7cae38c4
LP
1662 assert(ret);
1663
fb2042dd 1664 our_env = new0(char*, 14 + _EXEC_DIRECTORY_TYPE_MAX);
7cae38c4
LP
1665 if (!our_env)
1666 return -ENOMEM;
1667
1668 if (n_fds > 0) {
8dd4c05b
LP
1669 _cleanup_free_ char *joined = NULL;
1670
df0ff127 1671 if (asprintf(&x, "LISTEN_PID="PID_FMT, getpid_cached()) < 0)
7cae38c4
LP
1672 return -ENOMEM;
1673 our_env[n_env++] = x;
1674
da6053d0 1675 if (asprintf(&x, "LISTEN_FDS=%zu", n_fds) < 0)
7cae38c4
LP
1676 return -ENOMEM;
1677 our_env[n_env++] = x;
8dd4c05b 1678
1e22b5cd 1679 joined = strv_join(p->fd_names, ":");
8dd4c05b
LP
1680 if (!joined)
1681 return -ENOMEM;
1682
605405c6 1683 x = strjoin("LISTEN_FDNAMES=", joined);
8dd4c05b
LP
1684 if (!x)
1685 return -ENOMEM;
1686 our_env[n_env++] = x;
7cae38c4
LP
1687 }
1688
b08af3b1 1689 if ((p->flags & EXEC_SET_WATCHDOG) && p->watchdog_usec > 0) {
df0ff127 1690 if (asprintf(&x, "WATCHDOG_PID="PID_FMT, getpid_cached()) < 0)
09812eb7
LP
1691 return -ENOMEM;
1692 our_env[n_env++] = x;
1693
1e22b5cd 1694 if (asprintf(&x, "WATCHDOG_USEC="USEC_FMT, p->watchdog_usec) < 0)
09812eb7
LP
1695 return -ENOMEM;
1696 our_env[n_env++] = x;
1697 }
1698
fd63e712
LP
1699 /* If this is D-Bus, tell the nss-systemd module, since it relies on being able to use D-Bus look up dynamic
1700 * users via PID 1, possibly dead-locking the dbus daemon. This way it will not use D-Bus to resolve names, but
1701 * check the database directly. */
ac647978 1702 if (p->flags & EXEC_NSS_BYPASS_BUS) {
fd63e712
LP
1703 x = strdup("SYSTEMD_NSS_BYPASS_BUS=1");
1704 if (!x)
1705 return -ENOMEM;
1706 our_env[n_env++] = x;
1707 }
1708
7cae38c4
LP
1709 if (home) {
1710 x = strappend("HOME=", home);
1711 if (!x)
1712 return -ENOMEM;
7bbead1d
LP
1713
1714 path_simplify(x + 5, true);
7cae38c4
LP
1715 our_env[n_env++] = x;
1716 }
1717
1718 if (username) {
1719 x = strappend("LOGNAME=", username);
1720 if (!x)
1721 return -ENOMEM;
1722 our_env[n_env++] = x;
1723
1724 x = strappend("USER=", username);
1725 if (!x)
1726 return -ENOMEM;
1727 our_env[n_env++] = x;
1728 }
1729
1730 if (shell) {
1731 x = strappend("SHELL=", shell);
1732 if (!x)
1733 return -ENOMEM;
7bbead1d
LP
1734
1735 path_simplify(x + 6, true);
7cae38c4
LP
1736 our_env[n_env++] = x;
1737 }
1738
4b58153d
LP
1739 if (!sd_id128_is_null(u->invocation_id)) {
1740 if (asprintf(&x, "INVOCATION_ID=" SD_ID128_FORMAT_STR, SD_ID128_FORMAT_VAL(u->invocation_id)) < 0)
1741 return -ENOMEM;
1742
1743 our_env[n_env++] = x;
1744 }
1745
6af760f3
LP
1746 if (exec_context_needs_term(c)) {
1747 const char *tty_path, *term = NULL;
1748
1749 tty_path = exec_context_tty_path(c);
1750
1751 /* If we are forked off PID 1 and we are supposed to operate on /dev/console, then let's try to inherit
1752 * the $TERM set for PID 1. This is useful for containers so that the $TERM the container manager
1753 * passes to PID 1 ends up all the way in the console login shown. */
1754
1755 if (path_equal(tty_path, "/dev/console") && getppid() == 1)
1756 term = getenv("TERM");
1757 if (!term)
1758 term = default_term_for_tty(tty_path);
7cae38c4 1759
6af760f3 1760 x = strappend("TERM=", term);
7cae38c4
LP
1761 if (!x)
1762 return -ENOMEM;
1763 our_env[n_env++] = x;
1764 }
1765
7bce046b
LP
1766 if (journal_stream_dev != 0 && journal_stream_ino != 0) {
1767 if (asprintf(&x, "JOURNAL_STREAM=" DEV_FMT ":" INO_FMT, journal_stream_dev, journal_stream_ino) < 0)
1768 return -ENOMEM;
1769
1770 our_env[n_env++] = x;
1771 }
1772
fb2042dd
YW
1773 for (t = 0; t < _EXEC_DIRECTORY_TYPE_MAX; t++) {
1774 _cleanup_free_ char *pre = NULL, *joined = NULL;
1775 const char *n;
1776
1777 if (!p->prefix[t])
1778 continue;
1779
1780 if (strv_isempty(c->directories[t].paths))
1781 continue;
1782
1783 n = exec_directory_env_name_to_string(t);
1784 if (!n)
1785 continue;
1786
1787 pre = strjoin(p->prefix[t], "/");
1788 if (!pre)
1789 return -ENOMEM;
1790
1791 joined = strv_join_prefix(c->directories[t].paths, ":", pre);
1792 if (!joined)
1793 return -ENOMEM;
1794
1795 x = strjoin(n, "=", joined);
1796 if (!x)
1797 return -ENOMEM;
1798
1799 our_env[n_env++] = x;
1800 }
1801
7cae38c4 1802 our_env[n_env++] = NULL;
fb2042dd 1803 assert(n_env <= 14 + _EXEC_DIRECTORY_TYPE_MAX);
7cae38c4 1804
ae2a15bc 1805 *ret = TAKE_PTR(our_env);
7cae38c4
LP
1806
1807 return 0;
1808}
1809
b4c14404
FB
1810static int build_pass_environment(const ExecContext *c, char ***ret) {
1811 _cleanup_strv_free_ char **pass_env = NULL;
1812 size_t n_env = 0, n_bufsize = 0;
1813 char **i;
1814
1815 STRV_FOREACH(i, c->pass_environment) {
1816 _cleanup_free_ char *x = NULL;
1817 char *v;
1818
1819 v = getenv(*i);
1820 if (!v)
1821 continue;
605405c6 1822 x = strjoin(*i, "=", v);
b4c14404
FB
1823 if (!x)
1824 return -ENOMEM;
00819cc1 1825
b4c14404
FB
1826 if (!GREEDY_REALLOC(pass_env, n_bufsize, n_env + 2))
1827 return -ENOMEM;
00819cc1 1828
1cc6c93a 1829 pass_env[n_env++] = TAKE_PTR(x);
b4c14404 1830 pass_env[n_env] = NULL;
b4c14404
FB
1831 }
1832
ae2a15bc 1833 *ret = TAKE_PTR(pass_env);
b4c14404
FB
1834
1835 return 0;
1836}
1837
8b44a3d2
LP
1838static bool exec_needs_mount_namespace(
1839 const ExecContext *context,
1840 const ExecParameters *params,
4657abb5 1841 const ExecRuntime *runtime) {
8b44a3d2
LP
1842
1843 assert(context);
1844 assert(params);
1845
915e6d16
LP
1846 if (context->root_image)
1847 return true;
1848
2a624c36
AP
1849 if (!strv_isempty(context->read_write_paths) ||
1850 !strv_isempty(context->read_only_paths) ||
1851 !strv_isempty(context->inaccessible_paths))
8b44a3d2
LP
1852 return true;
1853
42b1d8e0 1854 if (context->n_bind_mounts > 0)
d2d6c096
LP
1855 return true;
1856
2abd4e38
YW
1857 if (context->n_temporary_filesystems > 0)
1858 return true;
1859
37ed15d7 1860 if (!IN_SET(context->mount_flags, 0, MS_SHARED))
8b44a3d2
LP
1861 return true;
1862
1863 if (context->private_tmp && runtime && (runtime->tmp_dir || runtime->var_tmp_dir))
1864 return true;
1865
8b44a3d2 1866 if (context->private_devices ||
228af36f 1867 context->private_mounts ||
8b44a3d2 1868 context->protect_system != PROTECT_SYSTEM_NO ||
59eeb84b
LP
1869 context->protect_home != PROTECT_HOME_NO ||
1870 context->protect_kernel_tunables ||
c575770b 1871 context->protect_kernel_modules ||
59eeb84b 1872 context->protect_control_groups)
8b44a3d2
LP
1873 return true;
1874
37c56f89
YW
1875 if (context->root_directory) {
1876 ExecDirectoryType t;
1877
1878 if (context->mount_apivfs)
1879 return true;
1880
1881 for (t = 0; t < _EXEC_DIRECTORY_TYPE_MAX; t++) {
1882 if (!params->prefix[t])
1883 continue;
1884
1885 if (!strv_isempty(context->directories[t].paths))
1886 return true;
1887 }
1888 }
5d997827 1889
42b1d8e0 1890 if (context->dynamic_user &&
b43ee82f 1891 (!strv_isempty(context->directories[EXEC_DIRECTORY_STATE].paths) ||
42b1d8e0
YW
1892 !strv_isempty(context->directories[EXEC_DIRECTORY_CACHE].paths) ||
1893 !strv_isempty(context->directories[EXEC_DIRECTORY_LOGS].paths)))
1894 return true;
1895
8b44a3d2
LP
1896 return false;
1897}
1898
d251207d
LP
1899static int setup_private_users(uid_t uid, gid_t gid) {
1900 _cleanup_free_ char *uid_map = NULL, *gid_map = NULL;
1901 _cleanup_close_pair_ int errno_pipe[2] = { -1, -1 };
1902 _cleanup_close_ int unshare_ready_fd = -1;
1903 _cleanup_(sigkill_waitp) pid_t pid = 0;
1904 uint64_t c = 1;
d251207d
LP
1905 ssize_t n;
1906 int r;
1907
1908 /* Set up a user namespace and map root to root, the selected UID/GID to itself, and everything else to
1909 * nobody. In order to be able to write this mapping we need CAP_SETUID in the original user namespace, which
1910 * we however lack after opening the user namespace. To work around this we fork() a temporary child process,
1911 * which waits for the parent to create the new user namespace while staying in the original namespace. The
1912 * child then writes the UID mapping, under full privileges. The parent waits for the child to finish and
1913 * continues execution normally. */
1914
587ab01b
ZJS
1915 if (uid != 0 && uid_is_valid(uid)) {
1916 r = asprintf(&uid_map,
1917 "0 0 1\n" /* Map root → root */
1918 UID_FMT " " UID_FMT " 1\n", /* Map $UID → $UID */
1919 uid, uid);
1920 if (r < 0)
1921 return -ENOMEM;
1922 } else {
e0f3720e 1923 uid_map = strdup("0 0 1\n"); /* The case where the above is the same */
587ab01b
ZJS
1924 if (!uid_map)
1925 return -ENOMEM;
1926 }
d251207d 1927
587ab01b
ZJS
1928 if (gid != 0 && gid_is_valid(gid)) {
1929 r = asprintf(&gid_map,
1930 "0 0 1\n" /* Map root → root */
1931 GID_FMT " " GID_FMT " 1\n", /* Map $GID → $GID */
1932 gid, gid);
1933 if (r < 0)
1934 return -ENOMEM;
1935 } else {
d251207d 1936 gid_map = strdup("0 0 1\n"); /* The case where the above is the same */
587ab01b
ZJS
1937 if (!gid_map)
1938 return -ENOMEM;
1939 }
d251207d
LP
1940
1941 /* Create a communication channel so that the parent can tell the child when it finished creating the user
1942 * namespace. */
1943 unshare_ready_fd = eventfd(0, EFD_CLOEXEC);
1944 if (unshare_ready_fd < 0)
1945 return -errno;
1946
1947 /* Create a communication channel so that the child can tell the parent a proper error code in case it
1948 * failed. */
1949 if (pipe2(errno_pipe, O_CLOEXEC) < 0)
1950 return -errno;
1951
4c253ed1
LP
1952 r = safe_fork("(sd-userns)", FORK_RESET_SIGNALS|FORK_DEATHSIG, &pid);
1953 if (r < 0)
1954 return r;
1955 if (r == 0) {
d251207d
LP
1956 _cleanup_close_ int fd = -1;
1957 const char *a;
1958 pid_t ppid;
1959
1960 /* Child process, running in the original user namespace. Let's update the parent's UID/GID map from
1961 * here, after the parent opened its own user namespace. */
1962
1963 ppid = getppid();
1964 errno_pipe[0] = safe_close(errno_pipe[0]);
1965
1966 /* Wait until the parent unshared the user namespace */
1967 if (read(unshare_ready_fd, &c, sizeof(c)) < 0) {
1968 r = -errno;
1969 goto child_fail;
1970 }
1971
1972 /* Disable the setgroups() system call in the child user namespace, for good. */
1973 a = procfs_file_alloca(ppid, "setgroups");
1974 fd = open(a, O_WRONLY|O_CLOEXEC);
1975 if (fd < 0) {
1976 if (errno != ENOENT) {
1977 r = -errno;
1978 goto child_fail;
1979 }
1980
1981 /* If the file is missing the kernel is too old, let's continue anyway. */
1982 } else {
1983 if (write(fd, "deny\n", 5) < 0) {
1984 r = -errno;
1985 goto child_fail;
1986 }
1987
1988 fd = safe_close(fd);
1989 }
1990
1991 /* First write the GID map */
1992 a = procfs_file_alloca(ppid, "gid_map");
1993 fd = open(a, O_WRONLY|O_CLOEXEC);
1994 if (fd < 0) {
1995 r = -errno;
1996 goto child_fail;
1997 }
1998 if (write(fd, gid_map, strlen(gid_map)) < 0) {
1999 r = -errno;
2000 goto child_fail;
2001 }
2002 fd = safe_close(fd);
2003
2004 /* The write the UID map */
2005 a = procfs_file_alloca(ppid, "uid_map");
2006 fd = open(a, O_WRONLY|O_CLOEXEC);
2007 if (fd < 0) {
2008 r = -errno;
2009 goto child_fail;
2010 }
2011 if (write(fd, uid_map, strlen(uid_map)) < 0) {
2012 r = -errno;
2013 goto child_fail;
2014 }
2015
2016 _exit(EXIT_SUCCESS);
2017
2018 child_fail:
2019 (void) write(errno_pipe[1], &r, sizeof(r));
2020 _exit(EXIT_FAILURE);
2021 }
2022
2023 errno_pipe[1] = safe_close(errno_pipe[1]);
2024
2025 if (unshare(CLONE_NEWUSER) < 0)
2026 return -errno;
2027
2028 /* Let the child know that the namespace is ready now */
2029 if (write(unshare_ready_fd, &c, sizeof(c)) < 0)
2030 return -errno;
2031
2032 /* Try to read an error code from the child */
2033 n = read(errno_pipe[0], &r, sizeof(r));
2034 if (n < 0)
2035 return -errno;
2036 if (n == sizeof(r)) { /* an error code was sent to us */
2037 if (r < 0)
2038 return r;
2039 return -EIO;
2040 }
2041 if (n != 0) /* on success we should have read 0 bytes */
2042 return -EIO;
2043
2e87a1fd
LP
2044 r = wait_for_terminate_and_check("(sd-userns)", pid, 0);
2045 pid = 0;
d251207d
LP
2046 if (r < 0)
2047 return r;
2e87a1fd 2048 if (r != EXIT_SUCCESS) /* If something strange happened with the child, let's consider this fatal, too */
d251207d
LP
2049 return -EIO;
2050
2051 return 0;
2052}
2053
3536f49e 2054static int setup_exec_directory(
07689d5d
LP
2055 const ExecContext *context,
2056 const ExecParameters *params,
2057 uid_t uid,
3536f49e 2058 gid_t gid,
3536f49e
YW
2059 ExecDirectoryType type,
2060 int *exit_status) {
07689d5d 2061
72fd1768 2062 static const int exit_status_table[_EXEC_DIRECTORY_TYPE_MAX] = {
3536f49e
YW
2063 [EXEC_DIRECTORY_RUNTIME] = EXIT_RUNTIME_DIRECTORY,
2064 [EXEC_DIRECTORY_STATE] = EXIT_STATE_DIRECTORY,
2065 [EXEC_DIRECTORY_CACHE] = EXIT_CACHE_DIRECTORY,
2066 [EXEC_DIRECTORY_LOGS] = EXIT_LOGS_DIRECTORY,
2067 [EXEC_DIRECTORY_CONFIGURATION] = EXIT_CONFIGURATION_DIRECTORY,
2068 };
07689d5d
LP
2069 char **rt;
2070 int r;
2071
2072 assert(context);
2073 assert(params);
72fd1768 2074 assert(type >= 0 && type < _EXEC_DIRECTORY_TYPE_MAX);
3536f49e 2075 assert(exit_status);
07689d5d 2076
3536f49e
YW
2077 if (!params->prefix[type])
2078 return 0;
2079
8679efde 2080 if (params->flags & EXEC_CHOWN_DIRECTORIES) {
3536f49e
YW
2081 if (!uid_is_valid(uid))
2082 uid = 0;
2083 if (!gid_is_valid(gid))
2084 gid = 0;
2085 }
2086
2087 STRV_FOREACH(rt, context->directories[type].paths) {
6c47cd7d 2088 _cleanup_free_ char *p = NULL, *pp = NULL;
07689d5d 2089
edbfeb12 2090 p = path_join(params->prefix[type], *rt);
3536f49e
YW
2091 if (!p) {
2092 r = -ENOMEM;
2093 goto fail;
2094 }
07689d5d 2095
23a7448e
YW
2096 r = mkdir_parents_label(p, 0755);
2097 if (r < 0)
3536f49e 2098 goto fail;
23a7448e 2099
8092a48c 2100 if (context->dynamic_user &&
40cd2ecc
LP
2101 (!IN_SET(type, EXEC_DIRECTORY_RUNTIME, EXEC_DIRECTORY_CONFIGURATION) ||
2102 (type == EXEC_DIRECTORY_RUNTIME && context->runtime_directory_preserve_mode != EXEC_PRESERVE_NO))) {
6c9c51e5 2103 _cleanup_free_ char *private_root = NULL;
6c47cd7d
LP
2104
2105 /* So, here's one extra complication when dealing with DynamicUser=1 units. In that case we
2106 * want to avoid leaving a directory around fully accessible that is owned by a dynamic user
2107 * whose UID is later on reused. To lock this down we use the same trick used by container
2108 * managers to prohibit host users to get access to files of the same UID in containers: we
2109 * place everything inside a directory that has an access mode of 0700 and is owned root:root,
2110 * so that it acts as security boundary for unprivileged host code. We then use fs namespacing
2111 * to make this directory permeable for the service itself.
2112 *
2113 * Specifically: for a service which wants a special directory "foo/" we first create a
2114 * directory "private/" with access mode 0700 owned by root:root. Then we place "foo" inside of
2115 * that directory (i.e. "private/foo/"), and make "foo" a symlink to "private/foo". This way,
2116 * privileged host users can access "foo/" as usual, but unprivileged host users can't look
2117 * into it. Inside of the namespaceof the container "private/" is replaced by a more liberally
2118 * accessible tmpfs, into which the host's "private/foo/" is mounted under the same name, thus
2119 * disabling the access boundary for the service and making sure it only gets access to the
2120 * dirs it needs but no others. Tricky? Yes, absolutely, but it works!
2121 *
2122 * Note that we don't do this for EXEC_DIRECTORY_CONFIGURATION as that's assumed not to be
8092a48c
YW
2123 * owned by the service itself.
2124 * Also, note that we don't do this for EXEC_DIRECTORY_RUNTIME as that's often used for sharing
2125 * files or sockets with other services. */
6c47cd7d 2126
edbfeb12 2127 private_root = path_join(params->prefix[type], "private");
6c47cd7d
LP
2128 if (!private_root) {
2129 r = -ENOMEM;
2130 goto fail;
2131 }
2132
2133 /* First set up private root if it doesn't exist yet, with access mode 0700 and owned by root:root */
37c1d5e9 2134 r = mkdir_safe_label(private_root, 0700, 0, 0, MKDIR_WARN_MODE);
6c47cd7d
LP
2135 if (r < 0)
2136 goto fail;
2137
edbfeb12 2138 pp = path_join(private_root, *rt);
6c47cd7d
LP
2139 if (!pp) {
2140 r = -ENOMEM;
2141 goto fail;
2142 }
2143
2144 /* Create all directories between the configured directory and this private root, and mark them 0755 */
2145 r = mkdir_parents_label(pp, 0755);
2146 if (r < 0)
2147 goto fail;
2148
949befd3
LP
2149 if (is_dir(p, false) > 0 &&
2150 (laccess(pp, F_OK) < 0 && errno == ENOENT)) {
2151
2152 /* Hmm, the private directory doesn't exist yet, but the normal one exists? If so, move
2153 * it over. Most likely the service has been upgraded from one that didn't use
2154 * DynamicUser=1, to one that does. */
2155
2156 if (rename(p, pp) < 0) {
2157 r = -errno;
2158 goto fail;
2159 }
2160 } else {
2161 /* Otherwise, create the actual directory for the service */
2162
2163 r = mkdir_label(pp, context->directories[type].mode);
2164 if (r < 0 && r != -EEXIST)
2165 goto fail;
2166 }
6c47cd7d 2167
6c47cd7d 2168 /* And link it up from the original place */
6c9c51e5 2169 r = symlink_idempotent(pp, p, true);
6c47cd7d
LP
2170 if (r < 0)
2171 goto fail;
2172
6c47cd7d
LP
2173 } else {
2174 r = mkdir_label(p, context->directories[type].mode);
d484580c 2175 if (r < 0) {
d484580c
LP
2176 if (r != -EEXIST)
2177 goto fail;
2178
206e9864
LP
2179 if (type == EXEC_DIRECTORY_CONFIGURATION) {
2180 struct stat st;
2181
2182 /* Don't change the owner/access mode of the configuration directory,
2183 * as in the common case it is not written to by a service, and shall
2184 * not be writable. */
2185
2186 if (stat(p, &st) < 0) {
2187 r = -errno;
2188 goto fail;
2189 }
2190
2191 /* Still complain if the access mode doesn't match */
2192 if (((st.st_mode ^ context->directories[type].mode) & 07777) != 0)
2193 log_warning("%s \'%s\' already exists but the mode is different. "
2194 "(File system: %o %sMode: %o)",
2195 exec_directory_type_to_string(type), *rt,
2196 st.st_mode & 07777, exec_directory_type_to_string(type), context->directories[type].mode & 07777);
2197
6cff72eb 2198 continue;
206e9864 2199 }
6cff72eb 2200 }
a1164ae3 2201 }
07689d5d 2202
206e9864 2203 /* Lock down the access mode (we use chmod_and_chown() to make this idempotent. We don't
5238e957 2204 * specify UID/GID here, so that path_chown_recursive() can optimize things depending on the
206e9864
LP
2205 * current UID/GID ownership.) */
2206 r = chmod_and_chown(pp ?: p, context->directories[type].mode, UID_INVALID, GID_INVALID);
2207 if (r < 0)
2208 goto fail;
c71b2eb7 2209
607b358e
LP
2210 /* Then, change the ownership of the whole tree, if necessary. When dynamic users are used we
2211 * drop the suid/sgid bits, since we really don't want SUID/SGID files for dynamic UID/GID
2212 * assignments to exist.*/
2213 r = path_chown_recursive(pp ?: p, uid, gid, context->dynamic_user ? 01777 : 07777);
07689d5d 2214 if (r < 0)
3536f49e 2215 goto fail;
07689d5d
LP
2216 }
2217
2218 return 0;
3536f49e
YW
2219
2220fail:
2221 *exit_status = exit_status_table[type];
3536f49e 2222 return r;
07689d5d
LP
2223}
2224
92b423b9 2225#if ENABLE_SMACK
cefc33ae
LP
2226static int setup_smack(
2227 const ExecContext *context,
2228 const ExecCommand *command) {
2229
cefc33ae
LP
2230 int r;
2231
2232 assert(context);
2233 assert(command);
2234
cefc33ae
LP
2235 if (context->smack_process_label) {
2236 r = mac_smack_apply_pid(0, context->smack_process_label);
2237 if (r < 0)
2238 return r;
2239 }
2240#ifdef SMACK_DEFAULT_PROCESS_LABEL
2241 else {
2242 _cleanup_free_ char *exec_label = NULL;
2243
2244 r = mac_smack_read(command->path, SMACK_ATTR_EXEC, &exec_label);
4c701096 2245 if (r < 0 && !IN_SET(r, -ENODATA, -EOPNOTSUPP))
cefc33ae
LP
2246 return r;
2247
2248 r = mac_smack_apply_pid(0, exec_label ? : SMACK_DEFAULT_PROCESS_LABEL);
2249 if (r < 0)
2250 return r;
2251 }
cefc33ae
LP
2252#endif
2253
2254 return 0;
2255}
92b423b9 2256#endif
cefc33ae 2257
6c47cd7d
LP
2258static int compile_bind_mounts(
2259 const ExecContext *context,
2260 const ExecParameters *params,
2261 BindMount **ret_bind_mounts,
da6053d0 2262 size_t *ret_n_bind_mounts,
6c47cd7d
LP
2263 char ***ret_empty_directories) {
2264
2265 _cleanup_strv_free_ char **empty_directories = NULL;
2266 BindMount *bind_mounts;
da6053d0 2267 size_t n, h = 0, i;
6c47cd7d
LP
2268 ExecDirectoryType t;
2269 int r;
2270
2271 assert(context);
2272 assert(params);
2273 assert(ret_bind_mounts);
2274 assert(ret_n_bind_mounts);
2275 assert(ret_empty_directories);
2276
2277 n = context->n_bind_mounts;
2278 for (t = 0; t < _EXEC_DIRECTORY_TYPE_MAX; t++) {
2279 if (!params->prefix[t])
2280 continue;
2281
2282 n += strv_length(context->directories[t].paths);
2283 }
2284
2285 if (n <= 0) {
2286 *ret_bind_mounts = NULL;
2287 *ret_n_bind_mounts = 0;
2288 *ret_empty_directories = NULL;
2289 return 0;
2290 }
2291
2292 bind_mounts = new(BindMount, n);
2293 if (!bind_mounts)
2294 return -ENOMEM;
2295
a8cabc61 2296 for (i = 0; i < context->n_bind_mounts; i++) {
6c47cd7d
LP
2297 BindMount *item = context->bind_mounts + i;
2298 char *s, *d;
2299
2300 s = strdup(item->source);
2301 if (!s) {
2302 r = -ENOMEM;
2303 goto finish;
2304 }
2305
2306 d = strdup(item->destination);
2307 if (!d) {
2308 free(s);
2309 r = -ENOMEM;
2310 goto finish;
2311 }
2312
2313 bind_mounts[h++] = (BindMount) {
2314 .source = s,
2315 .destination = d,
2316 .read_only = item->read_only,
2317 .recursive = item->recursive,
2318 .ignore_enoent = item->ignore_enoent,
2319 };
2320 }
2321
2322 for (t = 0; t < _EXEC_DIRECTORY_TYPE_MAX; t++) {
2323 char **suffix;
2324
2325 if (!params->prefix[t])
2326 continue;
2327
2328 if (strv_isempty(context->directories[t].paths))
2329 continue;
2330
8092a48c 2331 if (context->dynamic_user &&
5609f688
YW
2332 !IN_SET(t, EXEC_DIRECTORY_RUNTIME, EXEC_DIRECTORY_CONFIGURATION) &&
2333 !(context->root_directory || context->root_image)) {
6c47cd7d
LP
2334 char *private_root;
2335
2336 /* So this is for a dynamic user, and we need to make sure the process can access its own
2337 * directory. For that we overmount the usually inaccessible "private" subdirectory with a
2338 * tmpfs that makes it accessible and is empty except for the submounts we do this for. */
2339
2340 private_root = strjoin(params->prefix[t], "/private");
2341 if (!private_root) {
2342 r = -ENOMEM;
2343 goto finish;
2344 }
2345
2346 r = strv_consume(&empty_directories, private_root);
a635a7ae 2347 if (r < 0)
6c47cd7d 2348 goto finish;
6c47cd7d
LP
2349 }
2350
2351 STRV_FOREACH(suffix, context->directories[t].paths) {
2352 char *s, *d;
2353
8092a48c
YW
2354 if (context->dynamic_user &&
2355 !IN_SET(t, EXEC_DIRECTORY_RUNTIME, EXEC_DIRECTORY_CONFIGURATION))
6c47cd7d
LP
2356 s = strjoin(params->prefix[t], "/private/", *suffix);
2357 else
2358 s = strjoin(params->prefix[t], "/", *suffix);
2359 if (!s) {
2360 r = -ENOMEM;
2361 goto finish;
2362 }
2363
5609f688
YW
2364 if (context->dynamic_user &&
2365 !IN_SET(t, EXEC_DIRECTORY_RUNTIME, EXEC_DIRECTORY_CONFIGURATION) &&
2366 (context->root_directory || context->root_image))
2367 /* When RootDirectory= or RootImage= are set, then the symbolic link to the private
2368 * directory is not created on the root directory. So, let's bind-mount the directory
2369 * on the 'non-private' place. */
2370 d = strjoin(params->prefix[t], "/", *suffix);
2371 else
2372 d = strdup(s);
6c47cd7d
LP
2373 if (!d) {
2374 free(s);
2375 r = -ENOMEM;
2376 goto finish;
2377 }
2378
2379 bind_mounts[h++] = (BindMount) {
2380 .source = s,
2381 .destination = d,
2382 .read_only = false,
9ce4e4b0 2383 .nosuid = context->dynamic_user, /* don't allow suid/sgid when DynamicUser= is on */
6c47cd7d
LP
2384 .recursive = true,
2385 .ignore_enoent = false,
2386 };
2387 }
2388 }
2389
2390 assert(h == n);
2391
2392 *ret_bind_mounts = bind_mounts;
2393 *ret_n_bind_mounts = n;
ae2a15bc 2394 *ret_empty_directories = TAKE_PTR(empty_directories);
6c47cd7d
LP
2395
2396 return (int) n;
2397
2398finish:
2399 bind_mount_free_many(bind_mounts, h);
2400 return r;
2401}
2402
6818c54c 2403static int apply_mount_namespace(
34cf6c43
YW
2404 const Unit *u,
2405 const ExecCommand *command,
6818c54c
LP
2406 const ExecContext *context,
2407 const ExecParameters *params,
7cc5ef5f
ZJS
2408 const ExecRuntime *runtime,
2409 char **error_path) {
6818c54c 2410
7bcef4ef 2411 _cleanup_strv_free_ char **empty_directories = NULL;
93c6bb51 2412 char *tmp = NULL, *var = NULL;
915e6d16 2413 const char *root_dir = NULL, *root_image = NULL;
228af36f 2414 NamespaceInfo ns_info;
165a31c0 2415 bool needs_sandboxing;
6c47cd7d 2416 BindMount *bind_mounts = NULL;
da6053d0 2417 size_t n_bind_mounts = 0;
6818c54c 2418 int r;
93c6bb51 2419
2b3c1b9e
DH
2420 assert(context);
2421
93c6bb51
DH
2422 /* The runtime struct only contains the parent of the private /tmp,
2423 * which is non-accessible to world users. Inside of it there's a /tmp
2424 * that is sticky, and that's the one we want to use here. */
2425
2426 if (context->private_tmp && runtime) {
2427 if (runtime->tmp_dir)
2428 tmp = strjoina(runtime->tmp_dir, "/tmp");
2429 if (runtime->var_tmp_dir)
2430 var = strjoina(runtime->var_tmp_dir, "/tmp");
2431 }
2432
915e6d16
LP
2433 if (params->flags & EXEC_APPLY_CHROOT) {
2434 root_image = context->root_image;
2435
2436 if (!root_image)
2437 root_dir = context->root_directory;
2438 }
93c6bb51 2439
6c47cd7d
LP
2440 r = compile_bind_mounts(context, params, &bind_mounts, &n_bind_mounts, &empty_directories);
2441 if (r < 0)
2442 return r;
2443
165a31c0 2444 needs_sandboxing = (params->flags & EXEC_APPLY_SANDBOXING) && !(command->flags & EXEC_COMMAND_FULLY_PRIVILEGED);
b5a33299
YW
2445 if (needs_sandboxing)
2446 ns_info = (NamespaceInfo) {
2447 .ignore_protect_paths = false,
2448 .private_dev = context->private_devices,
2449 .protect_control_groups = context->protect_control_groups,
2450 .protect_kernel_tunables = context->protect_kernel_tunables,
2451 .protect_kernel_modules = context->protect_kernel_modules,
aecd5ac6 2452 .protect_hostname = context->protect_hostname,
b5a33299 2453 .mount_apivfs = context->mount_apivfs,
228af36f 2454 .private_mounts = context->private_mounts,
b5a33299 2455 };
228af36f
LP
2456 else if (!context->dynamic_user && root_dir)
2457 /*
2458 * If DynamicUser=no and RootDirectory= is set then lets pass a relaxed
2459 * sandbox info, otherwise enforce it, don't ignore protected paths and
2460 * fail if we are enable to apply the sandbox inside the mount namespace.
2461 */
2462 ns_info = (NamespaceInfo) {
2463 .ignore_protect_paths = true,
2464 };
2465 else
2466 ns_info = (NamespaceInfo) {};
b5a33299 2467
37ed15d7
FB
2468 if (context->mount_flags == MS_SHARED)
2469 log_unit_debug(u, "shared mount propagation hidden by other fs namespacing unit settings: ignoring");
2470
915e6d16 2471 r = setup_namespace(root_dir, root_image,
7bcef4ef 2472 &ns_info, context->read_write_paths,
165a31c0
LP
2473 needs_sandboxing ? context->read_only_paths : NULL,
2474 needs_sandboxing ? context->inaccessible_paths : NULL,
6c47cd7d
LP
2475 empty_directories,
2476 bind_mounts,
2477 n_bind_mounts,
2abd4e38
YW
2478 context->temporary_filesystems,
2479 context->n_temporary_filesystems,
93c6bb51
DH
2480 tmp,
2481 var,
165a31c0
LP
2482 needs_sandboxing ? context->protect_home : PROTECT_HOME_NO,
2483 needs_sandboxing ? context->protect_system : PROTECT_SYSTEM_NO,
915e6d16 2484 context->mount_flags,
7cc5ef5f
ZJS
2485 DISSECT_IMAGE_DISCARD_ON_LOOP,
2486 error_path);
93c6bb51 2487
6c47cd7d
LP
2488 bind_mount_free_many(bind_mounts, n_bind_mounts);
2489
1beab8b0 2490 /* If we couldn't set up the namespace this is probably due to a missing capability. setup_namespace() reports
5238e957 2491 * that with a special, recognizable error ENOANO. In this case, silently proceed, but only if exclusively
1beab8b0
LP
2492 * sandboxing options were used, i.e. nothing such as RootDirectory= or BindMount= that would result in a
2493 * completely different execution environment. */
aca835ed
YW
2494 if (r == -ENOANO) {
2495 if (n_bind_mounts == 0 &&
2496 context->n_temporary_filesystems == 0 &&
2497 !root_dir && !root_image &&
2498 !context->dynamic_user) {
2499 log_unit_debug(u, "Failed to set up namespace, assuming containerized execution and ignoring.");
2500 return 0;
2501 }
2502
2194547e
LP
2503 log_unit_debug(u, "Failed to set up namespace, and refusing to continue since the selected namespacing options alter mount environment non-trivially.\n"
2504 "Bind mounts: %zu, temporary filesystems: %zu, root directory: %s, root image: %s, dynamic user: %s",
2505 n_bind_mounts, context->n_temporary_filesystems, yes_no(root_dir), yes_no(root_image), yes_no(context->dynamic_user));
2506
aca835ed 2507 return -EOPNOTSUPP;
93c6bb51
DH
2508 }
2509
2510 return r;
2511}
2512
915e6d16
LP
2513static int apply_working_directory(
2514 const ExecContext *context,
2515 const ExecParameters *params,
2516 const char *home,
376fecf6
LP
2517 const bool needs_mount_ns,
2518 int *exit_status) {
915e6d16 2519
6732edab 2520 const char *d, *wd;
2b3c1b9e
DH
2521
2522 assert(context);
376fecf6 2523 assert(exit_status);
2b3c1b9e 2524
6732edab
LP
2525 if (context->working_directory_home) {
2526
376fecf6
LP
2527 if (!home) {
2528 *exit_status = EXIT_CHDIR;
6732edab 2529 return -ENXIO;
376fecf6 2530 }
6732edab 2531
2b3c1b9e 2532 wd = home;
6732edab
LP
2533
2534 } else if (context->working_directory)
2b3c1b9e
DH
2535 wd = context->working_directory;
2536 else
2537 wd = "/";
e7f1e7c6
DH
2538
2539 if (params->flags & EXEC_APPLY_CHROOT) {
2540 if (!needs_mount_ns && context->root_directory)
376fecf6
LP
2541 if (chroot(context->root_directory) < 0) {
2542 *exit_status = EXIT_CHROOT;
e7f1e7c6 2543 return -errno;
376fecf6 2544 }
e7f1e7c6 2545
2b3c1b9e
DH
2546 d = wd;
2547 } else
3b0e5bb5 2548 d = prefix_roota(context->root_directory, wd);
e7f1e7c6 2549
376fecf6
LP
2550 if (chdir(d) < 0 && !context->working_directory_missing_ok) {
2551 *exit_status = EXIT_CHDIR;
2b3c1b9e 2552 return -errno;
376fecf6 2553 }
e7f1e7c6
DH
2554
2555 return 0;
2556}
2557
b1edf445 2558static int setup_keyring(
34cf6c43 2559 const Unit *u,
b1edf445
LP
2560 const ExecContext *context,
2561 const ExecParameters *p,
2562 uid_t uid, gid_t gid) {
2563
74dd6b51 2564 key_serial_t keyring;
e64c2d0b
DJL
2565 int r = 0;
2566 uid_t saved_uid;
2567 gid_t saved_gid;
74dd6b51
LP
2568
2569 assert(u);
b1edf445 2570 assert(context);
74dd6b51
LP
2571 assert(p);
2572
2573 /* Let's set up a new per-service "session" kernel keyring for each system service. This has the benefit that
2574 * each service runs with its own keyring shared among all processes of the service, but with no hook-up beyond
2575 * that scope, and in particular no link to the per-UID keyring. If we don't do this the keyring will be
2576 * automatically created on-demand and then linked to the per-UID keyring, by the kernel. The kernel's built-in
2577 * on-demand behaviour is very appropriate for login users, but probably not so much for system services, where
2578 * UIDs are not necessarily specific to a service but reused (at least in the case of UID 0). */
2579
b1edf445
LP
2580 if (context->keyring_mode == EXEC_KEYRING_INHERIT)
2581 return 0;
2582
e64c2d0b
DJL
2583 /* Acquiring a reference to the user keyring is nasty. We briefly change identity in order to get things set up
2584 * properly by the kernel. If we don't do that then we can't create it atomically, and that sucks for parallel
2585 * execution. This mimics what pam_keyinit does, too. Setting up session keyring, to be owned by the right user
2586 * & group is just as nasty as acquiring a reference to the user keyring. */
2587
2588 saved_uid = getuid();
2589 saved_gid = getgid();
2590
2591 if (gid_is_valid(gid) && gid != saved_gid) {
2592 if (setregid(gid, -1) < 0)
2593 return log_unit_error_errno(u, errno, "Failed to change GID for user keyring: %m");
2594 }
2595
2596 if (uid_is_valid(uid) && uid != saved_uid) {
2597 if (setreuid(uid, -1) < 0) {
2598 r = log_unit_error_errno(u, errno, "Failed to change UID for user keyring: %m");
2599 goto out;
2600 }
2601 }
2602
74dd6b51
LP
2603 keyring = keyctl(KEYCTL_JOIN_SESSION_KEYRING, 0, 0, 0, 0);
2604 if (keyring == -1) {
2605 if (errno == ENOSYS)
8002fb97 2606 log_unit_debug_errno(u, errno, "Kernel keyring not supported, ignoring.");
74dd6b51 2607 else if (IN_SET(errno, EACCES, EPERM))
8002fb97 2608 log_unit_debug_errno(u, errno, "Kernel keyring access prohibited, ignoring.");
74dd6b51 2609 else if (errno == EDQUOT)
8002fb97 2610 log_unit_debug_errno(u, errno, "Out of kernel keyrings to allocate, ignoring.");
74dd6b51 2611 else
e64c2d0b 2612 r = log_unit_error_errno(u, errno, "Setting up kernel keyring failed: %m");
74dd6b51 2613
e64c2d0b 2614 goto out;
74dd6b51
LP
2615 }
2616
e64c2d0b
DJL
2617 /* When requested link the user keyring into the session keyring. */
2618 if (context->keyring_mode == EXEC_KEYRING_SHARED) {
2619
2620 if (keyctl(KEYCTL_LINK,
2621 KEY_SPEC_USER_KEYRING,
2622 KEY_SPEC_SESSION_KEYRING, 0, 0) < 0) {
2623 r = log_unit_error_errno(u, errno, "Failed to link user keyring into session keyring: %m");
2624 goto out;
2625 }
2626 }
2627
2628 /* Restore uid/gid back */
2629 if (uid_is_valid(uid) && uid != saved_uid) {
2630 if (setreuid(saved_uid, -1) < 0) {
2631 r = log_unit_error_errno(u, errno, "Failed to change UID back for user keyring: %m");
2632 goto out;
2633 }
2634 }
2635
2636 if (gid_is_valid(gid) && gid != saved_gid) {
2637 if (setregid(saved_gid, -1) < 0)
2638 return log_unit_error_errno(u, errno, "Failed to change GID back for user keyring: %m");
2639 }
2640
2641 /* Populate they keyring with the invocation ID by default, as original saved_uid. */
b3415f5d
LP
2642 if (!sd_id128_is_null(u->invocation_id)) {
2643 key_serial_t key;
2644
2645 key = add_key("user", "invocation_id", &u->invocation_id, sizeof(u->invocation_id), KEY_SPEC_SESSION_KEYRING);
2646 if (key == -1)
8002fb97 2647 log_unit_debug_errno(u, errno, "Failed to add invocation ID to keyring, ignoring: %m");
b3415f5d
LP
2648 else {
2649 if (keyctl(KEYCTL_SETPERM, key,
2650 KEY_POS_VIEW|KEY_POS_READ|KEY_POS_SEARCH|
2651 KEY_USR_VIEW|KEY_USR_READ|KEY_USR_SEARCH, 0, 0) < 0)
e64c2d0b 2652 r = log_unit_error_errno(u, errno, "Failed to restrict invocation ID permission: %m");
b3415f5d
LP
2653 }
2654 }
2655
e64c2d0b
DJL
2656out:
2657 /* Revert back uid & gid for the the last time, and exit */
2658 /* no extra logging, as only the first already reported error matters */
2659 if (getuid() != saved_uid)
2660 (void) setreuid(saved_uid, -1);
b1edf445 2661
e64c2d0b
DJL
2662 if (getgid() != saved_gid)
2663 (void) setregid(saved_gid, -1);
b1edf445 2664
e64c2d0b 2665 return r;
74dd6b51
LP
2666}
2667
/* Appends each non-negative file descriptor of the pair to the array, advancing the counter *n for
 * every entry stored. The caller has to make sure the array has room for up to two more entries. */
static void append_socket_pair(int *array, size_t *n, const int pair[static 2]) {
        size_t k;

        assert(array);
        assert(n);

        if (!pair)
                return;

        for (k = 0; k < 2; k++)
                if (pair[k] >= 0)
                        array[(*n)++] = pair[k];
}
2680
a34ceba6
LP
2681static int close_remaining_fds(
2682 const ExecParameters *params,
34cf6c43
YW
2683 const ExecRuntime *runtime,
2684 const DynamicCreds *dcreds,
00d9ef85 2685 int user_lookup_fd,
a34ceba6 2686 int socket_fd,
5686391b 2687 int exec_fd,
da6053d0 2688 int *fds, size_t n_fds) {
a34ceba6 2689
da6053d0 2690 size_t n_dont_close = 0;
00d9ef85 2691 int dont_close[n_fds + 12];
a34ceba6
LP
2692
2693 assert(params);
2694
2695 if (params->stdin_fd >= 0)
2696 dont_close[n_dont_close++] = params->stdin_fd;
2697 if (params->stdout_fd >= 0)
2698 dont_close[n_dont_close++] = params->stdout_fd;
2699 if (params->stderr_fd >= 0)
2700 dont_close[n_dont_close++] = params->stderr_fd;
2701
2702 if (socket_fd >= 0)
2703 dont_close[n_dont_close++] = socket_fd;
5686391b
LP
2704 if (exec_fd >= 0)
2705 dont_close[n_dont_close++] = exec_fd;
a34ceba6
LP
2706 if (n_fds > 0) {
2707 memcpy(dont_close + n_dont_close, fds, sizeof(int) * n_fds);
2708 n_dont_close += n_fds;
2709 }
2710
29206d46
LP
2711 if (runtime)
2712 append_socket_pair(dont_close, &n_dont_close, runtime->netns_storage_socket);
2713
2714 if (dcreds) {
2715 if (dcreds->user)
2716 append_socket_pair(dont_close, &n_dont_close, dcreds->user->storage_socket);
2717 if (dcreds->group)
2718 append_socket_pair(dont_close, &n_dont_close, dcreds->group->storage_socket);
a34ceba6
LP
2719 }
2720
00d9ef85
LP
2721 if (user_lookup_fd >= 0)
2722 dont_close[n_dont_close++] = user_lookup_fd;
2723
a34ceba6
LP
2724 return close_all_fds(dont_close, n_dont_close);
2725}
2726
00d9ef85
LP
2727static int send_user_lookup(
2728 Unit *unit,
2729 int user_lookup_fd,
2730 uid_t uid,
2731 gid_t gid) {
2732
2733 assert(unit);
2734
2735 /* Send the resolved UID/GID to PID 1 after we learnt it. We send a single datagram, containing the UID/GID
2736 * data as well as the unit name. Note that we suppress sending this if no user/group to resolve was
2737 * specified. */
2738
2739 if (user_lookup_fd < 0)
2740 return 0;
2741
2742 if (!uid_is_valid(uid) && !gid_is_valid(gid))
2743 return 0;
2744
2745 if (writev(user_lookup_fd,
2746 (struct iovec[]) {
e6a7ec4b
LP
2747 IOVEC_INIT(&uid, sizeof(uid)),
2748 IOVEC_INIT(&gid, sizeof(gid)),
2749 IOVEC_INIT_STRING(unit->id) }, 3) < 0)
00d9ef85
LP
2750 return -errno;
2751
2752 return 0;
2753}
2754
6732edab
LP
2755static int acquire_home(const ExecContext *c, uid_t uid, const char** home, char **buf) {
2756 int r;
2757
2758 assert(c);
2759 assert(home);
2760 assert(buf);
2761
2762 /* If WorkingDirectory=~ is set, try to acquire a usable home directory. */
2763
2764 if (*home)
2765 return 0;
2766
2767 if (!c->working_directory_home)
2768 return 0;
2769
6732edab
LP
2770 r = get_home_dir(buf);
2771 if (r < 0)
2772 return r;
2773
2774 *home = *buf;
2775 return 1;
2776}
2777
da50b85a
LP
2778static int compile_suggested_paths(const ExecContext *c, const ExecParameters *p, char ***ret) {
2779 _cleanup_strv_free_ char ** list = NULL;
2780 ExecDirectoryType t;
2781 int r;
2782
2783 assert(c);
2784 assert(p);
2785 assert(ret);
2786
2787 assert(c->dynamic_user);
2788
2789 /* Compile a list of paths that it might make sense to read the owning UID from to use as initial candidate for
2790 * dynamic UID allocation, in order to save us from doing costly recursive chown()s of the special
2791 * directories. */
2792
2793 for (t = 0; t < _EXEC_DIRECTORY_TYPE_MAX; t++) {
2794 char **i;
2795
2796 if (t == EXEC_DIRECTORY_CONFIGURATION)
2797 continue;
2798
2799 if (!p->prefix[t])
2800 continue;
2801
2802 STRV_FOREACH(i, c->directories[t].paths) {
2803 char *e;
2804
8092a48c
YW
2805 if (t == EXEC_DIRECTORY_RUNTIME)
2806 e = strjoin(p->prefix[t], "/", *i);
2807 else
2808 e = strjoin(p->prefix[t], "/private/", *i);
da50b85a
LP
2809 if (!e)
2810 return -ENOMEM;
2811
2812 r = strv_consume(&list, e);
2813 if (r < 0)
2814 return r;
2815 }
2816 }
2817
ae2a15bc 2818 *ret = TAKE_PTR(list);
da50b85a
LP
2819
2820 return 0;
2821}
2822
34cf6c43
YW
2823static char *exec_command_line(char **argv);
2824
78f93209
LP
2825static int exec_parameters_get_cgroup_path(const ExecParameters *params, char **ret) {
2826 bool using_subcgroup;
2827 char *p;
2828
2829 assert(params);
2830 assert(ret);
2831
2832 if (!params->cgroup_path)
2833 return -EINVAL;
2834
2835 /* If we are called for a unit where cgroup delegation is on, and the payload created its own populated
2836 * subcgroup (which we expect it to do, after all it asked for delegation), then we cannot place the control
2837 * processes started after the main unit's process in the unit's main cgroup because it is now an inner one,
2838 * and inner cgroups may not contain processes. Hence, if delegation is on, and this is a control process,
2839 * let's use ".control" as subcgroup instead. Note that we do so only for ExecStartPost=, ExecReload=,
2840 * ExecStop=, ExecStopPost=, i.e. for the commands where the main process is already forked. For ExecStartPre=
2841 * this is not necessary, the cgroup is still empty. We distinguish these cases with the EXEC_CONTROL_CGROUP
2842 * flag, which is only passed for the former statements, not for the latter. */
2843
2844 using_subcgroup = FLAGS_SET(params->flags, EXEC_CONTROL_CGROUP|EXEC_CGROUP_DELEGATE|EXEC_IS_CONTROL);
2845 if (using_subcgroup)
2846 p = strjoin(params->cgroup_path, "/.control");
2847 else
2848 p = strdup(params->cgroup_path);
2849 if (!p)
2850 return -ENOMEM;
2851
2852 *ret = p;
2853 return using_subcgroup;
2854}
2855
ff0af2a1 2856static int exec_child(
f2341e0a 2857 Unit *unit,
34cf6c43 2858 const ExecCommand *command,
ff0af2a1
LP
2859 const ExecContext *context,
2860 const ExecParameters *params,
2861 ExecRuntime *runtime,
29206d46 2862 DynamicCreds *dcreds,
ff0af2a1 2863 int socket_fd,
52c239d7 2864 int named_iofds[3],
4c47affc 2865 int *fds,
da6053d0 2866 size_t n_socket_fds,
25b583d7 2867 size_t n_storage_fds,
ff0af2a1 2868 char **files_env,
00d9ef85 2869 int user_lookup_fd,
12145637 2870 int *exit_status) {
d35fbf6b 2871
7ca69792 2872 _cleanup_strv_free_ char **our_env = NULL, **pass_env = NULL, **accum_env = NULL, **replaced_argv = NULL;
5686391b 2873 int *fds_with_exec_fd, n_fds_with_exec_fd, r, ngids = 0, exec_fd = -1;
4d885bd3
DH
2874 _cleanup_free_ gid_t *supplementary_gids = NULL;
2875 const char *username = NULL, *groupname = NULL;
5686391b 2876 _cleanup_free_ char *home_buffer = NULL;
2b3c1b9e 2877 const char *home = NULL, *shell = NULL;
7ca69792 2878 char **final_argv = NULL;
7bce046b
LP
2879 dev_t journal_stream_dev = 0;
2880 ino_t journal_stream_ino = 0;
165a31c0
LP
2881 bool needs_sandboxing, /* Do we need to set up full sandboxing? (i.e. all namespacing, all MAC stuff, caps, yadda yadda */
2882 needs_setuid, /* Do we need to do the actual setresuid()/setresgid() calls? */
2883 needs_mount_namespace, /* Do we need to set up a mount namespace for this kernel? */
2884 needs_ambient_hack; /* Do we need to apply the ambient capabilities hack? */
349cc4a5 2885#if HAVE_SELINUX
7f59dd35 2886 _cleanup_free_ char *mac_selinux_context_net = NULL;
43b1f709 2887 bool use_selinux = false;
ecfbc84f 2888#endif
f9fa32f0 2889#if ENABLE_SMACK
43b1f709 2890 bool use_smack = false;
ecfbc84f 2891#endif
349cc4a5 2892#if HAVE_APPARMOR
43b1f709 2893 bool use_apparmor = false;
ecfbc84f 2894#endif
fed1e721
LP
2895 uid_t uid = UID_INVALID;
2896 gid_t gid = GID_INVALID;
da6053d0 2897 size_t n_fds;
3536f49e 2898 ExecDirectoryType dt;
165a31c0 2899 int secure_bits;
034c6ed7 2900
f2341e0a 2901 assert(unit);
5cb5a6ff
LP
2902 assert(command);
2903 assert(context);
d35fbf6b 2904 assert(params);
ff0af2a1 2905 assert(exit_status);
d35fbf6b
DM
2906
2907 rename_process_from_path(command->path);
2908
2909 /* We reset exactly these signals, since they are the
2910 * only ones we set to SIG_IGN in the main daemon. All
2911 * others we leave untouched because we set them to
2912 * SIG_DFL or a valid handler initially, both of which
2913 * will be demoted to SIG_DFL. */
ce30c8dc
LP
2914 (void) default_signals(SIGNALS_CRASH_HANDLER,
2915 SIGNALS_IGNORE, -1);
d35fbf6b
DM
2916
2917 if (context->ignore_sigpipe)
ce30c8dc 2918 (void) ignore_signals(SIGPIPE, -1);
d35fbf6b 2919
ff0af2a1
LP
2920 r = reset_signal_mask();
2921 if (r < 0) {
2922 *exit_status = EXIT_SIGNAL_MASK;
12145637 2923 return log_unit_error_errno(unit, r, "Failed to set process signal mask: %m");
d35fbf6b 2924 }
034c6ed7 2925
d35fbf6b
DM
2926 if (params->idle_pipe)
2927 do_idle_pipe_dance(params->idle_pipe);
4f2d528d 2928
2c027c62
LP
2929 /* Close fds we don't need very early to make sure we don't block init reexecution because it cannot bind its
2930 * sockets. Among the fds we close are the logging fds, and we want to keep them closed, so that we don't have
2931 * any fds open we don't really want open during the transition. In order to make logging work, we switch the
2932 * log subsystem into open_when_needed mode, so that it reopens the logs on every single log call. */
ff0af2a1 2933
d35fbf6b 2934 log_forget_fds();
2c027c62 2935 log_set_open_when_needed(true);
4f2d528d 2936
40a80078
LP
2937 /* In case anything used libc syslog(), close this here, too */
2938 closelog();
2939
5686391b
LP
2940 n_fds = n_socket_fds + n_storage_fds;
2941 r = close_remaining_fds(params, runtime, dcreds, user_lookup_fd, socket_fd, params->exec_fd, fds, n_fds);
ff0af2a1
LP
2942 if (r < 0) {
2943 *exit_status = EXIT_FDS;
12145637 2944 return log_unit_error_errno(unit, r, "Failed to close unwanted file descriptors: %m");
8c7be95e
LP
2945 }
2946
d35fbf6b
DM
2947 if (!context->same_pgrp)
2948 if (setsid() < 0) {
ff0af2a1 2949 *exit_status = EXIT_SETSID;
12145637 2950 return log_unit_error_errno(unit, errno, "Failed to create new process session: %m");
d35fbf6b 2951 }
9e2f7c11 2952
1e22b5cd 2953 exec_context_tty_reset(context, params);
d35fbf6b 2954
c891efaf 2955 if (unit_shall_confirm_spawn(unit)) {
7d5ceb64 2956 const char *vc = params->confirm_spawn;
3b20f877
FB
2957 _cleanup_free_ char *cmdline = NULL;
2958
ee39ca20 2959 cmdline = exec_command_line(command->argv);
3b20f877 2960 if (!cmdline) {
0460aa5c 2961 *exit_status = EXIT_MEMORY;
12145637 2962 return log_oom();
3b20f877 2963 }
d35fbf6b 2964
eedf223a 2965 r = ask_for_confirmation(vc, unit, cmdline);
3b20f877
FB
2966 if (r != CONFIRM_EXECUTE) {
2967 if (r == CONFIRM_PRETEND_SUCCESS) {
2968 *exit_status = EXIT_SUCCESS;
2969 return 0;
2970 }
ff0af2a1 2971 *exit_status = EXIT_CONFIRM;
12145637 2972 log_unit_error(unit, "Execution cancelled by the user");
d35fbf6b 2973 return -ECANCELED;
d35fbf6b
DM
2974 }
2975 }
1a63a750 2976
d521916d
LP
2977 /* We are about to invoke NSS and PAM modules. Let's tell them what we are doing here, maybe they care. This is
2978 * used by nss-resolve to disable itself when we are about to start systemd-resolved, to avoid deadlocks. Note
2979 * that these env vars do not survive the execve(), which means they really only apply to the PAM and NSS
2980 * invocations themselves. Also note that while we'll only invoke NSS modules involved in user management they
2981 * might internally call into other NSS modules that are involved in hostname resolution, we never know. */
2982 if (setenv("SYSTEMD_ACTIVATION_UNIT", unit->id, true) != 0 ||
2983 setenv("SYSTEMD_ACTIVATION_SCOPE", MANAGER_IS_SYSTEM(unit->manager) ? "system" : "user", true) != 0) {
2984 *exit_status = EXIT_MEMORY;
2985 return log_unit_error_errno(unit, errno, "Failed to update environment: %m");
2986 }
2987
29206d46 2988 if (context->dynamic_user && dcreds) {
da50b85a 2989 _cleanup_strv_free_ char **suggested_paths = NULL;
29206d46 2990
d521916d
LP
2991 /* On top of that, make sure we bypass our own NSS module nss-systemd comprehensively for any NSS
2992 * checks, if DynamicUser=1 is used, as we shouldn't create a feedback loop with ourselves here.*/
409093fe
LP
2993 if (putenv((char*) "SYSTEMD_NSS_DYNAMIC_BYPASS=1") != 0) {
2994 *exit_status = EXIT_USER;
12145637 2995 return log_unit_error_errno(unit, errno, "Failed to update environment: %m");
409093fe
LP
2996 }
2997
da50b85a
LP
2998 r = compile_suggested_paths(context, params, &suggested_paths);
2999 if (r < 0) {
3000 *exit_status = EXIT_MEMORY;
3001 return log_oom();
3002 }
3003
3004 r = dynamic_creds_realize(dcreds, suggested_paths, &uid, &gid);
ff0af2a1
LP
3005 if (r < 0) {
3006 *exit_status = EXIT_USER;
e2b0cc34
YW
3007 if (r == -EILSEQ) {
3008 log_unit_error(unit, "Failed to update dynamic user credentials: User or group with specified name already exists.");
3009 return -EOPNOTSUPP;
3010 }
12145637 3011 return log_unit_error_errno(unit, r, "Failed to update dynamic user credentials: %m");
524daa8c 3012 }
524daa8c 3013
70dd455c 3014 if (!uid_is_valid(uid)) {
29206d46 3015 *exit_status = EXIT_USER;
12145637 3016 log_unit_error(unit, "UID validation failed for \""UID_FMT"\"", uid);
70dd455c
ZJS
3017 return -ESRCH;
3018 }
3019
3020 if (!gid_is_valid(gid)) {
3021 *exit_status = EXIT_USER;
12145637 3022 log_unit_error(unit, "GID validation failed for \""GID_FMT"\"", gid);
29206d46
LP
3023 return -ESRCH;
3024 }
5bc7452b 3025
29206d46
LP
3026 if (dcreds->user)
3027 username = dcreds->user->name;
3028
3029 } else {
4d885bd3
DH
3030 r = get_fixed_user(context, &username, &uid, &gid, &home, &shell);
3031 if (r < 0) {
3032 *exit_status = EXIT_USER;
12145637 3033 return log_unit_error_errno(unit, r, "Failed to determine user credentials: %m");
5bc7452b 3034 }
5bc7452b 3035
4d885bd3
DH
3036 r = get_fixed_group(context, &groupname, &gid);
3037 if (r < 0) {
3038 *exit_status = EXIT_GROUP;
12145637 3039 return log_unit_error_errno(unit, r, "Failed to determine group credentials: %m");
4d885bd3 3040 }
cdc5d5c5 3041 }
29206d46 3042
cdc5d5c5
DH
3043 /* Initialize user supplementary groups and get SupplementaryGroups= ones */
3044 r = get_supplementary_groups(context, username, groupname, gid,
3045 &supplementary_gids, &ngids);
3046 if (r < 0) {
3047 *exit_status = EXIT_GROUP;
12145637 3048 return log_unit_error_errno(unit, r, "Failed to determine supplementary groups: %m");
29206d46 3049 }
5bc7452b 3050
00d9ef85
LP
3051 r = send_user_lookup(unit, user_lookup_fd, uid, gid);
3052 if (r < 0) {
3053 *exit_status = EXIT_USER;
12145637 3054 return log_unit_error_errno(unit, r, "Failed to send user credentials to PID1: %m");
00d9ef85
LP
3055 }
3056
3057 user_lookup_fd = safe_close(user_lookup_fd);
3058
6732edab
LP
3059 r = acquire_home(context, uid, &home, &home_buffer);
3060 if (r < 0) {
3061 *exit_status = EXIT_CHDIR;
12145637 3062 return log_unit_error_errno(unit, r, "Failed to determine $HOME for user: %m");
6732edab
LP
3063 }
3064
d35fbf6b
DM
3065 /* If a socket is connected to STDIN/STDOUT/STDERR, we
3066 * must make sure to drop O_NONBLOCK */
3067 if (socket_fd >= 0)
a34ceba6 3068 (void) fd_nonblock(socket_fd, false);
acbb0225 3069
4c70a4a7
MS
3070 /* Journald will try to look-up our cgroup in order to populate _SYSTEMD_CGROUP and _SYSTEMD_UNIT fields.
3071 * Hence we need to migrate to the target cgroup from init.scope before connecting to journald */
3072 if (params->cgroup_path) {
3073 _cleanup_free_ char *p = NULL;
3074
3075 r = exec_parameters_get_cgroup_path(params, &p);
3076 if (r < 0) {
3077 *exit_status = EXIT_CGROUP;
3078 return log_unit_error_errno(unit, r, "Failed to acquire cgroup path: %m");
3079 }
3080
3081 r = cg_attach_everywhere(params->cgroup_supported, p, 0, NULL, NULL);
3082 if (r < 0) {
3083 *exit_status = EXIT_CGROUP;
3084 return log_unit_error_errno(unit, r, "Failed to attach to cgroup %s: %m", p);
3085 }
3086 }
3087
a8d08f39
LP
3088 if (context->network_namespace_path && runtime && runtime->netns_storage_socket[0] >= 0) {
3089 r = open_netns_path(runtime->netns_storage_socket, context->network_namespace_path);
3090 if (r < 0) {
3091 *exit_status = EXIT_NETWORK;
3092 return log_unit_error_errno(unit, r, "Failed to open network namespace path %s: %m", context->network_namespace_path);
3093 }
3094 }
3095
52c239d7 3096 r = setup_input(context, params, socket_fd, named_iofds);
ff0af2a1
LP
3097 if (r < 0) {
3098 *exit_status = EXIT_STDIN;
12145637 3099 return log_unit_error_errno(unit, r, "Failed to set up standard input: %m");
d35fbf6b 3100 }
034c6ed7 3101
52c239d7 3102 r = setup_output(unit, context, params, STDOUT_FILENO, socket_fd, named_iofds, basename(command->path), uid, gid, &journal_stream_dev, &journal_stream_ino);
ff0af2a1
LP
3103 if (r < 0) {
3104 *exit_status = EXIT_STDOUT;
12145637 3105 return log_unit_error_errno(unit, r, "Failed to set up standard output: %m");
d35fbf6b
DM
3106 }
3107
52c239d7 3108 r = setup_output(unit, context, params, STDERR_FILENO, socket_fd, named_iofds, basename(command->path), uid, gid, &journal_stream_dev, &journal_stream_ino);
ff0af2a1
LP
3109 if (r < 0) {
3110 *exit_status = EXIT_STDERR;
12145637 3111 return log_unit_error_errno(unit, r, "Failed to set up standard error output: %m");
d35fbf6b
DM
3112 }
3113
d35fbf6b 3114 if (context->oom_score_adjust_set) {
9f8168eb
LP
3115 /* When we can't make this change due to EPERM, then let's silently skip over it. User namespaces
3116 * prohibit write access to this file, and we shouldn't trip up over that. */
3117 r = set_oom_score_adjust(context->oom_score_adjust);
12145637 3118 if (IN_SET(r, -EPERM, -EACCES))
f2341e0a 3119 log_unit_debug_errno(unit, r, "Failed to adjust OOM setting, assuming containerized execution, ignoring: %m");
12145637 3120 else if (r < 0) {
ff0af2a1 3121 *exit_status = EXIT_OOM_ADJUST;
12145637 3122 return log_unit_error_errno(unit, r, "Failed to adjust OOM setting: %m");
613b411c 3123 }
d35fbf6b
DM
3124 }
3125
3126 if (context->nice_set)
3127 if (setpriority(PRIO_PROCESS, 0, context->nice) < 0) {
ff0af2a1 3128 *exit_status = EXIT_NICE;
12145637 3129 return log_unit_error_errno(unit, errno, "Failed to set up process scheduling priority (nice level): %m");
613b411c
LP
3130 }
3131
d35fbf6b
DM
3132 if (context->cpu_sched_set) {
3133 struct sched_param param = {
3134 .sched_priority = context->cpu_sched_priority,
3135 };
3136
ff0af2a1
LP
3137 r = sched_setscheduler(0,
3138 context->cpu_sched_policy |
3139 (context->cpu_sched_reset_on_fork ?
3140 SCHED_RESET_ON_FORK : 0),
3141 &param);
3142 if (r < 0) {
3143 *exit_status = EXIT_SETSCHEDULER;
12145637 3144 return log_unit_error_errno(unit, errno, "Failed to set up CPU scheduling: %m");
fc9b2a84 3145 }
d35fbf6b 3146 }
fc9b2a84 3147
d35fbf6b
DM
3148 if (context->cpuset)
3149 if (sched_setaffinity(0, CPU_ALLOC_SIZE(context->cpuset_ncpus), context->cpuset) < 0) {
ff0af2a1 3150 *exit_status = EXIT_CPUAFFINITY;
12145637 3151 return log_unit_error_errno(unit, errno, "Failed to set up CPU affinity: %m");
034c6ed7
LP
3152 }
3153
d35fbf6b
DM
3154 if (context->ioprio_set)
3155 if (ioprio_set(IOPRIO_WHO_PROCESS, 0, context->ioprio) < 0) {
ff0af2a1 3156 *exit_status = EXIT_IOPRIO;
12145637 3157 return log_unit_error_errno(unit, errno, "Failed to set up IO scheduling priority: %m");
d35fbf6b 3158 }
da726a4d 3159
d35fbf6b
DM
3160 if (context->timer_slack_nsec != NSEC_INFINITY)
3161 if (prctl(PR_SET_TIMERSLACK, context->timer_slack_nsec) < 0) {
ff0af2a1 3162 *exit_status = EXIT_TIMERSLACK;
12145637 3163 return log_unit_error_errno(unit, errno, "Failed to set up timer slack: %m");
4c2630eb 3164 }
9eba9da4 3165
21022b9d
LP
3166 if (context->personality != PERSONALITY_INVALID) {
3167 r = safe_personality(context->personality);
3168 if (r < 0) {
ff0af2a1 3169 *exit_status = EXIT_PERSONALITY;
12145637 3170 return log_unit_error_errno(unit, r, "Failed to set up execution domain (personality): %m");
4c2630eb 3171 }
21022b9d 3172 }
94f04347 3173
d35fbf6b 3174 if (context->utmp_id)
df0ff127 3175 utmp_put_init_process(context->utmp_id, getpid_cached(), getsid(0),
6a93917d 3176 context->tty_path,
023a4f67
LP
3177 context->utmp_mode == EXEC_UTMP_INIT ? INIT_PROCESS :
3178 context->utmp_mode == EXEC_UTMP_LOGIN ? LOGIN_PROCESS :
3179 USER_PROCESS,
6a93917d 3180 username);
d35fbf6b 3181
08f67696 3182 if (uid_is_valid(uid)) {
ff0af2a1
LP
3183 r = chown_terminal(STDIN_FILENO, uid);
3184 if (r < 0) {
3185 *exit_status = EXIT_STDIN;
12145637 3186 return log_unit_error_errno(unit, r, "Failed to change ownership of terminal: %m");
071830ff 3187 }
d35fbf6b 3188 }
8e274523 3189
4e1dfa45 3190 /* If delegation is enabled we'll pass ownership of the cgroup to the user of the new process. On cgroup v1
62b9bb26 3191 * this is only about systemd's own hierarchy, i.e. not the controller hierarchies, simply because that's not
4e1dfa45 3192 * safe. On cgroup v2 there's only one hierarchy anyway, and delegation is safe there, hence in that case only
62b9bb26 3193 * touch a single hierarchy too. */
584b8688 3194 if (params->cgroup_path && context->user && (params->flags & EXEC_CGROUP_DELEGATE)) {
62b9bb26 3195 r = cg_set_access(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, uid, gid);
ff0af2a1
LP
3196 if (r < 0) {
3197 *exit_status = EXIT_CGROUP;
12145637 3198 return log_unit_error_errno(unit, r, "Failed to adjust control group access: %m");
034c6ed7 3199 }
d35fbf6b 3200 }
034c6ed7 3201
72fd1768 3202 for (dt = 0; dt < _EXEC_DIRECTORY_TYPE_MAX; dt++) {
8679efde 3203 r = setup_exec_directory(context, params, uid, gid, dt, exit_status);
12145637
LP
3204 if (r < 0)
3205 return log_unit_error_errno(unit, r, "Failed to set up special execution directory in %s: %m", params->prefix[dt]);
d35fbf6b 3206 }
94f04347 3207
7bce046b 3208 r = build_environment(
fd63e712 3209 unit,
7bce046b
LP
3210 context,
3211 params,
3212 n_fds,
3213 home,
3214 username,
3215 shell,
3216 journal_stream_dev,
3217 journal_stream_ino,
3218 &our_env);
2065ca69
JW
3219 if (r < 0) {
3220 *exit_status = EXIT_MEMORY;
12145637 3221 return log_oom();
2065ca69
JW
3222 }
3223
3224 r = build_pass_environment(context, &pass_env);
3225 if (r < 0) {
3226 *exit_status = EXIT_MEMORY;
12145637 3227 return log_oom();
2065ca69
JW
3228 }
3229
3230 accum_env = strv_env_merge(5,
3231 params->environment,
3232 our_env,
3233 pass_env,
3234 context->environment,
3235 files_env,
3236 NULL);
3237 if (!accum_env) {
3238 *exit_status = EXIT_MEMORY;
12145637 3239 return log_oom();
2065ca69 3240 }
1280503b 3241 accum_env = strv_env_clean(accum_env);
2065ca69 3242
096424d1 3243 (void) umask(context->umask);
b213e1c1 3244
b1edf445 3245 r = setup_keyring(unit, context, params, uid, gid);
74dd6b51
LP
3246 if (r < 0) {
3247 *exit_status = EXIT_KEYRING;
12145637 3248 return log_unit_error_errno(unit, r, "Failed to set up kernel keyring: %m");
74dd6b51
LP
3249 }
3250
165a31c0 3251 /* We need sandboxing if the caller asked us to apply it and the command isn't explicitly excepted from it */
1703fa41 3252 needs_sandboxing = (params->flags & EXEC_APPLY_SANDBOXING) && !(command->flags & EXEC_COMMAND_FULLY_PRIVILEGED);
7f18ef0a 3253
165a31c0
LP
3254 /* We need the ambient capability hack, if the caller asked us to apply it and the command is marked for it, and the kernel doesn't actually support ambient caps */
3255 needs_ambient_hack = (params->flags & EXEC_APPLY_SANDBOXING) && (command->flags & EXEC_COMMAND_AMBIENT_MAGIC) && !ambient_capabilities_supported();
7f18ef0a 3256
165a31c0
LP
3257 /* We need setresuid() if the caller asked us to apply sandboxing and the command isn't explicitly excepted from either whole sandboxing or just setresuid() itself, and the ambient hack is not desired */
3258 if (needs_ambient_hack)
3259 needs_setuid = false;
3260 else
3261 needs_setuid = (params->flags & EXEC_APPLY_SANDBOXING) && !(command->flags & (EXEC_COMMAND_FULLY_PRIVILEGED|EXEC_COMMAND_NO_SETUID));
3262
3263 if (needs_sandboxing) {
7f18ef0a
FK
3264 /* MAC enablement checks need to be done before a new mount ns is created, as they rely on /sys being
3265 * present. The actual MAC context application will happen later, as late as possible, to avoid
3266 * impacting our own code paths. */
3267
349cc4a5 3268#if HAVE_SELINUX
43b1f709 3269 use_selinux = mac_selinux_use();
7f18ef0a 3270#endif
f9fa32f0 3271#if ENABLE_SMACK
43b1f709 3272 use_smack = mac_smack_use();
7f18ef0a 3273#endif
349cc4a5 3274#if HAVE_APPARMOR
43b1f709 3275 use_apparmor = mac_apparmor_use();
7f18ef0a 3276#endif
165a31c0 3277 }
7f18ef0a 3278
ce932d2d
LP
3279 if (needs_sandboxing) {
3280 int which_failed;
3281
3282 /* Let's set the resource limits before we call into PAM, so that pam_limits wins over what
3283 * is set here. (See below.) */
3284
3285 r = setrlimit_closest_all((const struct rlimit* const *) context->rlimit, &which_failed);
3286 if (r < 0) {
3287 *exit_status = EXIT_LIMITS;
3288 return log_unit_error_errno(unit, r, "Failed to adjust resource limit RLIMIT_%s: %m", rlimit_to_string(which_failed));
3289 }
3290 }
3291
165a31c0 3292 if (needs_setuid) {
ce932d2d
LP
3293
3294 /* Let's call into PAM after we set up our own idea of resource limits so that pam_limits
3295 * wins here. (See above.) */
3296
165a31c0
LP
3297 if (context->pam_name && username) {
3298 r = setup_pam(context->pam_name, username, uid, gid, context->tty_path, &accum_env, fds, n_fds);
3299 if (r < 0) {
3300 *exit_status = EXIT_PAM;
12145637 3301 return log_unit_error_errno(unit, r, "Failed to set up PAM session: %m");
165a31c0
LP
3302 }
3303 }
b213e1c1 3304 }
ac45f971 3305
a8d08f39
LP
3306 if ((context->private_network || context->network_namespace_path) && runtime && runtime->netns_storage_socket[0] >= 0) {
3307
6e2d7c4f
MS
3308 if (ns_type_supported(NAMESPACE_NET)) {
3309 r = setup_netns(runtime->netns_storage_socket);
3310 if (r < 0) {
3311 *exit_status = EXIT_NETWORK;
3312 return log_unit_error_errno(unit, r, "Failed to set up network namespacing: %m");
3313 }
a8d08f39
LP
3314 } else if (context->network_namespace_path) {
3315 *exit_status = EXIT_NETWORK;
3316 return log_unit_error_errno(unit, SYNTHETIC_ERRNO(EOPNOTSUPP), "NetworkNamespacePath= is not supported, refusing.");
6e2d7c4f
MS
3317 } else
3318 log_unit_warning(unit, "PrivateNetwork=yes is configured, but the kernel does not support network namespaces, ignoring.");
d35fbf6b 3319 }
169c1bda 3320
ee818b89 3321 needs_mount_namespace = exec_needs_mount_namespace(context, params, runtime);
ee818b89 3322 if (needs_mount_namespace) {
7cc5ef5f
ZJS
3323 _cleanup_free_ char *error_path = NULL;
3324
3325 r = apply_mount_namespace(unit, command, context, params, runtime, &error_path);
3fbe8dbe
LP
3326 if (r < 0) {
3327 *exit_status = EXIT_NAMESPACE;
7cc5ef5f
ZJS
3328 return log_unit_error_errno(unit, r, "Failed to set up mount namespacing%s%s: %m",
3329 error_path ? ": " : "", strempty(error_path));
3fbe8dbe 3330 }
d35fbf6b 3331 }
81a2b7ce 3332
aecd5ac6
TM
3333 if (context->protect_hostname) {
3334 if (ns_type_supported(NAMESPACE_UTS)) {
3335 if (unshare(CLONE_NEWUTS) < 0) {
3336 *exit_status = EXIT_NAMESPACE;
3337 return log_unit_error_errno(unit, errno, "Failed to set up UTS namespacing: %m");
3338 }
3339 } else
3340 log_unit_warning(unit, "ProtectHostname=yes is configured, but the kernel does not support UTS namespaces, ignoring namespace setup.");
3341#if HAVE_SECCOMP
3342 r = seccomp_protect_hostname();
3343 if (r < 0) {
3344 *exit_status = EXIT_SECCOMP;
3345 return log_unit_error_errno(unit, r, "Failed to apply hostname restrictions: %m");
3346 }
3347#endif
3348 }
3349
bbeea271 3350 /* Drop groups as early as possible */
165a31c0 3351 if (needs_setuid) {
709dbeac 3352 r = enforce_groups(gid, supplementary_gids, ngids);
096424d1
LP
3353 if (r < 0) {
3354 *exit_status = EXIT_GROUP;
12145637 3355 return log_unit_error_errno(unit, r, "Changing group credentials failed: %m");
096424d1 3356 }
165a31c0 3357 }
096424d1 3358
165a31c0 3359 if (needs_sandboxing) {
349cc4a5 3360#if HAVE_SELINUX
43b1f709 3361 if (use_selinux && params->selinux_context_net && socket_fd >= 0) {
937ccce9
LP
3362 r = mac_selinux_get_child_mls_label(socket_fd, command->path, context->selinux_context, &mac_selinux_context_net);
3363 if (r < 0) {
3364 *exit_status = EXIT_SELINUX_CONTEXT;
12145637 3365 return log_unit_error_errno(unit, r, "Failed to determine SELinux context: %m");
937ccce9 3366 }
9008e1ac 3367 }
9008e1ac
MS
3368#endif
3369
937ccce9
LP
3370 if (context->private_users) {
3371 r = setup_private_users(uid, gid);
3372 if (r < 0) {
3373 *exit_status = EXIT_USER;
12145637 3374 return log_unit_error_errno(unit, r, "Failed to set up user namespacing: %m");
937ccce9 3375 }
d251207d
LP
3376 }
3377 }
3378
165a31c0 3379 /* We repeat the fd closing here, to make sure that nothing is leaked from the PAM modules. Note that we are
5686391b
LP
3380 * more aggressive this time since socket_fd and the netns fds we don't need anymore. We do keep the exec_fd
3381 * however if we have it as we want to keep it open until the final execve(). */
3382
3383 if (params->exec_fd >= 0) {
3384 exec_fd = params->exec_fd;
3385
3386 if (exec_fd < 3 + (int) n_fds) {
3387 int moved_fd;
3388
3389 /* Let's move the exec fd far up, so that it's outside of the fd range we want to pass to the
3390 * process we are about to execute. */
3391
3392 moved_fd = fcntl(exec_fd, F_DUPFD_CLOEXEC, 3 + (int) n_fds);
3393 if (moved_fd < 0) {
3394 *exit_status = EXIT_FDS;
3395 return log_unit_error_errno(unit, errno, "Couldn't move exec fd up: %m");
3396 }
3397
3398 safe_close(exec_fd);
3399 exec_fd = moved_fd;
3400 } else {
3401 /* This fd should be FD_CLOEXEC already, but let's make sure. */
3402 r = fd_cloexec(exec_fd, true);
3403 if (r < 0) {
3404 *exit_status = EXIT_FDS;
3405 return log_unit_error_errno(unit, r, "Failed to make exec fd FD_CLOEXEC: %m");
3406 }
3407 }
3408
3409 fds_with_exec_fd = newa(int, n_fds + 1);
7e8d494b 3410 memcpy_safe(fds_with_exec_fd, fds, n_fds * sizeof(int));
5686391b
LP
3411 fds_with_exec_fd[n_fds] = exec_fd;
3412 n_fds_with_exec_fd = n_fds + 1;
3413 } else {
3414 fds_with_exec_fd = fds;
3415 n_fds_with_exec_fd = n_fds;
3416 }
3417
3418 r = close_all_fds(fds_with_exec_fd, n_fds_with_exec_fd);
ff0af2a1
LP
3419 if (r >= 0)
3420 r = shift_fds(fds, n_fds);
3421 if (r >= 0)
25b583d7 3422 r = flags_fds(fds, n_socket_fds, n_storage_fds, context->non_blocking);
ff0af2a1
LP
3423 if (r < 0) {
3424 *exit_status = EXIT_FDS;
12145637 3425 return log_unit_error_errno(unit, r, "Failed to adjust passed file descriptors: %m");
d35fbf6b 3426 }
e66cf1a3 3427
5686391b
LP
3428 /* At this point, the fds we want to pass to the program are all ready and set up, with O_CLOEXEC turned off
3429 * and at the right fd numbers. There are no other fds open, with one exception: the exec_fd if it is defined,
3430 * and it has O_CLOEXEC set, after all we want it to be closed by the execve(), so that our parent knows we
3431 * came this far. */
3432
165a31c0 3433 secure_bits = context->secure_bits;
e66cf1a3 3434
165a31c0
LP
3435 if (needs_sandboxing) {
3436 uint64_t bset;
e66cf1a3 3437
ce932d2d
LP
3438 /* Set the RTPRIO resource limit to 0, but only if nothing else was explicitly
3439 * requested. (Note this is placed after the general resource limit initialization, see
3440 * above, in order to take precedence.) */
f4170c67
LP
3441 if (context->restrict_realtime && !context->rlimit[RLIMIT_RTPRIO]) {
3442 if (setrlimit(RLIMIT_RTPRIO, &RLIMIT_MAKE_CONST(0)) < 0) {
3443 *exit_status = EXIT_LIMITS;
12145637 3444 return log_unit_error_errno(unit, errno, "Failed to adjust RLIMIT_RTPRIO resource limit: %m");
f4170c67
LP
3445 }
3446 }
3447
37ac2744
JB
3448#if ENABLE_SMACK
3449 /* LSM Smack needs the capability CAP_MAC_ADMIN to change the current execution security context of the
3450 * process. This is the latest place before dropping capabilities. Other MAC contexts are set later. */
3451 if (use_smack) {
3452 r = setup_smack(context, command);
3453 if (r < 0) {
3454 *exit_status = EXIT_SMACK_PROCESS_LABEL;
3455 return log_unit_error_errno(unit, r, "Failed to set SMACK process label: %m");
3456 }
3457 }
3458#endif
3459
165a31c0
LP
3460 bset = context->capability_bounding_set;
3461 /* If the ambient caps hack is enabled (which means the kernel can't do them, and the user asked for
3462 * our magic fallback), then let's add some extra caps, so that the service can drop privs of its own,
3463 * instead of us doing that */
3464 if (needs_ambient_hack)
3465 bset |= (UINT64_C(1) << CAP_SETPCAP) |
3466 (UINT64_C(1) << CAP_SETUID) |
3467 (UINT64_C(1) << CAP_SETGID);
3468
3469 if (!cap_test_all(bset)) {
3470 r = capability_bounding_set_drop(bset, false);
ff0af2a1
LP
3471 if (r < 0) {
3472 *exit_status = EXIT_CAPABILITIES;
12145637 3473 return log_unit_error_errno(unit, r, "Failed to drop capabilities: %m");
3b8bddde 3474 }
4c2630eb 3475 }
3b8bddde 3476
755d4b67
IP
3477 /* This is done before enforce_user, but ambient set
3478 * does not survive over setresuid() if keep_caps is not set. */
165a31c0
LP
3479 if (!needs_ambient_hack &&
3480 context->capability_ambient_set != 0) {
755d4b67
IP
3481 r = capability_ambient_set_apply(context->capability_ambient_set, true);
3482 if (r < 0) {
3483 *exit_status = EXIT_CAPABILITIES;
12145637 3484 return log_unit_error_errno(unit, r, "Failed to apply ambient capabilities (before UID change): %m");
755d4b67 3485 }
755d4b67 3486 }
165a31c0 3487 }
755d4b67 3488
165a31c0 3489 if (needs_setuid) {
08f67696 3490 if (uid_is_valid(uid)) {
ff0af2a1
LP
3491 r = enforce_user(context, uid);
3492 if (r < 0) {
3493 *exit_status = EXIT_USER;
12145637 3494 return log_unit_error_errno(unit, r, "Failed to change UID to " UID_FMT ": %m", uid);
5b6319dc 3495 }
165a31c0
LP
3496
3497 if (!needs_ambient_hack &&
3498 context->capability_ambient_set != 0) {
755d4b67
IP
3499
3500 /* Fix the ambient capabilities after user change. */
3501 r = capability_ambient_set_apply(context->capability_ambient_set, false);
3502 if (r < 0) {
3503 *exit_status = EXIT_CAPABILITIES;
12145637 3504 return log_unit_error_errno(unit, r, "Failed to apply ambient capabilities (after UID change): %m");
755d4b67
IP
3505 }
3506
3507 /* If we were asked to change user and ambient capabilities
3508 * were requested, we had to add keep-caps to the securebits
3509 * so that we would maintain the inherited capability set
3510 * through the setresuid(). Make sure that the bit is added
3511 * also to the context secure_bits so that we don't try to
3512 * drop the bit away next. */
3513
7f508f2c 3514 secure_bits |= 1<<SECURE_KEEP_CAPS;
755d4b67 3515 }
5b6319dc 3516 }
165a31c0 3517 }
d35fbf6b 3518
56ef8db9
JB
3519 /* Apply working directory here, because the working directory might be on NFS and only the user running
3520 * this service might have the correct privilege to change to the working directory */
3521 r = apply_working_directory(context, params, home, needs_mount_namespace, exit_status);
3522 if (r < 0)
3523 return log_unit_error_errno(unit, r, "Changing to the requested working directory failed: %m");
3524
165a31c0 3525 if (needs_sandboxing) {
37ac2744 3526 /* Apply other MAC contexts late, but before seccomp syscall filtering, as those should really be last to
5cd9cd35
LP
3527 * influence our own codepaths as little as possible. Moreover, applying MAC contexts usually requires
3528 * syscalls that are subject to seccomp filtering, hence should probably be applied before the syscalls
3529 * are restricted. */
3530
349cc4a5 3531#if HAVE_SELINUX
43b1f709 3532 if (use_selinux) {
5cd9cd35
LP
3533 char *exec_context = mac_selinux_context_net ?: context->selinux_context;
3534
3535 if (exec_context) {
3536 r = setexeccon(exec_context);
3537 if (r < 0) {
3538 *exit_status = EXIT_SELINUX_CONTEXT;
12145637 3539 return log_unit_error_errno(unit, r, "Failed to change SELinux context to %s: %m", exec_context);
5cd9cd35
LP
3540 }
3541 }
3542 }
3543#endif
3544
349cc4a5 3545#if HAVE_APPARMOR
43b1f709 3546 if (use_apparmor && context->apparmor_profile) {
5cd9cd35
LP
3547 r = aa_change_onexec(context->apparmor_profile);
3548 if (r < 0 && !context->apparmor_profile_ignore) {
3549 *exit_status = EXIT_APPARMOR_PROFILE;
12145637 3550 return log_unit_error_errno(unit, errno, "Failed to prepare AppArmor profile change to %s: %m", context->apparmor_profile);
5cd9cd35
LP
3551 }
3552 }
3553#endif
3554
165a31c0
LP
3555 /* PR_GET_SECUREBITS is not privileged, while PR_SET_SECUREBITS is. So to suppress potential EPERMs
3556 * we'll try not to call PR_SET_SECUREBITS unless necessary. */
755d4b67
IP
3557 if (prctl(PR_GET_SECUREBITS) != secure_bits)
3558 if (prctl(PR_SET_SECUREBITS, secure_bits) < 0) {
ff0af2a1 3559 *exit_status = EXIT_SECUREBITS;
12145637 3560 return log_unit_error_errno(unit, errno, "Failed to set process secure bits: %m");
ff01d048 3561 }
5b6319dc 3562
59eeb84b 3563 if (context_has_no_new_privileges(context))
d35fbf6b 3564 if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) < 0) {
ff0af2a1 3565 *exit_status = EXIT_NO_NEW_PRIVILEGES;
12145637 3566 return log_unit_error_errno(unit, errno, "Failed to disable new privileges: %m");
d35fbf6b
DM
3567 }
3568
349cc4a5 3569#if HAVE_SECCOMP
469830d1
LP
3570 r = apply_address_families(unit, context);
3571 if (r < 0) {
3572 *exit_status = EXIT_ADDRESS_FAMILIES;
12145637 3573 return log_unit_error_errno(unit, r, "Failed to restrict address families: %m");
4c2630eb 3574 }
04aa0cb9 3575
469830d1
LP
3576 r = apply_memory_deny_write_execute(unit, context);
3577 if (r < 0) {
3578 *exit_status = EXIT_SECCOMP;
12145637 3579 return log_unit_error_errno(unit, r, "Failed to disable writing to executable memory: %m");
f3e43635 3580 }
f4170c67 3581
469830d1
LP
3582 r = apply_restrict_realtime(unit, context);
3583 if (r < 0) {
3584 *exit_status = EXIT_SECCOMP;
12145637 3585 return log_unit_error_errno(unit, r, "Failed to apply realtime restrictions: %m");
f4170c67
LP
3586 }
3587
f69567cb
LP
3588 r = apply_restrict_suid_sgid(unit, context);
3589 if (r < 0) {
3590 *exit_status = EXIT_SECCOMP;
3591 return log_unit_error_errno(unit, r, "Failed to apply SUID/SGID restrictions: %m");
3592 }
3593
add00535
LP
3594 r = apply_restrict_namespaces(unit, context);
3595 if (r < 0) {
3596 *exit_status = EXIT_SECCOMP;
12145637 3597 return log_unit_error_errno(unit, r, "Failed to apply namespace restrictions: %m");
add00535
LP
3598 }
3599
469830d1
LP
3600 r = apply_protect_sysctl(unit, context);
3601 if (r < 0) {
3602 *exit_status = EXIT_SECCOMP;
12145637 3603 return log_unit_error_errno(unit, r, "Failed to apply sysctl restrictions: %m");
502d704e
DH
3604 }
3605
469830d1
LP
3606 r = apply_protect_kernel_modules(unit, context);
3607 if (r < 0) {
3608 *exit_status = EXIT_SECCOMP;
12145637 3609 return log_unit_error_errno(unit, r, "Failed to apply module loading restrictions: %m");
59eeb84b
LP
3610 }
3611
469830d1
LP
3612 r = apply_private_devices(unit, context);
3613 if (r < 0) {
3614 *exit_status = EXIT_SECCOMP;
12145637 3615 return log_unit_error_errno(unit, r, "Failed to set up private devices: %m");
469830d1
LP
3616 }
3617
3618 r = apply_syscall_archs(unit, context);
3619 if (r < 0) {
3620 *exit_status = EXIT_SECCOMP;
12145637 3621 return log_unit_error_errno(unit, r, "Failed to apply syscall architecture restrictions: %m");
ba128bb8
LP
3622 }
3623
78e864e5
TM
3624 r = apply_lock_personality(unit, context);
3625 if (r < 0) {
3626 *exit_status = EXIT_SECCOMP;
12145637 3627 return log_unit_error_errno(unit, r, "Failed to lock personalities: %m");
78e864e5
TM
3628 }
3629
5cd9cd35
LP
3630 /* This really should remain the last step before the execve(), to make sure our own code is affected
3631 * by the filter as little as possible. */
165a31c0 3632 r = apply_syscall_filter(unit, context, needs_ambient_hack);
469830d1
LP
3633 if (r < 0) {
3634 *exit_status = EXIT_SECCOMP;
12145637 3635 return log_unit_error_errno(unit, r, "Failed to apply system call filters: %m");
d35fbf6b
DM
3636 }
3637#endif
d35fbf6b 3638 }
034c6ed7 3639
00819cc1
LP
3640 if (!strv_isempty(context->unset_environment)) {
3641 char **ee = NULL;
3642
3643 ee = strv_env_delete(accum_env, 1, context->unset_environment);
3644 if (!ee) {
3645 *exit_status = EXIT_MEMORY;
12145637 3646 return log_oom();
00819cc1
LP
3647 }
3648
130d3d22 3649 strv_free_and_replace(accum_env, ee);
00819cc1
LP
3650 }
3651
7ca69792
AZ
3652 if (!FLAGS_SET(command->flags, EXEC_COMMAND_NO_ENV_EXPAND)) {
3653 replaced_argv = replace_env_argv(command->argv, accum_env);
3654 if (!replaced_argv) {
3655 *exit_status = EXIT_MEMORY;
3656 return log_oom();
3657 }
3658 final_argv = replaced_argv;
3659 } else
3660 final_argv = command->argv;
034c6ed7 3661
f1d34068 3662 if (DEBUG_LOGGING) {
d35fbf6b 3663 _cleanup_free_ char *line;
81a2b7ce 3664
d35fbf6b 3665 line = exec_command_line(final_argv);
a1230ff9 3666 if (line)
f2341e0a 3667 log_struct(LOG_DEBUG,
f2341e0a
LP
3668 "EXECUTABLE=%s", command->path,
3669 LOG_UNIT_MESSAGE(unit, "Executing: %s", line),
ba360bb0 3670 LOG_UNIT_ID(unit),
a1230ff9 3671 LOG_UNIT_INVOCATION_ID(unit));
d35fbf6b 3672 }
dd305ec9 3673
5686391b
LP
3674 if (exec_fd >= 0) {
3675 uint8_t hot = 1;
3676
3677 /* We have finished with all our initializations. Let's now let the manager know that. From this point
3678 * on, if the manager sees POLLHUP on the exec_fd, then execve() was successful. */
3679
3680 if (write(exec_fd, &hot, sizeof(hot)) < 0) {
3681 *exit_status = EXIT_EXEC;
3682 return log_unit_error_errno(unit, errno, "Failed to enable exec_fd: %m");
3683 }
3684 }
3685
2065ca69 3686 execve(command->path, final_argv, accum_env);
5686391b
LP
3687 r = -errno;
3688
3689 if (exec_fd >= 0) {
3690 uint8_t hot = 0;
3691
3692 /* The execve() failed. This means the exec_fd is still open. Which means we need to tell the manager
3693 * that POLLHUP on it no longer means execve() succeeded. */
3694
3695 if (write(exec_fd, &hot, sizeof(hot)) < 0) {
3696 *exit_status = EXIT_EXEC;
3697 return log_unit_error_errno(unit, errno, "Failed to disable exec_fd: %m");
3698 }
3699 }
12145637 3700
5686391b
LP
3701 if (r == -ENOENT && (command->flags & EXEC_COMMAND_IGNORE_FAILURE)) {
3702 log_struct_errno(LOG_INFO, r,
12145637
LP
3703 "MESSAGE_ID=" SD_MESSAGE_SPAWN_FAILED_STR,
3704 LOG_UNIT_ID(unit),
3705 LOG_UNIT_INVOCATION_ID(unit),
3706 LOG_UNIT_MESSAGE(unit, "Executable %s missing, skipping: %m",
3707 command->path),
a1230ff9 3708 "EXECUTABLE=%s", command->path);
12145637
LP
3709 return 0;
3710 }
3711
ff0af2a1 3712 *exit_status = EXIT_EXEC;
5686391b 3713 return log_unit_error_errno(unit, r, "Failed to execute command: %m");
d35fbf6b 3714}
81a2b7ce 3715
34cf6c43
YW
3716static int exec_context_load_environment(const Unit *unit, const ExecContext *c, char ***l);
3717static int exec_context_named_iofds(const ExecContext *c, const ExecParameters *p, int named_iofds[3]);
3718
f2341e0a
LP
3719int exec_spawn(Unit *unit,
3720 ExecCommand *command,
d35fbf6b
DM
3721 const ExecContext *context,
3722 const ExecParameters *params,
3723 ExecRuntime *runtime,
29206d46 3724 DynamicCreds *dcreds,
d35fbf6b 3725 pid_t *ret) {
8351ceae 3726
ee39ca20 3727 int socket_fd, r, named_iofds[3] = { -1, -1, -1 }, *fds = NULL;
78f93209 3728 _cleanup_free_ char *subcgroup_path = NULL;
d35fbf6b 3729 _cleanup_strv_free_ char **files_env = NULL;
da6053d0 3730 size_t n_storage_fds = 0, n_socket_fds = 0;
ff0af2a1 3731 _cleanup_free_ char *line = NULL;
d35fbf6b 3732 pid_t pid;
8351ceae 3733
f2341e0a 3734 assert(unit);
d35fbf6b
DM
3735 assert(command);
3736 assert(context);
3737 assert(ret);
3738 assert(params);
25b583d7 3739 assert(params->fds || (params->n_socket_fds + params->n_storage_fds <= 0));
4298d0b5 3740
d35fbf6b
DM
3741 if (context->std_input == EXEC_INPUT_SOCKET ||
3742 context->std_output == EXEC_OUTPUT_SOCKET ||
3743 context->std_error == EXEC_OUTPUT_SOCKET) {
17df7223 3744
4c47affc 3745 if (params->n_socket_fds > 1) {
f2341e0a 3746 log_unit_error(unit, "Got more than one socket.");
d35fbf6b 3747 return -EINVAL;
ff0af2a1 3748 }
eef65bf3 3749
4c47affc 3750 if (params->n_socket_fds == 0) {
488ab41c
AA
3751 log_unit_error(unit, "Got no socket.");
3752 return -EINVAL;
3753 }
3754
d35fbf6b
DM
3755 socket_fd = params->fds[0];
3756 } else {
3757 socket_fd = -1;
3758 fds = params->fds;
9b141911 3759 n_socket_fds = params->n_socket_fds;
25b583d7 3760 n_storage_fds = params->n_storage_fds;
d35fbf6b 3761 }
94f04347 3762
34cf6c43 3763 r = exec_context_named_iofds(context, params, named_iofds);
52c239d7
LB
3764 if (r < 0)
3765 return log_unit_error_errno(unit, r, "Failed to load a named file descriptor: %m");
3766
f2341e0a 3767 r = exec_context_load_environment(unit, context, &files_env);
ff0af2a1 3768 if (r < 0)
f2341e0a 3769 return log_unit_error_errno(unit, r, "Failed to load environment files: %m");
034c6ed7 3770
ee39ca20 3771 line = exec_command_line(command->argv);
d35fbf6b
DM
3772 if (!line)
3773 return log_oom();
fab56fc5 3774
f2341e0a 3775 log_struct(LOG_DEBUG,
f2341e0a
LP
3776 LOG_UNIT_MESSAGE(unit, "About to execute: %s", line),
3777 "EXECUTABLE=%s", command->path,
ba360bb0 3778 LOG_UNIT_ID(unit),
a1230ff9 3779 LOG_UNIT_INVOCATION_ID(unit));
12145637 3780
78f93209
LP
3781 if (params->cgroup_path) {
3782 r = exec_parameters_get_cgroup_path(params, &subcgroup_path);
3783 if (r < 0)
3784 return log_unit_error_errno(unit, r, "Failed to acquire subcgroup path: %m");
3785 if (r > 0) { /* We are using a child cgroup */
3786 r = cg_create(SYSTEMD_CGROUP_CONTROLLER, subcgroup_path);
3787 if (r < 0)
3788 return log_unit_error_errno(unit, r, "Failed to create control group '%s': %m", subcgroup_path);
3789 }
3790 }
3791
d35fbf6b
DM
3792 pid = fork();
3793 if (pid < 0)
74129a12 3794 return log_unit_error_errno(unit, errno, "Failed to fork: %m");
d35fbf6b
DM
3795
3796 if (pid == 0) {
12145637 3797 int exit_status = EXIT_SUCCESS;
ff0af2a1 3798
f2341e0a
LP
3799 r = exec_child(unit,
3800 command,
ff0af2a1
LP
3801 context,
3802 params,
3803 runtime,
29206d46 3804 dcreds,
ff0af2a1 3805 socket_fd,
52c239d7 3806 named_iofds,
4c47affc 3807 fds,
9b141911 3808 n_socket_fds,
25b583d7 3809 n_storage_fds,
ff0af2a1 3810 files_env,
00d9ef85 3811 unit->manager->user_lookup_fds[1],
12145637
LP
3812 &exit_status);
3813
a1230ff9 3814 if (r < 0)
12145637
LP
3815 log_struct_errno(LOG_ERR, r,
3816 "MESSAGE_ID=" SD_MESSAGE_SPAWN_FAILED_STR,
3817 LOG_UNIT_ID(unit),
3818 LOG_UNIT_INVOCATION_ID(unit),
3819 LOG_UNIT_MESSAGE(unit, "Failed at step %s spawning %s: %m",
3820 exit_status_to_string(exit_status, EXIT_STATUS_SYSTEMD),
3821 command->path),
a1230ff9 3822 "EXECUTABLE=%s", command->path);
4c2630eb 3823
ff0af2a1 3824 _exit(exit_status);
034c6ed7
LP
3825 }
3826
f2341e0a 3827 log_unit_debug(unit, "Forked %s as "PID_FMT, command->path, pid);
23635a85 3828
78f93209
LP
3829 /* We add the new process to the cgroup both in the child (so that we can be sure that no user code is ever
3830 * executed outside of the cgroup) and in the parent (so that we can be sure that when we kill the cgroup the
3831 * process will be killed too). */
3832 if (subcgroup_path)
3833 (void) cg_attach(SYSTEMD_CGROUP_CONTROLLER, subcgroup_path, pid);
2da3263a 3834
b58b4116 3835 exec_status_start(&command->exec_status, pid);
9fb86720 3836
034c6ed7 3837 *ret = pid;
5cb5a6ff
LP
3838 return 0;
3839}
3840
034c6ed7 3841void exec_context_init(ExecContext *c) {
3536f49e
YW
3842 ExecDirectoryType i;
3843
034c6ed7
LP
3844 assert(c);
3845
4c12626c 3846 c->umask = 0022;
9eba9da4 3847 c->ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 0);
94f04347 3848 c->cpu_sched_policy = SCHED_OTHER;
071830ff 3849 c->syslog_priority = LOG_DAEMON|LOG_INFO;
74922904 3850 c->syslog_level_prefix = true;
353e12c2 3851 c->ignore_sigpipe = true;
3a43da28 3852 c->timer_slack_nsec = NSEC_INFINITY;
050f7277 3853 c->personality = PERSONALITY_INVALID;
72fd1768 3854 for (i = 0; i < _EXEC_DIRECTORY_TYPE_MAX; i++)
3536f49e 3855 c->directories[i].mode = 0755;
a103496c 3856 c->capability_bounding_set = CAP_ALL;
aa9d574d
YW
3857 assert_cc(NAMESPACE_FLAGS_INITIAL != NAMESPACE_FLAGS_ALL);
3858 c->restrict_namespaces = NAMESPACE_FLAGS_INITIAL;
d3070fbd 3859 c->log_level_max = -1;
034c6ed7
LP
3860}
3861
613b411c 3862void exec_context_done(ExecContext *c) {
3536f49e 3863 ExecDirectoryType i;
d3070fbd 3864 size_t l;
5cb5a6ff
LP
3865
3866 assert(c);
3867
6796073e
LP
3868 c->environment = strv_free(c->environment);
3869 c->environment_files = strv_free(c->environment_files);
b4c14404 3870 c->pass_environment = strv_free(c->pass_environment);
00819cc1 3871 c->unset_environment = strv_free(c->unset_environment);
8c7be95e 3872
31ce987c 3873 rlimit_free_all(c->rlimit);
034c6ed7 3874
2038c3f5 3875 for (l = 0; l < 3; l++) {
52c239d7 3876 c->stdio_fdname[l] = mfree(c->stdio_fdname[l]);
2038c3f5
LP
3877 c->stdio_file[l] = mfree(c->stdio_file[l]);
3878 }
52c239d7 3879
a1e58e8e
LP
3880 c->working_directory = mfree(c->working_directory);
3881 c->root_directory = mfree(c->root_directory);
915e6d16 3882 c->root_image = mfree(c->root_image);
a1e58e8e
LP
3883 c->tty_path = mfree(c->tty_path);
3884 c->syslog_identifier = mfree(c->syslog_identifier);
3885 c->user = mfree(c->user);
3886 c->group = mfree(c->group);
034c6ed7 3887
6796073e 3888 c->supplementary_groups = strv_free(c->supplementary_groups);
94f04347 3889
a1e58e8e 3890 c->pam_name = mfree(c->pam_name);
5b6319dc 3891
2a624c36
AP
3892 c->read_only_paths = strv_free(c->read_only_paths);
3893 c->read_write_paths = strv_free(c->read_write_paths);
3894 c->inaccessible_paths = strv_free(c->inaccessible_paths);
82c121a4 3895
d2d6c096 3896 bind_mount_free_many(c->bind_mounts, c->n_bind_mounts);
8e06d57c
YW
3897 c->bind_mounts = NULL;
3898 c->n_bind_mounts = 0;
2abd4e38
YW
3899 temporary_filesystem_free_many(c->temporary_filesystems, c->n_temporary_filesystems);
3900 c->temporary_filesystems = NULL;
3901 c->n_temporary_filesystems = 0;
d2d6c096 3902
da681e1b 3903 c->cpuset = cpu_set_mfree(c->cpuset);
86a3475b 3904
a1e58e8e
LP
3905 c->utmp_id = mfree(c->utmp_id);
3906 c->selinux_context = mfree(c->selinux_context);
3907 c->apparmor_profile = mfree(c->apparmor_profile);
5b8e1b77 3908 c->smack_process_label = mfree(c->smack_process_label);
eef65bf3 3909
8cfa775f 3910 c->syscall_filter = hashmap_free(c->syscall_filter);
525d3cc7
LP
3911 c->syscall_archs = set_free(c->syscall_archs);
3912 c->address_families = set_free(c->address_families);
e66cf1a3 3913
72fd1768 3914 for (i = 0; i < _EXEC_DIRECTORY_TYPE_MAX; i++)
3536f49e 3915 c->directories[i].paths = strv_free(c->directories[i].paths);
d3070fbd
LP
3916
3917 c->log_level_max = -1;
3918
3919 exec_context_free_log_extra_fields(c);
08f3be7a 3920
90fc172e
AZ
3921 c->log_rate_limit_interval_usec = 0;
3922 c->log_rate_limit_burst = 0;
3923
08f3be7a
LP
3924 c->stdin_data = mfree(c->stdin_data);
3925 c->stdin_data_size = 0;
a8d08f39
LP
3926
3927 c->network_namespace_path = mfree(c->network_namespace_path);
e66cf1a3
LP
3928}
3929
34cf6c43 3930int exec_context_destroy_runtime_directory(const ExecContext *c, const char *runtime_prefix) {
e66cf1a3
LP
3931 char **i;
3932
3933 assert(c);
3934
3935 if (!runtime_prefix)
3936 return 0;
3937
3536f49e 3938 STRV_FOREACH(i, c->directories[EXEC_DIRECTORY_RUNTIME].paths) {
e66cf1a3
LP
3939 _cleanup_free_ char *p;
3940
7bc4bf4a 3941 p = path_join(runtime_prefix, *i);
e66cf1a3
LP
3942 if (!p)
3943 return -ENOMEM;
3944
7bc4bf4a
LP
3945 /* We execute this synchronously, since we need to be sure this is gone when we start the
3946 * service next. */
c6878637 3947 (void) rm_rf(p, REMOVE_ROOT);
e66cf1a3
LP
3948 }
3949
3950 return 0;
5cb5a6ff
LP
3951}
3952
34cf6c43 3953static void exec_command_done(ExecCommand *c) {
43d0fcbd
LP
3954 assert(c);
3955
a1e58e8e 3956 c->path = mfree(c->path);
6796073e 3957 c->argv = strv_free(c->argv);
43d0fcbd
LP
3958}
3959
da6053d0
LP
3960void exec_command_done_array(ExecCommand *c, size_t n) {
3961 size_t i;
43d0fcbd
LP
3962
3963 for (i = 0; i < n; i++)
3964 exec_command_done(c+i);
3965}
3966
f1acf85a 3967ExecCommand* exec_command_free_list(ExecCommand *c) {
5cb5a6ff
LP
3968 ExecCommand *i;
3969
3970 while ((i = c)) {
71fda00f 3971 LIST_REMOVE(command, c, i);
43d0fcbd 3972 exec_command_done(i);
5cb5a6ff
LP
3973 free(i);
3974 }
f1acf85a
ZJS
3975
3976 return NULL;
5cb5a6ff
LP
3977}
3978
da6053d0
LP
3979void exec_command_free_array(ExecCommand **c, size_t n) {
3980 size_t i;
034c6ed7 3981
f1acf85a
ZJS
3982 for (i = 0; i < n; i++)
3983 c[i] = exec_command_free_list(c[i]);
034c6ed7
LP
3984}
3985
6a1d4d9f
LP
3986void exec_command_reset_status_array(ExecCommand *c, size_t n) {
3987 size_t i;
3988
3989 for (i = 0; i < n; i++)
3990 exec_status_reset(&c[i].exec_status);
3991}
3992
3993void exec_command_reset_status_list_array(ExecCommand **c, size_t n) {
3994 size_t i;
3995
3996 for (i = 0; i < n; i++) {
3997 ExecCommand *z;
3998
3999 LIST_FOREACH(command, z, c[i])
4000 exec_status_reset(&z->exec_status);
4001 }
4002}
4003
039f0e70 4004typedef struct InvalidEnvInfo {
34cf6c43 4005 const Unit *unit;
039f0e70
LP
4006 const char *path;
4007} InvalidEnvInfo;
4008
4009static void invalid_env(const char *p, void *userdata) {
4010 InvalidEnvInfo *info = userdata;
4011
f2341e0a 4012 log_unit_error(info->unit, "Ignoring invalid environment assignment '%s': %s", p, info->path);
039f0e70
LP
4013}
4014
52c239d7
LB
4015const char* exec_context_fdname(const ExecContext *c, int fd_index) {
4016 assert(c);
4017
4018 switch (fd_index) {
5073ff6b 4019
52c239d7
LB
4020 case STDIN_FILENO:
4021 if (c->std_input != EXEC_INPUT_NAMED_FD)
4022 return NULL;
5073ff6b 4023
52c239d7 4024 return c->stdio_fdname[STDIN_FILENO] ?: "stdin";
5073ff6b 4025
52c239d7
LB
4026 case STDOUT_FILENO:
4027 if (c->std_output != EXEC_OUTPUT_NAMED_FD)
4028 return NULL;
5073ff6b 4029
52c239d7 4030 return c->stdio_fdname[STDOUT_FILENO] ?: "stdout";
5073ff6b 4031
52c239d7
LB
4032 case STDERR_FILENO:
4033 if (c->std_error != EXEC_OUTPUT_NAMED_FD)
4034 return NULL;
5073ff6b 4035
52c239d7 4036 return c->stdio_fdname[STDERR_FILENO] ?: "stderr";
5073ff6b 4037
52c239d7
LB
4038 default:
4039 return NULL;
4040 }
4041}
4042
3042bbeb 4043static int exec_context_named_iofds(const ExecContext *c, const ExecParameters *p, int named_iofds[static 3]) {
da6053d0 4044 size_t i, targets;
56fbd561 4045 const char* stdio_fdname[3];
da6053d0 4046 size_t n_fds;
52c239d7
LB
4047
4048 assert(c);
4049 assert(p);
4050
4051 targets = (c->std_input == EXEC_INPUT_NAMED_FD) +
4052 (c->std_output == EXEC_OUTPUT_NAMED_FD) +
4053 (c->std_error == EXEC_OUTPUT_NAMED_FD);
4054
4055 for (i = 0; i < 3; i++)
4056 stdio_fdname[i] = exec_context_fdname(c, i);
4057
4c47affc
FB
4058 n_fds = p->n_storage_fds + p->n_socket_fds;
4059
4060 for (i = 0; i < n_fds && targets > 0; i++)
56fbd561
ZJS
4061 if (named_iofds[STDIN_FILENO] < 0 &&
4062 c->std_input == EXEC_INPUT_NAMED_FD &&
4063 stdio_fdname[STDIN_FILENO] &&
4064 streq(p->fd_names[i], stdio_fdname[STDIN_FILENO])) {
4065
52c239d7
LB
4066 named_iofds[STDIN_FILENO] = p->fds[i];
4067 targets--;
56fbd561
ZJS
4068
4069 } else if (named_iofds[STDOUT_FILENO] < 0 &&
4070 c->std_output == EXEC_OUTPUT_NAMED_FD &&
4071 stdio_fdname[STDOUT_FILENO] &&
4072 streq(p->fd_names[i], stdio_fdname[STDOUT_FILENO])) {
4073
52c239d7
LB
4074 named_iofds[STDOUT_FILENO] = p->fds[i];
4075 targets--;
56fbd561
ZJS
4076
4077 } else if (named_iofds[STDERR_FILENO] < 0 &&
4078 c->std_error == EXEC_OUTPUT_NAMED_FD &&
4079 stdio_fdname[STDERR_FILENO] &&
4080 streq(p->fd_names[i], stdio_fdname[STDERR_FILENO])) {
4081
52c239d7
LB
4082 named_iofds[STDERR_FILENO] = p->fds[i];
4083 targets--;
4084 }
4085
56fbd561 4086 return targets == 0 ? 0 : -ENOENT;
52c239d7
LB
4087}
4088
34cf6c43 4089static int exec_context_load_environment(const Unit *unit, const ExecContext *c, char ***l) {
8c7be95e
LP
4090 char **i, **r = NULL;
4091
4092 assert(c);
4093 assert(l);
4094
4095 STRV_FOREACH(i, c->environment_files) {
4096 char *fn;
52511fae
ZJS
4097 int k;
4098 unsigned n;
8c7be95e
LP
4099 bool ignore = false;
4100 char **p;
7fd1b19b 4101 _cleanup_globfree_ glob_t pglob = {};
8c7be95e
LP
4102
4103 fn = *i;
4104
4105 if (fn[0] == '-') {
4106 ignore = true;
313cefa1 4107 fn++;
8c7be95e
LP
4108 }
4109
4110 if (!path_is_absolute(fn)) {
8c7be95e
LP
4111 if (ignore)
4112 continue;
4113
4114 strv_free(r);
4115 return -EINVAL;
4116 }
4117
2bef10ab 4118 /* Filename supports globbing, take all matching files */
d8c92e8b
ZJS
4119 k = safe_glob(fn, 0, &pglob);
4120 if (k < 0) {
2bef10ab
PL
4121 if (ignore)
4122 continue;
8c7be95e 4123
2bef10ab 4124 strv_free(r);
d8c92e8b 4125 return k;
2bef10ab 4126 }
8c7be95e 4127
d8c92e8b
ZJS
4128 /* When we don't match anything, -ENOENT should be returned */
4129 assert(pglob.gl_pathc > 0);
4130
4131 for (n = 0; n < pglob.gl_pathc; n++) {
aa8fbc74 4132 k = load_env_file(NULL, pglob.gl_pathv[n], &p);
2bef10ab
PL
4133 if (k < 0) {
4134 if (ignore)
4135 continue;
8c7be95e 4136
2bef10ab 4137 strv_free(r);
2bef10ab 4138 return k;
e9c1ea9d 4139 }
ebc05a09 4140 /* Log invalid environment variables with filename */
039f0e70
LP
4141 if (p) {
4142 InvalidEnvInfo info = {
f2341e0a 4143 .unit = unit,
039f0e70
LP
4144 .path = pglob.gl_pathv[n]
4145 };
4146
4147 p = strv_env_clean_with_callback(p, invalid_env, &info);
4148 }
8c7be95e 4149
234519ae 4150 if (!r)
2bef10ab
PL
4151 r = p;
4152 else {
4153 char **m;
8c7be95e 4154
2bef10ab
PL
4155 m = strv_env_merge(2, r, p);
4156 strv_free(r);
4157 strv_free(p);
c84a9488 4158 if (!m)
2bef10ab 4159 return -ENOMEM;
2bef10ab
PL
4160
4161 r = m;
4162 }
8c7be95e
LP
4163 }
4164 }
4165
4166 *l = r;
4167
4168 return 0;
4169}
4170
6ac8fdc9 4171static bool tty_may_match_dev_console(const char *tty) {
7b912648 4172 _cleanup_free_ char *resolved = NULL;
6ac8fdc9 4173
1e22b5cd
LP
4174 if (!tty)
4175 return true;
4176
a119ec7c 4177 tty = skip_dev_prefix(tty);
6ac8fdc9
MS
4178
4179 /* trivial identity? */
4180 if (streq(tty, "console"))
4181 return true;
4182
7b912648
LP
4183 if (resolve_dev_console(&resolved) < 0)
4184 return true; /* if we could not resolve, assume it may */
6ac8fdc9
MS
4185
4186 /* "tty0" means the active VC, so it may be the same sometimes */
955f1c85 4187 return path_equal(resolved, tty) || (streq(resolved, "tty0") && tty_is_vc(tty));
6ac8fdc9
MS
4188}
4189
6c0ae739
LP
4190static bool exec_context_may_touch_tty(const ExecContext *ec) {
4191 assert(ec);
1e22b5cd 4192
6c0ae739 4193 return ec->tty_reset ||
1e22b5cd
LP
4194 ec->tty_vhangup ||
4195 ec->tty_vt_disallocate ||
6ac8fdc9
MS
4196 is_terminal_input(ec->std_input) ||
4197 is_terminal_output(ec->std_output) ||
6c0ae739
LP
4198 is_terminal_output(ec->std_error);
4199}
4200
4201bool exec_context_may_touch_console(const ExecContext *ec) {
4202
4203 return exec_context_may_touch_tty(ec) &&
1e22b5cd 4204 tty_may_match_dev_console(exec_context_tty_path(ec));
6ac8fdc9
MS
4205}
4206
15ae422b
LP
/* Writes every string of the NULL-terminated list 'l' to 'f', each one preceded by a single space. A NULL
 * list is treated like an empty one. */
static void strv_fprintf(FILE *f, char **l) {
        assert(f);

        for (char **item = l; item && *item; item++)
                fprintf(f, " %s", *item);
}
4215
34cf6c43 4216void exec_context_dump(const ExecContext *c, FILE* f, const char *prefix) {
d3070fbd 4217 ExecDirectoryType dt;
c2bbd90b 4218 char **e, **d;
94f04347 4219 unsigned i;
add00535 4220 int r;
9eba9da4 4221
5cb5a6ff
LP
4222 assert(c);
4223 assert(f);
4224
4ad49000 4225 prefix = strempty(prefix);
5cb5a6ff
LP
4226
4227 fprintf(f,
94f04347
LP
4228 "%sUMask: %04o\n"
4229 "%sWorkingDirectory: %s\n"
451a074f 4230 "%sRootDirectory: %s\n"
15ae422b 4231 "%sNonBlocking: %s\n"
64747e2d 4232 "%sPrivateTmp: %s\n"
7f112f50 4233 "%sPrivateDevices: %s\n"
59eeb84b 4234 "%sProtectKernelTunables: %s\n"
e66a2f65 4235 "%sProtectKernelModules: %s\n"
59eeb84b 4236 "%sProtectControlGroups: %s\n"
d251207d
LP
4237 "%sPrivateNetwork: %s\n"
4238 "%sPrivateUsers: %s\n"
1b8689f9
LP
4239 "%sProtectHome: %s\n"
4240 "%sProtectSystem: %s\n"
5d997827 4241 "%sMountAPIVFS: %s\n"
f3e43635 4242 "%sIgnoreSIGPIPE: %s\n"
f4170c67 4243 "%sMemoryDenyWriteExecute: %s\n"
b1edf445 4244 "%sRestrictRealtime: %s\n"
f69567cb 4245 "%sRestrictSUIDSGID: %s\n"
aecd5ac6
TM
4246 "%sKeyringMode: %s\n"
4247 "%sProtectHostname: %s\n",
5cb5a6ff 4248 prefix, c->umask,
9eba9da4 4249 prefix, c->working_directory ? c->working_directory : "/",
451a074f 4250 prefix, c->root_directory ? c->root_directory : "/",
15ae422b 4251 prefix, yes_no(c->non_blocking),
64747e2d 4252 prefix, yes_no(c->private_tmp),
7f112f50 4253 prefix, yes_no(c->private_devices),
59eeb84b 4254 prefix, yes_no(c->protect_kernel_tunables),
e66a2f65 4255 prefix, yes_no(c->protect_kernel_modules),
59eeb84b 4256 prefix, yes_no(c->protect_control_groups),
d251207d
LP
4257 prefix, yes_no(c->private_network),
4258 prefix, yes_no(c->private_users),
1b8689f9
LP
4259 prefix, protect_home_to_string(c->protect_home),
4260 prefix, protect_system_to_string(c->protect_system),
5d997827 4261 prefix, yes_no(c->mount_apivfs),
f3e43635 4262 prefix, yes_no(c->ignore_sigpipe),
f4170c67 4263 prefix, yes_no(c->memory_deny_write_execute),
b1edf445 4264 prefix, yes_no(c->restrict_realtime),
f69567cb 4265 prefix, yes_no(c->restrict_suid_sgid),
aecd5ac6
TM
4266 prefix, exec_keyring_mode_to_string(c->keyring_mode),
4267 prefix, yes_no(c->protect_hostname));
fb33a393 4268
915e6d16
LP
4269 if (c->root_image)
4270 fprintf(f, "%sRootImage: %s\n", prefix, c->root_image);
4271
8c7be95e
LP
4272 STRV_FOREACH(e, c->environment)
4273 fprintf(f, "%sEnvironment: %s\n", prefix, *e);
4274
4275 STRV_FOREACH(e, c->environment_files)
4276 fprintf(f, "%sEnvironmentFile: %s\n", prefix, *e);
94f04347 4277
b4c14404
FB
4278 STRV_FOREACH(e, c->pass_environment)
4279 fprintf(f, "%sPassEnvironment: %s\n", prefix, *e);
4280
00819cc1
LP
4281 STRV_FOREACH(e, c->unset_environment)
4282 fprintf(f, "%sUnsetEnvironment: %s\n", prefix, *e);
4283
53f47dfc
YW
4284 fprintf(f, "%sRuntimeDirectoryPreserve: %s\n", prefix, exec_preserve_mode_to_string(c->runtime_directory_preserve_mode));
4285
72fd1768 4286 for (dt = 0; dt < _EXEC_DIRECTORY_TYPE_MAX; dt++) {
3536f49e
YW
4287 fprintf(f, "%s%sMode: %04o\n", prefix, exec_directory_type_to_string(dt), c->directories[dt].mode);
4288
4289 STRV_FOREACH(d, c->directories[dt].paths)
4290 fprintf(f, "%s%s: %s\n", prefix, exec_directory_type_to_string(dt), *d);
4291 }
c2bbd90b 4292
fb33a393
LP
4293 if (c->nice_set)
4294 fprintf(f,
4295 "%sNice: %i\n",
4296 prefix, c->nice);
4297
dd6c17b1 4298 if (c->oom_score_adjust_set)
fb33a393 4299 fprintf(f,
dd6c17b1
LP
4300 "%sOOMScoreAdjust: %i\n",
4301 prefix, c->oom_score_adjust);
9eba9da4 4302
94f04347 4303 for (i = 0; i < RLIM_NLIMITS; i++)
3c11da9d 4304 if (c->rlimit[i]) {
4c3a2b84 4305 fprintf(f, "%sLimit%s: " RLIM_FMT "\n",
3c11da9d 4306 prefix, rlimit_to_string(i), c->rlimit[i]->rlim_max);
4c3a2b84 4307 fprintf(f, "%sLimit%sSoft: " RLIM_FMT "\n",
3c11da9d
EV
4308 prefix, rlimit_to_string(i), c->rlimit[i]->rlim_cur);
4309 }
94f04347 4310
f8b69d1d 4311 if (c->ioprio_set) {
1756a011 4312 _cleanup_free_ char *class_str = NULL;
f8b69d1d 4313
837df140
YW
4314 r = ioprio_class_to_string_alloc(IOPRIO_PRIO_CLASS(c->ioprio), &class_str);
4315 if (r >= 0)
4316 fprintf(f, "%sIOSchedulingClass: %s\n", prefix, class_str);
4317
4318 fprintf(f, "%sIOPriority: %lu\n", prefix, IOPRIO_PRIO_DATA(c->ioprio));
f8b69d1d 4319 }
94f04347 4320
f8b69d1d 4321 if (c->cpu_sched_set) {
1756a011 4322 _cleanup_free_ char *policy_str = NULL;
f8b69d1d 4323
837df140
YW
4324 r = sched_policy_to_string_alloc(c->cpu_sched_policy, &policy_str);
4325 if (r >= 0)
4326 fprintf(f, "%sCPUSchedulingPolicy: %s\n", prefix, policy_str);
4327
94f04347 4328 fprintf(f,
38b48754
LP
4329 "%sCPUSchedulingPriority: %i\n"
4330 "%sCPUSchedulingResetOnFork: %s\n",
38b48754
LP
4331 prefix, c->cpu_sched_priority,
4332 prefix, yes_no(c->cpu_sched_reset_on_fork));
b929bf04 4333 }
94f04347 4334
82c121a4 4335 if (c->cpuset) {
94f04347 4336 fprintf(f, "%sCPUAffinity:", prefix);
82c121a4
LP
4337 for (i = 0; i < c->cpuset_ncpus; i++)
4338 if (CPU_ISSET_S(i, CPU_ALLOC_SIZE(c->cpuset_ncpus), c->cpuset))
43a99a7a 4339 fprintf(f, " %u", i);
94f04347
LP
4340 fputs("\n", f);
4341 }
4342
3a43da28 4343 if (c->timer_slack_nsec != NSEC_INFINITY)
ccd06097 4344 fprintf(f, "%sTimerSlackNSec: "NSEC_FMT "\n", prefix, c->timer_slack_nsec);
94f04347
LP
4345
4346 fprintf(f,
80876c20
LP
4347 "%sStandardInput: %s\n"
4348 "%sStandardOutput: %s\n"
4349 "%sStandardError: %s\n",
4350 prefix, exec_input_to_string(c->std_input),
4351 prefix, exec_output_to_string(c->std_output),
4352 prefix, exec_output_to_string(c->std_error));
4353
befc4a80
LP
4354 if (c->std_input == EXEC_INPUT_NAMED_FD)
4355 fprintf(f, "%sStandardInputFileDescriptorName: %s\n", prefix, c->stdio_fdname[STDIN_FILENO]);
4356 if (c->std_output == EXEC_OUTPUT_NAMED_FD)
4357 fprintf(f, "%sStandardOutputFileDescriptorName: %s\n", prefix, c->stdio_fdname[STDOUT_FILENO]);
4358 if (c->std_error == EXEC_OUTPUT_NAMED_FD)
4359 fprintf(f, "%sStandardErrorFileDescriptorName: %s\n", prefix, c->stdio_fdname[STDERR_FILENO]);
4360
4361 if (c->std_input == EXEC_INPUT_FILE)
4362 fprintf(f, "%sStandardInputFile: %s\n", prefix, c->stdio_file[STDIN_FILENO]);
4363 if (c->std_output == EXEC_OUTPUT_FILE)
4364 fprintf(f, "%sStandardOutputFile: %s\n", prefix, c->stdio_file[STDOUT_FILENO]);
566b7d23
ZD
4365 if (c->std_output == EXEC_OUTPUT_FILE_APPEND)
4366 fprintf(f, "%sStandardOutputFileToAppend: %s\n", prefix, c->stdio_file[STDOUT_FILENO]);
befc4a80
LP
4367 if (c->std_error == EXEC_OUTPUT_FILE)
4368 fprintf(f, "%sStandardErrorFile: %s\n", prefix, c->stdio_file[STDERR_FILENO]);
566b7d23
ZD
4369 if (c->std_error == EXEC_OUTPUT_FILE_APPEND)
4370 fprintf(f, "%sStandardErrorFileToAppend: %s\n", prefix, c->stdio_file[STDERR_FILENO]);
befc4a80 4371
80876c20
LP
4372 if (c->tty_path)
4373 fprintf(f,
6ea832a2
LP
4374 "%sTTYPath: %s\n"
4375 "%sTTYReset: %s\n"
4376 "%sTTYVHangup: %s\n"
4377 "%sTTYVTDisallocate: %s\n",
4378 prefix, c->tty_path,
4379 prefix, yes_no(c->tty_reset),
4380 prefix, yes_no(c->tty_vhangup),
4381 prefix, yes_no(c->tty_vt_disallocate));
94f04347 4382
9f6444eb
LP
4383 if (IN_SET(c->std_output,
4384 EXEC_OUTPUT_SYSLOG,
4385 EXEC_OUTPUT_KMSG,
4386 EXEC_OUTPUT_JOURNAL,
4387 EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
4388 EXEC_OUTPUT_KMSG_AND_CONSOLE,
4389 EXEC_OUTPUT_JOURNAL_AND_CONSOLE) ||
4390 IN_SET(c->std_error,
4391 EXEC_OUTPUT_SYSLOG,
4392 EXEC_OUTPUT_KMSG,
4393 EXEC_OUTPUT_JOURNAL,
4394 EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
4395 EXEC_OUTPUT_KMSG_AND_CONSOLE,
4396 EXEC_OUTPUT_JOURNAL_AND_CONSOLE)) {
f8b69d1d 4397
5ce70e5b 4398 _cleanup_free_ char *fac_str = NULL, *lvl_str = NULL;
f8b69d1d 4399
837df140
YW
4400 r = log_facility_unshifted_to_string_alloc(c->syslog_priority >> 3, &fac_str);
4401 if (r >= 0)
4402 fprintf(f, "%sSyslogFacility: %s\n", prefix, fac_str);
f8b69d1d 4403
837df140
YW
4404 r = log_level_to_string_alloc(LOG_PRI(c->syslog_priority), &lvl_str);
4405 if (r >= 0)
4406 fprintf(f, "%sSyslogLevel: %s\n", prefix, lvl_str);
f8b69d1d 4407 }
94f04347 4408
d3070fbd
LP
4409 if (c->log_level_max >= 0) {
4410 _cleanup_free_ char *t = NULL;
4411
4412 (void) log_level_to_string_alloc(c->log_level_max, &t);
4413
4414 fprintf(f, "%sLogLevelMax: %s\n", prefix, strna(t));
4415 }
4416
90fc172e
AZ
4417 if (c->log_rate_limit_interval_usec > 0) {
4418 char buf_timespan[FORMAT_TIMESPAN_MAX];
4419
4420 fprintf(f,
4421 "%sLogRateLimitIntervalSec: %s\n",
4422 prefix, format_timespan(buf_timespan, sizeof(buf_timespan), c->log_rate_limit_interval_usec, USEC_PER_SEC));
4423 }
4424
4425 if (c->log_rate_limit_burst > 0)
4426 fprintf(f, "%sLogRateLimitBurst: %u\n", prefix, c->log_rate_limit_burst);
4427
d3070fbd
LP
4428 if (c->n_log_extra_fields > 0) {
4429 size_t j;
4430
4431 for (j = 0; j < c->n_log_extra_fields; j++) {
4432 fprintf(f, "%sLogExtraFields: ", prefix);
4433 fwrite(c->log_extra_fields[j].iov_base,
4434 1, c->log_extra_fields[j].iov_len,
4435 f);
4436 fputc('\n', f);
4437 }
4438 }
4439
07d46372
YW
4440 if (c->secure_bits) {
4441 _cleanup_free_ char *str = NULL;
4442
4443 r = secure_bits_to_string_alloc(c->secure_bits, &str);
4444 if (r >= 0)
4445 fprintf(f, "%sSecure Bits: %s\n", prefix, str);
4446 }
94f04347 4447
a103496c 4448 if (c->capability_bounding_set != CAP_ALL) {
dd1f5bd0 4449 _cleanup_free_ char *str = NULL;
94f04347 4450
dd1f5bd0
YW
4451 r = capability_set_to_string_alloc(c->capability_bounding_set, &str);
4452 if (r >= 0)
4453 fprintf(f, "%sCapabilityBoundingSet: %s\n", prefix, str);
755d4b67
IP
4454 }
4455
4456 if (c->capability_ambient_set != 0) {
dd1f5bd0 4457 _cleanup_free_ char *str = NULL;
755d4b67 4458
dd1f5bd0
YW
4459 r = capability_set_to_string_alloc(c->capability_ambient_set, &str);
4460 if (r >= 0)
4461 fprintf(f, "%sAmbientCapabilities: %s\n", prefix, str);
94f04347
LP
4462 }
4463
4464 if (c->user)
f2d3769a 4465 fprintf(f, "%sUser: %s\n", prefix, c->user);
94f04347 4466 if (c->group)
f2d3769a 4467 fprintf(f, "%sGroup: %s\n", prefix, c->group);
94f04347 4468
29206d46
LP
4469 fprintf(f, "%sDynamicUser: %s\n", prefix, yes_no(c->dynamic_user));
4470
ac6e8be6 4471 if (!strv_isempty(c->supplementary_groups)) {
94f04347 4472 fprintf(f, "%sSupplementaryGroups:", prefix);
15ae422b
LP
4473 strv_fprintf(f, c->supplementary_groups);
4474 fputs("\n", f);
4475 }
94f04347 4476
5b6319dc 4477 if (c->pam_name)
f2d3769a 4478 fprintf(f, "%sPAMName: %s\n", prefix, c->pam_name);
5b6319dc 4479
58629001 4480 if (!strv_isempty(c->read_write_paths)) {
2a624c36
AP
4481 fprintf(f, "%sReadWritePaths:", prefix);
4482 strv_fprintf(f, c->read_write_paths);
15ae422b
LP
4483 fputs("\n", f);
4484 }
4485
58629001 4486 if (!strv_isempty(c->read_only_paths)) {
2a624c36
AP
4487 fprintf(f, "%sReadOnlyPaths:", prefix);
4488 strv_fprintf(f, c->read_only_paths);
15ae422b
LP
4489 fputs("\n", f);
4490 }
94f04347 4491
58629001 4492 if (!strv_isempty(c->inaccessible_paths)) {
2a624c36
AP
4493 fprintf(f, "%sInaccessiblePaths:", prefix);
4494 strv_fprintf(f, c->inaccessible_paths);
94f04347
LP
4495 fputs("\n", f);
4496 }
2e22afe9 4497
d2d6c096 4498 if (c->n_bind_mounts > 0)
4ca763a9
YW
4499 for (i = 0; i < c->n_bind_mounts; i++)
4500 fprintf(f, "%s%s: %s%s:%s:%s\n", prefix,
d2d6c096 4501 c->bind_mounts[i].read_only ? "BindReadOnlyPaths" : "BindPaths",
4ca763a9 4502 c->bind_mounts[i].ignore_enoent ? "-": "",
d2d6c096
LP
4503 c->bind_mounts[i].source,
4504 c->bind_mounts[i].destination,
4505 c->bind_mounts[i].recursive ? "rbind" : "norbind");
d2d6c096 4506
2abd4e38
YW
4507 if (c->n_temporary_filesystems > 0)
4508 for (i = 0; i < c->n_temporary_filesystems; i++) {
4509 TemporaryFileSystem *t = c->temporary_filesystems + i;
4510
4511 fprintf(f, "%sTemporaryFileSystem: %s%s%s\n", prefix,
4512 t->path,
4513 isempty(t->options) ? "" : ":",
4514 strempty(t->options));
4515 }
4516
169c1bda
LP
4517 if (c->utmp_id)
4518 fprintf(f,
4519 "%sUtmpIdentifier: %s\n",
4520 prefix, c->utmp_id);
7b52a628
MS
4521
4522 if (c->selinux_context)
4523 fprintf(f,
5f8640fb
LP
4524 "%sSELinuxContext: %s%s\n",
4525 prefix, c->selinux_context_ignore ? "-" : "", c->selinux_context);
17df7223 4526
80c21aea
WC
4527 if (c->apparmor_profile)
4528 fprintf(f,
4529 "%sAppArmorProfile: %s%s\n",
4530 prefix, c->apparmor_profile_ignore ? "-" : "", c->apparmor_profile);
4531
4532 if (c->smack_process_label)
4533 fprintf(f,
4534 "%sSmackProcessLabel: %s%s\n",
4535 prefix, c->smack_process_label_ignore ? "-" : "", c->smack_process_label);
4536
050f7277 4537 if (c->personality != PERSONALITY_INVALID)
ac45f971
LP
4538 fprintf(f,
4539 "%sPersonality: %s\n",
4540 prefix, strna(personality_to_string(c->personality)));
4541
78e864e5
TM
4542 fprintf(f,
4543 "%sLockPersonality: %s\n",
4544 prefix, yes_no(c->lock_personality));
4545
17df7223 4546 if (c->syscall_filter) {
349cc4a5 4547#if HAVE_SECCOMP
17df7223 4548 Iterator j;
8cfa775f 4549 void *id, *val;
17df7223 4550 bool first = true;
351a19b1 4551#endif
17df7223
LP
4552
4553 fprintf(f,
57183d11 4554 "%sSystemCallFilter: ",
17df7223
LP
4555 prefix);
4556
4557 if (!c->syscall_whitelist)
4558 fputc('~', f);
4559
349cc4a5 4560#if HAVE_SECCOMP
8cfa775f 4561 HASHMAP_FOREACH_KEY(val, id, c->syscall_filter, j) {
17df7223 4562 _cleanup_free_ char *name = NULL;
8cfa775f
YW
4563 const char *errno_name = NULL;
4564 int num = PTR_TO_INT(val);
17df7223
LP
4565
4566 if (first)
4567 first = false;
4568 else
4569 fputc(' ', f);
4570
57183d11 4571 name = seccomp_syscall_resolve_num_arch(SCMP_ARCH_NATIVE, PTR_TO_INT(id) - 1);
17df7223 4572 fputs(strna(name), f);
8cfa775f
YW
4573
4574 if (num >= 0) {
4575 errno_name = errno_to_name(num);
4576 if (errno_name)
4577 fprintf(f, ":%s", errno_name);
4578 else
4579 fprintf(f, ":%d", num);
4580 }
17df7223 4581 }
351a19b1 4582#endif
17df7223
LP
4583
4584 fputc('\n', f);
4585 }
4586
57183d11 4587 if (c->syscall_archs) {
349cc4a5 4588#if HAVE_SECCOMP
57183d11
LP
4589 Iterator j;
4590 void *id;
4591#endif
4592
4593 fprintf(f,
4594 "%sSystemCallArchitectures:",
4595 prefix);
4596
349cc4a5 4597#if HAVE_SECCOMP
57183d11
LP
4598 SET_FOREACH(id, c->syscall_archs, j)
4599 fprintf(f, " %s", strna(seccomp_arch_to_string(PTR_TO_UINT32(id) - 1)));
4600#endif
4601 fputc('\n', f);
4602 }
4603
add00535
LP
4604 if (exec_context_restrict_namespaces_set(c)) {
4605 _cleanup_free_ char *s = NULL;
4606
86c2a9f1 4607 r = namespace_flags_to_string(c->restrict_namespaces, &s);
add00535
LP
4608 if (r >= 0)
4609 fprintf(f, "%sRestrictNamespaces: %s\n",
4610 prefix, s);
4611 }
4612
a8d08f39
LP
4613 if (c->network_namespace_path)
4614 fprintf(f,
4615 "%sNetworkNamespacePath: %s\n",
4616 prefix, c->network_namespace_path);
4617
3df90f24
YW
4618 if (c->syscall_errno > 0) {
4619 const char *errno_name;
4620
4621 fprintf(f, "%sSystemCallErrorNumber: ", prefix);
4622
4623 errno_name = errno_to_name(c->syscall_errno);
4624 if (errno_name)
4625 fprintf(f, "%s\n", errno_name);
4626 else
4627 fprintf(f, "%d\n", c->syscall_errno);
4628 }
5cb5a6ff
LP
4629}
4630
34cf6c43 4631bool exec_context_maintains_privileges(const ExecContext *c) {
a931ad47
LP
4632 assert(c);
4633
61233823 4634 /* Returns true if the process forked off would run under
a931ad47
LP
4635 * an unchanged UID or as root. */
4636
4637 if (!c->user)
4638 return true;
4639
4640 if (streq(c->user, "root") || streq(c->user, "0"))
4641 return true;
4642
4643 return false;
4644}
4645
34cf6c43 4646int exec_context_get_effective_ioprio(const ExecContext *c) {
7f452159
LP
4647 int p;
4648
4649 assert(c);
4650
4651 if (c->ioprio_set)
4652 return c->ioprio;
4653
4654 p = ioprio_get(IOPRIO_WHO_PROCESS, 0);
4655 if (p < 0)
4656 return IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 4);
4657
4658 return p;
4659}
4660
d3070fbd
LP
4661void exec_context_free_log_extra_fields(ExecContext *c) {
4662 size_t l;
4663
4664 assert(c);
4665
4666 for (l = 0; l < c->n_log_extra_fields; l++)
4667 free(c->log_extra_fields[l].iov_base);
4668 c->log_extra_fields = mfree(c->log_extra_fields);
4669 c->n_log_extra_fields = 0;
4670}
4671
6f765baf
LP
4672void exec_context_revert_tty(ExecContext *c) {
4673 int r;
4674
4675 assert(c);
4676
4677 /* First, reset the TTY (possibly kicking everybody else from the TTY) */
4678 exec_context_tty_reset(c, NULL);
4679
4680 /* And then undo what chown_terminal() did earlier. Note that we only do this if we have a path
4681 * configured. If the TTY was passed to us as file descriptor we assume the TTY is opened and managed
4682 * by whoever passed it to us and thus knows better when and how to chmod()/chown() it back. */
4683
4684 if (exec_context_may_touch_tty(c)) {
4685 const char *path;
4686
4687 path = exec_context_tty_path(c);
4688 if (path) {
4689 r = chmod_and_chown(path, TTY_MODE, 0, TTY_GID);
4690 if (r < 0 && r != -ENOENT)
4691 log_warning_errno(r, "Failed to reset TTY ownership/access mode of %s, ignoring: %m", path);
4692 }
4693 }
4694}
4695
b58b4116 4696void exec_status_start(ExecStatus *s, pid_t pid) {
034c6ed7 4697 assert(s);
5cb5a6ff 4698
2ed26ed0
LP
4699 *s = (ExecStatus) {
4700 .pid = pid,
4701 };
4702
b58b4116
LP
4703 dual_timestamp_get(&s->start_timestamp);
4704}
4705
34cf6c43 4706void exec_status_exit(ExecStatus *s, const ExecContext *context, pid_t pid, int code, int status) {
b58b4116
LP
4707 assert(s);
4708
2ed26ed0
LP
4709 if (s->pid != pid) {
4710 *s = (ExecStatus) {
4711 .pid = pid,
4712 };
4713 }
b58b4116 4714
63983207 4715 dual_timestamp_get(&s->exit_timestamp);
9fb86720 4716
034c6ed7
LP
4717 s->code = code;
4718 s->status = status;
169c1bda 4719
6f765baf
LP
4720 if (context && context->utmp_id)
4721 (void) utmp_put_dead_process(context->utmp_id, pid, code, status);
9fb86720
LP
4722}
4723
6a1d4d9f
LP
4724void exec_status_reset(ExecStatus *s) {
4725 assert(s);
4726
4727 *s = (ExecStatus) {};
4728}
4729
34cf6c43 4730void exec_status_dump(const ExecStatus *s, FILE *f, const char *prefix) {
9fb86720
LP
4731 char buf[FORMAT_TIMESTAMP_MAX];
4732
4733 assert(s);
4734 assert(f);
4735
9fb86720
LP
4736 if (s->pid <= 0)
4737 return;
4738
4c940960
LP
4739 prefix = strempty(prefix);
4740
9fb86720 4741 fprintf(f,
ccd06097
ZJS
4742 "%sPID: "PID_FMT"\n",
4743 prefix, s->pid);
9fb86720 4744
af9d16e1 4745 if (dual_timestamp_is_set(&s->start_timestamp))
9fb86720
LP
4746 fprintf(f,
4747 "%sStart Timestamp: %s\n",
63983207 4748 prefix, format_timestamp(buf, sizeof(buf), s->start_timestamp.realtime));
9fb86720 4749
af9d16e1 4750 if (dual_timestamp_is_set(&s->exit_timestamp))
9fb86720
LP
4751 fprintf(f,
4752 "%sExit Timestamp: %s\n"
4753 "%sExit Code: %s\n"
4754 "%sExit Status: %i\n",
63983207 4755 prefix, format_timestamp(buf, sizeof(buf), s->exit_timestamp.realtime),
9fb86720
LP
4756 prefix, sigchld_code_to_string(s->code),
4757 prefix, s->status);
5cb5a6ff 4758}
44d8db9e 4759
34cf6c43 4760static char *exec_command_line(char **argv) {
44d8db9e
LP
4761 size_t k;
4762 char *n, *p, **a;
4763 bool first = true;
4764
9e2f7c11 4765 assert(argv);
44d8db9e 4766
9164977d 4767 k = 1;
9e2f7c11 4768 STRV_FOREACH(a, argv)
44d8db9e
LP
4769 k += strlen(*a)+3;
4770
5cd9cd35
LP
4771 n = new(char, k);
4772 if (!n)
44d8db9e
LP
4773 return NULL;
4774
4775 p = n;
9e2f7c11 4776 STRV_FOREACH(a, argv) {
44d8db9e
LP
4777
4778 if (!first)
4779 *(p++) = ' ';
4780 else
4781 first = false;
4782
4783 if (strpbrk(*a, WHITESPACE)) {
4784 *(p++) = '\'';
4785 p = stpcpy(p, *a);
4786 *(p++) = '\'';
4787 } else
4788 p = stpcpy(p, *a);
4789
4790 }
4791
9164977d
LP
4792 *p = 0;
4793
44d8db9e
LP
4794 /* FIXME: this doesn't really handle arguments that have
4795 * spaces and ticks in them */
4796
4797 return n;
4798}
4799
34cf6c43 4800static void exec_command_dump(ExecCommand *c, FILE *f, const char *prefix) {
e1d75803 4801 _cleanup_free_ char *cmd = NULL;
4c940960 4802 const char *prefix2;
44d8db9e
LP
4803
4804 assert(c);
4805 assert(f);
4806
4c940960 4807 prefix = strempty(prefix);
63c372cb 4808 prefix2 = strjoina(prefix, "\t");
44d8db9e 4809
9e2f7c11 4810 cmd = exec_command_line(c->argv);
44d8db9e
LP
4811 fprintf(f,
4812 "%sCommand Line: %s\n",
4813 prefix, cmd ? cmd : strerror(ENOMEM));
4814
9fb86720 4815 exec_status_dump(&c->exec_status, f, prefix2);
44d8db9e
LP
4816}
4817
4818void exec_command_dump_list(ExecCommand *c, FILE *f, const char *prefix) {
4819 assert(f);
4820
4c940960 4821 prefix = strempty(prefix);
44d8db9e
LP
4822
4823 LIST_FOREACH(command, c, c)
4824 exec_command_dump(c, f, prefix);
4825}
94f04347 4826
a6a80b4f
LP
4827void exec_command_append_list(ExecCommand **l, ExecCommand *e) {
4828 ExecCommand *end;
4829
4830 assert(l);
4831 assert(e);
4832
4833 if (*l) {
35b8ca3a 4834 /* It's kind of important, that we keep the order here */
71fda00f
LP
4835 LIST_FIND_TAIL(command, *l, end);
4836 LIST_INSERT_AFTER(command, *l, end, e);
a6a80b4f
LP
4837 } else
4838 *l = e;
4839}
4840
26fd040d
LP
4841int exec_command_set(ExecCommand *c, const char *path, ...) {
4842 va_list ap;
4843 char **l, *p;
4844
4845 assert(c);
4846 assert(path);
4847
4848 va_start(ap, path);
4849 l = strv_new_ap(path, ap);
4850 va_end(ap);
4851
4852 if (!l)
4853 return -ENOMEM;
4854
250a918d
LP
4855 p = strdup(path);
4856 if (!p) {
26fd040d
LP
4857 strv_free(l);
4858 return -ENOMEM;
4859 }
4860
6897dfe8 4861 free_and_replace(c->path, p);
26fd040d 4862
130d3d22 4863 return strv_free_and_replace(c->argv, l);
26fd040d
LP
4864}
4865
86b23b07 4866int exec_command_append(ExecCommand *c, const char *path, ...) {
e63ff941 4867 _cleanup_strv_free_ char **l = NULL;
86b23b07 4868 va_list ap;
86b23b07
JS
4869 int r;
4870
4871 assert(c);
4872 assert(path);
4873
4874 va_start(ap, path);
4875 l = strv_new_ap(path, ap);
4876 va_end(ap);
4877
4878 if (!l)
4879 return -ENOMEM;
4880
e287086b 4881 r = strv_extend_strv(&c->argv, l, false);
e63ff941 4882 if (r < 0)
86b23b07 4883 return r;
86b23b07
JS
4884
4885 return 0;
4886}
4887
e8a565cb
YW
4888static void *remove_tmpdir_thread(void *p) {
4889 _cleanup_free_ char *path = p;
86b23b07 4890
e8a565cb
YW
4891 (void) rm_rf(path, REMOVE_ROOT|REMOVE_PHYSICAL);
4892 return NULL;
4893}
4894
4895static ExecRuntime* exec_runtime_free(ExecRuntime *rt, bool destroy) {
4896 int r;
4897
4898 if (!rt)
4899 return NULL;
4900
4901 if (rt->manager)
4902 (void) hashmap_remove(rt->manager->exec_runtime_by_id, rt->id);
4903
4904 /* When destroy is true, then rm_rf tmp_dir and var_tmp_dir. */
4905 if (destroy && rt->tmp_dir) {
4906 log_debug("Spawning thread to nuke %s", rt->tmp_dir);
4907
4908 r = asynchronous_job(remove_tmpdir_thread, rt->tmp_dir);
4909 if (r < 0) {
4910 log_warning_errno(r, "Failed to nuke %s: %m", rt->tmp_dir);
4911 free(rt->tmp_dir);
4912 }
4913
4914 rt->tmp_dir = NULL;
4915 }
613b411c 4916
e8a565cb
YW
4917 if (destroy && rt->var_tmp_dir) {
4918 log_debug("Spawning thread to nuke %s", rt->var_tmp_dir);
4919
4920 r = asynchronous_job(remove_tmpdir_thread, rt->var_tmp_dir);
4921 if (r < 0) {
4922 log_warning_errno(r, "Failed to nuke %s: %m", rt->var_tmp_dir);
4923 free(rt->var_tmp_dir);
4924 }
4925
4926 rt->var_tmp_dir = NULL;
4927 }
4928
4929 rt->id = mfree(rt->id);
4930 rt->tmp_dir = mfree(rt->tmp_dir);
4931 rt->var_tmp_dir = mfree(rt->var_tmp_dir);
4932 safe_close_pair(rt->netns_storage_socket);
4933 return mfree(rt);
4934}
4935
4936static void exec_runtime_freep(ExecRuntime **rt) {
da6bc6ed 4937 (void) exec_runtime_free(*rt, false);
e8a565cb
YW
4938}
4939
8e8009dc
LP
4940static int exec_runtime_allocate(ExecRuntime **ret) {
4941 ExecRuntime *n;
613b411c 4942
8e8009dc 4943 assert(ret);
613b411c 4944
8e8009dc
LP
4945 n = new(ExecRuntime, 1);
4946 if (!n)
613b411c
LP
4947 return -ENOMEM;
4948
8e8009dc
LP
4949 *n = (ExecRuntime) {
4950 .netns_storage_socket = { -1, -1 },
4951 };
4952
4953 *ret = n;
613b411c
LP
4954 return 0;
4955}
4956
e8a565cb
YW
4957static int exec_runtime_add(
4958 Manager *m,
4959 const char *id,
4960 const char *tmp_dir,
4961 const char *var_tmp_dir,
4962 const int netns_storage_socket[2],
4963 ExecRuntime **ret) {
4964
4965 _cleanup_(exec_runtime_freep) ExecRuntime *rt = NULL;
613b411c
LP
4966 int r;
4967
e8a565cb 4968 assert(m);
613b411c
LP
4969 assert(id);
4970
e8a565cb
YW
4971 r = hashmap_ensure_allocated(&m->exec_runtime_by_id, &string_hash_ops);
4972 if (r < 0)
4973 return r;
613b411c 4974
e8a565cb 4975 r = exec_runtime_allocate(&rt);
613b411c
LP
4976 if (r < 0)
4977 return r;
4978
e8a565cb
YW
4979 rt->id = strdup(id);
4980 if (!rt->id)
4981 return -ENOMEM;
4982
4983 if (tmp_dir) {
4984 rt->tmp_dir = strdup(tmp_dir);
4985 if (!rt->tmp_dir)
4986 return -ENOMEM;
4987
4988 /* When tmp_dir is set, then we require var_tmp_dir is also set. */
4989 assert(var_tmp_dir);
4990 rt->var_tmp_dir = strdup(var_tmp_dir);
4991 if (!rt->var_tmp_dir)
4992 return -ENOMEM;
4993 }
4994
4995 if (netns_storage_socket) {
4996 rt->netns_storage_socket[0] = netns_storage_socket[0];
4997 rt->netns_storage_socket[1] = netns_storage_socket[1];
613b411c
LP
4998 }
4999
e8a565cb
YW
5000 r = hashmap_put(m->exec_runtime_by_id, rt->id, rt);
5001 if (r < 0)
5002 return r;
5003
5004 rt->manager = m;
5005
5006 if (ret)
5007 *ret = rt;
5008
5009 /* do not remove created ExecRuntime object when the operation succeeds. */
5010 rt = NULL;
5011 return 0;
5012}
5013
5014static int exec_runtime_make(Manager *m, const ExecContext *c, const char *id, ExecRuntime **ret) {
5015 _cleanup_free_ char *tmp_dir = NULL, *var_tmp_dir = NULL;
2fa3742d 5016 _cleanup_close_pair_ int netns_storage_socket[2] = { -1, -1 };
e8a565cb
YW
5017 int r;
5018
5019 assert(m);
5020 assert(c);
5021 assert(id);
5022
5023 /* It is not necessary to create ExecRuntime object. */
a8d08f39 5024 if (!c->private_network && !c->private_tmp && !c->network_namespace_path)
e8a565cb
YW
5025 return 0;
5026
5027 if (c->private_tmp) {
5028 r = setup_tmp_dirs(id, &tmp_dir, &var_tmp_dir);
613b411c
LP
5029 if (r < 0)
5030 return r;
5031 }
5032
a8d08f39 5033 if (c->private_network || c->network_namespace_path) {
e8a565cb
YW
5034 if (socketpair(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC, 0, netns_storage_socket) < 0)
5035 return -errno;
5036 }
5037
5038 r = exec_runtime_add(m, id, tmp_dir, var_tmp_dir, netns_storage_socket, ret);
5039 if (r < 0)
5040 return r;
5041
5042 /* Avoid cleanup */
2fa3742d 5043 netns_storage_socket[0] = netns_storage_socket[1] = -1;
613b411c
LP
5044 return 1;
5045}
5046
e8a565cb
YW
5047int exec_runtime_acquire(Manager *m, const ExecContext *c, const char *id, bool create, ExecRuntime **ret) {
5048 ExecRuntime *rt;
5049 int r;
613b411c 5050
e8a565cb
YW
5051 assert(m);
5052 assert(id);
5053 assert(ret);
5054
5055 rt = hashmap_get(m->exec_runtime_by_id, id);
5056 if (rt)
5057 /* We already have a ExecRuntime object, let's increase the ref count and reuse it */
5058 goto ref;
5059
5060 if (!create)
5061 return 0;
5062
5063 /* If not found, then create a new object. */
5064 r = exec_runtime_make(m, c, id, &rt);
5065 if (r <= 0)
5066 /* When r == 0, it is not necessary to create ExecRuntime object. */
5067 return r;
613b411c 5068
e8a565cb
YW
5069ref:
5070 /* increment reference counter. */
5071 rt->n_ref++;
5072 *ret = rt;
5073 return 1;
5074}
613b411c 5075
e8a565cb
YW
5076ExecRuntime *exec_runtime_unref(ExecRuntime *rt, bool destroy) {
5077 if (!rt)
613b411c
LP
5078 return NULL;
5079
e8a565cb 5080 assert(rt->n_ref > 0);
613b411c 5081
e8a565cb
YW
5082 rt->n_ref--;
5083 if (rt->n_ref > 0)
f2341e0a
LP
5084 return NULL;
5085
e8a565cb 5086 return exec_runtime_free(rt, destroy);
613b411c
LP
5087}
5088
e8a565cb
YW
5089int exec_runtime_serialize(const Manager *m, FILE *f, FDSet *fds) {
5090 ExecRuntime *rt;
5091 Iterator i;
5092
5093 assert(m);
613b411c
LP
5094 assert(f);
5095 assert(fds);
5096
e8a565cb
YW
5097 HASHMAP_FOREACH(rt, m->exec_runtime_by_id, i) {
5098 fprintf(f, "exec-runtime=%s", rt->id);
613b411c 5099
e8a565cb
YW
5100 if (rt->tmp_dir)
5101 fprintf(f, " tmp-dir=%s", rt->tmp_dir);
613b411c 5102
e8a565cb
YW
5103 if (rt->var_tmp_dir)
5104 fprintf(f, " var-tmp-dir=%s", rt->var_tmp_dir);
613b411c 5105
e8a565cb
YW
5106 if (rt->netns_storage_socket[0] >= 0) {
5107 int copy;
613b411c 5108
e8a565cb
YW
5109 copy = fdset_put_dup(fds, rt->netns_storage_socket[0]);
5110 if (copy < 0)
5111 return copy;
613b411c 5112
e8a565cb
YW
5113 fprintf(f, " netns-socket-0=%i", copy);
5114 }
613b411c 5115
e8a565cb
YW
5116 if (rt->netns_storage_socket[1] >= 0) {
5117 int copy;
613b411c 5118
e8a565cb
YW
5119 copy = fdset_put_dup(fds, rt->netns_storage_socket[1]);
5120 if (copy < 0)
5121 return copy;
613b411c 5122
e8a565cb
YW
5123 fprintf(f, " netns-socket-1=%i", copy);
5124 }
5125
5126 fputc('\n', f);
613b411c
LP
5127 }
5128
5129 return 0;
5130}
5131
e8a565cb
YW
5132int exec_runtime_deserialize_compat(Unit *u, const char *key, const char *value, FDSet *fds) {
5133 _cleanup_(exec_runtime_freep) ExecRuntime *rt_create = NULL;
5134 ExecRuntime *rt;
613b411c
LP
5135 int r;
5136
e8a565cb
YW
5137 /* This is for the migration from old (v237 or earlier) deserialization text.
5138 * Due to the bug #7790, this may not work with the units that use JoinsNamespaceOf=.
5139 * Even if the ExecRuntime object originally created by the other unit, we cannot judge
5140 * so or not from the serialized text, then we always creates a new object owned by this. */
5141
5142 assert(u);
613b411c
LP
5143 assert(key);
5144 assert(value);
5145
e8a565cb
YW
5146 /* Manager manages ExecRuntime objects by the unit id.
5147 * So, we omit the serialized text when the unit does not have id (yet?)... */
5148 if (isempty(u->id)) {
5149 log_unit_debug(u, "Invocation ID not found. Dropping runtime parameter.");
5150 return 0;
5151 }
613b411c 5152
e8a565cb
YW
5153 r = hashmap_ensure_allocated(&u->manager->exec_runtime_by_id, &string_hash_ops);
5154 if (r < 0) {
5155 log_unit_debug_errno(u, r, "Failed to allocate storage for runtime parameter: %m");
5156 return 0;
5157 }
5158
5159 rt = hashmap_get(u->manager->exec_runtime_by_id, u->id);
5160 if (!rt) {
5161 r = exec_runtime_allocate(&rt_create);
613b411c 5162 if (r < 0)
f2341e0a 5163 return log_oom();
613b411c 5164
e8a565cb
YW
5165 rt_create->id = strdup(u->id);
5166 if (!rt_create->id)
5167 return log_oom();
5168
5169 rt = rt_create;
5170 }
5171
5172 if (streq(key, "tmp-dir")) {
5173 char *copy;
5174
613b411c
LP
5175 copy = strdup(value);
5176 if (!copy)
5177 return log_oom();
5178
e8a565cb 5179 free_and_replace(rt->tmp_dir, copy);
613b411c
LP
5180
5181 } else if (streq(key, "var-tmp-dir")) {
5182 char *copy;
5183
613b411c
LP
5184 copy = strdup(value);
5185 if (!copy)
5186 return log_oom();
5187
e8a565cb 5188 free_and_replace(rt->var_tmp_dir, copy);
613b411c
LP
5189
5190 } else if (streq(key, "netns-socket-0")) {
5191 int fd;
5192
e8a565cb 5193 if (safe_atoi(value, &fd) < 0 || !fdset_contains(fds, fd)) {
f2341e0a 5194 log_unit_debug(u, "Failed to parse netns socket value: %s", value);
e8a565cb 5195 return 0;
613b411c 5196 }
e8a565cb
YW
5197
5198 safe_close(rt->netns_storage_socket[0]);
5199 rt->netns_storage_socket[0] = fdset_remove(fds, fd);
5200
613b411c
LP
5201 } else if (streq(key, "netns-socket-1")) {
5202 int fd;
5203
e8a565cb 5204 if (safe_atoi(value, &fd) < 0 || !fdset_contains(fds, fd)) {
f2341e0a 5205 log_unit_debug(u, "Failed to parse netns socket value: %s", value);
e8a565cb 5206 return 0;
613b411c 5207 }
e8a565cb
YW
5208
5209 safe_close(rt->netns_storage_socket[1]);
5210 rt->netns_storage_socket[1] = fdset_remove(fds, fd);
613b411c
LP
5211 } else
5212 return 0;
5213
e8a565cb
YW
5214 /* If the object is newly created, then put it to the hashmap which manages ExecRuntime objects. */
5215 if (rt_create) {
5216 r = hashmap_put(u->manager->exec_runtime_by_id, rt_create->id, rt_create);
5217 if (r < 0) {
3fe91079 5218 log_unit_debug_errno(u, r, "Failed to put runtime parameter to manager's storage: %m");
e8a565cb
YW
5219 return 0;
5220 }
613b411c 5221
e8a565cb 5222 rt_create->manager = u->manager;
613b411c 5223
e8a565cb
YW
5224 /* Avoid cleanup */
5225 rt_create = NULL;
5226 }
98b47d54 5227
e8a565cb
YW
5228 return 1;
5229}
613b411c 5230
e8a565cb
YW
5231void exec_runtime_deserialize_one(Manager *m, const char *value, FDSet *fds) {
5232 char *id = NULL, *tmp_dir = NULL, *var_tmp_dir = NULL;
5233 int r, fd0 = -1, fd1 = -1;
5234 const char *p, *v = value;
5235 size_t n;
613b411c 5236
e8a565cb
YW
5237 assert(m);
5238 assert(value);
5239 assert(fds);
98b47d54 5240
e8a565cb
YW
5241 n = strcspn(v, " ");
5242 id = strndupa(v, n);
5243 if (v[n] != ' ')
5244 goto finalize;
5245 p = v + n + 1;
5246
5247 v = startswith(p, "tmp-dir=");
5248 if (v) {
5249 n = strcspn(v, " ");
5250 tmp_dir = strndupa(v, n);
5251 if (v[n] != ' ')
5252 goto finalize;
5253 p = v + n + 1;
5254 }
5255
5256 v = startswith(p, "var-tmp-dir=");
5257 if (v) {
5258 n = strcspn(v, " ");
5259 var_tmp_dir = strndupa(v, n);
5260 if (v[n] != ' ')
5261 goto finalize;
5262 p = v + n + 1;
5263 }
5264
5265 v = startswith(p, "netns-socket-0=");
5266 if (v) {
5267 char *buf;
5268
5269 n = strcspn(v, " ");
5270 buf = strndupa(v, n);
5271 if (safe_atoi(buf, &fd0) < 0 || !fdset_contains(fds, fd0)) {
5272 log_debug("Unable to process exec-runtime netns fd specification.");
5273 return;
98b47d54 5274 }
e8a565cb
YW
5275 fd0 = fdset_remove(fds, fd0);
5276 if (v[n] != ' ')
5277 goto finalize;
5278 p = v + n + 1;
613b411c
LP
5279 }
5280
e8a565cb
YW
5281 v = startswith(p, "netns-socket-1=");
5282 if (v) {
5283 char *buf;
98b47d54 5284
e8a565cb
YW
5285 n = strcspn(v, " ");
5286 buf = strndupa(v, n);
5287 if (safe_atoi(buf, &fd1) < 0 || !fdset_contains(fds, fd1)) {
5288 log_debug("Unable to process exec-runtime netns fd specification.");
5289 return;
98b47d54 5290 }
e8a565cb
YW
5291 fd1 = fdset_remove(fds, fd1);
5292 }
98b47d54 5293
e8a565cb
YW
5294finalize:
5295
5296 r = exec_runtime_add(m, id, tmp_dir, var_tmp_dir, (int[]) { fd0, fd1 }, NULL);
7d853ca6 5297 if (r < 0)
e8a565cb 5298 log_debug_errno(r, "Failed to add exec-runtime: %m");
e8a565cb 5299}
613b411c 5300
e8a565cb
YW
5301void exec_runtime_vacuum(Manager *m) {
5302 ExecRuntime *rt;
5303 Iterator i;
5304
5305 assert(m);
5306
5307 /* Free unreferenced ExecRuntime objects. This is used after manager deserialization process. */
5308
5309 HASHMAP_FOREACH(rt, m->exec_runtime_by_id, i) {
5310 if (rt->n_ref > 0)
5311 continue;
5312
5313 (void) exec_runtime_free(rt, false);
5314 }
613b411c
LP
5315}
5316
b9c04eaf
YW
5317void exec_params_clear(ExecParameters *p) {
5318 if (!p)
5319 return;
5320
5321 strv_free(p->environment);
5322}
5323
80876c20
LP
5324static const char* const exec_input_table[_EXEC_INPUT_MAX] = {
5325 [EXEC_INPUT_NULL] = "null",
5326 [EXEC_INPUT_TTY] = "tty",
5327 [EXEC_INPUT_TTY_FORCE] = "tty-force",
4f2d528d 5328 [EXEC_INPUT_TTY_FAIL] = "tty-fail",
52c239d7
LB
5329 [EXEC_INPUT_SOCKET] = "socket",
5330 [EXEC_INPUT_NAMED_FD] = "fd",
08f3be7a 5331 [EXEC_INPUT_DATA] = "data",
2038c3f5 5332 [EXEC_INPUT_FILE] = "file",
80876c20
LP
5333};
5334
8a0867d6
LP
5335DEFINE_STRING_TABLE_LOOKUP(exec_input, ExecInput);
5336
94f04347 5337static const char* const exec_output_table[_EXEC_OUTPUT_MAX] = {
80876c20 5338 [EXEC_OUTPUT_INHERIT] = "inherit",
94f04347 5339 [EXEC_OUTPUT_NULL] = "null",
80876c20 5340 [EXEC_OUTPUT_TTY] = "tty",
94f04347 5341 [EXEC_OUTPUT_SYSLOG] = "syslog",
28dbc1e8 5342 [EXEC_OUTPUT_SYSLOG_AND_CONSOLE] = "syslog+console",
9a6bca7a 5343 [EXEC_OUTPUT_KMSG] = "kmsg",
28dbc1e8 5344 [EXEC_OUTPUT_KMSG_AND_CONSOLE] = "kmsg+console",
706343f4
LP
5345 [EXEC_OUTPUT_JOURNAL] = "journal",
5346 [EXEC_OUTPUT_JOURNAL_AND_CONSOLE] = "journal+console",
52c239d7
LB
5347 [EXEC_OUTPUT_SOCKET] = "socket",
5348 [EXEC_OUTPUT_NAMED_FD] = "fd",
2038c3f5 5349 [EXEC_OUTPUT_FILE] = "file",
566b7d23 5350 [EXEC_OUTPUT_FILE_APPEND] = "append",
94f04347
LP
5351};
5352
5353DEFINE_STRING_TABLE_LOOKUP(exec_output, ExecOutput);
023a4f67
LP
5354
5355static const char* const exec_utmp_mode_table[_EXEC_UTMP_MODE_MAX] = {
5356 [EXEC_UTMP_INIT] = "init",
5357 [EXEC_UTMP_LOGIN] = "login",
5358 [EXEC_UTMP_USER] = "user",
5359};
5360
5361DEFINE_STRING_TABLE_LOOKUP(exec_utmp_mode, ExecUtmpMode);
53f47dfc
YW
5362
5363static const char* const exec_preserve_mode_table[_EXEC_PRESERVE_MODE_MAX] = {
5364 [EXEC_PRESERVE_NO] = "no",
5365 [EXEC_PRESERVE_YES] = "yes",
5366 [EXEC_PRESERVE_RESTART] = "restart",
5367};
5368
5369DEFINE_STRING_TABLE_LOOKUP_WITH_BOOLEAN(exec_preserve_mode, ExecPreserveMode, EXEC_PRESERVE_YES);
3536f49e 5370
72fd1768 5371static const char* const exec_directory_type_table[_EXEC_DIRECTORY_TYPE_MAX] = {
3536f49e
YW
5372 [EXEC_DIRECTORY_RUNTIME] = "RuntimeDirectory",
5373 [EXEC_DIRECTORY_STATE] = "StateDirectory",
5374 [EXEC_DIRECTORY_CACHE] = "CacheDirectory",
5375 [EXEC_DIRECTORY_LOGS] = "LogsDirectory",
5376 [EXEC_DIRECTORY_CONFIGURATION] = "ConfigurationDirectory",
5377};
5378
5379DEFINE_STRING_TABLE_LOOKUP(exec_directory_type, ExecDirectoryType);
b1edf445 5380
fb2042dd
YW
5381static const char* const exec_directory_env_name_table[_EXEC_DIRECTORY_TYPE_MAX] = {
5382 [EXEC_DIRECTORY_RUNTIME] = "RUNTIME_DIRECTORY",
5383 [EXEC_DIRECTORY_STATE] = "STATE_DIRECTORY",
5384 [EXEC_DIRECTORY_CACHE] = "CACHE_DIRECTORY",
5385 [EXEC_DIRECTORY_LOGS] = "LOGS_DIRECTORY",
5386 [EXEC_DIRECTORY_CONFIGURATION] = "CONFIGURATION_DIRECTORY",
5387};
5388
5389DEFINE_PRIVATE_STRING_TABLE_LOOKUP_TO_STRING(exec_directory_env_name, ExecDirectoryType);
5390
b1edf445
LP
5391static const char* const exec_keyring_mode_table[_EXEC_KEYRING_MODE_MAX] = {
5392 [EXEC_KEYRING_INHERIT] = "inherit",
5393 [EXEC_KEYRING_PRIVATE] = "private",
5394 [EXEC_KEYRING_SHARED] = "shared",
5395};
5396
5397DEFINE_STRING_TABLE_LOOKUP(exec_keyring_mode, ExecKeyringMode);