]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/core/execute.c
core: add support for StandardInputFile= and friends
[thirdparty/systemd.git] / src / core / execute.c
CommitLineData
a7334b09
LP
1/***
2 This file is part of systemd.
3
4 Copyright 2010 Lennart Poettering
5
6 systemd is free software; you can redistribute it and/or modify it
5430f7f2
LP
7 under the terms of the GNU Lesser General Public License as published by
8 the Free Software Foundation; either version 2.1 of the License, or
a7334b09
LP
9 (at your option) any later version.
10
11 systemd is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
5430f7f2 14 Lesser General Public License for more details.
a7334b09 15
5430f7f2 16 You should have received a copy of the GNU Lesser General Public License
a7334b09
LP
17 along with systemd; If not, see <http://www.gnu.org/licenses/>.
18***/
19
034c6ed7
LP
20#include <errno.h>
21#include <fcntl.h>
8dd4c05b
LP
22#include <glob.h>
23#include <grp.h>
24#include <poll.h>
309bff19 25#include <signal.h>
8dd4c05b 26#include <string.h>
19c0b0b9 27#include <sys/capability.h>
d251207d 28#include <sys/eventfd.h>
f3e43635 29#include <sys/mman.h>
8dd4c05b 30#include <sys/personality.h>
94f04347 31#include <sys/prctl.h>
d2ffa389 32#include <sys/shm.h>
8dd4c05b 33#include <sys/socket.h>
451a074f 34#include <sys/stat.h>
d2ffa389 35#include <sys/types.h>
8dd4c05b
LP
36#include <sys/un.h>
37#include <unistd.h>
023a4f67 38#include <utmpx.h>
5cb5a6ff 39
349cc4a5 40#if HAVE_PAM
5b6319dc
LP
41#include <security/pam_appl.h>
42#endif
43
349cc4a5 44#if HAVE_SELINUX
7b52a628
MS
45#include <selinux/selinux.h>
46#endif
47
349cc4a5 48#if HAVE_SECCOMP
17df7223
LP
49#include <seccomp.h>
50#endif
51
349cc4a5 52#if HAVE_APPARMOR
eef65bf3
MS
53#include <sys/apparmor.h>
54#endif
55
24882e06 56#include "sd-messages.h"
8dd4c05b
LP
57
58#include "af-list.h"
b5efdb8a 59#include "alloc-util.h"
349cc4a5 60#if HAVE_APPARMOR
3ffd4af2
LP
61#include "apparmor-util.h"
62#endif
8dd4c05b
LP
63#include "async.h"
64#include "barrier.h"
8dd4c05b 65#include "cap-list.h"
430f0182 66#include "capability-util.h"
a1164ae3 67#include "chown-recursive.h"
f6a6225e 68#include "def.h"
4d1a6904 69#include "env-util.h"
17df7223 70#include "errno-list.h"
3ffd4af2 71#include "execute.h"
8dd4c05b 72#include "exit-status.h"
3ffd4af2 73#include "fd-util.h"
8dd4c05b 74#include "fileio.h"
f97b34a6 75#include "format-util.h"
f4f15635 76#include "fs-util.h"
7d50b32a 77#include "glob-util.h"
c004493c 78#include "io-util.h"
8dd4c05b 79#include "ioprio.h"
a1164ae3 80#include "label.h"
8dd4c05b
LP
81#include "log.h"
82#include "macro.h"
83#include "missing.h"
84#include "mkdir.h"
85#include "namespace.h"
6bedfcbb 86#include "parse-util.h"
8dd4c05b 87#include "path-util.h"
0b452006 88#include "process-util.h"
78f22b97 89#include "rlimit-util.h"
8dd4c05b 90#include "rm-rf.h"
349cc4a5 91#if HAVE_SECCOMP
3ffd4af2
LP
92#include "seccomp-util.h"
93#endif
8dd4c05b 94#include "securebits.h"
07d46372 95#include "securebits-util.h"
8dd4c05b 96#include "selinux-util.h"
24882e06 97#include "signal-util.h"
8dd4c05b 98#include "smack-util.h"
fd63e712 99#include "special.h"
8b43440b 100#include "string-table.h"
07630cea 101#include "string-util.h"
8dd4c05b 102#include "strv.h"
7ccbd1ae 103#include "syslog-util.h"
8dd4c05b
LP
104#include "terminal-util.h"
105#include "unit.h"
b1d4f8e1 106#include "user-util.h"
8dd4c05b
LP
107#include "util.h"
108#include "utmp-wtmp.h"
5cb5a6ff 109
e056b01d 110#define IDLE_TIMEOUT_USEC (5*USEC_PER_SEC)
31a7eb86 111#define IDLE_TIMEOUT2_USEC (1*USEC_PER_SEC)
e6a26745 112
02a51aba
LP
113/* This assumes there is a 'tty' group */
114#define TTY_MODE 0620
115
531dca78
LP
116#define SNDBUF_SIZE (8*1024*1024)
117
034c6ed7
LP
/* Moves the passed file descriptors so that fds[i] ends up at descriptor number i+3. The array is
 * updated in place with the new descriptor numbers. Returns 0 on success, or a negative errno if
 * duplicating a descriptor fails. */
static int shift_fds(int fds[], unsigned n_fds) {
        int first = 0, retry_at;

        if (n_fds == 0)
                return 0;

        /* Modifies the fds array! */

        assert(fds);

        do {
                int idx;

                retry_at = -1;

                for (idx = first; idx < (int) n_fds; idx++) {
                        int want = idx + 3, got;

                        /* Nothing to do if this one already sits in its slot */
                        if (fds[idx] == want)
                                continue;

                        /* F_DUPFD hands out the lowest free descriptor >= want */
                        got = fcntl(fds[idx], F_DUPFD, want);
                        if (got < 0)
                                return -errno;

                        safe_close(fds[idx]);
                        fds[idx] = got;

                        /* The slot we wanted was still occupied? Then remember the first such
                         * index, so that the next pass starts over from there. */
                        if (got != want && retry_at < 0)
                                retry_at = idx;
                }

                first = retry_at;
        } while (retry_at >= 0);

        return 0;
}
162
4c47affc
FB
/* Adjusts the file descriptor flags of the fds we are about to pass on: O_NONBLOCK is set or
 * dropped (according to 'nonblock') on the first n_socket_fds entries only, and FD_CLOEXEC is
 * dropped on all of them. Returns 0 on success, or the negative error of the first helper call
 * that failed. */
static int flags_fds(const int fds[], unsigned n_storage_fds, unsigned n_socket_fds, bool nonblock) {
        unsigned total, idx;

        total = n_storage_fds + n_socket_fds;
        if (total == 0)
                return 0;

        assert(fds);

        for (idx = 0; idx < total; idx++) {
                int r;

                /* O_NONBLOCK only applies to socket activation */
                if (idx < n_socket_fds) {
                        r = fd_nonblock(fds[idx], nonblock);
                        if (r < 0)
                                return r;
                }

                /* FD_CLOEXEC is dropped unconditionally, after all these fds are supposed to be
                 * passed to our children */
                r = fd_cloexec(fds[idx], false);
                if (r < 0)
                        return r;
        }

        return 0;
}
195
1e22b5cd 196static const char *exec_context_tty_path(const ExecContext *context) {
80876c20
LP
197 assert(context);
198
1e22b5cd
LP
199 if (context->stdio_as_fds)
200 return NULL;
201
80876c20
LP
202 if (context->tty_path)
203 return context->tty_path;
204
205 return "/dev/console";
206}
207
1e22b5cd
LP
208static void exec_context_tty_reset(const ExecContext *context, const ExecParameters *p) {
209 const char *path;
210
6ea832a2
LP
211 assert(context);
212
1e22b5cd 213 path = exec_context_tty_path(context);
6ea832a2 214
1e22b5cd
LP
215 if (context->tty_vhangup) {
216 if (p && p->stdin_fd >= 0)
217 (void) terminal_vhangup_fd(p->stdin_fd);
218 else if (path)
219 (void) terminal_vhangup(path);
220 }
6ea832a2 221
1e22b5cd
LP
222 if (context->tty_reset) {
223 if (p && p->stdin_fd >= 0)
224 (void) reset_terminal_fd(p->stdin_fd, true);
225 else if (path)
226 (void) reset_terminal(path);
227 }
228
229 if (context->tty_vt_disallocate && path)
230 (void) vt_disallocate(path);
6ea832a2
LP
231}
232
6af760f3
LP
233static bool is_terminal_input(ExecInput i) {
234 return IN_SET(i,
235 EXEC_INPUT_TTY,
236 EXEC_INPUT_TTY_FORCE,
237 EXEC_INPUT_TTY_FAIL);
238}
239
3a1286b6 240static bool is_terminal_output(ExecOutput o) {
6af760f3
LP
241 return IN_SET(o,
242 EXEC_OUTPUT_TTY,
243 EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
244 EXEC_OUTPUT_KMSG_AND_CONSOLE,
245 EXEC_OUTPUT_JOURNAL_AND_CONSOLE);
246}
247
aac8c0c3
LP
248static bool is_syslog_output(ExecOutput o) {
249 return IN_SET(o,
250 EXEC_OUTPUT_SYSLOG,
251 EXEC_OUTPUT_SYSLOG_AND_CONSOLE);
252}
253
254static bool is_kmsg_output(ExecOutput o) {
255 return IN_SET(o,
256 EXEC_OUTPUT_KMSG,
257 EXEC_OUTPUT_KMSG_AND_CONSOLE);
258}
259
6af760f3
LP
260static bool exec_context_needs_term(const ExecContext *c) {
261 assert(c);
262
263 /* Return true if the execution context suggests we should set $TERM to something useful. */
264
265 if (is_terminal_input(c->std_input))
266 return true;
267
268 if (is_terminal_output(c->std_output))
269 return true;
270
271 if (is_terminal_output(c->std_error))
272 return true;
273
274 return !!c->tty_path;
3a1286b6
MS
275}
276
/* Opens /dev/null with the specified access flags (plus O_NOCTTY) and hands the result to
 * move_fd() to place it at descriptor 'nfd'. Returns a negative errno if the open fails, otherwise
 * whatever move_fd() returns. */
static int open_null_as(int flags, int nfd) {
        int null_fd;

        assert(nfd >= 0);

        null_fd = open("/dev/null", flags|O_NOCTTY);
        if (null_fd >= 0)
                return move_fd(null_fd, nfd, false);

        return -errno;
}
288
524daa8c 289static int connect_journal_socket(int fd, uid_t uid, gid_t gid) {
92a17af9 290 static const union sockaddr_union sa = {
b92bea5d
ZJS
291 .un.sun_family = AF_UNIX,
292 .un.sun_path = "/run/systemd/journal/stdout",
293 };
524daa8c
ZJS
294 uid_t olduid = UID_INVALID;
295 gid_t oldgid = GID_INVALID;
296 int r;
297
cad93f29 298 if (gid_is_valid(gid)) {
524daa8c
ZJS
299 oldgid = getgid();
300
92a17af9 301 if (setegid(gid) < 0)
524daa8c
ZJS
302 return -errno;
303 }
304
cad93f29 305 if (uid_is_valid(uid)) {
524daa8c
ZJS
306 olduid = getuid();
307
92a17af9 308 if (seteuid(uid) < 0) {
524daa8c
ZJS
309 r = -errno;
310 goto restore_gid;
311 }
312 }
313
92a17af9 314 r = connect(fd, &sa.sa, SOCKADDR_UN_LEN(sa.un)) < 0 ? -errno : 0;
524daa8c
ZJS
315
316 /* If we fail to restore the uid or gid, things will likely
317 fail later on. This should only happen if an LSM interferes. */
318
cad93f29 319 if (uid_is_valid(uid))
524daa8c
ZJS
320 (void) seteuid(olduid);
321
322 restore_gid:
cad93f29 323 if (gid_is_valid(gid))
524daa8c
ZJS
324 (void) setegid(oldgid);
325
326 return r;
327}
328
fd1f9c89 329static int connect_logger_as(
7a1ab780 330 Unit *unit,
fd1f9c89 331 const ExecContext *context,
af635cf3 332 const ExecParameters *params,
fd1f9c89
LP
333 ExecOutput output,
334 const char *ident,
fd1f9c89
LP
335 int nfd,
336 uid_t uid,
337 gid_t gid) {
338
524daa8c 339 int fd, r;
071830ff
LP
340
341 assert(context);
af635cf3 342 assert(params);
80876c20
LP
343 assert(output < _EXEC_OUTPUT_MAX);
344 assert(ident);
345 assert(nfd >= 0);
071830ff 346
54fe0cdb
LP
347 fd = socket(AF_UNIX, SOCK_STREAM, 0);
348 if (fd < 0)
80876c20 349 return -errno;
071830ff 350
524daa8c
ZJS
351 r = connect_journal_socket(fd, uid, gid);
352 if (r < 0)
353 return r;
071830ff 354
80876c20 355 if (shutdown(fd, SHUT_RD) < 0) {
03e334a1 356 safe_close(fd);
80876c20
LP
357 return -errno;
358 }
071830ff 359
fd1f9c89 360 (void) fd_inc_sndbuf(fd, SNDBUF_SIZE);
531dca78 361
80876c20 362 dprintf(fd,
62bca2c6 363 "%s\n"
80876c20
LP
364 "%s\n"
365 "%i\n"
54fe0cdb
LP
366 "%i\n"
367 "%i\n"
368 "%i\n"
4f4a1dbf 369 "%i\n",
c867611e 370 context->syslog_identifier ?: ident,
af635cf3 371 params->flags & EXEC_PASS_LOG_UNIT ? unit->id : "",
54fe0cdb
LP
372 context->syslog_priority,
373 !!context->syslog_level_prefix,
aac8c0c3
LP
374 is_syslog_output(output),
375 is_kmsg_output(output),
3a1286b6 376 is_terminal_output(output));
80876c20 377
046a82c1 378 return move_fd(fd, nfd, false);
80876c20 379}
/* Opens the terminal at 'path' with the specified flags (plus O_NOCTTY) and hands the result to
 * move_fd() to place it at descriptor 'nfd'. Errors of open_terminal() are passed through as-is. */
static int open_terminal_as(const char *path, int flags, int nfd) {
        int tty_fd;

        assert(path);
        assert(nfd >= 0);

        tty_fd = open_terminal(path, flags | O_NOCTTY);
        if (tty_fd >= 0)
                return move_fd(tty_fd, nfd, false);

        return tty_fd;
}
071830ff 392
2038c3f5
LP
393static int acquire_path(const char *path, int flags, mode_t mode) {
394 union sockaddr_union sa = {
395 .sa.sa_family = AF_UNIX,
396 };
397 int fd, r;
398
399 assert(path);
400
401 if (IN_SET(flags & O_ACCMODE, O_WRONLY, O_RDWR))
402 flags |= O_CREAT;
403
404 fd = open(path, flags|O_NOCTTY, mode);
405 if (fd >= 0)
406 return fd;
407
408 if (errno != ENXIO) /* ENXIO is returned when we try to open() an AF_UNIX file system socket on Linux */
409 return -errno;
410 if (strlen(path) > sizeof(sa.un.sun_path)) /* Too long, can't be a UNIX socket */
411 return -ENXIO;
412
413 /* So, it appears the specified path could be an AF_UNIX socket. Let's see if we can connect to it. */
414
415 fd = socket(AF_UNIX, SOCK_STREAM, 0);
416 if (fd < 0)
417 return -errno;
418
419 strncpy(sa.un.sun_path, path, sizeof(sa.un.sun_path));
420 if (connect(fd, &sa.sa, SOCKADDR_UN_LEN(sa.un)) < 0) {
421 safe_close(fd);
422 return errno == EINVAL ? -ENXIO : -errno; /* Propagate initial error if we get EINVAL, i.e. we have
423 * indication that his wasn't an AF_UNIX socket after all */
424 }
425
426 if ((flags & O_ACCMODE) == O_RDONLY)
427 r = shutdown(fd, SHUT_WR);
428 else if ((flags & O_ACCMODE) == O_WRONLY)
429 r = shutdown(fd, SHUT_RD);
430 else
431 return fd;
432 if (r < 0) {
433 safe_close(fd);
434 return -errno;
435 }
436
437 return fd;
438}
439
08f3be7a
LP
440static int fixup_input(
441 const ExecContext *context,
442 int socket_fd,
443 bool apply_tty_stdin) {
444
445 ExecInput std_input;
446
447 assert(context);
448
449 std_input = context->std_input;
1e3ad081
LP
450
451 if (is_terminal_input(std_input) && !apply_tty_stdin)
452 return EXEC_INPUT_NULL;
071830ff 453
03fd9c49 454 if (std_input == EXEC_INPUT_SOCKET && socket_fd < 0)
4f2d528d
LP
455 return EXEC_INPUT_NULL;
456
08f3be7a
LP
457 if (std_input == EXEC_INPUT_DATA && context->stdin_data_size == 0)
458 return EXEC_INPUT_NULL;
459
03fd9c49 460 return std_input;
4f2d528d
LP
461}
462
03fd9c49 463static int fixup_output(ExecOutput std_output, int socket_fd) {
4f2d528d 464
03fd9c49 465 if (std_output == EXEC_OUTPUT_SOCKET && socket_fd < 0)
4f2d528d
LP
466 return EXEC_OUTPUT_INHERIT;
467
03fd9c49 468 return std_output;
4f2d528d
LP
469}
470
a34ceba6
LP
471static int setup_input(
472 const ExecContext *context,
473 const ExecParameters *params,
52c239d7
LB
474 int socket_fd,
475 int named_iofds[3]) {
a34ceba6 476
4f2d528d
LP
477 ExecInput i;
478
479 assert(context);
a34ceba6
LP
480 assert(params);
481
482 if (params->stdin_fd >= 0) {
483 if (dup2(params->stdin_fd, STDIN_FILENO) < 0)
484 return -errno;
485
486 /* Try to make this the controlling tty, if it is a tty, and reset it */
1fb0682e
LP
487 if (isatty(STDIN_FILENO)) {
488 (void) ioctl(STDIN_FILENO, TIOCSCTTY, context->std_input == EXEC_INPUT_TTY_FORCE);
489 (void) reset_terminal_fd(STDIN_FILENO, true);
490 }
a34ceba6
LP
491
492 return STDIN_FILENO;
493 }
4f2d528d 494
08f3be7a 495 i = fixup_input(context, socket_fd, params->flags & EXEC_APPLY_TTY_STDIN);
4f2d528d
LP
496
497 switch (i) {
071830ff 498
80876c20
LP
499 case EXEC_INPUT_NULL:
500 return open_null_as(O_RDONLY, STDIN_FILENO);
501
502 case EXEC_INPUT_TTY:
503 case EXEC_INPUT_TTY_FORCE:
504 case EXEC_INPUT_TTY_FAIL: {
046a82c1 505 int fd;
071830ff 506
1e22b5cd 507 fd = acquire_terminal(exec_context_tty_path(context),
970edce6
ZJS
508 i == EXEC_INPUT_TTY_FAIL,
509 i == EXEC_INPUT_TTY_FORCE,
510 false,
3a43da28 511 USEC_INFINITY);
970edce6 512 if (fd < 0)
80876c20
LP
513 return fd;
514
046a82c1 515 return move_fd(fd, STDIN_FILENO, false);
80876c20
LP
516 }
517
4f2d528d 518 case EXEC_INPUT_SOCKET:
e75a9ed1
LP
519 assert(socket_fd >= 0);
520
4f2d528d
LP
521 return dup2(socket_fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
522
52c239d7 523 case EXEC_INPUT_NAMED_FD:
e75a9ed1
LP
524 assert(named_iofds[STDIN_FILENO] >= 0);
525
52c239d7
LB
526 (void) fd_nonblock(named_iofds[STDIN_FILENO], false);
527 return dup2(named_iofds[STDIN_FILENO], STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
528
08f3be7a
LP
529 case EXEC_INPUT_DATA: {
530 int fd;
531
532 fd = acquire_data_fd(context->stdin_data, context->stdin_data_size, 0);
533 if (fd < 0)
534 return fd;
535
536 return move_fd(fd, STDIN_FILENO, false);
537 }
538
2038c3f5
LP
539 case EXEC_INPUT_FILE: {
540 bool rw;
541 int fd;
542
543 assert(context->stdio_file[STDIN_FILENO]);
544
545 rw = (context->std_output == EXEC_OUTPUT_FILE && streq_ptr(context->stdio_file[STDIN_FILENO], context->stdio_file[STDOUT_FILENO])) ||
546 (context->std_error == EXEC_OUTPUT_FILE && streq_ptr(context->stdio_file[STDIN_FILENO], context->stdio_file[STDERR_FILENO]));
547
548 fd = acquire_path(context->stdio_file[STDIN_FILENO], rw ? O_RDWR : O_RDONLY, 0666 & ~context->umask);
549 if (fd < 0)
550 return fd;
551
552 return move_fd(fd, STDIN_FILENO, false);
553 }
554
80876c20
LP
555 default:
556 assert_not_reached("Unknown input type");
557 }
558}
559
a34ceba6
LP
560static int setup_output(
561 Unit *unit,
562 const ExecContext *context,
563 const ExecParameters *params,
564 int fileno,
565 int socket_fd,
52c239d7 566 int named_iofds[3],
a34ceba6 567 const char *ident,
7bce046b
LP
568 uid_t uid,
569 gid_t gid,
570 dev_t *journal_stream_dev,
571 ino_t *journal_stream_ino) {
a34ceba6 572
4f2d528d
LP
573 ExecOutput o;
574 ExecInput i;
47c1d80d 575 int r;
4f2d528d 576
f2341e0a 577 assert(unit);
80876c20 578 assert(context);
a34ceba6 579 assert(params);
80876c20 580 assert(ident);
7bce046b
LP
581 assert(journal_stream_dev);
582 assert(journal_stream_ino);
80876c20 583
a34ceba6
LP
584 if (fileno == STDOUT_FILENO && params->stdout_fd >= 0) {
585
586 if (dup2(params->stdout_fd, STDOUT_FILENO) < 0)
587 return -errno;
588
589 return STDOUT_FILENO;
590 }
591
592 if (fileno == STDERR_FILENO && params->stderr_fd >= 0) {
593 if (dup2(params->stderr_fd, STDERR_FILENO) < 0)
594 return -errno;
595
596 return STDERR_FILENO;
597 }
598
08f3be7a 599 i = fixup_input(context, socket_fd, params->flags & EXEC_APPLY_TTY_STDIN);
03fd9c49 600 o = fixup_output(context->std_output, socket_fd);
4f2d528d 601
eb17e935
MS
602 if (fileno == STDERR_FILENO) {
603 ExecOutput e;
604 e = fixup_output(context->std_error, socket_fd);
80876c20 605
eb17e935
MS
606 /* This expects the input and output are already set up */
607
608 /* Don't change the stderr file descriptor if we inherit all
609 * the way and are not on a tty */
610 if (e == EXEC_OUTPUT_INHERIT &&
611 o == EXEC_OUTPUT_INHERIT &&
612 i == EXEC_INPUT_NULL &&
613 !is_terminal_input(context->std_input) &&
614 getppid () != 1)
615 return fileno;
616
617 /* Duplicate from stdout if possible */
52c239d7 618 if ((e == o && e != EXEC_OUTPUT_NAMED_FD) || e == EXEC_OUTPUT_INHERIT)
eb17e935 619 return dup2(STDOUT_FILENO, fileno) < 0 ? -errno : fileno;
071830ff 620
eb17e935 621 o = e;
80876c20 622
eb17e935 623 } else if (o == EXEC_OUTPUT_INHERIT) {
21d21ea4
LP
624 /* If input got downgraded, inherit the original value */
625 if (i == EXEC_INPUT_NULL && is_terminal_input(context->std_input))
1e22b5cd 626 return open_terminal_as(exec_context_tty_path(context), O_WRONLY, fileno);
21d21ea4 627
08f3be7a
LP
628 /* If the input is connected to anything that's not a /dev/null or a data fd, inherit that... */
629 if (!IN_SET(i, EXEC_INPUT_NULL, EXEC_INPUT_DATA))
eb17e935 630 return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
071830ff 631
acb591e4
LP
632 /* If we are not started from PID 1 we just inherit STDOUT from our parent process. */
633 if (getppid() != 1)
eb17e935 634 return fileno;
94f04347 635
eb17e935
MS
636 /* We need to open /dev/null here anew, to get the right access mode. */
637 return open_null_as(O_WRONLY, fileno);
071830ff 638 }
94f04347 639
eb17e935 640 switch (o) {
80876c20
LP
641
642 case EXEC_OUTPUT_NULL:
eb17e935 643 return open_null_as(O_WRONLY, fileno);
80876c20
LP
644
645 case EXEC_OUTPUT_TTY:
4f2d528d 646 if (is_terminal_input(i))
eb17e935 647 return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
80876c20
LP
648
649 /* We don't reset the terminal if this is just about output */
1e22b5cd 650 return open_terminal_as(exec_context_tty_path(context), O_WRONLY, fileno);
80876c20
LP
651
652 case EXEC_OUTPUT_SYSLOG:
28dbc1e8 653 case EXEC_OUTPUT_SYSLOG_AND_CONSOLE:
9a6bca7a 654 case EXEC_OUTPUT_KMSG:
28dbc1e8 655 case EXEC_OUTPUT_KMSG_AND_CONSOLE:
706343f4
LP
656 case EXEC_OUTPUT_JOURNAL:
657 case EXEC_OUTPUT_JOURNAL_AND_CONSOLE:
af635cf3 658 r = connect_logger_as(unit, context, params, o, ident, fileno, uid, gid);
47c1d80d 659 if (r < 0) {
82677ae4 660 log_unit_warning_errno(unit, r, "Failed to connect %s to the journal socket, ignoring: %m", fileno == STDOUT_FILENO ? "stdout" : "stderr");
eb17e935 661 r = open_null_as(O_WRONLY, fileno);
7bce046b
LP
662 } else {
663 struct stat st;
664
665 /* If we connected this fd to the journal via a stream, patch the device/inode into the passed
666 * parameters, but only then. This is useful so that we can set $JOURNAL_STREAM that permits
ab2116b1
LP
667 * services to detect whether they are connected to the journal or not.
668 *
669 * If both stdout and stderr are connected to a stream then let's make sure to store the data
670 * about STDERR as that's usually the best way to do logging. */
7bce046b 671
ab2116b1
LP
672 if (fstat(fileno, &st) >= 0 &&
673 (*journal_stream_ino == 0 || fileno == STDERR_FILENO)) {
7bce046b
LP
674 *journal_stream_dev = st.st_dev;
675 *journal_stream_ino = st.st_ino;
676 }
47c1d80d
MS
677 }
678 return r;
4f2d528d
LP
679
680 case EXEC_OUTPUT_SOCKET:
681 assert(socket_fd >= 0);
e75a9ed1 682
eb17e935 683 return dup2(socket_fd, fileno) < 0 ? -errno : fileno;
94f04347 684
52c239d7 685 case EXEC_OUTPUT_NAMED_FD:
e75a9ed1
LP
686 assert(named_iofds[fileno] >= 0);
687
52c239d7
LB
688 (void) fd_nonblock(named_iofds[fileno], false);
689 return dup2(named_iofds[fileno], fileno) < 0 ? -errno : fileno;
690
2038c3f5
LP
691 case EXEC_OUTPUT_FILE: {
692 bool rw;
693 int fd;
694
695 assert(context->stdio_file[fileno]);
696
697 rw = context->std_input == EXEC_INPUT_FILE &&
698 streq_ptr(context->stdio_file[fileno], context->stdio_file[STDIN_FILENO]);
699
700 if (rw)
701 return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
702
703 fd = acquire_path(context->stdio_file[fileno], O_WRONLY, 0666 & ~context->umask);
704 if (fd < 0)
705 return fd;
706
707 return move_fd(fd, fileno, false);
708 }
709
94f04347 710 default:
80876c20 711 assert_not_reached("Unknown error type");
94f04347 712 }
071830ff
LP
713}
714
02a51aba
LP
715static int chown_terminal(int fd, uid_t uid) {
716 struct stat st;
717
718 assert(fd >= 0);
02a51aba 719
1ff74fb6
LP
720 /* Before we chown/chmod the TTY, let's ensure this is actually a tty */
721 if (isatty(fd) < 1)
722 return 0;
723
02a51aba 724 /* This might fail. What matters are the results. */
bab45044
LP
725 (void) fchown(fd, uid, -1);
726 (void) fchmod(fd, TTY_MODE);
02a51aba
LP
727
728 if (fstat(fd, &st) < 0)
729 return -errno;
730
d8b4e2e9 731 if (st.st_uid != uid || (st.st_mode & 0777) != TTY_MODE)
02a51aba
LP
732 return -EPERM;
733
734 return 0;
735}
736
7d5ceb64 737static int setup_confirm_stdio(const char *vc, int *_saved_stdin, int *_saved_stdout) {
3d18b167
LP
738 _cleanup_close_ int fd = -1, saved_stdin = -1, saved_stdout = -1;
739 int r;
80876c20 740
80876c20
LP
741 assert(_saved_stdin);
742 assert(_saved_stdout);
743
af6da548
LP
744 saved_stdin = fcntl(STDIN_FILENO, F_DUPFD, 3);
745 if (saved_stdin < 0)
746 return -errno;
80876c20 747
af6da548 748 saved_stdout = fcntl(STDOUT_FILENO, F_DUPFD, 3);
3d18b167
LP
749 if (saved_stdout < 0)
750 return -errno;
80876c20 751
7d5ceb64 752 fd = acquire_terminal(vc, false, false, false, DEFAULT_CONFIRM_USEC);
3d18b167
LP
753 if (fd < 0)
754 return fd;
80876c20 755
af6da548
LP
756 r = chown_terminal(fd, getuid());
757 if (r < 0)
3d18b167 758 return r;
02a51aba 759
3d18b167
LP
760 r = reset_terminal_fd(fd, true);
761 if (r < 0)
762 return r;
80876c20 763
3d18b167
LP
764 if (dup2(fd, STDIN_FILENO) < 0)
765 return -errno;
766
767 if (dup2(fd, STDOUT_FILENO) < 0)
768 return -errno;
80876c20
LP
769
770 if (fd >= 2)
03e334a1 771 safe_close(fd);
3d18b167 772 fd = -1;
80876c20
LP
773
774 *_saved_stdin = saved_stdin;
775 *_saved_stdout = saved_stdout;
776
3d18b167 777 saved_stdin = saved_stdout = -1;
80876c20 778
3d18b167 779 return 0;
80876c20
LP
780}
781
63d77c92 782static void write_confirm_error_fd(int err, int fd, const Unit *u) {
3b20f877
FB
783 assert(err < 0);
784
785 if (err == -ETIMEDOUT)
63d77c92 786 dprintf(fd, "Confirmation question timed out for %s, assuming positive response.\n", u->id);
3b20f877
FB
787 else {
788 errno = -err;
63d77c92 789 dprintf(fd, "Couldn't ask confirmation for %s: %m, assuming positive response.\n", u->id);
3b20f877
FB
790 }
791}
792
63d77c92 793static void write_confirm_error(int err, const char *vc, const Unit *u) {
03e334a1 794 _cleanup_close_ int fd = -1;
80876c20 795
3b20f877 796 assert(vc);
80876c20 797
7d5ceb64 798 fd = open_terminal(vc, O_WRONLY|O_NOCTTY|O_CLOEXEC);
af6da548 799 if (fd < 0)
3b20f877 800 return;
80876c20 801
63d77c92 802 write_confirm_error_fd(err, fd, u);
af6da548 803}
80876c20 804
/* Undoes setup_confirm_stdio(): releases the terminal, puts the saved fds back in place as stdin
 * and stdout (where they are valid) and closes the saved copies. Returns 0 on success, or the
 * negative errno of the last dup2() that failed. */
static int restore_confirm_stdio(int *saved_stdin, int *saved_stdout) {
        int ret = 0;

        assert(saved_stdin);
        assert(saved_stdout);

        release_terminal();

        if (*saved_stdin >= 0 && dup2(*saved_stdin, STDIN_FILENO) < 0)
                ret = -errno;

        if (*saved_stdout >= 0 && dup2(*saved_stdout, STDOUT_FILENO) < 0)
                ret = -errno;

        /* The copies are closed in any case, and the caller's variables are set to whatever
         * safe_close() returns */
        *saved_stdin = safe_close(*saved_stdin);
        *saved_stdout = safe_close(*saved_stdout);

        return ret;
}
826
3b20f877
FB
/* The possible results of ask_for_confirmation() */
enum {
        CONFIRM_PRETEND_FAILURE = -1, /* 'f': don't execute, pretend the command failed */
        CONFIRM_PRETEND_SUCCESS =  0, /* 's': don't execute, pretend the command succeeded */
        CONFIRM_EXECUTE         =  1, /* 'y': go ahead and execute the command */
};
832
eedf223a 833static int ask_for_confirmation(const char *vc, Unit *u, const char *cmdline) {
af6da548 834 int saved_stdout = -1, saved_stdin = -1, r;
2bcd3c26 835 _cleanup_free_ char *e = NULL;
3b20f877 836 char c;
af6da548 837
3b20f877 838 /* For any internal errors, assume a positive response. */
7d5ceb64 839 r = setup_confirm_stdio(vc, &saved_stdin, &saved_stdout);
3b20f877 840 if (r < 0) {
63d77c92 841 write_confirm_error(r, vc, u);
3b20f877
FB
842 return CONFIRM_EXECUTE;
843 }
af6da548 844
b0eb2944
FB
845 /* confirm_spawn might have been disabled while we were sleeping. */
846 if (manager_is_confirm_spawn_disabled(u->manager)) {
847 r = 1;
848 goto restore_stdio;
849 }
af6da548 850
2bcd3c26
FB
851 e = ellipsize(cmdline, 60, 100);
852 if (!e) {
853 log_oom();
854 r = CONFIRM_EXECUTE;
855 goto restore_stdio;
856 }
af6da548 857
d172b175 858 for (;;) {
539622bd 859 r = ask_char(&c, "yfshiDjcn", "Execute %s? [y, f, s – h for help] ", e);
d172b175 860 if (r < 0) {
63d77c92 861 write_confirm_error_fd(r, STDOUT_FILENO, u);
d172b175
FB
862 r = CONFIRM_EXECUTE;
863 goto restore_stdio;
864 }
af6da548 865
d172b175 866 switch (c) {
b0eb2944
FB
867 case 'c':
868 printf("Resuming normal execution.\n");
869 manager_disable_confirm_spawn();
870 r = 1;
871 break;
dd6f9ac0
FB
872 case 'D':
873 unit_dump(u, stdout, " ");
874 continue; /* ask again */
d172b175
FB
875 case 'f':
876 printf("Failing execution.\n");
877 r = CONFIRM_PRETEND_FAILURE;
878 break;
879 case 'h':
b0eb2944
FB
880 printf(" c - continue, proceed without asking anymore\n"
881 " D - dump, show the state of the unit\n"
dd6f9ac0 882 " f - fail, don't execute the command and pretend it failed\n"
d172b175 883 " h - help\n"
eedf223a 884 " i - info, show a short summary of the unit\n"
56fde33a 885 " j - jobs, show jobs that are in progress\n"
d172b175
FB
886 " s - skip, don't execute the command and pretend it succeeded\n"
887 " y - yes, execute the command\n");
dd6f9ac0 888 continue; /* ask again */
eedf223a
FB
889 case 'i':
890 printf(" Description: %s\n"
891 " Unit: %s\n"
892 " Command: %s\n",
893 u->id, u->description, cmdline);
894 continue; /* ask again */
56fde33a
FB
895 case 'j':
896 manager_dump_jobs(u->manager, stdout, " ");
897 continue; /* ask again */
539622bd
FB
898 case 'n':
899 /* 'n' was removed in favor of 'f'. */
900 printf("Didn't understand 'n', did you mean 'f'?\n");
901 continue; /* ask again */
d172b175
FB
902 case 's':
903 printf("Skipping execution.\n");
904 r = CONFIRM_PRETEND_SUCCESS;
905 break;
906 case 'y':
907 r = CONFIRM_EXECUTE;
908 break;
909 default:
910 assert_not_reached("Unhandled choice");
911 }
3b20f877 912 break;
3b20f877 913 }
af6da548 914
3b20f877 915restore_stdio:
af6da548 916 restore_confirm_stdio(&saved_stdin, &saved_stdout);
af6da548 917 return r;
80876c20
LP
918}
919
4d885bd3
DH
920static int get_fixed_user(const ExecContext *c, const char **user,
921 uid_t *uid, gid_t *gid,
922 const char **home, const char **shell) {
81a2b7ce 923 int r;
4d885bd3 924 const char *name;
81a2b7ce 925
4d885bd3 926 assert(c);
81a2b7ce 927
23deef88
LP
928 if (!c->user)
929 return 0;
930
4d885bd3
DH
931 /* Note that we don't set $HOME or $SHELL if they are not particularly enlightening anyway
932 * (i.e. are "/" or "/bin/nologin"). */
81a2b7ce 933
23deef88 934 name = c->user;
4d885bd3
DH
935 r = get_user_creds_clean(&name, uid, gid, home, shell);
936 if (r < 0)
937 return r;
81a2b7ce 938
4d885bd3
DH
939 *user = name;
940 return 0;
941}
942
943static int get_fixed_group(const ExecContext *c, const char **group, gid_t *gid) {
944 int r;
945 const char *name;
946
947 assert(c);
948
949 if (!c->group)
950 return 0;
951
952 name = c->group;
953 r = get_group_creds(&name, gid);
954 if (r < 0)
955 return r;
956
957 *group = name;
958 return 0;
959}
960
cdc5d5c5
DH
961static int get_supplementary_groups(const ExecContext *c, const char *user,
962 const char *group, gid_t gid,
963 gid_t **supplementary_gids, int *ngids) {
4d885bd3
DH
964 char **i;
965 int r, k = 0;
966 int ngroups_max;
967 bool keep_groups = false;
968 gid_t *groups = NULL;
969 _cleanup_free_ gid_t *l_gids = NULL;
970
971 assert(c);
972
bbeea271
DH
973 /*
974 * If user is given, then lookup GID and supplementary groups list.
975 * We avoid NSS lookups for gid=0. Also we have to initialize groups
cdc5d5c5
DH
976 * here and as early as possible so we keep the list of supplementary
977 * groups of the caller.
bbeea271
DH
978 */
979 if (user && gid_is_valid(gid) && gid != 0) {
980 /* First step, initialize groups from /etc/groups */
981 if (initgroups(user, gid) < 0)
982 return -errno;
983
984 keep_groups = true;
985 }
986
ac6e8be6 987 if (strv_isempty(c->supplementary_groups))
4d885bd3
DH
988 return 0;
989
366ddd25
DH
990 /*
991 * If SupplementaryGroups= was passed then NGROUPS_MAX has to
992 * be positive, otherwise fail.
993 */
994 errno = 0;
995 ngroups_max = (int) sysconf(_SC_NGROUPS_MAX);
996 if (ngroups_max <= 0) {
997 if (errno > 0)
998 return -errno;
999 else
1000 return -EOPNOTSUPP; /* For all other values */
1001 }
1002
4d885bd3
DH
1003 l_gids = new(gid_t, ngroups_max);
1004 if (!l_gids)
1005 return -ENOMEM;
81a2b7ce 1006
4d885bd3
DH
1007 if (keep_groups) {
1008 /*
1009 * Lookup the list of groups that the user belongs to, we
1010 * avoid NSS lookups here too for gid=0.
1011 */
1012 k = ngroups_max;
1013 if (getgrouplist(user, gid, l_gids, &k) < 0)
1014 return -EINVAL;
1015 } else
1016 k = 0;
81a2b7ce 1017
4d885bd3
DH
1018 STRV_FOREACH(i, c->supplementary_groups) {
1019 const char *g;
81a2b7ce 1020
4d885bd3
DH
1021 if (k >= ngroups_max)
1022 return -E2BIG;
81a2b7ce 1023
4d885bd3
DH
1024 g = *i;
1025 r = get_group_creds(&g, l_gids+k);
1026 if (r < 0)
1027 return r;
81a2b7ce 1028
4d885bd3
DH
1029 k++;
1030 }
81a2b7ce 1031
4d885bd3
DH
1032 /*
1033 * Sets ngids to zero to drop all supplementary groups, happens
1034 * when we are under root and SupplementaryGroups= is empty.
1035 */
1036 if (k == 0) {
1037 *ngids = 0;
1038 return 0;
1039 }
81a2b7ce 1040
4d885bd3
DH
1041 /* Otherwise get the final list of supplementary groups */
1042 groups = memdup(l_gids, sizeof(gid_t) * k);
1043 if (!groups)
1044 return -ENOMEM;
1045
1046 *supplementary_gids = groups;
1047 *ngids = k;
1048
1049 groups = NULL;
1050
1051 return 0;
1052}
1053
/* Applies the supplementary groups (if there are any) and then the GID (if valid) to the calling
 * process. Returns 0 on success, negative errno-style error on failure. */
static int enforce_groups(gid_t gid, gid_t *supplementary_gids, int ngids) {

        /* Handle SupplementaryGroups= if it is not empty */
        if (ngids > 0) {
                int r = maybe_setgroups(ngids, supplementary_gids);
                if (r < 0)
                        return r;
        }

        /* Then set our gids */
        if (gid_is_valid(gid) && setresgid(gid, gid, gid) < 0)
                return -errno;

        return 0;
}
1072
1073static int enforce_user(const ExecContext *context, uid_t uid) {
81a2b7ce
LP
1074 assert(context);
1075
4d885bd3
DH
1076 if (!uid_is_valid(uid))
1077 return 0;
1078
479050b3 1079 /* Sets (but doesn't look up) the uid and make sure we keep the
81a2b7ce
LP
1080 * capabilities while doing so. */
1081
479050b3 1082 if (context->capability_ambient_set != 0) {
81a2b7ce
LP
1083
1084 /* First step: If we need to keep capabilities but
1085 * drop privileges we need to make sure we keep our
cbb21cca 1086 * caps, while we drop privileges. */
693ced48 1087 if (uid != 0) {
cbb21cca 1088 int sb = context->secure_bits | 1<<SECURE_KEEP_CAPS;
693ced48
LP
1089
1090 if (prctl(PR_GET_SECUREBITS) != sb)
1091 if (prctl(PR_SET_SECUREBITS, sb) < 0)
1092 return -errno;
1093 }
81a2b7ce
LP
1094 }
1095
479050b3 1096 /* Second step: actually set the uids */
81a2b7ce
LP
1097 if (setresuid(uid, uid, uid) < 0)
1098 return -errno;
1099
1100 /* At this point we should have all necessary capabilities but
1101 are otherwise a normal user. However, the caps might got
1102 corrupted due to the setresuid() so we need clean them up
1103 later. This is done outside of this call. */
1104
1105 return 0;
1106}
1107
349cc4a5 1108#if HAVE_PAM
5b6319dc
LP
1109
1110static int null_conv(
1111 int num_msg,
1112 const struct pam_message **msg,
1113 struct pam_response **resp,
1114 void *appdata_ptr) {
1115
1116 /* We don't support conversations */
1117
1118 return PAM_CONV_ERR;
1119}
1120
cefc33ae
LP
1121#endif
1122
5b6319dc
LP
/* Opens a PAM session for 'user' under the PAM service name 'name' and forks off a helper process
 * ("(sd-pam)") whose only job is to close that session again once the calling process is gone.
 *
 *   name, user  PAM service name and user name passed to pam_start()
 *   uid, gid    credentials the helper process drops to before it starts waiting
 *   tty         optional; if non-NULL it is set as PAM_TTY
 *   env         in/out: each entry is exported to PAM via pam_putenv(); on success *env is freed and
 *               replaced by the list returned by pam_getenvlist()
 *   fds, n_fds  file descriptors that are closed in the helper process
 *
 * Returns 0 on success. On failure returns a negative errno-style value; PAM-level failures are reported
 * as -EPERM. When built without HAVE_PAM the function does nothing and returns 0. */
static int setup_pam(
                const char *name,
                const char *user,
                uid_t uid,
                gid_t gid,
                const char *tty,
                char ***env,
                int fds[], unsigned n_fds) {

#if HAVE_PAM

        static const struct pam_conv conv = {
                .conv = null_conv,
                .appdata_ptr = NULL
        };

        _cleanup_(barrier_destroy) Barrier barrier = BARRIER_NULL;
        pam_handle_t *handle = NULL;
        sigset_t old_ss;
        int pam_code = PAM_SUCCESS, r;
        char **nv, **e = NULL;
        bool close_session = false;
        pid_t pam_pid = 0, parent_pid;
        int flags = 0;

        assert(name);
        assert(user);
        assert(env);

        /* We set up PAM in the parent process, then fork. The child
         * will then stay around until killed via PR_SET_PDEATHSIG or
         * systemd via the cgroup logic. It will then remove the PAM
         * session again. The parent process will exec() the actual
         * daemon. We do things this way to ensure that the main PID
         * of the daemon is the one we initially fork()ed. */

        r = barrier_create(&barrier);
        if (r < 0)
                goto fail;

        /* Keep PAM quiet unless debug logging is enabled */
        if (log_get_max_level() < LOG_DEBUG)
                flags |= PAM_SILENT;

        pam_code = pam_start(name, user, &conv, &handle);
        if (pam_code != PAM_SUCCESS) {
                /* Make sure the failure path does not try to use the handle */
                handle = NULL;
                goto fail;
        }

        if (tty) {
                pam_code = pam_set_item(handle, PAM_TTY, tty);
                if (pam_code != PAM_SUCCESS)
                        goto fail;
        }

        /* Export the caller's environment to the PAM modules */
        STRV_FOREACH(nv, *env) {
                pam_code = pam_putenv(handle, *nv);
                if (pam_code != PAM_SUCCESS)
                        goto fail;
        }

        pam_code = pam_acct_mgmt(handle, flags);
        if (pam_code != PAM_SUCCESS)
                goto fail;

        pam_code = pam_open_session(handle, flags);
        if (pam_code != PAM_SUCCESS)
                goto fail;

        /* From now on the failure path has to close the session, too */
        close_session = true;

        e = pam_getenvlist(handle);
        if (!e) {
                pam_code = PAM_BUF_ERR;
                goto fail;
        }

        /* Block SIGTERM, so that we know that it won't get lost in
         * the child */

        assert_se(sigprocmask_many(SIG_BLOCK, &old_ss, SIGTERM, -1) >= 0);

        /* Remember our PID so that the child can detect whether it got re-parented */
        parent_pid = getpid_cached();

        pam_pid = fork();
        if (pam_pid < 0) {
                r = -errno;
                goto fail;
        }

        if (pam_pid == 0) {
                int sig, ret = EXIT_PAM;

                /* The child's job is to reset the PAM session on
                 * termination */
                barrier_set_role(&barrier, BARRIER_CHILD);

                /* This string must fit in 10 chars (i.e. the length
                 * of "/sbin/init"), to look pretty in /bin/ps */
                rename_process("(sd-pam)");

                /* Make sure we don't keep open the passed fds in this
                 * child. We assume that otherwise only those fds are
                 * open here that have been opened by PAM. */
                close_many(fds, n_fds);

                /* Drop privileges - we don't need any to pam_close_session
                 * and this will make PR_SET_PDEATHSIG work in most cases.
                 * If this fails, ignore the error - but expect sd-pam threads
                 * to fail to exit normally */

                r = maybe_setgroups(0, NULL);
                if (r < 0)
                        log_warning_errno(r, "Failed to setgroups() in sd-pam: %m");
                if (setresgid(gid, gid, gid) < 0)
                        log_warning_errno(errno, "Failed to setresgid() in sd-pam: %m");
                if (setresuid(uid, uid, uid) < 0)
                        log_warning_errno(errno, "Failed to setresuid() in sd-pam: %m");

                (void) ignore_signals(SIGPIPE, -1);

                /* Wait until our parent died. This will only work if
                 * the above setresuid() succeeds, otherwise the kernel
                 * will not allow unprivileged parents kill their privileged
                 * children this way. We rely on the control groups kill logic
                 * to do the rest for us. */
                if (prctl(PR_SET_PDEATHSIG, SIGTERM) < 0)
                        goto child_finish;

                /* Tell the parent that our setup is done. This is especially
                 * important regarding dropping privileges. Otherwise, unit
                 * setup might race against our setresuid(2) call.
                 *
                 * If the parent aborted, we'll detect this below, hence ignore
                 * return failure here. */
                (void) barrier_place(&barrier);

                /* Check if our parent process might already have died? */
                if (getppid() == parent_pid) {
                        sigset_t ss;

                        assert_se(sigemptyset(&ss) >= 0);
                        assert_se(sigaddset(&ss, SIGTERM) >= 0);

                        /* Parent is still around: sleep until SIGTERM arrives, retrying on EINTR */
                        for (;;) {
                                if (sigwait(&ss, &sig) < 0) {
                                        if (errno == EINTR)
                                                continue;

                                        goto child_finish;
                                }

                                assert(sig == SIGTERM);
                                break;
                        }
                }

                /* If our parent died we'll end the session */
                if (getppid() != parent_pid) {
                        pam_code = pam_close_session(handle, flags);
                        if (pam_code != PAM_SUCCESS)
                                goto child_finish;
                }

                ret = 0;

        child_finish:
                pam_end(handle, pam_code | flags);
                _exit(ret);
        }

        barrier_set_role(&barrier, BARRIER_PARENT);

        /* If the child was forked off successfully it will do all the
         * cleanups, so forget about the handle here. */
        handle = NULL;

        /* Unblock SIGTERM again in the parent */
        assert_se(sigprocmask(SIG_SETMASK, &old_ss, NULL) >= 0);

        /* We close the log explicitly here, since the PAM modules
         * might have opened it, but we don't want this fd around. */
        closelog();

        /* Synchronously wait for the child to initialize. We don't care for
         * errors as we cannot recover. However, warn loudly if it happens. */
        if (!barrier_place_and_sync(&barrier))
                log_error("PAM initialization failed");

        /* Hand the environment obtained from PAM to the caller, replacing the old one */
        strv_free(*env);
        *env = e;

        return 0;

fail:
        /* Two kinds of failures end up here: PAM ones (pam_code set) and plain errno ones (r set) */
        if (pam_code != PAM_SUCCESS) {
                log_error("PAM failed: %s", pam_strerror(handle, pam_code));
                r = -EPERM; /* PAM errors do not map to errno */
        } else
                log_error_errno(r, "PAM failed: %m");

        if (handle) {
                if (close_session)
                        pam_code = pam_close_session(handle, flags);

                pam_end(handle, pam_code | flags);
        }

        strv_free(e);
        closelog();

        return r;
#else
        return 0;
#endif
}
5b6319dc 1339
5d6b1584
LP
/* Renames the calling process to "(<name>)", where <name> is derived from the last component of 'path'.
 * At most the trailing 8 characters of that component are used; if the component is empty the process is
 * renamed to "(...)" instead. */
static void rename_process_from_path(const char *path) {
        char buf[11]; /* '(' + up to 8 characters + ')' + NUL */
        const char *base, *tail;
        size_t n;

        /* The resulting string must fit in 10 chars (i.e. the length of "/sbin/init") to look pretty in
         * /bin/ps */

        base = basename(path);
        if (isempty(base)) {
                rename_process("(...)");
                return;
        }

        n = strlen(base);
        tail = base;
        if (n > 8) {
                /* Prefer the end of the name: the beginning is frequently just "systemd-" */
                tail += n - 8;
                n = 8;
        }

        buf[0] = '(';
        memcpy(buf + 1, tail, n);
        buf[n + 1] = ')';
        buf[n + 2] = 0;

        rename_process(buf);
}
1370
469830d1
LP
1371static bool context_has_address_families(const ExecContext *c) {
1372 assert(c);
1373
1374 return c->address_families_whitelist ||
1375 !set_isempty(c->address_families);
1376}
1377
1378static bool context_has_syscall_filters(const ExecContext *c) {
1379 assert(c);
1380
1381 return c->syscall_whitelist ||
8cfa775f 1382 !hashmap_isempty(c->syscall_filter);
469830d1
LP
1383}
1384
/* Decides whether no_new_privs has to be enabled for this context: always if it was requested explicitly,
 * never otherwise if we hold CAP_SYS_ADMIN, and else whenever any of the seccomp-backed settings checked
 * below is in use. */
static bool context_has_no_new_privileges(const ExecContext *c) {
        assert(c);

        if (c->no_new_privileges)
                return true;

        /* If we are privileged, we don't need NNP */
        if (have_effective_cap(CAP_SYS_ADMIN))
                return false;

        /* We need NNP if we have any form of seccomp and are unprivileged */
        if (context_has_address_families(c) ||
            c->memory_deny_write_execute ||
            c->restrict_realtime ||
            exec_context_restrict_namespaces_set(c))
                return true;

        if (c->protect_kernel_tunables ||
            c->protect_kernel_modules ||
            c->private_devices)
                return true;

        if (context_has_syscall_filters(c) ||
            !set_isempty(c->syscall_archs))
                return true;

        return c->lock_personality;
}
1406
349cc4a5 1407#if HAVE_SECCOMP
17df7223 1408
83f12b27 1409static bool skip_seccomp_unavailable(const Unit* u, const char* msg) {
f673b62d
LP
1410
1411 if (is_seccomp_available())
1412 return false;
1413
f673b62d 1414 log_unit_debug(u, "SECCOMP features not detected in the kernel, skipping %s", msg);
f673b62d 1415 return true;
83f12b27
FS
1416}
1417
165a31c0 1418static int apply_syscall_filter(const Unit* u, const ExecContext *c, bool needs_ambient_hack) {
469830d1 1419 uint32_t negative_action, default_action, action;
165a31c0 1420 int r;
8351ceae 1421
469830d1 1422 assert(u);
c0467cf3 1423 assert(c);
8351ceae 1424
469830d1 1425 if (!context_has_syscall_filters(c))
83f12b27
FS
1426 return 0;
1427
469830d1
LP
1428 if (skip_seccomp_unavailable(u, "SystemCallFilter="))
1429 return 0;
e9642be2 1430
469830d1 1431 negative_action = c->syscall_errno == 0 ? SCMP_ACT_KILL : SCMP_ACT_ERRNO(c->syscall_errno);
e9642be2 1432
469830d1
LP
1433 if (c->syscall_whitelist) {
1434 default_action = negative_action;
1435 action = SCMP_ACT_ALLOW;
7c66bae2 1436 } else {
469830d1
LP
1437 default_action = SCMP_ACT_ALLOW;
1438 action = negative_action;
57183d11 1439 }
8351ceae 1440
165a31c0
LP
1441 if (needs_ambient_hack) {
1442 r = seccomp_filter_set_add(c->syscall_filter, c->syscall_whitelist, syscall_filter_sets + SYSCALL_FILTER_SET_SETUID);
1443 if (r < 0)
1444 return r;
1445 }
1446
469830d1 1447 return seccomp_load_syscall_filter_set_raw(default_action, c->syscall_filter, action);
4298d0b5
LP
1448}
1449
469830d1
LP
1450static int apply_syscall_archs(const Unit *u, const ExecContext *c) {
1451 assert(u);
4298d0b5
LP
1452 assert(c);
1453
469830d1 1454 if (set_isempty(c->syscall_archs))
83f12b27
FS
1455 return 0;
1456
469830d1
LP
1457 if (skip_seccomp_unavailable(u, "SystemCallArchitectures="))
1458 return 0;
4298d0b5 1459
469830d1
LP
1460 return seccomp_restrict_archs(c->syscall_archs);
1461}
4298d0b5 1462
469830d1
LP
1463static int apply_address_families(const Unit* u, const ExecContext *c) {
1464 assert(u);
1465 assert(c);
4298d0b5 1466
469830d1
LP
1467 if (!context_has_address_families(c))
1468 return 0;
4298d0b5 1469
469830d1
LP
1470 if (skip_seccomp_unavailable(u, "RestrictAddressFamilies="))
1471 return 0;
4298d0b5 1472
469830d1 1473 return seccomp_restrict_address_families(c->address_families, c->address_families_whitelist);
8351ceae 1474}
4298d0b5 1475
83f12b27 1476static int apply_memory_deny_write_execute(const Unit* u, const ExecContext *c) {
469830d1 1477 assert(u);
f3e43635
TM
1478 assert(c);
1479
469830d1 1480 if (!c->memory_deny_write_execute)
83f12b27
FS
1481 return 0;
1482
469830d1
LP
1483 if (skip_seccomp_unavailable(u, "MemoryDenyWriteExecute="))
1484 return 0;
f3e43635 1485
469830d1 1486 return seccomp_memory_deny_write_execute();
f3e43635
TM
1487}
1488
83f12b27 1489static int apply_restrict_realtime(const Unit* u, const ExecContext *c) {
469830d1 1490 assert(u);
f4170c67
LP
1491 assert(c);
1492
469830d1 1493 if (!c->restrict_realtime)
83f12b27
FS
1494 return 0;
1495
469830d1
LP
1496 if (skip_seccomp_unavailable(u, "RestrictRealtime="))
1497 return 0;
f4170c67 1498
469830d1 1499 return seccomp_restrict_realtime();
f4170c67
LP
1500}
1501
59e856c7 1502static int apply_protect_sysctl(const Unit *u, const ExecContext *c) {
469830d1 1503 assert(u);
59eeb84b
LP
1504 assert(c);
1505
1506 /* Turn off the legacy sysctl() system call. Many distributions turn this off while building the kernel, but
1507 * let's protect even those systems where this is left on in the kernel. */
1508
469830d1 1509 if (!c->protect_kernel_tunables)
59eeb84b
LP
1510 return 0;
1511
469830d1
LP
1512 if (skip_seccomp_unavailable(u, "ProtectKernelTunables="))
1513 return 0;
59eeb84b 1514
469830d1 1515 return seccomp_protect_sysctl();
59eeb84b
LP
1516}
1517
59e856c7 1518static int apply_protect_kernel_modules(const Unit *u, const ExecContext *c) {
469830d1 1519 assert(u);
502d704e
DH
1520 assert(c);
1521
25a8d8a0 1522 /* Turn off module syscalls on ProtectKernelModules=yes */
502d704e 1523
469830d1
LP
1524 if (!c->protect_kernel_modules)
1525 return 0;
1526
502d704e
DH
1527 if (skip_seccomp_unavailable(u, "ProtectKernelModules="))
1528 return 0;
1529
469830d1 1530 return seccomp_load_syscall_filter_set(SCMP_ACT_ALLOW, syscall_filter_sets + SYSCALL_FILTER_SET_MODULE, SCMP_ACT_ERRNO(EPERM));
502d704e
DH
1531}
1532
59e856c7 1533static int apply_private_devices(const Unit *u, const ExecContext *c) {
469830d1 1534 assert(u);
ba128bb8
LP
1535 assert(c);
1536
8f81a5f6 1537 /* If PrivateDevices= is set, also turn off iopl and all @raw-io syscalls. */
ba128bb8 1538
469830d1
LP
1539 if (!c->private_devices)
1540 return 0;
1541
ba128bb8
LP
1542 if (skip_seccomp_unavailable(u, "PrivateDevices="))
1543 return 0;
1544
469830d1 1545 return seccomp_load_syscall_filter_set(SCMP_ACT_ALLOW, syscall_filter_sets + SYSCALL_FILTER_SET_RAW_IO, SCMP_ACT_ERRNO(EPERM));
ba128bb8
LP
1546}
1547
add00535 1548static int apply_restrict_namespaces(Unit *u, const ExecContext *c) {
469830d1 1549 assert(u);
add00535
LP
1550 assert(c);
1551
1552 if (!exec_context_restrict_namespaces_set(c))
1553 return 0;
1554
1555 if (skip_seccomp_unavailable(u, "RestrictNamespaces="))
1556 return 0;
1557
1558 return seccomp_restrict_namespaces(c->restrict_namespaces);
1559}
1560
78e864e5 1561static int apply_lock_personality(const Unit* u, const ExecContext *c) {
e8132d63
LP
1562 unsigned long personality;
1563 int r;
78e864e5
TM
1564
1565 assert(u);
1566 assert(c);
1567
1568 if (!c->lock_personality)
1569 return 0;
1570
1571 if (skip_seccomp_unavailable(u, "LockPersonality="))
1572 return 0;
1573
e8132d63
LP
1574 personality = c->personality;
1575
1576 /* If personality is not specified, use either PER_LINUX or PER_LINUX32 depending on what is currently set. */
1577 if (personality == PERSONALITY_INVALID) {
1578
1579 r = opinionated_personality(&personality);
1580 if (r < 0)
1581 return r;
1582 }
78e864e5
TM
1583
1584 return seccomp_lock_personality(personality);
1585}
1586
c0467cf3 1587#endif
8351ceae 1588
31a7eb86
ZJS
1589static void do_idle_pipe_dance(int idle_pipe[4]) {
1590 assert(idle_pipe);
1591
54eb2300
LP
1592 idle_pipe[1] = safe_close(idle_pipe[1]);
1593 idle_pipe[2] = safe_close(idle_pipe[2]);
31a7eb86
ZJS
1594
1595 if (idle_pipe[0] >= 0) {
1596 int r;
1597
1598 r = fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT_USEC);
1599
1600 if (idle_pipe[3] >= 0 && r == 0 /* timeout */) {
c7cc737f
LP
1601 ssize_t n;
1602
31a7eb86 1603 /* Signal systemd that we are bored and want to continue. */
c7cc737f
LP
1604 n = write(idle_pipe[3], "x", 1);
1605 if (n > 0)
cd972d69
ZJS
1606 /* Wait for systemd to react to the signal above. */
1607 fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT2_USEC);
31a7eb86
ZJS
1608 }
1609
54eb2300 1610 idle_pipe[0] = safe_close(idle_pipe[0]);
31a7eb86
ZJS
1611
1612 }
1613
54eb2300 1614 idle_pipe[3] = safe_close(idle_pipe[3]);
31a7eb86
ZJS
1615}
1616
7cae38c4 1617static int build_environment(
fd63e712 1618 Unit *u,
9fa95f85 1619 const ExecContext *c,
1e22b5cd 1620 const ExecParameters *p,
7cae38c4
LP
1621 unsigned n_fds,
1622 const char *home,
1623 const char *username,
1624 const char *shell,
7bce046b
LP
1625 dev_t journal_stream_dev,
1626 ino_t journal_stream_ino,
7cae38c4
LP
1627 char ***ret) {
1628
1629 _cleanup_strv_free_ char **our_env = NULL;
1630 unsigned n_env = 0;
1631 char *x;
1632
4b58153d 1633 assert(u);
7cae38c4
LP
1634 assert(c);
1635 assert(ret);
1636
4b58153d 1637 our_env = new0(char*, 14);
7cae38c4
LP
1638 if (!our_env)
1639 return -ENOMEM;
1640
1641 if (n_fds > 0) {
8dd4c05b
LP
1642 _cleanup_free_ char *joined = NULL;
1643
df0ff127 1644 if (asprintf(&x, "LISTEN_PID="PID_FMT, getpid_cached()) < 0)
7cae38c4
LP
1645 return -ENOMEM;
1646 our_env[n_env++] = x;
1647
1648 if (asprintf(&x, "LISTEN_FDS=%u", n_fds) < 0)
1649 return -ENOMEM;
1650 our_env[n_env++] = x;
8dd4c05b 1651
1e22b5cd 1652 joined = strv_join(p->fd_names, ":");
8dd4c05b
LP
1653 if (!joined)
1654 return -ENOMEM;
1655
605405c6 1656 x = strjoin("LISTEN_FDNAMES=", joined);
8dd4c05b
LP
1657 if (!x)
1658 return -ENOMEM;
1659 our_env[n_env++] = x;
7cae38c4
LP
1660 }
1661
b08af3b1 1662 if ((p->flags & EXEC_SET_WATCHDOG) && p->watchdog_usec > 0) {
df0ff127 1663 if (asprintf(&x, "WATCHDOG_PID="PID_FMT, getpid_cached()) < 0)
09812eb7
LP
1664 return -ENOMEM;
1665 our_env[n_env++] = x;
1666
1e22b5cd 1667 if (asprintf(&x, "WATCHDOG_USEC="USEC_FMT, p->watchdog_usec) < 0)
09812eb7
LP
1668 return -ENOMEM;
1669 our_env[n_env++] = x;
1670 }
1671
fd63e712
LP
1672 /* If this is D-Bus, tell the nss-systemd module, since it relies on being able to use D-Bus look up dynamic
1673 * users via PID 1, possibly dead-locking the dbus daemon. This way it will not use D-Bus to resolve names, but
1674 * check the database directly. */
ac647978 1675 if (p->flags & EXEC_NSS_BYPASS_BUS) {
fd63e712
LP
1676 x = strdup("SYSTEMD_NSS_BYPASS_BUS=1");
1677 if (!x)
1678 return -ENOMEM;
1679 our_env[n_env++] = x;
1680 }
1681
7cae38c4
LP
1682 if (home) {
1683 x = strappend("HOME=", home);
1684 if (!x)
1685 return -ENOMEM;
1686 our_env[n_env++] = x;
1687 }
1688
1689 if (username) {
1690 x = strappend("LOGNAME=", username);
1691 if (!x)
1692 return -ENOMEM;
1693 our_env[n_env++] = x;
1694
1695 x = strappend("USER=", username);
1696 if (!x)
1697 return -ENOMEM;
1698 our_env[n_env++] = x;
1699 }
1700
1701 if (shell) {
1702 x = strappend("SHELL=", shell);
1703 if (!x)
1704 return -ENOMEM;
1705 our_env[n_env++] = x;
1706 }
1707
4b58153d
LP
1708 if (!sd_id128_is_null(u->invocation_id)) {
1709 if (asprintf(&x, "INVOCATION_ID=" SD_ID128_FORMAT_STR, SD_ID128_FORMAT_VAL(u->invocation_id)) < 0)
1710 return -ENOMEM;
1711
1712 our_env[n_env++] = x;
1713 }
1714
6af760f3
LP
1715 if (exec_context_needs_term(c)) {
1716 const char *tty_path, *term = NULL;
1717
1718 tty_path = exec_context_tty_path(c);
1719
1720 /* If we are forked off PID 1 and we are supposed to operate on /dev/console, then let's try to inherit
1721 * the $TERM set for PID 1. This is useful for containers so that the $TERM the container manager
1722 * passes to PID 1 ends up all the way in the console login shown. */
1723
1724 if (path_equal(tty_path, "/dev/console") && getppid() == 1)
1725 term = getenv("TERM");
1726 if (!term)
1727 term = default_term_for_tty(tty_path);
7cae38c4 1728
6af760f3 1729 x = strappend("TERM=", term);
7cae38c4
LP
1730 if (!x)
1731 return -ENOMEM;
1732 our_env[n_env++] = x;
1733 }
1734
7bce046b
LP
1735 if (journal_stream_dev != 0 && journal_stream_ino != 0) {
1736 if (asprintf(&x, "JOURNAL_STREAM=" DEV_FMT ":" INO_FMT, journal_stream_dev, journal_stream_ino) < 0)
1737 return -ENOMEM;
1738
1739 our_env[n_env++] = x;
1740 }
1741
7cae38c4 1742 our_env[n_env++] = NULL;
7bce046b 1743 assert(n_env <= 12);
7cae38c4
LP
1744
1745 *ret = our_env;
1746 our_env = NULL;
1747
1748 return 0;
1749}
1750
b4c14404
FB
1751static int build_pass_environment(const ExecContext *c, char ***ret) {
1752 _cleanup_strv_free_ char **pass_env = NULL;
1753 size_t n_env = 0, n_bufsize = 0;
1754 char **i;
1755
1756 STRV_FOREACH(i, c->pass_environment) {
1757 _cleanup_free_ char *x = NULL;
1758 char *v;
1759
1760 v = getenv(*i);
1761 if (!v)
1762 continue;
605405c6 1763 x = strjoin(*i, "=", v);
b4c14404
FB
1764 if (!x)
1765 return -ENOMEM;
00819cc1 1766
b4c14404
FB
1767 if (!GREEDY_REALLOC(pass_env, n_bufsize, n_env + 2))
1768 return -ENOMEM;
00819cc1 1769
b4c14404
FB
1770 pass_env[n_env++] = x;
1771 pass_env[n_env] = NULL;
1772 x = NULL;
1773 }
1774
1775 *ret = pass_env;
1776 pass_env = NULL;
1777
1778 return 0;
1779}
1780
8b44a3d2
LP
1781static bool exec_needs_mount_namespace(
1782 const ExecContext *context,
1783 const ExecParameters *params,
1784 ExecRuntime *runtime) {
1785
1786 assert(context);
1787 assert(params);
1788
915e6d16
LP
1789 if (context->root_image)
1790 return true;
1791
2a624c36
AP
1792 if (!strv_isempty(context->read_write_paths) ||
1793 !strv_isempty(context->read_only_paths) ||
1794 !strv_isempty(context->inaccessible_paths))
8b44a3d2
LP
1795 return true;
1796
652bb263
YW
1797 if (context->n_bind_mounts > 0 ||
1798 !strv_isempty(context->directories[EXEC_DIRECTORY_RUNTIME].paths) ||
1799 !strv_isempty(context->directories[EXEC_DIRECTORY_STATE].paths) ||
1800 !strv_isempty(context->directories[EXEC_DIRECTORY_CACHE].paths) ||
1801 !strv_isempty(context->directories[EXEC_DIRECTORY_LOGS].paths) ||
1802 !strv_isempty(context->directories[EXEC_DIRECTORY_CONFIGURATION].paths))
d2d6c096
LP
1803 return true;
1804
8b44a3d2
LP
1805 if (context->mount_flags != 0)
1806 return true;
1807
1808 if (context->private_tmp && runtime && (runtime->tmp_dir || runtime->var_tmp_dir))
1809 return true;
1810
8b44a3d2
LP
1811 if (context->private_devices ||
1812 context->protect_system != PROTECT_SYSTEM_NO ||
59eeb84b
LP
1813 context->protect_home != PROTECT_HOME_NO ||
1814 context->protect_kernel_tunables ||
c575770b 1815 context->protect_kernel_modules ||
59eeb84b 1816 context->protect_control_groups)
8b44a3d2
LP
1817 return true;
1818
9c988f93 1819 if (context->mount_apivfs && (context->root_image || context->root_directory))
5d997827
LP
1820 return true;
1821
8b44a3d2
LP
1822 return false;
1823}
1824
d251207d
LP
/* Moves the calling process into a new user namespace and has a forked helper write its UID/GID maps.
 * Root is always mapped to root; additionally 'uid' and 'gid' are mapped to themselves if they are valid
 * and non-zero. Returns 0 on success, a negative errno-style value on failure (-EIO if the helper
 * misbehaved or exited unsuccessfully without reporting a proper error code). */
static int setup_private_users(uid_t uid, gid_t gid) {
        _cleanup_free_ char *uid_map = NULL, *gid_map = NULL;
        _cleanup_close_pair_ int errno_pipe[2] = { -1, -1 };
        _cleanup_close_ int unshare_ready_fd = -1;
        _cleanup_(sigkill_waitp) pid_t pid = 0;
        uint64_t c = 1;
        siginfo_t si;
        ssize_t n;
        int r;

        /* Set up a user namespace and map root to root, the selected UID/GID to itself, and everything else to
         * nobody. In order to be able to write this mapping we need CAP_SETUID in the original user namespace, which
         * we however lack after opening the user namespace. To work around this we fork() a temporary child process,
         * which waits for the parent to create the new user namespace while staying in the original namespace. The
         * child then writes the UID mapping, under full privileges. The parent waits for the child to finish and
         * continues execution normally. */

        if (uid != 0 && uid_is_valid(uid)) {
                r = asprintf(&uid_map,
                             "0 0 1\n"                      /* Map root → root */
                             UID_FMT " " UID_FMT " 1\n",    /* Map $UID → $UID */
                             uid, uid);
                if (r < 0)
                        return -ENOMEM;
        } else {
                uid_map = strdup("0 0 1\n");            /* The case where the above is the same */
                if (!uid_map)
                        return -ENOMEM;
        }

        if (gid != 0 && gid_is_valid(gid)) {
                r = asprintf(&gid_map,
                             "0 0 1\n"                      /* Map root → root */
                             GID_FMT " " GID_FMT " 1\n",    /* Map $GID → $GID */
                             gid, gid);
                if (r < 0)
                        return -ENOMEM;
        } else {
                gid_map = strdup("0 0 1\n");            /* The case where the above is the same */
                if (!gid_map)
                        return -ENOMEM;
        }

        /* Create a communication channel so that the parent can tell the child when it finished creating the user
         * namespace. */
        unshare_ready_fd = eventfd(0, EFD_CLOEXEC);
        if (unshare_ready_fd < 0)
                return -errno;

        /* Create a communication channel so that the child can tell the parent a proper error code in case it
         * failed. */
        if (pipe2(errno_pipe, O_CLOEXEC) < 0)
                return -errno;

        pid = fork();
        if (pid < 0)
                return -errno;

        if (pid == 0) {
                _cleanup_close_ int fd = -1;
                const char *a;
                pid_t ppid;

                /* Child process, running in the original user namespace. Let's update the parent's UID/GID map from
                 * here, after the parent opened its own user namespace. */

                ppid = getppid();
                errno_pipe[0] = safe_close(errno_pipe[0]);

                /* Wait until the parent unshared the user namespace */
                if (read(unshare_ready_fd, &c, sizeof(c)) < 0) {
                        r = -errno;
                        goto child_fail;
                }

                /* Disable the setgroups() system call in the child user namespace, for good. */
                a = procfs_file_alloca(ppid, "setgroups");
                fd = open(a, O_WRONLY|O_CLOEXEC);
                if (fd < 0) {
                        if (errno != ENOENT) {
                                r = -errno;
                                goto child_fail;
                        }

                        /* If the file is missing the kernel is too old, let's continue anyway. */
                } else {
                        if (write(fd, "deny\n", 5) < 0) {
                                r = -errno;
                                goto child_fail;
                        }

                        fd = safe_close(fd);
                }

                /* First write the GID map */
                a = procfs_file_alloca(ppid, "gid_map");
                fd = open(a, O_WRONLY|O_CLOEXEC);
                if (fd < 0) {
                        r = -errno;
                        goto child_fail;
                }
                if (write(fd, gid_map, strlen(gid_map)) < 0) {
                        r = -errno;
                        goto child_fail;
                }
                fd = safe_close(fd);

                /* Then write the UID map */
                a = procfs_file_alloca(ppid, "uid_map");
                fd = open(a, O_WRONLY|O_CLOEXEC);
                if (fd < 0) {
                        r = -errno;
                        goto child_fail;
                }
                if (write(fd, uid_map, strlen(uid_map)) < 0) {
                        r = -errno;
                        goto child_fail;
                }

                _exit(EXIT_SUCCESS);

        child_fail:
                /* Pass the (negative) error code on to the parent, then bail out */
                (void) write(errno_pipe[1], &r, sizeof(r));
                _exit(EXIT_FAILURE);
        }

        /* Parent: close our copy of the write end, so that reading below returns EOF once the child is done */
        errno_pipe[1] = safe_close(errno_pipe[1]);

        if (unshare(CLONE_NEWUSER) < 0)
                return -errno;

        /* Let the child know that the namespace is ready now */
        if (write(unshare_ready_fd, &c, sizeof(c)) < 0)
                return -errno;

        /* Try to read an error code from the child */
        n = read(errno_pipe[0], &r, sizeof(r));
        if (n < 0)
                return -errno;
        if (n == sizeof(r)) { /* an error code was sent to us */
                if (r < 0)
                        return r;
                return -EIO;
        }
        if (n != 0) /* on success we should have read 0 bytes */
                return -EIO;

        r = wait_for_terminate(pid, &si);
        if (r < 0)
                return r;
        /* The child has been reaped, make sure the cleanup handler leaves the PID alone */
        pid = 0;

        /* If something strange happened with the child, let's consider this fatal, too */
        if (si.si_code != CLD_EXITED || si.si_status != 0)
                return -EIO;

        return 0;
}
1983
/* Creates the directories configured for directory type 'type' below params->prefix[type], applies the
 * configured access mode and (except for the configuration directory type) recursively changes ownership
 * to uid/gid. Does nothing if no prefix is set for the type.
 *
 * Returns 0 on success. On failure returns a negative errno-style value and stores the exit status
 * matching the directory type in *exit_status. */
static int setup_exec_directory(
                const ExecContext *context,
                const ExecParameters *params,
                uid_t uid,
                gid_t gid,
                ExecDirectoryType type,
                int *exit_status) {

        /* Exit status to report on failure, per directory type */
        static const int exit_status_table[_EXEC_DIRECTORY_TYPE_MAX] = {
                [EXEC_DIRECTORY_RUNTIME] = EXIT_RUNTIME_DIRECTORY,
                [EXEC_DIRECTORY_STATE] = EXIT_STATE_DIRECTORY,
                [EXEC_DIRECTORY_CACHE] = EXIT_CACHE_DIRECTORY,
                [EXEC_DIRECTORY_LOGS] = EXIT_LOGS_DIRECTORY,
                [EXEC_DIRECTORY_CONFIGURATION] = EXIT_CONFIGURATION_DIRECTORY,
        };
        char **rt;
        int r;

        assert(context);
        assert(params);
        assert(type >= 0 && type < _EXEC_DIRECTORY_TYPE_MAX);
        assert(exit_status);

        if (!params->prefix[type])
                return 0;

        /* If directories shall be chown()ed, fall back to root for credentials that are not valid */
        if (params->flags & EXEC_CHOWN_DIRECTORIES) {
                if (!uid_is_valid(uid))
                        uid = 0;
                if (!gid_is_valid(gid))
                        gid = 0;
        }

        STRV_FOREACH(rt, context->directories[type].paths) {
                _cleanup_free_ char *p = NULL, *pp = NULL;
                const char *effective;

                p = strjoin(params->prefix[type], "/", *rt);
                if (!p) {
                        r = -ENOMEM;
                        goto fail;
                }

                r = mkdir_parents_label(p, 0755);
                if (r < 0)
                        goto fail;

                if (context->dynamic_user &&
                    !IN_SET(type, EXEC_DIRECTORY_RUNTIME, EXEC_DIRECTORY_CONFIGURATION)) {
                        _cleanup_free_ char *private_root = NULL, *relative = NULL, *parent = NULL;

                        /* So, here's one extra complication when dealing with DynamicUser=1 units. In that case we
                         * want to avoid leaving a directory around fully accessible that is owned by a dynamic user
                         * whose UID is later on reused. To lock this down we use the same trick used by container
                         * managers to prohibit host users to get access to files of the same UID in containers: we
                         * place everything inside a directory that has an access mode of 0700 and is owned root:root,
                         * so that it acts as security boundary for unprivileged host code. We then use fs namespacing
                         * to make this directory permeable for the service itself.
                         *
                         * Specifically: for a service which wants a special directory "foo/" we first create a
                         * directory "private/" with access mode 0700 owned by root:root. Then we place "foo" inside of
                         * that directory (i.e. "private/foo/"), and make "foo" a symlink to "private/foo". This way,
                         * privileged host users can access "foo/" as usual, but unprivileged host users can't look
                         * into it. Inside of the namespace of the container "private/" is replaced by a more liberally
                         * accessible tmpfs, into which the host's "private/foo/" is mounted under the same name, thus
                         * disabling the access boundary for the service and making sure it only gets access to the
                         * dirs it needs but no others. Tricky? Yes, absolutely, but it works!
                         *
                         * Note that we don't do this for EXEC_DIRECTORY_CONFIGURATION as that's assumed not to be
                         * owned by the service itself.
                         * Also, note that we don't do this for EXEC_DIRECTORY_RUNTIME as that's often used for sharing
                         * files or sockets with other services. */

                        private_root = strjoin(params->prefix[type], "/private");
                        if (!private_root) {
                                r = -ENOMEM;
                                goto fail;
                        }

                        /* First set up private root if it doesn't exist yet, with access mode 0700 and owned by root:root */
                        r = mkdir_safe_label(private_root, 0700, 0, 0, false);
                        if (r < 0)
                                goto fail;

                        pp = strjoin(private_root, "/", *rt);
                        if (!pp) {
                                r = -ENOMEM;
                                goto fail;
                        }

                        /* Create all directories between the configured directory and this private root, and mark them 0755 */
                        r = mkdir_parents_label(pp, 0755);
                        if (r < 0)
                                goto fail;

                        /* Finally, create the actual directory for the service */
                        r = mkdir_label(pp, context->directories[type].mode);
                        if (r < 0 && r != -EEXIST)
                                goto fail;

                        parent = dirname_malloc(p);
                        if (!parent) {
                                r = -ENOMEM;
                                goto fail;
                        }

                        /* Compute the symlink target relative to the directory the symlink is placed in */
                        r = path_make_relative(parent, pp, &relative);
                        if (r < 0)
                                goto fail;

                        /* And link it up from the original place */
                        r = symlink_idempotent(relative, p);
                        if (r < 0)
                                goto fail;

                        effective = pp;

                } else {
                        /* The simple case: create the directory right at the configured place; it is fine if it
                         * exists already */
                        r = mkdir_label(p, context->directories[type].mode);
                        if (r < 0 && r != -EEXIST)
                                goto fail;

                        effective = p;
                }

                /* First lock down the access mode */
                if (chmod(effective, context->directories[type].mode) < 0) {
                        r = -errno;
                        goto fail;
                }

                /* Don't change the owner of the configuration directory, as in the common case it is not written to by
                 * a service, and shall not be writable. */
                if (type == EXEC_DIRECTORY_CONFIGURATION)
                        continue;

                /* Then, change the ownership of the whole tree, if necessary */
                r = path_chown_recursive(effective, uid, gid);
                if (r < 0)
                        goto fail;
        }

        return 0;

fail:
        *exit_status = exit_status_table[type];
        return r;
}
2132
cefc33ae
LP
2133static int setup_smack(
2134 const ExecContext *context,
2135 const ExecCommand *command) {
2136
cefc33ae
LP
2137 int r;
2138
2139 assert(context);
2140 assert(command);
2141
cefc33ae
LP
2142 if (context->smack_process_label) {
2143 r = mac_smack_apply_pid(0, context->smack_process_label);
2144 if (r < 0)
2145 return r;
2146 }
2147#ifdef SMACK_DEFAULT_PROCESS_LABEL
2148 else {
2149 _cleanup_free_ char *exec_label = NULL;
2150
2151 r = mac_smack_read(command->path, SMACK_ATTR_EXEC, &exec_label);
4c701096 2152 if (r < 0 && !IN_SET(r, -ENODATA, -EOPNOTSUPP))
cefc33ae
LP
2153 return r;
2154
2155 r = mac_smack_apply_pid(0, exec_label ? : SMACK_DEFAULT_PROCESS_LABEL);
2156 if (r < 0)
2157 return r;
2158 }
cefc33ae
LP
2159#endif
2160
2161 return 0;
2162}
2163
6c47cd7d
LP
2164static int compile_bind_mounts(
2165 const ExecContext *context,
2166 const ExecParameters *params,
2167 BindMount **ret_bind_mounts,
2168 unsigned *ret_n_bind_mounts,
2169 char ***ret_empty_directories) {
2170
2171 _cleanup_strv_free_ char **empty_directories = NULL;
2172 BindMount *bind_mounts;
2173 unsigned n, h = 0, i;
2174 ExecDirectoryType t;
2175 int r;
2176
2177 assert(context);
2178 assert(params);
2179 assert(ret_bind_mounts);
2180 assert(ret_n_bind_mounts);
2181 assert(ret_empty_directories);
2182
2183 n = context->n_bind_mounts;
2184 for (t = 0; t < _EXEC_DIRECTORY_TYPE_MAX; t++) {
2185 if (!params->prefix[t])
2186 continue;
2187
2188 n += strv_length(context->directories[t].paths);
2189 }
2190
2191 if (n <= 0) {
2192 *ret_bind_mounts = NULL;
2193 *ret_n_bind_mounts = 0;
2194 *ret_empty_directories = NULL;
2195 return 0;
2196 }
2197
2198 bind_mounts = new(BindMount, n);
2199 if (!bind_mounts)
2200 return -ENOMEM;
2201
a8cabc61 2202 for (i = 0; i < context->n_bind_mounts; i++) {
6c47cd7d
LP
2203 BindMount *item = context->bind_mounts + i;
2204 char *s, *d;
2205
2206 s = strdup(item->source);
2207 if (!s) {
2208 r = -ENOMEM;
2209 goto finish;
2210 }
2211
2212 d = strdup(item->destination);
2213 if (!d) {
2214 free(s);
2215 r = -ENOMEM;
2216 goto finish;
2217 }
2218
2219 bind_mounts[h++] = (BindMount) {
2220 .source = s,
2221 .destination = d,
2222 .read_only = item->read_only,
2223 .recursive = item->recursive,
2224 .ignore_enoent = item->ignore_enoent,
2225 };
2226 }
2227
2228 for (t = 0; t < _EXEC_DIRECTORY_TYPE_MAX; t++) {
2229 char **suffix;
2230
2231 if (!params->prefix[t])
2232 continue;
2233
2234 if (strv_isempty(context->directories[t].paths))
2235 continue;
2236
8092a48c
YW
2237 if (context->dynamic_user &&
2238 !IN_SET(t, EXEC_DIRECTORY_RUNTIME, EXEC_DIRECTORY_CONFIGURATION)) {
6c47cd7d
LP
2239 char *private_root;
2240
2241 /* So this is for a dynamic user, and we need to make sure the process can access its own
2242 * directory. For that we overmount the usually inaccessible "private" subdirectory with a
2243 * tmpfs that makes it accessible and is empty except for the submounts we do this for. */
2244
2245 private_root = strjoin(params->prefix[t], "/private");
2246 if (!private_root) {
2247 r = -ENOMEM;
2248 goto finish;
2249 }
2250
2251 r = strv_consume(&empty_directories, private_root);
2252 if (r < 0) {
2253 r = -ENOMEM;
2254 goto finish;
2255 }
2256 }
2257
2258 STRV_FOREACH(suffix, context->directories[t].paths) {
2259 char *s, *d;
2260
8092a48c
YW
2261 if (context->dynamic_user &&
2262 !IN_SET(t, EXEC_DIRECTORY_RUNTIME, EXEC_DIRECTORY_CONFIGURATION))
6c47cd7d
LP
2263 s = strjoin(params->prefix[t], "/private/", *suffix);
2264 else
2265 s = strjoin(params->prefix[t], "/", *suffix);
2266 if (!s) {
2267 r = -ENOMEM;
2268 goto finish;
2269 }
2270
2271 d = strdup(s);
2272 if (!d) {
2273 free(s);
2274 r = -ENOMEM;
2275 goto finish;
2276 }
2277
2278 bind_mounts[h++] = (BindMount) {
2279 .source = s,
2280 .destination = d,
2281 .read_only = false,
2282 .recursive = true,
2283 .ignore_enoent = false,
2284 };
2285 }
2286 }
2287
2288 assert(h == n);
2289
2290 *ret_bind_mounts = bind_mounts;
2291 *ret_n_bind_mounts = n;
2292 *ret_empty_directories = empty_directories;
2293
2294 empty_directories = NULL;
2295
2296 return (int) n;
2297
2298finish:
2299 bind_mount_free_many(bind_mounts, h);
2300 return r;
2301}
2302
6818c54c
LP
2303static int apply_mount_namespace(
2304 Unit *u,
2305 ExecCommand *command,
2306 const ExecContext *context,
2307 const ExecParameters *params,
2308 ExecRuntime *runtime) {
2309
7bcef4ef 2310 _cleanup_strv_free_ char **empty_directories = NULL;
93c6bb51 2311 char *tmp = NULL, *var = NULL;
915e6d16 2312 const char *root_dir = NULL, *root_image = NULL;
bb0ff3fb 2313 NamespaceInfo ns_info = {
af964954 2314 .ignore_protect_paths = false,
93c6bb51
DH
2315 .private_dev = context->private_devices,
2316 .protect_control_groups = context->protect_control_groups,
2317 .protect_kernel_tunables = context->protect_kernel_tunables,
2318 .protect_kernel_modules = context->protect_kernel_modules,
5d997827 2319 .mount_apivfs = context->mount_apivfs,
93c6bb51 2320 };
165a31c0 2321 bool needs_sandboxing;
6c47cd7d
LP
2322 BindMount *bind_mounts = NULL;
2323 unsigned n_bind_mounts = 0;
6818c54c 2324 int r;
93c6bb51 2325
2b3c1b9e
DH
2326 assert(context);
2327
93c6bb51
DH
2328 /* The runtime struct only contains the parent of the private /tmp,
2329 * which is non-accessible to world users. Inside of it there's a /tmp
2330 * that is sticky, and that's the one we want to use here. */
2331
2332 if (context->private_tmp && runtime) {
2333 if (runtime->tmp_dir)
2334 tmp = strjoina(runtime->tmp_dir, "/tmp");
2335 if (runtime->var_tmp_dir)
2336 var = strjoina(runtime->var_tmp_dir, "/tmp");
2337 }
2338
915e6d16
LP
2339 if (params->flags & EXEC_APPLY_CHROOT) {
2340 root_image = context->root_image;
2341
2342 if (!root_image)
2343 root_dir = context->root_directory;
2344 }
93c6bb51 2345
6c47cd7d
LP
2346 r = compile_bind_mounts(context, params, &bind_mounts, &n_bind_mounts, &empty_directories);
2347 if (r < 0)
2348 return r;
2349
af964954
DH
2350 /*
2351 * If DynamicUser=no and RootDirectory= is set then lets pass a relaxed
2352 * sandbox info, otherwise enforce it, don't ignore protected paths and
2353 * fail if we are enable to apply the sandbox inside the mount namespace.
2354 */
2355 if (!context->dynamic_user && root_dir)
2356 ns_info.ignore_protect_paths = true;
2357
165a31c0 2358 needs_sandboxing = (params->flags & EXEC_APPLY_SANDBOXING) && !(command->flags & EXEC_COMMAND_FULLY_PRIVILEGED);
6818c54c 2359
915e6d16 2360 r = setup_namespace(root_dir, root_image,
7bcef4ef 2361 &ns_info, context->read_write_paths,
165a31c0
LP
2362 needs_sandboxing ? context->read_only_paths : NULL,
2363 needs_sandboxing ? context->inaccessible_paths : NULL,
6c47cd7d
LP
2364 empty_directories,
2365 bind_mounts,
2366 n_bind_mounts,
93c6bb51
DH
2367 tmp,
2368 var,
165a31c0
LP
2369 needs_sandboxing ? context->protect_home : PROTECT_HOME_NO,
2370 needs_sandboxing ? context->protect_system : PROTECT_SYSTEM_NO,
915e6d16
LP
2371 context->mount_flags,
2372 DISSECT_IMAGE_DISCARD_ON_LOOP);
93c6bb51 2373
6c47cd7d
LP
2374 bind_mount_free_many(bind_mounts, n_bind_mounts);
2375
93c6bb51
DH
2376 /* If we couldn't set up the namespace this is probably due to a
2377 * missing capability. In this case, silently proceeed. */
2378 if (IN_SET(r, -EPERM, -EACCES)) {
93c6bb51 2379 log_unit_debug_errno(u, r, "Failed to set up namespace, assuming containerized execution, ignoring: %m");
86ffb325 2380 return 0;
93c6bb51
DH
2381 }
2382
2383 return r;
2384}
2385
915e6d16
LP
2386static int apply_working_directory(
2387 const ExecContext *context,
2388 const ExecParameters *params,
2389 const char *home,
376fecf6
LP
2390 const bool needs_mount_ns,
2391 int *exit_status) {
915e6d16 2392
6732edab 2393 const char *d, *wd;
2b3c1b9e
DH
2394
2395 assert(context);
376fecf6 2396 assert(exit_status);
2b3c1b9e 2397
6732edab
LP
2398 if (context->working_directory_home) {
2399
376fecf6
LP
2400 if (!home) {
2401 *exit_status = EXIT_CHDIR;
6732edab 2402 return -ENXIO;
376fecf6 2403 }
6732edab 2404
2b3c1b9e 2405 wd = home;
6732edab
LP
2406
2407 } else if (context->working_directory)
2b3c1b9e
DH
2408 wd = context->working_directory;
2409 else
2410 wd = "/";
e7f1e7c6
DH
2411
2412 if (params->flags & EXEC_APPLY_CHROOT) {
2413 if (!needs_mount_ns && context->root_directory)
376fecf6
LP
2414 if (chroot(context->root_directory) < 0) {
2415 *exit_status = EXIT_CHROOT;
e7f1e7c6 2416 return -errno;
376fecf6 2417 }
e7f1e7c6 2418
2b3c1b9e
DH
2419 d = wd;
2420 } else
3b0e5bb5 2421 d = prefix_roota(context->root_directory, wd);
e7f1e7c6 2422
376fecf6
LP
2423 if (chdir(d) < 0 && !context->working_directory_missing_ok) {
2424 *exit_status = EXIT_CHDIR;
2b3c1b9e 2425 return -errno;
376fecf6 2426 }
e7f1e7c6
DH
2427
2428 return 0;
2429}
2430
b1edf445
LP
2431static int setup_keyring(
2432 Unit *u,
2433 const ExecContext *context,
2434 const ExecParameters *p,
2435 uid_t uid, gid_t gid) {
2436
74dd6b51 2437 key_serial_t keyring;
b1edf445 2438 int r;
74dd6b51
LP
2439
2440 assert(u);
b1edf445 2441 assert(context);
74dd6b51
LP
2442 assert(p);
2443
2444 /* Let's set up a new per-service "session" kernel keyring for each system service. This has the benefit that
2445 * each service runs with its own keyring shared among all processes of the service, but with no hook-up beyond
2446 * that scope, and in particular no link to the per-UID keyring. If we don't do this the keyring will be
2447 * automatically created on-demand and then linked to the per-UID keyring, by the kernel. The kernel's built-in
2448 * on-demand behaviour is very appropriate for login users, but probably not so much for system services, where
2449 * UIDs are not necessarily specific to a service but reused (at least in the case of UID 0). */
2450
2451 if (!(p->flags & EXEC_NEW_KEYRING))
2452 return 0;
2453
b1edf445
LP
2454 if (context->keyring_mode == EXEC_KEYRING_INHERIT)
2455 return 0;
2456
74dd6b51
LP
2457 keyring = keyctl(KEYCTL_JOIN_SESSION_KEYRING, 0, 0, 0, 0);
2458 if (keyring == -1) {
2459 if (errno == ENOSYS)
8002fb97 2460 log_unit_debug_errno(u, errno, "Kernel keyring not supported, ignoring.");
74dd6b51 2461 else if (IN_SET(errno, EACCES, EPERM))
8002fb97 2462 log_unit_debug_errno(u, errno, "Kernel keyring access prohibited, ignoring.");
74dd6b51 2463 else if (errno == EDQUOT)
8002fb97 2464 log_unit_debug_errno(u, errno, "Out of kernel keyrings to allocate, ignoring.");
74dd6b51 2465 else
8002fb97 2466 return log_unit_error_errno(u, errno, "Setting up kernel keyring failed: %m");
74dd6b51
LP
2467
2468 return 0;
2469 }
2470
b3415f5d
LP
2471 /* Populate they keyring with the invocation ID by default. */
2472 if (!sd_id128_is_null(u->invocation_id)) {
2473 key_serial_t key;
2474
2475 key = add_key("user", "invocation_id", &u->invocation_id, sizeof(u->invocation_id), KEY_SPEC_SESSION_KEYRING);
2476 if (key == -1)
8002fb97 2477 log_unit_debug_errno(u, errno, "Failed to add invocation ID to keyring, ignoring: %m");
b3415f5d
LP
2478 else {
2479 if (keyctl(KEYCTL_SETPERM, key,
2480 KEY_POS_VIEW|KEY_POS_READ|KEY_POS_SEARCH|
2481 KEY_USR_VIEW|KEY_USR_READ|KEY_USR_SEARCH, 0, 0) < 0)
8002fb97 2482 return log_unit_error_errno(u, errno, "Failed to restrict invocation ID permission: %m");
b3415f5d
LP
2483 }
2484 }
2485
74dd6b51
LP
2486 /* And now, make the keyring owned by the service's user */
2487 if (uid_is_valid(uid) || gid_is_valid(gid))
2488 if (keyctl(KEYCTL_CHOWN, keyring, uid, gid, 0) < 0)
8002fb97 2489 return log_unit_error_errno(u, errno, "Failed to change ownership of session keyring: %m");
74dd6b51 2490
b1edf445
LP
2491 /* When requested link the user keyring into the session keyring. */
2492 if (context->keyring_mode == EXEC_KEYRING_SHARED) {
2493 uid_t saved_uid;
2494 gid_t saved_gid;
2495
2496 /* Acquiring a reference to the user keyring is nasty. We briefly change identity in order to get things
2497 * set up properly by the kernel. If we don't do that then we can't create it atomically, and that
2498 * sucks for parallel execution. This mimics what pam_keyinit does, too.*/
2499
2500 saved_uid = getuid();
2501 saved_gid = getgid();
2502
2503 if (gid_is_valid(gid) && gid != saved_gid) {
2504 if (setregid(gid, -1) < 0)
8002fb97 2505 return log_unit_error_errno(u, errno, "Failed to change GID for user keyring: %m");
b1edf445
LP
2506 }
2507
2508 if (uid_is_valid(uid) && uid != saved_uid) {
2509 if (setreuid(uid, -1) < 0) {
2510 (void) setregid(saved_gid, -1);
8002fb97 2511 return log_unit_error_errno(u, errno, "Failed to change UID for user keyring: %m");
b1edf445
LP
2512 }
2513 }
2514
2515 if (keyctl(KEYCTL_LINK,
2516 KEY_SPEC_USER_KEYRING,
2517 KEY_SPEC_SESSION_KEYRING, 0, 0) < 0) {
2518
2519 r = -errno;
2520
2521 (void) setreuid(saved_uid, -1);
2522 (void) setregid(saved_gid, -1);
2523
8002fb97 2524 return log_unit_error_errno(u, r, "Failed to link user keyring into session keyring: %m");
b1edf445
LP
2525 }
2526
2527 if (uid_is_valid(uid) && uid != saved_uid) {
2528 if (setreuid(saved_uid, -1) < 0) {
2529 (void) setregid(saved_gid, -1);
8002fb97 2530 return log_unit_error_errno(u, errno, "Failed to change UID back for user keyring: %m");
b1edf445
LP
2531 }
2532 }
2533
2534 if (gid_is_valid(gid) && gid != saved_gid) {
2535 if (setregid(saved_gid, -1) < 0)
8002fb97 2536 return log_unit_error_errno(u, errno, "Failed to change GID back for user keyring: %m");
b1edf445 2537 }
61ceaea5 2538 }
b1edf445 2539
74dd6b51
LP
2540 return 0;
2541}
2542
29206d46
LP
/* Appends the valid (i.e. non-negative) file descriptors of a socket pair to array[], advancing *n for
 * each one added. A NULL pair is silently ignored. The caller must make sure array[] has room for up to
 * two more entries. */
static void append_socket_pair(int *array, unsigned *n, int pair[2]) {
        int side;

        assert(array);
        assert(n);

        if (!pair)
                return;

        for (side = 0; side < 2; side++) {
                if (pair[side] < 0)
                        continue;

                array[*n] = pair[side];
                (*n)++;
        }
}
2555
a34ceba6
LP
2556static int close_remaining_fds(
2557 const ExecParameters *params,
2558 ExecRuntime *runtime,
29206d46 2559 DynamicCreds *dcreds,
00d9ef85 2560 int user_lookup_fd,
a34ceba6
LP
2561 int socket_fd,
2562 int *fds, unsigned n_fds) {
2563
2564 unsigned n_dont_close = 0;
00d9ef85 2565 int dont_close[n_fds + 12];
a34ceba6
LP
2566
2567 assert(params);
2568
2569 if (params->stdin_fd >= 0)
2570 dont_close[n_dont_close++] = params->stdin_fd;
2571 if (params->stdout_fd >= 0)
2572 dont_close[n_dont_close++] = params->stdout_fd;
2573 if (params->stderr_fd >= 0)
2574 dont_close[n_dont_close++] = params->stderr_fd;
2575
2576 if (socket_fd >= 0)
2577 dont_close[n_dont_close++] = socket_fd;
2578 if (n_fds > 0) {
2579 memcpy(dont_close + n_dont_close, fds, sizeof(int) * n_fds);
2580 n_dont_close += n_fds;
2581 }
2582
29206d46
LP
2583 if (runtime)
2584 append_socket_pair(dont_close, &n_dont_close, runtime->netns_storage_socket);
2585
2586 if (dcreds) {
2587 if (dcreds->user)
2588 append_socket_pair(dont_close, &n_dont_close, dcreds->user->storage_socket);
2589 if (dcreds->group)
2590 append_socket_pair(dont_close, &n_dont_close, dcreds->group->storage_socket);
a34ceba6
LP
2591 }
2592
00d9ef85
LP
2593 if (user_lookup_fd >= 0)
2594 dont_close[n_dont_close++] = user_lookup_fd;
2595
a34ceba6
LP
2596 return close_all_fds(dont_close, n_dont_close);
2597}
2598
00d9ef85
LP
2599static int send_user_lookup(
2600 Unit *unit,
2601 int user_lookup_fd,
2602 uid_t uid,
2603 gid_t gid) {
2604
2605 assert(unit);
2606
2607 /* Send the resolved UID/GID to PID 1 after we learnt it. We send a single datagram, containing the UID/GID
2608 * data as well as the unit name. Note that we suppress sending this if no user/group to resolve was
2609 * specified. */
2610
2611 if (user_lookup_fd < 0)
2612 return 0;
2613
2614 if (!uid_is_valid(uid) && !gid_is_valid(gid))
2615 return 0;
2616
2617 if (writev(user_lookup_fd,
2618 (struct iovec[]) {
e6a7ec4b
LP
2619 IOVEC_INIT(&uid, sizeof(uid)),
2620 IOVEC_INIT(&gid, sizeof(gid)),
2621 IOVEC_INIT_STRING(unit->id) }, 3) < 0)
00d9ef85
LP
2622 return -errno;
2623
2624 return 0;
2625}
2626
6732edab
LP
2627static int acquire_home(const ExecContext *c, uid_t uid, const char** home, char **buf) {
2628 int r;
2629
2630 assert(c);
2631 assert(home);
2632 assert(buf);
2633
2634 /* If WorkingDirectory=~ is set, try to acquire a usable home directory. */
2635
2636 if (*home)
2637 return 0;
2638
2639 if (!c->working_directory_home)
2640 return 0;
2641
2642 if (uid == 0) {
2643 /* Hardcode /root as home directory for UID 0 */
2644 *home = "/root";
2645 return 1;
2646 }
2647
2648 r = get_home_dir(buf);
2649 if (r < 0)
2650 return r;
2651
2652 *home = *buf;
2653 return 1;
2654}
2655
da50b85a
LP
2656static int compile_suggested_paths(const ExecContext *c, const ExecParameters *p, char ***ret) {
2657 _cleanup_strv_free_ char ** list = NULL;
2658 ExecDirectoryType t;
2659 int r;
2660
2661 assert(c);
2662 assert(p);
2663 assert(ret);
2664
2665 assert(c->dynamic_user);
2666
2667 /* Compile a list of paths that it might make sense to read the owning UID from to use as initial candidate for
2668 * dynamic UID allocation, in order to save us from doing costly recursive chown()s of the special
2669 * directories. */
2670
2671 for (t = 0; t < _EXEC_DIRECTORY_TYPE_MAX; t++) {
2672 char **i;
2673
2674 if (t == EXEC_DIRECTORY_CONFIGURATION)
2675 continue;
2676
2677 if (!p->prefix[t])
2678 continue;
2679
2680 STRV_FOREACH(i, c->directories[t].paths) {
2681 char *e;
2682
8092a48c
YW
2683 if (t == EXEC_DIRECTORY_RUNTIME)
2684 e = strjoin(p->prefix[t], "/", *i);
2685 else
2686 e = strjoin(p->prefix[t], "/private/", *i);
da50b85a
LP
2687 if (!e)
2688 return -ENOMEM;
2689
2690 r = strv_consume(&list, e);
2691 if (r < 0)
2692 return r;
2693 }
2694 }
2695
2696 *ret = list;
2697 list = NULL;
2698
2699 return 0;
2700}
2701
ff0af2a1 2702static int exec_child(
f2341e0a 2703 Unit *unit,
ff0af2a1
LP
2704 ExecCommand *command,
2705 const ExecContext *context,
2706 const ExecParameters *params,
2707 ExecRuntime *runtime,
29206d46 2708 DynamicCreds *dcreds,
ff0af2a1
LP
2709 char **argv,
2710 int socket_fd,
52c239d7 2711 int named_iofds[3],
4c47affc
FB
2712 int *fds,
2713 unsigned n_storage_fds,
9b141911 2714 unsigned n_socket_fds,
ff0af2a1 2715 char **files_env,
00d9ef85 2716 int user_lookup_fd,
12145637 2717 int *exit_status) {
d35fbf6b 2718
2065ca69 2719 _cleanup_strv_free_ char **our_env = NULL, **pass_env = NULL, **accum_env = NULL, **final_argv = NULL;
6732edab 2720 _cleanup_free_ char *mac_selinux_context_net = NULL, *home_buffer = NULL;
4d885bd3
DH
2721 _cleanup_free_ gid_t *supplementary_gids = NULL;
2722 const char *username = NULL, *groupname = NULL;
2b3c1b9e 2723 const char *home = NULL, *shell = NULL;
7bce046b
LP
2724 dev_t journal_stream_dev = 0;
2725 ino_t journal_stream_ino = 0;
165a31c0
LP
2726 bool needs_sandboxing, /* Do we need to set up full sandboxing? (i.e. all namespacing, all MAC stuff, caps, yadda yadda */
2727 needs_setuid, /* Do we need to do the actual setresuid()/setresgid() calls? */
2728 needs_mount_namespace, /* Do we need to set up a mount namespace for this kernel? */
2729 needs_ambient_hack; /* Do we need to apply the ambient capabilities hack? */
349cc4a5 2730#if HAVE_SELINUX
43b1f709 2731 bool use_selinux = false;
ecfbc84f 2732#endif
f9fa32f0 2733#if ENABLE_SMACK
43b1f709 2734 bool use_smack = false;
ecfbc84f 2735#endif
349cc4a5 2736#if HAVE_APPARMOR
43b1f709 2737 bool use_apparmor = false;
ecfbc84f 2738#endif
fed1e721
LP
2739 uid_t uid = UID_INVALID;
2740 gid_t gid = GID_INVALID;
4d885bd3 2741 int i, r, ngids = 0;
4c47affc 2742 unsigned n_fds;
3536f49e 2743 ExecDirectoryType dt;
165a31c0 2744 int secure_bits;
034c6ed7 2745
f2341e0a 2746 assert(unit);
5cb5a6ff
LP
2747 assert(command);
2748 assert(context);
d35fbf6b 2749 assert(params);
ff0af2a1 2750 assert(exit_status);
d35fbf6b
DM
2751
2752 rename_process_from_path(command->path);
2753
2754 /* We reset exactly these signals, since they are the
2755 * only ones we set to SIG_IGN in the main daemon. All
2756 * others we leave untouched because we set them to
2757 * SIG_DFL or a valid handler initially, both of which
2758 * will be demoted to SIG_DFL. */
ce30c8dc
LP
2759 (void) default_signals(SIGNALS_CRASH_HANDLER,
2760 SIGNALS_IGNORE, -1);
d35fbf6b
DM
2761
2762 if (context->ignore_sigpipe)
ce30c8dc 2763 (void) ignore_signals(SIGPIPE, -1);
d35fbf6b 2764
ff0af2a1
LP
2765 r = reset_signal_mask();
2766 if (r < 0) {
2767 *exit_status = EXIT_SIGNAL_MASK;
12145637 2768 return log_unit_error_errno(unit, r, "Failed to set process signal mask: %m");
d35fbf6b 2769 }
034c6ed7 2770
d35fbf6b
DM
2771 if (params->idle_pipe)
2772 do_idle_pipe_dance(params->idle_pipe);
4f2d528d 2773
2c027c62
LP
2774 /* Close fds we don't need very early to make sure we don't block init reexecution because it cannot bind its
2775 * sockets. Among the fds we close are the logging fds, and we want to keep them closed, so that we don't have
2776 * any fds open we don't really want open during the transition. In order to make logging work, we switch the
2777 * log subsystem into open_when_needed mode, so that it reopens the logs on every single log call. */
ff0af2a1 2778
d35fbf6b 2779 log_forget_fds();
2c027c62 2780 log_set_open_when_needed(true);
4f2d528d 2781
40a80078
LP
2782 /* In case anything used libc syslog(), close this here, too */
2783 closelog();
2784
4c47affc 2785 n_fds = n_storage_fds + n_socket_fds;
00d9ef85 2786 r = close_remaining_fds(params, runtime, dcreds, user_lookup_fd, socket_fd, fds, n_fds);
ff0af2a1
LP
2787 if (r < 0) {
2788 *exit_status = EXIT_FDS;
12145637 2789 return log_unit_error_errno(unit, r, "Failed to close unwanted file descriptors: %m");
8c7be95e
LP
2790 }
2791
d35fbf6b
DM
2792 if (!context->same_pgrp)
2793 if (setsid() < 0) {
ff0af2a1 2794 *exit_status = EXIT_SETSID;
12145637 2795 return log_unit_error_errno(unit, errno, "Failed to create new process session: %m");
d35fbf6b 2796 }
9e2f7c11 2797
1e22b5cd 2798 exec_context_tty_reset(context, params);
d35fbf6b 2799
c891efaf 2800 if (unit_shall_confirm_spawn(unit)) {
7d5ceb64 2801 const char *vc = params->confirm_spawn;
3b20f877
FB
2802 _cleanup_free_ char *cmdline = NULL;
2803
2804 cmdline = exec_command_line(argv);
2805 if (!cmdline) {
0460aa5c 2806 *exit_status = EXIT_MEMORY;
12145637 2807 return log_oom();
3b20f877 2808 }
d35fbf6b 2809
eedf223a 2810 r = ask_for_confirmation(vc, unit, cmdline);
3b20f877
FB
2811 if (r != CONFIRM_EXECUTE) {
2812 if (r == CONFIRM_PRETEND_SUCCESS) {
2813 *exit_status = EXIT_SUCCESS;
2814 return 0;
2815 }
ff0af2a1 2816 *exit_status = EXIT_CONFIRM;
12145637 2817 log_unit_error(unit, "Execution cancelled by the user");
d35fbf6b 2818 return -ECANCELED;
d35fbf6b
DM
2819 }
2820 }
1a63a750 2821
29206d46 2822 if (context->dynamic_user && dcreds) {
da50b85a 2823 _cleanup_strv_free_ char **suggested_paths = NULL;
29206d46 2824
409093fe
LP
2825 /* Make sure we bypass our own NSS module for any NSS checks */
2826 if (putenv((char*) "SYSTEMD_NSS_DYNAMIC_BYPASS=1") != 0) {
2827 *exit_status = EXIT_USER;
12145637 2828 return log_unit_error_errno(unit, errno, "Failed to update environment: %m");
409093fe
LP
2829 }
2830
da50b85a
LP
2831 r = compile_suggested_paths(context, params, &suggested_paths);
2832 if (r < 0) {
2833 *exit_status = EXIT_MEMORY;
2834 return log_oom();
2835 }
2836
2837 r = dynamic_creds_realize(dcreds, suggested_paths, &uid, &gid);
ff0af2a1
LP
2838 if (r < 0) {
2839 *exit_status = EXIT_USER;
e2b0cc34
YW
2840 if (r == -EILSEQ) {
2841 log_unit_error(unit, "Failed to update dynamic user credentials: User or group with specified name already exists.");
2842 return -EOPNOTSUPP;
2843 }
12145637 2844 return log_unit_error_errno(unit, r, "Failed to update dynamic user credentials: %m");
524daa8c 2845 }
524daa8c 2846
70dd455c 2847 if (!uid_is_valid(uid)) {
29206d46 2848 *exit_status = EXIT_USER;
12145637 2849 log_unit_error(unit, "UID validation failed for \""UID_FMT"\"", uid);
70dd455c
ZJS
2850 return -ESRCH;
2851 }
2852
2853 if (!gid_is_valid(gid)) {
2854 *exit_status = EXIT_USER;
12145637 2855 log_unit_error(unit, "GID validation failed for \""GID_FMT"\"", gid);
29206d46
LP
2856 return -ESRCH;
2857 }
5bc7452b 2858
29206d46
LP
2859 if (dcreds->user)
2860 username = dcreds->user->name;
2861
2862 } else {
4d885bd3
DH
2863 r = get_fixed_user(context, &username, &uid, &gid, &home, &shell);
2864 if (r < 0) {
2865 *exit_status = EXIT_USER;
12145637 2866 return log_unit_error_errno(unit, r, "Failed to determine user credentials: %m");
5bc7452b 2867 }
5bc7452b 2868
4d885bd3
DH
2869 r = get_fixed_group(context, &groupname, &gid);
2870 if (r < 0) {
2871 *exit_status = EXIT_GROUP;
12145637 2872 return log_unit_error_errno(unit, r, "Failed to determine group credentials: %m");
4d885bd3 2873 }
cdc5d5c5 2874 }
29206d46 2875
cdc5d5c5
DH
2876 /* Initialize user supplementary groups and get SupplementaryGroups= ones */
2877 r = get_supplementary_groups(context, username, groupname, gid,
2878 &supplementary_gids, &ngids);
2879 if (r < 0) {
2880 *exit_status = EXIT_GROUP;
12145637 2881 return log_unit_error_errno(unit, r, "Failed to determine supplementary groups: %m");
29206d46 2882 }
5bc7452b 2883
00d9ef85
LP
2884 r = send_user_lookup(unit, user_lookup_fd, uid, gid);
2885 if (r < 0) {
2886 *exit_status = EXIT_USER;
12145637 2887 return log_unit_error_errno(unit, r, "Failed to send user credentials to PID1: %m");
00d9ef85
LP
2888 }
2889
2890 user_lookup_fd = safe_close(user_lookup_fd);
2891
6732edab
LP
2892 r = acquire_home(context, uid, &home, &home_buffer);
2893 if (r < 0) {
2894 *exit_status = EXIT_CHDIR;
12145637 2895 return log_unit_error_errno(unit, r, "Failed to determine $HOME for user: %m");
6732edab
LP
2896 }
2897
d35fbf6b
DM
2898 /* If a socket is connected to STDIN/STDOUT/STDERR, we
2899 * must be sure to drop O_NONBLOCK */
2900 if (socket_fd >= 0)
a34ceba6 2901 (void) fd_nonblock(socket_fd, false);
acbb0225 2902
52c239d7 2903 r = setup_input(context, params, socket_fd, named_iofds);
ff0af2a1
LP
2904 if (r < 0) {
2905 *exit_status = EXIT_STDIN;
12145637 2906 return log_unit_error_errno(unit, r, "Failed to set up standard input: %m");
d35fbf6b 2907 }
034c6ed7 2908
52c239d7 2909 r = setup_output(unit, context, params, STDOUT_FILENO, socket_fd, named_iofds, basename(command->path), uid, gid, &journal_stream_dev, &journal_stream_ino);
ff0af2a1
LP
2910 if (r < 0) {
2911 *exit_status = EXIT_STDOUT;
12145637 2912 return log_unit_error_errno(unit, r, "Failed to set up standard output: %m");
d35fbf6b
DM
2913 }
2914
52c239d7 2915 r = setup_output(unit, context, params, STDERR_FILENO, socket_fd, named_iofds, basename(command->path), uid, gid, &journal_stream_dev, &journal_stream_ino);
ff0af2a1
LP
2916 if (r < 0) {
2917 *exit_status = EXIT_STDERR;
12145637 2918 return log_unit_error_errno(unit, r, "Failed to set up standard error output: %m");
d35fbf6b
DM
2919 }
2920
2921 if (params->cgroup_path) {
ff0af2a1
LP
2922 r = cg_attach_everywhere(params->cgroup_supported, params->cgroup_path, 0, NULL, NULL);
2923 if (r < 0) {
2924 *exit_status = EXIT_CGROUP;
12145637 2925 return log_unit_error_errno(unit, r, "Failed to attach to cgroup %s: %m", params->cgroup_path);
309bff19 2926 }
d35fbf6b 2927 }
309bff19 2928
d35fbf6b 2929 if (context->oom_score_adjust_set) {
d5243d62 2930 char t[DECIMAL_STR_MAX(context->oom_score_adjust)];
f2b68789 2931
d5243d62
LP
2932 /* When we can't make this change due to EPERM, then
2933 * let's silently skip over it. User namespaces
2934 * prohibit write access to this file, and we
2935 * shouldn't trip up over that. */
613b411c 2936
d5243d62 2937 sprintf(t, "%i", context->oom_score_adjust);
ad118bda 2938 r = write_string_file("/proc/self/oom_score_adj", t, 0);
12145637 2939 if (IN_SET(r, -EPERM, -EACCES))
f2341e0a 2940 log_unit_debug_errno(unit, r, "Failed to adjust OOM setting, assuming containerized execution, ignoring: %m");
12145637 2941 else if (r < 0) {
ff0af2a1 2942 *exit_status = EXIT_OOM_ADJUST;
12145637 2943 return log_unit_error_errno(unit, r, "Failed to adjust OOM setting: %m");
613b411c 2944 }
d35fbf6b
DM
2945 }
2946
2947 if (context->nice_set)
2948 if (setpriority(PRIO_PROCESS, 0, context->nice) < 0) {
ff0af2a1 2949 *exit_status = EXIT_NICE;
12145637 2950 return log_unit_error_errno(unit, errno, "Failed to set up process scheduling priority (nice level): %m");
613b411c
LP
2951 }
2952
d35fbf6b
DM
2953 if (context->cpu_sched_set) {
2954 struct sched_param param = {
2955 .sched_priority = context->cpu_sched_priority,
2956 };
2957
ff0af2a1
LP
2958 r = sched_setscheduler(0,
2959 context->cpu_sched_policy |
2960 (context->cpu_sched_reset_on_fork ?
2961 SCHED_RESET_ON_FORK : 0),
2962 &param);
2963 if (r < 0) {
2964 *exit_status = EXIT_SETSCHEDULER;
12145637 2965 return log_unit_error_errno(unit, errno, "Failed to set up CPU scheduling: %m");
fc9b2a84 2966 }
d35fbf6b 2967 }
fc9b2a84 2968
d35fbf6b
DM
2969 if (context->cpuset)
2970 if (sched_setaffinity(0, CPU_ALLOC_SIZE(context->cpuset_ncpus), context->cpuset) < 0) {
ff0af2a1 2971 *exit_status = EXIT_CPUAFFINITY;
12145637 2972 return log_unit_error_errno(unit, errno, "Failed to set up CPU affinity: %m");
034c6ed7
LP
2973 }
2974
d35fbf6b
DM
2975 if (context->ioprio_set)
2976 if (ioprio_set(IOPRIO_WHO_PROCESS, 0, context->ioprio) < 0) {
ff0af2a1 2977 *exit_status = EXIT_IOPRIO;
12145637 2978 return log_unit_error_errno(unit, errno, "Failed to set up IO scheduling priority: %m");
d35fbf6b 2979 }
da726a4d 2980
d35fbf6b
DM
2981 if (context->timer_slack_nsec != NSEC_INFINITY)
2982 if (prctl(PR_SET_TIMERSLACK, context->timer_slack_nsec) < 0) {
ff0af2a1 2983 *exit_status = EXIT_TIMERSLACK;
12145637 2984 return log_unit_error_errno(unit, errno, "Failed to set up timer slack: %m");
4c2630eb 2985 }
9eba9da4 2986
21022b9d
LP
2987 if (context->personality != PERSONALITY_INVALID) {
2988 r = safe_personality(context->personality);
2989 if (r < 0) {
ff0af2a1 2990 *exit_status = EXIT_PERSONALITY;
12145637 2991 return log_unit_error_errno(unit, r, "Failed to set up execution domain (personality): %m");
4c2630eb 2992 }
21022b9d 2993 }
94f04347 2994
d35fbf6b 2995 if (context->utmp_id)
df0ff127 2996 utmp_put_init_process(context->utmp_id, getpid_cached(), getsid(0),
6a93917d 2997 context->tty_path,
023a4f67
LP
2998 context->utmp_mode == EXEC_UTMP_INIT ? INIT_PROCESS :
2999 context->utmp_mode == EXEC_UTMP_LOGIN ? LOGIN_PROCESS :
3000 USER_PROCESS,
6a93917d 3001 username);
d35fbf6b 3002
e0d2adfd 3003 if (context->user) {
ff0af2a1
LP
3004 r = chown_terminal(STDIN_FILENO, uid);
3005 if (r < 0) {
3006 *exit_status = EXIT_STDIN;
12145637 3007 return log_unit_error_errno(unit, r, "Failed to change ownership of terminal: %m");
071830ff 3008 }
d35fbf6b 3009 }
8e274523 3010
a931ad47
LP
3011 /* If delegation is enabled we'll pass ownership of the cgroup
3012 * (but only in systemd's own controller hierarchy!) to the
3013 * user of the new process. */
584b8688 3014 if (params->cgroup_path && context->user && (params->flags & EXEC_CGROUP_DELEGATE)) {
ff0af2a1
LP
3015 r = cg_set_task_access(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, 0644, uid, gid);
3016 if (r < 0) {
3017 *exit_status = EXIT_CGROUP;
12145637 3018 return log_unit_error_errno(unit, r, "Failed to adjust control group access: %m");
d35fbf6b 3019 }
034c6ed7 3020
ff0af2a1
LP
3021 r = cg_set_group_access(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, 0755, uid, gid);
3022 if (r < 0) {
3023 *exit_status = EXIT_CGROUP;
12145637 3024 return log_unit_error_errno(unit, r, "Failed to adjust control group access: %m");
034c6ed7 3025 }
d35fbf6b 3026 }
034c6ed7 3027
72fd1768 3028 for (dt = 0; dt < _EXEC_DIRECTORY_TYPE_MAX; dt++) {
8679efde 3029 r = setup_exec_directory(context, params, uid, gid, dt, exit_status);
12145637
LP
3030 if (r < 0)
3031 return log_unit_error_errno(unit, r, "Failed to set up special execution directory in %s: %m", params->prefix[dt]);
d35fbf6b 3032 }
94f04347 3033
7bce046b 3034 r = build_environment(
fd63e712 3035 unit,
7bce046b
LP
3036 context,
3037 params,
3038 n_fds,
3039 home,
3040 username,
3041 shell,
3042 journal_stream_dev,
3043 journal_stream_ino,
3044 &our_env);
2065ca69
JW
3045 if (r < 0) {
3046 *exit_status = EXIT_MEMORY;
12145637 3047 return log_oom();
2065ca69
JW
3048 }
3049
3050 r = build_pass_environment(context, &pass_env);
3051 if (r < 0) {
3052 *exit_status = EXIT_MEMORY;
12145637 3053 return log_oom();
2065ca69
JW
3054 }
3055
3056 accum_env = strv_env_merge(5,
3057 params->environment,
3058 our_env,
3059 pass_env,
3060 context->environment,
3061 files_env,
3062 NULL);
3063 if (!accum_env) {
3064 *exit_status = EXIT_MEMORY;
12145637 3065 return log_oom();
2065ca69 3066 }
1280503b 3067 accum_env = strv_env_clean(accum_env);
2065ca69 3068
096424d1 3069 (void) umask(context->umask);
b213e1c1 3070
b1edf445 3071 r = setup_keyring(unit, context, params, uid, gid);
74dd6b51
LP
3072 if (r < 0) {
3073 *exit_status = EXIT_KEYRING;
12145637 3074 return log_unit_error_errno(unit, r, "Failed to set up kernel keyring: %m");
74dd6b51
LP
3075 }
3076
165a31c0 3077 /* We need sandboxing if the caller asked us to apply it and the command isn't explicitly excepted from it */
1703fa41 3078 needs_sandboxing = (params->flags & EXEC_APPLY_SANDBOXING) && !(command->flags & EXEC_COMMAND_FULLY_PRIVILEGED);
7f18ef0a 3079
165a31c0
LP
3080 /* We need the ambient capability hack, if the caller asked us to apply it and the command is marked for it, and the kernel doesn't actually support ambient caps */
3081 needs_ambient_hack = (params->flags & EXEC_APPLY_SANDBOXING) && (command->flags & EXEC_COMMAND_AMBIENT_MAGIC) && !ambient_capabilities_supported();
7f18ef0a 3082
165a31c0
LP
3083 /* We need setresuid() if the caller asked us to apply sandboxing and the command isn't explicitly excepted from either whole sandboxing or just setresuid() itself, and the ambient hack is not desired */
3084 if (needs_ambient_hack)
3085 needs_setuid = false;
3086 else
3087 needs_setuid = (params->flags & EXEC_APPLY_SANDBOXING) && !(command->flags & (EXEC_COMMAND_FULLY_PRIVILEGED|EXEC_COMMAND_NO_SETUID));
3088
3089 if (needs_sandboxing) {
7f18ef0a
FK
3090 /* MAC enablement checks need to be done before a new mount ns is created, as they rely on /sys being
3091 * present. The actual MAC context application will happen later, as late as possible, to avoid
3092 * impacting our own code paths. */
3093
349cc4a5 3094#if HAVE_SELINUX
43b1f709 3095 use_selinux = mac_selinux_use();
7f18ef0a 3096#endif
f9fa32f0 3097#if ENABLE_SMACK
43b1f709 3098 use_smack = mac_smack_use();
7f18ef0a 3099#endif
349cc4a5 3100#if HAVE_APPARMOR
43b1f709 3101 use_apparmor = mac_apparmor_use();
7f18ef0a 3102#endif
165a31c0 3103 }
7f18ef0a 3104
165a31c0
LP
3105 if (needs_setuid) {
3106 if (context->pam_name && username) {
3107 r = setup_pam(context->pam_name, username, uid, gid, context->tty_path, &accum_env, fds, n_fds);
3108 if (r < 0) {
3109 *exit_status = EXIT_PAM;
12145637 3110 return log_unit_error_errno(unit, r, "Failed to set up PAM session: %m");
165a31c0
LP
3111 }
3112 }
b213e1c1 3113 }
ac45f971 3114
d35fbf6b 3115 if (context->private_network && runtime && runtime->netns_storage_socket[0] >= 0) {
6e2d7c4f
MS
3116 if (ns_type_supported(NAMESPACE_NET)) {
3117 r = setup_netns(runtime->netns_storage_socket);
3118 if (r < 0) {
3119 *exit_status = EXIT_NETWORK;
3120 return log_unit_error_errno(unit, r, "Failed to set up network namespacing: %m");
3121 }
3122 } else
3123 log_unit_warning(unit, "PrivateNetwork=yes is configured, but the kernel does not support network namespaces, ignoring.");
d35fbf6b 3124 }
169c1bda 3125
ee818b89 3126 needs_mount_namespace = exec_needs_mount_namespace(context, params, runtime);
ee818b89 3127 if (needs_mount_namespace) {
6818c54c 3128 r = apply_mount_namespace(unit, command, context, params, runtime);
3fbe8dbe
LP
3129 if (r < 0) {
3130 *exit_status = EXIT_NAMESPACE;
12145637 3131 return log_unit_error_errno(unit, r, "Failed to set up mount namespacing: %m");
3fbe8dbe 3132 }
d35fbf6b 3133 }
81a2b7ce 3134
50b3dfb9 3135 /* Apply just after mount namespace setup */
376fecf6 3136 r = apply_working_directory(context, params, home, needs_mount_namespace, exit_status);
12145637
LP
3137 if (r < 0)
3138 return log_unit_error_errno(unit, r, "Changing to the requested working directory failed: %m");
50b3dfb9 3139
bbeea271 3140 /* Drop groups as early as possible */
165a31c0 3141 if (needs_setuid) {
709dbeac 3142 r = enforce_groups(gid, supplementary_gids, ngids);
096424d1
LP
3143 if (r < 0) {
3144 *exit_status = EXIT_GROUP;
12145637 3145 return log_unit_error_errno(unit, r, "Changing group credentials failed: %m");
096424d1 3146 }
165a31c0 3147 }
096424d1 3148
165a31c0 3149 if (needs_sandboxing) {
349cc4a5 3150#if HAVE_SELINUX
43b1f709 3151 if (use_selinux && params->selinux_context_net && socket_fd >= 0) {
937ccce9
LP
3152 r = mac_selinux_get_child_mls_label(socket_fd, command->path, context->selinux_context, &mac_selinux_context_net);
3153 if (r < 0) {
3154 *exit_status = EXIT_SELINUX_CONTEXT;
12145637 3155 return log_unit_error_errno(unit, r, "Failed to determine SELinux context: %m");
937ccce9 3156 }
9008e1ac 3157 }
9008e1ac
MS
3158#endif
3159
937ccce9
LP
3160 if (context->private_users) {
3161 r = setup_private_users(uid, gid);
3162 if (r < 0) {
3163 *exit_status = EXIT_USER;
12145637 3164 return log_unit_error_errno(unit, r, "Failed to set up user namespacing: %m");
937ccce9 3165 }
d251207d
LP
3166 }
3167 }
3168
165a31c0
LP
3169 /* We repeat the fd closing here, to make sure that nothing is leaked from the PAM modules. Note that we are
3170 * more aggressive this time since socket_fd and the netns fds we don't need anymore. The custom endpoint fd
3171 * was needed to upload the policy and can now be closed as well. */
ff0af2a1
LP
3172 r = close_all_fds(fds, n_fds);
3173 if (r >= 0)
3174 r = shift_fds(fds, n_fds);
3175 if (r >= 0)
4c47affc 3176 r = flags_fds(fds, n_storage_fds, n_socket_fds, context->non_blocking);
ff0af2a1
LP
3177 if (r < 0) {
3178 *exit_status = EXIT_FDS;
12145637 3179 return log_unit_error_errno(unit, r, "Failed to adjust passed file descriptors: %m");
d35fbf6b 3180 }
e66cf1a3 3181
165a31c0 3182 secure_bits = context->secure_bits;
e66cf1a3 3183
165a31c0
LP
3184 if (needs_sandboxing) {
3185 uint64_t bset;
755d4b67 3186
d35fbf6b 3187 for (i = 0; i < _RLIMIT_MAX; i++) {
03857c43 3188
d35fbf6b
DM
3189 if (!context->rlimit[i])
3190 continue;
3191
03857c43
LP
3192 r = setrlimit_closest(i, context->rlimit[i]);
3193 if (r < 0) {
ff0af2a1 3194 *exit_status = EXIT_LIMITS;
12145637 3195 return log_unit_error_errno(unit, r, "Failed to adjust resource limit %s: %m", rlimit_to_string(i));
e66cf1a3
LP
3196 }
3197 }
3198
f4170c67
LP
3199 /* Set the RTPRIO resource limit to 0, but only if nothing else was explicitly requested. */
3200 if (context->restrict_realtime && !context->rlimit[RLIMIT_RTPRIO]) {
3201 if (setrlimit(RLIMIT_RTPRIO, &RLIMIT_MAKE_CONST(0)) < 0) {
3202 *exit_status = EXIT_LIMITS;
12145637 3203 return log_unit_error_errno(unit, errno, "Failed to adjust RLIMIT_RTPRIO resource limit: %m");
f4170c67
LP
3204 }
3205 }
3206
165a31c0
LP
3207 bset = context->capability_bounding_set;
3208 /* If the ambient caps hack is enabled (which means the kernel can't do them, and the user asked for
3209 * our magic fallback), then let's add some extra caps, so that the service can drop privs of its own,
3210 * instead of us doing that */
3211 if (needs_ambient_hack)
3212 bset |= (UINT64_C(1) << CAP_SETPCAP) |
3213 (UINT64_C(1) << CAP_SETUID) |
3214 (UINT64_C(1) << CAP_SETGID);
3215
3216 if (!cap_test_all(bset)) {
3217 r = capability_bounding_set_drop(bset, false);
ff0af2a1
LP
3218 if (r < 0) {
3219 *exit_status = EXIT_CAPABILITIES;
12145637 3220 return log_unit_error_errno(unit, r, "Failed to drop capabilities: %m");
3b8bddde 3221 }
4c2630eb 3222 }
3b8bddde 3223
755d4b67
IP
3224 /* This is done before enforce_user, but ambient set
3225 * does not survive over setresuid() if keep_caps is not set. */
165a31c0
LP
3226 if (!needs_ambient_hack &&
3227 context->capability_ambient_set != 0) {
755d4b67
IP
3228 r = capability_ambient_set_apply(context->capability_ambient_set, true);
3229 if (r < 0) {
3230 *exit_status = EXIT_CAPABILITIES;
12145637 3231 return log_unit_error_errno(unit, r, "Failed to apply ambient capabilities (before UID change): %m");
755d4b67 3232 }
755d4b67 3233 }
165a31c0 3234 }
755d4b67 3235
165a31c0 3236 if (needs_setuid) {
d35fbf6b 3237 if (context->user) {
ff0af2a1
LP
3238 r = enforce_user(context, uid);
3239 if (r < 0) {
3240 *exit_status = EXIT_USER;
12145637 3241 return log_unit_error_errno(unit, r, "Failed to change UID to " UID_FMT ": %m", uid);
5b6319dc 3242 }
165a31c0
LP
3243
3244 if (!needs_ambient_hack &&
3245 context->capability_ambient_set != 0) {
755d4b67
IP
3246
3247 /* Fix the ambient capabilities after user change. */
3248 r = capability_ambient_set_apply(context->capability_ambient_set, false);
3249 if (r < 0) {
3250 *exit_status = EXIT_CAPABILITIES;
12145637 3251 return log_unit_error_errno(unit, r, "Failed to apply ambient capabilities (after UID change): %m");
755d4b67
IP
3252 }
3253
3254 /* If we were asked to change user and ambient capabilities
3255 * were requested, we had to add keep-caps to the securebits
3256 * so that we would maintain the inherited capability set
3257 * through the setresuid(). Make sure that the bit is added
3258 * also to the context secure_bits so that we don't try to
3259 * drop the bit away next. */
3260
7f508f2c 3261 secure_bits |= 1<<SECURE_KEEP_CAPS;
755d4b67 3262 }
5b6319dc 3263 }
165a31c0 3264 }
d35fbf6b 3265
165a31c0 3266 if (needs_sandboxing) {
5cd9cd35
LP
3267 /* Apply the MAC contexts late, but before seccomp syscall filtering, as those should really be last to
3268 * influence our own codepaths as little as possible. Moreover, applying MAC contexts usually requires
3269 * syscalls that are subject to seccomp filtering, hence should probably be applied before the syscalls
3270 * are restricted. */
3271
349cc4a5 3272#if HAVE_SELINUX
43b1f709 3273 if (use_selinux) {
5cd9cd35
LP
3274 char *exec_context = mac_selinux_context_net ?: context->selinux_context;
3275
3276 if (exec_context) {
3277 r = setexeccon(exec_context);
3278 if (r < 0) {
3279 *exit_status = EXIT_SELINUX_CONTEXT;
12145637 3280 return log_unit_error_errno(unit, r, "Failed to change SELinux context to %s: %m", exec_context);
5cd9cd35
LP
3281 }
3282 }
3283 }
3284#endif
3285
f9fa32f0 3286#if ENABLE_SMACK
43b1f709 3287 if (use_smack) {
7f18ef0a
FK
3288 r = setup_smack(context, command);
3289 if (r < 0) {
3290 *exit_status = EXIT_SMACK_PROCESS_LABEL;
12145637 3291 return log_unit_error_errno(unit, r, "Failed to set SMACK process label: %m");
7f18ef0a 3292 }
5cd9cd35 3293 }
7f18ef0a 3294#endif
5cd9cd35 3295
349cc4a5 3296#if HAVE_APPARMOR
43b1f709 3297 if (use_apparmor && context->apparmor_profile) {
5cd9cd35
LP
3298 r = aa_change_onexec(context->apparmor_profile);
3299 if (r < 0 && !context->apparmor_profile_ignore) {
3300 *exit_status = EXIT_APPARMOR_PROFILE;
12145637 3301 return log_unit_error_errno(unit, errno, "Failed to prepare AppArmor profile change to %s: %m", context->apparmor_profile);
5cd9cd35
LP
3302 }
3303 }
3304#endif
3305
165a31c0
LP
3306 /* PR_GET_SECUREBITS is not privileged, while PR_SET_SECUREBITS is. So to suppress potential EPERMs
3307 * we'll try not to call PR_SET_SECUREBITS unless necessary. */
755d4b67
IP
3308 if (prctl(PR_GET_SECUREBITS) != secure_bits)
3309 if (prctl(PR_SET_SECUREBITS, secure_bits) < 0) {
ff0af2a1 3310 *exit_status = EXIT_SECUREBITS;
12145637 3311 return log_unit_error_errno(unit, errno, "Failed to set process secure bits: %m");
ff01d048 3312 }
5b6319dc 3313
59eeb84b 3314 if (context_has_no_new_privileges(context))
d35fbf6b 3315 if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) < 0) {
ff0af2a1 3316 *exit_status = EXIT_NO_NEW_PRIVILEGES;
12145637 3317 return log_unit_error_errno(unit, errno, "Failed to disable new privileges: %m");
d35fbf6b
DM
3318 }
3319
349cc4a5 3320#if HAVE_SECCOMP
469830d1
LP
3321 r = apply_address_families(unit, context);
3322 if (r < 0) {
3323 *exit_status = EXIT_ADDRESS_FAMILIES;
12145637 3324 return log_unit_error_errno(unit, r, "Failed to restrict address families: %m");
4c2630eb 3325 }
04aa0cb9 3326
469830d1
LP
3327 r = apply_memory_deny_write_execute(unit, context);
3328 if (r < 0) {
3329 *exit_status = EXIT_SECCOMP;
12145637 3330 return log_unit_error_errno(unit, r, "Failed to disable writing to executable memory: %m");
f3e43635 3331 }
f4170c67 3332
469830d1
LP
3333 r = apply_restrict_realtime(unit, context);
3334 if (r < 0) {
3335 *exit_status = EXIT_SECCOMP;
12145637 3336 return log_unit_error_errno(unit, r, "Failed to apply realtime restrictions: %m");
f4170c67
LP
3337 }
3338
add00535
LP
3339 r = apply_restrict_namespaces(unit, context);
3340 if (r < 0) {
3341 *exit_status = EXIT_SECCOMP;
12145637 3342 return log_unit_error_errno(unit, r, "Failed to apply namespace restrictions: %m");
add00535
LP
3343 }
3344
469830d1
LP
3345 r = apply_protect_sysctl(unit, context);
3346 if (r < 0) {
3347 *exit_status = EXIT_SECCOMP;
12145637 3348 return log_unit_error_errno(unit, r, "Failed to apply sysctl restrictions: %m");
502d704e
DH
3349 }
3350
469830d1
LP
3351 r = apply_protect_kernel_modules(unit, context);
3352 if (r < 0) {
3353 *exit_status = EXIT_SECCOMP;
12145637 3354 return log_unit_error_errno(unit, r, "Failed to apply module loading restrictions: %m");
59eeb84b
LP
3355 }
3356
469830d1
LP
3357 r = apply_private_devices(unit, context);
3358 if (r < 0) {
3359 *exit_status = EXIT_SECCOMP;
12145637 3360 return log_unit_error_errno(unit, r, "Failed to set up private devices: %m");
469830d1
LP
3361 }
3362
3363 r = apply_syscall_archs(unit, context);
3364 if (r < 0) {
3365 *exit_status = EXIT_SECCOMP;
12145637 3366 return log_unit_error_errno(unit, r, "Failed to apply syscall architecture restrictions: %m");
ba128bb8
LP
3367 }
3368
78e864e5
TM
3369 r = apply_lock_personality(unit, context);
3370 if (r < 0) {
3371 *exit_status = EXIT_SECCOMP;
12145637 3372 return log_unit_error_errno(unit, r, "Failed to lock personalities: %m");
78e864e5
TM
3373 }
3374
5cd9cd35
LP
3375 /* This really should remain the last step before the execve(), to make sure our own code is unaffected
3376 * by the filter as little as possible. */
165a31c0 3377 r = apply_syscall_filter(unit, context, needs_ambient_hack);
469830d1
LP
3378 if (r < 0) {
3379 *exit_status = EXIT_SECCOMP;
12145637 3380 return log_unit_error_errno(unit, r, "Failed to apply system call filters: %m");
d35fbf6b
DM
3381 }
3382#endif
d35fbf6b 3383 }
034c6ed7 3384
00819cc1
LP
3385 if (!strv_isempty(context->unset_environment)) {
3386 char **ee = NULL;
3387
3388 ee = strv_env_delete(accum_env, 1, context->unset_environment);
3389 if (!ee) {
3390 *exit_status = EXIT_MEMORY;
12145637 3391 return log_oom();
00819cc1
LP
3392 }
3393
3394 strv_free(accum_env);
3395 accum_env = ee;
3396 }
3397
2065ca69 3398 final_argv = replace_env_argv(argv, accum_env);
d35fbf6b 3399 if (!final_argv) {
ff0af2a1 3400 *exit_status = EXIT_MEMORY;
12145637 3401 return log_oom();
d35fbf6b 3402 }
034c6ed7 3403
553d2243 3404 if (_unlikely_(log_get_max_level() >= LOG_DEBUG)) {
d35fbf6b 3405 _cleanup_free_ char *line;
81a2b7ce 3406
d35fbf6b
DM
3407 line = exec_command_line(final_argv);
3408 if (line) {
f2341e0a 3409 log_struct(LOG_DEBUG,
f2341e0a
LP
3410 "EXECUTABLE=%s", command->path,
3411 LOG_UNIT_MESSAGE(unit, "Executing: %s", line),
ba360bb0 3412 LOG_UNIT_ID(unit),
f1c50bec 3413 LOG_UNIT_INVOCATION_ID(unit),
f2341e0a 3414 NULL);
d35fbf6b
DM
3415 }
3416 }
dd305ec9 3417
2065ca69 3418 execve(command->path, final_argv, accum_env);
12145637
LP
3419
3420 if (errno == ENOENT && (command->flags & EXEC_COMMAND_IGNORE_FAILURE)) {
3421
3422 log_struct_errno(LOG_INFO, errno,
3423 "MESSAGE_ID=" SD_MESSAGE_SPAWN_FAILED_STR,
3424 LOG_UNIT_ID(unit),
3425 LOG_UNIT_INVOCATION_ID(unit),
3426 LOG_UNIT_MESSAGE(unit, "Executable %s missing, skipping: %m",
3427 command->path),
3428 "EXECUTABLE=%s", command->path,
3429 NULL);
3430
3431 return 0;
3432 }
3433
ff0af2a1 3434 *exit_status = EXIT_EXEC;
12145637 3435 return log_unit_error_errno(unit, errno, "Failed to execute command: %m");
d35fbf6b 3436}
81a2b7ce 3437
f2341e0a
LP
3438int exec_spawn(Unit *unit,
3439 ExecCommand *command,
d35fbf6b
DM
3440 const ExecContext *context,
3441 const ExecParameters *params,
3442 ExecRuntime *runtime,
29206d46 3443 DynamicCreds *dcreds,
d35fbf6b 3444 pid_t *ret) {
8351ceae 3445
d35fbf6b 3446 _cleanup_strv_free_ char **files_env = NULL;
9b141911 3447 int *fds = NULL;
4c47affc 3448 unsigned n_storage_fds = 0, n_socket_fds = 0;
ff0af2a1
LP
3449 _cleanup_free_ char *line = NULL;
3450 int socket_fd, r;
52c239d7 3451 int named_iofds[3] = { -1, -1, -1 };
ff0af2a1 3452 char **argv;
d35fbf6b 3453 pid_t pid;
8351ceae 3454
f2341e0a 3455 assert(unit);
d35fbf6b
DM
3456 assert(command);
3457 assert(context);
3458 assert(ret);
3459 assert(params);
4c47affc 3460 assert(params->fds || (params->n_storage_fds + params->n_socket_fds <= 0));
4298d0b5 3461
d35fbf6b
DM
3462 if (context->std_input == EXEC_INPUT_SOCKET ||
3463 context->std_output == EXEC_OUTPUT_SOCKET ||
3464 context->std_error == EXEC_OUTPUT_SOCKET) {
17df7223 3465
4c47affc 3466 if (params->n_socket_fds > 1) {
f2341e0a 3467 log_unit_error(unit, "Got more than one socket.");
d35fbf6b 3468 return -EINVAL;
ff0af2a1 3469 }
eef65bf3 3470
4c47affc 3471 if (params->n_socket_fds == 0) {
488ab41c
AA
3472 log_unit_error(unit, "Got no socket.");
3473 return -EINVAL;
3474 }
3475
d35fbf6b
DM
3476 socket_fd = params->fds[0];
3477 } else {
3478 socket_fd = -1;
3479 fds = params->fds;
4c47affc 3480 n_storage_fds = params->n_storage_fds;
9b141911 3481 n_socket_fds = params->n_socket_fds;
d35fbf6b 3482 }
94f04347 3483
52c239d7
LB
3484 r = exec_context_named_iofds(unit, context, params, named_iofds);
3485 if (r < 0)
3486 return log_unit_error_errno(unit, r, "Failed to load a named file descriptor: %m");
3487
f2341e0a 3488 r = exec_context_load_environment(unit, context, &files_env);
ff0af2a1 3489 if (r < 0)
f2341e0a 3490 return log_unit_error_errno(unit, r, "Failed to load environment files: %m");
034c6ed7 3491
d35fbf6b 3492 argv = params->argv ?: command->argv;
d35fbf6b
DM
3493 line = exec_command_line(argv);
3494 if (!line)
3495 return log_oom();
fab56fc5 3496
f2341e0a 3497 log_struct(LOG_DEBUG,
f2341e0a
LP
3498 LOG_UNIT_MESSAGE(unit, "About to execute: %s", line),
3499 "EXECUTABLE=%s", command->path,
ba360bb0 3500 LOG_UNIT_ID(unit),
f1c50bec 3501 LOG_UNIT_INVOCATION_ID(unit),
f2341e0a 3502 NULL);
12145637 3503
d35fbf6b
DM
3504 pid = fork();
3505 if (pid < 0)
74129a12 3506 return log_unit_error_errno(unit, errno, "Failed to fork: %m");
d35fbf6b
DM
3507
3508 if (pid == 0) {
12145637 3509 int exit_status = EXIT_SUCCESS;
ff0af2a1 3510
f2341e0a
LP
3511 r = exec_child(unit,
3512 command,
ff0af2a1
LP
3513 context,
3514 params,
3515 runtime,
29206d46 3516 dcreds,
ff0af2a1
LP
3517 argv,
3518 socket_fd,
52c239d7 3519 named_iofds,
4c47affc
FB
3520 fds,
3521 n_storage_fds,
9b141911 3522 n_socket_fds,
ff0af2a1 3523 files_env,
00d9ef85 3524 unit->manager->user_lookup_fds[1],
12145637
LP
3525 &exit_status);
3526
ff0af2a1 3527 if (r < 0) {
12145637
LP
3528 log_struct_errno(LOG_ERR, r,
3529 "MESSAGE_ID=" SD_MESSAGE_SPAWN_FAILED_STR,
3530 LOG_UNIT_ID(unit),
3531 LOG_UNIT_INVOCATION_ID(unit),
3532 LOG_UNIT_MESSAGE(unit, "Failed at step %s spawning %s: %m",
3533 exit_status_to_string(exit_status, EXIT_STATUS_SYSTEMD),
3534 command->path),
3535 "EXECUTABLE=%s", command->path,
3536 NULL);
4c2630eb
MS
3537 }
3538
ff0af2a1 3539 _exit(exit_status);
034c6ed7
LP
3540 }
3541
f2341e0a 3542 log_unit_debug(unit, "Forked %s as "PID_FMT, command->path, pid);
23635a85 3543
80876c20
LP
3544 /* We add the new process to the cgroup both in the child (so
3545 * that we can be sure that no user code is ever executed
3546 * outside of the cgroup) and in the parent (so that we can be
3547 * sure that when we kill the cgroup the process will be
3548 * killed too). */
d35fbf6b 3549 if (params->cgroup_path)
dd305ec9 3550 (void) cg_attach(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, pid);
2da3263a 3551
b58b4116 3552 exec_status_start(&command->exec_status, pid);
9fb86720 3553
034c6ed7 3554 *ret = pid;
5cb5a6ff
LP
3555 return 0;
3556}
3557
034c6ed7 3558void exec_context_init(ExecContext *c) {
3536f49e
YW
3559 ExecDirectoryType i;
3560
034c6ed7
LP
3561 assert(c);
3562
4c12626c 3563 c->umask = 0022;
9eba9da4 3564 c->ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 0);
94f04347 3565 c->cpu_sched_policy = SCHED_OTHER;
071830ff 3566 c->syslog_priority = LOG_DAEMON|LOG_INFO;
74922904 3567 c->syslog_level_prefix = true;
353e12c2 3568 c->ignore_sigpipe = true;
3a43da28 3569 c->timer_slack_nsec = NSEC_INFINITY;
050f7277 3570 c->personality = PERSONALITY_INVALID;
72fd1768 3571 for (i = 0; i < _EXEC_DIRECTORY_TYPE_MAX; i++)
3536f49e 3572 c->directories[i].mode = 0755;
a103496c 3573 c->capability_bounding_set = CAP_ALL;
add00535 3574 c->restrict_namespaces = NAMESPACE_FLAGS_ALL;
d3070fbd 3575 c->log_level_max = -1;
034c6ed7
LP
3576}
3577
613b411c 3578void exec_context_done(ExecContext *c) {
3536f49e 3579 ExecDirectoryType i;
d3070fbd 3580 size_t l;
5cb5a6ff
LP
3581
3582 assert(c);
3583
6796073e
LP
3584 c->environment = strv_free(c->environment);
3585 c->environment_files = strv_free(c->environment_files);
b4c14404 3586 c->pass_environment = strv_free(c->pass_environment);
00819cc1 3587 c->unset_environment = strv_free(c->unset_environment);
8c7be95e 3588
1f6b4113 3589 for (l = 0; l < ELEMENTSOF(c->rlimit); l++)
a1e58e8e 3590 c->rlimit[l] = mfree(c->rlimit[l]);
034c6ed7 3591
2038c3f5 3592 for (l = 0; l < 3; l++) {
52c239d7 3593 c->stdio_fdname[l] = mfree(c->stdio_fdname[l]);
2038c3f5
LP
3594 c->stdio_file[l] = mfree(c->stdio_file[l]);
3595 }
52c239d7 3596
a1e58e8e
LP
3597 c->working_directory = mfree(c->working_directory);
3598 c->root_directory = mfree(c->root_directory);
915e6d16 3599 c->root_image = mfree(c->root_image);
a1e58e8e
LP
3600 c->tty_path = mfree(c->tty_path);
3601 c->syslog_identifier = mfree(c->syslog_identifier);
3602 c->user = mfree(c->user);
3603 c->group = mfree(c->group);
034c6ed7 3604
6796073e 3605 c->supplementary_groups = strv_free(c->supplementary_groups);
94f04347 3606
a1e58e8e 3607 c->pam_name = mfree(c->pam_name);
5b6319dc 3608
2a624c36
AP
3609 c->read_only_paths = strv_free(c->read_only_paths);
3610 c->read_write_paths = strv_free(c->read_write_paths);
3611 c->inaccessible_paths = strv_free(c->inaccessible_paths);
82c121a4 3612
d2d6c096
LP
3613 bind_mount_free_many(c->bind_mounts, c->n_bind_mounts);
3614
82c121a4
LP
3615 if (c->cpuset)
3616 CPU_FREE(c->cpuset);
86a3475b 3617
a1e58e8e
LP
3618 c->utmp_id = mfree(c->utmp_id);
3619 c->selinux_context = mfree(c->selinux_context);
3620 c->apparmor_profile = mfree(c->apparmor_profile);
5b8e1b77 3621 c->smack_process_label = mfree(c->smack_process_label);
eef65bf3 3622
8cfa775f 3623 c->syscall_filter = hashmap_free(c->syscall_filter);
525d3cc7
LP
3624 c->syscall_archs = set_free(c->syscall_archs);
3625 c->address_families = set_free(c->address_families);
e66cf1a3 3626
72fd1768 3627 for (i = 0; i < _EXEC_DIRECTORY_TYPE_MAX; i++)
3536f49e 3628 c->directories[i].paths = strv_free(c->directories[i].paths);
d3070fbd
LP
3629
3630 c->log_level_max = -1;
3631
3632 exec_context_free_log_extra_fields(c);
08f3be7a
LP
3633
3634 c->stdin_data = mfree(c->stdin_data);
3635 c->stdin_data_size = 0;
e66cf1a3
LP
3636}
3637
3638int exec_context_destroy_runtime_directory(ExecContext *c, const char *runtime_prefix) {
3639 char **i;
3640
3641 assert(c);
3642
3643 if (!runtime_prefix)
3644 return 0;
3645
3536f49e 3646 STRV_FOREACH(i, c->directories[EXEC_DIRECTORY_RUNTIME].paths) {
e66cf1a3
LP
3647 _cleanup_free_ char *p;
3648
605405c6 3649 p = strjoin(runtime_prefix, "/", *i);
e66cf1a3
LP
3650 if (!p)
3651 return -ENOMEM;
3652
6c47cd7d 3653 /* We execute this synchronously, since we need to be sure this is gone when we start the service
e66cf1a3 3654 * next. */
c6878637 3655 (void) rm_rf(p, REMOVE_ROOT);
e66cf1a3
LP
3656 }
3657
3658 return 0;
5cb5a6ff
LP
3659}
3660
43d0fcbd
LP
3661void exec_command_done(ExecCommand *c) {
3662 assert(c);
3663
a1e58e8e 3664 c->path = mfree(c->path);
43d0fcbd 3665
6796073e 3666 c->argv = strv_free(c->argv);
43d0fcbd
LP
3667}
3668
3669void exec_command_done_array(ExecCommand *c, unsigned n) {
3670 unsigned i;
3671
3672 for (i = 0; i < n; i++)
3673 exec_command_done(c+i);
3674}
3675
f1acf85a 3676ExecCommand* exec_command_free_list(ExecCommand *c) {
5cb5a6ff
LP
3677 ExecCommand *i;
3678
3679 while ((i = c)) {
71fda00f 3680 LIST_REMOVE(command, c, i);
43d0fcbd 3681 exec_command_done(i);
5cb5a6ff
LP
3682 free(i);
3683 }
f1acf85a
ZJS
3684
3685 return NULL;
5cb5a6ff
LP
3686}
3687
034c6ed7
LP
3688void exec_command_free_array(ExecCommand **c, unsigned n) {
3689 unsigned i;
3690
f1acf85a
ZJS
3691 for (i = 0; i < n; i++)
3692 c[i] = exec_command_free_list(c[i]);
034c6ed7
LP
3693}
3694
039f0e70 3695typedef struct InvalidEnvInfo {
f2341e0a 3696 Unit *unit;
039f0e70
LP
3697 const char *path;
3698} InvalidEnvInfo;
3699
3700static void invalid_env(const char *p, void *userdata) {
3701 InvalidEnvInfo *info = userdata;
3702
f2341e0a 3703 log_unit_error(info->unit, "Ignoring invalid environment assignment '%s': %s", p, info->path);
039f0e70
LP
3704}
3705
52c239d7
LB
3706const char* exec_context_fdname(const ExecContext *c, int fd_index) {
3707 assert(c);
3708
3709 switch (fd_index) {
5073ff6b 3710
52c239d7
LB
3711 case STDIN_FILENO:
3712 if (c->std_input != EXEC_INPUT_NAMED_FD)
3713 return NULL;
5073ff6b 3714
52c239d7 3715 return c->stdio_fdname[STDIN_FILENO] ?: "stdin";
5073ff6b 3716
52c239d7
LB
3717 case STDOUT_FILENO:
3718 if (c->std_output != EXEC_OUTPUT_NAMED_FD)
3719 return NULL;
5073ff6b 3720
52c239d7 3721 return c->stdio_fdname[STDOUT_FILENO] ?: "stdout";
5073ff6b 3722
52c239d7
LB
3723 case STDERR_FILENO:
3724 if (c->std_error != EXEC_OUTPUT_NAMED_FD)
3725 return NULL;
5073ff6b 3726
52c239d7 3727 return c->stdio_fdname[STDERR_FILENO] ?: "stderr";
5073ff6b 3728
52c239d7
LB
3729 default:
3730 return NULL;
3731 }
3732}
3733
3734int exec_context_named_iofds(Unit *unit, const ExecContext *c, const ExecParameters *p, int named_iofds[3]) {
3735 unsigned i, targets;
56fbd561 3736 const char* stdio_fdname[3];
4c47affc 3737 unsigned n_fds;
52c239d7
LB
3738
3739 assert(c);
3740 assert(p);
3741
3742 targets = (c->std_input == EXEC_INPUT_NAMED_FD) +
3743 (c->std_output == EXEC_OUTPUT_NAMED_FD) +
3744 (c->std_error == EXEC_OUTPUT_NAMED_FD);
3745
3746 for (i = 0; i < 3; i++)
3747 stdio_fdname[i] = exec_context_fdname(c, i);
3748
4c47affc
FB
3749 n_fds = p->n_storage_fds + p->n_socket_fds;
3750
3751 for (i = 0; i < n_fds && targets > 0; i++)
56fbd561
ZJS
3752 if (named_iofds[STDIN_FILENO] < 0 &&
3753 c->std_input == EXEC_INPUT_NAMED_FD &&
3754 stdio_fdname[STDIN_FILENO] &&
3755 streq(p->fd_names[i], stdio_fdname[STDIN_FILENO])) {
3756
52c239d7
LB
3757 named_iofds[STDIN_FILENO] = p->fds[i];
3758 targets--;
56fbd561
ZJS
3759
3760 } else if (named_iofds[STDOUT_FILENO] < 0 &&
3761 c->std_output == EXEC_OUTPUT_NAMED_FD &&
3762 stdio_fdname[STDOUT_FILENO] &&
3763 streq(p->fd_names[i], stdio_fdname[STDOUT_FILENO])) {
3764
52c239d7
LB
3765 named_iofds[STDOUT_FILENO] = p->fds[i];
3766 targets--;
56fbd561
ZJS
3767
3768 } else if (named_iofds[STDERR_FILENO] < 0 &&
3769 c->std_error == EXEC_OUTPUT_NAMED_FD &&
3770 stdio_fdname[STDERR_FILENO] &&
3771 streq(p->fd_names[i], stdio_fdname[STDERR_FILENO])) {
3772
52c239d7
LB
3773 named_iofds[STDERR_FILENO] = p->fds[i];
3774 targets--;
3775 }
3776
56fbd561 3777 return targets == 0 ? 0 : -ENOENT;
52c239d7
LB
3778}
3779
f2341e0a 3780int exec_context_load_environment(Unit *unit, const ExecContext *c, char ***l) {
8c7be95e
LP
3781 char **i, **r = NULL;
3782
3783 assert(c);
3784 assert(l);
3785
3786 STRV_FOREACH(i, c->environment_files) {
3787 char *fn;
52511fae
ZJS
3788 int k;
3789 unsigned n;
8c7be95e
LP
3790 bool ignore = false;
3791 char **p;
7fd1b19b 3792 _cleanup_globfree_ glob_t pglob = {};
8c7be95e
LP
3793
3794 fn = *i;
3795
3796 if (fn[0] == '-') {
3797 ignore = true;
313cefa1 3798 fn++;
8c7be95e
LP
3799 }
3800
3801 if (!path_is_absolute(fn)) {
8c7be95e
LP
3802 if (ignore)
3803 continue;
3804
3805 strv_free(r);
3806 return -EINVAL;
3807 }
3808
2bef10ab 3809 /* Filename supports globbing, take all matching files */
d8c92e8b
ZJS
3810 k = safe_glob(fn, 0, &pglob);
3811 if (k < 0) {
2bef10ab
PL
3812 if (ignore)
3813 continue;
8c7be95e 3814
2bef10ab 3815 strv_free(r);
d8c92e8b 3816 return k;
2bef10ab 3817 }
8c7be95e 3818
d8c92e8b
ZJS
3819 /* When we don't match anything, -ENOENT should be returned */
3820 assert(pglob.gl_pathc > 0);
3821
3822 for (n = 0; n < pglob.gl_pathc; n++) {
717603e3 3823 k = load_env_file(NULL, pglob.gl_pathv[n], NULL, &p);
2bef10ab
PL
3824 if (k < 0) {
3825 if (ignore)
3826 continue;
8c7be95e 3827
2bef10ab 3828 strv_free(r);
2bef10ab 3829 return k;
e9c1ea9d 3830 }
ebc05a09 3831 /* Log invalid environment variables with filename */
039f0e70
LP
3832 if (p) {
3833 InvalidEnvInfo info = {
f2341e0a 3834 .unit = unit,
039f0e70
LP
3835 .path = pglob.gl_pathv[n]
3836 };
3837
3838 p = strv_env_clean_with_callback(p, invalid_env, &info);
3839 }
8c7be95e 3840
2bef10ab
PL
3841 if (r == NULL)
3842 r = p;
3843 else {
3844 char **m;
8c7be95e 3845
2bef10ab
PL
3846 m = strv_env_merge(2, r, p);
3847 strv_free(r);
3848 strv_free(p);
c84a9488 3849 if (!m)
2bef10ab 3850 return -ENOMEM;
2bef10ab
PL
3851
3852 r = m;
3853 }
8c7be95e
LP
3854 }
3855 }
3856
3857 *l = r;
3858
3859 return 0;
3860}
3861
6ac8fdc9 3862static bool tty_may_match_dev_console(const char *tty) {
e1d75803 3863 _cleanup_free_ char *active = NULL;
7d6884b6 3864 char *console;
6ac8fdc9 3865
1e22b5cd
LP
3866 if (!tty)
3867 return true;
3868
a119ec7c 3869 tty = skip_dev_prefix(tty);
6ac8fdc9
MS
3870
3871 /* trivial identity? */
3872 if (streq(tty, "console"))
3873 return true;
3874
3875 console = resolve_dev_console(&active);
3876 /* if we could not resolve, assume it may */
3877 if (!console)
3878 return true;
3879
3880 /* "tty0" means the active VC, so it may be the same sometimes */
e1d75803 3881 return streq(console, tty) || (streq(console, "tty0") && tty_is_vc(tty));
6ac8fdc9
MS
3882}
3883
3884bool exec_context_may_touch_console(ExecContext *ec) {
1e22b5cd
LP
3885
3886 return (ec->tty_reset ||
3887 ec->tty_vhangup ||
3888 ec->tty_vt_disallocate ||
6ac8fdc9
MS
3889 is_terminal_input(ec->std_input) ||
3890 is_terminal_output(ec->std_output) ||
3891 is_terminal_output(ec->std_error)) &&
1e22b5cd 3892 tty_may_match_dev_console(exec_context_tty_path(ec));
6ac8fdc9
MS
3893}
3894
15ae422b
LP
3895static void strv_fprintf(FILE *f, char **l) {
3896 char **g;
3897
3898 assert(f);
3899
3900 STRV_FOREACH(g, l)
3901 fprintf(f, " %s", *g);
3902}
3903
5cb5a6ff 3904void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) {
d3070fbd 3905 ExecDirectoryType dt;
c2bbd90b 3906 char **e, **d;
94f04347 3907 unsigned i;
add00535 3908 int r;
9eba9da4 3909
5cb5a6ff
LP
3910 assert(c);
3911 assert(f);
3912
4ad49000 3913 prefix = strempty(prefix);
5cb5a6ff
LP
3914
3915 fprintf(f,
94f04347
LP
3916 "%sUMask: %04o\n"
3917 "%sWorkingDirectory: %s\n"
451a074f 3918 "%sRootDirectory: %s\n"
15ae422b 3919 "%sNonBlocking: %s\n"
64747e2d 3920 "%sPrivateTmp: %s\n"
7f112f50 3921 "%sPrivateDevices: %s\n"
59eeb84b 3922 "%sProtectKernelTunables: %s\n"
e66a2f65 3923 "%sProtectKernelModules: %s\n"
59eeb84b 3924 "%sProtectControlGroups: %s\n"
d251207d
LP
3925 "%sPrivateNetwork: %s\n"
3926 "%sPrivateUsers: %s\n"
1b8689f9
LP
3927 "%sProtectHome: %s\n"
3928 "%sProtectSystem: %s\n"
5d997827 3929 "%sMountAPIVFS: %s\n"
f3e43635 3930 "%sIgnoreSIGPIPE: %s\n"
f4170c67 3931 "%sMemoryDenyWriteExecute: %s\n"
b1edf445
LP
3932 "%sRestrictRealtime: %s\n"
3933 "%sKeyringMode: %s\n",
5cb5a6ff 3934 prefix, c->umask,
9eba9da4 3935 prefix, c->working_directory ? c->working_directory : "/",
451a074f 3936 prefix, c->root_directory ? c->root_directory : "/",
15ae422b 3937 prefix, yes_no(c->non_blocking),
64747e2d 3938 prefix, yes_no(c->private_tmp),
7f112f50 3939 prefix, yes_no(c->private_devices),
59eeb84b 3940 prefix, yes_no(c->protect_kernel_tunables),
e66a2f65 3941 prefix, yes_no(c->protect_kernel_modules),
59eeb84b 3942 prefix, yes_no(c->protect_control_groups),
d251207d
LP
3943 prefix, yes_no(c->private_network),
3944 prefix, yes_no(c->private_users),
1b8689f9
LP
3945 prefix, protect_home_to_string(c->protect_home),
3946 prefix, protect_system_to_string(c->protect_system),
5d997827 3947 prefix, yes_no(c->mount_apivfs),
f3e43635 3948 prefix, yes_no(c->ignore_sigpipe),
f4170c67 3949 prefix, yes_no(c->memory_deny_write_execute),
b1edf445
LP
3950 prefix, yes_no(c->restrict_realtime),
3951 prefix, exec_keyring_mode_to_string(c->keyring_mode));
fb33a393 3952
915e6d16
LP
3953 if (c->root_image)
3954 fprintf(f, "%sRootImage: %s\n", prefix, c->root_image);
3955
8c7be95e
LP
3956 STRV_FOREACH(e, c->environment)
3957 fprintf(f, "%sEnvironment: %s\n", prefix, *e);
3958
3959 STRV_FOREACH(e, c->environment_files)
3960 fprintf(f, "%sEnvironmentFile: %s\n", prefix, *e);
94f04347 3961
b4c14404
FB
3962 STRV_FOREACH(e, c->pass_environment)
3963 fprintf(f, "%sPassEnvironment: %s\n", prefix, *e);
3964
00819cc1
LP
3965 STRV_FOREACH(e, c->unset_environment)
3966 fprintf(f, "%sUnsetEnvironment: %s\n", prefix, *e);
3967
53f47dfc
YW
3968 fprintf(f, "%sRuntimeDirectoryPreserve: %s\n", prefix, exec_preserve_mode_to_string(c->runtime_directory_preserve_mode));
3969
72fd1768 3970 for (dt = 0; dt < _EXEC_DIRECTORY_TYPE_MAX; dt++) {
3536f49e
YW
3971 fprintf(f, "%s%sMode: %04o\n", prefix, exec_directory_type_to_string(dt), c->directories[dt].mode);
3972
3973 STRV_FOREACH(d, c->directories[dt].paths)
3974 fprintf(f, "%s%s: %s\n", prefix, exec_directory_type_to_string(dt), *d);
3975 }
c2bbd90b 3976
fb33a393
LP
3977 if (c->nice_set)
3978 fprintf(f,
3979 "%sNice: %i\n",
3980 prefix, c->nice);
3981
dd6c17b1 3982 if (c->oom_score_adjust_set)
fb33a393 3983 fprintf(f,
dd6c17b1
LP
3984 "%sOOMScoreAdjust: %i\n",
3985 prefix, c->oom_score_adjust);
9eba9da4 3986
94f04347 3987 for (i = 0; i < RLIM_NLIMITS; i++)
3c11da9d
EV
3988 if (c->rlimit[i]) {
3989 fprintf(f, "%s%s: " RLIM_FMT "\n",
3990 prefix, rlimit_to_string(i), c->rlimit[i]->rlim_max);
3991 fprintf(f, "%s%sSoft: " RLIM_FMT "\n",
3992 prefix, rlimit_to_string(i), c->rlimit[i]->rlim_cur);
3993 }
94f04347 3994
f8b69d1d 3995 if (c->ioprio_set) {
1756a011 3996 _cleanup_free_ char *class_str = NULL;
f8b69d1d 3997
837df140
YW
3998 r = ioprio_class_to_string_alloc(IOPRIO_PRIO_CLASS(c->ioprio), &class_str);
3999 if (r >= 0)
4000 fprintf(f, "%sIOSchedulingClass: %s\n", prefix, class_str);
4001
4002 fprintf(f, "%sIOPriority: %lu\n", prefix, IOPRIO_PRIO_DATA(c->ioprio));
f8b69d1d 4003 }
94f04347 4004
f8b69d1d 4005 if (c->cpu_sched_set) {
1756a011 4006 _cleanup_free_ char *policy_str = NULL;
f8b69d1d 4007
837df140
YW
4008 r = sched_policy_to_string_alloc(c->cpu_sched_policy, &policy_str);
4009 if (r >= 0)
4010 fprintf(f, "%sCPUSchedulingPolicy: %s\n", prefix, policy_str);
4011
94f04347 4012 fprintf(f,
38b48754
LP
4013 "%sCPUSchedulingPriority: %i\n"
4014 "%sCPUSchedulingResetOnFork: %s\n",
38b48754
LP
4015 prefix, c->cpu_sched_priority,
4016 prefix, yes_no(c->cpu_sched_reset_on_fork));
b929bf04 4017 }
94f04347 4018
82c121a4 4019 if (c->cpuset) {
94f04347 4020 fprintf(f, "%sCPUAffinity:", prefix);
82c121a4
LP
4021 for (i = 0; i < c->cpuset_ncpus; i++)
4022 if (CPU_ISSET_S(i, CPU_ALLOC_SIZE(c->cpuset_ncpus), c->cpuset))
43a99a7a 4023 fprintf(f, " %u", i);
94f04347
LP
4024 fputs("\n", f);
4025 }
4026
3a43da28 4027 if (c->timer_slack_nsec != NSEC_INFINITY)
ccd06097 4028 fprintf(f, "%sTimerSlackNSec: "NSEC_FMT "\n", prefix, c->timer_slack_nsec);
94f04347
LP
4029
4030 fprintf(f,
80876c20
LP
4031 "%sStandardInput: %s\n"
4032 "%sStandardOutput: %s\n"
4033 "%sStandardError: %s\n",
4034 prefix, exec_input_to_string(c->std_input),
4035 prefix, exec_output_to_string(c->std_output),
4036 prefix, exec_output_to_string(c->std_error));
4037
4038 if (c->tty_path)
4039 fprintf(f,
6ea832a2
LP
4040 "%sTTYPath: %s\n"
4041 "%sTTYReset: %s\n"
4042 "%sTTYVHangup: %s\n"
4043 "%sTTYVTDisallocate: %s\n",
4044 prefix, c->tty_path,
4045 prefix, yes_no(c->tty_reset),
4046 prefix, yes_no(c->tty_vhangup),
4047 prefix, yes_no(c->tty_vt_disallocate));
94f04347 4048
9f6444eb
LP
4049 if (IN_SET(c->std_output,
4050 EXEC_OUTPUT_SYSLOG,
4051 EXEC_OUTPUT_KMSG,
4052 EXEC_OUTPUT_JOURNAL,
4053 EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
4054 EXEC_OUTPUT_KMSG_AND_CONSOLE,
4055 EXEC_OUTPUT_JOURNAL_AND_CONSOLE) ||
4056 IN_SET(c->std_error,
4057 EXEC_OUTPUT_SYSLOG,
4058 EXEC_OUTPUT_KMSG,
4059 EXEC_OUTPUT_JOURNAL,
4060 EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
4061 EXEC_OUTPUT_KMSG_AND_CONSOLE,
4062 EXEC_OUTPUT_JOURNAL_AND_CONSOLE)) {
f8b69d1d 4063
5ce70e5b 4064 _cleanup_free_ char *fac_str = NULL, *lvl_str = NULL;
f8b69d1d 4065
837df140
YW
4066 r = log_facility_unshifted_to_string_alloc(c->syslog_priority >> 3, &fac_str);
4067 if (r >= 0)
4068 fprintf(f, "%sSyslogFacility: %s\n", prefix, fac_str);
f8b69d1d 4069
837df140
YW
4070 r = log_level_to_string_alloc(LOG_PRI(c->syslog_priority), &lvl_str);
4071 if (r >= 0)
4072 fprintf(f, "%sSyslogLevel: %s\n", prefix, lvl_str);
f8b69d1d 4073 }
94f04347 4074
d3070fbd
LP
4075 if (c->log_level_max >= 0) {
4076 _cleanup_free_ char *t = NULL;
4077
4078 (void) log_level_to_string_alloc(c->log_level_max, &t);
4079
4080 fprintf(f, "%sLogLevelMax: %s\n", prefix, strna(t));
4081 }
4082
4083 if (c->n_log_extra_fields > 0) {
4084 size_t j;
4085
4086 for (j = 0; j < c->n_log_extra_fields; j++) {
4087 fprintf(f, "%sLogExtraFields: ", prefix);
4088 fwrite(c->log_extra_fields[j].iov_base,
4089 1, c->log_extra_fields[j].iov_len,
4090 f);
4091 fputc('\n', f);
4092 }
4093 }
4094
07d46372
YW
4095 if (c->secure_bits) {
4096 _cleanup_free_ char *str = NULL;
4097
4098 r = secure_bits_to_string_alloc(c->secure_bits, &str);
4099 if (r >= 0)
4100 fprintf(f, "%sSecure Bits: %s\n", prefix, str);
4101 }
94f04347 4102
a103496c 4103 if (c->capability_bounding_set != CAP_ALL) {
dd1f5bd0 4104 _cleanup_free_ char *str = NULL;
94f04347 4105
dd1f5bd0
YW
4106 r = capability_set_to_string_alloc(c->capability_bounding_set, &str);
4107 if (r >= 0)
4108 fprintf(f, "%sCapabilityBoundingSet: %s\n", prefix, str);
755d4b67
IP
4109 }
4110
4111 if (c->capability_ambient_set != 0) {
dd1f5bd0 4112 _cleanup_free_ char *str = NULL;
755d4b67 4113
dd1f5bd0
YW
4114 r = capability_set_to_string_alloc(c->capability_ambient_set, &str);
4115 if (r >= 0)
4116 fprintf(f, "%sAmbientCapabilities: %s\n", prefix, str);
94f04347
LP
4117 }
4118
4119 if (c->user)
f2d3769a 4120 fprintf(f, "%sUser: %s\n", prefix, c->user);
94f04347 4121 if (c->group)
f2d3769a 4122 fprintf(f, "%sGroup: %s\n", prefix, c->group);
94f04347 4123
29206d46
LP
4124 fprintf(f, "%sDynamicUser: %s\n", prefix, yes_no(c->dynamic_user));
4125
ac6e8be6 4126 if (!strv_isempty(c->supplementary_groups)) {
94f04347 4127 fprintf(f, "%sSupplementaryGroups:", prefix);
15ae422b
LP
4128 strv_fprintf(f, c->supplementary_groups);
4129 fputs("\n", f);
4130 }
94f04347 4131
5b6319dc 4132 if (c->pam_name)
f2d3769a 4133 fprintf(f, "%sPAMName: %s\n", prefix, c->pam_name);
5b6319dc 4134
2a624c36
AP
4135 if (strv_length(c->read_write_paths) > 0) {
4136 fprintf(f, "%sReadWritePaths:", prefix);
4137 strv_fprintf(f, c->read_write_paths);
15ae422b
LP
4138 fputs("\n", f);
4139 }
4140
2a624c36
AP
4141 if (strv_length(c->read_only_paths) > 0) {
4142 fprintf(f, "%sReadOnlyPaths:", prefix);
4143 strv_fprintf(f, c->read_only_paths);
15ae422b
LP
4144 fputs("\n", f);
4145 }
94f04347 4146
2a624c36
AP
4147 if (strv_length(c->inaccessible_paths) > 0) {
4148 fprintf(f, "%sInaccessiblePaths:", prefix);
4149 strv_fprintf(f, c->inaccessible_paths);
94f04347
LP
4150 fputs("\n", f);
4151 }
2e22afe9 4152
d2d6c096
LP
4153 if (c->n_bind_mounts > 0)
4154 for (i = 0; i < c->n_bind_mounts; i++) {
4155 fprintf(f, "%s%s: %s:%s:%s\n", prefix,
4156 c->bind_mounts[i].read_only ? "BindReadOnlyPaths" : "BindPaths",
4157 c->bind_mounts[i].source,
4158 c->bind_mounts[i].destination,
4159 c->bind_mounts[i].recursive ? "rbind" : "norbind");
4160 }
4161
169c1bda
LP
4162 if (c->utmp_id)
4163 fprintf(f,
4164 "%sUtmpIdentifier: %s\n",
4165 prefix, c->utmp_id);
7b52a628
MS
4166
4167 if (c->selinux_context)
4168 fprintf(f,
5f8640fb
LP
4169 "%sSELinuxContext: %s%s\n",
4170 prefix, c->selinux_context_ignore ? "-" : "", c->selinux_context);
17df7223 4171
80c21aea
WC
4172 if (c->apparmor_profile)
4173 fprintf(f,
4174 "%sAppArmorProfile: %s%s\n",
4175 prefix, c->apparmor_profile_ignore ? "-" : "", c->apparmor_profile);
4176
4177 if (c->smack_process_label)
4178 fprintf(f,
4179 "%sSmackProcessLabel: %s%s\n",
4180 prefix, c->smack_process_label_ignore ? "-" : "", c->smack_process_label);
4181
050f7277 4182 if (c->personality != PERSONALITY_INVALID)
ac45f971
LP
4183 fprintf(f,
4184 "%sPersonality: %s\n",
4185 prefix, strna(personality_to_string(c->personality)));
4186
78e864e5
TM
4187 fprintf(f,
4188 "%sLockPersonality: %s\n",
4189 prefix, yes_no(c->lock_personality));
4190
17df7223 4191 if (c->syscall_filter) {
349cc4a5 4192#if HAVE_SECCOMP
17df7223 4193 Iterator j;
8cfa775f 4194 void *id, *val;
17df7223 4195 bool first = true;
351a19b1 4196#endif
17df7223
LP
4197
4198 fprintf(f,
57183d11 4199 "%sSystemCallFilter: ",
17df7223
LP
4200 prefix);
4201
4202 if (!c->syscall_whitelist)
4203 fputc('~', f);
4204
349cc4a5 4205#if HAVE_SECCOMP
8cfa775f 4206 HASHMAP_FOREACH_KEY(val, id, c->syscall_filter, j) {
17df7223 4207 _cleanup_free_ char *name = NULL;
8cfa775f
YW
4208 const char *errno_name = NULL;
4209 int num = PTR_TO_INT(val);
17df7223
LP
4210
4211 if (first)
4212 first = false;
4213 else
4214 fputc(' ', f);
4215
57183d11 4216 name = seccomp_syscall_resolve_num_arch(SCMP_ARCH_NATIVE, PTR_TO_INT(id) - 1);
17df7223 4217 fputs(strna(name), f);
8cfa775f
YW
4218
4219 if (num >= 0) {
4220 errno_name = errno_to_name(num);
4221 if (errno_name)
4222 fprintf(f, ":%s", errno_name);
4223 else
4224 fprintf(f, ":%d", num);
4225 }
17df7223 4226 }
351a19b1 4227#endif
17df7223
LP
4228
4229 fputc('\n', f);
4230 }
4231
57183d11 4232 if (c->syscall_archs) {
349cc4a5 4233#if HAVE_SECCOMP
57183d11
LP
4234 Iterator j;
4235 void *id;
4236#endif
4237
4238 fprintf(f,
4239 "%sSystemCallArchitectures:",
4240 prefix);
4241
349cc4a5 4242#if HAVE_SECCOMP
57183d11
LP
4243 SET_FOREACH(id, c->syscall_archs, j)
4244 fprintf(f, " %s", strna(seccomp_arch_to_string(PTR_TO_UINT32(id) - 1)));
4245#endif
4246 fputc('\n', f);
4247 }
4248
add00535
LP
4249 if (exec_context_restrict_namespaces_set(c)) {
4250 _cleanup_free_ char *s = NULL;
4251
4252 r = namespace_flag_to_string_many(c->restrict_namespaces, &s);
4253 if (r >= 0)
4254 fprintf(f, "%sRestrictNamespaces: %s\n",
4255 prefix, s);
4256 }
4257
3df90f24
YW
4258 if (c->syscall_errno > 0) {
4259 const char *errno_name;
4260
4261 fprintf(f, "%sSystemCallErrorNumber: ", prefix);
4262
4263 errno_name = errno_to_name(c->syscall_errno);
4264 if (errno_name)
4265 fprintf(f, "%s\n", errno_name);
4266 else
4267 fprintf(f, "%d\n", c->syscall_errno);
4268 }
eef65bf3
MS
4269
4270 if (c->apparmor_profile)
4271 fprintf(f,
4272 "%sAppArmorProfile: %s%s\n",
4273 prefix, c->apparmor_profile_ignore ? "-" : "", c->apparmor_profile);
5cb5a6ff
LP
4274}
4275
a931ad47
LP
4276bool exec_context_maintains_privileges(ExecContext *c) {
4277 assert(c);
4278
61233823 4279 /* Returns true if the process forked off would run under
a931ad47
LP
4280 * an unchanged UID or as root. */
4281
4282 if (!c->user)
4283 return true;
4284
4285 if (streq(c->user, "root") || streq(c->user, "0"))
4286 return true;
4287
4288 return false;
4289}
4290
7f452159
LP
4291int exec_context_get_effective_ioprio(ExecContext *c) {
4292 int p;
4293
4294 assert(c);
4295
4296 if (c->ioprio_set)
4297 return c->ioprio;
4298
4299 p = ioprio_get(IOPRIO_WHO_PROCESS, 0);
4300 if (p < 0)
4301 return IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 4);
4302
4303 return p;
4304}
4305
d3070fbd
LP
4306void exec_context_free_log_extra_fields(ExecContext *c) {
4307 size_t l;
4308
4309 assert(c);
4310
4311 for (l = 0; l < c->n_log_extra_fields; l++)
4312 free(c->log_extra_fields[l].iov_base);
4313 c->log_extra_fields = mfree(c->log_extra_fields);
4314 c->n_log_extra_fields = 0;
4315}
4316
b58b4116 4317void exec_status_start(ExecStatus *s, pid_t pid) {
034c6ed7 4318 assert(s);
5cb5a6ff 4319
b58b4116
LP
4320 zero(*s);
4321 s->pid = pid;
4322 dual_timestamp_get(&s->start_timestamp);
4323}
4324
6ea832a2 4325void exec_status_exit(ExecStatus *s, ExecContext *context, pid_t pid, int code, int status) {
b58b4116
LP
4326 assert(s);
4327
0b1f4ae6 4328 if (s->pid && s->pid != pid)
b58b4116
LP
4329 zero(*s);
4330
034c6ed7 4331 s->pid = pid;
63983207 4332 dual_timestamp_get(&s->exit_timestamp);
9fb86720 4333
034c6ed7
LP
4334 s->code = code;
4335 s->status = status;
169c1bda 4336
6ea832a2
LP
4337 if (context) {
4338 if (context->utmp_id)
4339 utmp_put_dead_process(context->utmp_id, pid, code, status);
4340
1e22b5cd 4341 exec_context_tty_reset(context, NULL);
6ea832a2 4342 }
9fb86720
LP
4343}
4344
4345void exec_status_dump(ExecStatus *s, FILE *f, const char *prefix) {
4346 char buf[FORMAT_TIMESTAMP_MAX];
4347
4348 assert(s);
4349 assert(f);
4350
9fb86720
LP
4351 if (s->pid <= 0)
4352 return;
4353
4c940960
LP
4354 prefix = strempty(prefix);
4355
9fb86720 4356 fprintf(f,
ccd06097
ZJS
4357 "%sPID: "PID_FMT"\n",
4358 prefix, s->pid);
9fb86720 4359
af9d16e1 4360 if (dual_timestamp_is_set(&s->start_timestamp))
9fb86720
LP
4361 fprintf(f,
4362 "%sStart Timestamp: %s\n",
63983207 4363 prefix, format_timestamp(buf, sizeof(buf), s->start_timestamp.realtime));
9fb86720 4364
af9d16e1 4365 if (dual_timestamp_is_set(&s->exit_timestamp))
9fb86720
LP
4366 fprintf(f,
4367 "%sExit Timestamp: %s\n"
4368 "%sExit Code: %s\n"
4369 "%sExit Status: %i\n",
63983207 4370 prefix, format_timestamp(buf, sizeof(buf), s->exit_timestamp.realtime),
9fb86720
LP
4371 prefix, sigchld_code_to_string(s->code),
4372 prefix, s->status);
5cb5a6ff 4373}
44d8db9e 4374
9e2f7c11 4375char *exec_command_line(char **argv) {
44d8db9e
LP
4376 size_t k;
4377 char *n, *p, **a;
4378 bool first = true;
4379
9e2f7c11 4380 assert(argv);
44d8db9e 4381
9164977d 4382 k = 1;
9e2f7c11 4383 STRV_FOREACH(a, argv)
44d8db9e
LP
4384 k += strlen(*a)+3;
4385
5cd9cd35
LP
4386 n = new(char, k);
4387 if (!n)
44d8db9e
LP
4388 return NULL;
4389
4390 p = n;
9e2f7c11 4391 STRV_FOREACH(a, argv) {
44d8db9e
LP
4392
4393 if (!first)
4394 *(p++) = ' ';
4395 else
4396 first = false;
4397
4398 if (strpbrk(*a, WHITESPACE)) {
4399 *(p++) = '\'';
4400 p = stpcpy(p, *a);
4401 *(p++) = '\'';
4402 } else
4403 p = stpcpy(p, *a);
4404
4405 }
4406
9164977d
LP
4407 *p = 0;
4408
44d8db9e
LP
4409 /* FIXME: this doesn't really handle arguments that have
4410 * spaces and ticks in them */
4411
4412 return n;
4413}
4414
4415void exec_command_dump(ExecCommand *c, FILE *f, const char *prefix) {
e1d75803 4416 _cleanup_free_ char *cmd = NULL;
4c940960 4417 const char *prefix2;
44d8db9e
LP
4418
4419 assert(c);
4420 assert(f);
4421
4c940960 4422 prefix = strempty(prefix);
63c372cb 4423 prefix2 = strjoina(prefix, "\t");
44d8db9e 4424
9e2f7c11 4425 cmd = exec_command_line(c->argv);
44d8db9e
LP
4426 fprintf(f,
4427 "%sCommand Line: %s\n",
4428 prefix, cmd ? cmd : strerror(ENOMEM));
4429
9fb86720 4430 exec_status_dump(&c->exec_status, f, prefix2);
44d8db9e
LP
4431}
4432
4433void exec_command_dump_list(ExecCommand *c, FILE *f, const char *prefix) {
4434 assert(f);
4435
4c940960 4436 prefix = strempty(prefix);
44d8db9e
LP
4437
4438 LIST_FOREACH(command, c, c)
4439 exec_command_dump(c, f, prefix);
4440}
94f04347 4441
a6a80b4f
LP
4442void exec_command_append_list(ExecCommand **l, ExecCommand *e) {
4443 ExecCommand *end;
4444
4445 assert(l);
4446 assert(e);
4447
4448 if (*l) {
35b8ca3a 4449 /* It's kind of important, that we keep the order here */
71fda00f
LP
4450 LIST_FIND_TAIL(command, *l, end);
4451 LIST_INSERT_AFTER(command, *l, end, e);
a6a80b4f
LP
4452 } else
4453 *l = e;
4454}
4455
26fd040d
LP
4456int exec_command_set(ExecCommand *c, const char *path, ...) {
4457 va_list ap;
4458 char **l, *p;
4459
4460 assert(c);
4461 assert(path);
4462
4463 va_start(ap, path);
4464 l = strv_new_ap(path, ap);
4465 va_end(ap);
4466
4467 if (!l)
4468 return -ENOMEM;
4469
250a918d
LP
4470 p = strdup(path);
4471 if (!p) {
26fd040d
LP
4472 strv_free(l);
4473 return -ENOMEM;
4474 }
4475
4476 free(c->path);
4477 c->path = p;
4478
4479 strv_free(c->argv);
4480 c->argv = l;
4481
4482 return 0;
4483}
4484
86b23b07 4485int exec_command_append(ExecCommand *c, const char *path, ...) {
e63ff941 4486 _cleanup_strv_free_ char **l = NULL;
86b23b07 4487 va_list ap;
86b23b07
JS
4488 int r;
4489
4490 assert(c);
4491 assert(path);
4492
4493 va_start(ap, path);
4494 l = strv_new_ap(path, ap);
4495 va_end(ap);
4496
4497 if (!l)
4498 return -ENOMEM;
4499
e287086b 4500 r = strv_extend_strv(&c->argv, l, false);
e63ff941 4501 if (r < 0)
86b23b07 4502 return r;
86b23b07
JS
4503
4504 return 0;
4505}
4506
4507
613b411c
LP
4508static int exec_runtime_allocate(ExecRuntime **rt) {
4509
4510 if (*rt)
4511 return 0;
4512
4513 *rt = new0(ExecRuntime, 1);
f146f5e1 4514 if (!*rt)
613b411c
LP
4515 return -ENOMEM;
4516
4517 (*rt)->n_ref = 1;
4518 (*rt)->netns_storage_socket[0] = (*rt)->netns_storage_socket[1] = -1;
4519
4520 return 0;
4521}
4522
4523int exec_runtime_make(ExecRuntime **rt, ExecContext *c, const char *id) {
4524 int r;
4525
4526 assert(rt);
4527 assert(c);
4528 assert(id);
4529
4530 if (*rt)
4531 return 1;
4532
4533 if (!c->private_network && !c->private_tmp)
4534 return 0;
4535
4536 r = exec_runtime_allocate(rt);
4537 if (r < 0)
4538 return r;
4539
4540 if (c->private_network && (*rt)->netns_storage_socket[0] < 0) {
33df919d 4541 if (socketpair(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC, 0, (*rt)->netns_storage_socket) < 0)
613b411c
LP
4542 return -errno;
4543 }
4544
4545 if (c->private_tmp && !(*rt)->tmp_dir) {
4546 r = setup_tmp_dirs(id, &(*rt)->tmp_dir, &(*rt)->var_tmp_dir);
4547 if (r < 0)
4548 return r;
4549 }
4550
4551 return 1;
4552}
4553
4554ExecRuntime *exec_runtime_ref(ExecRuntime *r) {
4555 assert(r);
4556 assert(r->n_ref > 0);
4557
4558 r->n_ref++;
4559 return r;
4560}
4561
4562ExecRuntime *exec_runtime_unref(ExecRuntime *r) {
4563
4564 if (!r)
4565 return NULL;
4566
4567 assert(r->n_ref > 0);
4568
4569 r->n_ref--;
f2341e0a
LP
4570 if (r->n_ref > 0)
4571 return NULL;
4572
4573 free(r->tmp_dir);
4574 free(r->var_tmp_dir);
4575 safe_close_pair(r->netns_storage_socket);
6b430fdb 4576 return mfree(r);
613b411c
LP
4577}
4578
f2341e0a 4579int exec_runtime_serialize(Unit *u, ExecRuntime *rt, FILE *f, FDSet *fds) {
613b411c
LP
4580 assert(u);
4581 assert(f);
4582 assert(fds);
4583
4584 if (!rt)
4585 return 0;
4586
4587 if (rt->tmp_dir)
4588 unit_serialize_item(u, f, "tmp-dir", rt->tmp_dir);
4589
4590 if (rt->var_tmp_dir)
4591 unit_serialize_item(u, f, "var-tmp-dir", rt->var_tmp_dir);
4592
4593 if (rt->netns_storage_socket[0] >= 0) {
4594 int copy;
4595
4596 copy = fdset_put_dup(fds, rt->netns_storage_socket[0]);
4597 if (copy < 0)
4598 return copy;
4599
4600 unit_serialize_item_format(u, f, "netns-socket-0", "%i", copy);
4601 }
4602
4603 if (rt->netns_storage_socket[1] >= 0) {
4604 int copy;
4605
4606 copy = fdset_put_dup(fds, rt->netns_storage_socket[1]);
4607 if (copy < 0)
4608 return copy;
4609
4610 unit_serialize_item_format(u, f, "netns-socket-1", "%i", copy);
4611 }
4612
4613 return 0;
4614}
4615
f2341e0a 4616int exec_runtime_deserialize_item(Unit *u, ExecRuntime **rt, const char *key, const char *value, FDSet *fds) {
613b411c
LP
4617 int r;
4618
4619 assert(rt);
4620 assert(key);
4621 assert(value);
4622
4623 if (streq(key, "tmp-dir")) {
4624 char *copy;
4625
4626 r = exec_runtime_allocate(rt);
4627 if (r < 0)
f2341e0a 4628 return log_oom();
613b411c
LP
4629
4630 copy = strdup(value);
4631 if (!copy)
4632 return log_oom();
4633
4634 free((*rt)->tmp_dir);
4635 (*rt)->tmp_dir = copy;
4636
4637 } else if (streq(key, "var-tmp-dir")) {
4638 char *copy;
4639
4640 r = exec_runtime_allocate(rt);
4641 if (r < 0)
f2341e0a 4642 return log_oom();
613b411c
LP
4643
4644 copy = strdup(value);
4645 if (!copy)
4646 return log_oom();
4647
4648 free((*rt)->var_tmp_dir);
4649 (*rt)->var_tmp_dir = copy;
4650
4651 } else if (streq(key, "netns-socket-0")) {
4652 int fd;
4653
4654 r = exec_runtime_allocate(rt);
4655 if (r < 0)
f2341e0a 4656 return log_oom();
613b411c
LP
4657
4658 if (safe_atoi(value, &fd) < 0 || !fdset_contains(fds, fd))
f2341e0a 4659 log_unit_debug(u, "Failed to parse netns socket value: %s", value);
613b411c 4660 else {
03e334a1 4661 safe_close((*rt)->netns_storage_socket[0]);
613b411c
LP
4662 (*rt)->netns_storage_socket[0] = fdset_remove(fds, fd);
4663 }
4664 } else if (streq(key, "netns-socket-1")) {
4665 int fd;
4666
4667 r = exec_runtime_allocate(rt);
4668 if (r < 0)
f2341e0a 4669 return log_oom();
613b411c
LP
4670
4671 if (safe_atoi(value, &fd) < 0 || !fdset_contains(fds, fd))
f2341e0a 4672 log_unit_debug(u, "Failed to parse netns socket value: %s", value);
613b411c 4673 else {
03e334a1 4674 safe_close((*rt)->netns_storage_socket[1]);
613b411c
LP
4675 (*rt)->netns_storage_socket[1] = fdset_remove(fds, fd);
4676 }
4677 } else
4678 return 0;
4679
4680 return 1;
4681}
4682
4683static void *remove_tmpdir_thread(void *p) {
4684 _cleanup_free_ char *path = p;
4685
c6878637 4686 (void) rm_rf(path, REMOVE_ROOT|REMOVE_PHYSICAL);
613b411c
LP
4687 return NULL;
4688}
4689
4690void exec_runtime_destroy(ExecRuntime *rt) {
98b47d54
LP
4691 int r;
4692
613b411c
LP
4693 if (!rt)
4694 return;
4695
4696 /* If there are multiple users of this, let's leave the stuff around */
4697 if (rt->n_ref > 1)
4698 return;
4699
4700 if (rt->tmp_dir) {
4701 log_debug("Spawning thread to nuke %s", rt->tmp_dir);
98b47d54
LP
4702
4703 r = asynchronous_job(remove_tmpdir_thread, rt->tmp_dir);
4704 if (r < 0) {
da927ba9 4705 log_warning_errno(r, "Failed to nuke %s: %m", rt->tmp_dir);
98b47d54
LP
4706 free(rt->tmp_dir);
4707 }
4708
613b411c
LP
4709 rt->tmp_dir = NULL;
4710 }
4711
4712 if (rt->var_tmp_dir) {
4713 log_debug("Spawning thread to nuke %s", rt->var_tmp_dir);
98b47d54
LP
4714
4715 r = asynchronous_job(remove_tmpdir_thread, rt->var_tmp_dir);
4716 if (r < 0) {
da927ba9 4717 log_warning_errno(r, "Failed to nuke %s: %m", rt->var_tmp_dir);
98b47d54
LP
4718 free(rt->var_tmp_dir);
4719 }
4720
613b411c
LP
4721 rt->var_tmp_dir = NULL;
4722 }
4723
3d94f76c 4724 safe_close_pair(rt->netns_storage_socket);
613b411c
LP
4725}
4726
80876c20
LP
/* String names of the ExecInput values, indexed by enum value. The lookup
 * helpers for this table are generated by DEFINE_STRING_TABLE_LOOKUP() below
 * (presumably exec_input_to_string() and a matching from-string function —
 * confirm against the macro definition). */
4727static const char* const exec_input_table[_EXEC_INPUT_MAX] = {
4728 [EXEC_INPUT_NULL] = "null",
4729 [EXEC_INPUT_TTY] = "tty",
4730 [EXEC_INPUT_TTY_FORCE] = "tty-force",
4f2d528d 4731 [EXEC_INPUT_TTY_FAIL] = "tty-fail",
52c239d7
LB
4732 [EXEC_INPUT_SOCKET] = "socket",
4733 [EXEC_INPUT_NAMED_FD] = "fd",
08f3be7a 4734 [EXEC_INPUT_DATA] = "data",
2038c3f5 4735 [EXEC_INPUT_FILE] = "file",
80876c20
LP
4736};
4737
8a0867d6
LP
4738DEFINE_STRING_TABLE_LOOKUP(exec_input, ExecInput);
4739
/* String names of the ExecOutput values, indexed by enum value. The lookup
 * helpers for this table are generated by DEFINE_STRING_TABLE_LOOKUP() below
 * (presumably exec_output_to_string() and a matching from-string function —
 * confirm against the macro definition). */
94f04347 4740static const char* const exec_output_table[_EXEC_OUTPUT_MAX] = {
80876c20 4741 [EXEC_OUTPUT_INHERIT] = "inherit",
94f04347 4742 [EXEC_OUTPUT_NULL] = "null",
80876c20 4743 [EXEC_OUTPUT_TTY] = "tty",
94f04347 4744 [EXEC_OUTPUT_SYSLOG] = "syslog",
28dbc1e8 4745 [EXEC_OUTPUT_SYSLOG_AND_CONSOLE] = "syslog+console",
9a6bca7a 4746 [EXEC_OUTPUT_KMSG] = "kmsg",
28dbc1e8 4747 [EXEC_OUTPUT_KMSG_AND_CONSOLE] = "kmsg+console",
706343f4
LP
4748 [EXEC_OUTPUT_JOURNAL] = "journal",
4749 [EXEC_OUTPUT_JOURNAL_AND_CONSOLE] = "journal+console",
52c239d7
LB
4750 [EXEC_OUTPUT_SOCKET] = "socket",
4751 [EXEC_OUTPUT_NAMED_FD] = "fd",
2038c3f5 4752 [EXEC_OUTPUT_FILE] = "file",
94f04347
LP
4753};
4754
4755DEFINE_STRING_TABLE_LOOKUP(exec_output, ExecOutput);
023a4f67
LP
4756
/* String names of the ExecUtmpMode values, indexed by enum value; the lookup
 * helpers are generated by DEFINE_STRING_TABLE_LOOKUP() below. */
4757static const char* const exec_utmp_mode_table[_EXEC_UTMP_MODE_MAX] = {
4758 [EXEC_UTMP_INIT] = "init",
4759 [EXEC_UTMP_LOGIN] = "login",
4760 [EXEC_UTMP_USER] = "user",
4761};
4762
4763DEFINE_STRING_TABLE_LOOKUP(exec_utmp_mode, ExecUtmpMode);
53f47dfc
YW
4764
/* String names of the ExecPreserveMode values, indexed by enum value. The
 * lookup helpers are generated by the _WITH_BOOLEAN variant of the macro, with
 * EXEC_PRESERVE_YES passed as its third argument.
 * NOTE(review): presumably boolean strings are accepted on parsing, with
 * "true" mapping to EXEC_PRESERVE_YES — confirm against the macro definition. */
4765static const char* const exec_preserve_mode_table[_EXEC_PRESERVE_MODE_MAX] = {
4766 [EXEC_PRESERVE_NO] = "no",
4767 [EXEC_PRESERVE_YES] = "yes",
4768 [EXEC_PRESERVE_RESTART] = "restart",
4769};
4770
4771DEFINE_STRING_TABLE_LOOKUP_WITH_BOOLEAN(exec_preserve_mode, ExecPreserveMode, EXEC_PRESERVE_YES);
3536f49e 4772
/* String names of the ExecDirectoryType values, indexed by enum value.
 * exec_context_dump() uses these strings as the key for the path lines and,
 * with "Mode" appended, for the mode lines. */
72fd1768 4773static const char* const exec_directory_type_table[_EXEC_DIRECTORY_TYPE_MAX] = {
3536f49e
YW
4774 [EXEC_DIRECTORY_RUNTIME] = "RuntimeDirectory",
4775 [EXEC_DIRECTORY_STATE] = "StateDirectory",
4776 [EXEC_DIRECTORY_CACHE] = "CacheDirectory",
4777 [EXEC_DIRECTORY_LOGS] = "LogsDirectory",
4778 [EXEC_DIRECTORY_CONFIGURATION] = "ConfigurationDirectory",
4779};
4780
4781DEFINE_STRING_TABLE_LOOKUP(exec_directory_type, ExecDirectoryType);
b1edf445
LP
4782
/* String names of the ExecKeyringMode values, indexed by enum value; the lookup
 * helpers are generated by DEFINE_STRING_TABLE_LOOKUP() below. */
4783static const char* const exec_keyring_mode_table[_EXEC_KEYRING_MODE_MAX] = {
4784 [EXEC_KEYRING_INHERIT] = "inherit",
4785 [EXEC_KEYRING_PRIVATE] = "private",
4786 [EXEC_KEYRING_SHARED] = "shared",
4787};
4788
4789DEFINE_STRING_TABLE_LOOKUP(exec_keyring_mode, ExecKeyringMode);