]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/core/execute.c
Revert "core/execute: RuntimeDirectory= or friends requires mount namespace"
[thirdparty/systemd.git] / src / core / execute.c
CommitLineData
53e1b683 1/* SPDX-License-Identifier: LGPL-2.1+ */
a7334b09
LP
2/***
3 This file is part of systemd.
4
5 Copyright 2010 Lennart Poettering
6
7 systemd is free software; you can redistribute it and/or modify it
5430f7f2
LP
8 under the terms of the GNU Lesser General Public License as published by
9 the Free Software Foundation; either version 2.1 of the License, or
a7334b09
LP
10 (at your option) any later version.
11
12 systemd is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
5430f7f2 15 Lesser General Public License for more details.
a7334b09 16
5430f7f2 17 You should have received a copy of the GNU Lesser General Public License
a7334b09
LP
18 along with systemd; If not, see <http://www.gnu.org/licenses/>.
19***/
20
034c6ed7
LP
21#include <errno.h>
22#include <fcntl.h>
8dd4c05b
LP
23#include <glob.h>
24#include <grp.h>
25#include <poll.h>
309bff19 26#include <signal.h>
8dd4c05b 27#include <string.h>
19c0b0b9 28#include <sys/capability.h>
d251207d 29#include <sys/eventfd.h>
f3e43635 30#include <sys/mman.h>
8dd4c05b 31#include <sys/personality.h>
94f04347 32#include <sys/prctl.h>
d2ffa389 33#include <sys/shm.h>
8dd4c05b 34#include <sys/socket.h>
451a074f 35#include <sys/stat.h>
d2ffa389 36#include <sys/types.h>
8dd4c05b
LP
37#include <sys/un.h>
38#include <unistd.h>
023a4f67 39#include <utmpx.h>
5cb5a6ff 40
349cc4a5 41#if HAVE_PAM
5b6319dc
LP
42#include <security/pam_appl.h>
43#endif
44
349cc4a5 45#if HAVE_SELINUX
7b52a628
MS
46#include <selinux/selinux.h>
47#endif
48
349cc4a5 49#if HAVE_SECCOMP
17df7223
LP
50#include <seccomp.h>
51#endif
52
349cc4a5 53#if HAVE_APPARMOR
eef65bf3
MS
54#include <sys/apparmor.h>
55#endif
56
24882e06 57#include "sd-messages.h"
8dd4c05b
LP
58
59#include "af-list.h"
b5efdb8a 60#include "alloc-util.h"
349cc4a5 61#if HAVE_APPARMOR
3ffd4af2
LP
62#include "apparmor-util.h"
63#endif
8dd4c05b
LP
64#include "async.h"
65#include "barrier.h"
8dd4c05b 66#include "cap-list.h"
430f0182 67#include "capability-util.h"
a1164ae3 68#include "chown-recursive.h"
da681e1b 69#include "cpu-set-util.h"
f6a6225e 70#include "def.h"
4d1a6904 71#include "env-util.h"
17df7223 72#include "errno-list.h"
3ffd4af2 73#include "execute.h"
8dd4c05b 74#include "exit-status.h"
3ffd4af2 75#include "fd-util.h"
8dd4c05b 76#include "fileio.h"
f97b34a6 77#include "format-util.h"
f4f15635 78#include "fs-util.h"
7d50b32a 79#include "glob-util.h"
c004493c 80#include "io-util.h"
8dd4c05b 81#include "ioprio.h"
a1164ae3 82#include "label.h"
8dd4c05b
LP
83#include "log.h"
84#include "macro.h"
85#include "missing.h"
86#include "mkdir.h"
87#include "namespace.h"
6bedfcbb 88#include "parse-util.h"
8dd4c05b 89#include "path-util.h"
0b452006 90#include "process-util.h"
78f22b97 91#include "rlimit-util.h"
8dd4c05b 92#include "rm-rf.h"
349cc4a5 93#if HAVE_SECCOMP
3ffd4af2
LP
94#include "seccomp-util.h"
95#endif
8dd4c05b 96#include "securebits.h"
07d46372 97#include "securebits-util.h"
8dd4c05b 98#include "selinux-util.h"
24882e06 99#include "signal-util.h"
8dd4c05b 100#include "smack-util.h"
fd63e712 101#include "special.h"
949befd3 102#include "stat-util.h"
8b43440b 103#include "string-table.h"
07630cea 104#include "string-util.h"
8dd4c05b 105#include "strv.h"
7ccbd1ae 106#include "syslog-util.h"
8dd4c05b
LP
107#include "terminal-util.h"
108#include "unit.h"
b1d4f8e1 109#include "user-util.h"
8dd4c05b
LP
110#include "util.h"
111#include "utmp-wtmp.h"
5cb5a6ff 112
e056b01d 113#define IDLE_TIMEOUT_USEC (5*USEC_PER_SEC)
31a7eb86 114#define IDLE_TIMEOUT2_USEC (1*USEC_PER_SEC)
e6a26745 115
02a51aba
LP
116/* This assumes there is a 'tty' group */
117#define TTY_MODE 0620
118
531dca78
LP
119#define SNDBUF_SIZE (8*1024*1024)
120
034c6ed7
LP
/* Rearranges the passed file descriptors so that fds[i] ends up being fd number i+3, i.e. the array occupies
 * the contiguous range right after stdin/stdout/stderr. The array is updated in place to reflect the new fd
 * numbers. Returns 0 on success, or a negative errno-style error if duplicating an fd fails. */
static int shift_fds(int fds[], unsigned n_fds) {
        int first = 0, retry_at;

        if (n_fds == 0)
                return 0;

        /* Note: the contents of the fds array are modified! */

        assert(fds);

        do {
                int idx;

                retry_at = -1;

                for (idx = first; idx < (int) n_fds; idx++) {
                        int wanted = idx + 3, dup_fd;

                        /* Nothing to do if this one already sits in its slot */
                        if (fds[idx] == wanted)
                                continue;

                        /* F_DUPFD hands out the lowest free fd >= wanted, which is not necessarily
                         * 'wanted' itself if that number is still occupied. */
                        dup_fd = fcntl(fds[idx], F_DUPFD, wanted);
                        if (dup_fd < 0)
                                return -errno;

                        safe_close(fds[idx]);
                        fds[idx] = dup_fd;

                        /* The slot we wanted was taken? Remember the first index where this happened,
                         * so that the next pass starts from there. */
                        if (retry_at < 0 && dup_fd != wanted)
                                retry_at = idx;
                }

                first = retry_at;
        } while (retry_at >= 0);

        return 0;
}
165
4c47affc
FB
/* Adjusts the file flags of all fds we are about to pass on: FD_CLOEXEC is dropped from every one of them,
 * and for the first n_socket_fds entries O_NONBLOCK is set or cleared according to 'nonblock'. Returns 0 on
 * success, or the negative error of the first helper call that failed. */
static int flags_fds(const int fds[], unsigned n_storage_fds, unsigned n_socket_fds, bool nonblock) {
        unsigned total, idx;

        total = n_storage_fds + n_socket_fds;
        if (total == 0)
                return 0;

        assert(fds);

        for (idx = 0; idx < total; idx++) {
                int k;

                /* O_NONBLOCK is only touched for the leading (socket) part of the array */
                if (idx < n_socket_fds) {
                        k = fd_nonblock(fds[idx], nonblock);
                        if (k < 0)
                                return k;
                }

                /* FD_CLOEXEC is turned off unconditionally: these fds are supposed to survive
                 * into the child we are going to execute. */
                k = fd_cloexec(fds[idx], false);
                if (k < 0)
                        return k;
        }

        return 0;
}
198
1e22b5cd 199static const char *exec_context_tty_path(const ExecContext *context) {
80876c20
LP
200 assert(context);
201
1e22b5cd
LP
202 if (context->stdio_as_fds)
203 return NULL;
204
80876c20
LP
205 if (context->tty_path)
206 return context->tty_path;
207
208 return "/dev/console";
209}
210
1e22b5cd
LP
211static void exec_context_tty_reset(const ExecContext *context, const ExecParameters *p) {
212 const char *path;
213
6ea832a2
LP
214 assert(context);
215
1e22b5cd 216 path = exec_context_tty_path(context);
6ea832a2 217
1e22b5cd
LP
218 if (context->tty_vhangup) {
219 if (p && p->stdin_fd >= 0)
220 (void) terminal_vhangup_fd(p->stdin_fd);
221 else if (path)
222 (void) terminal_vhangup(path);
223 }
6ea832a2 224
1e22b5cd
LP
225 if (context->tty_reset) {
226 if (p && p->stdin_fd >= 0)
227 (void) reset_terminal_fd(p->stdin_fd, true);
228 else if (path)
229 (void) reset_terminal(path);
230 }
231
232 if (context->tty_vt_disallocate && path)
233 (void) vt_disallocate(path);
6ea832a2
LP
234}
235
6af760f3
LP
236static bool is_terminal_input(ExecInput i) {
237 return IN_SET(i,
238 EXEC_INPUT_TTY,
239 EXEC_INPUT_TTY_FORCE,
240 EXEC_INPUT_TTY_FAIL);
241}
242
3a1286b6 243static bool is_terminal_output(ExecOutput o) {
6af760f3
LP
244 return IN_SET(o,
245 EXEC_OUTPUT_TTY,
246 EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
247 EXEC_OUTPUT_KMSG_AND_CONSOLE,
248 EXEC_OUTPUT_JOURNAL_AND_CONSOLE);
249}
250
aac8c0c3
LP
251static bool is_syslog_output(ExecOutput o) {
252 return IN_SET(o,
253 EXEC_OUTPUT_SYSLOG,
254 EXEC_OUTPUT_SYSLOG_AND_CONSOLE);
255}
256
257static bool is_kmsg_output(ExecOutput o) {
258 return IN_SET(o,
259 EXEC_OUTPUT_KMSG,
260 EXEC_OUTPUT_KMSG_AND_CONSOLE);
261}
262
6af760f3
LP
263static bool exec_context_needs_term(const ExecContext *c) {
264 assert(c);
265
266 /* Return true if the execution context suggests we should set $TERM to something useful. */
267
268 if (is_terminal_input(c->std_input))
269 return true;
270
271 if (is_terminal_output(c->std_output))
272 return true;
273
274 if (is_terminal_output(c->std_error))
275 return true;
276
277 return !!c->tty_path;
3a1286b6
MS
278}
279
80876c20 280static int open_null_as(int flags, int nfd) {
046a82c1 281 int fd;
071830ff 282
80876c20 283 assert(nfd >= 0);
071830ff 284
613b411c
LP
285 fd = open("/dev/null", flags|O_NOCTTY);
286 if (fd < 0)
071830ff
LP
287 return -errno;
288
046a82c1 289 return move_fd(fd, nfd, false);
071830ff
LP
290}
291
524daa8c 292static int connect_journal_socket(int fd, uid_t uid, gid_t gid) {
92a17af9 293 static const union sockaddr_union sa = {
b92bea5d
ZJS
294 .un.sun_family = AF_UNIX,
295 .un.sun_path = "/run/systemd/journal/stdout",
296 };
524daa8c
ZJS
297 uid_t olduid = UID_INVALID;
298 gid_t oldgid = GID_INVALID;
299 int r;
300
cad93f29 301 if (gid_is_valid(gid)) {
524daa8c
ZJS
302 oldgid = getgid();
303
92a17af9 304 if (setegid(gid) < 0)
524daa8c
ZJS
305 return -errno;
306 }
307
cad93f29 308 if (uid_is_valid(uid)) {
524daa8c
ZJS
309 olduid = getuid();
310
92a17af9 311 if (seteuid(uid) < 0) {
524daa8c
ZJS
312 r = -errno;
313 goto restore_gid;
314 }
315 }
316
92a17af9 317 r = connect(fd, &sa.sa, SOCKADDR_UN_LEN(sa.un)) < 0 ? -errno : 0;
524daa8c
ZJS
318
319 /* If we fail to restore the uid or gid, things will likely
320 fail later on. This should only happen if an LSM interferes. */
321
cad93f29 322 if (uid_is_valid(uid))
524daa8c
ZJS
323 (void) seteuid(olduid);
324
325 restore_gid:
cad93f29 326 if (gid_is_valid(gid))
524daa8c
ZJS
327 (void) setegid(oldgid);
328
329 return r;
330}
331
fd1f9c89 332static int connect_logger_as(
7a1ab780 333 Unit *unit,
fd1f9c89 334 const ExecContext *context,
af635cf3 335 const ExecParameters *params,
fd1f9c89
LP
336 ExecOutput output,
337 const char *ident,
fd1f9c89
LP
338 int nfd,
339 uid_t uid,
340 gid_t gid) {
341
524daa8c 342 int fd, r;
071830ff
LP
343
344 assert(context);
af635cf3 345 assert(params);
80876c20
LP
346 assert(output < _EXEC_OUTPUT_MAX);
347 assert(ident);
348 assert(nfd >= 0);
071830ff 349
54fe0cdb
LP
350 fd = socket(AF_UNIX, SOCK_STREAM, 0);
351 if (fd < 0)
80876c20 352 return -errno;
071830ff 353
524daa8c
ZJS
354 r = connect_journal_socket(fd, uid, gid);
355 if (r < 0)
356 return r;
071830ff 357
80876c20 358 if (shutdown(fd, SHUT_RD) < 0) {
03e334a1 359 safe_close(fd);
80876c20
LP
360 return -errno;
361 }
071830ff 362
fd1f9c89 363 (void) fd_inc_sndbuf(fd, SNDBUF_SIZE);
531dca78 364
80876c20 365 dprintf(fd,
62bca2c6 366 "%s\n"
80876c20
LP
367 "%s\n"
368 "%i\n"
54fe0cdb
LP
369 "%i\n"
370 "%i\n"
371 "%i\n"
4f4a1dbf 372 "%i\n",
c867611e 373 context->syslog_identifier ?: ident,
af635cf3 374 params->flags & EXEC_PASS_LOG_UNIT ? unit->id : "",
54fe0cdb
LP
375 context->syslog_priority,
376 !!context->syslog_level_prefix,
aac8c0c3
LP
377 is_syslog_output(output),
378 is_kmsg_output(output),
3a1286b6 379 is_terminal_output(output));
80876c20 380
046a82c1 381 return move_fd(fd, nfd, false);
80876c20 382}
3a274a21 383static int open_terminal_as(const char *path, int flags, int nfd) {
046a82c1 384 int fd;
071830ff 385
80876c20
LP
386 assert(path);
387 assert(nfd >= 0);
fd1f9c89 388
3a274a21 389 fd = open_terminal(path, flags | O_NOCTTY);
3cc2aff1 390 if (fd < 0)
80876c20 391 return fd;
071830ff 392
046a82c1 393 return move_fd(fd, nfd, false);
80876c20 394}
071830ff 395
2038c3f5
LP
396static int acquire_path(const char *path, int flags, mode_t mode) {
397 union sockaddr_union sa = {
398 .sa.sa_family = AF_UNIX,
399 };
80876c20 400 int fd, r;
071830ff 401
80876c20 402 assert(path);
071830ff 403
2038c3f5
LP
404 if (IN_SET(flags & O_ACCMODE, O_WRONLY, O_RDWR))
405 flags |= O_CREAT;
406
407 fd = open(path, flags|O_NOCTTY, mode);
408 if (fd >= 0)
80876c20 409 return fd;
071830ff 410
2038c3f5
LP
411 if (errno != ENXIO) /* ENXIO is returned when we try to open() an AF_UNIX file system socket on Linux */
412 return -errno;
413 if (strlen(path) > sizeof(sa.un.sun_path)) /* Too long, can't be a UNIX socket */
414 return -ENXIO;
415
416 /* So, it appears the specified path could be an AF_UNIX socket. Let's see if we can connect to it. */
417
418 fd = socket(AF_UNIX, SOCK_STREAM, 0);
419 if (fd < 0)
420 return -errno;
421
422 strncpy(sa.un.sun_path, path, sizeof(sa.un.sun_path));
423 if (connect(fd, &sa.sa, SOCKADDR_UN_LEN(sa.un)) < 0) {
03e334a1 424 safe_close(fd);
2038c3f5
LP
425 return errno == EINVAL ? -ENXIO : -errno; /* Propagate initial error if we get EINVAL, i.e. we have
426 * indication that his wasn't an AF_UNIX socket after all */
427 }
071830ff 428
2038c3f5
LP
429 if ((flags & O_ACCMODE) == O_RDONLY)
430 r = shutdown(fd, SHUT_WR);
431 else if ((flags & O_ACCMODE) == O_WRONLY)
432 r = shutdown(fd, SHUT_RD);
433 else
434 return fd;
435 if (r < 0) {
436 safe_close(fd);
437 return -errno;
438 }
439
440 return fd;
80876c20 441}
071830ff 442
08f3be7a
LP
443static int fixup_input(
444 const ExecContext *context,
445 int socket_fd,
446 bool apply_tty_stdin) {
447
448 ExecInput std_input;
449
450 assert(context);
451
452 std_input = context->std_input;
1e3ad081
LP
453
454 if (is_terminal_input(std_input) && !apply_tty_stdin)
455 return EXEC_INPUT_NULL;
071830ff 456
03fd9c49 457 if (std_input == EXEC_INPUT_SOCKET && socket_fd < 0)
4f2d528d
LP
458 return EXEC_INPUT_NULL;
459
08f3be7a
LP
460 if (std_input == EXEC_INPUT_DATA && context->stdin_data_size == 0)
461 return EXEC_INPUT_NULL;
462
03fd9c49 463 return std_input;
4f2d528d
LP
464}
465
03fd9c49 466static int fixup_output(ExecOutput std_output, int socket_fd) {
4f2d528d 467
03fd9c49 468 if (std_output == EXEC_OUTPUT_SOCKET && socket_fd < 0)
4f2d528d
LP
469 return EXEC_OUTPUT_INHERIT;
470
03fd9c49 471 return std_output;
4f2d528d
LP
472}
473
a34ceba6
LP
474static int setup_input(
475 const ExecContext *context,
476 const ExecParameters *params,
52c239d7
LB
477 int socket_fd,
478 int named_iofds[3]) {
a34ceba6 479
4f2d528d
LP
480 ExecInput i;
481
482 assert(context);
a34ceba6
LP
483 assert(params);
484
485 if (params->stdin_fd >= 0) {
486 if (dup2(params->stdin_fd, STDIN_FILENO) < 0)
487 return -errno;
488
489 /* Try to make this the controlling tty, if it is a tty, and reset it */
1fb0682e
LP
490 if (isatty(STDIN_FILENO)) {
491 (void) ioctl(STDIN_FILENO, TIOCSCTTY, context->std_input == EXEC_INPUT_TTY_FORCE);
492 (void) reset_terminal_fd(STDIN_FILENO, true);
493 }
a34ceba6
LP
494
495 return STDIN_FILENO;
496 }
4f2d528d 497
08f3be7a 498 i = fixup_input(context, socket_fd, params->flags & EXEC_APPLY_TTY_STDIN);
4f2d528d
LP
499
500 switch (i) {
071830ff 501
80876c20
LP
502 case EXEC_INPUT_NULL:
503 return open_null_as(O_RDONLY, STDIN_FILENO);
504
505 case EXEC_INPUT_TTY:
506 case EXEC_INPUT_TTY_FORCE:
507 case EXEC_INPUT_TTY_FAIL: {
046a82c1 508 int fd;
071830ff 509
1e22b5cd 510 fd = acquire_terminal(exec_context_tty_path(context),
970edce6
ZJS
511 i == EXEC_INPUT_TTY_FAIL,
512 i == EXEC_INPUT_TTY_FORCE,
513 false,
3a43da28 514 USEC_INFINITY);
970edce6 515 if (fd < 0)
80876c20
LP
516 return fd;
517
046a82c1 518 return move_fd(fd, STDIN_FILENO, false);
80876c20
LP
519 }
520
4f2d528d 521 case EXEC_INPUT_SOCKET:
e75a9ed1
LP
522 assert(socket_fd >= 0);
523
4f2d528d
LP
524 return dup2(socket_fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
525
52c239d7 526 case EXEC_INPUT_NAMED_FD:
e75a9ed1
LP
527 assert(named_iofds[STDIN_FILENO] >= 0);
528
52c239d7
LB
529 (void) fd_nonblock(named_iofds[STDIN_FILENO], false);
530 return dup2(named_iofds[STDIN_FILENO], STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
531
08f3be7a
LP
532 case EXEC_INPUT_DATA: {
533 int fd;
534
535 fd = acquire_data_fd(context->stdin_data, context->stdin_data_size, 0);
536 if (fd < 0)
537 return fd;
538
539 return move_fd(fd, STDIN_FILENO, false);
540 }
541
2038c3f5
LP
542 case EXEC_INPUT_FILE: {
543 bool rw;
544 int fd;
545
546 assert(context->stdio_file[STDIN_FILENO]);
547
548 rw = (context->std_output == EXEC_OUTPUT_FILE && streq_ptr(context->stdio_file[STDIN_FILENO], context->stdio_file[STDOUT_FILENO])) ||
549 (context->std_error == EXEC_OUTPUT_FILE && streq_ptr(context->stdio_file[STDIN_FILENO], context->stdio_file[STDERR_FILENO]));
550
551 fd = acquire_path(context->stdio_file[STDIN_FILENO], rw ? O_RDWR : O_RDONLY, 0666 & ~context->umask);
552 if (fd < 0)
553 return fd;
554
555 return move_fd(fd, STDIN_FILENO, false);
556 }
557
80876c20
LP
558 default:
559 assert_not_reached("Unknown input type");
560 }
561}
562
a34ceba6
LP
563static int setup_output(
564 Unit *unit,
565 const ExecContext *context,
566 const ExecParameters *params,
567 int fileno,
568 int socket_fd,
52c239d7 569 int named_iofds[3],
a34ceba6 570 const char *ident,
7bce046b
LP
571 uid_t uid,
572 gid_t gid,
573 dev_t *journal_stream_dev,
574 ino_t *journal_stream_ino) {
a34ceba6 575
4f2d528d
LP
576 ExecOutput o;
577 ExecInput i;
47c1d80d 578 int r;
4f2d528d 579
f2341e0a 580 assert(unit);
80876c20 581 assert(context);
a34ceba6 582 assert(params);
80876c20 583 assert(ident);
7bce046b
LP
584 assert(journal_stream_dev);
585 assert(journal_stream_ino);
80876c20 586
a34ceba6
LP
587 if (fileno == STDOUT_FILENO && params->stdout_fd >= 0) {
588
589 if (dup2(params->stdout_fd, STDOUT_FILENO) < 0)
590 return -errno;
591
592 return STDOUT_FILENO;
593 }
594
595 if (fileno == STDERR_FILENO && params->stderr_fd >= 0) {
596 if (dup2(params->stderr_fd, STDERR_FILENO) < 0)
597 return -errno;
598
599 return STDERR_FILENO;
600 }
601
08f3be7a 602 i = fixup_input(context, socket_fd, params->flags & EXEC_APPLY_TTY_STDIN);
03fd9c49 603 o = fixup_output(context->std_output, socket_fd);
4f2d528d 604
eb17e935
MS
605 if (fileno == STDERR_FILENO) {
606 ExecOutput e;
607 e = fixup_output(context->std_error, socket_fd);
80876c20 608
eb17e935
MS
609 /* This expects the input and output are already set up */
610
611 /* Don't change the stderr file descriptor if we inherit all
612 * the way and are not on a tty */
613 if (e == EXEC_OUTPUT_INHERIT &&
614 o == EXEC_OUTPUT_INHERIT &&
615 i == EXEC_INPUT_NULL &&
616 !is_terminal_input(context->std_input) &&
617 getppid () != 1)
618 return fileno;
619
620 /* Duplicate from stdout if possible */
52c239d7 621 if ((e == o && e != EXEC_OUTPUT_NAMED_FD) || e == EXEC_OUTPUT_INHERIT)
eb17e935 622 return dup2(STDOUT_FILENO, fileno) < 0 ? -errno : fileno;
071830ff 623
eb17e935 624 o = e;
80876c20 625
eb17e935 626 } else if (o == EXEC_OUTPUT_INHERIT) {
21d21ea4
LP
627 /* If input got downgraded, inherit the original value */
628 if (i == EXEC_INPUT_NULL && is_terminal_input(context->std_input))
1e22b5cd 629 return open_terminal_as(exec_context_tty_path(context), O_WRONLY, fileno);
21d21ea4 630
08f3be7a
LP
631 /* If the input is connected to anything that's not a /dev/null or a data fd, inherit that... */
632 if (!IN_SET(i, EXEC_INPUT_NULL, EXEC_INPUT_DATA))
eb17e935 633 return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
071830ff 634
acb591e4
LP
635 /* If we are not started from PID 1 we just inherit STDOUT from our parent process. */
636 if (getppid() != 1)
eb17e935 637 return fileno;
94f04347 638
eb17e935
MS
639 /* We need to open /dev/null here anew, to get the right access mode. */
640 return open_null_as(O_WRONLY, fileno);
071830ff 641 }
94f04347 642
eb17e935 643 switch (o) {
80876c20
LP
644
645 case EXEC_OUTPUT_NULL:
eb17e935 646 return open_null_as(O_WRONLY, fileno);
80876c20
LP
647
648 case EXEC_OUTPUT_TTY:
4f2d528d 649 if (is_terminal_input(i))
eb17e935 650 return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
80876c20
LP
651
652 /* We don't reset the terminal if this is just about output */
1e22b5cd 653 return open_terminal_as(exec_context_tty_path(context), O_WRONLY, fileno);
80876c20
LP
654
655 case EXEC_OUTPUT_SYSLOG:
28dbc1e8 656 case EXEC_OUTPUT_SYSLOG_AND_CONSOLE:
9a6bca7a 657 case EXEC_OUTPUT_KMSG:
28dbc1e8 658 case EXEC_OUTPUT_KMSG_AND_CONSOLE:
706343f4
LP
659 case EXEC_OUTPUT_JOURNAL:
660 case EXEC_OUTPUT_JOURNAL_AND_CONSOLE:
af635cf3 661 r = connect_logger_as(unit, context, params, o, ident, fileno, uid, gid);
47c1d80d 662 if (r < 0) {
82677ae4 663 log_unit_warning_errno(unit, r, "Failed to connect %s to the journal socket, ignoring: %m", fileno == STDOUT_FILENO ? "stdout" : "stderr");
eb17e935 664 r = open_null_as(O_WRONLY, fileno);
7bce046b
LP
665 } else {
666 struct stat st;
667
668 /* If we connected this fd to the journal via a stream, patch the device/inode into the passed
669 * parameters, but only then. This is useful so that we can set $JOURNAL_STREAM that permits
ab2116b1
LP
670 * services to detect whether they are connected to the journal or not.
671 *
672 * If both stdout and stderr are connected to a stream then let's make sure to store the data
673 * about STDERR as that's usually the best way to do logging. */
7bce046b 674
ab2116b1
LP
675 if (fstat(fileno, &st) >= 0 &&
676 (*journal_stream_ino == 0 || fileno == STDERR_FILENO)) {
7bce046b
LP
677 *journal_stream_dev = st.st_dev;
678 *journal_stream_ino = st.st_ino;
679 }
47c1d80d
MS
680 }
681 return r;
4f2d528d
LP
682
683 case EXEC_OUTPUT_SOCKET:
684 assert(socket_fd >= 0);
e75a9ed1 685
eb17e935 686 return dup2(socket_fd, fileno) < 0 ? -errno : fileno;
94f04347 687
52c239d7 688 case EXEC_OUTPUT_NAMED_FD:
e75a9ed1
LP
689 assert(named_iofds[fileno] >= 0);
690
52c239d7
LB
691 (void) fd_nonblock(named_iofds[fileno], false);
692 return dup2(named_iofds[fileno], fileno) < 0 ? -errno : fileno;
693
2038c3f5
LP
694 case EXEC_OUTPUT_FILE: {
695 bool rw;
696 int fd;
697
698 assert(context->stdio_file[fileno]);
699
700 rw = context->std_input == EXEC_INPUT_FILE &&
701 streq_ptr(context->stdio_file[fileno], context->stdio_file[STDIN_FILENO]);
702
703 if (rw)
704 return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
705
706 fd = acquire_path(context->stdio_file[fileno], O_WRONLY, 0666 & ~context->umask);
707 if (fd < 0)
708 return fd;
709
710 return move_fd(fd, fileno, false);
711 }
712
94f04347 713 default:
80876c20 714 assert_not_reached("Unknown error type");
94f04347 715 }
071830ff
LP
716}
717
02a51aba
LP
718static int chown_terminal(int fd, uid_t uid) {
719 struct stat st;
720
721 assert(fd >= 0);
02a51aba 722
1ff74fb6
LP
723 /* Before we chown/chmod the TTY, let's ensure this is actually a tty */
724 if (isatty(fd) < 1)
725 return 0;
726
02a51aba 727 /* This might fail. What matters are the results. */
bab45044
LP
728 (void) fchown(fd, uid, -1);
729 (void) fchmod(fd, TTY_MODE);
02a51aba
LP
730
731 if (fstat(fd, &st) < 0)
732 return -errno;
733
d8b4e2e9 734 if (st.st_uid != uid || (st.st_mode & 0777) != TTY_MODE)
02a51aba
LP
735 return -EPERM;
736
737 return 0;
738}
739
7d5ceb64 740static int setup_confirm_stdio(const char *vc, int *_saved_stdin, int *_saved_stdout) {
3d18b167
LP
741 _cleanup_close_ int fd = -1, saved_stdin = -1, saved_stdout = -1;
742 int r;
80876c20 743
80876c20
LP
744 assert(_saved_stdin);
745 assert(_saved_stdout);
746
af6da548
LP
747 saved_stdin = fcntl(STDIN_FILENO, F_DUPFD, 3);
748 if (saved_stdin < 0)
749 return -errno;
80876c20 750
af6da548 751 saved_stdout = fcntl(STDOUT_FILENO, F_DUPFD, 3);
3d18b167
LP
752 if (saved_stdout < 0)
753 return -errno;
80876c20 754
7d5ceb64 755 fd = acquire_terminal(vc, false, false, false, DEFAULT_CONFIRM_USEC);
3d18b167
LP
756 if (fd < 0)
757 return fd;
80876c20 758
af6da548
LP
759 r = chown_terminal(fd, getuid());
760 if (r < 0)
3d18b167 761 return r;
02a51aba 762
3d18b167
LP
763 r = reset_terminal_fd(fd, true);
764 if (r < 0)
765 return r;
80876c20 766
3d18b167
LP
767 if (dup2(fd, STDIN_FILENO) < 0)
768 return -errno;
769
770 if (dup2(fd, STDOUT_FILENO) < 0)
771 return -errno;
80876c20
LP
772
773 if (fd >= 2)
03e334a1 774 safe_close(fd);
3d18b167 775 fd = -1;
80876c20
LP
776
777 *_saved_stdin = saved_stdin;
778 *_saved_stdout = saved_stdout;
779
3d18b167 780 saved_stdin = saved_stdout = -1;
80876c20 781
3d18b167 782 return 0;
80876c20
LP
783}
784
63d77c92 785static void write_confirm_error_fd(int err, int fd, const Unit *u) {
3b20f877
FB
786 assert(err < 0);
787
788 if (err == -ETIMEDOUT)
63d77c92 789 dprintf(fd, "Confirmation question timed out for %s, assuming positive response.\n", u->id);
3b20f877
FB
790 else {
791 errno = -err;
63d77c92 792 dprintf(fd, "Couldn't ask confirmation for %s: %m, assuming positive response.\n", u->id);
3b20f877
FB
793 }
794}
795
63d77c92 796static void write_confirm_error(int err, const char *vc, const Unit *u) {
03e334a1 797 _cleanup_close_ int fd = -1;
80876c20 798
3b20f877 799 assert(vc);
80876c20 800
7d5ceb64 801 fd = open_terminal(vc, O_WRONLY|O_NOCTTY|O_CLOEXEC);
af6da548 802 if (fd < 0)
3b20f877 803 return;
80876c20 804
63d77c92 805 write_confirm_error_fd(err, fd, u);
af6da548 806}
80876c20 807
/* Undoes setup_confirm_stdio(): releases the terminal, puts the saved stdin/stdout back in place (where
 * valid) and closes the saved fds, storing safe_close()'s return value back in *saved_stdin/*saved_stdout.
 * Returns 0 on success, or the negative errno-style error of the last dup2() that failed. Both fds are
 * processed and closed regardless of errors. */
static int restore_confirm_stdio(int *saved_stdin, int *saved_stdout) {
        int ret = 0;

        assert(saved_stdin);
        assert(saved_stdout);

        release_terminal();

        if (*saved_stdin >= 0 && dup2(*saved_stdin, STDIN_FILENO) < 0)
                ret = -errno;

        if (*saved_stdout >= 0 && dup2(*saved_stdout, STDOUT_FILENO) < 0)
                ret = -errno;

        *saved_stdin = safe_close(*saved_stdin);
        *saved_stdout = safe_close(*saved_stdout);

        return ret;
}
829
3b20f877
FB
/* Possible results of ask_for_confirmation() */
enum {
        CONFIRM_PRETEND_FAILURE = -1, /* don't execute the command, pretend it failed */
        CONFIRM_PRETEND_SUCCESS =  0, /* don't execute the command, pretend it succeeded */
        CONFIRM_EXECUTE         =  1, /* go ahead and execute the command */
};
835
eedf223a 836static int ask_for_confirmation(const char *vc, Unit *u, const char *cmdline) {
af6da548 837 int saved_stdout = -1, saved_stdin = -1, r;
2bcd3c26 838 _cleanup_free_ char *e = NULL;
3b20f877 839 char c;
af6da548 840
3b20f877 841 /* For any internal errors, assume a positive response. */
7d5ceb64 842 r = setup_confirm_stdio(vc, &saved_stdin, &saved_stdout);
3b20f877 843 if (r < 0) {
63d77c92 844 write_confirm_error(r, vc, u);
3b20f877
FB
845 return CONFIRM_EXECUTE;
846 }
af6da548 847
b0eb2944
FB
848 /* confirm_spawn might have been disabled while we were sleeping. */
849 if (manager_is_confirm_spawn_disabled(u->manager)) {
850 r = 1;
851 goto restore_stdio;
852 }
af6da548 853
2bcd3c26
FB
854 e = ellipsize(cmdline, 60, 100);
855 if (!e) {
856 log_oom();
857 r = CONFIRM_EXECUTE;
858 goto restore_stdio;
859 }
af6da548 860
d172b175 861 for (;;) {
539622bd 862 r = ask_char(&c, "yfshiDjcn", "Execute %s? [y, f, s – h for help] ", e);
d172b175 863 if (r < 0) {
63d77c92 864 write_confirm_error_fd(r, STDOUT_FILENO, u);
d172b175
FB
865 r = CONFIRM_EXECUTE;
866 goto restore_stdio;
867 }
af6da548 868
d172b175 869 switch (c) {
b0eb2944
FB
870 case 'c':
871 printf("Resuming normal execution.\n");
872 manager_disable_confirm_spawn();
873 r = 1;
874 break;
dd6f9ac0
FB
875 case 'D':
876 unit_dump(u, stdout, " ");
877 continue; /* ask again */
d172b175
FB
878 case 'f':
879 printf("Failing execution.\n");
880 r = CONFIRM_PRETEND_FAILURE;
881 break;
882 case 'h':
b0eb2944
FB
883 printf(" c - continue, proceed without asking anymore\n"
884 " D - dump, show the state of the unit\n"
dd6f9ac0 885 " f - fail, don't execute the command and pretend it failed\n"
d172b175 886 " h - help\n"
eedf223a 887 " i - info, show a short summary of the unit\n"
56fde33a 888 " j - jobs, show jobs that are in progress\n"
d172b175
FB
889 " s - skip, don't execute the command and pretend it succeeded\n"
890 " y - yes, execute the command\n");
dd6f9ac0 891 continue; /* ask again */
eedf223a
FB
892 case 'i':
893 printf(" Description: %s\n"
894 " Unit: %s\n"
895 " Command: %s\n",
896 u->id, u->description, cmdline);
897 continue; /* ask again */
56fde33a
FB
898 case 'j':
899 manager_dump_jobs(u->manager, stdout, " ");
900 continue; /* ask again */
539622bd
FB
901 case 'n':
902 /* 'n' was removed in favor of 'f'. */
903 printf("Didn't understand 'n', did you mean 'f'?\n");
904 continue; /* ask again */
d172b175
FB
905 case 's':
906 printf("Skipping execution.\n");
907 r = CONFIRM_PRETEND_SUCCESS;
908 break;
909 case 'y':
910 r = CONFIRM_EXECUTE;
911 break;
912 default:
913 assert_not_reached("Unhandled choice");
914 }
3b20f877 915 break;
3b20f877 916 }
af6da548 917
3b20f877 918restore_stdio:
af6da548 919 restore_confirm_stdio(&saved_stdin, &saved_stdout);
af6da548 920 return r;
80876c20
LP
921}
922
4d885bd3
DH
923static int get_fixed_user(const ExecContext *c, const char **user,
924 uid_t *uid, gid_t *gid,
925 const char **home, const char **shell) {
81a2b7ce 926 int r;
4d885bd3 927 const char *name;
81a2b7ce 928
4d885bd3 929 assert(c);
81a2b7ce 930
23deef88
LP
931 if (!c->user)
932 return 0;
933
4d885bd3
DH
934 /* Note that we don't set $HOME or $SHELL if they are not particularly enlightening anyway
935 * (i.e. are "/" or "/bin/nologin"). */
81a2b7ce 936
23deef88 937 name = c->user;
4d885bd3
DH
938 r = get_user_creds_clean(&name, uid, gid, home, shell);
939 if (r < 0)
940 return r;
81a2b7ce 941
4d885bd3
DH
942 *user = name;
943 return 0;
944}
945
946static int get_fixed_group(const ExecContext *c, const char **group, gid_t *gid) {
947 int r;
948 const char *name;
949
950 assert(c);
951
952 if (!c->group)
953 return 0;
954
955 name = c->group;
956 r = get_group_creds(&name, gid);
957 if (r < 0)
958 return r;
959
960 *group = name;
961 return 0;
962}
963
cdc5d5c5
DH
964static int get_supplementary_groups(const ExecContext *c, const char *user,
965 const char *group, gid_t gid,
966 gid_t **supplementary_gids, int *ngids) {
4d885bd3
DH
967 char **i;
968 int r, k = 0;
969 int ngroups_max;
970 bool keep_groups = false;
971 gid_t *groups = NULL;
972 _cleanup_free_ gid_t *l_gids = NULL;
973
974 assert(c);
975
bbeea271
DH
976 /*
977 * If user is given, then lookup GID and supplementary groups list.
978 * We avoid NSS lookups for gid=0. Also we have to initialize groups
cdc5d5c5
DH
979 * here and as early as possible so we keep the list of supplementary
980 * groups of the caller.
bbeea271
DH
981 */
982 if (user && gid_is_valid(gid) && gid != 0) {
983 /* First step, initialize groups from /etc/groups */
984 if (initgroups(user, gid) < 0)
985 return -errno;
986
987 keep_groups = true;
988 }
989
ac6e8be6 990 if (strv_isempty(c->supplementary_groups))
4d885bd3
DH
991 return 0;
992
366ddd25
DH
993 /*
994 * If SupplementaryGroups= was passed then NGROUPS_MAX has to
995 * be positive, otherwise fail.
996 */
997 errno = 0;
998 ngroups_max = (int) sysconf(_SC_NGROUPS_MAX);
999 if (ngroups_max <= 0) {
1000 if (errno > 0)
1001 return -errno;
1002 else
1003 return -EOPNOTSUPP; /* For all other values */
1004 }
1005
4d885bd3
DH
1006 l_gids = new(gid_t, ngroups_max);
1007 if (!l_gids)
1008 return -ENOMEM;
81a2b7ce 1009
4d885bd3
DH
1010 if (keep_groups) {
1011 /*
1012 * Lookup the list of groups that the user belongs to, we
1013 * avoid NSS lookups here too for gid=0.
1014 */
1015 k = ngroups_max;
1016 if (getgrouplist(user, gid, l_gids, &k) < 0)
1017 return -EINVAL;
1018 } else
1019 k = 0;
81a2b7ce 1020
4d885bd3
DH
1021 STRV_FOREACH(i, c->supplementary_groups) {
1022 const char *g;
81a2b7ce 1023
4d885bd3
DH
1024 if (k >= ngroups_max)
1025 return -E2BIG;
81a2b7ce 1026
4d885bd3
DH
1027 g = *i;
1028 r = get_group_creds(&g, l_gids+k);
1029 if (r < 0)
1030 return r;
81a2b7ce 1031
4d885bd3
DH
1032 k++;
1033 }
81a2b7ce 1034
4d885bd3
DH
1035 /*
1036 * Sets ngids to zero to drop all supplementary groups, happens
1037 * when we are under root and SupplementaryGroups= is empty.
1038 */
1039 if (k == 0) {
1040 *ngids = 0;
1041 return 0;
1042 }
81a2b7ce 1043
4d885bd3
DH
1044 /* Otherwise get the final list of supplementary groups */
1045 groups = memdup(l_gids, sizeof(gid_t) * k);
1046 if (!groups)
1047 return -ENOMEM;
1048
1049 *supplementary_gids = groups;
1050 *ngids = k;
1051
1052 groups = NULL;
1053
1054 return 0;
1055}
1056
/* Applies the group credentials: first the list of supplementary groups (if there are any), then the real,
 * effective and saved GID (if 'gid' is valid). Returns 0 on success, a negative errno-style error
 * otherwise. */
static int enforce_groups(gid_t gid, gid_t *supplementary_gids, int ngids) {

        /* SupplementaryGroups= is only handled if the list is not empty */
        if (ngids > 0) {
                int k;

                k = maybe_setgroups(ngids, supplementary_gids);
                if (k < 0)
                        return k;
        }

        /* Then set our gids */
        if (gid_is_valid(gid) && setresgid(gid, gid, gid) < 0)
                return -errno;

        return 0;
}
1075
1076static int enforce_user(const ExecContext *context, uid_t uid) {
81a2b7ce
LP
1077 assert(context);
1078
4d885bd3
DH
1079 if (!uid_is_valid(uid))
1080 return 0;
1081
479050b3 1082 /* Sets (but doesn't look up) the uid and make sure we keep the
81a2b7ce
LP
1083 * capabilities while doing so. */
1084
479050b3 1085 if (context->capability_ambient_set != 0) {
81a2b7ce
LP
1086
1087 /* First step: If we need to keep capabilities but
1088 * drop privileges we need to make sure we keep our
cbb21cca 1089 * caps, while we drop privileges. */
693ced48 1090 if (uid != 0) {
cbb21cca 1091 int sb = context->secure_bits | 1<<SECURE_KEEP_CAPS;
693ced48
LP
1092
1093 if (prctl(PR_GET_SECUREBITS) != sb)
1094 if (prctl(PR_SET_SECUREBITS, sb) < 0)
1095 return -errno;
1096 }
81a2b7ce
LP
1097 }
1098
479050b3 1099 /* Second step: actually set the uids */
81a2b7ce
LP
1100 if (setresuid(uid, uid, uid) < 0)
1101 return -errno;
1102
1103 /* At this point we should have all necessary capabilities but
1104 are otherwise a normal user. However, the caps might got
1105 corrupted due to the setresuid() so we need clean them up
1106 later. This is done outside of this call. */
1107
1108 return 0;
1109}
1110
#if HAVE_PAM

/* PAM conversation callback that refuses every request: none of the arguments are looked at and PAM_CONV_ERR is
 * returned unconditionally, since we don't support conversations. */
static int null_conv(int num_msg, const struct pam_message **msg, struct pam_response **resp, void *appdata_ptr) {
        return PAM_CONV_ERR;
}

#endif
1125
5b6319dc
LP
/* Opens a PAM session for the process that is about to be executed and forks off a helper child ("(sd-pam)")
 * that stays around to close the session again.
 *
 *   name   – passed as first argument to pam_start()
 *   user   – passed as second argument to pam_start()
 *   uid    – the helper child switches to this UID via setresuid()
 *   gid    – the helper child switches to this GID via setresgid()
 *   tty    – if non-NULL, set as PAM_TTY item on the handle
 *   env    – in/out: every entry is handed to pam_putenv(); on success the vector is freed and replaced by the
 *            list returned by pam_getenvlist()
 *   fds    – file descriptors (n_fds of them) that are closed in the helper child
 *
 * Returns 0 on success. If a PAM call fails -EPERM is returned, otherwise the negative error code of the failing
 * non-PAM step. When built without HAVE_PAM this is a no-op returning 0. */
static int setup_pam(
                const char *name,
                const char *user,
                uid_t uid,
                gid_t gid,
                const char *tty,
                char ***env,
                int fds[], unsigned n_fds) {

#if HAVE_PAM

        static const struct pam_conv conv = {
                .conv = null_conv,
                .appdata_ptr = NULL
        };

        _cleanup_(barrier_destroy) Barrier barrier = BARRIER_NULL;
        pam_handle_t *handle = NULL;
        sigset_t old_ss;
        int pam_code = PAM_SUCCESS, r;
        char **nv, **e = NULL;
        bool close_session = false;
        pid_t pam_pid = 0, parent_pid;
        int flags = 0;

        assert(name);
        assert(user);
        assert(env);

        /* We set up PAM in the parent process, then fork. The child
         * will then stay around until killed via PR_SET_PDEATHSIG or
         * systemd via the cgroup logic. It will then remove the PAM
         * session again. The parent process will exec() the actual
         * daemon. We do things this way to ensure that the main PID
         * of the daemon is the one we initially fork()ed. */

        r = barrier_create(&barrier);
        if (r < 0)
                goto fail;

        /* Only let PAM be chatty if we log at debug level */
        if (log_get_max_level() < LOG_DEBUG)
                flags |= PAM_SILENT;

        pam_code = pam_start(name, user, &conv, &handle);
        if (pam_code != PAM_SUCCESS) {
                /* Make sure the failure path below does not try to end a handle that was never set up */
                handle = NULL;
                goto fail;
        }

        if (tty) {
                pam_code = pam_set_item(handle, PAM_TTY, tty);
                if (pam_code != PAM_SUCCESS)
                        goto fail;
        }

        /* Export the environment we were passed to the PAM handle */
        STRV_FOREACH(nv, *env) {
                pam_code = pam_putenv(handle, *nv);
                if (pam_code != PAM_SUCCESS)
                        goto fail;
        }

        pam_code = pam_acct_mgmt(handle, flags);
        if (pam_code != PAM_SUCCESS)
                goto fail;

        pam_code = pam_open_session(handle, flags);
        if (pam_code != PAM_SUCCESS)
                goto fail;

        /* From here on the failure path has to close the session again */
        close_session = true;

        /* Fetch the environment as held by the PAM handle; it replaces *env on success */
        e = pam_getenvlist(handle);
        if (!e) {
                pam_code = PAM_BUF_ERR;
                goto fail;
        }

        /* Block SIGTERM, so that we know that it won't get lost in
         * the child */

        assert_se(sigprocmask_many(SIG_BLOCK, &old_ss, SIGTERM, -1) >= 0);

        /* Remember our PID, so that the child can detect via getppid() whether we are still around */
        parent_pid = getpid_cached();

        r = safe_fork("(sd-pam)", 0, &pam_pid);
        if (r < 0)
                /* NOTE(review): on this path SIGTERM is left blocked, as old_ss is not restored below 'fail' —
                 * confirm that callers terminate the process when this function fails. */
                goto fail;
        if (r == 0) {
                int sig, ret = EXIT_PAM;

                /* The child's job is to reset the PAM session on
                 * termination */
                barrier_set_role(&barrier, BARRIER_CHILD);

                /* Make sure we don't keep open the passed fds in this child. We assume that otherwise only those fds
                 * are open here that have been opened by PAM. */
                (void) close_many(fds, n_fds);

                /* Drop privileges - we don't need any to pam_close_session
                 * and this will make PR_SET_PDEATHSIG work in most cases.
                 * If this fails, ignore the error - but expect sd-pam threads
                 * to fail to exit normally */

                r = maybe_setgroups(0, NULL);
                if (r < 0)
                        log_warning_errno(r, "Failed to setgroups() in sd-pam: %m");
                if (setresgid(gid, gid, gid) < 0)
                        log_warning_errno(errno, "Failed to setresgid() in sd-pam: %m");
                if (setresuid(uid, uid, uid) < 0)
                        log_warning_errno(errno, "Failed to setresuid() in sd-pam: %m");

                (void) ignore_signals(SIGPIPE, -1);

                /* Wait until our parent died. This will only work if
                 * the above setresuid() succeeds, otherwise the kernel
                 * will not allow unprivileged parents kill their privileged
                 * children this way. We rely on the control groups kill logic
                 * to do the rest for us. */
                if (prctl(PR_SET_PDEATHSIG, SIGTERM) < 0)
                        goto child_finish;

                /* Tell the parent that our setup is done. This is especially
                 * important regarding dropping privileges. Otherwise, unit
                 * setup might race against our setresuid(2) call.
                 *
                 * If the parent aborted, we'll detect this below, hence ignore
                 * return failure here. */
                (void) barrier_place(&barrier);

                /* Check if our parent process might already have died? */
                if (getppid() == parent_pid) {
                        sigset_t ss;

                        assert_se(sigemptyset(&ss) >= 0);
                        assert_se(sigaddset(&ss, SIGTERM) >= 0);

                        /* Sleep until SIGTERM is delivered, retrying if the wait is interrupted */
                        for (;;) {
                                if (sigwait(&ss, &sig) < 0) {
                                        if (errno == EINTR)
                                                continue;

                                        goto child_finish;
                                }

                                assert(sig == SIGTERM);
                                break;
                        }
                }

                /* If our parent died we'll end the session */
                if (getppid() != parent_pid) {
                        pam_code = pam_close_session(handle, flags);
                        if (pam_code != PAM_SUCCESS)
                                goto child_finish;
                }

                ret = 0;

        child_finish:
                /* Reached both on success (ret == 0) and on failure (ret == EXIT_PAM) */
                pam_end(handle, pam_code | flags);
                _exit(ret);
        }

        barrier_set_role(&barrier, BARRIER_PARENT);

        /* If the child was forked off successfully it will do all the
         * cleanups, so forget about the handle here. */
        handle = NULL;

        /* Unblock SIGTERM again in the parent */
        assert_se(sigprocmask(SIG_SETMASK, &old_ss, NULL) >= 0);

        /* We close the log explicitly here, since the PAM modules
         * might have opened it, but we don't want this fd around. */
        closelog();

        /* Synchronously wait for the child to initialize. We don't care for
         * errors as we cannot recover. However, warn loudly if it happens. */
        if (!barrier_place_and_sync(&barrier))
                log_error("PAM initialization failed");

        /* Hand the environment list obtained from PAM to the caller, replacing what was passed in */
        strv_free(*env);
        *env = e;

        return 0;

fail:
        /* Two kinds of failures end up here: PAM failures (pam_code set) and everything else (r set) */
        if (pam_code != PAM_SUCCESS) {
                log_error("PAM failed: %s", pam_strerror(handle, pam_code));
                r = -EPERM; /* PAM errors do not map to errno */
        } else
                log_error_errno(r, "PAM failed: %m");

        if (handle) {
                if (close_session)
                        pam_code = pam_close_session(handle, flags);

                pam_end(handle, pam_code | flags);
        }

        strv_free(e);
        closelog();

        return r;
#else
        return 0;
#endif
}
5b6319dc 1334
5d6b1584
LP
/* Renames the calling process to "(<name>)", where <name> is the last path component of 'path', cut down to its
 * final 8 characters if it is longer. An empty component results in "(...)". */
static void rename_process_from_path(const char *path) {
        char buf[11];
        const char *name;
        size_t n;

        /* The resulting string must fit in 10 chars (i.e. the length of "/sbin/init") to look pretty in
         * /bin/ps, hence the buffer of 11 bytes: '(' + up to 8 chars + ')' + NUL. */

        name = basename(path);
        if (isempty(name)) {
                rename_process("(...)");
                return;
        }

        n = strlen(name);
        if (n > 8) {
                /* The end of the process name is usually more interesting, since the first bit might just
                 * be "systemd-" */
                name += n - 8;
                n = 8;
        }

        buf[0] = '(';
        memcpy(buf + 1, name, n);
        buf[n + 1] = ')';
        buf[n + 2] = 0;

        rename_process(buf);
}
1365
469830d1
LP
1366static bool context_has_address_families(const ExecContext *c) {
1367 assert(c);
1368
1369 return c->address_families_whitelist ||
1370 !set_isempty(c->address_families);
1371}
1372
1373static bool context_has_syscall_filters(const ExecContext *c) {
1374 assert(c);
1375
1376 return c->syscall_whitelist ||
8cfa775f 1377 !hashmap_isempty(c->syscall_filter);
469830d1
LP
1378}
1379
1380static bool context_has_no_new_privileges(const ExecContext *c) {
1381 assert(c);
1382
1383 if (c->no_new_privileges)
1384 return true;
1385
1386 if (have_effective_cap(CAP_SYS_ADMIN)) /* if we are privileged, we don't need NNP */
1387 return false;
1388
1389 /* We need NNP if we have any form of seccomp and are unprivileged */
1390 return context_has_address_families(c) ||
1391 c->memory_deny_write_execute ||
1392 c->restrict_realtime ||
1393 exec_context_restrict_namespaces_set(c) ||
1394 c->protect_kernel_tunables ||
1395 c->protect_kernel_modules ||
1396 c->private_devices ||
1397 context_has_syscall_filters(c) ||
78e864e5
TM
1398 !set_isempty(c->syscall_archs) ||
1399 c->lock_personality;
469830d1
LP
1400}
1401
349cc4a5 1402#if HAVE_SECCOMP
17df7223 1403
83f12b27 1404static bool skip_seccomp_unavailable(const Unit* u, const char* msg) {
f673b62d
LP
1405
1406 if (is_seccomp_available())
1407 return false;
1408
f673b62d 1409 log_unit_debug(u, "SECCOMP features not detected in the kernel, skipping %s", msg);
f673b62d 1410 return true;
83f12b27
FS
1411}
1412
165a31c0 1413static int apply_syscall_filter(const Unit* u, const ExecContext *c, bool needs_ambient_hack) {
469830d1 1414 uint32_t negative_action, default_action, action;
165a31c0 1415 int r;
8351ceae 1416
469830d1 1417 assert(u);
c0467cf3 1418 assert(c);
8351ceae 1419
469830d1 1420 if (!context_has_syscall_filters(c))
83f12b27
FS
1421 return 0;
1422
469830d1
LP
1423 if (skip_seccomp_unavailable(u, "SystemCallFilter="))
1424 return 0;
e9642be2 1425
469830d1 1426 negative_action = c->syscall_errno == 0 ? SCMP_ACT_KILL : SCMP_ACT_ERRNO(c->syscall_errno);
e9642be2 1427
469830d1
LP
1428 if (c->syscall_whitelist) {
1429 default_action = negative_action;
1430 action = SCMP_ACT_ALLOW;
7c66bae2 1431 } else {
469830d1
LP
1432 default_action = SCMP_ACT_ALLOW;
1433 action = negative_action;
57183d11 1434 }
8351ceae 1435
165a31c0
LP
1436 if (needs_ambient_hack) {
1437 r = seccomp_filter_set_add(c->syscall_filter, c->syscall_whitelist, syscall_filter_sets + SYSCALL_FILTER_SET_SETUID);
1438 if (r < 0)
1439 return r;
1440 }
1441
469830d1 1442 return seccomp_load_syscall_filter_set_raw(default_action, c->syscall_filter, action);
4298d0b5
LP
1443}
1444
469830d1
LP
1445static int apply_syscall_archs(const Unit *u, const ExecContext *c) {
1446 assert(u);
4298d0b5
LP
1447 assert(c);
1448
469830d1 1449 if (set_isempty(c->syscall_archs))
83f12b27
FS
1450 return 0;
1451
469830d1
LP
1452 if (skip_seccomp_unavailable(u, "SystemCallArchitectures="))
1453 return 0;
4298d0b5 1454
469830d1
LP
1455 return seccomp_restrict_archs(c->syscall_archs);
1456}
4298d0b5 1457
469830d1
LP
1458static int apply_address_families(const Unit* u, const ExecContext *c) {
1459 assert(u);
1460 assert(c);
4298d0b5 1461
469830d1
LP
1462 if (!context_has_address_families(c))
1463 return 0;
4298d0b5 1464
469830d1
LP
1465 if (skip_seccomp_unavailable(u, "RestrictAddressFamilies="))
1466 return 0;
4298d0b5 1467
469830d1 1468 return seccomp_restrict_address_families(c->address_families, c->address_families_whitelist);
8351ceae 1469}
4298d0b5 1470
83f12b27 1471static int apply_memory_deny_write_execute(const Unit* u, const ExecContext *c) {
469830d1 1472 assert(u);
f3e43635
TM
1473 assert(c);
1474
469830d1 1475 if (!c->memory_deny_write_execute)
83f12b27
FS
1476 return 0;
1477
469830d1
LP
1478 if (skip_seccomp_unavailable(u, "MemoryDenyWriteExecute="))
1479 return 0;
f3e43635 1480
469830d1 1481 return seccomp_memory_deny_write_execute();
f3e43635
TM
1482}
1483
83f12b27 1484static int apply_restrict_realtime(const Unit* u, const ExecContext *c) {
469830d1 1485 assert(u);
f4170c67
LP
1486 assert(c);
1487
469830d1 1488 if (!c->restrict_realtime)
83f12b27
FS
1489 return 0;
1490
469830d1
LP
1491 if (skip_seccomp_unavailable(u, "RestrictRealtime="))
1492 return 0;
f4170c67 1493
469830d1 1494 return seccomp_restrict_realtime();
f4170c67
LP
1495}
1496
59e856c7 1497static int apply_protect_sysctl(const Unit *u, const ExecContext *c) {
469830d1 1498 assert(u);
59eeb84b
LP
1499 assert(c);
1500
1501 /* Turn off the legacy sysctl() system call. Many distributions turn this off while building the kernel, but
1502 * let's protect even those systems where this is left on in the kernel. */
1503
469830d1 1504 if (!c->protect_kernel_tunables)
59eeb84b
LP
1505 return 0;
1506
469830d1
LP
1507 if (skip_seccomp_unavailable(u, "ProtectKernelTunables="))
1508 return 0;
59eeb84b 1509
469830d1 1510 return seccomp_protect_sysctl();
59eeb84b
LP
1511}
1512
59e856c7 1513static int apply_protect_kernel_modules(const Unit *u, const ExecContext *c) {
469830d1 1514 assert(u);
502d704e
DH
1515 assert(c);
1516
25a8d8a0 1517 /* Turn off module syscalls on ProtectKernelModules=yes */
502d704e 1518
469830d1
LP
1519 if (!c->protect_kernel_modules)
1520 return 0;
1521
502d704e
DH
1522 if (skip_seccomp_unavailable(u, "ProtectKernelModules="))
1523 return 0;
1524
469830d1 1525 return seccomp_load_syscall_filter_set(SCMP_ACT_ALLOW, syscall_filter_sets + SYSCALL_FILTER_SET_MODULE, SCMP_ACT_ERRNO(EPERM));
502d704e
DH
1526}
1527
59e856c7 1528static int apply_private_devices(const Unit *u, const ExecContext *c) {
469830d1 1529 assert(u);
ba128bb8
LP
1530 assert(c);
1531
8f81a5f6 1532 /* If PrivateDevices= is set, also turn off iopl and all @raw-io syscalls. */
ba128bb8 1533
469830d1
LP
1534 if (!c->private_devices)
1535 return 0;
1536
ba128bb8
LP
1537 if (skip_seccomp_unavailable(u, "PrivateDevices="))
1538 return 0;
1539
469830d1 1540 return seccomp_load_syscall_filter_set(SCMP_ACT_ALLOW, syscall_filter_sets + SYSCALL_FILTER_SET_RAW_IO, SCMP_ACT_ERRNO(EPERM));
ba128bb8
LP
1541}
1542
add00535 1543static int apply_restrict_namespaces(Unit *u, const ExecContext *c) {
469830d1 1544 assert(u);
add00535
LP
1545 assert(c);
1546
1547 if (!exec_context_restrict_namespaces_set(c))
1548 return 0;
1549
1550 if (skip_seccomp_unavailable(u, "RestrictNamespaces="))
1551 return 0;
1552
1553 return seccomp_restrict_namespaces(c->restrict_namespaces);
1554}
1555
78e864e5 1556static int apply_lock_personality(const Unit* u, const ExecContext *c) {
e8132d63
LP
1557 unsigned long personality;
1558 int r;
78e864e5
TM
1559
1560 assert(u);
1561 assert(c);
1562
1563 if (!c->lock_personality)
1564 return 0;
1565
1566 if (skip_seccomp_unavailable(u, "LockPersonality="))
1567 return 0;
1568
e8132d63
LP
1569 personality = c->personality;
1570
1571 /* If personality is not specified, use either PER_LINUX or PER_LINUX32 depending on what is currently set. */
1572 if (personality == PERSONALITY_INVALID) {
1573
1574 r = opinionated_personality(&personality);
1575 if (r < 0)
1576 return r;
1577 }
78e864e5
TM
1578
1579 return seccomp_lock_personality(personality);
1580}
1581
c0467cf3 1582#endif
8351ceae 1583
31a7eb86
ZJS
1584static void do_idle_pipe_dance(int idle_pipe[4]) {
1585 assert(idle_pipe);
1586
54eb2300
LP
1587 idle_pipe[1] = safe_close(idle_pipe[1]);
1588 idle_pipe[2] = safe_close(idle_pipe[2]);
31a7eb86
ZJS
1589
1590 if (idle_pipe[0] >= 0) {
1591 int r;
1592
1593 r = fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT_USEC);
1594
1595 if (idle_pipe[3] >= 0 && r == 0 /* timeout */) {
c7cc737f
LP
1596 ssize_t n;
1597
31a7eb86 1598 /* Signal systemd that we are bored and want to continue. */
c7cc737f
LP
1599 n = write(idle_pipe[3], "x", 1);
1600 if (n > 0)
cd972d69
ZJS
1601 /* Wait for systemd to react to the signal above. */
1602 fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT2_USEC);
31a7eb86
ZJS
1603 }
1604
54eb2300 1605 idle_pipe[0] = safe_close(idle_pipe[0]);
31a7eb86
ZJS
1606
1607 }
1608
54eb2300 1609 idle_pipe[3] = safe_close(idle_pipe[3]);
31a7eb86
ZJS
1610}
1611
7cae38c4 1612static int build_environment(
fd63e712 1613 Unit *u,
9fa95f85 1614 const ExecContext *c,
1e22b5cd 1615 const ExecParameters *p,
7cae38c4
LP
1616 unsigned n_fds,
1617 const char *home,
1618 const char *username,
1619 const char *shell,
7bce046b
LP
1620 dev_t journal_stream_dev,
1621 ino_t journal_stream_ino,
7cae38c4
LP
1622 char ***ret) {
1623
1624 _cleanup_strv_free_ char **our_env = NULL;
1625 unsigned n_env = 0;
1626 char *x;
1627
4b58153d 1628 assert(u);
7cae38c4
LP
1629 assert(c);
1630 assert(ret);
1631
4b58153d 1632 our_env = new0(char*, 14);
7cae38c4
LP
1633 if (!our_env)
1634 return -ENOMEM;
1635
1636 if (n_fds > 0) {
8dd4c05b
LP
1637 _cleanup_free_ char *joined = NULL;
1638
df0ff127 1639 if (asprintf(&x, "LISTEN_PID="PID_FMT, getpid_cached()) < 0)
7cae38c4
LP
1640 return -ENOMEM;
1641 our_env[n_env++] = x;
1642
1643 if (asprintf(&x, "LISTEN_FDS=%u", n_fds) < 0)
1644 return -ENOMEM;
1645 our_env[n_env++] = x;
8dd4c05b 1646
1e22b5cd 1647 joined = strv_join(p->fd_names, ":");
8dd4c05b
LP
1648 if (!joined)
1649 return -ENOMEM;
1650
605405c6 1651 x = strjoin("LISTEN_FDNAMES=", joined);
8dd4c05b
LP
1652 if (!x)
1653 return -ENOMEM;
1654 our_env[n_env++] = x;
7cae38c4
LP
1655 }
1656
b08af3b1 1657 if ((p->flags & EXEC_SET_WATCHDOG) && p->watchdog_usec > 0) {
df0ff127 1658 if (asprintf(&x, "WATCHDOG_PID="PID_FMT, getpid_cached()) < 0)
09812eb7
LP
1659 return -ENOMEM;
1660 our_env[n_env++] = x;
1661
1e22b5cd 1662 if (asprintf(&x, "WATCHDOG_USEC="USEC_FMT, p->watchdog_usec) < 0)
09812eb7
LP
1663 return -ENOMEM;
1664 our_env[n_env++] = x;
1665 }
1666
fd63e712
LP
1667 /* If this is D-Bus, tell the nss-systemd module, since it relies on being able to use D-Bus look up dynamic
1668 * users via PID 1, possibly dead-locking the dbus daemon. This way it will not use D-Bus to resolve names, but
1669 * check the database directly. */
ac647978 1670 if (p->flags & EXEC_NSS_BYPASS_BUS) {
fd63e712
LP
1671 x = strdup("SYSTEMD_NSS_BYPASS_BUS=1");
1672 if (!x)
1673 return -ENOMEM;
1674 our_env[n_env++] = x;
1675 }
1676
7cae38c4
LP
1677 if (home) {
1678 x = strappend("HOME=", home);
1679 if (!x)
1680 return -ENOMEM;
1681 our_env[n_env++] = x;
1682 }
1683
1684 if (username) {
1685 x = strappend("LOGNAME=", username);
1686 if (!x)
1687 return -ENOMEM;
1688 our_env[n_env++] = x;
1689
1690 x = strappend("USER=", username);
1691 if (!x)
1692 return -ENOMEM;
1693 our_env[n_env++] = x;
1694 }
1695
1696 if (shell) {
1697 x = strappend("SHELL=", shell);
1698 if (!x)
1699 return -ENOMEM;
1700 our_env[n_env++] = x;
1701 }
1702
4b58153d
LP
1703 if (!sd_id128_is_null(u->invocation_id)) {
1704 if (asprintf(&x, "INVOCATION_ID=" SD_ID128_FORMAT_STR, SD_ID128_FORMAT_VAL(u->invocation_id)) < 0)
1705 return -ENOMEM;
1706
1707 our_env[n_env++] = x;
1708 }
1709
6af760f3
LP
1710 if (exec_context_needs_term(c)) {
1711 const char *tty_path, *term = NULL;
1712
1713 tty_path = exec_context_tty_path(c);
1714
1715 /* If we are forked off PID 1 and we are supposed to operate on /dev/console, then let's try to inherit
1716 * the $TERM set for PID 1. This is useful for containers so that the $TERM the container manager
1717 * passes to PID 1 ends up all the way in the console login shown. */
1718
1719 if (path_equal(tty_path, "/dev/console") && getppid() == 1)
1720 term = getenv("TERM");
1721 if (!term)
1722 term = default_term_for_tty(tty_path);
7cae38c4 1723
6af760f3 1724 x = strappend("TERM=", term);
7cae38c4
LP
1725 if (!x)
1726 return -ENOMEM;
1727 our_env[n_env++] = x;
1728 }
1729
7bce046b
LP
1730 if (journal_stream_dev != 0 && journal_stream_ino != 0) {
1731 if (asprintf(&x, "JOURNAL_STREAM=" DEV_FMT ":" INO_FMT, journal_stream_dev, journal_stream_ino) < 0)
1732 return -ENOMEM;
1733
1734 our_env[n_env++] = x;
1735 }
1736
7cae38c4 1737 our_env[n_env++] = NULL;
7bce046b 1738 assert(n_env <= 12);
7cae38c4
LP
1739
1740 *ret = our_env;
1741 our_env = NULL;
1742
1743 return 0;
1744}
1745
b4c14404
FB
1746static int build_pass_environment(const ExecContext *c, char ***ret) {
1747 _cleanup_strv_free_ char **pass_env = NULL;
1748 size_t n_env = 0, n_bufsize = 0;
1749 char **i;
1750
1751 STRV_FOREACH(i, c->pass_environment) {
1752 _cleanup_free_ char *x = NULL;
1753 char *v;
1754
1755 v = getenv(*i);
1756 if (!v)
1757 continue;
605405c6 1758 x = strjoin(*i, "=", v);
b4c14404
FB
1759 if (!x)
1760 return -ENOMEM;
00819cc1 1761
b4c14404
FB
1762 if (!GREEDY_REALLOC(pass_env, n_bufsize, n_env + 2))
1763 return -ENOMEM;
00819cc1 1764
b4c14404
FB
1765 pass_env[n_env++] = x;
1766 pass_env[n_env] = NULL;
1767 x = NULL;
1768 }
1769
1770 *ret = pass_env;
1771 pass_env = NULL;
1772
1773 return 0;
1774}
1775
8b44a3d2
LP
1776static bool exec_needs_mount_namespace(
1777 const ExecContext *context,
1778 const ExecParameters *params,
1779 ExecRuntime *runtime) {
1780
1781 assert(context);
1782 assert(params);
1783
915e6d16
LP
1784 if (context->root_image)
1785 return true;
1786
2a624c36
AP
1787 if (!strv_isempty(context->read_write_paths) ||
1788 !strv_isempty(context->read_only_paths) ||
1789 !strv_isempty(context->inaccessible_paths))
8b44a3d2
LP
1790 return true;
1791
42b1d8e0 1792 if (context->n_bind_mounts > 0)
d2d6c096
LP
1793 return true;
1794
8b44a3d2
LP
1795 if (context->mount_flags != 0)
1796 return true;
1797
1798 if (context->private_tmp && runtime && (runtime->tmp_dir || runtime->var_tmp_dir))
1799 return true;
1800
8b44a3d2
LP
1801 if (context->private_devices ||
1802 context->protect_system != PROTECT_SYSTEM_NO ||
59eeb84b
LP
1803 context->protect_home != PROTECT_HOME_NO ||
1804 context->protect_kernel_tunables ||
c575770b 1805 context->protect_kernel_modules ||
59eeb84b 1806 context->protect_control_groups)
8b44a3d2
LP
1807 return true;
1808
9c988f93 1809 if (context->mount_apivfs && (context->root_image || context->root_directory))
5d997827
LP
1810 return true;
1811
42b1d8e0
YW
1812 if (context->dynamic_user &&
1813 (!strv_isempty(context->directories[EXEC_DIRECTORY_RUNTIME].paths) ||
1814 !strv_isempty(context->directories[EXEC_DIRECTORY_STATE].paths) ||
1815 !strv_isempty(context->directories[EXEC_DIRECTORY_CACHE].paths) ||
1816 !strv_isempty(context->directories[EXEC_DIRECTORY_LOGS].paths)))
1817 return true;
1818
8b44a3d2
LP
1819 return false;
1820}
1821
d251207d
LP
/* Moves the calling process into a new user namespace (unshare(CLONE_NEWUSER)) in which root is mapped to root
 * and, if 'uid'/'gid' are valid and non-zero, 'uid'/'gid' are mapped to themselves. The maps are written by a
 * temporary helper child ("(sd-userns)").
 *
 * Returns 0 on success. On failure a negative error code is returned: either the error of a failing step in the
 * parent, the error reported by the helper through the error pipe, or -EIO if the helper misbehaved. */
static int setup_private_users(uid_t uid, gid_t gid) {
        _cleanup_free_ char *uid_map = NULL, *gid_map = NULL;
        _cleanup_close_pair_ int errno_pipe[2] = { -1, -1 };
        _cleanup_close_ int unshare_ready_fd = -1;
        _cleanup_(sigkill_waitp) pid_t pid = 0;
        uint64_t c = 1;
        siginfo_t si;
        ssize_t n;
        int r;

        /* Set up a user namespace and map root to root, the selected UID/GID to itself, and everything else to
         * nobody. In order to be able to write this mapping we need CAP_SETUID in the original user namespace, which
         * we however lack after opening the user namespace. To work around this we fork() a temporary child process,
         * which waits for the parent to create the new user namespace while staying in the original namespace. The
         * child then writes the UID mapping, under full privileges. The parent waits for the child to finish and
         * continues execution normally. */

        /* Prepare the contents of the UID map before forking, so that the child only has to write it out */
        if (uid != 0 && uid_is_valid(uid)) {
                r = asprintf(&uid_map,
                             "0 0 1\n"                      /* Map root → root */
                             UID_FMT " " UID_FMT " 1\n",    /* Map $UID → $UID */
                             uid, uid);
                if (r < 0)
                        return -ENOMEM;
        } else {
                uid_map = strdup("0 0 1\n");            /* The case where the above is the same */
                if (!uid_map)
                        return -ENOMEM;
        }

        /* Same for the GID map */
        if (gid != 0 && gid_is_valid(gid)) {
                r = asprintf(&gid_map,
                             "0 0 1\n"                      /* Map root → root */
                             GID_FMT " " GID_FMT " 1\n",    /* Map $GID → $GID */
                             gid, gid);
                if (r < 0)
                        return -ENOMEM;
        } else {
                gid_map = strdup("0 0 1\n");            /* The case where the above is the same */
                if (!gid_map)
                        return -ENOMEM;
        }

        /* Create a communication channel so that the parent can tell the child when it finished creating the user
         * namespace. */
        unshare_ready_fd = eventfd(0, EFD_CLOEXEC);
        if (unshare_ready_fd < 0)
                return -errno;

        /* Create a communication channel so that the child can tell the parent a proper error code in case it
         * failed. */
        if (pipe2(errno_pipe, O_CLOEXEC) < 0)
                return -errno;

        r = safe_fork("(sd-userns)", FORK_RESET_SIGNALS|FORK_DEATHSIG, &pid);
        if (r < 0)
                return r;
        if (r == 0) {
                _cleanup_close_ int fd = -1;
                const char *a;
                pid_t ppid;

                /* Child process, running in the original user namespace. Let's update the parent's UID/GID map from
                 * here, after the parent opened its own user namespace. */

                ppid = getppid();
                errno_pipe[0] = safe_close(errno_pipe[0]);

                /* Wait until the parent unshared the user namespace */
                if (read(unshare_ready_fd, &c, sizeof(c)) < 0) {
                        r = -errno;
                        goto child_fail;
                }

                /* Disable the setgroups() system call in the child user namespace, for good. */
                a = procfs_file_alloca(ppid, "setgroups");
                fd = open(a, O_WRONLY|O_CLOEXEC);
                if (fd < 0) {
                        if (errno != ENOENT) {
                                r = -errno;
                                goto child_fail;
                        }

                        /* If the file is missing the kernel is too old, let's continue anyway. */
                } else {
                        if (write(fd, "deny\n", 5) < 0) {
                                r = -errno;
                                goto child_fail;
                        }

                        fd = safe_close(fd);
                }

                /* First write the GID map */
                a = procfs_file_alloca(ppid, "gid_map");
                fd = open(a, O_WRONLY|O_CLOEXEC);
                if (fd < 0) {
                        r = -errno;
                        goto child_fail;
                }
                if (write(fd, gid_map, strlen(gid_map)) < 0) {
                        r = -errno;
                        goto child_fail;
                }
                fd = safe_close(fd);

                /* Then write the UID map */
                a = procfs_file_alloca(ppid, "uid_map");
                fd = open(a, O_WRONLY|O_CLOEXEC);
                if (fd < 0) {
                        r = -errno;
                        goto child_fail;
                }
                if (write(fd, uid_map, strlen(uid_map)) < 0) {
                        r = -errno;
                        goto child_fail;
                }

                _exit(EXIT_SUCCESS);

        child_fail:
                /* Report the negative error code to the parent; nothing we could do if that fails */
                (void) write(errno_pipe[1], &r, sizeof(r));
                _exit(EXIT_FAILURE);
        }

        /* Parent: close our copy of the write end, so that reading below returns 0 bytes once the child is done
         * without having reported an error */
        errno_pipe[1] = safe_close(errno_pipe[1]);

        if (unshare(CLONE_NEWUSER) < 0)
                return -errno;

        /* Let the child know that the namespace is ready now */
        if (write(unshare_ready_fd, &c, sizeof(c)) < 0)
                return -errno;

        /* Try to read an error code from the child */
        n = read(errno_pipe[0], &r, sizeof(r));
        if (n < 0)
                return -errno;
        if (n == sizeof(r)) { /* an error code was sent to us */
                if (r < 0)
                        return r;
                return -EIO;
        }
        if (n != 0) /* on success we should have read 0 bytes */
                return -EIO;

        r = wait_for_terminate(pid, &si);
        if (r < 0)
                return r;
        /* The child has been waited for; reset pid so that the sigkill_waitp cleanup handler sees 0 */
        pid = 0;

        /* If something strange happened with the child, let's consider this fatal, too */
        if (si.si_code != CLD_EXITED || si.si_status != 0)
                return -EIO;

        return 0;
}
1979
3536f49e 1980static int setup_exec_directory(
07689d5d
LP
1981 const ExecContext *context,
1982 const ExecParameters *params,
1983 uid_t uid,
3536f49e 1984 gid_t gid,
3536f49e
YW
1985 ExecDirectoryType type,
1986 int *exit_status) {
07689d5d 1987
72fd1768 1988 static const int exit_status_table[_EXEC_DIRECTORY_TYPE_MAX] = {
3536f49e
YW
1989 [EXEC_DIRECTORY_RUNTIME] = EXIT_RUNTIME_DIRECTORY,
1990 [EXEC_DIRECTORY_STATE] = EXIT_STATE_DIRECTORY,
1991 [EXEC_DIRECTORY_CACHE] = EXIT_CACHE_DIRECTORY,
1992 [EXEC_DIRECTORY_LOGS] = EXIT_LOGS_DIRECTORY,
1993 [EXEC_DIRECTORY_CONFIGURATION] = EXIT_CONFIGURATION_DIRECTORY,
1994 };
07689d5d
LP
1995 char **rt;
1996 int r;
1997
1998 assert(context);
1999 assert(params);
72fd1768 2000 assert(type >= 0 && type < _EXEC_DIRECTORY_TYPE_MAX);
3536f49e 2001 assert(exit_status);
07689d5d 2002
3536f49e
YW
2003 if (!params->prefix[type])
2004 return 0;
2005
8679efde 2006 if (params->flags & EXEC_CHOWN_DIRECTORIES) {
3536f49e
YW
2007 if (!uid_is_valid(uid))
2008 uid = 0;
2009 if (!gid_is_valid(gid))
2010 gid = 0;
2011 }
2012
2013 STRV_FOREACH(rt, context->directories[type].paths) {
6c47cd7d
LP
2014 _cleanup_free_ char *p = NULL, *pp = NULL;
2015 const char *effective;
07689d5d 2016
3536f49e
YW
2017 p = strjoin(params->prefix[type], "/", *rt);
2018 if (!p) {
2019 r = -ENOMEM;
2020 goto fail;
2021 }
07689d5d 2022
23a7448e
YW
2023 r = mkdir_parents_label(p, 0755);
2024 if (r < 0)
3536f49e 2025 goto fail;
23a7448e 2026
8092a48c
YW
2027 if (context->dynamic_user &&
2028 !IN_SET(type, EXEC_DIRECTORY_RUNTIME, EXEC_DIRECTORY_CONFIGURATION)) {
6c47cd7d
LP
2029 _cleanup_free_ char *private_root = NULL, *relative = NULL, *parent = NULL;
2030
2031 /* So, here's one extra complication when dealing with DynamicUser=1 units. In that case we
2032 * want to avoid leaving a directory around fully accessible that is owned by a dynamic user
2033 * whose UID is later on reused. To lock this down we use the same trick used by container
2034 * managers to prohibit host users to get access to files of the same UID in containers: we
2035 * place everything inside a directory that has an access mode of 0700 and is owned root:root,
2036 * so that it acts as security boundary for unprivileged host code. We then use fs namespacing
2037 * to make this directory permeable for the service itself.
2038 *
2039 * Specifically: for a service which wants a special directory "foo/" we first create a
2040 * directory "private/" with access mode 0700 owned by root:root. Then we place "foo" inside of
2041 * that directory (i.e. "private/foo/"), and make "foo" a symlink to "private/foo". This way,
2042 * privileged host users can access "foo/" as usual, but unprivileged host users can't look
2043 * into it. Inside of the namespaceof the container "private/" is replaced by a more liberally
2044 * accessible tmpfs, into which the host's "private/foo/" is mounted under the same name, thus
2045 * disabling the access boundary for the service and making sure it only gets access to the
2046 * dirs it needs but no others. Tricky? Yes, absolutely, but it works!
2047 *
2048 * Note that we don't do this for EXEC_DIRECTORY_CONFIGURATION as that's assumed not to be
8092a48c
YW
2049 * owned by the service itself.
2050 * Also, note that we don't do this for EXEC_DIRECTORY_RUNTIME as that's often used for sharing
2051 * files or sockets with other services. */
6c47cd7d
LP
2052
2053 private_root = strjoin(params->prefix[type], "/private");
2054 if (!private_root) {
2055 r = -ENOMEM;
2056 goto fail;
2057 }
2058
2059 /* First set up private root if it doesn't exist yet, with access mode 0700 and owned by root:root */
c31ad024 2060 r = mkdir_safe_label(private_root, 0700, 0, 0, false);
6c47cd7d
LP
2061 if (r < 0)
2062 goto fail;
2063
2064 pp = strjoin(private_root, "/", *rt);
2065 if (!pp) {
2066 r = -ENOMEM;
2067 goto fail;
2068 }
2069
2070 /* Create all directories between the configured directory and this private root, and mark them 0755 */
2071 r = mkdir_parents_label(pp, 0755);
2072 if (r < 0)
2073 goto fail;
2074
949befd3
LP
2075 if (is_dir(p, false) > 0 &&
2076 (laccess(pp, F_OK) < 0 && errno == ENOENT)) {
2077
2078 /* Hmm, the private directory doesn't exist yet, but the normal one exists? If so, move
2079 * it over. Most likely the service has been upgraded from one that didn't use
2080 * DynamicUser=1, to one that does. */
2081
2082 if (rename(p, pp) < 0) {
2083 r = -errno;
2084 goto fail;
2085 }
2086 } else {
2087 /* Otherwise, create the actual directory for the service */
2088
2089 r = mkdir_label(pp, context->directories[type].mode);
2090 if (r < 0 && r != -EEXIST)
2091 goto fail;
2092 }
6c47cd7d
LP
2093
2094 parent = dirname_malloc(p);
2095 if (!parent) {
2096 r = -ENOMEM;
2097 goto fail;
2098 }
2099
2100 r = path_make_relative(parent, pp, &relative);
2101 if (r < 0)
2102 goto fail;
2103
2104 /* And link it up from the original place */
2105 r = symlink_idempotent(relative, p);
2106 if (r < 0)
2107 goto fail;
2108
2109 effective = pp;
2110
2111 } else {
2112 r = mkdir_label(p, context->directories[type].mode);
2113 if (r < 0 && r != -EEXIST)
2114 goto fail;
2115
2116 effective = p;
2117 }
a1164ae3
LP
2118
2119 /* First lock down the access mode */
6c47cd7d 2120 if (chmod(effective, context->directories[type].mode) < 0) {
a1164ae3 2121 r = -errno;
3536f49e 2122 goto fail;
a1164ae3 2123 }
07689d5d 2124
c71b2eb7
LP
2125 /* Don't change the owner of the configuration directory, as in the common case it is not written to by
2126 * a service, and shall not be writable. */
2127 if (type == EXEC_DIRECTORY_CONFIGURATION)
2128 continue;
2129
a1164ae3 2130 /* Then, change the ownership of the whole tree, if necessary */
6c47cd7d 2131 r = path_chown_recursive(effective, uid, gid);
07689d5d 2132 if (r < 0)
3536f49e 2133 goto fail;
07689d5d
LP
2134 }
2135
2136 return 0;
3536f49e
YW
2137
2138fail:
2139 *exit_status = exit_status_table[type];
3536f49e 2140 return r;
07689d5d
LP
2141}
2142
92b423b9 2143#if ENABLE_SMACK
cefc33ae
LP
2144static int setup_smack(
2145 const ExecContext *context,
2146 const ExecCommand *command) {
2147
cefc33ae
LP
2148 int r;
2149
2150 assert(context);
2151 assert(command);
2152
cefc33ae
LP
2153 if (context->smack_process_label) {
2154 r = mac_smack_apply_pid(0, context->smack_process_label);
2155 if (r < 0)
2156 return r;
2157 }
2158#ifdef SMACK_DEFAULT_PROCESS_LABEL
2159 else {
2160 _cleanup_free_ char *exec_label = NULL;
2161
2162 r = mac_smack_read(command->path, SMACK_ATTR_EXEC, &exec_label);
4c701096 2163 if (r < 0 && !IN_SET(r, -ENODATA, -EOPNOTSUPP))
cefc33ae
LP
2164 return r;
2165
2166 r = mac_smack_apply_pid(0, exec_label ? : SMACK_DEFAULT_PROCESS_LABEL);
2167 if (r < 0)
2168 return r;
2169 }
cefc33ae
LP
2170#endif
2171
2172 return 0;
2173}
92b423b9 2174#endif
cefc33ae 2175
6c47cd7d
LP
2176static int compile_bind_mounts(
2177 const ExecContext *context,
2178 const ExecParameters *params,
2179 BindMount **ret_bind_mounts,
2180 unsigned *ret_n_bind_mounts,
2181 char ***ret_empty_directories) {
2182
2183 _cleanup_strv_free_ char **empty_directories = NULL;
2184 BindMount *bind_mounts;
2185 unsigned n, h = 0, i;
2186 ExecDirectoryType t;
2187 int r;
2188
2189 assert(context);
2190 assert(params);
2191 assert(ret_bind_mounts);
2192 assert(ret_n_bind_mounts);
2193 assert(ret_empty_directories);
2194
2195 n = context->n_bind_mounts;
2196 for (t = 0; t < _EXEC_DIRECTORY_TYPE_MAX; t++) {
2197 if (!params->prefix[t])
2198 continue;
2199
2200 n += strv_length(context->directories[t].paths);
2201 }
2202
2203 if (n <= 0) {
2204 *ret_bind_mounts = NULL;
2205 *ret_n_bind_mounts = 0;
2206 *ret_empty_directories = NULL;
2207 return 0;
2208 }
2209
2210 bind_mounts = new(BindMount, n);
2211 if (!bind_mounts)
2212 return -ENOMEM;
2213
a8cabc61 2214 for (i = 0; i < context->n_bind_mounts; i++) {
6c47cd7d
LP
2215 BindMount *item = context->bind_mounts + i;
2216 char *s, *d;
2217
2218 s = strdup(item->source);
2219 if (!s) {
2220 r = -ENOMEM;
2221 goto finish;
2222 }
2223
2224 d = strdup(item->destination);
2225 if (!d) {
2226 free(s);
2227 r = -ENOMEM;
2228 goto finish;
2229 }
2230
2231 bind_mounts[h++] = (BindMount) {
2232 .source = s,
2233 .destination = d,
2234 .read_only = item->read_only,
2235 .recursive = item->recursive,
2236 .ignore_enoent = item->ignore_enoent,
2237 };
2238 }
2239
2240 for (t = 0; t < _EXEC_DIRECTORY_TYPE_MAX; t++) {
2241 char **suffix;
2242
2243 if (!params->prefix[t])
2244 continue;
2245
2246 if (strv_isempty(context->directories[t].paths))
2247 continue;
2248
8092a48c
YW
2249 if (context->dynamic_user &&
2250 !IN_SET(t, EXEC_DIRECTORY_RUNTIME, EXEC_DIRECTORY_CONFIGURATION)) {
6c47cd7d
LP
2251 char *private_root;
2252
2253 /* So this is for a dynamic user, and we need to make sure the process can access its own
2254 * directory. For that we overmount the usually inaccessible "private" subdirectory with a
2255 * tmpfs that makes it accessible and is empty except for the submounts we do this for. */
2256
2257 private_root = strjoin(params->prefix[t], "/private");
2258 if (!private_root) {
2259 r = -ENOMEM;
2260 goto finish;
2261 }
2262
2263 r = strv_consume(&empty_directories, private_root);
2264 if (r < 0) {
2265 r = -ENOMEM;
2266 goto finish;
2267 }
2268 }
2269
2270 STRV_FOREACH(suffix, context->directories[t].paths) {
2271 char *s, *d;
2272
8092a48c
YW
2273 if (context->dynamic_user &&
2274 !IN_SET(t, EXEC_DIRECTORY_RUNTIME, EXEC_DIRECTORY_CONFIGURATION))
6c47cd7d
LP
2275 s = strjoin(params->prefix[t], "/private/", *suffix);
2276 else
2277 s = strjoin(params->prefix[t], "/", *suffix);
2278 if (!s) {
2279 r = -ENOMEM;
2280 goto finish;
2281 }
2282
2283 d = strdup(s);
2284 if (!d) {
2285 free(s);
2286 r = -ENOMEM;
2287 goto finish;
2288 }
2289
2290 bind_mounts[h++] = (BindMount) {
2291 .source = s,
2292 .destination = d,
2293 .read_only = false,
2294 .recursive = true,
2295 .ignore_enoent = false,
2296 };
2297 }
2298 }
2299
2300 assert(h == n);
2301
2302 *ret_bind_mounts = bind_mounts;
2303 *ret_n_bind_mounts = n;
2304 *ret_empty_directories = empty_directories;
2305
2306 empty_directories = NULL;
2307
2308 return (int) n;
2309
2310finish:
2311 bind_mount_free_many(bind_mounts, h);
2312 return r;
2313}
2314
6818c54c
LP
2315static int apply_mount_namespace(
2316 Unit *u,
2317 ExecCommand *command,
2318 const ExecContext *context,
2319 const ExecParameters *params,
2320 ExecRuntime *runtime) {
2321
7bcef4ef 2322 _cleanup_strv_free_ char **empty_directories = NULL;
93c6bb51 2323 char *tmp = NULL, *var = NULL;
915e6d16 2324 const char *root_dir = NULL, *root_image = NULL;
bb0ff3fb 2325 NamespaceInfo ns_info = {
af964954 2326 .ignore_protect_paths = false,
93c6bb51
DH
2327 .private_dev = context->private_devices,
2328 .protect_control_groups = context->protect_control_groups,
2329 .protect_kernel_tunables = context->protect_kernel_tunables,
2330 .protect_kernel_modules = context->protect_kernel_modules,
5d997827 2331 .mount_apivfs = context->mount_apivfs,
93c6bb51 2332 };
165a31c0 2333 bool needs_sandboxing;
6c47cd7d
LP
2334 BindMount *bind_mounts = NULL;
2335 unsigned n_bind_mounts = 0;
6818c54c 2336 int r;
93c6bb51 2337
2b3c1b9e
DH
2338 assert(context);
2339
93c6bb51
DH
2340 /* The runtime struct only contains the parent of the private /tmp,
2341 * which is non-accessible to world users. Inside of it there's a /tmp
2342 * that is sticky, and that's the one we want to use here. */
2343
2344 if (context->private_tmp && runtime) {
2345 if (runtime->tmp_dir)
2346 tmp = strjoina(runtime->tmp_dir, "/tmp");
2347 if (runtime->var_tmp_dir)
2348 var = strjoina(runtime->var_tmp_dir, "/tmp");
2349 }
2350
915e6d16
LP
2351 if (params->flags & EXEC_APPLY_CHROOT) {
2352 root_image = context->root_image;
2353
2354 if (!root_image)
2355 root_dir = context->root_directory;
2356 }
93c6bb51 2357
6c47cd7d
LP
2358 r = compile_bind_mounts(context, params, &bind_mounts, &n_bind_mounts, &empty_directories);
2359 if (r < 0)
2360 return r;
2361
af964954
DH
2362 /*
2363 * If DynamicUser=no and RootDirectory= is set then lets pass a relaxed
2364 * sandbox info, otherwise enforce it, don't ignore protected paths and
2365 * fail if we are enable to apply the sandbox inside the mount namespace.
2366 */
2367 if (!context->dynamic_user && root_dir)
2368 ns_info.ignore_protect_paths = true;
2369
165a31c0 2370 needs_sandboxing = (params->flags & EXEC_APPLY_SANDBOXING) && !(command->flags & EXEC_COMMAND_FULLY_PRIVILEGED);
6818c54c 2371
915e6d16 2372 r = setup_namespace(root_dir, root_image,
7bcef4ef 2373 &ns_info, context->read_write_paths,
165a31c0
LP
2374 needs_sandboxing ? context->read_only_paths : NULL,
2375 needs_sandboxing ? context->inaccessible_paths : NULL,
6c47cd7d
LP
2376 empty_directories,
2377 bind_mounts,
2378 n_bind_mounts,
93c6bb51
DH
2379 tmp,
2380 var,
165a31c0
LP
2381 needs_sandboxing ? context->protect_home : PROTECT_HOME_NO,
2382 needs_sandboxing ? context->protect_system : PROTECT_SYSTEM_NO,
915e6d16
LP
2383 context->mount_flags,
2384 DISSECT_IMAGE_DISCARD_ON_LOOP);
93c6bb51 2385
6c47cd7d
LP
2386 bind_mount_free_many(bind_mounts, n_bind_mounts);
2387
93c6bb51
DH
2388 /* If we couldn't set up the namespace this is probably due to a
2389 * missing capability. In this case, silently proceeed. */
2390 if (IN_SET(r, -EPERM, -EACCES)) {
93c6bb51 2391 log_unit_debug_errno(u, r, "Failed to set up namespace, assuming containerized execution, ignoring: %m");
86ffb325 2392 return 0;
93c6bb51
DH
2393 }
2394
2395 return r;
2396}
2397
915e6d16
LP
2398static int apply_working_directory(
2399 const ExecContext *context,
2400 const ExecParameters *params,
2401 const char *home,
376fecf6
LP
2402 const bool needs_mount_ns,
2403 int *exit_status) {
915e6d16 2404
6732edab 2405 const char *d, *wd;
2b3c1b9e
DH
2406
2407 assert(context);
376fecf6 2408 assert(exit_status);
2b3c1b9e 2409
6732edab
LP
2410 if (context->working_directory_home) {
2411
376fecf6
LP
2412 if (!home) {
2413 *exit_status = EXIT_CHDIR;
6732edab 2414 return -ENXIO;
376fecf6 2415 }
6732edab 2416
2b3c1b9e 2417 wd = home;
6732edab
LP
2418
2419 } else if (context->working_directory)
2b3c1b9e
DH
2420 wd = context->working_directory;
2421 else
2422 wd = "/";
e7f1e7c6
DH
2423
2424 if (params->flags & EXEC_APPLY_CHROOT) {
2425 if (!needs_mount_ns && context->root_directory)
376fecf6
LP
2426 if (chroot(context->root_directory) < 0) {
2427 *exit_status = EXIT_CHROOT;
e7f1e7c6 2428 return -errno;
376fecf6 2429 }
e7f1e7c6 2430
2b3c1b9e
DH
2431 d = wd;
2432 } else
3b0e5bb5 2433 d = prefix_roota(context->root_directory, wd);
e7f1e7c6 2434
376fecf6
LP
2435 if (chdir(d) < 0 && !context->working_directory_missing_ok) {
2436 *exit_status = EXIT_CHDIR;
2b3c1b9e 2437 return -errno;
376fecf6 2438 }
e7f1e7c6
DH
2439
2440 return 0;
2441}
2442
b1edf445
LP
2443static int setup_keyring(
2444 Unit *u,
2445 const ExecContext *context,
2446 const ExecParameters *p,
2447 uid_t uid, gid_t gid) {
2448
74dd6b51 2449 key_serial_t keyring;
b1edf445 2450 int r;
74dd6b51
LP
2451
2452 assert(u);
b1edf445 2453 assert(context);
74dd6b51
LP
2454 assert(p);
2455
2456 /* Let's set up a new per-service "session" kernel keyring for each system service. This has the benefit that
2457 * each service runs with its own keyring shared among all processes of the service, but with no hook-up beyond
2458 * that scope, and in particular no link to the per-UID keyring. If we don't do this the keyring will be
2459 * automatically created on-demand and then linked to the per-UID keyring, by the kernel. The kernel's built-in
2460 * on-demand behaviour is very appropriate for login users, but probably not so much for system services, where
2461 * UIDs are not necessarily specific to a service but reused (at least in the case of UID 0). */
2462
2463 if (!(p->flags & EXEC_NEW_KEYRING))
2464 return 0;
2465
b1edf445
LP
2466 if (context->keyring_mode == EXEC_KEYRING_INHERIT)
2467 return 0;
2468
74dd6b51
LP
2469 keyring = keyctl(KEYCTL_JOIN_SESSION_KEYRING, 0, 0, 0, 0);
2470 if (keyring == -1) {
2471 if (errno == ENOSYS)
8002fb97 2472 log_unit_debug_errno(u, errno, "Kernel keyring not supported, ignoring.");
74dd6b51 2473 else if (IN_SET(errno, EACCES, EPERM))
8002fb97 2474 log_unit_debug_errno(u, errno, "Kernel keyring access prohibited, ignoring.");
74dd6b51 2475 else if (errno == EDQUOT)
8002fb97 2476 log_unit_debug_errno(u, errno, "Out of kernel keyrings to allocate, ignoring.");
74dd6b51 2477 else
8002fb97 2478 return log_unit_error_errno(u, errno, "Setting up kernel keyring failed: %m");
74dd6b51
LP
2479
2480 return 0;
2481 }
2482
b3415f5d
LP
2483 /* Populate they keyring with the invocation ID by default. */
2484 if (!sd_id128_is_null(u->invocation_id)) {
2485 key_serial_t key;
2486
2487 key = add_key("user", "invocation_id", &u->invocation_id, sizeof(u->invocation_id), KEY_SPEC_SESSION_KEYRING);
2488 if (key == -1)
8002fb97 2489 log_unit_debug_errno(u, errno, "Failed to add invocation ID to keyring, ignoring: %m");
b3415f5d
LP
2490 else {
2491 if (keyctl(KEYCTL_SETPERM, key,
2492 KEY_POS_VIEW|KEY_POS_READ|KEY_POS_SEARCH|
2493 KEY_USR_VIEW|KEY_USR_READ|KEY_USR_SEARCH, 0, 0) < 0)
8002fb97 2494 return log_unit_error_errno(u, errno, "Failed to restrict invocation ID permission: %m");
b3415f5d
LP
2495 }
2496 }
2497
74dd6b51
LP
2498 /* And now, make the keyring owned by the service's user */
2499 if (uid_is_valid(uid) || gid_is_valid(gid))
2500 if (keyctl(KEYCTL_CHOWN, keyring, uid, gid, 0) < 0)
8002fb97 2501 return log_unit_error_errno(u, errno, "Failed to change ownership of session keyring: %m");
74dd6b51 2502
b1edf445
LP
2503 /* When requested link the user keyring into the session keyring. */
2504 if (context->keyring_mode == EXEC_KEYRING_SHARED) {
2505 uid_t saved_uid;
2506 gid_t saved_gid;
2507
2508 /* Acquiring a reference to the user keyring is nasty. We briefly change identity in order to get things
2509 * set up properly by the kernel. If we don't do that then we can't create it atomically, and that
2510 * sucks for parallel execution. This mimics what pam_keyinit does, too.*/
2511
2512 saved_uid = getuid();
2513 saved_gid = getgid();
2514
2515 if (gid_is_valid(gid) && gid != saved_gid) {
2516 if (setregid(gid, -1) < 0)
8002fb97 2517 return log_unit_error_errno(u, errno, "Failed to change GID for user keyring: %m");
b1edf445
LP
2518 }
2519
2520 if (uid_is_valid(uid) && uid != saved_uid) {
2521 if (setreuid(uid, -1) < 0) {
2522 (void) setregid(saved_gid, -1);
8002fb97 2523 return log_unit_error_errno(u, errno, "Failed to change UID for user keyring: %m");
b1edf445
LP
2524 }
2525 }
2526
2527 if (keyctl(KEYCTL_LINK,
2528 KEY_SPEC_USER_KEYRING,
2529 KEY_SPEC_SESSION_KEYRING, 0, 0) < 0) {
2530
2531 r = -errno;
2532
2533 (void) setreuid(saved_uid, -1);
2534 (void) setregid(saved_gid, -1);
2535
8002fb97 2536 return log_unit_error_errno(u, r, "Failed to link user keyring into session keyring: %m");
b1edf445
LP
2537 }
2538
2539 if (uid_is_valid(uid) && uid != saved_uid) {
2540 if (setreuid(saved_uid, -1) < 0) {
2541 (void) setregid(saved_gid, -1);
8002fb97 2542 return log_unit_error_errno(u, errno, "Failed to change UID back for user keyring: %m");
b1edf445
LP
2543 }
2544 }
2545
2546 if (gid_is_valid(gid) && gid != saved_gid) {
2547 if (setregid(saved_gid, -1) < 0)
8002fb97 2548 return log_unit_error_errno(u, errno, "Failed to change GID back for user keyring: %m");
b1edf445 2549 }
61ceaea5 2550 }
b1edf445 2551
74dd6b51
LP
2552 return 0;
2553}
2554
29206d46
LP
/* Appends the valid (i.e. non-negative) file descriptors of a socket pair to array, advancing the element
 * counter *n for each one added. A NULL pair is silently ignored. The caller has to make sure array has
 * room for up to two more entries. */
static void append_socket_pair(int *array, unsigned *n, int pair[2]) {
        unsigned k;

        assert(array);
        assert(n);

        if (!pair)
                return;

        for (k = 0; k < 2; k++) {
                if (pair[k] < 0)
                        continue;

                array[*n] = pair[k];
                (*n)++;
        }
}
2567
a34ceba6
LP
2568static int close_remaining_fds(
2569 const ExecParameters *params,
2570 ExecRuntime *runtime,
29206d46 2571 DynamicCreds *dcreds,
00d9ef85 2572 int user_lookup_fd,
a34ceba6
LP
2573 int socket_fd,
2574 int *fds, unsigned n_fds) {
2575
2576 unsigned n_dont_close = 0;
00d9ef85 2577 int dont_close[n_fds + 12];
a34ceba6
LP
2578
2579 assert(params);
2580
2581 if (params->stdin_fd >= 0)
2582 dont_close[n_dont_close++] = params->stdin_fd;
2583 if (params->stdout_fd >= 0)
2584 dont_close[n_dont_close++] = params->stdout_fd;
2585 if (params->stderr_fd >= 0)
2586 dont_close[n_dont_close++] = params->stderr_fd;
2587
2588 if (socket_fd >= 0)
2589 dont_close[n_dont_close++] = socket_fd;
2590 if (n_fds > 0) {
2591 memcpy(dont_close + n_dont_close, fds, sizeof(int) * n_fds);
2592 n_dont_close += n_fds;
2593 }
2594
29206d46
LP
2595 if (runtime)
2596 append_socket_pair(dont_close, &n_dont_close, runtime->netns_storage_socket);
2597
2598 if (dcreds) {
2599 if (dcreds->user)
2600 append_socket_pair(dont_close, &n_dont_close, dcreds->user->storage_socket);
2601 if (dcreds->group)
2602 append_socket_pair(dont_close, &n_dont_close, dcreds->group->storage_socket);
a34ceba6
LP
2603 }
2604
00d9ef85
LP
2605 if (user_lookup_fd >= 0)
2606 dont_close[n_dont_close++] = user_lookup_fd;
2607
a34ceba6
LP
2608 return close_all_fds(dont_close, n_dont_close);
2609}
2610
00d9ef85
LP
2611static int send_user_lookup(
2612 Unit *unit,
2613 int user_lookup_fd,
2614 uid_t uid,
2615 gid_t gid) {
2616
2617 assert(unit);
2618
2619 /* Send the resolved UID/GID to PID 1 after we learnt it. We send a single datagram, containing the UID/GID
2620 * data as well as the unit name. Note that we suppress sending this if no user/group to resolve was
2621 * specified. */
2622
2623 if (user_lookup_fd < 0)
2624 return 0;
2625
2626 if (!uid_is_valid(uid) && !gid_is_valid(gid))
2627 return 0;
2628
2629 if (writev(user_lookup_fd,
2630 (struct iovec[]) {
e6a7ec4b
LP
2631 IOVEC_INIT(&uid, sizeof(uid)),
2632 IOVEC_INIT(&gid, sizeof(gid)),
2633 IOVEC_INIT_STRING(unit->id) }, 3) < 0)
00d9ef85
LP
2634 return -errno;
2635
2636 return 0;
2637}
2638
6732edab
LP
2639static int acquire_home(const ExecContext *c, uid_t uid, const char** home, char **buf) {
2640 int r;
2641
2642 assert(c);
2643 assert(home);
2644 assert(buf);
2645
2646 /* If WorkingDirectory=~ is set, try to acquire a usable home directory. */
2647
2648 if (*home)
2649 return 0;
2650
2651 if (!c->working_directory_home)
2652 return 0;
2653
2654 if (uid == 0) {
2655 /* Hardcode /root as home directory for UID 0 */
2656 *home = "/root";
2657 return 1;
2658 }
2659
2660 r = get_home_dir(buf);
2661 if (r < 0)
2662 return r;
2663
2664 *home = *buf;
2665 return 1;
2666}
2667
da50b85a
LP
2668static int compile_suggested_paths(const ExecContext *c, const ExecParameters *p, char ***ret) {
2669 _cleanup_strv_free_ char ** list = NULL;
2670 ExecDirectoryType t;
2671 int r;
2672
2673 assert(c);
2674 assert(p);
2675 assert(ret);
2676
2677 assert(c->dynamic_user);
2678
2679 /* Compile a list of paths that it might make sense to read the owning UID from to use as initial candidate for
2680 * dynamic UID allocation, in order to save us from doing costly recursive chown()s of the special
2681 * directories. */
2682
2683 for (t = 0; t < _EXEC_DIRECTORY_TYPE_MAX; t++) {
2684 char **i;
2685
2686 if (t == EXEC_DIRECTORY_CONFIGURATION)
2687 continue;
2688
2689 if (!p->prefix[t])
2690 continue;
2691
2692 STRV_FOREACH(i, c->directories[t].paths) {
2693 char *e;
2694
8092a48c
YW
2695 if (t == EXEC_DIRECTORY_RUNTIME)
2696 e = strjoin(p->prefix[t], "/", *i);
2697 else
2698 e = strjoin(p->prefix[t], "/private/", *i);
da50b85a
LP
2699 if (!e)
2700 return -ENOMEM;
2701
2702 r = strv_consume(&list, e);
2703 if (r < 0)
2704 return r;
2705 }
2706 }
2707
2708 *ret = list;
2709 list = NULL;
2710
2711 return 0;
2712}
2713
ff0af2a1 2714static int exec_child(
f2341e0a 2715 Unit *unit,
ff0af2a1
LP
2716 ExecCommand *command,
2717 const ExecContext *context,
2718 const ExecParameters *params,
2719 ExecRuntime *runtime,
29206d46 2720 DynamicCreds *dcreds,
ff0af2a1
LP
2721 char **argv,
2722 int socket_fd,
52c239d7 2723 int named_iofds[3],
4c47affc
FB
2724 int *fds,
2725 unsigned n_storage_fds,
9b141911 2726 unsigned n_socket_fds,
ff0af2a1 2727 char **files_env,
00d9ef85 2728 int user_lookup_fd,
12145637 2729 int *exit_status) {
d35fbf6b 2730
2065ca69 2731 _cleanup_strv_free_ char **our_env = NULL, **pass_env = NULL, **accum_env = NULL, **final_argv = NULL;
7f59dd35 2732 _cleanup_free_ char *home_buffer = NULL;
4d885bd3
DH
2733 _cleanup_free_ gid_t *supplementary_gids = NULL;
2734 const char *username = NULL, *groupname = NULL;
2b3c1b9e 2735 const char *home = NULL, *shell = NULL;
7bce046b
LP
2736 dev_t journal_stream_dev = 0;
2737 ino_t journal_stream_ino = 0;
165a31c0
LP
2738 bool needs_sandboxing, /* Do we need to set up full sandboxing? (i.e. all namespacing, all MAC stuff, caps, yadda yadda */
2739 needs_setuid, /* Do we need to do the actual setresuid()/setresgid() calls? */
2740 needs_mount_namespace, /* Do we need to set up a mount namespace for this kernel? */
2741 needs_ambient_hack; /* Do we need to apply the ambient capabilities hack? */
349cc4a5 2742#if HAVE_SELINUX
7f59dd35 2743 _cleanup_free_ char *mac_selinux_context_net = NULL;
43b1f709 2744 bool use_selinux = false;
ecfbc84f 2745#endif
f9fa32f0 2746#if ENABLE_SMACK
43b1f709 2747 bool use_smack = false;
ecfbc84f 2748#endif
349cc4a5 2749#if HAVE_APPARMOR
43b1f709 2750 bool use_apparmor = false;
ecfbc84f 2751#endif
fed1e721
LP
2752 uid_t uid = UID_INVALID;
2753 gid_t gid = GID_INVALID;
4d885bd3 2754 int i, r, ngids = 0;
4c47affc 2755 unsigned n_fds;
3536f49e 2756 ExecDirectoryType dt;
165a31c0 2757 int secure_bits;
034c6ed7 2758
f2341e0a 2759 assert(unit);
5cb5a6ff
LP
2760 assert(command);
2761 assert(context);
d35fbf6b 2762 assert(params);
ff0af2a1 2763 assert(exit_status);
d35fbf6b
DM
2764
2765 rename_process_from_path(command->path);
2766
2767 /* We reset exactly these signals, since they are the
2768 * only ones we set to SIG_IGN in the main daemon. All
2769 * others we leave untouched because we set them to
2770 * SIG_DFL or a valid handler initially, both of which
2771 * will be demoted to SIG_DFL. */
ce30c8dc
LP
2772 (void) default_signals(SIGNALS_CRASH_HANDLER,
2773 SIGNALS_IGNORE, -1);
d35fbf6b
DM
2774
2775 if (context->ignore_sigpipe)
ce30c8dc 2776 (void) ignore_signals(SIGPIPE, -1);
d35fbf6b 2777
ff0af2a1
LP
2778 r = reset_signal_mask();
2779 if (r < 0) {
2780 *exit_status = EXIT_SIGNAL_MASK;
12145637 2781 return log_unit_error_errno(unit, r, "Failed to set process signal mask: %m");
d35fbf6b 2782 }
034c6ed7 2783
d35fbf6b
DM
2784 if (params->idle_pipe)
2785 do_idle_pipe_dance(params->idle_pipe);
4f2d528d 2786
2c027c62
LP
2787 /* Close fds we don't need very early to make sure we don't block init reexecution because it cannot bind its
2788 * sockets. Among the fds we close are the logging fds, and we want to keep them closed, so that we don't have
2789 * any fds open we don't really want open during the transition. In order to make logging work, we switch the
2790 * log subsystem into open_when_needed mode, so that it reopens the logs on every single log call. */
ff0af2a1 2791
d35fbf6b 2792 log_forget_fds();
2c027c62 2793 log_set_open_when_needed(true);
4f2d528d 2794
40a80078
LP
2795 /* In case anything used libc syslog(), close this here, too */
2796 closelog();
2797
4c47affc 2798 n_fds = n_storage_fds + n_socket_fds;
00d9ef85 2799 r = close_remaining_fds(params, runtime, dcreds, user_lookup_fd, socket_fd, fds, n_fds);
ff0af2a1
LP
2800 if (r < 0) {
2801 *exit_status = EXIT_FDS;
12145637 2802 return log_unit_error_errno(unit, r, "Failed to close unwanted file descriptors: %m");
8c7be95e
LP
2803 }
2804
d35fbf6b
DM
2805 if (!context->same_pgrp)
2806 if (setsid() < 0) {
ff0af2a1 2807 *exit_status = EXIT_SETSID;
12145637 2808 return log_unit_error_errno(unit, errno, "Failed to create new process session: %m");
d35fbf6b 2809 }
9e2f7c11 2810
1e22b5cd 2811 exec_context_tty_reset(context, params);
d35fbf6b 2812
c891efaf 2813 if (unit_shall_confirm_spawn(unit)) {
7d5ceb64 2814 const char *vc = params->confirm_spawn;
3b20f877
FB
2815 _cleanup_free_ char *cmdline = NULL;
2816
2817 cmdline = exec_command_line(argv);
2818 if (!cmdline) {
0460aa5c 2819 *exit_status = EXIT_MEMORY;
12145637 2820 return log_oom();
3b20f877 2821 }
d35fbf6b 2822
eedf223a 2823 r = ask_for_confirmation(vc, unit, cmdline);
3b20f877
FB
2824 if (r != CONFIRM_EXECUTE) {
2825 if (r == CONFIRM_PRETEND_SUCCESS) {
2826 *exit_status = EXIT_SUCCESS;
2827 return 0;
2828 }
ff0af2a1 2829 *exit_status = EXIT_CONFIRM;
12145637 2830 log_unit_error(unit, "Execution cancelled by the user");
d35fbf6b 2831 return -ECANCELED;
d35fbf6b
DM
2832 }
2833 }
1a63a750 2834
29206d46 2835 if (context->dynamic_user && dcreds) {
da50b85a 2836 _cleanup_strv_free_ char **suggested_paths = NULL;
29206d46 2837
409093fe
LP
2838 /* Make sure we bypass our own NSS module for any NSS checks */
2839 if (putenv((char*) "SYSTEMD_NSS_DYNAMIC_BYPASS=1") != 0) {
2840 *exit_status = EXIT_USER;
12145637 2841 return log_unit_error_errno(unit, errno, "Failed to update environment: %m");
409093fe
LP
2842 }
2843
da50b85a
LP
2844 r = compile_suggested_paths(context, params, &suggested_paths);
2845 if (r < 0) {
2846 *exit_status = EXIT_MEMORY;
2847 return log_oom();
2848 }
2849
2850 r = dynamic_creds_realize(dcreds, suggested_paths, &uid, &gid);
ff0af2a1
LP
2851 if (r < 0) {
2852 *exit_status = EXIT_USER;
e2b0cc34
YW
2853 if (r == -EILSEQ) {
2854 log_unit_error(unit, "Failed to update dynamic user credentials: User or group with specified name already exists.");
2855 return -EOPNOTSUPP;
2856 }
12145637 2857 return log_unit_error_errno(unit, r, "Failed to update dynamic user credentials: %m");
524daa8c 2858 }
524daa8c 2859
70dd455c 2860 if (!uid_is_valid(uid)) {
29206d46 2861 *exit_status = EXIT_USER;
12145637 2862 log_unit_error(unit, "UID validation failed for \""UID_FMT"\"", uid);
70dd455c
ZJS
2863 return -ESRCH;
2864 }
2865
2866 if (!gid_is_valid(gid)) {
2867 *exit_status = EXIT_USER;
12145637 2868 log_unit_error(unit, "GID validation failed for \""GID_FMT"\"", gid);
29206d46
LP
2869 return -ESRCH;
2870 }
5bc7452b 2871
29206d46
LP
2872 if (dcreds->user)
2873 username = dcreds->user->name;
2874
2875 } else {
4d885bd3
DH
2876 r = get_fixed_user(context, &username, &uid, &gid, &home, &shell);
2877 if (r < 0) {
2878 *exit_status = EXIT_USER;
12145637 2879 return log_unit_error_errno(unit, r, "Failed to determine user credentials: %m");
5bc7452b 2880 }
5bc7452b 2881
4d885bd3
DH
2882 r = get_fixed_group(context, &groupname, &gid);
2883 if (r < 0) {
2884 *exit_status = EXIT_GROUP;
12145637 2885 return log_unit_error_errno(unit, r, "Failed to determine group credentials: %m");
4d885bd3 2886 }
cdc5d5c5 2887 }
29206d46 2888
cdc5d5c5
DH
2889 /* Initialize user supplementary groups and get SupplementaryGroups= ones */
2890 r = get_supplementary_groups(context, username, groupname, gid,
2891 &supplementary_gids, &ngids);
2892 if (r < 0) {
2893 *exit_status = EXIT_GROUP;
12145637 2894 return log_unit_error_errno(unit, r, "Failed to determine supplementary groups: %m");
29206d46 2895 }
5bc7452b 2896
00d9ef85
LP
2897 r = send_user_lookup(unit, user_lookup_fd, uid, gid);
2898 if (r < 0) {
2899 *exit_status = EXIT_USER;
12145637 2900 return log_unit_error_errno(unit, r, "Failed to send user credentials to PID1: %m");
00d9ef85
LP
2901 }
2902
2903 user_lookup_fd = safe_close(user_lookup_fd);
2904
6732edab
LP
2905 r = acquire_home(context, uid, &home, &home_buffer);
2906 if (r < 0) {
2907 *exit_status = EXIT_CHDIR;
12145637 2908 return log_unit_error_errno(unit, r, "Failed to determine $HOME for user: %m");
6732edab
LP
2909 }
2910
d35fbf6b
DM
2911 /* If a socket is connected to STDIN/STDOUT/STDERR, we
2912 * must be sure to drop O_NONBLOCK */
2913 if (socket_fd >= 0)
a34ceba6 2914 (void) fd_nonblock(socket_fd, false);
acbb0225 2915
52c239d7 2916 r = setup_input(context, params, socket_fd, named_iofds);
ff0af2a1
LP
2917 if (r < 0) {
2918 *exit_status = EXIT_STDIN;
12145637 2919 return log_unit_error_errno(unit, r, "Failed to set up standard input: %m");
d35fbf6b 2920 }
034c6ed7 2921
52c239d7 2922 r = setup_output(unit, context, params, STDOUT_FILENO, socket_fd, named_iofds, basename(command->path), uid, gid, &journal_stream_dev, &journal_stream_ino);
ff0af2a1
LP
2923 if (r < 0) {
2924 *exit_status = EXIT_STDOUT;
12145637 2925 return log_unit_error_errno(unit, r, "Failed to set up standard output: %m");
d35fbf6b
DM
2926 }
2927
52c239d7 2928 r = setup_output(unit, context, params, STDERR_FILENO, socket_fd, named_iofds, basename(command->path), uid, gid, &journal_stream_dev, &journal_stream_ino);
ff0af2a1
LP
2929 if (r < 0) {
2930 *exit_status = EXIT_STDERR;
12145637 2931 return log_unit_error_errno(unit, r, "Failed to set up standard error output: %m");
d35fbf6b
DM
2932 }
2933
2934 if (params->cgroup_path) {
ff0af2a1
LP
2935 r = cg_attach_everywhere(params->cgroup_supported, params->cgroup_path, 0, NULL, NULL);
2936 if (r < 0) {
2937 *exit_status = EXIT_CGROUP;
12145637 2938 return log_unit_error_errno(unit, r, "Failed to attach to cgroup %s: %m", params->cgroup_path);
309bff19 2939 }
d35fbf6b 2940 }
309bff19 2941
d35fbf6b 2942 if (context->oom_score_adjust_set) {
d5243d62 2943 char t[DECIMAL_STR_MAX(context->oom_score_adjust)];
f2b68789 2944
d5243d62
LP
2945 /* When we can't make this change due to EPERM, then
2946 * let's silently skip over it. User namespaces
2947 * prohibit write access to this file, and we
2948 * shouldn't trip up over that. */
613b411c 2949
d5243d62 2950 sprintf(t, "%i", context->oom_score_adjust);
ad118bda 2951 r = write_string_file("/proc/self/oom_score_adj", t, 0);
12145637 2952 if (IN_SET(r, -EPERM, -EACCES))
f2341e0a 2953 log_unit_debug_errno(unit, r, "Failed to adjust OOM setting, assuming containerized execution, ignoring: %m");
12145637 2954 else if (r < 0) {
ff0af2a1 2955 *exit_status = EXIT_OOM_ADJUST;
12145637 2956 return log_unit_error_errno(unit, r, "Failed to adjust OOM setting: %m");
613b411c 2957 }
d35fbf6b
DM
2958 }
2959
2960 if (context->nice_set)
2961 if (setpriority(PRIO_PROCESS, 0, context->nice) < 0) {
ff0af2a1 2962 *exit_status = EXIT_NICE;
12145637 2963 return log_unit_error_errno(unit, errno, "Failed to set up process scheduling priority (nice level): %m");
613b411c
LP
2964 }
2965
d35fbf6b
DM
2966 if (context->cpu_sched_set) {
2967 struct sched_param param = {
2968 .sched_priority = context->cpu_sched_priority,
2969 };
2970
ff0af2a1
LP
2971 r = sched_setscheduler(0,
2972 context->cpu_sched_policy |
2973 (context->cpu_sched_reset_on_fork ?
2974 SCHED_RESET_ON_FORK : 0),
2975 &param);
2976 if (r < 0) {
2977 *exit_status = EXIT_SETSCHEDULER;
12145637 2978 return log_unit_error_errno(unit, errno, "Failed to set up CPU scheduling: %m");
fc9b2a84 2979 }
d35fbf6b 2980 }
fc9b2a84 2981
d35fbf6b
DM
2982 if (context->cpuset)
2983 if (sched_setaffinity(0, CPU_ALLOC_SIZE(context->cpuset_ncpus), context->cpuset) < 0) {
ff0af2a1 2984 *exit_status = EXIT_CPUAFFINITY;
12145637 2985 return log_unit_error_errno(unit, errno, "Failed to set up CPU affinity: %m");
034c6ed7
LP
2986 }
2987
d35fbf6b
DM
2988 if (context->ioprio_set)
2989 if (ioprio_set(IOPRIO_WHO_PROCESS, 0, context->ioprio) < 0) {
ff0af2a1 2990 *exit_status = EXIT_IOPRIO;
12145637 2991 return log_unit_error_errno(unit, errno, "Failed to set up IO scheduling priority: %m");
d35fbf6b 2992 }
da726a4d 2993
d35fbf6b
DM
2994 if (context->timer_slack_nsec != NSEC_INFINITY)
2995 if (prctl(PR_SET_TIMERSLACK, context->timer_slack_nsec) < 0) {
ff0af2a1 2996 *exit_status = EXIT_TIMERSLACK;
12145637 2997 return log_unit_error_errno(unit, errno, "Failed to set up timer slack: %m");
4c2630eb 2998 }
9eba9da4 2999
21022b9d
LP
3000 if (context->personality != PERSONALITY_INVALID) {
3001 r = safe_personality(context->personality);
3002 if (r < 0) {
ff0af2a1 3003 *exit_status = EXIT_PERSONALITY;
12145637 3004 return log_unit_error_errno(unit, r, "Failed to set up execution domain (personality): %m");
4c2630eb 3005 }
21022b9d 3006 }
94f04347 3007
d35fbf6b 3008 if (context->utmp_id)
df0ff127 3009 utmp_put_init_process(context->utmp_id, getpid_cached(), getsid(0),
6a93917d 3010 context->tty_path,
023a4f67
LP
3011 context->utmp_mode == EXEC_UTMP_INIT ? INIT_PROCESS :
3012 context->utmp_mode == EXEC_UTMP_LOGIN ? LOGIN_PROCESS :
3013 USER_PROCESS,
6a93917d 3014 username);
d35fbf6b 3015
e0d2adfd 3016 if (context->user) {
ff0af2a1
LP
3017 r = chown_terminal(STDIN_FILENO, uid);
3018 if (r < 0) {
3019 *exit_status = EXIT_STDIN;
12145637 3020 return log_unit_error_errno(unit, r, "Failed to change ownership of terminal: %m");
071830ff 3021 }
d35fbf6b 3022 }
8e274523 3023
62b9bb26
LP
3024 /* If delegation is enabled we'll pass ownership of the cgroup to the user of the new process. On cgroupsv1
3025 * this is only about systemd's own hierarchy, i.e. not the controller hierarchies, simply because that's not
3026 * safe. On cgroupsv2 there's only one hierarchy anyway, and delegation is safe there, hence in that case only
3027 * touch a single hierarchy too. */
584b8688 3028 if (params->cgroup_path && context->user && (params->flags & EXEC_CGROUP_DELEGATE)) {
62b9bb26 3029 r = cg_set_access(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, uid, gid);
ff0af2a1
LP
3030 if (r < 0) {
3031 *exit_status = EXIT_CGROUP;
12145637 3032 return log_unit_error_errno(unit, r, "Failed to adjust control group access: %m");
034c6ed7 3033 }
d35fbf6b 3034 }
034c6ed7 3035
72fd1768 3036 for (dt = 0; dt < _EXEC_DIRECTORY_TYPE_MAX; dt++) {
8679efde 3037 r = setup_exec_directory(context, params, uid, gid, dt, exit_status);
12145637
LP
3038 if (r < 0)
3039 return log_unit_error_errno(unit, r, "Failed to set up special execution directory in %s: %m", params->prefix[dt]);
d35fbf6b 3040 }
94f04347 3041
7bce046b 3042 r = build_environment(
fd63e712 3043 unit,
7bce046b
LP
3044 context,
3045 params,
3046 n_fds,
3047 home,
3048 username,
3049 shell,
3050 journal_stream_dev,
3051 journal_stream_ino,
3052 &our_env);
2065ca69
JW
3053 if (r < 0) {
3054 *exit_status = EXIT_MEMORY;
12145637 3055 return log_oom();
2065ca69
JW
3056 }
3057
3058 r = build_pass_environment(context, &pass_env);
3059 if (r < 0) {
3060 *exit_status = EXIT_MEMORY;
12145637 3061 return log_oom();
2065ca69
JW
3062 }
3063
3064 accum_env = strv_env_merge(5,
3065 params->environment,
3066 our_env,
3067 pass_env,
3068 context->environment,
3069 files_env,
3070 NULL);
3071 if (!accum_env) {
3072 *exit_status = EXIT_MEMORY;
12145637 3073 return log_oom();
2065ca69 3074 }
1280503b 3075 accum_env = strv_env_clean(accum_env);
2065ca69 3076
096424d1 3077 (void) umask(context->umask);
b213e1c1 3078
b1edf445 3079 r = setup_keyring(unit, context, params, uid, gid);
74dd6b51
LP
3080 if (r < 0) {
3081 *exit_status = EXIT_KEYRING;
12145637 3082 return log_unit_error_errno(unit, r, "Failed to set up kernel keyring: %m");
74dd6b51
LP
3083 }
3084
165a31c0 3085 /* We need sandboxing if the caller asked us to apply it and the command isn't explicitly excepted from it */
1703fa41 3086 needs_sandboxing = (params->flags & EXEC_APPLY_SANDBOXING) && !(command->flags & EXEC_COMMAND_FULLY_PRIVILEGED);
7f18ef0a 3087
165a31c0
LP
3088 /* We need the ambient capability hack, if the caller asked us to apply it and the command is marked for it, and the kernel doesn't actually support ambient caps */
3089 needs_ambient_hack = (params->flags & EXEC_APPLY_SANDBOXING) && (command->flags & EXEC_COMMAND_AMBIENT_MAGIC) && !ambient_capabilities_supported();
7f18ef0a 3090
165a31c0
LP
3091 /* We need setresuid() if the caller asked us to apply sandboxing and the command isn't explicitly excepted from either whole sandboxing or just setresuid() itself, and the ambient hack is not desired */
3092 if (needs_ambient_hack)
3093 needs_setuid = false;
3094 else
3095 needs_setuid = (params->flags & EXEC_APPLY_SANDBOXING) && !(command->flags & (EXEC_COMMAND_FULLY_PRIVILEGED|EXEC_COMMAND_NO_SETUID));
3096
3097 if (needs_sandboxing) {
7f18ef0a
FK
3098 /* MAC enablement checks need to be done before a new mount ns is created, as they rely on /sys being
3099 * present. The actual MAC context application will happen later, as late as possible, to avoid
3100 * impacting our own code paths. */
3101
349cc4a5 3102#if HAVE_SELINUX
43b1f709 3103 use_selinux = mac_selinux_use();
7f18ef0a 3104#endif
f9fa32f0 3105#if ENABLE_SMACK
43b1f709 3106 use_smack = mac_smack_use();
7f18ef0a 3107#endif
349cc4a5 3108#if HAVE_APPARMOR
43b1f709 3109 use_apparmor = mac_apparmor_use();
7f18ef0a 3110#endif
165a31c0 3111 }
7f18ef0a 3112
165a31c0
LP
3113 if (needs_setuid) {
3114 if (context->pam_name && username) {
3115 r = setup_pam(context->pam_name, username, uid, gid, context->tty_path, &accum_env, fds, n_fds);
3116 if (r < 0) {
3117 *exit_status = EXIT_PAM;
12145637 3118 return log_unit_error_errno(unit, r, "Failed to set up PAM session: %m");
165a31c0
LP
3119 }
3120 }
b213e1c1 3121 }
ac45f971 3122
d35fbf6b 3123 if (context->private_network && runtime && runtime->netns_storage_socket[0] >= 0) {
6e2d7c4f
MS
3124 if (ns_type_supported(NAMESPACE_NET)) {
3125 r = setup_netns(runtime->netns_storage_socket);
3126 if (r < 0) {
3127 *exit_status = EXIT_NETWORK;
3128 return log_unit_error_errno(unit, r, "Failed to set up network namespacing: %m");
3129 }
3130 } else
3131 log_unit_warning(unit, "PrivateNetwork=yes is configured, but the kernel does not support network namespaces, ignoring.");
d35fbf6b 3132 }
169c1bda 3133
ee818b89 3134 needs_mount_namespace = exec_needs_mount_namespace(context, params, runtime);
ee818b89 3135 if (needs_mount_namespace) {
6818c54c 3136 r = apply_mount_namespace(unit, command, context, params, runtime);
3fbe8dbe
LP
3137 if (r < 0) {
3138 *exit_status = EXIT_NAMESPACE;
12145637 3139 return log_unit_error_errno(unit, r, "Failed to set up mount namespacing: %m");
3fbe8dbe 3140 }
d35fbf6b 3141 }
81a2b7ce 3142
50b3dfb9 3143 /* Apply just after mount namespace setup */
376fecf6 3144 r = apply_working_directory(context, params, home, needs_mount_namespace, exit_status);
12145637
LP
3145 if (r < 0)
3146 return log_unit_error_errno(unit, r, "Changing to the requested working directory failed: %m");
50b3dfb9 3147
bbeea271 3148 /* Drop groups as early as possible */
165a31c0 3149 if (needs_setuid) {
709dbeac 3150 r = enforce_groups(gid, supplementary_gids, ngids);
096424d1
LP
3151 if (r < 0) {
3152 *exit_status = EXIT_GROUP;
12145637 3153 return log_unit_error_errno(unit, r, "Changing group credentials failed: %m");
096424d1 3154 }
165a31c0 3155 }
096424d1 3156
165a31c0 3157 if (needs_sandboxing) {
349cc4a5 3158#if HAVE_SELINUX
43b1f709 3159 if (use_selinux && params->selinux_context_net && socket_fd >= 0) {
937ccce9
LP
3160 r = mac_selinux_get_child_mls_label(socket_fd, command->path, context->selinux_context, &mac_selinux_context_net);
3161 if (r < 0) {
3162 *exit_status = EXIT_SELINUX_CONTEXT;
12145637 3163 return log_unit_error_errno(unit, r, "Failed to determine SELinux context: %m");
937ccce9 3164 }
9008e1ac 3165 }
9008e1ac
MS
3166#endif
3167
937ccce9
LP
3168 if (context->private_users) {
3169 r = setup_private_users(uid, gid);
3170 if (r < 0) {
3171 *exit_status = EXIT_USER;
12145637 3172 return log_unit_error_errno(unit, r, "Failed to set up user namespacing: %m");
937ccce9 3173 }
d251207d
LP
3174 }
3175 }
3176
165a31c0
LP
3177 /* We repeat the fd closing here, to make sure that nothing is leaked from the PAM modules. Note that we are
3178 * more aggressive this time since socket_fd and the netns fds we don't need anymore. The custom endpoint fd
3179 * was needed to upload the policy and can now be closed as well. */
ff0af2a1
LP
3180 r = close_all_fds(fds, n_fds);
3181 if (r >= 0)
3182 r = shift_fds(fds, n_fds);
3183 if (r >= 0)
4c47affc 3184 r = flags_fds(fds, n_storage_fds, n_socket_fds, context->non_blocking);
ff0af2a1
LP
3185 if (r < 0) {
3186 *exit_status = EXIT_FDS;
12145637 3187 return log_unit_error_errno(unit, r, "Failed to adjust passed file descriptors: %m");
d35fbf6b 3188 }
e66cf1a3 3189
165a31c0 3190 secure_bits = context->secure_bits;
e66cf1a3 3191
165a31c0
LP
3192 if (needs_sandboxing) {
3193 uint64_t bset;
755d4b67 3194
d35fbf6b 3195 for (i = 0; i < _RLIMIT_MAX; i++) {
03857c43 3196
d35fbf6b
DM
3197 if (!context->rlimit[i])
3198 continue;
3199
03857c43
LP
3200 r = setrlimit_closest(i, context->rlimit[i]);
3201 if (r < 0) {
ff0af2a1 3202 *exit_status = EXIT_LIMITS;
12145637 3203 return log_unit_error_errno(unit, r, "Failed to adjust resource limit %s: %m", rlimit_to_string(i));
e66cf1a3
LP
3204 }
3205 }
3206
f4170c67
LP
3207 /* Set the RTPRIO resource limit to 0, but only if nothing else was explicitly requested. */
3208 if (context->restrict_realtime && !context->rlimit[RLIMIT_RTPRIO]) {
3209 if (setrlimit(RLIMIT_RTPRIO, &RLIMIT_MAKE_CONST(0)) < 0) {
3210 *exit_status = EXIT_LIMITS;
12145637 3211 return log_unit_error_errno(unit, errno, "Failed to adjust RLIMIT_RTPRIO resource limit: %m");
f4170c67
LP
3212 }
3213 }
3214
37ac2744
JB
3215#if ENABLE_SMACK
3216 /* LSM Smack needs the capability CAP_MAC_ADMIN to change the current execution security context of the
3217 * process. This is the latest place before dropping capabilities. Other MAC context are set later. */
3218 if (use_smack) {
3219 r = setup_smack(context, command);
3220 if (r < 0) {
3221 *exit_status = EXIT_SMACK_PROCESS_LABEL;
3222 return log_unit_error_errno(unit, r, "Failed to set SMACK process label: %m");
3223 }
3224 }
3225#endif
3226
165a31c0
LP
3227 bset = context->capability_bounding_set;
3228 /* If the ambient caps hack is enabled (which means the kernel can't do them, and the user asked for
3229 * our magic fallback), then let's add some extra caps, so that the service can drop privs of its own,
3230 * instead of us doing that */
3231 if (needs_ambient_hack)
3232 bset |= (UINT64_C(1) << CAP_SETPCAP) |
3233 (UINT64_C(1) << CAP_SETUID) |
3234 (UINT64_C(1) << CAP_SETGID);
3235
3236 if (!cap_test_all(bset)) {
3237 r = capability_bounding_set_drop(bset, false);
ff0af2a1
LP
3238 if (r < 0) {
3239 *exit_status = EXIT_CAPABILITIES;
12145637 3240 return log_unit_error_errno(unit, r, "Failed to drop capabilities: %m");
3b8bddde 3241 }
4c2630eb 3242 }
3b8bddde 3243
755d4b67
IP
3244 /* This is done before enforce_user, but ambient set
3245 * does not survive over setresuid() if keep_caps is not set. */
165a31c0
LP
3246 if (!needs_ambient_hack &&
3247 context->capability_ambient_set != 0) {
755d4b67
IP
3248 r = capability_ambient_set_apply(context->capability_ambient_set, true);
3249 if (r < 0) {
3250 *exit_status = EXIT_CAPABILITIES;
12145637 3251 return log_unit_error_errno(unit, r, "Failed to apply ambient capabilities (before UID change): %m");
755d4b67 3252 }
755d4b67 3253 }
165a31c0 3254 }
755d4b67 3255
165a31c0 3256 if (needs_setuid) {
d35fbf6b 3257 if (context->user) {
ff0af2a1
LP
3258 r = enforce_user(context, uid);
3259 if (r < 0) {
3260 *exit_status = EXIT_USER;
12145637 3261 return log_unit_error_errno(unit, r, "Failed to change UID to " UID_FMT ": %m", uid);
5b6319dc 3262 }
165a31c0
LP
3263
3264 if (!needs_ambient_hack &&
3265 context->capability_ambient_set != 0) {
755d4b67
IP
3266
3267 /* Fix the ambient capabilities after user change. */
3268 r = capability_ambient_set_apply(context->capability_ambient_set, false);
3269 if (r < 0) {
3270 *exit_status = EXIT_CAPABILITIES;
12145637 3271 return log_unit_error_errno(unit, r, "Failed to apply ambient capabilities (after UID change): %m");
755d4b67
IP
3272 }
3273
3274 /* If we were asked to change user and ambient capabilities
3275 * were requested, we had to add keep-caps to the securebits
3276 * so that we would maintain the inherited capability set
3277 * through the setresuid(). Make sure that the bit is added
3278 * also to the context secure_bits so that we don't try to
3279 * drop the bit away next. */
3280
7f508f2c 3281 secure_bits |= 1<<SECURE_KEEP_CAPS;
755d4b67 3282 }
5b6319dc 3283 }
165a31c0 3284 }
d35fbf6b 3285
165a31c0 3286 if (needs_sandboxing) {
37ac2744 3287 /* Apply other MAC contexts late, but before seccomp syscall filtering, as those should really be last to
5cd9cd35
LP
3288 * influence our own codepaths as little as possible. Moreover, applying MAC contexts usually requires
3289 * syscalls that are subject to seccomp filtering, hence should probably be applied before the syscalls
3290 * are restricted. */
3291
349cc4a5 3292#if HAVE_SELINUX
43b1f709 3293 if (use_selinux) {
5cd9cd35
LP
3294 char *exec_context = mac_selinux_context_net ?: context->selinux_context;
3295
3296 if (exec_context) {
3297 r = setexeccon(exec_context);
3298 if (r < 0) {
3299 *exit_status = EXIT_SELINUX_CONTEXT;
12145637 3300 return log_unit_error_errno(unit, r, "Failed to change SELinux context to %s: %m", exec_context);
5cd9cd35
LP
3301 }
3302 }
3303 }
3304#endif
3305
349cc4a5 3306#if HAVE_APPARMOR
43b1f709 3307 if (use_apparmor && context->apparmor_profile) {
5cd9cd35
LP
3308 r = aa_change_onexec(context->apparmor_profile);
3309 if (r < 0 && !context->apparmor_profile_ignore) {
3310 *exit_status = EXIT_APPARMOR_PROFILE;
12145637 3311 return log_unit_error_errno(unit, errno, "Failed to prepare AppArmor profile change to %s: %m", context->apparmor_profile);
5cd9cd35
LP
3312 }
3313 }
3314#endif
3315
165a31c0
LP
3316 /* PR_GET_SECUREBITS is not privileged, while PR_SET_SECUREBITS is. So to suppress potential EPERMs
3317 * we'll try not to call PR_SET_SECUREBITS unless necessary. */
755d4b67
IP
3318 if (prctl(PR_GET_SECUREBITS) != secure_bits)
3319 if (prctl(PR_SET_SECUREBITS, secure_bits) < 0) {
ff0af2a1 3320 *exit_status = EXIT_SECUREBITS;
12145637 3321 return log_unit_error_errno(unit, errno, "Failed to set process secure bits: %m");
ff01d048 3322 }
5b6319dc 3323
59eeb84b 3324 if (context_has_no_new_privileges(context))
d35fbf6b 3325 if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) < 0) {
ff0af2a1 3326 *exit_status = EXIT_NO_NEW_PRIVILEGES;
12145637 3327 return log_unit_error_errno(unit, errno, "Failed to disable new privileges: %m");
d35fbf6b
DM
3328 }
3329
349cc4a5 3330#if HAVE_SECCOMP
469830d1
LP
3331 r = apply_address_families(unit, context);
3332 if (r < 0) {
3333 *exit_status = EXIT_ADDRESS_FAMILIES;
12145637 3334 return log_unit_error_errno(unit, r, "Failed to restrict address families: %m");
4c2630eb 3335 }
04aa0cb9 3336
469830d1
LP
3337 r = apply_memory_deny_write_execute(unit, context);
3338 if (r < 0) {
3339 *exit_status = EXIT_SECCOMP;
12145637 3340 return log_unit_error_errno(unit, r, "Failed to disable writing to executable memory: %m");
f3e43635 3341 }
f4170c67 3342
469830d1
LP
3343 r = apply_restrict_realtime(unit, context);
3344 if (r < 0) {
3345 *exit_status = EXIT_SECCOMP;
12145637 3346 return log_unit_error_errno(unit, r, "Failed to apply realtime restrictions: %m");
f4170c67
LP
3347 }
3348
add00535
LP
3349 r = apply_restrict_namespaces(unit, context);
3350 if (r < 0) {
3351 *exit_status = EXIT_SECCOMP;
12145637 3352 return log_unit_error_errno(unit, r, "Failed to apply namespace restrictions: %m");
add00535
LP
3353 }
3354
469830d1
LP
3355 r = apply_protect_sysctl(unit, context);
3356 if (r < 0) {
3357 *exit_status = EXIT_SECCOMP;
12145637 3358 return log_unit_error_errno(unit, r, "Failed to apply sysctl restrictions: %m");
502d704e
DH
3359 }
3360
469830d1
LP
3361 r = apply_protect_kernel_modules(unit, context);
3362 if (r < 0) {
3363 *exit_status = EXIT_SECCOMP;
12145637 3364 return log_unit_error_errno(unit, r, "Failed to apply module loading restrictions: %m");
59eeb84b
LP
3365 }
3366
469830d1
LP
3367 r = apply_private_devices(unit, context);
3368 if (r < 0) {
3369 *exit_status = EXIT_SECCOMP;
12145637 3370 return log_unit_error_errno(unit, r, "Failed to set up private devices: %m");
469830d1
LP
3371 }
3372
3373 r = apply_syscall_archs(unit, context);
3374 if (r < 0) {
3375 *exit_status = EXIT_SECCOMP;
12145637 3376 return log_unit_error_errno(unit, r, "Failed to apply syscall architecture restrictions: %m");
ba128bb8
LP
3377 }
3378
78e864e5
TM
3379 r = apply_lock_personality(unit, context);
3380 if (r < 0) {
3381 *exit_status = EXIT_SECCOMP;
12145637 3382 return log_unit_error_errno(unit, r, "Failed to lock personalities: %m");
78e864e5
TM
3383 }
3384
5cd9cd35
LP
3385 /* This really should remain the last step before the execve(), to make sure our own code is unaffected
3386 * by the filter as little as possible. */
165a31c0 3387 r = apply_syscall_filter(unit, context, needs_ambient_hack);
469830d1
LP
3388 if (r < 0) {
3389 *exit_status = EXIT_SECCOMP;
12145637 3390 return log_unit_error_errno(unit, r, "Failed to apply system call filters: %m");
d35fbf6b
DM
3391 }
3392#endif
d35fbf6b 3393 }
034c6ed7 3394
00819cc1
LP
3395 if (!strv_isempty(context->unset_environment)) {
3396 char **ee = NULL;
3397
3398 ee = strv_env_delete(accum_env, 1, context->unset_environment);
3399 if (!ee) {
3400 *exit_status = EXIT_MEMORY;
12145637 3401 return log_oom();
00819cc1
LP
3402 }
3403
3404 strv_free(accum_env);
3405 accum_env = ee;
3406 }
3407
2065ca69 3408 final_argv = replace_env_argv(argv, accum_env);
d35fbf6b 3409 if (!final_argv) {
ff0af2a1 3410 *exit_status = EXIT_MEMORY;
12145637 3411 return log_oom();
d35fbf6b 3412 }
034c6ed7 3413
f1d34068 3414 if (DEBUG_LOGGING) {
d35fbf6b 3415 _cleanup_free_ char *line;
81a2b7ce 3416
d35fbf6b
DM
3417 line = exec_command_line(final_argv);
3418 if (line) {
f2341e0a 3419 log_struct(LOG_DEBUG,
f2341e0a
LP
3420 "EXECUTABLE=%s", command->path,
3421 LOG_UNIT_MESSAGE(unit, "Executing: %s", line),
ba360bb0 3422 LOG_UNIT_ID(unit),
f1c50bec 3423 LOG_UNIT_INVOCATION_ID(unit),
f2341e0a 3424 NULL);
d35fbf6b
DM
3425 }
3426 }
dd305ec9 3427
2065ca69 3428 execve(command->path, final_argv, accum_env);
12145637
LP
3429
3430 if (errno == ENOENT && (command->flags & EXEC_COMMAND_IGNORE_FAILURE)) {
3431
3432 log_struct_errno(LOG_INFO, errno,
3433 "MESSAGE_ID=" SD_MESSAGE_SPAWN_FAILED_STR,
3434 LOG_UNIT_ID(unit),
3435 LOG_UNIT_INVOCATION_ID(unit),
3436 LOG_UNIT_MESSAGE(unit, "Executable %s missing, skipping: %m",
3437 command->path),
3438 "EXECUTABLE=%s", command->path,
3439 NULL);
3440
3441 return 0;
3442 }
3443
ff0af2a1 3444 *exit_status = EXIT_EXEC;
12145637 3445 return log_unit_error_errno(unit, errno, "Failed to execute command: %m");
d35fbf6b 3446}
81a2b7ce 3447
f2341e0a
LP
3448int exec_spawn(Unit *unit,
3449 ExecCommand *command,
d35fbf6b
DM
3450 const ExecContext *context,
3451 const ExecParameters *params,
3452 ExecRuntime *runtime,
29206d46 3453 DynamicCreds *dcreds,
d35fbf6b 3454 pid_t *ret) {
8351ceae 3455
d35fbf6b 3456 _cleanup_strv_free_ char **files_env = NULL;
9b141911 3457 int *fds = NULL;
4c47affc 3458 unsigned n_storage_fds = 0, n_socket_fds = 0;
ff0af2a1
LP
3459 _cleanup_free_ char *line = NULL;
3460 int socket_fd, r;
52c239d7 3461 int named_iofds[3] = { -1, -1, -1 };
ff0af2a1 3462 char **argv;
d35fbf6b 3463 pid_t pid;
8351ceae 3464
f2341e0a 3465 assert(unit);
d35fbf6b
DM
3466 assert(command);
3467 assert(context);
3468 assert(ret);
3469 assert(params);
4c47affc 3470 assert(params->fds || (params->n_storage_fds + params->n_socket_fds <= 0));
4298d0b5 3471
d35fbf6b
DM
3472 if (context->std_input == EXEC_INPUT_SOCKET ||
3473 context->std_output == EXEC_OUTPUT_SOCKET ||
3474 context->std_error == EXEC_OUTPUT_SOCKET) {
17df7223 3475
4c47affc 3476 if (params->n_socket_fds > 1) {
f2341e0a 3477 log_unit_error(unit, "Got more than one socket.");
d35fbf6b 3478 return -EINVAL;
ff0af2a1 3479 }
eef65bf3 3480
4c47affc 3481 if (params->n_socket_fds == 0) {
488ab41c
AA
3482 log_unit_error(unit, "Got no socket.");
3483 return -EINVAL;
3484 }
3485
d35fbf6b
DM
3486 socket_fd = params->fds[0];
3487 } else {
3488 socket_fd = -1;
3489 fds = params->fds;
4c47affc 3490 n_storage_fds = params->n_storage_fds;
9b141911 3491 n_socket_fds = params->n_socket_fds;
d35fbf6b 3492 }
94f04347 3493
52c239d7
LB
3494 r = exec_context_named_iofds(unit, context, params, named_iofds);
3495 if (r < 0)
3496 return log_unit_error_errno(unit, r, "Failed to load a named file descriptor: %m");
3497
f2341e0a 3498 r = exec_context_load_environment(unit, context, &files_env);
ff0af2a1 3499 if (r < 0)
f2341e0a 3500 return log_unit_error_errno(unit, r, "Failed to load environment files: %m");
034c6ed7 3501
d35fbf6b 3502 argv = params->argv ?: command->argv;
d35fbf6b
DM
3503 line = exec_command_line(argv);
3504 if (!line)
3505 return log_oom();
fab56fc5 3506
f2341e0a 3507 log_struct(LOG_DEBUG,
f2341e0a
LP
3508 LOG_UNIT_MESSAGE(unit, "About to execute: %s", line),
3509 "EXECUTABLE=%s", command->path,
ba360bb0 3510 LOG_UNIT_ID(unit),
f1c50bec 3511 LOG_UNIT_INVOCATION_ID(unit),
f2341e0a 3512 NULL);
12145637 3513
d35fbf6b
DM
3514 pid = fork();
3515 if (pid < 0)
74129a12 3516 return log_unit_error_errno(unit, errno, "Failed to fork: %m");
d35fbf6b
DM
3517
3518 if (pid == 0) {
12145637 3519 int exit_status = EXIT_SUCCESS;
ff0af2a1 3520
f2341e0a
LP
3521 r = exec_child(unit,
3522 command,
ff0af2a1
LP
3523 context,
3524 params,
3525 runtime,
29206d46 3526 dcreds,
ff0af2a1
LP
3527 argv,
3528 socket_fd,
52c239d7 3529 named_iofds,
4c47affc
FB
3530 fds,
3531 n_storage_fds,
9b141911 3532 n_socket_fds,
ff0af2a1 3533 files_env,
00d9ef85 3534 unit->manager->user_lookup_fds[1],
12145637
LP
3535 &exit_status);
3536
ff0af2a1 3537 if (r < 0) {
12145637
LP
3538 log_struct_errno(LOG_ERR, r,
3539 "MESSAGE_ID=" SD_MESSAGE_SPAWN_FAILED_STR,
3540 LOG_UNIT_ID(unit),
3541 LOG_UNIT_INVOCATION_ID(unit),
3542 LOG_UNIT_MESSAGE(unit, "Failed at step %s spawning %s: %m",
3543 exit_status_to_string(exit_status, EXIT_STATUS_SYSTEMD),
3544 command->path),
3545 "EXECUTABLE=%s", command->path,
3546 NULL);
4c2630eb
MS
3547 }
3548
ff0af2a1 3549 _exit(exit_status);
034c6ed7
LP
3550 }
3551
f2341e0a 3552 log_unit_debug(unit, "Forked %s as "PID_FMT, command->path, pid);
23635a85 3553
80876c20
LP
3554 /* We add the new process to the cgroup both in the child (so
3555 * that we can be sure that no user code is ever executed
3556 * outside of the cgroup) and in the parent (so that we can be
3557 * sure that when we kill the cgroup the process will be
3558 * killed too). */
d35fbf6b 3559 if (params->cgroup_path)
dd305ec9 3560 (void) cg_attach(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, pid);
2da3263a 3561
b58b4116 3562 exec_status_start(&command->exec_status, pid);
9fb86720 3563
034c6ed7 3564 *ret = pid;
5cb5a6ff
LP
3565 return 0;
3566}
3567
034c6ed7 3568void exec_context_init(ExecContext *c) {
3536f49e
YW
3569 ExecDirectoryType i;
3570
034c6ed7
LP
3571 assert(c);
3572
4c12626c 3573 c->umask = 0022;
9eba9da4 3574 c->ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 0);
94f04347 3575 c->cpu_sched_policy = SCHED_OTHER;
071830ff 3576 c->syslog_priority = LOG_DAEMON|LOG_INFO;
74922904 3577 c->syslog_level_prefix = true;
353e12c2 3578 c->ignore_sigpipe = true;
3a43da28 3579 c->timer_slack_nsec = NSEC_INFINITY;
050f7277 3580 c->personality = PERSONALITY_INVALID;
72fd1768 3581 for (i = 0; i < _EXEC_DIRECTORY_TYPE_MAX; i++)
3536f49e 3582 c->directories[i].mode = 0755;
a103496c 3583 c->capability_bounding_set = CAP_ALL;
add00535 3584 c->restrict_namespaces = NAMESPACE_FLAGS_ALL;
d3070fbd 3585 c->log_level_max = -1;
034c6ed7
LP
3586}
3587
613b411c 3588void exec_context_done(ExecContext *c) {
3536f49e 3589 ExecDirectoryType i;
d3070fbd 3590 size_t l;
5cb5a6ff
LP
3591
3592 assert(c);
3593
6796073e
LP
3594 c->environment = strv_free(c->environment);
3595 c->environment_files = strv_free(c->environment_files);
b4c14404 3596 c->pass_environment = strv_free(c->pass_environment);
00819cc1 3597 c->unset_environment = strv_free(c->unset_environment);
8c7be95e 3598
1f6b4113 3599 for (l = 0; l < ELEMENTSOF(c->rlimit); l++)
a1e58e8e 3600 c->rlimit[l] = mfree(c->rlimit[l]);
034c6ed7 3601
2038c3f5 3602 for (l = 0; l < 3; l++) {
52c239d7 3603 c->stdio_fdname[l] = mfree(c->stdio_fdname[l]);
2038c3f5
LP
3604 c->stdio_file[l] = mfree(c->stdio_file[l]);
3605 }
52c239d7 3606
a1e58e8e
LP
3607 c->working_directory = mfree(c->working_directory);
3608 c->root_directory = mfree(c->root_directory);
915e6d16 3609 c->root_image = mfree(c->root_image);
a1e58e8e
LP
3610 c->tty_path = mfree(c->tty_path);
3611 c->syslog_identifier = mfree(c->syslog_identifier);
3612 c->user = mfree(c->user);
3613 c->group = mfree(c->group);
034c6ed7 3614
6796073e 3615 c->supplementary_groups = strv_free(c->supplementary_groups);
94f04347 3616
a1e58e8e 3617 c->pam_name = mfree(c->pam_name);
5b6319dc 3618
2a624c36
AP
3619 c->read_only_paths = strv_free(c->read_only_paths);
3620 c->read_write_paths = strv_free(c->read_write_paths);
3621 c->inaccessible_paths = strv_free(c->inaccessible_paths);
82c121a4 3622
d2d6c096
LP
3623 bind_mount_free_many(c->bind_mounts, c->n_bind_mounts);
3624
da681e1b 3625 c->cpuset = cpu_set_mfree(c->cpuset);
86a3475b 3626
a1e58e8e
LP
3627 c->utmp_id = mfree(c->utmp_id);
3628 c->selinux_context = mfree(c->selinux_context);
3629 c->apparmor_profile = mfree(c->apparmor_profile);
5b8e1b77 3630 c->smack_process_label = mfree(c->smack_process_label);
eef65bf3 3631
8cfa775f 3632 c->syscall_filter = hashmap_free(c->syscall_filter);
525d3cc7
LP
3633 c->syscall_archs = set_free(c->syscall_archs);
3634 c->address_families = set_free(c->address_families);
e66cf1a3 3635
72fd1768 3636 for (i = 0; i < _EXEC_DIRECTORY_TYPE_MAX; i++)
3536f49e 3637 c->directories[i].paths = strv_free(c->directories[i].paths);
d3070fbd
LP
3638
3639 c->log_level_max = -1;
3640
3641 exec_context_free_log_extra_fields(c);
08f3be7a
LP
3642
3643 c->stdin_data = mfree(c->stdin_data);
3644 c->stdin_data_size = 0;
e66cf1a3
LP
3645}
3646
3647int exec_context_destroy_runtime_directory(ExecContext *c, const char *runtime_prefix) {
3648 char **i;
3649
3650 assert(c);
3651
3652 if (!runtime_prefix)
3653 return 0;
3654
3536f49e 3655 STRV_FOREACH(i, c->directories[EXEC_DIRECTORY_RUNTIME].paths) {
e66cf1a3
LP
3656 _cleanup_free_ char *p;
3657
605405c6 3658 p = strjoin(runtime_prefix, "/", *i);
e66cf1a3
LP
3659 if (!p)
3660 return -ENOMEM;
3661
6c47cd7d 3662 /* We execute this synchronously, since we need to be sure this is gone when we start the service
e66cf1a3 3663 * next. */
c6878637 3664 (void) rm_rf(p, REMOVE_ROOT);
e66cf1a3
LP
3665 }
3666
3667 return 0;
5cb5a6ff
LP
3668}
3669
43d0fcbd
LP
3670void exec_command_done(ExecCommand *c) {
3671 assert(c);
3672
a1e58e8e 3673 c->path = mfree(c->path);
43d0fcbd 3674
6796073e 3675 c->argv = strv_free(c->argv);
43d0fcbd
LP
3676}
3677
3678void exec_command_done_array(ExecCommand *c, unsigned n) {
3679 unsigned i;
3680
3681 for (i = 0; i < n; i++)
3682 exec_command_done(c+i);
3683}
3684
f1acf85a 3685ExecCommand* exec_command_free_list(ExecCommand *c) {
5cb5a6ff
LP
3686 ExecCommand *i;
3687
3688 while ((i = c)) {
71fda00f 3689 LIST_REMOVE(command, c, i);
43d0fcbd 3690 exec_command_done(i);
5cb5a6ff
LP
3691 free(i);
3692 }
f1acf85a
ZJS
3693
3694 return NULL;
5cb5a6ff
LP
3695}
3696
034c6ed7
LP
3697void exec_command_free_array(ExecCommand **c, unsigned n) {
3698 unsigned i;
3699
f1acf85a
ZJS
3700 for (i = 0; i < n; i++)
3701 c[i] = exec_command_free_list(c[i]);
034c6ed7
LP
3702}
3703
039f0e70 3704typedef struct InvalidEnvInfo {
f2341e0a 3705 Unit *unit;
039f0e70
LP
3706 const char *path;
3707} InvalidEnvInfo;
3708
3709static void invalid_env(const char *p, void *userdata) {
3710 InvalidEnvInfo *info = userdata;
3711
f2341e0a 3712 log_unit_error(info->unit, "Ignoring invalid environment assignment '%s': %s", p, info->path);
039f0e70
LP
3713}
3714
52c239d7
LB
3715const char* exec_context_fdname(const ExecContext *c, int fd_index) {
3716 assert(c);
3717
3718 switch (fd_index) {
5073ff6b 3719
52c239d7
LB
3720 case STDIN_FILENO:
3721 if (c->std_input != EXEC_INPUT_NAMED_FD)
3722 return NULL;
5073ff6b 3723
52c239d7 3724 return c->stdio_fdname[STDIN_FILENO] ?: "stdin";
5073ff6b 3725
52c239d7
LB
3726 case STDOUT_FILENO:
3727 if (c->std_output != EXEC_OUTPUT_NAMED_FD)
3728 return NULL;
5073ff6b 3729
52c239d7 3730 return c->stdio_fdname[STDOUT_FILENO] ?: "stdout";
5073ff6b 3731
52c239d7
LB
3732 case STDERR_FILENO:
3733 if (c->std_error != EXEC_OUTPUT_NAMED_FD)
3734 return NULL;
5073ff6b 3735
52c239d7 3736 return c->stdio_fdname[STDERR_FILENO] ?: "stderr";
5073ff6b 3737
52c239d7
LB
3738 default:
3739 return NULL;
3740 }
3741}
3742
3743int exec_context_named_iofds(Unit *unit, const ExecContext *c, const ExecParameters *p, int named_iofds[3]) {
3744 unsigned i, targets;
56fbd561 3745 const char* stdio_fdname[3];
4c47affc 3746 unsigned n_fds;
52c239d7
LB
3747
3748 assert(c);
3749 assert(p);
3750
3751 targets = (c->std_input == EXEC_INPUT_NAMED_FD) +
3752 (c->std_output == EXEC_OUTPUT_NAMED_FD) +
3753 (c->std_error == EXEC_OUTPUT_NAMED_FD);
3754
3755 for (i = 0; i < 3; i++)
3756 stdio_fdname[i] = exec_context_fdname(c, i);
3757
4c47affc
FB
3758 n_fds = p->n_storage_fds + p->n_socket_fds;
3759
3760 for (i = 0; i < n_fds && targets > 0; i++)
56fbd561
ZJS
3761 if (named_iofds[STDIN_FILENO] < 0 &&
3762 c->std_input == EXEC_INPUT_NAMED_FD &&
3763 stdio_fdname[STDIN_FILENO] &&
3764 streq(p->fd_names[i], stdio_fdname[STDIN_FILENO])) {
3765
52c239d7
LB
3766 named_iofds[STDIN_FILENO] = p->fds[i];
3767 targets--;
56fbd561
ZJS
3768
3769 } else if (named_iofds[STDOUT_FILENO] < 0 &&
3770 c->std_output == EXEC_OUTPUT_NAMED_FD &&
3771 stdio_fdname[STDOUT_FILENO] &&
3772 streq(p->fd_names[i], stdio_fdname[STDOUT_FILENO])) {
3773
52c239d7
LB
3774 named_iofds[STDOUT_FILENO] = p->fds[i];
3775 targets--;
56fbd561
ZJS
3776
3777 } else if (named_iofds[STDERR_FILENO] < 0 &&
3778 c->std_error == EXEC_OUTPUT_NAMED_FD &&
3779 stdio_fdname[STDERR_FILENO] &&
3780 streq(p->fd_names[i], stdio_fdname[STDERR_FILENO])) {
3781
52c239d7
LB
3782 named_iofds[STDERR_FILENO] = p->fds[i];
3783 targets--;
3784 }
3785
56fbd561 3786 return targets == 0 ? 0 : -ENOENT;
52c239d7
LB
3787}
3788
f2341e0a 3789int exec_context_load_environment(Unit *unit, const ExecContext *c, char ***l) {
8c7be95e
LP
3790 char **i, **r = NULL;
3791
3792 assert(c);
3793 assert(l);
3794
3795 STRV_FOREACH(i, c->environment_files) {
3796 char *fn;
52511fae
ZJS
3797 int k;
3798 unsigned n;
8c7be95e
LP
3799 bool ignore = false;
3800 char **p;
7fd1b19b 3801 _cleanup_globfree_ glob_t pglob = {};
8c7be95e
LP
3802
3803 fn = *i;
3804
3805 if (fn[0] == '-') {
3806 ignore = true;
313cefa1 3807 fn++;
8c7be95e
LP
3808 }
3809
3810 if (!path_is_absolute(fn)) {
8c7be95e
LP
3811 if (ignore)
3812 continue;
3813
3814 strv_free(r);
3815 return -EINVAL;
3816 }
3817
2bef10ab 3818 /* Filename supports globbing, take all matching files */
d8c92e8b
ZJS
3819 k = safe_glob(fn, 0, &pglob);
3820 if (k < 0) {
2bef10ab
PL
3821 if (ignore)
3822 continue;
8c7be95e 3823
2bef10ab 3824 strv_free(r);
d8c92e8b 3825 return k;
2bef10ab 3826 }
8c7be95e 3827
d8c92e8b
ZJS
3828 /* When we don't match anything, -ENOENT should be returned */
3829 assert(pglob.gl_pathc > 0);
3830
3831 for (n = 0; n < pglob.gl_pathc; n++) {
717603e3 3832 k = load_env_file(NULL, pglob.gl_pathv[n], NULL, &p);
2bef10ab
PL
3833 if (k < 0) {
3834 if (ignore)
3835 continue;
8c7be95e 3836
2bef10ab 3837 strv_free(r);
2bef10ab 3838 return k;
e9c1ea9d 3839 }
ebc05a09 3840 /* Log invalid environment variables with filename */
039f0e70
LP
3841 if (p) {
3842 InvalidEnvInfo info = {
f2341e0a 3843 .unit = unit,
039f0e70
LP
3844 .path = pglob.gl_pathv[n]
3845 };
3846
3847 p = strv_env_clean_with_callback(p, invalid_env, &info);
3848 }
8c7be95e 3849
234519ae 3850 if (!r)
2bef10ab
PL
3851 r = p;
3852 else {
3853 char **m;
8c7be95e 3854
2bef10ab
PL
3855 m = strv_env_merge(2, r, p);
3856 strv_free(r);
3857 strv_free(p);
c84a9488 3858 if (!m)
2bef10ab 3859 return -ENOMEM;
2bef10ab
PL
3860
3861 r = m;
3862 }
8c7be95e
LP
3863 }
3864 }
3865
3866 *l = r;
3867
3868 return 0;
3869}
3870
6ac8fdc9 3871static bool tty_may_match_dev_console(const char *tty) {
e1d75803 3872 _cleanup_free_ char *active = NULL;
7d6884b6 3873 char *console;
6ac8fdc9 3874
1e22b5cd
LP
3875 if (!tty)
3876 return true;
3877
a119ec7c 3878 tty = skip_dev_prefix(tty);
6ac8fdc9
MS
3879
3880 /* trivial identity? */
3881 if (streq(tty, "console"))
3882 return true;
3883
3884 console = resolve_dev_console(&active);
3885 /* if we could not resolve, assume it may */
3886 if (!console)
3887 return true;
3888
3889 /* "tty0" means the active VC, so it may be the same sometimes */
e1d75803 3890 return streq(console, tty) || (streq(console, "tty0") && tty_is_vc(tty));
6ac8fdc9
MS
3891}
3892
3893bool exec_context_may_touch_console(ExecContext *ec) {
1e22b5cd
LP
3894
3895 return (ec->tty_reset ||
3896 ec->tty_vhangup ||
3897 ec->tty_vt_disallocate ||
6ac8fdc9
MS
3898 is_terminal_input(ec->std_input) ||
3899 is_terminal_output(ec->std_output) ||
3900 is_terminal_output(ec->std_error)) &&
1e22b5cd 3901 tty_may_match_dev_console(exec_context_tty_path(ec));
6ac8fdc9
MS
3902}
3903
15ae422b
LP
3904static void strv_fprintf(FILE *f, char **l) {
3905 char **g;
3906
3907 assert(f);
3908
3909 STRV_FOREACH(g, l)
3910 fprintf(f, " %s", *g);
3911}
3912
5cb5a6ff 3913void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) {
d3070fbd 3914 ExecDirectoryType dt;
c2bbd90b 3915 char **e, **d;
94f04347 3916 unsigned i;
add00535 3917 int r;
9eba9da4 3918
5cb5a6ff
LP
3919 assert(c);
3920 assert(f);
3921
4ad49000 3922 prefix = strempty(prefix);
5cb5a6ff
LP
3923
3924 fprintf(f,
94f04347
LP
3925 "%sUMask: %04o\n"
3926 "%sWorkingDirectory: %s\n"
451a074f 3927 "%sRootDirectory: %s\n"
15ae422b 3928 "%sNonBlocking: %s\n"
64747e2d 3929 "%sPrivateTmp: %s\n"
7f112f50 3930 "%sPrivateDevices: %s\n"
59eeb84b 3931 "%sProtectKernelTunables: %s\n"
e66a2f65 3932 "%sProtectKernelModules: %s\n"
59eeb84b 3933 "%sProtectControlGroups: %s\n"
d251207d
LP
3934 "%sPrivateNetwork: %s\n"
3935 "%sPrivateUsers: %s\n"
1b8689f9
LP
3936 "%sProtectHome: %s\n"
3937 "%sProtectSystem: %s\n"
5d997827 3938 "%sMountAPIVFS: %s\n"
f3e43635 3939 "%sIgnoreSIGPIPE: %s\n"
f4170c67 3940 "%sMemoryDenyWriteExecute: %s\n"
b1edf445
LP
3941 "%sRestrictRealtime: %s\n"
3942 "%sKeyringMode: %s\n",
5cb5a6ff 3943 prefix, c->umask,
9eba9da4 3944 prefix, c->working_directory ? c->working_directory : "/",
451a074f 3945 prefix, c->root_directory ? c->root_directory : "/",
15ae422b 3946 prefix, yes_no(c->non_blocking),
64747e2d 3947 prefix, yes_no(c->private_tmp),
7f112f50 3948 prefix, yes_no(c->private_devices),
59eeb84b 3949 prefix, yes_no(c->protect_kernel_tunables),
e66a2f65 3950 prefix, yes_no(c->protect_kernel_modules),
59eeb84b 3951 prefix, yes_no(c->protect_control_groups),
d251207d
LP
3952 prefix, yes_no(c->private_network),
3953 prefix, yes_no(c->private_users),
1b8689f9
LP
3954 prefix, protect_home_to_string(c->protect_home),
3955 prefix, protect_system_to_string(c->protect_system),
5d997827 3956 prefix, yes_no(c->mount_apivfs),
f3e43635 3957 prefix, yes_no(c->ignore_sigpipe),
f4170c67 3958 prefix, yes_no(c->memory_deny_write_execute),
b1edf445
LP
3959 prefix, yes_no(c->restrict_realtime),
3960 prefix, exec_keyring_mode_to_string(c->keyring_mode));
fb33a393 3961
915e6d16
LP
3962 if (c->root_image)
3963 fprintf(f, "%sRootImage: %s\n", prefix, c->root_image);
3964
8c7be95e
LP
3965 STRV_FOREACH(e, c->environment)
3966 fprintf(f, "%sEnvironment: %s\n", prefix, *e);
3967
3968 STRV_FOREACH(e, c->environment_files)
3969 fprintf(f, "%sEnvironmentFile: %s\n", prefix, *e);
94f04347 3970
b4c14404
FB
3971 STRV_FOREACH(e, c->pass_environment)
3972 fprintf(f, "%sPassEnvironment: %s\n", prefix, *e);
3973
00819cc1
LP
3974 STRV_FOREACH(e, c->unset_environment)
3975 fprintf(f, "%sUnsetEnvironment: %s\n", prefix, *e);
3976
53f47dfc
YW
3977 fprintf(f, "%sRuntimeDirectoryPreserve: %s\n", prefix, exec_preserve_mode_to_string(c->runtime_directory_preserve_mode));
3978
72fd1768 3979 for (dt = 0; dt < _EXEC_DIRECTORY_TYPE_MAX; dt++) {
3536f49e
YW
3980 fprintf(f, "%s%sMode: %04o\n", prefix, exec_directory_type_to_string(dt), c->directories[dt].mode);
3981
3982 STRV_FOREACH(d, c->directories[dt].paths)
3983 fprintf(f, "%s%s: %s\n", prefix, exec_directory_type_to_string(dt), *d);
3984 }
c2bbd90b 3985
fb33a393
LP
3986 if (c->nice_set)
3987 fprintf(f,
3988 "%sNice: %i\n",
3989 prefix, c->nice);
3990
dd6c17b1 3991 if (c->oom_score_adjust_set)
fb33a393 3992 fprintf(f,
dd6c17b1
LP
3993 "%sOOMScoreAdjust: %i\n",
3994 prefix, c->oom_score_adjust);
9eba9da4 3995
94f04347 3996 for (i = 0; i < RLIM_NLIMITS; i++)
3c11da9d
EV
3997 if (c->rlimit[i]) {
3998 fprintf(f, "%s%s: " RLIM_FMT "\n",
3999 prefix, rlimit_to_string(i), c->rlimit[i]->rlim_max);
4000 fprintf(f, "%s%sSoft: " RLIM_FMT "\n",
4001 prefix, rlimit_to_string(i), c->rlimit[i]->rlim_cur);
4002 }
94f04347 4003
f8b69d1d 4004 if (c->ioprio_set) {
1756a011 4005 _cleanup_free_ char *class_str = NULL;
f8b69d1d 4006
837df140
YW
4007 r = ioprio_class_to_string_alloc(IOPRIO_PRIO_CLASS(c->ioprio), &class_str);
4008 if (r >= 0)
4009 fprintf(f, "%sIOSchedulingClass: %s\n", prefix, class_str);
4010
4011 fprintf(f, "%sIOPriority: %lu\n", prefix, IOPRIO_PRIO_DATA(c->ioprio));
f8b69d1d 4012 }
94f04347 4013
f8b69d1d 4014 if (c->cpu_sched_set) {
1756a011 4015 _cleanup_free_ char *policy_str = NULL;
f8b69d1d 4016
837df140
YW
4017 r = sched_policy_to_string_alloc(c->cpu_sched_policy, &policy_str);
4018 if (r >= 0)
4019 fprintf(f, "%sCPUSchedulingPolicy: %s\n", prefix, policy_str);
4020
94f04347 4021 fprintf(f,
38b48754
LP
4022 "%sCPUSchedulingPriority: %i\n"
4023 "%sCPUSchedulingResetOnFork: %s\n",
38b48754
LP
4024 prefix, c->cpu_sched_priority,
4025 prefix, yes_no(c->cpu_sched_reset_on_fork));
b929bf04 4026 }
94f04347 4027
82c121a4 4028 if (c->cpuset) {
94f04347 4029 fprintf(f, "%sCPUAffinity:", prefix);
82c121a4
LP
4030 for (i = 0; i < c->cpuset_ncpus; i++)
4031 if (CPU_ISSET_S(i, CPU_ALLOC_SIZE(c->cpuset_ncpus), c->cpuset))
43a99a7a 4032 fprintf(f, " %u", i);
94f04347
LP
4033 fputs("\n", f);
4034 }
4035
3a43da28 4036 if (c->timer_slack_nsec != NSEC_INFINITY)
ccd06097 4037 fprintf(f, "%sTimerSlackNSec: "NSEC_FMT "\n", prefix, c->timer_slack_nsec);
94f04347
LP
4038
4039 fprintf(f,
80876c20
LP
4040 "%sStandardInput: %s\n"
4041 "%sStandardOutput: %s\n"
4042 "%sStandardError: %s\n",
4043 prefix, exec_input_to_string(c->std_input),
4044 prefix, exec_output_to_string(c->std_output),
4045 prefix, exec_output_to_string(c->std_error));
4046
befc4a80
LP
4047 if (c->std_input == EXEC_INPUT_NAMED_FD)
4048 fprintf(f, "%sStandardInputFileDescriptorName: %s\n", prefix, c->stdio_fdname[STDIN_FILENO]);
4049 if (c->std_output == EXEC_OUTPUT_NAMED_FD)
4050 fprintf(f, "%sStandardOutputFileDescriptorName: %s\n", prefix, c->stdio_fdname[STDOUT_FILENO]);
4051 if (c->std_error == EXEC_OUTPUT_NAMED_FD)
4052 fprintf(f, "%sStandardErrorFileDescriptorName: %s\n", prefix, c->stdio_fdname[STDERR_FILENO]);
4053
4054 if (c->std_input == EXEC_INPUT_FILE)
4055 fprintf(f, "%sStandardInputFile: %s\n", prefix, c->stdio_file[STDIN_FILENO]);
4056 if (c->std_output == EXEC_OUTPUT_FILE)
4057 fprintf(f, "%sStandardOutputFile: %s\n", prefix, c->stdio_file[STDOUT_FILENO]);
4058 if (c->std_error == EXEC_OUTPUT_FILE)
4059 fprintf(f, "%sStandardErrorFile: %s\n", prefix, c->stdio_file[STDERR_FILENO]);
4060
80876c20
LP
4061 if (c->tty_path)
4062 fprintf(f,
6ea832a2
LP
4063 "%sTTYPath: %s\n"
4064 "%sTTYReset: %s\n"
4065 "%sTTYVHangup: %s\n"
4066 "%sTTYVTDisallocate: %s\n",
4067 prefix, c->tty_path,
4068 prefix, yes_no(c->tty_reset),
4069 prefix, yes_no(c->tty_vhangup),
4070 prefix, yes_no(c->tty_vt_disallocate));
94f04347 4071
9f6444eb
LP
4072 if (IN_SET(c->std_output,
4073 EXEC_OUTPUT_SYSLOG,
4074 EXEC_OUTPUT_KMSG,
4075 EXEC_OUTPUT_JOURNAL,
4076 EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
4077 EXEC_OUTPUT_KMSG_AND_CONSOLE,
4078 EXEC_OUTPUT_JOURNAL_AND_CONSOLE) ||
4079 IN_SET(c->std_error,
4080 EXEC_OUTPUT_SYSLOG,
4081 EXEC_OUTPUT_KMSG,
4082 EXEC_OUTPUT_JOURNAL,
4083 EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
4084 EXEC_OUTPUT_KMSG_AND_CONSOLE,
4085 EXEC_OUTPUT_JOURNAL_AND_CONSOLE)) {
f8b69d1d 4086
5ce70e5b 4087 _cleanup_free_ char *fac_str = NULL, *lvl_str = NULL;
f8b69d1d 4088
837df140
YW
4089 r = log_facility_unshifted_to_string_alloc(c->syslog_priority >> 3, &fac_str);
4090 if (r >= 0)
4091 fprintf(f, "%sSyslogFacility: %s\n", prefix, fac_str);
f8b69d1d 4092
837df140
YW
4093 r = log_level_to_string_alloc(LOG_PRI(c->syslog_priority), &lvl_str);
4094 if (r >= 0)
4095 fprintf(f, "%sSyslogLevel: %s\n", prefix, lvl_str);
f8b69d1d 4096 }
94f04347 4097
d3070fbd
LP
4098 if (c->log_level_max >= 0) {
4099 _cleanup_free_ char *t = NULL;
4100
4101 (void) log_level_to_string_alloc(c->log_level_max, &t);
4102
4103 fprintf(f, "%sLogLevelMax: %s\n", prefix, strna(t));
4104 }
4105
4106 if (c->n_log_extra_fields > 0) {
4107 size_t j;
4108
4109 for (j = 0; j < c->n_log_extra_fields; j++) {
4110 fprintf(f, "%sLogExtraFields: ", prefix);
4111 fwrite(c->log_extra_fields[j].iov_base,
4112 1, c->log_extra_fields[j].iov_len,
4113 f);
4114 fputc('\n', f);
4115 }
4116 }
4117
07d46372
YW
4118 if (c->secure_bits) {
4119 _cleanup_free_ char *str = NULL;
4120
4121 r = secure_bits_to_string_alloc(c->secure_bits, &str);
4122 if (r >= 0)
4123 fprintf(f, "%sSecure Bits: %s\n", prefix, str);
4124 }
94f04347 4125
a103496c 4126 if (c->capability_bounding_set != CAP_ALL) {
dd1f5bd0 4127 _cleanup_free_ char *str = NULL;
94f04347 4128
dd1f5bd0
YW
4129 r = capability_set_to_string_alloc(c->capability_bounding_set, &str);
4130 if (r >= 0)
4131 fprintf(f, "%sCapabilityBoundingSet: %s\n", prefix, str);
755d4b67
IP
4132 }
4133
4134 if (c->capability_ambient_set != 0) {
dd1f5bd0 4135 _cleanup_free_ char *str = NULL;
755d4b67 4136
dd1f5bd0
YW
4137 r = capability_set_to_string_alloc(c->capability_ambient_set, &str);
4138 if (r >= 0)
4139 fprintf(f, "%sAmbientCapabilities: %s\n", prefix, str);
94f04347
LP
4140 }
4141
4142 if (c->user)
f2d3769a 4143 fprintf(f, "%sUser: %s\n", prefix, c->user);
94f04347 4144 if (c->group)
f2d3769a 4145 fprintf(f, "%sGroup: %s\n", prefix, c->group);
94f04347 4146
29206d46
LP
4147 fprintf(f, "%sDynamicUser: %s\n", prefix, yes_no(c->dynamic_user));
4148
ac6e8be6 4149 if (!strv_isempty(c->supplementary_groups)) {
94f04347 4150 fprintf(f, "%sSupplementaryGroups:", prefix);
15ae422b
LP
4151 strv_fprintf(f, c->supplementary_groups);
4152 fputs("\n", f);
4153 }
94f04347 4154
5b6319dc 4155 if (c->pam_name)
f2d3769a 4156 fprintf(f, "%sPAMName: %s\n", prefix, c->pam_name);
5b6319dc 4157
58629001 4158 if (!strv_isempty(c->read_write_paths)) {
2a624c36
AP
4159 fprintf(f, "%sReadWritePaths:", prefix);
4160 strv_fprintf(f, c->read_write_paths);
15ae422b
LP
4161 fputs("\n", f);
4162 }
4163
58629001 4164 if (!strv_isempty(c->read_only_paths)) {
2a624c36
AP
4165 fprintf(f, "%sReadOnlyPaths:", prefix);
4166 strv_fprintf(f, c->read_only_paths);
15ae422b
LP
4167 fputs("\n", f);
4168 }
94f04347 4169
58629001 4170 if (!strv_isempty(c->inaccessible_paths)) {
2a624c36
AP
4171 fprintf(f, "%sInaccessiblePaths:", prefix);
4172 strv_fprintf(f, c->inaccessible_paths);
94f04347
LP
4173 fputs("\n", f);
4174 }
2e22afe9 4175
d2d6c096
LP
4176 if (c->n_bind_mounts > 0)
4177 for (i = 0; i < c->n_bind_mounts; i++) {
4178 fprintf(f, "%s%s: %s:%s:%s\n", prefix,
4179 c->bind_mounts[i].read_only ? "BindReadOnlyPaths" : "BindPaths",
4180 c->bind_mounts[i].source,
4181 c->bind_mounts[i].destination,
4182 c->bind_mounts[i].recursive ? "rbind" : "norbind");
4183 }
4184
169c1bda
LP
4185 if (c->utmp_id)
4186 fprintf(f,
4187 "%sUtmpIdentifier: %s\n",
4188 prefix, c->utmp_id);
7b52a628
MS
4189
4190 if (c->selinux_context)
4191 fprintf(f,
5f8640fb
LP
4192 "%sSELinuxContext: %s%s\n",
4193 prefix, c->selinux_context_ignore ? "-" : "", c->selinux_context);
17df7223 4194
80c21aea
WC
4195 if (c->apparmor_profile)
4196 fprintf(f,
4197 "%sAppArmorProfile: %s%s\n",
4198 prefix, c->apparmor_profile_ignore ? "-" : "", c->apparmor_profile);
4199
4200 if (c->smack_process_label)
4201 fprintf(f,
4202 "%sSmackProcessLabel: %s%s\n",
4203 prefix, c->smack_process_label_ignore ? "-" : "", c->smack_process_label);
4204
050f7277 4205 if (c->personality != PERSONALITY_INVALID)
ac45f971
LP
4206 fprintf(f,
4207 "%sPersonality: %s\n",
4208 prefix, strna(personality_to_string(c->personality)));
4209
78e864e5
TM
4210 fprintf(f,
4211 "%sLockPersonality: %s\n",
4212 prefix, yes_no(c->lock_personality));
4213
17df7223 4214 if (c->syscall_filter) {
349cc4a5 4215#if HAVE_SECCOMP
17df7223 4216 Iterator j;
8cfa775f 4217 void *id, *val;
17df7223 4218 bool first = true;
351a19b1 4219#endif
17df7223
LP
4220
4221 fprintf(f,
57183d11 4222 "%sSystemCallFilter: ",
17df7223
LP
4223 prefix);
4224
4225 if (!c->syscall_whitelist)
4226 fputc('~', f);
4227
349cc4a5 4228#if HAVE_SECCOMP
8cfa775f 4229 HASHMAP_FOREACH_KEY(val, id, c->syscall_filter, j) {
17df7223 4230 _cleanup_free_ char *name = NULL;
8cfa775f
YW
4231 const char *errno_name = NULL;
4232 int num = PTR_TO_INT(val);
17df7223
LP
4233
4234 if (first)
4235 first = false;
4236 else
4237 fputc(' ', f);
4238
57183d11 4239 name = seccomp_syscall_resolve_num_arch(SCMP_ARCH_NATIVE, PTR_TO_INT(id) - 1);
17df7223 4240 fputs(strna(name), f);
8cfa775f
YW
4241
4242 if (num >= 0) {
4243 errno_name = errno_to_name(num);
4244 if (errno_name)
4245 fprintf(f, ":%s", errno_name);
4246 else
4247 fprintf(f, ":%d", num);
4248 }
17df7223 4249 }
351a19b1 4250#endif
17df7223
LP
4251
4252 fputc('\n', f);
4253 }
4254
57183d11 4255 if (c->syscall_archs) {
349cc4a5 4256#if HAVE_SECCOMP
57183d11
LP
4257 Iterator j;
4258 void *id;
4259#endif
4260
4261 fprintf(f,
4262 "%sSystemCallArchitectures:",
4263 prefix);
4264
349cc4a5 4265#if HAVE_SECCOMP
57183d11
LP
4266 SET_FOREACH(id, c->syscall_archs, j)
4267 fprintf(f, " %s", strna(seccomp_arch_to_string(PTR_TO_UINT32(id) - 1)));
4268#endif
4269 fputc('\n', f);
4270 }
4271
add00535
LP
4272 if (exec_context_restrict_namespaces_set(c)) {
4273 _cleanup_free_ char *s = NULL;
4274
4275 r = namespace_flag_to_string_many(c->restrict_namespaces, &s);
4276 if (r >= 0)
4277 fprintf(f, "%sRestrictNamespaces: %s\n",
4278 prefix, s);
4279 }
4280
3df90f24
YW
4281 if (c->syscall_errno > 0) {
4282 const char *errno_name;
4283
4284 fprintf(f, "%sSystemCallErrorNumber: ", prefix);
4285
4286 errno_name = errno_to_name(c->syscall_errno);
4287 if (errno_name)
4288 fprintf(f, "%s\n", errno_name);
4289 else
4290 fprintf(f, "%d\n", c->syscall_errno);
4291 }
eef65bf3
MS
4292
4293 if (c->apparmor_profile)
4294 fprintf(f,
4295 "%sAppArmorProfile: %s%s\n",
4296 prefix, c->apparmor_profile_ignore ? "-" : "", c->apparmor_profile);
5cb5a6ff
LP
4297}
4298
a931ad47
LP
4299bool exec_context_maintains_privileges(ExecContext *c) {
4300 assert(c);
4301
61233823 4302 /* Returns true if the process forked off would run under
a931ad47
LP
4303 * an unchanged UID or as root. */
4304
4305 if (!c->user)
4306 return true;
4307
4308 if (streq(c->user, "root") || streq(c->user, "0"))
4309 return true;
4310
4311 return false;
4312}
4313
7f452159
LP
4314int exec_context_get_effective_ioprio(ExecContext *c) {
4315 int p;
4316
4317 assert(c);
4318
4319 if (c->ioprio_set)
4320 return c->ioprio;
4321
4322 p = ioprio_get(IOPRIO_WHO_PROCESS, 0);
4323 if (p < 0)
4324 return IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 4);
4325
4326 return p;
4327}
4328
d3070fbd
LP
4329void exec_context_free_log_extra_fields(ExecContext *c) {
4330 size_t l;
4331
4332 assert(c);
4333
4334 for (l = 0; l < c->n_log_extra_fields; l++)
4335 free(c->log_extra_fields[l].iov_base);
4336 c->log_extra_fields = mfree(c->log_extra_fields);
4337 c->n_log_extra_fields = 0;
4338}
4339
b58b4116 4340void exec_status_start(ExecStatus *s, pid_t pid) {
034c6ed7 4341 assert(s);
5cb5a6ff 4342
b58b4116
LP
4343 zero(*s);
4344 s->pid = pid;
4345 dual_timestamp_get(&s->start_timestamp);
4346}
4347
6ea832a2 4348void exec_status_exit(ExecStatus *s, ExecContext *context, pid_t pid, int code, int status) {
b58b4116
LP
4349 assert(s);
4350
0b1f4ae6 4351 if (s->pid && s->pid != pid)
b58b4116
LP
4352 zero(*s);
4353
034c6ed7 4354 s->pid = pid;
63983207 4355 dual_timestamp_get(&s->exit_timestamp);
9fb86720 4356
034c6ed7
LP
4357 s->code = code;
4358 s->status = status;
169c1bda 4359
6ea832a2
LP
4360 if (context) {
4361 if (context->utmp_id)
4362 utmp_put_dead_process(context->utmp_id, pid, code, status);
4363
1e22b5cd 4364 exec_context_tty_reset(context, NULL);
6ea832a2 4365 }
9fb86720
LP
4366}
4367
4368void exec_status_dump(ExecStatus *s, FILE *f, const char *prefix) {
4369 char buf[FORMAT_TIMESTAMP_MAX];
4370
4371 assert(s);
4372 assert(f);
4373
9fb86720
LP
4374 if (s->pid <= 0)
4375 return;
4376
4c940960
LP
4377 prefix = strempty(prefix);
4378
9fb86720 4379 fprintf(f,
ccd06097
ZJS
4380 "%sPID: "PID_FMT"\n",
4381 prefix, s->pid);
9fb86720 4382
af9d16e1 4383 if (dual_timestamp_is_set(&s->start_timestamp))
9fb86720
LP
4384 fprintf(f,
4385 "%sStart Timestamp: %s\n",
63983207 4386 prefix, format_timestamp(buf, sizeof(buf), s->start_timestamp.realtime));
9fb86720 4387
af9d16e1 4388 if (dual_timestamp_is_set(&s->exit_timestamp))
9fb86720
LP
4389 fprintf(f,
4390 "%sExit Timestamp: %s\n"
4391 "%sExit Code: %s\n"
4392 "%sExit Status: %i\n",
63983207 4393 prefix, format_timestamp(buf, sizeof(buf), s->exit_timestamp.realtime),
9fb86720
LP
4394 prefix, sigchld_code_to_string(s->code),
4395 prefix, s->status);
5cb5a6ff 4396}
44d8db9e 4397
9e2f7c11 4398char *exec_command_line(char **argv) {
44d8db9e
LP
4399 size_t k;
4400 char *n, *p, **a;
4401 bool first = true;
4402
9e2f7c11 4403 assert(argv);
44d8db9e 4404
9164977d 4405 k = 1;
9e2f7c11 4406 STRV_FOREACH(a, argv)
44d8db9e
LP
4407 k += strlen(*a)+3;
4408
5cd9cd35
LP
4409 n = new(char, k);
4410 if (!n)
44d8db9e
LP
4411 return NULL;
4412
4413 p = n;
9e2f7c11 4414 STRV_FOREACH(a, argv) {
44d8db9e
LP
4415
4416 if (!first)
4417 *(p++) = ' ';
4418 else
4419 first = false;
4420
4421 if (strpbrk(*a, WHITESPACE)) {
4422 *(p++) = '\'';
4423 p = stpcpy(p, *a);
4424 *(p++) = '\'';
4425 } else
4426 p = stpcpy(p, *a);
4427
4428 }
4429
9164977d
LP
4430 *p = 0;
4431
44d8db9e
LP
4432 /* FIXME: this doesn't really handle arguments that have
4433 * spaces and ticks in them */
4434
4435 return n;
4436}
4437
4438void exec_command_dump(ExecCommand *c, FILE *f, const char *prefix) {
e1d75803 4439 _cleanup_free_ char *cmd = NULL;
4c940960 4440 const char *prefix2;
44d8db9e
LP
4441
4442 assert(c);
4443 assert(f);
4444
4c940960 4445 prefix = strempty(prefix);
63c372cb 4446 prefix2 = strjoina(prefix, "\t");
44d8db9e 4447
9e2f7c11 4448 cmd = exec_command_line(c->argv);
44d8db9e
LP
4449 fprintf(f,
4450 "%sCommand Line: %s\n",
4451 prefix, cmd ? cmd : strerror(ENOMEM));
4452
9fb86720 4453 exec_status_dump(&c->exec_status, f, prefix2);
44d8db9e
LP
4454}
4455
4456void exec_command_dump_list(ExecCommand *c, FILE *f, const char *prefix) {
4457 assert(f);
4458
4c940960 4459 prefix = strempty(prefix);
44d8db9e
LP
4460
4461 LIST_FOREACH(command, c, c)
4462 exec_command_dump(c, f, prefix);
4463}
94f04347 4464
a6a80b4f
LP
4465void exec_command_append_list(ExecCommand **l, ExecCommand *e) {
4466 ExecCommand *end;
4467
4468 assert(l);
4469 assert(e);
4470
4471 if (*l) {
35b8ca3a 4472 /* It's kind of important, that we keep the order here */
71fda00f
LP
4473 LIST_FIND_TAIL(command, *l, end);
4474 LIST_INSERT_AFTER(command, *l, end, e);
a6a80b4f
LP
4475 } else
4476 *l = e;
4477}
4478
26fd040d
LP
4479int exec_command_set(ExecCommand *c, const char *path, ...) {
4480 va_list ap;
4481 char **l, *p;
4482
4483 assert(c);
4484 assert(path);
4485
4486 va_start(ap, path);
4487 l = strv_new_ap(path, ap);
4488 va_end(ap);
4489
4490 if (!l)
4491 return -ENOMEM;
4492
250a918d
LP
4493 p = strdup(path);
4494 if (!p) {
26fd040d
LP
4495 strv_free(l);
4496 return -ENOMEM;
4497 }
4498
4499 free(c->path);
4500 c->path = p;
4501
4502 strv_free(c->argv);
4503 c->argv = l;
4504
4505 return 0;
4506}
4507
86b23b07 4508int exec_command_append(ExecCommand *c, const char *path, ...) {
e63ff941 4509 _cleanup_strv_free_ char **l = NULL;
86b23b07 4510 va_list ap;
86b23b07
JS
4511 int r;
4512
4513 assert(c);
4514 assert(path);
4515
4516 va_start(ap, path);
4517 l = strv_new_ap(path, ap);
4518 va_end(ap);
4519
4520 if (!l)
4521 return -ENOMEM;
4522
e287086b 4523 r = strv_extend_strv(&c->argv, l, false);
e63ff941 4524 if (r < 0)
86b23b07 4525 return r;
86b23b07
JS
4526
4527 return 0;
4528}
4529
4530
613b411c
LP
4531static int exec_runtime_allocate(ExecRuntime **rt) {
4532
4533 if (*rt)
4534 return 0;
4535
4536 *rt = new0(ExecRuntime, 1);
f146f5e1 4537 if (!*rt)
613b411c
LP
4538 return -ENOMEM;
4539
4540 (*rt)->n_ref = 1;
4541 (*rt)->netns_storage_socket[0] = (*rt)->netns_storage_socket[1] = -1;
4542
4543 return 0;
4544}
4545
4546int exec_runtime_make(ExecRuntime **rt, ExecContext *c, const char *id) {
4547 int r;
4548
4549 assert(rt);
4550 assert(c);
4551 assert(id);
4552
4553 if (*rt)
4554 return 1;
4555
4556 if (!c->private_network && !c->private_tmp)
4557 return 0;
4558
4559 r = exec_runtime_allocate(rt);
4560 if (r < 0)
4561 return r;
4562
4563 if (c->private_network && (*rt)->netns_storage_socket[0] < 0) {
33df919d 4564 if (socketpair(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC, 0, (*rt)->netns_storage_socket) < 0)
613b411c
LP
4565 return -errno;
4566 }
4567
4568 if (c->private_tmp && !(*rt)->tmp_dir) {
4569 r = setup_tmp_dirs(id, &(*rt)->tmp_dir, &(*rt)->var_tmp_dir);
4570 if (r < 0)
4571 return r;
4572 }
4573
4574 return 1;
4575}
4576
4577ExecRuntime *exec_runtime_ref(ExecRuntime *r) {
4578 assert(r);
4579 assert(r->n_ref > 0);
4580
4581 r->n_ref++;
4582 return r;
4583}
4584
4585ExecRuntime *exec_runtime_unref(ExecRuntime *r) {
4586
4587 if (!r)
4588 return NULL;
4589
4590 assert(r->n_ref > 0);
4591
4592 r->n_ref--;
f2341e0a
LP
4593 if (r->n_ref > 0)
4594 return NULL;
4595
4596 free(r->tmp_dir);
4597 free(r->var_tmp_dir);
4598 safe_close_pair(r->netns_storage_socket);
6b430fdb 4599 return mfree(r);
613b411c
LP
4600}
4601
f2341e0a 4602int exec_runtime_serialize(Unit *u, ExecRuntime *rt, FILE *f, FDSet *fds) {
613b411c
LP
4603 assert(u);
4604 assert(f);
4605 assert(fds);
4606
4607 if (!rt)
4608 return 0;
4609
4610 if (rt->tmp_dir)
4611 unit_serialize_item(u, f, "tmp-dir", rt->tmp_dir);
4612
4613 if (rt->var_tmp_dir)
4614 unit_serialize_item(u, f, "var-tmp-dir", rt->var_tmp_dir);
4615
4616 if (rt->netns_storage_socket[0] >= 0) {
4617 int copy;
4618
4619 copy = fdset_put_dup(fds, rt->netns_storage_socket[0]);
4620 if (copy < 0)
4621 return copy;
4622
4623 unit_serialize_item_format(u, f, "netns-socket-0", "%i", copy);
4624 }
4625
4626 if (rt->netns_storage_socket[1] >= 0) {
4627 int copy;
4628
4629 copy = fdset_put_dup(fds, rt->netns_storage_socket[1]);
4630 if (copy < 0)
4631 return copy;
4632
4633 unit_serialize_item_format(u, f, "netns-socket-1", "%i", copy);
4634 }
4635
4636 return 0;
4637}
4638
f2341e0a 4639int exec_runtime_deserialize_item(Unit *u, ExecRuntime **rt, const char *key, const char *value, FDSet *fds) {
613b411c
LP
4640 int r;
4641
4642 assert(rt);
4643 assert(key);
4644 assert(value);
4645
4646 if (streq(key, "tmp-dir")) {
4647 char *copy;
4648
4649 r = exec_runtime_allocate(rt);
4650 if (r < 0)
f2341e0a 4651 return log_oom();
613b411c
LP
4652
4653 copy = strdup(value);
4654 if (!copy)
4655 return log_oom();
4656
4657 free((*rt)->tmp_dir);
4658 (*rt)->tmp_dir = copy;
4659
4660 } else if (streq(key, "var-tmp-dir")) {
4661 char *copy;
4662
4663 r = exec_runtime_allocate(rt);
4664 if (r < 0)
f2341e0a 4665 return log_oom();
613b411c
LP
4666
4667 copy = strdup(value);
4668 if (!copy)
4669 return log_oom();
4670
4671 free((*rt)->var_tmp_dir);
4672 (*rt)->var_tmp_dir = copy;
4673
4674 } else if (streq(key, "netns-socket-0")) {
4675 int fd;
4676
4677 r = exec_runtime_allocate(rt);
4678 if (r < 0)
f2341e0a 4679 return log_oom();
613b411c
LP
4680
4681 if (safe_atoi(value, &fd) < 0 || !fdset_contains(fds, fd))
f2341e0a 4682 log_unit_debug(u, "Failed to parse netns socket value: %s", value);
613b411c 4683 else {
03e334a1 4684 safe_close((*rt)->netns_storage_socket[0]);
613b411c
LP
4685 (*rt)->netns_storage_socket[0] = fdset_remove(fds, fd);
4686 }
4687 } else if (streq(key, "netns-socket-1")) {
4688 int fd;
4689
4690 r = exec_runtime_allocate(rt);
4691 if (r < 0)
f2341e0a 4692 return log_oom();
613b411c
LP
4693
4694 if (safe_atoi(value, &fd) < 0 || !fdset_contains(fds, fd))
f2341e0a 4695 log_unit_debug(u, "Failed to parse netns socket value: %s", value);
613b411c 4696 else {
03e334a1 4697 safe_close((*rt)->netns_storage_socket[1]);
613b411c
LP
4698 (*rt)->netns_storage_socket[1] = fdset_remove(fds, fd);
4699 }
4700 } else
4701 return 0;
4702
4703 return 1;
4704}
4705
4706static void *remove_tmpdir_thread(void *p) {
4707 _cleanup_free_ char *path = p;
4708
c6878637 4709 (void) rm_rf(path, REMOVE_ROOT|REMOVE_PHYSICAL);
613b411c
LP
4710 return NULL;
4711}
4712
4713void exec_runtime_destroy(ExecRuntime *rt) {
98b47d54
LP
4714 int r;
4715
613b411c
LP
4716 if (!rt)
4717 return;
4718
4719 /* If there are multiple users of this, let's leave the stuff around */
4720 if (rt->n_ref > 1)
4721 return;
4722
4723 if (rt->tmp_dir) {
4724 log_debug("Spawning thread to nuke %s", rt->tmp_dir);
98b47d54
LP
4725
4726 r = asynchronous_job(remove_tmpdir_thread, rt->tmp_dir);
4727 if (r < 0) {
da927ba9 4728 log_warning_errno(r, "Failed to nuke %s: %m", rt->tmp_dir);
98b47d54
LP
4729 free(rt->tmp_dir);
4730 }
4731
613b411c
LP
4732 rt->tmp_dir = NULL;
4733 }
4734
4735 if (rt->var_tmp_dir) {
4736 log_debug("Spawning thread to nuke %s", rt->var_tmp_dir);
98b47d54
LP
4737
4738 r = asynchronous_job(remove_tmpdir_thread, rt->var_tmp_dir);
4739 if (r < 0) {
da927ba9 4740 log_warning_errno(r, "Failed to nuke %s: %m", rt->var_tmp_dir);
98b47d54
LP
4741 free(rt->var_tmp_dir);
4742 }
4743
613b411c
LP
4744 rt->var_tmp_dir = NULL;
4745 }
4746
3d94f76c 4747 safe_close_pair(rt->netns_storage_socket);
613b411c
LP
4748}
4749
80876c20
LP
4750static const char* const exec_input_table[_EXEC_INPUT_MAX] = {
4751 [EXEC_INPUT_NULL] = "null",
4752 [EXEC_INPUT_TTY] = "tty",
4753 [EXEC_INPUT_TTY_FORCE] = "tty-force",
4f2d528d 4754 [EXEC_INPUT_TTY_FAIL] = "tty-fail",
52c239d7
LB
4755 [EXEC_INPUT_SOCKET] = "socket",
4756 [EXEC_INPUT_NAMED_FD] = "fd",
08f3be7a 4757 [EXEC_INPUT_DATA] = "data",
2038c3f5 4758 [EXEC_INPUT_FILE] = "file",
80876c20
LP
4759};
4760
8a0867d6
LP
4761DEFINE_STRING_TABLE_LOOKUP(exec_input, ExecInput);
4762
94f04347 4763static const char* const exec_output_table[_EXEC_OUTPUT_MAX] = {
80876c20 4764 [EXEC_OUTPUT_INHERIT] = "inherit",
94f04347 4765 [EXEC_OUTPUT_NULL] = "null",
80876c20 4766 [EXEC_OUTPUT_TTY] = "tty",
94f04347 4767 [EXEC_OUTPUT_SYSLOG] = "syslog",
28dbc1e8 4768 [EXEC_OUTPUT_SYSLOG_AND_CONSOLE] = "syslog+console",
9a6bca7a 4769 [EXEC_OUTPUT_KMSG] = "kmsg",
28dbc1e8 4770 [EXEC_OUTPUT_KMSG_AND_CONSOLE] = "kmsg+console",
706343f4
LP
4771 [EXEC_OUTPUT_JOURNAL] = "journal",
4772 [EXEC_OUTPUT_JOURNAL_AND_CONSOLE] = "journal+console",
52c239d7
LB
4773 [EXEC_OUTPUT_SOCKET] = "socket",
4774 [EXEC_OUTPUT_NAMED_FD] = "fd",
2038c3f5 4775 [EXEC_OUTPUT_FILE] = "file",
94f04347
LP
4776};
4777
4778DEFINE_STRING_TABLE_LOOKUP(exec_output, ExecOutput);
023a4f67
LP
4779
4780static const char* const exec_utmp_mode_table[_EXEC_UTMP_MODE_MAX] = {
4781 [EXEC_UTMP_INIT] = "init",
4782 [EXEC_UTMP_LOGIN] = "login",
4783 [EXEC_UTMP_USER] = "user",
4784};
4785
4786DEFINE_STRING_TABLE_LOOKUP(exec_utmp_mode, ExecUtmpMode);
53f47dfc
YW
4787
4788static const char* const exec_preserve_mode_table[_EXEC_PRESERVE_MODE_MAX] = {
4789 [EXEC_PRESERVE_NO] = "no",
4790 [EXEC_PRESERVE_YES] = "yes",
4791 [EXEC_PRESERVE_RESTART] = "restart",
4792};
4793
4794DEFINE_STRING_TABLE_LOOKUP_WITH_BOOLEAN(exec_preserve_mode, ExecPreserveMode, EXEC_PRESERVE_YES);
3536f49e 4795
72fd1768 4796static const char* const exec_directory_type_table[_EXEC_DIRECTORY_TYPE_MAX] = {
3536f49e
YW
4797 [EXEC_DIRECTORY_RUNTIME] = "RuntimeDirectory",
4798 [EXEC_DIRECTORY_STATE] = "StateDirectory",
4799 [EXEC_DIRECTORY_CACHE] = "CacheDirectory",
4800 [EXEC_DIRECTORY_LOGS] = "LogsDirectory",
4801 [EXEC_DIRECTORY_CONFIGURATION] = "ConfigurationDirectory",
4802};
4803
4804DEFINE_STRING_TABLE_LOOKUP(exec_directory_type, ExecDirectoryType);
b1edf445
LP
4805
4806static const char* const exec_keyring_mode_table[_EXEC_KEYRING_MODE_MAX] = {
4807 [EXEC_KEYRING_INHERIT] = "inherit",
4808 [EXEC_KEYRING_PRIVATE] = "private",
4809 [EXEC_KEYRING_SHARED] = "shared",
4810};
4811
4812DEFINE_STRING_TABLE_LOOKUP(exec_keyring_mode, ExecKeyringMode);