]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/core/execute.c
Merge pull request #7335 from poettering/dissect-meta-info
[thirdparty/systemd.git] / src / core / execute.c
CommitLineData
53e1b683 1/* SPDX-License-Identifier: LGPL-2.1+ */
a7334b09
LP
2/***
3 This file is part of systemd.
4
5 Copyright 2010 Lennart Poettering
6
7 systemd is free software; you can redistribute it and/or modify it
5430f7f2
LP
8 under the terms of the GNU Lesser General Public License as published by
9 the Free Software Foundation; either version 2.1 of the License, or
a7334b09
LP
10 (at your option) any later version.
11
12 systemd is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
5430f7f2 15 Lesser General Public License for more details.
a7334b09 16
5430f7f2 17 You should have received a copy of the GNU Lesser General Public License
a7334b09
LP
18 along with systemd; If not, see <http://www.gnu.org/licenses/>.
19***/
20
034c6ed7
LP
21#include <errno.h>
22#include <fcntl.h>
8dd4c05b
LP
23#include <glob.h>
24#include <grp.h>
25#include <poll.h>
309bff19 26#include <signal.h>
8dd4c05b 27#include <string.h>
19c0b0b9 28#include <sys/capability.h>
d251207d 29#include <sys/eventfd.h>
f3e43635 30#include <sys/mman.h>
8dd4c05b 31#include <sys/personality.h>
94f04347 32#include <sys/prctl.h>
d2ffa389 33#include <sys/shm.h>
8dd4c05b 34#include <sys/socket.h>
451a074f 35#include <sys/stat.h>
d2ffa389 36#include <sys/types.h>
8dd4c05b
LP
37#include <sys/un.h>
38#include <unistd.h>
023a4f67 39#include <utmpx.h>
5cb5a6ff 40
349cc4a5 41#if HAVE_PAM
5b6319dc
LP
42#include <security/pam_appl.h>
43#endif
44
349cc4a5 45#if HAVE_SELINUX
7b52a628
MS
46#include <selinux/selinux.h>
47#endif
48
349cc4a5 49#if HAVE_SECCOMP
17df7223
LP
50#include <seccomp.h>
51#endif
52
349cc4a5 53#if HAVE_APPARMOR
eef65bf3
MS
54#include <sys/apparmor.h>
55#endif
56
24882e06 57#include "sd-messages.h"
8dd4c05b
LP
58
59#include "af-list.h"
b5efdb8a 60#include "alloc-util.h"
349cc4a5 61#if HAVE_APPARMOR
3ffd4af2
LP
62#include "apparmor-util.h"
63#endif
8dd4c05b
LP
64#include "async.h"
65#include "barrier.h"
8dd4c05b 66#include "cap-list.h"
430f0182 67#include "capability-util.h"
a1164ae3 68#include "chown-recursive.h"
f6a6225e 69#include "def.h"
4d1a6904 70#include "env-util.h"
17df7223 71#include "errno-list.h"
3ffd4af2 72#include "execute.h"
8dd4c05b 73#include "exit-status.h"
3ffd4af2 74#include "fd-util.h"
8dd4c05b 75#include "fileio.h"
f97b34a6 76#include "format-util.h"
f4f15635 77#include "fs-util.h"
7d50b32a 78#include "glob-util.h"
c004493c 79#include "io-util.h"
8dd4c05b 80#include "ioprio.h"
a1164ae3 81#include "label.h"
8dd4c05b
LP
82#include "log.h"
83#include "macro.h"
84#include "missing.h"
85#include "mkdir.h"
86#include "namespace.h"
6bedfcbb 87#include "parse-util.h"
8dd4c05b 88#include "path-util.h"
0b452006 89#include "process-util.h"
78f22b97 90#include "rlimit-util.h"
8dd4c05b 91#include "rm-rf.h"
349cc4a5 92#if HAVE_SECCOMP
3ffd4af2
LP
93#include "seccomp-util.h"
94#endif
8dd4c05b 95#include "securebits.h"
07d46372 96#include "securebits-util.h"
8dd4c05b 97#include "selinux-util.h"
24882e06 98#include "signal-util.h"
8dd4c05b 99#include "smack-util.h"
fd63e712 100#include "special.h"
8b43440b 101#include "string-table.h"
07630cea 102#include "string-util.h"
8dd4c05b 103#include "strv.h"
7ccbd1ae 104#include "syslog-util.h"
8dd4c05b
LP
105#include "terminal-util.h"
106#include "unit.h"
b1d4f8e1 107#include "user-util.h"
8dd4c05b
LP
108#include "util.h"
109#include "utmp-wtmp.h"
5cb5a6ff 110
e056b01d 111#define IDLE_TIMEOUT_USEC (5*USEC_PER_SEC)
31a7eb86 112#define IDLE_TIMEOUT2_USEC (1*USEC_PER_SEC)
e6a26745 113
02a51aba
LP
114/* This assumes there is a 'tty' group */
115#define TTY_MODE 0620
116
531dca78
LP
117#define SNDBUF_SIZE (8*1024*1024)
118
034c6ed7
LP
/* Moves the passed fds so that entry number i of the array ends up on fd number i+3. Modifies the fds
 * array: every entry that had to be moved is replaced by its new fd number and the old fd is closed.
 * Returns 0 on success, or a negative errno-style value if duplicating an fd fails. */
static int shift_fds(int fds[], unsigned n_fds) {
        int first = 0;

        if (n_fds <= 0)
                return 0;

        assert(fds);

        for (;;) {
                int retry_at = -1, idx;

                for (idx = first; idx < (int) n_fds; idx++) {
                        int target = idx + 3, dup_fd;

                        /* Nothing to do if the fd already sits in its slot */
                        if (fds[idx] == target)
                                continue;

                        dup_fd = fcntl(fds[idx], F_DUPFD, target);
                        if (dup_fd < 0)
                                return -errno;

                        safe_close(fds[idx]);
                        fds[idx] = dup_fd;

                        /* The slot we wanted was still occupied, so we got a higher fd number. Remember
                         * the first index where this happened, and do another pass from there. */
                        if (retry_at < 0 && dup_fd != target)
                                retry_at = idx;
                }

                if (retry_at < 0)
                        return 0;

                first = retry_at;
        }
}
163
4c47affc
FB
/* Adjusts the file flags of the fds we are about to pass on: O_NONBLOCK is set or dropped (according to
 * 'nonblock') on the first n_socket_fds entries only, and FD_CLOEXEC is dropped from all of them.
 * Returns 0 on success, or the negative error of the first helper call that failed. */
static int flags_fds(const int fds[], unsigned n_storage_fds, unsigned n_socket_fds, bool nonblock) {
        unsigned total = n_storage_fds + n_socket_fds, k;

        if (total <= 0)
                return 0;

        assert(fds);

        for (k = 0; k < total; k++) {
                int r;

                /* O_NONBLOCK only applies to socket activation fds */
                if (k < n_socket_fds) {
                        r = fd_nonblock(fds[k], nonblock);
                        if (r < 0)
                                return r;
                }

                /* FD_CLOEXEC is dropped unconditionally, after all these fds are supposed to be passed to
                 * our children */
                r = fd_cloexec(fds[k], false);
                if (r < 0)
                        return r;
        }

        return 0;
}
196
1e22b5cd 197static const char *exec_context_tty_path(const ExecContext *context) {
80876c20
LP
198 assert(context);
199
1e22b5cd
LP
200 if (context->stdio_as_fds)
201 return NULL;
202
80876c20
LP
203 if (context->tty_path)
204 return context->tty_path;
205
206 return "/dev/console";
207}
208
1e22b5cd
LP
209static void exec_context_tty_reset(const ExecContext *context, const ExecParameters *p) {
210 const char *path;
211
6ea832a2
LP
212 assert(context);
213
1e22b5cd 214 path = exec_context_tty_path(context);
6ea832a2 215
1e22b5cd
LP
216 if (context->tty_vhangup) {
217 if (p && p->stdin_fd >= 0)
218 (void) terminal_vhangup_fd(p->stdin_fd);
219 else if (path)
220 (void) terminal_vhangup(path);
221 }
6ea832a2 222
1e22b5cd
LP
223 if (context->tty_reset) {
224 if (p && p->stdin_fd >= 0)
225 (void) reset_terminal_fd(p->stdin_fd, true);
226 else if (path)
227 (void) reset_terminal(path);
228 }
229
230 if (context->tty_vt_disallocate && path)
231 (void) vt_disallocate(path);
6ea832a2
LP
232}
233
6af760f3
LP
234static bool is_terminal_input(ExecInput i) {
235 return IN_SET(i,
236 EXEC_INPUT_TTY,
237 EXEC_INPUT_TTY_FORCE,
238 EXEC_INPUT_TTY_FAIL);
239}
240
3a1286b6 241static bool is_terminal_output(ExecOutput o) {
6af760f3
LP
242 return IN_SET(o,
243 EXEC_OUTPUT_TTY,
244 EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
245 EXEC_OUTPUT_KMSG_AND_CONSOLE,
246 EXEC_OUTPUT_JOURNAL_AND_CONSOLE);
247}
248
aac8c0c3
LP
249static bool is_syslog_output(ExecOutput o) {
250 return IN_SET(o,
251 EXEC_OUTPUT_SYSLOG,
252 EXEC_OUTPUT_SYSLOG_AND_CONSOLE);
253}
254
255static bool is_kmsg_output(ExecOutput o) {
256 return IN_SET(o,
257 EXEC_OUTPUT_KMSG,
258 EXEC_OUTPUT_KMSG_AND_CONSOLE);
259}
260
6af760f3
LP
261static bool exec_context_needs_term(const ExecContext *c) {
262 assert(c);
263
264 /* Return true if the execution context suggests we should set $TERM to something useful. */
265
266 if (is_terminal_input(c->std_input))
267 return true;
268
269 if (is_terminal_output(c->std_output))
270 return true;
271
272 if (is_terminal_output(c->std_error))
273 return true;
274
275 return !!c->tty_path;
3a1286b6
MS
276}
277
80876c20 278static int open_null_as(int flags, int nfd) {
046a82c1 279 int fd;
071830ff 280
80876c20 281 assert(nfd >= 0);
071830ff 282
613b411c
LP
283 fd = open("/dev/null", flags|O_NOCTTY);
284 if (fd < 0)
071830ff
LP
285 return -errno;
286
046a82c1 287 return move_fd(fd, nfd, false);
071830ff
LP
288}
289
524daa8c 290static int connect_journal_socket(int fd, uid_t uid, gid_t gid) {
92a17af9 291 static const union sockaddr_union sa = {
b92bea5d
ZJS
292 .un.sun_family = AF_UNIX,
293 .un.sun_path = "/run/systemd/journal/stdout",
294 };
524daa8c
ZJS
295 uid_t olduid = UID_INVALID;
296 gid_t oldgid = GID_INVALID;
297 int r;
298
cad93f29 299 if (gid_is_valid(gid)) {
524daa8c
ZJS
300 oldgid = getgid();
301
92a17af9 302 if (setegid(gid) < 0)
524daa8c
ZJS
303 return -errno;
304 }
305
cad93f29 306 if (uid_is_valid(uid)) {
524daa8c
ZJS
307 olduid = getuid();
308
92a17af9 309 if (seteuid(uid) < 0) {
524daa8c
ZJS
310 r = -errno;
311 goto restore_gid;
312 }
313 }
314
92a17af9 315 r = connect(fd, &sa.sa, SOCKADDR_UN_LEN(sa.un)) < 0 ? -errno : 0;
524daa8c
ZJS
316
317 /* If we fail to restore the uid or gid, things will likely
318 fail later on. This should only happen if an LSM interferes. */
319
cad93f29 320 if (uid_is_valid(uid))
524daa8c
ZJS
321 (void) seteuid(olduid);
322
323 restore_gid:
cad93f29 324 if (gid_is_valid(gid))
524daa8c
ZJS
325 (void) setegid(oldgid);
326
327 return r;
328}
329
fd1f9c89 330static int connect_logger_as(
7a1ab780 331 Unit *unit,
fd1f9c89 332 const ExecContext *context,
af635cf3 333 const ExecParameters *params,
fd1f9c89
LP
334 ExecOutput output,
335 const char *ident,
fd1f9c89
LP
336 int nfd,
337 uid_t uid,
338 gid_t gid) {
339
524daa8c 340 int fd, r;
071830ff
LP
341
342 assert(context);
af635cf3 343 assert(params);
80876c20
LP
344 assert(output < _EXEC_OUTPUT_MAX);
345 assert(ident);
346 assert(nfd >= 0);
071830ff 347
54fe0cdb
LP
348 fd = socket(AF_UNIX, SOCK_STREAM, 0);
349 if (fd < 0)
80876c20 350 return -errno;
071830ff 351
524daa8c
ZJS
352 r = connect_journal_socket(fd, uid, gid);
353 if (r < 0)
354 return r;
071830ff 355
80876c20 356 if (shutdown(fd, SHUT_RD) < 0) {
03e334a1 357 safe_close(fd);
80876c20
LP
358 return -errno;
359 }
071830ff 360
fd1f9c89 361 (void) fd_inc_sndbuf(fd, SNDBUF_SIZE);
531dca78 362
80876c20 363 dprintf(fd,
62bca2c6 364 "%s\n"
80876c20
LP
365 "%s\n"
366 "%i\n"
54fe0cdb
LP
367 "%i\n"
368 "%i\n"
369 "%i\n"
4f4a1dbf 370 "%i\n",
c867611e 371 context->syslog_identifier ?: ident,
af635cf3 372 params->flags & EXEC_PASS_LOG_UNIT ? unit->id : "",
54fe0cdb
LP
373 context->syslog_priority,
374 !!context->syslog_level_prefix,
aac8c0c3
LP
375 is_syslog_output(output),
376 is_kmsg_output(output),
3a1286b6 377 is_terminal_output(output));
80876c20 378
046a82c1 379 return move_fd(fd, nfd, false);
80876c20 380}
3a274a21 381static int open_terminal_as(const char *path, int flags, int nfd) {
046a82c1 382 int fd;
071830ff 383
80876c20
LP
384 assert(path);
385 assert(nfd >= 0);
fd1f9c89 386
3a274a21 387 fd = open_terminal(path, flags | O_NOCTTY);
3cc2aff1 388 if (fd < 0)
80876c20 389 return fd;
071830ff 390
046a82c1 391 return move_fd(fd, nfd, false);
80876c20 392}
071830ff 393
2038c3f5
LP
394static int acquire_path(const char *path, int flags, mode_t mode) {
395 union sockaddr_union sa = {
396 .sa.sa_family = AF_UNIX,
397 };
80876c20 398 int fd, r;
071830ff 399
80876c20 400 assert(path);
071830ff 401
2038c3f5
LP
402 if (IN_SET(flags & O_ACCMODE, O_WRONLY, O_RDWR))
403 flags |= O_CREAT;
404
405 fd = open(path, flags|O_NOCTTY, mode);
406 if (fd >= 0)
80876c20 407 return fd;
071830ff 408
2038c3f5
LP
409 if (errno != ENXIO) /* ENXIO is returned when we try to open() an AF_UNIX file system socket on Linux */
410 return -errno;
411 if (strlen(path) > sizeof(sa.un.sun_path)) /* Too long, can't be a UNIX socket */
412 return -ENXIO;
413
414 /* So, it appears the specified path could be an AF_UNIX socket. Let's see if we can connect to it. */
415
416 fd = socket(AF_UNIX, SOCK_STREAM, 0);
417 if (fd < 0)
418 return -errno;
419
420 strncpy(sa.un.sun_path, path, sizeof(sa.un.sun_path));
421 if (connect(fd, &sa.sa, SOCKADDR_UN_LEN(sa.un)) < 0) {
03e334a1 422 safe_close(fd);
2038c3f5
LP
423 return errno == EINVAL ? -ENXIO : -errno; /* Propagate initial error if we get EINVAL, i.e. we have
424 * indication that his wasn't an AF_UNIX socket after all */
425 }
071830ff 426
2038c3f5
LP
427 if ((flags & O_ACCMODE) == O_RDONLY)
428 r = shutdown(fd, SHUT_WR);
429 else if ((flags & O_ACCMODE) == O_WRONLY)
430 r = shutdown(fd, SHUT_RD);
431 else
432 return fd;
433 if (r < 0) {
434 safe_close(fd);
435 return -errno;
436 }
437
438 return fd;
80876c20 439}
071830ff 440
08f3be7a
LP
441static int fixup_input(
442 const ExecContext *context,
443 int socket_fd,
444 bool apply_tty_stdin) {
445
446 ExecInput std_input;
447
448 assert(context);
449
450 std_input = context->std_input;
1e3ad081
LP
451
452 if (is_terminal_input(std_input) && !apply_tty_stdin)
453 return EXEC_INPUT_NULL;
071830ff 454
03fd9c49 455 if (std_input == EXEC_INPUT_SOCKET && socket_fd < 0)
4f2d528d
LP
456 return EXEC_INPUT_NULL;
457
08f3be7a
LP
458 if (std_input == EXEC_INPUT_DATA && context->stdin_data_size == 0)
459 return EXEC_INPUT_NULL;
460
03fd9c49 461 return std_input;
4f2d528d
LP
462}
463
03fd9c49 464static int fixup_output(ExecOutput std_output, int socket_fd) {
4f2d528d 465
03fd9c49 466 if (std_output == EXEC_OUTPUT_SOCKET && socket_fd < 0)
4f2d528d
LP
467 return EXEC_OUTPUT_INHERIT;
468
03fd9c49 469 return std_output;
4f2d528d
LP
470}
471
a34ceba6
LP
472static int setup_input(
473 const ExecContext *context,
474 const ExecParameters *params,
52c239d7
LB
475 int socket_fd,
476 int named_iofds[3]) {
a34ceba6 477
4f2d528d
LP
478 ExecInput i;
479
480 assert(context);
a34ceba6
LP
481 assert(params);
482
483 if (params->stdin_fd >= 0) {
484 if (dup2(params->stdin_fd, STDIN_FILENO) < 0)
485 return -errno;
486
487 /* Try to make this the controlling tty, if it is a tty, and reset it */
1fb0682e
LP
488 if (isatty(STDIN_FILENO)) {
489 (void) ioctl(STDIN_FILENO, TIOCSCTTY, context->std_input == EXEC_INPUT_TTY_FORCE);
490 (void) reset_terminal_fd(STDIN_FILENO, true);
491 }
a34ceba6
LP
492
493 return STDIN_FILENO;
494 }
4f2d528d 495
08f3be7a 496 i = fixup_input(context, socket_fd, params->flags & EXEC_APPLY_TTY_STDIN);
4f2d528d
LP
497
498 switch (i) {
071830ff 499
80876c20
LP
500 case EXEC_INPUT_NULL:
501 return open_null_as(O_RDONLY, STDIN_FILENO);
502
503 case EXEC_INPUT_TTY:
504 case EXEC_INPUT_TTY_FORCE:
505 case EXEC_INPUT_TTY_FAIL: {
046a82c1 506 int fd;
071830ff 507
1e22b5cd 508 fd = acquire_terminal(exec_context_tty_path(context),
970edce6
ZJS
509 i == EXEC_INPUT_TTY_FAIL,
510 i == EXEC_INPUT_TTY_FORCE,
511 false,
3a43da28 512 USEC_INFINITY);
970edce6 513 if (fd < 0)
80876c20
LP
514 return fd;
515
046a82c1 516 return move_fd(fd, STDIN_FILENO, false);
80876c20
LP
517 }
518
4f2d528d 519 case EXEC_INPUT_SOCKET:
e75a9ed1
LP
520 assert(socket_fd >= 0);
521
4f2d528d
LP
522 return dup2(socket_fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
523
52c239d7 524 case EXEC_INPUT_NAMED_FD:
e75a9ed1
LP
525 assert(named_iofds[STDIN_FILENO] >= 0);
526
52c239d7
LB
527 (void) fd_nonblock(named_iofds[STDIN_FILENO], false);
528 return dup2(named_iofds[STDIN_FILENO], STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
529
08f3be7a
LP
530 case EXEC_INPUT_DATA: {
531 int fd;
532
533 fd = acquire_data_fd(context->stdin_data, context->stdin_data_size, 0);
534 if (fd < 0)
535 return fd;
536
537 return move_fd(fd, STDIN_FILENO, false);
538 }
539
2038c3f5
LP
540 case EXEC_INPUT_FILE: {
541 bool rw;
542 int fd;
543
544 assert(context->stdio_file[STDIN_FILENO]);
545
546 rw = (context->std_output == EXEC_OUTPUT_FILE && streq_ptr(context->stdio_file[STDIN_FILENO], context->stdio_file[STDOUT_FILENO])) ||
547 (context->std_error == EXEC_OUTPUT_FILE && streq_ptr(context->stdio_file[STDIN_FILENO], context->stdio_file[STDERR_FILENO]));
548
549 fd = acquire_path(context->stdio_file[STDIN_FILENO], rw ? O_RDWR : O_RDONLY, 0666 & ~context->umask);
550 if (fd < 0)
551 return fd;
552
553 return move_fd(fd, STDIN_FILENO, false);
554 }
555
80876c20
LP
556 default:
557 assert_not_reached("Unknown input type");
558 }
559}
560
a34ceba6
LP
561static int setup_output(
562 Unit *unit,
563 const ExecContext *context,
564 const ExecParameters *params,
565 int fileno,
566 int socket_fd,
52c239d7 567 int named_iofds[3],
a34ceba6 568 const char *ident,
7bce046b
LP
569 uid_t uid,
570 gid_t gid,
571 dev_t *journal_stream_dev,
572 ino_t *journal_stream_ino) {
a34ceba6 573
4f2d528d
LP
574 ExecOutput o;
575 ExecInput i;
47c1d80d 576 int r;
4f2d528d 577
f2341e0a 578 assert(unit);
80876c20 579 assert(context);
a34ceba6 580 assert(params);
80876c20 581 assert(ident);
7bce046b
LP
582 assert(journal_stream_dev);
583 assert(journal_stream_ino);
80876c20 584
a34ceba6
LP
585 if (fileno == STDOUT_FILENO && params->stdout_fd >= 0) {
586
587 if (dup2(params->stdout_fd, STDOUT_FILENO) < 0)
588 return -errno;
589
590 return STDOUT_FILENO;
591 }
592
593 if (fileno == STDERR_FILENO && params->stderr_fd >= 0) {
594 if (dup2(params->stderr_fd, STDERR_FILENO) < 0)
595 return -errno;
596
597 return STDERR_FILENO;
598 }
599
08f3be7a 600 i = fixup_input(context, socket_fd, params->flags & EXEC_APPLY_TTY_STDIN);
03fd9c49 601 o = fixup_output(context->std_output, socket_fd);
4f2d528d 602
eb17e935
MS
603 if (fileno == STDERR_FILENO) {
604 ExecOutput e;
605 e = fixup_output(context->std_error, socket_fd);
80876c20 606
eb17e935
MS
607 /* This expects the input and output are already set up */
608
609 /* Don't change the stderr file descriptor if we inherit all
610 * the way and are not on a tty */
611 if (e == EXEC_OUTPUT_INHERIT &&
612 o == EXEC_OUTPUT_INHERIT &&
613 i == EXEC_INPUT_NULL &&
614 !is_terminal_input(context->std_input) &&
615 getppid () != 1)
616 return fileno;
617
618 /* Duplicate from stdout if possible */
52c239d7 619 if ((e == o && e != EXEC_OUTPUT_NAMED_FD) || e == EXEC_OUTPUT_INHERIT)
eb17e935 620 return dup2(STDOUT_FILENO, fileno) < 0 ? -errno : fileno;
071830ff 621
eb17e935 622 o = e;
80876c20 623
eb17e935 624 } else if (o == EXEC_OUTPUT_INHERIT) {
21d21ea4
LP
625 /* If input got downgraded, inherit the original value */
626 if (i == EXEC_INPUT_NULL && is_terminal_input(context->std_input))
1e22b5cd 627 return open_terminal_as(exec_context_tty_path(context), O_WRONLY, fileno);
21d21ea4 628
08f3be7a
LP
629 /* If the input is connected to anything that's not a /dev/null or a data fd, inherit that... */
630 if (!IN_SET(i, EXEC_INPUT_NULL, EXEC_INPUT_DATA))
eb17e935 631 return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
071830ff 632
acb591e4
LP
633 /* If we are not started from PID 1 we just inherit STDOUT from our parent process. */
634 if (getppid() != 1)
eb17e935 635 return fileno;
94f04347 636
eb17e935
MS
637 /* We need to open /dev/null here anew, to get the right access mode. */
638 return open_null_as(O_WRONLY, fileno);
071830ff 639 }
94f04347 640
eb17e935 641 switch (o) {
80876c20
LP
642
643 case EXEC_OUTPUT_NULL:
eb17e935 644 return open_null_as(O_WRONLY, fileno);
80876c20
LP
645
646 case EXEC_OUTPUT_TTY:
4f2d528d 647 if (is_terminal_input(i))
eb17e935 648 return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
80876c20
LP
649
650 /* We don't reset the terminal if this is just about output */
1e22b5cd 651 return open_terminal_as(exec_context_tty_path(context), O_WRONLY, fileno);
80876c20
LP
652
653 case EXEC_OUTPUT_SYSLOG:
28dbc1e8 654 case EXEC_OUTPUT_SYSLOG_AND_CONSOLE:
9a6bca7a 655 case EXEC_OUTPUT_KMSG:
28dbc1e8 656 case EXEC_OUTPUT_KMSG_AND_CONSOLE:
706343f4
LP
657 case EXEC_OUTPUT_JOURNAL:
658 case EXEC_OUTPUT_JOURNAL_AND_CONSOLE:
af635cf3 659 r = connect_logger_as(unit, context, params, o, ident, fileno, uid, gid);
47c1d80d 660 if (r < 0) {
82677ae4 661 log_unit_warning_errno(unit, r, "Failed to connect %s to the journal socket, ignoring: %m", fileno == STDOUT_FILENO ? "stdout" : "stderr");
eb17e935 662 r = open_null_as(O_WRONLY, fileno);
7bce046b
LP
663 } else {
664 struct stat st;
665
666 /* If we connected this fd to the journal via a stream, patch the device/inode into the passed
667 * parameters, but only then. This is useful so that we can set $JOURNAL_STREAM that permits
ab2116b1
LP
668 * services to detect whether they are connected to the journal or not.
669 *
670 * If both stdout and stderr are connected to a stream then let's make sure to store the data
671 * about STDERR as that's usually the best way to do logging. */
7bce046b 672
ab2116b1
LP
673 if (fstat(fileno, &st) >= 0 &&
674 (*journal_stream_ino == 0 || fileno == STDERR_FILENO)) {
7bce046b
LP
675 *journal_stream_dev = st.st_dev;
676 *journal_stream_ino = st.st_ino;
677 }
47c1d80d
MS
678 }
679 return r;
4f2d528d
LP
680
681 case EXEC_OUTPUT_SOCKET:
682 assert(socket_fd >= 0);
e75a9ed1 683
eb17e935 684 return dup2(socket_fd, fileno) < 0 ? -errno : fileno;
94f04347 685
52c239d7 686 case EXEC_OUTPUT_NAMED_FD:
e75a9ed1
LP
687 assert(named_iofds[fileno] >= 0);
688
52c239d7
LB
689 (void) fd_nonblock(named_iofds[fileno], false);
690 return dup2(named_iofds[fileno], fileno) < 0 ? -errno : fileno;
691
2038c3f5
LP
692 case EXEC_OUTPUT_FILE: {
693 bool rw;
694 int fd;
695
696 assert(context->stdio_file[fileno]);
697
698 rw = context->std_input == EXEC_INPUT_FILE &&
699 streq_ptr(context->stdio_file[fileno], context->stdio_file[STDIN_FILENO]);
700
701 if (rw)
702 return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
703
704 fd = acquire_path(context->stdio_file[fileno], O_WRONLY, 0666 & ~context->umask);
705 if (fd < 0)
706 return fd;
707
708 return move_fd(fd, fileno, false);
709 }
710
94f04347 711 default:
80876c20 712 assert_not_reached("Unknown error type");
94f04347 713 }
071830ff
LP
714}
715
02a51aba
LP
716static int chown_terminal(int fd, uid_t uid) {
717 struct stat st;
718
719 assert(fd >= 0);
02a51aba 720
1ff74fb6
LP
721 /* Before we chown/chmod the TTY, let's ensure this is actually a tty */
722 if (isatty(fd) < 1)
723 return 0;
724
02a51aba 725 /* This might fail. What matters are the results. */
bab45044
LP
726 (void) fchown(fd, uid, -1);
727 (void) fchmod(fd, TTY_MODE);
02a51aba
LP
728
729 if (fstat(fd, &st) < 0)
730 return -errno;
731
d8b4e2e9 732 if (st.st_uid != uid || (st.st_mode & 0777) != TTY_MODE)
02a51aba
LP
733 return -EPERM;
734
735 return 0;
736}
737
7d5ceb64 738static int setup_confirm_stdio(const char *vc, int *_saved_stdin, int *_saved_stdout) {
3d18b167
LP
739 _cleanup_close_ int fd = -1, saved_stdin = -1, saved_stdout = -1;
740 int r;
80876c20 741
80876c20
LP
742 assert(_saved_stdin);
743 assert(_saved_stdout);
744
af6da548
LP
745 saved_stdin = fcntl(STDIN_FILENO, F_DUPFD, 3);
746 if (saved_stdin < 0)
747 return -errno;
80876c20 748
af6da548 749 saved_stdout = fcntl(STDOUT_FILENO, F_DUPFD, 3);
3d18b167
LP
750 if (saved_stdout < 0)
751 return -errno;
80876c20 752
7d5ceb64 753 fd = acquire_terminal(vc, false, false, false, DEFAULT_CONFIRM_USEC);
3d18b167
LP
754 if (fd < 0)
755 return fd;
80876c20 756
af6da548
LP
757 r = chown_terminal(fd, getuid());
758 if (r < 0)
3d18b167 759 return r;
02a51aba 760
3d18b167
LP
761 r = reset_terminal_fd(fd, true);
762 if (r < 0)
763 return r;
80876c20 764
3d18b167
LP
765 if (dup2(fd, STDIN_FILENO) < 0)
766 return -errno;
767
768 if (dup2(fd, STDOUT_FILENO) < 0)
769 return -errno;
80876c20
LP
770
771 if (fd >= 2)
03e334a1 772 safe_close(fd);
3d18b167 773 fd = -1;
80876c20
LP
774
775 *_saved_stdin = saved_stdin;
776 *_saved_stdout = saved_stdout;
777
3d18b167 778 saved_stdin = saved_stdout = -1;
80876c20 779
3d18b167 780 return 0;
80876c20
LP
781}
782
63d77c92 783static void write_confirm_error_fd(int err, int fd, const Unit *u) {
3b20f877
FB
784 assert(err < 0);
785
786 if (err == -ETIMEDOUT)
63d77c92 787 dprintf(fd, "Confirmation question timed out for %s, assuming positive response.\n", u->id);
3b20f877
FB
788 else {
789 errno = -err;
63d77c92 790 dprintf(fd, "Couldn't ask confirmation for %s: %m, assuming positive response.\n", u->id);
3b20f877
FB
791 }
792}
793
63d77c92 794static void write_confirm_error(int err, const char *vc, const Unit *u) {
03e334a1 795 _cleanup_close_ int fd = -1;
80876c20 796
3b20f877 797 assert(vc);
80876c20 798
7d5ceb64 799 fd = open_terminal(vc, O_WRONLY|O_NOCTTY|O_CLOEXEC);
af6da548 800 if (fd < 0)
3b20f877 801 return;
80876c20 802
63d77c92 803 write_confirm_error_fd(err, fd, u);
af6da548 804}
80876c20 805
/* Undoes setup_confirm_stdio(): releases the terminal, reinstalls the saved stdin/stdout fds (where set)
 * and closes the saved copies. Returns 0 on success, or the negative errno-style error of the last dup2()
 * call that failed. */
static int restore_confirm_stdio(int *saved_stdin, int *saved_stdout) {
        int ret = 0;

        assert(saved_stdin);
        assert(saved_stdout);

        release_terminal();

        if (*saved_stdin >= 0 && dup2(*saved_stdin, STDIN_FILENO) < 0)
                ret = -errno;

        if (*saved_stdout >= 0 && dup2(*saved_stdout, STDOUT_FILENO) < 0)
                ret = -errno;

        /* Close the copies in any case, and store what safe_close() returns in the caller's variables */
        *saved_stdin = safe_close(*saved_stdin);
        *saved_stdout = safe_close(*saved_stdout);

        return ret;
}
827
3b20f877
FB
/* Possible results of ask_for_confirmation() */
enum {
        CONFIRM_PRETEND_FAILURE = -1,  /* don't execute the command, pretend it failed */
        CONFIRM_PRETEND_SUCCESS =  0,  /* don't execute the command, pretend it succeeded */
        CONFIRM_EXECUTE         =  1,  /* execute the command */
};
833
eedf223a 834static int ask_for_confirmation(const char *vc, Unit *u, const char *cmdline) {
af6da548 835 int saved_stdout = -1, saved_stdin = -1, r;
2bcd3c26 836 _cleanup_free_ char *e = NULL;
3b20f877 837 char c;
af6da548 838
3b20f877 839 /* For any internal errors, assume a positive response. */
7d5ceb64 840 r = setup_confirm_stdio(vc, &saved_stdin, &saved_stdout);
3b20f877 841 if (r < 0) {
63d77c92 842 write_confirm_error(r, vc, u);
3b20f877
FB
843 return CONFIRM_EXECUTE;
844 }
af6da548 845
b0eb2944
FB
846 /* confirm_spawn might have been disabled while we were sleeping. */
847 if (manager_is_confirm_spawn_disabled(u->manager)) {
848 r = 1;
849 goto restore_stdio;
850 }
af6da548 851
2bcd3c26
FB
852 e = ellipsize(cmdline, 60, 100);
853 if (!e) {
854 log_oom();
855 r = CONFIRM_EXECUTE;
856 goto restore_stdio;
857 }
af6da548 858
d172b175 859 for (;;) {
539622bd 860 r = ask_char(&c, "yfshiDjcn", "Execute %s? [y, f, s – h for help] ", e);
d172b175 861 if (r < 0) {
63d77c92 862 write_confirm_error_fd(r, STDOUT_FILENO, u);
d172b175
FB
863 r = CONFIRM_EXECUTE;
864 goto restore_stdio;
865 }
af6da548 866
d172b175 867 switch (c) {
b0eb2944
FB
868 case 'c':
869 printf("Resuming normal execution.\n");
870 manager_disable_confirm_spawn();
871 r = 1;
872 break;
dd6f9ac0
FB
873 case 'D':
874 unit_dump(u, stdout, " ");
875 continue; /* ask again */
d172b175
FB
876 case 'f':
877 printf("Failing execution.\n");
878 r = CONFIRM_PRETEND_FAILURE;
879 break;
880 case 'h':
b0eb2944
FB
881 printf(" c - continue, proceed without asking anymore\n"
882 " D - dump, show the state of the unit\n"
dd6f9ac0 883 " f - fail, don't execute the command and pretend it failed\n"
d172b175 884 " h - help\n"
eedf223a 885 " i - info, show a short summary of the unit\n"
56fde33a 886 " j - jobs, show jobs that are in progress\n"
d172b175
FB
887 " s - skip, don't execute the command and pretend it succeeded\n"
888 " y - yes, execute the command\n");
dd6f9ac0 889 continue; /* ask again */
eedf223a
FB
890 case 'i':
891 printf(" Description: %s\n"
892 " Unit: %s\n"
893 " Command: %s\n",
894 u->id, u->description, cmdline);
895 continue; /* ask again */
56fde33a
FB
896 case 'j':
897 manager_dump_jobs(u->manager, stdout, " ");
898 continue; /* ask again */
539622bd
FB
899 case 'n':
900 /* 'n' was removed in favor of 'f'. */
901 printf("Didn't understand 'n', did you mean 'f'?\n");
902 continue; /* ask again */
d172b175
FB
903 case 's':
904 printf("Skipping execution.\n");
905 r = CONFIRM_PRETEND_SUCCESS;
906 break;
907 case 'y':
908 r = CONFIRM_EXECUTE;
909 break;
910 default:
911 assert_not_reached("Unhandled choice");
912 }
3b20f877 913 break;
3b20f877 914 }
af6da548 915
3b20f877 916restore_stdio:
af6da548 917 restore_confirm_stdio(&saved_stdin, &saved_stdout);
af6da548 918 return r;
80876c20
LP
919}
920
4d885bd3
DH
921static int get_fixed_user(const ExecContext *c, const char **user,
922 uid_t *uid, gid_t *gid,
923 const char **home, const char **shell) {
81a2b7ce 924 int r;
4d885bd3 925 const char *name;
81a2b7ce 926
4d885bd3 927 assert(c);
81a2b7ce 928
23deef88
LP
929 if (!c->user)
930 return 0;
931
4d885bd3
DH
932 /* Note that we don't set $HOME or $SHELL if they are not particularly enlightening anyway
933 * (i.e. are "/" or "/bin/nologin"). */
81a2b7ce 934
23deef88 935 name = c->user;
4d885bd3
DH
936 r = get_user_creds_clean(&name, uid, gid, home, shell);
937 if (r < 0)
938 return r;
81a2b7ce 939
4d885bd3
DH
940 *user = name;
941 return 0;
942}
943
944static int get_fixed_group(const ExecContext *c, const char **group, gid_t *gid) {
945 int r;
946 const char *name;
947
948 assert(c);
949
950 if (!c->group)
951 return 0;
952
953 name = c->group;
954 r = get_group_creds(&name, gid);
955 if (r < 0)
956 return r;
957
958 *group = name;
959 return 0;
960}
961
cdc5d5c5
DH
962static int get_supplementary_groups(const ExecContext *c, const char *user,
963 const char *group, gid_t gid,
964 gid_t **supplementary_gids, int *ngids) {
4d885bd3
DH
965 char **i;
966 int r, k = 0;
967 int ngroups_max;
968 bool keep_groups = false;
969 gid_t *groups = NULL;
970 _cleanup_free_ gid_t *l_gids = NULL;
971
972 assert(c);
973
bbeea271
DH
974 /*
975 * If user is given, then lookup GID and supplementary groups list.
976 * We avoid NSS lookups for gid=0. Also we have to initialize groups
cdc5d5c5
DH
977 * here and as early as possible so we keep the list of supplementary
978 * groups of the caller.
bbeea271
DH
979 */
980 if (user && gid_is_valid(gid) && gid != 0) {
981 /* First step, initialize groups from /etc/groups */
982 if (initgroups(user, gid) < 0)
983 return -errno;
984
985 keep_groups = true;
986 }
987
ac6e8be6 988 if (strv_isempty(c->supplementary_groups))
4d885bd3
DH
989 return 0;
990
366ddd25
DH
991 /*
992 * If SupplementaryGroups= was passed then NGROUPS_MAX has to
993 * be positive, otherwise fail.
994 */
995 errno = 0;
996 ngroups_max = (int) sysconf(_SC_NGROUPS_MAX);
997 if (ngroups_max <= 0) {
998 if (errno > 0)
999 return -errno;
1000 else
1001 return -EOPNOTSUPP; /* For all other values */
1002 }
1003
4d885bd3
DH
1004 l_gids = new(gid_t, ngroups_max);
1005 if (!l_gids)
1006 return -ENOMEM;
81a2b7ce 1007
4d885bd3
DH
1008 if (keep_groups) {
1009 /*
1010 * Lookup the list of groups that the user belongs to, we
1011 * avoid NSS lookups here too for gid=0.
1012 */
1013 k = ngroups_max;
1014 if (getgrouplist(user, gid, l_gids, &k) < 0)
1015 return -EINVAL;
1016 } else
1017 k = 0;
81a2b7ce 1018
4d885bd3
DH
1019 STRV_FOREACH(i, c->supplementary_groups) {
1020 const char *g;
81a2b7ce 1021
4d885bd3
DH
1022 if (k >= ngroups_max)
1023 return -E2BIG;
81a2b7ce 1024
4d885bd3
DH
1025 g = *i;
1026 r = get_group_creds(&g, l_gids+k);
1027 if (r < 0)
1028 return r;
81a2b7ce 1029
4d885bd3
DH
1030 k++;
1031 }
81a2b7ce 1032
4d885bd3
DH
1033 /*
1034 * Sets ngids to zero to drop all supplementary groups, happens
1035 * when we are under root and SupplementaryGroups= is empty.
1036 */
1037 if (k == 0) {
1038 *ngids = 0;
1039 return 0;
1040 }
81a2b7ce 1041
4d885bd3
DH
1042 /* Otherwise get the final list of supplementary groups */
1043 groups = memdup(l_gids, sizeof(gid_t) * k);
1044 if (!groups)
1045 return -ENOMEM;
1046
1047 *supplementary_gids = groups;
1048 *ngids = k;
1049
1050 groups = NULL;
1051
1052 return 0;
1053}
1054
/* Applies the group credentials: first the supplementary groups list (if it is non-empty), then the
 * real, effective and saved GID (if 'gid' is valid). Returns 0 on success, a negative errno-style value
 * on failure. */
static int enforce_groups(gid_t gid, gid_t *supplementary_gids, int ngids) {

        /* Only handle SupplementaryGroups= if there is something to set */
        if (ngids > 0) {
                int r;

                r = maybe_setgroups(ngids, supplementary_gids);
                if (r < 0)
                        return r;
        }

        /* Then set our gids */
        if (gid_is_valid(gid) && setresgid(gid, gid, gid) < 0)
                return -errno;

        return 0;
}
1073
1074static int enforce_user(const ExecContext *context, uid_t uid) {
81a2b7ce
LP
1075 assert(context);
1076
4d885bd3
DH
1077 if (!uid_is_valid(uid))
1078 return 0;
1079
479050b3 1080 /* Sets (but doesn't look up) the uid and make sure we keep the
81a2b7ce
LP
1081 * capabilities while doing so. */
1082
479050b3 1083 if (context->capability_ambient_set != 0) {
81a2b7ce
LP
1084
1085 /* First step: If we need to keep capabilities but
1086 * drop privileges we need to make sure we keep our
cbb21cca 1087 * caps, while we drop privileges. */
693ced48 1088 if (uid != 0) {
cbb21cca 1089 int sb = context->secure_bits | 1<<SECURE_KEEP_CAPS;
693ced48
LP
1090
1091 if (prctl(PR_GET_SECUREBITS) != sb)
1092 if (prctl(PR_SET_SECUREBITS, sb) < 0)
1093 return -errno;
1094 }
81a2b7ce
LP
1095 }
1096
479050b3 1097 /* Second step: actually set the uids */
81a2b7ce
LP
1098 if (setresuid(uid, uid, uid) < 0)
1099 return -errno;
1100
1101 /* At this point we should have all necessary capabilities but
1102 are otherwise a normal user. However, the caps might got
1103 corrupted due to the setresuid() so we need clean them up
1104 later. This is done outside of this call. */
1105
1106 return 0;
1107}
1108
349cc4a5 1109#if HAVE_PAM
5b6319dc
LP
1110
1111static int null_conv(
1112 int num_msg,
1113 const struct pam_message **msg,
1114 struct pam_response **resp,
1115 void *appdata_ptr) {
1116
1117 /* We don't support conversations */
1118
1119 return PAM_CONV_ERR;
1120}
1121
cefc33ae
LP
1122#endif
1123
5b6319dc
LP
/* Opens a PAM session for the service 'name' and user 'user', and forks off a helper process that closes
 * the session again once the caller's process has gone away.
 *
 * name, user: passed to pam_start().
 * uid, gid:   credentials the helper process drops to (best effort, failures are only logged).
 * tty:        optional; set as PAM_TTY when non-NULL.
 * env:        in/out; every entry is exported to PAM via pam_putenv(). On success the old list is freed
 *             and replaced by the list returned by pam_getenvlist().
 * fds, n_fds: file descriptors that are closed in the helper process.
 *
 * Returns 0 on success (and always when built without PAM). On failure returns a negative errno-style
 * value; PAM failures are reported as -EPERM, since PAM error codes do not map to errno. */
static int setup_pam(
                const char *name,
                const char *user,
                uid_t uid,
                gid_t gid,
                const char *tty,
                char ***env,
                int fds[], unsigned n_fds) {

#if HAVE_PAM

        static const struct pam_conv conv = {
                .conv = null_conv,
                .appdata_ptr = NULL
        };

        _cleanup_(barrier_destroy) Barrier barrier = BARRIER_NULL;
        pam_handle_t *handle = NULL;
        sigset_t old_ss;
        int pam_code = PAM_SUCCESS, r;
        char **nv, **e = NULL;
        bool close_session = false;
        pid_t pam_pid = 0, parent_pid;
        int flags = 0;

        assert(name);
        assert(user);
        assert(env);

        /* We set up PAM in the parent process, then fork. The child
         * will then stay around until killed via PR_SET_PDEATHSIG or
         * systemd via the cgroup logic. It will then remove the PAM
         * session again. The parent process will exec() the actual
         * daemon. We do things this way to ensure that the main PID
         * of the daemon is the one we initially fork()ed. */

        r = barrier_create(&barrier);
        if (r < 0)
                goto fail;

        if (log_get_max_level() < LOG_DEBUG)
                flags |= PAM_SILENT;

        pam_code = pam_start(name, user, &conv, &handle);
        if (pam_code != PAM_SUCCESS) {
                handle = NULL;
                goto fail;
        }

        if (tty) {
                pam_code = pam_set_item(handle, PAM_TTY, tty);
                if (pam_code != PAM_SUCCESS)
                        goto fail;
        }

        STRV_FOREACH(nv, *env) {
                pam_code = pam_putenv(handle, *nv);
                if (pam_code != PAM_SUCCESS)
                        goto fail;
        }

        pam_code = pam_acct_mgmt(handle, flags);
        if (pam_code != PAM_SUCCESS)
                goto fail;

        pam_code = pam_open_session(handle, flags);
        if (pam_code != PAM_SUCCESS)
                goto fail;

        close_session = true;

        e = pam_getenvlist(handle);
        if (!e) {
                pam_code = PAM_BUF_ERR;
                goto fail;
        }

        /* Block SIGTERM, so that we know that it won't get lost in
         * the child */

        assert_se(sigprocmask_many(SIG_BLOCK, &old_ss, SIGTERM, -1) >= 0);

        parent_pid = getpid_cached();

        pam_pid = fork();
        if (pam_pid < 0) {
                r = -errno;

                /* No child exists that needs SIGTERM blocked, hence put the original mask back rather than
                 * leaving the signal blocked on the failure path. */
                (void) sigprocmask(SIG_SETMASK, &old_ss, NULL);
                goto fail;
        }

        if (pam_pid == 0) {
                int sig, ret = EXIT_PAM;

                /* The child's job is to reset the PAM session on
                 * termination */
                barrier_set_role(&barrier, BARRIER_CHILD);

                /* This string must fit in 10 chars (i.e. the length
                 * of "/sbin/init"), to look pretty in /bin/ps */
                rename_process("(sd-pam)");

                /* Make sure we don't keep open the passed fds in this
                child. We assume that otherwise only those fds are
                open here that have been opened by PAM. */
                close_many(fds, n_fds);

                /* Drop privileges - we don't need any to pam_close_session
                 * and this will make PR_SET_PDEATHSIG work in most cases.
                 * If this fails, ignore the error - but expect sd-pam threads
                 * to fail to exit normally */

                r = maybe_setgroups(0, NULL);
                if (r < 0)
                        log_warning_errno(r, "Failed to setgroups() in sd-pam: %m");
                if (setresgid(gid, gid, gid) < 0)
                        log_warning_errno(errno, "Failed to setresgid() in sd-pam: %m");
                if (setresuid(uid, uid, uid) < 0)
                        log_warning_errno(errno, "Failed to setresuid() in sd-pam: %m");

                (void) ignore_signals(SIGPIPE, -1);

                /* Wait until our parent died. This will only work if
                 * the above setresuid() succeeds, otherwise the kernel
                 * will not allow unprivileged parents kill their privileged
                 * children this way. We rely on the control groups kill logic
                 * to do the rest for us. */
                if (prctl(PR_SET_PDEATHSIG, SIGTERM) < 0)
                        goto child_finish;

                /* Tell the parent that our setup is done. This is especially
                 * important regarding dropping privileges. Otherwise, unit
                 * setup might race against our setresuid(2) call.
                 *
                 * If the parent aborted, we'll detect this below, hence ignore
                 * return failure here. */
                (void) barrier_place(&barrier);

                /* Check if our parent process might already have died? */
                if (getppid() == parent_pid) {
                        sigset_t ss;

                        assert_se(sigemptyset(&ss) >= 0);
                        assert_se(sigaddset(&ss, SIGTERM) >= 0);

                        for (;;) {
                                int k;

                                /* sigwait() reports failure by returning a positive error number; it
                                 * neither returns a negative value nor sets errno. Only look at 'sig'
                                 * after a successful wait. */
                                k = sigwait(&ss, &sig);
                                if (k == EINTR)
                                        continue;
                                if (k != 0)
                                        goto child_finish;

                                assert(sig == SIGTERM);
                                break;
                        }
                }

                /* If our parent died we'll end the session */
                if (getppid() != parent_pid) {
                        pam_code = pam_close_session(handle, flags);
                        if (pam_code != PAM_SUCCESS)
                                goto child_finish;
                }

                ret = 0;

        child_finish:
                pam_end(handle, pam_code | flags);
                _exit(ret);
        }

        barrier_set_role(&barrier, BARRIER_PARENT);

        /* If the child was forked off successfully it will do all the
         * cleanups, so forget about the handle here. */
        handle = NULL;

        /* Unblock SIGTERM again in the parent */
        assert_se(sigprocmask(SIG_SETMASK, &old_ss, NULL) >= 0);

        /* We close the log explicitly here, since the PAM modules
         * might have opened it, but we don't want this fd around. */
        closelog();

        /* Synchronously wait for the child to initialize. We don't care for
         * errors as we cannot recover. However, warn loudly if it happens. */
        if (!barrier_place_and_sync(&barrier))
                log_error("PAM initialization failed");

        /* Hand the environment list obtained from PAM to the caller, replacing the one passed in */
        strv_free(*env);
        *env = e;

        return 0;

fail:
        if (pam_code != PAM_SUCCESS) {
                log_error("PAM failed: %s", pam_strerror(handle, pam_code));
                r = -EPERM; /* PAM errors do not map to errno */
        } else
                log_error_errno(r, "PAM failed: %m");

        if (handle) {
                if (close_session)
                        pam_code = pam_close_session(handle, flags);

                pam_end(handle, pam_code | flags);
        }

        strv_free(e);
        closelog();

        return r;
#else
        return 0;
#endif
}
5b6319dc 1340
5d6b1584
LP
/* Renames the current process to "(<name>)", where <name> is the last path component of 'path', cut down
 * to its final 8 characters if longer. An empty component yields "(...)". */
static void rename_process_from_path(const char *path) {
        char buf[11]; /* '(' + up to 8 characters + ')' + NUL */
        const char *base;
        size_t n;

        /* The result is kept within 10 characters (the length of "/sbin/init"), so that it looks pretty in
         * /bin/ps */

        base = basename(path);
        if (isempty(base)) {
                rename_process("(...)");
                return;
        }

        n = strlen(base);
        if (n > 8) {
                /* Keep the tail: it is usually the more interesting part, as the beginning might just be
                 * "systemd-" */
                base += n - 8;
                n = 8;
        }

        buf[0] = '(';
        memcpy(buf + 1, base, n);
        buf[n + 1] = ')';
        buf[n + 2] = 0;

        rename_process(buf);
}
1371
469830d1
LP
1372static bool context_has_address_families(const ExecContext *c) {
1373 assert(c);
1374
1375 return c->address_families_whitelist ||
1376 !set_isempty(c->address_families);
1377}
1378
1379static bool context_has_syscall_filters(const ExecContext *c) {
1380 assert(c);
1381
1382 return c->syscall_whitelist ||
8cfa775f 1383 !hashmap_isempty(c->syscall_filter);
469830d1
LP
1384}
1385
1386static bool context_has_no_new_privileges(const ExecContext *c) {
1387 assert(c);
1388
1389 if (c->no_new_privileges)
1390 return true;
1391
1392 if (have_effective_cap(CAP_SYS_ADMIN)) /* if we are privileged, we don't need NNP */
1393 return false;
1394
1395 /* We need NNP if we have any form of seccomp and are unprivileged */
1396 return context_has_address_families(c) ||
1397 c->memory_deny_write_execute ||
1398 c->restrict_realtime ||
1399 exec_context_restrict_namespaces_set(c) ||
1400 c->protect_kernel_tunables ||
1401 c->protect_kernel_modules ||
1402 c->private_devices ||
1403 context_has_syscall_filters(c) ||
78e864e5
TM
1404 !set_isempty(c->syscall_archs) ||
1405 c->lock_personality;
469830d1
LP
1406}
1407
349cc4a5 1408#if HAVE_SECCOMP
17df7223 1409
83f12b27 1410static bool skip_seccomp_unavailable(const Unit* u, const char* msg) {
f673b62d
LP
1411
1412 if (is_seccomp_available())
1413 return false;
1414
f673b62d 1415 log_unit_debug(u, "SECCOMP features not detected in the kernel, skipping %s", msg);
f673b62d 1416 return true;
83f12b27
FS
1417}
1418
165a31c0 1419static int apply_syscall_filter(const Unit* u, const ExecContext *c, bool needs_ambient_hack) {
469830d1 1420 uint32_t negative_action, default_action, action;
165a31c0 1421 int r;
8351ceae 1422
469830d1 1423 assert(u);
c0467cf3 1424 assert(c);
8351ceae 1425
469830d1 1426 if (!context_has_syscall_filters(c))
83f12b27
FS
1427 return 0;
1428
469830d1
LP
1429 if (skip_seccomp_unavailable(u, "SystemCallFilter="))
1430 return 0;
e9642be2 1431
469830d1 1432 negative_action = c->syscall_errno == 0 ? SCMP_ACT_KILL : SCMP_ACT_ERRNO(c->syscall_errno);
e9642be2 1433
469830d1
LP
1434 if (c->syscall_whitelist) {
1435 default_action = negative_action;
1436 action = SCMP_ACT_ALLOW;
7c66bae2 1437 } else {
469830d1
LP
1438 default_action = SCMP_ACT_ALLOW;
1439 action = negative_action;
57183d11 1440 }
8351ceae 1441
165a31c0
LP
1442 if (needs_ambient_hack) {
1443 r = seccomp_filter_set_add(c->syscall_filter, c->syscall_whitelist, syscall_filter_sets + SYSCALL_FILTER_SET_SETUID);
1444 if (r < 0)
1445 return r;
1446 }
1447
469830d1 1448 return seccomp_load_syscall_filter_set_raw(default_action, c->syscall_filter, action);
4298d0b5
LP
1449}
1450
469830d1
LP
1451static int apply_syscall_archs(const Unit *u, const ExecContext *c) {
1452 assert(u);
4298d0b5
LP
1453 assert(c);
1454
469830d1 1455 if (set_isempty(c->syscall_archs))
83f12b27
FS
1456 return 0;
1457
469830d1
LP
1458 if (skip_seccomp_unavailable(u, "SystemCallArchitectures="))
1459 return 0;
4298d0b5 1460
469830d1
LP
1461 return seccomp_restrict_archs(c->syscall_archs);
1462}
4298d0b5 1463
469830d1
LP
1464static int apply_address_families(const Unit* u, const ExecContext *c) {
1465 assert(u);
1466 assert(c);
4298d0b5 1467
469830d1
LP
1468 if (!context_has_address_families(c))
1469 return 0;
4298d0b5 1470
469830d1
LP
1471 if (skip_seccomp_unavailable(u, "RestrictAddressFamilies="))
1472 return 0;
4298d0b5 1473
469830d1 1474 return seccomp_restrict_address_families(c->address_families, c->address_families_whitelist);
8351ceae 1475}
4298d0b5 1476
83f12b27 1477static int apply_memory_deny_write_execute(const Unit* u, const ExecContext *c) {
469830d1 1478 assert(u);
f3e43635
TM
1479 assert(c);
1480
469830d1 1481 if (!c->memory_deny_write_execute)
83f12b27
FS
1482 return 0;
1483
469830d1
LP
1484 if (skip_seccomp_unavailable(u, "MemoryDenyWriteExecute="))
1485 return 0;
f3e43635 1486
469830d1 1487 return seccomp_memory_deny_write_execute();
f3e43635
TM
1488}
1489
83f12b27 1490static int apply_restrict_realtime(const Unit* u, const ExecContext *c) {
469830d1 1491 assert(u);
f4170c67
LP
1492 assert(c);
1493
469830d1 1494 if (!c->restrict_realtime)
83f12b27
FS
1495 return 0;
1496
469830d1
LP
1497 if (skip_seccomp_unavailable(u, "RestrictRealtime="))
1498 return 0;
f4170c67 1499
469830d1 1500 return seccomp_restrict_realtime();
f4170c67
LP
1501}
1502
59e856c7 1503static int apply_protect_sysctl(const Unit *u, const ExecContext *c) {
469830d1 1504 assert(u);
59eeb84b
LP
1505 assert(c);
1506
1507 /* Turn off the legacy sysctl() system call. Many distributions turn this off while building the kernel, but
1508 * let's protect even those systems where this is left on in the kernel. */
1509
469830d1 1510 if (!c->protect_kernel_tunables)
59eeb84b
LP
1511 return 0;
1512
469830d1
LP
1513 if (skip_seccomp_unavailable(u, "ProtectKernelTunables="))
1514 return 0;
59eeb84b 1515
469830d1 1516 return seccomp_protect_sysctl();
59eeb84b
LP
1517}
1518
59e856c7 1519static int apply_protect_kernel_modules(const Unit *u, const ExecContext *c) {
469830d1 1520 assert(u);
502d704e
DH
1521 assert(c);
1522
25a8d8a0 1523 /* Turn off module syscalls on ProtectKernelModules=yes */
502d704e 1524
469830d1
LP
1525 if (!c->protect_kernel_modules)
1526 return 0;
1527
502d704e
DH
1528 if (skip_seccomp_unavailable(u, "ProtectKernelModules="))
1529 return 0;
1530
469830d1 1531 return seccomp_load_syscall_filter_set(SCMP_ACT_ALLOW, syscall_filter_sets + SYSCALL_FILTER_SET_MODULE, SCMP_ACT_ERRNO(EPERM));
502d704e
DH
1532}
1533
59e856c7 1534static int apply_private_devices(const Unit *u, const ExecContext *c) {
469830d1 1535 assert(u);
ba128bb8
LP
1536 assert(c);
1537
8f81a5f6 1538 /* If PrivateDevices= is set, also turn off iopl and all @raw-io syscalls. */
ba128bb8 1539
469830d1
LP
1540 if (!c->private_devices)
1541 return 0;
1542
ba128bb8
LP
1543 if (skip_seccomp_unavailable(u, "PrivateDevices="))
1544 return 0;
1545
469830d1 1546 return seccomp_load_syscall_filter_set(SCMP_ACT_ALLOW, syscall_filter_sets + SYSCALL_FILTER_SET_RAW_IO, SCMP_ACT_ERRNO(EPERM));
ba128bb8
LP
1547}
1548
add00535 1549static int apply_restrict_namespaces(Unit *u, const ExecContext *c) {
469830d1 1550 assert(u);
add00535
LP
1551 assert(c);
1552
1553 if (!exec_context_restrict_namespaces_set(c))
1554 return 0;
1555
1556 if (skip_seccomp_unavailable(u, "RestrictNamespaces="))
1557 return 0;
1558
1559 return seccomp_restrict_namespaces(c->restrict_namespaces);
1560}
1561
78e864e5 1562static int apply_lock_personality(const Unit* u, const ExecContext *c) {
e8132d63
LP
1563 unsigned long personality;
1564 int r;
78e864e5
TM
1565
1566 assert(u);
1567 assert(c);
1568
1569 if (!c->lock_personality)
1570 return 0;
1571
1572 if (skip_seccomp_unavailable(u, "LockPersonality="))
1573 return 0;
1574
e8132d63
LP
1575 personality = c->personality;
1576
1577 /* If personality is not specified, use either PER_LINUX or PER_LINUX32 depending on what is currently set. */
1578 if (personality == PERSONALITY_INVALID) {
1579
1580 r = opinionated_personality(&personality);
1581 if (r < 0)
1582 return r;
1583 }
78e864e5
TM
1584
1585 return seccomp_lock_personality(personality);
1586}
1587
c0467cf3 1588#endif
8351ceae 1589
31a7eb86
ZJS
1590static void do_idle_pipe_dance(int idle_pipe[4]) {
1591 assert(idle_pipe);
1592
54eb2300
LP
1593 idle_pipe[1] = safe_close(idle_pipe[1]);
1594 idle_pipe[2] = safe_close(idle_pipe[2]);
31a7eb86
ZJS
1595
1596 if (idle_pipe[0] >= 0) {
1597 int r;
1598
1599 r = fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT_USEC);
1600
1601 if (idle_pipe[3] >= 0 && r == 0 /* timeout */) {
c7cc737f
LP
1602 ssize_t n;
1603
31a7eb86 1604 /* Signal systemd that we are bored and want to continue. */
c7cc737f
LP
1605 n = write(idle_pipe[3], "x", 1);
1606 if (n > 0)
cd972d69
ZJS
1607 /* Wait for systemd to react to the signal above. */
1608 fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT2_USEC);
31a7eb86
ZJS
1609 }
1610
54eb2300 1611 idle_pipe[0] = safe_close(idle_pipe[0]);
31a7eb86
ZJS
1612
1613 }
1614
54eb2300 1615 idle_pipe[3] = safe_close(idle_pipe[3]);
31a7eb86
ZJS
1616}
1617
7cae38c4 1618static int build_environment(
fd63e712 1619 Unit *u,
9fa95f85 1620 const ExecContext *c,
1e22b5cd 1621 const ExecParameters *p,
7cae38c4
LP
1622 unsigned n_fds,
1623 const char *home,
1624 const char *username,
1625 const char *shell,
7bce046b
LP
1626 dev_t journal_stream_dev,
1627 ino_t journal_stream_ino,
7cae38c4
LP
1628 char ***ret) {
1629
1630 _cleanup_strv_free_ char **our_env = NULL;
1631 unsigned n_env = 0;
1632 char *x;
1633
4b58153d 1634 assert(u);
7cae38c4
LP
1635 assert(c);
1636 assert(ret);
1637
4b58153d 1638 our_env = new0(char*, 14);
7cae38c4
LP
1639 if (!our_env)
1640 return -ENOMEM;
1641
1642 if (n_fds > 0) {
8dd4c05b
LP
1643 _cleanup_free_ char *joined = NULL;
1644
df0ff127 1645 if (asprintf(&x, "LISTEN_PID="PID_FMT, getpid_cached()) < 0)
7cae38c4
LP
1646 return -ENOMEM;
1647 our_env[n_env++] = x;
1648
1649 if (asprintf(&x, "LISTEN_FDS=%u", n_fds) < 0)
1650 return -ENOMEM;
1651 our_env[n_env++] = x;
8dd4c05b 1652
1e22b5cd 1653 joined = strv_join(p->fd_names, ":");
8dd4c05b
LP
1654 if (!joined)
1655 return -ENOMEM;
1656
605405c6 1657 x = strjoin("LISTEN_FDNAMES=", joined);
8dd4c05b
LP
1658 if (!x)
1659 return -ENOMEM;
1660 our_env[n_env++] = x;
7cae38c4
LP
1661 }
1662
b08af3b1 1663 if ((p->flags & EXEC_SET_WATCHDOG) && p->watchdog_usec > 0) {
df0ff127 1664 if (asprintf(&x, "WATCHDOG_PID="PID_FMT, getpid_cached()) < 0)
09812eb7
LP
1665 return -ENOMEM;
1666 our_env[n_env++] = x;
1667
1e22b5cd 1668 if (asprintf(&x, "WATCHDOG_USEC="USEC_FMT, p->watchdog_usec) < 0)
09812eb7
LP
1669 return -ENOMEM;
1670 our_env[n_env++] = x;
1671 }
1672
fd63e712
LP
1673 /* If this is D-Bus, tell the nss-systemd module, since it relies on being able to use D-Bus look up dynamic
1674 * users via PID 1, possibly dead-locking the dbus daemon. This way it will not use D-Bus to resolve names, but
1675 * check the database directly. */
ac647978 1676 if (p->flags & EXEC_NSS_BYPASS_BUS) {
fd63e712
LP
1677 x = strdup("SYSTEMD_NSS_BYPASS_BUS=1");
1678 if (!x)
1679 return -ENOMEM;
1680 our_env[n_env++] = x;
1681 }
1682
7cae38c4
LP
1683 if (home) {
1684 x = strappend("HOME=", home);
1685 if (!x)
1686 return -ENOMEM;
1687 our_env[n_env++] = x;
1688 }
1689
1690 if (username) {
1691 x = strappend("LOGNAME=", username);
1692 if (!x)
1693 return -ENOMEM;
1694 our_env[n_env++] = x;
1695
1696 x = strappend("USER=", username);
1697 if (!x)
1698 return -ENOMEM;
1699 our_env[n_env++] = x;
1700 }
1701
1702 if (shell) {
1703 x = strappend("SHELL=", shell);
1704 if (!x)
1705 return -ENOMEM;
1706 our_env[n_env++] = x;
1707 }
1708
4b58153d
LP
1709 if (!sd_id128_is_null(u->invocation_id)) {
1710 if (asprintf(&x, "INVOCATION_ID=" SD_ID128_FORMAT_STR, SD_ID128_FORMAT_VAL(u->invocation_id)) < 0)
1711 return -ENOMEM;
1712
1713 our_env[n_env++] = x;
1714 }
1715
6af760f3
LP
1716 if (exec_context_needs_term(c)) {
1717 const char *tty_path, *term = NULL;
1718
1719 tty_path = exec_context_tty_path(c);
1720
1721 /* If we are forked off PID 1 and we are supposed to operate on /dev/console, then let's try to inherit
1722 * the $TERM set for PID 1. This is useful for containers so that the $TERM the container manager
1723 * passes to PID 1 ends up all the way in the console login shown. */
1724
1725 if (path_equal(tty_path, "/dev/console") && getppid() == 1)
1726 term = getenv("TERM");
1727 if (!term)
1728 term = default_term_for_tty(tty_path);
7cae38c4 1729
6af760f3 1730 x = strappend("TERM=", term);
7cae38c4
LP
1731 if (!x)
1732 return -ENOMEM;
1733 our_env[n_env++] = x;
1734 }
1735
7bce046b
LP
1736 if (journal_stream_dev != 0 && journal_stream_ino != 0) {
1737 if (asprintf(&x, "JOURNAL_STREAM=" DEV_FMT ":" INO_FMT, journal_stream_dev, journal_stream_ino) < 0)
1738 return -ENOMEM;
1739
1740 our_env[n_env++] = x;
1741 }
1742
7cae38c4 1743 our_env[n_env++] = NULL;
7bce046b 1744 assert(n_env <= 12);
7cae38c4
LP
1745
1746 *ret = our_env;
1747 our_env = NULL;
1748
1749 return 0;
1750}
1751
b4c14404
FB
1752static int build_pass_environment(const ExecContext *c, char ***ret) {
1753 _cleanup_strv_free_ char **pass_env = NULL;
1754 size_t n_env = 0, n_bufsize = 0;
1755 char **i;
1756
1757 STRV_FOREACH(i, c->pass_environment) {
1758 _cleanup_free_ char *x = NULL;
1759 char *v;
1760
1761 v = getenv(*i);
1762 if (!v)
1763 continue;
605405c6 1764 x = strjoin(*i, "=", v);
b4c14404
FB
1765 if (!x)
1766 return -ENOMEM;
00819cc1 1767
b4c14404
FB
1768 if (!GREEDY_REALLOC(pass_env, n_bufsize, n_env + 2))
1769 return -ENOMEM;
00819cc1 1770
b4c14404
FB
1771 pass_env[n_env++] = x;
1772 pass_env[n_env] = NULL;
1773 x = NULL;
1774 }
1775
1776 *ret = pass_env;
1777 pass_env = NULL;
1778
1779 return 0;
1780}
1781
8b44a3d2
LP
1782static bool exec_needs_mount_namespace(
1783 const ExecContext *context,
1784 const ExecParameters *params,
1785 ExecRuntime *runtime) {
1786
1787 assert(context);
1788 assert(params);
1789
915e6d16
LP
1790 if (context->root_image)
1791 return true;
1792
2a624c36
AP
1793 if (!strv_isempty(context->read_write_paths) ||
1794 !strv_isempty(context->read_only_paths) ||
1795 !strv_isempty(context->inaccessible_paths))
8b44a3d2
LP
1796 return true;
1797
652bb263
YW
1798 if (context->n_bind_mounts > 0 ||
1799 !strv_isempty(context->directories[EXEC_DIRECTORY_RUNTIME].paths) ||
1800 !strv_isempty(context->directories[EXEC_DIRECTORY_STATE].paths) ||
1801 !strv_isempty(context->directories[EXEC_DIRECTORY_CACHE].paths) ||
1802 !strv_isempty(context->directories[EXEC_DIRECTORY_LOGS].paths) ||
1803 !strv_isempty(context->directories[EXEC_DIRECTORY_CONFIGURATION].paths))
d2d6c096
LP
1804 return true;
1805
8b44a3d2
LP
1806 if (context->mount_flags != 0)
1807 return true;
1808
1809 if (context->private_tmp && runtime && (runtime->tmp_dir || runtime->var_tmp_dir))
1810 return true;
1811
8b44a3d2
LP
1812 if (context->private_devices ||
1813 context->protect_system != PROTECT_SYSTEM_NO ||
59eeb84b
LP
1814 context->protect_home != PROTECT_HOME_NO ||
1815 context->protect_kernel_tunables ||
c575770b 1816 context->protect_kernel_modules ||
59eeb84b 1817 context->protect_control_groups)
8b44a3d2
LP
1818 return true;
1819
9c988f93 1820 if (context->mount_apivfs && (context->root_image || context->root_directory))
5d997827
LP
1821 return true;
1822
8b44a3d2
LP
1823 return false;
1824}
1825
d251207d
LP
1826static int setup_private_users(uid_t uid, gid_t gid) {
1827 _cleanup_free_ char *uid_map = NULL, *gid_map = NULL;
1828 _cleanup_close_pair_ int errno_pipe[2] = { -1, -1 };
1829 _cleanup_close_ int unshare_ready_fd = -1;
1830 _cleanup_(sigkill_waitp) pid_t pid = 0;
1831 uint64_t c = 1;
1832 siginfo_t si;
1833 ssize_t n;
1834 int r;
1835
1836 /* Set up a user namespace and map root to root, the selected UID/GID to itself, and everything else to
1837 * nobody. In order to be able to write this mapping we need CAP_SETUID in the original user namespace, which
1838 * we however lack after opening the user namespace. To work around this we fork() a temporary child process,
1839 * which waits for the parent to create the new user namespace while staying in the original namespace. The
1840 * child then writes the UID mapping, under full privileges. The parent waits for the child to finish and
1841 * continues execution normally. */
1842
587ab01b
ZJS
1843 if (uid != 0 && uid_is_valid(uid)) {
1844 r = asprintf(&uid_map,
1845 "0 0 1\n" /* Map root → root */
1846 UID_FMT " " UID_FMT " 1\n", /* Map $UID → $UID */
1847 uid, uid);
1848 if (r < 0)
1849 return -ENOMEM;
1850 } else {
e0f3720e 1851 uid_map = strdup("0 0 1\n"); /* The case where the above is the same */
587ab01b
ZJS
1852 if (!uid_map)
1853 return -ENOMEM;
1854 }
d251207d 1855
587ab01b
ZJS
1856 if (gid != 0 && gid_is_valid(gid)) {
1857 r = asprintf(&gid_map,
1858 "0 0 1\n" /* Map root → root */
1859 GID_FMT " " GID_FMT " 1\n", /* Map $GID → $GID */
1860 gid, gid);
1861 if (r < 0)
1862 return -ENOMEM;
1863 } else {
d251207d 1864 gid_map = strdup("0 0 1\n"); /* The case where the above is the same */
587ab01b
ZJS
1865 if (!gid_map)
1866 return -ENOMEM;
1867 }
d251207d
LP
1868
1869 /* Create a communication channel so that the parent can tell the child when it finished creating the user
1870 * namespace. */
1871 unshare_ready_fd = eventfd(0, EFD_CLOEXEC);
1872 if (unshare_ready_fd < 0)
1873 return -errno;
1874
1875 /* Create a communication channel so that the child can tell the parent a proper error code in case it
1876 * failed. */
1877 if (pipe2(errno_pipe, O_CLOEXEC) < 0)
1878 return -errno;
1879
1880 pid = fork();
1881 if (pid < 0)
1882 return -errno;
1883
1884 if (pid == 0) {
1885 _cleanup_close_ int fd = -1;
1886 const char *a;
1887 pid_t ppid;
1888
1889 /* Child process, running in the original user namespace. Let's update the parent's UID/GID map from
1890 * here, after the parent opened its own user namespace. */
1891
1892 ppid = getppid();
1893 errno_pipe[0] = safe_close(errno_pipe[0]);
1894
1895 /* Wait until the parent unshared the user namespace */
1896 if (read(unshare_ready_fd, &c, sizeof(c)) < 0) {
1897 r = -errno;
1898 goto child_fail;
1899 }
1900
1901 /* Disable the setgroups() system call in the child user namespace, for good. */
1902 a = procfs_file_alloca(ppid, "setgroups");
1903 fd = open(a, O_WRONLY|O_CLOEXEC);
1904 if (fd < 0) {
1905 if (errno != ENOENT) {
1906 r = -errno;
1907 goto child_fail;
1908 }
1909
1910 /* If the file is missing the kernel is too old, let's continue anyway. */
1911 } else {
1912 if (write(fd, "deny\n", 5) < 0) {
1913 r = -errno;
1914 goto child_fail;
1915 }
1916
1917 fd = safe_close(fd);
1918 }
1919
1920 /* First write the GID map */
1921 a = procfs_file_alloca(ppid, "gid_map");
1922 fd = open(a, O_WRONLY|O_CLOEXEC);
1923 if (fd < 0) {
1924 r = -errno;
1925 goto child_fail;
1926 }
1927 if (write(fd, gid_map, strlen(gid_map)) < 0) {
1928 r = -errno;
1929 goto child_fail;
1930 }
1931 fd = safe_close(fd);
1932
1933 /* The write the UID map */
1934 a = procfs_file_alloca(ppid, "uid_map");
1935 fd = open(a, O_WRONLY|O_CLOEXEC);
1936 if (fd < 0) {
1937 r = -errno;
1938 goto child_fail;
1939 }
1940 if (write(fd, uid_map, strlen(uid_map)) < 0) {
1941 r = -errno;
1942 goto child_fail;
1943 }
1944
1945 _exit(EXIT_SUCCESS);
1946
1947 child_fail:
1948 (void) write(errno_pipe[1], &r, sizeof(r));
1949 _exit(EXIT_FAILURE);
1950 }
1951
1952 errno_pipe[1] = safe_close(errno_pipe[1]);
1953
1954 if (unshare(CLONE_NEWUSER) < 0)
1955 return -errno;
1956
1957 /* Let the child know that the namespace is ready now */
1958 if (write(unshare_ready_fd, &c, sizeof(c)) < 0)
1959 return -errno;
1960
1961 /* Try to read an error code from the child */
1962 n = read(errno_pipe[0], &r, sizeof(r));
1963 if (n < 0)
1964 return -errno;
1965 if (n == sizeof(r)) { /* an error code was sent to us */
1966 if (r < 0)
1967 return r;
1968 return -EIO;
1969 }
1970 if (n != 0) /* on success we should have read 0 bytes */
1971 return -EIO;
1972
1973 r = wait_for_terminate(pid, &si);
1974 if (r < 0)
1975 return r;
1976 pid = 0;
1977
1978 /* If something strange happened with the child, let's consider this fatal, too */
1979 if (si.si_code != CLD_EXITED || si.si_status != 0)
1980 return -EIO;
1981
1982 return 0;
1983}
1984
3536f49e 1985static int setup_exec_directory(
07689d5d
LP
1986 const ExecContext *context,
1987 const ExecParameters *params,
1988 uid_t uid,
3536f49e 1989 gid_t gid,
3536f49e
YW
1990 ExecDirectoryType type,
1991 int *exit_status) {
07689d5d 1992
72fd1768 1993 static const int exit_status_table[_EXEC_DIRECTORY_TYPE_MAX] = {
3536f49e
YW
1994 [EXEC_DIRECTORY_RUNTIME] = EXIT_RUNTIME_DIRECTORY,
1995 [EXEC_DIRECTORY_STATE] = EXIT_STATE_DIRECTORY,
1996 [EXEC_DIRECTORY_CACHE] = EXIT_CACHE_DIRECTORY,
1997 [EXEC_DIRECTORY_LOGS] = EXIT_LOGS_DIRECTORY,
1998 [EXEC_DIRECTORY_CONFIGURATION] = EXIT_CONFIGURATION_DIRECTORY,
1999 };
07689d5d
LP
2000 char **rt;
2001 int r;
2002
2003 assert(context);
2004 assert(params);
72fd1768 2005 assert(type >= 0 && type < _EXEC_DIRECTORY_TYPE_MAX);
3536f49e 2006 assert(exit_status);
07689d5d 2007
3536f49e
YW
2008 if (!params->prefix[type])
2009 return 0;
2010
8679efde 2011 if (params->flags & EXEC_CHOWN_DIRECTORIES) {
3536f49e
YW
2012 if (!uid_is_valid(uid))
2013 uid = 0;
2014 if (!gid_is_valid(gid))
2015 gid = 0;
2016 }
2017
2018 STRV_FOREACH(rt, context->directories[type].paths) {
6c47cd7d
LP
2019 _cleanup_free_ char *p = NULL, *pp = NULL;
2020 const char *effective;
07689d5d 2021
3536f49e
YW
2022 p = strjoin(params->prefix[type], "/", *rt);
2023 if (!p) {
2024 r = -ENOMEM;
2025 goto fail;
2026 }
07689d5d 2027
23a7448e
YW
2028 r = mkdir_parents_label(p, 0755);
2029 if (r < 0)
3536f49e 2030 goto fail;
23a7448e 2031
8092a48c
YW
2032 if (context->dynamic_user &&
2033 !IN_SET(type, EXEC_DIRECTORY_RUNTIME, EXEC_DIRECTORY_CONFIGURATION)) {
6c47cd7d
LP
2034 _cleanup_free_ char *private_root = NULL, *relative = NULL, *parent = NULL;
2035
2036 /* So, here's one extra complication when dealing with DynamicUser=1 units. In that case we
2037 * want to avoid leaving a directory around fully accessible that is owned by a dynamic user
2038 * whose UID is later on reused. To lock this down we use the same trick used by container
2039 * managers to prohibit host users to get access to files of the same UID in containers: we
2040 * place everything inside a directory that has an access mode of 0700 and is owned root:root,
2041 * so that it acts as security boundary for unprivileged host code. We then use fs namespacing
2042 * to make this directory permeable for the service itself.
2043 *
2044 * Specifically: for a service which wants a special directory "foo/" we first create a
2045 * directory "private/" with access mode 0700 owned by root:root. Then we place "foo" inside of
2046 * that directory (i.e. "private/foo/"), and make "foo" a symlink to "private/foo". This way,
2047 * privileged host users can access "foo/" as usual, but unprivileged host users can't look
2048 * into it. Inside of the namespaceof the container "private/" is replaced by a more liberally
2049 * accessible tmpfs, into which the host's "private/foo/" is mounted under the same name, thus
2050 * disabling the access boundary for the service and making sure it only gets access to the
2051 * dirs it needs but no others. Tricky? Yes, absolutely, but it works!
2052 *
2053 * Note that we don't do this for EXEC_DIRECTORY_CONFIGURATION as that's assumed not to be
8092a48c
YW
2054 * owned by the service itself.
2055 * Also, note that we don't do this for EXEC_DIRECTORY_RUNTIME as that's often used for sharing
2056 * files or sockets with other services. */
6c47cd7d
LP
2057
2058 private_root = strjoin(params->prefix[type], "/private");
2059 if (!private_root) {
2060 r = -ENOMEM;
2061 goto fail;
2062 }
2063
2064 /* First set up private root if it doesn't exist yet, with access mode 0700 and owned by root:root */
c31ad024 2065 r = mkdir_safe_label(private_root, 0700, 0, 0, false);
6c47cd7d
LP
2066 if (r < 0)
2067 goto fail;
2068
2069 pp = strjoin(private_root, "/", *rt);
2070 if (!pp) {
2071 r = -ENOMEM;
2072 goto fail;
2073 }
2074
2075 /* Create all directories between the configured directory and this private root, and mark them 0755 */
2076 r = mkdir_parents_label(pp, 0755);
2077 if (r < 0)
2078 goto fail;
2079
2080 /* Finally, create the actual directory for the service */
2081 r = mkdir_label(pp, context->directories[type].mode);
2082 if (r < 0 && r != -EEXIST)
2083 goto fail;
2084
2085 parent = dirname_malloc(p);
2086 if (!parent) {
2087 r = -ENOMEM;
2088 goto fail;
2089 }
2090
2091 r = path_make_relative(parent, pp, &relative);
2092 if (r < 0)
2093 goto fail;
2094
2095 /* And link it up from the original place */
2096 r = symlink_idempotent(relative, p);
2097 if (r < 0)
2098 goto fail;
2099
2100 effective = pp;
2101
2102 } else {
2103 r = mkdir_label(p, context->directories[type].mode);
2104 if (r < 0 && r != -EEXIST)
2105 goto fail;
2106
2107 effective = p;
2108 }
a1164ae3
LP
2109
2110 /* First lock down the access mode */
6c47cd7d 2111 if (chmod(effective, context->directories[type].mode) < 0) {
a1164ae3 2112 r = -errno;
3536f49e 2113 goto fail;
a1164ae3 2114 }
07689d5d 2115
c71b2eb7
LP
2116 /* Don't change the owner of the configuration directory, as in the common case it is not written to by
2117 * a service, and shall not be writable. */
2118 if (type == EXEC_DIRECTORY_CONFIGURATION)
2119 continue;
2120
a1164ae3 2121 /* Then, change the ownership of the whole tree, if necessary */
6c47cd7d 2122 r = path_chown_recursive(effective, uid, gid);
07689d5d 2123 if (r < 0)
3536f49e 2124 goto fail;
07689d5d
LP
2125 }
2126
2127 return 0;
3536f49e
YW
2128
2129fail:
2130 *exit_status = exit_status_table[type];
3536f49e 2131 return r;
07689d5d
LP
2132}
2133
cefc33ae
LP
2134static int setup_smack(
2135 const ExecContext *context,
2136 const ExecCommand *command) {
2137
cefc33ae
LP
2138 int r;
2139
2140 assert(context);
2141 assert(command);
2142
cefc33ae
LP
2143 if (context->smack_process_label) {
2144 r = mac_smack_apply_pid(0, context->smack_process_label);
2145 if (r < 0)
2146 return r;
2147 }
2148#ifdef SMACK_DEFAULT_PROCESS_LABEL
2149 else {
2150 _cleanup_free_ char *exec_label = NULL;
2151
2152 r = mac_smack_read(command->path, SMACK_ATTR_EXEC, &exec_label);
4c701096 2153 if (r < 0 && !IN_SET(r, -ENODATA, -EOPNOTSUPP))
cefc33ae
LP
2154 return r;
2155
2156 r = mac_smack_apply_pid(0, exec_label ? : SMACK_DEFAULT_PROCESS_LABEL);
2157 if (r < 0)
2158 return r;
2159 }
cefc33ae
LP
2160#endif
2161
2162 return 0;
2163}
2164
6c47cd7d
LP
2165static int compile_bind_mounts(
2166 const ExecContext *context,
2167 const ExecParameters *params,
2168 BindMount **ret_bind_mounts,
2169 unsigned *ret_n_bind_mounts,
2170 char ***ret_empty_directories) {
2171
2172 _cleanup_strv_free_ char **empty_directories = NULL;
2173 BindMount *bind_mounts;
2174 unsigned n, h = 0, i;
2175 ExecDirectoryType t;
2176 int r;
2177
2178 assert(context);
2179 assert(params);
2180 assert(ret_bind_mounts);
2181 assert(ret_n_bind_mounts);
2182 assert(ret_empty_directories);
2183
2184 n = context->n_bind_mounts;
2185 for (t = 0; t < _EXEC_DIRECTORY_TYPE_MAX; t++) {
2186 if (!params->prefix[t])
2187 continue;
2188
2189 n += strv_length(context->directories[t].paths);
2190 }
2191
2192 if (n <= 0) {
2193 *ret_bind_mounts = NULL;
2194 *ret_n_bind_mounts = 0;
2195 *ret_empty_directories = NULL;
2196 return 0;
2197 }
2198
2199 bind_mounts = new(BindMount, n);
2200 if (!bind_mounts)
2201 return -ENOMEM;
2202
a8cabc61 2203 for (i = 0; i < context->n_bind_mounts; i++) {
6c47cd7d
LP
2204 BindMount *item = context->bind_mounts + i;
2205 char *s, *d;
2206
2207 s = strdup(item->source);
2208 if (!s) {
2209 r = -ENOMEM;
2210 goto finish;
2211 }
2212
2213 d = strdup(item->destination);
2214 if (!d) {
2215 free(s);
2216 r = -ENOMEM;
2217 goto finish;
2218 }
2219
2220 bind_mounts[h++] = (BindMount) {
2221 .source = s,
2222 .destination = d,
2223 .read_only = item->read_only,
2224 .recursive = item->recursive,
2225 .ignore_enoent = item->ignore_enoent,
2226 };
2227 }
2228
2229 for (t = 0; t < _EXEC_DIRECTORY_TYPE_MAX; t++) {
2230 char **suffix;
2231
2232 if (!params->prefix[t])
2233 continue;
2234
2235 if (strv_isempty(context->directories[t].paths))
2236 continue;
2237
8092a48c
YW
2238 if (context->dynamic_user &&
2239 !IN_SET(t, EXEC_DIRECTORY_RUNTIME, EXEC_DIRECTORY_CONFIGURATION)) {
6c47cd7d
LP
2240 char *private_root;
2241
2242 /* So this is for a dynamic user, and we need to make sure the process can access its own
2243 * directory. For that we overmount the usually inaccessible "private" subdirectory with a
2244 * tmpfs that makes it accessible and is empty except for the submounts we do this for. */
2245
2246 private_root = strjoin(params->prefix[t], "/private");
2247 if (!private_root) {
2248 r = -ENOMEM;
2249 goto finish;
2250 }
2251
2252 r = strv_consume(&empty_directories, private_root);
2253 if (r < 0) {
2254 r = -ENOMEM;
2255 goto finish;
2256 }
2257 }
2258
2259 STRV_FOREACH(suffix, context->directories[t].paths) {
2260 char *s, *d;
2261
8092a48c
YW
2262 if (context->dynamic_user &&
2263 !IN_SET(t, EXEC_DIRECTORY_RUNTIME, EXEC_DIRECTORY_CONFIGURATION))
6c47cd7d
LP
2264 s = strjoin(params->prefix[t], "/private/", *suffix);
2265 else
2266 s = strjoin(params->prefix[t], "/", *suffix);
2267 if (!s) {
2268 r = -ENOMEM;
2269 goto finish;
2270 }
2271
2272 d = strdup(s);
2273 if (!d) {
2274 free(s);
2275 r = -ENOMEM;
2276 goto finish;
2277 }
2278
2279 bind_mounts[h++] = (BindMount) {
2280 .source = s,
2281 .destination = d,
2282 .read_only = false,
2283 .recursive = true,
2284 .ignore_enoent = false,
2285 };
2286 }
2287 }
2288
2289 assert(h == n);
2290
2291 *ret_bind_mounts = bind_mounts;
2292 *ret_n_bind_mounts = n;
2293 *ret_empty_directories = empty_directories;
2294
2295 empty_directories = NULL;
2296
2297 return (int) n;
2298
2299finish:
2300 bind_mount_free_many(bind_mounts, h);
2301 return r;
2302}
2303
6818c54c
LP
2304static int apply_mount_namespace(
2305 Unit *u,
2306 ExecCommand *command,
2307 const ExecContext *context,
2308 const ExecParameters *params,
2309 ExecRuntime *runtime) {
2310
7bcef4ef 2311 _cleanup_strv_free_ char **empty_directories = NULL;
93c6bb51 2312 char *tmp = NULL, *var = NULL;
915e6d16 2313 const char *root_dir = NULL, *root_image = NULL;
bb0ff3fb 2314 NamespaceInfo ns_info = {
af964954 2315 .ignore_protect_paths = false,
93c6bb51
DH
2316 .private_dev = context->private_devices,
2317 .protect_control_groups = context->protect_control_groups,
2318 .protect_kernel_tunables = context->protect_kernel_tunables,
2319 .protect_kernel_modules = context->protect_kernel_modules,
5d997827 2320 .mount_apivfs = context->mount_apivfs,
93c6bb51 2321 };
165a31c0 2322 bool needs_sandboxing;
6c47cd7d
LP
2323 BindMount *bind_mounts = NULL;
2324 unsigned n_bind_mounts = 0;
6818c54c 2325 int r;
93c6bb51 2326
2b3c1b9e
DH
2327 assert(context);
2328
93c6bb51
DH
2329 /* The runtime struct only contains the parent of the private /tmp,
2330 * which is non-accessible to world users. Inside of it there's a /tmp
2331 * that is sticky, and that's the one we want to use here. */
2332
2333 if (context->private_tmp && runtime) {
2334 if (runtime->tmp_dir)
2335 tmp = strjoina(runtime->tmp_dir, "/tmp");
2336 if (runtime->var_tmp_dir)
2337 var = strjoina(runtime->var_tmp_dir, "/tmp");
2338 }
2339
915e6d16
LP
2340 if (params->flags & EXEC_APPLY_CHROOT) {
2341 root_image = context->root_image;
2342
2343 if (!root_image)
2344 root_dir = context->root_directory;
2345 }
93c6bb51 2346
6c47cd7d
LP
2347 r = compile_bind_mounts(context, params, &bind_mounts, &n_bind_mounts, &empty_directories);
2348 if (r < 0)
2349 return r;
2350
af964954
DH
2351 /*
2352 * If DynamicUser=no and RootDirectory= is set then lets pass a relaxed
2353 * sandbox info, otherwise enforce it, don't ignore protected paths and
2354 * fail if we are enable to apply the sandbox inside the mount namespace.
2355 */
2356 if (!context->dynamic_user && root_dir)
2357 ns_info.ignore_protect_paths = true;
2358
165a31c0 2359 needs_sandboxing = (params->flags & EXEC_APPLY_SANDBOXING) && !(command->flags & EXEC_COMMAND_FULLY_PRIVILEGED);
6818c54c 2360
915e6d16 2361 r = setup_namespace(root_dir, root_image,
7bcef4ef 2362 &ns_info, context->read_write_paths,
165a31c0
LP
2363 needs_sandboxing ? context->read_only_paths : NULL,
2364 needs_sandboxing ? context->inaccessible_paths : NULL,
6c47cd7d
LP
2365 empty_directories,
2366 bind_mounts,
2367 n_bind_mounts,
93c6bb51
DH
2368 tmp,
2369 var,
165a31c0
LP
2370 needs_sandboxing ? context->protect_home : PROTECT_HOME_NO,
2371 needs_sandboxing ? context->protect_system : PROTECT_SYSTEM_NO,
915e6d16
LP
2372 context->mount_flags,
2373 DISSECT_IMAGE_DISCARD_ON_LOOP);
93c6bb51 2374
6c47cd7d
LP
2375 bind_mount_free_many(bind_mounts, n_bind_mounts);
2376
93c6bb51
DH
2377 /* If we couldn't set up the namespace this is probably due to a
2378 * missing capability. In this case, silently proceeed. */
2379 if (IN_SET(r, -EPERM, -EACCES)) {
93c6bb51 2380 log_unit_debug_errno(u, r, "Failed to set up namespace, assuming containerized execution, ignoring: %m");
86ffb325 2381 return 0;
93c6bb51
DH
2382 }
2383
2384 return r;
2385}
2386
915e6d16
LP
2387static int apply_working_directory(
2388 const ExecContext *context,
2389 const ExecParameters *params,
2390 const char *home,
376fecf6
LP
2391 const bool needs_mount_ns,
2392 int *exit_status) {
915e6d16 2393
6732edab 2394 const char *d, *wd;
2b3c1b9e
DH
2395
2396 assert(context);
376fecf6 2397 assert(exit_status);
2b3c1b9e 2398
6732edab
LP
2399 if (context->working_directory_home) {
2400
376fecf6
LP
2401 if (!home) {
2402 *exit_status = EXIT_CHDIR;
6732edab 2403 return -ENXIO;
376fecf6 2404 }
6732edab 2405
2b3c1b9e 2406 wd = home;
6732edab
LP
2407
2408 } else if (context->working_directory)
2b3c1b9e
DH
2409 wd = context->working_directory;
2410 else
2411 wd = "/";
e7f1e7c6
DH
2412
2413 if (params->flags & EXEC_APPLY_CHROOT) {
2414 if (!needs_mount_ns && context->root_directory)
376fecf6
LP
2415 if (chroot(context->root_directory) < 0) {
2416 *exit_status = EXIT_CHROOT;
e7f1e7c6 2417 return -errno;
376fecf6 2418 }
e7f1e7c6 2419
2b3c1b9e
DH
2420 d = wd;
2421 } else
3b0e5bb5 2422 d = prefix_roota(context->root_directory, wd);
e7f1e7c6 2423
376fecf6
LP
2424 if (chdir(d) < 0 && !context->working_directory_missing_ok) {
2425 *exit_status = EXIT_CHDIR;
2b3c1b9e 2426 return -errno;
376fecf6 2427 }
e7f1e7c6
DH
2428
2429 return 0;
2430}
2431
b1edf445
LP
2432static int setup_keyring(
2433 Unit *u,
2434 const ExecContext *context,
2435 const ExecParameters *p,
2436 uid_t uid, gid_t gid) {
2437
74dd6b51 2438 key_serial_t keyring;
b1edf445 2439 int r;
74dd6b51
LP
2440
2441 assert(u);
b1edf445 2442 assert(context);
74dd6b51
LP
2443 assert(p);
2444
2445 /* Let's set up a new per-service "session" kernel keyring for each system service. This has the benefit that
2446 * each service runs with its own keyring shared among all processes of the service, but with no hook-up beyond
2447 * that scope, and in particular no link to the per-UID keyring. If we don't do this the keyring will be
2448 * automatically created on-demand and then linked to the per-UID keyring, by the kernel. The kernel's built-in
2449 * on-demand behaviour is very appropriate for login users, but probably not so much for system services, where
2450 * UIDs are not necessarily specific to a service but reused (at least in the case of UID 0). */
2451
2452 if (!(p->flags & EXEC_NEW_KEYRING))
2453 return 0;
2454
b1edf445
LP
2455 if (context->keyring_mode == EXEC_KEYRING_INHERIT)
2456 return 0;
2457
74dd6b51
LP
2458 keyring = keyctl(KEYCTL_JOIN_SESSION_KEYRING, 0, 0, 0, 0);
2459 if (keyring == -1) {
2460 if (errno == ENOSYS)
8002fb97 2461 log_unit_debug_errno(u, errno, "Kernel keyring not supported, ignoring.");
74dd6b51 2462 else if (IN_SET(errno, EACCES, EPERM))
8002fb97 2463 log_unit_debug_errno(u, errno, "Kernel keyring access prohibited, ignoring.");
74dd6b51 2464 else if (errno == EDQUOT)
8002fb97 2465 log_unit_debug_errno(u, errno, "Out of kernel keyrings to allocate, ignoring.");
74dd6b51 2466 else
8002fb97 2467 return log_unit_error_errno(u, errno, "Setting up kernel keyring failed: %m");
74dd6b51
LP
2468
2469 return 0;
2470 }
2471
b3415f5d
LP
2472 /* Populate they keyring with the invocation ID by default. */
2473 if (!sd_id128_is_null(u->invocation_id)) {
2474 key_serial_t key;
2475
2476 key = add_key("user", "invocation_id", &u->invocation_id, sizeof(u->invocation_id), KEY_SPEC_SESSION_KEYRING);
2477 if (key == -1)
8002fb97 2478 log_unit_debug_errno(u, errno, "Failed to add invocation ID to keyring, ignoring: %m");
b3415f5d
LP
2479 else {
2480 if (keyctl(KEYCTL_SETPERM, key,
2481 KEY_POS_VIEW|KEY_POS_READ|KEY_POS_SEARCH|
2482 KEY_USR_VIEW|KEY_USR_READ|KEY_USR_SEARCH, 0, 0) < 0)
8002fb97 2483 return log_unit_error_errno(u, errno, "Failed to restrict invocation ID permission: %m");
b3415f5d
LP
2484 }
2485 }
2486
74dd6b51
LP
2487 /* And now, make the keyring owned by the service's user */
2488 if (uid_is_valid(uid) || gid_is_valid(gid))
2489 if (keyctl(KEYCTL_CHOWN, keyring, uid, gid, 0) < 0)
8002fb97 2490 return log_unit_error_errno(u, errno, "Failed to change ownership of session keyring: %m");
74dd6b51 2491
b1edf445
LP
2492 /* When requested link the user keyring into the session keyring. */
2493 if (context->keyring_mode == EXEC_KEYRING_SHARED) {
2494 uid_t saved_uid;
2495 gid_t saved_gid;
2496
2497 /* Acquiring a reference to the user keyring is nasty. We briefly change identity in order to get things
2498 * set up properly by the kernel. If we don't do that then we can't create it atomically, and that
2499 * sucks for parallel execution. This mimics what pam_keyinit does, too.*/
2500
2501 saved_uid = getuid();
2502 saved_gid = getgid();
2503
2504 if (gid_is_valid(gid) && gid != saved_gid) {
2505 if (setregid(gid, -1) < 0)
8002fb97 2506 return log_unit_error_errno(u, errno, "Failed to change GID for user keyring: %m");
b1edf445
LP
2507 }
2508
2509 if (uid_is_valid(uid) && uid != saved_uid) {
2510 if (setreuid(uid, -1) < 0) {
2511 (void) setregid(saved_gid, -1);
8002fb97 2512 return log_unit_error_errno(u, errno, "Failed to change UID for user keyring: %m");
b1edf445
LP
2513 }
2514 }
2515
2516 if (keyctl(KEYCTL_LINK,
2517 KEY_SPEC_USER_KEYRING,
2518 KEY_SPEC_SESSION_KEYRING, 0, 0) < 0) {
2519
2520 r = -errno;
2521
2522 (void) setreuid(saved_uid, -1);
2523 (void) setregid(saved_gid, -1);
2524
8002fb97 2525 return log_unit_error_errno(u, r, "Failed to link user keyring into session keyring: %m");
b1edf445
LP
2526 }
2527
2528 if (uid_is_valid(uid) && uid != saved_uid) {
2529 if (setreuid(saved_uid, -1) < 0) {
2530 (void) setregid(saved_gid, -1);
8002fb97 2531 return log_unit_error_errno(u, errno, "Failed to change UID back for user keyring: %m");
b1edf445
LP
2532 }
2533 }
2534
2535 if (gid_is_valid(gid) && gid != saved_gid) {
2536 if (setregid(saved_gid, -1) < 0)
8002fb97 2537 return log_unit_error_errno(u, errno, "Failed to change GID back for user keyring: %m");
b1edf445 2538 }
61ceaea5 2539 }
b1edf445 2540
74dd6b51
LP
2541 return 0;
2542}
2543
29206d46
LP
/* Appends the valid (i.e. non-negative) file descriptors of a socket pair to the array, advancing the element
 * counter *n for each one added. A NULL pair is silently skipped. The caller has to make sure the array has
 * room for up to two more entries. */
static void append_socket_pair(int *array, unsigned *n, int pair[2]) {
        unsigned k;

        assert(array);
        assert(n);

        if (!pair)
                return;

        for (k = 0; k < 2; k++)
                if (pair[k] >= 0)
                        array[(*n)++] = pair[k];
}
2556
a34ceba6
LP
2557static int close_remaining_fds(
2558 const ExecParameters *params,
2559 ExecRuntime *runtime,
29206d46 2560 DynamicCreds *dcreds,
00d9ef85 2561 int user_lookup_fd,
a34ceba6
LP
2562 int socket_fd,
2563 int *fds, unsigned n_fds) {
2564
2565 unsigned n_dont_close = 0;
00d9ef85 2566 int dont_close[n_fds + 12];
a34ceba6
LP
2567
2568 assert(params);
2569
2570 if (params->stdin_fd >= 0)
2571 dont_close[n_dont_close++] = params->stdin_fd;
2572 if (params->stdout_fd >= 0)
2573 dont_close[n_dont_close++] = params->stdout_fd;
2574 if (params->stderr_fd >= 0)
2575 dont_close[n_dont_close++] = params->stderr_fd;
2576
2577 if (socket_fd >= 0)
2578 dont_close[n_dont_close++] = socket_fd;
2579 if (n_fds > 0) {
2580 memcpy(dont_close + n_dont_close, fds, sizeof(int) * n_fds);
2581 n_dont_close += n_fds;
2582 }
2583
29206d46
LP
2584 if (runtime)
2585 append_socket_pair(dont_close, &n_dont_close, runtime->netns_storage_socket);
2586
2587 if (dcreds) {
2588 if (dcreds->user)
2589 append_socket_pair(dont_close, &n_dont_close, dcreds->user->storage_socket);
2590 if (dcreds->group)
2591 append_socket_pair(dont_close, &n_dont_close, dcreds->group->storage_socket);
a34ceba6
LP
2592 }
2593
00d9ef85
LP
2594 if (user_lookup_fd >= 0)
2595 dont_close[n_dont_close++] = user_lookup_fd;
2596
a34ceba6
LP
2597 return close_all_fds(dont_close, n_dont_close);
2598}
2599
00d9ef85
LP
2600static int send_user_lookup(
2601 Unit *unit,
2602 int user_lookup_fd,
2603 uid_t uid,
2604 gid_t gid) {
2605
2606 assert(unit);
2607
2608 /* Send the resolved UID/GID to PID 1 after we learnt it. We send a single datagram, containing the UID/GID
2609 * data as well as the unit name. Note that we suppress sending this if no user/group to resolve was
2610 * specified. */
2611
2612 if (user_lookup_fd < 0)
2613 return 0;
2614
2615 if (!uid_is_valid(uid) && !gid_is_valid(gid))
2616 return 0;
2617
2618 if (writev(user_lookup_fd,
2619 (struct iovec[]) {
e6a7ec4b
LP
2620 IOVEC_INIT(&uid, sizeof(uid)),
2621 IOVEC_INIT(&gid, sizeof(gid)),
2622 IOVEC_INIT_STRING(unit->id) }, 3) < 0)
00d9ef85
LP
2623 return -errno;
2624
2625 return 0;
2626}
2627
6732edab
LP
2628static int acquire_home(const ExecContext *c, uid_t uid, const char** home, char **buf) {
2629 int r;
2630
2631 assert(c);
2632 assert(home);
2633 assert(buf);
2634
2635 /* If WorkingDirectory=~ is set, try to acquire a usable home directory. */
2636
2637 if (*home)
2638 return 0;
2639
2640 if (!c->working_directory_home)
2641 return 0;
2642
2643 if (uid == 0) {
2644 /* Hardcode /root as home directory for UID 0 */
2645 *home = "/root";
2646 return 1;
2647 }
2648
2649 r = get_home_dir(buf);
2650 if (r < 0)
2651 return r;
2652
2653 *home = *buf;
2654 return 1;
2655}
2656
da50b85a
LP
2657static int compile_suggested_paths(const ExecContext *c, const ExecParameters *p, char ***ret) {
2658 _cleanup_strv_free_ char ** list = NULL;
2659 ExecDirectoryType t;
2660 int r;
2661
2662 assert(c);
2663 assert(p);
2664 assert(ret);
2665
2666 assert(c->dynamic_user);
2667
2668 /* Compile a list of paths that it might make sense to read the owning UID from to use as initial candidate for
2669 * dynamic UID allocation, in order to save us from doing costly recursive chown()s of the special
2670 * directories. */
2671
2672 for (t = 0; t < _EXEC_DIRECTORY_TYPE_MAX; t++) {
2673 char **i;
2674
2675 if (t == EXEC_DIRECTORY_CONFIGURATION)
2676 continue;
2677
2678 if (!p->prefix[t])
2679 continue;
2680
2681 STRV_FOREACH(i, c->directories[t].paths) {
2682 char *e;
2683
8092a48c
YW
2684 if (t == EXEC_DIRECTORY_RUNTIME)
2685 e = strjoin(p->prefix[t], "/", *i);
2686 else
2687 e = strjoin(p->prefix[t], "/private/", *i);
da50b85a
LP
2688 if (!e)
2689 return -ENOMEM;
2690
2691 r = strv_consume(&list, e);
2692 if (r < 0)
2693 return r;
2694 }
2695 }
2696
2697 *ret = list;
2698 list = NULL;
2699
2700 return 0;
2701}
2702
ff0af2a1 2703static int exec_child(
f2341e0a 2704 Unit *unit,
ff0af2a1
LP
2705 ExecCommand *command,
2706 const ExecContext *context,
2707 const ExecParameters *params,
2708 ExecRuntime *runtime,
29206d46 2709 DynamicCreds *dcreds,
ff0af2a1
LP
2710 char **argv,
2711 int socket_fd,
52c239d7 2712 int named_iofds[3],
4c47affc
FB
2713 int *fds,
2714 unsigned n_storage_fds,
9b141911 2715 unsigned n_socket_fds,
ff0af2a1 2716 char **files_env,
00d9ef85 2717 int user_lookup_fd,
12145637 2718 int *exit_status) {
d35fbf6b 2719
2065ca69 2720 _cleanup_strv_free_ char **our_env = NULL, **pass_env = NULL, **accum_env = NULL, **final_argv = NULL;
6732edab 2721 _cleanup_free_ char *mac_selinux_context_net = NULL, *home_buffer = NULL;
4d885bd3
DH
2722 _cleanup_free_ gid_t *supplementary_gids = NULL;
2723 const char *username = NULL, *groupname = NULL;
2b3c1b9e 2724 const char *home = NULL, *shell = NULL;
7bce046b
LP
2725 dev_t journal_stream_dev = 0;
2726 ino_t journal_stream_ino = 0;
165a31c0
LP
2727 bool needs_sandboxing, /* Do we need to set up full sandboxing? (i.e. all namespacing, all MAC stuff, caps, yadda yadda */
2728 needs_setuid, /* Do we need to do the actual setresuid()/setresgid() calls? */
2729 needs_mount_namespace, /* Do we need to set up a mount namespace for this kernel? */
2730 needs_ambient_hack; /* Do we need to apply the ambient capabilities hack? */
349cc4a5 2731#if HAVE_SELINUX
43b1f709 2732 bool use_selinux = false;
ecfbc84f 2733#endif
f9fa32f0 2734#if ENABLE_SMACK
43b1f709 2735 bool use_smack = false;
ecfbc84f 2736#endif
349cc4a5 2737#if HAVE_APPARMOR
43b1f709 2738 bool use_apparmor = false;
ecfbc84f 2739#endif
fed1e721
LP
2740 uid_t uid = UID_INVALID;
2741 gid_t gid = GID_INVALID;
4d885bd3 2742 int i, r, ngids = 0;
4c47affc 2743 unsigned n_fds;
3536f49e 2744 ExecDirectoryType dt;
165a31c0 2745 int secure_bits;
034c6ed7 2746
f2341e0a 2747 assert(unit);
5cb5a6ff
LP
2748 assert(command);
2749 assert(context);
d35fbf6b 2750 assert(params);
ff0af2a1 2751 assert(exit_status);
d35fbf6b
DM
2752
2753 rename_process_from_path(command->path);
2754
2755 /* We reset exactly these signals, since they are the
2756 * only ones we set to SIG_IGN in the main daemon. All
2757 * others we leave untouched because we set them to
2758 * SIG_DFL or a valid handler initially, both of which
2759 * will be demoted to SIG_DFL. */
ce30c8dc
LP
2760 (void) default_signals(SIGNALS_CRASH_HANDLER,
2761 SIGNALS_IGNORE, -1);
d35fbf6b
DM
2762
2763 if (context->ignore_sigpipe)
ce30c8dc 2764 (void) ignore_signals(SIGPIPE, -1);
d35fbf6b 2765
ff0af2a1
LP
2766 r = reset_signal_mask();
2767 if (r < 0) {
2768 *exit_status = EXIT_SIGNAL_MASK;
12145637 2769 return log_unit_error_errno(unit, r, "Failed to set process signal mask: %m");
d35fbf6b 2770 }
034c6ed7 2771
d35fbf6b
DM
2772 if (params->idle_pipe)
2773 do_idle_pipe_dance(params->idle_pipe);
4f2d528d 2774
2c027c62
LP
2775 /* Close fds we don't need very early to make sure we don't block init reexecution because it cannot bind its
2776 * sockets. Among the fds we close are the logging fds, and we want to keep them closed, so that we don't have
2777 * any fds open we don't really want open during the transition. In order to make logging work, we switch the
2778 * log subsystem into open_when_needed mode, so that it reopens the logs on every single log call. */
ff0af2a1 2779
d35fbf6b 2780 log_forget_fds();
2c027c62 2781 log_set_open_when_needed(true);
4f2d528d 2782
40a80078
LP
2783 /* In case anything used libc syslog(), close this here, too */
2784 closelog();
2785
4c47affc 2786 n_fds = n_storage_fds + n_socket_fds;
00d9ef85 2787 r = close_remaining_fds(params, runtime, dcreds, user_lookup_fd, socket_fd, fds, n_fds);
ff0af2a1
LP
2788 if (r < 0) {
2789 *exit_status = EXIT_FDS;
12145637 2790 return log_unit_error_errno(unit, r, "Failed to close unwanted file descriptors: %m");
8c7be95e
LP
2791 }
2792
d35fbf6b
DM
2793 if (!context->same_pgrp)
2794 if (setsid() < 0) {
ff0af2a1 2795 *exit_status = EXIT_SETSID;
12145637 2796 return log_unit_error_errno(unit, errno, "Failed to create new process session: %m");
d35fbf6b 2797 }
9e2f7c11 2798
1e22b5cd 2799 exec_context_tty_reset(context, params);
d35fbf6b 2800
c891efaf 2801 if (unit_shall_confirm_spawn(unit)) {
7d5ceb64 2802 const char *vc = params->confirm_spawn;
3b20f877
FB
2803 _cleanup_free_ char *cmdline = NULL;
2804
2805 cmdline = exec_command_line(argv);
2806 if (!cmdline) {
0460aa5c 2807 *exit_status = EXIT_MEMORY;
12145637 2808 return log_oom();
3b20f877 2809 }
d35fbf6b 2810
eedf223a 2811 r = ask_for_confirmation(vc, unit, cmdline);
3b20f877
FB
2812 if (r != CONFIRM_EXECUTE) {
2813 if (r == CONFIRM_PRETEND_SUCCESS) {
2814 *exit_status = EXIT_SUCCESS;
2815 return 0;
2816 }
ff0af2a1 2817 *exit_status = EXIT_CONFIRM;
12145637 2818 log_unit_error(unit, "Execution cancelled by the user");
d35fbf6b 2819 return -ECANCELED;
d35fbf6b
DM
2820 }
2821 }
1a63a750 2822
29206d46 2823 if (context->dynamic_user && dcreds) {
da50b85a 2824 _cleanup_strv_free_ char **suggested_paths = NULL;
29206d46 2825
409093fe
LP
2826 /* Make sure we bypass our own NSS module for any NSS checks */
2827 if (putenv((char*) "SYSTEMD_NSS_DYNAMIC_BYPASS=1") != 0) {
2828 *exit_status = EXIT_USER;
12145637 2829 return log_unit_error_errno(unit, errno, "Failed to update environment: %m");
409093fe
LP
2830 }
2831
da50b85a
LP
2832 r = compile_suggested_paths(context, params, &suggested_paths);
2833 if (r < 0) {
2834 *exit_status = EXIT_MEMORY;
2835 return log_oom();
2836 }
2837
2838 r = dynamic_creds_realize(dcreds, suggested_paths, &uid, &gid);
ff0af2a1
LP
2839 if (r < 0) {
2840 *exit_status = EXIT_USER;
e2b0cc34
YW
2841 if (r == -EILSEQ) {
2842 log_unit_error(unit, "Failed to update dynamic user credentials: User or group with specified name already exists.");
2843 return -EOPNOTSUPP;
2844 }
12145637 2845 return log_unit_error_errno(unit, r, "Failed to update dynamic user credentials: %m");
524daa8c 2846 }
524daa8c 2847
70dd455c 2848 if (!uid_is_valid(uid)) {
29206d46 2849 *exit_status = EXIT_USER;
12145637 2850 log_unit_error(unit, "UID validation failed for \""UID_FMT"\"", uid);
70dd455c
ZJS
2851 return -ESRCH;
2852 }
2853
2854 if (!gid_is_valid(gid)) {
2855 *exit_status = EXIT_USER;
12145637 2856 log_unit_error(unit, "GID validation failed for \""GID_FMT"\"", gid);
29206d46
LP
2857 return -ESRCH;
2858 }
5bc7452b 2859
29206d46
LP
2860 if (dcreds->user)
2861 username = dcreds->user->name;
2862
2863 } else {
4d885bd3
DH
2864 r = get_fixed_user(context, &username, &uid, &gid, &home, &shell);
2865 if (r < 0) {
2866 *exit_status = EXIT_USER;
12145637 2867 return log_unit_error_errno(unit, r, "Failed to determine user credentials: %m");
5bc7452b 2868 }
5bc7452b 2869
4d885bd3
DH
2870 r = get_fixed_group(context, &groupname, &gid);
2871 if (r < 0) {
2872 *exit_status = EXIT_GROUP;
12145637 2873 return log_unit_error_errno(unit, r, "Failed to determine group credentials: %m");
4d885bd3 2874 }
cdc5d5c5 2875 }
29206d46 2876
cdc5d5c5
DH
2877 /* Initialize user supplementary groups and get SupplementaryGroups= ones */
2878 r = get_supplementary_groups(context, username, groupname, gid,
2879 &supplementary_gids, &ngids);
2880 if (r < 0) {
2881 *exit_status = EXIT_GROUP;
12145637 2882 return log_unit_error_errno(unit, r, "Failed to determine supplementary groups: %m");
29206d46 2883 }
5bc7452b 2884
00d9ef85
LP
2885 r = send_user_lookup(unit, user_lookup_fd, uid, gid);
2886 if (r < 0) {
2887 *exit_status = EXIT_USER;
12145637 2888 return log_unit_error_errno(unit, r, "Failed to send user credentials to PID1: %m");
00d9ef85
LP
2889 }
2890
2891 user_lookup_fd = safe_close(user_lookup_fd);
2892
6732edab
LP
2893 r = acquire_home(context, uid, &home, &home_buffer);
2894 if (r < 0) {
2895 *exit_status = EXIT_CHDIR;
12145637 2896 return log_unit_error_errno(unit, r, "Failed to determine $HOME for user: %m");
6732edab
LP
2897 }
2898
d35fbf6b
DM
2899 /* If a socket is connected to STDIN/STDOUT/STDERR, we
2900 * must be sure to drop O_NONBLOCK */
2901 if (socket_fd >= 0)
a34ceba6 2902 (void) fd_nonblock(socket_fd, false);
acbb0225 2903
52c239d7 2904 r = setup_input(context, params, socket_fd, named_iofds);
ff0af2a1
LP
2905 if (r < 0) {
2906 *exit_status = EXIT_STDIN;
12145637 2907 return log_unit_error_errno(unit, r, "Failed to set up standard input: %m");
d35fbf6b 2908 }
034c6ed7 2909
52c239d7 2910 r = setup_output(unit, context, params, STDOUT_FILENO, socket_fd, named_iofds, basename(command->path), uid, gid, &journal_stream_dev, &journal_stream_ino);
ff0af2a1
LP
2911 if (r < 0) {
2912 *exit_status = EXIT_STDOUT;
12145637 2913 return log_unit_error_errno(unit, r, "Failed to set up standard output: %m");
d35fbf6b
DM
2914 }
2915
52c239d7 2916 r = setup_output(unit, context, params, STDERR_FILENO, socket_fd, named_iofds, basename(command->path), uid, gid, &journal_stream_dev, &journal_stream_ino);
ff0af2a1
LP
2917 if (r < 0) {
2918 *exit_status = EXIT_STDERR;
12145637 2919 return log_unit_error_errno(unit, r, "Failed to set up standard error output: %m");
d35fbf6b
DM
2920 }
2921
2922 if (params->cgroup_path) {
ff0af2a1
LP
2923 r = cg_attach_everywhere(params->cgroup_supported, params->cgroup_path, 0, NULL, NULL);
2924 if (r < 0) {
2925 *exit_status = EXIT_CGROUP;
12145637 2926 return log_unit_error_errno(unit, r, "Failed to attach to cgroup %s: %m", params->cgroup_path);
309bff19 2927 }
d35fbf6b 2928 }
309bff19 2929
d35fbf6b 2930 if (context->oom_score_adjust_set) {
d5243d62 2931 char t[DECIMAL_STR_MAX(context->oom_score_adjust)];
f2b68789 2932
d5243d62
LP
2933 /* When we can't make this change due to EPERM, then
2934 * let's silently skip over it. User namespaces
2935 * prohibit write access to this file, and we
2936 * shouldn't trip up over that. */
613b411c 2937
d5243d62 2938 sprintf(t, "%i", context->oom_score_adjust);
ad118bda 2939 r = write_string_file("/proc/self/oom_score_adj", t, 0);
12145637 2940 if (IN_SET(r, -EPERM, -EACCES))
f2341e0a 2941 log_unit_debug_errno(unit, r, "Failed to adjust OOM setting, assuming containerized execution, ignoring: %m");
12145637 2942 else if (r < 0) {
ff0af2a1 2943 *exit_status = EXIT_OOM_ADJUST;
12145637 2944 return log_unit_error_errno(unit, r, "Failed to adjust OOM setting: %m");
613b411c 2945 }
d35fbf6b
DM
2946 }
2947
2948 if (context->nice_set)
2949 if (setpriority(PRIO_PROCESS, 0, context->nice) < 0) {
ff0af2a1 2950 *exit_status = EXIT_NICE;
12145637 2951 return log_unit_error_errno(unit, errno, "Failed to set up process scheduling priority (nice level): %m");
613b411c
LP
2952 }
2953
d35fbf6b
DM
2954 if (context->cpu_sched_set) {
2955 struct sched_param param = {
2956 .sched_priority = context->cpu_sched_priority,
2957 };
2958
ff0af2a1
LP
2959 r = sched_setscheduler(0,
2960 context->cpu_sched_policy |
2961 (context->cpu_sched_reset_on_fork ?
2962 SCHED_RESET_ON_FORK : 0),
2963 &param);
2964 if (r < 0) {
2965 *exit_status = EXIT_SETSCHEDULER;
12145637 2966 return log_unit_error_errno(unit, errno, "Failed to set up CPU scheduling: %m");
fc9b2a84 2967 }
d35fbf6b 2968 }
fc9b2a84 2969
d35fbf6b
DM
2970 if (context->cpuset)
2971 if (sched_setaffinity(0, CPU_ALLOC_SIZE(context->cpuset_ncpus), context->cpuset) < 0) {
ff0af2a1 2972 *exit_status = EXIT_CPUAFFINITY;
12145637 2973 return log_unit_error_errno(unit, errno, "Failed to set up CPU affinity: %m");
034c6ed7
LP
2974 }
2975
d35fbf6b
DM
2976 if (context->ioprio_set)
2977 if (ioprio_set(IOPRIO_WHO_PROCESS, 0, context->ioprio) < 0) {
ff0af2a1 2978 *exit_status = EXIT_IOPRIO;
12145637 2979 return log_unit_error_errno(unit, errno, "Failed to set up IO scheduling priority: %m");
d35fbf6b 2980 }
da726a4d 2981
d35fbf6b
DM
2982 if (context->timer_slack_nsec != NSEC_INFINITY)
2983 if (prctl(PR_SET_TIMERSLACK, context->timer_slack_nsec) < 0) {
ff0af2a1 2984 *exit_status = EXIT_TIMERSLACK;
12145637 2985 return log_unit_error_errno(unit, errno, "Failed to set up timer slack: %m");
4c2630eb 2986 }
9eba9da4 2987
21022b9d
LP
2988 if (context->personality != PERSONALITY_INVALID) {
2989 r = safe_personality(context->personality);
2990 if (r < 0) {
ff0af2a1 2991 *exit_status = EXIT_PERSONALITY;
12145637 2992 return log_unit_error_errno(unit, r, "Failed to set up execution domain (personality): %m");
4c2630eb 2993 }
21022b9d 2994 }
94f04347 2995
d35fbf6b 2996 if (context->utmp_id)
df0ff127 2997 utmp_put_init_process(context->utmp_id, getpid_cached(), getsid(0),
6a93917d 2998 context->tty_path,
023a4f67
LP
2999 context->utmp_mode == EXEC_UTMP_INIT ? INIT_PROCESS :
3000 context->utmp_mode == EXEC_UTMP_LOGIN ? LOGIN_PROCESS :
3001 USER_PROCESS,
6a93917d 3002 username);
d35fbf6b 3003
e0d2adfd 3004 if (context->user) {
ff0af2a1
LP
3005 r = chown_terminal(STDIN_FILENO, uid);
3006 if (r < 0) {
3007 *exit_status = EXIT_STDIN;
12145637 3008 return log_unit_error_errno(unit, r, "Failed to change ownership of terminal: %m");
071830ff 3009 }
d35fbf6b 3010 }
8e274523 3011
a931ad47
LP
3012 /* If delegation is enabled we'll pass ownership of the cgroup
3013 * (but only in systemd's own controller hierarchy!) to the
3014 * user of the new process. */
584b8688 3015 if (params->cgroup_path && context->user && (params->flags & EXEC_CGROUP_DELEGATE)) {
ff0af2a1
LP
3016 r = cg_set_task_access(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, 0644, uid, gid);
3017 if (r < 0) {
3018 *exit_status = EXIT_CGROUP;
12145637 3019 return log_unit_error_errno(unit, r, "Failed to adjust control group access: %m");
d35fbf6b 3020 }
034c6ed7 3021
ff0af2a1
LP
3022 r = cg_set_group_access(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, 0755, uid, gid);
3023 if (r < 0) {
3024 *exit_status = EXIT_CGROUP;
12145637 3025 return log_unit_error_errno(unit, r, "Failed to adjust control group access: %m");
034c6ed7 3026 }
d35fbf6b 3027 }
034c6ed7 3028
72fd1768 3029 for (dt = 0; dt < _EXEC_DIRECTORY_TYPE_MAX; dt++) {
8679efde 3030 r = setup_exec_directory(context, params, uid, gid, dt, exit_status);
12145637
LP
3031 if (r < 0)
3032 return log_unit_error_errno(unit, r, "Failed to set up special execution directory in %s: %m", params->prefix[dt]);
d35fbf6b 3033 }
94f04347 3034
7bce046b 3035 r = build_environment(
fd63e712 3036 unit,
7bce046b
LP
3037 context,
3038 params,
3039 n_fds,
3040 home,
3041 username,
3042 shell,
3043 journal_stream_dev,
3044 journal_stream_ino,
3045 &our_env);
2065ca69
JW
3046 if (r < 0) {
3047 *exit_status = EXIT_MEMORY;
12145637 3048 return log_oom();
2065ca69
JW
3049 }
3050
3051 r = build_pass_environment(context, &pass_env);
3052 if (r < 0) {
3053 *exit_status = EXIT_MEMORY;
12145637 3054 return log_oom();
2065ca69
JW
3055 }
3056
3057 accum_env = strv_env_merge(5,
3058 params->environment,
3059 our_env,
3060 pass_env,
3061 context->environment,
3062 files_env,
3063 NULL);
3064 if (!accum_env) {
3065 *exit_status = EXIT_MEMORY;
12145637 3066 return log_oom();
2065ca69 3067 }
1280503b 3068 accum_env = strv_env_clean(accum_env);
2065ca69 3069
096424d1 3070 (void) umask(context->umask);
b213e1c1 3071
b1edf445 3072 r = setup_keyring(unit, context, params, uid, gid);
74dd6b51
LP
3073 if (r < 0) {
3074 *exit_status = EXIT_KEYRING;
12145637 3075 return log_unit_error_errno(unit, r, "Failed to set up kernel keyring: %m");
74dd6b51
LP
3076 }
3077
165a31c0 3078 /* We need sandboxing if the caller asked us to apply it and the command isn't explicitly excepted from it */
1703fa41 3079 needs_sandboxing = (params->flags & EXEC_APPLY_SANDBOXING) && !(command->flags & EXEC_COMMAND_FULLY_PRIVILEGED);
7f18ef0a 3080
165a31c0
LP
3081 /* We need the ambient capability hack, if the caller asked us to apply it and the command is marked for it, and the kernel doesn't actually support ambient caps */
3082 needs_ambient_hack = (params->flags & EXEC_APPLY_SANDBOXING) && (command->flags & EXEC_COMMAND_AMBIENT_MAGIC) && !ambient_capabilities_supported();
7f18ef0a 3083
165a31c0
LP
3084 /* We need setresuid() if the caller asked us to apply sandboxing and the command isn't explicitly excepted from either whole sandboxing or just setresuid() itself, and the ambient hack is not desired */
3085 if (needs_ambient_hack)
3086 needs_setuid = false;
3087 else
3088 needs_setuid = (params->flags & EXEC_APPLY_SANDBOXING) && !(command->flags & (EXEC_COMMAND_FULLY_PRIVILEGED|EXEC_COMMAND_NO_SETUID));
3089
3090 if (needs_sandboxing) {
7f18ef0a
FK
3091 /* MAC enablement checks need to be done before a new mount ns is created, as they rely on /sys being
3092 * present. The actual MAC context application will happen later, as late as possible, to avoid
3093 * impacting our own code paths. */
3094
349cc4a5 3095#if HAVE_SELINUX
43b1f709 3096 use_selinux = mac_selinux_use();
7f18ef0a 3097#endif
f9fa32f0 3098#if ENABLE_SMACK
43b1f709 3099 use_smack = mac_smack_use();
7f18ef0a 3100#endif
349cc4a5 3101#if HAVE_APPARMOR
43b1f709 3102 use_apparmor = mac_apparmor_use();
7f18ef0a 3103#endif
165a31c0 3104 }
7f18ef0a 3105
165a31c0
LP
3106 if (needs_setuid) {
3107 if (context->pam_name && username) {
3108 r = setup_pam(context->pam_name, username, uid, gid, context->tty_path, &accum_env, fds, n_fds);
3109 if (r < 0) {
3110 *exit_status = EXIT_PAM;
12145637 3111 return log_unit_error_errno(unit, r, "Failed to set up PAM session: %m");
165a31c0
LP
3112 }
3113 }
b213e1c1 3114 }
ac45f971 3115
d35fbf6b 3116 if (context->private_network && runtime && runtime->netns_storage_socket[0] >= 0) {
6e2d7c4f
MS
3117 if (ns_type_supported(NAMESPACE_NET)) {
3118 r = setup_netns(runtime->netns_storage_socket);
3119 if (r < 0) {
3120 *exit_status = EXIT_NETWORK;
3121 return log_unit_error_errno(unit, r, "Failed to set up network namespacing: %m");
3122 }
3123 } else
3124 log_unit_warning(unit, "PrivateNetwork=yes is configured, but the kernel does not support network namespaces, ignoring.");
d35fbf6b 3125 }
169c1bda 3126
ee818b89 3127 needs_mount_namespace = exec_needs_mount_namespace(context, params, runtime);
ee818b89 3128 if (needs_mount_namespace) {
6818c54c 3129 r = apply_mount_namespace(unit, command, context, params, runtime);
3fbe8dbe
LP
3130 if (r < 0) {
3131 *exit_status = EXIT_NAMESPACE;
12145637 3132 return log_unit_error_errno(unit, r, "Failed to set up mount namespacing: %m");
3fbe8dbe 3133 }
d35fbf6b 3134 }
81a2b7ce 3135
50b3dfb9 3136 /* Apply just after mount namespace setup */
376fecf6 3137 r = apply_working_directory(context, params, home, needs_mount_namespace, exit_status);
12145637
LP
3138 if (r < 0)
3139 return log_unit_error_errno(unit, r, "Changing to the requested working directory failed: %m");
50b3dfb9 3140
bbeea271 3141 /* Drop groups as early as possible */
165a31c0 3142 if (needs_setuid) {
709dbeac 3143 r = enforce_groups(gid, supplementary_gids, ngids);
096424d1
LP
3144 if (r < 0) {
3145 *exit_status = EXIT_GROUP;
12145637 3146 return log_unit_error_errno(unit, r, "Changing group credentials failed: %m");
096424d1 3147 }
165a31c0 3148 }
096424d1 3149
165a31c0 3150 if (needs_sandboxing) {
349cc4a5 3151#if HAVE_SELINUX
43b1f709 3152 if (use_selinux && params->selinux_context_net && socket_fd >= 0) {
937ccce9
LP
3153 r = mac_selinux_get_child_mls_label(socket_fd, command->path, context->selinux_context, &mac_selinux_context_net);
3154 if (r < 0) {
3155 *exit_status = EXIT_SELINUX_CONTEXT;
12145637 3156 return log_unit_error_errno(unit, r, "Failed to determine SELinux context: %m");
937ccce9 3157 }
9008e1ac 3158 }
9008e1ac
MS
3159#endif
3160
937ccce9
LP
3161 if (context->private_users) {
3162 r = setup_private_users(uid, gid);
3163 if (r < 0) {
3164 *exit_status = EXIT_USER;
12145637 3165 return log_unit_error_errno(unit, r, "Failed to set up user namespacing: %m");
937ccce9 3166 }
d251207d
LP
3167 }
3168 }
3169
165a31c0
LP
3170 /* We repeat the fd closing here, to make sure that nothing is leaked from the PAM modules. Note that we are
3171 * more aggressive this time since socket_fd and the netns fds we don't need anymore. The custom endpoint fd
3172 * was needed to upload the policy and can now be closed as well. */
ff0af2a1
LP
3173 r = close_all_fds(fds, n_fds);
3174 if (r >= 0)
3175 r = shift_fds(fds, n_fds);
3176 if (r >= 0)
4c47affc 3177 r = flags_fds(fds, n_storage_fds, n_socket_fds, context->non_blocking);
ff0af2a1
LP
3178 if (r < 0) {
3179 *exit_status = EXIT_FDS;
12145637 3180 return log_unit_error_errno(unit, r, "Failed to adjust passed file descriptors: %m");
d35fbf6b 3181 }
e66cf1a3 3182
165a31c0 3183 secure_bits = context->secure_bits;
e66cf1a3 3184
165a31c0
LP
3185 if (needs_sandboxing) {
3186 uint64_t bset;
755d4b67 3187
d35fbf6b 3188 for (i = 0; i < _RLIMIT_MAX; i++) {
03857c43 3189
d35fbf6b
DM
3190 if (!context->rlimit[i])
3191 continue;
3192
03857c43
LP
3193 r = setrlimit_closest(i, context->rlimit[i]);
3194 if (r < 0) {
ff0af2a1 3195 *exit_status = EXIT_LIMITS;
12145637 3196 return log_unit_error_errno(unit, r, "Failed to adjust resource limit %s: %m", rlimit_to_string(i));
e66cf1a3
LP
3197 }
3198 }
3199
f4170c67
LP
3200 /* Set the RTPRIO resource limit to 0, but only if nothing else was explicitly requested. */
3201 if (context->restrict_realtime && !context->rlimit[RLIMIT_RTPRIO]) {
3202 if (setrlimit(RLIMIT_RTPRIO, &RLIMIT_MAKE_CONST(0)) < 0) {
3203 *exit_status = EXIT_LIMITS;
12145637 3204 return log_unit_error_errno(unit, errno, "Failed to adjust RLIMIT_RTPRIO resource limit: %m");
f4170c67
LP
3205 }
3206 }
3207
165a31c0
LP
3208 bset = context->capability_bounding_set;
3209 /* If the ambient caps hack is enabled (which means the kernel can't do them, and the user asked for
3210 * our magic fallback), then let's add some extra caps, so that the service can drop privs of its own,
3211 * instead of us doing that */
3212 if (needs_ambient_hack)
3213 bset |= (UINT64_C(1) << CAP_SETPCAP) |
3214 (UINT64_C(1) << CAP_SETUID) |
3215 (UINT64_C(1) << CAP_SETGID);
3216
3217 if (!cap_test_all(bset)) {
3218 r = capability_bounding_set_drop(bset, false);
ff0af2a1
LP
3219 if (r < 0) {
3220 *exit_status = EXIT_CAPABILITIES;
12145637 3221 return log_unit_error_errno(unit, r, "Failed to drop capabilities: %m");
3b8bddde 3222 }
4c2630eb 3223 }
3b8bddde 3224
755d4b67
IP
3225 /* This is done before enforce_user, but ambient set
3226 * does not survive over setresuid() if keep_caps is not set. */
165a31c0
LP
3227 if (!needs_ambient_hack &&
3228 context->capability_ambient_set != 0) {
755d4b67
IP
3229 r = capability_ambient_set_apply(context->capability_ambient_set, true);
3230 if (r < 0) {
3231 *exit_status = EXIT_CAPABILITIES;
12145637 3232 return log_unit_error_errno(unit, r, "Failed to apply ambient capabilities (before UID change): %m");
755d4b67 3233 }
755d4b67 3234 }
165a31c0 3235 }
755d4b67 3236
165a31c0 3237 if (needs_setuid) {
d35fbf6b 3238 if (context->user) {
ff0af2a1
LP
3239 r = enforce_user(context, uid);
3240 if (r < 0) {
3241 *exit_status = EXIT_USER;
12145637 3242 return log_unit_error_errno(unit, r, "Failed to change UID to " UID_FMT ": %m", uid);
5b6319dc 3243 }
165a31c0
LP
3244
3245 if (!needs_ambient_hack &&
3246 context->capability_ambient_set != 0) {
755d4b67
IP
3247
3248 /* Fix the ambient capabilities after user change. */
3249 r = capability_ambient_set_apply(context->capability_ambient_set, false);
3250 if (r < 0) {
3251 *exit_status = EXIT_CAPABILITIES;
12145637 3252 return log_unit_error_errno(unit, r, "Failed to apply ambient capabilities (after UID change): %m");
755d4b67
IP
3253 }
3254
3255 /* If we were asked to change user and ambient capabilities
3256 * were requested, we had to add keep-caps to the securebits
3257 * so that we would maintain the inherited capability set
3258 * through the setresuid(). Make sure that the bit is added
3259 * also to the context secure_bits so that we don't try to
3260 * drop the bit away next. */
3261
7f508f2c 3262 secure_bits |= 1<<SECURE_KEEP_CAPS;
755d4b67 3263 }
5b6319dc 3264 }
165a31c0 3265 }
d35fbf6b 3266
165a31c0 3267 if (needs_sandboxing) {
5cd9cd35
LP
3268 /* Apply the MAC contexts late, but before seccomp syscall filtering, as those should really be last to
3269 * influence our own codepaths as little as possible. Moreover, applying MAC contexts usually requires
3270 * syscalls that are subject to seccomp filtering, hence should probably be applied before the syscalls
3271 * are restricted. */
3272
349cc4a5 3273#if HAVE_SELINUX
43b1f709 3274 if (use_selinux) {
5cd9cd35
LP
3275 char *exec_context = mac_selinux_context_net ?: context->selinux_context;
3276
3277 if (exec_context) {
3278 r = setexeccon(exec_context);
3279 if (r < 0) {
3280 *exit_status = EXIT_SELINUX_CONTEXT;
12145637 3281 return log_unit_error_errno(unit, r, "Failed to change SELinux context to %s: %m", exec_context);
5cd9cd35
LP
3282 }
3283 }
3284 }
3285#endif
3286
f9fa32f0 3287#if ENABLE_SMACK
43b1f709 3288 if (use_smack) {
7f18ef0a
FK
3289 r = setup_smack(context, command);
3290 if (r < 0) {
3291 *exit_status = EXIT_SMACK_PROCESS_LABEL;
12145637 3292 return log_unit_error_errno(unit, r, "Failed to set SMACK process label: %m");
7f18ef0a 3293 }
5cd9cd35 3294 }
7f18ef0a 3295#endif
5cd9cd35 3296
349cc4a5 3297#if HAVE_APPARMOR
43b1f709 3298 if (use_apparmor && context->apparmor_profile) {
5cd9cd35
LP
3299 r = aa_change_onexec(context->apparmor_profile);
3300 if (r < 0 && !context->apparmor_profile_ignore) {
3301 *exit_status = EXIT_APPARMOR_PROFILE;
12145637 3302 return log_unit_error_errno(unit, errno, "Failed to prepare AppArmor profile change to %s: %m", context->apparmor_profile);
5cd9cd35
LP
3303 }
3304 }
3305#endif
3306
165a31c0
LP
3307 /* PR_GET_SECUREBITS is not privileged, while PR_SET_SECUREBITS is. So to suppress potential EPERMs
3308 * we'll try not to call PR_SET_SECUREBITS unless necessary. */
755d4b67
IP
3309 if (prctl(PR_GET_SECUREBITS) != secure_bits)
3310 if (prctl(PR_SET_SECUREBITS, secure_bits) < 0) {
ff0af2a1 3311 *exit_status = EXIT_SECUREBITS;
12145637 3312 return log_unit_error_errno(unit, errno, "Failed to set process secure bits: %m");
ff01d048 3313 }
5b6319dc 3314
59eeb84b 3315 if (context_has_no_new_privileges(context))
d35fbf6b 3316 if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) < 0) {
ff0af2a1 3317 *exit_status = EXIT_NO_NEW_PRIVILEGES;
12145637 3318 return log_unit_error_errno(unit, errno, "Failed to disable new privileges: %m");
d35fbf6b
DM
3319 }
3320
349cc4a5 3321#if HAVE_SECCOMP
469830d1
LP
3322 r = apply_address_families(unit, context);
3323 if (r < 0) {
3324 *exit_status = EXIT_ADDRESS_FAMILIES;
12145637 3325 return log_unit_error_errno(unit, r, "Failed to restrict address families: %m");
4c2630eb 3326 }
04aa0cb9 3327
469830d1
LP
3328 r = apply_memory_deny_write_execute(unit, context);
3329 if (r < 0) {
3330 *exit_status = EXIT_SECCOMP;
12145637 3331 return log_unit_error_errno(unit, r, "Failed to disable writing to executable memory: %m");
f3e43635 3332 }
f4170c67 3333
469830d1
LP
3334 r = apply_restrict_realtime(unit, context);
3335 if (r < 0) {
3336 *exit_status = EXIT_SECCOMP;
12145637 3337 return log_unit_error_errno(unit, r, "Failed to apply realtime restrictions: %m");
f4170c67
LP
3338 }
3339
add00535
LP
3340 r = apply_restrict_namespaces(unit, context);
3341 if (r < 0) {
3342 *exit_status = EXIT_SECCOMP;
12145637 3343 return log_unit_error_errno(unit, r, "Failed to apply namespace restrictions: %m");
add00535
LP
3344 }
3345
469830d1
LP
3346 r = apply_protect_sysctl(unit, context);
3347 if (r < 0) {
3348 *exit_status = EXIT_SECCOMP;
12145637 3349 return log_unit_error_errno(unit, r, "Failed to apply sysctl restrictions: %m");
502d704e
DH
3350 }
3351
469830d1
LP
3352 r = apply_protect_kernel_modules(unit, context);
3353 if (r < 0) {
3354 *exit_status = EXIT_SECCOMP;
12145637 3355 return log_unit_error_errno(unit, r, "Failed to apply module loading restrictions: %m");
59eeb84b
LP
3356 }
3357
469830d1
LP
3358 r = apply_private_devices(unit, context);
3359 if (r < 0) {
3360 *exit_status = EXIT_SECCOMP;
12145637 3361 return log_unit_error_errno(unit, r, "Failed to set up private devices: %m");
469830d1
LP
3362 }
3363
3364 r = apply_syscall_archs(unit, context);
3365 if (r < 0) {
3366 *exit_status = EXIT_SECCOMP;
12145637 3367 return log_unit_error_errno(unit, r, "Failed to apply syscall architecture restrictions: %m");
ba128bb8
LP
3368 }
3369
78e864e5
TM
3370 r = apply_lock_personality(unit, context);
3371 if (r < 0) {
3372 *exit_status = EXIT_SECCOMP;
12145637 3373 return log_unit_error_errno(unit, r, "Failed to lock personalities: %m");
78e864e5
TM
3374 }
3375
5cd9cd35
LP
3376 /* This really should remain the last step before the execve(), to make sure our own code is unaffected
3377 * by the filter as little as possible. */
165a31c0 3378 r = apply_syscall_filter(unit, context, needs_ambient_hack);
469830d1
LP
3379 if (r < 0) {
3380 *exit_status = EXIT_SECCOMP;
12145637 3381 return log_unit_error_errno(unit, r, "Failed to apply system call filters: %m");
d35fbf6b
DM
3382 }
3383#endif
d35fbf6b 3384 }
034c6ed7 3385
00819cc1
LP
3386 if (!strv_isempty(context->unset_environment)) {
3387 char **ee = NULL;
3388
3389 ee = strv_env_delete(accum_env, 1, context->unset_environment);
3390 if (!ee) {
3391 *exit_status = EXIT_MEMORY;
12145637 3392 return log_oom();
00819cc1
LP
3393 }
3394
3395 strv_free(accum_env);
3396 accum_env = ee;
3397 }
3398
2065ca69 3399 final_argv = replace_env_argv(argv, accum_env);
d35fbf6b 3400 if (!final_argv) {
ff0af2a1 3401 *exit_status = EXIT_MEMORY;
12145637 3402 return log_oom();
d35fbf6b 3403 }
034c6ed7 3404
553d2243 3405 if (_unlikely_(log_get_max_level() >= LOG_DEBUG)) {
d35fbf6b 3406 _cleanup_free_ char *line;
81a2b7ce 3407
d35fbf6b
DM
3408 line = exec_command_line(final_argv);
3409 if (line) {
f2341e0a 3410 log_struct(LOG_DEBUG,
f2341e0a
LP
3411 "EXECUTABLE=%s", command->path,
3412 LOG_UNIT_MESSAGE(unit, "Executing: %s", line),
ba360bb0 3413 LOG_UNIT_ID(unit),
f1c50bec 3414 LOG_UNIT_INVOCATION_ID(unit),
f2341e0a 3415 NULL);
d35fbf6b
DM
3416 }
3417 }
dd305ec9 3418
2065ca69 3419 execve(command->path, final_argv, accum_env);
12145637
LP
3420
3421 if (errno == ENOENT && (command->flags & EXEC_COMMAND_IGNORE_FAILURE)) {
3422
3423 log_struct_errno(LOG_INFO, errno,
3424 "MESSAGE_ID=" SD_MESSAGE_SPAWN_FAILED_STR,
3425 LOG_UNIT_ID(unit),
3426 LOG_UNIT_INVOCATION_ID(unit),
3427 LOG_UNIT_MESSAGE(unit, "Executable %s missing, skipping: %m",
3428 command->path),
3429 "EXECUTABLE=%s", command->path,
3430 NULL);
3431
3432 return 0;
3433 }
3434
ff0af2a1 3435 *exit_status = EXIT_EXEC;
12145637 3436 return log_unit_error_errno(unit, errno, "Failed to execute command: %m");
d35fbf6b 3437}
81a2b7ce 3438
f2341e0a
LP
3439int exec_spawn(Unit *unit,
3440 ExecCommand *command,
d35fbf6b
DM
3441 const ExecContext *context,
3442 const ExecParameters *params,
3443 ExecRuntime *runtime,
29206d46 3444 DynamicCreds *dcreds,
d35fbf6b 3445 pid_t *ret) {
8351ceae 3446
d35fbf6b 3447 _cleanup_strv_free_ char **files_env = NULL;
9b141911 3448 int *fds = NULL;
4c47affc 3449 unsigned n_storage_fds = 0, n_socket_fds = 0;
ff0af2a1
LP
3450 _cleanup_free_ char *line = NULL;
3451 int socket_fd, r;
52c239d7 3452 int named_iofds[3] = { -1, -1, -1 };
ff0af2a1 3453 char **argv;
d35fbf6b 3454 pid_t pid;
8351ceae 3455
f2341e0a 3456 assert(unit);
d35fbf6b
DM
3457 assert(command);
3458 assert(context);
3459 assert(ret);
3460 assert(params);
4c47affc 3461 assert(params->fds || (params->n_storage_fds + params->n_socket_fds <= 0));
4298d0b5 3462
d35fbf6b
DM
3463 if (context->std_input == EXEC_INPUT_SOCKET ||
3464 context->std_output == EXEC_OUTPUT_SOCKET ||
3465 context->std_error == EXEC_OUTPUT_SOCKET) {
17df7223 3466
4c47affc 3467 if (params->n_socket_fds > 1) {
f2341e0a 3468 log_unit_error(unit, "Got more than one socket.");
d35fbf6b 3469 return -EINVAL;
ff0af2a1 3470 }
eef65bf3 3471
4c47affc 3472 if (params->n_socket_fds == 0) {
488ab41c
AA
3473 log_unit_error(unit, "Got no socket.");
3474 return -EINVAL;
3475 }
3476
d35fbf6b
DM
3477 socket_fd = params->fds[0];
3478 } else {
3479 socket_fd = -1;
3480 fds = params->fds;
4c47affc 3481 n_storage_fds = params->n_storage_fds;
9b141911 3482 n_socket_fds = params->n_socket_fds;
d35fbf6b 3483 }
94f04347 3484
52c239d7
LB
3485 r = exec_context_named_iofds(unit, context, params, named_iofds);
3486 if (r < 0)
3487 return log_unit_error_errno(unit, r, "Failed to load a named file descriptor: %m");
3488
f2341e0a 3489 r = exec_context_load_environment(unit, context, &files_env);
ff0af2a1 3490 if (r < 0)
f2341e0a 3491 return log_unit_error_errno(unit, r, "Failed to load environment files: %m");
034c6ed7 3492
d35fbf6b 3493 argv = params->argv ?: command->argv;
d35fbf6b
DM
3494 line = exec_command_line(argv);
3495 if (!line)
3496 return log_oom();
fab56fc5 3497
f2341e0a 3498 log_struct(LOG_DEBUG,
f2341e0a
LP
3499 LOG_UNIT_MESSAGE(unit, "About to execute: %s", line),
3500 "EXECUTABLE=%s", command->path,
ba360bb0 3501 LOG_UNIT_ID(unit),
f1c50bec 3502 LOG_UNIT_INVOCATION_ID(unit),
f2341e0a 3503 NULL);
12145637 3504
d35fbf6b
DM
3505 pid = fork();
3506 if (pid < 0)
74129a12 3507 return log_unit_error_errno(unit, errno, "Failed to fork: %m");
d35fbf6b
DM
3508
3509 if (pid == 0) {
12145637 3510 int exit_status = EXIT_SUCCESS;
ff0af2a1 3511
f2341e0a
LP
3512 r = exec_child(unit,
3513 command,
ff0af2a1
LP
3514 context,
3515 params,
3516 runtime,
29206d46 3517 dcreds,
ff0af2a1
LP
3518 argv,
3519 socket_fd,
52c239d7 3520 named_iofds,
4c47affc
FB
3521 fds,
3522 n_storage_fds,
9b141911 3523 n_socket_fds,
ff0af2a1 3524 files_env,
00d9ef85 3525 unit->manager->user_lookup_fds[1],
12145637
LP
3526 &exit_status);
3527
ff0af2a1 3528 if (r < 0) {
12145637
LP
3529 log_struct_errno(LOG_ERR, r,
3530 "MESSAGE_ID=" SD_MESSAGE_SPAWN_FAILED_STR,
3531 LOG_UNIT_ID(unit),
3532 LOG_UNIT_INVOCATION_ID(unit),
3533 LOG_UNIT_MESSAGE(unit, "Failed at step %s spawning %s: %m",
3534 exit_status_to_string(exit_status, EXIT_STATUS_SYSTEMD),
3535 command->path),
3536 "EXECUTABLE=%s", command->path,
3537 NULL);
4c2630eb
MS
3538 }
3539
ff0af2a1 3540 _exit(exit_status);
034c6ed7
LP
3541 }
3542
f2341e0a 3543 log_unit_debug(unit, "Forked %s as "PID_FMT, command->path, pid);
23635a85 3544
80876c20
LP
3545 /* We add the new process to the cgroup both in the child (so
3546 * that we can be sure that no user code is ever executed
3547 * outside of the cgroup) and in the parent (so that we can be
3548 * sure that when we kill the cgroup the process will be
3549 * killed too). */
d35fbf6b 3550 if (params->cgroup_path)
dd305ec9 3551 (void) cg_attach(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, pid);
2da3263a 3552
b58b4116 3553 exec_status_start(&command->exec_status, pid);
9fb86720 3554
034c6ed7 3555 *ret = pid;
5cb5a6ff
LP
3556 return 0;
3557}
3558
034c6ed7 3559void exec_context_init(ExecContext *c) {
3536f49e
YW
3560 ExecDirectoryType i;
3561
034c6ed7
LP
3562 assert(c);
3563
4c12626c 3564 c->umask = 0022;
9eba9da4 3565 c->ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 0);
94f04347 3566 c->cpu_sched_policy = SCHED_OTHER;
071830ff 3567 c->syslog_priority = LOG_DAEMON|LOG_INFO;
74922904 3568 c->syslog_level_prefix = true;
353e12c2 3569 c->ignore_sigpipe = true;
3a43da28 3570 c->timer_slack_nsec = NSEC_INFINITY;
050f7277 3571 c->personality = PERSONALITY_INVALID;
72fd1768 3572 for (i = 0; i < _EXEC_DIRECTORY_TYPE_MAX; i++)
3536f49e 3573 c->directories[i].mode = 0755;
a103496c 3574 c->capability_bounding_set = CAP_ALL;
add00535 3575 c->restrict_namespaces = NAMESPACE_FLAGS_ALL;
d3070fbd 3576 c->log_level_max = -1;
034c6ed7
LP
3577}
3578
613b411c 3579void exec_context_done(ExecContext *c) {
3536f49e 3580 ExecDirectoryType i;
d3070fbd 3581 size_t l;
5cb5a6ff
LP
3582
3583 assert(c);
3584
6796073e
LP
3585 c->environment = strv_free(c->environment);
3586 c->environment_files = strv_free(c->environment_files);
b4c14404 3587 c->pass_environment = strv_free(c->pass_environment);
00819cc1 3588 c->unset_environment = strv_free(c->unset_environment);
8c7be95e 3589
1f6b4113 3590 for (l = 0; l < ELEMENTSOF(c->rlimit); l++)
a1e58e8e 3591 c->rlimit[l] = mfree(c->rlimit[l]);
034c6ed7 3592
2038c3f5 3593 for (l = 0; l < 3; l++) {
52c239d7 3594 c->stdio_fdname[l] = mfree(c->stdio_fdname[l]);
2038c3f5
LP
3595 c->stdio_file[l] = mfree(c->stdio_file[l]);
3596 }
52c239d7 3597
a1e58e8e
LP
3598 c->working_directory = mfree(c->working_directory);
3599 c->root_directory = mfree(c->root_directory);
915e6d16 3600 c->root_image = mfree(c->root_image);
a1e58e8e
LP
3601 c->tty_path = mfree(c->tty_path);
3602 c->syslog_identifier = mfree(c->syslog_identifier);
3603 c->user = mfree(c->user);
3604 c->group = mfree(c->group);
034c6ed7 3605
6796073e 3606 c->supplementary_groups = strv_free(c->supplementary_groups);
94f04347 3607
a1e58e8e 3608 c->pam_name = mfree(c->pam_name);
5b6319dc 3609
2a624c36
AP
3610 c->read_only_paths = strv_free(c->read_only_paths);
3611 c->read_write_paths = strv_free(c->read_write_paths);
3612 c->inaccessible_paths = strv_free(c->inaccessible_paths);
82c121a4 3613
d2d6c096
LP
3614 bind_mount_free_many(c->bind_mounts, c->n_bind_mounts);
3615
82c121a4
LP
3616 if (c->cpuset)
3617 CPU_FREE(c->cpuset);
86a3475b 3618
a1e58e8e
LP
3619 c->utmp_id = mfree(c->utmp_id);
3620 c->selinux_context = mfree(c->selinux_context);
3621 c->apparmor_profile = mfree(c->apparmor_profile);
5b8e1b77 3622 c->smack_process_label = mfree(c->smack_process_label);
eef65bf3 3623
8cfa775f 3624 c->syscall_filter = hashmap_free(c->syscall_filter);
525d3cc7
LP
3625 c->syscall_archs = set_free(c->syscall_archs);
3626 c->address_families = set_free(c->address_families);
e66cf1a3 3627
72fd1768 3628 for (i = 0; i < _EXEC_DIRECTORY_TYPE_MAX; i++)
3536f49e 3629 c->directories[i].paths = strv_free(c->directories[i].paths);
d3070fbd
LP
3630
3631 c->log_level_max = -1;
3632
3633 exec_context_free_log_extra_fields(c);
08f3be7a
LP
3634
3635 c->stdin_data = mfree(c->stdin_data);
3636 c->stdin_data_size = 0;
e66cf1a3
LP
3637}
3638
3639int exec_context_destroy_runtime_directory(ExecContext *c, const char *runtime_prefix) {
3640 char **i;
3641
3642 assert(c);
3643
3644 if (!runtime_prefix)
3645 return 0;
3646
3536f49e 3647 STRV_FOREACH(i, c->directories[EXEC_DIRECTORY_RUNTIME].paths) {
e66cf1a3
LP
3648 _cleanup_free_ char *p;
3649
605405c6 3650 p = strjoin(runtime_prefix, "/", *i);
e66cf1a3
LP
3651 if (!p)
3652 return -ENOMEM;
3653
6c47cd7d 3654 /* We execute this synchronously, since we need to be sure this is gone when we start the service
e66cf1a3 3655 * next. */
c6878637 3656 (void) rm_rf(p, REMOVE_ROOT);
e66cf1a3
LP
3657 }
3658
3659 return 0;
5cb5a6ff
LP
3660}
3661
43d0fcbd
LP
3662void exec_command_done(ExecCommand *c) {
3663 assert(c);
3664
a1e58e8e 3665 c->path = mfree(c->path);
43d0fcbd 3666
6796073e 3667 c->argv = strv_free(c->argv);
43d0fcbd
LP
3668}
3669
3670void exec_command_done_array(ExecCommand *c, unsigned n) {
3671 unsigned i;
3672
3673 for (i = 0; i < n; i++)
3674 exec_command_done(c+i);
3675}
3676
f1acf85a 3677ExecCommand* exec_command_free_list(ExecCommand *c) {
5cb5a6ff
LP
3678 ExecCommand *i;
3679
3680 while ((i = c)) {
71fda00f 3681 LIST_REMOVE(command, c, i);
43d0fcbd 3682 exec_command_done(i);
5cb5a6ff
LP
3683 free(i);
3684 }
f1acf85a
ZJS
3685
3686 return NULL;
5cb5a6ff
LP
3687}
3688
034c6ed7
LP
3689void exec_command_free_array(ExecCommand **c, unsigned n) {
3690 unsigned i;
3691
f1acf85a
ZJS
3692 for (i = 0; i < n; i++)
3693 c[i] = exec_command_free_list(c[i]);
034c6ed7
LP
3694}
3695
039f0e70 3696typedef struct InvalidEnvInfo {
f2341e0a 3697 Unit *unit;
039f0e70
LP
3698 const char *path;
3699} InvalidEnvInfo;
3700
3701static void invalid_env(const char *p, void *userdata) {
3702 InvalidEnvInfo *info = userdata;
3703
f2341e0a 3704 log_unit_error(info->unit, "Ignoring invalid environment assignment '%s': %s", p, info->path);
039f0e70
LP
3705}
3706
52c239d7
LB
3707const char* exec_context_fdname(const ExecContext *c, int fd_index) {
3708 assert(c);
3709
3710 switch (fd_index) {
5073ff6b 3711
52c239d7
LB
3712 case STDIN_FILENO:
3713 if (c->std_input != EXEC_INPUT_NAMED_FD)
3714 return NULL;
5073ff6b 3715
52c239d7 3716 return c->stdio_fdname[STDIN_FILENO] ?: "stdin";
5073ff6b 3717
52c239d7
LB
3718 case STDOUT_FILENO:
3719 if (c->std_output != EXEC_OUTPUT_NAMED_FD)
3720 return NULL;
5073ff6b 3721
52c239d7 3722 return c->stdio_fdname[STDOUT_FILENO] ?: "stdout";
5073ff6b 3723
52c239d7
LB
3724 case STDERR_FILENO:
3725 if (c->std_error != EXEC_OUTPUT_NAMED_FD)
3726 return NULL;
5073ff6b 3727
52c239d7 3728 return c->stdio_fdname[STDERR_FILENO] ?: "stderr";
5073ff6b 3729
52c239d7
LB
3730 default:
3731 return NULL;
3732 }
3733}
3734
3735int exec_context_named_iofds(Unit *unit, const ExecContext *c, const ExecParameters *p, int named_iofds[3]) {
3736 unsigned i, targets;
56fbd561 3737 const char* stdio_fdname[3];
4c47affc 3738 unsigned n_fds;
52c239d7
LB
3739
3740 assert(c);
3741 assert(p);
3742
3743 targets = (c->std_input == EXEC_INPUT_NAMED_FD) +
3744 (c->std_output == EXEC_OUTPUT_NAMED_FD) +
3745 (c->std_error == EXEC_OUTPUT_NAMED_FD);
3746
3747 for (i = 0; i < 3; i++)
3748 stdio_fdname[i] = exec_context_fdname(c, i);
3749
4c47affc
FB
3750 n_fds = p->n_storage_fds + p->n_socket_fds;
3751
3752 for (i = 0; i < n_fds && targets > 0; i++)
56fbd561
ZJS
3753 if (named_iofds[STDIN_FILENO] < 0 &&
3754 c->std_input == EXEC_INPUT_NAMED_FD &&
3755 stdio_fdname[STDIN_FILENO] &&
3756 streq(p->fd_names[i], stdio_fdname[STDIN_FILENO])) {
3757
52c239d7
LB
3758 named_iofds[STDIN_FILENO] = p->fds[i];
3759 targets--;
56fbd561
ZJS
3760
3761 } else if (named_iofds[STDOUT_FILENO] < 0 &&
3762 c->std_output == EXEC_OUTPUT_NAMED_FD &&
3763 stdio_fdname[STDOUT_FILENO] &&
3764 streq(p->fd_names[i], stdio_fdname[STDOUT_FILENO])) {
3765
52c239d7
LB
3766 named_iofds[STDOUT_FILENO] = p->fds[i];
3767 targets--;
56fbd561
ZJS
3768
3769 } else if (named_iofds[STDERR_FILENO] < 0 &&
3770 c->std_error == EXEC_OUTPUT_NAMED_FD &&
3771 stdio_fdname[STDERR_FILENO] &&
3772 streq(p->fd_names[i], stdio_fdname[STDERR_FILENO])) {
3773
52c239d7
LB
3774 named_iofds[STDERR_FILENO] = p->fds[i];
3775 targets--;
3776 }
3777
56fbd561 3778 return targets == 0 ? 0 : -ENOENT;
52c239d7
LB
3779}
3780
f2341e0a 3781int exec_context_load_environment(Unit *unit, const ExecContext *c, char ***l) {
8c7be95e
LP
3782 char **i, **r = NULL;
3783
3784 assert(c);
3785 assert(l);
3786
3787 STRV_FOREACH(i, c->environment_files) {
3788 char *fn;
52511fae
ZJS
3789 int k;
3790 unsigned n;
8c7be95e
LP
3791 bool ignore = false;
3792 char **p;
7fd1b19b 3793 _cleanup_globfree_ glob_t pglob = {};
8c7be95e
LP
3794
3795 fn = *i;
3796
3797 if (fn[0] == '-') {
3798 ignore = true;
313cefa1 3799 fn++;
8c7be95e
LP
3800 }
3801
3802 if (!path_is_absolute(fn)) {
8c7be95e
LP
3803 if (ignore)
3804 continue;
3805
3806 strv_free(r);
3807 return -EINVAL;
3808 }
3809
2bef10ab 3810 /* Filename supports globbing, take all matching files */
d8c92e8b
ZJS
3811 k = safe_glob(fn, 0, &pglob);
3812 if (k < 0) {
2bef10ab
PL
3813 if (ignore)
3814 continue;
8c7be95e 3815
2bef10ab 3816 strv_free(r);
d8c92e8b 3817 return k;
2bef10ab 3818 }
8c7be95e 3819
d8c92e8b
ZJS
3820 /* When we don't match anything, -ENOENT should be returned */
3821 assert(pglob.gl_pathc > 0);
3822
3823 for (n = 0; n < pglob.gl_pathc; n++) {
717603e3 3824 k = load_env_file(NULL, pglob.gl_pathv[n], NULL, &p);
2bef10ab
PL
3825 if (k < 0) {
3826 if (ignore)
3827 continue;
8c7be95e 3828
2bef10ab 3829 strv_free(r);
2bef10ab 3830 return k;
e9c1ea9d 3831 }
ebc05a09 3832 /* Log invalid environment variables with filename */
039f0e70
LP
3833 if (p) {
3834 InvalidEnvInfo info = {
f2341e0a 3835 .unit = unit,
039f0e70
LP
3836 .path = pglob.gl_pathv[n]
3837 };
3838
3839 p = strv_env_clean_with_callback(p, invalid_env, &info);
3840 }
8c7be95e 3841
2bef10ab
PL
3842 if (r == NULL)
3843 r = p;
3844 else {
3845 char **m;
8c7be95e 3846
2bef10ab
PL
3847 m = strv_env_merge(2, r, p);
3848 strv_free(r);
3849 strv_free(p);
c84a9488 3850 if (!m)
2bef10ab 3851 return -ENOMEM;
2bef10ab
PL
3852
3853 r = m;
3854 }
8c7be95e
LP
3855 }
3856 }
3857
3858 *l = r;
3859
3860 return 0;
3861}
3862
6ac8fdc9 3863static bool tty_may_match_dev_console(const char *tty) {
e1d75803 3864 _cleanup_free_ char *active = NULL;
7d6884b6 3865 char *console;
6ac8fdc9 3866
1e22b5cd
LP
3867 if (!tty)
3868 return true;
3869
a119ec7c 3870 tty = skip_dev_prefix(tty);
6ac8fdc9
MS
3871
3872 /* trivial identity? */
3873 if (streq(tty, "console"))
3874 return true;
3875
3876 console = resolve_dev_console(&active);
3877 /* if we could not resolve, assume it may */
3878 if (!console)
3879 return true;
3880
3881 /* "tty0" means the active VC, so it may be the same sometimes */
e1d75803 3882 return streq(console, tty) || (streq(console, "tty0") && tty_is_vc(tty));
6ac8fdc9
MS
3883}
3884
3885bool exec_context_may_touch_console(ExecContext *ec) {
1e22b5cd
LP
3886
3887 return (ec->tty_reset ||
3888 ec->tty_vhangup ||
3889 ec->tty_vt_disallocate ||
6ac8fdc9
MS
3890 is_terminal_input(ec->std_input) ||
3891 is_terminal_output(ec->std_output) ||
3892 is_terminal_output(ec->std_error)) &&
1e22b5cd 3893 tty_may_match_dev_console(exec_context_tty_path(ec));
6ac8fdc9
MS
3894}
3895
15ae422b
LP
3896static void strv_fprintf(FILE *f, char **l) {
3897 char **g;
3898
3899 assert(f);
3900
3901 STRV_FOREACH(g, l)
3902 fprintf(f, " %s", *g);
3903}
3904
5cb5a6ff 3905void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) {
d3070fbd 3906 ExecDirectoryType dt;
c2bbd90b 3907 char **e, **d;
94f04347 3908 unsigned i;
add00535 3909 int r;
9eba9da4 3910
5cb5a6ff
LP
3911 assert(c);
3912 assert(f);
3913
4ad49000 3914 prefix = strempty(prefix);
5cb5a6ff
LP
3915
3916 fprintf(f,
94f04347
LP
3917 "%sUMask: %04o\n"
3918 "%sWorkingDirectory: %s\n"
451a074f 3919 "%sRootDirectory: %s\n"
15ae422b 3920 "%sNonBlocking: %s\n"
64747e2d 3921 "%sPrivateTmp: %s\n"
7f112f50 3922 "%sPrivateDevices: %s\n"
59eeb84b 3923 "%sProtectKernelTunables: %s\n"
e66a2f65 3924 "%sProtectKernelModules: %s\n"
59eeb84b 3925 "%sProtectControlGroups: %s\n"
d251207d
LP
3926 "%sPrivateNetwork: %s\n"
3927 "%sPrivateUsers: %s\n"
1b8689f9
LP
3928 "%sProtectHome: %s\n"
3929 "%sProtectSystem: %s\n"
5d997827 3930 "%sMountAPIVFS: %s\n"
f3e43635 3931 "%sIgnoreSIGPIPE: %s\n"
f4170c67 3932 "%sMemoryDenyWriteExecute: %s\n"
b1edf445
LP
3933 "%sRestrictRealtime: %s\n"
3934 "%sKeyringMode: %s\n",
5cb5a6ff 3935 prefix, c->umask,
9eba9da4 3936 prefix, c->working_directory ? c->working_directory : "/",
451a074f 3937 prefix, c->root_directory ? c->root_directory : "/",
15ae422b 3938 prefix, yes_no(c->non_blocking),
64747e2d 3939 prefix, yes_no(c->private_tmp),
7f112f50 3940 prefix, yes_no(c->private_devices),
59eeb84b 3941 prefix, yes_no(c->protect_kernel_tunables),
e66a2f65 3942 prefix, yes_no(c->protect_kernel_modules),
59eeb84b 3943 prefix, yes_no(c->protect_control_groups),
d251207d
LP
3944 prefix, yes_no(c->private_network),
3945 prefix, yes_no(c->private_users),
1b8689f9
LP
3946 prefix, protect_home_to_string(c->protect_home),
3947 prefix, protect_system_to_string(c->protect_system),
5d997827 3948 prefix, yes_no(c->mount_apivfs),
f3e43635 3949 prefix, yes_no(c->ignore_sigpipe),
f4170c67 3950 prefix, yes_no(c->memory_deny_write_execute),
b1edf445
LP
3951 prefix, yes_no(c->restrict_realtime),
3952 prefix, exec_keyring_mode_to_string(c->keyring_mode));
fb33a393 3953
915e6d16
LP
3954 if (c->root_image)
3955 fprintf(f, "%sRootImage: %s\n", prefix, c->root_image);
3956
8c7be95e
LP
3957 STRV_FOREACH(e, c->environment)
3958 fprintf(f, "%sEnvironment: %s\n", prefix, *e);
3959
3960 STRV_FOREACH(e, c->environment_files)
3961 fprintf(f, "%sEnvironmentFile: %s\n", prefix, *e);
94f04347 3962
b4c14404
FB
3963 STRV_FOREACH(e, c->pass_environment)
3964 fprintf(f, "%sPassEnvironment: %s\n", prefix, *e);
3965
00819cc1
LP
3966 STRV_FOREACH(e, c->unset_environment)
3967 fprintf(f, "%sUnsetEnvironment: %s\n", prefix, *e);
3968
53f47dfc
YW
3969 fprintf(f, "%sRuntimeDirectoryPreserve: %s\n", prefix, exec_preserve_mode_to_string(c->runtime_directory_preserve_mode));
3970
72fd1768 3971 for (dt = 0; dt < _EXEC_DIRECTORY_TYPE_MAX; dt++) {
3536f49e
YW
3972 fprintf(f, "%s%sMode: %04o\n", prefix, exec_directory_type_to_string(dt), c->directories[dt].mode);
3973
3974 STRV_FOREACH(d, c->directories[dt].paths)
3975 fprintf(f, "%s%s: %s\n", prefix, exec_directory_type_to_string(dt), *d);
3976 }
c2bbd90b 3977
fb33a393
LP
3978 if (c->nice_set)
3979 fprintf(f,
3980 "%sNice: %i\n",
3981 prefix, c->nice);
3982
dd6c17b1 3983 if (c->oom_score_adjust_set)
fb33a393 3984 fprintf(f,
dd6c17b1
LP
3985 "%sOOMScoreAdjust: %i\n",
3986 prefix, c->oom_score_adjust);
9eba9da4 3987
94f04347 3988 for (i = 0; i < RLIM_NLIMITS; i++)
3c11da9d
EV
3989 if (c->rlimit[i]) {
3990 fprintf(f, "%s%s: " RLIM_FMT "\n",
3991 prefix, rlimit_to_string(i), c->rlimit[i]->rlim_max);
3992 fprintf(f, "%s%sSoft: " RLIM_FMT "\n",
3993 prefix, rlimit_to_string(i), c->rlimit[i]->rlim_cur);
3994 }
94f04347 3995
f8b69d1d 3996 if (c->ioprio_set) {
1756a011 3997 _cleanup_free_ char *class_str = NULL;
f8b69d1d 3998
837df140
YW
3999 r = ioprio_class_to_string_alloc(IOPRIO_PRIO_CLASS(c->ioprio), &class_str);
4000 if (r >= 0)
4001 fprintf(f, "%sIOSchedulingClass: %s\n", prefix, class_str);
4002
4003 fprintf(f, "%sIOPriority: %lu\n", prefix, IOPRIO_PRIO_DATA(c->ioprio));
f8b69d1d 4004 }
94f04347 4005
f8b69d1d 4006 if (c->cpu_sched_set) {
1756a011 4007 _cleanup_free_ char *policy_str = NULL;
f8b69d1d 4008
837df140
YW
4009 r = sched_policy_to_string_alloc(c->cpu_sched_policy, &policy_str);
4010 if (r >= 0)
4011 fprintf(f, "%sCPUSchedulingPolicy: %s\n", prefix, policy_str);
4012
94f04347 4013 fprintf(f,
38b48754
LP
4014 "%sCPUSchedulingPriority: %i\n"
4015 "%sCPUSchedulingResetOnFork: %s\n",
38b48754
LP
4016 prefix, c->cpu_sched_priority,
4017 prefix, yes_no(c->cpu_sched_reset_on_fork));
b929bf04 4018 }
94f04347 4019
82c121a4 4020 if (c->cpuset) {
94f04347 4021 fprintf(f, "%sCPUAffinity:", prefix);
82c121a4
LP
4022 for (i = 0; i < c->cpuset_ncpus; i++)
4023 if (CPU_ISSET_S(i, CPU_ALLOC_SIZE(c->cpuset_ncpus), c->cpuset))
43a99a7a 4024 fprintf(f, " %u", i);
94f04347
LP
4025 fputs("\n", f);
4026 }
4027
3a43da28 4028 if (c->timer_slack_nsec != NSEC_INFINITY)
ccd06097 4029 fprintf(f, "%sTimerSlackNSec: "NSEC_FMT "\n", prefix, c->timer_slack_nsec);
94f04347
LP
4030
4031 fprintf(f,
80876c20
LP
4032 "%sStandardInput: %s\n"
4033 "%sStandardOutput: %s\n"
4034 "%sStandardError: %s\n",
4035 prefix, exec_input_to_string(c->std_input),
4036 prefix, exec_output_to_string(c->std_output),
4037 prefix, exec_output_to_string(c->std_error));
4038
befc4a80
LP
4039 if (c->std_input == EXEC_INPUT_NAMED_FD)
4040 fprintf(f, "%sStandardInputFileDescriptorName: %s\n", prefix, c->stdio_fdname[STDIN_FILENO]);
4041 if (c->std_output == EXEC_OUTPUT_NAMED_FD)
4042 fprintf(f, "%sStandardOutputFileDescriptorName: %s\n", prefix, c->stdio_fdname[STDOUT_FILENO]);
4043 if (c->std_error == EXEC_OUTPUT_NAMED_FD)
4044 fprintf(f, "%sStandardErrorFileDescriptorName: %s\n", prefix, c->stdio_fdname[STDERR_FILENO]);
4045
4046 if (c->std_input == EXEC_INPUT_FILE)
4047 fprintf(f, "%sStandardInputFile: %s\n", prefix, c->stdio_file[STDIN_FILENO]);
4048 if (c->std_output == EXEC_OUTPUT_FILE)
4049 fprintf(f, "%sStandardOutputFile: %s\n", prefix, c->stdio_file[STDOUT_FILENO]);
4050 if (c->std_error == EXEC_OUTPUT_FILE)
4051 fprintf(f, "%sStandardErrorFile: %s\n", prefix, c->stdio_file[STDERR_FILENO]);
4052
80876c20
LP
4053 if (c->tty_path)
4054 fprintf(f,
6ea832a2
LP
4055 "%sTTYPath: %s\n"
4056 "%sTTYReset: %s\n"
4057 "%sTTYVHangup: %s\n"
4058 "%sTTYVTDisallocate: %s\n",
4059 prefix, c->tty_path,
4060 prefix, yes_no(c->tty_reset),
4061 prefix, yes_no(c->tty_vhangup),
4062 prefix, yes_no(c->tty_vt_disallocate));
94f04347 4063
9f6444eb
LP
4064 if (IN_SET(c->std_output,
4065 EXEC_OUTPUT_SYSLOG,
4066 EXEC_OUTPUT_KMSG,
4067 EXEC_OUTPUT_JOURNAL,
4068 EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
4069 EXEC_OUTPUT_KMSG_AND_CONSOLE,
4070 EXEC_OUTPUT_JOURNAL_AND_CONSOLE) ||
4071 IN_SET(c->std_error,
4072 EXEC_OUTPUT_SYSLOG,
4073 EXEC_OUTPUT_KMSG,
4074 EXEC_OUTPUT_JOURNAL,
4075 EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
4076 EXEC_OUTPUT_KMSG_AND_CONSOLE,
4077 EXEC_OUTPUT_JOURNAL_AND_CONSOLE)) {
f8b69d1d 4078
5ce70e5b 4079 _cleanup_free_ char *fac_str = NULL, *lvl_str = NULL;
f8b69d1d 4080
837df140
YW
4081 r = log_facility_unshifted_to_string_alloc(c->syslog_priority >> 3, &fac_str);
4082 if (r >= 0)
4083 fprintf(f, "%sSyslogFacility: %s\n", prefix, fac_str);
f8b69d1d 4084
837df140
YW
4085 r = log_level_to_string_alloc(LOG_PRI(c->syslog_priority), &lvl_str);
4086 if (r >= 0)
4087 fprintf(f, "%sSyslogLevel: %s\n", prefix, lvl_str);
f8b69d1d 4088 }
94f04347 4089
d3070fbd
LP
4090 if (c->log_level_max >= 0) {
4091 _cleanup_free_ char *t = NULL;
4092
4093 (void) log_level_to_string_alloc(c->log_level_max, &t);
4094
4095 fprintf(f, "%sLogLevelMax: %s\n", prefix, strna(t));
4096 }
4097
4098 if (c->n_log_extra_fields > 0) {
4099 size_t j;
4100
4101 for (j = 0; j < c->n_log_extra_fields; j++) {
4102 fprintf(f, "%sLogExtraFields: ", prefix);
4103 fwrite(c->log_extra_fields[j].iov_base,
4104 1, c->log_extra_fields[j].iov_len,
4105 f);
4106 fputc('\n', f);
4107 }
4108 }
4109
07d46372
YW
4110 if (c->secure_bits) {
4111 _cleanup_free_ char *str = NULL;
4112
4113 r = secure_bits_to_string_alloc(c->secure_bits, &str);
4114 if (r >= 0)
4115 fprintf(f, "%sSecure Bits: %s\n", prefix, str);
4116 }
94f04347 4117
a103496c 4118 if (c->capability_bounding_set != CAP_ALL) {
dd1f5bd0 4119 _cleanup_free_ char *str = NULL;
94f04347 4120
dd1f5bd0
YW
4121 r = capability_set_to_string_alloc(c->capability_bounding_set, &str);
4122 if (r >= 0)
4123 fprintf(f, "%sCapabilityBoundingSet: %s\n", prefix, str);
755d4b67
IP
4124 }
4125
4126 if (c->capability_ambient_set != 0) {
dd1f5bd0 4127 _cleanup_free_ char *str = NULL;
755d4b67 4128
dd1f5bd0
YW
4129 r = capability_set_to_string_alloc(c->capability_ambient_set, &str);
4130 if (r >= 0)
4131 fprintf(f, "%sAmbientCapabilities: %s\n", prefix, str);
94f04347
LP
4132 }
4133
4134 if (c->user)
f2d3769a 4135 fprintf(f, "%sUser: %s\n", prefix, c->user);
94f04347 4136 if (c->group)
f2d3769a 4137 fprintf(f, "%sGroup: %s\n", prefix, c->group);
94f04347 4138
29206d46
LP
4139 fprintf(f, "%sDynamicUser: %s\n", prefix, yes_no(c->dynamic_user));
4140
ac6e8be6 4141 if (!strv_isempty(c->supplementary_groups)) {
94f04347 4142 fprintf(f, "%sSupplementaryGroups:", prefix);
15ae422b
LP
4143 strv_fprintf(f, c->supplementary_groups);
4144 fputs("\n", f);
4145 }
94f04347 4146
5b6319dc 4147 if (c->pam_name)
f2d3769a 4148 fprintf(f, "%sPAMName: %s\n", prefix, c->pam_name);
5b6319dc 4149
2a624c36
AP
4150 if (strv_length(c->read_write_paths) > 0) {
4151 fprintf(f, "%sReadWritePaths:", prefix);
4152 strv_fprintf(f, c->read_write_paths);
15ae422b
LP
4153 fputs("\n", f);
4154 }
4155
2a624c36
AP
4156 if (strv_length(c->read_only_paths) > 0) {
4157 fprintf(f, "%sReadOnlyPaths:", prefix);
4158 strv_fprintf(f, c->read_only_paths);
15ae422b
LP
4159 fputs("\n", f);
4160 }
94f04347 4161
2a624c36
AP
4162 if (strv_length(c->inaccessible_paths) > 0) {
4163 fprintf(f, "%sInaccessiblePaths:", prefix);
4164 strv_fprintf(f, c->inaccessible_paths);
94f04347
LP
4165 fputs("\n", f);
4166 }
2e22afe9 4167
d2d6c096
LP
4168 if (c->n_bind_mounts > 0)
4169 for (i = 0; i < c->n_bind_mounts; i++) {
4170 fprintf(f, "%s%s: %s:%s:%s\n", prefix,
4171 c->bind_mounts[i].read_only ? "BindReadOnlyPaths" : "BindPaths",
4172 c->bind_mounts[i].source,
4173 c->bind_mounts[i].destination,
4174 c->bind_mounts[i].recursive ? "rbind" : "norbind");
4175 }
4176
169c1bda
LP
4177 if (c->utmp_id)
4178 fprintf(f,
4179 "%sUtmpIdentifier: %s\n",
4180 prefix, c->utmp_id);
7b52a628
MS
4181
4182 if (c->selinux_context)
4183 fprintf(f,
5f8640fb
LP
4184 "%sSELinuxContext: %s%s\n",
4185 prefix, c->selinux_context_ignore ? "-" : "", c->selinux_context);
17df7223 4186
80c21aea
WC
4187 if (c->apparmor_profile)
4188 fprintf(f,
4189 "%sAppArmorProfile: %s%s\n",
4190 prefix, c->apparmor_profile_ignore ? "-" : "", c->apparmor_profile);
4191
4192 if (c->smack_process_label)
4193 fprintf(f,
4194 "%sSmackProcessLabel: %s%s\n",
4195 prefix, c->smack_process_label_ignore ? "-" : "", c->smack_process_label);
4196
050f7277 4197 if (c->personality != PERSONALITY_INVALID)
ac45f971
LP
4198 fprintf(f,
4199 "%sPersonality: %s\n",
4200 prefix, strna(personality_to_string(c->personality)));
4201
78e864e5
TM
4202 fprintf(f,
4203 "%sLockPersonality: %s\n",
4204 prefix, yes_no(c->lock_personality));
4205
17df7223 4206 if (c->syscall_filter) {
349cc4a5 4207#if HAVE_SECCOMP
17df7223 4208 Iterator j;
8cfa775f 4209 void *id, *val;
17df7223 4210 bool first = true;
351a19b1 4211#endif
17df7223
LP
4212
4213 fprintf(f,
57183d11 4214 "%sSystemCallFilter: ",
17df7223
LP
4215 prefix);
4216
4217 if (!c->syscall_whitelist)
4218 fputc('~', f);
4219
349cc4a5 4220#if HAVE_SECCOMP
8cfa775f 4221 HASHMAP_FOREACH_KEY(val, id, c->syscall_filter, j) {
17df7223 4222 _cleanup_free_ char *name = NULL;
8cfa775f
YW
4223 const char *errno_name = NULL;
4224 int num = PTR_TO_INT(val);
17df7223
LP
4225
4226 if (first)
4227 first = false;
4228 else
4229 fputc(' ', f);
4230
57183d11 4231 name = seccomp_syscall_resolve_num_arch(SCMP_ARCH_NATIVE, PTR_TO_INT(id) - 1);
17df7223 4232 fputs(strna(name), f);
8cfa775f
YW
4233
4234 if (num >= 0) {
4235 errno_name = errno_to_name(num);
4236 if (errno_name)
4237 fprintf(f, ":%s", errno_name);
4238 else
4239 fprintf(f, ":%d", num);
4240 }
17df7223 4241 }
351a19b1 4242#endif
17df7223
LP
4243
4244 fputc('\n', f);
4245 }
4246
57183d11 4247 if (c->syscall_archs) {
349cc4a5 4248#if HAVE_SECCOMP
57183d11
LP
4249 Iterator j;
4250 void *id;
4251#endif
4252
4253 fprintf(f,
4254 "%sSystemCallArchitectures:",
4255 prefix);
4256
349cc4a5 4257#if HAVE_SECCOMP
57183d11
LP
4258 SET_FOREACH(id, c->syscall_archs, j)
4259 fprintf(f, " %s", strna(seccomp_arch_to_string(PTR_TO_UINT32(id) - 1)));
4260#endif
4261 fputc('\n', f);
4262 }
4263
add00535
LP
4264 if (exec_context_restrict_namespaces_set(c)) {
4265 _cleanup_free_ char *s = NULL;
4266
4267 r = namespace_flag_to_string_many(c->restrict_namespaces, &s);
4268 if (r >= 0)
4269 fprintf(f, "%sRestrictNamespaces: %s\n",
4270 prefix, s);
4271 }
4272
3df90f24
YW
4273 if (c->syscall_errno > 0) {
4274 const char *errno_name;
4275
4276 fprintf(f, "%sSystemCallErrorNumber: ", prefix);
4277
4278 errno_name = errno_to_name(c->syscall_errno);
4279 if (errno_name)
4280 fprintf(f, "%s\n", errno_name);
4281 else
4282 fprintf(f, "%d\n", c->syscall_errno);
4283 }
eef65bf3
MS
4284
4285 if (c->apparmor_profile)
4286 fprintf(f,
4287 "%sAppArmorProfile: %s%s\n",
4288 prefix, c->apparmor_profile_ignore ? "-" : "", c->apparmor_profile);
5cb5a6ff
LP
4289}
4290
a931ad47
LP
4291bool exec_context_maintains_privileges(ExecContext *c) {
4292 assert(c);
4293
61233823 4294 /* Returns true if the process forked off would run under
a931ad47
LP
4295 * an unchanged UID or as root. */
4296
4297 if (!c->user)
4298 return true;
4299
4300 if (streq(c->user, "root") || streq(c->user, "0"))
4301 return true;
4302
4303 return false;
4304}
4305
7f452159
LP
4306int exec_context_get_effective_ioprio(ExecContext *c) {
4307 int p;
4308
4309 assert(c);
4310
4311 if (c->ioprio_set)
4312 return c->ioprio;
4313
4314 p = ioprio_get(IOPRIO_WHO_PROCESS, 0);
4315 if (p < 0)
4316 return IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 4);
4317
4318 return p;
4319}
4320
d3070fbd
LP
4321void exec_context_free_log_extra_fields(ExecContext *c) {
4322 size_t l;
4323
4324 assert(c);
4325
4326 for (l = 0; l < c->n_log_extra_fields; l++)
4327 free(c->log_extra_fields[l].iov_base);
4328 c->log_extra_fields = mfree(c->log_extra_fields);
4329 c->n_log_extra_fields = 0;
4330}
4331
b58b4116 4332void exec_status_start(ExecStatus *s, pid_t pid) {
034c6ed7 4333 assert(s);
5cb5a6ff 4334
b58b4116
LP
4335 zero(*s);
4336 s->pid = pid;
4337 dual_timestamp_get(&s->start_timestamp);
4338}
4339
6ea832a2 4340void exec_status_exit(ExecStatus *s, ExecContext *context, pid_t pid, int code, int status) {
b58b4116
LP
4341 assert(s);
4342
0b1f4ae6 4343 if (s->pid && s->pid != pid)
b58b4116
LP
4344 zero(*s);
4345
034c6ed7 4346 s->pid = pid;
63983207 4347 dual_timestamp_get(&s->exit_timestamp);
9fb86720 4348
034c6ed7
LP
4349 s->code = code;
4350 s->status = status;
169c1bda 4351
6ea832a2
LP
4352 if (context) {
4353 if (context->utmp_id)
4354 utmp_put_dead_process(context->utmp_id, pid, code, status);
4355
1e22b5cd 4356 exec_context_tty_reset(context, NULL);
6ea832a2 4357 }
9fb86720
LP
4358}
4359
4360void exec_status_dump(ExecStatus *s, FILE *f, const char *prefix) {
4361 char buf[FORMAT_TIMESTAMP_MAX];
4362
4363 assert(s);
4364 assert(f);
4365
9fb86720
LP
4366 if (s->pid <= 0)
4367 return;
4368
4c940960
LP
4369 prefix = strempty(prefix);
4370
9fb86720 4371 fprintf(f,
ccd06097
ZJS
4372 "%sPID: "PID_FMT"\n",
4373 prefix, s->pid);
9fb86720 4374
af9d16e1 4375 if (dual_timestamp_is_set(&s->start_timestamp))
9fb86720
LP
4376 fprintf(f,
4377 "%sStart Timestamp: %s\n",
63983207 4378 prefix, format_timestamp(buf, sizeof(buf), s->start_timestamp.realtime));
9fb86720 4379
af9d16e1 4380 if (dual_timestamp_is_set(&s->exit_timestamp))
9fb86720
LP
4381 fprintf(f,
4382 "%sExit Timestamp: %s\n"
4383 "%sExit Code: %s\n"
4384 "%sExit Status: %i\n",
63983207 4385 prefix, format_timestamp(buf, sizeof(buf), s->exit_timestamp.realtime),
9fb86720
LP
4386 prefix, sigchld_code_to_string(s->code),
4387 prefix, s->status);
5cb5a6ff 4388}
44d8db9e 4389
9e2f7c11 4390char *exec_command_line(char **argv) {
44d8db9e
LP
4391 size_t k;
4392 char *n, *p, **a;
4393 bool first = true;
4394
9e2f7c11 4395 assert(argv);
44d8db9e 4396
9164977d 4397 k = 1;
9e2f7c11 4398 STRV_FOREACH(a, argv)
44d8db9e
LP
4399 k += strlen(*a)+3;
4400
5cd9cd35
LP
4401 n = new(char, k);
4402 if (!n)
44d8db9e
LP
4403 return NULL;
4404
4405 p = n;
9e2f7c11 4406 STRV_FOREACH(a, argv) {
44d8db9e
LP
4407
4408 if (!first)
4409 *(p++) = ' ';
4410 else
4411 first = false;
4412
4413 if (strpbrk(*a, WHITESPACE)) {
4414 *(p++) = '\'';
4415 p = stpcpy(p, *a);
4416 *(p++) = '\'';
4417 } else
4418 p = stpcpy(p, *a);
4419
4420 }
4421
9164977d
LP
4422 *p = 0;
4423
44d8db9e
LP
4424 /* FIXME: this doesn't really handle arguments that have
4425 * spaces and ticks in them */
4426
4427 return n;
4428}
4429
4430void exec_command_dump(ExecCommand *c, FILE *f, const char *prefix) {
e1d75803 4431 _cleanup_free_ char *cmd = NULL;
4c940960 4432 const char *prefix2;
44d8db9e
LP
4433
4434 assert(c);
4435 assert(f);
4436
4c940960 4437 prefix = strempty(prefix);
63c372cb 4438 prefix2 = strjoina(prefix, "\t");
44d8db9e 4439
9e2f7c11 4440 cmd = exec_command_line(c->argv);
44d8db9e
LP
4441 fprintf(f,
4442 "%sCommand Line: %s\n",
4443 prefix, cmd ? cmd : strerror(ENOMEM));
4444
9fb86720 4445 exec_status_dump(&c->exec_status, f, prefix2);
44d8db9e
LP
4446}
4447
4448void exec_command_dump_list(ExecCommand *c, FILE *f, const char *prefix) {
4449 assert(f);
4450
4c940960 4451 prefix = strempty(prefix);
44d8db9e
LP
4452
4453 LIST_FOREACH(command, c, c)
4454 exec_command_dump(c, f, prefix);
4455}
94f04347 4456
a6a80b4f
LP
4457void exec_command_append_list(ExecCommand **l, ExecCommand *e) {
4458 ExecCommand *end;
4459
4460 assert(l);
4461 assert(e);
4462
4463 if (*l) {
35b8ca3a 4464 /* It's kind of important, that we keep the order here */
71fda00f
LP
4465 LIST_FIND_TAIL(command, *l, end);
4466 LIST_INSERT_AFTER(command, *l, end, e);
a6a80b4f
LP
4467 } else
4468 *l = e;
4469}
4470
26fd040d
LP
4471int exec_command_set(ExecCommand *c, const char *path, ...) {
4472 va_list ap;
4473 char **l, *p;
4474
4475 assert(c);
4476 assert(path);
4477
4478 va_start(ap, path);
4479 l = strv_new_ap(path, ap);
4480 va_end(ap);
4481
4482 if (!l)
4483 return -ENOMEM;
4484
250a918d
LP
4485 p = strdup(path);
4486 if (!p) {
26fd040d
LP
4487 strv_free(l);
4488 return -ENOMEM;
4489 }
4490
4491 free(c->path);
4492 c->path = p;
4493
4494 strv_free(c->argv);
4495 c->argv = l;
4496
4497 return 0;
4498}
4499
86b23b07 4500int exec_command_append(ExecCommand *c, const char *path, ...) {
e63ff941 4501 _cleanup_strv_free_ char **l = NULL;
86b23b07 4502 va_list ap;
86b23b07
JS
4503 int r;
4504
4505 assert(c);
4506 assert(path);
4507
4508 va_start(ap, path);
4509 l = strv_new_ap(path, ap);
4510 va_end(ap);
4511
4512 if (!l)
4513 return -ENOMEM;
4514
e287086b 4515 r = strv_extend_strv(&c->argv, l, false);
e63ff941 4516 if (r < 0)
86b23b07 4517 return r;
86b23b07
JS
4518
4519 return 0;
4520}
4521
4522
613b411c
LP
4523static int exec_runtime_allocate(ExecRuntime **rt) {
4524
4525 if (*rt)
4526 return 0;
4527
4528 *rt = new0(ExecRuntime, 1);
f146f5e1 4529 if (!*rt)
613b411c
LP
4530 return -ENOMEM;
4531
4532 (*rt)->n_ref = 1;
4533 (*rt)->netns_storage_socket[0] = (*rt)->netns_storage_socket[1] = -1;
4534
4535 return 0;
4536}
4537
4538int exec_runtime_make(ExecRuntime **rt, ExecContext *c, const char *id) {
4539 int r;
4540
4541 assert(rt);
4542 assert(c);
4543 assert(id);
4544
4545 if (*rt)
4546 return 1;
4547
4548 if (!c->private_network && !c->private_tmp)
4549 return 0;
4550
4551 r = exec_runtime_allocate(rt);
4552 if (r < 0)
4553 return r;
4554
4555 if (c->private_network && (*rt)->netns_storage_socket[0] < 0) {
33df919d 4556 if (socketpair(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC, 0, (*rt)->netns_storage_socket) < 0)
613b411c
LP
4557 return -errno;
4558 }
4559
4560 if (c->private_tmp && !(*rt)->tmp_dir) {
4561 r = setup_tmp_dirs(id, &(*rt)->tmp_dir, &(*rt)->var_tmp_dir);
4562 if (r < 0)
4563 return r;
4564 }
4565
4566 return 1;
4567}
4568
4569ExecRuntime *exec_runtime_ref(ExecRuntime *r) {
4570 assert(r);
4571 assert(r->n_ref > 0);
4572
4573 r->n_ref++;
4574 return r;
4575}
4576
4577ExecRuntime *exec_runtime_unref(ExecRuntime *r) {
4578
4579 if (!r)
4580 return NULL;
4581
4582 assert(r->n_ref > 0);
4583
4584 r->n_ref--;
f2341e0a
LP
4585 if (r->n_ref > 0)
4586 return NULL;
4587
4588 free(r->tmp_dir);
4589 free(r->var_tmp_dir);
4590 safe_close_pair(r->netns_storage_socket);
6b430fdb 4591 return mfree(r);
613b411c
LP
4592}
4593
f2341e0a 4594int exec_runtime_serialize(Unit *u, ExecRuntime *rt, FILE *f, FDSet *fds) {
613b411c
LP
4595 assert(u);
4596 assert(f);
4597 assert(fds);
4598
4599 if (!rt)
4600 return 0;
4601
4602 if (rt->tmp_dir)
4603 unit_serialize_item(u, f, "tmp-dir", rt->tmp_dir);
4604
4605 if (rt->var_tmp_dir)
4606 unit_serialize_item(u, f, "var-tmp-dir", rt->var_tmp_dir);
4607
4608 if (rt->netns_storage_socket[0] >= 0) {
4609 int copy;
4610
4611 copy = fdset_put_dup(fds, rt->netns_storage_socket[0]);
4612 if (copy < 0)
4613 return copy;
4614
4615 unit_serialize_item_format(u, f, "netns-socket-0", "%i", copy);
4616 }
4617
4618 if (rt->netns_storage_socket[1] >= 0) {
4619 int copy;
4620
4621 copy = fdset_put_dup(fds, rt->netns_storage_socket[1]);
4622 if (copy < 0)
4623 return copy;
4624
4625 unit_serialize_item_format(u, f, "netns-socket-1", "%i", copy);
4626 }
4627
4628 return 0;
4629}
4630
f2341e0a 4631int exec_runtime_deserialize_item(Unit *u, ExecRuntime **rt, const char *key, const char *value, FDSet *fds) {
613b411c
LP
4632 int r;
4633
4634 assert(rt);
4635 assert(key);
4636 assert(value);
4637
4638 if (streq(key, "tmp-dir")) {
4639 char *copy;
4640
4641 r = exec_runtime_allocate(rt);
4642 if (r < 0)
f2341e0a 4643 return log_oom();
613b411c
LP
4644
4645 copy = strdup(value);
4646 if (!copy)
4647 return log_oom();
4648
4649 free((*rt)->tmp_dir);
4650 (*rt)->tmp_dir = copy;
4651
4652 } else if (streq(key, "var-tmp-dir")) {
4653 char *copy;
4654
4655 r = exec_runtime_allocate(rt);
4656 if (r < 0)
f2341e0a 4657 return log_oom();
613b411c
LP
4658
4659 copy = strdup(value);
4660 if (!copy)
4661 return log_oom();
4662
4663 free((*rt)->var_tmp_dir);
4664 (*rt)->var_tmp_dir = copy;
4665
4666 } else if (streq(key, "netns-socket-0")) {
4667 int fd;
4668
4669 r = exec_runtime_allocate(rt);
4670 if (r < 0)
f2341e0a 4671 return log_oom();
613b411c
LP
4672
4673 if (safe_atoi(value, &fd) < 0 || !fdset_contains(fds, fd))
f2341e0a 4674 log_unit_debug(u, "Failed to parse netns socket value: %s", value);
613b411c 4675 else {
03e334a1 4676 safe_close((*rt)->netns_storage_socket[0]);
613b411c
LP
4677 (*rt)->netns_storage_socket[0] = fdset_remove(fds, fd);
4678 }
4679 } else if (streq(key, "netns-socket-1")) {
4680 int fd;
4681
4682 r = exec_runtime_allocate(rt);
4683 if (r < 0)
f2341e0a 4684 return log_oom();
613b411c
LP
4685
4686 if (safe_atoi(value, &fd) < 0 || !fdset_contains(fds, fd))
f2341e0a 4687 log_unit_debug(u, "Failed to parse netns socket value: %s", value);
613b411c 4688 else {
03e334a1 4689 safe_close((*rt)->netns_storage_socket[1]);
613b411c
LP
4690 (*rt)->netns_storage_socket[1] = fdset_remove(fds, fd);
4691 }
4692 } else
4693 return 0;
4694
4695 return 1;
4696}
4697
4698static void *remove_tmpdir_thread(void *p) {
4699 _cleanup_free_ char *path = p;
4700
c6878637 4701 (void) rm_rf(path, REMOVE_ROOT|REMOVE_PHYSICAL);
613b411c
LP
4702 return NULL;
4703}
4704
4705void exec_runtime_destroy(ExecRuntime *rt) {
98b47d54
LP
4706 int r;
4707
613b411c
LP
4708 if (!rt)
4709 return;
4710
4711 /* If there are multiple users of this, let's leave the stuff around */
4712 if (rt->n_ref > 1)
4713 return;
4714
4715 if (rt->tmp_dir) {
4716 log_debug("Spawning thread to nuke %s", rt->tmp_dir);
98b47d54
LP
4717
4718 r = asynchronous_job(remove_tmpdir_thread, rt->tmp_dir);
4719 if (r < 0) {
da927ba9 4720 log_warning_errno(r, "Failed to nuke %s: %m", rt->tmp_dir);
98b47d54
LP
4721 free(rt->tmp_dir);
4722 }
4723
613b411c
LP
4724 rt->tmp_dir = NULL;
4725 }
4726
4727 if (rt->var_tmp_dir) {
4728 log_debug("Spawning thread to nuke %s", rt->var_tmp_dir);
98b47d54
LP
4729
4730 r = asynchronous_job(remove_tmpdir_thread, rt->var_tmp_dir);
4731 if (r < 0) {
da927ba9 4732 log_warning_errno(r, "Failed to nuke %s: %m", rt->var_tmp_dir);
98b47d54
LP
4733 free(rt->var_tmp_dir);
4734 }
4735
613b411c
LP
4736 rt->var_tmp_dir = NULL;
4737 }
4738
3d94f76c 4739 safe_close_pair(rt->netns_storage_socket);
613b411c
LP
4740}
4741
80876c20
LP
4742static const char* const exec_input_table[_EXEC_INPUT_MAX] = {
4743 [EXEC_INPUT_NULL] = "null",
4744 [EXEC_INPUT_TTY] = "tty",
4745 [EXEC_INPUT_TTY_FORCE] = "tty-force",
4f2d528d 4746 [EXEC_INPUT_TTY_FAIL] = "tty-fail",
52c239d7
LB
4747 [EXEC_INPUT_SOCKET] = "socket",
4748 [EXEC_INPUT_NAMED_FD] = "fd",
08f3be7a 4749 [EXEC_INPUT_DATA] = "data",
2038c3f5 4750 [EXEC_INPUT_FILE] = "file",
80876c20
LP
4751};
4752
8a0867d6
LP
4753DEFINE_STRING_TABLE_LOOKUP(exec_input, ExecInput);
4754
94f04347 4755static const char* const exec_output_table[_EXEC_OUTPUT_MAX] = {
80876c20 4756 [EXEC_OUTPUT_INHERIT] = "inherit",
94f04347 4757 [EXEC_OUTPUT_NULL] = "null",
80876c20 4758 [EXEC_OUTPUT_TTY] = "tty",
94f04347 4759 [EXEC_OUTPUT_SYSLOG] = "syslog",
28dbc1e8 4760 [EXEC_OUTPUT_SYSLOG_AND_CONSOLE] = "syslog+console",
9a6bca7a 4761 [EXEC_OUTPUT_KMSG] = "kmsg",
28dbc1e8 4762 [EXEC_OUTPUT_KMSG_AND_CONSOLE] = "kmsg+console",
706343f4
LP
4763 [EXEC_OUTPUT_JOURNAL] = "journal",
4764 [EXEC_OUTPUT_JOURNAL_AND_CONSOLE] = "journal+console",
52c239d7
LB
4765 [EXEC_OUTPUT_SOCKET] = "socket",
4766 [EXEC_OUTPUT_NAMED_FD] = "fd",
2038c3f5 4767 [EXEC_OUTPUT_FILE] = "file",
94f04347
LP
4768};
4769
4770DEFINE_STRING_TABLE_LOOKUP(exec_output, ExecOutput);
023a4f67
LP
4771
4772static const char* const exec_utmp_mode_table[_EXEC_UTMP_MODE_MAX] = {
4773 [EXEC_UTMP_INIT] = "init",
4774 [EXEC_UTMP_LOGIN] = "login",
4775 [EXEC_UTMP_USER] = "user",
4776};
4777
4778DEFINE_STRING_TABLE_LOOKUP(exec_utmp_mode, ExecUtmpMode);
53f47dfc
YW
4779
4780static const char* const exec_preserve_mode_table[_EXEC_PRESERVE_MODE_MAX] = {
4781 [EXEC_PRESERVE_NO] = "no",
4782 [EXEC_PRESERVE_YES] = "yes",
4783 [EXEC_PRESERVE_RESTART] = "restart",
4784};
4785
4786DEFINE_STRING_TABLE_LOOKUP_WITH_BOOLEAN(exec_preserve_mode, ExecPreserveMode, EXEC_PRESERVE_YES);
3536f49e 4787
72fd1768 4788static const char* const exec_directory_type_table[_EXEC_DIRECTORY_TYPE_MAX] = {
3536f49e
YW
4789 [EXEC_DIRECTORY_RUNTIME] = "RuntimeDirectory",
4790 [EXEC_DIRECTORY_STATE] = "StateDirectory",
4791 [EXEC_DIRECTORY_CACHE] = "CacheDirectory",
4792 [EXEC_DIRECTORY_LOGS] = "LogsDirectory",
4793 [EXEC_DIRECTORY_CONFIGURATION] = "ConfigurationDirectory",
4794};
4795
4796DEFINE_STRING_TABLE_LOOKUP(exec_directory_type, ExecDirectoryType);
b1edf445
LP
4797
4798static const char* const exec_keyring_mode_table[_EXEC_KEYRING_MODE_MAX] = {
4799 [EXEC_KEYRING_INHERIT] = "inherit",
4800 [EXEC_KEYRING_PRIVATE] = "private",
4801 [EXEC_KEYRING_SHARED] = "shared",
4802};
4803
4804DEFINE_STRING_TABLE_LOOKUP(exec_keyring_mode, ExecKeyringMode);