]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/core/execute.c
namespace: fall back gracefully when kernel doesn't support network namespaces (...
[thirdparty/systemd.git] / src / core / execute.c
CommitLineData
a7334b09
LP
1/***
2 This file is part of systemd.
3
4 Copyright 2010 Lennart Poettering
5
6 systemd is free software; you can redistribute it and/or modify it
5430f7f2
LP
7 under the terms of the GNU Lesser General Public License as published by
8 the Free Software Foundation; either version 2.1 of the License, or
a7334b09
LP
9 (at your option) any later version.
10
11 systemd is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
5430f7f2 14 Lesser General Public License for more details.
a7334b09 15
5430f7f2 16 You should have received a copy of the GNU Lesser General Public License
a7334b09
LP
17 along with systemd; If not, see <http://www.gnu.org/licenses/>.
18***/
19
034c6ed7
LP
20#include <errno.h>
21#include <fcntl.h>
8dd4c05b
LP
22#include <glob.h>
23#include <grp.h>
24#include <poll.h>
309bff19 25#include <signal.h>
8dd4c05b 26#include <string.h>
19c0b0b9 27#include <sys/capability.h>
d251207d 28#include <sys/eventfd.h>
f3e43635 29#include <sys/mman.h>
8dd4c05b 30#include <sys/personality.h>
94f04347 31#include <sys/prctl.h>
d2ffa389 32#include <sys/shm.h>
8dd4c05b 33#include <sys/socket.h>
451a074f 34#include <sys/stat.h>
d2ffa389 35#include <sys/types.h>
8dd4c05b
LP
36#include <sys/un.h>
37#include <unistd.h>
023a4f67 38#include <utmpx.h>
5cb5a6ff 39
349cc4a5 40#if HAVE_PAM
5b6319dc
LP
41#include <security/pam_appl.h>
42#endif
43
349cc4a5 44#if HAVE_SELINUX
7b52a628
MS
45#include <selinux/selinux.h>
46#endif
47
349cc4a5 48#if HAVE_SECCOMP
17df7223
LP
49#include <seccomp.h>
50#endif
51
349cc4a5 52#if HAVE_APPARMOR
eef65bf3
MS
53#include <sys/apparmor.h>
54#endif
55
24882e06 56#include "sd-messages.h"
8dd4c05b
LP
57
58#include "af-list.h"
b5efdb8a 59#include "alloc-util.h"
349cc4a5 60#if HAVE_APPARMOR
3ffd4af2
LP
61#include "apparmor-util.h"
62#endif
8dd4c05b
LP
63#include "async.h"
64#include "barrier.h"
8dd4c05b 65#include "cap-list.h"
430f0182 66#include "capability-util.h"
a1164ae3 67#include "chown-recursive.h"
f6a6225e 68#include "def.h"
4d1a6904 69#include "env-util.h"
17df7223 70#include "errno-list.h"
3ffd4af2 71#include "execute.h"
8dd4c05b 72#include "exit-status.h"
3ffd4af2 73#include "fd-util.h"
8dd4c05b 74#include "fileio.h"
f97b34a6 75#include "format-util.h"
f4f15635 76#include "fs-util.h"
7d50b32a 77#include "glob-util.h"
c004493c 78#include "io-util.h"
8dd4c05b 79#include "ioprio.h"
a1164ae3 80#include "label.h"
8dd4c05b
LP
81#include "log.h"
82#include "macro.h"
83#include "missing.h"
84#include "mkdir.h"
85#include "namespace.h"
6bedfcbb 86#include "parse-util.h"
8dd4c05b 87#include "path-util.h"
0b452006 88#include "process-util.h"
78f22b97 89#include "rlimit-util.h"
8dd4c05b 90#include "rm-rf.h"
349cc4a5 91#if HAVE_SECCOMP
3ffd4af2
LP
92#include "seccomp-util.h"
93#endif
8dd4c05b 94#include "securebits.h"
07d46372 95#include "securebits-util.h"
8dd4c05b 96#include "selinux-util.h"
24882e06 97#include "signal-util.h"
8dd4c05b 98#include "smack-util.h"
fd63e712 99#include "special.h"
8b43440b 100#include "string-table.h"
07630cea 101#include "string-util.h"
8dd4c05b 102#include "strv.h"
7ccbd1ae 103#include "syslog-util.h"
8dd4c05b
LP
104#include "terminal-util.h"
105#include "unit.h"
b1d4f8e1 106#include "user-util.h"
8dd4c05b
LP
107#include "util.h"
108#include "utmp-wtmp.h"
5cb5a6ff 109
/* Idle timeouts; their consumers are outside this part of the file.
 * NOTE(review): presumably the first and second stage of an idle wait -- confirm at the use sites. */
#define IDLE_TIMEOUT_USEC (5*USEC_PER_SEC)
#define IDLE_TIMEOUT2_USEC (1*USEC_PER_SEC)

/* Access mode enforced on terminals we take over. This assumes there is a 'tty' group. */
#define TTY_MODE 0620

/* Send buffer size requested for the stream connected to the journal (8 MiB). */
#define SNDBUF_SIZE (8*1024*1024)
117
034c6ed7
LP
/* Moves the passed file descriptors so that fds[i] ends up as fd number i+3, i.e. the array occupies the
 * contiguous range starting right after stdin/stdout/stderr.
 *
 * Note that the fds[] array itself is modified: every entry is replaced by the number the descriptor was
 * moved to. Returns 0 on success, or a negative errno-style error if duplicating a descriptor fails. */
static int shift_fds(int fds[], unsigned n_fds) {
        int first = 0;

        if (n_fds == 0)
                return 0;

        assert(fds);

        do {
                int retry_at = -1, idx;

                for (idx = first; idx < (int) n_fds; idx++) {
                        int wanted = idx + 3, got;

                        /* Nothing to do if this one already sits in its slot */
                        if (fds[idx] == wanted)
                                continue;

                        /* F_DUPFD hands out the lowest free fd >= wanted, which is not
                         * necessarily the slot we are after. */
                        got = fcntl(fds[idx], F_DUPFD, wanted);
                        if (got < 0)
                                return -errno;

                        safe_close(fds[idx]);
                        fds[idx] = got;

                        /* The slot was still occupied (by an fd we have yet to move). Remember
                         * the first such index so that another pass can start from there. */
                        if (got != wanted && retry_at < 0)
                                retry_at = idx;
                }

                first = retry_at;
        } while (first >= 0);

        return 0;
}
162
4c47affc
FB
/* Prepares the fds for being passed to a child: FD_CLOEXEC is dropped on every one of them, and
 * O_NONBLOCK is set or cleared (according to 'nonblock') on the first n_socket_fds entries only.
 *
 * Returns 0 on success, or the negative error of the first helper call that failed. */
static int flags_fds(const int fds[], unsigned n_storage_fds, unsigned n_socket_fds, bool nonblock) {
        const unsigned total = n_storage_fds + n_socket_fds;
        unsigned k;

        if (total == 0)
                return 0;

        assert(fds);

        for (k = 0; k < total; k++) {
                int r;

                /* O_NONBLOCK is only adjusted for the leading socket fds */
                if (k < n_socket_fds) {
                        r = fd_nonblock(fds[k], nonblock);
                        if (r < 0)
                                return r;
                }

                /* FD_CLOEXEC always goes away, as the whole point is to hand these fds to
                 * our children. */
                r = fd_cloexec(fds[k], false);
                if (r < 0)
                        return r;
        }

        return 0;
}
195
1e22b5cd 196static const char *exec_context_tty_path(const ExecContext *context) {
80876c20
LP
197 assert(context);
198
1e22b5cd
LP
199 if (context->stdio_as_fds)
200 return NULL;
201
80876c20
LP
202 if (context->tty_path)
203 return context->tty_path;
204
205 return "/dev/console";
206}
207
1e22b5cd
LP
208static void exec_context_tty_reset(const ExecContext *context, const ExecParameters *p) {
209 const char *path;
210
6ea832a2
LP
211 assert(context);
212
1e22b5cd 213 path = exec_context_tty_path(context);
6ea832a2 214
1e22b5cd
LP
215 if (context->tty_vhangup) {
216 if (p && p->stdin_fd >= 0)
217 (void) terminal_vhangup_fd(p->stdin_fd);
218 else if (path)
219 (void) terminal_vhangup(path);
220 }
6ea832a2 221
1e22b5cd
LP
222 if (context->tty_reset) {
223 if (p && p->stdin_fd >= 0)
224 (void) reset_terminal_fd(p->stdin_fd, true);
225 else if (path)
226 (void) reset_terminal(path);
227 }
228
229 if (context->tty_vt_disallocate && path)
230 (void) vt_disallocate(path);
6ea832a2
LP
231}
232
6af760f3
LP
233static bool is_terminal_input(ExecInput i) {
234 return IN_SET(i,
235 EXEC_INPUT_TTY,
236 EXEC_INPUT_TTY_FORCE,
237 EXEC_INPUT_TTY_FAIL);
238}
239
3a1286b6 240static bool is_terminal_output(ExecOutput o) {
6af760f3
LP
241 return IN_SET(o,
242 EXEC_OUTPUT_TTY,
243 EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
244 EXEC_OUTPUT_KMSG_AND_CONSOLE,
245 EXEC_OUTPUT_JOURNAL_AND_CONSOLE);
246}
247
aac8c0c3
LP
248static bool is_syslog_output(ExecOutput o) {
249 return IN_SET(o,
250 EXEC_OUTPUT_SYSLOG,
251 EXEC_OUTPUT_SYSLOG_AND_CONSOLE);
252}
253
254static bool is_kmsg_output(ExecOutput o) {
255 return IN_SET(o,
256 EXEC_OUTPUT_KMSG,
257 EXEC_OUTPUT_KMSG_AND_CONSOLE);
258}
259
6af760f3
LP
260static bool exec_context_needs_term(const ExecContext *c) {
261 assert(c);
262
263 /* Return true if the execution context suggests we should set $TERM to something useful. */
264
265 if (is_terminal_input(c->std_input))
266 return true;
267
268 if (is_terminal_output(c->std_output))
269 return true;
270
271 if (is_terminal_output(c->std_error))
272 return true;
273
274 return !!c->tty_path;
3a1286b6
MS
275}
276
80876c20
LP
/* Opens /dev/null with the specified open flags (O_NOCTTY is always added) and installs it as file
 * descriptor number nfd. Returns nfd on success, a negative errno-style error otherwise. */
static int open_null_as(int flags, int nfd) {
        int null_fd, result;

        assert(nfd >= 0);

        null_fd = open("/dev/null", flags|O_NOCTTY);
        if (null_fd < 0)
                return -errno;

        /* We may have been handed the right number straight away */
        if (null_fd == nfd)
                return nfd;

        if (dup2(null_fd, nfd) < 0)
                result = -errno;
        else
                result = nfd;

        safe_close(null_fd);
        return result;
}
294
524daa8c 295static int connect_journal_socket(int fd, uid_t uid, gid_t gid) {
92a17af9 296 static const union sockaddr_union sa = {
b92bea5d
ZJS
297 .un.sun_family = AF_UNIX,
298 .un.sun_path = "/run/systemd/journal/stdout",
299 };
524daa8c
ZJS
300 uid_t olduid = UID_INVALID;
301 gid_t oldgid = GID_INVALID;
302 int r;
303
cad93f29 304 if (gid_is_valid(gid)) {
524daa8c
ZJS
305 oldgid = getgid();
306
92a17af9 307 if (setegid(gid) < 0)
524daa8c
ZJS
308 return -errno;
309 }
310
cad93f29 311 if (uid_is_valid(uid)) {
524daa8c
ZJS
312 olduid = getuid();
313
92a17af9 314 if (seteuid(uid) < 0) {
524daa8c
ZJS
315 r = -errno;
316 goto restore_gid;
317 }
318 }
319
92a17af9 320 r = connect(fd, &sa.sa, SOCKADDR_UN_LEN(sa.un)) < 0 ? -errno : 0;
524daa8c
ZJS
321
322 /* If we fail to restore the uid or gid, things will likely
323 fail later on. This should only happen if an LSM interferes. */
324
cad93f29 325 if (uid_is_valid(uid))
524daa8c
ZJS
326 (void) seteuid(olduid);
327
328 restore_gid:
cad93f29 329 if (gid_is_valid(gid))
524daa8c
ZJS
330 (void) setegid(oldgid);
331
332 return r;
333}
334
fd1f9c89 335static int connect_logger_as(
7a1ab780 336 Unit *unit,
fd1f9c89 337 const ExecContext *context,
af635cf3 338 const ExecParameters *params,
fd1f9c89
LP
339 ExecOutput output,
340 const char *ident,
fd1f9c89
LP
341 int nfd,
342 uid_t uid,
343 gid_t gid) {
344
524daa8c 345 int fd, r;
071830ff
LP
346
347 assert(context);
af635cf3 348 assert(params);
80876c20
LP
349 assert(output < _EXEC_OUTPUT_MAX);
350 assert(ident);
351 assert(nfd >= 0);
071830ff 352
54fe0cdb
LP
353 fd = socket(AF_UNIX, SOCK_STREAM, 0);
354 if (fd < 0)
80876c20 355 return -errno;
071830ff 356
524daa8c
ZJS
357 r = connect_journal_socket(fd, uid, gid);
358 if (r < 0)
359 return r;
071830ff 360
80876c20 361 if (shutdown(fd, SHUT_RD) < 0) {
03e334a1 362 safe_close(fd);
80876c20
LP
363 return -errno;
364 }
071830ff 365
fd1f9c89 366 (void) fd_inc_sndbuf(fd, SNDBUF_SIZE);
531dca78 367
80876c20 368 dprintf(fd,
62bca2c6 369 "%s\n"
80876c20
LP
370 "%s\n"
371 "%i\n"
54fe0cdb
LP
372 "%i\n"
373 "%i\n"
374 "%i\n"
4f4a1dbf 375 "%i\n",
c867611e 376 context->syslog_identifier ?: ident,
af635cf3 377 params->flags & EXEC_PASS_LOG_UNIT ? unit->id : "",
54fe0cdb
LP
378 context->syslog_priority,
379 !!context->syslog_level_prefix,
aac8c0c3
LP
380 is_syslog_output(output),
381 is_kmsg_output(output),
3a1286b6 382 is_terminal_output(output));
80876c20 383
fd1f9c89
LP
384 if (fd == nfd)
385 return nfd;
386
387 r = dup2(fd, nfd) < 0 ? -errno : nfd;
388 safe_close(fd);
071830ff 389
80876c20
LP
390 return r;
391}
/* Opens the terminal at 'path' with the specified open flags (O_NOCTTY is always added) and installs it as
 * file descriptor number nfd. Returns nfd on success, a negative errno-style error otherwise. */
static int open_terminal_as(const char *path, mode_t mode, int nfd) {
        int tty_fd, result;

        assert(path);
        assert(nfd >= 0);

        tty_fd = open_terminal(path, mode | O_NOCTTY);
        if (tty_fd < 0)
                return tty_fd;

        /* Already at the right number? Then we are done. */
        if (tty_fd == nfd)
                return nfd;

        if (dup2(tty_fd, nfd) < 0)
                result = -errno;
        else
                result = nfd;

        safe_close(tty_fd);
        return result;
}
071830ff 410
1e3ad081
LP
411static int fixup_input(ExecInput std_input, int socket_fd, bool apply_tty_stdin) {
412
413 if (is_terminal_input(std_input) && !apply_tty_stdin)
414 return EXEC_INPUT_NULL;
071830ff 415
03fd9c49 416 if (std_input == EXEC_INPUT_SOCKET && socket_fd < 0)
4f2d528d
LP
417 return EXEC_INPUT_NULL;
418
03fd9c49 419 return std_input;
4f2d528d
LP
420}
421
03fd9c49 422static int fixup_output(ExecOutput std_output, int socket_fd) {
4f2d528d 423
03fd9c49 424 if (std_output == EXEC_OUTPUT_SOCKET && socket_fd < 0)
4f2d528d
LP
425 return EXEC_OUTPUT_INHERIT;
426
03fd9c49 427 return std_output;
4f2d528d
LP
428}
429
a34ceba6
LP
430static int setup_input(
431 const ExecContext *context,
432 const ExecParameters *params,
52c239d7
LB
433 int socket_fd,
434 int named_iofds[3]) {
a34ceba6 435
4f2d528d
LP
436 ExecInput i;
437
438 assert(context);
a34ceba6
LP
439 assert(params);
440
441 if (params->stdin_fd >= 0) {
442 if (dup2(params->stdin_fd, STDIN_FILENO) < 0)
443 return -errno;
444
445 /* Try to make this the controlling tty, if it is a tty, and reset it */
446 (void) ioctl(STDIN_FILENO, TIOCSCTTY, context->std_input == EXEC_INPUT_TTY_FORCE);
447 (void) reset_terminal_fd(STDIN_FILENO, true);
448
449 return STDIN_FILENO;
450 }
4f2d528d 451
c39f1ce2 452 i = fixup_input(context->std_input, socket_fd, params->flags & EXEC_APPLY_TTY_STDIN);
4f2d528d
LP
453
454 switch (i) {
071830ff 455
80876c20
LP
456 case EXEC_INPUT_NULL:
457 return open_null_as(O_RDONLY, STDIN_FILENO);
458
459 case EXEC_INPUT_TTY:
460 case EXEC_INPUT_TTY_FORCE:
461 case EXEC_INPUT_TTY_FAIL: {
462 int fd, r;
071830ff 463
1e22b5cd 464 fd = acquire_terminal(exec_context_tty_path(context),
970edce6
ZJS
465 i == EXEC_INPUT_TTY_FAIL,
466 i == EXEC_INPUT_TTY_FORCE,
467 false,
3a43da28 468 USEC_INFINITY);
970edce6 469 if (fd < 0)
80876c20
LP
470 return fd;
471
472 if (fd != STDIN_FILENO) {
473 r = dup2(fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
03e334a1 474 safe_close(fd);
80876c20
LP
475 } else
476 r = STDIN_FILENO;
477
478 return r;
479 }
480
4f2d528d
LP
481 case EXEC_INPUT_SOCKET:
482 return dup2(socket_fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
483
52c239d7
LB
484 case EXEC_INPUT_NAMED_FD:
485 (void) fd_nonblock(named_iofds[STDIN_FILENO], false);
486 return dup2(named_iofds[STDIN_FILENO], STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
487
80876c20
LP
488 default:
489 assert_not_reached("Unknown input type");
490 }
491}
492
a34ceba6
LP
493static int setup_output(
494 Unit *unit,
495 const ExecContext *context,
496 const ExecParameters *params,
497 int fileno,
498 int socket_fd,
52c239d7 499 int named_iofds[3],
a34ceba6 500 const char *ident,
7bce046b
LP
501 uid_t uid,
502 gid_t gid,
503 dev_t *journal_stream_dev,
504 ino_t *journal_stream_ino) {
a34ceba6 505
4f2d528d
LP
506 ExecOutput o;
507 ExecInput i;
47c1d80d 508 int r;
4f2d528d 509
f2341e0a 510 assert(unit);
80876c20 511 assert(context);
a34ceba6 512 assert(params);
80876c20 513 assert(ident);
7bce046b
LP
514 assert(journal_stream_dev);
515 assert(journal_stream_ino);
80876c20 516
a34ceba6
LP
517 if (fileno == STDOUT_FILENO && params->stdout_fd >= 0) {
518
519 if (dup2(params->stdout_fd, STDOUT_FILENO) < 0)
520 return -errno;
521
522 return STDOUT_FILENO;
523 }
524
525 if (fileno == STDERR_FILENO && params->stderr_fd >= 0) {
526 if (dup2(params->stderr_fd, STDERR_FILENO) < 0)
527 return -errno;
528
529 return STDERR_FILENO;
530 }
531
c39f1ce2 532 i = fixup_input(context->std_input, socket_fd, params->flags & EXEC_APPLY_TTY_STDIN);
03fd9c49 533 o = fixup_output(context->std_output, socket_fd);
4f2d528d 534
eb17e935
MS
535 if (fileno == STDERR_FILENO) {
536 ExecOutput e;
537 e = fixup_output(context->std_error, socket_fd);
80876c20 538
eb17e935
MS
539 /* This expects the input and output are already set up */
540
541 /* Don't change the stderr file descriptor if we inherit all
542 * the way and are not on a tty */
543 if (e == EXEC_OUTPUT_INHERIT &&
544 o == EXEC_OUTPUT_INHERIT &&
545 i == EXEC_INPUT_NULL &&
546 !is_terminal_input(context->std_input) &&
547 getppid () != 1)
548 return fileno;
549
550 /* Duplicate from stdout if possible */
52c239d7 551 if ((e == o && e != EXEC_OUTPUT_NAMED_FD) || e == EXEC_OUTPUT_INHERIT)
eb17e935 552 return dup2(STDOUT_FILENO, fileno) < 0 ? -errno : fileno;
071830ff 553
eb17e935 554 o = e;
80876c20 555
eb17e935 556 } else if (o == EXEC_OUTPUT_INHERIT) {
21d21ea4
LP
557 /* If input got downgraded, inherit the original value */
558 if (i == EXEC_INPUT_NULL && is_terminal_input(context->std_input))
1e22b5cd 559 return open_terminal_as(exec_context_tty_path(context), O_WRONLY, fileno);
21d21ea4 560
acb591e4 561 /* If the input is connected to anything that's not a /dev/null, inherit that... */
ff876e28 562 if (i != EXEC_INPUT_NULL)
eb17e935 563 return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
071830ff 564
acb591e4
LP
565 /* If we are not started from PID 1 we just inherit STDOUT from our parent process. */
566 if (getppid() != 1)
eb17e935 567 return fileno;
94f04347 568
eb17e935
MS
569 /* We need to open /dev/null here anew, to get the right access mode. */
570 return open_null_as(O_WRONLY, fileno);
071830ff 571 }
94f04347 572
eb17e935 573 switch (o) {
80876c20
LP
574
575 case EXEC_OUTPUT_NULL:
eb17e935 576 return open_null_as(O_WRONLY, fileno);
80876c20
LP
577
578 case EXEC_OUTPUT_TTY:
4f2d528d 579 if (is_terminal_input(i))
eb17e935 580 return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
80876c20
LP
581
582 /* We don't reset the terminal if this is just about output */
1e22b5cd 583 return open_terminal_as(exec_context_tty_path(context), O_WRONLY, fileno);
80876c20
LP
584
585 case EXEC_OUTPUT_SYSLOG:
28dbc1e8 586 case EXEC_OUTPUT_SYSLOG_AND_CONSOLE:
9a6bca7a 587 case EXEC_OUTPUT_KMSG:
28dbc1e8 588 case EXEC_OUTPUT_KMSG_AND_CONSOLE:
706343f4
LP
589 case EXEC_OUTPUT_JOURNAL:
590 case EXEC_OUTPUT_JOURNAL_AND_CONSOLE:
af635cf3 591 r = connect_logger_as(unit, context, params, o, ident, fileno, uid, gid);
47c1d80d 592 if (r < 0) {
82677ae4 593 log_unit_warning_errno(unit, r, "Failed to connect %s to the journal socket, ignoring: %m", fileno == STDOUT_FILENO ? "stdout" : "stderr");
eb17e935 594 r = open_null_as(O_WRONLY, fileno);
7bce046b
LP
595 } else {
596 struct stat st;
597
598 /* If we connected this fd to the journal via a stream, patch the device/inode into the passed
599 * parameters, but only then. This is useful so that we can set $JOURNAL_STREAM that permits
ab2116b1
LP
600 * services to detect whether they are connected to the journal or not.
601 *
602 * If both stdout and stderr are connected to a stream then let's make sure to store the data
603 * about STDERR as that's usually the best way to do logging. */
7bce046b 604
ab2116b1
LP
605 if (fstat(fileno, &st) >= 0 &&
606 (*journal_stream_ino == 0 || fileno == STDERR_FILENO)) {
7bce046b
LP
607 *journal_stream_dev = st.st_dev;
608 *journal_stream_ino = st.st_ino;
609 }
47c1d80d
MS
610 }
611 return r;
4f2d528d
LP
612
613 case EXEC_OUTPUT_SOCKET:
614 assert(socket_fd >= 0);
eb17e935 615 return dup2(socket_fd, fileno) < 0 ? -errno : fileno;
94f04347 616
52c239d7
LB
617 case EXEC_OUTPUT_NAMED_FD:
618 (void) fd_nonblock(named_iofds[fileno], false);
619 return dup2(named_iofds[fileno], fileno) < 0 ? -errno : fileno;
620
94f04347 621 default:
80876c20 622 assert_not_reached("Unknown error type");
94f04347 623 }
071830ff
LP
624}
625
02a51aba
LP
626static int chown_terminal(int fd, uid_t uid) {
627 struct stat st;
628
629 assert(fd >= 0);
02a51aba 630
1ff74fb6
LP
631 /* Before we chown/chmod the TTY, let's ensure this is actually a tty */
632 if (isatty(fd) < 1)
633 return 0;
634
02a51aba 635 /* This might fail. What matters are the results. */
bab45044
LP
636 (void) fchown(fd, uid, -1);
637 (void) fchmod(fd, TTY_MODE);
02a51aba
LP
638
639 if (fstat(fd, &st) < 0)
640 return -errno;
641
d8b4e2e9 642 if (st.st_uid != uid || (st.st_mode & 0777) != TTY_MODE)
02a51aba
LP
643 return -EPERM;
644
645 return 0;
646}
647
7d5ceb64 648static int setup_confirm_stdio(const char *vc, int *_saved_stdin, int *_saved_stdout) {
3d18b167
LP
649 _cleanup_close_ int fd = -1, saved_stdin = -1, saved_stdout = -1;
650 int r;
80876c20 651
80876c20
LP
652 assert(_saved_stdin);
653 assert(_saved_stdout);
654
af6da548
LP
655 saved_stdin = fcntl(STDIN_FILENO, F_DUPFD, 3);
656 if (saved_stdin < 0)
657 return -errno;
80876c20 658
af6da548 659 saved_stdout = fcntl(STDOUT_FILENO, F_DUPFD, 3);
3d18b167
LP
660 if (saved_stdout < 0)
661 return -errno;
80876c20 662
7d5ceb64 663 fd = acquire_terminal(vc, false, false, false, DEFAULT_CONFIRM_USEC);
3d18b167
LP
664 if (fd < 0)
665 return fd;
80876c20 666
af6da548
LP
667 r = chown_terminal(fd, getuid());
668 if (r < 0)
3d18b167 669 return r;
02a51aba 670
3d18b167
LP
671 r = reset_terminal_fd(fd, true);
672 if (r < 0)
673 return r;
80876c20 674
3d18b167
LP
675 if (dup2(fd, STDIN_FILENO) < 0)
676 return -errno;
677
678 if (dup2(fd, STDOUT_FILENO) < 0)
679 return -errno;
80876c20
LP
680
681 if (fd >= 2)
03e334a1 682 safe_close(fd);
3d18b167 683 fd = -1;
80876c20
LP
684
685 *_saved_stdin = saved_stdin;
686 *_saved_stdout = saved_stdout;
687
3d18b167 688 saved_stdin = saved_stdout = -1;
80876c20 689
3d18b167 690 return 0;
80876c20
LP
691}
692
63d77c92 693static void write_confirm_error_fd(int err, int fd, const Unit *u) {
3b20f877
FB
694 assert(err < 0);
695
696 if (err == -ETIMEDOUT)
63d77c92 697 dprintf(fd, "Confirmation question timed out for %s, assuming positive response.\n", u->id);
3b20f877
FB
698 else {
699 errno = -err;
63d77c92 700 dprintf(fd, "Couldn't ask confirmation for %s: %m, assuming positive response.\n", u->id);
3b20f877
FB
701 }
702}
703
63d77c92 704static void write_confirm_error(int err, const char *vc, const Unit *u) {
03e334a1 705 _cleanup_close_ int fd = -1;
80876c20 706
3b20f877 707 assert(vc);
80876c20 708
7d5ceb64 709 fd = open_terminal(vc, O_WRONLY|O_NOCTTY|O_CLOEXEC);
af6da548 710 if (fd < 0)
3b20f877 711 return;
80876c20 712
63d77c92 713 write_confirm_error_fd(err, fd, u);
af6da548 714}
80876c20 715
/* Undoes setup_confirm_stdio(): releases the terminal, puts the saved stdin/stdout back in place (where
 * they are valid) and closes the saved copies.
 *
 * Returns 0 on success. If restoring an fd fails the negative errno-style error is returned (that of
 * stdout if both fail), but the function carries on regardless. */
static int restore_confirm_stdio(int *saved_stdin, int *saved_stdout) {
        int result = 0;

        assert(saved_stdin);
        assert(saved_stdout);

        release_terminal();

        if (*saved_stdin >= 0 && dup2(*saved_stdin, STDIN_FILENO) < 0)
                result = -errno;

        if (*saved_stdout >= 0 && dup2(*saved_stdout, STDOUT_FILENO) < 0)
                result = -errno;

        /* The copies are not needed anymore; the slots receive whatever the close helper returns */
        *saved_stdin = safe_close(*saved_stdin);
        *saved_stdout = safe_close(*saved_stdout);

        return result;
}
737
3b20f877
FB
/* Possible outcomes of ask_for_confirmation() */
enum {
        CONFIRM_PRETEND_FAILURE = -1, /* don't execute the command, pretend it failed */
        CONFIRM_PRETEND_SUCCESS =  0, /* don't execute the command, pretend it succeeded */
        CONFIRM_EXECUTE         =  1, /* execute the command */
};
743
eedf223a 744static int ask_for_confirmation(const char *vc, Unit *u, const char *cmdline) {
af6da548 745 int saved_stdout = -1, saved_stdin = -1, r;
2bcd3c26 746 _cleanup_free_ char *e = NULL;
3b20f877 747 char c;
af6da548 748
3b20f877 749 /* For any internal errors, assume a positive response. */
7d5ceb64 750 r = setup_confirm_stdio(vc, &saved_stdin, &saved_stdout);
3b20f877 751 if (r < 0) {
63d77c92 752 write_confirm_error(r, vc, u);
3b20f877
FB
753 return CONFIRM_EXECUTE;
754 }
af6da548 755
b0eb2944
FB
756 /* confirm_spawn might have been disabled while we were sleeping. */
757 if (manager_is_confirm_spawn_disabled(u->manager)) {
758 r = 1;
759 goto restore_stdio;
760 }
af6da548 761
2bcd3c26
FB
762 e = ellipsize(cmdline, 60, 100);
763 if (!e) {
764 log_oom();
765 r = CONFIRM_EXECUTE;
766 goto restore_stdio;
767 }
af6da548 768
d172b175 769 for (;;) {
539622bd 770 r = ask_char(&c, "yfshiDjcn", "Execute %s? [y, f, s – h for help] ", e);
d172b175 771 if (r < 0) {
63d77c92 772 write_confirm_error_fd(r, STDOUT_FILENO, u);
d172b175
FB
773 r = CONFIRM_EXECUTE;
774 goto restore_stdio;
775 }
af6da548 776
d172b175 777 switch (c) {
b0eb2944
FB
778 case 'c':
779 printf("Resuming normal execution.\n");
780 manager_disable_confirm_spawn();
781 r = 1;
782 break;
dd6f9ac0
FB
783 case 'D':
784 unit_dump(u, stdout, " ");
785 continue; /* ask again */
d172b175
FB
786 case 'f':
787 printf("Failing execution.\n");
788 r = CONFIRM_PRETEND_FAILURE;
789 break;
790 case 'h':
b0eb2944
FB
791 printf(" c - continue, proceed without asking anymore\n"
792 " D - dump, show the state of the unit\n"
dd6f9ac0 793 " f - fail, don't execute the command and pretend it failed\n"
d172b175 794 " h - help\n"
eedf223a 795 " i - info, show a short summary of the unit\n"
56fde33a 796 " j - jobs, show jobs that are in progress\n"
d172b175
FB
797 " s - skip, don't execute the command and pretend it succeeded\n"
798 " y - yes, execute the command\n");
dd6f9ac0 799 continue; /* ask again */
eedf223a
FB
800 case 'i':
801 printf(" Description: %s\n"
802 " Unit: %s\n"
803 " Command: %s\n",
804 u->id, u->description, cmdline);
805 continue; /* ask again */
56fde33a
FB
806 case 'j':
807 manager_dump_jobs(u->manager, stdout, " ");
808 continue; /* ask again */
539622bd
FB
809 case 'n':
810 /* 'n' was removed in favor of 'f'. */
811 printf("Didn't understand 'n', did you mean 'f'?\n");
812 continue; /* ask again */
d172b175
FB
813 case 's':
814 printf("Skipping execution.\n");
815 r = CONFIRM_PRETEND_SUCCESS;
816 break;
817 case 'y':
818 r = CONFIRM_EXECUTE;
819 break;
820 default:
821 assert_not_reached("Unhandled choice");
822 }
3b20f877 823 break;
3b20f877 824 }
af6da548 825
3b20f877 826restore_stdio:
af6da548 827 restore_confirm_stdio(&saved_stdin, &saved_stdout);
af6da548 828 return r;
80876c20
LP
829}
830
4d885bd3
DH
831static int get_fixed_user(const ExecContext *c, const char **user,
832 uid_t *uid, gid_t *gid,
833 const char **home, const char **shell) {
81a2b7ce 834 int r;
4d885bd3 835 const char *name;
81a2b7ce 836
4d885bd3 837 assert(c);
81a2b7ce 838
23deef88
LP
839 if (!c->user)
840 return 0;
841
4d885bd3
DH
842 /* Note that we don't set $HOME or $SHELL if they are not particularly enlightening anyway
843 * (i.e. are "/" or "/bin/nologin"). */
81a2b7ce 844
23deef88 845 name = c->user;
4d885bd3
DH
846 r = get_user_creds_clean(&name, uid, gid, home, shell);
847 if (r < 0)
848 return r;
81a2b7ce 849
4d885bd3
DH
850 *user = name;
851 return 0;
852}
853
854static int get_fixed_group(const ExecContext *c, const char **group, gid_t *gid) {
855 int r;
856 const char *name;
857
858 assert(c);
859
860 if (!c->group)
861 return 0;
862
863 name = c->group;
864 r = get_group_creds(&name, gid);
865 if (r < 0)
866 return r;
867
868 *group = name;
869 return 0;
870}
871
cdc5d5c5
DH
872static int get_supplementary_groups(const ExecContext *c, const char *user,
873 const char *group, gid_t gid,
874 gid_t **supplementary_gids, int *ngids) {
4d885bd3
DH
875 char **i;
876 int r, k = 0;
877 int ngroups_max;
878 bool keep_groups = false;
879 gid_t *groups = NULL;
880 _cleanup_free_ gid_t *l_gids = NULL;
881
882 assert(c);
883
bbeea271
DH
884 /*
885 * If user is given, then lookup GID and supplementary groups list.
886 * We avoid NSS lookups for gid=0. Also we have to initialize groups
cdc5d5c5
DH
887 * here and as early as possible so we keep the list of supplementary
888 * groups of the caller.
bbeea271
DH
889 */
890 if (user && gid_is_valid(gid) && gid != 0) {
891 /* First step, initialize groups from /etc/groups */
892 if (initgroups(user, gid) < 0)
893 return -errno;
894
895 keep_groups = true;
896 }
897
ac6e8be6 898 if (strv_isempty(c->supplementary_groups))
4d885bd3
DH
899 return 0;
900
366ddd25
DH
901 /*
902 * If SupplementaryGroups= was passed then NGROUPS_MAX has to
903 * be positive, otherwise fail.
904 */
905 errno = 0;
906 ngroups_max = (int) sysconf(_SC_NGROUPS_MAX);
907 if (ngroups_max <= 0) {
908 if (errno > 0)
909 return -errno;
910 else
911 return -EOPNOTSUPP; /* For all other values */
912 }
913
4d885bd3
DH
914 l_gids = new(gid_t, ngroups_max);
915 if (!l_gids)
916 return -ENOMEM;
81a2b7ce 917
4d885bd3
DH
918 if (keep_groups) {
919 /*
920 * Lookup the list of groups that the user belongs to, we
921 * avoid NSS lookups here too for gid=0.
922 */
923 k = ngroups_max;
924 if (getgrouplist(user, gid, l_gids, &k) < 0)
925 return -EINVAL;
926 } else
927 k = 0;
81a2b7ce 928
4d885bd3
DH
929 STRV_FOREACH(i, c->supplementary_groups) {
930 const char *g;
81a2b7ce 931
4d885bd3
DH
932 if (k >= ngroups_max)
933 return -E2BIG;
81a2b7ce 934
4d885bd3
DH
935 g = *i;
936 r = get_group_creds(&g, l_gids+k);
937 if (r < 0)
938 return r;
81a2b7ce 939
4d885bd3
DH
940 k++;
941 }
81a2b7ce 942
4d885bd3
DH
943 /*
944 * Sets ngids to zero to drop all supplementary groups, happens
945 * when we are under root and SupplementaryGroups= is empty.
946 */
947 if (k == 0) {
948 *ngids = 0;
949 return 0;
950 }
81a2b7ce 951
4d885bd3
DH
952 /* Otherwise get the final list of supplementary groups */
953 groups = memdup(l_gids, sizeof(gid_t) * k);
954 if (!groups)
955 return -ENOMEM;
956
957 *supplementary_gids = groups;
958 *ngids = k;
959
960 groups = NULL;
961
962 return 0;
963}
964
965static int enforce_groups(const ExecContext *context, gid_t gid,
966 gid_t *supplementary_gids, int ngids) {
967 int r;
968
969 assert(context);
970
971 /* Handle SupplementaryGroups= even if it is empty */
ac6e8be6 972 if (!strv_isempty(context->supplementary_groups)) {
4d885bd3
DH
973 r = maybe_setgroups(ngids, supplementary_gids);
974 if (r < 0)
97f0e76f 975 return r;
4d885bd3 976 }
81a2b7ce 977
4d885bd3
DH
978 if (gid_is_valid(gid)) {
979 /* Then set our gids */
980 if (setresgid(gid, gid, gid) < 0)
981 return -errno;
81a2b7ce
LP
982 }
983
984 return 0;
985}
986
987static int enforce_user(const ExecContext *context, uid_t uid) {
81a2b7ce
LP
988 assert(context);
989
4d885bd3
DH
990 if (!uid_is_valid(uid))
991 return 0;
992
479050b3 993 /* Sets (but doesn't look up) the uid and make sure we keep the
81a2b7ce
LP
994 * capabilities while doing so. */
995
479050b3 996 if (context->capability_ambient_set != 0) {
81a2b7ce
LP
997
998 /* First step: If we need to keep capabilities but
999 * drop privileges we need to make sure we keep our
cbb21cca 1000 * caps, while we drop privileges. */
693ced48 1001 if (uid != 0) {
cbb21cca 1002 int sb = context->secure_bits | 1<<SECURE_KEEP_CAPS;
693ced48
LP
1003
1004 if (prctl(PR_GET_SECUREBITS) != sb)
1005 if (prctl(PR_SET_SECUREBITS, sb) < 0)
1006 return -errno;
1007 }
81a2b7ce
LP
1008 }
1009
479050b3 1010 /* Second step: actually set the uids */
81a2b7ce
LP
1011 if (setresuid(uid, uid, uid) < 0)
1012 return -errno;
1013
1014 /* At this point we should have all necessary capabilities but
1015 are otherwise a normal user. However, the caps might got
1016 corrupted due to the setresuid() so we need clean them up
1017 later. This is done outside of this call. */
1018
1019 return 0;
1020}
1021
349cc4a5 1022#if HAVE_PAM
5b6319dc
LP
1023
1024static int null_conv(
1025 int num_msg,
1026 const struct pam_message **msg,
1027 struct pam_response **resp,
1028 void *appdata_ptr) {
1029
1030 /* We don't support conversations */
1031
1032 return PAM_CONV_ERR;
1033}
1034
cefc33ae
LP
1035#endif
1036
5b6319dc
LP
/* Opens a PAM session for the service and forks off a helper process ("(sd-pam)") that closes the session
 * again once the calling process has gone away.
 *
 *   name       PAM service name, passed to pam_start()
 *   user       user name, passed to pam_start()
 *   uid, gid   credentials the helper process drops to
 *   tty        if non-NULL, set as PAM_TTY on the handle
 *   env        in: variables exported to PAM via pam_putenv(); out: replaced by the list returned by
 *              pam_getenvlist() (the old list is freed)
 *   fds, n_fds file descriptors that are closed in the helper process
 *
 * Returns 0 on success and a negative errno-style value on failure (-EPERM for any PAM error). If built
 * without PAM support this is a NOP returning 0. */
static int setup_pam(
                const char *name,
                const char *user,
                uid_t uid,
                gid_t gid,
                const char *tty,
                char ***env,
                int fds[], unsigned n_fds) {

#if HAVE_PAM

        static const struct pam_conv conv = {
                .conv = null_conv,
                .appdata_ptr = NULL
        };

        _cleanup_(barrier_destroy) Barrier barrier = BARRIER_NULL;
        pam_handle_t *handle = NULL;
        sigset_t old_ss;
        int pam_code = PAM_SUCCESS, r;
        char **nv, **e = NULL;
        bool close_session = false;
        pid_t pam_pid = 0, parent_pid;
        int flags = 0;

        assert(name);
        assert(user);
        assert(env);

        /* We set up PAM in the parent process, then fork. The child
         * will then stay around until killed via PR_SET_PDEATHSIG or
         * systemd via the cgroup logic. It will then remove the PAM
         * session again. The parent process will exec() the actual
         * daemon. We do things this way to ensure that the main PID
         * of the daemon is the one we initially fork()ed. */

        r = barrier_create(&barrier);
        if (r < 0)
                goto fail;

        if (log_get_max_level() < LOG_DEBUG)
                flags |= PAM_SILENT;

        pam_code = pam_start(name, user, &conv, &handle);
        if (pam_code != PAM_SUCCESS) {
                handle = NULL;
                goto fail;
        }

        if (tty) {
                pam_code = pam_set_item(handle, PAM_TTY, tty);
                if (pam_code != PAM_SUCCESS)
                        goto fail;
        }

        STRV_FOREACH(nv, *env) {
                pam_code = pam_putenv(handle, *nv);
                if (pam_code != PAM_SUCCESS)
                        goto fail;
        }

        pam_code = pam_acct_mgmt(handle, flags);
        if (pam_code != PAM_SUCCESS)
                goto fail;

        pam_code = pam_open_session(handle, flags);
        if (pam_code != PAM_SUCCESS)
                goto fail;

        close_session = true;

        e = pam_getenvlist(handle);
        if (!e) {
                pam_code = PAM_BUF_ERR;
                goto fail;
        }

        /* Block SIGTERM, so that we know that it won't get lost in
         * the child */

        assert_se(sigprocmask_many(SIG_BLOCK, &old_ss, SIGTERM, -1) >= 0);

        parent_pid = getpid_cached();

        pam_pid = fork();
        if (pam_pid < 0) {
                r = -errno;

                /* No child exists that needs SIGTERM blocked, hence restore the original signal mask
                 * before bailing out. (errno was already saved in r above.) */
                (void) sigprocmask(SIG_SETMASK, &old_ss, NULL);
                goto fail;
        }

        if (pam_pid == 0) {
                int sig, ret = EXIT_PAM;

                /* The child's job is to reset the PAM session on
                 * termination */
                barrier_set_role(&barrier, BARRIER_CHILD);

                /* This string must fit in 10 chars (i.e. the length
                 * of "/sbin/init"), to look pretty in /bin/ps */
                rename_process("(sd-pam)");

                /* Make sure we don't keep open the passed fds in this
                child. We assume that otherwise only those fds are
                open here that have been opened by PAM. */
                close_many(fds, n_fds);

                /* Drop privileges - we don't need any to pam_close_session
                 * and this will make PR_SET_PDEATHSIG work in most cases.
                 * If this fails, ignore the error - but expect sd-pam threads
                 * to fail to exit normally */

                r = maybe_setgroups(0, NULL);
                if (r < 0)
                        log_warning_errno(r, "Failed to setgroups() in sd-pam: %m");
                if (setresgid(gid, gid, gid) < 0)
                        log_warning_errno(errno, "Failed to setresgid() in sd-pam: %m");
                if (setresuid(uid, uid, uid) < 0)
                        log_warning_errno(errno, "Failed to setresuid() in sd-pam: %m");

                (void) ignore_signals(SIGPIPE, -1);

                /* Wait until our parent died. This will only work if
                 * the above setresuid() succeeds, otherwise the kernel
                 * will not allow unprivileged parents kill their privileged
                 * children this way. We rely on the control groups kill logic
                 * to do the rest for us. */
                if (prctl(PR_SET_PDEATHSIG, SIGTERM) < 0)
                        goto child_finish;

                /* Tell the parent that our setup is done. This is especially
                 * important regarding dropping privileges. Otherwise, unit
                 * setup might race against our setresuid(2) call.
                 *
                 * If the parent aborted, we'll detect this below, hence ignore
                 * return failure here. */
                (void) barrier_place(&barrier);

                /* Check if our parent process might already have died? */
                if (getppid() == parent_pid) {
                        sigset_t ss;

                        assert_se(sigemptyset(&ss) >= 0);
                        assert_se(sigaddset(&ss, SIGTERM) >= 0);

                        for (;;) {
                                int k;

                                /* Note that sigwait() reports failure by returning a positive error
                                 * number; it neither returns -1 nor sets errno. */
                                k = sigwait(&ss, &sig);
                                if (k == EINTR)
                                        continue;
                                if (k != 0)
                                        goto child_finish;

                                assert(sig == SIGTERM);
                                break;
                        }
                }

                /* If our parent died we'll end the session */
                if (getppid() != parent_pid) {
                        pam_code = pam_close_session(handle, flags);
                        if (pam_code != PAM_SUCCESS)
                                goto child_finish;
                }

                ret = 0;

        child_finish:
                pam_end(handle, pam_code | flags);
                _exit(ret);
        }

        barrier_set_role(&barrier, BARRIER_PARENT);

        /* If the child was forked off successfully it will do all the
         * cleanups, so forget about the handle here. */
        handle = NULL;

        /* Unblock SIGTERM again in the parent */
        assert_se(sigprocmask(SIG_SETMASK, &old_ss, NULL) >= 0);

        /* We close the log explicitly here, since the PAM modules
         * might have opened it, but we don't want this fd around. */
        closelog();

        /* Synchronously wait for the child to initialize. We don't care for
         * errors as we cannot recover. However, warn loudly if it happens. */
        if (!barrier_place_and_sync(&barrier))
                log_error("PAM initialization failed");

        /* Hand the environment as seen by PAM back to the caller */
        strv_free(*env);
        *env = e;

        return 0;

fail:
        if (pam_code != PAM_SUCCESS) {
                log_error("PAM failed: %s", pam_strerror(handle, pam_code));
                r = -EPERM; /* PAM errors do not map to errno */
        } else
                log_error_errno(r, "PAM failed: %m");

        if (handle) {
                if (close_session)
                        pam_code = pam_close_session(handle, flags);

                pam_end(handle, pam_code | flags);
        }

        strv_free(e);
        closelog();

        return r;
#else
        return 0;
#endif
}
5b6319dc 1253
5d6b1584
LP
/* Renames the current process to "(<name>)", where <name> is the last (at most) 8 characters of the
 * basename of the given path. If the basename is empty, "(...)" is used instead. */
static void rename_process_from_path(const char *path) {
        char buf[11]; /* '(' + up to 8 characters + ')' + NUL */
        const char *base;
        size_t n;

        /* The result must fit in 10 chars (i.e. the length of "/sbin/init") to look pretty in /bin/ps */

        base = basename(path);
        if (isempty(base)) {
                rename_process("(...)");
                return;
        }

        n = strlen(base);
        if (n > 8) {
                /* Keep the tail: the end of the name is usually the more interesting part, since the
                 * beginning might just be "systemd-" */
                base += n - 8;
                n = 8;
        }

        buf[0] = '(';
        memcpy(buf + 1, base, n);
        buf[n + 1] = ')';
        buf[n + 2] = 0;

        rename_process(buf);
}
1284
469830d1
LP
1285static bool context_has_address_families(const ExecContext *c) {
1286 assert(c);
1287
1288 return c->address_families_whitelist ||
1289 !set_isempty(c->address_families);
1290}
1291
1292static bool context_has_syscall_filters(const ExecContext *c) {
1293 assert(c);
1294
1295 return c->syscall_whitelist ||
1296 !set_isempty(c->syscall_filter);
1297}
1298
1299static bool context_has_no_new_privileges(const ExecContext *c) {
1300 assert(c);
1301
1302 if (c->no_new_privileges)
1303 return true;
1304
1305 if (have_effective_cap(CAP_SYS_ADMIN)) /* if we are privileged, we don't need NNP */
1306 return false;
1307
1308 /* We need NNP if we have any form of seccomp and are unprivileged */
1309 return context_has_address_families(c) ||
1310 c->memory_deny_write_execute ||
1311 c->restrict_realtime ||
1312 exec_context_restrict_namespaces_set(c) ||
1313 c->protect_kernel_tunables ||
1314 c->protect_kernel_modules ||
1315 c->private_devices ||
1316 context_has_syscall_filters(c) ||
78e864e5
TM
1317 !set_isempty(c->syscall_archs) ||
1318 c->lock_personality;
469830d1
LP
1319}
1320
349cc4a5 1321#if HAVE_SECCOMP
17df7223 1322
83f12b27 1323static bool skip_seccomp_unavailable(const Unit* u, const char* msg) {
f673b62d
LP
1324
1325 if (is_seccomp_available())
1326 return false;
1327
f673b62d 1328 log_unit_debug(u, "SECCOMP features not detected in the kernel, skipping %s", msg);
f673b62d 1329 return true;
83f12b27
FS
1330}
1331
165a31c0 1332static int apply_syscall_filter(const Unit* u, const ExecContext *c, bool needs_ambient_hack) {
469830d1 1333 uint32_t negative_action, default_action, action;
165a31c0 1334 int r;
8351ceae 1335
469830d1 1336 assert(u);
c0467cf3 1337 assert(c);
8351ceae 1338
469830d1 1339 if (!context_has_syscall_filters(c))
83f12b27
FS
1340 return 0;
1341
469830d1
LP
1342 if (skip_seccomp_unavailable(u, "SystemCallFilter="))
1343 return 0;
e9642be2 1344
469830d1 1345 negative_action = c->syscall_errno == 0 ? SCMP_ACT_KILL : SCMP_ACT_ERRNO(c->syscall_errno);
e9642be2 1346
469830d1
LP
1347 if (c->syscall_whitelist) {
1348 default_action = negative_action;
1349 action = SCMP_ACT_ALLOW;
7c66bae2 1350 } else {
469830d1
LP
1351 default_action = SCMP_ACT_ALLOW;
1352 action = negative_action;
57183d11 1353 }
8351ceae 1354
165a31c0
LP
1355 if (needs_ambient_hack) {
1356 r = seccomp_filter_set_add(c->syscall_filter, c->syscall_whitelist, syscall_filter_sets + SYSCALL_FILTER_SET_SETUID);
1357 if (r < 0)
1358 return r;
1359 }
1360
469830d1 1361 return seccomp_load_syscall_filter_set_raw(default_action, c->syscall_filter, action);
4298d0b5
LP
1362}
1363
469830d1
LP
1364static int apply_syscall_archs(const Unit *u, const ExecContext *c) {
1365 assert(u);
4298d0b5
LP
1366 assert(c);
1367
469830d1 1368 if (set_isempty(c->syscall_archs))
83f12b27
FS
1369 return 0;
1370
469830d1
LP
1371 if (skip_seccomp_unavailable(u, "SystemCallArchitectures="))
1372 return 0;
4298d0b5 1373
469830d1
LP
1374 return seccomp_restrict_archs(c->syscall_archs);
1375}
4298d0b5 1376
469830d1
LP
1377static int apply_address_families(const Unit* u, const ExecContext *c) {
1378 assert(u);
1379 assert(c);
4298d0b5 1380
469830d1
LP
1381 if (!context_has_address_families(c))
1382 return 0;
4298d0b5 1383
469830d1
LP
1384 if (skip_seccomp_unavailable(u, "RestrictAddressFamilies="))
1385 return 0;
4298d0b5 1386
469830d1 1387 return seccomp_restrict_address_families(c->address_families, c->address_families_whitelist);
8351ceae 1388}
4298d0b5 1389
83f12b27 1390static int apply_memory_deny_write_execute(const Unit* u, const ExecContext *c) {
469830d1 1391 assert(u);
f3e43635
TM
1392 assert(c);
1393
469830d1 1394 if (!c->memory_deny_write_execute)
83f12b27
FS
1395 return 0;
1396
469830d1
LP
1397 if (skip_seccomp_unavailable(u, "MemoryDenyWriteExecute="))
1398 return 0;
f3e43635 1399
469830d1 1400 return seccomp_memory_deny_write_execute();
f3e43635
TM
1401}
1402
83f12b27 1403static int apply_restrict_realtime(const Unit* u, const ExecContext *c) {
469830d1 1404 assert(u);
f4170c67
LP
1405 assert(c);
1406
469830d1 1407 if (!c->restrict_realtime)
83f12b27
FS
1408 return 0;
1409
469830d1
LP
1410 if (skip_seccomp_unavailable(u, "RestrictRealtime="))
1411 return 0;
f4170c67 1412
469830d1 1413 return seccomp_restrict_realtime();
f4170c67
LP
1414}
1415
59e856c7 1416static int apply_protect_sysctl(const Unit *u, const ExecContext *c) {
469830d1 1417 assert(u);
59eeb84b
LP
1418 assert(c);
1419
1420 /* Turn off the legacy sysctl() system call. Many distributions turn this off while building the kernel, but
1421 * let's protect even those systems where this is left on in the kernel. */
1422
469830d1 1423 if (!c->protect_kernel_tunables)
59eeb84b
LP
1424 return 0;
1425
469830d1
LP
1426 if (skip_seccomp_unavailable(u, "ProtectKernelTunables="))
1427 return 0;
59eeb84b 1428
469830d1 1429 return seccomp_protect_sysctl();
59eeb84b
LP
1430}
1431
59e856c7 1432static int apply_protect_kernel_modules(const Unit *u, const ExecContext *c) {
469830d1 1433 assert(u);
502d704e
DH
1434 assert(c);
1435
25a8d8a0 1436 /* Turn off module syscalls on ProtectKernelModules=yes */
502d704e 1437
469830d1
LP
1438 if (!c->protect_kernel_modules)
1439 return 0;
1440
502d704e
DH
1441 if (skip_seccomp_unavailable(u, "ProtectKernelModules="))
1442 return 0;
1443
469830d1 1444 return seccomp_load_syscall_filter_set(SCMP_ACT_ALLOW, syscall_filter_sets + SYSCALL_FILTER_SET_MODULE, SCMP_ACT_ERRNO(EPERM));
502d704e
DH
1445}
1446
59e856c7 1447static int apply_private_devices(const Unit *u, const ExecContext *c) {
469830d1 1448 assert(u);
ba128bb8
LP
1449 assert(c);
1450
8f81a5f6 1451 /* If PrivateDevices= is set, also turn off iopl and all @raw-io syscalls. */
ba128bb8 1452
469830d1
LP
1453 if (!c->private_devices)
1454 return 0;
1455
ba128bb8
LP
1456 if (skip_seccomp_unavailable(u, "PrivateDevices="))
1457 return 0;
1458
469830d1 1459 return seccomp_load_syscall_filter_set(SCMP_ACT_ALLOW, syscall_filter_sets + SYSCALL_FILTER_SET_RAW_IO, SCMP_ACT_ERRNO(EPERM));
ba128bb8
LP
1460}
1461
add00535 1462static int apply_restrict_namespaces(Unit *u, const ExecContext *c) {
469830d1 1463 assert(u);
add00535
LP
1464 assert(c);
1465
1466 if (!exec_context_restrict_namespaces_set(c))
1467 return 0;
1468
1469 if (skip_seccomp_unavailable(u, "RestrictNamespaces="))
1470 return 0;
1471
1472 return seccomp_restrict_namespaces(c->restrict_namespaces);
1473}
1474
78e864e5 1475static int apply_lock_personality(const Unit* u, const ExecContext *c) {
e8132d63
LP
1476 unsigned long personality;
1477 int r;
78e864e5
TM
1478
1479 assert(u);
1480 assert(c);
1481
1482 if (!c->lock_personality)
1483 return 0;
1484
1485 if (skip_seccomp_unavailable(u, "LockPersonality="))
1486 return 0;
1487
e8132d63
LP
1488 personality = c->personality;
1489
1490 /* If personality is not specified, use either PER_LINUX or PER_LINUX32 depending on what is currently set. */
1491 if (personality == PERSONALITY_INVALID) {
1492
1493 r = opinionated_personality(&personality);
1494 if (r < 0)
1495 return r;
1496 }
78e864e5
TM
1497
1498 return seccomp_lock_personality(personality);
1499}
1500
c0467cf3 1501#endif
8351ceae 1502
31a7eb86
ZJS
1503static void do_idle_pipe_dance(int idle_pipe[4]) {
1504 assert(idle_pipe);
1505
54eb2300
LP
1506 idle_pipe[1] = safe_close(idle_pipe[1]);
1507 idle_pipe[2] = safe_close(idle_pipe[2]);
31a7eb86
ZJS
1508
1509 if (idle_pipe[0] >= 0) {
1510 int r;
1511
1512 r = fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT_USEC);
1513
1514 if (idle_pipe[3] >= 0 && r == 0 /* timeout */) {
c7cc737f
LP
1515 ssize_t n;
1516
31a7eb86 1517 /* Signal systemd that we are bored and want to continue. */
c7cc737f
LP
1518 n = write(idle_pipe[3], "x", 1);
1519 if (n > 0)
cd972d69
ZJS
1520 /* Wait for systemd to react to the signal above. */
1521 fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT2_USEC);
31a7eb86
ZJS
1522 }
1523
54eb2300 1524 idle_pipe[0] = safe_close(idle_pipe[0]);
31a7eb86
ZJS
1525
1526 }
1527
54eb2300 1528 idle_pipe[3] = safe_close(idle_pipe[3]);
31a7eb86
ZJS
1529}
1530
7cae38c4 1531static int build_environment(
fd63e712 1532 Unit *u,
9fa95f85 1533 const ExecContext *c,
1e22b5cd 1534 const ExecParameters *p,
7cae38c4
LP
1535 unsigned n_fds,
1536 const char *home,
1537 const char *username,
1538 const char *shell,
7bce046b
LP
1539 dev_t journal_stream_dev,
1540 ino_t journal_stream_ino,
7cae38c4
LP
1541 char ***ret) {
1542
1543 _cleanup_strv_free_ char **our_env = NULL;
1544 unsigned n_env = 0;
1545 char *x;
1546
4b58153d 1547 assert(u);
7cae38c4
LP
1548 assert(c);
1549 assert(ret);
1550
4b58153d 1551 our_env = new0(char*, 14);
7cae38c4
LP
1552 if (!our_env)
1553 return -ENOMEM;
1554
1555 if (n_fds > 0) {
8dd4c05b
LP
1556 _cleanup_free_ char *joined = NULL;
1557
df0ff127 1558 if (asprintf(&x, "LISTEN_PID="PID_FMT, getpid_cached()) < 0)
7cae38c4
LP
1559 return -ENOMEM;
1560 our_env[n_env++] = x;
1561
1562 if (asprintf(&x, "LISTEN_FDS=%u", n_fds) < 0)
1563 return -ENOMEM;
1564 our_env[n_env++] = x;
8dd4c05b 1565
1e22b5cd 1566 joined = strv_join(p->fd_names, ":");
8dd4c05b
LP
1567 if (!joined)
1568 return -ENOMEM;
1569
605405c6 1570 x = strjoin("LISTEN_FDNAMES=", joined);
8dd4c05b
LP
1571 if (!x)
1572 return -ENOMEM;
1573 our_env[n_env++] = x;
7cae38c4
LP
1574 }
1575
b08af3b1 1576 if ((p->flags & EXEC_SET_WATCHDOG) && p->watchdog_usec > 0) {
df0ff127 1577 if (asprintf(&x, "WATCHDOG_PID="PID_FMT, getpid_cached()) < 0)
09812eb7
LP
1578 return -ENOMEM;
1579 our_env[n_env++] = x;
1580
1e22b5cd 1581 if (asprintf(&x, "WATCHDOG_USEC="USEC_FMT, p->watchdog_usec) < 0)
09812eb7
LP
1582 return -ENOMEM;
1583 our_env[n_env++] = x;
1584 }
1585
fd63e712
LP
1586 /* If this is D-Bus, tell the nss-systemd module, since it relies on being able to use D-Bus look up dynamic
1587 * users via PID 1, possibly dead-locking the dbus daemon. This way it will not use D-Bus to resolve names, but
1588 * check the database directly. */
ac647978 1589 if (p->flags & EXEC_NSS_BYPASS_BUS) {
fd63e712
LP
1590 x = strdup("SYSTEMD_NSS_BYPASS_BUS=1");
1591 if (!x)
1592 return -ENOMEM;
1593 our_env[n_env++] = x;
1594 }
1595
7cae38c4
LP
1596 if (home) {
1597 x = strappend("HOME=", home);
1598 if (!x)
1599 return -ENOMEM;
1600 our_env[n_env++] = x;
1601 }
1602
1603 if (username) {
1604 x = strappend("LOGNAME=", username);
1605 if (!x)
1606 return -ENOMEM;
1607 our_env[n_env++] = x;
1608
1609 x = strappend("USER=", username);
1610 if (!x)
1611 return -ENOMEM;
1612 our_env[n_env++] = x;
1613 }
1614
1615 if (shell) {
1616 x = strappend("SHELL=", shell);
1617 if (!x)
1618 return -ENOMEM;
1619 our_env[n_env++] = x;
1620 }
1621
4b58153d
LP
1622 if (!sd_id128_is_null(u->invocation_id)) {
1623 if (asprintf(&x, "INVOCATION_ID=" SD_ID128_FORMAT_STR, SD_ID128_FORMAT_VAL(u->invocation_id)) < 0)
1624 return -ENOMEM;
1625
1626 our_env[n_env++] = x;
1627 }
1628
6af760f3
LP
1629 if (exec_context_needs_term(c)) {
1630 const char *tty_path, *term = NULL;
1631
1632 tty_path = exec_context_tty_path(c);
1633
1634 /* If we are forked off PID 1 and we are supposed to operate on /dev/console, then let's try to inherit
1635 * the $TERM set for PID 1. This is useful for containers so that the $TERM the container manager
1636 * passes to PID 1 ends up all the way in the console login shown. */
1637
1638 if (path_equal(tty_path, "/dev/console") && getppid() == 1)
1639 term = getenv("TERM");
1640 if (!term)
1641 term = default_term_for_tty(tty_path);
7cae38c4 1642
6af760f3 1643 x = strappend("TERM=", term);
7cae38c4
LP
1644 if (!x)
1645 return -ENOMEM;
1646 our_env[n_env++] = x;
1647 }
1648
7bce046b
LP
1649 if (journal_stream_dev != 0 && journal_stream_ino != 0) {
1650 if (asprintf(&x, "JOURNAL_STREAM=" DEV_FMT ":" INO_FMT, journal_stream_dev, journal_stream_ino) < 0)
1651 return -ENOMEM;
1652
1653 our_env[n_env++] = x;
1654 }
1655
7cae38c4 1656 our_env[n_env++] = NULL;
7bce046b 1657 assert(n_env <= 12);
7cae38c4
LP
1658
1659 *ret = our_env;
1660 our_env = NULL;
1661
1662 return 0;
1663}
1664
b4c14404
FB
1665static int build_pass_environment(const ExecContext *c, char ***ret) {
1666 _cleanup_strv_free_ char **pass_env = NULL;
1667 size_t n_env = 0, n_bufsize = 0;
1668 char **i;
1669
1670 STRV_FOREACH(i, c->pass_environment) {
1671 _cleanup_free_ char *x = NULL;
1672 char *v;
1673
1674 v = getenv(*i);
1675 if (!v)
1676 continue;
605405c6 1677 x = strjoin(*i, "=", v);
b4c14404
FB
1678 if (!x)
1679 return -ENOMEM;
00819cc1 1680
b4c14404
FB
1681 if (!GREEDY_REALLOC(pass_env, n_bufsize, n_env + 2))
1682 return -ENOMEM;
00819cc1 1683
b4c14404
FB
1684 pass_env[n_env++] = x;
1685 pass_env[n_env] = NULL;
1686 x = NULL;
1687 }
1688
1689 *ret = pass_env;
1690 pass_env = NULL;
1691
1692 return 0;
1693}
1694
8b44a3d2
LP
1695static bool exec_needs_mount_namespace(
1696 const ExecContext *context,
1697 const ExecParameters *params,
1698 ExecRuntime *runtime) {
1699
1700 assert(context);
1701 assert(params);
1702
915e6d16
LP
1703 if (context->root_image)
1704 return true;
1705
2a624c36
AP
1706 if (!strv_isempty(context->read_write_paths) ||
1707 !strv_isempty(context->read_only_paths) ||
1708 !strv_isempty(context->inaccessible_paths))
8b44a3d2
LP
1709 return true;
1710
d2d6c096
LP
1711 if (context->n_bind_mounts > 0)
1712 return true;
1713
8b44a3d2
LP
1714 if (context->mount_flags != 0)
1715 return true;
1716
1717 if (context->private_tmp && runtime && (runtime->tmp_dir || runtime->var_tmp_dir))
1718 return true;
1719
8b44a3d2
LP
1720 if (context->private_devices ||
1721 context->protect_system != PROTECT_SYSTEM_NO ||
59eeb84b
LP
1722 context->protect_home != PROTECT_HOME_NO ||
1723 context->protect_kernel_tunables ||
c575770b 1724 context->protect_kernel_modules ||
59eeb84b 1725 context->protect_control_groups)
8b44a3d2
LP
1726 return true;
1727
9c988f93 1728 if (context->mount_apivfs && (context->root_image || context->root_directory))
5d997827
LP
1729 return true;
1730
6c47cd7d
LP
1731 if (context->dynamic_user &&
1732 (!strv_isempty(context->directories[EXEC_DIRECTORY_RUNTIME].paths) ||
1733 !strv_isempty(context->directories[EXEC_DIRECTORY_STATE].paths) ||
1734 !strv_isempty(context->directories[EXEC_DIRECTORY_CACHE].paths) ||
1735 !strv_isempty(context->directories[EXEC_DIRECTORY_LOGS].paths)))
1736 return true;
1737
8b44a3d2
LP
1738 return false;
1739}
1740
d251207d
LP
1741static int setup_private_users(uid_t uid, gid_t gid) {
1742 _cleanup_free_ char *uid_map = NULL, *gid_map = NULL;
1743 _cleanup_close_pair_ int errno_pipe[2] = { -1, -1 };
1744 _cleanup_close_ int unshare_ready_fd = -1;
1745 _cleanup_(sigkill_waitp) pid_t pid = 0;
1746 uint64_t c = 1;
1747 siginfo_t si;
1748 ssize_t n;
1749 int r;
1750
1751 /* Set up a user namespace and map root to root, the selected UID/GID to itself, and everything else to
1752 * nobody. In order to be able to write this mapping we need CAP_SETUID in the original user namespace, which
1753 * we however lack after opening the user namespace. To work around this we fork() a temporary child process,
1754 * which waits for the parent to create the new user namespace while staying in the original namespace. The
1755 * child then writes the UID mapping, under full privileges. The parent waits for the child to finish and
1756 * continues execution normally. */
1757
587ab01b
ZJS
1758 if (uid != 0 && uid_is_valid(uid)) {
1759 r = asprintf(&uid_map,
1760 "0 0 1\n" /* Map root → root */
1761 UID_FMT " " UID_FMT " 1\n", /* Map $UID → $UID */
1762 uid, uid);
1763 if (r < 0)
1764 return -ENOMEM;
1765 } else {
e0f3720e 1766 uid_map = strdup("0 0 1\n"); /* The case where the above is the same */
587ab01b
ZJS
1767 if (!uid_map)
1768 return -ENOMEM;
1769 }
d251207d 1770
587ab01b
ZJS
1771 if (gid != 0 && gid_is_valid(gid)) {
1772 r = asprintf(&gid_map,
1773 "0 0 1\n" /* Map root → root */
1774 GID_FMT " " GID_FMT " 1\n", /* Map $GID → $GID */
1775 gid, gid);
1776 if (r < 0)
1777 return -ENOMEM;
1778 } else {
d251207d 1779 gid_map = strdup("0 0 1\n"); /* The case where the above is the same */
587ab01b
ZJS
1780 if (!gid_map)
1781 return -ENOMEM;
1782 }
d251207d
LP
1783
1784 /* Create a communication channel so that the parent can tell the child when it finished creating the user
1785 * namespace. */
1786 unshare_ready_fd = eventfd(0, EFD_CLOEXEC);
1787 if (unshare_ready_fd < 0)
1788 return -errno;
1789
1790 /* Create a communication channel so that the child can tell the parent a proper error code in case it
1791 * failed. */
1792 if (pipe2(errno_pipe, O_CLOEXEC) < 0)
1793 return -errno;
1794
1795 pid = fork();
1796 if (pid < 0)
1797 return -errno;
1798
1799 if (pid == 0) {
1800 _cleanup_close_ int fd = -1;
1801 const char *a;
1802 pid_t ppid;
1803
1804 /* Child process, running in the original user namespace. Let's update the parent's UID/GID map from
1805 * here, after the parent opened its own user namespace. */
1806
1807 ppid = getppid();
1808 errno_pipe[0] = safe_close(errno_pipe[0]);
1809
1810 /* Wait until the parent unshared the user namespace */
1811 if (read(unshare_ready_fd, &c, sizeof(c)) < 0) {
1812 r = -errno;
1813 goto child_fail;
1814 }
1815
1816 /* Disable the setgroups() system call in the child user namespace, for good. */
1817 a = procfs_file_alloca(ppid, "setgroups");
1818 fd = open(a, O_WRONLY|O_CLOEXEC);
1819 if (fd < 0) {
1820 if (errno != ENOENT) {
1821 r = -errno;
1822 goto child_fail;
1823 }
1824
1825 /* If the file is missing the kernel is too old, let's continue anyway. */
1826 } else {
1827 if (write(fd, "deny\n", 5) < 0) {
1828 r = -errno;
1829 goto child_fail;
1830 }
1831
1832 fd = safe_close(fd);
1833 }
1834
1835 /* First write the GID map */
1836 a = procfs_file_alloca(ppid, "gid_map");
1837 fd = open(a, O_WRONLY|O_CLOEXEC);
1838 if (fd < 0) {
1839 r = -errno;
1840 goto child_fail;
1841 }
1842 if (write(fd, gid_map, strlen(gid_map)) < 0) {
1843 r = -errno;
1844 goto child_fail;
1845 }
1846 fd = safe_close(fd);
1847
1848 /* The write the UID map */
1849 a = procfs_file_alloca(ppid, "uid_map");
1850 fd = open(a, O_WRONLY|O_CLOEXEC);
1851 if (fd < 0) {
1852 r = -errno;
1853 goto child_fail;
1854 }
1855 if (write(fd, uid_map, strlen(uid_map)) < 0) {
1856 r = -errno;
1857 goto child_fail;
1858 }
1859
1860 _exit(EXIT_SUCCESS);
1861
1862 child_fail:
1863 (void) write(errno_pipe[1], &r, sizeof(r));
1864 _exit(EXIT_FAILURE);
1865 }
1866
1867 errno_pipe[1] = safe_close(errno_pipe[1]);
1868
1869 if (unshare(CLONE_NEWUSER) < 0)
1870 return -errno;
1871
1872 /* Let the child know that the namespace is ready now */
1873 if (write(unshare_ready_fd, &c, sizeof(c)) < 0)
1874 return -errno;
1875
1876 /* Try to read an error code from the child */
1877 n = read(errno_pipe[0], &r, sizeof(r));
1878 if (n < 0)
1879 return -errno;
1880 if (n == sizeof(r)) { /* an error code was sent to us */
1881 if (r < 0)
1882 return r;
1883 return -EIO;
1884 }
1885 if (n != 0) /* on success we should have read 0 bytes */
1886 return -EIO;
1887
1888 r = wait_for_terminate(pid, &si);
1889 if (r < 0)
1890 return r;
1891 pid = 0;
1892
1893 /* If something strange happened with the child, let's consider this fatal, too */
1894 if (si.si_code != CLD_EXITED || si.si_status != 0)
1895 return -EIO;
1896
1897 return 0;
1898}
1899
3536f49e 1900static int setup_exec_directory(
07689d5d
LP
1901 const ExecContext *context,
1902 const ExecParameters *params,
1903 uid_t uid,
3536f49e 1904 gid_t gid,
3536f49e
YW
1905 ExecDirectoryType type,
1906 int *exit_status) {
07689d5d 1907
72fd1768 1908 static const int exit_status_table[_EXEC_DIRECTORY_TYPE_MAX] = {
3536f49e
YW
1909 [EXEC_DIRECTORY_RUNTIME] = EXIT_RUNTIME_DIRECTORY,
1910 [EXEC_DIRECTORY_STATE] = EXIT_STATE_DIRECTORY,
1911 [EXEC_DIRECTORY_CACHE] = EXIT_CACHE_DIRECTORY,
1912 [EXEC_DIRECTORY_LOGS] = EXIT_LOGS_DIRECTORY,
1913 [EXEC_DIRECTORY_CONFIGURATION] = EXIT_CONFIGURATION_DIRECTORY,
1914 };
07689d5d
LP
1915 char **rt;
1916 int r;
1917
1918 assert(context);
1919 assert(params);
72fd1768 1920 assert(type >= 0 && type < _EXEC_DIRECTORY_TYPE_MAX);
3536f49e 1921 assert(exit_status);
07689d5d 1922
3536f49e
YW
1923 if (!params->prefix[type])
1924 return 0;
1925
8679efde 1926 if (params->flags & EXEC_CHOWN_DIRECTORIES) {
3536f49e
YW
1927 if (!uid_is_valid(uid))
1928 uid = 0;
1929 if (!gid_is_valid(gid))
1930 gid = 0;
1931 }
1932
1933 STRV_FOREACH(rt, context->directories[type].paths) {
6c47cd7d
LP
1934 _cleanup_free_ char *p = NULL, *pp = NULL;
1935 const char *effective;
07689d5d 1936
3536f49e
YW
1937 p = strjoin(params->prefix[type], "/", *rt);
1938 if (!p) {
1939 r = -ENOMEM;
1940 goto fail;
1941 }
07689d5d 1942
23a7448e
YW
1943 r = mkdir_parents_label(p, 0755);
1944 if (r < 0)
3536f49e 1945 goto fail;
23a7448e 1946
6c47cd7d
LP
1947 if (context->dynamic_user && type != EXEC_DIRECTORY_CONFIGURATION) {
1948 _cleanup_free_ char *private_root = NULL, *relative = NULL, *parent = NULL;
1949
1950 /* So, here's one extra complication when dealing with DynamicUser=1 units. In that case we
1951 * want to avoid leaving a directory around fully accessible that is owned by a dynamic user
1952 * whose UID is later on reused. To lock this down we use the same trick used by container
1953 * managers to prohibit host users to get access to files of the same UID in containers: we
1954 * place everything inside a directory that has an access mode of 0700 and is owned root:root,
1955 * so that it acts as security boundary for unprivileged host code. We then use fs namespacing
1956 * to make this directory permeable for the service itself.
1957 *
1958 * Specifically: for a service which wants a special directory "foo/" we first create a
1959 * directory "private/" with access mode 0700 owned by root:root. Then we place "foo" inside of
1960 * that directory (i.e. "private/foo/"), and make "foo" a symlink to "private/foo". This way,
1961 * privileged host users can access "foo/" as usual, but unprivileged host users can't look
1962 * into it. Inside of the namespaceof the container "private/" is replaced by a more liberally
1963 * accessible tmpfs, into which the host's "private/foo/" is mounted under the same name, thus
1964 * disabling the access boundary for the service and making sure it only gets access to the
1965 * dirs it needs but no others. Tricky? Yes, absolutely, but it works!
1966 *
1967 * Note that we don't do this for EXEC_DIRECTORY_CONFIGURATION as that's assumed not to be
1968 * owned by the service itself. */
1969
1970 private_root = strjoin(params->prefix[type], "/private");
1971 if (!private_root) {
1972 r = -ENOMEM;
1973 goto fail;
1974 }
1975
1976 /* First set up private root if it doesn't exist yet, with access mode 0700 and owned by root:root */
1977 r = mkdir_safe_label(private_root, 0700, 0, 0);
1978 if (r < 0)
1979 goto fail;
1980
1981 pp = strjoin(private_root, "/", *rt);
1982 if (!pp) {
1983 r = -ENOMEM;
1984 goto fail;
1985 }
1986
1987 /* Create all directories between the configured directory and this private root, and mark them 0755 */
1988 r = mkdir_parents_label(pp, 0755);
1989 if (r < 0)
1990 goto fail;
1991
1992 /* Finally, create the actual directory for the service */
1993 r = mkdir_label(pp, context->directories[type].mode);
1994 if (r < 0 && r != -EEXIST)
1995 goto fail;
1996
1997 parent = dirname_malloc(p);
1998 if (!parent) {
1999 r = -ENOMEM;
2000 goto fail;
2001 }
2002
2003 r = path_make_relative(parent, pp, &relative);
2004 if (r < 0)
2005 goto fail;
2006
2007 /* And link it up from the original place */
2008 r = symlink_idempotent(relative, p);
2009 if (r < 0)
2010 goto fail;
2011
2012 effective = pp;
2013
2014 } else {
2015 r = mkdir_label(p, context->directories[type].mode);
2016 if (r < 0 && r != -EEXIST)
2017 goto fail;
2018
2019 effective = p;
2020 }
a1164ae3
LP
2021
2022 /* First lock down the access mode */
6c47cd7d 2023 if (chmod(effective, context->directories[type].mode) < 0) {
a1164ae3 2024 r = -errno;
3536f49e 2025 goto fail;
a1164ae3 2026 }
07689d5d 2027
c71b2eb7
LP
2028 /* Don't change the owner of the configuration directory, as in the common case it is not written to by
2029 * a service, and shall not be writable. */
2030 if (type == EXEC_DIRECTORY_CONFIGURATION)
2031 continue;
2032
a1164ae3 2033 /* Then, change the ownership of the whole tree, if necessary */
6c47cd7d 2034 r = path_chown_recursive(effective, uid, gid);
07689d5d 2035 if (r < 0)
3536f49e 2036 goto fail;
07689d5d
LP
2037 }
2038
2039 return 0;
3536f49e
YW
2040
2041fail:
2042 *exit_status = exit_status_table[type];
3536f49e 2043 return r;
07689d5d
LP
2044}
2045
cefc33ae
LP
2046static int setup_smack(
2047 const ExecContext *context,
2048 const ExecCommand *command) {
2049
cefc33ae
LP
2050 int r;
2051
2052 assert(context);
2053 assert(command);
2054
cefc33ae
LP
2055 if (context->smack_process_label) {
2056 r = mac_smack_apply_pid(0, context->smack_process_label);
2057 if (r < 0)
2058 return r;
2059 }
2060#ifdef SMACK_DEFAULT_PROCESS_LABEL
2061 else {
2062 _cleanup_free_ char *exec_label = NULL;
2063
2064 r = mac_smack_read(command->path, SMACK_ATTR_EXEC, &exec_label);
4c701096 2065 if (r < 0 && !IN_SET(r, -ENODATA, -EOPNOTSUPP))
cefc33ae
LP
2066 return r;
2067
2068 r = mac_smack_apply_pid(0, exec_label ? : SMACK_DEFAULT_PROCESS_LABEL);
2069 if (r < 0)
2070 return r;
2071 }
cefc33ae
LP
2072#endif
2073
2074 return 0;
2075}
2076
3fbe8dbe
LP
2077static int compile_read_write_paths(
2078 const ExecContext *context,
2079 const ExecParameters *params,
2080 char ***ret) {
2081
2082 _cleanup_strv_free_ char **l = NULL;
2083 char **rt;
3536f49e 2084 ExecDirectoryType i;
3fbe8dbe 2085
06ec51d8
ZJS
2086 /* Compile the list of writable paths. This is the combination of
2087 * the explicitly configured paths, plus all runtime directories. */
3fbe8dbe 2088
3536f49e 2089 if (strv_isempty(context->read_write_paths)) {
72fd1768 2090 for (i = 0; i < _EXEC_DIRECTORY_TYPE_MAX; i++)
3536f49e
YW
2091 if (!strv_isempty(context->directories[i].paths))
2092 break;
2093
72fd1768 2094 if (i == _EXEC_DIRECTORY_TYPE_MAX) {
3536f49e
YW
2095 *ret = NULL; /* NOP if neither is set */
2096 return 0;
2097 }
3fbe8dbe
LP
2098 }
2099
2100 l = strv_copy(context->read_write_paths);
2101 if (!l)
2102 return -ENOMEM;
2103
72fd1768 2104 for (i = 0; i < _EXEC_DIRECTORY_TYPE_MAX; i++) {
3536f49e
YW
2105 if (!params->prefix[i])
2106 continue;
3fbe8dbe 2107
3536f49e
YW
2108 STRV_FOREACH(rt, context->directories[i].paths) {
2109 char *s;
3fbe8dbe 2110
3536f49e
YW
2111 s = strjoin(params->prefix[i], "/", *rt);
2112 if (!s)
2113 return -ENOMEM;
2114
2115 if (strv_consume(&l, s) < 0)
2116 return -ENOMEM;
2117 }
3fbe8dbe
LP
2118 }
2119
2120 *ret = l;
2121 l = NULL;
2122
2123 return 0;
2124}
2125
6c47cd7d
LP
2126static int compile_bind_mounts(
2127 const ExecContext *context,
2128 const ExecParameters *params,
2129 BindMount **ret_bind_mounts,
2130 unsigned *ret_n_bind_mounts,
2131 char ***ret_empty_directories) {
2132
2133 _cleanup_strv_free_ char **empty_directories = NULL;
2134 BindMount *bind_mounts;
2135 unsigned n, h = 0, i;
2136 ExecDirectoryType t;
2137 int r;
2138
2139 assert(context);
2140 assert(params);
2141 assert(ret_bind_mounts);
2142 assert(ret_n_bind_mounts);
2143 assert(ret_empty_directories);
2144
2145 n = context->n_bind_mounts;
2146 for (t = 0; t < _EXEC_DIRECTORY_TYPE_MAX; t++) {
2147 if (!params->prefix[t])
2148 continue;
2149
2150 n += strv_length(context->directories[t].paths);
2151 }
2152
2153 if (n <= 0) {
2154 *ret_bind_mounts = NULL;
2155 *ret_n_bind_mounts = 0;
2156 *ret_empty_directories = NULL;
2157 return 0;
2158 }
2159
2160 bind_mounts = new(BindMount, n);
2161 if (!bind_mounts)
2162 return -ENOMEM;
2163
2164 for (i = 0; context->n_bind_mounts; i++) {
2165 BindMount *item = context->bind_mounts + i;
2166 char *s, *d;
2167
2168 s = strdup(item->source);
2169 if (!s) {
2170 r = -ENOMEM;
2171 goto finish;
2172 }
2173
2174 d = strdup(item->destination);
2175 if (!d) {
2176 free(s);
2177 r = -ENOMEM;
2178 goto finish;
2179 }
2180
2181 bind_mounts[h++] = (BindMount) {
2182 .source = s,
2183 .destination = d,
2184 .read_only = item->read_only,
2185 .recursive = item->recursive,
2186 .ignore_enoent = item->ignore_enoent,
2187 };
2188 }
2189
2190 for (t = 0; t < _EXEC_DIRECTORY_TYPE_MAX; t++) {
2191 char **suffix;
2192
2193 if (!params->prefix[t])
2194 continue;
2195
2196 if (strv_isempty(context->directories[t].paths))
2197 continue;
2198
2199 if (context->dynamic_user && t != EXEC_DIRECTORY_CONFIGURATION) {
2200 char *private_root;
2201
2202 /* So this is for a dynamic user, and we need to make sure the process can access its own
2203 * directory. For that we overmount the usually inaccessible "private" subdirectory with a
2204 * tmpfs that makes it accessible and is empty except for the submounts we do this for. */
2205
2206 private_root = strjoin(params->prefix[t], "/private");
2207 if (!private_root) {
2208 r = -ENOMEM;
2209 goto finish;
2210 }
2211
2212 r = strv_consume(&empty_directories, private_root);
2213 if (r < 0) {
2214 r = -ENOMEM;
2215 goto finish;
2216 }
2217 }
2218
2219 STRV_FOREACH(suffix, context->directories[t].paths) {
2220 char *s, *d;
2221
2222 if (context->dynamic_user && t != EXEC_DIRECTORY_CONFIGURATION)
2223 s = strjoin(params->prefix[t], "/private/", *suffix);
2224 else
2225 s = strjoin(params->prefix[t], "/", *suffix);
2226 if (!s) {
2227 r = -ENOMEM;
2228 goto finish;
2229 }
2230
2231 d = strdup(s);
2232 if (!d) {
2233 free(s);
2234 r = -ENOMEM;
2235 goto finish;
2236 }
2237
2238 bind_mounts[h++] = (BindMount) {
2239 .source = s,
2240 .destination = d,
2241 .read_only = false,
2242 .recursive = true,
2243 .ignore_enoent = false,
2244 };
2245 }
2246 }
2247
2248 assert(h == n);
2249
2250 *ret_bind_mounts = bind_mounts;
2251 *ret_n_bind_mounts = n;
2252 *ret_empty_directories = empty_directories;
2253
2254 empty_directories = NULL;
2255
2256 return (int) n;
2257
2258finish:
2259 bind_mount_free_many(bind_mounts, h);
2260 return r;
2261}
2262
6818c54c
LP
2263static int apply_mount_namespace(
2264 Unit *u,
2265 ExecCommand *command,
2266 const ExecContext *context,
2267 const ExecParameters *params,
2268 ExecRuntime *runtime) {
2269
6c47cd7d 2270 _cleanup_strv_free_ char **rw = NULL, **empty_directories = NULL;
93c6bb51 2271 char *tmp = NULL, *var = NULL;
915e6d16 2272 const char *root_dir = NULL, *root_image = NULL;
93c6bb51 2273 NameSpaceInfo ns_info = {
af964954 2274 .ignore_protect_paths = false,
93c6bb51
DH
2275 .private_dev = context->private_devices,
2276 .protect_control_groups = context->protect_control_groups,
2277 .protect_kernel_tunables = context->protect_kernel_tunables,
2278 .protect_kernel_modules = context->protect_kernel_modules,
5d997827 2279 .mount_apivfs = context->mount_apivfs,
93c6bb51 2280 };
165a31c0 2281 bool needs_sandboxing;
6c47cd7d
LP
2282 BindMount *bind_mounts = NULL;
2283 unsigned n_bind_mounts = 0;
6818c54c 2284 int r;
93c6bb51 2285
2b3c1b9e
DH
2286 assert(context);
2287
93c6bb51
DH
2288 /* The runtime struct only contains the parent of the private /tmp,
2289 * which is non-accessible to world users. Inside of it there's a /tmp
2290 * that is sticky, and that's the one we want to use here. */
2291
2292 if (context->private_tmp && runtime) {
2293 if (runtime->tmp_dir)
2294 tmp = strjoina(runtime->tmp_dir, "/tmp");
2295 if (runtime->var_tmp_dir)
2296 var = strjoina(runtime->var_tmp_dir, "/tmp");
2297 }
2298
2299 r = compile_read_write_paths(context, params, &rw);
2300 if (r < 0)
2301 return r;
2302
915e6d16
LP
2303 if (params->flags & EXEC_APPLY_CHROOT) {
2304 root_image = context->root_image;
2305
2306 if (!root_image)
2307 root_dir = context->root_directory;
2308 }
93c6bb51 2309
6c47cd7d
LP
2310 r = compile_bind_mounts(context, params, &bind_mounts, &n_bind_mounts, &empty_directories);
2311 if (r < 0)
2312 return r;
2313
af964954
DH
2314 /*
2315 * If DynamicUser=no and RootDirectory= is set then lets pass a relaxed
2316 * sandbox info, otherwise enforce it, don't ignore protected paths and
2317 * fail if we are enable to apply the sandbox inside the mount namespace.
2318 */
2319 if (!context->dynamic_user && root_dir)
2320 ns_info.ignore_protect_paths = true;
2321
165a31c0 2322 needs_sandboxing = (params->flags & EXEC_APPLY_SANDBOXING) && !(command->flags & EXEC_COMMAND_FULLY_PRIVILEGED);
6818c54c 2323
915e6d16
LP
2324 r = setup_namespace(root_dir, root_image,
2325 &ns_info, rw,
165a31c0
LP
2326 needs_sandboxing ? context->read_only_paths : NULL,
2327 needs_sandboxing ? context->inaccessible_paths : NULL,
6c47cd7d
LP
2328 empty_directories,
2329 bind_mounts,
2330 n_bind_mounts,
93c6bb51
DH
2331 tmp,
2332 var,
165a31c0
LP
2333 needs_sandboxing ? context->protect_home : PROTECT_HOME_NO,
2334 needs_sandboxing ? context->protect_system : PROTECT_SYSTEM_NO,
915e6d16
LP
2335 context->mount_flags,
2336 DISSECT_IMAGE_DISCARD_ON_LOOP);
93c6bb51 2337
6c47cd7d
LP
2338 bind_mount_free_many(bind_mounts, n_bind_mounts);
2339
93c6bb51
DH
2340 /* If we couldn't set up the namespace this is probably due to a
2341 * missing capability. In this case, silently proceeed. */
2342 if (IN_SET(r, -EPERM, -EACCES)) {
93c6bb51 2343 log_unit_debug_errno(u, r, "Failed to set up namespace, assuming containerized execution, ignoring: %m");
86ffb325 2344 return 0;
93c6bb51
DH
2345 }
2346
2347 return r;
2348}
2349
915e6d16
LP
2350static int apply_working_directory(
2351 const ExecContext *context,
2352 const ExecParameters *params,
2353 const char *home,
376fecf6
LP
2354 const bool needs_mount_ns,
2355 int *exit_status) {
915e6d16 2356
6732edab 2357 const char *d, *wd;
2b3c1b9e
DH
2358
2359 assert(context);
376fecf6 2360 assert(exit_status);
2b3c1b9e 2361
6732edab
LP
2362 if (context->working_directory_home) {
2363
376fecf6
LP
2364 if (!home) {
2365 *exit_status = EXIT_CHDIR;
6732edab 2366 return -ENXIO;
376fecf6 2367 }
6732edab 2368
2b3c1b9e 2369 wd = home;
6732edab
LP
2370
2371 } else if (context->working_directory)
2b3c1b9e
DH
2372 wd = context->working_directory;
2373 else
2374 wd = "/";
e7f1e7c6
DH
2375
2376 if (params->flags & EXEC_APPLY_CHROOT) {
2377 if (!needs_mount_ns && context->root_directory)
376fecf6
LP
2378 if (chroot(context->root_directory) < 0) {
2379 *exit_status = EXIT_CHROOT;
e7f1e7c6 2380 return -errno;
376fecf6 2381 }
e7f1e7c6 2382
2b3c1b9e
DH
2383 d = wd;
2384 } else
3b0e5bb5 2385 d = prefix_roota(context->root_directory, wd);
e7f1e7c6 2386
376fecf6
LP
2387 if (chdir(d) < 0 && !context->working_directory_missing_ok) {
2388 *exit_status = EXIT_CHDIR;
2b3c1b9e 2389 return -errno;
376fecf6 2390 }
e7f1e7c6
DH
2391
2392 return 0;
2393}
2394
b1edf445
LP
2395static int setup_keyring(
2396 Unit *u,
2397 const ExecContext *context,
2398 const ExecParameters *p,
2399 uid_t uid, gid_t gid) {
2400
74dd6b51 2401 key_serial_t keyring;
b1edf445 2402 int r;
74dd6b51
LP
2403
2404 assert(u);
b1edf445 2405 assert(context);
74dd6b51
LP
2406 assert(p);
2407
2408 /* Let's set up a new per-service "session" kernel keyring for each system service. This has the benefit that
2409 * each service runs with its own keyring shared among all processes of the service, but with no hook-up beyond
2410 * that scope, and in particular no link to the per-UID keyring. If we don't do this the keyring will be
2411 * automatically created on-demand and then linked to the per-UID keyring, by the kernel. The kernel's built-in
2412 * on-demand behaviour is very appropriate for login users, but probably not so much for system services, where
2413 * UIDs are not necessarily specific to a service but reused (at least in the case of UID 0). */
2414
2415 if (!(p->flags & EXEC_NEW_KEYRING))
2416 return 0;
2417
b1edf445
LP
2418 if (context->keyring_mode == EXEC_KEYRING_INHERIT)
2419 return 0;
2420
74dd6b51
LP
2421 keyring = keyctl(KEYCTL_JOIN_SESSION_KEYRING, 0, 0, 0, 0);
2422 if (keyring == -1) {
2423 if (errno == ENOSYS)
8002fb97 2424 log_unit_debug_errno(u, errno, "Kernel keyring not supported, ignoring.");
74dd6b51 2425 else if (IN_SET(errno, EACCES, EPERM))
8002fb97 2426 log_unit_debug_errno(u, errno, "Kernel keyring access prohibited, ignoring.");
74dd6b51 2427 else if (errno == EDQUOT)
8002fb97 2428 log_unit_debug_errno(u, errno, "Out of kernel keyrings to allocate, ignoring.");
74dd6b51 2429 else
8002fb97 2430 return log_unit_error_errno(u, errno, "Setting up kernel keyring failed: %m");
74dd6b51
LP
2431
2432 return 0;
2433 }
2434
b3415f5d
LP
2435 /* Populate they keyring with the invocation ID by default. */
2436 if (!sd_id128_is_null(u->invocation_id)) {
2437 key_serial_t key;
2438
2439 key = add_key("user", "invocation_id", &u->invocation_id, sizeof(u->invocation_id), KEY_SPEC_SESSION_KEYRING);
2440 if (key == -1)
8002fb97 2441 log_unit_debug_errno(u, errno, "Failed to add invocation ID to keyring, ignoring: %m");
b3415f5d
LP
2442 else {
2443 if (keyctl(KEYCTL_SETPERM, key,
2444 KEY_POS_VIEW|KEY_POS_READ|KEY_POS_SEARCH|
2445 KEY_USR_VIEW|KEY_USR_READ|KEY_USR_SEARCH, 0, 0) < 0)
8002fb97 2446 return log_unit_error_errno(u, errno, "Failed to restrict invocation ID permission: %m");
b3415f5d
LP
2447 }
2448 }
2449
74dd6b51
LP
2450 /* And now, make the keyring owned by the service's user */
2451 if (uid_is_valid(uid) || gid_is_valid(gid))
2452 if (keyctl(KEYCTL_CHOWN, keyring, uid, gid, 0) < 0)
8002fb97 2453 return log_unit_error_errno(u, errno, "Failed to change ownership of session keyring: %m");
74dd6b51 2454
b1edf445
LP
2455 /* When requested link the user keyring into the session keyring. */
2456 if (context->keyring_mode == EXEC_KEYRING_SHARED) {
2457 uid_t saved_uid;
2458 gid_t saved_gid;
2459
2460 /* Acquiring a reference to the user keyring is nasty. We briefly change identity in order to get things
2461 * set up properly by the kernel. If we don't do that then we can't create it atomically, and that
2462 * sucks for parallel execution. This mimics what pam_keyinit does, too.*/
2463
2464 saved_uid = getuid();
2465 saved_gid = getgid();
2466
2467 if (gid_is_valid(gid) && gid != saved_gid) {
2468 if (setregid(gid, -1) < 0)
8002fb97 2469 return log_unit_error_errno(u, errno, "Failed to change GID for user keyring: %m");
b1edf445
LP
2470 }
2471
2472 if (uid_is_valid(uid) && uid != saved_uid) {
2473 if (setreuid(uid, -1) < 0) {
2474 (void) setregid(saved_gid, -1);
8002fb97 2475 return log_unit_error_errno(u, errno, "Failed to change UID for user keyring: %m");
b1edf445
LP
2476 }
2477 }
2478
2479 if (keyctl(KEYCTL_LINK,
2480 KEY_SPEC_USER_KEYRING,
2481 KEY_SPEC_SESSION_KEYRING, 0, 0) < 0) {
2482
2483 r = -errno;
2484
2485 (void) setreuid(saved_uid, -1);
2486 (void) setregid(saved_gid, -1);
2487
8002fb97 2488 return log_unit_error_errno(u, r, "Failed to link user keyring into session keyring: %m");
b1edf445
LP
2489 }
2490
2491 if (uid_is_valid(uid) && uid != saved_uid) {
2492 if (setreuid(saved_uid, -1) < 0) {
2493 (void) setregid(saved_gid, -1);
8002fb97 2494 return log_unit_error_errno(u, errno, "Failed to change UID back for user keyring: %m");
b1edf445
LP
2495 }
2496 }
2497
2498 if (gid_is_valid(gid) && gid != saved_gid) {
2499 if (setregid(saved_gid, -1) < 0)
8002fb97 2500 return log_unit_error_errno(u, errno, "Failed to change GID back for user keyring: %m");
b1edf445 2501 }
61ceaea5 2502 }
b1edf445 2503
74dd6b51
LP
2504 return 0;
2505}
2506
29206d46
LP
/* Appends the valid (i.e. non-negative) file descriptors of a socket pair to array[], advancing the element
 * counter *n for each descriptor stored. A NULL pair is silently ignored. The caller must make sure array[]
 * has room for up to two more entries. */
static void append_socket_pair(int *array, unsigned *n, int pair[2]) {
        unsigned k;

        assert(array);
        assert(n);

        if (!pair)
                return;

        for (k = 0; k < 2; k++)
                if (pair[k] >= 0)
                        array[(*n)++] = pair[k];
}
2519
a34ceba6
LP
2520static int close_remaining_fds(
2521 const ExecParameters *params,
2522 ExecRuntime *runtime,
29206d46 2523 DynamicCreds *dcreds,
00d9ef85 2524 int user_lookup_fd,
a34ceba6
LP
2525 int socket_fd,
2526 int *fds, unsigned n_fds) {
2527
2528 unsigned n_dont_close = 0;
00d9ef85 2529 int dont_close[n_fds + 12];
a34ceba6
LP
2530
2531 assert(params);
2532
2533 if (params->stdin_fd >= 0)
2534 dont_close[n_dont_close++] = params->stdin_fd;
2535 if (params->stdout_fd >= 0)
2536 dont_close[n_dont_close++] = params->stdout_fd;
2537 if (params->stderr_fd >= 0)
2538 dont_close[n_dont_close++] = params->stderr_fd;
2539
2540 if (socket_fd >= 0)
2541 dont_close[n_dont_close++] = socket_fd;
2542 if (n_fds > 0) {
2543 memcpy(dont_close + n_dont_close, fds, sizeof(int) * n_fds);
2544 n_dont_close += n_fds;
2545 }
2546
29206d46
LP
2547 if (runtime)
2548 append_socket_pair(dont_close, &n_dont_close, runtime->netns_storage_socket);
2549
2550 if (dcreds) {
2551 if (dcreds->user)
2552 append_socket_pair(dont_close, &n_dont_close, dcreds->user->storage_socket);
2553 if (dcreds->group)
2554 append_socket_pair(dont_close, &n_dont_close, dcreds->group->storage_socket);
a34ceba6
LP
2555 }
2556
00d9ef85
LP
2557 if (user_lookup_fd >= 0)
2558 dont_close[n_dont_close++] = user_lookup_fd;
2559
a34ceba6
LP
2560 return close_all_fds(dont_close, n_dont_close);
2561}
2562
00d9ef85
LP
2563static int send_user_lookup(
2564 Unit *unit,
2565 int user_lookup_fd,
2566 uid_t uid,
2567 gid_t gid) {
2568
2569 assert(unit);
2570
2571 /* Send the resolved UID/GID to PID 1 after we learnt it. We send a single datagram, containing the UID/GID
2572 * data as well as the unit name. Note that we suppress sending this if no user/group to resolve was
2573 * specified. */
2574
2575 if (user_lookup_fd < 0)
2576 return 0;
2577
2578 if (!uid_is_valid(uid) && !gid_is_valid(gid))
2579 return 0;
2580
2581 if (writev(user_lookup_fd,
2582 (struct iovec[]) {
e6a7ec4b
LP
2583 IOVEC_INIT(&uid, sizeof(uid)),
2584 IOVEC_INIT(&gid, sizeof(gid)),
2585 IOVEC_INIT_STRING(unit->id) }, 3) < 0)
00d9ef85
LP
2586 return -errno;
2587
2588 return 0;
2589}
2590
6732edab
LP
2591static int acquire_home(const ExecContext *c, uid_t uid, const char** home, char **buf) {
2592 int r;
2593
2594 assert(c);
2595 assert(home);
2596 assert(buf);
2597
2598 /* If WorkingDirectory=~ is set, try to acquire a usable home directory. */
2599
2600 if (*home)
2601 return 0;
2602
2603 if (!c->working_directory_home)
2604 return 0;
2605
2606 if (uid == 0) {
2607 /* Hardcode /root as home directory for UID 0 */
2608 *home = "/root";
2609 return 1;
2610 }
2611
2612 r = get_home_dir(buf);
2613 if (r < 0)
2614 return r;
2615
2616 *home = *buf;
2617 return 1;
2618}
2619
da50b85a
LP
2620static int compile_suggested_paths(const ExecContext *c, const ExecParameters *p, char ***ret) {
2621 _cleanup_strv_free_ char ** list = NULL;
2622 ExecDirectoryType t;
2623 int r;
2624
2625 assert(c);
2626 assert(p);
2627 assert(ret);
2628
2629 assert(c->dynamic_user);
2630
2631 /* Compile a list of paths that it might make sense to read the owning UID from to use as initial candidate for
2632 * dynamic UID allocation, in order to save us from doing costly recursive chown()s of the special
2633 * directories. */
2634
2635 for (t = 0; t < _EXEC_DIRECTORY_TYPE_MAX; t++) {
2636 char **i;
2637
2638 if (t == EXEC_DIRECTORY_CONFIGURATION)
2639 continue;
2640
2641 if (!p->prefix[t])
2642 continue;
2643
2644 STRV_FOREACH(i, c->directories[t].paths) {
2645 char *e;
2646
2647 e = strjoin(p->prefix[t], "/private/", *i);
2648 if (!e)
2649 return -ENOMEM;
2650
2651 r = strv_consume(&list, e);
2652 if (r < 0)
2653 return r;
2654 }
2655 }
2656
2657 *ret = list;
2658 list = NULL;
2659
2660 return 0;
2661}
2662
ff0af2a1 2663static int exec_child(
f2341e0a 2664 Unit *unit,
ff0af2a1
LP
2665 ExecCommand *command,
2666 const ExecContext *context,
2667 const ExecParameters *params,
2668 ExecRuntime *runtime,
29206d46 2669 DynamicCreds *dcreds,
ff0af2a1
LP
2670 char **argv,
2671 int socket_fd,
52c239d7 2672 int named_iofds[3],
4c47affc
FB
2673 int *fds,
2674 unsigned n_storage_fds,
9b141911 2675 unsigned n_socket_fds,
ff0af2a1 2676 char **files_env,
00d9ef85 2677 int user_lookup_fd,
12145637 2678 int *exit_status) {
d35fbf6b 2679
2065ca69 2680 _cleanup_strv_free_ char **our_env = NULL, **pass_env = NULL, **accum_env = NULL, **final_argv = NULL;
6732edab 2681 _cleanup_free_ char *mac_selinux_context_net = NULL, *home_buffer = NULL;
4d885bd3
DH
2682 _cleanup_free_ gid_t *supplementary_gids = NULL;
2683 const char *username = NULL, *groupname = NULL;
2b3c1b9e 2684 const char *home = NULL, *shell = NULL;
7bce046b
LP
2685 dev_t journal_stream_dev = 0;
2686 ino_t journal_stream_ino = 0;
165a31c0
LP
2687 bool needs_sandboxing, /* Do we need to set up full sandboxing? (i.e. all namespacing, all MAC stuff, caps, yadda yadda */
2688 needs_setuid, /* Do we need to do the actual setresuid()/setresgid() calls? */
2689 needs_mount_namespace, /* Do we need to set up a mount namespace for this kernel? */
2690 needs_ambient_hack; /* Do we need to apply the ambient capabilities hack? */
349cc4a5 2691#if HAVE_SELINUX
43b1f709 2692 bool use_selinux = false;
ecfbc84f 2693#endif
f9fa32f0 2694#if ENABLE_SMACK
43b1f709 2695 bool use_smack = false;
ecfbc84f 2696#endif
349cc4a5 2697#if HAVE_APPARMOR
43b1f709 2698 bool use_apparmor = false;
ecfbc84f 2699#endif
fed1e721
LP
2700 uid_t uid = UID_INVALID;
2701 gid_t gid = GID_INVALID;
4d885bd3 2702 int i, r, ngids = 0;
4c47affc 2703 unsigned n_fds;
3536f49e 2704 ExecDirectoryType dt;
165a31c0 2705 int secure_bits;
034c6ed7 2706
f2341e0a 2707 assert(unit);
5cb5a6ff
LP
2708 assert(command);
2709 assert(context);
d35fbf6b 2710 assert(params);
ff0af2a1 2711 assert(exit_status);
d35fbf6b
DM
2712
2713 rename_process_from_path(command->path);
2714
2715 /* We reset exactly these signals, since they are the
2716 * only ones we set to SIG_IGN in the main daemon. All
2717 * others we leave untouched because we set them to
2718 * SIG_DFL or a valid handler initially, both of which
2719 * will be demoted to SIG_DFL. */
ce30c8dc
LP
2720 (void) default_signals(SIGNALS_CRASH_HANDLER,
2721 SIGNALS_IGNORE, -1);
d35fbf6b
DM
2722
2723 if (context->ignore_sigpipe)
ce30c8dc 2724 (void) ignore_signals(SIGPIPE, -1);
d35fbf6b 2725
ff0af2a1
LP
2726 r = reset_signal_mask();
2727 if (r < 0) {
2728 *exit_status = EXIT_SIGNAL_MASK;
12145637 2729 return log_unit_error_errno(unit, r, "Failed to set process signal mask: %m");
d35fbf6b 2730 }
034c6ed7 2731
d35fbf6b
DM
2732 if (params->idle_pipe)
2733 do_idle_pipe_dance(params->idle_pipe);
4f2d528d 2734
2c027c62
LP
2735 /* Close fds we don't need very early to make sure we don't block init reexecution because it cannot bind its
2736 * sockets. Among the fds we close are the logging fds, and we want to keep them closed, so that we don't have
2737 * any fds open we don't really want open during the transition. In order to make logging work, we switch the
2738 * log subsystem into open_when_needed mode, so that it reopens the logs on every single log call. */
ff0af2a1 2739
d35fbf6b 2740 log_forget_fds();
2c027c62 2741 log_set_open_when_needed(true);
4f2d528d 2742
40a80078
LP
2743 /* In case anything used libc syslog(), close this here, too */
2744 closelog();
2745
4c47affc 2746 n_fds = n_storage_fds + n_socket_fds;
00d9ef85 2747 r = close_remaining_fds(params, runtime, dcreds, user_lookup_fd, socket_fd, fds, n_fds);
ff0af2a1
LP
2748 if (r < 0) {
2749 *exit_status = EXIT_FDS;
12145637 2750 return log_unit_error_errno(unit, r, "Failed to close unwanted file descriptors: %m");
8c7be95e
LP
2751 }
2752
d35fbf6b
DM
2753 if (!context->same_pgrp)
2754 if (setsid() < 0) {
ff0af2a1 2755 *exit_status = EXIT_SETSID;
12145637 2756 return log_unit_error_errno(unit, errno, "Failed to create new process session: %m");
d35fbf6b 2757 }
9e2f7c11 2758
1e22b5cd 2759 exec_context_tty_reset(context, params);
d35fbf6b 2760
c891efaf 2761 if (unit_shall_confirm_spawn(unit)) {
7d5ceb64 2762 const char *vc = params->confirm_spawn;
3b20f877
FB
2763 _cleanup_free_ char *cmdline = NULL;
2764
2765 cmdline = exec_command_line(argv);
2766 if (!cmdline) {
0460aa5c 2767 *exit_status = EXIT_MEMORY;
12145637 2768 return log_oom();
3b20f877 2769 }
d35fbf6b 2770
eedf223a 2771 r = ask_for_confirmation(vc, unit, cmdline);
3b20f877
FB
2772 if (r != CONFIRM_EXECUTE) {
2773 if (r == CONFIRM_PRETEND_SUCCESS) {
2774 *exit_status = EXIT_SUCCESS;
2775 return 0;
2776 }
ff0af2a1 2777 *exit_status = EXIT_CONFIRM;
12145637 2778 log_unit_error(unit, "Execution cancelled by the user");
d35fbf6b 2779 return -ECANCELED;
d35fbf6b
DM
2780 }
2781 }
1a63a750 2782
29206d46 2783 if (context->dynamic_user && dcreds) {
da50b85a 2784 _cleanup_strv_free_ char **suggested_paths = NULL;
29206d46 2785
409093fe
LP
2786 /* Make sure we bypass our own NSS module for any NSS checks */
2787 if (putenv((char*) "SYSTEMD_NSS_DYNAMIC_BYPASS=1") != 0) {
2788 *exit_status = EXIT_USER;
12145637 2789 return log_unit_error_errno(unit, errno, "Failed to update environment: %m");
409093fe
LP
2790 }
2791
da50b85a
LP
2792 r = compile_suggested_paths(context, params, &suggested_paths);
2793 if (r < 0) {
2794 *exit_status = EXIT_MEMORY;
2795 return log_oom();
2796 }
2797
2798 r = dynamic_creds_realize(dcreds, suggested_paths, &uid, &gid);
ff0af2a1
LP
2799 if (r < 0) {
2800 *exit_status = EXIT_USER;
12145637 2801 return log_unit_error_errno(unit, r, "Failed to update dynamic user credentials: %m");
524daa8c 2802 }
524daa8c 2803
70dd455c 2804 if (!uid_is_valid(uid)) {
29206d46 2805 *exit_status = EXIT_USER;
12145637 2806 log_unit_error(unit, "UID validation failed for \""UID_FMT"\"", uid);
70dd455c
ZJS
2807 return -ESRCH;
2808 }
2809
2810 if (!gid_is_valid(gid)) {
2811 *exit_status = EXIT_USER;
12145637 2812 log_unit_error(unit, "GID validation failed for \""GID_FMT"\"", gid);
29206d46
LP
2813 return -ESRCH;
2814 }
5bc7452b 2815
29206d46
LP
2816 if (dcreds->user)
2817 username = dcreds->user->name;
2818
2819 } else {
4d885bd3
DH
2820 r = get_fixed_user(context, &username, &uid, &gid, &home, &shell);
2821 if (r < 0) {
2822 *exit_status = EXIT_USER;
12145637 2823 return log_unit_error_errno(unit, r, "Failed to determine user credentials: %m");
5bc7452b 2824 }
5bc7452b 2825
4d885bd3
DH
2826 r = get_fixed_group(context, &groupname, &gid);
2827 if (r < 0) {
2828 *exit_status = EXIT_GROUP;
12145637 2829 return log_unit_error_errno(unit, r, "Failed to determine group credentials: %m");
4d885bd3 2830 }
cdc5d5c5 2831 }
29206d46 2832
cdc5d5c5
DH
2833 /* Initialize user supplementary groups and get SupplementaryGroups= ones */
2834 r = get_supplementary_groups(context, username, groupname, gid,
2835 &supplementary_gids, &ngids);
2836 if (r < 0) {
2837 *exit_status = EXIT_GROUP;
12145637 2838 return log_unit_error_errno(unit, r, "Failed to determine supplementary groups: %m");
29206d46 2839 }
5bc7452b 2840
00d9ef85
LP
2841 r = send_user_lookup(unit, user_lookup_fd, uid, gid);
2842 if (r < 0) {
2843 *exit_status = EXIT_USER;
12145637 2844 return log_unit_error_errno(unit, r, "Failed to send user credentials to PID1: %m");
00d9ef85
LP
2845 }
2846
2847 user_lookup_fd = safe_close(user_lookup_fd);
2848
6732edab
LP
2849 r = acquire_home(context, uid, &home, &home_buffer);
2850 if (r < 0) {
2851 *exit_status = EXIT_CHDIR;
12145637 2852 return log_unit_error_errno(unit, r, "Failed to determine $HOME for user: %m");
6732edab
LP
2853 }
2854
d35fbf6b
DM
2855 /* If a socket is connected to STDIN/STDOUT/STDERR, we
2856 * must sure to drop O_NONBLOCK */
2857 if (socket_fd >= 0)
a34ceba6 2858 (void) fd_nonblock(socket_fd, false);
acbb0225 2859
52c239d7 2860 r = setup_input(context, params, socket_fd, named_iofds);
ff0af2a1
LP
2861 if (r < 0) {
2862 *exit_status = EXIT_STDIN;
12145637 2863 return log_unit_error_errno(unit, r, "Failed to set up standard input: %m");
d35fbf6b 2864 }
034c6ed7 2865
52c239d7 2866 r = setup_output(unit, context, params, STDOUT_FILENO, socket_fd, named_iofds, basename(command->path), uid, gid, &journal_stream_dev, &journal_stream_ino);
ff0af2a1
LP
2867 if (r < 0) {
2868 *exit_status = EXIT_STDOUT;
12145637 2869 return log_unit_error_errno(unit, r, "Failed to set up standard output: %m");
d35fbf6b
DM
2870 }
2871
52c239d7 2872 r = setup_output(unit, context, params, STDERR_FILENO, socket_fd, named_iofds, basename(command->path), uid, gid, &journal_stream_dev, &journal_stream_ino);
ff0af2a1
LP
2873 if (r < 0) {
2874 *exit_status = EXIT_STDERR;
12145637 2875 return log_unit_error_errno(unit, r, "Failed to set up standard error output: %m");
d35fbf6b
DM
2876 }
2877
2878 if (params->cgroup_path) {
ff0af2a1
LP
2879 r = cg_attach_everywhere(params->cgroup_supported, params->cgroup_path, 0, NULL, NULL);
2880 if (r < 0) {
2881 *exit_status = EXIT_CGROUP;
12145637 2882 return log_unit_error_errno(unit, r, "Failed to attach to cgroup %s: %m", params->cgroup_path);
309bff19 2883 }
d35fbf6b 2884 }
309bff19 2885
d35fbf6b 2886 if (context->oom_score_adjust_set) {
d5243d62 2887 char t[DECIMAL_STR_MAX(context->oom_score_adjust)];
f2b68789 2888
d5243d62
LP
2889 /* When we can't make this change due to EPERM, then
2890 * let's silently skip over it. User namespaces
2891 * prohibit write access to this file, and we
2892 * shouldn't trip up over that. */
613b411c 2893
d5243d62 2894 sprintf(t, "%i", context->oom_score_adjust);
ad118bda 2895 r = write_string_file("/proc/self/oom_score_adj", t, 0);
12145637 2896 if (IN_SET(r, -EPERM, -EACCES))
f2341e0a 2897 log_unit_debug_errno(unit, r, "Failed to adjust OOM setting, assuming containerized execution, ignoring: %m");
12145637 2898 else if (r < 0) {
ff0af2a1 2899 *exit_status = EXIT_OOM_ADJUST;
12145637 2900 return log_unit_error_errno(unit, r, "Failed to adjust OOM setting: %m");
613b411c 2901 }
d35fbf6b
DM
2902 }
2903
2904 if (context->nice_set)
2905 if (setpriority(PRIO_PROCESS, 0, context->nice) < 0) {
ff0af2a1 2906 *exit_status = EXIT_NICE;
12145637 2907 return log_unit_error_errno(unit, errno, "Failed to set up process scheduling priority (nice level): %m");
613b411c
LP
2908 }
2909
d35fbf6b
DM
2910 if (context->cpu_sched_set) {
2911 struct sched_param param = {
2912 .sched_priority = context->cpu_sched_priority,
2913 };
2914
ff0af2a1
LP
2915 r = sched_setscheduler(0,
2916 context->cpu_sched_policy |
2917 (context->cpu_sched_reset_on_fork ?
2918 SCHED_RESET_ON_FORK : 0),
2919 &param);
2920 if (r < 0) {
2921 *exit_status = EXIT_SETSCHEDULER;
12145637 2922 return log_unit_error_errno(unit, errno, "Failed to set up CPU scheduling: %m");
fc9b2a84 2923 }
d35fbf6b 2924 }
fc9b2a84 2925
d35fbf6b
DM
2926 if (context->cpuset)
2927 if (sched_setaffinity(0, CPU_ALLOC_SIZE(context->cpuset_ncpus), context->cpuset) < 0) {
ff0af2a1 2928 *exit_status = EXIT_CPUAFFINITY;
12145637 2929 return log_unit_error_errno(unit, errno, "Failed to set up CPU affinity: %m");
034c6ed7
LP
2930 }
2931
d35fbf6b
DM
2932 if (context->ioprio_set)
2933 if (ioprio_set(IOPRIO_WHO_PROCESS, 0, context->ioprio) < 0) {
ff0af2a1 2934 *exit_status = EXIT_IOPRIO;
12145637 2935 return log_unit_error_errno(unit, errno, "Failed to set up IO scheduling priority: %m");
d35fbf6b 2936 }
da726a4d 2937
d35fbf6b
DM
2938 if (context->timer_slack_nsec != NSEC_INFINITY)
2939 if (prctl(PR_SET_TIMERSLACK, context->timer_slack_nsec) < 0) {
ff0af2a1 2940 *exit_status = EXIT_TIMERSLACK;
12145637 2941 return log_unit_error_errno(unit, errno, "Failed to set up timer slack: %m");
4c2630eb 2942 }
9eba9da4 2943
21022b9d
LP
2944 if (context->personality != PERSONALITY_INVALID) {
2945 r = safe_personality(context->personality);
2946 if (r < 0) {
ff0af2a1 2947 *exit_status = EXIT_PERSONALITY;
12145637 2948 return log_unit_error_errno(unit, r, "Failed to set up execution domain (personality): %m");
4c2630eb 2949 }
21022b9d 2950 }
94f04347 2951
d35fbf6b 2952 if (context->utmp_id)
df0ff127 2953 utmp_put_init_process(context->utmp_id, getpid_cached(), getsid(0),
6a93917d 2954 context->tty_path,
023a4f67
LP
2955 context->utmp_mode == EXEC_UTMP_INIT ? INIT_PROCESS :
2956 context->utmp_mode == EXEC_UTMP_LOGIN ? LOGIN_PROCESS :
2957 USER_PROCESS,
6a93917d 2958 username);
d35fbf6b 2959
e0d2adfd 2960 if (context->user) {
ff0af2a1
LP
2961 r = chown_terminal(STDIN_FILENO, uid);
2962 if (r < 0) {
2963 *exit_status = EXIT_STDIN;
12145637 2964 return log_unit_error_errno(unit, r, "Failed to change ownership of terminal: %m");
071830ff 2965 }
d35fbf6b 2966 }
8e274523 2967
a931ad47
LP
2968 /* If delegation is enabled we'll pass ownership of the cgroup
2969 * (but only in systemd's own controller hierarchy!) to the
2970 * user of the new process. */
584b8688 2971 if (params->cgroup_path && context->user && (params->flags & EXEC_CGROUP_DELEGATE)) {
ff0af2a1
LP
2972 r = cg_set_task_access(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, 0644, uid, gid);
2973 if (r < 0) {
2974 *exit_status = EXIT_CGROUP;
12145637 2975 return log_unit_error_errno(unit, r, "Failed to adjust control group access: %m");
d35fbf6b 2976 }
034c6ed7 2977
ff0af2a1
LP
2978 r = cg_set_group_access(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, 0755, uid, gid);
2979 if (r < 0) {
2980 *exit_status = EXIT_CGROUP;
12145637 2981 return log_unit_error_errno(unit, r, "Failed to adjust control group access: %m");
034c6ed7 2982 }
d35fbf6b 2983 }
034c6ed7 2984
72fd1768 2985 for (dt = 0; dt < _EXEC_DIRECTORY_TYPE_MAX; dt++) {
8679efde 2986 r = setup_exec_directory(context, params, uid, gid, dt, exit_status);
12145637
LP
2987 if (r < 0)
2988 return log_unit_error_errno(unit, r, "Failed to set up special execution directory in %s: %m", params->prefix[dt]);
d35fbf6b 2989 }
94f04347 2990
7bce046b 2991 r = build_environment(
fd63e712 2992 unit,
7bce046b
LP
2993 context,
2994 params,
2995 n_fds,
2996 home,
2997 username,
2998 shell,
2999 journal_stream_dev,
3000 journal_stream_ino,
3001 &our_env);
2065ca69
JW
3002 if (r < 0) {
3003 *exit_status = EXIT_MEMORY;
12145637 3004 return log_oom();
2065ca69
JW
3005 }
3006
3007 r = build_pass_environment(context, &pass_env);
3008 if (r < 0) {
3009 *exit_status = EXIT_MEMORY;
12145637 3010 return log_oom();
2065ca69
JW
3011 }
3012
3013 accum_env = strv_env_merge(5,
3014 params->environment,
3015 our_env,
3016 pass_env,
3017 context->environment,
3018 files_env,
3019 NULL);
3020 if (!accum_env) {
3021 *exit_status = EXIT_MEMORY;
12145637 3022 return log_oom();
2065ca69 3023 }
1280503b 3024 accum_env = strv_env_clean(accum_env);
2065ca69 3025
096424d1 3026 (void) umask(context->umask);
b213e1c1 3027
b1edf445 3028 r = setup_keyring(unit, context, params, uid, gid);
74dd6b51
LP
3029 if (r < 0) {
3030 *exit_status = EXIT_KEYRING;
12145637 3031 return log_unit_error_errno(unit, r, "Failed to set up kernel keyring: %m");
74dd6b51
LP
3032 }
3033
165a31c0 3034 /* We need sandboxing if the caller asked us to apply it and the command isn't explicitly excepted from it */
1703fa41 3035 needs_sandboxing = (params->flags & EXEC_APPLY_SANDBOXING) && !(command->flags & EXEC_COMMAND_FULLY_PRIVILEGED);
7f18ef0a 3036
165a31c0
LP
3037 /* We need the ambient capability hack, if the caller asked us to apply it and the command is marked for it, and the kernel doesn't actually support ambient caps */
3038 needs_ambient_hack = (params->flags & EXEC_APPLY_SANDBOXING) && (command->flags & EXEC_COMMAND_AMBIENT_MAGIC) && !ambient_capabilities_supported();
7f18ef0a 3039
165a31c0
LP
3040 /* We need setresuid() if the caller asked us to apply sandboxing and the command isn't explicitly excepted from either whole sandboxing or just setresuid() itself, and the ambient hack is not desired */
3041 if (needs_ambient_hack)
3042 needs_setuid = false;
3043 else
3044 needs_setuid = (params->flags & EXEC_APPLY_SANDBOXING) && !(command->flags & (EXEC_COMMAND_FULLY_PRIVILEGED|EXEC_COMMAND_NO_SETUID));
3045
3046 if (needs_sandboxing) {
7f18ef0a
FK
3047 /* MAC enablement checks need to be done before a new mount ns is created, as they rely on /sys being
3048 * present. The actual MAC context application will happen later, as late as possible, to avoid
3049 * impacting our own code paths. */
3050
349cc4a5 3051#if HAVE_SELINUX
43b1f709 3052 use_selinux = mac_selinux_use();
7f18ef0a 3053#endif
f9fa32f0 3054#if ENABLE_SMACK
43b1f709 3055 use_smack = mac_smack_use();
7f18ef0a 3056#endif
349cc4a5 3057#if HAVE_APPARMOR
43b1f709 3058 use_apparmor = mac_apparmor_use();
7f18ef0a 3059#endif
165a31c0 3060 }
7f18ef0a 3061
165a31c0
LP
3062 if (needs_setuid) {
3063 if (context->pam_name && username) {
3064 r = setup_pam(context->pam_name, username, uid, gid, context->tty_path, &accum_env, fds, n_fds);
3065 if (r < 0) {
3066 *exit_status = EXIT_PAM;
12145637 3067 return log_unit_error_errno(unit, r, "Failed to set up PAM session: %m");
165a31c0
LP
3068 }
3069 }
b213e1c1 3070 }
ac45f971 3071
d35fbf6b 3072 if (context->private_network && runtime && runtime->netns_storage_socket[0] >= 0) {
6e2d7c4f
MS
3073 if (ns_type_supported(NAMESPACE_NET)) {
3074 r = setup_netns(runtime->netns_storage_socket);
3075 if (r < 0) {
3076 *exit_status = EXIT_NETWORK;
3077 return log_unit_error_errno(unit, r, "Failed to set up network namespacing: %m");
3078 }
3079 } else
3080 log_unit_warning(unit, "PrivateNetwork=yes is configured, but the kernel does not support network namespaces, ignoring.");
d35fbf6b 3081 }
169c1bda 3082
ee818b89 3083 needs_mount_namespace = exec_needs_mount_namespace(context, params, runtime);
ee818b89 3084 if (needs_mount_namespace) {
6818c54c 3085 r = apply_mount_namespace(unit, command, context, params, runtime);
3fbe8dbe
LP
3086 if (r < 0) {
3087 *exit_status = EXIT_NAMESPACE;
12145637 3088 return log_unit_error_errno(unit, r, "Failed to set up mount namespacing: %m");
3fbe8dbe 3089 }
d35fbf6b 3090 }
81a2b7ce 3091
50b3dfb9 3092 /* Apply just after mount namespace setup */
376fecf6 3093 r = apply_working_directory(context, params, home, needs_mount_namespace, exit_status);
12145637
LP
3094 if (r < 0)
3095 return log_unit_error_errno(unit, r, "Changing to the requested working directory failed: %m");
50b3dfb9 3096
bbeea271 3097 /* Drop groups as early as possible */
165a31c0 3098 if (needs_setuid) {
4d885bd3 3099 r = enforce_groups(context, gid, supplementary_gids, ngids);
096424d1
LP
3100 if (r < 0) {
3101 *exit_status = EXIT_GROUP;
12145637 3102 return log_unit_error_errno(unit, r, "Changing group credentials failed: %m");
096424d1 3103 }
165a31c0 3104 }
096424d1 3105
165a31c0 3106 if (needs_sandboxing) {
349cc4a5 3107#if HAVE_SELINUX
43b1f709 3108 if (use_selinux && params->selinux_context_net && socket_fd >= 0) {
937ccce9
LP
3109 r = mac_selinux_get_child_mls_label(socket_fd, command->path, context->selinux_context, &mac_selinux_context_net);
3110 if (r < 0) {
3111 *exit_status = EXIT_SELINUX_CONTEXT;
12145637 3112 return log_unit_error_errno(unit, r, "Failed to determine SELinux context: %m");
937ccce9 3113 }
9008e1ac 3114 }
9008e1ac
MS
3115#endif
3116
937ccce9
LP
3117 if (context->private_users) {
3118 r = setup_private_users(uid, gid);
3119 if (r < 0) {
3120 *exit_status = EXIT_USER;
12145637 3121 return log_unit_error_errno(unit, r, "Failed to set up user namespacing: %m");
937ccce9 3122 }
d251207d
LP
3123 }
3124 }
3125
165a31c0
LP
3126 /* We repeat the fd closing here, to make sure that nothing is leaked from the PAM modules. Note that we are
3127 * more aggressive this time since socket_fd and the netns fds we don't need anymore. The custom endpoint fd
3128 * was needed to upload the policy and can now be closed as well. */
ff0af2a1
LP
3129 r = close_all_fds(fds, n_fds);
3130 if (r >= 0)
3131 r = shift_fds(fds, n_fds);
3132 if (r >= 0)
4c47affc 3133 r = flags_fds(fds, n_storage_fds, n_socket_fds, context->non_blocking);
ff0af2a1
LP
3134 if (r < 0) {
3135 *exit_status = EXIT_FDS;
12145637 3136 return log_unit_error_errno(unit, r, "Failed to adjust passed file descriptors: %m");
d35fbf6b 3137 }
e66cf1a3 3138
165a31c0 3139 secure_bits = context->secure_bits;
e66cf1a3 3140
165a31c0
LP
3141 if (needs_sandboxing) {
3142 uint64_t bset;
755d4b67 3143
d35fbf6b 3144 for (i = 0; i < _RLIMIT_MAX; i++) {
03857c43 3145
d35fbf6b
DM
3146 if (!context->rlimit[i])
3147 continue;
3148
03857c43
LP
3149 r = setrlimit_closest(i, context->rlimit[i]);
3150 if (r < 0) {
ff0af2a1 3151 *exit_status = EXIT_LIMITS;
12145637 3152 return log_unit_error_errno(unit, r, "Failed to adjust resource limit %s: %m", rlimit_to_string(i));
e66cf1a3
LP
3153 }
3154 }
3155
f4170c67
LP
3156 /* Set the RTPRIO resource limit to 0, but only if nothing else was explicitly requested. */
3157 if (context->restrict_realtime && !context->rlimit[RLIMIT_RTPRIO]) {
3158 if (setrlimit(RLIMIT_RTPRIO, &RLIMIT_MAKE_CONST(0)) < 0) {
3159 *exit_status = EXIT_LIMITS;
12145637 3160 return log_unit_error_errno(unit, errno, "Failed to adjust RLIMIT_RTPRIO resource limit: %m");
f4170c67
LP
3161 }
3162 }
3163
165a31c0
LP
3164 bset = context->capability_bounding_set;
3165 /* If the ambient caps hack is enabled (which means the kernel can't do them, and the user asked for
3166 * our magic fallback), then let's add some extra caps, so that the service can drop privs of its own,
3167 * instead of us doing that */
3168 if (needs_ambient_hack)
3169 bset |= (UINT64_C(1) << CAP_SETPCAP) |
3170 (UINT64_C(1) << CAP_SETUID) |
3171 (UINT64_C(1) << CAP_SETGID);
3172
3173 if (!cap_test_all(bset)) {
3174 r = capability_bounding_set_drop(bset, false);
ff0af2a1
LP
3175 if (r < 0) {
3176 *exit_status = EXIT_CAPABILITIES;
12145637 3177 return log_unit_error_errno(unit, r, "Failed to drop capabilities: %m");
3b8bddde 3178 }
4c2630eb 3179 }
3b8bddde 3180
755d4b67
IP
3181 /* This is done before enforce_user, but ambient set
3182 * does not survive over setresuid() if keep_caps is not set. */
165a31c0
LP
3183 if (!needs_ambient_hack &&
3184 context->capability_ambient_set != 0) {
755d4b67
IP
3185 r = capability_ambient_set_apply(context->capability_ambient_set, true);
3186 if (r < 0) {
3187 *exit_status = EXIT_CAPABILITIES;
12145637 3188 return log_unit_error_errno(unit, r, "Failed to apply ambient capabilities (before UID change): %m");
755d4b67 3189 }
755d4b67 3190 }
165a31c0 3191 }
755d4b67 3192
165a31c0 3193 if (needs_setuid) {
d35fbf6b 3194 if (context->user) {
ff0af2a1
LP
3195 r = enforce_user(context, uid);
3196 if (r < 0) {
3197 *exit_status = EXIT_USER;
12145637 3198 return log_unit_error_errno(unit, r, "Failed to change UID to " UID_FMT ": %m", uid);
5b6319dc 3199 }
165a31c0
LP
3200
3201 if (!needs_ambient_hack &&
3202 context->capability_ambient_set != 0) {
755d4b67
IP
3203
3204 /* Fix the ambient capabilities after user change. */
3205 r = capability_ambient_set_apply(context->capability_ambient_set, false);
3206 if (r < 0) {
3207 *exit_status = EXIT_CAPABILITIES;
12145637 3208 return log_unit_error_errno(unit, r, "Failed to apply ambient capabilities (after UID change): %m");
755d4b67
IP
3209 }
3210
3211 /* If we were asked to change user and ambient capabilities
3212 * were requested, we had to add keep-caps to the securebits
3213 * so that we would maintain the inherited capability set
3214 * through the setresuid(). Make sure that the bit is added
3215 * also to the context secure_bits so that we don't try to
3216 * drop the bit away next. */
3217
7f508f2c 3218 secure_bits |= 1<<SECURE_KEEP_CAPS;
755d4b67 3219 }
5b6319dc 3220 }
165a31c0 3221 }
d35fbf6b 3222
165a31c0 3223 if (needs_sandboxing) {
5cd9cd35
LP
3224 /* Apply the MAC contexts late, but before seccomp syscall filtering, as those should really be last to
3225 * influence our own codepaths as little as possible. Moreover, applying MAC contexts usually requires
3226 * syscalls that are subject to seccomp filtering, hence should probably be applied before the syscalls
3227 * are restricted. */
3228
349cc4a5 3229#if HAVE_SELINUX
43b1f709 3230 if (use_selinux) {
5cd9cd35
LP
3231 char *exec_context = mac_selinux_context_net ?: context->selinux_context;
3232
3233 if (exec_context) {
3234 r = setexeccon(exec_context);
3235 if (r < 0) {
3236 *exit_status = EXIT_SELINUX_CONTEXT;
12145637 3237 return log_unit_error_errno(unit, r, "Failed to change SELinux context to %s: %m", exec_context);
5cd9cd35
LP
3238 }
3239 }
3240 }
3241#endif
3242
f9fa32f0 3243#if ENABLE_SMACK
43b1f709 3244 if (use_smack) {
7f18ef0a
FK
3245 r = setup_smack(context, command);
3246 if (r < 0) {
3247 *exit_status = EXIT_SMACK_PROCESS_LABEL;
12145637 3248 return log_unit_error_errno(unit, r, "Failed to set SMACK process label: %m");
7f18ef0a 3249 }
5cd9cd35 3250 }
7f18ef0a 3251#endif
5cd9cd35 3252
349cc4a5 3253#if HAVE_APPARMOR
43b1f709 3254 if (use_apparmor && context->apparmor_profile) {
5cd9cd35
LP
3255 r = aa_change_onexec(context->apparmor_profile);
3256 if (r < 0 && !context->apparmor_profile_ignore) {
3257 *exit_status = EXIT_APPARMOR_PROFILE;
12145637 3258 return log_unit_error_errno(unit, errno, "Failed to prepare AppArmor profile change to %s: %m", context->apparmor_profile);
5cd9cd35
LP
3259 }
3260 }
3261#endif
3262
165a31c0
LP
3263 /* PR_GET_SECUREBITS is not privileged, while PR_SET_SECUREBITS is. So to suppress potential EPERMs
3264 * we'll try not to call PR_SET_SECUREBITS unless necessary. */
755d4b67
IP
3265 if (prctl(PR_GET_SECUREBITS) != secure_bits)
3266 if (prctl(PR_SET_SECUREBITS, secure_bits) < 0) {
ff0af2a1 3267 *exit_status = EXIT_SECUREBITS;
12145637 3268 return log_unit_error_errno(unit, errno, "Failed to set process secure bits: %m");
ff01d048 3269 }
5b6319dc 3270
59eeb84b 3271 if (context_has_no_new_privileges(context))
d35fbf6b 3272 if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) < 0) {
ff0af2a1 3273 *exit_status = EXIT_NO_NEW_PRIVILEGES;
12145637 3274 return log_unit_error_errno(unit, errno, "Failed to disable new privileges: %m");
d35fbf6b
DM
3275 }
3276
349cc4a5 3277#if HAVE_SECCOMP
469830d1
LP
3278 r = apply_address_families(unit, context);
3279 if (r < 0) {
3280 *exit_status = EXIT_ADDRESS_FAMILIES;
12145637 3281 return log_unit_error_errno(unit, r, "Failed to restrict address families: %m");
4c2630eb 3282 }
04aa0cb9 3283
469830d1
LP
3284 r = apply_memory_deny_write_execute(unit, context);
3285 if (r < 0) {
3286 *exit_status = EXIT_SECCOMP;
12145637 3287 return log_unit_error_errno(unit, r, "Failed to disable writing to executable memory: %m");
f3e43635 3288 }
f4170c67 3289
469830d1
LP
3290 r = apply_restrict_realtime(unit, context);
3291 if (r < 0) {
3292 *exit_status = EXIT_SECCOMP;
12145637 3293 return log_unit_error_errno(unit, r, "Failed to apply realtime restrictions: %m");
f4170c67
LP
3294 }
3295
add00535
LP
3296 r = apply_restrict_namespaces(unit, context);
3297 if (r < 0) {
3298 *exit_status = EXIT_SECCOMP;
12145637 3299 return log_unit_error_errno(unit, r, "Failed to apply namespace restrictions: %m");
add00535
LP
3300 }
3301
469830d1
LP
3302 r = apply_protect_sysctl(unit, context);
3303 if (r < 0) {
3304 *exit_status = EXIT_SECCOMP;
12145637 3305 return log_unit_error_errno(unit, r, "Failed to apply sysctl restrictions: %m");
502d704e
DH
3306 }
3307
469830d1
LP
3308 r = apply_protect_kernel_modules(unit, context);
3309 if (r < 0) {
3310 *exit_status = EXIT_SECCOMP;
12145637 3311 return log_unit_error_errno(unit, r, "Failed to apply module loading restrictions: %m");
59eeb84b
LP
3312 }
3313
469830d1
LP
3314 r = apply_private_devices(unit, context);
3315 if (r < 0) {
3316 *exit_status = EXIT_SECCOMP;
12145637 3317 return log_unit_error_errno(unit, r, "Failed to set up private devices: %m");
469830d1
LP
3318 }
3319
3320 r = apply_syscall_archs(unit, context);
3321 if (r < 0) {
3322 *exit_status = EXIT_SECCOMP;
12145637 3323 return log_unit_error_errno(unit, r, "Failed to apply syscall architecture restrictions: %m");
ba128bb8
LP
3324 }
3325
78e864e5
TM
3326 r = apply_lock_personality(unit, context);
3327 if (r < 0) {
3328 *exit_status = EXIT_SECCOMP;
12145637 3329 return log_unit_error_errno(unit, r, "Failed to lock personalities: %m");
78e864e5
TM
3330 }
3331
5cd9cd35
LP
3332 /* This really should remain the last step before the execve(), to make sure our own code is unaffected
3333 * by the filter as little as possible. */
165a31c0 3334 r = apply_syscall_filter(unit, context, needs_ambient_hack);
469830d1
LP
3335 if (r < 0) {
3336 *exit_status = EXIT_SECCOMP;
12145637 3337 return log_unit_error_errno(unit, r, "Failed to apply system call filters: %m");
d35fbf6b
DM
3338 }
3339#endif
d35fbf6b 3340 }
034c6ed7 3341
00819cc1
LP
3342 if (!strv_isempty(context->unset_environment)) {
3343 char **ee = NULL;
3344
3345 ee = strv_env_delete(accum_env, 1, context->unset_environment);
3346 if (!ee) {
3347 *exit_status = EXIT_MEMORY;
12145637 3348 return log_oom();
00819cc1
LP
3349 }
3350
3351 strv_free(accum_env);
3352 accum_env = ee;
3353 }
3354
2065ca69 3355 final_argv = replace_env_argv(argv, accum_env);
d35fbf6b 3356 if (!final_argv) {
ff0af2a1 3357 *exit_status = EXIT_MEMORY;
12145637 3358 return log_oom();
d35fbf6b 3359 }
034c6ed7 3360
553d2243 3361 if (_unlikely_(log_get_max_level() >= LOG_DEBUG)) {
d35fbf6b 3362 _cleanup_free_ char *line;
81a2b7ce 3363
d35fbf6b
DM
3364 line = exec_command_line(final_argv);
3365 if (line) {
f2341e0a 3366 log_struct(LOG_DEBUG,
f2341e0a
LP
3367 "EXECUTABLE=%s", command->path,
3368 LOG_UNIT_MESSAGE(unit, "Executing: %s", line),
ba360bb0 3369 LOG_UNIT_ID(unit),
f1c50bec 3370 LOG_UNIT_INVOCATION_ID(unit),
f2341e0a 3371 NULL);
d35fbf6b
DM
3372 }
3373 }
dd305ec9 3374
2065ca69 3375 execve(command->path, final_argv, accum_env);
12145637
LP
3376
3377 if (errno == ENOENT && (command->flags & EXEC_COMMAND_IGNORE_FAILURE)) {
3378
3379 log_struct_errno(LOG_INFO, errno,
3380 "MESSAGE_ID=" SD_MESSAGE_SPAWN_FAILED_STR,
3381 LOG_UNIT_ID(unit),
3382 LOG_UNIT_INVOCATION_ID(unit),
3383 LOG_UNIT_MESSAGE(unit, "Executable %s missing, skipping: %m",
3384 command->path),
3385 "EXECUTABLE=%s", command->path,
3386 NULL);
3387
3388 return 0;
3389 }
3390
ff0af2a1 3391 *exit_status = EXIT_EXEC;
12145637 3392 return log_unit_error_errno(unit, errno, "Failed to execute command: %m");
d35fbf6b 3393}
81a2b7ce 3394
f2341e0a
LP
3395int exec_spawn(Unit *unit,
3396 ExecCommand *command,
d35fbf6b
DM
3397 const ExecContext *context,
3398 const ExecParameters *params,
3399 ExecRuntime *runtime,
29206d46 3400 DynamicCreds *dcreds,
d35fbf6b 3401 pid_t *ret) {
8351ceae 3402
d35fbf6b 3403 _cleanup_strv_free_ char **files_env = NULL;
9b141911 3404 int *fds = NULL;
4c47affc 3405 unsigned n_storage_fds = 0, n_socket_fds = 0;
ff0af2a1
LP
3406 _cleanup_free_ char *line = NULL;
3407 int socket_fd, r;
52c239d7 3408 int named_iofds[3] = { -1, -1, -1 };
ff0af2a1 3409 char **argv;
d35fbf6b 3410 pid_t pid;
8351ceae 3411
f2341e0a 3412 assert(unit);
d35fbf6b
DM
3413 assert(command);
3414 assert(context);
3415 assert(ret);
3416 assert(params);
4c47affc 3417 assert(params->fds || (params->n_storage_fds + params->n_socket_fds <= 0));
4298d0b5 3418
d35fbf6b
DM
3419 if (context->std_input == EXEC_INPUT_SOCKET ||
3420 context->std_output == EXEC_OUTPUT_SOCKET ||
3421 context->std_error == EXEC_OUTPUT_SOCKET) {
17df7223 3422
4c47affc 3423 if (params->n_socket_fds > 1) {
f2341e0a 3424 log_unit_error(unit, "Got more than one socket.");
d35fbf6b 3425 return -EINVAL;
ff0af2a1 3426 }
eef65bf3 3427
4c47affc 3428 if (params->n_socket_fds == 0) {
488ab41c
AA
3429 log_unit_error(unit, "Got no socket.");
3430 return -EINVAL;
3431 }
3432
d35fbf6b
DM
3433 socket_fd = params->fds[0];
3434 } else {
3435 socket_fd = -1;
3436 fds = params->fds;
4c47affc 3437 n_storage_fds = params->n_storage_fds;
9b141911 3438 n_socket_fds = params->n_socket_fds;
d35fbf6b 3439 }
94f04347 3440
52c239d7
LB
3441 r = exec_context_named_iofds(unit, context, params, named_iofds);
3442 if (r < 0)
3443 return log_unit_error_errno(unit, r, "Failed to load a named file descriptor: %m");
3444
f2341e0a 3445 r = exec_context_load_environment(unit, context, &files_env);
ff0af2a1 3446 if (r < 0)
f2341e0a 3447 return log_unit_error_errno(unit, r, "Failed to load environment files: %m");
034c6ed7 3448
d35fbf6b 3449 argv = params->argv ?: command->argv;
d35fbf6b
DM
3450 line = exec_command_line(argv);
3451 if (!line)
3452 return log_oom();
fab56fc5 3453
f2341e0a 3454 log_struct(LOG_DEBUG,
f2341e0a
LP
3455 LOG_UNIT_MESSAGE(unit, "About to execute: %s", line),
3456 "EXECUTABLE=%s", command->path,
ba360bb0 3457 LOG_UNIT_ID(unit),
f1c50bec 3458 LOG_UNIT_INVOCATION_ID(unit),
f2341e0a 3459 NULL);
12145637 3460
d35fbf6b
DM
3461 pid = fork();
3462 if (pid < 0)
74129a12 3463 return log_unit_error_errno(unit, errno, "Failed to fork: %m");
d35fbf6b
DM
3464
3465 if (pid == 0) {
12145637 3466 int exit_status = EXIT_SUCCESS;
ff0af2a1 3467
f2341e0a
LP
3468 r = exec_child(unit,
3469 command,
ff0af2a1
LP
3470 context,
3471 params,
3472 runtime,
29206d46 3473 dcreds,
ff0af2a1
LP
3474 argv,
3475 socket_fd,
52c239d7 3476 named_iofds,
4c47affc
FB
3477 fds,
3478 n_storage_fds,
9b141911 3479 n_socket_fds,
ff0af2a1 3480 files_env,
00d9ef85 3481 unit->manager->user_lookup_fds[1],
12145637
LP
3482 &exit_status);
3483
ff0af2a1 3484 if (r < 0) {
12145637
LP
3485 log_struct_errno(LOG_ERR, r,
3486 "MESSAGE_ID=" SD_MESSAGE_SPAWN_FAILED_STR,
3487 LOG_UNIT_ID(unit),
3488 LOG_UNIT_INVOCATION_ID(unit),
3489 LOG_UNIT_MESSAGE(unit, "Failed at step %s spawning %s: %m",
3490 exit_status_to_string(exit_status, EXIT_STATUS_SYSTEMD),
3491 command->path),
3492 "EXECUTABLE=%s", command->path,
3493 NULL);
4c2630eb
MS
3494 }
3495
ff0af2a1 3496 _exit(exit_status);
034c6ed7
LP
3497 }
3498
f2341e0a 3499 log_unit_debug(unit, "Forked %s as "PID_FMT, command->path, pid);
23635a85 3500
80876c20
LP
3501 /* We add the new process to the cgroup both in the child (so
3502 * that we can be sure that no user code is ever executed
3503 * outside of the cgroup) and in the parent (so that we can be
3504 * sure that when we kill the cgroup the process will be
3505 * killed too). */
d35fbf6b 3506 if (params->cgroup_path)
dd305ec9 3507 (void) cg_attach(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, pid);
2da3263a 3508
b58b4116 3509 exec_status_start(&command->exec_status, pid);
9fb86720 3510
034c6ed7 3511 *ret = pid;
5cb5a6ff
LP
3512 return 0;
3513}
3514
034c6ed7 3515void exec_context_init(ExecContext *c) {
3536f49e
YW
3516 ExecDirectoryType i;
3517
034c6ed7
LP
3518 assert(c);
3519
4c12626c 3520 c->umask = 0022;
9eba9da4 3521 c->ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 0);
94f04347 3522 c->cpu_sched_policy = SCHED_OTHER;
071830ff 3523 c->syslog_priority = LOG_DAEMON|LOG_INFO;
74922904 3524 c->syslog_level_prefix = true;
353e12c2 3525 c->ignore_sigpipe = true;
3a43da28 3526 c->timer_slack_nsec = NSEC_INFINITY;
050f7277 3527 c->personality = PERSONALITY_INVALID;
72fd1768 3528 for (i = 0; i < _EXEC_DIRECTORY_TYPE_MAX; i++)
3536f49e 3529 c->directories[i].mode = 0755;
a103496c 3530 c->capability_bounding_set = CAP_ALL;
add00535 3531 c->restrict_namespaces = NAMESPACE_FLAGS_ALL;
034c6ed7
LP
3532}
3533
613b411c 3534void exec_context_done(ExecContext *c) {
5cb5a6ff 3535 unsigned l;
3536f49e 3536 ExecDirectoryType i;
5cb5a6ff
LP
3537
3538 assert(c);
3539
6796073e
LP
3540 c->environment = strv_free(c->environment);
3541 c->environment_files = strv_free(c->environment_files);
b4c14404 3542 c->pass_environment = strv_free(c->pass_environment);
00819cc1 3543 c->unset_environment = strv_free(c->unset_environment);
8c7be95e 3544
1f6b4113 3545 for (l = 0; l < ELEMENTSOF(c->rlimit); l++)
a1e58e8e 3546 c->rlimit[l] = mfree(c->rlimit[l]);
034c6ed7 3547
52c239d7
LB
3548 for (l = 0; l < 3; l++)
3549 c->stdio_fdname[l] = mfree(c->stdio_fdname[l]);
3550
a1e58e8e
LP
3551 c->working_directory = mfree(c->working_directory);
3552 c->root_directory = mfree(c->root_directory);
915e6d16 3553 c->root_image = mfree(c->root_image);
a1e58e8e
LP
3554 c->tty_path = mfree(c->tty_path);
3555 c->syslog_identifier = mfree(c->syslog_identifier);
3556 c->user = mfree(c->user);
3557 c->group = mfree(c->group);
034c6ed7 3558
6796073e 3559 c->supplementary_groups = strv_free(c->supplementary_groups);
94f04347 3560
a1e58e8e 3561 c->pam_name = mfree(c->pam_name);
5b6319dc 3562
2a624c36
AP
3563 c->read_only_paths = strv_free(c->read_only_paths);
3564 c->read_write_paths = strv_free(c->read_write_paths);
3565 c->inaccessible_paths = strv_free(c->inaccessible_paths);
82c121a4 3566
d2d6c096
LP
3567 bind_mount_free_many(c->bind_mounts, c->n_bind_mounts);
3568
82c121a4
LP
3569 if (c->cpuset)
3570 CPU_FREE(c->cpuset);
86a3475b 3571
a1e58e8e
LP
3572 c->utmp_id = mfree(c->utmp_id);
3573 c->selinux_context = mfree(c->selinux_context);
3574 c->apparmor_profile = mfree(c->apparmor_profile);
5b8e1b77 3575 c->smack_process_label = mfree(c->smack_process_label);
eef65bf3 3576
525d3cc7
LP
3577 c->syscall_filter = set_free(c->syscall_filter);
3578 c->syscall_archs = set_free(c->syscall_archs);
3579 c->address_families = set_free(c->address_families);
e66cf1a3 3580
72fd1768 3581 for (i = 0; i < _EXEC_DIRECTORY_TYPE_MAX; i++)
3536f49e 3582 c->directories[i].paths = strv_free(c->directories[i].paths);
e66cf1a3
LP
3583}
3584
3585int exec_context_destroy_runtime_directory(ExecContext *c, const char *runtime_prefix) {
3586 char **i;
3587
3588 assert(c);
3589
3590 if (!runtime_prefix)
3591 return 0;
3592
3536f49e 3593 STRV_FOREACH(i, c->directories[EXEC_DIRECTORY_RUNTIME].paths) {
e66cf1a3
LP
3594 _cleanup_free_ char *p;
3595
605405c6 3596 p = strjoin(runtime_prefix, "/", *i);
e66cf1a3
LP
3597 if (!p)
3598 return -ENOMEM;
3599
6c47cd7d 3600 /* We execute this synchronously, since we need to be sure this is gone when we start the service
e66cf1a3 3601 * next. */
c6878637 3602 (void) rm_rf(p, REMOVE_ROOT);
6c47cd7d
LP
3603
3604 /* Also destroy any matching subdirectory below /private/. This is done to support DynamicUser=1
3605 * setups. Note that we don't conditionalize here on that though, as the namespace is same way, and it
3606 * makes us a bit more robust towards changing unit settings. Or to say this differently: in the worst
3607 * case this is a NOP. */
3608
3609 free(p);
3610 p = strjoin(runtime_prefix, "/private/", *i);
3611 if (!p)
3612 return -ENOMEM;
3613
3614 (void) rm_rf(p, REMOVE_ROOT);
e66cf1a3
LP
3615 }
3616
3617 return 0;
5cb5a6ff
LP
3618}
3619
43d0fcbd
LP
3620void exec_command_done(ExecCommand *c) {
3621 assert(c);
3622
a1e58e8e 3623 c->path = mfree(c->path);
43d0fcbd 3624
6796073e 3625 c->argv = strv_free(c->argv);
43d0fcbd
LP
3626}
3627
3628void exec_command_done_array(ExecCommand *c, unsigned n) {
3629 unsigned i;
3630
3631 for (i = 0; i < n; i++)
3632 exec_command_done(c+i);
3633}
3634
f1acf85a 3635ExecCommand* exec_command_free_list(ExecCommand *c) {
5cb5a6ff
LP
3636 ExecCommand *i;
3637
3638 while ((i = c)) {
71fda00f 3639 LIST_REMOVE(command, c, i);
43d0fcbd 3640 exec_command_done(i);
5cb5a6ff
LP
3641 free(i);
3642 }
f1acf85a
ZJS
3643
3644 return NULL;
5cb5a6ff
LP
3645}
3646
034c6ed7
LP
3647void exec_command_free_array(ExecCommand **c, unsigned n) {
3648 unsigned i;
3649
f1acf85a
ZJS
3650 for (i = 0; i < n; i++)
3651 c[i] = exec_command_free_list(c[i]);
034c6ed7
LP
3652}
3653
039f0e70 3654typedef struct InvalidEnvInfo {
f2341e0a 3655 Unit *unit;
039f0e70
LP
3656 const char *path;
3657} InvalidEnvInfo;
3658
3659static void invalid_env(const char *p, void *userdata) {
3660 InvalidEnvInfo *info = userdata;
3661
f2341e0a 3662 log_unit_error(info->unit, "Ignoring invalid environment assignment '%s': %s", p, info->path);
039f0e70
LP
3663}
3664
52c239d7
LB
3665const char* exec_context_fdname(const ExecContext *c, int fd_index) {
3666 assert(c);
3667
3668 switch (fd_index) {
3669 case STDIN_FILENO:
3670 if (c->std_input != EXEC_INPUT_NAMED_FD)
3671 return NULL;
3672 return c->stdio_fdname[STDIN_FILENO] ?: "stdin";
3673 case STDOUT_FILENO:
3674 if (c->std_output != EXEC_OUTPUT_NAMED_FD)
3675 return NULL;
3676 return c->stdio_fdname[STDOUT_FILENO] ?: "stdout";
3677 case STDERR_FILENO:
3678 if (c->std_error != EXEC_OUTPUT_NAMED_FD)
3679 return NULL;
3680 return c->stdio_fdname[STDERR_FILENO] ?: "stderr";
3681 default:
3682 return NULL;
3683 }
3684}
3685
3686int exec_context_named_iofds(Unit *unit, const ExecContext *c, const ExecParameters *p, int named_iofds[3]) {
3687 unsigned i, targets;
56fbd561 3688 const char* stdio_fdname[3];
4c47affc 3689 unsigned n_fds;
52c239d7
LB
3690
3691 assert(c);
3692 assert(p);
3693
3694 targets = (c->std_input == EXEC_INPUT_NAMED_FD) +
3695 (c->std_output == EXEC_OUTPUT_NAMED_FD) +
3696 (c->std_error == EXEC_OUTPUT_NAMED_FD);
3697
3698 for (i = 0; i < 3; i++)
3699 stdio_fdname[i] = exec_context_fdname(c, i);
3700
4c47affc
FB
3701 n_fds = p->n_storage_fds + p->n_socket_fds;
3702
3703 for (i = 0; i < n_fds && targets > 0; i++)
56fbd561
ZJS
3704 if (named_iofds[STDIN_FILENO] < 0 &&
3705 c->std_input == EXEC_INPUT_NAMED_FD &&
3706 stdio_fdname[STDIN_FILENO] &&
3707 streq(p->fd_names[i], stdio_fdname[STDIN_FILENO])) {
3708
52c239d7
LB
3709 named_iofds[STDIN_FILENO] = p->fds[i];
3710 targets--;
56fbd561
ZJS
3711
3712 } else if (named_iofds[STDOUT_FILENO] < 0 &&
3713 c->std_output == EXEC_OUTPUT_NAMED_FD &&
3714 stdio_fdname[STDOUT_FILENO] &&
3715 streq(p->fd_names[i], stdio_fdname[STDOUT_FILENO])) {
3716
52c239d7
LB
3717 named_iofds[STDOUT_FILENO] = p->fds[i];
3718 targets--;
56fbd561
ZJS
3719
3720 } else if (named_iofds[STDERR_FILENO] < 0 &&
3721 c->std_error == EXEC_OUTPUT_NAMED_FD &&
3722 stdio_fdname[STDERR_FILENO] &&
3723 streq(p->fd_names[i], stdio_fdname[STDERR_FILENO])) {
3724
52c239d7
LB
3725 named_iofds[STDERR_FILENO] = p->fds[i];
3726 targets--;
3727 }
3728
56fbd561 3729 return targets == 0 ? 0 : -ENOENT;
52c239d7
LB
3730}
3731
f2341e0a 3732int exec_context_load_environment(Unit *unit, const ExecContext *c, char ***l) {
8c7be95e
LP
3733 char **i, **r = NULL;
3734
3735 assert(c);
3736 assert(l);
3737
3738 STRV_FOREACH(i, c->environment_files) {
3739 char *fn;
52511fae
ZJS
3740 int k;
3741 unsigned n;
8c7be95e
LP
3742 bool ignore = false;
3743 char **p;
7fd1b19b 3744 _cleanup_globfree_ glob_t pglob = {};
8c7be95e
LP
3745
3746 fn = *i;
3747
3748 if (fn[0] == '-') {
3749 ignore = true;
313cefa1 3750 fn++;
8c7be95e
LP
3751 }
3752
3753 if (!path_is_absolute(fn)) {
8c7be95e
LP
3754 if (ignore)
3755 continue;
3756
3757 strv_free(r);
3758 return -EINVAL;
3759 }
3760
2bef10ab 3761 /* Filename supports globbing, take all matching files */
d8c92e8b
ZJS
3762 k = safe_glob(fn, 0, &pglob);
3763 if (k < 0) {
2bef10ab
PL
3764 if (ignore)
3765 continue;
8c7be95e 3766
2bef10ab 3767 strv_free(r);
d8c92e8b 3768 return k;
2bef10ab 3769 }
8c7be95e 3770
d8c92e8b
ZJS
3771 /* When we don't match anything, -ENOENT should be returned */
3772 assert(pglob.gl_pathc > 0);
3773
3774 for (n = 0; n < pglob.gl_pathc; n++) {
717603e3 3775 k = load_env_file(NULL, pglob.gl_pathv[n], NULL, &p);
2bef10ab
PL
3776 if (k < 0) {
3777 if (ignore)
3778 continue;
8c7be95e 3779
2bef10ab 3780 strv_free(r);
2bef10ab 3781 return k;
e9c1ea9d 3782 }
ebc05a09 3783 /* Log invalid environment variables with filename */
039f0e70
LP
3784 if (p) {
3785 InvalidEnvInfo info = {
f2341e0a 3786 .unit = unit,
039f0e70
LP
3787 .path = pglob.gl_pathv[n]
3788 };
3789
3790 p = strv_env_clean_with_callback(p, invalid_env, &info);
3791 }
8c7be95e 3792
2bef10ab
PL
3793 if (r == NULL)
3794 r = p;
3795 else {
3796 char **m;
8c7be95e 3797
2bef10ab
PL
3798 m = strv_env_merge(2, r, p);
3799 strv_free(r);
3800 strv_free(p);
c84a9488 3801 if (!m)
2bef10ab 3802 return -ENOMEM;
2bef10ab
PL
3803
3804 r = m;
3805 }
8c7be95e
LP
3806 }
3807 }
3808
3809 *l = r;
3810
3811 return 0;
3812}
3813
6ac8fdc9 3814static bool tty_may_match_dev_console(const char *tty) {
e1d75803 3815 _cleanup_free_ char *active = NULL;
7d6884b6 3816 char *console;
6ac8fdc9 3817
1e22b5cd
LP
3818 if (!tty)
3819 return true;
3820
a119ec7c 3821 tty = skip_dev_prefix(tty);
6ac8fdc9
MS
3822
3823 /* trivial identity? */
3824 if (streq(tty, "console"))
3825 return true;
3826
3827 console = resolve_dev_console(&active);
3828 /* if we could not resolve, assume it may */
3829 if (!console)
3830 return true;
3831
3832 /* "tty0" means the active VC, so it may be the same sometimes */
e1d75803 3833 return streq(console, tty) || (streq(console, "tty0") && tty_is_vc(tty));
6ac8fdc9
MS
3834}
3835
3836bool exec_context_may_touch_console(ExecContext *ec) {
1e22b5cd
LP
3837
3838 return (ec->tty_reset ||
3839 ec->tty_vhangup ||
3840 ec->tty_vt_disallocate ||
6ac8fdc9
MS
3841 is_terminal_input(ec->std_input) ||
3842 is_terminal_output(ec->std_output) ||
3843 is_terminal_output(ec->std_error)) &&
1e22b5cd 3844 tty_may_match_dev_console(exec_context_tty_path(ec));
6ac8fdc9
MS
3845}
3846
15ae422b
LP
/* Writes each string of the NULL-terminated string array l to f, each one prefixed by a single
 * space. A NULL array is treated like an empty one. */
static void strv_fprintf(FILE *f, char **l) {
        char **item;

        assert(f);

        for (item = l; item && *item; item++)
                fprintf(f, " %s", *item);
}
3855
5cb5a6ff 3856void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) {
c2bbd90b 3857 char **e, **d;
94f04347 3858 unsigned i;
3536f49e 3859 ExecDirectoryType dt;
add00535 3860 int r;
9eba9da4 3861
5cb5a6ff
LP
3862 assert(c);
3863 assert(f);
3864
4ad49000 3865 prefix = strempty(prefix);
5cb5a6ff
LP
3866
3867 fprintf(f,
94f04347
LP
3868 "%sUMask: %04o\n"
3869 "%sWorkingDirectory: %s\n"
451a074f 3870 "%sRootDirectory: %s\n"
15ae422b 3871 "%sNonBlocking: %s\n"
64747e2d 3872 "%sPrivateTmp: %s\n"
7f112f50 3873 "%sPrivateDevices: %s\n"
59eeb84b 3874 "%sProtectKernelTunables: %s\n"
e66a2f65 3875 "%sProtectKernelModules: %s\n"
59eeb84b 3876 "%sProtectControlGroups: %s\n"
d251207d
LP
3877 "%sPrivateNetwork: %s\n"
3878 "%sPrivateUsers: %s\n"
1b8689f9
LP
3879 "%sProtectHome: %s\n"
3880 "%sProtectSystem: %s\n"
5d997827 3881 "%sMountAPIVFS: %s\n"
f3e43635 3882 "%sIgnoreSIGPIPE: %s\n"
f4170c67 3883 "%sMemoryDenyWriteExecute: %s\n"
b1edf445
LP
3884 "%sRestrictRealtime: %s\n"
3885 "%sKeyringMode: %s\n",
5cb5a6ff 3886 prefix, c->umask,
9eba9da4 3887 prefix, c->working_directory ? c->working_directory : "/",
451a074f 3888 prefix, c->root_directory ? c->root_directory : "/",
15ae422b 3889 prefix, yes_no(c->non_blocking),
64747e2d 3890 prefix, yes_no(c->private_tmp),
7f112f50 3891 prefix, yes_no(c->private_devices),
59eeb84b 3892 prefix, yes_no(c->protect_kernel_tunables),
e66a2f65 3893 prefix, yes_no(c->protect_kernel_modules),
59eeb84b 3894 prefix, yes_no(c->protect_control_groups),
d251207d
LP
3895 prefix, yes_no(c->private_network),
3896 prefix, yes_no(c->private_users),
1b8689f9
LP
3897 prefix, protect_home_to_string(c->protect_home),
3898 prefix, protect_system_to_string(c->protect_system),
5d997827 3899 prefix, yes_no(c->mount_apivfs),
f3e43635 3900 prefix, yes_no(c->ignore_sigpipe),
f4170c67 3901 prefix, yes_no(c->memory_deny_write_execute),
b1edf445
LP
3902 prefix, yes_no(c->restrict_realtime),
3903 prefix, exec_keyring_mode_to_string(c->keyring_mode));
fb33a393 3904
915e6d16
LP
3905 if (c->root_image)
3906 fprintf(f, "%sRootImage: %s\n", prefix, c->root_image);
3907
8c7be95e
LP
3908 STRV_FOREACH(e, c->environment)
3909 fprintf(f, "%sEnvironment: %s\n", prefix, *e);
3910
3911 STRV_FOREACH(e, c->environment_files)
3912 fprintf(f, "%sEnvironmentFile: %s\n", prefix, *e);
94f04347 3913
b4c14404
FB
3914 STRV_FOREACH(e, c->pass_environment)
3915 fprintf(f, "%sPassEnvironment: %s\n", prefix, *e);
3916
00819cc1
LP
3917 STRV_FOREACH(e, c->unset_environment)
3918 fprintf(f, "%sUnsetEnvironment: %s\n", prefix, *e);
3919
53f47dfc
YW
3920 fprintf(f, "%sRuntimeDirectoryPreserve: %s\n", prefix, exec_preserve_mode_to_string(c->runtime_directory_preserve_mode));
3921
72fd1768 3922 for (dt = 0; dt < _EXEC_DIRECTORY_TYPE_MAX; dt++) {
3536f49e
YW
3923 fprintf(f, "%s%sMode: %04o\n", prefix, exec_directory_type_to_string(dt), c->directories[dt].mode);
3924
3925 STRV_FOREACH(d, c->directories[dt].paths)
3926 fprintf(f, "%s%s: %s\n", prefix, exec_directory_type_to_string(dt), *d);
3927 }
c2bbd90b 3928
fb33a393
LP
3929 if (c->nice_set)
3930 fprintf(f,
3931 "%sNice: %i\n",
3932 prefix, c->nice);
3933
dd6c17b1 3934 if (c->oom_score_adjust_set)
fb33a393 3935 fprintf(f,
dd6c17b1
LP
3936 "%sOOMScoreAdjust: %i\n",
3937 prefix, c->oom_score_adjust);
9eba9da4 3938
94f04347 3939 for (i = 0; i < RLIM_NLIMITS; i++)
3c11da9d
EV
3940 if (c->rlimit[i]) {
3941 fprintf(f, "%s%s: " RLIM_FMT "\n",
3942 prefix, rlimit_to_string(i), c->rlimit[i]->rlim_max);
3943 fprintf(f, "%s%sSoft: " RLIM_FMT "\n",
3944 prefix, rlimit_to_string(i), c->rlimit[i]->rlim_cur);
3945 }
94f04347 3946
f8b69d1d 3947 if (c->ioprio_set) {
1756a011 3948 _cleanup_free_ char *class_str = NULL;
f8b69d1d 3949
837df140
YW
3950 r = ioprio_class_to_string_alloc(IOPRIO_PRIO_CLASS(c->ioprio), &class_str);
3951 if (r >= 0)
3952 fprintf(f, "%sIOSchedulingClass: %s\n", prefix, class_str);
3953
3954 fprintf(f, "%sIOPriority: %lu\n", prefix, IOPRIO_PRIO_DATA(c->ioprio));
f8b69d1d 3955 }
94f04347 3956
f8b69d1d 3957 if (c->cpu_sched_set) {
1756a011 3958 _cleanup_free_ char *policy_str = NULL;
f8b69d1d 3959
837df140
YW
3960 r = sched_policy_to_string_alloc(c->cpu_sched_policy, &policy_str);
3961 if (r >= 0)
3962 fprintf(f, "%sCPUSchedulingPolicy: %s\n", prefix, policy_str);
3963
94f04347 3964 fprintf(f,
38b48754
LP
3965 "%sCPUSchedulingPriority: %i\n"
3966 "%sCPUSchedulingResetOnFork: %s\n",
38b48754
LP
3967 prefix, c->cpu_sched_priority,
3968 prefix, yes_no(c->cpu_sched_reset_on_fork));
b929bf04 3969 }
94f04347 3970
82c121a4 3971 if (c->cpuset) {
94f04347 3972 fprintf(f, "%sCPUAffinity:", prefix);
82c121a4
LP
3973 for (i = 0; i < c->cpuset_ncpus; i++)
3974 if (CPU_ISSET_S(i, CPU_ALLOC_SIZE(c->cpuset_ncpus), c->cpuset))
43a99a7a 3975 fprintf(f, " %u", i);
94f04347
LP
3976 fputs("\n", f);
3977 }
3978
3a43da28 3979 if (c->timer_slack_nsec != NSEC_INFINITY)
ccd06097 3980 fprintf(f, "%sTimerSlackNSec: "NSEC_FMT "\n", prefix, c->timer_slack_nsec);
94f04347
LP
3981
3982 fprintf(f,
80876c20
LP
3983 "%sStandardInput: %s\n"
3984 "%sStandardOutput: %s\n"
3985 "%sStandardError: %s\n",
3986 prefix, exec_input_to_string(c->std_input),
3987 prefix, exec_output_to_string(c->std_output),
3988 prefix, exec_output_to_string(c->std_error));
3989
3990 if (c->tty_path)
3991 fprintf(f,
6ea832a2
LP
3992 "%sTTYPath: %s\n"
3993 "%sTTYReset: %s\n"
3994 "%sTTYVHangup: %s\n"
3995 "%sTTYVTDisallocate: %s\n",
3996 prefix, c->tty_path,
3997 prefix, yes_no(c->tty_reset),
3998 prefix, yes_no(c->tty_vhangup),
3999 prefix, yes_no(c->tty_vt_disallocate));
94f04347 4000
9f6444eb
LP
4001 if (IN_SET(c->std_output,
4002 EXEC_OUTPUT_SYSLOG,
4003 EXEC_OUTPUT_KMSG,
4004 EXEC_OUTPUT_JOURNAL,
4005 EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
4006 EXEC_OUTPUT_KMSG_AND_CONSOLE,
4007 EXEC_OUTPUT_JOURNAL_AND_CONSOLE) ||
4008 IN_SET(c->std_error,
4009 EXEC_OUTPUT_SYSLOG,
4010 EXEC_OUTPUT_KMSG,
4011 EXEC_OUTPUT_JOURNAL,
4012 EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
4013 EXEC_OUTPUT_KMSG_AND_CONSOLE,
4014 EXEC_OUTPUT_JOURNAL_AND_CONSOLE)) {
f8b69d1d 4015
5ce70e5b 4016 _cleanup_free_ char *fac_str = NULL, *lvl_str = NULL;
f8b69d1d 4017
837df140
YW
4018 r = log_facility_unshifted_to_string_alloc(c->syslog_priority >> 3, &fac_str);
4019 if (r >= 0)
4020 fprintf(f, "%sSyslogFacility: %s\n", prefix, fac_str);
f8b69d1d 4021
837df140
YW
4022 r = log_level_to_string_alloc(LOG_PRI(c->syslog_priority), &lvl_str);
4023 if (r >= 0)
4024 fprintf(f, "%sSyslogLevel: %s\n", prefix, lvl_str);
f8b69d1d 4025 }
94f04347 4026
07d46372
YW
4027 if (c->secure_bits) {
4028 _cleanup_free_ char *str = NULL;
4029
4030 r = secure_bits_to_string_alloc(c->secure_bits, &str);
4031 if (r >= 0)
4032 fprintf(f, "%sSecure Bits: %s\n", prefix, str);
4033 }
94f04347 4034
a103496c 4035 if (c->capability_bounding_set != CAP_ALL) {
dd1f5bd0 4036 _cleanup_free_ char *str = NULL;
94f04347 4037
dd1f5bd0
YW
4038 r = capability_set_to_string_alloc(c->capability_bounding_set, &str);
4039 if (r >= 0)
4040 fprintf(f, "%sCapabilityBoundingSet: %s\n", prefix, str);
755d4b67
IP
4041 }
4042
4043 if (c->capability_ambient_set != 0) {
dd1f5bd0 4044 _cleanup_free_ char *str = NULL;
755d4b67 4045
dd1f5bd0
YW
4046 r = capability_set_to_string_alloc(c->capability_ambient_set, &str);
4047 if (r >= 0)
4048 fprintf(f, "%sAmbientCapabilities: %s\n", prefix, str);
94f04347
LP
4049 }
4050
4051 if (c->user)
f2d3769a 4052 fprintf(f, "%sUser: %s\n", prefix, c->user);
94f04347 4053 if (c->group)
f2d3769a 4054 fprintf(f, "%sGroup: %s\n", prefix, c->group);
94f04347 4055
29206d46
LP
4056 fprintf(f, "%sDynamicUser: %s\n", prefix, yes_no(c->dynamic_user));
4057
ac6e8be6 4058 if (!strv_isempty(c->supplementary_groups)) {
94f04347 4059 fprintf(f, "%sSupplementaryGroups:", prefix);
15ae422b
LP
4060 strv_fprintf(f, c->supplementary_groups);
4061 fputs("\n", f);
4062 }
94f04347 4063
5b6319dc 4064 if (c->pam_name)
f2d3769a 4065 fprintf(f, "%sPAMName: %s\n", prefix, c->pam_name);
5b6319dc 4066
2a624c36
AP
4067 if (strv_length(c->read_write_paths) > 0) {
4068 fprintf(f, "%sReadWritePaths:", prefix);
4069 strv_fprintf(f, c->read_write_paths);
15ae422b
LP
4070 fputs("\n", f);
4071 }
4072
2a624c36
AP
4073 if (strv_length(c->read_only_paths) > 0) {
4074 fprintf(f, "%sReadOnlyPaths:", prefix);
4075 strv_fprintf(f, c->read_only_paths);
15ae422b
LP
4076 fputs("\n", f);
4077 }
94f04347 4078
2a624c36
AP
4079 if (strv_length(c->inaccessible_paths) > 0) {
4080 fprintf(f, "%sInaccessiblePaths:", prefix);
4081 strv_fprintf(f, c->inaccessible_paths);
94f04347
LP
4082 fputs("\n", f);
4083 }
2e22afe9 4084
d2d6c096
LP
4085 if (c->n_bind_mounts > 0)
4086 for (i = 0; i < c->n_bind_mounts; i++) {
4087 fprintf(f, "%s%s: %s:%s:%s\n", prefix,
4088 c->bind_mounts[i].read_only ? "BindReadOnlyPaths" : "BindPaths",
4089 c->bind_mounts[i].source,
4090 c->bind_mounts[i].destination,
4091 c->bind_mounts[i].recursive ? "rbind" : "norbind");
4092 }
4093
169c1bda
LP
4094 if (c->utmp_id)
4095 fprintf(f,
4096 "%sUtmpIdentifier: %s\n",
4097 prefix, c->utmp_id);
7b52a628
MS
4098
4099 if (c->selinux_context)
4100 fprintf(f,
5f8640fb
LP
4101 "%sSELinuxContext: %s%s\n",
4102 prefix, c->selinux_context_ignore ? "-" : "", c->selinux_context);
17df7223 4103
80c21aea
WC
4104 if (c->apparmor_profile)
4105 fprintf(f,
4106 "%sAppArmorProfile: %s%s\n",
4107 prefix, c->apparmor_profile_ignore ? "-" : "", c->apparmor_profile);
4108
4109 if (c->smack_process_label)
4110 fprintf(f,
4111 "%sSmackProcessLabel: %s%s\n",
4112 prefix, c->smack_process_label_ignore ? "-" : "", c->smack_process_label);
4113
050f7277 4114 if (c->personality != PERSONALITY_INVALID)
ac45f971
LP
4115 fprintf(f,
4116 "%sPersonality: %s\n",
4117 prefix, strna(personality_to_string(c->personality)));
4118
78e864e5
TM
4119 fprintf(f,
4120 "%sLockPersonality: %s\n",
4121 prefix, yes_no(c->lock_personality));
4122
17df7223 4123 if (c->syscall_filter) {
349cc4a5 4124#if HAVE_SECCOMP
17df7223
LP
4125 Iterator j;
4126 void *id;
4127 bool first = true;
351a19b1 4128#endif
17df7223
LP
4129
4130 fprintf(f,
57183d11 4131 "%sSystemCallFilter: ",
17df7223
LP
4132 prefix);
4133
4134 if (!c->syscall_whitelist)
4135 fputc('~', f);
4136
349cc4a5 4137#if HAVE_SECCOMP
17df7223
LP
4138 SET_FOREACH(id, c->syscall_filter, j) {
4139 _cleanup_free_ char *name = NULL;
4140
4141 if (first)
4142 first = false;
4143 else
4144 fputc(' ', f);
4145
57183d11 4146 name = seccomp_syscall_resolve_num_arch(SCMP_ARCH_NATIVE, PTR_TO_INT(id) - 1);
17df7223
LP
4147 fputs(strna(name), f);
4148 }
351a19b1 4149#endif
17df7223
LP
4150
4151 fputc('\n', f);
4152 }
4153
57183d11 4154 if (c->syscall_archs) {
349cc4a5 4155#if HAVE_SECCOMP
57183d11
LP
4156 Iterator j;
4157 void *id;
4158#endif
4159
4160 fprintf(f,
4161 "%sSystemCallArchitectures:",
4162 prefix);
4163
349cc4a5 4164#if HAVE_SECCOMP
57183d11
LP
4165 SET_FOREACH(id, c->syscall_archs, j)
4166 fprintf(f, " %s", strna(seccomp_arch_to_string(PTR_TO_UINT32(id) - 1)));
4167#endif
4168 fputc('\n', f);
4169 }
4170
add00535
LP
4171 if (exec_context_restrict_namespaces_set(c)) {
4172 _cleanup_free_ char *s = NULL;
4173
4174 r = namespace_flag_to_string_many(c->restrict_namespaces, &s);
4175 if (r >= 0)
4176 fprintf(f, "%sRestrictNamespaces: %s\n",
4177 prefix, s);
4178 }
4179
b3267152 4180 if (c->syscall_errno > 0)
17df7223
LP
4181 fprintf(f,
4182 "%sSystemCallErrorNumber: %s\n",
4183 prefix, strna(errno_to_name(c->syscall_errno)));
eef65bf3
MS
4184
4185 if (c->apparmor_profile)
4186 fprintf(f,
4187 "%sAppArmorProfile: %s%s\n",
4188 prefix, c->apparmor_profile_ignore ? "-" : "", c->apparmor_profile);
5cb5a6ff
LP
4189}
4190
a931ad47
LP
4191bool exec_context_maintains_privileges(ExecContext *c) {
4192 assert(c);
4193
61233823 4194 /* Returns true if the process forked off would run under
a931ad47
LP
4195 * an unchanged UID or as root. */
4196
4197 if (!c->user)
4198 return true;
4199
4200 if (streq(c->user, "root") || streq(c->user, "0"))
4201 return true;
4202
4203 return false;
4204}
4205
7f452159
LP
4206int exec_context_get_effective_ioprio(ExecContext *c) {
4207 int p;
4208
4209 assert(c);
4210
4211 if (c->ioprio_set)
4212 return c->ioprio;
4213
4214 p = ioprio_get(IOPRIO_WHO_PROCESS, 0);
4215 if (p < 0)
4216 return IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 4);
4217
4218 return p;
4219}
4220
b58b4116 4221void exec_status_start(ExecStatus *s, pid_t pid) {
034c6ed7 4222 assert(s);
5cb5a6ff 4223
b58b4116
LP
4224 zero(*s);
4225 s->pid = pid;
4226 dual_timestamp_get(&s->start_timestamp);
4227}
4228
6ea832a2 4229void exec_status_exit(ExecStatus *s, ExecContext *context, pid_t pid, int code, int status) {
b58b4116
LP
4230 assert(s);
4231
0b1f4ae6 4232 if (s->pid && s->pid != pid)
b58b4116
LP
4233 zero(*s);
4234
034c6ed7 4235 s->pid = pid;
63983207 4236 dual_timestamp_get(&s->exit_timestamp);
9fb86720 4237
034c6ed7
LP
4238 s->code = code;
4239 s->status = status;
169c1bda 4240
6ea832a2
LP
4241 if (context) {
4242 if (context->utmp_id)
4243 utmp_put_dead_process(context->utmp_id, pid, code, status);
4244
1e22b5cd 4245 exec_context_tty_reset(context, NULL);
6ea832a2 4246 }
9fb86720
LP
4247}
4248
4249void exec_status_dump(ExecStatus *s, FILE *f, const char *prefix) {
4250 char buf[FORMAT_TIMESTAMP_MAX];
4251
4252 assert(s);
4253 assert(f);
4254
9fb86720
LP
4255 if (s->pid <= 0)
4256 return;
4257
4c940960
LP
4258 prefix = strempty(prefix);
4259
9fb86720 4260 fprintf(f,
ccd06097
ZJS
4261 "%sPID: "PID_FMT"\n",
4262 prefix, s->pid);
9fb86720 4263
af9d16e1 4264 if (dual_timestamp_is_set(&s->start_timestamp))
9fb86720
LP
4265 fprintf(f,
4266 "%sStart Timestamp: %s\n",
63983207 4267 prefix, format_timestamp(buf, sizeof(buf), s->start_timestamp.realtime));
9fb86720 4268
af9d16e1 4269 if (dual_timestamp_is_set(&s->exit_timestamp))
9fb86720
LP
4270 fprintf(f,
4271 "%sExit Timestamp: %s\n"
4272 "%sExit Code: %s\n"
4273 "%sExit Status: %i\n",
63983207 4274 prefix, format_timestamp(buf, sizeof(buf), s->exit_timestamp.realtime),
9fb86720
LP
4275 prefix, sigchld_code_to_string(s->code),
4276 prefix, s->status);
5cb5a6ff 4277}
44d8db9e 4278
9e2f7c11 4279char *exec_command_line(char **argv) {
44d8db9e
LP
4280 size_t k;
4281 char *n, *p, **a;
4282 bool first = true;
4283
9e2f7c11 4284 assert(argv);
44d8db9e 4285
9164977d 4286 k = 1;
9e2f7c11 4287 STRV_FOREACH(a, argv)
44d8db9e
LP
4288 k += strlen(*a)+3;
4289
5cd9cd35
LP
4290 n = new(char, k);
4291 if (!n)
44d8db9e
LP
4292 return NULL;
4293
4294 p = n;
9e2f7c11 4295 STRV_FOREACH(a, argv) {
44d8db9e
LP
4296
4297 if (!first)
4298 *(p++) = ' ';
4299 else
4300 first = false;
4301
4302 if (strpbrk(*a, WHITESPACE)) {
4303 *(p++) = '\'';
4304 p = stpcpy(p, *a);
4305 *(p++) = '\'';
4306 } else
4307 p = stpcpy(p, *a);
4308
4309 }
4310
9164977d
LP
4311 *p = 0;
4312
44d8db9e
LP
4313 /* FIXME: this doesn't really handle arguments that have
4314 * spaces and ticks in them */
4315
4316 return n;
4317}
4318
4319void exec_command_dump(ExecCommand *c, FILE *f, const char *prefix) {
e1d75803 4320 _cleanup_free_ char *cmd = NULL;
4c940960 4321 const char *prefix2;
44d8db9e
LP
4322
4323 assert(c);
4324 assert(f);
4325
4c940960 4326 prefix = strempty(prefix);
63c372cb 4327 prefix2 = strjoina(prefix, "\t");
44d8db9e 4328
9e2f7c11 4329 cmd = exec_command_line(c->argv);
44d8db9e
LP
4330 fprintf(f,
4331 "%sCommand Line: %s\n",
4332 prefix, cmd ? cmd : strerror(ENOMEM));
4333
9fb86720 4334 exec_status_dump(&c->exec_status, f, prefix2);
44d8db9e
LP
4335}
4336
4337void exec_command_dump_list(ExecCommand *c, FILE *f, const char *prefix) {
4338 assert(f);
4339
4c940960 4340 prefix = strempty(prefix);
44d8db9e
LP
4341
4342 LIST_FOREACH(command, c, c)
4343 exec_command_dump(c, f, prefix);
4344}
94f04347 4345
a6a80b4f
LP
4346void exec_command_append_list(ExecCommand **l, ExecCommand *e) {
4347 ExecCommand *end;
4348
4349 assert(l);
4350 assert(e);
4351
4352 if (*l) {
35b8ca3a 4353 /* It's kind of important, that we keep the order here */
71fda00f
LP
4354 LIST_FIND_TAIL(command, *l, end);
4355 LIST_INSERT_AFTER(command, *l, end, e);
a6a80b4f
LP
4356 } else
4357 *l = e;
4358}
4359
26fd040d
LP
4360int exec_command_set(ExecCommand *c, const char *path, ...) {
4361 va_list ap;
4362 char **l, *p;
4363
4364 assert(c);
4365 assert(path);
4366
4367 va_start(ap, path);
4368 l = strv_new_ap(path, ap);
4369 va_end(ap);
4370
4371 if (!l)
4372 return -ENOMEM;
4373
250a918d
LP
4374 p = strdup(path);
4375 if (!p) {
26fd040d
LP
4376 strv_free(l);
4377 return -ENOMEM;
4378 }
4379
4380 free(c->path);
4381 c->path = p;
4382
4383 strv_free(c->argv);
4384 c->argv = l;
4385
4386 return 0;
4387}
4388
86b23b07 4389int exec_command_append(ExecCommand *c, const char *path, ...) {
e63ff941 4390 _cleanup_strv_free_ char **l = NULL;
86b23b07 4391 va_list ap;
86b23b07
JS
4392 int r;
4393
4394 assert(c);
4395 assert(path);
4396
4397 va_start(ap, path);
4398 l = strv_new_ap(path, ap);
4399 va_end(ap);
4400
4401 if (!l)
4402 return -ENOMEM;
4403
e287086b 4404 r = strv_extend_strv(&c->argv, l, false);
e63ff941 4405 if (r < 0)
86b23b07 4406 return r;
86b23b07
JS
4407
4408 return 0;
4409}
4410
4411
613b411c
LP
4412static int exec_runtime_allocate(ExecRuntime **rt) {
4413
4414 if (*rt)
4415 return 0;
4416
4417 *rt = new0(ExecRuntime, 1);
f146f5e1 4418 if (!*rt)
613b411c
LP
4419 return -ENOMEM;
4420
4421 (*rt)->n_ref = 1;
4422 (*rt)->netns_storage_socket[0] = (*rt)->netns_storage_socket[1] = -1;
4423
4424 return 0;
4425}
4426
4427int exec_runtime_make(ExecRuntime **rt, ExecContext *c, const char *id) {
4428 int r;
4429
4430 assert(rt);
4431 assert(c);
4432 assert(id);
4433
4434 if (*rt)
4435 return 1;
4436
4437 if (!c->private_network && !c->private_tmp)
4438 return 0;
4439
4440 r = exec_runtime_allocate(rt);
4441 if (r < 0)
4442 return r;
4443
4444 if (c->private_network && (*rt)->netns_storage_socket[0] < 0) {
33df919d 4445 if (socketpair(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC, 0, (*rt)->netns_storage_socket) < 0)
613b411c
LP
4446 return -errno;
4447 }
4448
4449 if (c->private_tmp && !(*rt)->tmp_dir) {
4450 r = setup_tmp_dirs(id, &(*rt)->tmp_dir, &(*rt)->var_tmp_dir);
4451 if (r < 0)
4452 return r;
4453 }
4454
4455 return 1;
4456}
4457
4458ExecRuntime *exec_runtime_ref(ExecRuntime *r) {
4459 assert(r);
4460 assert(r->n_ref > 0);
4461
4462 r->n_ref++;
4463 return r;
4464}
4465
4466ExecRuntime *exec_runtime_unref(ExecRuntime *r) {
4467
4468 if (!r)
4469 return NULL;
4470
4471 assert(r->n_ref > 0);
4472
4473 r->n_ref--;
f2341e0a
LP
4474 if (r->n_ref > 0)
4475 return NULL;
4476
4477 free(r->tmp_dir);
4478 free(r->var_tmp_dir);
4479 safe_close_pair(r->netns_storage_socket);
6b430fdb 4480 return mfree(r);
613b411c
LP
4481}
4482
f2341e0a 4483int exec_runtime_serialize(Unit *u, ExecRuntime *rt, FILE *f, FDSet *fds) {
613b411c
LP
4484 assert(u);
4485 assert(f);
4486 assert(fds);
4487
4488 if (!rt)
4489 return 0;
4490
4491 if (rt->tmp_dir)
4492 unit_serialize_item(u, f, "tmp-dir", rt->tmp_dir);
4493
4494 if (rt->var_tmp_dir)
4495 unit_serialize_item(u, f, "var-tmp-dir", rt->var_tmp_dir);
4496
4497 if (rt->netns_storage_socket[0] >= 0) {
4498 int copy;
4499
4500 copy = fdset_put_dup(fds, rt->netns_storage_socket[0]);
4501 if (copy < 0)
4502 return copy;
4503
4504 unit_serialize_item_format(u, f, "netns-socket-0", "%i", copy);
4505 }
4506
4507 if (rt->netns_storage_socket[1] >= 0) {
4508 int copy;
4509
4510 copy = fdset_put_dup(fds, rt->netns_storage_socket[1]);
4511 if (copy < 0)
4512 return copy;
4513
4514 unit_serialize_item_format(u, f, "netns-socket-1", "%i", copy);
4515 }
4516
4517 return 0;
4518}
4519
f2341e0a 4520int exec_runtime_deserialize_item(Unit *u, ExecRuntime **rt, const char *key, const char *value, FDSet *fds) {
613b411c
LP
4521 int r;
4522
4523 assert(rt);
4524 assert(key);
4525 assert(value);
4526
4527 if (streq(key, "tmp-dir")) {
4528 char *copy;
4529
4530 r = exec_runtime_allocate(rt);
4531 if (r < 0)
f2341e0a 4532 return log_oom();
613b411c
LP
4533
4534 copy = strdup(value);
4535 if (!copy)
4536 return log_oom();
4537
4538 free((*rt)->tmp_dir);
4539 (*rt)->tmp_dir = copy;
4540
4541 } else if (streq(key, "var-tmp-dir")) {
4542 char *copy;
4543
4544 r = exec_runtime_allocate(rt);
4545 if (r < 0)
f2341e0a 4546 return log_oom();
613b411c
LP
4547
4548 copy = strdup(value);
4549 if (!copy)
4550 return log_oom();
4551
4552 free((*rt)->var_tmp_dir);
4553 (*rt)->var_tmp_dir = copy;
4554
4555 } else if (streq(key, "netns-socket-0")) {
4556 int fd;
4557
4558 r = exec_runtime_allocate(rt);
4559 if (r < 0)
f2341e0a 4560 return log_oom();
613b411c
LP
4561
4562 if (safe_atoi(value, &fd) < 0 || !fdset_contains(fds, fd))
f2341e0a 4563 log_unit_debug(u, "Failed to parse netns socket value: %s", value);
613b411c 4564 else {
03e334a1 4565 safe_close((*rt)->netns_storage_socket[0]);
613b411c
LP
4566 (*rt)->netns_storage_socket[0] = fdset_remove(fds, fd);
4567 }
4568 } else if (streq(key, "netns-socket-1")) {
4569 int fd;
4570
4571 r = exec_runtime_allocate(rt);
4572 if (r < 0)
f2341e0a 4573 return log_oom();
613b411c
LP
4574
4575 if (safe_atoi(value, &fd) < 0 || !fdset_contains(fds, fd))
f2341e0a 4576 log_unit_debug(u, "Failed to parse netns socket value: %s", value);
613b411c 4577 else {
03e334a1 4578 safe_close((*rt)->netns_storage_socket[1]);
613b411c
LP
4579 (*rt)->netns_storage_socket[1] = fdset_remove(fds, fd);
4580 }
4581 } else
4582 return 0;
4583
4584 return 1;
4585}
4586
4587static void *remove_tmpdir_thread(void *p) {
4588 _cleanup_free_ char *path = p;
4589
c6878637 4590 (void) rm_rf(path, REMOVE_ROOT|REMOVE_PHYSICAL);
613b411c
LP
4591 return NULL;
4592}
4593
4594void exec_runtime_destroy(ExecRuntime *rt) {
98b47d54
LP
4595 int r;
4596
613b411c
LP
4597 if (!rt)
4598 return;
4599
4600 /* If there are multiple users of this, let's leave the stuff around */
4601 if (rt->n_ref > 1)
4602 return;
4603
4604 if (rt->tmp_dir) {
4605 log_debug("Spawning thread to nuke %s", rt->tmp_dir);
98b47d54
LP
4606
4607 r = asynchronous_job(remove_tmpdir_thread, rt->tmp_dir);
4608 if (r < 0) {
da927ba9 4609 log_warning_errno(r, "Failed to nuke %s: %m", rt->tmp_dir);
98b47d54
LP
4610 free(rt->tmp_dir);
4611 }
4612
613b411c
LP
4613 rt->tmp_dir = NULL;
4614 }
4615
4616 if (rt->var_tmp_dir) {
4617 log_debug("Spawning thread to nuke %s", rt->var_tmp_dir);
98b47d54
LP
4618
4619 r = asynchronous_job(remove_tmpdir_thread, rt->var_tmp_dir);
4620 if (r < 0) {
da927ba9 4621 log_warning_errno(r, "Failed to nuke %s: %m", rt->var_tmp_dir);
98b47d54
LP
4622 free(rt->var_tmp_dir);
4623 }
4624
613b411c
LP
4625 rt->var_tmp_dir = NULL;
4626 }
4627
3d94f76c 4628 safe_close_pair(rt->netns_storage_socket);
613b411c
LP
4629}
4630
80876c20
LP
4631static const char* const exec_input_table[_EXEC_INPUT_MAX] = {
4632 [EXEC_INPUT_NULL] = "null",
4633 [EXEC_INPUT_TTY] = "tty",
4634 [EXEC_INPUT_TTY_FORCE] = "tty-force",
4f2d528d 4635 [EXEC_INPUT_TTY_FAIL] = "tty-fail",
52c239d7
LB
4636 [EXEC_INPUT_SOCKET] = "socket",
4637 [EXEC_INPUT_NAMED_FD] = "fd",
80876c20
LP
4638};
4639
8a0867d6
LP
4640DEFINE_STRING_TABLE_LOOKUP(exec_input, ExecInput);
4641
94f04347 4642static const char* const exec_output_table[_EXEC_OUTPUT_MAX] = {
80876c20 4643 [EXEC_OUTPUT_INHERIT] = "inherit",
94f04347 4644 [EXEC_OUTPUT_NULL] = "null",
80876c20 4645 [EXEC_OUTPUT_TTY] = "tty",
94f04347 4646 [EXEC_OUTPUT_SYSLOG] = "syslog",
28dbc1e8 4647 [EXEC_OUTPUT_SYSLOG_AND_CONSOLE] = "syslog+console",
9a6bca7a 4648 [EXEC_OUTPUT_KMSG] = "kmsg",
28dbc1e8 4649 [EXEC_OUTPUT_KMSG_AND_CONSOLE] = "kmsg+console",
706343f4
LP
4650 [EXEC_OUTPUT_JOURNAL] = "journal",
4651 [EXEC_OUTPUT_JOURNAL_AND_CONSOLE] = "journal+console",
52c239d7
LB
4652 [EXEC_OUTPUT_SOCKET] = "socket",
4653 [EXEC_OUTPUT_NAMED_FD] = "fd",
94f04347
LP
4654};
4655
4656DEFINE_STRING_TABLE_LOOKUP(exec_output, ExecOutput);
023a4f67
LP
4657
4658static const char* const exec_utmp_mode_table[_EXEC_UTMP_MODE_MAX] = {
4659 [EXEC_UTMP_INIT] = "init",
4660 [EXEC_UTMP_LOGIN] = "login",
4661 [EXEC_UTMP_USER] = "user",
4662};
4663
4664DEFINE_STRING_TABLE_LOOKUP(exec_utmp_mode, ExecUtmpMode);
53f47dfc
YW
4665
4666static const char* const exec_preserve_mode_table[_EXEC_PRESERVE_MODE_MAX] = {
4667 [EXEC_PRESERVE_NO] = "no",
4668 [EXEC_PRESERVE_YES] = "yes",
4669 [EXEC_PRESERVE_RESTART] = "restart",
4670};
4671
4672DEFINE_STRING_TABLE_LOOKUP_WITH_BOOLEAN(exec_preserve_mode, ExecPreserveMode, EXEC_PRESERVE_YES);
3536f49e 4673
72fd1768 4674static const char* const exec_directory_type_table[_EXEC_DIRECTORY_TYPE_MAX] = {
3536f49e
YW
4675 [EXEC_DIRECTORY_RUNTIME] = "RuntimeDirectory",
4676 [EXEC_DIRECTORY_STATE] = "StateDirectory",
4677 [EXEC_DIRECTORY_CACHE] = "CacheDirectory",
4678 [EXEC_DIRECTORY_LOGS] = "LogsDirectory",
4679 [EXEC_DIRECTORY_CONFIGURATION] = "ConfigurationDirectory",
4680};
4681
4682DEFINE_STRING_TABLE_LOOKUP(exec_directory_type, ExecDirectoryType);
b1edf445
LP
4683
4684static const char* const exec_keyring_mode_table[_EXEC_KEYRING_MODE_MAX] = {
4685 [EXEC_KEYRING_INHERIT] = "inherit",
4686 [EXEC_KEYRING_PRIVATE] = "private",
4687 [EXEC_KEYRING_SHARED] = "shared",
4688};
4689
4690DEFINE_STRING_TABLE_LOOKUP(exec_keyring_mode, ExecKeyringMode);