]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/core/execute.c
Add SPDX license identifiers to source files under the LGPL
[thirdparty/systemd.git] / src / core / execute.c
CommitLineData
53e1b683 1/* SPDX-License-Identifier: LGPL-2.1+ */
a7334b09
LP
2/***
3 This file is part of systemd.
4
5 Copyright 2010 Lennart Poettering
6
7 systemd is free software; you can redistribute it and/or modify it
5430f7f2
LP
8 under the terms of the GNU Lesser General Public License as published by
9 the Free Software Foundation; either version 2.1 of the License, or
a7334b09
LP
10 (at your option) any later version.
11
12 systemd is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
5430f7f2 15 Lesser General Public License for more details.
a7334b09 16
5430f7f2 17 You should have received a copy of the GNU Lesser General Public License
a7334b09
LP
18 along with systemd; If not, see <http://www.gnu.org/licenses/>.
19***/
20
034c6ed7
LP
21#include <errno.h>
22#include <fcntl.h>
8dd4c05b
LP
23#include <glob.h>
24#include <grp.h>
25#include <poll.h>
309bff19 26#include <signal.h>
8dd4c05b 27#include <string.h>
19c0b0b9 28#include <sys/capability.h>
d251207d 29#include <sys/eventfd.h>
f3e43635 30#include <sys/mman.h>
8dd4c05b 31#include <sys/personality.h>
94f04347 32#include <sys/prctl.h>
d2ffa389 33#include <sys/shm.h>
8dd4c05b 34#include <sys/socket.h>
451a074f 35#include <sys/stat.h>
d2ffa389 36#include <sys/types.h>
8dd4c05b
LP
37#include <sys/un.h>
38#include <unistd.h>
023a4f67 39#include <utmpx.h>
5cb5a6ff 40
349cc4a5 41#if HAVE_PAM
5b6319dc
LP
42#include <security/pam_appl.h>
43#endif
44
349cc4a5 45#if HAVE_SELINUX
7b52a628
MS
46#include <selinux/selinux.h>
47#endif
48
349cc4a5 49#if HAVE_SECCOMP
17df7223
LP
50#include <seccomp.h>
51#endif
52
349cc4a5 53#if HAVE_APPARMOR
eef65bf3
MS
54#include <sys/apparmor.h>
55#endif
56
24882e06 57#include "sd-messages.h"
8dd4c05b
LP
58
59#include "af-list.h"
b5efdb8a 60#include "alloc-util.h"
349cc4a5 61#if HAVE_APPARMOR
3ffd4af2
LP
62#include "apparmor-util.h"
63#endif
8dd4c05b
LP
64#include "async.h"
65#include "barrier.h"
8dd4c05b 66#include "cap-list.h"
430f0182 67#include "capability-util.h"
a1164ae3 68#include "chown-recursive.h"
f6a6225e 69#include "def.h"
4d1a6904 70#include "env-util.h"
17df7223 71#include "errno-list.h"
3ffd4af2 72#include "execute.h"
8dd4c05b 73#include "exit-status.h"
3ffd4af2 74#include "fd-util.h"
8dd4c05b 75#include "fileio.h"
f97b34a6 76#include "format-util.h"
f4f15635 77#include "fs-util.h"
7d50b32a 78#include "glob-util.h"
c004493c 79#include "io-util.h"
8dd4c05b 80#include "ioprio.h"
a1164ae3 81#include "label.h"
8dd4c05b
LP
82#include "log.h"
83#include "macro.h"
84#include "missing.h"
85#include "mkdir.h"
86#include "namespace.h"
6bedfcbb 87#include "parse-util.h"
8dd4c05b 88#include "path-util.h"
0b452006 89#include "process-util.h"
78f22b97 90#include "rlimit-util.h"
8dd4c05b 91#include "rm-rf.h"
349cc4a5 92#if HAVE_SECCOMP
3ffd4af2
LP
93#include "seccomp-util.h"
94#endif
8dd4c05b 95#include "securebits.h"
07d46372 96#include "securebits-util.h"
8dd4c05b 97#include "selinux-util.h"
24882e06 98#include "signal-util.h"
8dd4c05b 99#include "smack-util.h"
fd63e712 100#include "special.h"
8b43440b 101#include "string-table.h"
07630cea 102#include "string-util.h"
8dd4c05b 103#include "strv.h"
7ccbd1ae 104#include "syslog-util.h"
8dd4c05b
LP
105#include "terminal-util.h"
106#include "unit.h"
b1d4f8e1 107#include "user-util.h"
8dd4c05b
LP
108#include "util.h"
109#include "utmp-wtmp.h"
5cb5a6ff 110
/* Idle-wait timeouts, expressed in microseconds. NOTE(review): the consumers of these two
 * values are outside this excerpt -- confirm their exact roles there. */
#define IDLE_TIMEOUT_USEC (5 * USEC_PER_SEC)
#define IDLE_TIMEOUT2_USEC (1 * USEC_PER_SEC)

/* Access mode enforced on a terminal we take over: rw for the owner, w for the group.
 * This assumes there is a 'tty' group. */
#define TTY_MODE 0620

/* Send buffer size (8 MiB) requested for the stream socket connected to the journal. */
#define SNDBUF_SIZE (8 * 1024 * 1024)
118
034c6ed7
LP
/* Renumbers the passed file descriptors so that fds[i] ends up being fd number i+3, i.e. the
 * array occupies the range directly after stdin/stdout/stderr.
 *
 * The array is modified in place. Returns 0 on success, or a negative errno if duplicating a
 * descriptor fails. */
static int shift_fds(int fds[], unsigned n_fds) {
        int first = 0;

        if (n_fds == 0)
                return 0;

        assert(fds);

        do {
                int retry_at = -1, idx;

                for (idx = first; idx < (int) n_fds; idx++) {
                        int want = idx + 3, got;

                        /* Nothing to do if this one already sits in its slot */
                        if (fds[idx] == want)
                                continue;

                        got = fcntl(fds[idx], F_DUPFD, want);
                        if (got < 0)
                                return -errno;

                        safe_close(fds[idx]);
                        fds[idx] = got;

                        /* The slot we asked for was still occupied (F_DUPFD handed us a
                         * higher number). Remember the first such index so that another
                         * pass starts from there. */
                        if (got != want && retry_at < 0)
                                retry_at = idx;
                }

                first = retry_at;
        } while (first >= 0);

        return 0;
}
163
4c47affc
FB
/* Adjusts the file flags of the fds we are about to pass on: O_NONBLOCK is set or cleared
 * (according to 'nonblock') on the first n_socket_fds entries only, and FD_CLOEXEC is cleared
 * on every entry.
 *
 * Returns 0 on success or the negative error of the first helper call that failed. */
static int flags_fds(const int fds[], unsigned n_storage_fds, unsigned n_socket_fds, bool nonblock) {
        const unsigned total = n_storage_fds + n_socket_fds;
        unsigned idx;

        if (total == 0)
                return 0;

        assert(fds);

        for (idx = 0; idx < total; idx++) {
                int r;

                /* O_NONBLOCK only applies to socket activation */
                if (idx < n_socket_fds) {
                        r = fd_nonblock(fds[idx], nonblock);
                        if (r < 0)
                                return r;
                }

                /* FD_CLOEXEC is dropped unconditionally: after all, these fds are meant
                 * to be inherited by our children. */
                r = fd_cloexec(fds[idx], false);
                if (r < 0)
                        return r;
        }

        return 0;
}
196
1e22b5cd 197static const char *exec_context_tty_path(const ExecContext *context) {
80876c20
LP
198 assert(context);
199
1e22b5cd
LP
200 if (context->stdio_as_fds)
201 return NULL;
202
80876c20
LP
203 if (context->tty_path)
204 return context->tty_path;
205
206 return "/dev/console";
207}
208
1e22b5cd
LP
209static void exec_context_tty_reset(const ExecContext *context, const ExecParameters *p) {
210 const char *path;
211
6ea832a2
LP
212 assert(context);
213
1e22b5cd 214 path = exec_context_tty_path(context);
6ea832a2 215
1e22b5cd
LP
216 if (context->tty_vhangup) {
217 if (p && p->stdin_fd >= 0)
218 (void) terminal_vhangup_fd(p->stdin_fd);
219 else if (path)
220 (void) terminal_vhangup(path);
221 }
6ea832a2 222
1e22b5cd
LP
223 if (context->tty_reset) {
224 if (p && p->stdin_fd >= 0)
225 (void) reset_terminal_fd(p->stdin_fd, true);
226 else if (path)
227 (void) reset_terminal(path);
228 }
229
230 if (context->tty_vt_disallocate && path)
231 (void) vt_disallocate(path);
6ea832a2
LP
232}
233
6af760f3
LP
234static bool is_terminal_input(ExecInput i) {
235 return IN_SET(i,
236 EXEC_INPUT_TTY,
237 EXEC_INPUT_TTY_FORCE,
238 EXEC_INPUT_TTY_FAIL);
239}
240
3a1286b6 241static bool is_terminal_output(ExecOutput o) {
6af760f3
LP
242 return IN_SET(o,
243 EXEC_OUTPUT_TTY,
244 EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
245 EXEC_OUTPUT_KMSG_AND_CONSOLE,
246 EXEC_OUTPUT_JOURNAL_AND_CONSOLE);
247}
248
aac8c0c3
LP
249static bool is_syslog_output(ExecOutput o) {
250 return IN_SET(o,
251 EXEC_OUTPUT_SYSLOG,
252 EXEC_OUTPUT_SYSLOG_AND_CONSOLE);
253}
254
255static bool is_kmsg_output(ExecOutput o) {
256 return IN_SET(o,
257 EXEC_OUTPUT_KMSG,
258 EXEC_OUTPUT_KMSG_AND_CONSOLE);
259}
260
6af760f3
LP
261static bool exec_context_needs_term(const ExecContext *c) {
262 assert(c);
263
264 /* Return true if the execution context suggests we should set $TERM to something useful. */
265
266 if (is_terminal_input(c->std_input))
267 return true;
268
269 if (is_terminal_output(c->std_output))
270 return true;
271
272 if (is_terminal_output(c->std_error))
273 return true;
274
275 return !!c->tty_path;
3a1286b6
MS
276}
277
80876c20
LP
/* Opens /dev/null with the given access flags and installs it as file descriptor 'nfd'.
 *
 * Returns nfd on success, a negative errno otherwise. */
static int open_null_as(int flags, int nfd) {
        int null_fd, ret;

        assert(nfd >= 0);

        null_fd = open("/dev/null", flags|O_NOCTTY);
        if (null_fd < 0)
                return -errno;

        /* open() may already have handed us the number we want */
        if (null_fd == nfd)
                return nfd;

        if (dup2(null_fd, nfd) < 0)
                ret = -errno;
        else
                ret = nfd;

        safe_close(null_fd);
        return ret;
}
295
524daa8c 296static int connect_journal_socket(int fd, uid_t uid, gid_t gid) {
92a17af9 297 static const union sockaddr_union sa = {
b92bea5d
ZJS
298 .un.sun_family = AF_UNIX,
299 .un.sun_path = "/run/systemd/journal/stdout",
300 };
524daa8c
ZJS
301 uid_t olduid = UID_INVALID;
302 gid_t oldgid = GID_INVALID;
303 int r;
304
cad93f29 305 if (gid_is_valid(gid)) {
524daa8c
ZJS
306 oldgid = getgid();
307
92a17af9 308 if (setegid(gid) < 0)
524daa8c
ZJS
309 return -errno;
310 }
311
cad93f29 312 if (uid_is_valid(uid)) {
524daa8c
ZJS
313 olduid = getuid();
314
92a17af9 315 if (seteuid(uid) < 0) {
524daa8c
ZJS
316 r = -errno;
317 goto restore_gid;
318 }
319 }
320
92a17af9 321 r = connect(fd, &sa.sa, SOCKADDR_UN_LEN(sa.un)) < 0 ? -errno : 0;
524daa8c
ZJS
322
323 /* If we fail to restore the uid or gid, things will likely
324 fail later on. This should only happen if an LSM interferes. */
325
cad93f29 326 if (uid_is_valid(uid))
524daa8c
ZJS
327 (void) seteuid(olduid);
328
329 restore_gid:
cad93f29 330 if (gid_is_valid(gid))
524daa8c
ZJS
331 (void) setegid(oldgid);
332
333 return r;
334}
335
fd1f9c89 336static int connect_logger_as(
7a1ab780 337 Unit *unit,
fd1f9c89 338 const ExecContext *context,
af635cf3 339 const ExecParameters *params,
fd1f9c89
LP
340 ExecOutput output,
341 const char *ident,
fd1f9c89
LP
342 int nfd,
343 uid_t uid,
344 gid_t gid) {
345
524daa8c 346 int fd, r;
071830ff
LP
347
348 assert(context);
af635cf3 349 assert(params);
80876c20
LP
350 assert(output < _EXEC_OUTPUT_MAX);
351 assert(ident);
352 assert(nfd >= 0);
071830ff 353
54fe0cdb
LP
354 fd = socket(AF_UNIX, SOCK_STREAM, 0);
355 if (fd < 0)
80876c20 356 return -errno;
071830ff 357
524daa8c
ZJS
358 r = connect_journal_socket(fd, uid, gid);
359 if (r < 0)
360 return r;
071830ff 361
80876c20 362 if (shutdown(fd, SHUT_RD) < 0) {
03e334a1 363 safe_close(fd);
80876c20
LP
364 return -errno;
365 }
071830ff 366
fd1f9c89 367 (void) fd_inc_sndbuf(fd, SNDBUF_SIZE);
531dca78 368
80876c20 369 dprintf(fd,
62bca2c6 370 "%s\n"
80876c20
LP
371 "%s\n"
372 "%i\n"
54fe0cdb
LP
373 "%i\n"
374 "%i\n"
375 "%i\n"
4f4a1dbf 376 "%i\n",
c867611e 377 context->syslog_identifier ?: ident,
af635cf3 378 params->flags & EXEC_PASS_LOG_UNIT ? unit->id : "",
54fe0cdb
LP
379 context->syslog_priority,
380 !!context->syslog_level_prefix,
aac8c0c3
LP
381 is_syslog_output(output),
382 is_kmsg_output(output),
3a1286b6 383 is_terminal_output(output));
80876c20 384
fd1f9c89
LP
385 if (fd == nfd)
386 return nfd;
387
388 r = dup2(fd, nfd) < 0 ? -errno : nfd;
389 safe_close(fd);
071830ff 390
80876c20
LP
391 return r;
392}
/* Opens the terminal at 'path' with the given open flags (O_NOCTTY is always added) and
 * installs it as file descriptor 'nfd'.
 *
 * Returns nfd on success, a negative errno otherwise. */
static int open_terminal_as(const char *path, mode_t mode, int nfd) {
        int tty_fd, ret;

        assert(path);
        assert(nfd >= 0);

        tty_fd = open_terminal(path, mode | O_NOCTTY);
        if (tty_fd < 0)
                return tty_fd;

        /* We might have gotten the right number straight away */
        if (tty_fd == nfd)
                return nfd;

        if (dup2(tty_fd, nfd) < 0)
                ret = -errno;
        else
                ret = nfd;

        safe_close(tty_fd);
        return ret;
}
071830ff 411
1e3ad081
LP
412static int fixup_input(ExecInput std_input, int socket_fd, bool apply_tty_stdin) {
413
414 if (is_terminal_input(std_input) && !apply_tty_stdin)
415 return EXEC_INPUT_NULL;
071830ff 416
03fd9c49 417 if (std_input == EXEC_INPUT_SOCKET && socket_fd < 0)
4f2d528d
LP
418 return EXEC_INPUT_NULL;
419
03fd9c49 420 return std_input;
4f2d528d
LP
421}
422
03fd9c49 423static int fixup_output(ExecOutput std_output, int socket_fd) {
4f2d528d 424
03fd9c49 425 if (std_output == EXEC_OUTPUT_SOCKET && socket_fd < 0)
4f2d528d
LP
426 return EXEC_OUTPUT_INHERIT;
427
03fd9c49 428 return std_output;
4f2d528d
LP
429}
430
a34ceba6
LP
431static int setup_input(
432 const ExecContext *context,
433 const ExecParameters *params,
52c239d7
LB
434 int socket_fd,
435 int named_iofds[3]) {
a34ceba6 436
4f2d528d
LP
437 ExecInput i;
438
439 assert(context);
a34ceba6
LP
440 assert(params);
441
442 if (params->stdin_fd >= 0) {
443 if (dup2(params->stdin_fd, STDIN_FILENO) < 0)
444 return -errno;
445
446 /* Try to make this the controlling tty, if it is a tty, and reset it */
447 (void) ioctl(STDIN_FILENO, TIOCSCTTY, context->std_input == EXEC_INPUT_TTY_FORCE);
448 (void) reset_terminal_fd(STDIN_FILENO, true);
449
450 return STDIN_FILENO;
451 }
4f2d528d 452
c39f1ce2 453 i = fixup_input(context->std_input, socket_fd, params->flags & EXEC_APPLY_TTY_STDIN);
4f2d528d
LP
454
455 switch (i) {
071830ff 456
80876c20
LP
457 case EXEC_INPUT_NULL:
458 return open_null_as(O_RDONLY, STDIN_FILENO);
459
460 case EXEC_INPUT_TTY:
461 case EXEC_INPUT_TTY_FORCE:
462 case EXEC_INPUT_TTY_FAIL: {
463 int fd, r;
071830ff 464
1e22b5cd 465 fd = acquire_terminal(exec_context_tty_path(context),
970edce6
ZJS
466 i == EXEC_INPUT_TTY_FAIL,
467 i == EXEC_INPUT_TTY_FORCE,
468 false,
3a43da28 469 USEC_INFINITY);
970edce6 470 if (fd < 0)
80876c20
LP
471 return fd;
472
473 if (fd != STDIN_FILENO) {
474 r = dup2(fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
03e334a1 475 safe_close(fd);
80876c20
LP
476 } else
477 r = STDIN_FILENO;
478
479 return r;
480 }
481
4f2d528d
LP
482 case EXEC_INPUT_SOCKET:
483 return dup2(socket_fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
484
52c239d7
LB
485 case EXEC_INPUT_NAMED_FD:
486 (void) fd_nonblock(named_iofds[STDIN_FILENO], false);
487 return dup2(named_iofds[STDIN_FILENO], STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
488
80876c20
LP
489 default:
490 assert_not_reached("Unknown input type");
491 }
492}
493
a34ceba6
LP
494static int setup_output(
495 Unit *unit,
496 const ExecContext *context,
497 const ExecParameters *params,
498 int fileno,
499 int socket_fd,
52c239d7 500 int named_iofds[3],
a34ceba6 501 const char *ident,
7bce046b
LP
502 uid_t uid,
503 gid_t gid,
504 dev_t *journal_stream_dev,
505 ino_t *journal_stream_ino) {
a34ceba6 506
4f2d528d
LP
507 ExecOutput o;
508 ExecInput i;
47c1d80d 509 int r;
4f2d528d 510
f2341e0a 511 assert(unit);
80876c20 512 assert(context);
a34ceba6 513 assert(params);
80876c20 514 assert(ident);
7bce046b
LP
515 assert(journal_stream_dev);
516 assert(journal_stream_ino);
80876c20 517
a34ceba6
LP
518 if (fileno == STDOUT_FILENO && params->stdout_fd >= 0) {
519
520 if (dup2(params->stdout_fd, STDOUT_FILENO) < 0)
521 return -errno;
522
523 return STDOUT_FILENO;
524 }
525
526 if (fileno == STDERR_FILENO && params->stderr_fd >= 0) {
527 if (dup2(params->stderr_fd, STDERR_FILENO) < 0)
528 return -errno;
529
530 return STDERR_FILENO;
531 }
532
c39f1ce2 533 i = fixup_input(context->std_input, socket_fd, params->flags & EXEC_APPLY_TTY_STDIN);
03fd9c49 534 o = fixup_output(context->std_output, socket_fd);
4f2d528d 535
eb17e935
MS
536 if (fileno == STDERR_FILENO) {
537 ExecOutput e;
538 e = fixup_output(context->std_error, socket_fd);
80876c20 539
eb17e935
MS
540 /* This expects the input and output are already set up */
541
542 /* Don't change the stderr file descriptor if we inherit all
543 * the way and are not on a tty */
544 if (e == EXEC_OUTPUT_INHERIT &&
545 o == EXEC_OUTPUT_INHERIT &&
546 i == EXEC_INPUT_NULL &&
547 !is_terminal_input(context->std_input) &&
548 getppid () != 1)
549 return fileno;
550
551 /* Duplicate from stdout if possible */
52c239d7 552 if ((e == o && e != EXEC_OUTPUT_NAMED_FD) || e == EXEC_OUTPUT_INHERIT)
eb17e935 553 return dup2(STDOUT_FILENO, fileno) < 0 ? -errno : fileno;
071830ff 554
eb17e935 555 o = e;
80876c20 556
eb17e935 557 } else if (o == EXEC_OUTPUT_INHERIT) {
21d21ea4
LP
558 /* If input got downgraded, inherit the original value */
559 if (i == EXEC_INPUT_NULL && is_terminal_input(context->std_input))
1e22b5cd 560 return open_terminal_as(exec_context_tty_path(context), O_WRONLY, fileno);
21d21ea4 561
acb591e4 562 /* If the input is connected to anything that's not a /dev/null, inherit that... */
ff876e28 563 if (i != EXEC_INPUT_NULL)
eb17e935 564 return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
071830ff 565
acb591e4
LP
566 /* If we are not started from PID 1 we just inherit STDOUT from our parent process. */
567 if (getppid() != 1)
eb17e935 568 return fileno;
94f04347 569
eb17e935
MS
570 /* We need to open /dev/null here anew, to get the right access mode. */
571 return open_null_as(O_WRONLY, fileno);
071830ff 572 }
94f04347 573
eb17e935 574 switch (o) {
80876c20
LP
575
576 case EXEC_OUTPUT_NULL:
eb17e935 577 return open_null_as(O_WRONLY, fileno);
80876c20
LP
578
579 case EXEC_OUTPUT_TTY:
4f2d528d 580 if (is_terminal_input(i))
eb17e935 581 return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
80876c20
LP
582
583 /* We don't reset the terminal if this is just about output */
1e22b5cd 584 return open_terminal_as(exec_context_tty_path(context), O_WRONLY, fileno);
80876c20
LP
585
586 case EXEC_OUTPUT_SYSLOG:
28dbc1e8 587 case EXEC_OUTPUT_SYSLOG_AND_CONSOLE:
9a6bca7a 588 case EXEC_OUTPUT_KMSG:
28dbc1e8 589 case EXEC_OUTPUT_KMSG_AND_CONSOLE:
706343f4
LP
590 case EXEC_OUTPUT_JOURNAL:
591 case EXEC_OUTPUT_JOURNAL_AND_CONSOLE:
af635cf3 592 r = connect_logger_as(unit, context, params, o, ident, fileno, uid, gid);
47c1d80d 593 if (r < 0) {
82677ae4 594 log_unit_warning_errno(unit, r, "Failed to connect %s to the journal socket, ignoring: %m", fileno == STDOUT_FILENO ? "stdout" : "stderr");
eb17e935 595 r = open_null_as(O_WRONLY, fileno);
7bce046b
LP
596 } else {
597 struct stat st;
598
599 /* If we connected this fd to the journal via a stream, patch the device/inode into the passed
600 * parameters, but only then. This is useful so that we can set $JOURNAL_STREAM that permits
ab2116b1
LP
601 * services to detect whether they are connected to the journal or not.
602 *
603 * If both stdout and stderr are connected to a stream then let's make sure to store the data
604 * about STDERR as that's usually the best way to do logging. */
7bce046b 605
ab2116b1
LP
606 if (fstat(fileno, &st) >= 0 &&
607 (*journal_stream_ino == 0 || fileno == STDERR_FILENO)) {
7bce046b
LP
608 *journal_stream_dev = st.st_dev;
609 *journal_stream_ino = st.st_ino;
610 }
47c1d80d
MS
611 }
612 return r;
4f2d528d
LP
613
614 case EXEC_OUTPUT_SOCKET:
615 assert(socket_fd >= 0);
eb17e935 616 return dup2(socket_fd, fileno) < 0 ? -errno : fileno;
94f04347 617
52c239d7
LB
618 case EXEC_OUTPUT_NAMED_FD:
619 (void) fd_nonblock(named_iofds[fileno], false);
620 return dup2(named_iofds[fileno], fileno) < 0 ? -errno : fileno;
621
94f04347 622 default:
80876c20 623 assert_not_reached("Unknown error type");
94f04347 624 }
071830ff
LP
625}
626
02a51aba
LP
627static int chown_terminal(int fd, uid_t uid) {
628 struct stat st;
629
630 assert(fd >= 0);
02a51aba 631
1ff74fb6
LP
632 /* Before we chown/chmod the TTY, let's ensure this is actually a tty */
633 if (isatty(fd) < 1)
634 return 0;
635
02a51aba 636 /* This might fail. What matters are the results. */
bab45044
LP
637 (void) fchown(fd, uid, -1);
638 (void) fchmod(fd, TTY_MODE);
02a51aba
LP
639
640 if (fstat(fd, &st) < 0)
641 return -errno;
642
d8b4e2e9 643 if (st.st_uid != uid || (st.st_mode & 0777) != TTY_MODE)
02a51aba
LP
644 return -EPERM;
645
646 return 0;
647}
648
7d5ceb64 649static int setup_confirm_stdio(const char *vc, int *_saved_stdin, int *_saved_stdout) {
3d18b167
LP
650 _cleanup_close_ int fd = -1, saved_stdin = -1, saved_stdout = -1;
651 int r;
80876c20 652
80876c20
LP
653 assert(_saved_stdin);
654 assert(_saved_stdout);
655
af6da548
LP
656 saved_stdin = fcntl(STDIN_FILENO, F_DUPFD, 3);
657 if (saved_stdin < 0)
658 return -errno;
80876c20 659
af6da548 660 saved_stdout = fcntl(STDOUT_FILENO, F_DUPFD, 3);
3d18b167
LP
661 if (saved_stdout < 0)
662 return -errno;
80876c20 663
7d5ceb64 664 fd = acquire_terminal(vc, false, false, false, DEFAULT_CONFIRM_USEC);
3d18b167
LP
665 if (fd < 0)
666 return fd;
80876c20 667
af6da548
LP
668 r = chown_terminal(fd, getuid());
669 if (r < 0)
3d18b167 670 return r;
02a51aba 671
3d18b167
LP
672 r = reset_terminal_fd(fd, true);
673 if (r < 0)
674 return r;
80876c20 675
3d18b167
LP
676 if (dup2(fd, STDIN_FILENO) < 0)
677 return -errno;
678
679 if (dup2(fd, STDOUT_FILENO) < 0)
680 return -errno;
80876c20
LP
681
682 if (fd >= 2)
03e334a1 683 safe_close(fd);
3d18b167 684 fd = -1;
80876c20
LP
685
686 *_saved_stdin = saved_stdin;
687 *_saved_stdout = saved_stdout;
688
3d18b167 689 saved_stdin = saved_stdout = -1;
80876c20 690
3d18b167 691 return 0;
80876c20
LP
692}
693
63d77c92 694static void write_confirm_error_fd(int err, int fd, const Unit *u) {
3b20f877
FB
695 assert(err < 0);
696
697 if (err == -ETIMEDOUT)
63d77c92 698 dprintf(fd, "Confirmation question timed out for %s, assuming positive response.\n", u->id);
3b20f877
FB
699 else {
700 errno = -err;
63d77c92 701 dprintf(fd, "Couldn't ask confirmation for %s: %m, assuming positive response.\n", u->id);
3b20f877
FB
702 }
703}
704
63d77c92 705static void write_confirm_error(int err, const char *vc, const Unit *u) {
03e334a1 706 _cleanup_close_ int fd = -1;
80876c20 707
3b20f877 708 assert(vc);
80876c20 709
7d5ceb64 710 fd = open_terminal(vc, O_WRONLY|O_NOCTTY|O_CLOEXEC);
af6da548 711 if (fd < 0)
3b20f877 712 return;
80876c20 713
63d77c92 714 write_confirm_error_fd(err, fd, u);
af6da548 715}
80876c20 716
/* Undoes setup_confirm_stdio(): releases the terminal, puts the saved fds back in place as
 * stdin/stdout and closes the saved copies (resetting the passed variables).
 *
 * Returns 0 on success, or the negative errno of the last dup2() that failed. */
static int restore_confirm_stdio(int *saved_stdin, int *saved_stdout) {
        int ret = 0;

        assert(saved_stdin);
        assert(saved_stdout);

        release_terminal();

        if (*saved_stdin >= 0 && dup2(*saved_stdin, STDIN_FILENO) < 0)
                ret = -errno;

        if (*saved_stdout >= 0 && dup2(*saved_stdout, STDOUT_FILENO) < 0)
                ret = -errno;

        /* Close the copies in any case */
        *saved_stdin = safe_close(*saved_stdin);
        *saved_stdout = safe_close(*saved_stdout);

        return ret;
}
738
3b20f877
FB
/* The possible results of asking the user for confirmation before spawning a command. */
enum {
        CONFIRM_PRETEND_FAILURE = -1, /* don't execute, pretend the command failed */
        CONFIRM_PRETEND_SUCCESS =  0, /* don't execute, pretend the command succeeded */
        CONFIRM_EXECUTE         =  1, /* go ahead and execute the command */
};
744
eedf223a 745static int ask_for_confirmation(const char *vc, Unit *u, const char *cmdline) {
af6da548 746 int saved_stdout = -1, saved_stdin = -1, r;
2bcd3c26 747 _cleanup_free_ char *e = NULL;
3b20f877 748 char c;
af6da548 749
3b20f877 750 /* For any internal errors, assume a positive response. */
7d5ceb64 751 r = setup_confirm_stdio(vc, &saved_stdin, &saved_stdout);
3b20f877 752 if (r < 0) {
63d77c92 753 write_confirm_error(r, vc, u);
3b20f877
FB
754 return CONFIRM_EXECUTE;
755 }
af6da548 756
b0eb2944
FB
757 /* confirm_spawn might have been disabled while we were sleeping. */
758 if (manager_is_confirm_spawn_disabled(u->manager)) {
759 r = 1;
760 goto restore_stdio;
761 }
af6da548 762
2bcd3c26
FB
763 e = ellipsize(cmdline, 60, 100);
764 if (!e) {
765 log_oom();
766 r = CONFIRM_EXECUTE;
767 goto restore_stdio;
768 }
af6da548 769
d172b175 770 for (;;) {
539622bd 771 r = ask_char(&c, "yfshiDjcn", "Execute %s? [y, f, s – h for help] ", e);
d172b175 772 if (r < 0) {
63d77c92 773 write_confirm_error_fd(r, STDOUT_FILENO, u);
d172b175
FB
774 r = CONFIRM_EXECUTE;
775 goto restore_stdio;
776 }
af6da548 777
d172b175 778 switch (c) {
b0eb2944
FB
779 case 'c':
780 printf("Resuming normal execution.\n");
781 manager_disable_confirm_spawn();
782 r = 1;
783 break;
dd6f9ac0
FB
784 case 'D':
785 unit_dump(u, stdout, " ");
786 continue; /* ask again */
d172b175
FB
787 case 'f':
788 printf("Failing execution.\n");
789 r = CONFIRM_PRETEND_FAILURE;
790 break;
791 case 'h':
b0eb2944
FB
792 printf(" c - continue, proceed without asking anymore\n"
793 " D - dump, show the state of the unit\n"
dd6f9ac0 794 " f - fail, don't execute the command and pretend it failed\n"
d172b175 795 " h - help\n"
eedf223a 796 " i - info, show a short summary of the unit\n"
56fde33a 797 " j - jobs, show jobs that are in progress\n"
d172b175
FB
798 " s - skip, don't execute the command and pretend it succeeded\n"
799 " y - yes, execute the command\n");
dd6f9ac0 800 continue; /* ask again */
eedf223a
FB
801 case 'i':
802 printf(" Description: %s\n"
803 " Unit: %s\n"
804 " Command: %s\n",
805 u->id, u->description, cmdline);
806 continue; /* ask again */
56fde33a
FB
807 case 'j':
808 manager_dump_jobs(u->manager, stdout, " ");
809 continue; /* ask again */
539622bd
FB
810 case 'n':
811 /* 'n' was removed in favor of 'f'. */
812 printf("Didn't understand 'n', did you mean 'f'?\n");
813 continue; /* ask again */
d172b175
FB
814 case 's':
815 printf("Skipping execution.\n");
816 r = CONFIRM_PRETEND_SUCCESS;
817 break;
818 case 'y':
819 r = CONFIRM_EXECUTE;
820 break;
821 default:
822 assert_not_reached("Unhandled choice");
823 }
3b20f877 824 break;
3b20f877 825 }
af6da548 826
3b20f877 827restore_stdio:
af6da548 828 restore_confirm_stdio(&saved_stdin, &saved_stdout);
af6da548 829 return r;
80876c20
LP
830}
831
4d885bd3
DH
832static int get_fixed_user(const ExecContext *c, const char **user,
833 uid_t *uid, gid_t *gid,
834 const char **home, const char **shell) {
81a2b7ce 835 int r;
4d885bd3 836 const char *name;
81a2b7ce 837
4d885bd3 838 assert(c);
81a2b7ce 839
23deef88
LP
840 if (!c->user)
841 return 0;
842
4d885bd3
DH
843 /* Note that we don't set $HOME or $SHELL if they are not particularly enlightening anyway
844 * (i.e. are "/" or "/bin/nologin"). */
81a2b7ce 845
23deef88 846 name = c->user;
4d885bd3
DH
847 r = get_user_creds_clean(&name, uid, gid, home, shell);
848 if (r < 0)
849 return r;
81a2b7ce 850
4d885bd3
DH
851 *user = name;
852 return 0;
853}
854
855static int get_fixed_group(const ExecContext *c, const char **group, gid_t *gid) {
856 int r;
857 const char *name;
858
859 assert(c);
860
861 if (!c->group)
862 return 0;
863
864 name = c->group;
865 r = get_group_creds(&name, gid);
866 if (r < 0)
867 return r;
868
869 *group = name;
870 return 0;
871}
872
cdc5d5c5
DH
873static int get_supplementary_groups(const ExecContext *c, const char *user,
874 const char *group, gid_t gid,
875 gid_t **supplementary_gids, int *ngids) {
4d885bd3
DH
876 char **i;
877 int r, k = 0;
878 int ngroups_max;
879 bool keep_groups = false;
880 gid_t *groups = NULL;
881 _cleanup_free_ gid_t *l_gids = NULL;
882
883 assert(c);
884
bbeea271
DH
885 /*
886 * If user is given, then lookup GID and supplementary groups list.
887 * We avoid NSS lookups for gid=0. Also we have to initialize groups
cdc5d5c5
DH
888 * here and as early as possible so we keep the list of supplementary
889 * groups of the caller.
bbeea271
DH
890 */
891 if (user && gid_is_valid(gid) && gid != 0) {
892 /* First step, initialize groups from /etc/groups */
893 if (initgroups(user, gid) < 0)
894 return -errno;
895
896 keep_groups = true;
897 }
898
ac6e8be6 899 if (strv_isempty(c->supplementary_groups))
4d885bd3
DH
900 return 0;
901
366ddd25
DH
902 /*
903 * If SupplementaryGroups= was passed then NGROUPS_MAX has to
904 * be positive, otherwise fail.
905 */
906 errno = 0;
907 ngroups_max = (int) sysconf(_SC_NGROUPS_MAX);
908 if (ngroups_max <= 0) {
909 if (errno > 0)
910 return -errno;
911 else
912 return -EOPNOTSUPP; /* For all other values */
913 }
914
4d885bd3
DH
915 l_gids = new(gid_t, ngroups_max);
916 if (!l_gids)
917 return -ENOMEM;
81a2b7ce 918
4d885bd3
DH
919 if (keep_groups) {
920 /*
921 * Lookup the list of groups that the user belongs to, we
922 * avoid NSS lookups here too for gid=0.
923 */
924 k = ngroups_max;
925 if (getgrouplist(user, gid, l_gids, &k) < 0)
926 return -EINVAL;
927 } else
928 k = 0;
81a2b7ce 929
4d885bd3
DH
930 STRV_FOREACH(i, c->supplementary_groups) {
931 const char *g;
81a2b7ce 932
4d885bd3
DH
933 if (k >= ngroups_max)
934 return -E2BIG;
81a2b7ce 935
4d885bd3
DH
936 g = *i;
937 r = get_group_creds(&g, l_gids+k);
938 if (r < 0)
939 return r;
81a2b7ce 940
4d885bd3
DH
941 k++;
942 }
81a2b7ce 943
4d885bd3
DH
944 /*
945 * Sets ngids to zero to drop all supplementary groups, happens
946 * when we are under root and SupplementaryGroups= is empty.
947 */
948 if (k == 0) {
949 *ngids = 0;
950 return 0;
951 }
81a2b7ce 952
4d885bd3
DH
953 /* Otherwise get the final list of supplementary groups */
954 groups = memdup(l_gids, sizeof(gid_t) * k);
955 if (!groups)
956 return -ENOMEM;
957
958 *supplementary_gids = groups;
959 *ngids = k;
960
961 groups = NULL;
962
963 return 0;
964}
965
/* Applies the group credentials to the calling process: first the supplementary groups (only
 * if there are any), then the real, effective and saved GID (only if 'gid' is valid).
 *
 * Returns 0 on success, a negative errno otherwise. */
static int enforce_groups(gid_t gid, gid_t *supplementary_gids, int ngids) {

        /* Handle SupplementaryGroups= if it is not empty */
        if (ngids > 0) {
                int r;

                r = maybe_setgroups(ngids, supplementary_gids);
                if (r < 0)
                        return r;
        }

        /* Then set our gids */
        if (gid_is_valid(gid) && setresgid(gid, gid, gid) < 0)
                return -errno;

        return 0;
}
984
985static int enforce_user(const ExecContext *context, uid_t uid) {
81a2b7ce
LP
986 assert(context);
987
4d885bd3
DH
988 if (!uid_is_valid(uid))
989 return 0;
990
479050b3 991 /* Sets (but doesn't look up) the uid and make sure we keep the
81a2b7ce
LP
992 * capabilities while doing so. */
993
479050b3 994 if (context->capability_ambient_set != 0) {
81a2b7ce
LP
995
996 /* First step: If we need to keep capabilities but
997 * drop privileges we need to make sure we keep our
cbb21cca 998 * caps, while we drop privileges. */
693ced48 999 if (uid != 0) {
cbb21cca 1000 int sb = context->secure_bits | 1<<SECURE_KEEP_CAPS;
693ced48
LP
1001
1002 if (prctl(PR_GET_SECUREBITS) != sb)
1003 if (prctl(PR_SET_SECUREBITS, sb) < 0)
1004 return -errno;
1005 }
81a2b7ce
LP
1006 }
1007
479050b3 1008 /* Second step: actually set the uids */
81a2b7ce
LP
1009 if (setresuid(uid, uid, uid) < 0)
1010 return -errno;
1011
1012 /* At this point we should have all necessary capabilities but
1013 are otherwise a normal user. However, the caps might got
1014 corrupted due to the setresuid() so we need clean them up
1015 later. This is done outside of this call. */
1016
1017 return 0;
1018}
1019
349cc4a5 1020#if HAVE_PAM
5b6319dc
LP
1021
1022static int null_conv(
1023 int num_msg,
1024 const struct pam_message **msg,
1025 struct pam_response **resp,
1026 void *appdata_ptr) {
1027
1028 /* We don't support conversations */
1029
1030 return PAM_CONV_ERR;
1031}
1032
cefc33ae
LP
1033#endif
1034
5b6319dc
LP
/* Opens a PAM session and forks off a helper process ("(sd-pam)") that closes the session again once
 * the calling process is gone.
 *
 *   name       PAM service name, passed to pam_start()
 *   user       user name, passed to pam_start()
 *   uid, gid   credentials the helper process drops to before it starts waiting
 *   tty        if non-NULL, set as PAM_TTY on the PAM handle
 *   env        in/out: every entry is handed to pam_putenv(); on success the list is freed and replaced
 *              by the list returned by pam_getenvlist()
 *   fds, n_fds file descriptors that are closed in the helper process
 *
 * Returns 0 on success (and always when built without PAM support). On failure a negative errno-style
 * value is returned; failures reported by PAM itself are mapped to -EPERM. */
static int setup_pam(
                const char *name,
                const char *user,
                uid_t uid,
                gid_t gid,
                const char *tty,
                char ***env,
                int fds[], unsigned n_fds) {

#if HAVE_PAM

        static const struct pam_conv conv = {
                .conv = null_conv,
                .appdata_ptr = NULL
        };

        _cleanup_(barrier_destroy) Barrier barrier = BARRIER_NULL;
        pam_handle_t *handle = NULL;
        sigset_t old_ss;
        int pam_code = PAM_SUCCESS, r;
        char **nv, **e = NULL;
        bool close_session = false;
        pid_t pam_pid = 0, parent_pid;
        int flags = 0;

        assert(name);
        assert(user);
        assert(env);

        /* We set up PAM in the parent process, then fork. The child
         * will then stay around until killed via PR_SET_PDEATHSIG or
         * systemd via the cgroup logic. It will then remove the PAM
         * session again. The parent process will exec() the actual
         * daemon. We do things this way to ensure that the main PID
         * of the daemon is the one we initially fork()ed. */

        r = barrier_create(&barrier);
        if (r < 0)
                goto fail;

        if (log_get_max_level() < LOG_DEBUG)
                flags |= PAM_SILENT;

        pam_code = pam_start(name, user, &conv, &handle);
        if (pam_code != PAM_SUCCESS) {
                handle = NULL;
                goto fail;
        }

        if (tty) {
                pam_code = pam_set_item(handle, PAM_TTY, tty);
                if (pam_code != PAM_SUCCESS)
                        goto fail;
        }

        STRV_FOREACH(nv, *env) {
                pam_code = pam_putenv(handle, *nv);
                if (pam_code != PAM_SUCCESS)
                        goto fail;
        }

        pam_code = pam_acct_mgmt(handle, flags);
        if (pam_code != PAM_SUCCESS)
                goto fail;

        pam_code = pam_open_session(handle, flags);
        if (pam_code != PAM_SUCCESS)
                goto fail;

        /* From here on the failure path has to close the session again */
        close_session = true;

        e = pam_getenvlist(handle);
        if (!e) {
                pam_code = PAM_BUF_ERR;
                goto fail;
        }

        /* Block SIGTERM, so that we know that it won't get lost in
         * the child */

        assert_se(sigprocmask_many(SIG_BLOCK, &old_ss, SIGTERM, -1) >= 0);

        parent_pid = getpid_cached();

        pam_pid = fork();
        if (pam_pid < 0) {
                r = -errno;
                goto fail;
        }

        if (pam_pid == 0) {
                int sig, ret = EXIT_PAM;

                /* The child's job is to reset the PAM session on
                 * termination */
                barrier_set_role(&barrier, BARRIER_CHILD);

                /* This string must fit in 10 chars (i.e. the length
                 * of "/sbin/init"), to look pretty in /bin/ps */
                rename_process("(sd-pam)");

                /* Make sure we don't keep open the passed fds in this
                child. We assume that otherwise only those fds are
                open here that have been opened by PAM. */
                close_many(fds, n_fds);

                /* Drop privileges - we don't need any to pam_close_session
                 * and this will make PR_SET_PDEATHSIG work in most cases.
                 * If this fails, ignore the error - but expect sd-pam threads
                 * to fail to exit normally */

                r = maybe_setgroups(0, NULL);
                if (r < 0)
                        log_warning_errno(r, "Failed to setgroups() in sd-pam: %m");
                if (setresgid(gid, gid, gid) < 0)
                        log_warning_errno(errno, "Failed to setresgid() in sd-pam: %m");
                if (setresuid(uid, uid, uid) < 0)
                        log_warning_errno(errno, "Failed to setresuid() in sd-pam: %m");

                (void) ignore_signals(SIGPIPE, -1);

                /* Wait until our parent died. This will only work if
                 * the above setresuid() succeeds, otherwise the kernel
                 * will not allow unprivileged parents kill their privileged
                 * children this way. We rely on the control groups kill logic
                 * to do the rest for us. */
                if (prctl(PR_SET_PDEATHSIG, SIGTERM) < 0)
                        goto child_finish;

                /* Tell the parent that our setup is done. This is especially
                 * important regarding dropping privileges. Otherwise, unit
                 * setup might race against our setresuid(2) call.
                 *
                 * If the parent aborted, we'll detect this below, hence ignore
                 * return failure here. */
                (void) barrier_place(&barrier);

                /* Check if our parent process might already have died? */
                if (getppid() == parent_pid) {
                        sigset_t ss;

                        assert_se(sigemptyset(&ss) >= 0);
                        assert_se(sigaddset(&ss, SIGTERM) >= 0);

                        for (;;) {
                                int k;

                                /* sigwait() reports failure through its return value (a positive
                                 * error number), it neither returns -1 nor sets errno. */
                                k = sigwait(&ss, &sig);
                                if (k == EINTR)
                                        continue;
                                if (k != 0)
                                        goto child_finish;

                                assert(sig == SIGTERM);
                                break;
                        }
                }

                /* If our parent died we'll end the session */
                if (getppid() != parent_pid) {
                        pam_code = pam_close_session(handle, flags);
                        if (pam_code != PAM_SUCCESS)
                                goto child_finish;
                }

                ret = 0;

        child_finish:
                pam_end(handle, pam_code | flags);
                _exit(ret);
        }

        barrier_set_role(&barrier, BARRIER_PARENT);

        /* If the child was forked off successfully it will do all the
         * cleanups, so forget about the handle here. */
        handle = NULL;

        /* Unblock SIGTERM again in the parent */
        assert_se(sigprocmask(SIG_SETMASK, &old_ss, NULL) >= 0);

        /* We close the log explicitly here, since the PAM modules
         * might have opened it, but we don't want this fd around. */
        closelog();

        /* Synchronously wait for the child to initialize. We don't care for
         * errors as we cannot recover. However, warn loudly if it happens. */
        if (!barrier_place_and_sync(&barrier))
                log_error("PAM initialization failed");

        /* Hand the environment list obtained from PAM back to the caller */
        strv_free(*env);
        *env = e;

        return 0;

fail:
        if (pam_code != PAM_SUCCESS) {
                log_error("PAM failed: %s", pam_strerror(handle, pam_code));
                r = -EPERM;  /* PAM errors do not map to errno */
        } else
                log_error_errno(r, "PAM failed: %m");

        if (handle) {
                if (close_session)
                        pam_code = pam_close_session(handle, flags);

                pam_end(handle, pam_code | flags);
        }

        strv_free(e);
        closelog();

        return r;
#else
        return 0;
#endif
}
5b6319dc 1251
5d6b1584
LP
/* Renames the current process to "(<name>)", where <name> is taken from the last component of the
 * specified path. If the last component is empty the name "(...)" is used instead. */
static void rename_process_from_path(const char *path) {
        char buf[11];
        const char *name;
        size_t n;

        /* The resulting string must fit in 10 chars (i.e. the length of "/sbin/init") to look pretty
         * in /bin/ps, which leaves 8 chars for the name between the parentheses. */

        name = basename(path);
        if (isempty(name)) {
                rename_process("(...)");
                return;
        }

        n = strlen(name);
        if (n > 8) {
                /* Keep the tail: the end of the process name is usually more interesting, since the
                 * first bit might just be "systemd-" */
                name += n - 8;
                n = 8;
        }

        buf[0] = '(';
        memcpy(buf + 1, name, n);
        buf[n + 1] = ')';
        buf[n + 2] = 0;

        rename_process(buf);
}
1282
469830d1
LP
1283static bool context_has_address_families(const ExecContext *c) {
1284 assert(c);
1285
1286 return c->address_families_whitelist ||
1287 !set_isempty(c->address_families);
1288}
1289
1290static bool context_has_syscall_filters(const ExecContext *c) {
1291 assert(c);
1292
1293 return c->syscall_whitelist ||
8cfa775f 1294 !hashmap_isempty(c->syscall_filter);
469830d1
LP
1295}
1296
1297static bool context_has_no_new_privileges(const ExecContext *c) {
1298 assert(c);
1299
1300 if (c->no_new_privileges)
1301 return true;
1302
1303 if (have_effective_cap(CAP_SYS_ADMIN)) /* if we are privileged, we don't need NNP */
1304 return false;
1305
1306 /* We need NNP if we have any form of seccomp and are unprivileged */
1307 return context_has_address_families(c) ||
1308 c->memory_deny_write_execute ||
1309 c->restrict_realtime ||
1310 exec_context_restrict_namespaces_set(c) ||
1311 c->protect_kernel_tunables ||
1312 c->protect_kernel_modules ||
1313 c->private_devices ||
1314 context_has_syscall_filters(c) ||
78e864e5
TM
1315 !set_isempty(c->syscall_archs) ||
1316 c->lock_personality;
469830d1
LP
1317}
1318
349cc4a5 1319#if HAVE_SECCOMP
17df7223 1320
83f12b27 1321static bool skip_seccomp_unavailable(const Unit* u, const char* msg) {
f673b62d
LP
1322
1323 if (is_seccomp_available())
1324 return false;
1325
f673b62d 1326 log_unit_debug(u, "SECCOMP features not detected in the kernel, skipping %s", msg);
f673b62d 1327 return true;
83f12b27
FS
1328}
1329
165a31c0 1330static int apply_syscall_filter(const Unit* u, const ExecContext *c, bool needs_ambient_hack) {
469830d1 1331 uint32_t negative_action, default_action, action;
165a31c0 1332 int r;
8351ceae 1333
469830d1 1334 assert(u);
c0467cf3 1335 assert(c);
8351ceae 1336
469830d1 1337 if (!context_has_syscall_filters(c))
83f12b27
FS
1338 return 0;
1339
469830d1
LP
1340 if (skip_seccomp_unavailable(u, "SystemCallFilter="))
1341 return 0;
e9642be2 1342
469830d1 1343 negative_action = c->syscall_errno == 0 ? SCMP_ACT_KILL : SCMP_ACT_ERRNO(c->syscall_errno);
e9642be2 1344
469830d1
LP
1345 if (c->syscall_whitelist) {
1346 default_action = negative_action;
1347 action = SCMP_ACT_ALLOW;
7c66bae2 1348 } else {
469830d1
LP
1349 default_action = SCMP_ACT_ALLOW;
1350 action = negative_action;
57183d11 1351 }
8351ceae 1352
165a31c0
LP
1353 if (needs_ambient_hack) {
1354 r = seccomp_filter_set_add(c->syscall_filter, c->syscall_whitelist, syscall_filter_sets + SYSCALL_FILTER_SET_SETUID);
1355 if (r < 0)
1356 return r;
1357 }
1358
469830d1 1359 return seccomp_load_syscall_filter_set_raw(default_action, c->syscall_filter, action);
4298d0b5
LP
1360}
1361
469830d1
LP
1362static int apply_syscall_archs(const Unit *u, const ExecContext *c) {
1363 assert(u);
4298d0b5
LP
1364 assert(c);
1365
469830d1 1366 if (set_isempty(c->syscall_archs))
83f12b27
FS
1367 return 0;
1368
469830d1
LP
1369 if (skip_seccomp_unavailable(u, "SystemCallArchitectures="))
1370 return 0;
4298d0b5 1371
469830d1
LP
1372 return seccomp_restrict_archs(c->syscall_archs);
1373}
4298d0b5 1374
469830d1
LP
1375static int apply_address_families(const Unit* u, const ExecContext *c) {
1376 assert(u);
1377 assert(c);
4298d0b5 1378
469830d1
LP
1379 if (!context_has_address_families(c))
1380 return 0;
4298d0b5 1381
469830d1
LP
1382 if (skip_seccomp_unavailable(u, "RestrictAddressFamilies="))
1383 return 0;
4298d0b5 1384
469830d1 1385 return seccomp_restrict_address_families(c->address_families, c->address_families_whitelist);
8351ceae 1386}
4298d0b5 1387
83f12b27 1388static int apply_memory_deny_write_execute(const Unit* u, const ExecContext *c) {
469830d1 1389 assert(u);
f3e43635
TM
1390 assert(c);
1391
469830d1 1392 if (!c->memory_deny_write_execute)
83f12b27
FS
1393 return 0;
1394
469830d1
LP
1395 if (skip_seccomp_unavailable(u, "MemoryDenyWriteExecute="))
1396 return 0;
f3e43635 1397
469830d1 1398 return seccomp_memory_deny_write_execute();
f3e43635
TM
1399}
1400
83f12b27 1401static int apply_restrict_realtime(const Unit* u, const ExecContext *c) {
469830d1 1402 assert(u);
f4170c67
LP
1403 assert(c);
1404
469830d1 1405 if (!c->restrict_realtime)
83f12b27
FS
1406 return 0;
1407
469830d1
LP
1408 if (skip_seccomp_unavailable(u, "RestrictRealtime="))
1409 return 0;
f4170c67 1410
469830d1 1411 return seccomp_restrict_realtime();
f4170c67
LP
1412}
1413
59e856c7 1414static int apply_protect_sysctl(const Unit *u, const ExecContext *c) {
469830d1 1415 assert(u);
59eeb84b
LP
1416 assert(c);
1417
1418 /* Turn off the legacy sysctl() system call. Many distributions turn this off while building the kernel, but
1419 * let's protect even those systems where this is left on in the kernel. */
1420
469830d1 1421 if (!c->protect_kernel_tunables)
59eeb84b
LP
1422 return 0;
1423
469830d1
LP
1424 if (skip_seccomp_unavailable(u, "ProtectKernelTunables="))
1425 return 0;
59eeb84b 1426
469830d1 1427 return seccomp_protect_sysctl();
59eeb84b
LP
1428}
1429
59e856c7 1430static int apply_protect_kernel_modules(const Unit *u, const ExecContext *c) {
469830d1 1431 assert(u);
502d704e
DH
1432 assert(c);
1433
25a8d8a0 1434 /* Turn off module syscalls on ProtectKernelModules=yes */
502d704e 1435
469830d1
LP
1436 if (!c->protect_kernel_modules)
1437 return 0;
1438
502d704e
DH
1439 if (skip_seccomp_unavailable(u, "ProtectKernelModules="))
1440 return 0;
1441
469830d1 1442 return seccomp_load_syscall_filter_set(SCMP_ACT_ALLOW, syscall_filter_sets + SYSCALL_FILTER_SET_MODULE, SCMP_ACT_ERRNO(EPERM));
502d704e
DH
1443}
1444
59e856c7 1445static int apply_private_devices(const Unit *u, const ExecContext *c) {
469830d1 1446 assert(u);
ba128bb8
LP
1447 assert(c);
1448
8f81a5f6 1449 /* If PrivateDevices= is set, also turn off iopl and all @raw-io syscalls. */
ba128bb8 1450
469830d1
LP
1451 if (!c->private_devices)
1452 return 0;
1453
ba128bb8
LP
1454 if (skip_seccomp_unavailable(u, "PrivateDevices="))
1455 return 0;
1456
469830d1 1457 return seccomp_load_syscall_filter_set(SCMP_ACT_ALLOW, syscall_filter_sets + SYSCALL_FILTER_SET_RAW_IO, SCMP_ACT_ERRNO(EPERM));
ba128bb8
LP
1458}
1459
add00535 1460static int apply_restrict_namespaces(Unit *u, const ExecContext *c) {
469830d1 1461 assert(u);
add00535
LP
1462 assert(c);
1463
1464 if (!exec_context_restrict_namespaces_set(c))
1465 return 0;
1466
1467 if (skip_seccomp_unavailable(u, "RestrictNamespaces="))
1468 return 0;
1469
1470 return seccomp_restrict_namespaces(c->restrict_namespaces);
1471}
1472
78e864e5 1473static int apply_lock_personality(const Unit* u, const ExecContext *c) {
e8132d63
LP
1474 unsigned long personality;
1475 int r;
78e864e5
TM
1476
1477 assert(u);
1478 assert(c);
1479
1480 if (!c->lock_personality)
1481 return 0;
1482
1483 if (skip_seccomp_unavailable(u, "LockPersonality="))
1484 return 0;
1485
e8132d63
LP
1486 personality = c->personality;
1487
1488 /* If personality is not specified, use either PER_LINUX or PER_LINUX32 depending on what is currently set. */
1489 if (personality == PERSONALITY_INVALID) {
1490
1491 r = opinionated_personality(&personality);
1492 if (r < 0)
1493 return r;
1494 }
78e864e5
TM
1495
1496 return seccomp_lock_personality(personality);
1497}
1498
c0467cf3 1499#endif
8351ceae 1500
31a7eb86
ZJS
1501static void do_idle_pipe_dance(int idle_pipe[4]) {
1502 assert(idle_pipe);
1503
54eb2300
LP
1504 idle_pipe[1] = safe_close(idle_pipe[1]);
1505 idle_pipe[2] = safe_close(idle_pipe[2]);
31a7eb86
ZJS
1506
1507 if (idle_pipe[0] >= 0) {
1508 int r;
1509
1510 r = fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT_USEC);
1511
1512 if (idle_pipe[3] >= 0 && r == 0 /* timeout */) {
c7cc737f
LP
1513 ssize_t n;
1514
31a7eb86 1515 /* Signal systemd that we are bored and want to continue. */
c7cc737f
LP
1516 n = write(idle_pipe[3], "x", 1);
1517 if (n > 0)
cd972d69
ZJS
1518 /* Wait for systemd to react to the signal above. */
1519 fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT2_USEC);
31a7eb86
ZJS
1520 }
1521
54eb2300 1522 idle_pipe[0] = safe_close(idle_pipe[0]);
31a7eb86
ZJS
1523
1524 }
1525
54eb2300 1526 idle_pipe[3] = safe_close(idle_pipe[3]);
31a7eb86
ZJS
1527}
1528
7cae38c4 1529static int build_environment(
fd63e712 1530 Unit *u,
9fa95f85 1531 const ExecContext *c,
1e22b5cd 1532 const ExecParameters *p,
7cae38c4
LP
1533 unsigned n_fds,
1534 const char *home,
1535 const char *username,
1536 const char *shell,
7bce046b
LP
1537 dev_t journal_stream_dev,
1538 ino_t journal_stream_ino,
7cae38c4
LP
1539 char ***ret) {
1540
1541 _cleanup_strv_free_ char **our_env = NULL;
1542 unsigned n_env = 0;
1543 char *x;
1544
4b58153d 1545 assert(u);
7cae38c4
LP
1546 assert(c);
1547 assert(ret);
1548
4b58153d 1549 our_env = new0(char*, 14);
7cae38c4
LP
1550 if (!our_env)
1551 return -ENOMEM;
1552
1553 if (n_fds > 0) {
8dd4c05b
LP
1554 _cleanup_free_ char *joined = NULL;
1555
df0ff127 1556 if (asprintf(&x, "LISTEN_PID="PID_FMT, getpid_cached()) < 0)
7cae38c4
LP
1557 return -ENOMEM;
1558 our_env[n_env++] = x;
1559
1560 if (asprintf(&x, "LISTEN_FDS=%u", n_fds) < 0)
1561 return -ENOMEM;
1562 our_env[n_env++] = x;
8dd4c05b 1563
1e22b5cd 1564 joined = strv_join(p->fd_names, ":");
8dd4c05b
LP
1565 if (!joined)
1566 return -ENOMEM;
1567
605405c6 1568 x = strjoin("LISTEN_FDNAMES=", joined);
8dd4c05b
LP
1569 if (!x)
1570 return -ENOMEM;
1571 our_env[n_env++] = x;
7cae38c4
LP
1572 }
1573
b08af3b1 1574 if ((p->flags & EXEC_SET_WATCHDOG) && p->watchdog_usec > 0) {
df0ff127 1575 if (asprintf(&x, "WATCHDOG_PID="PID_FMT, getpid_cached()) < 0)
09812eb7
LP
1576 return -ENOMEM;
1577 our_env[n_env++] = x;
1578
1e22b5cd 1579 if (asprintf(&x, "WATCHDOG_USEC="USEC_FMT, p->watchdog_usec) < 0)
09812eb7
LP
1580 return -ENOMEM;
1581 our_env[n_env++] = x;
1582 }
1583
fd63e712
LP
1584 /* If this is D-Bus, tell the nss-systemd module, since it relies on being able to use D-Bus look up dynamic
1585 * users via PID 1, possibly dead-locking the dbus daemon. This way it will not use D-Bus to resolve names, but
1586 * check the database directly. */
ac647978 1587 if (p->flags & EXEC_NSS_BYPASS_BUS) {
fd63e712
LP
1588 x = strdup("SYSTEMD_NSS_BYPASS_BUS=1");
1589 if (!x)
1590 return -ENOMEM;
1591 our_env[n_env++] = x;
1592 }
1593
7cae38c4
LP
1594 if (home) {
1595 x = strappend("HOME=", home);
1596 if (!x)
1597 return -ENOMEM;
1598 our_env[n_env++] = x;
1599 }
1600
1601 if (username) {
1602 x = strappend("LOGNAME=", username);
1603 if (!x)
1604 return -ENOMEM;
1605 our_env[n_env++] = x;
1606
1607 x = strappend("USER=", username);
1608 if (!x)
1609 return -ENOMEM;
1610 our_env[n_env++] = x;
1611 }
1612
1613 if (shell) {
1614 x = strappend("SHELL=", shell);
1615 if (!x)
1616 return -ENOMEM;
1617 our_env[n_env++] = x;
1618 }
1619
4b58153d
LP
1620 if (!sd_id128_is_null(u->invocation_id)) {
1621 if (asprintf(&x, "INVOCATION_ID=" SD_ID128_FORMAT_STR, SD_ID128_FORMAT_VAL(u->invocation_id)) < 0)
1622 return -ENOMEM;
1623
1624 our_env[n_env++] = x;
1625 }
1626
6af760f3
LP
1627 if (exec_context_needs_term(c)) {
1628 const char *tty_path, *term = NULL;
1629
1630 tty_path = exec_context_tty_path(c);
1631
1632 /* If we are forked off PID 1 and we are supposed to operate on /dev/console, then let's try to inherit
1633 * the $TERM set for PID 1. This is useful for containers so that the $TERM the container manager
1634 * passes to PID 1 ends up all the way in the console login shown. */
1635
1636 if (path_equal(tty_path, "/dev/console") && getppid() == 1)
1637 term = getenv("TERM");
1638 if (!term)
1639 term = default_term_for_tty(tty_path);
7cae38c4 1640
6af760f3 1641 x = strappend("TERM=", term);
7cae38c4
LP
1642 if (!x)
1643 return -ENOMEM;
1644 our_env[n_env++] = x;
1645 }
1646
7bce046b
LP
1647 if (journal_stream_dev != 0 && journal_stream_ino != 0) {
1648 if (asprintf(&x, "JOURNAL_STREAM=" DEV_FMT ":" INO_FMT, journal_stream_dev, journal_stream_ino) < 0)
1649 return -ENOMEM;
1650
1651 our_env[n_env++] = x;
1652 }
1653
7cae38c4 1654 our_env[n_env++] = NULL;
7bce046b 1655 assert(n_env <= 12);
7cae38c4
LP
1656
1657 *ret = our_env;
1658 our_env = NULL;
1659
1660 return 0;
1661}
1662
b4c14404
FB
1663static int build_pass_environment(const ExecContext *c, char ***ret) {
1664 _cleanup_strv_free_ char **pass_env = NULL;
1665 size_t n_env = 0, n_bufsize = 0;
1666 char **i;
1667
1668 STRV_FOREACH(i, c->pass_environment) {
1669 _cleanup_free_ char *x = NULL;
1670 char *v;
1671
1672 v = getenv(*i);
1673 if (!v)
1674 continue;
605405c6 1675 x = strjoin(*i, "=", v);
b4c14404
FB
1676 if (!x)
1677 return -ENOMEM;
00819cc1 1678
b4c14404
FB
1679 if (!GREEDY_REALLOC(pass_env, n_bufsize, n_env + 2))
1680 return -ENOMEM;
00819cc1 1681
b4c14404
FB
1682 pass_env[n_env++] = x;
1683 pass_env[n_env] = NULL;
1684 x = NULL;
1685 }
1686
1687 *ret = pass_env;
1688 pass_env = NULL;
1689
1690 return 0;
1691}
1692
8b44a3d2
LP
1693static bool exec_needs_mount_namespace(
1694 const ExecContext *context,
1695 const ExecParameters *params,
1696 ExecRuntime *runtime) {
1697
1698 assert(context);
1699 assert(params);
1700
915e6d16
LP
1701 if (context->root_image)
1702 return true;
1703
2a624c36
AP
1704 if (!strv_isempty(context->read_write_paths) ||
1705 !strv_isempty(context->read_only_paths) ||
1706 !strv_isempty(context->inaccessible_paths))
8b44a3d2
LP
1707 return true;
1708
652bb263
YW
1709 if (context->n_bind_mounts > 0 ||
1710 !strv_isempty(context->directories[EXEC_DIRECTORY_RUNTIME].paths) ||
1711 !strv_isempty(context->directories[EXEC_DIRECTORY_STATE].paths) ||
1712 !strv_isempty(context->directories[EXEC_DIRECTORY_CACHE].paths) ||
1713 !strv_isempty(context->directories[EXEC_DIRECTORY_LOGS].paths) ||
1714 !strv_isempty(context->directories[EXEC_DIRECTORY_CONFIGURATION].paths))
d2d6c096
LP
1715 return true;
1716
8b44a3d2
LP
1717 if (context->mount_flags != 0)
1718 return true;
1719
1720 if (context->private_tmp && runtime && (runtime->tmp_dir || runtime->var_tmp_dir))
1721 return true;
1722
8b44a3d2
LP
1723 if (context->private_devices ||
1724 context->protect_system != PROTECT_SYSTEM_NO ||
59eeb84b
LP
1725 context->protect_home != PROTECT_HOME_NO ||
1726 context->protect_kernel_tunables ||
c575770b 1727 context->protect_kernel_modules ||
59eeb84b 1728 context->protect_control_groups)
8b44a3d2
LP
1729 return true;
1730
9c988f93 1731 if (context->mount_apivfs && (context->root_image || context->root_directory))
5d997827
LP
1732 return true;
1733
8b44a3d2
LP
1734 return false;
1735}
1736
d251207d
LP
1737static int setup_private_users(uid_t uid, gid_t gid) {
1738 _cleanup_free_ char *uid_map = NULL, *gid_map = NULL;
1739 _cleanup_close_pair_ int errno_pipe[2] = { -1, -1 };
1740 _cleanup_close_ int unshare_ready_fd = -1;
1741 _cleanup_(sigkill_waitp) pid_t pid = 0;
1742 uint64_t c = 1;
1743 siginfo_t si;
1744 ssize_t n;
1745 int r;
1746
1747 /* Set up a user namespace and map root to root, the selected UID/GID to itself, and everything else to
1748 * nobody. In order to be able to write this mapping we need CAP_SETUID in the original user namespace, which
1749 * we however lack after opening the user namespace. To work around this we fork() a temporary child process,
1750 * which waits for the parent to create the new user namespace while staying in the original namespace. The
1751 * child then writes the UID mapping, under full privileges. The parent waits for the child to finish and
1752 * continues execution normally. */
1753
587ab01b
ZJS
1754 if (uid != 0 && uid_is_valid(uid)) {
1755 r = asprintf(&uid_map,
1756 "0 0 1\n" /* Map root → root */
1757 UID_FMT " " UID_FMT " 1\n", /* Map $UID → $UID */
1758 uid, uid);
1759 if (r < 0)
1760 return -ENOMEM;
1761 } else {
e0f3720e 1762 uid_map = strdup("0 0 1\n"); /* The case where the above is the same */
587ab01b
ZJS
1763 if (!uid_map)
1764 return -ENOMEM;
1765 }
d251207d 1766
587ab01b
ZJS
1767 if (gid != 0 && gid_is_valid(gid)) {
1768 r = asprintf(&gid_map,
1769 "0 0 1\n" /* Map root → root */
1770 GID_FMT " " GID_FMT " 1\n", /* Map $GID → $GID */
1771 gid, gid);
1772 if (r < 0)
1773 return -ENOMEM;
1774 } else {
d251207d 1775 gid_map = strdup("0 0 1\n"); /* The case where the above is the same */
587ab01b
ZJS
1776 if (!gid_map)
1777 return -ENOMEM;
1778 }
d251207d
LP
1779
1780 /* Create a communication channel so that the parent can tell the child when it finished creating the user
1781 * namespace. */
1782 unshare_ready_fd = eventfd(0, EFD_CLOEXEC);
1783 if (unshare_ready_fd < 0)
1784 return -errno;
1785
1786 /* Create a communication channel so that the child can tell the parent a proper error code in case it
1787 * failed. */
1788 if (pipe2(errno_pipe, O_CLOEXEC) < 0)
1789 return -errno;
1790
1791 pid = fork();
1792 if (pid < 0)
1793 return -errno;
1794
1795 if (pid == 0) {
1796 _cleanup_close_ int fd = -1;
1797 const char *a;
1798 pid_t ppid;
1799
1800 /* Child process, running in the original user namespace. Let's update the parent's UID/GID map from
1801 * here, after the parent opened its own user namespace. */
1802
1803 ppid = getppid();
1804 errno_pipe[0] = safe_close(errno_pipe[0]);
1805
1806 /* Wait until the parent unshared the user namespace */
1807 if (read(unshare_ready_fd, &c, sizeof(c)) < 0) {
1808 r = -errno;
1809 goto child_fail;
1810 }
1811
1812 /* Disable the setgroups() system call in the child user namespace, for good. */
1813 a = procfs_file_alloca(ppid, "setgroups");
1814 fd = open(a, O_WRONLY|O_CLOEXEC);
1815 if (fd < 0) {
1816 if (errno != ENOENT) {
1817 r = -errno;
1818 goto child_fail;
1819 }
1820
1821 /* If the file is missing the kernel is too old, let's continue anyway. */
1822 } else {
1823 if (write(fd, "deny\n", 5) < 0) {
1824 r = -errno;
1825 goto child_fail;
1826 }
1827
1828 fd = safe_close(fd);
1829 }
1830
1831 /* First write the GID map */
1832 a = procfs_file_alloca(ppid, "gid_map");
1833 fd = open(a, O_WRONLY|O_CLOEXEC);
1834 if (fd < 0) {
1835 r = -errno;
1836 goto child_fail;
1837 }
1838 if (write(fd, gid_map, strlen(gid_map)) < 0) {
1839 r = -errno;
1840 goto child_fail;
1841 }
1842 fd = safe_close(fd);
1843
1844 /* The write the UID map */
1845 a = procfs_file_alloca(ppid, "uid_map");
1846 fd = open(a, O_WRONLY|O_CLOEXEC);
1847 if (fd < 0) {
1848 r = -errno;
1849 goto child_fail;
1850 }
1851 if (write(fd, uid_map, strlen(uid_map)) < 0) {
1852 r = -errno;
1853 goto child_fail;
1854 }
1855
1856 _exit(EXIT_SUCCESS);
1857
1858 child_fail:
1859 (void) write(errno_pipe[1], &r, sizeof(r));
1860 _exit(EXIT_FAILURE);
1861 }
1862
1863 errno_pipe[1] = safe_close(errno_pipe[1]);
1864
1865 if (unshare(CLONE_NEWUSER) < 0)
1866 return -errno;
1867
1868 /* Let the child know that the namespace is ready now */
1869 if (write(unshare_ready_fd, &c, sizeof(c)) < 0)
1870 return -errno;
1871
1872 /* Try to read an error code from the child */
1873 n = read(errno_pipe[0], &r, sizeof(r));
1874 if (n < 0)
1875 return -errno;
1876 if (n == sizeof(r)) { /* an error code was sent to us */
1877 if (r < 0)
1878 return r;
1879 return -EIO;
1880 }
1881 if (n != 0) /* on success we should have read 0 bytes */
1882 return -EIO;
1883
1884 r = wait_for_terminate(pid, &si);
1885 if (r < 0)
1886 return r;
1887 pid = 0;
1888
1889 /* If something strange happened with the child, let's consider this fatal, too */
1890 if (si.si_code != CLD_EXITED || si.si_status != 0)
1891 return -EIO;
1892
1893 return 0;
1894}
1895
3536f49e 1896static int setup_exec_directory(
07689d5d
LP
1897 const ExecContext *context,
1898 const ExecParameters *params,
1899 uid_t uid,
3536f49e 1900 gid_t gid,
3536f49e
YW
1901 ExecDirectoryType type,
1902 int *exit_status) {
07689d5d 1903
72fd1768 1904 static const int exit_status_table[_EXEC_DIRECTORY_TYPE_MAX] = {
3536f49e
YW
1905 [EXEC_DIRECTORY_RUNTIME] = EXIT_RUNTIME_DIRECTORY,
1906 [EXEC_DIRECTORY_STATE] = EXIT_STATE_DIRECTORY,
1907 [EXEC_DIRECTORY_CACHE] = EXIT_CACHE_DIRECTORY,
1908 [EXEC_DIRECTORY_LOGS] = EXIT_LOGS_DIRECTORY,
1909 [EXEC_DIRECTORY_CONFIGURATION] = EXIT_CONFIGURATION_DIRECTORY,
1910 };
07689d5d
LP
1911 char **rt;
1912 int r;
1913
1914 assert(context);
1915 assert(params);
72fd1768 1916 assert(type >= 0 && type < _EXEC_DIRECTORY_TYPE_MAX);
3536f49e 1917 assert(exit_status);
07689d5d 1918
3536f49e
YW
1919 if (!params->prefix[type])
1920 return 0;
1921
8679efde 1922 if (params->flags & EXEC_CHOWN_DIRECTORIES) {
3536f49e
YW
1923 if (!uid_is_valid(uid))
1924 uid = 0;
1925 if (!gid_is_valid(gid))
1926 gid = 0;
1927 }
1928
1929 STRV_FOREACH(rt, context->directories[type].paths) {
6c47cd7d
LP
1930 _cleanup_free_ char *p = NULL, *pp = NULL;
1931 const char *effective;
07689d5d 1932
3536f49e
YW
1933 p = strjoin(params->prefix[type], "/", *rt);
1934 if (!p) {
1935 r = -ENOMEM;
1936 goto fail;
1937 }
07689d5d 1938
23a7448e
YW
1939 r = mkdir_parents_label(p, 0755);
1940 if (r < 0)
3536f49e 1941 goto fail;
23a7448e 1942
8092a48c
YW
1943 if (context->dynamic_user &&
1944 !IN_SET(type, EXEC_DIRECTORY_RUNTIME, EXEC_DIRECTORY_CONFIGURATION)) {
6c47cd7d
LP
1945 _cleanup_free_ char *private_root = NULL, *relative = NULL, *parent = NULL;
1946
1947 /* So, here's one extra complication when dealing with DynamicUser=1 units. In that case we
1948 * want to avoid leaving a directory around fully accessible that is owned by a dynamic user
1949 * whose UID is later on reused. To lock this down we use the same trick used by container
1950 * managers to prohibit host users to get access to files of the same UID in containers: we
1951 * place everything inside a directory that has an access mode of 0700 and is owned root:root,
1952 * so that it acts as security boundary for unprivileged host code. We then use fs namespacing
1953 * to make this directory permeable for the service itself.
1954 *
1955 * Specifically: for a service which wants a special directory "foo/" we first create a
1956 * directory "private/" with access mode 0700 owned by root:root. Then we place "foo" inside of
1957 * that directory (i.e. "private/foo/"), and make "foo" a symlink to "private/foo". This way,
1958 * privileged host users can access "foo/" as usual, but unprivileged host users can't look
1959 * into it. Inside of the namespaceof the container "private/" is replaced by a more liberally
1960 * accessible tmpfs, into which the host's "private/foo/" is mounted under the same name, thus
1961 * disabling the access boundary for the service and making sure it only gets access to the
1962 * dirs it needs but no others. Tricky? Yes, absolutely, but it works!
1963 *
1964 * Note that we don't do this for EXEC_DIRECTORY_CONFIGURATION as that's assumed not to be
8092a48c
YW
1965 * owned by the service itself.
1966 * Also, note that we don't do this for EXEC_DIRECTORY_RUNTIME as that's often used for sharing
1967 * files or sockets with other services. */
6c47cd7d
LP
1968
1969 private_root = strjoin(params->prefix[type], "/private");
1970 if (!private_root) {
1971 r = -ENOMEM;
1972 goto fail;
1973 }
1974
1975 /* First set up private root if it doesn't exist yet, with access mode 0700 and owned by root:root */
c31ad024 1976 r = mkdir_safe_label(private_root, 0700, 0, 0, false);
6c47cd7d
LP
1977 if (r < 0)
1978 goto fail;
1979
1980 pp = strjoin(private_root, "/", *rt);
1981 if (!pp) {
1982 r = -ENOMEM;
1983 goto fail;
1984 }
1985
1986 /* Create all directories between the configured directory and this private root, and mark them 0755 */
1987 r = mkdir_parents_label(pp, 0755);
1988 if (r < 0)
1989 goto fail;
1990
1991 /* Finally, create the actual directory for the service */
1992 r = mkdir_label(pp, context->directories[type].mode);
1993 if (r < 0 && r != -EEXIST)
1994 goto fail;
1995
1996 parent = dirname_malloc(p);
1997 if (!parent) {
1998 r = -ENOMEM;
1999 goto fail;
2000 }
2001
2002 r = path_make_relative(parent, pp, &relative);
2003 if (r < 0)
2004 goto fail;
2005
2006 /* And link it up from the original place */
2007 r = symlink_idempotent(relative, p);
2008 if (r < 0)
2009 goto fail;
2010
2011 effective = pp;
2012
2013 } else {
2014 r = mkdir_label(p, context->directories[type].mode);
2015 if (r < 0 && r != -EEXIST)
2016 goto fail;
2017
2018 effective = p;
2019 }
a1164ae3
LP
2020
2021 /* First lock down the access mode */
6c47cd7d 2022 if (chmod(effective, context->directories[type].mode) < 0) {
a1164ae3 2023 r = -errno;
3536f49e 2024 goto fail;
a1164ae3 2025 }
07689d5d 2026
c71b2eb7
LP
2027 /* Don't change the owner of the configuration directory, as in the common case it is not written to by
2028 * a service, and shall not be writable. */
2029 if (type == EXEC_DIRECTORY_CONFIGURATION)
2030 continue;
2031
a1164ae3 2032 /* Then, change the ownership of the whole tree, if necessary */
6c47cd7d 2033 r = path_chown_recursive(effective, uid, gid);
07689d5d 2034 if (r < 0)
3536f49e 2035 goto fail;
07689d5d
LP
2036 }
2037
2038 return 0;
3536f49e
YW
2039
2040fail:
2041 *exit_status = exit_status_table[type];
3536f49e 2042 return r;
07689d5d
LP
2043}
2044
cefc33ae
LP
2045static int setup_smack(
2046 const ExecContext *context,
2047 const ExecCommand *command) {
2048
cefc33ae
LP
2049 int r;
2050
2051 assert(context);
2052 assert(command);
2053
cefc33ae
LP
2054 if (context->smack_process_label) {
2055 r = mac_smack_apply_pid(0, context->smack_process_label);
2056 if (r < 0)
2057 return r;
2058 }
2059#ifdef SMACK_DEFAULT_PROCESS_LABEL
2060 else {
2061 _cleanup_free_ char *exec_label = NULL;
2062
2063 r = mac_smack_read(command->path, SMACK_ATTR_EXEC, &exec_label);
4c701096 2064 if (r < 0 && !IN_SET(r, -ENODATA, -EOPNOTSUPP))
cefc33ae
LP
2065 return r;
2066
2067 r = mac_smack_apply_pid(0, exec_label ? : SMACK_DEFAULT_PROCESS_LABEL);
2068 if (r < 0)
2069 return r;
2070 }
cefc33ae
LP
2071#endif
2072
2073 return 0;
2074}
2075
6c47cd7d
LP
2076static int compile_bind_mounts(
2077 const ExecContext *context,
2078 const ExecParameters *params,
2079 BindMount **ret_bind_mounts,
2080 unsigned *ret_n_bind_mounts,
2081 char ***ret_empty_directories) {
2082
2083 _cleanup_strv_free_ char **empty_directories = NULL;
2084 BindMount *bind_mounts;
2085 unsigned n, h = 0, i;
2086 ExecDirectoryType t;
2087 int r;
2088
2089 assert(context);
2090 assert(params);
2091 assert(ret_bind_mounts);
2092 assert(ret_n_bind_mounts);
2093 assert(ret_empty_directories);
2094
2095 n = context->n_bind_mounts;
2096 for (t = 0; t < _EXEC_DIRECTORY_TYPE_MAX; t++) {
2097 if (!params->prefix[t])
2098 continue;
2099
2100 n += strv_length(context->directories[t].paths);
2101 }
2102
2103 if (n <= 0) {
2104 *ret_bind_mounts = NULL;
2105 *ret_n_bind_mounts = 0;
2106 *ret_empty_directories = NULL;
2107 return 0;
2108 }
2109
2110 bind_mounts = new(BindMount, n);
2111 if (!bind_mounts)
2112 return -ENOMEM;
2113
a8cabc61 2114 for (i = 0; i < context->n_bind_mounts; i++) {
6c47cd7d
LP
2115 BindMount *item = context->bind_mounts + i;
2116 char *s, *d;
2117
2118 s = strdup(item->source);
2119 if (!s) {
2120 r = -ENOMEM;
2121 goto finish;
2122 }
2123
2124 d = strdup(item->destination);
2125 if (!d) {
2126 free(s);
2127 r = -ENOMEM;
2128 goto finish;
2129 }
2130
2131 bind_mounts[h++] = (BindMount) {
2132 .source = s,
2133 .destination = d,
2134 .read_only = item->read_only,
2135 .recursive = item->recursive,
2136 .ignore_enoent = item->ignore_enoent,
2137 };
2138 }
2139
2140 for (t = 0; t < _EXEC_DIRECTORY_TYPE_MAX; t++) {
2141 char **suffix;
2142
2143 if (!params->prefix[t])
2144 continue;
2145
2146 if (strv_isempty(context->directories[t].paths))
2147 continue;
2148
8092a48c
YW
2149 if (context->dynamic_user &&
2150 !IN_SET(t, EXEC_DIRECTORY_RUNTIME, EXEC_DIRECTORY_CONFIGURATION)) {
6c47cd7d
LP
2151 char *private_root;
2152
2153 /* So this is for a dynamic user, and we need to make sure the process can access its own
2154 * directory. For that we overmount the usually inaccessible "private" subdirectory with a
2155 * tmpfs that makes it accessible and is empty except for the submounts we do this for. */
2156
2157 private_root = strjoin(params->prefix[t], "/private");
2158 if (!private_root) {
2159 r = -ENOMEM;
2160 goto finish;
2161 }
2162
2163 r = strv_consume(&empty_directories, private_root);
2164 if (r < 0) {
2165 r = -ENOMEM;
2166 goto finish;
2167 }
2168 }
2169
2170 STRV_FOREACH(suffix, context->directories[t].paths) {
2171 char *s, *d;
2172
8092a48c
YW
2173 if (context->dynamic_user &&
2174 !IN_SET(t, EXEC_DIRECTORY_RUNTIME, EXEC_DIRECTORY_CONFIGURATION))
6c47cd7d
LP
2175 s = strjoin(params->prefix[t], "/private/", *suffix);
2176 else
2177 s = strjoin(params->prefix[t], "/", *suffix);
2178 if (!s) {
2179 r = -ENOMEM;
2180 goto finish;
2181 }
2182
2183 d = strdup(s);
2184 if (!d) {
2185 free(s);
2186 r = -ENOMEM;
2187 goto finish;
2188 }
2189
2190 bind_mounts[h++] = (BindMount) {
2191 .source = s,
2192 .destination = d,
2193 .read_only = false,
2194 .recursive = true,
2195 .ignore_enoent = false,
2196 };
2197 }
2198 }
2199
2200 assert(h == n);
2201
2202 *ret_bind_mounts = bind_mounts;
2203 *ret_n_bind_mounts = n;
2204 *ret_empty_directories = empty_directories;
2205
2206 empty_directories = NULL;
2207
2208 return (int) n;
2209
2210finish:
2211 bind_mount_free_many(bind_mounts, h);
2212 return r;
2213}
2214
6818c54c
LP
2215static int apply_mount_namespace(
2216 Unit *u,
2217 ExecCommand *command,
2218 const ExecContext *context,
2219 const ExecParameters *params,
2220 ExecRuntime *runtime) {
2221
7bcef4ef 2222 _cleanup_strv_free_ char **empty_directories = NULL;
93c6bb51 2223 char *tmp = NULL, *var = NULL;
915e6d16 2224 const char *root_dir = NULL, *root_image = NULL;
bb0ff3fb 2225 NamespaceInfo ns_info = {
af964954 2226 .ignore_protect_paths = false,
93c6bb51
DH
2227 .private_dev = context->private_devices,
2228 .protect_control_groups = context->protect_control_groups,
2229 .protect_kernel_tunables = context->protect_kernel_tunables,
2230 .protect_kernel_modules = context->protect_kernel_modules,
5d997827 2231 .mount_apivfs = context->mount_apivfs,
93c6bb51 2232 };
165a31c0 2233 bool needs_sandboxing;
6c47cd7d
LP
2234 BindMount *bind_mounts = NULL;
2235 unsigned n_bind_mounts = 0;
6818c54c 2236 int r;
93c6bb51 2237
2b3c1b9e
DH
2238 assert(context);
2239
93c6bb51
DH
2240 /* The runtime struct only contains the parent of the private /tmp,
2241 * which is non-accessible to world users. Inside of it there's a /tmp
2242 * that is sticky, and that's the one we want to use here. */
2243
2244 if (context->private_tmp && runtime) {
2245 if (runtime->tmp_dir)
2246 tmp = strjoina(runtime->tmp_dir, "/tmp");
2247 if (runtime->var_tmp_dir)
2248 var = strjoina(runtime->var_tmp_dir, "/tmp");
2249 }
2250
915e6d16
LP
2251 if (params->flags & EXEC_APPLY_CHROOT) {
2252 root_image = context->root_image;
2253
2254 if (!root_image)
2255 root_dir = context->root_directory;
2256 }
93c6bb51 2257
6c47cd7d
LP
2258 r = compile_bind_mounts(context, params, &bind_mounts, &n_bind_mounts, &empty_directories);
2259 if (r < 0)
2260 return r;
2261
af964954
DH
2262 /*
2263 * If DynamicUser=no and RootDirectory= is set then lets pass a relaxed
2264 * sandbox info, otherwise enforce it, don't ignore protected paths and
2265 * fail if we are enable to apply the sandbox inside the mount namespace.
2266 */
2267 if (!context->dynamic_user && root_dir)
2268 ns_info.ignore_protect_paths = true;
2269
165a31c0 2270 needs_sandboxing = (params->flags & EXEC_APPLY_SANDBOXING) && !(command->flags & EXEC_COMMAND_FULLY_PRIVILEGED);
6818c54c 2271
915e6d16 2272 r = setup_namespace(root_dir, root_image,
7bcef4ef 2273 &ns_info, context->read_write_paths,
165a31c0
LP
2274 needs_sandboxing ? context->read_only_paths : NULL,
2275 needs_sandboxing ? context->inaccessible_paths : NULL,
6c47cd7d
LP
2276 empty_directories,
2277 bind_mounts,
2278 n_bind_mounts,
93c6bb51
DH
2279 tmp,
2280 var,
165a31c0
LP
2281 needs_sandboxing ? context->protect_home : PROTECT_HOME_NO,
2282 needs_sandboxing ? context->protect_system : PROTECT_SYSTEM_NO,
915e6d16
LP
2283 context->mount_flags,
2284 DISSECT_IMAGE_DISCARD_ON_LOOP);
93c6bb51 2285
6c47cd7d
LP
2286 bind_mount_free_many(bind_mounts, n_bind_mounts);
2287
93c6bb51
DH
2288 /* If we couldn't set up the namespace this is probably due to a
2289 * missing capability. In this case, silently proceeed. */
2290 if (IN_SET(r, -EPERM, -EACCES)) {
93c6bb51 2291 log_unit_debug_errno(u, r, "Failed to set up namespace, assuming containerized execution, ignoring: %m");
86ffb325 2292 return 0;
93c6bb51
DH
2293 }
2294
2295 return r;
2296}
2297
915e6d16
LP
2298static int apply_working_directory(
2299 const ExecContext *context,
2300 const ExecParameters *params,
2301 const char *home,
376fecf6
LP
2302 const bool needs_mount_ns,
2303 int *exit_status) {
915e6d16 2304
6732edab 2305 const char *d, *wd;
2b3c1b9e
DH
2306
2307 assert(context);
376fecf6 2308 assert(exit_status);
2b3c1b9e 2309
6732edab
LP
2310 if (context->working_directory_home) {
2311
376fecf6
LP
2312 if (!home) {
2313 *exit_status = EXIT_CHDIR;
6732edab 2314 return -ENXIO;
376fecf6 2315 }
6732edab 2316
2b3c1b9e 2317 wd = home;
6732edab
LP
2318
2319 } else if (context->working_directory)
2b3c1b9e
DH
2320 wd = context->working_directory;
2321 else
2322 wd = "/";
e7f1e7c6
DH
2323
2324 if (params->flags & EXEC_APPLY_CHROOT) {
2325 if (!needs_mount_ns && context->root_directory)
376fecf6
LP
2326 if (chroot(context->root_directory) < 0) {
2327 *exit_status = EXIT_CHROOT;
e7f1e7c6 2328 return -errno;
376fecf6 2329 }
e7f1e7c6 2330
2b3c1b9e
DH
2331 d = wd;
2332 } else
3b0e5bb5 2333 d = prefix_roota(context->root_directory, wd);
e7f1e7c6 2334
376fecf6
LP
2335 if (chdir(d) < 0 && !context->working_directory_missing_ok) {
2336 *exit_status = EXIT_CHDIR;
2b3c1b9e 2337 return -errno;
376fecf6 2338 }
e7f1e7c6
DH
2339
2340 return 0;
2341}
2342
b1edf445
LP
2343static int setup_keyring(
2344 Unit *u,
2345 const ExecContext *context,
2346 const ExecParameters *p,
2347 uid_t uid, gid_t gid) {
2348
74dd6b51 2349 key_serial_t keyring;
b1edf445 2350 int r;
74dd6b51
LP
2351
2352 assert(u);
b1edf445 2353 assert(context);
74dd6b51
LP
2354 assert(p);
2355
2356 /* Let's set up a new per-service "session" kernel keyring for each system service. This has the benefit that
2357 * each service runs with its own keyring shared among all processes of the service, but with no hook-up beyond
2358 * that scope, and in particular no link to the per-UID keyring. If we don't do this the keyring will be
2359 * automatically created on-demand and then linked to the per-UID keyring, by the kernel. The kernel's built-in
2360 * on-demand behaviour is very appropriate for login users, but probably not so much for system services, where
2361 * UIDs are not necessarily specific to a service but reused (at least in the case of UID 0). */
2362
2363 if (!(p->flags & EXEC_NEW_KEYRING))
2364 return 0;
2365
b1edf445
LP
2366 if (context->keyring_mode == EXEC_KEYRING_INHERIT)
2367 return 0;
2368
74dd6b51
LP
2369 keyring = keyctl(KEYCTL_JOIN_SESSION_KEYRING, 0, 0, 0, 0);
2370 if (keyring == -1) {
2371 if (errno == ENOSYS)
8002fb97 2372 log_unit_debug_errno(u, errno, "Kernel keyring not supported, ignoring.");
74dd6b51 2373 else if (IN_SET(errno, EACCES, EPERM))
8002fb97 2374 log_unit_debug_errno(u, errno, "Kernel keyring access prohibited, ignoring.");
74dd6b51 2375 else if (errno == EDQUOT)
8002fb97 2376 log_unit_debug_errno(u, errno, "Out of kernel keyrings to allocate, ignoring.");
74dd6b51 2377 else
8002fb97 2378 return log_unit_error_errno(u, errno, "Setting up kernel keyring failed: %m");
74dd6b51
LP
2379
2380 return 0;
2381 }
2382
b3415f5d
LP
2383 /* Populate they keyring with the invocation ID by default. */
2384 if (!sd_id128_is_null(u->invocation_id)) {
2385 key_serial_t key;
2386
2387 key = add_key("user", "invocation_id", &u->invocation_id, sizeof(u->invocation_id), KEY_SPEC_SESSION_KEYRING);
2388 if (key == -1)
8002fb97 2389 log_unit_debug_errno(u, errno, "Failed to add invocation ID to keyring, ignoring: %m");
b3415f5d
LP
2390 else {
2391 if (keyctl(KEYCTL_SETPERM, key,
2392 KEY_POS_VIEW|KEY_POS_READ|KEY_POS_SEARCH|
2393 KEY_USR_VIEW|KEY_USR_READ|KEY_USR_SEARCH, 0, 0) < 0)
8002fb97 2394 return log_unit_error_errno(u, errno, "Failed to restrict invocation ID permission: %m");
b3415f5d
LP
2395 }
2396 }
2397
74dd6b51
LP
2398 /* And now, make the keyring owned by the service's user */
2399 if (uid_is_valid(uid) || gid_is_valid(gid))
2400 if (keyctl(KEYCTL_CHOWN, keyring, uid, gid, 0) < 0)
8002fb97 2401 return log_unit_error_errno(u, errno, "Failed to change ownership of session keyring: %m");
74dd6b51 2402
b1edf445
LP
2403 /* When requested link the user keyring into the session keyring. */
2404 if (context->keyring_mode == EXEC_KEYRING_SHARED) {
2405 uid_t saved_uid;
2406 gid_t saved_gid;
2407
2408 /* Acquiring a reference to the user keyring is nasty. We briefly change identity in order to get things
2409 * set up properly by the kernel. If we don't do that then we can't create it atomically, and that
2410 * sucks for parallel execution. This mimics what pam_keyinit does, too.*/
2411
2412 saved_uid = getuid();
2413 saved_gid = getgid();
2414
2415 if (gid_is_valid(gid) && gid != saved_gid) {
2416 if (setregid(gid, -1) < 0)
8002fb97 2417 return log_unit_error_errno(u, errno, "Failed to change GID for user keyring: %m");
b1edf445
LP
2418 }
2419
2420 if (uid_is_valid(uid) && uid != saved_uid) {
2421 if (setreuid(uid, -1) < 0) {
2422 (void) setregid(saved_gid, -1);
8002fb97 2423 return log_unit_error_errno(u, errno, "Failed to change UID for user keyring: %m");
b1edf445
LP
2424 }
2425 }
2426
2427 if (keyctl(KEYCTL_LINK,
2428 KEY_SPEC_USER_KEYRING,
2429 KEY_SPEC_SESSION_KEYRING, 0, 0) < 0) {
2430
2431 r = -errno;
2432
2433 (void) setreuid(saved_uid, -1);
2434 (void) setregid(saved_gid, -1);
2435
8002fb97 2436 return log_unit_error_errno(u, r, "Failed to link user keyring into session keyring: %m");
b1edf445
LP
2437 }
2438
2439 if (uid_is_valid(uid) && uid != saved_uid) {
2440 if (setreuid(saved_uid, -1) < 0) {
2441 (void) setregid(saved_gid, -1);
8002fb97 2442 return log_unit_error_errno(u, errno, "Failed to change UID back for user keyring: %m");
b1edf445
LP
2443 }
2444 }
2445
2446 if (gid_is_valid(gid) && gid != saved_gid) {
2447 if (setregid(saved_gid, -1) < 0)
8002fb97 2448 return log_unit_error_errno(u, errno, "Failed to change GID back for user keyring: %m");
b1edf445 2449 }
61ceaea5 2450 }
b1edf445 2451
74dd6b51
LP
2452 return 0;
2453}
2454
29206d46
LP
/* Appends the valid (i.e. non-negative) file descriptors of a socket pair to array[], advancing *n for each
 * descriptor added. A NULL pair is silently ignored. The caller must ensure array[] has room for up to two
 * more entries. */
static void append_socket_pair(int *array, unsigned *n, int pair[2]) {
        unsigned k;

        assert(array);
        assert(n);

        if (!pair)
                return;

        for (k = 0; k < 2; k++)
                if (pair[k] >= 0)
                        array[(*n)++] = pair[k];
}
2467
a34ceba6
LP
2468static int close_remaining_fds(
2469 const ExecParameters *params,
2470 ExecRuntime *runtime,
29206d46 2471 DynamicCreds *dcreds,
00d9ef85 2472 int user_lookup_fd,
a34ceba6
LP
2473 int socket_fd,
2474 int *fds, unsigned n_fds) {
2475
2476 unsigned n_dont_close = 0;
00d9ef85 2477 int dont_close[n_fds + 12];
a34ceba6
LP
2478
2479 assert(params);
2480
2481 if (params->stdin_fd >= 0)
2482 dont_close[n_dont_close++] = params->stdin_fd;
2483 if (params->stdout_fd >= 0)
2484 dont_close[n_dont_close++] = params->stdout_fd;
2485 if (params->stderr_fd >= 0)
2486 dont_close[n_dont_close++] = params->stderr_fd;
2487
2488 if (socket_fd >= 0)
2489 dont_close[n_dont_close++] = socket_fd;
2490 if (n_fds > 0) {
2491 memcpy(dont_close + n_dont_close, fds, sizeof(int) * n_fds);
2492 n_dont_close += n_fds;
2493 }
2494
29206d46
LP
2495 if (runtime)
2496 append_socket_pair(dont_close, &n_dont_close, runtime->netns_storage_socket);
2497
2498 if (dcreds) {
2499 if (dcreds->user)
2500 append_socket_pair(dont_close, &n_dont_close, dcreds->user->storage_socket);
2501 if (dcreds->group)
2502 append_socket_pair(dont_close, &n_dont_close, dcreds->group->storage_socket);
a34ceba6
LP
2503 }
2504
00d9ef85
LP
2505 if (user_lookup_fd >= 0)
2506 dont_close[n_dont_close++] = user_lookup_fd;
2507
a34ceba6
LP
2508 return close_all_fds(dont_close, n_dont_close);
2509}
2510
00d9ef85
LP
2511static int send_user_lookup(
2512 Unit *unit,
2513 int user_lookup_fd,
2514 uid_t uid,
2515 gid_t gid) {
2516
2517 assert(unit);
2518
2519 /* Send the resolved UID/GID to PID 1 after we learnt it. We send a single datagram, containing the UID/GID
2520 * data as well as the unit name. Note that we suppress sending this if no user/group to resolve was
2521 * specified. */
2522
2523 if (user_lookup_fd < 0)
2524 return 0;
2525
2526 if (!uid_is_valid(uid) && !gid_is_valid(gid))
2527 return 0;
2528
2529 if (writev(user_lookup_fd,
2530 (struct iovec[]) {
e6a7ec4b
LP
2531 IOVEC_INIT(&uid, sizeof(uid)),
2532 IOVEC_INIT(&gid, sizeof(gid)),
2533 IOVEC_INIT_STRING(unit->id) }, 3) < 0)
00d9ef85
LP
2534 return -errno;
2535
2536 return 0;
2537}
2538
6732edab
LP
2539static int acquire_home(const ExecContext *c, uid_t uid, const char** home, char **buf) {
2540 int r;
2541
2542 assert(c);
2543 assert(home);
2544 assert(buf);
2545
2546 /* If WorkingDirectory=~ is set, try to acquire a usable home directory. */
2547
2548 if (*home)
2549 return 0;
2550
2551 if (!c->working_directory_home)
2552 return 0;
2553
2554 if (uid == 0) {
2555 /* Hardcode /root as home directory for UID 0 */
2556 *home = "/root";
2557 return 1;
2558 }
2559
2560 r = get_home_dir(buf);
2561 if (r < 0)
2562 return r;
2563
2564 *home = *buf;
2565 return 1;
2566}
2567
da50b85a
LP
2568static int compile_suggested_paths(const ExecContext *c, const ExecParameters *p, char ***ret) {
2569 _cleanup_strv_free_ char ** list = NULL;
2570 ExecDirectoryType t;
2571 int r;
2572
2573 assert(c);
2574 assert(p);
2575 assert(ret);
2576
2577 assert(c->dynamic_user);
2578
2579 /* Compile a list of paths that it might make sense to read the owning UID from to use as initial candidate for
2580 * dynamic UID allocation, in order to save us from doing costly recursive chown()s of the special
2581 * directories. */
2582
2583 for (t = 0; t < _EXEC_DIRECTORY_TYPE_MAX; t++) {
2584 char **i;
2585
2586 if (t == EXEC_DIRECTORY_CONFIGURATION)
2587 continue;
2588
2589 if (!p->prefix[t])
2590 continue;
2591
2592 STRV_FOREACH(i, c->directories[t].paths) {
2593 char *e;
2594
8092a48c
YW
2595 if (t == EXEC_DIRECTORY_RUNTIME)
2596 e = strjoin(p->prefix[t], "/", *i);
2597 else
2598 e = strjoin(p->prefix[t], "/private/", *i);
da50b85a
LP
2599 if (!e)
2600 return -ENOMEM;
2601
2602 r = strv_consume(&list, e);
2603 if (r < 0)
2604 return r;
2605 }
2606 }
2607
2608 *ret = list;
2609 list = NULL;
2610
2611 return 0;
2612}
2613
ff0af2a1 2614static int exec_child(
f2341e0a 2615 Unit *unit,
ff0af2a1
LP
2616 ExecCommand *command,
2617 const ExecContext *context,
2618 const ExecParameters *params,
2619 ExecRuntime *runtime,
29206d46 2620 DynamicCreds *dcreds,
ff0af2a1
LP
2621 char **argv,
2622 int socket_fd,
52c239d7 2623 int named_iofds[3],
4c47affc
FB
2624 int *fds,
2625 unsigned n_storage_fds,
9b141911 2626 unsigned n_socket_fds,
ff0af2a1 2627 char **files_env,
00d9ef85 2628 int user_lookup_fd,
12145637 2629 int *exit_status) {
d35fbf6b 2630
2065ca69 2631 _cleanup_strv_free_ char **our_env = NULL, **pass_env = NULL, **accum_env = NULL, **final_argv = NULL;
6732edab 2632 _cleanup_free_ char *mac_selinux_context_net = NULL, *home_buffer = NULL;
4d885bd3
DH
2633 _cleanup_free_ gid_t *supplementary_gids = NULL;
2634 const char *username = NULL, *groupname = NULL;
2b3c1b9e 2635 const char *home = NULL, *shell = NULL;
7bce046b
LP
2636 dev_t journal_stream_dev = 0;
2637 ino_t journal_stream_ino = 0;
165a31c0
LP
2638 bool needs_sandboxing, /* Do we need to set up full sandboxing? (i.e. all namespacing, all MAC stuff, caps, yadda yadda */
2639 needs_setuid, /* Do we need to do the actual setresuid()/setresgid() calls? */
2640 needs_mount_namespace, /* Do we need to set up a mount namespace for this kernel? */
2641 needs_ambient_hack; /* Do we need to apply the ambient capabilities hack? */
349cc4a5 2642#if HAVE_SELINUX
43b1f709 2643 bool use_selinux = false;
ecfbc84f 2644#endif
f9fa32f0 2645#if ENABLE_SMACK
43b1f709 2646 bool use_smack = false;
ecfbc84f 2647#endif
349cc4a5 2648#if HAVE_APPARMOR
43b1f709 2649 bool use_apparmor = false;
ecfbc84f 2650#endif
fed1e721
LP
2651 uid_t uid = UID_INVALID;
2652 gid_t gid = GID_INVALID;
4d885bd3 2653 int i, r, ngids = 0;
4c47affc 2654 unsigned n_fds;
3536f49e 2655 ExecDirectoryType dt;
165a31c0 2656 int secure_bits;
034c6ed7 2657
f2341e0a 2658 assert(unit);
5cb5a6ff
LP
2659 assert(command);
2660 assert(context);
d35fbf6b 2661 assert(params);
ff0af2a1 2662 assert(exit_status);
d35fbf6b
DM
2663
2664 rename_process_from_path(command->path);
2665
2666 /* We reset exactly these signals, since they are the
2667 * only ones we set to SIG_IGN in the main daemon. All
2668 * others we leave untouched because we set them to
2669 * SIG_DFL or a valid handler initially, both of which
2670 * will be demoted to SIG_DFL. */
ce30c8dc
LP
2671 (void) default_signals(SIGNALS_CRASH_HANDLER,
2672 SIGNALS_IGNORE, -1);
d35fbf6b
DM
2673
2674 if (context->ignore_sigpipe)
ce30c8dc 2675 (void) ignore_signals(SIGPIPE, -1);
d35fbf6b 2676
ff0af2a1
LP
2677 r = reset_signal_mask();
2678 if (r < 0) {
2679 *exit_status = EXIT_SIGNAL_MASK;
12145637 2680 return log_unit_error_errno(unit, r, "Failed to set process signal mask: %m");
d35fbf6b 2681 }
034c6ed7 2682
d35fbf6b
DM
2683 if (params->idle_pipe)
2684 do_idle_pipe_dance(params->idle_pipe);
4f2d528d 2685
2c027c62
LP
2686 /* Close fds we don't need very early to make sure we don't block init reexecution because it cannot bind its
2687 * sockets. Among the fds we close are the logging fds, and we want to keep them closed, so that we don't have
2688 * any fds open we don't really want open during the transition. In order to make logging work, we switch the
2689 * log subsystem into open_when_needed mode, so that it reopens the logs on every single log call. */
ff0af2a1 2690
d35fbf6b 2691 log_forget_fds();
2c027c62 2692 log_set_open_when_needed(true);
4f2d528d 2693
40a80078
LP
2694 /* In case anything used libc syslog(), close this here, too */
2695 closelog();
2696
4c47affc 2697 n_fds = n_storage_fds + n_socket_fds;
00d9ef85 2698 r = close_remaining_fds(params, runtime, dcreds, user_lookup_fd, socket_fd, fds, n_fds);
ff0af2a1
LP
2699 if (r < 0) {
2700 *exit_status = EXIT_FDS;
12145637 2701 return log_unit_error_errno(unit, r, "Failed to close unwanted file descriptors: %m");
8c7be95e
LP
2702 }
2703
d35fbf6b
DM
2704 if (!context->same_pgrp)
2705 if (setsid() < 0) {
ff0af2a1 2706 *exit_status = EXIT_SETSID;
12145637 2707 return log_unit_error_errno(unit, errno, "Failed to create new process session: %m");
d35fbf6b 2708 }
9e2f7c11 2709
1e22b5cd 2710 exec_context_tty_reset(context, params);
d35fbf6b 2711
c891efaf 2712 if (unit_shall_confirm_spawn(unit)) {
7d5ceb64 2713 const char *vc = params->confirm_spawn;
3b20f877
FB
2714 _cleanup_free_ char *cmdline = NULL;
2715
2716 cmdline = exec_command_line(argv);
2717 if (!cmdline) {
0460aa5c 2718 *exit_status = EXIT_MEMORY;
12145637 2719 return log_oom();
3b20f877 2720 }
d35fbf6b 2721
eedf223a 2722 r = ask_for_confirmation(vc, unit, cmdline);
3b20f877
FB
2723 if (r != CONFIRM_EXECUTE) {
2724 if (r == CONFIRM_PRETEND_SUCCESS) {
2725 *exit_status = EXIT_SUCCESS;
2726 return 0;
2727 }
ff0af2a1 2728 *exit_status = EXIT_CONFIRM;
12145637 2729 log_unit_error(unit, "Execution cancelled by the user");
d35fbf6b 2730 return -ECANCELED;
d35fbf6b
DM
2731 }
2732 }
1a63a750 2733
29206d46 2734 if (context->dynamic_user && dcreds) {
da50b85a 2735 _cleanup_strv_free_ char **suggested_paths = NULL;
29206d46 2736
409093fe
LP
2737 /* Make sure we bypass our own NSS module for any NSS checks */
2738 if (putenv((char*) "SYSTEMD_NSS_DYNAMIC_BYPASS=1") != 0) {
2739 *exit_status = EXIT_USER;
12145637 2740 return log_unit_error_errno(unit, errno, "Failed to update environment: %m");
409093fe
LP
2741 }
2742
da50b85a
LP
2743 r = compile_suggested_paths(context, params, &suggested_paths);
2744 if (r < 0) {
2745 *exit_status = EXIT_MEMORY;
2746 return log_oom();
2747 }
2748
2749 r = dynamic_creds_realize(dcreds, suggested_paths, &uid, &gid);
ff0af2a1
LP
2750 if (r < 0) {
2751 *exit_status = EXIT_USER;
e2b0cc34
YW
2752 if (r == -EILSEQ) {
2753 log_unit_error(unit, "Failed to update dynamic user credentials: User or group with specified name already exists.");
2754 return -EOPNOTSUPP;
2755 }
12145637 2756 return log_unit_error_errno(unit, r, "Failed to update dynamic user credentials: %m");
524daa8c 2757 }
524daa8c 2758
70dd455c 2759 if (!uid_is_valid(uid)) {
29206d46 2760 *exit_status = EXIT_USER;
12145637 2761 log_unit_error(unit, "UID validation failed for \""UID_FMT"\"", uid);
70dd455c
ZJS
2762 return -ESRCH;
2763 }
2764
2765 if (!gid_is_valid(gid)) {
2766 *exit_status = EXIT_USER;
12145637 2767 log_unit_error(unit, "GID validation failed for \""GID_FMT"\"", gid);
29206d46
LP
2768 return -ESRCH;
2769 }
5bc7452b 2770
29206d46
LP
2771 if (dcreds->user)
2772 username = dcreds->user->name;
2773
2774 } else {
4d885bd3
DH
2775 r = get_fixed_user(context, &username, &uid, &gid, &home, &shell);
2776 if (r < 0) {
2777 *exit_status = EXIT_USER;
12145637 2778 return log_unit_error_errno(unit, r, "Failed to determine user credentials: %m");
5bc7452b 2779 }
5bc7452b 2780
4d885bd3
DH
2781 r = get_fixed_group(context, &groupname, &gid);
2782 if (r < 0) {
2783 *exit_status = EXIT_GROUP;
12145637 2784 return log_unit_error_errno(unit, r, "Failed to determine group credentials: %m");
4d885bd3 2785 }
cdc5d5c5 2786 }
29206d46 2787
cdc5d5c5
DH
2788 /* Initialize user supplementary groups and get SupplementaryGroups= ones */
2789 r = get_supplementary_groups(context, username, groupname, gid,
2790 &supplementary_gids, &ngids);
2791 if (r < 0) {
2792 *exit_status = EXIT_GROUP;
12145637 2793 return log_unit_error_errno(unit, r, "Failed to determine supplementary groups: %m");
29206d46 2794 }
5bc7452b 2795
00d9ef85
LP
2796 r = send_user_lookup(unit, user_lookup_fd, uid, gid);
2797 if (r < 0) {
2798 *exit_status = EXIT_USER;
12145637 2799 return log_unit_error_errno(unit, r, "Failed to send user credentials to PID1: %m");
00d9ef85
LP
2800 }
2801
2802 user_lookup_fd = safe_close(user_lookup_fd);
2803
6732edab
LP
2804 r = acquire_home(context, uid, &home, &home_buffer);
2805 if (r < 0) {
2806 *exit_status = EXIT_CHDIR;
12145637 2807 return log_unit_error_errno(unit, r, "Failed to determine $HOME for user: %m");
6732edab
LP
2808 }
2809
d35fbf6b
DM
2810 /* If a socket is connected to STDIN/STDOUT/STDERR, we
2811 * must sure to drop O_NONBLOCK */
2812 if (socket_fd >= 0)
a34ceba6 2813 (void) fd_nonblock(socket_fd, false);
acbb0225 2814
52c239d7 2815 r = setup_input(context, params, socket_fd, named_iofds);
ff0af2a1
LP
2816 if (r < 0) {
2817 *exit_status = EXIT_STDIN;
12145637 2818 return log_unit_error_errno(unit, r, "Failed to set up standard input: %m");
d35fbf6b 2819 }
034c6ed7 2820
52c239d7 2821 r = setup_output(unit, context, params, STDOUT_FILENO, socket_fd, named_iofds, basename(command->path), uid, gid, &journal_stream_dev, &journal_stream_ino);
ff0af2a1
LP
2822 if (r < 0) {
2823 *exit_status = EXIT_STDOUT;
12145637 2824 return log_unit_error_errno(unit, r, "Failed to set up standard output: %m");
d35fbf6b
DM
2825 }
2826
52c239d7 2827 r = setup_output(unit, context, params, STDERR_FILENO, socket_fd, named_iofds, basename(command->path), uid, gid, &journal_stream_dev, &journal_stream_ino);
ff0af2a1
LP
2828 if (r < 0) {
2829 *exit_status = EXIT_STDERR;
12145637 2830 return log_unit_error_errno(unit, r, "Failed to set up standard error output: %m");
d35fbf6b
DM
2831 }
2832
2833 if (params->cgroup_path) {
ff0af2a1
LP
2834 r = cg_attach_everywhere(params->cgroup_supported, params->cgroup_path, 0, NULL, NULL);
2835 if (r < 0) {
2836 *exit_status = EXIT_CGROUP;
12145637 2837 return log_unit_error_errno(unit, r, "Failed to attach to cgroup %s: %m", params->cgroup_path);
309bff19 2838 }
d35fbf6b 2839 }
309bff19 2840
d35fbf6b 2841 if (context->oom_score_adjust_set) {
d5243d62 2842 char t[DECIMAL_STR_MAX(context->oom_score_adjust)];
f2b68789 2843
d5243d62
LP
2844 /* When we can't make this change due to EPERM, then
2845 * let's silently skip over it. User namespaces
2846 * prohibit write access to this file, and we
2847 * shouldn't trip up over that. */
613b411c 2848
d5243d62 2849 sprintf(t, "%i", context->oom_score_adjust);
ad118bda 2850 r = write_string_file("/proc/self/oom_score_adj", t, 0);
12145637 2851 if (IN_SET(r, -EPERM, -EACCES))
f2341e0a 2852 log_unit_debug_errno(unit, r, "Failed to adjust OOM setting, assuming containerized execution, ignoring: %m");
12145637 2853 else if (r < 0) {
ff0af2a1 2854 *exit_status = EXIT_OOM_ADJUST;
12145637 2855 return log_unit_error_errno(unit, r, "Failed to adjust OOM setting: %m");
613b411c 2856 }
d35fbf6b
DM
2857 }
2858
2859 if (context->nice_set)
2860 if (setpriority(PRIO_PROCESS, 0, context->nice) < 0) {
ff0af2a1 2861 *exit_status = EXIT_NICE;
12145637 2862 return log_unit_error_errno(unit, errno, "Failed to set up process scheduling priority (nice level): %m");
613b411c
LP
2863 }
2864
d35fbf6b
DM
2865 if (context->cpu_sched_set) {
2866 struct sched_param param = {
2867 .sched_priority = context->cpu_sched_priority,
2868 };
2869
ff0af2a1
LP
2870 r = sched_setscheduler(0,
2871 context->cpu_sched_policy |
2872 (context->cpu_sched_reset_on_fork ?
2873 SCHED_RESET_ON_FORK : 0),
2874 &param);
2875 if (r < 0) {
2876 *exit_status = EXIT_SETSCHEDULER;
12145637 2877 return log_unit_error_errno(unit, errno, "Failed to set up CPU scheduling: %m");
fc9b2a84 2878 }
d35fbf6b 2879 }
fc9b2a84 2880
d35fbf6b
DM
2881 if (context->cpuset)
2882 if (sched_setaffinity(0, CPU_ALLOC_SIZE(context->cpuset_ncpus), context->cpuset) < 0) {
ff0af2a1 2883 *exit_status = EXIT_CPUAFFINITY;
12145637 2884 return log_unit_error_errno(unit, errno, "Failed to set up CPU affinity: %m");
034c6ed7
LP
2885 }
2886
d35fbf6b
DM
2887 if (context->ioprio_set)
2888 if (ioprio_set(IOPRIO_WHO_PROCESS, 0, context->ioprio) < 0) {
ff0af2a1 2889 *exit_status = EXIT_IOPRIO;
12145637 2890 return log_unit_error_errno(unit, errno, "Failed to set up IO scheduling priority: %m");
d35fbf6b 2891 }
da726a4d 2892
d35fbf6b
DM
2893 if (context->timer_slack_nsec != NSEC_INFINITY)
2894 if (prctl(PR_SET_TIMERSLACK, context->timer_slack_nsec) < 0) {
ff0af2a1 2895 *exit_status = EXIT_TIMERSLACK;
12145637 2896 return log_unit_error_errno(unit, errno, "Failed to set up timer slack: %m");
4c2630eb 2897 }
9eba9da4 2898
21022b9d
LP
2899 if (context->personality != PERSONALITY_INVALID) {
2900 r = safe_personality(context->personality);
2901 if (r < 0) {
ff0af2a1 2902 *exit_status = EXIT_PERSONALITY;
12145637 2903 return log_unit_error_errno(unit, r, "Failed to set up execution domain (personality): %m");
4c2630eb 2904 }
21022b9d 2905 }
94f04347 2906
d35fbf6b 2907 if (context->utmp_id)
df0ff127 2908 utmp_put_init_process(context->utmp_id, getpid_cached(), getsid(0),
6a93917d 2909 context->tty_path,
023a4f67
LP
2910 context->utmp_mode == EXEC_UTMP_INIT ? INIT_PROCESS :
2911 context->utmp_mode == EXEC_UTMP_LOGIN ? LOGIN_PROCESS :
2912 USER_PROCESS,
6a93917d 2913 username);
d35fbf6b 2914
e0d2adfd 2915 if (context->user) {
ff0af2a1
LP
2916 r = chown_terminal(STDIN_FILENO, uid);
2917 if (r < 0) {
2918 *exit_status = EXIT_STDIN;
12145637 2919 return log_unit_error_errno(unit, r, "Failed to change ownership of terminal: %m");
071830ff 2920 }
d35fbf6b 2921 }
8e274523 2922
a931ad47
LP
2923 /* If delegation is enabled we'll pass ownership of the cgroup
2924 * (but only in systemd's own controller hierarchy!) to the
2925 * user of the new process. */
584b8688 2926 if (params->cgroup_path && context->user && (params->flags & EXEC_CGROUP_DELEGATE)) {
ff0af2a1
LP
2927 r = cg_set_task_access(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, 0644, uid, gid);
2928 if (r < 0) {
2929 *exit_status = EXIT_CGROUP;
12145637 2930 return log_unit_error_errno(unit, r, "Failed to adjust control group access: %m");
d35fbf6b 2931 }
034c6ed7 2932
ff0af2a1
LP
2933 r = cg_set_group_access(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, 0755, uid, gid);
2934 if (r < 0) {
2935 *exit_status = EXIT_CGROUP;
12145637 2936 return log_unit_error_errno(unit, r, "Failed to adjust control group access: %m");
034c6ed7 2937 }
d35fbf6b 2938 }
034c6ed7 2939
72fd1768 2940 for (dt = 0; dt < _EXEC_DIRECTORY_TYPE_MAX; dt++) {
8679efde 2941 r = setup_exec_directory(context, params, uid, gid, dt, exit_status);
12145637
LP
2942 if (r < 0)
2943 return log_unit_error_errno(unit, r, "Failed to set up special execution directory in %s: %m", params->prefix[dt]);
d35fbf6b 2944 }
94f04347 2945
7bce046b 2946 r = build_environment(
fd63e712 2947 unit,
7bce046b
LP
2948 context,
2949 params,
2950 n_fds,
2951 home,
2952 username,
2953 shell,
2954 journal_stream_dev,
2955 journal_stream_ino,
2956 &our_env);
2065ca69
JW
2957 if (r < 0) {
2958 *exit_status = EXIT_MEMORY;
12145637 2959 return log_oom();
2065ca69
JW
2960 }
2961
2962 r = build_pass_environment(context, &pass_env);
2963 if (r < 0) {
2964 *exit_status = EXIT_MEMORY;
12145637 2965 return log_oom();
2065ca69
JW
2966 }
2967
2968 accum_env = strv_env_merge(5,
2969 params->environment,
2970 our_env,
2971 pass_env,
2972 context->environment,
2973 files_env,
2974 NULL);
2975 if (!accum_env) {
2976 *exit_status = EXIT_MEMORY;
12145637 2977 return log_oom();
2065ca69 2978 }
1280503b 2979 accum_env = strv_env_clean(accum_env);
2065ca69 2980
096424d1 2981 (void) umask(context->umask);
b213e1c1 2982
b1edf445 2983 r = setup_keyring(unit, context, params, uid, gid);
74dd6b51
LP
2984 if (r < 0) {
2985 *exit_status = EXIT_KEYRING;
12145637 2986 return log_unit_error_errno(unit, r, "Failed to set up kernel keyring: %m");
74dd6b51
LP
2987 }
2988
165a31c0 2989 /* We need sandboxing if the caller asked us to apply it and the command isn't explicitly excepted from it */
1703fa41 2990 needs_sandboxing = (params->flags & EXEC_APPLY_SANDBOXING) && !(command->flags & EXEC_COMMAND_FULLY_PRIVILEGED);
7f18ef0a 2991
165a31c0
LP
2992 /* We need the ambient capability hack, if the caller asked us to apply it and the command is marked for it, and the kernel doesn't actually support ambient caps */
2993 needs_ambient_hack = (params->flags & EXEC_APPLY_SANDBOXING) && (command->flags & EXEC_COMMAND_AMBIENT_MAGIC) && !ambient_capabilities_supported();
7f18ef0a 2994
165a31c0
LP
2995 /* We need setresuid() if the caller asked us to apply sandboxing and the command isn't explicitly excepted from either whole sandboxing or just setresuid() itself, and the ambient hack is not desired */
2996 if (needs_ambient_hack)
2997 needs_setuid = false;
2998 else
2999 needs_setuid = (params->flags & EXEC_APPLY_SANDBOXING) && !(command->flags & (EXEC_COMMAND_FULLY_PRIVILEGED|EXEC_COMMAND_NO_SETUID));
3000
3001 if (needs_sandboxing) {
7f18ef0a
FK
3002 /* MAC enablement checks need to be done before a new mount ns is created, as they rely on /sys being
3003 * present. The actual MAC context application will happen later, as late as possible, to avoid
3004 * impacting our own code paths. */
3005
349cc4a5 3006#if HAVE_SELINUX
43b1f709 3007 use_selinux = mac_selinux_use();
7f18ef0a 3008#endif
f9fa32f0 3009#if ENABLE_SMACK
43b1f709 3010 use_smack = mac_smack_use();
7f18ef0a 3011#endif
349cc4a5 3012#if HAVE_APPARMOR
43b1f709 3013 use_apparmor = mac_apparmor_use();
7f18ef0a 3014#endif
165a31c0 3015 }
7f18ef0a 3016
165a31c0
LP
3017 if (needs_setuid) {
3018 if (context->pam_name && username) {
3019 r = setup_pam(context->pam_name, username, uid, gid, context->tty_path, &accum_env, fds, n_fds);
3020 if (r < 0) {
3021 *exit_status = EXIT_PAM;
12145637 3022 return log_unit_error_errno(unit, r, "Failed to set up PAM session: %m");
165a31c0
LP
3023 }
3024 }
b213e1c1 3025 }
ac45f971 3026
d35fbf6b 3027 if (context->private_network && runtime && runtime->netns_storage_socket[0] >= 0) {
6e2d7c4f
MS
3028 if (ns_type_supported(NAMESPACE_NET)) {
3029 r = setup_netns(runtime->netns_storage_socket);
3030 if (r < 0) {
3031 *exit_status = EXIT_NETWORK;
3032 return log_unit_error_errno(unit, r, "Failed to set up network namespacing: %m");
3033 }
3034 } else
3035 log_unit_warning(unit, "PrivateNetwork=yes is configured, but the kernel does not support network namespaces, ignoring.");
d35fbf6b 3036 }
169c1bda 3037
ee818b89 3038 needs_mount_namespace = exec_needs_mount_namespace(context, params, runtime);
ee818b89 3039 if (needs_mount_namespace) {
6818c54c 3040 r = apply_mount_namespace(unit, command, context, params, runtime);
3fbe8dbe
LP
3041 if (r < 0) {
3042 *exit_status = EXIT_NAMESPACE;
12145637 3043 return log_unit_error_errno(unit, r, "Failed to set up mount namespacing: %m");
3fbe8dbe 3044 }
d35fbf6b 3045 }
81a2b7ce 3046
50b3dfb9 3047 /* Apply just after mount namespace setup */
376fecf6 3048 r = apply_working_directory(context, params, home, needs_mount_namespace, exit_status);
12145637
LP
3049 if (r < 0)
3050 return log_unit_error_errno(unit, r, "Changing to the requested working directory failed: %m");
50b3dfb9 3051
bbeea271 3052 /* Drop groups as early as possible */
165a31c0 3053 if (needs_setuid) {
709dbeac 3054 r = enforce_groups(gid, supplementary_gids, ngids);
096424d1
LP
3055 if (r < 0) {
3056 *exit_status = EXIT_GROUP;
12145637 3057 return log_unit_error_errno(unit, r, "Changing group credentials failed: %m");
096424d1 3058 }
165a31c0 3059 }
096424d1 3060
165a31c0 3061 if (needs_sandboxing) {
349cc4a5 3062#if HAVE_SELINUX
43b1f709 3063 if (use_selinux && params->selinux_context_net && socket_fd >= 0) {
937ccce9
LP
3064 r = mac_selinux_get_child_mls_label(socket_fd, command->path, context->selinux_context, &mac_selinux_context_net);
3065 if (r < 0) {
3066 *exit_status = EXIT_SELINUX_CONTEXT;
12145637 3067 return log_unit_error_errno(unit, r, "Failed to determine SELinux context: %m");
937ccce9 3068 }
9008e1ac 3069 }
9008e1ac
MS
3070#endif
3071
937ccce9
LP
3072 if (context->private_users) {
3073 r = setup_private_users(uid, gid);
3074 if (r < 0) {
3075 *exit_status = EXIT_USER;
12145637 3076 return log_unit_error_errno(unit, r, "Failed to set up user namespacing: %m");
937ccce9 3077 }
d251207d
LP
3078 }
3079 }
3080
165a31c0
LP
3081 /* We repeat the fd closing here, to make sure that nothing is leaked from the PAM modules. Note that we are
3082 * more aggressive this time since socket_fd and the netns fds we don't need anymore. The custom endpoint fd
3083 * was needed to upload the policy and can now be closed as well. */
ff0af2a1
LP
3084 r = close_all_fds(fds, n_fds);
3085 if (r >= 0)
3086 r = shift_fds(fds, n_fds);
3087 if (r >= 0)
4c47affc 3088 r = flags_fds(fds, n_storage_fds, n_socket_fds, context->non_blocking);
ff0af2a1
LP
3089 if (r < 0) {
3090 *exit_status = EXIT_FDS;
12145637 3091 return log_unit_error_errno(unit, r, "Failed to adjust passed file descriptors: %m");
d35fbf6b 3092 }
e66cf1a3 3093
165a31c0 3094 secure_bits = context->secure_bits;
e66cf1a3 3095
165a31c0
LP
3096 if (needs_sandboxing) {
3097 uint64_t bset;
755d4b67 3098
d35fbf6b 3099 for (i = 0; i < _RLIMIT_MAX; i++) {
03857c43 3100
d35fbf6b
DM
3101 if (!context->rlimit[i])
3102 continue;
3103
03857c43
LP
3104 r = setrlimit_closest(i, context->rlimit[i]);
3105 if (r < 0) {
ff0af2a1 3106 *exit_status = EXIT_LIMITS;
12145637 3107 return log_unit_error_errno(unit, r, "Failed to adjust resource limit %s: %m", rlimit_to_string(i));
e66cf1a3
LP
3108 }
3109 }
3110
f4170c67
LP
3111 /* Set the RTPRIO resource limit to 0, but only if nothing else was explicitly requested. */
3112 if (context->restrict_realtime && !context->rlimit[RLIMIT_RTPRIO]) {
3113 if (setrlimit(RLIMIT_RTPRIO, &RLIMIT_MAKE_CONST(0)) < 0) {
3114 *exit_status = EXIT_LIMITS;
12145637 3115 return log_unit_error_errno(unit, errno, "Failed to adjust RLIMIT_RTPRIO resource limit: %m");
f4170c67
LP
3116 }
3117 }
3118
165a31c0
LP
3119 bset = context->capability_bounding_set;
3120 /* If the ambient caps hack is enabled (which means the kernel can't do them, and the user asked for
3121 * our magic fallback), then let's add some extra caps, so that the service can drop privs of its own,
3122 * instead of us doing that */
3123 if (needs_ambient_hack)
3124 bset |= (UINT64_C(1) << CAP_SETPCAP) |
3125 (UINT64_C(1) << CAP_SETUID) |
3126 (UINT64_C(1) << CAP_SETGID);
3127
3128 if (!cap_test_all(bset)) {
3129 r = capability_bounding_set_drop(bset, false);
ff0af2a1
LP
3130 if (r < 0) {
3131 *exit_status = EXIT_CAPABILITIES;
12145637 3132 return log_unit_error_errno(unit, r, "Failed to drop capabilities: %m");
3b8bddde 3133 }
4c2630eb 3134 }
3b8bddde 3135
755d4b67
IP
3136 /* This is done before enforce_user, but ambient set
3137 * does not survive over setresuid() if keep_caps is not set. */
165a31c0
LP
3138 if (!needs_ambient_hack &&
3139 context->capability_ambient_set != 0) {
755d4b67
IP
3140 r = capability_ambient_set_apply(context->capability_ambient_set, true);
3141 if (r < 0) {
3142 *exit_status = EXIT_CAPABILITIES;
12145637 3143 return log_unit_error_errno(unit, r, "Failed to apply ambient capabilities (before UID change): %m");
755d4b67 3144 }
755d4b67 3145 }
165a31c0 3146 }
755d4b67 3147
165a31c0 3148 if (needs_setuid) {
d35fbf6b 3149 if (context->user) {
ff0af2a1
LP
3150 r = enforce_user(context, uid);
3151 if (r < 0) {
3152 *exit_status = EXIT_USER;
12145637 3153 return log_unit_error_errno(unit, r, "Failed to change UID to " UID_FMT ": %m", uid);
5b6319dc 3154 }
165a31c0
LP
3155
3156 if (!needs_ambient_hack &&
3157 context->capability_ambient_set != 0) {
755d4b67
IP
3158
3159 /* Fix the ambient capabilities after user change. */
3160 r = capability_ambient_set_apply(context->capability_ambient_set, false);
3161 if (r < 0) {
3162 *exit_status = EXIT_CAPABILITIES;
12145637 3163 return log_unit_error_errno(unit, r, "Failed to apply ambient capabilities (after UID change): %m");
755d4b67
IP
3164 }
3165
3166 /* If we were asked to change user and ambient capabilities
3167 * were requested, we had to add keep-caps to the securebits
3168 * so that we would maintain the inherited capability set
3169 * through the setresuid(). Make sure that the bit is added
3170 * also to the context secure_bits so that we don't try to
3171 * drop the bit away next. */
3172
7f508f2c 3173 secure_bits |= 1<<SECURE_KEEP_CAPS;
755d4b67 3174 }
5b6319dc 3175 }
165a31c0 3176 }
d35fbf6b 3177
165a31c0 3178 if (needs_sandboxing) {
5cd9cd35
LP
3179 /* Apply the MAC contexts late, but before seccomp syscall filtering, as those should really be last to
3180 * influence our own codepaths as little as possible. Moreover, applying MAC contexts usually requires
3181 * syscalls that are subject to seccomp filtering, hence should probably be applied before the syscalls
3182 * are restricted. */
3183
349cc4a5 3184#if HAVE_SELINUX
43b1f709 3185 if (use_selinux) {
5cd9cd35
LP
3186 char *exec_context = mac_selinux_context_net ?: context->selinux_context;
3187
3188 if (exec_context) {
3189 r = setexeccon(exec_context);
3190 if (r < 0) {
3191 *exit_status = EXIT_SELINUX_CONTEXT;
12145637 3192 return log_unit_error_errno(unit, r, "Failed to change SELinux context to %s: %m", exec_context);
5cd9cd35
LP
3193 }
3194 }
3195 }
3196#endif
3197
f9fa32f0 3198#if ENABLE_SMACK
43b1f709 3199 if (use_smack) {
7f18ef0a
FK
3200 r = setup_smack(context, command);
3201 if (r < 0) {
3202 *exit_status = EXIT_SMACK_PROCESS_LABEL;
12145637 3203 return log_unit_error_errno(unit, r, "Failed to set SMACK process label: %m");
7f18ef0a 3204 }
5cd9cd35 3205 }
7f18ef0a 3206#endif
5cd9cd35 3207
349cc4a5 3208#if HAVE_APPARMOR
43b1f709 3209 if (use_apparmor && context->apparmor_profile) {
5cd9cd35
LP
3210 r = aa_change_onexec(context->apparmor_profile);
3211 if (r < 0 && !context->apparmor_profile_ignore) {
3212 *exit_status = EXIT_APPARMOR_PROFILE;
12145637 3213 return log_unit_error_errno(unit, errno, "Failed to prepare AppArmor profile change to %s: %m", context->apparmor_profile);
5cd9cd35
LP
3214 }
3215 }
3216#endif
3217
165a31c0
LP
3218 /* PR_GET_SECUREBITS is not privileged, while PR_SET_SECUREBITS is. So to suppress potential EPERMs
3219 * we'll try not to call PR_SET_SECUREBITS unless necessary. */
755d4b67
IP
3220 if (prctl(PR_GET_SECUREBITS) != secure_bits)
3221 if (prctl(PR_SET_SECUREBITS, secure_bits) < 0) {
ff0af2a1 3222 *exit_status = EXIT_SECUREBITS;
12145637 3223 return log_unit_error_errno(unit, errno, "Failed to set process secure bits: %m");
ff01d048 3224 }
5b6319dc 3225
59eeb84b 3226 if (context_has_no_new_privileges(context))
d35fbf6b 3227 if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) < 0) {
ff0af2a1 3228 *exit_status = EXIT_NO_NEW_PRIVILEGES;
12145637 3229 return log_unit_error_errno(unit, errno, "Failed to disable new privileges: %m");
d35fbf6b
DM
3230 }
3231
349cc4a5 3232#if HAVE_SECCOMP
469830d1
LP
3233 r = apply_address_families(unit, context);
3234 if (r < 0) {
3235 *exit_status = EXIT_ADDRESS_FAMILIES;
12145637 3236 return log_unit_error_errno(unit, r, "Failed to restrict address families: %m");
4c2630eb 3237 }
04aa0cb9 3238
469830d1
LP
3239 r = apply_memory_deny_write_execute(unit, context);
3240 if (r < 0) {
3241 *exit_status = EXIT_SECCOMP;
12145637 3242 return log_unit_error_errno(unit, r, "Failed to disable writing to executable memory: %m");
f3e43635 3243 }
f4170c67 3244
469830d1
LP
3245 r = apply_restrict_realtime(unit, context);
3246 if (r < 0) {
3247 *exit_status = EXIT_SECCOMP;
12145637 3248 return log_unit_error_errno(unit, r, "Failed to apply realtime restrictions: %m");
f4170c67
LP
3249 }
3250
add00535
LP
3251 r = apply_restrict_namespaces(unit, context);
3252 if (r < 0) {
3253 *exit_status = EXIT_SECCOMP;
12145637 3254 return log_unit_error_errno(unit, r, "Failed to apply namespace restrictions: %m");
add00535
LP
3255 }
3256
469830d1
LP
3257 r = apply_protect_sysctl(unit, context);
3258 if (r < 0) {
3259 *exit_status = EXIT_SECCOMP;
12145637 3260 return log_unit_error_errno(unit, r, "Failed to apply sysctl restrictions: %m");
502d704e
DH
3261 }
3262
469830d1
LP
3263 r = apply_protect_kernel_modules(unit, context);
3264 if (r < 0) {
3265 *exit_status = EXIT_SECCOMP;
12145637 3266 return log_unit_error_errno(unit, r, "Failed to apply module loading restrictions: %m");
59eeb84b
LP
3267 }
3268
469830d1
LP
3269 r = apply_private_devices(unit, context);
3270 if (r < 0) {
3271 *exit_status = EXIT_SECCOMP;
12145637 3272 return log_unit_error_errno(unit, r, "Failed to set up private devices: %m");
469830d1
LP
3273 }
3274
3275 r = apply_syscall_archs(unit, context);
3276 if (r < 0) {
3277 *exit_status = EXIT_SECCOMP;
12145637 3278 return log_unit_error_errno(unit, r, "Failed to apply syscall architecture restrictions: %m");
ba128bb8
LP
3279 }
3280
78e864e5
TM
3281 r = apply_lock_personality(unit, context);
3282 if (r < 0) {
3283 *exit_status = EXIT_SECCOMP;
12145637 3284 return log_unit_error_errno(unit, r, "Failed to lock personalities: %m");
78e864e5
TM
3285 }
3286
5cd9cd35
LP
3287 /* This really should remain the last step before the execve(), to make sure our own code is unaffected
3288 * by the filter as little as possible. */
165a31c0 3289 r = apply_syscall_filter(unit, context, needs_ambient_hack);
469830d1
LP
3290 if (r < 0) {
3291 *exit_status = EXIT_SECCOMP;
12145637 3292 return log_unit_error_errno(unit, r, "Failed to apply system call filters: %m");
d35fbf6b
DM
3293 }
3294#endif
d35fbf6b 3295 }
034c6ed7 3296
00819cc1
LP
3297 if (!strv_isempty(context->unset_environment)) {
3298 char **ee = NULL;
3299
3300 ee = strv_env_delete(accum_env, 1, context->unset_environment);
3301 if (!ee) {
3302 *exit_status = EXIT_MEMORY;
12145637 3303 return log_oom();
00819cc1
LP
3304 }
3305
3306 strv_free(accum_env);
3307 accum_env = ee;
3308 }
3309
2065ca69 3310 final_argv = replace_env_argv(argv, accum_env);
d35fbf6b 3311 if (!final_argv) {
ff0af2a1 3312 *exit_status = EXIT_MEMORY;
12145637 3313 return log_oom();
d35fbf6b 3314 }
034c6ed7 3315
553d2243 3316 if (_unlikely_(log_get_max_level() >= LOG_DEBUG)) {
d35fbf6b 3317 _cleanup_free_ char *line;
81a2b7ce 3318
d35fbf6b
DM
3319 line = exec_command_line(final_argv);
3320 if (line) {
f2341e0a 3321 log_struct(LOG_DEBUG,
f2341e0a
LP
3322 "EXECUTABLE=%s", command->path,
3323 LOG_UNIT_MESSAGE(unit, "Executing: %s", line),
ba360bb0 3324 LOG_UNIT_ID(unit),
f1c50bec 3325 LOG_UNIT_INVOCATION_ID(unit),
f2341e0a 3326 NULL);
d35fbf6b
DM
3327 }
3328 }
dd305ec9 3329
2065ca69 3330 execve(command->path, final_argv, accum_env);
12145637
LP
3331
3332 if (errno == ENOENT && (command->flags & EXEC_COMMAND_IGNORE_FAILURE)) {
3333
3334 log_struct_errno(LOG_INFO, errno,
3335 "MESSAGE_ID=" SD_MESSAGE_SPAWN_FAILED_STR,
3336 LOG_UNIT_ID(unit),
3337 LOG_UNIT_INVOCATION_ID(unit),
3338 LOG_UNIT_MESSAGE(unit, "Executable %s missing, skipping: %m",
3339 command->path),
3340 "EXECUTABLE=%s", command->path,
3341 NULL);
3342
3343 return 0;
3344 }
3345
ff0af2a1 3346 *exit_status = EXIT_EXEC;
12145637 3347 return log_unit_error_errno(unit, errno, "Failed to execute command: %m");
d35fbf6b 3348}
81a2b7ce 3349
f2341e0a
LP
3350int exec_spawn(Unit *unit,
3351 ExecCommand *command,
d35fbf6b
DM
3352 const ExecContext *context,
3353 const ExecParameters *params,
3354 ExecRuntime *runtime,
29206d46 3355 DynamicCreds *dcreds,
d35fbf6b 3356 pid_t *ret) {
8351ceae 3357
d35fbf6b 3358 _cleanup_strv_free_ char **files_env = NULL;
9b141911 3359 int *fds = NULL;
4c47affc 3360 unsigned n_storage_fds = 0, n_socket_fds = 0;
ff0af2a1
LP
3361 _cleanup_free_ char *line = NULL;
3362 int socket_fd, r;
52c239d7 3363 int named_iofds[3] = { -1, -1, -1 };
ff0af2a1 3364 char **argv;
d35fbf6b 3365 pid_t pid;
8351ceae 3366
f2341e0a 3367 assert(unit);
d35fbf6b
DM
3368 assert(command);
3369 assert(context);
3370 assert(ret);
3371 assert(params);
4c47affc 3372 assert(params->fds || (params->n_storage_fds + params->n_socket_fds <= 0));
4298d0b5 3373
d35fbf6b
DM
3374 if (context->std_input == EXEC_INPUT_SOCKET ||
3375 context->std_output == EXEC_OUTPUT_SOCKET ||
3376 context->std_error == EXEC_OUTPUT_SOCKET) {
17df7223 3377
4c47affc 3378 if (params->n_socket_fds > 1) {
f2341e0a 3379 log_unit_error(unit, "Got more than one socket.");
d35fbf6b 3380 return -EINVAL;
ff0af2a1 3381 }
eef65bf3 3382
4c47affc 3383 if (params->n_socket_fds == 0) {
488ab41c
AA
3384 log_unit_error(unit, "Got no socket.");
3385 return -EINVAL;
3386 }
3387
d35fbf6b
DM
3388 socket_fd = params->fds[0];
3389 } else {
3390 socket_fd = -1;
3391 fds = params->fds;
4c47affc 3392 n_storage_fds = params->n_storage_fds;
9b141911 3393 n_socket_fds = params->n_socket_fds;
d35fbf6b 3394 }
94f04347 3395
52c239d7
LB
3396 r = exec_context_named_iofds(unit, context, params, named_iofds);
3397 if (r < 0)
3398 return log_unit_error_errno(unit, r, "Failed to load a named file descriptor: %m");
3399
f2341e0a 3400 r = exec_context_load_environment(unit, context, &files_env);
ff0af2a1 3401 if (r < 0)
f2341e0a 3402 return log_unit_error_errno(unit, r, "Failed to load environment files: %m");
034c6ed7 3403
d35fbf6b 3404 argv = params->argv ?: command->argv;
d35fbf6b
DM
3405 line = exec_command_line(argv);
3406 if (!line)
3407 return log_oom();
fab56fc5 3408
f2341e0a 3409 log_struct(LOG_DEBUG,
f2341e0a
LP
3410 LOG_UNIT_MESSAGE(unit, "About to execute: %s", line),
3411 "EXECUTABLE=%s", command->path,
ba360bb0 3412 LOG_UNIT_ID(unit),
f1c50bec 3413 LOG_UNIT_INVOCATION_ID(unit),
f2341e0a 3414 NULL);
12145637 3415
d35fbf6b
DM
3416 pid = fork();
3417 if (pid < 0)
74129a12 3418 return log_unit_error_errno(unit, errno, "Failed to fork: %m");
d35fbf6b
DM
3419
3420 if (pid == 0) {
12145637 3421 int exit_status = EXIT_SUCCESS;
ff0af2a1 3422
f2341e0a
LP
3423 r = exec_child(unit,
3424 command,
ff0af2a1
LP
3425 context,
3426 params,
3427 runtime,
29206d46 3428 dcreds,
ff0af2a1
LP
3429 argv,
3430 socket_fd,
52c239d7 3431 named_iofds,
4c47affc
FB
3432 fds,
3433 n_storage_fds,
9b141911 3434 n_socket_fds,
ff0af2a1 3435 files_env,
00d9ef85 3436 unit->manager->user_lookup_fds[1],
12145637
LP
3437 &exit_status);
3438
ff0af2a1 3439 if (r < 0) {
12145637
LP
3440 log_struct_errno(LOG_ERR, r,
3441 "MESSAGE_ID=" SD_MESSAGE_SPAWN_FAILED_STR,
3442 LOG_UNIT_ID(unit),
3443 LOG_UNIT_INVOCATION_ID(unit),
3444 LOG_UNIT_MESSAGE(unit, "Failed at step %s spawning %s: %m",
3445 exit_status_to_string(exit_status, EXIT_STATUS_SYSTEMD),
3446 command->path),
3447 "EXECUTABLE=%s", command->path,
3448 NULL);
4c2630eb
MS
3449 }
3450
ff0af2a1 3451 _exit(exit_status);
034c6ed7
LP
3452 }
3453
f2341e0a 3454 log_unit_debug(unit, "Forked %s as "PID_FMT, command->path, pid);
23635a85 3455
80876c20
LP
3456 /* We add the new process to the cgroup both in the child (so
3457 * that we can be sure that no user code is ever executed
3458 * outside of the cgroup) and in the parent (so that we can be
3459 * sure that when we kill the cgroup the process will be
3460 * killed too). */
d35fbf6b 3461 if (params->cgroup_path)
dd305ec9 3462 (void) cg_attach(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, pid);
2da3263a 3463
b58b4116 3464 exec_status_start(&command->exec_status, pid);
9fb86720 3465
034c6ed7 3466 *ret = pid;
5cb5a6ff
LP
3467 return 0;
3468}
3469
034c6ed7 3470void exec_context_init(ExecContext *c) {
3536f49e
YW
3471 ExecDirectoryType i;
3472
034c6ed7
LP
3473 assert(c);
3474
4c12626c 3475 c->umask = 0022;
9eba9da4 3476 c->ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 0);
94f04347 3477 c->cpu_sched_policy = SCHED_OTHER;
071830ff 3478 c->syslog_priority = LOG_DAEMON|LOG_INFO;
74922904 3479 c->syslog_level_prefix = true;
353e12c2 3480 c->ignore_sigpipe = true;
3a43da28 3481 c->timer_slack_nsec = NSEC_INFINITY;
050f7277 3482 c->personality = PERSONALITY_INVALID;
72fd1768 3483 for (i = 0; i < _EXEC_DIRECTORY_TYPE_MAX; i++)
3536f49e 3484 c->directories[i].mode = 0755;
a103496c 3485 c->capability_bounding_set = CAP_ALL;
add00535 3486 c->restrict_namespaces = NAMESPACE_FLAGS_ALL;
d3070fbd 3487 c->log_level_max = -1;
034c6ed7
LP
3488}
3489
613b411c 3490void exec_context_done(ExecContext *c) {
3536f49e 3491 ExecDirectoryType i;
d3070fbd 3492 size_t l;
5cb5a6ff
LP
3493
3494 assert(c);
3495
6796073e
LP
3496 c->environment = strv_free(c->environment);
3497 c->environment_files = strv_free(c->environment_files);
b4c14404 3498 c->pass_environment = strv_free(c->pass_environment);
00819cc1 3499 c->unset_environment = strv_free(c->unset_environment);
8c7be95e 3500
1f6b4113 3501 for (l = 0; l < ELEMENTSOF(c->rlimit); l++)
a1e58e8e 3502 c->rlimit[l] = mfree(c->rlimit[l]);
034c6ed7 3503
52c239d7
LB
3504 for (l = 0; l < 3; l++)
3505 c->stdio_fdname[l] = mfree(c->stdio_fdname[l]);
3506
a1e58e8e
LP
3507 c->working_directory = mfree(c->working_directory);
3508 c->root_directory = mfree(c->root_directory);
915e6d16 3509 c->root_image = mfree(c->root_image);
a1e58e8e
LP
3510 c->tty_path = mfree(c->tty_path);
3511 c->syslog_identifier = mfree(c->syslog_identifier);
3512 c->user = mfree(c->user);
3513 c->group = mfree(c->group);
034c6ed7 3514
6796073e 3515 c->supplementary_groups = strv_free(c->supplementary_groups);
94f04347 3516
a1e58e8e 3517 c->pam_name = mfree(c->pam_name);
5b6319dc 3518
2a624c36
AP
3519 c->read_only_paths = strv_free(c->read_only_paths);
3520 c->read_write_paths = strv_free(c->read_write_paths);
3521 c->inaccessible_paths = strv_free(c->inaccessible_paths);
82c121a4 3522
d2d6c096
LP
3523 bind_mount_free_many(c->bind_mounts, c->n_bind_mounts);
3524
82c121a4
LP
3525 if (c->cpuset)
3526 CPU_FREE(c->cpuset);
86a3475b 3527
a1e58e8e
LP
3528 c->utmp_id = mfree(c->utmp_id);
3529 c->selinux_context = mfree(c->selinux_context);
3530 c->apparmor_profile = mfree(c->apparmor_profile);
5b8e1b77 3531 c->smack_process_label = mfree(c->smack_process_label);
eef65bf3 3532
8cfa775f 3533 c->syscall_filter = hashmap_free(c->syscall_filter);
525d3cc7
LP
3534 c->syscall_archs = set_free(c->syscall_archs);
3535 c->address_families = set_free(c->address_families);
e66cf1a3 3536
72fd1768 3537 for (i = 0; i < _EXEC_DIRECTORY_TYPE_MAX; i++)
3536f49e 3538 c->directories[i].paths = strv_free(c->directories[i].paths);
d3070fbd
LP
3539
3540 c->log_level_max = -1;
3541
3542 exec_context_free_log_extra_fields(c);
e66cf1a3
LP
3543}
3544
3545int exec_context_destroy_runtime_directory(ExecContext *c, const char *runtime_prefix) {
3546 char **i;
3547
3548 assert(c);
3549
3550 if (!runtime_prefix)
3551 return 0;
3552
3536f49e 3553 STRV_FOREACH(i, c->directories[EXEC_DIRECTORY_RUNTIME].paths) {
e66cf1a3
LP
3554 _cleanup_free_ char *p;
3555
605405c6 3556 p = strjoin(runtime_prefix, "/", *i);
e66cf1a3
LP
3557 if (!p)
3558 return -ENOMEM;
3559
6c47cd7d 3560 /* We execute this synchronously, since we need to be sure this is gone when we start the service
e66cf1a3 3561 * next. */
c6878637 3562 (void) rm_rf(p, REMOVE_ROOT);
e66cf1a3
LP
3563 }
3564
3565 return 0;
5cb5a6ff
LP
3566}
3567
43d0fcbd
LP
3568void exec_command_done(ExecCommand *c) {
3569 assert(c);
3570
a1e58e8e 3571 c->path = mfree(c->path);
43d0fcbd 3572
6796073e 3573 c->argv = strv_free(c->argv);
43d0fcbd
LP
3574}
3575
3576void exec_command_done_array(ExecCommand *c, unsigned n) {
3577 unsigned i;
3578
3579 for (i = 0; i < n; i++)
3580 exec_command_done(c+i);
3581}
3582
f1acf85a 3583ExecCommand* exec_command_free_list(ExecCommand *c) {
5cb5a6ff
LP
3584 ExecCommand *i;
3585
3586 while ((i = c)) {
71fda00f 3587 LIST_REMOVE(command, c, i);
43d0fcbd 3588 exec_command_done(i);
5cb5a6ff
LP
3589 free(i);
3590 }
f1acf85a
ZJS
3591
3592 return NULL;
5cb5a6ff
LP
3593}
3594
034c6ed7
LP
3595void exec_command_free_array(ExecCommand **c, unsigned n) {
3596 unsigned i;
3597
f1acf85a
ZJS
3598 for (i = 0; i < n; i++)
3599 c[i] = exec_command_free_list(c[i]);
034c6ed7
LP
3600}
3601
039f0e70 3602typedef struct InvalidEnvInfo {
f2341e0a 3603 Unit *unit;
039f0e70
LP
3604 const char *path;
3605} InvalidEnvInfo;
3606
3607static void invalid_env(const char *p, void *userdata) {
3608 InvalidEnvInfo *info = userdata;
3609
f2341e0a 3610 log_unit_error(info->unit, "Ignoring invalid environment assignment '%s': %s", p, info->path);
039f0e70
LP
3611}
3612
52c239d7
LB
3613const char* exec_context_fdname(const ExecContext *c, int fd_index) {
3614 assert(c);
3615
3616 switch (fd_index) {
3617 case STDIN_FILENO:
3618 if (c->std_input != EXEC_INPUT_NAMED_FD)
3619 return NULL;
3620 return c->stdio_fdname[STDIN_FILENO] ?: "stdin";
3621 case STDOUT_FILENO:
3622 if (c->std_output != EXEC_OUTPUT_NAMED_FD)
3623 return NULL;
3624 return c->stdio_fdname[STDOUT_FILENO] ?: "stdout";
3625 case STDERR_FILENO:
3626 if (c->std_error != EXEC_OUTPUT_NAMED_FD)
3627 return NULL;
3628 return c->stdio_fdname[STDERR_FILENO] ?: "stderr";
3629 default:
3630 return NULL;
3631 }
3632}
3633
3634int exec_context_named_iofds(Unit *unit, const ExecContext *c, const ExecParameters *p, int named_iofds[3]) {
3635 unsigned i, targets;
56fbd561 3636 const char* stdio_fdname[3];
4c47affc 3637 unsigned n_fds;
52c239d7
LB
3638
3639 assert(c);
3640 assert(p);
3641
3642 targets = (c->std_input == EXEC_INPUT_NAMED_FD) +
3643 (c->std_output == EXEC_OUTPUT_NAMED_FD) +
3644 (c->std_error == EXEC_OUTPUT_NAMED_FD);
3645
3646 for (i = 0; i < 3; i++)
3647 stdio_fdname[i] = exec_context_fdname(c, i);
3648
4c47affc
FB
3649 n_fds = p->n_storage_fds + p->n_socket_fds;
3650
3651 for (i = 0; i < n_fds && targets > 0; i++)
56fbd561
ZJS
3652 if (named_iofds[STDIN_FILENO] < 0 &&
3653 c->std_input == EXEC_INPUT_NAMED_FD &&
3654 stdio_fdname[STDIN_FILENO] &&
3655 streq(p->fd_names[i], stdio_fdname[STDIN_FILENO])) {
3656
52c239d7
LB
3657 named_iofds[STDIN_FILENO] = p->fds[i];
3658 targets--;
56fbd561
ZJS
3659
3660 } else if (named_iofds[STDOUT_FILENO] < 0 &&
3661 c->std_output == EXEC_OUTPUT_NAMED_FD &&
3662 stdio_fdname[STDOUT_FILENO] &&
3663 streq(p->fd_names[i], stdio_fdname[STDOUT_FILENO])) {
3664
52c239d7
LB
3665 named_iofds[STDOUT_FILENO] = p->fds[i];
3666 targets--;
56fbd561
ZJS
3667
3668 } else if (named_iofds[STDERR_FILENO] < 0 &&
3669 c->std_error == EXEC_OUTPUT_NAMED_FD &&
3670 stdio_fdname[STDERR_FILENO] &&
3671 streq(p->fd_names[i], stdio_fdname[STDERR_FILENO])) {
3672
52c239d7
LB
3673 named_iofds[STDERR_FILENO] = p->fds[i];
3674 targets--;
3675 }
3676
56fbd561 3677 return targets == 0 ? 0 : -ENOENT;
52c239d7
LB
3678}
3679
f2341e0a 3680int exec_context_load_environment(Unit *unit, const ExecContext *c, char ***l) {
8c7be95e
LP
3681 char **i, **r = NULL;
3682
3683 assert(c);
3684 assert(l);
3685
3686 STRV_FOREACH(i, c->environment_files) {
3687 char *fn;
52511fae
ZJS
3688 int k;
3689 unsigned n;
8c7be95e
LP
3690 bool ignore = false;
3691 char **p;
7fd1b19b 3692 _cleanup_globfree_ glob_t pglob = {};
8c7be95e
LP
3693
3694 fn = *i;
3695
3696 if (fn[0] == '-') {
3697 ignore = true;
313cefa1 3698 fn++;
8c7be95e
LP
3699 }
3700
3701 if (!path_is_absolute(fn)) {
8c7be95e
LP
3702 if (ignore)
3703 continue;
3704
3705 strv_free(r);
3706 return -EINVAL;
3707 }
3708
2bef10ab 3709 /* Filename supports globbing, take all matching files */
d8c92e8b
ZJS
3710 k = safe_glob(fn, 0, &pglob);
3711 if (k < 0) {
2bef10ab
PL
3712 if (ignore)
3713 continue;
8c7be95e 3714
2bef10ab 3715 strv_free(r);
d8c92e8b 3716 return k;
2bef10ab 3717 }
8c7be95e 3718
d8c92e8b
ZJS
3719 /* When we don't match anything, -ENOENT should be returned */
3720 assert(pglob.gl_pathc > 0);
3721
3722 for (n = 0; n < pglob.gl_pathc; n++) {
717603e3 3723 k = load_env_file(NULL, pglob.gl_pathv[n], NULL, &p);
2bef10ab
PL
3724 if (k < 0) {
3725 if (ignore)
3726 continue;
8c7be95e 3727
2bef10ab 3728 strv_free(r);
2bef10ab 3729 return k;
e9c1ea9d 3730 }
ebc05a09 3731 /* Log invalid environment variables with filename */
039f0e70
LP
3732 if (p) {
3733 InvalidEnvInfo info = {
f2341e0a 3734 .unit = unit,
039f0e70
LP
3735 .path = pglob.gl_pathv[n]
3736 };
3737
3738 p = strv_env_clean_with_callback(p, invalid_env, &info);
3739 }
8c7be95e 3740
2bef10ab
PL
3741 if (r == NULL)
3742 r = p;
3743 else {
3744 char **m;
8c7be95e 3745
2bef10ab
PL
3746 m = strv_env_merge(2, r, p);
3747 strv_free(r);
3748 strv_free(p);
c84a9488 3749 if (!m)
2bef10ab 3750 return -ENOMEM;
2bef10ab
PL
3751
3752 r = m;
3753 }
8c7be95e
LP
3754 }
3755 }
3756
3757 *l = r;
3758
3759 return 0;
3760}
3761
6ac8fdc9 3762static bool tty_may_match_dev_console(const char *tty) {
e1d75803 3763 _cleanup_free_ char *active = NULL;
7d6884b6 3764 char *console;
6ac8fdc9 3765
1e22b5cd
LP
3766 if (!tty)
3767 return true;
3768
a119ec7c 3769 tty = skip_dev_prefix(tty);
6ac8fdc9
MS
3770
3771 /* trivial identity? */
3772 if (streq(tty, "console"))
3773 return true;
3774
3775 console = resolve_dev_console(&active);
3776 /* if we could not resolve, assume it may */
3777 if (!console)
3778 return true;
3779
3780 /* "tty0" means the active VC, so it may be the same sometimes */
e1d75803 3781 return streq(console, tty) || (streq(console, "tty0") && tty_is_vc(tty));
6ac8fdc9
MS
3782}
3783
3784bool exec_context_may_touch_console(ExecContext *ec) {
1e22b5cd
LP
3785
3786 return (ec->tty_reset ||
3787 ec->tty_vhangup ||
3788 ec->tty_vt_disallocate ||
6ac8fdc9
MS
3789 is_terminal_input(ec->std_input) ||
3790 is_terminal_output(ec->std_output) ||
3791 is_terminal_output(ec->std_error)) &&
1e22b5cd 3792 tty_may_match_dev_console(exec_context_tty_path(ec));
6ac8fdc9
MS
3793}
3794
15ae422b
LP
3795static void strv_fprintf(FILE *f, char **l) {
3796 char **g;
3797
3798 assert(f);
3799
3800 STRV_FOREACH(g, l)
3801 fprintf(f, " %s", *g);
3802}
3803
5cb5a6ff 3804void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) {
d3070fbd 3805 ExecDirectoryType dt;
c2bbd90b 3806 char **e, **d;
94f04347 3807 unsigned i;
add00535 3808 int r;
9eba9da4 3809
5cb5a6ff
LP
3810 assert(c);
3811 assert(f);
3812
4ad49000 3813 prefix = strempty(prefix);
5cb5a6ff
LP
3814
3815 fprintf(f,
94f04347
LP
3816 "%sUMask: %04o\n"
3817 "%sWorkingDirectory: %s\n"
451a074f 3818 "%sRootDirectory: %s\n"
15ae422b 3819 "%sNonBlocking: %s\n"
64747e2d 3820 "%sPrivateTmp: %s\n"
7f112f50 3821 "%sPrivateDevices: %s\n"
59eeb84b 3822 "%sProtectKernelTunables: %s\n"
e66a2f65 3823 "%sProtectKernelModules: %s\n"
59eeb84b 3824 "%sProtectControlGroups: %s\n"
d251207d
LP
3825 "%sPrivateNetwork: %s\n"
3826 "%sPrivateUsers: %s\n"
1b8689f9
LP
3827 "%sProtectHome: %s\n"
3828 "%sProtectSystem: %s\n"
5d997827 3829 "%sMountAPIVFS: %s\n"
f3e43635 3830 "%sIgnoreSIGPIPE: %s\n"
f4170c67 3831 "%sMemoryDenyWriteExecute: %s\n"
b1edf445
LP
3832 "%sRestrictRealtime: %s\n"
3833 "%sKeyringMode: %s\n",
5cb5a6ff 3834 prefix, c->umask,
9eba9da4 3835 prefix, c->working_directory ? c->working_directory : "/",
451a074f 3836 prefix, c->root_directory ? c->root_directory : "/",
15ae422b 3837 prefix, yes_no(c->non_blocking),
64747e2d 3838 prefix, yes_no(c->private_tmp),
7f112f50 3839 prefix, yes_no(c->private_devices),
59eeb84b 3840 prefix, yes_no(c->protect_kernel_tunables),
e66a2f65 3841 prefix, yes_no(c->protect_kernel_modules),
59eeb84b 3842 prefix, yes_no(c->protect_control_groups),
d251207d
LP
3843 prefix, yes_no(c->private_network),
3844 prefix, yes_no(c->private_users),
1b8689f9
LP
3845 prefix, protect_home_to_string(c->protect_home),
3846 prefix, protect_system_to_string(c->protect_system),
5d997827 3847 prefix, yes_no(c->mount_apivfs),
f3e43635 3848 prefix, yes_no(c->ignore_sigpipe),
f4170c67 3849 prefix, yes_no(c->memory_deny_write_execute),
b1edf445
LP
3850 prefix, yes_no(c->restrict_realtime),
3851 prefix, exec_keyring_mode_to_string(c->keyring_mode));
fb33a393 3852
915e6d16
LP
3853 if (c->root_image)
3854 fprintf(f, "%sRootImage: %s\n", prefix, c->root_image);
3855
8c7be95e
LP
3856 STRV_FOREACH(e, c->environment)
3857 fprintf(f, "%sEnvironment: %s\n", prefix, *e);
3858
3859 STRV_FOREACH(e, c->environment_files)
3860 fprintf(f, "%sEnvironmentFile: %s\n", prefix, *e);
94f04347 3861
b4c14404
FB
3862 STRV_FOREACH(e, c->pass_environment)
3863 fprintf(f, "%sPassEnvironment: %s\n", prefix, *e);
3864
00819cc1
LP
3865 STRV_FOREACH(e, c->unset_environment)
3866 fprintf(f, "%sUnsetEnvironment: %s\n", prefix, *e);
3867
53f47dfc
YW
3868 fprintf(f, "%sRuntimeDirectoryPreserve: %s\n", prefix, exec_preserve_mode_to_string(c->runtime_directory_preserve_mode));
3869
72fd1768 3870 for (dt = 0; dt < _EXEC_DIRECTORY_TYPE_MAX; dt++) {
3536f49e
YW
3871 fprintf(f, "%s%sMode: %04o\n", prefix, exec_directory_type_to_string(dt), c->directories[dt].mode);
3872
3873 STRV_FOREACH(d, c->directories[dt].paths)
3874 fprintf(f, "%s%s: %s\n", prefix, exec_directory_type_to_string(dt), *d);
3875 }
c2bbd90b 3876
fb33a393
LP
3877 if (c->nice_set)
3878 fprintf(f,
3879 "%sNice: %i\n",
3880 prefix, c->nice);
3881
dd6c17b1 3882 if (c->oom_score_adjust_set)
fb33a393 3883 fprintf(f,
dd6c17b1
LP
3884 "%sOOMScoreAdjust: %i\n",
3885 prefix, c->oom_score_adjust);
9eba9da4 3886
94f04347 3887 for (i = 0; i < RLIM_NLIMITS; i++)
3c11da9d
EV
3888 if (c->rlimit[i]) {
3889 fprintf(f, "%s%s: " RLIM_FMT "\n",
3890 prefix, rlimit_to_string(i), c->rlimit[i]->rlim_max);
3891 fprintf(f, "%s%sSoft: " RLIM_FMT "\n",
3892 prefix, rlimit_to_string(i), c->rlimit[i]->rlim_cur);
3893 }
94f04347 3894
f8b69d1d 3895 if (c->ioprio_set) {
1756a011 3896 _cleanup_free_ char *class_str = NULL;
f8b69d1d 3897
837df140
YW
3898 r = ioprio_class_to_string_alloc(IOPRIO_PRIO_CLASS(c->ioprio), &class_str);
3899 if (r >= 0)
3900 fprintf(f, "%sIOSchedulingClass: %s\n", prefix, class_str);
3901
3902 fprintf(f, "%sIOPriority: %lu\n", prefix, IOPRIO_PRIO_DATA(c->ioprio));
f8b69d1d 3903 }
94f04347 3904
f8b69d1d 3905 if (c->cpu_sched_set) {
1756a011 3906 _cleanup_free_ char *policy_str = NULL;
f8b69d1d 3907
837df140
YW
3908 r = sched_policy_to_string_alloc(c->cpu_sched_policy, &policy_str);
3909 if (r >= 0)
3910 fprintf(f, "%sCPUSchedulingPolicy: %s\n", prefix, policy_str);
3911
94f04347 3912 fprintf(f,
38b48754
LP
3913 "%sCPUSchedulingPriority: %i\n"
3914 "%sCPUSchedulingResetOnFork: %s\n",
38b48754
LP
3915 prefix, c->cpu_sched_priority,
3916 prefix, yes_no(c->cpu_sched_reset_on_fork));
b929bf04 3917 }
94f04347 3918
82c121a4 3919 if (c->cpuset) {
94f04347 3920 fprintf(f, "%sCPUAffinity:", prefix);
82c121a4
LP
3921 for (i = 0; i < c->cpuset_ncpus; i++)
3922 if (CPU_ISSET_S(i, CPU_ALLOC_SIZE(c->cpuset_ncpus), c->cpuset))
43a99a7a 3923 fprintf(f, " %u", i);
94f04347
LP
3924 fputs("\n", f);
3925 }
3926
3a43da28 3927 if (c->timer_slack_nsec != NSEC_INFINITY)
ccd06097 3928 fprintf(f, "%sTimerSlackNSec: "NSEC_FMT "\n", prefix, c->timer_slack_nsec);
94f04347
LP
3929
3930 fprintf(f,
80876c20
LP
3931 "%sStandardInput: %s\n"
3932 "%sStandardOutput: %s\n"
3933 "%sStandardError: %s\n",
3934 prefix, exec_input_to_string(c->std_input),
3935 prefix, exec_output_to_string(c->std_output),
3936 prefix, exec_output_to_string(c->std_error));
3937
3938 if (c->tty_path)
3939 fprintf(f,
6ea832a2
LP
3940 "%sTTYPath: %s\n"
3941 "%sTTYReset: %s\n"
3942 "%sTTYVHangup: %s\n"
3943 "%sTTYVTDisallocate: %s\n",
3944 prefix, c->tty_path,
3945 prefix, yes_no(c->tty_reset),
3946 prefix, yes_no(c->tty_vhangup),
3947 prefix, yes_no(c->tty_vt_disallocate));
94f04347 3948
9f6444eb
LP
3949 if (IN_SET(c->std_output,
3950 EXEC_OUTPUT_SYSLOG,
3951 EXEC_OUTPUT_KMSG,
3952 EXEC_OUTPUT_JOURNAL,
3953 EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
3954 EXEC_OUTPUT_KMSG_AND_CONSOLE,
3955 EXEC_OUTPUT_JOURNAL_AND_CONSOLE) ||
3956 IN_SET(c->std_error,
3957 EXEC_OUTPUT_SYSLOG,
3958 EXEC_OUTPUT_KMSG,
3959 EXEC_OUTPUT_JOURNAL,
3960 EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
3961 EXEC_OUTPUT_KMSG_AND_CONSOLE,
3962 EXEC_OUTPUT_JOURNAL_AND_CONSOLE)) {
f8b69d1d 3963
5ce70e5b 3964 _cleanup_free_ char *fac_str = NULL, *lvl_str = NULL;
f8b69d1d 3965
837df140
YW
3966 r = log_facility_unshifted_to_string_alloc(c->syslog_priority >> 3, &fac_str);
3967 if (r >= 0)
3968 fprintf(f, "%sSyslogFacility: %s\n", prefix, fac_str);
f8b69d1d 3969
837df140
YW
3970 r = log_level_to_string_alloc(LOG_PRI(c->syslog_priority), &lvl_str);
3971 if (r >= 0)
3972 fprintf(f, "%sSyslogLevel: %s\n", prefix, lvl_str);
f8b69d1d 3973 }
94f04347 3974
d3070fbd
LP
3975 if (c->log_level_max >= 0) {
3976 _cleanup_free_ char *t = NULL;
3977
3978 (void) log_level_to_string_alloc(c->log_level_max, &t);
3979
3980 fprintf(f, "%sLogLevelMax: %s\n", prefix, strna(t));
3981 }
3982
3983 if (c->n_log_extra_fields > 0) {
3984 size_t j;
3985
3986 for (j = 0; j < c->n_log_extra_fields; j++) {
3987 fprintf(f, "%sLogExtraFields: ", prefix);
3988 fwrite(c->log_extra_fields[j].iov_base,
3989 1, c->log_extra_fields[j].iov_len,
3990 f);
3991 fputc('\n', f);
3992 }
3993 }
3994
07d46372
YW
3995 if (c->secure_bits) {
3996 _cleanup_free_ char *str = NULL;
3997
3998 r = secure_bits_to_string_alloc(c->secure_bits, &str);
3999 if (r >= 0)
4000 fprintf(f, "%sSecure Bits: %s\n", prefix, str);
4001 }
94f04347 4002
a103496c 4003 if (c->capability_bounding_set != CAP_ALL) {
dd1f5bd0 4004 _cleanup_free_ char *str = NULL;
94f04347 4005
dd1f5bd0
YW
4006 r = capability_set_to_string_alloc(c->capability_bounding_set, &str);
4007 if (r >= 0)
4008 fprintf(f, "%sCapabilityBoundingSet: %s\n", prefix, str);
755d4b67
IP
4009 }
4010
4011 if (c->capability_ambient_set != 0) {
dd1f5bd0 4012 _cleanup_free_ char *str = NULL;
755d4b67 4013
dd1f5bd0
YW
4014 r = capability_set_to_string_alloc(c->capability_ambient_set, &str);
4015 if (r >= 0)
4016 fprintf(f, "%sAmbientCapabilities: %s\n", prefix, str);
94f04347
LP
4017 }
4018
4019 if (c->user)
f2d3769a 4020 fprintf(f, "%sUser: %s\n", prefix, c->user);
94f04347 4021 if (c->group)
f2d3769a 4022 fprintf(f, "%sGroup: %s\n", prefix, c->group);
94f04347 4023
29206d46
LP
4024 fprintf(f, "%sDynamicUser: %s\n", prefix, yes_no(c->dynamic_user));
4025
ac6e8be6 4026 if (!strv_isempty(c->supplementary_groups)) {
94f04347 4027 fprintf(f, "%sSupplementaryGroups:", prefix);
15ae422b
LP
4028 strv_fprintf(f, c->supplementary_groups);
4029 fputs("\n", f);
4030 }
94f04347 4031
5b6319dc 4032 if (c->pam_name)
f2d3769a 4033 fprintf(f, "%sPAMName: %s\n", prefix, c->pam_name);
5b6319dc 4034
2a624c36
AP
4035 if (strv_length(c->read_write_paths) > 0) {
4036 fprintf(f, "%sReadWritePaths:", prefix);
4037 strv_fprintf(f, c->read_write_paths);
15ae422b
LP
4038 fputs("\n", f);
4039 }
4040
2a624c36
AP
4041 if (strv_length(c->read_only_paths) > 0) {
4042 fprintf(f, "%sReadOnlyPaths:", prefix);
4043 strv_fprintf(f, c->read_only_paths);
15ae422b
LP
4044 fputs("\n", f);
4045 }
94f04347 4046
2a624c36
AP
4047 if (strv_length(c->inaccessible_paths) > 0) {
4048 fprintf(f, "%sInaccessiblePaths:", prefix);
4049 strv_fprintf(f, c->inaccessible_paths);
94f04347
LP
4050 fputs("\n", f);
4051 }
2e22afe9 4052
d2d6c096
LP
4053 if (c->n_bind_mounts > 0)
4054 for (i = 0; i < c->n_bind_mounts; i++) {
4055 fprintf(f, "%s%s: %s:%s:%s\n", prefix,
4056 c->bind_mounts[i].read_only ? "BindReadOnlyPaths" : "BindPaths",
4057 c->bind_mounts[i].source,
4058 c->bind_mounts[i].destination,
4059 c->bind_mounts[i].recursive ? "rbind" : "norbind");
4060 }
4061
169c1bda
LP
4062 if (c->utmp_id)
4063 fprintf(f,
4064 "%sUtmpIdentifier: %s\n",
4065 prefix, c->utmp_id);
7b52a628
MS
4066
4067 if (c->selinux_context)
4068 fprintf(f,
5f8640fb
LP
4069 "%sSELinuxContext: %s%s\n",
4070 prefix, c->selinux_context_ignore ? "-" : "", c->selinux_context);
17df7223 4071
80c21aea
WC
4072 if (c->apparmor_profile)
4073 fprintf(f,
4074 "%sAppArmorProfile: %s%s\n",
4075 prefix, c->apparmor_profile_ignore ? "-" : "", c->apparmor_profile);
4076
4077 if (c->smack_process_label)
4078 fprintf(f,
4079 "%sSmackProcessLabel: %s%s\n",
4080 prefix, c->smack_process_label_ignore ? "-" : "", c->smack_process_label);
4081
050f7277 4082 if (c->personality != PERSONALITY_INVALID)
ac45f971
LP
4083 fprintf(f,
4084 "%sPersonality: %s\n",
4085 prefix, strna(personality_to_string(c->personality)));
4086
78e864e5
TM
4087 fprintf(f,
4088 "%sLockPersonality: %s\n",
4089 prefix, yes_no(c->lock_personality));
4090
17df7223 4091 if (c->syscall_filter) {
349cc4a5 4092#if HAVE_SECCOMP
17df7223 4093 Iterator j;
8cfa775f 4094 void *id, *val;
17df7223 4095 bool first = true;
351a19b1 4096#endif
17df7223
LP
4097
4098 fprintf(f,
57183d11 4099 "%sSystemCallFilter: ",
17df7223
LP
4100 prefix);
4101
4102 if (!c->syscall_whitelist)
4103 fputc('~', f);
4104
349cc4a5 4105#if HAVE_SECCOMP
8cfa775f 4106 HASHMAP_FOREACH_KEY(val, id, c->syscall_filter, j) {
17df7223 4107 _cleanup_free_ char *name = NULL;
8cfa775f
YW
4108 const char *errno_name = NULL;
4109 int num = PTR_TO_INT(val);
17df7223
LP
4110
4111 if (first)
4112 first = false;
4113 else
4114 fputc(' ', f);
4115
57183d11 4116 name = seccomp_syscall_resolve_num_arch(SCMP_ARCH_NATIVE, PTR_TO_INT(id) - 1);
17df7223 4117 fputs(strna(name), f);
8cfa775f
YW
4118
4119 if (num >= 0) {
4120 errno_name = errno_to_name(num);
4121 if (errno_name)
4122 fprintf(f, ":%s", errno_name);
4123 else
4124 fprintf(f, ":%d", num);
4125 }
17df7223 4126 }
351a19b1 4127#endif
17df7223
LP
4128
4129 fputc('\n', f);
4130 }
4131
57183d11 4132 if (c->syscall_archs) {
349cc4a5 4133#if HAVE_SECCOMP
57183d11
LP
4134 Iterator j;
4135 void *id;
4136#endif
4137
4138 fprintf(f,
4139 "%sSystemCallArchitectures:",
4140 prefix);
4141
349cc4a5 4142#if HAVE_SECCOMP
57183d11
LP
4143 SET_FOREACH(id, c->syscall_archs, j)
4144 fprintf(f, " %s", strna(seccomp_arch_to_string(PTR_TO_UINT32(id) - 1)));
4145#endif
4146 fputc('\n', f);
4147 }
4148
add00535
LP
4149 if (exec_context_restrict_namespaces_set(c)) {
4150 _cleanup_free_ char *s = NULL;
4151
4152 r = namespace_flag_to_string_many(c->restrict_namespaces, &s);
4153 if (r >= 0)
4154 fprintf(f, "%sRestrictNamespaces: %s\n",
4155 prefix, s);
4156 }
4157
3df90f24
YW
4158 if (c->syscall_errno > 0) {
4159 const char *errno_name;
4160
4161 fprintf(f, "%sSystemCallErrorNumber: ", prefix);
4162
4163 errno_name = errno_to_name(c->syscall_errno);
4164 if (errno_name)
4165 fprintf(f, "%s\n", errno_name);
4166 else
4167 fprintf(f, "%d\n", c->syscall_errno);
4168 }
eef65bf3
MS
4169
4170 if (c->apparmor_profile)
4171 fprintf(f,
4172 "%sAppArmorProfile: %s%s\n",
4173 prefix, c->apparmor_profile_ignore ? "-" : "", c->apparmor_profile);
5cb5a6ff
LP
4174}
4175
a931ad47
LP
4176bool exec_context_maintains_privileges(ExecContext *c) {
4177 assert(c);
4178
61233823 4179 /* Returns true if the process forked off would run under
a931ad47
LP
4180 * an unchanged UID or as root. */
4181
4182 if (!c->user)
4183 return true;
4184
4185 if (streq(c->user, "root") || streq(c->user, "0"))
4186 return true;
4187
4188 return false;
4189}
4190
7f452159
LP
4191int exec_context_get_effective_ioprio(ExecContext *c) {
4192 int p;
4193
4194 assert(c);
4195
4196 if (c->ioprio_set)
4197 return c->ioprio;
4198
4199 p = ioprio_get(IOPRIO_WHO_PROCESS, 0);
4200 if (p < 0)
4201 return IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 4);
4202
4203 return p;
4204}
4205
d3070fbd
LP
4206void exec_context_free_log_extra_fields(ExecContext *c) {
4207 size_t l;
4208
4209 assert(c);
4210
4211 for (l = 0; l < c->n_log_extra_fields; l++)
4212 free(c->log_extra_fields[l].iov_base);
4213 c->log_extra_fields = mfree(c->log_extra_fields);
4214 c->n_log_extra_fields = 0;
4215}
4216
b58b4116 4217void exec_status_start(ExecStatus *s, pid_t pid) {
034c6ed7 4218 assert(s);
5cb5a6ff 4219
b58b4116
LP
4220 zero(*s);
4221 s->pid = pid;
4222 dual_timestamp_get(&s->start_timestamp);
4223}
4224
6ea832a2 4225void exec_status_exit(ExecStatus *s, ExecContext *context, pid_t pid, int code, int status) {
b58b4116
LP
4226 assert(s);
4227
0b1f4ae6 4228 if (s->pid && s->pid != pid)
b58b4116
LP
4229 zero(*s);
4230
034c6ed7 4231 s->pid = pid;
63983207 4232 dual_timestamp_get(&s->exit_timestamp);
9fb86720 4233
034c6ed7
LP
4234 s->code = code;
4235 s->status = status;
169c1bda 4236
6ea832a2
LP
4237 if (context) {
4238 if (context->utmp_id)
4239 utmp_put_dead_process(context->utmp_id, pid, code, status);
4240
1e22b5cd 4241 exec_context_tty_reset(context, NULL);
6ea832a2 4242 }
9fb86720
LP
4243}
4244
4245void exec_status_dump(ExecStatus *s, FILE *f, const char *prefix) {
4246 char buf[FORMAT_TIMESTAMP_MAX];
4247
4248 assert(s);
4249 assert(f);
4250
9fb86720
LP
4251 if (s->pid <= 0)
4252 return;
4253
4c940960
LP
4254 prefix = strempty(prefix);
4255
9fb86720 4256 fprintf(f,
ccd06097
ZJS
4257 "%sPID: "PID_FMT"\n",
4258 prefix, s->pid);
9fb86720 4259
af9d16e1 4260 if (dual_timestamp_is_set(&s->start_timestamp))
9fb86720
LP
4261 fprintf(f,
4262 "%sStart Timestamp: %s\n",
63983207 4263 prefix, format_timestamp(buf, sizeof(buf), s->start_timestamp.realtime));
9fb86720 4264
af9d16e1 4265 if (dual_timestamp_is_set(&s->exit_timestamp))
9fb86720
LP
4266 fprintf(f,
4267 "%sExit Timestamp: %s\n"
4268 "%sExit Code: %s\n"
4269 "%sExit Status: %i\n",
63983207 4270 prefix, format_timestamp(buf, sizeof(buf), s->exit_timestamp.realtime),
9fb86720
LP
4271 prefix, sigchld_code_to_string(s->code),
4272 prefix, s->status);
5cb5a6ff 4273}
44d8db9e 4274
9e2f7c11 4275char *exec_command_line(char **argv) {
44d8db9e
LP
4276 size_t k;
4277 char *n, *p, **a;
4278 bool first = true;
4279
9e2f7c11 4280 assert(argv);
44d8db9e 4281
9164977d 4282 k = 1;
9e2f7c11 4283 STRV_FOREACH(a, argv)
44d8db9e
LP
4284 k += strlen(*a)+3;
4285
5cd9cd35
LP
4286 n = new(char, k);
4287 if (!n)
44d8db9e
LP
4288 return NULL;
4289
4290 p = n;
9e2f7c11 4291 STRV_FOREACH(a, argv) {
44d8db9e
LP
4292
4293 if (!first)
4294 *(p++) = ' ';
4295 else
4296 first = false;
4297
4298 if (strpbrk(*a, WHITESPACE)) {
4299 *(p++) = '\'';
4300 p = stpcpy(p, *a);
4301 *(p++) = '\'';
4302 } else
4303 p = stpcpy(p, *a);
4304
4305 }
4306
9164977d
LP
4307 *p = 0;
4308
44d8db9e
LP
4309 /* FIXME: this doesn't really handle arguments that have
4310 * spaces and ticks in them */
4311
4312 return n;
4313}
4314
4315void exec_command_dump(ExecCommand *c, FILE *f, const char *prefix) {
e1d75803 4316 _cleanup_free_ char *cmd = NULL;
4c940960 4317 const char *prefix2;
44d8db9e
LP
4318
4319 assert(c);
4320 assert(f);
4321
4c940960 4322 prefix = strempty(prefix);
63c372cb 4323 prefix2 = strjoina(prefix, "\t");
44d8db9e 4324
9e2f7c11 4325 cmd = exec_command_line(c->argv);
44d8db9e
LP
4326 fprintf(f,
4327 "%sCommand Line: %s\n",
4328 prefix, cmd ? cmd : strerror(ENOMEM));
4329
9fb86720 4330 exec_status_dump(&c->exec_status, f, prefix2);
44d8db9e
LP
4331}
4332
4333void exec_command_dump_list(ExecCommand *c, FILE *f, const char *prefix) {
4334 assert(f);
4335
4c940960 4336 prefix = strempty(prefix);
44d8db9e
LP
4337
4338 LIST_FOREACH(command, c, c)
4339 exec_command_dump(c, f, prefix);
4340}
94f04347 4341
a6a80b4f
LP
4342void exec_command_append_list(ExecCommand **l, ExecCommand *e) {
4343 ExecCommand *end;
4344
4345 assert(l);
4346 assert(e);
4347
4348 if (*l) {
35b8ca3a 4349 /* It's kind of important, that we keep the order here */
71fda00f
LP
4350 LIST_FIND_TAIL(command, *l, end);
4351 LIST_INSERT_AFTER(command, *l, end, e);
a6a80b4f
LP
4352 } else
4353 *l = e;
4354}
4355
26fd040d
LP
4356int exec_command_set(ExecCommand *c, const char *path, ...) {
4357 va_list ap;
4358 char **l, *p;
4359
4360 assert(c);
4361 assert(path);
4362
4363 va_start(ap, path);
4364 l = strv_new_ap(path, ap);
4365 va_end(ap);
4366
4367 if (!l)
4368 return -ENOMEM;
4369
250a918d
LP
4370 p = strdup(path);
4371 if (!p) {
26fd040d
LP
4372 strv_free(l);
4373 return -ENOMEM;
4374 }
4375
4376 free(c->path);
4377 c->path = p;
4378
4379 strv_free(c->argv);
4380 c->argv = l;
4381
4382 return 0;
4383}
4384
86b23b07 4385int exec_command_append(ExecCommand *c, const char *path, ...) {
e63ff941 4386 _cleanup_strv_free_ char **l = NULL;
86b23b07 4387 va_list ap;
86b23b07
JS
4388 int r;
4389
4390 assert(c);
4391 assert(path);
4392
4393 va_start(ap, path);
4394 l = strv_new_ap(path, ap);
4395 va_end(ap);
4396
4397 if (!l)
4398 return -ENOMEM;
4399
e287086b 4400 r = strv_extend_strv(&c->argv, l, false);
e63ff941 4401 if (r < 0)
86b23b07 4402 return r;
86b23b07
JS
4403
4404 return 0;
4405}
4406
4407
613b411c
LP
4408static int exec_runtime_allocate(ExecRuntime **rt) {
4409
4410 if (*rt)
4411 return 0;
4412
4413 *rt = new0(ExecRuntime, 1);
f146f5e1 4414 if (!*rt)
613b411c
LP
4415 return -ENOMEM;
4416
4417 (*rt)->n_ref = 1;
4418 (*rt)->netns_storage_socket[0] = (*rt)->netns_storage_socket[1] = -1;
4419
4420 return 0;
4421}
4422
4423int exec_runtime_make(ExecRuntime **rt, ExecContext *c, const char *id) {
4424 int r;
4425
4426 assert(rt);
4427 assert(c);
4428 assert(id);
4429
4430 if (*rt)
4431 return 1;
4432
4433 if (!c->private_network && !c->private_tmp)
4434 return 0;
4435
4436 r = exec_runtime_allocate(rt);
4437 if (r < 0)
4438 return r;
4439
4440 if (c->private_network && (*rt)->netns_storage_socket[0] < 0) {
33df919d 4441 if (socketpair(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC, 0, (*rt)->netns_storage_socket) < 0)
613b411c
LP
4442 return -errno;
4443 }
4444
4445 if (c->private_tmp && !(*rt)->tmp_dir) {
4446 r = setup_tmp_dirs(id, &(*rt)->tmp_dir, &(*rt)->var_tmp_dir);
4447 if (r < 0)
4448 return r;
4449 }
4450
4451 return 1;
4452}
4453
4454ExecRuntime *exec_runtime_ref(ExecRuntime *r) {
4455 assert(r);
4456 assert(r->n_ref > 0);
4457
4458 r->n_ref++;
4459 return r;
4460}
4461
4462ExecRuntime *exec_runtime_unref(ExecRuntime *r) {
4463
4464 if (!r)
4465 return NULL;
4466
4467 assert(r->n_ref > 0);
4468
4469 r->n_ref--;
f2341e0a
LP
4470 if (r->n_ref > 0)
4471 return NULL;
4472
4473 free(r->tmp_dir);
4474 free(r->var_tmp_dir);
4475 safe_close_pair(r->netns_storage_socket);
6b430fdb 4476 return mfree(r);
613b411c
LP
4477}
4478
f2341e0a 4479int exec_runtime_serialize(Unit *u, ExecRuntime *rt, FILE *f, FDSet *fds) {
613b411c
LP
4480 assert(u);
4481 assert(f);
4482 assert(fds);
4483
4484 if (!rt)
4485 return 0;
4486
4487 if (rt->tmp_dir)
4488 unit_serialize_item(u, f, "tmp-dir", rt->tmp_dir);
4489
4490 if (rt->var_tmp_dir)
4491 unit_serialize_item(u, f, "var-tmp-dir", rt->var_tmp_dir);
4492
4493 if (rt->netns_storage_socket[0] >= 0) {
4494 int copy;
4495
4496 copy = fdset_put_dup(fds, rt->netns_storage_socket[0]);
4497 if (copy < 0)
4498 return copy;
4499
4500 unit_serialize_item_format(u, f, "netns-socket-0", "%i", copy);
4501 }
4502
4503 if (rt->netns_storage_socket[1] >= 0) {
4504 int copy;
4505
4506 copy = fdset_put_dup(fds, rt->netns_storage_socket[1]);
4507 if (copy < 0)
4508 return copy;
4509
4510 unit_serialize_item_format(u, f, "netns-socket-1", "%i", copy);
4511 }
4512
4513 return 0;
4514}
4515
f2341e0a 4516int exec_runtime_deserialize_item(Unit *u, ExecRuntime **rt, const char *key, const char *value, FDSet *fds) {
613b411c
LP
4517 int r;
4518
4519 assert(rt);
4520 assert(key);
4521 assert(value);
4522
4523 if (streq(key, "tmp-dir")) {
4524 char *copy;
4525
4526 r = exec_runtime_allocate(rt);
4527 if (r < 0)
f2341e0a 4528 return log_oom();
613b411c
LP
4529
4530 copy = strdup(value);
4531 if (!copy)
4532 return log_oom();
4533
4534 free((*rt)->tmp_dir);
4535 (*rt)->tmp_dir = copy;
4536
4537 } else if (streq(key, "var-tmp-dir")) {
4538 char *copy;
4539
4540 r = exec_runtime_allocate(rt);
4541 if (r < 0)
f2341e0a 4542 return log_oom();
613b411c
LP
4543
4544 copy = strdup(value);
4545 if (!copy)
4546 return log_oom();
4547
4548 free((*rt)->var_tmp_dir);
4549 (*rt)->var_tmp_dir = copy;
4550
4551 } else if (streq(key, "netns-socket-0")) {
4552 int fd;
4553
4554 r = exec_runtime_allocate(rt);
4555 if (r < 0)
f2341e0a 4556 return log_oom();
613b411c
LP
4557
4558 if (safe_atoi(value, &fd) < 0 || !fdset_contains(fds, fd))
f2341e0a 4559 log_unit_debug(u, "Failed to parse netns socket value: %s", value);
613b411c 4560 else {
03e334a1 4561 safe_close((*rt)->netns_storage_socket[0]);
613b411c
LP
4562 (*rt)->netns_storage_socket[0] = fdset_remove(fds, fd);
4563 }
4564 } else if (streq(key, "netns-socket-1")) {
4565 int fd;
4566
4567 r = exec_runtime_allocate(rt);
4568 if (r < 0)
f2341e0a 4569 return log_oom();
613b411c
LP
4570
4571 if (safe_atoi(value, &fd) < 0 || !fdset_contains(fds, fd))
f2341e0a 4572 log_unit_debug(u, "Failed to parse netns socket value: %s", value);
613b411c 4573 else {
03e334a1 4574 safe_close((*rt)->netns_storage_socket[1]);
613b411c
LP
4575 (*rt)->netns_storage_socket[1] = fdset_remove(fds, fd);
4576 }
4577 } else
4578 return 0;
4579
4580 return 1;
4581}
4582
4583static void *remove_tmpdir_thread(void *p) {
4584 _cleanup_free_ char *path = p;
4585
c6878637 4586 (void) rm_rf(path, REMOVE_ROOT|REMOVE_PHYSICAL);
613b411c
LP
4587 return NULL;
4588}
4589
4590void exec_runtime_destroy(ExecRuntime *rt) {
98b47d54
LP
4591 int r;
4592
613b411c
LP
4593 if (!rt)
4594 return;
4595
4596 /* If there are multiple users of this, let's leave the stuff around */
4597 if (rt->n_ref > 1)
4598 return;
4599
4600 if (rt->tmp_dir) {
4601 log_debug("Spawning thread to nuke %s", rt->tmp_dir);
98b47d54
LP
4602
4603 r = asynchronous_job(remove_tmpdir_thread, rt->tmp_dir);
4604 if (r < 0) {
da927ba9 4605 log_warning_errno(r, "Failed to nuke %s: %m", rt->tmp_dir);
98b47d54
LP
4606 free(rt->tmp_dir);
4607 }
4608
613b411c
LP
4609 rt->tmp_dir = NULL;
4610 }
4611
4612 if (rt->var_tmp_dir) {
4613 log_debug("Spawning thread to nuke %s", rt->var_tmp_dir);
98b47d54
LP
4614
4615 r = asynchronous_job(remove_tmpdir_thread, rt->var_tmp_dir);
4616 if (r < 0) {
da927ba9 4617 log_warning_errno(r, "Failed to nuke %s: %m", rt->var_tmp_dir);
98b47d54
LP
4618 free(rt->var_tmp_dir);
4619 }
4620
613b411c
LP
4621 rt->var_tmp_dir = NULL;
4622 }
4623
3d94f76c 4624 safe_close_pair(rt->netns_storage_socket);
613b411c
LP
4625}
4626
80876c20
LP
4627static const char* const exec_input_table[_EXEC_INPUT_MAX] = {
4628 [EXEC_INPUT_NULL] = "null",
4629 [EXEC_INPUT_TTY] = "tty",
4630 [EXEC_INPUT_TTY_FORCE] = "tty-force",
4f2d528d 4631 [EXEC_INPUT_TTY_FAIL] = "tty-fail",
52c239d7
LB
4632 [EXEC_INPUT_SOCKET] = "socket",
4633 [EXEC_INPUT_NAMED_FD] = "fd",
80876c20
LP
4634};
4635
8a0867d6
LP
4636DEFINE_STRING_TABLE_LOOKUP(exec_input, ExecInput);
4637
94f04347 4638static const char* const exec_output_table[_EXEC_OUTPUT_MAX] = {
80876c20 4639 [EXEC_OUTPUT_INHERIT] = "inherit",
94f04347 4640 [EXEC_OUTPUT_NULL] = "null",
80876c20 4641 [EXEC_OUTPUT_TTY] = "tty",
94f04347 4642 [EXEC_OUTPUT_SYSLOG] = "syslog",
28dbc1e8 4643 [EXEC_OUTPUT_SYSLOG_AND_CONSOLE] = "syslog+console",
9a6bca7a 4644 [EXEC_OUTPUT_KMSG] = "kmsg",
28dbc1e8 4645 [EXEC_OUTPUT_KMSG_AND_CONSOLE] = "kmsg+console",
706343f4
LP
4646 [EXEC_OUTPUT_JOURNAL] = "journal",
4647 [EXEC_OUTPUT_JOURNAL_AND_CONSOLE] = "journal+console",
52c239d7
LB
4648 [EXEC_OUTPUT_SOCKET] = "socket",
4649 [EXEC_OUTPUT_NAMED_FD] = "fd",
94f04347
LP
4650};
4651
4652DEFINE_STRING_TABLE_LOOKUP(exec_output, ExecOutput);
023a4f67
LP
4653
4654static const char* const exec_utmp_mode_table[_EXEC_UTMP_MODE_MAX] = {
4655 [EXEC_UTMP_INIT] = "init",
4656 [EXEC_UTMP_LOGIN] = "login",
4657 [EXEC_UTMP_USER] = "user",
4658};
4659
4660DEFINE_STRING_TABLE_LOOKUP(exec_utmp_mode, ExecUtmpMode);
53f47dfc
YW
4661
4662static const char* const exec_preserve_mode_table[_EXEC_PRESERVE_MODE_MAX] = {
4663 [EXEC_PRESERVE_NO] = "no",
4664 [EXEC_PRESERVE_YES] = "yes",
4665 [EXEC_PRESERVE_RESTART] = "restart",
4666};
4667
4668DEFINE_STRING_TABLE_LOOKUP_WITH_BOOLEAN(exec_preserve_mode, ExecPreserveMode, EXEC_PRESERVE_YES);
3536f49e 4669
72fd1768 4670static const char* const exec_directory_type_table[_EXEC_DIRECTORY_TYPE_MAX] = {
3536f49e
YW
4671 [EXEC_DIRECTORY_RUNTIME] = "RuntimeDirectory",
4672 [EXEC_DIRECTORY_STATE] = "StateDirectory",
4673 [EXEC_DIRECTORY_CACHE] = "CacheDirectory",
4674 [EXEC_DIRECTORY_LOGS] = "LogsDirectory",
4675 [EXEC_DIRECTORY_CONFIGURATION] = "ConfigurationDirectory",
4676};
4677
4678DEFINE_STRING_TABLE_LOOKUP(exec_directory_type, ExecDirectoryType);
b1edf445
LP
4679
4680static const char* const exec_keyring_mode_table[_EXEC_KEYRING_MODE_MAX] = {
4681 [EXEC_KEYRING_INHERIT] = "inherit",
4682 [EXEC_KEYRING_PRIVATE] = "private",
4683 [EXEC_KEYRING_SHARED] = "shared",
4684};
4685
4686DEFINE_STRING_TABLE_LOOKUP(exec_keyring_mode, ExecKeyringMode);