]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/core/execute.c
util-lib: wrap personality() to fix up broken glibc error handling (#6766)
[thirdparty/systemd.git] / src / core / execute.c
CommitLineData
a7334b09
LP
1/***
2 This file is part of systemd.
3
4 Copyright 2010 Lennart Poettering
5
6 systemd is free software; you can redistribute it and/or modify it
5430f7f2
LP
7 under the terms of the GNU Lesser General Public License as published by
8 the Free Software Foundation; either version 2.1 of the License, or
a7334b09
LP
9 (at your option) any later version.
10
11 systemd is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
5430f7f2 14 Lesser General Public License for more details.
a7334b09 15
5430f7f2 16 You should have received a copy of the GNU Lesser General Public License
a7334b09
LP
17 along with systemd; If not, see <http://www.gnu.org/licenses/>.
18***/
19
034c6ed7
LP
20#include <errno.h>
21#include <fcntl.h>
8dd4c05b
LP
22#include <glob.h>
23#include <grp.h>
24#include <poll.h>
309bff19 25#include <signal.h>
8dd4c05b 26#include <string.h>
19c0b0b9 27#include <sys/capability.h>
d251207d 28#include <sys/eventfd.h>
f3e43635 29#include <sys/mman.h>
8dd4c05b 30#include <sys/personality.h>
94f04347 31#include <sys/prctl.h>
d2ffa389 32#include <sys/shm.h>
8dd4c05b 33#include <sys/socket.h>
451a074f 34#include <sys/stat.h>
d2ffa389 35#include <sys/types.h>
8dd4c05b
LP
36#include <sys/un.h>
37#include <unistd.h>
023a4f67 38#include <utmpx.h>
5cb5a6ff 39
5b6319dc
LP
40#ifdef HAVE_PAM
41#include <security/pam_appl.h>
42#endif
43
7b52a628
MS
44#ifdef HAVE_SELINUX
45#include <selinux/selinux.h>
46#endif
47
17df7223
LP
48#ifdef HAVE_SECCOMP
49#include <seccomp.h>
50#endif
51
eef65bf3
MS
52#ifdef HAVE_APPARMOR
53#include <sys/apparmor.h>
54#endif
55
24882e06 56#include "sd-messages.h"
8dd4c05b
LP
57
58#include "af-list.h"
b5efdb8a 59#include "alloc-util.h"
3ffd4af2
LP
60#ifdef HAVE_APPARMOR
61#include "apparmor-util.h"
62#endif
8dd4c05b
LP
63#include "async.h"
64#include "barrier.h"
8dd4c05b 65#include "cap-list.h"
430f0182 66#include "capability-util.h"
f6a6225e 67#include "def.h"
4d1a6904 68#include "env-util.h"
17df7223 69#include "errno-list.h"
3ffd4af2 70#include "execute.h"
8dd4c05b 71#include "exit-status.h"
3ffd4af2 72#include "fd-util.h"
8dd4c05b 73#include "fileio.h"
f97b34a6 74#include "format-util.h"
f4f15635 75#include "fs-util.h"
7d50b32a 76#include "glob-util.h"
c004493c 77#include "io-util.h"
8dd4c05b
LP
78#include "ioprio.h"
79#include "log.h"
80#include "macro.h"
81#include "missing.h"
82#include "mkdir.h"
83#include "namespace.h"
6bedfcbb 84#include "parse-util.h"
8dd4c05b 85#include "path-util.h"
0b452006 86#include "process-util.h"
78f22b97 87#include "rlimit-util.h"
8dd4c05b 88#include "rm-rf.h"
3ffd4af2
LP
89#ifdef HAVE_SECCOMP
90#include "seccomp-util.h"
91#endif
8dd4c05b 92#include "securebits.h"
07d46372 93#include "securebits-util.h"
8dd4c05b 94#include "selinux-util.h"
24882e06 95#include "signal-util.h"
8dd4c05b 96#include "smack-util.h"
fd63e712 97#include "special.h"
8b43440b 98#include "string-table.h"
07630cea 99#include "string-util.h"
8dd4c05b 100#include "strv.h"
7ccbd1ae 101#include "syslog-util.h"
8dd4c05b
LP
102#include "terminal-util.h"
103#include "unit.h"
b1d4f8e1 104#include "user-util.h"
8dd4c05b
LP
105#include "util.h"
106#include "utmp-wtmp.h"
5cb5a6ff 107
e056b01d 108#define IDLE_TIMEOUT_USEC (5*USEC_PER_SEC)
31a7eb86 109#define IDLE_TIMEOUT2_USEC (1*USEC_PER_SEC)
e6a26745 110
02a51aba
LP
111/* This assumes there is a 'tty' group */
112#define TTY_MODE 0620
113
531dca78
LP
114#define SNDBUF_SIZE (8*1024*1024)
115
034c6ed7
LP
/* Rearranges the passed file descriptors so that fds[i] ends up as fd number i+3, i.e. directly
 * after stdin/stdout/stderr. The array is modified in place: each entry is replaced by the number
 * the descriptor was moved to.
 *
 * Returns 0 on success, or a negative errno-style value if duplicating a descriptor fails. */
static int shift_fds(int fds[], unsigned n_fds) {
        int first = 0;

        if (n_fds <= 0)
                return 0;

        assert(fds);

        /* Keep making passes until every descriptor sits at its target slot. A pass has to be
         * repeated from the first index whose target number was still occupied. */
        while (first >= 0) {
                int retry_at = -1, idx;

                for (idx = first; idx < (int) n_fds; idx++) {
                        int wanted = idx + 3, got;

                        /* Nothing to do if this one is already in place */
                        if (fds[idx] == wanted)
                                continue;

                        got = fcntl(fds[idx], F_DUPFD, wanted);
                        if (got < 0)
                                return -errno;

                        safe_close(fds[idx]);
                        fds[idx] = got;

                        /* The slot we asked for was taken, so we got a higher number.
                         * Remember the first such index and retry from there. */
                        if (got != wanted && retry_at < 0)
                                retry_at = idx;
                }

                first = retry_at;
        }

        return 0;
}
160
4c47affc
FB
/* Prepares the fds in the array for being passed to a child: FD_CLOEXEC is dropped from all of
 * them, and O_NONBLOCK is set or cleared (according to 'nonblock') on the first n_socket_fds
 * entries only.
 *
 * Returns 0 on success, or the negative error returned by fd_nonblock()/fd_cloexec(). */
static int flags_fds(const int fds[], unsigned n_storage_fds, unsigned n_socket_fds, bool nonblock) {
        unsigned total, idx;

        total = n_storage_fds + n_socket_fds;
        if (total <= 0)
                return 0;

        assert(fds);

        for (idx = 0; idx < total; idx++) {
                int r;

                /* O_NONBLOCK is only adjusted for the leading n_socket_fds entries */
                if (idx < n_socket_fds) {
                        r = fd_nonblock(fds[idx], nonblock);
                        if (r < 0)
                                return r;
                }

                /* FD_CLOEXEC is removed unconditionally, since these fds are meant to be
                 * inherited by our children */
                r = fd_cloexec(fds[idx], false);
                if (r < 0)
                        return r;
        }

        return 0;
}
193
1e22b5cd 194static const char *exec_context_tty_path(const ExecContext *context) {
80876c20
LP
195 assert(context);
196
1e22b5cd
LP
197 if (context->stdio_as_fds)
198 return NULL;
199
80876c20
LP
200 if (context->tty_path)
201 return context->tty_path;
202
203 return "/dev/console";
204}
205
1e22b5cd
LP
206static void exec_context_tty_reset(const ExecContext *context, const ExecParameters *p) {
207 const char *path;
208
6ea832a2
LP
209 assert(context);
210
1e22b5cd 211 path = exec_context_tty_path(context);
6ea832a2 212
1e22b5cd
LP
213 if (context->tty_vhangup) {
214 if (p && p->stdin_fd >= 0)
215 (void) terminal_vhangup_fd(p->stdin_fd);
216 else if (path)
217 (void) terminal_vhangup(path);
218 }
6ea832a2 219
1e22b5cd
LP
220 if (context->tty_reset) {
221 if (p && p->stdin_fd >= 0)
222 (void) reset_terminal_fd(p->stdin_fd, true);
223 else if (path)
224 (void) reset_terminal(path);
225 }
226
227 if (context->tty_vt_disallocate && path)
228 (void) vt_disallocate(path);
6ea832a2
LP
229}
230
6af760f3
LP
231static bool is_terminal_input(ExecInput i) {
232 return IN_SET(i,
233 EXEC_INPUT_TTY,
234 EXEC_INPUT_TTY_FORCE,
235 EXEC_INPUT_TTY_FAIL);
236}
237
3a1286b6 238static bool is_terminal_output(ExecOutput o) {
6af760f3
LP
239 return IN_SET(o,
240 EXEC_OUTPUT_TTY,
241 EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
242 EXEC_OUTPUT_KMSG_AND_CONSOLE,
243 EXEC_OUTPUT_JOURNAL_AND_CONSOLE);
244}
245
aac8c0c3
LP
246static bool is_syslog_output(ExecOutput o) {
247 return IN_SET(o,
248 EXEC_OUTPUT_SYSLOG,
249 EXEC_OUTPUT_SYSLOG_AND_CONSOLE);
250}
251
252static bool is_kmsg_output(ExecOutput o) {
253 return IN_SET(o,
254 EXEC_OUTPUT_KMSG,
255 EXEC_OUTPUT_KMSG_AND_CONSOLE);
256}
257
6af760f3
LP
258static bool exec_context_needs_term(const ExecContext *c) {
259 assert(c);
260
261 /* Return true if the execution context suggests we should set $TERM to something useful. */
262
263 if (is_terminal_input(c->std_input))
264 return true;
265
266 if (is_terminal_output(c->std_output))
267 return true;
268
269 if (is_terminal_output(c->std_error))
270 return true;
271
272 return !!c->tty_path;
3a1286b6
MS
273}
274
80876c20
LP
/* Opens /dev/null with the specified open() flags (O_NOCTTY is always added) and installs it as
 * file descriptor 'nfd'.
 *
 * Returns nfd on success, a negative errno-style value on failure. */
static int open_null_as(int flags, int nfd) {
        int null_fd, result;

        assert(nfd >= 0);

        null_fd = open("/dev/null", flags|O_NOCTTY);
        if (null_fd < 0)
                return -errno;

        /* open() may already have handed us the number we want */
        if (null_fd == nfd)
                return nfd;

        result = dup2(null_fd, nfd) < 0 ? -errno : nfd;
        safe_close(null_fd);

        return result;
}
292
524daa8c 293static int connect_journal_socket(int fd, uid_t uid, gid_t gid) {
92a17af9 294 static const union sockaddr_union sa = {
b92bea5d
ZJS
295 .un.sun_family = AF_UNIX,
296 .un.sun_path = "/run/systemd/journal/stdout",
297 };
524daa8c
ZJS
298 uid_t olduid = UID_INVALID;
299 gid_t oldgid = GID_INVALID;
300 int r;
301
cad93f29 302 if (gid_is_valid(gid)) {
524daa8c
ZJS
303 oldgid = getgid();
304
92a17af9 305 if (setegid(gid) < 0)
524daa8c
ZJS
306 return -errno;
307 }
308
cad93f29 309 if (uid_is_valid(uid)) {
524daa8c
ZJS
310 olduid = getuid();
311
92a17af9 312 if (seteuid(uid) < 0) {
524daa8c
ZJS
313 r = -errno;
314 goto restore_gid;
315 }
316 }
317
92a17af9 318 r = connect(fd, &sa.sa, SOCKADDR_UN_LEN(sa.un)) < 0 ? -errno : 0;
524daa8c
ZJS
319
320 /* If we fail to restore the uid or gid, things will likely
321 fail later on. This should only happen if an LSM interferes. */
322
cad93f29 323 if (uid_is_valid(uid))
524daa8c
ZJS
324 (void) seteuid(olduid);
325
326 restore_gid:
cad93f29 327 if (gid_is_valid(gid))
524daa8c
ZJS
328 (void) setegid(oldgid);
329
330 return r;
331}
332
fd1f9c89 333static int connect_logger_as(
7a1ab780 334 Unit *unit,
fd1f9c89 335 const ExecContext *context,
af635cf3 336 const ExecParameters *params,
fd1f9c89
LP
337 ExecOutput output,
338 const char *ident,
fd1f9c89
LP
339 int nfd,
340 uid_t uid,
341 gid_t gid) {
342
524daa8c 343 int fd, r;
071830ff
LP
344
345 assert(context);
af635cf3 346 assert(params);
80876c20
LP
347 assert(output < _EXEC_OUTPUT_MAX);
348 assert(ident);
349 assert(nfd >= 0);
071830ff 350
54fe0cdb
LP
351 fd = socket(AF_UNIX, SOCK_STREAM, 0);
352 if (fd < 0)
80876c20 353 return -errno;
071830ff 354
524daa8c
ZJS
355 r = connect_journal_socket(fd, uid, gid);
356 if (r < 0)
357 return r;
071830ff 358
80876c20 359 if (shutdown(fd, SHUT_RD) < 0) {
03e334a1 360 safe_close(fd);
80876c20
LP
361 return -errno;
362 }
071830ff 363
fd1f9c89 364 (void) fd_inc_sndbuf(fd, SNDBUF_SIZE);
531dca78 365
80876c20 366 dprintf(fd,
62bca2c6 367 "%s\n"
80876c20
LP
368 "%s\n"
369 "%i\n"
54fe0cdb
LP
370 "%i\n"
371 "%i\n"
372 "%i\n"
4f4a1dbf 373 "%i\n",
c867611e 374 context->syslog_identifier ?: ident,
af635cf3 375 params->flags & EXEC_PASS_LOG_UNIT ? unit->id : "",
54fe0cdb
LP
376 context->syslog_priority,
377 !!context->syslog_level_prefix,
aac8c0c3
LP
378 is_syslog_output(output),
379 is_kmsg_output(output),
3a1286b6 380 is_terminal_output(output));
80876c20 381
fd1f9c89
LP
382 if (fd == nfd)
383 return nfd;
384
385 r = dup2(fd, nfd) < 0 ? -errno : nfd;
386 safe_close(fd);
071830ff 387
80876c20
LP
388 return r;
389}
/* Opens the terminal at 'path' with the specified open flags (passed in 'mode'; O_NOCTTY is always
 * added) and installs it as file descriptor 'nfd'.
 *
 * Returns nfd on success, a negative errno-style value on failure. */
static int open_terminal_as(const char *path, mode_t mode, int nfd) {
        int tty_fd, result;

        assert(path);
        assert(nfd >= 0);

        tty_fd = open_terminal(path, mode | O_NOCTTY);
        if (tty_fd < 0)
                return tty_fd;

        /* We might have been handed the right number right away */
        if (tty_fd == nfd)
                return nfd;

        result = dup2(tty_fd, nfd) < 0 ? -errno : nfd;
        safe_close(tty_fd);

        return result;
}
071830ff 408
1e3ad081
LP
409static int fixup_input(ExecInput std_input, int socket_fd, bool apply_tty_stdin) {
410
411 if (is_terminal_input(std_input) && !apply_tty_stdin)
412 return EXEC_INPUT_NULL;
071830ff 413
03fd9c49 414 if (std_input == EXEC_INPUT_SOCKET && socket_fd < 0)
4f2d528d
LP
415 return EXEC_INPUT_NULL;
416
03fd9c49 417 return std_input;
4f2d528d
LP
418}
419
03fd9c49 420static int fixup_output(ExecOutput std_output, int socket_fd) {
4f2d528d 421
03fd9c49 422 if (std_output == EXEC_OUTPUT_SOCKET && socket_fd < 0)
4f2d528d
LP
423 return EXEC_OUTPUT_INHERIT;
424
03fd9c49 425 return std_output;
4f2d528d
LP
426}
427
a34ceba6
LP
428static int setup_input(
429 const ExecContext *context,
430 const ExecParameters *params,
52c239d7
LB
431 int socket_fd,
432 int named_iofds[3]) {
a34ceba6 433
4f2d528d
LP
434 ExecInput i;
435
436 assert(context);
a34ceba6
LP
437 assert(params);
438
439 if (params->stdin_fd >= 0) {
440 if (dup2(params->stdin_fd, STDIN_FILENO) < 0)
441 return -errno;
442
443 /* Try to make this the controlling tty, if it is a tty, and reset it */
444 (void) ioctl(STDIN_FILENO, TIOCSCTTY, context->std_input == EXEC_INPUT_TTY_FORCE);
445 (void) reset_terminal_fd(STDIN_FILENO, true);
446
447 return STDIN_FILENO;
448 }
4f2d528d 449
c39f1ce2 450 i = fixup_input(context->std_input, socket_fd, params->flags & EXEC_APPLY_TTY_STDIN);
4f2d528d
LP
451
452 switch (i) {
071830ff 453
80876c20
LP
454 case EXEC_INPUT_NULL:
455 return open_null_as(O_RDONLY, STDIN_FILENO);
456
457 case EXEC_INPUT_TTY:
458 case EXEC_INPUT_TTY_FORCE:
459 case EXEC_INPUT_TTY_FAIL: {
460 int fd, r;
071830ff 461
1e22b5cd 462 fd = acquire_terminal(exec_context_tty_path(context),
970edce6
ZJS
463 i == EXEC_INPUT_TTY_FAIL,
464 i == EXEC_INPUT_TTY_FORCE,
465 false,
3a43da28 466 USEC_INFINITY);
970edce6 467 if (fd < 0)
80876c20
LP
468 return fd;
469
470 if (fd != STDIN_FILENO) {
471 r = dup2(fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
03e334a1 472 safe_close(fd);
80876c20
LP
473 } else
474 r = STDIN_FILENO;
475
476 return r;
477 }
478
4f2d528d
LP
479 case EXEC_INPUT_SOCKET:
480 return dup2(socket_fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
481
52c239d7
LB
482 case EXEC_INPUT_NAMED_FD:
483 (void) fd_nonblock(named_iofds[STDIN_FILENO], false);
484 return dup2(named_iofds[STDIN_FILENO], STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
485
80876c20
LP
486 default:
487 assert_not_reached("Unknown input type");
488 }
489}
490
a34ceba6
LP
491static int setup_output(
492 Unit *unit,
493 const ExecContext *context,
494 const ExecParameters *params,
495 int fileno,
496 int socket_fd,
52c239d7 497 int named_iofds[3],
a34ceba6 498 const char *ident,
7bce046b
LP
499 uid_t uid,
500 gid_t gid,
501 dev_t *journal_stream_dev,
502 ino_t *journal_stream_ino) {
a34ceba6 503
4f2d528d
LP
504 ExecOutput o;
505 ExecInput i;
47c1d80d 506 int r;
4f2d528d 507
f2341e0a 508 assert(unit);
80876c20 509 assert(context);
a34ceba6 510 assert(params);
80876c20 511 assert(ident);
7bce046b
LP
512 assert(journal_stream_dev);
513 assert(journal_stream_ino);
80876c20 514
a34ceba6
LP
515 if (fileno == STDOUT_FILENO && params->stdout_fd >= 0) {
516
517 if (dup2(params->stdout_fd, STDOUT_FILENO) < 0)
518 return -errno;
519
520 return STDOUT_FILENO;
521 }
522
523 if (fileno == STDERR_FILENO && params->stderr_fd >= 0) {
524 if (dup2(params->stderr_fd, STDERR_FILENO) < 0)
525 return -errno;
526
527 return STDERR_FILENO;
528 }
529
c39f1ce2 530 i = fixup_input(context->std_input, socket_fd, params->flags & EXEC_APPLY_TTY_STDIN);
03fd9c49 531 o = fixup_output(context->std_output, socket_fd);
4f2d528d 532
eb17e935
MS
533 if (fileno == STDERR_FILENO) {
534 ExecOutput e;
535 e = fixup_output(context->std_error, socket_fd);
80876c20 536
eb17e935
MS
537 /* This expects the input and output are already set up */
538
539 /* Don't change the stderr file descriptor if we inherit all
540 * the way and are not on a tty */
541 if (e == EXEC_OUTPUT_INHERIT &&
542 o == EXEC_OUTPUT_INHERIT &&
543 i == EXEC_INPUT_NULL &&
544 !is_terminal_input(context->std_input) &&
545 getppid () != 1)
546 return fileno;
547
548 /* Duplicate from stdout if possible */
52c239d7 549 if ((e == o && e != EXEC_OUTPUT_NAMED_FD) || e == EXEC_OUTPUT_INHERIT)
eb17e935 550 return dup2(STDOUT_FILENO, fileno) < 0 ? -errno : fileno;
071830ff 551
eb17e935 552 o = e;
80876c20 553
eb17e935 554 } else if (o == EXEC_OUTPUT_INHERIT) {
21d21ea4
LP
555 /* If input got downgraded, inherit the original value */
556 if (i == EXEC_INPUT_NULL && is_terminal_input(context->std_input))
1e22b5cd 557 return open_terminal_as(exec_context_tty_path(context), O_WRONLY, fileno);
21d21ea4 558
acb591e4 559 /* If the input is connected to anything that's not a /dev/null, inherit that... */
ff876e28 560 if (i != EXEC_INPUT_NULL)
eb17e935 561 return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
071830ff 562
acb591e4
LP
563 /* If we are not started from PID 1 we just inherit STDOUT from our parent process. */
564 if (getppid() != 1)
eb17e935 565 return fileno;
94f04347 566
eb17e935
MS
567 /* We need to open /dev/null here anew, to get the right access mode. */
568 return open_null_as(O_WRONLY, fileno);
071830ff 569 }
94f04347 570
eb17e935 571 switch (o) {
80876c20
LP
572
573 case EXEC_OUTPUT_NULL:
eb17e935 574 return open_null_as(O_WRONLY, fileno);
80876c20
LP
575
576 case EXEC_OUTPUT_TTY:
4f2d528d 577 if (is_terminal_input(i))
eb17e935 578 return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
80876c20
LP
579
580 /* We don't reset the terminal if this is just about output */
1e22b5cd 581 return open_terminal_as(exec_context_tty_path(context), O_WRONLY, fileno);
80876c20
LP
582
583 case EXEC_OUTPUT_SYSLOG:
28dbc1e8 584 case EXEC_OUTPUT_SYSLOG_AND_CONSOLE:
9a6bca7a 585 case EXEC_OUTPUT_KMSG:
28dbc1e8 586 case EXEC_OUTPUT_KMSG_AND_CONSOLE:
706343f4
LP
587 case EXEC_OUTPUT_JOURNAL:
588 case EXEC_OUTPUT_JOURNAL_AND_CONSOLE:
af635cf3 589 r = connect_logger_as(unit, context, params, o, ident, fileno, uid, gid);
47c1d80d 590 if (r < 0) {
f2341e0a 591 log_unit_error_errno(unit, r, "Failed to connect %s to the journal socket, ignoring: %m", fileno == STDOUT_FILENO ? "stdout" : "stderr");
eb17e935 592 r = open_null_as(O_WRONLY, fileno);
7bce046b
LP
593 } else {
594 struct stat st;
595
596 /* If we connected this fd to the journal via a stream, patch the device/inode into the passed
597 * parameters, but only then. This is useful so that we can set $JOURNAL_STREAM that permits
598 * services to detect whether they are connected to the journal or not. */
599
600 if (fstat(fileno, &st) >= 0) {
601 *journal_stream_dev = st.st_dev;
602 *journal_stream_ino = st.st_ino;
603 }
47c1d80d
MS
604 }
605 return r;
4f2d528d
LP
606
607 case EXEC_OUTPUT_SOCKET:
608 assert(socket_fd >= 0);
eb17e935 609 return dup2(socket_fd, fileno) < 0 ? -errno : fileno;
94f04347 610
52c239d7
LB
611 case EXEC_OUTPUT_NAMED_FD:
612 (void) fd_nonblock(named_iofds[fileno], false);
613 return dup2(named_iofds[fileno], fileno) < 0 ? -errno : fileno;
614
94f04347 615 default:
80876c20 616 assert_not_reached("Unknown error type");
94f04347 617 }
071830ff
LP
618}
619
02a51aba
LP
620static int chown_terminal(int fd, uid_t uid) {
621 struct stat st;
622
623 assert(fd >= 0);
02a51aba 624
1ff74fb6
LP
625 /* Before we chown/chmod the TTY, let's ensure this is actually a tty */
626 if (isatty(fd) < 1)
627 return 0;
628
02a51aba 629 /* This might fail. What matters are the results. */
bab45044
LP
630 (void) fchown(fd, uid, -1);
631 (void) fchmod(fd, TTY_MODE);
02a51aba
LP
632
633 if (fstat(fd, &st) < 0)
634 return -errno;
635
d8b4e2e9 636 if (st.st_uid != uid || (st.st_mode & 0777) != TTY_MODE)
02a51aba
LP
637 return -EPERM;
638
639 return 0;
640}
641
7d5ceb64 642static int setup_confirm_stdio(const char *vc, int *_saved_stdin, int *_saved_stdout) {
3d18b167
LP
643 _cleanup_close_ int fd = -1, saved_stdin = -1, saved_stdout = -1;
644 int r;
80876c20 645
80876c20
LP
646 assert(_saved_stdin);
647 assert(_saved_stdout);
648
af6da548
LP
649 saved_stdin = fcntl(STDIN_FILENO, F_DUPFD, 3);
650 if (saved_stdin < 0)
651 return -errno;
80876c20 652
af6da548 653 saved_stdout = fcntl(STDOUT_FILENO, F_DUPFD, 3);
3d18b167
LP
654 if (saved_stdout < 0)
655 return -errno;
80876c20 656
7d5ceb64 657 fd = acquire_terminal(vc, false, false, false, DEFAULT_CONFIRM_USEC);
3d18b167
LP
658 if (fd < 0)
659 return fd;
80876c20 660
af6da548
LP
661 r = chown_terminal(fd, getuid());
662 if (r < 0)
3d18b167 663 return r;
02a51aba 664
3d18b167
LP
665 r = reset_terminal_fd(fd, true);
666 if (r < 0)
667 return r;
80876c20 668
3d18b167
LP
669 if (dup2(fd, STDIN_FILENO) < 0)
670 return -errno;
671
672 if (dup2(fd, STDOUT_FILENO) < 0)
673 return -errno;
80876c20
LP
674
675 if (fd >= 2)
03e334a1 676 safe_close(fd);
3d18b167 677 fd = -1;
80876c20
LP
678
679 *_saved_stdin = saved_stdin;
680 *_saved_stdout = saved_stdout;
681
3d18b167 682 saved_stdin = saved_stdout = -1;
80876c20 683
3d18b167 684 return 0;
80876c20
LP
685}
686
63d77c92 687static void write_confirm_error_fd(int err, int fd, const Unit *u) {
3b20f877
FB
688 assert(err < 0);
689
690 if (err == -ETIMEDOUT)
63d77c92 691 dprintf(fd, "Confirmation question timed out for %s, assuming positive response.\n", u->id);
3b20f877
FB
692 else {
693 errno = -err;
63d77c92 694 dprintf(fd, "Couldn't ask confirmation for %s: %m, assuming positive response.\n", u->id);
3b20f877
FB
695 }
696}
697
63d77c92 698static void write_confirm_error(int err, const char *vc, const Unit *u) {
03e334a1 699 _cleanup_close_ int fd = -1;
80876c20 700
3b20f877 701 assert(vc);
80876c20 702
7d5ceb64 703 fd = open_terminal(vc, O_WRONLY|O_NOCTTY|O_CLOEXEC);
af6da548 704 if (fd < 0)
3b20f877 705 return;
80876c20 706
63d77c92 707 write_confirm_error_fd(err, fd, u);
af6da548 708}
80876c20 709
/* Undoes setup_confirm_stdio(): releases the terminal, moves the saved descriptors back onto
 * stdin/stdout (where valid) and closes them, resetting *saved_stdin and *saved_stdout.
 *
 * Returns 0 on success, or the negative errno-style value of the last dup2() that failed. */
static int restore_confirm_stdio(int *saved_stdin, int *saved_stdout) {
        int r = 0;

        assert(saved_stdin);
        assert(saved_stdout);

        release_terminal();

        if (*saved_stdin >= 0 && dup2(*saved_stdin, STDIN_FILENO) < 0)
                r = -errno;

        if (*saved_stdout >= 0 && dup2(*saved_stdout, STDOUT_FILENO) < 0)
                r = -errno;

        /* The saved copies are closed regardless of whether restoring worked */
        *saved_stdin = safe_close(*saved_stdin);
        *saved_stdout = safe_close(*saved_stdout);

        return r;
}
731
3b20f877
FB
/* Possible outcomes of ask_for_confirmation() */
enum {
        /* Don't execute the command and pretend it failed */
        CONFIRM_PRETEND_FAILURE = -1,

        /* Don't execute the command and pretend it succeeded */
        CONFIRM_PRETEND_SUCCESS = 0,

        /* Go ahead and execute the command */
        CONFIRM_EXECUTE = 1,
};
737
eedf223a 738static int ask_for_confirmation(const char *vc, Unit *u, const char *cmdline) {
af6da548 739 int saved_stdout = -1, saved_stdin = -1, r;
2bcd3c26 740 _cleanup_free_ char *e = NULL;
3b20f877 741 char c;
af6da548 742
3b20f877 743 /* For any internal errors, assume a positive response. */
7d5ceb64 744 r = setup_confirm_stdio(vc, &saved_stdin, &saved_stdout);
3b20f877 745 if (r < 0) {
63d77c92 746 write_confirm_error(r, vc, u);
3b20f877
FB
747 return CONFIRM_EXECUTE;
748 }
af6da548 749
b0eb2944
FB
750 /* confirm_spawn might have been disabled while we were sleeping. */
751 if (manager_is_confirm_spawn_disabled(u->manager)) {
752 r = 1;
753 goto restore_stdio;
754 }
af6da548 755
2bcd3c26
FB
756 e = ellipsize(cmdline, 60, 100);
757 if (!e) {
758 log_oom();
759 r = CONFIRM_EXECUTE;
760 goto restore_stdio;
761 }
af6da548 762
d172b175 763 for (;;) {
539622bd 764 r = ask_char(&c, "yfshiDjcn", "Execute %s? [y, f, s – h for help] ", e);
d172b175 765 if (r < 0) {
63d77c92 766 write_confirm_error_fd(r, STDOUT_FILENO, u);
d172b175
FB
767 r = CONFIRM_EXECUTE;
768 goto restore_stdio;
769 }
af6da548 770
d172b175 771 switch (c) {
b0eb2944
FB
772 case 'c':
773 printf("Resuming normal execution.\n");
774 manager_disable_confirm_spawn();
775 r = 1;
776 break;
dd6f9ac0
FB
777 case 'D':
778 unit_dump(u, stdout, " ");
779 continue; /* ask again */
d172b175
FB
780 case 'f':
781 printf("Failing execution.\n");
782 r = CONFIRM_PRETEND_FAILURE;
783 break;
784 case 'h':
b0eb2944
FB
785 printf(" c - continue, proceed without asking anymore\n"
786 " D - dump, show the state of the unit\n"
dd6f9ac0 787 " f - fail, don't execute the command and pretend it failed\n"
d172b175 788 " h - help\n"
eedf223a 789 " i - info, show a short summary of the unit\n"
56fde33a 790 " j - jobs, show jobs that are in progress\n"
d172b175
FB
791 " s - skip, don't execute the command and pretend it succeeded\n"
792 " y - yes, execute the command\n");
dd6f9ac0 793 continue; /* ask again */
eedf223a
FB
794 case 'i':
795 printf(" Description: %s\n"
796 " Unit: %s\n"
797 " Command: %s\n",
798 u->id, u->description, cmdline);
799 continue; /* ask again */
56fde33a
FB
800 case 'j':
801 manager_dump_jobs(u->manager, stdout, " ");
802 continue; /* ask again */
539622bd
FB
803 case 'n':
804 /* 'n' was removed in favor of 'f'. */
805 printf("Didn't understand 'n', did you mean 'f'?\n");
806 continue; /* ask again */
d172b175
FB
807 case 's':
808 printf("Skipping execution.\n");
809 r = CONFIRM_PRETEND_SUCCESS;
810 break;
811 case 'y':
812 r = CONFIRM_EXECUTE;
813 break;
814 default:
815 assert_not_reached("Unhandled choice");
816 }
3b20f877 817 break;
3b20f877 818 }
af6da548 819
3b20f877 820restore_stdio:
af6da548 821 restore_confirm_stdio(&saved_stdin, &saved_stdout);
af6da548 822 return r;
80876c20
LP
823}
824
4d885bd3
DH
825static int get_fixed_user(const ExecContext *c, const char **user,
826 uid_t *uid, gid_t *gid,
827 const char **home, const char **shell) {
81a2b7ce 828 int r;
4d885bd3 829 const char *name;
81a2b7ce 830
4d885bd3 831 assert(c);
81a2b7ce 832
23deef88
LP
833 if (!c->user)
834 return 0;
835
4d885bd3
DH
836 /* Note that we don't set $HOME or $SHELL if they are not particularly enlightening anyway
837 * (i.e. are "/" or "/bin/nologin"). */
81a2b7ce 838
23deef88 839 name = c->user;
4d885bd3
DH
840 r = get_user_creds_clean(&name, uid, gid, home, shell);
841 if (r < 0)
842 return r;
81a2b7ce 843
4d885bd3
DH
844 *user = name;
845 return 0;
846}
847
848static int get_fixed_group(const ExecContext *c, const char **group, gid_t *gid) {
849 int r;
850 const char *name;
851
852 assert(c);
853
854 if (!c->group)
855 return 0;
856
857 name = c->group;
858 r = get_group_creds(&name, gid);
859 if (r < 0)
860 return r;
861
862 *group = name;
863 return 0;
864}
865
cdc5d5c5
DH
866static int get_supplementary_groups(const ExecContext *c, const char *user,
867 const char *group, gid_t gid,
868 gid_t **supplementary_gids, int *ngids) {
4d885bd3
DH
869 char **i;
870 int r, k = 0;
871 int ngroups_max;
872 bool keep_groups = false;
873 gid_t *groups = NULL;
874 _cleanup_free_ gid_t *l_gids = NULL;
875
876 assert(c);
877
bbeea271
DH
878 /*
879 * If user is given, then lookup GID and supplementary groups list.
880 * We avoid NSS lookups for gid=0. Also we have to initialize groups
cdc5d5c5
DH
881 * here and as early as possible so we keep the list of supplementary
882 * groups of the caller.
bbeea271
DH
883 */
884 if (user && gid_is_valid(gid) && gid != 0) {
885 /* First step, initialize groups from /etc/groups */
886 if (initgroups(user, gid) < 0)
887 return -errno;
888
889 keep_groups = true;
890 }
891
4d885bd3
DH
892 if (!c->supplementary_groups)
893 return 0;
894
366ddd25
DH
895 /*
896 * If SupplementaryGroups= was passed then NGROUPS_MAX has to
897 * be positive, otherwise fail.
898 */
899 errno = 0;
900 ngroups_max = (int) sysconf(_SC_NGROUPS_MAX);
901 if (ngroups_max <= 0) {
902 if (errno > 0)
903 return -errno;
904 else
905 return -EOPNOTSUPP; /* For all other values */
906 }
907
4d885bd3
DH
908 l_gids = new(gid_t, ngroups_max);
909 if (!l_gids)
910 return -ENOMEM;
81a2b7ce 911
4d885bd3
DH
912 if (keep_groups) {
913 /*
914 * Lookup the list of groups that the user belongs to, we
915 * avoid NSS lookups here too for gid=0.
916 */
917 k = ngroups_max;
918 if (getgrouplist(user, gid, l_gids, &k) < 0)
919 return -EINVAL;
920 } else
921 k = 0;
81a2b7ce 922
4d885bd3
DH
923 STRV_FOREACH(i, c->supplementary_groups) {
924 const char *g;
81a2b7ce 925
4d885bd3
DH
926 if (k >= ngroups_max)
927 return -E2BIG;
81a2b7ce 928
4d885bd3
DH
929 g = *i;
930 r = get_group_creds(&g, l_gids+k);
931 if (r < 0)
932 return r;
81a2b7ce 933
4d885bd3
DH
934 k++;
935 }
81a2b7ce 936
4d885bd3
DH
937 /*
938 * Sets ngids to zero to drop all supplementary groups, happens
939 * when we are under root and SupplementaryGroups= is empty.
940 */
941 if (k == 0) {
942 *ngids = 0;
943 return 0;
944 }
81a2b7ce 945
4d885bd3
DH
946 /* Otherwise get the final list of supplementary groups */
947 groups = memdup(l_gids, sizeof(gid_t) * k);
948 if (!groups)
949 return -ENOMEM;
950
951 *supplementary_gids = groups;
952 *ngids = k;
953
954 groups = NULL;
955
956 return 0;
957}
958
959static int enforce_groups(const ExecContext *context, gid_t gid,
960 gid_t *supplementary_gids, int ngids) {
961 int r;
962
963 assert(context);
964
965 /* Handle SupplementaryGroups= even if it is empty */
966 if (context->supplementary_groups) {
967 r = maybe_setgroups(ngids, supplementary_gids);
968 if (r < 0)
97f0e76f 969 return r;
4d885bd3 970 }
81a2b7ce 971
4d885bd3
DH
972 if (gid_is_valid(gid)) {
973 /* Then set our gids */
974 if (setresgid(gid, gid, gid) < 0)
975 return -errno;
81a2b7ce
LP
976 }
977
978 return 0;
979}
980
981static int enforce_user(const ExecContext *context, uid_t uid) {
81a2b7ce
LP
982 assert(context);
983
4d885bd3
DH
984 if (!uid_is_valid(uid))
985 return 0;
986
479050b3 987 /* Sets (but doesn't look up) the uid and make sure we keep the
81a2b7ce
LP
988 * capabilities while doing so. */
989
479050b3 990 if (context->capability_ambient_set != 0) {
81a2b7ce
LP
991
992 /* First step: If we need to keep capabilities but
993 * drop privileges we need to make sure we keep our
cbb21cca 994 * caps, while we drop privileges. */
693ced48 995 if (uid != 0) {
cbb21cca 996 int sb = context->secure_bits | 1<<SECURE_KEEP_CAPS;
693ced48
LP
997
998 if (prctl(PR_GET_SECUREBITS) != sb)
999 if (prctl(PR_SET_SECUREBITS, sb) < 0)
1000 return -errno;
1001 }
81a2b7ce
LP
1002 }
1003
479050b3 1004 /* Second step: actually set the uids */
81a2b7ce
LP
1005 if (setresuid(uid, uid, uid) < 0)
1006 return -errno;
1007
1008 /* At this point we should have all necessary capabilities but
1009 are otherwise a normal user. However, the caps might got
1010 corrupted due to the setresuid() so we need clean them up
1011 later. This is done outside of this call. */
1012
1013 return 0;
1014}
1015
5b6319dc
LP
1016#ifdef HAVE_PAM
1017
1018static int null_conv(
1019 int num_msg,
1020 const struct pam_message **msg,
1021 struct pam_response **resp,
1022 void *appdata_ptr) {
1023
1024 /* We don't support conversations */
1025
1026 return PAM_CONV_ERR;
1027}
1028
cefc33ae
LP
1029#endif
1030
5b6319dc
LP
1031static int setup_pam(
1032 const char *name,
1033 const char *user,
940c5210 1034 uid_t uid,
2d6fce8d 1035 gid_t gid,
5b6319dc 1036 const char *tty,
2065ca69 1037 char ***env,
5b6319dc
LP
1038 int fds[], unsigned n_fds) {
1039
cefc33ae
LP
1040#ifdef HAVE_PAM
1041
5b6319dc
LP
1042 static const struct pam_conv conv = {
1043 .conv = null_conv,
1044 .appdata_ptr = NULL
1045 };
1046
2d7c6aa2 1047 _cleanup_(barrier_destroy) Barrier barrier = BARRIER_NULL;
5b6319dc 1048 pam_handle_t *handle = NULL;
d6e5f3ad 1049 sigset_t old_ss;
7bb70b6e 1050 int pam_code = PAM_SUCCESS, r;
84eada2f 1051 char **nv, **e = NULL;
5b6319dc
LP
1052 bool close_session = false;
1053 pid_t pam_pid = 0, parent_pid;
970edce6 1054 int flags = 0;
5b6319dc
LP
1055
1056 assert(name);
1057 assert(user);
2065ca69 1058 assert(env);
5b6319dc
LP
1059
1060 /* We set up PAM in the parent process, then fork. The child
35b8ca3a 1061 * will then stay around until killed via PR_GET_PDEATHSIG or
5b6319dc
LP
1062 * systemd via the cgroup logic. It will then remove the PAM
1063 * session again. The parent process will exec() the actual
1064 * daemon. We do things this way to ensure that the main PID
1065 * of the daemon is the one we initially fork()ed. */
1066
7bb70b6e
LP
1067 r = barrier_create(&barrier);
1068 if (r < 0)
2d7c6aa2
DH
1069 goto fail;
1070
553d2243 1071 if (log_get_max_level() < LOG_DEBUG)
970edce6
ZJS
1072 flags |= PAM_SILENT;
1073
f546241b
ZJS
1074 pam_code = pam_start(name, user, &conv, &handle);
1075 if (pam_code != PAM_SUCCESS) {
5b6319dc
LP
1076 handle = NULL;
1077 goto fail;
1078 }
1079
f546241b
ZJS
1080 if (tty) {
1081 pam_code = pam_set_item(handle, PAM_TTY, tty);
1082 if (pam_code != PAM_SUCCESS)
5b6319dc 1083 goto fail;
f546241b 1084 }
5b6319dc 1085
84eada2f
JW
1086 STRV_FOREACH(nv, *env) {
1087 pam_code = pam_putenv(handle, *nv);
2065ca69
JW
1088 if (pam_code != PAM_SUCCESS)
1089 goto fail;
1090 }
1091
970edce6 1092 pam_code = pam_acct_mgmt(handle, flags);
f546241b 1093 if (pam_code != PAM_SUCCESS)
5b6319dc
LP
1094 goto fail;
1095
970edce6 1096 pam_code = pam_open_session(handle, flags);
f546241b 1097 if (pam_code != PAM_SUCCESS)
5b6319dc
LP
1098 goto fail;
1099
1100 close_session = true;
1101
f546241b
ZJS
1102 e = pam_getenvlist(handle);
1103 if (!e) {
5b6319dc
LP
1104 pam_code = PAM_BUF_ERR;
1105 goto fail;
1106 }
1107
1108 /* Block SIGTERM, so that we know that it won't get lost in
1109 * the child */
ce30c8dc 1110
72c0a2c2 1111 assert_se(sigprocmask_many(SIG_BLOCK, &old_ss, SIGTERM, -1) >= 0);
5b6319dc 1112
df0ff127 1113 parent_pid = getpid_cached();
5b6319dc 1114
f546241b 1115 pam_pid = fork();
7bb70b6e
LP
1116 if (pam_pid < 0) {
1117 r = -errno;
5b6319dc 1118 goto fail;
7bb70b6e 1119 }
5b6319dc
LP
1120
1121 if (pam_pid == 0) {
7bb70b6e 1122 int sig, ret = EXIT_PAM;
5b6319dc
LP
1123
1124 /* The child's job is to reset the PAM session on
1125 * termination */
2d7c6aa2 1126 barrier_set_role(&barrier, BARRIER_CHILD);
5b6319dc
LP
1127
1128 /* This string must fit in 10 chars (i.e. the length
5d6b1584
LP
1129 * of "/sbin/init"), to look pretty in /bin/ps */
1130 rename_process("(sd-pam)");
5b6319dc
LP
1131
1132 /* Make sure we don't keep open the passed fds in this
1133 child. We assume that otherwise only those fds are
1134 open here that have been opened by PAM. */
1135 close_many(fds, n_fds);
1136
940c5210
AK
1137 /* Drop privileges - we don't need any to pam_close_session
1138 * and this will make PR_SET_PDEATHSIG work in most cases.
1139 * If this fails, ignore the error - but expect sd-pam threads
1140 * to fail to exit normally */
2d6fce8d 1141
97f0e76f
LP
1142 r = maybe_setgroups(0, NULL);
1143 if (r < 0)
1144 log_warning_errno(r, "Failed to setgroups() in sd-pam: %m");
2d6fce8d
LP
1145 if (setresgid(gid, gid, gid) < 0)
1146 log_warning_errno(errno, "Failed to setresgid() in sd-pam: %m");
940c5210 1147 if (setresuid(uid, uid, uid) < 0)
2d6fce8d 1148 log_warning_errno(errno, "Failed to setresuid() in sd-pam: %m");
940c5210 1149
ce30c8dc
LP
1150 (void) ignore_signals(SIGPIPE, -1);
1151
940c5210
AK
1152 /* Wait until our parent died. This will only work if
1153 * the above setresuid() succeeds, otherwise the kernel
1154 * will not allow unprivileged parents kill their privileged
1155 * children this way. We rely on the control groups kill logic
5b6319dc
LP
1156 * to do the rest for us. */
1157 if (prctl(PR_SET_PDEATHSIG, SIGTERM) < 0)
1158 goto child_finish;
1159
2d7c6aa2
DH
1160 /* Tell the parent that our setup is done. This is especially
1161 * important regarding dropping privileges. Otherwise, unit
643f4706
ZJS
1162 * setup might race against our setresuid(2) call.
1163 *
1164 * If the parent aborted, we'll detect this below, hence ignore
1165 * return failure here. */
1166 (void) barrier_place(&barrier);
2d7c6aa2 1167
643f4706 1168 /* Check if our parent process might already have died? */
5b6319dc 1169 if (getppid() == parent_pid) {
d6e5f3ad
DM
1170 sigset_t ss;
1171
1172 assert_se(sigemptyset(&ss) >= 0);
1173 assert_se(sigaddset(&ss, SIGTERM) >= 0);
1174
3dead8d9
LP
1175 for (;;) {
1176 if (sigwait(&ss, &sig) < 0) {
1177 if (errno == EINTR)
1178 continue;
1179
1180 goto child_finish;
1181 }
5b6319dc 1182
3dead8d9
LP
1183 assert(sig == SIGTERM);
1184 break;
1185 }
5b6319dc
LP
1186 }
1187
3dead8d9 1188 /* If our parent died we'll end the session */
f546241b 1189 if (getppid() != parent_pid) {
970edce6 1190 pam_code = pam_close_session(handle, flags);
f546241b 1191 if (pam_code != PAM_SUCCESS)
5b6319dc 1192 goto child_finish;
f546241b 1193 }
5b6319dc 1194
7bb70b6e 1195 ret = 0;
5b6319dc
LP
1196
1197 child_finish:
970edce6 1198 pam_end(handle, pam_code | flags);
7bb70b6e 1199 _exit(ret);
5b6319dc
LP
1200 }
1201
2d7c6aa2
DH
1202 barrier_set_role(&barrier, BARRIER_PARENT);
1203
5b6319dc
LP
1204 /* If the child was forked off successfully it will do all the
1205 * cleanups, so forget about the handle here. */
1206 handle = NULL;
1207
3b8bddde 1208 /* Unblock SIGTERM again in the parent */
72c0a2c2 1209 assert_se(sigprocmask(SIG_SETMASK, &old_ss, NULL) >= 0);
5b6319dc
LP
1210
1211 /* We close the log explicitly here, since the PAM modules
1212 * might have opened it, but we don't want this fd around. */
1213 closelog();
1214
2d7c6aa2
DH
1215 /* Synchronously wait for the child to initialize. We don't care for
1216 * errors as we cannot recover. However, warn loudly if it happens. */
1217 if (!barrier_place_and_sync(&barrier))
1218 log_error("PAM initialization failed");
1219
2065ca69
JW
1220 strv_free(*env);
1221 *env = e;
aa87e624 1222
5b6319dc
LP
1223 return 0;
1224
1225fail:
970edce6
ZJS
1226 if (pam_code != PAM_SUCCESS) {
1227 log_error("PAM failed: %s", pam_strerror(handle, pam_code));
7bb70b6e
LP
1228 r = -EPERM; /* PAM errors do not map to errno */
1229 } else
1230 log_error_errno(r, "PAM failed: %m");
9ba35398 1231
5b6319dc
LP
1232 if (handle) {
1233 if (close_session)
970edce6 1234 pam_code = pam_close_session(handle, flags);
5b6319dc 1235
970edce6 1236 pam_end(handle, pam_code | flags);
5b6319dc
LP
1237 }
1238
1239 strv_free(e);
5b6319dc
LP
1240 closelog();
1241
7bb70b6e 1242 return r;
cefc33ae
LP
1243#else
1244 return 0;
5b6319dc 1245#endif
cefc33ae 1246}
5b6319dc 1247
5d6b1584
LP
/* Sets the process title to "(<name>)", where <name> is the last path component of 'path', cut down to its
 * final 8 characters. If the path has no usable last component the title becomes "(...)". */
static void rename_process_from_path(const char *path) {
        /* '(' + at most 8 characters + ')' + NUL. The visible part hence fits in 10 chars (i.e. the
         * length of "/sbin/init"), to look pretty in /bin/ps */
        char title[11];
        const char *base;
        size_t len;

        base = basename(path);
        if (isempty(base)) {
                rename_process("(...)");
                return;
        }

        len = strlen(base);

        /* The end of the process name is usually more interesting, since the first bit might just be
         * "systemd-", hence keep the tail rather than the head. */
        if (len > 8) {
                base += len - 8;
                len = 8;
        }

        title[0] = '(';
        memcpy(title + 1, base, len);
        title[len + 1] = ')';
        title[len + 2] = '\0';

        rename_process(title);
}
1278
469830d1
LP
1279static bool context_has_address_families(const ExecContext *c) {
1280 assert(c);
1281
1282 return c->address_families_whitelist ||
1283 !set_isempty(c->address_families);
1284}
1285
1286static bool context_has_syscall_filters(const ExecContext *c) {
1287 assert(c);
1288
1289 return c->syscall_whitelist ||
1290 !set_isempty(c->syscall_filter);
1291}
1292
/* Decides whether "no new privileges" applies for this context: true if it is explicitly requested, false if
 * we hold CAP_SYS_ADMIN, and otherwise true if any of the seccomp-backed settings checked below is enabled. */
static bool context_has_no_new_privileges(const ExecContext *c) {
        assert(c);

        /* Explicitly requested */
        if (c->no_new_privileges)
                return true;

        /* if we are privileged, we don't need NNP */
        if (have_effective_cap(CAP_SYS_ADMIN))
                return false;

        /* We need NNP if we have any form of seccomp and are unprivileged */
        if (context_has_address_families(c) ||
            c->memory_deny_write_execute ||
            c->restrict_realtime ||
            exec_context_restrict_namespaces_set(c))
                return true;

        if (c->protect_kernel_tunables ||
            c->protect_kernel_modules ||
            c->private_devices ||
            context_has_syscall_filters(c))
                return true;

        if (!set_isempty(c->syscall_archs))
                return true;

        return c->lock_personality;
}
1314
c0467cf3 1315#ifdef HAVE_SECCOMP
17df7223 1316
83f12b27 1317static bool skip_seccomp_unavailable(const Unit* u, const char* msg) {
f673b62d
LP
1318
1319 if (is_seccomp_available())
1320 return false;
1321
1322 log_open();
1323 log_unit_debug(u, "SECCOMP features not detected in the kernel, skipping %s", msg);
1324 log_close();
1325 return true;
83f12b27
FS
1326}
1327
165a31c0 1328static int apply_syscall_filter(const Unit* u, const ExecContext *c, bool needs_ambient_hack) {
469830d1 1329 uint32_t negative_action, default_action, action;
165a31c0 1330 int r;
8351ceae 1331
469830d1 1332 assert(u);
c0467cf3 1333 assert(c);
8351ceae 1334
469830d1 1335 if (!context_has_syscall_filters(c))
83f12b27
FS
1336 return 0;
1337
469830d1
LP
1338 if (skip_seccomp_unavailable(u, "SystemCallFilter="))
1339 return 0;
e9642be2 1340
469830d1 1341 negative_action = c->syscall_errno == 0 ? SCMP_ACT_KILL : SCMP_ACT_ERRNO(c->syscall_errno);
e9642be2 1342
469830d1
LP
1343 if (c->syscall_whitelist) {
1344 default_action = negative_action;
1345 action = SCMP_ACT_ALLOW;
7c66bae2 1346 } else {
469830d1
LP
1347 default_action = SCMP_ACT_ALLOW;
1348 action = negative_action;
57183d11 1349 }
8351ceae 1350
165a31c0
LP
1351 if (needs_ambient_hack) {
1352 r = seccomp_filter_set_add(c->syscall_filter, c->syscall_whitelist, syscall_filter_sets + SYSCALL_FILTER_SET_SETUID);
1353 if (r < 0)
1354 return r;
1355 }
1356
469830d1 1357 return seccomp_load_syscall_filter_set_raw(default_action, c->syscall_filter, action);
4298d0b5
LP
1358}
1359
469830d1
LP
1360static int apply_syscall_archs(const Unit *u, const ExecContext *c) {
1361 assert(u);
4298d0b5
LP
1362 assert(c);
1363
469830d1 1364 if (set_isempty(c->syscall_archs))
83f12b27
FS
1365 return 0;
1366
469830d1
LP
1367 if (skip_seccomp_unavailable(u, "SystemCallArchitectures="))
1368 return 0;
4298d0b5 1369
469830d1
LP
1370 return seccomp_restrict_archs(c->syscall_archs);
1371}
4298d0b5 1372
469830d1
LP
1373static int apply_address_families(const Unit* u, const ExecContext *c) {
1374 assert(u);
1375 assert(c);
4298d0b5 1376
469830d1
LP
1377 if (!context_has_address_families(c))
1378 return 0;
4298d0b5 1379
469830d1
LP
1380 if (skip_seccomp_unavailable(u, "RestrictAddressFamilies="))
1381 return 0;
4298d0b5 1382
469830d1 1383 return seccomp_restrict_address_families(c->address_families, c->address_families_whitelist);
8351ceae 1384}
4298d0b5 1385
83f12b27 1386static int apply_memory_deny_write_execute(const Unit* u, const ExecContext *c) {
469830d1 1387 assert(u);
f3e43635
TM
1388 assert(c);
1389
469830d1 1390 if (!c->memory_deny_write_execute)
83f12b27
FS
1391 return 0;
1392
469830d1
LP
1393 if (skip_seccomp_unavailable(u, "MemoryDenyWriteExecute="))
1394 return 0;
f3e43635 1395
469830d1 1396 return seccomp_memory_deny_write_execute();
f3e43635
TM
1397}
1398
83f12b27 1399static int apply_restrict_realtime(const Unit* u, const ExecContext *c) {
469830d1 1400 assert(u);
f4170c67
LP
1401 assert(c);
1402
469830d1 1403 if (!c->restrict_realtime)
83f12b27
FS
1404 return 0;
1405
469830d1
LP
1406 if (skip_seccomp_unavailable(u, "RestrictRealtime="))
1407 return 0;
f4170c67 1408
469830d1 1409 return seccomp_restrict_realtime();
f4170c67
LP
1410}
1411
59e856c7 1412static int apply_protect_sysctl(const Unit *u, const ExecContext *c) {
469830d1 1413 assert(u);
59eeb84b
LP
1414 assert(c);
1415
1416 /* Turn off the legacy sysctl() system call. Many distributions turn this off while building the kernel, but
1417 * let's protect even those systems where this is left on in the kernel. */
1418
469830d1 1419 if (!c->protect_kernel_tunables)
59eeb84b
LP
1420 return 0;
1421
469830d1
LP
1422 if (skip_seccomp_unavailable(u, "ProtectKernelTunables="))
1423 return 0;
59eeb84b 1424
469830d1 1425 return seccomp_protect_sysctl();
59eeb84b
LP
1426}
1427
59e856c7 1428static int apply_protect_kernel_modules(const Unit *u, const ExecContext *c) {
469830d1 1429 assert(u);
502d704e
DH
1430 assert(c);
1431
25a8d8a0 1432 /* Turn off module syscalls on ProtectKernelModules=yes */
502d704e 1433
469830d1
LP
1434 if (!c->protect_kernel_modules)
1435 return 0;
1436
502d704e
DH
1437 if (skip_seccomp_unavailable(u, "ProtectKernelModules="))
1438 return 0;
1439
469830d1 1440 return seccomp_load_syscall_filter_set(SCMP_ACT_ALLOW, syscall_filter_sets + SYSCALL_FILTER_SET_MODULE, SCMP_ACT_ERRNO(EPERM));
502d704e
DH
1441}
1442
59e856c7 1443static int apply_private_devices(const Unit *u, const ExecContext *c) {
469830d1 1444 assert(u);
ba128bb8
LP
1445 assert(c);
1446
8f81a5f6 1447 /* If PrivateDevices= is set, also turn off iopl and all @raw-io syscalls. */
ba128bb8 1448
469830d1
LP
1449 if (!c->private_devices)
1450 return 0;
1451
ba128bb8
LP
1452 if (skip_seccomp_unavailable(u, "PrivateDevices="))
1453 return 0;
1454
469830d1 1455 return seccomp_load_syscall_filter_set(SCMP_ACT_ALLOW, syscall_filter_sets + SYSCALL_FILTER_SET_RAW_IO, SCMP_ACT_ERRNO(EPERM));
ba128bb8
LP
1456}
1457
add00535 1458static int apply_restrict_namespaces(Unit *u, const ExecContext *c) {
469830d1 1459 assert(u);
add00535
LP
1460 assert(c);
1461
1462 if (!exec_context_restrict_namespaces_set(c))
1463 return 0;
1464
1465 if (skip_seccomp_unavailable(u, "RestrictNamespaces="))
1466 return 0;
1467
1468 return seccomp_restrict_namespaces(c->restrict_namespaces);
1469}
1470
78e864e5 1471static int apply_lock_personality(const Unit* u, const ExecContext *c) {
e8132d63
LP
1472 unsigned long personality;
1473 int r;
78e864e5
TM
1474
1475 assert(u);
1476 assert(c);
1477
1478 if (!c->lock_personality)
1479 return 0;
1480
1481 if (skip_seccomp_unavailable(u, "LockPersonality="))
1482 return 0;
1483
e8132d63
LP
1484 personality = c->personality;
1485
1486 /* If personality is not specified, use either PER_LINUX or PER_LINUX32 depending on what is currently set. */
1487 if (personality == PERSONALITY_INVALID) {
1488
1489 r = opinionated_personality(&personality);
1490 if (r < 0)
1491 return r;
1492 }
78e864e5
TM
1493
1494 return seccomp_lock_personality(personality);
1495}
1496
c0467cf3 1497#endif
8351ceae 1498
31a7eb86
ZJS
1499static void do_idle_pipe_dance(int idle_pipe[4]) {
1500 assert(idle_pipe);
1501
54eb2300
LP
1502 idle_pipe[1] = safe_close(idle_pipe[1]);
1503 idle_pipe[2] = safe_close(idle_pipe[2]);
31a7eb86
ZJS
1504
1505 if (idle_pipe[0] >= 0) {
1506 int r;
1507
1508 r = fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT_USEC);
1509
1510 if (idle_pipe[3] >= 0 && r == 0 /* timeout */) {
c7cc737f
LP
1511 ssize_t n;
1512
31a7eb86 1513 /* Signal systemd that we are bored and want to continue. */
c7cc737f
LP
1514 n = write(idle_pipe[3], "x", 1);
1515 if (n > 0)
cd972d69
ZJS
1516 /* Wait for systemd to react to the signal above. */
1517 fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT2_USEC);
31a7eb86
ZJS
1518 }
1519
54eb2300 1520 idle_pipe[0] = safe_close(idle_pipe[0]);
31a7eb86
ZJS
1521
1522 }
1523
54eb2300 1524 idle_pipe[3] = safe_close(idle_pipe[3]);
31a7eb86
ZJS
1525}
1526
7cae38c4 1527static int build_environment(
fd63e712 1528 Unit *u,
9fa95f85 1529 const ExecContext *c,
1e22b5cd 1530 const ExecParameters *p,
7cae38c4
LP
1531 unsigned n_fds,
1532 const char *home,
1533 const char *username,
1534 const char *shell,
7bce046b
LP
1535 dev_t journal_stream_dev,
1536 ino_t journal_stream_ino,
7cae38c4
LP
1537 char ***ret) {
1538
1539 _cleanup_strv_free_ char **our_env = NULL;
1540 unsigned n_env = 0;
1541 char *x;
1542
4b58153d 1543 assert(u);
7cae38c4
LP
1544 assert(c);
1545 assert(ret);
1546
4b58153d 1547 our_env = new0(char*, 14);
7cae38c4
LP
1548 if (!our_env)
1549 return -ENOMEM;
1550
1551 if (n_fds > 0) {
8dd4c05b
LP
1552 _cleanup_free_ char *joined = NULL;
1553
df0ff127 1554 if (asprintf(&x, "LISTEN_PID="PID_FMT, getpid_cached()) < 0)
7cae38c4
LP
1555 return -ENOMEM;
1556 our_env[n_env++] = x;
1557
1558 if (asprintf(&x, "LISTEN_FDS=%u", n_fds) < 0)
1559 return -ENOMEM;
1560 our_env[n_env++] = x;
8dd4c05b 1561
1e22b5cd 1562 joined = strv_join(p->fd_names, ":");
8dd4c05b
LP
1563 if (!joined)
1564 return -ENOMEM;
1565
605405c6 1566 x = strjoin("LISTEN_FDNAMES=", joined);
8dd4c05b
LP
1567 if (!x)
1568 return -ENOMEM;
1569 our_env[n_env++] = x;
7cae38c4
LP
1570 }
1571
b08af3b1 1572 if ((p->flags & EXEC_SET_WATCHDOG) && p->watchdog_usec > 0) {
df0ff127 1573 if (asprintf(&x, "WATCHDOG_PID="PID_FMT, getpid_cached()) < 0)
09812eb7
LP
1574 return -ENOMEM;
1575 our_env[n_env++] = x;
1576
1e22b5cd 1577 if (asprintf(&x, "WATCHDOG_USEC="USEC_FMT, p->watchdog_usec) < 0)
09812eb7
LP
1578 return -ENOMEM;
1579 our_env[n_env++] = x;
1580 }
1581
fd63e712
LP
1582 /* If this is D-Bus, tell the nss-systemd module, since it relies on being able to use D-Bus look up dynamic
1583 * users via PID 1, possibly dead-locking the dbus daemon. This way it will not use D-Bus to resolve names, but
1584 * check the database directly. */
ac647978 1585 if (p->flags & EXEC_NSS_BYPASS_BUS) {
fd63e712
LP
1586 x = strdup("SYSTEMD_NSS_BYPASS_BUS=1");
1587 if (!x)
1588 return -ENOMEM;
1589 our_env[n_env++] = x;
1590 }
1591
7cae38c4
LP
1592 if (home) {
1593 x = strappend("HOME=", home);
1594 if (!x)
1595 return -ENOMEM;
1596 our_env[n_env++] = x;
1597 }
1598
1599 if (username) {
1600 x = strappend("LOGNAME=", username);
1601 if (!x)
1602 return -ENOMEM;
1603 our_env[n_env++] = x;
1604
1605 x = strappend("USER=", username);
1606 if (!x)
1607 return -ENOMEM;
1608 our_env[n_env++] = x;
1609 }
1610
1611 if (shell) {
1612 x = strappend("SHELL=", shell);
1613 if (!x)
1614 return -ENOMEM;
1615 our_env[n_env++] = x;
1616 }
1617
4b58153d
LP
1618 if (!sd_id128_is_null(u->invocation_id)) {
1619 if (asprintf(&x, "INVOCATION_ID=" SD_ID128_FORMAT_STR, SD_ID128_FORMAT_VAL(u->invocation_id)) < 0)
1620 return -ENOMEM;
1621
1622 our_env[n_env++] = x;
1623 }
1624
6af760f3
LP
1625 if (exec_context_needs_term(c)) {
1626 const char *tty_path, *term = NULL;
1627
1628 tty_path = exec_context_tty_path(c);
1629
1630 /* If we are forked off PID 1 and we are supposed to operate on /dev/console, then let's try to inherit
1631 * the $TERM set for PID 1. This is useful for containers so that the $TERM the container manager
1632 * passes to PID 1 ends up all the way in the console login shown. */
1633
1634 if (path_equal(tty_path, "/dev/console") && getppid() == 1)
1635 term = getenv("TERM");
1636 if (!term)
1637 term = default_term_for_tty(tty_path);
7cae38c4 1638
6af760f3 1639 x = strappend("TERM=", term);
7cae38c4
LP
1640 if (!x)
1641 return -ENOMEM;
1642 our_env[n_env++] = x;
1643 }
1644
7bce046b
LP
1645 if (journal_stream_dev != 0 && journal_stream_ino != 0) {
1646 if (asprintf(&x, "JOURNAL_STREAM=" DEV_FMT ":" INO_FMT, journal_stream_dev, journal_stream_ino) < 0)
1647 return -ENOMEM;
1648
1649 our_env[n_env++] = x;
1650 }
1651
7cae38c4 1652 our_env[n_env++] = NULL;
7bce046b 1653 assert(n_env <= 12);
7cae38c4
LP
1654
1655 *ret = our_env;
1656 our_env = NULL;
1657
1658 return 0;
1659}
1660
b4c14404
FB
1661static int build_pass_environment(const ExecContext *c, char ***ret) {
1662 _cleanup_strv_free_ char **pass_env = NULL;
1663 size_t n_env = 0, n_bufsize = 0;
1664 char **i;
1665
1666 STRV_FOREACH(i, c->pass_environment) {
1667 _cleanup_free_ char *x = NULL;
1668 char *v;
1669
1670 v = getenv(*i);
1671 if (!v)
1672 continue;
605405c6 1673 x = strjoin(*i, "=", v);
b4c14404
FB
1674 if (!x)
1675 return -ENOMEM;
1676 if (!GREEDY_REALLOC(pass_env, n_bufsize, n_env + 2))
1677 return -ENOMEM;
1678 pass_env[n_env++] = x;
1679 pass_env[n_env] = NULL;
1680 x = NULL;
1681 }
1682
1683 *ret = pass_env;
1684 pass_env = NULL;
1685
1686 return 0;
1687}
1688
8b44a3d2
LP
1689static bool exec_needs_mount_namespace(
1690 const ExecContext *context,
1691 const ExecParameters *params,
1692 ExecRuntime *runtime) {
1693
1694 assert(context);
1695 assert(params);
1696
915e6d16
LP
1697 if (context->root_image)
1698 return true;
1699
2a624c36
AP
1700 if (!strv_isempty(context->read_write_paths) ||
1701 !strv_isempty(context->read_only_paths) ||
1702 !strv_isempty(context->inaccessible_paths))
8b44a3d2
LP
1703 return true;
1704
d2d6c096
LP
1705 if (context->n_bind_mounts > 0)
1706 return true;
1707
8b44a3d2
LP
1708 if (context->mount_flags != 0)
1709 return true;
1710
1711 if (context->private_tmp && runtime && (runtime->tmp_dir || runtime->var_tmp_dir))
1712 return true;
1713
8b44a3d2
LP
1714 if (context->private_devices ||
1715 context->protect_system != PROTECT_SYSTEM_NO ||
59eeb84b
LP
1716 context->protect_home != PROTECT_HOME_NO ||
1717 context->protect_kernel_tunables ||
c575770b 1718 context->protect_kernel_modules ||
59eeb84b 1719 context->protect_control_groups)
8b44a3d2
LP
1720 return true;
1721
9c988f93 1722 if (context->mount_apivfs && (context->root_image || context->root_directory))
5d997827
LP
1723 return true;
1724
8b44a3d2
LP
1725 return false;
1726}
1727
d251207d
LP
1728static int setup_private_users(uid_t uid, gid_t gid) {
1729 _cleanup_free_ char *uid_map = NULL, *gid_map = NULL;
1730 _cleanup_close_pair_ int errno_pipe[2] = { -1, -1 };
1731 _cleanup_close_ int unshare_ready_fd = -1;
1732 _cleanup_(sigkill_waitp) pid_t pid = 0;
1733 uint64_t c = 1;
1734 siginfo_t si;
1735 ssize_t n;
1736 int r;
1737
1738 /* Set up a user namespace and map root to root, the selected UID/GID to itself, and everything else to
1739 * nobody. In order to be able to write this mapping we need CAP_SETUID in the original user namespace, which
1740 * we however lack after opening the user namespace. To work around this we fork() a temporary child process,
1741 * which waits for the parent to create the new user namespace while staying in the original namespace. The
1742 * child then writes the UID mapping, under full privileges. The parent waits for the child to finish and
1743 * continues execution normally. */
1744
587ab01b
ZJS
1745 if (uid != 0 && uid_is_valid(uid)) {
1746 r = asprintf(&uid_map,
1747 "0 0 1\n" /* Map root → root */
1748 UID_FMT " " UID_FMT " 1\n", /* Map $UID → $UID */
1749 uid, uid);
1750 if (r < 0)
1751 return -ENOMEM;
1752 } else {
e0f3720e 1753 uid_map = strdup("0 0 1\n"); /* The case where the above is the same */
587ab01b
ZJS
1754 if (!uid_map)
1755 return -ENOMEM;
1756 }
d251207d 1757
587ab01b
ZJS
1758 if (gid != 0 && gid_is_valid(gid)) {
1759 r = asprintf(&gid_map,
1760 "0 0 1\n" /* Map root → root */
1761 GID_FMT " " GID_FMT " 1\n", /* Map $GID → $GID */
1762 gid, gid);
1763 if (r < 0)
1764 return -ENOMEM;
1765 } else {
d251207d 1766 gid_map = strdup("0 0 1\n"); /* The case where the above is the same */
587ab01b
ZJS
1767 if (!gid_map)
1768 return -ENOMEM;
1769 }
d251207d
LP
1770
1771 /* Create a communication channel so that the parent can tell the child when it finished creating the user
1772 * namespace. */
1773 unshare_ready_fd = eventfd(0, EFD_CLOEXEC);
1774 if (unshare_ready_fd < 0)
1775 return -errno;
1776
1777 /* Create a communication channel so that the child can tell the parent a proper error code in case it
1778 * failed. */
1779 if (pipe2(errno_pipe, O_CLOEXEC) < 0)
1780 return -errno;
1781
1782 pid = fork();
1783 if (pid < 0)
1784 return -errno;
1785
1786 if (pid == 0) {
1787 _cleanup_close_ int fd = -1;
1788 const char *a;
1789 pid_t ppid;
1790
1791 /* Child process, running in the original user namespace. Let's update the parent's UID/GID map from
1792 * here, after the parent opened its own user namespace. */
1793
1794 ppid = getppid();
1795 errno_pipe[0] = safe_close(errno_pipe[0]);
1796
1797 /* Wait until the parent unshared the user namespace */
1798 if (read(unshare_ready_fd, &c, sizeof(c)) < 0) {
1799 r = -errno;
1800 goto child_fail;
1801 }
1802
1803 /* Disable the setgroups() system call in the child user namespace, for good. */
1804 a = procfs_file_alloca(ppid, "setgroups");
1805 fd = open(a, O_WRONLY|O_CLOEXEC);
1806 if (fd < 0) {
1807 if (errno != ENOENT) {
1808 r = -errno;
1809 goto child_fail;
1810 }
1811
1812 /* If the file is missing the kernel is too old, let's continue anyway. */
1813 } else {
1814 if (write(fd, "deny\n", 5) < 0) {
1815 r = -errno;
1816 goto child_fail;
1817 }
1818
1819 fd = safe_close(fd);
1820 }
1821
1822 /* First write the GID map */
1823 a = procfs_file_alloca(ppid, "gid_map");
1824 fd = open(a, O_WRONLY|O_CLOEXEC);
1825 if (fd < 0) {
1826 r = -errno;
1827 goto child_fail;
1828 }
1829 if (write(fd, gid_map, strlen(gid_map)) < 0) {
1830 r = -errno;
1831 goto child_fail;
1832 }
1833 fd = safe_close(fd);
1834
1835 /* The write the UID map */
1836 a = procfs_file_alloca(ppid, "uid_map");
1837 fd = open(a, O_WRONLY|O_CLOEXEC);
1838 if (fd < 0) {
1839 r = -errno;
1840 goto child_fail;
1841 }
1842 if (write(fd, uid_map, strlen(uid_map)) < 0) {
1843 r = -errno;
1844 goto child_fail;
1845 }
1846
1847 _exit(EXIT_SUCCESS);
1848
1849 child_fail:
1850 (void) write(errno_pipe[1], &r, sizeof(r));
1851 _exit(EXIT_FAILURE);
1852 }
1853
1854 errno_pipe[1] = safe_close(errno_pipe[1]);
1855
1856 if (unshare(CLONE_NEWUSER) < 0)
1857 return -errno;
1858
1859 /* Let the child know that the namespace is ready now */
1860 if (write(unshare_ready_fd, &c, sizeof(c)) < 0)
1861 return -errno;
1862
1863 /* Try to read an error code from the child */
1864 n = read(errno_pipe[0], &r, sizeof(r));
1865 if (n < 0)
1866 return -errno;
1867 if (n == sizeof(r)) { /* an error code was sent to us */
1868 if (r < 0)
1869 return r;
1870 return -EIO;
1871 }
1872 if (n != 0) /* on success we should have read 0 bytes */
1873 return -EIO;
1874
1875 r = wait_for_terminate(pid, &si);
1876 if (r < 0)
1877 return r;
1878 pid = 0;
1879
1880 /* If something strange happened with the child, let's consider this fatal, too */
1881 if (si.si_code != CLD_EXITED || si.si_status != 0)
1882 return -EIO;
1883
1884 return 0;
1885}
1886
3536f49e 1887static int setup_exec_directory(
07689d5d
LP
1888 const ExecContext *context,
1889 const ExecParameters *params,
1890 uid_t uid,
3536f49e 1891 gid_t gid,
3536f49e
YW
1892 ExecDirectoryType type,
1893 int *exit_status) {
07689d5d 1894
3536f49e
YW
1895 static const int exit_status_table[_EXEC_DIRECTORY_MAX] = {
1896 [EXEC_DIRECTORY_RUNTIME] = EXIT_RUNTIME_DIRECTORY,
1897 [EXEC_DIRECTORY_STATE] = EXIT_STATE_DIRECTORY,
1898 [EXEC_DIRECTORY_CACHE] = EXIT_CACHE_DIRECTORY,
1899 [EXEC_DIRECTORY_LOGS] = EXIT_LOGS_DIRECTORY,
1900 [EXEC_DIRECTORY_CONFIGURATION] = EXIT_CONFIGURATION_DIRECTORY,
1901 };
07689d5d
LP
1902 char **rt;
1903 int r;
1904
1905 assert(context);
1906 assert(params);
3536f49e
YW
1907 assert(type >= 0 && type < _EXEC_DIRECTORY_MAX);
1908 assert(exit_status);
07689d5d 1909
3536f49e
YW
1910 if (!params->prefix[type])
1911 return 0;
1912
8679efde 1913 if (params->flags & EXEC_CHOWN_DIRECTORIES) {
3536f49e
YW
1914 if (!uid_is_valid(uid))
1915 uid = 0;
1916 if (!gid_is_valid(gid))
1917 gid = 0;
1918 }
1919
1920 STRV_FOREACH(rt, context->directories[type].paths) {
07689d5d
LP
1921 _cleanup_free_ char *p;
1922
3536f49e
YW
1923 p = strjoin(params->prefix[type], "/", *rt);
1924 if (!p) {
1925 r = -ENOMEM;
1926 goto fail;
1927 }
07689d5d 1928
23a7448e
YW
1929 r = mkdir_parents_label(p, 0755);
1930 if (r < 0)
3536f49e 1931 goto fail;
23a7448e 1932
3536f49e 1933 r = mkdir_p_label(p, context->directories[type].mode);
07689d5d 1934 if (r < 0)
3536f49e 1935 goto fail;
07689d5d 1936
c71b2eb7
LP
1937 /* Don't change the owner of the configuration directory, as in the common case it is not written to by
1938 * a service, and shall not be writable. */
1939 if (type == EXEC_DIRECTORY_CONFIGURATION)
1940 continue;
1941
3536f49e 1942 r = chmod_and_chown(p, context->directories[type].mode, uid, gid);
07689d5d 1943 if (r < 0)
3536f49e 1944 goto fail;
07689d5d
LP
1945 }
1946
1947 return 0;
3536f49e
YW
1948
1949fail:
1950 *exit_status = exit_status_table[type];
1951
1952 return r;
07689d5d
LP
1953}
1954
cefc33ae
LP
1955static int setup_smack(
1956 const ExecContext *context,
1957 const ExecCommand *command) {
1958
cefc33ae
LP
1959 int r;
1960
1961 assert(context);
1962 assert(command);
1963
cefc33ae
LP
1964 if (context->smack_process_label) {
1965 r = mac_smack_apply_pid(0, context->smack_process_label);
1966 if (r < 0)
1967 return r;
1968 }
1969#ifdef SMACK_DEFAULT_PROCESS_LABEL
1970 else {
1971 _cleanup_free_ char *exec_label = NULL;
1972
1973 r = mac_smack_read(command->path, SMACK_ATTR_EXEC, &exec_label);
1974 if (r < 0 && r != -ENODATA && r != -EOPNOTSUPP)
1975 return r;
1976
1977 r = mac_smack_apply_pid(0, exec_label ? : SMACK_DEFAULT_PROCESS_LABEL);
1978 if (r < 0)
1979 return r;
1980 }
cefc33ae
LP
1981#endif
1982
1983 return 0;
1984}
1985
3fbe8dbe
LP
1986static int compile_read_write_paths(
1987 const ExecContext *context,
1988 const ExecParameters *params,
1989 char ***ret) {
1990
1991 _cleanup_strv_free_ char **l = NULL;
1992 char **rt;
3536f49e 1993 ExecDirectoryType i;
3fbe8dbe 1994
06ec51d8
ZJS
1995 /* Compile the list of writable paths. This is the combination of
1996 * the explicitly configured paths, plus all runtime directories. */
3fbe8dbe 1997
3536f49e
YW
1998 if (strv_isempty(context->read_write_paths)) {
1999 for (i = 0; i < _EXEC_DIRECTORY_MAX; i++)
2000 if (!strv_isempty(context->directories[i].paths))
2001 break;
2002
2003 if (i == _EXEC_DIRECTORY_MAX) {
2004 *ret = NULL; /* NOP if neither is set */
2005 return 0;
2006 }
3fbe8dbe
LP
2007 }
2008
2009 l = strv_copy(context->read_write_paths);
2010 if (!l)
2011 return -ENOMEM;
2012
3536f49e
YW
2013 for (i = 0; i < _EXEC_DIRECTORY_MAX; i++) {
2014 if (!params->prefix[i])
2015 continue;
3fbe8dbe 2016
3536f49e
YW
2017 STRV_FOREACH(rt, context->directories[i].paths) {
2018 char *s;
3fbe8dbe 2019
3536f49e
YW
2020 s = strjoin(params->prefix[i], "/", *rt);
2021 if (!s)
2022 return -ENOMEM;
2023
2024 if (strv_consume(&l, s) < 0)
2025 return -ENOMEM;
2026 }
3fbe8dbe
LP
2027 }
2028
2029 *ret = l;
2030 l = NULL;
2031
2032 return 0;
2033}
2034
6818c54c
LP
2035static int apply_mount_namespace(
2036 Unit *u,
2037 ExecCommand *command,
2038 const ExecContext *context,
2039 const ExecParameters *params,
2040 ExecRuntime *runtime) {
2041
06ec51d8 2042 _cleanup_strv_free_ char **rw = NULL;
93c6bb51 2043 char *tmp = NULL, *var = NULL;
915e6d16 2044 const char *root_dir = NULL, *root_image = NULL;
93c6bb51 2045 NameSpaceInfo ns_info = {
af964954 2046 .ignore_protect_paths = false,
93c6bb51
DH
2047 .private_dev = context->private_devices,
2048 .protect_control_groups = context->protect_control_groups,
2049 .protect_kernel_tunables = context->protect_kernel_tunables,
2050 .protect_kernel_modules = context->protect_kernel_modules,
5d997827 2051 .mount_apivfs = context->mount_apivfs,
93c6bb51 2052 };
165a31c0 2053 bool needs_sandboxing;
6818c54c 2054 int r;
93c6bb51 2055
2b3c1b9e
DH
2056 assert(context);
2057
93c6bb51
DH
2058 /* The runtime struct only contains the parent of the private /tmp,
2059 * which is non-accessible to world users. Inside of it there's a /tmp
2060 * that is sticky, and that's the one we want to use here. */
2061
2062 if (context->private_tmp && runtime) {
2063 if (runtime->tmp_dir)
2064 tmp = strjoina(runtime->tmp_dir, "/tmp");
2065 if (runtime->var_tmp_dir)
2066 var = strjoina(runtime->var_tmp_dir, "/tmp");
2067 }
2068
2069 r = compile_read_write_paths(context, params, &rw);
2070 if (r < 0)
2071 return r;
2072
915e6d16
LP
2073 if (params->flags & EXEC_APPLY_CHROOT) {
2074 root_image = context->root_image;
2075
2076 if (!root_image)
2077 root_dir = context->root_directory;
2078 }
93c6bb51 2079
af964954
DH
2080 /*
2081 * If DynamicUser=no and RootDirectory= is set then lets pass a relaxed
2082 * sandbox info, otherwise enforce it, don't ignore protected paths and
2083 * fail if we are enable to apply the sandbox inside the mount namespace.
2084 */
2085 if (!context->dynamic_user && root_dir)
2086 ns_info.ignore_protect_paths = true;
2087
165a31c0 2088 needs_sandboxing = (params->flags & EXEC_APPLY_SANDBOXING) && !(command->flags & EXEC_COMMAND_FULLY_PRIVILEGED);
6818c54c 2089
915e6d16
LP
2090 r = setup_namespace(root_dir, root_image,
2091 &ns_info, rw,
165a31c0
LP
2092 needs_sandboxing ? context->read_only_paths : NULL,
2093 needs_sandboxing ? context->inaccessible_paths : NULL,
d2d6c096
LP
2094 context->bind_mounts,
2095 context->n_bind_mounts,
93c6bb51
DH
2096 tmp,
2097 var,
165a31c0
LP
2098 needs_sandboxing ? context->protect_home : PROTECT_HOME_NO,
2099 needs_sandboxing ? context->protect_system : PROTECT_SYSTEM_NO,
915e6d16
LP
2100 context->mount_flags,
2101 DISSECT_IMAGE_DISCARD_ON_LOOP);
93c6bb51
DH
2102
2103 /* If we couldn't set up the namespace this is probably due to a
2104 * missing capability. In this case, silently proceeed. */
2105 if (IN_SET(r, -EPERM, -EACCES)) {
2106 log_open();
2107 log_unit_debug_errno(u, r, "Failed to set up namespace, assuming containerized execution, ignoring: %m");
2108 log_close();
2109 r = 0;
2110 }
2111
2112 return r;
2113}
2114
915e6d16
LP
2115static int apply_working_directory(
2116 const ExecContext *context,
2117 const ExecParameters *params,
2118 const char *home,
376fecf6
LP
2119 const bool needs_mount_ns,
2120 int *exit_status) {
915e6d16 2121
6732edab 2122 const char *d, *wd;
2b3c1b9e
DH
2123
2124 assert(context);
376fecf6 2125 assert(exit_status);
2b3c1b9e 2126
6732edab
LP
2127 if (context->working_directory_home) {
2128
376fecf6
LP
2129 if (!home) {
2130 *exit_status = EXIT_CHDIR;
6732edab 2131 return -ENXIO;
376fecf6 2132 }
6732edab 2133
2b3c1b9e 2134 wd = home;
6732edab
LP
2135
2136 } else if (context->working_directory)
2b3c1b9e
DH
2137 wd = context->working_directory;
2138 else
2139 wd = "/";
e7f1e7c6
DH
2140
2141 if (params->flags & EXEC_APPLY_CHROOT) {
2142 if (!needs_mount_ns && context->root_directory)
376fecf6
LP
2143 if (chroot(context->root_directory) < 0) {
2144 *exit_status = EXIT_CHROOT;
e7f1e7c6 2145 return -errno;
376fecf6 2146 }
e7f1e7c6 2147
2b3c1b9e
DH
2148 d = wd;
2149 } else
3b0e5bb5 2150 d = prefix_roota(context->root_directory, wd);
e7f1e7c6 2151
376fecf6
LP
2152 if (chdir(d) < 0 && !context->working_directory_missing_ok) {
2153 *exit_status = EXIT_CHDIR;
2b3c1b9e 2154 return -errno;
376fecf6 2155 }
e7f1e7c6
DH
2156
2157 return 0;
2158}
2159
74dd6b51
LP
2160static int setup_keyring(Unit *u, const ExecParameters *p, uid_t uid, gid_t gid) {
2161 key_serial_t keyring;
2162
2163 assert(u);
2164 assert(p);
2165
2166 /* Let's set up a new per-service "session" kernel keyring for each system service. This has the benefit that
2167 * each service runs with its own keyring shared among all processes of the service, but with no hook-up beyond
2168 * that scope, and in particular no link to the per-UID keyring. If we don't do this the keyring will be
2169 * automatically created on-demand and then linked to the per-UID keyring, by the kernel. The kernel's built-in
2170 * on-demand behaviour is very appropriate for login users, but probably not so much for system services, where
2171 * UIDs are not necessarily specific to a service but reused (at least in the case of UID 0). */
2172
2173 if (!(p->flags & EXEC_NEW_KEYRING))
2174 return 0;
2175
2176 keyring = keyctl(KEYCTL_JOIN_SESSION_KEYRING, 0, 0, 0, 0);
2177 if (keyring == -1) {
2178 if (errno == ENOSYS)
2179 log_debug_errno(errno, "Kernel keyring not supported, ignoring.");
2180 else if (IN_SET(errno, EACCES, EPERM))
2181 log_debug_errno(errno, "Kernel keyring access prohibited, ignoring.");
2182 else if (errno == EDQUOT)
2183 log_debug_errno(errno, "Out of kernel keyrings to allocate, ignoring.");
2184 else
2185 return log_error_errno(errno, "Setting up kernel keyring failed: %m");
2186
2187 return 0;
2188 }
2189
b3415f5d
LP
2190 /* Populate they keyring with the invocation ID by default. */
2191 if (!sd_id128_is_null(u->invocation_id)) {
2192 key_serial_t key;
2193
2194 key = add_key("user", "invocation_id", &u->invocation_id, sizeof(u->invocation_id), KEY_SPEC_SESSION_KEYRING);
2195 if (key == -1)
2196 log_debug_errno(errno, "Failed to add invocation ID to keyring, ignoring: %m");
2197 else {
2198 if (keyctl(KEYCTL_SETPERM, key,
2199 KEY_POS_VIEW|KEY_POS_READ|KEY_POS_SEARCH|
2200 KEY_USR_VIEW|KEY_USR_READ|KEY_USR_SEARCH, 0, 0) < 0)
2201 return log_error_errno(errno, "Failed to restrict invocation ID permission: %m");
2202 }
2203 }
2204
74dd6b51
LP
2205 /* And now, make the keyring owned by the service's user */
2206 if (uid_is_valid(uid) || gid_is_valid(gid))
2207 if (keyctl(KEYCTL_CHOWN, keyring, uid, gid, 0) < 0)
2208 return log_error_errno(errno, "Failed to change ownership of session keyring: %m");
2209
2210 return 0;
2211}
2212
29206d46
LP
/* Appends each valid (i.e. non-negative) fd of the specified pair to array[], advancing *n for every entry
 * added. A NULL pair is ignored. The caller has to make sure array[] has room for up to two more entries. */
static void append_socket_pair(int *array, unsigned *n, int pair[2]) {
        unsigned k;

        assert(array);
        assert(n);

        if (!pair)
                return;

        for (k = 0; k < 2; k++) {
                if (pair[k] < 0)
                        continue;

                array[*n] = pair[k];
                (*n)++;
        }
}
2225
a34ceba6
LP
2226static int close_remaining_fds(
2227 const ExecParameters *params,
2228 ExecRuntime *runtime,
29206d46 2229 DynamicCreds *dcreds,
00d9ef85 2230 int user_lookup_fd,
a34ceba6
LP
2231 int socket_fd,
2232 int *fds, unsigned n_fds) {
2233
2234 unsigned n_dont_close = 0;
00d9ef85 2235 int dont_close[n_fds + 12];
a34ceba6
LP
2236
2237 assert(params);
2238
2239 if (params->stdin_fd >= 0)
2240 dont_close[n_dont_close++] = params->stdin_fd;
2241 if (params->stdout_fd >= 0)
2242 dont_close[n_dont_close++] = params->stdout_fd;
2243 if (params->stderr_fd >= 0)
2244 dont_close[n_dont_close++] = params->stderr_fd;
2245
2246 if (socket_fd >= 0)
2247 dont_close[n_dont_close++] = socket_fd;
2248 if (n_fds > 0) {
2249 memcpy(dont_close + n_dont_close, fds, sizeof(int) * n_fds);
2250 n_dont_close += n_fds;
2251 }
2252
29206d46
LP
2253 if (runtime)
2254 append_socket_pair(dont_close, &n_dont_close, runtime->netns_storage_socket);
2255
2256 if (dcreds) {
2257 if (dcreds->user)
2258 append_socket_pair(dont_close, &n_dont_close, dcreds->user->storage_socket);
2259 if (dcreds->group)
2260 append_socket_pair(dont_close, &n_dont_close, dcreds->group->storage_socket);
a34ceba6
LP
2261 }
2262
00d9ef85
LP
2263 if (user_lookup_fd >= 0)
2264 dont_close[n_dont_close++] = user_lookup_fd;
2265
a34ceba6
LP
2266 return close_all_fds(dont_close, n_dont_close);
2267}
2268
00d9ef85
LP
2269static int send_user_lookup(
2270 Unit *unit,
2271 int user_lookup_fd,
2272 uid_t uid,
2273 gid_t gid) {
2274
2275 assert(unit);
2276
2277 /* Send the resolved UID/GID to PID 1 after we learnt it. We send a single datagram, containing the UID/GID
2278 * data as well as the unit name. Note that we suppress sending this if no user/group to resolve was
2279 * specified. */
2280
2281 if (user_lookup_fd < 0)
2282 return 0;
2283
2284 if (!uid_is_valid(uid) && !gid_is_valid(gid))
2285 return 0;
2286
2287 if (writev(user_lookup_fd,
2288 (struct iovec[]) {
2289 { .iov_base = &uid, .iov_len = sizeof(uid) },
2290 { .iov_base = &gid, .iov_len = sizeof(gid) },
2291 { .iov_base = unit->id, .iov_len = strlen(unit->id) }}, 3) < 0)
2292 return -errno;
2293
2294 return 0;
2295}
2296
6732edab
LP
2297static int acquire_home(const ExecContext *c, uid_t uid, const char** home, char **buf) {
2298 int r;
2299
2300 assert(c);
2301 assert(home);
2302 assert(buf);
2303
2304 /* If WorkingDirectory=~ is set, try to acquire a usable home directory. */
2305
2306 if (*home)
2307 return 0;
2308
2309 if (!c->working_directory_home)
2310 return 0;
2311
2312 if (uid == 0) {
2313 /* Hardcode /root as home directory for UID 0 */
2314 *home = "/root";
2315 return 1;
2316 }
2317
2318 r = get_home_dir(buf);
2319 if (r < 0)
2320 return r;
2321
2322 *home = *buf;
2323 return 1;
2324}
2325
ff0af2a1 2326static int exec_child(
f2341e0a 2327 Unit *unit,
ff0af2a1
LP
2328 ExecCommand *command,
2329 const ExecContext *context,
2330 const ExecParameters *params,
2331 ExecRuntime *runtime,
29206d46 2332 DynamicCreds *dcreds,
ff0af2a1
LP
2333 char **argv,
2334 int socket_fd,
52c239d7 2335 int named_iofds[3],
4c47affc
FB
2336 int *fds,
2337 unsigned n_storage_fds,
9b141911 2338 unsigned n_socket_fds,
ff0af2a1 2339 char **files_env,
00d9ef85 2340 int user_lookup_fd,
70dd455c
ZJS
2341 int *exit_status,
2342 char **error_message) {
d35fbf6b 2343
2065ca69 2344 _cleanup_strv_free_ char **our_env = NULL, **pass_env = NULL, **accum_env = NULL, **final_argv = NULL;
6732edab 2345 _cleanup_free_ char *mac_selinux_context_net = NULL, *home_buffer = NULL;
4d885bd3
DH
2346 _cleanup_free_ gid_t *supplementary_gids = NULL;
2347 const char *username = NULL, *groupname = NULL;
2b3c1b9e 2348 const char *home = NULL, *shell = NULL;
7bce046b
LP
2349 dev_t journal_stream_dev = 0;
2350 ino_t journal_stream_ino = 0;
165a31c0
LP
2351 bool needs_sandboxing, /* Do we need to set up full sandboxing? (i.e. all namespacing, all MAC stuff, caps, yadda yadda */
2352 needs_setuid, /* Do we need to do the actual setresuid()/setresgid() calls? */
2353 needs_mount_namespace, /* Do we need to set up a mount namespace for this kernel? */
2354 needs_ambient_hack; /* Do we need to apply the ambient capabilities hack? */
ecfbc84f 2355#ifdef HAVE_SELINUX
43b1f709 2356 bool use_selinux = false;
ecfbc84f
YW
2357#endif
2358#ifdef HAVE_SMACK
43b1f709 2359 bool use_smack = false;
ecfbc84f
YW
2360#endif
2361#ifdef HAVE_APPARMOR
43b1f709 2362 bool use_apparmor = false;
ecfbc84f 2363#endif
fed1e721
LP
2364 uid_t uid = UID_INVALID;
2365 gid_t gid = GID_INVALID;
4d885bd3 2366 int i, r, ngids = 0;
4c47affc 2367 unsigned n_fds;
3536f49e 2368 ExecDirectoryType dt;
165a31c0 2369 int secure_bits;
034c6ed7 2370
f2341e0a 2371 assert(unit);
5cb5a6ff
LP
2372 assert(command);
2373 assert(context);
d35fbf6b 2374 assert(params);
ff0af2a1 2375 assert(exit_status);
70dd455c
ZJS
2376 assert(error_message);
2377 /* We don't always set error_message, hence it must be initialized */
2378 assert(*error_message == NULL);
d35fbf6b
DM
2379
2380 rename_process_from_path(command->path);
2381
2382 /* We reset exactly these signals, since they are the
2383 * only ones we set to SIG_IGN in the main daemon. All
2384 * others we leave untouched because we set them to
2385 * SIG_DFL or a valid handler initially, both of which
2386 * will be demoted to SIG_DFL. */
ce30c8dc
LP
2387 (void) default_signals(SIGNALS_CRASH_HANDLER,
2388 SIGNALS_IGNORE, -1);
d35fbf6b
DM
2389
2390 if (context->ignore_sigpipe)
ce30c8dc 2391 (void) ignore_signals(SIGPIPE, -1);
d35fbf6b 2392
ff0af2a1
LP
2393 r = reset_signal_mask();
2394 if (r < 0) {
2395 *exit_status = EXIT_SIGNAL_MASK;
70dd455c
ZJS
2396 *error_message = strdup("Failed to reset signal mask");
2397 /* If strdup fails, here and below, we will just print the generic error message. */
ff0af2a1 2398 return r;
d35fbf6b 2399 }
034c6ed7 2400
d35fbf6b
DM
2401 if (params->idle_pipe)
2402 do_idle_pipe_dance(params->idle_pipe);
4f2d528d 2403
d35fbf6b
DM
2404 /* Close sockets very early to make sure we don't
2405 * block init reexecution because it cannot bind its
2406 * sockets */
ff0af2a1 2407
d35fbf6b 2408 log_forget_fds();
4f2d528d 2409
4c47affc 2410 n_fds = n_storage_fds + n_socket_fds;
00d9ef85 2411 r = close_remaining_fds(params, runtime, dcreds, user_lookup_fd, socket_fd, fds, n_fds);
ff0af2a1
LP
2412 if (r < 0) {
2413 *exit_status = EXIT_FDS;
70dd455c 2414 *error_message = strdup("Failed to close remaining fds");
ff0af2a1 2415 return r;
8c7be95e
LP
2416 }
2417
d35fbf6b
DM
2418 if (!context->same_pgrp)
2419 if (setsid() < 0) {
ff0af2a1 2420 *exit_status = EXIT_SETSID;
d35fbf6b
DM
2421 return -errno;
2422 }
9e2f7c11 2423
1e22b5cd 2424 exec_context_tty_reset(context, params);
d35fbf6b 2425
c891efaf 2426 if (unit_shall_confirm_spawn(unit)) {
7d5ceb64 2427 const char *vc = params->confirm_spawn;
3b20f877
FB
2428 _cleanup_free_ char *cmdline = NULL;
2429
2430 cmdline = exec_command_line(argv);
2431 if (!cmdline) {
2432 *exit_status = EXIT_CONFIRM;
2433 return -ENOMEM;
2434 }
d35fbf6b 2435
eedf223a 2436 r = ask_for_confirmation(vc, unit, cmdline);
3b20f877
FB
2437 if (r != CONFIRM_EXECUTE) {
2438 if (r == CONFIRM_PRETEND_SUCCESS) {
2439 *exit_status = EXIT_SUCCESS;
2440 return 0;
2441 }
ff0af2a1 2442 *exit_status = EXIT_CONFIRM;
70dd455c 2443 *error_message = strdup("Execution cancelled");
d35fbf6b 2444 return -ECANCELED;
d35fbf6b
DM
2445 }
2446 }
1a63a750 2447
29206d46
LP
2448 if (context->dynamic_user && dcreds) {
2449
409093fe
LP
2450 /* Make sure we bypass our own NSS module for any NSS checks */
2451 if (putenv((char*) "SYSTEMD_NSS_DYNAMIC_BYPASS=1") != 0) {
2452 *exit_status = EXIT_USER;
70dd455c 2453 *error_message = strdup("Failed to update environment");
409093fe
LP
2454 return -errno;
2455 }
2456
29206d46 2457 r = dynamic_creds_realize(dcreds, &uid, &gid);
ff0af2a1
LP
2458 if (r < 0) {
2459 *exit_status = EXIT_USER;
70dd455c 2460 *error_message = strdup("Failed to update dynamic user credentials");
ff0af2a1 2461 return r;
524daa8c 2462 }
524daa8c 2463
70dd455c 2464 if (!uid_is_valid(uid)) {
29206d46 2465 *exit_status = EXIT_USER;
70dd455c
ZJS
2466 (void) asprintf(error_message, "UID validation failed for \""UID_FMT"\"", uid);
2467 /* If asprintf fails, here and below, we will just print the generic error message. */
2468 return -ESRCH;
2469 }
2470
2471 if (!gid_is_valid(gid)) {
2472 *exit_status = EXIT_USER;
2473 (void) asprintf(error_message, "GID validation failed for \""GID_FMT"\"", gid);
29206d46
LP
2474 return -ESRCH;
2475 }
5bc7452b 2476
29206d46
LP
2477 if (dcreds->user)
2478 username = dcreds->user->name;
2479
2480 } else {
4d885bd3
DH
2481 r = get_fixed_user(context, &username, &uid, &gid, &home, &shell);
2482 if (r < 0) {
2483 *exit_status = EXIT_USER;
70dd455c 2484 *error_message = strdup("Failed to determine user credentials");
4d885bd3 2485 return r;
5bc7452b 2486 }
5bc7452b 2487
4d885bd3
DH
2488 r = get_fixed_group(context, &groupname, &gid);
2489 if (r < 0) {
2490 *exit_status = EXIT_GROUP;
70dd455c 2491 *error_message = strdup("Failed to determine group credentials");
4d885bd3
DH
2492 return r;
2493 }
cdc5d5c5 2494 }
29206d46 2495
cdc5d5c5
DH
2496 /* Initialize user supplementary groups and get SupplementaryGroups= ones */
2497 r = get_supplementary_groups(context, username, groupname, gid,
2498 &supplementary_gids, &ngids);
2499 if (r < 0) {
2500 *exit_status = EXIT_GROUP;
70dd455c 2501 *error_message = strdup("Failed to determine supplementary groups");
cdc5d5c5 2502 return r;
29206d46 2503 }
5bc7452b 2504
00d9ef85
LP
2505 r = send_user_lookup(unit, user_lookup_fd, uid, gid);
2506 if (r < 0) {
2507 *exit_status = EXIT_USER;
70dd455c 2508 *error_message = strdup("Failed to send user credentials to PID1");
00d9ef85
LP
2509 return r;
2510 }
2511
2512 user_lookup_fd = safe_close(user_lookup_fd);
2513
6732edab
LP
2514 r = acquire_home(context, uid, &home, &home_buffer);
2515 if (r < 0) {
2516 *exit_status = EXIT_CHDIR;
2517 *error_message = strdup("Failed to determine $HOME for user");
2518 return r;
2519 }
2520
d35fbf6b
DM
2521 /* If a socket is connected to STDIN/STDOUT/STDERR, we
2522 * must sure to drop O_NONBLOCK */
2523 if (socket_fd >= 0)
a34ceba6 2524 (void) fd_nonblock(socket_fd, false);
acbb0225 2525
52c239d7 2526 r = setup_input(context, params, socket_fd, named_iofds);
ff0af2a1
LP
2527 if (r < 0) {
2528 *exit_status = EXIT_STDIN;
70dd455c 2529 *error_message = strdup("Failed to set up stdin");
ff0af2a1 2530 return r;
d35fbf6b 2531 }
034c6ed7 2532
52c239d7 2533 r = setup_output(unit, context, params, STDOUT_FILENO, socket_fd, named_iofds, basename(command->path), uid, gid, &journal_stream_dev, &journal_stream_ino);
ff0af2a1
LP
2534 if (r < 0) {
2535 *exit_status = EXIT_STDOUT;
70dd455c 2536 *error_message = strdup("Failed to set up stdout");
ff0af2a1 2537 return r;
d35fbf6b
DM
2538 }
2539
52c239d7 2540 r = setup_output(unit, context, params, STDERR_FILENO, socket_fd, named_iofds, basename(command->path), uid, gid, &journal_stream_dev, &journal_stream_ino);
ff0af2a1
LP
2541 if (r < 0) {
2542 *exit_status = EXIT_STDERR;
70dd455c 2543 *error_message = strdup("Failed to set up stderr");
ff0af2a1 2544 return r;
d35fbf6b
DM
2545 }
2546
2547 if (params->cgroup_path) {
ff0af2a1
LP
2548 r = cg_attach_everywhere(params->cgroup_supported, params->cgroup_path, 0, NULL, NULL);
2549 if (r < 0) {
2550 *exit_status = EXIT_CGROUP;
70dd455c 2551 (void) asprintf(error_message, "Failed to attach to cgroup %s", params->cgroup_path);
ff0af2a1 2552 return r;
309bff19 2553 }
d35fbf6b 2554 }
309bff19 2555
d35fbf6b 2556 if (context->oom_score_adjust_set) {
d5243d62 2557 char t[DECIMAL_STR_MAX(context->oom_score_adjust)];
f2b68789 2558
d5243d62
LP
2559 /* When we can't make this change due to EPERM, then
2560 * let's silently skip over it. User namespaces
2561 * prohibit write access to this file, and we
2562 * shouldn't trip up over that. */
613b411c 2563
d5243d62 2564 sprintf(t, "%i", context->oom_score_adjust);
ad118bda 2565 r = write_string_file("/proc/self/oom_score_adj", t, 0);
6cb7fa17 2566 if (r == -EPERM || r == -EACCES) {
ff0af2a1 2567 log_open();
f2341e0a 2568 log_unit_debug_errno(unit, r, "Failed to adjust OOM setting, assuming containerized execution, ignoring: %m");
ff0af2a1
LP
2569 log_close();
2570 } else if (r < 0) {
2571 *exit_status = EXIT_OOM_ADJUST;
70dd455c 2572 *error_message = strdup("Failed to write /proc/self/oom_score_adj");
d35fbf6b 2573 return -errno;
613b411c 2574 }
d35fbf6b
DM
2575 }
2576
2577 if (context->nice_set)
2578 if (setpriority(PRIO_PROCESS, 0, context->nice) < 0) {
ff0af2a1 2579 *exit_status = EXIT_NICE;
d35fbf6b 2580 return -errno;
613b411c
LP
2581 }
2582
d35fbf6b
DM
2583 if (context->cpu_sched_set) {
2584 struct sched_param param = {
2585 .sched_priority = context->cpu_sched_priority,
2586 };
2587
ff0af2a1
LP
2588 r = sched_setscheduler(0,
2589 context->cpu_sched_policy |
2590 (context->cpu_sched_reset_on_fork ?
2591 SCHED_RESET_ON_FORK : 0),
2592 &param);
2593 if (r < 0) {
2594 *exit_status = EXIT_SETSCHEDULER;
d35fbf6b 2595 return -errno;
fc9b2a84 2596 }
d35fbf6b 2597 }
fc9b2a84 2598
d35fbf6b
DM
2599 if (context->cpuset)
2600 if (sched_setaffinity(0, CPU_ALLOC_SIZE(context->cpuset_ncpus), context->cpuset) < 0) {
ff0af2a1 2601 *exit_status = EXIT_CPUAFFINITY;
d35fbf6b 2602 return -errno;
034c6ed7
LP
2603 }
2604
d35fbf6b
DM
2605 if (context->ioprio_set)
2606 if (ioprio_set(IOPRIO_WHO_PROCESS, 0, context->ioprio) < 0) {
ff0af2a1 2607 *exit_status = EXIT_IOPRIO;
d35fbf6b
DM
2608 return -errno;
2609 }
da726a4d 2610
d35fbf6b
DM
2611 if (context->timer_slack_nsec != NSEC_INFINITY)
2612 if (prctl(PR_SET_TIMERSLACK, context->timer_slack_nsec) < 0) {
ff0af2a1 2613 *exit_status = EXIT_TIMERSLACK;
d35fbf6b 2614 return -errno;
4c2630eb 2615 }
9eba9da4 2616
21022b9d
LP
2617 if (context->personality != PERSONALITY_INVALID) {
2618 r = safe_personality(context->personality);
2619 if (r < 0) {
ff0af2a1 2620 *exit_status = EXIT_PERSONALITY;
21022b9d 2621 return r;
4c2630eb 2622 }
21022b9d 2623 }
94f04347 2624
d35fbf6b 2625 if (context->utmp_id)
df0ff127 2626 utmp_put_init_process(context->utmp_id, getpid_cached(), getsid(0),
6a93917d 2627 context->tty_path,
023a4f67
LP
2628 context->utmp_mode == EXEC_UTMP_INIT ? INIT_PROCESS :
2629 context->utmp_mode == EXEC_UTMP_LOGIN ? LOGIN_PROCESS :
2630 USER_PROCESS,
6a93917d 2631 username);
d35fbf6b 2632
e0d2adfd 2633 if (context->user) {
ff0af2a1
LP
2634 r = chown_terminal(STDIN_FILENO, uid);
2635 if (r < 0) {
2636 *exit_status = EXIT_STDIN;
2637 return r;
071830ff 2638 }
d35fbf6b 2639 }
8e274523 2640
a931ad47
LP
2641 /* If delegation is enabled we'll pass ownership of the cgroup
2642 * (but only in systemd's own controller hierarchy!) to the
2643 * user of the new process. */
584b8688 2644 if (params->cgroup_path && context->user && (params->flags & EXEC_CGROUP_DELEGATE)) {
ff0af2a1
LP
2645 r = cg_set_task_access(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, 0644, uid, gid);
2646 if (r < 0) {
2647 *exit_status = EXIT_CGROUP;
2648 return r;
d35fbf6b 2649 }
034c6ed7 2650
034c6ed7 2651
ff0af2a1
LP
2652 r = cg_set_group_access(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, 0755, uid, gid);
2653 if (r < 0) {
2654 *exit_status = EXIT_CGROUP;
2655 return r;
034c6ed7 2656 }
d35fbf6b 2657 }
034c6ed7 2658
3536f49e 2659 for (dt = 0; dt < _EXEC_DIRECTORY_MAX; dt++) {
8679efde 2660 r = setup_exec_directory(context, params, uid, gid, dt, exit_status);
3536f49e 2661 if (r < 0)
07689d5d 2662 return r;
d35fbf6b 2663 }
94f04347 2664
7bce046b 2665 r = build_environment(
fd63e712 2666 unit,
7bce046b
LP
2667 context,
2668 params,
2669 n_fds,
2670 home,
2671 username,
2672 shell,
2673 journal_stream_dev,
2674 journal_stream_ino,
2675 &our_env);
2065ca69
JW
2676 if (r < 0) {
2677 *exit_status = EXIT_MEMORY;
2678 return r;
2679 }
2680
2681 r = build_pass_environment(context, &pass_env);
2682 if (r < 0) {
2683 *exit_status = EXIT_MEMORY;
2684 return r;
2685 }
2686
2687 accum_env = strv_env_merge(5,
2688 params->environment,
2689 our_env,
2690 pass_env,
2691 context->environment,
2692 files_env,
2693 NULL);
2694 if (!accum_env) {
2695 *exit_status = EXIT_MEMORY;
2696 return -ENOMEM;
2697 }
1280503b 2698 accum_env = strv_env_clean(accum_env);
2065ca69 2699
096424d1 2700 (void) umask(context->umask);
b213e1c1 2701
74dd6b51
LP
2702 r = setup_keyring(unit, params, uid, gid);
2703 if (r < 0) {
2704 *exit_status = EXIT_KEYRING;
2705 return r;
2706 }
2707
165a31c0 2708 /* We need sandboxing if the caller asked us to apply it and the command isn't explicitly excepted from it */
1703fa41 2709 needs_sandboxing = (params->flags & EXEC_APPLY_SANDBOXING) && !(command->flags & EXEC_COMMAND_FULLY_PRIVILEGED);
7f18ef0a 2710
165a31c0
LP
2711 /* We need the ambient capability hack, if the caller asked us to apply it and the command is marked for it, and the kernel doesn't actually support ambient caps */
2712 needs_ambient_hack = (params->flags & EXEC_APPLY_SANDBOXING) && (command->flags & EXEC_COMMAND_AMBIENT_MAGIC) && !ambient_capabilities_supported();
7f18ef0a 2713
165a31c0
LP
2714 /* We need setresuid() if the caller asked us to apply sandboxing and the command isn't explicitly excepted from either whole sandboxing or just setresuid() itself, and the ambient hack is not desired */
2715 if (needs_ambient_hack)
2716 needs_setuid = false;
2717 else
2718 needs_setuid = (params->flags & EXEC_APPLY_SANDBOXING) && !(command->flags & (EXEC_COMMAND_FULLY_PRIVILEGED|EXEC_COMMAND_NO_SETUID));
2719
2720 if (needs_sandboxing) {
7f18ef0a
FK
2721 /* MAC enablement checks need to be done before a new mount ns is created, as they rely on /sys being
2722 * present. The actual MAC context application will happen later, as late as possible, to avoid
2723 * impacting our own code paths. */
2724
2725#ifdef HAVE_SELINUX
43b1f709 2726 use_selinux = mac_selinux_use();
7f18ef0a 2727#endif
7f18ef0a 2728#ifdef HAVE_SMACK
43b1f709 2729 use_smack = mac_smack_use();
7f18ef0a 2730#endif
7f18ef0a 2731#ifdef HAVE_APPARMOR
43b1f709 2732 use_apparmor = mac_apparmor_use();
7f18ef0a 2733#endif
165a31c0 2734 }
7f18ef0a 2735
165a31c0
LP
2736 if (needs_setuid) {
2737 if (context->pam_name && username) {
2738 r = setup_pam(context->pam_name, username, uid, gid, context->tty_path, &accum_env, fds, n_fds);
2739 if (r < 0) {
2740 *exit_status = EXIT_PAM;
2741 return r;
2742 }
2743 }
b213e1c1 2744 }
ac45f971 2745
d35fbf6b 2746 if (context->private_network && runtime && runtime->netns_storage_socket[0] >= 0) {
ff0af2a1
LP
2747 r = setup_netns(runtime->netns_storage_socket);
2748 if (r < 0) {
2749 *exit_status = EXIT_NETWORK;
2750 return r;
d35fbf6b
DM
2751 }
2752 }
169c1bda 2753
ee818b89 2754 needs_mount_namespace = exec_needs_mount_namespace(context, params, runtime);
ee818b89 2755 if (needs_mount_namespace) {
6818c54c 2756 r = apply_mount_namespace(unit, command, context, params, runtime);
3fbe8dbe
LP
2757 if (r < 0) {
2758 *exit_status = EXIT_NAMESPACE;
2759 return r;
2760 }
d35fbf6b 2761 }
81a2b7ce 2762
50b3dfb9 2763 /* Apply just after mount namespace setup */
376fecf6
LP
2764 r = apply_working_directory(context, params, home, needs_mount_namespace, exit_status);
2765 if (r < 0)
50b3dfb9 2766 return r;
50b3dfb9 2767
bbeea271 2768 /* Drop groups as early as possbile */
165a31c0 2769 if (needs_setuid) {
4d885bd3 2770 r = enforce_groups(context, gid, supplementary_gids, ngids);
096424d1
LP
2771 if (r < 0) {
2772 *exit_status = EXIT_GROUP;
2773 return r;
2774 }
165a31c0 2775 }
096424d1 2776
165a31c0 2777 if (needs_sandboxing) {
9008e1ac 2778#ifdef HAVE_SELINUX
43b1f709 2779 if (use_selinux && params->selinux_context_net && socket_fd >= 0) {
937ccce9
LP
2780 r = mac_selinux_get_child_mls_label(socket_fd, command->path, context->selinux_context, &mac_selinux_context_net);
2781 if (r < 0) {
2782 *exit_status = EXIT_SELINUX_CONTEXT;
2783 return r;
2784 }
9008e1ac 2785 }
9008e1ac
MS
2786#endif
2787
937ccce9
LP
2788 if (context->private_users) {
2789 r = setup_private_users(uid, gid);
2790 if (r < 0) {
2791 *exit_status = EXIT_USER;
2792 return r;
2793 }
d251207d
LP
2794 }
2795 }
2796
165a31c0
LP
2797 /* We repeat the fd closing here, to make sure that nothing is leaked from the PAM modules. Note that we are
2798 * more aggressive this time since socket_fd and the netns fds we don't need anymore. The custom endpoint fd
2799 * was needed to upload the policy and can now be closed as well. */
ff0af2a1
LP
2800 r = close_all_fds(fds, n_fds);
2801 if (r >= 0)
2802 r = shift_fds(fds, n_fds);
2803 if (r >= 0)
4c47affc 2804 r = flags_fds(fds, n_storage_fds, n_socket_fds, context->non_blocking);
ff0af2a1
LP
2805 if (r < 0) {
2806 *exit_status = EXIT_FDS;
2807 return r;
d35fbf6b 2808 }
e66cf1a3 2809
165a31c0 2810 secure_bits = context->secure_bits;
e66cf1a3 2811
165a31c0
LP
2812 if (needs_sandboxing) {
2813 uint64_t bset;
755d4b67 2814
d35fbf6b 2815 for (i = 0; i < _RLIMIT_MAX; i++) {
03857c43 2816
d35fbf6b
DM
2817 if (!context->rlimit[i])
2818 continue;
2819
03857c43
LP
2820 r = setrlimit_closest(i, context->rlimit[i]);
2821 if (r < 0) {
ff0af2a1 2822 *exit_status = EXIT_LIMITS;
03857c43 2823 return r;
e66cf1a3
LP
2824 }
2825 }
2826
f4170c67
LP
2827 /* Set the RTPRIO resource limit to 0, but only if nothing else was explicitly requested. */
2828 if (context->restrict_realtime && !context->rlimit[RLIMIT_RTPRIO]) {
2829 if (setrlimit(RLIMIT_RTPRIO, &RLIMIT_MAKE_CONST(0)) < 0) {
2830 *exit_status = EXIT_LIMITS;
2831 return -errno;
2832 }
2833 }
2834
165a31c0
LP
2835 bset = context->capability_bounding_set;
2836 /* If the ambient caps hack is enabled (which means the kernel can't do them, and the user asked for
2837 * our magic fallback), then let's add some extra caps, so that the service can drop privs of its own,
2838 * instead of us doing that */
2839 if (needs_ambient_hack)
2840 bset |= (UINT64_C(1) << CAP_SETPCAP) |
2841 (UINT64_C(1) << CAP_SETUID) |
2842 (UINT64_C(1) << CAP_SETGID);
2843
2844 if (!cap_test_all(bset)) {
2845 r = capability_bounding_set_drop(bset, false);
ff0af2a1
LP
2846 if (r < 0) {
2847 *exit_status = EXIT_CAPABILITIES;
70dd455c 2848 *error_message = strdup("Failed to drop capabilities");
ff0af2a1 2849 return r;
3b8bddde 2850 }
4c2630eb 2851 }
3b8bddde 2852
755d4b67
IP
2853 /* This is done before enforce_user, but ambient set
2854 * does not survive over setresuid() if keep_caps is not set. */
165a31c0
LP
2855 if (!needs_ambient_hack &&
2856 context->capability_ambient_set != 0) {
755d4b67
IP
2857 r = capability_ambient_set_apply(context->capability_ambient_set, true);
2858 if (r < 0) {
2859 *exit_status = EXIT_CAPABILITIES;
70dd455c 2860 *error_message = strdup("Failed to apply ambient capabilities (before UID change)");
755d4b67
IP
2861 return r;
2862 }
755d4b67 2863 }
165a31c0 2864 }
755d4b67 2865
165a31c0 2866 if (needs_setuid) {
d35fbf6b 2867 if (context->user) {
ff0af2a1
LP
2868 r = enforce_user(context, uid);
2869 if (r < 0) {
2870 *exit_status = EXIT_USER;
70dd455c 2871 (void) asprintf(error_message, "Failed to change UID to "UID_FMT, uid);
ff0af2a1 2872 return r;
5b6319dc 2873 }
165a31c0
LP
2874
2875 if (!needs_ambient_hack &&
2876 context->capability_ambient_set != 0) {
755d4b67
IP
2877
2878 /* Fix the ambient capabilities after user change. */
2879 r = capability_ambient_set_apply(context->capability_ambient_set, false);
2880 if (r < 0) {
2881 *exit_status = EXIT_CAPABILITIES;
70dd455c 2882 *error_message = strdup("Failed to apply ambient capabilities (after UID change)");
755d4b67
IP
2883 return r;
2884 }
2885
2886 /* If we were asked to change user and ambient capabilities
2887 * were requested, we had to add keep-caps to the securebits
2888 * so that we would maintain the inherited capability set
2889 * through the setresuid(). Make sure that the bit is added
2890 * also to the context secure_bits so that we don't try to
2891 * drop the bit away next. */
2892
7f508f2c 2893 secure_bits |= 1<<SECURE_KEEP_CAPS;
755d4b67 2894 }
5b6319dc 2895 }
165a31c0 2896 }
d35fbf6b 2897
165a31c0 2898 if (needs_sandboxing) {
5cd9cd35
LP
2899 /* Apply the MAC contexts late, but before seccomp syscall filtering, as those should really be last to
2900 * influence our own codepaths as little as possible. Moreover, applying MAC contexts usually requires
2901 * syscalls that are subject to seccomp filtering, hence should probably be applied before the syscalls
2902 * are restricted. */
2903
2904#ifdef HAVE_SELINUX
43b1f709 2905 if (use_selinux) {
5cd9cd35
LP
2906 char *exec_context = mac_selinux_context_net ?: context->selinux_context;
2907
2908 if (exec_context) {
2909 r = setexeccon(exec_context);
2910 if (r < 0) {
2911 *exit_status = EXIT_SELINUX_CONTEXT;
70dd455c 2912 (void) asprintf(error_message, "Failed to set SELinux context to %s", exec_context);
5cd9cd35
LP
2913 return r;
2914 }
2915 }
2916 }
2917#endif
2918
7f18ef0a 2919#ifdef HAVE_SMACK
43b1f709 2920 if (use_smack) {
7f18ef0a
FK
2921 r = setup_smack(context, command);
2922 if (r < 0) {
2923 *exit_status = EXIT_SMACK_PROCESS_LABEL;
2924 *error_message = strdup("Failed to set SMACK process label");
2925 return r;
2926 }
5cd9cd35 2927 }
7f18ef0a 2928#endif
5cd9cd35
LP
2929
2930#ifdef HAVE_APPARMOR
43b1f709 2931 if (use_apparmor && context->apparmor_profile) {
5cd9cd35
LP
2932 r = aa_change_onexec(context->apparmor_profile);
2933 if (r < 0 && !context->apparmor_profile_ignore) {
2934 *exit_status = EXIT_APPARMOR_PROFILE;
70dd455c
ZJS
2935 (void) asprintf(error_message,
2936 "Failed to prepare AppArmor profile change to %s",
2937 context->apparmor_profile);
5cd9cd35
LP
2938 return -errno;
2939 }
2940 }
2941#endif
2942
165a31c0
LP
2943 /* PR_GET_SECUREBITS is not privileged, while PR_SET_SECUREBITS is. So to suppress potential EPERMs
2944 * we'll try not to call PR_SET_SECUREBITS unless necessary. */
755d4b67
IP
2945 if (prctl(PR_GET_SECUREBITS) != secure_bits)
2946 if (prctl(PR_SET_SECUREBITS, secure_bits) < 0) {
ff0af2a1 2947 *exit_status = EXIT_SECUREBITS;
70dd455c 2948 *error_message = strdup("Failed to set secure bits");
d35fbf6b 2949 return -errno;
ff01d048 2950 }
5b6319dc 2951
59eeb84b 2952 if (context_has_no_new_privileges(context))
d35fbf6b 2953 if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) < 0) {
ff0af2a1 2954 *exit_status = EXIT_NO_NEW_PRIVILEGES;
70dd455c 2955 *error_message = strdup("Failed to disable new privileges");
d35fbf6b
DM
2956 return -errno;
2957 }
2958
2959#ifdef HAVE_SECCOMP
469830d1
LP
2960 r = apply_address_families(unit, context);
2961 if (r < 0) {
2962 *exit_status = EXIT_ADDRESS_FAMILIES;
5b3637b4 2963 *error_message = strdup("Failed to restrict address families");
469830d1 2964 return r;
4c2630eb 2965 }
04aa0cb9 2966
469830d1
LP
2967 r = apply_memory_deny_write_execute(unit, context);
2968 if (r < 0) {
2969 *exit_status = EXIT_SECCOMP;
5b3637b4 2970 *error_message = strdup("Failed to disable writing to executable memory");
469830d1 2971 return r;
f3e43635 2972 }
f4170c67 2973
469830d1
LP
2974 r = apply_restrict_realtime(unit, context);
2975 if (r < 0) {
2976 *exit_status = EXIT_SECCOMP;
5b3637b4 2977 *error_message = strdup("Failed to apply realtime restrictions");
469830d1 2978 return r;
f4170c67
LP
2979 }
2980
add00535
LP
2981 r = apply_restrict_namespaces(unit, context);
2982 if (r < 0) {
2983 *exit_status = EXIT_SECCOMP;
70dd455c 2984 *error_message = strdup("Failed to apply namespace restrictions");
add00535
LP
2985 return r;
2986 }
2987
469830d1
LP
2988 r = apply_protect_sysctl(unit, context);
2989 if (r < 0) {
2990 *exit_status = EXIT_SECCOMP;
5b3637b4 2991 *error_message = strdup("Failed to apply sysctl restrictions");
469830d1 2992 return r;
502d704e
DH
2993 }
2994
469830d1
LP
2995 r = apply_protect_kernel_modules(unit, context);
2996 if (r < 0) {
2997 *exit_status = EXIT_SECCOMP;
5b3637b4 2998 *error_message = strdup("Failed to apply module loading restrictions");
469830d1 2999 return r;
59eeb84b
LP
3000 }
3001
469830d1
LP
3002 r = apply_private_devices(unit, context);
3003 if (r < 0) {
3004 *exit_status = EXIT_SECCOMP;
5b3637b4 3005 *error_message = strdup("Failed to set up private devices");
469830d1
LP
3006 return r;
3007 }
3008
3009 r = apply_syscall_archs(unit, context);
3010 if (r < 0) {
3011 *exit_status = EXIT_SECCOMP;
5b3637b4 3012 *error_message = strdup("Failed to apply syscall architecture restrictions");
469830d1 3013 return r;
ba128bb8
LP
3014 }
3015
78e864e5
TM
3016 r = apply_lock_personality(unit, context);
3017 if (r < 0) {
3018 *exit_status = EXIT_SECCOMP;
3019 *error_message = strdup("Failed to lock personalities");
3020 return r;
3021 }
3022
5cd9cd35
LP
3023 /* This really should remain the last step before the execve(), to make sure our own code is unaffected
3024 * by the filter as little as possible. */
165a31c0 3025 r = apply_syscall_filter(unit, context, needs_ambient_hack);
469830d1
LP
3026 if (r < 0) {
3027 *exit_status = EXIT_SECCOMP;
5b3637b4 3028 *error_message = strdup("Failed to apply syscall filters");
469830d1 3029 return r;
d35fbf6b
DM
3030 }
3031#endif
d35fbf6b 3032 }
034c6ed7 3033
2065ca69 3034 final_argv = replace_env_argv(argv, accum_env);
d35fbf6b 3035 if (!final_argv) {
ff0af2a1 3036 *exit_status = EXIT_MEMORY;
70dd455c 3037 *error_message = strdup("Failed to prepare process arguments");
d35fbf6b
DM
3038 return -ENOMEM;
3039 }
034c6ed7 3040
553d2243 3041 if (_unlikely_(log_get_max_level() >= LOG_DEBUG)) {
d35fbf6b 3042 _cleanup_free_ char *line;
81a2b7ce 3043
d35fbf6b
DM
3044 line = exec_command_line(final_argv);
3045 if (line) {
3046 log_open();
f2341e0a 3047 log_struct(LOG_DEBUG,
f2341e0a
LP
3048 "EXECUTABLE=%s", command->path,
3049 LOG_UNIT_MESSAGE(unit, "Executing: %s", line),
ba360bb0 3050 LOG_UNIT_ID(unit),
f2341e0a 3051 NULL);
d35fbf6b
DM
3052 log_close();
3053 }
3054 }
dd305ec9 3055
2065ca69 3056 execve(command->path, final_argv, accum_env);
ff0af2a1 3057 *exit_status = EXIT_EXEC;
d35fbf6b
DM
3058 return -errno;
3059}
81a2b7ce 3060
f2341e0a
LP
3061int exec_spawn(Unit *unit,
3062 ExecCommand *command,
d35fbf6b
DM
3063 const ExecContext *context,
3064 const ExecParameters *params,
3065 ExecRuntime *runtime,
29206d46 3066 DynamicCreds *dcreds,
d35fbf6b 3067 pid_t *ret) {
8351ceae 3068
d35fbf6b 3069 _cleanup_strv_free_ char **files_env = NULL;
9b141911 3070 int *fds = NULL;
4c47affc 3071 unsigned n_storage_fds = 0, n_socket_fds = 0;
ff0af2a1
LP
3072 _cleanup_free_ char *line = NULL;
3073 int socket_fd, r;
52c239d7 3074 int named_iofds[3] = { -1, -1, -1 };
ff0af2a1 3075 char **argv;
d35fbf6b 3076 pid_t pid;
8351ceae 3077
f2341e0a 3078 assert(unit);
d35fbf6b
DM
3079 assert(command);
3080 assert(context);
3081 assert(ret);
3082 assert(params);
4c47affc 3083 assert(params->fds || (params->n_storage_fds + params->n_socket_fds <= 0));
4298d0b5 3084
d35fbf6b
DM
3085 if (context->std_input == EXEC_INPUT_SOCKET ||
3086 context->std_output == EXEC_OUTPUT_SOCKET ||
3087 context->std_error == EXEC_OUTPUT_SOCKET) {
17df7223 3088
4c47affc 3089 if (params->n_socket_fds > 1) {
f2341e0a 3090 log_unit_error(unit, "Got more than one socket.");
d35fbf6b 3091 return -EINVAL;
ff0af2a1 3092 }
eef65bf3 3093
4c47affc 3094 if (params->n_socket_fds == 0) {
488ab41c
AA
3095 log_unit_error(unit, "Got no socket.");
3096 return -EINVAL;
3097 }
3098
d35fbf6b
DM
3099 socket_fd = params->fds[0];
3100 } else {
3101 socket_fd = -1;
3102 fds = params->fds;
4c47affc 3103 n_storage_fds = params->n_storage_fds;
9b141911 3104 n_socket_fds = params->n_socket_fds;
d35fbf6b 3105 }
94f04347 3106
52c239d7
LB
3107 r = exec_context_named_iofds(unit, context, params, named_iofds);
3108 if (r < 0)
3109 return log_unit_error_errno(unit, r, "Failed to load a named file descriptor: %m");
3110
f2341e0a 3111 r = exec_context_load_environment(unit, context, &files_env);
ff0af2a1 3112 if (r < 0)
f2341e0a 3113 return log_unit_error_errno(unit, r, "Failed to load environment files: %m");
034c6ed7 3114
d35fbf6b 3115 argv = params->argv ?: command->argv;
d35fbf6b
DM
3116 line = exec_command_line(argv);
3117 if (!line)
3118 return log_oom();
fab56fc5 3119
f2341e0a 3120 log_struct(LOG_DEBUG,
f2341e0a
LP
3121 LOG_UNIT_MESSAGE(unit, "About to execute: %s", line),
3122 "EXECUTABLE=%s", command->path,
ba360bb0 3123 LOG_UNIT_ID(unit),
f2341e0a 3124 NULL);
d35fbf6b
DM
3125 pid = fork();
3126 if (pid < 0)
74129a12 3127 return log_unit_error_errno(unit, errno, "Failed to fork: %m");
d35fbf6b
DM
3128
3129 if (pid == 0) {
ff0af2a1 3130 int exit_status;
70dd455c 3131 _cleanup_free_ char *error_message = NULL;
ff0af2a1 3132
f2341e0a
LP
3133 r = exec_child(unit,
3134 command,
ff0af2a1
LP
3135 context,
3136 params,
3137 runtime,
29206d46 3138 dcreds,
ff0af2a1
LP
3139 argv,
3140 socket_fd,
52c239d7 3141 named_iofds,
4c47affc
FB
3142 fds,
3143 n_storage_fds,
9b141911 3144 n_socket_fds,
ff0af2a1 3145 files_env,
00d9ef85 3146 unit->manager->user_lookup_fds[1],
70dd455c
ZJS
3147 &exit_status,
3148 &error_message);
ff0af2a1 3149 if (r < 0) {
4c2630eb 3150 log_open();
70dd455c
ZJS
3151 if (error_message)
3152 log_struct_errno(LOG_ERR, r,
2b044526 3153 "MESSAGE_ID=" SD_MESSAGE_SPAWN_FAILED_STR,
70dd455c
ZJS
3154 LOG_UNIT_ID(unit),
3155 LOG_UNIT_MESSAGE(unit, "%s: %m",
3156 error_message),
3157 "EXECUTABLE=%s", command->path,
3158 NULL);
3ed0cd26 3159 else if (r == -ENOENT && (command->flags & EXEC_COMMAND_IGNORE_FAILURE))
4d8b0f0f
YW
3160 log_struct_errno(LOG_INFO, r,
3161 "MESSAGE_ID=" SD_MESSAGE_SPAWN_FAILED_STR,
3162 LOG_UNIT_ID(unit),
3163 LOG_UNIT_MESSAGE(unit, "Skipped spawning %s: %m",
3164 command->path),
3165 "EXECUTABLE=%s", command->path,
3166 NULL);
70dd455c
ZJS
3167 else
3168 log_struct_errno(LOG_ERR, r,
2b044526 3169 "MESSAGE_ID=" SD_MESSAGE_SPAWN_FAILED_STR,
70dd455c
ZJS
3170 LOG_UNIT_ID(unit),
3171 LOG_UNIT_MESSAGE(unit, "Failed at step %s spawning %s: %m",
3172 exit_status_to_string(exit_status, EXIT_STATUS_SYSTEMD),
3173 command->path),
3174 "EXECUTABLE=%s", command->path,
3175 NULL);
4c2630eb
MS
3176 }
3177
ff0af2a1 3178 _exit(exit_status);
034c6ed7
LP
3179 }
3180
f2341e0a 3181 log_unit_debug(unit, "Forked %s as "PID_FMT, command->path, pid);
23635a85 3182
80876c20
LP
3183 /* We add the new process to the cgroup both in the child (so
3184 * that we can be sure that no user code is ever executed
3185 * outside of the cgroup) and in the parent (so that we can be
3186 * sure that when we kill the cgroup the process will be
3187 * killed too). */
d35fbf6b 3188 if (params->cgroup_path)
dd305ec9 3189 (void) cg_attach(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, pid);
2da3263a 3190
b58b4116 3191 exec_status_start(&command->exec_status, pid);
9fb86720 3192
034c6ed7 3193 *ret = pid;
5cb5a6ff
LP
3194 return 0;
3195}
3196
034c6ed7 3197void exec_context_init(ExecContext *c) {
3536f49e
YW
3198 ExecDirectoryType i;
3199
034c6ed7
LP
3200 assert(c);
3201
4c12626c 3202 c->umask = 0022;
9eba9da4 3203 c->ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 0);
94f04347 3204 c->cpu_sched_policy = SCHED_OTHER;
071830ff 3205 c->syslog_priority = LOG_DAEMON|LOG_INFO;
74922904 3206 c->syslog_level_prefix = true;
353e12c2 3207 c->ignore_sigpipe = true;
3a43da28 3208 c->timer_slack_nsec = NSEC_INFINITY;
050f7277 3209 c->personality = PERSONALITY_INVALID;
3536f49e
YW
3210 for (i = 0; i < _EXEC_DIRECTORY_MAX; i++)
3211 c->directories[i].mode = 0755;
a103496c 3212 c->capability_bounding_set = CAP_ALL;
add00535 3213 c->restrict_namespaces = NAMESPACE_FLAGS_ALL;
034c6ed7
LP
3214}
3215
613b411c 3216void exec_context_done(ExecContext *c) {
5cb5a6ff 3217 unsigned l;
3536f49e 3218 ExecDirectoryType i;
5cb5a6ff
LP
3219
3220 assert(c);
3221
6796073e
LP
3222 c->environment = strv_free(c->environment);
3223 c->environment_files = strv_free(c->environment_files);
b4c14404 3224 c->pass_environment = strv_free(c->pass_environment);
8c7be95e 3225
1f6b4113 3226 for (l = 0; l < ELEMENTSOF(c->rlimit); l++)
a1e58e8e 3227 c->rlimit[l] = mfree(c->rlimit[l]);
034c6ed7 3228
52c239d7
LB
3229 for (l = 0; l < 3; l++)
3230 c->stdio_fdname[l] = mfree(c->stdio_fdname[l]);
3231
a1e58e8e
LP
3232 c->working_directory = mfree(c->working_directory);
3233 c->root_directory = mfree(c->root_directory);
915e6d16 3234 c->root_image = mfree(c->root_image);
a1e58e8e
LP
3235 c->tty_path = mfree(c->tty_path);
3236 c->syslog_identifier = mfree(c->syslog_identifier);
3237 c->user = mfree(c->user);
3238 c->group = mfree(c->group);
034c6ed7 3239
6796073e 3240 c->supplementary_groups = strv_free(c->supplementary_groups);
94f04347 3241
a1e58e8e 3242 c->pam_name = mfree(c->pam_name);
5b6319dc 3243
2a624c36
AP
3244 c->read_only_paths = strv_free(c->read_only_paths);
3245 c->read_write_paths = strv_free(c->read_write_paths);
3246 c->inaccessible_paths = strv_free(c->inaccessible_paths);
82c121a4 3247
d2d6c096
LP
3248 bind_mount_free_many(c->bind_mounts, c->n_bind_mounts);
3249
82c121a4
LP
3250 if (c->cpuset)
3251 CPU_FREE(c->cpuset);
86a3475b 3252
a1e58e8e
LP
3253 c->utmp_id = mfree(c->utmp_id);
3254 c->selinux_context = mfree(c->selinux_context);
3255 c->apparmor_profile = mfree(c->apparmor_profile);
5b8e1b77 3256 c->smack_process_label = mfree(c->smack_process_label);
eef65bf3 3257
525d3cc7
LP
3258 c->syscall_filter = set_free(c->syscall_filter);
3259 c->syscall_archs = set_free(c->syscall_archs);
3260 c->address_families = set_free(c->address_families);
e66cf1a3 3261
3536f49e
YW
3262 for (i = 0; i < _EXEC_DIRECTORY_MAX; i++)
3263 c->directories[i].paths = strv_free(c->directories[i].paths);
e66cf1a3
LP
3264}
3265
3266int exec_context_destroy_runtime_directory(ExecContext *c, const char *runtime_prefix) {
3267 char **i;
3268
3269 assert(c);
3270
3271 if (!runtime_prefix)
3272 return 0;
3273
3536f49e 3274 STRV_FOREACH(i, c->directories[EXEC_DIRECTORY_RUNTIME].paths) {
e66cf1a3
LP
3275 _cleanup_free_ char *p;
3276
605405c6 3277 p = strjoin(runtime_prefix, "/", *i);
e66cf1a3
LP
3278 if (!p)
3279 return -ENOMEM;
3280
3281 /* We execute this synchronously, since we need to be
3282 * sure this is gone when we start the service
3283 * next. */
c6878637 3284 (void) rm_rf(p, REMOVE_ROOT);
e66cf1a3
LP
3285 }
3286
3287 return 0;
5cb5a6ff
LP
3288}
3289
43d0fcbd
LP
3290void exec_command_done(ExecCommand *c) {
3291 assert(c);
3292
a1e58e8e 3293 c->path = mfree(c->path);
43d0fcbd 3294
6796073e 3295 c->argv = strv_free(c->argv);
43d0fcbd
LP
3296}
3297
3298void exec_command_done_array(ExecCommand *c, unsigned n) {
3299 unsigned i;
3300
3301 for (i = 0; i < n; i++)
3302 exec_command_done(c+i);
3303}
3304
f1acf85a 3305ExecCommand* exec_command_free_list(ExecCommand *c) {
5cb5a6ff
LP
3306 ExecCommand *i;
3307
3308 while ((i = c)) {
71fda00f 3309 LIST_REMOVE(command, c, i);
43d0fcbd 3310 exec_command_done(i);
5cb5a6ff
LP
3311 free(i);
3312 }
f1acf85a
ZJS
3313
3314 return NULL;
5cb5a6ff
LP
3315}
3316
034c6ed7
LP
3317void exec_command_free_array(ExecCommand **c, unsigned n) {
3318 unsigned i;
3319
f1acf85a
ZJS
3320 for (i = 0; i < n; i++)
3321 c[i] = exec_command_free_list(c[i]);
034c6ed7
LP
3322}
3323
039f0e70 3324typedef struct InvalidEnvInfo {
f2341e0a 3325 Unit *unit;
039f0e70
LP
3326 const char *path;
3327} InvalidEnvInfo;
3328
3329static void invalid_env(const char *p, void *userdata) {
3330 InvalidEnvInfo *info = userdata;
3331
f2341e0a 3332 log_unit_error(info->unit, "Ignoring invalid environment assignment '%s': %s", p, info->path);
039f0e70
LP
3333}
3334
52c239d7
LB
3335const char* exec_context_fdname(const ExecContext *c, int fd_index) {
3336 assert(c);
3337
3338 switch (fd_index) {
3339 case STDIN_FILENO:
3340 if (c->std_input != EXEC_INPUT_NAMED_FD)
3341 return NULL;
3342 return c->stdio_fdname[STDIN_FILENO] ?: "stdin";
3343 case STDOUT_FILENO:
3344 if (c->std_output != EXEC_OUTPUT_NAMED_FD)
3345 return NULL;
3346 return c->stdio_fdname[STDOUT_FILENO] ?: "stdout";
3347 case STDERR_FILENO:
3348 if (c->std_error != EXEC_OUTPUT_NAMED_FD)
3349 return NULL;
3350 return c->stdio_fdname[STDERR_FILENO] ?: "stderr";
3351 default:
3352 return NULL;
3353 }
3354}
3355
3356int exec_context_named_iofds(Unit *unit, const ExecContext *c, const ExecParameters *p, int named_iofds[3]) {
3357 unsigned i, targets;
56fbd561 3358 const char* stdio_fdname[3];
4c47affc 3359 unsigned n_fds;
52c239d7
LB
3360
3361 assert(c);
3362 assert(p);
3363
3364 targets = (c->std_input == EXEC_INPUT_NAMED_FD) +
3365 (c->std_output == EXEC_OUTPUT_NAMED_FD) +
3366 (c->std_error == EXEC_OUTPUT_NAMED_FD);
3367
3368 for (i = 0; i < 3; i++)
3369 stdio_fdname[i] = exec_context_fdname(c, i);
3370
4c47affc
FB
3371 n_fds = p->n_storage_fds + p->n_socket_fds;
3372
3373 for (i = 0; i < n_fds && targets > 0; i++)
56fbd561
ZJS
3374 if (named_iofds[STDIN_FILENO] < 0 &&
3375 c->std_input == EXEC_INPUT_NAMED_FD &&
3376 stdio_fdname[STDIN_FILENO] &&
3377 streq(p->fd_names[i], stdio_fdname[STDIN_FILENO])) {
3378
52c239d7
LB
3379 named_iofds[STDIN_FILENO] = p->fds[i];
3380 targets--;
56fbd561
ZJS
3381
3382 } else if (named_iofds[STDOUT_FILENO] < 0 &&
3383 c->std_output == EXEC_OUTPUT_NAMED_FD &&
3384 stdio_fdname[STDOUT_FILENO] &&
3385 streq(p->fd_names[i], stdio_fdname[STDOUT_FILENO])) {
3386
52c239d7
LB
3387 named_iofds[STDOUT_FILENO] = p->fds[i];
3388 targets--;
56fbd561
ZJS
3389
3390 } else if (named_iofds[STDERR_FILENO] < 0 &&
3391 c->std_error == EXEC_OUTPUT_NAMED_FD &&
3392 stdio_fdname[STDERR_FILENO] &&
3393 streq(p->fd_names[i], stdio_fdname[STDERR_FILENO])) {
3394
52c239d7
LB
3395 named_iofds[STDERR_FILENO] = p->fds[i];
3396 targets--;
3397 }
3398
56fbd561 3399 return targets == 0 ? 0 : -ENOENT;
52c239d7
LB
3400}
3401
f2341e0a 3402int exec_context_load_environment(Unit *unit, const ExecContext *c, char ***l) {
8c7be95e
LP
3403 char **i, **r = NULL;
3404
3405 assert(c);
3406 assert(l);
3407
3408 STRV_FOREACH(i, c->environment_files) {
3409 char *fn;
52511fae
ZJS
3410 int k;
3411 unsigned n;
8c7be95e
LP
3412 bool ignore = false;
3413 char **p;
7fd1b19b 3414 _cleanup_globfree_ glob_t pglob = {};
8c7be95e
LP
3415
3416 fn = *i;
3417
3418 if (fn[0] == '-') {
3419 ignore = true;
313cefa1 3420 fn++;
8c7be95e
LP
3421 }
3422
3423 if (!path_is_absolute(fn)) {
8c7be95e
LP
3424 if (ignore)
3425 continue;
3426
3427 strv_free(r);
3428 return -EINVAL;
3429 }
3430
2bef10ab 3431 /* Filename supports globbing, take all matching files */
d8c92e8b
ZJS
3432 k = safe_glob(fn, 0, &pglob);
3433 if (k < 0) {
2bef10ab
PL
3434 if (ignore)
3435 continue;
8c7be95e 3436
2bef10ab 3437 strv_free(r);
d8c92e8b 3438 return k;
2bef10ab 3439 }
8c7be95e 3440
d8c92e8b
ZJS
3441 /* When we don't match anything, -ENOENT should be returned */
3442 assert(pglob.gl_pathc > 0);
3443
3444 for (n = 0; n < pglob.gl_pathc; n++) {
717603e3 3445 k = load_env_file(NULL, pglob.gl_pathv[n], NULL, &p);
2bef10ab
PL
3446 if (k < 0) {
3447 if (ignore)
3448 continue;
8c7be95e 3449
2bef10ab 3450 strv_free(r);
2bef10ab 3451 return k;
e9c1ea9d 3452 }
ebc05a09 3453 /* Log invalid environment variables with filename */
039f0e70
LP
3454 if (p) {
3455 InvalidEnvInfo info = {
f2341e0a 3456 .unit = unit,
039f0e70
LP
3457 .path = pglob.gl_pathv[n]
3458 };
3459
3460 p = strv_env_clean_with_callback(p, invalid_env, &info);
3461 }
8c7be95e 3462
2bef10ab
PL
3463 if (r == NULL)
3464 r = p;
3465 else {
3466 char **m;
8c7be95e 3467
2bef10ab
PL
3468 m = strv_env_merge(2, r, p);
3469 strv_free(r);
3470 strv_free(p);
c84a9488 3471 if (!m)
2bef10ab 3472 return -ENOMEM;
2bef10ab
PL
3473
3474 r = m;
3475 }
8c7be95e
LP
3476 }
3477 }
3478
3479 *l = r;
3480
3481 return 0;
3482}
3483
6ac8fdc9 3484static bool tty_may_match_dev_console(const char *tty) {
e1d75803 3485 _cleanup_free_ char *active = NULL;
7d6884b6 3486 char *console;
6ac8fdc9 3487
1e22b5cd
LP
3488 if (!tty)
3489 return true;
3490
a119ec7c 3491 tty = skip_dev_prefix(tty);
6ac8fdc9
MS
3492
3493 /* trivial identity? */
3494 if (streq(tty, "console"))
3495 return true;
3496
3497 console = resolve_dev_console(&active);
3498 /* if we could not resolve, assume it may */
3499 if (!console)
3500 return true;
3501
3502 /* "tty0" means the active VC, so it may be the same sometimes */
e1d75803 3503 return streq(console, tty) || (streq(console, "tty0") && tty_is_vc(tty));
6ac8fdc9
MS
3504}
3505
3506bool exec_context_may_touch_console(ExecContext *ec) {
1e22b5cd
LP
3507
3508 return (ec->tty_reset ||
3509 ec->tty_vhangup ||
3510 ec->tty_vt_disallocate ||
6ac8fdc9
MS
3511 is_terminal_input(ec->std_input) ||
3512 is_terminal_output(ec->std_output) ||
3513 is_terminal_output(ec->std_error)) &&
1e22b5cd 3514 tty_may_match_dev_console(exec_context_tty_path(ec));
6ac8fdc9
MS
3515}
3516
15ae422b
LP
3517static void strv_fprintf(FILE *f, char **l) {
3518 char **g;
3519
3520 assert(f);
3521
3522 STRV_FOREACH(g, l)
3523 fprintf(f, " %s", *g);
3524}
3525
5cb5a6ff 3526void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) {
c2bbd90b 3527 char **e, **d;
94f04347 3528 unsigned i;
3536f49e 3529 ExecDirectoryType dt;
add00535 3530 int r;
9eba9da4 3531
5cb5a6ff
LP
3532 assert(c);
3533 assert(f);
3534
4ad49000 3535 prefix = strempty(prefix);
5cb5a6ff
LP
3536
3537 fprintf(f,
94f04347
LP
3538 "%sUMask: %04o\n"
3539 "%sWorkingDirectory: %s\n"
451a074f 3540 "%sRootDirectory: %s\n"
15ae422b 3541 "%sNonBlocking: %s\n"
64747e2d 3542 "%sPrivateTmp: %s\n"
7f112f50 3543 "%sPrivateDevices: %s\n"
59eeb84b 3544 "%sProtectKernelTunables: %s\n"
e66a2f65 3545 "%sProtectKernelModules: %s\n"
59eeb84b 3546 "%sProtectControlGroups: %s\n"
d251207d
LP
3547 "%sPrivateNetwork: %s\n"
3548 "%sPrivateUsers: %s\n"
1b8689f9
LP
3549 "%sProtectHome: %s\n"
3550 "%sProtectSystem: %s\n"
5d997827 3551 "%sMountAPIVFS: %s\n"
f3e43635 3552 "%sIgnoreSIGPIPE: %s\n"
f4170c67
LP
3553 "%sMemoryDenyWriteExecute: %s\n"
3554 "%sRestrictRealtime: %s\n",
5cb5a6ff 3555 prefix, c->umask,
9eba9da4 3556 prefix, c->working_directory ? c->working_directory : "/",
451a074f 3557 prefix, c->root_directory ? c->root_directory : "/",
15ae422b 3558 prefix, yes_no(c->non_blocking),
64747e2d 3559 prefix, yes_no(c->private_tmp),
7f112f50 3560 prefix, yes_no(c->private_devices),
59eeb84b 3561 prefix, yes_no(c->protect_kernel_tunables),
e66a2f65 3562 prefix, yes_no(c->protect_kernel_modules),
59eeb84b 3563 prefix, yes_no(c->protect_control_groups),
d251207d
LP
3564 prefix, yes_no(c->private_network),
3565 prefix, yes_no(c->private_users),
1b8689f9
LP
3566 prefix, protect_home_to_string(c->protect_home),
3567 prefix, protect_system_to_string(c->protect_system),
5d997827 3568 prefix, yes_no(c->mount_apivfs),
f3e43635 3569 prefix, yes_no(c->ignore_sigpipe),
f4170c67
LP
3570 prefix, yes_no(c->memory_deny_write_execute),
3571 prefix, yes_no(c->restrict_realtime));
fb33a393 3572
915e6d16
LP
3573 if (c->root_image)
3574 fprintf(f, "%sRootImage: %s\n", prefix, c->root_image);
3575
8c7be95e
LP
3576 STRV_FOREACH(e, c->environment)
3577 fprintf(f, "%sEnvironment: %s\n", prefix, *e);
3578
3579 STRV_FOREACH(e, c->environment_files)
3580 fprintf(f, "%sEnvironmentFile: %s\n", prefix, *e);
94f04347 3581
b4c14404
FB
3582 STRV_FOREACH(e, c->pass_environment)
3583 fprintf(f, "%sPassEnvironment: %s\n", prefix, *e);
3584
53f47dfc
YW
3585 fprintf(f, "%sRuntimeDirectoryPreserve: %s\n", prefix, exec_preserve_mode_to_string(c->runtime_directory_preserve_mode));
3586
3536f49e
YW
3587 for (dt = 0; dt < _EXEC_DIRECTORY_MAX; dt++) {
3588 fprintf(f, "%s%sMode: %04o\n", prefix, exec_directory_type_to_string(dt), c->directories[dt].mode);
3589
3590 STRV_FOREACH(d, c->directories[dt].paths)
3591 fprintf(f, "%s%s: %s\n", prefix, exec_directory_type_to_string(dt), *d);
3592 }
c2bbd90b 3593
fb33a393
LP
3594 if (c->nice_set)
3595 fprintf(f,
3596 "%sNice: %i\n",
3597 prefix, c->nice);
3598
dd6c17b1 3599 if (c->oom_score_adjust_set)
fb33a393 3600 fprintf(f,
dd6c17b1
LP
3601 "%sOOMScoreAdjust: %i\n",
3602 prefix, c->oom_score_adjust);
9eba9da4 3603
94f04347 3604 for (i = 0; i < RLIM_NLIMITS; i++)
3c11da9d
EV
3605 if (c->rlimit[i]) {
3606 fprintf(f, "%s%s: " RLIM_FMT "\n",
3607 prefix, rlimit_to_string(i), c->rlimit[i]->rlim_max);
3608 fprintf(f, "%s%sSoft: " RLIM_FMT "\n",
3609 prefix, rlimit_to_string(i), c->rlimit[i]->rlim_cur);
3610 }
94f04347 3611
f8b69d1d 3612 if (c->ioprio_set) {
1756a011 3613 _cleanup_free_ char *class_str = NULL;
f8b69d1d 3614
837df140
YW
3615 r = ioprio_class_to_string_alloc(IOPRIO_PRIO_CLASS(c->ioprio), &class_str);
3616 if (r >= 0)
3617 fprintf(f, "%sIOSchedulingClass: %s\n", prefix, class_str);
3618
3619 fprintf(f, "%sIOPriority: %lu\n", prefix, IOPRIO_PRIO_DATA(c->ioprio));
f8b69d1d 3620 }
94f04347 3621
f8b69d1d 3622 if (c->cpu_sched_set) {
1756a011 3623 _cleanup_free_ char *policy_str = NULL;
f8b69d1d 3624
837df140
YW
3625 r = sched_policy_to_string_alloc(c->cpu_sched_policy, &policy_str);
3626 if (r >= 0)
3627 fprintf(f, "%sCPUSchedulingPolicy: %s\n", prefix, policy_str);
3628
94f04347 3629 fprintf(f,
38b48754
LP
3630 "%sCPUSchedulingPriority: %i\n"
3631 "%sCPUSchedulingResetOnFork: %s\n",
38b48754
LP
3632 prefix, c->cpu_sched_priority,
3633 prefix, yes_no(c->cpu_sched_reset_on_fork));
b929bf04 3634 }
94f04347 3635
82c121a4 3636 if (c->cpuset) {
94f04347 3637 fprintf(f, "%sCPUAffinity:", prefix);
82c121a4
LP
3638 for (i = 0; i < c->cpuset_ncpus; i++)
3639 if (CPU_ISSET_S(i, CPU_ALLOC_SIZE(c->cpuset_ncpus), c->cpuset))
43a99a7a 3640 fprintf(f, " %u", i);
94f04347
LP
3641 fputs("\n", f);
3642 }
3643
3a43da28 3644 if (c->timer_slack_nsec != NSEC_INFINITY)
ccd06097 3645 fprintf(f, "%sTimerSlackNSec: "NSEC_FMT "\n", prefix, c->timer_slack_nsec);
94f04347
LP
3646
3647 fprintf(f,
80876c20
LP
3648 "%sStandardInput: %s\n"
3649 "%sStandardOutput: %s\n"
3650 "%sStandardError: %s\n",
3651 prefix, exec_input_to_string(c->std_input),
3652 prefix, exec_output_to_string(c->std_output),
3653 prefix, exec_output_to_string(c->std_error));
3654
3655 if (c->tty_path)
3656 fprintf(f,
6ea832a2
LP
3657 "%sTTYPath: %s\n"
3658 "%sTTYReset: %s\n"
3659 "%sTTYVHangup: %s\n"
3660 "%sTTYVTDisallocate: %s\n",
3661 prefix, c->tty_path,
3662 prefix, yes_no(c->tty_reset),
3663 prefix, yes_no(c->tty_vhangup),
3664 prefix, yes_no(c->tty_vt_disallocate));
94f04347 3665
9f6444eb
LP
3666 if (IN_SET(c->std_output,
3667 EXEC_OUTPUT_SYSLOG,
3668 EXEC_OUTPUT_KMSG,
3669 EXEC_OUTPUT_JOURNAL,
3670 EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
3671 EXEC_OUTPUT_KMSG_AND_CONSOLE,
3672 EXEC_OUTPUT_JOURNAL_AND_CONSOLE) ||
3673 IN_SET(c->std_error,
3674 EXEC_OUTPUT_SYSLOG,
3675 EXEC_OUTPUT_KMSG,
3676 EXEC_OUTPUT_JOURNAL,
3677 EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
3678 EXEC_OUTPUT_KMSG_AND_CONSOLE,
3679 EXEC_OUTPUT_JOURNAL_AND_CONSOLE)) {
f8b69d1d 3680
5ce70e5b 3681 _cleanup_free_ char *fac_str = NULL, *lvl_str = NULL;
f8b69d1d 3682
837df140
YW
3683 r = log_facility_unshifted_to_string_alloc(c->syslog_priority >> 3, &fac_str);
3684 if (r >= 0)
3685 fprintf(f, "%sSyslogFacility: %s\n", prefix, fac_str);
f8b69d1d 3686
837df140
YW
3687 r = log_level_to_string_alloc(LOG_PRI(c->syslog_priority), &lvl_str);
3688 if (r >= 0)
3689 fprintf(f, "%sSyslogLevel: %s\n", prefix, lvl_str);
f8b69d1d 3690 }
94f04347 3691
07d46372
YW
3692 if (c->secure_bits) {
3693 _cleanup_free_ char *str = NULL;
3694
3695 r = secure_bits_to_string_alloc(c->secure_bits, &str);
3696 if (r >= 0)
3697 fprintf(f, "%sSecure Bits: %s\n", prefix, str);
3698 }
94f04347 3699
a103496c 3700 if (c->capability_bounding_set != CAP_ALL) {
dd1f5bd0 3701 _cleanup_free_ char *str = NULL;
94f04347 3702
dd1f5bd0
YW
3703 r = capability_set_to_string_alloc(c->capability_bounding_set, &str);
3704 if (r >= 0)
3705 fprintf(f, "%sCapabilityBoundingSet: %s\n", prefix, str);
755d4b67
IP
3706 }
3707
3708 if (c->capability_ambient_set != 0) {
dd1f5bd0 3709 _cleanup_free_ char *str = NULL;
755d4b67 3710
dd1f5bd0
YW
3711 r = capability_set_to_string_alloc(c->capability_ambient_set, &str);
3712 if (r >= 0)
3713 fprintf(f, "%sAmbientCapabilities: %s\n", prefix, str);
94f04347
LP
3714 }
3715
3716 if (c->user)
f2d3769a 3717 fprintf(f, "%sUser: %s\n", prefix, c->user);
94f04347 3718 if (c->group)
f2d3769a 3719 fprintf(f, "%sGroup: %s\n", prefix, c->group);
94f04347 3720
29206d46
LP
3721 fprintf(f, "%sDynamicUser: %s\n", prefix, yes_no(c->dynamic_user));
3722
15ae422b 3723 if (strv_length(c->supplementary_groups) > 0) {
94f04347 3724 fprintf(f, "%sSupplementaryGroups:", prefix);
15ae422b
LP
3725 strv_fprintf(f, c->supplementary_groups);
3726 fputs("\n", f);
3727 }
94f04347 3728
5b6319dc 3729 if (c->pam_name)
f2d3769a 3730 fprintf(f, "%sPAMName: %s\n", prefix, c->pam_name);
5b6319dc 3731
2a624c36
AP
3732 if (strv_length(c->read_write_paths) > 0) {
3733 fprintf(f, "%sReadWritePaths:", prefix);
3734 strv_fprintf(f, c->read_write_paths);
15ae422b
LP
3735 fputs("\n", f);
3736 }
3737
2a624c36
AP
3738 if (strv_length(c->read_only_paths) > 0) {
3739 fprintf(f, "%sReadOnlyPaths:", prefix);
3740 strv_fprintf(f, c->read_only_paths);
15ae422b
LP
3741 fputs("\n", f);
3742 }
94f04347 3743
2a624c36
AP
3744 if (strv_length(c->inaccessible_paths) > 0) {
3745 fprintf(f, "%sInaccessiblePaths:", prefix);
3746 strv_fprintf(f, c->inaccessible_paths);
94f04347
LP
3747 fputs("\n", f);
3748 }
2e22afe9 3749
d2d6c096
LP
3750 if (c->n_bind_mounts > 0)
3751 for (i = 0; i < c->n_bind_mounts; i++) {
3752 fprintf(f, "%s%s: %s:%s:%s\n", prefix,
3753 c->bind_mounts[i].read_only ? "BindReadOnlyPaths" : "BindPaths",
3754 c->bind_mounts[i].source,
3755 c->bind_mounts[i].destination,
3756 c->bind_mounts[i].recursive ? "rbind" : "norbind");
3757 }
3758
169c1bda
LP
3759 if (c->utmp_id)
3760 fprintf(f,
3761 "%sUtmpIdentifier: %s\n",
3762 prefix, c->utmp_id);
7b52a628
MS
3763
3764 if (c->selinux_context)
3765 fprintf(f,
5f8640fb
LP
3766 "%sSELinuxContext: %s%s\n",
3767 prefix, c->selinux_context_ignore ? "-" : "", c->selinux_context);
17df7223 3768
80c21aea
WC
3769 if (c->apparmor_profile)
3770 fprintf(f,
3771 "%sAppArmorProfile: %s%s\n",
3772 prefix, c->apparmor_profile_ignore ? "-" : "", c->apparmor_profile);
3773
3774 if (c->smack_process_label)
3775 fprintf(f,
3776 "%sSmackProcessLabel: %s%s\n",
3777 prefix, c->smack_process_label_ignore ? "-" : "", c->smack_process_label);
3778
050f7277 3779 if (c->personality != PERSONALITY_INVALID)
ac45f971
LP
3780 fprintf(f,
3781 "%sPersonality: %s\n",
3782 prefix, strna(personality_to_string(c->personality)));
3783
78e864e5
TM
3784 fprintf(f,
3785 "%sLockPersonality: %s\n",
3786 prefix, yes_no(c->lock_personality));
3787
17df7223 3788 if (c->syscall_filter) {
351a19b1 3789#ifdef HAVE_SECCOMP
17df7223
LP
3790 Iterator j;
3791 void *id;
3792 bool first = true;
351a19b1 3793#endif
17df7223
LP
3794
3795 fprintf(f,
57183d11 3796 "%sSystemCallFilter: ",
17df7223
LP
3797 prefix);
3798
3799 if (!c->syscall_whitelist)
3800 fputc('~', f);
3801
351a19b1 3802#ifdef HAVE_SECCOMP
17df7223
LP
3803 SET_FOREACH(id, c->syscall_filter, j) {
3804 _cleanup_free_ char *name = NULL;
3805
3806 if (first)
3807 first = false;
3808 else
3809 fputc(' ', f);
3810
57183d11 3811 name = seccomp_syscall_resolve_num_arch(SCMP_ARCH_NATIVE, PTR_TO_INT(id) - 1);
17df7223
LP
3812 fputs(strna(name), f);
3813 }
351a19b1 3814#endif
17df7223
LP
3815
3816 fputc('\n', f);
3817 }
3818
57183d11
LP
3819 if (c->syscall_archs) {
3820#ifdef HAVE_SECCOMP
3821 Iterator j;
3822 void *id;
3823#endif
3824
3825 fprintf(f,
3826 "%sSystemCallArchitectures:",
3827 prefix);
3828
3829#ifdef HAVE_SECCOMP
3830 SET_FOREACH(id, c->syscall_archs, j)
3831 fprintf(f, " %s", strna(seccomp_arch_to_string(PTR_TO_UINT32(id) - 1)));
3832#endif
3833 fputc('\n', f);
3834 }
3835
add00535
LP
3836 if (exec_context_restrict_namespaces_set(c)) {
3837 _cleanup_free_ char *s = NULL;
3838
3839 r = namespace_flag_to_string_many(c->restrict_namespaces, &s);
3840 if (r >= 0)
3841 fprintf(f, "%sRestrictNamespaces: %s\n",
3842 prefix, s);
3843 }
3844
b3267152 3845 if (c->syscall_errno > 0)
17df7223
LP
3846 fprintf(f,
3847 "%sSystemCallErrorNumber: %s\n",
3848 prefix, strna(errno_to_name(c->syscall_errno)));
eef65bf3
MS
3849
3850 if (c->apparmor_profile)
3851 fprintf(f,
3852 "%sAppArmorProfile: %s%s\n",
3853 prefix, c->apparmor_profile_ignore ? "-" : "", c->apparmor_profile);
5cb5a6ff
LP
3854}
3855
a931ad47
LP
3856bool exec_context_maintains_privileges(ExecContext *c) {
3857 assert(c);
3858
61233823 3859 /* Returns true if the process forked off would run under
a931ad47
LP
3860 * an unchanged UID or as root. */
3861
3862 if (!c->user)
3863 return true;
3864
3865 if (streq(c->user, "root") || streq(c->user, "0"))
3866 return true;
3867
3868 return false;
3869}
3870
7f452159
LP
3871int exec_context_get_effective_ioprio(ExecContext *c) {
3872 int p;
3873
3874 assert(c);
3875
3876 if (c->ioprio_set)
3877 return c->ioprio;
3878
3879 p = ioprio_get(IOPRIO_WHO_PROCESS, 0);
3880 if (p < 0)
3881 return IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 4);
3882
3883 return p;
3884}
3885
b58b4116 3886void exec_status_start(ExecStatus *s, pid_t pid) {
034c6ed7 3887 assert(s);
5cb5a6ff 3888
b58b4116
LP
3889 zero(*s);
3890 s->pid = pid;
3891 dual_timestamp_get(&s->start_timestamp);
3892}
3893
6ea832a2 3894void exec_status_exit(ExecStatus *s, ExecContext *context, pid_t pid, int code, int status) {
b58b4116
LP
3895 assert(s);
3896
0b1f4ae6 3897 if (s->pid && s->pid != pid)
b58b4116
LP
3898 zero(*s);
3899
034c6ed7 3900 s->pid = pid;
63983207 3901 dual_timestamp_get(&s->exit_timestamp);
9fb86720 3902
034c6ed7
LP
3903 s->code = code;
3904 s->status = status;
169c1bda 3905
6ea832a2
LP
3906 if (context) {
3907 if (context->utmp_id)
3908 utmp_put_dead_process(context->utmp_id, pid, code, status);
3909
1e22b5cd 3910 exec_context_tty_reset(context, NULL);
6ea832a2 3911 }
9fb86720
LP
3912}
3913
3914void exec_status_dump(ExecStatus *s, FILE *f, const char *prefix) {
3915 char buf[FORMAT_TIMESTAMP_MAX];
3916
3917 assert(s);
3918 assert(f);
3919
9fb86720
LP
3920 if (s->pid <= 0)
3921 return;
3922
4c940960
LP
3923 prefix = strempty(prefix);
3924
9fb86720 3925 fprintf(f,
ccd06097
ZJS
3926 "%sPID: "PID_FMT"\n",
3927 prefix, s->pid);
9fb86720 3928
af9d16e1 3929 if (dual_timestamp_is_set(&s->start_timestamp))
9fb86720
LP
3930 fprintf(f,
3931 "%sStart Timestamp: %s\n",
63983207 3932 prefix, format_timestamp(buf, sizeof(buf), s->start_timestamp.realtime));
9fb86720 3933
af9d16e1 3934 if (dual_timestamp_is_set(&s->exit_timestamp))
9fb86720
LP
3935 fprintf(f,
3936 "%sExit Timestamp: %s\n"
3937 "%sExit Code: %s\n"
3938 "%sExit Status: %i\n",
63983207 3939 prefix, format_timestamp(buf, sizeof(buf), s->exit_timestamp.realtime),
9fb86720
LP
3940 prefix, sigchld_code_to_string(s->code),
3941 prefix, s->status);
5cb5a6ff 3942}
44d8db9e 3943
9e2f7c11 3944char *exec_command_line(char **argv) {
44d8db9e
LP
3945 size_t k;
3946 char *n, *p, **a;
3947 bool first = true;
3948
9e2f7c11 3949 assert(argv);
44d8db9e 3950
9164977d 3951 k = 1;
9e2f7c11 3952 STRV_FOREACH(a, argv)
44d8db9e
LP
3953 k += strlen(*a)+3;
3954
5cd9cd35
LP
3955 n = new(char, k);
3956 if (!n)
44d8db9e
LP
3957 return NULL;
3958
3959 p = n;
9e2f7c11 3960 STRV_FOREACH(a, argv) {
44d8db9e
LP
3961
3962 if (!first)
3963 *(p++) = ' ';
3964 else
3965 first = false;
3966
3967 if (strpbrk(*a, WHITESPACE)) {
3968 *(p++) = '\'';
3969 p = stpcpy(p, *a);
3970 *(p++) = '\'';
3971 } else
3972 p = stpcpy(p, *a);
3973
3974 }
3975
9164977d
LP
3976 *p = 0;
3977
44d8db9e
LP
3978 /* FIXME: this doesn't really handle arguments that have
3979 * spaces and ticks in them */
3980
3981 return n;
3982}
3983
3984void exec_command_dump(ExecCommand *c, FILE *f, const char *prefix) {
e1d75803 3985 _cleanup_free_ char *cmd = NULL;
4c940960 3986 const char *prefix2;
44d8db9e
LP
3987
3988 assert(c);
3989 assert(f);
3990
4c940960 3991 prefix = strempty(prefix);
63c372cb 3992 prefix2 = strjoina(prefix, "\t");
44d8db9e 3993
9e2f7c11 3994 cmd = exec_command_line(c->argv);
44d8db9e
LP
3995 fprintf(f,
3996 "%sCommand Line: %s\n",
3997 prefix, cmd ? cmd : strerror(ENOMEM));
3998
9fb86720 3999 exec_status_dump(&c->exec_status, f, prefix2);
44d8db9e
LP
4000}
4001
4002void exec_command_dump_list(ExecCommand *c, FILE *f, const char *prefix) {
4003 assert(f);
4004
4c940960 4005 prefix = strempty(prefix);
44d8db9e
LP
4006
4007 LIST_FOREACH(command, c, c)
4008 exec_command_dump(c, f, prefix);
4009}
94f04347 4010
a6a80b4f
LP
4011void exec_command_append_list(ExecCommand **l, ExecCommand *e) {
4012 ExecCommand *end;
4013
4014 assert(l);
4015 assert(e);
4016
4017 if (*l) {
35b8ca3a 4018 /* It's kind of important, that we keep the order here */
71fda00f
LP
4019 LIST_FIND_TAIL(command, *l, end);
4020 LIST_INSERT_AFTER(command, *l, end, e);
a6a80b4f
LP
4021 } else
4022 *l = e;
4023}
4024
26fd040d
LP
4025int exec_command_set(ExecCommand *c, const char *path, ...) {
4026 va_list ap;
4027 char **l, *p;
4028
4029 assert(c);
4030 assert(path);
4031
4032 va_start(ap, path);
4033 l = strv_new_ap(path, ap);
4034 va_end(ap);
4035
4036 if (!l)
4037 return -ENOMEM;
4038
250a918d
LP
4039 p = strdup(path);
4040 if (!p) {
26fd040d
LP
4041 strv_free(l);
4042 return -ENOMEM;
4043 }
4044
4045 free(c->path);
4046 c->path = p;
4047
4048 strv_free(c->argv);
4049 c->argv = l;
4050
4051 return 0;
4052}
4053
86b23b07 4054int exec_command_append(ExecCommand *c, const char *path, ...) {
e63ff941 4055 _cleanup_strv_free_ char **l = NULL;
86b23b07 4056 va_list ap;
86b23b07
JS
4057 int r;
4058
4059 assert(c);
4060 assert(path);
4061
4062 va_start(ap, path);
4063 l = strv_new_ap(path, ap);
4064 va_end(ap);
4065
4066 if (!l)
4067 return -ENOMEM;
4068
e287086b 4069 r = strv_extend_strv(&c->argv, l, false);
e63ff941 4070 if (r < 0)
86b23b07 4071 return r;
86b23b07
JS
4072
4073 return 0;
4074}
4075
4076
613b411c
LP
4077static int exec_runtime_allocate(ExecRuntime **rt) {
4078
4079 if (*rt)
4080 return 0;
4081
4082 *rt = new0(ExecRuntime, 1);
f146f5e1 4083 if (!*rt)
613b411c
LP
4084 return -ENOMEM;
4085
4086 (*rt)->n_ref = 1;
4087 (*rt)->netns_storage_socket[0] = (*rt)->netns_storage_socket[1] = -1;
4088
4089 return 0;
4090}
4091
4092int exec_runtime_make(ExecRuntime **rt, ExecContext *c, const char *id) {
4093 int r;
4094
4095 assert(rt);
4096 assert(c);
4097 assert(id);
4098
4099 if (*rt)
4100 return 1;
4101
4102 if (!c->private_network && !c->private_tmp)
4103 return 0;
4104
4105 r = exec_runtime_allocate(rt);
4106 if (r < 0)
4107 return r;
4108
4109 if (c->private_network && (*rt)->netns_storage_socket[0] < 0) {
33df919d 4110 if (socketpair(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC, 0, (*rt)->netns_storage_socket) < 0)
613b411c
LP
4111 return -errno;
4112 }
4113
4114 if (c->private_tmp && !(*rt)->tmp_dir) {
4115 r = setup_tmp_dirs(id, &(*rt)->tmp_dir, &(*rt)->var_tmp_dir);
4116 if (r < 0)
4117 return r;
4118 }
4119
4120 return 1;
4121}
4122
4123ExecRuntime *exec_runtime_ref(ExecRuntime *r) {
4124 assert(r);
4125 assert(r->n_ref > 0);
4126
4127 r->n_ref++;
4128 return r;
4129}
4130
4131ExecRuntime *exec_runtime_unref(ExecRuntime *r) {
4132
4133 if (!r)
4134 return NULL;
4135
4136 assert(r->n_ref > 0);
4137
4138 r->n_ref--;
f2341e0a
LP
4139 if (r->n_ref > 0)
4140 return NULL;
4141
4142 free(r->tmp_dir);
4143 free(r->var_tmp_dir);
4144 safe_close_pair(r->netns_storage_socket);
6b430fdb 4145 return mfree(r);
613b411c
LP
4146}
4147
f2341e0a 4148int exec_runtime_serialize(Unit *u, ExecRuntime *rt, FILE *f, FDSet *fds) {
613b411c
LP
4149 assert(u);
4150 assert(f);
4151 assert(fds);
4152
4153 if (!rt)
4154 return 0;
4155
4156 if (rt->tmp_dir)
4157 unit_serialize_item(u, f, "tmp-dir", rt->tmp_dir);
4158
4159 if (rt->var_tmp_dir)
4160 unit_serialize_item(u, f, "var-tmp-dir", rt->var_tmp_dir);
4161
4162 if (rt->netns_storage_socket[0] >= 0) {
4163 int copy;
4164
4165 copy = fdset_put_dup(fds, rt->netns_storage_socket[0]);
4166 if (copy < 0)
4167 return copy;
4168
4169 unit_serialize_item_format(u, f, "netns-socket-0", "%i", copy);
4170 }
4171
4172 if (rt->netns_storage_socket[1] >= 0) {
4173 int copy;
4174
4175 copy = fdset_put_dup(fds, rt->netns_storage_socket[1]);
4176 if (copy < 0)
4177 return copy;
4178
4179 unit_serialize_item_format(u, f, "netns-socket-1", "%i", copy);
4180 }
4181
4182 return 0;
4183}
4184
f2341e0a 4185int exec_runtime_deserialize_item(Unit *u, ExecRuntime **rt, const char *key, const char *value, FDSet *fds) {
613b411c
LP
4186 int r;
4187
4188 assert(rt);
4189 assert(key);
4190 assert(value);
4191
4192 if (streq(key, "tmp-dir")) {
4193 char *copy;
4194
4195 r = exec_runtime_allocate(rt);
4196 if (r < 0)
f2341e0a 4197 return log_oom();
613b411c
LP
4198
4199 copy = strdup(value);
4200 if (!copy)
4201 return log_oom();
4202
4203 free((*rt)->tmp_dir);
4204 (*rt)->tmp_dir = copy;
4205
4206 } else if (streq(key, "var-tmp-dir")) {
4207 char *copy;
4208
4209 r = exec_runtime_allocate(rt);
4210 if (r < 0)
f2341e0a 4211 return log_oom();
613b411c
LP
4212
4213 copy = strdup(value);
4214 if (!copy)
4215 return log_oom();
4216
4217 free((*rt)->var_tmp_dir);
4218 (*rt)->var_tmp_dir = copy;
4219
4220 } else if (streq(key, "netns-socket-0")) {
4221 int fd;
4222
4223 r = exec_runtime_allocate(rt);
4224 if (r < 0)
f2341e0a 4225 return log_oom();
613b411c
LP
4226
4227 if (safe_atoi(value, &fd) < 0 || !fdset_contains(fds, fd))
f2341e0a 4228 log_unit_debug(u, "Failed to parse netns socket value: %s", value);
613b411c 4229 else {
03e334a1 4230 safe_close((*rt)->netns_storage_socket[0]);
613b411c
LP
4231 (*rt)->netns_storage_socket[0] = fdset_remove(fds, fd);
4232 }
4233 } else if (streq(key, "netns-socket-1")) {
4234 int fd;
4235
4236 r = exec_runtime_allocate(rt);
4237 if (r < 0)
f2341e0a 4238 return log_oom();
613b411c
LP
4239
4240 if (safe_atoi(value, &fd) < 0 || !fdset_contains(fds, fd))
f2341e0a 4241 log_unit_debug(u, "Failed to parse netns socket value: %s", value);
613b411c 4242 else {
03e334a1 4243 safe_close((*rt)->netns_storage_socket[1]);
613b411c
LP
4244 (*rt)->netns_storage_socket[1] = fdset_remove(fds, fd);
4245 }
4246 } else
4247 return 0;
4248
4249 return 1;
4250}
4251
4252static void *remove_tmpdir_thread(void *p) {
4253 _cleanup_free_ char *path = p;
4254
c6878637 4255 (void) rm_rf(path, REMOVE_ROOT|REMOVE_PHYSICAL);
613b411c
LP
4256 return NULL;
4257}
4258
4259void exec_runtime_destroy(ExecRuntime *rt) {
98b47d54
LP
4260 int r;
4261
613b411c
LP
4262 if (!rt)
4263 return;
4264
4265 /* If there are multiple users of this, let's leave the stuff around */
4266 if (rt->n_ref > 1)
4267 return;
4268
4269 if (rt->tmp_dir) {
4270 log_debug("Spawning thread to nuke %s", rt->tmp_dir);
98b47d54
LP
4271
4272 r = asynchronous_job(remove_tmpdir_thread, rt->tmp_dir);
4273 if (r < 0) {
da927ba9 4274 log_warning_errno(r, "Failed to nuke %s: %m", rt->tmp_dir);
98b47d54
LP
4275 free(rt->tmp_dir);
4276 }
4277
613b411c
LP
4278 rt->tmp_dir = NULL;
4279 }
4280
4281 if (rt->var_tmp_dir) {
4282 log_debug("Spawning thread to nuke %s", rt->var_tmp_dir);
98b47d54
LP
4283
4284 r = asynchronous_job(remove_tmpdir_thread, rt->var_tmp_dir);
4285 if (r < 0) {
da927ba9 4286 log_warning_errno(r, "Failed to nuke %s: %m", rt->var_tmp_dir);
98b47d54
LP
4287 free(rt->var_tmp_dir);
4288 }
4289
613b411c
LP
4290 rt->var_tmp_dir = NULL;
4291 }
4292
3d94f76c 4293 safe_close_pair(rt->netns_storage_socket);
613b411c
LP
4294}
4295
80876c20
LP
4296static const char* const exec_input_table[_EXEC_INPUT_MAX] = {
4297 [EXEC_INPUT_NULL] = "null",
4298 [EXEC_INPUT_TTY] = "tty",
4299 [EXEC_INPUT_TTY_FORCE] = "tty-force",
4f2d528d 4300 [EXEC_INPUT_TTY_FAIL] = "tty-fail",
52c239d7
LB
4301 [EXEC_INPUT_SOCKET] = "socket",
4302 [EXEC_INPUT_NAMED_FD] = "fd",
80876c20
LP
4303};
4304
8a0867d6
LP
4305DEFINE_STRING_TABLE_LOOKUP(exec_input, ExecInput);
4306
94f04347 4307static const char* const exec_output_table[_EXEC_OUTPUT_MAX] = {
80876c20 4308 [EXEC_OUTPUT_INHERIT] = "inherit",
94f04347 4309 [EXEC_OUTPUT_NULL] = "null",
80876c20 4310 [EXEC_OUTPUT_TTY] = "tty",
94f04347 4311 [EXEC_OUTPUT_SYSLOG] = "syslog",
28dbc1e8 4312 [EXEC_OUTPUT_SYSLOG_AND_CONSOLE] = "syslog+console",
9a6bca7a 4313 [EXEC_OUTPUT_KMSG] = "kmsg",
28dbc1e8 4314 [EXEC_OUTPUT_KMSG_AND_CONSOLE] = "kmsg+console",
706343f4
LP
4315 [EXEC_OUTPUT_JOURNAL] = "journal",
4316 [EXEC_OUTPUT_JOURNAL_AND_CONSOLE] = "journal+console",
52c239d7
LB
4317 [EXEC_OUTPUT_SOCKET] = "socket",
4318 [EXEC_OUTPUT_NAMED_FD] = "fd",
94f04347
LP
4319};
4320
4321DEFINE_STRING_TABLE_LOOKUP(exec_output, ExecOutput);
023a4f67
LP
4322
4323static const char* const exec_utmp_mode_table[_EXEC_UTMP_MODE_MAX] = {
4324 [EXEC_UTMP_INIT] = "init",
4325 [EXEC_UTMP_LOGIN] = "login",
4326 [EXEC_UTMP_USER] = "user",
4327};
4328
4329DEFINE_STRING_TABLE_LOOKUP(exec_utmp_mode, ExecUtmpMode);
53f47dfc
YW
4330
4331static const char* const exec_preserve_mode_table[_EXEC_PRESERVE_MODE_MAX] = {
4332 [EXEC_PRESERVE_NO] = "no",
4333 [EXEC_PRESERVE_YES] = "yes",
4334 [EXEC_PRESERVE_RESTART] = "restart",
4335};
4336
4337DEFINE_STRING_TABLE_LOOKUP_WITH_BOOLEAN(exec_preserve_mode, ExecPreserveMode, EXEC_PRESERVE_YES);
3536f49e
YW
4338
4339static const char* const exec_directory_type_table[_EXEC_DIRECTORY_MAX] = {
4340 [EXEC_DIRECTORY_RUNTIME] = "RuntimeDirectory",
4341 [EXEC_DIRECTORY_STATE] = "StateDirectory",
4342 [EXEC_DIRECTORY_CACHE] = "CacheDirectory",
4343 [EXEC_DIRECTORY_LOGS] = "LogsDirectory",
4344 [EXEC_DIRECTORY_CONFIGURATION] = "ConfigurationDirectory",
4345};
4346
4347DEFINE_STRING_TABLE_LOOKUP(exec_directory_type, ExecDirectoryType);