]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/core/execute.c
Merge pull request #6830 from keszybz/generator-dirs
[thirdparty/systemd.git] / src / core / execute.c
CommitLineData
a7334b09
LP
1/***
2 This file is part of systemd.
3
4 Copyright 2010 Lennart Poettering
5
6 systemd is free software; you can redistribute it and/or modify it
5430f7f2
LP
7 under the terms of the GNU Lesser General Public License as published by
8 the Free Software Foundation; either version 2.1 of the License, or
a7334b09
LP
9 (at your option) any later version.
10
11 systemd is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
5430f7f2 14 Lesser General Public License for more details.
a7334b09 15
5430f7f2 16 You should have received a copy of the GNU Lesser General Public License
a7334b09
LP
17 along with systemd; If not, see <http://www.gnu.org/licenses/>.
18***/
19
034c6ed7
LP
20#include <errno.h>
21#include <fcntl.h>
8dd4c05b
LP
22#include <glob.h>
23#include <grp.h>
24#include <poll.h>
309bff19 25#include <signal.h>
8dd4c05b 26#include <string.h>
19c0b0b9 27#include <sys/capability.h>
d251207d 28#include <sys/eventfd.h>
f3e43635 29#include <sys/mman.h>
8dd4c05b 30#include <sys/personality.h>
94f04347 31#include <sys/prctl.h>
d2ffa389 32#include <sys/shm.h>
8dd4c05b 33#include <sys/socket.h>
451a074f 34#include <sys/stat.h>
d2ffa389 35#include <sys/types.h>
8dd4c05b
LP
36#include <sys/un.h>
37#include <unistd.h>
023a4f67 38#include <utmpx.h>
5cb5a6ff 39
5b6319dc
LP
40#ifdef HAVE_PAM
41#include <security/pam_appl.h>
42#endif
43
7b52a628
MS
44#ifdef HAVE_SELINUX
45#include <selinux/selinux.h>
46#endif
47
17df7223
LP
48#ifdef HAVE_SECCOMP
49#include <seccomp.h>
50#endif
51
eef65bf3
MS
52#ifdef HAVE_APPARMOR
53#include <sys/apparmor.h>
54#endif
55
24882e06 56#include "sd-messages.h"
8dd4c05b
LP
57
58#include "af-list.h"
b5efdb8a 59#include "alloc-util.h"
3ffd4af2
LP
60#ifdef HAVE_APPARMOR
61#include "apparmor-util.h"
62#endif
8dd4c05b
LP
63#include "async.h"
64#include "barrier.h"
8dd4c05b 65#include "cap-list.h"
430f0182 66#include "capability-util.h"
f6a6225e 67#include "def.h"
4d1a6904 68#include "env-util.h"
17df7223 69#include "errno-list.h"
3ffd4af2 70#include "execute.h"
8dd4c05b 71#include "exit-status.h"
3ffd4af2 72#include "fd-util.h"
8dd4c05b 73#include "fileio.h"
f97b34a6 74#include "format-util.h"
f4f15635 75#include "fs-util.h"
7d50b32a 76#include "glob-util.h"
c004493c 77#include "io-util.h"
8dd4c05b
LP
78#include "ioprio.h"
79#include "log.h"
80#include "macro.h"
81#include "missing.h"
82#include "mkdir.h"
83#include "namespace.h"
6bedfcbb 84#include "parse-util.h"
8dd4c05b 85#include "path-util.h"
0b452006 86#include "process-util.h"
78f22b97 87#include "rlimit-util.h"
8dd4c05b 88#include "rm-rf.h"
3ffd4af2
LP
89#ifdef HAVE_SECCOMP
90#include "seccomp-util.h"
91#endif
8dd4c05b 92#include "securebits.h"
07d46372 93#include "securebits-util.h"
8dd4c05b 94#include "selinux-util.h"
24882e06 95#include "signal-util.h"
8dd4c05b 96#include "smack-util.h"
fd63e712 97#include "special.h"
8b43440b 98#include "string-table.h"
07630cea 99#include "string-util.h"
8dd4c05b 100#include "strv.h"
7ccbd1ae 101#include "syslog-util.h"
8dd4c05b
LP
102#include "terminal-util.h"
103#include "unit.h"
b1d4f8e1 104#include "user-util.h"
8dd4c05b
LP
105#include "util.h"
106#include "utmp-wtmp.h"
5cb5a6ff 107
e056b01d 108#define IDLE_TIMEOUT_USEC (5*USEC_PER_SEC)
31a7eb86 109#define IDLE_TIMEOUT2_USEC (1*USEC_PER_SEC)
e6a26745 110
02a51aba
LP
111/* This assumes there is a 'tty' group */
112#define TTY_MODE 0620
113
531dca78
LP
114#define SNDBUF_SIZE (8*1024*1024)
115
034c6ed7
LP
/* Relocates the passed file descriptors so that fds[i] ends up at fd number i + 3, i.e. the
 * array occupies a contiguous range directly after stdin/stdout/stderr.
 *
 * The array is modified in place: each entry is replaced by the number of its duplicate and the
 * old descriptor is closed.
 *
 * Returns 0 on success (also when n_fds is 0), or a negative errno if fcntl(F_DUPFD) fails. */
static int shift_fds(int fds[], unsigned n_fds) {
        int begin = 0, retry_at;

        if (n_fds == 0)
                return 0;

        assert(fds);

        do {
                int idx;

                retry_at = -1;

                for (idx = begin; idx < (int) n_fds; idx++) {
                        const int wanted = idx + 3;
                        int moved;

                        /* Nothing to do if this one already sits in its target slot. */
                        if (fds[idx] == wanted)
                                continue;

                        /* F_DUPFD hands out the lowest free fd >= wanted, which may be higher
                         * than what we asked for if the slot is still occupied. */
                        moved = fcntl(fds[idx], F_DUPFD, wanted);
                        if (moved < 0)
                                return -errno;

                        safe_close(fds[idx]);
                        fds[idx] = moved;

                        /* Remember the first entry that missed its slot; closing later
                         * entries may free it, so we do another pass from there. */
                        if (moved != wanted && retry_at < 0)
                                retry_at = idx;
                }

                begin = retry_at;
        } while (retry_at >= 0);

        return 0;
}
160
4c47affc
FB
/* Prepares the fds that are handed to the child: FD_CLOEXEC is cleared on all of them, and
 * O_NONBLOCK is set or cleared (per 'nonblock') on the first n_socket_fds entries only.
 *
 * Returns 0 on success or the negative error reported by fd_nonblock()/fd_cloexec(). */
static int flags_fds(const int fds[], unsigned n_storage_fds, unsigned n_socket_fds, bool nonblock) {
        unsigned total, idx;
        int r;

        total = n_storage_fds + n_socket_fds;
        if (total == 0)
                return 0;

        assert(fds);

        for (idx = 0; idx < total; idx++) {

                /* The non-blocking flag is only adjusted for the leading socket fds. */
                if (idx < n_socket_fds) {
                        r = fd_nonblock(fds[idx], nonblock);
                        if (r < 0)
                                return r;
                }

                /* Always drop close-on-exec, these fds are meant to survive exec(). */
                r = fd_cloexec(fds[idx], false);
                if (r < 0)
                        return r;
        }

        return 0;
}
193
1e22b5cd 194static const char *exec_context_tty_path(const ExecContext *context) {
80876c20
LP
195 assert(context);
196
1e22b5cd
LP
197 if (context->stdio_as_fds)
198 return NULL;
199
80876c20
LP
200 if (context->tty_path)
201 return context->tty_path;
202
203 return "/dev/console";
204}
205
1e22b5cd
LP
206static void exec_context_tty_reset(const ExecContext *context, const ExecParameters *p) {
207 const char *path;
208
6ea832a2
LP
209 assert(context);
210
1e22b5cd 211 path = exec_context_tty_path(context);
6ea832a2 212
1e22b5cd
LP
213 if (context->tty_vhangup) {
214 if (p && p->stdin_fd >= 0)
215 (void) terminal_vhangup_fd(p->stdin_fd);
216 else if (path)
217 (void) terminal_vhangup(path);
218 }
6ea832a2 219
1e22b5cd
LP
220 if (context->tty_reset) {
221 if (p && p->stdin_fd >= 0)
222 (void) reset_terminal_fd(p->stdin_fd, true);
223 else if (path)
224 (void) reset_terminal(path);
225 }
226
227 if (context->tty_vt_disallocate && path)
228 (void) vt_disallocate(path);
6ea832a2
LP
229}
230
6af760f3
LP
231static bool is_terminal_input(ExecInput i) {
232 return IN_SET(i,
233 EXEC_INPUT_TTY,
234 EXEC_INPUT_TTY_FORCE,
235 EXEC_INPUT_TTY_FAIL);
236}
237
3a1286b6 238static bool is_terminal_output(ExecOutput o) {
6af760f3
LP
239 return IN_SET(o,
240 EXEC_OUTPUT_TTY,
241 EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
242 EXEC_OUTPUT_KMSG_AND_CONSOLE,
243 EXEC_OUTPUT_JOURNAL_AND_CONSOLE);
244}
245
aac8c0c3
LP
246static bool is_syslog_output(ExecOutput o) {
247 return IN_SET(o,
248 EXEC_OUTPUT_SYSLOG,
249 EXEC_OUTPUT_SYSLOG_AND_CONSOLE);
250}
251
252static bool is_kmsg_output(ExecOutput o) {
253 return IN_SET(o,
254 EXEC_OUTPUT_KMSG,
255 EXEC_OUTPUT_KMSG_AND_CONSOLE);
256}
257
6af760f3
LP
258static bool exec_context_needs_term(const ExecContext *c) {
259 assert(c);
260
261 /* Return true if the execution context suggests we should set $TERM to something useful. */
262
263 if (is_terminal_input(c->std_input))
264 return true;
265
266 if (is_terminal_output(c->std_output))
267 return true;
268
269 if (is_terminal_output(c->std_error))
270 return true;
271
272 return !!c->tty_path;
3a1286b6
MS
273}
274
80876c20
LP
/* Opens /dev/null with the given open(2) flags (plus O_NOCTTY) and installs it as fd 'nfd'.
 * Returns nfd on success, a negative errno otherwise. */
static int open_null_as(int flags, int nfd) {
        int null_fd, r;

        assert(nfd >= 0);

        null_fd = open("/dev/null", flags|O_NOCTTY);
        if (null_fd < 0)
                return -errno;

        /* open() may already have handed us the number we were after. */
        if (null_fd == nfd)
                return nfd;

        r = dup2(null_fd, nfd) < 0 ? -errno : nfd;
        safe_close(null_fd);

        return r;
}
292
524daa8c 293static int connect_journal_socket(int fd, uid_t uid, gid_t gid) {
92a17af9 294 static const union sockaddr_union sa = {
b92bea5d
ZJS
295 .un.sun_family = AF_UNIX,
296 .un.sun_path = "/run/systemd/journal/stdout",
297 };
524daa8c
ZJS
298 uid_t olduid = UID_INVALID;
299 gid_t oldgid = GID_INVALID;
300 int r;
301
cad93f29 302 if (gid_is_valid(gid)) {
524daa8c
ZJS
303 oldgid = getgid();
304
92a17af9 305 if (setegid(gid) < 0)
524daa8c
ZJS
306 return -errno;
307 }
308
cad93f29 309 if (uid_is_valid(uid)) {
524daa8c
ZJS
310 olduid = getuid();
311
92a17af9 312 if (seteuid(uid) < 0) {
524daa8c
ZJS
313 r = -errno;
314 goto restore_gid;
315 }
316 }
317
92a17af9 318 r = connect(fd, &sa.sa, SOCKADDR_UN_LEN(sa.un)) < 0 ? -errno : 0;
524daa8c
ZJS
319
320 /* If we fail to restore the uid or gid, things will likely
321 fail later on. This should only happen if an LSM interferes. */
322
cad93f29 323 if (uid_is_valid(uid))
524daa8c
ZJS
324 (void) seteuid(olduid);
325
326 restore_gid:
cad93f29 327 if (gid_is_valid(gid))
524daa8c
ZJS
328 (void) setegid(oldgid);
329
330 return r;
331}
332
fd1f9c89 333static int connect_logger_as(
7a1ab780 334 Unit *unit,
fd1f9c89 335 const ExecContext *context,
af635cf3 336 const ExecParameters *params,
fd1f9c89
LP
337 ExecOutput output,
338 const char *ident,
fd1f9c89
LP
339 int nfd,
340 uid_t uid,
341 gid_t gid) {
342
524daa8c 343 int fd, r;
071830ff
LP
344
345 assert(context);
af635cf3 346 assert(params);
80876c20
LP
347 assert(output < _EXEC_OUTPUT_MAX);
348 assert(ident);
349 assert(nfd >= 0);
071830ff 350
54fe0cdb
LP
351 fd = socket(AF_UNIX, SOCK_STREAM, 0);
352 if (fd < 0)
80876c20 353 return -errno;
071830ff 354
524daa8c
ZJS
355 r = connect_journal_socket(fd, uid, gid);
356 if (r < 0)
357 return r;
071830ff 358
80876c20 359 if (shutdown(fd, SHUT_RD) < 0) {
03e334a1 360 safe_close(fd);
80876c20
LP
361 return -errno;
362 }
071830ff 363
fd1f9c89 364 (void) fd_inc_sndbuf(fd, SNDBUF_SIZE);
531dca78 365
80876c20 366 dprintf(fd,
62bca2c6 367 "%s\n"
80876c20
LP
368 "%s\n"
369 "%i\n"
54fe0cdb
LP
370 "%i\n"
371 "%i\n"
372 "%i\n"
4f4a1dbf 373 "%i\n",
c867611e 374 context->syslog_identifier ?: ident,
af635cf3 375 params->flags & EXEC_PASS_LOG_UNIT ? unit->id : "",
54fe0cdb
LP
376 context->syslog_priority,
377 !!context->syslog_level_prefix,
aac8c0c3
LP
378 is_syslog_output(output),
379 is_kmsg_output(output),
3a1286b6 380 is_terminal_output(output));
80876c20 381
fd1f9c89
LP
382 if (fd == nfd)
383 return nfd;
384
385 r = dup2(fd, nfd) < 0 ? -errno : nfd;
386 safe_close(fd);
071830ff 387
80876c20
LP
388 return r;
389}
/* Opens the terminal at 'path' with the given access mode (plus O_NOCTTY) and installs it as
 * fd 'nfd'. Returns nfd on success, or a negative error from open_terminal()/dup2(). */
static int open_terminal_as(const char *path, mode_t mode, int nfd) {
        int tty_fd, r;

        assert(path);
        assert(nfd >= 0);

        tty_fd = open_terminal(path, mode | O_NOCTTY);
        if (tty_fd < 0)
                return tty_fd;

        /* Already at the desired number? Then there's nothing to move. */
        if (tty_fd == nfd)
                return nfd;

        r = dup2(tty_fd, nfd) < 0 ? -errno : nfd;
        safe_close(tty_fd);

        return r;
}
071830ff 408
1e3ad081
LP
409static int fixup_input(ExecInput std_input, int socket_fd, bool apply_tty_stdin) {
410
411 if (is_terminal_input(std_input) && !apply_tty_stdin)
412 return EXEC_INPUT_NULL;
071830ff 413
03fd9c49 414 if (std_input == EXEC_INPUT_SOCKET && socket_fd < 0)
4f2d528d
LP
415 return EXEC_INPUT_NULL;
416
03fd9c49 417 return std_input;
4f2d528d
LP
418}
419
03fd9c49 420static int fixup_output(ExecOutput std_output, int socket_fd) {
4f2d528d 421
03fd9c49 422 if (std_output == EXEC_OUTPUT_SOCKET && socket_fd < 0)
4f2d528d
LP
423 return EXEC_OUTPUT_INHERIT;
424
03fd9c49 425 return std_output;
4f2d528d
LP
426}
427
a34ceba6
LP
428static int setup_input(
429 const ExecContext *context,
430 const ExecParameters *params,
52c239d7
LB
431 int socket_fd,
432 int named_iofds[3]) {
a34ceba6 433
4f2d528d
LP
434 ExecInput i;
435
436 assert(context);
a34ceba6
LP
437 assert(params);
438
439 if (params->stdin_fd >= 0) {
440 if (dup2(params->stdin_fd, STDIN_FILENO) < 0)
441 return -errno;
442
443 /* Try to make this the controlling tty, if it is a tty, and reset it */
444 (void) ioctl(STDIN_FILENO, TIOCSCTTY, context->std_input == EXEC_INPUT_TTY_FORCE);
445 (void) reset_terminal_fd(STDIN_FILENO, true);
446
447 return STDIN_FILENO;
448 }
4f2d528d 449
c39f1ce2 450 i = fixup_input(context->std_input, socket_fd, params->flags & EXEC_APPLY_TTY_STDIN);
4f2d528d
LP
451
452 switch (i) {
071830ff 453
80876c20
LP
454 case EXEC_INPUT_NULL:
455 return open_null_as(O_RDONLY, STDIN_FILENO);
456
457 case EXEC_INPUT_TTY:
458 case EXEC_INPUT_TTY_FORCE:
459 case EXEC_INPUT_TTY_FAIL: {
460 int fd, r;
071830ff 461
1e22b5cd 462 fd = acquire_terminal(exec_context_tty_path(context),
970edce6
ZJS
463 i == EXEC_INPUT_TTY_FAIL,
464 i == EXEC_INPUT_TTY_FORCE,
465 false,
3a43da28 466 USEC_INFINITY);
970edce6 467 if (fd < 0)
80876c20
LP
468 return fd;
469
470 if (fd != STDIN_FILENO) {
471 r = dup2(fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
03e334a1 472 safe_close(fd);
80876c20
LP
473 } else
474 r = STDIN_FILENO;
475
476 return r;
477 }
478
4f2d528d
LP
479 case EXEC_INPUT_SOCKET:
480 return dup2(socket_fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
481
52c239d7
LB
482 case EXEC_INPUT_NAMED_FD:
483 (void) fd_nonblock(named_iofds[STDIN_FILENO], false);
484 return dup2(named_iofds[STDIN_FILENO], STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
485
80876c20
LP
486 default:
487 assert_not_reached("Unknown input type");
488 }
489}
490
a34ceba6
LP
491static int setup_output(
492 Unit *unit,
493 const ExecContext *context,
494 const ExecParameters *params,
495 int fileno,
496 int socket_fd,
52c239d7 497 int named_iofds[3],
a34ceba6 498 const char *ident,
7bce046b
LP
499 uid_t uid,
500 gid_t gid,
501 dev_t *journal_stream_dev,
502 ino_t *journal_stream_ino) {
a34ceba6 503
4f2d528d
LP
504 ExecOutput o;
505 ExecInput i;
47c1d80d 506 int r;
4f2d528d 507
f2341e0a 508 assert(unit);
80876c20 509 assert(context);
a34ceba6 510 assert(params);
80876c20 511 assert(ident);
7bce046b
LP
512 assert(journal_stream_dev);
513 assert(journal_stream_ino);
80876c20 514
a34ceba6
LP
515 if (fileno == STDOUT_FILENO && params->stdout_fd >= 0) {
516
517 if (dup2(params->stdout_fd, STDOUT_FILENO) < 0)
518 return -errno;
519
520 return STDOUT_FILENO;
521 }
522
523 if (fileno == STDERR_FILENO && params->stderr_fd >= 0) {
524 if (dup2(params->stderr_fd, STDERR_FILENO) < 0)
525 return -errno;
526
527 return STDERR_FILENO;
528 }
529
c39f1ce2 530 i = fixup_input(context->std_input, socket_fd, params->flags & EXEC_APPLY_TTY_STDIN);
03fd9c49 531 o = fixup_output(context->std_output, socket_fd);
4f2d528d 532
eb17e935
MS
533 if (fileno == STDERR_FILENO) {
534 ExecOutput e;
535 e = fixup_output(context->std_error, socket_fd);
80876c20 536
eb17e935
MS
537 /* This expects the input and output are already set up */
538
539 /* Don't change the stderr file descriptor if we inherit all
540 * the way and are not on a tty */
541 if (e == EXEC_OUTPUT_INHERIT &&
542 o == EXEC_OUTPUT_INHERIT &&
543 i == EXEC_INPUT_NULL &&
544 !is_terminal_input(context->std_input) &&
545 getppid () != 1)
546 return fileno;
547
548 /* Duplicate from stdout if possible */
52c239d7 549 if ((e == o && e != EXEC_OUTPUT_NAMED_FD) || e == EXEC_OUTPUT_INHERIT)
eb17e935 550 return dup2(STDOUT_FILENO, fileno) < 0 ? -errno : fileno;
071830ff 551
eb17e935 552 o = e;
80876c20 553
eb17e935 554 } else if (o == EXEC_OUTPUT_INHERIT) {
21d21ea4
LP
555 /* If input got downgraded, inherit the original value */
556 if (i == EXEC_INPUT_NULL && is_terminal_input(context->std_input))
1e22b5cd 557 return open_terminal_as(exec_context_tty_path(context), O_WRONLY, fileno);
21d21ea4 558
acb591e4 559 /* If the input is connected to anything that's not a /dev/null, inherit that... */
ff876e28 560 if (i != EXEC_INPUT_NULL)
eb17e935 561 return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
071830ff 562
acb591e4
LP
563 /* If we are not started from PID 1 we just inherit STDOUT from our parent process. */
564 if (getppid() != 1)
eb17e935 565 return fileno;
94f04347 566
eb17e935
MS
567 /* We need to open /dev/null here anew, to get the right access mode. */
568 return open_null_as(O_WRONLY, fileno);
071830ff 569 }
94f04347 570
eb17e935 571 switch (o) {
80876c20
LP
572
573 case EXEC_OUTPUT_NULL:
eb17e935 574 return open_null_as(O_WRONLY, fileno);
80876c20
LP
575
576 case EXEC_OUTPUT_TTY:
4f2d528d 577 if (is_terminal_input(i))
eb17e935 578 return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
80876c20
LP
579
580 /* We don't reset the terminal if this is just about output */
1e22b5cd 581 return open_terminal_as(exec_context_tty_path(context), O_WRONLY, fileno);
80876c20
LP
582
583 case EXEC_OUTPUT_SYSLOG:
28dbc1e8 584 case EXEC_OUTPUT_SYSLOG_AND_CONSOLE:
9a6bca7a 585 case EXEC_OUTPUT_KMSG:
28dbc1e8 586 case EXEC_OUTPUT_KMSG_AND_CONSOLE:
706343f4
LP
587 case EXEC_OUTPUT_JOURNAL:
588 case EXEC_OUTPUT_JOURNAL_AND_CONSOLE:
af635cf3 589 r = connect_logger_as(unit, context, params, o, ident, fileno, uid, gid);
47c1d80d 590 if (r < 0) {
f2341e0a 591 log_unit_error_errno(unit, r, "Failed to connect %s to the journal socket, ignoring: %m", fileno == STDOUT_FILENO ? "stdout" : "stderr");
eb17e935 592 r = open_null_as(O_WRONLY, fileno);
7bce046b
LP
593 } else {
594 struct stat st;
595
596 /* If we connected this fd to the journal via a stream, patch the device/inode into the passed
597 * parameters, but only then. This is useful so that we can set $JOURNAL_STREAM that permits
ab2116b1
LP
598 * services to detect whether they are connected to the journal or not.
599 *
600 * If both stdout and stderr are connected to a stream then let's make sure to store the data
601 * about STDERR as that's usually the best way to do logging. */
7bce046b 602
ab2116b1
LP
603 if (fstat(fileno, &st) >= 0 &&
604 (*journal_stream_ino == 0 || fileno == STDERR_FILENO)) {
7bce046b
LP
605 *journal_stream_dev = st.st_dev;
606 *journal_stream_ino = st.st_ino;
607 }
47c1d80d
MS
608 }
609 return r;
4f2d528d
LP
610
611 case EXEC_OUTPUT_SOCKET:
612 assert(socket_fd >= 0);
eb17e935 613 return dup2(socket_fd, fileno) < 0 ? -errno : fileno;
94f04347 614
52c239d7
LB
615 case EXEC_OUTPUT_NAMED_FD:
616 (void) fd_nonblock(named_iofds[fileno], false);
617 return dup2(named_iofds[fileno], fileno) < 0 ? -errno : fileno;
618
94f04347 619 default:
80876c20 620 assert_not_reached("Unknown error type");
94f04347 621 }
071830ff
LP
622}
623
02a51aba
LP
624static int chown_terminal(int fd, uid_t uid) {
625 struct stat st;
626
627 assert(fd >= 0);
02a51aba 628
1ff74fb6
LP
629 /* Before we chown/chmod the TTY, let's ensure this is actually a tty */
630 if (isatty(fd) < 1)
631 return 0;
632
02a51aba 633 /* This might fail. What matters are the results. */
bab45044
LP
634 (void) fchown(fd, uid, -1);
635 (void) fchmod(fd, TTY_MODE);
02a51aba
LP
636
637 if (fstat(fd, &st) < 0)
638 return -errno;
639
d8b4e2e9 640 if (st.st_uid != uid || (st.st_mode & 0777) != TTY_MODE)
02a51aba
LP
641 return -EPERM;
642
643 return 0;
644}
645
7d5ceb64 646static int setup_confirm_stdio(const char *vc, int *_saved_stdin, int *_saved_stdout) {
3d18b167
LP
647 _cleanup_close_ int fd = -1, saved_stdin = -1, saved_stdout = -1;
648 int r;
80876c20 649
80876c20
LP
650 assert(_saved_stdin);
651 assert(_saved_stdout);
652
af6da548
LP
653 saved_stdin = fcntl(STDIN_FILENO, F_DUPFD, 3);
654 if (saved_stdin < 0)
655 return -errno;
80876c20 656
af6da548 657 saved_stdout = fcntl(STDOUT_FILENO, F_DUPFD, 3);
3d18b167
LP
658 if (saved_stdout < 0)
659 return -errno;
80876c20 660
7d5ceb64 661 fd = acquire_terminal(vc, false, false, false, DEFAULT_CONFIRM_USEC);
3d18b167
LP
662 if (fd < 0)
663 return fd;
80876c20 664
af6da548
LP
665 r = chown_terminal(fd, getuid());
666 if (r < 0)
3d18b167 667 return r;
02a51aba 668
3d18b167
LP
669 r = reset_terminal_fd(fd, true);
670 if (r < 0)
671 return r;
80876c20 672
3d18b167
LP
673 if (dup2(fd, STDIN_FILENO) < 0)
674 return -errno;
675
676 if (dup2(fd, STDOUT_FILENO) < 0)
677 return -errno;
80876c20
LP
678
679 if (fd >= 2)
03e334a1 680 safe_close(fd);
3d18b167 681 fd = -1;
80876c20
LP
682
683 *_saved_stdin = saved_stdin;
684 *_saved_stdout = saved_stdout;
685
3d18b167 686 saved_stdin = saved_stdout = -1;
80876c20 687
3d18b167 688 return 0;
80876c20
LP
689}
690
63d77c92 691static void write_confirm_error_fd(int err, int fd, const Unit *u) {
3b20f877
FB
692 assert(err < 0);
693
694 if (err == -ETIMEDOUT)
63d77c92 695 dprintf(fd, "Confirmation question timed out for %s, assuming positive response.\n", u->id);
3b20f877
FB
696 else {
697 errno = -err;
63d77c92 698 dprintf(fd, "Couldn't ask confirmation for %s: %m, assuming positive response.\n", u->id);
3b20f877
FB
699 }
700}
701
63d77c92 702static void write_confirm_error(int err, const char *vc, const Unit *u) {
03e334a1 703 _cleanup_close_ int fd = -1;
80876c20 704
3b20f877 705 assert(vc);
80876c20 706
7d5ceb64 707 fd = open_terminal(vc, O_WRONLY|O_NOCTTY|O_CLOEXEC);
af6da548 708 if (fd < 0)
3b20f877 709 return;
80876c20 710
63d77c92 711 write_confirm_error_fd(err, fd, u);
af6da548 712}
80876c20 713
/* Undoes setup_confirm_stdio(): releases the terminal, puts the saved descriptors back onto
 * stdin/stdout (where valid) and closes the saved copies, resetting both variables to the value
 * returned by safe_close().
 *
 * Returns 0 on success, or the negative errno of the last dup2() that failed. */
static int restore_confirm_stdio(int *saved_stdin, int *saved_stdout) {
        int ret = 0;

        assert(saved_stdin);
        assert(saved_stdout);

        release_terminal();

        if (*saved_stdin >= 0 && dup2(*saved_stdin, STDIN_FILENO) < 0)
                ret = -errno;

        if (*saved_stdout >= 0 && dup2(*saved_stdout, STDOUT_FILENO) < 0)
                ret = -errno;

        *saved_stdin = safe_close(*saved_stdin);
        *saved_stdout = safe_close(*saved_stdout);

        return ret;
}
735
3b20f877
FB
/* Possible outcomes of ask_for_confirmation(). */
enum {
        /* Don't run the command and treat it as failed. */
        CONFIRM_PRETEND_FAILURE = -1,

        /* Don't run the command and treat it as succeeded. */
        CONFIRM_PRETEND_SUCCESS = 0,

        /* Go ahead and run the command. */
        CONFIRM_EXECUTE = 1,
};
741
eedf223a 742static int ask_for_confirmation(const char *vc, Unit *u, const char *cmdline) {
af6da548 743 int saved_stdout = -1, saved_stdin = -1, r;
2bcd3c26 744 _cleanup_free_ char *e = NULL;
3b20f877 745 char c;
af6da548 746
3b20f877 747 /* For any internal errors, assume a positive response. */
7d5ceb64 748 r = setup_confirm_stdio(vc, &saved_stdin, &saved_stdout);
3b20f877 749 if (r < 0) {
63d77c92 750 write_confirm_error(r, vc, u);
3b20f877
FB
751 return CONFIRM_EXECUTE;
752 }
af6da548 753
b0eb2944
FB
754 /* confirm_spawn might have been disabled while we were sleeping. */
755 if (manager_is_confirm_spawn_disabled(u->manager)) {
756 r = 1;
757 goto restore_stdio;
758 }
af6da548 759
2bcd3c26
FB
760 e = ellipsize(cmdline, 60, 100);
761 if (!e) {
762 log_oom();
763 r = CONFIRM_EXECUTE;
764 goto restore_stdio;
765 }
af6da548 766
d172b175 767 for (;;) {
539622bd 768 r = ask_char(&c, "yfshiDjcn", "Execute %s? [y, f, s – h for help] ", e);
d172b175 769 if (r < 0) {
63d77c92 770 write_confirm_error_fd(r, STDOUT_FILENO, u);
d172b175
FB
771 r = CONFIRM_EXECUTE;
772 goto restore_stdio;
773 }
af6da548 774
d172b175 775 switch (c) {
b0eb2944
FB
776 case 'c':
777 printf("Resuming normal execution.\n");
778 manager_disable_confirm_spawn();
779 r = 1;
780 break;
dd6f9ac0
FB
781 case 'D':
782 unit_dump(u, stdout, " ");
783 continue; /* ask again */
d172b175
FB
784 case 'f':
785 printf("Failing execution.\n");
786 r = CONFIRM_PRETEND_FAILURE;
787 break;
788 case 'h':
b0eb2944
FB
789 printf(" c - continue, proceed without asking anymore\n"
790 " D - dump, show the state of the unit\n"
dd6f9ac0 791 " f - fail, don't execute the command and pretend it failed\n"
d172b175 792 " h - help\n"
eedf223a 793 " i - info, show a short summary of the unit\n"
56fde33a 794 " j - jobs, show jobs that are in progress\n"
d172b175
FB
795 " s - skip, don't execute the command and pretend it succeeded\n"
796 " y - yes, execute the command\n");
dd6f9ac0 797 continue; /* ask again */
eedf223a
FB
798 case 'i':
799 printf(" Description: %s\n"
800 " Unit: %s\n"
801 " Command: %s\n",
802 u->id, u->description, cmdline);
803 continue; /* ask again */
56fde33a
FB
804 case 'j':
805 manager_dump_jobs(u->manager, stdout, " ");
806 continue; /* ask again */
539622bd
FB
807 case 'n':
808 /* 'n' was removed in favor of 'f'. */
809 printf("Didn't understand 'n', did you mean 'f'?\n");
810 continue; /* ask again */
d172b175
FB
811 case 's':
812 printf("Skipping execution.\n");
813 r = CONFIRM_PRETEND_SUCCESS;
814 break;
815 case 'y':
816 r = CONFIRM_EXECUTE;
817 break;
818 default:
819 assert_not_reached("Unhandled choice");
820 }
3b20f877 821 break;
3b20f877 822 }
af6da548 823
3b20f877 824restore_stdio:
af6da548 825 restore_confirm_stdio(&saved_stdin, &saved_stdout);
af6da548 826 return r;
80876c20
LP
827}
828
4d885bd3
DH
829static int get_fixed_user(const ExecContext *c, const char **user,
830 uid_t *uid, gid_t *gid,
831 const char **home, const char **shell) {
81a2b7ce 832 int r;
4d885bd3 833 const char *name;
81a2b7ce 834
4d885bd3 835 assert(c);
81a2b7ce 836
23deef88
LP
837 if (!c->user)
838 return 0;
839
4d885bd3
DH
840 /* Note that we don't set $HOME or $SHELL if they are not particularly enlightening anyway
841 * (i.e. are "/" or "/bin/nologin"). */
81a2b7ce 842
23deef88 843 name = c->user;
4d885bd3
DH
844 r = get_user_creds_clean(&name, uid, gid, home, shell);
845 if (r < 0)
846 return r;
81a2b7ce 847
4d885bd3
DH
848 *user = name;
849 return 0;
850}
851
852static int get_fixed_group(const ExecContext *c, const char **group, gid_t *gid) {
853 int r;
854 const char *name;
855
856 assert(c);
857
858 if (!c->group)
859 return 0;
860
861 name = c->group;
862 r = get_group_creds(&name, gid);
863 if (r < 0)
864 return r;
865
866 *group = name;
867 return 0;
868}
869
cdc5d5c5
DH
870static int get_supplementary_groups(const ExecContext *c, const char *user,
871 const char *group, gid_t gid,
872 gid_t **supplementary_gids, int *ngids) {
4d885bd3
DH
873 char **i;
874 int r, k = 0;
875 int ngroups_max;
876 bool keep_groups = false;
877 gid_t *groups = NULL;
878 _cleanup_free_ gid_t *l_gids = NULL;
879
880 assert(c);
881
bbeea271
DH
882 /*
883 * If user is given, then lookup GID and supplementary groups list.
884 * We avoid NSS lookups for gid=0. Also we have to initialize groups
cdc5d5c5
DH
885 * here and as early as possible so we keep the list of supplementary
886 * groups of the caller.
bbeea271
DH
887 */
888 if (user && gid_is_valid(gid) && gid != 0) {
889 /* First step, initialize groups from /etc/groups */
890 if (initgroups(user, gid) < 0)
891 return -errno;
892
893 keep_groups = true;
894 }
895
4d885bd3
DH
896 if (!c->supplementary_groups)
897 return 0;
898
366ddd25
DH
899 /*
900 * If SupplementaryGroups= was passed then NGROUPS_MAX has to
901 * be positive, otherwise fail.
902 */
903 errno = 0;
904 ngroups_max = (int) sysconf(_SC_NGROUPS_MAX);
905 if (ngroups_max <= 0) {
906 if (errno > 0)
907 return -errno;
908 else
909 return -EOPNOTSUPP; /* For all other values */
910 }
911
4d885bd3
DH
912 l_gids = new(gid_t, ngroups_max);
913 if (!l_gids)
914 return -ENOMEM;
81a2b7ce 915
4d885bd3
DH
916 if (keep_groups) {
917 /*
918 * Lookup the list of groups that the user belongs to, we
919 * avoid NSS lookups here too for gid=0.
920 */
921 k = ngroups_max;
922 if (getgrouplist(user, gid, l_gids, &k) < 0)
923 return -EINVAL;
924 } else
925 k = 0;
81a2b7ce 926
4d885bd3
DH
927 STRV_FOREACH(i, c->supplementary_groups) {
928 const char *g;
81a2b7ce 929
4d885bd3
DH
930 if (k >= ngroups_max)
931 return -E2BIG;
81a2b7ce 932
4d885bd3
DH
933 g = *i;
934 r = get_group_creds(&g, l_gids+k);
935 if (r < 0)
936 return r;
81a2b7ce 937
4d885bd3
DH
938 k++;
939 }
81a2b7ce 940
4d885bd3
DH
941 /*
942 * Sets ngids to zero to drop all supplementary groups, happens
943 * when we are under root and SupplementaryGroups= is empty.
944 */
945 if (k == 0) {
946 *ngids = 0;
947 return 0;
948 }
81a2b7ce 949
4d885bd3
DH
950 /* Otherwise get the final list of supplementary groups */
951 groups = memdup(l_gids, sizeof(gid_t) * k);
952 if (!groups)
953 return -ENOMEM;
954
955 *supplementary_gids = groups;
956 *ngids = k;
957
958 groups = NULL;
959
960 return 0;
961}
962
963static int enforce_groups(const ExecContext *context, gid_t gid,
964 gid_t *supplementary_gids, int ngids) {
965 int r;
966
967 assert(context);
968
969 /* Handle SupplementaryGroups= even if it is empty */
970 if (context->supplementary_groups) {
971 r = maybe_setgroups(ngids, supplementary_gids);
972 if (r < 0)
97f0e76f 973 return r;
4d885bd3 974 }
81a2b7ce 975
4d885bd3
DH
976 if (gid_is_valid(gid)) {
977 /* Then set our gids */
978 if (setresgid(gid, gid, gid) < 0)
979 return -errno;
81a2b7ce
LP
980 }
981
982 return 0;
983}
984
985static int enforce_user(const ExecContext *context, uid_t uid) {
81a2b7ce
LP
986 assert(context);
987
4d885bd3
DH
988 if (!uid_is_valid(uid))
989 return 0;
990
479050b3 991 /* Sets (but doesn't look up) the uid and make sure we keep the
81a2b7ce
LP
992 * capabilities while doing so. */
993
479050b3 994 if (context->capability_ambient_set != 0) {
81a2b7ce
LP
995
996 /* First step: If we need to keep capabilities but
997 * drop privileges we need to make sure we keep our
cbb21cca 998 * caps, while we drop privileges. */
693ced48 999 if (uid != 0) {
cbb21cca 1000 int sb = context->secure_bits | 1<<SECURE_KEEP_CAPS;
693ced48
LP
1001
1002 if (prctl(PR_GET_SECUREBITS) != sb)
1003 if (prctl(PR_SET_SECUREBITS, sb) < 0)
1004 return -errno;
1005 }
81a2b7ce
LP
1006 }
1007
479050b3 1008 /* Second step: actually set the uids */
81a2b7ce
LP
1009 if (setresuid(uid, uid, uid) < 0)
1010 return -errno;
1011
1012 /* At this point we should have all necessary capabilities but
1013 are otherwise a normal user. However, the caps might got
1014 corrupted due to the setresuid() so we need clean them up
1015 later. This is done outside of this call. */
1016
1017 return 0;
1018}
1019
5b6319dc
LP
1020#ifdef HAVE_PAM
1021
1022static int null_conv(
1023 int num_msg,
1024 const struct pam_message **msg,
1025 struct pam_response **resp,
1026 void *appdata_ptr) {
1027
1028 /* We don't support conversations */
1029
1030 return PAM_CONV_ERR;
1031}
1032
cefc33ae
LP
1033#endif
1034
5b6319dc
LP
/* Opens a PAM session for the service 'name' on behalf of 'user' and forks off a helper process ("(sd-pam)")
 * that closes the session again once the calling process is gone.
 *
 * name   – PAM service name passed to pam_start()
 * user   – user name passed to pam_start()
 * uid    – UID the helper process drops to
 * gid    – GID the helper process drops to
 * tty    – optional TTY to register as PAM_TTY, may be NULL
 * env    – in: environment exported to PAM via pam_putenv(); out: on success the old list is freed and
 *          replaced by the list returned by pam_getenvlist()
 * fds    – file descriptors that are closed in the helper process
 * n_fds  – number of entries in fds[]
 *
 * Returns 0 on success (and always when built without PAM support), a negative errno-style error otherwise.
 * PAM errors are reported as -EPERM since they do not map to errno values. */
static int setup_pam(
                const char *name,
                const char *user,
                uid_t uid,
                gid_t gid,
                const char *tty,
                char ***env,
                int fds[], unsigned n_fds) {

#ifdef HAVE_PAM

        static const struct pam_conv conv = {
                .conv = null_conv,
                .appdata_ptr = NULL
        };

        _cleanup_(barrier_destroy) Barrier barrier = BARRIER_NULL;
        pam_handle_t *handle = NULL;
        sigset_t old_ss;
        int pam_code = PAM_SUCCESS, r;
        char **nv, **e = NULL;
        bool close_session = false;
        pid_t pam_pid = 0, parent_pid;
        int flags = 0;

        assert(name);
        assert(user);
        assert(env);

        /* We set up PAM in the parent process, then fork. The child
         * will then stay around until killed via PR_SET_PDEATHSIG or
         * systemd via the cgroup logic. It will then remove the PAM
         * session again. The parent process will exec() the actual
         * daemon. We do things this way to ensure that the main PID
         * of the daemon is the one we initially fork()ed. */

        r = barrier_create(&barrier);
        if (r < 0)
                goto fail;

        /* Keep PAM modules quiet unless we are logging at debug level */
        if (log_get_max_level() < LOG_DEBUG)
                flags |= PAM_SILENT;

        pam_code = pam_start(name, user, &conv, &handle);
        if (pam_code != PAM_SUCCESS) {
                handle = NULL;
                goto fail;
        }

        if (tty) {
                pam_code = pam_set_item(handle, PAM_TTY, tty);
                if (pam_code != PAM_SUCCESS)
                        goto fail;
        }

        /* Export the environment we were passed to the PAM modules */
        STRV_FOREACH(nv, *env) {
                pam_code = pam_putenv(handle, *nv);
                if (pam_code != PAM_SUCCESS)
                        goto fail;
        }

        pam_code = pam_acct_mgmt(handle, flags);
        if (pam_code != PAM_SUCCESS)
                goto fail;

        pam_code = pam_open_session(handle, flags);
        if (pam_code != PAM_SUCCESS)
                goto fail;

        /* From here on the failure path has to close the session again */
        close_session = true;

        e = pam_getenvlist(handle);
        if (!e) {
                pam_code = PAM_BUF_ERR;
                goto fail;
        }

        /* Block SIGTERM, so that we know that it won't get lost in
         * the child */

        assert_se(sigprocmask_many(SIG_BLOCK, &old_ss, SIGTERM, -1) >= 0);

        parent_pid = getpid_cached();

        pam_pid = fork();
        if (pam_pid < 0) {
                r = -errno;

                /* Don't leave SIGTERM blocked in the caller if we bail out */
                (void) sigprocmask(SIG_SETMASK, &old_ss, NULL);
                goto fail;
        }

        if (pam_pid == 0) {
                int sig, ret = EXIT_PAM;

                /* The child's job is to reset the PAM session on
                 * termination */
                barrier_set_role(&barrier, BARRIER_CHILD);

                /* This string must fit in 10 chars (i.e. the length
                 * of "/sbin/init"), to look pretty in /bin/ps */
                rename_process("(sd-pam)");

                /* Make sure we don't keep open the passed fds in this
                child. We assume that otherwise only those fds are
                open here that have been opened by PAM. */
                close_many(fds, n_fds);

                /* Drop privileges - we don't need any to pam_close_session
                 * and this will make PR_SET_PDEATHSIG work in most cases.
                 * If this fails, ignore the error - but expect sd-pam threads
                 * to fail to exit normally */

                r = maybe_setgroups(0, NULL);
                if (r < 0)
                        log_warning_errno(r, "Failed to setgroups() in sd-pam: %m");
                if (setresgid(gid, gid, gid) < 0)
                        log_warning_errno(errno, "Failed to setresgid() in sd-pam: %m");
                if (setresuid(uid, uid, uid) < 0)
                        log_warning_errno(errno, "Failed to setresuid() in sd-pam: %m");

                (void) ignore_signals(SIGPIPE, -1);

                /* Wait until our parent died. This will only work if
                 * the above setresuid() succeeds, otherwise the kernel
                 * will not allow unprivileged parents kill their privileged
                 * children this way. We rely on the control groups kill logic
                 * to do the rest for us. */
                if (prctl(PR_SET_PDEATHSIG, SIGTERM) < 0)
                        goto child_finish;

                /* Tell the parent that our setup is done. This is especially
                 * important regarding dropping privileges. Otherwise, unit
                 * setup might race against our setresuid(2) call.
                 *
                 * If the parent aborted, we'll detect this below, hence ignore
                 * return failure here. */
                (void) barrier_place(&barrier);

                /* Check if our parent process might already have died? */
                if (getppid() == parent_pid) {
                        sigset_t ss;

                        assert_se(sigemptyset(&ss) >= 0);
                        assert_se(sigaddset(&ss, SIGTERM) >= 0);

                        for (;;) {
                                /* sigwait() reports failure by returning a positive error number, it
                                 * neither returns a negative value nor sets errno. */
                                r = sigwait(&ss, &sig);
                                if (r == EINTR)
                                        continue;
                                if (r != 0)
                                        goto child_finish;

                                assert(sig == SIGTERM);
                                break;
                        }
                }

                /* If our parent died we'll end the session */
                if (getppid() != parent_pid) {
                        pam_code = pam_close_session(handle, flags);
                        if (pam_code != PAM_SUCCESS)
                                goto child_finish;
                }

                ret = 0;

        child_finish:
                pam_end(handle, pam_code | flags);
                _exit(ret);
        }

        barrier_set_role(&barrier, BARRIER_PARENT);

        /* If the child was forked off successfully it will do all the
         * cleanups, so forget about the handle here. */
        handle = NULL;

        /* Unblock SIGTERM again in the parent */
        assert_se(sigprocmask(SIG_SETMASK, &old_ss, NULL) >= 0);

        /* We close the log explicitly here, since the PAM modules
         * might have opened it, but we don't want this fd around. */
        closelog();

        /* Synchronously wait for the child to initialize. We don't care for
         * errors as we cannot recover. However, warn loudly if it happens. */
        if (!barrier_place_and_sync(&barrier))
                log_error("PAM initialization failed");

        /* Hand the environment as seen by PAM back to the caller */
        strv_free(*env);
        *env = e;

        return 0;

fail:
        if (pam_code != PAM_SUCCESS) {
                log_error("PAM failed: %s", pam_strerror(handle, pam_code));
                r = -EPERM; /* PAM errors do not map to errno */
        } else
                log_error_errno(r, "PAM failed: %m");

        if (handle) {
                if (close_session)
                        pam_code = pam_close_session(handle, flags);

                pam_end(handle, pam_code | flags);
        }

        strv_free(e);
        closelog();

        return r;
#else
        return 0;
#endif
}
5b6319dc 1251
5d6b1584
LP
/* Renames the calling process to "(<name>)", where <name> is the last path component of 'path', cut down to
 * its final 8 characters. If the path has no usable last component the process is renamed to "(...)". */
static void rename_process_from_path(const char *path) {
        char buf[11]; /* '(' + up to 8 characters + ')' + NUL */
        const char *base;
        size_t len;

        /* The resulting string must fit in 10 chars (i.e. the length
         * of "/sbin/init") to look pretty in /bin/ps */

        base = basename(path);
        if (isempty(base)) {
                rename_process("(...)");
                return;
        }

        len = strlen(base);
        if (len > 8) {
                /* Keep the tail: the end of the process name is usually more
                 * interesting, since the first bit might just be "systemd-" */
                base += len - 8;
                len = 8;
        }

        buf[0] = '(';
        memcpy(buf + 1, base, len);
        buf[len + 1] = ')';
        buf[len + 2] = 0;

        rename_process(buf);
}
1282
469830d1
LP
1283static bool context_has_address_families(const ExecContext *c) {
1284 assert(c);
1285
1286 return c->address_families_whitelist ||
1287 !set_isempty(c->address_families);
1288}
1289
1290static bool context_has_syscall_filters(const ExecContext *c) {
1291 assert(c);
1292
1293 return c->syscall_whitelist ||
1294 !set_isempty(c->syscall_filter);
1295}
1296
/* Decides whether "no new privileges" has to be turned on for this context: either because it was requested
 * explicitly, or because we lack CAP_SYS_ADMIN and one of the seccomp based settings is in use. */
static bool context_has_no_new_privileges(const ExecContext *c) {
        assert(c);

        /* Explicitly configured */
        if (c->no_new_privileges)
                return true;

        /* if we are privileged, we don't need NNP */
        if (have_effective_cap(CAP_SYS_ADMIN))
                return false;

        /* We need NNP if we have any form of seccomp and are unprivileged */
        if (context_has_address_families(c) ||
            c->memory_deny_write_execute ||
            c->restrict_realtime)
                return true;

        if (exec_context_restrict_namespaces_set(c))
                return true;

        if (c->protect_kernel_tunables ||
            c->protect_kernel_modules ||
            c->private_devices)
                return true;

        if (context_has_syscall_filters(c) ||
            !set_isempty(c->syscall_archs))
                return true;

        return c->lock_personality;
}
1318
c0467cf3 1319#ifdef HAVE_SECCOMP
17df7223 1320
83f12b27 1321static bool skip_seccomp_unavailable(const Unit* u, const char* msg) {
f673b62d
LP
1322
1323 if (is_seccomp_available())
1324 return false;
1325
1326 log_open();
1327 log_unit_debug(u, "SECCOMP features not detected in the kernel, skipping %s", msg);
1328 log_close();
1329 return true;
83f12b27
FS
1330}
1331
165a31c0 1332static int apply_syscall_filter(const Unit* u, const ExecContext *c, bool needs_ambient_hack) {
469830d1 1333 uint32_t negative_action, default_action, action;
165a31c0 1334 int r;
8351ceae 1335
469830d1 1336 assert(u);
c0467cf3 1337 assert(c);
8351ceae 1338
469830d1 1339 if (!context_has_syscall_filters(c))
83f12b27
FS
1340 return 0;
1341
469830d1
LP
1342 if (skip_seccomp_unavailable(u, "SystemCallFilter="))
1343 return 0;
e9642be2 1344
469830d1 1345 negative_action = c->syscall_errno == 0 ? SCMP_ACT_KILL : SCMP_ACT_ERRNO(c->syscall_errno);
e9642be2 1346
469830d1
LP
1347 if (c->syscall_whitelist) {
1348 default_action = negative_action;
1349 action = SCMP_ACT_ALLOW;
7c66bae2 1350 } else {
469830d1
LP
1351 default_action = SCMP_ACT_ALLOW;
1352 action = negative_action;
57183d11 1353 }
8351ceae 1354
165a31c0
LP
1355 if (needs_ambient_hack) {
1356 r = seccomp_filter_set_add(c->syscall_filter, c->syscall_whitelist, syscall_filter_sets + SYSCALL_FILTER_SET_SETUID);
1357 if (r < 0)
1358 return r;
1359 }
1360
469830d1 1361 return seccomp_load_syscall_filter_set_raw(default_action, c->syscall_filter, action);
4298d0b5
LP
1362}
1363
469830d1
LP
1364static int apply_syscall_archs(const Unit *u, const ExecContext *c) {
1365 assert(u);
4298d0b5
LP
1366 assert(c);
1367
469830d1 1368 if (set_isempty(c->syscall_archs))
83f12b27
FS
1369 return 0;
1370
469830d1
LP
1371 if (skip_seccomp_unavailable(u, "SystemCallArchitectures="))
1372 return 0;
4298d0b5 1373
469830d1
LP
1374 return seccomp_restrict_archs(c->syscall_archs);
1375}
4298d0b5 1376
469830d1
LP
1377static int apply_address_families(const Unit* u, const ExecContext *c) {
1378 assert(u);
1379 assert(c);
4298d0b5 1380
469830d1
LP
1381 if (!context_has_address_families(c))
1382 return 0;
4298d0b5 1383
469830d1
LP
1384 if (skip_seccomp_unavailable(u, "RestrictAddressFamilies="))
1385 return 0;
4298d0b5 1386
469830d1 1387 return seccomp_restrict_address_families(c->address_families, c->address_families_whitelist);
8351ceae 1388}
4298d0b5 1389
83f12b27 1390static int apply_memory_deny_write_execute(const Unit* u, const ExecContext *c) {
469830d1 1391 assert(u);
f3e43635
TM
1392 assert(c);
1393
469830d1 1394 if (!c->memory_deny_write_execute)
83f12b27
FS
1395 return 0;
1396
469830d1
LP
1397 if (skip_seccomp_unavailable(u, "MemoryDenyWriteExecute="))
1398 return 0;
f3e43635 1399
469830d1 1400 return seccomp_memory_deny_write_execute();
f3e43635
TM
1401}
1402
83f12b27 1403static int apply_restrict_realtime(const Unit* u, const ExecContext *c) {
469830d1 1404 assert(u);
f4170c67
LP
1405 assert(c);
1406
469830d1 1407 if (!c->restrict_realtime)
83f12b27
FS
1408 return 0;
1409
469830d1
LP
1410 if (skip_seccomp_unavailable(u, "RestrictRealtime="))
1411 return 0;
f4170c67 1412
469830d1 1413 return seccomp_restrict_realtime();
f4170c67
LP
1414}
1415
59e856c7 1416static int apply_protect_sysctl(const Unit *u, const ExecContext *c) {
469830d1 1417 assert(u);
59eeb84b
LP
1418 assert(c);
1419
1420 /* Turn off the legacy sysctl() system call. Many distributions turn this off while building the kernel, but
1421 * let's protect even those systems where this is left on in the kernel. */
1422
469830d1 1423 if (!c->protect_kernel_tunables)
59eeb84b
LP
1424 return 0;
1425
469830d1
LP
1426 if (skip_seccomp_unavailable(u, "ProtectKernelTunables="))
1427 return 0;
59eeb84b 1428
469830d1 1429 return seccomp_protect_sysctl();
59eeb84b
LP
1430}
1431
59e856c7 1432static int apply_protect_kernel_modules(const Unit *u, const ExecContext *c) {
469830d1 1433 assert(u);
502d704e
DH
1434 assert(c);
1435
25a8d8a0 1436 /* Turn off module syscalls on ProtectKernelModules=yes */
502d704e 1437
469830d1
LP
1438 if (!c->protect_kernel_modules)
1439 return 0;
1440
502d704e
DH
1441 if (skip_seccomp_unavailable(u, "ProtectKernelModules="))
1442 return 0;
1443
469830d1 1444 return seccomp_load_syscall_filter_set(SCMP_ACT_ALLOW, syscall_filter_sets + SYSCALL_FILTER_SET_MODULE, SCMP_ACT_ERRNO(EPERM));
502d704e
DH
1445}
1446
59e856c7 1447static int apply_private_devices(const Unit *u, const ExecContext *c) {
469830d1 1448 assert(u);
ba128bb8
LP
1449 assert(c);
1450
8f81a5f6 1451 /* If PrivateDevices= is set, also turn off iopl and all @raw-io syscalls. */
ba128bb8 1452
469830d1
LP
1453 if (!c->private_devices)
1454 return 0;
1455
ba128bb8
LP
1456 if (skip_seccomp_unavailable(u, "PrivateDevices="))
1457 return 0;
1458
469830d1 1459 return seccomp_load_syscall_filter_set(SCMP_ACT_ALLOW, syscall_filter_sets + SYSCALL_FILTER_SET_RAW_IO, SCMP_ACT_ERRNO(EPERM));
ba128bb8
LP
1460}
1461
add00535 1462static int apply_restrict_namespaces(Unit *u, const ExecContext *c) {
469830d1 1463 assert(u);
add00535
LP
1464 assert(c);
1465
1466 if (!exec_context_restrict_namespaces_set(c))
1467 return 0;
1468
1469 if (skip_seccomp_unavailable(u, "RestrictNamespaces="))
1470 return 0;
1471
1472 return seccomp_restrict_namespaces(c->restrict_namespaces);
1473}
1474
78e864e5 1475static int apply_lock_personality(const Unit* u, const ExecContext *c) {
e8132d63
LP
1476 unsigned long personality;
1477 int r;
78e864e5
TM
1478
1479 assert(u);
1480 assert(c);
1481
1482 if (!c->lock_personality)
1483 return 0;
1484
1485 if (skip_seccomp_unavailable(u, "LockPersonality="))
1486 return 0;
1487
e8132d63
LP
1488 personality = c->personality;
1489
1490 /* If personality is not specified, use either PER_LINUX or PER_LINUX32 depending on what is currently set. */
1491 if (personality == PERSONALITY_INVALID) {
1492
1493 r = opinionated_personality(&personality);
1494 if (r < 0)
1495 return r;
1496 }
78e864e5
TM
1497
1498 return seccomp_lock_personality(personality);
1499}
1500
c0467cf3 1501#endif
8351ceae 1502
31a7eb86
ZJS
1503static void do_idle_pipe_dance(int idle_pipe[4]) {
1504 assert(idle_pipe);
1505
54eb2300
LP
1506 idle_pipe[1] = safe_close(idle_pipe[1]);
1507 idle_pipe[2] = safe_close(idle_pipe[2]);
31a7eb86
ZJS
1508
1509 if (idle_pipe[0] >= 0) {
1510 int r;
1511
1512 r = fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT_USEC);
1513
1514 if (idle_pipe[3] >= 0 && r == 0 /* timeout */) {
c7cc737f
LP
1515 ssize_t n;
1516
31a7eb86 1517 /* Signal systemd that we are bored and want to continue. */
c7cc737f
LP
1518 n = write(idle_pipe[3], "x", 1);
1519 if (n > 0)
cd972d69
ZJS
1520 /* Wait for systemd to react to the signal above. */
1521 fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT2_USEC);
31a7eb86
ZJS
1522 }
1523
54eb2300 1524 idle_pipe[0] = safe_close(idle_pipe[0]);
31a7eb86
ZJS
1525
1526 }
1527
54eb2300 1528 idle_pipe[3] = safe_close(idle_pipe[3]);
31a7eb86
ZJS
1529}
1530
7cae38c4 1531static int build_environment(
fd63e712 1532 Unit *u,
9fa95f85 1533 const ExecContext *c,
1e22b5cd 1534 const ExecParameters *p,
7cae38c4
LP
1535 unsigned n_fds,
1536 const char *home,
1537 const char *username,
1538 const char *shell,
7bce046b
LP
1539 dev_t journal_stream_dev,
1540 ino_t journal_stream_ino,
7cae38c4
LP
1541 char ***ret) {
1542
1543 _cleanup_strv_free_ char **our_env = NULL;
1544 unsigned n_env = 0;
1545 char *x;
1546
4b58153d 1547 assert(u);
7cae38c4
LP
1548 assert(c);
1549 assert(ret);
1550
4b58153d 1551 our_env = new0(char*, 14);
7cae38c4
LP
1552 if (!our_env)
1553 return -ENOMEM;
1554
1555 if (n_fds > 0) {
8dd4c05b
LP
1556 _cleanup_free_ char *joined = NULL;
1557
df0ff127 1558 if (asprintf(&x, "LISTEN_PID="PID_FMT, getpid_cached()) < 0)
7cae38c4
LP
1559 return -ENOMEM;
1560 our_env[n_env++] = x;
1561
1562 if (asprintf(&x, "LISTEN_FDS=%u", n_fds) < 0)
1563 return -ENOMEM;
1564 our_env[n_env++] = x;
8dd4c05b 1565
1e22b5cd 1566 joined = strv_join(p->fd_names, ":");
8dd4c05b
LP
1567 if (!joined)
1568 return -ENOMEM;
1569
605405c6 1570 x = strjoin("LISTEN_FDNAMES=", joined);
8dd4c05b
LP
1571 if (!x)
1572 return -ENOMEM;
1573 our_env[n_env++] = x;
7cae38c4
LP
1574 }
1575
b08af3b1 1576 if ((p->flags & EXEC_SET_WATCHDOG) && p->watchdog_usec > 0) {
df0ff127 1577 if (asprintf(&x, "WATCHDOG_PID="PID_FMT, getpid_cached()) < 0)
09812eb7
LP
1578 return -ENOMEM;
1579 our_env[n_env++] = x;
1580
1e22b5cd 1581 if (asprintf(&x, "WATCHDOG_USEC="USEC_FMT, p->watchdog_usec) < 0)
09812eb7
LP
1582 return -ENOMEM;
1583 our_env[n_env++] = x;
1584 }
1585
fd63e712
LP
1586 /* If this is D-Bus, tell the nss-systemd module, since it relies on being able to use D-Bus look up dynamic
1587 * users via PID 1, possibly dead-locking the dbus daemon. This way it will not use D-Bus to resolve names, but
1588 * check the database directly. */
ac647978 1589 if (p->flags & EXEC_NSS_BYPASS_BUS) {
fd63e712
LP
1590 x = strdup("SYSTEMD_NSS_BYPASS_BUS=1");
1591 if (!x)
1592 return -ENOMEM;
1593 our_env[n_env++] = x;
1594 }
1595
7cae38c4
LP
1596 if (home) {
1597 x = strappend("HOME=", home);
1598 if (!x)
1599 return -ENOMEM;
1600 our_env[n_env++] = x;
1601 }
1602
1603 if (username) {
1604 x = strappend("LOGNAME=", username);
1605 if (!x)
1606 return -ENOMEM;
1607 our_env[n_env++] = x;
1608
1609 x = strappend("USER=", username);
1610 if (!x)
1611 return -ENOMEM;
1612 our_env[n_env++] = x;
1613 }
1614
1615 if (shell) {
1616 x = strappend("SHELL=", shell);
1617 if (!x)
1618 return -ENOMEM;
1619 our_env[n_env++] = x;
1620 }
1621
4b58153d
LP
1622 if (!sd_id128_is_null(u->invocation_id)) {
1623 if (asprintf(&x, "INVOCATION_ID=" SD_ID128_FORMAT_STR, SD_ID128_FORMAT_VAL(u->invocation_id)) < 0)
1624 return -ENOMEM;
1625
1626 our_env[n_env++] = x;
1627 }
1628
6af760f3
LP
1629 if (exec_context_needs_term(c)) {
1630 const char *tty_path, *term = NULL;
1631
1632 tty_path = exec_context_tty_path(c);
1633
1634 /* If we are forked off PID 1 and we are supposed to operate on /dev/console, then let's try to inherit
1635 * the $TERM set for PID 1. This is useful for containers so that the $TERM the container manager
1636 * passes to PID 1 ends up all the way in the console login shown. */
1637
1638 if (path_equal(tty_path, "/dev/console") && getppid() == 1)
1639 term = getenv("TERM");
1640 if (!term)
1641 term = default_term_for_tty(tty_path);
7cae38c4 1642
6af760f3 1643 x = strappend("TERM=", term);
7cae38c4
LP
1644 if (!x)
1645 return -ENOMEM;
1646 our_env[n_env++] = x;
1647 }
1648
7bce046b
LP
1649 if (journal_stream_dev != 0 && journal_stream_ino != 0) {
1650 if (asprintf(&x, "JOURNAL_STREAM=" DEV_FMT ":" INO_FMT, journal_stream_dev, journal_stream_ino) < 0)
1651 return -ENOMEM;
1652
1653 our_env[n_env++] = x;
1654 }
1655
7cae38c4 1656 our_env[n_env++] = NULL;
7bce046b 1657 assert(n_env <= 12);
7cae38c4
LP
1658
1659 *ret = our_env;
1660 our_env = NULL;
1661
1662 return 0;
1663}
1664
b4c14404
FB
1665static int build_pass_environment(const ExecContext *c, char ***ret) {
1666 _cleanup_strv_free_ char **pass_env = NULL;
1667 size_t n_env = 0, n_bufsize = 0;
1668 char **i;
1669
1670 STRV_FOREACH(i, c->pass_environment) {
1671 _cleanup_free_ char *x = NULL;
1672 char *v;
1673
1674 v = getenv(*i);
1675 if (!v)
1676 continue;
605405c6 1677 x = strjoin(*i, "=", v);
b4c14404
FB
1678 if (!x)
1679 return -ENOMEM;
00819cc1 1680
b4c14404
FB
1681 if (!GREEDY_REALLOC(pass_env, n_bufsize, n_env + 2))
1682 return -ENOMEM;
00819cc1 1683
b4c14404
FB
1684 pass_env[n_env++] = x;
1685 pass_env[n_env] = NULL;
1686 x = NULL;
1687 }
1688
1689 *ret = pass_env;
1690 pass_env = NULL;
1691
1692 return 0;
1693}
1694
8b44a3d2
LP
1695static bool exec_needs_mount_namespace(
1696 const ExecContext *context,
1697 const ExecParameters *params,
1698 ExecRuntime *runtime) {
1699
1700 assert(context);
1701 assert(params);
1702
915e6d16
LP
1703 if (context->root_image)
1704 return true;
1705
2a624c36
AP
1706 if (!strv_isempty(context->read_write_paths) ||
1707 !strv_isempty(context->read_only_paths) ||
1708 !strv_isempty(context->inaccessible_paths))
8b44a3d2
LP
1709 return true;
1710
d2d6c096
LP
1711 if (context->n_bind_mounts > 0)
1712 return true;
1713
8b44a3d2
LP
1714 if (context->mount_flags != 0)
1715 return true;
1716
1717 if (context->private_tmp && runtime && (runtime->tmp_dir || runtime->var_tmp_dir))
1718 return true;
1719
8b44a3d2
LP
1720 if (context->private_devices ||
1721 context->protect_system != PROTECT_SYSTEM_NO ||
59eeb84b
LP
1722 context->protect_home != PROTECT_HOME_NO ||
1723 context->protect_kernel_tunables ||
c575770b 1724 context->protect_kernel_modules ||
59eeb84b 1725 context->protect_control_groups)
8b44a3d2
LP
1726 return true;
1727
9c988f93 1728 if (context->mount_apivfs && (context->root_image || context->root_directory))
5d997827
LP
1729 return true;
1730
8b44a3d2
LP
1731 return false;
1732}
1733
d251207d
LP
1734static int setup_private_users(uid_t uid, gid_t gid) {
1735 _cleanup_free_ char *uid_map = NULL, *gid_map = NULL;
1736 _cleanup_close_pair_ int errno_pipe[2] = { -1, -1 };
1737 _cleanup_close_ int unshare_ready_fd = -1;
1738 _cleanup_(sigkill_waitp) pid_t pid = 0;
1739 uint64_t c = 1;
1740 siginfo_t si;
1741 ssize_t n;
1742 int r;
1743
1744 /* Set up a user namespace and map root to root, the selected UID/GID to itself, and everything else to
1745 * nobody. In order to be able to write this mapping we need CAP_SETUID in the original user namespace, which
1746 * we however lack after opening the user namespace. To work around this we fork() a temporary child process,
1747 * which waits for the parent to create the new user namespace while staying in the original namespace. The
1748 * child then writes the UID mapping, under full privileges. The parent waits for the child to finish and
1749 * continues execution normally. */
1750
587ab01b
ZJS
1751 if (uid != 0 && uid_is_valid(uid)) {
1752 r = asprintf(&uid_map,
1753 "0 0 1\n" /* Map root → root */
1754 UID_FMT " " UID_FMT " 1\n", /* Map $UID → $UID */
1755 uid, uid);
1756 if (r < 0)
1757 return -ENOMEM;
1758 } else {
e0f3720e 1759 uid_map = strdup("0 0 1\n"); /* The case where the above is the same */
587ab01b
ZJS
1760 if (!uid_map)
1761 return -ENOMEM;
1762 }
d251207d 1763
587ab01b
ZJS
1764 if (gid != 0 && gid_is_valid(gid)) {
1765 r = asprintf(&gid_map,
1766 "0 0 1\n" /* Map root → root */
1767 GID_FMT " " GID_FMT " 1\n", /* Map $GID → $GID */
1768 gid, gid);
1769 if (r < 0)
1770 return -ENOMEM;
1771 } else {
d251207d 1772 gid_map = strdup("0 0 1\n"); /* The case where the above is the same */
587ab01b
ZJS
1773 if (!gid_map)
1774 return -ENOMEM;
1775 }
d251207d
LP
1776
1777 /* Create a communication channel so that the parent can tell the child when it finished creating the user
1778 * namespace. */
1779 unshare_ready_fd = eventfd(0, EFD_CLOEXEC);
1780 if (unshare_ready_fd < 0)
1781 return -errno;
1782
1783 /* Create a communication channel so that the child can tell the parent a proper error code in case it
1784 * failed. */
1785 if (pipe2(errno_pipe, O_CLOEXEC) < 0)
1786 return -errno;
1787
1788 pid = fork();
1789 if (pid < 0)
1790 return -errno;
1791
1792 if (pid == 0) {
1793 _cleanup_close_ int fd = -1;
1794 const char *a;
1795 pid_t ppid;
1796
1797 /* Child process, running in the original user namespace. Let's update the parent's UID/GID map from
1798 * here, after the parent opened its own user namespace. */
1799
1800 ppid = getppid();
1801 errno_pipe[0] = safe_close(errno_pipe[0]);
1802
1803 /* Wait until the parent unshared the user namespace */
1804 if (read(unshare_ready_fd, &c, sizeof(c)) < 0) {
1805 r = -errno;
1806 goto child_fail;
1807 }
1808
1809 /* Disable the setgroups() system call in the child user namespace, for good. */
1810 a = procfs_file_alloca(ppid, "setgroups");
1811 fd = open(a, O_WRONLY|O_CLOEXEC);
1812 if (fd < 0) {
1813 if (errno != ENOENT) {
1814 r = -errno;
1815 goto child_fail;
1816 }
1817
1818 /* If the file is missing the kernel is too old, let's continue anyway. */
1819 } else {
1820 if (write(fd, "deny\n", 5) < 0) {
1821 r = -errno;
1822 goto child_fail;
1823 }
1824
1825 fd = safe_close(fd);
1826 }
1827
1828 /* First write the GID map */
1829 a = procfs_file_alloca(ppid, "gid_map");
1830 fd = open(a, O_WRONLY|O_CLOEXEC);
1831 if (fd < 0) {
1832 r = -errno;
1833 goto child_fail;
1834 }
1835 if (write(fd, gid_map, strlen(gid_map)) < 0) {
1836 r = -errno;
1837 goto child_fail;
1838 }
1839 fd = safe_close(fd);
1840
1841 /* The write the UID map */
1842 a = procfs_file_alloca(ppid, "uid_map");
1843 fd = open(a, O_WRONLY|O_CLOEXEC);
1844 if (fd < 0) {
1845 r = -errno;
1846 goto child_fail;
1847 }
1848 if (write(fd, uid_map, strlen(uid_map)) < 0) {
1849 r = -errno;
1850 goto child_fail;
1851 }
1852
1853 _exit(EXIT_SUCCESS);
1854
1855 child_fail:
1856 (void) write(errno_pipe[1], &r, sizeof(r));
1857 _exit(EXIT_FAILURE);
1858 }
1859
1860 errno_pipe[1] = safe_close(errno_pipe[1]);
1861
1862 if (unshare(CLONE_NEWUSER) < 0)
1863 return -errno;
1864
1865 /* Let the child know that the namespace is ready now */
1866 if (write(unshare_ready_fd, &c, sizeof(c)) < 0)
1867 return -errno;
1868
1869 /* Try to read an error code from the child */
1870 n = read(errno_pipe[0], &r, sizeof(r));
1871 if (n < 0)
1872 return -errno;
1873 if (n == sizeof(r)) { /* an error code was sent to us */
1874 if (r < 0)
1875 return r;
1876 return -EIO;
1877 }
1878 if (n != 0) /* on success we should have read 0 bytes */
1879 return -EIO;
1880
1881 r = wait_for_terminate(pid, &si);
1882 if (r < 0)
1883 return r;
1884 pid = 0;
1885
1886 /* If something strange happened with the child, let's consider this fatal, too */
1887 if (si.si_code != CLD_EXITED || si.si_status != 0)
1888 return -EIO;
1889
1890 return 0;
1891}
1892
3536f49e 1893static int setup_exec_directory(
07689d5d
LP
1894 const ExecContext *context,
1895 const ExecParameters *params,
1896 uid_t uid,
3536f49e 1897 gid_t gid,
3536f49e
YW
1898 ExecDirectoryType type,
1899 int *exit_status) {
07689d5d 1900
3536f49e
YW
1901 static const int exit_status_table[_EXEC_DIRECTORY_MAX] = {
1902 [EXEC_DIRECTORY_RUNTIME] = EXIT_RUNTIME_DIRECTORY,
1903 [EXEC_DIRECTORY_STATE] = EXIT_STATE_DIRECTORY,
1904 [EXEC_DIRECTORY_CACHE] = EXIT_CACHE_DIRECTORY,
1905 [EXEC_DIRECTORY_LOGS] = EXIT_LOGS_DIRECTORY,
1906 [EXEC_DIRECTORY_CONFIGURATION] = EXIT_CONFIGURATION_DIRECTORY,
1907 };
07689d5d
LP
1908 char **rt;
1909 int r;
1910
1911 assert(context);
1912 assert(params);
3536f49e
YW
1913 assert(type >= 0 && type < _EXEC_DIRECTORY_MAX);
1914 assert(exit_status);
07689d5d 1915
3536f49e
YW
1916 if (!params->prefix[type])
1917 return 0;
1918
8679efde 1919 if (params->flags & EXEC_CHOWN_DIRECTORIES) {
3536f49e
YW
1920 if (!uid_is_valid(uid))
1921 uid = 0;
1922 if (!gid_is_valid(gid))
1923 gid = 0;
1924 }
1925
1926 STRV_FOREACH(rt, context->directories[type].paths) {
07689d5d
LP
1927 _cleanup_free_ char *p;
1928
3536f49e
YW
1929 p = strjoin(params->prefix[type], "/", *rt);
1930 if (!p) {
1931 r = -ENOMEM;
1932 goto fail;
1933 }
07689d5d 1934
23a7448e
YW
1935 r = mkdir_parents_label(p, 0755);
1936 if (r < 0)
3536f49e 1937 goto fail;
23a7448e 1938
3536f49e 1939 r = mkdir_p_label(p, context->directories[type].mode);
07689d5d 1940 if (r < 0)
3536f49e 1941 goto fail;
07689d5d 1942
c71b2eb7
LP
1943 /* Don't change the owner of the configuration directory, as in the common case it is not written to by
1944 * a service, and shall not be writable. */
1945 if (type == EXEC_DIRECTORY_CONFIGURATION)
1946 continue;
1947
3536f49e 1948 r = chmod_and_chown(p, context->directories[type].mode, uid, gid);
07689d5d 1949 if (r < 0)
3536f49e 1950 goto fail;
07689d5d
LP
1951 }
1952
1953 return 0;
3536f49e
YW
1954
1955fail:
1956 *exit_status = exit_status_table[type];
1957
1958 return r;
07689d5d
LP
1959}
1960
cefc33ae
LP
1961static int setup_smack(
1962 const ExecContext *context,
1963 const ExecCommand *command) {
1964
cefc33ae
LP
1965 int r;
1966
1967 assert(context);
1968 assert(command);
1969
cefc33ae
LP
1970 if (context->smack_process_label) {
1971 r = mac_smack_apply_pid(0, context->smack_process_label);
1972 if (r < 0)
1973 return r;
1974 }
1975#ifdef SMACK_DEFAULT_PROCESS_LABEL
1976 else {
1977 _cleanup_free_ char *exec_label = NULL;
1978
1979 r = mac_smack_read(command->path, SMACK_ATTR_EXEC, &exec_label);
1980 if (r < 0 && r != -ENODATA && r != -EOPNOTSUPP)
1981 return r;
1982
1983 r = mac_smack_apply_pid(0, exec_label ? : SMACK_DEFAULT_PROCESS_LABEL);
1984 if (r < 0)
1985 return r;
1986 }
cefc33ae
LP
1987#endif
1988
1989 return 0;
1990}
1991
3fbe8dbe
LP
1992static int compile_read_write_paths(
1993 const ExecContext *context,
1994 const ExecParameters *params,
1995 char ***ret) {
1996
1997 _cleanup_strv_free_ char **l = NULL;
1998 char **rt;
3536f49e 1999 ExecDirectoryType i;
3fbe8dbe 2000
06ec51d8
ZJS
2001 /* Compile the list of writable paths. This is the combination of
2002 * the explicitly configured paths, plus all runtime directories. */
3fbe8dbe 2003
3536f49e
YW
2004 if (strv_isempty(context->read_write_paths)) {
2005 for (i = 0; i < _EXEC_DIRECTORY_MAX; i++)
2006 if (!strv_isempty(context->directories[i].paths))
2007 break;
2008
2009 if (i == _EXEC_DIRECTORY_MAX) {
2010 *ret = NULL; /* NOP if neither is set */
2011 return 0;
2012 }
3fbe8dbe
LP
2013 }
2014
2015 l = strv_copy(context->read_write_paths);
2016 if (!l)
2017 return -ENOMEM;
2018
3536f49e
YW
2019 for (i = 0; i < _EXEC_DIRECTORY_MAX; i++) {
2020 if (!params->prefix[i])
2021 continue;
3fbe8dbe 2022
3536f49e
YW
2023 STRV_FOREACH(rt, context->directories[i].paths) {
2024 char *s;
3fbe8dbe 2025
3536f49e
YW
2026 s = strjoin(params->prefix[i], "/", *rt);
2027 if (!s)
2028 return -ENOMEM;
2029
2030 if (strv_consume(&l, s) < 0)
2031 return -ENOMEM;
2032 }
3fbe8dbe
LP
2033 }
2034
2035 *ret = l;
2036 l = NULL;
2037
2038 return 0;
2039}
2040
6818c54c
LP
2041static int apply_mount_namespace(
2042 Unit *u,
2043 ExecCommand *command,
2044 const ExecContext *context,
2045 const ExecParameters *params,
2046 ExecRuntime *runtime) {
2047
06ec51d8 2048 _cleanup_strv_free_ char **rw = NULL;
93c6bb51 2049 char *tmp = NULL, *var = NULL;
915e6d16 2050 const char *root_dir = NULL, *root_image = NULL;
93c6bb51 2051 NameSpaceInfo ns_info = {
af964954 2052 .ignore_protect_paths = false,
93c6bb51
DH
2053 .private_dev = context->private_devices,
2054 .protect_control_groups = context->protect_control_groups,
2055 .protect_kernel_tunables = context->protect_kernel_tunables,
2056 .protect_kernel_modules = context->protect_kernel_modules,
5d997827 2057 .mount_apivfs = context->mount_apivfs,
93c6bb51 2058 };
165a31c0 2059 bool needs_sandboxing;
6818c54c 2060 int r;
93c6bb51 2061
2b3c1b9e
DH
2062 assert(context);
2063
93c6bb51
DH
2064 /* The runtime struct only contains the parent of the private /tmp,
2065 * which is non-accessible to world users. Inside of it there's a /tmp
2066 * that is sticky, and that's the one we want to use here. */
2067
2068 if (context->private_tmp && runtime) {
2069 if (runtime->tmp_dir)
2070 tmp = strjoina(runtime->tmp_dir, "/tmp");
2071 if (runtime->var_tmp_dir)
2072 var = strjoina(runtime->var_tmp_dir, "/tmp");
2073 }
2074
2075 r = compile_read_write_paths(context, params, &rw);
2076 if (r < 0)
2077 return r;
2078
915e6d16
LP
2079 if (params->flags & EXEC_APPLY_CHROOT) {
2080 root_image = context->root_image;
2081
2082 if (!root_image)
2083 root_dir = context->root_directory;
2084 }
93c6bb51 2085
af964954
DH
2086 /*
2087 * If DynamicUser=no and RootDirectory= is set then lets pass a relaxed
2088 * sandbox info, otherwise enforce it, don't ignore protected paths and
2089 * fail if we are enable to apply the sandbox inside the mount namespace.
2090 */
2091 if (!context->dynamic_user && root_dir)
2092 ns_info.ignore_protect_paths = true;
2093
165a31c0 2094 needs_sandboxing = (params->flags & EXEC_APPLY_SANDBOXING) && !(command->flags & EXEC_COMMAND_FULLY_PRIVILEGED);
6818c54c 2095
915e6d16
LP
2096 r = setup_namespace(root_dir, root_image,
2097 &ns_info, rw,
165a31c0
LP
2098 needs_sandboxing ? context->read_only_paths : NULL,
2099 needs_sandboxing ? context->inaccessible_paths : NULL,
d2d6c096
LP
2100 context->bind_mounts,
2101 context->n_bind_mounts,
93c6bb51
DH
2102 tmp,
2103 var,
165a31c0
LP
2104 needs_sandboxing ? context->protect_home : PROTECT_HOME_NO,
2105 needs_sandboxing ? context->protect_system : PROTECT_SYSTEM_NO,
915e6d16
LP
2106 context->mount_flags,
2107 DISSECT_IMAGE_DISCARD_ON_LOOP);
93c6bb51
DH
2108
2109 /* If we couldn't set up the namespace this is probably due to a
2110 * missing capability. In this case, silently proceeed. */
2111 if (IN_SET(r, -EPERM, -EACCES)) {
2112 log_open();
2113 log_unit_debug_errno(u, r, "Failed to set up namespace, assuming containerized execution, ignoring: %m");
2114 log_close();
2115 r = 0;
2116 }
2117
2118 return r;
2119}
2120
915e6d16
LP
2121static int apply_working_directory(
2122 const ExecContext *context,
2123 const ExecParameters *params,
2124 const char *home,
376fecf6
LP
2125 const bool needs_mount_ns,
2126 int *exit_status) {
915e6d16 2127
6732edab 2128 const char *d, *wd;
2b3c1b9e
DH
2129
2130 assert(context);
376fecf6 2131 assert(exit_status);
2b3c1b9e 2132
6732edab
LP
2133 if (context->working_directory_home) {
2134
376fecf6
LP
2135 if (!home) {
2136 *exit_status = EXIT_CHDIR;
6732edab 2137 return -ENXIO;
376fecf6 2138 }
6732edab 2139
2b3c1b9e 2140 wd = home;
6732edab
LP
2141
2142 } else if (context->working_directory)
2b3c1b9e
DH
2143 wd = context->working_directory;
2144 else
2145 wd = "/";
e7f1e7c6
DH
2146
2147 if (params->flags & EXEC_APPLY_CHROOT) {
2148 if (!needs_mount_ns && context->root_directory)
376fecf6
LP
2149 if (chroot(context->root_directory) < 0) {
2150 *exit_status = EXIT_CHROOT;
e7f1e7c6 2151 return -errno;
376fecf6 2152 }
e7f1e7c6 2153
2b3c1b9e
DH
2154 d = wd;
2155 } else
3b0e5bb5 2156 d = prefix_roota(context->root_directory, wd);
e7f1e7c6 2157
376fecf6
LP
2158 if (chdir(d) < 0 && !context->working_directory_missing_ok) {
2159 *exit_status = EXIT_CHDIR;
2b3c1b9e 2160 return -errno;
376fecf6 2161 }
e7f1e7c6
DH
2162
2163 return 0;
2164}
2165
74dd6b51
LP
2166static int setup_keyring(Unit *u, const ExecParameters *p, uid_t uid, gid_t gid) {
2167 key_serial_t keyring;
2168
2169 assert(u);
2170 assert(p);
2171
2172 /* Let's set up a new per-service "session" kernel keyring for each system service. This has the benefit that
2173 * each service runs with its own keyring shared among all processes of the service, but with no hook-up beyond
2174 * that scope, and in particular no link to the per-UID keyring. If we don't do this the keyring will be
2175 * automatically created on-demand and then linked to the per-UID keyring, by the kernel. The kernel's built-in
2176 * on-demand behaviour is very appropriate for login users, but probably not so much for system services, where
2177 * UIDs are not necessarily specific to a service but reused (at least in the case of UID 0). */
2178
2179 if (!(p->flags & EXEC_NEW_KEYRING))
2180 return 0;
2181
2182 keyring = keyctl(KEYCTL_JOIN_SESSION_KEYRING, 0, 0, 0, 0);
2183 if (keyring == -1) {
2184 if (errno == ENOSYS)
2185 log_debug_errno(errno, "Kernel keyring not supported, ignoring.");
2186 else if (IN_SET(errno, EACCES, EPERM))
2187 log_debug_errno(errno, "Kernel keyring access prohibited, ignoring.");
2188 else if (errno == EDQUOT)
2189 log_debug_errno(errno, "Out of kernel keyrings to allocate, ignoring.");
2190 else
2191 return log_error_errno(errno, "Setting up kernel keyring failed: %m");
2192
2193 return 0;
2194 }
2195
b3415f5d
LP
2196 /* Populate they keyring with the invocation ID by default. */
2197 if (!sd_id128_is_null(u->invocation_id)) {
2198 key_serial_t key;
2199
2200 key = add_key("user", "invocation_id", &u->invocation_id, sizeof(u->invocation_id), KEY_SPEC_SESSION_KEYRING);
2201 if (key == -1)
2202 log_debug_errno(errno, "Failed to add invocation ID to keyring, ignoring: %m");
2203 else {
2204 if (keyctl(KEYCTL_SETPERM, key,
2205 KEY_POS_VIEW|KEY_POS_READ|KEY_POS_SEARCH|
2206 KEY_USR_VIEW|KEY_USR_READ|KEY_USR_SEARCH, 0, 0) < 0)
2207 return log_error_errno(errno, "Failed to restrict invocation ID permission: %m");
2208 }
2209 }
2210
74dd6b51
LP
2211 /* And now, make the keyring owned by the service's user */
2212 if (uid_is_valid(uid) || gid_is_valid(gid))
2213 if (keyctl(KEYCTL_CHOWN, keyring, uid, gid, 0) < 0)
2214 return log_error_errno(errno, "Failed to change ownership of session keyring: %m");
2215
2216 return 0;
2217}
2218
29206d46
LP
/* Appends each non-negative fd of pair[] to array[], advancing *n for every entry written. A NULL pair is
 * ignored. The caller must make sure array[] has room for up to two more entries. */
static void append_socket_pair(int *array, unsigned *n, int pair[2]) {
        unsigned k;

        assert(array);
        assert(n);

        if (!pair)
                return;

        for (k = 0; k < 2; k++) {
                if (pair[k] < 0)
                        continue;

                array[*n] = pair[k];
                (*n)++;
        }
}
2231
a34ceba6
LP
2232static int close_remaining_fds(
2233 const ExecParameters *params,
2234 ExecRuntime *runtime,
29206d46 2235 DynamicCreds *dcreds,
00d9ef85 2236 int user_lookup_fd,
a34ceba6
LP
2237 int socket_fd,
2238 int *fds, unsigned n_fds) {
2239
2240 unsigned n_dont_close = 0;
00d9ef85 2241 int dont_close[n_fds + 12];
a34ceba6
LP
2242
2243 assert(params);
2244
2245 if (params->stdin_fd >= 0)
2246 dont_close[n_dont_close++] = params->stdin_fd;
2247 if (params->stdout_fd >= 0)
2248 dont_close[n_dont_close++] = params->stdout_fd;
2249 if (params->stderr_fd >= 0)
2250 dont_close[n_dont_close++] = params->stderr_fd;
2251
2252 if (socket_fd >= 0)
2253 dont_close[n_dont_close++] = socket_fd;
2254 if (n_fds > 0) {
2255 memcpy(dont_close + n_dont_close, fds, sizeof(int) * n_fds);
2256 n_dont_close += n_fds;
2257 }
2258
29206d46
LP
2259 if (runtime)
2260 append_socket_pair(dont_close, &n_dont_close, runtime->netns_storage_socket);
2261
2262 if (dcreds) {
2263 if (dcreds->user)
2264 append_socket_pair(dont_close, &n_dont_close, dcreds->user->storage_socket);
2265 if (dcreds->group)
2266 append_socket_pair(dont_close, &n_dont_close, dcreds->group->storage_socket);
a34ceba6
LP
2267 }
2268
00d9ef85
LP
2269 if (user_lookup_fd >= 0)
2270 dont_close[n_dont_close++] = user_lookup_fd;
2271
a34ceba6
LP
2272 return close_all_fds(dont_close, n_dont_close);
2273}
2274
00d9ef85
LP
2275static int send_user_lookup(
2276 Unit *unit,
2277 int user_lookup_fd,
2278 uid_t uid,
2279 gid_t gid) {
2280
2281 assert(unit);
2282
2283 /* Send the resolved UID/GID to PID 1 after we learnt it. We send a single datagram, containing the UID/GID
2284 * data as well as the unit name. Note that we suppress sending this if no user/group to resolve was
2285 * specified. */
2286
2287 if (user_lookup_fd < 0)
2288 return 0;
2289
2290 if (!uid_is_valid(uid) && !gid_is_valid(gid))
2291 return 0;
2292
2293 if (writev(user_lookup_fd,
2294 (struct iovec[]) {
2295 { .iov_base = &uid, .iov_len = sizeof(uid) },
2296 { .iov_base = &gid, .iov_len = sizeof(gid) },
2297 { .iov_base = unit->id, .iov_len = strlen(unit->id) }}, 3) < 0)
2298 return -errno;
2299
2300 return 0;
2301}
2302
6732edab
LP
2303static int acquire_home(const ExecContext *c, uid_t uid, const char** home, char **buf) {
2304 int r;
2305
2306 assert(c);
2307 assert(home);
2308 assert(buf);
2309
2310 /* If WorkingDirectory=~ is set, try to acquire a usable home directory. */
2311
2312 if (*home)
2313 return 0;
2314
2315 if (!c->working_directory_home)
2316 return 0;
2317
2318 if (uid == 0) {
2319 /* Hardcode /root as home directory for UID 0 */
2320 *home = "/root";
2321 return 1;
2322 }
2323
2324 r = get_home_dir(buf);
2325 if (r < 0)
2326 return r;
2327
2328 *home = *buf;
2329 return 1;
2330}
2331
ff0af2a1 2332static int exec_child(
f2341e0a 2333 Unit *unit,
ff0af2a1
LP
2334 ExecCommand *command,
2335 const ExecContext *context,
2336 const ExecParameters *params,
2337 ExecRuntime *runtime,
29206d46 2338 DynamicCreds *dcreds,
ff0af2a1
LP
2339 char **argv,
2340 int socket_fd,
52c239d7 2341 int named_iofds[3],
4c47affc
FB
2342 int *fds,
2343 unsigned n_storage_fds,
9b141911 2344 unsigned n_socket_fds,
ff0af2a1 2345 char **files_env,
00d9ef85 2346 int user_lookup_fd,
70dd455c
ZJS
2347 int *exit_status,
2348 char **error_message) {
d35fbf6b 2349
2065ca69 2350 _cleanup_strv_free_ char **our_env = NULL, **pass_env = NULL, **accum_env = NULL, **final_argv = NULL;
6732edab 2351 _cleanup_free_ char *mac_selinux_context_net = NULL, *home_buffer = NULL;
4d885bd3
DH
2352 _cleanup_free_ gid_t *supplementary_gids = NULL;
2353 const char *username = NULL, *groupname = NULL;
2b3c1b9e 2354 const char *home = NULL, *shell = NULL;
7bce046b
LP
2355 dev_t journal_stream_dev = 0;
2356 ino_t journal_stream_ino = 0;
165a31c0
LP
2357 bool needs_sandboxing, /* Do we need to set up full sandboxing? (i.e. all namespacing, all MAC stuff, caps, yadda yadda */
2358 needs_setuid, /* Do we need to do the actual setresuid()/setresgid() calls? */
2359 needs_mount_namespace, /* Do we need to set up a mount namespace for this kernel? */
2360 needs_ambient_hack; /* Do we need to apply the ambient capabilities hack? */
ecfbc84f 2361#ifdef HAVE_SELINUX
43b1f709 2362 bool use_selinux = false;
ecfbc84f
YW
2363#endif
2364#ifdef HAVE_SMACK
43b1f709 2365 bool use_smack = false;
ecfbc84f
YW
2366#endif
2367#ifdef HAVE_APPARMOR
43b1f709 2368 bool use_apparmor = false;
ecfbc84f 2369#endif
fed1e721
LP
2370 uid_t uid = UID_INVALID;
2371 gid_t gid = GID_INVALID;
4d885bd3 2372 int i, r, ngids = 0;
4c47affc 2373 unsigned n_fds;
3536f49e 2374 ExecDirectoryType dt;
165a31c0 2375 int secure_bits;
034c6ed7 2376
f2341e0a 2377 assert(unit);
5cb5a6ff
LP
2378 assert(command);
2379 assert(context);
d35fbf6b 2380 assert(params);
ff0af2a1 2381 assert(exit_status);
70dd455c
ZJS
2382 assert(error_message);
2383 /* We don't always set error_message, hence it must be initialized */
2384 assert(*error_message == NULL);
d35fbf6b
DM
2385
2386 rename_process_from_path(command->path);
2387
2388 /* We reset exactly these signals, since they are the
2389 * only ones we set to SIG_IGN in the main daemon. All
2390 * others we leave untouched because we set them to
2391 * SIG_DFL or a valid handler initially, both of which
2392 * will be demoted to SIG_DFL. */
ce30c8dc
LP
2393 (void) default_signals(SIGNALS_CRASH_HANDLER,
2394 SIGNALS_IGNORE, -1);
d35fbf6b
DM
2395
2396 if (context->ignore_sigpipe)
ce30c8dc 2397 (void) ignore_signals(SIGPIPE, -1);
d35fbf6b 2398
ff0af2a1
LP
2399 r = reset_signal_mask();
2400 if (r < 0) {
2401 *exit_status = EXIT_SIGNAL_MASK;
70dd455c
ZJS
2402 *error_message = strdup("Failed to reset signal mask");
2403 /* If strdup fails, here and below, we will just print the generic error message. */
ff0af2a1 2404 return r;
d35fbf6b 2405 }
034c6ed7 2406
d35fbf6b
DM
2407 if (params->idle_pipe)
2408 do_idle_pipe_dance(params->idle_pipe);
4f2d528d 2409
d35fbf6b
DM
2410 /* Close sockets very early to make sure we don't
2411 * block init reexecution because it cannot bind its
2412 * sockets */
ff0af2a1 2413
d35fbf6b 2414 log_forget_fds();
4f2d528d 2415
4c47affc 2416 n_fds = n_storage_fds + n_socket_fds;
00d9ef85 2417 r = close_remaining_fds(params, runtime, dcreds, user_lookup_fd, socket_fd, fds, n_fds);
ff0af2a1
LP
2418 if (r < 0) {
2419 *exit_status = EXIT_FDS;
70dd455c 2420 *error_message = strdup("Failed to close remaining fds");
ff0af2a1 2421 return r;
8c7be95e
LP
2422 }
2423
d35fbf6b
DM
2424 if (!context->same_pgrp)
2425 if (setsid() < 0) {
ff0af2a1 2426 *exit_status = EXIT_SETSID;
d35fbf6b
DM
2427 return -errno;
2428 }
9e2f7c11 2429
1e22b5cd 2430 exec_context_tty_reset(context, params);
d35fbf6b 2431
c891efaf 2432 if (unit_shall_confirm_spawn(unit)) {
7d5ceb64 2433 const char *vc = params->confirm_spawn;
3b20f877
FB
2434 _cleanup_free_ char *cmdline = NULL;
2435
2436 cmdline = exec_command_line(argv);
2437 if (!cmdline) {
2438 *exit_status = EXIT_CONFIRM;
2439 return -ENOMEM;
2440 }
d35fbf6b 2441
eedf223a 2442 r = ask_for_confirmation(vc, unit, cmdline);
3b20f877
FB
2443 if (r != CONFIRM_EXECUTE) {
2444 if (r == CONFIRM_PRETEND_SUCCESS) {
2445 *exit_status = EXIT_SUCCESS;
2446 return 0;
2447 }
ff0af2a1 2448 *exit_status = EXIT_CONFIRM;
70dd455c 2449 *error_message = strdup("Execution cancelled");
d35fbf6b 2450 return -ECANCELED;
d35fbf6b
DM
2451 }
2452 }
1a63a750 2453
29206d46
LP
2454 if (context->dynamic_user && dcreds) {
2455
409093fe
LP
2456 /* Make sure we bypass our own NSS module for any NSS checks */
2457 if (putenv((char*) "SYSTEMD_NSS_DYNAMIC_BYPASS=1") != 0) {
2458 *exit_status = EXIT_USER;
70dd455c 2459 *error_message = strdup("Failed to update environment");
409093fe
LP
2460 return -errno;
2461 }
2462
29206d46 2463 r = dynamic_creds_realize(dcreds, &uid, &gid);
ff0af2a1
LP
2464 if (r < 0) {
2465 *exit_status = EXIT_USER;
70dd455c 2466 *error_message = strdup("Failed to update dynamic user credentials");
ff0af2a1 2467 return r;
524daa8c 2468 }
524daa8c 2469
70dd455c 2470 if (!uid_is_valid(uid)) {
29206d46 2471 *exit_status = EXIT_USER;
70dd455c
ZJS
2472 (void) asprintf(error_message, "UID validation failed for \""UID_FMT"\"", uid);
2473 /* If asprintf fails, here and below, we will just print the generic error message. */
2474 return -ESRCH;
2475 }
2476
2477 if (!gid_is_valid(gid)) {
2478 *exit_status = EXIT_USER;
2479 (void) asprintf(error_message, "GID validation failed for \""GID_FMT"\"", gid);
29206d46
LP
2480 return -ESRCH;
2481 }
5bc7452b 2482
29206d46
LP
2483 if (dcreds->user)
2484 username = dcreds->user->name;
2485
2486 } else {
4d885bd3
DH
2487 r = get_fixed_user(context, &username, &uid, &gid, &home, &shell);
2488 if (r < 0) {
2489 *exit_status = EXIT_USER;
70dd455c 2490 *error_message = strdup("Failed to determine user credentials");
4d885bd3 2491 return r;
5bc7452b 2492 }
5bc7452b 2493
4d885bd3
DH
2494 r = get_fixed_group(context, &groupname, &gid);
2495 if (r < 0) {
2496 *exit_status = EXIT_GROUP;
70dd455c 2497 *error_message = strdup("Failed to determine group credentials");
4d885bd3
DH
2498 return r;
2499 }
cdc5d5c5 2500 }
29206d46 2501
cdc5d5c5
DH
2502 /* Initialize user supplementary groups and get SupplementaryGroups= ones */
2503 r = get_supplementary_groups(context, username, groupname, gid,
2504 &supplementary_gids, &ngids);
2505 if (r < 0) {
2506 *exit_status = EXIT_GROUP;
70dd455c 2507 *error_message = strdup("Failed to determine supplementary groups");
cdc5d5c5 2508 return r;
29206d46 2509 }
5bc7452b 2510
00d9ef85
LP
2511 r = send_user_lookup(unit, user_lookup_fd, uid, gid);
2512 if (r < 0) {
2513 *exit_status = EXIT_USER;
70dd455c 2514 *error_message = strdup("Failed to send user credentials to PID1");
00d9ef85
LP
2515 return r;
2516 }
2517
2518 user_lookup_fd = safe_close(user_lookup_fd);
2519
6732edab
LP
2520 r = acquire_home(context, uid, &home, &home_buffer);
2521 if (r < 0) {
2522 *exit_status = EXIT_CHDIR;
2523 *error_message = strdup("Failed to determine $HOME for user");
2524 return r;
2525 }
2526
d35fbf6b
DM
2527 /* If a socket is connected to STDIN/STDOUT/STDERR, we
2528 * must sure to drop O_NONBLOCK */
2529 if (socket_fd >= 0)
a34ceba6 2530 (void) fd_nonblock(socket_fd, false);
acbb0225 2531
52c239d7 2532 r = setup_input(context, params, socket_fd, named_iofds);
ff0af2a1
LP
2533 if (r < 0) {
2534 *exit_status = EXIT_STDIN;
70dd455c 2535 *error_message = strdup("Failed to set up stdin");
ff0af2a1 2536 return r;
d35fbf6b 2537 }
034c6ed7 2538
52c239d7 2539 r = setup_output(unit, context, params, STDOUT_FILENO, socket_fd, named_iofds, basename(command->path), uid, gid, &journal_stream_dev, &journal_stream_ino);
ff0af2a1
LP
2540 if (r < 0) {
2541 *exit_status = EXIT_STDOUT;
70dd455c 2542 *error_message = strdup("Failed to set up stdout");
ff0af2a1 2543 return r;
d35fbf6b
DM
2544 }
2545
52c239d7 2546 r = setup_output(unit, context, params, STDERR_FILENO, socket_fd, named_iofds, basename(command->path), uid, gid, &journal_stream_dev, &journal_stream_ino);
ff0af2a1
LP
2547 if (r < 0) {
2548 *exit_status = EXIT_STDERR;
70dd455c 2549 *error_message = strdup("Failed to set up stderr");
ff0af2a1 2550 return r;
d35fbf6b
DM
2551 }
2552
2553 if (params->cgroup_path) {
ff0af2a1
LP
2554 r = cg_attach_everywhere(params->cgroup_supported, params->cgroup_path, 0, NULL, NULL);
2555 if (r < 0) {
2556 *exit_status = EXIT_CGROUP;
70dd455c 2557 (void) asprintf(error_message, "Failed to attach to cgroup %s", params->cgroup_path);
ff0af2a1 2558 return r;
309bff19 2559 }
d35fbf6b 2560 }
309bff19 2561
d35fbf6b 2562 if (context->oom_score_adjust_set) {
d5243d62 2563 char t[DECIMAL_STR_MAX(context->oom_score_adjust)];
f2b68789 2564
d5243d62
LP
2565 /* When we can't make this change due to EPERM, then
2566 * let's silently skip over it. User namespaces
2567 * prohibit write access to this file, and we
2568 * shouldn't trip up over that. */
613b411c 2569
d5243d62 2570 sprintf(t, "%i", context->oom_score_adjust);
ad118bda 2571 r = write_string_file("/proc/self/oom_score_adj", t, 0);
6cb7fa17 2572 if (r == -EPERM || r == -EACCES) {
ff0af2a1 2573 log_open();
f2341e0a 2574 log_unit_debug_errno(unit, r, "Failed to adjust OOM setting, assuming containerized execution, ignoring: %m");
ff0af2a1
LP
2575 log_close();
2576 } else if (r < 0) {
2577 *exit_status = EXIT_OOM_ADJUST;
70dd455c 2578 *error_message = strdup("Failed to write /proc/self/oom_score_adj");
d35fbf6b 2579 return -errno;
613b411c 2580 }
d35fbf6b
DM
2581 }
2582
2583 if (context->nice_set)
2584 if (setpriority(PRIO_PROCESS, 0, context->nice) < 0) {
ff0af2a1 2585 *exit_status = EXIT_NICE;
d35fbf6b 2586 return -errno;
613b411c
LP
2587 }
2588
d35fbf6b
DM
2589 if (context->cpu_sched_set) {
2590 struct sched_param param = {
2591 .sched_priority = context->cpu_sched_priority,
2592 };
2593
ff0af2a1
LP
2594 r = sched_setscheduler(0,
2595 context->cpu_sched_policy |
2596 (context->cpu_sched_reset_on_fork ?
2597 SCHED_RESET_ON_FORK : 0),
2598 &param);
2599 if (r < 0) {
2600 *exit_status = EXIT_SETSCHEDULER;
d35fbf6b 2601 return -errno;
fc9b2a84 2602 }
d35fbf6b 2603 }
fc9b2a84 2604
d35fbf6b
DM
2605 if (context->cpuset)
2606 if (sched_setaffinity(0, CPU_ALLOC_SIZE(context->cpuset_ncpus), context->cpuset) < 0) {
ff0af2a1 2607 *exit_status = EXIT_CPUAFFINITY;
d35fbf6b 2608 return -errno;
034c6ed7
LP
2609 }
2610
d35fbf6b
DM
2611 if (context->ioprio_set)
2612 if (ioprio_set(IOPRIO_WHO_PROCESS, 0, context->ioprio) < 0) {
ff0af2a1 2613 *exit_status = EXIT_IOPRIO;
d35fbf6b
DM
2614 return -errno;
2615 }
da726a4d 2616
d35fbf6b
DM
2617 if (context->timer_slack_nsec != NSEC_INFINITY)
2618 if (prctl(PR_SET_TIMERSLACK, context->timer_slack_nsec) < 0) {
ff0af2a1 2619 *exit_status = EXIT_TIMERSLACK;
d35fbf6b 2620 return -errno;
4c2630eb 2621 }
9eba9da4 2622
21022b9d
LP
2623 if (context->personality != PERSONALITY_INVALID) {
2624 r = safe_personality(context->personality);
2625 if (r < 0) {
ff0af2a1 2626 *exit_status = EXIT_PERSONALITY;
21022b9d 2627 return r;
4c2630eb 2628 }
21022b9d 2629 }
94f04347 2630
d35fbf6b 2631 if (context->utmp_id)
df0ff127 2632 utmp_put_init_process(context->utmp_id, getpid_cached(), getsid(0),
6a93917d 2633 context->tty_path,
023a4f67
LP
2634 context->utmp_mode == EXEC_UTMP_INIT ? INIT_PROCESS :
2635 context->utmp_mode == EXEC_UTMP_LOGIN ? LOGIN_PROCESS :
2636 USER_PROCESS,
6a93917d 2637 username);
d35fbf6b 2638
e0d2adfd 2639 if (context->user) {
ff0af2a1
LP
2640 r = chown_terminal(STDIN_FILENO, uid);
2641 if (r < 0) {
2642 *exit_status = EXIT_STDIN;
2643 return r;
071830ff 2644 }
d35fbf6b 2645 }
8e274523 2646
a931ad47
LP
2647 /* If delegation is enabled we'll pass ownership of the cgroup
2648 * (but only in systemd's own controller hierarchy!) to the
2649 * user of the new process. */
584b8688 2650 if (params->cgroup_path && context->user && (params->flags & EXEC_CGROUP_DELEGATE)) {
ff0af2a1
LP
2651 r = cg_set_task_access(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, 0644, uid, gid);
2652 if (r < 0) {
2653 *exit_status = EXIT_CGROUP;
2654 return r;
d35fbf6b 2655 }
034c6ed7 2656
034c6ed7 2657
ff0af2a1
LP
2658 r = cg_set_group_access(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, 0755, uid, gid);
2659 if (r < 0) {
2660 *exit_status = EXIT_CGROUP;
2661 return r;
034c6ed7 2662 }
d35fbf6b 2663 }
034c6ed7 2664
3536f49e 2665 for (dt = 0; dt < _EXEC_DIRECTORY_MAX; dt++) {
8679efde 2666 r = setup_exec_directory(context, params, uid, gid, dt, exit_status);
3536f49e 2667 if (r < 0)
07689d5d 2668 return r;
d35fbf6b 2669 }
94f04347 2670
7bce046b 2671 r = build_environment(
fd63e712 2672 unit,
7bce046b
LP
2673 context,
2674 params,
2675 n_fds,
2676 home,
2677 username,
2678 shell,
2679 journal_stream_dev,
2680 journal_stream_ino,
2681 &our_env);
2065ca69
JW
2682 if (r < 0) {
2683 *exit_status = EXIT_MEMORY;
2684 return r;
2685 }
2686
2687 r = build_pass_environment(context, &pass_env);
2688 if (r < 0) {
2689 *exit_status = EXIT_MEMORY;
2690 return r;
2691 }
2692
2693 accum_env = strv_env_merge(5,
2694 params->environment,
2695 our_env,
2696 pass_env,
2697 context->environment,
2698 files_env,
2699 NULL);
2700 if (!accum_env) {
2701 *exit_status = EXIT_MEMORY;
2702 return -ENOMEM;
2703 }
1280503b 2704 accum_env = strv_env_clean(accum_env);
2065ca69 2705
096424d1 2706 (void) umask(context->umask);
b213e1c1 2707
74dd6b51
LP
2708 r = setup_keyring(unit, params, uid, gid);
2709 if (r < 0) {
2710 *exit_status = EXIT_KEYRING;
2711 return r;
2712 }
2713
165a31c0 2714 /* We need sandboxing if the caller asked us to apply it and the command isn't explicitly excepted from it */
1703fa41 2715 needs_sandboxing = (params->flags & EXEC_APPLY_SANDBOXING) && !(command->flags & EXEC_COMMAND_FULLY_PRIVILEGED);
7f18ef0a 2716
165a31c0
LP
2717 /* We need the ambient capability hack, if the caller asked us to apply it and the command is marked for it, and the kernel doesn't actually support ambient caps */
2718 needs_ambient_hack = (params->flags & EXEC_APPLY_SANDBOXING) && (command->flags & EXEC_COMMAND_AMBIENT_MAGIC) && !ambient_capabilities_supported();
7f18ef0a 2719
165a31c0
LP
2720 /* We need setresuid() if the caller asked us to apply sandboxing and the command isn't explicitly excepted from either whole sandboxing or just setresuid() itself, and the ambient hack is not desired */
2721 if (needs_ambient_hack)
2722 needs_setuid = false;
2723 else
2724 needs_setuid = (params->flags & EXEC_APPLY_SANDBOXING) && !(command->flags & (EXEC_COMMAND_FULLY_PRIVILEGED|EXEC_COMMAND_NO_SETUID));
2725
2726 if (needs_sandboxing) {
7f18ef0a
FK
2727 /* MAC enablement checks need to be done before a new mount ns is created, as they rely on /sys being
2728 * present. The actual MAC context application will happen later, as late as possible, to avoid
2729 * impacting our own code paths. */
2730
2731#ifdef HAVE_SELINUX
43b1f709 2732 use_selinux = mac_selinux_use();
7f18ef0a 2733#endif
7f18ef0a 2734#ifdef HAVE_SMACK
43b1f709 2735 use_smack = mac_smack_use();
7f18ef0a 2736#endif
7f18ef0a 2737#ifdef HAVE_APPARMOR
43b1f709 2738 use_apparmor = mac_apparmor_use();
7f18ef0a 2739#endif
165a31c0 2740 }
7f18ef0a 2741
165a31c0
LP
2742 if (needs_setuid) {
2743 if (context->pam_name && username) {
2744 r = setup_pam(context->pam_name, username, uid, gid, context->tty_path, &accum_env, fds, n_fds);
2745 if (r < 0) {
2746 *exit_status = EXIT_PAM;
2747 return r;
2748 }
2749 }
b213e1c1 2750 }
ac45f971 2751
d35fbf6b 2752 if (context->private_network && runtime && runtime->netns_storage_socket[0] >= 0) {
ff0af2a1
LP
2753 r = setup_netns(runtime->netns_storage_socket);
2754 if (r < 0) {
2755 *exit_status = EXIT_NETWORK;
2756 return r;
d35fbf6b
DM
2757 }
2758 }
169c1bda 2759
ee818b89 2760 needs_mount_namespace = exec_needs_mount_namespace(context, params, runtime);
ee818b89 2761 if (needs_mount_namespace) {
6818c54c 2762 r = apply_mount_namespace(unit, command, context, params, runtime);
3fbe8dbe
LP
2763 if (r < 0) {
2764 *exit_status = EXIT_NAMESPACE;
2765 return r;
2766 }
d35fbf6b 2767 }
81a2b7ce 2768
50b3dfb9 2769 /* Apply just after mount namespace setup */
376fecf6
LP
2770 r = apply_working_directory(context, params, home, needs_mount_namespace, exit_status);
2771 if (r < 0)
50b3dfb9 2772 return r;
50b3dfb9 2773
bbeea271 2774 /* Drop groups as early as possbile */
165a31c0 2775 if (needs_setuid) {
4d885bd3 2776 r = enforce_groups(context, gid, supplementary_gids, ngids);
096424d1
LP
2777 if (r < 0) {
2778 *exit_status = EXIT_GROUP;
2779 return r;
2780 }
165a31c0 2781 }
096424d1 2782
165a31c0 2783 if (needs_sandboxing) {
9008e1ac 2784#ifdef HAVE_SELINUX
43b1f709 2785 if (use_selinux && params->selinux_context_net && socket_fd >= 0) {
937ccce9
LP
2786 r = mac_selinux_get_child_mls_label(socket_fd, command->path, context->selinux_context, &mac_selinux_context_net);
2787 if (r < 0) {
2788 *exit_status = EXIT_SELINUX_CONTEXT;
2789 return r;
2790 }
9008e1ac 2791 }
9008e1ac
MS
2792#endif
2793
937ccce9
LP
2794 if (context->private_users) {
2795 r = setup_private_users(uid, gid);
2796 if (r < 0) {
2797 *exit_status = EXIT_USER;
2798 return r;
2799 }
d251207d
LP
2800 }
2801 }
2802
165a31c0
LP
2803 /* We repeat the fd closing here, to make sure that nothing is leaked from the PAM modules. Note that we are
2804 * more aggressive this time since socket_fd and the netns fds we don't need anymore. The custom endpoint fd
2805 * was needed to upload the policy and can now be closed as well. */
ff0af2a1
LP
2806 r = close_all_fds(fds, n_fds);
2807 if (r >= 0)
2808 r = shift_fds(fds, n_fds);
2809 if (r >= 0)
4c47affc 2810 r = flags_fds(fds, n_storage_fds, n_socket_fds, context->non_blocking);
ff0af2a1
LP
2811 if (r < 0) {
2812 *exit_status = EXIT_FDS;
2813 return r;
d35fbf6b 2814 }
e66cf1a3 2815
165a31c0 2816 secure_bits = context->secure_bits;
e66cf1a3 2817
165a31c0
LP
2818 if (needs_sandboxing) {
2819 uint64_t bset;
755d4b67 2820
d35fbf6b 2821 for (i = 0; i < _RLIMIT_MAX; i++) {
03857c43 2822
d35fbf6b
DM
2823 if (!context->rlimit[i])
2824 continue;
2825
03857c43
LP
2826 r = setrlimit_closest(i, context->rlimit[i]);
2827 if (r < 0) {
ff0af2a1 2828 *exit_status = EXIT_LIMITS;
03857c43 2829 return r;
e66cf1a3
LP
2830 }
2831 }
2832
f4170c67
LP
2833 /* Set the RTPRIO resource limit to 0, but only if nothing else was explicitly requested. */
2834 if (context->restrict_realtime && !context->rlimit[RLIMIT_RTPRIO]) {
2835 if (setrlimit(RLIMIT_RTPRIO, &RLIMIT_MAKE_CONST(0)) < 0) {
2836 *exit_status = EXIT_LIMITS;
2837 return -errno;
2838 }
2839 }
2840
165a31c0
LP
2841 bset = context->capability_bounding_set;
2842 /* If the ambient caps hack is enabled (which means the kernel can't do them, and the user asked for
2843 * our magic fallback), then let's add some extra caps, so that the service can drop privs of its own,
2844 * instead of us doing that */
2845 if (needs_ambient_hack)
2846 bset |= (UINT64_C(1) << CAP_SETPCAP) |
2847 (UINT64_C(1) << CAP_SETUID) |
2848 (UINT64_C(1) << CAP_SETGID);
2849
2850 if (!cap_test_all(bset)) {
2851 r = capability_bounding_set_drop(bset, false);
ff0af2a1
LP
2852 if (r < 0) {
2853 *exit_status = EXIT_CAPABILITIES;
70dd455c 2854 *error_message = strdup("Failed to drop capabilities");
ff0af2a1 2855 return r;
3b8bddde 2856 }
4c2630eb 2857 }
3b8bddde 2858
755d4b67
IP
2859 /* This is done before enforce_user, but ambient set
2860 * does not survive over setresuid() if keep_caps is not set. */
165a31c0
LP
2861 if (!needs_ambient_hack &&
2862 context->capability_ambient_set != 0) {
755d4b67
IP
2863 r = capability_ambient_set_apply(context->capability_ambient_set, true);
2864 if (r < 0) {
2865 *exit_status = EXIT_CAPABILITIES;
70dd455c 2866 *error_message = strdup("Failed to apply ambient capabilities (before UID change)");
755d4b67
IP
2867 return r;
2868 }
755d4b67 2869 }
165a31c0 2870 }
755d4b67 2871
165a31c0 2872 if (needs_setuid) {
d35fbf6b 2873 if (context->user) {
ff0af2a1
LP
2874 r = enforce_user(context, uid);
2875 if (r < 0) {
2876 *exit_status = EXIT_USER;
70dd455c 2877 (void) asprintf(error_message, "Failed to change UID to "UID_FMT, uid);
ff0af2a1 2878 return r;
5b6319dc 2879 }
165a31c0
LP
2880
2881 if (!needs_ambient_hack &&
2882 context->capability_ambient_set != 0) {
755d4b67
IP
2883
2884 /* Fix the ambient capabilities after user change. */
2885 r = capability_ambient_set_apply(context->capability_ambient_set, false);
2886 if (r < 0) {
2887 *exit_status = EXIT_CAPABILITIES;
70dd455c 2888 *error_message = strdup("Failed to apply ambient capabilities (after UID change)");
755d4b67
IP
2889 return r;
2890 }
2891
2892 /* If we were asked to change user and ambient capabilities
2893 * were requested, we had to add keep-caps to the securebits
2894 * so that we would maintain the inherited capability set
2895 * through the setresuid(). Make sure that the bit is added
2896 * also to the context secure_bits so that we don't try to
2897 * drop the bit away next. */
2898
7f508f2c 2899 secure_bits |= 1<<SECURE_KEEP_CAPS;
755d4b67 2900 }
5b6319dc 2901 }
165a31c0 2902 }
d35fbf6b 2903
165a31c0 2904 if (needs_sandboxing) {
5cd9cd35
LP
2905 /* Apply the MAC contexts late, but before seccomp syscall filtering, as those should really be last to
2906 * influence our own codepaths as little as possible. Moreover, applying MAC contexts usually requires
2907 * syscalls that are subject to seccomp filtering, hence should probably be applied before the syscalls
2908 * are restricted. */
2909
2910#ifdef HAVE_SELINUX
43b1f709 2911 if (use_selinux) {
5cd9cd35
LP
2912 char *exec_context = mac_selinux_context_net ?: context->selinux_context;
2913
2914 if (exec_context) {
2915 r = setexeccon(exec_context);
2916 if (r < 0) {
2917 *exit_status = EXIT_SELINUX_CONTEXT;
70dd455c 2918 (void) asprintf(error_message, "Failed to set SELinux context to %s", exec_context);
5cd9cd35
LP
2919 return r;
2920 }
2921 }
2922 }
2923#endif
2924
7f18ef0a 2925#ifdef HAVE_SMACK
43b1f709 2926 if (use_smack) {
7f18ef0a
FK
2927 r = setup_smack(context, command);
2928 if (r < 0) {
2929 *exit_status = EXIT_SMACK_PROCESS_LABEL;
2930 *error_message = strdup("Failed to set SMACK process label");
2931 return r;
2932 }
5cd9cd35 2933 }
7f18ef0a 2934#endif
5cd9cd35
LP
2935
2936#ifdef HAVE_APPARMOR
43b1f709 2937 if (use_apparmor && context->apparmor_profile) {
5cd9cd35
LP
2938 r = aa_change_onexec(context->apparmor_profile);
2939 if (r < 0 && !context->apparmor_profile_ignore) {
2940 *exit_status = EXIT_APPARMOR_PROFILE;
70dd455c
ZJS
2941 (void) asprintf(error_message,
2942 "Failed to prepare AppArmor profile change to %s",
2943 context->apparmor_profile);
5cd9cd35
LP
2944 return -errno;
2945 }
2946 }
2947#endif
2948
165a31c0
LP
2949 /* PR_GET_SECUREBITS is not privileged, while PR_SET_SECUREBITS is. So to suppress potential EPERMs
2950 * we'll try not to call PR_SET_SECUREBITS unless necessary. */
755d4b67
IP
2951 if (prctl(PR_GET_SECUREBITS) != secure_bits)
2952 if (prctl(PR_SET_SECUREBITS, secure_bits) < 0) {
ff0af2a1 2953 *exit_status = EXIT_SECUREBITS;
70dd455c 2954 *error_message = strdup("Failed to set secure bits");
d35fbf6b 2955 return -errno;
ff01d048 2956 }
5b6319dc 2957
59eeb84b 2958 if (context_has_no_new_privileges(context))
d35fbf6b 2959 if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) < 0) {
ff0af2a1 2960 *exit_status = EXIT_NO_NEW_PRIVILEGES;
70dd455c 2961 *error_message = strdup("Failed to disable new privileges");
d35fbf6b
DM
2962 return -errno;
2963 }
2964
2965#ifdef HAVE_SECCOMP
469830d1
LP
2966 r = apply_address_families(unit, context);
2967 if (r < 0) {
2968 *exit_status = EXIT_ADDRESS_FAMILIES;
5b3637b4 2969 *error_message = strdup("Failed to restrict address families");
469830d1 2970 return r;
4c2630eb 2971 }
04aa0cb9 2972
469830d1
LP
2973 r = apply_memory_deny_write_execute(unit, context);
2974 if (r < 0) {
2975 *exit_status = EXIT_SECCOMP;
5b3637b4 2976 *error_message = strdup("Failed to disable writing to executable memory");
469830d1 2977 return r;
f3e43635 2978 }
f4170c67 2979
469830d1
LP
2980 r = apply_restrict_realtime(unit, context);
2981 if (r < 0) {
2982 *exit_status = EXIT_SECCOMP;
5b3637b4 2983 *error_message = strdup("Failed to apply realtime restrictions");
469830d1 2984 return r;
f4170c67
LP
2985 }
2986
add00535
LP
2987 r = apply_restrict_namespaces(unit, context);
2988 if (r < 0) {
2989 *exit_status = EXIT_SECCOMP;
70dd455c 2990 *error_message = strdup("Failed to apply namespace restrictions");
add00535
LP
2991 return r;
2992 }
2993
469830d1
LP
2994 r = apply_protect_sysctl(unit, context);
2995 if (r < 0) {
2996 *exit_status = EXIT_SECCOMP;
5b3637b4 2997 *error_message = strdup("Failed to apply sysctl restrictions");
469830d1 2998 return r;
502d704e
DH
2999 }
3000
469830d1
LP
3001 r = apply_protect_kernel_modules(unit, context);
3002 if (r < 0) {
3003 *exit_status = EXIT_SECCOMP;
5b3637b4 3004 *error_message = strdup("Failed to apply module loading restrictions");
469830d1 3005 return r;
59eeb84b
LP
3006 }
3007
469830d1
LP
3008 r = apply_private_devices(unit, context);
3009 if (r < 0) {
3010 *exit_status = EXIT_SECCOMP;
5b3637b4 3011 *error_message = strdup("Failed to set up private devices");
469830d1
LP
3012 return r;
3013 }
3014
3015 r = apply_syscall_archs(unit, context);
3016 if (r < 0) {
3017 *exit_status = EXIT_SECCOMP;
5b3637b4 3018 *error_message = strdup("Failed to apply syscall architecture restrictions");
469830d1 3019 return r;
ba128bb8
LP
3020 }
3021
78e864e5
TM
3022 r = apply_lock_personality(unit, context);
3023 if (r < 0) {
3024 *exit_status = EXIT_SECCOMP;
3025 *error_message = strdup("Failed to lock personalities");
3026 return r;
3027 }
3028
5cd9cd35
LP
3029 /* This really should remain the last step before the execve(), to make sure our own code is unaffected
3030 * by the filter as little as possible. */
165a31c0 3031 r = apply_syscall_filter(unit, context, needs_ambient_hack);
469830d1
LP
3032 if (r < 0) {
3033 *exit_status = EXIT_SECCOMP;
5b3637b4 3034 *error_message = strdup("Failed to apply syscall filters");
469830d1 3035 return r;
d35fbf6b
DM
3036 }
3037#endif
d35fbf6b 3038 }
034c6ed7 3039
00819cc1
LP
3040 if (!strv_isempty(context->unset_environment)) {
3041 char **ee = NULL;
3042
3043 ee = strv_env_delete(accum_env, 1, context->unset_environment);
3044 if (!ee) {
3045 *exit_status = EXIT_MEMORY;
3046 return -ENOMEM;
3047 }
3048
3049 strv_free(accum_env);
3050 accum_env = ee;
3051 }
3052
2065ca69 3053 final_argv = replace_env_argv(argv, accum_env);
d35fbf6b 3054 if (!final_argv) {
ff0af2a1 3055 *exit_status = EXIT_MEMORY;
70dd455c 3056 *error_message = strdup("Failed to prepare process arguments");
d35fbf6b
DM
3057 return -ENOMEM;
3058 }
034c6ed7 3059
553d2243 3060 if (_unlikely_(log_get_max_level() >= LOG_DEBUG)) {
d35fbf6b 3061 _cleanup_free_ char *line;
81a2b7ce 3062
d35fbf6b
DM
3063 line = exec_command_line(final_argv);
3064 if (line) {
3065 log_open();
f2341e0a 3066 log_struct(LOG_DEBUG,
f2341e0a
LP
3067 "EXECUTABLE=%s", command->path,
3068 LOG_UNIT_MESSAGE(unit, "Executing: %s", line),
ba360bb0 3069 LOG_UNIT_ID(unit),
f2341e0a 3070 NULL);
d35fbf6b
DM
3071 log_close();
3072 }
3073 }
dd305ec9 3074
2065ca69 3075 execve(command->path, final_argv, accum_env);
ff0af2a1 3076 *exit_status = EXIT_EXEC;
d35fbf6b
DM
3077 return -errno;
3078}
81a2b7ce 3079
f2341e0a
LP
3080int exec_spawn(Unit *unit,
3081 ExecCommand *command,
d35fbf6b
DM
3082 const ExecContext *context,
3083 const ExecParameters *params,
3084 ExecRuntime *runtime,
29206d46 3085 DynamicCreds *dcreds,
d35fbf6b 3086 pid_t *ret) {
8351ceae 3087
d35fbf6b 3088 _cleanup_strv_free_ char **files_env = NULL;
9b141911 3089 int *fds = NULL;
4c47affc 3090 unsigned n_storage_fds = 0, n_socket_fds = 0;
ff0af2a1
LP
3091 _cleanup_free_ char *line = NULL;
3092 int socket_fd, r;
52c239d7 3093 int named_iofds[3] = { -1, -1, -1 };
ff0af2a1 3094 char **argv;
d35fbf6b 3095 pid_t pid;
8351ceae 3096
f2341e0a 3097 assert(unit);
d35fbf6b
DM
3098 assert(command);
3099 assert(context);
3100 assert(ret);
3101 assert(params);
4c47affc 3102 assert(params->fds || (params->n_storage_fds + params->n_socket_fds <= 0));
4298d0b5 3103
d35fbf6b
DM
3104 if (context->std_input == EXEC_INPUT_SOCKET ||
3105 context->std_output == EXEC_OUTPUT_SOCKET ||
3106 context->std_error == EXEC_OUTPUT_SOCKET) {
17df7223 3107
4c47affc 3108 if (params->n_socket_fds > 1) {
f2341e0a 3109 log_unit_error(unit, "Got more than one socket.");
d35fbf6b 3110 return -EINVAL;
ff0af2a1 3111 }
eef65bf3 3112
4c47affc 3113 if (params->n_socket_fds == 0) {
488ab41c
AA
3114 log_unit_error(unit, "Got no socket.");
3115 return -EINVAL;
3116 }
3117
d35fbf6b
DM
3118 socket_fd = params->fds[0];
3119 } else {
3120 socket_fd = -1;
3121 fds = params->fds;
4c47affc 3122 n_storage_fds = params->n_storage_fds;
9b141911 3123 n_socket_fds = params->n_socket_fds;
d35fbf6b 3124 }
94f04347 3125
52c239d7
LB
3126 r = exec_context_named_iofds(unit, context, params, named_iofds);
3127 if (r < 0)
3128 return log_unit_error_errno(unit, r, "Failed to load a named file descriptor: %m");
3129
f2341e0a 3130 r = exec_context_load_environment(unit, context, &files_env);
ff0af2a1 3131 if (r < 0)
f2341e0a 3132 return log_unit_error_errno(unit, r, "Failed to load environment files: %m");
034c6ed7 3133
d35fbf6b 3134 argv = params->argv ?: command->argv;
d35fbf6b
DM
3135 line = exec_command_line(argv);
3136 if (!line)
3137 return log_oom();
fab56fc5 3138
f2341e0a 3139 log_struct(LOG_DEBUG,
f2341e0a
LP
3140 LOG_UNIT_MESSAGE(unit, "About to execute: %s", line),
3141 "EXECUTABLE=%s", command->path,
ba360bb0 3142 LOG_UNIT_ID(unit),
f2341e0a 3143 NULL);
d35fbf6b
DM
3144 pid = fork();
3145 if (pid < 0)
74129a12 3146 return log_unit_error_errno(unit, errno, "Failed to fork: %m");
d35fbf6b
DM
3147
3148 if (pid == 0) {
ff0af2a1 3149 int exit_status;
70dd455c 3150 _cleanup_free_ char *error_message = NULL;
ff0af2a1 3151
f2341e0a
LP
3152 r = exec_child(unit,
3153 command,
ff0af2a1
LP
3154 context,
3155 params,
3156 runtime,
29206d46 3157 dcreds,
ff0af2a1
LP
3158 argv,
3159 socket_fd,
52c239d7 3160 named_iofds,
4c47affc
FB
3161 fds,
3162 n_storage_fds,
9b141911 3163 n_socket_fds,
ff0af2a1 3164 files_env,
00d9ef85 3165 unit->manager->user_lookup_fds[1],
70dd455c
ZJS
3166 &exit_status,
3167 &error_message);
ff0af2a1 3168 if (r < 0) {
4c2630eb 3169 log_open();
70dd455c
ZJS
3170 if (error_message)
3171 log_struct_errno(LOG_ERR, r,
2b044526 3172 "MESSAGE_ID=" SD_MESSAGE_SPAWN_FAILED_STR,
70dd455c
ZJS
3173 LOG_UNIT_ID(unit),
3174 LOG_UNIT_MESSAGE(unit, "%s: %m",
3175 error_message),
3176 "EXECUTABLE=%s", command->path,
3177 NULL);
3ed0cd26 3178 else if (r == -ENOENT && (command->flags & EXEC_COMMAND_IGNORE_FAILURE))
4d8b0f0f
YW
3179 log_struct_errno(LOG_INFO, r,
3180 "MESSAGE_ID=" SD_MESSAGE_SPAWN_FAILED_STR,
3181 LOG_UNIT_ID(unit),
3182 LOG_UNIT_MESSAGE(unit, "Skipped spawning %s: %m",
3183 command->path),
3184 "EXECUTABLE=%s", command->path,
3185 NULL);
70dd455c
ZJS
3186 else
3187 log_struct_errno(LOG_ERR, r,
2b044526 3188 "MESSAGE_ID=" SD_MESSAGE_SPAWN_FAILED_STR,
70dd455c
ZJS
3189 LOG_UNIT_ID(unit),
3190 LOG_UNIT_MESSAGE(unit, "Failed at step %s spawning %s: %m",
3191 exit_status_to_string(exit_status, EXIT_STATUS_SYSTEMD),
3192 command->path),
3193 "EXECUTABLE=%s", command->path,
3194 NULL);
4c2630eb
MS
3195 }
3196
ff0af2a1 3197 _exit(exit_status);
034c6ed7
LP
3198 }
3199
f2341e0a 3200 log_unit_debug(unit, "Forked %s as "PID_FMT, command->path, pid);
23635a85 3201
80876c20
LP
3202 /* We add the new process to the cgroup both in the child (so
3203 * that we can be sure that no user code is ever executed
3204 * outside of the cgroup) and in the parent (so that we can be
3205 * sure that when we kill the cgroup the process will be
3206 * killed too). */
d35fbf6b 3207 if (params->cgroup_path)
dd305ec9 3208 (void) cg_attach(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, pid);
2da3263a 3209
b58b4116 3210 exec_status_start(&command->exec_status, pid);
9fb86720 3211
034c6ed7 3212 *ret = pid;
5cb5a6ff
LP
3213 return 0;
3214}
3215
034c6ed7 3216void exec_context_init(ExecContext *c) {
3536f49e
YW
3217 ExecDirectoryType i;
3218
034c6ed7
LP
3219 assert(c);
3220
4c12626c 3221 c->umask = 0022;
9eba9da4 3222 c->ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 0);
94f04347 3223 c->cpu_sched_policy = SCHED_OTHER;
071830ff 3224 c->syslog_priority = LOG_DAEMON|LOG_INFO;
74922904 3225 c->syslog_level_prefix = true;
353e12c2 3226 c->ignore_sigpipe = true;
3a43da28 3227 c->timer_slack_nsec = NSEC_INFINITY;
050f7277 3228 c->personality = PERSONALITY_INVALID;
3536f49e
YW
3229 for (i = 0; i < _EXEC_DIRECTORY_MAX; i++)
3230 c->directories[i].mode = 0755;
a103496c 3231 c->capability_bounding_set = CAP_ALL;
add00535 3232 c->restrict_namespaces = NAMESPACE_FLAGS_ALL;
034c6ed7
LP
3233}
3234
613b411c 3235void exec_context_done(ExecContext *c) {
5cb5a6ff 3236 unsigned l;
3536f49e 3237 ExecDirectoryType i;
5cb5a6ff
LP
3238
3239 assert(c);
3240
6796073e
LP
3241 c->environment = strv_free(c->environment);
3242 c->environment_files = strv_free(c->environment_files);
b4c14404 3243 c->pass_environment = strv_free(c->pass_environment);
00819cc1 3244 c->unset_environment = strv_free(c->unset_environment);
8c7be95e 3245
1f6b4113 3246 for (l = 0; l < ELEMENTSOF(c->rlimit); l++)
a1e58e8e 3247 c->rlimit[l] = mfree(c->rlimit[l]);
034c6ed7 3248
52c239d7
LB
3249 for (l = 0; l < 3; l++)
3250 c->stdio_fdname[l] = mfree(c->stdio_fdname[l]);
3251
a1e58e8e
LP
3252 c->working_directory = mfree(c->working_directory);
3253 c->root_directory = mfree(c->root_directory);
915e6d16 3254 c->root_image = mfree(c->root_image);
a1e58e8e
LP
3255 c->tty_path = mfree(c->tty_path);
3256 c->syslog_identifier = mfree(c->syslog_identifier);
3257 c->user = mfree(c->user);
3258 c->group = mfree(c->group);
034c6ed7 3259
6796073e 3260 c->supplementary_groups = strv_free(c->supplementary_groups);
94f04347 3261
a1e58e8e 3262 c->pam_name = mfree(c->pam_name);
5b6319dc 3263
2a624c36
AP
3264 c->read_only_paths = strv_free(c->read_only_paths);
3265 c->read_write_paths = strv_free(c->read_write_paths);
3266 c->inaccessible_paths = strv_free(c->inaccessible_paths);
82c121a4 3267
d2d6c096
LP
3268 bind_mount_free_many(c->bind_mounts, c->n_bind_mounts);
3269
82c121a4
LP
3270 if (c->cpuset)
3271 CPU_FREE(c->cpuset);
86a3475b 3272
a1e58e8e
LP
3273 c->utmp_id = mfree(c->utmp_id);
3274 c->selinux_context = mfree(c->selinux_context);
3275 c->apparmor_profile = mfree(c->apparmor_profile);
5b8e1b77 3276 c->smack_process_label = mfree(c->smack_process_label);
eef65bf3 3277
525d3cc7
LP
3278 c->syscall_filter = set_free(c->syscall_filter);
3279 c->syscall_archs = set_free(c->syscall_archs);
3280 c->address_families = set_free(c->address_families);
e66cf1a3 3281
3536f49e
YW
3282 for (i = 0; i < _EXEC_DIRECTORY_MAX; i++)
3283 c->directories[i].paths = strv_free(c->directories[i].paths);
e66cf1a3
LP
3284}
3285
3286int exec_context_destroy_runtime_directory(ExecContext *c, const char *runtime_prefix) {
3287 char **i;
3288
3289 assert(c);
3290
3291 if (!runtime_prefix)
3292 return 0;
3293
3536f49e 3294 STRV_FOREACH(i, c->directories[EXEC_DIRECTORY_RUNTIME].paths) {
e66cf1a3
LP
3295 _cleanup_free_ char *p;
3296
605405c6 3297 p = strjoin(runtime_prefix, "/", *i);
e66cf1a3
LP
3298 if (!p)
3299 return -ENOMEM;
3300
3301 /* We execute this synchronously, since we need to be
3302 * sure this is gone when we start the service
3303 * next. */
c6878637 3304 (void) rm_rf(p, REMOVE_ROOT);
e66cf1a3
LP
3305 }
3306
3307 return 0;
5cb5a6ff
LP
3308}
3309
43d0fcbd
LP
3310void exec_command_done(ExecCommand *c) {
3311 assert(c);
3312
a1e58e8e 3313 c->path = mfree(c->path);
43d0fcbd 3314
6796073e 3315 c->argv = strv_free(c->argv);
43d0fcbd
LP
3316}
3317
3318void exec_command_done_array(ExecCommand *c, unsigned n) {
3319 unsigned i;
3320
3321 for (i = 0; i < n; i++)
3322 exec_command_done(c+i);
3323}
3324
f1acf85a 3325ExecCommand* exec_command_free_list(ExecCommand *c) {
5cb5a6ff
LP
3326 ExecCommand *i;
3327
3328 while ((i = c)) {
71fda00f 3329 LIST_REMOVE(command, c, i);
43d0fcbd 3330 exec_command_done(i);
5cb5a6ff
LP
3331 free(i);
3332 }
f1acf85a
ZJS
3333
3334 return NULL;
5cb5a6ff
LP
3335}
3336
034c6ed7
LP
3337void exec_command_free_array(ExecCommand **c, unsigned n) {
3338 unsigned i;
3339
f1acf85a
ZJS
3340 for (i = 0; i < n; i++)
3341 c[i] = exec_command_free_list(c[i]);
034c6ed7
LP
3342}
3343
039f0e70 3344typedef struct InvalidEnvInfo {
f2341e0a 3345 Unit *unit;
039f0e70
LP
3346 const char *path;
3347} InvalidEnvInfo;
3348
3349static void invalid_env(const char *p, void *userdata) {
3350 InvalidEnvInfo *info = userdata;
3351
f2341e0a 3352 log_unit_error(info->unit, "Ignoring invalid environment assignment '%s': %s", p, info->path);
039f0e70
LP
3353}
3354
52c239d7
LB
3355const char* exec_context_fdname(const ExecContext *c, int fd_index) {
3356 assert(c);
3357
3358 switch (fd_index) {
3359 case STDIN_FILENO:
3360 if (c->std_input != EXEC_INPUT_NAMED_FD)
3361 return NULL;
3362 return c->stdio_fdname[STDIN_FILENO] ?: "stdin";
3363 case STDOUT_FILENO:
3364 if (c->std_output != EXEC_OUTPUT_NAMED_FD)
3365 return NULL;
3366 return c->stdio_fdname[STDOUT_FILENO] ?: "stdout";
3367 case STDERR_FILENO:
3368 if (c->std_error != EXEC_OUTPUT_NAMED_FD)
3369 return NULL;
3370 return c->stdio_fdname[STDERR_FILENO] ?: "stderr";
3371 default:
3372 return NULL;
3373 }
3374}
3375
3376int exec_context_named_iofds(Unit *unit, const ExecContext *c, const ExecParameters *p, int named_iofds[3]) {
3377 unsigned i, targets;
56fbd561 3378 const char* stdio_fdname[3];
4c47affc 3379 unsigned n_fds;
52c239d7
LB
3380
3381 assert(c);
3382 assert(p);
3383
3384 targets = (c->std_input == EXEC_INPUT_NAMED_FD) +
3385 (c->std_output == EXEC_OUTPUT_NAMED_FD) +
3386 (c->std_error == EXEC_OUTPUT_NAMED_FD);
3387
3388 for (i = 0; i < 3; i++)
3389 stdio_fdname[i] = exec_context_fdname(c, i);
3390
4c47affc
FB
3391 n_fds = p->n_storage_fds + p->n_socket_fds;
3392
3393 for (i = 0; i < n_fds && targets > 0; i++)
56fbd561
ZJS
3394 if (named_iofds[STDIN_FILENO] < 0 &&
3395 c->std_input == EXEC_INPUT_NAMED_FD &&
3396 stdio_fdname[STDIN_FILENO] &&
3397 streq(p->fd_names[i], stdio_fdname[STDIN_FILENO])) {
3398
52c239d7
LB
3399 named_iofds[STDIN_FILENO] = p->fds[i];
3400 targets--;
56fbd561
ZJS
3401
3402 } else if (named_iofds[STDOUT_FILENO] < 0 &&
3403 c->std_output == EXEC_OUTPUT_NAMED_FD &&
3404 stdio_fdname[STDOUT_FILENO] &&
3405 streq(p->fd_names[i], stdio_fdname[STDOUT_FILENO])) {
3406
52c239d7
LB
3407 named_iofds[STDOUT_FILENO] = p->fds[i];
3408 targets--;
56fbd561
ZJS
3409
3410 } else if (named_iofds[STDERR_FILENO] < 0 &&
3411 c->std_error == EXEC_OUTPUT_NAMED_FD &&
3412 stdio_fdname[STDERR_FILENO] &&
3413 streq(p->fd_names[i], stdio_fdname[STDERR_FILENO])) {
3414
52c239d7
LB
3415 named_iofds[STDERR_FILENO] = p->fds[i];
3416 targets--;
3417 }
3418
56fbd561 3419 return targets == 0 ? 0 : -ENOENT;
52c239d7
LB
3420}
3421
f2341e0a 3422int exec_context_load_environment(Unit *unit, const ExecContext *c, char ***l) {
8c7be95e
LP
3423 char **i, **r = NULL;
3424
3425 assert(c);
3426 assert(l);
3427
3428 STRV_FOREACH(i, c->environment_files) {
3429 char *fn;
52511fae
ZJS
3430 int k;
3431 unsigned n;
8c7be95e
LP
3432 bool ignore = false;
3433 char **p;
7fd1b19b 3434 _cleanup_globfree_ glob_t pglob = {};
8c7be95e
LP
3435
3436 fn = *i;
3437
3438 if (fn[0] == '-') {
3439 ignore = true;
313cefa1 3440 fn++;
8c7be95e
LP
3441 }
3442
3443 if (!path_is_absolute(fn)) {
8c7be95e
LP
3444 if (ignore)
3445 continue;
3446
3447 strv_free(r);
3448 return -EINVAL;
3449 }
3450
2bef10ab 3451 /* Filename supports globbing, take all matching files */
d8c92e8b
ZJS
3452 k = safe_glob(fn, 0, &pglob);
3453 if (k < 0) {
2bef10ab
PL
3454 if (ignore)
3455 continue;
8c7be95e 3456
2bef10ab 3457 strv_free(r);
d8c92e8b 3458 return k;
2bef10ab 3459 }
8c7be95e 3460
d8c92e8b
ZJS
3461 /* When we don't match anything, -ENOENT should be returned */
3462 assert(pglob.gl_pathc > 0);
3463
3464 for (n = 0; n < pglob.gl_pathc; n++) {
717603e3 3465 k = load_env_file(NULL, pglob.gl_pathv[n], NULL, &p);
2bef10ab
PL
3466 if (k < 0) {
3467 if (ignore)
3468 continue;
8c7be95e 3469
2bef10ab 3470 strv_free(r);
2bef10ab 3471 return k;
e9c1ea9d 3472 }
ebc05a09 3473 /* Log invalid environment variables with filename */
039f0e70
LP
3474 if (p) {
3475 InvalidEnvInfo info = {
f2341e0a 3476 .unit = unit,
039f0e70
LP
3477 .path = pglob.gl_pathv[n]
3478 };
3479
3480 p = strv_env_clean_with_callback(p, invalid_env, &info);
3481 }
8c7be95e 3482
2bef10ab
PL
3483 if (r == NULL)
3484 r = p;
3485 else {
3486 char **m;
8c7be95e 3487
2bef10ab
PL
3488 m = strv_env_merge(2, r, p);
3489 strv_free(r);
3490 strv_free(p);
c84a9488 3491 if (!m)
2bef10ab 3492 return -ENOMEM;
2bef10ab
PL
3493
3494 r = m;
3495 }
8c7be95e
LP
3496 }
3497 }
3498
3499 *l = r;
3500
3501 return 0;
3502}
3503
6ac8fdc9 3504static bool tty_may_match_dev_console(const char *tty) {
e1d75803 3505 _cleanup_free_ char *active = NULL;
7d6884b6 3506 char *console;
6ac8fdc9 3507
1e22b5cd
LP
3508 if (!tty)
3509 return true;
3510
a119ec7c 3511 tty = skip_dev_prefix(tty);
6ac8fdc9
MS
3512
3513 /* trivial identity? */
3514 if (streq(tty, "console"))
3515 return true;
3516
3517 console = resolve_dev_console(&active);
3518 /* if we could not resolve, assume it may */
3519 if (!console)
3520 return true;
3521
3522 /* "tty0" means the active VC, so it may be the same sometimes */
e1d75803 3523 return streq(console, tty) || (streq(console, "tty0") && tty_is_vc(tty));
6ac8fdc9
MS
3524}
3525
3526bool exec_context_may_touch_console(ExecContext *ec) {
1e22b5cd
LP
3527
3528 return (ec->tty_reset ||
3529 ec->tty_vhangup ||
3530 ec->tty_vt_disallocate ||
6ac8fdc9
MS
3531 is_terminal_input(ec->std_input) ||
3532 is_terminal_output(ec->std_output) ||
3533 is_terminal_output(ec->std_error)) &&
1e22b5cd 3534 tty_may_match_dev_console(exec_context_tty_path(ec));
6ac8fdc9
MS
3535}
3536
15ae422b
LP
3537static void strv_fprintf(FILE *f, char **l) {
3538 char **g;
3539
3540 assert(f);
3541
3542 STRV_FOREACH(g, l)
3543 fprintf(f, " %s", *g);
3544}
3545
5cb5a6ff 3546void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) {
c2bbd90b 3547 char **e, **d;
94f04347 3548 unsigned i;
3536f49e 3549 ExecDirectoryType dt;
add00535 3550 int r;
9eba9da4 3551
5cb5a6ff
LP
3552 assert(c);
3553 assert(f);
3554
4ad49000 3555 prefix = strempty(prefix);
5cb5a6ff
LP
3556
3557 fprintf(f,
94f04347
LP
3558 "%sUMask: %04o\n"
3559 "%sWorkingDirectory: %s\n"
451a074f 3560 "%sRootDirectory: %s\n"
15ae422b 3561 "%sNonBlocking: %s\n"
64747e2d 3562 "%sPrivateTmp: %s\n"
7f112f50 3563 "%sPrivateDevices: %s\n"
59eeb84b 3564 "%sProtectKernelTunables: %s\n"
e66a2f65 3565 "%sProtectKernelModules: %s\n"
59eeb84b 3566 "%sProtectControlGroups: %s\n"
d251207d
LP
3567 "%sPrivateNetwork: %s\n"
3568 "%sPrivateUsers: %s\n"
1b8689f9
LP
3569 "%sProtectHome: %s\n"
3570 "%sProtectSystem: %s\n"
5d997827 3571 "%sMountAPIVFS: %s\n"
f3e43635 3572 "%sIgnoreSIGPIPE: %s\n"
f4170c67
LP
3573 "%sMemoryDenyWriteExecute: %s\n"
3574 "%sRestrictRealtime: %s\n",
5cb5a6ff 3575 prefix, c->umask,
9eba9da4 3576 prefix, c->working_directory ? c->working_directory : "/",
451a074f 3577 prefix, c->root_directory ? c->root_directory : "/",
15ae422b 3578 prefix, yes_no(c->non_blocking),
64747e2d 3579 prefix, yes_no(c->private_tmp),
7f112f50 3580 prefix, yes_no(c->private_devices),
59eeb84b 3581 prefix, yes_no(c->protect_kernel_tunables),
e66a2f65 3582 prefix, yes_no(c->protect_kernel_modules),
59eeb84b 3583 prefix, yes_no(c->protect_control_groups),
d251207d
LP
3584 prefix, yes_no(c->private_network),
3585 prefix, yes_no(c->private_users),
1b8689f9
LP
3586 prefix, protect_home_to_string(c->protect_home),
3587 prefix, protect_system_to_string(c->protect_system),
5d997827 3588 prefix, yes_no(c->mount_apivfs),
f3e43635 3589 prefix, yes_no(c->ignore_sigpipe),
f4170c67
LP
3590 prefix, yes_no(c->memory_deny_write_execute),
3591 prefix, yes_no(c->restrict_realtime));
fb33a393 3592
915e6d16
LP
3593 if (c->root_image)
3594 fprintf(f, "%sRootImage: %s\n", prefix, c->root_image);
3595
8c7be95e
LP
3596 STRV_FOREACH(e, c->environment)
3597 fprintf(f, "%sEnvironment: %s\n", prefix, *e);
3598
3599 STRV_FOREACH(e, c->environment_files)
3600 fprintf(f, "%sEnvironmentFile: %s\n", prefix, *e);
94f04347 3601
b4c14404
FB
3602 STRV_FOREACH(e, c->pass_environment)
3603 fprintf(f, "%sPassEnvironment: %s\n", prefix, *e);
3604
00819cc1
LP
3605 STRV_FOREACH(e, c->unset_environment)
3606 fprintf(f, "%sUnsetEnvironment: %s\n", prefix, *e);
3607
53f47dfc
YW
3608 fprintf(f, "%sRuntimeDirectoryPreserve: %s\n", prefix, exec_preserve_mode_to_string(c->runtime_directory_preserve_mode));
3609
3536f49e
YW
3610 for (dt = 0; dt < _EXEC_DIRECTORY_MAX; dt++) {
3611 fprintf(f, "%s%sMode: %04o\n", prefix, exec_directory_type_to_string(dt), c->directories[dt].mode);
3612
3613 STRV_FOREACH(d, c->directories[dt].paths)
3614 fprintf(f, "%s%s: %s\n", prefix, exec_directory_type_to_string(dt), *d);
3615 }
c2bbd90b 3616
fb33a393
LP
3617 if (c->nice_set)
3618 fprintf(f,
3619 "%sNice: %i\n",
3620 prefix, c->nice);
3621
dd6c17b1 3622 if (c->oom_score_adjust_set)
fb33a393 3623 fprintf(f,
dd6c17b1
LP
3624 "%sOOMScoreAdjust: %i\n",
3625 prefix, c->oom_score_adjust);
9eba9da4 3626
94f04347 3627 for (i = 0; i < RLIM_NLIMITS; i++)
3c11da9d
EV
3628 if (c->rlimit[i]) {
3629 fprintf(f, "%s%s: " RLIM_FMT "\n",
3630 prefix, rlimit_to_string(i), c->rlimit[i]->rlim_max);
3631 fprintf(f, "%s%sSoft: " RLIM_FMT "\n",
3632 prefix, rlimit_to_string(i), c->rlimit[i]->rlim_cur);
3633 }
94f04347 3634
f8b69d1d 3635 if (c->ioprio_set) {
1756a011 3636 _cleanup_free_ char *class_str = NULL;
f8b69d1d 3637
837df140
YW
3638 r = ioprio_class_to_string_alloc(IOPRIO_PRIO_CLASS(c->ioprio), &class_str);
3639 if (r >= 0)
3640 fprintf(f, "%sIOSchedulingClass: %s\n", prefix, class_str);
3641
3642 fprintf(f, "%sIOPriority: %lu\n", prefix, IOPRIO_PRIO_DATA(c->ioprio));
f8b69d1d 3643 }
94f04347 3644
f8b69d1d 3645 if (c->cpu_sched_set) {
1756a011 3646 _cleanup_free_ char *policy_str = NULL;
f8b69d1d 3647
837df140
YW
3648 r = sched_policy_to_string_alloc(c->cpu_sched_policy, &policy_str);
3649 if (r >= 0)
3650 fprintf(f, "%sCPUSchedulingPolicy: %s\n", prefix, policy_str);
3651
94f04347 3652 fprintf(f,
38b48754
LP
3653 "%sCPUSchedulingPriority: %i\n"
3654 "%sCPUSchedulingResetOnFork: %s\n",
38b48754
LP
3655 prefix, c->cpu_sched_priority,
3656 prefix, yes_no(c->cpu_sched_reset_on_fork));
b929bf04 3657 }
94f04347 3658
82c121a4 3659 if (c->cpuset) {
94f04347 3660 fprintf(f, "%sCPUAffinity:", prefix);
82c121a4
LP
3661 for (i = 0; i < c->cpuset_ncpus; i++)
3662 if (CPU_ISSET_S(i, CPU_ALLOC_SIZE(c->cpuset_ncpus), c->cpuset))
43a99a7a 3663 fprintf(f, " %u", i);
94f04347
LP
3664 fputs("\n", f);
3665 }
3666
3a43da28 3667 if (c->timer_slack_nsec != NSEC_INFINITY)
ccd06097 3668 fprintf(f, "%sTimerSlackNSec: "NSEC_FMT "\n", prefix, c->timer_slack_nsec);
94f04347
LP
3669
3670 fprintf(f,
80876c20
LP
3671 "%sStandardInput: %s\n"
3672 "%sStandardOutput: %s\n"
3673 "%sStandardError: %s\n",
3674 prefix, exec_input_to_string(c->std_input),
3675 prefix, exec_output_to_string(c->std_output),
3676 prefix, exec_output_to_string(c->std_error));
3677
3678 if (c->tty_path)
3679 fprintf(f,
6ea832a2
LP
3680 "%sTTYPath: %s\n"
3681 "%sTTYReset: %s\n"
3682 "%sTTYVHangup: %s\n"
3683 "%sTTYVTDisallocate: %s\n",
3684 prefix, c->tty_path,
3685 prefix, yes_no(c->tty_reset),
3686 prefix, yes_no(c->tty_vhangup),
3687 prefix, yes_no(c->tty_vt_disallocate));
94f04347 3688
9f6444eb
LP
3689 if (IN_SET(c->std_output,
3690 EXEC_OUTPUT_SYSLOG,
3691 EXEC_OUTPUT_KMSG,
3692 EXEC_OUTPUT_JOURNAL,
3693 EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
3694 EXEC_OUTPUT_KMSG_AND_CONSOLE,
3695 EXEC_OUTPUT_JOURNAL_AND_CONSOLE) ||
3696 IN_SET(c->std_error,
3697 EXEC_OUTPUT_SYSLOG,
3698 EXEC_OUTPUT_KMSG,
3699 EXEC_OUTPUT_JOURNAL,
3700 EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
3701 EXEC_OUTPUT_KMSG_AND_CONSOLE,
3702 EXEC_OUTPUT_JOURNAL_AND_CONSOLE)) {
f8b69d1d 3703
5ce70e5b 3704 _cleanup_free_ char *fac_str = NULL, *lvl_str = NULL;
f8b69d1d 3705
837df140
YW
3706 r = log_facility_unshifted_to_string_alloc(c->syslog_priority >> 3, &fac_str);
3707 if (r >= 0)
3708 fprintf(f, "%sSyslogFacility: %s\n", prefix, fac_str);
f8b69d1d 3709
837df140
YW
3710 r = log_level_to_string_alloc(LOG_PRI(c->syslog_priority), &lvl_str);
3711 if (r >= 0)
3712 fprintf(f, "%sSyslogLevel: %s\n", prefix, lvl_str);
f8b69d1d 3713 }
94f04347 3714
07d46372
YW
3715 if (c->secure_bits) {
3716 _cleanup_free_ char *str = NULL;
3717
3718 r = secure_bits_to_string_alloc(c->secure_bits, &str);
3719 if (r >= 0)
3720 fprintf(f, "%sSecure Bits: %s\n", prefix, str);
3721 }
94f04347 3722
a103496c 3723 if (c->capability_bounding_set != CAP_ALL) {
dd1f5bd0 3724 _cleanup_free_ char *str = NULL;
94f04347 3725
dd1f5bd0
YW
3726 r = capability_set_to_string_alloc(c->capability_bounding_set, &str);
3727 if (r >= 0)
3728 fprintf(f, "%sCapabilityBoundingSet: %s\n", prefix, str);
755d4b67
IP
3729 }
3730
3731 if (c->capability_ambient_set != 0) {
dd1f5bd0 3732 _cleanup_free_ char *str = NULL;
755d4b67 3733
dd1f5bd0
YW
3734 r = capability_set_to_string_alloc(c->capability_ambient_set, &str);
3735 if (r >= 0)
3736 fprintf(f, "%sAmbientCapabilities: %s\n", prefix, str);
94f04347
LP
3737 }
3738
3739 if (c->user)
f2d3769a 3740 fprintf(f, "%sUser: %s\n", prefix, c->user);
94f04347 3741 if (c->group)
f2d3769a 3742 fprintf(f, "%sGroup: %s\n", prefix, c->group);
94f04347 3743
29206d46
LP
3744 fprintf(f, "%sDynamicUser: %s\n", prefix, yes_no(c->dynamic_user));
3745
15ae422b 3746 if (strv_length(c->supplementary_groups) > 0) {
94f04347 3747 fprintf(f, "%sSupplementaryGroups:", prefix);
15ae422b
LP
3748 strv_fprintf(f, c->supplementary_groups);
3749 fputs("\n", f);
3750 }
94f04347 3751
5b6319dc 3752 if (c->pam_name)
f2d3769a 3753 fprintf(f, "%sPAMName: %s\n", prefix, c->pam_name);
5b6319dc 3754
2a624c36
AP
3755 if (strv_length(c->read_write_paths) > 0) {
3756 fprintf(f, "%sReadWritePaths:", prefix);
3757 strv_fprintf(f, c->read_write_paths);
15ae422b
LP
3758 fputs("\n", f);
3759 }
3760
2a624c36
AP
3761 if (strv_length(c->read_only_paths) > 0) {
3762 fprintf(f, "%sReadOnlyPaths:", prefix);
3763 strv_fprintf(f, c->read_only_paths);
15ae422b
LP
3764 fputs("\n", f);
3765 }
94f04347 3766
2a624c36
AP
3767 if (strv_length(c->inaccessible_paths) > 0) {
3768 fprintf(f, "%sInaccessiblePaths:", prefix);
3769 strv_fprintf(f, c->inaccessible_paths);
94f04347
LP
3770 fputs("\n", f);
3771 }
2e22afe9 3772
d2d6c096
LP
3773 if (c->n_bind_mounts > 0)
3774 for (i = 0; i < c->n_bind_mounts; i++) {
3775 fprintf(f, "%s%s: %s:%s:%s\n", prefix,
3776 c->bind_mounts[i].read_only ? "BindReadOnlyPaths" : "BindPaths",
3777 c->bind_mounts[i].source,
3778 c->bind_mounts[i].destination,
3779 c->bind_mounts[i].recursive ? "rbind" : "norbind");
3780 }
3781
169c1bda
LP
3782 if (c->utmp_id)
3783 fprintf(f,
3784 "%sUtmpIdentifier: %s\n",
3785 prefix, c->utmp_id);
7b52a628
MS
3786
3787 if (c->selinux_context)
3788 fprintf(f,
5f8640fb
LP
3789 "%sSELinuxContext: %s%s\n",
3790 prefix, c->selinux_context_ignore ? "-" : "", c->selinux_context);
17df7223 3791
80c21aea
WC
3792 if (c->apparmor_profile)
3793 fprintf(f,
3794 "%sAppArmorProfile: %s%s\n",
3795 prefix, c->apparmor_profile_ignore ? "-" : "", c->apparmor_profile);
3796
3797 if (c->smack_process_label)
3798 fprintf(f,
3799 "%sSmackProcessLabel: %s%s\n",
3800 prefix, c->smack_process_label_ignore ? "-" : "", c->smack_process_label);
3801
050f7277 3802 if (c->personality != PERSONALITY_INVALID)
ac45f971
LP
3803 fprintf(f,
3804 "%sPersonality: %s\n",
3805 prefix, strna(personality_to_string(c->personality)));
3806
78e864e5
TM
3807 fprintf(f,
3808 "%sLockPersonality: %s\n",
3809 prefix, yes_no(c->lock_personality));
3810
17df7223 3811 if (c->syscall_filter) {
351a19b1 3812#ifdef HAVE_SECCOMP
17df7223
LP
3813 Iterator j;
3814 void *id;
3815 bool first = true;
351a19b1 3816#endif
17df7223
LP
3817
3818 fprintf(f,
57183d11 3819 "%sSystemCallFilter: ",
17df7223
LP
3820 prefix);
3821
3822 if (!c->syscall_whitelist)
3823 fputc('~', f);
3824
351a19b1 3825#ifdef HAVE_SECCOMP
17df7223
LP
3826 SET_FOREACH(id, c->syscall_filter, j) {
3827 _cleanup_free_ char *name = NULL;
3828
3829 if (first)
3830 first = false;
3831 else
3832 fputc(' ', f);
3833
57183d11 3834 name = seccomp_syscall_resolve_num_arch(SCMP_ARCH_NATIVE, PTR_TO_INT(id) - 1);
17df7223
LP
3835 fputs(strna(name), f);
3836 }
351a19b1 3837#endif
17df7223
LP
3838
3839 fputc('\n', f);
3840 }
3841
57183d11
LP
3842 if (c->syscall_archs) {
3843#ifdef HAVE_SECCOMP
3844 Iterator j;
3845 void *id;
3846#endif
3847
3848 fprintf(f,
3849 "%sSystemCallArchitectures:",
3850 prefix);
3851
3852#ifdef HAVE_SECCOMP
3853 SET_FOREACH(id, c->syscall_archs, j)
3854 fprintf(f, " %s", strna(seccomp_arch_to_string(PTR_TO_UINT32(id) - 1)));
3855#endif
3856 fputc('\n', f);
3857 }
3858
add00535
LP
3859 if (exec_context_restrict_namespaces_set(c)) {
3860 _cleanup_free_ char *s = NULL;
3861
3862 r = namespace_flag_to_string_many(c->restrict_namespaces, &s);
3863 if (r >= 0)
3864 fprintf(f, "%sRestrictNamespaces: %s\n",
3865 prefix, s);
3866 }
3867
b3267152 3868 if (c->syscall_errno > 0)
17df7223
LP
3869 fprintf(f,
3870 "%sSystemCallErrorNumber: %s\n",
3871 prefix, strna(errno_to_name(c->syscall_errno)));
eef65bf3
MS
3872
3873 if (c->apparmor_profile)
3874 fprintf(f,
3875 "%sAppArmorProfile: %s%s\n",
3876 prefix, c->apparmor_profile_ignore ? "-" : "", c->apparmor_profile);
5cb5a6ff
LP
3877}
3878
a931ad47
LP
3879bool exec_context_maintains_privileges(ExecContext *c) {
3880 assert(c);
3881
61233823 3882 /* Returns true if the process forked off would run under
a931ad47
LP
3883 * an unchanged UID or as root. */
3884
3885 if (!c->user)
3886 return true;
3887
3888 if (streq(c->user, "root") || streq(c->user, "0"))
3889 return true;
3890
3891 return false;
3892}
3893
7f452159
LP
3894int exec_context_get_effective_ioprio(ExecContext *c) {
3895 int p;
3896
3897 assert(c);
3898
3899 if (c->ioprio_set)
3900 return c->ioprio;
3901
3902 p = ioprio_get(IOPRIO_WHO_PROCESS, 0);
3903 if (p < 0)
3904 return IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 4);
3905
3906 return p;
3907}
3908
b58b4116 3909void exec_status_start(ExecStatus *s, pid_t pid) {
034c6ed7 3910 assert(s);
5cb5a6ff 3911
b58b4116
LP
3912 zero(*s);
3913 s->pid = pid;
3914 dual_timestamp_get(&s->start_timestamp);
3915}
3916
6ea832a2 3917void exec_status_exit(ExecStatus *s, ExecContext *context, pid_t pid, int code, int status) {
b58b4116
LP
3918 assert(s);
3919
0b1f4ae6 3920 if (s->pid && s->pid != pid)
b58b4116
LP
3921 zero(*s);
3922
034c6ed7 3923 s->pid = pid;
63983207 3924 dual_timestamp_get(&s->exit_timestamp);
9fb86720 3925
034c6ed7
LP
3926 s->code = code;
3927 s->status = status;
169c1bda 3928
6ea832a2
LP
3929 if (context) {
3930 if (context->utmp_id)
3931 utmp_put_dead_process(context->utmp_id, pid, code, status);
3932
1e22b5cd 3933 exec_context_tty_reset(context, NULL);
6ea832a2 3934 }
9fb86720
LP
3935}
3936
3937void exec_status_dump(ExecStatus *s, FILE *f, const char *prefix) {
3938 char buf[FORMAT_TIMESTAMP_MAX];
3939
3940 assert(s);
3941 assert(f);
3942
9fb86720
LP
3943 if (s->pid <= 0)
3944 return;
3945
4c940960
LP
3946 prefix = strempty(prefix);
3947
9fb86720 3948 fprintf(f,
ccd06097
ZJS
3949 "%sPID: "PID_FMT"\n",
3950 prefix, s->pid);
9fb86720 3951
af9d16e1 3952 if (dual_timestamp_is_set(&s->start_timestamp))
9fb86720
LP
3953 fprintf(f,
3954 "%sStart Timestamp: %s\n",
63983207 3955 prefix, format_timestamp(buf, sizeof(buf), s->start_timestamp.realtime));
9fb86720 3956
af9d16e1 3957 if (dual_timestamp_is_set(&s->exit_timestamp))
9fb86720
LP
3958 fprintf(f,
3959 "%sExit Timestamp: %s\n"
3960 "%sExit Code: %s\n"
3961 "%sExit Status: %i\n",
63983207 3962 prefix, format_timestamp(buf, sizeof(buf), s->exit_timestamp.realtime),
9fb86720
LP
3963 prefix, sigchld_code_to_string(s->code),
3964 prefix, s->status);
5cb5a6ff 3965}
44d8db9e 3966
9e2f7c11 3967char *exec_command_line(char **argv) {
44d8db9e
LP
3968 size_t k;
3969 char *n, *p, **a;
3970 bool first = true;
3971
9e2f7c11 3972 assert(argv);
44d8db9e 3973
9164977d 3974 k = 1;
9e2f7c11 3975 STRV_FOREACH(a, argv)
44d8db9e
LP
3976 k += strlen(*a)+3;
3977
5cd9cd35
LP
3978 n = new(char, k);
3979 if (!n)
44d8db9e
LP
3980 return NULL;
3981
3982 p = n;
9e2f7c11 3983 STRV_FOREACH(a, argv) {
44d8db9e
LP
3984
3985 if (!first)
3986 *(p++) = ' ';
3987 else
3988 first = false;
3989
3990 if (strpbrk(*a, WHITESPACE)) {
3991 *(p++) = '\'';
3992 p = stpcpy(p, *a);
3993 *(p++) = '\'';
3994 } else
3995 p = stpcpy(p, *a);
3996
3997 }
3998
9164977d
LP
3999 *p = 0;
4000
44d8db9e
LP
4001 /* FIXME: this doesn't really handle arguments that have
4002 * spaces and ticks in them */
4003
4004 return n;
4005}
4006
4007void exec_command_dump(ExecCommand *c, FILE *f, const char *prefix) {
e1d75803 4008 _cleanup_free_ char *cmd = NULL;
4c940960 4009 const char *prefix2;
44d8db9e
LP
4010
4011 assert(c);
4012 assert(f);
4013
4c940960 4014 prefix = strempty(prefix);
63c372cb 4015 prefix2 = strjoina(prefix, "\t");
44d8db9e 4016
9e2f7c11 4017 cmd = exec_command_line(c->argv);
44d8db9e
LP
4018 fprintf(f,
4019 "%sCommand Line: %s\n",
4020 prefix, cmd ? cmd : strerror(ENOMEM));
4021
9fb86720 4022 exec_status_dump(&c->exec_status, f, prefix2);
44d8db9e
LP
4023}
4024
4025void exec_command_dump_list(ExecCommand *c, FILE *f, const char *prefix) {
4026 assert(f);
4027
4c940960 4028 prefix = strempty(prefix);
44d8db9e
LP
4029
4030 LIST_FOREACH(command, c, c)
4031 exec_command_dump(c, f, prefix);
4032}
94f04347 4033
a6a80b4f
LP
4034void exec_command_append_list(ExecCommand **l, ExecCommand *e) {
4035 ExecCommand *end;
4036
4037 assert(l);
4038 assert(e);
4039
4040 if (*l) {
35b8ca3a 4041 /* It's kind of important, that we keep the order here */
71fda00f
LP
4042 LIST_FIND_TAIL(command, *l, end);
4043 LIST_INSERT_AFTER(command, *l, end, e);
a6a80b4f
LP
4044 } else
4045 *l = e;
4046}
4047
26fd040d
LP
4048int exec_command_set(ExecCommand *c, const char *path, ...) {
4049 va_list ap;
4050 char **l, *p;
4051
4052 assert(c);
4053 assert(path);
4054
4055 va_start(ap, path);
4056 l = strv_new_ap(path, ap);
4057 va_end(ap);
4058
4059 if (!l)
4060 return -ENOMEM;
4061
250a918d
LP
4062 p = strdup(path);
4063 if (!p) {
26fd040d
LP
4064 strv_free(l);
4065 return -ENOMEM;
4066 }
4067
4068 free(c->path);
4069 c->path = p;
4070
4071 strv_free(c->argv);
4072 c->argv = l;
4073
4074 return 0;
4075}
4076
86b23b07 4077int exec_command_append(ExecCommand *c, const char *path, ...) {
e63ff941 4078 _cleanup_strv_free_ char **l = NULL;
86b23b07 4079 va_list ap;
86b23b07
JS
4080 int r;
4081
4082 assert(c);
4083 assert(path);
4084
4085 va_start(ap, path);
4086 l = strv_new_ap(path, ap);
4087 va_end(ap);
4088
4089 if (!l)
4090 return -ENOMEM;
4091
e287086b 4092 r = strv_extend_strv(&c->argv, l, false);
e63ff941 4093 if (r < 0)
86b23b07 4094 return r;
86b23b07
JS
4095
4096 return 0;
4097}
4098
4099
613b411c
LP
4100static int exec_runtime_allocate(ExecRuntime **rt) {
4101
4102 if (*rt)
4103 return 0;
4104
4105 *rt = new0(ExecRuntime, 1);
f146f5e1 4106 if (!*rt)
613b411c
LP
4107 return -ENOMEM;
4108
4109 (*rt)->n_ref = 1;
4110 (*rt)->netns_storage_socket[0] = (*rt)->netns_storage_socket[1] = -1;
4111
4112 return 0;
4113}
4114
4115int exec_runtime_make(ExecRuntime **rt, ExecContext *c, const char *id) {
4116 int r;
4117
4118 assert(rt);
4119 assert(c);
4120 assert(id);
4121
4122 if (*rt)
4123 return 1;
4124
4125 if (!c->private_network && !c->private_tmp)
4126 return 0;
4127
4128 r = exec_runtime_allocate(rt);
4129 if (r < 0)
4130 return r;
4131
4132 if (c->private_network && (*rt)->netns_storage_socket[0] < 0) {
33df919d 4133 if (socketpair(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC, 0, (*rt)->netns_storage_socket) < 0)
613b411c
LP
4134 return -errno;
4135 }
4136
4137 if (c->private_tmp && !(*rt)->tmp_dir) {
4138 r = setup_tmp_dirs(id, &(*rt)->tmp_dir, &(*rt)->var_tmp_dir);
4139 if (r < 0)
4140 return r;
4141 }
4142
4143 return 1;
4144}
4145
4146ExecRuntime *exec_runtime_ref(ExecRuntime *r) {
4147 assert(r);
4148 assert(r->n_ref > 0);
4149
4150 r->n_ref++;
4151 return r;
4152}
4153
4154ExecRuntime *exec_runtime_unref(ExecRuntime *r) {
4155
4156 if (!r)
4157 return NULL;
4158
4159 assert(r->n_ref > 0);
4160
4161 r->n_ref--;
f2341e0a
LP
4162 if (r->n_ref > 0)
4163 return NULL;
4164
4165 free(r->tmp_dir);
4166 free(r->var_tmp_dir);
4167 safe_close_pair(r->netns_storage_socket);
6b430fdb 4168 return mfree(r);
613b411c
LP
4169}
4170
f2341e0a 4171int exec_runtime_serialize(Unit *u, ExecRuntime *rt, FILE *f, FDSet *fds) {
613b411c
LP
4172 assert(u);
4173 assert(f);
4174 assert(fds);
4175
4176 if (!rt)
4177 return 0;
4178
4179 if (rt->tmp_dir)
4180 unit_serialize_item(u, f, "tmp-dir", rt->tmp_dir);
4181
4182 if (rt->var_tmp_dir)
4183 unit_serialize_item(u, f, "var-tmp-dir", rt->var_tmp_dir);
4184
4185 if (rt->netns_storage_socket[0] >= 0) {
4186 int copy;
4187
4188 copy = fdset_put_dup(fds, rt->netns_storage_socket[0]);
4189 if (copy < 0)
4190 return copy;
4191
4192 unit_serialize_item_format(u, f, "netns-socket-0", "%i", copy);
4193 }
4194
4195 if (rt->netns_storage_socket[1] >= 0) {
4196 int copy;
4197
4198 copy = fdset_put_dup(fds, rt->netns_storage_socket[1]);
4199 if (copy < 0)
4200 return copy;
4201
4202 unit_serialize_item_format(u, f, "netns-socket-1", "%i", copy);
4203 }
4204
4205 return 0;
4206}
4207
f2341e0a 4208int exec_runtime_deserialize_item(Unit *u, ExecRuntime **rt, const char *key, const char *value, FDSet *fds) {
613b411c
LP
4209 int r;
4210
4211 assert(rt);
4212 assert(key);
4213 assert(value);
4214
4215 if (streq(key, "tmp-dir")) {
4216 char *copy;
4217
4218 r = exec_runtime_allocate(rt);
4219 if (r < 0)
f2341e0a 4220 return log_oom();
613b411c
LP
4221
4222 copy = strdup(value);
4223 if (!copy)
4224 return log_oom();
4225
4226 free((*rt)->tmp_dir);
4227 (*rt)->tmp_dir = copy;
4228
4229 } else if (streq(key, "var-tmp-dir")) {
4230 char *copy;
4231
4232 r = exec_runtime_allocate(rt);
4233 if (r < 0)
f2341e0a 4234 return log_oom();
613b411c
LP
4235
4236 copy = strdup(value);
4237 if (!copy)
4238 return log_oom();
4239
4240 free((*rt)->var_tmp_dir);
4241 (*rt)->var_tmp_dir = copy;
4242
4243 } else if (streq(key, "netns-socket-0")) {
4244 int fd;
4245
4246 r = exec_runtime_allocate(rt);
4247 if (r < 0)
f2341e0a 4248 return log_oom();
613b411c
LP
4249
4250 if (safe_atoi(value, &fd) < 0 || !fdset_contains(fds, fd))
f2341e0a 4251 log_unit_debug(u, "Failed to parse netns socket value: %s", value);
613b411c 4252 else {
03e334a1 4253 safe_close((*rt)->netns_storage_socket[0]);
613b411c
LP
4254 (*rt)->netns_storage_socket[0] = fdset_remove(fds, fd);
4255 }
4256 } else if (streq(key, "netns-socket-1")) {
4257 int fd;
4258
4259 r = exec_runtime_allocate(rt);
4260 if (r < 0)
f2341e0a 4261 return log_oom();
613b411c
LP
4262
4263 if (safe_atoi(value, &fd) < 0 || !fdset_contains(fds, fd))
f2341e0a 4264 log_unit_debug(u, "Failed to parse netns socket value: %s", value);
613b411c 4265 else {
03e334a1 4266 safe_close((*rt)->netns_storage_socket[1]);
613b411c
LP
4267 (*rt)->netns_storage_socket[1] = fdset_remove(fds, fd);
4268 }
4269 } else
4270 return 0;
4271
4272 return 1;
4273}
4274
4275static void *remove_tmpdir_thread(void *p) {
4276 _cleanup_free_ char *path = p;
4277
c6878637 4278 (void) rm_rf(path, REMOVE_ROOT|REMOVE_PHYSICAL);
613b411c
LP
4279 return NULL;
4280}
4281
4282void exec_runtime_destroy(ExecRuntime *rt) {
98b47d54
LP
4283 int r;
4284
613b411c
LP
4285 if (!rt)
4286 return;
4287
4288 /* If there are multiple users of this, let's leave the stuff around */
4289 if (rt->n_ref > 1)
4290 return;
4291
4292 if (rt->tmp_dir) {
4293 log_debug("Spawning thread to nuke %s", rt->tmp_dir);
98b47d54
LP
4294
4295 r = asynchronous_job(remove_tmpdir_thread, rt->tmp_dir);
4296 if (r < 0) {
da927ba9 4297 log_warning_errno(r, "Failed to nuke %s: %m", rt->tmp_dir);
98b47d54
LP
4298 free(rt->tmp_dir);
4299 }
4300
613b411c
LP
4301 rt->tmp_dir = NULL;
4302 }
4303
4304 if (rt->var_tmp_dir) {
4305 log_debug("Spawning thread to nuke %s", rt->var_tmp_dir);
98b47d54
LP
4306
4307 r = asynchronous_job(remove_tmpdir_thread, rt->var_tmp_dir);
4308 if (r < 0) {
da927ba9 4309 log_warning_errno(r, "Failed to nuke %s: %m", rt->var_tmp_dir);
98b47d54
LP
4310 free(rt->var_tmp_dir);
4311 }
4312
613b411c
LP
4313 rt->var_tmp_dir = NULL;
4314 }
4315
3d94f76c 4316 safe_close_pair(rt->netns_storage_socket);
613b411c
LP
4317}
4318
80876c20
LP
4319static const char* const exec_input_table[_EXEC_INPUT_MAX] = {
4320 [EXEC_INPUT_NULL] = "null",
4321 [EXEC_INPUT_TTY] = "tty",
4322 [EXEC_INPUT_TTY_FORCE] = "tty-force",
4f2d528d 4323 [EXEC_INPUT_TTY_FAIL] = "tty-fail",
52c239d7
LB
4324 [EXEC_INPUT_SOCKET] = "socket",
4325 [EXEC_INPUT_NAMED_FD] = "fd",
80876c20
LP
4326};
4327
8a0867d6
LP
4328DEFINE_STRING_TABLE_LOOKUP(exec_input, ExecInput);
4329
94f04347 4330static const char* const exec_output_table[_EXEC_OUTPUT_MAX] = {
80876c20 4331 [EXEC_OUTPUT_INHERIT] = "inherit",
94f04347 4332 [EXEC_OUTPUT_NULL] = "null",
80876c20 4333 [EXEC_OUTPUT_TTY] = "tty",
94f04347 4334 [EXEC_OUTPUT_SYSLOG] = "syslog",
28dbc1e8 4335 [EXEC_OUTPUT_SYSLOG_AND_CONSOLE] = "syslog+console",
9a6bca7a 4336 [EXEC_OUTPUT_KMSG] = "kmsg",
28dbc1e8 4337 [EXEC_OUTPUT_KMSG_AND_CONSOLE] = "kmsg+console",
706343f4
LP
4338 [EXEC_OUTPUT_JOURNAL] = "journal",
4339 [EXEC_OUTPUT_JOURNAL_AND_CONSOLE] = "journal+console",
52c239d7
LB
4340 [EXEC_OUTPUT_SOCKET] = "socket",
4341 [EXEC_OUTPUT_NAMED_FD] = "fd",
94f04347
LP
4342};
4343
4344DEFINE_STRING_TABLE_LOOKUP(exec_output, ExecOutput);
023a4f67
LP
4345
4346static const char* const exec_utmp_mode_table[_EXEC_UTMP_MODE_MAX] = {
4347 [EXEC_UTMP_INIT] = "init",
4348 [EXEC_UTMP_LOGIN] = "login",
4349 [EXEC_UTMP_USER] = "user",
4350};
4351
4352DEFINE_STRING_TABLE_LOOKUP(exec_utmp_mode, ExecUtmpMode);
53f47dfc
YW
4353
4354static const char* const exec_preserve_mode_table[_EXEC_PRESERVE_MODE_MAX] = {
4355 [EXEC_PRESERVE_NO] = "no",
4356 [EXEC_PRESERVE_YES] = "yes",
4357 [EXEC_PRESERVE_RESTART] = "restart",
4358};
4359
4360DEFINE_STRING_TABLE_LOOKUP_WITH_BOOLEAN(exec_preserve_mode, ExecPreserveMode, EXEC_PRESERVE_YES);
3536f49e
YW
4361
4362static const char* const exec_directory_type_table[_EXEC_DIRECTORY_MAX] = {
4363 [EXEC_DIRECTORY_RUNTIME] = "RuntimeDirectory",
4364 [EXEC_DIRECTORY_STATE] = "StateDirectory",
4365 [EXEC_DIRECTORY_CACHE] = "CacheDirectory",
4366 [EXEC_DIRECTORY_LOGS] = "LogsDirectory",
4367 [EXEC_DIRECTORY_CONFIGURATION] = "ConfigurationDirectory",
4368};
4369
4370DEFINE_STRING_TABLE_LOOKUP(exec_directory_type, ExecDirectoryType);